aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib')
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysis.cpp557
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp171
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp372
-rw-r--r--contrib/llvm/lib/Analysis/AliasDebugger.cpp138
-rw-r--r--contrib/llvm/lib/Analysis/AliasSetTracker.cpp655
-rw-r--r--contrib/llvm/lib/Analysis/Analysis.cpp107
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp1270
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp63
-rw-r--r--contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp558
-rw-r--r--contrib/llvm/lib/Analysis/CFGPrinter.cpp164
-rw-r--r--contrib/llvm/lib/Analysis/CaptureTracking.cpp180
-rw-r--r--contrib/llvm/lib/Analysis/CodeMetrics.cpp96
-rw-r--r--contrib/llvm/lib/Analysis/ConstantFolding.cpp1578
-rw-r--r--contrib/llvm/lib/Analysis/CostModel.cpp247
-rw-r--r--contrib/llvm/lib/Analysis/DependenceAnalysis.cpp3838
-rw-r--r--contrib/llvm/lib/Analysis/DomPrinter.cpp232
-rw-r--r--contrib/llvm/lib/Analysis/DominanceFrontier.cpp141
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraph.cpp338
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp614
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp87
-rw-r--r--contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp101
-rw-r--r--contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp607
-rw-r--r--contrib/llvm/lib/Analysis/IPA/IPA.cpp31
-rw-r--r--contrib/llvm/lib/Analysis/IPA/InlineCost.cpp1239
-rw-r--r--contrib/llvm/lib/Analysis/IVUsers.cpp335
-rw-r--r--contrib/llvm/lib/Analysis/InstCount.cpp87
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp3215
-rw-r--r--contrib/llvm/lib/Analysis/Interval.cpp58
-rw-r--r--contrib/llvm/lib/Analysis/IntervalPartition.cpp114
-rw-r--r--contrib/llvm/lib/Analysis/LazyValueInfo.cpp1143
-rw-r--r--contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp137
-rw-r--r--contrib/llvm/lib/Analysis/LibCallSemantics.cpp63
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp692
-rw-r--r--contrib/llvm/lib/Analysis/Loads.cpp221
-rw-r--r--contrib/llvm/lib/Analysis/LoopInfo.cpp687
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp363
-rw-r--r--contrib/llvm/lib/Analysis/MemDepPrinter.cpp192
-rw-r--r--contrib/llvm/lib/Analysis/MemoryBuiltins.cpp819
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp1525
-rw-r--r--contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp87
-rw-r--r--contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp88
-rw-r--r--contrib/llvm/lib/Analysis/PHITransAddr.cpp443
-rw-r--r--contrib/llvm/lib/Analysis/PathNumbering.cpp521
-rw-r--r--contrib/llvm/lib/Analysis/PathProfileInfo.cpp433
-rw-r--r--contrib/llvm/lib/Analysis/PathProfileVerifier.cpp206
-rw-r--r--contrib/llvm/lib/Analysis/PostDominators.cpp51
-rw-r--r--contrib/llvm/lib/Analysis/ProfileDataLoader.cpp155
-rw-r--r--contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp188
-rw-r--r--contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp426
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfo.cpp1079
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp155
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp267
-rw-r--r--contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp383
-rw-r--r--contrib/llvm/lib/Analysis/PtrUseVisitor.cpp36
-rw-r--r--contrib/llvm/lib/Analysis/RegionInfo.cpp834
-rw-r--r--contrib/llvm/lib/Analysis/RegionPass.cpp275
-rw-r--r--contrib/llvm/lib/Analysis/RegionPrinter.cpp218
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp7057
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp173
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp1753
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp223
-rw-r--r--contrib/llvm/lib/Analysis/SparsePropagation.cpp347
-rw-r--r--contrib/llvm/lib/Analysis/TargetTransformInfo.cpp558
-rw-r--r--contrib/llvm/lib/Analysis/Trace.cpp53
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp300
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp2034
-rw-r--r--contrib/llvm/lib/Archive/Archive.cpp262
-rw-r--r--contrib/llvm/lib/Archive/ArchiveInternals.h88
-rw-r--r--contrib/llvm/lib/Archive/ArchiveReader.cpp633
-rw-r--r--contrib/llvm/lib/Archive/ArchiveWriter.cpp489
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.cpp884
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.h93
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.cpp4335
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.h410
-rw-r--r--contrib/llvm/lib/AsmParser/LLToken.h181
-rw-r--r--contrib/llvm/lib/AsmParser/Parser.cpp62
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitReader.cpp88
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp3111
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h349
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp371
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp39
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp2020
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp41
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp540
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h171
-rw-r--r--contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp943
-rw-r--r--contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h184
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.cpp52
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.h85
-rw-r--r--contrib/llvm/lib/CodeGen/Analysis.cpp349
-rw-r--r--contrib/llvm/lib/CodeGen/AntiDepBreaker.h71
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp136
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp2170
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp196
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp553
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp368
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h392
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp264
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h283
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp156
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp1711
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h383
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp2570
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h649
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp736
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h234
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp120
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp166
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp114
-rw-r--r--contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp466
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.cpp1725
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.h123
-rw-r--r--contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp202
-rw-r--r--contrib/llvm/lib/CodeGen/CallingConvLower.cpp180
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGen.cpp77
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp658
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h110
-rw-r--r--contrib/llvm/lib/CodeGen/DFAPacketizer.cpp225
-rw-r--r--contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp190
-rw-r--r--contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp183
-rw-r--r--contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp801
-rw-r--r--contrib/llvm/lib/CodeGen/EdgeBundles.cpp97
-rw-r--r--contrib/llvm/lib/CodeGen/ErlangGC.cpp81
-rw-r--r--contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp725
-rw-r--r--contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp74
-rw-r--r--contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp225
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadata.cpp178
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp27
-rw-r--r--contrib/llvm/lib/CodeGen/GCStrategy.cpp430
-rw-r--r--contrib/llvm/lib/CodeGen/IfConversion.cpp1583
-rw-r--r--contrib/llvm/lib/CodeGen/InlineSpiller.cpp1295
-rw-r--r--contrib/llvm/lib/CodeGen/InterferenceCache.cpp231
-rw-r--r--contrib/llvm/lib/CodeGen/InterferenceCache.h228
-rw-r--r--contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp565
-rw-r--r--contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp296
-rw-r--r--contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp152
-rw-r--r--contrib/llvm/lib/CodeGen/LexicalScopes.cpp335
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp995
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.h70
-rw-r--r--contrib/llvm/lib/CodeGen/LiveInterval.cpp951
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp1172
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp204
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp380
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeCalc.h242
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp387
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp154
-rw-r--r--contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp86
-rw-r--r--contrib/llvm/lib/CodeGen/LiveVariables.cpp826
-rw-r--r--contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp356
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp1181
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp61
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp1165
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp126
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCSE.cpp661
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp334
-rw-r--r--contrib/llvm/lib/CodeGen/MachineDominators.cpp59
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunction.cpp898
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp57
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp56
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp67
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp1867
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp330
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLICM.cpp1489
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp81
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp578
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp45
-rw-r--r--contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp55
-rw-r--r--contrib/llvm/lib/CodeGen/MachinePostDominators.cpp55
-rw-r--r--contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp360
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp364
-rw-r--r--contrib/llvm/lib/CodeGen/MachineScheduler.cpp2396
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSink.cpp712
-rw-r--r--contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp1290
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp1605
-rw-r--r--contrib/llvm/lib/CodeGen/OcamlGC.cpp37
-rw-r--r--contrib/llvm/lib/CodeGen/OptimizePHIs.cpp193
-rw-r--r--contrib/llvm/lib/CodeGen/PHIElimination.cpp644
-rw-r--r--contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp61
-rw-r--r--contrib/llvm/lib/CodeGen/PHIEliminationUtils.h25
-rw-r--r--contrib/llvm/lib/CodeGen/Passes.cpp746
-rw-r--r--contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp577
-rw-r--r--contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp776
-rw-r--r--contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp170
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp892
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.h173
-rw-r--r--contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp132
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBase.cpp145
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBase.h108
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBasic.cpp293
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocFast.cpp1117
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp1791
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp638
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp146
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp2193
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.h120
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterPressure.cpp793
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterScavenging.cpp443
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAG.cpp642
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp1322
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp100
-rw-r--r--contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp248
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp10214
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp1507
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp483
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp982
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h146
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp3924
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp1451
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp3042
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp1145
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h750
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp525
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp770
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp2795
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp655
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h114
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h56
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp799
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp3039
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp914
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h185
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp278
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp6382
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp6873
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h559
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp645
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp3019
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp299
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp2593
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/CodeGen/ShadowStackGC.cpp452
-rw-r--r--contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp1152
-rw-r--r--contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp502
-rw-r--r--contrib/llvm/lib/CodeGen/SlotIndexes.cpp250
-rw-r--r--contrib/llvm/lib/CodeGen/SpillPlacement.cpp381
-rw-r--r--contrib/llvm/lib/CodeGen/SpillPlacement.h156
-rw-r--r--contrib/llvm/lib/CodeGen/Spiller.cpp194
-rw-r--r--contrib/llvm/lib/CodeGen/Spiller.h47
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.cpp1432
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.h469
-rw-r--r--contrib/llvm/lib/CodeGen/StackColoring.cpp802
-rw-r--r--contrib/llvm/lib/CodeGen/StackProtector.cpp370
-rw-r--r--contrib/llvm/lib/CodeGen/StackSlotColoring.cpp439
-rw-r--r--contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp825
-rw-r--r--contrib/llvm/lib/CodeGen/TailDuplication.cpp970
-rw-r--r--contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp44
-rw-r--r--contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp739
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp1305
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp784
-rw-r--r--contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp52
-rw-r--r--contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp285
-rw-r--r--contrib/llvm/lib/CodeGen/TargetSchedule.cpp309
-rw-r--r--contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp1678
-rw-r--r--contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp215
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.cpp359
-rw-r--r--contrib/llvm/lib/DebugInfo/DIContext.cpp18
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp83
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h54
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFAttribute.h30
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp272
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h143
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFContext.cpp596
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFContext.h197
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp106
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h73
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp151
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h75
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp230
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h104
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugFrame.cpp391
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugFrame.h46
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp589
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h176
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp628
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugLine.h254
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.cpp67
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.h78
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp534
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFFormValue.h82
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFRelocMap.h22
-rw-r--r--contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h67
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp1308
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp252
-rw-r--r--contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp302
-rw-r--r--contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h96
-rw-r--r--contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h454
-rw-r--r--contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h70
-rw-r--r--contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c481
-rw-r--r--contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h259
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp1396
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp484
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp98
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h248
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp848
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JIT.h226
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp596
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h77
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp1301
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp932
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp336
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h113
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp226
-rw-r--r--contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp177
-rw-r--r--contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp264
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp214
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h43
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h78
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp537
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp851
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h97
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h344
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp311
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h71
-rw-r--r--contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp99
-rw-r--r--contrib/llvm/lib/IR/AsmWriter.cpp2236
-rw-r--r--contrib/llvm/lib/IR/AttributeImpl.h278
-rw-r--r--contrib/llvm/lib/IR/Attributes.cpp1180
-rw-r--r--contrib/llvm/lib/IR/AutoUpgrade.cpp393
-rw-r--r--contrib/llvm/lib/IR/BasicBlock.cpp371
-rw-r--r--contrib/llvm/lib/IR/ConstantFold.cpp2074
-rw-r--r--contrib/llvm/lib/IR/ConstantFold.h56
-rw-r--r--contrib/llvm/lib/IR/Constants.cpp2779
-rw-r--r--contrib/llvm/lib/IR/ConstantsContext.h774
-rw-r--r--contrib/llvm/lib/IR/Core.cpp2458
-rw-r--r--contrib/llvm/lib/IR/DIBuilder.cpp1101
-rw-r--r--contrib/llvm/lib/IR/DataLayout.cpp697
-rw-r--r--contrib/llvm/lib/IR/DebugInfo.cpp1209
-rw-r--r--contrib/llvm/lib/IR/DebugLoc.cpp315
-rw-r--r--contrib/llvm/lib/IR/Dominators.cpp302
-rw-r--r--contrib/llvm/lib/IR/Function.cpp707
-rw-r--r--contrib/llvm/lib/IR/GCOV.cpp283
-rw-r--r--contrib/llvm/lib/IR/GVMaterializer.cpp18
-rw-r--r--contrib/llvm/lib/IR/Globals.cpp269
-rw-r--r--contrib/llvm/lib/IR/IRBuilder.cpp153
-rw-r--r--contrib/llvm/lib/IR/InlineAsm.cpp295
-rw-r--r--contrib/llvm/lib/IR/Instruction.cpp555
-rw-r--r--contrib/llvm/lib/IR/Instructions.cpp3553
-rw-r--r--contrib/llvm/lib/IR/IntrinsicInst.cpp73
-rw-r--r--contrib/llvm/lib/IR/LLVMContext.cpp168
-rw-r--r--contrib/llvm/lib/IR/LLVMContextImpl.cpp156
-rw-r--r--contrib/llvm/lib/IR/LLVMContextImpl.h367
-rw-r--r--contrib/llvm/lib/IR/LeakDetector.cpp69
-rw-r--r--contrib/llvm/lib/IR/LeaksContext.h92
-rw-r--r--contrib/llvm/lib/IR/Metadata.cpp745
-rw-r--r--contrib/llvm/lib/IR/Module.cpp451
-rw-r--r--contrib/llvm/lib/IR/Pass.cpp276
-rw-r--r--contrib/llvm/lib/IR/PassManager.cpp1912
-rw-r--r--contrib/llvm/lib/IR/PassRegistry.cpp209
-rw-r--r--contrib/llvm/lib/IR/PrintModulePass.cpp136
-rw-r--r--contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h118
-rw-r--r--contrib/llvm/lib/IR/Type.cpp767
-rw-r--r--contrib/llvm/lib/IR/TypeFinder.cpp148
-rw-r--r--contrib/llvm/lib/IR/Use.cpp145
-rw-r--r--contrib/llvm/lib/IR/User.cpp90
-rw-r--r--contrib/llvm/lib/IR/Value.cpp701
-rw-r--r--contrib/llvm/lib/IR/ValueSymbolTable.cpp117
-rw-r--r--contrib/llvm/lib/IR/ValueTypes.cpp277
-rw-r--r--contrib/llvm/lib/IR/Verifier.cpp2144
-rw-r--r--contrib/llvm/lib/IRReader/IRReader.cpp89
-rw-r--r--contrib/llvm/lib/Linker/LinkModules.cpp1322
-rw-r--r--contrib/llvm/lib/Linker/Linker.cpp70
-rw-r--r--contrib/llvm/lib/MC/ELFObjectWriter.cpp1594
-rw-r--r--contrib/llvm/lib/MC/MCAsmBackend.cpp43
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfo.cpp143
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp53
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp64
-rw-r--r--contrib/llvm/lib/MC/MCAsmStreamer.cpp1440
-rw-r--r--contrib/llvm/lib/MC/MCAssembler.cpp1154
-rw-r--r--contrib/llvm/lib/MC/MCAtom.cpp97
-rw-r--r--contrib/llvm/lib/MC/MCCodeEmitter.cpp18
-rw-r--r--contrib/llvm/lib/MC/MCCodeGenInfo.cpp23
-rw-r--r--contrib/llvm/lib/MC/MCContext.cpp390
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler.cpp14
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp223
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/Disassembler.h121
-rw-r--r--contrib/llvm/lib/MC/MCDwarf.cpp1558
-rw-r--r--contrib/llvm/lib/MC/MCELF.cpp86
-rw-r--r--contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp51
-rw-r--r--contrib/llvm/lib/MC/MCELFStreamer.cpp552
-rw-r--r--contrib/llvm/lib/MC/MCExpr.cpp658
-rw-r--r--contrib/llvm/lib/MC/MCInst.cpp72
-rw-r--r--contrib/llvm/lib/MC/MCInstPrinter.cpp61
-rw-r--r--contrib/llvm/lib/MC/MCInstrAnalysis.cpp21
-rw-r--r--contrib/llvm/lib/MC/MCLabel.cpp23
-rw-r--r--contrib/llvm/lib/MC/MCMachOStreamer.cpp436
-rw-r--r--contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp22
-rw-r--r--contrib/llvm/lib/MC/MCModule.cpp45
-rw-r--r--contrib/llvm/lib/MC/MCNullStreamer.cpp123
-rw-r--r--contrib/llvm/lib/MC/MCObjectFileInfo.cpp683
-rw-r--r--contrib/llvm/lib/MC/MCObjectStreamer.cpp360
-rw-r--r--contrib/llvm/lib/MC/MCObjectWriter.cpp56
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmLexer.cpp516
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmParser.cpp4272
-rw-r--r--contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp499
-rw-r--r--contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp826
-rw-r--r--contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp622
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp32
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp50
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp22
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp19
-rw-r--r--contrib/llvm/lib/MC/MCPureStreamer.cpp242
-rw-r--r--contrib/llvm/lib/MC/MCRegisterInfo.cpp74
-rw-r--r--contrib/llvm/lib/MC/MCSection.cpp22
-rw-r--r--contrib/llvm/lib/MC/MCSectionCOFF.cpp84
-rw-r--r--contrib/llvm/lib/MC/MCSectionELF.cpp150
-rw-r--r--contrib/llvm/lib/MC/MCSectionMachO.cpp303
-rw-r--r--contrib/llvm/lib/MC/MCStreamer.cpp627
-rw-r--r--contrib/llvm/lib/MC/MCSubtargetInfo.cpp118
-rw-r--r--contrib/llvm/lib/MC/MCSymbol.cpp83
-rw-r--r--contrib/llvm/lib/MC/MCValue.cpp38
-rw-r--r--contrib/llvm/lib/MC/MCWin64EH.cpp278
-rw-r--r--contrib/llvm/lib/MC/MachObjectWriter.cpp939
-rw-r--r--contrib/llvm/lib/MC/SubtargetFeature.cpp374
-rw-r--r--contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp903
-rw-r--r--contrib/llvm/lib/MC/WinCOFFStreamer.cpp336
-rw-r--r--contrib/llvm/lib/Object/Archive.cpp301
-rw-r--r--contrib/llvm/lib/Object/Binary.cpp103
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp837
-rw-r--r--contrib/llvm/lib/Object/ELFObjectFile.cpp74
-rw-r--r--contrib/llvm/lib/Object/Error.cpp57
-rw-r--r--contrib/llvm/lib/Object/MachOObject.cpp422
-rw-r--r--contrib/llvm/lib/Object/MachOObjectFile.cpp1340
-rw-r--r--contrib/llvm/lib/Object/Object.cpp218
-rw-r--r--contrib/llvm/lib/Object/ObjectFile.cpp66
-rw-r--r--contrib/llvm/lib/Option/Arg.cpp122
-rw-r--r--contrib/llvm/lib/Option/ArgList.cpp385
-rw-r--r--contrib/llvm/lib/Option/OptTable.cpp387
-rw-r--r--contrib/llvm/lib/Option/Option.cpp202
-rw-r--r--contrib/llvm/lib/Support/APFloat.cpp3592
-rw-r--r--contrib/llvm/lib/Support/APInt.cpp2910
-rw-r--r--contrib/llvm/lib/Support/APSInt.cpp23
-rw-r--r--contrib/llvm/lib/Support/Allocator.cpp196
-rw-r--r--contrib/llvm/lib/Support/Atomic.cpp116
-rw-r--r--contrib/llvm/lib/Support/BlockFrequency.cpp130
-rw-r--r--contrib/llvm/lib/Support/BranchProbability.cpp36
-rw-r--r--contrib/llvm/lib/Support/COPYRIGHT.regex54
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp1420
-rw-r--r--contrib/llvm/lib/Support/ConstantRange.cpp731
-rw-r--r--contrib/llvm/lib/Support/ConvertUTF.c571
-rw-r--r--contrib/llvm/lib/Support/ConvertUTFWrapper.cpp76
-rw-r--r--contrib/llvm/lib/Support/CrashRecoveryContext.cpp346
-rw-r--r--contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp360
-rw-r--r--contrib/llvm/lib/Support/DataExtractor.cpp175
-rw-r--r--contrib/llvm/lib/Support/DataStream.cpp98
-rw-r--r--contrib/llvm/lib/Support/Debug.cpp134
-rw-r--r--contrib/llvm/lib/Support/DeltaAlgorithm.cpp114
-rw-r--r--contrib/llvm/lib/Support/Disassembler.cpp74
-rw-r--r--contrib/llvm/lib/Support/Dwarf.cpp724
-rw-r--r--contrib/llvm/lib/Support/DynamicLibrary.cpp189
-rw-r--r--contrib/llvm/lib/Support/Errno.cpp79
-rw-r--r--contrib/llvm/lib/Support/ErrorHandling.cpp99
-rw-r--r--contrib/llvm/lib/Support/FileOutputBuffer.cpp119
-rw-r--r--contrib/llvm/lib/Support/FileUtilities.cpp280
-rw-r--r--contrib/llvm/lib/Support/FoldingSet.cpp424
-rw-r--r--contrib/llvm/lib/Support/FormattedStream.cpp101
-rw-r--r--contrib/llvm/lib/Support/GraphWriter.cpp209
-rw-r--r--contrib/llvm/lib/Support/Hashing.cpp29
-rw-r--r--contrib/llvm/lib/Support/Host.cpp619
-rw-r--r--contrib/llvm/lib/Support/IncludeFile.cpp20
-rw-r--r--contrib/llvm/lib/Support/IntEqClasses.cpp70
-rw-r--r--contrib/llvm/lib/Support/IntervalMap.cpp161
-rw-r--r--contrib/llvm/lib/Support/IntrusiveRefCntPtr.cpp14
-rw-r--r--contrib/llvm/lib/Support/IsInf.cpp49
-rw-r--r--contrib/llvm/lib/Support/IsNAN.cpp33
-rw-r--r--contrib/llvm/lib/Support/Locale.cpp10
-rw-r--r--contrib/llvm/lib/Support/LocaleGeneric.inc17
-rw-r--r--contrib/llvm/lib/Support/LocaleWindows.inc15
-rw-r--r--contrib/llvm/lib/Support/LocaleXlocale.inc61
-rw-r--r--contrib/llvm/lib/Support/LockFileManager.cpp226
-rw-r--r--contrib/llvm/lib/Support/ManagedStatic.cpp81
-rw-r--r--contrib/llvm/lib/Support/Memory.cpp25
-rw-r--r--contrib/llvm/lib/Support/MemoryBuffer.cpp426
-rw-r--r--contrib/llvm/lib/Support/MemoryObject.cpp37
-rw-r--r--contrib/llvm/lib/Support/Mutex.cpp129
-rw-r--r--contrib/llvm/lib/Support/Path.cpp295
-rw-r--r--contrib/llvm/lib/Support/PathV2.cpp946
-rw-r--r--contrib/llvm/lib/Support/PluginLoader.cpp47
-rw-r--r--contrib/llvm/lib/Support/PrettyStackTrace.cpp137
-rw-r--r--contrib/llvm/lib/Support/Process.cpp89
-rw-r--r--contrib/llvm/lib/Support/Program.cpp60
-rw-r--r--contrib/llvm/lib/Support/RWMutex.cpp125
-rw-r--r--contrib/llvm/lib/Support/Regex.cpp170
-rw-r--r--contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp58
-rw-r--r--contrib/llvm/lib/Support/Signals.cpp34
-rw-r--r--contrib/llvm/lib/Support/SmallPtrSet.cpp273
-rw-r--r--contrib/llvm/lib/Support/SmallVector.cpp40
-rw-r--r--contrib/llvm/lib/Support/SourceMgr.cpp489
-rw-r--r--contrib/llvm/lib/Support/Statistic.cpp170
-rw-r--r--contrib/llvm/lib/Support/StreamableMemoryObject.cpp146
-rw-r--r--contrib/llvm/lib/Support/StringExtras.cpp59
-rw-r--r--contrib/llvm/lib/Support/StringMap.cpp238
-rw-r--r--contrib/llvm/lib/Support/StringPool.cpp35
-rw-r--r--contrib/llvm/lib/Support/StringRef.cpp467
-rw-r--r--contrib/llvm/lib/Support/SystemUtils.cpp55
-rw-r--r--contrib/llvm/lib/Support/TargetRegistry.cpp163
-rw-r--r--contrib/llvm/lib/Support/ThreadLocal.cpp92
-rw-r--r--contrib/llvm/lib/Support/Threading.cpp147
-rw-r--r--contrib/llvm/lib/Support/TimeValue.cpp62
-rw-r--r--contrib/llvm/lib/Support/Timer.cpp389
-rw-r--r--contrib/llvm/lib/Support/ToolOutputFile.cpp43
-rw-r--r--contrib/llvm/lib/Support/Triple.cpp785
-rw-r--r--contrib/llvm/lib/Support/Twine.cpp171
-rw-r--r--contrib/llvm/lib/Support/Unix/Host.inc69
-rw-r--r--contrib/llvm/lib/Support/Unix/Memory.inc353
-rw-r--r--contrib/llvm/lib/Support/Unix/Mutex.inc43
-rw-r--r--contrib/llvm/lib/Support/Unix/Path.inc900
-rw-r--r--contrib/llvm/lib/Support/Unix/PathV2.inc695
-rw-r--r--contrib/llvm/lib/Support/Unix/Process.inc343
-rw-r--r--contrib/llvm/lib/Support/Unix/Program.inc412
-rw-r--r--contrib/llvm/lib/Support/Unix/README.txt16
-rw-r--r--contrib/llvm/lib/Support/Unix/RWMutex.inc43
-rw-r--r--contrib/llvm/lib/Support/Unix/Signals.inc377
-rw-r--r--contrib/llvm/lib/Support/Unix/ThreadLocal.inc26
-rw-r--r--contrib/llvm/lib/Support/Unix/TimeValue.inc57
-rw-r--r--contrib/llvm/lib/Support/Unix/Unix.h81
-rw-r--r--contrib/llvm/lib/Support/Unix/Watchdog.inc32
-rw-r--r--contrib/llvm/lib/Support/Unix/system_error.inc34
-rw-r--r--contrib/llvm/lib/Support/Valgrind.cpp67
-rw-r--r--contrib/llvm/lib/Support/Watchdog.cpp23
-rw-r--r--contrib/llvm/lib/Support/Windows/DynamicLibrary.inc163
-rw-r--r--contrib/llvm/lib/Support/Windows/Host.inc22
-rw-r--r--contrib/llvm/lib/Support/Windows/Memory.inc235
-rw-r--r--contrib/llvm/lib/Support/Windows/Mutex.inc58
-rw-r--r--contrib/llvm/lib/Support/Windows/Path.inc926
-rw-r--r--contrib/llvm/lib/Support/Windows/PathV2.inc1022
-rw-r--r--contrib/llvm/lib/Support/Windows/Process.inc278
-rw-r--r--contrib/llvm/lib/Support/Windows/Program.inc399
-rw-r--r--contrib/llvm/lib/Support/Windows/RWMutex.inc134
-rw-r--r--contrib/llvm/lib/Support/Windows/Signals.inc492
-rw-r--r--contrib/llvm/lib/Support/Windows/ThreadLocal.inc53
-rw-r--r--contrib/llvm/lib/Support/Windows/TimeValue.inc51
-rw-r--r--contrib/llvm/lib/Support/Windows/Watchdog.inc24
-rw-r--r--contrib/llvm/lib/Support/Windows/Windows.h150
-rw-r--r--contrib/llvm/lib/Support/Windows/explicit_symbols.inc66
-rw-r--r--contrib/llvm/lib/Support/Windows/system_error.inc142
-rw-r--r--contrib/llvm/lib/Support/YAMLParser.cpp2147
-rw-r--r--contrib/llvm/lib/Support/YAMLTraits.cpp827
-rw-r--r--contrib/llvm/lib/Support/circular_raw_ostream.cpp45
-rw-r--r--contrib/llvm/lib/Support/raw_os_ostream.cpp30
-rw-r--r--contrib/llvm/lib/Support/raw_ostream.cpp789
-rw-r--r--contrib/llvm/lib/Support/regcclass.h70
-rw-r--r--contrib/llvm/lib/Support/regcname.h139
-rw-r--r--contrib/llvm/lib/Support/regcomp.c1553
-rw-r--r--contrib/llvm/lib/Support/regengine.inc1034
-rw-r--r--contrib/llvm/lib/Support/regerror.c135
-rw-r--r--contrib/llvm/lib/Support/regex2.h157
-rw-r--r--contrib/llvm/lib/Support/regex_impl.h108
-rw-r--r--contrib/llvm/lib/Support/regexec.c162
-rw-r--r--contrib/llvm/lib/Support/regfree.c72
-rw-r--r--contrib/llvm/lib/Support/regstrlcpy.c52
-rw-r--r--contrib/llvm/lib/Support/regutils.h53
-rw-r--r--contrib/llvm/lib/Support/system_error.cpp130
-rw-r--r--contrib/llvm/lib/TableGen/Error.cpp75
-rw-r--r--contrib/llvm/lib/TableGen/Main.cpp130
-rw-r--r--contrib/llvm/lib/TableGen/Record.cpp2057
-rw-r--r--contrib/llvm/lib/TableGen/StringMatcher.cpp149
-rw-r--r--contrib/llvm/lib/TableGen/TGLexer.cpp486
-rw-r--r--contrib/llvm/lib/TableGen/TGLexer.h132
-rw-r--r--contrib/llvm/lib/TableGen/TGParser.cpp2591
-rw-r--r--contrib/llvm/lib/TableGen/TGParser.h191
-rw-r--r--contrib/llvm/lib/TableGen/TableGenBackend.cpp56
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.h42
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.td70
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp347
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h80
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp600
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td196
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp633
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h108
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp415
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp2975
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h247
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td961
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp822
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h112
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td5099
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp140
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h149
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp171
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h76
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td203
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Schedule.td10
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h32
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp43
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h54
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp81
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h69
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp24
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h31
-rw-r--r--contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp2197
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp803
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp408
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h172
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp585
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp292
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp160
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h27
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h113
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h27
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp502
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp178
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h167
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp194
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h65
-rw-r--r--contrib/llvm/lib/Target/AArch64/README.txt2
-rw-r--r--contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp1103
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h1068
-rw-r--r--contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp704
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h56
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td320
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp1945
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h132
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp4137
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h419
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp740
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h178
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h131
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h159
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td206
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp1916
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp2061
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp306
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h226
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp1249
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp2973
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp1430
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h82
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp98
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h56
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp3603
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp10445
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h578
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td2094
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp148
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h46
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td5233
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td6806
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td1417
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td4327
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td1479
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp336
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.h183
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp2062
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp125
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h275
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h6586
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h33
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td403
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRelocations.h62
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSchedule.td336
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA8.td1075
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA9.td2496
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td1144
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleV6.td300
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp200
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h68
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp241
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h315
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp225
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h146
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp53
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h43
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp458
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp7874
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp4553
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp1449
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h163
-rwxr-xr-xcontrib/llvm/lib/Target/ARM/LICENSE.TXT47
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h668
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp688
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h421
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp289
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp418
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h27
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h119
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp60
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h35
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp1531
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp72
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h76
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp298
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h77
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp497
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h112
-rw-r--r--contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp399
-rw-r--r--contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp407
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h56
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp104
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h61
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp679
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h65
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp274
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp572
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h75
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp52
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h42
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp1037
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp2114
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h46
-rw-r--r--contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp28
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.h79
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.td177
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp314
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h165
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp236
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td35
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp204
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h189
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp180
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp183
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp348
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h55
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp1554
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp1663
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp1687
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h173
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td379
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td66
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp2576
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h207
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td2765
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td137
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td3531
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td626
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td3503
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td39
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td50
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td369
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td395
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp95
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h75
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp692
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h244
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp645
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOperands.td858
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp323
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp317
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h91
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td167
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp83
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td69
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td74
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td121
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp46
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h40
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp216
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp88
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h80
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp180
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h83
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp94
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h40
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp3086
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h141
-rw-r--r--contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp210
-rw-r--r--contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h87
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h183
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp37
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp175
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h100
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp97
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h39
-rw-r--r--contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp572
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp719
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h49
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp71
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h43
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.h32
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.td73
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp326
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td24
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp254
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp488
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h56
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp277
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp1154
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h179
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td219
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td229
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td228
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp297
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h240
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td1052
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp112
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h33
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td131
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp167
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h47
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.cpp14
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h169
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp145
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h71
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td148
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h47
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td50
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td236
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td267
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp56
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h75
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp81
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h80
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp90
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h40
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp171
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h237
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp77
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp222
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp141
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h56
-rw-r--r--contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp114
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h44
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp31
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp94
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h36
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.h47
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.td66
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp167
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp180
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td40
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp297
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h59
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp492
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp1247
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h178
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp326
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h91
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td1211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp153
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h47
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h54
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp162
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h55
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td81
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp34
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h42
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp71
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h69
-rw-r--r--contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/Mangler.cpp237
-rw-r--r--contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp1775
-rw-r--r--contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp556
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp211
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h103
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp279
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h153
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.cpp81
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.h28
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp285
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp89
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h43
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h139
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp47
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp344
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp215
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h72
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp80
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsReginfo.h31
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.h34
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.td118
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp180
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h52
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp308
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h51
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp689
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelLowering.h80
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrFormats.td626
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp417
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h124
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td1791
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp151
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h50
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td392
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp153
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.h63
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp597
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h90
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallingConv.td225
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp363
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCondMov.td253
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp85
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td310
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td1271
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp721
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp134
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.h47
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp154
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h93
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp3596
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h465
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td529
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFormats.td665
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp284
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.h142
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td1328
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp285
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsJITInfo.h71
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp455
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp166
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.h44
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp75
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.h99
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp219
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h82
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td406
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRelocations.h41
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp468
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h49
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp473
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h57
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp442
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h62
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp410
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h96
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp134
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h44
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td63
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp74
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h164
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp130
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.h102
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp118
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h43
-rw-r--r--contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp31
-rw-r--r--contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp1
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h86
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp63
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp88
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h36
-rw-r--r--contrib/llvm/lib/Target/NVPTX/ManagedStringPool.h48
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.h141
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.td62
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp46
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h49
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp2117
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h298
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp78
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h42
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp1733
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h106
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp1693
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h164
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td43
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp273
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h78
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td2877
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td1736
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp205
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h47
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXNumRegisters.h16
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp131
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h79
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td64
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSection.h47
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp73
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h41
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp61
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h96
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp121
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h118
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h102
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp504
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h92
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXVector.td1481
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp90
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXutil.h25
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp177
-rw-r--r--contrib/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/NVPTX/cl_common_defines.h122
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp294
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h69
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp195
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp277
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h54
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp70
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h35
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp239
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp160
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h68
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h43
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h90
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td240
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp1126
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp195
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp775
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td144
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp271
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp1329
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h293
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp245
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h91
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp1565
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp7554
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h632
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td968
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td828
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h43
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td1003
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp731
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h157
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td1717
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp471
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h49
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp194
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp15
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h162
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h6586
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp618
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h90
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td232
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRelocations.h56
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td519
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td663
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td766
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td265
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td309
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td71
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td81
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td88
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td109
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp159
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h200
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp137
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h100
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp240
-rw-r--r--contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPU.h51
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPU.td41
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp145
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h44
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td42
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUConvertToISA.cpp62
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp122
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h44
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp414
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h140
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUIndirectAddressing.cpp343
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp267
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h206
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td82
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstructions.td266
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUIntrinsics.td60
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp83
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h34
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp22
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.h29
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp75
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h66
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.td25
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUStructurizeCFG.cpp896
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp87
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h65
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp164
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h70
-rw-r--r--contrib/llvm/lib/Target/R600/AMDIL.h121
-rw-r--r--contrib/llvm/lib/Target/R600/AMDIL7XXDevice.cpp115
-rw-r--r--contrib/llvm/lib/Target/R600/AMDIL7XXDevice.h72
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILBase.td85
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp3051
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILDevice.cpp132
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILDevice.h117
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp94
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILDeviceInfo.h88
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILDevices.h19
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILEvergreenDevice.cpp169
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILEvergreenDevice.h93
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp643
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILISelLowering.cpp647
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILInstrInfo.td207
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILIntrinsicInfo.cpp79
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILIntrinsicInfo.h49
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILIntrinsics.td232
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILNIDevice.cpp65
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILNIDevice.h57
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp1215
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILRegisterInfo.td107
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILSIDevice.cpp48
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILSIDevice.h39
-rw-r--r--contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp172
-rw-r--r--contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h54
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp90
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp83
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h40
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp113
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h55
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp585
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp201
-rw-r--r--contrib/llvm/lib/Target/R600/Processors.td30
-rw-r--r--contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp268
-rw-r--r--contrib/llvm/lib/Target/R600/R600Defines.h97
-rw-r--r--contrib/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp255
-rw-r--r--contrib/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp297
-rw-r--r--contrib/llvm/lib/Target/R600/R600ISelLowering.cpp1106
-rw-r--r--contrib/llvm/lib/Target/R600/R600ISelLowering.h74
-rw-r--r--contrib/llvm/lib/Target/R600/R600InstrInfo.cpp841
-rw-r--r--contrib/llvm/lib/Target/R600/R600InstrInfo.h204
-rw-r--r--contrib/llvm/lib/Target/R600/R600Instructions.td2267
-rw-r--r--contrib/llvm/lib/Target/R600/R600Intrinsics.td31
-rw-r--r--contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h32
-rw-r--r--contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp427
-rw-r--r--contrib/llvm/lib/Target/R600/R600MachineScheduler.h120
-rw-r--r--contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp99
-rw-r--r--contrib/llvm/lib/Target/R600/R600RegisterInfo.h55
-rw-r--r--contrib/llvm/lib/Target/R600/R600RegisterInfo.td209
-rw-r--r--contrib/llvm/lib/Target/R600/R600Schedule.td36
-rw-r--r--contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp329
-rw-r--r--contrib/llvm/lib/Target/R600/SIISelLowering.cpp670
-rw-r--r--contrib/llvm/lib/Target/R600/SIISelLowering.h58
-rw-r--r--contrib/llvm/lib/Target/R600/SIInsertWaits.cpp358
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrFormats.td426
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.cpp264
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.h97
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.td356
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstructions.td1607
-rw-r--r--contrib/llvm/lib/Target/R600/SIIntrinsics.td42
-rw-r--r--contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp501
-rw-r--r--contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h33
-rw-r--r--contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp53
-rw-r--r--contrib/llvm/lib/Target/R600/SIRegisterInfo.h50
-rw-r--r--contrib/llvm/lib/Target/R600/SIRegisterInfo.td182
-rw-r--r--contrib/llvm/lib/Target/R600/SISchedule.td15
-rw-r--r--contrib/llvm/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp320
-rw-r--r--contrib/llvm/lib/Target/Sparc/FPMover.cpp141
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp81
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h39
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.h108
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.td72
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp261
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcCallingConv.td56
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp96
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h44
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp211
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp1360
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h113
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td285
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td151
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp357
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h107
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td822
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h48
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp110
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h59
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td170
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp46
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h59
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp98
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h87
-rw-r--r--contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/Target.cpp116
-rw-r--r--contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/TargetJITInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/TargetLibraryInfo.cpp645
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp319
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp190
-rw-r--r--contrib/llvm/lib/Target/TargetMachineC.cpp197
-rw-r--r--contrib/llvm/lib/Target/TargetSubtargetInfo.cpp37
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp2221
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp814
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h131
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c1676
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h588
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h398
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp218
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h87
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp511
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h25
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp208
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h96
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp476
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h621
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp227
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h33
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp158
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h58
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp1233
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp442
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h114
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp586
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp65
-rw-r--r--contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp217
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h71
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h75
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td343
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp755
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.h79
-rw-r--r--contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h46
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td537
-rw-r--r--contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp1499
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp2311
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp1768
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp1837
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h76
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp2728
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp18512
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h950
-rw-r--r--contrib/llvm/lib/Target/X86/X86Instr3DNow.td103
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td1361
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrBuilder.h183
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td111
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td1839
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td287
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td186
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td368
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td692
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFormats.td658
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td432
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp4898
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h419
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td2197
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td624
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td8328
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSVM.td62
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td978
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td522
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrTSX.td39
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrVMX.td66
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrXOP.td311
-rw-r--r--contrib/llvm/lib/Target/X86/X86JITInfo.cpp581
-rw-r--r--contrib/llvm/lib/Target/X86/X86JITInfo.h81
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp794
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h145
-rw-r--r--contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp212
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp691
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h143
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td423
-rw-r--r--contrib/llvm/lib/Target/X86/X86Relocations.h52
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedHaswell.td126
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td122
-rw-r--r--contrib/llvm/lib/Target/X86/X86Schedule.td573
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleAtom.td530
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp270
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h56
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp495
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h379
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp227
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h137
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp49
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.h43
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp495
-rw-r--r--contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp293
-rw-r--r--contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp800
-rw-r--r--contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp97
-rw-r--r--contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h44
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp32
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp104
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.h32
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.td53
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp342
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreCallingConv.td36
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp423
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h58
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp296
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp1644
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h199
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td277
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp406
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h93
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td1267
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp117
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h42
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h69
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp259
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h70
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td59
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp30
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.h43
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp65
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h64
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp65
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h25
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp897
-rw-r--r--contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp47
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp229
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp1078
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp135
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp1345
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp211
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp3256
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp279
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp110
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp112
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp83
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp613
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Internalize.cpp188
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp304
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp870
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp183
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp399
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp260
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp73
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp414
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombine.h389
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp1497
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp2355
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp1405
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp1802
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp3167
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp816
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp1117
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp904
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp945
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp810
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp1281
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp682
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h106
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp2508
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp1453
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp125
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp212
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp117
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp855
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp38
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h111
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp1985
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp225
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp1424
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp169
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h36
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp585
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp262
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h79
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp48
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h395
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp175
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp162
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h74
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp541
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp128
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp3026
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp252
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp177
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h80
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ADCE.cpp97
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp152
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp2012
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp98
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp321
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DCE.cpp134
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp900
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp628
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp2602
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp311
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp1845
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp1618
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp895
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp250
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp1138
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp175
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp508
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp4835
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp238
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp1289
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp142
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp1020
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp2007
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp133
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SCCP.cpp1936
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SROA.cpp3765
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalar.cpp189
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp2611
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp350
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp247
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Sink.cpp270
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp648
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp728
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp409
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp597
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp262
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp576
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp122
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp96
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp780
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp148
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp916
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp64
-rw-r--r--contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp524
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp292
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp1001
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp800
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp457
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp374
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp174
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp582
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp305
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp90
-rw-r--r--contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp145
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp64
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp1149
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp530
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp4072
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp393
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp100
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp1949
-rw-r--r--contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp122
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp38
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp215
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp3183
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp3504
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp43
1444 files changed, 761356 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
new file mode 100644
index 000000000000..210b80ab63ef
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -0,0 +1,557 @@
+//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic AliasAnalysis interface which is used as the
+// common interface used by all clients and implementations of alias analysis.
+//
+// This file also implements the default version of the AliasAnalysis interface
+// that is to be used when no other implementation is specified. This does some
+// simple tests that detect obvious cases: two different global pointers cannot
+// alias, a global cannot alias a malloc, two different mallocs cannot alias,
+// etc.
+//
+// This alias analysis implementation really isn't very good for anything, but
+// it is very fast, and makes a nice clean default implementation. Because it
+// handles lots of little corner cases, other, more complex, alias analysis
+// implementations may choose to rely on this pass to resolve these simple and
+// easy cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+using namespace llvm;
+
+// Register the AliasAnalysis interface, providing a nice name to refer to.
+INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
+char AliasAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Default chaining methods
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::AliasResult
+AliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->alias(LocA, LocB);
+}
+
+bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->pointsToConstantMemory(Loc, OrLocal);
+}
+
+void AliasAnalysis::deleteValue(Value *V) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->deleteValue(V);
+}
+
+void AliasAnalysis::copyValue(Value *From, Value *To) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->copyValue(From, To);
+}
+
+void AliasAnalysis::addEscapingUse(Use &U) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->addEscapingUse(U);
+}
+
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ ModRefBehavior MRB = getModRefBehavior(CS);
+ if (MRB == DoesNotAccessMemory)
+ return NoModRef;
+
+ ModRefResult Mask = ModRef;
+ if (onlyReadsMemory(MRB))
+ Mask = Ref;
+
+ if (onlyAccessesArgPointees(MRB)) {
+ bool doesAlias = false;
+ if (doesAccessArgPointees(MRB)) {
+ MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI) {
+ const Value *Arg = *AI;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CSLoc(Arg, UnknownSize, CSTag);
+ if (!isNoAlias(CSLoc, Loc)) {
+ doesAlias = true;
+ break;
+ }
+ }
+ }
+ if (!doesAlias)
+ return NoModRef;
+ }
+
+ // If Loc is a constant memory location, the call definitely could not
+ // modify the memory location.
+ if ((Mask & Mod) && pointsToConstantMemory(Loc))
+ Mask = ModRefResult(Mask & ~Mod);
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ // If CS1 or CS2 are readnone, they don't interact.
+ ModRefBehavior CS1B = getModRefBehavior(CS1);
+ if (CS1B == DoesNotAccessMemory) return NoModRef;
+
+ ModRefBehavior CS2B = getModRefBehavior(CS2);
+ if (CS2B == DoesNotAccessMemory) return NoModRef;
+
+ // If they both only read from memory, there is no dependence.
+ if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
+ return NoModRef;
+
+ AliasAnalysis::ModRefResult Mask = ModRef;
+
+ // If CS1 only reads memory, the only dependence on CS2 can be
+ // from CS1 reading memory written by CS2.
+ if (onlyReadsMemory(CS1B))
+ Mask = ModRefResult(Mask & Ref);
+
+ // If CS2 only access memory through arguments, accumulate the mod/ref
+ // information from CS1's references to the memory referenced by
+ // CS2's arguments.
+ if (onlyAccessesArgPointees(CS2B)) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ if (doesAccessArgPointees(CS2B)) {
+ MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator
+ I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CS2Loc(Arg, UnknownSize, CS2Tag);
+ R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask);
+ if (R == Mask)
+ break;
+ }
+ }
+ return R;
+ }
+
+ // If CS1 only accesses memory through arguments, check if CS2 references
+ // any of the memory referenced by CS1's arguments. If not, return NoModRef.
+ if (onlyAccessesArgPointees(CS1B)) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ if (doesAccessArgPointees(CS1B)) {
+ MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator
+ I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CS1Loc(Arg, UnknownSize, CS1Tag);
+ if (getModRefInfo(CS2, CS1Loc) != NoModRef) {
+ R = Mask;
+ break;
+ }
+ }
+ }
+ if (R == NoModRef)
+ return R;
+ }
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // Call back into the alias analysis with the other form of getModRefBehavior
+ // to see if it can give a better response.
+ if (const Function *F = CS.getCalledFunction())
+ Min = getModRefBehavior(F);
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Min;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any result we've managed to compute.
+ return ModRefBehavior(AA->getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(const Function *F) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->getModRefBehavior(F);
+}
+
+//===----------------------------------------------------------------------===//
+// AliasAnalysis non-virtual helper method implementation
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
+ return Location(LI->getPointerOperand(),
+ getTypeStoreSize(LI->getType()),
+ LI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
+ return Location(SI->getPointerOperand(),
+ getTypeStoreSize(SI->getValueOperand()->getType()),
+ SI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
+ return Location(VI->getPointerOperand(),
+ UnknownSize,
+ VI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) {
+ return Location(CXI->getPointerOperand(),
+ getTypeStoreSize(CXI->getCompareOperand()->getType()),
+ CXI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) {
+ return Location(RMWI->getPointerOperand(),
+ getTypeStoreSize(RMWI->getValOperand()->getType()),
+ RMWI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // to both the source and the destination.
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+ return Location(MTI->getRawSource(), Size, TBAATag);
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // to both the source and the destination.
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+ return Location(MTI->getRawDest(), Size, TBAATag);
+}
+
+
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
+ // Be conservative in the face of volatile/atomic.
+ if (!L->isUnordered())
+ return ModRef;
+
+ // If the load address doesn't alias the given address, it doesn't read
+ // or write the specified memory.
+ if (!alias(getLocation(L), Loc))
+ return NoModRef;
+
+ // Otherwise, a load just reads.
+ return Ref;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
+ // Be conservative in the face of volatile/atomic.
+ if (!S->isUnordered())
+ return ModRef;
+
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
+ if (!alias(getLocation(S), Loc))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this store.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ // Otherwise, a store just writes.
+ return Mod;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
+ // If the va_arg address cannot alias the pointer in question, then the
+ // specified memory cannot be accessed by the va_arg.
+ if (!alias(getLocation(V), Loc))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this va_arg.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ // Otherwise, a va_arg reads and writes.
+ return ModRef;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
+ // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
+ if (CX->getOrdering() > Monotonic)
+ return ModRef;
+
+ // If the cmpxchg address does not alias the location, it does not access it.
+ if (!alias(getLocation(CX), Loc))
+ return NoModRef;
+
+ return ModRef;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
+ // Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
+ if (RMW->getOrdering() > Monotonic)
+ return ModRef;
+
+ // If the atomicrmw address does not alias the location, it does not access it.
+ if (!alias(getLocation(RMW), Loc))
+ return NoModRef;
+
+ return ModRef;
+}
+
+namespace {
+ // Conservatively return true. Return false, if there is a single path
+ // starting from "From" and the path does not reach "To".
+ static bool hasPath(const BasicBlock *From, const BasicBlock *To) {
+ const unsigned MaxCheck = 5;
+ const BasicBlock *Current = From;
+ for (unsigned I = 0; I < MaxCheck; I++) {
+ unsigned NumSuccs = Current->getTerminator()->getNumSuccessors();
+ if (NumSuccs > 1)
+ return true;
+ if (NumSuccs == 0)
+ return false;
+ Current = Current->getTerminator()->getSuccessor(0);
+ if (Current == To)
+ return true;
+ }
+ return true;
+ }
+
+ /// Only find pointer captures which happen before the given instruction. Uses
+ /// the dominator tree to determine whether one instruction is before another.
+ /// Only support the case where the Value is defined in the same basic block
+ /// as the given instruction and the use.
+ struct CapturesBefore : public CaptureTracker {
+ CapturesBefore(const Instruction *I, DominatorTree *DT)
+ : BeforeHere(I), DT(DT), Captured(false) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool shouldExplore(Use *U) {
+ Instruction *I = cast<Instruction>(U->getUser());
+ BasicBlock *BB = I->getParent();
+ // We explore this usage only if the usage can reach "BeforeHere".
+ // If use is not reachable from entry, there is no need to explore.
+ if (BeforeHere != I && !DT->isReachableFromEntry(BB))
+ return false;
+ // If the value is defined in the same basic block as use and BeforeHere,
+ // there is no need to explore the use if BeforeHere dominates use.
+ // Check whether there is a path from I to BeforeHere.
+ if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
+ !hasPath(BB, BeforeHere->getParent()))
+ return false;
+ return true;
+ }
+
+ bool captured(Use *U) {
+ Instruction *I = cast<Instruction>(U->getUser());
+ BasicBlock *BB = I->getParent();
+ // Same logic as in shouldExplore.
+ if (BeforeHere != I && !DT->isReachableFromEntry(BB))
+ return false;
+ if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
+ !hasPath(BB, BeforeHere->getParent()))
+ return false;
+ Captured = true;
+ return true;
+ }
+
+ const Instruction *BeforeHere;
+ DominatorTree *DT;
+
+ bool Captured;
+ };
+}
+
+// FIXME: this is really just shoring-up a deficiency in alias analysis.
+// BasicAA isn't willing to spend linear time determining whether an alloca
+// was captured before or after this particular call, while we are. However,
+// with a smarter AA in place, this test is just wasting compile time.
+AliasAnalysis::ModRefResult
+AliasAnalysis::callCapturesBefore(const Instruction *I,
+ const AliasAnalysis::Location &MemLoc,
+ DominatorTree *DT) {
+ if (!DT || !TD) return AliasAnalysis::ModRef;
+
+ const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD);
+ if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
+ isa<Constant>(Object))
+ return AliasAnalysis::ModRef;
+
+ ImmutableCallSite CS(I);
+ if (!CS.getInstruction() || CS.getInstruction() == Object)
+ return AliasAnalysis::ModRef;
+
+ CapturesBefore CB(I, DT);
+ llvm::PointerMayBeCaptured(Object, &CB);
+ if (CB.Captured)
+ return AliasAnalysis::ModRef;
+
+ unsigned ArgNo = 0;
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture or byval pointer arguments. If this
+ // pointer were passed to arguments that were neither of these, then it
+ // couldn't be no-capture.
+ if (!(*CI)->getType()->isPointerTy() ||
+ (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ if (!isNoAlias(AliasAnalysis::Location(*CI),
+ AliasAnalysis::Location(Object))) {
+ return AliasAnalysis::ModRef;
+ }
+ }
+ return AliasAnalysis::NoModRef;
+}
+
+// AliasAnalysis destructor: DO NOT move this to the header file for
+// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
+// the AliasAnalysis.o file in the current .a file, causing alias analysis
+// support to not be included in the tool correctly!
+//
+AliasAnalysis::~AliasAnalysis() {}
+
+/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
+/// AliasAnalysis interface before any other methods are called.
+///
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
+ TD = P->getAnalysisIfAvailable<DataLayout>();
+ TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>();
+ AA = &P->getAnalysis<AliasAnalysis>();
+}
+
+// getAnalysisUsage - All alias analysis implementations should invoke this
+// directly (using AliasAnalysis::getAnalysisUsage(AU)).
+void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>(); // All AA's chain
+}
+
+/// getTypeStoreSize - Return the DataLayout store size for the given type,
+/// if known, or a conservative value otherwise.
+///
+uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
+ return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
+}
+
+/// canBasicBlockModify - Return true if it is possible for execution of the
+/// specified basic block to modify the value pointed to by Ptr.
+///
+bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
+ const Location &Loc) {
+ return canInstructionRangeModify(BB.front(), BB.back(), Loc);
+}
+
+/// canInstructionRangeModify - Return true if it is possible for the execution
+/// of the specified instructions to modify the value pointed to by Ptr. The
+/// instructions to consider are all of the instructions in the range of [I1,I2]
+/// INCLUSIVE. I1 and I2 must be in the same basic block.
+///
+bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
+ const Instruction &I2,
+ const Location &Loc) {
+ assert(I1.getParent() == I2.getParent() &&
+ "Instructions not in same basic block!");
+ BasicBlock::const_iterator I = &I1;
+ BasicBlock::const_iterator E = &I2;
+ ++E; // Convert from inclusive to exclusive range.
+
+ for (; I != E; ++I) // Check every instruction in range
+ if (getModRefInfo(I, Loc) & Mod)
+ return true;
+ return false;
+}
+
+/// isNoAliasCall - Return true if this pointer is returned by a noalias
+/// function.
+bool llvm::isNoAliasCall(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ return ImmutableCallSite(cast<Instruction>(V))
+ .paramHasAttr(0, Attribute::NoAlias);
+ return false;
+}
+
+/// isIdentifiedObject - Return true if this pointer refers to a distinct and
+/// identifiable object. This returns true for:
+/// Global Variables and Functions (but not Global Aliases)
+/// Allocas and Mallocs
+/// ByVal and NoAlias Arguments
+/// NoAlias returns
+///
+bool llvm::isIdentifiedObject(const Value *V) {
+ if (isa<AllocaInst>(V))
+ return true;
+ if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
+ return true;
+ if (isNoAliasCall(V))
+ return true;
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasNoAliasAttr() || A->hasByValAttr();
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
new file mode 100644
index 000000000000..9f4a47c77e03
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
@@ -0,0 +1,171 @@
+//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass which can be used to count how many alias queries
+// are being made and how the alias analysis implementation being used responds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true));
+static cl::opt<bool>
+PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
+
+namespace {
+ class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
+ unsigned No, May, Partial, Must;
+ unsigned NoMR, JustRef, JustMod, MR;
+ Module *M;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ AliasAnalysisCounter() : ModulePass(ID) {
+ initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry());
+ No = May = Partial = Must = 0;
+ NoMR = JustRef = JustMod = MR = 0;
+ }
+
+ void printLine(const char *Desc, unsigned Val, unsigned Sum) {
+ errs() << " " << Val << " " << Desc << " responses ("
+ << Val*100/Sum << "%)\n";
+ }
+ ~AliasAnalysisCounter() {
+ unsigned AASum = No+May+Partial+Must;
+ unsigned MRSum = NoMR+JustRef+JustMod+MR;
+ if (AASum + MRSum) { // Print a report if any counted queries occurred...
+ errs() << "\n===== Alias Analysis Counter Report =====\n"
+ << " Analysis counted:\n"
+ << " " << AASum << " Total Alias Queries Performed\n";
+ if (AASum) {
+ printLine("no alias", No, AASum);
+ printLine("may alias", May, AASum);
+ printLine("partial alias", Partial, AASum);
+ printLine("must alias", Must, AASum);
+ errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
+ << May*100/AASum << "%/"
+ << Partial*100/AASum << "%/"
+ << Must*100/AASum<<"%\n\n";
+ }
+
+ errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n";
+ if (MRSum) {
+ printLine("no mod/ref", NoMR, MRSum);
+ printLine("ref", JustRef, MRSum);
+ printLine("mod", JustMod, MRSum);
+ printLine("mod/ref", MR, MRSum);
+ errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum
+ << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum
+ << "%/" << MR*100/MRSum <<"%\n\n";
+ }
+ }
+ }
+
+ bool runOnModule(Module &M) {
+ this->M = &M;
+ InitializeAliasAnalysis(this);
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ // FIXME: We could count these too...
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ // Forwarding functions: just delegate to a real AA implementation, counting
+ // the number of responses...
+ AliasResult alias(const Location &LocA, const Location &LocB);
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+ };
+}
+
+char AliasAnalysisCounter::ID = 0;
+INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
+ "Count Alias Analysis Query Responses", false, true, false)
+
+ModulePass *llvm::createAliasAnalysisCounterPass() {
+ return new AliasAnalysisCounter();
+}
+
+AliasAnalysis::AliasResult
+AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
+ AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
+
+ const char *AliasString = 0;
+ switch (R) {
+ case NoAlias: No++; AliasString = "No alias"; break;
+ case MayAlias: May++; AliasString = "May alias"; break;
+ case PartialAlias: Partial++; AliasString = "Partial alias"; break;
+ case MustAlias: Must++; AliasString = "Must alias"; break;
+ }
+
+ if (PrintAll || (PrintAllFailures && R == MayAlias)) {
+ errs() << AliasString << ":\t";
+ errs() << "[" << LocA.Size << "B] ";
+ WriteAsOperand(errs(), LocA.Ptr, true, M);
+ errs() << ", ";
+ errs() << "[" << LocB.Size << "B] ";
+ WriteAsOperand(errs(), LocB.Ptr, true, M);
+ errs() << "\n";
+ }
+
+ return R;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
+
+ const char *MRString = 0;
+ switch (R) {
+ case NoModRef: NoMR++; MRString = "NoModRef"; break;
+ case Ref: JustRef++; MRString = "JustRef"; break;
+ case Mod: JustMod++; MRString = "JustMod"; break;
+ case ModRef: MR++; MRString = "ModRef"; break;
+ }
+
+ if (PrintAll || (PrintAllFailures && R == ModRef)) {
+ errs() << MRString << ": Ptr: ";
+ errs() << "[" << Loc.Size << "B] ";
+ WriteAsOperand(errs(), Loc.Ptr, true, M);
+ errs() << "\t<->" << *CS.getInstruction() << '\n';
+ }
+ return R;
+}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
new file mode 100644
index 000000000000..a571463dfe12
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -0,0 +1,372 @@
+//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple N^2 alias analysis accuracy evaluator.
+// Basically, for each function in the program, it simply queries to see how the
+// alias analysis implementation answers alias queries between each pair of
+// pointers in the function.
+//
+// This is inspired and adapted from code by: Naveen Neelakantam, Francesco
+// Spadini, and Wojciech Stryjewski.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
+static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
+static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
+static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
+
+static cl::opt<bool> EvalTBAA("evaluate-tbaa", cl::ReallyHidden);
+
+namespace {
+ class AAEval : public FunctionPass {
+ unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
+ unsigned NoModRef, Mod, Ref, ModRef;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AAEval() : FunctionPass(ID) {
+ initializeAAEvalPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M) {
+ NoAlias = MayAlias = PartialAlias = MustAlias = 0;
+ NoModRef = Mod = Ref = ModRef = 0;
+
+ if (PrintAll) {
+ PrintNoAlias = PrintMayAlias = true;
+ PrintPartialAlias = PrintMustAlias = true;
+ PrintNoModRef = PrintMod = PrintRef = PrintModRef = true;
+ }
+ return false;
+ }
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+}
+
+char AAEval::ID = 0;
+INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+
+FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
+
+static void PrintResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
+ if (P) {
+ std::string o1, o2;
+ {
+ raw_string_ostream os1(o1), os2(o2);
+ WriteAsOperand(os1, V1, true, M);
+ WriteAsOperand(os2, V2, true, M);
+ }
+
+ if (o2 < o1)
+ std::swap(o1, o2);
+ errs() << " " << Msg << ":\t"
+ << o1 << ", "
+ << o2 << "\n";
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": Ptr: ";
+ WriteAsOperand(errs(), Ptr, true, M);
+ errs() << "\t<->" << *I << '\n';
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *CSA.getInstruction()
+ << " <-> " << *CSB.getInstruction() << '\n';
+ }
+}
+
+static inline void
+PrintLoadStoreResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *V1
+ << " <-> " << *V2 << '\n';
+ }
+}
+
+static inline bool isInterestingPointer(Value *V) {
+ return V->getType()->isPointerTy()
+ && !isa<ConstantPointerNull>(V);
+}
+
+bool AAEval::runOnFunction(Function &F) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ SetVector<Value *> Pointers;
+ SetVector<CallSite> CallSites;
+ SetVector<Value *> Loads;
+ SetVector<Value *> Stores;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ if (I->getType()->isPointerTy()) // Add all pointer arguments.
+ Pointers.insert(I);
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (I->getType()->isPointerTy()) // Add all pointer instructions.
+ Pointers.insert(&*I);
+ if (EvalTBAA && isa<LoadInst>(&*I))
+ Loads.insert(&*I);
+ if (EvalTBAA && isa<StoreInst>(&*I))
+ Stores.insert(&*I);
+ Instruction &Inst = *I;
+ if (CallSite CS = cast<Value>(&Inst)) {
+ Value *Callee = CS.getCalledValue();
+ // Skip actual functions for direct function calls.
+ if (!isa<Function>(Callee) && isInterestingPointer(Callee))
+ Pointers.insert(Callee);
+ // Consider formals.
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI)
+ if (isInterestingPointer(*AI))
+ Pointers.insert(*AI);
+ CallSites.insert(CS);
+ } else {
+ // Consider all operands.
+ for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
+ OI != OE; ++OI)
+ if (isInterestingPointer(*OI))
+ Pointers.insert(*OI);
+ }
+ }
+
+ if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias ||
+ PrintNoModRef || PrintMod || PrintRef || PrintModRef)
+ errs() << "Function: " << F.getName() << ": " << Pointers.size()
+ << " pointers, " << CallSites.size() << " call sites\n";
+
+ // iterate over the worklist, and run the full (n^2)/2 disambiguations
+ for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
+ I1 != E; ++I1) {
+ uint64_t I1Size = AliasAnalysis::UnknownSize;
+ Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+ if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
+
+ for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
+ uint64_t I2Size = AliasAnalysis::UnknownSize;
+ Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
+ if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
+
+ switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
+ case AliasAnalysis::NoAlias:
+ PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
+ ++MustAlias; break;
+ }
+ }
+ }
+
+ if (EvalTBAA) {
+ // iterate over all pairs of load, store
+ for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end();
+ I1 != E; ++I1) {
+ for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end();
+ I2 != E2; ++I2) {
+ switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)),
+ AA.getLocation(cast<StoreInst>(*I2)))) {
+ case AliasAnalysis::NoAlias:
+ PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
+ F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
+ F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
+ F.getParent());
+ ++MustAlias; break;
+ }
+ }
+ }
+
+ // iterate over all pairs of store, store
+ for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end();
+ I1 != E; ++I1) {
+ for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) {
+ switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)),
+ AA.getLocation(cast<StoreInst>(*I2)))) {
+ case AliasAnalysis::NoAlias:
+ PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
+ F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
+ F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
+ F.getParent());
+ ++MustAlias; break;
+ }
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls and values
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ Instruction *I = C->getInstruction();
+
+ for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
+ V != Ve; ++V) {
+ uint64_t Size = AliasAnalysis::UnknownSize;
+ Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
+ if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
+
+ switch (AA.getModRefInfo(*C, *V, Size)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
+ ++ModRef; break;
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+ if (D == C)
+ continue;
+ switch (AA.getModRefInfo(*C, *D)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+ ++ModRef; break;
+ }
+ }
+ }
+
+ return false;
+}
+
+static void PrintPercent(unsigned Num, unsigned Sum) {
+ errs() << "(" << Num*100ULL/Sum << "."
+ << ((Num*1000ULL/Sum) % 10) << "%)\n";
+}
+
+bool AAEval::doFinalization(Module &M) {
+ unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias;
+ errs() << "===== Alias Analysis Evaluator Report =====\n";
+ if (AliasSum == 0) {
+ errs() << " Alias Analysis Evaluator Summary: No pointers!\n";
+ } else {
+ errs() << " " << AliasSum << " Total Alias Queries Performed\n";
+ errs() << " " << NoAlias << " no alias responses ";
+ PrintPercent(NoAlias, AliasSum);
+ errs() << " " << MayAlias << " may alias responses ";
+ PrintPercent(MayAlias, AliasSum);
+ errs() << " " << PartialAlias << " partial alias responses ";
+ PrintPercent(PartialAlias, AliasSum);
+ errs() << " " << MustAlias << " must alias responses ";
+ PrintPercent(MustAlias, AliasSum);
+ errs() << " Alias Analysis Evaluator Pointer Alias Summary: "
+ << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
+ << PartialAlias*100/AliasSum << "%/"
+ << MustAlias*100/AliasSum << "%\n";
+ }
+
+ // Display the summary for mod/ref analysis
+ unsigned ModRefSum = NoModRef + Mod + Ref + ModRef;
+ if (ModRefSum == 0) {
+ errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
+ } else {
+ errs() << " " << ModRefSum << " Total ModRef Queries Performed\n";
+ errs() << " " << NoModRef << " no mod/ref responses ";
+ PrintPercent(NoModRef, ModRefSum);
+ errs() << " " << Mod << " mod responses ";
+ PrintPercent(Mod, ModRefSum);
+ errs() << " " << Ref << " ref responses ";
+ PrintPercent(Ref, ModRefSum);
+ errs() << " " << ModRef << " mod & ref responses ";
+ PrintPercent(ModRef, ModRefSum);
+ errs() << " Alias Analysis Evaluator Mod/Ref Summary: "
+ << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
+ << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
new file mode 100644
index 000000000000..f6178e36f0a9
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
@@ -0,0 +1,138 @@
+//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass checks alias analysis users to ensure that if they
+// create a new value, they do not query AA without informing it of the value.
+// It acts as a shim over any other AA pass you want.
+//
+// Yes keeping track of every value in the program is expensive, but this is
+// a debugging pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+
+ class AliasDebugger : public ModulePass, public AliasAnalysis {
+
+ //What we do is simple. Keep track of every value the AA could
+ //know about, and verify that queries are one of those.
+ //A query to a value that didn't exist when the AA was created
+ //means someone forgot to update the AA when creating new values
+
+ std::set<const Value*> Vals;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ AliasDebugger() : ModulePass(ID) {
+ initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this); // set up super class
+
+ for(Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ Vals.insert(&*I);
+ for (User::const_op_iterator OI = I->op_begin(),
+ OE = I->op_end(); OI != OE; ++OI)
+ Vals.insert(*OI);
+ }
+
+ for(Module::iterator I = M.begin(),
+ E = M.end(); I != E; ++I){
+ Vals.insert(&*I);
+ if(!I->isDeclaration()) {
+ for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end();
+ AI != AE; ++AI)
+ Vals.insert(&*AI);
+ for (Function::const_iterator FI = I->begin(), FE = I->end();
+ FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ Vals.insert(&*BI);
+ for (User::const_op_iterator OI = BI->op_begin(),
+ OE = BI->op_end(); OI != OE; ++OI)
+ Vals.insert(*OI);
+ }
+ }
+
+ }
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Location &LocA, const Location &LocB) {
+ assert(Vals.find(LocA.Ptr) != Vals.end() &&
+ "Never seen value in AA before");
+ assert(Vals.find(LocB.Ptr) != Vals.end() &&
+ "Never seen value in AA before");
+ return AliasAnalysis::alias(LocA, LocB);
+ }
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+ }
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ virtual void deleteValue(Value *V) {
+ assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
+ AliasAnalysis::deleteValue(V);
+ }
+ virtual void copyValue(Value *From, Value *To) {
+ Vals.insert(To);
+ AliasAnalysis::copyValue(From, To);
+ }
+
+ };
+}
+
+char AliasDebugger::ID = 0;
+INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
+ "AA use debugger", false, true, false)
+
+Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
+
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
new file mode 100644
index 000000000000..591052671d6e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -0,0 +1,655 @@
+//===- AliasSetTracker.cpp - Alias Sets Tracker implementation-------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AliasSetTracker and AliasSet classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// mergeSetIn - Merge the specified alias set into this alias set.
+///
+void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
+ assert(!AS.Forward && "Alias set is already forwarding!");
+ assert(!Forward && "This set is a forwarding set!!");
+
+ // Update the alias and access types of this set...
+ AccessTy |= AS.AccessTy;
+ AliasTy |= AS.AliasTy;
+ Volatile |= AS.Volatile;
+
+ if (AliasTy == MustAlias) {
+ // Check that these two merged sets really are must aliases. Since both
+ // used to be must-alias sets, we can just check any pointer from each set
+ // for aliasing.
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ PointerRec *L = getSomePointer();
+ PointerRec *R = AS.getSomePointer();
+
+ // If the pointers are not a must-alias pair, this set becomes a may alias.
+ if (AA.alias(AliasAnalysis::Location(L->getValue(),
+ L->getSize(),
+ L->getTBAAInfo()),
+ AliasAnalysis::Location(R->getValue(),
+ R->getSize(),
+ R->getTBAAInfo()))
+ != AliasAnalysis::MustAlias)
+ AliasTy = MayAlias;
+ }
+
+ if (UnknownInsts.empty()) { // Merge call sites...
+ if (!AS.UnknownInsts.empty())
+ std::swap(UnknownInsts, AS.UnknownInsts);
+ } else if (!AS.UnknownInsts.empty()) {
+ UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end());
+ AS.UnknownInsts.clear();
+ }
+
+ AS.Forward = this; // Forward across AS now...
+ addRef(); // AS is now pointing to us...
+
+ // Merge the list of constituent pointers...
+ if (AS.PtrList) {
+ *PtrListEnd = AS.PtrList;
+ AS.PtrList->setPrevInList(PtrListEnd);
+ PtrListEnd = AS.PtrListEnd;
+
+ AS.PtrList = 0;
+ AS.PtrListEnd = &AS.PtrList;
+ assert(*AS.PtrListEnd == 0 && "End of list is not null?");
+ }
+}
+
+void AliasSetTracker::removeAliasSet(AliasSet *AS) {
+ if (AliasSet *Fwd = AS->Forward) {
+ Fwd->dropRef(*this);
+ AS->Forward = 0;
+ }
+ AliasSets.erase(AS);
+}
+
+void AliasSet::removeFromTracker(AliasSetTracker &AST) {
+ assert(RefCount == 0 && "Cannot remove non-dead alias set from tracker!");
+ AST.removeAliasSet(this);
+}
+
+void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
+ uint64_t Size, const MDNode *TBAAInfo,
+ bool KnownMustAlias) {
+ assert(!Entry.hasAliasSet() && "Entry already in set!");
+
+ // Check to see if we have to downgrade to _may_ alias.
+ if (isMustAlias() && !KnownMustAlias)
+ if (PointerRec *P = getSomePointer()) {
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ AliasAnalysis::AliasResult Result =
+ AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
+ P->getTBAAInfo()),
+ AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo));
+ if (Result != AliasAnalysis::MustAlias)
+ AliasTy = MayAlias;
+ else // First entry of must alias must have maximum size!
+ P->updateSizeAndTBAAInfo(Size, TBAAInfo);
+ assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
+ }
+
+ Entry.setAliasSet(this);
+ Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+
+ // Add it to the end of the list...
+ assert(*PtrListEnd == 0 && "End of list is not null?");
+ *PtrListEnd = &Entry;
+ PtrListEnd = Entry.setPrevInList(PtrListEnd);
+ assert(*PtrListEnd == 0 && "End of list is not null?");
+ addRef(); // Entry points to alias set.
+}
+
+void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
+ UnknownInsts.push_back(I);
+
+ if (!I->mayWriteToMemory()) {
+ AliasTy = MayAlias;
+ AccessTy |= Refs;
+ return;
+ }
+
+ // FIXME: This should use mod/ref information to make this not suck so bad
+ AliasTy = MayAlias;
+ AccessTy = ModRef;
+}
+
+/// aliasesPointer - Return true if the specified pointer "may" (or must)
+/// alias one of the members in the set.
+///
+bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
+ const MDNode *TBAAInfo,
+ AliasAnalysis &AA) const {
+ if (AliasTy == MustAlias) {
+ assert(UnknownInsts.empty() && "Illegal must alias set!");
+
+ // If this is a set of MustAliases, only check to see if the pointer aliases
+ // SOME value in the set.
+ PointerRec *SomePtr = getSomePointer();
+ assert(SomePtr && "Empty must-alias set??");
+ return AA.alias(AliasAnalysis::Location(SomePtr->getValue(),
+ SomePtr->getSize(),
+ SomePtr->getTBAAInfo()),
+ AliasAnalysis::Location(Ptr, Size, TBAAInfo));
+ }
+
+ // If this is a may-alias set, we have to check all of the pointers in the set
+ // to be sure it doesn't alias the set...
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo),
+ AliasAnalysis::Location(I.getPointer(), I.getSize(),
+ I.getTBAAInfo())))
+ return true;
+
+ // Check the unknown instructions...
+ if (!UnknownInsts.empty()) {
+ for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
+ if (AA.getModRefInfo(UnknownInsts[i],
+ AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
+ AliasAnalysis::NoModRef)
+ return true;
+ }
+
+ return false;
+}
+
+bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
+ if (!Inst->mayReadOrWriteMemory())
+ return false;
+
+ for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
+ CallSite C1 = getUnknownInst(i), C2 = Inst;
+ if (!C1 || !C2 ||
+ AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
+ AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
+ return true;
+ }
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.getModRefInfo(Inst, AliasAnalysis::Location(I.getPointer(),
+ I.getSize(),
+ I.getTBAAInfo())) !=
+ AliasAnalysis::NoModRef)
+ return true;
+
+ return false;
+}
+
+void AliasSetTracker::clear() {
+ // Delete all the PointerRec entries.
+ for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
+ I != E; ++I)
+ I->second->eraseFromList();
+
+ PointerMap.clear();
+
+ // The alias sets should all be clear now.
+ AliasSets.clear();
+}
+
+
+/// findAliasSetForPointer - Given a pointer, find the one alias set to put the
+/// instruction referring to the pointer into. If there are multiple alias sets
+/// that may alias the pointer, merge them together and return the unified set.
+///
+AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
+ uint64_t Size,
+ const MDNode *TBAAInfo) {
+ AliasSet *FoundSet = 0;
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
+
+ if (FoundSet == 0) { // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ } else { // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
+ }
+
+ return FoundSet;
+}
+
+/// containsPointer - Return true if the specified location is represented by
+/// this alias set, false otherwise. This does not modify the AST object or
+/// alias sets.
+bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
+ const MDNode *TBAAInfo) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA))
+ return true;
+ return false;
+}
+
+
+
+AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
+ AliasSet *FoundSet = 0;
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesUnknownInst(Inst, AA))
+ continue;
+
+ if (FoundSet == 0) // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ else if (!I->Forward) // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
+ return FoundSet;
+}
+
+
+
+
+/// getAliasSetForPointer - Return the alias set that the specified pointer
+/// lives in.
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
+ const MDNode *TBAAInfo,
+ bool *New) {
+ AliasSet::PointerRec &Entry = getEntryFor(Pointer);
+
+ // Check to see if the pointer is already known.
+ if (Entry.hasAliasSet()) {
+ Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+ // Return the set!
+ return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
+ }
+
+ if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) {
+ // Add it to the alias set it aliases.
+ AS->addPointer(*this, Entry, Size, TBAAInfo);
+ return *AS;
+ }
+
+ if (New) *New = true;
+ // Otherwise create a new alias set to hold the loaded pointer.
+ AliasSets.push_back(new AliasSet());
+ AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo);
+ return AliasSets.back();
+}
+
+bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+ bool NewPtr;
+ addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::add(LoadInst *LI) {
+ if (LI->getOrdering() > Monotonic) return addUnknown(LI);
+ AliasSet::AccessType ATy = AliasSet::Refs;
+ if (!LI->isUnordered()) ATy = AliasSet::ModRef;
+ bool NewPtr;
+ AliasSet &AS = addPointer(LI->getOperand(0),
+ AA.getTypeStoreSize(LI->getType()),
+ LI->getMetadata(LLVMContext::MD_tbaa),
+ ATy, NewPtr);
+ if (LI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(StoreInst *SI) {
+ if (SI->getOrdering() > Monotonic) return addUnknown(SI);
+ AliasSet::AccessType ATy = AliasSet::Mods;
+ if (!SI->isUnordered()) ATy = AliasSet::ModRef;
+ bool NewPtr;
+ Value *Val = SI->getOperand(0);
+ AliasSet &AS = addPointer(SI->getOperand(1),
+ AA.getTypeStoreSize(Val->getType()),
+ SI->getMetadata(LLVMContext::MD_tbaa),
+ ATy, NewPtr);
+ if (SI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(VAArgInst *VAAI) {
+ bool NewPtr;
+ addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize,
+ VAAI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::ModRef, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::addUnknown(Instruction *Inst) {
+ if (isa<DbgInfoIntrinsic>(Inst))
+ return true; // Ignore DbgInfo Intrinsics.
+ if (!Inst->mayReadOrWriteMemory())
+ return true; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForUnknownInst(Inst);
+ if (AS) {
+ AS->addUnknownInst(Inst, AA);
+ return false;
+ }
+ AliasSets.push_back(new AliasSet());
+ AS = &AliasSets.back();
+ AS->addUnknownInst(Inst, AA);
+ return true;
+}
+
+bool AliasSetTracker::add(Instruction *I) {
+ // Dispatch to one of the other add methods.
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return add(LI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return add(SI);
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return add(VAAI);
+ return addUnknown(I);
+}
+
+void AliasSetTracker::add(BasicBlock &BB) {
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+ add(I);
+}
+
+void AliasSetTracker::add(const AliasSetTracker &AST) {
+ assert(&AA == &AST.AA &&
+ "Merging AliasSetTracker objects with different Alias Analyses!");
+
+ // Loop over all of the alias sets in AST, adding the pointers contained
+ // therein into the current alias sets. This can cause alias sets to be
+ // merged together in the current AST.
+ for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) {
+ if (I->Forward) continue; // Ignore forwarding alias sets
+
+ AliasSet &AS = const_cast<AliasSet&>(*I);
+
+ // If there are any call sites in the alias set, add them to this AST.
+ for (unsigned i = 0, e = AS.UnknownInsts.size(); i != e; ++i)
+ add(AS.UnknownInsts[i]);
+
+ // Loop over all of the pointers in this alias set.
+ bool X;
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+ ASI.getTBAAInfo(),
+ (AliasSet::AccessType)AS.AccessTy, X);
+ if (AS.isVolatile()) NewAS.setVolatile();
+ }
+ }
+}
+
+/// remove - Remove the specified (potentially non-empty) alias set from the
+/// tracker.
+void AliasSetTracker::remove(AliasSet &AS) {
+ // Drop all call sites.
+ AS.UnknownInsts.clear();
+
+ // Clear the alias set.
+ unsigned NumRefs = 0;
+ while (!AS.empty()) {
+ AliasSet::PointerRec *P = AS.PtrList;
+
+ Value *ValToRemove = P->getValue();
+
+ // Unlink and delete entry from the list of values.
+ P->eraseFromList();
+
+ // Remember how many references need to be dropped.
+ ++NumRefs;
+
+ // Finally, remove the entry.
+ PointerMap.erase(ValToRemove);
+ }
+
+ // Stop using the alias set, removing it.
+ AS.RefCount -= NumRefs;
+ if (AS.RefCount == 0)
+ AS.removeFromTracker(*this);
+}
+
+bool
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+ AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(LoadInst *LI) {
+ uint64_t Size = AA.getTypeStoreSize(LI->getType());
+ const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa);
+ AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(StoreInst *SI) {
+ uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
+ const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa);
+ AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(VAArgInst *VAAI) {
+ AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
+ AliasAnalysis::UnknownSize,
+ VAAI->getMetadata(LLVMContext::MD_tbaa));
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::removeUnknown(Instruction *I) {
+ if (!I->mayReadOrWriteMemory())
+ return false; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForUnknownInst(I);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(Instruction *I) {
+ // Dispatch to one of the other remove methods...
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return remove(LI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return remove(SI);
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return remove(VAAI);
+ return removeUnknown(I);
+}
+
+
+// deleteValue method - This method is used to remove a pointer value from the
+// AliasSetTracker entirely. It should be used when an instruction is deleted
+// from the program to update the AST. If you don't use this, you would have
+// dangling pointers to deleted instructions.
+//
+void AliasSetTracker::deleteValue(Value *PtrVal) {
+ // Notify the alias analysis implementation that this value is gone.
+ AA.deleteValue(PtrVal);
+
+ // If this is a call instruction, remove the callsite from the appropriate
+ // AliasSet (if present).
+ if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
+ if (Inst->mayReadOrWriteMemory()) {
+ // Scan all the alias sets to see if this call site is contained.
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward) continue;
+
+ I->removeUnknownInst(Inst);
+ }
+ }
+ }
+
+ // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find_as(PtrVal);
+ if (I == PointerMap.end()) return; // Noop
+
+ // If we found one, remove the pointer from the alias set it is in.
+ AliasSet::PointerRec *PtrValEnt = I->second;
+ AliasSet *AS = PtrValEnt->getAliasSet(*this);
+
+ // Unlink and delete from the list of values.
+ PtrValEnt->eraseFromList();
+
+ // Stop using the alias set.
+ AS->dropRef(*this);
+
+ PointerMap.erase(I);
+}
+
+// copyValue - This method should be used whenever a preexisting value in the
+// program is copied or cloned, introducing a new value. Note that it is ok for
+// clients that use this method to introduce the same value multiple times: if
+// the tracker already knows about a value, it will ignore the request.
+//
+void AliasSetTracker::copyValue(Value *From, Value *To) {
+ // Notify the alias analysis implementation that this value is copied.
+ AA.copyValue(From, To);
+
+ // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find_as(From);
+ if (I == PointerMap.end())
+ return; // Noop
+ assert(I->second->hasAliasSet() && "Dead entry?");
+
+ AliasSet::PointerRec &Entry = getEntryFor(To);
+ if (Entry.hasAliasSet()) return; // Already in the tracker!
+
+ // Add it to the alias set it aliases...
+ I = PointerMap.find_as(From);
+ AliasSet *AS = I->second->getAliasSet(*this);
+ AS->addPointer(*this, Entry, I->second->getSize(),
+ I->second->getTBAAInfo(),
+ true);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// AliasSet/AliasSetTracker Printing Support
+//===----------------------------------------------------------------------===//
+
+void AliasSet::print(raw_ostream &OS) const {
+ OS << " AliasSet[" << (const void*)this << ", " << RefCount << "] ";
+ OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
+ switch (AccessTy) {
+ case NoModRef: OS << "No access "; break;
+ case Refs : OS << "Ref "; break;
+ case Mods : OS << "Mod "; break;
+ case ModRef : OS << "Mod/Ref "; break;
+ default: llvm_unreachable("Bad value for AccessTy!");
+ }
+ if (isVolatile()) OS << "[volatile] ";
+ if (Forward)
+ OS << " forwarding to " << (void*)Forward;
+
+
+ if (!empty()) {
+ OS << "Pointers: ";
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I != begin()) OS << ", ";
+ WriteAsOperand(OS << "(", I.getPointer());
+ OS << ", " << I.getSize() << ")";
+ }
+ }
+ if (!UnknownInsts.empty()) {
+ OS << "\n " << UnknownInsts.size() << " Unknown instructions: ";
+ for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ WriteAsOperand(OS, UnknownInsts[i]);
+ }
+ }
+ OS << "\n";
+}
+
+void AliasSetTracker::print(raw_ostream &OS) const {
+ OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
+ << PointerMap.size() << " pointer values.\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ I->print(OS);
+ OS << "\n";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void AliasSet::dump() const { print(dbgs()); }
+void AliasSetTracker::dump() const { print(dbgs()); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// ASTCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void AliasSetTracker::ASTCallbackVH::deleted() {
+ assert(AST && "ASTCallbackVH called with a null AliasSetTracker!");
+ AST->deleteValue(getValPtr());
+ // this now dangles!
+}
+
+void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) {
+ AST->copyValue(getValPtr(), V);
+}
+
+AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast)
+ : CallbackVH(V), AST(ast) {}
+
+AliasSetTracker::ASTCallbackVH &
+AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
+ return *this = ASTCallbackVH(V, AST);
+}
+
+//===----------------------------------------------------------------------===//
+// AliasSetPrinter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class AliasSetPrinter : public FunctionPass {
+ AliasSetTracker *Tracker;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AliasSetPrinter() : FunctionPass(ID) {
+ initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AliasAnalysis>();
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ Tracker->add(&*I);
+ Tracker->print(errs());
+ delete Tracker;
+ return false;
+ }
+ };
+}
+
+char AliasSetPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
new file mode 100644
index 000000000000..66e416cd140c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -0,0 +1,107 @@
+//===-- Analysis.cpp ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Analysis.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/InitializePasses.h"
+#include <cstring>
+
+using namespace llvm;
+
+/// initializeAnalysis - Initialize all passes linked into the Analysis library.
+void llvm::initializeAnalysis(PassRegistry &Registry) {
+ initializeAliasAnalysisAnalysisGroup(Registry);
+ initializeAliasAnalysisCounterPass(Registry);
+ initializeAAEvalPass(Registry);
+ initializeAliasDebuggerPass(Registry);
+ initializeAliasSetPrinterPass(Registry);
+ initializeNoAAPass(Registry);
+ initializeBasicAliasAnalysisPass(Registry);
+ initializeBlockFrequencyInfoPass(Registry);
+ initializeBranchProbabilityInfoPass(Registry);
+ initializeCostModelAnalysisPass(Registry);
+ initializeCFGViewerPass(Registry);
+ initializeCFGPrinterPass(Registry);
+ initializeCFGOnlyViewerPass(Registry);
+ initializeCFGOnlyPrinterPass(Registry);
+ initializeDependenceAnalysisPass(Registry);
+ initializeDominanceFrontierPass(Registry);
+ initializeDomViewerPass(Registry);
+ initializeDomPrinterPass(Registry);
+ initializeDomOnlyViewerPass(Registry);
+ initializePostDomViewerPass(Registry);
+ initializeDomOnlyPrinterPass(Registry);
+ initializePostDomPrinterPass(Registry);
+ initializePostDomOnlyViewerPass(Registry);
+ initializePostDomOnlyPrinterPass(Registry);
+ initializeIVUsersPass(Registry);
+ initializeInstCountPass(Registry);
+ initializeIntervalPartitionPass(Registry);
+ initializeLazyValueInfoPass(Registry);
+ initializeLibCallAliasAnalysisPass(Registry);
+ initializeLintPass(Registry);
+ initializeLoopInfoPass(Registry);
+ initializeMemDepPrinterPass(Registry);
+ initializeMemoryDependenceAnalysisPass(Registry);
+ initializeModuleDebugInfoPrinterPass(Registry);
+ initializePostDominatorTreePass(Registry);
+ initializeProfileEstimatorPassPass(Registry);
+ initializeNoProfileInfoPass(Registry);
+ initializeNoPathProfileInfoPass(Registry);
+ initializeProfileInfoAnalysisGroup(Registry);
+ initializePathProfileInfoAnalysisGroup(Registry);
+ initializeLoaderPassPass(Registry);
+ initializePathProfileLoaderPassPass(Registry);
+ initializeProfileVerifierPassPass(Registry);
+ initializePathProfileVerifierPass(Registry);
+ initializeProfileMetadataLoaderPassPass(Registry);
+ initializeRegionInfoPass(Registry);
+ initializeRegionViewerPass(Registry);
+ initializeRegionPrinterPass(Registry);
+ initializeRegionOnlyViewerPass(Registry);
+ initializeRegionOnlyPrinterPass(Registry);
+ initializeScalarEvolutionPass(Registry);
+ initializeScalarEvolutionAliasAnalysisPass(Registry);
+ initializeTargetTransformInfoAnalysisGroup(Registry);
+ initializeTypeBasedAliasAnalysisPass(Registry);
+}
+
+void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
+ initializeAnalysis(*unwrap(R));
+}
+
+LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+ char **OutMessages) {
+ std::string Messages;
+
+ LLVMBool Result = verifyModule(*unwrap(M),
+ static_cast<VerifierFailureAction>(Action),
+ OutMessages? &Messages : 0);
+
+ if (OutMessages)
+ *OutMessages = strdup(Messages.c_str());
+
+ return Result;
+}
+
+LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
+ return verifyFunction(*unwrap<Function>(Fn),
+ static_cast<VerifierFailureAction>(Action));
+}
+
+void LLVMViewFunctionCFG(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFG();
+}
+
+void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFGOnly();
+}
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
new file mode 100644
index 000000000000..ae6da1af0c4f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -0,0 +1,1270 @@
+//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the primary stateless implementation of the
+// Alias Analysis interface that implements identities (two different
+// globals cannot alias, etc), but does no stateful analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Useful predicates
+//===----------------------------------------------------------------------===//
+
+/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
+/// object that never escapes from the function.
+static bool isNonEscapingLocalObject(const Value *V) {
+ // If this is a local allocation, check to see if it escapes.
+ if (isa<AllocaInst>(V) || isNoAliasCall(V))
+ // Set StoreCaptures to True so that we can assume in our callers that the
+ // pointer is not the result of a load instruction. Currently
+ // PointerMayBeCaptured doesn't have any special analysis for the
+ // StoreCaptures=false case; if it did, our callers could be refined to be
+ // more precise.
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+
+ // If this is an argument that corresponds to a byval or noalias argument,
+ // then it has not escaped before entering the function. Check if it escapes
+ // inside the function.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValAttr() || A->hasNoAliasAttr())
+ // Note even if the argument is marked nocapture we still need to check
+ // for copies made inside the function. The nocapture attribute only
+ // specifies that there are no copies made that outlive the function.
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+
+ return false;
+}
+
+/// isEscapeSource - Return true if the pointer is one which would have
+/// been considered an escape by isNonEscapingLocalObject.
+static bool isEscapeSource(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
+ return true;
+
+ // The load case works because isNonEscapingLocalObject considers all
+ // stores to be escapes (it passes true for the StoreCaptures argument
+ // to PointerMayBeCaptured).
+ if (isa<LoadInst>(V))
+ return true;
+
+ return false;
+}
+
+/// getObjectSize - Return the size of the object specified by V, or
+/// UnknownSize if unknown.
+static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
+ const TargetLibraryInfo &TLI,
+ bool RoundToAlign = false) {
+ uint64_t Size;
+ if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign))
+ return Size;
+ return AliasAnalysis::UnknownSize;
+}
+
+/// isObjectSmallerThan - Return true if we can prove that the object specified
+/// by V is smaller than Size.
+static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+ const DataLayout &TD,
+ const TargetLibraryInfo &TLI) {
+ // This function needs to use the aligned object size because we allow
+ // reads a bit past the end given sufficient alignment.
+ uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
+
+ return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
+}
+
+/// isObjectSize - Return true if we can prove that the object specified
+/// by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size,
+ const DataLayout &TD, const TargetLibraryInfo &TLI) {
+ uint64_t ObjectSize = getObjectSize(V, TD, TLI);
+ return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
+}
+
+//===----------------------------------------------------------------------===//
+// GetElementPtr Instruction Decomposition and Analysis
+//===----------------------------------------------------------------------===//
+
+namespace {
+ enum ExtensionKind {
+ EK_NotExtended,
+ EK_SignExt,
+ EK_ZeroExt
+ };
+
+ struct VariableGEPIndex {
+ const Value *V;
+ ExtensionKind Extension;
+ int64_t Scale;
+
+ bool operator==(const VariableGEPIndex &Other) const {
+ return V == Other.V && Extension == Other.Extension &&
+ Scale == Other.Scale;
+ }
+
+ bool operator!=(const VariableGEPIndex &Other) const {
+ return !operator==(Other);
+ }
+ };
+}
+
+
+/// GetLinearExpression - Analyze the specified value as a linear expression:
+/// "A*V + B", where A and B are constant integers. Return the scale and offset
+/// values as APInts and return V as a Value*, and return whether we looked
+/// through any sign or zero extends. The incoming Value is known to have
+/// IntegerType and it may already be sign or zero extended.
+///
+/// Note that this looks through extends, so the high bits may not be
+/// represented in the result.
+static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
+ ExtensionKind &Extension,
+ const DataLayout &TD, unsigned Depth) {
+ assert(V->getType()->isIntegerTy() && "Not an integer value");
+
+ // Limit our recursion depth.
+ if (Depth == 6) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ switch (BOp->getOpcode()) {
+ default: break;
+ case Instruction::Or:
+ // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
+ // analyze it.
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD))
+ break;
+ // FALL THROUGH.
+ case Instruction::Add:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset += RHSC->getValue();
+ return V;
+ case Instruction::Mul:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset *= RHSC->getValue();
+ Scale *= RHSC->getValue();
+ return V;
+ case Instruction::Shl:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset <<= RHSC->getValue().getLimitedValue();
+ Scale <<= RHSC->getValue().getLimitedValue();
+ return V;
+ }
+ }
+ }
+
+ // Since GEP indices are sign extended anyway, we don't care about the high
+ // bits of a sign or zero extended value - just scales and offsets. The
+ // extensions have to be consistent though.
+ if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
+ (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+ Value *CastOp = cast<CastInst>(V)->getOperand(0);
+ unsigned OldWidth = Scale.getBitWidth();
+ unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
+ Scale = Scale.trunc(SmallWidth);
+ Offset = Offset.trunc(SmallWidth);
+ Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
+
+ Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
+ TD, Depth+1);
+ Scale = Scale.zext(OldWidth);
+ Offset = Offset.zext(OldWidth);
+
+ return Result;
+ }
+
+ Scale = 1;
+ Offset = 0;
+ return V;
+}
+
+/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
+/// into a base pointer with a constant offset and a number of scaled symbolic
+/// offsets.
+///
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
+/// the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As such,
+/// the gep cannot necessarily be reconstructed from its decomposed form.
+///
+/// When DataLayout is around, this function is capable of analyzing everything
+/// that GetUnderlyingObject can look through. When not, it just looks
+/// through pointer casts.
+///
+static const Value *
+DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ const DataLayout *TD) {
+ // Limit recursion depth to limit compile time in crazy cases.
+ unsigned MaxLookup = 6;
+
+ BaseOffs = 0;
+ do {
+ // See if this is a bitcast or GEP.
+ const Operator *Op = dyn_cast<Operator>(V);
+ if (Op == 0) {
+ // The only non-operator case we can handle are GlobalAliases.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden()) {
+ V = GA->getAliasee();
+ continue;
+ }
+ }
+ return V;
+ }
+
+ if (Op->getOpcode() == Instruction::BitCast) {
+ V = Op->getOperand(0);
+ continue;
+ }
+
+ const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
+ if (GEPOp == 0) {
+ // If it's not a GEP, hand it off to SimplifyInstruction to see if it
+ // can come up with something. This matches what GetUnderlyingObject does.
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Get a DominatorTree and use it here.
+ if (const Value *Simplified =
+ SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
+ V = Simplified;
+ continue;
+ }
+
+ return V;
+ }
+
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return V;
+
+ // If we are lacking DataLayout information, we can't compute the offets of
+ // elements computed by GEPs. However, we can handle bitcast equivalent
+ // GEPs.
+ if (TD == 0) {
+ if (!GEPOp->hasAllZeroIndices())
+ return V;
+ V = GEPOp->getOperand(0);
+ continue;
+ }
+
+ // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
+ gep_type_iterator GTI = gep_type_begin(GEPOp);
+ for (User::const_op_iterator I = GEPOp->op_begin()+1,
+ E = GEPOp->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ if (FieldNo == 0) continue;
+
+ BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
+ continue;
+ }
+
+ // For an array/pointer, add the element offset, explicitly scaled.
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ if (CIdx->isZero()) continue;
+ BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+ continue;
+ }
+
+ uint64_t Scale = TD->getTypeAllocSize(*GTI);
+ ExtensionKind Extension = EK_NotExtended;
+
+ // If the integer type is smaller than the pointer size, it is implicitly
+ // sign extended to pointer size.
+ unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
+ if (TD->getPointerSizeInBits() > Width)
+ Extension = EK_SignExt;
+
+ // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
+ APInt IndexScale(Width, 0), IndexOffset(Width, 0);
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
+ *TD, 0);
+
+ // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
+ // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
+ BaseOffs += IndexOffset.getSExtValue()*Scale;
+ Scale *= IndexScale.getSExtValue();
+
+
+ // If we already had an occurrence of this index variable, merge this
+ // scale into it. For example, we want to handle:
+ // A[x][x] -> x*16 + x*4 -> x*20
+ // This also ensures that 'x' only appears in the index list once.
+ for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
+ if (VarIndices[i].V == Index &&
+ VarIndices[i].Extension == Extension) {
+ Scale += VarIndices[i].Scale;
+ VarIndices.erase(VarIndices.begin()+i);
+ break;
+ }
+ }
+
+ // Make sure that we have a scale that makes sense for this target's
+ // pointer size.
+ if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ Scale <<= ShiftBits;
+ Scale = (int64_t)Scale >> ShiftBits;
+ }
+
+ if (Scale) {
+ VariableGEPIndex Entry = {Index, Extension,
+ static_cast<int64_t>(Scale)};
+ VarIndices.push_back(Entry);
+ }
+ }
+
+ // Analyze the base pointer next.
+ V = GEPOp->getOperand(0);
+ } while (--MaxLookup);
+
+ // If the chain of expressions is too deep, just return early.
+ return V;
+}
+
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src) {
+ if (Src.empty()) return;
+
+ for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+ const Value *V = Src[i].V;
+ ExtensionKind Extension = Src[i].Extension;
+ int64_t Scale = Src[i].Scale;
+
+ // Find V in Dest. This is N^2, but pointer indices almost never have more
+ // than a few variable indexes.
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+ if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
+
+ // If we found it, subtract off Scale V's from the entry in Dest. If it
+ // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale)
+ Dest[j].Scale -= Scale;
+ else
+ Dest.erase(Dest.begin()+j);
+ Scale = 0;
+ break;
+ }
+
+ // If we didn't consume this entry, add it to the end of the Dest list.
+ if (Scale) {
+ VariableGEPIndex Entry = { V, Extension, -Scale };
+ Dest.push_back(Entry);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// BasicAliasAnalysis Pass
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+ if (const Instruction *inst = dyn_cast<Instruction>(V))
+ return inst->getParent()->getParent();
+
+ if (const Argument *arg = dyn_cast<Argument>(V))
+ return arg->getParent();
+
+ return NULL;
+}
+
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+ const Function *F1 = getParent(O1);
+ const Function *F2 = getParent(O2);
+
+ return !F1 || !F2 || F1 == F2;
+}
+#endif
+
+namespace {
+ /// BasicAliasAnalysis - This is the primary alias analysis implementation.
+ struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
+ static char ID; // Class identification, replacement for typeinfo
+ BasicAliasAnalysis() : ImmutablePass(ID) {
+ initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+
+ virtual AliasResult alias(const Location &LocA,
+ const Location &LocB) {
+ assert(AliasCache.empty() && "AliasCache must be cleared after use!");
+ assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
+ "BasicAliasAnalysis doesn't support interprocedural queries.");
+ AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
+ LocB.Ptr, LocB.Size, LocB.TBAATag);
+ // AliasCache rarely has more than 1 or 2 elements, always use
+ // shrink_and_clear so it quickly returns to the inline capacity of the
+ // SmallDenseMap if it ever grows larger.
+ // FIXME: This should really be shrink_to_inline_capacity_and_clear().
+ AliasCache.shrink_and_clear();
+ return Alias;
+ }
+
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
+
+ /// pointsToConstantMemory - Chase pointers until we find a (constant
+ /// global) or not.
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+
+ /// getModRefBehavior - Return the behavior when calling the given
+ /// call site.
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+
+ /// getModRefBehavior - Return the behavior when calling the given function.
+ /// For use when the call site is not known.
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ // AliasCache - Track alias queries to guard against recursion.
+ typedef std::pair<Location, Location> LocPair;
+ typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
+ AliasCacheTy AliasCache;
+
+ // Visited - Track instructions visited by pointsToConstantMemory.
+ SmallPtrSet<const Value*, 16> Visited;
+
+ // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
+ // instruction against another.
+ AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
+ const MDNode *V1TBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo,
+ const Value *UnderlyingV1, const Value *UnderlyingV2);
+
+ // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
+ // instruction against another.
+ AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const MDNode *PNTBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo);
+
+ /// aliasSelect - Disambiguate a Select instruction against another value.
+ AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const MDNode *SITBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo);
+
+ AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
+ const MDNode *V1TBAATag,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAATag);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char BasicAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)",
+ false, true, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)",
+ false, true, false)
+
+
+ImmutablePass *llvm::createBasicAliasAnalysisPass() {
+ return new BasicAliasAnalysis();
+}
+
+/// pointsToConstantMemory - Returns whether the given pointer value
+/// points to memory that is local to the function, with global constants being
+/// considered local to all functions.
+bool
+BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ assert(Visited.empty() && "Visited must be cleared after use!");
+
+ unsigned MaxLookup = 8;
+ SmallVector<const Value *, 16> Worklist;
+ Worklist.push_back(Loc.Ptr);
+ do {
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD);
+ if (!Visited.insert(V)) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ // An alloca instruction defines local memory.
+ if (OrLocal && isa<AllocaInst>(V))
+ continue;
+
+ // A global constant counts as local memory for our purposes.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ // Note: this doesn't require GV to be "ODR" because it isn't legal for a
+ // global to be marked constant in some modules and non-constant in
+ // others. GV may even be a declaration, not a definition.
+ if (!GV->isConstant()) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+ continue;
+ }
+
+ // If both select values point to local memory, then so does the select.
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ // If all values incoming to a phi node point to local memory, then so does
+ // the phi.
+ if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Don't bother inspecting phi nodes with many operands.
+ if (PN->getNumIncomingValues() > MaxLookup) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
+
+ // Otherwise be conservative.
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ } while (!Worklist.empty() && --MaxLookup);
+
+ Visited.clear();
+ return Worklist.empty();
+}
+
+/// getModRefBehavior - Return the behavior when calling the given call site.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (CS.doesNotAccessMemory())
+ // Can't do better than this.
+ return DoesNotAccessMemory;
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the callsite knows it only reads memory, don't return worse
+ // than that.
+ if (CS.onlyReadsMemory())
+ Min = OnlyReadsMemory;
+
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+/// getModRefBehavior - Return the behavior when calling the given function.
+/// For use when the call site is not known.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+ // If the function declares it doesn't access memory, we can't do better.
+ if (F->doesNotAccessMemory())
+ return DoesNotAccessMemory;
+
+ // For intrinsics, we can check the table.
+ if (unsigned iid = F->getIntrinsicID()) {
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+ }
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the function declares it only reads memory, go with that.
+ if (F->onlyReadsMemory())
+ Min = OnlyReadsMemory;
+
+ // Otherwise be conservative.
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+}
+
+/// getModRefInfo - Check to see if the specified callsite can clobber the
+/// specified memory object. Since we only look at local properties of this
+/// function, we really can't say much about this query. We do, however, use
+/// simple "address taken" analysis on local objects.
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
+ "AliasAnalysis query involving multiple functions!");
+
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
+
+ // If this is a tail call and Loc.Ptr points to a stack location, we know that
+ // the tail call cannot access or modify the local stack.
+ // We cannot exclude byval arguments here; these belong to the caller of
+ // the current function not to the current function, and a tail callee
+ // may reference them.
+ if (isa<AllocaInst>(Object))
+ if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (CI->isTailCall())
+ return NoModRef;
+
+ // If the pointer is to a locally allocated object that does not escape,
+ // then the call can not mod/ref the pointer unless the call takes the pointer
+ // as an argument, and itself doesn't capture it.
+ if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
+ isNonEscapingLocalObject(Object)) {
+ bool PassedAsArg = false;
+ unsigned ArgNo = 0;
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture or byval pointer arguments. If this
+ // pointer were passed to arguments that were neither of these, then it
+ // couldn't be no-capture.
+ if (!(*CI)->getType()->isPointerTy() ||
+ (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ if (!isNoAlias(Location(*CI), Location(Object))) {
+ PassedAsArg = true;
+ break;
+ }
+ }
+
+ if (!PassedAsArg)
+ return NoModRef;
+ }
+
+ const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+ ModRefResult Min = ModRef;
+
+ // Finally, handle specific knowledge of intrinsics.
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ if (II != 0)
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ uint64_t Len = UnknownSize;
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+ Len = LenCI->getZExtValue();
+ Value *Dest = II->getArgOperand(0);
+ Value *Src = II->getArgOperand(1);
+ // If it can't overlap the source dest, then it doesn't modref the loc.
+ if (isNoAlias(Location(Dest, Len), Loc)) {
+ if (isNoAlias(Location(Src, Len), Loc))
+ return NoModRef;
+ // If it can't overlap the dest, then worst case it reads the loc.
+ Min = Ref;
+ } else if (isNoAlias(Location(Src, Len), Loc)) {
+ // If it can't overlap the source, then worst case it mutates the loc.
+ Min = Mod;
+ }
+ break;
+ }
+ case Intrinsic::memset:
+ // Since memset is 'accesses arguments' only, the AliasAnalysis base class
+ // will handle it for the variable length case.
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+ uint64_t Len = LenCI->getZExtValue();
+ Value *Dest = II->getArgOperand(0);
+ if (isNoAlias(Location(Dest, Len), Loc))
+ return NoModRef;
+ }
+ // We know that memset doesn't load anything.
+ Min = Mod;
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: {
+ uint64_t PtrSize =
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ if (isNoAlias(Location(II->getArgOperand(1),
+ PtrSize,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::invariant_end: {
+ uint64_t PtrSize =
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+ if (isNoAlias(Location(II->getArgOperand(2),
+ PtrSize,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::arm_neon_vld1: {
+ // LLVM's vld1 and vst1 intrinsics currently only support a single
+ // vector register.
+ uint64_t Size =
+ TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize;
+ if (isNoAlias(Location(II->getArgOperand(0), Size,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::arm_neon_vst1: {
+ uint64_t Size =
+ TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
+ if (isNoAlias(Location(II->getArgOperand(0), Size,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ }
+
+ // We can bound the aliasing properties of memset_pattern16 just as we can
+ // for memcpy/memset. This is particularly important because the
+ // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
+ // whenever possible.
+ else if (TLI.has(LibFunc::memset_pattern16) &&
+ CS.getCalledFunction() &&
+ CS.getCalledFunction()->getName() == "memset_pattern16") {
+ const Function *MS = CS.getCalledFunction();
+ FunctionType *MemsetType = MS->getFunctionType();
+ if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
+ isa<PointerType>(MemsetType->getParamType(0)) &&
+ isa<PointerType>(MemsetType->getParamType(1)) &&
+ isa<IntegerType>(MemsetType->getParamType(2))) {
+ uint64_t Len = UnknownSize;
+ if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
+ Len = LenCI->getZExtValue();
+ const Value *Dest = CS.getArgument(0);
+ const Value *Src = CS.getArgument(1);
+ // If it can't overlap the source dest, then it doesn't modref the loc.
+ if (isNoAlias(Location(Dest, Len), Loc)) {
+ // Always reads 16 bytes of the source.
+ if (isNoAlias(Location(Src, 16), Loc))
+ return NoModRef;
+ // If it can't overlap the dest, then worst case it reads the loc.
+ Min = Ref;
+ // Always reads 16 bytes of the source.
+ } else if (isNoAlias(Location(Src, 16), Loc)) {
+ // If it can't overlap the source, then worst case it mutates the loc.
+ Min = Mod;
+ }
+ }
+ }
+
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
+}
+
+static bool areVarIndicesEqual(SmallVector<VariableGEPIndex, 4> &Indices1,
+ SmallVector<VariableGEPIndex, 4> &Indices2) {
+ unsigned Size1 = Indices1.size();
+ unsigned Size2 = Indices2.size();
+
+ if (Size1 != Size2)
+ return false;
+
+ for (unsigned I = 0; I != Size1; ++I)
+ if (Indices1[I] != Indices2[I])
+ return false;
+
+ return true;
+}
+
+/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+/// against another pointer. We know that V1 is a GEP, but we don't know
+/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD),
+/// UnderlyingV2 is the same for V2.
+///
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+ const MDNode *V1TBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo,
+ const Value *UnderlyingV1,
+ const Value *UnderlyingV2) {
+ int64_t GEP1BaseOffset;
+ SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
+
+ // If we have two gep instructions with must-alias or not-alias'ing base
+ // pointers, figure out if the indexes to the GEP tell us anything about the
+ // derived pointer.
+ if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+ // Do the base pointers alias?
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ UnderlyingV2, UnknownSize, 0);
+
+ // Check for geps of non-aliasing underlying pointers where the offsets are
+ // identical.
+ if ((BaseAlias == MayAlias) && V1Size == V2Size) {
+ // Do the base pointers alias assuming type and size.
+ AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
+ V1TBAAInfo, UnderlyingV2,
+ V2Size, V2TBAAInfo);
+ if (PreciseBaseAlias == NoAlias) {
+ // See if the computed offset from the common pointer tells us about the
+ // relation of the resulting pointer.
+ int64_t GEP2BaseOffset;
+ SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
+ const Value *GEP2BasePtr =
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+ // DecomposeGEPExpression and GetUnderlyingObject should return the
+ // same result except when DecomposeGEPExpression has no DataLayout.
+ if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+ return MayAlias;
+ }
+ // Same offsets.
+ if (GEP1BaseOffset == GEP2BaseOffset &&
+ areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices))
+ return NoAlias;
+ GEP1VariableIndices.clear();
+ }
+ }
+
+ // If we get a No or May, then return it immediately, no amount of analysis
+ // will improve this situation.
+ if (BaseAlias != MustAlias) return BaseAlias;
+
+ // Otherwise, we have a MustAlias. Since the base pointers alias each other
+ // exactly, see if the computed offset from the common pointer tells us
+ // about the relation of the resulting pointer.
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+ int64_t GEP2BaseOffset;
+ SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
+ const Value *GEP2BasePtr =
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
+
+ // DecomposeGEPExpression and GetUnderlyingObject should return the
+ // same result except when DecomposeGEPExpression has no DataLayout.
+ if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+ return MayAlias;
+ }
+
+ // Subtract the GEP2 pointer from the GEP1 pointer to find out their
+ // symbolic difference.
+ GEP1BaseOffset -= GEP2BaseOffset;
+ GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
+
+ } else {
+ // Check to see if these two pointers are related by the getelementptr
+ // instruction. If one pointer is a GEP with a non-zero index of the other
+ // pointer, we know they cannot alias.
+
+ // If both accesses are unknown size, we can't do anything useful here.
+ if (V1Size == UnknownSize && V2Size == UnknownSize)
+ return MayAlias;
+
+ AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ V2, V2Size, V2TBAAInfo);
+ if (R != MustAlias)
+ // If V2 may alias GEP base pointer, conservatively returns MayAlias.
+ // If V2 is known not to alias GEP base pointer, then the two values
+ // cannot alias per GEP semantics: "A pointer value formed from a
+ // getelementptr instruction is associated with the addresses associated
+ // with the first operand of the getelementptr".
+ return R;
+
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+ // DecomposeGEPExpression and GetUnderlyingObject should return the
+ // same result except when DecomposeGEPExpression has no DataLayout.
+ if (GEP1BasePtr != UnderlyingV1) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+ return MayAlias;
+ }
+ }
+
+ // In the two GEP Case, if there is no difference in the offsets of the
+ // computed pointers, the resultant pointers are a must alias. This
+ // hapens when we have two lexically identical GEP's (for example).
+ //
+ // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2
+ // must aliases the GEP, the end result is a must alias also.
+ if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
+ return MustAlias;
+
+ // If there is a constant difference between the pointers, but the difference
+ // is less than the size of the associated memory object, then we know
+ // that the objects are partially overlapping. If the difference is
+ // greater, we know they do not overlap.
+ if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
+ if (GEP1BaseOffset >= 0) {
+ if (V2Size != UnknownSize) {
+ if ((uint64_t)GEP1BaseOffset < V2Size)
+ return PartialAlias;
+ return NoAlias;
+ }
+ } else {
+ if (V1Size != UnknownSize) {
+ if (-(uint64_t)GEP1BaseOffset < V1Size)
+ return PartialAlias;
+ return NoAlias;
+ }
+ }
+ }
+
+ // Try to distinguish something like &A[i][1] against &A[42][0].
+ // Grab the least significant bit set in any of the scales.
+ if (!GEP1VariableIndices.empty()) {
+ uint64_t Modulo = 0;
+ for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
+ Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+ Modulo = Modulo ^ (Modulo & (Modulo - 1));
+
+ // We can compute the difference between the two addresses
+ // mod Modulo. Check whether that difference guarantees that the
+ // two locations do not alias.
+ uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
+ if (V1Size != UnknownSize && V2Size != UnknownSize &&
+ ModOffset >= V2Size && V1Size <= Modulo - ModOffset)
+ return NoAlias;
+ }
+
+ // Statically, we can see that the base objects are the same, but the
+ // pointers have dynamic offsets which we can't resolve. And none of our
+ // little tricks above worked.
+ //
+ // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
+ // practical effect of this is protecting TBAA in the case of dynamic
+ // indices into arrays of unions or malloc'd memory.
+ return PartialAlias;
+}
+
+static AliasAnalysis::AliasResult
+MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) {
+ // If the results agree, take it.
+ if (A == B)
+ return A;
+ // A mix of PartialAlias and MustAlias is PartialAlias.
+ if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) ||
+ (B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias))
+ return AliasAnalysis::PartialAlias;
+ // Otherwise, we don't know anything.
+ return AliasAnalysis::MayAlias;
+}
+
+/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
+/// instruction against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const MDNode *SITBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for aliases between the values on corresponding arms.
+ if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
+ if (SI->getCondition() == SI2->getCondition()) {
+ AliasResult Alias =
+ aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo,
+ SI2->getTrueValue(), V2Size, V2TBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+ AliasResult ThisAlias =
+ aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
+ SI2->getFalseValue(), V2Size, V2TBAAInfo);
+ return MergeAliasResults(ThisAlias, Alias);
+ }
+
+ // If both arms of the Select node NoAlias or MustAlias V2, then returns
+ // NoAlias / MustAlias. Otherwise, returns MayAlias.
+ AliasResult Alias =
+ aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ AliasResult ThisAlias =
+ aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
+ return MergeAliasResults(ThisAlias, Alias);
+}
+
+// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
+// against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const MDNode *PNTBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If the values are PHIs in the same block, we can do a more precise
+ // as well as efficient check: just check for aliases between the values
+ // on corresponding edges.
+ if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
+ if (PN2->getParent() == PN->getParent()) {
+ LocPair Locs(Location(PN, PNSize, PNTBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+ if (PN > V2)
+ std::swap(Locs.first, Locs.second);
+ // Analyse the PHIs' inputs under the assumption that the PHIs are
+ // NoAlias.
+ // If the PHIs are May/MustAlias there must be (recursively) an input
+ // operand from outside the PHIs' cycle that is MayAlias/MustAlias or
+ // there must be an operation on the PHIs within the PHIs' value cycle
+ // that causes a MayAlias.
+ // Pretend the phis do not alias.
+ AliasResult Alias = NoAlias;
+ assert(AliasCache.count(Locs) &&
+ "There must exist an entry for the phi node");
+ AliasResult OrigAliasResult = AliasCache[Locs];
+ AliasCache[Locs] = NoAlias;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ AliasResult ThisAlias =
+ aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+ V2Size, V2TBAAInfo);
+ Alias = MergeAliasResults(ThisAlias, Alias);
+ if (Alias == MayAlias)
+ break;
+ }
+
+ // Reset if speculation failed.
+ if (Alias != NoAlias)
+ AliasCache[Locs] = OrigAliasResult;
+
+ return Alias;
+ }
+
+ SmallPtrSet<Value*, 4> UniqueSrc;
+ SmallVector<Value*, 4> V1Srcs;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *PV1 = PN->getIncomingValue(i);
+ if (isa<PHINode>(PV1))
+ // If any of the source itself is a PHI, return MayAlias conservatively
+ // to avoid compile time explosion. The worst possible case is if both
+ // sides are PHI nodes. In which case, this is O(m x n) time where 'm'
+ // and 'n' are the number of PHI sources.
+ return MayAlias;
+ if (UniqueSrc.insert(PV1))
+ V1Srcs.push_back(PV1);
+ }
+
+ AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo,
+ V1Srcs[0], PNSize, PNTBAAInfo);
+ // Early exit if the check of the first PHI source against V2 is MayAlias.
+ // Other results are not possible.
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ // If all sources of the PHI node NoAlias or MustAlias V2, then returns
+ // NoAlias / MustAlias. Otherwise, returns MayAlias.
+ for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
+ Value *V = V1Srcs[i];
+
+ AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo,
+ V, PNSize, PNTBAAInfo);
+ Alias = MergeAliasResults(ThisAlias, Alias);
+ if (Alias == MayAlias)
+ break;
+ }
+
+ return Alias;
+}
+
+// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
+// such as array references.
+//
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
+ const MDNode *V1TBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are.
+ if (V1Size == 0 || V2Size == 0)
+ return NoAlias;
+
+ // Strip off any casts if they exist.
+ V1 = V1->stripPointerCasts();
+ V2 = V2->stripPointerCasts();
+
+ // Are we checking for alias of the same value?
+ if (V1 == V2) return MustAlias;
+
+ if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
+ return NoAlias; // Scalars cannot alias each other
+
+ // Figure out what objects these things are pointing to if we can.
+ const Value *O1 = GetUnderlyingObject(V1, TD);
+ const Value *O2 = GetUnderlyingObject(V2, TD);
+
+ // Null values in the default address space don't point to any object, so they
+ // don't alias any other pointer.
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+
+ if (O1 != O2) {
+ // If V1/V2 point to two different objects we know that we have no alias.
+ if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
+ return NoAlias;
+
+ // Constant pointers can't alias with non-const isIdentifiedObject objects.
+ if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
+ (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
+ return NoAlias;
+
+ // Arguments can't alias with local allocations or noalias calls
+ // in the same function.
+ if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
+ (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))))
+ return NoAlias;
+
+ // Most objects can't alias null.
+ if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
+ (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
+ return NoAlias;
+
+ // If one pointer is the result of a call/invoke or load and the other is a
+ // non-escaping local object within the same function, then we know the
+ // object couldn't escape to a point where the call could return it.
+ //
+ // Note that if the pointers are in different functions, there are a
+ // variety of complications. A call with a nocapture argument may still
+ // temporary store the nocapture argument's value in a temporary memory
+ // location if that memory location doesn't escape. Or it may pass a
+ // nocapture value to other functions as long as they don't capture it.
+ if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+ return NoAlias;
+ if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+ return NoAlias;
+ }
+
+ // If the size of one access is larger than the entire object on the other
+ // side, then we know such behavior is undefined and can assume no alias.
+ if (TD)
+ if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) ||
+ (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI)))
+ return NoAlias;
+
+ // Check the cache before climbing up use-def chains. This also terminates
+ // otherwise infinitely recursive queries.
+ LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+ if (V1 > V2)
+ std::swap(Locs.first, Locs.second);
+ std::pair<AliasCacheTy::iterator, bool> Pair =
+ AliasCache.insert(std::make_pair(Locs, MayAlias));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
+ // GEP can't simplify, we don't even look at the PHI cases.
+ if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ std::swap(O1, O2);
+ std::swap(V1TBAAInfo, V2TBAAInfo);
+ }
+ if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
+ AliasResult Result = aliasGEP(GV1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo, O1, O2);
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
+ }
+
+ if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ std::swap(V1TBAAInfo, V2TBAAInfo);
+ }
+ if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
+ AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
+ V2, V2Size, V2TBAAInfo);
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
+ }
+
+ if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ std::swap(V1TBAAInfo, V2TBAAInfo);
+ }
+ if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
+ AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
+ V2, V2Size, V2TBAAInfo);
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
+ }
+
+ // If both pointers are pointing into the same object and one of them
+ // accesses is accessing the entire object, then the accesses must
+ // overlap in some way.
+ if (TD && O1 == O2)
+ if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD, *TLI)) ||
+ (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD, *TLI)))
+ return AliasCache[Locs] = PartialAlias;
+
+ AliasResult Result =
+ AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+ return AliasCache[Locs] = Result;
+}
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
new file mode 100644
index 000000000000..100e5c8ae7dd
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -0,0 +1,63 @@
+//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------=======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
+ true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
+ true, true)
+
+char BlockFrequencyInfo::ID = 0;
+
+
+BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) {
+ initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+ BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
+}
+
+BlockFrequencyInfo::~BlockFrequencyInfo() {
+ delete BFI;
+}
+
+void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<BranchProbabilityInfo>();
+ AU.setPreservesAll();
+}
+
+bool BlockFrequencyInfo::runOnFunction(Function &F) {
+ BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+ BFI->doFunction(&F, &BPI);
+ return false;
+}
+
+void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
+ if (BFI) BFI->print(O);
+}
+
+/// getblockFreq - Return block frequency. Return 0 if we don't have the
+/// information. Please note that initial frequency is equal to 1024. It means
+/// that we should not rely on the value itself, but only on the comparison to
+/// the other block frequencies. We do this to avoid using of floating points.
+///
+BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
+ return BFI->getBlockFreq(BB);
+}
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..6c5885601fa3
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -0,0 +1,558 @@
+//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
+ "Branch Probability Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
+ "Branch Probability Analysis", false, true)
+
+char BranchProbabilityInfo::ID = 0;
+
+// Weights are for internal use only. They are used by heuristics to help to
+// estimate edges' probability. Example:
+//
+// Using "Loop Branch Heuristics" we predict weights of edges for the
+// block BB2.
+// ...
+// |
+// V
+// BB1<-+
+// | |
+// | | (Weight = 124)
+// V |
+// BB2--+
+// |
+// | (Weight = 4)
+// V
+// BB3
+//
+// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
+// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
+static const uint32_t LBH_TAKEN_WEIGHT = 124;
+static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
+
+/// \brief Unreachable-terminating branch taken weight.
+///
+/// This is the weight for a branch being taken to a block that terminates
+/// (eventually) in unreachable. These are predicted as unlikely as possible.
+static const uint32_t UR_TAKEN_WEIGHT = 1;
+
+/// \brief Unreachable-terminating branch not-taken weight.
+///
+/// This is the weight for a branch not being taken toward a block that
+/// terminates (eventually) in unreachable. Such a branch is essentially never
+/// taken. Set the weight to an absurdly high value so that nested loops don't
+/// easily subsume it.
+static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
+
+static const uint32_t PH_TAKEN_WEIGHT = 20;
+static const uint32_t PH_NONTAKEN_WEIGHT = 12;
+
+static const uint32_t ZH_TAKEN_WEIGHT = 20;
+static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
+
+static const uint32_t FPH_TAKEN_WEIGHT = 20;
+static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
+
+/// \brief Invoke-terminating normal branch taken weight
+///
+/// This is the weight for branching to the normal destination of an invoke
+/// instruction. We expect this to happen most of the time. Set the weight to an
+/// absurdly high value so that nested loops subsume it.
+static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
+
+/// \brief Invoke-terminating normal branch not-taken weight.
+///
+/// This is the weight for branching to the unwind destination of an invoke
+/// instruction. This is essentially never taken.
+static const uint32_t IH_NONTAKEN_WEIGHT = 1;
+
+// Standard weight value. Used when none of the heuristics set weight for
+// the edge.
+static const uint32_t NORMAL_WEIGHT = 16;
+
+// Minimum weight of an edge. Please note, that weight is NEVER 0.
+static const uint32_t MIN_WEIGHT = 1;
+
+static uint32_t getMaxWeightFor(BasicBlock *BB) {
+ return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
+}
+
+
+/// \brief Calculate edge weights for successors lead to unreachable.
+///
+/// Predict that a successor which leads necessarily to an
+/// unreachable-terminated block as extremely unlikely.
+bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ if (isa<UnreachableInst>(TI))
+ PostDominatedByUnreachable.insert(BB);
+ return false;
+ }
+
+ SmallVector<unsigned, 4> UnreachableEdges;
+ SmallVector<unsigned, 4> ReachableEdges;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ if (PostDominatedByUnreachable.count(*I))
+ UnreachableEdges.push_back(I.getSuccessorIndex());
+ else
+ ReachableEdges.push_back(I.getSuccessorIndex());
+ }
+
+ // If all successors are in the set of blocks post-dominated by unreachable,
+ // this block is too.
+ if (UnreachableEdges.size() == TI->getNumSuccessors())
+ PostDominatedByUnreachable.insert(BB);
+
+ // Skip probabilities if this block has a single successor or if all were
+ // reachable.
+ if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
+ return false;
+
+ uint32_t UnreachableWeight =
+ std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT);
+ for (SmallVector<unsigned, 4>::iterator I = UnreachableEdges.begin(),
+ E = UnreachableEdges.end();
+ I != E; ++I)
+ setEdgeWeight(BB, *I, UnreachableWeight);
+
+ if (ReachableEdges.empty())
+ return true;
+ uint32_t ReachableWeight =
+ std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(),
+ NORMAL_WEIGHT);
+ for (SmallVector<unsigned, 4>::iterator I = ReachableEdges.begin(),
+ E = ReachableEdges.end();
+ I != E; ++I)
+ setEdgeWeight(BB, *I, ReachableWeight);
+
+ return true;
+}
+
+// Propagate existing explicit probabilities from either profile data or
+// 'expect' intrinsic processing.
+bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 1)
+ return false;
+ if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ return false;
+
+ MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
+ if (!WeightsNode)
+ return false;
+
+ // Ensure there are weights for all of the successors. Note that the first
+ // operand to the metadata node is a name, not a weight.
+ if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
+ return false;
+
+ // Build up the final weights that will be used in a temporary buffer, but
+ // don't add them until all weihts are present. Each weight value is clamped
+ // to [1, getMaxWeightFor(BB)].
+ uint32_t WeightLimit = getMaxWeightFor(BB);
+ SmallVector<uint32_t, 2> Weights;
+ Weights.reserve(TI->getNumSuccessors());
+ for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
+ ConstantInt *Weight = dyn_cast<ConstantInt>(WeightsNode->getOperand(i));
+ if (!Weight)
+ return false;
+ Weights.push_back(
+ std::max<uint32_t>(1, Weight->getLimitedValue(WeightLimit)));
+ }
+ assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ setEdgeWeight(BB, i, Weights[i]);
+
+ return true;
+}
+
+// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion
+// between two pointer or pointer and NULL will fail.
+bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
+ BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ Value *Cond = BI->getCondition();
+ ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+ if (!CI || !CI->isEquality())
+ return false;
+
+ Value *LHS = CI->getOperand(0);
+
+ if (!LHS->getType()->isPointerTy())
+ return false;
+
+ assert(CI->getOperand(1)->getType()->isPointerTy());
+
+ // p != 0 -> isProb = true
+ // p == 0 -> isProb = false
+ // p != q -> isProb = true
+ // p == q -> isProb = false;
+ unsigned TakenIdx = 0, NonTakenIdx = 1;
+ bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
+ if (!isProb)
+ std::swap(TakenIdx, NonTakenIdx);
+
+ setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT);
+ return true;
+}
+
+// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
+// as taken, exiting edges as not-taken.
+bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
+ Loop *L = LI->getLoopFor(BB);
+ if (!L)
+ return false;
+
+ SmallVector<unsigned, 8> BackEdges;
+ SmallVector<unsigned, 8> ExitingEdges;
+ SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ if (!L->contains(*I))
+ ExitingEdges.push_back(I.getSuccessorIndex());
+ else if (L->getHeader() == *I)
+ BackEdges.push_back(I.getSuccessorIndex());
+ else
+ InEdges.push_back(I.getSuccessorIndex());
+ }
+
+ if (uint32_t numBackEdges = BackEdges.size()) {
+ uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges;
+ if (backWeight < NORMAL_WEIGHT)
+ backWeight = NORMAL_WEIGHT;
+
+ for (SmallVector<unsigned, 8>::iterator EI = BackEdges.begin(),
+ EE = BackEdges.end(); EI != EE; ++EI) {
+ setEdgeWeight(BB, *EI, backWeight);
+ }
+ }
+
+ if (uint32_t numInEdges = InEdges.size()) {
+ uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges;
+ if (inWeight < NORMAL_WEIGHT)
+ inWeight = NORMAL_WEIGHT;
+
+ for (SmallVector<unsigned, 8>::iterator EI = InEdges.begin(),
+ EE = InEdges.end(); EI != EE; ++EI) {
+ setEdgeWeight(BB, *EI, inWeight);
+ }
+ }
+
+ if (uint32_t numExitingEdges = ExitingEdges.size()) {
+ uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges;
+ if (exitWeight < MIN_WEIGHT)
+ exitWeight = MIN_WEIGHT;
+
+ for (SmallVector<unsigned, 8>::iterator EI = ExitingEdges.begin(),
+ EE = ExitingEdges.end(); EI != EE; ++EI) {
+ setEdgeWeight(BB, *EI, exitWeight);
+ }
+ }
+
+ return true;
+}
+
+bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
+ BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ Value *Cond = BI->getCondition();
+ ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+ if (!CI)
+ return false;
+
+ Value *RHS = CI->getOperand(1);
+ ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+ if (!CV)
+ return false;
+
+ bool isProb;
+ if (CV->isZero()) {
+ switch (CI->getPredicate()) {
+ case CmpInst::ICMP_EQ:
+ // X == 0 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_NE:
+ // X != 0 -> Likely
+ isProb = true;
+ break;
+ case CmpInst::ICMP_SLT:
+ // X < 0 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_SGT:
+ // X > 0 -> Likely
+ isProb = true;
+ break;
+ default:
+ return false;
+ }
+ } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
+ // InstCombine canonicalizes X <= 0 into X < 1.
+ // X <= 0 -> Unlikely
+ isProb = false;
+ } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) {
+ // InstCombine canonicalizes X >= 0 into X > -1.
+ // X >= 0 -> Likely
+ isProb = true;
+ } else {
+ return false;
+ }
+
+ unsigned TakenIdx = 0, NonTakenIdx = 1;
+
+ if (!isProb)
+ std::swap(TakenIdx, NonTakenIdx);
+
+ setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT);
+
+ return true;
+}
+
+bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ Value *Cond = BI->getCondition();
+ FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
+ if (!FCmp)
+ return false;
+
+ bool isProb;
+ if (FCmp->isEquality()) {
+ // f1 == f2 -> Unlikely
+ // f1 != f2 -> Likely
+ isProb = !FCmp->isTrueWhenEqual();
+ } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) {
+ // !isnan -> Likely
+ isProb = true;
+ } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) {
+ // isnan -> Unlikely
+ isProb = false;
+ } else {
+ return false;
+ }
+
+ unsigned TakenIdx = 0, NonTakenIdx = 1;
+
+ if (!isProb)
+ std::swap(TakenIdx, NonTakenIdx);
+
+ setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT);
+
+ return true;
+}
+
+bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
+ InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator());
+ if (!II)
+ return false;
+
+ setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT);
+ return true;
+}
+
+void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+}
+
+bool BranchProbabilityInfo::runOnFunction(Function &F) {
+ LastF = &F; // Store the last function we ran on for printing.
+ LI = &getAnalysis<LoopInfo>();
+ assert(PostDominatedByUnreachable.empty());
+
+ // Walk the basic blocks in post-order so that we can build up state about
+ // the successors of a block iteratively.
+ for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
+ E = po_end(&F.getEntryBlock());
+ I != E; ++I) {
+ DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n");
+ if (calcUnreachableHeuristics(*I))
+ continue;
+ if (calcMetadataWeights(*I))
+ continue;
+ if (calcLoopBranchHeuristics(*I))
+ continue;
+ if (calcPointerHeuristics(*I))
+ continue;
+ if (calcZeroHeuristics(*I))
+ continue;
+ if (calcFloatingPointHeuristics(*I))
+ continue;
+ calcInvokeHeuristics(*I);
+ }
+
+ PostDominatedByUnreachable.clear();
+ return false;
+}
+
+void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const {
+ OS << "---- Branch Probabilities ----\n";
+ // We print the probabilities from the last function the analysis ran over,
+ // or the function it is currently running over.
+ assert(LastF && "Cannot print prior to running over a function");
+ for (Function::const_iterator BI = LastF->begin(), BE = LastF->end();
+ BI != BE; ++BI) {
+ for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI);
+ SI != SE; ++SI) {
+ printEdgeProbability(OS << " ", BI, *SI);
+ }
+ }
+}
+
+uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
+ uint32_t Sum = 0;
+
+ for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex());
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+ }
+
+ return Sum;
+}
+
+bool BranchProbabilityInfo::
+isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
+ // Hot probability is at least 4/5 = 80%
+ // FIXME: Compare against a static "hot" BranchProbability.
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
+}
+
+BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
+ uint32_t Sum = 0;
+ uint32_t MaxWeight = 0;
+ BasicBlock *MaxSucc = 0;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(BB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxSucc = Succ;
+ }
+ }
+
+ // Hot probability is at least 4/5 = 80%
+ if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5))
+ return MaxSucc;
+
+ return 0;
+}
+
+/// Get the raw edge weight for the edge. If can't find it, return
+/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an index
+/// to the successors.
+uint32_t BranchProbabilityInfo::
+getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const {
+ DenseMap<Edge, uint32_t>::const_iterator I =
+ Weights.find(std::make_pair(Src, IndexInSuccessors));
+
+ if (I != Weights.end())
+ return I->second;
+
+ return DEFAULT_WEIGHT;
+}
+
+/// Get the raw edge weight calculated for the block pair. This returns the sum
+/// of all raw edge weights from Src to Dst.
+uint32_t BranchProbabilityInfo::
+getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
+ uint32_t Weight = 0;
+ DenseMap<Edge, uint32_t>::const_iterator MapI;
+ for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
+ if (*I == Dst) {
+ MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex()));
+ if (MapI != Weights.end())
+ Weight += MapI->second;
+ }
+ return (Weight == 0) ? DEFAULT_WEIGHT : Weight;
+}
+
+/// Set the edge weight for a given edge specified by PredBlock and an index
+/// to the successors.
+void BranchProbabilityInfo::
+setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors,
+ uint32_t Weight) {
+ Weights[std::make_pair(Src, IndexInSuccessors)] = Weight;
+ DEBUG(dbgs() << "set edge " << Src->getName() << " -> "
+ << IndexInSuccessors << " successor weight to "
+ << Weight << "\n");
+}
+
+/// Get an edge's probability, relative to other out-edges from Src.
+BranchProbability BranchProbabilityInfo::
+getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const {
+ uint32_t N = getEdgeWeight(Src, IndexInSuccessors);
+ uint32_t D = getSumForBlock(Src);
+
+ return BranchProbability(N, D);
+}
+
+/// Get the probability of going from Src to Dst. It returns the sum of all
+/// probabilities for edges from Src to Dst.
+BranchProbability BranchProbabilityInfo::
+getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const {
+
+ uint32_t N = getEdgeWeight(Src, Dst);
+ uint32_t D = getSumForBlock(Src);
+
+ return BranchProbability(N, D);
+}
+
+raw_ostream &
+BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
+ const BasicBlock *Src,
+ const BasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge " << Src->getName() << " -> " << Dst->getName()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
new file mode 100644
index 000000000000..9b6879a42ed4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -0,0 +1,164 @@
+//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a '-dot-cfg' analysis pass, which emits the
+// cfg.<fnname>.dot file for each function in the program, with a graph of the
+// CFG for that function.
+//
+// The other main feature of this file is that it implements the
+// Function::viewCFG method, which is useful for debugging passes which operate
+// on the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ struct CFGViewer : public FunctionPass {
+ static char ID; // Pass identifcation, replacement for typeid
+ CFGViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ F.viewCFG();
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGViewer::ID = 0;
+INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
+
+namespace {
+ struct CFGOnlyViewer : public FunctionPass {
+ static char ID; // Pass identifcation, replacement for typeid
+ CFGOnlyViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ F.viewCFGOnly();
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyViewer::ID = 0;
+INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+ "View CFG of function (with no function bodies)", false, true)
+
+namespace {
+ struct CFGPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGPrinter() : FunctionPass(ID) {
+ initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ std::string Filename = "cfg." + F.getName().str() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
+
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty())
+ WriteGraph(File, (const Function*)&F);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGPrinter::ID = 0;
+INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+ false, true)
+
+namespace {
+ struct CFGOnlyPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGOnlyPrinter() : FunctionPass(ID) {
+ initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ std::string Filename = "cfg." + F.getName().str() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
+
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty())
+ WriteGraph(File, (const Function*)&F, true);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+ return false;
+ }
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyPrinter::ID = 0;
+INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+ "Print CFG of function to 'dot' file (with no function bodies)",
+ false, true)
+
+/// viewCFG - This function is meant for use from the debugger. You can just
+/// say 'call F->viewCFG()' and a ghostview window should pop up from the
+/// program, displaying the CFG of the current function. This depends on there
+/// being a 'dot' and 'gv' program in your path.
+///
+void Function::viewCFG() const {
+ ViewGraph(this, "cfg" + getName());
+}
+
+/// viewCFGOnly - This function is meant for use from the debugger. It works
+/// just like viewCFG, but it does not include the contents of basic blocks
+/// into the nodes, just the label. If you are only interested in the CFG t
+/// his can make the graph smaller.
+///
+void Function::viewCFGOnly() const {
+ ViewGraph(this, "cfg" + getName(), true);
+}
+
+FunctionPass *llvm::createCFGPrinterPass () {
+ return new CFGPrinter();
+}
+
+FunctionPass *llvm::createCFGOnlyPrinterPass () {
+ return new CFGOnlyPrinter();
+}
+
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
new file mode 100644
index 000000000000..a7292706dfa8
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -0,0 +1,180 @@
+//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help determine which pointers are captured.
+// A pointer value is captured if the function makes a copy of any part of the
+// pointer that outlives the call. Not being captured means, more or less, that
+// the pointer is only dereferenced and not stored in a global. Returning part
+// of the pointer as the function return value may or may not count as capturing
+// the pointer, depending on the context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CallSite.h"
+
+using namespace llvm;
+
+CaptureTracker::~CaptureTracker() {}
+
+bool CaptureTracker::shouldExplore(Use *U) { return true; }
+
+namespace {
+ struct SimpleCaptureTracker : public CaptureTracker {
+ explicit SimpleCaptureTracker(bool ReturnCaptures)
+ : ReturnCaptures(ReturnCaptures), Captured(false) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool captured(Use *U) {
+ if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
+ return false;
+
+ Captured = true;
+ return true;
+ }
+
+ bool ReturnCaptures;
+
+ bool Captured;
+ };
+}
+
+/// PointerMayBeCaptured - Return true if this pointer value may be captured
+/// by the enclosing function (which is required to exist). This routine can
+/// be expensive, so consider caching the results. The boolean ReturnCaptures
+/// specifies whether returning the value (or part of it) from the function
+/// counts as capturing it or not. The boolean StoreCaptures specified whether
+/// storing the value (or part of it) into memory anywhere automatically
+/// counts as capturing it or not.
+bool llvm::PointerMayBeCaptured(const Value *V,
+ bool ReturnCaptures, bool StoreCaptures) {
+ assert(!isa<GlobalValue>(V) &&
+ "It doesn't make sense to ask whether a global is captured.");
+
+ // TODO: If StoreCaptures is not true, we could do Fancy analysis
+ // to determine whether this store is not actually an escape point.
+ // In that case, BasicAliasAnalysis should be updated as well to
+ // take advantage of this.
+ (void)StoreCaptures;
+
+ SimpleCaptureTracker SCT(ReturnCaptures);
+ PointerMayBeCaptured(V, &SCT);
+ return SCT.Captured;
+}
+
+/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
+/// a cache. Then we can move the code from BasicAliasAnalysis into
+/// that path, and remove this threshold.
+static int const Threshold = 20;
+
+void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
+ assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
+ SmallVector<Use*, Threshold> Worklist;
+ SmallSet<Use*, Threshold> Visited;
+ int Count = 0;
+
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return Tracker->tooManyUses();
+
+ Use *U = &UI.getUse();
+ if (!Tracker->shouldExplore(U)) continue;
+ Visited.insert(U);
+ Worklist.push_back(U);
+ }
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+ V = U->get();
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(I);
+ // Not captured if the callee is readonly, doesn't return a copy through
+ // its return value and doesn't unwind (a readonly function can leak bits
+ // by throwing an exception or not depending on the input value).
+ if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
+ break;
+
+ // Not captured if only passed via 'nocapture' arguments. Note that
+ // calling a function pointer does not in itself cause the pointer to
+ // be captured. This is a subtle point considering that (for example)
+ // the callee might return its own address. It is analogous to saying
+ // that loading a value from a pointer does not cause the pointer to be
+ // captured, even though the loaded value might be the pointer itself
+ // (think of self-referential objects).
+ CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (CallSite::arg_iterator A = B; A != E; ++A)
+ if (A->get() == V && !CS.doesNotCapture(A - B))
+ // The parameter is not marked 'nocapture' - captured.
+ if (Tracker->captured(U))
+ return;
+ break;
+ }
+ case Instruction::Load:
+ // Loading from a pointer does not cause it to be captured.
+ break;
+ case Instruction::VAArg:
+ // "va-arg" from a pointer does not cause it to be captured.
+ break;
+ case Instruction::Store:
+ if (V == I->getOperand(0))
+ // Stored the pointer - conservatively assume it may be captured.
+ if (Tracker->captured(U))
+ return;
+ // Storing to the pointee does not cause the pointer to be captured.
+ break;
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ // The original value is not captured via this if the new value isn't.
+ for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ Use *U = &UI.getUse();
+ if (Visited.insert(U))
+ if (Tracker->shouldExplore(U))
+ Worklist.push_back(U);
+ }
+ break;
+ case Instruction::ICmp:
+ // Don't count comparisons of a no-alias return value against null as
+ // captures. This allows us to ignore comparisons of malloc results
+ // with null, for example.
+ if (isNoAliasCall(V->stripPointerCasts()))
+ if (ConstantPointerNull *CPN =
+ dyn_cast<ConstantPointerNull>(I->getOperand(1)))
+ if (CPN->getType()->getAddressSpace() == 0)
+ break;
+ // Otherwise, be conservative. There are crazy ways to capture pointers
+ // using comparisons.
+ if (Tracker->captured(U))
+ return;
+ break;
+ default:
+ // Something else - be conservative and say it is captured.
+ if (Tracker->captured(U))
+ return;
+ break;
+ }
+ }
+
+ // All uses examined.
+}
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
new file mode 100644
index 000000000000..8cda01a24c0d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -0,0 +1,96 @@
+//===- CodeMetrics.cpp - Code cost measurements ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements code cost measurement utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+
+using namespace llvm;
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
+ const TargetTransformInfo &TTI) {
+ ++NumBlocks;
+ unsigned NumInstsBeforeThisBB = NumInsts;
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ ImmutableCallSite CS(cast<Instruction>(II));
+
+ if (const Function *F = CS.getCalledFunction()) {
+ // If a function is both internal and has a single use, then it is
+ // extremely likely to get inlined in the future (it was probably
+ // exposed by an interleaved devirtualization pass).
+ if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+ ++NumInlineCandidates;
+
+ // If this call is to function itself, then the function is recursive.
+ // Inlining it into other functions is a bad idea, because this is
+ // basically just a form of loop peeling, and our metrics aren't useful
+ // for that case.
+ if (F == BB->getParent())
+ isRecursive = true;
+
+ if (TTI.isLoweredToCall(F))
+ ++NumCalls;
+ } else {
+ // We don't want inline asm to count as a call - that would prevent loop
+ // unrolling. The argument setup cost is still real, though.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ ++NumCalls;
+ }
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
+ ++NumVectorInsts;
+
+ if (const CallInst *CI = dyn_cast<CallInst>(II))
+ if (CI->hasFnAttr(Attribute::NoDuplicate))
+ notDuplicatable = true;
+
+ if (const InvokeInst *InvI = dyn_cast<InvokeInst>(II))
+ if (InvI->hasFnAttr(Attribute::NoDuplicate))
+ notDuplicatable = true;
+
+ NumInsts += TTI.getUserCost(&*II);
+ }
+
+ if (isa<ReturnInst>(BB->getTerminator()))
+ ++NumRets;
+
+ // We never want to inline functions that contain an indirectbr. This is
+ // incorrect because all the blockaddress's (in static global initializers
+ // for example) would be referring to the original function, and this indirect
+ // jump would jump from the inlined copy of the function into the original
+ // function which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions
+ // with indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably
+ // don't want to inline this function.
+ notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator());
+
+ // Remember NumInsts for this BB.
+ NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+}
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
new file mode 100644
index 000000000000..09d7608c51da
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -0,0 +1,1578 @@
+//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines routines for folding instructions into constants.
+//
+// Also, to supplement the basic IR ConstantExpr simplifications,
+// this file defines some additional folding routines that can make use of
+// DataLayout information. These functions cannot go in IR due to library
+// dependency issues.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FEnv.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include <cerrno>
+#include <cmath>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Folding internal helper functions
+//===----------------------------------------------------------------------===//
+
+/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
+/// DataLayout. This always returns a non-null constant, but it may be a
+/// ConstantExpr if unfoldable.
+static Constant *FoldBitCast(Constant *C, Type *DestTy,
+ const DataLayout &TD) {
+ // Catch the obvious splat cases.
+ if (C->isNullValue() && !DestTy->isX86_MMXTy())
+ return Constant::getNullValue(DestTy);
+ if (C->isAllOnesValue() && !DestTy->isX86_MMXTy())
+ return Constant::getAllOnesValue(DestTy);
+
+ // Handle a vector->integer cast.
+ if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) {
+ VectorType *VTy = dyn_cast<VectorType>(C->getType());
+ if (VTy == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ unsigned NumSrcElts = VTy->getNumElements();
+ Type *SrcEltTy = VTy->getElementType();
+
+ // If the vector is a vector of floating point, convert it to vector of int
+ // to simplify things.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
+ // Ask IR to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ }
+
+ ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+ if (CDV == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // Now that we know that the input value is a vector of integers, just shift
+ // and insert them into our result.
+ unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
+ APInt Result(IT->getBitWidth(), 0);
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ Result <<= BitShift;
+ if (TD.isLittleEndian())
+ Result |= CDV->getElementAsInteger(NumSrcElts-i-1);
+ else
+ Result |= CDV->getElementAsInteger(i);
+ }
+
+ return ConstantInt::get(IT, Result);
+ }
+
+ // The code below only handles casts to vectors currently.
+ VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
+ if (DestVTy == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
+ // vector so the code below can handle it uniformly.
+ if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
+ Constant *Ops = C; // don't take the address of C!
+ return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
+ }
+
+ // If this is a bitcast from constant vector -> vector, fold it.
+ if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If the element types match, IR can fold it.
+ unsigned NumDstElt = DestVTy->getNumElements();
+ unsigned NumSrcElt = C->getType()->getVectorNumElements();
+ if (NumDstElt == NumSrcElt)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ Type *SrcEltTy = C->getType()->getVectorElementType();
+ Type *DstEltTy = DestVTy->getElementType();
+
+ // Otherwise, we're changing the number of elements in a vector, which
+ // requires endianness information to do the right thing. For example,
+ // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ // folds to (little endian):
+ // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+ // and to (big endian):
+ // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+ // First thing is first. We only want to think about integer here, so if
+ // we have something in FP form, recast it as integer.
+ if (DstEltTy->isFloatingPointTy()) {
+ // Fold to an vector of integers with same size as our FP type.
+ unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+ Type *DestIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
+ // Recursively handle this integer conversion, if possible.
+ C = FoldBitCast(C, DestIVTy, TD);
+
+ // Finally, IR can handle this now that #elts line up.
+ return ConstantExpr::getBitCast(C, DestTy);
+ }
+
+ // Okay, we know the destination is integer, if the input is FP, convert
+ // it to integer first.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
+ // Ask IR to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ // If IR wasn't able to fold it, bail out.
+ if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
+ !isa<ConstantDataVector>(C))
+ return C;
+ }
+
+ // Now we know that the input and output vectors are both integer vectors
+ // of the same size, and that their #elements is not the same. Do the
+ // conversion here, which depends on whether the input or output has
+ // more elements.
+ bool isLittleEndian = TD.isLittleEndian();
+
+ SmallVector<Constant*, 32> Result;
+ if (NumDstElt < NumSrcElt) {
+ // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
+ Constant *Zero = Constant::getNullValue(DstEltTy);
+ unsigned Ratio = NumSrcElt/NumDstElt;
+ unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+ unsigned SrcElt = 0;
+ for (unsigned i = 0; i != NumDstElt; ++i) {
+ // Build each element of the result.
+ Constant *Elt = Zero;
+ unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ Constant *Src =dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // Zero extend the element to the right size.
+ Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+ // Shift it to the right place, depending on endianness.
+ Src = ConstantExpr::getShl(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+ // Mix it in.
+ Elt = ConstantExpr::getOr(Elt, Src);
+ }
+ Result.push_back(Elt);
+ }
+ return ConstantVector::get(Result);
+ }
+
+ // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ unsigned Ratio = NumDstElt/NumSrcElt;
+ unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+
+ // Loop over each source value, expanding into multiple results.
+ for (unsigned i = 0; i != NumSrcElt; ++i) {
+ Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ // Shift the piece of the value into the right place, depending on
+ // endianness.
+ Constant *Elt = ConstantExpr::getLShr(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+
+ // Truncate and remember this piece.
+ Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+ }
+ }
+
+ return ConstantVector::get(Result);
+}
+
+
+/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset
+/// from a global, return the global and the constant. Because of
+/// constantexprs, this function is recursive.
+static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
+ APInt &Offset, const DataLayout &TD) {
+ // Trivial case, constant is the global.
+ if ((GV = dyn_cast<GlobalValue>(C))) {
+ Offset.clearAllBits();
+ return true;
+ }
+
+ // Otherwise, if this isn't a constant expr, bail out.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE) return false;
+
+ // Look through ptr->int and ptr->ptr casts.
+ if (CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::BitCast)
+ return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
+
+ // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
+ // If the base isn't a global+constant, we aren't either.
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
+ return false;
+
+ // Otherwise, add any offset that our operands provide.
+ return GEP->accumulateConstantOffset(TD, Offset);
+ }
+
+ return false;
+}
+
+/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the
+/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the
+/// pointer to copy results into and BytesLeft is the number of bytes left in
+/// the CurPtr buffer. TD is the target data.
+static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
+ unsigned char *CurPtr, unsigned BytesLeft,
+ const DataLayout &TD) {
+ assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) &&
+ "Out of range access");
+
+ // If this element is zero or undefined, we can just return since *CurPtr is
+ // zero initialized.
+ if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
+ return true;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getBitWidth() > 64 ||
+ (CI->getBitWidth() & 7) != 0)
+ return false;
+
+ uint64_t Val = CI->getZExtValue();
+ unsigned IntBytes = unsigned(CI->getBitWidth()/8);
+
+ for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
+ int n = ByteOffset;
+ if (!TD.isLittleEndian())
+ n = IntBytes - n - 1;
+ CurPtr[i] = (unsigned char)(Val >> (n * 8));
+ ++ByteOffset;
+ }
+ return true;
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isDoubleTy()) {
+ C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ }
+ if (CFP->getType()->isFloatTy()){
+ C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ }
+ if (CFP->getType()->isHalfTy()){
+ C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ }
+ return false;
+ }
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ const StructLayout *SL = TD.getStructLayout(CS->getType());
+ unsigned Index = SL->getElementContainingOffset(ByteOffset);
+ uint64_t CurEltOffset = SL->getElementOffset(Index);
+ ByteOffset -= CurEltOffset;
+
+ while (1) {
+ // If the element access is to the element itself and not to tail padding,
+ // read the bytes from the element.
+ uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType());
+
+ if (ByteOffset < EltSize &&
+ !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
+ BytesLeft, TD))
+ return false;
+
+ ++Index;
+
+ // Check to see if we read from the last struct element, if so we're done.
+ if (Index == CS->getType()->getNumElements())
+ return true;
+
+ // If we read all of the bytes we needed from this element we're done.
+ uint64_t NextEltOffset = SL->getElementOffset(Index);
+
+ if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset)
+ return true;
+
+ // Move to the next element of the struct.
+ CurPtr += NextEltOffset-CurEltOffset-ByteOffset;
+ BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset;
+ ByteOffset = 0;
+ CurEltOffset = NextEltOffset;
+ }
+ // not reached.
+ }
+
+ if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
+ isa<ConstantDataSequential>(C)) {
+ Type *EltTy = cast<SequentialType>(C->getType())->getElementType();
+ uint64_t EltSize = TD.getTypeAllocSize(EltTy);
+ uint64_t Index = ByteOffset / EltSize;
+ uint64_t Offset = ByteOffset - Index * EltSize;
+ uint64_t NumElts;
+ if (ArrayType *AT = dyn_cast<ArrayType>(C->getType()))
+ NumElts = AT->getNumElements();
+ else
+ NumElts = cast<VectorType>(C->getType())->getNumElements();
+
+ for (; Index != NumElts; ++Index) {
+ if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
+ BytesLeft, TD))
+ return false;
+
+ uint64_t BytesWritten = EltSize - Offset;
+ assert(BytesWritten <= EltSize && "Not indexing into this element?");
+ if (BytesWritten >= BytesLeft)
+ return true;
+
+ Offset = 0;
+ BytesLeft -= BytesWritten;
+ CurPtr += BytesWritten;
+ }
+ return true;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::IntToPtr &&
+ CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
+ return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
+ BytesLeft, TD);
+ }
+
+ // Otherwise, unknown initializer type.
+ return false;
+}
+
+static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
+ const DataLayout &TD) {
+ Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
+ IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
+
+ // If this isn't an integer load we can't fold it directly.
+ if (!IntType) {
+ // If this is a float/double load, we can try folding it as an int32/64 load
+ // and then bitcast the result. This can be useful for union cases. Note
+ // that address spaces don't matter here since we're not going to result in
+ // an actual new load.
+ Type *MapTy;
+ if (LoadTy->isHalfTy())
+ MapTy = Type::getInt16PtrTy(C->getContext());
+ else if (LoadTy->isFloatTy())
+ MapTy = Type::getInt32PtrTy(C->getContext());
+ else if (LoadTy->isDoubleTy())
+ MapTy = Type::getInt64PtrTy(C->getContext());
+ else if (LoadTy->isVectorTy()) {
+ MapTy = IntegerType::get(C->getContext(),
+ TD.getTypeAllocSizeInBits(LoadTy));
+ MapTy = PointerType::getUnqual(MapTy);
+ } else
+ return 0;
+
+ C = FoldBitCast(C, MapTy, TD);
+ if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
+ return FoldBitCast(Res, LoadTy, TD);
+ return 0;
+ }
+
+ unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
+ if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
+
+ GlobalValue *GVal;
+ APInt Offset(TD.getPointerSizeInBits(), 0);
+ if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
+ return 0;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ !GV->getInitializer()->getType()->isSized())
+ return 0;
+
+ // If we're loading off the beginning of the global, some bytes may be valid,
+ // but we don't try to handle this.
+ if (Offset.isNegative()) return 0;
+
+ // If we're not accessing anything in this constant, the result is undefined.
+ if (Offset.getZExtValue() >=
+ TD.getTypeAllocSize(GV->getInitializer()->getType()))
+ return UndefValue::get(IntType);
+
+ unsigned char RawBytes[32] = {0};
+ if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes,
+ BytesLoaded, TD))
+ return 0;
+
+ APInt ResultVal = APInt(IntType->getBitWidth(), 0);
+ if (TD.isLittleEndian()) {
+ ResultVal = RawBytes[BytesLoaded - 1];
+ for (unsigned i = 1; i != BytesLoaded; ++i) {
+ ResultVal <<= 8;
+ ResultVal |= RawBytes[BytesLoaded-1-i];
+ }
+ } else {
+ ResultVal = RawBytes[0];
+ for (unsigned i = 1; i != BytesLoaded; ++i) {
+ ResultVal <<= 8;
+ ResultVal |= RawBytes[i];
+ }
+ }
+
+ return ConstantInt::get(IntType->getContext(), ResultVal);
+}
+
+/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
+/// produce if it is constant and determinable. If this is not determinable,
+/// return null.
+Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
+ const DataLayout *TD) {
+ // First, try the easy cases:
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+
+ // If the loaded value isn't a constant expr, we can't handle it.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE) return 0;
+
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Constant *V =
+ ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
+ return V;
+ }
+
+ // Instead of loading constant c string, use corresponding integer value
+ // directly if string length is small enough.
+ StringRef Str;
+ if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) {
+ unsigned StrLen = Str.size();
+ Type *Ty = cast<PointerType>(CE->getType())->getElementType();
+ unsigned NumBits = Ty->getPrimitiveSizeInBits();
+ // Replace load with immediate integer if the result is an integer or fp
+ // value.
+ if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
+ (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
+ APInt StrVal(NumBits, 0);
+ APInt SingleChar(NumBits, 0);
+ if (TD->isLittleEndian()) {
+ for (signed i = StrLen-1; i >= 0; i--) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ } else {
+ for (unsigned i = 0; i < StrLen; i++) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ // Append NULL at the end.
+ SingleChar = 0;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+
+ Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
+ if (Ty->isFloatingPointTy())
+ Res = ConstantExpr::getBitCast(Res, Ty);
+ return Res;
+ }
+ }
+
+ // If this load comes from anywhere in a constant global, and if the global
+ // is all undef or zero, we know what it loads.
+ if (GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
+ Type *ResTy = cast<PointerType>(C->getType())->getElementType();
+ if (GV->getInitializer()->isNullValue())
+ return Constant::getNullValue(ResTy);
+ if (isa<UndefValue>(GV->getInitializer()))
+ return UndefValue::get(ResTy);
+ }
+ }
+
+ // Try hard to fold loads from bitcasted strange and non-type-safe things.
+ if (TD)
+ return FoldReinterpretLoadFromConstPtr(CE, *TD);
+ return 0;
+}
+
+static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
+ if (LI->isVolatile()) return 0;
+
+ if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
+ return ConstantFoldLoadFromConstPtr(C, TD);
+
+ return 0;
+}
+
+/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
+/// Attempt to symbolically evaluate the result of a binary operator merging
+/// these together. If target data info is available, it is provided as DL,
+/// otherwise DL is null.
+static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
+ Constant *Op1, const DataLayout *DL){
+ // SROA
+
+ // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
+ // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
+ // bits.
+
+
+ if (Opc == Instruction::And && DL) {
+ unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType());
+ APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
+ APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
+ ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL);
+ ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL);
+ if ((KnownOne1 | KnownZero0).isAllOnesValue()) {
+ // All the bits of Op0 that the 'and' could be masking are already zero.
+ return Op0;
+ }
+ if ((KnownOne0 | KnownZero1).isAllOnesValue()) {
+ // All the bits of Op1 that the 'and' could be masking are already zero.
+ return Op1;
+ }
+
+ APInt KnownZero = KnownZero0 | KnownZero1;
+ APInt KnownOne = KnownOne0 & KnownOne1;
+ if ((KnownZero | KnownOne).isAllOnesValue()) {
+ return ConstantInt::get(Op0->getType(), KnownOne);
+ }
+ }
+
+ // If the constant expr is something like &A[123] - &A[4].f, fold this into a
+ // constant. This happens frequently when iterating over a global array.
+ if (Opc == Instruction::Sub && DL) {
+ GlobalValue *GV1, *GV2;
+ unsigned PtrSize = DL->getPointerSizeInBits();
+ unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
+ APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0);
+
+ if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL))
+ if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) &&
+ GV1 == GV2) {
+ // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
+ // PtrToInt may change the bitwidth so we have convert to the right size
+ // first.
+ return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
+ Offs2.zextOrTrunc(OpSize));
+ }
+ }
+
+ return 0;
+}
+
+/// CastGEPIndices - If array indices are not pointer-sized integers,
+/// explicitly cast them so that they aren't implicitly casted by the
+/// getelementptr.
+static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
+ Type *ResultTy, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TD) return 0;
+ Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+
+ bool Any = false;
+ SmallVector<Constant*, 32> NewIdxs;
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
+ if ((i == 1 ||
+ !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
+ Ops.slice(1, i-1)))) &&
+ Ops[i]->getType() != IntPtrTy) {
+ Any = true;
+ NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
+ true,
+ IntPtrTy,
+ true),
+ Ops[i], IntPtrTy));
+ } else
+ NewIdxs.push_back(Ops[i]);
+ }
+ if (!Any) return 0;
+
+ Constant *C =
+ ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+ C = Folded;
+ return C;
+}
+
+/// Strip the pointer casts, but preserve the address space information.
+static Constant* StripPtrCastKeepAS(Constant* Ptr) {
+ assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
+ PointerType *OldPtrTy = cast<PointerType>(Ptr->getType());
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
+ PointerType *NewPtrTy = cast<PointerType>(Ptr->getType());
+
+ // Preserve the address space number of the pointer.
+ if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
+ NewPtrTy = NewPtrTy->getElementType()->getPointerTo(
+ OldPtrTy->getAddressSpace());
+ Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy);
+ }
+ return Ptr;
+}
+
+/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
+/// constant expression, do so.
+static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
+ Type *ResultTy, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ Constant *Ptr = Ops[0];
+ if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
+ !Ptr->getType()->isPointerTy())
+ return 0;
+
+ Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
+
+ // If this is a constant expr gep that is effectively computing an
+ // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ if (!isa<ConstantInt>(Ops[i])) {
+
+ // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
+ // "inttoptr (sub (ptrtoint Ptr), V)"
+ if (Ops.size() == 2 &&
+ cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
+ assert((CE == 0 || CE->getType() == IntPtrTy) &&
+ "CastGEPIndices didn't canonicalize index types!");
+ if (CE && CE->getOpcode() == Instruction::Sub &&
+ CE->getOperand(0)->isNullValue()) {
+ Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
+ Res = ConstantExpr::getSub(Res, CE->getOperand(1));
+ Res = ConstantExpr::getIntToPtr(Res, ResultTy);
+ if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
+ Res = ConstantFoldConstantExpression(ResCE, TD, TLI);
+ return Res;
+ }
+ }
+ return 0;
+ }
+
+ unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
+ APInt Offset =
+ APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(),
+ makeArrayRef((Value *const*)
+ Ops.data() + 1,
+ Ops.size() - 1)));
+ Ptr = StripPtrCastKeepAS(Ptr);
+
+ // If this is a GEP of a GEP, fold it all into a single GEP.
+ while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end());
+
+ // Do not try the incorporate the sub-GEP if some index is not a number.
+ bool AllConstantInt = true;
+ for (unsigned i = 0, e = NestedOps.size(); i != e; ++i)
+ if (!isa<ConstantInt>(NestedOps[i])) {
+ AllConstantInt = false;
+ break;
+ }
+ if (!AllConstantInt)
+ break;
+
+ Ptr = cast<Constant>(GEP->getOperand(0));
+ Offset += APInt(BitWidth,
+ TD->getIndexedOffset(Ptr->getType(), NestedOps));
+ Ptr = StripPtrCastKeepAS(Ptr);
+ }
+
+ // If the base value for this address is a literal integer value, fold the
+ // getelementptr to the resulting integer value casted to the pointer type.
+ APInt BasePtr(BitWidth, 0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
+ BasePtr = Base->getValue().zextOrTrunc(BitWidth);
+ if (Ptr->isNullValue() || BasePtr != 0) {
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
+ return ConstantExpr::getIntToPtr(C, ResultTy);
+ }
+
+ // Otherwise form a regular getelementptr. Recompute the indices so that
+ // we eliminate over-indexing of the notional static type array bounds.
+ // This makes it easy to determine if the getelementptr is "inbounds".
+ // Also, this helps GlobalOpt do SROA on GlobalVariables.
+ Type *Ty = Ptr->getType();
+ assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
+ SmallVector<Constant*, 32> NewIdxs;
+ do {
+ if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
+ if (ATy->isPointerTy()) {
+ // The only pointer indexing we'll do is on the first index of the GEP.
+ if (!NewIdxs.empty())
+ break;
+
+ // Only handle pointers to sized types, not pointers to functions.
+ if (!ATy->getElementType()->isSized())
+ return 0;
+ }
+
+ // Determine which element of the array the offset points into.
+ APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+ IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ if (ElemSize == 0)
+ // The element size is 0. This may be [0 x Ty]*, so just use a zero
+ // index for this level and proceed to the next level to see if it can
+ // accommodate the offset.
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0));
+ else {
+ // The element size is non-zero divide the offset by the element
+ // size (rounding down), to compute the index at this level.
+ APInt NewIdx = Offset.udiv(ElemSize);
+ Offset -= NewIdx * ElemSize;
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
+ }
+ Ty = ATy->getElementType();
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ // If we end up with an offset that isn't valid for this struct type, we
+ // can't re-form this GEP in a regular form, so bail out. The pointer
+ // operand likely went through casts that are necessary to make the GEP
+ // sensible.
+ const StructLayout &SL = *TD->getStructLayout(STy);
+ if (Offset.uge(SL.getSizeInBytes()))
+ break;
+
+ // Determine which field of the struct the offset points into. The
+ // getZExtValue is fine as we've already ensured that the offset is
+ // within the range representable by the StructLayout API.
+ unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
+ NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ ElIdx));
+ Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
+ Ty = STy->getTypeAtIndex(ElIdx);
+ } else {
+ // We've reached some non-indexable type.
+ break;
+ }
+ } while (Ty != cast<PointerType>(ResultTy)->getElementType());
+
+ // If we haven't used up the entire offset by descending the static
+ // type, then the offset is pointing into the middle of an indivisible
+ // member, so we can't simplify it.
+ if (Offset != 0)
+ return 0;
+
+ // Create a GEP.
+ Constant *C =
+ ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
+ assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
+ "Computed GetElementPtr has unexpected type!");
+
+ // If we ended up indexing a member with a type that doesn't match
+ // the type of what the original indices indexed, add a cast.
+ if (Ty != cast<PointerType>(ResultTy)->getElementType())
+ C = FoldBitCast(C, ResultTy, *TD);
+
+ return C;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding public APIs
+//===----------------------------------------------------------------------===//
+
+/// ConstantFoldInstruction - Try to constant fold the specified instruction.
+/// If successful, the constant result is returned, if not, null is returned.
+/// Note that this fails if not all of the operands are constant. Otherwise,
+/// this function can only fail when attempting to fold instructions like loads
+/// and stores, which have no constant expression form.
+Constant *llvm::ConstantFoldInstruction(Instruction *I,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ // Handle PHI nodes quickly here...
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Constant *CommonValue = 0;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PN->getIncomingValue(i);
+ // If the incoming value is undef then skip it. Note that while we could
+ // skip the value if it is equal to the phi node itself we choose not to
+ // because that would break the rule that constant folding only applies if
+ // all operands are constants.
+ if (isa<UndefValue>(Incoming))
+ continue;
+ // If the incoming value is not a constant, then give up.
+ Constant *C = dyn_cast<Constant>(Incoming);
+ if (!C)
+ return 0;
+ // Fold the PHI's operands.
+ if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C))
+ C = ConstantFoldConstantExpression(NewC, TD, TLI);
+ // If the incoming value is a different constant to
+ // the one we saw previously, then give up.
+ if (CommonValue && C != CommonValue)
+ return 0;
+ CommonValue = C;
+ }
+
+
+ // If we reach here, all incoming values are the same constant or undef.
+ return CommonValue ? CommonValue : UndefValue::get(PN->getType());
+ }
+
+ // Scan the operand list, checking to see if they are all constants, if so,
+ // hand off to ConstantFoldInstOperands.
+ SmallVector<Constant*, 8> Ops;
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
+ Constant *Op = dyn_cast<Constant>(*i);
+ if (!Op)
+ return 0; // All operands not constant!
+
+ // Fold the Instruction's operands.
+ if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op))
+ Op = ConstantFoldConstantExpression(NewCE, TD, TLI);
+
+ Ops.push_back(Op);
+ }
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD, TLI);
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return ConstantFoldLoadInst(LI, TD);
+
+ if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I))
+ return ConstantExpr::getInsertValue(
+ cast<Constant>(IVI->getAggregateOperand()),
+ cast<Constant>(IVI->getInsertedValueOperand()),
+ IVI->getIndices());
+
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
+ return ConstantExpr::getExtractValue(
+ cast<Constant>(EVI->getAggregateOperand()),
+ EVI->getIndices());
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
+}
+
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified DataLayout. If successful, the constant result is
+/// result is returned, if not, null is returned.
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<Constant*, 8> Ops;
+ for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
+ i != e; ++i) {
+ Constant *NewC = cast<Constant>(*i);
+ // Recursively fold the ConstantExpr's operands.
+ if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
+ NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
+ Ops.push_back(NewC);
+ }
+
+ if (CE->isCompare())
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ TD, TLI);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
+}
+
+/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
+/// specified opcode and operands. If successful, the constant result is
+/// returned, if not, null is returned. Note that this function can fail when
+/// attempting to fold instructions like loads and stores, which have no
+/// constant expression form.
+///
+/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc
+/// information, due to only being passed an opcode and operands. Constant
+/// folding using this function strips this information.
+///
+Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
+ ArrayRef<Constant *> Ops,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ // Handle easy binops first.
+ if (Instruction::isBinaryOp(Opcode)) {
+ if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+ return C;
+
+ return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
+ }
+
+ switch (Opcode) {
+ default: return 0;
+ case Instruction::ICmp:
+ case Instruction::FCmp: llvm_unreachable("Invalid for compares");
+ case Instruction::Call:
+ if (Function *F = dyn_cast<Function>(Ops.back()))
+ if (canConstantFoldCallTo(F))
+ return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
+ return 0;
+ case Instruction::PtrToInt:
+ // If the input is a inttoptr, eliminate the pair. This requires knowing
+ // the width of a pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+ Constant *Input = CE->getOperand(0);
+ unsigned InWidth = Input->getType()->getScalarSizeInBits();
+ if (TD->getPointerSizeInBits() < InWidth) {
+ Constant *Mask =
+ ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
+ TD->getPointerSizeInBits()));
+ Input = ConstantExpr::getAnd(Input, Mask);
+ }
+ // Do a zext or trunc to get to the dest size.
+ return ConstantExpr::getIntegerCast(Input, DestTy, false);
+ }
+ }
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::IntToPtr:
+ // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
+ // the int size is >= the ptr size. This requires knowing the width of a
+ // pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
+ if (TD &&
+ TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
+ CE->getOpcode() == Instruction::PtrToInt)
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::BitCast:
+ if (TD)
+ return FoldBitCast(Ops[0], DestTy, *TD);
+ return ConstantExpr::getBitCast(Ops[0], DestTy);
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr:
+ if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
+ return C;
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
+ return C;
+
+ return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
+ }
+}
+
+/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
+/// instruction (icmp/fcmp) with the specified operands. If it fails, it
+/// returns a constant expression of the specified operands.
+///
+Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
+ Constant *Ops0, Constant *Ops1,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ // fold: icmp (inttoptr x), null -> icmp x, 0
+ // fold: icmp (ptrtoint x), 0 -> icmp x, null
+ // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
+ // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
+ //
+ // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+ // around to know if bit truncation is happening.
+ if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
+ if (TD && Ops1->isNullValue()) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ }
+
+ // Only do this transformation if the int is intptrty in size, otherwise
+ // there is a truncation or extension that we aren't modeling.
+ if (CE0->getOpcode() == Instruction::PtrToInt &&
+ CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ }
+ }
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
+ if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
+
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
+ IntPtrTy, false);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
+ }
+
+ // Only do this transformation if the int is intptrty in size, otherwise
+ // there is a truncation or extension that we aren't modeling.
+ if ((CE0->getOpcode() == Instruction::PtrToInt &&
+ CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
+ return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
+ CE1->getOperand(0), TD, TLI);
+ }
+ }
+
+ // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
+ // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
+ if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
+ CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
+ Constant *LHS =
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
+ TD, TLI);
+ Constant *RHS =
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
+ TD, TLI);
+ unsigned OpC =
+ Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+ Constant *Ops[] = { LHS, RHS };
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
+ }
+ }
+
+ return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
+}
+
+
+/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
+/// getelementptr constantexpr, return the constant value being addressed by the
+/// constant expression, or null if something is funny and we can't decide.
+Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
+ ConstantExpr *CE) {
+ if (!CE->getOperand(1)->isNullValue())
+ return 0; // Do not allow stepping over the value!
+
+ // Loop over all of the operands, tracking down which value we are
+ // addressing.
+ for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
+ C = C->getAggregateElement(CE->getOperand(i));
+ if (C == 0) return 0;
+ }
+ return C;
+}
+
+/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr
+/// indices (with an *implied* zero pointer index that is not in the list),
+/// return the constant value being addressed by a virtual load, or null if
+/// something is funny and we can't decide.
+Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
+ ArrayRef<Constant*> Indices) {
+ // Loop over all of the operands, tracking down which value we are
+ // addressing.
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ C = C->getAggregateElement(Indices[i]);
+ if (C == 0) return 0;
+ }
+ return C;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding for Calls
+//
+
+/// canConstantFoldCallTo - Return true if its even possible to fold a call to
+/// the specified function.
+bool
+llvm::canConstantFoldCallTo(const Function *F) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::fabs:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::floor:
+ case Intrinsic::sqrt:
+ case Intrinsic::pow:
+ case Intrinsic::powi:
+ case Intrinsic::bswap:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::convert_from_fp16:
+ case Intrinsic::convert_to_fp16:
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ return true;
+ default:
+ return false;
+ case 0: break;
+ }
+
+ if (!F->hasName()) return false;
+ StringRef Name = F->getName();
+
+ // In these cases, the check of the length is required. We don't want to
+ // return true for a name like "cos\0blah" which strcmp would return equal to
+ // "cos", but has length 8.
+ switch (Name[0]) {
+ default: return false;
+ case 'a':
+ return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2";
+ case 'c':
+ return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
+ case 'e':
+ return Name == "exp" || Name == "exp2";
+ case 'f':
+ return Name == "fabs" || Name == "fmod" || Name == "floor";
+ case 'l':
+ return Name == "log" || Name == "log10";
+ case 'p':
+ return Name == "pow";
+ case 's':
+ return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
+ Name == "sinf" || Name == "sqrtf";
+ case 't':
+ return Name == "tan" || Name == "tanh";
+ }
+}
+
+static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
+ Type *Ty) {
+ sys::llvm_fenv_clearexcept();
+ V = NativeFP(V);
+ if (sys::llvm_fenv_testexcept()) {
+ sys::llvm_fenv_clearexcept();
+ return 0;
+ }
+
+ if (Ty->isHalfTy()) {
+ APFloat APF(V);
+ bool unused;
+ APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
+ return ConstantFP::get(Ty->getContext(), APF);
+ }
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
+ llvm_unreachable("Can only constant fold half/float/double");
+}
+
+static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
+ double V, double W, Type *Ty) {
+ sys::llvm_fenv_clearexcept();
+ V = NativeFP(V, W);
+ if (sys::llvm_fenv_testexcept()) {
+ sys::llvm_fenv_clearexcept();
+ return 0;
+ }
+
+ if (Ty->isHalfTy()) {
+ APFloat APF(V);
+ bool unused;
+ APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
+ return ConstantFP::get(Ty->getContext(), APF);
+ }
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
+ llvm_unreachable("Can only constant fold half/float/double");
+}
+
+/// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer
+/// conversion of a constant floating point. If roundTowardZero is false, the
+/// default IEEE rounding is used (toward nearest, ties to even). This matches
+/// the behavior of the non-truncating SSE instructions in the default rounding
+/// mode. The desired integer type Ty is used to select how many bits are
+/// available for the result. Returns null if the conversion cannot be
+/// performed, otherwise returns the Constant value resulting from the
+/// conversion.
+static Constant *ConstantFoldConvertToInt(const APFloat &Val,
+ bool roundTowardZero, Type *Ty) {
+ // All of these conversion intrinsics form an integer of at most 64bits.
+ unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+ assert(ResultWidth <= 64 &&
+ "Can only constant fold conversions to 64 and 32 bit ints");
+
+ uint64_t UIntVal;
+ bool isExact = false;
+ APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
+ : APFloat::rmNearestTiesToEven;
+ APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
+ /*isSigned=*/true, mode,
+ &isExact);
+ if (status != APFloat::opOK && status != APFloat::opInexact)
+ return 0;
+ return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+}
+
+/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// with the specified arguments, returning null if unsuccessful.
+Constant *
+llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI) {
+ if (!F->hasName()) return 0;
+ StringRef Name = F->getName();
+
+ Type *Ty = F->getReturnType();
+ if (Operands.size() == 1) {
+ if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
+ if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) {
+ APFloat Val(Op->getValueAPF());
+
+ bool lost = false;
+ Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
+
+ return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
+ }
+ if (!TLI)
+ return 0;
+
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+ return 0;
+
+ /// We only fold functions with finite arguments. Folding NaN and inf is
+ /// likely to be aborted with an exception anyway, and some host libms
+ /// have known errors raising exceptions.
+ if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+ return 0;
+
+ /// Currently APFloat versions of these functions do not exist, so we use
+ /// the host native double versions. Float versions are not called
+ /// directly but for all these it is true (float)(f((double)arg)) ==
+ /// f(arg). Long double not supported yet.
+ double V;
+ if (Ty->isFloatTy())
+ V = Op->getValueAPF().convertToFloat();
+ else if (Ty->isDoubleTy())
+ V = Op->getValueAPF().convertToDouble();
+ else {
+ bool unused;
+ APFloat APF = Op->getValueAPF();
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+ V = APF.convertToDouble();
+ }
+
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::fabs:
+ return ConstantFoldFP(fabs, V, Ty);
+#if HAVE_LOG2
+ case Intrinsic::log2:
+ return ConstantFoldFP(log2, V, Ty);
+#endif
+#if HAVE_LOG
+ case Intrinsic::log:
+ return ConstantFoldFP(log, V, Ty);
+#endif
+#if HAVE_LOG10
+ case Intrinsic::log10:
+ return ConstantFoldFP(log10, V, Ty);
+#endif
+#if HAVE_EXP
+ case Intrinsic::exp:
+ return ConstantFoldFP(exp, V, Ty);
+#endif
+#if HAVE_EXP2
+ case Intrinsic::exp2:
+ return ConstantFoldFP(exp2, V, Ty);
+#endif
+ case Intrinsic::floor:
+ return ConstantFoldFP(floor, V, Ty);
+ }
+
+ switch (Name[0]) {
+ case 'a':
+ if (Name == "acos" && TLI->has(LibFunc::acos))
+ return ConstantFoldFP(acos, V, Ty);
+ else if (Name == "asin" && TLI->has(LibFunc::asin))
+ return ConstantFoldFP(asin, V, Ty);
+ else if (Name == "atan" && TLI->has(LibFunc::atan))
+ return ConstantFoldFP(atan, V, Ty);
+ break;
+ case 'c':
+ if (Name == "ceil" && TLI->has(LibFunc::ceil))
+ return ConstantFoldFP(ceil, V, Ty);
+ else if (Name == "cos" && TLI->has(LibFunc::cos))
+ return ConstantFoldFP(cos, V, Ty);
+ else if (Name == "cosh" && TLI->has(LibFunc::cosh))
+ return ConstantFoldFP(cosh, V, Ty);
+ else if (Name == "cosf" && TLI->has(LibFunc::cosf))
+ return ConstantFoldFP(cos, V, Ty);
+ break;
+ case 'e':
+ if (Name == "exp" && TLI->has(LibFunc::exp))
+ return ConstantFoldFP(exp, V, Ty);
+
+ if (Name == "exp2" && TLI->has(LibFunc::exp2)) {
+ // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
+ // C99 library.
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
+ }
+ break;
+ case 'f':
+ if (Name == "fabs" && TLI->has(LibFunc::fabs))
+ return ConstantFoldFP(fabs, V, Ty);
+ else if (Name == "floor" && TLI->has(LibFunc::floor))
+ return ConstantFoldFP(floor, V, Ty);
+ break;
+ case 'l':
+ if (Name == "log" && V > 0 && TLI->has(LibFunc::log))
+ return ConstantFoldFP(log, V, Ty);
+ else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
+ return ConstantFoldFP(log10, V, Ty);
+ else if (F->getIntrinsicID() == Intrinsic::sqrt &&
+ (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
+ if (V >= -0.0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else // Undefined
+ return Constant::getNullValue(Ty);
+ }
+ break;
+ case 's':
+ if (Name == "sin" && TLI->has(LibFunc::sin))
+ return ConstantFoldFP(sin, V, Ty);
+ else if (Name == "sinh" && TLI->has(LibFunc::sinh))
+ return ConstantFoldFP(sinh, V, Ty);
+ else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt))
+ return ConstantFoldFP(sqrt, V, Ty);
+ else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))
+ return ConstantFoldFP(sqrt, V, Ty);
+ else if (Name == "sinf" && TLI->has(LibFunc::sinf))
+ return ConstantFoldFP(sin, V, Ty);
+ break;
+ case 't':
+ if (Name == "tan" && TLI->has(LibFunc::tan))
+ return ConstantFoldFP(tan, V, Ty);
+ else if (Name == "tanh" && TLI->has(LibFunc::tanh))
+ return ConstantFoldFP(tanh, V, Ty);
+ break;
+ default:
+ break;
+ }
+ return 0;
+ }
+
+ if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
+ case Intrinsic::ctpop:
+ return ConstantInt::get(Ty, Op->getValue().countPopulation());
+ case Intrinsic::convert_from_fp16: {
+ APFloat Val(APFloat::IEEEhalf, Op->getValue());
+
+ bool lost = false;
+ APFloat::opStatus status =
+ Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+
+ // Conversion is always precise.
+ (void)status;
+ assert(status == APFloat::opOK && !lost &&
+ "Precision lost during fp16 constfolding");
+
+ return ConstantFP::get(F->getContext(), Val);
+ }
+ default:
+ return 0;
+ }
+ }
+
+ // Support ConstantVector in case we have an Undef in the top.
+ if (isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) {
+ Constant *Op = cast<Constant>(Operands[0]);
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty);
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty);
+ }
+ }
+
+ if (isa<UndefValue>(Operands[0])) {
+ if (F->getIntrinsicID() == Intrinsic::bswap)
+ return Operands[0];
+ return 0;
+ }
+
+ return 0;
+ }
+
+ if (Operands.size() == 2) {
+ if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+ return 0;
+ double Op1V;
+ if (Ty->isFloatTy())
+ Op1V = Op1->getValueAPF().convertToFloat();
+ else if (Ty->isDoubleTy())
+ Op1V = Op1->getValueAPF().convertToDouble();
+ else {
+ bool unused;
+ APFloat APF = Op1->getValueAPF();
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+ Op1V = APF.convertToDouble();
+ }
+
+ if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+ if (Op2->getType() != Op1->getType())
+ return 0;
+
+ double Op2V;
+ if (Ty->isFloatTy())
+ Op2V = Op2->getValueAPF().convertToFloat();
+ else if (Ty->isDoubleTy())
+ Op2V = Op2->getValueAPF().convertToDouble();
+ else {
+ bool unused;
+ APFloat APF = Op2->getValueAPF();
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+ Op2V = APF.convertToDouble();
+ }
+
+ if (F->getIntrinsicID() == Intrinsic::pow) {
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ }
+ if (!TLI)
+ return 0;
+ if (Name == "pow" && TLI->has(LibFunc::pow))
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if (Name == "fmod" && TLI->has(LibFunc::fmod))
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+ if (Name == "atan2" && TLI->has(LibFunc::atan2))
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+ } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy())
+ return ConstantFP::get(F->getContext(),
+ APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
+ return ConstantFP::get(F->getContext(),
+ APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy())
+ return ConstantFP::get(F->getContext(),
+ APFloat((double)std::pow((double)Op1V,
+ (int)Op2C->getZExtValue())));
+ }
+ return 0;
+ }
+
+ if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+ if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ APInt Res;
+ bool Overflow;
+ switch (F->getIntrinsicID()) {
+ default: llvm_unreachable("Invalid case");
+ case Intrinsic::sadd_with_overflow:
+ Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::ssub_with_overflow:
+ Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::usub_with_overflow:
+ Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::smul_with_overflow:
+ Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::umul_with_overflow:
+ Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
+ break;
+ }
+ Constant *Ops[] = {
+ ConstantInt::get(F->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
+ };
+ return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
+ }
+ case Intrinsic::cttz:
+ if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef.
+ return UndefValue::get(Ty);
+ return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
+ case Intrinsic::ctlz:
+ if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef.
+ return UndefValue::get(Ty);
+ return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
+ }
+ }
+
+ return 0;
+ }
+ return 0;
+ }
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp
new file mode 100644
index 000000000000..98a7780ad9a6
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CostModel.cpp
@@ -0,0 +1,247 @@
+//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the cost model analysis. It provides a very basic cost
+// estimation for LLVM-IR. This analysis uses the services of the codegen
+// to approximate the cost of any IR instruction when lowered to machine
+// instructions. The cost results are unit-less and the cost number represents
+// the throughput of the machine assuming that all loads hit the cache, all
+// branches are predicted, etc. The cost numbers can be added in order to
+// compare two or more transformation alternatives.
+//
+//===----------------------------------------------------------------------===//
+
+#define CM_NAME "cost-model"
+#define DEBUG_TYPE CM_NAME
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class CostModelAnalysis : public FunctionPass {
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) {
+ initializeCostModelAnalysisPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ /// Returns the expected cost of the instruction.
+ /// Returns -1 if the cost is unknown.
+ /// Note, this method does not cache the cost calculation and it
+ /// can be expensive in some cases.
+ unsigned getInstructionCost(const Instruction *I) const;
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnFunction(Function &F);
+ virtual void print(raw_ostream &OS, const Module*) const;
+
+ /// The function that we analyze.
+ Function *F;
+ /// Target information.
+ const TargetTransformInfo *TTI;
+ };
+} // End of anonymous namespace
+
+// Register this pass.
+char CostModelAnalysis::ID = 0;
+static const char cm_name[] = "Cost Model Analysis";
+INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true)
+INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true)
+
+FunctionPass *llvm::createCostModelAnalysisPass() {
+ return new CostModelAnalysis();
+}
+
+void
+CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
+
+bool
+CostModelAnalysis::runOnFunction(Function &F) {
+ this->F = &F;
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+
+ return false;
+}
+
+static bool isReverseVectorMask(SmallVector<int, 16> &Mask) {
+ for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
+ if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
+ return false;
+ return true;
+}
+
+static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
+ TargetTransformInfo::OperandValueKind OpInfo =
+ TargetTransformInfo::OK_AnyValue;
+
+ // Check for a splat of a constant.
+ ConstantDataVector *CDV = 0;
+ if ((CDV = dyn_cast<ConstantDataVector>(V)))
+ if (CDV->getSplatValue() != NULL)
+ OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+ ConstantVector *CV = 0;
+ if ((CV = dyn_cast<ConstantVector>(V)))
+ if (CV->getSplatValue() != NULL)
+ OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+
+ return OpInfo;
+}
+
+unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
+ if (!TTI)
+ return -1;
+
+ switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:{
+ Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
+ return TTI->getAddressComputationCost(ValTy);
+ }
+
+ case Instruction::Ret:
+ case Instruction::PHI:
+ case Instruction::Br: {
+ return TTI->getCFInstrCost(I->getOpcode());
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ TargetTransformInfo::OperandValueKind Op1VK =
+ getOperandInfo(I->getOperand(0));
+ TargetTransformInfo::OperandValueKind Op2VK =
+ getOperandInfo(I->getOperand(1));
+ return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
+ Op2VK);
+ }
+ case Instruction::Select: {
+ const SelectInst *SI = cast<SelectInst>(I);
+ Type *CondTy = SI->getCondition()->getType();
+ return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Type *ValTy = I->getOperand(0)->getType();
+ return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
+ }
+ case Instruction::Store: {
+ const StoreInst *SI = cast<StoreInst>(I);
+ Type *ValTy = SI->getValueOperand()->getType();
+ return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
+ SI->getAlignment(),
+ SI->getPointerAddressSpace());
+ }
+ case Instruction::Load: {
+ const LoadInst *LI = cast<LoadInst>(I);
+ return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
+ LI->getAlignment(),
+ LI->getPointerAddressSpace());
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ Type *SrcTy = I->getOperand(0)->getType();
+ return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
+ }
+ case Instruction::ExtractElement: {
+ const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+ return TTI->getVectorInstrCost(I->getOpcode(),
+ EEI->getOperand(0)->getType(), Idx);
+ }
+ case Instruction::InsertElement: {
+ const InsertElementInst * IE = cast<InsertElementInst>(I);
+ ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+ return TTI->getVectorInstrCost(I->getOpcode(),
+ IE->getType(), Idx);
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
+ unsigned NumVecElems = VecTypOp0->getVectorNumElements();
+ SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+
+ if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
+ 0);
+ return -1;
+ }
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ SmallVector<Type*, 4> Tys;
+ for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
+ Tys.push_back(II->getArgOperand(J)->getType());
+
+ return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
+ Tys);
+ }
+ return -1;
+ default:
+ // We don't have any information on this instruction.
+ return -1;
+ }
+}
+
+void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
+ if (!F)
+ return;
+
+ for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
+ for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
+ Instruction *Inst = it;
+ unsigned Cost = getInstructionCost(Inst);
+ if (Cost != (unsigned)-1)
+ OS << "Cost Model: Found an estimated cost of " << Cost;
+ else
+ OS << "Cost Model: Unknown cost";
+
+ OS << " for instruction: "<< *Inst << "\n";
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
new file mode 100644
index 000000000000..cbc71bd6e739
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -0,0 +1,3838 @@
+//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DependenceAnalysis is an LLVM pass that analyses dependences between memory
+// accesses. Currently, it is an (incomplete) implementation of the approach
+// described in
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+//
+// There's a single entry point that analyzes the dependence between a pair
+// of memory references in a function, returning either NULL, for no dependence,
+// or a more-or-less detailed description of the dependence between them.
+//
+// Currently, the implementation cannot propagate constraints between
+// coupled RDIV subscripts and lacks a multi-subscript MIV test.
+// Both of these are conservative weaknesses;
+// that is, not a source of correctness problems.
+//
+// The implementation depends on the GEP instruction to
+// differentiate subscripts. Since Clang linearizes subscripts
+// for most arrays, we give up some precision (though the existing MIV tests
+// will help). We trust that the GEP instruction will eventually be extended.
+// In the meantime, we should explore Maslov's ideas about delinearization.
+//
+// We should pay some careful attention to the possibility of integer overflow
+// in the implementation of the various tests. This could happen with Add,
+// Subtract, or Multiply, with both APInt's and SCEV's.
+//
+// Some non-linear subscript pairs can be handled by the GCD test
+// (and perhaps other tests).
+// Should explore how often these things occur.
+//
+// Finally, it seems like certain test cases expose weaknesses in the SCEV
+// simplification, especially in the handling of sign and zero extensions.
+// It could be useful to spend time exploring these.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+//===----------------------------------------------------------------------===//
+// //
+// In memory of Ken Kennedy, 1945 - 2007 //
+// //
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "da"
+
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// statistics
+
+STATISTIC(TotalArrayPairs, "Array pairs tested");
+STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
+STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
+STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
+STATISTIC(ZIVapplications, "ZIV applications");
+STATISTIC(ZIVindependence, "ZIV independence");
+STATISTIC(StrongSIVapplications, "Strong SIV applications");
+STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
+STATISTIC(StrongSIVindependence, "Strong SIV independence");
+STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
+STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
+STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
+STATISTIC(ExactSIVapplications, "Exact SIV applications");
+STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
+STATISTIC(ExactSIVindependence, "Exact SIV independence");
+STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
+STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
+STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
+STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
+STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
+STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
+STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
+STATISTIC(DeltaApplications, "Delta applications");
+STATISTIC(DeltaSuccesses, "Delta successes");
+STATISTIC(DeltaIndependence, "Delta independence");
+STATISTIC(DeltaPropagations, "Delta propagations");
+STATISTIC(GCDapplications, "GCD applications");
+STATISTIC(GCDsuccesses, "GCD successes");
+STATISTIC(GCDindependence, "GCD independence");
+STATISTIC(BanerjeeApplications, "Banerjee applications");
+STATISTIC(BanerjeeIndependence, "Banerjee independence");
+STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+
+//===----------------------------------------------------------------------===//
+// basics
+
+INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
+ "Dependence Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DependenceAnalysis, "da",
+ "Dependence Analysis", true, true)
+
+char DependenceAnalysis::ID = 0;
+
+
+FunctionPass *llvm::createDependenceAnalysisPass() {
+ return new DependenceAnalysis();
+}
+
+
+bool DependenceAnalysis::runOnFunction(Function &F) {
+ this->F = &F;
+ AA = &getAnalysis<AliasAnalysis>();
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfo>();
+ return false;
+}
+
+
+void DependenceAnalysis::releaseMemory() {
+}
+
+
+void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.addRequiredTransitive<LoopInfo>();
+}
+
+
+// Used to test the dependence analyzer.
+// Looks through the function, noting loads and stores.
+// Calls depends() on every possible pair and prints out the result.
+// Ignores all other instructions.
+static
+void dumpExampleDependence(raw_ostream &OS, Function *F,
+ DependenceAnalysis *DA) {
+ for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F);
+ SrcI != SrcE; ++SrcI) {
+ if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) {
+ for (inst_iterator DstI = SrcI, DstE = inst_end(F);
+ DstI != DstE; ++DstI) {
+ if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) {
+ OS << "da analyze - ";
+ if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) {
+ D->dump(OS);
+ for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
+ if (D->isSplitable(Level)) {
+ OS << "da analyze - split level = " << Level;
+ OS << ", iteration = " << *DA->getSplitIteration(D, Level);
+ OS << "!\n";
+ }
+ }
+ delete D;
+ }
+ else
+ OS << "none!\n";
+ }
+ }
+ }
+ }
+}
+
+
+void DependenceAnalysis::print(raw_ostream &OS, const Module*) const {
+ dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this));
+}
+
+//===----------------------------------------------------------------------===//
+// Dependence methods
+
+// Returns true if this is an input dependence.
+bool Dependence::isInput() const {
+ return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an output dependence.
+bool Dependence::isOutput() const {
+ return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if this is an flow (aka true) dependence.
+bool Dependence::isFlow() const {
+ return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an anti dependence.
+bool Dependence::isAnti() const {
+ return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mention the induction
+// variable associated with the loop at this level.
+// Leave this out of line, so it will serve as a virtual method anchor
+bool Dependence::isScalar(unsigned level) const {
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FullDependence methods
+
+FullDependence::FullDependence(Instruction *Source,
+ Instruction *Destination,
+ bool PossiblyLoopIndependent,
+ unsigned CommonLevels) :
+ Dependence(Source, Destination),
+ Levels(CommonLevels),
+ LoopIndependent(PossiblyLoopIndependent) {
+ Consistent = true;
+ DV = CommonLevels ? new DVEntry[CommonLevels] : NULL;
+}
+
+// The rest are simple getters that hide the implementation.
+
+// getDirection - Returns the direction associated with a particular level.
+unsigned FullDependence::getDirection(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Direction;
+}
+
+
+// Returns the distance (or NULL) associated with a particular level.
+const SCEV *FullDependence::getDistance(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Distance;
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mention the induction
+// variable associated with the loop at this level.
+bool FullDependence::isScalar(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Scalar;
+}
+
+
+// Returns true if peeling the first iteration from this loop
+// will break this dependence.
+bool FullDependence::isPeelFirst(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].PeelFirst;
+}
+
+
+// Returns true if peeling the last iteration from this loop
+// will break this dependence.
+bool FullDependence::isPeelLast(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].PeelLast;
+}
+
+
+// Returns true if splitting this loop will break the dependence.
+bool FullDependence::isSplitable(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Splitable;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DependenceAnalysis::Constraint methods
+
+// If constraint is a point <X, Y>, returns X.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getX() const {
+ assert(Kind == Point && "Kind should be Point");
+ return A;
+}
+
+
+// If constraint is a point <X, Y>, returns Y.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getY() const {
+ assert(Kind == Point && "Kind should be Point");
+ return B;
+}
+
+
+// If constraint is a line AX + BY = C, returns A.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getA() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return A;
+}
+
+
+// If constraint is a line AX + BY = C, returns B.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getB() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return B;
+}
+
+
+// If constraint is a line AX + BY = C, returns C.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getC() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return C;
+}
+
+
+// If constraint is a distance, returns D.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getD() const {
+ assert(Kind == Distance && "Kind should be Distance");
+ return SE->getNegativeSCEV(C);
+}
+
+
+// Returns the loop associated with this constraint.
+const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const {
+ assert((Kind == Distance || Kind == Line || Kind == Point) &&
+ "Kind should be Distance, Line, or Point");
+ return AssociatedLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setPoint(const SCEV *X,
+ const SCEV *Y,
+ const Loop *CurLoop) {
+ Kind = Point;
+ A = X;
+ B = Y;
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setLine(const SCEV *AA,
+ const SCEV *BB,
+ const SCEV *CC,
+ const Loop *CurLoop) {
+ Kind = Line;
+ A = AA;
+ B = BB;
+ C = CC;
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setDistance(const SCEV *D,
+ const Loop *CurLoop) {
+ Kind = Distance;
+ A = SE->getConstant(D->getType(), 1);
+ B = SE->getNegativeSCEV(A);
+ C = SE->getNegativeSCEV(D);
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setEmpty() {
+ Kind = Empty;
+}
+
+
+void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) {
+ SE = NewSE;
+ Kind = Any;
+}
+
+
+// For debugging purposes. Dumps the constraint out to OS.
+void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const {
+ if (isEmpty())
+ OS << " Empty\n";
+ else if (isAny())
+ OS << " Any\n";
+ else if (isPoint())
+ OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
+ else if (isDistance())
+ OS << " Distance is " << *getD() <<
+ " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
+ else if (isLine())
+ OS << " Line is " << *getA() << "*X + " <<
+ *getB() << "*Y = " << *getC() << "\n";
+ else
+ llvm_unreachable("unknown constraint type in Constraint::dump");
+}
+
+
+// Updates X with the intersection
+// of the Constraints X and Y. Returns true if X has changed.
+// Corresponds to Figure 4 from the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+bool DependenceAnalysis::intersectConstraints(Constraint *X,
+ const Constraint *Y) {
+ ++DeltaApplications;
+ DEBUG(dbgs() << "\tintersect constraints\n");
+ DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));
+ DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));
+ assert(!Y->isPoint() && "Y must not be a Point");
+ if (X->isAny()) {
+ if (Y->isAny())
+ return false;
+ *X = *Y;
+ return true;
+ }
+ if (X->isEmpty())
+ return false;
+ if (Y->isEmpty()) {
+ X->setEmpty();
+ return true;
+ }
+
+ if (X->isDistance() && Y->isDistance()) {
+ DEBUG(dbgs() << "\t intersect 2 distances\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ // Hmmm, interesting situation.
+ // I guess if either is constant, keep it and ignore the other.
+ if (isa<SCEVConstant>(Y->getD())) {
+ *X = *Y;
+ return true;
+ }
+ return false;
+ }
+
+ // At this point, the pseudo-code in Figure 4 of the paper
+ // checks if (X->isPoint() && Y->isPoint()).
+ // This case can't occur in our implementation,
+ // since a Point can only arise as the result of intersecting
+ // two Line constraints, and the right-hand value, Y, is never
+ // the result of an intersection.
+ assert(!(X->isPoint() && Y->isPoint()) &&
+ "We shouldn't ever see X->isPoint() && Y->isPoint()");
+
+ if (X->isLine() && Y->isLine()) {
+ DEBUG(dbgs() << "\t intersect 2 lines\n");
+ const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
+ const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
+ // slopes are equal, so lines are parallel
+ DEBUG(dbgs() << "\t\tsame slope\n");
+ Prod1 = SE->getMulExpr(X->getC(), Y->getB());
+ Prod2 = SE->getMulExpr(X->getB(), Y->getC());
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
+ // slopes differ, so lines intersect
+ DEBUG(dbgs() << "\t\tdifferent slopes\n");
+ const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
+ const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
+ const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
+ const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
+ const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
+ const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
+ const SCEVConstant *C1A2_C2A1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
+ const SCEVConstant *C1B2_C2B1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
+ const SCEVConstant *A1B2_A2B1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
+ const SCEVConstant *A2B1_A1B2 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
+ if (!C1B2_C2B1 || !C1A2_C2A1 ||
+ !A1B2_A2B1 || !A2B1_A1B2)
+ return false;
+ APInt Xtop = C1B2_C2B1->getValue()->getValue();
+ APInt Xbot = A1B2_A2B1->getValue()->getValue();
+ APInt Ytop = C1A2_C2A1->getValue()->getValue();
+ APInt Ybot = A2B1_A1B2->getValue()->getValue();
+ DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
+ DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
+ DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
+ DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
+ APInt Xq = Xtop; // these need to be initialized, even
+ APInt Xr = Xtop; // though they're just going to be overwritten
+ APInt::sdivrem(Xtop, Xbot, Xq, Xr);
+ APInt Yq = Ytop;
+ APInt Yr = Ytop;;
+ APInt::sdivrem(Ytop, Ybot, Yq, Yr);
+ if (Xr != 0 || Yr != 0) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
+ if (Xq.slt(0) || Yq.slt(0)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ if (const SCEVConstant *CUB =
+ collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
+ APInt UpperBound = CUB->getValue()->getValue();
+ DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
+ if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ }
+ X->setPoint(SE->getConstant(Xq),
+ SE->getConstant(Yq),
+ X->getAssociatedLoop());
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+
+ // if (X->isLine() && Y->isPoint()) This case can't occur.
+ assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
+
+ if (X->isPoint() && Y->isLine()) {
+ DEBUG(dbgs() << "\t intersect Point and Line\n");
+ const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
+ const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
+ const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+
+ llvm_unreachable("shouldn't reach the end of Constraint intersection");
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DependenceAnalysis methods
+
+// For debugging purposes. Dumps a dependence to OS.
+void Dependence::dump(raw_ostream &OS) const {
+ bool Splitable = false;
+ if (isConfused())
+ OS << "confused";
+ else {
+ if (isConsistent())
+ OS << "consistent ";
+ if (isFlow())
+ OS << "flow";
+ else if (isOutput())
+ OS << "output";
+ else if (isAnti())
+ OS << "anti";
+ else if (isInput())
+ OS << "input";
+ unsigned Levels = getLevels();
+ OS << " [";
+ for (unsigned II = 1; II <= Levels; ++II) {
+ if (isSplitable(II))
+ Splitable = true;
+ if (isPeelFirst(II))
+ OS << 'p';
+ const SCEV *Distance = getDistance(II);
+ if (Distance)
+ OS << *Distance;
+ else if (isScalar(II))
+ OS << "S";
+ else {
+ unsigned Direction = getDirection(II);
+ if (Direction == DVEntry::ALL)
+ OS << "*";
+ else {
+ if (Direction & DVEntry::LT)
+ OS << "<";
+ if (Direction & DVEntry::EQ)
+ OS << "=";
+ if (Direction & DVEntry::GT)
+ OS << ">";
+ }
+ }
+ if (isPeelLast(II))
+ OS << 'p';
+ if (II < Levels)
+ OS << " ";
+ }
+ if (isLoopIndependent())
+ OS << "|<";
+ OS << "]";
+ if (Splitable)
+ OS << " splitable";
+ }
+ OS << "!\n";
+}
+
+
+
+static
+AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
+ const Value *A,
+ const Value *B) {
+ const Value *AObj = GetUnderlyingObject(A);
+ const Value *BObj = GetUnderlyingObject(B);
+ return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
+ BObj, AA->getTypeStoreSize(BObj->getType()));
+}
+
+
+// Returns true if the load or store can be analyzed. Atomic and volatile
+// operations have properties which this analysis does not understand.
+static
+bool isLoadOrStore(const Instruction *I) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isUnordered();
+ else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isUnordered();
+ return false;
+}
+
+
+static
+Value *getPointerOperand(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerOperand();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ llvm_unreachable("Value is not load or store instruction");
+ return 0;
+}
+
+
+// Examines the loop nesting of the Src and Dst
+// instructions and establishes their shared loops. Sets the variables
+// CommonLevels, SrcLevels, and MaxLevels.
+// The source and destination instructions needn't be contained in the same
+// loop. The routine establishNestingLevels finds the level of most deeply
+// nested loop that contains them both, CommonLevels. An instruction that's
+// not contained in a loop is at level = 0. MaxLevels is equal to the level
+// of the source plus the level of the destination, minus CommonLevels.
+// This lets us allocate vectors MaxLevels in length, with room for every
+// distinct loop referenced in both the source and destination subscripts.
+// The variable SrcLevels is the nesting depth of the source instruction.
+// It's used to help calculate distinct loops referenced by the destination.
+// Here's the map from loops to levels:
+// 0 - unused
+// 1 - outermost common loop
+// ... - other common loops
+// CommonLevels - innermost common loop
+// ... - loops containing Src but not Dst
+// SrcLevels - innermost loop containing Src but not Dst
+// ... - loops containing Dst but not Src
+// MaxLevels - innermost loops containing Dst but not Src
+// Consider the follow code fragment:
+// for (a = ...) {
+// for (b = ...) {
+// for (c = ...) {
+// for (d = ...) {
+// A[] = ...;
+// }
+// }
+// for (e = ...) {
+// for (f = ...) {
+// for (g = ...) {
+// ... = A[];
+// }
+// }
+// }
+// }
+// }
+// If we're looking at the possibility of a dependence between the store
+// to A (the Src) and the load from A (the Dst), we'll note that they
+// have 2 loops in common, so CommonLevels will equal 2 and the direction
+// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
+// A map from loop names to loop numbers would look like
+// a - 1
+// b - 2 = CommonLevels
+// c - 3
+// d - 4 = SrcLevels
+// e - 5
+// f - 6
+// g - 7 = MaxLevels
+void DependenceAnalysis::establishNestingLevels(const Instruction *Src,
+ const Instruction *Dst) {
+ const BasicBlock *SrcBlock = Src->getParent();
+ const BasicBlock *DstBlock = Dst->getParent();
+ unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
+ unsigned DstLevel = LI->getLoopDepth(DstBlock);
+ const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
+ const Loop *DstLoop = LI->getLoopFor(DstBlock);
+ SrcLevels = SrcLevel;
+ MaxLevels = SrcLevel + DstLevel;
+ while (SrcLevel > DstLevel) {
+ SrcLoop = SrcLoop->getParentLoop();
+ SrcLevel--;
+ }
+ while (DstLevel > SrcLevel) {
+ DstLoop = DstLoop->getParentLoop();
+ DstLevel--;
+ }
+ while (SrcLoop != DstLoop) {
+ SrcLoop = SrcLoop->getParentLoop();
+ DstLoop = DstLoop->getParentLoop();
+ SrcLevel--;
+ }
+ CommonLevels = SrcLevel;
+ MaxLevels -= CommonLevels;
+}
+
+
+// Given one of the loops containing the source, return
+// its level index in our numbering scheme.
+unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const {
+ return SrcLoop->getLoopDepth();
+}
+
+
+// Given one of the loops containing the destination,
+// return its level index in our numbering scheme.
+unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const {
+ unsigned D = DstLoop->getLoopDepth();
+ if (D > CommonLevels)
+ return D - CommonLevels + SrcLevels;
+ else
+ return D;
+}
+
+
+// Returns true if Expression is loop invariant in LoopNest.
+bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression,
+ const Loop *LoopNest) const {
+ if (!LoopNest)
+ return true;
+ return SE->isLoopInvariant(Expression, LoopNest) &&
+ isLoopInvariant(Expression, LoopNest->getParentLoop());
+}
+
+
+
+// Finds the set of loops from the LoopNest that
+// have a level <= CommonLevels and are referred to by the SCEV Expression.
+void DependenceAnalysis::collectCommonLoops(const SCEV *Expression,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) const {
+ while (LoopNest) {
+ unsigned Level = LoopNest->getLoopDepth();
+ if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
+ Loops.set(Level);
+ LoopNest = LoopNest->getParentLoop();
+ }
+}
+
+
+// removeMatchingExtensions - Examines a subscript pair.
+// If the source and destination are identically sign (or zero)
+// extended, it strips off the extension in an effect to simplify
+// the actual analysis.
+void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) {
+ const SCEV *Src = Pair->Src;
+ const SCEV *Dst = Pair->Dst;
+ if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
+ (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
+ const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
+ const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
+ if (SrcCast->getType() == DstCast->getType()) {
+ Pair->Src = SrcCast->getOperand();
+ Pair->Dst = DstCast->getOperand();
+ }
+ }
+}
+
+
+// Examine the scev and return true iff it's linear.
+// Collect any loops mentioned in the set of "Loops".
+bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ if (!AddRec)
+ return isLoopInvariant(Src, LoopNest);
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ if (!isLoopInvariant(Step, LoopNest))
+ return false;
+ Loops.set(mapSrcLoop(AddRec->getLoop()));
+ return checkSrcSubscript(Start, LoopNest, Loops);
+}
+
+
+
+// Examine the scev and return true iff it's linear.
+// Collect any loops mentioned in the set of "Loops".
+bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (!AddRec)
+ return isLoopInvariant(Dst, LoopNest);
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ if (!isLoopInvariant(Step, LoopNest))
+ return false;
+ Loops.set(mapDstLoop(AddRec->getLoop()));
+ return checkDstSubscript(Start, LoopNest, Loops);
+}
+
+
+// Examines the subscript pair (the Src and Dst SCEVs)
+// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
+// Collects the associated loops in a set.
+DependenceAnalysis::Subscript::ClassificationKind
+DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
+ const SCEV *Dst, const Loop *DstLoopNest,
+ SmallBitVector &Loops) {
+ SmallBitVector SrcLoops(MaxLevels + 1);
+ SmallBitVector DstLoops(MaxLevels + 1);
+ if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
+ return Subscript::NonLinear;
+ if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
+ return Subscript::NonLinear;
+ Loops = SrcLoops;
+ Loops |= DstLoops;
+ unsigned N = Loops.count();
+ if (N == 0)
+ return Subscript::ZIV;
+ if (N == 1)
+ return Subscript::SIV;
+ if (N == 2 && (SrcLoops.count() == 0 ||
+ DstLoops.count() == 0 ||
+ (SrcLoops.count() == 1 && DstLoops.count() == 1)))
+ return Subscript::RDIV;
+ return Subscript::MIV;
+}
+
+
+// A wrapper around SCEV::isKnownPredicate.
+// Looks for cases where we're interested in comparing for equality.
+// If both X and Y have been identically sign or zero extended,
+// it strips off the (confusing) extensions before invoking
+// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
+// will be similarly updated.
+//
+// If SCEV::isKnownPredicate can't prove the predicate,
+// we try simple subtraction, which seems to help in some cases
+// involving symbolics.
+bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *X,
+ const SCEV *Y) const {
+ if (Pred == CmpInst::ICMP_EQ ||
+ Pred == CmpInst::ICMP_NE) {
+ if ((isa<SCEVSignExtendExpr>(X) &&
+ isa<SCEVSignExtendExpr>(Y)) ||
+ (isa<SCEVZeroExtendExpr>(X) &&
+ isa<SCEVZeroExtendExpr>(Y))) {
+ const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
+ const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
+ const SCEV *Xop = CX->getOperand();
+ const SCEV *Yop = CY->getOperand();
+ if (Xop->getType() == Yop->getType()) {
+ X = Xop;
+ Y = Yop;
+ }
+ }
+ }
+ if (SE->isKnownPredicate(Pred, X, Y))
+ return true;
+ // If SE->isKnownPredicate can't prove the condition,
+ // we try the brute-force approach of subtracting
+ // and testing the difference.
+ // By testing with SE->isKnownPredicate first, we avoid
+ // the possibility of overflow when the arguments are constants.
+ const SCEV *Delta = SE->getMinusSCEV(X, Y);
+ switch (Pred) {
+ case CmpInst::ICMP_EQ:
+ return Delta->isZero();
+ case CmpInst::ICMP_NE:
+ return SE->isKnownNonZero(Delta);
+ case CmpInst::ICMP_SGE:
+ return SE->isKnownNonNegative(Delta);
+ case CmpInst::ICMP_SLE:
+ return SE->isKnownNonPositive(Delta);
+ case CmpInst::ICMP_SGT:
+ return SE->isKnownPositive(Delta);
+ case CmpInst::ICMP_SLT:
+ return SE->isKnownNegative(Delta);
+ default:
+ llvm_unreachable("unexpected predicate in isKnownPredicate");
+ }
+}
+
+
+// All subscripts are all the same type.
+// Loop bound may be smaller (e.g., a char).
+// Should zero extend loop bound, since it's always >= 0.
+// This routine collects upper bound and extends if needed.
+// Return null if no bound available.
+const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L,
+ Type *T) const {
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ const SCEV *UB = SE->getBackedgeTakenCount(L);
+ return SE->getNoopOrZeroExtend(UB, T);
+ }
+ return NULL;
+}
+
+
+// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
+// If the cast fails, returns NULL.
+const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L,
+ Type *T
+ ) const {
+ if (const SCEV *UB = collectUpperBound(L, T))
+ return dyn_cast<SCEVConstant>(UB);
+ return NULL;
+}
+
+
+// testZIV -
+// When we have a pair of subscripts of the form [c1] and [c2],
+// where c1 and c2 are both loop invariant, we attack it using
+// the ZIV test. Basically, we test by comparing the two values,
+// but there are actually three possible results:
+// 1) the values are equal, so there's a dependence
+// 2) the values are different, so there's no dependence
+// 3) the values might be equal, so we have to assume a dependence.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testZIV(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ ++ZIVapplications;
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
+ DEBUG(dbgs() << " provably dependent\n");
+ return false; // provably dependent
+ }
+ if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
+ DEBUG(dbgs() << " provably independent\n");
+ ++ZIVindependence;
+ return true; // provably independent
+ }
+ DEBUG(dbgs() << " possibly dependent\n");
+ Result.Consistent = false;
+ return false; // possibly dependent
+}
+
+
+// strongSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.1
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the Strong SIV test.
+//
+// Can prove independence. Failing that, can compute distance (and direction).
+// In the presence of symbolic terms, we can sometimes make progress.
+//
+// If there's a dependence,
+//
+// c1 + a*i = c2 + a*i'
+//
+// The dependence distance is
+//
+// d = i' - i = (c1 - c2)/a
+//
+// A dependence only exists if d is an integer and abs(d) <= U, where U is the
+// loop's upper bound. If a dependence exists, the dependence direction is
+// defined as
+//
+// { < if d > 0
+// direction = { = if d = 0
+// { > if d < 0
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ DEBUG(dbgs() << "\tStrong SIV test\n");
+ DEBUG(dbgs() << "\t Coeff = " << *Coeff);
+ DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
+ DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst);
+ DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
+ ++StrongSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "level out of range");
+ Level--;
+
+ const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta);
+ DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
+
+ // check that |Delta| < iteration count
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
+ DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
+ const SCEV *AbsDelta =
+ SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
+ const SCEV *AbsCoeff =
+ SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
+ const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
+ // Distance greater than trip count - no dependence
+ ++StrongSIVindependence;
+ ++StrongSIVsuccesses;
+ return true;
+ }
+ }
+
+ // Can we compute distance?
+ if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
+ APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue();
+ APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue();
+ APInt Distance = ConstDelta; // these need to be initialized
+ APInt Remainder = ConstDelta;
+ APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
+ DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ // Make sure Coeff divides Delta exactly
+ if (Remainder != 0) {
+ // Coeff doesn't divide Distance, no dependence
+ ++StrongSIVindependence;
+ ++StrongSIVsuccesses;
+ return true;
+ }
+ Result.DV[Level].Distance = SE->getConstant(Distance);
+ NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
+ if (Distance.sgt(0))
+ Result.DV[Level].Direction &= Dependence::DVEntry::LT;
+ else if (Distance.slt(0))
+ Result.DV[Level].Direction &= Dependence::DVEntry::GT;
+ else
+ Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
+ ++StrongSIVsuccesses;
+ }
+ else if (Delta->isZero()) {
+ // since 0/X == 0
+ Result.DV[Level].Distance = Delta;
+ NewConstraint.setDistance(Delta, CurLoop);
+ Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
+ ++StrongSIVsuccesses;
+ }
+ else {
+ if (Coeff->isOne()) {
+ DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
+ Result.DV[Level].Distance = Delta; // since X/1 == X
+ NewConstraint.setDistance(Delta, CurLoop);
+ }
+ else {
+ Result.Consistent = false;
+ NewConstraint.setLine(Coeff,
+ SE->getNegativeSCEV(Coeff),
+ SE->getNegativeSCEV(Delta), CurLoop);
+ }
+
+ // maybe we can get a useful direction
+ bool DeltaMaybeZero = !SE->isKnownNonZero(Delta);
+ bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
+ bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
+ bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
+ bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
+ // The double negatives above are confusing.
+ // It helps to read !SE->isKnownNonZero(Delta)
+ // as "Delta might be Zero"
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if ((DeltaMaybePositive && CoeffMaybePositive) ||
+ (DeltaMaybeNegative && CoeffMaybeNegative))
+ NewDirection = Dependence::DVEntry::LT;
+ if (DeltaMaybeZero)
+ NewDirection |= Dependence::DVEntry::EQ;
+ if ((DeltaMaybeNegative && CoeffMaybePositive) ||
+ (DeltaMaybePositive && CoeffMaybeNegative))
+ NewDirection |= Dependence::DVEntry::GT;
+ if (NewDirection < Result.DV[Level].Direction)
+ ++StrongSIVsuccesses;
+ Result.DV[Level].Direction &= NewDirection;
+ }
+ return false;
+}
+
+
+// weakCrossingSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Crossing SIV test.
+//
+// Given c1 + a*i = c2 - a*i', we can look for the intersection of
+// the two lines, where i = i', yielding
+//
+// c1 + a*i = c2 - a*i
+// 2a*i = c2 - c1
+// i = (c2 - c1)/2a
+//
+// If i < 0, there is no dependence.
+// If i > upperbound, there is no dependence.
+// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
+// If i = upperbound, there's a dependence with distance = 0.
+// If i is integral, there's a dependence (all directions).
+// If the non-integer part = 1/2, there's a dependence (<> directions).
+// Otherwise, there's no dependence.
+//
+// Can prove independence. Failing that,
+// can sometimes refine the directions.
+// Can determine iteration for splitting.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint,
+ const SCEV *&SplitIter) const {
+ DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
+ DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakCrossingSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
+ if (Delta->isZero()) {
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
+ ++WeakCrossingSIVsuccesses;
+ if (!Result.DV[Level].Direction) {
+ ++WeakCrossingSIVindependence;
+ return true;
+ }
+ Result.DV[Level].Distance = Delta; // = 0
+ return false;
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
+ if (!ConstCoeff)
+ return false;
+
+ Result.DV[Level].Splitable = true;
+ if (SE->isKnownNegative(ConstCoeff)) {
+ ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
+ assert(ConstCoeff &&
+ "dynamic cast of negative of ConstCoeff should yield constant");
+ Delta = SE->getNegativeSCEV(Delta);
+ }
+ assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
+
+ // compute SplitIter for use by DependenceAnalysis::getSplitIteration()
+ SplitIter =
+ SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0),
+ Delta),
+ SE->getMulExpr(SE->getConstant(Delta->getType(), 2),
+ ConstCoeff));
+ DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
+
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ if (!ConstDelta)
+ return false;
+
+ // We're certain that ConstCoeff > 0; therefore,
+ // if Delta < 0, then no dependence.
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
+ if (SE->isKnownNegative(Delta)) {
+ // No dependence, Delta < 0
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+
+ // We're certain that Delta > 0 and ConstCoeff > 0.
+ // Check Delta/(2*ConstCoeff) against upper loop bound
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
+ const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
+ ConstantTwo);
+ DEBUG(dbgs() << "\t ML = " << *ML << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
+ // Delta too big, no dependence
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
+ // i = i' = UB
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
+ ++WeakCrossingSIVsuccesses;
+ if (!Result.DV[Level].Direction) {
+ ++WeakCrossingSIVindependence;
+ return true;
+ }
+ Result.DV[Level].Splitable = false;
+ Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0);
+ return false;
+ }
+ }
+
+ // check that Coeff divides Delta
+ APInt APDelta = ConstDelta->getValue()->getValue();
+ APInt APCoeff = ConstCoeff->getValue()->getValue();
+ APInt Distance = APDelta; // these need to be initialzed
+ APInt Remainder = APDelta;
+ APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ // Coeff doesn't divide Delta, no dependence
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+ DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
+
+ // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
+ APInt Two = APInt(Distance.getBitWidth(), 2, true);
+ Remainder = Distance.srem(Two);
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ // Equal direction isn't possible
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
+ ++WeakCrossingSIVsuccesses;
+ }
+ return false;
+}
+
+
+// Kirch's algorithm, from
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+// MIT Press, 1989
+//
+// Program 2.1, page 29.
+// Computes the GCD of AM and BM.
+// Also finds a solution to the equation ax - by = gdc(a, b).
+// Returns true iff the gcd divides Delta.
+static
+bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta,
+ APInt &G, APInt &X, APInt &Y) {
+ APInt A0(Bits, 1, true), A1(Bits, 0, true);
+ APInt B0(Bits, 0, true), B1(Bits, 1, true);
+ APInt G0 = AM.abs();
+ APInt G1 = BM.abs();
+ APInt Q = G0; // these need to be initialized
+ APInt R = G0;
+ APInt::sdivrem(G0, G1, Q, R);
+ while (R != 0) {
+ APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
+ APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
+ G0 = G1; G1 = R;
+ APInt::sdivrem(G0, G1, Q, R);
+ }
+ G = G1;
+ DEBUG(dbgs() << "\t GCD = " << G << "\n");
+ X = AM.slt(0) ? -A1 : A1;
+ Y = BM.slt(0) ? B1 : -B1;
+
+ // make sure gcd divides Delta
+ R = Delta.srem(G);
+ if (R != 0)
+ return true; // gcd doesn't divide Delta, no dependence
+ Q = Delta.sdiv(G);
+ X *= Q;
+ Y *= Q;
+ return false;
+}
+
+
+static
+APInt floorOfQuotient(APInt A, APInt B) {
+ APInt Q = A; // these need to be initialized
+ APInt R = A;
+ APInt::sdivrem(A, B, Q, R);
+ if (R == 0)
+ return Q;
+ if ((A.sgt(0) && B.sgt(0)) ||
+ (A.slt(0) && B.slt(0)))
+ return Q;
+ else
+ return Q - 1;
+}
+
+
+static
+APInt ceilingOfQuotient(APInt A, APInt B) {
+ APInt Q = A; // these need to be initialized
+ APInt R = A;
+ APInt::sdivrem(A, B, Q, R);
+ if (R == 0)
+ return Q;
+ if ((A.sgt(0) && B.sgt(0)) ||
+ (A.slt(0) && B.slt(0)))
+ return Q + 1;
+ else
+ return Q;
+}
+
+
+static
+APInt maxAPInt(APInt A, APInt B) {
+ return A.sgt(B) ? A : B;
+}
+
+
+static
+APInt minAPInt(APInt A, APInt B) {
+ return A.slt(B) ? A : B;
+}
+
+
+// exactSIVtest -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
+// where i is an induction variable, c1 and c2 are loop invariant, and a1
+// and a2 are constant, we can solve it exactly using an algorithm developed
+// by Banerjee and Wolfe. See Section 2.5.3 in
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+// MIT Press, 1989
+//
+// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
+// so use them if possible. They're also a bit better with symbolics and,
+// in the case of the strong SIV test, can compute Distances.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ DEBUG(dbgs() << "\tExact SIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++ExactSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
+ Delta, CurLoop);
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+ return false;
+
+ // find gcd
+ APInt G, X, Y;
+ APInt AM = ConstSrcCoeff->getValue()->getValue();
+ APInt BM = ConstDstCoeff->getValue()->getValue();
+ unsigned Bits = AM.getBitWidth();
+ if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+ // gcd doesn't divide Delta, no dependence
+ ++ExactSIVindependence;
+ ++ExactSIVsuccesses;
+ return true;
+ }
+
+ DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+
+ // since SCEV construction normalizes, LM = 0
+ APInt UM(Bits, 1, true);
+ bool UMvalid = false;
+ // UM is perhaps unavailable, let's check
+ if (const SCEVConstant *CUB =
+ collectConstantUpperBound(CurLoop, Delta->getType())) {
+ UM = CUB->getValue()->getValue();
+ DEBUG(dbgs() << "\t UM = " << UM << "\n");
+ UMvalid = true;
+ }
+
+ APInt TU(APInt::getSignedMaxValue(Bits));
+ APInt TL(APInt::getSignedMinValue(Bits));
+
+ // test(BM/G, LM-X) and test(-BM/G, X-UM)
+ APInt TMUL = BM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (UMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (UMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+
+ // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+ TMUL = AM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (UMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (UMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+ if (TL.sgt(TU)) {
+ ++ExactSIVindependence;
+ ++ExactSIVsuccesses;
+ return true;
+ }
+
+ // explore directions
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+
+ // less than
+ APInt SaveTU(TU); // save these
+ APInt SaveTL(TL);
+ DEBUG(dbgs() << "\t exploring LT direction\n");
+ TMUL = AM - BM;
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::LT;
+ ++ExactSIVsuccesses;
+ }
+
+ // equal
+ TU = SaveTU; // restore
+ TL = SaveTL;
+ DEBUG(dbgs() << "\t exploring EQ direction\n");
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ TMUL = BM - AM;
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::EQ;
+ ++ExactSIVsuccesses;
+ }
+
+ // greater than
+ TU = SaveTU; // restore
+ TL = SaveTL;
+ DEBUG(dbgs() << "\t exploring GT direction\n");
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::GT;
+ ++ExactSIVsuccesses;
+ }
+
+ // finished
+ Result.DV[Level].Direction &= NewDirection;
+ if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
+ ++ExactSIVindependence;
+ return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
+}
+
+
+
+// Return true if the divisor evenly divides the dividend.
+static
+bool isRemainderZero(const SCEVConstant *Dividend,
+ const SCEVConstant *Divisor) {
+ APInt ConstDividend = Dividend->getValue()->getValue();
+ APInt ConstDivisor = Divisor->getValue()->getValue();
+ return ConstDividend.srem(ConstDivisor) == 0;
+}
+
+
+// weakZeroSrcSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+// c1 = c2 + a*i
+//
+// we get
+//
+// (c1 - c2)/a = i
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroDstSIVtest)
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ // For the WeakSIV test, it's possible the loop isn't common to
+ // the Src and Dst loops. If it isn't, then there's no need to
+ // record a direction.
+ DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakZeroSIVapplications;
+ assert(0 < Level && Level <= MaxLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+ NewConstraint.setLine(SE->getConstant(Delta->getType(), 0),
+ DstCoeff, Delta, CurLoop);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+ Result.DV[Level].PeelFirst = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false; // dependences caused by first iteration
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstCoeff)
+ return false;
+ const SCEV *AbsCoeff =
+ SE->isKnownNegative(ConstCoeff) ?
+ SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+ const SCEV *NewDelta =
+ SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
+
+ // check that Delta/SrcCoeff < iteration count
+ // really check NewDelta < count*AbsCoeff
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
+ // dependences caused by last iteration
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+ Result.DV[Level].PeelLast = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false;
+ }
+ }
+
+ // check that Delta/SrcCoeff >= 0
+ // really check that NewDelta >= 0
+ if (SE->isKnownNegative(NewDelta)) {
+ // No dependence, newDelta < 0
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+
+ // if SrcCoeff doesn't divide Delta, then no dependence
+ if (isa<SCEVConstant>(Delta) &&
+ !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ return false;
+}
+
+
+// weakZeroDstSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+// c1 + a*i = c2
+//
+// we get
+//
+// i = (c2 - c1)/a
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroSrcSIVtest)
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ // For the WeakSIV test, it's possible the loop isn't common to the
+ // Src and Dst loops. If it isn't, then there's no need to record a direction.
+ DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakZeroSIVapplications;
+ assert(0 < Level && Level <= SrcLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0),
+ Delta, CurLoop);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+ Result.DV[Level].PeelFirst = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false; // dependences caused by first iteration
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ if (!ConstCoeff)
+ return false;
+ const SCEV *AbsCoeff =
+ SE->isKnownNegative(ConstCoeff) ?
+ SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+ const SCEV *NewDelta =
+ SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
+
+ // check that Delta/SrcCoeff < iteration count
+ // really check NewDelta < count*AbsCoeff
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
+ // dependences caused by last iteration
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+ Result.DV[Level].PeelLast = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false;
+ }
+ }
+
+ // check that Delta/SrcCoeff >= 0
+ // really check that NewDelta >= 0
+ if (SE->isKnownNegative(NewDelta)) {
+ // No dependence, newDelta < 0
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+
+ // if SrcCoeff doesn't divide Delta, then no dependence
+ if (isa<SCEVConstant>(Delta) &&
+ !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ return false;
+}
+
+
+// exactRDIVtest - Tests the RDIV subscript pair for dependence.
+// Things of the form [c1 + a*i] and [c2 + b*j],
+// where i and j are induction variable, c1 and c2 are loop invariant,
+// and a and b are constants.
+// Returns true if any possible dependence is disproved.
+// Marks the result as inconsistent.
+// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
+bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *SrcLoop,
+ const Loop *DstLoop,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "\tExact RDIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++ExactRDIVapplications;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+ return false;
+
+ // find gcd
+ APInt G, X, Y;
+ APInt AM = ConstSrcCoeff->getValue()->getValue();
+ APInt BM = ConstDstCoeff->getValue()->getValue();
+ unsigned Bits = AM.getBitWidth();
+ if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+ // gcd doesn't divide Delta, no dependence
+ ++ExactRDIVindependence;
+ return true;
+ }
+
+ DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+
+ // since SCEV construction seems to normalize, LM = 0
+ APInt SrcUM(Bits, 1, true);
+ bool SrcUMvalid = false;
+ // SrcUM is perhaps unavailable, let's check
+ if (const SCEVConstant *UpperBound =
+ collectConstantUpperBound(SrcLoop, Delta->getType())) {
+ SrcUM = UpperBound->getValue()->getValue();
+ DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
+ SrcUMvalid = true;
+ }
+
+ APInt DstUM(Bits, 1, true);
+ bool DstUMvalid = false;
+ // UM is perhaps unavailable, let's check
+ if (const SCEVConstant *UpperBound =
+ collectConstantUpperBound(DstLoop, Delta->getType())) {
+ DstUM = UpperBound->getValue()->getValue();
+ DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
+ DstUMvalid = true;
+ }
+
+ APInt TU(APInt::getSignedMaxValue(Bits));
+ APInt TL(APInt::getSignedMinValue(Bits));
+
+ // test(BM/G, LM-X) and test(-BM/G, X-UM)
+ APInt TMUL = BM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (SrcUMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (SrcUMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+
+ // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+ TMUL = AM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (DstUMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (DstUMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+ if (TL.sgt(TU))
+ ++ExactRDIVindependence;
+ return TL.sgt(TU);
+}
+
+
+// symbolicRDIVtest -
+// In Section 4.5 of the Practical Dependence Testing paper,the authors
+// introduce a special case of Banerjee's Inequalities (also called the
+// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
+// particularly cases with symbolics. Since it's only able to disprove
+// dependence (not compute distances or directions), we'll use it as a
+// fall back for the other tests.
+//
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
+// where i and j are induction variables and c1 and c2 are loop invariants,
+// we can use the symbolic tests to disprove some dependences, serving as a
+// backup for the RDIV test. Note that i and j can be the same variable,
+// letting this test serve as a backup for the various SIV tests.
+//
+// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
+// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
+// loop bounds for the i and j loops, respectively. So, ...
+//
+// c1 + a1*i = c2 + a2*j
+// a1*i - a2*j = c2 - c1
+//
+// To test for a dependence, we compute c2 - c1 and make sure it's in the
+// range of the maximum and minimum possible values of a1*i - a2*j.
+// Considering the signs of a1 and a2, we have 4 possible cases:
+//
+// 1) If a1 >= 0 and a2 >= 0, then
+// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
+// -a2*N2 <= c2 - c1 <= a1*N1
+//
+// 2) If a1 >= 0 and a2 <= 0, then
+// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
+// 0 <= c2 - c1 <= a1*N1 - a2*N2
+//
+// 3) If a1 <= 0 and a2 >= 0, then
+// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
+// a1*N1 - a2*N2 <= c2 - c1 <= 0
+//
+// 4) If a1 <= 0 and a2 <= 0, then
+// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
+// a1*N1 <= c2 - c1 <= -a2*N2
+//
+// return true if dependence disproved
+bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1,
+ const SCEV *A2,
+ const SCEV *C1,
+ const SCEV *C2,
+ const Loop *Loop1,
+ const Loop *Loop2) const {
+ ++SymbolicRDIVapplications;
+ DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
+ DEBUG(dbgs() << "\t A1 = " << *A1);
+ DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
+ DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
+ DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
+ DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
+ const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
+ const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
+ DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
+ DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
+ const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
+ const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
+ DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
+ DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
+ if (SE->isKnownNonNegative(A1)) {
+ if (SE->isKnownNonNegative(A2)) {
+ // A1 >= 0 && A2 >= 0
+ if (N1) {
+ // make sure that c2 - c1 <= a1*N1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ if (N2) {
+ // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ else if (SE->isKnownNonPositive(A2)) {
+ // a1 >= 0 && a2 <= 0
+ if (N1 && N2) {
+ // make sure that c2 - c1 <= a1*N1 - a2*N2
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
+ DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ // make sure that 0 <= c2 - c1
+ if (SE->isKnownNegative(C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ else if (SE->isKnownNonPositive(A1)) {
+ if (SE->isKnownNonNegative(A2)) {
+ // a1 <= 0 && a2 >= 0
+ if (N1 && N2) {
+ // make sure that a1*N1 - a2*N2 <= c2 - c1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
+ DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ // make sure that c2 - c1 <= 0
+ if (SE->isKnownPositive(C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ else if (SE->isKnownNonPositive(A2)) {
+ // a1 <= 0 && a2 <= 0
+ if (N1) {
+ // make sure that a1*N1 <= c2 - c1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ if (N2) {
+ // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+
+// testSIV -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
+// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
+// a2 are constant, we attack it with an SIV test. While they can all be
+// solved with the Exact SIV test, it's worthwhile to use simpler tests when
+// they apply; they're cheaper and sometimes more precise.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testSIV(const SCEV *Src,
+ const SCEV *Dst,
+ unsigned &Level,
+ FullDependence &Result,
+ Constraint &NewConstraint,
+ const SCEV *&SplitIter) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (SrcAddRec && DstAddRec) {
+ const SCEV *SrcConst = SrcAddRec->getStart();
+ const SCEV *DstConst = DstAddRec->getStart();
+ const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ const Loop *CurLoop = SrcAddRec->getLoop();
+ assert(CurLoop == DstAddRec->getLoop() &&
+ "both loops in SIV should be same");
+ Level = mapSrcLoop(CurLoop);
+ bool disproven;
+ if (SrcCoeff == DstCoeff)
+ disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint);
+ else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
+ disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint, SplitIter);
+ else
+ disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint);
+ return disproven ||
+ gcdMIVtest(Src, Dst, Result) ||
+ symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
+ }
+ if (SrcAddRec) {
+ const SCEV *SrcConst = SrcAddRec->getStart();
+ const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ const SCEV *DstConst = Dst;
+ const Loop *CurLoop = SrcAddRec->getLoop();
+ Level = mapSrcLoop(CurLoop);
+ return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint) ||
+ gcdMIVtest(Src, Dst, Result);
+ }
+ if (DstAddRec) {
+ const SCEV *DstConst = DstAddRec->getStart();
+ const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ const SCEV *SrcConst = Src;
+ const Loop *CurLoop = DstAddRec->getLoop();
+ Level = mapDstLoop(CurLoop);
+ return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
+ CurLoop, Level, Result, NewConstraint) ||
+ gcdMIVtest(Src, Dst, Result);
+ }
+ llvm_unreachable("SIV test expected at least one AddRec");
+ return false;
+}
+
+
+// testRDIV -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
+// where i and j are induction variables, c1 and c2 are loop invariant,
+// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
+// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
+// It doesn't make sense to talk about distance or direction in this case,
+// so there's no point in making special versions of the Strong SIV test or
+// the Weak-crossing SIV test.
+//
+// With minor algebra, this test can also be used for things like
+// [c1 + a1*i + a2*j][c2].
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testRDIV(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ // we have 3 possible situations here:
+ // 1) [a*i + b] and [c*j + d]
+ // 2) [a*i + c*j + b] and [d]
+ // 3) [b] and [a*i + c*j + d]
+ // We need to find what we've got and get organized
+
+ const SCEV *SrcConst, *DstConst;
+ const SCEV *SrcCoeff, *DstCoeff;
+ const Loop *SrcLoop, *DstLoop;
+
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (SrcAddRec && DstAddRec) {
+ SrcConst = SrcAddRec->getStart();
+ SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ SrcLoop = SrcAddRec->getLoop();
+ DstConst = DstAddRec->getStart();
+ DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ DstLoop = DstAddRec->getLoop();
+ }
+ else if (SrcAddRec) {
+ if (const SCEVAddRecExpr *tmpAddRec =
+ dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
+ SrcConst = tmpAddRec->getStart();
+ SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
+ SrcLoop = tmpAddRec->getLoop();
+ DstConst = Dst;
+ DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
+ DstLoop = SrcAddRec->getLoop();
+ }
+ else
+ llvm_unreachable("RDIV reached by surprising SCEVs");
+ }
+ else if (DstAddRec) {
+ if (const SCEVAddRecExpr *tmpAddRec =
+ dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
+ DstConst = tmpAddRec->getStart();
+ DstCoeff = tmpAddRec->getStepRecurrence(*SE);
+ DstLoop = tmpAddRec->getLoop();
+ SrcConst = Src;
+ SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
+ SrcLoop = DstAddRec->getLoop();
+ }
+ else
+ llvm_unreachable("RDIV reached by surprising SCEVs");
+ }
+ else
+ llvm_unreachable("RDIV expected at least one AddRec");
+ return exactRDIVtest(SrcCoeff, DstCoeff,
+ SrcConst, DstConst,
+ SrcLoop, DstLoop,
+ Result) ||
+ gcdMIVtest(Src, Dst, Result) ||
+ symbolicRDIVtest(SrcCoeff, DstCoeff,
+ SrcConst, DstConst,
+ SrcLoop, DstLoop);
+}
+
+
+// Tests the single-subscript MIV pair (Src and Dst) for dependence.
+// Return true if dependence disproved.
+// Can sometimes refine direction vectors.
+bool DependenceAnalysis::testMIV(const SCEV *Src,
+ const SCEV *Dst,
+ const SmallBitVector &Loops,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ Result.Consistent = false;
+ return gcdMIVtest(Src, Dst, Result) ||
+ banerjeeMIVtest(Src, Dst, Loops, Result);
+}
+
+
+// Given a product, e.g., 10*X*Y, returns the first constant operand,
+// in this case 10. If there is no constant part, returns NULL.
+static
+const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) {
+ for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) {
+ if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op)))
+ return Constant;
+ }
+ return NULL;
+}
+
+
+//===----------------------------------------------------------------------===//
+// gcdMIVtest -
+// Tests an MIV subscript pair for dependence.
+// Returns true if any possible dependence is disproved.
+// Marks the result as inconsistent.
+// Can sometimes disprove the equal direction for 1 or more loops,
+// as discussed in Michael Wolfe's book,
+// High Performance Compilers for Parallel Computing, page 235.
+//
+// We spend some effort (code!) to handle cases like
+// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
+// but M and N are just loop-invariant variables.
+// This should help us handle linearized subscripts;
+// also makes this test a useful backup to the various SIV tests.
+//
+// It occurs to me that the presence of loop-invariant variables
+// changes the nature of the test from "greatest common divisor"
+// to "a common divisor".
+bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "starting gcd\n");
+ ++GCDapplications;
+ unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
+ APInt RunningGCD = APInt::getNullValue(BitWidth);
+
+ // Examine Src coefficients.
+ // Compute running GCD and record source constant.
+ // Because we're looking for the constant at the end of the chain,
+ // we can't quit the loop just because the GCD == 1.
+ const SCEV *Coefficients = Src;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ if (!Constant)
+ return false;
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ Coefficients = AddRec->getStart();
+ }
+ const SCEV *SrcConst = Coefficients;
+
+ // Examine Dst coefficients.
+ // Compute running GCD and record destination constant.
+ // Because we're looking for the constant at the end of the chain,
+ // we can't quit the loop just because the GCD == 1.
+ Coefficients = Dst;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ if (!Constant)
+ return false;
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ Coefficients = AddRec->getStart();
+ }
+ const SCEV *DstConst = Coefficients;
+
+ APInt ExtraGCD = APInt::getNullValue(BitWidth);
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << " Delta = " << *Delta << "\n");
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
+ if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
+ // If Delta is a sum of products, we may be able to make further progress.
+ for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
+ const SCEV *Operand = Sum->getOperand(Op);
+ if (isa<SCEVConstant>(Operand)) {
+ assert(!Constant && "Surprised to find multiple constants");
+ Constant = cast<SCEVConstant>(Operand);
+ }
+ else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
+ // Search for constant operand to participate in GCD;
+ // If none found; return false.
+ const SCEVConstant *ConstOp = getConstantPart(Product);
+ if (!ConstOp)
+ return false;
+ APInt ConstOpValue = ConstOp->getValue()->getValue();
+ ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
+ ConstOpValue.abs());
+ }
+ else
+ return false;
+ }
+ }
+ if (!Constant)
+ return false;
+ APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue();
+ DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
+ if (ConstDelta == 0)
+ return false;
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
+ DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
+ APInt Remainder = ConstDelta.srem(RunningGCD);
+ if (Remainder != 0) {
+ ++GCDindependence;
+ return true;
+ }
+
+ // Try to disprove equal directions.
+ // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
+ // the code above can't disprove the dependence because the GCD = 1.
+ // So we consider what happen if i = i' and what happens if j = j'.
+ // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
+ // which is infeasible, so we can disallow the = direction for the i level.
+ // Setting j = j' doesn't help matters, so we end up with a direction vector
+ // of [<>, *]
+ //
+ // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
+ // we need to remember that the constant part is 5 and the RunningGCD should
+ // be initialized to ExtraGCD = 30.
+ DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
+
+ bool Improved = false;
+ Coefficients = Src;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ Coefficients = AddRec->getStart();
+ const Loop *CurLoop = AddRec->getLoop();
+ RunningGCD = ExtraGCD;
+ const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
+ const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
+ const SCEV *Inner = Src;
+ while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
+ AddRec = cast<SCEVAddRecExpr>(Inner);
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ if (CurLoop == AddRec->getLoop())
+ ; // SrcCoeff == Coeff
+ else {
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else
+ Constant = cast<SCEVConstant>(Coeff);
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ }
+ Inner = AddRec->getStart();
+ }
+ Inner = Dst;
+ while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
+ AddRec = cast<SCEVAddRecExpr>(Inner);
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ if (CurLoop == AddRec->getLoop())
+ DstCoeff = Coeff;
+ else {
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else
+ Constant = cast<SCEVConstant>(Coeff);
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ }
+ Inner = AddRec->getStart();
+ }
+ Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else if (isa<SCEVConstant>(Delta))
+ Constant = cast<SCEVConstant>(Delta);
+ else {
+ // The difference of the two coefficients might not be a product
+ // or constant, in which case we give up on this direction.
+ continue;
+ }
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
+ if (RunningGCD != 0) {
+ Remainder = ConstDelta.srem(RunningGCD);
+ DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ unsigned Level = mapSrcLoop(CurLoop);
+ Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
+ Improved = true;
+ }
+ }
+ }
+ if (Improved)
+ ++GCDsuccesses;
+ DEBUG(dbgs() << "all done\n");
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// banerjeeMIVtest -
+// Use Banerjee's Inequalities to test an MIV subscript pair.
+// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
+// Generally follows the discussion in Section 2.5.2 of
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+//
+// The inequalities given on page 25 are simplified in that loops are
+// normalized so that the lower bound is always 0 and the stride is always 1.
+// For example, Wolfe gives
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//
+// where A_k is the coefficient of the kth index in the source subscript,
+// B_k is the coefficient of the kth index in the destination subscript,
+// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
+// index, and N_k is the stride of the kth index. Since all loops are normalized
+// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
+// equation to
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
+// = (A^-_k - B_k)^- (U_k - 1) - B_k
+//
+// Similar simplifications are possible for the other equations.
+//
+// When we can't determine the number of iterations for a loop,
+// we use NULL as an indicator for the worst case, infinity.
+// When computing the upper bound, NULL denotes +inf;
+// for the lower bound, NULL denotes -inf.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src,
+ const SCEV *Dst,
+ const SmallBitVector &Loops,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "starting Banerjee\n");
+ ++BanerjeeApplications;
+ DEBUG(dbgs() << " Src = " << *Src << '\n');
+ const SCEV *A0;
+ CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
+ DEBUG(dbgs() << " Dst = " << *Dst << '\n');
+ const SCEV *B0;
+ CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
+ BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
+ const SCEV *Delta = SE->getMinusSCEV(B0, A0);
+ DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
+
+ // Compute bounds for all the * directions.
+ DEBUG(dbgs() << "\tBounds[*]\n");
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
+ Bound[K].Direction = Dependence::DVEntry::ALL;
+ Bound[K].DirSet = Dependence::DVEntry::NONE;
+ findBoundsALL(A, B, Bound, K);
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\t " << K << '\t');
+ if (Bound[K].Lower[Dependence::DVEntry::ALL])
+ DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[K].Upper[Dependence::DVEntry::ALL])
+ DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+#endif
+ }
+
+ // Test the *, *, *, ... case.
+ bool Disproved = false;
+ if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
+ // Explore the direction vector hierarchy.
+ unsigned DepthExpanded = 0;
+ unsigned NewDeps = exploreDirections(1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+ if (NewDeps > 0) {
+ bool Improved = false;
+ for (unsigned K = 1; K <= CommonLevels; ++K) {
+ if (Loops[K]) {
+ unsigned Old = Result.DV[K - 1].Direction;
+ Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
+ Improved |= Old != Result.DV[K - 1].Direction;
+ if (!Result.DV[K - 1].Direction) {
+ Improved = false;
+ Disproved = true;
+ break;
+ }
+ }
+ }
+ if (Improved)
+ ++BanerjeeSuccesses;
+ }
+ else {
+ ++BanerjeeIndependence;
+ Disproved = true;
+ }
+ }
+ else {
+ ++BanerjeeIndependence;
+ Disproved = true;
+ }
+ delete [] Bound;
+ delete [] A;
+ delete [] B;
+ return Disproved;
+}
+
+
+// Hierarchically expands the direction vector
+// search space, combining the directions of discovered dependences
+// in the DirSet field of Bound. Returns the number of distinct
+// dependences discovered. If the dependence is disproved,
+// it will return 0.
+unsigned DependenceAnalysis::exploreDirections(unsigned Level,
+ CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ const SmallBitVector &Loops,
+ unsigned &DepthExpanded,
+ const SCEV *Delta) const {
+ if (Level > CommonLevels) {
+ // record result
+ DEBUG(dbgs() << "\t[");
+ for (unsigned K = 1; K <= CommonLevels; ++K) {
+ if (Loops[K]) {
+ Bound[K].DirSet |= Bound[K].Direction;
+#ifndef NDEBUG
+ switch (Bound[K].Direction) {
+ case Dependence::DVEntry::LT:
+ DEBUG(dbgs() << " <");
+ break;
+ case Dependence::DVEntry::EQ:
+ DEBUG(dbgs() << " =");
+ break;
+ case Dependence::DVEntry::GT:
+ DEBUG(dbgs() << " >");
+ break;
+ case Dependence::DVEntry::ALL:
+ DEBUG(dbgs() << " *");
+ break;
+ default:
+ llvm_unreachable("unexpected Bound[K].Direction");
+ }
+#endif
+ }
+ }
+ DEBUG(dbgs() << " ]\n");
+ return 1;
+ }
+ if (Loops[Level]) {
+ if (Level > DepthExpanded) {
+ DepthExpanded = Level;
+ // compute bounds for <, =, > at current level
+ findBoundsLT(A, B, Bound, Level);
+ findBoundsGT(A, B, Bound, Level);
+ findBoundsEQ(A, B, Bound, Level);
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
+ DEBUG(dbgs() << "\t <\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::LT])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::LT])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+ DEBUG(dbgs() << "\t =\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::EQ])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::EQ])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+ DEBUG(dbgs() << "\t >\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::GT])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::GT])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+#endif
+ }
+
+ unsigned NewDeps = 0;
+
+ // test bounds for <, *, *, ...
+ if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ // Test bounds for =, *, *, ...
+ if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ // test bounds for >, *, *, ...
+ if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ Bound[Level].Direction = Dependence::DVEntry::ALL;
+ return NewDeps;
+ }
+ else
+ return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
+}
+
+
+// Returns true iff the current bounds are plausible.
+bool DependenceAnalysis::testBounds(unsigned char DirKind,
+ unsigned Level,
+ BoundInfo *Bound,
+ const SCEV *Delta) const {
+ Bound[Level].Direction = DirKind;
+ if (const SCEV *LowerBound = getLowerBound(Bound))
+ if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
+ return false;
+ if (const SCEV *UpperBound = getUpperBound(Bound))
+ if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
+ return false;
+ return true;
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the * direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
+// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^*_k = (A^-_k - B^+_k)U_k
+// UB^*_k = (A^+_k - B^-_k)U_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+// Note that the lower bound is always <= 0
+// and the upper bound is always >= 0.
+void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ Bound[K].Lower[Dependence::DVEntry::ALL] =
+ SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
+ Bound[K].Iterations);
+ Bound[K].Upper[Dependence::DVEntry::ALL] =
+ SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
+ Bound[K].Iterations);
+ }
+ else {
+ // If the difference is 0, we won't need to know the number of iterations.
+ if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
+ Bound[K].Lower[Dependence::DVEntry::ALL] =
+ SE->getConstant(A[K].Coeff->getType(), 0);
+ if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
+ Bound[K].Upper[Dependence::DVEntry::ALL] =
+ SE->getConstant(A[K].Coeff->getType(), 0);
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the = direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
+// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^=_k = (A_k - B_k)^- U_k
+// UB^=_k = (A_k - B_k)^+ U_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+// Note that the lower bound is always <= 0
+// and the upper bound is always >= 0.
+void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
+ const SCEV *NegativePart = getNegativePart(Delta);
+ Bound[K].Lower[Dependence::DVEntry::EQ] =
+ SE->getMulExpr(NegativePart, Bound[K].Iterations);
+ const SCEV *PositivePart = getPositivePart(Delta);
+ Bound[K].Upper[Dependence::DVEntry::EQ] =
+ SE->getMulExpr(PositivePart, Bound[K].Iterations);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
+ const SCEV *NegativePart = getNegativePart(Delta);
+ if (NegativePart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
+ const SCEV *PositivePart = getPositivePart(Delta);
+ if (PositivePart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the < direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
+// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Iter_1 =
+ SE->getMinusSCEV(Bound[K].Iterations,
+ SE->getConstant(Bound[K].Iterations->getType(), 1));
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
+ Bound[K].Lower[Dependence::DVEntry::LT] =
+ SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
+ Bound[K].Upper[Dependence::DVEntry::LT] =
+ SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
+ if (NegPart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
+ if (PosPart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the > direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
+// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
+// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Iter_1 =
+ SE->getMinusSCEV(Bound[K].Iterations,
+ SE->getConstant(Bound[K].Iterations->getType(), 1));
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
+ Bound[K].Lower[Dependence::DVEntry::GT] =
+ SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
+ Bound[K].Upper[Dependence::DVEntry::GT] =
+ SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
+ if (NegPart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
+ const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
+ if (PosPart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
+ }
+}
+
+
+// X^+ = max(X, 0)
+const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const {
+ return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0));
+}
+
+
+// X^- = min(X, 0)
+const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const {
+ return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0));
+}
+
+
+// Walks through the subscript,
+// collecting each coefficient, the associated loop bounds,
+// and recording its positive and negative parts for later use.
+DependenceAnalysis::CoefficientInfo *
+DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,
+ bool SrcFlag,
+ const SCEV *&Constant) const {
+ const SCEV *Zero = SE->getConstant(Subscript->getType(), 0);
+ CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ CI[K].Coeff = Zero;
+ CI[K].PosPart = Zero;
+ CI[K].NegPart = Zero;
+ CI[K].Iterations = NULL;
+ }
+ while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
+ const Loop *L = AddRec->getLoop();
+ unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
+ CI[K].Coeff = AddRec->getStepRecurrence(*SE);
+ CI[K].PosPart = getPositivePart(CI[K].Coeff);
+ CI[K].NegPart = getNegativePart(CI[K].Coeff);
+ CI[K].Iterations = collectUpperBound(L, Subscript->getType());
+ Subscript = AddRec->getStart();
+ }
+ Constant = Subscript;
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\tCoefficient Info\n");
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
+ DEBUG(dbgs() << "\tPos Part = ");
+ DEBUG(dbgs() << *CI[K].PosPart);
+ DEBUG(dbgs() << "\tNeg Part = ");
+ DEBUG(dbgs() << *CI[K].NegPart);
+ DEBUG(dbgs() << "\tUpper Bound = ");
+ if (CI[K].Iterations)
+ DEBUG(dbgs() << *CI[K].Iterations);
+ else
+ DEBUG(dbgs() << "+inf");
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
+#endif
+ return CI;
+}
+
+
+// Looks through all the bounds info and
+// computes the lower bound given the current direction settings
+// at each level. If the lower bound for any level is -inf,
+// the result is -inf.
+const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const {
+ const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
+ for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
+ if (Bound[K].Lower[Bound[K].Direction])
+ Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
+ else
+ Sum = NULL;
+ }
+ return Sum;
+}
+
+
+// Looks through all the bounds info and
+// computes the upper bound given the current direction settings
+// at each level. If the upper bound at any level is +inf,
+// the result is +inf.
+const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const {
+ const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
+ for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
+ if (Bound[K].Upper[Bound[K].Direction])
+ Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
+ else
+ Sum = NULL;
+ }
+ return Sum;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constraint manipulation for Delta test.
+
+// Given a linear SCEV,
+// return the coefficient (the step)
+// corresponding to the specified loop.
+// If there isn't one, return 0.
+// For example, given a*i + b*j + c*k, zeroing the coefficient
+// corresponding to the j loop would yield b.
+const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec)
+ return SE->getConstant(Expr->getType(), 0);
+ if (AddRec->getLoop() == TargetLoop)
+ return AddRec->getStepRecurrence(*SE);
+ return findCoefficient(AddRec->getStart(), TargetLoop);
+}
+
+
+// Given a linear SCEV,
+// return the SCEV given by zeroing out the coefficient
+// corresponding to the specified loop.
+// For example, given a*i + b*j + c*k, zeroing the coefficient
+// corresponding to the j loop would yield a*i + c*k.
+const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec)
+ return Expr; // ignore
+ if (AddRec->getLoop() == TargetLoop)
+ return AddRec->getStart();
+ return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
+ AddRec->getStepRecurrence(*SE),
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+}
+
+
+// Given a linear SCEV Expr,
+// return the SCEV given by adding some Value to the
+// coefficient corresponding to the specified TargetLoop.
+// For example, given a*i + b*j + c*k, adding 1 to the coefficient
+// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
+const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop,
+ const SCEV *Value) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec) // create a new addRec
+ return SE->getAddRecExpr(Expr,
+ Value,
+ TargetLoop,
+ SCEV::FlagAnyWrap); // Worst case, with no info.
+ if (AddRec->getLoop() == TargetLoop) {
+ const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
+ if (Sum->isZero())
+ return AddRec->getStart();
+ return SE->getAddRecExpr(AddRec->getStart(),
+ Sum,
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+ }
+ return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(),
+ TargetLoop, Value),
+ AddRec->getStepRecurrence(*SE),
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+}
+
+
+// Review the constraints, looking for opportunities
+// to simplify a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set consistent to false.
+// Corresponds to Figure 5 from the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+bool DependenceAnalysis::propagate(const SCEV *&Src,
+ const SCEV *&Dst,
+ SmallBitVector &Loops,
+ SmallVector<Constraint, 4> &Constraints,
+ bool &Consistent) {
+ bool Result = false;
+ for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
+ DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
+ DEBUG(Constraints[LI].dump(dbgs()));
+ if (Constraints[LI].isDistance())
+ Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
+ else if (Constraints[LI].isLine())
+ Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
+ else if (Constraints[LI].isPoint())
+ Result |= propagatePoint(Src, Dst, Constraints[LI]);
+ }
+ return Result;
+}
+
+
+// Attempt to propagate a distance
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set consistent to false.
+bool DependenceAnalysis::propagateDistance(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint,
+ bool &Consistent) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ if (A_K->isZero())
+ return false;
+ const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
+ Src = SE->getMinusSCEV(Src, DA_K);
+ Src = zeroCoefficient(Src, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+ Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
+ DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ return true;
+}
+
+
+// Attempt to propagate a line
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set consistent to false.
+bool DependenceAnalysis::propagateLine(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint,
+ bool &Consistent) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ const SCEV *A = CurConstraint.getA();
+ const SCEV *B = CurConstraint.getB();
+ const SCEV *C = CurConstraint.getC();
+ DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n");
+ DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
+ if (A->isZero()) {
+ const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Bconst || !Cconst) return false;
+ APInt Beta = Bconst->getValue()->getValue();
+ APInt Charlie = Cconst->getValue()->getValue();
+ APInt CdivB = Charlie.sdiv(Beta);
+ assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
+ const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+ // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
+ Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
+ Dst = zeroCoefficient(Dst, CurLoop);
+ if (!findCoefficient(Src, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else if (B->isZero()) {
+ const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Aconst || !Cconst) return false;
+ APInt Alpha = Aconst->getValue()->getValue();
+ APInt Charlie = Cconst->getValue()->getValue();
+ APInt CdivA = Charlie.sdiv(Alpha);
+ assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
+ Src = zeroCoefficient(Src, CurLoop);
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
+ const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Aconst || !Cconst) return false;
+ APInt Alpha = Aconst->getValue()->getValue();
+ APInt Charlie = Cconst->getValue()->getValue();
+ APInt CdivA = Charlie.sdiv(Alpha);
+ assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
+ Src = zeroCoefficient(Src, CurLoop);
+ Dst = addToCoefficient(Dst, CurLoop, A_K);
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else {
+ // paper is incorrect here, or perhaps just misleading
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getMulExpr(Src, A);
+ Dst = SE->getMulExpr(Dst, A);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
+ Src = zeroCoefficient(Src, CurLoop);
+ Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
+ return true;
+}
+
+
+// Attempt to propagate a point
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+bool DependenceAnalysis::propagatePoint(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+ const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
+ const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
+ DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+ Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
+ Src = zeroCoefficient(Src, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+ Dst = zeroCoefficient(Dst, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+ return true;
+}
+
+
+// Update direction vector entry based on the current constraint.
+void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
+ const Constraint &CurConstraint
+ ) const {
+ DEBUG(dbgs() << "\tUpdate direction, constraint =");
+ DEBUG(CurConstraint.dump(dbgs()));
+ if (CurConstraint.isAny())
+ ; // use defaults
+ else if (CurConstraint.isDistance()) {
+ // this one is consistent, the others aren't
+ Level.Scalar = false;
+ Level.Distance = CurConstraint.getD();
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
+ NewDirection = Dependence::DVEntry::EQ;
+ if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
+ NewDirection |= Dependence::DVEntry::LT;
+ if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
+ NewDirection |= Dependence::DVEntry::GT;
+ Level.Direction &= NewDirection;
+ }
+ else if (CurConstraint.isLine()) {
+ Level.Scalar = false;
+ Level.Distance = NULL;
+ // direction should be accurate
+ }
+ else if (CurConstraint.isPoint()) {
+ Level.Scalar = false;
+ Level.Distance = NULL;
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if (!isKnownPredicate(CmpInst::ICMP_NE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if X may be = Y
+ NewDirection |= Dependence::DVEntry::EQ;
+ if (!isKnownPredicate(CmpInst::ICMP_SLE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if Y may be > X
+ NewDirection |= Dependence::DVEntry::LT;
+ if (!isKnownPredicate(CmpInst::ICMP_SGE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if Y may be < X
+ NewDirection |= Dependence::DVEntry::GT;
+ Level.Direction &= NewDirection;
+ }
+ else
+ llvm_unreachable("constraint has unexpected kind");
+}
+
+
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+// For debugging purposes, dump a small bit vector to dbgs().
+static void dumpSmallBitVector(SmallBitVector &BV) {
+ dbgs() << "{";
+ for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) {
+ dbgs() << VI;
+ if (BV.find_next(VI) >= 0)
+ dbgs() << ' ';
+ }
+ dbgs() << "}\n";
+}
+#endif
+
+
+// depends -
+// Returns NULL if there is no dependence.
+// Otherwise, return a Dependence with as many details as possible.
+// Corresponds to Section 3.1 in the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+//
+// Care is required to keep the routine below, getSplitIteration(),
+// up to date with respect to this routine.
+Dependence *DependenceAnalysis::depends(Instruction *Src,
+ Instruction *Dst,
+ bool PossiblyLoopIndependent) {
+ if (Src == Dst)
+ PossiblyLoopIndependent = false;
+
+ if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
+ (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
+ // if both instructions don't reference memory, there's no dependence
+ return NULL;
+
+ if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
+ // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
+ DEBUG(dbgs() << "can only handle simple loads and stores\n");
+ return new Dependence(Src, Dst);
+ }
+
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
+
+ switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ // cannot analyse objects if we don't understand their aliasing.
+ DEBUG(dbgs() << "can't analyze may or partial alias\n");
+ return new Dependence(Src, Dst);
+ case AliasAnalysis::NoAlias:
+ // If the objects noalias, they are distinct, accesses are independent.
+ DEBUG(dbgs() << "no alias\n");
+ return NULL;
+ case AliasAnalysis::MustAlias:
+ break; // The underlying objects alias; test accesses for dependence.
+ }
+
+ // establish loop nesting levels
+ establishNestingLevels(Src, Dst);
+ DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
+ DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
+
+ FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+ ++TotalArrayPairs;
+
+ // See if there are GEPs we can use.
+ bool UsefulGEP = false;
+ GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ if (SrcGEP && DstGEP &&
+ SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
+ const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
+ const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
+ DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
+ DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n");
+
+ UsefulGEP =
+ isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ }
+ unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
+ SmallVector<Subscript, 4> Pair(Pairs);
+ if (UsefulGEP) {
+ DEBUG(dbgs() << " using GEPs\n");
+ unsigned P = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin();
+ SrcIdx != SrcEnd;
+ ++SrcIdx, ++DstIdx, ++P) {
+ Pair[P].Src = SE->getSCEV(*SrcIdx);
+ Pair[P].Dst = SE->getSCEV(*DstIdx);
+ }
+ }
+ else {
+ DEBUG(dbgs() << " ignoring GEPs\n");
+ const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+ const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+ DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
+ DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
+ Pair[0].Src = SrcSCEV;
+ Pair[0].Dst = DstSCEV;
+ }
+
+ for (unsigned P = 0; P < Pairs; ++P) {
+ Pair[P].Loops.resize(MaxLevels + 1);
+ Pair[P].GroupLoops.resize(MaxLevels + 1);
+ Pair[P].Group.resize(Pairs);
+ removeMatchingExtensions(&Pair[P]);
+ Pair[P].Classification =
+ classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
+ Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[P].Loops);
+ Pair[P].GroupLoops = Pair[P].Loops;
+ Pair[P].Group.set(P);
+ DEBUG(dbgs() << " subscript " << P << "\n");
+ DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
+ DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
+ DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
+ DEBUG(dbgs() << "\tloops = ");
+ DEBUG(dumpSmallBitVector(Pair[P].Loops));
+ }
+
+ SmallBitVector Separable(Pairs);
+ SmallBitVector Coupled(Pairs);
+
+ // Partition subscripts into separable and minimally-coupled groups
+ // Algorithm in paper is algorithmically better;
+ // this may be faster in practice. Check someday.
+ //
+ // Here's an example of how it works. Consider this code:
+ //
+ // for (i = ...) {
+ // for (j = ...) {
+ // for (k = ...) {
+ // for (l = ...) {
+ // for (m = ...) {
+ // A[i][j][k][m] = ...;
+ // ... = A[0][j][l][i + j];
+ // }
+ // }
+ // }
+ // }
+ // }
+ //
+ // There are 4 subscripts here:
+ // 0 [i] and [0]
+ // 1 [j] and [j]
+ // 2 [k] and [l]
+ // 3 [m] and [i + j]
+ //
+ // We've already classified each subscript pair as ZIV, SIV, etc.,
+ // and collected all the loops mentioned by pair P in Pair[P].Loops.
+ // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
+ // and set Pair[P].Group = {P}.
+ //
+ // Src Dst Classification Loops GroupLoops Group
+ // 0 [i] [0] SIV {1} {1} {0}
+ // 1 [j] [j] SIV {2} {2} {1}
+ // 2 [k] [l] RDIV {3,4} {3,4} {2}
+ // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3}
+ //
+ // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
+ // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
+ //
+ // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
+ // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
+ // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
+ // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
+ // to either Separable or Coupled).
+ //
+ // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
+ // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty,
+ // so Pair[3].Group = {0, 1, 3} and Done = false.
+ //
+ // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
+ // Since Done remains true, we add 2 to the set of Separable pairs.
+ //
+ // Finally, we consider 3. There's nothing to compare it with,
+ // so Done remains true and we add it to the Coupled set.
+ // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
+ //
+ // In the end, we've got 1 separable subscript and 1 coupled group.
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ if (Pair[SI].Classification == Subscript::NonLinear) {
+ // ignore these, but collect loops for later
+ ++NonlinearSubscriptPairs;
+ collectCommonLoops(Pair[SI].Src,
+ LI->getLoopFor(Src->getParent()),
+ Pair[SI].Loops);
+ collectCommonLoops(Pair[SI].Dst,
+ LI->getLoopFor(Dst->getParent()),
+ Pair[SI].Loops);
+ Result.Consistent = false;
+ }
+ else if (Pair[SI].Classification == Subscript::ZIV) {
+ // always separable
+ Separable.set(SI);
+ }
+ else {
+ // SIV, RDIV, or MIV, so check for coupled group
+ bool Done = true;
+ for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
+ SmallBitVector Intersection = Pair[SI].GroupLoops;
+ Intersection &= Pair[SJ].GroupLoops;
+ if (Intersection.any()) {
+ // accumulate set of all the loops in group
+ Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
+ // accumulate set of all subscripts in group
+ Pair[SJ].Group |= Pair[SI].Group;
+ Done = false;
+ }
+ }
+ if (Done) {
+ if (Pair[SI].Group.count() == 1) {
+ Separable.set(SI);
+ ++SeparableSubscriptPairs;
+ }
+ else {
+ Coupled.set(SI);
+ ++CoupledSubscriptPairs;
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << " Separable = ");
+ DEBUG(dumpSmallBitVector(Separable));
+ DEBUG(dbgs() << " Coupled = ");
+ DEBUG(dumpSmallBitVector(Coupled));
+
+ Constraint NewConstraint;
+ NewConstraint.setAny(SE);
+
+ // test separable subscripts
+ for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ DEBUG(dbgs() << "testing subscript " << SI);
+ switch (Pair[SI].Classification) {
+ case Subscript::ZIV:
+ DEBUG(dbgs() << ", ZIV\n");
+ if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
+ return NULL;
+ break;
+ case Subscript::SIV: {
+ DEBUG(dbgs() << ", SIV\n");
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
+ Result, NewConstraint, SplitIter))
+ return NULL;
+ break;
+ }
+ case Subscript::RDIV:
+ DEBUG(dbgs() << ", RDIV\n");
+ if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
+ return NULL;
+ break;
+ case Subscript::MIV:
+ DEBUG(dbgs() << ", MIV\n");
+ if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
+ return NULL;
+ break;
+ default:
+ llvm_unreachable("subscript has unexpected classification");
+ }
+ }
+
+ if (Coupled.count()) {
+ // test coupled subscript groups
+ DEBUG(dbgs() << "starting on coupled subscripts\n");
+ DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
+ SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
+ for (unsigned II = 0; II <= MaxLevels; ++II)
+ Constraints[II].setAny(SE);
+ for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ DEBUG(dbgs() << "testing subscript group " << SI << " { ");
+ SmallBitVector Group(Pair[SI].Group);
+ SmallBitVector Sivs(Pairs);
+ SmallBitVector Mivs(Pairs);
+ SmallBitVector ConstrainedLevels(MaxLevels + 1);
+ for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ DEBUG(dbgs() << SJ << " ");
+ if (Pair[SJ].Classification == Subscript::SIV)
+ Sivs.set(SJ);
+ else
+ Mivs.set(SJ);
+ }
+ DEBUG(dbgs() << "}\n");
+ while (Sivs.any()) {
+ bool Changed = false;
+ for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
+ // SJ is an SIV subscript that's part of the current coupled group
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ DEBUG(dbgs() << "SIV\n");
+ if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
+ Result, NewConstraint, SplitIter))
+ return NULL;
+ ConstrainedLevels.set(Level);
+ if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
+ if (Constraints[Level].isEmpty()) {
+ ++DeltaIndependence;
+ return NULL;
+ }
+ Changed = true;
+ }
+ Sivs.reset(SJ);
+ }
+ if (Changed) {
+ // propagate, possibly creating new SIVs and ZIVs
+ DEBUG(dbgs() << " propagating\n");
+ DEBUG(dbgs() << "\tMivs = ");
+ DEBUG(dumpSmallBitVector(Mivs));
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ // SJ is an MIV subscript that's part of the current coupled group
+ DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
+ if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
+ Constraints, Result.Consistent)) {
+ DEBUG(dbgs() << "\t Changed\n");
+ ++DeltaPropagations;
+ Pair[SJ].Classification =
+ classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
+ Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[SJ].Loops);
+ switch (Pair[SJ].Classification) {
+ case Subscript::ZIV:
+ DEBUG(dbgs() << "ZIV\n");
+ if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
+ return NULL;
+ Mivs.reset(SJ);
+ break;
+ case Subscript::SIV:
+ Sivs.set(SJ);
+ Mivs.reset(SJ);
+ break;
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("bad subscript classification");
+ }
+ }
+ }
+ }
+ }
+
+ // test & propagate remaining RDIVs
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::RDIV) {
+ DEBUG(dbgs() << "RDIV test\n");
+ if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
+ return NULL;
+ // I don't yet understand how to propagate RDIV results
+ Mivs.reset(SJ);
+ }
+ }
+
+ // test remaining MIVs
+ // This code is temporary.
+ // Better to somehow test all remaining subscripts simultaneously.
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::MIV) {
+ DEBUG(dbgs() << "MIV test\n");
+ if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
+ return NULL;
+ }
+ else
+ llvm_unreachable("expected only MIV subscripts at this point");
+ }
+
+ // update Result.DV from constraint vector
+ DEBUG(dbgs() << " updating\n");
+ for (int SJ = ConstrainedLevels.find_first();
+ SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
+ updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
+ if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
+ return NULL;
+ }
+ }
+ }
+
+ // Make sure the Scalar flags are set correctly.
+ SmallBitVector CompleteLoops(MaxLevels + 1);
+ for (unsigned SI = 0; SI < Pairs; ++SI)
+ CompleteLoops |= Pair[SI].Loops;
+ for (unsigned II = 1; II <= CommonLevels; ++II)
+ if (CompleteLoops[II])
+ Result.DV[II - 1].Scalar = false;
+
+ if (PossiblyLoopIndependent) {
+ // Make sure the LoopIndependent flag is set correctly.
+ // All directions must include equal, otherwise no
+ // loop-independent dependence is possible.
+ for (unsigned II = 1; II <= CommonLevels; ++II) {
+ if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
+ Result.LoopIndependent = false;
+ break;
+ }
+ }
+ }
+ else {
+ // On the other hand, if all directions are equal and there's no
+ // loop-independent dependence possible, then no dependence exists.
+ bool AllEqual = true;
+ for (unsigned II = 1; II <= CommonLevels; ++II) {
+ if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
+ AllEqual = false;
+ break;
+ }
+ }
+ if (AllEqual)
+ return NULL;
+ }
+
+ FullDependence *Final = new FullDependence(Result);
+ Result.DV = NULL;
+ return Final;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// getSplitIteration -
+// Rather than spend rarely-used space recording the splitting iteration
+// during the Weak-Crossing SIV test, we re-compute it on demand.
+// The re-computation is basically a repeat of the entire dependence test,
+// though simplified since we know that the dependence exists.
+// It's tedious, since we must go through all propagations, etc.
+//
+// Care is required to keep this code up to date with respect to the routine
+// above, depends().
+//
+// Generally, the dependence analyzer will be used to build
+// a dependence graph for a function (basically a map from instructions
+// to dependences). Looking for cycles in the graph shows us loops
+// that cannot be trivially vectorized/parallelized.
+//
+// We can try to improve the situation by examining all the dependences
+// that make up the cycle, looking for ones we can break.
+// Sometimes, peeling the first or last iteration of a loop will break
+// dependences, and we've got flags for those possibilities.
+// Sometimes, splitting a loop at some other iteration will do the trick,
+// and we've got a flag for that case. Rather than waste the space to
+// record the exact iteration (since we rarely know), we provide
+// a method that calculates the iteration. It's a drag that it must work
+// from scratch, but wonderful in that it's possible.
+//
+// Here's an example:
+//
+// for (i = 0; i < 10; i++)
+// A[i] = ...
+// ... = A[11 - i]
+//
+// There's a loop-carried flow dependence from the store to the load,
+// found by the weak-crossing SIV test. The dependence will have a flag,
+// indicating that the dependence can be broken by splitting the loop.
+// Calling getSplitIteration will return 5.
+// Splitting the loop breaks the dependence, like so:
+//
+// for (i = 0; i <= 5; i++)
+// A[i] = ...
+// ... = A[11 - i]
+// for (i = 6; i < 10; i++)
+// A[i] = ...
+// ... = A[11 - i]
+//
+// breaks the dependence and allows us to vectorize/parallelize
+// both loops.
+const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
+ unsigned SplitLevel) {
+ assert(Dep && "expected a pointer to a Dependence");
+ assert(Dep->isSplitable(SplitLevel) &&
+ "Dep should be splitable at SplitLevel");
+ Instruction *Src = Dep->getSrc();
+ Instruction *Dst = Dep->getDst();
+ assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
+ assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
+ assert(isLoadOrStore(Src));
+ assert(isLoadOrStore(Dst));
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
+ assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
+ AliasAnalysis::MustAlias);
+
+ // establish loop nesting levels
+ establishNestingLevels(Src, Dst);
+
+ FullDependence Result(Src, Dst, false, CommonLevels);
+
+ // See if there are GEPs we can use.
+ bool UsefulGEP = false;
+ GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ if (SrcGEP && DstGEP &&
+ SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
+ const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
+ const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
+ UsefulGEP =
+ isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ }
+ unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
+ SmallVector<Subscript, 4> Pair(Pairs);
+ if (UsefulGEP) {
+ unsigned P = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin();
+ SrcIdx != SrcEnd;
+ ++SrcIdx, ++DstIdx, ++P) {
+ Pair[P].Src = SE->getSCEV(*SrcIdx);
+ Pair[P].Dst = SE->getSCEV(*DstIdx);
+ }
+ }
+ else {
+ const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+ const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+ Pair[0].Src = SrcSCEV;
+ Pair[0].Dst = DstSCEV;
+ }
+
+ for (unsigned P = 0; P < Pairs; ++P) {
+ Pair[P].Loops.resize(MaxLevels + 1);
+ Pair[P].GroupLoops.resize(MaxLevels + 1);
+ Pair[P].Group.resize(Pairs);
+ removeMatchingExtensions(&Pair[P]);
+ Pair[P].Classification =
+ classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
+ Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[P].Loops);
+ Pair[P].GroupLoops = Pair[P].Loops;
+ Pair[P].Group.set(P);
+ }
+
+ SmallBitVector Separable(Pairs);
+ SmallBitVector Coupled(Pairs);
+
+ // partition subscripts into separable and minimally-coupled groups
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ if (Pair[SI].Classification == Subscript::NonLinear) {
+ // ignore these, but collect loops for later
+ collectCommonLoops(Pair[SI].Src,
+ LI->getLoopFor(Src->getParent()),
+ Pair[SI].Loops);
+ collectCommonLoops(Pair[SI].Dst,
+ LI->getLoopFor(Dst->getParent()),
+ Pair[SI].Loops);
+ Result.Consistent = false;
+ }
+ else if (Pair[SI].Classification == Subscript::ZIV)
+ Separable.set(SI);
+ else {
+ // SIV, RDIV, or MIV, so check for coupled group
+ bool Done = true;
+ for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
+ SmallBitVector Intersection = Pair[SI].GroupLoops;
+ Intersection &= Pair[SJ].GroupLoops;
+ if (Intersection.any()) {
+ // accumulate set of all the loops in group
+ Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
+ // accumulate set of all subscripts in group
+ Pair[SJ].Group |= Pair[SI].Group;
+ Done = false;
+ }
+ }
+ if (Done) {
+ if (Pair[SI].Group.count() == 1)
+ Separable.set(SI);
+ else
+ Coupled.set(SI);
+ }
+ }
+ }
+
+ Constraint NewConstraint;
+ NewConstraint.setAny(SE);
+
+ // test separable subscripts
+ for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ switch (Pair[SI].Classification) {
+ case Subscript::SIV: {
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
+ Result, NewConstraint, SplitIter);
+ if (Level == SplitLevel) {
+ assert(SplitIter != NULL);
+ return SplitIter;
+ }
+ break;
+ }
+ case Subscript::ZIV:
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("subscript has unexpected classification");
+ }
+ }
+
+ if (Coupled.count()) {
+ // test coupled subscript groups
+ SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
+ for (unsigned II = 0; II <= MaxLevels; ++II)
+ Constraints[II].setAny(SE);
+ for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ SmallBitVector Group(Pair[SI].Group);
+ SmallBitVector Sivs(Pairs);
+ SmallBitVector Mivs(Pairs);
+ SmallBitVector ConstrainedLevels(MaxLevels + 1);
+ for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::SIV)
+ Sivs.set(SJ);
+ else
+ Mivs.set(SJ);
+ }
+ while (Sivs.any()) {
+ bool Changed = false;
+ for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ // SJ is an SIV subscript that's part of the current coupled group
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
+ Result, NewConstraint, SplitIter);
+ if (Level == SplitLevel && SplitIter)
+ return SplitIter;
+ ConstrainedLevels.set(Level);
+ if (intersectConstraints(&Constraints[Level], &NewConstraint))
+ Changed = true;
+ Sivs.reset(SJ);
+ }
+ if (Changed) {
+ // propagate, possibly creating new SIVs and ZIVs
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ // SJ is an MIV subscript that's part of the current coupled group
+ if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
+ Pair[SJ].Loops, Constraints, Result.Consistent)) {
+ Pair[SJ].Classification =
+ classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
+ Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[SJ].Loops);
+ switch (Pair[SJ].Classification) {
+ case Subscript::ZIV:
+ Mivs.reset(SJ);
+ break;
+ case Subscript::SIV:
+ Sivs.set(SJ);
+ Mivs.reset(SJ);
+ break;
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("bad subscript classification");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ llvm_unreachable("somehow reached end of routine");
+ return NULL;
+}
diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp
new file mode 100644
index 000000000000..cde431459d50
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp
@@ -0,0 +1,232 @@
+//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines '-dot-dom' and '-dot-postdom' analysis passes, which emit
+// a dom.<fnname>.dot or postdom.<fnname>.dot file for each function in the
+// program, with a graph of the dominance/postdominance tree of that
+// function.
+//
+// There are also passes available to directly call dotty ('-view-dom' or
+// '-view-postdom'). By appending '-only' like '-dot-dom-only' only the
+// names of the bbs are printed, but the content is hidden.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DomPrinter.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/PostDominators.h"
+
+using namespace llvm;
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {
+
+ BasicBlock *BB = Node->getBlock();
+
+ if (!BB)
+ return "Post dominance root node";
+
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+};
+
+template<>
+struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(DominatorTree *DT) {
+ return "Dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+
+template<>
+struct DOTGraphTraits<PostDominatorTree*>
+ : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(PostDominatorTree *DT) {
+ return "Post dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+}
+
+namespace {
+struct DomViewer
+ : public DOTGraphTraitsViewer<DominatorTree, false> {
+ static char ID;
+ DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){
+ initializeDomViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct DomOnlyViewer
+ : public DOTGraphTraitsViewer<DominatorTree, true> {
+ static char ID;
+ DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){
+ initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomViewer
+ : public DOTGraphTraitsViewer<PostDominatorTree, false> {
+ static char ID;
+ PostDomViewer() :
+ DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){
+ initializePostDomViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomOnlyViewer
+ : public DOTGraphTraitsViewer<PostDominatorTree, true> {
+ static char ID;
+ PostDomOnlyViewer() :
+ DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){
+ initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // end anonymous namespace
+
+char DomViewer::ID = 0;
+INITIALIZE_PASS(DomViewer, "view-dom",
+ "View dominance tree of function", false, false)
+
+char DomOnlyViewer::ID = 0;
+INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
+ "View dominance tree of function (with no function bodies)",
+ false, false)
+
+char PostDomViewer::ID = 0;
+INITIALIZE_PASS(PostDomViewer, "view-postdom",
+ "View postdominance tree of function", false, false)
+
+char PostDomOnlyViewer::ID = 0;
+INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
+ "View postdominance tree of function "
+ "(with no function bodies)",
+ false, false)
+
+namespace {
+struct DomPrinter
+ : public DOTGraphTraitsPrinter<DominatorTree, false> {
+ static char ID;
+ DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {
+ initializeDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct DomOnlyPrinter
+ : public DOTGraphTraitsPrinter<DominatorTree, true> {
+ static char ID;
+ DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {
+ initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomPrinter
+ : public DOTGraphTraitsPrinter<PostDominatorTree, false> {
+ static char ID;
+ PostDomPrinter() :
+ DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {
+ initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomOnlyPrinter
+ : public DOTGraphTraitsPrinter<PostDominatorTree, true> {
+ static char ID;
+ PostDomOnlyPrinter() :
+ DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {
+ initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // end anonymous namespace
+
+
+
+char DomPrinter::ID = 0;
+INITIALIZE_PASS(DomPrinter, "dot-dom",
+ "Print dominance tree of function to 'dot' file",
+ false, false)
+
+char DomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
+ "Print dominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false)
+
+char PostDomPrinter::ID = 0;
+INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
+ "Print postdominance tree of function to 'dot' file",
+ false, false)
+
+char PostDomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
+ "Print postdominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false)
+
+// Create methods available outside of this file, to use them
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// the link time optimization.
+
+FunctionPass *llvm::createDomPrinterPass() {
+ return new DomPrinter();
+}
+
+FunctionPass *llvm::createDomOnlyPrinterPass() {
+ return new DomOnlyPrinter();
+}
+
+FunctionPass *llvm::createDomViewerPass() {
+ return new DomViewer();
+}
+
+FunctionPass *llvm::createDomOnlyViewerPass() {
+ return new DomOnlyViewer();
+}
+
+FunctionPass *llvm::createPostDomPrinterPass() {
+ return new PostDomPrinter();
+}
+
+FunctionPass *llvm::createPostDomOnlyPrinterPass() {
+ return new PostDomOnlyPrinter();
+}
+
+FunctionPass *llvm::createPostDomViewerPass() {
+ return new PostDomViewer();
+}
+
+FunctionPass *llvm::createPostDomOnlyViewerPass() {
+ return new PostDomOnlyViewer();
+}
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
new file mode 100644
index 000000000000..7e4a89f1bd57
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -0,0 +1,141 @@
+//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char DominanceFrontier::ID = 0;
+INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+
+namespace {
+ class DFCalculateWorkObject {
+ public:
+ DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
+ const DomTreeNode *N,
+ const DomTreeNode *PN)
+ : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
+ BasicBlock *currentBB;
+ BasicBlock *parentBB;
+ const DomTreeNode *Node;
+ const DomTreeNode *parentNode;
+ };
+}
+
+void DominanceFrontier::anchor() { }
+
+const DominanceFrontier::DomSetType &
+DominanceFrontier::calculate(const DominatorTree &DT,
+ const DomTreeNode *Node) {
+ BasicBlock *BB = Node->getBlock();
+ DomSetType *Result = NULL;
+
+ std::vector<DFCalculateWorkObject> workList;
+ SmallPtrSet<BasicBlock *, 32> visited;
+
+ workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+ do {
+ DFCalculateWorkObject *currentW = &workList.back();
+ assert (currentW && "Missing work object.");
+
+ BasicBlock *currentBB = currentW->currentBB;
+ BasicBlock *parentBB = currentW->parentBB;
+ const DomTreeNode *currentNode = currentW->Node;
+ const DomTreeNode *parentNode = currentW->parentNode;
+ assert (currentBB && "Invalid work object. Missing current Basic Block");
+ assert (currentNode && "Invalid work object. Missing current Node");
+ DomSetType &S = Frontiers[currentBB];
+
+ // Visit each block only once.
+ if (visited.count(currentBB) == 0) {
+ visited.insert(currentBB);
+
+ // Loop over CFG successors to calculate DFlocal[currentNode]
+ for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
+ SI != SE; ++SI) {
+ // Does Node immediately dominate this successor?
+ if (DT[*SI]->getIDom() != currentNode)
+ S.insert(*SI);
+ }
+ }
+
+ // At this point, S is DFlocal. Now we union in DFup's of our children...
+ // Loop through and visit the nodes that Node immediately dominates (Node's
+ // children in the IDomTree)
+ bool visitChild = false;
+ for (DomTreeNode::const_iterator NI = currentNode->begin(),
+ NE = currentNode->end(); NI != NE; ++NI) {
+ DomTreeNode *IDominee = *NI;
+ BasicBlock *childBB = IDominee->getBlock();
+ if (visited.count(childBB) == 0) {
+ workList.push_back(DFCalculateWorkObject(childBB, currentBB,
+ IDominee, currentNode));
+ visitChild = true;
+ }
+ }
+
+ // If all children are visited or there is any child then pop this block
+ // from the workList.
+ if (!visitChild) {
+
+ if (!parentBB) {
+ Result = &S;
+ break;
+ }
+
+ DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
+ DomSetType &parentSet = Frontiers[parentBB];
+ for (; CDFI != CDFE; ++CDFI) {
+ if (!DT.properlyDominates(parentNode, DT[*CDFI]))
+ parentSet.insert(*CDFI);
+ }
+ workList.pop_back();
+ }
+
+ } while (!workList.empty());
+
+ return *Result;
+}
+
+void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " DomFrontier for BB ";
+ if (I->first)
+ WriteAsOperand(OS, I->first, false);
+ else
+ OS << " <<exit node>>";
+ OS << " is:\t";
+
+ const std::set<BasicBlock*> &BBs = I->second;
+
+ for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+ I != E; ++I) {
+ OS << ' ';
+ if (*I)
+ WriteAsOperand(OS, *I, false);
+ else
+ OS << "<<exit node>>";
+ }
+ OS << "\n";
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void DominanceFrontierBase::dump() const {
+ print(dbgs());
+}
+#endif
+
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
new file mode 100644
index 000000000000..7620fd9842cc
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
@@ -0,0 +1,338 @@
+//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraph class and provides the BasicCallGraph
+// default implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BasicCallGraph class definition
+//
+class BasicCallGraph : public ModulePass, public CallGraph {
+ // Root is root of the call graph, or the external node if a 'main' function
+ // couldn't be found.
+ //
+ CallGraphNode *Root;
+
+ // ExternalCallingNode - This node has edges to all external functions and
+ // those internal functions that have their address taken.
+ CallGraphNode *ExternalCallingNode;
+
+ // CallsExternalNode - This node has edges to it from all functions making
+ // indirect calls or calling an external function.
+ CallGraphNode *CallsExternalNode;
+
+public:
+ static char ID; // Class identification, replacement for typeinfo
+ BasicCallGraph() : ModulePass(ID), Root(0),
+ ExternalCallingNode(0), CallsExternalNode(0) {
+ initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnModule - Compute the call graph for the specified module.
+ virtual bool runOnModule(Module &M) {
+ CallGraph::initialize(M);
+
+ ExternalCallingNode = getOrInsertFunction(0);
+ CallsExternalNode = new CallGraphNode(0);
+ Root = 0;
+
+ // Add every function to the call graph.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ addToCallGraph(I);
+
+ // If we didn't find a main function, use the external call graph node
+ if (Root == 0) Root = ExternalCallingNode;
+
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual void print(raw_ostream &OS, const Module *) const {
+ OS << "CallGraph Root is: ";
+ if (Function *F = getRoot()->getFunction())
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << getRoot() << ">>\n";
+ }
+
+ CallGraph::print(OS, 0);
+ }
+
+ virtual void releaseMemory() {
+ destroy();
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it should
+ /// override this to adjust the this pointer as needed for the specified pass
+ /// info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &CallGraph::ID)
+ return (CallGraph*)this;
+ return this;
+ }
+
+ CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
+ CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
+
+ // getRoot - Return the root of the call graph, which is either main, or if
+ // main cannot be found, the external node.
+ //
+ CallGraphNode *getRoot() { return Root; }
+ const CallGraphNode *getRoot() const { return Root; }
+
+private:
+ //===---------------------------------------------------------------------
+ // Implementation of CallGraph construction
+ //
+
+ // addToCallGraph - Add a function to the call graph, and link the node to all
+ // of the functions that it calls.
+ //
+ void addToCallGraph(Function *F) {
+ CallGraphNode *Node = getOrInsertFunction(F);
+
+ // If this function has external linkage, anything could call it.
+ if (!F->hasLocalLinkage()) {
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // Found the entry point?
+ if (F->getName() == "main") {
+ if (Root) // Found multiple external mains? Don't pick one.
+ Root = ExternalCallingNode;
+ else
+ Root = Node; // Found a main, keep track of it!
+ }
+ }
+
+ // If this function has its address taken, anything could call it.
+ if (F->hasAddressTaken())
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // If this function is not defined in this translation unit, it could call
+ // anything.
+ if (F->isDeclaration() && !F->isIntrinsic())
+ Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+ // Look for calls by this function.
+ for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ CallSite CS(cast<Value>(II));
+ if (CS) {
+ const Function *Callee = CS.getCalledFunction();
+ if (!Callee)
+ // Indirect calls of intrinsics are not allowed so no need to check.
+ Node->addCalledFunction(CS, CallsExternalNode);
+ else if (!Callee->isIntrinsic())
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+ }
+ }
+ }
+
+ //
+ // destroy - Release memory for the call graph
+ virtual void destroy() {
+ /// CallsExternalNode is not in the function map, delete it explicitly.
+ if (CallsExternalNode) {
+ CallsExternalNode->allReferencesDropped();
+ delete CallsExternalNode;
+ CallsExternalNode = 0;
+ }
+ CallGraph::destroy();
+ }
+};
+
+} //End anonymous namespace
+
+INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
+INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
+ "Basic CallGraph Construction", false, true, true)
+
+char CallGraph::ID = 0;
+char BasicCallGraph::ID = 0;
+
+void CallGraph::initialize(Module &M) {
+ Mod = &M;
+}
+
+void CallGraph::destroy() {
+ if (FunctionMap.empty()) return;
+
+ // Reset all node's use counts to zero before deleting them to prevent an
+ // assertion from firing.
+#ifndef NDEBUG
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ I->second->allReferencesDropped();
+#endif
+
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ delete I->second;
+ FunctionMap.clear();
+}
+
+void CallGraph::print(raw_ostream &OS, Module*) const {
+ for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
+ I->second->print(OS);
+}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void CallGraph::dump() const {
+ print(dbgs(), 0);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// Implementations of public modification methods
+//
+
+// removeFunctionFromModule - Unlink the function from this module, returning
+// it. Because this removes the function from the module, the call graph node
+// is destroyed. This is only valid if the function does not call any other
+// functions (ie, there are no edges in it's CGN). The easiest way to do this
+// is to dropAllReferences before calling this.
+//
+Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
+ assert(CGN->empty() && "Cannot remove function from call "
+ "graph if it references other functions!");
+ Function *F = CGN->getFunction(); // Get the function for the call graph node
+ delete CGN; // Delete the call graph node for this func
+ FunctionMap.erase(F); // Remove the call graph node from the map
+
+ Mod->getFunctionList().remove(F);
+ return F;
+}
+
+/// spliceFunction - Replace the function represented by this node by another.
+/// This does not rescan the body of the function, so it is suitable when
+/// splicing the body of the old function to the new while also updating all
+/// callers from old to new.
+///
+void CallGraph::spliceFunction(const Function *From, const Function *To) {
+ assert(FunctionMap.count(From) && "No CallGraphNode for function!");
+ assert(!FunctionMap.count(To) &&
+ "Pointing CallGraphNode at a function that already exists");
+ FunctionMapTy::iterator I = FunctionMap.find(From);
+ I->second->F = const_cast<Function*>(To);
+ FunctionMap[To] = I->second;
+ FunctionMap.erase(I);
+}
+
+// getOrInsertFunction - This method is identical to calling operator[], but
+// it will insert a new CallGraphNode for the specified function if one does
+// not already exist.
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
+ CallGraphNode *&CGN = FunctionMap[F];
+ if (CGN) return CGN;
+
+ assert((!F || F->getParent() == Mod) && "Function not in current module!");
+ return CGN = new CallGraphNode(const_cast<Function*>(F));
+}
+
+void CallGraphNode::print(raw_ostream &OS) const {
+ if (Function *F = getFunction())
+ OS << "Call graph node for function: '" << F->getName() << "'";
+ else
+ OS << "Call graph node <<null function>>";
+
+ OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n';
+
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " CS<" << I->first << "> calls ";
+ if (Function *FI = I->second->getFunction())
+ OS << "function '" << FI->getName() <<"'\n";
+ else
+ OS << "external node\n";
+ }
+ OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void CallGraphNode::dump() const { print(dbgs()); }
+#endif
+
+/// removeCallEdgeFor - This method removes the edge in the node for the
+/// specified call site. Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ return;
+ }
+ }
+}
+
+// removeAnyCallEdgeTo - This method removes any call edges from this node to
+// the specified callee function. This takes more time to execute than
+// removeCallEdgeTo, so it should not be used unless necessary.
+void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
+ for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i)
+ if (CalledFunctions[i].second == Callee) {
+ Callee->DropRef();
+ CalledFunctions[i] = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ --i; --e;
+ }
+}
+
+/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
+/// from this node to the specified callee function.
+void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
+ CallRecord &CR = *I;
+ if (CR.second == Callee && CR.first == 0) {
+ Callee->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ return;
+ }
+ }
+}
+
+/// replaceCallEdge - This method replaces the edge in the node for the
+/// specified call site with a new one. Note that this method takes linear
+/// time, so it should be used sparingly.
+void CallGraphNode::replaceCallEdge(CallSite CS,
+ CallSite NewCS, CallGraphNode *NewNode){
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ I->first = NewCS.getInstruction();
+ I->second = NewNode;
+ NewNode->AddRef();
+ return;
+ }
+ }
+}
+
+// Enuse that users of CallGraph.h also link with this file
+DEFINING_FILE_FOR(CallGraph)
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
new file mode 100644
index 000000000000..a0d788f34a3c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -0,0 +1,614 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// which are implemented as bottom-up traversals on the call graph. Because
+// there may be cycles in the call graph, passes of this type operate on the
+// call-graph in SCC order: that is, they process function bottom-up, except for
+// recursive functions, which they process all at once.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cgscc-passmgr"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
+
+STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
+
+//===----------------------------------------------------------------------===//
+// CGPassManager
+//
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
+
+namespace {
+
+class CGPassManager : public ModulePass, public PMDataManager {
+public:
+ static char ID;
+ explicit CGPassManager()
+ : ModulePass(ID), PMDataManager() { }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ using ModulePass::doInitialization;
+ using ModulePass::doFinalization;
+
+ bool doInitialization(CallGraph &CG);
+ bool doFinalization(CallGraph &CG);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ // CGPassManager walks SCC and it needs CallGraph.
+ Info.addRequired<CallGraph>();
+ Info.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "CallGraph Pass Manager";
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+ }
+
+ Pass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<Pass *>(PassVector[N]);
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_CallGraphPassManager;
+ }
+
+private:
+ bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall);
+
+ bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall);
+ bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool IsCheckingMode);
+};
+
+} // end anonymous namespace.
+
+char CGPassManager::ID = 0;
+
+
+bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+ PMDataManager *PM = P->getAsPMDataManager();
+
+ if (PM == 0) {
+ CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
+ if (!CallGraphUpToDate) {
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ CallGraphUpToDate = true;
+ }
+
+ {
+ TimeRegion PassTimer(getPassTimer(CGSP));
+ Changed = CGSP->runOnSCC(CurSCC);
+ }
+
+ // After the CGSCCPass is done, when assertions are enabled, use
+ // RefreshCallGraph to verify that the callgraph was correctly updated.
+#ifndef NDEBUG
+ if (Changed)
+ RefreshCallGraph(CurSCC, CG, true);
+#endif
+
+ return Changed;
+ }
+
+
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ FPPassManager *FPP = (FPPassManager*)P;
+
+ // Run pass P on all functions in the current SCC.
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (Function *F = (*I)->getFunction()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
+ TimeRegion PassTimer(getPassTimer(FPP));
+ Changed |= FPP->runOnFunction(*F);
+ }
+ }
+
+ // The function pass(es) modified the IR, they may have clobbered the
+ // callgraph.
+ if (Changed && CallGraphUpToDate) {
+ DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: "
+ << P->getPassName() << '\n');
+ CallGraphUpToDate = false;
+ }
+ return Changed;
+}
+
+
+/// RefreshCallGraph - Scan the functions in the specified CFG and resync the
+/// callgraph with the call sites found in it. This is used after
+/// FunctionPasses have potentially munged the callgraph, and can be used after
+/// CallGraphSCC passes to verify that they correctly updated the callgraph.
+///
+/// This function returns true if it devirtualized an existing function call,
+/// meaning it turned an indirect call into a direct call. This happens when
+/// a function pass like GVN optimizes away stuff feeding the indirect call.
+/// This never happens in checking mode.
+///
+bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
+ CallGraph &CG, bool CheckingMode) {
+ DenseMap<Value*, CallGraphNode*> CallSites;
+
+ DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
+ << " nodes:\n";
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I)
+ (*I)->dump();
+ );
+
+ bool MadeChange = false;
+ bool DevirtualizedCall = false;
+
+ // Scan all functions in the SCC.
+ unsigned FunctionNo = 0;
+ for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end();
+ SCCIdx != E; ++SCCIdx, ++FunctionNo) {
+ CallGraphNode *CGN = *SCCIdx;
+ Function *F = CGN->getFunction();
+ if (F == 0 || F->isDeclaration()) continue;
+
+ // Walk the function body looking for call sites. Sync up the call sites in
+ // CGN with those actually in the function.
+
+ // Keep track of the number of direct and indirect calls that were
+ // invalidated and removed.
+ unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0;
+
+ // Get the set of call sites currently in the function.
+ for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
+ // If this call site is null, then the function pass deleted the call
+ // entirely and the WeakVH nulled it out.
+ if (I->first == 0 ||
+ // If we've already seen this call site, then the FunctionPass RAUW'd
+ // one call with another, which resulted in two "uses" in the edge
+ // list of the same call.
+ CallSites.count(I->first) ||
+
+ // If the call edge is not from a call or invoke, then the function
+ // pass RAUW'd a call with another value. This can happen when
+ // constant folding happens of well known functions etc.
+ !CallSite(I->first)) {
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If this was an indirect call site, count it.
+ if (I->second->getFunction() == 0)
+ ++NumIndirectRemoved;
+ else
+ ++NumDirectRemoved;
+
+ // Just remove the edge from the set of callees, keep track of whether
+ // I points to the last element of the vector.
+ bool WasLast = I + 1 == E;
+ CGN->removeCallEdge(I);
+
+ // If I pointed to the last element of the vector, we have to bail out:
+ // iterator checking rejects comparisons of the resultant pointer with
+ // end.
+ if (WasLast)
+ break;
+ E = CGN->end();
+ continue;
+ }
+
+ assert(!CallSites.count(I->first) &&
+ "Call site occurs in node multiple times");
+ CallSites.insert(std::make_pair(I->first, I->second));
+ ++I;
+ }
+
+ // Loop over all of the instructions in the function, getting the callsites.
+ // Keep track of the number of direct/indirect calls added.
+ unsigned NumDirectAdded = 0, NumIndirectAdded = 0;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS(cast<Value>(I));
+ if (!CS) continue;
+ Function *Callee = CS.getCalledFunction();
+ if (Callee && Callee->isIntrinsic()) continue;
+
+ // If this call site already existed in the callgraph, just verify it
+ // matches up to expectations and remove it from CallSites.
+ DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
+ CallSites.find(CS.getInstruction());
+ if (ExistingIt != CallSites.end()) {
+ CallGraphNode *ExistingNode = ExistingIt->second;
+
+ // Remove from CallSites since we have now seen it.
+ CallSites.erase(ExistingIt);
+
+ // Verify that the callee is right.
+ if (ExistingNode->getFunction() == CS.getCalledFunction())
+ continue;
+
+ // If we are in checking mode, we are not allowed to actually mutate
+ // the callgraph. If this is a case where we can infer that the
+ // callgraph is less precise than it could be (e.g. an indirect call
+ // site could be turned direct), don't reject it in checking mode, and
+ // don't tweak it to be more precise.
+ if (CheckingMode && CS.getCalledFunction() &&
+ ExistingNode->getFunction() == 0)
+ continue;
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If not, we either went from a direct call to indirect, indirect to
+ // direct, or direct to different direct.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction()) {
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ // Keep track of whether we turned an indirect call into a direct
+ // one.
+ if (ExistingNode->getFunction() == 0) {
+ DevirtualizedCall = true;
+ DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '"
+ << Callee->getName() << "'\n");
+ }
+ } else {
+ CalleeNode = CG.getCallsExternalNode();
+ }
+
+ // Update the edge target in CGN.
+ CGN->replaceCallEdge(CS, CS, CalleeNode);
+ MadeChange = true;
+ continue;
+ }
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If the call site didn't exist in the CGN yet, add it.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction()) {
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ ++NumDirectAdded;
+ } else {
+ CalleeNode = CG.getCallsExternalNode();
+ ++NumIndirectAdded;
+ }
+
+ CGN->addCalledFunction(CS, CalleeNode);
+ MadeChange = true;
+ }
+
+ // We scanned the old callgraph node, removing invalidated call sites and
+ // then added back newly found call sites. One thing that can happen is
+ // that an old indirect call site was deleted and replaced with a new direct
+ // call. In this case, we have devirtualized a call, and CGSCCPM would like
+ // to iteratively optimize the new code. Unfortunately, we don't really
+ // have a great way to detect when this happens. As an approximation, we
+ // just look at whether the number of indirect calls is reduced and the
+ // number of direct calls is increased. There are tons of ways to fool this
+ // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a
+ // direct call) but this is close enough.
+ if (NumIndirectRemoved > NumIndirectAdded &&
+ NumDirectRemoved < NumDirectAdded)
+ DevirtualizedCall = true;
+
+ // After scanning this function, if we still have entries in callsites, then
+ // they are dangling pointers. WeakVH should save us for this, so abort if
+ // this happens.
+ assert(CallSites.empty() && "Dangling pointers found in call sites map");
+
+ // Periodically do an explicit clear to remove tombstones when processing
+ // large scc's.
+ if ((FunctionNo & 15) == 15)
+ CallSites.clear();
+ }
+
+ DEBUG(if (MadeChange) {
+ dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I)
+ (*I)->dump();
+ if (DevirtualizedCall)
+ dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
+
+ } else {
+ dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
+ }
+ );
+ (void)MadeChange;
+
+ return DevirtualizedCall;
+}
+
+/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the
+/// specified SCC. This keeps track of whether a function pass devirtualizes
+/// any calls and returns it in DevirtualizedCall.
+bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+
+ // CallGraphUpToDate - Keep track of whether the callgraph is known to be
+ // up-to-date or not. The CGSSC pass manager runs two types of passes:
+ // CallGraphSCC Passes and other random function passes. Because other
+ // random function passes are not CallGraph aware, they may clobber the
+ // call graph by introducing new calls or deleting other ones. This flag
+ // is set to false when we run a function pass so that we know to clean up
+ // the callgraph when we need to run a CGSCCPass again.
+ bool CallGraphUpToDate = true;
+
+ // Run all passes on current SCC.
+ for (unsigned PassNo = 0, e = getNumContainedPasses();
+ PassNo != e; ++PassNo) {
+ Pass *P = getContainedPass(PassNo);
+
+ // If we're in -debug-pass=Executions mode, construct the SCC node list,
+ // otherwise avoid constructing this string as it is expensive.
+ if (isPassDebuggingExecutionsOrMore()) {
+ std::string Functions;
+ #ifndef NDEBUG
+ raw_string_ostream OS(Functions);
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (I != CurSCC.begin()) OS << ", ";
+ (*I)->print(OS);
+ }
+ OS.flush();
+ #endif
+ dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
+ }
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ // Actually run this pass on the current SCC.
+ Changed |= RunPassOnSCC(P, CurSCC, CG,
+ CallGraphUpToDate, DevirtualizedCall);
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
+ dumpPreservedSet(P);
+
+ verifyPreservedAnalysis(P);
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P, "", ON_CG_MSG);
+ }
+
+ // If the callgraph was left out of date (because the last pass run was a
+ // functionpass), refresh it before we move on to the next SCC.
+ if (!CallGraphUpToDate)
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ return Changed;
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool CGPassManager::runOnModule(Module &M) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ bool Changed = doInitialization(CG);
+
+ // Walk the callgraph in bottom-up SCC order.
+ scc_iterator<CallGraph*> CGI = scc_begin(&CG);
+
+ CallGraphSCC CurSCC(&CGI);
+ while (!CGI.isAtEnd()) {
+ // Copy the current SCC and increment past it so that the pass can hack
+ // on the SCC if it wants to without invalidating our iterator.
+ std::vector<CallGraphNode*> &NodeVec = *CGI;
+ CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size());
+ ++CGI;
+
+ // At the top level, we run all the passes in this pass manager on the
+ // functions in this SCC. However, we support iterative compilation in the
+ // case where a function pass devirtualizes a call to a function. For
+ // example, it is very common for a function pass (often GVN or instcombine)
+ // to eliminate the addressing that feeds into a call. With that improved
+ // information, we would like the call to be an inline candidate, infer
+ // mod-ref information etc.
+ //
+ // Because of this, we allow iteration up to a specified iteration count.
+ // This only happens in the case of a devirtualized call, so we only burn
+ // compile time in the case that we're making progress. We also have a hard
+ // iteration count limit in case there is crazy code.
+ unsigned Iteration = 0;
+ bool DevirtualizedCall = false;
+ do {
+ DEBUG(if (Iteration)
+ dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #"
+ << Iteration << '\n');
+ DevirtualizedCall = false;
+ Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
+ } while (Iteration++ < MaxIterations && DevirtualizedCall);
+
+ if (DevirtualizedCall)
+ DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration
+ << " times, due to -max-cg-scc-iterations\n");
+
+ if (Iteration > MaxSCCIterations)
+ MaxSCCIterations = Iteration;
+
+ }
+ Changed |= doFinalization(CG);
+ return Changed;
+}
+
+
+/// Initialize CG
+bool CGPassManager::doInitialization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ Changed |= ((FPPassManager*)PM)->doInitialization(CG.getModule());
+ } else {
+ Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doInitialization(CG);
+ }
+ }
+ return Changed;
+}
+
+/// Finalize CG
+bool CGPassManager::doFinalization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ Changed |= ((FPPassManager*)PM)->doFinalization(CG.getModule());
+ } else {
+ Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doFinalization(CG);
+ }
+ }
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCC Implementation
+//===----------------------------------------------------------------------===//
+
+/// ReplaceNode - This informs the SCC and the pass manager that the specified
+/// Old node has been deleted, and New is to be used in its place.
+void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
+ assert(Old != New && "Should not replace node with self");
+ for (unsigned i = 0; ; ++i) {
+ assert(i != Nodes.size() && "Node not in SCC");
+ if (Nodes[i] != Old) continue;
+ Nodes[i] = New;
+ break;
+ }
+
+ // Update the active scc_iterator so that it doesn't contain dangling
+ // pointers to the old CallGraphNode.
+ scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context;
+ CGI->ReplaceNode(Old, New);
+}
+
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCCPass Implementation
+//===----------------------------------------------------------------------===//
+
+/// Assign pass manager to manage this pass.
+void CallGraphSCCPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find CGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_CallGraphPassManager)
+ PMS.pop();
+
+ assert(!PMS.empty() && "Unable to handle Call Graph Pass");
+ CGPassManager *CGP;
+
+ if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager)
+ CGP = (CGPassManager*)PMS.top();
+ else {
+ // Create new Call Graph SCC Pass Manager if it does not exist.
+ assert(!PMS.empty() && "Unable to create Call Graph Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Call Graph Pass Manager
+ CGP = new CGPassManager();
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(CGP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = CGP;
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(CGP);
+ }
+
+ CGP->add(this);
+}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<CallGraph>();
+ AU.addPreserved<CallGraph>();
+}
+
+
+//===----------------------------------------------------------------------===//
+// PrintCallGraphPass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// PrintCallGraphPass - Print a Module corresponding to a call graph.
+ ///
+ class PrintCallGraphPass : public CallGraphSCCPass {
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+ public:
+ static char ID;
+ PrintCallGraphPass(const std::string &B, raw_ostream &o)
+ : CallGraphSCCPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) {
+ Out << Banner;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ (*I)->getFunction()->print(Out);
+ return false;
+ }
+ };
+
+} // end anonymous namespace.
+
+char PrintCallGraphPass::ID = 0;
+
+Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintCallGraphPass(Banner, O);
+}
+
diff --git a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp b/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp
new file mode 100644
index 000000000000..306ae7a4dbfb
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp
@@ -0,0 +1,87 @@
+//===- CallPrinter.cpp - DOT printer for call graph -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines '-dot-callgraph', which emit a callgraph.<fnname>.dot
+// containing the call graph of a module.
+//
+// There is also a pass available to directly call dotty ('-view-callgraph').
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallPrinter.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+template<>
+struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits {
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(CallGraph *Graph) {
+ return "Call graph";
+ }
+
+ std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) {
+ if (Function *Func = Node->getFunction())
+ return Func->getName();
+
+ return "external node";
+ }
+};
+
+} // end llvm namespace
+
+namespace {
+
+struct CallGraphViewer
+ : public DOTGraphTraitsModuleViewer<CallGraph, true> {
+ static char ID;
+
+ CallGraphViewer()
+ : DOTGraphTraitsModuleViewer<CallGraph, true>("callgraph", ID) {
+ initializeCallGraphViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct CallGraphPrinter
+ : public DOTGraphTraitsModulePrinter<CallGraph, true> {
+ static char ID;
+
+ CallGraphPrinter()
+ : DOTGraphTraitsModulePrinter<CallGraph, true>("callgraph", ID) {
+ initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+} // end anonymous namespace
+
+char CallGraphViewer::ID = 0;
+INITIALIZE_PASS(CallGraphViewer, "view-callgraph",
+ "View call graph",
+ false, false)
+
+char CallGraphPrinter::ID = 0;
+INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph",
+ "Print call graph to 'dot' file",
+ false, false)
+
+// Create methods available outside of this file, to use them
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// the link time optimization.
+
+ModulePass *llvm::createCallGraphViewerPass() {
+ return new CallGraphViewer();
+}
+
+ModulePass *llvm::createCallGraphPrinterPass() {
+ return new CallGraphPrinter();
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
new file mode 100644
index 000000000000..1c4f17d3819a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -0,0 +1,101 @@
+//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to seek out all of the types in use by the program. Note
+// that this analysis explicitly does not include types only used by the symbol
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char FindUsedTypes::ID = 0;
+INITIALIZE_PASS(FindUsedTypes, "print-used-types",
+ "Find Used Types", false, true)
+
+// IncorporateType - Incorporate one type and all of its subtypes into the
+// collection of used types.
+//
+void FindUsedTypes::IncorporateType(Type *Ty) {
+ // If ty doesn't already exist in the used types map, add it now, otherwise
+ // return.
+ if (!UsedTypes.insert(Ty)) return; // Already contain Ty.
+
+ // Make sure to add any types this type references now.
+ //
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ IncorporateType(*I);
+}
+
+void FindUsedTypes::IncorporateValue(const Value *V) {
+ IncorporateType(V->getType());
+
+ // If this is a constant, it could be using other types...
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (!isa<GlobalValue>(C))
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI);
+ }
+}
+
+
+// run - This incorporates all types used by the specified module
+//
+bool FindUsedTypes::runOnModule(Module &m) {
+ UsedTypes.clear(); // reset if run multiple times...
+
+ // Loop over global variables, incorporating their types
+ for (Module::const_global_iterator I = m.global_begin(), E = m.global_end();
+ I != E; ++I) {
+ IncorporateType(I->getType());
+ if (I->hasInitializer())
+ IncorporateValue(I->getInitializer());
+ }
+
+ for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) {
+ IncorporateType(MI->getType());
+ const Function &F = *MI;
+
+ // Loop over all of the instructions in the function, adding their return
+ // type as well as the types of their operands.
+ //
+ for (const_inst_iterator II = inst_begin(F), IE = inst_end(F);
+ II != IE; ++II) {
+ const Instruction &I = *II;
+
+ IncorporateType(I.getType()); // Incorporate the type of the instruction
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI); // Insert inst operand types as well
+ }
+ }
+
+ return false;
+}
+
+// Print the types found in the module. If the optional Module parameter is
+// passed in, then the types are printed symbolically if possible, using the
+// symbol table from the module.
+//
+void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
+ OS << "Types in use by this module:\n";
+ for (SetVector<Type *>::const_iterator I = UsedTypes.begin(),
+ E = UsedTypes.end(); I != E; ++I) {
+ OS << " " << **I << '\n';
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
new file mode 100644
index 000000000000..92d0d2318e0d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -0,0 +1,607 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass provides alias and mod/ref information for global values
+// that do not have their address taken, and keeps track of whether functions
+// read or write memory (are "pure"). For this simple (but very common) case,
+// we can provide pretty accurate and useful information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalsmodref-aa"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InstIterator.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNonAddrTakenGlobalVars,
+ "Number of global vars without address taken");
+STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
+STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
+STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
+STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
+
+namespace {
+ /// FunctionRecord - One instance of this structure is stored for every
+ /// function in the program. Later, the entries for these functions are
+ /// removed if the function is found to call an external function (in which
+ /// case we know nothing about it.
+ struct FunctionRecord {
+ /// GlobalInfo - Maintain mod/ref info for all of the globals without
+ /// addresses taken that are read or written (transitively) by this
+ /// function.
+ std::map<const GlobalValue*, unsigned> GlobalInfo;
+
+ /// MayReadAnyGlobal - May read global variables, but it is not known which.
+ bool MayReadAnyGlobal;
+
+ unsigned getInfoForGlobal(const GlobalValue *GV) const {
+ unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
+ std::map<const GlobalValue*, unsigned>::const_iterator I =
+ GlobalInfo.find(GV);
+ if (I != GlobalInfo.end())
+ Effect |= I->second;
+ return Effect;
+ }
+
+ /// FunctionEffect - Capture whether or not this function reads or writes to
+ /// ANY memory. If not, we can do a lot of aggressive analysis on it.
+ unsigned FunctionEffect;
+
+ FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {}
+ };
+
+ /// GlobalsModRef - The actual analysis pass.
+ class GlobalsModRef : public ModulePass, public AliasAnalysis {
+ /// NonAddressTakenGlobals - The globals that do not have their addresses
+ /// taken.
+ std::set<const GlobalValue*> NonAddressTakenGlobals;
+
+ /// IndirectGlobals - The memory pointed to by this global is known to be
+ /// 'owned' by the global.
+ std::set<const GlobalValue*> IndirectGlobals;
+
+ /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+ /// indirect global, this map indicates which one.
+ std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals;
+
+ /// FunctionInfo - For each function, keep track of what globals are
+ /// modified or read.
+ std::map<const Function*, FunctionRecord> FunctionInfo;
+
+ public:
+ static char ID;
+ GlobalsModRef() : ModulePass(ID) {
+ initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this); // set up super class
+ AnalyzeGlobals(M); // find non-addr taken globals
+ AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<CallGraph>();
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Location &LocA, const Location &LocB);
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(const Function *F) {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
+ }
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+ }
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (const Function* F = CS.getCalledFunction())
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
+ }
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+ }
+
+ virtual void deleteValue(Value *V);
+ virtual void copyValue(Value *From, Value *To);
+ virtual void addEscapingUse(Use &U);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ /// getFunctionInfo - Return the function info for the function, or null if
+ /// we don't have anything useful to say about it.
+ FunctionRecord *getFunctionInfo(const Function *F) {
+ std::map<const Function*, FunctionRecord>::iterator I =
+ FunctionInfo.find(F);
+ if (I != FunctionInfo.end())
+ return &I->second;
+ return 0;
+ }
+
+ void AnalyzeGlobals(Module &M);
+ void AnalyzeCallGraph(CallGraph &CG, Module &M);
+ bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers,
+ std::vector<Function*> &Writers,
+ GlobalValue *OkayStoreDest = 0);
+ bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
+ };
+}
+
+char GlobalsModRef::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false)
+
+Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValue's in the program. If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
+void GlobalsModRef::AnalyzeGlobals(Module &M) {
+ std::vector<Function*> Readers, Writers;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
+ // Remember that we are tracking this global.
+ NonAddressTakenGlobals.insert(I);
+ ++NumNonAddrTakenFunctions;
+ }
+ Readers.clear(); Writers.clear();
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (I->hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
+ // Remember that we are tracking this global, and the mod/ref fns
+ NonAddressTakenGlobals.insert(I);
+
+ for (unsigned i = 0, e = Readers.size(); i != e; ++i)
+ FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref;
+
+ if (!I->isConstant()) // No need to keep track of writers to constants
+ for (unsigned i = 0, e = Writers.size(); i != e; ++i)
+ FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod;
+ ++NumNonAddrTakenGlobalVars;
+
+ // If this global holds a pointer type, see if it is an indirect global.
+ if (I->getType()->getElementType()->isPointerTy() &&
+ AnalyzeIndirectGlobalMemory(I))
+ ++NumIndirectGlobalVars;
+ }
+ Readers.clear(); Writers.clear();
+ }
+}
+
+/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true. Also, while we are at it, keep track of those functions that read and
+/// write to the value.
+///
+/// If OkayStoreDest is non-null, stores into this global are allowed.
+bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
+ std::vector<Function*> &Readers,
+ std::vector<Function*> &Writers,
+ GlobalValue *OkayStoreDest) {
+ if (!V->getType()->isPointerTy()) return true;
+
+ for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ Readers.push_back(LI->getParent()->getParent());
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (V == SI->getOperand(1)) {
+ Writers.push_back(SI->getParent()->getParent());
+ } else if (SI->getOperand(1) != OkayStoreDest) {
+ return true; // Storing the pointer
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
+ return true;
+ } else if (isFreeCall(U, TLI)) {
+ Writers.push_back(cast<Instruction>(U)->getParent()->getParent());
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ if (CI->getArgOperand(i) == V) return true;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i)
+ if (II->getArgOperand(i) == V) return true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr ||
+ CE->getOpcode() == Instruction::BitCast) {
+ if (AnalyzeUsesOfPointer(CE, Readers, Writers))
+ return true;
+ } else {
+ return true;
+ }
+ } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return true; // Allow comparison against null.
+ } else {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
+ // Keep track of values related to the allocation of the memory, f.e. the
+ // value produced by the malloc call and any casts.
+ std::vector<Value*> AllocRelatedValues;
+
+ // Walk the user list of the global. If we find anything other than a direct
+ // load or store, bail out.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+ User *U = *I;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // The pointer loaded from the global can only be used in simple ways:
+ // we allow addressing of it and loading storing to it. We do *not* allow
+ // storing the loaded pointer somewhere else or passing to a function.
+ std::vector<Function*> ReadersWriters;
+ if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
+ return false; // Loaded pointer escapes.
+ // TODO: Could try some IP mod/ref of the loaded pointer.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Storing the global itself.
+ if (SI->getOperand(0) == GV) return false;
+
+ // If storing the null pointer, ignore it.
+ if (isa<ConstantPointerNull>(SI->getOperand(0)))
+ continue;
+
+ // Check the value being stored.
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
+
+ if (!isAllocLikeFn(Ptr, TLI))
+ return false; // Too hard to analyze.
+
+ // Analyze all uses of the allocation. If any of them are used in a
+ // non-simple way (e.g. stored to another global) bail out.
+ std::vector<Function*> ReadersWriters;
+ if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV))
+ return false; // Loaded pointer escapes.
+
+ // Remember that this allocation is related to the indirect global.
+ AllocRelatedValues.push_back(Ptr);
+ } else {
+ // Something complex, bail out.
+ return false;
+ }
+ }
+
+ // Okay, this is an indirect global. Remember all of the allocations for
+ // this global in AllocsForIndirectGlobals.
+ while (!AllocRelatedValues.empty()) {
+ AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+ AllocRelatedValues.pop_back();
+ }
+ IndirectGlobals.insert(GV);
+ return true;
+}
+
+/// AnalyzeCallGraph - At this point, we know the functions where globals are
+/// immediately stored to and read from. Propagate this information up the call
+/// graph to all callers and compute the mod/ref info for all memory for each
+/// function.
+void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
+ for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E;
+ ++I) {
+ std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ if (!SCC[0]->getFunction()) {
+ // Calls externally - can't say anything useful. Remove any existing
+ // function records (may have been created when scanning globals).
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ FunctionInfo.erase(SCC[i]->getFunction());
+ continue;
+ }
+
+ FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()];
+
+ bool KnowNothing = false;
+ unsigned FunctionEffect = 0;
+
+ // Collect the mod/ref properties due to called functions. We only compute
+ // one mod-ref set.
+ for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (!F) {
+ KnowNothing = true;
+ break;
+ }
+
+ if (F->isDeclaration()) {
+ // Try to get mod/ref behaviour from function attributes.
+ if (F->doesNotAccessMemory()) {
+ // Can't do better than that!
+ } else if (F->onlyReadsMemory()) {
+ FunctionEffect |= Ref;
+ if (!F->isIntrinsic())
+ // This function might call back into the module and read a global -
+ // consider every global as possibly being read by this function.
+ FR.MayReadAnyGlobal = true;
+ } else {
+ FunctionEffect |= ModRef;
+ // Can't say anything useful unless it's an intrinsic - they don't
+ // read or write global variables of the kind considered here.
+ KnowNothing = !F->isIntrinsic();
+ }
+ continue;
+ }
+
+ for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
+ CI != E && !KnowNothing; ++CI)
+ if (Function *Callee = CI->second->getFunction()) {
+ if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) {
+ // Propagate function effect up.
+ FunctionEffect |= CalleeFR->FunctionEffect;
+
+ // Incorporate callee's effects on globals into our info.
+ for (std::map<const GlobalValue*, unsigned>::iterator GI =
+ CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end();
+ GI != E; ++GI)
+ FR.GlobalInfo[GI->first] |= GI->second;
+ FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal;
+ } else {
+ // Can't say anything about it. However, if it is inside our SCC,
+ // then nothing needs to be done.
+ CallGraphNode *CalleeNode = CG[Callee];
+ if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ KnowNothing = true;
+ }
+ } else {
+ KnowNothing = true;
+ }
+ }
+
+ // If we can't say anything useful about this SCC, remove all SCC functions
+ // from the FunctionInfo map.
+ if (KnowNothing) {
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ FunctionInfo.erase(SCC[i]->getFunction());
+ continue;
+ }
+
+ // Scan the function bodies for explicit loads or stores.
+ for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i)
+ for (inst_iterator II = inst_begin(SCC[i]->getFunction()),
+ E = inst_end(SCC[i]->getFunction());
+ II != E && FunctionEffect != ModRef; ++II)
+ if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) {
+ FunctionEffect |= Ref;
+ if (LI->isVolatile())
+ // Volatile loads may have side-effects, so mark them as writing
+ // memory (for example, a flag inside the processor).
+ FunctionEffect |= Mod;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(&*II)) {
+ FunctionEffect |= Mod;
+ if (SI->isVolatile())
+ // Treat volatile stores as reading memory somewhere.
+ FunctionEffect |= Ref;
+ } else if (isAllocationFn(&*II, TLI) || isFreeCall(&*II, TLI)) {
+ FunctionEffect |= ModRef;
+ } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) {
+ // The callgraph doesn't include intrinsic calls.
+ Function *Callee = Intrinsic->getCalledFunction();
+ ModRefBehavior Behaviour = AliasAnalysis::getModRefBehavior(Callee);
+ FunctionEffect |= (Behaviour & ModRef);
+ }
+
+ if ((FunctionEffect & Mod) == 0)
+ ++NumReadMemFunctions;
+ if (FunctionEffect == 0)
+ ++NumNoMemFunctions;
+ FR.FunctionEffect = FunctionEffect;
+
+ // Finally, now that we know the full effect on this SCC, clone the
+ // information to each function in the SCC.
+ for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+ FunctionInfo[SCC[i]->getFunction()] = FR;
+ }
+}
+
+
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasAnalysis::AliasResult
+GlobalsModRef::alias(const Location &LocA,
+ const Location &LocB) {
+ // Get the base object these pointers point to.
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr);
+
+ // If either of the underlying values is a global, they may be non-addr-taken
+ // globals, which we can answer queries about.
+ const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+ const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+ if (GV1 || GV2) {
+ // If the global's address is taken, pretend we don't know it's a pointer to
+ // the global.
+ if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0;
+
+ // If the two pointers are derived from two different non-addr-taken
+ // globals, or if one is and the other isn't, we know these can't alias.
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ // Otherwise if they are both derived from the same addr-taken global, we
+ // can't know the two accesses don't overlap.
+ }
+
+ // These pointers may be based on the memory owned by an indirect global. If
+ // so, we may be able to handle this. First check to see if the base pointer
+ // is a direct load from an indirect global.
+ GV1 = GV2 = 0;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV1 = GV;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV2 = GV;
+
+ // These pointers may also be from an allocation for the indirect global. If
+ // so, also handle them.
+ if (AllocsForIndirectGlobals.count(UV1))
+ GV1 = AllocsForIndirectGlobals[UV1];
+ if (AllocsForIndirectGlobals.count(UV2))
+ GV2 = AllocsForIndirectGlobals[UV2];
+
+ // Now that we know whether the two pointers are related to indirect globals,
+ // use this to disambiguate the pointers. If either pointer is based on an
+ // indirect global and if they are not both based on the same indirect global,
+ // they cannot alias.
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ return AliasAnalysis::alias(LocA, LocB);
+}
+
+AliasAnalysis::ModRefResult
+GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ unsigned Known = ModRef;
+
+ // If we are asking for mod/ref info of a direct call with a pointer to a
+ // global we are tracking, return information if we have it.
+ if (const GlobalValue *GV =
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
+ if (GV->hasLocalLinkage())
+ if (const Function *F = CS.getCalledFunction())
+ if (NonAddressTakenGlobals.count(GV))
+ if (const FunctionRecord *FR = getFunctionInfo(F))
+ Known = FR->getInfoForGlobal(GV);
+
+ if (Known == NoModRef)
+ return NoModRef; // No need to query other mod/ref analyses
+ return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods to update the analysis as a result of the client transformation.
+//
+void GlobalsModRef::deleteValue(Value *V) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (NonAddressTakenGlobals.erase(GV)) {
+ // This global might be an indirect global. If so, remove it and remove
+ // any AllocRelatedValues for it.
+ if (IndirectGlobals.erase(GV)) {
+ // Remove any entries in AllocsForIndirectGlobals for this global.
+ for (std::map<const Value*, const GlobalValue*>::iterator
+ I = AllocsForIndirectGlobals.begin(),
+ E = AllocsForIndirectGlobals.end(); I != E; ) {
+ if (I->second == GV) {
+ AllocsForIndirectGlobals.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+ }
+ }
+ }
+
+ // Otherwise, if this is an allocation related to an indirect global, remove
+ // it.
+ AllocsForIndirectGlobals.erase(V);
+
+ AliasAnalysis::deleteValue(V);
+}
+
+void GlobalsModRef::copyValue(Value *From, Value *To) {
+ AliasAnalysis::copyValue(From, To);
+}
+
+void GlobalsModRef::addEscapingUse(Use &U) {
+ // For the purposes of this analysis, it is conservatively correct to treat
+ // a newly escaping value equivalently to a deleted one. We could perhaps
+ // be more precise by processing the new use and attempting to update our
+ // saved analysis results to accommodate it.
+ deleteValue(U);
+
+ AliasAnalysis::addEscapingUse(U);
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
new file mode 100644
index 000000000000..aa5164e9e79b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
@@ -0,0 +1,31 @@
+//===-- IPA.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the IPA library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeIPA - Initialize all passes linked into the IPA library.
+void llvm::initializeIPA(PassRegistry &Registry) {
+ initializeBasicCallGraphPass(Registry);
+ initializeCallGraphAnalysisGroup(Registry);
+ initializeCallGraphPrinterPass(Registry);
+ initializeCallGraphViewerPass(Registry);
+ initializeFindUsedTypesPass(Registry);
+ initializeGlobalsModRefPass(Registry);
+}
+
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+ initializeIPA(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
new file mode 100644
index 000000000000..35c45e61808b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
@@ -0,0 +1,1239 @@
+//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inline cost analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline-cost"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
+
+namespace {
+
+class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
+ typedef InstVisitor<CallAnalyzer, bool> Base;
+ friend class InstVisitor<CallAnalyzer, bool>;
+
+ // DataLayout if available, or null.
+ const DataLayout *const TD;
+
+ /// The TargetTransformInfo available for this compilation.
+ const TargetTransformInfo &TTI;
+
+ // The called function.
+ Function &F;
+
+ int Threshold;
+ int Cost;
+
+ bool IsCallerRecursive;
+ bool IsRecursiveCall;
+ bool ExposesReturnsTwice;
+ bool HasDynamicAlloca;
+ bool ContainsNoDuplicateCall;
+
+ /// Number of bytes allocated statically by the callee.
+ uint64_t AllocatedSize;
+ unsigned NumInstructions, NumVectorInstructions;
+ int FiftyPercentVectorBonus, TenPercentVectorBonus;
+ int VectorBonus;
+
+ // While we walk the potentially-inlined instructions, we build up and
+ // maintain a mapping of simplified values specific to this callsite. The
+ // idea is to propagate any special information we have about arguments to
+ // this call through the inlinable section of the function, and account for
+ // likely simplifications post-inlining. The most important aspect we track
+ // is CFG altering simplifications -- when we prove a basic block dead, that
+ // can cause dramatic shifts in the cost of inlining a function.
+ DenseMap<Value *, Constant *> SimplifiedValues;
+
+ // Keep track of the values which map back (through function arguments) to
+ // allocas on the caller stack which could be simplified through SROA.
+ DenseMap<Value *, Value *> SROAArgValues;
+
+ // The mapping of caller Alloca values to their accumulated cost savings. If
+ // we have to disable SROA for one of the allocas, this tells us how much
+ // cost must be added.
+ DenseMap<Value *, int> SROAArgCosts;
+
+ // Keep track of values which map to a pointer base and constant offset.
+ DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
+
+ // Custom simplification helper routines.
+ bool isAllocaDerivedArg(Value *V);
+ bool lookupSROAArgAndCost(Value *V, Value *&Arg,
+ DenseMap<Value *, int>::iterator &CostIt);
+ void disableSROA(DenseMap<Value *, int>::iterator CostIt);
+ void disableSROA(Value *V);
+ void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+ bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
+ bool simplifyCallSite(Function *F, CallSite CS);
+ ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
+
+ // Custom analysis routines.
+ bool analyzeBlock(BasicBlock *BB);
+
+ // Disable several entry points to the visitor so we don't accidentally use
+ // them by declaring but not defining them here.
+ void visit(Module *); void visit(Module &);
+ void visit(Function *); void visit(Function &);
+ void visit(BasicBlock *); void visit(BasicBlock &);
+
+ // Provide base case for our instruction visit.
+ bool visitInstruction(Instruction &I);
+
+ // Our visit overrides.
+ bool visitAlloca(AllocaInst &I);
+ bool visitPHI(PHINode &I);
+ bool visitGetElementPtr(GetElementPtrInst &I);
+ bool visitBitCast(BitCastInst &I);
+ bool visitPtrToInt(PtrToIntInst &I);
+ bool visitIntToPtr(IntToPtrInst &I);
+ bool visitCastInst(CastInst &I);
+ bool visitUnaryInstruction(UnaryInstruction &I);
+ bool visitICmp(ICmpInst &I);
+ bool visitSub(BinaryOperator &I);
+ bool visitBinaryOperator(BinaryOperator &I);
+ bool visitLoad(LoadInst &I);
+ bool visitStore(StoreInst &I);
+ bool visitExtractValue(ExtractValueInst &I);
+ bool visitInsertValue(InsertValueInst &I);
+ bool visitCallSite(CallSite CS);
+
+public:
+ CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
+ Function &Callee, int Threshold)
+ : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
+ IsCallerRecursive(false), IsRecursiveCall(false),
+ ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
+
+ bool analyzeCall(CallSite CS);
+
+ int getThreshold() { return Threshold; }
+ int getCost() { return Cost; }
+
+ // Keep a bunch of stats about the cost savings found so we can print them
+ // out when debugging.
+ unsigned NumConstantArgs;
+ unsigned NumConstantOffsetPtrArgs;
+ unsigned NumAllocaArgs;
+ unsigned NumConstantPtrCmps;
+ unsigned NumConstantPtrDiffs;
+ unsigned NumInstructionsSimplified;
+ unsigned SROACostSavings;
+ unsigned SROACostSavingsLost;
+
+ void dump();
+};
+
+} // namespace
+
+/// \brief Test whether the given value is an Alloca-derived function argument.
+bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
+ return SROAArgValues.count(V);
+}
+
+/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
+/// Returns false if V does not map to a SROA-candidate.
+bool CallAnalyzer::lookupSROAArgAndCost(
+ Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
+ if (SROAArgValues.empty() || SROAArgCosts.empty())
+ return false;
+
+ DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
+ if (ArgIt == SROAArgValues.end())
+ return false;
+
+ Arg = ArgIt->second;
+ CostIt = SROAArgCosts.find(Arg);
+ return CostIt != SROAArgCosts.end();
+}
+
+/// \brief Disable SROA for the candidate marked by this cost iterator.
+///
+/// This marks the candidate as no longer viable for SROA, and adds the cost
+/// savings associated with it back into the inline cost measurement.
+void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
+ // If we're no longer able to perform SROA we need to undo its cost savings
+ // and prevent subsequent analysis.
+ Cost += CostIt->second;
+ SROACostSavings -= CostIt->second;
+ SROACostSavingsLost += CostIt->second;
+ SROAArgCosts.erase(CostIt);
+}
+
+/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
+void CallAnalyzer::disableSROA(Value *V) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(V, SROAArg, CostIt))
+ disableSROA(CostIt);
+}
+
+/// \brief Accumulate the given cost for a particular SROA candidate.
+void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ CostIt->second += InstructionCost;
+ SROACostSavings += InstructionCost;
+}
+
+/// \brief Helper for the common pattern of handling a SROA candidate.
+/// Either accumulates the cost savings if the SROA remains valid, or disables
+/// SROA for the candidate.
+bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ if (IsSROAValid) {
+ accumulateSROACost(CostIt, InstructionCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ return false;
+}
+
+/// \brief Check whether a GEP's indices are all constant.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
+ return false;
+
+ return true;
+}
+
+/// \brief Accumulate a constant GEP offset into an APInt if possible.
+///
+/// Returns false if unable to compute the offset for any reason. Respects any
+/// simplified values known during the analysis of this callsite.
+bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
+ if (!TD)
+ return false;
+
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ assert(IntPtrWidth == Offset.getBitWidth());
+
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
+ OpC = dyn_cast<ConstantInt>(SimpleOp);
+ if (!OpC)
+ return false;
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD->getStructLayout(STy);
+ Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
+ Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
+ }
+ return true;
+}
+
+bool CallAnalyzer::visitAlloca(AllocaInst &I) {
+ // FIXME: Check whether inlining will turn a dynamic alloca into a static
+ // alloca, and handle that case.
+
+ // Accumulate the allocated size.
+ if (I.isStaticAlloca()) {
+ Type *Ty = I.getAllocatedType();
+ AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) :
+ Ty->getPrimitiveSizeInBits());
+ }
+
+ // We will happily inline static alloca instructions.
+ if (I.isStaticAlloca())
+ return Base::visitAlloca(I);
+
+ // FIXME: This is overly conservative. Dynamic allocas are inefficient for
+ // a variety of reasons, and so we would like to not inline them into
+ // functions which don't currently have a dynamic alloca. This simply
+ // disables inlining altogether in the presence of a dynamic alloca.
+ HasDynamicAlloca = true;
+ return false;
+}
+
+bool CallAnalyzer::visitPHI(PHINode &I) {
+ // FIXME: We should potentially be tracking values through phi nodes,
+ // especially when they collapse to a single value due to deleted CFG edges
+ // during inlining.
+
+ // FIXME: We need to propagate SROA *disabling* through phi nodes, even
+ // though we don't want to propagate it's bonuses. The idea is to disable
+ // SROA if it *might* be used in an inappropriate manner.
+
+ // Phi nodes are always zero-cost.
+ return true;
+}
+
+bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
+ SROAArg, CostIt);
+
+ // Try to fold GEPs of constant-offset call site argument pointers. This
+ // requires target data and inbounds GEPs.
+ if (TD && I.isInBounds()) {
+ // Check if we have a base + offset for the pointer.
+ Value *Ptr = I.getPointerOperand();
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
+ if (BaseAndOffset.first) {
+ // Check if the offset of this GEP is constant, and if so accumulate it
+ // into Offset.
+ if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
+ // Non-constant GEPs aren't folded, and disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
+ }
+
+ // Add the result as a new mapping to Base + Offset.
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also handle SROA candidates here, we already know that the GEP is
+ // all-constant indexed.
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ return true;
+ }
+ }
+
+ if (isGEPOffsetConstant(I)) {
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ // Constant GEPs are modeled as free.
+ return true;
+ }
+
+ // Variable GEPs will require math and will disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
+}
+
+bool CallAnalyzer::visitBitCast(BitCastInst &I) {
+ // Propagate constants through bitcasts.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offsets through casts
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ // Casts don't change the offset, just wrap it up.
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also look for SROA candidates here.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ // Bitcasts are always zero cost.
+ return true;
+}
+
+bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
+ // Propagate constants through ptrtoint.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offset pairs when converted to a plain integer provided the
+ // integer is large enough to represent the pointer.
+ unsigned IntegerSize = I.getType()->getScalarSizeInBits();
+ if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
+
+ // This is really weird. Technically, ptrtoint will disable SROA. However,
+ // unless that ptrtoint is *used* somewhere in the live basic blocks after
+ // inlining, it will be nuked, and SROA should proceed. All of the uses which
+ // would block SROA would also block SROA if applied directly to a pointer,
+ // and so we can just add the integer in here. The only places where SROA is
+ // preserved either cannot fire on an integer, or won't in-and-of themselves
+ // disable SROA (ext) w/o some later use that we would see and disable.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+}
+
+bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
+ // Propagate constants through ptrtoint.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offset pairs when round-tripped through a pointer without
+ // modifications provided the integer is not too large.
+ Value *Op = I.getOperand(0);
+ unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
+ if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
+
+ // "Propagate" SROA here in the same manner as we do for ptrtoint above.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+}
+
+bool CallAnalyzer::visitCastInst(CastInst &I) {
+ // Propagate constants through ptrtoint.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
+ disableSROA(I.getOperand(0));
+
+ return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+}
+
+bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
+ Value *Operand = I.getOperand(0);
+ Constant *COp = dyn_cast<Constant>(Operand);
+ if (!COp)
+ COp = SimplifiedValues.lookup(Operand);
+ if (COp)
+ if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
+ COp, TD)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Disable any SROA on the argument to arbitrary unary operators.
+ disableSROA(Operand);
+
+ return false;
+}
+
+bool CallAnalyzer::visitICmp(ICmpInst &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ // First try to handle simplified comparisons.
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Otherwise look for a comparison between constant offset pointers with
+ // a common base.
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the icmp to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrCmps;
+ return true;
+ }
+ }
+ }
+
+ // If the comparison is an equality comparison with null, we can simplify it
+ // for any alloca-derived argument.
+ if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
+ if (isAllocaDerivedArg(I.getOperand(0))) {
+ // We can actually predict the result of comparisons between an
+ // alloca-derived value and null. Note that this fires regardless of
+ // SROA firing.
+ bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
+ SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
+ : ConstantInt::getFalse(I.getType());
+ return true;
+ }
+
+ // Finally check for SROA candidates in comparisons.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (isa<ConstantPointerNull>(I.getOperand(1))) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ }
+
+ return false;
+}
+
+bool CallAnalyzer::visitSub(BinaryOperator &I) {
+ // Try to handle a special case: we can fold computing the difference of two
+ // constant-related pointers.
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the subtract to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrDiffs;
+ return true;
+ }
+ }
+ }
+
+ // Otherwise, fall back to the generic logic for simplifying and handling
+ // instructions.
+ return Base::visitSub(I);
+}
+
+bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD);
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
+ disableSROA(LHS);
+ disableSROA(RHS);
+
+ return false;
+}
+
+bool CallAnalyzer::visitLoad(LoadInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ }
+
+ return false;
+}
+
+bool CallAnalyzer::visitStore(StoreInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ }
+
+ return false;
+}
+
+bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
+ // Constant folding for extract value is trivial.
+ Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
+ if (!C)
+ C = SimplifiedValues.lookup(I.getAggregateOperand());
+ if (C) {
+ SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
+ return true;
+ }
+
+ // SROA can look through these but give them a cost.
+ return false;
+}
+
+bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
+ // Constant folding for insert value is trivial.
+ Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
+ if (!AggC)
+ AggC = SimplifiedValues.lookup(I.getAggregateOperand());
+ Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
+ if (!InsertedC)
+ InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
+ if (AggC && InsertedC) {
+ SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
+ I.getIndices());
+ return true;
+ }
+
+ // SROA can look through these but give them a cost.
+ return false;
+}
+
+/// \brief Try to simplify a call site.
+///
+/// Takes a concrete function and callsite and tries to actually simplify it by
+/// analyzing the arguments and call itself with instsimplify. Returns true if
+/// it has simplified the callsite to some other entity (a constant), making it
+/// free.
+bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
+ // FIXME: Using the instsimplify logic directly for this is inefficient
+ // because we have to continually rebuild the argument list even when no
+ // simplifications can be performed. Until that is fixed with remapping
+ // inside of instsimplify, directly constant fold calls here.
+ if (!canConstantFoldCallTo(F))
+ return false;
+
+ // Try to re-map the arguments to constants.
+ SmallVector<Constant *, 4> ConstantArgs;
+ ConstantArgs.reserve(CS.arg_size());
+ for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I) {
+ Constant *C = dyn_cast<Constant>(*I);
+ if (!C)
+ C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
+ if (!C)
+ return false; // This argument doesn't map to a constant.
+
+ ConstantArgs.push_back(C);
+ }
+ if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
+ SimplifiedValues[CS.getInstruction()] = C;
+ return true;
+ }
+
+ return false;
+}
+
+bool CallAnalyzer::visitCallSite(CallSite CS) {
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
+ !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ReturnsTwice)) {
+ // This aborts the entire analysis.
+ ExposesReturnsTwice = true;
+ return false;
+ }
+ if (CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate))
+ ContainsNoDuplicateCall = true;
+
+ if (Function *F = CS.getCalledFunction()) {
+ // When we have a concrete function, first try to simplify it directly.
+ if (simplifyCallSite(F, CS))
+ return true;
+
+ // Next check if it is an intrinsic we know about.
+ // FIXME: Lift this into part of the InstVisitor.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return Base::visitCallSite(CS);
+
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ // SROA can usually chew through these intrinsics, but they aren't free.
+ return false;
+ }
+ }
+
+ if (F == CS.getInstruction()->getParent()->getParent()) {
+ // This flag will fully abort the analysis, so don't bother with anything
+ // else.
+ IsRecursiveCall = true;
+ return false;
+ }
+
+ if (TTI.isLoweredToCall(F)) {
+ // We account for the average 1 instruction per call argument setup
+ // here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
+
+ // Everything other than inline ASM will also have a significant cost
+ // merely from making the call.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ Cost += InlineConstants::CallPenalty;
+ }
+
+ return Base::visitCallSite(CS);
+ }
+
+ // Otherwise we're in a very special case -- an indirect function call. See
+ // if we can be particularly clever about this.
+ Value *Callee = CS.getCalledValue();
+
+ // First, pay the price of the argument setup. We account for the average
+ // 1 instruction per call argument setup here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
+
+ // Next, check if this happens to be an indirect function call to a known
+ // function in this inline context. If not, we've done all we can.
+ Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+ if (!F)
+ return Base::visitCallSite(CS);
+
+ // If we have a constant that we are calling as a function, we can peer
+ // through it and see the function target. This happens not infrequently
+ // during devirtualization and so we want to give it a hefty bonus for
+ // inlining, but cap that bonus in the event that inlining wouldn't pan
+ // out. Pretend to inline the function, with a custom threshold.
+ CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold);
+ if (CA.analyzeCall(CS)) {
+ // We were able to inline the indirect call! Subtract the cost from the
+ // bonus we want to apply, but don't go below zero.
+ Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
+ }
+
+ return Base::visitCallSite(CS);
+}
+
+bool CallAnalyzer::visitInstruction(Instruction &I) {
+ // Some instructions are free. All of the free intrinsics can also be
+ // handled by SROA, etc.
+ if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
+ return true;
+
+ // We found something we don't understand or can't handle. Mark any SROA-able
+ // values in the operand list as no longer viable.
+ for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
+ disableSROA(*OI);
+
+ return false;
+}
+
+
+/// \brief Analyze a basic block for its contribution to the inline cost.
+///
+/// This method walks the analyzer over every instruction in the given basic
+/// block and accounts for their cost during inlining at this callsite. It
+/// aborts early if the threshold has been exceeded or an impossible to inline
+/// construct has been detected. It returns false if inlining is no longer
+/// viable, and true if inlining remains viable.
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
+ I != E; ++I) {
+ ++NumInstructions;
+ if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
+ ++NumVectorInstructions;
+
+ // If the instruction simplified to a constant, there is no cost to this
+ // instruction. Visit the instructions using our InstVisitor to account for
+ // all of the per-instruction logic. The visit tree returns true if we
+ // consumed the instruction in any way, and false if the instruction's base
+ // cost should count against inlining.
+ if (Base::visit(I))
+ ++NumInstructionsSimplified;
+ else
+ Cost += InlineConstants::InstrCost;
+
+ // If the visit this instruction detected an uninlinable pattern, abort.
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ return false;
+
+ // If the caller is a recursive function then we don't want to inline
+ // functions which allocate a lot of stack space because it would increase
+ // the caller stack usage dramatically.
+ if (IsCallerRecursive &&
+ AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+ return false;
+
+ if (NumVectorInstructions > NumInstructions/2)
+ VectorBonus = FiftyPercentVectorBonus;
+ else if (NumVectorInstructions > NumInstructions/10)
+ VectorBonus = TenPercentVectorBonus;
+ else
+ VectorBonus = 0;
+
+ // Check if we've past the threshold so we don't spin in huge basic
+ // blocks that will never inline.
+ if (Cost > (Threshold + VectorBonus))
+ return false;
+ }
+
+ return true;
+}
+
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns 0 if V is not a pointer, and returns the constant '0' if there are
+/// no constant offsets applied.
+ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
+ if (!TD || !V->getType()->isPointerTy())
+ return 0;
+
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
+ return 0;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+ return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
+}
+
+/// \brief Analyze a call site for potential inlining.
+///
+/// Returns true if inlining this call is viable, and false if it is not
+/// viable. It computes the cost and adjusts the threshold based on numerous
+/// factors and heuristics. If this method returns false but the computed cost
+/// is below the computed threshold, then inlining was forcibly disabled by
+/// some artifact of the routine.
+bool CallAnalyzer::analyzeCall(CallSite CS) {
+ ++NumCallsAnalyzed;
+
+ // Track whether the post-inlining function would have more than one basic
+ // block. A single basic block is often intended for inlining. Balloon the
+ // threshold by 50% until we pass the single-BB phase.
+ bool SingleBB = true;
+ int SingleBBBonus = Threshold / 2;
+ Threshold += SingleBBBonus;
+
+ // Perform some tweaks to the cost and threshold based on the direct
+ // callsite information.
+
+ // We want to more aggressively inline vector-dense kernels, so up the
+ // threshold, and we'll lower it if the % of vector instructions gets too
+ // low.
+ assert(NumInstructions == 0);
+ assert(NumVectorInstructions == 0);
+ FiftyPercentVectorBonus = Threshold;
+ TenPercentVectorBonus = Threshold / 2;
+
+ // Give out bonuses per argument, as the instructions setting them up will
+ // be gone after inlining.
+ for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
+ if (TD && CS.isByValArgument(I)) {
+ // We approximate the number of loads and stores needed by dividing the
+ // size of the byval type by the target's pointer size.
+ PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
+ unsigned PointerSize = TD->getPointerSizeInBits();
+ // Ceiling division.
+ unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+
+ // If it generates more than 8 stores it is likely to be expanded as an
+ // inline memcpy so we take that as an upper bound. Otherwise we assume
+ // one load and one store per word copied.
+ // FIXME: The maxStoresPerMemcpy setting from the target should be used
+ // here instead of a magic number of 8, but it's not available via
+ // DataLayout.
+ NumStores = std::min(NumStores, 8U);
+
+ Cost -= 2 * NumStores * InlineConstants::InstrCost;
+ } else {
+ // For non-byval arguments subtract off one instruction per call
+ // argument.
+ Cost -= InlineConstants::InstrCost;
+ }
+ }
+
+ // If there is only one call of the function, and it has internal linkage,
+ // the cost of inlining it drops dramatically.
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
+ &F == CS.getCalledFunction();
+ if (OnlyOneCallAndLocalLinkage)
+ Cost += InlineConstants::LastCallToStaticBonus;
+
+ // If the instruction after the call, or if the normal destination of the
+ // invoke is an unreachable instruction, the function is noreturn. As such,
+ // there is little point in inlining this unless there is literally zero
+ // cost.
+ Instruction *Instr = CS.getInstruction();
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ Threshold = 1;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
+ Threshold = 1;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (F.getCallingConv() == CallingConv::Cold)
+ Cost += InlineConstants::ColdccPenalty;
+
+ // Check if we're done. This can happen due to bonuses and penalties.
+ if (Cost > Threshold)
+ return false;
+
+ if (F.empty())
+ return true;
+
+ Function *Caller = CS.getInstruction()->getParent()->getParent();
+ // Check if the caller function is recursive itself.
+ for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
+ U != E; ++U) {
+ CallSite Site(cast<Value>(*U));
+ if (!Site)
+ continue;
+ Instruction *I = Site.getInstruction();
+ if (I->getParent()->getParent() == Caller) {
+ IsCallerRecursive = true;
+ break;
+ }
+ }
+
+ // Track whether we've seen a return instruction. The first return
+ // instruction is free, as at least one will usually disappear in inlining.
+ bool HasReturn = false;
+
+ // Populate our simplified values by mapping from function arguments to call
+ // arguments with known important simplifications.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
+ FAI != FAE; ++FAI, ++CAI) {
+ assert(CAI != CS.arg_end());
+ if (Constant *C = dyn_cast<Constant>(CAI))
+ SimplifiedValues[FAI] = C;
+
+ Value *PtrArg = *CAI;
+ if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
+ ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
+
+ // We can SROA any pointer arguments derived from alloca instructions.
+ if (isa<AllocaInst>(PtrArg)) {
+ SROAArgValues[FAI] = PtrArg;
+ SROAArgCosts[PtrArg] = 0;
+ }
+ }
+ }
+ NumConstantArgs = SimplifiedValues.size();
+ NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
+ NumAllocaArgs = SROAArgValues.size();
+
+ // The worklist of live basic blocks in the callee *after* inlining. We avoid
+ // adding basic blocks of the callee which can be proven to be dead for this
+ // particular call site in order to get more accurate cost estimates. This
+ // requires a somewhat heavyweight iteration pattern: we need to walk the
+ // basic blocks in a breadth-first order as we insert live successors. To
+ // accomplish this, prioritizing for small iterations because we exit after
+ // crossing our threshold, we use a small-size optimized SetVector.
+ typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
+ SmallPtrSet<BasicBlock *, 16> > BBSetVector;
+ BBSetVector BBWorklist;
+ BBWorklist.insert(&F.getEntryBlock());
+ // Note that we *must not* cache the size, this loop grows the worklist.
+ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+ // Bail out the moment we cross the threshold. This means we'll under-count
+ // the cost, but only when undercounting doesn't matter.
+ if (Cost > (Threshold + VectorBonus))
+ break;
+
+ BasicBlock *BB = BBWorklist[Idx];
+ if (BB->empty())
+ continue;
+
+ // Handle the terminator cost here where we can track returns and other
+ // function-wide constructs.
+ TerminatorInst *TI = BB->getTerminator();
+
+ // We never want to inline functions that contain an indirectbr. This is
+ // incorrect because all the blockaddress's (in static global initializers
+ // for example) would be referring to the original function, and this
+ // indirect jump would jump from the inlined copy of the function into the
+ // original function which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions
+ // with indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably
+ // don't want to inline this function.
+ if (isa<IndirectBrInst>(TI))
+ return false;
+
+ if (!HasReturn && isa<ReturnInst>(TI))
+ HasReturn = true;
+ else
+ Cost += InlineConstants::InstrCost;
+
+ // Analyze the cost of this block. If we blow through the threshold, this
+ // returns false, and we can bail on out.
+ if (!analyzeBlock(BB)) {
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ return false;
+
+ // If the caller is a recursive function then we don't want to inline
+ // functions which allocate a lot of stack space because it would increase
+ // the caller stack usage dramatically.
+ if (IsCallerRecursive &&
+ AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+ return false;
+
+ break;
+ }
+
+ // Add in the live successors by first checking whether we have terminator
+ // that may be simplified based on the values simplified by this call.
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional()) {
+ Value *Cond = BI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
+ continue;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Value *Cond = SI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+ continue;
+ }
+ }
+
+ // If we're unable to select a particular successor, just count all of
+ // them.
+ for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
+ ++TIdx)
+ BBWorklist.insert(TI->getSuccessor(TIdx));
+
+ // If we had any successors at this point, than post-inlining is likely to
+ // have them as well. Note that we assume any basic blocks which existed
+ // due to branches or switches which folded above will also fold after
+ // inlining.
+ if (SingleBB && TI->getNumSuccessors() > 1) {
+ // Take off the bonus we applied to the threshold.
+ Threshold -= SingleBBBonus;
+ SingleBB = false;
+ }
+ }
+
+ // If this is a noduplicate call, we can still inline as long as
+ // inlining this would cause the removal of the caller (so the instruction
+ // is not actually duplicated, just moved).
+ if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
+ return false;
+
+ Threshold += VectorBonus;
+
+ return Cost < Threshold;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// \brief Dump stats about this call's analysis.
+void CallAnalyzer::dump() {
+#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n"
+ DEBUG_PRINT_STAT(NumConstantArgs);
+ DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
+ DEBUG_PRINT_STAT(NumAllocaArgs);
+ DEBUG_PRINT_STAT(NumConstantPtrCmps);
+ DEBUG_PRINT_STAT(NumConstantPtrDiffs);
+ DEBUG_PRINT_STAT(NumInstructionsSimplified);
+ DEBUG_PRINT_STAT(SROACostSavings);
+ DEBUG_PRINT_STAT(SROACostSavingsLost);
+ DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
+#undef DEBUG_PRINT_STAT
+}
+#endif
+
+INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
+ true, true)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
+ true, true)
+
+char InlineCostAnalysis::ID = 0;
+
+InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {}
+
+InlineCostAnalysis::~InlineCostAnalysis() {}
+
+void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetTransformInfo>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+}
+
+bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+ return false;
+}
+
+InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
+ return getInlineCost(CS, CS.getCalledFunction(), Threshold);
+}
+
+InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
+ int Threshold) {
+ // Cannot inline indirect calls.
+ if (!Callee)
+ return llvm::InlineCost::getNever();
+
+ // Calls to functions with always-inline attributes should be inlined
+ // whenever possible.
+ if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline)) {
+ if (isInlineViable(*Callee))
+ return llvm::InlineCost::getAlways();
+ return llvm::InlineCost::getNever();
+ }
+
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline or call sites
+ // marked noinline.
+ if (Callee->mayBeOverridden() ||
+ Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoInline) ||
+ CS.isNoInline())
+ return llvm::InlineCost::getNever();
+
+ DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
+ << "...\n");
+
+ CallAnalyzer CA(TD, *TTI, *Callee, Threshold);
+ bool ShouldInline = CA.analyzeCall(CS);
+
+ DEBUG(CA.dump());
+
+ // Check if there was a reason to force inlining or no inlining.
+ if (!ShouldInline && CA.getCost() < CA.getThreshold())
+ return InlineCost::getNever();
+ if (ShouldInline && CA.getCost() >= CA.getThreshold())
+ return InlineCost::getAlways();
+
+ return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
+}
+
+bool InlineCostAnalysis::isInlineViable(Function &F) {
+ bool ReturnsTwice =
+ F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ReturnsTwice);
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Disallow inlining of functions which contain an indirect branch.
+ if (isa<IndirectBrInst>(BI->getTerminator()))
+ return false;
+
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+ ++II) {
+ CallSite CS(II);
+ if (!CS)
+ continue;
+
+ // Disallow recursive calls.
+ if (&F == CS.getCalledFunction())
+ return false;
+
+ // Disallow calls which expose returns-twice to a function not previously
+ // attributed as such.
+ if (!ReturnsTwice && CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->canReturnTwice())
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
new file mode 100644
index 000000000000..b33e2cb9999e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -0,0 +1,335 @@
+//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bookkeeping for "interesting" users of expressions
+// computed from induction variables.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "iv-users"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+char IVUsers::ID = 0;
+INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+
+Pass *llvm::createIVUsersPass() {
+ return new IVUsers();
+}
+
+/// isInteresting - Test whether the given expression is "interesting" when
+/// used by the given expression, within the context of analyzing the
+/// given loop.
+static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
+ ScalarEvolution *SE, LoopInfo *LI) {
+ // An addrec is interesting if it's affine or if it has an interesting start.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Keep things simple. Don't touch loop-variant strides unless they're
+ // only used outside the loop and we can simplify them.
+ if (AR->getLoop() == L)
+ return AR->isAffine() ||
+ (!L->contains(I) &&
+ SE->getSCEVAtScope(AR, LI->getLoopFor(I->getParent())) != AR);
+ // Otherwise recurse to see if the start value is interesting, and that
+ // the step value is not interesting, since we don't yet know how to
+ // do effective SCEV expansions for addrecs with interesting steps.
+ return isInteresting(AR->getStart(), I, L, SE, LI) &&
+ !isInteresting(AR->getStepRecurrence(*SE), I, L, SE, LI);
+ }
+
+ // An add is interesting if exactly one of its operands is interesting.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ bool AnyInterestingYet = false;
+ for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
+ OI != OE; ++OI)
+ if (isInteresting(*OI, I, L, SE, LI)) {
+ if (AnyInterestingYet)
+ return false;
+ AnyInterestingYet = true;
+ }
+ return AnyInterestingYet;
+ }
+
+ // Nothing else is interesting here.
+ return false;
+}
+
+/// Return true if all loop headers that dominate this block are in simplified
+/// form.
+static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
+ const LoopInfo *LI,
+ SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ Loop *NearestLoop = 0;
+ for (DomTreeNode *Rung = DT->getNode(BB);
+ Rung; Rung = Rung->getIDom()) {
+ BasicBlock *DomBB = Rung->getBlock();
+ Loop *DomLoop = LI->getLoopFor(DomBB);
+ if (DomLoop && DomLoop->getHeader() == DomBB) {
+ // If the domtree walk reaches a loop with no preheader, return false.
+ if (!DomLoop->isLoopSimplifyForm())
+ return false;
+ // If we have already checked this loop nest, stop checking.
+ if (SimpleLoopNests.count(DomLoop))
+ break;
+ // If we have not already checked this loop nest, remember the loop
+ // header nearest to BB. The nearest loop may not contain BB.
+ if (!NearestLoop)
+ NearestLoop = DomLoop;
+ }
+ }
+ if (NearestLoop)
+ SimpleLoopNests.insert(NearestLoop);
+ return true;
+}
+
+/// AddUsersImpl - Inspect the specified instruction. If it is a
+/// reducible SCEV, recursively add its users to the IVUsesByStride set and
+/// return true. Otherwise, return false.
+bool IVUsers::AddUsersImpl(Instruction *I,
+ SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ // Add this IV user to the Processed set before returning false to ensure that
+ // all IV users are members of the set. See IVUsers::isIVUserOrOperand.
+ if (!Processed.insert(I))
+ return true; // Instruction already handled.
+
+ if (!SE->isSCEVable(I->getType()))
+ return false; // Void and FP expressions cannot be reduced.
+
+ // IVUsers is used by LSR which assumes that all SCEV expressions are safe to
+ // pass to SCEVExpander. Expressions are not safe to expand if they represent
+ // operations that are not safe to speculate, namely integer division.
+ if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, TD))
+ return false;
+
+ // LSR is not APInt clean, do not touch integers bigger than 64-bits.
+ // Also avoid creating IVs of non-native types. For example, we don't want a
+ // 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
+ uint64_t Width = SE->getTypeSizeInBits(I->getType());
+ if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *ISE = SE->getSCEV(I);
+
+ // If we've come to an uninteresting expression, stop the traversal and
+ // call this a user.
+ if (!isInteresting(ISE, I, L, SE, LI))
+ return false;
+
+ SmallPtrSet<Instruction *, 4> UniqueUsers;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!UniqueUsers.insert(User))
+ continue;
+
+ // Do not infinitely recurse on PHI nodes.
+ if (isa<PHINode>(User) && Processed.count(User))
+ continue;
+
+ // Only consider IVUsers that are dominated by simplified loop
+ // headers. Otherwise, SCEVExpander will crash.
+ BasicBlock *UseBB = User->getParent();
+ // A phi's use is live out of its predecessor block.
+ if (PHINode *PHI = dyn_cast<PHINode>(User)) {
+ unsigned OperandNo = UI.getOperandNo();
+ unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+ UseBB = PHI->getIncomingBlock(ValNo);
+ }
+ if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests))
+ return false;
+
+ // Descend recursively, but not into PHI nodes outside the current loop.
+ // It's important to see the entire expression outside the loop to get
+ // choices that depend on addressing mode use right, although we won't
+ // consider references outside the loop in all cases.
+ // If User is already in Processed, we don't want to recurse into it again,
+ // but do want to record a second reference in the same instruction.
+ bool AddUserToIVUsers = false;
+ if (LI->getLoopFor(User->getParent()) != L) {
+ if (isa<PHINode>(User) || Processed.count(User) ||
+ !AddUsersImpl(User, SimpleLoopNests)) {
+ DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
+ AddUserToIVUsers = true;
+ }
+ } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) {
+ DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
+ AddUserToIVUsers = true;
+ }
+
+ if (AddUserToIVUsers) {
+ // Okay, we found a user that we cannot reduce.
+ IVUses.push_back(new IVStrideUse(this, User, I));
+ IVStrideUse &NewUse = IVUses.back();
+ // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
+ // The regular return value here is discarded; instead of recording
+ // it, we just recompute it when we need it.
+ ISE = TransformForPostIncUse(NormalizeAutodetect,
+ ISE, User, I,
+ NewUse.PostIncLoops,
+ *SE, *DT);
+ DEBUG(if (SE->getSCEV(I) != ISE)
+ dbgs() << " NORMALIZED TO: " << *ISE << '\n');
+ }
+ }
+ return true;
+}
+
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+ // SCEVExpander can only handle users that are dominated by simplified loop
+ // entries. Keep track of all loops that are only dominated by other simple
+ // loops so we don't traverse the domtree for each user.
+ SmallPtrSet<Loop*,16> SimpleLoopNests;
+
+ return AddUsersImpl(I, SimpleLoopNests);
+}
+
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+ IVUses.push_back(new IVStrideUse(this, User, Operand));
+ return IVUses.back();
+}
+
+IVUsers::IVUsers()
+ : LoopPass(ID) {
+ initializeIVUsersPass(*PassRegistry::getPassRegistry());
+}
+
+void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.setPreservesAll();
+}
+
+bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
+
+ L = l;
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+
+ // Find all uses of induction variables in this loop, and categorize
+ // them by stride. Start by finding all of the PHI nodes in the header for
+ // this loop. If they are induction variables, inspect their uses.
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
+ (void)AddUsersIfInteresting(I);
+
+ return false;
+}
+
+void IVUsers::print(raw_ostream &OS, const Module *M) const {
+ OS << "IV Users for loop ";
+ WriteAsOperand(OS, L->getHeader(), false);
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << " with backedge-taken count "
+ << *SE->getBackedgeTakenCount(L);
+ }
+ OS << ":\n";
+
+ for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
+ E = IVUses.end(); UI != E; ++UI) {
+ OS << " ";
+ WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+ OS << " = " << *getReplacementExpr(*UI);
+ for (PostIncLoopSet::const_iterator
+ I = UI->PostIncLoops.begin(),
+ E = UI->PostIncLoops.end(); I != E; ++I) {
+ OS << " (post-inc with loop ";
+ WriteAsOperand(OS, (*I)->getHeader(), false);
+ OS << ")";
+ }
+ OS << " in ";
+ UI->getUser()->print(OS);
+ OS << '\n';
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void IVUsers::dump() const {
+ print(dbgs());
+}
+#endif
+
+void IVUsers::releaseMemory() {
+ Processed.clear();
+ IVUses.clear();
+}
+
+/// getReplacementExpr - Return a SCEV expression which computes the
+/// value of the OperandValToReplace.
+const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
+ return SE->getSCEV(IU.getOperandValToReplace());
+}
+
+/// getExpr - Return the expression for the use.
+const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
+ return
+ TransformForPostIncUse(Normalize, getReplacementExpr(IU),
+ IU.getUser(), IU.getOperandValToReplace(),
+ const_cast<PostIncLoopSet &>(IU.getPostIncLoops()),
+ *SE, *DT);
+}
+
+static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (AR->getLoop() == L)
+ return AR;
+ return findAddRecForLoop(AR->getStart(), L);
+ }
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+ return AR;
+ return 0;
+ }
+
+ return 0;
+}
+
+const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
+ return AR->getStepRecurrence(*SE);
+ return 0;
+}
+
+void IVStrideUse::transformToPostInc(const Loop *L) {
+ PostIncLoops.insert(L);
+}
+
+void IVStrideUse::deleted() {
+ // Remove this user from the list.
+ Parent->Processed.erase(this->getUser());
+ Parent->IVUses.erase(this);
+ // this now dangles!
+}
diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp
new file mode 100644
index 000000000000..75a49eb90a88
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstCount.cpp
@@ -0,0 +1,87 @@
+//===-- InstCount.cpp - Collects the count of all instructions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass collects the count of all instructions and reports them
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcount"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(TotalInsts , "Number of instructions (of all types)");
+STATISTIC(TotalBlocks, "Number of basic blocks");
+STATISTIC(TotalFuncs , "Number of non-external functions");
+STATISTIC(TotalMemInst, "Number of memory instructions");
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
+
+#include "llvm/IR/Instruction.def"
+
+
+namespace {
+ class InstCount : public FunctionPass, public InstVisitor<InstCount> {
+ friend class InstVisitor<InstCount>;
+
+ void visitFunction (Function &F) { ++TotalFuncs; }
+ void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; }
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; }
+
+#include "llvm/IR/Instruction.def"
+
+ void visitInstruction(Instruction &I) {
+ errs() << "Instruction Count does not know about " << I;
+ llvm_unreachable(0);
+ }
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ InstCount() : FunctionPass(ID) {
+ initializeInstCountPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const {}
+
+ };
+}
+
+char InstCount::ID = 0;
+INITIALIZE_PASS(InstCount, "instcount",
+ "Counts the various types of Instructions", false, true)
+
+FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
+
+// InstCount::run - This is the main Analysis entry point for a
+// function.
+//
+bool InstCount::runOnFunction(Function &F) {
+ unsigned StartMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst;
+ visit(F);
+ unsigned EndMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst;
+ TotalMemInst += EndMemInsts-StartMemInsts;
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
new file mode 100644
index 000000000000..4a3c74e9db35
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -0,0 +1,3215 @@
+//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements routines for folding instructions into simpler forms
+// that do not require creating new instructions. This does constant folding
+// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
+// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
+// ("and i32 %x, %x" -> "%x"). All operands are assumed to have already been
+// simplified: This is usually true and assuming it simplifies the logic (if
+// they have not been simplified then results are correct but maybe suboptimal).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+enum { RecursionLimit = 3 };
+
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc, "Number of reassociations");
+
+struct Query {
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ const DominatorTree *DT;
+
+ Query(const DataLayout *td, const TargetLibraryInfo *tli,
+ const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {}
+};
+
+static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
+ unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
+ unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
+
+/// getFalse - For a boolean type, or a vector of boolean type, return false, or
+/// a vector with every element false, as appropriate for the type.
+static Constant *getFalse(Type *Ty) {
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
+ "Expected i1 type or a vector of i1!");
+ return Constant::getNullValue(Ty);
+}
+
+/// getTrue - For a boolean type, or a vector of boolean type, return true, or
+/// a vector with every element true, as appropriate for the type.
+static Constant *getTrue(Type *Ty) {
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
+ "Expected i1 type or a vector of i1!");
+ return Constant::getAllOnesValue(Ty);
+}
+
+/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"?
+static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
+ CmpInst *Cmp = dyn_cast<CmpInst>(V);
+ if (!Cmp)
+ return false;
+ CmpInst::Predicate CPred = Cmp->getPredicate();
+ Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1);
+ if (CPred == Pred && CLHS == LHS && CRHS == RHS)
+ return true;
+ return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS &&
+ CRHS == LHS;
+}
+
+/// ValueDominatesPHI - Does the given value dominate the specified phi node?
+static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ // Arguments and constants dominate all instructions.
+ return true;
+
+ // If we are processing instructions (and/or basic blocks) that have not been
+ // fully added to a function, the parent nodes may still be null. Simply
+ // return the conservative answer in these cases.
+ if (!I->getParent() || !P->getParent() || !I->getParent()->getParent())
+ return false;
+
+ // If we have a DominatorTree then do a precise test.
+ if (DT) {
+ if (!DT->isReachableFromEntry(P->getParent()))
+ return true;
+ if (!DT->isReachableFromEntry(I->getParent()))
+ return false;
+ return DT->dominates(I, P);
+ }
+
+ // Otherwise, if the instruction is in the entry block, and is not an invoke,
+ // then it obviously dominates all phi nodes.
+ if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
+ !isa<InvokeInst>(I))
+ return true;
+
+ return false;
+}
+
+/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning
+/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
+/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
+/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExpand, const Query &Q,
+ unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Check whether the expression has the form "(A op' B) op C".
+ if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+ if (Op0->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op C) op' (B op C)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == B && R == A)) {
+ ++NumExpand;
+ return LHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ // Check whether the expression has the form "A op (B op' C)".
+ if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+ if (Op1->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op B) op' (A op C)".
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == C && R == B)) {
+ ++NumExpand;
+ return RHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
+/// using the operation OpCodeToExtract. For example, when Opcode is Add and
+/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExtract, const Query &Q,
+ unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
+ !Op1 || Op1->getOpcode() != OpcodeToExtract)
+ return 0;
+
+ // The expression has the form "(A op' B) op (C op' D)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+
+ // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)".
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) {
+ Value *DD = A == C ? D : C;
+ // Form "A op' (B op DD)" if it simplifies completely.
+ // Does "B op DD" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) {
+ // It does! Return "A op' V" if it simplifies or is already available.
+ // If V equals B then "A op' V" is just the LHS. If V equals DD then
+ // "A op' V" is just the RHS.
+ if (V == B || V == DD) {
+ ++NumFactor;
+ return V == B ? LHS : RHS;
+ }
+ // Otherwise return "A op' V" if it simplifies.
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) {
+ ++NumFactor;
+ return W;
+ }
+ }
+ }
+
+ // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)".
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) {
+ Value *CC = B == D ? C : D;
+ // Form "(A op CC) op' B" if it simplifies completely..
+ // Does "A op CC" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) {
+ // It does! Return "V op' B" if it simplifies or is already available.
+ // If V equals A then "V op' B" is just the LHS. If V equals CC then
+ // "V op' B" is just the RHS.
+ if (V == A || V == CC) {
+ ++NumFactor;
+ return V == A ? LHS : RHS;
+ }
+ // Otherwise return "V op' B" if it simplifies.
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) {
+ ++NumFactor;
+ return W;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
+/// operations. Returns the simpler value, or null if none was found.
+static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
+ assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
+
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
+ // It does! Return "A op V" if it simplifies or is already available.
+ // If V equals B then "A op V" is just the LHS.
+ if (V == B) return LHS;
+ // Otherwise return "A op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
+ // It does! Return "V op C" if it simplifies or is already available.
+ // If V equals B then "V op C" is just the RHS.
+ if (V == B) return RHS;
+ // Otherwise return "V op C" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // The remaining transforms require commutativity as well as associativity.
+ if (!Instruction::isCommutative(Opcode))
+ return 0;
+
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
+ // It does! Return "V op B" if it simplifies or is already available.
+ // If V equals A then "V op B" is just the LHS.
+ if (V == A) return LHS;
+ // Otherwise return "V op B" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
+ // It does! Return "B op V" if it simplifies or is already available.
+ // If V equals C then "B op V" is just the RHS.
+ if (V == C) return RHS;
+ // Otherwise return "B op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// ThreadBinOpOverSelect - In the case of a binary operation with a select
+/// instruction as an operand, try to simplify the binop by seeing whether
+/// evaluating it on both branches of the select results in the same value.
+/// Returns the common value if so, otherwise returns null.
+static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ SelectInst *SI;
+ if (isa<SelectInst>(LHS)) {
+ SI = cast<SelectInst>(LHS);
+ } else {
+ assert(isa<SelectInst>(RHS) && "No select instruction operand!");
+ SI = cast<SelectInst>(RHS);
+ }
+
+ // Evaluate the BinOp on the true and false branches of the select.
+ Value *TV;
+ Value *FV;
+ if (SI == LHS) {
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
+ } else {
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
+ }
+
+ // If they simplified to the same value, then return the common value.
+ // If they both failed to simplify then return null.
+ if (TV == FV)
+ return TV;
+
+ // If one branch simplified to undef, return the other one.
+ if (TV && isa<UndefValue>(TV))
+ return FV;
+ if (FV && isa<UndefValue>(FV))
+ return TV;
+
+ // If applying the operation did not change the true and false select values,
+ // then the result of the binop is the select itself.
+ if (TV == SI->getTrueValue() && FV == SI->getFalseValue())
+ return SI;
+
+ // If one branch simplified and the other did not, and the simplified
+ // value is equal to the unsimplified one, return the simplified value.
+ // For example, select (cond, X, X & Z) & Z -> X & Z.
+ if ((FV && !TV) || (TV && !FV)) {
+ // Check that the simplified value has the form "X op Y" where "op" is the
+ // same as the original operation.
+ Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV);
+ if (Simplified && Simplified->getOpcode() == Opcode) {
+ // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS".
+ // We already know that "op" is the same as for the simplified value. See
+ // if the operands match too. If so, return the simplified value.
+ Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue();
+ Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS;
+ Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch;
+ if (Simplified->getOperand(0) == UnsimplifiedLHS &&
+ Simplified->getOperand(1) == UnsimplifiedRHS)
+ return Simplified;
+ if (Simplified->isCommutative() &&
+ Simplified->getOperand(1) == UnsimplifiedLHS &&
+ Simplified->getOperand(0) == UnsimplifiedRHS)
+ return Simplified;
+ }
+ }
+
+ return 0;
+}
+
+/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
+/// try to simplify the comparison by seeing whether both branches of the select
+/// result in the same value. Returns the common value if so, otherwise returns
+/// null.
+static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Make sure the select is on the LHS.
+ if (!isa<SelectInst>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
+ SelectInst *SI = cast<SelectInst>(LHS);
+ Value *Cond = SI->getCondition();
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
+
+ // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
+ // Does "cmp TV, RHS" simplify?
+ Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse);
+ if (TCmp == Cond) {
+ // It not only simplified, it simplified to the select condition. Replace
+ // it with 'true'.
+ TCmp = getTrue(Cond->getType());
+ } else if (!TCmp) {
+ // It didn't simplify. However if "cmp TV, RHS" is equal to the select
+ // condition then we can replace it with 'true'. Otherwise give up.
+ if (!isSameCompare(Cond, Pred, TV, RHS))
+ return 0;
+ TCmp = getTrue(Cond->getType());
+ }
+
+ // Does "cmp FV, RHS" simplify?
+ Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse);
+ if (FCmp == Cond) {
+ // It not only simplified, it simplified to the select condition. Replace
+ // it with 'false'.
+ FCmp = getFalse(Cond->getType());
+ } else if (!FCmp) {
+ // It didn't simplify. However if "cmp FV, RHS" is equal to the select
+ // condition then we can replace it with 'false'. Otherwise give up.
+ if (!isSameCompare(Cond, Pred, FV, RHS))
+ return 0;
+ FCmp = getFalse(Cond->getType());
+ }
+
+ // If both sides simplified to the same value, then use it as the result of
+ // the original comparison.
+ if (TCmp == FCmp)
+ return TCmp;
+
+ // The remaining cases only make sense if the select condition has the same
+ // type as the result of the comparison, so bail out if this is not so.
+ if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy())
+ return 0;
+ // If the false value simplified to false, then the result of the compare
+ // is equal to "Cond && TCmp". This also catches the case when the false
+ // value simplified to false and the true value to true, returning "Cond".
+ if (match(FCmp, m_Zero()))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
+ return V;
+ // If the true value simplified to true, then the result of the compare
+ // is equal to "Cond || FCmp".
+ if (match(TCmp, m_One()))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
+ return V;
+ // Finally, if the false value simplified to true and the true value to
+ // false, then the result of the compare is equal to "!Cond".
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+ if (Value *V =
+ SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+ Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
+/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
+/// it on the incoming phi values yields the same result for every value. If so
+/// returns the common value, otherwise returns null.
+static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ PHINode *PI;
+ if (isa<PHINode>(LHS)) {
+ PI = cast<PHINode>(LHS);
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, Q.DT))
+ return 0;
+ } else {
+ assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
+ PI = cast<PHINode>(RHS);
+ // Bail out if LHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(LHS, PI, Q.DT))
+ return 0;
+ }
+
+ // Evaluate the BinOp on the incoming phi values.
+ Value *CommonValue = 0;
+ for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PI->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = PI == LHS ?
+ SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+ // to previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return 0;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try
+/// try to simplify the comparison by seeing whether comparing with all of the
+/// incoming phi values yields the same result every time. If so returns the
+/// common result, otherwise returns null.
+static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Make sure the phi is on the LHS.
+ if (!isa<PHINode>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!");
+ PHINode *PI = cast<PHINode>(LHS);
+
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, Q.DT))
+ return 0;
+
+ // Evaluate the BinOp on the incoming phi values.
+ Value *CommonValue = 0;
+ for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PI->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+ // to previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return 0;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// SimplifyAddInst - Given operands for an Add, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops,
+ Q.TD, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X + undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // X + 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X + (Y - X) -> Y
+ // (Y - X) + X -> Y
+ // Eg: X + -X -> 0
+ Value *Y = 0;
+ if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
+ match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
+ return Y;
+
+ // X + ~X -> -1 since ~X = -X-1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ /// i1 add -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
+ Q, MaxRecurse))
+ return V;
+
+ // Threading Add over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A + select(cond, B, C)" means evaluating
+ // "A+B" and "A+C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns 0 if V is not a pointer, and returns the constant '0' if there are
+/// no constant offsets applied.
+///
+/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
+/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
+/// folding.
+static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
+ Value *&V) {
+ assert(V->getType()->getScalarType()->isPointerTy());
+
+ // Without DataLayout, just be conservative for now. Theoretically, more could
+ // be done in this case.
+ if (!TD)
+ return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0);
+
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset))
+ break;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(V->getType()->getScalarType()->isPointerTy() &&
+ "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+ Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
+ if (V->getType()->isVectorTy())
+ return ConstantVector::getSplat(V->getType()->getVectorNumElements(),
+ OffsetIntPtr);
+ return OffsetIntPtr;
+}
+
+/// \brief Compute the constant difference between two pointer values.
+/// If the difference is not a constant, returns zero.
+static Constant *computePointerDifference(const DataLayout *TD,
+ Value *LHS, Value *RHS) {
+ Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
+ Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+
+ // If LHS and RHS are not related via constant offsets to the same base
+ // value, there is nothing we can do here.
+ if (LHS != RHS)
+ return 0;
+
+ // Otherwise, the difference of LHS - RHS can be computed as:
+ // LHS - RHS
+ // = (LHSOffset + Base) - (RHSOffset + Base)
+ // = LHSOffset - RHSOffset
+ return ConstantExpr::getSub(LHSOffset, RHSOffset);
+}
+
+/// SimplifySubInst - Given operands for a Sub, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0))
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+
+ // X - undef -> undef
+ // undef - X -> undef
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return UndefValue::get(Op0->getType());
+
+ // X - 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X - X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // (X*2) - X -> X
+ // (X<<1) - X -> X
+ Value *X = 0;
+ if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
+ match(Op0, m_Shl(m_Specific(Op1), m_One())))
+ return Op1;
+
+ // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
+ // For example, (X + Y) - Y -> X; (Y + X) - Y -> X
+ Value *Y = 0, *Z = Op1;
+ if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
+ // See if "V === Y - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
+ // It does! Now see if "X + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
+ // It does! Now see if "Y + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies.
+ // For example, X - (X + 1) -> -1
+ X = Op0;
+ if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+ // It does! Now see if "V - Z" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
+ // It does! Now see if "V - Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // Z - (X - Y) -> (Z - X) + Y if everything simplifies.
+ // For example, X - (X - Y) -> Y.
+ Z = Op0;
+ if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
+ // See if "V === Z - X" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1))
+ // It does! Now see if "V + Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+
+ // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies.
+ if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) &&
+ match(Op1, m_Trunc(m_Value(Y))))
+ if (X->getType() == Y->getType())
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+ // It does! Now see if "trunc V" simplifies.
+ if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1))
+ // It does, return the simplified "trunc V".
+ return W;
+
+ // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
+ if (match(Op0, m_PtrToInt(m_Value(X))) &&
+ match(Op1, m_PtrToInt(m_Value(Y))))
+ if (Constant *Result = computePointerDifference(Q.TD, X, Y))
+ return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
+
+ // Mul distributes over Sub. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
+ Q, MaxRecurse))
+ return V;
+
+ // i1 sub -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
+ return V;
+
+ // Threading Sub over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A - select(cond, B, C)" means evaluating
+ // "A-B" and "A-C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// Given operands for an FAdd, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::FAdd, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // fadd X, -0 ==> X
+ if (match(Op1, m_NegZero()))
+ return Op0;
+
+ // fadd X, 0 ==> X, when we know X is not -0
+ if (match(Op1, m_Zero()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0)))
+ return Op0;
+
+ // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
+ // where nnan and ninf have to occur at least once somewhere in this
+ // expression
+ Value *SubOp = 0;
+ if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0))))
+ SubOp = Op1;
+ else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1))))
+ SubOp = Op0;
+ if (SubOp) {
+ Instruction *FSub = cast<Instruction>(SubOp);
+ if ((FMF.noNaNs() || FSub->hasNoNaNs()) &&
+ (FMF.noInfs() || FSub->hasNoInfs()))
+ return Constant::getNullValue(Op0->getType());
+ }
+
+ return 0;
+}
+
+/// Given operands for an FSub, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::FSub, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+ }
+
+ // fsub X, 0 ==> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // fsub X, -0 ==> X, when we know X is not -0
+ if (match(Op1, m_NegZero()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0)))
+ return Op0;
+
+ // fsub 0, (fsub -0.0, X) ==> X
+ Value *X;
+ if (match(Op0, m_AnyZero())) {
+ if (match(Op1, m_FSub(m_NegZero(), m_Value(X))))
+ return X;
+ if (FMF.noSignedZeros() && match(Op1, m_FSub(m_AnyZero(), m_Value(X))))
+ return X;
+ }
+
+ // fsub nnan ninf x, x ==> 0.0
+ if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ return 0;
+}
+
+/// Given the operands for an FMul, see if we can fold the result
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
+ FastMathFlags FMF,
+ const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // fmul X, 1.0 ==> X
+ if (match(Op1, m_FPOne()))
+ return Op0;
+
+ // fmul nnan nsz X, 0 ==> 0
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
+ return Op1;
+
+ return 0;
+}
+
+/// SimplifyMulInst - Given operands for a Mul, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X * undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X * 0 -> 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X * 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ // (X / Y) * Y -> X if the division is exact.
+ Value *X = 0;
+ if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
+ match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y)
+ return X;
+
+ // i1 mul -> and.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
+ Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFAddInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
+Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFSubInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
+Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1,
+ FastMathFlags FMF,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFMulInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+}
+
+/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
+ }
+ }
+
+ bool isSigned = Opcode == Instruction::SDiv;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // undef / X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 / X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X / 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be division by zero, hence it must be division by one.
+ return Op0;
+
+ // X / X -> 1
+ if (Op0 == Op1)
+ return ConstantInt::get(Op0->getType(), 1);
+
+ // (X * Y) / Y -> X if the multiplication does not overflow.
+ Value *X = 0, *Y = 0;
+ if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
+ if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
+ OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0);
+ // If the Mul knows it does not overflow, then we are good to go.
+ if ((isSigned && Mul->hasNoSignedWrap()) ||
+ (!isSigned && Mul->hasNoUnsignedWrap()))
+ return X;
+ // If X has the form X = A / Y then X * Y cannot overflow.
+ if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X))
+ if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y)
+ return X;
+ }
+
+ // (X rem Y) / Y -> 0
+ if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+ (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+ return Constant::getNullValue(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifySDivInst - Given operands for an SDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+}
+
+/// SimplifyUDivInst - Given operands for a UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+}
+
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned) {
+ // undef / X -> undef (the undef could be a snan).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+}
+
+/// SimplifyRem - Given operands for an SRem or URem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
+ }
+ }
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // undef % X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 % X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X % 0 -> undef, we don't need to preserve faults!
+ if (match(Op1, m_Zero()))
+ return UndefValue::get(Op0->getType());
+
+ // X % 1 -> 0
+ if (match(Op1, m_One()))
+ return Constant::getNullValue(Op0->getType());
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be remainder by zero, hence it must be remainder by one.
+ return Constant::getNullValue(Op0->getType());
+
+ // X % X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifySRemInst - Given operands for an SRem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+}
+
+/// SimplifyURemInst - Given operands for a URem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+}
+
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
+ unsigned) {
+ // undef % X -> undef (the undef could be a snan).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+}
+
+/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
+ }
+ }
+
+ // 0 shift by X -> 0
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X shift by 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X shift by undef -> undef because it may shift by the bitwidth.
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // Shifting by the bitwidth or more is undefined.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1))
+ if (CI->getValue().getLimitedValue() >=
+ Op0->getType()->getScalarSizeInBits())
+ return UndefValue::get(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifyShlInst - Given operands for an Shl, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // undef << X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X >> A) << A -> X
+ Value *X;
+ if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
+ return X;
+ return 0;
+}
+
+Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// SimplifyLShrInst - Given operands for an LShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // undef >>l X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
+ return X;
+
+ return 0;
+}
+
+Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// SimplifyAShrInst - Given operands for an AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // all ones >>a X -> all ones
+ if (match(Op0, m_AllOnes()))
+ return Op0;
+
+ // undef >>a X -> all ones
+ if (match(Op0, m_Undef()))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+ return X;
+
+ return 0;
+}
+
+Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// SimplifyAndInst - Given operands for an And, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X & undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X & X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X & 0 = 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X & -1 = X
+ if (match(Op1, m_AllOnes()))
+ return Op0;
+
+ // A & ~A = ~A & A = 0
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getNullValue(Op0->getType());
+
+ // (A | ?) & A = A
+ Value *A = 0, *B = 0;
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A & (A | ?) = A
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ // A & (-A) = A if A is a power of two or zero.
+ if (match(Op0, m_Neg(m_Specific(Op1))) ||
+ match(Op1, m_Neg(m_Specific(Op0)))) {
+ if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true))
+ return Op0;
+ if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true))
+ return Op1;
+ }
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ Q, MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
+ Q, MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+}
+
+/// SimplifyOrInst - Given operands for an Or, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X | undef -> -1
+ if (match(Op1, m_Undef()))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // X | X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X | 0 = X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X | -1 = -1
+ if (match(Op1, m_AllOnes()))
+ return Op1;
+
+ // A | ~A = ~A | A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (A & ?) | A = A
+ Value *A = 0, *B = 0;
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A | (A & ?) = A
+ if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ // ~(A & ?) | A = -1
+ if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+ (A == Op1 || B == Op1))
+ return Constant::getAllOnesValue(Op1->getType());
+
+ // A | ~(A & ?) = -1
+ if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+ (A == Op0 || B == Op0))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+ Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+}
+
+/// SimplifyXorInst - Given operands for a Xor, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // A ^ undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // A ^ 0 = A
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // A ^ A = 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // A ^ ~A = ~A ^ A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
+ Q, MaxRecurse))
+ return V;
+
+ // Threading Xor over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A ^ select(cond, B, C)" means evaluating
+ // "A^B" and "A^C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+}
+
+static Type *GetCompareTy(Value *Op) {
+ return CmpInst::makeCmpResultType(Op->getType());
+}
+
+/// ExtractEquivalentCondition - Rummage around inside V looking for something
+/// equivalent to the comparison "LHS Pred RHS". Return such a value if found,
+/// otherwise return null. Helper function for analyzing max/min idioms.
+static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (!SI)
+ return 0;
+ CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+ if (!Cmp)
+ return 0;
+ Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1);
+ if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS)
+ return Cmp;
+ if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) &&
+ LHS == CmpRHS && RHS == CmpLHS)
+ return Cmp;
+ return 0;
+}
+
+// A significant optimization not implemented here is assuming that alloca
+// addresses are not equal to incoming argument values. They don't *alias*,
+// as we say, but that doesn't mean they aren't equal, so we take a
+// conservative approach.
+//
+// This is inspired in part by C++11 5.10p1:
+// "Two pointers of the same type compare equal if and only if they are both
+// null, both point to the same function, or both represent the same
+// address."
+//
+// This is pretty permissive.
+//
+// It's also partly due to C11 6.5.9p6:
+// "Two pointers compare equal if and only if both are null pointers, both are
+// pointers to the same object (including a pointer to an object and a
+// subobject at its beginning) or function, both are pointers to one past the
+// last element of the same array object, or one is a pointer to one past the
+// end of one array object and the other is a pointer to the start of a
+// different array object that happens to immediately follow the first array
+// object in the address space.)
+//
+// C11's version is more restrictive, however there's no reason why an argument
+// couldn't be a one-past-the-end value for a stack object in the caller and be
+// equal to the beginning of a stack object in the callee.
+//
+// If the C and C++ standards are ever made sufficiently restrictive in this
+// area, it may be possible to update LLVM's semantics accordingly and reinstate
+// this optimization.
+static Constant *computePointerICmp(const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS) {
+ // First, skip past any trivial no-ops.
+ LHS = LHS->stripPointerCasts();
+ RHS = RHS->stripPointerCasts();
+
+ // A non-null pointer is not equal to a null pointer.
+ if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) &&
+ (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
+
+ // We can only fold certain predicates on pointer comparisons.
+ switch (Pred) {
+ default:
+ return 0;
+
+ // Equality comaprisons are easy to fold.
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_NE:
+ break;
+
+ // We can only handle unsigned relational comparisons because 'inbounds' on
+ // a GEP only protects against unsigned wrapping.
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ // However, we have to switch them to their signed variants to handle
+ // negative indices from the base pointer.
+ Pred = ICmpInst::getSignedPredicate(Pred);
+ break;
+ }
+
+ // Strip off any constant offsets so that we can reason about them.
+ // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets
+ // here and compare base addresses like AliasAnalysis does, however there are
+ // numerous hazards. AliasAnalysis and its utilities rely on special rules
+ // governing loads and stores which don't apply to icmps. Also, AliasAnalysis
+ // doesn't need to guarantee pointer inequality when it says NoAlias.
+ Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
+ Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+
+ // If LHS and RHS are related via constant offsets to the same base
+ // value, we can replace it with an icmp which just compares the offsets.
+ if (LHS == RHS)
+ return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+
+ // Various optimizations for (in)equality comparisons.
+ if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
+ // Different non-empty allocations that exist at the same time have
+ // different addresses (if the program can tell). Global variables always
+ // exist, so they always exist during the lifetime of each other and all
+ // allocas. Two different allocas usually have different addresses...
+ //
+ // However, if there's an @llvm.stackrestore dynamically in between two
+ // allocas, they may have the same address. It's tempting to reduce the
+ // scope of the problem by only looking at *static* allocas here. That would
+ // cover the majority of allocas while significantly reducing the likelihood
+ // of having an @llvm.stackrestore pop up in the middle. However, it's not
+ // actually impossible for an @llvm.stackrestore to pop up in the middle of
+ // an entry block. Also, if we have a block that's not attached to a
+ // function, we can't tell if it's "static" under the current definition.
+ // Theoretically, this problem could be fixed by creating a new kind of
+ // instruction kind specifically for static allocas. Such a new instruction
+ // could be required to be at the top of the entry block, thus preventing it
+ // from being subject to a @llvm.stackrestore. Instcombine could even
+ // convert regular allocas into these special allocas. It'd be nifty.
+ // However, until then, this problem remains open.
+ //
+ // So, we'll assume that two non-empty allocas have different addresses
+ // for now.
+ //
+ // With all that, if the offsets are within the bounds of their allocations
+ // (and not one-past-the-end! so we can't use inbounds!), and their
+ // allocations aren't the same, the pointers are not equal.
+ //
+ // Note that it's not necessary to check for LHS being a global variable
+ // address, due to canonicalization and constant folding.
+ if (isa<AllocaInst>(LHS) &&
+ (isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) {
+ ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset);
+ ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset);
+ uint64_t LHSSize, RHSSize;
+ if (LHSOffsetCI && RHSOffsetCI &&
+ getObjectSize(LHS, LHSSize, TD, TLI) &&
+ getObjectSize(RHS, RHSSize, TD, TLI)) {
+ const APInt &LHSOffsetValue = LHSOffsetCI->getValue();
+ const APInt &RHSOffsetValue = RHSOffsetCI->getValue();
+ if (!LHSOffsetValue.isNegative() &&
+ !RHSOffsetValue.isNegative() &&
+ LHSOffsetValue.ult(LHSSize) &&
+ RHSOffsetValue.ult(RHSSize)) {
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
+ }
+ }
+
+ // Repeat the above check but this time without depending on DataLayout
+ // or being able to compute a precise size.
+ if (!cast<PointerType>(LHS->getType())->isEmptyTy() &&
+ !cast<PointerType>(RHS->getType())->isEmptyTy() &&
+ LHSOffset->isNullValue() &&
+ RHSOffset->isNullValue())
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
+ }
+ }
+
+ // Otherwise, fail.
+ return 0;
+}
+
+/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ Type *ITy = GetCompareTy(LHS); // The return type.
+ Type *OpTy = LHS->getType(); // The operand type.
+
+ // icmp X, X -> true/false
+ // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
+ // because X could be 0.
+ if (LHS == RHS || isa<UndefValue>(RHS))
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ // Special case logic when the operands have i1 type.
+ if (OpTy->getScalarType()->isIntegerTy(1)) {
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_EQ:
+ // X == 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_NE:
+ // X != 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // X >=u 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // X <s 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // X <=s -1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ }
+ }
+
+ // If we are comparing with zero then try hard since this is a common case.
+ if (match(RHS, m_Zero())) {
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ return getFalse(ITy);
+ case ICmpInst::ICMP_UGE:
+ return getTrue(ITy);
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, Q.TD))
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, Q.TD))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
+ if (LHSKnownNegative)
+ return getTrue(ITy);
+ if (LHSKnownNonNegative)
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
+ if (LHSKnownNegative)
+ return getTrue(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
+ if (LHSKnownNegative)
+ return getFalse(ITy);
+ if (LHSKnownNonNegative)
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SGT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
+ if (LHSKnownNegative)
+ return getFalse(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
+ return getTrue(ITy);
+ break;
+ }
+ }
+
+ // See if we are doing a comparison with a constant integer.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Rule out tautological comparisons (eg., ult 0 or uge 0).
+ ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue());
+ if (RHS_CR.isEmptySet())
+ return ConstantInt::getFalse(CI->getContext());
+ if (RHS_CR.isFullSet())
+ return ConstantInt::getTrue(CI->getContext());
+
+ // Many binary operators with constant RHS have easy to compute constant
+ // range. Use them to check whether the comparison is a tautology.
+ uint32_t Width = CI->getBitWidth();
+ APInt Lower = APInt(Width, 0);
+ APInt Upper = APInt(Width, 0);
+ ConstantInt *CI2;
+ if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) {
+ // 'urem x, CI2' produces [0, CI2).
+ Upper = CI2->getValue();
+ } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) {
+ // 'srem x, CI2' produces (-|CI2|, |CI2|).
+ Upper = CI2->getValue().abs();
+ Lower = (-Upper) + 1;
+ } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) {
+ // 'udiv CI2, x' produces [0, CI2].
+ Upper = CI2->getValue() + 1;
+ } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
+ // 'udiv x, CI2' produces [0, UINT_MAX / CI2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (!CI2->isZero())
+ Upper = NegOne.udiv(CI2->getValue()) + 1;
+ } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
+ // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ APInt Val = CI2->getValue().abs();
+ if (!Val.isMinValue()) {
+ Lower = IntMin.sdiv(Val);
+ Upper = IntMax.sdiv(Val) + 1;
+ }
+ } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
+ // 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (CI2->getValue().ult(Width))
+ Upper = NegOne.lshr(CI2->getValue()) + 1;
+ } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
+ // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (CI2->getValue().ult(Width)) {
+ Lower = IntMin.ashr(CI2->getValue());
+ Upper = IntMax.ashr(CI2->getValue()) + 1;
+ }
+ } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
+ // 'or x, CI2' produces [CI2, UINT_MAX].
+ Lower = CI2->getValue();
+ } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
+ // 'and x, CI2' produces [0, CI2].
+ Upper = CI2->getValue() + 1;
+ }
+ if (Lower != Upper) {
+ ConstantRange LHS_CR = ConstantRange(Lower, Upper);
+ if (RHS_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+ if (RHS_CR.inverse().contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
+ // Compare of cast, for example (zext X) != 0 -> X != 0
+ if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+ Instruction *LI = cast<CastInst>(LHS);
+ Value *SrcOp = LI->getOperand(0);
+ Type *SrcTy = SrcOp->getType();
+ Type *DstTy = LI->getType();
+
+ // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+ // if the integer type is the same size as the pointer type.
+ if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
+ Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // Transfer the cast to the constant.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+ ConstantExpr::getIntToPtr(RHSC, SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+ if (RI->getOperand(0)->getType() == SrcTy)
+ // Compare without the cast.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ Q, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (isa<ZExtInst>(LHS)) {
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+ // same type.
+ if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that signed predicates become unsigned.
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, RI->getOperand(0), Q,
+ MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two zero-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, Trunc, Q, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+ // there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ // LHS <u RHS.
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // LHS is non-negative. If RHS is negative then LHS >s LHS. If RHS
+ // is non-negative then LHS <s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+ }
+ }
+ }
+ }
+
+ if (isa<SExtInst>(LHS)) {
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that the predicate does not change.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ Q, MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two sign-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are all equal, while RHS has varying
+ // bits there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ case ICmpInst::ICMP_EQ:
+ return ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_NE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // If RHS is non-negative then LHS <s RHS. If RHS is negative then
+ // LHS >s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+
+ // If LHS is non-negative then LHS <u RHS. If LHS is negative then
+ // LHS >u RHS.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // Comparison is true iff the LHS <s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+ Constant::getNullValue(SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // Comparison is true iff the LHS >=s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+ Constant::getNullValue(SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Special logic for binary operators.
+ BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
+ if (MaxRecurse && (LBO || RBO)) {
+ // Analyze the case when either LHS or RHS is an add instruction.
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
+ bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
+ if (LBO && LBO->getOpcode() == Instruction::Add) {
+ A = LBO->getOperand(0); B = LBO->getOperand(1);
+ NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ }
+ if (RBO && RBO->getOpcode() == Instruction::Add) {
+ C = RBO->getOperand(0); D = RBO->getOperand(1);
+ NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ }
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == RHS || B == RHS) && NoLHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
+ Constant::getNullValue(RHS->getType()),
+ Q, MaxRecurse-1))
+ return V;
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == LHS || D == LHS) && NoRHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred,
+ Constant::getNullValue(LHS->getType()),
+ C == LHS ? D : C, Q, MaxRecurse-1))
+ return V;
+
+ // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoLHSWrapProblem && NoRHSWrapProblem) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y, *Z;
+ if (A == C) {
+ // C + B == C + D -> B == D
+ Y = B;
+ Z = D;
+ } else if (A == D) {
+ // D + B == C + D -> B == C
+ Y = B;
+ Z = C;
+ } else if (B == C) {
+ // A + C == C + D -> A == D
+ Y = A;
+ Z = D;
+ } else {
+ assert(B == D);
+ // A + D == C + D -> A == C
+ Y = A;
+ Z = C;
+ }
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
+ bool KnownNonNegative, KnownNegative;
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return getFalse(ITy);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return getTrue(ITy);
+ }
+ }
+ if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
+ bool KnownNonNegative, KnownNegative;
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return getTrue(ITy);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return getFalse(ITy);
+ }
+ }
+
+ // x udiv y <=u x.
+ if (LBO && match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) {
+ // icmp pred (X /u Y), X
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
+ if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
+ LBO->getOperand(1) == RBO->getOperand(1)) {
+ switch (LBO->getOpcode()) {
+ default: break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ if (ICmpInst::isSigned(Pred))
+ break;
+ // fall-through
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ if (!LBO->isExact() || !RBO->isExact())
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), Q, MaxRecurse-1))
+ return V;
+ break;
+ case Instruction::Shl: {
+ bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
+ bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && ICmpInst::isSigned(Pred))
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+
+ // Simplify comparisons involving max/min.
+ Value *A, *B;
+ CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
+ CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
+
+ // Signed variants on "max(a,b)>=a -> true".
+ if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // smax(A, B) pred A.
+ EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+ // We analyze this as smax(A, B) pred A.
+ P = Pred;
+ } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred smax(A, B).
+ EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+ // We analyze this as smax(A, B) swapped-pred A.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+ (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // smin(A, B) pred A.
+ EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+ // We analyze this as smax(-A, -B) swapped-pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred smin(A, B).
+ EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+ // We analyze this as smax(-A, -B) pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = Pred;
+ }
+ if (P != CmpInst::BAD_ICMP_PREDICATE) {
+ // Cases correspond to "max(A, B) p A".
+ switch (P) {
+ default:
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_SLE:
+ // Equivalent to "A EqP B". This may be the same as the condition tested
+ // in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ return V;
+ // Otherwise, see if "A EqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
+ return V;
+ break;
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_SGT: {
+ CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+ // Equivalent to "A InvEqP B". This may be the same as the condition
+ // tested in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ return V;
+ // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ case CmpInst::ICMP_SGE:
+ // Always true.
+ return getTrue(ITy);
+ case CmpInst::ICMP_SLT:
+ // Always false.
+ return getFalse(ITy);
+ }
+ }
+
+ // Unsigned variants on "max(a,b)>=a -> true".
+ P = CmpInst::BAD_ICMP_PREDICATE;
+ if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // umax(A, B) pred A.
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+ // We analyze this as umax(A, B) pred A.
+ P = Pred;
+ } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred umax(A, B).
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+ // We analyze this as umax(A, B) swapped-pred A.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+ (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // umin(A, B) pred A.
+ EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+ // We analyze this as umax(-A, -B) swapped-pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred umin(A, B).
+ EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+ // We analyze this as umax(-A, -B) pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = Pred;
+ }
+ if (P != CmpInst::BAD_ICMP_PREDICATE) {
+ // Cases correspond to "max(A, B) p A".
+ switch (P) {
+ default:
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_ULE:
+ // Equivalent to "A EqP B". This may be the same as the condition tested
+ // in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ return V;
+ // Otherwise, see if "A EqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
+ return V;
+ break;
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_UGT: {
+ CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+ // Equivalent to "A InvEqP B". This may be the same as the condition
+ // tested in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ return V;
+ // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ case CmpInst::ICMP_UGE:
+ // Always true.
+ return getTrue(ITy);
+ case CmpInst::ICMP_ULT:
+ // Always false.
+ return getFalse(ITy);
+ }
+ }
+
+ // Variants on "max(x,y) >= min(x,z)".
+ Value *C, *D;
+ if (match(LHS, m_SMax(m_Value(A), m_Value(B))) &&
+ match(RHS, m_SMin(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // max(x, ?) pred min(x, ?).
+ if (Pred == CmpInst::ICMP_SGE)
+ // Always true.
+ return getTrue(ITy);
+ if (Pred == CmpInst::ICMP_SLT)
+ // Always false.
+ return getFalse(ITy);
+ } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+ match(RHS, m_SMax(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // min(x, ?) pred max(x, ?).
+ if (Pred == CmpInst::ICMP_SLE)
+ // Always true.
+ return getTrue(ITy);
+ if (Pred == CmpInst::ICMP_SGT)
+ // Always false.
+ return getFalse(ITy);
+ } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) &&
+ match(RHS, m_UMin(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // max(x, ?) pred min(x, ?).
+ if (Pred == CmpInst::ICMP_UGE)
+ // Always true.
+ return getTrue(ITy);
+ if (Pred == CmpInst::ICMP_ULT)
+ // Always false.
+ return getFalse(ITy);
+ } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+ match(RHS, m_UMax(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // min(x, ?) pred max(x, ?).
+ if (Pred == CmpInst::ICMP_ULE)
+ // Always true.
+ return getTrue(ITy);
+ if (Pred == CmpInst::ICMP_UGT)
+ // Always false.
+ return getFalse(ITy);
+ }
+
+ // Simplify comparisons of related pointers using a powerful, recursive
+ // GEP-walk when we have target data available..
+ if (LHS->getType()->isPointerTy())
+ if (Constant *C = computePointerICmp(Q.TD, Q.TLI, Pred, LHS, RHS))
+ return C;
+
+ if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
+ if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) {
+ if (GLHS->getPointerOperand() == GRHS->getPointerOperand() &&
+ GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() &&
+ (ICmpInst::isEquality(Pred) ||
+ (GLHS->isInBounds() && GRHS->isInBounds() &&
+ Pred == ICmpInst::getSignedPredicate(Pred)))) {
+ // The bases are equal and the indices are constant. Build a constant
+ // expression GEP with the same indices and a null base pointer to see
+ // what constant folding can make out of it.
+ Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
+ SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end());
+ Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS);
+
+ SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end());
+ Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS);
+ return ConstantExpr::getICmp(Pred, NewLHS, NewRHS);
+ }
+ }
+ }
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Fold trivial predicates.
+ if (Pred == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ if (Pred == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+
+ if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef
+ return UndefValue::get(GetCompareTy(LHS));
+
+ // fcmp x,x -> true/false. Not all compares are foldable.
+ if (LHS == RHS) {
+ if (CmpInst::isTrueWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+ if (CmpInst::isFalseWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // If the constant is a nan, see if we can fold the comparison based on it.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ return ConstantInt::getFalse(CFP->getContext());
+ assert(FCmpInst::isUnordered(Pred) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ConstantInt::getTrue(CFP->getContext());
+ }
+ // Check whether the constant is an infinity.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ // No value is ordered and less than negative infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_UGE:
+ // All values are unordered with or at least negative infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ } else {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGT:
+ // No value is ordered and greater than infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_ULE:
+ // All values are unordered with and at most infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
+/// the result. If not, this returns null.
+static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
+ Value *FalseVal, const Query &Q,
+ unsigned MaxRecurse) {
+ // select true, X, Y -> X
+ // select false, X, Y -> Y
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
+ return CB->getZExtValue() ? TrueVal : FalseVal;
+
+ // select C, X, X -> X
+ if (TrueVal == FalseVal)
+ return TrueVal;
+
+ if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
+ if (isa<Constant>(TrueVal))
+ return TrueVal;
+ return FalseVal;
+ }
+ if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
+ return FalseVal;
+ if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
+ return TrueVal;
+
+ return 0;
+}
+
+Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
+ // The type of the GEP pointer operand.
+ PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType());
+ // The GEP pointer operand is not a pointer, it's a vector of pointers.
+ if (!PtrTy)
+ return 0;
+
+ // getelementptr P -> P.
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ if (isa<UndefValue>(Ops[0])) {
+ // Compute the (pointer) type returned by the GEP instruction.
+ Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
+ Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
+ return UndefValue::get(GEPTy);
+ }
+
+ if (Ops.size() == 2) {
+ // getelementptr P, 0 -> P.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1]))
+ if (C->isZero())
+ return Ops[0];
+ // getelementptr P, N -> P if P points to a type of zero size.
+ if (Q.TD) {
+ Type *Ty = PtrTy->getElementType();
+ if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0)
+ return Ops[0];
+ }
+ }
+
+ // Check to see if this is constant foldable.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (!isa<Constant>(Ops[i]))
+ return 0;
+
+ return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
+}
+
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit);
+}
+
+/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
+/// can fold the result. If not, this returns null.
+static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs, const Query &Q,
+ unsigned) {
+ if (Constant *CAgg = dyn_cast<Constant>(Agg))
+ if (Constant *CVal = dyn_cast<Constant>(Val))
+ return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
+
+ // insertvalue x, undef, n -> x
+ if (match(Val, m_Undef()))
+ return Agg;
+
+ // insertvalue x, (extractvalue y, n), n
+ if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val))
+ if (EV->getAggregateOperand()->getType() == Agg->getType() &&
+ EV->getIndices() == Idxs) {
+ // insertvalue undef, (extractvalue y, n), n -> y
+ if (match(Agg, m_Undef()))
+ return EV->getAggregateOperand();
+
+ // insertvalue y, (extractvalue y, n), n -> y
+ if (Agg == EV->getAggregateOperand())
+ return Agg;
+ }
+
+ return 0;
+}
+
+Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
+static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
+ // If all of the PHI's incoming values are the same then replace the PHI node
+ // with the common value.
+ Value *CommonValue = 0;
+ bool HasUndefInput = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PN->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PN) continue;
+ if (isa<UndefValue>(Incoming)) {
+ // Remember that we saw an undef value, but otherwise ignore them.
+ HasUndefInput = true;
+ continue;
+ }
+ if (CommonValue && Incoming != CommonValue)
+ return 0; // Not the same, bail out.
+ CommonValue = Incoming;
+ }
+
+ // If CommonValue is null then all of the incoming values were either undef or
+ // equal to the phi node itself.
+ if (!CommonValue)
+ return UndefValue::get(PN->getType());
+
+ // If we have a PHI node like phi(X, undef, X), where X is defined by some
+ // instruction, we cannot return X as the result of the PHI node unless it
+ // dominates the PHI block.
+ if (HasUndefInput)
+ return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0;
+
+ return CommonValue;
+}
+
+static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
+ if (Constant *C = dyn_cast<Constant>(Op))
+ return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI);
+
+ return 0;
+}
+
+Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit);
+}
+
+//=== Helper functions for higher up the class hierarchy.
+
+/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ Q, MaxRecurse);
+ case Instruction::FAdd:
+ return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+
+ case Instruction::Sub:
+ return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ Q, MaxRecurse);
+ case Instruction::FSub:
+ return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::FMul:
+ return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::Shl:
+ return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ Q, MaxRecurse);
+ case Instruction::LShr:
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
+ case Instruction::AShr:
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
+ default:
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ Constant *COps[] = {CLHS, CRHS};
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD,
+ Q.TLI);
+ }
+
+ // If the operation is associative, try some generic simplifications.
+ if (Instruction::isAssociative(Opcode))
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ return 0;
+ }
+}
+
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit);
+}
+
+/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
+/// fold the result.
+static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
+ return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+}
+
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+static bool IsIdempotent(Intrinsic::ID ID) {
+ switch (ID) {
+ default: return false;
+
+ // Unary idempotent: f(f(x)) = f(x)
+ case Intrinsic::fabs:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ return true;
+ }
+}
+
+template <typename IterTy>
+static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd,
+ const Query &Q, unsigned MaxRecurse) {
+ // Perform idempotent optimizations
+ if (!IsIdempotent(IID))
+ return 0;
+
+ // Unary Ops
+ if (std::distance(ArgBegin, ArgEnd) == 1)
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
+ if (II->getIntrinsicID() == IID)
+ return II;
+
+ return 0;
+}
+
+template <typename IterTy>
+static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
+ const Query &Q, unsigned MaxRecurse) {
+ Type *Ty = V->getType();
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ Ty = PTy->getElementType();
+ FunctionType *FTy = cast<FunctionType>(Ty);
+
+ // call undef -> undef
+ if (isa<UndefValue>(V))
+ return UndefValue::get(FTy->getReturnType());
+
+ Function *F = dyn_cast<Function>(V);
+ if (!F)
+ return 0;
+
+ if (unsigned IID = F->getIntrinsicID())
+ if (Value *Ret =
+ SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse))
+ return Ret;
+
+ if (!canConstantFoldCallTo(F))
+ return 0;
+
+ SmallVector<Constant *, 4> ConstantArgs;
+ ConstantArgs.reserve(ArgEnd - ArgBegin);
+ for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) {
+ Constant *C = dyn_cast<Constant>(*I);
+ if (!C)
+ return 0;
+ ConstantArgs.push_back(C);
+ }
+
+ return ConstantFoldCall(F, ConstantArgs, Q.TLI);
+}
+
+Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
+ User::op_iterator ArgEnd, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(TD, TLI, DT),
+ RecursionLimit);
+}
+
+Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyCall(V, Args.begin(), Args.end(), Query(TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// SimplifyInstruction - See if we can compute a simplified version of this
+/// instruction. If not, this returns null.
+Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ Value *Result;
+
+ switch (I->getOpcode()) {
+ default:
+ Result = ConstantFoldInstruction(I, TD, TLI);
+ break;
+ case Instruction::FAdd:
+ Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), TD, TLI, DT);
+ break;
+ case Instruction::Add:
+ Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, TLI, DT);
+ break;
+ case Instruction::FSub:
+ Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), TD, TLI, DT);
+ break;
+ case Instruction::Sub:
+ Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, TLI, DT);
+ break;
+ case Instruction::FMul:
+ Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), TD, TLI, DT);
+ break;
+ case Instruction::Mul:
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::SDiv:
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::UDiv:
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::FDiv:
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::SRem:
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::URem:
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::FRem:
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::Shl:
+ Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, TLI, DT);
+ break;
+ case Instruction::LShr:
+ Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(),
+ TD, TLI, DT);
+ break;
+ case Instruction::AShr:
+ Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(),
+ TD, TLI, DT);
+ break;
+ case Instruction::And:
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::Or:
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::Xor:
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::ICmp:
+ Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::FCmp:
+ Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+ break;
+ case Instruction::Select:
+ Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
+ I->getOperand(2), TD, TLI, DT);
+ break;
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ Result = SimplifyGEPInst(Ops, TD, TLI, DT);
+ break;
+ }
+ case Instruction::InsertValue: {
+ InsertValueInst *IV = cast<InsertValueInst>(I);
+ Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
+ IV->getInsertedValueOperand(),
+ IV->getIndices(), TD, TLI, DT);
+ break;
+ }
+ case Instruction::PHI:
+ Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT));
+ break;
+ case Instruction::Call: {
+ CallSite CS(cast<CallInst>(I));
+ Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
+ TD, TLI, DT);
+ break;
+ }
+ case Instruction::Trunc:
+ Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT);
+ break;
+ }
+
+ /// If called on unreachable code, the above logic may report that the
+ /// instruction simplified to itself. Make life easier for users by
+ /// detecting that case here, returning a safe value instead.
+ return Result == I ? UndefValue::get(I->getType()) : Result;
+}
+
+/// \brief Implementation of recursive simplification through an instructions
+/// uses.
+///
+/// This is the common implementation of the recursive simplification routines.
+/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to
+/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of
+/// instructions to process and attempt to simplify it using
+/// InstructionSimplify.
+///
+/// This routine returns 'true' only when *it* simplifies something. The passed
+/// in simplified value does not count toward this.
+static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ bool Simplified = false;
+ SmallSetVector<Instruction *, 8> Worklist;
+
+ // If we have an explicit value to collapse to, do that round of the
+ // simplification loop by hand initially.
+ if (SimpleV) {
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI)
+ if (*UI != I)
+ Worklist.insert(cast<Instruction>(*UI));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // Gracefully handle edge cases where the instruction is not wired into any
+ // parent block.
+ if (I->getParent())
+ I->eraseFromParent();
+ } else {
+ Worklist.insert(I);
+ }
+
+ // Note that we must test the size on each iteration, the worklist can grow.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ I = Worklist[Idx];
+
+ // See if this instruction simplifies.
+ SimpleV = SimplifyInstruction(I, TD, TLI, DT);
+ if (!SimpleV)
+ continue;
+
+ Simplified = true;
+
+ // Stash away all the uses of the old instruction so we can check them for
+ // recursive simplifications after a RAUW. This is cheaper than checking all
+ // uses of To on the recursive step in most cases.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI)
+ Worklist.insert(cast<Instruction>(*UI));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // Gracefully handle edge cases where the instruction is not wired into any
+ // parent block.
+ if (I->getParent())
+ I->eraseFromParent();
+ }
+ return Simplified;
+}
+
+bool llvm::recursivelySimplifyInstruction(Instruction *I,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return replaceAndRecursivelySimplifyImpl(I, 0, TD, TLI, DT);
+}
+
+bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
+ assert(SimpleV && "Must provide a simplified value.");
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, TD, TLI, DT);
+}
diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp
new file mode 100644
index 000000000000..26a0322407ec
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Interval.cpp
@@ -0,0 +1,58 @@
+//===- Interval.cpp - Interval class code ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the Interval class, which represents a
+// partition of a control flow graph of some kind.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Interval.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Interval Implementation
+//===----------------------------------------------------------------------===//
+
+// isLoop - Find out if there is a back edge in this interval...
+//
+bool Interval::isLoop() const {
+ // There is a loop in this interval iff one of the predecessors of the header
+ // node lives in the interval.
+ for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode);
+ I != E; ++I)
+ if (contains(*I))
+ return true;
+ return false;
+}
+
+
+void Interval::print(raw_ostream &OS) const {
+ OS << "-------------------------------------------------------------\n"
+ << "Interval Contents:\n";
+
+ // Print out all of the basic blocks in the interval...
+ for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(),
+ E = Nodes.end(); I != E; ++I)
+ OS << **I << "\n";
+
+ OS << "Interval Predecessors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(),
+ E = Predecessors.end(); I != E; ++I)
+ OS << **I << "\n";
+
+ OS << "Interval Successors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(),
+ E = Successors.end(); I != E; ++I)
+ OS << **I << "\n";
+}
diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
new file mode 100644
index 000000000000..2e259b147b8b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
@@ -0,0 +1,114 @@
+//===- IntervalPartition.cpp - Interval Partition module code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the IntervalPartition class, which
+// calculates and represent the interval partition of a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IntervalIterator.h"
+using namespace llvm;
+
+char IntervalPartition::ID = 0;
+INITIALIZE_PASS(IntervalPartition, "intervals",
+ "Interval Partition Construction", true, true)
+
+//===----------------------------------------------------------------------===//
+// IntervalPartition Implementation
+//===----------------------------------------------------------------------===//
+
+// releaseMemory - Reset state back to before function was analyzed
+void IntervalPartition::releaseMemory() {
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ delete Intervals[i];
+ IntervalMap.clear();
+ Intervals.clear();
+ RootInterval = 0;
+}
+
+void IntervalPartition::print(raw_ostream &O, const Module*) const {
+ for(unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ Intervals[i]->print(O);
+}
+
+// addIntervalToPartition - Add an interval to the internal list of intervals,
+// and then add mappings from all of the basic blocks in the interval to the
+// interval itself (in the IntervalMap).
+//
+void IntervalPartition::addIntervalToPartition(Interval *I) {
+ Intervals.push_back(I);
+
+ // Add mappings for all of the basic blocks in I to the IntervalPartition
+ for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end();
+ It != End; ++It)
+ IntervalMap.insert(std::make_pair(*It, I));
+}
+
+// updatePredecessors - Interval generation only sets the successor fields of
+// the interval data structures. After interval generation is complete,
+// run through all of the intervals and propagate successor info as
+// predecessor info.
+//
+void IntervalPartition::updatePredecessors(Interval *Int) {
+ BasicBlock *Header = Int->getHeaderNode();
+ for (Interval::succ_iterator I = Int->Successors.begin(),
+ E = Int->Successors.end(); I != E; ++I)
+ getBlockInterval(*I)->Predecessors.push_back(Header);
+}
+
+// IntervalPartition ctor - Build the first level interval partition for the
+// specified function...
+//
+bool IntervalPartition::runOnFunction(Function &F) {
+ // Pass false to intervals_begin because we take ownership of it's memory
+ function_interval_iterator I = intervals_begin(&F, false);
+ assert(I != intervals_end(&F) && "No intervals in function!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (function_interval_iterator E = intervals_end(&F); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+ return false;
+}
+
+
+// IntervalPartition ctor - Build a reduced interval partition from an
+// existing interval graph. This takes an additional boolean parameter to
+// distinguish it from a copy constructor. Always pass in false for now.
+//
+IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
+ : FunctionPass(ID) {
+ assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
+
+ // Pass false to intervals_begin because we take ownership of it's memory
+ interval_part_interval_iterator I = intervals_begin(IP, false);
+ assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+}
+
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
new file mode 100644
index 000000000000..66b5e852c02f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -0,0 +1,1143 @@
+//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lazy-value-info"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include <map>
+#include <stack>
+using namespace llvm;
+using namespace PatternMatch;
+
+char LazyValueInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+
+namespace llvm {
+ FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LVILatticeVal
+//===----------------------------------------------------------------------===//
+
+/// LVILatticeVal - This is the information tracked by LazyValueInfo for each
+/// value.
+///
+/// FIXME: This is basically just for bringup, this can be made a lot more rich
+/// in the future.
+///
+namespace {
+class LVILatticeVal {
+ enum LatticeValueTy {
+ /// undefined - This Value has no known value yet.
+ undefined,
+
+ /// constant - This Value has a specific constant value.
+ constant,
+ /// notconstant - This Value is known to not have the specified value.
+ notconstant,
+
+ /// constantrange - The Value falls within this range.
+ constantrange,
+
+ /// overdefined - This value is not known to be constant, and we know that
+ /// it has a value.
+ overdefined
+ };
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'notconstant' value.
+ LatticeValueTy Tag;
+ Constant *Val;
+ ConstantRange Range;
+
+public:
+ LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {}
+
+ static LVILatticeVal get(Constant *C) {
+ LVILatticeVal Res;
+ if (!isa<UndefValue>(C))
+ Res.markConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getNot(Constant *C) {
+ LVILatticeVal Res;
+ if (!isa<UndefValue>(C))
+ Res.markNotConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getRange(ConstantRange CR) {
+ LVILatticeVal Res;
+ Res.markConstantRange(CR);
+ return Res;
+ }
+
+ bool isUndefined() const { return Tag == undefined; }
+ bool isConstant() const { return Tag == constant; }
+ bool isNotConstant() const { return Tag == notconstant; }
+ bool isConstantRange() const { return Tag == constantrange; }
+ bool isOverdefined() const { return Tag == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val;
+ }
+
+ Constant *getNotConstant() const {
+ assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
+ return Val;
+ }
+
+ ConstantRange getConstantRange() const {
+ assert(isConstantRange() &&
+ "Cannot get the constant-range of a non-constant-range!");
+ return Range;
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+ Tag = overdefined;
+ return true;
+ }
+
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()));
+ if (isa<UndefValue>(V))
+ return false;
+
+ assert((!isConstant() || getConstant() == V) &&
+ "Marking constant with different value");
+ assert(isUndefined());
+ Tag = constant;
+ Val = V;
+ return true;
+ }
+
+ /// markNotConstant - Return true if this is a change in status.
+ bool markNotConstant(Constant *V) {
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ if (isa<UndefValue>(V))
+ return false;
+
+ assert((!isConstant() || getConstant() != V) &&
+ "Marking constant !constant with same value");
+ assert((!isNotConstant() || getNotConstant() == V) &&
+ "Marking !constant with different value");
+ assert(isUndefined() || isConstant());
+ Tag = notconstant;
+ Val = V;
+ return true;
+ }
+
+ /// markConstantRange - Return true if this is a change in status.
+ bool markConstantRange(const ConstantRange NewR) {
+ if (isConstantRange()) {
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ bool changed = Range != NewR;
+ Range = NewR;
+ return changed;
+ }
+
+ assert(isUndefined());
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ Tag = constantrange;
+ Range = NewR;
+ return true;
+ }
+
+ /// mergeIn - Merge the specified lattice value into this one, updating this
+ /// one and returning true if anything changed.
+ bool mergeIn(const LVILatticeVal &RHS) {
+ if (RHS.isUndefined() || isOverdefined()) return false;
+ if (RHS.isOverdefined()) return markOverdefined();
+
+ if (isUndefined()) {
+ Tag = RHS.Tag;
+ Val = RHS.Val;
+ Range = RHS.Range;
+ return true;
+ }
+
+ if (isConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return markOverdefined();
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>(
+ ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+ getConstant(),
+ RHS.getNotConstant())))
+ if (Res->isOne())
+ return markNotConstant(RHS.getNotConstant());
+
+ return markOverdefined();
+ }
+
+ // RHS is a ConstantRange, LHS is a non-integer Constant.
+
+ // FIXME: consider the case where RHS is a range [1, 0) and LHS is
+ // a function. The correct result is to pick up RHS.
+
+ return markOverdefined();
+ }
+
+ if (isNotConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return markOverdefined();
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>(
+ ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+ getNotConstant(),
+ RHS.getConstant())))
+ if (Res->isOne())
+ return false;
+
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ return markOverdefined();
+ }
+
+ assert(isConstantRange() && "New LVILattice type?");
+ if (!RHS.isConstantRange())
+ return markOverdefined();
+
+ ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+ if (NewR.isFullSet())
+ return markOverdefined();
+ return markConstantRange(NewR);
+ }
+};
+
+} // end anonymous namespace.
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val)
+ LLVM_ATTRIBUTE_USED;
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
+ if (Val.isUndefined())
+ return OS << "undefined";
+ if (Val.isOverdefined())
+ return OS << "overdefined";
+
+ if (Val.isNotConstant())
+ return OS << "notconstant<" << *Val.getNotConstant() << '>';
+ else if (Val.isConstantRange())
+ return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
+ << Val.getConstantRange().getUpper() << '>';
+ return OS << "constant<" << *Val.getConstant() << '>';
+}
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfoCache Decl
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// LVIValueHandle - A callback value handle updates the cache when
+ /// values are erased.
+ class LazyValueInfoCache;
+ struct LVIValueHandle : public CallbackVH {
+ LazyValueInfoCache *Parent;
+
+ LVIValueHandle(Value *V, LazyValueInfoCache *P)
+ : CallbackVH(V), Parent(P) { }
+
+ void deleted();
+ void allUsesReplacedWith(Value *V) {
+ deleted();
+ }
+ };
+}
+
+namespace {
+ /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
+ /// maintains information about queries across the clients' queries.
+ class LazyValueInfoCache {
+ /// ValueCacheEntryTy - This is all of the cached block information for
+ /// exactly one Value*. The entries are sorted by the BasicBlock* of the
+ /// entries, allowing us to do a lookup with a binary search.
+ typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
+
+ /// ValueCache - This is all of the cached information for all values,
+ /// mapped from Value* to key information.
+ std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+ /// OverDefinedCache - This tracks, on a per-block basis, the set of
+ /// values that are over-defined at the end of that block. This is required
+ /// for cache updating.
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+ DenseSet<OverDefinedPairTy> OverDefinedCache;
+
+ /// SeenBlocks - Keep track of all blocks that we have ever seen, so we
+ /// don't spend time removing unused blocks from our caches.
+ DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+
+ /// BlockValueStack - This stack holds the state of the value solver
+ /// during a query. It basically emulates the callstack of the naive
+ /// recursive value lookup process.
+ std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+
+ friend struct LVIValueHandle;
+
+ /// OverDefinedCacheUpdater - A helper object that ensures that the
+ /// OverDefinedCache is updated whenever solveBlockValue returns.
+ struct OverDefinedCacheUpdater {
+ LazyValueInfoCache *Parent;
+ Value *Val;
+ BasicBlock *BB;
+ LVILatticeVal &BBLV;
+
+ OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV,
+ LazyValueInfoCache *P)
+ : Parent(P), Val(V), BB(B), BBLV(LV) { }
+
+ bool markResult(bool changed) {
+ if (changed && BBLV.isOverdefined())
+ Parent->OverDefinedCache.insert(std::make_pair(BB, Val));
+ return changed;
+ }
+ };
+
+
+
+ LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
+ bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
+ LVILatticeVal &Result);
+ bool hasBlockValue(Value *Val, BasicBlock *BB);
+
+ // These methods process one work item and may add more. A false value
+ // returned means that the work item was not completely processed and must
+ // be revisited after going through the new items.
+ bool solveBlockValue(Value *Val, BasicBlock *BB);
+ bool solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB);
+ bool solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB);
+ bool solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI, BasicBlock *BB);
+
+ void solve();
+
+ ValueCacheEntryTy &lookup(Value *V) {
+ return ValueCache[LVIValueHandle(V, this)];
+ }
+
+ public:
+ /// getValueInBlock - This is the query interface to determine the lattice
+ /// value for the specified Value* at the end of the specified block.
+ LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
+
+ /// getValueOnEdge - This is the query interface to determine the lattice
+ /// value for the specified Value* that is true on the specified edge.
+ LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
+
+ /// threadEdge - This is the update interface to inform the cache that an
+ /// edge from PredBB to OldSucc has been threaded to be from PredBB to
+ /// NewSucc.
+ void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+
+ /// eraseBlock - This is part of the update interface to inform the cache
+ /// that a block has been deleted.
+ void eraseBlock(BasicBlock *BB);
+
+ /// clear - Empty the cache.
+ void clear() {
+ SeenBlocks.clear();
+ ValueCache.clear();
+ OverDefinedCache.clear();
+ }
+ };
+} // end anonymous namespace
+
+void LVIValueHandle::deleted() {
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+
+ SmallVector<OverDefinedPairTy, 4> ToErase;
+ for (DenseSet<OverDefinedPairTy>::iterator
+ I = Parent->OverDefinedCache.begin(),
+ E = Parent->OverDefinedCache.end();
+ I != E; ++I) {
+ if (I->second == getValPtr())
+ ToErase.push_back(*I);
+ }
+
+ for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+ E = ToErase.end(); I != E; ++I)
+ Parent->OverDefinedCache.erase(*I);
+
+ // This erasure deallocates *this, so it MUST happen after we're done
+ // using any and all members of *this.
+ Parent->ValueCache.erase(*this);
+}
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ // Shortcut if we have never seen this block.
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ if (I == SeenBlocks.end())
+ return;
+ SeenBlocks.erase(I);
+
+ SmallVector<OverDefinedPairTy, 4> ToErase;
+ for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+ E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == BB)
+ ToErase.push_back(*I);
+ }
+
+ for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+ E = ToErase.end(); I != E; ++I)
+ OverDefinedCache.erase(*I);
+
+ for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
+ I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+ I->second.erase(BB);
+}
+
+void LazyValueInfoCache::solve() {
+ while (!BlockValueStack.empty()) {
+ std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+ if (solveBlockValue(e.second, e.first)) {
+ assert(BlockValueStack.top() == e);
+ BlockValueStack.pop();
+ }
+ }
+}
+
+bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (isa<Constant>(Val))
+ return true;
+
+ LVIValueHandle ValHandle(Val, this);
+ std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I =
+ ValueCache.find(ValHandle);
+ if (I == ValueCache.end()) return false;
+ return I->second.count(BB);
+}
+
+LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val))
+ return LVILatticeVal::get(VC);
+
+ SeenBlocks.insert(BB);
+ return lookup(Val)[BB];
+}
+
+bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+ if (isa<Constant>(Val))
+ return true;
+
+ ValueCacheEntryTy &Cache = lookup(Val);
+ SeenBlocks.insert(BB);
+ LVILatticeVal &BBLV = Cache[BB];
+
+ // OverDefinedCacheUpdater is a helper object that will update
+ // the OverDefinedCache for us when this method exits. Make sure to
+ // call markResult on it as we exist, passing a bool to indicate if the
+ // cache needs updating, i.e. if we have solve a new value or not.
+ OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this);
+
+ // If we've already computed this block's value, return it.
+ if (!BBLV.isUndefined()) {
+ DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
+
+ // Since we're reusing a cached value here, we don't need to update the
+ // OverDefinedCahce. The cache will have been properly updated
+ // whenever the cached value was inserted.
+ ODCacheUpdater.markResult(false);
+ return true;
+ }
+
+ // Otherwise, this is the first time we're seeing this block. Reset the
+ // lattice value to overdefined, so that cycles will terminate and be
+ // conservatively correct.
+ BBLV.markOverdefined();
+
+ Instruction *BBI = dyn_cast<Instruction>(Val);
+ if (BBI == 0 || BBI->getParent() != BB) {
+ return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB));
+ }
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
+ BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+ return ODCacheUpdater.markResult(true);
+ }
+
+ // We can only analyze the definitions of certain classes of instructions
+ // (integral binops and casts at the moment), so bail if this isn't one.
+ LVILatticeVal Result;
+ if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) ||
+ !BBI->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ BBLV.markOverdefined();
+ return ODCacheUpdater.markResult(true);
+ }
+
+ // FIXME: We're currently limited to binops with a constant RHS. This should
+ // be improved.
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
+ if (BO && !isa<ConstantInt>(BO->getOperand(1))) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+
+ BBLV.markOverdefined();
+ return ODCacheUpdater.markResult(true);
+ }
+
+ return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB));
+}
+
+static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ return L->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(L->getPointerOperand()) == Ptr;
+ }
+ if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+ return S->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(S->getPointerOperand()) == Ptr;
+ }
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (MI->isVolatile()) return false;
+
+ // FIXME: check whether it has a valuerange that excludes zero?
+ ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
+ if (!Len || Len->isZero()) return false;
+
+ if (MI->getDestAddressSpace() == 0)
+ if (GetUnderlyingObject(MI->getRawDest()) == Ptr)
+ return true;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ if (MTI->getSourceAddressSpace() == 0)
+ if (GetUnderlyingObject(MTI->getRawSource()) == Ptr)
+ return true;
+ }
+ return false;
+}
+
+bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // If this is a pointer, and there's a load from that pointer in this BB,
+ // then we know that the pointer can't be NULL.
+ bool NotNull = false;
+ if (Val->getType()->isPointerTy()) {
+ if (isKnownNonNull(Val)) {
+ NotNull = true;
+ } else {
+ Value *UnderlyingVal = GetUnderlyingObject(Val);
+ // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge
+ // inside InstructionDereferencesPointer either.
+ if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, NULL, 1)) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ if (InstructionDereferencesPointer(BI, UnderlyingVal)) {
+ NotNull = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // If this is the entry block, we must be asking about an argument. The
+ // value is overdefined.
+ if (BB == &BB->getParent()->getEntryBlock()) {
+ assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
+ if (NotNull) {
+ PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ } else {
+ Result.markOverdefined();
+ }
+ BBLV = Result;
+ return true;
+ }
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult);
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+ // If we previously determined that this is a pointer that can't be null
+ // then return that rather than giving up entirely.
+ if (NotNull) {
+ PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ }
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined());
+ BBLV = Result;
+ return true;
+}
+
+bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PhiBB = PN->getIncomingBlock(i);
+ Value *PhiVal = PN->getIncomingValue(i);
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult);
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined() && "Possible PHI in entry block?");
+ BBLV = Result;
+ return true;
+}
+
+bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI,
+ BasicBlock *BB) {
+ // Figure out the range of the LHS. If that fails, bail.
+ if (!hasBlockValue(BBI->getOperand(0), BB)) {
+ BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0)));
+ return false;
+ }
+
+ LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
+ if (!LHSVal.isConstantRange()) {
+ BBLV.markOverdefined();
+ return true;
+ }
+
+ ConstantRange LHSRange = LHSVal.getConstantRange();
+ ConstantRange RHSRange(1);
+ IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
+ if (isa<BinaryOperator>(BBI)) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) {
+ RHSRange = ConstantRange(RHS->getValue());
+ } else {
+ BBLV.markOverdefined();
+ return true;
+ }
+ }
+
+ // NOTE: We're currently limited by the set of operations that ConstantRange
+ // can evaluate symbolically. Enhancing that set will allows us to analyze
+ // more definitions.
+ LVILatticeVal Result;
+ switch (BBI->getOpcode()) {
+ case Instruction::Add:
+ Result.markConstantRange(LHSRange.add(RHSRange));
+ break;
+ case Instruction::Sub:
+ Result.markConstantRange(LHSRange.sub(RHSRange));
+ break;
+ case Instruction::Mul:
+ Result.markConstantRange(LHSRange.multiply(RHSRange));
+ break;
+ case Instruction::UDiv:
+ Result.markConstantRange(LHSRange.udiv(RHSRange));
+ break;
+ case Instruction::Shl:
+ Result.markConstantRange(LHSRange.shl(RHSRange));
+ break;
+ case Instruction::LShr:
+ Result.markConstantRange(LHSRange.lshr(RHSRange));
+ break;
+ case Instruction::Trunc:
+ Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth()));
+ break;
+ case Instruction::SExt:
+ Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::ZExt:
+ Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::BitCast:
+ Result.markConstantRange(LHSRange);
+ break;
+ case Instruction::And:
+ Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
+ break;
+ case Instruction::Or:
+ Result.markConstantRange(LHSRange.binaryOr(RHSRange));
+ break;
+
+ // Unhandled instructions are overdefined.
+ default:
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ Result.markOverdefined();
+ break;
+ }
+
+ BBLV = Result;
+ return true;
+}
+
+/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
+/// Val is not constrained on the edge.
+static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
+ BasicBlock *BBTo, LVILatticeVal &Result) {
+ // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
+ // know that v != 0.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
+ // If this is a conditional branch and only one successor goes to BBTo, then
+ // we maybe able to infer something from the condition.
+ if (BI->isConditional() &&
+ BI->getSuccessor(0) != BI->getSuccessor(1)) {
+ bool isTrueDest = BI->getSuccessor(0) == BBTo;
+ assert(BI->getSuccessor(!isTrueDest) == BBTo &&
+ "BBTo isn't a successor of BBFrom");
+
+ // If V is the condition of the branch itself, then we know exactly what
+ // it is.
+ if (BI->getCondition() == Val) {
+ Result = LVILatticeVal::get(ConstantInt::get(
+ Type::getInt1Ty(Val->getContext()), isTrueDest));
+ return true;
+ }
+
+ // If the condition of the branch is an equality comparison, we may be
+ // able to infer the value.
+ ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (ICI && isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality() && ICI->getOperand(0) == Val) {
+ // We know that V has the RHS constant if this is a true SETEQ or
+ // false SETNE.
+ if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
+ Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ else
+ Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ return true;
+ }
+
+ // Recognize the range checking idiom that InstCombine produces.
+ // (X-C1) u< C2 --> [C1, C1+C2)
+ ConstantInt *NegOffset = 0;
+ if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
+ match(ICI->getOperand(0), m_Add(m_Specific(Val),
+ m_ConstantInt(NegOffset)));
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
+ if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
+ // Calculate the range of values that would satisfy the comparison.
+ ConstantRange CmpRange(CI->getValue());
+ ConstantRange TrueValues =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+
+ if (NegOffset) // Apply the offset from above.
+ TrueValues = TrueValues.subtract(NegOffset->getValue());
+
+ // If we're interested in the false dest, invert the condition.
+ if (!isTrueDest) TrueValues = TrueValues.inverse();
+
+ Result = LVILatticeVal::getRange(TrueValues);
+ return true;
+ }
+ }
+ }
+ }
+
+ // If the edge was formed by a switch on the value, then we may know exactly
+ // what it is.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
+ if (SI->getCondition() != Val)
+ return false;
+
+ bool DefaultCase = SI->getDefaultDest() == BBTo;
+ unsigned BitWidth = Val->getType()->getIntegerBitWidth();
+ ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/);
+
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ ConstantRange EdgeVal(i.getCaseValue()->getValue());
+ if (DefaultCase) {
+ // It is possible that the default destination is the destination of
+ // some cases. There is no need to perform difference for those cases.
+ if (i.getCaseSuccessor() != BBTo)
+ EdgesVals = EdgesVals.difference(EdgeVal);
+ } else if (i.getCaseSuccessor() == BBTo)
+ EdgesVals = EdgesVals.unionWith(EdgeVal);
+ }
+ Result = LVILatticeVal::getRange(EdgesVals);
+ return true;
+ }
+ return false;
+}
+
+/// \brief Compute the value of Val on the edge BBFrom -> BBTo, or the value at
+/// the basic block if the edge does not constraint Val.
+bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+ BasicBlock *BBTo, LVILatticeVal &Result) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val)) {
+ Result = LVILatticeVal::get(VC);
+ return true;
+ }
+
+ if (getEdgeValueLocal(Val, BBFrom, BBTo, Result)) {
+ if (!Result.isConstantRange() ||
+ Result.getConstantRange().getSingleElement())
+ return true;
+
+ // FIXME: this check should be moved to the beginning of the function when
+ // LVI better supports recursive values. Even for the single value case, we
+ // can intersect to detect dead code (an empty range).
+ if (!hasBlockValue(Val, BBFrom)) {
+ BlockValueStack.push(std::make_pair(BBFrom, Val));
+ return false;
+ }
+
+ // Try to intersect ranges of the BB and the constraint on the edge.
+ LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+ if (!InBlock.isConstantRange())
+ return true;
+
+ ConstantRange Range =
+ Result.getConstantRange().intersectWith(InBlock.getConstantRange());
+ Result = LVILatticeVal::getRange(Range);
+ return true;
+ }
+
+ if (!hasBlockValue(Val, BBFrom)) {
+ BlockValueStack.push(std::make_pair(BBFrom, Val));
+ return false;
+ }
+
+ // if we couldn't compute the value on the edge, use the value from the BB
+ Result = getBlockValue(Val, BBFrom);
+ return true;
+}
+
+LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
+ DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
+ << BB->getName() << "'\n");
+
+ BlockValueStack.push(std::make_pair(BB, V));
+ solve();
+ LVILatticeVal Result = getBlockValue(V, BB);
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::
+getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
+ DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
+ << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+
+ LVILatticeVal Result;
+ if (!getEdgeValue(V, FromBB, ToBB, Result)) {
+ solve();
+ bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result);
+ (void)WasFastQuery;
+ assert(WasFastQuery && "More work to do after problem solved?");
+ }
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ // When an edge in the graph has been threaded, values that we could not
+ // determine a value for before (i.e. were marked overdefined) may be possible
+ // to solve now. We do NOT try to proactively update these values. Instead,
+ // we clear their entries from the cache, and allow lazy updating to recompute
+ // them when needed.
+
+ // The updating process is fairly simple: we need to dropped cached info
+ // for all values that were marked overdefined in OldSucc, and for those same
+ // values in any successor of OldSucc (except NewSucc) in which they were
+ // also marked overdefined.
+ std::vector<BasicBlock*> worklist;
+ worklist.push_back(OldSucc);
+
+ DenseSet<Value*> ClearSet;
+ for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+ E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == OldSucc)
+ ClearSet.insert(I->second);
+ }
+
+ // Use a worklist to perform a depth-first search of OldSucc's successors.
+ // NOTE: We do not need a visited list since any blocks we have already
+ // visited will have had their overdefined markers cleared already, and we
+ // thus won't loop to their successors.
+ while (!worklist.empty()) {
+ BasicBlock *ToUpdate = worklist.back();
+ worklist.pop_back();
+
+ // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue;
+
+ bool changed = false;
+ for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
+ I != E; ++I) {
+ // If a value was marked overdefined in OldSucc, and is here too...
+ DenseSet<OverDefinedPairTy>::iterator OI =
+ OverDefinedCache.find(std::make_pair(ToUpdate, *I));
+ if (OI == OverDefinedCache.end()) continue;
+
+ // Remove it from the caches.
+ ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
+ ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
+
+ assert(CI != Entry.end() && "Couldn't find entry to update?");
+ Entry.erase(CI);
+ OverDefinedCache.erase(OI);
+
+ // If we removed anything, then we potentially need to update
+ // blocks successors too.
+ changed = true;
+ }
+
+ if (!changed) continue;
+
+ worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfo Impl
+//===----------------------------------------------------------------------===//
+
+/// getCache - This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl) {
+ if (!PImpl)
+ PImpl = new LazyValueInfoCache();
+ return *static_cast<LazyValueInfoCache*>(PImpl);
+}
+
+bool LazyValueInfo::runOnFunction(Function &F) {
+ if (PImpl)
+ getCache(PImpl).clear();
+
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ // Fully lazy.
+ return false;
+}
+
+void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfo>();
+}
+
+void LazyValueInfo::releaseMemory() {
+ // If the cache was allocated, free it.
+ if (PImpl) {
+ delete &getCache(PImpl);
+ PImpl = 0;
+ }
+}
+
+Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
+ LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
+ return 0;
+}
+
+/// getConstantOnEdge - Determine whether the specified value is known to be a
+/// constant on the specified edge. Return null if not.
+Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
+ BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
+ return 0;
+}
+
+/// getPredicateOnEdge - Determine whether the specified value comparison
+/// with a constant is known to be true or false on the specified CFG edge.
+/// Pred is a CmpInst predicate.
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+ BasicBlock *FromBB, BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ // If we know the value is a constant, evaluate the conditional.
+ Constant *Res = 0;
+ if (Result.isConstant()) {
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD,
+ TLI);
+ if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
+ return ResCI->isZero() ? False : True;
+ return Unknown;
+ }
+
+ if (Result.isConstantRange()) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return Unknown;
+
+ ConstantRange CR = Result.getConstantRange();
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (!CR.contains(CI->getValue()))
+ return False;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return True;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (!CR.contains(CI->getValue()))
+ return True;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return False;
+ }
+
+ // Handle more complex predicates.
+ ConstantRange TrueValues =
+ ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+ if (TrueValues.contains(CR))
+ return True;
+ if (TrueValues.inverse().contains(CR))
+ return False;
+ return Unknown;
+ }
+
+ if (Result.isNotConstant()) {
+ // If this is an equality comparison, we can try to fold it knowing that
+ // "V != C1".
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // !C1 == C -> false iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD,
+ TLI);
+ if (Res->isNullValue())
+ return False;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ // !C1 != C -> true iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD,
+ TLI);
+ if (Res->isNullValue())
+ return True;
+ }
+ return Unknown;
+ }
+
+ return Unknown;
+}
+
+void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
+}
+
+void LazyValueInfo::eraseBlock(BasicBlock *BB) {
+ if (PImpl) getCache(PImpl).eraseBlock(BB);
+}
diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
new file mode 100644
index 000000000000..fefa51660f92
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -0,0 +1,137 @@
+//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LibCallAliasAnalysis class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallAliasAnalysis.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+// Register this pass...
+char LibCallAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
+ "LibCall Alias Analysis", false, true, false)
+
+FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
+ return new LibCallAliasAnalysis(LCI);
+}
+
+LibCallAliasAnalysis::~LibCallAliasAnalysis() {
+ delete LCI;
+}
+
+void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.setPreservesAll(); // Does not transform code
+}
+
+
+
+/// AnalyzeLibCallDetails - Given a call to a function with the specified
+/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call
+/// vs the specified pointer/size.
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
+ ImmutableCallSite CS,
+ const Location &Loc) {
+ // If we have a function, check to see what kind of mod/ref effects it
+ // has. Start by including any info globally known about the function.
+ AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
+ if (MRInfo == NoModRef) return MRInfo;
+
+ // If that didn't tell us that the function is 'readnone', check to see
+ // if we have detailed info and if 'P' is any of the locations we know
+ // about.
+ const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails;
+ if (Details == 0)
+ return MRInfo;
+
+ // If the details array is of the 'DoesNot' kind, we only know something if
+ // the pointer is a match for one of the locations in 'Details'. If we find a
+ // match, we can prove some interactions cannot happen.
+ //
+ if (FI->DetailsType == LibCallFunctionInfo::DoesNot) {
+ // Find out if the pointer refers to a known location.
+ for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+ const LibCallLocationInfo &LocInfo =
+ LCI->getLocationInfo(Details[i].LocationID);
+ LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
+ if (Res != LibCallLocationInfo::Yes) continue;
+
+ // If we find a match against a location that we 'do not' interact with,
+ // learn this info into MRInfo.
+ return ModRefResult(MRInfo & ~Details[i].MRInfo);
+ }
+ return MRInfo;
+ }
+
+ // If the details are of the 'DoesOnly' sort, we know something if the pointer
+ // is a match for one of the locations in 'Details'. Also, if we can prove
+ // that the pointers is *not* one of the locations in 'Details', we know that
+ // the call is NoModRef.
+ assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly);
+
+ // Find out if the pointer refers to a known location.
+ bool NoneMatch = true;
+ for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+ const LibCallLocationInfo &LocInfo =
+ LCI->getLocationInfo(Details[i].LocationID);
+ LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
+ if (Res == LibCallLocationInfo::No) continue;
+
+ // If we don't know if this pointer points to the location, then we have to
+ // assume it might alias in some case.
+ if (Res == LibCallLocationInfo::Unknown) {
+ NoneMatch = false;
+ continue;
+ }
+
+ // If we know that this pointer definitely is pointing into the location,
+ // merge in this information.
+ return ModRefResult(MRInfo & Details[i].MRInfo);
+ }
+
+ // If we found that the pointer is guaranteed to not match any of the
+ // locations in our 'DoesOnly' rule, then we know that the pointer must point
+ // to some other location. Since the libcall doesn't mod/ref any other
+ // locations, return NoModRef.
+ if (NoneMatch)
+ return NoModRef;
+
+ // Otherwise, return any other info gained so far.
+ return MRInfo;
+}
+
+// getModRefInfo - Check to see if the specified callsite can clobber the
+// specified memory object.
+//
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ ModRefResult MRInfo = ModRef;
+
+ // If this is a direct call to a function that LCI knows about, get the
+ // information about the runtime function.
+ if (LCI) {
+ if (const Function *F = CS.getCalledFunction()) {
+ if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
+ MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc));
+ if (MRInfo == NoModRef) return NoModRef;
+ }
+ }
+ }
+
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc));
+}
diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
new file mode 100644
index 000000000000..0592ccb26c12
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
@@ -0,0 +1,63 @@
+//===- LibCallSemantics.cpp - Describe library semantics ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements interfaces that can be used to describe language
+// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM
+// optimizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Function.h"
+using namespace llvm;
+
+/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This
+/// helper does the cast.
+static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) {
+ return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr);
+}
+
+LibCallInfo::~LibCallInfo() {
+ delete getMap(Impl);
+}
+
+const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
+ // Get location info on the first call.
+ if (NumLocations == 0)
+ NumLocations = getLocationInfo(Locations);
+
+ assert(LocID < NumLocations && "Invalid location ID!");
+ return Locations[LocID];
+}
+
+
+/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
+/// the specified function if we have it. If not, return null.
+const LibCallFunctionInfo *
+LibCallInfo::getFunctionInfo(const Function *F) const {
+ StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl);
+
+ /// If this is the first time we are querying for this info, lazily construct
+ /// the StringMap to index it.
+ if (Map == 0) {
+ Impl = Map = new StringMap<const LibCallFunctionInfo*>();
+
+ const LibCallFunctionInfo *Array = getFunctionInfoArray();
+ if (Array == 0) return 0;
+
+ // We now have the array of entries. Populate the StringMap.
+ for (unsigned i = 0; Array[i].Name; ++i)
+ (*Map)[Array[i].Name] = Array+i;
+ }
+
+ // Look up this function in the string map.
+ return Map->lookup(F->getName());
+}
+
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
new file mode 100644
index 000000000000..9393508a9e67
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -0,0 +1,692 @@
+//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass statically checks for common and easily-identified constructs
+// which produce undefined or likely unintended behavior in LLVM IR.
+//
+// It is not a guarantee of correctness, in two ways. First, it isn't
+// comprehensive. There are checks which could be done statically which are
+// not yet implemented. Some of these are indicated by TODO comments, but
+// those aren't comprehensive either. Second, many conditions cannot be
+// checked statically. This pass does no dynamic instrumentation, so it
+// can't check for all possible problems.
+//
+// Another limitation is that it assumes all code will be executed. A store
+// through a null pointer in a basic block which is never reached is harmless,
+// but this pass will warn about it anyway. This is the main reason why most
+// of these checks live here instead of in the Verifier pass.
+//
+// Optimization passes may make conditions that this pass checks for more or
+// less obvious. If an optimization pass appears to be introducing a warning,
+// it may be that the optimization pass is merely exposing an existing
+// condition in the code.
+//
+// This code may be run before instcombine. In many cases, instcombine checks
+// for the same kinds of things and turns instructions with undefined behavior
+// into unreachable (or equivalent). Because of this, this pass makes some
+// effort to look through bitcasts and so on.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Lint.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+using namespace llvm;
+
+namespace {
+ namespace MemRef {
+ static unsigned Read = 1;
+ static unsigned Write = 2;
+ static unsigned Callee = 4;
+ static unsigned Branchee = 8;
+ }
+
+ class Lint : public FunctionPass, public InstVisitor<Lint> {
+ friend class InstVisitor<Lint>;
+
+ void visitFunction(Function &F);
+
+ void visitCallSite(CallSite CS);
+ void visitMemoryReference(Instruction &I, Value *Ptr,
+ uint64_t Size, unsigned Align,
+ Type *Ty, unsigned Flags);
+
+ void visitCallInst(CallInst &I);
+ void visitInvokeInst(InvokeInst &I);
+ void visitReturnInst(ReturnInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitXor(BinaryOperator &I);
+ void visitSub(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+ void visitShl(BinaryOperator &I);
+ void visitSDiv(BinaryOperator &I);
+ void visitUDiv(BinaryOperator &I);
+ void visitSRem(BinaryOperator &I);
+ void visitURem(BinaryOperator &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitVAArgInst(VAArgInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitUnreachableInst(UnreachableInst &I);
+
+ Value *findValue(Value *V, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSet<Value *, 4> &Visited) const;
+
+ public:
+ Module *Mod;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ DataLayout *TD;
+ TargetLibraryInfo *TLI;
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ static char ID; // Pass identification, replacement for typeid
+ Lint() : FunctionPass(ID), MessagesStr(Messages) {
+ initializeLintPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<DominatorTree>();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const {}
+
+ void WriteValue(const Value *V) {
+ if (!V) return;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ WriteAsOperand(MessagesStr, V, true, Mod);
+ MessagesStr << '\n';
+ }
+ }
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const Twine &Message,
+ const Value *V1 = 0, const Value *V2 = 0,
+ const Value *V3 = 0, const Value *V4 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ }
+ };
+}
+
+char Lint::ID = 0;
+INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+
+// Assert - We know that cond should be true, if not print an error message.
+#define Assert(C, M) \
+ do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+ do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+ do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+// Lint::run - This is the main Analysis entry point for a
+// function.
+//
+bool Lint::runOnFunction(Function &F) {
+ Mod = F.getParent();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+ visit(F);
+ dbgs() << MessagesStr.str();
+ Messages.clear();
+ return false;
+}
+
+void Lint::visitFunction(Function &F) {
+ // This isn't undefined behavior, it's just a little unusual, and it's a
+ // fairly common mistake to neglect to name a function.
+ Assert1(F.hasName() || F.hasLocalLinkage(),
+ "Unusual: Unnamed function with non-local linkage", &F);
+
+ // TODO: Check for irreducible control flow.
+}
+
+void Lint::visitCallSite(CallSite CS) {
+ Instruction &I = *CS.getInstruction();
+ Value *Callee = CS.getCalledValue();
+
+ visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Callee);
+
+ if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
+ Assert1(CS.getCallingConv() == F->getCallingConv(),
+ "Undefined behavior: Caller and callee calling convention differ",
+ &I);
+
+ FunctionType *FT = F->getFunctionType();
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+
+ Assert1(FT->isVarArg() ?
+ FT->getNumParams() <= NumActualArgs :
+ FT->getNumParams() == NumActualArgs,
+ "Undefined behavior: Call argument count mismatches callee "
+ "argument count", &I);
+
+ Assert1(FT->getReturnType() == I.getType(),
+ "Undefined behavior: Call return type mismatches "
+ "callee return type", &I);
+
+ // Check argument types (in case the callee was casted) and attributes.
+ // TODO: Verify that caller and callee attributes are compatible.
+ Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ for (; AI != AE; ++AI) {
+ Value *Actual = *AI;
+ if (PI != PE) {
+ Argument *Formal = PI++;
+ Assert1(Formal->getType() == Actual->getType(),
+ "Undefined behavior: Call argument type mismatches "
+ "callee parameter type", &I);
+
+ // Check that noalias arguments don't alias other arguments. This is
+ // not fully precise because we don't know the sizes of the dereferenced
+ // memory regions.
+ if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
+ for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+ if (AI != BI && (*BI)->getType()->isPointerTy()) {
+ AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
+ Assert1(Result != AliasAnalysis::MustAlias &&
+ Result != AliasAnalysis::PartialAlias,
+ "Unusual: noalias argument aliases another argument", &I);
+ }
+
+ // Check that an sret argument points to valid memory.
+ if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
+ Type *Ty =
+ cast<PointerType>(Formal->getType())->getElementType();
+ visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
+ TD ? TD->getABITypeAlignment(Ty) : 0,
+ Ty, MemRef::Read | MemRef::Write);
+ }
+ }
+ }
+ }
+
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI) {
+ Value *Obj = findValue(*AI, /*OffsetOk=*/true);
+ Assert1(!isa<AllocaInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca", &I);
+ }
+
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // TODO: Check more intrinsics
+
+ case Intrinsic::memcpy: {
+ MemCpyInst *MCI = cast<MemCpyInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
+ MCI->getAlignment(), 0,
+ MemRef::Write);
+ visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
+ MCI->getAlignment(), 0,
+ MemRef::Read);
+
+ // Check that the memcpy arguments don't overlap. The AliasAnalysis API
+ // isn't expressive enough for what we really want to do. Known partial
+ // overlap is not distinguished from the case where nothing is known.
+ uint64_t Size = 0;
+ if (const ConstantInt *Len =
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(),
+ /*OffsetOk=*/false)))
+ if (Len->getValue().isIntN(32))
+ Size = Len->getValue().getZExtValue();
+ Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+ AliasAnalysis::MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
+ break;
+ }
+ case Intrinsic::memmove: {
+ MemMoveInst *MMI = cast<MemMoveInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
+ MMI->getAlignment(), 0,
+ MemRef::Write);
+ visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
+ MMI->getAlignment(), 0,
+ MemRef::Read);
+ break;
+ }
+ case Intrinsic::memset: {
+ MemSetInst *MSI = cast<MemSetInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
+ MSI->getAlignment(), 0,
+ MemRef::Write);
+ break;
+ }
+
+ case Intrinsic::vastart:
+ Assert1(I.getParent()->getParent()->isVarArg(),
+ "Undefined behavior: va_start called in a non-varargs function",
+ &I);
+
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+ case Intrinsic::vacopy:
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read);
+ break;
+ case Intrinsic::vaend:
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+
+ case Intrinsic::stackrestore:
+ // Stackrestore doesn't read or write memory, but it sets the
+ // stack pointer, which the compiler may read from or write to
+ // at any time, so check it for both readability and writeability.
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+ }
+}
+
+void Lint::visitCallInst(CallInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitInvokeInst(InvokeInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitReturnInst(ReturnInst &I) {
+ Function *F = I.getParent()->getParent();
+ Assert1(!F->doesNotReturn(),
+ "Unusual: Return statement in function with noreturn attribute",
+ &I);
+
+ if (Value *V = I.getReturnValue()) {
+ Value *Obj = findValue(V, /*OffsetOk=*/true);
+ Assert1(!isa<AllocaInst>(Obj),
+ "Unusual: Returning alloca value", &I);
+ }
+}
+
+// TODO: Check that the reference is in bounds.
+// TODO: Check readnone/readonly function attributes.
+void Lint::visitMemoryReference(Instruction &I,
+ Value *Ptr, uint64_t Size, unsigned Align,
+ Type *Ty, unsigned Flags) {
+ // If no memory is being referenced, it doesn't matter if the pointer
+ // is valid.
+ if (Size == 0)
+ return;
+
+ Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
+ Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
+ "Undefined behavior: Null pointer dereference", &I);
+ Assert1(!isa<UndefValue>(UnderlyingObject),
+ "Undefined behavior: Undef pointer dereference", &I);
+ Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ "Unusual: All-ones pointer dereference", &I);
+ Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isOne(),
+ "Unusual: Address one pointer dereference", &I);
+
+ if (Flags & MemRef::Write) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
+ Assert1(!GV->isConstant(),
+ "Undefined behavior: Write to read-only memory", &I);
+ Assert1(!isa<Function>(UnderlyingObject) &&
+ !isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Write to text section", &I);
+ }
+ if (Flags & MemRef::Read) {
+ Assert1(!isa<Function>(UnderlyingObject),
+ "Unusual: Load from function body", &I);
+ Assert1(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Load from block address", &I);
+ }
+ if (Flags & MemRef::Callee) {
+ Assert1(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Call to block address", &I);
+ }
+ if (Flags & MemRef::Branchee) {
+ Assert1(!isa<Constant>(UnderlyingObject) ||
+ isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Branch to non-blockaddress", &I);
+ }
+
+ // Check for buffer overflows and misalignment.
+ // Only handles memory references that read/write something simple like an
+ // alloca instruction or a global variable.
+ int64_t Offset = 0;
+ if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) {
+ // OK, so the access is to a constant offset from Ptr. Check that Ptr is
+ // something we can handle and if so extract the size of this base object
+ // along with its alignment.
+ uint64_t BaseSize = AliasAnalysis::UnknownSize;
+ unsigned BaseAlign = 0;
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+ Type *ATy = AI->getAllocatedType();
+ if (TD && !AI->isArrayAllocation() && ATy->isSized())
+ BaseSize = TD->getTypeAllocSize(ATy);
+ BaseAlign = AI->getAlignment();
+ if (TD && BaseAlign == 0 && ATy->isSized())
+ BaseAlign = TD->getABITypeAlignment(ATy);
+ } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
+ // If the global may be defined differently in another compilation unit
+ // then don't warn about funky memory accesses.
+ if (GV->hasDefinitiveInitializer()) {
+ Type *GTy = GV->getType()->getElementType();
+ if (TD && GTy->isSized())
+ BaseSize = TD->getTypeAllocSize(GTy);
+ BaseAlign = GV->getAlignment();
+ if (TD && BaseAlign == 0 && GTy->isSized())
+ BaseAlign = TD->getABITypeAlignment(GTy);
+ }
+ }
+
+ // Accesses from before the start or after the end of the object are not
+ // defined.
+ Assert1(Size == AliasAnalysis::UnknownSize ||
+ BaseSize == AliasAnalysis::UnknownSize ||
+ (Offset >= 0 && Offset + Size <= BaseSize),
+ "Undefined behavior: Buffer overflow", &I);
+
+ // Accesses that say that the memory is more aligned than it is are not
+ // defined.
+ if (TD && Align == 0 && Ty && Ty->isSized())
+ Align = TD->getABITypeAlignment(Ty);
+ Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
+ "Undefined behavior: Memory reference address is misaligned", &I);
+ }
+}
+
+void Lint::visitLoadInst(LoadInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(),
+ AA->getTypeStoreSize(I.getType()), I.getAlignment(),
+ I.getType(), MemRef::Read);
+}
+
+void Lint::visitStoreInst(StoreInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(),
+ AA->getTypeStoreSize(I.getOperand(0)->getType()),
+ I.getAlignment(),
+ I.getOperand(0)->getType(), MemRef::Write);
+}
+
+void Lint::visitXor(BinaryOperator &I) {
+ Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+ !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: xor(undef, undef)", &I);
+}
+
+void Lint::visitSub(BinaryOperator &I) {
+ Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+ !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: sub(undef, undef)", &I);
+}
+
+void Lint::visitLShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitAShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitShl(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+static bool isZero(Value *V, DataLayout *TD) {
+ // Assume undef could be zero.
+ if (isa<UndefValue>(V)) return true;
+
+ unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD);
+ return KnownZero.isAllOnesValue();
+}
+
+void Lint::visitSDiv(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitUDiv(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitSRem(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitURem(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitAllocaInst(AllocaInst &I) {
+ if (isa<ConstantInt>(I.getArraySize()))
+ // This isn't undefined behavior, it's just an obvious pessimization.
+ Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+ "Pessimization: Static alloca outside of entry block", &I);
+
+ // TODO: Check for an unusual size (MSB set?)
+}
+
+void Lint::visitVAArgInst(VAArgInst &I) {
+ visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0,
+ MemRef::Read | MemRef::Write);
+}
+
+void Lint::visitIndirectBrInst(IndirectBrInst &I) {
+ visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0,
+ MemRef::Branchee);
+
+ Assert1(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
+}
+
+void Lint::visitExtractElementInst(ExtractElementInst &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+ /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
+ "Undefined result: extractelement index out of range", &I);
+}
+
+void Lint::visitInsertElementInst(InsertElementInst &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+ /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(I.getType()->getNumElements()),
+ "Undefined result: insertelement index out of range", &I);
+}
+
+void Lint::visitUnreachableInst(UnreachableInst &I) {
+ // This isn't undefined behavior, it's merely suspicious.
+ Assert1(&I == I.getParent()->begin() ||
+ prior(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+ "Unusual: unreachable immediately preceded by instruction without "
+ "side effects", &I);
+}
+
+/// findValue - Look through bitcasts and simple memory reference patterns
+/// to identify an equivalent, but more informative, value. If OffsetOk
+/// is true, look through getelementptrs with non-zero offsets too.
+///
+/// Most analysis passes don't require this logic, because instcombine
+/// will simplify most of these kinds of things away. But it's a goal of
+/// this Lint pass to be useful even on non-optimized IR.
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
+ SmallPtrSet<Value *, 4> Visited;
+ return findValueImpl(V, OffsetOk, Visited);
+}
+
+/// findValueImpl - Implementation helper for findValue.
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSet<Value *, 4> &Visited) const {
+ // Detect self-referential values.
+ if (!Visited.insert(V))
+ return UndefValue::get(V->getType());
+
+ // TODO: Look through sext or zext cast, when the result is known to
+ // be interpreted as signed or unsigned, respectively.
+ // TODO: Look through eliminable cast pairs.
+ // TODO: Look through calls with unique return values.
+ // TODO: Look through vector insert/extract/shuffle.
+ V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts();
+ if (LoadInst *L = dyn_cast<LoadInst>(V)) {
+ BasicBlock::iterator BBI = L;
+ BasicBlock *BB = L->getParent();
+ SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+ for (;;) {
+ if (!VisitedBlocks.insert(BB)) break;
+ if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
+ BB, BBI, 6, AA))
+ return findValueImpl(U, OffsetOk, Visited);
+ if (BBI != BB->begin()) break;
+ BB = BB->getUniquePredecessor();
+ if (!BB) break;
+ BBI = BB->end();
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (Value *W = PN->hasConstantValue())
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
+ Type::getInt64Ty(V->getContext())))
+ return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+ } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
+ if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
+ Ex->getIndices()))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ // Same as above, but for ConstantExpr instead of Instruction.
+ if (Instruction::isCast(CE->getOpcode())) {
+ if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
+ CE->getOperand(0)->getType(),
+ CE->getType(),
+ TD ? TD->getIntPtrType(V->getContext()) :
+ Type::getInt64Ty(V->getContext())))
+ return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+ } else if (CE->getOpcode() == Instruction::ExtractValue) {
+ ArrayRef<unsigned> Indices = CE->getIndices();
+ if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+ }
+
+ // As a last resort, try SimplifyInstruction or constant folding.
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT))
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+
+ return V;
+}
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createLintPass() {
+ return new Lint();
+}
+
+/// lintFunction - Check a function for errors, printing messages on stderr.
+///
+void llvm::lintFunction(const Function &f) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot lint external functions");
+
+ FunctionPassManager FPM(F.getParent());
+ Lint *V = new Lint();
+ FPM.add(V);
+ FPM.run(F);
+}
+
+/// lintModule - Check a module for errors, printing messages on stderr.
+///
+void llvm::lintModule(const Module &M) {
+ PassManager PM;
+ Lint *V = new Lint();
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+}
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
new file mode 100644
index 000000000000..0902a39a9f81
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -0,0 +1,221 @@
+//===- Loads.cpp - Local load analysis ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines simple local analyses for load instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+using namespace llvm;
+
+/// AreEquivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come from identical arithmetic instructions.
+ // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+ // this function is only used when one address use dominates the
+ // other, which means that they'll always either have the same
+ // value or one of them will have an undefined value.
+ if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
+ isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+ if (const Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+ unsigned Align, const DataLayout *TD) {
+ int64_t ByteOffset = 0;
+ Value *Base = V;
+ Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD);
+
+ if (ByteOffset < 0) // out of bounds
+ return false;
+
+ Type *BaseType = 0;
+ unsigned BaseAlign = 0;
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+ // An alloca is safe to load from as load as it is suitably aligned.
+ BaseType = AI->getAllocatedType();
+ BaseAlign = AI->getAlignment();
+ } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
+ // Global variables are safe to load from but their size cannot be
+ // guaranteed if they are overridden.
+ if (!GV->mayBeOverridden()) {
+ BaseType = GV->getType()->getElementType();
+ BaseAlign = GV->getAlignment();
+ }
+ }
+
+ if (BaseType && BaseType->isSized()) {
+ if (TD && BaseAlign == 0)
+ BaseAlign = TD->getPrefTypeAlignment(BaseType);
+
+ if (Align <= BaseAlign) {
+ if (!TD)
+ return true; // Loading directly from an alloca or global is OK.
+
+ // Check if the load is within the bounds of the underlying object.
+ PointerType *AddrTy = cast<PointerType>(V->getType());
+ uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
+ if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
+ (Align == 0 || (ByteOffset % Align) == 0))
+ return true;
+ }
+ }
+
+ // Otherwise, be a little bit aggressive by scanning the local block where we
+ // want to check to see if the pointer is already being loaded or stored
+ // from/to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might do
+ // a free) the pointer could be marked invalid.
+ if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true;
+ }
+ }
+ return false;
+}
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
+/// instruction before ScanFrom) checking to see if we have the value at the
+/// memory address *Ptr locally available within a small number of instructions.
+/// If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't find
+/// something that invalidates *Ptr or provides it, ScanFrom would be left at
+/// begin() and this returns null. ScanFrom could also be left
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
+/// it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
+///
+/// If TBAATag is non-null and a load or store is found, the TBAA tag from the
+/// load or store is recorded there. If there is no TBAA tag or if no access
+/// is found, it is left unmodified.
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA,
+ MDNode **TBAATag) {
+ if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
+
+ // If we're using alias analysis to disambiguate get the size of *Ptr.
+ uint64_t AccessSize = 0;
+ if (AA) {
+ Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+ AccessSize = AA->getTypeStoreSize(AccessTy);
+ }
+
+ while (ScanFrom != ScanBB->begin()) {
+ // We must ignore debug info directives when counting (otherwise they
+ // would affect codegen).
+ Instruction *Inst = --ScanFrom;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ // Restore ScanFrom to expected value in case next test succeeds
+ ScanFrom++;
+
+ // Don't scan huge blocks.
+ if (MaxInstsToScan-- == 0) return 0;
+
+ --ScanFrom;
+ // If this is a load of Ptr, the loaded value is available.
+ // (This is true even if the load is volatile or atomic, although
+ // those cases are unlikely.)
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
+ if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
+ return LI;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If this is a store through Ptr, the value is available!
+ // (This is true even if the store is volatile or atomic, although
+ // those cases are unlikely.)
+ if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
+ if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa);
+ return SI->getOperand(0);
+ }
+
+ // If Ptr is an alloca and this is a store to a different alloca, ignore
+ // the store. This is a trivial form of alias analysis that is important
+ // for reg2mem'd code.
+ if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
+ (isa<AllocaInst>(SI->getOperand(1)) ||
+ isa<GlobalVariable>(SI->getOperand(1))))
+ continue;
+
+ // If we have alias analysis and it says the store won't modify the loaded
+ // value, ignore the store.
+ if (AA &&
+ (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+ // Otherwise the store that may or may not alias the pointer, bail out.
+ ++ScanFrom;
+ return 0;
+ }
+
+ // If this is some other instruction that may clobber Ptr, bail out.
+ if (Inst->mayWriteToMemory()) {
+ // If alias analysis claims that it really won't modify the load,
+ // ignore it.
+ if (AA &&
+ (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+ // May modify the pointer, bail out.
+ ++ScanFrom;
+ return 0;
+ }
+ }
+
+ // Got to the start of the block, we didn't find it, but are done for this
+ // block.
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
new file mode 100644
index 000000000000..f1ad6506e4ba
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -0,0 +1,687 @@
+//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoopInfo class that is used to identify natural loops
+// and determine the loop depth of various nodes of the CFG. Note that the
+// loops identified may actually be several natural loops that share the same
+// header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+using namespace llvm;
+
+// Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops.
+template class llvm::LoopBase<BasicBlock, Loop>;
+template class llvm::LoopInfoBase<BasicBlock, Loop>;
+
+// Always verify loopinfo if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyLoopInfo = true;
+#else
+static bool VerifyLoopInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
+ cl::desc("Verify loop info (time consuming)"));
+
+char LoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
+
+//===----------------------------------------------------------------------===//
+// Loop implementation
+//
+
+/// isLoopInvariant - Return true if the specified value is loop invariant
+///
+bool Loop::isLoopInvariant(Value *V) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return !contains(I);
+ return true; // All non-instructions are loop invariant
+}
+
+/// hasLoopInvariantOperands - Return true if all the operands of the
+/// specified instruction are loop invariant.
+bool Loop::hasLoopInvariantOperands(Instruction *I) const {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!isLoopInvariant(I->getOperand(i)))
+ return false;
+
+ return true;
+}
+
+/// makeLoopInvariant - If the given value is an instruciton inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the value after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Value *V, bool &Changed,
+ Instruction *InsertPt) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return makeLoopInvariant(I, Changed, InsertPt);
+ return true; // All non-instructions are loop-invariant.
+}
+
+/// makeLoopInvariant - If the given instruction is inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the instruction after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
+ Instruction *InsertPt) const {
+ // Test if the value is already loop-invariant.
+ if (isLoopInvariant(I))
+ return true;
+ if (!isSafeToSpeculativelyExecute(I))
+ return false;
+ if (I->mayReadFromMemory())
+ return false;
+ // The landingpad instruction is immobile.
+ if (isa<LandingPadInst>(I))
+ return false;
+ // Determine the insertion point, unless one was given.
+ if (!InsertPt) {
+ BasicBlock *Preheader = getLoopPreheader();
+ // Without a preheader, hoisting is not feasible.
+ if (!Preheader)
+ return false;
+ InsertPt = Preheader->getTerminator();
+ }
+ // Don't hoist instructions with loop-variant operands.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
+ return false;
+
+ // Hoist.
+ I->moveBefore(InsertPt);
+ Changed = true;
+ return true;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable: an integer recurrence that starts at 0 and increments
+/// by one each time through the loop. If so, return the phi node that
+/// corresponds to it.
+///
+/// The IndVarSimplify pass transforms loops to have a canonical induction
+/// variable.
+///
+PHINode *Loop::getCanonicalInductionVariable() const {
+ BasicBlock *H = getHeader();
+
+ BasicBlock *Incoming = 0, *Backedge = 0;
+ pred_iterator PI = pred_begin(H);
+ assert(PI != pred_end(H) &&
+ "Loop must have at least one backedge!");
+ Backedge = *PI++;
+ if (PI == pred_end(H)) return 0; // dead loop
+ Incoming = *PI++;
+ if (PI != pred_end(H)) return 0; // multiple backedges?
+
+ if (contains(Incoming)) {
+ if (contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical indvar.
+ for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
+ if (CI->isNullValue())
+ if (Instruction *Inc =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
+ if (Inc->getOpcode() == Instruction::Add &&
+ Inc->getOperand(0) == PN)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
+ if (CI->equalsInt(1))
+ return PN;
+ }
+ return 0;
+}
+
+/// isLCSSAForm - Return true if the Loop is in LCSSA form
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
+
+ for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
+ BasicBlock *BB = *BI;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U))
+ UserBB = P->getIncomingBlock(UI);
+
+ // Check the current block, as a fast-path, before checking whether
+ // the use is anywhere in the loop. Most values are used in the same
+ // block they are defined in. Also, blocks not reachable from the
+ // entry are special; uses in them don't need to go through PHIs.
+ if (UserBB != BB &&
+ !LoopBBs.count(UserBB) &&
+ DT.isReachableFromEntry(UserBB))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// isLoopSimplifyForm - Return true if the Loop is in the form that
+/// the LoopSimplify form transforms loops to, which is sometimes called
+/// normal form.
+bool Loop::isLoopSimplifyForm() const {
+ // Normal-form loops have a preheader, a single backedge, and all of their
+ // exits have all their predecessors inside the loop.
+ return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
+}
+
+/// isSafeToClone - Return true if the loop body is safe to clone in practice.
+/// Routines that reform the loop CFG and split edges often fail on indirectbr.
+bool Loop::isSafeToClone() const {
+ // Return false if any loop blocks contain indirectbrs, or there are any calls
+ // to noduplicate functions.
+ for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
+ if (isa<IndirectBrInst>((*I)->getTerminator())) {
+ return false;
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
+ if (II->hasFnAttr(Attribute::NoDuplicate))
+ return false;
+ }
+
+ for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
+ if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
+ if (CI->hasFnAttr(Attribute::NoDuplicate))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+bool Loop::isAnnotatedParallel() const {
+
+ BasicBlock *latch = getLoopLatch();
+ if (latch == NULL)
+ return false;
+
+ MDNode *desiredLoopIdMetadata =
+ latch->getTerminator()->getMetadata("llvm.loop.parallel");
+
+ if (!desiredLoopIdMetadata)
+ return false;
+
+ // The loop branch contains the parallel loop metadata. In order to ensure
+ // that any parallel-loop-unaware optimization pass hasn't added loop-carried
+ // dependencies (thus converted the loop back to a sequential loop), check
+ // that all the memory instructions in the loop contain parallelism metadata
+ // that point to the same unique "loop id metadata" the loop branch does.
+ for (block_iterator BB = block_begin(), BE = block_end(); BB != BE; ++BB) {
+ for (BasicBlock::iterator II = (*BB)->begin(), EE = (*BB)->end();
+ II != EE; II++) {
+
+ if (!II->mayReadOrWriteMemory())
+ continue;
+
+ if (!II->getMetadata("llvm.mem.parallel_loop_access"))
+ return false;
+
+ // The memory instruction can refer to the loop identifier metadata
+ // directly or indirectly through another list metadata (in case of
+ // nested parallel loops). The loop identifier metadata refers to
+ // itself so we can check both cases with the same routine.
+ MDNode *loopIdMD =
+ dyn_cast<MDNode>(II->getMetadata("llvm.mem.parallel_loop_access"));
+ bool loopIdMDFound = false;
+ for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) {
+ if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) {
+ loopIdMDFound = true;
+ break;
+ }
+ }
+
+ if (!loopIdMDFound)
+ return false;
+ }
+ }
+ return true;
+}
+
+
+/// hasDedicatedExits - Return true if no exit block for the loop
+/// has a predecessor that is outside the loop.
+bool Loop::hasDedicatedExits() const {
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
+ // Each predecessor of each exit block of a normal loop is contained
+ // within the loop.
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(ExitBlocks[i]),
+ PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
+ if (!LoopBBs.count(*PI))
+ return false;
+ // All the requirements are met.
+ return true;
+}
+
+/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+/// These are the blocks _outside of the current loop_ which are branched to.
+/// This assumes that loop exits are in canonical form.
+///
+void
+Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
+ assert(hasDedicatedExits() &&
+ "getUniqueExitBlocks assumes the loop has canonical form exits!");
+
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
+ std::sort(LoopBBs.begin(), LoopBBs.end());
+
+ SmallVector<BasicBlock *, 32> switchExitBlocks;
+
+ for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
+
+ BasicBlock *current = *BI;
+ switchExitBlocks.clear();
+
+ for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
+ // If block is inside the loop then it is not a exit block.
+ if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ continue;
+
+ pred_iterator PI = pred_begin(*I);
+ BasicBlock *firstPred = *PI;
+
+ // If current basic block is this exit block's first predecessor
+ // then only insert exit block in to the output ExitBlocks vector.
+ // This ensures that same exit block is not inserted twice into
+ // ExitBlocks vector.
+ if (current != firstPred)
+ continue;
+
+ // If a terminator has more then two successors, for example SwitchInst,
+ // then it is possible that there are multiple edges from current block
+ // to one exit block.
+ if (std::distance(succ_begin(current), succ_end(current)) <= 2) {
+ ExitBlocks.push_back(*I);
+ continue;
+ }
+
+ // In case of multiple edges from current block to exit block, collect
+ // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
+ // duplicate edges.
+ if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
+ == switchExitBlocks.end()) {
+ switchExitBlocks.push_back(*I);
+ ExitBlocks.push_back(*I);
+ }
+ }
+ }
+}
+
+/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+/// block, return that block. Otherwise return null.
+BasicBlock *Loop::getUniqueExitBlock() const {
+ SmallVector<BasicBlock *, 8> UniqueExitBlocks;
+ getUniqueExitBlocks(UniqueExitBlocks);
+ if (UniqueExitBlocks.size() == 1)
+ return UniqueExitBlocks[0];
+ return 0;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void Loop::dump() const {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// UnloopUpdater implementation
+//
+
+namespace {
+/// Find the new parent loop for all blocks within the "unloop" whose last
+/// backedges has just been removed.
+class UnloopUpdater {
+ Loop *Unloop;
+ LoopInfo *LI;
+
+ LoopBlocksDFS DFS;
+
+ // Map unloop's immediate subloops to their nearest reachable parents. Nested
+ // loops within these subloops will not change parents. However, an immediate
+ // subloop's new parent will be the nearest loop reachable from either its own
+ // exits *or* any of its nested loop's exits.
+ DenseMap<Loop*, Loop*> SubloopParents;
+
+ // Flag the presence of an irreducible backedge whose destination is a block
+ // directly contained by the original unloop.
+ bool FoundIB;
+
+public:
+ UnloopUpdater(Loop *UL, LoopInfo *LInfo) :
+ Unloop(UL), LI(LInfo), DFS(UL), FoundIB(false) {}
+
+ void updateBlockParents();
+
+ void removeBlocksFromAncestors();
+
+ void updateSubloopParents();
+
+protected:
+ Loop *getNearestLoop(BasicBlock *BB, Loop *BBLoop);
+};
+} // end anonymous namespace
+
+/// updateBlockParents - Update the parent loop for all blocks that are directly
+/// contained within the original "unloop".
+void UnloopUpdater::updateBlockParents() {
+ if (Unloop->getNumBlocks()) {
+ // Perform a post order CFG traversal of all blocks within this loop,
+ // propagating the nearest loop from sucessors to predecessors.
+ LoopBlocksTraversal Traversal(DFS, LI);
+ for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
+ POE = Traversal.end(); POI != POE; ++POI) {
+
+ Loop *L = LI->getLoopFor(*POI);
+ Loop *NL = getNearestLoop(*POI, L);
+
+ if (NL != L) {
+ // For reducible loops, NL is now an ancestor of Unloop.
+ assert((NL != Unloop && (!NL || NL->contains(Unloop))) &&
+ "uninitialized successor");
+ LI->changeLoopFor(*POI, NL);
+ }
+ else {
+ // Or the current block is part of a subloop, in which case its parent
+ // is unchanged.
+ assert((FoundIB || Unloop->contains(L)) && "uninitialized successor");
+ }
+ }
+ }
+ // Each irreducible loop within the unloop induces a round of iteration using
+ // the DFS result cached by Traversal.
+ bool Changed = FoundIB;
+ for (unsigned NIters = 0; Changed; ++NIters) {
+ assert(NIters < Unloop->getNumBlocks() && "runaway iterative algorithm");
+
+ // Iterate over the postorder list of blocks, propagating the nearest loop
+ // from successors to predecessors as before.
+ Changed = false;
+ for (LoopBlocksDFS::POIterator POI = DFS.beginPostorder(),
+ POE = DFS.endPostorder(); POI != POE; ++POI) {
+
+ Loop *L = LI->getLoopFor(*POI);
+ Loop *NL = getNearestLoop(*POI, L);
+ if (NL != L) {
+ assert(NL != Unloop && (!NL || NL->contains(Unloop)) &&
+ "uninitialized successor");
+ LI->changeLoopFor(*POI, NL);
+ Changed = true;
+ }
+ }
+ }
+}
+
+/// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below
+/// their new parents.
+void UnloopUpdater::removeBlocksFromAncestors() {
+ // Remove all unloop's blocks (including those in nested subloops) from
+ // ancestors below the new parent loop.
+ for (Loop::block_iterator BI = Unloop->block_begin(),
+ BE = Unloop->block_end(); BI != BE; ++BI) {
+ Loop *OuterParent = LI->getLoopFor(*BI);
+ if (Unloop->contains(OuterParent)) {
+ while (OuterParent->getParentLoop() != Unloop)
+ OuterParent = OuterParent->getParentLoop();
+ OuterParent = SubloopParents[OuterParent];
+ }
+ // Remove blocks from former Ancestors except Unloop itself which will be
+ // deleted.
+ for (Loop *OldParent = Unloop->getParentLoop(); OldParent != OuterParent;
+ OldParent = OldParent->getParentLoop()) {
+ assert(OldParent && "new loop is not an ancestor of the original");
+ OldParent->removeBlockFromLoop(*BI);
+ }
+ }
+}
+
+/// updateSubloopParents - Update the parent loop for all subloops directly
+/// nested within unloop.
+void UnloopUpdater::updateSubloopParents() {
+ while (!Unloop->empty()) {
+ Loop *Subloop = *llvm::prior(Unloop->end());
+ Unloop->removeChildLoop(llvm::prior(Unloop->end()));
+
+ assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop");
+ if (Loop *Parent = SubloopParents[Subloop])
+ Parent->addChildLoop(Subloop);
+ else
+ LI->addTopLevelLoop(Subloop);
+ }
+}
+
+/// getNearestLoop - Return the nearest parent loop among this block's
+/// successors. If a successor is a subloop header, consider its parent to be
+/// the nearest parent of the subloop's exits.
+///
+/// For subloop blocks, simply update SubloopParents and return NULL.
+Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
+
+ // Initially for blocks directly contained by Unloop, NearLoop == Unloop and
+ // is considered uninitialized.
+ Loop *NearLoop = BBLoop;
+
+ Loop *Subloop = 0;
+ if (NearLoop != Unloop && Unloop->contains(NearLoop)) {
+ Subloop = NearLoop;
+ // Find the subloop ancestor that is directly contained within Unloop.
+ while (Subloop->getParentLoop() != Unloop) {
+ Subloop = Subloop->getParentLoop();
+ assert(Subloop && "subloop is not an ancestor of the original loop");
+ }
+ // Get the current nearest parent of the Subloop exits, initially Unloop.
+ NearLoop =
+ SubloopParents.insert(std::make_pair(Subloop, Unloop)).first->second;
+ }
+
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E) {
+ assert(!Subloop && "subloop blocks must have a successor");
+ NearLoop = 0; // unloop blocks may now exit the function.
+ }
+ for (; I != E; ++I) {
+ if (*I == BB)
+ continue; // self loops are uninteresting
+
+ Loop *L = LI->getLoopFor(*I);
+ if (L == Unloop) {
+ // This successor has not been processed. This path must lead to an
+ // irreducible backedge.
+ assert((FoundIB || !DFS.hasPostorder(*I)) && "should have seen IB");
+ FoundIB = true;
+ }
+ if (L != Unloop && Unloop->contains(L)) {
+ // Successor is in a subloop.
+ if (Subloop)
+ continue; // Branching within subloops. Ignore it.
+
+ // BB branches from the original into a subloop header.
+ assert(L->getParentLoop() == Unloop && "cannot skip into nested loops");
+
+ // Get the current nearest parent of the Subloop's exits.
+ L = SubloopParents[L];
+ // L could be Unloop if the only exit was an irreducible backedge.
+ }
+ if (L == Unloop) {
+ continue;
+ }
+ // Handle critical edges from Unloop into a sibling loop.
+ if (L && !L->contains(Unloop)) {
+ L = L->getParentLoop();
+ }
+ // Remember the nearest parent loop among successors or subloop exits.
+ if (NearLoop == Unloop || !NearLoop || NearLoop->contains(L))
+ NearLoop = L;
+ }
+ if (Subloop) {
+ SubloopParents[Subloop] = NearLoop;
+ return BBLoop;
+ }
+ return NearLoop;
+}
+
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
+bool LoopInfo::runOnFunction(Function &) {
+ releaseMemory();
+ LI.Analyze(getAnalysis<DominatorTree>().getBase());
+ return false;
+}
+
+/// updateUnloop - The last backedge has been removed from a loop--now the
+/// "unloop". Find a new parent for the blocks contained within unloop and
+/// update the loop tree. We don't necessarily have valid dominators at this
+/// point, but LoopInfo is still valid except for the removal of this loop.
+///
+/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without
+/// checking first is illegal.
+void LoopInfo::updateUnloop(Loop *Unloop) {
+
+ // First handle the special case of no parent loop to simplify the algorithm.
+ if (!Unloop->getParentLoop()) {
+ // Since BBLoop had no parent, Unloop blocks are no longer in a loop.
+ for (Loop::block_iterator I = Unloop->block_begin(),
+ E = Unloop->block_end(); I != E; ++I) {
+
+ // Don't reparent blocks in subloops.
+ if (getLoopFor(*I) != Unloop)
+ continue;
+
+ // Blocks no longer have a parent but are still referenced by Unloop until
+ // the Unloop object is deleted.
+ LI.changeLoopFor(*I, 0);
+ }
+
+ // Remove the loop from the top-level LoopInfo object.
+ for (LoopInfo::iterator I = LI.begin();; ++I) {
+ assert(I != LI.end() && "Couldn't find loop");
+ if (*I == Unloop) {
+ LI.removeLoop(I);
+ break;
+ }
+ }
+
+ // Move all of the subloops to the top-level.
+ while (!Unloop->empty())
+ LI.addTopLevelLoop(Unloop->removeChildLoop(llvm::prior(Unloop->end())));
+
+ return;
+ }
+
+ // Update the parent loop for all blocks within the loop. Blocks within
+ // subloops will not change parents.
+ UnloopUpdater Updater(Unloop, this);
+ Updater.updateBlockParents();
+
+ // Remove blocks from former ancestor loops.
+ Updater.removeBlocksFromAncestors();
+
+ // Add direct subloops as children in their new parent loop.
+ Updater.updateSubloopParents();
+
+ // Remove unloop from its parent loop.
+ Loop *ParentLoop = Unloop->getParentLoop();
+ for (Loop::iterator I = ParentLoop->begin();; ++I) {
+ assert(I != ParentLoop->end() && "Couldn't find loop");
+ if (*I == Unloop) {
+ ParentLoop->removeChildLoop(I);
+ break;
+ }
+ }
+}
+
+void LoopInfo::verifyAnalysis() const {
+ // LoopInfo is a FunctionPass, but verifying every loop in the function
+ // each time verifyAnalysis is called is very expensive. The
+ // -verify-loop-info option can enable this. In order to perform some
+ // checking by default, LoopPass has been taught to call verifyLoop
+ // manually during loop pass sequences.
+
+ if (!VerifyLoopInfo) return;
+
+ DenseSet<const Loop*> Loops;
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
+ (*I)->verifyLoopNest(&Loops);
+ }
+
+ // Verify that blocks are mapped to valid loops.
+ for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(),
+ E = LI.BBMap.end(); I != E; ++I) {
+ assert(Loops.count(I->second) && "orphaned loop");
+ assert(I->second->contains(I->first) && "orphaned block");
+ }
+}
+
+void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<DominatorTree>();
+}
+
+void LoopInfo::print(raw_ostream &OS, const Module*) const {
+ LI.print(OS);
+}
+
+//===----------------------------------------------------------------------===//
+// LoopBlocksDFS implementation
+//
+
+/// Traverse the loop blocks and store the DFS result.
+/// Useful for clients that just want the final DFS result and don't need to
+/// visit blocks during the initial traversal.
+void LoopBlocksDFS::perform(LoopInfo *LI) {
+ LoopBlocksTraversal Traversal(*this, LI);
+ for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
+ POE = Traversal.end(); POI != POE; ++POI) ;
+}
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
new file mode 100644
index 000000000000..1540112fe10c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -0,0 +1,363 @@
+//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LoopPass and LPPassManager. All loop optimization
+// and transformation passes are derived from LoopPass. LPPassManager is
+// responsible for managing LoopPasses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+namespace {
+
+/// PrintLoopPass - Print a Function corresponding to a Loop.
+///
+class PrintLoopPass : public LoopPass {
+private:
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+public:
+ static char ID;
+ PrintLoopPass(const std::string &B, raw_ostream &o)
+ : LoopPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &) {
+ Out << Banner;
+ for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
+ b != be;
+ ++b) {
+ (*b)->print(Out);
+ }
+ return false;
+ }
+};
+
+char PrintLoopPass::ID = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// LPPassManager
+//
+
+char LPPassManager::ID = 0;
+
+LPPassManager::LPPassManager()
+ : FunctionPass(ID), PMDataManager() {
+ skipThisLoop = false;
+ redoThisLoop = false;
+ LI = NULL;
+ CurrentLoop = NULL;
+}
+
+/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
+void LPPassManager::deleteLoopFromQueue(Loop *L) {
+
+ LI->updateUnloop(L);
+
+ // If L is current loop then skip rest of the passes and let
+ // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
+ // and continue applying other passes on CurrentLoop.
+ if (CurrentLoop == L)
+ skipThisLoop = true;
+
+ delete L;
+
+ if (skipThisLoop)
+ return;
+
+ for (std::deque<Loop *>::iterator I = LQ.begin(),
+ E = LQ.end(); I != E; ++I) {
+ if (*I == L) {
+ LQ.erase(I);
+ break;
+ }
+ }
+}
+
+// Inset loop into loop nest (LoopInfo) and loop queue (LQ).
+void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
+
+ assert (CurrentLoop != L && "Cannot insert CurrentLoop");
+
+ // Insert into loop nest
+ if (ParentLoop)
+ ParentLoop->addChildLoop(L);
+ else
+ LI->addTopLevelLoop(L);
+
+ insertLoopIntoQueue(L);
+}
+
+void LPPassManager::insertLoopIntoQueue(Loop *L) {
+ // Insert L into loop queue
+ if (L == CurrentLoop)
+ redoLoop(L);
+ else if (!L->getParentLoop())
+ // This is top level loop.
+ LQ.push_front(L);
+ else {
+ // Insert L after the parent loop.
+ for (std::deque<Loop *>::iterator I = LQ.begin(),
+ E = LQ.end(); I != E; ++I) {
+ if (*I == L->getParentLoop()) {
+ // deque does not support insert after.
+ ++I;
+ LQ.insert(I, 1, L);
+ break;
+ }
+ }
+ }
+}
+
+// Reoptimize this loop. LPPassManager will re-insert this loop into the
+// queue. This allows LoopPass to change loop nest for the loop. This
+// utility may send LPPassManager into infinite loops so use caution.
+void LPPassManager::redoLoop(Loop *L) {
+ assert (CurrentLoop == L && "Can redo only CurrentLoop");
+ redoThisLoop = true;
+}
+
+/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
+/// all loop passes.
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
+ BasicBlock *To, Loop *L) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->cloneBasicBlockAnalysis(From, To, L);
+ }
+}
+
+/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
+void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
+ ++BI) {
+ Instruction &I = *BI;
+ deleteSimpleAnalysisValue(&I, L);
+ }
+ }
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->deleteAnalysisValue(V, L);
+ }
+}
+
+
+// Recurse through all subloops and all loops into LQ.
+static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
+ LQ.push_back(L);
+ for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ // LPPassManager needs LoopInfo. In the long term LoopInfo class will
+ // become part of LPPassManager.
+ Info.addRequired<LoopInfo>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool LPPassManager::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfo>();
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ // Populate the loop queue in reverse program order. There is no clear need to
+ // process sibling loops in either forward or reverse order. There may be some
+ // advantage in deleting uses in a later loop before optimizing the
+ // definitions in an earlier loop. If we find a clear reason to process in
+ // forward order, then a forward variant of LoopPassManager should be created.
+ for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+
+ if (LQ.empty()) // No loops, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
+ I != E; ++I) {
+ Loop *L = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ Changed |= P->doInitialization(L, *this);
+ }
+ }
+
+ // Walk Loops
+ while (!LQ.empty()) {
+
+ CurrentLoop = LQ.back();
+ skipThisLoop = false;
+ redoThisLoop = false;
+
+ // Run all passes on the current Loop.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+
+ dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
+ CurrentLoop->getHeader()->getName());
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
+ TimeRegion PassTimer(getPassTimer(P));
+
+ Changed |= P->runOnLoop(CurrentLoop, *this);
+ }
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getName());
+ dumpPreservedSet(P);
+
+ if (!skipThisLoop) {
+ // Manually check that this loop is still healthy. This is done
+ // instead of relying on LoopInfo::verifyLoop since LoopInfo
+ // is a function pass and it's really expensive to verify every
+ // loop in the function every time. That level of checking can be
+ // enabled with the -verify-loop-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(LI));
+ CurrentLoop->verifyLoop();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getName(),
+ ON_LOOP_MSG);
+
+ if (skipThisLoop)
+ // Do not run other passes on this loop.
+ break;
+ }
+
+ // If the loop was deleted, release all the loop passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisLoop)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_LOOP_MSG);
+ }
+
+ // Pop the loop from queue after running all passes.
+ LQ.pop_back();
+
+ if (redoThisLoop)
+ LQ.push_back(CurrentLoop);
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+ return Changed;
+}
+
+/// Print passes managed by this manager
+void LPPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Loop Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LoopPass
+
+Pass *LoopPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintLoopPass(Banner, O);
+}
+
+// Check if this pass is suitable for the current LPPassManager, if
+// available. This pass P is not suitable for a LPPassManager if P
+// is not preserving higher level analysis info used by other
+// LPPassManager passes. In such case, pop LPPassManager from the
+// stack. This will force assignPassManager() to create new
+// LPPassManger as expected.
+void LoopPass::preparePassManager(PMStack &PMS) {
+
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ // If this pass is destroying high level information that is used
+ // by other passes that are managed by LPM then do not insert
+ // this pass in current LPM. Use new LPPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void LoopPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ LPPassManager *LPPM;
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager)
+ LPPM = (LPPassManager*)PMS.top();
+ else {
+ // Create new Loop Pass Manager if it does not exist.
+ assert (!PMS.empty() && "Unable to create Loop Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Loop Pass Manager
+ LPPM = new LPPassManager();
+ LPPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(LPPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = LPPM->getAsPass();
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(LPPM);
+ }
+
+ LPPM->add(this);
+}
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
new file mode 100644
index 000000000000..d26aaf1b9048
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -0,0 +1,192 @@
+//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MemDepPrinter : public FunctionPass {
+ const Function *F;
+
+ enum DepType {
+ Clobber = 0,
+ Def,
+ NonFuncLocal,
+ Unknown
+ };
+
+ static const char *const DepTypeStr[];
+
+ typedef PointerIntPair<const Instruction *, 2, DepType> InstTypePair;
+ typedef std::pair<InstTypePair, const BasicBlock *> Dep;
+ typedef SmallSetVector<Dep, 4> DepSet;
+ typedef DenseMap<const Instruction *, DepSet> DepSetMap;
+ DepSetMap Deps;
+
+ static char ID; // Pass identifcation, replacement for typeid
+ MemDepPrinter() : FunctionPass(ID) {
+ initializeMemDepPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ void print(raw_ostream &OS, const Module * = 0) const;
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<MemoryDependenceAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ virtual void releaseMemory() {
+ Deps.clear();
+ F = 0;
+ }
+
+ private:
+ static InstTypePair getInstTypePair(MemDepResult dep) {
+ if (dep.isClobber())
+ return InstTypePair(dep.getInst(), Clobber);
+ if (dep.isDef())
+ return InstTypePair(dep.getInst(), Def);
+ if (dep.isNonFuncLocal())
+ return InstTypePair(dep.getInst(), NonFuncLocal);
+ assert(dep.isUnknown() && "unexptected dependence type");
+ return InstTypePair(dep.getInst(), Unknown);
+ }
+ static InstTypePair getInstTypePair(const Instruction* inst, DepType type) {
+ return InstTypePair(inst, type);
+ }
+ };
+}
+
+char MemDepPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+
+FunctionPass *llvm::createMemDepPrinter() {
+ return new MemDepPrinter();
+}
+
+const char *const MemDepPrinter::DepTypeStr[]
+ = {"Clobber", "Def", "NonFuncLocal", "Unknown"};
+
+bool MemDepPrinter::runOnFunction(Function &F) {
+ this->F = &F;
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+
+ // All this code uses non-const interfaces because MemDep is not
+ // const-friendly, though nothing is actually modified.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
+ continue;
+
+ MemDepResult Res = MDA.getDependency(Inst);
+ if (!Res.isNonLocal()) {
+ Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
+ static_cast<BasicBlock *>(0)));
+ } else if (CallSite CS = cast<Value>(Inst)) {
+ const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
+ MDA.getNonLocalCallDependency(CS);
+
+ DepSet &InstDeps = Deps[Inst];
+ for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB()));
+ }
+ } else {
+ SmallVector<NonLocalDepResult, 4> NLDI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (!LI->isUnordered()) {
+ // FIXME: Handle atomic/volatile loads.
+ Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
+ static_cast<BasicBlock *>(0)));
+ continue;
+ }
+ AliasAnalysis::Location Loc = AA.getLocation(LI);
+ MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (!SI->isUnordered()) {
+ // FIXME: Handle atomic/volatile stores.
+ Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
+ static_cast<BasicBlock *>(0)));
+ continue;
+ }
+ AliasAnalysis::Location Loc = AA.getLocation(SI);
+ MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
+ AliasAnalysis::Location Loc = AA.getLocation(VI);
+ MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI);
+ } else {
+ llvm_unreachable("Unknown memory instruction!");
+ }
+
+ DepSet &InstDeps = Deps[Inst];
+ for (SmallVectorImpl<NonLocalDepResult>::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB()));
+ }
+ }
+ }
+
+ return false;
+}
+
+void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
+ for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) {
+ const Instruction *Inst = &*I;
+
+ DepSetMap::const_iterator DI = Deps.find(Inst);
+ if (DI == Deps.end())
+ continue;
+
+ const DepSet &InstDeps = DI->second;
+
+ for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
+ I != E; ++I) {
+ const Instruction *DepInst = I->first.getPointer();
+ DepType type = I->first.getInt();
+ const BasicBlock *DepBB = I->second;
+
+ OS << " ";
+ OS << DepTypeStr[type];
+ if (DepBB) {
+ OS << " in block ";
+ WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
+ }
+ if (DepInst) {
+ OS << " from: ";
+ DepInst->print(OS);
+ }
+ OS << "\n";
+ }
+
+ Inst->print(OS);
+ OS << "\n\n";
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
new file mode 100644
index 000000000000..d490d5419f75
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -0,0 +1,819 @@
+//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to builtin functions that allocate
+// or free memory.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memory-builtins"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+enum AllocType {
+ MallocLike = 1<<0, // allocates
+ CallocLike = 1<<1, // allocates + bzero
+ ReallocLike = 1<<2, // reallocates
+ StrDupLike = 1<<3,
+ AllocLike = MallocLike | CallocLike | StrDupLike,
+ AnyAlloc = MallocLike | CallocLike | ReallocLike | StrDupLike
+};
+
+struct AllocFnsTy {
+ LibFunc::Func Func;
+ AllocType AllocTy;
+ unsigned char NumParams;
+ // First and Second size parameters (or -1 if unused)
+ signed char FstParam, SndParam;
+};
+
+// FIXME: certain users need more information. E.g., SimplifyLibCalls needs to
+// know which functions are nounwind, noalias, nocapture parameters, etc.
+static const AllocFnsTy AllocationFnData[] = {
+ {LibFunc::malloc, MallocLike, 1, 0, -1},
+ {LibFunc::valloc, MallocLike, 1, 0, -1},
+ {LibFunc::Znwj, MallocLike, 1, 0, -1}, // new(unsigned int)
+ {LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow)
+ {LibFunc::Znwm, MallocLike, 1, 0, -1}, // new(unsigned long)
+ {LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow)
+ {LibFunc::Znaj, MallocLike, 1, 0, -1}, // new[](unsigned int)
+ {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
+ {LibFunc::Znam, MallocLike, 1, 0, -1}, // new[](unsigned long)
+ {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow)
+ {LibFunc::posix_memalign, MallocLike, 3, 2, -1},
+ {LibFunc::calloc, CallocLike, 2, 0, 1},
+ {LibFunc::realloc, ReallocLike, 2, 1, -1},
+ {LibFunc::reallocf, ReallocLike, 2, 1, -1},
+ {LibFunc::strdup, StrDupLike, 1, -1, -1},
+ {LibFunc::strndup, StrDupLike, 2, 1, -1}
+};
+
+
+static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
+ if (LookThroughBitCast)
+ V = V->stripPointerCasts();
+
+ CallSite CS(const_cast<Value*>(V));
+ if (!CS.getInstruction())
+ return 0;
+
+ Function *Callee = CS.getCalledFunction();
+ if (!Callee || !Callee->isDeclaration())
+ return 0;
+ return Callee;
+}
+
+/// \brief Returns the allocation data for the given value if it is a call to a
+/// known allocation function, and NULL otherwise.
+static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
+ const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false) {
+ // Skip intrinsics
+ if (isa<IntrinsicInst>(V))
+ return 0;
+
+ Function *Callee = getCalledFunction(V, LookThroughBitCast);
+ if (!Callee)
+ return 0;
+
+ // Make sure that the function is available.
+ StringRef FnName = Callee->getName();
+ LibFunc::Func TLIFn;
+ if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
+ return 0;
+
+ unsigned i = 0;
+ bool found = false;
+ for ( ; i < array_lengthof(AllocationFnData); ++i) {
+ if (AllocationFnData[i].Func == TLIFn) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return 0;
+
+ const AllocFnsTy *FnData = &AllocationFnData[i];
+ if ((FnData->AllocTy & AllocTy) == 0)
+ return 0;
+
+ // Check function prototype.
+ int FstParam = FnData->FstParam;
+ int SndParam = FnData->SndParam;
+ FunctionType *FTy = Callee->getFunctionType();
+
+ if (FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) &&
+ FTy->getNumParams() == FnData->NumParams &&
+ (FstParam < 0 ||
+ (FTy->getParamType(FstParam)->isIntegerTy(32) ||
+ FTy->getParamType(FstParam)->isIntegerTy(64))) &&
+ (SndParam < 0 ||
+ FTy->getParamType(SndParam)->isIntegerTy(32) ||
+ FTy->getParamType(SndParam)->isIntegerTy(64)))
+ return FnData;
+ return 0;
+}
+
+static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
+ ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V);
+ return CS && CS.hasFnAttr(Attribute::NoAlias);
+}
+
+
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
+/// like).
+bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast);
+}
+
+/// \brief Tests if a value is a call or invoke to a function that returns a
+/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions).
+bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ // it's safe to consider realloc as noalias since accessing the original
+ // pointer is undefined behavior
+ return isAllocationFn(V, TLI, LookThroughBitCast) ||
+ hasNoAliasAttr(V, LookThroughBitCast);
+}
+
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates uninitialized memory (such as malloc).
+bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, MallocLike, TLI, LookThroughBitCast);
+}
+
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates zero-filled memory (such as calloc).
+bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, CallocLike, TLI, LookThroughBitCast);
+}
+
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates memory (either malloc, calloc, or strdup like).
+bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, AllocLike, TLI, LookThroughBitCast);
+}
+
+/// \brief Tests if a value is a call or invoke to a library function that
+/// reallocates memory (such as realloc).
+bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast);
+}
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst *llvm::extractMallocCall(const Value *I,
+ const TargetLibraryInfo *TLI) {
+ return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0;
+}
+
+static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ bool LookThroughSExt = false) {
+ if (!CI)
+ return 0;
+
+ // The size of the malloc's result type must be known to determine array size.
+ Type *T = getMallocAllocatedType(CI, TLI);
+ if (!T || !T->isSized() || !TD)
+ return 0;
+
+ unsigned ElementSize = TD->getTypeAllocSize(T);
+ if (StructType *ST = dyn_cast<StructType>(T))
+ ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
+
+ // If malloc call's arg can be determined to be a multiple of ElementSize,
+ // return the multiple. Otherwise, return NULL.
+ Value *MallocArg = CI->getArgOperand(0);
+ Value *Multiple = 0;
+ if (ComputeMultiple(MallocArg, ElementSize, Multiple,
+ LookThroughSExt))
+ return Multiple;
+
+ return 0;
+}
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// is a call to malloc whose array size can be determined and the array size
+/// is not constant 1. Otherwise, return NULL.
+const CallInst *llvm::isArrayMalloc(const Value *I,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ const CallInst *CI = extractMallocCall(I, TLI);
+ Value *ArraySize = computeArraySize(CI, TD, TLI);
+
+ if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
+ if (ConstSize->isOne())
+ return CI;
+
+ // CI is a non-array malloc or we can't figure out that it is an array malloc.
+ return 0;
+}
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// The PointerType depends on the number of bitcast uses of the malloc call:
+/// 0: PointerType is the calls' return type.
+/// 1: PointerType is the bitcast's result type.
+/// >1: Unique PointerType cannot be determined, return NULL.
+PointerType *llvm::getMallocType(const CallInst *CI,
+ const TargetLibraryInfo *TLI) {
+ assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call");
+
+ PointerType *MallocType = 0;
+ unsigned NumOfBitCastUses = 0;
+
+ // Determine if CallInst has a bitcast use.
+ for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; )
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
+ MallocType = cast<PointerType>(BCI->getDestTy());
+ NumOfBitCastUses++;
+ }
+
+ // Malloc call has 1 bitcast use, so type is the bitcast's destination type.
+ if (NumOfBitCastUses == 1)
+ return MallocType;
+
+ // Malloc call was not bitcast, so type is the malloc function's return type.
+ if (NumOfBitCastUses == 0)
+ return cast<PointerType>(CI->getType());
+
+ // Type could not be determined.
+ return 0;
+}
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call.
+/// The Type depends on the number of bitcast uses of the malloc call:
+/// 0: PointerType is the malloc calls' return type.
+/// 1: PointerType is the bitcast's result type.
+/// >1: Unique PointerType cannot be determined, return NULL.
+Type *llvm::getMallocAllocatedType(const CallInst *CI,
+ const TargetLibraryInfo *TLI) {
+ PointerType *PT = getMallocType(CI, TLI);
+ return PT ? PT->getElementType() : 0;
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call. If the
+/// argument passed to malloc is a multiple of the size of the malloced type,
+/// then return that multiple. For non-array mallocs, the multiple is
+/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
+/// determined.
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ bool LookThroughSExt) {
+ assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
+ return computeArraySize(CI, TD, TLI, LookThroughSExt);
+}
+
+
+/// extractCallocCall - Returns the corresponding CallInst if the instruction
+/// is a calloc call.
+const CallInst *llvm::extractCallocCall(const Value *I,
+ const TargetLibraryInfo *TLI) {
+ return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : 0;
+}
+
+
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI || isa<IntrinsicInst>(CI))
+ return 0;
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration())
+ return 0;
+
+ StringRef FnName = Callee->getName();
+ LibFunc::Func TLIFn;
+ if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
+ return 0;
+
+ if (TLIFn != LibFunc::free &&
+ TLIFn != LibFunc::ZdlPv && // operator delete(void*)
+ TLIFn != LibFunc::ZdaPv) // operator delete[](void*)
+ return 0;
+
+ // Check free prototype.
+ // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+ // attribute will exist.
+ FunctionType *FTy = Callee->getFunctionType();
+ if (!FTy->getReturnType()->isVoidTy())
+ return 0;
+ if (FTy->getNumParams() != 1)
+ return 0;
+ if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
+ return 0;
+
+ return CI;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Utility functions to compute size of objects.
+//
+
+
+/// \brief Compute the size of the object pointed by Ptr. Returns true and the
+/// object size in Size if successful, and false otherwise.
+/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
+/// byval arguments, and global variables.
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
+ const TargetLibraryInfo *TLI, bool RoundToAlign) {
+ if (!TD)
+ return false;
+
+ ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
+ SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
+ if (!Visitor.bothKnown(Data))
+ return false;
+
+ APInt ObjSize = Data.first, Offset = Data.second;
+ // check for overflow
+ if (Offset.slt(0) || ObjSize.ult(Offset))
+ Size = 0;
+ else
+ Size = (ObjSize - Offset).getZExtValue();
+ return true;
+}
+
+/// \brief Compute the size of the underlying object pointed by Ptr. Returns
+/// true and the object size in Size if successful, and false otherwise.
+/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
+/// byval arguments, and global variables.
+bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ bool RoundToAlign) {
+ if (!TD)
+ return false;
+
+ ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
+ SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
+ if (!Visitor.knownSize(Data))
+ return false;
+
+ Size = Data.first.getZExtValue();
+ return true;
+}
+
+
+STATISTIC(ObjectVisitorArgument,
+ "Number of arguments with unsolved size and offset");
+STATISTIC(ObjectVisitorLoad,
+ "Number of load instructions with unsolved size and offset");
+
+
+APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
+ if (RoundToAlign && Align)
+ return APInt(IntTyBits, RoundUpToAlignment(Size.getZExtValue(), Align));
+ return Size;
+}
+
+ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ LLVMContext &Context,
+ bool RoundToAlign)
+: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) {
+ IntegerType *IntTy = TD->getIntPtrType(Context);
+ IntTyBits = IntTy->getBitWidth();
+ Zero = APInt::getNullValue(IntTyBits);
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
+ V = V->stripPointerCasts();
+
+ if (isa<Instruction>(V) || isa<GEPOperator>(V)) {
+ // Return cached value or insert unknown in cache if size of V was not
+ // computed yet in order to avoid recursions in PHis.
+ std::pair<CacheMapTy::iterator, bool> CacheVal =
+ CacheMap.insert(std::make_pair(V, unknown()));
+ if (!CacheVal.second)
+ return CacheVal.first->second;
+
+ SizeOffsetType Result;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ Result = visitGEPOperator(*GEP);
+ else
+ Result = visit(cast<Instruction>(*V));
+ return CacheMap[V] = Result;
+ }
+
+ if (Argument *A = dyn_cast<Argument>(V))
+ return visitArgument(*A);
+ if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
+ return visitConstantPointerNull(*P);
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return visitGlobalAlias(*GA);
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return visitGlobalVariable(*GV);
+ if (UndefValue *UV = dyn_cast<UndefValue>(V))
+ return visitUndefValue(*UV);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ return unknown(); // clueless
+ }
+
+ DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
+ << '\n');
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
+ if (!I.getAllocatedType()->isSized())
+ return unknown();
+
+ APInt Size(IntTyBits, TD->getTypeAllocSize(I.getAllocatedType()));
+ if (!I.isArrayAllocation())
+ return std::make_pair(align(Size, I.getAlignment()), Zero);
+
+ Value *ArraySize = I.getArraySize();
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) {
+ Size *= C->getValue().zextOrSelf(IntTyBits);
+ return std::make_pair(align(Size, I.getAlignment()), Zero);
+ }
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
+ // no interprocedural analysis is done at the moment
+ if (!A.hasByValAttr()) {
+ ++ObjectVisitorArgument;
+ return unknown();
+ }
+ PointerType *PT = cast<PointerType>(A.getType());
+ APInt Size(IntTyBits, TD->getTypeAllocSize(PT->getElementType()));
+ return std::make_pair(align(Size, A.getParamAlignment()), Zero);
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
+ const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc,
+ TLI);
+ if (!FnData)
+ return unknown();
+
+ // handle strdup-like functions separately
+ if (FnData->AllocTy == StrDupLike) {
+ APInt Size(IntTyBits, GetStringLength(CS.getArgument(0)));
+ if (!Size)
+ return unknown();
+
+ // strndup limits strlen
+ if (FnData->FstParam > 0) {
+ ConstantInt *Arg= dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
+ if (!Arg)
+ return unknown();
+
+ APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits);
+ if (Size.ugt(MaxSize))
+ Size = MaxSize + 1;
+ }
+ return std::make_pair(Size, Zero);
+ }
+
+ ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
+ if (!Arg)
+ return unknown();
+
+ APInt Size = Arg->getValue().zextOrSelf(IntTyBits);
+ // size determined by just 1 parameter
+ if (FnData->SndParam < 0)
+ return std::make_pair(Size, Zero);
+
+ Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->SndParam));
+ if (!Arg)
+ return unknown();
+
+ Size *= Arg->getValue().zextOrSelf(IntTyBits);
+ return std::make_pair(Size, Zero);
+
+ // TODO: handle more standard functions (+ wchar cousins):
+ // - strdup / strndup
+ // - strcpy / strncpy
+ // - strcat / strncat
+ // - memcpy / memmove
+ // - strcat / strncat
+ // - memset
+}
+
+SizeOffsetType
+ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull&) {
+ return std::make_pair(Zero, Zero);
+}
+
+SizeOffsetType
+ObjectSizeOffsetVisitor::visitExtractElementInst(ExtractElementInst&) {
+ return unknown();
+}
+
+SizeOffsetType
+ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
+ // Easy cases were already folded by previous passes.
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
+ SizeOffsetType PtrData = compute(GEP.getPointerOperand());
+ APInt Offset(IntTyBits, 0);
+ if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset))
+ return unknown();
+
+ return std::make_pair(PtrData.first, PtrData.second + Offset);
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) {
+ if (GA.mayBeOverridden())
+ return unknown();
+ return compute(GA.getAliasee());
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
+ if (!GV.hasDefinitiveInitializer())
+ return unknown();
+
+ APInt Size(IntTyBits, TD->getTypeAllocSize(GV.getType()->getElementType()));
+ return std::make_pair(align(Size, GV.getAlignment()), Zero);
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst&) {
+ // clueless
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
+ ++ObjectVisitorLoad;
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) {
+ if (PHI.getNumIncomingValues() == 0)
+ return unknown();
+
+ SizeOffsetType Ret = compute(PHI.getIncomingValue(0));
+ if (!bothKnown(Ret))
+ return unknown();
+
+ // Verify that all PHI incoming pointers have the same size and offset.
+ for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) {
+ SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i));
+ if (!bothKnown(EdgeData) || EdgeData != Ret)
+ return unknown();
+ }
+ return Ret;
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
+ SizeOffsetType TrueSide = compute(I.getTrueValue());
+ SizeOffsetType FalseSide = compute(I.getFalseValue());
+ if (bothKnown(TrueSide) && bothKnown(FalseSide) && TrueSide == FalseSide)
+ return TrueSide;
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) {
+ return std::make_pair(Zero, Zero);
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
+ DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I << '\n');
+ return unknown();
+}
+
+
+ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ LLVMContext &Context)
+: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) {
+ IntTy = TD->getIntPtrType(Context);
+ Zero = ConstantInt::get(IntTy, 0);
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
+ SizeOffsetEvalType Result = compute_(V);
+
+ if (!bothKnown(Result)) {
+ // erase everything that was computed in this iteration from the cache, so
+ // that no dangling references are left behind. We could be a bit smarter if
+ // we kept a dependency graph. It's probably not worth the complexity.
+ for (PtrSetTy::iterator I=SeenVals.begin(), E=SeenVals.end(); I != E; ++I) {
+ CacheMapTy::iterator CacheIt = CacheMap.find(*I);
+ // non-computable results can be safely cached
+ if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second))
+ CacheMap.erase(CacheIt);
+ }
+ }
+
+ SeenVals.clear();
+ return Result;
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
+ ObjectSizeOffsetVisitor Visitor(TD, TLI, Context);
+ SizeOffsetType Const = Visitor.compute(V);
+ if (Visitor.bothKnown(Const))
+ return std::make_pair(ConstantInt::get(Context, Const.first),
+ ConstantInt::get(Context, Const.second));
+
+ V = V->stripPointerCasts();
+
+ // check cache
+ CacheMapTy::iterator CacheIt = CacheMap.find(V);
+ if (CacheIt != CacheMap.end())
+ return CacheIt->second;
+
+ // always generate code immediately before the instruction being
+ // processed, so that the generated code dominates the same BBs
+ Instruction *PrevInsertPoint = Builder.GetInsertPoint();
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Builder.SetInsertPoint(I);
+
+ // record the pointers that were handled in this run, so that they can be
+ // cleaned later if something fails
+ SeenVals.insert(V);
+
+ // now compute the size and offset
+ SizeOffsetEvalType Result;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ Result = visitGEPOperator(*GEP);
+ } else if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Result = visit(*I);
+ } else if (isa<Argument>(V) ||
+ (isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode() == Instruction::IntToPtr) ||
+ isa<GlobalAlias>(V) ||
+ isa<GlobalVariable>(V)) {
+ // ignore values where we cannot do more than what ObjectSizeVisitor can
+ Result = unknown();
+ } else {
+ DEBUG(dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: "
+ << *V << '\n');
+ Result = unknown();
+ }
+
+ if (PrevInsertPoint)
+ Builder.SetInsertPoint(PrevInsertPoint);
+
+ // Don't reuse CacheIt since it may be invalid at this point.
+ CacheMap[V] = Result;
+ return Result;
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
+ if (!I.getAllocatedType()->isSized())
+ return unknown();
+
+ // must be a VLA
+ assert(I.isArrayAllocation());
+ Value *ArraySize = I.getArraySize();
+ Value *Size = ConstantInt::get(ArraySize->getType(),
+ TD->getTypeAllocSize(I.getAllocatedType()));
+ Size = Builder.CreateMul(Size, ArraySize);
+ return std::make_pair(Size, Zero);
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) {
+ const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc,
+ TLI);
+ if (!FnData)
+ return unknown();
+
+ // handle strdup-like functions separately
+ if (FnData->AllocTy == StrDupLike) {
+ // TODO
+ return unknown();
+ }
+
+ Value *FirstArg = CS.getArgument(FnData->FstParam);
+ FirstArg = Builder.CreateZExt(FirstArg, IntTy);
+ if (FnData->SndParam < 0)
+ return std::make_pair(FirstArg, Zero);
+
+ Value *SecondArg = CS.getArgument(FnData->SndParam);
+ SecondArg = Builder.CreateZExt(SecondArg, IntTy);
+ Value *Size = Builder.CreateMul(FirstArg, SecondArg);
+ return std::make_pair(Size, Zero);
+
+ // TODO: handle more standard functions (+ wchar cousins):
+ // - strdup / strndup
+ // - strcpy / strncpy
+ // - strcat / strncat
+ // - memcpy / memmove
+ // - strcat / strncat
+ // - memset
+}
+
+SizeOffsetEvalType
+ObjectSizeOffsetEvaluator::visitExtractElementInst(ExtractElementInst&) {
+ return unknown();
+}
+
+SizeOffsetEvalType
+ObjectSizeOffsetEvaluator::visitExtractValueInst(ExtractValueInst&) {
+ return unknown();
+}
+
+SizeOffsetEvalType
+ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
+ SizeOffsetEvalType PtrData = compute_(GEP.getPointerOperand());
+ if (!bothKnown(PtrData))
+ return unknown();
+
+ Value *Offset = EmitGEPOffset(&Builder, *TD, &GEP, /*NoAssumptions=*/true);
+ Offset = Builder.CreateAdd(PtrData.second, Offset);
+ return std::make_pair(PtrData.first, Offset);
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitIntToPtrInst(IntToPtrInst&) {
+ // clueless
+ return unknown();
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst&) {
+ return unknown();
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
+ // create 2 PHIs: one for size and another for offset
+ PHINode *SizePHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues());
+ PHINode *OffsetPHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues());
+
+ // insert right away in the cache to handle recursive PHIs
+ CacheMap[&PHI] = std::make_pair(SizePHI, OffsetPHI);
+
+ // compute offset/size for each PHI incoming pointer
+ for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) {
+ Builder.SetInsertPoint(PHI.getIncomingBlock(i)->getFirstInsertionPt());
+ SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i));
+
+ if (!bothKnown(EdgeData)) {
+ OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy));
+ OffsetPHI->eraseFromParent();
+ SizePHI->replaceAllUsesWith(UndefValue::get(IntTy));
+ SizePHI->eraseFromParent();
+ return unknown();
+ }
+ SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i));
+ OffsetPHI->addIncoming(EdgeData.second, PHI.getIncomingBlock(i));
+ }
+
+ Value *Size = SizePHI, *Offset = OffsetPHI, *Tmp;
+ if ((Tmp = SizePHI->hasConstantValue())) {
+ Size = Tmp;
+ SizePHI->replaceAllUsesWith(Size);
+ SizePHI->eraseFromParent();
+ }
+ if ((Tmp = OffsetPHI->hasConstantValue())) {
+ Offset = Tmp;
+ OffsetPHI->replaceAllUsesWith(Offset);
+ OffsetPHI->eraseFromParent();
+ }
+ return std::make_pair(Size, Offset);
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) {
+ SizeOffsetEvalType TrueSide = compute_(I.getTrueValue());
+ SizeOffsetEvalType FalseSide = compute_(I.getFalseValue());
+
+ if (!bothKnown(TrueSide) || !bothKnown(FalseSide))
+ return unknown();
+ if (TrueSide == FalseSide)
+ return TrueSide;
+
+ Value *Size = Builder.CreateSelect(I.getCondition(), TrueSide.first,
+ FalseSide.first);
+ Value *Offset = Builder.CreateSelect(I.getCondition(), TrueSide.second,
+ FalseSide.second);
+ return std::make_pair(Size, Offset);
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitInstruction(Instruction &I) {
+ DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I <<'\n');
+ return unknown();
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
new file mode 100644
index 000000000000..2240e9de33eb
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -0,0 +1,1525 @@
+//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an analysis that determines, for a given memory
+// operation, what preceding memory operations it depends on. It builds on
+// alias analysis information, and tries to provide a lazy, caching interface to
+// a common kind of alias information query.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memdep"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/PredIteratorCache.h"
+using namespace llvm;
+
+STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
+STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
+STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
+
+STATISTIC(NumCacheNonLocalPtr,
+ "Number of fully cached non-local ptr responses");
+STATISTIC(NumCacheDirtyNonLocalPtr,
+ "Number of cached, but dirty, non-local ptr responses");
+STATISTIC(NumUncacheNonLocalPtr,
+ "Number of uncached non-local ptr responses");
+STATISTIC(NumCacheCompleteNonLocalPtr,
+ "Number of block queries that were completely cached");
+
+// Limit for the number of instructions to scan in a block.
+// FIXME: Figure out what a sane value is for this.
+// (500 is relatively insane.)
+static const int BlockScanLimit = 500;
+
+char MemoryDependenceAnalysis::ID = 0;
+
+// Register this pass...
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+: FunctionPass(ID), PredCache(0) {
+ initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
+}
+MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
+}
+
+/// Clean up memory in between runs
+void MemoryDependenceAnalysis::releaseMemory() {
+ LocalDeps.clear();
+ NonLocalDeps.clear();
+ NonLocalPointerDeps.clear();
+ ReverseLocalDeps.clear();
+ ReverseNonLocalDeps.clear();
+ ReverseNonLocalPtrDeps.clear();
+ PredCache->clear();
+}
+
+
+
+/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
+///
+void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+}
+
+bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+ AA = &getAnalysis<AliasAnalysis>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ if (PredCache == 0)
+ PredCache.reset(new PredIteratorCache());
+ return false;
+}
+
+/// RemoveFromReverseMap - This is a helper function that removes Val from
+/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
+template <typename KeyTy>
+static void RemoveFromReverseMap(DenseMap<Instruction*,
+ SmallPtrSet<KeyTy, 4> > &ReverseMap,
+ Instruction *Inst, KeyTy Val) {
+ typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
+ InstIt = ReverseMap.find(Inst);
+ assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
+ bool Found = InstIt->second.erase(Val);
+ assert(Found && "Invalid reverse map!"); (void)Found;
+ if (InstIt->second.empty())
+ ReverseMap.erase(InstIt);
+}
+
+/// GetLocation - If the given instruction references a specific memory
+/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
+/// Return a ModRefInfo value describing the general behavior of the
+/// instruction.
+static
+AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
+ AliasAnalysis::Location &Loc,
+ AliasAnalysis *AA) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isUnordered()) {
+ Loc = AA->getLocation(LI);
+ return AliasAnalysis::Ref;
+ }
+ if (LI->getOrdering() == Monotonic) {
+ Loc = AA->getLocation(LI);
+ return AliasAnalysis::ModRef;
+ }
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isUnordered()) {
+ Loc = AA->getLocation(SI);
+ return AliasAnalysis::Mod;
+ }
+ if (SI->getOrdering() == Monotonic) {
+ Loc = AA->getLocation(SI);
+ return AliasAnalysis::ModRef;
+ }
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
+ }
+
+ if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
+ Loc = AA->getLocation(V);
+ return AliasAnalysis::ModRef;
+ }
+
+ if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
+ // calls to free() deallocate the entire structure
+ Loc = AliasAnalysis::Location(CI->getArgOperand(0));
+ return AliasAnalysis::Mod;
+ }
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ Loc = AliasAnalysis::Location(II->getArgOperand(1),
+ cast<ConstantInt>(II->getArgOperand(0))
+ ->getZExtValue(),
+ II->getMetadata(LLVMContext::MD_tbaa));
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return AliasAnalysis::Mod;
+ case Intrinsic::invariant_end:
+ Loc = AliasAnalysis::Location(II->getArgOperand(2),
+ cast<ConstantInt>(II->getArgOperand(1))
+ ->getZExtValue(),
+ II->getMetadata(LLVMContext::MD_tbaa));
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return AliasAnalysis::Mod;
+ default:
+ break;
+ }
+
+ // Otherwise, just do the coarse-grained thing that always works.
+ if (Inst->mayWriteToMemory())
+ return AliasAnalysis::ModRef;
+ if (Inst->mayReadFromMemory())
+ return AliasAnalysis::Ref;
+ return AliasAnalysis::NoModRef;
+}
+
+/// getCallSiteDependencyFrom - Private helper for finding the local
+/// dependencies of a call site.
+MemDepResult MemoryDependenceAnalysis::
+getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
+ BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ unsigned Limit = BlockScanLimit;
+
+ // Walk backwards through the block, looking for dependencies
+ while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
+ Instruction *Inst = --ScanIt;
+
+ // If this inst is a memory op, get the pointer it accessed
+ AliasAnalysis::Location Loc;
+ AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
+ if (Loc.Ptr) {
+ // A simple instruction.
+ if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
+ return MemDepResult::getClobber(Inst);
+ continue;
+ }
+
+ if (CallSite InstCS = cast<Value>(Inst)) {
+ // Debug intrinsics don't cause dependences.
+ if (isa<DbgInfoIntrinsic>(Inst)) continue;
+ // If these two calls do not interfere, look past it.
+ switch (AA->getModRefInfo(CS, InstCS)) {
+ case AliasAnalysis::NoModRef:
+ // If the two calls are the same, return InstCS as a Def, so that
+ // CS can be found redundant and eliminated.
+ if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
+ CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+ return MemDepResult::getDef(Inst);
+
+ // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+ // keep scanning.
+ continue;
+ default:
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+
+ // If we could not obtain a pointer for the instruction and the instruction
+ // touches memory then assume that this is a dependency.
+ if (MR != AliasAnalysis::NoModRef)
+ return MemDepResult::getClobber(Inst);
+ }
+
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getNonFuncLocal();
+}
+
+/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
+/// would fully overlap MemLoc if done as a wider legal integer load.
+///
+/// MemLocBase, MemLocOffset are lazily computed here the first time the
+/// base/offs of memloc is needed.
+static bool
+isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
+ const Value *&MemLocBase,
+ int64_t &MemLocOffs,
+ const LoadInst *LI,
+ const DataLayout *TD) {
+ // If we have no target data, we can't do this.
+ if (TD == 0) return false;
+
+ // If we haven't already computed the base/offset of MemLoc, do so now.
+ if (MemLocBase == 0)
+ MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD);
+
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
+ LI, *TD);
+ return Size != 0;
+}
+
+/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+/// looks at a memory location for a load (specified by MemLocBase, Offs,
+/// and Size) and compares it against a load. If the specified load could
+/// be safely widened to a larger integer load that is 1) still efficient,
+/// 2) safe for the target, and 3) would provide the specified memory
+/// location value, then this function returns the size in bytes of the
+/// load width to use. If not, this returns zero.
+unsigned MemoryDependenceAnalysis::
+getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
+ unsigned MemLocSize, const LoadInst *LI,
+ const DataLayout &TD) {
+ // We can only extend simple integer loads.
+ if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
+
+ // Load widening is hostile to ThreadSanitizer: it may cause false positives
+ // or make the reports more cryptic (access sizes are wrong).
+ if (LI->getParent()->getParent()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
+ return 0;
+
+ // Get the base of this load.
+ int64_t LIOffs = 0;
+ const Value *LIBase =
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD);
+
+ // If the two pointers are not based on the same pointer, we can't tell that
+ // they are related.
+ if (LIBase != MemLocBase) return 0;
+
+ // Okay, the two values are based on the same pointer, but returned as
+ // no-alias. This happens when we have things like two byte loads at "P+1"
+ // and "P+3". Check to see if increasing the size of the "LI" load up to its
+ // alignment (or the largest native integer type) will allow us to load all
+ // the bits required by MemLoc.
+
+ // If MemLoc is before LI, then no widening of LI will help us out.
+ if (MemLocOffs < LIOffs) return 0;
+
+ // Get the alignment of the load in bytes. We assume that it is safe to load
+ // any legal integer up to this size without a problem. For example, if we're
+ // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
+ // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
+ // to i16.
+ unsigned LoadAlign = LI->getAlignment();
+
+ int64_t MemLocEnd = MemLocOffs+MemLocSize;
+
+ // If no amount of rounding up will let MemLoc fit into LI, then bail out.
+ if (LIOffs+LoadAlign < MemLocEnd) return 0;
+
+ // This is the size of the load to try. Start with the next larger power of
+ // two.
+ unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
+ NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
+
+ while (1) {
+ // If this load size is bigger than our known alignment or would not fit
+ // into a native integer register, then we fail.
+ if (NewLoadByteSize > LoadAlign ||
+ !TD.fitsInLegalInteger(NewLoadByteSize*8))
+ return 0;
+
+ if (LIOffs+NewLoadByteSize > MemLocEnd &&
+ LI->getParent()->getParent()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress))
+ // We will be reading past the location accessed by the original program.
+ // While this is safe in a regular build, Address Safety analysis tools
+ // may start reporting false warnings. So, don't do widening.
+ return 0;
+
+ // If a load of this width would include all of MemLoc, then we succeed.
+ if (LIOffs+NewLoadByteSize >= MemLocEnd)
+ return NewLoadByteSize;
+
+ NewLoadByteSize <<= 1;
+ }
+}
+
+/// getPointerDependencyFrom - Return the instruction on which a memory
+/// location depends. If isLoad is true, this routine ignores may-aliases with
+/// read-only operations. If isLoad is false, this routine ignores may-aliases
+/// with reads from read-only locations. If possible, pass the query
+/// instruction as well; this function may take advantage of the metadata
+/// annotated to the query instruction to refine the result.
+MemDepResult MemoryDependenceAnalysis::
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
+ BasicBlock::iterator ScanIt, BasicBlock *BB,
+ Instruction *QueryInst) {
+
+ const Value *MemLocBase = 0;
+ int64_t MemLocOffset = 0;
+ unsigned Limit = BlockScanLimit;
+ bool isInvariantLoad = false;
+ if (isLoad && QueryInst) {
+ LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
+ if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0)
+ isInvariantLoad = true;
+ }
+
+ // Walk backwards through the basic block, looking for dependencies.
+ while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
+ Instruction *Inst = --ScanIt;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Debug intrinsics don't (and can't) cause dependences.
+ if (isa<DbgInfoIntrinsic>(II)) continue;
+
+ // If we reach a lifetime begin or end marker, then the query ends here
+ // because the value is undefined.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ // FIXME: This only considers queries directly on the invariant-tagged
+ // pointer, not on query pointers that are indexed off of them. It'd
+ // be nice to handle that at some point (the right approach is to use
+ // GetPointerBaseWithConstantOffset).
+ if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)),
+ MemLoc))
+ return MemDepResult::getDef(II);
+ continue;
+ }
+ }
+
+ // Values depend on loads if the pointers are must aliased. This means that
+ // a load depends on another must aliased load from the same value.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // Atomic loads have complications involved.
+ // FIXME: This is overly conservative.
+ if (!LI->isUnordered())
+ return MemDepResult::getClobber(LI);
+
+ AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
+
+ if (isLoad) {
+ if (R == AliasAnalysis::NoAlias) {
+ // If this is an over-aligned integer load (for example,
+ // "load i8* %P, align 4") see if it would obviously overlap with the
+ // queried location if widened to a larger load (e.g. if the queried
+ // location is 1 byte at P+1). If so, return it as a load/load
+ // clobber result, allowing the client to decide to widen the load if
+ // it wants to.
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
+ if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
+ isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
+ MemLocOffset, LI, TD))
+ return MemDepResult::getClobber(Inst);
+
+ continue;
+ }
+
+ // Must aliased loads are defs of each other.
+ if (R == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
+
+#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
+ // in terms of clobbering loads, but since it does this by looking
+ // at the clobbering load directly, it doesn't know about any
+ // phi translation that may have happened along the way.
+
+ // If we have a partial alias, then return this as a clobber for the
+ // client to handle.
+ if (R == AliasAnalysis::PartialAlias)
+ return MemDepResult::getClobber(Inst);
+#endif
+
+ // Random may-alias loads don't depend on each other without a
+ // dependence.
+ continue;
+ }
+
+ // Stores don't depend on other no-aliased accesses.
+ if (R == AliasAnalysis::NoAlias)
+ continue;
+
+ // Stores don't alias loads from read-only memory.
+ if (AA->pointsToConstantMemory(LoadLoc))
+ continue;
+
+ // Stores depend on may/must aliased loads.
+ return MemDepResult::getDef(Inst);
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Atomic stores have complications involved.
+ // FIXME: This is overly conservative.
+ if (!SI->isUnordered())
+ return MemDepResult::getClobber(SI);
+
+ // If alias analysis can tell that this store is guaranteed to not modify
+ // the query pointer, ignore it. Use getModRefInfo to handle cases where
+ // the query pointer points to constant memory etc.
+ if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
+ continue;
+
+ // Ok, this store might clobber the query pointer. Check to see if it is
+ // a must alias: in this case, we want to return this as a def.
+ AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
+
+ if (R == AliasAnalysis::NoAlias)
+ continue;
+ if (R == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
+ if (isInvariantLoad)
+ continue;
+ return MemDepResult::getClobber(Inst);
+ }
+
+ // If this is an allocation, and if we know that the accessed pointer is to
+ // the allocation, return Def. This means that there is no dependence and
+ // the access can be optimized based on that. For example, a load could
+ // turn into undef.
+ // Note: Only determine this to be a malloc if Inst is the malloc call, not
+ // a subsequent bitcast of the malloc call result. There can be stores to
+ // the malloced memory between the malloc call and its bitcast uses, and we
+ // need to continue scanning until the malloc call.
+ const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
+ if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
+ const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
+
+ if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
+ return MemDepResult::getDef(Inst);
+ // Be conservative if the accessed pointer may alias the allocation.
+ if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias)
+ return MemDepResult::getClobber(Inst);
+ // If the allocation is not aliased and does not read memory (like
+ // strdup), it is safe to ignore.
+ if (isa<AllocaInst>(Inst) ||
+ isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI))
+ continue;
+ }
+
+ // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
+ AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
+ // If necessary, perform additional analysis.
+ if (MR == AliasAnalysis::ModRef)
+ MR = AA->callCapturesBefore(Inst, MemLoc, DT);
+ switch (MR) {
+ case AliasAnalysis::NoModRef:
+ // If the call has no effect on the queried pointer, just ignore it.
+ continue;
+ case AliasAnalysis::Mod:
+ return MemDepResult::getClobber(Inst);
+ case AliasAnalysis::Ref:
+ // If the call is known to never store to the pointer, and if this is a
+ // load query, we can safely ignore it (scan past it).
+ if (isLoad)
+ continue;
+ default:
+ // Otherwise, there is a potential dependence. Return a clobber.
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getNonFuncLocal();
+}
+
+/// getDependency - Return the instruction on which a memory operation
+/// depends.
+MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
+ Instruction *ScanPos = QueryInst;
+
+ // Check for a cached result
+ MemDepResult &LocalCache = LocalDeps[QueryInst];
+
+ // If the cached entry is non-dirty, just return it. Note that this depends
+ // on MemDepResult's default constructing to 'dirty'.
+ if (!LocalCache.isDirty())
+ return LocalCache;
+
+ // Otherwise, if we have a dirty entry, we know we can start the scan at that
+ // instruction, which may save us some work.
+ if (Instruction *Inst = LocalCache.getInst()) {
+ ScanPos = Inst;
+
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
+ }
+
+ BasicBlock *QueryParent = QueryInst->getParent();
+
+ // Do the scan.
+ if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (QueryParent != &QueryParent->getParent()->getEntryBlock())
+ LocalCache = MemDepResult::getNonLocal();
+ else
+ LocalCache = MemDepResult::getNonFuncLocal();
+ } else {
+ AliasAnalysis::Location MemLoc;
+ AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
+ if (MemLoc.Ptr) {
+ // If we can do a pointer scan, make it happen.
+ bool isLoad = !(MR & AliasAnalysis::Mod);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
+
+ LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
+ QueryParent, QueryInst);
+ } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
+ CallSite QueryCS(QueryInst);
+ bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+ LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
+ QueryParent);
+ } else
+ // Non-memory instruction.
+ LocalCache = MemDepResult::getUnknown();
+ }
+
+ // Remember the result!
+ if (Instruction *I = LocalCache.getInst())
+ ReverseLocalDeps[I].insert(QueryInst);
+
+ return LocalCache;
+}
+
+#ifndef NDEBUG
+/// AssertSorted - This method is used when -debug is specified to verify that
+/// cache arrays are properly kept sorted.
+static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ int Count = -1) {
+ if (Count == -1) Count = Cache.size();
+ if (Count == 0) return;
+
+ for (unsigned i = 1; i != unsigned(Count); ++i)
+ assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
+}
+#endif
+
+/// getNonLocalCallDependency - Perform a full dependency query for the
+/// specified call, returning the set of blocks that the value is
+/// potentially live across. The returned set of results will include a
+/// "NonLocal" result for all blocks where the value is live across.
+///
+/// This method assumes the instruction returns a "NonLocal" dependency
+/// within its own block.
+///
+/// This returns a reference to an internal data structure that may be
+/// invalidated on the next non-local query or when an instruction is
+/// removed. Clients must copy this data if they want it around longer than
+/// that.
+const MemoryDependenceAnalysis::NonLocalDepInfo &
+MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
+ assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+ "getNonLocalCallDependency should only be used on calls with non-local deps!");
+ PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+ NonLocalDepInfo &Cache = CacheP.first;
+
+ /// DirtyBlocks - This is the set of blocks that need to be recomputed. In
+ /// the cached case, this can happen due to instructions being deleted etc. In
+ /// the uncached case, this starts out as the set of predecessors we care
+ /// about.
+ SmallVector<BasicBlock*, 32> DirtyBlocks;
+
+ if (!Cache.empty()) {
+ // Okay, we have a cache entry. If we know it is not dirty, just return it
+ // with no computation.
+ if (!CacheP.second) {
+ ++NumCacheNonLocal;
+ return Cache;
+ }
+
+ // If we already have a partially computed set of results, scan them to
+ // determine what is dirty, seeding our initial DirtyBlocks worklist.
+ for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
+ I != E; ++I)
+ if (I->getResult().isDirty())
+ DirtyBlocks.push_back(I->getBB());
+
+ // Sort the cache so that we can do fast binary search lookups below.
+ std::sort(Cache.begin(), Cache.end());
+
+ ++NumCacheDirtyNonLocal;
+ //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
+ // << Cache.size() << " cached: " << *QueryInst;
+ } else {
+ // Seed DirtyBlocks with each of the preds of QueryInst's block.
+ BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+ for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
+ DirtyBlocks.push_back(*PI);
+ ++NumUncacheNonLocal;
+ }
+
+ // isReadonlyCall - If this is a read-only call, we can be more aggressive.
+ bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
+
+ SmallPtrSet<BasicBlock*, 64> Visited;
+
+ unsigned NumSortedEntries = Cache.size();
+ DEBUG(AssertSorted(Cache));
+
+ // Iterate while we still have blocks to update.
+ while (!DirtyBlocks.empty()) {
+ BasicBlock *DirtyBB = DirtyBlocks.back();
+ DirtyBlocks.pop_back();
+
+ // Already processed this block?
+ if (!Visited.insert(DirtyBB))
+ continue;
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ DEBUG(AssertSorted(Cache, NumSortedEntries));
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
+ NonLocalDepEntry(DirtyBB));
+ if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
+ --Entry;
+
+ NonLocalDepEntry *ExistingResult = 0;
+ if (Entry != Cache.begin()+NumSortedEntries &&
+ Entry->getBB() == DirtyBB) {
+ // If we already have an entry, and if it isn't already dirty, the block
+ // is done.
+ if (!Entry->getResult().isDirty())
+ continue;
+
+ // Otherwise, remember this slot so we can update the value.
+ ExistingResult = &*Entry;
+ }
+
+ // If the dirty entry has a pointer, start scanning from it so we don't have
+ // to rescan the entire block.
+ BasicBlock::iterator ScanPos = DirtyBB->end();
+ if (ExistingResult) {
+ if (Instruction *Inst = ExistingResult->getResult().getInst()) {
+ ScanPos = Inst;
+ // We're removing QueryInst's use of Inst.
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
+ QueryCS.getInstruction());
+ }
+ }
+
+ // Find out if this block has a local dependency for QueryInst.
+ MemDepResult Dep;
+
+ if (ScanPos != DirtyBB->begin()) {
+ Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
+ } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
+ // No dependence found. If this is the entry block of the function, it is
+ // a clobber, otherwise it is unknown.
+ Dep = MemDepResult::getNonLocal();
+ } else {
+ Dep = MemDepResult::getNonFuncLocal();
+ }
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ ExistingResult->setResult(Dep);
+ else
+ Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the association!
+ if (!Dep.isNonLocal()) {
+ // Keep the ReverseNonLocalDeps map up to date so we can efficiently
+ // update this when we remove instructions.
+ if (Instruction *Inst = Dep.getInst())
+ ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+ } else {
+
+ // If the block *is* completely transparent to the load, we need to check
+ // the predecessors of this block. Add them to our worklist.
+ for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
+ DirtyBlocks.push_back(*PI);
+ }
+ }
+
+ return Cache;
+}
+
+/// getNonLocalPointerDependency - Perform a full dependency query for an
+/// access to the specified (non-volatile) memory location, returning the
+/// set of instructions that either define or clobber the value.
+///
+/// This method assumes the pointer has a "NonLocal" dependency within its
+/// own block.
+///
+void MemoryDependenceAnalysis::
+getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
+ BasicBlock *FromBB,
+ SmallVectorImpl<NonLocalDepResult> &Result) {
+ assert(Loc.Ptr->getType()->isPointerTy() &&
+ "Can't get pointer deps of a non-pointer!");
+ Result.clear();
+
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
+
+ // This is the set of blocks we've inspected, and the pointer we consider in
+ // each block. Because of critical edges, we currently bail out if querying
+ // a block with multiple different pointers. This can happen during PHI
+ // translation.
+ DenseMap<BasicBlock*, Value*> Visited;
+ if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
+ Result, Visited, true))
+ return;
+ Result.clear();
+ Result.push_back(NonLocalDepResult(FromBB,
+ MemDepResult::getUnknown(),
+ const_cast<Value *>(Loc.Ptr)));
+}
+
+/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
+/// Pointer/PointeeSize using either cached information in Cache or by doing a
+/// lookup (which may use dirty cache info if available). If we do a lookup,
+/// add the result to the cache.
+MemDepResult MemoryDependenceAnalysis::
+GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+ bool isLoad, BasicBlock *BB,
+ NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
+ NonLocalDepEntry(BB));
+ if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
+ --Entry;
+
+ NonLocalDepEntry *ExistingResult = 0;
+ if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
+ ExistingResult = &*Entry;
+
+ // If we have a cached entry, and it is non-dirty, use it as the value for
+ // this dependency.
+ if (ExistingResult && !ExistingResult->getResult().isDirty()) {
+ ++NumCacheNonLocalPtr;
+ return ExistingResult->getResult();
+ }
+
+ // Otherwise, we have to scan for the value. If we have a dirty cache
+ // entry, start scanning from its position, otherwise we scan from the end
+ // of the block.
+ BasicBlock::iterator ScanPos = BB->end();
+ if (ExistingResult && ExistingResult->getResult().getInst()) {
+ assert(ExistingResult->getResult().getInst()->getParent() == BB &&
+ "Instruction invalidated?");
+ ++NumCacheDirtyNonLocalPtr;
+ ScanPos = ExistingResult->getResult().getInst();
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
+ } else {
+ ++NumUncacheNonLocalPtr;
+ }
+
+ // Scan the block for the dependency.
+ MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ ExistingResult->setResult(Dep);
+ else
+ Cache->push_back(NonLocalDepEntry(BB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the reverse association because we just added it
+ // to Cache!
+ if (!Dep.isDef() && !Dep.isClobber())
+ return Dep;
+
+ // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
+ // update MemDep when we remove instructions.
+ Instruction *Inst = Dep.getInst();
+ assert(Inst && "Didn't depend on anything?");
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+ ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
+ return Dep;
+}
+
+/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered. This is
+/// optimized for the case when only a few entries are added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ unsigned NumSortedEntries) {
+ switch (Cache.size() - NumSortedEntries) {
+ case 0:
+ // done, no new entries.
+ break;
+ case 2: {
+ // Two new entries, insert the last one into place.
+ NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+ Cache.insert(Entry, Val);
+ // FALL THROUGH.
+ }
+ case 1:
+ // One new entry, Just insert the new value at the appropriate position.
+ if (Cache.size() != 1) {
+ NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end(), Val);
+ Cache.insert(Entry, Val);
+ }
+ break;
+ default:
+ // Added many values, do a full scale sort.
+ std::sort(Cache.begin(), Cache.end());
+ break;
+ }
+}
+
+/// getNonLocalPointerDepFromBB - Perform a dependency query based on
+/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
+/// results to the results vector and keep track of which blocks are visited in
+/// 'Visited'.
+///
+/// This has special behavior for the first block queries (when SkipFirstBlock
+/// is true). In this special case, it ignores the contents of the specified
+/// block and starts returning dependence info for its predecessors.
+///
+/// This function returns false on success, or true to indicate that it could
+/// not compute dependence information for some reason. This should be treated
+/// as a clobber dependence on the first instruction in the predecessor block.
+bool MemoryDependenceAnalysis::
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+ const AliasAnalysis::Location &Loc,
+ bool isLoad, BasicBlock *StartBB,
+ SmallVectorImpl<NonLocalDepResult> &Result,
+ DenseMap<BasicBlock*, Value*> &Visited,
+ bool SkipFirstBlock) {
+
+ // Look up the cached info for Pointer.
+ ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
+
+ // Set up a temporary NLPI value. If the map doesn't yet have an entry for
+ // CacheKey, this value will be inserted as the associated value. Otherwise,
+ // it'll be ignored, and we'll have to check to see if the cached size and
+ // tbaa tag are consistent with the current query.
+ NonLocalPointerInfo InitialNLPI;
+ InitialNLPI.Size = Loc.Size;
+ InitialNLPI.TBAATag = Loc.TBAATag;
+
+ // Get the NLPI for CacheKey, inserting one into the map if it doesn't
+ // already have one.
+ std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
+ NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
+ NonLocalPointerInfo *CacheInfo = &Pair.first->second;
+
+ // If we already have a cache entry for this CacheKey, we may need to do some
+ // work to reconcile the cache entry and the current query.
+ if (!Pair.second) {
+ if (CacheInfo->Size < Loc.Size) {
+ // The query's Size is greater than the cached one. Throw out the
+ // cached data and proceed with the query at the greater size.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->Size = Loc.Size;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ } else if (CacheInfo->Size > Loc.Size) {
+ // This query's Size is less than the cached one. Conservatively restart
+ // the query using the greater size.
+ return getNonLocalPointerDepFromBB(Pointer,
+ Loc.getWithNewSize(CacheInfo->Size),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+
+ // If the query's TBAATag is inconsistent with the cached one,
+ // conservatively throw out the cached data and restart the query with
+ // no tag if needed.
+ if (CacheInfo->TBAATag != Loc.TBAATag) {
+ if (CacheInfo->TBAATag) {
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->TBAATag = 0;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ }
+ if (Loc.TBAATag)
+ return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+ }
+
+ NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
+
+ // If we have valid cached information for exactly the block we are
+ // investigating, just return it with no recomputation.
+ if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+ // We have a fully cached result for this query then we can just return the
+ // cached results and populate the visited set. However, we have to verify
+ // that we don't already have conflicting results for these blocks. Check
+ // to ensure that if a block in the results set is in the visited set that
+ // it was for the same pointer query.
+ if (!Visited.empty()) {
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
+ if (VI == Visited.end() || VI->second == Pointer.getAddr())
+ continue;
+
+ // We have a pointer mismatch in a block. Just return clobber, saying
+ // that something was clobbered in this result. We could also do a
+ // non-fully cached query, but there is little point in doing this.
+ return true;
+ }
+ }
+
+ Value *Addr = Pointer.getAddr();
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ Visited.insert(std::make_pair(I->getBB(), Addr));
+ if (!I->getResult().isNonLocal() && DT->isReachableFromEntry(I->getBB()))
+ Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
+ }
+ ++NumCacheCompleteNonLocalPtr;
+ return false;
+ }
+
+ // Otherwise, either this is a new block, a block with an invalid cache
+ // pointer or one that we're about to invalidate by putting more info into it
+ // than its valid cache info. If empty, the result will be valid cache info,
+ // otherwise it isn't.
+ if (Cache->empty())
+ CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+ else
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(StartBB);
+
+ // PredList used inside loop.
+ SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
+
+ // Keep track of the entries that we know are sorted. Previously cached
+ // entries will all be sorted. The entries we add we only sort on demand (we
+ // don't insert every element into its sorted position). We know that we
+ // won't get any reuse from currently inserted values, because we don't
+ // revisit blocks after we insert info for them.
+ unsigned NumSortedEntries = Cache->size();
+ DEBUG(AssertSorted(*Cache));
+
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+
+ // Skip the first block if we have it.
+ if (!SkipFirstBlock) {
+ // Analyze the dependency of *Pointer in FromBB. See if we already have
+ // been here.
+ assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
+
+ // Get the dependency info for Pointer in BB. If we have cached
+ // information, we will use it, otherwise we compute it.
+ DEBUG(AssertSorted(*Cache, NumSortedEntries));
+ MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
+ NumSortedEntries);
+
+ // If we got a Def or Clobber, add this to the list of results.
+ if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) {
+ Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
+ continue;
+ }
+ }
+
+ // If 'Pointer' is an instruction defined in this block, then we need to do
+ // phi translation to change it into a value live in the predecessor block.
+ // If not, we just add the predecessors to the worklist and scan them with
+ // the same Pointer.
+ if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
+ SkipFirstBlock = false;
+ SmallVector<BasicBlock*, 16> NewBlocks;
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ // Verify that we haven't looked at this block yet.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
+ if (InsertRes.second) {
+ // First time we've looked at *PI.
+ NewBlocks.push_back(*PI);
+ continue;
+ }
+
+ // If we have seen this block before, but it was with a different
+ // pointer then we have a phi translation failure and we have to treat
+ // this as a clobber.
+ if (InsertRes.first->second != Pointer.getAddr()) {
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < NewBlocks.size(); i++)
+ Visited.erase(NewBlocks[i]);
+ goto PredTranslationFailure;
+ }
+ }
+ Worklist.append(NewBlocks.begin(), NewBlocks.end());
+ continue;
+ }
+
+ // We do need to do phi translation, if we know ahead of time we can't phi
+ // translate this value, don't even try.
+ if (!Pointer.IsPotentiallyPHITranslatable())
+ goto PredTranslationFailure;
+
+ // We may have added values to the cache list before this PHI translation.
+ // If so, we haven't done anything to ensure that the cache remains sorted.
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the cache
+ // value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ NumSortedEntries = Cache->size();
+ }
+ Cache = 0;
+
+ PredList.clear();
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ BasicBlock *Pred = *PI;
+ PredList.push_back(std::make_pair(Pred, Pointer));
+
+ // Get the PHI translated pointer in this predecessor. This can fail if
+ // not translatable, in which case the getAddr() returns null.
+ PHITransAddr &PredPointer = PredList.back().second;
+ PredPointer.PHITranslateValue(BB, Pred, 0);
+
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ // Check to see if we have already visited this pred block with another
+ // pointer. If so, we can't do this lookup. This failure can occur
+ // with PHI translation when a critical edge exists and the PHI node in
+ // the successor translates to a pointer value different than the
+ // pointer the block was first analyzed with.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
+
+ if (!InsertRes.second) {
+ // We found the pred; take it off the list of preds to visit.
+ PredList.pop_back();
+
+ // If the predecessor was visited with PredPtr, then we already did
+ // the analysis and can ignore it.
+ if (InsertRes.first->second == PredPtrVal)
+ continue;
+
+ // Otherwise, the block was previously analyzed with a different
+ // pointer. We can't represent the result of this case, so we just
+ // treat this as a phi translation failure.
+
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0, n = PredList.size(); i < n; ++i)
+ Visited.erase(PredList[i].first);
+
+ goto PredTranslationFailure;
+ }
+ }
+
+ // Actually process results here; this need to be a separate loop to avoid
+ // calling getNonLocalPointerDepFromBB for blocks we don't want to return
+ // any results for. (getNonLocalPointerDepFromBB will modify our
+ // datastructures in ways the code after the PredTranslationFailure label
+ // doesn't expect.)
+ for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
+ BasicBlock *Pred = PredList[i].first;
+ PHITransAddr &PredPointer = PredList[i].second;
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ bool CanTranslate = true;
+ // If PHI translation was unable to find an available pointer in this
+ // predecessor, then we have to assume that the pointer is clobbered in
+ // that predecessor. We can still do PRE of the load, which would insert
+ // a computation of the pointer in this predecessor.
+ if (PredPtrVal == 0)
+ CanTranslate = false;
+
+ // FIXME: it is entirely possible that PHI translating will end up with
+ // the same value. Consider PHI translating something like:
+ // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
+ // to recurse here, pedantically speaking.
+
+ // If getNonLocalPointerDepFromBB fails here, that means the cached
+ // result conflicted with the Visited list; we have to conservatively
+ // assume it is unknown, but this also does not block PRE of the load.
+ if (!CanTranslate ||
+ getNonLocalPointerDepFromBB(PredPointer,
+ Loc.getWithNewPtr(PredPtrVal),
+ isLoad, Pred,
+ Result, Visited)) {
+ // Add the entry to the Result list.
+ NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
+ Result.push_back(Entry);
+
+ // Since we had a phi translation failure, the cache for CacheKey won't
+ // include all of the entries that we need to immediately satisfy future
+ // queries. Mark this in NonLocalPointerDeps by setting the
+ // BBSkipFirstBlockPair pointer to null. This requires reuse of the
+ // cached value to do more work but not miss the phi trans failure.
+ NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
+ NLPI.Pair = BBSkipFirstBlockPair();
+ continue;
+ }
+ }
+
+ // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->NonLocalDeps;
+ NumSortedEntries = Cache->size();
+
+ // Since we did phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set" Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ SkipFirstBlock = false;
+ continue;
+
+ PredTranslationFailure:
+ // The following code is "failure"; we can't produce a sane translation
+ // for the given block. It assumes that we haven't modified any of
+ // our datastructures while processing the current block.
+
+ if (Cache == 0) {
+ // Refresh the CacheInfo/Cache pointer if it got invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->NonLocalDeps;
+ NumSortedEntries = Cache->size();
+ }
+
+ // Since we failed phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+
+ // If *nothing* works, mark the pointer as unknown.
+ //
+ // If this is the magic first block, return this as a clobber of the whole
+ // incoming value. Since we can't phi translate to one of the predecessors,
+ // we have to bail out.
+ if (SkipFirstBlock)
+ return true;
+
+ for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
+ assert(I != Cache->rend() && "Didn't find current block??");
+ if (I->getBB() != BB)
+ continue;
+
+ assert(I->getResult().isNonLocal() &&
+ "Should only be here with transparent block");
+ I->setResult(MemDepResult::getUnknown());
+ Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
+ Pointer.getAddr()));
+ break;
+ }
+ }
+
+ // Okay, we're done now. If we added new values to the cache, re-sort it.
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ DEBUG(AssertSorted(*Cache));
+ return false;
+}
+
+/// RemoveCachedNonLocalPointerDependencies - If P exists in
+/// CachedNonLocalPointerInfo, remove it.
+void MemoryDependenceAnalysis::
+RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
+ CachedNonLocalPointerInfo::iterator It =
+ NonLocalPointerDeps.find(P);
+ if (It == NonLocalPointerDeps.end()) return;
+
+ // Remove all of the entries in the BB->val map. This involves removing
+ // instructions from the reverse map.
+ NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
+
+ for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
+ Instruction *Target = PInfo[i].getResult().getInst();
+ if (Target == 0) continue; // Ignore non-local dep results.
+ assert(Target->getParent() == PInfo[i].getBB());
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
+ }
+
+ // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
+ NonLocalPointerDeps.erase(It);
+}
+
+
+/// invalidateCachedPointerInfo - This method is used to invalidate cached
+/// information about the specified pointer, because it may be too
+/// conservative in memdep. This is an optional call that can be used when
+/// the client detects an equivalence between the pointer and some other
+/// value and replaces the other value with ptr. This can make Ptr available
+/// in more places that cached info does not necessarily keep.
+void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
+ // If Ptr isn't really a pointer, just ignore it.
+ if (!Ptr->getType()->isPointerTy()) return;
+ // Flush store info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
+ // Flush load info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
+}
+
+/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
+/// This needs to be done when the CFG changes, e.g., due to splitting
+/// critical edges.
+void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
+ PredCache->clear();
+}
+
+/// removeInstruction - Remove an instruction from the dependence analysis,
+/// updating the dependence of instructions that previously depended on it.
+/// This method attempts to keep the cache coherent using the reverse map.
+void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
+ // Walk through the Non-local dependencies, removing this one as the value
+ // for any cached queries.
+ NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
+ if (NLDI != NonLocalDeps.end()) {
+ NonLocalDepInfo &BlockMap = NLDI->second.first;
+ for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
+ DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
+ NonLocalDeps.erase(NLDI);
+ }
+
+ // If we have a cached local dependence query for this instruction, remove it.
+ //
+ LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
+ if (LocalDepEntry != LocalDeps.end()) {
+ // Remove us from DepInst's reverse set now that the local dep info is gone.
+ if (Instruction *Inst = LocalDepEntry->second.getInst())
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
+
+ // Remove this local dependency info.
+ LocalDeps.erase(LocalDepEntry);
+ }
+
+ // If we have any cached pointer dependencies on this instruction, remove
+ // them. If the instruction has non-pointer type, then it can't be a pointer
+ // base.
+
+ // Remove it from both the load info and the store info. The instruction
+ // can't be in either of these maps if it is non-pointer.
+ if (RemInst->getType()->isPointerTy()) {
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
+ }
+
+ // Loop over all of the things that depend on the instruction we're removing.
+ //
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
+
+ // If we find RemInst as a clobber or Def in any of the maps for other values,
+ // we need to replace its entry with a dirty version of the instruction after
+ // it. If RemInst is a terminator, we use a null dirty value.
+ //
+ // Using a dirty version of the instruction after RemInst saves having to scan
+ // the entire block to get to this point.
+ MemDepResult NewDirtyVal;
+ if (!RemInst->isTerminator())
+ NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
+
+ ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseLocalDeps.end()) {
+ SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
+ // RemInst can't be the terminator if it has local stuff depending on it.
+ assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
+ "Nothing can locally depend on a terminator");
+
+ for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
+ E = ReverseDeps.end(); I != E; ++I) {
+ Instruction *InstDependingOnRemInst = *I;
+ assert(InstDependingOnRemInst != RemInst &&
+ "Already removed our local dep info");
+
+ LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
+
+ // Make sure to remember that new things depend on NewDepInst.
+ assert(NewDirtyVal.getInst() && "There is no way something else can have "
+ "a local dep on this if it is a terminator!");
+ ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
+ InstDependingOnRemInst));
+ }
+
+ ReverseLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating the
+ // 'ReverseDeps' reference.
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseNonLocalDeps.end()) {
+ SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
+ for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
+ I != E; ++I) {
+ assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
+
+ PerInstNLInfo &INLD = NonLocalDeps[*I];
+ // The information is now dirty!
+ INLD.second = true;
+
+ for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
+ DE = INLD.first.end(); DI != DE; ++DI) {
+ if (DI->getResult().getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal);
+
+ if (Instruction *NextI = NewDirtyVal.getInst())
+ ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
+ }
+ }
+
+ ReverseNonLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
+ // value in the NonLocalPointerDeps info.
+ ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
+ ReverseNonLocalPtrDeps.find(RemInst);
+ if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
+ SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
+ SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
+
+ for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
+ E = Set.end(); I != E; ++I) {
+ ValueIsLoadPair P = *I;
+ assert(P.getPointer() != RemInst &&
+ "Already removed NonLocalPointerDeps info for RemInst");
+
+ NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
+
+ // The cache is not valid for any specific block anymore.
+ NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
+
+ // Update any entries for RemInst to use the instruction after it.
+ for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
+ DI != DE; ++DI) {
+ if (DI->getResult().getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal);
+
+ if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
+ ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
+ }
+
+ // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
+ // subsequent value may invalidate the sortedness.
+ std::sort(NLPDI.begin(), NLPDI.end());
+ }
+
+ ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
+
+ while (!ReversePtrDepsToAdd.empty()) {
+ ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
+ .insert(ReversePtrDepsToAdd.back().second);
+ ReversePtrDepsToAdd.pop_back();
+ }
+ }
+
+
+ assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
+ AA->deleteValue(RemInst);
+ DEBUG(verifyRemoved(RemInst));
+}
+/// verifyRemoved - Verify that the specified instruction does not occur
+/// in our internal data structures.
+void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+ for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
+ E = LocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ assert(I->second.getInst() != D &&
+ "Inst occurs in data structures");
+ }
+
+ for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
+ E = NonLocalPointerDeps.end(); I != E; ++I) {
+ assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
+ const NonLocalDepInfo &Val = I->second.NonLocalDeps;
+ for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
+ II != E; ++II)
+ assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
+ }
+
+ for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
+ E = NonLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ const PerInstNLInfo &INLD = I->second;
+ for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
+ EE = INLD.first.end(); II != EE; ++II)
+ assert(II->getResult().getInst() != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
+ E = ReverseLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ assert(*II != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
+ E = ReverseNonLocalDeps.end();
+ I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ assert(*II != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseNonLocalPtrDepTy::const_iterator
+ I = ReverseNonLocalPtrDeps.begin(),
+ E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in rev NLPD map");
+
+ for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
+ E = I->second.end(); II != E; ++II)
+ assert(*II != ValueIsLoadPair(D, false) &&
+ *II != ValueIsLoadPair(D, true) &&
+ "Inst occurs in ReverseNonLocalPtrDeps map");
+ }
+
+}
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
new file mode 100644
index 000000000000..03415375263a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -0,0 +1,87 @@
+//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass decodes the debug info metadata in a module and prints in a
+// (sufficiently-prepared-) human-readable form.
+//
+// For example, run this pass from opt along with the -analyze option, and
+// it'll print to standard output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class ModuleDebugInfoPrinter : public ModulePass {
+ DebugInfoFinder Finder;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ModuleDebugInfoPrinter() : ModulePass(ID) {
+ initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const;
+ };
+}
+
+char ModuleDebugInfoPrinter::ID = 0;
+INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
+ "Decodes module-level debug info", false, true)
+
+ModulePass *llvm::createModuleDebugInfoPrinterPass() {
+ return new ModuleDebugInfoPrinter();
+}
+
+bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
+ Finder.processModule(M);
+ return false;
+}
+
+void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
+ for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(),
+ E = Finder.compile_unit_end(); I != E; ++I) {
+ O << "Compile Unit: ";
+ DICompileUnit(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.subprogram_begin(),
+ E = Finder.subprogram_end(); I != E; ++I) {
+ O << "Subprogram: ";
+ DISubprogram(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.global_variable_begin(),
+ E = Finder.global_variable_end(); I != E; ++I) {
+ O << "GlobalVariable: ";
+ DIGlobalVariable(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.type_begin(),
+ E = Finder.type_end(); I != E; ++I) {
+ O << "Type: ";
+ DIType(*I).print(O);
+ O << '\n';
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
new file mode 100644
index 000000000000..907e9621baed
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
@@ -0,0 +1,88 @@
+//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the default implementation of the Alias Analysis interface
+// that simply returns "I don't know" for all queries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ /// NoAA - This class implements the -no-aa pass, which always returns "I
+ /// don't know" for alias queries. NoAA is unlike other alias analysis
+ /// implementations, in that it does not chain to a previous analysis. As
+ /// such it doesn't follow many of the rules that other alias analyses must.
+ ///
+ struct NoAA : public ImmutablePass, public AliasAnalysis {
+ static char ID; // Class identification, replacement for typeinfo
+ NoAA() : ImmutablePass(ID) {
+ initializeNoAAPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+
+ virtual void initializePass() {
+ // Note: NoAA does not call InitializeAliasAnalysis because it's
+ // special and does not support chaining.
+ TD = getAnalysisIfAvailable<DataLayout>();
+ }
+
+ virtual AliasResult alias(const Location &LocA, const Location &LocB) {
+ return MayAlias;
+ }
+
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ return UnknownModRefBehavior;
+ }
+ virtual ModRefBehavior getModRefBehavior(const Function *F) {
+ return UnknownModRefBehavior;
+ }
+
+ virtual bool pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ return false;
+ }
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ return ModRef;
+ }
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return ModRef;
+ }
+
+ virtual void deleteValue(Value *V) {}
+ virtual void copyValue(Value *From, Value *To) {}
+ virtual void addEscapingUse(Use &U) {}
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char NoAA::ID = 0;
+INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
+ "No Alias Analysis (always returns 'may' alias)",
+ true, true, true)
+
+ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
new file mode 100644
index 000000000000..e6af0663feaa
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -0,0 +1,443 @@
+//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PHITransAddr class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CanPHITrans(Instruction *Inst) {
+ if (isa<PHINode>(Inst) ||
+ isa<GetElementPtrInst>(Inst))
+ return true;
+
+ if (isa<CastInst>(Inst) &&
+ isSafeToSpeculativelyExecute(Inst))
+ return true;
+
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1)))
+ return true;
+
+ // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+ // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+ // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+ return false;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void PHITransAddr::dump() const {
+ if (Addr == 0) {
+ dbgs() << "PHITransAddr: null\n";
+ return;
+ }
+ dbgs() << "PHITransAddr: " << *Addr << "\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n";
+}
+#endif
+
+
+static bool VerifySubExpr(Value *Expr,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ // If this is a non-instruction value, there is nothing to do.
+ Instruction *I = dyn_cast<Instruction>(Expr);
+ if (I == 0) return true;
+
+ // If it's an instruction, it is either in Tmp or its operands recursively
+ // are.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return true;
+ }
+
+ // If it isn't in the InstInputs list it is a subexpr incorporated into the
+ // address. Sanity check that it is phi translatable.
+ if (!CanPHITrans(I)) {
+ errs() << "Non phi translatable instruction found in PHITransAddr:\n";
+ errs() << *I << '\n';
+ llvm_unreachable("Either something is missing from InstInputs or "
+ "CanPHITrans is wrong.");
+ }
+
+ // Validate the operands of the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!VerifySubExpr(I->getOperand(i), InstInputs))
+ return false;
+
+ return true;
+}
+
+/// Verify - Check internal consistency of this data structure. If the
+/// structure is valid, it returns true. If invalid, it prints errors and
+/// returns false.
+bool PHITransAddr::Verify() const {
+ if (Addr == 0) return true;
+
+ SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
+
+ if (!VerifySubExpr(Addr, Tmp))
+ return false;
+
+ if (!Tmp.empty()) {
+ errs() << "PHITransAddr contains extra instructions:\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n";
+ llvm_unreachable("This is unexpected.");
+ }
+
+ // a-ok.
+ return true;
+}
+
+
+/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
+/// if we have some hope of doing it. This should be used as a filter to
+/// avoid calling PHITranslateValue in hopeless situations.
+bool PHITransAddr::IsPotentiallyPHITranslatable() const {
+ // If the input value is not an instruction, or if it is not defined in CurBB,
+ // then we don't need to phi translate it.
+ Instruction *Inst = dyn_cast<Instruction>(Addr);
+ return Inst == 0 || CanPHITrans(Inst);
+}
+
+
+static void RemoveInstInputs(Value *V,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return;
+
+ // If the instruction is in the InstInputs list, remove it.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return;
+ }
+
+ assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
+
+ // Otherwise, it must have instruction inputs itself. Zap them recursively.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
+ RemoveInstInputs(Op, InstInputs);
+ }
+}
+
+Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
+ BasicBlock *PredBB,
+ const DominatorTree *DT) {
+ // If this is a non-instruction value, it can't require PHI translation.
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (Inst == 0) return V;
+
+ // Determine whether 'Inst' is an input to our PHI translatable expression.
+ bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
+
+ // Handle inputs instructions if needed.
+ if (isInput) {
+ if (Inst->getParent() != CurBB) {
+ // If it is an input defined in a different block, then it remains an
+ // input.
+ return Inst;
+ }
+
+ // If 'Inst' is defined in this block and is an input that needs to be phi
+ // translated, we need to incorporate the value into the expression or fail.
+
+ // In either case, the instruction itself isn't an input any longer.
+ InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+
+ // If this is a PHI, go ahead and translate it.
+ if (PHINode *PN = dyn_cast<PHINode>(Inst))
+ return AddAsInput(PN->getIncomingValueForBlock(PredBB));
+
+ // If this is a non-phi value, and it is analyzable, we can incorporate it
+ // into the expression by making all instruction operands be inputs.
+ if (!CanPHITrans(Inst))
+ return 0;
+
+ // All instruction operands are now inputs (and of course, they may also be
+ // defined in this block, so they may need to be phi translated themselves.
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))
+ InstInputs.push_back(Op);
+ }
+
+ // Ok, it must be an intermediate result (either because it started that way
+ // or because we just incorporated it into the expression). See if its
+ // operands need to be phi translated, and if so, reconstruct it.
+
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!isSafeToSpeculativelyExecute(Cast)) return 0;
+ Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
+ if (PHIIn == 0) return 0;
+ if (PHIIn == Cast->getOperand(0))
+ return Cast;
+
+ // Find an available version of this cast.
+
+ // Constants are trivial to find.
+ if (Constant *C = dyn_cast<Constant>(PHIIn))
+ return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(),
+ C, Cast->getType()));
+
+ // Otherwise we have to see if a casted version of the incoming pointer
+ // is available. If so, we can use it, otherwise we have to fail.
+ for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
+ UI != E; ++UI) {
+ if (CastInst *CastI = dyn_cast<CastInst>(*UI))
+ if (CastI->getOpcode() == Cast->getOpcode() &&
+ CastI->getType() == Cast->getType() &&
+ (!DT || DT->dominates(CastI->getParent(), PredBB)))
+ return CastI;
+ }
+ return 0;
+ }
+
+ // Handle getelementptr with at least one PHI translatable operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ bool AnyChanged = false;
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
+ if (GEPOp == 0) return 0;
+
+ AnyChanged |= GEPOp != GEP->getOperand(i);
+ GEPOps.push_back(GEPOp);
+ }
+
+ if (!AnyChanged)
+ return GEP;
+
+ // Simplify the GEP to handle 'gep x, 0' -> x etc.
+ if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) {
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ RemoveInstInputs(GEPOps[i], InstInputs);
+
+ return AddAsInput(V);
+ }
+
+ // Scan to see if we have this GEP available.
+ Value *APHIOp = GEPOps[0];
+ for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
+ UI != E; ++UI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
+ if (GEPI->getType() == GEP->getType() &&
+ GEPI->getNumOperands() == GEPOps.size() &&
+ GEPI->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(GEPI->getParent(), PredBB))) {
+ bool Mismatch = false;
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ if (GEPI->getOperand(i) != GEPOps[i]) {
+ Mismatch = true;
+ break;
+ }
+ if (!Mismatch)
+ return GEPI;
+ }
+ }
+ return 0;
+ }
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
+ bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
+ bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
+
+ Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
+ if (LHS == 0) return 0;
+
+ // If the PHI translated LHS is an add of a constant, fold the immediates.
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
+ if (BOp->getOpcode() == Instruction::Add)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ LHS = BOp->getOperand(0);
+ RHS = ConstantExpr::getAdd(RHS, CI);
+ isNSW = isNUW = false;
+
+ // If the old 'LHS' was an input, add the new 'LHS' as an input.
+ if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) {
+ RemoveInstInputs(BOp, InstInputs);
+ AddAsInput(LHS);
+ }
+ }
+
+ // See if the add simplifies away.
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) {
+ // If we simplified the operands, the LHS is no longer an input, but Res
+ // is.
+ RemoveInstInputs(LHS, InstInputs);
+ return AddAsInput(Res);
+ }
+
+ // If we didn't modify the add, just return it.
+ if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1))
+ return Inst;
+
+ // Otherwise, see if we have this add available somewhere.
+ for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
+ UI != E; ++UI) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
+ if (BO->getOpcode() == Instruction::Add &&
+ BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
+ BO->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(BO->getParent(), PredBB)))
+ return BO;
+ }
+
+ return 0;
+ }
+
+ // Otherwise, we failed.
+ return 0;
+}
+
+
+/// PHITranslateValue - PHI translate the current address up the CFG from
+/// CurBB to Pred, updating our state to reflect any needed changes. If the
+/// dominator tree DT is non-null, the translated value must dominate
+/// PredBB. This returns true on failure and sets Addr to null.
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree *DT) {
+ assert(Verify() && "Invalid PHITransAddr!");
+ Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT);
+ assert(Verify() && "Invalid PHITransAddr!");
+
+ if (DT) {
+ // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
+ if (!DT->dominates(Inst->getParent(), PredBB))
+ Addr = 0;
+ }
+
+ return Addr == 0;
+}
+
+/// PHITranslateWithInsertion - PHI translate this value into the specified
+/// predecessor block, inserting a computation of the value if it is
+/// unavailable.
+///
+/// All newly created instructions are added to the NewInsts list. This
+/// returns null on failure.
+///
+Value *PHITransAddr::
+PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ unsigned NISize = NewInsts.size();
+
+ // Attempt to PHI translate with insertion.
+ Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
+
+ // If successful, return the new value.
+ if (Addr) return Addr;
+
+ // If not, destroy any intermediate instructions inserted.
+ while (NewInsts.size() != NISize)
+ NewInsts.pop_back_val()->eraseFromParent();
+ return 0;
+}
+
+
+/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
+/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
+/// block. All newly created instructions are added to the NewInsts list.
+/// This returns null on failure.
+///
+Value *PHITransAddr::
+InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
+ BasicBlock *PredBB, const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ // See if we have a version of this value already available and dominating
+ // PredBB. If so, there is no need to insert a new instance of it.
+ PHITransAddr Tmp(InVal, TD);
+ if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
+ return Tmp.getAddr();
+
+ // If we don't have an available version of this value, it must be an
+ // instruction.
+ Instruction *Inst = cast<Instruction>(InVal);
+
+ // Handle cast of PHI translatable value.
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!isSafeToSpeculativelyExecute(Cast)) return 0;
+ Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ // Otherwise insert a cast at the end of PredBB.
+ CastInst *New = CastInst::Create(Cast->getOpcode(),
+ OpVal, InVal->getType(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ NewInsts.push_back(New);
+ return New;
+ }
+
+ // Handle getelementptr with at least one PHI operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ BasicBlock *CurBB = GEP->getParent();
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+ GEPOps.push_back(OpVal);
+ }
+
+ GetElementPtrInst *Result =
+ GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Result->setIsInBounds(GEP->isInBounds());
+ NewInsts.push_back(Result);
+ return Result;
+ }
+
+#if 0
+ // FIXME: This code works, but it is unclear that we actually want to insert
+ // a big chain of computation in order to make a value available in a block.
+ // This needs to be evaluated carefully to consider its cost trade offs.
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
+ NewInsts.push_back(Res);
+ return Res;
+ }
+#endif
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp
new file mode 100644
index 000000000000..30d213b77576
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathNumbering.cpp
@@ -0,0 +1,521 @@
+//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Ball-Larus path numbers uniquely identify paths through a directed acyclic
+// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony
+// edges to obtain a DAG, and thus the unique path numbers [Ball96].
+//
+// The purpose of this analysis is to enumerate the edges in a CFG in order
+// to obtain paths from path numbers in a convenient manner. As described in
+// [Ball96] edges can be enumerated such that given a path number by following
+// the CFG and updating the path number, the path is obtained.
+//
+// [Ball96]
+// T. Ball and J. R. Larus. "Efficient Path Profiling."
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "ball-larus-numbering"
+
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeBuilder.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <queue>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+// Are we enabling early termination
+static cl::opt<bool> ProcessEarlyTermination(
+ "path-profile-early-termination", cl::Hidden,
+ cl::desc("In path profiling, insert extra instrumentation to account for "
+ "unexpected function termination."));
+
+// Returns the basic block for the BallLarusNode
+BasicBlock* BallLarusNode::getBlock() {
+ return(_basicBlock);
+}
+
+// Returns the number of paths to the exit starting at the node.
+unsigned BallLarusNode::getNumberPaths() {
+ return(_numberPaths);
+}
+
+// Sets the number of paths to the exit starting at the node.
+void BallLarusNode::setNumberPaths(unsigned numberPaths) {
+ _numberPaths = numberPaths;
+}
+
+// Gets the NodeColor used in graph algorithms.
+BallLarusNode::NodeColor BallLarusNode::getColor() {
+ return(_color);
+}
+
+// Sets the NodeColor used in graph algorithms.
+void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
+ _color = color;
+}
+
+// Returns an iterator over predecessor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::predBegin() {
+ return(_predEdges.begin());
+}
+
+// Returns the end sentinel for the predecessor iterator.
+BLEdgeIterator BallLarusNode::predEnd() {
+ return(_predEdges.end());
+}
+
+// Returns the number of predecessor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberPredEdges() {
+ return(_predEdges.size());
+}
+
+// Returns an iterator over successor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::succBegin() {
+ return(_succEdges.begin());
+}
+
+// Returns the end sentinel for the successor iterator.
+BLEdgeIterator BallLarusNode::succEnd() {
+ return(_succEdges.end());
+}
+
+// Returns the number of successor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberSuccEdges() {
+ return(_succEdges.size());
+}
+
+// Add an edge to the predecessor list.
+void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
+ _predEdges.push_back(edge);
+}
+
+// Remove an edge from the predecessor list.
+void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
+ removeEdge(_predEdges, edge);
+}
+
+// Add an edge to the successor list.
+void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
+ _succEdges.push_back(edge);
+}
+
+// Remove an edge from the successor list.
+void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
+ removeEdge(_succEdges, edge);
+}
+
+// Returns the name of the BasicBlock being represented. If BasicBlock
+// is null then returns "<null>". If BasicBlock has no name, then
+// "<unnamed>" is returned. Intended for use with debug output.
+std::string BallLarusNode::getName() {
+ std::stringstream name;
+
+ if(getBlock() != NULL) {
+ if(getBlock()->hasName()) {
+ std::string tempName(getBlock()->getName());
+ name << tempName.c_str() << " (" << _uid << ")";
+ } else
+ name << "<unnamed> (" << _uid << ")";
+ } else
+ name << "<null> (" << _uid << ")";
+
+ return name.str();
+}
+
+// Removes an edge from an edgeVector. Used by removePredEdge and
+// removeSuccEdge.
+void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
+ // TODO: Avoid linear scan by using a set instead
+ for(BLEdgeIterator i = v.begin(),
+ end = v.end();
+ i != end;
+ ++i) {
+ if((*i) == e) {
+ v.erase(i);
+ break;
+ }
+ }
+}
+
+// Returns the source node of this edge.
+BallLarusNode* BallLarusEdge::getSource() const {
+ return(_source);
+}
+
+// Returns the target node of this edge.
+BallLarusNode* BallLarusEdge::getTarget() const {
+ return(_target);
+}
+
+// Sets the type of the edge.
+BallLarusEdge::EdgeType BallLarusEdge::getType() const {
+ return _edgeType;
+}
+
+// Gets the type of the edge.
+void BallLarusEdge::setType(EdgeType type) {
+ _edgeType = type;
+}
+
+// Returns the weight of this edge. Used to decode path numbers to sequences
+// of basic blocks.
+unsigned BallLarusEdge::getWeight() {
+ return(_weight);
+}
+
+// Sets the weight of the edge. Used during path numbering.
+void BallLarusEdge::setWeight(unsigned weight) {
+ _weight = weight;
+}
+
+// Gets the phony edge originating at the root.
+BallLarusEdge* BallLarusEdge::getPhonyRoot() {
+ return _phonyRoot;
+}
+
+// Sets the phony edge originating at the root.
+void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
+ _phonyRoot = phonyRoot;
+}
+
+// Gets the phony edge terminating at the exit.
+BallLarusEdge* BallLarusEdge::getPhonyExit() {
+ return _phonyExit;
+}
+
+// Sets the phony edge terminating at the exit.
+void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
+ _phonyExit = phonyExit;
+}
+
+// Gets the associated real edge if this is a phony edge.
+BallLarusEdge* BallLarusEdge::getRealEdge() {
+ return _realEdge;
+}
+
+// Sets the associated real edge if this is a phony edge.
+void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
+ _realEdge = realEdge;
+}
+
+// Returns the duplicate number of the edge.
+unsigned BallLarusEdge::getDuplicateNumber() {
+ return(_duplicateNumber);
+}
+
+// Initialization that requires virtual functions which are not fully
+// functional in the constructor.
+void BallLarusDag::init() {
+ BLBlockNodeMap inDag;
+ std::stack<BallLarusNode*> dfsStack;
+
+ _root = addNode(&(_function.getEntryBlock()));
+ _exit = addNode(NULL);
+
+ // start search from root
+ dfsStack.push(getRoot());
+
+ // dfs to add each bb into the dag
+ while(dfsStack.size())
+ buildNode(inDag, dfsStack);
+
+ // put in the final edge
+ addEdge(getExit(),getRoot(),0);
+}
+
+// Frees all memory associated with the DAG.
+BallLarusDag::~BallLarusDag() {
+ for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
+ ++edge)
+ delete (*edge);
+
+ for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
+ ++node)
+ delete (*node);
+}
+
+// Calculate the path numbers by assigning edge increments as prescribed
+// in Ball-Larus path profiling.
+void BallLarusDag::calculatePathNumbers() {
+ BallLarusNode* node;
+ std::queue<BallLarusNode*> bfsQueue;
+ bfsQueue.push(getExit());
+
+ while(bfsQueue.size() > 0) {
+ node = bfsQueue.front();
+
+ DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
+
+ bfsQueue.pop();
+ unsigned prevPathNumber = node->getNumberPaths();
+ calculatePathNumbersFrom(node);
+
+ // Check for DAG splitting
+ if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
+ // Add new phony edge from the split-node to the DAG's exit
+ BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
+ exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+
+ // Counters to handle the possibility of a multi-graph
+ BasicBlock* oldTarget = 0;
+ unsigned duplicateNumber = 0;
+
+ // Iterate through each successor edge, adding phony edges
+ for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+ succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
+
+ if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
+ // is this edge a duplicate?
+ if( oldTarget != (*succ)->getTarget()->getBlock() )
+ duplicateNumber = 0;
+
+ // create the new phony edge: root -> succ
+ BallLarusEdge* rootEdge =
+ addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
+ rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+ rootEdge->setRealEdge(*succ);
+
+ // split on this edge and reference it's exit/root phony edges
+ (*succ)->setType(BallLarusEdge::SPLITEDGE);
+ (*succ)->setPhonyRoot(rootEdge);
+ (*succ)->setPhonyExit(exitEdge);
+ (*succ)->setWeight(0);
+ }
+ }
+
+ calculatePathNumbersFrom(node);
+ }
+
+ DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
+ << node->getNumberPaths() << ".\n");
+
+ if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
+ DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
+ for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
+ pred != end; pred++) {
+ if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
+ (*pred)->getType() == BallLarusEdge::SPLITEDGE )
+ continue;
+
+ BallLarusNode* nextNode = (*pred)->getSource();
+ // not yet visited?
+ if(nextNode->getNumberPaths() == 0)
+ bfsQueue.push(nextNode);
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
+}
+
+// Returns the number of paths for the Dag.
+unsigned BallLarusDag::getNumberOfPaths() {
+ return(getRoot()->getNumberPaths());
+}
+
+// Returns the root (i.e. entry) node for the DAG.
+BallLarusNode* BallLarusDag::getRoot() {
+ return _root;
+}
+
+// Returns the exit node for the DAG.
+BallLarusNode* BallLarusDag::getExit() {
+ return _exit;
+}
+
+// Returns the function for the DAG.
+Function& BallLarusDag::getFunction() {
+ return(_function);
+}
+
+// Clears the node colors.
+void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
+ for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
+ (*nodeIt)->setColor(color);
+}
+
+// Processes one node and its imediate edges for building the DAG.
+void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
+ BallLarusNode* currentNode = dfsStack.top();
+ BasicBlock* currentBlock = currentNode->getBlock();
+
+ if(currentNode->getColor() != BallLarusNode::WHITE) {
+ // we have already visited this node
+ dfsStack.pop();
+ currentNode->setColor(BallLarusNode::BLACK);
+ } else {
+ // are there any external procedure calls?
+ if( ProcessEarlyTermination ) {
+ for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
+ bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
+ bbCurrent++ ) {
+ Instruction& instr = *bbCurrent;
+ if( instr.getOpcode() == Instruction::Call ) {
+ BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
+ callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
+ break;
+ }
+ }
+ }
+
+ TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
+ if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) ||
+ isa<ResumeInst>(terminator))
+ addEdge(currentNode, getExit(),0);
+
+ currentNode->setColor(BallLarusNode::GRAY);
+ inDag[currentBlock] = currentNode;
+
+ BasicBlock* oldSuccessor = 0;
+ unsigned duplicateNumber = 0;
+
+ // iterate through this node's successors
+ for(succ_iterator successor = succ_begin(currentBlock),
+ succEnd = succ_end(currentBlock); successor != succEnd;
+ oldSuccessor = *successor, ++successor ) {
+ BasicBlock* succBB = *successor;
+
+ // is this edge a duplicate?
+ if (oldSuccessor == succBB)
+ duplicateNumber++;
+ else
+ duplicateNumber = 0;
+
+ buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
+ }
+ }
+}
+
+// Process an edge in the CFG for DAG building.
+void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
+ dfsStack, BallLarusNode* currentNode,
+ BasicBlock* succBB, unsigned duplicateCount) {
+ BallLarusNode* succNode = inDag[succBB];
+
+ if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
+ // visited node and forward edge
+ addEdge(currentNode, succNode, duplicateCount);
+ } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
+ // visited node and back edge
+ DEBUG(dbgs() << "Backedge detected.\n");
+ addBackedge(currentNode, succNode, duplicateCount);
+ } else {
+ BallLarusNode* childNode;
+ // not visited node and forward edge
+ if(succNode) // an unvisited node that is child of a gray node
+ childNode = succNode;
+ else { // an unvisited node that is a child of a an unvisted node
+ childNode = addNode(succBB);
+ inDag[succBB] = childNode;
+ }
+ addEdge(currentNode, childNode, duplicateCount);
+ dfsStack.push(childNode);
+ }
+}
+
+// The weight on each edge is the increment required along any path that
+// contains that edge.
+void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
+ if(node == getExit())
+ // The Exit node must be base case
+ node->setNumberPaths(1);
+ else {
+ unsigned sumPaths = 0;
+ BallLarusNode* succNode;
+
+ for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+ succ != end; succ++) {
+ if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
+ (*succ)->getType() == BallLarusEdge::SPLITEDGE )
+ continue;
+
+ (*succ)->setWeight(sumPaths);
+ succNode = (*succ)->getTarget();
+
+ if( !succNode->getNumberPaths() )
+ return;
+ sumPaths += succNode->getNumberPaths();
+ }
+
+ node->setNumberPaths(sumPaths);
+ }
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call free on each
+// pointer created.
+BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
+ return( new BallLarusNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call free on each
+// pointer created.
+BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
+ BallLarusNode* target,
+ unsigned duplicateCount) {
+ return( new BallLarusEdge(source, target, duplicateCount) );
+}
+
+// Proxy to node's constructor. Updates the DAG state.
+BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
+ BallLarusNode* newNode = createNode(BB);
+ _nodes.push_back(newNode);
+ return( newNode );
+}
+
+// Proxy to edge's constructor. Updates the DAG state.
+BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
+ BallLarusNode* target,
+ unsigned duplicateCount) {
+ BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
+ _edges.push_back(newEdge);
+ source->addSuccEdge(newEdge);
+ target->addPredEdge(newEdge);
+ return(newEdge);
+}
+
+// Adds a backedge with its phony edges. Updates the DAG state.
+void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target,
+ unsigned duplicateCount) {
+ BallLarusEdge* childEdge = addEdge(source, target, duplicateCount);
+ childEdge->setType(BallLarusEdge::BACKEDGE);
+
+ childEdge->setPhonyRoot(addEdge(getRoot(), target,0));
+ childEdge->setPhonyExit(addEdge(source, getExit(),0));
+
+ childEdge->getPhonyRoot()->setRealEdge(childEdge);
+ childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY);
+
+ childEdge->getPhonyExit()->setRealEdge(childEdge);
+ childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY);
+ _backEdges.push_back(childEdge);
+}
diff --git a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
new file mode 100644
index 000000000000..bc53221d3176
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
@@ -0,0 +1,433 @@
+//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface used by optimizers to load path profiles,
+// and provides a loader pass which reads a path profile file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-info"
+
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+
+using namespace llvm;
+
+// command line option for loading path profiles
+static cl::opt<std::string>
+PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
+
+namespace {
+ class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
+ public:
+ PathProfileLoaderPass() : ModulePass(ID) { }
+ ~PathProfileLoaderPass();
+
+ // this pass doesn't change anything (only loads information)
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // the full name of the loader pass
+ virtual const char* getPassName() const {
+ return "Path Profiling Information Loader";
+ }
+
+ // required since this pass implements multiple inheritance
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &PathProfileInfo::ID)
+ return (PathProfileInfo*)this;
+ return this;
+ }
+
+ // entry point to run the pass
+ bool runOnModule(Module &M);
+
+ // pass identification
+ static char ID;
+
+ private:
+ // make a reference table to refer to function by number
+ void buildFunctionRefs(Module &M);
+
+ // process argument info of a program from the input file
+ void handleArgumentInfo();
+
+ // process path number information from the input file
+ void handlePathInfo();
+
+ // array of references to the functions in the module
+ std::vector<Function*> _functions;
+
+ // path profile file handle
+ FILE* _file;
+
+ // path profile file name
+ std::string _filename;
+ };
+}
+
+// register PathLoader
+char PathProfileLoaderPass::ID = 0;
+
+INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
+ NoPathProfileInfo)
+INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
+ "path-profile-loader",
+ "Load path profile information from file",
+ false, true, false)
+
+char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
+
+// link PathLoader as a pass, and make it available as an optimisation
+ModulePass *llvm::createPathProfileLoaderPass() {
+ return new PathProfileLoaderPass;
+}
+
+// ----------------------------------------------------------------------------
+// PathEdge implementation
+//
+ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
+ unsigned duplicateNumber)
+ : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
+
+// ----------------------------------------------------------------------------
+// Path implementation
+//
+
+ProfilePath::ProfilePath (unsigned int number, unsigned int count,
+ double countStdDev, PathProfileInfo* ppi)
+ : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
+
+double ProfilePath::getFrequency() const {
+ return 100 * double(_count) /
+ double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
+}
+
+static BallLarusEdge* getNextEdge (BallLarusNode* node,
+ unsigned int pathNumber) {
+ BallLarusEdge* best = 0;
+
+ for( BLEdgeIterator next = node->succBegin(),
+ end = node->succEnd(); next != end; next++ ) {
+ if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
+ (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
+ (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
+ (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
+ best = *next;
+ }
+
+ return best;
+}
+
+ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
+ BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+ unsigned int increment = _number;
+ ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
+
+ while (currentNode != _ppi->_currentDag->getExit()) {
+ BallLarusEdge* next = getNextEdge(currentNode, increment);
+
+ increment -= next->getWeight();
+
+ if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
+ next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
+ next->getTarget() != _ppi->_currentDag->getExit() )
+ pev->push_back(ProfilePathEdge(
+ next->getSource()->getBlock(),
+ next->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
+ next->getTarget() == _ppi->_currentDag->getExit() )
+ pev->push_back(ProfilePathEdge(
+ next->getRealEdge()->getSource()->getBlock(),
+ next->getRealEdge()->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
+ next->getSource() == _ppi->_currentDag->getRoot() )
+ pev->push_back(ProfilePathEdge(
+ next->getRealEdge()->getSource()->getBlock(),
+ next->getRealEdge()->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ // set the new node
+ currentNode = next->getTarget();
+ }
+
+ return pev;
+}
+
+ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
+ BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+ unsigned int increment = _number;
+ ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
+
+ while (currentNode != _ppi->_currentDag->getExit()) {
+ BallLarusEdge* next = getNextEdge(currentNode, increment);
+ increment -= next->getWeight();
+
+ // add block to the block list if it is a real edge
+ if( next->getType() == BallLarusEdge::NORMAL)
+ pbv->push_back (currentNode->getBlock());
+ // make the back edge the last edge since we are at the end
+ else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
+ pbv->push_back (currentNode->getBlock());
+ pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
+ }
+
+ // set the new node
+ currentNode = next->getTarget();
+ }
+
+ return pbv;
+}
+
+BasicBlock* ProfilePath::getFirstBlockInPath() const {
+ BallLarusNode* root = _ppi->_currentDag->getRoot();
+ BallLarusEdge* edge = getNextEdge(root, _number);
+
+ if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
+ return edge->getTarget()->getBlock();
+
+ return root->getBlock();
+}
+
+// ----------------------------------------------------------------------------
+// PathProfileInfo implementation
+//
+
+// Pass identification
+char llvm::PathProfileInfo::ID = 0;
+
+PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
+}
+
+PathProfileInfo::~PathProfileInfo() {
+ if (_currentDag)
+ delete _currentDag;
+}
+
+// set the function for which paths are currently begin processed
+void PathProfileInfo::setCurrentFunction(Function* F) {
+ // Make sure it exists
+ if (!F) return;
+
+ if (_currentDag)
+ delete _currentDag;
+
+ _currentFunction = F;
+ _currentDag = new BallLarusDag(*F);
+ _currentDag->init();
+ _currentDag->calculatePathNumbers();
+}
+
+// get the function for which paths are currently being processed
+Function* PathProfileInfo::getCurrentFunction() const {
+ return _currentFunction;
+}
+
+// get the entry block of the function
+BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
+ return _currentDag->getRoot()->getBlock();
+}
+
+// return the path based on its number
+ProfilePath* PathProfileInfo::getPath(unsigned int number) {
+ return _functionPaths[_currentFunction][number];
+}
+
+// return the number of paths which a function may potentially execute
+unsigned int PathProfileInfo::getPotentialPathCount() {
+ return _currentDag ? _currentDag->getNumberOfPaths() : 0;
+}
+
+// return an iterator for the beginning of a functions executed paths
+ProfilePathIterator PathProfileInfo::pathBegin() {
+ return _functionPaths[_currentFunction].begin();
+}
+
+// return an iterator for the end of a functions executed paths
+ProfilePathIterator PathProfileInfo::pathEnd() {
+ return _functionPaths[_currentFunction].end();
+}
+
+// returns the total number of paths run in the function
+unsigned int PathProfileInfo::pathsRun() {
+ return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
+}
+
+// ----------------------------------------------------------------------------
+// PathLoader implementation
+//
+
+// remove all generated paths
+PathProfileLoaderPass::~PathProfileLoaderPass() {
+ for( FunctionPathIterator funcNext = _functionPaths.begin(),
+ funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
+ for( ProfilePathIterator pathNext = funcNext->second.begin(),
+ pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
+ delete pathNext->second;
+}
+
+// entry point of the pass; this loads and parses a file
+bool PathProfileLoaderPass::runOnModule(Module &M) {
+ // get the filename and setup the module's function references
+ _filename = PathProfileInfoFilename;
+ buildFunctionRefs (M);
+
+ if (!(_file = fopen(_filename.c_str(), "rb"))) {
+ errs () << "error: input '" << _filename << "' file does not exist.\n";
+ return false;
+ }
+
+ ProfilingType profType;
+
+ while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
+ switch (profType) {
+ case ArgumentInfo:
+ handleArgumentInfo ();
+ break;
+ case PathInfo:
+ handlePathInfo ();
+ break;
+ default:
+ errs () << "error: bad path profiling file syntax, " << profType << "\n";
+ fclose (_file);
+ return false;
+ }
+ }
+
+ fclose (_file);
+
+ return true;
+}
+
+// create a reference table for functions defined in the path profile file
+void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
+ _functions.push_back(0); // make the 0 index a null pointer
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+ if (F->isDeclaration())
+ continue;
+ _functions.push_back(F);
+ }
+}
+
+// handle command like argument infor in the output file
+void PathProfileLoaderPass::handleArgumentInfo() {
+ // get the argument list's length
+ unsigned savedArgsLength;
+ if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
+ errs() << "warning: argument info header/data mismatch\n";
+ return;
+ }
+
+ // allocate a buffer, and get the arguments
+ char* args = new char[savedArgsLength+1];
+ if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
+ errs() << "warning: argument info header/data mismatch\n";
+
+ args[savedArgsLength] = '\0';
+ argList = std::string(args);
+ delete [] args; // cleanup dynamic string
+
+ // byte alignment
+ if (savedArgsLength & 3)
+ fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
+}
+
+// Handle path profile information in the output file
+void PathProfileLoaderPass::handlePathInfo () {
+ // get the number of functions in this profile
+ unsigned functionCount;
+ if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
+ errs() << "warning: path info header/data mismatch\n";
+ return;
+ }
+
+ // gather path information for each function
+ for (unsigned i = 0; i < functionCount; i++) {
+ PathProfileHeader pathHeader;
+ if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
+ errs() << "warning: bad header for path function info\n";
+ break;
+ }
+
+ Function* f = _functions[pathHeader.fnNumber];
+
+ // dynamically allocate a table to store path numbers
+ PathProfileTableEntry* pathTable =
+ new PathProfileTableEntry[pathHeader.numEntries];
+
+ if( fread(pathTable, sizeof(PathProfileTableEntry),
+ pathHeader.numEntries, _file) != pathHeader.numEntries) {
+ delete [] pathTable;
+ errs() << "warning: path function info header/data mismatch\n";
+ return;
+ }
+
+ // Build a new path for the current function
+ unsigned int totalPaths = 0;
+ for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
+ totalPaths += pathTable[j].pathCounter;
+ _functionPaths[f][pathTable[j].pathNumber]
+ = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
+ 0, this);
+ }
+
+ _functionPathCounts[f] = totalPaths;
+
+ delete [] pathTable;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// NoProfile PathProfileInfo implementation
+//
+
+namespace {
+ struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
+ static char ID; // Class identification, replacement for typeinfo
+ NoPathProfileInfo() : ImmutablePass(ID) {
+ initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &PathProfileInfo::ID)
+ return (PathProfileInfo*)this;
+ return this;
+ }
+
+ virtual const char *getPassName() const {
+ return "NoPathProfileInfo";
+ }
+ };
+} // End of anonymous namespace
+
+char NoPathProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
+ "No Path Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
new file mode 100644
index 000000000000..48d7d05d788f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
@@ -0,0 +1,206 @@
+//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This verifier derives an edge profile file from current path profile
+// information
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-verifier"
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace {
+ class PathProfileVerifier : public ModulePass {
+ private:
+ bool runOnModule(Module &M);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PathProfileVerifier() : ModulePass(ID) {
+ initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+
+ virtual const char *getPassName() const {
+ return "Path Profiler Verifier";
+ }
+
+ // The verifier requires the path profile and edge profile.
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const;
+ };
+}
+
+static cl::opt<std::string>
+EdgeProfileFilename("path-profile-verifier-file",
+ cl::init("edgefrompath.llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Edge profile file generated by -path-profile-verifier"),
+ cl::Hidden);
+
+char PathProfileVerifier::ID = 0;
+INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
+ "Compare the path profile derived edge profile against the "
+ "edge profile.", true, true)
+
+ModulePass *llvm::createPathProfileVerifierPass() {
+ return new PathProfileVerifier();
+}
+
+// The verifier requires the path profile and edge profile.
+void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<PathProfileInfo>();
+ AU.addPreserved<PathProfileInfo>();
+}
+
+typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
+typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
+typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
+
+// the verifier iterates through each path to gather the total
+// number of edge frequencies
+bool PathProfileVerifier::runOnModule (Module &M) {
+ PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
+
+ // setup a data structure to map path edges which index an
+ // array of edge counters
+ NestedBlockToIndexMap arrayMap;
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+
+ arrayMap[(BasicBlock*)0][F->begin()][0] = i++;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+
+ unsigned duplicate = 0;
+ BasicBlock* prev = 0;
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
+ prev = TI->getSuccessor(s), ++s) {
+ if (prev == TI->getSuccessor(s))
+ duplicate++;
+ else duplicate = 0;
+
+ arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
+ }
+ }
+ }
+
+ std::vector<unsigned> edgeArray(i);
+
+ // iterate through each path and increment the edge counters as needed
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+
+ pathProfileInfo.setCurrentFunction(F);
+
+ DEBUG(dbgs() << "function '" << F->getName() << "' ran "
+ << pathProfileInfo.pathsRun()
+ << "/" << pathProfileInfo.getPotentialPathCount()
+ << " potential paths\n");
+
+ for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
+ endPath = pathProfileInfo.pathEnd();
+ nextPath != endPath; nextPath++ ) {
+ ProfilePath* currentPath = nextPath->second;
+
+ ProfilePathEdgeVector* pev = currentPath->getPathEdges();
+ DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
+ << currentPath->getCount() << "\n");
+ // setup the entry edge (normally path profiling doesn't care about this)
+ if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
+ edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]]
+ += currentPath->getCount();
+
+ for( ProfilePathEdgeIterator nextEdge = pev->begin(),
+ endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
+ if (nextEdge != pev->begin())
+ DEBUG(dbgs() << " :: ");
+
+ BasicBlock* source = nextEdge->getSource();
+ BasicBlock* target = nextEdge->getTarget();
+ unsigned duplicateNumber = nextEdge->getDuplicateNumber();
+ DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber
+ << "}--> " << target->getName());
+
+ // Ensure all the referenced edges exist
+ // TODO: make this a separate function
+ if( !arrayMap.count(source) ) {
+ errs() << " error [" << F->getName() << "()]: source '"
+ << source->getName()
+ << "' does not exist in the array map.\n";
+ } else if( !arrayMap[source].count(target) ) {
+ errs() << " error [" << F->getName() << "()]: target '"
+ << target->getName()
+ << "' does not exist in the array map.\n";
+ } else if( !arrayMap[source][target].count(duplicateNumber) ) {
+ errs() << " error [" << F->getName() << "()]: edge "
+ << source->getName() << " -> " << target->getName()
+ << " duplicate number " << duplicateNumber
+ << " does not exist in the array map.\n";
+ } else {
+ edgeArray[arrayMap[source][target][duplicateNumber]]
+ += currentPath->getCount();
+ }
+ }
+
+ DEBUG(errs() << "\n");
+
+ delete pev;
+ }
+ }
+
+ std::string errorInfo;
+ std::string filename = EdgeProfileFilename;
+
+ // Open a handle to the file
+ FILE* edgeFile = fopen(filename.c_str(),"wb");
+
+ if (!edgeFile) {
+ errs() << "error: unable to open file '" << filename << "' for output.\n";
+ return false;
+ }
+
+ errs() << "Generating edge profile '" << filename << "' ...\n";
+
+ // write argument info
+ unsigned type = ArgumentInfo;
+ unsigned num = pathProfileInfo.argList.size();
+ int zeros = 0;
+
+ fwrite(&type,sizeof(unsigned),1,edgeFile);
+ fwrite(&num,sizeof(unsigned),1,edgeFile);
+ fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
+ if (num&3)
+ fwrite(&zeros, 1, 4-(num&3), edgeFile);
+
+ type = EdgeInfo;
+ num = edgeArray.size();
+ fwrite(&type,sizeof(unsigned),1,edgeFile);
+ fwrite(&num,sizeof(unsigned),1,edgeFile);
+
+ // write each edge to the file
+ for( std::vector<unsigned>::iterator s = edgeArray.begin(),
+ e = edgeArray.end(); s != e; s++)
+ fwrite(&*s, sizeof (unsigned), 1, edgeFile);
+
+ fclose (edgeFile);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp
new file mode 100644
index 000000000000..96804a01edc6
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PostDominators.cpp
@@ -0,0 +1,51 @@
+//===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the post-dominator construction algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postdomtree"
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PostDominatorTree Implementation
+//===----------------------------------------------------------------------===//
+
+char PostDominatorTree::ID = 0;
+INITIALIZE_PASS(PostDominatorTree, "postdomtree",
+ "Post-Dominator Tree Construction", true, true)
+
+bool PostDominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+PostDominatorTree::~PostDominatorTree() {
+ delete DT;
+}
+
+void PostDominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+
+FunctionPass* llvm::createPostDomTree() {
+ return new PostDominatorTree();
+}
+
diff --git a/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp b/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp
new file mode 100644
index 000000000000..d7f444b4b6d7
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp
@@ -0,0 +1,155 @@
+//===- ProfileDataLoader.cpp - Load profile information from disk ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ProfileDataLoader class is used to load raw profiling data from the dump
+// file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileDataLoader.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Analysis/ProfileDataTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+raw_ostream &llvm::operator<<(raw_ostream &O, std::pair<const BasicBlock *,
+ const BasicBlock *> E) {
+ O << "(";
+
+ if (E.first)
+ O << E.first->getName();
+ else
+ O << "0";
+
+ O << ",";
+
+ if (E.second)
+ O << E.second->getName();
+ else
+ O << "0";
+
+ return O << ")";
+}
+
+/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one
+/// (or both) may not be defined.
+static unsigned AddCounts(unsigned A, unsigned B) {
+ // If either value is undefined, use the other.
+ // Undefined + undefined = undefined.
+ if (A == ProfileDataLoader::Uncounted) return B;
+ if (B == ProfileDataLoader::Uncounted) return A;
+
+ return A + B;
+}
+
+/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F'
+template <typename T>
+static void ReadProfilingData(const char *ToolName, FILE *F,
+ T *Data, size_t NumEntries) {
+ // Read in the block of data...
+ if (fread(Data, sizeof(T), NumEntries, F) != NumEntries)
+ report_fatal_error(Twine(ToolName) + ": Profiling data truncated");
+}
+
+/// ReadProfilingNumEntries - Read how many entries are in this profiling data
+/// packet.
+static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F,
+ bool ShouldByteSwap) {
+ unsigned Entry;
+ ReadProfilingData<unsigned>(ToolName, F, &Entry, 1);
+ return ShouldByteSwap ? ByteSwap_32(Entry) : Entry;
+}
+
+/// ReadProfilingBlock - Read the number of entries in the next profiling data
+/// packet and then accumulate the entries into 'Data'.
+static void ReadProfilingBlock(const char *ToolName, FILE *F,
+ bool ShouldByteSwap,
+ SmallVector<unsigned, 32> &Data) {
+ // Read the number of entries...
+ unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
+
+ // Read in the data.
+ SmallVector<unsigned, 8> TempSpace(NumEntries);
+ ReadProfilingData<unsigned>(ToolName, F, TempSpace.data(), NumEntries);
+
+ // Make sure we have enough space ...
+ if (Data.size() < NumEntries)
+ Data.resize(NumEntries, ProfileDataLoader::Uncounted);
+
+ // Accumulate the data we just read into the existing data.
+ for (unsigned i = 0; i < NumEntries; ++i) {
+ unsigned Entry = ShouldByteSwap ? ByteSwap_32(TempSpace[i]) : TempSpace[i];
+ Data[i] = AddCounts(Entry, Data[i]);
+ }
+}
+
+/// ReadProfilingArgBlock - Read the command line arguments that the progam was
+/// run with when the current profiling data packet(s) were generated.
+static void ReadProfilingArgBlock(const char *ToolName, FILE *F,
+ bool ShouldByteSwap,
+ SmallVector<std::string, 1> &CommandLines) {
+ // Read the number of bytes ...
+ unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
+
+ // Read in the arguments (if there are any to read). Round up the length to
+ // the nearest 4-byte multiple.
+ SmallVector<char, 8> Args(ArgLength+4);
+ if (ArgLength)
+ ReadProfilingData<char>(ToolName, F, Args.data(), (ArgLength+3) & ~3);
+
+ // Store the arguments.
+ CommandLines.push_back(std::string(&Args[0], &Args[ArgLength]));
+}
+
+const unsigned ProfileDataLoader::Uncounted = ~0U;
+
+/// ProfileDataLoader ctor - Read the specified profiling data file, reporting
+/// a fatal error if the file is invalid or broken.
+ProfileDataLoader::ProfileDataLoader(const char *ToolName,
+ const std::string &Filename)
+ : Filename(Filename) {
+ FILE *F = fopen(Filename.c_str(), "rb");
+ if (F == 0)
+ report_fatal_error(Twine(ToolName) + ": Error opening '" +
+ Filename + "': ");
+
+ // Keep reading packets until we run out of them.
+ unsigned PacketType;
+ while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
+ // If the low eight bits of the packet are zero, we must be dealing with an
+ // endianness mismatch. Byteswap all words read from the profiling
+ // information. This can happen when the compiler host and target have
+ // different endianness.
+ bool ShouldByteSwap = (char)PacketType == 0;
+ PacketType = ShouldByteSwap ? ByteSwap_32(PacketType) : PacketType;
+
+ switch (PacketType) {
+ case ArgumentInfo:
+ ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines);
+ break;
+
+ case EdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
+ break;
+
+ default:
+ report_fatal_error(std::string(ToolName)
+ + ": Unknown profiling packet type");
+ break;
+ }
+ }
+
+ fclose(F);
+}
diff --git a/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp
new file mode 100644
index 000000000000..2ee0093a8f57
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp
@@ -0,0 +1,188 @@
+//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loads profiling data from a dump file and sets branch weight
+// metadata.
+//
+// TODO: Replace all "profile-metadata-loader" strings with "profile-loader"
+// once ProfileInfo etc. has been removed.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-metadata-loader"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileDataLoader.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumEdgesRead, "The # of edges read.");
+STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated.");
+
+static cl::opt<std::string>
+ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -profile-metadata-loader"));
+
+namespace {
+ /// This pass loads profiling data from a dump file and sets branch weight
+ /// metadata.
+ class ProfileMetadataLoaderPass : public ModulePass {
+ std::string Filename;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit ProfileMetadataLoaderPass(const std::string &filename = "")
+ : ModulePass(ID), Filename(filename) {
+ initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry());
+ if (filename.empty()) Filename = ProfileMetadataFilename;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profile loader";
+ }
+
+ virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge,
+ ArrayRef<unsigned>);
+ virtual unsigned matchEdges(Module&, ProfileData&, ArrayRef<unsigned>);
+ virtual void setBranchWeightMetadata(Module&, ProfileData&);
+
+ virtual bool runOnModule(Module &M);
+ };
+} // End of anonymous namespace
+
+char ProfileMetadataLoaderPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader",
+ "Load profile information from llvmprof.out", false, true)
+INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader",
+ "Load profile information from llvmprof.out", false, true)
+
+char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID;
+
+/// createProfileMetadataLoaderPass - This function returns a Pass that loads
+/// the profiling information for the module from the specified filename,
+/// making it available to the optimizers.
+ModulePass *llvm::createProfileMetadataLoaderPass() {
+ return new ProfileMetadataLoaderPass();
+}
+ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) {
+ return new ProfileMetadataLoaderPass(Filename);
+}
+
+/// readEdge - Take the value from a profile counter and assign it to an edge.
+void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount,
+ ProfileData &PB, ProfileData::Edge e,
+ ArrayRef<unsigned> Counters) {
+ if (ReadCount >= Counters.size()) return;
+
+ unsigned weight = Counters[ReadCount];
+ assert(weight != ProfileDataLoader::Uncounted);
+ PB.addEdgeWeight(e, weight);
+
+ DEBUG(dbgs() << "-- Read Edge Counter for " << e
+ << " (# "<< (ReadCount) << "): "
+ << PB.getEdgeWeight(e) << "\n");
+}
+
+/// matchEdges - Link every profile counter with an edge.
+unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB,
+ ArrayRef<unsigned> Counters) {
+ if (Counters.size() == 0) return 0;
+
+ unsigned ReadCount = 0;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n");
+ readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)),
+ Counters);
+ }
+ }
+ }
+
+ return ReadCount;
+}
+
+/// setBranchWeightMetadata - Translate the counter values associated with each
+/// edge into branch weights for each conditional branch (a branch with 2 or
+/// more desinations).
+void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M,
+ ProfileData &PB) {
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n");
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ unsigned NumSuccessors = TI->getNumSuccessors();
+
+ // If there is only one successor then we can not set a branch
+ // probability as the target is certain.
+ if (NumSuccessors < 2) continue;
+
+ // Load the weights of all edges leading from this terminator.
+ DEBUG(dbgs() << "-- Terminator with " << NumSuccessors
+ << " successors:\n");
+ SmallVector<uint32_t, 4> Weights(NumSuccessors);
+ for (unsigned s = 0 ; s < NumSuccessors ; ++s) {
+ ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s));
+ Weights[s] = (uint32_t)PB.getEdgeWeight(edge);
+ DEBUG(dbgs() << "---- Edge '" << edge << "' has weight "
+ << Weights[s] << "\n");
+ }
+
+ // Set branch weight metadata. This will set branch probabilities of
+ // 100%/0% if that is true of the dynamic execution.
+ // BranchProbabilityInfo can account for this when it loads this metadata
+ // (it gives the unexectuted branch a weight of 1 for the purposes of
+ // probability calculations).
+ MDBuilder MDB(TI->getContext());
+ MDNode *Node = MDB.createBranchWeights(Weights);
+ TI->setMetadata(LLVMContext::MD_prof, Node);
+ NumTermsAnnotated++;
+ }
+ }
+}
+
+bool ProfileMetadataLoaderPass::runOnModule(Module &M) {
+ ProfileDataLoader PDL("profile-data-loader", Filename);
+ ProfileData PB;
+
+ ArrayRef<unsigned> Counters = PDL.getRawEdgeCounts();
+
+ unsigned ReadCount = matchEdges(M, PB, Counters);
+
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+
+ setBranchWeightMetadata(M, PB);
+
+ return ReadCount > 0;
+}
diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
new file mode 100644
index 000000000000..b284b995ac78
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
@@ -0,0 +1,426 @@
+//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// estimates the profiling information in a very crude and unimaginative way.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-estimator"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<double>
+LoopWeight(
+ "profile-estimator-loop-weight", cl::init(10),
+ cl::value_desc("loop-weight"),
+ cl::desc("Number of loop executions used for profile-estimator")
+);
+
+namespace {
+ class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
+ double ExecCount;
+ LoopInfo *LI;
+ std::set<BasicBlock*> BBToVisit;
+ std::map<Loop*,double> LoopExitWeights;
+ std::map<Edge,double> MinimalWeight;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit ProfileEstimatorPass(const double execcount = 0)
+ : FunctionPass(ID), ExecCount(execcount) {
+ initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
+ if (execcount == 0) ExecCount = LoopWeight;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information estimator";
+ }
+
+ /// run - Estimate the profile information from the specified file.
+ virtual bool runOnFunction(Function &F);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ virtual void recurseBasicBlock(BasicBlock *BB);
+
+ void inline printEdgeWeight(Edge);
+ };
+} // End of anonymous namespace
+
+char ProfileEstimatorPass::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false)
+
+namespace llvm {
+ char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
+
+ FunctionPass *createProfileEstimatorPass() {
+ return new ProfileEstimatorPass();
+ }
+
+ /// createProfileEstimatorPass - This function returns a Pass that estimates
+ /// profiling information using the given loop execution count.
+ Pass *createProfileEstimatorPass(const unsigned execcount) {
+ return new ProfileEstimatorPass(execcount);
+ }
+}
+
+static double ignoreMissing(double w) {
+ if (w == ProfileInfo::MissingValue) return 0;
+ return w;
+}
+
+static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
+ DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
+}
+
+void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
+ DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
+ << format("%20.20g", getEdgeWeight(E)) << "\n");
+}
+
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weight of the incoming edges must be equal the block weight which must in
+// turn be equal to the sume of the weights of the outgoing edges.
+// Since the flow of an block is deterimined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again,
+// otherwise it would be possible to violate the flow condition in another
+// block.
+void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
+
+ // Break the recursion if this BasicBlock was already visited.
+ if (BBToVisit.find(BB) == BBToVisit.end()) return;
+
+ // Read the LoopInfo for this block.
+ bool BBisHeader = LI->isLoopHeader(BB);
+ Loop* BBLoop = LI->getLoopFor(BB);
+
+ // To get the block weight, read all incoming edges.
+ double BBWeight = 0;
+ std::set<BasicBlock*> ProcessedPreds;
+ for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ // If this block was not considered already, add weight.
+ Edge edge = getEdge(*bbi,BB);
+ double w = getEdgeWeight(edge);
+ if (ProcessedPreds.insert(*bbi).second) {
+ BBWeight += ignoreMissing(w);
+ }
+ // If this block is a loop header and the predecessor is contained in this
+ // loop, thus the edge is a backedge, continue and do not check if the
+ // value is valid.
+ if (BBisHeader && BBLoop->contains(*bbi)) {
+ printEdgeError(edge, "but is backedge, continuing");
+ continue;
+ }
+ // If the edges value is missing (and this is no loop header, and this is
+ // no backedge) return, this block is currently non estimatable.
+ if (w == MissingValue) {
+ printEdgeError(edge, "returning");
+ return;
+ }
+ }
+ if (getExecutionCount(BB) != MissingValue) {
+ BBWeight = getExecutionCount(BB);
+ }
+
+ // Fetch all necessary information for current block.
+ SmallVector<Edge, 8> ExitEdges;
+ SmallVector<Edge, 8> Edges;
+ if (BBLoop) {
+ BBLoop->getExitEdges(ExitEdges);
+ }
+
+ // If this is a loop header, consider the following:
+ // Exactly the flow that is entering this block, must exit this block too. So
+ // do the following:
+ // *) get all the exit edges, read the flow that is already leaving this
+ // loop, remember the edges that do not have any flow on them right now.
+ // (The edges that have already flow on them are most likely exiting edges of
+ // other loops, do not touch those flows because the previously caclulated
+ // loopheaders would not be exact anymore.)
+ // *) In case there is not a single exiting edge left, create one at the loop
+ // latch to prevent the flow from building up in the loop.
+ // *) Take the flow that is not leaving the loop already and distribute it on
+ // the remaining exiting edges.
+ // (This ensures that all flow that enters the loop also leaves it.)
+ // *) Increase the flow into the loop by increasing the weight of this block.
+ // There is at least one incoming backedge that will bring us this flow later
+ // on. (So that the flow condition in this node is valid again.)
+ if (BBisHeader) {
+ double incoming = BBWeight;
+ // Subtract the flow leaving the loop.
+ std::set<Edge> ProcessedExits;
+ for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
+ ee = ExitEdges.end(); ei != ee; ++ei) {
+ if (ProcessedExits.insert(*ei).second) {
+ double w = getEdgeWeight(*ei);
+ if (w == MissingValue) {
+ Edges.push_back(*ei);
+ // Check if there is a necessary minimal weight, if yes, subtract it
+ // from weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ incoming -= MinimalWeight[*ei];
+ DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ } else {
+ incoming -= w;
+ }
+ }
+ }
+ // If no exit edges, create one:
+ if (Edges.size() == 0) {
+ BasicBlock *Latch = BBLoop->getLoopLatch();
+ if (Latch) {
+ Edge edge = getEdge(Latch,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ edge = getEdge(Latch, BB);
+ EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
+ printEdgeWeight(edge);
+ }
+ }
+
+ // Distribute remaining weight to the exting edges. To prevent fractions
+ // from building up and provoking precision problems the weight which is to
+ // be distributed is split and the rounded, the last edge gets a somewhat
+ // bigger value, but we are close enough for an estimation.
+ double fraction = floor(incoming/Edges.size());
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ double w = 0;
+ if (ei != (ee-1)) {
+ w = fraction;
+ incoming -= fraction;
+ } else {
+ w = incoming;
+ }
+ EdgeInformation[BB->getParent()][*ei] += w;
+ // Read necessary minimal weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+ DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ printEdgeWeight(*ei);
+
+ // Add minimal weight to paths to all exit edges, this is used to ensure
+ // that enough flow is reaching this edges.
+ Path p;
+ const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
+ while (Dest != BB) {
+ const BasicBlock *Parent = p.find(Dest)->second;
+ Edge e = getEdge(Parent, Dest);
+ if (MinimalWeight.find(e) == MinimalWeight.end()) {
+ MinimalWeight[e] = 0;
+ }
+ MinimalWeight[e] += w;
+ DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
+ Dest = Parent;
+ }
+ }
+ // Increase flow into the loop.
+ BBWeight *= (ExecCount+1);
+ }
+
+ BlockInformation[BB->getParent()][BB] = BBWeight;
+ // Up until now we considered only the loop exiting edges, now we have a
+ // definite block weight and must distribute this onto the outgoing edges.
+ // Since there may be already flow attached to some of the edges, read this
+ // flow first and remember the edges that have still now flow attached.
+ Edges.clear();
+ std::set<BasicBlock*> ProcessedSuccs;
+
+ succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // Also check for (BB,0) edges that may already contain some flow. (But only
+ // in case there are no successors.)
+ if (bbi == bbe) {
+ Edge edge = getEdge(BB,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ }
+ for ( ; bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ Edge edge = getEdge(BB,*bbi);
+ double w = getEdgeWeight(edge);
+ if (w != MissingValue) {
+ BBWeight -= getEdgeWeight(edge);
+ } else {
+ Edges.push_back(edge);
+ // If minimal weight is necessary, reserve weight by subtracting weight
+ // from block weight, this is readded later on.
+ if (MinimalWeight.find(edge) != MinimalWeight.end()) {
+ BBWeight -= MinimalWeight[edge];
+ DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
+ }
+ }
+ }
+ }
+
+ double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0;
+ // Finally we know what flow is still not leaving the block, distribute this
+ // flow onto the empty edges.
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ if (ei != (ee-1)) {
+ EdgeInformation[BB->getParent()][*ei] += fraction;
+ BBWeight -= fraction;
+ } else {
+ EdgeInformation[BB->getParent()][*ei] += BBWeight;
+ }
+ // Readd minial necessary weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+ DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ printEdgeWeight(*ei);
+ }
+
+ // This block is visited, mark this before the recursion.
+ BBToVisit.erase(BB);
+
+ // Recurse into successors.
+ for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+}
+
+bool ProfileEstimatorPass::runOnFunction(Function &F) {
+ if (F.isDeclaration()) return false;
+
+ // Fetch LoopInfo and clear ProfileInfo for this function.
+ LI = &getAnalysis<LoopInfo>();
+ FunctionInformation.erase(&F);
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+ BBToVisit.clear();
+
+ // Mark all blocks as to visit.
+ for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
+ BBToVisit.insert(bi);
+
+ // Clear Minimal Edges.
+ MinimalWeight.clear();
+
+ DEBUG(dbgs() << "Working on function " << F.getName() << "\n");
+
+ // Since the entry block is the first one and has no predecessors, the edge
+ // (0,entry) is inserted with the starting weight of 1.
+ BasicBlock *entry = &F.getEntryBlock();
+ BlockInformation[&F][entry] = pow(2.0, 32.0);
+ Edge edge = getEdge(0,entry);
+ EdgeInformation[&F][edge] = BlockInformation[&F][entry];
+ printEdgeWeight(edge);
+
+ // Since recurseBasicBlock() maybe returns with a block which was not fully
+ // estimated, use recurseBasicBlock() until everything is calculated.
+ bool cleanup = false;
+ recurseBasicBlock(entry);
+ while (BBToVisit.size() > 0 && !cleanup) {
+ // Remember number of open blocks, this is later used to check if progress
+ // was made.
+ unsigned size = BBToVisit.size();
+
+ // Try to calculate all blocks in turn.
+ for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
+ be = BBToVisit.end(); bi != be; ++bi) {
+ recurseBasicBlock(*bi);
+ // If at least one block was finished, break because iterator may be
+ // invalid.
+ if (BBToVisit.size() < size) break;
+ }
+
+ // If there was not a single block resolved, make some assumptions.
+ if (BBToVisit.size() == size) {
+ bool found = false;
+ for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end();
+ (BBI != BBE) && (!found); ++BBI) {
+ BasicBlock *BB = *BBI;
+ // Try each predecessor if it can be assumend.
+ for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ (bbi != bbe) && (!found); ++bbi) {
+ Edge e = getEdge(*bbi,BB);
+ double w = getEdgeWeight(e);
+ // Check that edge from predecessor is still free.
+ if (w == MissingValue) {
+ // Check if there is a circle from this block to predecessor.
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
+ if (Dest != *bbi) {
+ // If there is no circle, just set edge weight to 0
+ EdgeInformation[&F][e] = 0;
+ DEBUG(dbgs() << "Assuming edge weight: ");
+ printEdgeWeight(e);
+ found = true;
+ }
+ }
+ }
+ }
+ if (!found) {
+ cleanup = true;
+ DEBUG(dbgs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n");
+ }
+ }
+ }
+ // In case there was no safe way to assume edges, set as a last measure,
+ // set _everything_ to zero.
+ if (cleanup) {
+ FunctionInformation[&F] = 0;
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+ for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ const BasicBlock *BB = &(*FI);
+ BlockInformation[&F][BB] = 0;
+ const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
+ if (predi == prede) {
+ Edge e = getEdge(0,BB);
+ setEdgeWeight(e,0);
+ }
+ for (;predi != prede; ++predi) {
+ Edge e = getEdge(*predi,BB);
+ setEdgeWeight(e,0);
+ }
+ succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
+ if (succi == succe) {
+ Edge e = getEdge(BB,0);
+ setEdgeWeight(e,0);
+ }
+ for (;succi != succe; ++succi) {
+ Edge e = getEdge(*succi,BB);
+ setEdgeWeight(e,0);
+ }
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ProfileInfo.cpp b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
new file mode 100644
index 000000000000..9626a48b9d0d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
@@ -0,0 +1,1079 @@
+//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract ProfileInfo interface, and the default
+// "no profile" implementation.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-info"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include <limits>
+#include <queue>
+#include <set>
+using namespace llvm;
+
+namespace llvm {
+ template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
+}
+
+// Register the ProfileInfo interface, providing a nice name to refer to.
+INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
+
+namespace llvm {
+
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
+
+template <>
+ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
+ MachineProfile = 0;
+}
+template <>
+ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
+ if (MachineProfile) delete MachineProfile;
+}
+
+template<>
+char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
+
+template<>
+const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
+
+template<> const
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
+
+template<> double
+ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(BB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ double Count = MissingValue;
+
+ const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // Are there zero predecessors of this block?
+ if (PI == PE) {
+ Edge e = getEdge(0, BB);
+ Count = getEdgeWeight(e);
+ } else {
+ // Otherwise, if there are predecessors, the execution count of this block is
+ // the sum of the edge frequencies from the incoming edges.
+ std::set<const BasicBlock*> ProcessedPreds;
+ Count = 0;
+ for (; PI != PE; ++PI) {
+ const BasicBlock *P = *PI;
+ if (ProcessedPreds.insert(P).second) {
+ double w = getEdgeWeight(getEdge(P, BB));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
+ }
+ }
+
+ // If the predecessors did not suffice to get block weight, try successors.
+ if (Count == MissingValue) {
+
+ succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+
+ // Are there zero successors of this block?
+ if (SI == SE) {
+ Edge e = getEdge(BB,0);
+ Count = getEdgeWeight(e);
+ } else {
+ std::set<const BasicBlock*> ProcessedSuccs;
+ Count = 0;
+ for (; SI != SE; ++SI)
+ if (ProcessedSuccs.insert(*SI).second) {
+ double w = getEdgeWeight(getEdge(BB, *SI));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
+ }
+ }
+
+ if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
+ return Count;
+}
+
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ getExecutionCount(const MachineBasicBlock *MBB) {
+ std::map<const MachineFunction*, BlockCounts>::iterator J =
+ BlockInformation.find(MBB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(MBB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ return MissingValue;
+}
+
+template<>
+double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
+ std::map<const Function*, double>::iterator J =
+ FunctionInformation.find(F);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+ // isDeclaration() is checked here and not at start of function to allow
+ // functions without a body still to have a execution count.
+ if (F->isDeclaration()) return MissingValue;
+
+ double Count = getExecutionCount(&F->getEntryBlock());
+ if (Count != MissingValue) FunctionInformation[F] = Count;
+ return Count;
+}
+
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ getExecutionCount(const MachineFunction *MF) {
+ std::map<const MachineFunction*, double>::iterator J =
+ FunctionInformation.find(MF);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+ double Count = getExecutionCount(&MF->front());
+ if (Count != MissingValue) FunctionInformation[MF] = Count;
+ return Count;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ setExecutionCount(const BasicBlock *BB, double w) {
+ DEBUG(dbgs() << "Creating Block " << BB->getName()
+ << " (weight: " << format("%.20g",w) << ")\n");
+ BlockInformation[BB->getParent()][BB] = w;
+}
+
+template<>
+void ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ setExecutionCount(const MachineBasicBlock *MBB, double w) {
+ DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName()
+ << " (weight: " << format("%.20g",w) << ")\n");
+ BlockInformation[MBB->getParent()][MBB] = w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
+ double oldw = getEdgeWeight(e);
+ assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
+ DEBUG(dbgs() << "Adding to Edge " << e
+ << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+ EdgeInformation[getFunction(e)][e] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ addExecutionCount(const BasicBlock *BB, double w) {
+ double oldw = getExecutionCount(BB);
+ assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
+ DEBUG(dbgs() << "Adding to Block " << BB->getName()
+ << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+ BlockInformation[BB->getParent()][BB] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J == BlockInformation.end()) return;
+
+ DEBUG(dbgs() << "Deleting " << BB->getName() << "\n");
+ J->second.erase(BB);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(getFunction(e));
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Deleting" << e << "\n");
+ J->second.erase(e);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ replaceEdge(const Edge &oldedge, const Edge &newedge) {
+ double w;
+ if ((w = getEdgeWeight(newedge)) == MissingValue) {
+ w = getEdgeWeight(oldedge);
+ DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n");
+ } else {
+ w += getEdgeWeight(oldedge);
+ DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n");
+ }
+ setEdgeWeight(newedge,w);
+ removeEdge(oldedge);
+}
+
+template<>
+const BasicBlock *ProfileInfoT<Function,BasicBlock>::
+ GetPath(const BasicBlock *Src, const BasicBlock *Dest,
+ Path &P, unsigned Mode) {
+ const BasicBlock *BB = 0;
+ bool hasFoundPath = false;
+
+ std::queue<const BasicBlock *> BFS;
+ BFS.push(Src);
+
+ while(BFS.size() && !hasFoundPath) {
+ BB = BFS.front();
+ BFS.pop();
+
+ succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+ if (Succ == End) {
+ P[(const BasicBlock*)0] = BB;
+ if (Mode & GetPathToExit) {
+ hasFoundPath = true;
+ BB = 0;
+ }
+ }
+ for(;Succ != End; ++Succ) {
+ if (P.find(*Succ) != P.end()) continue;
+ Edge e = getEdge(BB,*Succ);
+ if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
+ P[*Succ] = BB;
+ BFS.push(*Succ);
+ if ((Mode & GetPathToDest) && *Succ == Dest) {
+ hasFoundPath = true;
+ BB = *Succ;
+ break;
+ }
+ if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
+ hasFoundPath = true;
+ BB = *Succ;
+ break;
+ }
+ }
+ }
+
+ return BB;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ divertFlow(const Edge &oldedge, const Edge &newedge) {
+ DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
+
+ // First check if the old edge was taken, if not, just delete it...
+ if (getEdgeWeight(oldedge) == 0) {
+ removeEdge(oldedge);
+ return;
+ }
+
+ Path P;
+ P[newedge.first] = 0;
+ P[newedge.second] = newedge.first;
+ const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
+
+ double w = getEdgeWeight (oldedge);
+ DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
+ do {
+ const BasicBlock *Parent = P.find(BB)->second;
+ Edge e = getEdge(Parent,BB);
+ double oldw = getEdgeWeight(e);
+ double oldc = getExecutionCount(e.first);
+ setEdgeWeight(e, w+oldw);
+ if (Parent != oldedge.first) {
+ setExecutionCount(e.first, w+oldc);
+ }
+ BB = Parent;
+ } while (BB != newedge.first);
+ removeEdge(oldedge);
+}
+
+/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
+/// This checks all edges of the function the blocks reside in and replaces the
+/// occurrences of RmBB with DestBB.
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
+ DEBUG(dbgs() << "Replacing " << RmBB->getName()
+ << " with " << DestBB->getName() << "\n");
+ const Function *F = DestBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ Edge e, newedge;
+ bool erasededge = false;
+ EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+ while(I != E) {
+ e = (I++)->first;
+ bool foundedge = false; bool eraseedge = false;
+ if (e.first == RmBB) {
+ if (e.second == DestBB) {
+ eraseedge = true;
+ } else {
+ newedge = getEdge(DestBB, e.second);
+ foundedge = true;
+ }
+ }
+ if (e.second == RmBB) {
+ if (e.first == DestBB) {
+ eraseedge = true;
+ } else {
+ newedge = getEdge(e.first, DestBB);
+ foundedge = true;
+ }
+ }
+ if (foundedge) {
+ replaceEdge(e, newedge);
+ }
+ if (eraseedge) {
+ if (erasededge) {
+ Edge newedge = getEdge(DestBB, DestBB);
+ replaceEdge(e, newedge);
+ } else {
+ removeEdge(e);
+ erasededge = true;
+ }
+ }
+ }
+}
+
+/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
+/// Since its possible that there is more than one edge in the CFG from FristBB
+/// to SecondBB its necessary to redirect the flow proporionally.
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
+ const BasicBlock *SecondBB,
+ const BasicBlock *NewBB,
+ bool MergeIdenticalEdges) {
+ const Function *F = FirstBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ // Generate edges and read current weight.
+ Edge e = getEdge(FirstBB, SecondBB);
+ Edge n1 = getEdge(FirstBB, NewBB);
+ Edge n2 = getEdge(NewBB, SecondBB);
+ EdgeWeights &ECs = J->second;
+ double w = ECs[e];
+
+ int succ_count = 0;
+ if (!MergeIdenticalEdges) {
+ // First count the edges from FristBB to SecondBB, if there is more than
+ // one, only slice out a proporional part for NewBB.
+ for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
+ BBI != BBE; ++BBI) {
+ if (*BBI == SecondBB) succ_count++;
+ }
+ // When the NewBB is completely new, increment the count by one so that
+ // the counts are properly distributed.
+ if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
+ } else {
+ // When the edges are merged anyway, then redirect all flow.
+ succ_count = 1;
+ }
+
+ // We know now how many edges there are from FirstBB to SecondBB, reroute a
+ // proportional part of the edge weight over NewBB.
+ double neww = floor(w / succ_count);
+ ECs[n1] += neww;
+ ECs[n2] += neww;
+ BlockInformation[F][NewBB] += neww;
+ if (succ_count == 1) {
+ ECs.erase(e);
+ } else {
+ ECs[e] -= neww;
+ }
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
+ const BasicBlock* New) {
+ const Function *F = Old->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
+
+ std::set<Edge> Edges;
+ for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
+ ewi != ewe; ++ewi) {
+ Edge old = ewi->first;
+ if (old.first == Old) {
+ Edges.insert(old);
+ }
+ }
+ for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
+ EI != EE; ++EI) {
+ Edge newedge = getEdge(New, EI->second);
+ replaceEdge(*EI, newedge);
+ }
+
+ double w = getExecutionCount(Old);
+ setEdgeWeight(getEdge(Old, New), w);
+ setExecutionCount(New, w);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
+ const BasicBlock* NewBB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds) {
+ const Function *F = BB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
+ << " to " << NewBB->getName() << "\n");
+
+ // Collect weight that was redirected over NewBB.
+ double newweight = 0;
+
+ std::set<const BasicBlock *> ProcessedPreds;
+ // For all requestes Predecessors.
+ for (unsigned pred = 0; pred < NumPreds; ++pred) {
+ const BasicBlock * Pred = Preds[pred];
+ if (ProcessedPreds.insert(Pred).second) {
+ // Create edges and read old weight.
+ Edge oldedge = getEdge(Pred, BB);
+ Edge newedge = getEdge(Pred, NewBB);
+
+ // Remember how much weight was redirected.
+ newweight += getEdgeWeight(oldedge);
+
+ replaceEdge(oldedge,newedge);
+ }
+ }
+
+ Edge newedge = getEdge(NewBB,BB);
+ setEdgeWeight(newedge, newweight);
+ setExecutionCount(NewBB, newweight);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
+ const Function *New) {
+ DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with "
+ << New->getName() << "\n");
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(Old);
+ if(J != EdgeInformation.end()) {
+ EdgeInformation[New] = J->second;
+ }
+ EdgeInformation.erase(Old);
+ BlockInformation.erase(Old);
+ FunctionInformation.erase(Old);
+}
+
+static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
+ ProfileInfo::Edge &tocalc, unsigned &uncalc) {
+ if (w == ProfileInfo::MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ return 0;
+ } else {
+ return w;
+ }
+}
+
+template<>
+bool ProfileInfoT<Function,BasicBlock>::
+ CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
+ bool assumeEmptySelf) {
+ Edge edgetocalc;
+ unsigned uncalculated = 0;
+
+ // collect weights of all incoming and outgoing edges, rememer edges that
+ // have no value
+ double incount = 0;
+ SmallSet<const BasicBlock*,8> pred_visited;
+ const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi==bbe) {
+ Edge e = getEdge(0,BB);
+ incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (pred_visited.insert(*bbi)) {
+ Edge e = getEdge(*bbi,BB);
+ incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+ }
+ }
+
+ double outcount = 0;
+ SmallSet<const BasicBlock*,8> succ_visited;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi==sbbe) {
+ Edge e = getEdge(BB,0);
+ if (getEdgeWeight(e) == MissingValue) {
+ double w = getExecutionCount(BB);
+ if (w != MissingValue) {
+ setEdgeWeight(e,w);
+ removed = e;
+ }
+ }
+ outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+ }
+ for (;sbbi != sbbe; ++sbbi) {
+ if (succ_visited.insert(*sbbi)) {
+ Edge e = getEdge(BB,*sbbi);
+ outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+ }
+ }
+
+ // if exactly one edge weight was missing, calculate it and remove it from
+ // spanning tree
+ if (uncalculated == 0 ) {
+ return true;
+ } else
+ if (uncalculated == 1) {
+ if (incount < outcount) {
+ EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
+ } else {
+ EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
+ }
+ DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": "
+ << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
+ removed = edgetocalc;
+ return true;
+ } else
+ if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) {
+ setEdgeWeight(edgetocalc, incount * 10);
+ removed = edgetocalc;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) {
+ double w = PI->getEdgeWeight(e);
+ if (w != ProfileInfo::MissingValue) {
+ calcw += w;
+ } else {
+ misscount.insert(e);
+ }
+}
+
+template<>
+bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
+ double inWeight = 0;
+ std::set<Edge> inMissing;
+ std::set<const BasicBlock*> ProcessedPreds;
+ const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi == bbe) {
+ readEdge(this,getEdge(0,BB),inWeight,inMissing);
+ }
+ for( ; bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
+ }
+ }
+
+ double outWeight = 0;
+ std::set<Edge> outMissing;
+ std::set<const BasicBlock*> ProcessedSuccs;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi == sbbe)
+ readEdge(this,getEdge(BB,0),outWeight,outMissing);
+ for ( ; sbbi != sbbe; ++sbbi ) {
+ if (ProcessedSuccs.insert(*sbbi).second) {
+ readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
+ }
+ }
+
+ double share;
+ std::set<Edge>::iterator ei,ee;
+ if (inMissing.size() == 0 && outMissing.size() > 0) {
+ ei = outMissing.begin();
+ ee = outMissing.end();
+ share = inWeight/outMissing.size();
+ setExecutionCount(BB,inWeight);
+ } else
+ if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
+ ei = inMissing.begin();
+ ee = inMissing.end();
+ share = 0;
+ setExecutionCount(BB,0);
+ } else
+ if (inMissing.size() == 0 && outMissing.size() == 0) {
+ setExecutionCount(BB,outWeight);
+ return true;
+ } else {
+ return false;
+ }
+ for ( ; ei != ee; ++ei ) {
+ setEdgeWeight(*ei,share);
+ }
+ return true;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
+// if (getExecutionCount(&(F->getEntryBlock())) == 0) {
+// for (Function::const_iterator FI = F->begin(), FE = F->end();
+// FI != FE; ++FI) {
+// const BasicBlock* BB = &(*FI);
+// {
+// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+// if (NBB == End) {
+// setEdgeWeight(getEdge(0,BB),0);
+// }
+// for(;NBB != End; ++NBB) {
+// setEdgeWeight(getEdge(*NBB,BB),0);
+// }
+// }
+// {
+// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+// if (NBB == End) {
+// setEdgeWeight(getEdge(0,BB),0);
+// }
+// for(;NBB != End; ++NBB) {
+// setEdgeWeight(getEdge(*NBB,BB),0);
+// }
+// }
+// }
+// return;
+// }
+ // The set of BasicBlocks that are still unvisited.
+ std::set<const BasicBlock*> Unvisited;
+
+ // The set of return edges (Edges with no successors).
+ std::set<Edge> ReturnEdges;
+ double ReturnWeight = 0;
+
+ // First iterate over the whole function and collect:
+ // 1) The blocks in this function in the Unvisited set.
+ // 2) The return edges in the ReturnEdges set.
+ // 3) The flow that is leaving the function already via return edges.
+
+ // Data structure for searching the function.
+ std::queue<const BasicBlock *> BFS;
+ const BasicBlock *BB = &(F->getEntryBlock());
+ BFS.push(BB);
+ Unvisited.insert(BB);
+
+ while (BFS.size()) {
+ BB = BFS.front(); BFS.pop();
+ succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ if (NBB == End) {
+ Edge e = getEdge(BB,0);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ // If the return edge has no value, try to read value from block.
+ double bw = getExecutionCount(BB);
+ if (bw != MissingValue) {
+ setEdgeWeight(e,bw);
+ ReturnWeight += bw;
+ } else {
+ // If both return edge and block provide no value, collect edge.
+ ReturnEdges.insert(e);
+ }
+ } else {
+ // If the return edge has a proper value, collect it.
+ ReturnWeight += w;
+ }
+ }
+ for (;NBB != End; ++NBB) {
+ if (Unvisited.insert(*NBB).second) {
+ BFS.push(*NBB);
+ }
+ }
+ }
+
+ while (Unvisited.size() > 0) {
+ unsigned oldUnvisitedCount = Unvisited.size();
+ bool FoundPath = false;
+
+ // If there is only one edge left, calculate it.
+ if (ReturnEdges.size() == 1) {
+ ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
+
+ Edge e = *ReturnEdges.begin();
+ setEdgeWeight(e,ReturnWeight);
+ setExecutionCount(e.first,ReturnWeight);
+
+ Unvisited.erase(e.first);
+ ReturnEdges.erase(e);
+ continue;
+ }
+
+ // Calculate all blocks where only one edge is missing, this may also
+ // resolve furhter return edges.
+ std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ Edge e;
+ if(CalculateMissingEdge(BB,e,true)) {
+ if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
+ setExecutionCount(BB,getExecutionCount(BB));
+ }
+ Unvisited.erase(BB);
+ if (e.first != 0 && e.second == 0) {
+ ReturnEdges.erase(e);
+ ReturnWeight += getEdgeWeight(e);
+ }
+ }
+ }
+ if (oldUnvisitedCount > Unvisited.size()) continue;
+
+ // Estimate edge weights by dividing the flow proportionally.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ const BasicBlock *Dest = 0;
+ bool AllEdgesHaveSameReturn = true;
+ // Check each Successor, these must all end up in the same or an empty
+ // return block otherwise its dangerous to do an estimation on them.
+ for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+ Succ != End; ++Succ) {
+ Path P;
+ GetPath(*Succ, 0, P, GetPathToExit);
+ if (Dest && Dest != P[(const BasicBlock*)0]) {
+ AllEdgesHaveSameReturn = false;
+ }
+ Dest = P[(const BasicBlock*)0];
+ }
+ if (AllEdgesHaveSameReturn) {
+ if(EstimateMissingEdges(BB)) {
+ Unvisited.erase(BB);
+ break;
+ }
+ }
+ }
+ if (oldUnvisitedCount > Unvisited.size()) continue;
+
+ // Check if there is a path to an block that has a known value and redirect
+ // flow accordingly.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ // Fetch path.
+ const BasicBlock *BB = *FI; ++FI;
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
+
+ // Calculate incoming flow.
+ double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ invalid++;
+ } else {
+ // If the path contains the successor, this means its a backedge,
+ // do not count as missing.
+ if (P.find(*NBB) == P.end())
+ inmissing++;
+ }
+ incount++;
+ }
+ }
+ if (inmissing == incount) continue;
+ if (invalid == 0) continue;
+
+ // Subtract (already) outgoing flow.
+ Processed.clear();
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(BB, *NBB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw -= ew;
+ }
+ }
+ }
+ if (iw < 0) continue;
+
+ // Check the receiving end of the path if it can handle the flow.
+ double ow = getExecutionCount(Dest);
+ Processed.clear();
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(BB, *NBB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ ow -= ew;
+ }
+ }
+ }
+ if (ow < 0) continue;
+
+ // Determine how much flow shall be used.
+ double ew = getEdgeWeight(getEdge(P[Dest],Dest));
+ if (ew != MissingValue) {
+ ew = ew<ow?ew:ow;
+ ew = ew<iw?ew:iw;
+ } else {
+ if (inmissing == 0)
+ ew = iw;
+ }
+
+ // Create flow.
+ if (ew != MissingValue) {
+ do {
+ Edge e = getEdge(P[Dest],Dest);
+ if (getEdgeWeight(e) == MissingValue) {
+ setEdgeWeight(e,ew);
+ FoundPath = true;
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Calculate a block with self loop.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+ bool SelfEdgeFound = false;
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (*NBB == BB) {
+ SelfEdgeFound = true;
+ break;
+ }
+ }
+ if (SelfEdgeFound) {
+ Edge e = getEdge(BB,BB);
+ if (getEdgeWeight(e) == MissingValue) {
+ double iw = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ }
+ }
+ }
+ setEdgeWeight(e,iw * 10);
+ FoundPath = true;
+ }
+ }
+ }
+ if (FoundPath) continue;
+
+ // Determine backedges, set them to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+ const BasicBlock *Dest = 0;
+ Path P;
+ bool BackEdgeFound = false;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
+ if (Dest == *NBB) {
+ BackEdgeFound = true;
+ break;
+ }
+ }
+ if (BackEdgeFound) {
+ Edge e = getEdge(Dest,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ }
+ do {
+ Edge e = getEdge(P[Dest], Dest);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Channel flow to return block.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
+ Dest = P[(const BasicBlock*)0];
+ if (!Dest) continue;
+
+ if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
+ // Calculate incoming flow.
+ double iw = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ }
+ }
+ }
+ do {
+ Edge e = getEdge(P[Dest], Dest);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,iw);
+ FoundPath = true;
+ } else {
+ assert(0 && "Edge should not have value already!");
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Speculatively set edges to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ Edge e = getEdge(*NBB,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ break;
+ }
+ }
+ }
+ if (FoundPath) continue;
+
+ errs() << "{";
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ dbgs() << BB->getName();
+ if (FI != FE)
+ dbgs() << ",";
+ }
+ errs() << "}";
+
+ errs() << "ASSERT: could not repair function";
+ assert(0 && "could not repair function");
+ }
+
+ EdgeWeights J = EdgeInformation[F];
+ for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) {
+ Edge e = EI->first;
+
+ bool SuccFound = false;
+ if (e.first != 0) {
+ succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first);
+ if (NBB == End) {
+ if (0 == e.second) {
+ SuccFound = true;
+ }
+ }
+ for (;NBB != End; ++NBB) {
+ if (*NBB == e.second) {
+ SuccFound = true;
+ break;
+ }
+ }
+ if (!SuccFound) {
+ removeEdge(e);
+ }
+ }
+ }
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
+ return O << MF->getFunction()->getName() << "(MF)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
+ return O << MBB->getBasicBlock()->getName() << "(MB)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
+ O << "(";
+
+ if (E.first)
+ O << E.first;
+ else
+ O << "0";
+
+ O << ",";
+
+ if (E.second)
+ O << E.second;
+ else
+ O << "0";
+
+ return O << ")";
+}
+
+} // namespace llvm
+
+//===----------------------------------------------------------------------===//
+// NoProfile ProfileInfo implementation
+//
+
+namespace {
+ struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
+ static char ID; // Class identification, replacement for typeinfo
+ NoProfileInfo() : ImmutablePass(ID) {
+ initializeNoProfileInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ virtual const char *getPassName() const {
+ return "NoProfileInfo";
+ }
+ };
+} // End of anonymous namespace
+
+char NoProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
+ "No Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
new file mode 100644
index 000000000000..f1f3e940c932
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
@@ -0,0 +1,155 @@
+//===- ProfileInfoLoad.cpp - Load profile information from disk -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ProfileInfoLoader class is used to load and represent profiling
+// information read in from the dump file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+// ByteSwap - Byteswap 'Var' if 'Really' is true.
+//
+static inline unsigned ByteSwap(unsigned Var, bool Really) {
+ if (!Really) return Var;
+ return ((Var & (255U<< 0U)) << 24U) |
+ ((Var & (255U<< 8U)) << 8U) |
+ ((Var & (255U<<16U)) >> 8U) |
+ ((Var & (255U<<24U)) >> 24U);
+}
+
+static unsigned AddCounts(unsigned A, unsigned B) {
+ // If either value is undefined, use the other.
+ if (A == ProfileInfoLoader::Uncounted) return B;
+ if (B == ProfileInfoLoader::Uncounted) return A;
+ return A + B;
+}
+
+static void ReadProfilingBlock(const char *ToolName, FILE *F,
+ bool ShouldByteSwap,
+ std::vector<unsigned> &Data) {
+ // Read the number of entries...
+ unsigned NumEntries;
+ if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
+ errs() << ToolName << ": data packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
+
+ // Read the counts...
+ std::vector<unsigned> TempSpace(NumEntries);
+
+ // Read in the block of data...
+ if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
+ errs() << ToolName << ": data packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+
+ // Make sure we have enough space... The space is initialised to -1 to
+ // facitiltate the loading of missing values for OptimalEdgeProfiling.
+ if (Data.size() < NumEntries)
+ Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
+
+ // Accumulate the data we just read into the data.
+ if (!ShouldByteSwap) {
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(TempSpace[i], Data[i]);
+ }
+ } else {
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
+ }
+ }
+}
+
+const unsigned ProfileInfoLoader::Uncounted = ~0U;
+
+// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
+// program if the file is invalid or broken.
+//
+ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
+ const std::string &Filename)
+ : Filename(Filename) {
+ FILE *F = fopen(Filename.c_str(), "rb");
+ if (F == 0) {
+ errs() << ToolName << ": Error opening '" << Filename << "': ";
+ perror(0);
+ exit(1);
+ }
+
+ // Keep reading packets until we run out of them.
+ unsigned PacketType;
+ while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
+ // If the low eight bits of the packet are zero, we must be dealing with an
+ // endianness mismatch. Byteswap all words read from the profiling
+ // information.
+ bool ShouldByteSwap = (char)PacketType == 0;
+ PacketType = ByteSwap(PacketType, ShouldByteSwap);
+
+ switch (PacketType) {
+ case ArgumentInfo: {
+ unsigned ArgLength;
+ if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
+ errs() << ToolName << ": arguments packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ ArgLength = ByteSwap(ArgLength, ShouldByteSwap);
+
+ // Read in the arguments...
+ std::vector<char> Chars(ArgLength+4);
+
+ if (ArgLength)
+ if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
+ errs() << ToolName << ": arguments packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength]));
+ break;
+ }
+
+ case FunctionInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts);
+ break;
+
+ case BlockInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts);
+ break;
+
+ case EdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
+ break;
+
+ case OptEdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
+ break;
+
+ case BBTraceInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
+ break;
+
+ default:
+ errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
+ exit(1);
+ }
+ }
+
+ fclose(F);
+}
+
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
new file mode 100644
index 000000000000..346f8d6d6258
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -0,0 +1,267 @@
+//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// loads the information from a profile dump file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-loader"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesRead, "The # of edges read.");
+
+static cl::opt<std::string>
+ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -profile-loader"));
+
+namespace {
+ class LoaderPass : public ModulePass, public ProfileInfo {
+ std::string Filename;
+ std::set<Edge> SpanningTree;
+ std::set<const BasicBlock*> BBisUnvisited;
+ unsigned ReadCount;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit LoaderPass(const std::string &filename = "")
+ : ModulePass(ID), Filename(filename) {
+ initializeLoaderPassPass(*PassRegistry::getPassRegistry());
+ if (filename.empty()) Filename = ProfileInfoFilename;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information loader";
+ }
+
+ // recurseBasicBlock() - Calculates the edge weights for as much basic
+ // blocks as possbile.
+ virtual void recurseBasicBlock(const BasicBlock *BB);
+ virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
+ virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ /// run - Load the profile information from the specified file.
+ virtual bool runOnModule(Module &M);
+ };
+} // End of anonymous namespace
+
+char LoaderPass::ID = 0;
+INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
+ "Load profile information from llvmprof.out", false, true, false)
+
+char &llvm::ProfileLoaderPassID = LoaderPass::ID;
+
+ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
+
+/// createProfileLoaderPass - This function returns a Pass that loads the
+/// profiling information for the module from the specified filename, making it
+/// available to the optimizers.
+Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
+ return new LoaderPass(Filename);
+}
+
+void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
+ unsigned &uncalc, double &count) {
+ double w;
+ if ((w = getEdgeWeight(edge)) == MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ } else {
+ count+=w;
+ }
+}
+
+// recurseBasicBlock - Visits all neighbours of a block and then tries to
+// calculate the missing edge values.
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
+
+ // break recursion if already visited
+ if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
+ BBisUnvisited.erase(BB);
+ if (!BB) return;
+
+ for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+ for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+
+ Edge tocalc;
+ if (CalculateMissingEdge(BB, tocalc)) {
+ SpanningTree.erase(tocalc);
+ }
+}
+
+void LoaderPass::readEdge(ProfileInfo::Edge e,
+ std::vector<unsigned> &ECs) {
+ if (ReadCount < ECs.size()) {
+ double weight = ECs[ReadCount++];
+ if (weight != ProfileInfoLoader::Uncounted) {
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is save because we know
+ // that everything thats representable in unsinged is also representable
+ // in double.
+ EdgeInformation[getFunction(e)][e] += (double)weight;
+
+ DEBUG(dbgs() << "--Read Edge Counter for " << e
+ << " (# "<< (ReadCount-1) << "): "
+ << (unsigned)getEdgeWeight(e) << "\n");
+ } else {
+ // This happens only if reading optimal profiling information, not when
+ // reading regular profiling information.
+ SpanningTree.insert(e);
+ }
+ }
+}
+
+bool LoaderPass::runOnModule(Module &M) {
+ ProfileInfoLoader PIL("profile-loader", Filename);
+
+ EdgeInformation.clear();
+ std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs() << "Working on " << F->getName() << "\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ Counters = PIL.getRawOptimalEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs() << "Working on " << F->getName() << "\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ readEdge(getEdge(BB,0), Counters);
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
+ }
+ while (SpanningTree.size() > 0) {
+
+ unsigned size = SpanningTree.size();
+
+ BBisUnvisited.clear();
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ BBisUnvisited.insert(ei->first);
+ BBisUnvisited.insert(ei->second);
+ }
+ while (BBisUnvisited.size() > 0) {
+ recurseBasicBlock(*BBisUnvisited.begin());
+ }
+
+ if (SpanningTree.size() == size) {
+ DEBUG(dbgs()<<"{");
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ DEBUG(dbgs()<< *ei <<",");
+ }
+ assert(0 && "No edge calculated!");
+ }
+
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ BlockInformation.clear();
+ Counters = PIL.getRawBlockCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReadCount < Counters.size())
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is save because we know
+ // that everything thats representable in unsinged is also
+ // representable in double.
+ BlockInformation[F][BB] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ }
+
+ FunctionInformation.clear();
+ Counters = PIL.getRawFunctionCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ if (ReadCount < Counters.size())
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is save because we know
+ // that everything thats representable in unsinged is also
+ // representable in double.
+ FunctionInformation[F] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
new file mode 100644
index 000000000000..c8896de89301
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
@@ -0,0 +1,383 @@
+//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that checks profiling information for
+// plausibility.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-verifier"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+using namespace llvm;
+
+static cl::opt<bool,false>
+ProfileVerifierDisableAssertions("profile-verifier-noassert",
+ cl::desc("Disable assertions"));
+
+namespace {
+ template<class FType, class BType>
+ class ProfileVerifierPassT : public FunctionPass {
+
+ struct DetailedBlockInfo {
+ const BType *BB;
+ double BBWeight;
+ double inWeight;
+ int inCount;
+ double outWeight;
+ int outCount;
+ };
+
+ ProfileInfoT<FType, BType> *PI;
+ std::set<const BType*> BBisVisited;
+ std::set<const FType*> FisVisited;
+ bool DisableAssertions;
+
+ // When debugging is enabled, the verifier prints a whole slew of debug
+ // information, otherwise its just the assert. These are all the helper
+ // functions.
+ bool PrintedDebugTree;
+ std::set<const BType*> BBisPrinted;
+ void debugEntry(DetailedBlockInfo*);
+ void printDebugInfo(const BType *BB);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ explicit ProfileVerifierPassT () : FunctionPass(ID) {
+ initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
+ DisableAssertions = ProfileVerifierDisableAssertions;
+ }
+ explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
+ DisableAssertions(da) {
+ initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<ProfileInfoT<FType, BType> >();
+ }
+
+ const char *getPassName() const {
+ return "Profiling information verifier";
+ }
+
+ /// run - Verify the profile information.
+ bool runOnFunction(FType &F);
+ void recurseBasicBlock(const BType*);
+
+ bool exitReachable(const FType*);
+ double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
+ void CheckValue(bool, const char*, DetailedBlockInfo*);
+ };
+
+ typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
+
+ if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
+
+ double BBWeight = PI->getExecutionCount(BB);
+ if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
+ double inWeight = 0;
+ int inCount = 0;
+ std::set<const BType*> ProcessedPreds;
+ for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+ dbgs() << "calculated in-edge " << E << ": "
+ << format("%20.20g",EdgeWeight) << "\n";
+ inWeight += EdgeWeight;
+ inCount++;
+ }
+ }
+ double outWeight = 0;
+ int outCount = 0;
+ std::set<const BType*> ProcessedSuccs;
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+ dbgs() << "calculated out-edge " << E << ": "
+ << format("%20.20g",EdgeWeight) << "\n";
+ outWeight += EdgeWeight;
+ outCount++;
+ }
+ }
+ dbgs() << "Block " << BB->getName() << " in "
+ << BB->getParent()->getName() << ":"
+ << "BBWeight=" << format("%20.20g",BBWeight) << ","
+ << "inWeight=" << format("%20.20g",inWeight) << ","
+ << "inCount=" << inCount << ","
+ << "outWeight=" << format("%20.20g",outWeight) << ","
+ << "outCount" << outCount << "\n";
+
+ // mark as visited and recurse into subnodes
+ BBisPrinted.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ printDebugInfo(*bbi);
+ }
+ }
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
+ dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in "
+ << DI->BB->getParent()->getName() << ":"
+ << "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
+ << "inWeight=" << format("%20.20g",DI->inWeight) << ","
+ << "inCount=" << DI->inCount << ","
+ << "outWeight=" << format("%20.20g",DI->outWeight) << ","
+ << "outCount=" << DI->outCount << "\n";
+ if (!PrintedDebugTree) {
+ PrintedDebugTree = true;
+ printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+ }
+ }
+
+ // This compares A and B for equality.
+ static bool Equals(double A, double B) {
+ return A == B;
+ }
+
+ // This checks if the function "exit" is reachable from an given function
+ // via calls, this is necessary to check if a profile is valid despite the
+ // counts not fitting exactly.
+ template<class FType, class BType>
+ bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) {
+ if (!F) return false;
+
+ if (FisVisited.count(F)) return false;
+
+ FType *Exit = F->getParent()->getFunction("exit");
+ if (Exit == F) {
+ return true;
+ }
+
+ FisVisited.insert(F);
+ bool exits = false;
+ for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
+ FType *F = CI->getCalledFunction();
+ if (F) {
+ exits |= exitReachable(F);
+ } else {
+ // This is a call to a pointer, all bets are off...
+ exits = true;
+ }
+ if (exits) break;
+ }
+ }
+ return exits;
+ }
+
+ #define ASSERTMESSAGE(M) \
+ { dbgs() << "ASSERT:" << (M) << "\n"; \
+ if (!DisableAssertions) assert(0 && (M)); }
+
+ template<class FType, class BType>
+ double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) {
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
+ dbgs() << "Edge " << E << " in Function "
+ << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
+ ASSERTMESSAGE("Edge has missing value");
+ return 0;
+ } else {
+ if (EdgeWeight < 0) {
+ dbgs() << "Edge " << E << " in Function "
+ << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
+ ASSERTMESSAGE("Edge has negative value");
+ }
+ return EdgeWeight;
+ }
+ }
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error,
+ const char *Message,
+ DetailedBlockInfo *DI) {
+ if (Error) {
+ DEBUG(debugEntry(DI));
+ dbgs() << "Block " << DI->BB->getName() << " in Function "
+ << DI->BB->getParent()->getName() << ": ";
+ ASSERTMESSAGE(Message);
+ }
+ return;
+ }
+
+ // This calculates the Information for a block and then recurses into the
+ // successors.
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) {
+
+ // Break the recursion by remembering all visited blocks.
+ if (BBisVisited.find(BB) != BBisVisited.end()) return;
+
+ // Use a data structure to store all the information, this can then be handed
+ // to debug printers.
+ DetailedBlockInfo DI;
+ DI.BB = BB;
+ DI.outCount = DI.inCount = 0;
+ DI.inWeight = DI.outWeight = 0;
+
+ // Read predecessors.
+ std::set<const BType*> ProcessedPreds;
+ const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
+ // If there are none, check for (0,BB) edge.
+ if (bpi == bpe) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
+ DI.inCount++;
+ }
+ for (;bpi != bpe; ++bpi) {
+ if (ProcessedPreds.insert(*bpi).second) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+ DI.inCount++;
+ }
+ }
+
+ // Read successors.
+ std::set<const BType*> ProcessedSuccs;
+ succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // If there is an (0,BB) edge, consider it too. (This is done not only when
+ // there are no successors, but every time; not every function contains
+ // return blocks with no successors (think loop latch as return block)).
+ double w = PI->getEdgeWeight(PI->getEdge(BB,0));
+ if (w != ProfileInfoT<FType, BType>::MissingValue) {
+ DI.outWeight += w;
+ DI.outCount++;
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+ DI.outCount++;
+ }
+ }
+
+ // Read block weight.
+ DI.BBWeight = PI->getExecutionCount(BB);
+ CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
+ "BasicBlock has missing value", &DI);
+ CheckValue(DI.BBWeight < 0,
+ "BasicBlock has negative value", &DI);
+
+ // Check if this block is a setjmp target.
+ bool isSetJmpTarget = false;
+ if (DI.outWeight > DI.inWeight) {
+ for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FType *F = CI->getCalledFunction();
+ if (F && (F->getName() == "_setjmp")) {
+ isSetJmpTarget = true; break;
+ }
+ }
+ }
+ }
+ // Check if this block is eventually reaching exit.
+ bool isExitReachable = false;
+ if (DI.inWeight > DI.outWeight) {
+ for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FType *F = CI->getCalledFunction();
+ if (F) {
+ FisVisited.clear();
+ isExitReachable |= exitReachable(F);
+ } else {
+ // This is a call to a pointer, all bets are off...
+ isExitReachable = true;
+ }
+ if (isExitReachable) break;
+ }
+ }
+ }
+
+ if (DI.inCount > 0 && DI.outCount == 0) {
+ // If this is a block with no successors.
+ if (!isSetJmpTarget) {
+ CheckValue(!Equals(DI.inWeight,DI.BBWeight),
+ "inWeight and BBWeight do not match", &DI);
+ }
+ } else if (DI.inCount == 0 && DI.outCount > 0) {
+ // If this is a block with no predecessors.
+ if (!isExitReachable)
+ CheckValue(!Equals(DI.BBWeight,DI.outWeight),
+ "BBWeight and outWeight do not match", &DI);
+ } else {
+ // If this block has successors and predecessors.
+ if (DI.inWeight > DI.outWeight && !isExitReachable)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "inWeight and outWeight do not match", &DI);
+ if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "inWeight and outWeight do not match", &DI);
+ }
+
+
+ // Mark this block as visited, rescurse into successors.
+ BBisVisited.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ recurseBasicBlock(*bbi);
+ }
+ }
+
+ template<class FType, class BType>
+ bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
+ PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
+ if (!PI)
+ ASSERTMESSAGE("No ProfileInfo available");
+
+ // Prepare global variables.
+ PrintedDebugTree = false;
+ BBisVisited.clear();
+
+ // Fetch entry block and recurse into it.
+ const BType *entry = &F.getEntryBlock();
+ recurseBasicBlock(entry);
+
+ if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
+ ASSERTMESSAGE("Function count and entry block count do not match");
+
+ return false;
+ }
+
+ template<class FType, class BType>
+ char ProfileVerifierPassT<FType, BType>::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true)
+
+namespace llvm {
+ FunctionPass *createProfileVerifierPass() {
+ return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp
new file mode 100644
index 000000000000..0a342b2167e4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp
@@ -0,0 +1,36 @@
+//===- PtrUseVisitor.cpp - InstVisitors over a pointers uses --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Implementation of the pointer use visitors.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PtrUseVisitor.h"
+
+using namespace llvm;
+
+void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI) {
+ if (VisitedUses.insert(&UI.getUse())) {
+ UseToVisit NewU = {
+ UseToVisit::UseAndIsOffsetKnownPair(&UI.getUse(), IsOffsetKnown),
+ Offset
+ };
+ Worklist.push_back(llvm_move(NewU));
+ }
+ }
+}
+
+bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) {
+ if (!IsOffsetKnown)
+ return false;
+
+ return GEPI.accumulateConstantOffset(DL, Offset);
+}
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
new file mode 100644
index 000000000000..fad5074086ce
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -0,0 +1,834 @@
+//===- RegionInfo.cpp - SESE region detection analysis --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Detects single entry single exit regions in the control flow graph.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define DEBUG_TYPE "region"
+#include "llvm/Support/Debug.h"
+
+#include <set>
+#include <algorithm>
+
+using namespace llvm;
+
+// Always verify if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyRegionInfo = true;
+#else
+static bool VerifyRegionInfo = false;
+#endif
+
+static cl::opt<bool,true>
+VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo),
+ cl::desc("Verify region info (time consuming)"));
+
+STATISTIC(numRegions, "The # of regions");
+STATISTIC(numSimpleRegions, "The # of simple regions");
+
+static cl::opt<enum Region::PrintStyle> printStyle("print-region-style",
+ cl::Hidden,
+ cl::desc("style of printing regions"),
+ cl::values(
+ clEnumValN(Region::PrintNone, "none", "print no details"),
+ clEnumValN(Region::PrintBB, "bb",
+ "print regions in detail with block_iterator"),
+ clEnumValN(Region::PrintRN, "rn",
+ "print regions in detail with element_iterator"),
+ clEnumValEnd));
+//===----------------------------------------------------------------------===//
+/// Region Implementation
+Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo,
+ DominatorTree *dt, Region *Parent)
+ : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {}
+
+Region::~Region() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator it = BBNodeMap.begin(),
+ ie = BBNodeMap.end(); it != ie; ++it)
+ delete it->second;
+
+ // Only clean the cache for this Region. Caches of child Regions will be
+ // cleaned when the child Regions are deleted.
+ BBNodeMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+}
+
+void Region::replaceEntry(BasicBlock *BB) {
+ entry.setPointer(BB);
+}
+
+void Region::replaceExit(BasicBlock *BB) {
+ assert(exit && "No exit to replace!");
+ exit = BB;
+}
+
+bool Region::contains(const BasicBlock *B) const {
+ BasicBlock *BB = const_cast<BasicBlock*>(B);
+
+ assert(DT->getNode(BB) && "BB not part of the dominance tree");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ // Toplevel region.
+ if (!exit)
+ return true;
+
+ return (DT->dominates(entry, BB)
+ && !(DT->dominates(exit, BB) && DT->dominates(entry, exit)));
+}
+
+bool Region::contains(const Loop *L) const {
+ // BBs that are not part of any loop are element of the Loop
+ // described by the NULL pointer. This loop is not part of any region,
+ // except if the region describes the whole function.
+ if (L == 0)
+ return getExit() == 0;
+
+ if (!contains(L->getHeader()))
+ return false;
+
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(),
+ BE = ExitingBlocks.end(); BI != BE; ++BI)
+ if (!contains(*BI))
+ return false;
+
+ return true;
+}
+
+Loop *Region::outermostLoopInRegion(Loop *L) const {
+ if (!contains(L))
+ return 0;
+
+ while (L && contains(L->getParentLoop())) {
+ L = L->getParentLoop();
+ }
+
+ return L;
+}
+
+Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const {
+ assert(LI && BB && "LI and BB cannot be null!");
+ Loop *L = LI->getLoopFor(BB);
+ return outermostLoopInRegion(L);
+}
+
+BasicBlock *Region::getEnteringBlock() const {
+ BasicBlock *entry = getEntry();
+ BasicBlock *Pred;
+ BasicBlock *enteringBlock = 0;
+
+ for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
+ ++PI) {
+ Pred = *PI;
+ if (DT->getNode(Pred) && !contains(Pred)) {
+ if (enteringBlock)
+ return 0;
+
+ enteringBlock = Pred;
+ }
+ }
+
+ return enteringBlock;
+}
+
+BasicBlock *Region::getExitingBlock() const {
+ BasicBlock *exit = getExit();
+ BasicBlock *Pred;
+ BasicBlock *exitingBlock = 0;
+
+ if (!exit)
+ return 0;
+
+ for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
+ ++PI) {
+ Pred = *PI;
+ if (contains(Pred)) {
+ if (exitingBlock)
+ return 0;
+
+ exitingBlock = Pred;
+ }
+ }
+
+ return exitingBlock;
+}
+
+bool Region::isSimple() const {
+ return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock();
+}
+
+std::string Region::getNameStr() const {
+ std::string exitName;
+ std::string entryName;
+
+ if (getEntry()->getName().empty()) {
+ raw_string_ostream OS(entryName);
+
+ WriteAsOperand(OS, getEntry(), false);
+ } else
+ entryName = getEntry()->getName();
+
+ if (getExit()) {
+ if (getExit()->getName().empty()) {
+ raw_string_ostream OS(exitName);
+
+ WriteAsOperand(OS, getExit(), false);
+ } else
+ exitName = getExit()->getName();
+ } else
+ exitName = "<Function Return>";
+
+ return entryName + " => " + exitName;
+}
+
+void Region::verifyBBInRegion(BasicBlock *BB) const {
+ if (!contains(BB))
+ llvm_unreachable("Broken region found!");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (!contains(*SI) && exit != *SI)
+ llvm_unreachable("Broken region found!");
+
+ if (entry != BB)
+ for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI)
+ if (!contains(*SI))
+ llvm_unreachable("Broken region found!");
+}
+
+void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const {
+ BasicBlock *exit = getExit();
+
+ visited->insert(BB);
+
+ verifyBBInRegion(BB);
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (*SI != exit && visited->find(*SI) == visited->end())
+ verifyWalk(*SI, visited);
+}
+
+void Region::verifyRegion() const {
+ // Only do verification when user wants to, otherwise this expensive
+ // check will be invoked by PassManager.
+ if (!VerifyRegionInfo) return;
+
+ std::set<BasicBlock*> visited;
+ verifyWalk(getEntry(), &visited);
+}
+
+void Region::verifyRegionNest() const {
+ for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->verifyRegionNest();
+
+ verifyRegion();
+}
+
+Region::element_iterator Region::element_begin() {
+ return GraphTraits<Region*>::nodes_begin(this);
+}
+
+Region::element_iterator Region::element_end() {
+ return GraphTraits<Region*>::nodes_end(this);
+}
+
+Region::const_element_iterator Region::element_begin() const {
+ return GraphTraits<const Region*>::nodes_begin(this);
+}
+
+Region::const_element_iterator Region::element_end() const {
+ return GraphTraits<const Region*>::nodes_end(this);
+}
+
+Region* Region::getSubRegionNode(BasicBlock *BB) const {
+ Region *R = RI->getRegionFor(BB);
+
+ if (!R || R == this)
+ return 0;
+
+ // If we pass the BB out of this region, that means our code is broken.
+ assert(contains(R) && "BB not in current region!");
+
+ while (contains(R->getParent()) && R->getParent() != this)
+ R = R->getParent();
+
+ if (R->getEntry() != BB)
+ return 0;
+
+ return R;
+}
+
+RegionNode* Region::getBBNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+
+ BBNodeMapT::const_iterator at = BBNodeMap.find(BB);
+
+ if (at != BBNodeMap.end())
+ return at->second;
+
+ RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB);
+ BBNodeMap.insert(std::make_pair(BB, NewNode));
+ return NewNode;
+}
+
+RegionNode* Region::getNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+ if (Region* Child = getSubRegionNode(BB))
+ return Child->getNode();
+
+ return getBBNode(BB);
+}
+
+void Region::transferChildrenTo(Region *To) {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ (*I)->parent = To;
+ To->children.push_back(*I);
+ }
+ children.clear();
+}
+
+void Region::addSubRegion(Region *SubRegion, bool moveChildren) {
+ assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
+ assert(std::find(begin(), end(), SubRegion) == children.end()
+ && "Subregion already exists!");
+
+ SubRegion->parent = this;
+ children.push_back(SubRegion);
+
+ if (!moveChildren)
+ return;
+
+ assert(SubRegion->children.size() == 0
+ && "SubRegions that contain children are not supported");
+
+ for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+ if (!(*I)->isSubRegion()) {
+ BasicBlock *BB = (*I)->getNodeAs<BasicBlock>();
+
+ if (SubRegion->contains(BB))
+ RI->setRegionFor(BB, SubRegion);
+ }
+
+ std::vector<Region*> Keep;
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (SubRegion->contains(*I) && *I != SubRegion) {
+ SubRegion->children.push_back(*I);
+ (*I)->parent = SubRegion;
+ } else
+ Keep.push_back(*I);
+
+ children.clear();
+ children.insert(children.begin(), Keep.begin(), Keep.end());
+}
+
+
+Region *Region::removeSubRegion(Region *Child) {
+ assert(Child->parent == this && "Child is not a child of this region!");
+ Child->parent = 0;
+ RegionSet::iterator I = std::find(children.begin(), children.end(), Child);
+ assert(I != children.end() && "Region does not exit. Unable to remove.");
+ children.erase(children.begin()+(I-begin()));
+ return Child;
+}
+
+unsigned Region::getDepth() const {
+ unsigned Depth = 0;
+
+ for (Region *R = parent; R != 0; R = R->parent)
+ ++Depth;
+
+ return Depth;
+}
+
+Region *Region::getExpandedRegion() const {
+ unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors();
+
+ if (NumSuccessors == 0)
+ return NULL;
+
+ for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+ PI != PE; ++PI)
+ if (!DT->dominates(getEntry(), *PI))
+ return NULL;
+
+ Region *R = RI->getRegionFor(exit);
+
+ if (R->getEntry() != exit) {
+ if (exit->getTerminator()->getNumSuccessors() == 1)
+ return new Region(getEntry(), *succ_begin(exit), RI, DT);
+ else
+ return NULL;
+ }
+
+ while (R->getParent() && R->getParent()->getEntry() == exit)
+ R = R->getParent();
+
+ if (!DT->dominates(getEntry(), R->getExit()))
+ for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+ PI != PE; ++PI)
+ if (!DT->dominates(R->getExit(), *PI))
+ return NULL;
+
+ return new Region(getEntry(), R->getExit(), RI, DT);
+}
+
+void Region::print(raw_ostream &OS, bool print_tree, unsigned level,
+ enum PrintStyle Style) const {
+ if (print_tree)
+ OS.indent(level*2) << "[" << level << "] " << getNameStr();
+ else
+ OS.indent(level*2) << getNameStr();
+
+ OS << "\n";
+
+
+ if (Style != PrintNone) {
+ OS.indent(level*2) << "{\n";
+ OS.indent(level*2 + 2);
+
+ if (Style == PrintBB) {
+ for (const_block_iterator I = block_begin(), E = block_end(); I != E; ++I)
+ OS << (*I)->getName() << ", "; // TODO: remove the last ","
+ } else if (Style == PrintRN) {
+ for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I)
+ OS << **I << ", "; // TODO: remove the last ",
+ }
+
+ OS << "\n";
+ }
+
+ if (print_tree)
+ for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->print(OS, print_tree, level+1, Style);
+
+ if (Style != PrintNone)
+ OS.indent(level*2) << "} \n";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void Region::dump() const {
+ print(dbgs(), true, getDepth(), printStyle.getValue());
+}
+#endif
+
+void Region::clearNodeCache() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator I = BBNodeMap.begin(),
+ IE = BBNodeMap.end(); I != IE; ++I)
+ delete I->second;
+
+ BBNodeMap.clear();
+ for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->clearNodeCache();
+}
+
+//===----------------------------------------------------------------------===//
+// RegionInfo implementation
+//
+
+bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry,
+ BasicBlock *exit) const {
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(entry, P) && !DT->dominates(exit, P))
+ return false;
+ }
+ return true;
+}
+
+bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+ typedef DominanceFrontier::DomSetType DST;
+
+ DST *entrySuccs = &DF->find(entry)->second;
+
+ // Exit is the header of a loop that contains the entry. In this case,
+ // the dominance frontier must only contain the exit.
+ if (!DT->dominates(entry, exit)) {
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI)
+ if (*SI != exit && *SI != entry)
+ return false;
+
+ return true;
+ }
+
+ DST *exitSuccs = &DF->find(exit)->second;
+
+ // Do not allow edges leaving the region.
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI) {
+ if (*SI == exit || *SI == entry)
+ continue;
+ if (exitSuccs->find(*SI) == exitSuccs->end())
+ return false;
+ if (!isCommonDomFrontier(*SI, entry, exit))
+ return false;
+ }
+
+ // Do not allow edges pointing into the region.
+ for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end();
+ SI != SE; ++SI)
+ if (DT->properlyDominates(entry, *SI) && *SI != exit)
+ return false;
+
+
+ return true;
+}
+
+void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit,
+ BBtoBBMap *ShortCut) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ BBtoBBMap::iterator e = ShortCut->find(exit);
+
+ if (e == ShortCut->end())
+ // No further region at exit available.
+ (*ShortCut)[entry] = exit;
+ else {
+ // We found a region e that starts at exit. Therefore (entry, e->second)
+ // is also a region, that is larger than (entry, exit). Insert the
+ // larger one.
+ BasicBlock *BB = e->second;
+ (*ShortCut)[entry] = BB;
+ }
+}
+
+DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N,
+ BBtoBBMap *ShortCut) const {
+ BBtoBBMap::iterator e = ShortCut->find(N->getBlock());
+
+ if (e == ShortCut->end())
+ return N->getIDom();
+
+ return PDT->getNode(e->second)->getIDom();
+}
+
+bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ unsigned num_successors = succ_end(entry) - succ_begin(entry);
+
+ if (num_successors <= 1 && exit == *(succ_begin(entry)))
+ return true;
+
+ return false;
+}
+
+void RegionInfo::updateStatistics(Region *R) {
+ ++numRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple()) ++numSimpleRegions;
+}
+
+Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ if (isTrivialRegion(entry, exit))
+ return 0;
+
+ Region *region = new Region(entry, exit, this, DT);
+ BBtoRegion.insert(std::make_pair(entry, region));
+
+ #ifdef XDEBUG
+ region->verifyRegion();
+ #else
+ DEBUG(region->verifyRegion());
+ #endif
+
+ updateStatistics(region);
+ return region;
+}
+
+void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) {
+ assert(entry);
+
+ DomTreeNode *N = PDT->getNode(entry);
+
+ if (!N)
+ return;
+
+ Region *lastRegion= 0;
+ BasicBlock *lastExit = entry;
+
+ // As only a BasicBlock that postdominates entry can finish a region, walk the
+ // post dominance tree upwards.
+ while ((N = getNextPostDom(N, ShortCut))) {
+ BasicBlock *exit = N->getBlock();
+
+ if (!exit)
+ break;
+
+ if (isRegion(entry, exit)) {
+ Region *newRegion = createRegion(entry, exit);
+
+ if (lastRegion)
+ newRegion->addSubRegion(lastRegion);
+
+ lastRegion = newRegion;
+ lastExit = exit;
+ }
+
+ // This can never be a region, so stop the search.
+ if (!DT->dominates(entry, exit))
+ break;
+ }
+
+ // Tried to create regions from entry to lastExit. Next time take a
+ // shortcut from entry to lastExit.
+ if (lastExit != entry)
+ insertShortCut(entry, lastExit, ShortCut);
+}
+
+void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) {
+ BasicBlock *entry = &(F.getEntryBlock());
+ DomTreeNode *N = DT->getNode(entry);
+
+ // Iterate over the dominance tree in post order to start with the small
+ // regions from the bottom of the dominance tree. If the small regions are
+ // detected first, detection of bigger regions is faster, as we can jump
+ // over the small regions.
+ for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE;
+ ++FI) {
+ findRegionsWithEntry(FI->getBlock(), ShortCut);
+ }
+}
+
+Region *RegionInfo::getTopMostParent(Region *region) {
+ while (region->parent)
+ region = region->getParent();
+
+ return region;
+}
+
+void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) {
+ BasicBlock *BB = N->getBlock();
+
+ // Passed region exit
+ while (BB == region->getExit())
+ region = region->getParent();
+
+ BBtoRegionMap::iterator it = BBtoRegion.find(BB);
+
+ // This basic block is a start block of a region. It is already in the
+ // BBtoRegion relation. Only the child basic blocks have to be updated.
+ if (it != BBtoRegion.end()) {
+ Region *newRegion = it->second;
+ region->addSubRegion(getTopMostParent(newRegion));
+ region = newRegion;
+ } else {
+ BBtoRegion[BB] = region;
+ }
+
+ for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI)
+ buildRegionsTree(*CI, region);
+}
+
+void RegionInfo::releaseMemory() {
+ BBtoRegion.clear();
+ if (TopLevelRegion)
+ delete TopLevelRegion;
+ TopLevelRegion = 0;
+}
+
+RegionInfo::RegionInfo() : FunctionPass(ID) {
+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+ TopLevelRegion = 0;
+}
+
+RegionInfo::~RegionInfo() {
+ releaseMemory();
+}
+
+void RegionInfo::Calculate(Function &F) {
+ // ShortCut a function where for every BB the exit of the largest region
+ // starting with BB is stored. These regions can be threated as single BBS.
+ // This improves performance on linear CFGs.
+ BBtoBBMap ShortCut;
+
+ scanForRegions(F, &ShortCut);
+ BasicBlock *BB = &F.getEntryBlock();
+ buildRegionsTree(DT->getNode(BB), TopLevelRegion);
+}
+
+bool RegionInfo::runOnFunction(Function &F) {
+ releaseMemory();
+
+ DT = &getAnalysis<DominatorTree>();
+ PDT = &getAnalysis<PostDominatorTree>();
+ DF = &getAnalysis<DominanceFrontier>();
+
+ TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0);
+ updateStatistics(TopLevelRegion);
+
+ Calculate(F);
+
+ return false;
+}
+
+void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+}
+
+void RegionInfo::print(raw_ostream &OS, const Module *) const {
+ OS << "Region tree:\n";
+ TopLevelRegion->print(OS, true, 0, printStyle.getValue());
+ OS << "End region tree\n";
+}
+
+void RegionInfo::verifyAnalysis() const {
+ // Only do verification when user wants to, otherwise this expensive check
+ // will be invoked by PMDataManager::verifyPreservedAnalysis when
+ // a regionpass (marked PreservedAll) finish.
+ if (!VerifyRegionInfo) return;
+
+ TopLevelRegion->verifyRegionNest();
+}
+
+// Region pass manager support.
+Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
+ BBtoRegionMap::const_iterator I=
+ BBtoRegion.find(BB);
+ return I != BBtoRegion.end() ? I->second : 0;
+}
+
+void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) {
+ BBtoRegion[BB] = R;
+}
+
+Region *RegionInfo::operator[](BasicBlock *BB) const {
+ return getRegionFor(BB);
+}
+
+BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
+ BasicBlock *Exit = NULL;
+
+ while (true) {
+ // Get largest region that starts at BB.
+ Region *R = getRegionFor(BB);
+ while (R && R->getParent() && R->getParent()->getEntry() == BB)
+ R = R->getParent();
+
+ // Get the single exit of BB.
+ if (R && R->getEntry() == BB)
+ Exit = R->getExit();
+ else if (++succ_begin(BB) == succ_end(BB))
+ Exit = *succ_begin(BB);
+ else // No single exit exists.
+ return Exit;
+
+ // Get largest region that starts at Exit.
+ Region *ExitR = getRegionFor(Exit);
+ while (ExitR && ExitR->getParent()
+ && ExitR->getParent()->getEntry() == Exit)
+ ExitR = ExitR->getParent();
+
+ for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE;
+ ++PI)
+ if (!R->contains(*PI) && !ExitR->contains(*PI))
+ break;
+
+ // This stops infinite cycles.
+ if (DT->dominates(Exit, BB))
+ break;
+
+ BB = Exit;
+ }
+
+ return Exit;
+}
+
+Region*
+RegionInfo::getCommonRegion(Region *A, Region *B) const {
+ assert (A && B && "One of the Regions is NULL");
+
+ if (A->contains(B)) return A;
+
+ while (!B->contains(A))
+ B = B->getParent();
+
+ return B;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const {
+ Region* ret = Regions.back();
+ Regions.pop_back();
+
+ for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(),
+ E = Regions.end(); I != E; ++I)
+ ret = getCommonRegion(ret, *I);
+
+ return ret;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
+ Region* ret = getRegionFor(BBs.back());
+ BBs.pop_back();
+
+ for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(),
+ E = BBs.end(); I != E; ++I)
+ ret = getCommonRegion(ret, getRegionFor(*I));
+
+ return ret;
+}
+
+void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB)
+{
+ Region *R = getRegionFor(OldBB);
+
+ setRegionFor(NewBB, R);
+
+ while (R->getEntry() == OldBB && !R->isTopLevelRegion()) {
+ R->replaceEntry(NewBB);
+ R = R->getParent();
+ }
+
+ setRegionFor(OldBB, R);
+}
+
+char RegionInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true)
+
+// Create methods available outside of this file, to use them
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// the link time optimization.
+
+namespace llvm {
+ FunctionPass *createRegionInfoPass() {
+ return new RegionInfo();
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
new file mode 100644
index 000000000000..9208fa21d7ec
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -0,0 +1,275 @@
+//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements RegionPass and RGPassManager. All region optimization
+// and transformation passes are derived from RegionPass. RGPassManager is
+// responsible for managing RegionPasses.
+// most of these codes are COPY from LoopPass.cpp
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/Timer.h"
+
+#define DEBUG_TYPE "regionpassmgr"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// RGPassManager
+//
+
+char RGPassManager::ID = 0;
+
+RGPassManager::RGPassManager()
+ : FunctionPass(ID), PMDataManager() {
+ skipThisRegion = false;
+ redoThisRegion = false;
+ RI = NULL;
+ CurrentRegion = NULL;
+}
+
+// Recurse through all subregions and all regions into RQ.
+static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) {
+ RQ.push_back(R);
+ for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I)
+ addRegionIntoQueue(*I, RQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.addRequired<RegionInfo>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool RGPassManager::runOnFunction(Function &F) {
+ RI = &getAnalysis<RegionInfo>();
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ addRegionIntoQueue(RI->getTopLevelRegion(), RQ);
+
+ if (RQ.empty()) // No regions, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
+ I != E; ++I) {
+ Region *R = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *RP = (RegionPass *)getContainedPass(Index);
+ Changed |= RP->doInitialization(R, *this);
+ }
+ }
+
+ // Walk Regions
+ while (!RQ.empty()) {
+
+ CurrentRegion = RQ.back();
+ skipThisRegion = false;
+ redoThisRegion = false;
+
+ // Run all passes on the current Region.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+
+ dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+ CurrentRegion->getNameStr());
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry());
+
+ TimeRegion PassTimer(getPassTimer(P));
+ Changed |= P->runOnRegion(CurrentRegion, *this);
+ }
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr());
+ dumpPreservedSet(P);
+
+ if (!skipThisRegion) {
+ // Manually check that this region is still healthy. This is done
+ // instead of relying on RegionInfo::verifyRegion since RegionInfo
+ // is a function pass and it's really expensive to verify every
+ // Region in the function every time. That level of checking can be
+ // enabled with the -verify-region-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(P));
+ CurrentRegion->verifyRegion();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr(),
+ ON_REGION_MSG);
+
+ if (skipThisRegion)
+ // Do not run other passes on this region.
+ break;
+ }
+
+ // If the region was deleted, release all the region passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisRegion)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_REGION_MSG);
+ }
+
+ // Pop the region from queue after running all passes.
+ RQ.pop_back();
+
+ if (redoThisRegion)
+ RQ.push_back(CurrentRegion);
+
+ // Free all region nodes created in region passes.
+ RI->clearNodeCache();
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+ // Print the region tree after all pass.
+ DEBUG(
+ dbgs() << "\nRegion tree of function " << F.getName()
+ << " after all region Pass:\n";
+ RI->dump();
+ dbgs() << "\n";
+ );
+
+ return Changed;
+}
+
+/// Print passes managed by this manager
+void RGPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Region Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// PrintRegionPass
+class PrintRegionPass : public RegionPass {
+private:
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+public:
+ static char ID;
+ PrintRegionPass() : RegionPass(ID), Out(dbgs()) {}
+ PrintRegionPass(const std::string &B, raw_ostream &o)
+ : RegionPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+ Out << Banner;
+ for (Region::block_iterator I = R->block_begin(), E = R->block_end();
+ I != E; ++I)
+ (*I)->print(Out);
+
+ return false;
+ }
+};
+
+char PrintRegionPass::ID = 0;
+} //end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// RegionPass
+
+// Check if this pass is suitable for the current RGPassManager, if
+// available. This pass P is not suitable for a RGPassManager if P
+// is not preserving higher level analysis info used by other
+// RGPassManager passes. In such case, pop RGPassManager from the
+// stack. This will force assignPassManager() to create new
+// LPPassManger as expected.
+void RegionPass::preparePassManager(PMStack &PMS) {
+
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+
+ // If this pass is destroying high level information that is used
+ // by other passes that are managed by LPM then do not insert
+ // this pass in current LPM. Use new RGPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void RegionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+ RGPassManager *RGPM;
+
+ // Create new Region Pass Manager if it does not exist.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager)
+ RGPM = (RGPassManager*)PMS.top();
+ else {
+
+ assert (!PMS.empty() && "Unable to create Region Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Region Pass Manager
+ RGPM = new RGPassManager();
+ RGPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(RGPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ TPM->schedulePass(RGPM);
+
+ // [4] Push new manager into PMS
+ PMS.push(RGPM);
+ }
+
+ RGPM->add(this);
+}
+
+/// Get the printer pass
+Pass *RegionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintRegionPass(Banner, O);
+}
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
new file mode 100644
index 000000000000..c5f1b925921b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -0,0 +1,218 @@
+//===- RegionPrinter.cpp - Print regions tree pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Print out the region tree of a function using dotty/graphviz.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPrinter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// onlySimpleRegion - Show only the simple regions in the RegionViewer.
+static cl::opt<bool>
+onlySimpleRegions("only-simple-regions",
+ cl::desc("Show only simple regions in the graphviz viewer"),
+ cl::Hidden,
+ cl::init(false));
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
+
+ if (!Node->isSubRegion()) {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+
+ return "Not implemented";
+ }
+};
+
+template<>
+struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<RegionNode*>(isSimple) {}
+
+ static std::string getGraphName(RegionInfo *DT) {
+ return "Region Graph";
+ }
+
+ std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+ return DOTGraphTraits<RegionNode*>::getNodeLabel(Node,
+ G->getTopLevelRegion());
+ }
+
+ std::string getEdgeAttributes(RegionNode *srcNode,
+ GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfo *RI) {
+
+ RegionNode *destNode = *CI;
+
+ if (srcNode->isSubRegion() || destNode->isSubRegion())
+ return "";
+
+ // In case of a backedge, do not use it to define the layout of the nodes.
+ BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
+ BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();
+
+ Region *R = RI->getRegionFor(destBB);
+
+ while (R && R->getParent())
+ if (R->getParent()->getEntry() == destBB)
+ R = R->getParent();
+ else
+ break;
+
+ if (R->getEntry() == destBB && R->contains(srcBB))
+ return "constraint=false";
+
+ return "";
+ }
+
+ // Print the cluster of the subregions. This groups the single basic blocks
+ // and adds a different background color for each group.
+ static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW,
+ unsigned depth = 0) {
+ raw_ostream &O = GW.getOStream();
+ O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R)
+ << " {\n";
+ O.indent(2 * (depth + 1)) << "label = \"\";\n";
+
+ if (!onlySimpleRegions || R->isSimple()) {
+ O.indent(2 * (depth + 1)) << "style = filled;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 1) << "\n";
+
+ } else {
+ O.indent(2 * (depth + 1)) << "style = solid;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 2) << "\n";
+ }
+
+ for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+ printRegionCluster(*RI, GW, depth + 1);
+
+ RegionInfo *RI = R->getRegionInfo();
+
+ for (Region::const_block_iterator BI = R->block_begin(),
+ BE = R->block_end(); BI != BE; ++BI)
+ if (RI->getRegionFor(*BI) == R)
+ O.indent(2 * (depth + 1)) << "Node"
+ << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(*BI))
+ << ";\n";
+
+ O.indent(2 * depth) << "}\n";
+ }
+
+ static void addCustomGraphFeatures(const RegionInfo* RI,
+ GraphWriter<RegionInfo*> &GW) {
+ raw_ostream &O = GW.getOStream();
+ O << "\tcolorscheme = \"paired12\"\n";
+ printRegionCluster(RI->getTopLevelRegion(), GW, 4);
+ }
+};
+} //end namespace llvm
+
+namespace {
+
+struct RegionViewer
+ : public DOTGraphTraitsViewer<RegionInfo, false> {
+ static char ID;
+ RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){
+ initializeRegionViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionViewer::ID = 0;
+
+struct RegionOnlyViewer
+ : public DOTGraphTraitsViewer<RegionInfo, true> {
+ static char ID;
+ RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) {
+ initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionOnlyViewer::ID = 0;
+
+struct RegionPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, false> {
+ static char ID;
+ RegionPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {
+ initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionPrinter::ID = 0;
+} //end anonymous namespace
+
+INITIALIZE_PASS(RegionPrinter, "dot-regions",
+ "Print regions of function to 'dot' file", true, true)
+
+INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
+ true, true)
+
+INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
+ "View regions of function (with no function bodies)",
+ true, true)
+
+namespace {
+
+struct RegionOnlyPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, true> {
+ static char ID;
+ RegionOnlyPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {
+ initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+}
+
+char RegionOnlyPrinter::ID = 0;
+INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
+ "Print regions of function to 'dot' file "
+ "(with no function bodies)",
+ true, true)
+
+FunctionPass* llvm::createRegionViewerPass() {
+ return new RegionViewer();
+}
+
+FunctionPass* llvm::createRegionOnlyViewerPass() {
+ return new RegionOnlyViewer();
+}
+
+FunctionPass* llvm::createRegionPrinterPass() {
+ return new RegionPrinter();
+}
+
+FunctionPass* llvm::createRegionOnlyPrinterPass() {
+ return new RegionOnlyPrinter();
+}
+
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
new file mode 100644
index 000000000000..6ea915fdb0b7
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -0,0 +1,7057 @@
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution analysis
+// engine, which is used primarily to analyze expressions involving induction
+// variables in loops.
+//
+// There are several aspects to this library. First is the representation of
+// scalar expressions, which are represented as subclasses of the SCEV class.
+// These classes are used to represent certain types of subexpressions that we
+// can handle. We only create one SCEV of a particular shape, so
+// pointer-comparisons for equality are legal.
+//
+// One important aspect of the SCEV objects is that they are never cyclic, even
+// if there is a cycle in the dataflow for an expression (ie, a PHI node). If
+// the PHI node is one of the idioms that we can represent (e.g., a polynomial
+// recurrence) then we represent it directly as a recurrence node, otherwise we
+// represent it as a SCEVUnknown node.
+//
+// In addition to being able to represent expressions of various types, we also
+// have folders that are used to build the *canonical* representation for a
+// particular expression. These folders are capable of using a variety of
+// rewrite rules to simplify the expressions.
+//
+// Once the folders are defined, we can implement the more interesting
+// higher-level code, such as the code that recognizes PHI nodes of various
+// types, computes the execution count of a loop, etc.
+//
+// TODO: We should use these routines and value representations to implement
+// dependence analysis!
+//
+//===----------------------------------------------------------------------===//
+//
+// There are several good references for the techniques used in this analysis.
+//
+// Chains of recurrences -- a method to expedite the evaluation
+// of closed-form functions
+// Olaf Bachmann, Paul S. Wang, Eugene V. Zima
+//
+// On computational properties of chains of recurrences
+// Eugene V. Zima
+//
+// Symbolic Evaluation of Chains of Recurrences for Loop Optimization
+// Robert A. van Engelen
+//
+// Efficient Symbolic Analysis for Optimizing Compilers
+// Robert A. van Engelen
+//
+// Using the chains of recurrences algebra for data dependence testing and
+// induction variable substitution
+// MS Thesis, Johnie Birch
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalar-evolution"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumArrayLenItCounts,
+ "Number of trip counts computed with array length");
+STATISTIC(NumTripCountsComputed,
+ "Number of loops with predictable loop counts");
+STATISTIC(NumTripCountsNotComputed,
+ "Number of loops without predictable loop counts");
+STATISTIC(NumBruteForceTripCountsComputed,
+ "Number of loops with trip counts computed by force");
+
+static cl::opt<unsigned>
+MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
+ cl::desc("Maximum number of iterations SCEV will "
+ "symbolically execute a constant "
+ "derived loop"),
+ cl::init(100));
+
+// FIXME: Enable this with XDEBUG when the test suite is clean.
+static cl::opt<bool>
+VerifySCEV("verify-scev",
+ cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
+
+INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+char ScalarEvolution::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// SCEV class definitions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Implementation of the SCEV class.
+//
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void SCEV::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
+
+void SCEV::print(raw_ostream &OS) const {
+ switch (getSCEVType()) {
+ case scConstant:
+ WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
+ return;
+ case scTruncate: {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
+ const SCEV *Op = Trunc->getOperand();
+ OS << "(trunc " << *Op->getType() << " " << *Op << " to "
+ << *Trunc->getType() << ")";
+ return;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
+ const SCEV *Op = ZExt->getOperand();
+ OS << "(zext " << *Op->getType() << " " << *Op << " to "
+ << *ZExt->getType() << ")";
+ return;
+ }
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
+ const SCEV *Op = SExt->getOperand();
+ OS << "(sext " << *Op->getType() << " " << *Op << " to "
+ << *SExt->getType() << ")";
+ return;
+ }
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
+ OS << "{" << *AR->getOperand(0);
+ for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
+ OS << ",+," << *AR->getOperand(i);
+ OS << "}<";
+ if (AR->getNoWrapFlags(FlagNUW))
+ OS << "nuw><";
+ if (AR->getNoWrapFlags(FlagNSW))
+ OS << "nsw><";
+ if (AR->getNoWrapFlags(FlagNW) &&
+ !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
+ OS << "nw><";
+ WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
+ OS << ">";
+ return;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
+ const char *OpStr = 0;
+ switch (NAry->getSCEVType()) {
+ case scAddExpr: OpStr = " + "; break;
+ case scMulExpr: OpStr = " * "; break;
+ case scUMaxExpr: OpStr = " umax "; break;
+ case scSMaxExpr: OpStr = " smax "; break;
+ }
+ OS << "(";
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ OS << **I;
+ if (llvm::next(I) != E)
+ OS << OpStr;
+ }
+ OS << ")";
+ switch (NAry->getSCEVType()) {
+ case scAddExpr:
+ case scMulExpr:
+ if (NAry->getNoWrapFlags(FlagNUW))
+ OS << "<nuw>";
+ if (NAry->getNoWrapFlags(FlagNSW))
+ OS << "<nsw>";
+ }
+ return;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
+ OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
+ return;
+ }
+ case scUnknown: {
+ const SCEVUnknown *U = cast<SCEVUnknown>(this);
+ Type *AllocTy;
+ if (U->isSizeOf(AllocTy)) {
+ OS << "sizeof(" << *AllocTy << ")";
+ return;
+ }
+ if (U->isAlignOf(AllocTy)) {
+ OS << "alignof(" << *AllocTy << ")";
+ return;
+ }
+
+ Type *CTy;
+ Constant *FieldNo;
+ if (U->isOffsetOf(CTy, FieldNo)) {
+ OS << "offsetof(" << *CTy << ", ";
+ WriteAsOperand(OS, FieldNo, false);
+ OS << ")";
+ return;
+ }
+
+ // Otherwise just print it normally.
+ WriteAsOperand(OS, U->getValue(), false);
+ return;
+ }
+ case scCouldNotCompute:
+ OS << "***COULDNOTCOMPUTE***";
+ return;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
+
+Type *SCEV::getType() const {
+ switch (getSCEVType()) {
+ case scConstant:
+ return cast<SCEVConstant>(this)->getType();
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return cast<SCEVCastExpr>(this)->getType();
+ case scAddRecExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ return cast<SCEVNAryExpr>(this)->getType();
+ case scAddExpr:
+ return cast<SCEVAddExpr>(this)->getType();
+ case scUDivExpr:
+ return cast<SCEVUDivExpr>(this)->getType();
+ case scUnknown:
+ return cast<SCEVUnknown>(this)->getType();
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ default:
+ llvm_unreachable("Unknown SCEV kind!");
+ }
+}
+
+bool SCEV::isZero() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isZero();
+ return false;
+}
+
+bool SCEV::isOne() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isOne();
+ return false;
+}
+
+bool SCEV::isAllOnesValue() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isAllOnesValue();
+ return false;
+}
+
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+bool SCEV::isNonConstantNegative() const {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
+ if (!Mul) return false;
+
+ // If there is a constant factor, it will be first.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+ if (!SC) return false;
+
+ // Return true if the value is negative, this matches things like (-42 * V).
+ return SC->getValue()->getValue().isNegative();
+}
+
+SCEVCouldNotCompute::SCEVCouldNotCompute() :
+ SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
+
+bool SCEVCouldNotCompute::classof(const SCEV *S) {
+ return S->getSCEVType() == scCouldNotCompute;
+}
+
+const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scConstant);
+ ID.AddPointer(V);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
+ return getConstant(ConstantInt::get(getContext(), Val));
+}
+
+const SCEV *
+ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
+ IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+ return getConstant(ConstantInt::get(ITy, V, isSigned));
+}
+
+SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
+ unsigned SCEVTy, const SCEV *op, Type *ty)
+ : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
+
+SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, Type *ty)
+ : SCEVCastExpr(ID, scTruncate, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate non-integer value!");
+}
+
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, Type *ty)
+ : SCEVCastExpr(ID, scZeroExtend, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot zero extend non-integer value!");
+}
+
+SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, Type *ty)
+ : SCEVCastExpr(ID, scSignExtend, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot sign extend non-integer value!");
+}
+
+void SCEVUnknown::deleted() {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Release the value.
+ setValPtr(0);
+}
+
+void SCEVUnknown::allUsesReplacedWith(Value *New) {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Update this SCEVUnknown to point to the new value. This is needed
+ // because there may still be outstanding SCEVs which still point to
+ // this SCEVUnknown.
+ setValPtr(New);
+}
+
+bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue() &&
+ CE->getNumOperands() == 2)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
+ if (CI->isOne()) {
+ AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
+ ->getElementType();
+ return true;
+ }
+
+ return false;
+}
+
+bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue()) {
+ Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked() &&
+ CE->getNumOperands() == 3 &&
+ CE->getOperand(1)->isNullValue()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
+ if (CI->isOne() &&
+ STy->getNumElements() == 2 &&
+ STy->getElementType(0)->isIntegerTy(1)) {
+ AllocTy = STy->getElementType(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getNumOperands() == 3 &&
+ CE->getOperand(0)->isNullValue() &&
+ CE->getOperand(1)->isNullValue()) {
+ Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ // Ignore vector types here so that ScalarEvolutionExpander doesn't
+ // emit getelementptrs that index into vectors.
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
+ CTy = Ty;
+ FieldNo = CE->getOperand(2);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// SCEV Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
+ /// than the complexity of the RHS. This comparator is used to canonicalize
+ /// expressions.
+ class SCEVComplexityCompare {
+ const LoopInfo *const LI;
+ public:
+ explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
+
+ // Return true or false if LHS is less than, or at least RHS, respectively.
+ bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ return compare(LHS, RHS) < 0;
+ }
+
+ // Return negative, zero, or positive, if LHS is less than, equal to, or
+ // greater than RHS, respectively. A three-way result allows recursive
+ // comparisons to be more efficient.
+ int compare(const SCEV *LHS, const SCEV *RHS) const {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return 0;
+
+ // Primarily, sort the SCEVs by their getSCEVType().
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
+
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+ switch (LType) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ // Sort SCEVUnknown values with some loose heuristics. TODO: This is
+ // not as complete as it could be.
+ const Value *LV = LU->getValue(), *RV = RU->getValue();
+
+ // Order pointer values after integer values. This helps SCEVExpander
+ // form GEPs.
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
+
+ // Compare getValueID values.
+ unsigned LID = LV->getValueID(),
+ RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
+
+ // Sort arguments by their position.
+ if (const Argument *LA = dyn_cast<Argument>(LV)) {
+ const Argument *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
+ }
+
+ // For instructions, compare their loop depth, and their operand
+ // count. This is pretty loose.
+ if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
+ const Instruction *RInst = cast<Instruction>(RV);
+
+ // Compare loop depths.
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Compare the number of operands.
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
+ }
+
+ return 0;
+ }
+
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+
+ // Compare constant values.
+ const APInt &LA = LC->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
+ }
+
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(),
+ RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ long X = compare(LA->getOperand(i), RA->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+
+ return 0;
+ }
+
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
+
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ long X = compare(LC->getOperand(i), RC->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+ return (int)LNumOps - (int)RNumOps;
+ }
+
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+
+ // Lexicographically compare udiv expressions.
+ long X = compare(LC->getLHS(), RC->getLHS());
+ if (X != 0)
+ return X;
+ return compare(LC->getRHS(), RC->getRHS());
+ }
+
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+
+ // Compare cast expressions by operand.
+ return compare(LC->getOperand(), RC->getOperand());
+ }
+
+ default:
+ llvm_unreachable("Unknown SCEV kind!");
+ }
+ }
+ };
+}
+
+/// GroupByComplexity - Given a list of SCEV objects, order them by their
+/// complexity, and group objects of the same complexity together by value.
+/// When this routine is finished, we know that any duplicates in the vector are
+/// consecutive and that complexity is monotonically increasing.
+///
+/// Note that we go take special precautions to ensure that we get deterministic
+/// results from this routine. In other words, we don't want the results of
+/// this to depend on where the addresses of various SCEV objects happened to
+/// land in memory.
+///
+static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
+ LoopInfo *LI) {
+ if (Ops.size() < 2) return; // Noop
+ if (Ops.size() == 2) {
+ // This is the common case, which also happens to be trivially simple.
+ // Special case it.
+ const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
+ if (SCEVComplexityCompare(LI)(RHS, LHS))
+ std::swap(LHS, RHS);
+ return;
+ }
+
+ // Do the rough sort by complexity.
+ std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
+
+ // Now that we are sorted by complexity, group elements of the same
+ // complexity. Note that this is, at worst, N^2, but the vector is likely to
+ // be extremely short in practice. Note that we take this approach because we
+ // do not want to depend on the addresses of the objects we are grouping.
+ for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
+ const SCEV *S = Ops[i];
+ unsigned Complexity = S->getSCEVType();
+
+ // If there are any objects of the same complexity and same value as this
+ // one, group them.
+ for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
+ if (Ops[j] == S) { // Found a duplicate.
+ // Move it to immediately after i'th element.
+ std::swap(Ops[i+1], Ops[j]);
+ ++i; // no need to rescan it.
+ if (i == e-2) return; // Done!
+ }
+ }
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Simple SCEV method implementations
+//===----------------------------------------------------------------------===//
+
+/// BinomialCoefficient - Compute BC(It, K). The result has width W.
+/// Assume, K > 0.
+static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
+ ScalarEvolution &SE,
+ Type *ResultTy) {
+ // Handle the simplest case efficiently.
+ if (K == 1)
+ return SE.getTruncateOrZeroExtend(It, ResultTy);
+
+ // We are using the following formula for BC(It, K):
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
+ //
+ // Suppose, W is the bitwidth of the return value. We must be prepared for
+ // overflow. Hence, we must assure that the result of our computation is
+ // equal to the accurate one modulo 2^W. Unfortunately, division isn't
+ // safe in modular arithmetic.
+ //
+ // However, this code doesn't use exactly that formula; the formula it uses
+ // is something like the following, where T is the number of factors of 2 in
+ // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
+ // exponentiation:
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
+ //
+ // This formula is trivially equivalent to the previous formula. However,
+ // this formula can be implemented much more efficiently. The trick is that
+ // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
+ // arithmetic. To do exact division in modular arithmetic, all we have
+ // to do is multiply by the inverse. Therefore, this step can be done at
+ // width W.
+ //
+ // The next issue is how to safely do the division by 2^T. The way this
+ // is done is by doing the multiplication step at a width of at least W + T
+ // bits. This way, the bottom W+T bits of the product are accurate. Then,
+ // when we perform the division by 2^T (which is equivalent to a right shift
+ // by T), the bottom W bits are accurate. Extra bits are okay; they'll get
+ // truncated out after the division by 2^T.
+ //
+ // In comparison to just directly using the first formula, this technique
+ // is much more efficient; using the first formula requires W * K bits,
+ // but this formula less than W + K bits. Also, the first formula requires
+ // a division step, whereas this formula only requires multiplies and shifts.
+ //
+ // It doesn't matter whether the subtraction step is done in the calculation
+ // width or the input iteration count's width; if the subtraction overflows,
+ // the result must be zero anyway. We prefer here to do it in the width of
+ // the induction variable because it helps a lot for certain cases; CodeGen
+ // isn't smart enough to ignore the overflow, which leads to much less
+ // efficient code if the width of the subtraction is wider than the native
+ // register width.
+ //
+ // (It's possible to not widen at all by pulling out factors of 2 before
+ // the multiplication; for example, K=2 can be calculated as
+ // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
+ // extra arithmetic, so it's not an obvious win, and it gets
+ // much more complicated for K > 3.)
+
+ // Protection from insane SCEVs; this bound is conservative,
+ // but it probably doesn't matter.
+ if (K > 1000)
+ return SE.getCouldNotCompute();
+
+ unsigned W = SE.getTypeSizeInBits(ResultTy);
+
+ // Calculate K! / 2^T and T; we divide out the factors of two before
+ // multiplying for calculating K! / 2^T to avoid overflow.
+ // Other overflow doesn't matter because we only care about the bottom
+ // W bits of the result.
+ APInt OddFactorial(W, 1);
+ unsigned T = 1;
+ for (unsigned i = 3; i <= K; ++i) {
+ APInt Mult(W, i);
+ unsigned TwoFactors = Mult.countTrailingZeros();
+ T += TwoFactors;
+ Mult = Mult.lshr(TwoFactors);
+ OddFactorial *= Mult;
+ }
+
+ // We need at least W + T bits for the multiplication step
+ unsigned CalculationBits = W + T;
+
+ // Calculate 2^T, at width T+W.
+ APInt DivFactor = APInt(CalculationBits, 1).shl(T);
+
+ // Calculate the multiplicative inverse of K! / 2^T;
+ // this multiplication factor will perform the exact division by
+ // K! / 2^T.
+ APInt Mod = APInt::getSignedMinValue(W+1);
+ APInt MultiplyFactor = OddFactorial.zext(W+1);
+ MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
+ MultiplyFactor = MultiplyFactor.trunc(W);
+
+ // Calculate the product, at width T+W
+ IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+ CalculationBits);
+ const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
+ for (unsigned i = 1; i != K; ++i) {
+ const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
+ Dividend = SE.getMulExpr(Dividend,
+ SE.getTruncateOrZeroExtend(S, CalculationTy));
+ }
+
+ // Divide by 2^T
+ const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
+
+ // Truncate the result, and divide by K! / 2^T.
+
+ return SE.getMulExpr(SE.getConstant(MultiplyFactor),
+ SE.getTruncateOrZeroExtend(DivResult, ResultTy));
+}
+
+/// evaluateAtIteration - Return the value of this chain of recurrences at
+/// the specified iteration number. We can evaluate this recurrence by
+/// multiplying each element in the chain by the binomial coefficient
+/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as:
+///
+/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
+///
+/// where BC(It, k) stands for binomial coefficient.
+///
+const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
+ ScalarEvolution &SE) const {
+ const SCEV *Result = getStart();
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ // The computation is correct in the face of overflow provided that the
+ // multiplication is performed _after_ the evaluation of the binomial
+ // coefficient.
+ const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
+ if (isa<SCEVCouldNotCompute>(Coeff))
+ return Coeff;
+
+ Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
+ }
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// SCEV Expression folder implementations
+//===----------------------------------------------------------------------===//
+
+const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
+ Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
+ "This is not a truncating conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scTruncate);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
+
+ // trunc(trunc(x)) --> trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
+ return getTruncateExpr(ST->getOperand(), Ty);
+
+ // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getTruncateOrSignExtend(SS->getOperand(), Ty);
+
+ // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
+
+ // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
+ // eliminate all the truncates.
+ if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getAddExpr(Operands);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
+ // eliminate all the truncates.
+ if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getMulExpr(Operands);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // If the input value is a chrec scev, truncate the chrec's operands.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
+ return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
+ }
+
+ // The cast wasn't folded; create an explicit cast node. We can reuse
+ // the existing insert position since if we get here, we won't have
+ // made any changes which would invalidate it.
+ SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
+ Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
+
+ // zext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scZeroExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // zext(trunc(x)) --> zext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all zero bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getUnsignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
+ CR.zextOrTrunc(NewBits)))
+ return getTruncateOrZeroExtend(X, Ty);
+ }
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can zero extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->getNoWrapFlags(SCEV::FlagNUW))
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+ // in infinite recursion. In the later case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+
+ // Check whether the backedge-taken count can be losslessly casted to
+ // the addrec's type. The count is always unsigned.
+ const SCEV *CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ const SCEV *RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ // Check whether Start+Step*MaxBECount has no unsigned overflow.
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
+ const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
+ const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
+ const SCEV *WideMaxBECount =
+ getZeroExtendExpr(CastedMaxBECount, WideTy);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getZeroExtendExpr(Step, WideTy)));
+ if (ZAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ // Similar to above, only this time treat the step value as signed.
+ // This covers loops that count down.
+ OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getSignExtendExpr(Step, WideTy)));
+ if (ZAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+
+ // If the backedge is guarded by a comparison with the pre-inc value
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
+ getUnsignedRange(Step).getUnsignedMax());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ } else if (isKnownNegative(Step)) {
+ const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
+ getSignedRange(Step).getSignedMin());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+ }
+ }
+
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ if (SE->isKnownPositive(Step)) {
+ *Pred = ICmpInst::ICMP_SLT;
+ return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMax());
+ }
+ if (SE->isKnownNegative(Step)) {
+ *Pred = ICmpInst::ICMP_SGT;
+ return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMin());
+ }
+ return 0;
+}
+
+// The recurrence AR has been shown to have no signed wrap. Typically, if we can
+// prove NSW for AR, then we can just as easily prove NSW for its preincrement
+// or postincrement sibling. This allows normalizing a sign extended AddRec as
+// such: {sext(Step + Start),+,Step} => {(Step + sext(Start),+,Step} As a
+// result, the expression "Step + sext(PreIncAR)" is congruent with
+// "sext(PostIncAR)"
+static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
+ Type *Ty,
+ ScalarEvolution *SE) {
+ const Loop *L = AR->getLoop();
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Check for a simple looking step prior to loop entry.
+ const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+ if (!SA)
+ return 0;
+
+ // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
+ // subtraction is expensive. For this purpose, perform a quick and dirty
+ // difference, by checking for Step in the operand list.
+ SmallVector<const SCEV *, 4> DiffOps;
+ for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
+ I != E; ++I) {
+ if (*I != Step)
+ DiffOps.push_back(*I);
+ }
+ if (DiffOps.size() == SA->getNumOperands())
+ return 0;
+
+ // This is a postinc AR. Check for overflow on the preinc recurrence using the
+ // same three conditions that getSignExtendedExpr checks.
+
+ // 1. NSW flags on the step increment.
+ const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
+ const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+ if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
+ return PreStart;
+
+ // 2. Direct overflow check on the step operation's expression.
+ unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+ Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+ const SCEV *OperandExtendedStart =
+ SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
+ SE->getSignExtendExpr(Step, WideTy));
+ if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
+ // Cache knowledge of PreAR NSW.
+ if (PreAR)
+ const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
+ // FIXME: this optimization needs a unit test
+ DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
+ return PreStart;
+ }
+
+ // 3. Loop precondition.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
+
+ if (OverflowLimit &&
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+ return PreStart;
+ }
+ return 0;
+}
+
+// Get the normalized sign-extended expression for this AddRec's Start.
+static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
+ Type *Ty,
+ ScalarEvolution *SE) {
+ const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
+ if (!PreStart)
+ return SE->getSignExtendExpr(AR->getStart(), Ty);
+
+ return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
+ SE->getSignExtendExpr(PreStart, Ty));
+}
+
+const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
+ Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
+
+ // sext(sext(x)) --> sext(x)
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getSignExtendExpr(SS->getOperand(), Ty);
+
+ // sext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSignExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // If the input value is provably positive, build a zext instead.
+ if (isKnownNonNegative(Op))
+ return getZeroExtendExpr(Op, Ty);
+
+ // sext(trunc(x)) --> sext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all sign bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getSignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).signExtend(NewBits).contains(
+ CR.sextOrTrunc(NewBits)))
+ return getTruncateOrSignExtend(X, Ty);
+ }
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can sign extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->getNoWrapFlags(SCEV::FlagNSW))
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, SCEV::FlagNSW);
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+ // in infinite recursion. In the later case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+
+ // Check whether the backedge-taken count can be losslessly casted to
+ // the addrec's type. The count is always unsigned.
+ const SCEV *CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ const SCEV *RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ // Check whether Start+Step*MaxBECount has no signed overflow.
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
+ const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
+ const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
+ const SCEV *WideMaxBECount =
+ getZeroExtendExpr(CastedMaxBECount, WideTy);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getSignExtendExpr(Step, WideTy)));
+ if (SAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ // Similar to above, only this time treat the step value as unsigned.
+ // This covers loops that count up with an unsigned step.
+ OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getZeroExtendExpr(Step, WideTy)));
+ if (SAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+
+ // If the backedge is guarded by a comparison with the pre-inc value
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+ if (OverflowLimit &&
+ (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+ (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
+ isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
+ OverflowLimit)))) {
+ // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+ }
+
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+/// getAnyExtendExpr - Return a SCEV for the given operand extended with
+/// unspecified bits out to the given type.
+///
+const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
+ Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Sign-extend negative constants.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ if (SC->getValue()->getValue().isNegative())
+ return getSignExtendExpr(Op, Ty);
+
+ // Peel off a truncate cast.
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
+ const SCEV *NewOp = T->getOperand();
+ if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
+ return getAnyExtendExpr(NewOp, Ty);
+ return getTruncateOrNoop(NewOp, Ty);
+ }
+
+ // Next try a zext cast. If the cast is folded, use it.
+ const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
+ if (!isa<SCEVZeroExtendExpr>(ZExt))
+ return ZExt;
+
+ // Next try a sext cast. If the cast is folded, use it.
+ const SCEV *SExt = getSignExtendExpr(Op, Ty);
+ if (!isa<SCEVSignExtendExpr>(SExt))
+ return SExt;
+
+ // Force the cast to be folded into the operands of an addrec.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Ops;
+ for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I)
+ Ops.push_back(getAnyExtendExpr(*I, Ty));
+ return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
+ }
+
+ // If the expression is obviously signed, use the sext cast value.
+ if (isa<SCEVSMaxExpr>(Op))
+ return SExt;
+
+ // Absent any other information, use the zext cast value.
+ return ZExt;
+}
+
+/// CollectAddOperandsWithScales - Process the given Ops list, which is
+/// a list of operands to be added under the given scale, update the given
+/// map. This is a helper function for getAddRecExpr. As an example of
+/// what it does, given a sequence of operands that would form an add
+/// expression like this:
+///
+/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
+///
+/// where A and B are constants, update the map with these values:
+///
+/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
+///
+/// and add 13 + A*B*29 to AccumulatedConstant.
+/// This will allow getAddRecExpr to produce this:
+///
+/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
+///
+/// This form often exposes folding opportunities that are hidden in
+/// the original operand list.
+///
+/// Return true iff it appears that any interesting folding opportunities
+/// may be exposed. This helps getAddRecExpr short-circuit extra work in
+/// the common case where no interesting opportunities are present, and
+/// is also used as a check to avoid infinite recursion.
+///
+static bool
+CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
+ SmallVector<const SCEV *, 8> &NewOps,
+ APInt &AccumulatedConstant,
+ const SCEV *const *Ops, size_t NumOperands,
+ const APInt &Scale,
+ ScalarEvolution &SE) {
+ bool Interesting = false;
+
+ // Iterate over the add operands. They are sorted, with constants first.
+ unsigned i = 0;
+ while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ ++i;
+ // Pull a buried constant out to the outside.
+ if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
+ Interesting = true;
+ AccumulatedConstant += Scale * C->getValue()->getValue();
+ }
+
+ // Next comes everything else. We're especially interested in multiplies
+ // here, but they're in the middle, so just visit the rest with one loop.
+ for (; i != NumOperands; ++i) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
+ if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
+ APInt NewScale =
+ Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
+ if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
+ // A multiplication of a constant with another add; recurse.
+ const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
+ Interesting |=
+ CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Add->op_begin(), Add->getNumOperands(),
+ NewScale, SE);
+ } else {
+ // A multiplication of a constant with some other value. Update
+ // the map.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *Key = SE.getMulExpr(MulOps);
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Key, NewScale));
+ if (Pair.second) {
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += NewScale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ } else {
+ // An ordinary operand. Update the map.
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Ops[i], Scale));
+ if (Pair.second) {
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += Scale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ }
+
+ return Interesting;
+}
+
+namespace {
+ struct APIntCompare {
+ bool operator()(const APInt &LHS, const APInt &RHS) const {
+ return LHS.ult(RHS);
+ }
+ };
+}
+
+/// getAddExpr - Get a canonical add expression, or something simpler if
+/// possible.
+const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
+ "only nuw or nsw allowed");
+ assert(!Ops.empty() && "Cannot get empty add!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVAddExpr operand types don't match!");
+#endif
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ Ops[0] = getConstant(LHSC->getValue()->getValue() +
+ RHSC->getValue()->getValue());
+ if (Ops.size() == 2) return Ops[0];
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant zero being added, strip it off.
+ if (LHSC->getValue()->isZero()) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Okay, check to see if the same value occurs in the operand list more than
+ // once. If so, merge them together into an multiply expression. Since we
+ // sorted the list, these values are required to be adjacent.
+ Type *Ty = Ops[0]->getType();
+ bool FoundMatch = false;
+ for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
+ if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
+ // Scan ahead to count how many equal operands there are.
+ unsigned Count = 2;
+ while (i+Count != e && Ops[i+Count] == Ops[i])
+ ++Count;
+ // Merge the values into a multiply.
+ const SCEV *Scale = getConstant(Ty, Count);
+ const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+ if (Ops.size() == Count)
+ return Mul;
+ Ops[i] = Mul;
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
+ --i; e -= Count - 1;
+ FoundMatch = true;
+ }
+ if (FoundMatch)
+ return getAddExpr(Ops, Flags);
+
+ // Check for truncates. If all the operands are truncated from the same
+ // type, see if factoring out the truncate would permit the result to be
+ // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
+ // if the contents of the resulting outer trunc fold to something simple.
+ for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
+ Type *DstType = Trunc->getType();
+ Type *SrcType = Trunc->getOperand()->getType();
+ SmallVector<const SCEV *, 8> LargeOps;
+ bool Ok = true;
+ // Check all the operands to see if they can be represented in the
+ // source type of the truncate.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ LargeOps.push_back(getAnyExtendExpr(C, SrcType));
+ } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
+ SmallVector<const SCEV *, 8> LargeMulOps;
+ for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
+ if (const SCEVTruncateExpr *T =
+ dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeMulOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C =
+ dyn_cast<SCEVConstant>(M->getOperand(j))) {
+ LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok)
+ LargeOps.push_back(getMulExpr(LargeMulOps));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok) {
+ // Evaluate the expression in the larger type.
+ const SCEV *Fold = getAddExpr(LargeOps, Flags);
+ // If it folds to something simple, use it. Otherwise, don't.
+ if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
+ return getTruncateExpr(Fold, DstType);
+ }
+ }
+
+ // Skip past any other cast SCEVs.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
+ ++Idx;
+
+ // If there are add operands they would be next.
+ if (Idx < Ops.size()) {
+ bool DeletedAdd = false;
+ while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+ // If we have an add, expand the add operands onto the end of the operands
+ // list.
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(Add->op_begin(), Add->op_end());
+ DeletedAdd = true;
+ }
+
+ // If we deleted at least one add, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedAdd)
+ return getAddExpr(Ops);
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // Check to see if there are any folding opportunities present with
+ // operands multiplied by constant values.
+ if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
+ uint64_t BitWidth = getTypeSizeInBits(Ty);
+ DenseMap<const SCEV *, APInt> M;
+ SmallVector<const SCEV *, 8> NewOps;
+ APInt AccumulatedConstant(BitWidth, 0);
+ if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Ops.data(), Ops.size(),
+ APInt(BitWidth, 1), *this)) {
+ // Some interesting folding opportunity is present, so its worthwhile to
+ // re-generate the operands list. Group the operands by constant scale,
+ // to avoid multiplying by the same constant scale multiple times.
+ std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
+ for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(),
+ E = NewOps.end(); I != E; ++I)
+ MulOpLists[M.find(*I)->second].push_back(*I);
+ // Re-generate the operands list.
+ Ops.clear();
+ if (AccumulatedConstant != 0)
+ Ops.push_back(getConstant(AccumulatedConstant));
+ for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
+ I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
+ if (I->first != 0)
+ Ops.push_back(getMulExpr(getConstant(I->first),
+ getAddExpr(I->second)));
+ if (Ops.empty())
+ return getConstant(Ty, 0);
+ if (Ops.size() == 1)
+ return Ops[0];
+ return getAddExpr(Ops);
+ }
+ }
+
+ // If we are adding something to a multiply expression, make sure the
+ // something is not already an operand of the multiply. If so, merge it into
+ // the multiply.
+ for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
+ const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
+ for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
+ const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+ if (isa<SCEVConstant>(MulOpSCEV))
+ continue;
+ for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
+ if (MulOpSCEV == Ops[AddOp]) {
+ // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
+ const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ // If the multiply has more than two operands, we must get the
+ // Y*Z term.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+ InnerMul = getMulExpr(MulOps);
+ }
+ const SCEV *One = getConstant(Ty, 1);
+ const SCEV *AddOne = getAddExpr(One, InnerMul);
+ const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
+ if (Ops.size() == 2) return OuterMul;
+ if (AddOp < Idx) {
+ Ops.erase(Ops.begin()+AddOp);
+ Ops.erase(Ops.begin()+Idx-1);
+ } else {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+AddOp-1);
+ }
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+
+ // Check this multiply against other multiplies being added together.
+ for (unsigned OtherMulIdx = Idx+1;
+ OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
+ ++OtherMulIdx) {
+ const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
+ // If MulOp occurs in OtherMul, we can fold the two multiplies
+ // together.
+ for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
+ OMulOp != e; ++OMulOp)
+ if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
+ // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
+ const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+ InnerMul1 = getMulExpr(MulOps);
+ }
+ const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+ if (OtherMul->getNumOperands() != 2) {
+ SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
+ OtherMul->op_begin()+OMulOp);
+ MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
+ InnerMul2 = getMulExpr(MulOps);
+ }
+ const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
+ const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+ if (Ops.size() == 2) return OuterMul;
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+OtherMulIdx-1);
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+ }
+ }
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this add and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ SmallVector<const SCEV *, 8> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
+ LIOps.push_back(AddRec->getStart());
+
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ AddRecOps[0] = getAddExpr(LIOps);
+
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer add and the inner addrec are guaranteed to have no overflow.
+ // Always propagate NW.
+ Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
+ const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+ // Otherwise, add the folded AddRec by the non-invariant parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getAddExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // added together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+ i != e; ++i) {
+ if (i >= AddRecOps.size()) {
+ AddRecOps.append(OtherAddRec->op_begin()+i,
+ OtherAddRec->op_end());
+ break;
+ }
+ AddRecOps[i] = getAddExpr(AddRecOps[i],
+ OtherAddRec->getOperand(i));
+ }
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ // Step size has changed, so we cannot guarantee no self-wraparound.
+ Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
+ return getAddExpr(Ops);
+ }
+
+ // Otherwise couldn't fold anything into this recurrence. Move onto the
+ // next one.
+ }
+
+ // Okay, it looks like we really DO need an add expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ SCEVAddExpr *S =
+ static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
+ uint64_t k = i*j;
+ if (j > 1 && k / j != i) Overflow = true;
+ return k;
+}
+
+/// Compute the result of "n choose k", the binomial coefficient. If an
+/// intermediate computation overflows, Overflow will be set and the return will
+/// be garbage. Overflow is not cleared on absence of overflow.
+static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
+ // We use the multiplicative formula:
+ // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
+ // At each iteration, we take the n-th term of the numeral and divide by the
+ // (k-n)th term of the denominator. This division will always produce an
+ // integral result, and helps reduce the chance of overflow in the
+ // intermediate computations. However, we can still overflow even when the
+ // final result would fit.
+
+ if (n == 0 || n == k) return 1;
+ if (k > n) return 0;
+
+ if (k > n/2)
+ k = n-k;
+
+ uint64_t r = 1;
+ for (uint64_t i = 1; i <= k; ++i) {
+ r = umul_ov(r, n-(i-1), Overflow);
+ r /= i;
+ }
+ return r;
+}
+
+/// getMulExpr - Get a canonical multiply expression, or something simpler if
+/// possible.
+const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
+ "only nuw or nsw allowed");
+ assert(!Ops.empty() && "Cannot get empty mul!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVMulExpr operand types don't match!");
+#endif
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+
+ // C1*(C2+V) -> C1*C2 + C1*V
+ if (Ops.size() == 2)
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
+ if (Add->getNumOperands() == 2 &&
+ isa<SCEVConstant>(Add->getOperand(0)))
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
+ getMulExpr(LHSC, Add->getOperand(1)));
+
+ ++Idx;
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ LHSC->getValue()->getValue() *
+ RHSC->getValue()->getValue());
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant one being multiplied, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
+ // If we have a multiply of zero, it will always be zero.
+ return Ops[0];
+ } else if (Ops[0]->isAllOnesValue()) {
+ // If we have a mul by -1 of an add, try distributing the -1 among the
+ // add operands.
+ if (Ops.size() == 2) {
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
+ SmallVector<const SCEV *, 4> NewOps;
+ bool AnyFolded = false;
+ for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
+ E = Add->op_end(); I != E; ++I) {
+ const SCEV *Mul = getMulExpr(Ops[0], *I);
+ if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
+ NewOps.push_back(Mul);
+ }
+ if (AnyFolded)
+ return getAddExpr(NewOps);
+ }
+ else if (const SCEVAddRecExpr *
+ AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
+ // Negation preserves a recurrence's no self-wrap property.
+ SmallVector<const SCEV *, 4> Operands;
+ for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
+ E = AddRec->op_end(); I != E; ++I) {
+ Operands.push_back(getMulExpr(Ops[0], *I));
+ }
+ return getAddRecExpr(Operands, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ }
+ }
+ }
+
+ if (Ops.size() == 1)
+ return Ops[0];
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // If there are mul operands inline them all into this expression.
+ if (Idx < Ops.size()) {
+ bool DeletedMul = false;
+ while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+ // If we have an mul, expand the mul operands onto the end of the operands
+ // list.
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(Mul->op_begin(), Mul->op_end());
+ DeletedMul = true;
+ }
+
+ // If we deleted at least one mul, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedMul)
+ return getMulExpr(Ops);
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this mul and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ SmallVector<const SCEV *, 8> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.reserve(AddRec->getNumOperands());
+ const SCEV *Scale = getMulExpr(LIOps);
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
+
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer mul and the inner addrec are guaranteed to have no overflow.
+ //
+ // No self-wrap cannot be guaranteed after changing the step size, but
+ // will be inferred if either NUW or NSW is true.
+ Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
+ const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+ // Otherwise, multiply the folded AddRec by the non-invariant parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getMulExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // multiplied together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx) {
+ if (AddRecLoop != cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop())
+ continue;
+
+ // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+ // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+ // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
+ // ]]],+,...up to x=2n}.
+ // Note that the arguments to choose() are always integers with values
+ // known at compile time, never SCEV objects.
+ //
+ // The implementation avoids pointless extra computations when the two
+ // addrec's are of different length (mathematically, it's equivalent to
+ // an infinite stream of zeros on the right).
+ bool OpsModified = false;
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx) {
+ const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+ if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
+ continue;
+
+ bool Overflow = false;
+ Type *Ty = AddRec->getType();
+ bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
+ SmallVector<const SCEV*, 7> AddRecOps;
+ for (int x = 0, xe = AddRec->getNumOperands() +
+ OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
+ const SCEV *Term = getConstant(Ty, 0);
+ for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
+ uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
+ for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
+ ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
+ z < ze && !Overflow; ++z) {
+ uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
+ uint64_t Coeff;
+ if (LargerThan64Bits)
+ Coeff = umul_ov(Coeff1, Coeff2, Overflow);
+ else
+ Coeff = Coeff1*Coeff2;
+ const SCEV *CoeffTerm = getConstant(Ty, Coeff);
+ const SCEV *Term1 = AddRec->getOperand(y-z);
+ const SCEV *Term2 = OtherAddRec->getOperand(z);
+ Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
+ }
+ }
+ AddRecOps.push_back(Term);
+ }
+ if (!Overflow) {
+ const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
+ SCEV::FlagAnyWrap);
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = NewAddRec;
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ OpsModified = true;
+ AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
+ if (!AddRec)
+ break;
+ }
+ }
+ if (OpsModified)
+ return getMulExpr(Ops);
+ }
+
+ // Otherwise couldn't fold anything into this recurrence. Move onto the
+ // next one.
+ }
+
+ // Okay, it looks like we really DO need an mul expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scMulExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ SCEVMulExpr *S =
+ static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+/// getUDivExpr - Get a canonical unsigned division expression, or something
+/// simpler if possible.
+const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ assert(getEffectiveSCEVType(LHS->getType()) ==
+ getEffectiveSCEVType(RHS->getType()) &&
+ "SCEVUDivExpr operand types don't match!");
+
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (RHSC->getValue()->equalsInt(1))
+ return LHS; // X udiv 1 --> x
+ // If the denominator is zero, the result of the udiv is undefined. Don't
+ // try to analyze it, because the resolution chosen here may differ from
+ // the resolution chosen in other parts of the compiler.
+ if (!RHSC->getValue()->isZero()) {
+ // Determine if the division can be folded into the operands of
+ // its operands.
+ // TODO: Generalize this to non-constants by using known-bits information.
+ Type *Ty = LHS->getType();
+ unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
+ unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
+ // For non-power-of-two values, effectively round the value up to the
+ // nearest power of two.
+ if (!RHSC->getValue()->getValue().isPowerOf2())
+ ++MaxShiftAmt;
+ IntegerType *ExtTy =
+ IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
+ // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
+ const APInt &StepInt = Step->getValue()->getValue();
+ const APInt &DivInt = RHSC->getValue()->getValue();
+ if (!StepInt.urem(DivInt) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop(), SCEV::FlagAnyWrap)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
+ Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
+ return getAddRecExpr(Operands, AR->getLoop(),
+ SCEV::FlagNW);
+ }
+ /// Get a canonical UDivExpr for a recurrence.
+ /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
+ // We can currently only fold X%N if X is constant.
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
+ if (StartC && !DivInt.urem(StepInt) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop(), SCEV::FlagAnyWrap)) {
+ const APInt &StartInt = StartC->getValue()->getValue();
+ const APInt &StartRem = StartInt.urem(StepInt);
+ if (StartRem != 0)
+ LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
+ AR->getLoop(), SCEV::FlagNW);
+ }
+ }
+ // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
+ // Find an operand that's safely divisible.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = M->getOperand(i);
+ const SCEV *Div = getUDivExpr(Op, RHSC);
+ if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
+ Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
+ M->op_end());
+ Operands[i] = Div;
+ return getMulExpr(Operands);
+ }
+ }
+ }
+ // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
+ Operands.clear();
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
+ if (isa<SCEVUDivExpr>(Op) ||
+ getMulExpr(Op, RHS) != A->getOperand(i))
+ break;
+ Operands.push_back(Op);
+ }
+ if (Operands.size() == A->getNumOperands())
+ return getAddExpr(Operands);
+ }
+ }
+
+ // Fold if both operands are constant.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ Constant *LHSCV = LHSC->getValue();
+ Constant *RHSCV = RHSC->getValue();
+ return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
+ RHSCV)));
+ }
+ }
+ }
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUDivExpr);
+ ID.AddPointer(LHS);
+ ID.AddPointer(RHS);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
+ LHS, RHS);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
+ const Loop *L,
+ SCEV::NoWrapFlags Flags) {
+ SmallVector<const SCEV *, 4> Operands;
+ Operands.push_back(Start);
+ if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
+ if (StepChrec->getLoop() == L) {
+ Operands.append(StepChrec->op_begin(), StepChrec->op_end());
+ return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
+ }
+
+ Operands.push_back(Step);
+ return getAddRecExpr(Operands, L, Flags);
+}
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+const SCEV *
+ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L, SCEV::NoWrapFlags Flags) {
+ if (Operands.size() == 1) return Operands[0];
+#ifndef NDEBUG
+ Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
+ "SCEVAddRecExpr operand types don't match!");
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ assert(isLoopInvariant(Operands[i], L) &&
+ "SCEVAddRecExpr operand is not loop-invariant!");
+#endif
+
+ if (Operands.back()->isZero()) {
+ Operands.pop_back();
+ return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
+ }
+
+ // It's tempting to want to call getMaxBackedgeTakenCount count here and
+ // use that information to infer NUW and NSW flags. However, computing a
+ // BE count requires calling getAddRecExpr, so we may not yet have a
+ // meaningful BE count at this point (and if we don't, we'd be stuck
+ // with a SCEVCouldNotCompute as the cached BE count).
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
+ E = Operands.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+ // Canonicalize nested AddRecs in by nesting them in order of loop depth.
+ if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
+ const Loop *NestedLoop = NestedAR->getLoop();
+ if (L->contains(NestedLoop) ?
+ (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
+ (!NestedLoop->contains(L) &&
+ DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
+ SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
+ NestedAR->op_end());
+ Operands[0] = NestedAR->getStart();
+ // AddRecs require their operands be loop-invariant with respect to their
+ // loops. Don't perform this transformation if it would break this
+ // requirement.
+ bool AllInvariant = true;
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ if (!isLoopInvariant(Operands[i], L)) {
+ AllInvariant = false;
+ break;
+ }
+ if (AllInvariant) {
+ // Create a recurrence for the outer loop with the same step size.
+ //
+ // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
+ // inner recurrence has the same property.
+ SCEV::NoWrapFlags OuterFlags =
+ maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
+
+ NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
+ AllInvariant = true;
+ for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
+ if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
+ AllInvariant = false;
+ break;
+ }
+ if (AllInvariant) {
+ // Ok, both add recurrences are valid after the transformation.
+ //
+ // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
+ // the outer recurrence has the same property.
+ SCEV::NoWrapFlags InnerFlags =
+ maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
+ return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
+ }
+ }
+ // Reset Operands to its original state.
+ Operands[0] = NestedAR;
+ }
+ }
+
+ // Okay, it looks like we really DO need an addrec expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ ID.AddPointer(Operands[i]);
+ ID.AddPointer(L);
+ void *IP = 0;
+ SCEVAddRecExpr *S =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
+ std::uninitialized_copy(Operands.begin(), Operands.end(), O);
+ S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
+ O, Operands.size(), L);
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getSMaxExpr(Ops);
+}
+
+const SCEV *
+ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty smax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVSMaxExpr operand types don't match!");
+#endif
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ APIntOps::smax(LHSC->getValue()->getValue(),
+ RHSC->getValue()->getValue()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant minimum-int, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
+ // If we have an smax with a constant maximum-int, it will always be
+ // maximum-int.
+ return Ops[0];
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Find the first SMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is an SMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedSMax = false;
+ while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(SMax->op_begin(), SMax->op_end());
+ DeletedSMax = true;
+ }
+
+ if (DeletedSMax)
+ return getSMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ // X smax Y smax Y --> X smax Y
+ // X smax Y --> X, if X is always greater than Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced smax down to nothing!");
+
+ // Okay, it looks like we really DO need an smax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSMaxExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getUMaxExpr(Ops);
+}
+
+const SCEV *
+ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty umax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVUMaxExpr operand types don't match!");
+#endif
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ APIntOps::umax(LHSC->getValue()->getValue(),
+ RHSC->getValue()->getValue()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant minimum-int, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
+ // If we have an umax with a constant maximum-int, it will always be
+ // maximum-int.
+ return Ops[0];
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Find the first UMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is a UMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedUMax = false;
+ while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(UMax->op_begin(), UMax->op_end());
+ DeletedUMax = true;
+ }
+
+ if (DeletedUMax)
+ return getUMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ // X umax Y umax Y --> X umax Y
+ // X umax Y --> X, if X is always greater than Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced umax down to nothing!");
+
+ // Okay, it looks like we really DO need a umax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUMaxExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // ~smax(~x, ~y) == smin(x, y).
+ return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // ~umax(~x, ~y) == umin(x, y)
+ return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
+ // If we have DataLayout, we can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ if (TD)
+ return getConstant(TD->getIntPtrType(getContext()),
+ TD->getTypeAllocSize(AllocTy));
+
+ Constant *C = ConstantExpr::getSizeOf(AllocTy);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+ C = Folded;
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
+ Constant *C = ConstantExpr::getAlignOf(AllocTy);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+ C = Folded;
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
+ unsigned FieldNo) {
+ // If we have DataLayout, we can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ if (TD)
+ return getConstant(TD->getIntPtrType(getContext()),
+ TD->getStructLayout(STy)->getElementOffset(FieldNo));
+
+ Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+ C = Folded;
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
+ Constant *FieldNo) {
+ Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+ C = Folded;
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getUnknown(Value *V) {
+ // Don't attempt to do anything other than create a SCEVUnknown object
+ // here. createSCEV only calls getUnknown after checking for all other
+ // interesting possibilities, and any other code that calls getUnknown
+ // is doing so in order to hide a value from SCEV canonicalization.
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUnknown);
+ ID.AddPointer(V);
+ void *IP = 0;
+ if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+ assert(cast<SCEVUnknown>(S)->getValue() == V &&
+ "Stale SCEVUnknown in uniquing map!");
+ return S;
+ }
+ SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+ FirstUnknown);
+ FirstUnknown = cast<SCEVUnknown>(S);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+//===----------------------------------------------------------------------===//
+// Basic SCEV Analysis and PHI Idiom Recognition Code
+//
+
+/// isSCEVable - Test if values of the given type are analyzable within
+/// the SCEV framework. This primarily includes integer types, and it
+/// can optionally include pointer types if the ScalarEvolution class
+/// has access to target-specific information.
+bool ScalarEvolution::isSCEVable(Type *Ty) const {
+ // Integers and pointers are always SCEVable.
+ return Ty->isIntegerTy() || Ty->isPointerTy();
+}
+
+/// getTypeSizeInBits - Return the size in bits of the specified type,
+/// for which isSCEVable must return true.
+uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ // If we have a DataLayout, use it!
+ if (TD)
+ return TD->getTypeSizeInBits(Ty);
+
+ // Integer types have fixed sizes.
+ if (Ty->isIntegerTy())
+ return Ty->getPrimitiveSizeInBits();
+
+ // The only other support type is pointer. Without DataLayout, conservatively
+ // assume pointers are 64-bit.
+ assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
+ return 64;
+}
+
+/// getEffectiveSCEVType - Return a type with the same bitwidth as
+/// the given type and which represents how SCEV will treat the given
+/// type, for which isSCEVable must return true. For pointer types,
+/// this is the pointer-sized integer type.
+Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ if (Ty->isIntegerTy())
+ return Ty;
+
+ // The only other support type is pointer.
+ assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
+ if (TD) return TD->getIntPtrType(getContext());
+
+ // Without DataLayout, conservatively assume pointers are 64-bit.
+ return Type::getInt64Ty(getContext());
+}
+
+const SCEV *ScalarEvolution::getCouldNotCompute() {
+ return &CouldNotCompute;
+}
+
+/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
+/// expression and create a new one.
+const SCEV *ScalarEvolution::getSCEV(Value *V) {
+ assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
+ ValueExprMapType::const_iterator I = ValueExprMap.find_as(V);
+ if (I != ValueExprMap.end()) return I->second;
+ const SCEV *S = createSCEV(V);
+
+ // The process of creating a SCEV for V may have caused other SCEVs
+ // to have been created, so it's necessary to insert the new entry
+ // from scratch, rather than trying to remember the insert position
+ // above.
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+ return S;
+}
+
+/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
+///
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
+
+ Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ return getMulExpr(V,
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+}
+
+/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
+const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+
+ Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ const SCEV *AllOnes =
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
+ return getMinusSCEV(AllOnes, V);
+}
+
+/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+ SCEV::NoWrapFlags Flags) {
+ assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
+
+ // Fast path: X - X --> 0.
+ if (LHS == RHS)
+ return getConstant(LHS->getType(), 0);
+
+ // X - Y --> X + -Y
+ return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
+}
+
+/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or zero extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
+ Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or zero extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended. The conversion must not be narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or zero extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrZeroExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended. The conversion must not be narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or sign extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrSignExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
+/// the input value to the specified type. If the type must be extended,
+/// it is extended with unspecified bits. The conversion must not be
+/// narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or any extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrAnyExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getAnyExtendExpr(V, Ty);
+}
+
+/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. The conversion must not be widening.
+const SCEV *
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or noop with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
+ "getTruncateOrNoop cannot extend!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getTruncateExpr(V, Ty);
+}
+
+/// getUMaxFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umax operation
+/// with them.
+const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS) {
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMaxExpr(PromotedLHS, PromotedRHS);
+}
+
+/// getUMinFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umin operation
+/// with them.
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS) {
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMinExpr(PromotedLHS, PromotedRHS);
+}
+
+/// getPointerBase - Transitively follow the chain of pointer-type operands
+/// until reaching a SCEV that does not have a single pointer operand. This
+/// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
+/// but corner cases do exist.
+const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
+ // A pointer operand may evaluate to a nonpointer expression, such as null.
+ if (!V->getType()->isPointerTy())
+ return V;
+
+ if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
+ return getPointerBase(Cast->getOperand());
+ }
+ else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
+ const SCEV *PtrOp = 0;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ if ((*I)->getType()->isPointerTy()) {
+ // Cannot find the base of an expression with multiple pointer operands.
+ if (PtrOp)
+ return V;
+ PtrOp = *I;
+ }
+ }
+ if (!PtrOp)
+ return V;
+ return getPointerBase(PtrOp);
+ }
+ return V;
+}
+
+/// PushDefUseChildren - Push users of the given Instruction
+/// onto the given Worklist.
+static void
+PushDefUseChildren(Instruction *I,
+ SmallVectorImpl<Instruction *> &Worklist) {
+ // Push the def-use children onto the Worklist stack.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(cast<Instruction>(*UI));
+}
+
+/// ForgetSymbolicValue - This looks up computed SCEV values for all
+/// instructions that depend on the given instruction and removes them from
+/// the ValueExprMapType map if they reference SymName. This is used during PHI
+/// resolution.
+void
+ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushDefUseChildren(PN, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ Visited.insert(PN);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // Short-circuit the def-use traversal if the symbolic name
+ // ceases to appear in expressions.
+ if (Old != SymName && !hasOperand(Old, SymName))
+ continue;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+ // structure, it's a PHI that's in the progress of being computed
+ // by createNodeForPHI, or it's a single-value PHI. In the first case,
+ // additional loop trip count information isn't going to change anything.
+ // In the second case, createNodeForPHI will perform the necessary
+ // updates on its own when it gets to that point. In the third, we do
+ // want to forget the SCEVUnknown.
+ if (!isa<PHINode>(I) ||
+ !isa<SCEVUnknown>(Old) ||
+ (I != PN && Old == SymName)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
+ }
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
+/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
+/// a loop header, making it a potential recurrence, or it doesn't.
+///
+const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
+ if (const Loop *L = LI->getLoopFor(PN->getParent()))
+ if (L->getHeader() == PN->getParent()) {
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi as an addrec if it has a unique entry value and a unique
+ // backedge value.
+ Value *BEValueV = 0, *StartValueV = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (L->contains(PN->getIncomingBlock(i))) {
+ if (!BEValueV) {
+ BEValueV = V;
+ } else if (BEValueV != V) {
+ BEValueV = 0;
+ break;
+ }
+ } else if (!StartValueV) {
+ StartValueV = V;
+ } else if (StartValueV != V) {
+ StartValueV = 0;
+ break;
+ }
+ }
+ if (BEValueV && StartValueV) {
+ // While we are analyzing this PHI node, handle its value symbolically.
+ const SCEV *SymbolicName = getUnknown(PN);
+ assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
+ "PHI node already processed?");
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+
+ // Using this symbolic name for the PHI, analyze the value coming around
+ // the back-edge.
+ const SCEV *BEValue = getSCEV(BEValueV);
+
+ // NOTE: If BEValue is loop invariant, we know that the PHI node just
+ // has a special value for the first iteration of the loop.
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, then we found a simple induction variable!
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+ // If there is a single occurrence of the symbolic value, replace it
+ // with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (Add->getOperand(i) == SymbolicName)
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
+ }
+
+ if (FoundIndex != Add->getNumOperands()) {
+ // Create an add with everything but the specified operand.
+ SmallVector<const SCEV *, 8> Ops;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ const SCEV *Accum = getAddExpr(Ops);
+
+ // This is not a valid addrec if the step amount is varying each
+ // loop iteration, but is not itself an addrec in this loop.
+ if (isLoopInvariant(Accum, L) ||
+ (isa<SCEVAddRecExpr>(Accum) &&
+ cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+
+ // If the increment doesn't overflow, then neither the addrec nor
+ // the post-increment will overflow.
+ if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ } else if (const GEPOperator *GEP =
+ dyn_cast<GEPOperator>(BEValueV)) {
+ // If the increment is an inbounds GEP, then we know the address
+ // space cannot be wrapped around. We cannot make any guarantee
+ // about signed or unsigned overflow because pointers are
+ // unsigned but we may have a negative index from the base
+ // pointer.
+ if (GEP->isInBounds())
+ Flags = setFlags(Flags, SCEV::FlagNW);
+ }
+
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
+
+ // Since the no-wrap flags are on the increment, they apply to the
+ // post-incremented value as well.
+ if (isLoopInvariant(Accum, L))
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum),
+ Accum, L, Flags);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ } else if (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(BEValue)) {
+ // Otherwise, this could be a loop like this:
+ // i = 0; for (j = 1; ..; ++j) { .... i = j; }
+ // In this case, j = {1,+,1} and BEValue is j.
+ // Because the other in-value of i (0) fits the evolution of BEValue
+ // i really is an addrec evolution.
+ if (AddRec->getLoop() == L && AddRec->isAffine()) {
+ const SCEV *StartVal = getSCEV(StartValueV);
+
+ // If StartVal = j.start - j.stride, we can use StartVal as the
+ // initial step of the addrec evolution.
+ if (StartVal == getMinusSCEV(AddRec->getOperand(0),
+ AddRec->getOperand(1))) {
+ // FIXME: For constant StartVal, we should be able to infer
+ // no-wrap flags.
+ const SCEV *PHISCEV =
+ getAddRecExpr(StartVal, AddRec->getOperand(1), L,
+ SCEV::FlagAnyWrap);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ }
+ }
+ }
+
+ // If the PHI has a single incoming value, follow that value, unless the
+ // PHI's incoming blocks are in a different loop, in which case doing so
+ // risks breaking LCSSA form. Instcombine would normally zap these, but
+ // it doesn't have DominatorTree information, so it may miss cases.
+ if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
+ if (LI->replacementPreservesLCSSAForm(PN, V))
+ return getSCEV(V);
+
+ // If it's not a loop phi, we can't handle it yet.
+ return getUnknown(PN);
+}
+
+/// createNodeForGEP - Expand GEP instructions into add and multiply
+/// operations. This allows them to be analyzed by regular SCEV code.
+///
+const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+
+ // Don't blindly transfer the inbounds flag from the GEP instruction to the
+ // Add expression, because the Instruction may be guarded by control flow
+ // and the no-overflow bits may not be valid for the expression in any
+ // context.
+ bool isInBounds = GEP->isInBounds();
+
+ Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
+ return getUnknown(GEP);
+ const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
+ E = GEP->op_end();
+ I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+
+ // Add the field offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, FieldOffset);
+ } else {
+ // For an array, add the element offset, explicitly scaled.
+ const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *IndexS = getSCEV(Index);
+ // Getelementptr indices are signed.
+ IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
+
+ // Multiply the index by the element size to compute the element offset.
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
+ isInBounds ? SCEV::FlagNSW :
+ SCEV::FlagAnyWrap);
+
+ // Add the element offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+ }
+ }
+
+ // Get the SCEV for the GEP base.
+ const SCEV *BaseS = getSCEV(Base);
+
+ // Add the total offset from all the GEP indices to the base.
+ return getAddExpr(BaseS, TotalOffset,
+ isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+}
+
+/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
+/// guaranteed to end in (at every loop iteration). It is, at the same time,
+/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
+/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
+uint32_t
+ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return C->getValue()->getValue().countTrailingZeros();
+
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
+ return std::min(GetMinTrailingZeros(T->getOperand()),
+ (uint32_t)getTypeSizeInBits(T->getType()));
+
+ if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // The result is the min of all operands results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ // The result is the sum of all operands results.
+ uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
+ uint32_t BitWidth = getTypeSizeInBits(M->getType());
+ for (unsigned i = 1, e = M->getNumOperands();
+ SumOpRes != BitWidth && i != e; ++i)
+ SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
+ BitWidth);
+ return SumOpRes;
+ }
+
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ // The result is the min of all operands results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
+ // The result is the min of all operands results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
+ // The result is the min of all operands results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Zeros, Ones);
+ return Zeros.countTrailingOnes();
+ }
+
+ // SCEVUDivExpr
+ return 0;
+}
+
+/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getUnsignedRange(const SCEV *S) {
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
+ if (I != UnsignedRanges.end())
+ return I->second;
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
+
+ unsigned BitWidth = getTypeSizeInBits(S->getType());
+ ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+ // If the value has known zeros, the maximum unsigned value will have those
+ // known zeros as well.
+ uint32_t TZ = GetMinTrailingZeros(S);
+ if (TZ != 0)
+ ConservativeResult =
+ ConstantRange(APInt::getMinValue(BitWidth),
+ APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getUnsignedRange(Add->getOperand(i)));
+ return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
+ return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getUnsignedRange(SMax->getOperand(i)));
+ return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getUnsignedRange(UMax->getOperand(i)));
+ return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UDiv->getLHS());
+ ConstantRange Y = getUnsignedRange(UDiv->getRHS());
+ return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(ZExt->getOperand());
+ return setUnsignedRange(ZExt,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SExt->getOperand());
+ return setUnsignedRange(SExt,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Trunc->getOperand());
+ return setUnsignedRange(Trunc,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ }
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If there's no unsigned wrap, the value will never be less than its
+ // initial value.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
+ if (!C->getValue()->isZero())
+ ConservativeResult =
+ ConservativeResult.intersectWith(
+ ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+ getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ ConstantRange StartRange = getUnsignedRange(Start);
+ ConstantRange StepRange = getSignedRange(Step);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange EndRange =
+ StartRange.add(MaxBECountRange.multiply(StepRange));
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+ ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
+ if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+ ExtEndRange)
+ return setUnsignedRange(AddRec, ConservativeResult);
+
+ APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
+ EndRange.getUnsignedMin());
+ APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
+ EndRange.getUnsignedMax());
+ if (Min.isMinValue() && Max.isMaxValue())
+ return setUnsignedRange(AddRec, ConservativeResult);
+ return setUnsignedRange(AddRec,
+ ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ }
+ }
+
+ return setUnsignedRange(AddRec, ConservativeResult);
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Zeros, Ones, TD);
+ if (Ones == ~Zeros + 1)
+ return setUnsignedRange(U, ConservativeResult);
+ return setUnsignedRange(U,
+ ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
+ }
+
+ return setUnsignedRange(S, ConservativeResult);
+}
+
+/// getSignedRange - Determine the signed range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getSignedRange(const SCEV *S) {
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
+ if (I != SignedRanges.end())
+ return I->second;
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
+
+ unsigned BitWidth = getTypeSizeInBits(S->getType());
+ ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+ // If the value has known zeros, the maximum signed value will have those
+ // known zeros as well.
+ uint32_t TZ = GetMinTrailingZeros(S);
+ if (TZ != 0)
+ ConservativeResult =
+ ConstantRange(APInt::getSignedMinValue(BitWidth),
+ APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getSignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getSignedRange(Add->getOperand(i)));
+ return setSignedRange(Add, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getSignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getSignedRange(Mul->getOperand(i)));
+ return setSignedRange(Mul, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getSignedRange(SMax->getOperand(i)));
+ return setSignedRange(SMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getSignedRange(UMax->getOperand(i)));
+ return setSignedRange(UMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getSignedRange(UDiv->getLHS());
+ ConstantRange Y = getSignedRange(UDiv->getRHS());
+ return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(ZExt->getOperand());
+ return setSignedRange(ZExt,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(SExt->getOperand());
+ return setSignedRange(SExt,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getSignedRange(Trunc->getOperand());
+ return setSignedRange(Trunc,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ }
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If there's no signed wrap, and all the operands have the same sign or
+ // zero, the value won't ever change sign.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
+ bool AllNonNeg = true;
+ bool AllNonPos = true;
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
+ if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+ }
+ if (AllNonNeg)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt(BitWidth, 0),
+ APInt::getSignedMinValue(BitWidth)));
+ else if (AllNonPos)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth),
+ APInt(BitWidth, 1)));
+ }
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+ getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ ConstantRange StartRange = getSignedRange(Start);
+ ConstantRange StepRange = getSignedRange(Step);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange EndRange =
+ StartRange.add(MaxBECountRange.multiply(StepRange));
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+ ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
+ if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+ ExtEndRange)
+ return setSignedRange(AddRec, ConservativeResult);
+
+ APInt Min = APIntOps::smin(StartRange.getSignedMin(),
+ EndRange.getSignedMin());
+ APInt Max = APIntOps::smax(StartRange.getSignedMax(),
+ EndRange.getSignedMax());
+ if (Min.isMinSignedValue() && Max.isMaxSignedValue())
+ return setSignedRange(AddRec, ConservativeResult);
+ return setSignedRange(AddRec,
+ ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ }
+ }
+
+ return setSignedRange(AddRec, ConservativeResult);
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ if (!U->getValue()->getType()->isIntegerTy() && !TD)
+ return setSignedRange(U, ConservativeResult);
+ unsigned NS = ComputeNumSignBits(U->getValue(), TD);
+ if (NS == 1)
+ return setSignedRange(U, ConservativeResult);
+ return setSignedRange(U, ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
+ }
+
+ return setSignedRange(S, ConservativeResult);
+}
+
+/// createSCEV - We know that there is no SCEV for the specified value.
+/// Analyze the expression.
+///
+const SCEV *ScalarEvolution::createSCEV(Value *V) {
+ if (!isSCEVable(V->getType()))
+ return getUnknown(V);
+
+ unsigned Opcode = Instruction::UserOp1;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+
+ // Don't attempt to analyze instructions in blocks that aren't
+ // reachable. Such instructions don't matter, and they aren't required
+ // to obey basic rules for definitions dominating uses which this
+ // analysis depends on.
+ if (!DT->isReachableFromEntry(I->getParent()))
+ return getUnknown(V);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ Opcode = CE->getOpcode();
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return getConstant(CI);
+ else if (isa<ConstantPointerNull>(V))
+ return getConstant(V->getType(), 0);
+ else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
+ else
+ return getUnknown(V);
+
+ Operator *U = cast<Operator>(V);
+ switch (Opcode) {
+ case Instruction::Add: {
+ // The simple thing to do would be to just call getSCEV on both operands
+ // and call getAddExpr with the result. However if we're looking at a
+ // bunch of things all added together, this can be quite inefficient,
+ // because it leads to N-1 getAddExpr calls for N ultimate operands.
+ // Instead, gather up all the operands and make a single getAddExpr call.
+ // LLVM IR canonical form means we need only traverse the left operands.
+ //
+ // Don't apply this instruction's NSW or NUW flags to the new
+ // expression. The instruction may be guarded by control flow that the
+ // no-wrap behavior depends on. Non-control-equivalent instructions can be
+ // mapped to the same SCEV expression, and it would be incorrect to transfer
+ // NSW/NUW semantics to those operations.
+ SmallVector<const SCEV *, 4> AddOps;
+ AddOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
+ unsigned Opcode = Op->getValueID() - Value::InstructionVal;
+ if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+ break;
+ U = cast<Operator>(Op);
+ const SCEV *Op1 = getSCEV(U->getOperand(1));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getNegativeSCEV(Op1));
+ else
+ AddOps.push_back(Op1);
+ }
+ AddOps.push_back(getSCEV(U->getOperand(0)));
+ return getAddExpr(AddOps);
+ }
+ case Instruction::Mul: {
+ // Don't transfer NSW/NUW for the same reason as AddExpr.
+ SmallVector<const SCEV *, 4> MulOps;
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0);
+ Op->getValueID() == Instruction::Mul + Value::InstructionVal;
+ Op = U->getOperand(0)) {
+ U = cast<Operator>(Op);
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ }
+ MulOps.push_back(getSCEV(U->getOperand(0)));
+ return getMulExpr(MulOps);
+ }
+ case Instruction::UDiv:
+ return getUDivExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::Sub:
+ return getMinusSCEV(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::And:
+ // For an expression like x&255 that merely masks off the high bits,
+ // use zext(trunc(x)) as the SCEV expression.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ if (CI->isNullValue())
+ return getSCEV(U->getOperand(1));
+ if (CI->isAllOnesValue())
+ return getSCEV(U->getOperand(0));
+ const APInt &A = CI->getValue();
+
+ // Instcombine's ShrinkDemandedConstant may strip bits out of
+ // constants, obscuring what would otherwise be a low-bits mask.
+ // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
+ // knew about to reconstruct a low-bits mask value.
+ unsigned LZ = A.countLeadingZeros();
+ unsigned BitWidth = A.getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD);
+
+ APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
+
+ if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
+ return
+ getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
+ IntegerType::get(getContext(), BitWidth - LZ)),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Or:
+ // If the RHS of the Or is a constant, we may have something like:
+ // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
+ // optimizations will transparently handle this case.
+ //
+ // In order for this transformation to be safe, the LHS must be of the
+ // form X*(2^n) and the Or constant must be less than 2^n.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ const SCEV *LHS = getSCEV(U->getOperand(0));
+ const APInt &CIVal = CI->getValue();
+ if (GetMinTrailingZeros(LHS) >=
+ (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
+ // Build a plain add SCEV.
+ const SCEV *S = getAddExpr(LHS, getSCEV(CI));
+ // If the LHS of the add was an addrec and it has no-wrap flags,
+ // transfer the no-wrap flags, since an or won't introduce a wrap.
+ if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
+ const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
+ OldAR->getNoWrapFlags());
+ }
+ return S;
+ }
+ }
+ break;
+ case Instruction::Xor:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ // If the RHS of the xor is a signbit, then this is just an add.
+ // Instcombine turns add of signbit into xor as a strength reduction step.
+ if (CI->getValue().isSignBit())
+ return getAddExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+
+ // If the RHS of xor is -1, then this is a not operation.
+ if (CI->isAllOnesValue())
+ return getNotSCEV(getSCEV(U->getOperand(0)));
+
+ // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
+ // This is a variant of the check for xor with -1, and it handles
+ // the case where instcombine has trimmed non-demanded bits out
+ // of an xor with -1.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
+ if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO->getOpcode() == Instruction::And &&
+ LCI->getValue() == CI->getValue())
+ if (const SCEVZeroExtendExpr *Z =
+ dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
+ Type *UTy = U->getType();
+ const SCEV *Z0 = Z->getOperand();
+ Type *Z0Ty = Z0->getType();
+ unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
+
+ // If C is a low-bits mask, the zero extend is serving to
+ // mask off the high bits. Complement the operand and
+ // re-apply the zext.
+ if (APIntOps::isMask(Z0TySize, CI->getValue()))
+ return getZeroExtendExpr(getNotSCEV(Z0), UTy);
+
+ // If C is a single bit, it may be in the sign-bit position
+ // before the zero-extend. In this case, represent the xor
+ // using an add, which is equivalent, and re-apply the zext.
+ APInt Trunc = CI->getValue().trunc(Z0TySize);
+ if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+ Trunc.isSignBit())
+ return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
+ UTy);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ // Turn shift left of a constant amount into a multiply.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
+ Constant *X = ConstantInt::get(getContext(),
+ APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::LShr:
+ // Turn logical shift right of a constant into a unsigned divide.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
+ Constant *X = ConstantInt::get(getContext(),
+ APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::AShr:
+ // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
+ if (L->getOpcode() == Instruction::Shl &&
+ L->getOperand(1) == U->getOperand(1)) {
+ uint64_t BitWidth = getTypeSizeInBits(U->getType());
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (CI->getValue().uge(BitWidth))
+ break;
+
+ uint64_t Amt = BitWidth - CI->getZExtValue();
+ if (Amt == BitWidth)
+ return getSCEV(L->getOperand(0)); // shift by zero --> noop
+ return
+ getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
+ IntegerType::get(getContext(),
+ Amt)),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Trunc:
+ return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::ZExt:
+ return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::SExt:
+ return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::BitCast:
+ // BitCasts are no-op casts so we just eliminate the cast.
+ if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
+ return getSCEV(U->getOperand(0));
+ break;
+
+ // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
+ // lead to pointer expressions which cannot safely be expanded to GEPs,
+ // because ScalarEvolution doesn't respect the GEP aliasing rules when
+ // simplifying integer expressions.
+
+ case Instruction::GetElementPtr:
+ return createNodeForGEP(cast<GEPOperator>(U));
+
+ case Instruction::PHI:
+ return createNodeForPHI(cast<PHINode>(U));
+
+ case Instruction::Select:
+ // This could be a smax or umax that was lowered earlier.
+ // Try to recover it.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ switch (ICI->getPredicate()) {
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ // a >s b ? a+x : b+x -> smax(a, b)+x
+ // a >s b ? b+x : a+x -> smin(a, b)+x
+ if (LHS->getType() == U->getType()) {
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *RS = getSCEV(RHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // a >u b ? a+x : b+x -> umax(a, b)+x
+ // a >u b ? b+x : a+x -> umin(a, b)+x
+ if (LHS->getType() == U->getType()) {
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *RS = getSCEV(RHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ // n != 0 ? n+x : 1+x -> umax(n, 1)+x
+ if (LHS->getType() == U->getType() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(LHS->getType(), 1);
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, One);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ // n == 0 ? 1+x : n+x -> umax(n, 1)+x
+ if (LHS->getType() == U->getType() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(LHS->getType(), 1);
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, One);
+ const SCEV *RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ default: // We cannot analyze this expression.
+ break;
+ }
+
+ return getUnknown(V);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Iteration Count Computation Code
+//
+
+/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
+/// normal unsigned value. Returns 0 if the trip count is unknown or not
+/// constant. Will also return 0 if the maximum trip count is very large (>=
+/// 2^32).
+///
+/// This "trip count" assumes that control exits via ExitingBlock. More
+/// precisely, it is the number of times that control may reach ExitingBlock
+/// before taking the branch. For loops with multiple exits, it may not be the
+/// number times that the loop header executes because the loop may exit
+/// prematurely via another branch.
+unsigned ScalarEvolution::
+getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
+ const SCEVConstant *ExitCount =
+ dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
+ if (!ExitCount)
+ return 0;
+
+ ConstantInt *ExitConst = ExitCount->getValue();
+
+ // Guard against huge trip counts.
+ if (ExitConst->getValue().getActiveBits() > 32)
+ return 0;
+
+ // In case of integer overflow, this returns 0, which is correct.
+ return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be the
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant, use getSmallConstantTripCount for that case), Will also return 1
+/// if the trip count is very large (>= 2^32).
+///
+/// As explained in the comments for getSmallConstantTripCount, this assumes
+/// that control exits the loop via ExitingBlock.
+unsigned ScalarEvolution::
+getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
+ const SCEV *ExitCount = getExitCount(L, ExitingBlock);
+ if (ExitCount == getCouldNotCompute())
+ return 1;
+
+ // Get the trip count from the BE count by adding 1.
+ const SCEV *TCMul = getAddExpr(ExitCount,
+ getConstant(ExitCount->getType(), 1));
+ // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
+ // to factor simple cases.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
+ TCMul = Mul->getOperand(0);
+
+ const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
+ if (!MulC)
+ return 1;
+
+ ConstantInt *Result = MulC->getValue();
+
+ // Guard against huge trip counts (this requires checking
+ // for zero to handle the case where the trip count == -1 and the
+ // addition wraps).
+ if (!Result || Result->getValue().getActiveBits() > 32 ||
+ Result->getValue().getActiveBits() == 0)
+ return 1;
+
+ return (unsigned)Result->getZExtValue();
+}
+
+// getExitCount - Get the expression for the number of loop iterations for which
+// this loop is guaranteed not to exit via ExitintBlock. Otherwise return
+// SCEVCouldNotCompute.
+const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
+ return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
+}
+
+/// getBackedgeTakenCount - If the specified loop has a predictable
+/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
+/// object. The backedge-taken count is the number of times the loop header
+/// will be branched to from within the loop. This is one less than the
+/// trip count of the loop, since it doesn't count the first iteration,
+/// when the header is branched to from outside the loop.
+///
+/// Note that it is not valid to call this method on a loop without a
+/// loop-invariant backedge-taken count (see
+/// hasLoopInvariantBackedgeTakenCount).
+///
+const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).getExact(this);
+}
+
+/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
+/// return the least SCEV value that is known never to be less than the
+/// actual backedge taken count.
+const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).getMax(this);
+}
+
+/// PushLoopPHIs - Push PHI nodes in the header of the given loop
+/// onto the given Worklist.
+static void
+PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+ BasicBlock *Header = L->getHeader();
+
+ // Push all Loop-header PHIs onto the Worklist stack.
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ Worklist.push_back(PN);
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
+ // Initially insert an invalid entry for this loop. If the insertion
+ // succeeds, proceed to actually compute a backedge-taken count and
+ // update the value. The temporary CouldNotCompute value tells SCEV
+ // code elsewhere that it shouldn't attempt to request a new
+ // backedge-taken count, which could result in infinite recursion.
+ std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+ BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
+ // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
+ // must be cleared in this scope.
+ BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
+
+ if (Result.getExact(this) != getCouldNotCompute()) {
+ assert(isLoopInvariant(Result.getExact(this), L) &&
+ isLoopInvariant(Result.getMax(this), L) &&
+ "Computed backedge-taken count isn't loop invariant for loop!");
+ ++NumTripCountsComputed;
+ }
+ else if (Result.getMax(this) == getCouldNotCompute() &&
+ isa<PHINode>(L->getHeader()->begin())) {
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
+ }
+
+ // Now that we know more about the trip count for this loop, forget any
+ // existing SCEV values for PHI nodes in this loop since they are only
+ // conservative estimates made without the benefit of trip count
+ // information. This is similar to the code in forgetLoop, except that
+ // it handles SCEVUnknown PHI nodes specially.
+ if (Result.hasAnyInfo()) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+ // structure, or it's a PHI that's in the progress of being computed
+ // by createNodeForPHI. In the former case, additional loop trip
+ // count information isn't going to change anything. In the later
+ // case, createNodeForPHI will perform the necessary updates on its
+ // own when it gets to that point.
+ if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+ }
+
+ // Re-lookup the insert position, since the call to
+ // ComputeBackedgeTakenCount above could result in a
+ // recusive call to getBackedgeTakenInfo (on a different
+ // loop), which would invalidate the iterator computed
+ // earlier.
+ return BackedgeTakenCounts.find(L)->second = Result;
+}
+
+/// forgetLoop - This method should be called by the client when it has
+/// changed a loop in a way that may effect ScalarEvolution's ability to
+/// compute a trip count, or if the loop is deleted.
+void ScalarEvolution::forgetLoop(const Loop *L) {
+ // Drop any stored trip count value.
+ DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
+ BackedgeTakenCounts.find(L);
+ if (BTCPos != BackedgeTakenCounts.end()) {
+ BTCPos->second.clear();
+ BackedgeTakenCounts.erase(BTCPos);
+ }
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ forgetMemoizedResults(It->second);
+ ValueExprMap.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+
+ // Forget all contained loops too, to avoid dangling entries in the
+ // ValuesAtScopes map.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ forgetLoop(*I);
+}
+
+/// forgetValue - This method should be called by the client when it has
+/// changed a value in a way that may effect its value, or which may
+/// disconnect it from a def-use chain linking it to a loop.
+void ScalarEvolution::forgetValue(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return;
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ Worklist.push_back(I);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ forgetMemoizedResults(It->second);
+ ValueExprMap.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
+/// getExact - Get the exact loop backedge taken count considering all loop
+/// exits. A computable result can only be return for loops with a single exit.
+/// Returning the minimum taken count among all exits is incorrect because one
+/// of the loop's exit limit's may have been skipped. HowFarToZero assumes that
+/// the limit of each loop test is never skipped. This is a valid assumption as
+/// long as the loop exits via that test. For precise results, it is the
+/// caller's responsibility to specify the relevant loop exit using
+/// getExact(ExitingBlock, SE).
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
+ // If any exits were not computable, the loop is not computable.
+ if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
+
+ // We need exactly one computable exit.
+ if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
+ assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
+
+ const SCEV *BECount = 0;
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != 0; ENT = ENT->getNextExit()) {
+
+ assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
+
+ if (!BECount)
+ BECount = ENT->ExactNotTaken;
+ else if (BECount != ENT->ExactNotTaken)
+ return SE->getCouldNotCompute();
+ }
+ assert(BECount && "Invalid not taken count for loop exit");
+ return BECount;
+}
+
+/// getExact - Get the exact not taken count for this loop exit.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
+ ScalarEvolution *SE) const {
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != 0; ENT = ENT->getNextExit()) {
+
+ if (ENT->ExitingBlock == ExitingBlock)
+ return ENT->ExactNotTaken;
+ }
+ return SE->getCouldNotCompute();
+}
+
+/// getMax - Get the max backedge taken count for the loop.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
+ return Max ? Max : SE->getCouldNotCompute();
+}
+
+bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
+ ScalarEvolution *SE) const {
+ if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
+ return true;
+
+ if (!ExitNotTaken.ExitingBlock)
+ return false;
+
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != 0; ENT = ENT->getNextExit()) {
+
+ if (ENT->ExactNotTaken != SE->getCouldNotCompute()
+ && SE->hasOperand(ENT->ExactNotTaken, S)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
+/// computable exit into a persistent ExitNotTakenInfo array.
+ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
+ SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
+ bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
+
+ if (!Complete)
+ ExitNotTaken.setIncomplete();
+
+ unsigned NumExits = ExitCounts.size();
+ if (NumExits == 0) return;
+
+ ExitNotTaken.ExitingBlock = ExitCounts[0].first;
+ ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
+ if (NumExits == 1) return;
+
+ // Handle the rare case of multiple computable exits.
+ ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
+
+ ExitNotTakenInfo *PrevENT = &ExitNotTaken;
+ for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
+ PrevENT->setNextExit(ENT);
+ ENT->ExitingBlock = ExitCounts[i].first;
+ ENT->ExactNotTaken = ExitCounts[i].second;
+ }
+}
+
+/// clear - Invalidate this result and free the ExitNotTakenInfo array.
+void ScalarEvolution::BackedgeTakenInfo::clear() {
+ ExitNotTaken.ExitingBlock = 0;
+ ExitNotTaken.ExactNotTaken = 0;
+ delete[] ExitNotTaken.getNextExit();
+}
+
+/// ComputeBackedgeTakenCount - Compute the number of times the backedge
+/// of the specified loop will execute.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Examine all exits and pick the most conservative values.
+ const SCEV *MaxBECount = getCouldNotCompute();
+ bool CouldComputeBECount = true;
+ SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
+ if (EL.Exact == getCouldNotCompute())
+ // We couldn't compute an exact value for this exit, so
+ // we won't be able to compute an exact value for the loop.
+ CouldComputeBECount = false;
+ else
+ ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact));
+
+ if (MaxBECount == getCouldNotCompute())
+ MaxBECount = EL.Max;
+ else if (EL.Max != getCouldNotCompute()) {
+ // We cannot take the "min" MaxBECount, because non-unit stride loops may
+ // skip some loop tests. Taking the max over the exits is sufficiently
+ // conservative. TODO: We could do better taking into consideration
+ // that (1) the loop has unit stride (2) the last loop test is
+ // less-than/greater-than (3) any loop test is less-than/greater-than AND
+ // falls-through some constant times less then the other tests.
+ MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
+ }
+ }
+
+ return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
+}
+
+/// ComputeExitLimit - Compute the number of times the backedge of the specified
+/// loop will execute if it exits via the specified block.
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
+
+ // Okay, we've chosen an exiting block. See what condition causes us to
+ // exit at this block.
+ //
+ // FIXME: we should be able to handle switch instructions (with a single exit)
+ BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (ExitBr == 0) return getCouldNotCompute();
+ assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+
+ // At this point, we know we have a conditional branch that determines whether
+ // the loop is exited. However, we don't know if the branch is executed each
+ // time through the loop. If not, then the execution count of the branch will
+ // not be equal to the trip count of the loop.
+ //
+ // Currently we check for this by checking to see if the Exit branch goes to
+ // the loop header. If so, we know it will always execute the same number of
+ // times as the loop. We also handle the case where the exit block *is* the
+ // loop header. This is common for un-rotated loops.
+ //
+ // If both of those tests fail, walk up the unique predecessor chain to the
+ // header, stopping if there is an edge that doesn't exit the loop. If the
+ // header is reached, the execution count of the branch will be equal to the
+ // trip count of the loop.
+ //
+ // More extensive analysis could be done to handle more cases here.
+ //
+ if (ExitBr->getSuccessor(0) != L->getHeader() &&
+ ExitBr->getSuccessor(1) != L->getHeader() &&
+ ExitBr->getParent() != L->getHeader()) {
+ // The simple checks failed, try climbing the unique predecessor chain
+ // up to the header.
+ bool Ok = false;
+ for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+ BasicBlock *Pred = BB->getUniquePredecessor();
+ if (!Pred)
+ return getCouldNotCompute();
+ TerminatorInst *PredTerm = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *PredSucc = PredTerm->getSuccessor(i);
+ if (PredSucc == BB)
+ continue;
+ // If the predecessor has a successor that isn't BB and isn't
+ // outside the loop, assume the worst.
+ if (L->contains(PredSucc))
+ return getCouldNotCompute();
+ }
+ if (Pred == L->getHeader()) {
+ Ok = true;
+ break;
+ }
+ BB = Pred;
+ }
+ if (!Ok)
+ return getCouldNotCompute();
+ }
+
+ // Proceed to the next level to examine the exit condition expression.
+ return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
+ ExitBr->getSuccessor(0),
+ ExitBr->getSuccessor(1));
+}
+
+/// ComputeExitLimitFromCond - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of ExitCond, TBB, and FBB.
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
+ Value *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
+ // Check if the controlling expression for this loop is an And or Or.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
+ if (BO->getOpcode() == Instruction::And) {
+ // Recurse on the operands of the and.
+ ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
+ ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (L->contains(TBB)) {
+ // Both conditions must be true for the loop to continue executing.
+ // Choose the less conservative count.
+ if (EL0.Exact == getCouldNotCompute() ||
+ EL1.Exact == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+ if (EL0.Max == getCouldNotCompute())
+ MaxBECount = EL1.Max;
+ else if (EL1.Max == getCouldNotCompute())
+ MaxBECount = EL0.Max;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+ } else {
+ // Both conditions must be true at the same time for the loop to exit.
+ // For now, be conservative.
+ assert(L->contains(FBB) && "Loop block has no successor in loop!");
+ if (EL0.Max == EL1.Max)
+ MaxBECount = EL0.Max;
+ if (EL0.Exact == EL1.Exact)
+ BECount = EL0.Exact;
+ }
+
+ return ExitLimit(BECount, MaxBECount);
+ }
+ if (BO->getOpcode() == Instruction::Or) {
+ // Recurse on the operands of the or.
+ ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
+ ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (L->contains(FBB)) {
+ // Both conditions must be false for the loop to continue executing.
+ // Choose the less conservative count.
+ if (EL0.Exact == getCouldNotCompute() ||
+ EL1.Exact == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+ if (EL0.Max == getCouldNotCompute())
+ MaxBECount = EL1.Max;
+ else if (EL1.Max == getCouldNotCompute())
+ MaxBECount = EL0.Max;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+ } else {
+ // Both conditions must be false at the same time for the loop to exit.
+ // For now, be conservative.
+ assert(L->contains(TBB) && "Loop block has no successor in loop!");
+ if (EL0.Max == EL1.Max)
+ MaxBECount = EL0.Max;
+ if (EL0.Exact == EL1.Exact)
+ BECount = EL0.Exact;
+ }
+
+ return ExitLimit(BECount, MaxBECount);
+ }
+ }
+
+ // With an icmp, it may be feasible to compute an exact backedge-taken count.
+ // Proceed to the next level to examine the icmp.
+ if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
+ return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB);
+
+ // Check for a constant condition. These are normally stripped out by
+ // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
+ // preserve the CFG and is temporarily leaving constant conditions
+ // in place.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
+ if (L->contains(FBB) == !CI->getZExtValue())
+ // The backedge is always taken.
+ return getCouldNotCompute();
+ else
+ // The backedge is never taken.
+ return getConstant(CI->getType(), 0);
+ }
+
+ // If it's not an integer or pointer comparison then compute it the hard way.
+ return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+/// ComputeExitLimitFromICmp - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
+ ICmpInst *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
+
+ // If the condition was exit on true, convert the condition to exit on false
+ ICmpInst::Predicate Cond;
+ if (!L->contains(FBB))
+ Cond = ExitCond->getPredicate();
+ else
+ Cond = ExitCond->getInversePredicate();
+
+ // Handle common loops like: for (X = "string"; *X; ++X)
+ if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
+ if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
+ ExitLimit ItCnt =
+ ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
+ if (ItCnt.hasAnyInfo())
+ return ItCnt;
+ }
+
+ const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
+ const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
+
+ // Try to evaluate any dependencies out of the loop.
+ LHS = getSCEVAtScope(LHS, L);
+ RHS = getSCEVAtScope(RHS, L);
+
+ // At this point, we would like to compute how many iterations of the
+ // loop the predicate will return true for these inputs.
+ if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
+ // If there is a loop-invariant, force it into the RHS.
+ std::swap(LHS, RHS);
+ Cond = ICmpInst::getSwappedPredicate(Cond);
+ }
+
+ // Simplify the operands before analyzing them.
+ (void)SimplifyICmpOperands(Cond, LHS, RHS);
+
+ // If we have a comparison of a chrec against a constant, try to use value
+ // ranges to answer this query.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (AddRec->getLoop() == L) {
+ // Form the constant range.
+ ConstantRange CompRange(
+ ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
+
+ const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+ if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
+ }
+
+ switch (Cond) {
+ case ICmpInst::ICMP_NE: { // while (X != Y)
+ // Convert to: while (X-Y != 0)
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ if (EL.hasAnyInfo()) return EL;
+ break;
+ }
+ case ICmpInst::ICMP_EQ: { // while (X == Y)
+ // Convert to: while (X-Y == 0)
+ ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (EL.hasAnyInfo()) return EL;
+ break;
+ }
+ case ICmpInst::ICMP_SLT: {
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, true);
+ if (EL.hasAnyInfo()) return EL;
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
+ getNotSCEV(RHS), L, true);
+ if (EL.hasAnyInfo()) return EL;
+ break;
+ }
+ case ICmpInst::ICMP_ULT: {
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, false);
+ if (EL.hasAnyInfo()) return EL;
+ break;
+ }
+ case ICmpInst::ICMP_UGT: {
+ ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
+ getNotSCEV(RHS), L, false);
+ if (EL.hasAnyInfo()) return EL;
+ break;
+ }
+ default:
+#if 0
+ dbgs() << "ComputeBackedgeTakenCount ";
+ if (ExitCond->getOperand(0)->getType()->isUnsigned())
+ dbgs() << "[unsigned] ";
+ dbgs() << *LHS << " "
+ << Instruction::getOpcodeName(Instruction::ICmp)
+ << " " << *RHS << "\n";
+#endif
+ break;
+ }
+ return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+static ConstantInt *
+EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
+ ScalarEvolution &SE) {
+ const SCEV *InVal = SE.getConstant(C);
+ const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
+ assert(isa<SCEVConstant>(Val) &&
+ "Evaluation of SCEV at constant didn't fold correctly?");
+ return cast<SCEVConstant>(Val)->getValue();
+}
+
+/// ComputeLoadConstantCompareExitLimit - Given an exit condition of
+/// 'icmp op load X, cst', try to see if we can compute the backedge
+/// execution count.
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeLoadConstantCompareExitLimit(
+ LoadInst *LI,
+ Constant *RHS,
+ const Loop *L,
+ ICmpInst::Predicate predicate) {
+
+ if (LI->isVolatile()) return getCouldNotCompute();
+
+ // Check to see if the loaded pointer is a getelementptr of a global.
+ // TODO: Use SCEV instead of manually grubbing with GEPs.
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+ if (!GEP) return getCouldNotCompute();
+
+ // Make sure that it is really a constant global we are gepping, with an
+ // initializer, and make sure the first IDX is really 0.
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
+ !cast<Constant>(GEP->getOperand(1))->isNullValue())
+ return getCouldNotCompute();
+
+ // Okay, we allow one non-constant index into the GEP instruction.
+ Value *VarIdx = 0;
+ std::vector<Constant*> Indexes;
+ unsigned VarIdxNum = 0;
+ for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ Indexes.push_back(CI);
+ } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
+ if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
+ VarIdx = GEP->getOperand(i);
+ VarIdxNum = i-2;
+ Indexes.push_back(0);
+ }
+
+ // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
+ if (!VarIdx)
+ return getCouldNotCompute();
+
+ // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
+ // Check to see if X is a loop variant variable value now.
+ const SCEV *Idx = getSCEV(VarIdx);
+ Idx = getSCEVAtScope(Idx, L);
+
+ // We can only recognize very limited forms of loop index expressions, in
+ // particular, only affine AddRec's like {C1,+,C2}.
+ const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
+ if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(1)))
+ return getCouldNotCompute();
+
+ unsigned MaxSteps = MaxBruteForceIterations;
+ for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
+ ConstantInt *ItCst = ConstantInt::get(
+ cast<IntegerType>(IdxExpr->getType()), IterationNum);
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
+
+ // Form the GEP offset.
+ Indexes[VarIdxNum] = Val;
+
+ Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
+ Indexes);
+ if (Result == 0) break; // Cannot compute!
+
+ // Evaluate the condition for this iteration.
+ Result = ConstantExpr::getICmp(predicate, Result, RHS);
+ if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
+ if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
+#if 0
+ dbgs() << "\n***\n*** Computed loop count " << *ItCst
+ << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
+ << "***\n";
+#endif
+ ++NumArrayLenItCounts;
+ return getConstant(ItCst); // Found terminating iteration!
+ }
+ }
+ return getCouldNotCompute();
+}
+
+
+/// CanConstantFold - Return true if we can constant fold an instruction of the
+/// specified type, assuming that all operands were constants.
+static bool CanConstantFold(const Instruction *I) {
+ if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
+ isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<LoadInst>(I))
+ return true;
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ return canConstantFoldCallTo(F);
+ return false;
+}
+
+/// Determine whether this instruction can constant evolve within this loop
+/// assuming its operands can all constant evolve.
+static bool canConstantEvolve(Instruction *I, const Loop *L) {
+ // An instruction outside of the loop can't be derived from a loop PHI.
+ if (!L->contains(I)) return false;
+
+ if (isa<PHINode>(I)) {
+ if (L->getHeader() == I->getParent())
+ return true;
+ else
+ // We don't currently keep track of the control flow needed to evaluate
+ // PHIs, so we cannot handle PHIs inside of loops.
+ return false;
+ }
+
+ // If we won't be able to constant fold this expression even if the operands
+ // are constants, bail early.
+ return CanConstantFold(I);
+}
+
+/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
+/// recursing through each instruction operand until reaching a loop header phi.
+static PHINode *
+getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
+ DenseMap<Instruction *, PHINode *> &PHIMap) {
+
+ // Otherwise, we can evaluate this instruction if all of its operands are
+ // constant or derived from a PHI node themselves.
+ PHINode *PHI = 0;
+ for (Instruction::op_iterator OpI = UseInst->op_begin(),
+ OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
+
+ if (isa<Constant>(*OpI)) continue;
+
+ Instruction *OpInst = dyn_cast<Instruction>(*OpI);
+ if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
+
+ PHINode *P = dyn_cast<PHINode>(OpInst);
+ if (!P)
+ // If this operand is already visited, reuse the prior result.
+ // We may have P != PHI if this is the deepest point at which the
+ // inconsistent paths meet.
+ P = PHIMap.lookup(OpInst);
+ if (!P) {
+ // Recurse and memoize the results, whether a phi is found or not.
+ // This recursive call invalidates pointers into PHIMap.
+ P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
+ PHIMap[OpInst] = P;
+ }
+ if (P == 0) return 0; // Not evolving from PHI
+ if (PHI && PHI != P) return 0; // Evolving from multiple different PHIs.
+ PHI = P;
+ }
+ // This is a expression evolving from a constant PHI!
+ return PHI;
+}
+
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from. We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI. If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || !canConstantEvolve(I, L)) return 0;
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ return PN;
+ }
+
+ // Record non-constant instructions contained by the loop.
+ DenseMap<Instruction *, PHINode *> PHIMap;
+ return getConstantEvolvingPHIOperands(I, L, PHIMap);
+}
+
+/// EvaluateExpression - Given an expression that passes the
+/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
+/// in the loop has the value PHIVal. If we can't fold this expression for some
+/// reason, return null.
+static Constant *EvaluateExpression(Value *V, const Loop *L,
+ DenseMap<Instruction *, Constant *> &Vals,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ // Convenient constant check, but redundant for recursive calls.
+ if (Constant *C = dyn_cast<Constant>(V)) return C;
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return 0;
+
+ if (Constant *C = Vals.lookup(I)) return C;
+
+ // An instruction inside the loop depends on a value outside the loop that we
+ // weren't given a mapping for, or a value such as a call inside the loop.
+ if (!canConstantEvolve(I, L)) return 0;
+
+ // An unmapped PHI can be due to a branch or another loop inside this loop,
+ // or due to this not being the initial iteration through a loop where we
+ // couldn't compute the evolution of this particular PHI last time.
+ if (isa<PHINode>(I)) return 0;
+
+ std::vector<Constant*> Operands(I->getNumOperands());
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
+ if (!Operand) {
+ Operands[i] = dyn_cast<Constant>(I->getOperand(i));
+ if (!Operands[i]) return 0;
+ continue;
+ }
+ Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
+ Vals[Operand] = C;
+ if (!C) return 0;
+ Operands[i] = C;
+ }
+
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], TD, TLI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(Operands[0], TD);
+ }
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
+ TLI);
+}
+
+/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
+/// in the header of its containing loop, we know the loop executes a
+/// constant number of times, and the PHI node is just a recurrence
+/// involving constants, fold it.
+Constant *
+ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
+ const APInt &BEs,
+ const Loop *L) {
+ DenseMap<PHINode*, Constant*>::const_iterator I =
+ ConstantEvolutionLoopExitValue.find(PN);
+ if (I != ConstantEvolutionLoopExitValue.end())
+ return I->second;
+
+ if (BEs.ugt(MaxBruteForceIterations))
+ return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it.
+
+ Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+
+ DenseMap<Instruction *, Constant *> CurrentIterVals;
+ BasicBlock *Header = L->getHeader();
+ assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
+
+ // Since the loop is canonicalized, the PHI node must have two entries. One
+ // entry must be a constant (coming in from outside of the loop), and the
+ // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+ PHINode *PHI = 0;
+ for (BasicBlock::iterator I = Header->begin();
+ (PHI = dyn_cast<PHINode>(I)); ++I) {
+ Constant *StartCST =
+ dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0) continue;
+ CurrentIterVals[PHI] = StartCST;
+ }
+ if (!CurrentIterVals.count(PN))
+ return RetVal = 0;
+
+ Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+
+ // Execute the loop symbolically to determine the exit value.
+ if (BEs.getActiveBits() >= 32)
+ return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+
+ unsigned NumIterations = BEs.getZExtValue(); // must be in range
+ unsigned IterationNum = 0;
+ for (; ; ++IterationNum) {
+ if (IterationNum == NumIterations)
+ return RetVal = CurrentIterVals[PN]; // Got exit value!
+
+ // Compute the value of the PHIs for the next iteration.
+ // EvaluateExpression adds non-phi values to the CurrentIterVals map.
+ DenseMap<Instruction *, Constant *> NextIterVals;
+ Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
+ TLI);
+ if (NextPHI == 0)
+ return 0; // Couldn't evaluate!
+ NextIterVals[PN] = NextPHI;
+
+ bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
+
+ // Also evaluate the other PHI nodes. However, we don't get to stop if we
+ // cease to be able to evaluate one of them or if they stop evolving,
+ // because that doesn't necessarily prevent us from computing PN.
+ SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
+ for (DenseMap<Instruction *, Constant *>::const_iterator
+ I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
+ PHINode *PHI = dyn_cast<PHINode>(I->first);
+ if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
+ PHIsToCompute.push_back(std::make_pair(PHI, I->second));
+ }
+ // We use two distinct loops because EvaluateExpression may invalidate any
+ // iterators into CurrentIterVals.
+ for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
+ I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
+ PHINode *PHI = I->first;
+ Constant *&NextPHI = NextIterVals[PHI];
+ if (!NextPHI) { // Not already computed.
+ Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+ }
+ if (NextPHI != I->second)
+ StoppedEvolving = false;
+ }
+
+ // If all entries in CurrentIterVals == NextIterVals then we can stop
+ // iterating, the loop can't continue to change.
+ if (StoppedEvolving)
+ return RetVal = CurrentIterVals[PN];
+
+ CurrentIterVals.swap(NextIterVals);
+ }
+}
+
+/// ComputeExitCountExhaustively - If the loop is known to execute a
+/// constant number of times (the condition evolves only from constants),
+/// try to evaluate a few iterations of the loop until we get the exit
+/// condition gets a value of ExitWhen (true or false). If we cannot
+/// evaluate the trip count of the loop, return getCouldNotCompute().
+const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
+ Value *Cond,
+ bool ExitWhen) {
+ PHINode *PN = getConstantEvolvingPHI(Cond, L);
+ if (PN == 0) return getCouldNotCompute();
+
+ // If the loop is canonicalized, the PHI will have exactly two entries.
+ // That's the only form we support here.
+ if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+
+ DenseMap<Instruction *, Constant *> CurrentIterVals;
+ BasicBlock *Header = L->getHeader();
+ assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
+
+ // One entry must be a constant (coming in from outside of the loop), and the
+ // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+ PHINode *PHI = 0;
+ for (BasicBlock::iterator I = Header->begin();
+ (PHI = dyn_cast<PHINode>(I)); ++I) {
+ Constant *StartCST =
+ dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0) continue;
+ CurrentIterVals[PHI] = StartCST;
+ }
+ if (!CurrentIterVals.count(PN))
+ return getCouldNotCompute();
+
+ // Okay, we find a PHI node that defines the trip count of this loop. Execute
+ // the loop symbolically to determine when the condition gets a value of
+ // "ExitWhen".
+
+ unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
+ for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
+ ConstantInt *CondVal =
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
+ TD, TLI));
+
+ // Couldn't symbolically evaluate.
+ if (!CondVal) return getCouldNotCompute();
+
+ if (CondVal->getValue() == uint64_t(ExitWhen)) {
+ ++NumBruteForceTripCountsComputed;
+ return getConstant(Type::getInt32Ty(getContext()), IterationNum);
+ }
+
+ // Update all the PHI nodes for the next iteration.
+ DenseMap<Instruction *, Constant *> NextIterVals;
+
+ // Create a list of which PHIs we need to compute. We want to do this before
+ // calling EvaluateExpression on them because that may invalidate iterators
+ // into CurrentIterVals.
+ SmallVector<PHINode *, 8> PHIsToCompute;
+ for (DenseMap<Instruction *, Constant *>::const_iterator
+ I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
+ PHINode *PHI = dyn_cast<PHINode>(I->first);
+ if (!PHI || PHI->getParent() != Header) continue;
+ PHIsToCompute.push_back(PHI);
+ }
+ for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
+ E = PHIsToCompute.end(); I != E; ++I) {
+ PHINode *PHI = *I;
+ Constant *&NextPHI = NextIterVals[PHI];
+ if (NextPHI) continue; // Already computed!
+
+ Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+ }
+ CurrentIterVals.swap(NextIterVals);
+ }
+
+ // Too many iterations were needed to evaluate.
+ return getCouldNotCompute();
+}
+
+/// getSCEVAtScope - Return a SCEV expression for the specified value
+/// at the specified scope in the program. The L value specifies a loop
+/// nest to evaluate the expression at, where null is the top-level or a
+/// specified loop is immediately inside of the loop.
+///
+/// This method can be used to compute the exit value for a variable defined
+/// in a loop by querying what the value will hold in the parent loop.
+///
+/// In the case that a relevant loop exit value cannot be computed, the
+/// original value V is returned.
+const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+ // Check to see if we've folded this expression at this loop before.
+ std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
+ std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
+ if (!Pair.second)
+ return Pair.first->second ? Pair.first->second : V;
+
+ // Otherwise compute it.
+ const SCEV *C = computeSCEVAtScope(V, L);
+ ValuesAtScopes[V][L] = C;
+ return C;
+}
+
+/// This builds up a Constant using the ConstantExpr interface. That way, we
+/// will return Constants for objects which aren't represented by a
+/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
+/// Returns NULL if the SCEV isn't representable as a Constant.
+static Constant *BuildConstantFromSCEV(const SCEV *V) {
+ switch (V->getSCEVType()) {
+ default: // TODO: smax, umax.
+ case scCouldNotCompute:
+ case scAddRecExpr:
+ break;
+ case scConstant:
+ return cast<SCEVConstant>(V)->getValue();
+ case scUnknown:
+ return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
+ return ConstantExpr::getSExt(CastOp, SS->getType());
+ break;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
+ return ConstantExpr::getZExt(CastOp, SZ->getType());
+ break;
+ }
+ case scTruncate: {
+ const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
+ return ConstantExpr::getTrunc(CastOp, ST->getType());
+ break;
+ }
+ case scAddExpr: {
+ const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
+ if (C->getType()->isPointerTy())
+ C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
+ for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
+ if (!C2) return 0;
+
+ // First pointer!
+ if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+ std::swap(C, C2);
+ // The offsets have been converted to bytes. We can add bytes to an
+ // i8* by GEP with the byte count in the first index.
+ C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext()));
+ }
+
+ // Don't bother trying to sum two pointers. We probably can't
+ // statically compute a load that results from it anyway.
+ if (C2->getType()->isPointerTy())
+ return 0;
+
+ if (C->getType()->isPointerTy()) {
+ if (cast<PointerType>(C->getType())->getElementType()->isStructTy())
+ C2 = ConstantExpr::getIntegerCast(
+ C2, Type::getInt32Ty(C->getContext()), true);
+ C = ConstantExpr::getGetElementPtr(C, C2);
+ } else
+ C = ConstantExpr::getAdd(C, C2);
+ }
+ return C;
+ }
+ break;
+ }
+ case scMulExpr: {
+ const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
+ // Don't bother with pointers at all.
+ if (C->getType()->isPointerTy()) return 0;
+ for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
+ if (!C2 || C2->getType()->isPointerTy()) return 0;
+ C = ConstantExpr::getMul(C, C2);
+ }
+ return C;
+ }
+ break;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
+ if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
+ if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
+ if (LHS->getType() == RHS->getType())
+ return ConstantExpr::getUDiv(LHS, RHS);
+ break;
+ }
+ }
+ return 0;
+}
+
+const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
+ if (isa<SCEVConstant>(V)) return V;
+
+ // If this instruction is evolved from a constant-evolving PHI, compute the
+ // exit value from the loop without using SCEVs.
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
+ if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
+ const Loop *LI = (*this->LI)[I->getParent()];
+ if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (PN->getParent() == LI->getHeader()) {
+ // Okay, there is no closed form solution for the PHI node. Check
+ // to see if the loop that contains it has a known backedge-taken
+ // count. If so, we may be able to force computation of the exit
+ // value.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+ if (const SCEVConstant *BTCC =
+ dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+ // Okay, we know how many times the containing loop executes. If
+ // this is a constant evolving PHI node, get the final value at
+ // the specified iteration number.
+ Constant *RV = getConstantEvolutionLoopExitValue(PN,
+ BTCC->getValue()->getValue(),
+ LI);
+ if (RV) return getSCEV(RV);
+ }
+ }
+
+ // Okay, this is an expression that we cannot symbolically evaluate
+ // into a SCEV. Check to see if it's possible to symbolically evaluate
+ // the arguments into constants, and if so, try to constant propagate the
+ // result. This is particularly useful for computing loop exit values.
+ if (CanConstantFold(I)) {
+ SmallVector<Constant *, 4> Operands;
+ bool MadeImprovement = false;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *Op = I->getOperand(i);
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ Operands.push_back(C);
+ continue;
+ }
+
+ // If any of the operands is non-constant and if they are
+ // non-integer and non-pointer, don't even try to analyze them
+ // with scev techniques.
+ if (!isSCEVable(Op->getType()))
+ return V;
+
+ const SCEV *OrigV = getSCEV(Op);
+ const SCEV *OpV = getSCEVAtScope(OrigV, L);
+ MadeImprovement |= OrigV != OpV;
+
+ Constant *C = BuildConstantFromSCEV(OpV);
+ if (!C) return V;
+ if (C->getType() != Op->getType())
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ Op->getType(),
+ false),
+ C, Op->getType());
+ Operands.push_back(C);
+ }
+
+ // Check to see if getSCEVAtScope actually made an improvement.
+ if (MadeImprovement) {
+ Constant *C = 0;
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ C = ConstantFoldCompareInstOperands(CI->getPredicate(),
+ Operands[0], Operands[1], TD,
+ TLI);
+ else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
+ } else
+ C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ Operands, TD, TLI);
+ if (!C) return V;
+ return getSCEV(C);
+ }
+ }
+ }
+
+ // This is some other type of SCEVUnknown, just return it.
+ return V;
+ }
+
+ if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ if (OpAtScope != Comm->getOperand(i)) {
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
+ Comm->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+
+ for (++i; i != e; ++i) {
+ OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ NewOps.push_back(OpAtScope);
+ }
+ if (isa<SCEVAddExpr>(Comm))
+ return getAddExpr(NewOps);
+ if (isa<SCEVMulExpr>(Comm))
+ return getMulExpr(NewOps);
+ if (isa<SCEVSMaxExpr>(Comm))
+ return getSMaxExpr(NewOps);
+ if (isa<SCEVUMaxExpr>(Comm))
+ return getUMaxExpr(NewOps);
+ llvm_unreachable("Unknown commutative SCEV type!");
+ }
+ }
+ // If we got here, all operands are loop invariant.
+ return Comm;
+ }
+
+ if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
+ const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
+ const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
+ if (LHS == Div->getLHS() && RHS == Div->getRHS())
+ return Div; // must be loop invariant
+ return getUDivExpr(LHS, RHS);
+ }
+
+ // If this is a loop recurrence for a loop that does not contain L, then we
+ // are dealing with the final value computed by the loop.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
+ // First, attempt to evaluate each operand.
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
+ if (OpAtScope == AddRec->getOperand(i))
+ continue;
+
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
+ AddRec->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+ for (++i; i != e; ++i)
+ NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
+
+ const SCEV *FoldedRec =
+ getAddRecExpr(NewOps, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
+ // The addrec may be folded to a nonrecurrence, for example, if the
+ // induction variable is multiplied by zero after constant folding. Go
+ // ahead and return the folded value.
+ if (!AddRec)
+ return FoldedRec;
+ break;
+ }
+
+ // If the scope is outside the addrec's loop, evaluate it by using the
+ // loop exit value of the addrec.
+ if (!AddRec->getLoop()->contains(L)) {
+ // To evaluate this recurrence, we need to know how many times the AddRec
+ // loop iterates. Compute this now.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
+ if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
+
+ // Then, evaluate the AddRec.
+ return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
+ }
+
+ return AddRec;
+ }
+
+ if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getZeroExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getSignExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getTruncateExpr(Op, Cast->getType());
+ }
+
+ llvm_unreachable("Unknown SCEV type!");
+}
+
+/// getSCEVAtScope - This is a convenience function which does
+/// getSCEVAtScope(getSCEV(V), L).
+const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
+ return getSCEVAtScope(getSCEV(V), L);
+}
+
+/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
+/// following equation:
+///
+/// A * X = B (mod N)
+///
+/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
+/// A and B isn't important.
+///
+/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
+static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
+ ScalarEvolution &SE) {
+ uint32_t BW = A.getBitWidth();
+ assert(BW == B.getBitWidth() && "Bit widths must be the same.");
+ assert(A != 0 && "A must be non-zero.");
+
+ // 1. D = gcd(A, N)
+ //
+ // The gcd of A and N may have only one prime factor: 2. The number of
+ // trailing zeros in A is its multiplicity
+ uint32_t Mult2 = A.countTrailingZeros();
+ // D = 2^Mult2
+
+ // 2. Check if B is divisible by D.
+ //
+ // B is divisible by D if and only if the multiplicity of prime factor 2 for B
+ // is not less than multiplicity of this prime factor for D.
+ if (B.countTrailingZeros() < Mult2)
+ return SE.getCouldNotCompute();
+
+ // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
+ // modulo (N / D).
+ //
+ // (N / D) may need BW+1 bits in its representation. Hence, we'll use this
+ // bit width during computations.
+ APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
+ APInt Mod(BW + 1, 0);
+ Mod.setBit(BW - Mult2); // Mod = N / D
+ APInt I = AD.multiplicativeInverse(Mod);
+
+ // 4. Compute the minimum unsigned root of the equation:
+ // I * (B / D) mod (N / D)
+ APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
+
+ // The result is guaranteed to be less than 2^BW so we may truncate it to BW
+ // bits.
+ return SE.getConstant(Result.trunc(BW));
+}
+
+/// SolveQuadraticEquation - Find the roots of the quadratic equation for the
+/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
+/// might be the same) or two SCEVCouldNotCompute objects.
+///
+static std::pair<const SCEV *,const SCEV *>
+SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+ assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
+ const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
+ const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
+ const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
+
+ // We currently can only solve this if the coefficients are constants.
+ if (!LC || !MC || !NC) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
+ const APInt &L = LC->getValue()->getValue();
+ const APInt &M = MC->getValue()->getValue();
+ const APInt &N = NC->getValue()->getValue();
+ APInt Two(BitWidth, 2);
+ APInt Four(BitWidth, 4);
+
+ {
+ using namespace APIntOps;
+ const APInt& C = L;
+ // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
+ // The B coefficient is M-N/2
+ APInt B(M);
+ B -= sdiv(N,Two);
+
+ // The A coefficient is N/2
+ APInt A(N.sdiv(Two));
+
+ // Compute the B^2-4ac term.
+ APInt SqrtTerm(B);
+ SqrtTerm *= B;
+ SqrtTerm -= Four * (A * C);
+
+ if (SqrtTerm.isNegative()) {
+ // The loop is provably infinite.
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
+ // integer value or else APInt::sqrt() will assert.
+ APInt SqrtVal(SqrtTerm.sqrt());
+
+ // Compute the two solutions for the quadratic formula.
+ // The divisions must be performed as signed divisions.
+ APInt NegB(-B);
+ APInt TwoA(A << 1);
+ if (TwoA.isMinValue()) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ LLVMContext &Context = SE.getContext();
+
+ ConstantInt *Solution1 =
+ ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution2 =
+ ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+
+ return std::make_pair(SE.getConstant(Solution1),
+ SE.getConstant(Solution2));
+ } // end APIntOps namespace
+}
+
+/// HowFarToZero - Return the number of times a backedge comparing the specified
+/// value to zero will execute. If not computable, return CouldNotCompute.
+///
+/// This is only used for loops with a "x != y" exit test. The exit condition is
+/// now expressed as a single expression, V = x-y. So the exit test is
+/// effectively V != 0. We know and take advantage of the fact that this
+/// expression only being used in a comparison by zero context.
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ // If the value is a constant
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ // If the value is already zero, the branch will execute zero times.
+ if (C->getValue()->isZero()) return C;
+ return getCouldNotCompute(); // Otherwise it will loop infinitely.
+ }
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
+ if (!AddRec || AddRec->getLoop() != L)
+ return getCouldNotCompute();
+
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
+ // the quadratic equation to solve it.
+ if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
+ std::pair<const SCEV *,const SCEV *> Roots =
+ SolveQuadraticEquation(AddRec, *this);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1 && R2) {
+#if 0
+ dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
+ << " sol#2: " << *R2 << "\n";
+#endif
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
+ R1->getValue(),
+ R2->getValue()))) {
+ if (CB->getZExtValue() == false)
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // We can only use this value if the chrec ends up with an exact zero
+ // value at this index. When solving for "X*X != 5", for example, we
+ // should not accept a root of 2.
+ const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
+ if (Val->isZero())
+ return R1; // We found a quadratic root!
+ }
+ }
+ return getCouldNotCompute();
+ }
+
+ // Otherwise we can only handle this if it is affine.
+ if (!AddRec->isAffine())
+ return getCouldNotCompute();
+
+ // If this is an affine expression, the execution count of this branch is
+ // the minimum unsigned root of the following equation:
+ //
+ // Start + Step*N = 0 (mod 2^BW)
+ //
+ // equivalent to:
+ //
+ // Step*N = -Start (mod 2^BW)
+ //
+ // where BW is the common bit width of Start and Step.
+
+ // Get the initial value for the loop.
+ const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
+ const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
+
+ // For now we handle only constant steps.
+ //
+ // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
+ // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
+ // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
+ // We have not yet seen any such cases.
+ const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
+ if (StepC == 0 || StepC->getValue()->equalsInt(0))
+ return getCouldNotCompute();
+
+ // For positive steps (counting up until unsigned overflow):
+ // N = -Start/Step (as unsigned)
+ // For negative steps (counting down to zero):
+ // N = Start/-Step
+ // First compute the unsigned distance from zero in the direction of Step.
+ bool CountDown = StepC->getValue()->getValue().isNegative();
+ const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
+
+ // Handle unitary steps, which cannot wraparound.
+ // 1*N = -Start; -1*N = Start (mod 2^BW), so:
+ // N = Distance (as unsigned)
+ if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
+ ConstantRange CR = getUnsignedRange(Start);
+ const SCEV *MaxBECount;
+ if (!CountDown && CR.getUnsignedMin().isMinValue())
+ // When counting up, the worst starting value is 1, not 0.
+ MaxBECount = CR.getUnsignedMax().isMinValue()
+ ? getConstant(APInt::getMinValue(CR.getBitWidth()))
+ : getConstant(APInt::getMaxValue(CR.getBitWidth()));
+ else
+ MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
+ : -CR.getUnsignedMin());
+ return ExitLimit(Distance, MaxBECount);
+ }
+
+ // If the recurrence is known not to wraparound, unsigned divide computes the
+ // back edge count. We know that the value will either become zero (and thus
+ // the loop terminates), that the loop will terminate through some other exit
+ // condition first, or that the loop has undefined behavior. This means
+ // we can't "miss" the exit value, even with nonunit stride.
+ //
+ // FIXME: Prove that loops always exhibits *acceptable* undefined
+ // behavior. Loops must exhibit defined behavior until a wrapped value is
+ // actually used. So the trip count computed by udiv could be smaller than the
+ // number of well-defined iterations.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNW)) {
+ // FIXME: We really want an "isexact" bit for udiv.
+ return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+ }
+ // Then, try to solve the above equation provided that Start is constant.
+ if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
+ return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
+ -StartC->getValue()->getValue(),
+ *this);
+ return getCouldNotCompute();
+}
+
+/// HowFarToNonZero - Return the number of times a backedge checking the
+/// specified value for nonzero will execute. If not computable, return
+/// CouldNotCompute
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
+ // Loops that look like: while (X == 0) are very strange indeed. We don't
+ // handle them yet except for the trivial case. This could be expanded in the
+ // future as needed.
+
+ // If the value is a constant, check to see if it is known to be non-zero
+ // already. If so, the backedge will execute zero times.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ if (!C->getValue()->isNullValue())
+ return getConstant(C->getType(), 0);
+ return getCouldNotCompute(); // Otherwise it will loop infinitely.
+ }
+
+ // We could implement others, but I really doubt anyone writes loops like
+ // this, and if they did, they would already be constant folded.
+ return getCouldNotCompute();
+}
+
+/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
+/// (which may not be an immediate predecessor) which has exactly one
+/// successor from which BB is reachable, or null if no such block is
+/// found.
+///
+std::pair<BasicBlock *, BasicBlock *>
+ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
+ // If the block has a unique predecessor, then there is no path from the
+ // predecessor to the block that does not go through the direct edge
+ // from the predecessor to the block.
+ if (BasicBlock *Pred = BB->getSinglePredecessor())
+ return std::make_pair(Pred, BB);
+
+ // A loop's header is defined to be a block that dominates the loop.
+ // If the header has a unique predecessor outside the loop, it must be
+ // a block that has exactly one successor that can reach the loop.
+ if (Loop *L = LI->getLoopFor(BB))
+ return std::make_pair(L->getLoopPredecessor(), L->getHeader());
+
+ return std::pair<BasicBlock *, BasicBlock *>();
+}
+
+/// HasSameValue - SCEV structural equivalence is usually sufficient for
+/// testing whether two expressions are equal, however for the purposes of
+/// looking for a condition guarding a loop, it can be useful to be a little
+/// more general, since a front-end may have replicated the controlling
+/// expression.
+///
+static bool HasSameValue(const SCEV *A, const SCEV *B) {
+ // Quick check to see if they are the same SCEV.
+ if (A == B) return true;
+
+ // Otherwise, if they're both SCEVUnknown, it's possible that they hold
+ // two different instructions with the same value. Check for this case.
+ if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
+ if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
+ if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
+ if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
+ if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
+ return true;
+
+ // Otherwise assume they may have a different value.
+ return false;
+}
+
+/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
+/// predicate Pred. Return true iff any changes were made.
+///
+bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
+ const SCEV *&LHS, const SCEV *&RHS,
+ unsigned Depth) {
+ bool Changed = false;
+
+ // If we hit the max recursion limit bail out.
+ if (Depth >= 3)
+ return false;
+
+ // Canonicalize a constant to the right side.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ // Check for both operands constant.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (ConstantExpr::getICmp(Pred,
+ LHSC->getValue(),
+ RHSC->getValue())->isNullValue())
+ goto trivially_false;
+ else
+ goto trivially_true;
+ }
+ // Otherwise swap the operands to put the constant on the right.
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+
+ // If we're comparing an addrec with a value which is loop-invariant in the
+ // addrec's loop, put the addrec on the left. Also make a dominance check,
+ // as both operands could be addrecs loop-invariant in each other's loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
+ const Loop *L = AR->getLoop();
+ if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+ }
+
+ // If there's a constant operand, canonicalize comparisons with boundary
+ // cases, and canonicalize *-or-equal comparisons to regular comparisons.
+ if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
+ const APInt &RA = RC->getValue()->getValue();
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
+ if (!RA)
+ if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
+ if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
+ if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
+ ME->getOperand(0)->isAllOnesValue()) {
+ RHS = AE->getOperand(1);
+ LHS = ME->getOperand(1);
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_UGE:
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_UGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_ULT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SLT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_false;
+ break;
+ }
+ }
+
+ // Check for obvious equality.
+ if (HasSameValue(LHS, RHS)) {
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ goto trivially_true;
+ if (ICmpInst::isFalseWhenEqual(Pred))
+ goto trivially_false;
+ }
+
+ // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
+ // adding or subtracting 1 from one of the operands.
+ switch (Pred) {
+ case ICmpInst::ICMP_SLE:
+ if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ // TODO: More simplifications are possible here.
+
+ // Recursively simplify until we either hit a recursion limit or nothing
+ // changes.
+ if (Changed)
+ return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
+
+ return Changed;
+
+trivially_true:
+ // Return 0 == 0.
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = ICmpInst::ICMP_EQ;
+ return true;
+
+trivially_false:
+ // Return 0 != 0.
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = ICmpInst::ICMP_NE;
+ return true;
+}
+
+bool ScalarEvolution::isKnownNegative(const SCEV *S) {
+ return getSignedRange(S).getSignedMax().isNegative();
+}
+
+bool ScalarEvolution::isKnownPositive(const SCEV *S) {
+ return getSignedRange(S).getSignedMin().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
+ return !getSignedRange(S).getSignedMin().isNegative();
+}
+
+bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
+ return !getSignedRange(S).getSignedMax().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
+ return isKnownNegative(S) || isKnownPositive(S);
+}
+
+bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Canonicalize the inputs first.
+ (void)SimplifyICmpOperands(Pred, LHS, RHS);
+
+ // If LHS or RHS is an addrec, check to see if the condition is true in
+ // every iteration of the loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (isLoopEntryGuardedByCond(
+ AR->getLoop(), Pred, AR->getStart(), RHS) &&
+ isLoopBackedgeGuardedByCond(
+ AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
+ return true;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
+ if (isLoopEntryGuardedByCond(
+ AR->getLoop(), Pred, LHS, AR->getStart()) &&
+ isLoopBackedgeGuardedByCond(
+ AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
+ return true;
+
+ // Otherwise see what can be done with known constant ranges.
+ return isKnownPredicateWithRanges(Pred, LHS, RHS);
+}
+
+bool
+ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ if (HasSameValue(LHS, RHS))
+ return ICmpInst::isTrueWhenEqual(Pred);
+
+ // This code is split out from isKnownPredicate because it is called from
+ // within isLoopEntryGuardedByCond.
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_SGT:
+ Pred = ICmpInst::ICMP_SLT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLT: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_SGE:
+ Pred = ICmpInst::ICMP_SLE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLE: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ Pred = ICmpInst::ICMP_ULT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULT: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGE:
+ Pred = ICmpInst::ICMP_ULE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULE: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
+ return true;
+ if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
+ return true;
+
+ const SCEV *Diff = getMinusSCEV(LHS, RHS);
+ if (isKnownNonZero(Diff))
+ return true;
+ break;
+ }
+ case ICmpInst::ICMP_EQ:
+ // The check at the top of the function catches the case where
+ // the values are known to be equal.
+ break;
+ }
+ return false;
+}
+
+/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+/// protected by a conditional between LHS and RHS. This is used to
+/// to eliminate casts.
+bool
+ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return true;
+
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+
+ BranchInst *LoopContinuePredicate =
+ dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LoopContinuePredicate ||
+ LoopContinuePredicate->isUnconditional())
+ return false;
+
+ return isImpliedCond(Pred, LHS, RHS,
+ LoopContinuePredicate->getCondition(),
+ LoopContinuePredicate->getSuccessor(0) != L->getHeader());
+}
+
+/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
+/// by a conditional between LHS and RHS. This is used to help avoid max
+/// expressions in loop trip counts, and to eliminate casts.
+bool
+ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return false;
+
+ // Starting at the loop predecessor, climb up the predecessor chain, as long
+ // as there are predecessors that can be found that have unique successors
+ // leading to the original header.
+ for (std::pair<BasicBlock *, BasicBlock *>
+ Pair(L->getLoopPredecessor(), L->getHeader());
+ Pair.first;
+ Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
+
+ BranchInst *LoopEntryPredicate =
+ dyn_cast<BranchInst>(Pair.first->getTerminator());
+ if (!LoopEntryPredicate ||
+ LoopEntryPredicate->isUnconditional())
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS,
+ LoopEntryPredicate->getCondition(),
+ LoopEntryPredicate->getSuccessor(0) != Pair.second))
+ return true;
+ }
+
+ return false;
+}
+
+/// RAII wrapper to prevent recursive application of isImpliedCond.
+/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
+/// currently evaluating isImpliedCond.
+struct MarkPendingLoopPredicate {
+ Value *Cond;
+ DenseSet<Value*> &LoopPreds;
+ bool Pending;
+
+ MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
+ : Cond(C), LoopPreds(LP) {
+ Pending = !LoopPreds.insert(Cond).second;
+ }
+ ~MarkPendingLoopPredicate() {
+ if (!Pending)
+ LoopPreds.erase(Cond);
+ }
+};
+
+/// isImpliedCond - Test whether the condition described by Pred, LHS,
+/// and RHS is true whenever the given Cond value evaluates to true.
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ Value *FoundCondValue,
+ bool Inverse) {
+ MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
+ if (Mark.Pending)
+ return false;
+
+ // Recursively handle And and Or conditions.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
+ if (BO->getOpcode() == Instruction::And) {
+ if (!Inverse)
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ } else if (BO->getOpcode() == Instruction::Or) {
+ if (Inverse)
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ }
+ }
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
+ if (!ICI) return false;
+
+ // Bail if the ICmp's operands' types are wider than the needed type
+ // before attempting to call getSCEV on them. This avoids infinite
+ // recursion, since the analysis of widening casts can require loop
+ // exit condition information for overflow checking, which would
+ // lead back here.
+ if (getTypeSizeInBits(LHS->getType()) <
+ getTypeSizeInBits(ICI->getOperand(0)->getType()))
+ return false;
+
+ // Now that we found a conditional branch that dominates the loop or controls
+ // the loop latch. Check to see if it is the comparison we are looking for.
+ ICmpInst::Predicate FoundPred;
+ if (Inverse)
+ FoundPred = ICI->getInversePredicate();
+ else
+ FoundPred = ICI->getPredicate();
+
+ const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
+ const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
+
+ // Balance the types. The case where FoundLHS' type is wider than
+ // LHS' type is checked for above.
+ if (getTypeSizeInBits(LHS->getType()) >
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(Pred)) {
+ FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
+ } else {
+ FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
+ }
+ }
+
+ // Canonicalize the query to match the way instcombine will have
+ // canonicalized the comparison.
+ if (SimplifyICmpOperands(Pred, LHS, RHS))
+ if (LHS == RHS)
+ return CmpInst::isTrueWhenEqual(Pred);
+ if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
+ if (FoundLHS == FoundRHS)
+ return CmpInst::isFalseWhenEqual(FoundPred);
+
+ // Check to see if we can make the LHS or RHS match.
+ if (LHS == FoundRHS || RHS == FoundLHS) {
+ if (isa<SCEVConstant>(RHS)) {
+ std::swap(FoundLHS, FoundRHS);
+ FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
+ } else {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ }
+
+ // Check whether the found predicate is the same as the desired predicate.
+ if (FoundPred == Pred)
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+
+ // Check whether swapping the found predicate makes it the same as the
+ // desired predicate.
+ if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
+ if (isa<SCEVConstant>(RHS))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
+ else
+ return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
+ RHS, LHS, FoundLHS, FoundRHS);
+ }
+
+ // Check whether the actual condition is beyond sufficient.
+ if (FoundPred == ICmpInst::ICMP_EQ)
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+ if (Pred == ICmpInst::ICMP_NE)
+ if (!ICmpInst::isTrueWhenEqual(FoundPred))
+ if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
+ // Otherwise assume the worst.
+ return false;
+}
+
+/// isImpliedCondOperands - Test whether the condition described by Pred,
+/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
+/// and FoundRHS is true.
+bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ return isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ FoundLHS, FoundRHS) ||
+ // ~x < ~y --> x > y
+ isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ getNotSCEV(FoundRHS),
+ getNotSCEV(FoundLHS));
+}
+
+/// isImpliedCondOperandsHelper - Test whether the condition described by
+/// Pred, LHS, and RHS is true whenever the condition described by Pred,
+/// FoundLHS, and FoundRHS is true.
+bool
+ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+/// getBECount - Subtract the end and start values and divide by the step,
+/// rounding up, to get the number of times the backedge is executed. Return
+/// CouldNotCompute if an intermediate computation overflows.
+const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
+ const SCEV *End,
+ const SCEV *Step,
+ bool NoWrap) {
+ assert(!isKnownNegative(Step) &&
+ "This code doesn't handle negative strides yet!");
+
+ Type *Ty = Start->getType();
+
+ // When Start == End, we have an exact BECount == 0. Short-circuit this case
+ // here because SCEV may not be able to determine that the unsigned division
+ // after rounding is zero.
+ if (Start == End)
+ return getConstant(Ty, 0);
+
+ const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
+ const SCEV *Diff = getMinusSCEV(End, Start);
+ const SCEV *RoundUp = getAddExpr(Step, NegOne);
+
+ // Add an adjustment to the difference between End and Start so that
+ // the division will effectively round up.
+ const SCEV *Add = getAddExpr(Diff, RoundUp);
+
+ if (!NoWrap) {
+ // Check Add for unsigned overflow.
+ // TODO: More sophisticated things could be done here.
+ Type *WideTy = IntegerType::get(getContext(),
+ getTypeSizeInBits(Ty) + 1);
+ const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
+ const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
+ const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
+ if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
+ return getCouldNotCompute();
+ }
+
+ return getUDivExpr(Add, Step);
+}
+
+/// HowManyLessThans - Return the number of times a backedge containing the
+/// specified less-than comparison will execute. If not computable, return
+/// CouldNotCompute.
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool isSigned) {
+ // Only handle: "ADDREC < LoopInvariant".
+ if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!AddRec || AddRec->getLoop() != L)
+ return getCouldNotCompute();
+
+ // Check to see if we have a flag which makes analysis easy.
+ bool NoWrap = isSigned ?
+ AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) :
+ AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW));
+
+ if (AddRec->isAffine()) {
+ unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ if (Step->isZero())
+ return getCouldNotCompute();
+ if (Step->isOne()) {
+ // With unit stride, the iteration never steps past the limit value.
+ } else if (isKnownPositive(Step)) {
+ // Test whether a positive iteration can step past the limit
+ // value and past the maximum value for its type in a single step.
+ // Note that it's not sufficient to check NoWrap here, because even
+ // though the value after a wrap is undefined, it's not undefined
+ // behavior, so if wrap does occur, the loop could either terminate or
+ // loop infinitely, but in either case, the loop is guaranteed to
+ // iterate at least until the iteration where the wrapping occurs.
+ const SCEV *One = getConstant(Step->getType(), 1);
+ if (isSigned) {
+ APInt Max = APInt::getSignedMaxValue(BitWidth);
+ if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
+ .slt(getSignedRange(RHS).getSignedMax()))
+ return getCouldNotCompute();
+ } else {
+ APInt Max = APInt::getMaxValue(BitWidth);
+ if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
+ .ult(getUnsignedRange(RHS).getUnsignedMax()))
+ return getCouldNotCompute();
+ }
+ } else
+ // TODO: Handle negative strides here and below.
+ return getCouldNotCompute();
+
+ // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
+ // m. So, we count the number of iterations in which {n,+,s} < m is true.
+ // Note that we cannot simply return max(m-n,0)/s because it's not safe to
+ // treat m-n as signed nor unsigned due to overflow possibility.
+
+ // First, we get the value of the LHS in the first iteration: n
+ const SCEV *Start = AddRec->getOperand(0);
+
+ // Determine the minimum constant start value.
+ const SCEV *MinStart = getConstant(isSigned ?
+ getSignedRange(Start).getSignedMin() :
+ getUnsignedRange(Start).getUnsignedMin());
+
+ // If we know that the condition is true in order to enter the loop,
+ // then we know that it will run exactly (m-n)/s times. Otherwise, we
+ // only know that it will execute (max(m,n)-n)/s times. In both cases,
+ // the division must round up.
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L,
+ isSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT,
+ getMinusSCEV(Start, Step), RHS))
+ End = isSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+
+ // Determine the maximum constant end value.
+ const SCEV *MaxEnd = getConstant(isSigned ?
+ getSignedRange(End).getSignedMax() :
+ getUnsignedRange(End).getUnsignedMax());
+
+ // If MaxEnd is within a step of the maximum integer value in its type,
+ // adjust it down to the minimum value which would produce the same effect.
+ // This allows the subsequent ceiling division of (N+(step-1))/step to
+ // compute the correct value.
+ const SCEV *StepMinusOne = getMinusSCEV(Step,
+ getConstant(Step->getType(), 1));
+ MaxEnd = isSigned ?
+ getSMinExpr(MaxEnd,
+ getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
+ StepMinusOne)) :
+ getUMinExpr(MaxEnd,
+ getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
+ StepMinusOne));
+
+ // Finally, we subtract these two values and divide, rounding up, to get
+ // the number of times the backedge is executed.
+ const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
+
+ // The maximum backedge count is similar, except using the minimum start
+ // value and the maximum end value.
+ // If we already have an exact constant BECount, use it instead.
+ const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
+ : getBECount(MinStart, MaxEnd, Step, NoWrap);
+
+ // If the stride is nonconstant, and NoWrap == true, then
+ // getBECount(MinStart, MaxEnd) may not compute. This would result in an
+ // exact BECount and invalid MaxBECount, which should be avoided to catch
+ // more optimization opportunities.
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
+ }
+
+ return getCouldNotCompute();
+}
+
+/// getNumIterationsInRange - Return the number of iterations of this loop that
+/// produce values in the specified constant range. Another way of looking at
+/// this is that it returns the first iteration number where the value is not in
+/// the condition, thus computing the exit count. If the iteration count can't
+/// be computed, an instance of SCEVCouldNotCompute is returned.
+const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
+ ScalarEvolution &SE) const {
+ if (Range.isFullSet()) // Infinite loop.
+ return SE.getCouldNotCompute();
+
+ // If the start is a non-zero constant, shift the range to simplify things.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
+ if (!SC->getValue()->isZero()) {
+ SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
+ Operands[0] = SE.getConstant(SC->getType(), 0);
+ const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
+ getNoWrapFlags(FlagNW));
+ if (const SCEVAddRecExpr *ShiftedAddRec =
+ dyn_cast<SCEVAddRecExpr>(Shifted))
+ return ShiftedAddRec->getNumIterationsInRange(
+ Range.subtract(SC->getValue()->getValue()), SE);
+ // This is strange and shouldn't happen.
+ return SE.getCouldNotCompute();
+ }
+
+ // The only time we can solve this is when we have all constant indices.
+ // Otherwise, we cannot determine the overflow conditions.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!isa<SCEVConstant>(getOperand(i)))
+ return SE.getCouldNotCompute();
+
+
+ // Okay at this point we know that all elements of the chrec are constants and
+ // that the start element is zero.
+
+ // First check to see if the range contains zero. If not, the first
+ // iteration exits.
+ unsigned BitWidth = SE.getTypeSizeInBits(getType());
+ if (!Range.contains(APInt(BitWidth, 0)))
+ return SE.getConstant(getType(), 0);
+
+ if (isAffine()) {
+ // If this is an affine expression then we have this situation:
+ // Solve {0,+,A} in Range === Ax in Range
+
+ // We know that zero is in the range. If A is positive then we know that
+ // the upper value of the range must be the first possible exit value.
+ // If A is negative then the lower of the range is the last possible loop
+ // value. Also note that we already checked for a full range.
+ APInt One(BitWidth,1);
+ APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
+ APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
+
+ // The exit value should be (End+A)/A.
+ APInt ExitVal = (End + A).udiv(A);
+ ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
+
+ // Evaluate at the exit value. If we really did fall out of the valid
+ // range, then we computed our trip count, otherwise wrap around or other
+ // things must have happened.
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
+ if (Range.contains(Val->getValue()))
+ return SE.getCouldNotCompute(); // Something strange happened
+
+ // Ensure that the previous value is in the range. This is a sanity check.
+ assert(Range.contains(
+ EvaluateConstantChrecAtConstant(this,
+ ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
+ "Linear scev computation is off in a bad way!");
+ return SE.getConstant(ExitValue);
+ } else if (isQuadratic()) {
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
+ // quadratic equation to solve it. To do this, we must frame our problem in
+ // terms of figuring out when zero is crossed, instead of when
+ // Range.getUpper() is crossed.
+ SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
+ NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
+ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
+ // getNoWrapFlags(FlagNW)
+ FlagAnyWrap);
+
+ // Next, solve the constructed addrec
+ std::pair<const SCEV *,const SCEV *> Roots =
+ SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1) {
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
+ R1->getValue(), R2->getValue()))) {
+ if (CB->getZExtValue() == false)
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // Make sure the root is not off by one. The returned iteration should
+ // not be in the range, but the previous one should be. When solving
+ // for "X*X < 5", for example, we should not return a root of 2.
+ ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
+ R1->getValue(),
+ SE);
+ if (Range.contains(R1Val->getValue())) {
+ // The next iteration must be out of the range...
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
+
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (!Range.contains(R1Val->getValue()))
+ return SE.getConstant(NextVal);
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+
+ // If R1 was not in the range, then it is a good return value. Make
+ // sure that R1-1 WAS in the range though, just in case.
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (Range.contains(R1Val->getValue()))
+ return R1;
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+ }
+ }
+
+ return SE.getCouldNotCompute();
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// SCEVCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void ScalarEvolution::SCEVCallbackVH::deleted() {
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+ if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(getValPtr());
+ // this now dangles!
+}
+
+void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+
+ // Forget all the expressions associated with users of the old value,
+ // so that future queries will recompute the expressions using the new
+ // value.
+ Value *Old = getValPtr();
+ SmallVector<User *, 16> Worklist;
+ SmallPtrSet<User *, 8> Visited;
+ for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
+ while (!Worklist.empty()) {
+ User *U = Worklist.pop_back_val();
+ // Deleting the Old value will cause this to dangle. Postpone
+ // that until everything else is done.
+ if (U == Old)
+ continue;
+ if (!Visited.insert(U))
+ continue;
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(U);
+ for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
+ }
+ // Delete the Old value.
+ if (PHINode *PN = dyn_cast<PHINode>(Old))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(Old);
+ // this now dangles!
+}
+
+ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
+ : CallbackVH(V), SE(se) {}
+
+//===----------------------------------------------------------------------===//
+// ScalarEvolution Class Implementation
+//===----------------------------------------------------------------------===//
+
+ScalarEvolution::ScalarEvolution()
+ : FunctionPass(ID), FirstUnknown(0) {
+ initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScalarEvolution::runOnFunction(Function &F) {
+ this->F = &F;
+ LI = &getAnalysis<LoopInfo>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ return false;
+}
+
+void ScalarEvolution::releaseMemory() {
+ // Iterate through all the SCEVUnknown instances and call their
+ // destructors, so that they release their references to their values.
+ for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
+ U->~SCEVUnknown();
+ FirstUnknown = 0;
+
+ ValueExprMap.clear();
+
+ // Free any extra memory created for ExitNotTakenInfo in the unlikely event
+ // that a loop had multiple computable exits.
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+ BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
+ I != E; ++I) {
+ I->second.clear();
+ }
+
+ assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
+
+ BackedgeTakenCounts.clear();
+ ConstantEvolutionLoopExitValue.clear();
+ ValuesAtScopes.clear();
+ LoopDispositions.clear();
+ BlockDispositions.clear();
+ UnsignedRanges.clear();
+ SignedRanges.clear();
+ UniqueSCEVs.clear();
+ SCEVAllocator.Reset();
+}
+
+void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
+}
+
+bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
+ return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
+}
+
+static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
+ const Loop *L) {
+ // Print all inner loops first
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ PrintLoopInfo(OS, SE, *I);
+
+ OS << "Loop ";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ": ";
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1)
+ OS << "<multiple exits> ";
+
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable backedge-taken count. ";
+ }
+
+ OS << "\n"
+ "Loop ";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ": ";
+
+ if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
+ OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable max backedge-taken count. ";
+ }
+
+ OS << "\n";
+}
+
+void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
+ // ScalarEvolution's implementation of the print method is to print
+ // out SCEV values of all instructions that are interesting. Doing
+ // this potentially causes it to create new SCEV objects though,
+ // which technically conflicts with the const qualifier. This isn't
+ // observable from outside the class though, so casting away the
+ // const isn't dangerous.
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+ OS << "Classifying expressions for: ";
+ WriteAsOperand(OS, F, /*PrintType=*/false);
+ OS << "\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
+ OS << *I << '\n';
+ OS << " --> ";
+ const SCEV *SV = SE.getSCEV(&*I);
+ SV->print(OS);
+
+ const Loop *L = LI->getLoopFor((*I).getParent());
+
+ const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
+ if (AtUse != SV) {
+ OS << " --> ";
+ AtUse->print(OS);
+ }
+
+ if (L) {
+ OS << "\t\t" "Exits: ";
+ const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
+ if (!SE.isLoopInvariant(ExitValue, L)) {
+ OS << "<<Unknown>>";
+ } else {
+ OS << *ExitValue;
+ }
+ }
+
+ OS << "\n";
+ }
+
+ OS << "Determining loop execution counts for: ";
+ WriteAsOperand(OS, F, /*PrintType=*/false);
+ OS << "\n";
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ PrintLoopInfo(OS, &SE, *I);
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
+ std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
+ std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, LoopVariant));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ LoopDisposition D = computeLoopDisposition(S, L);
+ return LoopDispositions[S][L] = D;
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return LoopInvariant;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+
+ // If L is the addrec's loop, it's computable.
+ if (AR->getLoop() == L)
+ return LoopComputable;
+
+ // Add recurrences are never invariant in the function-body (null loop).
+ if (!L)
+ return LoopVariant;
+
+ // This recurrence is variant w.r.t. L if L contains AR's loop.
+ if (L->contains(AR->getLoop()))
+ return LoopVariant;
+
+ // This recurrence is invariant w.r.t. L if AR's loop contains L.
+ if (AR->getLoop()->contains(L))
+ return LoopInvariant;
+
+ // This recurrence is variant w.r.t. L if any of its operands
+ // are variant.
+ for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I)
+ if (!isLoopInvariant(*I, L))
+ return LoopVariant;
+
+ // Otherwise it's loop-invariant.
+ return LoopInvariant;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool HasVarying = false;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ LoopDisposition D = getLoopDisposition(*I, L);
+ if (D == LoopVariant)
+ return LoopVariant;
+ if (D == LoopComputable)
+ HasVarying = true;
+ }
+ return HasVarying ? LoopComputable : LoopInvariant;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
+ if (LD == LoopVariant)
+ return LoopVariant;
+ LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
+ if (RD == LoopVariant)
+ return LoopVariant;
+ return (LD == LoopInvariant && RD == LoopInvariant) ?
+ LoopInvariant : LoopComputable;
+ }
+ case scUnknown:
+ // All non-instruction values are loop invariant. All instructions are loop
+ // invariant if they are not contained in the specified loop.
+ // Instructions are never considered invariant in the function body
+ // (null loop) because they are defined within the "loop".
+ if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+ return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
+ return LoopInvariant;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ default: llvm_unreachable("Unknown SCEV kind!");
+ }
+}
+
+bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopInvariant;
+}
+
+bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopComputable;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
+ std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
+ Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ BlockDisposition D = computeBlockDisposition(S, BB);
+ return BlockDispositions[S][BB] = D;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return ProperlyDominatesBlock;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
+ case scAddRecExpr: {
+ // This uses a "dominates" query instead of "properly dominates" query
+ // to test for proper dominance too, because the instruction which
+ // produces the addrec's value is a PHI, and a PHI effectively properly
+ // dominates its entire containing block.
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+ if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+ return DoesNotDominateBlock;
+ }
+ // FALL THROUGH into SCEVNAryExpr handling.
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool Proper = true;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ BlockDisposition D = getBlockDisposition(*I, BB);
+ if (D == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ if (D == DominatesBlock)
+ Proper = false;
+ }
+ return Proper ? ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+ BlockDisposition LD = getBlockDisposition(LHS, BB);
+ if (LD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ BlockDisposition RD = getBlockDisposition(RHS, BB);
+ if (RD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
+ ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUnknown:
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
+ if (I->getParent() == BB)
+ return DominatesBlock;
+ if (DT->properlyDominates(I->getParent(), BB))
+ return ProperlyDominatesBlock;
+ return DoesNotDominateBlock;
+ }
+ return ProperlyDominatesBlock;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ default:
+ llvm_unreachable("Unknown SCEV kind!");
+ }
+}
+
+bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) >= DominatesBlock;
+}
+
+bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
+}
+
+namespace {
+// Search for a SCEV expression node within an expression tree.
+// Implements SCEVTraversal::Visitor.
+struct SCEVSearch {
+ const SCEV *Node;
+ bool IsFound;
+
+ SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
+
+ bool follow(const SCEV *S) {
+ IsFound |= (S == Node);
+ return !IsFound;
+ }
+ bool isDone() const { return IsFound; }
+};
+}
+
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+ SCEVSearch Search(Op);
+ visitAll(S, Search);
+ return Search.IsFound;
+}
+
+void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
+ ValuesAtScopes.erase(S);
+ LoopDispositions.erase(S);
+ BlockDispositions.erase(S);
+ UnsignedRanges.erase(S);
+ SignedRanges.erase(S);
+
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+ BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
+ BackedgeTakenInfo &BEInfo = I->second;
+ if (BEInfo.hasOperand(S, this)) {
+ BEInfo.clear();
+ BackedgeTakenCounts.erase(I++);
+ }
+ else
+ ++I;
+ }
+}
+
+typedef DenseMap<const Loop *, std::string> VerifyMap;
+
+/// replaceSubString - Replaces all occurences of From in Str with To.
+static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
+ size_t Pos = 0;
+ while ((Pos = Str.find(From, Pos)) != std::string::npos) {
+ Str.replace(Pos, From.size(), To.data(), To.size());
+ Pos += To.size();
+ }
+}
+
+/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
+static void
+getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
+ for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
+ getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
+
+ std::string &S = Map[L];
+ if (S.empty()) {
+ raw_string_ostream OS(S);
+ SE.getBackedgeTakenCount(L)->print(OS);
+
+ // false and 0 are semantically equivalent. This can happen in dead loops.
+ replaceSubString(OS.str(), "false", "0");
+ // Remove wrap flags, their use in SCEV is highly fragile.
+ // FIXME: Remove this when SCEV gets smarter about them.
+ replaceSubString(OS.str(), "<nw>", "");
+ replaceSubString(OS.str(), "<nsw>", "");
+ replaceSubString(OS.str(), "<nuw>", "");
+ }
+ }
+}
+
+void ScalarEvolution::verifyAnalysis() const {
+ if (!VerifySCEV)
+ return;
+
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+ // Gather stringified backedge taken counts for all loops using SCEV's caches.
+ // FIXME: It would be much better to store actual values instead of strings,
+ // but SCEV pointers will change if we drop the caches.
+ VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
+ for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+ getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
+
+ // Gather stringified backedge taken counts for all loops without using
+ // SCEV's caches.
+ SE.releaseMemory();
+ for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+ getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
+
+ // Now compare whether they're the same with and without caches. This allows
+ // verifying that no pass changed the cache.
+ assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
+ "New loops suddenly appeared!");
+
+ for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
+ OldE = BackedgeDumpsOld.end(),
+ NewI = BackedgeDumpsNew.begin();
+ OldI != OldE; ++OldI, ++NewI) {
+ assert(OldI->first == NewI->first && "Loop order changed!");
+
+ // Compare the stringified SCEVs. We don't care if undef backedgetaken count
+ // changes.
+ // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This
+ // means that a pass is buggy or SCEV has to learn a new pattern but is
+ // usually not harmful.
+ if (OldI->second != NewI->second &&
+ OldI->second.find("undef") == std::string::npos &&
+ NewI->second.find("undef") == std::string::npos &&
+ OldI->second != "***COULDNOTCOMPUTE***" &&
+ NewI->second != "***COULDNOTCOMPUTE***") {
+ dbgs() << "SCEVValidator: SCEV for loop '"
+ << OldI->first->getHeader()->getName()
+ << "' changed from '" << OldI->second
+ << "' to '" << NewI->second << "'!\n";
+ std::abort();
+ }
+ }
+
+ // TODO: Verify more things.
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
new file mode 100644
index 000000000000..79c5f0deb03b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -0,0 +1,173 @@
+//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a
+// simple alias analysis implemented in terms of ScalarEvolution queries.
+//
+// This differs from traditional loop dependence analysis in that it tests
+// for dependencies within a single iteration of a loop, rather than
+// dependencies between different iterations.
+//
+// ScalarEvolution has a more complete understanding of pointer arithmetic
+// than BasicAliasAnalysis' collection of ad-hoc analyses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses ScalarEvolution to answer queries.
+ class ScalarEvolutionAliasAnalysis : public FunctionPass,
+ public AliasAnalysis {
+ ScalarEvolution *SE;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {
+ initializeScalarEvolutionAliasAnalysisPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnFunction(Function &F);
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+
+ Value *GetBaseValue(const SCEV *S);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char ScalarEvolutionAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false)
+
+FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
+ return new ScalarEvolutionAliasAnalysis();
+}
+
+void
+ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+bool
+ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
+ InitializeAliasAnalysis(this);
+ SE = &getAnalysis<ScalarEvolution>();
+ return false;
+}
+
+/// GetBaseValue - Given an expression, try to find a
+/// base value. Return null is none was found.
+Value *
+ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // In an addrec, assume that the base will be in the start, rather
+ // than the step.
+ return GetBaseValue(AR->getStart());
+ } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // If there's a pointer operand, it'll be sorted at the end of the list.
+ const SCEV *Last = A->getOperand(A->getNumOperands()-1);
+ if (Last->getType()->isPointerTy())
+ return GetBaseValue(Last);
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // This is a leaf node.
+ return U->getValue();
+ }
+ // No Identified object found.
+ return 0;
+}
+
+AliasAnalysis::AliasResult
+ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are. This allows the code below to ignore this special
+ // case.
+ if (LocA.Size == 0 || LocB.Size == 0)
+ return NoAlias;
+
+ // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
+ const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
+ const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
+
+ // If they evaluate to the same expression, it's a MustAlias.
+ if (AS == BS) return MustAlias;
+
+ // If something is known about the difference between the two addresses,
+ // see if it's enough to prove a NoAlias.
+ if (SE->getEffectiveSCEVType(AS->getType()) ==
+ SE->getEffectiveSCEVType(BS->getType())) {
+ unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+ APInt ASizeInt(BitWidth, LocA.Size);
+ APInt BSizeInt(BitWidth, LocB.Size);
+
+ // Compute the difference between the two pointers.
+ const SCEV *BA = SE->getMinusSCEV(BS, AS);
+
+ // Test whether the difference is known to be great enough that memory of
+ // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
+ // are non-zero, which is special-cased above.
+ if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
+ (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+ return NoAlias;
+
+ // Folding the subtraction while preserving range information can be tricky
+ // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS
+ // and try again to see if things fold better that way.
+
+ // Compute the difference between the two pointers.
+ const SCEV *AB = SE->getMinusSCEV(AS, BS);
+
+ // Test whether the difference is known to be great enough that memory of
+ // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
+ // are non-zero, which is special-cased above.
+ if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
+ (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+ return NoAlias;
+ }
+
+ // If ScalarEvolution can find an underlying object, form a new query.
+ // The correctness of this depends on ScalarEvolution not recognizing
+ // inttoptr and ptrtoint operators.
+ Value *AO = GetBaseValue(AS);
+ Value *BO = GetBaseValue(BS);
+ if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
+ if (alias(Location(AO ? AO : LocA.Ptr,
+ AO ? +UnknownSize : LocA.Size,
+ AO ? 0 : LocA.TBAATag),
+ Location(BO ? BO : LocB.Ptr,
+ BO ? +UnknownSize : LocB.Size,
+ BO ? 0 : LocB.TBAATag)) == NoAlias)
+ return NoAlias;
+
+ // Forward the query to the next analysis.
+ return AliasAnalysis::alias(LocA, LocB);
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
new file mode 100644
index 000000000000..fcd7ce272a22
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -0,0 +1,1753 @@
+//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution expander,
+// which is used to generate the code corresponding to a given scalar evolution
+// expression.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+/// reusing an existing cast if a suitable one exists, moving an existing
+/// cast if a suitable one exists but isn't in the right place, or
+/// creating a new one.
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
+ Instruction::CastOps Op,
+ BasicBlock::iterator IP) {
+ // This function must be called with the builder having a valid insertion
+ // point. It doesn't need to be the actual IP where the uses of the returned
+ // cast will be added, but it must dominate such IP.
+ // We use this precondition to produce a cast that will dominate all its
+ // uses. In particular, this is crucial for the case where the builder's
+ // insertion point *is* the point where we were asked to put the cast.
+ // Since we don't know the builder's insertion point is actually
+ // where the uses will be added (only that it dominates it), we are
+ // not allowed to move it.
+ BasicBlock::iterator BIP = Builder.GetInsertPoint();
+
+ Instruction *Ret = NULL;
+
+ // Check to see if there is already a cast!
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (U->getType() == Ty)
+ if (CastInst *CI = dyn_cast<CastInst>(U))
+ if (CI->getOpcode() == Op) {
+ // If the cast isn't where we want it, create a new cast at IP.
+ // Likewise, do not reuse a cast at BIP because it must dominate
+ // instructions that might be inserted before BIP.
+ if (BasicBlock::iterator(CI) != IP || BIP == IP) {
+ // Create a new cast, and leave the old cast in place in case
+ // it is being used as an insert point. Clear its operand
+ // so that it doesn't hold anything live.
+ Ret = CastInst::Create(Op, V, Ty, "", IP);
+ Ret->takeName(CI);
+ CI->replaceAllUsesWith(Ret);
+ CI->setOperand(0, UndefValue::get(V->getType()));
+ break;
+ }
+ Ret = CI;
+ break;
+ }
+ }
+
+ // Create a new cast.
+ if (!Ret)
+ Ret = CastInst::Create(Op, V, Ty, V->getName(), IP);
+
+ // We assert at the end of the function since IP might point to an
+ // instruction with different dominance properties than a cast
+ // (an invoke for example) and not dominate BIP (but the cast does).
+ assert(SE.DT->dominates(Ret, BIP));
+
+ rememberInstruction(Ret);
+ return Ret;
+}
+
+/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+/// which must be possible with a noop cast, doing what we can to share
+/// the casts.
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
+ Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
+ assert((Op == Instruction::BitCast ||
+ Op == Instruction::PtrToInt ||
+ Op == Instruction::IntToPtr) &&
+ "InsertNoopCastOfTo cannot perform non-noop casts!");
+ assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
+ "InsertNoopCastOfTo cannot change sizes!");
+
+ // Short-circuit unnecessary bitcasts.
+ if (Op == Instruction::BitCast) {
+ if (V->getType() == Ty)
+ return V;
+ if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->getOperand(0)->getType() == Ty)
+ return CI->getOperand(0);
+ }
+ }
+ // Short-circuit unnecessary inttoptr<->ptrtoint casts.
+ if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if ((CI->getOpcode() == Instruction::PtrToInt ||
+ CI->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CI->getType()) ==
+ SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
+ return CI->getOperand(0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if ((CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CE->getType()) ==
+ SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return CE->getOperand(0);
+ }
+
+ // Fold a cast of a constant.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(Op, C, Ty);
+
+ // Cast the argument at the beginning of the entry block, after
+ // any bitcasts of other arguments.
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+ while ((isa<BitCastInst>(IP) &&
+ isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+ cast<BitCastInst>(IP)->getOperand(0) != A) ||
+ isa<DbgInfoIntrinsic>(IP) ||
+ isa<LandingPadInst>(IP))
+ ++IP;
+ return ReuseOrCreateCast(A, Ty, Op, IP);
+ }
+
+ // Cast the instruction immediately after the instruction.
+ Instruction *I = cast<Instruction>(V);
+ BasicBlock::iterator IP = I; ++IP;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ IP = II->getNormalDest()->begin();
+ while (isa<PHINode>(IP) || isa<LandingPadInst>(IP))
+ ++IP;
+ return ReuseOrCreateCast(I, Ty, Op, IP);
+}
+
+/// InsertBinop - Insert the specified binary operator, doing a small amount
+/// of work to avoid inserting an obviously redundant operation.
+Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
+ Value *LHS, Value *RHS) {
+ // Fold a binop with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::get(Opcode, CLHS, CRHS);
+
+ // Do a quick scan to see if we have this binop nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
+ IP->getOperand(1) == RHS)
+ return IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // If we haven't found this binop, insert it.
+ Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
+ BO->setDebugLoc(SaveInsertPt->getDebugLoc());
+ rememberInstruction(BO);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return BO;
+}
+
+/// FactorOutConstant - Test if S is divisible by Factor, using signed
+/// division. If so, update S with Factor divided out and return true.
+/// S need not be evenly divisible if a reasonable remainder can be
+/// computed.
+/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
+/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
+/// check to see if the divide was folded.
+static bool FactorOutConstant(const SCEV *&S,
+ const SCEV *&Remainder,
+ const SCEV *Factor,
+ ScalarEvolution &SE,
+ const DataLayout *TD) {
+ // Everything is divisible by one.
+ if (Factor->isOne())
+ return true;
+
+ // x/x == 1.
+ if (S == Factor) {
+ S = SE.getConstant(S->getType(), 1);
+ return true;
+ }
+
+ // For a Constant, check for a multiple of the given factor.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ // 0/x == 0.
+ if (C->isZero())
+ return true;
+ // Check for divisibility.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
+ ConstantInt *CI =
+ ConstantInt::get(SE.getContext(),
+ C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ // If the quotient is zero and the remainder is non-zero, reject
+ // the value at this scale. It will be considered for subsequent
+ // smaller scales.
+ if (!CI->isZero()) {
+ const SCEV *Div = SE.getConstant(CI);
+ S = Div;
+ Remainder =
+ SE.getAddExpr(Remainder,
+ SE.getConstant(C->getValue()->getValue().srem(
+ FC->getValue()->getValue())));
+ return true;
+ }
+ }
+ }
+
+ // In a Mul, check if there is a constant operand which is a multiple
+ // of the given factor.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ if (TD) {
+ // With DataLayout, the size is known. Check if there is a constant
+ // operand which is a multiple of the given factor. If so, we can
+ // factor it.
+ const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[0] =
+ SE.getConstant(C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ } else {
+ // Without DataLayout, check if Factor can be factored out of any of the
+ // Mul's operands. If so, we can just remove it.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *SOp = M->getOperand(i);
+ const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
+ if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
+ Remainder->isZero()) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[i] = SOp;
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ }
+ }
+ }
+
+ // In an AddRec, check if both start and step are divisible.
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = A->getStepRecurrence(SE);
+ const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
+ if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
+ return false;
+ if (!StepRem->isZero())
+ return false;
+ const SCEV *Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
+ return false;
+ // FIXME: can use A->getNoWrapFlags(FlagNW)
+ S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap);
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
+/// is the number of SCEVAddRecExprs present, which are kept at the end of
+/// the list.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+ Type *Ty,
+ ScalarEvolution &SE) {
+ unsigned NumAddRecs = 0;
+ for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+ ++NumAddRecs;
+ // Group Ops into non-addrecs and addrecs.
+ SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+ SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+ // Let ScalarEvolution sort and simplify the non-addrecs list.
+ const SCEV *Sum = NoAddRecs.empty() ?
+ SE.getConstant(Ty, 0) :
+ SE.getAddExpr(NoAddRecs);
+ // If it returned an add, use the operands. Otherwise it simplified
+ // the sum into a single value, so just use that.
+ Ops.clear();
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+ Ops.append(Add->op_begin(), Add->op_end());
+ else if (!Sum->isZero())
+ Ops.push_back(Sum);
+ // Then append the addrecs.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,d}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+ Type *Ty,
+ ScalarEvolution &SE) {
+ // Find the addrecs.
+ SmallVector<const SCEV *, 8> AddRecs;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+ const SCEV *Start = A->getStart();
+ if (Start->isZero()) break;
+ const SCEV *Zero = SE.getConstant(Ty, 0);
+ AddRecs.push_back(SE.getAddRecExpr(Zero,
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ // FIXME: A->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+ Ops[i] = Zero;
+ Ops.append(Add->op_begin(), Add->op_end());
+ e += Add->getNumOperands();
+ } else {
+ Ops[i] = Start;
+ }
+ }
+ if (!AddRecs.empty()) {
+ // Add the addrecs onto the end of the list.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+ // Resort the operand list, moving any constants to the front.
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+}
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware. If some of the indices are loop-invariant while others
+/// aren't, it might seem desirable to emit multiple GEPs, keeping the
+/// loop-invariant portions of the overall computation outside the loop.
+/// However, there are a few reasons this is not done here. Hoisting simple
+/// arithmetic is a low-level optimization that often isn't very
+/// important until late in the optimization process. In fact, passes
+/// like InstructionCombining will combine GEPs, even if it means
+/// pushing loop-invariant computation down into loops, so even if the
+/// GEPs were split here, the work would quickly be undone. The
+/// LoopStrengthReduction pass, which is usually run quite late (and
+/// after the last InstructionCombining pass), takes care of hoisting
+/// loop-invariant portions of expressions, after considering what
+/// can be folded using target addressing modes.
+///
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+ const SCEV *const *op_end,
+ PointerType *PTy,
+ Type *Ty,
+ Value *V) {
+ Type *ElTy = PTy->getElementType();
+ SmallVector<Value *, 4> GepIndices;
+ SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
+ bool AnyNonZeroIndices = false;
+
+ // Split AddRecs up into parts as either of the parts may be usable
+ // without the other.
+ SplitAddRecs(Ops, Ty, SE);
+
+ // Descend down the pointer's type and attempt to convert the other
+ // operands into GEP indices, at each level. The first index in a GEP
+ // indexes into the array implied by the pointer operand; the rest of
+ // the indices index into the element or field type selected by the
+ // preceding index.
+ for (;;) {
+ // If the scale size is not 0, attempt to factor out a scale for
+ // array indexing.
+ SmallVector<const SCEV *, 8> ScaledOps;
+ if (ElTy->isSized()) {
+ const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
+ if (!ElSize->isZero()) {
+ SmallVector<const SCEV *, 8> NewOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ const SCEV *Op = Ops[i];
+ const SCEV *Remainder = SE.getConstant(Ty, 0);
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
+ // Op now has ElSize factored out.
+ ScaledOps.push_back(Op);
+ if (!Remainder->isZero())
+ NewOps.push_back(Remainder);
+ AnyNonZeroIndices = true;
+ } else {
+ // The operand was not divisible, so add it to the list of operands
+ // we'll scan next iteration.
+ NewOps.push_back(Ops[i]);
+ }
+ }
+ // If we made any changes, update Ops.
+ if (!ScaledOps.empty()) {
+ Ops = NewOps;
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+ }
+ }
+
+ // Record the scaled array index for this level of the type. If
+ // we didn't find any operands that could be factored, tentatively
+ // assume that element zero was selected (since the zero offset
+ // would obviously be folded away).
+ Value *Scaled = ScaledOps.empty() ?
+ Constant::getNullValue(Ty) :
+ expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
+ GepIndices.push_back(Scaled);
+
+ // Collect struct field index operands.
+ while (StructType *STy = dyn_cast<StructType>(ElTy)) {
+ bool FoundFieldNo = false;
+ // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break;
+ if (SE.TD) {
+ // With DataLayout, field offsets are known. See if a constant offset
+ // falls within any of the struct fields.
+ if (Ops.empty()) break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *SE.TD->getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ }
+ }
+ } else {
+ // Without DataLayout, just check for an offsetof expression of the
+ // appropriate struct type.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
+ Type *CTy;
+ Constant *FieldNo;
+ if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
+ GepIndices.push_back(FieldNo);
+ ElTy =
+ STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
+ Ops[i] = SE.getConstant(Ty, 0);
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ break;
+ }
+ }
+ }
+ // If no struct field offsets were found, tentatively assume that
+ // field zero was selected (since the zero offset would obviously
+ // be folded away).
+ if (!FoundFieldNo) {
+ ElTy = STy->getTypeAtIndex(0u);
+ GepIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
+ }
+ }
+
+ if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+ ElTy = ATy->getElementType();
+ else
+ break;
+ }
+
+ // If none of the operands were convertible to proper GEP indices, cast
+ // the base to i8* and do an ugly getelementptr with that. It's still
+ // better than ptrtoint+arithmetic+inttoptr at least.
+ if (!AnyNonZeroIndices) {
+ // Cast the base to i8*.
+ V = InsertNoopCastOfTo(V,
+ Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+ assert(!isa<Instruction>(V) ||
+ SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint()));
+
+ // Expand the operands for a plain byte offset.
+ Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
+
+ // Fold a GEP with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(V))
+ if (Constant *CRHS = dyn_cast<Constant>(Idx))
+ return ConstantExpr::getGetElementPtr(CLHS, CRHS);
+
+ // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == Instruction::GetElementPtr &&
+ IP->getOperand(0) == V && IP->getOperand(1) == Idx)
+ return IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // Emit a GEP.
+ Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
+ rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return GEP;
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V)) break;
+
+ bool AnyIndexNotLoopInvariant = false;
+ for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
+ E = GepIndices.end(); I != E; ++I)
+ if (!L->isLoopInvariant(*I)) {
+ AnyIndexNotLoopInvariant = true;
+ break;
+ }
+ if (AnyIndexNotLoopInvariant)
+ break;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+ // because ScalarEvolution may have changed the address arithmetic to
+ // compute a value which is beyond the end of the allocated object.
+ Value *Casted = V;
+ if (V->getType() != PTy)
+ Casted = InsertNoopCastOfTo(Casted, PTy);
+ Value *GEP = Builder.CreateGEP(Casted,
+ GepIndices,
+ "scevgep");
+ Ops.push_back(SE.getUnknown(GEP));
+ rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return expand(SE.getAddExpr(Ops));
+}
+
+/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
+/// SCEV expansion. If they are nested, this is the most nested. If they are
+/// neighboring, pick the later.
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+ DominatorTree &DT) {
+ if (!A) return B;
+ if (!B) return A;
+ if (A->contains(B)) return B;
+ if (B->contains(A)) return A;
+ if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+ if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+ return A; // Arbitrarily break the tie.
+}
+
+/// getRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+ // Test whether we've already computed the most relevant loop for this SCEV.
+ std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
+ RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ if (isa<SCEVConstant>(S))
+ // A constant has no relevant loops.
+ return 0;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+ return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+ // A non-instruction has no relevant loops.
+ return 0;
+ }
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
+ const Loop *L = 0;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ L = AR->getLoop();
+ for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
+ I != E; ++I)
+ L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+ return RelevantLoops[N] = L;
+ }
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
+ const Loop *Result = getRelevantLoop(C->getOperand());
+ return RelevantLoops[C] = Result;
+ }
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const Loop *Result =
+ PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
+ getRelevantLoop(D->getRHS()),
+ *SE.DT);
+ return RelevantLoops[D] = Result;
+ }
+ llvm_unreachable("Unexpected SCEV type!");
+}
+
+namespace {
+
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+class LoopCompare {
+ DominatorTree &DT;
+public:
+ explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+ bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+ std::pair<const Loop *, const SCEV *> RHS) const {
+ // Keep pointer operands sorted at the end.
+ if (LHS.second->getType()->isPointerTy() !=
+ RHS.second->getType()->isPointerTy())
+ return LHS.second->getType()->isPointerTy();
+
+ // Compare loops with PickMostRelevantLoop.
+ if (LHS.first != RHS.first)
+ return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+ // If one operand is a non-constant negative and the other is not,
+ // put the non-constant negative on the right so that a sub can
+ // be used instead of a negate and add.
+ if (LHS.second->isNonConstantNegative()) {
+ if (!RHS.second->isNonConstantNegative())
+ return false;
+ } else if (RHS.second->isNonConstantNegative())
+ return true;
+
+ // Otherwise they are equivalent according to this comparison.
+ return false;
+ }
+};
+
+}
+
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the add operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal, and
+ // so that pointer operands are inserted first, which the code below relies on
+ // to form more involved GEPs.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants and
+ // pointer operands precede non-pointer operands.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to add all the operands. Hoist as much as possible
+ // out of loops, and form meaningful getelementptrs where possible.
+ Value *Sum = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const Loop *CurLoop = I->first;
+ const SCEV *Op = I->second;
+ if (!Sum) {
+ // This is the first operand. Just expand it.
+ Sum = expand(Op);
+ ++I;
+ } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ // The running sum expression is a pointer. Try to form a getelementptr
+ // at this level with that as the base.
+ SmallVector<const SCEV *, 4> NewOps;
+ for (; I != E && I->first == CurLoop; ++I) {
+ // If the operand is SCEVUnknown and not instructions, peek through
+ // it, to enable more of it to be folded into the GEP.
+ const SCEV *X = I->second;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+ if (!isa<Instruction>(U->getValue()))
+ X = SE.getSCEV(U->getValue());
+ NewOps.push_back(X);
+ }
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+ // The running sum is an integer, and there's a pointer at this level.
+ // Try to form a getelementptr. If the running sum is instructions,
+ // use a SCEVUnknown to avoid re-analyzing them.
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
+ SE.getSCEV(Sum));
+ for (++I; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
+ } else if (Op->isNonConstantNegative()) {
+ // Instead of doing a negate and add, just do a subtract.
+ Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ Sum = InsertBinop(Instruction::Sub, Sum, W);
+ ++I;
+ } else {
+ // A simple add.
+ Value *W = expandCodeFor(Op, Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Sum)) std::swap(Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W);
+ ++I;
+ }
+ }
+
+ return Sum;
+}
+
+Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the mul operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to mul all the operands. Hoist as much as possible
+ // out of loops.
+ Value *Prod = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const SCEV *Op = I->second;
+ if (!Prod) {
+ // This is the first operand. Just expand it.
+ Prod = expand(Op);
+ ++I;
+ } else if (Op->isAllOnesValue()) {
+ // Instead of doing a multiply by negative one, just do a negate.
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ ++I;
+ } else {
+ // A simple mul.
+ Value *W = expandCodeFor(Op, Ty);
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Prod)) std::swap(Prod, W);
+ Prod = InsertBinop(Instruction::Mul, Prod, W);
+ ++I;
+ }
+ }
+
+ return Prod;
+}
+
+Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ Value *LHS = expandCodeFor(S->getLHS(), Ty);
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
+ const APInt &RHS = SC->getValue()->getValue();
+ if (RHS.isPowerOf2())
+ return InsertBinop(Instruction::LShr, LHS,
+ ConstantInt::get(Ty, RHS.logBase2()));
+ }
+
+ Value *RHS = expandCodeFor(S->getRHS(), Ty);
+ return InsertBinop(Instruction::UDiv, LHS, RHS);
+}
+
+/// Move parts of Base into Rest to leave Base with the minimal
+/// expression that provides a pointer operand suitable for a
+/// GEP expansion.
+static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
+ ScalarEvolution &SE) {
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
+ Base = A->getStart();
+ Rest = SE.getAddExpr(Rest,
+ SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ // FIXME: A->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
+ Base = A->getOperand(A->getNumOperands()-1);
+ SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
+ NewAddOps.back() = Rest;
+ Rest = SE.getAddExpr(NewAddOps);
+ ExposePointerBase(Base, Rest, SE);
+ }
+}
+
+/// Determine if this is a well-behaved chain of instructions leading back to
+/// the PHI. If so, it may be reused by expanded expressions.
+bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
+ (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
+ return false;
+ // If any of the operands don't dominate the insert position, bail.
+ // Addrec operands are always loop-invariant, so this can only happen
+ // if there are instructions which haven't been hoisted.
+ if (L == IVIncInsertLoop) {
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT->dominates(OInst, IVIncInsertPos))
+ return false;
+ }
+ // Advance to the next instruction.
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ if (!IncV)
+ return false;
+
+ if (IncV->mayHaveSideEffects())
+ return false;
+
+ if (IncV != PN)
+ return true;
+
+ return isNormalAddRecExprPHI(PN, IncV, L);
+}
+
+/// getIVIncOperand returns an induction variable increment's induction
+/// variable operand.
+///
+/// If allowScale is set, any type of GEP is allowed as long as the nonIV
+/// operands dominate InsertPos.
+///
+/// If allowScale is not set, ensure that a GEP increment conforms to one of the
+/// simple patterns generated by getAddRecExprPHILiterally and
+/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
+Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
+ Instruction *InsertPos,
+ bool allowScale) {
+ if (IncV == InsertPos)
+ return NULL;
+
+ switch (IncV->getOpcode()) {
+ default:
+ return NULL;
+ // Check for a simple Add/Sub or GEP of a loop invariant step.
+ case Instruction::Add:
+ case Instruction::Sub: {
+ Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
+ if (!OInst || SE.DT->dominates(OInst, InsertPos))
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ return NULL;
+ }
+ case Instruction::BitCast:
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ case Instruction::GetElementPtr:
+ for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
+ I != E; ++I) {
+ if (isa<Constant>(*I))
+ continue;
+ if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
+ if (!SE.DT->dominates(OInst, InsertPos))
+ return NULL;
+ }
+ if (allowScale) {
+ // allow any kind of GEP as long as it can be hoisted.
+ continue;
+ }
+ // This must be a pointer addition of constants (pretty), which is already
+ // handled, or some number of address-size elements (ugly). Ugly geps
+ // have 2 operands. i1* is used by the expander to represent an
+ // address-size element.
+ if (IncV->getNumOperands() != 2)
+ return NULL;
+ unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
+ if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
+ && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
+ return NULL;
+ break;
+ }
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ }
+}
+
+/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
+/// it available to other uses in this loop. Recursively hoist any operands,
+/// until we reach a value that dominates InsertPos.
+bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
+ if (SE.DT->dominates(IncV, InsertPos))
+ return true;
+
+ // InsertPos must itself dominate IncV so that IncV's new position satisfies
+ // its existing users.
+ if (isa<PHINode>(InsertPos)
+ || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent()))
+ return false;
+
+ // Check that the chain of IV operands leading back to Phi can be hoisted.
+ SmallVector<Instruction*, 4> IVIncs;
+ for(;;) {
+ Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
+ if (!Oper)
+ return false;
+ // IncV is safe to hoist.
+ IVIncs.push_back(IncV);
+ IncV = Oper;
+ if (SE.DT->dominates(IncV, InsertPos))
+ break;
+ }
+ for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(),
+ E = IVIncs.rend(); I != E; ++I) {
+ (*I)->moveBefore(InsertPos);
+ }
+ return true;
+}
+
+/// Determine if this cyclic phi is in a form that would have been generated by
+/// LSR. We don't care if the phi was actually expanded in this pass, as long
+/// as it is in a low-cost form, for example, no implied multiplication. This
+/// should match any patterns generated by getAddRecExprPHILiterally and
+/// expandAddtoGEP.
+bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ for(Instruction *IVOper = IncV;
+ (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
+ /*allowScale=*/false));) {
+ if (IVOper == PN)
+ return true;
+ }
+ return false;
+}
+
+/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
+/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
+/// need to materialize IV increments elsewhere to handle difficult situations.
+Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
+ Type *ExpandTy, Type *IntTy,
+ bool useSubtract) {
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (ExpandTy->isPointerTy()) {
+ PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+ IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType()) {
+ IncV = Builder.CreateBitCast(IncV, PN->getType());
+ rememberInstruction(IncV);
+ }
+ } else {
+ IncV = useSubtract ?
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
+ rememberInstruction(IncV);
+ }
+ return IncV;
+}
+
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+ const Loop *L,
+ Type *ExpandTy,
+ Type *IntTy) {
+ assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
+
+ // Reuse a previously-inserted PHI, if present.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (!SE.isSCEVable(PN->getType()) ||
+ (SE.getEffectiveSCEVType(PN->getType()) !=
+ SE.getEffectiveSCEVType(Normalized->getType())) ||
+ SE.getSCEV(PN) != Normalized)
+ continue;
+
+ Instruction *IncV =
+ cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+ if (LSRMode) {
+ if (!isExpandedAddRecExprPHI(PN, IncV, L))
+ continue;
+ if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos))
+ continue;
+ }
+ else {
+ if (!isNormalAddRecExprPHI(PN, IncV, L))
+ continue;
+ if (L == IVIncInsertLoop)
+ do {
+ if (SE.DT->dominates(IncV, IVIncInsertPos))
+ break;
+ // Make sure the increment is where we want it. But don't move it
+ // down past a potential existing post-inc user.
+ IncV->moveBefore(IVIncInsertPos);
+ IVIncInsertPos = IncV;
+ IncV = cast<Instruction>(IncV->getOperand(0));
+ } while (IncV != PN);
+ }
+ // Ok, the add recurrence looks usable.
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+ // Remember the increment.
+ rememberInstruction(IncV);
+ return PN;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Another AddRec may need to be recursively expanded below. For example, if
+ // this AddRec is quadratic, the StepV may itself be an AddRec in this
+ // loop. Remove this loop from the PostIncLoops set before expanding such
+ // AddRecs. Otherwise, we cannot find a valid position for the step
+ // (i.e. StepV can never dominate its loop header). Ideally, we could do
+ // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
+ // so it's not worth implementing SmallPtrSet::swap.
+ PostIncLoopSet SavedPostIncLoops = PostIncLoops;
+ PostIncLoops.clear();
+
+ // Expand code for the start value.
+ Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+ L->getHeader()->begin());
+
+ // StartV must be hoisted into L's preheader to dominate the new phi.
+ assert(!isa<Instruction>(StartV) ||
+ SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
+ L->getHeader()));
+
+ // Expand code for the step value. Do this before creating the PHI so that PHI
+ // reuse code doesn't see an incomplete PHI.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ // If the stride is negative, insert a sub instead of an add for the increment
+ // (unless it's a constant, because subtracts of constants are canonicalized
+ // to adds).
+ bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
+ Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+
+ // Create the PHI.
+ BasicBlock *Header = L->getHeader();
+ Builder.SetInsertPoint(Header, Header->begin());
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
+ Twine(IVName) + ".iv");
+ rememberInstruction(PN);
+
+ // Create the step instructions and populate the PHI.
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *Pred = *HPI;
+
+ // Add a start value.
+ if (!L->contains(Pred)) {
+ PN->addIncoming(StartV, Pred);
+ continue;
+ }
+
+ // Create a step value and add it to the PHI.
+ // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
+ // instructions at IVIncInsertPos.
+ Instruction *InsertPos = L == IVIncInsertLoop ?
+ IVIncInsertPos : Pred->getTerminator();
+ Builder.SetInsertPoint(InsertPos);
+ Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
+ PN->addIncoming(IncV, Pred);
+ }
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ // After expanding subexpressions, restore the PostIncLoops set so the caller
+ // can ensure that IVIncrement dominates the current uses.
+ PostIncLoops = SavedPostIncLoops;
+
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+
+ return PN;
+}
+
+Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
+ Type *STy = S->getType();
+ Type *IntTy = SE.getEffectiveSCEVType(STy);
+ const Loop *L = S->getLoop();
+
+ // Determine a normalized form of this expression, which is the expression
+ // before any post-inc adjustment is made.
+ const SCEVAddRecExpr *Normalized = S;
+ if (PostIncLoops.count(L)) {
+ PostIncLoopSet Loops;
+ Loops.insert(L);
+ Normalized =
+ cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
+ Loops, SE, *SE.DT));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec start.
+ const SCEV *Start = Normalized->getStart();
+ const SCEV *PostLoopOffset = 0;
+ if (!SE.properlyDominates(Start, L->getHeader())) {
+ PostLoopOffset = Start;
+ Start = SE.getConstant(Normalized->getType(), 0);
+ Normalized = cast<SCEVAddRecExpr>(
+ SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
+ Normalized->getLoop(),
+ // FIXME: Normalized->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec step.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ const SCEV *PostLoopScale = 0;
+ if (!SE.dominates(Step, L->getHeader())) {
+ PostLoopScale = Step;
+ Step = SE.getConstant(Normalized->getType(), 1);
+ Normalized =
+ cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
+ Normalized->getLoop(),
+ // FIXME: Normalized
+ // ->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+
+ // Expand the core addrec. If we need post-loop scaling, force it to
+ // expand to an integer type to avoid the need for additional casting.
+ Type *ExpandTy = PostLoopScale ? IntTy : STy;
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+
+ // Accommodate post-inc mode, if necessary.
+ Value *Result;
+ if (!PostIncLoops.count(L))
+ Result = PN;
+ else {
+ // In PostInc mode, use the post-incremented value.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "PostInc mode requires a unique loop latch!");
+ Result = PN->getIncomingValueForBlock(LatchBlock);
+
+ // For an expansion to use the postinc form, the client must call
+ // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
+ // or dominated by IVIncInsertPos.
+ if (isa<Instruction>(Result)
+ && !SE.DT->dominates(cast<Instruction>(Result),
+ Builder.GetInsertPoint())) {
+ // The induction variable's postinc expansion does not dominate this use.
+ // IVUsers tries to prevent this case, so it is rare. However, it can
+ // happen when an IVUser outside the loop is not dominated by the latch
+ // block. Adjusting IVIncInsertPos before expansion begins cannot handle
+ // all cases. Consider a phi outide whose operand is replaced during
+ // expansion with the value of the postinc user. Without fundamentally
+ // changing the way postinc users are tracked, the only remedy is
+ // inserting an extra IV increment. StepV might fold into PostLoopOffset,
+ // but hopefully expandCodeFor handles that.
+ bool useSubtract =
+ !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ // Restore the insertion point to the place where the caller has
+ // determined dominates all uses.
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ }
+ }
+
+ // Re-apply any non-loop-dominating scale.
+ if (PostLoopScale) {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateMul(Result,
+ expandCodeFor(PostLoopScale, IntTy));
+ rememberInstruction(Result);
+ }
+
+ // Re-apply any non-loop-dominating offset.
+ if (PostLoopOffset) {
+ if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+ const SCEV *const OffsetArray[1] = { PostLoopOffset };
+ Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
+ } else {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateAdd(Result,
+ expandCodeFor(PostLoopOffset, IntTy));
+ rememberInstruction(Result);
+ }
+ }
+
+ return Result;
+}
+
+Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
+ if (!CanonicalMode) return expandAddRecExprLiterally(S);
+
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ const Loop *L = S->getLoop();
+
+ // First check for an existing canonical IV in a suitable type.
+ PHINode *CanonicalIV = 0;
+ if (PHINode *PN = L->getCanonicalInductionVariable())
+ if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+ CanonicalIV = PN;
+
+ // Rewrite an AddRec in terms of the canonical induction variable, if
+ // its type is more narrow.
+ if (CanonicalIV &&
+ SE.getTypeSizeInBits(CanonicalIV->getType()) >
+ SE.getTypeSizeInBits(Ty)) {
+ SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
+ for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
+ NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
+ Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
+ // FIXME: S->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BasicBlock::iterator NewInsertPt =
+ llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+ while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
+ isa<LandingPadInst>(NewInsertPt))
+ ++NewInsertPt;
+ V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
+ NewInsertPt);
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ return V;
+ }
+
+ // {X,+,F} --> X + {0,+,F}
+ if (!S->getStart()->isZero()) {
+ SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
+ NewOps[0] = SE.getConstant(Ty, 0);
+ // FIXME: can use S->getNoWrapFlags()
+ const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap);
+
+ // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
+ // comments on expandAddToGEP for details.
+ const SCEV *Base = S->getStart();
+ const SCEV *RestArray[1] = { Rest };
+ // Dig into the expression to find the pointer base for a GEP.
+ ExposePointerBase(Base, RestArray[0], SE);
+ // If we found a pointer, expand the AddRec with a GEP.
+ if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
+ // Make sure the Base isn't something exotic, such as a multiplied
+ // or divided pointer value. In those cases, the result type isn't
+ // actually a pointer type.
+ if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
+ Value *StartV = expand(Base);
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
+ }
+ }
+
+ // Just do a normal add. Pre-expand the operands to suppress folding.
+ return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
+ SE.getUnknown(expand(Rest))));
+ }
+
+ // If we don't yet have a canonical IV, create one.
+ if (!CanonicalIV) {
+ // Create and insert the PHI node for the induction variable in the
+ // specified loop.
+ BasicBlock *Header = L->getHeader();
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
+ Header->begin());
+ rememberInstruction(CanonicalIV);
+
+ Constant *One = ConstantInt::get(Ty, 1);
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *HP = *HPI;
+ if (L->contains(HP)) {
+ // Insert a unit add instruction right before the terminator
+ // corresponding to the back-edge.
+ Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
+ "indvar.next",
+ HP->getTerminator());
+ Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
+ rememberInstruction(Add);
+ CanonicalIV->addIncoming(Add, HP);
+ } else {
+ CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
+ }
+ }
+ }
+
+ // {0,+,1} --> Insert a canonical induction variable into the loop!
+ if (S->isAffine() && S->getOperand(1)->isOne()) {
+ assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+ "IVs with types different from the canonical IV should "
+ "already have been handled!");
+ return CanonicalIV;
+ }
+
+ // {0,+,F} --> {0,+,1} * F
+
+ // If this is a simple linear addrec, emit it now as a special case.
+ if (S->isAffine()) // {0,+,F} --> i*F
+ return
+ expand(SE.getTruncateOrNoop(
+ SE.getMulExpr(SE.getUnknown(CanonicalIV),
+ SE.getNoopOrAnyExtend(S->getOperand(1),
+ CanonicalIV->getType())),
+ Ty));
+
+ // If this is a chain of recurrences, turn it into a closed form, using the
+ // folders, then expandCodeFor the closed form. This allows the folders to
+ // simplify the expression without having to build a bunch of special code
+ // into this folder.
+ const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
+
+ // Promote S up to the canonical IV type, if the cast is foldable.
+ const SCEV *NewS = S;
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
+ if (isa<SCEVAddRecExpr>(Ext))
+ NewS = Ext;
+
+ const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
+ //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
+
+ // Truncate the result down to the original type, if needed.
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty);
+ return expand(T);
+}
+
+Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateTrunc(V, Ty);
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateZExt(V, Ty);
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateSExt(V, Ty);
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
+ Instruction *IP) {
+ Builder.SetInsertPoint(IP->getParent(), IP);
+ return expandCodeFor(SH, Ty);
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
+ // Expand the code for this SCEV.
+ Value *V = expand(SH);
+ if (Ty) {
+ assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
+ "non-trivial casts should be done with the SCEVs directly!");
+ V = InsertNoopCastOfTo(V, Ty);
+ }
+ return V;
+}
+
+Value *SCEVExpander::expand(const SCEV *S) {
+ // Compute an insertion point for this SCEV object. Hoist the instructions
+ // as far out in the loop nest as possible.
+ Instruction *InsertPt = Builder.GetInsertPoint();
+ for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
+ L = L->getParentLoop())
+ if (SE.isLoopInvariant(S, L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ else {
+ // LSR sets the insertion point for AddRec start/step values to the
+ // block start to simplify value reuse, even though it's an invalid
+ // position. SCEVExpander must correct for this in all cases.
+ InsertPt = L->getHeader()->getFirstInsertionPt();
+ }
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ InsertPt = L->getHeader()->getFirstInsertionPt();
+ while (InsertPt != Builder.GetInsertPoint()
+ && (isInsertedInstruction(InsertPt)
+ || isa<DbgInfoIntrinsic>(InsertPt))) {
+ InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
+ }
+ break;
+ }
+
+ // Check to see if we already expanded this here.
+ std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator
+ I = InsertedExpressions.find(std::make_pair(S, InsertPt));
+ if (I != InsertedExpressions.end())
+ return I->second;
+
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
+
+ // Expand the expression into instructions.
+ Value *V = visit(S);
+
+ // Remember the expanded value for this SCEV at this location.
+ //
+ // This is independent of PostIncLoops. The mapped value simply materializes
+ // the expression at this insertion point. If the mapped value happened to be
+ // a postinc expansion, it could be reused by a non postinc user, but only if
+ // its insertion point was already at the head of the loop.
+ InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ return V;
+}
+
+void SCEVExpander::rememberInstruction(Value *I) {
+ if (!PostIncLoops.empty())
+ InsertedPostIncValues.insert(I);
+ else
+ InsertedValues.insert(I);
+}
+
+void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
+ Builder.SetInsertPoint(BB, I);
+}
+
+/// getOrInsertCanonicalInductionVariable - This method returns the
+/// canonical induction variable of the specified type for the specified
+/// loop (inserting one if there is none). A canonical induction variable
+/// starts at zero and steps by one on each iteration.
+PHINode *
+SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
+ Type *Ty) {
+ assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
+
+ // Build a SCEV for {0,+,1}<L>.
+ // Conservatively use FlagAnyWrap for now.
+ const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
+ SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
+
+ // Emit code for it.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return V;
+}
+
+/// Sort values by integer width for replaceCongruentIVs.
+static bool width_descending(Value *lhs, Value *rhs) {
+ // Put pointers at the back and make sure pointer < pointer = false.
+ if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy())
+ return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy();
+ return rhs->getType()->getPrimitiveSizeInBits()
+ < lhs->getType()->getPrimitiveSizeInBits();
+}
+
+/// replaceCongruentIVs - Check for congruent phis in this loop header and
+/// replace them with their most canonical representative. Return the number of
+/// phis eliminated.
+///
+/// This does not depend on any SCEVExpander state but should be used in
+/// the same context that SCEVExpander is used.
+unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ const TargetTransformInfo *TTI) {
+ // Find integer phis in order of increasing width.
+ SmallVector<PHINode*, 8> Phis;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
+ Phis.push_back(Phi);
+ }
+ if (TTI)
+ std::sort(Phis.begin(), Phis.end(), width_descending);
+
+ unsigned NumElim = 0;
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ // Process phis from wide to narrow. Mapping wide phis to the their truncation
+ // so narrow phis can reuse them.
+ for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(),
+ PEnd = Phis.end(); PIter != PEnd; ++PIter) {
+ PHINode *Phi = *PIter;
+
+ // Fold constant phis. They may be congruent to other constant phis and
+ // would confuse the logic below that expects proper IVs.
+ if (Value *V = Phi->hasConstantValue()) {
+ Phi->replaceAllUsesWith(V);
+ DeadInsts.push_back(Phi);
+ ++NumElim;
+ DEBUG_WITH_TYPE(DebugType, dbgs()
+ << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
+ continue;
+ }
+
+ if (!SE.isSCEVable(Phi->getType()))
+ continue;
+
+ PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
+ if (!OrigPhiRef) {
+ OrigPhiRef = Phi;
+ if (Phi->getType()->isIntegerTy() && TTI
+ && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ // This phi can be freely truncated to the narrowest phi type. Map the
+ // truncated expression to it so it will be reused for narrow types.
+ const SCEV *TruncExpr =
+ SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
+ ExprToIVMap[TruncExpr] = Phi;
+ }
+ continue;
+ }
+
+ // Replacing a pointer phi with an integer phi or vice-versa doesn't make
+ // sense.
+ if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
+ continue;
+
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *OrigInc =
+ cast<Instruction>(OrigPhiRef->getIncomingValueForBlock(LatchBlock));
+ Instruction *IsomorphicInc =
+ cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+
+ // If this phi has the same width but is more canonical, replace the
+ // original with it. As part of the "more canonical" determination,
+ // respect a prior decision to use an IV chain.
+ if (OrigPhiRef->getType() == Phi->getType()
+ && !(ChainedPhis.count(Phi)
+ || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L))
+ && (ChainedPhis.count(Phi)
+ || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
+ std::swap(OrigPhiRef, Phi);
+ std::swap(OrigInc, IsomorphicInc);
+ }
+ // Replacing the congruent phi is sufficient because acyclic redundancy
+ // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+ // that a phi is congruent, it's often the head of an IV user cycle that
+ // is isomorphic with the original phi. It's worth eagerly cleaning up the
+ // common case of a single IV increment so that DeleteDeadPHIs can remove
+ // cycles that had postinc uses.
+ const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc),
+ IsomorphicInc->getType());
+ if (OrigInc != IsomorphicInc
+ && TruncExpr == SE.getSCEV(IsomorphicInc)
+ && ((isa<PHINode>(OrigInc) && isa<PHINode>(IsomorphicInc))
+ || hoistIVInc(OrigInc, IsomorphicInc))) {
+ DEBUG_WITH_TYPE(DebugType, dbgs()
+ << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
+ Value *NewInc = OrigInc;
+ if (OrigInc->getType() != IsomorphicInc->getType()) {
+ Instruction *IP = isa<PHINode>(OrigInc)
+ ? (Instruction*)L->getHeader()->getFirstInsertionPt()
+ : OrigInc->getNextNode();
+ IRBuilder<> Builder(IP);
+ Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
+ NewInc = Builder.
+ CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
+ }
+ IsomorphicInc->replaceAllUsesWith(NewInc);
+ DeadInsts.push_back(IsomorphicInc);
+ }
+ }
+ DEBUG_WITH_TYPE(DebugType, dbgs()
+ << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+ ++NumElim;
+ Value *NewIV = OrigPhiRef;
+ if (OrigPhiRef->getType() != Phi->getType()) {
+ IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
+ NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
+ }
+ Phi->replaceAllUsesWith(NewIV);
+ DeadInsts.push_back(Phi);
+ }
+ return NumElim;
+}
+
+namespace {
+// Search for a SCEV subexpression that is not safe to expand. Any expression
+// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
+// UDiv expressions. We don't know if the UDiv is derived from an IR divide
+// instruction, but the important thing is that we prove the denominator is
+// nonzero before expansion.
+//
+// IVUsers already checks that IV-derived expressions are safe. So this check is
+// only needed when the expression includes some subexpression that is not IV
+// derived.
+//
+// Currently, we only allow division by a nonzero constant here. If this is
+// inadequate, we could easily allow division by SCEVUnknown by using
+// ValueTracking to check isKnownNonZero().
+struct SCEVFindUnsafe {
+ bool IsUnsafe;
+
+ SCEVFindUnsafe(): IsUnsafe(false) {}
+
+ bool follow(const SCEV *S) {
+ const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
+ if (!D)
+ return true;
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+ if (SC && !SC->getValue()->isZero())
+ return true;
+ IsUnsafe = true;
+ return false;
+ }
+ bool isDone() const { return IsUnsafe; }
+};
+}
+
+namespace llvm {
+bool isSafeToExpand(const SCEV *S) {
+ SCEVFindUnsafe Search;
+ visitAll(S, Search);
+ return !Search.IsUnsafe;
+}
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
new file mode 100644
index 000000000000..dd2ed4ff831c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -0,0 +1,223 @@
+//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for working with "normalized" expressions.
+// See the comments at the top of ScalarEvolutionNormalization.h for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+using namespace llvm;
+
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the preinc or post-inc
+/// value. If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
+static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
+ const Loop *L, DominatorTree *DT) {
+ // If the user is in the loop, use the preinc value.
+ if (L->contains(User)) return false;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock)
+ return false;
+
+ // Ok, the user is outside of the loop. If it is dominated by the latch
+ // block, use the post-inc value.
+ if (DT->dominates(LatchBlock, User->getParent()))
+ return true;
+
+ // There is one case we have to be careful of: PHI nodes. These little guys
+ // can live in blocks that are not dominated by the latch block, but (since
+ // their uses occur in the predecessor block, not the block the PHI lives in)
+ // should still use the post-inc value. Check for this case now.
+ PHINode *PN = dyn_cast<PHINode>(User);
+ if (!PN || !Operand) return false; // not a phi, not dominated by latch block.
+
+ // Look at all of the uses of Operand by the PHI node. If any use corresponds
+ // to a block that is not dominated by the latch block, give up and use the
+ // preincremented value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Operand &&
+ !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
+
+ // Okay, all uses of Operand by PN are in predecessor blocks that really are
+ // dominated by the latch block. Use the post-incremented value.
+ return true;
+}
+
+namespace {
+
+/// Hold the state used during post-inc expression transformation, including a
+/// map of transformed expressions.
+class PostIncTransform {
+ TransformKind Kind;
+ PostIncLoopSet &Loops;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+
+ DenseMap<const SCEV*, const SCEV*> Transformed;
+
+public:
+ PostIncTransform(TransformKind kind, PostIncLoopSet &loops,
+ ScalarEvolution &se, DominatorTree &dt):
+ Kind(kind), Loops(loops), SE(se), DT(dt) {}
+
+ const SCEV *TransformSubExpr(const SCEV *S, Instruction *User,
+ Value *OperandValToReplace);
+
+protected:
+ const SCEV *TransformImpl(const SCEV *S, Instruction *User,
+ Value *OperandValToReplace);
+};
+
+} // namespace
+
+/// Implement post-inc transformation for all valid expression types.
+const SCEV *PostIncTransform::
+TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
+
+ if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
+ const SCEV *O = X->getOperand();
+ const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
+ if (O != N)
+ switch (S->getSCEVType()) {
+ case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
+ case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
+ case scTruncate: return SE.getTruncateExpr(N, S->getType());
+ default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
+ }
+ return S;
+ }
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // An addrec. This is the interesting part.
+ SmallVector<const SCEV *, 8> Operands;
+ const Loop *L = AR->getLoop();
+ // The addrec conceptually uses its operands at loop entry.
+ Instruction *LUser = L->getHeader()->begin();
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I) {
+ Operands.push_back(TransformSubExpr(*I, LUser, 0));
+ }
+ // Conservatively use AnyWrap until/unless we need FlagNW.
+ const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
+ switch (Kind) {
+ case NormalizeAutodetect:
+ if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ const SCEV *TransformedStep =
+ TransformSubExpr(AR->getStepRecurrence(SE),
+ User, OperandValToReplace);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ Loops.insert(L);
+ }
+#if 0
+ // This assert is conceptually correct, but ScalarEvolution currently
+ // sometimes fails to canonicalize two equal SCEVs to exactly the same
+ // form. It's possibly a pessimization when this happens, but it isn't a
+ // correctness problem, so disable this assert for now.
+ assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Normalize:
+ if (Loops.count(L)) {
+ const SCEV *TransformedStep =
+ TransformSubExpr(AR->getStepRecurrence(SE),
+ User, OperandValToReplace);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ }
+#if 0
+ // See the comment on the assert above.
+ assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Denormalize:
+ if (Loops.count(L))
+ Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE);
+ break;
+ }
+ return Result;
+ }
+
+ if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
+ SmallVector<const SCEV *, 8> Operands;
+ bool Changed = false;
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
+ Changed |= N != O;
+ Operands.push_back(N);
+ }
+ // If any operand actually changed, return a transformed result.
+ if (Changed)
+ switch (S->getSCEVType()) {
+ case scAddExpr: return SE.getAddExpr(Operands);
+ case scMulExpr: return SE.getMulExpr(Operands);
+ case scSMaxExpr: return SE.getSMaxExpr(Operands);
+ case scUMaxExpr: return SE.getUMaxExpr(Operands);
+ default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
+ }
+ return S;
+ }
+
+ if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEV *LO = X->getLHS();
+ const SCEV *RO = X->getRHS();
+ const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace);
+ const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace);
+ if (LO != LN || RO != RN)
+ return SE.getUDivExpr(LN, RN);
+ return S;
+ }
+
+ llvm_unreachable("Unexpected SCEV kind!");
+}
+
+/// Manage recursive transformation across an expression DAG. Revisiting
+/// expressions would lead to exponential recursion.
+const SCEV *PostIncTransform::
+TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
+
+ if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+ return S;
+
+ const SCEV *Result = Transformed.lookup(S);
+ if (Result)
+ return Result;
+
+ Result = TransformImpl(S, User, OperandValToReplace);
+ Transformed[S] = Result;
+ return Result;
+}
+
+/// Top level driver for transforming an expression DAG into its requested
+/// post-inc form (either "Normalized" or "Denormalized".
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+ const SCEV *S,
+ Instruction *User,
+ Value *OperandValToReplace,
+ PostIncLoopSet &Loops,
+ ScalarEvolution &SE,
+ DominatorTree &DT) {
+ PostIncTransform Transform(Kind, Loops, SE, DT);
+ return Transform.TransformSubExpr(S, User, OperandValToReplace);
+}
diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
new file mode 100644
index 000000000000..15b78728a73c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
@@ -0,0 +1,347 @@
+//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an abstract sparse conditional propagation algorithm,
+// modeled after SCCP, but with a customizable lattice function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparseprop"
+#include "llvm/Analysis/SparsePropagation.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// AbstractLatticeFunction Implementation
+//===----------------------------------------------------------------------===//
+
+AbstractLatticeFunction::~AbstractLatticeFunction() {}
+
+/// PrintValue - Render the specified lattice value to the specified stream.
+void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) {
+ if (V == UndefVal)
+ OS << "undefined";
+ else if (V == OverdefinedVal)
+ OS << "overdefined";
+ else if (V == UntrackedVal)
+ OS << "untracked";
+ else
+ OS << "unknown lattice value";
+}
+
+//===----------------------------------------------------------------------===//
+// SparseSolver Implementation
+//===----------------------------------------------------------------------===//
+
+/// getOrInitValueState - Return the LatticeVal object that corresponds to the
+/// value, initializing the value's state if it hasn't been entered into the
+/// map yet. This function is necessary because not all values should start
+/// out in the underdefined state... Arguments should be overdefined, and
+/// constants should be marked as constants.
+///
+SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
+ if (I != ValueState.end()) return I->second; // Common case, in the map
+
+ LatticeVal LV;
+ if (LatticeFunc->IsUntrackedValue(V))
+ return LatticeFunc->getUntrackedVal();
+ else if (Constant *C = dyn_cast<Constant>(V))
+ LV = LatticeFunc->ComputeConstant(C);
+ else if (Argument *A = dyn_cast<Argument>(V))
+ LV = LatticeFunc->ComputeArgument(A);
+ else if (!isa<Instruction>(V))
+ // All other non-instructions are overdefined.
+ LV = LatticeFunc->getOverdefinedVal();
+ else
+ // All instructions are underdefined by default.
+ LV = LatticeFunc->getUndefVal();
+
+ // If this value is untracked, don't add it to the map.
+ if (LV == LatticeFunc->getUntrackedVal())
+ return LV;
+ return ValueState[V] = LV;
+}
+
+/// UpdateState - When the state for some instruction is potentially updated,
+/// this function notices and adds I to the worklist if needed.
+void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst);
+ if (I != ValueState.end() && I->second == V)
+ return; // No change.
+
+ // An update. Visit uses of I.
+ ValueState[&Inst] = V;
+ InstWorkList.push_back(&Inst);
+}
+
+/// MarkBlockExecutable - This method can be used by clients to mark all of
+/// the blocks that are known to be intrinsically live in the processed unit.
+void SparseSolver::MarkBlockExecutable(BasicBlock *BB) {
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+ BBExecutable.insert(BB); // Basic block is executable!
+ BBWorkList.push_back(BB); // Add the block to the work list!
+}
+
+/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+/// work list if it is not already executable...
+void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
+
+ if (BBExecutable.count(Dest)) {
+ // The destination is already executable, but we just made an edge
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
+ visitPHINode(*cast<PHINode>(I));
+
+ } else {
+ MarkBlockExecutable(Dest);
+ }
+}
+
+
+/// getFeasibleSuccessors - Return a vector of booleans to indicate which
+/// successors are reachable from a given terminator instruction.
+void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVectorImpl<bool> &Succs,
+ bool AggressiveUndef) {
+ Succs.resize(TI.getNumSuccessors());
+ if (TI.getNumSuccessors() == 0) return;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ LatticeVal BCValue;
+ if (AggressiveUndef)
+ BCValue = getOrInitValueState(BI->getCondition());
+ else
+ BCValue = getLatticeState(BI->getCondition());
+
+ if (BCValue == LatticeFunc->getOverdefinedVal() ||
+ BCValue == LatticeFunc->getUntrackedVal()) {
+ // Overdefined condition variables can branch either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (BCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
+ if (C == 0 || !isa<ConstantInt>(C)) {
+ // Non-constant values can go either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // Constant condition variables mean the branch can only go a single way
+ Succs[C->isNullValue()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
+ // Invoke instructions successors are always executable.
+ // TODO: Could ask the lattice function if the value can throw.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ if (isa<IndirectBrInst>(TI)) {
+ Succs.assign(Succs.size(), true);
+ return;
+ }
+
+ SwitchInst &SI = cast<SwitchInst>(TI);
+ LatticeVal SCValue;
+ if (AggressiveUndef)
+ SCValue = getOrInitValueState(SI.getCondition());
+ else
+ SCValue = getLatticeState(SI.getCondition());
+
+ if (SCValue == LatticeFunc->getOverdefinedVal() ||
+ SCValue == LatticeFunc->getUntrackedVal()) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (SCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
+ if (C == 0 || !isa<ConstantInt>(C)) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+ SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C));
+ Succs[Case.getSuccessorIndex()] = true;
+}
+
+
+/// isEdgeFeasible - Return true if the control flow edge from the 'From'
+/// basic block to the 'To' basic block is currently feasible...
+bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To,
+ bool AggressiveUndef) {
+ SmallVector<bool, 16> SuccFeasible;
+ TerminatorInst *TI = From->getTerminator();
+ getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (TI->getSuccessor(i) == To && SuccFeasible[i])
+ return true;
+
+ return false;
+}
+
+void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible, true);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable...
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SparseSolver::visitPHINode(PHINode &PN) {
+ // The lattice function may store more information on a PHINode than could be
+ // computed from its incoming values. For example, SSI form stores its sigma
+ // functions as PHINodes with a single incoming value.
+ if (LatticeFunc->IsSpecialCasedPHI(&PN)) {
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(PN, IV);
+ return;
+ }
+
+ LatticeVal PNIV = getOrInitValueState(&PN);
+ LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();
+
+ // If this value is already overdefined (common) just return.
+ if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal())
+ return; // Quick exit
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be interesting,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64) {
+ UpdateState(PN, Overdefined);
+ return;
+ }
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the
+ // transfer function to give us the merge of the incoming values.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ // If the edge is not yet known to be feasible, it doesn't impact the PHI.
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true))
+ continue;
+
+ // Merge in this value.
+ LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i));
+ if (OpVal != PNIV)
+ PNIV = LatticeFunc->MergeValues(PNIV, OpVal);
+
+ if (PNIV == Overdefined)
+ break; // Rest of input values don't matter.
+ }
+
+ // Update the PHI with the compute value, which is the merge of the inputs.
+ UpdateState(PN, PNIV);
+}
+
+
+void SparseSolver::visitInst(Instruction &I) {
+ // PHIs are handled by the propagation logic, they are never passed into the
+ // transfer functions.
+ if (PHINode *PN = dyn_cast<PHINode>(&I))
+ return visitPHINode(*PN);
+
+ // Otherwise, ask the transfer function what the result is. If this is
+ // something that we care about, remember it.
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(I, IV);
+
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
+ visitTerminatorInst(*TI);
+}
+
+void SparseSolver::Solve(Function &F) {
+ MarkBlockExecutable(&F.getEntryBlock());
+
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty()) {
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Instruction *I = InstWorkList.back();
+ InstWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n");
+
+ // "I" got into the work list because it made a transition. See if any
+ // users are both live and in need of updating.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (BBExecutable.count(U->getParent())) // Inst is executable?
+ visitInst(*U);
+ }
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off BBWL: " << *BB);
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ visitInst(*I);
+ }
+ }
+}
+
+void SparseSolver::Print(Function &F, raw_ostream &OS) const {
+ OS << "\nFUNCTION: " << F.getName() << "\n";
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BBExecutable.count(BB))
+ OS << "INFEASIBLE: ";
+ OS << "\t";
+ if (BB->hasName())
+ OS << BB->getName() << ":\n";
+ else
+ OS << "; anon bb\n";
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ LatticeFunc->PrintValue(getLatticeState(I), OS);
+ OS << *I << "\n";
+ }
+
+ OS << "\n";
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
new file mode 100644
index 000000000000..64f8e96884c7
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -0,0 +1,558 @@
+//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tti"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// Setup the analysis group to manage the TargetTransformInfo passes.
+INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI)
+char TargetTransformInfo::ID = 0;
+
+TargetTransformInfo::~TargetTransformInfo() {
+}
+
+void TargetTransformInfo::pushTTIStack(Pass *P) {
+ TopTTI = this;
+ PrevTTI = &P->getAnalysis<TargetTransformInfo>();
+
+ // Walk up the chain and update the top TTI pointer.
+ for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
+ PTTI->TopTTI = this;
+}
+
+void TargetTransformInfo::popTTIStack() {
+ TopTTI = 0;
+
+ // Walk up the chain and update the top TTI pointer.
+ for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
+ PTTI->TopTTI = PrevTTI;
+
+ PrevTTI = 0;
+}
+
+void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfo>();
+}
+
+unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
+ Type *OpTy) const {
+ return PrevTTI->getOperationCost(Opcode, Ty, OpTy);
+}
+
+unsigned TargetTransformInfo::getGEPCost(
+ const Value *Ptr, ArrayRef<const Value *> Operands) const {
+ return PrevTTI->getGEPCost(Ptr, Operands);
+}
+
+unsigned TargetTransformInfo::getCallCost(FunctionType *FTy,
+ int NumArgs) const {
+ return PrevTTI->getCallCost(FTy, NumArgs);
+}
+
+unsigned TargetTransformInfo::getCallCost(const Function *F,
+ int NumArgs) const {
+ return PrevTTI->getCallCost(F, NumArgs);
+}
+
+unsigned TargetTransformInfo::getCallCost(
+ const Function *F, ArrayRef<const Value *> Arguments) const {
+ return PrevTTI->getCallCost(F, Arguments);
+}
+
+unsigned TargetTransformInfo::getIntrinsicCost(
+ Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const {
+ return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys);
+}
+
+unsigned TargetTransformInfo::getIntrinsicCost(
+ Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
+ return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments);
+}
+
+unsigned TargetTransformInfo::getUserCost(const User *U) const {
+ return PrevTTI->getUserCost(U);
+}
+
+bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
+ return PrevTTI->isLoweredToCall(F);
+}
+
+bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
+ return PrevTTI->isLegalAddImmediate(Imm);
+}
+
+bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
+ return PrevTTI->isLegalICmpImmediate(Imm);
+}
+
+bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset,
+ bool HasBaseReg,
+ int64_t Scale) const {
+ return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale);
+}
+
+bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return PrevTTI->isTruncateFree(Ty1, Ty2);
+}
+
+bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
+ return PrevTTI->isTypeLegal(Ty);
+}
+
+unsigned TargetTransformInfo::getJumpBufAlignment() const {
+ return PrevTTI->getJumpBufAlignment();
+}
+
+unsigned TargetTransformInfo::getJumpBufSize() const {
+ return PrevTTI->getJumpBufSize();
+}
+
+bool TargetTransformInfo::shouldBuildLookupTables() const {
+ return PrevTTI->shouldBuildLookupTables();
+}
+
+TargetTransformInfo::PopcntSupportKind
+TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
+ return PrevTTI->getPopcntSupport(IntTyWidthInBit);
+}
+
+unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ return PrevTTI->getIntImmCost(Imm, Ty);
+}
+
+unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
+ return PrevTTI->getNumberOfRegisters(Vector);
+}
+
+unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
+ return PrevTTI->getRegisterBitWidth(Vector);
+}
+
+unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
+ return PrevTTI->getMaximumUnrollFactor();
+}
+
+unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
+ Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+}
+
+unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const {
+ return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ return PrevTTI->getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
+ return PrevTTI->getCFInstrCost(Opcode);
+}
+
+unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ return PrevTTI->getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ ;
+}
+
+unsigned
+TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID,
+ Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys);
+}
+
+unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
+ return PrevTTI->getNumberOfParts(Tp);
+}
+
+unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const {
+ return PrevTTI->getAddressComputationCost(Tp);
+}
+
+namespace {
+
+struct NoTTI : ImmutablePass, TargetTransformInfo {
+ const DataLayout *DL;
+
+ NoTTI() : ImmutablePass(ID), DL(0) {
+ initializeNoTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ // Note that this subclass is special, and must *not* call initializeTTI as
+ // it does not chain.
+ TopTTI = this;
+ PrevTTI = 0;
+ DL = getAnalysisIfAvailable<DataLayout>();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // Note that this subclass is special, and must *not* call
+ // TTI::getAnalysisUsage as it breaks the recursion.
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const {
+ switch (Opcode) {
+ default:
+ // By default, just classify everything as 'basic'.
+ return TCC_Basic;
+
+ case Instruction::GetElementPtr:
+ llvm_unreachable("Use getGEPCost for GEP operations!");
+
+ case Instruction::BitCast:
+ assert(OpTy && "Cast instructions must provide the operand type");
+ if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
+ // Identity and pointer-to-pointer casts are free.
+ return TCC_Free;
+
+ // Otherwise, the default basic cost is used.
+ return TCC_Basic;
+
+ case Instruction::IntToPtr:
+ // An inttoptr cast is free so long as the input is a legal integer type
+ // which doesn't contain values outside the range of a pointer.
+ if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
+ OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits())
+ return TCC_Free;
+
+ // Otherwise it's not a no-op.
+ return TCC_Basic;
+
+ case Instruction::PtrToInt:
+ // A ptrtoint cast is free so long as the result is large enough to store
+ // the pointer, and a legal integer type.
+ if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) &&
+ Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits())
+ return TCC_Free;
+
+ // Otherwise it's not a no-op.
+ return TCC_Basic;
+
+ case Instruction::Trunc:
+ // trunc to a native type is free (assuming the target has compare and
+ // shift-right of the same width).
+ if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty)))
+ return TCC_Free;
+
+ return TCC_Basic;
+ }
+ }
+
+ unsigned getGEPCost(const Value *Ptr,
+ ArrayRef<const Value *> Operands) const {
+ // In the basic model, we just assume that all-constant GEPs will be folded
+ // into their uses via addressing modes.
+ for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
+ if (!isa<Constant>(Operands[Idx]))
+ return TCC_Basic;
+
+ return TCC_Free;
+ }
+
+ unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const {
+ assert(FTy && "FunctionType must be provided to this routine.");
+
+ // The target-independent implementation just measures the size of the
+ // function by approximating that each argument will take on average one
+ // instruction to prepare.
+
+ if (NumArgs < 0)
+ // Set the argument number to the number of explicit arguments in the
+ // function.
+ NumArgs = FTy->getNumParams();
+
+ return TCC_Basic * (NumArgs + 1);
+ }
+
+ unsigned getCallCost(const Function *F, int NumArgs = -1) const {
+ assert(F && "A concrete function must be provided to this routine.");
+
+ if (NumArgs < 0)
+ // Set the argument number to the number of explicit arguments in the
+ // function.
+ NumArgs = F->arg_size();
+
+ if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) {
+ FunctionType *FTy = F->getFunctionType();
+ SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
+ return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
+ }
+
+ if (!TopTTI->isLoweredToCall(F))
+ return TCC_Basic; // Give a basic cost if it will be lowered directly.
+
+ return TopTTI->getCallCost(F->getFunctionType(), NumArgs);
+ }
+
+ unsigned getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) const {
+ // Simply delegate to generic handling of the call.
+ // FIXME: We should use instsimplify or something else to catch calls which
+ // will constant fold with these arguments.
+ return TopTTI->getCallCost(F, Arguments.size());
+ }
+
+ unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys) const {
+ switch (IID) {
+ default:
+ // Intrinsics rarely (if ever) have normal argument setup constraints.
+ // Model them as having a basic instruction cost.
+ // FIXME: This is wrong for libc intrinsics.
+ return TCC_Basic;
+
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // These intrinsics don't actually represent code after lowering.
+ return TCC_Free;
+ }
+ }
+
+ unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments) const {
+ // Delegate to the generic intrinsic handling code. This mostly provides an
+ // opportunity for targets to (for example) special case the cost of
+ // certain intrinsics based on constants used as arguments.
+ SmallVector<Type *, 8> ParamTys;
+ ParamTys.reserve(Arguments.size());
+ for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
+ ParamTys.push_back(Arguments[Idx]->getType());
+ return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys);
+ }
+
+ unsigned getUserCost(const User *U) const {
+ if (isa<PHINode>(U))
+ return TCC_Free; // Model all PHI nodes as free.
+
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U))
+ // In the basic model we just assume that all-constant GEPs will be
+ // folded into their uses via addressing modes.
+ return GEP->hasAllConstantIndices() ? TCC_Free : TCC_Basic;
+
+ if (ImmutableCallSite CS = U) {
+ const Function *F = CS.getCalledFunction();
+ if (!F) {
+ // Just use the called value type.
+ Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
+ return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
+ }
+
+ SmallVector<const Value *, 8> Arguments;
+ for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(),
+ AE = CS.arg_end();
+ AI != AE; ++AI)
+ Arguments.push_back(*AI);
+
+ return TopTTI->getCallCost(F, Arguments);
+ }
+
+ if (const CastInst *CI = dyn_cast<CastInst>(U)) {
+ // Result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical or return instructions). These are usually
+ // nop on most sane targets.
+ if (isa<CmpInst>(CI->getOperand(0)))
+ return TCC_Free;
+ }
+
+ // Otherwise delegate to the fully generic implementations.
+ return getOperationCost(Operator::getOpcode(U), U->getType(),
+ U->getNumOperands() == 1 ?
+ U->getOperand(0)->getType() : 0);
+ }
+
+ bool isLoweredToCall(const Function *F) const {
+ // FIXME: These should almost certainly not be handled here, and instead
+ // handled with the help of TLI or the target itself. This was largely
+ // ported from existing analysis heuristics here so that such refactorings
+ // can take place in the future.
+
+ if (F->isIntrinsic())
+ return false;
+
+ if (F->hasLocalLinkage() || !F->hasName())
+ return true;
+
+ StringRef Name = F->getName();
+
+ // These will all likely lower to a single selection DAG node.
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+ Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
+ Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
+ Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
+ return false;
+
+ // These are all likely to be optimized into something smaller.
+ if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
+ Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name ==
+ "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" ||
+ Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs")
+ return false;
+
+ return true;
+ }
+
+ bool isLegalAddImmediate(int64_t Imm) const {
+ return false;
+ }
+
+ bool isLegalICmpImmediate(int64_t Imm) const {
+ return false;
+ }
+
+ bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) const {
+ // Guess that reg+reg addressing is allowed. This heuristic is taken from
+ // the implementation of LSR.
+ return !BaseGV && BaseOffset == 0 && Scale <= 1;
+ }
+
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return false;
+ }
+
+ bool isTypeLegal(Type *Ty) const {
+ return false;
+ }
+
+ unsigned getJumpBufAlignment() const {
+ return 0;
+ }
+
+ unsigned getJumpBufSize() const {
+ return 0;
+ }
+
+ bool shouldBuildLookupTables() const {
+ return true;
+ }
+
+ PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
+ return PSK_Software;
+ }
+
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty) const {
+ return 1;
+ }
+
+ unsigned getNumberOfRegisters(bool Vector) const {
+ return 8;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) const {
+ return 32;
+ }
+
+ unsigned getMaximumUnrollFactor() const {
+ return 1;
+ }
+
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
+ OperandValueKind) const {
+ return 1;
+ }
+
+ unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index = 0, Type *SubTp = 0) const {
+ return 1;
+ }
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ return 1;
+ }
+
+ unsigned getCFInstrCost(unsigned Opcode) const {
+ return 1;
+ }
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy = 0) const {
+ return 1;
+ }
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index = -1) const {
+ return 1;
+ }
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ return 1;
+ }
+
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID,
+ Type *RetTy,
+ ArrayRef<Type*> Tys) const {
+ return 1;
+ }
+
+ unsigned getNumberOfParts(Type *Tp) const {
+ return 0;
+ }
+
+ unsigned getAddressComputationCost(Type *Tp) const {
+ return 0;
+ }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti",
+ "No target information", true, true, true)
+char NoTTI::ID = 0;
+
+ImmutablePass *llvm::createNoTargetTransformInfoPass() {
+ return new NoTTI();
+}
diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp
new file mode 100644
index 000000000000..4c68322b8282
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Trace.cpp
@@ -0,0 +1,53 @@
+//===- Trace.cpp - Implementation of Trace class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents a single trace of LLVM basic blocks. A trace is a
+// single entry, multiple exit, region of code that is often hot. Trace-based
+// optimizations treat traces almost like they are a large, strange, basic
+// block: because the trace path is assumed to be hot, optimizations for the
+// fall-through path are made at the expense of the non-fall-through paths.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Trace.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+Function *Trace::getFunction() const {
+ return getEntryBasicBlock()->getParent();
+}
+
+Module *Trace::getModule() const {
+ return getFunction()->getParent();
+}
+
+/// print - Write trace to output stream.
+///
+void Trace::print(raw_ostream &O) const {
+ Function *F = getFunction();
+ O << "; Trace from function " << F->getName() << ", blocks:\n";
+ for (const_iterator i = begin(), e = end(); i != e; ++i) {
+ O << "; ";
+ WriteAsOperand(O, *i, true, getModule());
+ O << "\n";
+ }
+ O << "; Trace parent function: \n" << *F;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// dump - Debugger convenience method; writes trace to standard error
+/// output stream.
+///
+void Trace::dump() const {
+ print(dbgs());
+}
+#endif
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
new file mode 100644
index 000000000000..68e43b2cdb63
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -0,0 +1,300 @@
+//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TypeBasedAliasAnalysis pass, which implements
+// metadata-based TBAA.
+//
+// In LLVM IR, memory does not have types, so LLVM's own type system is not
+// suitable for doing TBAA. Instead, metadata is added to the IR to describe
+// a type system of a higher level language. This can be used to implement
+// typical C/C++ TBAA, but it can also be used to implement custom alias
+// analysis behavior for other languages.
+//
+// The current metadata format is very simple. TBAA MDNodes have up to
+// three fields, e.g.:
+// !0 = metadata !{ metadata !"an example type tree" }
+// !1 = metadata !{ metadata !"int", metadata !0 }
+// !2 = metadata !{ metadata !"float", metadata !0 }
+// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+//
+// The first field is an identity field. It can be any value, usually
+// an MDString, which uniquely identifies the type. The most important
+// name in the tree is the name of the root node. Two trees with
+// different root node names are entirely disjoint, even if they
+// have leaves with common names.
+//
+// The second field identifies the type's parent node in the tree, or
+// is null or omitted for a root node. A type is considered to alias
+// all of its descendants and all of its ancestors in the tree. Also,
+// a type is considered to alias all types in other trees, so that
+// bitcode produced from multiple front-ends is handled conservatively.
+//
+// If the third field is present, it's an integer which if equal to 1
+// indicates that the type is "constant" (meaning pointsToConstantMemory
+// should return true; see
+// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
+//
+// TODO: The current metadata format doesn't support struct
+// fields. For example:
+// struct X {
+// double d;
+// int i;
+// };
+// void foo(struct X *x, struct X *y, double *p) {
+// *x = *y;
+// *p = 0.0;
+// }
+// Struct X has a double member, so the store to *x can alias the store to *p.
+// Currently it's not possible to precisely describe all the things struct X
+// aliases, so struct assignments must use conservative TBAA nodes. There's
+// no scheme for attaching metadata to @llvm.memcpy yet either.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+// A handy option for disabling TBAA functionality. The same effect can also be
+// achieved by stripping the !tbaa tags from IR, but this option is sometimes
+// more convenient.
+static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+
+namespace {
+ /// TBAANode - This is a simple wrapper around an MDNode which provides a
+ /// higher-level interface by hiding the details of how alias analysis
+ /// information is encoded in its operands.
+ class TBAANode {
+ const MDNode *Node;
+
+ public:
+ TBAANode() : Node(0) {}
+ explicit TBAANode(const MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this TBAANode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getParent - Get this TBAANode's Alias tree parent.
+ TBAANode getParent() const {
+ if (Node->getNumOperands() < 2)
+ return TBAANode();
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAANode();
+ // Ok, this node has a valid parent. Return it.
+ return TBAANode(P);
+ }
+
+ /// TypeIsImmutable - Test if this TBAANode represents a type for objects
+ /// which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 3)
+ return false;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+ };
+}
+
+namespace {
+ /// TypeBasedAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses TypeBased to answer queries.
+ class TypeBasedAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ TypeBasedAliasAnalysis() : ImmutablePass(ID) {
+ initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ bool Aliases(const MDNode *A, const MDNode *B) const;
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char TypeBasedAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
+ "Type-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
+ return new TypeBasedAliasAnalysis();
+}
+
+void
+TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+/// Aliases - Test whether the type represented by A may alias the
+/// type represented by B.
+bool
+TypeBasedAliasAnalysis::Aliases(const MDNode *A,
+ const MDNode *B) const {
+ // Keep track of the root node for A and B.
+ TBAANode RootA, RootB;
+
+ // Climb the tree from A to see if we reach B.
+ for (TBAANode T(A); ; ) {
+ if (T.getNode() == B)
+ // B is an ancestor of A.
+ return true;
+
+ RootA = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Climb the tree from B to see if we reach A.
+ for (TBAANode T(B); ; ) {
+ if (T.getNode() == A)
+ // A is an ancestor of B.
+ return true;
+
+ RootB = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
+
+AliasAnalysis::AliasResult
+TypeBasedAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
+ if (!EnableTBAA)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
+ // be conservative.
+ const MDNode *AM = LocA.TBAATag;
+ if (!AM) return AliasAnalysis::alias(LocA, LocB);
+ const MDNode *BM = LocB.TBAATag;
+ if (!BM) return AliasAnalysis::alias(LocA, LocB);
+
+ // If they may alias, chain to the next AliasAnalysis.
+ if (Aliases(AM, BM))
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Otherwise return a definitive result.
+ return NoAlias;
+}
+
+bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableTBAA)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ const MDNode *M = Loc.TBAATag;
+ if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ // If this is an "immutable" type, we can assume the pointer is pointing
+ // to constant memory.
+ if (TBAANode(M).TypeIsImmutable())
+ return true;
+
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefBehavior(CS);
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If this is an "immutable" type, we can assume the call doesn't write
+ // to memory.
+ if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (TBAANode(M).TypeIsImmutable())
+ Min = OnlyReadsMemory;
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
+ // Functions don't have metadata. Just chain to the next implementation.
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ if (const MDNode *L = Loc.TBAATag)
+ if (const MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(L, M))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+
+ if (const MDNode *M1 =
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M2 =
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(M1, M2))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
new file mode 100644
index 000000000000..45dcc5e37ecf
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -0,0 +1,2034 @@
+//===- ValueTracking.cpp - Walk computations to compute properties --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help analyze properties that chains of
+// computations have.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PatternMatch.h"
+#include <cstring>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+const unsigned MaxDepth = 6;
+
+/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
+/// unknown returns 0). For vector types, returns the element type's bitwidth.
+static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
+ if (unsigned BitWidth = Ty->getScalarSizeInBits())
+ return BitWidth;
+ assert(isa<PointerType>(Ty) && "Expected a pointer type!");
+ return TD ? TD->getPointerSizeInBits() : 0;
+}
+
+static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const DataLayout *TD, unsigned Depth) {
+ if (!Add) {
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
+ // We know that the top bits of C-X are clear if X contains less bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (!CLHS->getValue().isNegative()) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
+ }
+ }
+ }
+ }
+
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ // If one of the operands has trailing zeros, then the bits that the
+ // other operand has in those bit positions will be preserved in the
+ // result. For an add, this works with either operand. For a subtract,
+ // this only works if the known zeros are in the right operand.
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1);
+ assert((LHSKnownZero & LHSKnownOne) == 0 &&
+ "Bits known to be one AND zero?");
+ unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
+
+ llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
+
+ // Determine which operand has more trailing zeros, and use that
+ // many bits from the other operand.
+ if (LHSKnownZeroOut > RHSKnownZeroOut) {
+ if (Add) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
+ KnownZero |= KnownZero2 & Mask;
+ KnownOne |= KnownOne2 & Mask;
+ } else {
+ // If the known zeros are in the left operand for a subtract,
+ // fall back to the minimum known zeros in both operands.
+ KnownZero |= APInt::getLowBitsSet(BitWidth,
+ std::min(LHSKnownZeroOut,
+ RHSKnownZeroOut));
+ }
+ } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
+ KnownZero |= LHSKnownZero & Mask;
+ KnownOne |= LHSKnownOne & Mask;
+ }
+
+ // Are we still trying to solve for the sign bit?
+ if (!KnownZero.isNegative() && !KnownOne.isNegative()) {
+ if (NSW) {
+ if (Add) {
+ // Adding two positive numbers can't wrap into negative
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // and adding two negative numbers can't wrap into positive.
+ else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ } else {
+ // Subtracting a negative number from a positive one can't wrap
+ if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // neither can subtracting a positive number from a negative one.
+ else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ }
+ }
+}
+
+static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const DataLayout *TD, unsigned Depth) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ bool isKnownNegative = false;
+ bool isKnownNonNegative = false;
+ // If the multiplication is known not to overflow, compute the sign bit.
+ if (NSW) {
+ if (Op0 == Op1) {
+ // The product of a number with itself is non-negative.
+ isKnownNonNegative = true;
+ } else {
+ bool isKnownNonNegativeOp1 = KnownZero.isNegative();
+ bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
+ bool isKnownNegativeOp1 = KnownOne.isNegative();
+ bool isKnownNegativeOp0 = KnownOne2.isNegative();
+ // The product of two numbers with the same sign is non-negative.
+ isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
+ (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
+ // The product of a negative number and a non-negative number is either
+ // negative or zero.
+ if (!isKnownNonNegative)
+ isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
+ isKnownNonZero(Op0, TD, Depth)) ||
+ (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
+ isKnownNonZero(Op1, TD, Depth));
+ }
+ }
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conserative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+
+ // Only make use of no-wrap flags if we failed to compute the sign bit
+ // directly. This matters if the multiplication always overflows, in
+ // which case we prefer to follow the result of the direct computation,
+ // though as the program is invoking undefined behaviour we can choose
+ // whatever we like here.
+ if (isKnownNonNegative && !KnownOne.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (isKnownNegative && !KnownZero.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+}
+
+void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned NumRanges = Ranges.getNumOperands() / 2;
+ assert(NumRanges >= 1);
+
+ // Use the high end of the ranges to find leading zeros.
+ unsigned MinLeadingZeros = BitWidth;
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Lower = cast<ConstantInt>(Ranges.getOperand(2*i + 0));
+ ConstantInt *Upper = cast<ConstantInt>(Ranges.getOperand(2*i + 1));
+ ConstantRange Range(Lower->getValue(), Upper->getValue());
+ if (Range.isWrappedSet())
+ MinLeadingZeros = 0; // -1 has no zeros
+ unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros();
+ MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros);
+ }
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
+}
+/// ComputeMaskedBits - Determine which of the bits are known to be either zero
+/// or one and return them in the KnownZero/KnownOne bit sets.
+///
+/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
+/// we cannot optimize based on the assumption that it is zero without changing
+/// it to be an explicit zero. If we don't change it to zero, other code could
+/// optimized based on the contradictory assumption that it is non-zero.
+/// Because instcombine aggressively folds operations with undef args anyway,
+/// this won't lose us code quality.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
+void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout *TD, unsigned Depth) {
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ assert((V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarType()->isPointerTy()) &&
+ "Not integer or pointer type!");
+ assert((!TD ||
+ TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ (!V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "V, Mask, KnownOne and KnownZero should have same BitWidth");
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue();
+ KnownZero = ~KnownOne;
+ return;
+ }
+ // Null and aggregate-zero are all-zeros.
+ if (isa<ConstantPointerNull>(V) ||
+ isa<ConstantAggregateZero>(V)) {
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ return;
+ }
+ // Handle a constant vector by taking the intersection of the known bits of
+ // each element. There is no real need to handle ConstantVector here, because
+ // we don't handle undef in any particularly useful way.
+ if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ // We know that CDS must be a vector of integers. Take the intersection of
+ // each element.
+ KnownZero.setAllBits(); KnownOne.setAllBits();
+ APInt Elt(KnownZero.getBitWidth(), 0);
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ Elt = CDS->getElementAsInteger(i);
+ KnownZero &= ~Elt;
+ KnownOne &= Elt;
+ }
+ return;
+ }
+
+ // The address of an aligned GlobalValue has trailing zeros.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ unsigned Align = GV->getAlignment();
+ if (Align == 0 && TD) {
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ Type *ObjectType = GVar->getType()->getElementType();
+ if (ObjectType->isSized()) {
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+ Align = TD->getPreferredAlignment(GVar);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
+ }
+ }
+ if (Align > 0)
+ KnownZero = APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ else
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ return;
+ }
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden()) {
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ } else {
+ ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
+ }
+ return;
+ }
+
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ unsigned Align = 0;
+
+ if (A->hasByValAttr()) {
+ // Get alignment information off byval arguments if specified in the IR.
+ Align = A->getParamAlignment();
+ } else if (TD && A->hasStructRetAttr()) {
+ // An sret parameter has at least the ABI alignment of the return type.
+ Type *EltTy = cast<PointerType>(A->getType())->getElementType();
+ if (EltTy->isSized())
+ Align = TD->getABITypeAlignment(EltTy);
+ }
+
+ if (Align)
+ KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align));
+ return;
+ }
+
+ // Start out not knowing anything.
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
+
+ if (Depth == MaxDepth)
+ return; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return;
+
+ APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
+ computeMaskedBitsLoad(*MD, KnownZero);
+ return;
+ case Instruction::And: {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ return;
+ }
+ case Instruction::Or: {
+ ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ return;
+ }
+ case Instruction::Xor: {
+ ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case Instruction::Mul: {
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ break;
+ }
+ case Instruction::UDiv: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
+ ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+ return;
+ }
+ case Instruction::Select:
+ ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ return; // Can't work with floating point.
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ // We can't handle these if we don't know the pointer size.
+ if (!TD) return;
+ // FALL THROUGH and handle them the same as zext/trunc.
+ case Instruction::ZExt:
+ case Instruction::Trunc: {
+ Type *SrcTy = I->getOperand(0)->getType();
+
+ unsigned SrcBitWidth;
+ // Note that we handle pointer operands here because of inttoptr/ptrtoint
+ // which fall through here.
+ if(TD) {
+ SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType());
+ } else {
+ SrcBitWidth = SrcTy->getScalarSizeInBits();
+ if (!SrcBitWidth) return;
+ }
+
+ assert(SrcBitWidth && "SrcBitWidth can't be zero");
+ KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
+ KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ KnownZero = KnownZero.zextOrTrunc(BitWidth);
+ KnownOne = KnownOne.zextOrTrunc(BitWidth);
+ // Any top bits are known to be zero.
+ if (BitWidth > SrcBitWidth)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ return;
+ }
+ case Instruction::BitCast: {
+ Type *SrcTy = I->getOperand(0)->getType();
+ if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ // TODO: For now, not handling conversions like:
+ // (bitcast i64 %x to <2 x i32>)
+ !I->getType()->isVectorTy()) {
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ return;
+ }
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
+ KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ return;
+ }
+ case Instruction::Shl:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShiftAmt;
+ KnownOne <<= ShiftAmt;
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
+ return;
+ }
+ break;
+ case Instruction::LShr:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // Compute the new bits that are at the top now.
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Unsigned shift right.
+ ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ // high bits known zero.
+ KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ return;
+ }
+ break;
+ case Instruction::AShr:
+ // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // Compute the new bits that are at the top now.
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Signed shift right.
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
+ KnownZero |= HighBits;
+ else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
+ KnownOne |= HighBits;
+ return;
+ }
+ break;
+ case Instruction::Sub: {
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+ Depth);
+ break;
+ }
+ case Instruction::Add: {
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+ Depth);
+ break;
+ }
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+
+ // The low bits of the first operand are unchanged by the srem.
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ }
+ }
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (KnownZero.isNonNegative()) {
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
+ Depth+1);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ }
+
+ break;
+ case Instruction::URem: {
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero |= ~LowBits;
+ KnownOne &= LowBits;
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+
+ unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
+ break;
+ }
+
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(V);
+ unsigned Align = AI->getAlignment();
+ if (Align == 0 && TD)
+ Align = TD->getABITypeAlignment(AI->getType()->getElementType());
+
+ if (Align > 0)
+ KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align));
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ // Analyze all of the subscripts of this getelementptr instruction
+ // to determine if we can prove known low zero bits.
+ APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
+ Depth+1);
+ unsigned TrailZ = LocalKnownZero.countTrailingOnes();
+
+ gep_type_iterator GTI = gep_type_begin(I);
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
+ Value *Index = I->getOperand(i);
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ // Handle struct member offset arithmetic.
+ if (!TD) return;
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+ uint64_t Offset = SL->getElementOffset(Idx);
+ TrailZ = std::min(TrailZ,
+ CountTrailingZeros_64(Offset));
+ } else {
+ // Handle array index arithmetic.
+ Type *IndexedTy = GTI.getIndexedType();
+ if (!IndexedTy->isSized()) return;
+ unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
+ uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
+ LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
+ ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ TrailZ = std::min(TrailZ,
+ unsigned(CountTrailingZeros_64(TypeSize) +
+ LocalKnownZero.countTrailingOnes()));
+ }
+ }
+
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *P = cast<PHINode>(I);
+ // Handle the case of a simple two-predecessor recurrence PHI.
+ // There's a lot more that could theoretically be done here, but
+ // this is sufficient to catch some interesting cases.
+ if (P->getNumIncomingValues() == 2) {
+ for (unsigned i = 0; i != 2; ++i) {
+ Value *L = P->getIncomingValue(i);
+ Value *R = P->getIncomingValue(!i);
+ Operator *LU = dyn_cast<Operator>(L);
+ if (!LU)
+ continue;
+ unsigned Opcode = LU->getOpcode();
+ // Check for operations that have the property that if
+ // both their operands have low zero bits, the result
+ // will have low zero bits.
+ if (Opcode == Instruction::Add ||
+ Opcode == Instruction::Sub ||
+ Opcode == Instruction::And ||
+ Opcode == Instruction::Or ||
+ Opcode == Instruction::Mul) {
+ Value *LL = LU->getOperand(0);
+ Value *LR = LU->getOperand(1);
+ // Find a recurrence.
+ if (LL == I)
+ L = LR;
+ else if (LR == I)
+ L = LL;
+ else
+ break;
+ // Ok, we have a PHI of the form L op= R. Check for low
+ // zero bits.
+ ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1);
+
+ // We need to take the minimum number of known bits
+ APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
+ ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1);
+
+ KnownZero = APInt::getLowBitsSet(BitWidth,
+ std::min(KnownZero2.countTrailingOnes(),
+ KnownZero3.countTrailingOnes()));
+ break;
+ }
+ }
+ }
+
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (P->getNumIncomingValues() == 0)
+ return;
+
+ // Otherwise take the unions of the known bit sets of the operands,
+ // taking conservative care to avoid excessive recursion.
+ if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
+ // Skip if every incoming value references to ourself.
+ if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
+ break;
+
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
+ // Skip direct self references.
+ if (P->getIncomingValue(i) == P) continue;
+
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ // Recurse, but cap the recursion to one level, because we don't
+ // want to waste time spinning around in loops.
+ ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
+ MaxDepth-1);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ // If all bits have been ruled out, there's no need to check
+ // more operands.
+ if (!KnownZero && !KnownOne)
+ break;
+ }
+ }
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ // If this call is undefined for 0, the result will be less than 2^n.
+ if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
+ LowBits -= 1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ break;
+ }
+ case Intrinsic::ctpop: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ break;
+ }
+ case Intrinsic::x86_sse42_crc32_64_8:
+ case Intrinsic::x86_sse42_crc32_64_64:
+ KnownZero = APInt::getHighBitsSet(64, 32);
+ break;
+ }
+ }
+ break;
+ case Instruction::ExtractValue:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
+ ExtractValueInst *EVI = cast<ExtractValueInst>(I);
+ if (EVI->getNumIndices() != 1) break;
+ if (EVI->getIndices()[0] == 0) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ ComputeMaskedBitsAddSub(true, II->getArgOperand(0),
+ II->getArgOperand(1), false, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ ComputeMaskedBitsAddSub(false, II->getArgOperand(0),
+ II->getArgOperand(1), false, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
+ false, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, TD, Depth);
+ break;
+ }
+ }
+ }
+ }
+}
+
+/// ComputeSignBit - Determine whether the sign bit is known to be zero or
+/// one. Convenience wrapper around ComputeMaskedBits.
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const DataLayout *TD, unsigned Depth) {
+ unsigned BitWidth = getBitWidth(V->getType(), TD);
+ if (!BitWidth) {
+ KnownZero = false;
+ KnownOne = false;
+ return;
+ }
+ APInt ZeroBits(BitWidth, 0);
+ APInt OneBits(BitWidth, 0);
+ ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth);
+ KnownOne = OneBits[BitWidth - 1];
+ KnownZero = ZeroBits[BitWidth - 1];
+}
+
+/// isKnownToBeAPowerOfTwo - Return true if the given value is known to have exactly one
+/// bit set when defined. For vectors return true if every element is known to
+/// be a power of two when defined. Supports values with integer or pointer
+/// types and vectors of integers.
+bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return OrZero;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return CI->getValue().isPowerOf2();
+ // TODO: Handle vector constants.
+ }
+
+ // 1 << X is clearly a power of two if the one is not shifted off the end. If
+ // it is shifted off the end then the result is undefined.
+ if (match(V, m_Shl(m_One(), m_Value())))
+ return true;
+
+ // (signbit) >>l X is clearly a power of two if the one is not shifted off the
+ // bottom. If it is shifted off the bottom then the result is undefined.
+ if (match(V, m_LShr(m_SignBit(), m_Value())))
+ return true;
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ Value *X = 0, *Y = 0;
+ // A shift of a power of two is a power of two or zero.
+ if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
+ match(V, m_Shr(m_Value(X), m_Value()))))
+ return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth);
+
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+ return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth);
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(V))
+ return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth) &&
+ isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth);
+
+ if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
+ // A power of two and'd with anything is a power of two or zero.
+ if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth) ||
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth))
+ return true;
+ // X & (-X) is always a power of two or zero.
+ if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
+ return true;
+ return false;
+ }
+
+ // An exact divide or right shift can only shift off zero bits, so the result
+ // is a power of two only if the first operand is a power of two and not
+ // copying a sign bit (sdiv int_min, 2).
+ if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
+ match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
+ return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, Depth);
+ }
+
+ return false;
+}
+
+/// \brief Test whether a GEP's result is known to be non-null.
+///
+/// Uses properties inherent in a GEP to try to determine whether it is known
+/// to be non-null.
+///
+/// Currently this routine does not support vector GEPs.
+static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
+ unsigned Depth) {
+ if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
+ return false;
+
+ // FIXME: Support vector-GEPs.
+ assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
+
+ // If the base pointer is non-null, we cannot walk to a null address with an
+ // inbounds GEP in address space zero.
+ if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth))
+ return true;
+
+ // Past this, if we don't have DataLayout, we can't do much.
+ if (!DL)
+ return false;
+
+ // Walk the GEP operands and see if any operand introduces a non-zero offset.
+ // If so, then the GEP cannot produce a null pointer, as doing so would
+ // inherently violate the inbounds contract within address space zero.
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ // Struct types are easy -- they must always be indexed by a constant.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = DL->getStructLayout(STy);
+ uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
+ if (ElementOffset > 0)
+ return true;
+ continue;
+ }
+
+ // If we have a zero-sized type, the index doesn't matter. Keep looping.
+ if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0)
+ continue;
+
+ // Fast path the constant operand case both for efficiency and so we don't
+ // increment Depth when just zipping down an all-constant GEP.
+ if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
+ if (!OpC->isZero())
+ return true;
+ continue;
+ }
+
+ // We post-increment Depth here because while isKnownNonZero increments it
+ // as well, when we pop back up that increment won't persist. We don't want
+ // to recurse 10k times just because we have 10k GEP operands. We don't
+ // bail completely out because we want to handle constant GEPs regardless
+ // of depth.
+ if (Depth++ >= MaxDepth)
+ continue;
+
+ if (isKnownNonZero(GTI.getOperand(), DL, Depth))
+ return true;
+ }
+
+ return false;
+}
+
+/// isKnownNonZero - Return true if the given value is known to be non-zero
+/// when defined. For vectors return true if every element is known to be
+/// non-zero when defined. Supports values with integer or pointer type and
+/// vectors of integers.
+bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return false;
+ if (isa<ConstantInt>(C))
+ // Must be non-zero due to null test above.
+ return true;
+ // TODO: Handle vectors
+ return false;
+ }
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ >= MaxDepth)
+ return false;
+
+ // Check for pointer simplifications.
+ if (V->getType()->isPointerTy()) {
+ if (isKnownNonNull(V))
+ return true;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ if (isGEPKnownNonNull(GEP, TD, Depth))
+ return true;
+ }
+
+ unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD);
+
+ // X | Y != 0 if X != 0 or Y != 0.
+ Value *X = 0, *Y = 0;
+ if (match(V, m_Or(m_Value(X), m_Value(Y))))
+ return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
+
+ // ext X != 0 if X != 0.
+ if (isa<SExtInst>(V) || isa<ZExtInst>(V))
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth);
+
+ // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+ // if the lowest bit is shifted off the end.
+ if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
+ // shl nuw can't remove any non-zero bits.
+ OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ if (BO->hasNoUnsignedWrap())
+ return isKnownNonZero(X, TD, Depth);
+
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth);
+ if (KnownOne[0])
+ return true;
+ }
+ // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+ // defined if the sign bit is shifted off the end.
+ else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
+ // shr exact can only shift out zero bits.
+ PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
+ if (BO->isExact())
+ return isKnownNonZero(X, TD, Depth);
+
+ bool XKnownNonNegative, XKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ if (XKnownNegative)
+ return true;
+ }
+ // div exact can only produce a zero if the dividend is zero.
+ else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
+ return isKnownNonZero(X, TD, Depth);
+ }
+ // X + Y.
+ else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+ bool XKnownNonNegative, XKnownNegative;
+ bool YKnownNonNegative, YKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth);
+
+ // If X and Y are both non-negative (as signed values) then their sum is not
+ // zero unless both X and Y are zero.
+ if (XKnownNonNegative && YKnownNonNegative)
+ if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth))
+ return true;
+
+ // If X and Y are both negative (as signed values) then their sum is not
+ // zero unless both X and Y equal INT_MIN.
+ if (BitWidth && XKnownNegative && YKnownNegative) {
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ APInt Mask = APInt::getSignedMaxValue(BitWidth);
+ // The sign bit of X is set. If some other bit is set then X is not equal
+ // to INT_MIN.
+ ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ // The sign bit of Y is set. If some other bit is set then Y is not equal
+ // to INT_MIN.
+ ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ }
+
+ // The sum of a non-negative number and a power of two is not zero.
+ if (XKnownNonNegative && isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth))
+ return true;
+ if (YKnownNonNegative && isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth))
+ return true;
+ }
+ // X * Y.
+ else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
+ OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ // If X and Y are non-zero then so is X * Y as long as the multiplication
+ // does not overflow.
+ if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
+ isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth))
+ return true;
+ }
+ // (C ? X : Y) != 0 if X != 0 and Y != 0.
+ else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ if (isKnownNonZero(SI->getTrueValue(), TD, Depth) &&
+ isKnownNonZero(SI->getFalseValue(), TD, Depth))
+ return true;
+ }
+
+ if (!BitWidth) return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
+ return KnownOne != 0;
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
+ const DataLayout *TD, unsigned Depth) {
+ APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "ashr X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+///
+/// 'Op' must have a scalar integer type.
+///
+unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
+ unsigned Depth) {
+ assert((TD || V->getType()->isIntOrIntVectorTy()) &&
+ "ComputeNumSignBits requires a DataLayout object to operate "
+ "on non-integer values!");
+ Type *Ty = V->getType();
+ unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) :
+ Ty->getScalarSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ // Note that ConstantInt is handled by the general ComputeMaskedBits case
+ // below.
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ Operator *U = dyn_cast<Operator>(V);
+ switch (Operator::getOpcode(V)) {
+ default: break;
+ case Instruction::SExt:
+ Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
+ return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
+
+ case Instruction::AShr: {
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ // ashr X, C -> adds C sign bits. Vectors too.
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
+ Tmp += ShAmt->getZExtValue();
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ return Tmp;
+ }
+ case Instruction::Shl: {
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp2 = ShAmt->getZExtValue();
+ if (Tmp2 >= TyBits || // Bad shift.
+ Tmp2 >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - Tmp2;
+ }
+ break;
+ }
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case Instruction::Select:
+ Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case Instruction::Add:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
+ return TyBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::Sub:
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
+ return TyBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(U);
+ // Don't analyze large in-degree PHIs.
+ if (PN->getNumIncomingValues() > 4) break;
+
+ // Take the minimum of all incoming values. This can't infinitely loop
+ // because of our depth threshold.
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (Tmp == 1) return Tmp;
+ Tmp = std::min(Tmp,
+ ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+ }
+ return Tmp;
+ }
+
+ case Instruction::Trunc:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask;
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-TyBits;
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
+}
+
+/// ComputeMultiple - This function computes the integer multiple of Base that
+/// equals V. If successful, it returns true and returns the multiple in
+/// Multiple. If unsuccessful, it returns false. It looks
+/// through SExt instructions only if LookThroughSExt is true.
+bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+ bool LookThroughSExt, unsigned Depth) {
+ const unsigned MaxDepth = 6;
+
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
+
+ Type *T = V->getType();
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+
+ if (Base == 0)
+ return false;
+
+ if (Base == 1) {
+ Multiple = V;
+ return true;
+ }
+
+ ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
+ Constant *BaseVal = ConstantInt::get(T, Base);
+ if (CO && CO == BaseVal) {
+ // Multiple is 1.
+ Multiple = ConstantInt::get(T, 1);
+ return true;
+ }
+
+ if (CI && CI->getZExtValue() % Base == 0) {
+ Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
+ return true;
+ }
+
+ if (Depth == MaxDepth) return false; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::SExt:
+ if (!LookThroughSExt) return false;
+ // otherwise fall through to ZExt
+ case Instruction::ZExt:
+ return ComputeMultiple(I->getOperand(0), Base, Multiple,
+ LookThroughSExt, Depth+1);
+ case Instruction::Shl:
+ case Instruction::Mul: {
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+
+ if (I->getOpcode() == Instruction::Shl) {
+ ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
+ if (!Op1CI) return false;
+ // Turn Op0 << Op1 into Op0 * 2^Op1
+ APInt Op1Int = Op1CI->getValue();
+ uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
+ APInt API(Op1Int.getBitWidth(), 0);
+ API.setBit(BitToSet);
+ Op1 = ConstantInt::get(V->getContext(), API);
+ }
+
+ Value *Mul0 = NULL;
+ if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1))
+ if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+ if (Op1C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+ if (Op1C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+
+ // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+ Multiple = ConstantExpr::getMul(MulC, Op1C);
+ return true;
+ }
+
+ if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
+ if (Mul0CI->getValue() == 1) {
+ // V == Base * Op1, so return Op1
+ Multiple = Op1;
+ return true;
+ }
+ }
+
+ Value *Mul1 = NULL;
+ if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+ if (Constant *Op0C = dyn_cast<Constant>(Op0))
+ if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+ if (Op0C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+ if (Op0C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+
+ // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+ Multiple = ConstantExpr::getMul(MulC, Op0C);
+ return true;
+ }
+
+ if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
+ if (Mul1CI->getValue() == 1) {
+ // V == Base * Op0, so return Op0
+ Multiple = Op0;
+ return true;
+ }
+ }
+ }
+ }
+
+ // We could not determine if V is a multiple of Base.
+ return false;
+}
+
+/// CannotBeNegativeZero - Return true if we can prove that the specified FP
+/// value is never equal to -0.0.
+///
+/// NOTE: this function will need to be revisited when we support non-default
+/// rounding modes!
+///
+bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->getValueAPF().isNegZero();
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ const Operator *I = dyn_cast<Operator>(V);
+ if (I == 0) return false;
+
+ // Check if the nsz fast-math flag is set
+ if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I))
+ if (FPO->hasNoSignedZeros())
+ return true;
+
+ // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
+ if (I->getOpcode() == Instruction::FAdd)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1)))
+ if (CFP->isNullValue())
+ return true;
+
+ // sitofp and uitofp turn into +0.0 for zero.
+ if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
+ return true;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ // sqrt(-0.0) = -0.0, no other negative results are possible.
+ if (II->getIntrinsicID() == Intrinsic::sqrt)
+ return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction()) {
+ if (F->isDeclaration()) {
+ // abs(x) != -0.0
+ if (F->getName() == "abs") return true;
+ // fabs[lf](x) != -0.0
+ if (F->getName() == "fabs") return true;
+ if (F->getName() == "fabsf") return true;
+ if (F->getName() == "fabsl") return true;
+ if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
+ F->getName() == "sqrtl")
+ return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
+ }
+ }
+
+ return false;
+}
+
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with. This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+Value *llvm::isBytewiseValue(Value *V) {
+ // All byte-wide stores are splatable, even of arbitrary variables.
+ if (V->getType()->isIntegerTy(8)) return V;
+
+ // Handle 'null' ConstantArrayZero etc.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+
+ // Constant float and double values can be handled as integer values if the
+ // corresponding integer value is "byteable". An important case is 0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
+ if (CFP->getType()->isDoubleTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+ // Don't handle long double formats, which have strange constraints.
+ }
+
+ // We can handle constant integers that are power of two in size and a
+ // multiple of 8 bits.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ unsigned Width = CI->getBitWidth();
+ if (isPowerOf2_32(Width) && Width > 8) {
+ // We can handle this value if the recursive binary decomposition is the
+ // same at all levels.
+ APInt Val = CI->getValue();
+ APInt Val2;
+ while (Val.getBitWidth() != 8) {
+ unsigned NextWidth = Val.getBitWidth()/2;
+ Val2 = Val.lshr(NextWidth);
+ Val2 = Val2.trunc(Val.getBitWidth()/2);
+ Val = Val.trunc(Val.getBitWidth()/2);
+
+ // If the top/bottom halves aren't the same, reject it.
+ if (Val != Val2)
+ return 0;
+ }
+ return ConstantInt::get(V->getContext(), Val);
+ }
+ }
+
+ // A ConstantDataArray/Vector is splatable if all its members are equal and
+ // also splatable.
+ if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
+ Value *Elt = CA->getElementAsConstant(0);
+ Value *Val = isBytewiseValue(Elt);
+ if (!Val)
+ return 0;
+
+ for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
+ if (CA->getElementAsConstant(I) != Elt)
+ return 0;
+
+ return Val;
+ }
+
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return 0;
+}
+
+
+// This is the recursive version of BuildSubAggregate. It takes a few different
+// arguments. Idxs is the index within the nested struct From that we are
+// looking at now (which is of type IndexedType). IdxSkip is the number of
+// indices from Idxs that should be left out when inserting into the resulting
+// struct. To is the result struct built so far, new insertvalue instructions
+// build on that.
+static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
+ SmallVector<unsigned, 10> &Idxs,
+ unsigned IdxSkip,
+ Instruction *InsertBefore) {
+ llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType);
+ if (STy) {
+ // Save the original To argument so we can modify it
+ Value *OrigTo = To;
+ // General case, the type indexed by Idxs is a struct
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // Process each struct element recursively
+ Idxs.push_back(i);
+ Value *PrevTo = To;
+ To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
+ InsertBefore);
+ Idxs.pop_back();
+ if (!To) {
+ // Couldn't find any inserted value for this index? Cleanup
+ while (PrevTo != OrigTo) {
+ InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
+ PrevTo = Del->getAggregateOperand();
+ Del->eraseFromParent();
+ }
+ // Stop processing elements
+ break;
+ }
+ }
+ // If we successfully found a value for each of our subaggregates
+ if (To)
+ return To;
+ }
+ // Base case, the type indexed by SourceIdxs is not a struct, or not all of
+ // the struct's elements had a value that was inserted directly. In the latter
+ // case, perhaps we can't determine each of the subelements individually, but
+ // we might be able to find the complete struct somewhere.
+
+ // Find the value that is at that particular spot
+ Value *V = FindInsertedValue(From, Idxs);
+
+ if (!V)
+ return NULL;
+
+ // Insert the value in the new (sub) aggregrate
+ return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
+ "tmp", InsertBefore);
+}
+
+// This helper takes a nested struct and extracts a part of it (which is again a
+// struct) into a new value. For example, given the struct:
+// { a, { b, { c, d }, e } }
+// and the indices "1, 1" this returns
+// { c, d }.
+//
+// It does this by inserting an insertvalue for each element in the resulting
+// struct, as opposed to just inserting a single struct. This will only work if
+// each of the elements of the substruct are known (ie, inserted into From by an
+// insertvalue instruction somewhere).
+//
+// All inserted insertvalue instructions are inserted before InsertBefore
+static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
+ Instruction *InsertBefore) {
+ assert(InsertBefore && "Must have someplace to insert!");
+ Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+ idx_range);
+ Value *To = UndefValue::get(IndexedType);
+ SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
+ unsigned IdxSkip = Idxs.size();
+
+ return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+}
+
+/// FindInsertedValue - Given an aggregrate and an sequence of indices, see if
+/// the scalar value indexed is already around as a register, for example if it
+/// were inserted directly into the aggregrate.
+///
+/// If InsertBefore is not null, this function will duplicate (modified)
+/// insertvalues when a part of a nested struct is extracted.
+Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
+ Instruction *InsertBefore) {
+ // Nothing to index? Just return V then (this is useful at the end of our
+ // recursion).
+ if (idx_range.empty())
+ return V;
+ // We have indices, so V should have an indexable type.
+ assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
+ "Not looking at a struct or array?");
+ assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
+ "Invalid indices for type?");
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = C->getAggregateElement(idx_range[0]);
+ if (C == 0) return 0;
+ return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
+ }
+
+ if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+ // Loop the indices for the insertvalue instruction in parallel with the
+ // requested indices
+ const unsigned *req_idx = idx_range.begin();
+ for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+ i != e; ++i, ++req_idx) {
+ if (req_idx == idx_range.end()) {
+ // We can't handle this without inserting insertvalues
+ if (!InsertBefore)
+ return 0;
+
+ // The requested index identifies a part of a nested aggregate. Handle
+ // this specially. For example,
+ // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+ // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+ // %C = extractvalue {i32, { i32, i32 } } %B, 1
+ // This can be changed into
+ // %A = insertvalue {i32, i32 } undef, i32 10, 0
+ // %C = insertvalue {i32, i32 } %A, i32 11, 1
+ // which allows the unused 0,0 element from the nested struct to be
+ // removed.
+ return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
+ InsertBefore);
+ }
+
+ // This insert value inserts something else than what we are looking for.
+ // See if the (aggregrate) value inserted into has the value we are
+ // looking for, then.
+ if (*req_idx != *i)
+ return FindInsertedValue(I->getAggregateOperand(), idx_range,
+ InsertBefore);
+ }
+ // If we end up here, the indices of the insertvalue match with those
+ // requested (though possibly only partially). Now we recursively look at
+ // the inserted value, passing any remaining indices.
+ return FindInsertedValue(I->getInsertedValueOperand(),
+ makeArrayRef(req_idx, idx_range.end()),
+ InsertBefore);
+ }
+
+ if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+ // If we're extracting a value from an aggregrate that was extracted from
+ // something else, we can extract from that something else directly instead.
+ // However, we will need to chain I's indices with the requested indices.
+
+ // Calculate the number of indices required
+ unsigned size = I->getNumIndices() + idx_range.size();
+ // Allocate some space to put the new indices in
+ SmallVector<unsigned, 5> Idxs;
+ Idxs.reserve(size);
+ // Add indices from the extract value instruction
+ Idxs.append(I->idx_begin(), I->idx_end());
+
+ // Add requested indices
+ Idxs.append(idx_range.begin(), idx_range.end());
+
+ assert(Idxs.size() == size
+ && "Number of indices added not correct?");
+
+ return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
+ }
+ // Otherwise, we don't know (such as, extracting from a function return value
+ // or load instruction)
+ return 0;
+}
+
+/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
+/// it can be expressed as a base pointer plus a constant offset. Return the
+/// base and offset to the caller.
+Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+ const DataLayout *TD) {
+ // Without DataLayout, conservatively assume 64-bit offsets, which is
+ // the widest we support.
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ APInt ByteOffset(BitWidth, 0);
+ while (1) {
+ if (Ptr->getType()->isVectorTy())
+ break;
+
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ APInt GEPOffset(BitWidth, 0);
+ if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset))
+ break;
+ ByteOffset += GEPOffset;
+ Ptr = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
+ Ptr = cast<Operator>(Ptr)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
+ if (GA->mayBeOverridden())
+ break;
+ Ptr = GA->getAliasee();
+ } else {
+ break;
+ }
+ }
+ Offset = ByteOffset.getSExtValue();
+ return Ptr;
+}
+
+
+/// getConstantStringInfo - This function computes the length of a
+/// null-terminated C string pointed to by V. If successful, it returns true
+/// and returns the string in Str. If unsuccessful, it returns false.
+bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
+ uint64_t Offset, bool TrimAtNul) {
+ assert(V);
+
+ // Look through bitcast instructions and geps.
+ V = V->stripPointerCasts();
+
+ // If the value is a GEP instructionor constant expression, treat it as an
+ // offset.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ // Make sure the GEP has exactly three arguments.
+ if (GEP->getNumOperands() != 3)
+ return false;
+
+ // Make sure the index-ee is a pointer to array of i8.
+ PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
+ ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
+ if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ if (FirstIdx == 0 || !FirstIdx->isZero())
+ return false;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return false;
+ return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset);
+ }
+
+ // The GEP instruction, constant or instruction, must reference a global
+ // variable that is a constant and is initialized. The referenced constant
+ // initializer is the array that we'll use for optimization.
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return false;
+
+ // Handle the all-zeros case
+ if (GV->getInitializer()->isNullValue()) {
+ // This is a degenerate case. The initializer is constant zero so the
+ // length of the string must be zero.
+ Str = "";
+ return true;
+ }
+
+ // Must be a Constant Array
+ const ConstantDataArray *Array =
+ dyn_cast<ConstantDataArray>(GV->getInitializer());
+ if (Array == 0 || !Array->isString())
+ return false;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getArrayNumElements();
+
+ // Start out with the entire array in the StringRef.
+ Str = Array->getAsString();
+
+ if (Offset > NumElts)
+ return false;
+
+ // Skip over 'offset' bytes.
+ Str = Str.substr(Offset);
+
+ if (TrimAtNul) {
+ // Trim off the \0 and anything after it. If the array is not nul
+ // terminated, we just return the whole end of string. The client may know
+ // some other way that the string is length-bound.
+ Str = Str.substr(0, Str.find('\0'));
+ }
+ return true;
+}
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+ // Look through noop bitcast instructions.
+ V = V->stripPointerCasts();
+
+ // If this is a PHI node, there are two cases: either we have already seen it
+ // or we haven't.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (!PHIs.insert(PN))
+ return ~0ULL; // already in the set.
+
+ // If it was new, see if all the input strings are the same length.
+ uint64_t LenSoFar = ~0ULL;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+ if (Len == 0) return 0; // Unknown length -> unknown.
+
+ if (Len == ~0ULL) continue;
+
+ if (Len != LenSoFar && LenSoFar != ~0ULL)
+ return 0; // Disagree -> unknown.
+ LenSoFar = Len;
+ }
+
+ // Success, all agree.
+ return LenSoFar;
+ }
+
+ // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+ if (Len1 == 0) return 0;
+ uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+ if (Len2 == 0) return 0;
+ if (Len1 == ~0ULL) return Len2;
+ if (Len2 == ~0ULL) return Len1;
+ if (Len1 != Len2) return 0;
+ return Len1;
+ }
+
+ // Otherwise, see if we can read the string.
+ StringRef StrData;
+ if (!getConstantStringInfo(V, StrData))
+ return 0;
+
+ return StrData.size()+1;
+}
+
+/// GetStringLength - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+uint64_t llvm::GetStringLength(Value *V) {
+ if (!V->getType()->isPointerTy()) return 0;
+
+ SmallPtrSet<PHINode*, 32> PHIs;
+ uint64_t Len = GetStringLengthH(V, PHIs);
+ // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
+ // an empty string as a length.
+ return Len == ~0ULL ? 1 : Len;
+}
+
+Value *
+llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
+ if (!V->getType()->isPointerTy())
+ return V;
+ for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ // See if InstructionSimplify knows any relevant tricks.
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Acquire a DominatorTree and use it.
+ if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
+ V = Simplified;
+ continue;
+ }
+
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ }
+ return V;
+}
+
+void
+llvm::GetUnderlyingObjects(Value *V,
+ SmallVectorImpl<Value *> &Objects,
+ const DataLayout *TD,
+ unsigned MaxLookup) {
+ SmallPtrSet<Value *, 4> Visited;
+ SmallVector<Value *, 4> Worklist;
+ Worklist.push_back(V);
+ do {
+ Value *P = Worklist.pop_back_val();
+ P = GetUnderlyingObject(P, TD, MaxLookup);
+
+ if (!Visited.insert(P))
+ continue;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(P)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
+
+ Objects.push_back(P);
+ } while (!Worklist.empty());
+}
+
+/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
+/// are lifetime markers.
+///
+bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+ if (!II) return false;
+
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ }
+ return true;
+}
+
+bool llvm::isSafeToSpeculativelyExecute(const Value *V,
+ const DataLayout *TD) {
+ const Operator *Inst = dyn_cast<Operator>(V);
+ if (!Inst)
+ return false;
+
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
+ if (C->canTrap())
+ return false;
+
+ switch (Inst->getOpcode()) {
+ default:
+ return true;
+ case Instruction::UDiv:
+ case Instruction::URem:
+ // x / y is undefined if y == 0, but calcuations like x / 3 are safe.
+ return isKnownNonZero(Inst->getOperand(1), TD);
+ case Instruction::SDiv:
+ case Instruction::SRem: {
+ Value *Op = Inst->getOperand(1);
+ // x / y is undefined if y == 0
+ if (!isKnownNonZero(Op, TD))
+ return false;
+ // x / y might be undefined if y == -1
+ unsigned BitWidth = getBitWidth(Op->getType(), TD);
+ if (BitWidth == 0)
+ return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, TD);
+ return !!KnownZero;
+ }
+ case Instruction::Load: {
+ const LoadInst *LI = cast<LoadInst>(Inst);
+ if (!LI->isUnordered())
+ return false;
+ return LI->getPointerOperand()->isDereferenceablePointer();
+ }
+ case Instruction::Call: {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ // These synthetic intrinsics have no side-effects, and just mark
+ // information about their operands.
+ // FIXME: There are other no-op synthetic instructions that potentially
+ // should be considered at least *safe* to speculate...
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ return true;
+
+ case Intrinsic::bswap:
+ case Intrinsic::ctlz:
+ case Intrinsic::ctpop:
+ case Intrinsic::cttz:
+ case Intrinsic::objectsize:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ return true;
+ // TODO: some fp intrinsics are marked as having the same error handling
+ // as libm. They're safe to speculate when they won't error.
+ // TODO: are convert_{from,to}_fp16 safe?
+ // TODO: can we list target-specific intrinsics here?
+ default: break;
+ }
+ }
+ return false; // The called function could have undefined behavior or
+ // side-effects, even if marked readnone nounwind.
+ }
+ case Instruction::VAArg:
+ case Instruction::Alloca:
+ case Instruction::Invoke:
+ case Instruction::PHI:
+ case Instruction::Store:
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::IndirectBr:
+ case Instruction::Switch:
+ case Instruction::Unreachable:
+ case Instruction::Fence:
+ case Instruction::LandingPad:
+ case Instruction::AtomicRMW:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::Resume:
+ return false; // Misc instructions which have effects
+ }
+}
+
+/// isKnownNonNull - Return true if we know that the specified value is never
+/// null.
+bool llvm::isKnownNonNull(const Value *V) {
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V)) return true;
+
+ // A byval argument is never null.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+
+ // Global values are not null unless extern weak.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return !GV->hasExternalWeakLinkage();
+ return false;
+}
diff --git a/contrib/llvm/lib/Archive/Archive.cpp b/contrib/llvm/lib/Archive/Archive.cpp
new file mode 100644
index 000000000000..1f36a00ab086
--- /dev/null
+++ b/contrib/llvm/lib/Archive/Archive.cpp
@@ -0,0 +1,262 @@
+//===-- Archive.cpp - Generic LLVM archive functions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the Archive and ArchiveMember
+// classes that is common to both reading and writing archives..
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/Archive.h"
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/system_error.h"
+#include <cstring>
+#include <memory>
+using namespace llvm;
+
+// getMemberSize - compute the actual physical size of the file member as seen
+// on disk. This isn't the size of member's payload. Use getSize() for that.
+unsigned
+ArchiveMember::getMemberSize() const {
+ // Basically its the file size plus the header size
+ unsigned result = info.fileSize + sizeof(ArchiveMemberHeader);
+
+ // If it has a long filename, include the name length
+ if (hasLongFilename())
+ result += path.str().length() + 1;
+
+ // If its now odd lengthed, include the padding byte
+ if (result % 2 != 0 )
+ result++;
+
+ return result;
+}
+
+// This default constructor is only use by the ilist when it creates its
+// sentry node. We give it specific static values to make it stand out a bit.
+ArchiveMember::ArchiveMember()
+ : parent(0), path("--invalid--"), flags(0), data(0)
+{
+ info.user = sys::Process::GetCurrentUserId();
+ info.group = sys::Process::GetCurrentGroupId();
+ info.mode = 0777;
+ info.fileSize = 0;
+ info.modTime = sys::TimeValue::now();
+}
+
+// This is the constructor that the Archive class uses when it is building or
+// reading an archive. It just defaults a few things and ensures the parent is
+// set for the iplist. The Archive class fills in the ArchiveMember's data.
+// This is required because correctly setting the data may depend on other
+// things in the Archive.
+ArchiveMember::ArchiveMember(Archive* PAR)
+ : parent(PAR), path(), flags(0), data(0)
+{
+}
+
+// This method allows an ArchiveMember to be replaced with the data for a
+// different file, presumably as an update to the member. It also makes sure
+// the flags are reset correctly.
+bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
+ bool Exists;
+ if (sys::fs::exists(newFile.str(), Exists) || !Exists) {
+ if (ErrMsg)
+ *ErrMsg = "Can not replace an archive member with a non-existent file";
+ return true;
+ }
+
+ data = 0;
+ path = newFile;
+
+ // SVR4 symbol tables have an empty name
+ if (path.str() == ARFILE_SVR4_SYMTAB_NAME)
+ flags |= SVR4SymbolTableFlag;
+ else
+ flags &= ~SVR4SymbolTableFlag;
+
+ // BSD4.4 symbol tables have a special name
+ if (path.str() == ARFILE_BSD4_SYMTAB_NAME)
+ flags |= BSD4SymbolTableFlag;
+ else
+ flags &= ~BSD4SymbolTableFlag;
+
+ // LLVM symbol tables have a very specific name
+ if (path.str() == ARFILE_LLVM_SYMTAB_NAME)
+ flags |= LLVMSymbolTableFlag;
+ else
+ flags &= ~LLVMSymbolTableFlag;
+
+ // String table name
+ if (path.str() == ARFILE_STRTAB_NAME)
+ flags |= StringTableFlag;
+ else
+ flags &= ~StringTableFlag;
+
+ // If it has a slash then it has a path
+ bool hasSlash = path.str().find('/') != std::string::npos;
+ if (hasSlash)
+ flags |= HasPathFlag;
+ else
+ flags &= ~HasPathFlag;
+
+ // If it has a slash or its over 15 chars then its a long filename format
+ if (hasSlash || path.str().length() > 15)
+ flags |= HasLongFilenameFlag;
+ else
+ flags &= ~HasLongFilenameFlag;
+
+ // Get the signature and status info
+ const char* signature = (const char*) data;
+ SmallString<4> magic;
+ if (!signature) {
+ sys::fs::get_magic(path.str(), magic.capacity(), magic);
+ signature = magic.c_str();
+ const sys::FileStatus *FSinfo = path.getFileStatus(false, ErrMsg);
+ if (FSinfo)
+ info = *FSinfo;
+ else
+ return true;
+ }
+
+ // Determine what kind of file it is.
+ switch (sys::IdentifyFileType(signature,4)) {
+ case sys::Bitcode_FileType:
+ flags |= BitcodeFlag;
+ break;
+ default:
+ flags &= ~BitcodeFlag;
+ break;
+ }
+ return false;
+}
+
+// Archive constructor - this is the only constructor that gets used for the
+// Archive class. Everything else (default,copy) is deprecated. This just
+// initializes and maps the file into memory, if requested.
+Archive::Archive(const sys::Path& filename, LLVMContext& C)
+ : archPath(filename), members(), mapfile(0), base(0), symTab(), strtab(),
+ symTabSize(0), firstFileOffset(0), modules(), foreignST(0), Context(C) {
+}
+
+bool
+Archive::mapToMemory(std::string* ErrMsg) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(archPath.c_str(), File)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
+ return true;
+ }
+ mapfile = File.take();
+ base = mapfile->getBufferStart();
+ return false;
+}
+
+void Archive::cleanUpMemory() {
+ // Shutdown the file mapping
+ delete mapfile;
+ mapfile = 0;
+ base = 0;
+
+ // Forget the entire symbol table
+ symTab.clear();
+ symTabSize = 0;
+
+ firstFileOffset = 0;
+
+ // Free the foreign symbol table member
+ if (foreignST) {
+ delete foreignST;
+ foreignST = 0;
+ }
+
+ // Delete any Modules and ArchiveMember's we've allocated as a result of
+ // symbol table searches.
+ for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I ) {
+ delete I->second.first;
+ delete I->second.second;
+ }
+}
+
+// Archive destructor - just clean up memory
+Archive::~Archive() {
+ cleanUpMemory();
+}
+
+
+
+static void getSymbols(Module*M, std::vector<std::string>& symbols) {
+ // Loop over global variables
+ for (Module::global_iterator GI = M->global_begin(), GE=M->global_end(); GI != GE; ++GI)
+ if (!GI->isDeclaration() && !GI->hasLocalLinkage())
+ if (!GI->getName().empty())
+ symbols.push_back(GI->getName());
+
+ // Loop over functions
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
+ if (!FI->isDeclaration() && !FI->hasLocalLinkage())
+ if (!FI->getName().empty())
+ symbols.push_back(FI->getName());
+
+ // Loop over aliases
+ for (Module::alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+ AI != AE; ++AI) {
+ if (AI->hasName())
+ symbols.push_back(AI->getName());
+ }
+}
+
+// Get just the externally visible defined symbols from the bitcode
+bool llvm::GetBitcodeSymbols(const sys::Path& fName,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg) {
+ OwningPtr<MemoryBuffer> Buffer;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(fName.c_str(), Buffer)) {
+ if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'" + ": "
+ + ec.message();
+ return true;
+ }
+
+ Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
+ if (!M)
+ return true;
+
+ // Get the symbols
+ getSymbols(M, symbols);
+
+ // Done with the module.
+ delete M;
+ return true;
+}
+
+Module*
+llvm::GetBitcodeSymbols(const char *BufPtr, unsigned Length,
+ const std::string& ModuleID,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg) {
+ // Get the module.
+ OwningPtr<MemoryBuffer> Buffer(
+ MemoryBuffer::getMemBufferCopy(StringRef(BufPtr, Length),ModuleID.c_str()));
+
+ Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
+ if (!M)
+ return 0;
+
+ // Get the symbols
+ getSymbols(M, symbols);
+
+ // Done with the module. Note that it's the caller's responsibility to delete
+ // the Module.
+ return M;
+}
diff --git a/contrib/llvm/lib/Archive/ArchiveInternals.h b/contrib/llvm/lib/Archive/ArchiveInternals.h
new file mode 100644
index 000000000000..f6c87e899f25
--- /dev/null
+++ b/contrib/llvm/lib/Archive/ArchiveInternals.h
@@ -0,0 +1,88 @@
+//===-- lib/Archive/ArchiveInternals.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Internal implementation header for LLVM Archive files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ARCHIVE_ARCHIVEINTERNALS_H
+#define LIB_ARCHIVE_ARCHIVEINTERNALS_H
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/Support/TimeValue.h"
+#include <cstring>
+
+#define ARFILE_MAGIC "!<arch>\n" ///< magic string
+#define ARFILE_MAGIC_LEN (sizeof(ARFILE_MAGIC)-1) ///< length of magic string
+#define ARFILE_SVR4_SYMTAB_NAME "/ " ///< SVR4 symtab entry name
+#define ARFILE_LLVM_SYMTAB_NAME "#_LLVM_SYM_TAB_#" ///< LLVM symtab entry name
+#define ARFILE_BSD4_SYMTAB_NAME "__.SYMDEF SORTED" ///< BSD4 symtab entry name
+#define ARFILE_STRTAB_NAME "// " ///< Name of string table
+#define ARFILE_PAD "\n" ///< inter-file align padding
+#define ARFILE_MEMBER_MAGIC "`\n" ///< fmag field magic #
+
+namespace llvm {
+
+ class LLVMContext;
+
+ /// The ArchiveMemberHeader structure is used internally for bitcode
+ /// archives.
+ /// The header precedes each file member in the archive. This structure is
+ /// defined using character arrays for direct and correct interpretation
+ /// regardless of the endianess of the machine that produced it.
+ /// @brief Archive File Member Header
+ class ArchiveMemberHeader {
+ /// @name Data
+ /// @{
+ public:
+ char name[16]; ///< Name of the file member.
+ char date[12]; ///< File date, decimal seconds since Epoch
+ char uid[6]; ///< user id in ASCII decimal
+ char gid[6]; ///< group id in ASCII decimal
+ char mode[8]; ///< file mode in ASCII octal
+ char size[10]; ///< file size in ASCII decimal
+ char fmag[2]; ///< Always contains ARFILE_MAGIC_TERMINATOR
+
+ /// @}
+ /// @name Methods
+ /// @{
+ public:
+ void init() {
+ memset(name,' ',16);
+ memset(date,' ',12);
+ memset(uid,' ',6);
+ memset(gid,' ',6);
+ memset(mode,' ',8);
+ memset(size,' ',10);
+ fmag[0] = '`';
+ fmag[1] = '\n';
+ }
+
+ bool checkSignature() const {
+ return 0 == memcmp(fmag, ARFILE_MEMBER_MAGIC,2);
+ }
+ };
+
+ // Get just the externally visible defined symbols from the bitcode
+ bool GetBitcodeSymbols(const sys::Path& fName,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg);
+
+ Module* GetBitcodeSymbols(const char *Buffer, unsigned Length,
+ const std::string& ModuleID,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg);
+}
+
+#endif
+
+// vim: sw=2 ai
diff --git a/contrib/llvm/lib/Archive/ArchiveReader.cpp b/contrib/llvm/lib/Archive/ArchiveReader.cpp
new file mode 100644
index 000000000000..14713e692c0f
--- /dev/null
+++ b/contrib/llvm/lib/Archive/ArchiveReader.cpp
@@ -0,0 +1,633 @@
+//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Builds up standard unix archive files (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/Archive.h"
+#include "ArchiveInternals.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+/// Read a variable-bit-rate encoded unsigned integer
+static inline unsigned readInteger(const char*&At, const char*End) {
+ unsigned Shift = 0;
+ unsigned Result = 0;
+
+ do {
+ if (At == End)
+ return Result;
+ Result |= (unsigned)((*At++) & 0x7F) << Shift;
+ Shift += 7;
+ } while (At[-1] & 0x80);
+ return Result;
+}
+
+// Completely parse the Archive's symbol table and populate symTab member var.
+bool
+Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) {
+ const char* At = (const char*) data;
+ const char* End = At + size;
+ while (At < End) {
+ unsigned offset = readInteger(At, End);
+ if (At == End) {
+ if (error)
+ *error = "Ran out of data reading vbr_uint for symtab offset!";
+ return false;
+ }
+ unsigned length = readInteger(At, End);
+ if (At == End) {
+ if (error)
+ *error = "Ran out of data reading vbr_uint for symtab length!";
+ return false;
+ }
+ if (At + length > End) {
+ if (error)
+ *error = "Malformed symbol table: length not consistent with size";
+ return false;
+ }
+ // we don't care if it can't be inserted (duplicate entry)
+ symTab.insert(std::make_pair(std::string(At, length), offset));
+ At += length;
+ }
+ symTabSize = size;
+ return true;
+}
+
+// This member parses an ArchiveMemberHeader that is presumed to be pointed to
+// by At. The At pointer is updated to the byte just after the header, which
+// can be variable in size.
+ArchiveMember*
+Archive::parseMemberHeader(const char*& At, const char* End, std::string* error)
+{
+ if (At + sizeof(ArchiveMemberHeader) >= End) {
+ if (error)
+ *error = "Unexpected end of file";
+ return 0;
+ }
+
+ // Cast archive member header
+ const ArchiveMemberHeader* Hdr = (const ArchiveMemberHeader*)At;
+ At += sizeof(ArchiveMemberHeader);
+
+ int flags = 0;
+ int MemberSize = atoi(Hdr->size);
+ assert(MemberSize >= 0);
+
+ // Check the size of the member for sanity
+ if (At + MemberSize > End) {
+ if (error)
+ *error = "invalid member length in archive file";
+ return 0;
+ }
+
+ // Check the member signature
+ if (!Hdr->checkSignature()) {
+ if (error)
+ *error = "invalid file member signature";
+ return 0;
+ }
+
+ // Convert and check the member name
+ // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol
+ // table. The special name "//" and 14 blanks is for a string table, used
+ // for long file names. This library doesn't generate either of those but
+ // it will accept them. If the name starts with #1/ and the remainder is
+ // digits, then those digits specify the length of the name that is
+ // stored immediately following the header. The special name
+ // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bitcode.
+ // Anything else is a regular, short filename that is terminated with
+ // a '/' and blanks.
+
+ std::string pathname;
+ switch (Hdr->name[0]) {
+ case '#':
+ if (Hdr->name[1] == '1' && Hdr->name[2] == '/') {
+ if (isdigit(Hdr->name[3])) {
+ unsigned len = atoi(&Hdr->name[3]);
+ const char *nulp = (const char *)memchr(At, '\0', len);
+ pathname.assign(At, nulp != 0 ? (uintptr_t)(nulp - At) : len);
+ At += len;
+ MemberSize -= len;
+ flags |= ArchiveMember::HasLongFilenameFlag;
+ } else {
+ if (error)
+ *error = "invalid long filename";
+ return 0;
+ }
+ } else if (Hdr->name[1] == '_' &&
+ (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) {
+ // The member is using a long file name (>15 chars) format.
+ // This format is standard for 4.4BSD and Mac OSX operating
+ // systems. LLVM uses it similarly. In this format, the
+ // remainder of the name field (after #1/) specifies the
+ // length of the file name which occupy the first bytes of
+ // the member's data. The pathname already has the #1/ stripped.
+ pathname.assign(ARFILE_LLVM_SYMTAB_NAME);
+ flags |= ArchiveMember::LLVMSymbolTableFlag;
+ }
+ break;
+ case '/':
+ if (Hdr->name[1]== '/') {
+ if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) {
+ pathname.assign(ARFILE_STRTAB_NAME);
+ flags |= ArchiveMember::StringTableFlag;
+ } else {
+ if (error)
+ *error = "invalid string table name";
+ return 0;
+ }
+ } else if (Hdr->name[1] == ' ') {
+ if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) {
+ pathname.assign(ARFILE_SVR4_SYMTAB_NAME);
+ flags |= ArchiveMember::SVR4SymbolTableFlag;
+ } else {
+ if (error)
+ *error = "invalid SVR4 symbol table name";
+ return 0;
+ }
+ } else if (isdigit(Hdr->name[1])) {
+ unsigned index = atoi(&Hdr->name[1]);
+ if (index < strtab.length()) {
+ const char* namep = strtab.c_str() + index;
+ const char* endp = strtab.c_str() + strtab.length();
+ const char* p = namep;
+ const char* last_p = p;
+ while (p < endp) {
+ if (*p == '\n' && *last_p == '/') {
+ pathname.assign(namep, last_p - namep);
+ flags |= ArchiveMember::HasLongFilenameFlag;
+ break;
+ }
+ last_p = p;
+ p++;
+ }
+ if (p >= endp) {
+ if (error)
+ *error = "missing name terminator in string table";
+ return 0;
+ }
+ } else {
+ if (error)
+ *error = "name index beyond string table";
+ return 0;
+ }
+ }
+ break;
+ case '_':
+ if (Hdr->name[1] == '_' &&
+ (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) {
+ pathname.assign(ARFILE_BSD4_SYMTAB_NAME);
+ flags |= ArchiveMember::BSD4SymbolTableFlag;
+ break;
+ }
+ /* FALL THROUGH */
+
+ default:
+ const char* slash = (const char*) memchr(Hdr->name, '/', 16);
+ if (slash == 0)
+ slash = Hdr->name + 16;
+ pathname.assign(Hdr->name, slash - Hdr->name);
+ break;
+ }
+
+ // Determine if this is a bitcode file
+ switch (sys::IdentifyFileType(At, 4)) {
+ case sys::Bitcode_FileType:
+ flags |= ArchiveMember::BitcodeFlag;
+ break;
+ default:
+ flags &= ~ArchiveMember::BitcodeFlag;
+ break;
+ }
+
+ // Instantiate the ArchiveMember to be filled
+ ArchiveMember* member = new ArchiveMember(this);
+
+ // Fill in fields of the ArchiveMember
+ member->parent = this;
+ member->path.set(pathname);
+ member->info.fileSize = MemberSize;
+ member->info.modTime.fromEpochTime(atoi(Hdr->date));
+ unsigned int mode;
+ sscanf(Hdr->mode, "%o", &mode);
+ member->info.mode = mode;
+ member->info.user = atoi(Hdr->uid);
+ member->info.group = atoi(Hdr->gid);
+ member->flags = flags;
+ member->data = At;
+
+ return member;
+}
+
+bool
+Archive::checkSignature(std::string* error) {
+ // Check the magic string at file's header
+ if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) {
+ if (error)
+ *error = "invalid signature for an archive file";
+ return false;
+ }
+ return true;
+}
+
+// This function loads the entire archive and fully populates its ilist with
+// the members of the archive file. This is typically used in preparation for
+// editing the contents of the archive.
+bool
+Archive::loadArchive(std::string* error) {
+
+ // Set up parsing
+ members.clear();
+ symTab.clear();
+ const char *At = base;
+ const char *End = mapfile->getBufferEnd();
+
+ if (!checkSignature(error))
+ return false;
+
+ At += 8; // Skip the magic string.
+
+ bool seenSymbolTable = false;
+ bool foundFirstFile = false;
+ while (At < End) {
+ // parse the member header
+ const char* Save = At;
+ ArchiveMember* mbr = parseMemberHeader(At, End, error);
+ if (!mbr)
+ return false;
+
+ // check if this is the foreign symbol table
+ if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
+ // We just save this but don't do anything special
+ // with it. It doesn't count as the "first file".
+ if (foreignST) {
+ // What? Multiple foreign symbol tables? Just chuck it
+ // and retain the last one found.
+ delete foreignST;
+ }
+ foreignST = mbr;
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ } else if (mbr->isStringTable()) {
+ // Simply suck the entire string table into a string
+ // variable. This will be used to get the names of the
+ // members that use the "/ddd" format for their names
+ // (SVR4 style long names).
+ strtab.assign(At, mbr->getSize());
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ } else if (mbr->isLLVMSymbolTable()) {
+ // This is the LLVM symbol table for the archive. If we've seen it
+ // already, its an error. Otherwise, parse the symbol table and move on.
+ if (seenSymbolTable) {
+ if (error)
+ *error = "invalid archive: multiple symbol tables";
+ return false;
+ }
+ if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error))
+ return false;
+ seenSymbolTable = true;
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr; // We don't need this member in the list of members.
+ } else {
+ // This is just a regular file. If its the first one, save its offset.
+ // Otherwise just push it on the list and move on to the next file.
+ if (!foundFirstFile) {
+ firstFileOffset = Save - base;
+ foundFirstFile = true;
+ }
+ members.push_back(mbr);
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ }
+ }
+ return true;
+}
+
+// Open and completely load the archive file.
+Archive*
+Archive::OpenAndLoad(const sys::Path& File, LLVMContext& C,
+ std::string* ErrorMessage) {
+ OwningPtr<Archive> result ( new Archive(File, C));
+ if (result->mapToMemory(ErrorMessage))
+ return NULL;
+ if (!result->loadArchive(ErrorMessage))
+ return NULL;
+ return result.take();
+}
+
+// Get all the bitcode modules from the archive
+bool
+Archive::getAllModules(std::vector<Module*>& Modules,
+ std::string* ErrMessage) {
+
+ for (iterator I=begin(), E=end(); I != E; ++I) {
+ if (I->isBitcode()) {
+ std::string FullMemberName = archPath.str() +
+ "(" + I->getPath().str() + ")";
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()),
+ FullMemberName.c_str());
+
+ Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage);
+ delete Buffer;
+ if (!M)
+ return true;
+
+ Modules.push_back(M);
+ }
+ }
+ return false;
+}
+
+// Load just the symbol table from the archive file
+bool
+Archive::loadSymbolTable(std::string* ErrorMsg) {
+
+ // Set up parsing
+ members.clear();
+ symTab.clear();
+ const char *At = base;
+ const char *End = mapfile->getBufferEnd();
+
+ // Make sure we're dealing with an archive
+ if (!checkSignature(ErrorMsg))
+ return false;
+
+ At += 8; // Skip signature
+
+ // Parse the first file member header
+ const char* FirstFile = At;
+ ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg);
+ if (!mbr)
+ return false;
+
+ if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
+ // Skip the foreign symbol table, we don't do anything with it
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+
+ // Read the next one
+ FirstFile = At;
+ mbr = parseMemberHeader(At, End, ErrorMsg);
+ if (!mbr) {
+ delete mbr;
+ return false;
+ }
+ }
+
+ if (mbr->isStringTable()) {
+ // Process the string table entry
+ strtab.assign((const char*)mbr->getData(), mbr->getSize());
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ // Get the next one
+ FirstFile = At;
+ mbr = parseMemberHeader(At, End, ErrorMsg);
+ if (!mbr) {
+ delete mbr;
+ return false;
+ }
+ }
+
+ // See if its the symbol table
+ if (mbr->isLLVMSymbolTable()) {
+ if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) {
+ delete mbr;
+ return false;
+ }
+
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ // Can't be any more symtab headers so just advance
+ FirstFile = At;
+ } else {
+ // There's no symbol table in the file. We have to rebuild it from scratch
+ // because the intent of this method is to get the symbol table loaded so
+ // it can be searched efficiently.
+ // Add the member to the members list
+ members.push_back(mbr);
+ }
+
+ firstFileOffset = FirstFile - base;
+ return true;
+}
+
+// Open the archive and load just the symbol tables
+Archive* Archive::OpenAndLoadSymbols(const sys::Path& File,
+ LLVMContext& C,
+ std::string* ErrorMessage) {
+ OwningPtr<Archive> result ( new Archive(File, C) );
+ if (result->mapToMemory(ErrorMessage))
+ return NULL;
+ if (!result->loadSymbolTable(ErrorMessage))
+ return NULL;
+ return result.take();
+}
+
+// Look up one symbol in the symbol table and return the module that defines
+// that symbol.
+Module*
+Archive::findModuleDefiningSymbol(const std::string& symbol,
+ std::string* ErrMsg) {
+ SymTabType::iterator SI = symTab.find(symbol);
+ if (SI == symTab.end())
+ return 0;
+
+ // The symbol table was previously constructed assuming that the members were
+ // written without the symbol table header. Because VBR encoding is used, the
+ // values could not be adjusted to account for the offset of the symbol table
+ // because that could affect the size of the symbol table due to VBR encoding.
+ // We now have to account for this by adjusting the offset by the size of the
+ // symbol table and its header.
+ unsigned fileOffset =
+ SI->second + // offset in symbol-table-less file
+ firstFileOffset; // add offset to first "real" file in archive
+
+ // See if the module is already loaded
+ ModuleMap::iterator MI = modules.find(fileOffset);
+ if (MI != modules.end())
+ return MI->second.first;
+
+ // Module hasn't been loaded yet, we need to load it
+ const char* modptr = base + fileOffset;
+ ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(),
+ ErrMsg);
+ if (!mbr)
+ return 0;
+
+ // Now, load the bitcode module to get the Module.
+ std::string FullMemberName = archPath.str() + "(" +
+ mbr->getPath().str() + ")";
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()),
+ FullMemberName.c_str());
+
+ Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg);
+ if (!m)
+ return 0;
+
+ modules.insert(std::make_pair(fileOffset, std::make_pair(m, mbr)));
+
+ return m;
+}
+
+// Look up multiple symbols in the symbol table and return a set of
+// Modules that define those symbols.
+bool
+Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
+ SmallVectorImpl<Module*>& result,
+ std::string* error) {
+ if (!mapfile || !base) {
+ if (error)
+ *error = "Empty archive invalid for finding modules defining symbols";
+ return false;
+ }
+
+ if (symTab.empty()) {
+ // We don't have a symbol table, so we must build it now but lets also
+ // make sure that we populate the modules table as we do this to ensure
+ // that we don't load them twice when findModuleDefiningSymbol is called
+ // below.
+
+ // Get a pointer to the first file
+ const char* At = base + firstFileOffset;
+ const char* End = mapfile->getBufferEnd();
+
+ while ( At < End) {
+ // Compute the offset to be put in the symbol table
+ unsigned offset = At - base - firstFileOffset;
+
+ // Parse the file's header
+ ArchiveMember* mbr = parseMemberHeader(At, End, error);
+ if (!mbr)
+ return false;
+
+ // If it contains symbols
+ if (mbr->isBitcode()) {
+ // Get the symbols
+ std::vector<std::string> symbols;
+ std::string FullMemberName = archPath.str() + "(" +
+ mbr->getPath().str() + ")";
+ Module* M =
+ GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context,
+ symbols, error);
+
+ if (M) {
+ // Insert the module's symbols into the symbol table
+ for (std::vector<std::string>::iterator I = symbols.begin(),
+ E=symbols.end(); I != E; ++I ) {
+ symTab.insert(std::make_pair(*I, offset));
+ }
+ // Insert the Module and the ArchiveMember into the table of
+ // modules.
+ modules.insert(std::make_pair(offset, std::make_pair(M, mbr)));
+ } else {
+ if (error)
+ *error = "Can't parse bitcode member: " +
+ mbr->getPath().str() + ": " + *error;
+ delete mbr;
+ return false;
+ }
+ }
+
+ // Go to the next file location
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ }
+ }
+
+ // At this point we have a valid symbol table (one way or another) so we
+ // just use it to quickly find the symbols requested.
+
+ SmallPtrSet<Module*, 16> Added;
+ for (std::set<std::string>::iterator I=symbols.begin(),
+ Next = I,
+ E=symbols.end(); I != E; I = Next) {
+ // Increment Next before we invalidate it.
+ ++Next;
+
+ // See if this symbol exists
+ Module* m = findModuleDefiningSymbol(*I,error);
+ if (!m)
+ continue;
+ bool NewMember = Added.insert(m);
+ if (!NewMember)
+ continue;
+
+ // The symbol exists, insert the Module into our result.
+ result.push_back(m);
+
+ // Remove the symbol now that its been resolved.
+ symbols.erase(I);
+ }
+ return true;
+}
+
+bool Archive::isBitcodeArchive() {
+ // Make sure the symTab has been loaded. In most cases this should have been
+ // done when the archive was constructed, but still, this is just in case.
+ if (symTab.empty())
+ if (!loadSymbolTable(0))
+ return false;
+
+ // Now that we know it's been loaded, return true
+ // if it has a size
+ if (symTab.size()) return true;
+
+ // We still can't be sure it isn't a bitcode archive
+ if (!loadArchive(0))
+ return false;
+
+ std::vector<Module *> Modules;
+ std::string ErrorMessage;
+
+ // Scan the archive, trying to load a bitcode member. We only load one to
+ // see if this works.
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (!I->isBitcode())
+ continue;
+
+ std::string FullMemberName =
+ archPath.str() + "(" + I->getPath().str() + ")";
+
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()),
+ FullMemberName.c_str());
+ Module *M = ParseBitcodeFile(Buffer, Context);
+ delete Buffer;
+ if (!M)
+ return false; // Couldn't parse bitcode, not a bitcode archive.
+ delete M;
+ return true;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Archive/ArchiveWriter.cpp b/contrib/llvm/lib/Archive/ArchiveWriter.cpp
new file mode 100644
index 000000000000..3eba701c9535
--- /dev/null
+++ b/contrib/llvm/lib/Archive/ArchiveWriter.cpp
@@ -0,0 +1,489 @@
+//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Builds up an LLVM archive file (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/Archive.h"
+#include "ArchiveInternals.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
+#include <fstream>
+#include <iomanip>
+#include <ostream>
+using namespace llvm;
+
+// Write an integer using variable bit rate encoding. This saves a few bytes
+// per entry in the symbol table.
+static inline void writeInteger(unsigned num, std::ofstream& ARFile) {
+ while (1) {
+ if (num < 0x80) { // done?
+ ARFile << (unsigned char)num;
+ return;
+ }
+
+ // Nope, we are bigger than a character, output the next 7 bits and set the
+ // high bit to say that there is more coming...
+ ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F));
+ num >>= 7; // Shift out 7 bits now...
+ }
+}
+
+// Compute how many bytes are taken by a given VBR encoded value. This is needed
+// to pre-compute the size of the symbol table.
+static inline unsigned numVbrBytes(unsigned num) {
+
+ // Note that the following nested ifs are somewhat equivalent to a binary
+ // search. We split it in half by comparing against 2^14 first. This allows
+ // most reasonable values to be done in 2 comparisons instead of 1 for
+ // small ones and four for large ones. We expect this to access file offsets
+ // in the 2^10 to 2^24 range and symbol lengths in the 2^0 to 2^8 range,
+ // so this approach is reasonable.
+ if (num < 1<<14) {
+ if (num < 1<<7)
+ return 1;
+ else
+ return 2;
+ }
+ if (num < 1<<21)
+ return 3;
+
+ if (num < 1<<28)
+ return 4;
+ return 5; // anything >= 2^28 takes 5 bytes
+}
+
+// Create an empty archive.
+Archive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) {
+ Archive* result = new Archive(FilePath, C);
+ return result;
+}
+
+// Fill the ArchiveMemberHeader with the information from a member. If
+// TruncateNames is true, names are flattened to 15 chars or less. The sz field
+// is provided here instead of coming from the mbr because the member might be
+// stored compressed and the compressed size is not the ArchiveMember's size.
+// Furthermore compressed files have negative size fields to identify them as
+// compressed.
+bool
+Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr,
+ int sz, bool TruncateNames) const {
+
+ // Set the permissions mode, uid and gid
+ hdr.init();
+ char buffer[32];
+ sprintf(buffer, "%-8o", mbr.getMode());
+ memcpy(hdr.mode,buffer,8);
+ sprintf(buffer, "%-6u", mbr.getUser());
+ memcpy(hdr.uid,buffer,6);
+ sprintf(buffer, "%-6u", mbr.getGroup());
+ memcpy(hdr.gid,buffer,6);
+
+ // Set the last modification date
+ uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime();
+ sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch));
+ memcpy(hdr.date,buffer,12);
+
+ // Get rid of trailing blanks in the name
+ std::string mbrPath = mbr.getPath().str();
+ size_t mbrLen = mbrPath.length();
+ while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') {
+ mbrPath.erase(mbrLen-1,1);
+ mbrLen--;
+ }
+
+ // Set the name field in one of its various flavors.
+ bool writeLongName = false;
+ if (mbr.isStringTable()) {
+ memcpy(hdr.name,ARFILE_STRTAB_NAME,16);
+ } else if (mbr.isSVR4SymbolTable()) {
+ memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16);
+ } else if (mbr.isBSD4SymbolTable()) {
+ memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16);
+ } else if (mbr.isLLVMSymbolTable()) {
+ memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16);
+ } else if (TruncateNames) {
+ const char* nm = mbrPath.c_str();
+ unsigned len = mbrPath.length();
+ size_t slashpos = mbrPath.rfind('/');
+ if (slashpos != std::string::npos) {
+ nm += slashpos + 1;
+ len -= slashpos +1;
+ }
+ if (len > 15)
+ len = 15;
+ memcpy(hdr.name,nm,len);
+ hdr.name[len] = '/';
+ } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) {
+ memcpy(hdr.name,mbrPath.c_str(),mbrPath.length());
+ hdr.name[mbrPath.length()] = '/';
+ } else {
+ std::string nm = "#1/";
+ nm += utostr(mbrPath.length());
+ memcpy(hdr.name,nm.data(),nm.length());
+ if (sz < 0)
+ sz -= mbrPath.length();
+ else
+ sz += mbrPath.length();
+ writeLongName = true;
+ }
+
+ // Set the size field
+ if (sz < 0) {
+ buffer[0] = '-';
+ sprintf(&buffer[1],"%-9u",(unsigned)-sz);
+ } else {
+ sprintf(buffer, "%-10u", (unsigned)sz);
+ }
+ memcpy(hdr.size,buffer,10);
+
+ return writeLongName;
+}
+
+// Insert a file into the archive before some other member. This also takes care
+// of extracting the necessary flags and information from the file.
+bool
+Archive::addFileBefore(const sys::Path& filePath, iterator where,
+ std::string* ErrMsg) {
+ bool Exists;
+ if (sys::fs::exists(filePath.str(), Exists) || !Exists) {
+ if (ErrMsg)
+ *ErrMsg = "Can not add a non-existent file to archive";
+ return true;
+ }
+
+ ArchiveMember* mbr = new ArchiveMember(this);
+
+ mbr->data = 0;
+ mbr->path = filePath;
+ const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg);
+ if (!FSInfo) {
+ delete mbr;
+ return true;
+ }
+ mbr->info = *FSInfo;
+
+ unsigned flags = 0;
+ bool hasSlash = filePath.str().find('/') != std::string::npos;
+ if (hasSlash)
+ flags |= ArchiveMember::HasPathFlag;
+ if (hasSlash || filePath.str().length() > 15)
+ flags |= ArchiveMember::HasLongFilenameFlag;
+
+ sys::fs::file_magic type;
+ if (sys::fs::identify_magic(mbr->path.str(), type))
+ type = sys::fs::file_magic::unknown;
+ switch (type) {
+ case sys::fs::file_magic::bitcode:
+ flags |= ArchiveMember::BitcodeFlag;
+ break;
+ default:
+ break;
+ }
+ mbr->flags = flags;
+ members.insert(where,mbr);
+ return false;
+}
+
+// Write one member out to the file.
+bool
+Archive::writeMember(
+ const ArchiveMember& member,
+ std::ofstream& ARFile,
+ bool CreateSymbolTable,
+ bool TruncateNames,
+ std::string* ErrMsg
+) {
+
+ unsigned filepos = ARFile.tellp();
+ filepos -= 8;
+
+ // Get the data and its size either from the
+ // member's in-memory data or directly from the file.
+ size_t fSize = member.getSize();
+ const char *data = (const char*)member.getData();
+ MemoryBuffer *mFile = 0;
+ if (!data) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(member.getPath().c_str(), File)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
+ return true;
+ }
+ mFile = File.take();
+ data = mFile->getBufferStart();
+ fSize = mFile->getBufferSize();
+ }
+
+ // Now that we have the data in memory, update the
+ // symbol table if it's a bitcode file.
+ if (CreateSymbolTable && member.isBitcode()) {
+ std::vector<std::string> symbols;
+ std::string FullMemberName = archPath.str() + "(" + member.getPath().str()
+ + ")";
+ Module* M =
+ GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg);
+
+ // If the bitcode parsed successfully
+ if ( M ) {
+ for (std::vector<std::string>::iterator SI = symbols.begin(),
+ SE = symbols.end(); SI != SE; ++SI) {
+
+ std::pair<SymTabType::iterator,bool> Res =
+ symTab.insert(std::make_pair(*SI,filepos));
+
+ if (Res.second) {
+ symTabSize += SI->length() +
+ numVbrBytes(SI->length()) +
+ numVbrBytes(filepos);
+ }
+ }
+ // We don't need this module any more.
+ delete M;
+ } else {
+ delete mFile;
+ if (ErrMsg)
+ *ErrMsg = "Can't parse bitcode member: " + member.getPath().str()
+ + ": " + *ErrMsg;
+ return true;
+ }
+ }
+
+ int hdrSize = fSize;
+
+ // Compute the fields of the header
+ ArchiveMemberHeader Hdr;
+ bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames);
+
+ // Write header to archive file
+ ARFile.write((char*)&Hdr, sizeof(Hdr));
+
+ // Write the long filename if its long
+ if (writeLongName) {
+ ARFile.write(member.getPath().str().data(),
+ member.getPath().str().length());
+ }
+
+ // Write the (possibly compressed) member's content to the file.
+ ARFile.write(data,fSize);
+
+ // Make sure the member is an even length
+ if ((ARFile.tellp() & 1) == 1)
+ ARFile << ARFILE_PAD;
+
+ // Close the mapped file if it was opened
+ delete mFile;
+ return false;
+}
+
+// Write out the LLVM symbol table as an archive member to the file.
+void
+Archive::writeSymbolTable(std::ofstream& ARFile) {
+
+ // Construct the symbol table's header
+ ArchiveMemberHeader Hdr;
+ Hdr.init();
+ memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16);
+ uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime();
+ char buffer[32];
+ sprintf(buffer, "%-8o", 0644);
+ memcpy(Hdr.mode,buffer,8);
+ sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId());
+ memcpy(Hdr.uid,buffer,6);
+ sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId());
+ memcpy(Hdr.gid,buffer,6);
+ sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch));
+ memcpy(Hdr.date,buffer,12);
+ sprintf(buffer,"%-10u",symTabSize);
+ memcpy(Hdr.size,buffer,10);
+
+ // Write the header
+ ARFile.write((char*)&Hdr, sizeof(Hdr));
+
+#ifndef NDEBUG
+ // Save the starting position of the symbol tables data content.
+ unsigned startpos = ARFile.tellp();
+#endif
+
+ // Write out the symbols sequentially
+ for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end();
+ I != E; ++I)
+ {
+ // Write out the file index
+ writeInteger(I->second, ARFile);
+ // Write out the length of the symbol
+ writeInteger(I->first.length(), ARFile);
+ // Write out the symbol
+ ARFile.write(I->first.data(), I->first.length());
+ }
+
+#ifndef NDEBUG
+ // Now that we're done with the symbol table, get the ending file position
+ unsigned endpos = ARFile.tellp();
+#endif
+
+ // Make sure that the amount we wrote is what we pre-computed. This is
+ // critical for file integrity purposes.
+ assert(endpos - startpos == symTabSize && "Invalid symTabSize computation");
+
+ // Make sure the symbol table is even sized
+ if (symTabSize % 2 != 0 )
+ ARFile << ARFILE_PAD;
+}
+
+// Write the entire archive to the file specified when the archive was created.
+// This writes to a temporary file first. Options are for creating a symbol
+// table, flattening the file names (no directories, 15 chars max) and
+// compressing each archive member.
+bool
+Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames,
+ std::string* ErrMsg)
+{
+ // Make sure they haven't opened up the file, not loaded it,
+ // but are now trying to write it which would wipe out the file.
+ if (members.empty() && mapfile && mapfile->getBufferSize() > 8) {
+ if (ErrMsg)
+ *ErrMsg = "Can't write an archive not opened for writing";
+ return true;
+ }
+
+ // Create a temporary file to store the archive in
+ sys::Path TmpArchive = archPath;
+ if (TmpArchive.createTemporaryFileOnDisk(ErrMsg))
+ return true;
+
+ // Make sure the temporary gets removed if we crash
+ sys::RemoveFileOnSignal(TmpArchive);
+
+ // Create archive file for output.
+ std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
+ std::ios::binary;
+ std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode);
+
+ // Check for errors opening or creating archive file.
+ if (!ArchiveFile.is_open() || ArchiveFile.bad()) {
+ TmpArchive.eraseFromDisk();
+ if (ErrMsg)
+ *ErrMsg = "Error opening archive file: " + archPath.str();
+ return true;
+ }
+
+ // If we're creating a symbol table, reset it now
+ if (CreateSymbolTable) {
+ symTabSize = 0;
+ symTab.clear();
+ }
+
+ // Write magic string to archive.
+ ArchiveFile << ARFILE_MAGIC;
+
+ // Loop over all member files, and write them out. Note that this also
+ // builds the symbol table, symTab.
+ for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
+ if (writeMember(*I, ArchiveFile, CreateSymbolTable,
+ TruncateNames, ErrMsg)) {
+ TmpArchive.eraseFromDisk();
+ ArchiveFile.close();
+ return true;
+ }
+ }
+
+ // Close archive file.
+ ArchiveFile.close();
+
+ // Write the symbol table
+ if (CreateSymbolTable) {
+ // At this point we have written a file that is a legal archive but it
+ // doesn't have a symbol table in it. To aid in faster reading and to
+ // ensure compatibility with other archivers we need to put the symbol
+ // table first in the file. Unfortunately, this means mapping the file
+ // we just wrote back in and copying it to the destination file.
+ sys::Path FinalFilePath = archPath;
+
+ // Map in the archive we just wrote.
+ {
+ OwningPtr<MemoryBuffer> arch;
+ if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
+ return true;
+ }
+ const char* base = arch->getBufferStart();
+
+ // Open another temporary file in order to avoid invalidating the
+ // mmapped data
+ if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg))
+ return true;
+ sys::RemoveFileOnSignal(FinalFilePath);
+
+ std::ofstream FinalFile(FinalFilePath.c_str(), io_mode);
+ if (!FinalFile.is_open() || FinalFile.bad()) {
+ TmpArchive.eraseFromDisk();
+ if (ErrMsg)
+ *ErrMsg = "Error opening archive file: " + FinalFilePath.str();
+ return true;
+ }
+
+ // Write the file magic number
+ FinalFile << ARFILE_MAGIC;
+
+ // If there is a foreign symbol table, put it into the file now. Most
+ // ar(1) implementations require the symbol table to be first but llvm-ar
+ // can deal with it being after a foreign symbol table. This ensures
+ // compatibility with other ar(1) implementations as well as allowing the
+ // archive to store both native .o and LLVM .bc files, both indexed.
+ if (foreignST) {
+ if (writeMember(*foreignST, FinalFile, false, false, ErrMsg)) {
+ FinalFile.close();
+ TmpArchive.eraseFromDisk();
+ return true;
+ }
+ }
+
+ // Put out the LLVM symbol table now.
+ writeSymbolTable(FinalFile);
+
+ // Copy the temporary file contents being sure to skip the file's magic
+ // number.
+ FinalFile.write(base + sizeof(ARFILE_MAGIC)-1,
+ arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1);
+
+ // Close up shop
+ FinalFile.close();
+ } // free arch.
+
+ // Move the final file over top of TmpArchive
+ if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg))
+ return true;
+ }
+
+ // Before we replace the actual archive, we need to forget all the
+ // members, since they point to data in that old archive. We need to do
+ // this because we cannot replace an open file on Windows.
+ cleanUpMemory();
+
+ if (TmpArchive.renamePathOnDisk(archPath, ErrMsg))
+ return true;
+
+ // Set correct read and write permissions after temporary file is moved
+ // to final destination path.
+ if (archPath.makeReadableOnDisk(ErrMsg))
+ return true;
+ if (archPath.makeWriteableOnDisk(ErrMsg))
+ return true;
+
+ return false;
+}
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
new file mode 100644
index 000000000000..f46383be7e46
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -0,0 +1,884 @@
+//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Lexer for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLLexer.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
+ ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions.
+//===----------------------------------------------------------------------===//
+
+// atoull - Convert an ascii string of decimal digits into the unsigned long
+// long representation... this does not have to do input error checking,
+// because we know that the input will be matched by a suitable regex...
+//
+uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
+ uint64_t Result = 0;
+ for (; Buffer != End; Buffer++) {
+ uint64_t OldRes = Result;
+ Result *= 10;
+ Result += *Buffer-'0';
+ if (Result < OldRes) { // Uh, oh, overflow detected!!!
+ Error("constant bigger than 64 bits detected!");
+ return 0;
+ }
+ }
+ return Result;
+}
+
+uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
+ uint64_t Result = 0;
+ for (; Buffer != End; ++Buffer) {
+ uint64_t OldRes = Result;
+ Result *= 16;
+ Result += hexDigitValue(*Buffer);
+
+ if (Result < OldRes) { // Uh, oh, overflow detected!!!
+ Error("constant bigger than 64 bits detected!");
+ return 0;
+ }
+ }
+ return Result;
+}
+
+void LLLexer::HexToIntPair(const char *Buffer, const char *End,
+ uint64_t Pair[2]) {
+ Pair[0] = 0;
+ for (int i=0; i<16; i++, Buffer++) {
+ assert(Buffer != End);
+ Pair[0] *= 16;
+ Pair[0] += hexDigitValue(*Buffer);
+ }
+ Pair[1] = 0;
+ for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
+ Pair[1] *= 16;
+ Pair[1] += hexDigitValue(*Buffer);
+ }
+ if (Buffer != End)
+ Error("constant bigger than 128 bits detected!");
+}
+
+/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
+/// { low64, high16 } as usual for an APInt.
+void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
+ uint64_t Pair[2]) {
+ Pair[1] = 0;
+ for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
+ assert(Buffer != End);
+ Pair[1] *= 16;
+ Pair[1] += hexDigitValue(*Buffer);
+ }
+ Pair[0] = 0;
+ for (int i=0; i<16; i++, Buffer++) {
+ Pair[0] *= 16;
+ Pair[0] += hexDigitValue(*Buffer);
+ }
+ if (Buffer != End)
+ Error("constant bigger than 128 bits detected!");
+}
+
+// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
+// appropriate character.
+static void UnEscapeLexed(std::string &Str) {
+ if (Str.empty()) return;
+
+ char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
+ char *BOut = Buffer;
+ for (char *BIn = Buffer; BIn != EndBuffer; ) {
+ if (BIn[0] == '\\') {
+ if (BIn < EndBuffer-1 && BIn[1] == '\\') {
+ *BOut++ = '\\'; // Two \ becomes one
+ BIn += 2;
+ } else if (BIn < EndBuffer-2 &&
+ isxdigit(static_cast<unsigned char>(BIn[1])) &&
+ isxdigit(static_cast<unsigned char>(BIn[2]))) {
+ *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
+ BIn += 3; // Skip over handled chars
+ ++BOut;
+ } else {
+ *BOut++ = *BIn++;
+ }
+ } else {
+ *BOut++ = *BIn++;
+ }
+ }
+ Str.resize(BOut-Buffer);
+}
+
+/// isLabelChar - Return true for [-a-zA-Z$._0-9].
+static bool isLabelChar(char C) {
+ return isalnum(static_cast<unsigned char>(C)) || C == '-' || C == '$' ||
+ C == '.' || C == '_';
+}
+
+
+/// isLabelTail - Return true if this pointer points to a valid end of a label.
+static const char *isLabelTail(const char *CurPtr) {
+ while (1) {
+ if (CurPtr[0] == ':') return CurPtr+1;
+ if (!isLabelChar(CurPtr[0])) return 0;
+ ++CurPtr;
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Lexer definition.
+//===----------------------------------------------------------------------===//
+
+LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
+ LLVMContext &C)
+ : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
+ CurPtr = CurBuf->getBufferStart();
+}
+
+std::string LLLexer::getFilename() const {
+ return CurBuf->getBufferIdentifier();
+}
+
+int LLLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default: return (unsigned char)CurChar;
+ case 0:
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+}
+
+
+lltok::Kind LLLexer::LexToken() {
+ TokStart = CurPtr;
+
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ default:
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
+ return LexIdentifier();
+
+ return lltok::Error;
+ case EOF: return lltok::Eof;
+ case 0:
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ // Ignore whitespace.
+ return LexToken();
+ case '+': return LexPositive();
+ case '@': return LexAt();
+ case '%': return LexPercent();
+ case '"': return LexQuote();
+ case '.':
+ if (const char *Ptr = isLabelTail(CurPtr)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr-1);
+ return lltok::LabelStr;
+ }
+ if (CurPtr[0] == '.' && CurPtr[1] == '.') {
+ CurPtr += 2;
+ return lltok::dotdotdot;
+ }
+ return lltok::Error;
+ case '$':
+ if (const char *Ptr = isLabelTail(CurPtr)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr-1);
+ return lltok::LabelStr;
+ }
+ return lltok::Error;
+ case ';':
+ SkipLineComment();
+ return LexToken();
+ case '!': return LexExclaim();
+ case '#': return LexHash();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '-':
+ return LexDigitOrNegative();
+ case '=': return lltok::equal;
+ case '[': return lltok::lsquare;
+ case ']': return lltok::rsquare;
+ case '{': return lltok::lbrace;
+ case '}': return lltok::rbrace;
+ case '<': return lltok::less;
+ case '>': return lltok::greater;
+ case '(': return lltok::lparen;
+ case ')': return lltok::rparen;
+ case ',': return lltok::comma;
+ case '*': return lltok::star;
+ case '\\': return lltok::backslash;
+ }
+}
+
+void LLLexer::SkipLineComment() {
+ while (1) {
+ if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
+ return;
+ }
+}
+
+/// LexAt - Lex all tokens that start with an @ character:
+/// GlobalVar @\"[^\"]*\"
+/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
+/// GlobalVarID @[0-9]+
+lltok::Kind LLLexer::LexAt() {
+ // Handle AtStringConstant: @\"[^\"]*\"
+ if (CurPtr[0] == '"') {
+ ++CurPtr;
+
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in global variable name");
+ return lltok::Error;
+ }
+ if (CurChar == '"') {
+ StrVal.assign(TokStart+2, CurPtr-1);
+ UnEscapeLexed(StrVal);
+ return lltok::GlobalVar;
+ }
+ }
+ }
+
+ // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ if (ReadVarName())
+ return lltok::GlobalVar;
+
+ // Handle GlobalVarID: @[0-9]+
+ if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
+ for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ /*empty*/;
+
+ uint64_t Val = atoull(TokStart+1, CurPtr);
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::GlobalID;
+ }
+
+ return lltok::Error;
+}
+
+/// ReadString - Read a string until the closing quote.
+lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
+ const char *Start = CurPtr;
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in string constant");
+ return lltok::Error;
+ }
+ if (CurChar == '"') {
+ StrVal.assign(Start, CurPtr-1);
+ UnEscapeLexed(StrVal);
+ return kind;
+ }
+ }
+}
+
+/// ReadVarName - Read the rest of a token containing a variable name.
+bool LLLexer::ReadVarName() {
+ const char *NameStart = CurPtr;
+ if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
+ CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_') {
+ ++CurPtr;
+ while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
+ CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_')
+ ++CurPtr;
+
+ StrVal.assign(NameStart, CurPtr);
+ return true;
+ }
+ return false;
+}
+
+/// LexPercent - Lex all tokens that start with a % character:
+/// LocalVar ::= %\"[^\"]*\"
+/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
+/// LocalVarID ::= %[0-9]+
+lltok::Kind LLLexer::LexPercent() {
+ // Handle LocalVarName: %\"[^\"]*\"
+ if (CurPtr[0] == '"') {
+ ++CurPtr;
+ return ReadString(lltok::LocalVar);
+ }
+
+ // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ if (ReadVarName())
+ return lltok::LocalVar;
+
+ // Handle LocalVarID: %[0-9]+
+ if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
+ for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ /*empty*/;
+
+ uint64_t Val = atoull(TokStart+1, CurPtr);
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::LocalVarID;
+ }
+
+ return lltok::Error;
+}
+
+/// LexQuote - Lex all tokens that start with a " character:
+/// QuoteLabel "[^"]+":
+/// StringConstant "[^"]*"
+lltok::Kind LLLexer::LexQuote() {
+ lltok::Kind kind = ReadString(lltok::StringConstant);
+ if (kind == lltok::Error || kind == lltok::Eof)
+ return kind;
+
+ if (CurPtr[0] == ':') {
+ ++CurPtr;
+ kind = lltok::LabelStr;
+ }
+
+ return kind;
+}
+
+/// LexExclaim:
+/// !foo
+/// !
+lltok::Kind LLLexer::LexExclaim() {
+ // Lex a metadata name as a MetadataVar.
+ if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
+ CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
+ ++CurPtr;
+ while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
+ CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
+ ++CurPtr;
+
+ StrVal.assign(TokStart+1, CurPtr); // Skip !
+ UnEscapeLexed(StrVal);
+ return lltok::MetadataVar;
+ }
+ return lltok::exclaim;
+}
+
+/// LexHash - Lex all tokens that start with a # character:
+/// AttrGrpID ::= #[0-9]+
+lltok::Kind LLLexer::LexHash() {
+ // Handle AttrGrpID: #[0-9]+
+ if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
+ for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ /*empty*/;
+
+ uint64_t Val = atoull(TokStart+1, CurPtr);
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::AttrGrpID;
+ }
+
+ return lltok::Error;
+}
+
+/// LexIdentifier: Handle several related productions:
+/// Label [-a-zA-Z$._0-9]+:
+/// IntegerType i[0-9]+
+/// Keyword sdiv, float, ...
+/// HexIntConstant [us]0x[0-9A-Fa-f]+
+lltok::Kind LLLexer::LexIdentifier() {
+ const char *StartChar = CurPtr;
+ const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
+ const char *KeywordEnd = 0;
+
+ for (; isLabelChar(*CurPtr); ++CurPtr) {
+ // If we decide this is an integer, remember the end of the sequence.
+ if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
+ IntEnd = CurPtr;
+ if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
+ *CurPtr != '_')
+ KeywordEnd = CurPtr;
+ }
+
+ // If we stopped due to a colon, this really is a label.
+ if (*CurPtr == ':') {
+ StrVal.assign(StartChar-1, CurPtr++);
+ return lltok::LabelStr;
+ }
+
+ // Otherwise, this wasn't a label. If this was valid as an integer type,
+ // return it.
+ if (IntEnd == 0) IntEnd = CurPtr;
+ if (IntEnd != StartChar) {
+ CurPtr = IntEnd;
+ uint64_t NumBits = atoull(StartChar, CurPtr);
+ if (NumBits < IntegerType::MIN_INT_BITS ||
+ NumBits > IntegerType::MAX_INT_BITS) {
+ Error("bitwidth for integer type out of range!");
+ return lltok::Error;
+ }
+ TyVal = IntegerType::get(Context, NumBits);
+ return lltok::Type;
+ }
+
+ // Otherwise, this was a letter sequence. See which keyword this is.
+ if (KeywordEnd == 0) KeywordEnd = CurPtr;
+ CurPtr = KeywordEnd;
+ --StartChar;
+ unsigned Len = CurPtr-StartChar;
+#define KEYWORD(STR) \
+ do { \
+ if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
+ return lltok::kw_##STR; \
+ } while (0)
+
+ KEYWORD(true); KEYWORD(false);
+ KEYWORD(declare); KEYWORD(define);
+ KEYWORD(global); KEYWORD(constant);
+
+ KEYWORD(private);
+ KEYWORD(linker_private);
+ KEYWORD(linker_private_weak);
+ KEYWORD(linker_private_weak_def_auto); // FIXME: For backwards compatibility.
+ KEYWORD(internal);
+ KEYWORD(available_externally);
+ KEYWORD(linkonce);
+ KEYWORD(linkonce_odr);
+ KEYWORD(linkonce_odr_auto_hide);
+ KEYWORD(weak);
+ KEYWORD(weak_odr);
+ KEYWORD(appending);
+ KEYWORD(dllimport);
+ KEYWORD(dllexport);
+ KEYWORD(common);
+ KEYWORD(default);
+ KEYWORD(hidden);
+ KEYWORD(protected);
+ KEYWORD(unnamed_addr);
+ KEYWORD(externally_initialized);
+ KEYWORD(extern_weak);
+ KEYWORD(external);
+ KEYWORD(thread_local);
+ KEYWORD(localdynamic);
+ KEYWORD(initialexec);
+ KEYWORD(localexec);
+ KEYWORD(zeroinitializer);
+ KEYWORD(undef);
+ KEYWORD(null);
+ KEYWORD(to);
+ KEYWORD(tail);
+ KEYWORD(target);
+ KEYWORD(triple);
+ KEYWORD(unwind);
+ KEYWORD(deplibs); // FIXME: Remove in 4.0.
+ KEYWORD(datalayout);
+ KEYWORD(volatile);
+ KEYWORD(atomic);
+ KEYWORD(unordered);
+ KEYWORD(monotonic);
+ KEYWORD(acquire);
+ KEYWORD(release);
+ KEYWORD(acq_rel);
+ KEYWORD(seq_cst);
+ KEYWORD(singlethread);
+
+ KEYWORD(nnan);
+ KEYWORD(ninf);
+ KEYWORD(nsz);
+ KEYWORD(arcp);
+ KEYWORD(fast);
+ KEYWORD(nuw);
+ KEYWORD(nsw);
+ KEYWORD(exact);
+ KEYWORD(inbounds);
+ KEYWORD(align);
+ KEYWORD(addrspace);
+ KEYWORD(section);
+ KEYWORD(alias);
+ KEYWORD(module);
+ KEYWORD(asm);
+ KEYWORD(sideeffect);
+ KEYWORD(alignstack);
+ KEYWORD(inteldialect);
+ KEYWORD(gc);
+
+ KEYWORD(ccc);
+ KEYWORD(fastcc);
+ KEYWORD(coldcc);
+ KEYWORD(x86_stdcallcc);
+ KEYWORD(x86_fastcallcc);
+ KEYWORD(x86_thiscallcc);
+ KEYWORD(arm_apcscc);
+ KEYWORD(arm_aapcscc);
+ KEYWORD(arm_aapcs_vfpcc);
+ KEYWORD(msp430_intrcc);
+ KEYWORD(ptx_kernel);
+ KEYWORD(ptx_device);
+ KEYWORD(spir_kernel);
+ KEYWORD(spir_func);
+ KEYWORD(intel_ocl_bicc);
+
+ KEYWORD(cc);
+ KEYWORD(c);
+
+ KEYWORD(attributes);
+
+ KEYWORD(alwaysinline);
+ KEYWORD(byval);
+ KEYWORD(inlinehint);
+ KEYWORD(inreg);
+ KEYWORD(minsize);
+ KEYWORD(naked);
+ KEYWORD(nest);
+ KEYWORD(noalias);
+ KEYWORD(nobuiltin);
+ KEYWORD(nocapture);
+ KEYWORD(noduplicate);
+ KEYWORD(noimplicitfloat);
+ KEYWORD(noinline);
+ KEYWORD(nonlazybind);
+ KEYWORD(noredzone);
+ KEYWORD(noreturn);
+ KEYWORD(nounwind);
+ KEYWORD(optsize);
+ KEYWORD(readnone);
+ KEYWORD(readonly);
+ KEYWORD(returns_twice);
+ KEYWORD(signext);
+ KEYWORD(sret);
+ KEYWORD(ssp);
+ KEYWORD(sspreq);
+ KEYWORD(sspstrong);
+ KEYWORD(sanitize_address);
+ KEYWORD(sanitize_thread);
+ KEYWORD(sanitize_memory);
+ KEYWORD(uwtable);
+ KEYWORD(zeroext);
+
+ KEYWORD(type);
+ KEYWORD(opaque);
+
+ KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
+ KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
+ KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
+ KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
+
+ KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
+ KEYWORD(umin);
+
+ KEYWORD(x);
+ KEYWORD(blockaddress);
+
+ KEYWORD(personality);
+ KEYWORD(cleanup);
+ KEYWORD(catch);
+ KEYWORD(filter);
+#undef KEYWORD
+
+ // Keywords for types.
+#define TYPEKEYWORD(STR, LLVMTY) \
+ if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
+ TyVal = LLVMTY; return lltok::Type; }
+ TYPEKEYWORD("void", Type::getVoidTy(Context));
+ TYPEKEYWORD("half", Type::getHalfTy(Context));
+ TYPEKEYWORD("float", Type::getFloatTy(Context));
+ TYPEKEYWORD("double", Type::getDoubleTy(Context));
+ TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
+ TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
+ TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
+ TYPEKEYWORD("label", Type::getLabelTy(Context));
+ TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
+ TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
+#undef TYPEKEYWORD
+
+ // Keywords for instructions.
+#define INSTKEYWORD(STR, Enum) \
+ if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
+ UIntVal = Instruction::Enum; return lltok::kw_##STR; }
+
+ INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
+ INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
+ INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
+ INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
+ INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
+ INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
+ INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
+ INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
+
+ INSTKEYWORD(phi, PHI);
+ INSTKEYWORD(call, Call);
+ INSTKEYWORD(trunc, Trunc);
+ INSTKEYWORD(zext, ZExt);
+ INSTKEYWORD(sext, SExt);
+ INSTKEYWORD(fptrunc, FPTrunc);
+ INSTKEYWORD(fpext, FPExt);
+ INSTKEYWORD(uitofp, UIToFP);
+ INSTKEYWORD(sitofp, SIToFP);
+ INSTKEYWORD(fptoui, FPToUI);
+ INSTKEYWORD(fptosi, FPToSI);
+ INSTKEYWORD(inttoptr, IntToPtr);
+ INSTKEYWORD(ptrtoint, PtrToInt);
+ INSTKEYWORD(bitcast, BitCast);
+ INSTKEYWORD(select, Select);
+ INSTKEYWORD(va_arg, VAArg);
+ INSTKEYWORD(ret, Ret);
+ INSTKEYWORD(br, Br);
+ INSTKEYWORD(switch, Switch);
+ INSTKEYWORD(indirectbr, IndirectBr);
+ INSTKEYWORD(invoke, Invoke);
+ INSTKEYWORD(resume, Resume);
+ INSTKEYWORD(unreachable, Unreachable);
+
+ INSTKEYWORD(alloca, Alloca);
+ INSTKEYWORD(load, Load);
+ INSTKEYWORD(store, Store);
+ INSTKEYWORD(cmpxchg, AtomicCmpXchg);
+ INSTKEYWORD(atomicrmw, AtomicRMW);
+ INSTKEYWORD(fence, Fence);
+ INSTKEYWORD(getelementptr, GetElementPtr);
+
+ INSTKEYWORD(extractelement, ExtractElement);
+ INSTKEYWORD(insertelement, InsertElement);
+ INSTKEYWORD(shufflevector, ShuffleVector);
+ INSTKEYWORD(extractvalue, ExtractValue);
+ INSTKEYWORD(insertvalue, InsertValue);
+ INSTKEYWORD(landingpad, LandingPad);
+#undef INSTKEYWORD
+
+ // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
+ // the CFE to avoid forcing it to deal with 64-bit numbers.
+ if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
+ TokStart[1] == '0' && TokStart[2] == 'x' &&
+ isxdigit(static_cast<unsigned char>(TokStart[3]))) {
+ int len = CurPtr-TokStart-3;
+ uint32_t bits = len * 4;
+ APInt Tmp(bits, StringRef(TokStart+3, len), 16);
+ uint32_t activeBits = Tmp.getActiveBits();
+ if (activeBits > 0 && activeBits < bits)
+ Tmp = Tmp.trunc(activeBits);
+ APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
+ return lltok::APSInt;
+ }
+
+ // If this is "cc1234", return this as just "cc".
+ if (TokStart[0] == 'c' && TokStart[1] == 'c') {
+ CurPtr = TokStart+2;
+ return lltok::kw_cc;
+ }
+
+ // Finally, if this isn't known, return an error.
+ CurPtr = TokStart+1;
+ return lltok::Error;
+}
+
+
+/// Lex0x: Handle productions that start with 0x, knowing that it matches and
+/// that this is not a label:
+/// HexFPConstant 0x[0-9A-Fa-f]+
+/// HexFP80Constant 0xK[0-9A-Fa-f]+
+/// HexFP128Constant 0xL[0-9A-Fa-f]+
+/// HexPPC128Constant 0xM[0-9A-Fa-f]+
+/// HexHalfConstant 0xH[0-9A-Fa-f]+
+lltok::Kind LLLexer::Lex0x() {
+ CurPtr = TokStart + 2;
+
+ char Kind;
+ if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H') {
+ Kind = *CurPtr++;
+ } else {
+ Kind = 'J';
+ }
+
+ if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
+ // Bad token, return it as an error.
+ CurPtr = TokStart+1;
+ return lltok::Error;
+ }
+
+ while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
+ ++CurPtr;
+
+ if (Kind == 'J') {
+ // HexFPConstant - Floating point constant represented in IEEE format as a
+ // hexadecimal number for when exponential notation is not precise enough.
+ // Half, Float, and double only.
+ APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
+ return lltok::APFloat;
+ }
+
+ uint64_t Pair[2];
+ switch (Kind) {
+ default: llvm_unreachable("Unknown kind!");
+ case 'K':
+ // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
+ FP80HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair));
+ return lltok::APFloat;
+ case 'L':
+ // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
+ HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair));
+ return lltok::APFloat;
+ case 'M':
+ // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
+ HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair));
+ return lltok::APFloat;
+ case 'H':
+ APFloatVal = APFloat(APFloat::IEEEhalf,
+ APInt(16,HexIntToVal(TokStart+3, CurPtr)));
+ return lltok::APFloat;
+ }
+}
+
+/// LexIdentifier: Handle several related productions:
+/// Label [-a-zA-Z$._0-9]+:
+/// NInteger -[0-9]+
+/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
+/// PInteger [0-9]+
+/// HexFPConstant 0x[0-9A-Fa-f]+
+/// HexFP80Constant 0xK[0-9A-Fa-f]+
+/// HexFP128Constant 0xL[0-9A-Fa-f]+
+/// HexPPC128Constant 0xM[0-9A-Fa-f]+
+lltok::Kind LLLexer::LexDigitOrNegative() {
+ // If the letter after the negative is not a number, this is probably a label.
+ if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
+ !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
+ // Okay, this is not a number after the -, it's probably a label.
+ if (const char *End = isLabelTail(CurPtr)) {
+ StrVal.assign(TokStart, End-1);
+ CurPtr = End;
+ return lltok::LabelStr;
+ }
+
+ return lltok::Error;
+ }
+
+ // At this point, it is either a label, int or fp constant.
+
+ // Skip digits, we have at least one.
+ for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ /*empty*/;
+
+ // Check to see if this really is a label afterall, e.g. "-1:".
+ if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
+ if (const char *End = isLabelTail(CurPtr)) {
+ StrVal.assign(TokStart, End-1);
+ CurPtr = End;
+ return lltok::LabelStr;
+ }
+ }
+
+ // If the next character is a '.', then it is a fp value, otherwise its
+ // integer.
+ if (CurPtr[0] != '.') {
+ if (TokStart[0] == '0' && TokStart[1] == 'x')
+ return Lex0x();
+ unsigned Len = CurPtr-TokStart;
+ uint32_t numBits = ((Len * 64) / 19) + 2;
+ APInt Tmp(numBits, StringRef(TokStart, Len), 10);
+ if (TokStart[0] == '-') {
+ uint32_t minBits = Tmp.getMinSignedBits();
+ if (minBits > 0 && minBits < numBits)
+ Tmp = Tmp.trunc(minBits);
+ APSIntVal = APSInt(Tmp, false);
+ } else {
+ uint32_t activeBits = Tmp.getActiveBits();
+ if (activeBits > 0 && activeBits < numBits)
+ Tmp = Tmp.trunc(activeBits);
+ APSIntVal = APSInt(Tmp, true);
+ }
+ return lltok::APSInt;
+ }
+
+ ++CurPtr;
+
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+
+ if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
+ if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
+ ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
+ isdigit(static_cast<unsigned char>(CurPtr[2])))) {
+ CurPtr += 2;
+ while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+ }
+ }
+
+ APFloatVal = APFloat(std::atof(TokStart));
+ return lltok::APFloat;
+}
+
+/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
+lltok::Kind LLLexer::LexPositive() {
+ // If the letter after the negative is a number, this is probably not a
+ // label.
+ if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
+ return lltok::Error;
+
+ // Skip digits.
+ for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ /*empty*/;
+
+ // At this point, we need a '.'.
+ if (CurPtr[0] != '.') {
+ CurPtr = TokStart+1;
+ return lltok::Error;
+ }
+
+ ++CurPtr;
+
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+
+ if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
+ if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
+ ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
+ isdigit(static_cast<unsigned char>(CurPtr[2])))) {
+ CurPtr += 2;
+ while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+ }
+ }
+
+ APFloatVal = APFloat(std::atof(TokStart));
+ return lltok::APFloat;
+}
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.h b/contrib/llvm/lib/AsmParser/LLLexer.h
new file mode 100644
index 000000000000..85703c766b09
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLLexer.h
@@ -0,0 +1,93 @@
+//===- LLLexer.h - Lexer for LLVM Assembly Files ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents the Lexer for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ASMPARSER_LLLEXER_H
+#define LIB_ASMPARSER_LLLEXER_H
+
+#include "LLToken.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/Support/SourceMgr.h"
+#include <string>
+
+namespace llvm {
+ class MemoryBuffer;
+ class Type;
+ class SMDiagnostic;
+ class LLVMContext;
+
+ class LLLexer {
+ const char *CurPtr;
+ MemoryBuffer *CurBuf;
+ SMDiagnostic &ErrorInfo;
+ SourceMgr &SM;
+ LLVMContext &Context;
+
+ // Information about the current token.
+ const char *TokStart;
+ lltok::Kind CurKind;
+ std::string StrVal;
+ unsigned UIntVal;
+ Type *TyVal;
+ APFloat APFloatVal;
+ APSInt APSIntVal;
+
+ public:
+ explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &,
+ LLVMContext &C);
+ ~LLLexer() {}
+
+ lltok::Kind Lex() {
+ return CurKind = LexToken();
+ }
+
+ typedef SMLoc LocTy;
+ LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); }
+ lltok::Kind getKind() const { return CurKind; }
+ const std::string &getStrVal() const { return StrVal; }
+ Type *getTyVal() const { return TyVal; }
+ unsigned getUIntVal() const { return UIntVal; }
+ const APSInt &getAPSIntVal() const { return APSIntVal; }
+ const APFloat &getAPFloatVal() const { return APFloatVal; }
+
+
+ bool Error(LocTy L, const Twine &Msg) const;
+ bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); }
+ std::string getFilename() const;
+
+ private:
+ lltok::Kind LexToken();
+
+ int getNextChar();
+ void SkipLineComment();
+ lltok::Kind ReadString(lltok::Kind kind);
+ bool ReadVarName();
+
+ lltok::Kind LexIdentifier();
+ lltok::Kind LexDigitOrNegative();
+ lltok::Kind LexPositive();
+ lltok::Kind LexAt();
+ lltok::Kind LexExclaim();
+ lltok::Kind LexPercent();
+ lltok::Kind LexQuote();
+ lltok::Kind Lex0x();
+ lltok::Kind LexHash();
+
+ uint64_t atoull(const char *Buffer, const char *End);
+ uint64_t HexIntToVal(const char *Buffer, const char *End);
+ void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]);
+ void FP80HexToIntPair(const char *Buff, const char *End, uint64_t Pair[2]);
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
new file mode 100644
index 000000000000..c8da1f8bc661
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -0,0 +1,4335 @@
+//===-- LLParser.cpp - Parser Class ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parser class for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLParser.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static std::string getTypeString(Type *T) {
+ std::string Result;
+ raw_string_ostream Tmp(Result);
+ Tmp << *T;
+ return Tmp.str();
+}
+
+/// Run: module ::= toplevelentity*
+bool LLParser::Run() {
+ // Prime the lexer.
+ Lex.Lex();
+
+ return ParseTopLevelEntities() ||
+ ValidateEndOfModule();
+}
+
+/// ValidateEndOfModule - Do final validity and sanity checks at the end of the
+/// module.
+bool LLParser::ValidateEndOfModule() {
+ // Handle any instruction metadata forward references.
+ if (!ForwardRefInstMetadata.empty()) {
+ for (DenseMap<Instruction*, std::vector<MDRef> >::iterator
+ I = ForwardRefInstMetadata.begin(), E = ForwardRefInstMetadata.end();
+ I != E; ++I) {
+ Instruction *Inst = I->first;
+ const std::vector<MDRef> &MDList = I->second;
+
+ for (unsigned i = 0, e = MDList.size(); i != e; ++i) {
+ unsigned SlotNo = MDList[i].MDSlot;
+
+ if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0)
+ return Error(MDList[i].Loc, "use of undefined metadata '!" +
+ Twine(SlotNo) + "'");
+ Inst->setMetadata(MDList[i].MDKind, NumberedMetadata[SlotNo]);
+ }
+ }
+ ForwardRefInstMetadata.clear();
+ }
+
+ // Handle any function attribute group forward references.
+ for (std::map<Value*, std::vector<unsigned> >::iterator
+ I = ForwardRefAttrGroups.begin(), E = ForwardRefAttrGroups.end();
+ I != E; ++I) {
+ Value *V = I->first;
+ std::vector<unsigned> &Vec = I->second;
+ AttrBuilder B;
+
+ for (std::vector<unsigned>::iterator VI = Vec.begin(), VE = Vec.end();
+ VI != VE; ++VI)
+ B.merge(NumberedAttrBuilders[*VI]);
+
+ if (Function *Fn = dyn_cast<Function>(V)) {
+ AttributeSet AS = Fn->getAttributes();
+ AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
+ AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex,
+ AS.getFnAttributes());
+
+ FnAttrs.merge(B);
+
+ // If the alignment was parsed as an attribute, move to the alignment
+ // field.
+ if (FnAttrs.hasAlignmentAttr()) {
+ Fn->setAlignment(FnAttrs.getAlignment());
+ FnAttrs.removeAttribute(Attribute::Alignment);
+ }
+
+ AS = AS.addAttributes(Context, AttributeSet::FunctionIndex,
+ AttributeSet::get(Context,
+ AttributeSet::FunctionIndex,
+ FnAttrs));
+ Fn->setAttributes(AS);
+ } else if (CallInst *CI = dyn_cast<CallInst>(V)) {
+ AttributeSet AS = CI->getAttributes();
+ AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
+ AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex,
+ AS.getFnAttributes());
+ FnAttrs.merge(B);
+ AS = AS.addAttributes(Context, AttributeSet::FunctionIndex,
+ AttributeSet::get(Context,
+ AttributeSet::FunctionIndex,
+ FnAttrs));
+ CI->setAttributes(AS);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(V)) {
+ AttributeSet AS = II->getAttributes();
+ AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
+ AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex,
+ AS.getFnAttributes());
+ FnAttrs.merge(B);
+ AS = AS.addAttributes(Context, AttributeSet::FunctionIndex,
+ AttributeSet::get(Context,
+ AttributeSet::FunctionIndex,
+ FnAttrs));
+ II->setAttributes(AS);
+ } else {
+ llvm_unreachable("invalid object with forward attribute group reference");
+ }
+ }
+
+ // If there are entries in ForwardRefBlockAddresses at this point, they are
+ // references after the function was defined. Resolve those now.
+ while (!ForwardRefBlockAddresses.empty()) {
+ // Okay, we are referencing an already-parsed function, resolve them now.
+ Function *TheFn = 0;
+ const ValID &Fn = ForwardRefBlockAddresses.begin()->first;
+ if (Fn.Kind == ValID::t_GlobalName)
+ TheFn = M->getFunction(Fn.StrVal);
+ else if (Fn.UIntVal < NumberedVals.size())
+ TheFn = dyn_cast<Function>(NumberedVals[Fn.UIntVal]);
+
+ if (TheFn == 0)
+ return Error(Fn.Loc, "unknown function referenced by blockaddress");
+
+ // Resolve all these references.
+ if (ResolveForwardRefBlockAddresses(TheFn,
+ ForwardRefBlockAddresses.begin()->second,
+ 0))
+ return true;
+
+ ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
+ }
+
+ for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i)
+ if (NumberedTypes[i].second.isValid())
+ return Error(NumberedTypes[i].second,
+ "use of undefined type '%" + Twine(i) + "'");
+
+ for (StringMap<std::pair<Type*, LocTy> >::iterator I =
+ NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I)
+ if (I->second.second.isValid())
+ return Error(I->second.second,
+ "use of undefined type named '" + I->getKey() + "'");
+
+ if (!ForwardRefVals.empty())
+ return Error(ForwardRefVals.begin()->second.second,
+ "use of undefined value '@" + ForwardRefVals.begin()->first +
+ "'");
+
+ if (!ForwardRefValIDs.empty())
+ return Error(ForwardRefValIDs.begin()->second.second,
+ "use of undefined value '@" +
+ Twine(ForwardRefValIDs.begin()->first) + "'");
+
+ if (!ForwardRefMDNodes.empty())
+ return Error(ForwardRefMDNodes.begin()->second.second,
+ "use of undefined metadata '!" +
+ Twine(ForwardRefMDNodes.begin()->first) + "'");
+
+
+ // Look for intrinsic functions and CallInst that need to be upgraded
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
+ UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove
+
+ return false;
+}
+
+bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
+ std::vector<std::pair<ValID, GlobalValue*> > &Refs,
+ PerFunctionState *PFS) {
+ // Loop over all the references, resolving them.
+ for (unsigned i = 0, e = Refs.size(); i != e; ++i) {
+ BasicBlock *Res;
+ if (PFS) {
+ if (Refs[i].first.Kind == ValID::t_LocalName)
+ Res = PFS->GetBB(Refs[i].first.StrVal, Refs[i].first.Loc);
+ else
+ Res = PFS->GetBB(Refs[i].first.UIntVal, Refs[i].first.Loc);
+ } else if (Refs[i].first.Kind == ValID::t_LocalID) {
+ return Error(Refs[i].first.Loc,
+ "cannot take address of numeric label after the function is defined");
+ } else {
+ Res = dyn_cast_or_null<BasicBlock>(
+ TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal));
+ }
+
+ if (Res == 0)
+ return Error(Refs[i].first.Loc,
+ "referenced value is not a basic block");
+
+ // Get the BlockAddress for this and update references to use it.
+ BlockAddress *BA = BlockAddress::get(TheFn, Res);
+ Refs[i].second->replaceAllUsesWith(BA);
+ Refs[i].second->eraseFromParent();
+ }
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Top-Level Entities
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ParseTopLevelEntities() {
+ while (1) {
+ switch (Lex.getKind()) {
+ default: return TokError("expected top-level entity");
+ case lltok::Eof: return false;
+ case lltok::kw_declare: if (ParseDeclare()) return true; break;
+ case lltok::kw_define: if (ParseDefine()) return true; break;
+ case lltok::kw_module: if (ParseModuleAsm()) return true; break;
+ case lltok::kw_target: if (ParseTargetDefinition()) return true; break;
+ case lltok::kw_deplibs: if (ParseDepLibs()) return true; break;
+ case lltok::LocalVarID: if (ParseUnnamedType()) return true; break;
+ case lltok::LocalVar: if (ParseNamedType()) return true; break;
+ case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break;
+ case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break;
+ case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break;
+ case lltok::MetadataVar:if (ParseNamedMetadata()) return true; break;
+
+ // The Global variable production with no name can have many different
+ // optional leading prefixes, the production is:
+ // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
+ // OptionalAddrSpace OptionalUnNammedAddr
+ // ('constant'|'global') ...
+ case lltok::kw_private: // OptionalLinkage
+ case lltok::kw_linker_private: // OptionalLinkage
+ case lltok::kw_linker_private_weak: // OptionalLinkage
+ case lltok::kw_linker_private_weak_def_auto: // FIXME: backwards compat.
+ case lltok::kw_internal: // OptionalLinkage
+ case lltok::kw_weak: // OptionalLinkage
+ case lltok::kw_weak_odr: // OptionalLinkage
+ case lltok::kw_linkonce: // OptionalLinkage
+ case lltok::kw_linkonce_odr: // OptionalLinkage
+ case lltok::kw_linkonce_odr_auto_hide: // OptionalLinkage
+ case lltok::kw_appending: // OptionalLinkage
+ case lltok::kw_dllexport: // OptionalLinkage
+ case lltok::kw_common: // OptionalLinkage
+ case lltok::kw_dllimport: // OptionalLinkage
+ case lltok::kw_extern_weak: // OptionalLinkage
+ case lltok::kw_external: { // OptionalLinkage
+ unsigned Linkage, Visibility;
+ if (ParseOptionalLinkage(Linkage) ||
+ ParseOptionalVisibility(Visibility) ||
+ ParseGlobal("", SMLoc(), Linkage, true, Visibility))
+ return true;
+ break;
+ }
+ case lltok::kw_default: // OptionalVisibility
+ case lltok::kw_hidden: // OptionalVisibility
+ case lltok::kw_protected: { // OptionalVisibility
+ unsigned Visibility;
+ if (ParseOptionalVisibility(Visibility) ||
+ ParseGlobal("", SMLoc(), 0, false, Visibility))
+ return true;
+ break;
+ }
+
+ case lltok::kw_thread_local: // OptionalThreadLocal
+ case lltok::kw_addrspace: // OptionalAddrSpace
+ case lltok::kw_constant: // GlobalType
+ case lltok::kw_global: // GlobalType
+ if (ParseGlobal("", SMLoc(), 0, false, 0)) return true;
+ break;
+
+ case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break;
+ }
+ }
+}
+
+
+/// toplevelentity
+/// ::= 'module' 'asm' STRINGCONSTANT
+bool LLParser::ParseModuleAsm() {
+ assert(Lex.getKind() == lltok::kw_module);
+ Lex.Lex();
+
+ std::string AsmStr;
+ if (ParseToken(lltok::kw_asm, "expected 'module asm'") ||
+ ParseStringConstant(AsmStr)) return true;
+
+ M->appendModuleInlineAsm(AsmStr);
+ return false;
+}
+
+/// toplevelentity
+/// ::= 'target' 'triple' '=' STRINGCONSTANT
+/// ::= 'target' 'datalayout' '=' STRINGCONSTANT
+bool LLParser::ParseTargetDefinition() {
+ assert(Lex.getKind() == lltok::kw_target);
+ std::string Str;
+ switch (Lex.Lex()) {
+ default: return TokError("unknown target property");
+ case lltok::kw_triple:
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after target triple") ||
+ ParseStringConstant(Str))
+ return true;
+ M->setTargetTriple(Str);
+ return false;
+ case lltok::kw_datalayout:
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after target datalayout") ||
+ ParseStringConstant(Str))
+ return true;
+ M->setDataLayout(Str);
+ return false;
+ }
+}
+
+/// toplevelentity
+/// ::= 'deplibs' '=' '[' ']'
+/// ::= 'deplibs' '=' '[' STRINGCONSTANT (',' STRINGCONSTANT)* ']'
+/// FIXME: Remove in 4.0. Currently parse, but ignore.
+bool LLParser::ParseDepLibs() {
+ assert(Lex.getKind() == lltok::kw_deplibs);
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after deplibs") ||
+ ParseToken(lltok::lsquare, "expected '=' after deplibs"))
+ return true;
+
+ if (EatIfPresent(lltok::rsquare))
+ return false;
+
+ do {
+ std::string Str;
+ if (ParseStringConstant(Str)) return true;
+ } while (EatIfPresent(lltok::comma));
+
+ return ParseToken(lltok::rsquare, "expected ']' at end of list");
+}
+
+/// ParseUnnamedType:
+/// ::= LocalVarID '=' 'type' type
+bool LLParser::ParseUnnamedType() {
+ LocTy TypeLoc = Lex.getLoc();
+ unsigned TypeID = Lex.getUIntVal();
+ Lex.Lex(); // eat LocalVarID;
+
+ if (ParseToken(lltok::equal, "expected '=' after name") ||
+ ParseToken(lltok::kw_type, "expected 'type' after '='"))
+ return true;
+
+ if (TypeID >= NumberedTypes.size())
+ NumberedTypes.resize(TypeID+1);
+
+ Type *Result = 0;
+ if (ParseStructDefinition(TypeLoc, "",
+ NumberedTypes[TypeID], Result)) return true;
+
+ if (!isa<StructType>(Result)) {
+ std::pair<Type*, LocTy> &Entry = NumberedTypes[TypeID];
+ if (Entry.first)
+ return Error(TypeLoc, "non-struct types may not be recursive");
+ Entry.first = Result;
+ Entry.second = SMLoc();
+ }
+
+ return false;
+}
+
+
+/// toplevelentity
+/// ::= LocalVar '=' 'type' type
+bool LLParser::ParseNamedType() {
+ std::string Name = Lex.getStrVal();
+ LocTy NameLoc = Lex.getLoc();
+ Lex.Lex(); // eat LocalVar.
+
+ if (ParseToken(lltok::equal, "expected '=' after name") ||
+ ParseToken(lltok::kw_type, "expected 'type' after name"))
+ return true;
+
+ Type *Result = 0;
+ if (ParseStructDefinition(NameLoc, Name,
+ NamedTypes[Name], Result)) return true;
+
+ if (!isa<StructType>(Result)) {
+ std::pair<Type*, LocTy> &Entry = NamedTypes[Name];
+ if (Entry.first)
+ return Error(NameLoc, "non-struct types may not be recursive");
+ Entry.first = Result;
+ Entry.second = SMLoc();
+ }
+
+ return false;
+}
+
+
+/// toplevelentity
+/// ::= 'declare' FunctionHeader
+bool LLParser::ParseDeclare() {
+ assert(Lex.getKind() == lltok::kw_declare);
+ Lex.Lex();
+
+ Function *F;
+ return ParseFunctionHeader(F, false);
+}
+
+/// toplevelentity
+/// ::= 'define' FunctionHeader '{' ...
+bool LLParser::ParseDefine() {
+ assert(Lex.getKind() == lltok::kw_define);
+ Lex.Lex();
+
+ Function *F;
+ return ParseFunctionHeader(F, true) ||
+ ParseFunctionBody(*F);
+}
+
+/// ParseGlobalType
+/// ::= 'constant'
+/// ::= 'global'
+bool LLParser::ParseGlobalType(bool &IsConstant) {
+ if (Lex.getKind() == lltok::kw_constant)
+ IsConstant = true;
+ else if (Lex.getKind() == lltok::kw_global)
+ IsConstant = false;
+ else {
+ IsConstant = false;
+ return TokError("expected 'global' or 'constant'");
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// ParseUnnamedGlobal:
+/// OptionalVisibility ALIAS ...
+/// OptionalLinkage OptionalVisibility ... -> global variable
+/// GlobalID '=' OptionalVisibility ALIAS ...
+/// GlobalID '=' OptionalLinkage OptionalVisibility ... -> global variable
+bool LLParser::ParseUnnamedGlobal() {
+ unsigned VarID = NumberedVals.size();
+ std::string Name;
+ LocTy NameLoc = Lex.getLoc();
+
+ // Handle the GlobalID form.
+ if (Lex.getKind() == lltok::GlobalID) {
+ if (Lex.getUIntVal() != VarID)
+ return Error(Lex.getLoc(), "variable expected to be numbered '%" +
+ Twine(VarID) + "'");
+ Lex.Lex(); // eat GlobalID;
+
+ if (ParseToken(lltok::equal, "expected '=' after name"))
+ return true;
+ }
+
+ bool HasLinkage;
+ unsigned Linkage, Visibility;
+ if (ParseOptionalLinkage(Linkage, HasLinkage) ||
+ ParseOptionalVisibility(Visibility))
+ return true;
+
+ if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+ return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
+ return ParseAlias(Name, NameLoc, Visibility);
+}
+
+/// ParseNamedGlobal:
+/// GlobalVar '=' OptionalVisibility ALIAS ...
+/// GlobalVar '=' OptionalLinkage OptionalVisibility ... -> global variable
+bool LLParser::ParseNamedGlobal() {
+ assert(Lex.getKind() == lltok::GlobalVar);
+ LocTy NameLoc = Lex.getLoc();
+ std::string Name = Lex.getStrVal();
+ Lex.Lex();
+
+ bool HasLinkage;
+ unsigned Linkage, Visibility;
+ if (ParseToken(lltok::equal, "expected '=' in global variable") ||
+ ParseOptionalLinkage(Linkage, HasLinkage) ||
+ ParseOptionalVisibility(Visibility))
+ return true;
+
+ if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+ return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
+ return ParseAlias(Name, NameLoc, Visibility);
+}
+
+// MDString:
+// ::= '!' STRINGCONSTANT
+bool LLParser::ParseMDString(MDString *&Result) {
+ std::string Str;
+ if (ParseStringConstant(Str)) return true;
+ Result = MDString::get(Context, Str);
+ return false;
+}
+
+// MDNode:
+// ::= '!' MDNodeNumber
+//
+/// This version of ParseMDNodeID returns the slot number and null in the case
+/// of a forward reference.
+bool LLParser::ParseMDNodeID(MDNode *&Result, unsigned &SlotNo) {
+ // !{ ..., !42, ... }
+ if (ParseUInt32(SlotNo)) return true;
+
+ // Check existing MDNode.
+ if (SlotNo < NumberedMetadata.size() && NumberedMetadata[SlotNo] != 0)
+ Result = NumberedMetadata[SlotNo];
+ else
+ Result = 0;
+ return false;
+}
+
+bool LLParser::ParseMDNodeID(MDNode *&Result) {
+ // !{ ..., !42, ... }
+ unsigned MID = 0;
+ if (ParseMDNodeID(Result, MID)) return true;
+
+ // If not a forward reference, just return it now.
+ if (Result) return false;
+
+ // Otherwise, create MDNode forward reference.
+ MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>());
+ ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
+
+ if (NumberedMetadata.size() <= MID)
+ NumberedMetadata.resize(MID+1);
+ NumberedMetadata[MID] = FwdNode;
+ Result = FwdNode;
+ return false;
+}
+
+/// ParseNamedMetadata:
+/// !foo = !{ !1, !2 }
+bool LLParser::ParseNamedMetadata() {
+ assert(Lex.getKind() == lltok::MetadataVar);
+ std::string Name = Lex.getStrVal();
+ Lex.Lex();
+
+ if (ParseToken(lltok::equal, "expected '=' here") ||
+ ParseToken(lltok::exclaim, "Expected '!' here") ||
+ ParseToken(lltok::lbrace, "Expected '{' here"))
+ return true;
+
+ NamedMDNode *NMD = M->getOrInsertNamedMetadata(Name);
+ if (Lex.getKind() != lltok::rbrace)
+ do {
+ if (ParseToken(lltok::exclaim, "Expected '!' here"))
+ return true;
+
+ MDNode *N = 0;
+ if (ParseMDNodeID(N)) return true;
+ NMD->addOperand(N);
+ } while (EatIfPresent(lltok::comma));
+
+ if (ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ return false;
+}
+
+/// ParseStandaloneMetadata:
+/// !42 = !{...}
+bool LLParser::ParseStandaloneMetadata() {
+ assert(Lex.getKind() == lltok::exclaim);
+ Lex.Lex();
+ unsigned MetadataID = 0;
+
+ LocTy TyLoc;
+ Type *Ty = 0;
+ SmallVector<Value *, 16> Elts;
+ if (ParseUInt32(MetadataID) ||
+ ParseToken(lltok::equal, "expected '=' here") ||
+ ParseType(Ty, TyLoc) ||
+ ParseToken(lltok::exclaim, "Expected '!' here") ||
+ ParseToken(lltok::lbrace, "Expected '{' here") ||
+ ParseMDNodeVector(Elts, NULL) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ MDNode *Init = MDNode::get(Context, Elts);
+
+ // See if this was forward referenced, if so, handle it.
+ std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator
+ FI = ForwardRefMDNodes.find(MetadataID);
+ if (FI != ForwardRefMDNodes.end()) {
+ MDNode *Temp = FI->second.first;
+ Temp->replaceAllUsesWith(Init);
+ MDNode::deleteTemporary(Temp);
+ ForwardRefMDNodes.erase(FI);
+
+ assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work");
+ } else {
+ if (MetadataID >= NumberedMetadata.size())
+ NumberedMetadata.resize(MetadataID+1);
+
+ if (NumberedMetadata[MetadataID] != 0)
+ return TokError("Metadata id is already used");
+ NumberedMetadata[MetadataID] = Init;
+ }
+
+ return false;
+}
+
+/// ParseAlias:
+/// ::= GlobalVar '=' OptionalVisibility 'alias' OptionalLinkage Aliasee
+/// Aliasee
+/// ::= TypeAndValue
+/// ::= 'bitcast' '(' TypeAndValue 'to' Type ')'
+/// ::= 'getelementptr' 'inbounds'? '(' ... ')'
+///
+/// Everything through visibility has already been parsed.
+///
+bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
+ unsigned Visibility) {
+ assert(Lex.getKind() == lltok::kw_alias);
+ Lex.Lex();
+ unsigned Linkage;
+ LocTy LinkageLoc = Lex.getLoc();
+ if (ParseOptionalLinkage(Linkage))
+ return true;
+
+ if (Linkage != GlobalValue::ExternalLinkage &&
+ Linkage != GlobalValue::WeakAnyLinkage &&
+ Linkage != GlobalValue::WeakODRLinkage &&
+ Linkage != GlobalValue::InternalLinkage &&
+ Linkage != GlobalValue::PrivateLinkage &&
+ Linkage != GlobalValue::LinkerPrivateLinkage &&
+ Linkage != GlobalValue::LinkerPrivateWeakLinkage)
+ return Error(LinkageLoc, "invalid linkage type for alias");
+
+ Constant *Aliasee;
+ LocTy AliaseeLoc = Lex.getLoc();
+ if (Lex.getKind() != lltok::kw_bitcast &&
+ Lex.getKind() != lltok::kw_getelementptr) {
+ if (ParseGlobalTypeAndValue(Aliasee)) return true;
+ } else {
+ // The bitcast dest type is not present, it is implied by the dest type.
+ ValID ID;
+ if (ParseValID(ID)) return true;
+ if (ID.Kind != ValID::t_Constant)
+ return Error(AliaseeLoc, "invalid aliasee");
+ Aliasee = ID.ConstantVal;
+ }
+
+ if (!Aliasee->getType()->isPointerTy())
+ return Error(AliaseeLoc, "alias must have pointer type");
+
+ // Okay, create the alias but do not insert it into the module yet.
+ GlobalAlias* GA = new GlobalAlias(Aliasee->getType(),
+ (GlobalValue::LinkageTypes)Linkage, Name,
+ Aliasee);
+ GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+
+ // See if this value already exists in the symbol table. If so, it is either
+ // a redefinition or a definition of a forward reference.
+ if (GlobalValue *Val = M->getNamedValue(Name)) {
+ // See if this was a redefinition. If so, there is no entry in
+ // ForwardRefVals.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I == ForwardRefVals.end())
+ return Error(NameLoc, "redefinition of global named '@" + Name + "'");
+
+ // Otherwise, this was a definition of forward ref. Verify that types
+ // agree.
+ if (Val->getType() != GA->getType())
+ return Error(NameLoc,
+ "forward reference and definition of alias have different types");
+
+ // If they agree, just RAUW the old value with the alias and remove the
+ // forward ref info.
+ Val->replaceAllUsesWith(GA);
+ Val->eraseFromParent();
+ ForwardRefVals.erase(I);
+ }
+
+ // Insert into the module, we know its name won't collide now.
+ M->getAliasList().push_back(GA);
+ assert(GA->getName() == Name && "Should not be a name conflict!");
+
+ return false;
+}
+
+/// ParseGlobal
+/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal
+/// OptionalAddrSpace OptionalUnNammedAddr
+/// OptionalExternallyInitialized GlobalType Type Const
+/// ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
+/// OptionalAddrSpace OptionalUnNammedAddr
+/// OptionalExternallyInitialized GlobalType Type Const
+///
+/// Everything through visibility has been parsed already.
+///
+bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
+ unsigned Linkage, bool HasLinkage,
+ unsigned Visibility) {
+ unsigned AddrSpace;
+ bool IsConstant, UnnamedAddr, IsExternallyInitialized;
+ GlobalVariable::ThreadLocalMode TLM;
+ LocTy UnnamedAddrLoc;
+ LocTy IsExternallyInitializedLoc;
+ LocTy TyLoc;
+
+ Type *Ty = 0;
+ if (ParseOptionalThreadLocal(TLM) ||
+ ParseOptionalAddrSpace(AddrSpace) ||
+ ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+ &UnnamedAddrLoc) ||
+ ParseOptionalToken(lltok::kw_externally_initialized,
+ IsExternallyInitialized,
+ &IsExternallyInitializedLoc) ||
+ ParseGlobalType(IsConstant) ||
+ ParseType(Ty, TyLoc))
+ return true;
+
+ // If the linkage is specified and is external, then no initializer is
+ // present.
+ Constant *Init = 0;
+ if (!HasLinkage || (Linkage != GlobalValue::DLLImportLinkage &&
+ Linkage != GlobalValue::ExternalWeakLinkage &&
+ Linkage != GlobalValue::ExternalLinkage)) {
+ if (ParseGlobalValue(Ty, Init))
+ return true;
+ }
+
+ if (Ty->isFunctionTy() || Ty->isLabelTy())
+ return Error(TyLoc, "invalid type for global variable");
+
+ GlobalVariable *GV = 0;
+
+ // See if the global was forward referenced, if so, use the global.
+ if (!Name.empty()) {
+ if (GlobalValue *GVal = M->getNamedValue(Name)) {
+ if (!ForwardRefVals.erase(Name) || !isa<GlobalValue>(GVal))
+ return Error(NameLoc, "redefinition of global '@" + Name + "'");
+ GV = cast<GlobalVariable>(GVal);
+ }
+ } else {
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(NumberedVals.size());
+ if (I != ForwardRefValIDs.end()) {
+ GV = cast<GlobalVariable>(I->second.first);
+ ForwardRefValIDs.erase(I);
+ }
+ }
+
+ if (GV == 0) {
+ GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, 0,
+ Name, 0, GlobalVariable::NotThreadLocal,
+ AddrSpace);
+ } else {
+ if (GV->getType()->getElementType() != Ty)
+ return Error(TyLoc,
+ "forward reference and definition of global have different types");
+
+ // Move the forward-reference to the correct spot in the module.
+ M->getGlobalList().splice(M->global_end(), M->getGlobalList(), GV);
+ }
+
+ if (Name.empty())
+ NumberedVals.push_back(GV);
+
+ // Set the parsed properties on the global.
+ if (Init)
+ GV->setInitializer(Init);
+ GV->setConstant(IsConstant);
+ GV->setLinkage((GlobalValue::LinkageTypes)Linkage);
+ GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ GV->setExternallyInitialized(IsExternallyInitialized);
+ GV->setThreadLocalMode(TLM);
+ GV->setUnnamedAddr(UnnamedAddr);
+
+ // Parse attributes on the global.
+ while (Lex.getKind() == lltok::comma) {
+ Lex.Lex();
+
+ if (Lex.getKind() == lltok::kw_section) {
+ Lex.Lex();
+ GV->setSection(Lex.getStrVal());
+ if (ParseToken(lltok::StringConstant, "expected global section string"))
+ return true;
+ } else if (Lex.getKind() == lltok::kw_align) {
+ unsigned Alignment;
+ if (ParseOptionalAlignment(Alignment)) return true;
+ GV->setAlignment(Alignment);
+ } else {
+ TokError("unknown global variable property!");
+ }
+ }
+
+ return false;
+}
+
+/// ParseUnnamedAttrGrp
+/// ::= 'attributes' AttrGrpID '=' '{' AttrValPair+ '}'
+bool LLParser::ParseUnnamedAttrGrp() {
+ assert(Lex.getKind() == lltok::kw_attributes);
+ LocTy AttrGrpLoc = Lex.getLoc();
+ Lex.Lex();
+
+ assert(Lex.getKind() == lltok::AttrGrpID);
+ unsigned VarID = Lex.getUIntVal();
+ std::vector<unsigned> unused;
+ LocTy NoBuiltinLoc;
+ Lex.Lex();
+
+ if (ParseToken(lltok::equal, "expected '=' here") ||
+ ParseToken(lltok::lbrace, "expected '{' here") ||
+ ParseFnAttributeValuePairs(NumberedAttrBuilders[VarID], unused, true,
+ NoBuiltinLoc) ||
+ ParseToken(lltok::rbrace, "expected end of attribute group"))
+ return true;
+
+ if (!NumberedAttrBuilders[VarID].hasAttributes())
+ return Error(AttrGrpLoc, "attribute group has no attributes");
+
+ return false;
+}
+
+/// ParseFnAttributeValuePairs
+/// ::= <attr> | <attr> '=' <value>
+bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
+ std::vector<unsigned> &FwdRefAttrGrps,
+ bool inAttrGrp, LocTy &NoBuiltinLoc) {
+ bool HaveError = false;
+
+ B.clear();
+
+ while (true) {
+ lltok::Kind Token = Lex.getKind();
+ if (Token == lltok::kw_nobuiltin)
+ NoBuiltinLoc = Lex.getLoc();
+ switch (Token) {
+ default:
+ if (!inAttrGrp) return HaveError;
+ return Error(Lex.getLoc(), "unterminated attribute group");
+ case lltok::rbrace:
+ // Finished.
+ return false;
+
+ case lltok::AttrGrpID: {
+ // Allow a function to reference an attribute group:
+ //
+ // define void @foo() #1 { ... }
+ if (inAttrGrp)
+ HaveError |=
+ Error(Lex.getLoc(),
+ "cannot have an attribute group reference in an attribute group");
+
+ unsigned AttrGrpNum = Lex.getUIntVal();
+ if (inAttrGrp) break;
+
+ // Save the reference to the attribute group. We'll fill it in later.
+ FwdRefAttrGrps.push_back(AttrGrpNum);
+ break;
+ }
+ // Target-dependent attributes:
+ case lltok::StringConstant: {
+ std::string Attr = Lex.getStrVal();
+ Lex.Lex();
+ std::string Val;
+ if (EatIfPresent(lltok::equal) &&
+ ParseStringConstant(Val))
+ return true;
+
+ B.addAttribute(Attr, Val);
+ continue;
+ }
+
+ // Target-independent attributes:
+ case lltok::kw_align: {
+ // As a hack, we allow "align 2" on functions as a synonym for "alignstack
+ // 2".
+ unsigned Alignment;
+ if (inAttrGrp) {
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' here") ||
+ ParseUInt32(Alignment))
+ return true;
+ } else {
+ if (ParseOptionalAlignment(Alignment))
+ return true;
+ }
+ B.addAlignmentAttr(Alignment);
+ continue;
+ }
+ case lltok::kw_alignstack: {
+ unsigned Alignment;
+ if (inAttrGrp) {
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' here") ||
+ ParseUInt32(Alignment))
+ return true;
+ } else {
+ if (ParseOptionalStackAlignment(Alignment))
+ return true;
+ }
+ B.addStackAlignmentAttr(Alignment);
+ continue;
+ }
+ case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break;
+ case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
+ case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
+ case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
+ case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break;
+ case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break;
+ case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break;
+ case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break;
+ case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break;
+ case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break;
+ case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break;
+ case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break;
+ case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break;
+ case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break;
+ case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break;
+ case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break;
+ case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break;
+ case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break;
+ case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break;
+ case lltok::kw_sanitize_address: B.addAttribute(Attribute::SanitizeAddress); break;
+ case lltok::kw_sanitize_thread: B.addAttribute(Attribute::SanitizeThread); break;
+ case lltok::kw_sanitize_memory: B.addAttribute(Attribute::SanitizeMemory); break;
+ case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break;
+
+ // Error handling.
+ case lltok::kw_inreg:
+ case lltok::kw_signext:
+ case lltok::kw_zeroext:
+ HaveError |=
+ Error(Lex.getLoc(),
+ "invalid use of attribute on a function");
+ break;
+ case lltok::kw_byval:
+ case lltok::kw_nest:
+ case lltok::kw_noalias:
+ case lltok::kw_nocapture:
+ case lltok::kw_sret:
+ HaveError |=
+ Error(Lex.getLoc(),
+ "invalid use of parameter-only attribute on a function");
+ break;
+ }
+
+ Lex.Lex();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// GlobalValue Reference/Resolution Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetGlobalVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed. This can return null if the value
+/// exists but does not have the right type.
+GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
+ LocTy Loc) {
+ PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (PTy == 0) {
+ Error(Loc, "global variable reference must have pointer type");
+ return 0;
+ }
+
+ // Look this name up in the normal function symbol table.
+ GlobalValue *Val =
+ cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(Name));
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I != ForwardRefVals.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ Error(Loc, "'@" + Name + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ GlobalValue *FwdVal;
+ if (FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
+ FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
+ else
+ FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, 0, Name,
+ 0, GlobalVariable::NotThreadLocal,
+ PTy->getAddressSpace());
+
+ ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
+ PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (PTy == 0) {
+ Error(Loc, "global variable reference must have pointer type");
+ return 0;
+ }
+
+ GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(ID);
+ if (I != ForwardRefValIDs.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ GlobalValue *FwdVal;
+ if (FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
+ FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
+ else
+ FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, 0, "");
+
+ ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Routines.
+//===----------------------------------------------------------------------===//
+
+/// ParseToken - If the current token has the specified kind, eat it and return
+/// success. Otherwise, emit the specified error and return failure.
+bool LLParser::ParseToken(lltok::Kind T, const char *ErrMsg) {
+ if (Lex.getKind() != T)
+ return TokError(ErrMsg);
+ Lex.Lex();
+ return false;
+}
+
+/// ParseStringConstant
+/// ::= StringConstant
+bool LLParser::ParseStringConstant(std::string &Result) {
+ if (Lex.getKind() != lltok::StringConstant)
+ return TokError("expected string constant");
+ Result = Lex.getStrVal();
+ Lex.Lex();
+ return false;
+}
+
+/// ParseUInt32
+/// ::= uint32
+bool LLParser::ParseUInt32(unsigned &Val) {
+ if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
+ return TokError("expected integer");
+ uint64_t Val64 = Lex.getAPSIntVal().getLimitedValue(0xFFFFFFFFULL+1);
+ if (Val64 != unsigned(Val64))
+ return TokError("expected 32-bit integer (too large)");
+ Val = Val64;
+ Lex.Lex();
+ return false;
+}
+
+/// ParseTLSModel
+/// := 'localdynamic'
+/// := 'initialexec'
+/// := 'localexec'
+bool LLParser::ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM) {
+ switch (Lex.getKind()) {
+ default:
+ return TokError("expected localdynamic, initialexec or localexec");
+ case lltok::kw_localdynamic:
+ TLM = GlobalVariable::LocalDynamicTLSModel;
+ break;
+ case lltok::kw_initialexec:
+ TLM = GlobalVariable::InitialExecTLSModel;
+ break;
+ case lltok::kw_localexec:
+ TLM = GlobalVariable::LocalExecTLSModel;
+ break;
+ }
+
+ Lex.Lex();
+ return false;
+}
+
+/// ParseOptionalThreadLocal
+/// := /*empty*/
+/// := 'thread_local'
+/// := 'thread_local' '(' tlsmodel ')'
+bool LLParser::ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM) {
+ TLM = GlobalVariable::NotThreadLocal;
+ if (!EatIfPresent(lltok::kw_thread_local))
+ return false;
+
+ TLM = GlobalVariable::GeneralDynamicTLSModel;
+ if (Lex.getKind() == lltok::lparen) {
+ Lex.Lex();
+ return ParseTLSModel(TLM) ||
+ ParseToken(lltok::rparen, "expected ')' after thread local model");
+ }
+ return false;
+}
+
+/// ParseOptionalAddrSpace
+/// := /*empty*/
+/// := 'addrspace' '(' uint32 ')'
+bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
+ AddrSpace = 0;
+ if (!EatIfPresent(lltok::kw_addrspace))
+ return false;
+ return ParseToken(lltok::lparen, "expected '(' in address space") ||
+ ParseUInt32(AddrSpace) ||
+ ParseToken(lltok::rparen, "expected ')' in address space");
+}
+
+/// ParseOptionalParamAttrs - Parse a potentially empty list of parameter attributes.
+bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
+ bool HaveError = false;
+
+ B.clear();
+
+ while (1) {
+ lltok::Kind Token = Lex.getKind();
+ switch (Token) {
+ default: // End of attributes.
+ return HaveError;
+ case lltok::kw_align: {
+ unsigned Alignment;
+ if (ParseOptionalAlignment(Alignment))
+ return true;
+ B.addAlignmentAttr(Alignment);
+ continue;
+ }
+ case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break;
+ case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
+ case lltok::kw_nest: B.addAttribute(Attribute::Nest); break;
+ case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
+ case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break;
+ case lltok::kw_signext: B.addAttribute(Attribute::SExt); break;
+ case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break;
+ case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
+
+ case lltok::kw_alignstack: case lltok::kw_nounwind:
+ case lltok::kw_alwaysinline: case lltok::kw_optsize:
+ case lltok::kw_inlinehint: case lltok::kw_readnone:
+ case lltok::kw_minsize: case lltok::kw_readonly:
+ case lltok::kw_naked: case lltok::kw_returns_twice:
+ case lltok::kw_nobuiltin: case lltok::kw_sanitize_address:
+ case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_memory:
+ case lltok::kw_noinline: case lltok::kw_sanitize_thread:
+ case lltok::kw_nonlazybind: case lltok::kw_ssp:
+ case lltok::kw_noredzone: case lltok::kw_sspreq:
+ case lltok::kw_noreturn: case lltok::kw_uwtable:
+ HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
+ break;
+ }
+
+ Lex.Lex();
+ }
+}
+
+/// ParseOptionalReturnAttrs - Parse a potentially empty list of return attributes.
+bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
+ bool HaveError = false;
+
+ B.clear();
+
+ while (1) {
+ lltok::Kind Token = Lex.getKind();
+ switch (Token) {
+ default: // End of attributes.
+ return HaveError;
+ case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
+ case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
+ case lltok::kw_signext: B.addAttribute(Attribute::SExt); break;
+ case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
+
+ // Error handling.
+ case lltok::kw_sret: case lltok::kw_nocapture:
+ case lltok::kw_byval: case lltok::kw_nest:
+ HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
+ break;
+
+ case lltok::kw_align: case lltok::kw_noreturn:
+ case lltok::kw_alignstack: case lltok::kw_nounwind:
+ case lltok::kw_alwaysinline: case lltok::kw_optsize:
+ case lltok::kw_inlinehint: case lltok::kw_readnone:
+ case lltok::kw_minsize: case lltok::kw_readonly:
+ case lltok::kw_naked: case lltok::kw_returns_twice:
+ case lltok::kw_nobuiltin: case lltok::kw_sanitize_address:
+ case lltok::kw_noduplicate: case lltok::kw_sanitize_memory:
+ case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_thread:
+ case lltok::kw_noinline: case lltok::kw_ssp:
+ case lltok::kw_nonlazybind: case lltok::kw_sspreq:
+ case lltok::kw_noredzone: case lltok::kw_sspstrong:
+ case lltok::kw_uwtable:
+ HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
+ break;
+ }
+
+ Lex.Lex();
+ }
+}
+
+/// ParseOptionalLinkage
+/// ::= /*empty*/
+/// ::= 'private'
+/// ::= 'linker_private'
+/// ::= 'linker_private_weak'
+/// ::= 'internal'
+/// ::= 'weak'
+/// ::= 'weak_odr'
+/// ::= 'linkonce'
+/// ::= 'linkonce_odr'
+/// ::= 'linkonce_odr_auto_hide'
+/// ::= 'available_externally'
+/// ::= 'appending'
+/// ::= 'dllexport'
+/// ::= 'common'
+/// ::= 'dllimport'
+/// ::= 'extern_weak'
+/// ::= 'external'
+bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
+ HasLinkage = false;
+ switch (Lex.getKind()) {
+ default: Res=GlobalValue::ExternalLinkage; return false;
+ case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break;
+ case lltok::kw_linker_private: Res = GlobalValue::LinkerPrivateLinkage; break;
+ case lltok::kw_linker_private_weak:
+ Res = GlobalValue::LinkerPrivateWeakLinkage;
+ break;
+ case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break;
+ case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break;
+ case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
+ case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break;
+ case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break;
+ case lltok::kw_linkonce_odr_auto_hide:
+ case lltok::kw_linker_private_weak_def_auto: // FIXME: For backwards compat.
+ Res = GlobalValue::LinkOnceODRAutoHideLinkage;
+ break;
+ case lltok::kw_available_externally:
+ Res = GlobalValue::AvailableExternallyLinkage;
+ break;
+ case lltok::kw_appending: Res = GlobalValue::AppendingLinkage; break;
+ case lltok::kw_dllexport: Res = GlobalValue::DLLExportLinkage; break;
+ case lltok::kw_common: Res = GlobalValue::CommonLinkage; break;
+ case lltok::kw_dllimport: Res = GlobalValue::DLLImportLinkage; break;
+ case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break;
+ case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break;
+ }
+ Lex.Lex();
+ HasLinkage = true;
+ return false;
+}
+
+/// ParseOptionalVisibility
+/// ::= /*empty*/
+/// ::= 'default'
+/// ::= 'hidden'
+/// ::= 'protected'
+///
+bool LLParser::ParseOptionalVisibility(unsigned &Res) {
+ switch (Lex.getKind()) {
+ default: Res = GlobalValue::DefaultVisibility; return false;
+ case lltok::kw_default: Res = GlobalValue::DefaultVisibility; break;
+ case lltok::kw_hidden: Res = GlobalValue::HiddenVisibility; break;
+ case lltok::kw_protected: Res = GlobalValue::ProtectedVisibility; break;
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// ParseOptionalCallingConv
+/// ::= /*empty*/
+/// ::= 'ccc'
+/// ::= 'fastcc'
+/// ::= 'kw_intel_ocl_bicc'
+/// ::= 'coldcc'
+/// ::= 'x86_stdcallcc'
+/// ::= 'x86_fastcallcc'
+/// ::= 'x86_thiscallcc'
+/// ::= 'arm_apcscc'
+/// ::= 'arm_aapcscc'
+/// ::= 'arm_aapcs_vfpcc'
+/// ::= 'msp430_intrcc'
+/// ::= 'ptx_kernel'
+/// ::= 'ptx_device'
+/// ::= 'spir_func'
+/// ::= 'spir_kernel'
+/// ::= 'cc' UINT
+///
+bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
+ switch (Lex.getKind()) {
+ default: CC = CallingConv::C; return false;
+ case lltok::kw_ccc: CC = CallingConv::C; break;
+ case lltok::kw_fastcc: CC = CallingConv::Fast; break;
+ case lltok::kw_coldcc: CC = CallingConv::Cold; break;
+ case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
+ case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+ case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
+ case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
+ case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
+ case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
+ case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
+ case lltok::kw_ptx_kernel: CC = CallingConv::PTX_Kernel; break;
+ case lltok::kw_ptx_device: CC = CallingConv::PTX_Device; break;
+ case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break;
+ case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break;
+ case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
+ case lltok::kw_cc: {
+ unsigned ArbitraryCC;
+ Lex.Lex();
+ if (ParseUInt32(ArbitraryCC))
+ return true;
+ CC = static_cast<CallingConv::ID>(ArbitraryCC);
+ return false;
+ }
+ }
+
+ Lex.Lex();
+ return false;
+}
+
+/// ParseInstructionMetadata
+/// ::= !dbg !42 (',' !dbg !57)*
+bool LLParser::ParseInstructionMetadata(Instruction *Inst,
+ PerFunctionState *PFS) {
+ do {
+ if (Lex.getKind() != lltok::MetadataVar)
+ return TokError("expected metadata after comma");
+
+ std::string Name = Lex.getStrVal();
+ unsigned MDK = M->getMDKindID(Name);
+ Lex.Lex();
+
+ MDNode *Node;
+ SMLoc Loc = Lex.getLoc();
+
+ if (ParseToken(lltok::exclaim, "expected '!' here"))
+ return true;
+
+ // This code is similar to that of ParseMetadataValue, however it needs to
+ // have special-case code for a forward reference; see the comments on
+ // ForwardRefInstMetadata for details. Also, MDStrings are not supported
+ // at the top level here.
+ if (Lex.getKind() == lltok::lbrace) {
+ ValID ID;
+ if (ParseMetadataListValue(ID, PFS))
+ return true;
+ assert(ID.Kind == ValID::t_MDNode);
+ Inst->setMetadata(MDK, ID.MDNodeVal);
+ } else {
+ unsigned NodeID = 0;
+ if (ParseMDNodeID(Node, NodeID))
+ return true;
+ if (Node) {
+ // If we got the node, add it to the instruction.
+ Inst->setMetadata(MDK, Node);
+ } else {
+ MDRef R = { Loc, MDK, NodeID };
+ // Otherwise, remember that this should be resolved later.
+ ForwardRefInstMetadata[Inst].push_back(R);
+ }
+ }
+
+ // If this is the end of the list, we're done.
+ } while (EatIfPresent(lltok::comma));
+ return false;
+}
+
+/// ParseOptionalAlignment
+/// ::= /* empty */
+/// ::= 'align' 4
+bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
+ Alignment = 0;
+ if (!EatIfPresent(lltok::kw_align))
+ return false;
+ LocTy AlignLoc = Lex.getLoc();
+ if (ParseUInt32(Alignment)) return true;
+ if (!isPowerOf2_32(Alignment))
+ return Error(AlignLoc, "alignment is not a power of two");
+ if (Alignment > Value::MaximumAlignment)
+ return Error(AlignLoc, "huge alignments are not supported yet");
+ return false;
+}
+
+/// ParseOptionalCommaAlign
+/// ::=
+/// ::= ',' align 4
+///
+/// This returns with AteExtraComma set to true if it ate an excess comma at the
+/// end.
+bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
+ bool &AteExtraComma) {
+ AteExtraComma = false;
+ while (EatIfPresent(lltok::comma)) {
+ // Metadata at the end is an early exit.
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ return false;
+ }
+
+ if (Lex.getKind() != lltok::kw_align)
+ return Error(Lex.getLoc(), "expected metadata or 'align'");
+
+ if (ParseOptionalAlignment(Alignment)) return true;
+ }
+
+ return false;
+}
+
+/// ParseScopeAndOrdering
+/// if isAtomic: ::= 'singlethread'? AtomicOrdering
+/// else: ::=
+///
+/// This sets Scope and Ordering to the parsed values.
+bool LLParser::ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
+ AtomicOrdering &Ordering) {
+ if (!isAtomic)
+ return false;
+
+ Scope = CrossThread;
+ if (EatIfPresent(lltok::kw_singlethread))
+ Scope = SingleThread;
+ switch (Lex.getKind()) {
+ default: return TokError("Expected ordering on atomic instruction");
+ case lltok::kw_unordered: Ordering = Unordered; break;
+ case lltok::kw_monotonic: Ordering = Monotonic; break;
+ case lltok::kw_acquire: Ordering = Acquire; break;
+ case lltok::kw_release: Ordering = Release; break;
+ case lltok::kw_acq_rel: Ordering = AcquireRelease; break;
+ case lltok::kw_seq_cst: Ordering = SequentiallyConsistent; break;
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// ParseOptionalStackAlignment
+/// ::= /* empty */
+/// ::= 'alignstack' '(' 4 ')'
+bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) {
+ Alignment = 0;
+ if (!EatIfPresent(lltok::kw_alignstack))
+ return false;
+ LocTy ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::lparen))
+ return Error(ParenLoc, "expected '('");
+ LocTy AlignLoc = Lex.getLoc();
+ if (ParseUInt32(Alignment)) return true;
+ ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::rparen))
+ return Error(ParenLoc, "expected ')'");
+ if (!isPowerOf2_32(Alignment))
+ return Error(AlignLoc, "stack alignment is not a power of two");
+ return false;
+}
+
+/// ParseIndexList - This parses the index list for an insert/extractvalue
+/// instruction. This sets AteExtraComma in the case where we eat an extra
+/// comma at the end of the line and find that it is followed by metadata.
+/// Clients that don't allow metadata can call the version of this function that
+/// only takes one argument.
+///
+/// ParseIndexList
+/// ::= (',' uint32)+
+///
+bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
+ bool &AteExtraComma) {
+ AteExtraComma = false;
+
+ if (Lex.getKind() != lltok::comma)
+ return TokError("expected ',' as start of index list");
+
+ while (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ return false;
+ }
+ unsigned Idx = 0;
+ if (ParseUInt32(Idx)) return true;
+ Indices.push_back(Idx);
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Type Parsing.
+//===----------------------------------------------------------------------===//
+
+/// ParseType - Parse a type.
+bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
+ SMLoc TypeLoc = Lex.getLoc();
+ switch (Lex.getKind()) {
+ default:
+ return TokError("expected type");
+ case lltok::Type:
+ // Type ::= 'float' | 'void' (etc)
+ Result = Lex.getTyVal();
+ Lex.Lex();
+ break;
+ case lltok::lbrace:
+ // Type ::= StructType
+ if (ParseAnonStructType(Result, false))
+ return true;
+ break;
+ case lltok::lsquare:
+ // Type ::= '[' ... ']'
+ Lex.Lex(); // eat the lsquare.
+ if (ParseArrayVectorType(Result, false))
+ return true;
+ break;
+ case lltok::less: // Either vector or packed struct.
+ // Type ::= '<' ... '>'
+ Lex.Lex();
+ if (Lex.getKind() == lltok::lbrace) {
+ if (ParseAnonStructType(Result, true) ||
+ ParseToken(lltok::greater, "expected '>' at end of packed struct"))
+ return true;
+ } else if (ParseArrayVectorType(Result, true))
+ return true;
+ break;
+ case lltok::LocalVar: {
+ // Type ::= %foo
+ std::pair<Type*, LocTy> &Entry = NamedTypes[Lex.getStrVal()];
+
+ // If the type hasn't been defined yet, create a forward definition and
+ // remember where that forward def'n was seen (in case it never is defined).
+ if (Entry.first == 0) {
+ Entry.first = StructType::create(Context, Lex.getStrVal());
+ Entry.second = Lex.getLoc();
+ }
+ Result = Entry.first;
+ Lex.Lex();
+ break;
+ }
+
+ case lltok::LocalVarID: {
+ // Type ::= %4
+ if (Lex.getUIntVal() >= NumberedTypes.size())
+ NumberedTypes.resize(Lex.getUIntVal()+1);
+ std::pair<Type*, LocTy> &Entry = NumberedTypes[Lex.getUIntVal()];
+
+ // If the type hasn't been defined yet, create a forward definition and
+ // remember where that forward def'n was seen (in case it never is defined).
+ if (Entry.first == 0) {
+ Entry.first = StructType::create(Context);
+ Entry.second = Lex.getLoc();
+ }
+ Result = Entry.first;
+ Lex.Lex();
+ break;
+ }
+ }
+
+ // Parse the type suffixes.
+ while (1) {
+ switch (Lex.getKind()) {
+ // End of type.
+ default:
+ if (!AllowVoid && Result->isVoidTy())
+ return Error(TypeLoc, "void type only allowed for function results");
+ return false;
+
+ // Type ::= Type '*'
+ case lltok::star:
+ if (Result->isLabelTy())
+ return TokError("basic block pointers are invalid");
+ if (Result->isVoidTy())
+ return TokError("pointers to void are invalid - use i8* instead");
+ if (!PointerType::isValidElementType(Result))
+ return TokError("pointer to this type is invalid");
+ Result = PointerType::getUnqual(Result);
+ Lex.Lex();
+ break;
+
+ // Type ::= Type 'addrspace' '(' uint32 ')' '*'
+ case lltok::kw_addrspace: {
+ if (Result->isLabelTy())
+ return TokError("basic block pointers are invalid");
+ if (Result->isVoidTy())
+ return TokError("pointers to void are invalid; use i8* instead");
+ if (!PointerType::isValidElementType(Result))
+ return TokError("pointer to this type is invalid");
+ unsigned AddrSpace;
+ if (ParseOptionalAddrSpace(AddrSpace) ||
+ ParseToken(lltok::star, "expected '*' in address space"))
+ return true;
+
+ Result = PointerType::get(Result, AddrSpace);
+ break;
+ }
+
+ /// Types '(' ArgTypeListI ')' OptFuncAttrs
+ case lltok::lparen:
+ if (ParseFunctionType(Result))
+ return true;
+ break;
+ }
+ }
+}
+
+/// ParseParameterList
+/// ::= '(' ')'
+/// ::= '(' Arg (',' Arg)* ')'
+/// Arg
+/// ::= Type OptionalAttributes Value OptionalAttributes
+bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+ PerFunctionState &PFS) {
+ if (ParseToken(lltok::lparen, "expected '(' in call"))
+ return true;
+
+ unsigned AttrIndex = 1;
+ while (Lex.getKind() != lltok::rparen) {
+ // If this isn't the first argument, we need a comma.
+ if (!ArgList.empty() &&
+ ParseToken(lltok::comma, "expected ',' in argument list"))
+ return true;
+
+ // Parse the argument.
+ LocTy ArgLoc;
+ Type *ArgTy = 0;
+ AttrBuilder ArgAttrs;
+ Value *V;
+ if (ParseType(ArgTy, ArgLoc))
+ return true;
+
+ // Otherwise, handle normal operands.
+ if (ParseOptionalParamAttrs(ArgAttrs) || ParseValue(ArgTy, V, PFS))
+ return true;
+ ArgList.push_back(ParamInfo(ArgLoc, V, AttributeSet::get(V->getContext(),
+ AttrIndex++,
+ ArgAttrs)));
+ }
+
+ Lex.Lex(); // Lex the ')'.
+ return false;
+}
+
+
+
+/// ParseArgumentList - Parse the argument list for a function type or function
+/// prototype.
+/// ::= '(' ArgTypeListI ')'
+/// ArgTypeListI
+/// ::= /*empty*/
+/// ::= '...'
+/// ::= ArgTypeList ',' '...'
+/// ::= ArgType (',' ArgType)*
+///
+bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
+ bool &isVarArg){
+ isVarArg = false;
+ assert(Lex.getKind() == lltok::lparen);
+ Lex.Lex(); // eat the (.
+
+ if (Lex.getKind() == lltok::rparen) {
+ // empty
+ } else if (Lex.getKind() == lltok::dotdotdot) {
+ isVarArg = true;
+ Lex.Lex();
+ } else {
+ LocTy TypeLoc = Lex.getLoc();
+ Type *ArgTy = 0;
+ AttrBuilder Attrs;
+ std::string Name;
+
+ if (ParseType(ArgTy) ||
+ ParseOptionalParamAttrs(Attrs)) return true;
+
+ if (ArgTy->isVoidTy())
+ return Error(TypeLoc, "argument can not have void type");
+
+ if (Lex.getKind() == lltok::LocalVar) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ }
+
+ if (!FunctionType::isValidArgumentType(ArgTy))
+ return Error(TypeLoc, "invalid type for function argument");
+
+ unsigned AttrIndex = 1;
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
+ AttributeSet::get(ArgTy->getContext(),
+ AttrIndex++, Attrs), Name));
+
+ while (EatIfPresent(lltok::comma)) {
+ // Handle ... at end of arg list.
+ if (EatIfPresent(lltok::dotdotdot)) {
+ isVarArg = true;
+ break;
+ }
+
+ // Otherwise must be an argument type.
+ TypeLoc = Lex.getLoc();
+ if (ParseType(ArgTy) || ParseOptionalParamAttrs(Attrs)) return true;
+
+ if (ArgTy->isVoidTy())
+ return Error(TypeLoc, "argument can not have void type");
+
+ if (Lex.getKind() == lltok::LocalVar) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ } else {
+ Name = "";
+ }
+
+ if (!ArgTy->isFirstClassType())
+ return Error(TypeLoc, "invalid type for function argument");
+
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
+ AttributeSet::get(ArgTy->getContext(),
+ AttrIndex++, Attrs),
+ Name));
+ }
+ }
+
+ return ParseToken(lltok::rparen, "expected ')' at end of argument list");
+}
+
+/// ParseFunctionType
+/// ::= Type ArgumentList OptionalAttrs
+bool LLParser::ParseFunctionType(Type *&Result) {
+ assert(Lex.getKind() == lltok::lparen);
+
+ if (!FunctionType::isValidReturnType(Result))
+ return TokError("invalid function return type");
+
+ SmallVector<ArgInfo, 8> ArgList;
+ bool isVarArg;
+ if (ParseArgumentList(ArgList, isVarArg))
+ return true;
+
+ // Reject names on the arguments lists.
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ if (!ArgList[i].Name.empty())
+ return Error(ArgList[i].Loc, "argument name invalid in function type");
+ if (ArgList[i].Attrs.hasAttributes(i + 1))
+ return Error(ArgList[i].Loc,
+ "argument attributes invalid in function type");
+ }
+
+ SmallVector<Type*, 16> ArgListTy;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ArgListTy.push_back(ArgList[i].Ty);
+
+ Result = FunctionType::get(Result, ArgListTy, isVarArg);
+ return false;
+}
+
+/// ParseAnonStructType - Parse an anonymous struct type, which is inlined into
+/// other structs.
+bool LLParser::ParseAnonStructType(Type *&Result, bool Packed) {
+ SmallVector<Type*, 8> Elts;
+ if (ParseStructBody(Elts)) return true;
+
+ Result = StructType::get(Context, Elts, Packed);
+ return false;
+}
+
+/// ParseStructDefinition - Parse a struct in a 'type' definition.
+bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
+ std::pair<Type*, LocTy> &Entry,
+ Type *&ResultTy) {
+ // If the type was already defined, diagnose the redefinition.
+ if (Entry.first && !Entry.second.isValid())
+ return Error(TypeLoc, "redefinition of type");
+
+ // If we have opaque, just return without filling in the definition for the
+ // struct. This counts as a definition as far as the .ll file goes.
+ if (EatIfPresent(lltok::kw_opaque)) {
+ // This type is being defined, so clear the location to indicate this.
+ Entry.second = SMLoc();
+
+ // If this type number has never been uttered, create it.
+ if (Entry.first == 0)
+ Entry.first = StructType::create(Context, Name);
+ ResultTy = Entry.first;
+ return false;
+ }
+
+ // If the type starts with '<', then it is either a packed struct or a vector.
+ bool isPacked = EatIfPresent(lltok::less);
+
+ // If we don't have a struct, then we have a random type alias, which we
+ // accept for compatibility with old files. These types are not allowed to be
+ // forward referenced and not allowed to be recursive.
+ if (Lex.getKind() != lltok::lbrace) {
+ if (Entry.first)
+ return Error(TypeLoc, "forward references to non-struct type");
+
+ ResultTy = 0;
+ if (isPacked)
+ return ParseArrayVectorType(ResultTy, true);
+ return ParseType(ResultTy);
+ }
+
+ // This type is being defined, so clear the location to indicate this.
+ Entry.second = SMLoc();
+
+ // If this type number has never been uttered, create it.
+ if (Entry.first == 0)
+ Entry.first = StructType::create(Context, Name);
+
+ StructType *STy = cast<StructType>(Entry.first);
+
+ SmallVector<Type*, 8> Body;
+ if (ParseStructBody(Body) ||
+ (isPacked && ParseToken(lltok::greater, "expected '>' in packed struct")))
+ return true;
+
+ STy->setBody(Body, isPacked);
+ ResultTy = STy;
+ return false;
+}
+
+
+/// ParseStructType: Handles packed and unpacked types. </> parsed elsewhere.
+/// StructType
+/// ::= '{' '}'
+/// ::= '{' Type (',' Type)* '}'
+/// ::= '<' '{' '}' '>'
+/// ::= '<' '{' Type (',' Type)* '}' '>'
+bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
+ assert(Lex.getKind() == lltok::lbrace);
+ Lex.Lex(); // Consume the '{'
+
+ // Handle the empty struct.
+ if (EatIfPresent(lltok::rbrace))
+ return false;
+
+ LocTy EltTyLoc = Lex.getLoc();
+ Type *Ty = 0;
+ if (ParseType(Ty)) return true;
+ Body.push_back(Ty);
+
+ if (!StructType::isValidElementType(Ty))
+ return Error(EltTyLoc, "invalid element type for struct");
+
+ while (EatIfPresent(lltok::comma)) {
+ EltTyLoc = Lex.getLoc();
+ if (ParseType(Ty)) return true;
+
+ if (!StructType::isValidElementType(Ty))
+ return Error(EltTyLoc, "invalid element type for struct");
+
+ Body.push_back(Ty);
+ }
+
+ return ParseToken(lltok::rbrace, "expected '}' at end of struct");
+}
+
+/// ParseArrayVectorType - Parse an array or vector type, assuming the first
+/// token has already been consumed.
+/// Type
+/// ::= '[' APSINTVAL 'x' Types ']'
+/// ::= '<' APSINTVAL 'x' Types '>'
+bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
+ if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
+ Lex.getAPSIntVal().getBitWidth() > 64)
+ return TokError("expected number in address space");
+
+ LocTy SizeLoc = Lex.getLoc();
+ uint64_t Size = Lex.getAPSIntVal().getZExtValue();
+ Lex.Lex();
+
+ if (ParseToken(lltok::kw_x, "expected 'x' after element count"))
+ return true;
+
+ LocTy TypeLoc = Lex.getLoc();
+ Type *EltTy = 0;
+ if (ParseType(EltTy)) return true;
+
+ if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
+ "expected end of sequential type"))
+ return true;
+
+ if (isVector) {
+ if (Size == 0)
+ return Error(SizeLoc, "zero element vector is illegal");
+ if ((unsigned)Size != Size)
+ return Error(SizeLoc, "size too large for vector");
+ if (!VectorType::isValidElementType(EltTy))
+ return Error(TypeLoc, "invalid vector element type");
+ Result = VectorType::get(EltTy, unsigned(Size));
+ } else {
+ if (!ArrayType::isValidElementType(EltTy))
+ return Error(TypeLoc, "invalid array element type");
+ Result = ArrayType::get(EltTy, Size);
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Function Semantic Analysis.
+//===----------------------------------------------------------------------===//
+
+LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f,
+ int functionNumber)
+ : P(p), F(f), FunctionNumber(functionNumber) {
+
+ // Insert unnamed arguments into the NumberedVals list.
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+ AI != E; ++AI)
+ if (!AI->hasName())
+ NumberedVals.push_back(AI);
+}
+
+LLParser::PerFunctionState::~PerFunctionState() {
+ // If there were any forward referenced non-basicblock values, delete them.
+ for (std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I)
+ if (!isa<BasicBlock>(I->second.first)) {
+ I->second.first->replaceAllUsesWith(
+ UndefValue::get(I->second.first->getType()));
+ delete I->second.first;
+ I->second.first = 0;
+ }
+
+ for (std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I)
+ if (!isa<BasicBlock>(I->second.first)) {
+ I->second.first->replaceAllUsesWith(
+ UndefValue::get(I->second.first->getType()));
+ delete I->second.first;
+ I->second.first = 0;
+ }
+}
+
+bool LLParser::PerFunctionState::FinishFunction() {
+ // Check to see if someone took the address of labels in this block.
+ if (!P.ForwardRefBlockAddresses.empty()) {
+ ValID FunctionID;
+ if (!F.getName().empty()) {
+ FunctionID.Kind = ValID::t_GlobalName;
+ FunctionID.StrVal = F.getName();
+ } else {
+ FunctionID.Kind = ValID::t_GlobalID;
+ FunctionID.UIntVal = FunctionNumber;
+ }
+
+ std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >::iterator
+ FRBAI = P.ForwardRefBlockAddresses.find(FunctionID);
+ if (FRBAI != P.ForwardRefBlockAddresses.end()) {
+ // Resolve all these references.
+ if (P.ResolveForwardRefBlockAddresses(&F, FRBAI->second, this))
+ return true;
+
+ P.ForwardRefBlockAddresses.erase(FRBAI);
+ }
+ }
+
+ if (!ForwardRefVals.empty())
+ return P.Error(ForwardRefVals.begin()->second.second,
+ "use of undefined value '%" + ForwardRefVals.begin()->first +
+ "'");
+ if (!ForwardRefValIDs.empty())
+ return P.Error(ForwardRefValIDs.begin()->second.second,
+ "use of undefined value '%" +
+ Twine(ForwardRefValIDs.begin()->first) + "'");
+ return false;
+}
+
+
+/// GetVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed. This can return null if the value
+/// exists but does not have the right type.
+Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
+ Type *Ty, LocTy Loc) {
+ // Look this name up in the normal function symbol table.
+ Value *Val = F.getValueSymbolTable().lookup(Name);
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I != ForwardRefVals.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ if (Ty->isLabelTy())
+ P.Error(Loc, "'%" + Name + "' is not a basic block");
+ else
+ P.Error(Loc, "'%" + Name + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ // Don't make placeholders with invalid type.
+ if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
+ P.Error(Loc, "invalid use of a non-first-class type");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ Value *FwdVal;
+ if (Ty->isLabelTy())
+ FwdVal = BasicBlock::Create(F.getContext(), Name, &F);
+ else
+ FwdVal = new Argument(Ty, Name);
+
+ ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty,
+ LocTy Loc) {
+ // Look this name up in the normal function symbol table.
+ Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(ID);
+ if (I != ForwardRefValIDs.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ if (Ty->isLabelTy())
+ P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
+ else
+ P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
+ P.Error(Loc, "invalid use of a non-first-class type");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ Value *FwdVal;
+ if (Ty->isLabelTy())
+ FwdVal = BasicBlock::Create(F.getContext(), "", &F);
+ else
+ FwdVal = new Argument(Ty);
+
+ ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+/// SetInstName - After an instruction is parsed and inserted into its
+/// basic block, this installs its name.
+bool LLParser::PerFunctionState::SetInstName(int NameID,
+ const std::string &NameStr,
+ LocTy NameLoc, Instruction *Inst) {
+ // If this instruction has void type, it cannot have a name or ID specified.
+ if (Inst->getType()->isVoidTy()) {
+ if (NameID != -1 || !NameStr.empty())
+ return P.Error(NameLoc, "instructions returning void cannot have a name");
+ return false;
+ }
+
+ // If this was a numbered instruction, verify that the instruction is the
+ // expected value and resolve any forward references.
+ if (NameStr.empty()) {
+ // If neither a name nor an ID was specified, just use the next ID.
+ if (NameID == -1)
+ NameID = NumberedVals.size();
+
+ if (unsigned(NameID) != NumberedVals.size())
+ return P.Error(NameLoc, "instruction expected to be numbered '%" +
+ Twine(NumberedVals.size()) + "'");
+
+ std::map<unsigned, std::pair<Value*, LocTy> >::iterator FI =
+ ForwardRefValIDs.find(NameID);
+ if (FI != ForwardRefValIDs.end()) {
+ if (FI->second.first->getType() != Inst->getType())
+ return P.Error(NameLoc, "instruction forward referenced with type '" +
+ getTypeString(FI->second.first->getType()) + "'");
+ FI->second.first->replaceAllUsesWith(Inst);
+ delete FI->second.first;
+ ForwardRefValIDs.erase(FI);
+ }
+
+ NumberedVals.push_back(Inst);
+ return false;
+ }
+
+ // Otherwise, the instruction had a name. Resolve forward refs and set it.
+ std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ FI = ForwardRefVals.find(NameStr);
+ if (FI != ForwardRefVals.end()) {
+ if (FI->second.first->getType() != Inst->getType())
+ return P.Error(NameLoc, "instruction forward referenced with type '" +
+ getTypeString(FI->second.first->getType()) + "'");
+ FI->second.first->replaceAllUsesWith(Inst);
+ delete FI->second.first;
+ ForwardRefVals.erase(FI);
+ }
+
+ // Set the name on the instruction.
+ Inst->setName(NameStr);
+
+ if (Inst->getName() != NameStr)
+ return P.Error(NameLoc, "multiple definition of local value named '" +
+ NameStr + "'");
+ return false;
+}
+
+/// GetBB - Get a basic block with the specified name or ID, creating a
+/// forward reference record if needed.
+BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name,
+ LocTy Loc) {
+ return cast_or_null<BasicBlock>(GetVal(Name,
+ Type::getLabelTy(F.getContext()), Loc));
+}
+
+BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
+ return cast_or_null<BasicBlock>(GetVal(ID,
+ Type::getLabelTy(F.getContext()), Loc));
+}
+
+/// DefineBB - Define the specified basic block, which is either named or
+/// unnamed. If there is an error, this returns null otherwise it returns
+/// the block being defined.
+BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
+ LocTy Loc) {
+ BasicBlock *BB;
+ if (Name.empty())
+ BB = GetBB(NumberedVals.size(), Loc);
+ else
+ BB = GetBB(Name, Loc);
+ if (BB == 0) return 0; // Already diagnosed error.
+
+ // Move the block to the end of the function. Forward ref'd blocks are
+ // inserted wherever they happen to be referenced.
+ F.getBasicBlockList().splice(F.end(), F.getBasicBlockList(), BB);
+
+ // Remove the block from forward ref sets.
+ if (Name.empty()) {
+ ForwardRefValIDs.erase(NumberedVals.size());
+ NumberedVals.push_back(BB);
+ } else {
+ // BB forward references are already in the function symbol table.
+ ForwardRefVals.erase(Name);
+ }
+
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Constants.
+//===----------------------------------------------------------------------===//
+
+/// ParseValID - Parse an abstract value that doesn't necessarily have a
+/// type implied. For example, if we parse "4" we don't know what integer type
+/// it has. The value will later be combined with its type and checked for
+/// sanity. PFS is used to convert function-local operands of metadata (since
+/// metadata operands are not just parsed here but also converted to values).
+/// PFS can be null when we are not parsing metadata values inside a function.
+bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
+ ID.Loc = Lex.getLoc();
+ switch (Lex.getKind()) {
+ default: return TokError("expected value token");
+ case lltok::GlobalID: // @42
+ ID.UIntVal = Lex.getUIntVal();
+ ID.Kind = ValID::t_GlobalID;
+ break;
+ case lltok::GlobalVar: // @foo
+ ID.StrVal = Lex.getStrVal();
+ ID.Kind = ValID::t_GlobalName;
+ break;
+ case lltok::LocalVarID: // %42
+ ID.UIntVal = Lex.getUIntVal();
+ ID.Kind = ValID::t_LocalID;
+ break;
+ case lltok::LocalVar: // %foo
+ ID.StrVal = Lex.getStrVal();
+ ID.Kind = ValID::t_LocalName;
+ break;
+ case lltok::exclaim: // !42, !{...}, or !"foo"
+ return ParseMetadataValue(ID, PFS);
+ case lltok::APSInt:
+ ID.APSIntVal = Lex.getAPSIntVal();
+ ID.Kind = ValID::t_APSInt;
+ break;
+ case lltok::APFloat:
+ ID.APFloatVal = Lex.getAPFloatVal();
+ ID.Kind = ValID::t_APFloat;
+ break;
+ case lltok::kw_true:
+ ID.ConstantVal = ConstantInt::getTrue(Context);
+ ID.Kind = ValID::t_Constant;
+ break;
+ case lltok::kw_false:
+ ID.ConstantVal = ConstantInt::getFalse(Context);
+ ID.Kind = ValID::t_Constant;
+ break;
+ case lltok::kw_null: ID.Kind = ValID::t_Null; break;
+ case lltok::kw_undef: ID.Kind = ValID::t_Undef; break;
+ case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break;
+
+ case lltok::lbrace: {
+ // ValID ::= '{' ConstVector '}'
+ Lex.Lex();
+ SmallVector<Constant*, 16> Elts;
+ if (ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rbrace, "expected end of struct constant"))
+ return true;
+
+ ID.ConstantStructElts = new Constant*[Elts.size()];
+ ID.UIntVal = Elts.size();
+ memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+ ID.Kind = ValID::t_ConstantStruct;
+ return false;
+ }
+ case lltok::less: {
+ // ValID ::= '<' ConstVector '>' --> Vector.
+ // ValID ::= '<' '{' ConstVector '}' '>' --> Packed Struct.
+ Lex.Lex();
+ bool isPackedStruct = EatIfPresent(lltok::lbrace);
+
+ SmallVector<Constant*, 16> Elts;
+ LocTy FirstEltLoc = Lex.getLoc();
+ if (ParseGlobalValueVector(Elts) ||
+ (isPackedStruct &&
+ ParseToken(lltok::rbrace, "expected end of packed struct")) ||
+ ParseToken(lltok::greater, "expected end of constant"))
+ return true;
+
+ if (isPackedStruct) {
+ ID.ConstantStructElts = new Constant*[Elts.size()];
+ memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+ ID.UIntVal = Elts.size();
+ ID.Kind = ValID::t_PackedConstantStruct;
+ return false;
+ }
+
+ if (Elts.empty())
+ return Error(ID.Loc, "constant vector must not be empty");
+
+ if (!Elts[0]->getType()->isIntegerTy() &&
+ !Elts[0]->getType()->isFloatingPointTy() &&
+ !Elts[0]->getType()->isPointerTy())
+ return Error(FirstEltLoc,
+ "vector elements must have integer, pointer or floating point type");
+
+ // Verify that all the vector elements have the same type.
+ for (unsigned i = 1, e = Elts.size(); i != e; ++i)
+ if (Elts[i]->getType() != Elts[0]->getType())
+ return Error(FirstEltLoc,
+ "vector element #" + Twine(i) +
+ " is not of type '" + getTypeString(Elts[0]->getType()));
+
+ ID.ConstantVal = ConstantVector::get(Elts);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::lsquare: { // Array Constant
+ Lex.Lex();
+ SmallVector<Constant*, 16> Elts;
+ LocTy FirstEltLoc = Lex.getLoc();
+ if (ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rsquare, "expected end of array constant"))
+ return true;
+
+ // Handle empty element.
+ if (Elts.empty()) {
+ // Use undef instead of an array because it's inconvenient to determine
+ // the element type at this point, there being no elements to examine.
+ ID.Kind = ValID::t_EmptyArray;
+ return false;
+ }
+
+ if (!Elts[0]->getType()->isFirstClassType())
+ return Error(FirstEltLoc, "invalid array element type: " +
+ getTypeString(Elts[0]->getType()));
+
+ ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
+
+ // Verify all elements are correct type!
+ for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
+ if (Elts[i]->getType() != Elts[0]->getType())
+ return Error(FirstEltLoc,
+ "array element #" + Twine(i) +
+ " is not of type '" + getTypeString(Elts[0]->getType()));
+ }
+
+ ID.ConstantVal = ConstantArray::get(ATy, Elts);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_c: // c "foo"
+ Lex.Lex();
+ ID.ConstantVal = ConstantDataArray::getString(Context, Lex.getStrVal(),
+ false);
+ if (ParseToken(lltok::StringConstant, "expected string")) return true;
+ ID.Kind = ValID::t_Constant;
+ return false;
+
+ case lltok::kw_asm: {
+ // ValID ::= 'asm' SideEffect? AlignStack? IntelDialect? STRINGCONSTANT ','
+ // STRINGCONSTANT
+ bool HasSideEffect, AlignStack, AsmDialect;
+ Lex.Lex();
+ if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) ||
+ ParseOptionalToken(lltok::kw_alignstack, AlignStack) ||
+ ParseOptionalToken(lltok::kw_inteldialect, AsmDialect) ||
+ ParseStringConstant(ID.StrVal) ||
+ ParseToken(lltok::comma, "expected comma in inline asm expression") ||
+ ParseToken(lltok::StringConstant, "expected constraint string"))
+ return true;
+ ID.StrVal2 = Lex.getStrVal();
+ ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1) |
+ (unsigned(AsmDialect)<<2);
+ ID.Kind = ValID::t_InlineAsm;
+ return false;
+ }
+
+ case lltok::kw_blockaddress: {
+ // ValID ::= 'blockaddress' '(' @foo ',' %bar ')'
+ Lex.Lex();
+
+ ValID Fn, Label;
+ LocTy FnLoc, LabelLoc;
+
+ if (ParseToken(lltok::lparen, "expected '(' in block address expression") ||
+ ParseValID(Fn) ||
+ ParseToken(lltok::comma, "expected comma in block address expression")||
+ ParseValID(Label) ||
+ ParseToken(lltok::rparen, "expected ')' in block address expression"))
+ return true;
+
+ if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName)
+ return Error(Fn.Loc, "expected function name in blockaddress");
+ if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName)
+ return Error(Label.Loc, "expected basic block name in blockaddress");
+
+ // Make a global variable as a placeholder for this reference.
+ GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context),
+ false, GlobalValue::InternalLinkage,
+ 0, "");
+ ForwardRefBlockAddresses[Fn].push_back(std::make_pair(Label, FwdRef));
+ ID.ConstantVal = FwdRef;
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ case lltok::kw_trunc:
+ case lltok::kw_zext:
+ case lltok::kw_sext:
+ case lltok::kw_fptrunc:
+ case lltok::kw_fpext:
+ case lltok::kw_bitcast:
+ case lltok::kw_uitofp:
+ case lltok::kw_sitofp:
+ case lltok::kw_fptoui:
+ case lltok::kw_fptosi:
+ case lltok::kw_inttoptr:
+ case lltok::kw_ptrtoint: {
+ unsigned Opc = Lex.getUIntVal();
+ Type *DestTy = 0;
+ Constant *SrcVal;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
+ ParseGlobalTypeAndValue(SrcVal) ||
+ ParseToken(lltok::kw_to, "expected 'to' in constantexpr cast") ||
+ ParseType(DestTy) ||
+ ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast"))
+ return true;
+ if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy))
+ return Error(ID.Loc, "invalid cast opcode for cast from '" +
+ getTypeString(SrcVal->getType()) + "' to '" +
+ getTypeString(DestTy) + "'");
+ ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc,
+ SrcVal, DestTy);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_extractvalue: {
+ Lex.Lex();
+ Constant *Val;
+ SmallVector<unsigned, 4> Indices;
+ if (ParseToken(lltok::lparen, "expected '(' in extractvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val) ||
+ ParseIndexList(Indices) ||
+ ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr"))
+ return true;
+
+ if (!Val->getType()->isAggregateType())
+ return Error(ID.Loc, "extractvalue operand must be aggregate type");
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
+ return Error(ID.Loc, "invalid indices for extractvalue");
+ ID.ConstantVal = ConstantExpr::getExtractValue(Val, Indices);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_insertvalue: {
+ Lex.Lex();
+ Constant *Val0, *Val1;
+ SmallVector<unsigned, 4> Indices;
+ if (ParseToken(lltok::lparen, "expected '(' in insertvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in insertvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseIndexList(Indices) ||
+ ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
+ return true;
+ if (!Val0->getType()->isAggregateType())
+ return Error(ID.Loc, "insertvalue operand must be aggregate type");
+ if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
+ return Error(ID.Loc, "invalid indices for insertvalue");
+ ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1, Indices);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_icmp:
+ case lltok::kw_fcmp: {
+ unsigned PredVal, Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ if (ParseCmpPredicate(PredVal, Opc) ||
+ ParseToken(lltok::lparen, "expected '(' in compare constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in compare constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in compare constantexpr"))
+ return true;
+
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "compare operands must have the same type");
+
+ CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal;
+
+ if (Opc == Instruction::FCmp) {
+ if (!Val0->getType()->isFPOrFPVectorTy())
+ return Error(ID.Loc, "fcmp requires floating point operands");
+ ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1);
+ } else {
+ assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
+ if (!Val0->getType()->isIntOrIntVectorTy() &&
+ !Val0->getType()->getScalarType()->isPointerTy())
+ return Error(ID.Loc, "icmp requires pointer or integer operands");
+ ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
+ }
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ // Binary Operators.
+ case lltok::kw_add:
+ case lltok::kw_fadd:
+ case lltok::kw_sub:
+ case lltok::kw_fsub:
+ case lltok::kw_mul:
+ case lltok::kw_fmul:
+ case lltok::kw_udiv:
+ case lltok::kw_sdiv:
+ case lltok::kw_fdiv:
+ case lltok::kw_urem:
+ case lltok::kw_srem:
+ case lltok::kw_frem:
+ case lltok::kw_shl:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr: {
+ bool NUW = false;
+ bool NSW = false;
+ bool Exact = false;
+ unsigned Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ LocTy ModifierLoc = Lex.getLoc();
+ if (Opc == Instruction::Add || Opc == Instruction::Sub ||
+ Opc == Instruction::Mul || Opc == Instruction::Shl) {
+ if (EatIfPresent(lltok::kw_nuw))
+ NUW = true;
+ if (EatIfPresent(lltok::kw_nsw)) {
+ NSW = true;
+ if (EatIfPresent(lltok::kw_nuw))
+ NUW = true;
+ }
+ } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr || Opc == Instruction::AShr) {
+ if (EatIfPresent(lltok::kw_exact))
+ Exact = true;
+ }
+ if (ParseToken(lltok::lparen, "expected '(' in binary constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in binary constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in binary constantexpr"))
+ return true;
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "operands of constexpr must have same type");
+ if (!Val0->getType()->isIntOrIntVectorTy()) {
+ if (NUW)
+ return Error(ModifierLoc, "nuw only applies to integer operations");
+ if (NSW)
+ return Error(ModifierLoc, "nsw only applies to integer operations");
+ }
+ // Check that the type is valid for the operator.
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ if (!Val0->getType()->isIntOrIntVectorTy())
+ return Error(ID.Loc, "constexpr requires integer operands");
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ if (!Val0->getType()->isFPOrFPVectorTy())
+ return Error(ID.Loc, "constexpr requires fp operands");
+ break;
+ default: llvm_unreachable("Unknown binary operator!");
+ }
+ unsigned Flags = 0;
+ if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+ if (NSW) Flags |= OverflowingBinaryOperator::NoSignedWrap;
+ if (Exact) Flags |= PossiblyExactOperator::IsExact;
+ Constant *C = ConstantExpr::get(Opc, Val0, Val1, Flags);
+ ID.ConstantVal = C;
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ // Logical Operations
+ case lltok::kw_and:
+ case lltok::kw_or:
+ case lltok::kw_xor: {
+ unsigned Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' in logical constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in logical constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in logical constantexpr"))
+ return true;
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "operands of constexpr must have same type");
+ if (!Val0->getType()->isIntOrIntVectorTy())
+ return Error(ID.Loc,
+ "constexpr requires integer or integer vector operands");
+ ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ case lltok::kw_getelementptr:
+ case lltok::kw_shufflevector:
+ case lltok::kw_insertelement:
+ case lltok::kw_extractelement:
+ case lltok::kw_select: {
+ unsigned Opc = Lex.getUIntVal();
+ SmallVector<Constant*, 16> Elts;
+ bool InBounds = false;
+ Lex.Lex();
+ if (Opc == Instruction::GetElementPtr)
+ InBounds = EatIfPresent(lltok::kw_inbounds);
+ if (ParseToken(lltok::lparen, "expected '(' in constantexpr") ||
+ ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rparen, "expected ')' in constantexpr"))
+ return true;
+
+ if (Opc == Instruction::GetElementPtr) {
+ if (Elts.size() == 0 ||
+ !Elts[0]->getType()->getScalarType()->isPointerTy())
+ return Error(ID.Loc, "getelementptr requires pointer operand");
+
+ ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
+ if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), Indices))
+ return Error(ID.Loc, "invalid indices for getelementptr");
+ ID.ConstantVal = ConstantExpr::getGetElementPtr(Elts[0], Indices,
+ InBounds);
+ } else if (Opc == Instruction::Select) {
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to select");
+ if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1],
+ Elts[2]))
+ return Error(ID.Loc, Reason);
+ ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]);
+ } else if (Opc == Instruction::ShuffleVector) {
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to shufflevector");
+ if (!ShuffleVectorInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
+ return Error(ID.Loc, "invalid operands to shufflevector");
+ ID.ConstantVal =
+ ConstantExpr::getShuffleVector(Elts[0], Elts[1],Elts[2]);
+ } else if (Opc == Instruction::ExtractElement) {
+ if (Elts.size() != 2)
+ return Error(ID.Loc, "expected two operands to extractelement");
+ if (!ExtractElementInst::isValidOperands(Elts[0], Elts[1]))
+ return Error(ID.Loc, "invalid extractelement operands");
+ ID.ConstantVal = ConstantExpr::getExtractElement(Elts[0], Elts[1]);
+ } else {
+ assert(Opc == Instruction::InsertElement && "Unknown opcode");
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to insertelement");
+ if (!InsertElementInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
+ return Error(ID.Loc, "invalid insertelement operands");
+ ID.ConstantVal =
+ ConstantExpr::getInsertElement(Elts[0], Elts[1],Elts[2]);
+ }
+
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ }
+
+ Lex.Lex();
+ return false;
+}
+
+/// ParseGlobalValue - Parse a global value with the specified type.
+bool LLParser::ParseGlobalValue(Type *Ty, Constant *&C) {
+ C = 0;
+ ValID ID;
+ Value *V = NULL;
+ bool Parsed = ParseValID(ID) ||
+ ConvertValIDToValue(Ty, ID, V, NULL);
+ if (V && !(C = dyn_cast<Constant>(V)))
+ return Error(ID.Loc, "global values must be constants");
+ return Parsed;
+}
+
+bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
+ Type *Ty = 0;
+ return ParseType(Ty) ||
+ ParseGlobalValue(Ty, V);
+}
+
+/// ParseGlobalValueVector
+/// ::= /*empty*/
+/// ::= TypeAndValue (',' TypeAndValue)*
+bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
+ // Empty list.
+ if (Lex.getKind() == lltok::rbrace ||
+ Lex.getKind() == lltok::rsquare ||
+ Lex.getKind() == lltok::greater ||
+ Lex.getKind() == lltok::rparen)
+ return false;
+
+ Constant *C;
+ if (ParseGlobalTypeAndValue(C)) return true;
+ Elts.push_back(C);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseGlobalTypeAndValue(C)) return true;
+ Elts.push_back(C);
+ }
+
+ return false;
+}
+
+bool LLParser::ParseMetadataListValue(ValID &ID, PerFunctionState *PFS) {
+ assert(Lex.getKind() == lltok::lbrace);
+ Lex.Lex();
+
+ SmallVector<Value*, 16> Elts;
+ if (ParseMDNodeVector(Elts, PFS) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ ID.MDNodeVal = MDNode::get(Context, Elts);
+ ID.Kind = ValID::t_MDNode;
+ return false;
+}
+
+/// ParseMetadataValue
+/// ::= !42
+/// ::= !{...}
+/// ::= !"string"
+bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) {
+ assert(Lex.getKind() == lltok::exclaim);
+ Lex.Lex();
+
+ // MDNode:
+ // !{ ... }
+ if (Lex.getKind() == lltok::lbrace)
+ return ParseMetadataListValue(ID, PFS);
+
+ // Standalone metadata reference
+ // !42
+ if (Lex.getKind() == lltok::APSInt) {
+ if (ParseMDNodeID(ID.MDNodeVal)) return true;
+ ID.Kind = ValID::t_MDNode;
+ return false;
+ }
+
+ // MDString:
+ // ::= '!' STRINGCONSTANT
+ if (ParseMDString(ID.MDStringVal)) return true;
+ ID.Kind = ValID::t_MDString;
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Function Parsing.
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
+ PerFunctionState *PFS) {
+ if (Ty->isFunctionTy())
+ return Error(ID.Loc, "functions are not values, refer to them as pointers");
+
+ switch (ID.Kind) {
+ case ValID::t_LocalID:
+ if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+ V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc);
+ return (V == 0);
+ case ValID::t_LocalName:
+ if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+ V = PFS->GetVal(ID.StrVal, Ty, ID.Loc);
+ return (V == 0);
+ case ValID::t_InlineAsm: {
+ PointerType *PTy = dyn_cast<PointerType>(Ty);
+ FunctionType *FTy =
+ PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
+ if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
+ return Error(ID.Loc, "invalid type for inline asm constraint string");
+ V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1,
+ (ID.UIntVal>>1)&1, (InlineAsm::AsmDialect(ID.UIntVal>>2)));
+ return false;
+ }
+ case ValID::t_MDNode:
+ if (!Ty->isMetadataTy())
+ return Error(ID.Loc, "metadata value must have metadata type");
+ V = ID.MDNodeVal;
+ return false;
+ case ValID::t_MDString:
+ if (!Ty->isMetadataTy())
+ return Error(ID.Loc, "metadata value must have metadata type");
+ V = ID.MDStringVal;
+ return false;
+ case ValID::t_GlobalName:
+ V = GetGlobalVal(ID.StrVal, Ty, ID.Loc);
+ return V == 0;
+ case ValID::t_GlobalID:
+ V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc);
+ return V == 0;
+ case ValID::t_APSInt:
+ if (!Ty->isIntegerTy())
+ return Error(ID.Loc, "integer constant must have integer type");
+ ID.APSIntVal = ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
+ V = ConstantInt::get(Context, ID.APSIntVal);
+ return false;
+ case ValID::t_APFloat:
+ if (!Ty->isFloatingPointTy() ||
+ !ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
+ return Error(ID.Loc, "floating point constant invalid for type");
+
+ // The lexer has no type info, so builds all half, float, and double FP
+ // constants as double. Fix this here. Long double does not need this.
+ if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble) {
+ bool Ignored;
+ if (Ty->isHalfTy())
+ ID.APFloatVal.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven,
+ &Ignored);
+ else if (Ty->isFloatTy())
+ ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ &Ignored);
+ }
+ V = ConstantFP::get(Context, ID.APFloatVal);
+
+ if (V->getType() != Ty)
+ return Error(ID.Loc, "floating point constant does not have type '" +
+ getTypeString(Ty) + "'");
+
+ return false;
+ case ValID::t_Null:
+ if (!Ty->isPointerTy())
+ return Error(ID.Loc, "null must be a pointer type");
+ V = ConstantPointerNull::get(cast<PointerType>(Ty));
+ return false;
+ case ValID::t_Undef:
+ // FIXME: LabelTy should not be a first-class type.
+ if (!Ty->isFirstClassType() || Ty->isLabelTy())
+ return Error(ID.Loc, "invalid type for undef constant");
+ V = UndefValue::get(Ty);
+ return false;
+ case ValID::t_EmptyArray:
+ if (!Ty->isArrayTy() || cast<ArrayType>(Ty)->getNumElements() != 0)
+ return Error(ID.Loc, "invalid empty array initializer");
+ V = UndefValue::get(Ty);
+ return false;
+ case ValID::t_Zero:
+ // FIXME: LabelTy should not be a first-class type.
+ if (!Ty->isFirstClassType() || Ty->isLabelTy())
+ return Error(ID.Loc, "invalid type for null constant");
+ V = Constant::getNullValue(Ty);
+ return false;
+ case ValID::t_Constant:
+ if (ID.ConstantVal->getType() != Ty)
+ return Error(ID.Loc, "constant expression type mismatch");
+
+ V = ID.ConstantVal;
+ return false;
+ case ValID::t_ConstantStruct:
+ case ValID::t_PackedConstantStruct:
+ if (StructType *ST = dyn_cast<StructType>(Ty)) {
+ if (ST->getNumElements() != ID.UIntVal)
+ return Error(ID.Loc,
+ "initializer with struct type has wrong # elements");
+ if (ST->isPacked() != (ID.Kind == ValID::t_PackedConstantStruct))
+ return Error(ID.Loc, "packed'ness of initializer and type don't match");
+
+ // Verify that the elements are compatible with the structtype.
+ for (unsigned i = 0, e = ID.UIntVal; i != e; ++i)
+ if (ID.ConstantStructElts[i]->getType() != ST->getElementType(i))
+ return Error(ID.Loc, "element " + Twine(i) +
+ " of struct initializer doesn't match struct element type");
+
+ V = ConstantStruct::get(ST, makeArrayRef(ID.ConstantStructElts,
+ ID.UIntVal));
+ } else
+ return Error(ID.Loc, "constant expression type mismatch");
+ return false;
+ }
+ llvm_unreachable("Invalid ValID");
+}
+
+bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) {
+ V = 0;
+ ValID ID;
+ return ParseValID(ID, PFS) ||
+ ConvertValIDToValue(Ty, ID, V, PFS);
+}
+
+bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) {
+ Type *Ty = 0;
+ return ParseType(Ty) ||
+ ParseValue(Ty, V, PFS);
+}
+
+bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+ PerFunctionState &PFS) {
+ Value *V;
+ Loc = Lex.getLoc();
+ if (ParseTypeAndValue(V, PFS)) return true;
+ if (!isa<BasicBlock>(V))
+ return Error(Loc, "expected a basic block");
+ BB = cast<BasicBlock>(V);
+ return false;
+}
+
+
+/// FunctionHeader
+/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
+/// OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
+/// OptionalAlign OptGC
+bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
+ // Parse the linkage.
+ LocTy LinkageLoc = Lex.getLoc();
+ unsigned Linkage;
+
+ unsigned Visibility;
+ AttrBuilder RetAttrs;
+ CallingConv::ID CC;
+ Type *RetType = 0;
+ LocTy RetTypeLoc = Lex.getLoc();
+ if (ParseOptionalLinkage(Linkage) ||
+ ParseOptionalVisibility(Visibility) ||
+ ParseOptionalCallingConv(CC) ||
+ ParseOptionalReturnAttrs(RetAttrs) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/))
+ return true;
+
+ // Verify that the linkage is ok.
+ switch ((GlobalValue::LinkageTypes)Linkage) {
+ case GlobalValue::ExternalLinkage:
+ break; // always ok.
+ case GlobalValue::DLLImportLinkage:
+ case GlobalValue::ExternalWeakLinkage:
+ if (isDefine)
+ return Error(LinkageLoc, "invalid linkage for function definition");
+ break;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::AvailableExternallyLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::DLLExportLinkage:
+ if (!isDefine)
+ return Error(LinkageLoc, "invalid linkage for function declaration");
+ break;
+ case GlobalValue::AppendingLinkage:
+ case GlobalValue::CommonLinkage:
+ return Error(LinkageLoc, "invalid function linkage type");
+ }
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "invalid function return type");
+
+ LocTy NameLoc = Lex.getLoc();
+
+ std::string FunctionName;
+ if (Lex.getKind() == lltok::GlobalVar) {
+ FunctionName = Lex.getStrVal();
+ } else if (Lex.getKind() == lltok::GlobalID) { // @42 is ok.
+ unsigned NameID = Lex.getUIntVal();
+
+ if (NameID != NumberedVals.size())
+ return TokError("function expected to be numbered '%" +
+ Twine(NumberedVals.size()) + "'");
+ } else {
+ return TokError("expected function name");
+ }
+
+ Lex.Lex();
+
+ if (Lex.getKind() != lltok::lparen)
+ return TokError("expected '(' in function argument list");
+
+ SmallVector<ArgInfo, 8> ArgList;
+ bool isVarArg;
+ AttrBuilder FuncAttrs;
+ std::vector<unsigned> FwdRefAttrGrps;
+ LocTy NoBuiltinLoc;
+ std::string Section;
+ unsigned Alignment;
+ std::string GC;
+ bool UnnamedAddr;
+ LocTy UnnamedAddrLoc;
+
+ if (ParseArgumentList(ArgList, isVarArg) ||
+ ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+ &UnnamedAddrLoc) ||
+ ParseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false,
+ NoBuiltinLoc) ||
+ (EatIfPresent(lltok::kw_section) &&
+ ParseStringConstant(Section)) ||
+ ParseOptionalAlignment(Alignment) ||
+ (EatIfPresent(lltok::kw_gc) &&
+ ParseStringConstant(GC)))
+ return true;
+
+ if (FuncAttrs.contains(Attribute::NoBuiltin))
+ return Error(NoBuiltinLoc, "'nobuiltin' attribute not valid on function");
+
+ // If the alignment was parsed as an attribute, move to the alignment field.
+ if (FuncAttrs.hasAlignmentAttr()) {
+ Alignment = FuncAttrs.getAlignment();
+ FuncAttrs.removeAttribute(Attribute::Alignment);
+ }
+
+ // Okay, if we got here, the function is syntactically valid. Convert types
+ // and do semantic checks.
+ std::vector<Type*> ParamTypeList;
+ SmallVector<AttributeSet, 8> Attrs;
+
+ if (RetAttrs.hasAttributes())
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::ReturnIndex,
+ RetAttrs));
+
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ ParamTypeList.push_back(ArgList[i].Ty);
+ if (ArgList[i].Attrs.hasAttributes(i + 1)) {
+ AttrBuilder B(ArgList[i].Attrs, i + 1);
+ Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B));
+ }
+ }
+
+ if (FuncAttrs.hasAttributes())
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::FunctionIndex,
+ FuncAttrs));
+
+ AttributeSet PAL = AttributeSet::get(Context, Attrs);
+
+ if (PAL.hasAttribute(1, Attribute::StructRet) && !RetType->isVoidTy())
+ return Error(RetTypeLoc, "functions with 'sret' argument must return void");
+
+ FunctionType *FT =
+ FunctionType::get(RetType, ParamTypeList, isVarArg);
+ PointerType *PFT = PointerType::getUnqual(FT);
+
+ Fn = 0;
+ if (!FunctionName.empty()) {
+ // If this was a definition of a forward reference, remove the definition
+ // from the forward reference table and fill in the forward ref.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator FRVI =
+ ForwardRefVals.find(FunctionName);
+ if (FRVI != ForwardRefVals.end()) {
+ Fn = M->getFunction(FunctionName);
+ if (!Fn)
+ return Error(FRVI->second.second, "invalid forward reference to "
+ "function as global value!");
+ if (Fn->getType() != PFT)
+ return Error(FRVI->second.second, "invalid forward reference to "
+ "function '" + FunctionName + "' with wrong type!");
+
+ ForwardRefVals.erase(FRVI);
+ } else if ((Fn = M->getFunction(FunctionName))) {
+ // Reject redefinitions.
+ return Error(NameLoc, "invalid redefinition of function '" +
+ FunctionName + "'");
+ } else if (M->getNamedValue(FunctionName)) {
+ return Error(NameLoc, "redefinition of function '@" + FunctionName + "'");
+ }
+
+ } else {
+ // If this is a definition of a forward referenced function, make sure the
+ // types agree.
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator I
+ = ForwardRefValIDs.find(NumberedVals.size());
+ if (I != ForwardRefValIDs.end()) {
+ Fn = cast<Function>(I->second.first);
+ if (Fn->getType() != PFT)
+ return Error(NameLoc, "type of definition and forward reference of '@" +
+ Twine(NumberedVals.size()) + "' disagree");
+ ForwardRefValIDs.erase(I);
+ }
+ }
+
+ if (Fn == 0)
+ Fn = Function::Create(FT, GlobalValue::ExternalLinkage, FunctionName, M);
+ else // Move the forward-reference to the correct spot in the module.
+ M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn);
+
+ if (FunctionName.empty())
+ NumberedVals.push_back(Fn);
+
+ Fn->setLinkage((GlobalValue::LinkageTypes)Linkage);
+ Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ Fn->setCallingConv(CC);
+ Fn->setAttributes(PAL);
+ Fn->setUnnamedAddr(UnnamedAddr);
+ Fn->setAlignment(Alignment);
+ Fn->setSection(Section);
+ if (!GC.empty()) Fn->setGC(GC.c_str());
+ ForwardRefAttrGroups[Fn] = FwdRefAttrGrps;
+
+ // Add all of the arguments we parsed to the function.
+ Function::arg_iterator ArgIt = Fn->arg_begin();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
+ // If the argument has a name, insert it into the argument symbol table.
+ if (ArgList[i].Name.empty()) continue;
+
+ // Set the name, if it conflicted, it will be auto-renamed.
+ ArgIt->setName(ArgList[i].Name);
+
+ if (ArgIt->getName() != ArgList[i].Name)
+ return Error(ArgList[i].Loc, "redefinition of argument '%" +
+ ArgList[i].Name + "'");
+ }
+
+ return false;
+}
+
+
+/// ParseFunctionBody
+/// ::= '{' BasicBlock+ '}'
+///
+bool LLParser::ParseFunctionBody(Function &Fn) {
+ if (Lex.getKind() != lltok::lbrace)
+ return TokError("expected '{' in function body");
+ Lex.Lex(); // eat the {.
+
+ int FunctionNumber = -1;
+ if (!Fn.hasName()) FunctionNumber = NumberedVals.size()-1;
+
+ PerFunctionState PFS(*this, Fn, FunctionNumber);
+
+ // We need at least one basic block.
+ if (Lex.getKind() == lltok::rbrace)
+ return TokError("function body requires at least one basic block");
+
+ while (Lex.getKind() != lltok::rbrace)
+ if (ParseBasicBlock(PFS)) return true;
+
+ // Eat the }.
+ Lex.Lex();
+
+ // Verify function is ok.
+ return PFS.FinishFunction();
+}
+
+/// ParseBasicBlock
+/// ::= LabelStr? Instruction*
+bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
+ // If this basic block starts out with a name, remember it.
+ std::string Name;
+ LocTy NameLoc = Lex.getLoc();
+ if (Lex.getKind() == lltok::LabelStr) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ }
+
+ BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
+ if (BB == 0) return true;
+
+ std::string NameStr;
+
+ // Parse the instructions in this block until we get a terminator.
+ Instruction *Inst;
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MetadataOnInst;
+ do {
+ // This instruction may have three possibilities for a name: a) none
+ // specified, b) name specified "%foo =", c) number specified: "%4 =".
+ LocTy NameLoc = Lex.getLoc();
+ int NameID = -1;
+ NameStr = "";
+
+ if (Lex.getKind() == lltok::LocalVarID) {
+ NameID = Lex.getUIntVal();
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after instruction id"))
+ return true;
+ } else if (Lex.getKind() == lltok::LocalVar) {
+ NameStr = Lex.getStrVal();
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after instruction name"))
+ return true;
+ }
+
+ switch (ParseInstruction(Inst, BB, PFS)) {
+ default: llvm_unreachable("Unknown ParseInstruction result!");
+ case InstError: return true;
+ case InstNormal:
+ BB->getInstList().push_back(Inst);
+
+ // With a normal result, we check to see if the instruction is followed by
+ // a comma and metadata.
+ if (EatIfPresent(lltok::comma))
+ if (ParseInstructionMetadata(Inst, &PFS))
+ return true;
+ break;
+ case InstExtraComma:
+ BB->getInstList().push_back(Inst);
+
+ // If the instruction parser ate an extra comma at the end of it, it
+ // *must* be followed by metadata.
+ if (ParseInstructionMetadata(Inst, &PFS))
+ return true;
+ break;
+ }
+
+ // Set the name on the instruction.
+ if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
+ } while (!isa<TerminatorInst>(Inst));
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Parsing.
+//===----------------------------------------------------------------------===//
+
+/// ParseInstruction - Parse one of the many different instructions.
+///
+int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS) {
+ lltok::Kind Token = Lex.getKind();
+ if (Token == lltok::Eof)
+ return TokError("found end of file when expecting more instructions");
+ LocTy Loc = Lex.getLoc();
+ unsigned KeywordVal = Lex.getUIntVal();
+ Lex.Lex(); // Eat the keyword.
+
+ switch (Token) {
+ default: return Error(Loc, "expected instruction opcode");
+ // Terminator Instructions.
+ case lltok::kw_unreachable: Inst = new UnreachableInst(Context); return false;
+ case lltok::kw_ret: return ParseRet(Inst, BB, PFS);
+ case lltok::kw_br: return ParseBr(Inst, PFS);
+ case lltok::kw_switch: return ParseSwitch(Inst, PFS);
+ case lltok::kw_indirectbr: return ParseIndirectBr(Inst, PFS);
+ case lltok::kw_invoke: return ParseInvoke(Inst, PFS);
+ case lltok::kw_resume: return ParseResume(Inst, PFS);
+ // Binary Operators.
+ case lltok::kw_add:
+ case lltok::kw_sub:
+ case lltok::kw_mul:
+ case lltok::kw_shl: {
+ bool NUW = EatIfPresent(lltok::kw_nuw);
+ bool NSW = EatIfPresent(lltok::kw_nsw);
+ if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
+
+ if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+
+ if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
+ if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
+ return false;
+ }
+ case lltok::kw_fadd:
+ case lltok::kw_fsub:
+ case lltok::kw_fmul:
+ case lltok::kw_fdiv:
+ case lltok::kw_frem: {
+ FastMathFlags FMF = EatFastMathFlagsIfPresent();
+ int Res = ParseArithmetic(Inst, PFS, KeywordVal, 2);
+ if (Res != 0)
+ return Res;
+ if (FMF.any())
+ Inst->setFastMathFlags(FMF);
+ return 0;
+ }
+
+ case lltok::kw_sdiv:
+ case lltok::kw_udiv:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr: {
+ bool Exact = EatIfPresent(lltok::kw_exact);
+
+ if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+ if (Exact) cast<BinaryOperator>(Inst)->setIsExact(true);
+ return false;
+ }
+
+ case lltok::kw_urem:
+ case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1);
+ case lltok::kw_and:
+ case lltok::kw_or:
+ case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
+ case lltok::kw_icmp:
+ case lltok::kw_fcmp: return ParseCompare(Inst, PFS, KeywordVal);
+ // Casts.
+ case lltok::kw_trunc:
+ case lltok::kw_zext:
+ case lltok::kw_sext:
+ case lltok::kw_fptrunc:
+ case lltok::kw_fpext:
+ case lltok::kw_bitcast:
+ case lltok::kw_uitofp:
+ case lltok::kw_sitofp:
+ case lltok::kw_fptoui:
+ case lltok::kw_fptosi:
+ case lltok::kw_inttoptr:
+ case lltok::kw_ptrtoint: return ParseCast(Inst, PFS, KeywordVal);
+ // Other.
+ case lltok::kw_select: return ParseSelect(Inst, PFS);
+ case lltok::kw_va_arg: return ParseVA_Arg(Inst, PFS);
+ case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
+ case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS);
+ case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS);
+ case lltok::kw_phi: return ParsePHI(Inst, PFS);
+ case lltok::kw_landingpad: return ParseLandingPad(Inst, PFS);
+ case lltok::kw_call: return ParseCall(Inst, PFS, false);
+ case lltok::kw_tail: return ParseCall(Inst, PFS, true);
+ // Memory.
+ case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
+ case lltok::kw_load: return ParseLoad(Inst, PFS);
+ case lltok::kw_store: return ParseStore(Inst, PFS);
+ case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS);
+ case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS);
+ case lltok::kw_fence: return ParseFence(Inst, PFS);
+ case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
+ case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS);
+ case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS);
+ }
+}
+
+/// ParseCmpPredicate - Parse an integer or fp predicate, based on Kind.
+bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
+ if (Opc == Instruction::FCmp) {
+ switch (Lex.getKind()) {
+ default: return TokError("expected fcmp predicate (e.g. 'oeq')");
+ case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break;
+ case lltok::kw_one: P = CmpInst::FCMP_ONE; break;
+ case lltok::kw_olt: P = CmpInst::FCMP_OLT; break;
+ case lltok::kw_ogt: P = CmpInst::FCMP_OGT; break;
+ case lltok::kw_ole: P = CmpInst::FCMP_OLE; break;
+ case lltok::kw_oge: P = CmpInst::FCMP_OGE; break;
+ case lltok::kw_ord: P = CmpInst::FCMP_ORD; break;
+ case lltok::kw_uno: P = CmpInst::FCMP_UNO; break;
+ case lltok::kw_ueq: P = CmpInst::FCMP_UEQ; break;
+ case lltok::kw_une: P = CmpInst::FCMP_UNE; break;
+ case lltok::kw_ult: P = CmpInst::FCMP_ULT; break;
+ case lltok::kw_ugt: P = CmpInst::FCMP_UGT; break;
+ case lltok::kw_ule: P = CmpInst::FCMP_ULE; break;
+ case lltok::kw_uge: P = CmpInst::FCMP_UGE; break;
+ case lltok::kw_true: P = CmpInst::FCMP_TRUE; break;
+ case lltok::kw_false: P = CmpInst::FCMP_FALSE; break;
+ }
+ } else {
+ switch (Lex.getKind()) {
+ default: return TokError("expected icmp predicate (e.g. 'eq')");
+ case lltok::kw_eq: P = CmpInst::ICMP_EQ; break;
+ case lltok::kw_ne: P = CmpInst::ICMP_NE; break;
+ case lltok::kw_slt: P = CmpInst::ICMP_SLT; break;
+ case lltok::kw_sgt: P = CmpInst::ICMP_SGT; break;
+ case lltok::kw_sle: P = CmpInst::ICMP_SLE; break;
+ case lltok::kw_sge: P = CmpInst::ICMP_SGE; break;
+ case lltok::kw_ult: P = CmpInst::ICMP_ULT; break;
+ case lltok::kw_ugt: P = CmpInst::ICMP_UGT; break;
+ case lltok::kw_ule: P = CmpInst::ICMP_ULE; break;
+ case lltok::kw_uge: P = CmpInst::ICMP_UGE; break;
+ }
+ }
+ Lex.Lex();
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Terminator Instructions.
+//===----------------------------------------------------------------------===//
+
+/// ParseRet - Parse a return instruction.
+/// ::= 'ret' void (',' !dbg, !1)*
+/// ::= 'ret' TypeAndValue (',' !dbg, !1)*
+bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS) {
+ SMLoc TypeLoc = Lex.getLoc();
+ Type *Ty = 0;
+ if (ParseType(Ty, true /*void allowed*/)) return true;
+
+ Type *ResType = PFS.getFunction().getReturnType();
+
+ if (Ty->isVoidTy()) {
+ if (!ResType->isVoidTy())
+ return Error(TypeLoc, "value doesn't match function result type '" +
+ getTypeString(ResType) + "'");
+
+ Inst = ReturnInst::Create(Context);
+ return false;
+ }
+
+ Value *RV;
+ if (ParseValue(Ty, RV, PFS)) return true;
+
+ if (ResType != RV->getType())
+ return Error(TypeLoc, "value doesn't match function result type '" +
+ getTypeString(ResType) + "'");
+
+ Inst = ReturnInst::Create(Context, RV);
+ return false;
+}
+
+
+/// ParseBr
+/// ::= 'br' TypeAndValue
+/// ::= 'br' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc, Loc2;
+ Value *Op0;
+ BasicBlock *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS)) return true;
+
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(Op0)) {
+ Inst = BranchInst::Create(BB);
+ return false;
+ }
+
+ if (Op0->getType() != Type::getInt1Ty(Context))
+ return Error(Loc, "branch condition must have 'i1' type");
+
+ if (ParseToken(lltok::comma, "expected ',' after branch condition") ||
+ ParseTypeAndBasicBlock(Op1, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after true destination") ||
+ ParseTypeAndBasicBlock(Op2, Loc2, PFS))
+ return true;
+
+ Inst = BranchInst::Create(Op1, Op2, Op0);
+ return false;
+}
+
+/// ParseSwitch
+/// Instruction
+/// ::= 'switch' TypeAndValue ',' TypeAndValue '[' JumpTable ']'
+/// JumpTable
+/// ::= (TypeAndValue ',' TypeAndValue)*
+bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy CondLoc, BBLoc;
+ Value *Cond;
+ BasicBlock *DefaultBB;
+ if (ParseTypeAndValue(Cond, CondLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after switch condition") ||
+ ParseTypeAndBasicBlock(DefaultBB, BBLoc, PFS) ||
+ ParseToken(lltok::lsquare, "expected '[' with switch table"))
+ return true;
+
+ if (!Cond->getType()->isIntegerTy())
+ return Error(CondLoc, "switch condition must have integer type");
+
+ // Parse the jump table pairs.
+ SmallPtrSet<Value*, 32> SeenCases;
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 32> Table;
+ while (Lex.getKind() != lltok::rsquare) {
+ Value *Constant;
+ BasicBlock *DestBB;
+
+ if (ParseTypeAndValue(Constant, CondLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after case value") ||
+ ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+
+ if (!SeenCases.insert(Constant))
+ return Error(CondLoc, "duplicate case value in switch");
+ if (!isa<ConstantInt>(Constant))
+ return Error(CondLoc, "case value is not a constant integer");
+
+ Table.push_back(std::make_pair(cast<ConstantInt>(Constant), DestBB));
+ }
+
+ Lex.Lex(); // Eat the ']'.
+
+ SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, Table.size());
+ for (unsigned i = 0, e = Table.size(); i != e; ++i)
+ SI->addCase(Table[i].first, Table[i].second);
+ Inst = SI;
+ return false;
+}
+
+/// ParseIndirectBr
+/// Instruction
+/// ::= 'indirectbr' TypeAndValue ',' '[' LabelList ']'
+bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy AddrLoc;
+ Value *Address;
+ if (ParseTypeAndValue(Address, AddrLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after indirectbr address") ||
+ ParseToken(lltok::lsquare, "expected '[' with indirectbr"))
+ return true;
+
+ if (!Address->getType()->isPointerTy())
+ return Error(AddrLoc, "indirectbr address must have pointer type");
+
+ // Parse the destination list.
+ SmallVector<BasicBlock*, 16> DestList;
+
+ if (Lex.getKind() != lltok::rsquare) {
+ BasicBlock *DestBB;
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ DestList.push_back(DestBB);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ DestList.push_back(DestBB);
+ }
+ }
+
+ if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
+ return true;
+
+ IndirectBrInst *IBI = IndirectBrInst::Create(Address, DestList.size());
+ for (unsigned i = 0, e = DestList.size(); i != e; ++i)
+ IBI->addDestination(DestList[i]);
+ Inst = IBI;
+ return false;
+}
+
+
+/// ParseInvoke
+/// ::= 'invoke' OptionalCallingConv OptionalAttrs Type Value ParamList
+/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
+bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy CallLoc = Lex.getLoc();
+ AttrBuilder RetAttrs, FnAttrs;
+ std::vector<unsigned> FwdRefAttrGrps;
+ LocTy NoBuiltinLoc;
+ CallingConv::ID CC;
+ Type *RetType = 0;
+ LocTy RetTypeLoc;
+ ValID CalleeID;
+ SmallVector<ParamInfo, 16> ArgList;
+
+ BasicBlock *NormalBB, *UnwindBB;
+ if (ParseOptionalCallingConv(CC) ||
+ ParseOptionalReturnAttrs(RetAttrs) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ ParseValID(CalleeID) ||
+ ParseParameterList(ArgList, PFS) ||
+ ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
+ NoBuiltinLoc) ||
+ ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
+ ParseTypeAndBasicBlock(NormalBB, PFS) ||
+ ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
+ ParseTypeAndBasicBlock(UnwindBB, PFS))
+ return true;
+
+ // If RetType is a non-function pointer type, then this is the short syntax
+ // for the call, which means that RetType is just the return type. Infer the
+ // rest of the function argument types from the arguments that are present.
+ PointerType *PFTy = 0;
+ FunctionType *Ty = 0;
+ if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
+ !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ // Pull out the types of all of the arguments...
+ std::vector<Type*> ParamTypes;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ParamTypes.push_back(ArgList[i].V->getType());
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ PFTy = PointerType::getUnqual(Ty);
+ }
+
+ // Look up the callee.
+ Value *Callee;
+ if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+
+ // Set up the Attribute for the function.
+ SmallVector<AttributeSet, 8> Attrs;
+ if (RetAttrs.hasAttributes())
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::ReturnIndex,
+ RetAttrs));
+
+ SmallVector<Value*, 8> Args;
+
+ // Loop through FunctionType's arguments and ensure they are specified
+ // correctly. Also, gather any parameter attributes.
+ FunctionType::param_iterator I = Ty->param_begin();
+ FunctionType::param_iterator E = Ty->param_end();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ Type *ExpectedTy = 0;
+ if (I != E) {
+ ExpectedTy = *I++;
+ } else if (!Ty->isVarArg()) {
+ return Error(ArgList[i].Loc, "too many arguments specified");
+ }
+
+ if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+ return Error(ArgList[i].Loc, "argument is not of expected type '" +
+ getTypeString(ExpectedTy) + "'");
+ Args.push_back(ArgList[i].V);
+ if (ArgList[i].Attrs.hasAttributes(i + 1)) {
+ AttrBuilder B(ArgList[i].Attrs, i + 1);
+ Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B));
+ }
+ }
+
+ if (I != E)
+ return Error(CallLoc, "not enough parameters specified for call");
+
+ if (FnAttrs.hasAttributes())
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::FunctionIndex,
+ FnAttrs));
+
+ // Finish off the Attribute and check them
+ AttributeSet PAL = AttributeSet::get(Context, Attrs);
+
+ InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args);
+ II->setCallingConv(CC);
+ II->setAttributes(PAL);
+ ForwardRefAttrGroups[II] = FwdRefAttrGrps;
+ Inst = II;
+ return false;
+}
+
+/// ParseResume
+/// ::= 'resume' TypeAndValue
+bool LLParser::ParseResume(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Exn; LocTy ExnLoc;
+ if (ParseTypeAndValue(Exn, ExnLoc, PFS))
+ return true;
+
+ ResumeInst *RI = ResumeInst::Create(Exn);
+ Inst = RI;
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Binary Operators.
+//===----------------------------------------------------------------------===//
+
+/// ParseArithmetic
+/// ::= ArithmeticOps TypeAndValue ',' Value
+///
+/// If OperandType is 0, then any FP or integer operand is allowed. If it is 1,
+/// then any integer operand is allowed, if it is 2, any fp operand is allowed.
+bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc, unsigned OperandType) {
+ LocTy Loc; Value *LHS, *RHS;
+ if (ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' in arithmetic operation") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ bool Valid;
+ switch (OperandType) {
+ default: llvm_unreachable("Unknown operand type!");
+ case 0: // int or FP.
+ Valid = LHS->getType()->isIntOrIntVectorTy() ||
+ LHS->getType()->isFPOrFPVectorTy();
+ break;
+ case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break;
+ case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break;
+ }
+
+ if (!Valid)
+ return Error(Loc, "invalid operand type for instruction");
+
+ Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ return false;
+}
+
+/// ParseLogical
+/// ::= ArithmeticOps TypeAndValue ',' Value {
+bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ LocTy Loc; Value *LHS, *RHS;
+ if (ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' in logical operation") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ if (!LHS->getType()->isIntOrIntVectorTy())
+ return Error(Loc,"instruction requires integer or integer vector operands");
+
+ Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ return false;
+}
+
+
+/// ParseCompare
+/// ::= 'icmp' IPredicates TypeAndValue ',' Value
+/// ::= 'fcmp' FPredicates TypeAndValue ',' Value
+bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ // Parse the integer/fp comparison predicate.
+ LocTy Loc;
+ unsigned Pred;
+ Value *LHS, *RHS;
+ if (ParseCmpPredicate(Pred, Opc) ||
+ ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after compare value") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ if (Opc == Instruction::FCmp) {
+ if (!LHS->getType()->isFPOrFPVectorTy())
+ return Error(Loc, "fcmp requires floating point operands");
+ Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+ } else {
+ assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
+ if (!LHS->getType()->isIntOrIntVectorTy() &&
+ !LHS->getType()->getScalarType()->isPointerTy())
+ return Error(Loc, "icmp requires integer operands");
+ Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Instructions.
+//===----------------------------------------------------------------------===//
+
+
+/// ParseCast
+/// ::= CastOpc TypeAndValue 'to' Type
+bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ LocTy Loc;
+ Value *Op;
+ Type *DestTy = 0;
+ if (ParseTypeAndValue(Op, Loc, PFS) ||
+ ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
+ ParseType(DestTy))
+ return true;
+
+ if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy)) {
+ CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy);
+ return Error(Loc, "invalid cast opcode for cast from '" +
+ getTypeString(Op->getType()) + "' to '" +
+ getTypeString(DestTy) + "'");
+ }
+ Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy);
+ return false;
+}
+
+/// ParseSelect
+/// ::= 'select' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after select condition") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after select value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (const char *Reason = SelectInst::areInvalidOperands(Op0, Op1, Op2))
+ return Error(Loc, Reason);
+
+ Inst = SelectInst::Create(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParseVA_Arg
+/// ::= 'va_arg' TypeAndValue ',' Type
+bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Op;
+ Type *EltTy = 0;
+ LocTy TypeLoc;
+ if (ParseTypeAndValue(Op, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
+ ParseType(EltTy, TypeLoc))
+ return true;
+
+ if (!EltTy->isFirstClassType())
+ return Error(TypeLoc, "va_arg requires operand with first class type");
+
+ Inst = new VAArgInst(Op, EltTy);
+ return false;
+}
+
+/// ParseExtractElement
+/// ::= 'extractelement' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseExtractElement(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after extract value") ||
+ ParseTypeAndValue(Op1, PFS))
+ return true;
+
+ if (!ExtractElementInst::isValidOperands(Op0, Op1))
+ return Error(Loc, "invalid extractelement operands");
+
+ Inst = ExtractElementInst::Create(Op0, Op1);
+ return false;
+}
+
+/// ParseInsertElement
+/// ::= 'insertelement' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseInsertElement(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (!InsertElementInst::isValidOperands(Op0, Op1, Op2))
+ return Error(Loc, "invalid insertelement operands");
+
+ Inst = InsertElementInst::Create(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParseShuffleVector
+/// ::= 'shufflevector' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after shuffle mask") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after shuffle value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
+ return Error(Loc, "invalid shufflevector operands");
+
+ Inst = new ShuffleVectorInst(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParsePHI
+/// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
+int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
+ Type *Ty = 0; LocTy TypeLoc;
+ Value *Op0, *Op1;
+
+ if (ParseType(Ty, TypeLoc) ||
+ ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+ ParseValue(Ty, Op0, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
+ ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+ return true;
+
+ bool AteExtraComma = false;
+ SmallVector<std::pair<Value*, BasicBlock*>, 16> PHIVals;
+ while (1) {
+ PHIVals.push_back(std::make_pair(Op0, cast<BasicBlock>(Op1)));
+
+ if (!EatIfPresent(lltok::comma))
+ break;
+
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ break;
+ }
+
+ if (ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+ ParseValue(Ty, Op0, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
+ ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+ return true;
+ }
+
+ if (!Ty->isFirstClassType())
+ return Error(TypeLoc, "phi node must have first class type");
+
+ PHINode *PN = PHINode::Create(Ty, PHIVals.size());
+ for (unsigned i = 0, e = PHIVals.size(); i != e; ++i)
+ PN->addIncoming(PHIVals[i].first, PHIVals[i].second);
+ Inst = PN;
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseLandingPad
+/// ::= 'landingpad' Type 'personality' TypeAndValue 'cleanup'? Clause+
+/// Clause
+/// ::= 'catch' TypeAndValue
+/// ::= 'filter'
+/// ::= 'filter' TypeAndValue ( ',' TypeAndValue )*
+bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
+ Type *Ty = 0; LocTy TyLoc;
+ Value *PersFn; LocTy PersFnLoc;
+
+ if (ParseType(Ty, TyLoc) ||
+ ParseToken(lltok::kw_personality, "expected 'personality'") ||
+ ParseTypeAndValue(PersFn, PersFnLoc, PFS))
+ return true;
+
+ LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, 0);
+ LP->setCleanup(EatIfPresent(lltok::kw_cleanup));
+
+ while (Lex.getKind() == lltok::kw_catch || Lex.getKind() == lltok::kw_filter){
+ LandingPadInst::ClauseType CT;
+ if (EatIfPresent(lltok::kw_catch))
+ CT = LandingPadInst::Catch;
+ else if (EatIfPresent(lltok::kw_filter))
+ CT = LandingPadInst::Filter;
+ else
+ return TokError("expected 'catch' or 'filter' clause type");
+
+ Value *V; LocTy VLoc;
+ if (ParseTypeAndValue(V, VLoc, PFS)) {
+ delete LP;
+ return true;
+ }
+
+ // A 'catch' type expects a non-array constant. A filter clause expects an
+ // array constant.
+ if (CT == LandingPadInst::Catch) {
+ if (isa<ArrayType>(V->getType()))
+ Error(VLoc, "'catch' clause has an invalid type");
+ } else {
+ if (!isa<ArrayType>(V->getType()))
+ Error(VLoc, "'filter' clause has an invalid type");
+ }
+
+ LP->addClause(V);
+ }
+
+ Inst = LP;
+ return false;
+}
+
+/// ParseCall
+/// ::= 'tail'? 'call' OptionalCallingConv OptionalAttrs Type Value
+/// ParameterList OptionalAttrs
+bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
+ bool isTail) {
+ AttrBuilder RetAttrs, FnAttrs;
+ std::vector<unsigned> FwdRefAttrGrps;
+ LocTy NoBuiltinLoc;
+ CallingConv::ID CC;
+ Type *RetType = 0;
+ LocTy RetTypeLoc;
+ ValID CalleeID;
+ SmallVector<ParamInfo, 16> ArgList;
+ LocTy CallLoc = Lex.getLoc();
+
+ if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) ||
+ ParseOptionalCallingConv(CC) ||
+ ParseOptionalReturnAttrs(RetAttrs) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ ParseValID(CalleeID) ||
+ ParseParameterList(ArgList, PFS) ||
+ ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
+ NoBuiltinLoc))
+ return true;
+
+ // If RetType is a non-function pointer type, then this is the short syntax
+ // for the call, which means that RetType is just the return type. Infer the
+ // rest of the function argument types from the arguments that are present.
+ PointerType *PFTy = 0;
+ FunctionType *Ty = 0;
+ if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
+ !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ // Pull out the types of all of the arguments...
+ std::vector<Type*> ParamTypes;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ParamTypes.push_back(ArgList[i].V->getType());
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ PFTy = PointerType::getUnqual(Ty);
+ }
+
+ // Look up the callee.
+ Value *Callee;
+ if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+
+ // Set up the Attribute for the function.
+ SmallVector<AttributeSet, 8> Attrs;
+ if (RetAttrs.hasAttributes())
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::ReturnIndex,
+ RetAttrs));
+
+ SmallVector<Value*, 8> Args;
+
+ // Loop through FunctionType's arguments and ensure they are specified
+ // correctly. Also, gather any parameter attributes.
+ FunctionType::param_iterator I = Ty->param_begin();
+ FunctionType::param_iterator E = Ty->param_end();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ Type *ExpectedTy = 0;
+ if (I != E) {
+ ExpectedTy = *I++;
+ } else if (!Ty->isVarArg()) {
+ return Error(ArgList[i].Loc, "too many arguments specified");
+ }
+
+ if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+ return Error(ArgList[i].Loc, "argument is not of expected type '" +
+ getTypeString(ExpectedTy) + "'");
+ Args.push_back(ArgList[i].V);
+ if (ArgList[i].Attrs.hasAttributes(i + 1)) {
+ AttrBuilder B(ArgList[i].Attrs, i + 1);
+ Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B));
+ }
+ }
+
+ if (I != E)
+ return Error(CallLoc, "not enough parameters specified for call");
+
+ if (FnAttrs.hasAttributes())
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::FunctionIndex,
+ FnAttrs));
+
+ // Finish off the Attribute and check them
+ AttributeSet PAL = AttributeSet::get(Context, Attrs);
+
+ CallInst *CI = CallInst::Create(Callee, Args);
+ CI->setTailCall(isTail);
+ CI->setCallingConv(CC);
+ CI->setAttributes(PAL);
+ ForwardRefAttrGroups[CI] = FwdRefAttrGrps;
+ Inst = CI;
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Instructions.
+//===----------------------------------------------------------------------===//
+
+/// ParseAlloc
+/// ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalInfo)?
+int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Size = 0;
+ LocTy SizeLoc;
+ unsigned Alignment = 0;
+ Type *Ty = 0;
+ if (ParseType(Ty)) return true;
+
+ bool AteExtraComma = false;
+ if (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::kw_align) {
+ if (ParseOptionalAlignment(Alignment)) return true;
+ } else if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ } else {
+ if (ParseTypeAndValue(Size, SizeLoc, PFS) ||
+ ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ return true;
+ }
+ }
+
+ if (Size && !Size->getType()->isIntegerTy())
+ return Error(SizeLoc, "element count must have integer type");
+
+ Inst = new AllocaInst(Ty, Size, Alignment);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseLoad
+/// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)?
+/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
+/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
+int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val; LocTy Loc;
+ unsigned Alignment = 0;
+ bool AteExtraComma = false;
+ bool isAtomic = false;
+ AtomicOrdering Ordering = NotAtomic;
+ SynchronizationScope Scope = CrossThread;
+
+ if (Lex.getKind() == lltok::kw_atomic) {
+ isAtomic = true;
+ Lex.Lex();
+ }
+
+ bool isVolatile = false;
+ if (Lex.getKind() == lltok::kw_volatile) {
+ isVolatile = true;
+ Lex.Lex();
+ }
+
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseScopeAndOrdering(isAtomic, Scope, Ordering) ||
+ ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ return true;
+
+ if (!Val->getType()->isPointerTy() ||
+ !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
+ return Error(Loc, "load operand must be a pointer to a first class type");
+ if (isAtomic && !Alignment)
+ return Error(Loc, "atomic load must have explicit non-zero alignment");
+ if (Ordering == Release || Ordering == AcquireRelease)
+ return Error(Loc, "atomic load cannot use Release ordering");
+
+ Inst = new LoadInst(Val, "", isVolatile, Alignment, Ordering, Scope);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseStore
+
+/// ::= 'store' 'volatile'? TypeAndValue ',' TypeAndValue (',' 'align' i32)?
+/// ::= 'store' 'atomic' 'volatile'? TypeAndValue ',' TypeAndValue
+/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
+int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val, *Ptr; LocTy Loc, PtrLoc;
+ unsigned Alignment = 0;
+ bool AteExtraComma = false;
+ bool isAtomic = false;
+ AtomicOrdering Ordering = NotAtomic;
+ SynchronizationScope Scope = CrossThread;
+
+ if (Lex.getKind() == lltok::kw_atomic) {
+ isAtomic = true;
+ Lex.Lex();
+ }
+
+ bool isVolatile = false;
+ if (Lex.getKind() == lltok::kw_volatile) {
+ isVolatile = true;
+ Lex.Lex();
+ }
+
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after store operand") ||
+ ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ ParseScopeAndOrdering(isAtomic, Scope, Ordering) ||
+ ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ return true;
+
+ if (!Ptr->getType()->isPointerTy())
+ return Error(PtrLoc, "store operand must be a pointer");
+ if (!Val->getType()->isFirstClassType())
+ return Error(Loc, "store operand must be a first class value");
+ if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
+ return Error(Loc, "stored value and pointer type do not match");
+ if (isAtomic && !Alignment)
+ return Error(Loc, "atomic store must have explicit non-zero alignment");
+ if (Ordering == Acquire || Ordering == AcquireRelease)
+ return Error(Loc, "atomic store cannot use Acquire ordering");
+
+ Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, Scope);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseCmpXchg
+/// ::= 'cmpxchg' 'volatile'? TypeAndValue ',' TypeAndValue ',' TypeAndValue
+/// 'singlethread'? AtomicOrdering
+int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Ptr, *Cmp, *New; LocTy PtrLoc, CmpLoc, NewLoc;
+ bool AteExtraComma = false;
+ AtomicOrdering Ordering = NotAtomic;
+ SynchronizationScope Scope = CrossThread;
+ bool isVolatile = false;
+
+ if (EatIfPresent(lltok::kw_volatile))
+ isVolatile = true;
+
+ if (ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after cmpxchg address") ||
+ ParseTypeAndValue(Cmp, CmpLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") ||
+ ParseTypeAndValue(New, NewLoc, PFS) ||
+ ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering))
+ return true;
+
+ if (Ordering == Unordered)
+ return TokError("cmpxchg cannot be unordered");
+ if (!Ptr->getType()->isPointerTy())
+ return Error(PtrLoc, "cmpxchg operand must be a pointer");
+ if (cast<PointerType>(Ptr->getType())->getElementType() != Cmp->getType())
+ return Error(CmpLoc, "compare value and pointer type do not match");
+ if (cast<PointerType>(Ptr->getType())->getElementType() != New->getType())
+ return Error(NewLoc, "new value and pointer type do not match");
+ if (!New->getType()->isIntegerTy())
+ return Error(NewLoc, "cmpxchg operand must be an integer");
+ unsigned Size = New->getType()->getPrimitiveSizeInBits();
+ if (Size < 8 || (Size & (Size - 1)))
+ return Error(NewLoc, "cmpxchg operand must be power-of-two byte-sized"
+ " integer");
+
+ AtomicCmpXchgInst *CXI =
+ new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, Scope);
+ CXI->setVolatile(isVolatile);
+ Inst = CXI;
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseAtomicRMW
+/// ::= 'atomicrmw' 'volatile'? BinOp TypeAndValue ',' TypeAndValue
+/// 'singlethread'? AtomicOrdering
+int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Ptr, *Val; LocTy PtrLoc, ValLoc;
+ bool AteExtraComma = false;
+ AtomicOrdering Ordering = NotAtomic;
+ SynchronizationScope Scope = CrossThread;
+ bool isVolatile = false;
+ AtomicRMWInst::BinOp Operation;
+
+ if (EatIfPresent(lltok::kw_volatile))
+ isVolatile = true;
+
+ switch (Lex.getKind()) {
+ default: return TokError("expected binary operation in atomicrmw");
+ case lltok::kw_xchg: Operation = AtomicRMWInst::Xchg; break;
+ case lltok::kw_add: Operation = AtomicRMWInst::Add; break;
+ case lltok::kw_sub: Operation = AtomicRMWInst::Sub; break;
+ case lltok::kw_and: Operation = AtomicRMWInst::And; break;
+ case lltok::kw_nand: Operation = AtomicRMWInst::Nand; break;
+ case lltok::kw_or: Operation = AtomicRMWInst::Or; break;
+ case lltok::kw_xor: Operation = AtomicRMWInst::Xor; break;
+ case lltok::kw_max: Operation = AtomicRMWInst::Max; break;
+ case lltok::kw_min: Operation = AtomicRMWInst::Min; break;
+ case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break;
+ case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break;
+ }
+ Lex.Lex(); // Eat the operation.
+
+ if (ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after atomicrmw address") ||
+ ParseTypeAndValue(Val, ValLoc, PFS) ||
+ ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering))
+ return true;
+
+ if (Ordering == Unordered)
+ return TokError("atomicrmw cannot be unordered");
+ if (!Ptr->getType()->isPointerTy())
+ return Error(PtrLoc, "atomicrmw operand must be a pointer");
+ if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
+ return Error(ValLoc, "atomicrmw value and pointer type do not match");
+ if (!Val->getType()->isIntegerTy())
+ return Error(ValLoc, "atomicrmw operand must be an integer");
+ unsigned Size = Val->getType()->getPrimitiveSizeInBits();
+ if (Size < 8 || (Size & (Size - 1)))
+ return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
+ " integer");
+
+ AtomicRMWInst *RMWI =
+ new AtomicRMWInst(Operation, Ptr, Val, Ordering, Scope);
+ RMWI->setVolatile(isVolatile);
+ Inst = RMWI;
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseFence
+/// ::= 'fence' 'singlethread'? AtomicOrdering
+int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) {
+ AtomicOrdering Ordering = NotAtomic;
+ SynchronizationScope Scope = CrossThread;
+ if (ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering))
+ return true;
+
+ if (Ordering == Unordered)
+ return TokError("fence cannot be unordered");
+ if (Ordering == Monotonic)
+ return TokError("fence cannot be monotonic");
+
+ Inst = new FenceInst(Context, Ordering, Scope);
+ return InstNormal;
+}
+
+/// ParseGetElementPtr
+/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
+int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Ptr = 0;
+ Value *Val = 0;
+ LocTy Loc, EltLoc;
+
+ bool InBounds = EatIfPresent(lltok::kw_inbounds);
+
+ if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
+
+ if (!Ptr->getType()->getScalarType()->isPointerTy())
+ return Error(Loc, "base of getelementptr must be a pointer");
+
+ SmallVector<Value*, 16> Indices;
+ bool AteExtraComma = false;
+ while (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ break;
+ }
+ if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
+ if (!Val->getType()->getScalarType()->isIntegerTy())
+ return Error(EltLoc, "getelementptr index must be an integer");
+ if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy())
+ return Error(EltLoc, "getelementptr index type missmatch");
+ if (Val->getType()->isVectorTy()) {
+ unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements();
+ unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements();
+ if (ValNumEl != PtrNumEl)
+ return Error(EltLoc,
+ "getelementptr vector index has a wrong number of elements");
+ }
+ Indices.push_back(Val);
+ }
+
+ if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices))
+ return Error(Loc, "invalid getelementptr indices");
+ Inst = GetElementPtrInst::Create(Ptr, Indices);
+ if (InBounds)
+ cast<GetElementPtrInst>(Inst)->setIsInBounds(true);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseExtractValue
+/// ::= 'extractvalue' TypeAndValue (',' uint32)+
+int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val; LocTy Loc;
+ SmallVector<unsigned, 4> Indices;
+ bool AteExtraComma;
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseIndexList(Indices, AteExtraComma))
+ return true;
+
+ if (!Val->getType()->isAggregateType())
+ return Error(Loc, "extractvalue operand must be aggregate type");
+
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
+ return Error(Loc, "invalid indices for extractvalue");
+ Inst = ExtractValueInst::Create(Val, Indices);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseInsertValue
+/// ::= 'insertvalue' TypeAndValue ',' TypeAndValue (',' uint32)+
+int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val0, *Val1; LocTy Loc0, Loc1;
+ SmallVector<unsigned, 4> Indices;
+ bool AteExtraComma;
+ if (ParseTypeAndValue(Val0, Loc0, PFS) ||
+ ParseToken(lltok::comma, "expected comma after insertvalue operand") ||
+ ParseTypeAndValue(Val1, Loc1, PFS) ||
+ ParseIndexList(Indices, AteExtraComma))
+ return true;
+
+ if (!Val0->getType()->isAggregateType())
+ return Error(Loc0, "insertvalue operand must be aggregate type");
+
+ if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
+ return Error(Loc0, "invalid indices for insertvalue");
+ Inst = InsertValueInst::Create(Val0, Val1, Indices);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+//===----------------------------------------------------------------------===//
+// Embedded metadata.
+//===----------------------------------------------------------------------===//
+
+/// ParseMDNodeVector
+/// ::= Element (',' Element)*
+/// Element
+/// ::= 'null' | TypeAndValue
+bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
+ PerFunctionState *PFS) {
+ // Check for an empty list.
+ if (Lex.getKind() == lltok::rbrace)
+ return false;
+
+ do {
+ // Null is a special case since it is typeless.
+ if (EatIfPresent(lltok::kw_null)) {
+ Elts.push_back(0);
+ continue;
+ }
+
+ Value *V = 0;
+ if (ParseTypeAndValue(V, PFS)) return true;
+ Elts.push_back(V);
+ } while (EatIfPresent(lltok::comma));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h
new file mode 100644
index 000000000000..1f2879e948d9
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLParser.h
@@ -0,0 +1,410 @@
+//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parser class for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASMPARSER_LLPARSER_H
+#define LLVM_ASMPARSER_LLPARSER_H
+
+#include "LLLexer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ValueHandle.h"
+#include <map>
+
+namespace llvm {
+ class Module;
+ class OpaqueType;
+ class Function;
+ class Value;
+ class BasicBlock;
+ class Instruction;
+ class Constant;
+ class GlobalValue;
+ class MDString;
+ class MDNode;
+ class StructType;
+
+ /// ValID - Represents a reference of a definition of some sort with no type.
+ /// There are several cases where we have to parse the value but where the
+ /// type can depend on later context. This may either be a numeric reference
+ /// or a symbolic (%var) reference. This is just a discriminated union.
+ struct ValID {
+ enum {
+ t_LocalID, t_GlobalID, // ID in UIntVal.
+ t_LocalName, t_GlobalName, // Name in StrVal.
+ t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal.
+ t_Null, t_Undef, t_Zero, // No value.
+ t_EmptyArray, // No value: []
+ t_Constant, // Value in ConstantVal.
+ t_InlineAsm, // Value in StrVal/StrVal2/UIntVal.
+ t_MDNode, // Value in MDNodeVal.
+ t_MDString, // Value in MDStringVal.
+ t_ConstantStruct, // Value in ConstantStructElts.
+ t_PackedConstantStruct // Value in ConstantStructElts.
+ } Kind;
+
+ LLLexer::LocTy Loc;
+ unsigned UIntVal;
+ std::string StrVal, StrVal2;
+ APSInt APSIntVal;
+ APFloat APFloatVal;
+ Constant *ConstantVal;
+ MDNode *MDNodeVal;
+ MDString *MDStringVal;
+ Constant **ConstantStructElts;
+
+ ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
+ ~ValID() {
+ if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
+ delete [] ConstantStructElts;
+ }
+
+ bool operator<(const ValID &RHS) const {
+ if (Kind == t_LocalID || Kind == t_GlobalID)
+ return UIntVal < RHS.UIntVal;
+ assert((Kind == t_LocalName || Kind == t_GlobalName ||
+ Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
+ "Ordering not defined for this ValID kind yet");
+ return StrVal < RHS.StrVal;
+ }
+ };
+
+ class LLParser {
+ public:
+ typedef LLLexer::LocTy LocTy;
+ private:
+ LLVMContext &Context;
+ LLLexer Lex;
+ Module *M;
+
+ // Instruction metadata resolution. Each instruction can have a list of
+ // MDRef info associated with them.
+ //
+ // The simpler approach of just creating temporary MDNodes and then calling
+ // RAUW on them when the definition is processed doesn't work because some
+ // instruction metadata kinds, such as dbg, get stored in the IR in an
+ // "optimized" format which doesn't participate in the normal value use
+ // lists. This means that RAUW doesn't work, even on temporary MDNodes
+ // which otherwise support RAUW. Instead, we defer resolving MDNode
+ // references until the definitions have been processed.
+ struct MDRef {
+ SMLoc Loc;
+ unsigned MDKind, MDSlot;
+ };
+ DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
+
+ // Type resolution handling data structures. The location is set when we
+ // have processed a use of the type but not a definition yet.
+ StringMap<std::pair<Type*, LocTy> > NamedTypes;
+ std::vector<std::pair<Type*, LocTy> > NumberedTypes;
+
+ std::vector<TrackingVH<MDNode> > NumberedMetadata;
+ std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
+
+ // Global Value reference information.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
+ std::vector<GlobalValue*> NumberedVals;
+
+ // References to blockaddress. The key is the function ValID, the value is
+ // a list of references to blocks in that function.
+ std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
+ ForwardRefBlockAddresses;
+
+ // Attribute builder reference information.
+ std::map<Value*, std::vector<unsigned> > ForwardRefAttrGroups;
+ std::map<unsigned, AttrBuilder> NumberedAttrBuilders;
+
+ public:
+ LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
+ Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
+ M(m) {}
+ bool Run();
+
+ LLVMContext &getContext() { return Context; }
+
+ private:
+
+ bool Error(LocTy L, const Twine &Msg) const {
+ return Lex.Error(L, Msg);
+ }
+ bool TokError(const Twine &Msg) const {
+ return Error(Lex.getLoc(), Msg);
+ }
+
+ /// GetGlobalVal - Get a value with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// exists but does not have the right type.
+ GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
+ GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
+
+ // Helper Routines.
+ bool ParseToken(lltok::Kind T, const char *ErrMsg);
+ bool EatIfPresent(lltok::Kind T) {
+ if (Lex.getKind() != T) return false;
+ Lex.Lex();
+ return true;
+ }
+
+ FastMathFlags EatFastMathFlagsIfPresent() {
+ FastMathFlags FMF;
+ while (true)
+ switch (Lex.getKind()) {
+ case lltok::kw_fast: FMF.setUnsafeAlgebra(); Lex.Lex(); continue;
+ case lltok::kw_nnan: FMF.setNoNaNs(); Lex.Lex(); continue;
+ case lltok::kw_ninf: FMF.setNoInfs(); Lex.Lex(); continue;
+ case lltok::kw_nsz: FMF.setNoSignedZeros(); Lex.Lex(); continue;
+ case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue;
+ default: return FMF;
+ }
+ return FMF;
+ }
+
+ bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
+ if (Lex.getKind() != T) {
+ Present = false;
+ } else {
+ if (Loc)
+ *Loc = Lex.getLoc();
+ Lex.Lex();
+ Present = true;
+ }
+ return false;
+ }
+ bool ParseStringConstant(std::string &Result);
+ bool ParseUInt32(unsigned &Val);
+ bool ParseUInt32(unsigned &Val, LocTy &Loc) {
+ Loc = Lex.getLoc();
+ return ParseUInt32(Val);
+ }
+
+ bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
+ bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
+ bool ParseOptionalAddrSpace(unsigned &AddrSpace);
+ bool ParseOptionalParamAttrs(AttrBuilder &B);
+ bool ParseOptionalReturnAttrs(AttrBuilder &B);
+ bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
+ bool ParseOptionalLinkage(unsigned &Linkage) {
+ bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
+ }
+ bool ParseOptionalVisibility(unsigned &Visibility);
+ bool ParseOptionalCallingConv(CallingConv::ID &CC);
+ bool ParseOptionalAlignment(unsigned &Alignment);
+ bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
+ AtomicOrdering &Ordering);
+ bool ParseOptionalStackAlignment(unsigned &Alignment);
+ bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
+ bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
+ bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
+ bool AteExtraComma;
+ if (ParseIndexList(Indices, AteExtraComma)) return true;
+ if (AteExtraComma)
+ return TokError("expected index");
+ return false;
+ }
+
+ // Top-Level Entities
+ bool ParseTopLevelEntities();
+ bool ValidateEndOfModule();
+ bool ParseTargetDefinition();
+ bool ParseModuleAsm();
+ bool ParseDepLibs(); // FIXME: Remove in 4.0.
+ bool ParseUnnamedType();
+ bool ParseNamedType();
+ bool ParseDeclare();
+ bool ParseDefine();
+
+ bool ParseGlobalType(bool &IsConstant);
+ bool ParseUnnamedGlobal();
+ bool ParseNamedGlobal();
+ bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
+ bool HasLinkage, unsigned Visibility);
+ bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
+ bool ParseStandaloneMetadata();
+ bool ParseNamedMetadata();
+ bool ParseMDString(MDString *&Result);
+ bool ParseMDNodeID(MDNode *&Result);
+ bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
+ bool ParseUnnamedAttrGrp();
+ bool ParseFnAttributeValuePairs(AttrBuilder &B,
+ std::vector<unsigned> &FwdRefAttrGrps,
+ bool inAttrGrp, LocTy &NoBuiltinLoc);
+
+ // Type Parsing.
+ bool ParseType(Type *&Result, bool AllowVoid = false);
+ bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
+ Loc = Lex.getLoc();
+ return ParseType(Result, AllowVoid);
+ }
+ bool ParseAnonStructType(Type *&Result, bool Packed);
+ bool ParseStructBody(SmallVectorImpl<Type*> &Body);
+ bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
+ std::pair<Type*, LocTy> &Entry,
+ Type *&ResultTy);
+
+ bool ParseArrayVectorType(Type *&Result, bool isVector);
+ bool ParseFunctionType(Type *&Result);
+
+ // Function Semantic Analysis.
+ class PerFunctionState {
+ LLParser &P;
+ Function &F;
+ std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
+ std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
+ std::vector<Value*> NumberedVals;
+
+ /// FunctionNumber - If this is an unnamed function, this is the slot
+ /// number of it, otherwise it is -1.
+ int FunctionNumber;
+ public:
+ PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
+ ~PerFunctionState();
+
+ Function &getFunction() const { return F; }
+
+ bool FinishFunction();
+
+ /// GetVal - Get a value with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// exists but does not have the right type.
+ Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc);
+ Value *GetVal(unsigned ID, Type *Ty, LocTy Loc);
+
+ /// SetInstName - After an instruction is parsed and inserted into its
+ /// basic block, this installs its name.
+ bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
+ Instruction *Inst);
+
+ /// GetBB - Get a basic block with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// is not a BasicBlock.
+ BasicBlock *GetBB(const std::string &Name, LocTy Loc);
+ BasicBlock *GetBB(unsigned ID, LocTy Loc);
+
+ /// DefineBB - Define the specified basic block, which is either named or
+ /// unnamed. If there is an error, this returns null otherwise it returns
+ /// the block being defined.
+ BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
+ };
+
+ bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
+ PerFunctionState *PFS);
+
+ bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
+ bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
+ return ParseValue(Ty, V, &PFS);
+ }
+ bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
+ PerFunctionState &PFS) {
+ Loc = Lex.getLoc();
+ return ParseValue(Ty, V, &PFS);
+ }
+
+ bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
+ bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
+ return ParseTypeAndValue(V, &PFS);
+ }
+ bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
+ Loc = Lex.getLoc();
+ return ParseTypeAndValue(V, PFS);
+ }
+ bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+ PerFunctionState &PFS);
+ bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
+ LocTy Loc;
+ return ParseTypeAndBasicBlock(BB, Loc, PFS);
+ }
+
+
+ struct ParamInfo {
+ LocTy Loc;
+ Value *V;
+ AttributeSet Attrs;
+ ParamInfo(LocTy loc, Value *v, AttributeSet attrs)
+ : Loc(loc), V(v), Attrs(attrs) {}
+ };
+ bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+ PerFunctionState &PFS);
+
+ // Constant Parsing.
+ bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
+ bool ParseGlobalValue(Type *Ty, Constant *&V);
+ bool ParseGlobalTypeAndValue(Constant *&V);
+ bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+ bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
+ bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
+ bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
+ bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
+
+ // Function Parsing.
+ struct ArgInfo {
+ LocTy Loc;
+ Type *Ty;
+ AttributeSet Attrs;
+ std::string Name;
+ ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
+ : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
+ };
+ bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
+ bool ParseFunctionHeader(Function *&Fn, bool isDefine);
+ bool ParseFunctionBody(Function &Fn);
+ bool ParseBasicBlock(PerFunctionState &PFS);
+
+ // Instruction Parsing. Each instruction parsing routine can return with a
+ // normal result, an error result, or return having eaten an extra comma.
+ enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
+ int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS);
+ bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
+
+ bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
+ bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
+
+ bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
+ unsigned OperandType);
+ bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
+ bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
+ bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
+ bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
+ bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
+ int ParsePHI(Instruction *&I, PerFunctionState &PFS);
+ bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
+ bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
+ int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
+ int ParseLoad(Instruction *&I, PerFunctionState &PFS);
+ int ParseStore(Instruction *&I, PerFunctionState &PFS);
+ int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
+ int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
+ int ParseFence(Instruction *&I, PerFunctionState &PFS);
+ int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
+ int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
+ int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
+
+ bool ResolveForwardRefBlockAddresses(Function *TheFn,
+ std::vector<std::pair<ValID, GlobalValue*> > &Refs,
+ PerFunctionState *PFS);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
new file mode 100644
index 000000000000..cd25ba30008f
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -0,0 +1,181 @@
+//===- LLToken.h - Token Codes for LLVM Assembly Files ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the enums for the .ll lexer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBS_ASMPARSER_LLTOKEN_H
+#define LIBS_ASMPARSER_LLTOKEN_H
+
+namespace llvm {
+namespace lltok {
+ enum Kind {
+ // Markers
+ Eof, Error,
+
+ // Tokens with no info.
+ dotdotdot, // ...
+ equal, comma, // = ,
+ star, // *
+ lsquare, rsquare, // [ ]
+ lbrace, rbrace, // { }
+ less, greater, // < >
+ lparen, rparen, // ( )
+ backslash, // \ (not /)
+ exclaim, // !
+ hash, // #
+
+ kw_x,
+ kw_true, kw_false,
+ kw_declare, kw_define,
+ kw_global, kw_constant,
+
+ kw_private, kw_linker_private, kw_linker_private_weak,
+ kw_linker_private_weak_def_auto, // FIXME: For backwards compatibility.
+ kw_internal,
+ kw_linkonce, kw_linkonce_odr, kw_linkonce_odr_auto_hide,
+ kw_weak, kw_weak_odr, kw_appending,
+ kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
+ kw_default, kw_hidden, kw_protected,
+ kw_unnamed_addr,
+ kw_externally_initialized,
+ kw_extern_weak,
+ kw_external, kw_thread_local,
+ kw_localdynamic, kw_initialexec, kw_localexec,
+ kw_zeroinitializer,
+ kw_undef, kw_null,
+ kw_to,
+ kw_tail,
+ kw_target,
+ kw_triple,
+ kw_unwind,
+ kw_deplibs, // FIXME: Remove in 4.0
+ kw_datalayout,
+ kw_volatile,
+ kw_atomic,
+ kw_unordered, kw_monotonic, kw_acquire, kw_release, kw_acq_rel, kw_seq_cst,
+ kw_singlethread,
+ kw_nnan,
+ kw_ninf,
+ kw_nsz,
+ kw_arcp,
+ kw_fast,
+ kw_nuw,
+ kw_nsw,
+ kw_exact,
+ kw_inbounds,
+ kw_align,
+ kw_addrspace,
+ kw_section,
+ kw_alias,
+ kw_module,
+ kw_asm,
+ kw_sideeffect,
+ kw_alignstack,
+ kw_inteldialect,
+ kw_gc,
+ kw_c,
+
+ kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
+ kw_intel_ocl_bicc,
+ kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
+ kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
+ kw_msp430_intrcc,
+ kw_ptx_kernel, kw_ptx_device,
+ kw_spir_kernel, kw_spir_func,
+
+ // Attributes:
+ kw_attributes,
+ kw_alwaysinline,
+ kw_sanitize_address,
+ kw_byval,
+ kw_inlinehint,
+ kw_inreg,
+ kw_minsize,
+ kw_naked,
+ kw_nest,
+ kw_noalias,
+ kw_nobuiltin,
+ kw_nocapture,
+ kw_noduplicate,
+ kw_noimplicitfloat,
+ kw_noinline,
+ kw_nonlazybind,
+ kw_noredzone,
+ kw_noreturn,
+ kw_nounwind,
+ kw_optsize,
+ kw_readnone,
+ kw_readonly,
+ kw_returns_twice,
+ kw_signext,
+ kw_ssp,
+ kw_sspreq,
+ kw_sspstrong,
+ kw_sret,
+ kw_sanitize_thread,
+ kw_sanitize_memory,
+ kw_uwtable,
+ kw_zeroext,
+
+ kw_type,
+ kw_opaque,
+
+ kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
+ kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
+ kw_ueq, kw_une,
+
+ // atomicrmw operations that aren't also instruction keywords.
+ kw_xchg, kw_nand, kw_max, kw_min, kw_umax, kw_umin,
+
+ // Instruction Opcodes (Opcode in UIntVal).
+ kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul,
+ kw_udiv, kw_sdiv, kw_fdiv,
+ kw_urem, kw_srem, kw_frem, kw_shl, kw_lshr, kw_ashr,
+ kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp,
+
+ kw_phi, kw_call,
+ kw_trunc, kw_zext, kw_sext, kw_fptrunc, kw_fpext, kw_uitofp, kw_sitofp,
+ kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast,
+ kw_select, kw_va_arg,
+
+ kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter,
+
+ kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_resume,
+ kw_unreachable,
+
+ kw_alloca, kw_load, kw_store, kw_fence, kw_cmpxchg, kw_atomicrmw,
+ kw_getelementptr,
+
+ kw_extractelement, kw_insertelement, kw_shufflevector,
+ kw_extractvalue, kw_insertvalue, kw_blockaddress,
+
+ // Unsigned Valued tokens (UIntVal).
+ GlobalID, // @42
+ LocalVarID, // %42
+ AttrGrpID, // #42
+
+ // String valued tokens (StrVal).
+ LabelStr, // foo:
+ GlobalVar, // @foo @"foo"
+ LocalVar, // %foo %"foo"
+ MetadataVar, // !foo
+ StringConstant, // "foo"
+
+ // Type valued tokens (TyVal).
+ Type,
+
+ APFloat, // APFloatVal
+ APSInt // APSInt
+ };
+} // end namespace lltok
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/AsmParser/Parser.cpp b/contrib/llvm/lib/AsmParser/Parser.cpp
new file mode 100644
index 000000000000..bb4f03bacc17
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/Parser.cpp
@@ -0,0 +1,62 @@
+//===- Parser.cpp - Main dispatch module for the Parser library -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Parser.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Parser.h"
+#include "LLParser.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cstring>
+using namespace llvm;
+
+Module *llvm::ParseAssembly(MemoryBuffer *F,
+ Module *M,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
+ SourceMgr SM;
+ SM.AddNewSourceBuffer(F, SMLoc());
+
+ // If we are parsing into an existing module, do it.
+ if (M)
+ return LLParser(F, SM, Err, M).Run() ? 0 : M;
+
+ // Otherwise create a new module.
+ OwningPtr<Module> M2(new Module(F->getBufferIdentifier(), Context));
+ if (LLParser(F, SM, Err, M2.get()).Run())
+ return 0;
+ return M2.take();
+}
+
+Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
+ "Could not open input file: " + ec.message());
+ return 0;
+ }
+
+ return ParseAssembly(File.take(), 0, Err, Context);
+}
+
+Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
+ SMDiagnostic &Err, LLVMContext &Context) {
+ MemoryBuffer *F =
+ MemoryBuffer::getMemBuffer(StringRef(AsmString, strlen(AsmString)),
+ "<string>");
+
+ return ParseAssembly(F, M, Err, Context);
+}
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
new file mode 100644
index 000000000000..5cd6c552bd8a
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
@@ -0,0 +1,88 @@
+//===-- BitReader.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitReader.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstring>
+#include <string>
+
+using namespace llvm;
+
+/* Builds a module from the bitcode in the specified memory buffer, returning a
+ reference to the module via the OutModule parameter. Returns 0 on success.
+ Optionally returns a human-readable error message via OutMessage. */
+LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule, char **OutMessage) {
+ return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule,
+ OutMessage);
+}
+
+LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule,
+ char **OutMessage) {
+ std::string Message;
+
+ *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef),
+ &Message));
+ if (!*OutModule) {
+ if (OutMessage)
+ *OutMessage = strdup(Message.c_str());
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Reads a module from the specified path, returning via the OutModule parameter
+ a module provider which performs lazy deserialization. Returns 0 on success.
+ Optionally returns a human-readable error message via OutMessage. */
+LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutM,
+ char **OutMessage) {
+ std::string Message;
+
+ *OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef),
+ &Message));
+ if (!*OutM) {
+ if (OutMessage)
+ *OutMessage = strdup(Message.c_str());
+ return 1;
+ }
+
+ return 0;
+
+}
+
+LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleInContext(LLVMGetGlobalContext(), MemBuf, OutM,
+ OutMessage);
+}
+
+/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */
+LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf,
+ reinterpret_cast<LLVMModuleRef*>(OutMP),
+ OutMessage);
+}
+
+/* Deprecated: Use LLVMGetBitcodeModule instead. */
+LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf,
+ OutMP, OutMessage);
+}
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
new file mode 100644
index 000000000000..f34884391a74
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -0,0 +1,3111 @@
+//===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "BitcodeReader.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/DataStream.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+using namespace llvm;
+
+enum {
+ SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
+};
+
+void BitcodeReader::materializeForwardReferencedFunctions() {
+ while (!BlockAddrFwdRefs.empty()) {
+ Function *F = BlockAddrFwdRefs.begin()->first;
+ F->Materialize();
+ }
+}
+
+void BitcodeReader::FreeState() {
+ if (BufferOwned)
+ delete Buffer;
+ Buffer = 0;
+ std::vector<Type*>().swap(TypeList);
+ ValueList.clear();
+ MDValueList.clear();
+
+ std::vector<AttributeSet>().swap(MAttributes);
+ std::vector<BasicBlock*>().swap(FunctionBBs);
+ std::vector<Function*>().swap(FunctionsWithBodies);
+ DeferredFunctionInfo.clear();
+ MDKindMap.clear();
+
+ assert(BlockAddrFwdRefs.empty() && "Unresolved blockaddress fwd references");
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions to implement forward reference resolution, etc.
+//===----------------------------------------------------------------------===//
+
+/// ConvertToString - Convert a string from a record into an std::string, return
+/// true on failure.
+template<typename StrTy>
+static bool ConvertToString(ArrayRef<uint64_t> Record, unsigned Idx,
+ StrTy &Result) {
+ if (Idx > Record.size())
+ return true;
+
+ for (unsigned i = Idx, e = Record.size(); i != e; ++i)
+ Result += (char)Record[i];
+ return false;
+}
+
+static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
+ switch (Val) {
+ default: // Map unknown/new linkages to external
+ case 0: return GlobalValue::ExternalLinkage;
+ case 1: return GlobalValue::WeakAnyLinkage;
+ case 2: return GlobalValue::AppendingLinkage;
+ case 3: return GlobalValue::InternalLinkage;
+ case 4: return GlobalValue::LinkOnceAnyLinkage;
+ case 5: return GlobalValue::DLLImportLinkage;
+ case 6: return GlobalValue::DLLExportLinkage;
+ case 7: return GlobalValue::ExternalWeakLinkage;
+ case 8: return GlobalValue::CommonLinkage;
+ case 9: return GlobalValue::PrivateLinkage;
+ case 10: return GlobalValue::WeakODRLinkage;
+ case 11: return GlobalValue::LinkOnceODRLinkage;
+ case 12: return GlobalValue::AvailableExternallyLinkage;
+ case 13: return GlobalValue::LinkerPrivateLinkage;
+ case 14: return GlobalValue::LinkerPrivateWeakLinkage;
+ case 15: return GlobalValue::LinkOnceODRAutoHideLinkage;
+ }
+}
+
+static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) {
+ switch (Val) {
+ default: // Map unknown visibilities to default.
+ case 0: return GlobalValue::DefaultVisibility;
+ case 1: return GlobalValue::HiddenVisibility;
+ case 2: return GlobalValue::ProtectedVisibility;
+ }
+}
+
+static GlobalVariable::ThreadLocalMode GetDecodedThreadLocalMode(unsigned Val) {
+ switch (Val) {
+ case 0: return GlobalVariable::NotThreadLocal;
+ default: // Map unknown non-zero value to general dynamic.
+ case 1: return GlobalVariable::GeneralDynamicTLSModel;
+ case 2: return GlobalVariable::LocalDynamicTLSModel;
+ case 3: return GlobalVariable::InitialExecTLSModel;
+ case 4: return GlobalVariable::LocalExecTLSModel;
+ }
+}
+
+static int GetDecodedCastOpcode(unsigned Val) {
+ switch (Val) {
+ default: return -1;
+ case bitc::CAST_TRUNC : return Instruction::Trunc;
+ case bitc::CAST_ZEXT : return Instruction::ZExt;
+ case bitc::CAST_SEXT : return Instruction::SExt;
+ case bitc::CAST_FPTOUI : return Instruction::FPToUI;
+ case bitc::CAST_FPTOSI : return Instruction::FPToSI;
+ case bitc::CAST_UITOFP : return Instruction::UIToFP;
+ case bitc::CAST_SITOFP : return Instruction::SIToFP;
+ case bitc::CAST_FPTRUNC : return Instruction::FPTrunc;
+ case bitc::CAST_FPEXT : return Instruction::FPExt;
+ case bitc::CAST_PTRTOINT: return Instruction::PtrToInt;
+ case bitc::CAST_INTTOPTR: return Instruction::IntToPtr;
+ case bitc::CAST_BITCAST : return Instruction::BitCast;
+ }
+}
+static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) {
+ switch (Val) {
+ default: return -1;
+ case bitc::BINOP_ADD:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add;
+ case bitc::BINOP_SUB:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub;
+ case bitc::BINOP_MUL:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul;
+ case bitc::BINOP_UDIV: return Instruction::UDiv;
+ case bitc::BINOP_SDIV:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv;
+ case bitc::BINOP_UREM: return Instruction::URem;
+ case bitc::BINOP_SREM:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FRem : Instruction::SRem;
+ case bitc::BINOP_SHL: return Instruction::Shl;
+ case bitc::BINOP_LSHR: return Instruction::LShr;
+ case bitc::BINOP_ASHR: return Instruction::AShr;
+ case bitc::BINOP_AND: return Instruction::And;
+ case bitc::BINOP_OR: return Instruction::Or;
+ case bitc::BINOP_XOR: return Instruction::Xor;
+ }
+}
+
+static AtomicRMWInst::BinOp GetDecodedRMWOperation(unsigned Val) {
+ switch (Val) {
+ default: return AtomicRMWInst::BAD_BINOP;
+ case bitc::RMW_XCHG: return AtomicRMWInst::Xchg;
+ case bitc::RMW_ADD: return AtomicRMWInst::Add;
+ case bitc::RMW_SUB: return AtomicRMWInst::Sub;
+ case bitc::RMW_AND: return AtomicRMWInst::And;
+ case bitc::RMW_NAND: return AtomicRMWInst::Nand;
+ case bitc::RMW_OR: return AtomicRMWInst::Or;
+ case bitc::RMW_XOR: return AtomicRMWInst::Xor;
+ case bitc::RMW_MAX: return AtomicRMWInst::Max;
+ case bitc::RMW_MIN: return AtomicRMWInst::Min;
+ case bitc::RMW_UMAX: return AtomicRMWInst::UMax;
+ case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
+ }
+}
+
+static AtomicOrdering GetDecodedOrdering(unsigned Val) {
+ switch (Val) {
+ case bitc::ORDERING_NOTATOMIC: return NotAtomic;
+ case bitc::ORDERING_UNORDERED: return Unordered;
+ case bitc::ORDERING_MONOTONIC: return Monotonic;
+ case bitc::ORDERING_ACQUIRE: return Acquire;
+ case bitc::ORDERING_RELEASE: return Release;
+ case bitc::ORDERING_ACQREL: return AcquireRelease;
+ default: // Map unknown orderings to sequentially-consistent.
+ case bitc::ORDERING_SEQCST: return SequentiallyConsistent;
+ }
+}
+
+static SynchronizationScope GetDecodedSynchScope(unsigned Val) {
+ switch (Val) {
+ case bitc::SYNCHSCOPE_SINGLETHREAD: return SingleThread;
+ default: // Map unknown scopes to cross-thread.
+ case bitc::SYNCHSCOPE_CROSSTHREAD: return CrossThread;
+ }
+}
+
+namespace llvm {
+namespace {
+ /// @brief A class for maintaining the slot number definition
+ /// as a placeholder for the actual definition for forward constants defs.
+ class ConstantPlaceHolder : public ConstantExpr {
+ void operator=(const ConstantPlaceHolder &) LLVM_DELETED_FUNCTION;
+ public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ explicit ConstantPlaceHolder(Type *Ty, LLVMContext& Context)
+ : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
+ Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
+ }
+
+ /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
+ static bool classof(const Value *V) {
+ return isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
+ }
+
+
+ /// Provide fast operand accessors
+ //DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+ };
+}
+
+// FIXME: can we inherit this from ConstantExpr?
+template <>
+struct OperandTraits<ConstantPlaceHolder> :
+ public FixedNumOperandTraits<ConstantPlaceHolder, 1> {
+};
+}
+
+
+void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) {
+ if (Idx == size()) {
+ push_back(V);
+ return;
+ }
+
+ if (Idx >= size())
+ resize(Idx+1);
+
+ WeakVH &OldV = ValuePtrs[Idx];
+ if (OldV == 0) {
+ OldV = V;
+ return;
+ }
+
+ // Handle constants and non-constants (e.g. instrs) differently for
+ // efficiency.
+ if (Constant *PHC = dyn_cast<Constant>(&*OldV)) {
+ ResolveConstants.push_back(std::make_pair(PHC, Idx));
+ OldV = V;
+ } else {
+ // If there was a forward reference to this value, replace it.
+ Value *PrevVal = OldV;
+ OldV->replaceAllUsesWith(V);
+ delete PrevVal;
+ }
+}
+
+
+Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
+ Type *Ty) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = ValuePtrs[Idx]) {
+ assert(Ty == V->getType() && "Type mismatch in constant table!");
+ return cast<Constant>(V);
+ }
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Constant *C = new ConstantPlaceHolder(Ty, Context);
+ ValuePtrs[Idx] = C;
+ return C;
+}
+
+Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = ValuePtrs[Idx]) {
+ assert((Ty == 0 || Ty == V->getType()) && "Type mismatch in value table!");
+ return V;
+ }
+
+ // No type specified, must be invalid reference.
+ if (Ty == 0) return 0;
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Value *V = new Argument(Ty);
+ ValuePtrs[Idx] = V;
+ return V;
+}
+
+/// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+/// resolves any forward references. The idea behind this is that we sometimes
+/// get constants (such as large arrays) which reference *many* forward ref
+/// constants. Replacing each of these causes a lot of thrashing when
+/// building/reuniquing the constant. Instead of doing this, we look at all the
+/// uses and rewrite all the place holders at once for any constant that uses
+/// a placeholder.
+void BitcodeReaderValueList::ResolveConstantForwardRefs() {
+ // Sort the values by-pointer so that they are efficient to look up with a
+ // binary search.
+ std::sort(ResolveConstants.begin(), ResolveConstants.end());
+
+ SmallVector<Constant*, 64> NewOps;
+
+ while (!ResolveConstants.empty()) {
+ Value *RealVal = operator[](ResolveConstants.back().second);
+ Constant *Placeholder = ResolveConstants.back().first;
+ ResolveConstants.pop_back();
+
+ // Loop over all users of the placeholder, updating them to reference the
+ // new value. If they reference more than one placeholder, update them all
+ // at once.
+ while (!Placeholder->use_empty()) {
+ Value::use_iterator UI = Placeholder->use_begin();
+ User *U = *UI;
+
+ // If the using object isn't uniqued, just update the operands. This
+ // handles instructions and initializers for global variables.
+ if (!isa<Constant>(U) || isa<GlobalValue>(U)) {
+ UI.getUse().set(RealVal);
+ continue;
+ }
+
+ // Otherwise, we have a constant that uses the placeholder. Replace that
+ // constant with a new constant that has *all* placeholder uses updated.
+ Constant *UserC = cast<Constant>(U);
+ for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end();
+ I != E; ++I) {
+ Value *NewOp;
+ if (!isa<ConstantPlaceHolder>(*I)) {
+ // Not a placeholder reference.
+ NewOp = *I;
+ } else if (*I == Placeholder) {
+ // Common case is that it just references this one placeholder.
+ NewOp = RealVal;
+ } else {
+ // Otherwise, look up the placeholder in ResolveConstants.
+ ResolveConstantsTy::iterator It =
+ std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(),
+ std::pair<Constant*, unsigned>(cast<Constant>(*I),
+ 0));
+ assert(It != ResolveConstants.end() && It->first == *I);
+ NewOp = operator[](It->second);
+ }
+
+ NewOps.push_back(cast<Constant>(NewOp));
+ }
+
+ // Make the new constant.
+ Constant *NewC;
+ if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
+ NewC = ConstantArray::get(UserCA->getType(), NewOps);
+ } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
+ NewC = ConstantStruct::get(UserCS->getType(), NewOps);
+ } else if (isa<ConstantVector>(UserC)) {
+ NewC = ConstantVector::get(NewOps);
+ } else {
+ assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
+ NewC = cast<ConstantExpr>(UserC)->getWithOperands(NewOps);
+ }
+
+ UserC->replaceAllUsesWith(NewC);
+ UserC->destroyConstant();
+ NewOps.clear();
+ }
+
+ // Update all ValueHandles, they should be the only users at this point.
+ Placeholder->replaceAllUsesWith(RealVal);
+ delete Placeholder;
+ }
+}
+
+void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) {
+ if (Idx == size()) {
+ push_back(V);
+ return;
+ }
+
+ if (Idx >= size())
+ resize(Idx+1);
+
+ WeakVH &OldV = MDValuePtrs[Idx];
+ if (OldV == 0) {
+ OldV = V;
+ return;
+ }
+
+ // If there was a forward reference to this value, replace it.
+ MDNode *PrevVal = cast<MDNode>(OldV);
+ OldV->replaceAllUsesWith(V);
+ MDNode::deleteTemporary(PrevVal);
+ // Deleting PrevVal sets Idx value in MDValuePtrs to null. Set new
+ // value for Idx.
+ MDValuePtrs[Idx] = V;
+}
+
+Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = MDValuePtrs[Idx]) {
+ assert(V->getType()->isMetadataTy() && "Type mismatch in value table!");
+ return V;
+ }
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Value *V = MDNode::getTemporary(Context, ArrayRef<Value*>());
+ MDValuePtrs[Idx] = V;
+ return V;
+}
+
+Type *BitcodeReader::getTypeByID(unsigned ID) {
+ // The type table size is always specified correctly.
+ if (ID >= TypeList.size())
+ return 0;
+
+ if (Type *Ty = TypeList[ID])
+ return Ty;
+
+ // If we have a forward reference, the only possible case is when it is to a
+ // named struct. Just create a placeholder for now.
+ return TypeList[ID] = StructType::create(Context);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Functions for parsing blocks from the bitcode file
+//===----------------------------------------------------------------------===//
+
+
+/// \brief This fills an AttrBuilder object with the LLVM attributes that have
+/// been decoded from the given integer. This function must stay in sync with
+/// 'encodeLLVMAttributesForBitcode'.
+static void decodeLLVMAttributesForBitcode(AttrBuilder &B,
+ uint64_t EncodedAttrs) {
+ // FIXME: Remove in 4.0.
+
+ // The alignment is stored as a 16-bit raw value from bits 31--16. We shift
+ // the bits above 31 down by 11 bits.
+ unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16;
+ assert((!Alignment || isPowerOf2_32(Alignment)) &&
+ "Alignment must be a power of two.");
+
+ if (Alignment)
+ B.addAlignmentAttr(Alignment);
+ B.addRawValue(((EncodedAttrs & (0xfffffULL << 32)) >> 11) |
+ (EncodedAttrs & 0xffff));
+}
+
+bool BitcodeReader::ParseAttributeBlock() {
+ if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
+ return Error("Malformed block record");
+
+ if (!MAttributes.empty())
+ return Error("Multiple PARAMATTR blocks found!");
+
+ SmallVector<uint64_t, 64> Record;
+
+ SmallVector<AttributeSet, 8> Attrs;
+
+ // Read all the records.
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("Error at end of PARAMATTR block");
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::PARAMATTR_CODE_ENTRY_OLD: { // ENTRY: [paramidx0, attr0, ...]
+ // FIXME: Remove in 4.0.
+ if (Record.size() & 1)
+ return Error("Invalid ENTRY record");
+
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ AttrBuilder B;
+ decodeLLVMAttributesForBitcode(B, Record[i+1]);
+ Attrs.push_back(AttributeSet::get(Context, Record[i], B));
+ }
+
+ MAttributes.push_back(AttributeSet::get(Context, Attrs));
+ Attrs.clear();
+ break;
+ }
+ case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [attrgrp0, attrgrp1, ...]
+ for (unsigned i = 0, e = Record.size(); i != e; ++i)
+ Attrs.push_back(MAttributeGroups[Record[i]]);
+
+ MAttributes.push_back(AttributeSet::get(Context, Attrs));
+ Attrs.clear();
+ break;
+ }
+ }
+ }
+}
+
+bool BitcodeReader::ParseAttributeGroupBlock() {
+ if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
+ return Error("Malformed block record");
+
+ if (!MAttributeGroups.empty())
+ return Error("Multiple PARAMATTR_GROUP blocks found!");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("Error at end of PARAMATTR_GROUP block");
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...]
+ if (Record.size() < 3)
+ return Error("Invalid ENTRY record");
+
+ uint64_t GrpID = Record[0];
+ uint64_t Idx = Record[1]; // Index of the object this attribute refers to.
+
+ AttrBuilder B;
+ for (unsigned i = 2, e = Record.size(); i != e; ++i) {
+ if (Record[i] == 0) { // Enum attribute
+ B.addAttribute(Attribute::AttrKind(Record[++i]));
+ } else if (Record[i] == 1) { // Align attribute
+ if (Attribute::AttrKind(Record[++i]) == Attribute::Alignment)
+ B.addAlignmentAttr(Record[++i]);
+ else
+ B.addStackAlignmentAttr(Record[++i]);
+ } else { // String attribute
+ assert((Record[i] == 3 || Record[i] == 4) &&
+ "Invalid attribute group entry");
+ bool HasValue = (Record[i++] == 4);
+ SmallString<64> KindStr;
+ SmallString<64> ValStr;
+
+ while (Record[i] != 0 && i != e)
+ KindStr += Record[i++];
+ assert(Record[i] == 0 && "Kind string not null terminated");
+
+ if (HasValue) {
+ // Has a value associated with it.
+ ++i; // Skip the '0' that terminates the "kind" string.
+ while (Record[i] != 0 && i != e)
+ ValStr += Record[i++];
+ assert(Record[i] == 0 && "Value string not null terminated");
+ }
+
+ B.addAttribute(KindStr.str(), ValStr.str());
+ }
+ }
+
+ MAttributeGroups[GrpID] = AttributeSet::get(Context, Idx, B);
+ break;
+ }
+ }
+ }
+}
+
+bool BitcodeReader::ParseTypeTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
+ return Error("Malformed block record");
+
+ return ParseTypeTableBody();
+}
+
+bool BitcodeReader::ParseTypeTableBody() {
+ if (!TypeList.empty())
+ return Error("Multiple TYPE_BLOCKs found!");
+
+ SmallVector<uint64_t, 64> Record;
+ unsigned NumRecords = 0;
+
+ SmallString<64> TypeName;
+
+ // Read all the records for this type table.
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ Error("Error in the type table block");
+ return true;
+ case BitstreamEntry::EndBlock:
+ if (NumRecords != TypeList.size())
+ return Error("Invalid type forward reference in TYPE_BLOCK");
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ Type *ResultTy = 0;
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: return Error("unknown type in type table");
+ case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
+ // TYPE_CODE_NUMENTRY contains a count of the number of types in the
+ // type list. This allows us to reserve space.
+ if (Record.size() < 1)
+ return Error("Invalid TYPE_CODE_NUMENTRY record");
+ TypeList.resize(Record[0]);
+ continue;
+ case bitc::TYPE_CODE_VOID: // VOID
+ ResultTy = Type::getVoidTy(Context);
+ break;
+ case bitc::TYPE_CODE_HALF: // HALF
+ ResultTy = Type::getHalfTy(Context);
+ break;
+ case bitc::TYPE_CODE_FLOAT: // FLOAT
+ ResultTy = Type::getFloatTy(Context);
+ break;
+ case bitc::TYPE_CODE_DOUBLE: // DOUBLE
+ ResultTy = Type::getDoubleTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_FP80: // X86_FP80
+ ResultTy = Type::getX86_FP80Ty(Context);
+ break;
+ case bitc::TYPE_CODE_FP128: // FP128
+ ResultTy = Type::getFP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
+ ResultTy = Type::getPPC_FP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_LABEL: // LABEL
+ ResultTy = Type::getLabelTy(Context);
+ break;
+ case bitc::TYPE_CODE_METADATA: // METADATA
+ ResultTy = Type::getMetadataTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_MMX: // X86_MMX
+ ResultTy = Type::getX86_MMXTy(Context);
+ break;
+ case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
+ if (Record.size() < 1)
+ return Error("Invalid Integer type record");
+
+ ResultTy = IntegerType::get(Context, Record[0]);
+ break;
+ case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
+ // [pointee type, address space]
+ if (Record.size() < 1)
+ return Error("Invalid POINTER type record");
+ unsigned AddressSpace = 0;
+ if (Record.size() == 2)
+ AddressSpace = Record[1];
+ ResultTy = getTypeByID(Record[0]);
+ if (ResultTy == 0) return Error("invalid element type in pointer type");
+ ResultTy = PointerType::get(ResultTy, AddressSpace);
+ break;
+ }
+ case bitc::TYPE_CODE_FUNCTION_OLD: {
+ // FIXME: attrid is dead, remove it in LLVM 4.0
+ // FUNCTION: [vararg, attrid, retty, paramty x N]
+ if (Record.size() < 3)
+ return Error("Invalid FUNCTION type record");
+ SmallVector<Type*, 8> ArgTys;
+ for (unsigned i = 3, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ ArgTys.push_back(T);
+ else
+ break;
+ }
+
+ ResultTy = getTypeByID(Record[2]);
+ if (ResultTy == 0 || ArgTys.size() < Record.size()-3)
+ return Error("invalid type in function type");
+
+ ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_FUNCTION: {
+ // FUNCTION: [vararg, retty, paramty x N]
+ if (Record.size() < 2)
+ return Error("Invalid FUNCTION type record");
+ SmallVector<Type*, 8> ArgTys;
+ for (unsigned i = 2, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ ArgTys.push_back(T);
+ else
+ break;
+ }
+
+ ResultTy = getTypeByID(Record[1]);
+ if (ResultTy == 0 || ArgTys.size() < Record.size()-2)
+ return Error("invalid type in function type");
+
+ ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
+ if (Record.size() < 1)
+ return Error("Invalid STRUCT type record");
+ SmallVector<Type*, 8> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ EltTys.push_back(T);
+ else
+ break;
+ }
+ if (EltTys.size() != Record.size()-1)
+ return Error("invalid type in struct type");
+ ResultTy = StructType::get(Context, EltTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N]
+ if (ConvertToString(Record, 0, TypeName))
+ return Error("Invalid STRUCT_NAME record");
+ continue;
+
+ case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
+ if (Record.size() < 1)
+ return Error("Invalid STRUCT type record");
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ // Check to see if this was forward referenced, if so fill in the temp.
+ StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+ if (Res) {
+ Res->setName(TypeName);
+ TypeList[NumRecords] = 0;
+ } else // Otherwise, create a new struct.
+ Res = StructType::create(Context, TypeName);
+ TypeName.clear();
+
+ SmallVector<Type*, 8> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ EltTys.push_back(T);
+ else
+ break;
+ }
+ if (EltTys.size() != Record.size()-1)
+ return Error("invalid STRUCT type record");
+ Res->setBody(EltTys, Record[0]);
+ ResultTy = Res;
+ break;
+ }
+ case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: []
+ if (Record.size() != 1)
+ return Error("Invalid OPAQUE type record");
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ // Check to see if this was forward referenced, if so fill in the temp.
+ StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+ if (Res) {
+ Res->setName(TypeName);
+ TypeList[NumRecords] = 0;
+ } else // Otherwise, create a new struct with no body.
+ Res = StructType::create(Context, TypeName);
+ TypeName.clear();
+ ResultTy = Res;
+ break;
+ }
+ case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid ARRAY type record");
+ if ((ResultTy = getTypeByID(Record[1])))
+ ResultTy = ArrayType::get(ResultTy, Record[0]);
+ else
+ return Error("Invalid ARRAY type element");
+ break;
+ case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid VECTOR type record");
+ if ((ResultTy = getTypeByID(Record[1])))
+ ResultTy = VectorType::get(ResultTy, Record[0]);
+ else
+ return Error("Invalid ARRAY type element");
+ break;
+ }
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+ assert(ResultTy && "Didn't read a type?");
+ assert(TypeList[NumRecords] == 0 && "Already read type?");
+ TypeList[NumRecords++] = ResultTy;
+ }
+}
+
+bool BitcodeReader::ParseValueSymbolTable() {
+ if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this value table.
+ SmallString<128> ValueName;
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("malformed value symbol table block");
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
+ if (ConvertToString(Record, 1, ValueName))
+ return Error("Invalid VST_ENTRY record");
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size())
+ return Error("Invalid Value ID in VST_ENTRY record");
+ Value *V = ValueList[ValueID];
+
+ V->setName(StringRef(ValueName.data(), ValueName.size()));
+ ValueName.clear();
+ break;
+ }
+ case bitc::VST_CODE_BBENTRY: {
+ if (ConvertToString(Record, 1, ValueName))
+ return Error("Invalid VST_BBENTRY record");
+ BasicBlock *BB = getBasicBlock(Record[0]);
+ if (BB == 0)
+ return Error("Invalid BB ID in VST_BBENTRY record");
+
+ BB->setName(StringRef(ValueName.data(), ValueName.size()));
+ ValueName.clear();
+ break;
+ }
+ }
+ }
+}
+
+bool BitcodeReader::ParseMetadata() {
+ unsigned NextMDValueNo = MDValueList.size();
+
+ if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ Error("malformed metadata block");
+ return true;
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ bool IsFunctionLocal = false;
+ // Read a record.
+ Record.clear();
+ unsigned Code = Stream.readRecord(Entry.ID, Record);
+ switch (Code) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_NAME: {
+ // Read name of the named metadata.
+ SmallString<8> Name(Record.begin(), Record.end());
+ Record.clear();
+ Code = Stream.ReadCode();
+
+ // METADATA_NAME is always followed by METADATA_NAMED_NODE.
+ unsigned NextBitCode = Stream.readRecord(Code, Record);
+ assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode;
+
+ // Read named metadata elements.
+ unsigned Size = Record.size();
+ NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name);
+ for (unsigned i = 0; i != Size; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(MDValueList.getValueFwdRef(Record[i]));
+ if (MD == 0)
+ return Error("Malformed metadata record");
+ NMD->addOperand(MD);
+ }
+ break;
+ }
+ case bitc::METADATA_FN_NODE:
+ IsFunctionLocal = true;
+ // fall-through
+ case bitc::METADATA_NODE: {
+ if (Record.size() % 2 == 1)
+ return Error("Invalid METADATA_NODE record");
+
+ unsigned Size = Record.size();
+ SmallVector<Value*, 8> Elts;
+ for (unsigned i = 0; i != Size; i += 2) {
+ Type *Ty = getTypeByID(Record[i]);
+ if (!Ty) return Error("Invalid METADATA_NODE record");
+ if (Ty->isMetadataTy())
+ Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
+ else if (!Ty->isVoidTy())
+ Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty));
+ else
+ Elts.push_back(NULL);
+ }
+ Value *V = MDNode::getWhenValsUnresolved(Context, Elts, IsFunctionLocal);
+ IsFunctionLocal = false;
+ MDValueList.AssignValue(V, NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_STRING: {
+ SmallString<8> String(Record.begin(), Record.end());
+ Value *V = MDString::get(Context, String);
+ MDValueList.AssignValue(V, NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_KIND: {
+ if (Record.size() < 2)
+ return Error("Invalid METADATA_KIND record");
+
+ unsigned Kind = Record[0];
+ SmallString<8> Name(Record.begin()+1, Record.end());
+
+ unsigned NewKind = TheModule->getMDKindID(Name.str());
+ if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
+ return Error("Conflicting METADATA_KIND records");
+ break;
+ }
+ }
+ }
+}
+
+/// decodeSignRotatedValue - Decode a signed value stored with the sign bit in
+/// the LSB for dense VBR encoding.
+uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
+ if ((V & 1) == 0)
+ return V >> 1;
+ if (V != 1)
+ return -(V >> 1);
+ // There is no such thing as -0 with integers. "-0" really means MININT.
+ return 1ULL << 63;
+}
+
+/// ResolveGlobalAndAliasInits - Resolve all of the initializers for global
+/// values and aliases that we can.
+bool BitcodeReader::ResolveGlobalAndAliasInits() {
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist;
+ std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist;
+
+ GlobalInitWorklist.swap(GlobalInits);
+ AliasInitWorklist.swap(AliasInits);
+
+ while (!GlobalInitWorklist.empty()) {
+ unsigned ValID = GlobalInitWorklist.back().second;
+ if (ValID >= ValueList.size()) {
+ // Not ready to resolve this yet, it requires something later in the file.
+ GlobalInits.push_back(GlobalInitWorklist.back());
+ } else {
+ if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+ GlobalInitWorklist.back().first->setInitializer(C);
+ else
+ return Error("Global variable initializer is not a constant!");
+ }
+ GlobalInitWorklist.pop_back();
+ }
+
+ while (!AliasInitWorklist.empty()) {
+ unsigned ValID = AliasInitWorklist.back().second;
+ if (ValID >= ValueList.size()) {
+ AliasInits.push_back(AliasInitWorklist.back());
+ } else {
+ if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+ AliasInitWorklist.back().first->setAliasee(C);
+ else
+ return Error("Alias initializer is not a constant!");
+ }
+ AliasInitWorklist.pop_back();
+ }
+ return false;
+}
+
+static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
+ SmallVector<uint64_t, 8> Words(Vals.size());
+ std::transform(Vals.begin(), Vals.end(), Words.begin(),
+ BitcodeReader::decodeSignRotatedValue);
+
+ return APInt(TypeBits, Words);
+}
+
+bool BitcodeReader::ParseConstants() {
+ if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this value table.
+ Type *CurTy = Type::getInt32Ty(Context);
+ unsigned NextCstNo = ValueList.size();
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("malformed block record in AST file");
+ case BitstreamEntry::EndBlock:
+ if (NextCstNo != ValueList.size())
+ return Error("Invalid constant reference!");
+
+ // Once all the constants have been read, go through and resolve forward
+ // references.
+ ValueList.ResolveConstantForwardRefs();
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ Value *V = 0;
+ unsigned BitCode = Stream.readRecord(Entry.ID, Record);
+ switch (BitCode) {
+ default: // Default behavior: unknown constant
+ case bitc::CST_CODE_UNDEF: // UNDEF
+ V = UndefValue::get(CurTy);
+ break;
+ case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid]
+ if (Record.empty())
+ return Error("Malformed CST_SETTYPE record");
+ if (Record[0] >= TypeList.size())
+ return Error("Invalid Type ID in CST_SETTYPE record");
+ CurTy = TypeList[Record[0]];
+ continue; // Skip the ValueList manipulation.
+ case bitc::CST_CODE_NULL: // NULL
+ V = Constant::getNullValue(CurTy);
+ break;
+ case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
+ if (!CurTy->isIntegerTy() || Record.empty())
+ return Error("Invalid CST_INTEGER record");
+ V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0]));
+ break;
+ case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
+ if (!CurTy->isIntegerTy() || Record.empty())
+ return Error("Invalid WIDE_INTEGER record");
+
+ APInt VInt = ReadWideAPInt(Record,
+ cast<IntegerType>(CurTy)->getBitWidth());
+ V = ConstantInt::get(Context, VInt);
+
+ break;
+ }
+ case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
+ if (Record.empty())
+ return Error("Invalid FLOAT record");
+ if (CurTy->isHalfTy())
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf,
+ APInt(16, (uint16_t)Record[0])));
+ else if (CurTy->isFloatTy())
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, (uint32_t)Record[0])));
+ else if (CurTy->isDoubleTy())
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, Record[0])));
+ else if (CurTy->isX86_FP80Ty()) {
+ // Bits are not stored the same way as a normal i80 APInt, compensate.
+ uint64_t Rearrange[2];
+ Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16);
+ Rearrange[1] = Record[0] >> 48;
+ V = ConstantFP::get(Context, APFloat(APFloat::x87DoubleExtended,
+ APInt(80, Rearrange)));
+ } else if (CurTy->isFP128Ty())
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEquad,
+ APInt(128, Record)));
+ else if (CurTy->isPPC_FP128Ty())
+ V = ConstantFP::get(Context, APFloat(APFloat::PPCDoubleDouble,
+ APInt(128, Record)));
+ else
+ V = UndefValue::get(CurTy);
+ break;
+ }
+
+ case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
+ if (Record.empty())
+ return Error("Invalid CST_AGGREGATE record");
+
+ unsigned Size = Record.size();
+ SmallVector<Constant*, 16> Elts;
+
+ if (StructType *STy = dyn_cast<StructType>(CurTy)) {
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i],
+ STy->getElementType(i)));
+ V = ConstantStruct::get(STy, Elts);
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
+ Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+ V = ConstantArray::get(ATy, Elts);
+ } else if (VectorType *VTy = dyn_cast<VectorType>(CurTy)) {
+ Type *EltTy = VTy->getElementType();
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+ V = ConstantVector::get(Elts);
+ } else {
+ V = UndefValue::get(CurTy);
+ }
+ break;
+ }
+ case bitc::CST_CODE_STRING: // STRING: [values]
+ case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
+ if (Record.empty())
+ return Error("Invalid CST_STRING record");
+
+ SmallString<16> Elts(Record.begin(), Record.end());
+ V = ConstantDataArray::getString(Context, Elts,
+ BitCode == bitc::CST_CODE_CSTRING);
+ break;
+ }
+ case bitc::CST_CODE_DATA: {// DATA: [n x value]
+ if (Record.empty())
+ return Error("Invalid CST_DATA record");
+
+ Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
+ unsigned Size = Record.size();
+
+ if (EltTy->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isFloatTy()) {
+ SmallVector<float, 16> Elts(Size);
+ std::transform(Record.begin(), Record.end(), Elts.begin(), BitsToFloat);
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isDoubleTy()) {
+ SmallVector<double, 16> Elts(Size);
+ std::transform(Record.begin(), Record.end(), Elts.begin(),
+ BitsToDouble);
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else {
+ return Error("Unknown element type in CE_DATA");
+ }
+ break;
+ }
+
+ case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
+ if (Record.size() < 3) return Error("Invalid CE_BINOP record");
+ int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
+ if (Opc < 0) {
+ V = UndefValue::get(CurTy); // Unknown binop.
+ } else {
+ Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
+ Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy);
+ unsigned Flags = 0;
+ if (Record.size() >= 4) {
+ if (Opc == Instruction::Add ||
+ Opc == Instruction::Sub ||
+ Opc == Instruction::Mul ||
+ Opc == Instruction::Shl) {
+ if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP))
+ Flags |= OverflowingBinaryOperator::NoSignedWrap;
+ if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
+ Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+ } else if (Opc == Instruction::SDiv ||
+ Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr ||
+ Opc == Instruction::AShr) {
+ if (Record[3] & (1 << bitc::PEO_EXACT))
+ Flags |= SDivOperator::IsExact;
+ }
+ }
+ V = ConstantExpr::get(Opc, LHS, RHS, Flags);
+ }
+ break;
+ }
+ case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval]
+ if (Record.size() < 3) return Error("Invalid CE_CAST record");
+ int Opc = GetDecodedCastOpcode(Record[0]);
+ if (Opc < 0) {
+ V = UndefValue::get(CurTy); // Unknown cast.
+ } else {
+ Type *OpTy = getTypeByID(Record[1]);
+ if (!OpTy) return Error("Invalid CE_CAST record");
+ Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy);
+ V = ConstantExpr::getCast(Opc, Op, CurTy);
+ }
+ break;
+ }
+ case bitc::CST_CODE_CE_INBOUNDS_GEP:
+ case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands]
+ if (Record.size() & 1) return Error("Invalid CE_GEP record");
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ Type *ElTy = getTypeByID(Record[i]);
+ if (!ElTy) return Error("Invalid CE_GEP record");
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
+ }
+ ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
+ V = ConstantExpr::getGetElementPtr(Elts[0], Indices,
+ BitCode ==
+ bitc::CST_CODE_CE_INBOUNDS_GEP);
+ break;
+ }
+ case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#]
+ if (Record.size() < 3) return Error("Invalid CE_SELECT record");
+ V = ConstantExpr::getSelect(
+ ValueList.getConstantFwdRef(Record[0],
+ Type::getInt1Ty(Context)),
+ ValueList.getConstantFwdRef(Record[1],CurTy),
+ ValueList.getConstantFwdRef(Record[2],CurTy));
+ break;
+ case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
+ if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record");
+ VectorType *OpTy =
+ dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
+ if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2],
+ Type::getInt32Ty(Context));
+ V = ConstantExpr::getExtractElement(Op0, Op1);
+ break;
+ }
+ case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval]
+ VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+ if (Record.size() < 3 || OpTy == 0)
+ return Error("Invalid CE_INSERTELT record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
+ OpTy->getElementType());
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[2],
+ Type::getInt32Ty(Context));
+ V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval]
+ VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+ if (Record.size() < 3 || OpTy == 0)
+ return Error("Invalid CE_SHUFFLEVEC record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
+ OpTy->getNumElements());
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy);
+ V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval]
+ VectorType *RTy = dyn_cast<VectorType>(CurTy);
+ VectorType *OpTy =
+ dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
+ if (Record.size() < 4 || RTy == 0 || OpTy == 0)
+ return Error("Invalid CE_SHUFVEC_EX record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
+ Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
+ RTy->getNumElements());
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy);
+ V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred]
+ if (Record.size() < 4) return Error("Invalid CE_CMP record");
+ Type *OpTy = getTypeByID(Record[0]);
+ if (OpTy == 0) return Error("Invalid CE_CMP record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
+
+ if (OpTy->isFPOrFPVectorTy())
+ V = ConstantExpr::getFCmp(Record[3], Op0, Op1);
+ else
+ V = ConstantExpr::getICmp(Record[3], Op0, Op1);
+ break;
+ }
+ // This maintains backward compatibility, pre-asm dialect keywords.
+ // FIXME: Remove with the 4.0 release.
+ case bitc::CST_CODE_INLINEASM_OLD: {
+ if (Record.size() < 2) return Error("Invalid INLINEASM record");
+ std::string AsmStr, ConstrStr;
+ bool HasSideEffects = Record[0] & 1;
+ bool IsAlignStack = Record[0] >> 1;
+ unsigned AsmStrSize = Record[1];
+ if (2+AsmStrSize >= Record.size())
+ return Error("Invalid INLINEASM record");
+ unsigned ConstStrSize = Record[2+AsmStrSize];
+ if (3+AsmStrSize+ConstStrSize > Record.size())
+ return Error("Invalid INLINEASM record");
+
+ for (unsigned i = 0; i != AsmStrSize; ++i)
+ AsmStr += (char)Record[2+i];
+ for (unsigned i = 0; i != ConstStrSize; ++i)
+ ConstrStr += (char)Record[3+AsmStrSize+i];
+ PointerType *PTy = cast<PointerType>(CurTy);
+ V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
+ AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
+ break;
+ }
+ // This version adds support for the asm dialect keywords (e.g.,
+ // inteldialect).
+ case bitc::CST_CODE_INLINEASM: {
+ if (Record.size() < 2) return Error("Invalid INLINEASM record");
+ std::string AsmStr, ConstrStr;
+ bool HasSideEffects = Record[0] & 1;
+ bool IsAlignStack = (Record[0] >> 1) & 1;
+ unsigned AsmDialect = Record[0] >> 2;
+ unsigned AsmStrSize = Record[1];
+ if (2+AsmStrSize >= Record.size())
+ return Error("Invalid INLINEASM record");
+ unsigned ConstStrSize = Record[2+AsmStrSize];
+ if (3+AsmStrSize+ConstStrSize > Record.size())
+ return Error("Invalid INLINEASM record");
+
+ for (unsigned i = 0; i != AsmStrSize; ++i)
+ AsmStr += (char)Record[2+i];
+ for (unsigned i = 0; i != ConstStrSize; ++i)
+ ConstrStr += (char)Record[3+AsmStrSize+i];
+ PointerType *PTy = cast<PointerType>(CurTy);
+ V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
+ AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
+ InlineAsm::AsmDialect(AsmDialect));
+ break;
+ }
+ case bitc::CST_CODE_BLOCKADDRESS:{
+ if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record");
+ Type *FnTy = getTypeByID(Record[0]);
+ if (FnTy == 0) return Error("Invalid CE_BLOCKADDRESS record");
+ Function *Fn =
+ dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
+ if (Fn == 0) return Error("Invalid CE_BLOCKADDRESS record");
+
+ // If the function is already parsed we can insert the block address right
+ // away.
+ if (!Fn->empty()) {
+ Function::iterator BBI = Fn->begin(), BBE = Fn->end();
+ for (size_t I = 0, E = Record[2]; I != E; ++I) {
+ if (BBI == BBE)
+ return Error("Invalid blockaddress block #");
+ ++BBI;
+ }
+ V = BlockAddress::get(Fn, BBI);
+ } else {
+ // Otherwise insert a placeholder and remember it so it can be inserted
+ // when the function is parsed.
+ GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(),
+ Type::getInt8Ty(Context),
+ false, GlobalValue::InternalLinkage,
+ 0, "");
+ BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef));
+ V = FwdRef;
+ }
+ break;
+ }
+ }
+
+ ValueList.AssignValue(V, NextCstNo);
+ ++NextCstNo;
+ }
+}
+
+bool BitcodeReader::ParseUseLists() {
+ if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("malformed use list block");
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a use list record.
+ Record.clear();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
+ unsigned RecordLength = Record.size();
+ if (RecordLength < 1)
+ return Error ("Invalid UseList reader!");
+ UseListRecords.push_back(Record);
+ break;
+ }
+ }
+ }
+}
+
+/// RememberAndSkipFunctionBody - When we see the block for a function body,
+/// remember where it is and then skip it. This lets us lazily deserialize the
+/// functions.
+bool BitcodeReader::RememberAndSkipFunctionBody() {
+ // Get the function we are talking about.
+ if (FunctionsWithBodies.empty())
+ return Error("Insufficient function protos");
+
+ Function *Fn = FunctionsWithBodies.back();
+ FunctionsWithBodies.pop_back();
+
+ // Save the current stream state.
+ uint64_t CurBit = Stream.GetCurrentBitNo();
+ DeferredFunctionInfo[Fn] = CurBit;
+
+ // Skip over the function block for now.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ return false;
+}
+
+bool BitcodeReader::GlobalCleanup() {
+ // Patch the initializers for globals and aliases up.
+ ResolveGlobalAndAliasInits();
+ if (!GlobalInits.empty() || !AliasInits.empty())
+ return Error("Malformed global initializer set");
+
+ // Look for intrinsic functions which need to be upgraded at some point
+ for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ Function *NewFn;
+ if (UpgradeIntrinsicFunction(FI, NewFn))
+ UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
+ }
+
+ // Look for global variables which need to be renamed.
+ for (Module::global_iterator
+ GI = TheModule->global_begin(), GE = TheModule->global_end();
+ GI != GE; ++GI)
+ UpgradeGlobalVariable(GI);
+ // Force deallocation of memory for these vectors to favor the client that
+ // want lazy deserialization.
+ std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
+ std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
+ return false;
+}
+
+bool BitcodeReader::ParseModule(bool Resume) {
+ if (Resume)
+ Stream.JumpToBit(NextUnreadBit);
+ else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+ std::vector<std::string> SectionTable;
+ std::vector<std::string> GCTable;
+
+ // Read all the records for this module.
+ while (1) {
+ BitstreamEntry Entry = Stream.advance();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ Error("malformed module block");
+ return true;
+ case BitstreamEntry::EndBlock:
+ return GlobalCleanup();
+
+ case BitstreamEntry::SubBlock:
+ switch (Entry.ID) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ case bitc::BLOCKINFO_BLOCK_ID:
+ if (Stream.ReadBlockInfoBlock())
+ return Error("Malformed BlockInfoBlock");
+ break;
+ case bitc::PARAMATTR_BLOCK_ID:
+ if (ParseAttributeBlock())
+ return true;
+ break;
+ case bitc::PARAMATTR_GROUP_BLOCK_ID:
+ if (ParseAttributeGroupBlock())
+ return true;
+ break;
+ case bitc::TYPE_BLOCK_ID_NEW:
+ if (ParseTypeTable())
+ return true;
+ break;
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ if (ParseValueSymbolTable())
+ return true;
+ SeenValueSymbolTable = true;
+ break;
+ case bitc::CONSTANTS_BLOCK_ID:
+ if (ParseConstants() || ResolveGlobalAndAliasInits())
+ return true;
+ break;
+ case bitc::METADATA_BLOCK_ID:
+ if (ParseMetadata())
+ return true;
+ break;
+ case bitc::FUNCTION_BLOCK_ID:
+ // If this is the first function body we've seen, reverse the
+ // FunctionsWithBodies list.
+ if (!SeenFirstFunctionBody) {
+ std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
+ if (GlobalCleanup())
+ return true;
+ SeenFirstFunctionBody = true;
+ }
+
+ if (RememberAndSkipFunctionBody())
+ return true;
+ // For streaming bitcode, suspend parsing when we reach the function
+ // bodies. Subsequent materialization calls will resume it when
+ // necessary. For streaming, the function bodies must be at the end of
+ // the bitcode. If the bitcode file is old, the symbol table will be
+ // at the end instead and will not have been seen yet. In this case,
+ // just finish the parse now.
+ if (LazyStreamer && SeenValueSymbolTable) {
+ NextUnreadBit = Stream.GetCurrentBitNo();
+ return false;
+ }
+ break;
+ case bitc::USELIST_BLOCK_ID:
+ if (ParseUseLists())
+ return true;
+ break;
+ }
+ continue;
+
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+
+ // Read a record.
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_VERSION: { // VERSION: [version#]
+ if (Record.size() < 1)
+ return Error("Malformed MODULE_CODE_VERSION");
+ // Only version #0 and #1 are supported so far.
+ unsigned module_version = Record[0];
+ switch (module_version) {
+ default: return Error("Unknown bitstream version!");
+ case 0:
+ UseRelativeIDs = false;
+ break;
+ case 1:
+ UseRelativeIDs = true;
+ break;
+ }
+ break;
+ }
+ case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_TRIPLE record");
+ TheModule->setTargetTriple(S);
+ break;
+ }
+ case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_DATALAYOUT record");
+ TheModule->setDataLayout(S);
+ break;
+ }
+ case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_ASM record");
+ TheModule->setModuleInlineAsm(S);
+ break;
+ }
+ case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N]
+ // FIXME: Remove in 4.0.
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_DEPLIB record");
+ // Ignore value.
+ break;
+ }
+ case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_SECTIONNAME record");
+ SectionTable.push_back(S);
+ break;
+ }
+ case bitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_GCNAME record");
+ GCTable.push_back(S);
+ break;
+ }
+ // GLOBALVAR: [pointer type, isconst, initid,
+ // linkage, alignment, section, visibility, threadlocal,
+ // unnamed_addr]
+ case bitc::MODULE_CODE_GLOBALVAR: {
+ if (Record.size() < 6)
+ return Error("Invalid MODULE_CODE_GLOBALVAR record");
+ Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_CODE_GLOBALVAR record");
+ if (!Ty->isPointerTy())
+ return Error("Global not a pointer type!");
+ unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ bool isConstant = Record[1];
+ GlobalValue::LinkageTypes Linkage = GetDecodedLinkage(Record[3]);
+ unsigned Alignment = (1 << Record[4]) >> 1;
+ std::string Section;
+ if (Record[5]) {
+ if (Record[5]-1 >= SectionTable.size())
+ return Error("Invalid section ID");
+ Section = SectionTable[Record[5]-1];
+ }
+ GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility;
+ if (Record.size() > 6)
+ Visibility = GetDecodedVisibility(Record[6]);
+
+ GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal;
+ if (Record.size() > 7)
+ TLM = GetDecodedThreadLocalMode(Record[7]);
+
+ bool UnnamedAddr = false;
+ if (Record.size() > 8)
+ UnnamedAddr = Record[8];
+
+ bool ExternallyInitialized = false;
+ if (Record.size() > 9)
+ ExternallyInitialized = Record[9];
+
+ GlobalVariable *NewGV =
+ new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0,
+ TLM, AddressSpace, ExternallyInitialized);
+ NewGV->setAlignment(Alignment);
+ if (!Section.empty())
+ NewGV->setSection(Section);
+ NewGV->setVisibility(Visibility);
+ NewGV->setUnnamedAddr(UnnamedAddr);
+
+ ValueList.push_back(NewGV);
+
+ // Remember which value to use for the global initializer.
+ if (unsigned InitID = Record[2])
+ GlobalInits.push_back(std::make_pair(NewGV, InitID-1));
+ break;
+ }
+ // FUNCTION: [type, callingconv, isproto, linkage, paramattr,
+ // alignment, section, visibility, gc, unnamed_addr]
+ case bitc::MODULE_CODE_FUNCTION: {
+ if (Record.size() < 8)
+ return Error("Invalid MODULE_CODE_FUNCTION record");
+ Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record");
+ if (!Ty->isPointerTy())
+ return Error("Function not a pointer type!");
+ FunctionType *FTy =
+ dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
+ if (!FTy)
+ return Error("Function not a pointer to function type!");
+
+ Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ "", TheModule);
+
+ Func->setCallingConv(static_cast<CallingConv::ID>(Record[1]));
+ bool isProto = Record[2];
+ Func->setLinkage(GetDecodedLinkage(Record[3]));
+ Func->setAttributes(getAttributes(Record[4]));
+
+ Func->setAlignment((1 << Record[5]) >> 1);
+ if (Record[6]) {
+ if (Record[6]-1 >= SectionTable.size())
+ return Error("Invalid section ID");
+ Func->setSection(SectionTable[Record[6]-1]);
+ }
+ Func->setVisibility(GetDecodedVisibility(Record[7]));
+ if (Record.size() > 8 && Record[8]) {
+ if (Record[8]-1 > GCTable.size())
+ return Error("Invalid GC ID");
+ Func->setGC(GCTable[Record[8]-1].c_str());
+ }
+ bool UnnamedAddr = false;
+ if (Record.size() > 9)
+ UnnamedAddr = Record[9];
+ Func->setUnnamedAddr(UnnamedAddr);
+ ValueList.push_back(Func);
+
+ // If this is a function with a body, remember the prototype we are
+ // creating now, so that we can match up the body with them later.
+ if (!isProto) {
+ FunctionsWithBodies.push_back(Func);
+ if (LazyStreamer) DeferredFunctionInfo[Func] = 0;
+ }
+ break;
+ }
+ // ALIAS: [alias type, aliasee val#, linkage]
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
+ case bitc::MODULE_CODE_ALIAS: {
+ if (Record.size() < 3)
+ return Error("Invalid MODULE_ALIAS record");
+ Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_ALIAS record");
+ if (!Ty->isPointerTy())
+ return Error("Function not a pointer type!");
+
+ GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]),
+ "", 0, TheModule);
+ // Old bitcode files didn't have visibility field.
+ if (Record.size() > 3)
+ NewGA->setVisibility(GetDecodedVisibility(Record[3]));
+ ValueList.push_back(NewGA);
+ AliasInits.push_back(std::make_pair(NewGA, Record[1]));
+ break;
+ }
+ /// MODULE_CODE_PURGEVALS: [numvals]
+ case bitc::MODULE_CODE_PURGEVALS:
+ // Trim down the value list to the specified size.
+ if (Record.size() < 1 || Record[0] > ValueList.size())
+ return Error("Invalid MODULE_PURGEVALS record");
+ ValueList.shrinkTo(Record[0]);
+ break;
+ }
+ Record.clear();
+ }
+}
+
+bool BitcodeReader::ParseBitcodeInto(Module *M) {
+ TheModule = 0;
+
+ if (InitStream()) return true;
+
+ // Sniff for the signature.
+ if (Stream.Read(8) != 'B' ||
+ Stream.Read(8) != 'C' ||
+ Stream.Read(4) != 0x0 ||
+ Stream.Read(4) != 0xC ||
+ Stream.Read(4) != 0xE ||
+ Stream.Read(4) != 0xD)
+ return Error("Invalid bitcode signature");
+
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (1) {
+ if (Stream.AtEndOfStream())
+ return false;
+
+ BitstreamEntry Entry =
+ Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ Error("malformed module file");
+ return true;
+ case BitstreamEntry::EndBlock:
+ return false;
+
+ case BitstreamEntry::SubBlock:
+ switch (Entry.ID) {
+ case bitc::BLOCKINFO_BLOCK_ID:
+ if (Stream.ReadBlockInfoBlock())
+ return Error("Malformed BlockInfoBlock");
+ break;
+ case bitc::MODULE_BLOCK_ID:
+ // Reject multiple MODULE_BLOCK's in a single bitstream.
+ if (TheModule)
+ return Error("Multiple MODULE_BLOCKs in same stream");
+ TheModule = M;
+ if (ParseModule(false))
+ return true;
+ if (LazyStreamer) return false;
+ break;
+ default:
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ continue;
+ case BitstreamEntry::Record:
+ // There should be no records in the top-level of blocks.
+
+ // The ranlib in Xcode 4 will align archive members by appending newlines
+ // to the end of them. If this file size is a multiple of 4 but not 8, we
+ // have to read and ignore these final 4 bytes :-(
+ if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 &&
+ Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a &&
+ Stream.AtEndOfStream())
+ return false;
+
+ return Error("Invalid record at top-level");
+ }
+ }
+}
+
+bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this module.
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("malformed module block");
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_TRIPLE record");
+ Triple = S;
+ break;
+ }
+ }
+ Record.clear();
+ }
+}
+
+bool BitcodeReader::ParseTriple(std::string &Triple) {
+ if (InitStream()) return true;
+
+ // Sniff for the signature.
+ if (Stream.Read(8) != 'B' ||
+ Stream.Read(8) != 'C' ||
+ Stream.Read(4) != 0x0 ||
+ Stream.Read(4) != 0xC ||
+ Stream.Read(4) != 0xE ||
+ Stream.Read(4) != 0xD)
+ return Error("Invalid bitcode signature");
+
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (1) {
+ BitstreamEntry Entry = Stream.advance();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ Error("malformed module file");
+ return true;
+ case BitstreamEntry::EndBlock:
+ return false;
+
+ case BitstreamEntry::SubBlock:
+ if (Entry.ID == bitc::MODULE_BLOCK_ID)
+ return ParseModuleTriple(Triple);
+
+ // Ignore other sub-blocks.
+ if (Stream.SkipBlock()) {
+ Error("malformed block record in AST file");
+ return true;
+ }
+ continue;
+
+ case BitstreamEntry::Record:
+ Stream.skipRecord(Entry.ID);
+ continue;
+ }
+ }
+}
+
+/// ParseMetadataAttachment - Parse metadata attachments.
+bool BitcodeReader::ParseMetadataAttachment() {
+ if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("malformed metadata block");
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a metadata attachment record.
+ Record.clear();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_ATTACHMENT: {
+ unsigned RecordLength = Record.size();
+ if (Record.empty() || (RecordLength - 1) % 2 == 1)
+ return Error ("Invalid METADATA_ATTACHMENT reader!");
+ Instruction *Inst = InstructionList[Record[0]];
+ for (unsigned i = 1; i != RecordLength; i = i+2) {
+ unsigned Kind = Record[i];
+ DenseMap<unsigned, unsigned>::iterator I =
+ MDKindMap.find(Kind);
+ if (I == MDKindMap.end())
+ return Error("Invalid metadata kind ID");
+ Value *Node = MDValueList.getValueFwdRef(Record[i+1]);
+ Inst->setMetadata(I->second, cast<MDNode>(Node));
+ }
+ break;
+ }
+ }
+ }
+}
+
+/// ParseFunctionBody - Lazily parse the specified function body block.
+bool BitcodeReader::ParseFunctionBody(Function *F) {
+ if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
+ return Error("Malformed block record");
+
+ InstructionList.clear();
+ unsigned ModuleValueListSize = ValueList.size();
+ unsigned ModuleMDValueListSize = MDValueList.size();
+
+ // Add all the function arguments to the value table.
+ for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ ValueList.push_back(I);
+
+ unsigned NextValueNo = ValueList.size();
+ BasicBlock *CurBB = 0;
+ unsigned CurBBNo = 0;
+
+ DebugLoc LastLoc;
+
+ // Read all the records.
+ SmallVector<uint64_t, 64> Record;
+ while (1) {
+ BitstreamEntry Entry = Stream.advance();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ return Error("Bitcode error in function block");
+ case BitstreamEntry::EndBlock:
+ goto OutOfRecordLoop;
+
+ case BitstreamEntry::SubBlock:
+ switch (Entry.ID) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ case bitc::CONSTANTS_BLOCK_ID:
+ if (ParseConstants()) return true;
+ NextValueNo = ValueList.size();
+ break;
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ if (ParseValueSymbolTable()) return true;
+ break;
+ case bitc::METADATA_ATTACHMENT_ID:
+ if (ParseMetadataAttachment()) return true;
+ break;
+ case bitc::METADATA_BLOCK_ID:
+ if (ParseMetadata()) return true;
+ break;
+ }
+ continue;
+
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ Instruction *I = 0;
+ unsigned BitCode = Stream.readRecord(Entry.ID, Record);
+ switch (BitCode) {
+ default: // Default behavior: reject
+ return Error("Unknown instruction");
+ case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks]
+ if (Record.size() < 1 || Record[0] == 0)
+ return Error("Invalid DECLAREBLOCKS record");
+ // Create all the basic blocks for the function.
+ FunctionBBs.resize(Record[0]);
+ for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
+ FunctionBBs[i] = BasicBlock::Create(Context, "", F);
+ CurBB = FunctionBBs[0];
+ continue;
+
+ case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN
+ // This record indicates that the last instruction is at the same
+ // location as the previous instruction with a location.
+ I = 0;
+
+ // Get the last instruction emitted.
+ if (CurBB && !CurBB->empty())
+ I = &CurBB->back();
+ else if (CurBBNo && FunctionBBs[CurBBNo-1] &&
+ !FunctionBBs[CurBBNo-1]->empty())
+ I = &FunctionBBs[CurBBNo-1]->back();
+
+ if (I == 0) return Error("Invalid DEBUG_LOC_AGAIN record");
+ I->setDebugLoc(LastLoc);
+ I = 0;
+ continue;
+
+ case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia]
+ I = 0; // Get the last instruction emitted.
+ if (CurBB && !CurBB->empty())
+ I = &CurBB->back();
+ else if (CurBBNo && FunctionBBs[CurBBNo-1] &&
+ !FunctionBBs[CurBBNo-1]->empty())
+ I = &FunctionBBs[CurBBNo-1]->back();
+ if (I == 0 || Record.size() < 4)
+ return Error("Invalid FUNC_CODE_DEBUG_LOC record");
+
+ unsigned Line = Record[0], Col = Record[1];
+ unsigned ScopeID = Record[2], IAID = Record[3];
+
+ MDNode *Scope = 0, *IA = 0;
+ if (ScopeID) Scope = cast<MDNode>(MDValueList.getValueFwdRef(ScopeID-1));
+ if (IAID) IA = cast<MDNode>(MDValueList.getValueFwdRef(IAID-1));
+ LastLoc = DebugLoc::get(Line, Col, Scope, IA);
+ I->setDebugLoc(LastLoc);
+ I = 0;
+ continue;
+ }
+
+ case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode]
+ unsigned OpNum = 0;
+ Value *LHS, *RHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
+ OpNum+1 > Record.size())
+ return Error("Invalid BINOP record");
+
+ int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
+ if (Opc == -1) return Error("Invalid BINOP record");
+ I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ InstructionList.push_back(I);
+ if (OpNum < Record.size()) {
+ if (Opc == Instruction::Add ||
+ Opc == Instruction::Sub ||
+ Opc == Instruction::Mul ||
+ Opc == Instruction::Shl) {
+ if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP))
+ cast<BinaryOperator>(I)->setHasNoSignedWrap(true);
+ if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
+ cast<BinaryOperator>(I)->setHasNoUnsignedWrap(true);
+ } else if (Opc == Instruction::SDiv ||
+ Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr ||
+ Opc == Instruction::AShr) {
+ if (Record[OpNum] & (1 << bitc::PEO_EXACT))
+ cast<BinaryOperator>(I)->setIsExact(true);
+ } else if (isa<FPMathOperator>(I)) {
+ FastMathFlags FMF;
+ if (0 != (Record[OpNum] & FastMathFlags::UnsafeAlgebra))
+ FMF.setUnsafeAlgebra();
+ if (0 != (Record[OpNum] & FastMathFlags::NoNaNs))
+ FMF.setNoNaNs();
+ if (0 != (Record[OpNum] & FastMathFlags::NoInfs))
+ FMF.setNoInfs();
+ if (0 != (Record[OpNum] & FastMathFlags::NoSignedZeros))
+ FMF.setNoSignedZeros();
+ if (0 != (Record[OpNum] & FastMathFlags::AllowReciprocal))
+ FMF.setAllowReciprocal();
+ if (FMF.any())
+ I->setFastMathFlags(FMF);
+ }
+
+ }
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid CAST record");
+
+ Type *ResTy = getTypeByID(Record[OpNum]);
+ int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
+ if (Opc == -1 || ResTy == 0)
+ return Error("Invalid CAST record");
+ I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_INBOUNDS_GEP:
+ case bitc::FUNC_CODE_INST_GEP: { // GEP: [n x operands]
+ unsigned OpNum = 0;
+ Value *BasePtr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
+ return Error("Invalid GEP record");
+
+ SmallVector<Value*, 16> GEPIdx;
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid GEP record");
+ GEPIdx.push_back(Op);
+ }
+
+ I = GetElementPtrInst::Create(BasePtr, GEPIdx);
+ InstructionList.push_back(I);
+ if (BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP)
+ cast<GetElementPtrInst>(I)->setIsInBounds(true);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_EXTRACTVAL: {
+ // EXTRACTVAL: [opty, opval, n x indices]
+ unsigned OpNum = 0;
+ Value *Agg;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ return Error("Invalid EXTRACTVAL record");
+
+ SmallVector<unsigned, 4> EXTRACTVALIdx;
+ for (unsigned RecSize = Record.size();
+ OpNum != RecSize; ++OpNum) {
+ uint64_t Index = Record[OpNum];
+ if ((unsigned)Index != Index)
+ return Error("Invalid EXTRACTVAL index");
+ EXTRACTVALIdx.push_back((unsigned)Index);
+ }
+
+ I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INSERTVAL: {
+ // INSERTVAL: [opty, opval, opty, opval, n x indices]
+ unsigned OpNum = 0;
+ Value *Agg;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ return Error("Invalid INSERTVAL record");
+ Value *Val;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Val))
+ return Error("Invalid INSERTVAL record");
+
+ SmallVector<unsigned, 4> INSERTVALIdx;
+ for (unsigned RecSize = Record.size();
+ OpNum != RecSize; ++OpNum) {
+ uint64_t Index = Record[OpNum];
+ if ((unsigned)Index != Index)
+ return Error("Invalid INSERTVAL index");
+ INSERTVALIdx.push_back((unsigned)Index);
+ }
+
+ I = InsertValueInst::Create(Agg, Val, INSERTVALIdx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval]
+ // obsolete form of select
+ // handles select i1 ... in old bitcode
+ unsigned OpNum = 0;
+ Value *TrueVal, *FalseVal, *Cond;
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+ popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
+ popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond))
+ return Error("Invalid SELECT record");
+
+ I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred]
+ // new form of select
+ // handles select i1 or select [N x i1]
+ unsigned OpNum = 0;
+ Value *TrueVal, *FalseVal, *Cond;
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+ popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
+ getValueTypePair(Record, OpNum, NextValueNo, Cond))
+ return Error("Invalid SELECT record");
+
+ // select condition can be either i1 or [N x i1]
+ if (VectorType* vector_type =
+ dyn_cast<VectorType>(Cond->getType())) {
+ // expect <n x i1>
+ if (vector_type->getElementType() != Type::getInt1Ty(Context))
+ return Error("Invalid SELECT condition type");
+ } else {
+ // expect i1
+ if (Cond->getType() != Type::getInt1Ty(Context))
+ return Error("Invalid SELECT condition type");
+ }
+
+ I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval]
+ unsigned OpNum = 0;
+ Value *Vec, *Idx;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
+ popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
+ return Error("Invalid EXTRACTELT record");
+ I = ExtractElementInst::Create(Vec, Idx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval]
+ unsigned OpNum = 0;
+ Value *Vec, *Elt, *Idx;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
+ popValue(Record, OpNum, NextValueNo,
+ cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
+ popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
+ return Error("Invalid INSERTELT record");
+ I = InsertElementInst::Create(Vec, Elt, Idx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval]
+ unsigned OpNum = 0;
+ Value *Vec1, *Vec2, *Mask;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
+ popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2))
+ return Error("Invalid SHUFFLEVEC record");
+
+ if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
+ return Error("Invalid SHUFFLEVEC record");
+ I = new ShuffleVectorInst(Vec1, Vec2, Mask);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_CMP: // CMP: [opty, opval, opval, pred]
+ // Old form of ICmp/FCmp returning bool
+ // Existed to differentiate between icmp/fcmp and vicmp/vfcmp which were
+ // both legal on vectors but had different behaviour.
+ case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred]
+ // FCmp/ICmp returning bool or vector of bool
+
+ unsigned OpNum = 0;
+ Value *LHS, *RHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
+ OpNum+1 != Record.size())
+ return Error("Invalid CMP record");
+
+ if (LHS->getType()->isFPOrFPVectorTy())
+ I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
+ else
+ I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval<optional>]
+ {
+ unsigned Size = Record.size();
+ if (Size == 0) {
+ I = ReturnInst::Create(Context);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ unsigned OpNum = 0;
+ Value *Op = NULL;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid RET record");
+ if (OpNum != Record.size())
+ return Error("Invalid RET record");
+
+ I = ReturnInst::Create(Context, Op);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#]
+ if (Record.size() != 1 && Record.size() != 3)
+ return Error("Invalid BR record");
+ BasicBlock *TrueDest = getBasicBlock(Record[0]);
+ if (TrueDest == 0)
+ return Error("Invalid BR record");
+
+ if (Record.size() == 1) {
+ I = BranchInst::Create(TrueDest);
+ InstructionList.push_back(I);
+ }
+ else {
+ BasicBlock *FalseDest = getBasicBlock(Record[1]);
+ Value *Cond = getValue(Record, 2, NextValueNo,
+ Type::getInt1Ty(Context));
+ if (FalseDest == 0 || Cond == 0)
+ return Error("Invalid BR record");
+ I = BranchInst::Create(TrueDest, FalseDest, Cond);
+ InstructionList.push_back(I);
+ }
+ break;
+ }
+ case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...]
+ // Check magic
+ if ((Record[0] >> 16) == SWITCH_INST_MAGIC) {
+ // New SwitchInst format with case ranges.
+
+ Type *OpTy = getTypeByID(Record[1]);
+ unsigned ValueBitWidth = cast<IntegerType>(OpTy)->getBitWidth();
+
+ Value *Cond = getValue(Record, 2, NextValueNo, OpTy);
+ BasicBlock *Default = getBasicBlock(Record[3]);
+ if (OpTy == 0 || Cond == 0 || Default == 0)
+ return Error("Invalid SWITCH record");
+
+ unsigned NumCases = Record[4];
+
+ SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
+ InstructionList.push_back(SI);
+
+ unsigned CurIdx = 5;
+ for (unsigned i = 0; i != NumCases; ++i) {
+ IntegersSubsetToBB CaseBuilder;
+ unsigned NumItems = Record[CurIdx++];
+ for (unsigned ci = 0; ci != NumItems; ++ci) {
+ bool isSingleNumber = Record[CurIdx++];
+
+ APInt Low;
+ unsigned ActiveWords = 1;
+ if (ValueBitWidth > 64)
+ ActiveWords = Record[CurIdx++];
+ Low = ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords),
+ ValueBitWidth);
+ CurIdx += ActiveWords;
+
+ if (!isSingleNumber) {
+ ActiveWords = 1;
+ if (ValueBitWidth > 64)
+ ActiveWords = Record[CurIdx++];
+ APInt High =
+ ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords),
+ ValueBitWidth);
+
+ CaseBuilder.add(IntItem::fromType(OpTy, Low),
+ IntItem::fromType(OpTy, High));
+ CurIdx += ActiveWords;
+ } else
+ CaseBuilder.add(IntItem::fromType(OpTy, Low));
+ }
+ BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]);
+ IntegersSubset Case = CaseBuilder.getCase();
+ SI->addCase(Case, DestBB);
+ }
+ uint16_t Hash = SI->hash();
+ if (Hash != (Record[0] & 0xFFFF))
+ return Error("Invalid SWITCH record");
+ I = SI;
+ break;
+ }
+
+ // Old SwitchInst format without case ranges.
+
+ if (Record.size() < 3 || (Record.size() & 1) == 0)
+ return Error("Invalid SWITCH record");
+ Type *OpTy = getTypeByID(Record[0]);
+ Value *Cond = getValue(Record, 1, NextValueNo, OpTy);
+ BasicBlock *Default = getBasicBlock(Record[2]);
+ if (OpTy == 0 || Cond == 0 || Default == 0)
+ return Error("Invalid SWITCH record");
+ unsigned NumCases = (Record.size()-3)/2;
+ SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
+ InstructionList.push_back(SI);
+ for (unsigned i = 0, e = NumCases; i != e; ++i) {
+ ConstantInt *CaseVal =
+ dyn_cast_or_null<ConstantInt>(getFnValueByID(Record[3+i*2], OpTy));
+ BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]);
+ if (CaseVal == 0 || DestBB == 0) {
+ delete SI;
+ return Error("Invalid SWITCH record!");
+ }
+ SI->addCase(CaseVal, DestBB);
+ }
+ I = SI;
+ break;
+ }
+ case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...]
+ if (Record.size() < 2)
+ return Error("Invalid INDIRECTBR record");
+ Type *OpTy = getTypeByID(Record[0]);
+ Value *Address = getValue(Record, 1, NextValueNo, OpTy);
+ if (OpTy == 0 || Address == 0)
+ return Error("Invalid INDIRECTBR record");
+ unsigned NumDests = Record.size()-2;
+ IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests);
+ InstructionList.push_back(IBI);
+ for (unsigned i = 0, e = NumDests; i != e; ++i) {
+ if (BasicBlock *DestBB = getBasicBlock(Record[2+i])) {
+ IBI->addDestination(DestBB);
+ } else {
+ delete IBI;
+ return Error("Invalid INDIRECTBR record!");
+ }
+ }
+ I = IBI;
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INVOKE: {
+ // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
+ if (Record.size() < 4) return Error("Invalid INVOKE record");
+ AttributeSet PAL = getAttributes(Record[0]);
+ unsigned CCInfo = Record[1];
+ BasicBlock *NormalBB = getBasicBlock(Record[2]);
+ BasicBlock *UnwindBB = getBasicBlock(Record[3]);
+
+ unsigned OpNum = 4;
+ Value *Callee;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ return Error("Invalid INVOKE record");
+
+ PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
+ FunctionType *FTy = !CalleeTy ? 0 :
+ dyn_cast<FunctionType>(CalleeTy->getElementType());
+
+ // Check that the right number of fixed parameters are here.
+ if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 ||
+ Record.size() < OpNum+FTy->getNumParams())
+ return Error("Invalid INVOKE record");
+
+ SmallVector<Value*, 16> Ops;
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+ Ops.push_back(getValue(Record, OpNum, NextValueNo,
+ FTy->getParamType(i)));
+ if (Ops.back() == 0) return Error("Invalid INVOKE record");
+ }
+
+ if (!FTy->isVarArg()) {
+ if (Record.size() != OpNum)
+ return Error("Invalid INVOKE record");
+ } else {
+ // Read type/value pairs for varargs params.
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid INVOKE record");
+ Ops.push_back(Op);
+ }
+ }
+
+ I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops);
+ InstructionList.push_back(I);
+ cast<InvokeInst>(I)->setCallingConv(
+ static_cast<CallingConv::ID>(CCInfo));
+ cast<InvokeInst>(I)->setAttributes(PAL);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval]
+ unsigned Idx = 0;
+ Value *Val = 0;
+ if (getValueTypePair(Record, Idx, NextValueNo, Val))
+ return Error("Invalid RESUME record");
+ I = ResumeInst::Create(Val);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE
+ I = new UnreachableInst(Context);
+ InstructionList.push_back(I);
+ break;
+ case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
+ if (Record.size() < 1 || ((Record.size()-1)&1))
+ return Error("Invalid PHI record");
+ Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid PHI record");
+
+ PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
+ InstructionList.push_back(PN);
+
+ for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
+ Value *V;
+ // With the new function encoding, it is possible that operands have
+ // negative IDs (for forward references). Use a signed VBR
+ // representation to keep the encoding small.
+ if (UseRelativeIDs)
+ V = getValueSigned(Record, 1+i, NextValueNo, Ty);
+ else
+ V = getValue(Record, 1+i, NextValueNo, Ty);
+ BasicBlock *BB = getBasicBlock(Record[2+i]);
+ if (!V || !BB) return Error("Invalid PHI record");
+ PN->addIncoming(V, BB);
+ }
+ I = PN;
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_LANDINGPAD: {
+ // LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?]
+ unsigned Idx = 0;
+ if (Record.size() < 4)
+ return Error("Invalid LANDINGPAD record");
+ Type *Ty = getTypeByID(Record[Idx++]);
+ if (!Ty) return Error("Invalid LANDINGPAD record");
+ Value *PersFn = 0;
+ if (getValueTypePair(Record, Idx, NextValueNo, PersFn))
+ return Error("Invalid LANDINGPAD record");
+
+ bool IsCleanup = !!Record[Idx++];
+ unsigned NumClauses = Record[Idx++];
+ LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, NumClauses);
+ LP->setCleanup(IsCleanup);
+ for (unsigned J = 0; J != NumClauses; ++J) {
+ LandingPadInst::ClauseType CT =
+ LandingPadInst::ClauseType(Record[Idx++]); (void)CT;
+ Value *Val;
+
+ if (getValueTypePair(Record, Idx, NextValueNo, Val)) {
+ delete LP;
+ return Error("Invalid LANDINGPAD record");
+ }
+
+ assert((CT != LandingPadInst::Catch ||
+ !isa<ArrayType>(Val->getType())) &&
+ "Catch clause has a invalid type!");
+ assert((CT != LandingPadInst::Filter ||
+ isa<ArrayType>(Val->getType())) &&
+ "Filter clause has invalid type!");
+ LP->addClause(Val);
+ }
+
+ I = LP;
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
+ if (Record.size() != 4)
+ return Error("Invalid ALLOCA record");
+ PointerType *Ty =
+ dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
+ Type *OpTy = getTypeByID(Record[1]);
+ Value *Size = getFnValueByID(Record[2], OpTy);
+ unsigned Align = Record[3];
+ if (!Ty || !Size) return Error("Invalid ALLOCA record");
+ I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid LOAD record");
+
+ I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_LOADATOMIC: {
+ // LOADATOMIC: [opty, op, align, vol, ordering, synchscope]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum+4 != Record.size())
+ return Error("Invalid LOADATOMIC record");
+
+
+ AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+ if (Ordering == NotAtomic || Ordering == Release ||
+ Ordering == AcquireRelease)
+ return Error("Invalid LOADATOMIC record");
+ if (Ordering != NotAtomic && Record[OpNum] == 0)
+ return Error("Invalid LOADATOMIC record");
+ SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+
+ I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1,
+ Ordering, SynchScope);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_STORE: { // STORE2:[ptrty, ptr, val, align, vol]
+ unsigned OpNum = 0;
+ Value *Val, *Ptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ popValue(Record, OpNum, NextValueNo,
+ cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid STORE record");
+
+ I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_STOREATOMIC: {
+ // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, synchscope]
+ unsigned OpNum = 0;
+ Value *Val, *Ptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ popValue(Record, OpNum, NextValueNo,
+ cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
+ OpNum+4 != Record.size())
+ return Error("Invalid STOREATOMIC record");
+
+ AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+ if (Ordering == NotAtomic || Ordering == Acquire ||
+ Ordering == AcquireRelease)
+ return Error("Invalid STOREATOMIC record");
+ SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+ if (Ordering != NotAtomic && Record[OpNum] == 0)
+ return Error("Invalid STOREATOMIC record");
+
+ I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1,
+ Ordering, SynchScope);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CMPXCHG: {
+ // CMPXCHG:[ptrty, ptr, cmp, new, vol, ordering, synchscope]
+ unsigned OpNum = 0;
+ Value *Ptr, *Cmp, *New;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ popValue(Record, OpNum, NextValueNo,
+ cast<PointerType>(Ptr->getType())->getElementType(), Cmp) ||
+ popValue(Record, OpNum, NextValueNo,
+ cast<PointerType>(Ptr->getType())->getElementType(), New) ||
+ OpNum+3 != Record.size())
+ return Error("Invalid CMPXCHG record");
+ AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+1]);
+ if (Ordering == NotAtomic || Ordering == Unordered)
+ return Error("Invalid CMPXCHG record");
+ SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]);
+ I = new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope);
+ cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_ATOMICRMW: {
+ // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, synchscope]
+ unsigned OpNum = 0;
+ Value *Ptr, *Val;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ popValue(Record, OpNum, NextValueNo,
+ cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
+ OpNum+4 != Record.size())
+ return Error("Invalid ATOMICRMW record");
+ AtomicRMWInst::BinOp Operation = GetDecodedRMWOperation(Record[OpNum]);
+ if (Operation < AtomicRMWInst::FIRST_BINOP ||
+ Operation > AtomicRMWInst::LAST_BINOP)
+ return Error("Invalid ATOMICRMW record");
+ AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+ if (Ordering == NotAtomic || Ordering == Unordered)
+ return Error("Invalid ATOMICRMW record");
+ SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+ I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope);
+ cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope]
+ if (2 != Record.size())
+ return Error("Invalid FENCE record");
+ AtomicOrdering Ordering = GetDecodedOrdering(Record[0]);
+ if (Ordering == NotAtomic || Ordering == Unordered ||
+ Ordering == Monotonic)
+ return Error("Invalid FENCE record");
+ SynchronizationScope SynchScope = GetDecodedSynchScope(Record[1]);
+ I = new FenceInst(Context, Ordering, SynchScope);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CALL: {
+ // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
+ if (Record.size() < 3)
+ return Error("Invalid CALL record");
+
+ AttributeSet PAL = getAttributes(Record[0]);
+ unsigned CCInfo = Record[1];
+
+ unsigned OpNum = 2;
+ Value *Callee;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ return Error("Invalid CALL record");
+
+ PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
+ FunctionType *FTy = 0;
+ if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
+ if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
+ return Error("Invalid CALL record");
+
+ SmallVector<Value*, 16> Args;
+ // Read the fixed params.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+ if (FTy->getParamType(i)->isLabelTy())
+ Args.push_back(getBasicBlock(Record[OpNum]));
+ else
+ Args.push_back(getValue(Record, OpNum, NextValueNo,
+ FTy->getParamType(i)));
+ if (Args.back() == 0) return Error("Invalid CALL record");
+ }
+
+ // Read type/value pairs for varargs params.
+ if (!FTy->isVarArg()) {
+ if (OpNum != Record.size())
+ return Error("Invalid CALL record");
+ } else {
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid CALL record");
+ Args.push_back(Op);
+ }
+ }
+
+ I = CallInst::Create(Callee, Args);
+ InstructionList.push_back(I);
+ cast<CallInst>(I)->setCallingConv(
+ static_cast<CallingConv::ID>(CCInfo>>1));
+ cast<CallInst>(I)->setTailCall(CCInfo & 1);
+ cast<CallInst>(I)->setAttributes(PAL);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty]
+ if (Record.size() < 3)
+ return Error("Invalid VAARG record");
+ Type *OpTy = getTypeByID(Record[0]);
+ Value *Op = getValue(Record, 1, NextValueNo, OpTy);
+ Type *ResTy = getTypeByID(Record[2]);
+ if (!OpTy || !Op || !ResTy)
+ return Error("Invalid VAARG record");
+ I = new VAArgInst(Op, ResTy);
+ InstructionList.push_back(I);
+ break;
+ }
+ }
+
+ // Add instruction to end of current BB. If there is no current BB, reject
+ // this file.
+ if (CurBB == 0) {
+ delete I;
+ return Error("Invalid instruction with no BB");
+ }
+ CurBB->getInstList().push_back(I);
+
+ // If this was a terminator instruction, move to the next block.
+ if (isa<TerminatorInst>(I)) {
+ ++CurBBNo;
+ CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0;
+ }
+
+ // Non-void values get registered in the value table for future use.
+ if (I && !I->getType()->isVoidTy())
+ ValueList.AssignValue(I, NextValueNo++);
+ }
+
+OutOfRecordLoop:
+
+ // Check the function list for unresolved values.
+ if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
+ if (A->getParent() == 0) {
+ // We found at least one unresolved value. Nuke them all to avoid leaks.
+ for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){
+ if ((A = dyn_cast<Argument>(ValueList[i])) && A->getParent() == 0) {
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ delete A;
+ }
+ }
+ return Error("Never resolved value found in function!");
+ }
+ }
+
+ // FIXME: Check for unresolved forward-declared metadata references
+ // and clean up leaks.
+
+ // See if anything took the address of blocks in this function. If so,
+ // resolve them now.
+ DenseMap<Function*, std::vector<BlockAddrRefTy> >::iterator BAFRI =
+ BlockAddrFwdRefs.find(F);
+ if (BAFRI != BlockAddrFwdRefs.end()) {
+ std::vector<BlockAddrRefTy> &RefList = BAFRI->second;
+ for (unsigned i = 0, e = RefList.size(); i != e; ++i) {
+ unsigned BlockIdx = RefList[i].first;
+ if (BlockIdx >= FunctionBBs.size())
+ return Error("Invalid blockaddress block #");
+
+ GlobalVariable *FwdRef = RefList[i].second;
+ FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx]));
+ FwdRef->eraseFromParent();
+ }
+
+ BlockAddrFwdRefs.erase(BAFRI);
+ }
+
+ // Trim the value list down to the size it was before we parsed this function.
+ ValueList.shrinkTo(ModuleValueListSize);
+ MDValueList.shrinkTo(ModuleMDValueListSize);
+ std::vector<BasicBlock*>().swap(FunctionBBs);
+ return false;
+}
+
+/// FindFunctionInStream - Find the function body in the bitcode stream
+bool BitcodeReader::FindFunctionInStream(Function *F,
+ DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator) {
+ while (DeferredFunctionInfoIterator->second == 0) {
+ if (Stream.AtEndOfStream())
+ return Error("Could not find Function in stream");
+ // ParseModule will parse the next body in the stream and set its
+ // position in the DeferredFunctionInfo map.
+ if (ParseModule(true)) return true;
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// GVMaterializer implementation
+//===----------------------------------------------------------------------===//
+
+
+bool BitcodeReader::isMaterializable(const GlobalValue *GV) const {
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ return F->isDeclaration() &&
+ DeferredFunctionInfo.count(const_cast<Function*>(F));
+ }
+ return false;
+}
+
+bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) {
+ Function *F = dyn_cast<Function>(GV);
+ // If it's not a function or is already material, ignore the request.
+ if (!F || !F->isMaterializable()) return false;
+
+ DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
+ assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
+ // If its position is recorded as 0, its body is somewhere in the stream
+ // but we haven't seen it yet.
+ if (DFII->second == 0)
+ if (LazyStreamer && FindFunctionInStream(F, DFII)) return true;
+
+ // Move the bit stream to the saved position of the deferred function body.
+ Stream.JumpToBit(DFII->second);
+
+ if (ParseFunctionBody(F)) {
+ if (ErrInfo) *ErrInfo = ErrorString;
+ return true;
+ }
+
+ // Upgrade any old intrinsic calls in the function.
+ for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(),
+ E = UpgradedIntrinsics.end(); I != E; ++I) {
+ if (I->first != I->second) {
+ for (Value::use_iterator UI = I->first->use_begin(),
+ UE = I->first->use_end(); UI != UE; ) {
+ if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, I->second);
+ }
+ }
+ }
+
+ return false;
+}
+
+bool BitcodeReader::isDematerializable(const GlobalValue *GV) const {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F || F->isDeclaration())
+ return false;
+ return DeferredFunctionInfo.count(const_cast<Function*>(F));
+}
+
+void BitcodeReader::Dematerialize(GlobalValue *GV) {
+ Function *F = dyn_cast<Function>(GV);
+ // If this function isn't dematerializable, this is a noop.
+ if (!F || !isDematerializable(F))
+ return;
+
+ assert(DeferredFunctionInfo.count(F) && "No info to read function later?");
+
+ // Just forget the function body, we can remat it later.
+ F->deleteBody();
+}
+
+
+bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
+ assert(M == TheModule &&
+ "Can only Materialize the Module this BitcodeReader is attached to.");
+ // Iterate over the module, deserializing any functions that are still on
+ // disk.
+ for (Module::iterator F = TheModule->begin(), E = TheModule->end();
+ F != E; ++F)
+ if (F->isMaterializable() &&
+ Materialize(F, ErrInfo))
+ return true;
+
+ // At this point, if there are any function bodies, the current bit is
+ // pointing to the END_BLOCK record after them. Now make sure the rest
+ // of the bits in the module have been read.
+ if (NextUnreadBit)
+ ParseModule(true);
+
+ // Upgrade any intrinsic calls that slipped through (should not happen!) and
+ // delete the old functions to clean up. We can't do this unless the entire
+ // module is materialized because there could always be another function body
+ // with calls to the old function.
+ for (std::vector<std::pair<Function*, Function*> >::iterator I =
+ UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) {
+ if (I->first != I->second) {
+ for (Value::use_iterator UI = I->first->use_begin(),
+ UE = I->first->use_end(); UI != UE; ) {
+ if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, I->second);
+ }
+ if (!I->first->use_empty())
+ I->first->replaceAllUsesWith(I->second);
+ I->first->eraseFromParent();
+ }
+ }
+ std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
+
+ return false;
+}
+
+bool BitcodeReader::InitStream() {
+ if (LazyStreamer) return InitLazyStream();
+ return InitStreamFromBuffer();
+}
+
+bool BitcodeReader::InitStreamFromBuffer() {
+ const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart();
+ const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+ if (Buffer->getBufferSize() & 3) {
+ if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
+ return Error("Invalid bitcode signature");
+ else
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+ }
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
+ return Error("Invalid bitcode wrapper header");
+
+ StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
+ Stream.init(*StreamFile);
+
+ return false;
+}
+
+bool BitcodeReader::InitLazyStream() {
+ // Check and strip off the bitcode wrapper; BitstreamReader expects never to
+ // see it.
+ StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer);
+ StreamFile.reset(new BitstreamReader(Bytes));
+ Stream.init(*StreamFile);
+
+ unsigned char buf[16];
+ if (Bytes->readBytes(0, 16, buf, NULL) == -1)
+ return Error("Bitcode stream must be at least 16 bytes in length");
+
+ if (!isBitcode(buf, buf + 16))
+ return Error("Invalid bitcode signature");
+
+ if (isBitcodeWrapper(buf, buf + 4)) {
+ const unsigned char *bitcodeStart = buf;
+ const unsigned char *bitcodeEnd = buf + 16;
+ SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false);
+ Bytes->dropLeadingBytes(bitcodeStart - buf);
+ Bytes->setKnownObjectSize(bitcodeEnd - bitcodeStart);
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// External interface
+//===----------------------------------------------------------------------===//
+
+/// getLazyBitcodeModule - lazy function-at-a-time loading from a file.
+///
+Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer,
+ LLVMContext& Context,
+ std::string *ErrMsg) {
+ Module *M = new Module(Buffer->getBufferIdentifier(), Context);
+ BitcodeReader *R = new BitcodeReader(Buffer, Context);
+ M->setMaterializer(R);
+ if (R->ParseBitcodeInto(M)) {
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+
+ delete M; // Also deletes R.
+ return 0;
+ }
+ // Have the BitcodeReader dtor delete 'Buffer'.
+ R->setBufferOwned(true);
+
+ R->materializeForwardReferencedFunctions();
+
+ return M;
+}
+
+
+Module *llvm::getStreamedBitcodeModule(const std::string &name,
+ DataStreamer *streamer,
+ LLVMContext &Context,
+ std::string *ErrMsg) {
+ Module *M = new Module(name, Context);
+ BitcodeReader *R = new BitcodeReader(streamer, Context);
+ M->setMaterializer(R);
+ if (R->ParseBitcodeInto(M)) {
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+ delete M; // Also deletes R.
+ return 0;
+ }
+ R->setBufferOwned(false); // no buffer to delete
+ return M;
+}
+
+/// ParseBitcodeFile - Read the specified bitcode file, returning the module.
+/// If an error occurs, return null and fill in *ErrMsg if non-null.
+Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
+ std::string *ErrMsg){
+ Module *M = getLazyBitcodeModule(Buffer, Context, ErrMsg);
+ if (!M) return 0;
+
+ // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether
+ // there was an error.
+ static_cast<BitcodeReader*>(M->getMaterializer())->setBufferOwned(false);
+
+ // Read in the entire module, and destroy the BitcodeReader.
+ if (M->MaterializeAllPermanently(ErrMsg)) {
+ delete M;
+ return 0;
+ }
+
+ // TODO: Restore the use-lists to the in-memory state when the bitcode was
+ // written. We must defer until the Module has been fully materialized.
+
+ return M;
+}
+
+std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer,
+ LLVMContext& Context,
+ std::string *ErrMsg) {
+ BitcodeReader *R = new BitcodeReader(Buffer, Context);
+ // Don't let the BitcodeReader dtor delete 'Buffer'.
+ R->setBufferOwned(false);
+
+ std::string Triple("");
+ if (R->ParseTriple(Triple))
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+
+ delete R;
+ return Triple;
+}
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
new file mode 100644
index 000000000000..28674eb14ef2
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
@@ -0,0 +1,349 @@
+//===- BitcodeReader.h - Internal BitcodeReader impl ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitcodeReader class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITCODE_READER_H
+#define BITCODE_READER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/GVMaterializer.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ValueHandle.h"
+#include <vector>
+
+namespace llvm {
+ class MemoryBuffer;
+ class LLVMContext;
+
+//===----------------------------------------------------------------------===//
+// BitcodeReaderValueList Class
+//===----------------------------------------------------------------------===//
+
+class BitcodeReaderValueList {
+ std::vector<WeakVH> ValuePtrs;
+
+ /// ResolveConstants - As we resolve forward-referenced constants, we add
+ /// information about them to this vector. This allows us to resolve them in
+ /// bulk instead of resolving each reference at a time. See the code in
+ /// ResolveConstantForwardRefs for more information about this.
+ ///
+ /// The key of this vector is the placeholder constant, the value is the slot
+ /// number that holds the resolved value.
+ typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
+ ResolveConstantsTy ResolveConstants;
+ LLVMContext &Context;
+public:
+ BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
+ ~BitcodeReaderValueList() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ }
+
+ // vector compatibility methods
+ unsigned size() const { return ValuePtrs.size(); }
+ void resize(unsigned N) { ValuePtrs.resize(N); }
+ void push_back(Value *V) {
+ ValuePtrs.push_back(V);
+ }
+
+ void clear() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ ValuePtrs.clear();
+ }
+
+ Value *operator[](unsigned i) const {
+ assert(i < ValuePtrs.size());
+ return ValuePtrs[i];
+ }
+
+ Value *back() const { return ValuePtrs.back(); }
+ void pop_back() { ValuePtrs.pop_back(); }
+ bool empty() const { return ValuePtrs.empty(); }
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ ValuePtrs.resize(N);
+ }
+
+ Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
+ Value *getValueFwdRef(unsigned Idx, Type *Ty);
+
+ void AssignValue(Value *V, unsigned Idx);
+
+ /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+ /// resolves any forward references.
+ void ResolveConstantForwardRefs();
+};
+
+
+//===----------------------------------------------------------------------===//
+// BitcodeReaderMDValueList Class
+//===----------------------------------------------------------------------===//
+
+class BitcodeReaderMDValueList {
+ std::vector<WeakVH> MDValuePtrs;
+
+ LLVMContext &Context;
+public:
+ BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {}
+
+ // vector compatibility methods
+ unsigned size() const { return MDValuePtrs.size(); }
+ void resize(unsigned N) { MDValuePtrs.resize(N); }
+ void push_back(Value *V) { MDValuePtrs.push_back(V); }
+ void clear() { MDValuePtrs.clear(); }
+ Value *back() const { return MDValuePtrs.back(); }
+ void pop_back() { MDValuePtrs.pop_back(); }
+ bool empty() const { return MDValuePtrs.empty(); }
+
+ Value *operator[](unsigned i) const {
+ assert(i < MDValuePtrs.size());
+ return MDValuePtrs[i];
+ }
+
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ MDValuePtrs.resize(N);
+ }
+
+ Value *getValueFwdRef(unsigned Idx);
+ void AssignValue(Value *V, unsigned Idx);
+};
+
+class BitcodeReader : public GVMaterializer {
+ LLVMContext &Context;
+ Module *TheModule;
+ MemoryBuffer *Buffer;
+ bool BufferOwned;
+ OwningPtr<BitstreamReader> StreamFile;
+ BitstreamCursor Stream;
+ DataStreamer *LazyStreamer;
+ uint64_t NextUnreadBit;
+ bool SeenValueSymbolTable;
+
+ const char *ErrorString;
+
+ std::vector<Type*> TypeList;
+ BitcodeReaderValueList ValueList;
+ BitcodeReaderMDValueList MDValueList;
+ SmallVector<Instruction *, 64> InstructionList;
+ SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
+
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+ std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
+
+ /// MAttributes - The set of attributes by index. Index zero in the
+ /// file is for null, and is thus not represented here. As such all indices
+ /// are off by one.
+ std::vector<AttributeSet> MAttributes;
+
+ /// \brief The set of attribute groups.
+ std::map<unsigned, AttributeSet> MAttributeGroups;
+
+ /// FunctionBBs - While parsing a function body, this is a list of the basic
+ /// blocks for the function.
+ std::vector<BasicBlock*> FunctionBBs;
+
+ // When reading the module header, this list is populated with functions that
+ // have bodies later in the file.
+ std::vector<Function*> FunctionsWithBodies;
+
+ // When intrinsic functions are encountered which require upgrading they are
+ // stored here with their replacement function.
+ typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
+ UpgradedIntrinsicMap UpgradedIntrinsics;
+
+ // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
+ DenseMap<unsigned, unsigned> MDKindMap;
+
+ // Several operations happen after the module header has been read, but
+ // before function bodies are processed. This keeps track of whether
+ // we've done this yet.
+ bool SeenFirstFunctionBody;
+
+ /// DeferredFunctionInfo - When function bodies are initially scanned, this
+ /// map contains info about where to find deferred function body in the
+ /// stream.
+ DenseMap<Function*, uint64_t> DeferredFunctionInfo;
+
+ /// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These
+ /// are resolved lazily when functions are loaded.
+ typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
+ DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
+
+ /// UseRelativeIDs - Indicates that we are using a new encoding for
+ /// instruction operands where most operands in the current
+ /// FUNCTION_BLOCK are encoded relative to the instruction number,
+ /// for a more compact encoding. Some instruction operands are not
+ /// relative to the instruction ID: basic block numbers, and types.
+ /// Once the old style function blocks have been phased out, we would
+ /// not need this flag.
+ bool UseRelativeIDs;
+
+public:
+ explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
+ : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
+ LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false),
+ ErrorString(0), ValueList(C), MDValueList(C),
+ SeenFirstFunctionBody(false), UseRelativeIDs(false) {
+ }
+ explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
+ : Context(C), TheModule(0), Buffer(0), BufferOwned(false),
+ LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false),
+ ErrorString(0), ValueList(C), MDValueList(C),
+ SeenFirstFunctionBody(false), UseRelativeIDs(false) {
+ }
+ ~BitcodeReader() {
+ FreeState();
+ }
+
+ void materializeForwardReferencedFunctions();
+
+ void FreeState();
+
+ /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer
+ /// when the reader is destroyed.
+ void setBufferOwned(bool Owned) { BufferOwned = Owned; }
+
+ virtual bool isMaterializable(const GlobalValue *GV) const;
+ virtual bool isDematerializable(const GlobalValue *GV) const;
+ virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0);
+ virtual bool MaterializeModule(Module *M, std::string *ErrInfo = 0);
+ virtual void Dematerialize(GlobalValue *GV);
+
+ bool Error(const char *Str) {
+ ErrorString = Str;
+ return true;
+ }
+ const char *getErrorString() const { return ErrorString; }
+
+ /// @brief Main interface to parsing a bitcode buffer.
+ /// @returns true if an error occurred.
+ bool ParseBitcodeInto(Module *M);
+
+ /// @brief Cheap mechanism to just extract module triple
+ /// @returns true if an error occurred.
+ bool ParseTriple(std::string &Triple);
+
+ static uint64_t decodeSignRotatedValue(uint64_t V);
+
+private:
+ Type *getTypeByID(unsigned ID);
+ Value *getFnValueByID(unsigned ID, Type *Ty) {
+ if (Ty && Ty->isMetadataTy())
+ return MDValueList.getValueFwdRef(ID);
+ return ValueList.getValueFwdRef(ID, Ty);
+ }
+ BasicBlock *getBasicBlock(unsigned ID) const {
+ if (ID >= FunctionBBs.size()) return 0; // Invalid ID
+ return FunctionBBs[ID];
+ }
+ AttributeSet getAttributes(unsigned i) const {
+ if (i-1 < MAttributes.size())
+ return MAttributes[i-1];
+ return AttributeSet();
+ }
+
+ /// getValueTypePair - Read a value/type pair out of the specified record from
+ /// slot 'Slot'. Increment Slot past the number of slots used in the record.
+ /// Return true on failure.
+ bool getValueTypePair(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+ unsigned InstNum, Value *&ResVal) {
+ if (Slot == Record.size()) return true;
+ unsigned ValNo = (unsigned)Record[Slot++];
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ if (ValNo < InstNum) {
+ // If this is not a forward reference, just return the value we already
+ // have.
+ ResVal = getFnValueByID(ValNo, 0);
+ return ResVal == 0;
+ } else if (Slot == Record.size()) {
+ return true;
+ }
+
+ unsigned TypeNo = (unsigned)Record[Slot++];
+ ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
+ return ResVal == 0;
+ }
+
+ /// popValue - Read a value out of the specified record from slot 'Slot'.
+ /// Increment Slot past the number of slots used by the value in the record.
+ /// Return true if there is an error.
+ bool popValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+ unsigned InstNum, Type *Ty, Value *&ResVal) {
+ if (getValue(Record, Slot, InstNum, Ty, ResVal))
+ return true;
+ // All values currently take a single record slot.
+ ++Slot;
+ return false;
+ }
+
+ /// getValue -- Like popValue, but does not increment the Slot number.
+ bool getValue(SmallVector<uint64_t, 64> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty, Value *&ResVal) {
+ ResVal = getValue(Record, Slot, InstNum, Ty);
+ return ResVal == 0;
+ }
+
+ /// getValue -- Version of getValue that returns ResVal directly,
+ /// or 0 if there is an error.
+ Value *getValue(SmallVector<uint64_t, 64> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty) {
+ if (Slot == Record.size()) return 0;
+ unsigned ValNo = (unsigned)Record[Slot];
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ return getFnValueByID(ValNo, Ty);
+ }
+
+ /// getValueSigned -- Like getValue, but decodes signed VBRs.
+ Value *getValueSigned(SmallVector<uint64_t, 64> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty) {
+ if (Slot == Record.size()) return 0;
+ unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ return getFnValueByID(ValNo, Ty);
+ }
+
+ bool ParseModule(bool Resume);
+ bool ParseAttributeBlock();
+ bool ParseAttributeGroupBlock();
+ bool ParseTypeTable();
+ bool ParseTypeTableBody();
+
+ bool ParseValueSymbolTable();
+ bool ParseConstants();
+ bool RememberAndSkipFunctionBody();
+ bool ParseFunctionBody(Function *F);
+ bool GlobalCleanup();
+ bool ResolveGlobalAndAliasInits();
+ bool ParseMetadata();
+ bool ParseMetadataAttachment();
+ bool ParseModuleTriple(std::string &Triple);
+ bool ParseUseLists();
+ bool InitStream();
+ bool InitStreamFromBuffer();
+ bool InitLazyStream();
+ bool FindFunctionInStream(Function *F,
+ DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp
new file mode 100644
index 000000000000..9dafe2a03670
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -0,0 +1,371 @@
+//===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitstreamReader.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// BitstreamCursor implementation
+//===----------------------------------------------------------------------===//
+
+void BitstreamCursor::operator=(const BitstreamCursor &RHS) {
+ freeState();
+
+ BitStream = RHS.BitStream;
+ NextChar = RHS.NextChar;
+ CurWord = RHS.CurWord;
+ BitsInCurWord = RHS.BitsInCurWord;
+ CurCodeSize = RHS.CurCodeSize;
+
+ // Copy abbreviations, and bump ref counts.
+ CurAbbrevs = RHS.CurAbbrevs;
+ for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i)
+ CurAbbrevs[i]->addRef();
+
+ // Copy block scope and bump ref counts.
+ BlockScope = RHS.BlockScope;
+ for (size_t S = 0, e = BlockScope.size(); S != e; ++S) {
+ std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
+ for (size_t i = 0, e = Abbrevs.size(); i != e; ++i)
+ Abbrevs[i]->addRef();
+ }
+}
+
+void BitstreamCursor::freeState() {
+ // Free all the Abbrevs.
+ for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i)
+ CurAbbrevs[i]->dropRef();
+ CurAbbrevs.clear();
+
+ // Free all the Abbrevs in the block scope.
+ for (size_t S = 0, e = BlockScope.size(); S != e; ++S) {
+ std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
+ for (size_t i = 0, e = Abbrevs.size(); i != e; ++i)
+ Abbrevs[i]->dropRef();
+ }
+ BlockScope.clear();
+}
+
+/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
+/// the block, and return true if the block has an error.
+bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
+ // Save the current block's state on BlockScope.
+ BlockScope.push_back(Block(CurCodeSize));
+ BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
+
+ // Add the abbrevs specific to this block to the CurAbbrevs list.
+ if (const BitstreamReader::BlockInfo *Info =
+ BitStream->getBlockInfo(BlockID)) {
+ for (size_t i = 0, e = Info->Abbrevs.size(); i != e; ++i) {
+ CurAbbrevs.push_back(Info->Abbrevs[i]);
+ CurAbbrevs.back()->addRef();
+ }
+ }
+
+ // Get the codesize of this block.
+ CurCodeSize = ReadVBR(bitc::CodeLenWidth);
+ SkipToFourByteBoundary();
+ unsigned NumWords = Read(bitc::BlockSizeWidth);
+ if (NumWordsP) *NumWordsP = NumWords;
+
+ // Validate that this block is sane.
+ if (CurCodeSize == 0 || AtEndOfStream())
+ return true;
+
+ return false;
+}
+
+void BitstreamCursor::readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
+ SmallVectorImpl<uint64_t> &Vals) {
+ assert(Op.isLiteral() && "Not a literal");
+ // If the abbrev specifies the literal value to use, use it.
+ Vals.push_back(Op.getLiteralValue());
+}
+
+void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op,
+ SmallVectorImpl<uint64_t> &Vals) {
+ assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
+
+ // Decode the value as we are commanded.
+ switch (Op.getEncoding()) {
+ case BitCodeAbbrevOp::Array:
+ case BitCodeAbbrevOp::Blob:
+ assert(0 && "Should not reach here");
+ case BitCodeAbbrevOp::Fixed:
+ Vals.push_back(Read((unsigned)Op.getEncodingData()));
+ break;
+ case BitCodeAbbrevOp::VBR:
+ Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData()));
+ break;
+ case BitCodeAbbrevOp::Char6:
+ Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
+ break;
+ }
+}
+
+void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) {
+ assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
+
+ // Decode the value as we are commanded.
+ switch (Op.getEncoding()) {
+ case BitCodeAbbrevOp::Array:
+ case BitCodeAbbrevOp::Blob:
+ assert(0 && "Should not reach here");
+ case BitCodeAbbrevOp::Fixed:
+ (void)Read((unsigned)Op.getEncodingData());
+ break;
+ case BitCodeAbbrevOp::VBR:
+ (void)ReadVBR64((unsigned)Op.getEncodingData());
+ break;
+ case BitCodeAbbrevOp::Char6:
+ (void)Read(6);
+ break;
+ }
+}
+
+
+
+/// skipRecord - Read the current record and discard it.
+void BitstreamCursor::skipRecord(unsigned AbbrevID) {
+ // Skip unabbreviated records by reading past their entries.
+ if (AbbrevID == bitc::UNABBREV_RECORD) {
+ unsigned Code = ReadVBR(6);
+ (void)Code;
+ unsigned NumElts = ReadVBR(6);
+ for (unsigned i = 0; i != NumElts; ++i)
+ (void)ReadVBR64(6);
+ return;
+ }
+
+ const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+
+ for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+ const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+ if (Op.isLiteral())
+ continue;
+
+ if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
+ Op.getEncoding() != BitCodeAbbrevOp::Blob) {
+ skipAbbreviatedField(Op);
+ continue;
+ }
+
+ if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
+ // Array case. Read the number of elements as a vbr6.
+ unsigned NumElts = ReadVBR(6);
+
+ // Get the element encoding.
+ assert(i+2 == e && "array op not second to last?");
+ const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
+
+ // Read all the elements.
+ for (; NumElts; --NumElts)
+ skipAbbreviatedField(EltEnc);
+ continue;
+ }
+
+ assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
+ // Blob case. Read the number of bytes as a vbr6.
+ unsigned NumElts = ReadVBR(6);
+ SkipToFourByteBoundary(); // 32-bit alignment
+
+ // Figure out where the end of this blob will be including tail padding.
+ size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8;
+
+ // If this would read off the end of the bitcode file, just set the
+ // record to empty and return.
+ if (!canSkipToPos(NewEnd/8)) {
+ NextChar = BitStream->getBitcodeBytes().getExtent();
+ break;
+ }
+
+ // Skip over the blob.
+ JumpToBit(NewEnd);
+ }
+}
+
+unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
+ SmallVectorImpl<uint64_t> &Vals,
+ StringRef *Blob) {
+ if (AbbrevID == bitc::UNABBREV_RECORD) {
+ unsigned Code = ReadVBR(6);
+ unsigned NumElts = ReadVBR(6);
+ for (unsigned i = 0; i != NumElts; ++i)
+ Vals.push_back(ReadVBR64(6));
+ return Code;
+ }
+
+ const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+
+ for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+ const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+ if (Op.isLiteral()) {
+ readAbbreviatedLiteral(Op, Vals);
+ continue;
+ }
+
+ if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
+ Op.getEncoding() != BitCodeAbbrevOp::Blob) {
+ readAbbreviatedField(Op, Vals);
+ continue;
+ }
+
+ if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
+ // Array case. Read the number of elements as a vbr6.
+ unsigned NumElts = ReadVBR(6);
+
+ // Get the element encoding.
+ assert(i+2 == e && "array op not second to last?");
+ const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
+
+ // Read all the elements.
+ for (; NumElts; --NumElts)
+ readAbbreviatedField(EltEnc, Vals);
+ continue;
+ }
+
+ assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
+ // Blob case. Read the number of bytes as a vbr6.
+ unsigned NumElts = ReadVBR(6);
+ SkipToFourByteBoundary(); // 32-bit alignment
+
+ // Figure out where the end of this blob will be including tail padding.
+ size_t CurBitPos = GetCurrentBitNo();
+ size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8;
+
+ // If this would read off the end of the bitcode file, just set the
+ // record to empty and return.
+ if (!canSkipToPos(NewEnd/8)) {
+ Vals.append(NumElts, 0);
+ NextChar = BitStream->getBitcodeBytes().getExtent();
+ break;
+ }
+
+ // Otherwise, inform the streamer that we need these bytes in memory.
+ const char *Ptr = (const char*)
+ BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts);
+
+ // If we can return a reference to the data, do so to avoid copying it.
+ if (Blob) {
+ *Blob = StringRef(Ptr, NumElts);
+ } else {
+ // Otherwise, unpack into Vals with zero extension.
+ for (; NumElts; --NumElts)
+ Vals.push_back((unsigned char)*Ptr++);
+ }
+ // Skip over tail padding.
+ JumpToBit(NewEnd);
+ }
+
+ unsigned Code = (unsigned)Vals[0];
+ Vals.erase(Vals.begin());
+ return Code;
+}
+
+
+void BitstreamCursor::ReadAbbrevRecord() {
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ unsigned NumOpInfo = ReadVBR(5);
+ for (unsigned i = 0; i != NumOpInfo; ++i) {
+ bool IsLiteral = Read(1) ? true : false;
+ if (IsLiteral) {
+ Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
+ continue;
+ }
+
+ BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3);
+ if (BitCodeAbbrevOp::hasEncodingData(E)) {
+ unsigned Data = ReadVBR64(5);
+
+ // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
+ // and vbr(0) as a literal zero. This is decoded the same way, and avoids
+ // a slow path in Read() to have to handle reading zero bits.
+ if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
+ Data == 0) {
+ Abbv->Add(BitCodeAbbrevOp(0));
+ continue;
+ }
+
+ Abbv->Add(BitCodeAbbrevOp(E, Data));
+ } else
+ Abbv->Add(BitCodeAbbrevOp(E));
+ }
+ CurAbbrevs.push_back(Abbv);
+}
+
+bool BitstreamCursor::ReadBlockInfoBlock() {
+ // If this is the second stream to get to the block info block, skip it.
+ if (BitStream->hasBlockInfoRecords())
+ return SkipBlock();
+
+ if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
+
+ SmallVector<uint64_t, 64> Record;
+ BitstreamReader::BlockInfo *CurBlockInfo = 0;
+
+ // Read all the records for this module.
+ while (1) {
+ BitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
+
+ switch (Entry.Kind) {
+ case llvm::BitstreamEntry::SubBlock: // Handled for us already.
+ case llvm::BitstreamEntry::Error:
+ return true;
+ case llvm::BitstreamEntry::EndBlock:
+ return false;
+ case llvm::BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read abbrev records, associate them with CurBID.
+ if (Entry.ID == bitc::DEFINE_ABBREV) {
+ if (!CurBlockInfo) return true;
+ ReadAbbrevRecord();
+
+ // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
+ // appropriate BlockInfo.
+ BitCodeAbbrev *Abbv = CurAbbrevs.back();
+ CurAbbrevs.pop_back();
+ CurBlockInfo->Abbrevs.push_back(Abbv);
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (readRecord(Entry.ID, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::BLOCKINFO_CODE_SETBID:
+ if (Record.size() < 1) return true;
+ CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
+ break;
+ case bitc::BLOCKINFO_CODE_BLOCKNAME: {
+ if (!CurBlockInfo) return true;
+ if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name.
+ std::string Name;
+ for (unsigned i = 0, e = Record.size(); i != e; ++i)
+ Name += (char)Record[i];
+ CurBlockInfo->Name = Name;
+ break;
+ }
+ case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
+ if (!CurBlockInfo) return true;
+ if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name.
+ std::string Name;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i)
+ Name += (char)Record[i];
+ CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
+ Name));
+ break;
+ }
+ }
+ }
+}
+
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
new file mode 100644
index 000000000000..9f51c35ad92e
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
@@ -0,0 +1,39 @@
+//===-- BitWriter.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitWriter.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
+ std::string ErrorInfo;
+ raw_fd_ostream OS(Path, ErrorInfo, raw_fd_ostream::F_Binary);
+
+ if (!ErrorInfo.empty())
+ return -1;
+
+ WriteBitcodeToFile(unwrap(M), OS);
+ return 0;
+}
+
+int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
+ int Unbuffered) {
+ raw_fd_ostream OS(FD, ShouldClose, Unbuffered);
+
+ WriteBitcodeToFile(unwrap(M), OS);
+ return 0;
+}
+
+int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
+ return LLVMWriteBitcodeToFD(M, FileHandle, true, false);
+}
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
new file mode 100644
index 000000000000..1b73f23e8f60
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -0,0 +1,2020 @@
+//===--- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Bitcode writer implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "ValueEnumerator.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+#include <map>
+using namespace llvm;
+
+static cl::opt<bool>
+EnablePreserveUseListOrdering("enable-bc-uselist-preserve",
+ cl::desc("Turn on experimental support for "
+ "use-list order preservation."),
+ cl::init(false), cl::Hidden);
+
+/// These are manifest constants used by the bitcode writer. They do not need to
+/// be kept in sync with the reader, but need to be consistent within this file.
+enum {
+ // VALUE_SYMTAB_BLOCK abbrev id's.
+ VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ VST_ENTRY_7_ABBREV,
+ VST_ENTRY_6_ABBREV,
+ VST_BBENTRY_6_ABBREV,
+
+ // CONSTANTS_BLOCK abbrev id's.
+ CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ CONSTANTS_INTEGER_ABBREV,
+ CONSTANTS_CE_CAST_Abbrev,
+ CONSTANTS_NULL_Abbrev,
+
+ // FUNCTION_BLOCK abbrev id's.
+ FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ FUNCTION_INST_BINOP_ABBREV,
+ FUNCTION_INST_BINOP_FLAGS_ABBREV,
+ FUNCTION_INST_CAST_ABBREV,
+ FUNCTION_INST_RET_VOID_ABBREV,
+ FUNCTION_INST_RET_VAL_ABBREV,
+ FUNCTION_INST_UNREACHABLE_ABBREV,
+
+ // SwitchInst Magic
+ SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
+};
+
+static unsigned GetEncodedCastOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown cast instruction!");
+ case Instruction::Trunc : return bitc::CAST_TRUNC;
+ case Instruction::ZExt : return bitc::CAST_ZEXT;
+ case Instruction::SExt : return bitc::CAST_SEXT;
+ case Instruction::FPToUI : return bitc::CAST_FPTOUI;
+ case Instruction::FPToSI : return bitc::CAST_FPTOSI;
+ case Instruction::UIToFP : return bitc::CAST_UITOFP;
+ case Instruction::SIToFP : return bitc::CAST_SITOFP;
+ case Instruction::FPTrunc : return bitc::CAST_FPTRUNC;
+ case Instruction::FPExt : return bitc::CAST_FPEXT;
+ case Instruction::PtrToInt: return bitc::CAST_PTRTOINT;
+ case Instruction::IntToPtr: return bitc::CAST_INTTOPTR;
+ case Instruction::BitCast : return bitc::CAST_BITCAST;
+ }
+}
+
+static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown binary instruction!");
+ case Instruction::Add:
+ case Instruction::FAdd: return bitc::BINOP_ADD;
+ case Instruction::Sub:
+ case Instruction::FSub: return bitc::BINOP_SUB;
+ case Instruction::Mul:
+ case Instruction::FMul: return bitc::BINOP_MUL;
+ case Instruction::UDiv: return bitc::BINOP_UDIV;
+ case Instruction::FDiv:
+ case Instruction::SDiv: return bitc::BINOP_SDIV;
+ case Instruction::URem: return bitc::BINOP_UREM;
+ case Instruction::FRem:
+ case Instruction::SRem: return bitc::BINOP_SREM;
+ case Instruction::Shl: return bitc::BINOP_SHL;
+ case Instruction::LShr: return bitc::BINOP_LSHR;
+ case Instruction::AShr: return bitc::BINOP_ASHR;
+ case Instruction::And: return bitc::BINOP_AND;
+ case Instruction::Or: return bitc::BINOP_OR;
+ case Instruction::Xor: return bitc::BINOP_XOR;
+ }
+}
+
+static unsigned GetEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
+ switch (Op) {
+ default: llvm_unreachable("Unknown RMW operation!");
+ case AtomicRMWInst::Xchg: return bitc::RMW_XCHG;
+ case AtomicRMWInst::Add: return bitc::RMW_ADD;
+ case AtomicRMWInst::Sub: return bitc::RMW_SUB;
+ case AtomicRMWInst::And: return bitc::RMW_AND;
+ case AtomicRMWInst::Nand: return bitc::RMW_NAND;
+ case AtomicRMWInst::Or: return bitc::RMW_OR;
+ case AtomicRMWInst::Xor: return bitc::RMW_XOR;
+ case AtomicRMWInst::Max: return bitc::RMW_MAX;
+ case AtomicRMWInst::Min: return bitc::RMW_MIN;
+ case AtomicRMWInst::UMax: return bitc::RMW_UMAX;
+ case AtomicRMWInst::UMin: return bitc::RMW_UMIN;
+ }
+}
+
+static unsigned GetEncodedOrdering(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ case NotAtomic: return bitc::ORDERING_NOTATOMIC;
+ case Unordered: return bitc::ORDERING_UNORDERED;
+ case Monotonic: return bitc::ORDERING_MONOTONIC;
+ case Acquire: return bitc::ORDERING_ACQUIRE;
+ case Release: return bitc::ORDERING_RELEASE;
+ case AcquireRelease: return bitc::ORDERING_ACQREL;
+ case SequentiallyConsistent: return bitc::ORDERING_SEQCST;
+ }
+ llvm_unreachable("Invalid ordering");
+}
+
+static unsigned GetEncodedSynchScope(SynchronizationScope SynchScope) {
+ switch (SynchScope) {
+ case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD;
+ case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD;
+ }
+ llvm_unreachable("Invalid synch scope");
+}
+
+static void WriteStringRecord(unsigned Code, StringRef Str,
+ unsigned AbbrevToUse, BitstreamWriter &Stream) {
+ SmallVector<unsigned, 64> Vals;
+
+ // Code: [strchar x N]
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i]))
+ AbbrevToUse = 0;
+ Vals.push_back(Str[i]);
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, Vals, AbbrevToUse);
+}
+
+static void WriteAttributeGroupTable(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const std::vector<AttributeSet> &AttrGrps = VE.getAttributeGroups();
+ if (AttrGrps.empty()) return;
+
+ Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = AttrGrps.size(); i != e; ++i) {
+ AttributeSet AS = AttrGrps[i];
+ for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) {
+ AttributeSet A = AS.getSlotAttributes(i);
+
+ Record.push_back(VE.getAttributeGroupID(A));
+ Record.push_back(AS.getSlotIndex(i));
+
+ for (AttributeSet::iterator I = AS.begin(0), E = AS.end(0);
+ I != E; ++I) {
+ Attribute Attr = *I;
+ if (Attr.isEnumAttribute()) {
+ Record.push_back(0);
+ Record.push_back(Attr.getKindAsEnum());
+ } else if (Attr.isAlignAttribute()) {
+ Record.push_back(1);
+ Record.push_back(Attr.getKindAsEnum());
+ Record.push_back(Attr.getValueAsInt());
+ } else {
+ StringRef Kind = Attr.getKindAsString();
+ StringRef Val = Attr.getValueAsString();
+
+ Record.push_back(Val.empty() ? 3 : 4);
+ Record.append(Kind.begin(), Kind.end());
+ Record.push_back(0);
+ if (!Val.empty()) {
+ Record.append(Val.begin(), Val.end());
+ Record.push_back(0);
+ }
+ }
+ }
+
+ Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record);
+ Record.clear();
+ }
+ }
+
+ Stream.ExitBlock();
+}
+
+static void WriteAttributeTable(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const std::vector<AttributeSet> &Attrs = VE.getAttributes();
+ if (Attrs.empty()) return;
+
+ Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
+ const AttributeSet &A = Attrs[i];
+ for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i)
+ Record.push_back(VE.getAttributeGroupID(A.getSlotAttributes(i)));
+
+ Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+/// WriteTypeTable - Write out the type table for a module.
+static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+ const ValueEnumerator::TypeList &TypeList = VE.getTypes();
+
+ Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
+ SmallVector<uint64_t, 64> TypeVals;
+
+ uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1);
+
+ // Abbrev for TYPE_CODE_POINTER.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
+ unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_FUNCTION.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
+ unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT_ANON.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
+ unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT_NAME.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ unsigned StructNameAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT_NAMED.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
+ unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_ARRAY.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
+ unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Emit an entry count so the reader can reserve space.
+ TypeVals.push_back(TypeList.size());
+ Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals);
+ TypeVals.clear();
+
+ // Loop over all of the types, emitting each in turn.
+ for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
+ Type *T = TypeList[i];
+ int AbbrevToUse = 0;
+ unsigned Code = 0;
+
+ switch (T->getTypeID()) {
+ default: llvm_unreachable("Unknown type!");
+ case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
+ case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break;
+ case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
+ case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
+ case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
+ case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
+ case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break;
+ case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
+ case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
+ case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
+ case Type::IntegerTyID:
+ // INTEGER: [width]
+ Code = bitc::TYPE_CODE_INTEGER;
+ TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
+ break;
+ case Type::PointerTyID: {
+ PointerType *PTy = cast<PointerType>(T);
+ // POINTER: [pointee type, address space]
+ Code = bitc::TYPE_CODE_POINTER;
+ TypeVals.push_back(VE.getTypeID(PTy->getElementType()));
+ unsigned AddressSpace = PTy->getAddressSpace();
+ TypeVals.push_back(AddressSpace);
+ if (AddressSpace == 0) AbbrevToUse = PtrAbbrev;
+ break;
+ }
+ case Type::FunctionTyID: {
+ FunctionType *FT = cast<FunctionType>(T);
+ // FUNCTION: [isvararg, retty, paramty x N]
+ Code = bitc::TYPE_CODE_FUNCTION;
+ TypeVals.push_back(FT->isVarArg());
+ TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
+ TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
+ AbbrevToUse = FunctionAbbrev;
+ break;
+ }
+ case Type::StructTyID: {
+ StructType *ST = cast<StructType>(T);
+ // STRUCT: [ispacked, eltty x N]
+ TypeVals.push_back(ST->isPacked());
+ // Output all of the element types.
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end(); I != E; ++I)
+ TypeVals.push_back(VE.getTypeID(*I));
+
+ if (ST->isLiteral()) {
+ Code = bitc::TYPE_CODE_STRUCT_ANON;
+ AbbrevToUse = StructAnonAbbrev;
+ } else {
+ if (ST->isOpaque()) {
+ Code = bitc::TYPE_CODE_OPAQUE;
+ } else {
+ Code = bitc::TYPE_CODE_STRUCT_NAMED;
+ AbbrevToUse = StructNamedAbbrev;
+ }
+
+ // Emit the name if it is present.
+ if (!ST->getName().empty())
+ WriteStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
+ StructNameAbbrev, Stream);
+ }
+ break;
+ }
+ case Type::ArrayTyID: {
+ ArrayType *AT = cast<ArrayType>(T);
+ // ARRAY: [numelts, eltty]
+ Code = bitc::TYPE_CODE_ARRAY;
+ TypeVals.push_back(AT->getNumElements());
+ TypeVals.push_back(VE.getTypeID(AT->getElementType()));
+ AbbrevToUse = ArrayAbbrev;
+ break;
+ }
+ case Type::VectorTyID: {
+ VectorType *VT = cast<VectorType>(T);
+ // VECTOR [numelts, eltty]
+ Code = bitc::TYPE_CODE_VECTOR;
+ TypeVals.push_back(VT->getNumElements());
+ TypeVals.push_back(VE.getTypeID(VT->getElementType()));
+ break;
+ }
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
+ TypeVals.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static unsigned getEncodedLinkage(const GlobalValue *GV) {
+ switch (GV->getLinkage()) {
+ case GlobalValue::ExternalLinkage: return 0;
+ case GlobalValue::WeakAnyLinkage: return 1;
+ case GlobalValue::AppendingLinkage: return 2;
+ case GlobalValue::InternalLinkage: return 3;
+ case GlobalValue::LinkOnceAnyLinkage: return 4;
+ case GlobalValue::DLLImportLinkage: return 5;
+ case GlobalValue::DLLExportLinkage: return 6;
+ case GlobalValue::ExternalWeakLinkage: return 7;
+ case GlobalValue::CommonLinkage: return 8;
+ case GlobalValue::PrivateLinkage: return 9;
+ case GlobalValue::WeakODRLinkage: return 10;
+ case GlobalValue::LinkOnceODRLinkage: return 11;
+ case GlobalValue::AvailableExternallyLinkage: return 12;
+ case GlobalValue::LinkerPrivateLinkage: return 13;
+ case GlobalValue::LinkerPrivateWeakLinkage: return 14;
+ case GlobalValue::LinkOnceODRAutoHideLinkage: return 15;
+ }
+ llvm_unreachable("Invalid linkage");
+}
+
+static unsigned getEncodedVisibility(const GlobalValue *GV) {
+ switch (GV->getVisibility()) {
+ case GlobalValue::DefaultVisibility: return 0;
+ case GlobalValue::HiddenVisibility: return 1;
+ case GlobalValue::ProtectedVisibility: return 2;
+ }
+ llvm_unreachable("Invalid visibility");
+}
+
+static unsigned getEncodedThreadLocalMode(const GlobalVariable *GV) {
+ switch (GV->getThreadLocalMode()) {
+ case GlobalVariable::NotThreadLocal: return 0;
+ case GlobalVariable::GeneralDynamicTLSModel: return 1;
+ case GlobalVariable::LocalDynamicTLSModel: return 2;
+ case GlobalVariable::InitialExecTLSModel: return 3;
+ case GlobalVariable::LocalExecTLSModel: return 4;
+ }
+ llvm_unreachable("Invalid TLS model");
+}
+
+// Emit top-level description of module, including target triple, inline asm,
+// descriptors for global variables, and function prototype info.
+static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ // Emit various pieces of data attached to a module.
+ if (!M->getTargetTriple().empty())
+ WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(),
+ 0/*TODO*/, Stream);
+ if (!M->getDataLayout().empty())
+ WriteStringRecord(bitc::MODULE_CODE_DATALAYOUT, M->getDataLayout(),
+ 0/*TODO*/, Stream);
+ if (!M->getModuleInlineAsm().empty())
+ WriteStringRecord(bitc::MODULE_CODE_ASM, M->getModuleInlineAsm(),
+ 0/*TODO*/, Stream);
+
+ // Emit information about sections and GC, computing how many there are. Also
+ // compute the maximum alignment value.
+ std::map<std::string, unsigned> SectionMap;
+ std::map<std::string, unsigned> GCMap;
+ unsigned MaxAlignment = 0;
+ unsigned MaxGlobalType = 0;
+ for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+ GV != E; ++GV) {
+ MaxAlignment = std::max(MaxAlignment, GV->getAlignment());
+ MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType()));
+ if (GV->hasSection()) {
+ // Give section names unique ID's.
+ unsigned &Entry = SectionMap[GV->getSection()];
+ if (!Entry) {
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(),
+ 0/*TODO*/, Stream);
+ Entry = SectionMap.size();
+ }
+ }
+ }
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+ MaxAlignment = std::max(MaxAlignment, F->getAlignment());
+ if (F->hasSection()) {
+ // Give section names unique ID's.
+ unsigned &Entry = SectionMap[F->getSection()];
+ if (!Entry) {
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F->getSection(),
+ 0/*TODO*/, Stream);
+ Entry = SectionMap.size();
+ }
+ }
+ if (F->hasGC()) {
+ // Same for GC names.
+ unsigned &Entry = GCMap[F->getGC()];
+ if (!Entry) {
+ WriteStringRecord(bitc::MODULE_CODE_GCNAME, F->getGC(),
+ 0/*TODO*/, Stream);
+ Entry = GCMap.size();
+ }
+ }
+ }
+
+ // Emit abbrev for globals, now that we know # sections and max alignment.
+ unsigned SimpleGVarAbbrev = 0;
+ if (!M->global_empty()) {
+ // Add an abbrev for common globals with no visibility or thread localness.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(MaxGlobalType+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Constant.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Initializer.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // Linkage.
+ if (MaxAlignment == 0) // Alignment.
+ Abbv->Add(BitCodeAbbrevOp(0));
+ else {
+ unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1;
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(MaxEncAlignment+1)));
+ }
+ if (SectionMap.empty()) // Section.
+ Abbv->Add(BitCodeAbbrevOp(0));
+ else
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(SectionMap.size()+1)));
+ // Don't bother emitting vis + thread local.
+ SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv);
+ }
+
+ // Emit the global variable information.
+ SmallVector<unsigned, 64> Vals;
+ for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+ GV != E; ++GV) {
+ unsigned AbbrevToUse = 0;
+
+ // GLOBALVAR: [type, isconst, initid,
+ // linkage, alignment, section, visibility, threadlocal,
+ // unnamed_addr]
+ Vals.push_back(VE.getTypeID(GV->getType()));
+ Vals.push_back(GV->isConstant());
+ Vals.push_back(GV->isDeclaration() ? 0 :
+ (VE.getValueID(GV->getInitializer()) + 1));
+ Vals.push_back(getEncodedLinkage(GV));
+ Vals.push_back(Log2_32(GV->getAlignment())+1);
+ Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
+ if (GV->isThreadLocal() ||
+ GV->getVisibility() != GlobalValue::DefaultVisibility ||
+ GV->hasUnnamedAddr() || GV->isExternallyInitialized()) {
+ Vals.push_back(getEncodedVisibility(GV));
+ Vals.push_back(getEncodedThreadLocalMode(GV));
+ Vals.push_back(GV->hasUnnamedAddr());
+ Vals.push_back(GV->isExternallyInitialized());
+ } else {
+ AbbrevToUse = SimpleGVarAbbrev;
+ }
+
+ Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+
+ // Emit the function proto information.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+ // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
+ // section, visibility, gc, unnamed_addr]
+ Vals.push_back(VE.getTypeID(F->getType()));
+ Vals.push_back(F->getCallingConv());
+ Vals.push_back(F->isDeclaration());
+ Vals.push_back(getEncodedLinkage(F));
+ Vals.push_back(VE.getAttributeID(F->getAttributes()));
+ Vals.push_back(Log2_32(F->getAlignment())+1);
+ Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0);
+ Vals.push_back(getEncodedVisibility(F));
+ Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
+ Vals.push_back(F->hasUnnamedAddr());
+
+ unsigned AbbrevToUse = 0;
+ Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+
+ // Emit the alias information.
+ for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
+ AI != E; ++AI) {
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
+ Vals.push_back(VE.getTypeID(AI->getType()));
+ Vals.push_back(VE.getValueID(AI->getAliasee()));
+ Vals.push_back(getEncodedLinkage(AI));
+ Vals.push_back(getEncodedVisibility(AI));
+ unsigned AbbrevToUse = 0;
+ Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+}
+
+static uint64_t GetOptimizationFlags(const Value *V) {
+ uint64_t Flags = 0;
+
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(V)) {
+ if (OBO->hasNoSignedWrap())
+ Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
+ if (OBO->hasNoUnsignedWrap())
+ Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
+ } else if (const PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(V)) {
+ if (PEO->isExact())
+ Flags |= 1 << bitc::PEO_EXACT;
+ } else if (const FPMathOperator *FPMO =
+ dyn_cast<const FPMathOperator>(V)) {
+ if (FPMO->hasUnsafeAlgebra())
+ Flags |= FastMathFlags::UnsafeAlgebra;
+ if (FPMO->hasNoNaNs())
+ Flags |= FastMathFlags::NoNaNs;
+ if (FPMO->hasNoInfs())
+ Flags |= FastMathFlags::NoInfs;
+ if (FPMO->hasNoSignedZeros())
+ Flags |= FastMathFlags::NoSignedZeros;
+ if (FPMO->hasAllowReciprocal())
+ Flags |= FastMathFlags::AllowReciprocal;
+ }
+
+ return Flags;
+}
+
+static void WriteMDNode(const MDNode *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVector<uint64_t, 64> &Record) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i)) {
+ Record.push_back(VE.getTypeID(N->getOperand(i)->getType()));
+ Record.push_back(VE.getValueID(N->getOperand(i)));
+ } else {
+ Record.push_back(VE.getTypeID(Type::getVoidTy(N->getContext())));
+ Record.push_back(0);
+ }
+ }
+ unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE :
+ bitc::METADATA_NODE;
+ Stream.EmitRecord(MDCode, Record, 0);
+ Record.clear();
+}
+
+static void WriteModuleMetadata(const Module *M,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const ValueEnumerator::ValueList &Vals = VE.getMDValues();
+ bool StartedMetadataBlock = false;
+ unsigned MDSAbbrev = 0;
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+
+ if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) {
+ if (!N->isFunctionLocal() || !N->getFunction()) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ WriteMDNode(N, VE, Stream, Record);
+ }
+ } else if (const MDString *MDS = dyn_cast<MDString>(Vals[i].first)) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+ // Abbrev for METADATA_STRING.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ MDSAbbrev = Stream.EmitAbbrev(Abbv);
+ StartedMetadataBlock = true;
+ }
+
+ // Code: [strchar x N]
+ Record.append(MDS->begin(), MDS->end());
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
+ Record.clear();
+ }
+ }
+
+ // Write named metadata.
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+
+ // Write name.
+ StringRef Str = NMD->getName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ Record.push_back(Str[i]);
+ Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
+ Record.clear();
+
+ // Write named metadata operands.
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ Record.push_back(VE.getValueID(NMD->getOperand(i)));
+ Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
+ Record.clear();
+ }
+
+ if (StartedMetadataBlock)
+ Stream.ExitBlock();
+}
+
+static void WriteFunctionLocalMetadata(const Function &F,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ bool StartedMetadataBlock = false;
+ SmallVector<uint64_t, 64> Record;
+ const SmallVector<const MDNode *, 8> &Vals = VE.getFunctionLocalMDValues();
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ if (const MDNode *N = Vals[i])
+ if (N->isFunctionLocal() && N->getFunction() == &F) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ WriteMDNode(N, VE, Stream, Record);
+ }
+
+ if (StartedMetadataBlock)
+ Stream.ExitBlock();
+}
+
+static void WriteMetadataAttachment(const Function &F,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Write metadata attachments
+ // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
+
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ MDs.clear();
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+
+ // If no metadata, ignore instruction.
+ if (MDs.empty()) continue;
+
+ Record.push_back(VE.getInstructionID(I));
+
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+ Record.push_back(MDs[i].first);
+ Record.push_back(VE.getValueID(MDs[i].second));
+ }
+ Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
+ SmallVector<uint64_t, 64> Record;
+
+ // Write metadata kinds
+ // METADATA_KIND - [n x [id, name]]
+ SmallVector<StringRef, 8> Names;
+ M->getMDKindNames(Names);
+
+ if (Names.empty()) return;
+
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+ for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
+ Record.push_back(MDKindID);
+ StringRef KName = Names[MDKindID];
+ Record.append(KName.begin(), KName.end());
+
+ Stream.EmitRecord(bitc::METADATA_KIND, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) {
+ if ((int64_t)V >= 0)
+ Vals.push_back(V << 1);
+ else
+ Vals.push_back((-V << 1) | 1);
+}
+
+static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals,
+ unsigned &Code, unsigned &AbbrevToUse, const APInt &Val,
+ bool EmitSizeForWideNumbers = false
+ ) {
+ if (Val.getBitWidth() <= 64) {
+ uint64_t V = Val.getSExtValue();
+ emitSignedInt64(Vals, V);
+ Code = bitc::CST_CODE_INTEGER;
+ AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
+ } else {
+ // Wide integers, > 64 bits in size.
+ // We have an arbitrary precision integer value to write whose
+ // bit width is > 64. However, in canonical unsigned integer
+ // format it is likely that the high bits are going to be zero.
+ // So, we only write the number of active words.
+ unsigned NWords = Val.getActiveWords();
+
+ if (EmitSizeForWideNumbers)
+ Vals.push_back(NWords);
+
+ const uint64_t *RawWords = Val.getRawData();
+ for (unsigned i = 0; i != NWords; ++i) {
+ emitSignedInt64(Vals, RawWords[i]);
+ }
+ Code = bitc::CST_CODE_WIDE_INTEGER;
+ }
+}
+
+static void WriteConstants(unsigned FirstVal, unsigned LastVal,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream, bool isGlobal) {
+ if (FirstVal == LastVal) return;
+
+ Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4);
+
+ unsigned AggregateAbbrev = 0;
+ unsigned String8Abbrev = 0;
+ unsigned CString7Abbrev = 0;
+ unsigned CString6Abbrev = 0;
+ // If this is a constant pool for the module, emit module-specific abbrevs.
+ if (isGlobal) {
+ // Abbrev for CST_CODE_AGGREGATE.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1)));
+ AggregateAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for CST_CODE_STRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ String8Abbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for CST_CODE_CSTRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ CString7Abbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for CST_CODE_CSTRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ CString6Abbrev = Stream.EmitAbbrev(Abbv);
+ }
+
+ SmallVector<uint64_t, 64> Record;
+
+ const ValueEnumerator::ValueList &Vals = VE.getValues();
+ Type *LastTy = 0;
+ for (unsigned i = FirstVal; i != LastVal; ++i) {
+ const Value *V = Vals[i].first;
+ // If we need to switch types, do so now.
+ if (V->getType() != LastTy) {
+ LastTy = V->getType();
+ Record.push_back(VE.getTypeID(LastTy));
+ Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record,
+ CONSTANTS_SETTYPE_ABBREV);
+ Record.clear();
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ Record.push_back(unsigned(IA->hasSideEffects()) |
+ unsigned(IA->isAlignStack()) << 1 |
+ unsigned(IA->getDialect()&1) << 2);
+
+ // Add the asm string.
+ const std::string &AsmStr = IA->getAsmString();
+ Record.push_back(AsmStr.size());
+ for (unsigned i = 0, e = AsmStr.size(); i != e; ++i)
+ Record.push_back(AsmStr[i]);
+
+ // Add the constraint string.
+ const std::string &ConstraintStr = IA->getConstraintString();
+ Record.push_back(ConstraintStr.size());
+ for (unsigned i = 0, e = ConstraintStr.size(); i != e; ++i)
+ Record.push_back(ConstraintStr[i]);
+ Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record);
+ Record.clear();
+ continue;
+ }
+ const Constant *C = cast<Constant>(V);
+ unsigned Code = -1U;
+ unsigned AbbrevToUse = 0;
+ if (C->isNullValue()) {
+ Code = bitc::CST_CODE_NULL;
+ } else if (isa<UndefValue>(C)) {
+ Code = bitc::CST_CODE_UNDEF;
+ } else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
+ EmitAPInt(Record, Code, AbbrevToUse, IV->getValue());
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ Code = bitc::CST_CODE_FLOAT;
+ Type *Ty = CFP->getType();
+ if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) {
+ Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ } else if (Ty->isX86_FP80Ty()) {
+ // api needed to prevent premature destruction
+ // bits are not in the same order as a normal i80 APInt, compensate.
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Record.push_back((p[1] << 48) | (p[0] >> 16));
+ Record.push_back(p[0] & 0xffffLL);
+ } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) {
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Record.push_back(p[0]);
+ Record.push_back(p[1]);
+ } else {
+ assert (0 && "Unknown FP type!");
+ }
+ } else if (isa<ConstantDataSequential>(C) &&
+ cast<ConstantDataSequential>(C)->isString()) {
+ const ConstantDataSequential *Str = cast<ConstantDataSequential>(C);
+ // Emit constant strings specially.
+ unsigned NumElts = Str->getNumElements();
+ // If this is a null-terminated string, use the denser CSTRING encoding.
+ if (Str->isCString()) {
+ Code = bitc::CST_CODE_CSTRING;
+ --NumElts; // Don't encode the null, which isn't allowed by char6.
+ } else {
+ Code = bitc::CST_CODE_STRING;
+ AbbrevToUse = String8Abbrev;
+ }
+ bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
+ bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned char V = Str->getElementAsInteger(i);
+ Record.push_back(V);
+ isCStr7 &= (V & 128) == 0;
+ if (isCStrChar6)
+ isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
+ }
+
+ if (isCStrChar6)
+ AbbrevToUse = CString6Abbrev;
+ else if (isCStr7)
+ AbbrevToUse = CString7Abbrev;
+ } else if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ Code = bitc::CST_CODE_DATA;
+ Type *EltTy = CDS->getType()->getElementType();
+ if (isa<IntegerType>(EltTy)) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
+ Record.push_back(CDS->getElementAsInteger(i));
+ } else if (EltTy->isFloatTy()) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union { float F; uint32_t I; };
+ F = CDS->getElementAsFloat(i);
+ Record.push_back(I);
+ }
+ } else {
+ assert(EltTy->isDoubleTy() && "Unknown ConstantData element type");
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union { double F; uint64_t I; };
+ F = CDS->getElementAsDouble(i);
+ Record.push_back(I);
+ }
+ }
+ } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+ isa<ConstantVector>(C)) {
+ Code = bitc::CST_CODE_AGGREGATE;
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ Record.push_back(VE.getValueID(C->getOperand(i)));
+ AbbrevToUse = AggregateAbbrev;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ switch (CE->getOpcode()) {
+ default:
+ if (Instruction::isCast(CE->getOpcode())) {
+ Code = bitc::CST_CODE_CE_CAST;
+ Record.push_back(GetEncodedCastOpcode(CE->getOpcode()));
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ AbbrevToUse = CONSTANTS_CE_CAST_Abbrev;
+ } else {
+ assert(CE->getNumOperands() == 2 && "Unknown constant expr!");
+ Code = bitc::CST_CODE_CE_BINOP;
+ Record.push_back(GetEncodedBinaryOpcode(CE->getOpcode()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ uint64_t Flags = GetOptimizationFlags(CE);
+ if (Flags != 0)
+ Record.push_back(Flags);
+ }
+ break;
+ case Instruction::GetElementPtr:
+ Code = bitc::CST_CODE_CE_GEP;
+ if (cast<GEPOperator>(C)->isInBounds())
+ Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+ Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(i)));
+ }
+ break;
+ case Instruction::Select:
+ Code = bitc::CST_CODE_CE_SELECT;
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ExtractElement:
+ Code = bitc::CST_CODE_CE_EXTRACTELT;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ break;
+ case Instruction::InsertElement:
+ Code = bitc::CST_CODE_CE_INSERTELT;
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ShuffleVector:
+ // If the return type and argument types are the same, this is a
+ // standard shufflevector instruction. If the types are different,
+ // then the shuffle is widening or truncating the input vectors, and
+ // the argument type must also be encoded.
+ if (C->getType() == C->getOperand(0)->getType()) {
+ Code = bitc::CST_CODE_CE_SHUFFLEVEC;
+ } else {
+ Code = bitc::CST_CODE_CE_SHUFVEC_EX;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ }
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ Code = bitc::CST_CODE_CE_CMP;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(CE->getPredicate());
+ break;
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Code = bitc::CST_CODE_BLOCKADDRESS;
+ Record.push_back(VE.getTypeID(BA->getFunction()->getType()));
+ Record.push_back(VE.getValueID(BA->getFunction()));
+ Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock()));
+ } else {
+#ifndef NDEBUG
+ C->dump();
+#endif
+ llvm_unreachable("Unknown constant!");
+ }
+ Stream.EmitRecord(Code, Record, AbbrevToUse);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static void WriteModuleConstants(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const ValueEnumerator::ValueList &Vals = VE.getValues();
+
+ // Find the first constant to emit, which is the first non-globalvalue value.
+ // We know globalvalues have been emitted by WriteModuleInfo.
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+ if (!isa<GlobalValue>(Vals[i].first)) {
+ WriteConstants(i, Vals.size(), VE, Stream, true);
+ return;
+ }
+ }
+}
+
+/// PushValueAndType - The file has to encode both the value and type id for
+/// many values, because we need to know what type to create for forward
+/// references. However, most operands are not forward references, so this type
+/// field is not needed.
+///
+/// This function adds V's value ID to Vals. If the value ID is higher than the
+/// instruction ID, then it is a forward reference, and it also includes the
+/// type ID. The value ID that is written is encoded relative to the InstID.
+static bool PushValueAndType(const Value *V, unsigned InstID,
+ SmallVector<unsigned, 64> &Vals,
+ ValueEnumerator &VE) {
+ unsigned ValID = VE.getValueID(V);
+ // Make encoding relative to the InstID.
+ Vals.push_back(InstID - ValID);
+ if (ValID >= InstID) {
+ Vals.push_back(VE.getTypeID(V->getType()));
+ return true;
+ }
+ return false;
+}
+
+/// pushValue - Like PushValueAndType, but where the type of the value is
+/// omitted (perhaps it was already encoded in an earlier operand).
+static void pushValue(const Value *V, unsigned InstID,
+ SmallVector<unsigned, 64> &Vals,
+ ValueEnumerator &VE) {
+ unsigned ValID = VE.getValueID(V);
+ Vals.push_back(InstID - ValID);
+}
+
+static void pushValue64(const Value *V, unsigned InstID,
+ SmallVector<uint64_t, 128> &Vals,
+ ValueEnumerator &VE) {
+ uint64_t ValID = VE.getValueID(V);
+ Vals.push_back(InstID - ValID);
+}
+
+static void pushValueSigned(const Value *V, unsigned InstID,
+ SmallVector<uint64_t, 128> &Vals,
+ ValueEnumerator &VE) {
+ unsigned ValID = VE.getValueID(V);
+ int64_t diff = ((int32_t)InstID - (int32_t)ValID);
+ emitSignedInt64(Vals, diff);
+}
+
+/// WriteInstruction - Emit an instruction to the specified stream.
+static void WriteInstruction(const Instruction &I, unsigned InstID,
+ ValueEnumerator &VE, BitstreamWriter &Stream,
+ SmallVector<unsigned, 64> &Vals) {
+ unsigned Code = 0;
+ unsigned AbbrevToUse = 0;
+ VE.setInstructionID(&I);
+ switch (I.getOpcode()) {
+ default:
+ if (Instruction::isCast(I.getOpcode())) {
+ Code = bitc::FUNC_CODE_INST_CAST;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_CAST_ABBREV;
+ Vals.push_back(VE.getTypeID(I.getType()));
+ Vals.push_back(GetEncodedCastOpcode(I.getOpcode()));
+ } else {
+ assert(isa<BinaryOperator>(I) && "Unknown instruction!");
+ Code = bitc::FUNC_CODE_INST_BINOP;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_BINOP_ABBREV;
+ pushValue(I.getOperand(1), InstID, Vals, VE);
+ Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode()));
+ uint64_t Flags = GetOptimizationFlags(&I);
+ if (Flags != 0) {
+ if (AbbrevToUse == FUNCTION_INST_BINOP_ABBREV)
+ AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV;
+ Vals.push_back(Flags);
+ }
+ }
+ break;
+
+ case Instruction::GetElementPtr:
+ Code = bitc::FUNC_CODE_INST_GEP;
+ if (cast<GEPOperator>(&I)->isInBounds())
+ Code = bitc::FUNC_CODE_INST_INBOUNDS_GEP;
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+ break;
+ case Instruction::ExtractValue: {
+ Code = bitc::FUNC_CODE_INST_EXTRACTVAL;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ const ExtractValueInst *EVI = cast<ExtractValueInst>(&I);
+ for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+ Vals.push_back(*i);
+ break;
+ }
+ case Instruction::InsertValue: {
+ Code = bitc::FUNC_CODE_INST_INSERTVAL;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+ const InsertValueInst *IVI = cast<InsertValueInst>(&I);
+ for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+ Vals.push_back(*i);
+ break;
+ }
+ case Instruction::Select:
+ Code = bitc::FUNC_CODE_INST_VSELECT;
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+ pushValue(I.getOperand(2), InstID, Vals, VE);
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ break;
+ case Instruction::ExtractElement:
+ Code = bitc::FUNC_CODE_INST_EXTRACTELT;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ pushValue(I.getOperand(1), InstID, Vals, VE);
+ break;
+ case Instruction::InsertElement:
+ Code = bitc::FUNC_CODE_INST_INSERTELT;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ pushValue(I.getOperand(1), InstID, Vals, VE);
+ pushValue(I.getOperand(2), InstID, Vals, VE);
+ break;
+ case Instruction::ShuffleVector:
+ Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ pushValue(I.getOperand(1), InstID, Vals, VE);
+ pushValue(I.getOperand(2), InstID, Vals, VE);
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ // compare returning Int1Ty or vector of Int1Ty
+ Code = bitc::FUNC_CODE_INST_CMP2;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ pushValue(I.getOperand(1), InstID, Vals, VE);
+ Vals.push_back(cast<CmpInst>(I).getPredicate());
+ break;
+
+ case Instruction::Ret:
+ {
+ Code = bitc::FUNC_CODE_INST_RET;
+ unsigned NumOperands = I.getNumOperands();
+ if (NumOperands == 0)
+ AbbrevToUse = FUNCTION_INST_RET_VOID_ABBREV;
+ else if (NumOperands == 1) {
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV;
+ } else {
+ for (unsigned i = 0, e = NumOperands; i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+ }
+ }
+ break;
+ case Instruction::Br:
+ {
+ Code = bitc::FUNC_CODE_INST_BR;
+ const BranchInst &II = cast<BranchInst>(I);
+ Vals.push_back(VE.getValueID(II.getSuccessor(0)));
+ if (II.isConditional()) {
+ Vals.push_back(VE.getValueID(II.getSuccessor(1)));
+ pushValue(II.getCondition(), InstID, Vals, VE);
+ }
+ }
+ break;
+ case Instruction::Switch:
+ {
+ // Redefine Vals, since here we need to use 64 bit values
+ // explicitly to store large APInt numbers.
+ SmallVector<uint64_t, 128> Vals64;
+
+ Code = bitc::FUNC_CODE_INST_SWITCH;
+ const SwitchInst &SI = cast<SwitchInst>(I);
+
+ uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16);
+ Vals64.push_back(SwitchRecordHeader);
+
+ Vals64.push_back(VE.getTypeID(SI.getCondition()->getType()));
+ pushValue64(SI.getCondition(), InstID, Vals64, VE);
+ Vals64.push_back(VE.getValueID(SI.getDefaultDest()));
+ Vals64.push_back(SI.getNumCases());
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ const IntegersSubset& CaseRanges = i.getCaseValueEx();
+ unsigned Code, Abbrev; // will unused.
+
+ if (CaseRanges.isSingleNumber()) {
+ Vals64.push_back(1/*NumItems = 1*/);
+ Vals64.push_back(true/*IsSingleNumber = true*/);
+ EmitAPInt(Vals64, Code, Abbrev, CaseRanges.getSingleNumber(0), true);
+ } else {
+
+ Vals64.push_back(CaseRanges.getNumItems());
+
+ if (CaseRanges.isSingleNumbersOnly()) {
+ for (unsigned ri = 0, rn = CaseRanges.getNumItems();
+ ri != rn; ++ri) {
+
+ Vals64.push_back(true/*IsSingleNumber = true*/);
+
+ EmitAPInt(Vals64, Code, Abbrev,
+ CaseRanges.getSingleNumber(ri), true);
+ }
+ } else
+ for (unsigned ri = 0, rn = CaseRanges.getNumItems();
+ ri != rn; ++ri) {
+ IntegersSubset::Range r = CaseRanges.getItem(ri);
+ bool IsSingleNumber = CaseRanges.isSingleNumber(ri);
+
+ Vals64.push_back(IsSingleNumber);
+
+ EmitAPInt(Vals64, Code, Abbrev, r.getLow(), true);
+ if (!IsSingleNumber)
+ EmitAPInt(Vals64, Code, Abbrev, r.getHigh(), true);
+ }
+ }
+ Vals64.push_back(VE.getValueID(i.getCaseSuccessor()));
+ }
+
+ Stream.EmitRecord(Code, Vals64, AbbrevToUse);
+
+ // Also do expected action - clear external Vals collection:
+ Vals.clear();
+ return;
+ }
+ break;
+ case Instruction::IndirectBr:
+ Code = bitc::FUNC_CODE_INST_INDIRECTBR;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+ // Encode the address operand as relative, but not the basic blocks.
+ pushValue(I.getOperand(0), InstID, Vals, VE);
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i)));
+ break;
+
+ case Instruction::Invoke: {
+ const InvokeInst *II = cast<InvokeInst>(&I);
+ const Value *Callee(II->getCalledValue());
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Code = bitc::FUNC_CODE_INST_INVOKE;
+
+ Vals.push_back(VE.getAttributeID(II->getAttributes()));
+ Vals.push_back(II->getCallingConv());
+ Vals.push_back(VE.getValueID(II->getNormalDest()));
+ Vals.push_back(VE.getValueID(II->getUnwindDest()));
+ PushValueAndType(Callee, InstID, Vals, VE);
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ pushValue(I.getOperand(i), InstID, Vals, VE); // fixed param.
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ for (unsigned i = FTy->getNumParams(), e = I.getNumOperands()-3;
+ i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE); // vararg
+ }
+ break;
+ }
+ case Instruction::Resume:
+ Code = bitc::FUNC_CODE_INST_RESUME;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ break;
+ case Instruction::Unreachable:
+ Code = bitc::FUNC_CODE_INST_UNREACHABLE;
+ AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
+ break;
+
+ case Instruction::PHI: {
+ const PHINode &PN = cast<PHINode>(I);
+ Code = bitc::FUNC_CODE_INST_PHI;
+ // With the newer instruction encoding, forward references could give
+ // negative valued IDs. This is most common for PHIs, so we use
+ // signed VBRs.
+ SmallVector<uint64_t, 128> Vals64;
+ Vals64.push_back(VE.getTypeID(PN.getType()));
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ pushValueSigned(PN.getIncomingValue(i), InstID, Vals64, VE);
+ Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i)));
+ }
+ // Emit a Vals64 vector and exit.
+ Stream.EmitRecord(Code, Vals64, AbbrevToUse);
+ Vals64.clear();
+ return;
+ }
+
+ case Instruction::LandingPad: {
+ const LandingPadInst &LP = cast<LandingPadInst>(I);
+ Code = bitc::FUNC_CODE_INST_LANDINGPAD;
+ Vals.push_back(VE.getTypeID(LP.getType()));
+ PushValueAndType(LP.getPersonalityFn(), InstID, Vals, VE);
+ Vals.push_back(LP.isCleanup());
+ Vals.push_back(LP.getNumClauses());
+ for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) {
+ if (LP.isCatch(I))
+ Vals.push_back(LandingPadInst::Catch);
+ else
+ Vals.push_back(LandingPadInst::Filter);
+ PushValueAndType(LP.getClause(I), InstID, Vals, VE);
+ }
+ break;
+ }
+
+ case Instruction::Alloca:
+ Code = bitc::FUNC_CODE_INST_ALLOCA;
+ Vals.push_back(VE.getTypeID(I.getType()));
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
+ Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1);
+ break;
+
+ case Instruction::Load:
+ if (cast<LoadInst>(I).isAtomic()) {
+ Code = bitc::FUNC_CODE_INST_LOADATOMIC;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ } else {
+ Code = bitc::FUNC_CODE_INST_LOAD;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr
+ AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
+ }
+ Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
+ Vals.push_back(cast<LoadInst>(I).isVolatile());
+ if (cast<LoadInst>(I).isAtomic()) {
+ Vals.push_back(GetEncodedOrdering(cast<LoadInst>(I).getOrdering()));
+ Vals.push_back(GetEncodedSynchScope(cast<LoadInst>(I).getSynchScope()));
+ }
+ break;
+ case Instruction::Store:
+ if (cast<StoreInst>(I).isAtomic())
+ Code = bitc::FUNC_CODE_INST_STOREATOMIC;
+ else
+ Code = bitc::FUNC_CODE_INST_STORE;
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr
+ pushValue(I.getOperand(0), InstID, Vals, VE); // val.
+ Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
+ Vals.push_back(cast<StoreInst>(I).isVolatile());
+ if (cast<StoreInst>(I).isAtomic()) {
+ Vals.push_back(GetEncodedOrdering(cast<StoreInst>(I).getOrdering()));
+ Vals.push_back(GetEncodedSynchScope(cast<StoreInst>(I).getSynchScope()));
+ }
+ break;
+ case Instruction::AtomicCmpXchg:
+ Code = bitc::FUNC_CODE_INST_CMPXCHG;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr
+ pushValue(I.getOperand(1), InstID, Vals, VE); // cmp.
+ pushValue(I.getOperand(2), InstID, Vals, VE); // newval.
+ Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile());
+ Vals.push_back(GetEncodedOrdering(
+ cast<AtomicCmpXchgInst>(I).getOrdering()));
+ Vals.push_back(GetEncodedSynchScope(
+ cast<AtomicCmpXchgInst>(I).getSynchScope()));
+ break;
+ case Instruction::AtomicRMW:
+ Code = bitc::FUNC_CODE_INST_ATOMICRMW;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr
+ pushValue(I.getOperand(1), InstID, Vals, VE); // val.
+ Vals.push_back(GetEncodedRMWOperation(
+ cast<AtomicRMWInst>(I).getOperation()));
+ Vals.push_back(cast<AtomicRMWInst>(I).isVolatile());
+ Vals.push_back(GetEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering()));
+ Vals.push_back(GetEncodedSynchScope(
+ cast<AtomicRMWInst>(I).getSynchScope()));
+ break;
+ case Instruction::Fence:
+ Code = bitc::FUNC_CODE_INST_FENCE;
+ Vals.push_back(GetEncodedOrdering(cast<FenceInst>(I).getOrdering()));
+ Vals.push_back(GetEncodedSynchScope(cast<FenceInst>(I).getSynchScope()));
+ break;
+ case Instruction::Call: {
+ const CallInst &CI = cast<CallInst>(I);
+ PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+ Code = bitc::FUNC_CODE_INST_CALL;
+
+ Vals.push_back(VE.getAttributeID(CI.getAttributes()));
+ Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
+ PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+ // Check for labels (can happen with asm labels).
+ if (FTy->getParamType(i)->isLabelTy())
+ Vals.push_back(VE.getValueID(CI.getArgOperand(i)));
+ else
+ pushValue(CI.getArgOperand(i), InstID, Vals, VE); // fixed param.
+ }
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands();
+ i != e; ++i)
+ PushValueAndType(CI.getArgOperand(i), InstID, Vals, VE); // varargs
+ }
+ break;
+ }
+ case Instruction::VAArg:
+ Code = bitc::FUNC_CODE_INST_VAARG;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty
+ pushValue(I.getOperand(0), InstID, Vals, VE); // valist.
+ Vals.push_back(VE.getTypeID(I.getType())); // restype.
+ break;
+ }
+
+ Stream.EmitRecord(Code, Vals, AbbrevToUse);
+ Vals.clear();
+}
+
+// Emit names for globals/functions etc.
+static void WriteValueSymbolTable(const ValueSymbolTable &VST,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ if (VST.empty()) return;
+ Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+
+ // FIXME: Set up the abbrev, we know how many values there are!
+ // FIXME: We know if the type names can use 7-bit ascii.
+ SmallVector<unsigned, 64> NameVals;
+
+ for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end();
+ SI != SE; ++SI) {
+
+ const ValueName &Name = *SI;
+
+ // Figure out the encoding to use for the name.
+ bool is7Bit = true;
+ bool isChar6 = true;
+ for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength();
+ C != E; ++C) {
+ if (isChar6)
+ isChar6 = BitCodeAbbrevOp::isChar6(*C);
+ if ((unsigned char)*C & 128) {
+ is7Bit = false;
+ break; // don't bother scanning the rest.
+ }
+ }
+
+ unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
+
+ // VST_ENTRY: [valueid, namechar x N]
+ // VST_BBENTRY: [bbid, namechar x N]
+ unsigned Code;
+ if (isa<BasicBlock>(SI->getValue())) {
+ Code = bitc::VST_CODE_BBENTRY;
+ if (isChar6)
+ AbbrevToUse = VST_BBENTRY_6_ABBREV;
+ } else {
+ Code = bitc::VST_CODE_ENTRY;
+ if (isChar6)
+ AbbrevToUse = VST_ENTRY_6_ABBREV;
+ else if (is7Bit)
+ AbbrevToUse = VST_ENTRY_7_ABBREV;
+ }
+
+ NameVals.push_back(VE.getValueID(SI->getValue()));
+ for (const char *P = Name.getKeyData(),
+ *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P)
+ NameVals.push_back((unsigned char)*P);
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, NameVals, AbbrevToUse);
+ NameVals.clear();
+ }
+ Stream.ExitBlock();
+}
+
+/// WriteFunction - Emit a function body to the module stream.
+static void WriteFunction(const Function &F, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
+ VE.incorporateFunction(F);
+
+ SmallVector<unsigned, 64> Vals;
+
+ // Emit the number of basic blocks, so the reader can create them ahead of
+ // time.
+ Vals.push_back(VE.getBasicBlocks().size());
+ Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals);
+ Vals.clear();
+
+ // If there are function-local constants, emit them now.
+ unsigned CstStart, CstEnd;
+ VE.getFunctionConstantRange(CstStart, CstEnd);
+ WriteConstants(CstStart, CstEnd, VE, Stream, false);
+
+ // If there is function-local metadata, emit it now.
+ WriteFunctionLocalMetadata(F, VE, Stream);
+
+ // Keep a running idea of what the instruction ID is.
+ unsigned InstID = CstEnd;
+
+ bool NeedsMetadataAttachment = false;
+
+ DebugLoc LastDL;
+
+ // Finally, emit all the instructions, in order.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ WriteInstruction(*I, InstID, VE, Stream, Vals);
+
+ if (!I->getType()->isVoidTy())
+ ++InstID;
+
+ // If the instruction has metadata, write a metadata attachment later.
+ NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
+
+ // If the instruction has a debug location, emit it.
+ DebugLoc DL = I->getDebugLoc();
+ if (DL.isUnknown()) {
+ // nothing todo.
+ } else if (DL == LastDL) {
+ // Just repeat the same debug loc as last time.
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
+ } else {
+ MDNode *Scope, *IA;
+ DL.getScopeAndInlinedAt(Scope, IA, I->getContext());
+
+ Vals.push_back(DL.getLine());
+ Vals.push_back(DL.getCol());
+ Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
+ Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
+ Vals.clear();
+
+ LastDL = DL;
+ }
+ }
+
+ // Emit names for all the instructions etc.
+ WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream);
+
+ if (NeedsMetadataAttachment)
+ WriteMetadataAttachment(F, VE, Stream);
+ VE.purgeFunction();
+ Stream.ExitBlock();
+}
+
+// Emit blockinfo, which defines the standard abbreviations etc.
+static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+ // We only want to emit block info records for blocks that have multiple
+ // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK.
+ // Other blocks can define their abbrevs inline.
+ Stream.EnterBlockInfoBlock(2);
+
+ { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_8_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // 7-bit fixed width VST_ENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_7_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // 6-bit char6 VST_ENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_6_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // 6-bit char6 VST_BBENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_BBENTRY_6_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+
+
+ { // SETTYPE abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_SETTYPE_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // INTEGER abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_INTEGER_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // CE_CAST abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_CE_CAST_Abbrev)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // NULL abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_NULL_Abbrev)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ // FIXME: This should only use space for first class types!
+
+ { // INST_LOAD abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_LOAD_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_BINOP abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_BINOP_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_BINOP_FLAGS_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_CAST abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_CAST_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // INST_RET abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_RET_VOID_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_RET abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_RET_VAL_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ Stream.ExitBlock();
+}
+
+// Sort the Users based on the order in which the reader parses the bitcode
+// file.
+static bool bitcodereader_order(const User *lhs, const User *rhs) {
+ // TODO: Implement.
+ return true;
+}
+
+static void WriteUseList(const Value *V, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+
+ // One or zero uses can't get out of order.
+ if (V->use_empty() || V->hasNUses(1))
+ return;
+
+ // Make a copy of the in-memory use-list for sorting.
+ unsigned UseListSize = std::distance(V->use_begin(), V->use_end());
+ SmallVector<const User*, 8> UseList;
+ UseList.reserve(UseListSize);
+ for (Value::const_use_iterator I = V->use_begin(), E = V->use_end();
+ I != E; ++I) {
+ const User *U = *I;
+ UseList.push_back(U);
+ }
+
+ // Sort the copy based on the order read by the BitcodeReader.
+ std::sort(UseList.begin(), UseList.end(), bitcodereader_order);
+
+ // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the
+ // sorted list (i.e., the expected BitcodeReader in-memory use-list).
+
+ // TODO: Emit the USELIST_CODE_ENTRYs.
+}
+
+static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ VE.incorporateFunction(*F);
+
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI)
+ WriteUseList(AI, VE, Stream);
+ for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE;
+ ++BB) {
+ WriteUseList(BB, VE, Stream);
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
+ WriteUseList(II, VE, Stream);
+ for (User::const_op_iterator OI = II->op_begin(), E = II->op_end();
+ OI != E; ++OI) {
+ if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
+ isa<InlineAsm>(*OI))
+ WriteUseList(*OI, VE, Stream);
+ }
+ }
+ }
+ VE.purgeFunction();
+}
+
+// Emit use-lists.
+static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
+
+ // XXX: this modifies the module, but in a way that should never change the
+ // behavior of any pass or codegen in LLVM. The problem is that GVs may
+ // contain entries in the use_list that do not exist in the Module and are
+ // not stored in the .bc file.
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ I->removeDeadConstantUsers();
+
+ // Write the global variables.
+ for (Module::const_global_iterator GI = M->global_begin(),
+ GE = M->global_end(); GI != GE; ++GI) {
+ WriteUseList(GI, VE, Stream);
+
+ // Write the global variable initializers.
+ if (GI->hasInitializer())
+ WriteUseList(GI->getInitializer(), VE, Stream);
+ }
+
+ // Write the functions.
+ for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
+ WriteUseList(FI, VE, Stream);
+ if (!FI->isDeclaration())
+ WriteFunctionUseList(FI, VE, Stream);
+ }
+
+ // Write the aliases.
+ for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+ AI != AE; ++AI) {
+ WriteUseList(AI, VE, Stream);
+ WriteUseList(AI->getAliasee(), VE, Stream);
+ }
+
+ Stream.ExitBlock();
+}
+
+/// WriteModule - Emit the specified module to the bitstream.
+static void WriteModule(const Module *M, BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+
+ SmallVector<unsigned, 1> Vals;
+ unsigned CurVersion = 1;
+ Vals.push_back(CurVersion);
+ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
+
+ // Analyze the module, enumerating globals, functions, etc.
+ ValueEnumerator VE(M);
+
+ // Emit blockinfo, which defines the standard abbreviations etc.
+ WriteBlockInfo(VE, Stream);
+
+ // Emit information about attribute groups.
+ WriteAttributeGroupTable(VE, Stream);
+
+ // Emit information about parameter attributes.
+ WriteAttributeTable(VE, Stream);
+
+ // Emit information describing all of the types in the module.
+ WriteTypeTable(VE, Stream);
+
+ // Emit top-level description of module, including target triple, inline asm,
+ // descriptors for global variables, and function prototype info.
+ WriteModuleInfo(M, VE, Stream);
+
+ // Emit constants.
+ WriteModuleConstants(VE, Stream);
+
+ // Emit metadata.
+ WriteModuleMetadata(M, VE, Stream);
+
+ // Emit metadata.
+ WriteModuleMetadataStore(M, Stream);
+
+ // Emit names for globals/functions etc.
+ WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
+
+ // Emit use-lists.
+ if (EnablePreserveUseListOrdering)
+ WriteModuleUseLists(M, VE, Stream);
+
+ // Emit function bodies.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
+ if (!F->isDeclaration())
+ WriteFunction(*F, VE, Stream);
+
+ Stream.ExitBlock();
+}
+
+/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a
+/// header and trailer to make it compatible with the system archiver. To do
+/// this we emit the following header, and then emit a trailer that pads the
+/// file out to be a multiple of 16 bytes.
+///
+/// struct bc_header {
+/// uint32_t Magic; // 0x0B17C0DE
+/// uint32_t Version; // Version, currently always 0.
+/// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+/// uint32_t BitcodeSize; // Size of traditional bitcode file.
+/// uint32_t CPUType; // CPU specifier.
+/// ... potentially more later ...
+/// };
+enum {
+ DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size.
+ DarwinBCHeaderSize = 5*4
+};
+
+static void WriteInt32ToBuffer(uint32_t Value, SmallVectorImpl<char> &Buffer,
+ uint32_t &Position) {
+ Buffer[Position + 0] = (unsigned char) (Value >> 0);
+ Buffer[Position + 1] = (unsigned char) (Value >> 8);
+ Buffer[Position + 2] = (unsigned char) (Value >> 16);
+ Buffer[Position + 3] = (unsigned char) (Value >> 24);
+ Position += 4;
+}
+
+static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
+ const Triple &TT) {
+ unsigned CPUType = ~0U;
+
+ // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*,
+ // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic
+ // number from /usr/include/mach/machine.h. It is ok to reproduce the
+ // specific constants here because they are implicitly part of the Darwin ABI.
+ enum {
+ DARWIN_CPU_ARCH_ABI64 = 0x01000000,
+ DARWIN_CPU_TYPE_X86 = 7,
+ DARWIN_CPU_TYPE_ARM = 12,
+ DARWIN_CPU_TYPE_POWERPC = 18
+ };
+
+ Triple::ArchType Arch = TT.getArch();
+ if (Arch == Triple::x86_64)
+ CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
+ else if (Arch == Triple::x86)
+ CPUType = DARWIN_CPU_TYPE_X86;
+ else if (Arch == Triple::ppc)
+ CPUType = DARWIN_CPU_TYPE_POWERPC;
+ else if (Arch == Triple::ppc64)
+ CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
+ else if (Arch == Triple::arm || Arch == Triple::thumb)
+ CPUType = DARWIN_CPU_TYPE_ARM;
+
+ // Traditional Bitcode starts after header.
+ assert(Buffer.size() >= DarwinBCHeaderSize &&
+ "Expected header size to be reserved");
+ unsigned BCOffset = DarwinBCHeaderSize;
+ unsigned BCSize = Buffer.size()-DarwinBCHeaderSize;
+
+ // Write the magic and version.
+ unsigned Position = 0;
+ WriteInt32ToBuffer(0x0B17C0DE , Buffer, Position);
+ WriteInt32ToBuffer(0 , Buffer, Position); // Version.
+ WriteInt32ToBuffer(BCOffset , Buffer, Position);
+ WriteInt32ToBuffer(BCSize , Buffer, Position);
+ WriteInt32ToBuffer(CPUType , Buffer, Position);
+
+ // If the file is not a multiple of 16 bytes, insert dummy padding.
+ while (Buffer.size() & 15)
+ Buffer.push_back(0);
+}
+
+/// WriteBitcodeToFile - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
+ SmallVector<char, 0> Buffer;
+ Buffer.reserve(256*1024);
+
+ // If this is darwin or another generic macho target, reserve space for the
+ // header.
+ Triple TT(M->getTargetTriple());
+ if (TT.isOSDarwin())
+ Buffer.insert(Buffer.begin(), DarwinBCHeaderSize, 0);
+
+ // Emit the module into the buffer.
+ {
+ BitstreamWriter Stream(Buffer);
+
+ // Emit the file header.
+ Stream.Emit((unsigned)'B', 8);
+ Stream.Emit((unsigned)'C', 8);
+ Stream.Emit(0x0, 4);
+ Stream.Emit(0xC, 4);
+ Stream.Emit(0xE, 4);
+ Stream.Emit(0xD, 4);
+
+ // Emit the module.
+ WriteModule(M, Stream);
+ }
+
+ if (TT.isOSDarwin())
+ EmitDarwinBCHeaderAndTrailer(Buffer, TT);
+
+ // Write the generated bitstream to "Out".
+ Out.write((char*)&Buffer.front(), Buffer.size());
+}
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
new file mode 100644
index 000000000000..e5e76e29bd2d
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -0,0 +1,41 @@
+//===--- Bitcode/Writer/BitcodeWriterPass.cpp - Bitcode Writer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BitcodeWriterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ class WriteBitcodePass : public ModulePass {
+ raw_ostream &OS; // raw_ostream to print on
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit WriteBitcodePass(raw_ostream &o)
+ : ModulePass(ID), OS(o) {}
+
+ const char *getPassName() const { return "Bitcode Writer"; }
+
+ bool runOnModule(Module &M) {
+ WriteBitcodeToFile(&M, OS);
+ return false;
+ }
+ };
+}
+
+char WriteBitcodePass::ID = 0;
+
+/// createBitcodeWriterPass - Create and return a pass that writes the module
+/// to the specified ostream.
+ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) {
+ return new WriteBitcodePass(Str);
+}
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
new file mode 100644
index 000000000000..8bac6da89285
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -0,0 +1,540 @@
+//===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueEnumerator class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueEnumerator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
+ return V.first->getType()->isIntOrIntVectorTy();
+}
+
+/// ValueEnumerator - Enumerate module-level information.
+ValueEnumerator::ValueEnumerator(const Module *M) {
+ // Enumerate the global variables.
+ for (Module::const_global_iterator I = M->global_begin(),
+ E = M->global_end(); I != E; ++I)
+ EnumerateValue(I);
+
+ // Enumerate the functions.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ EnumerateValue(I);
+ EnumerateAttributes(cast<Function>(I)->getAttributes());
+ }
+
+ // Enumerate the aliases.
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ EnumerateValue(I);
+
+ // Remember what is the cutoff between globalvalue's and other constants.
+ unsigned FirstConstant = Values.size();
+
+ // Enumerate the global variable initializers.
+ for (Module::const_global_iterator I = M->global_begin(),
+ E = M->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ EnumerateValue(I->getInitializer());
+
+ // Enumerate the aliasees.
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ EnumerateValue(I->getAliasee());
+
+ // Insert constants and metadata that are named at module level into the slot
+ // pool so that the module symbol table can refer to them...
+ EnumerateValueSymbolTable(M->getValueSymbolTable());
+ EnumerateNamedMetadata(M);
+
+ SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+
+ // Enumerate types used by function bodies and argument lists.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ EnumerateType(I->getType());
+
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if (MDNode *MD = dyn_cast<MDNode>(*OI))
+ if (MD->isFunctionLocal() && MD->getFunction())
+ // These will get enumerated during function-incorporation.
+ continue;
+ EnumerateOperandType(*OI);
+ }
+ EnumerateType(I->getType());
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ EnumerateAttributes(CI->getAttributes());
+ else if (const InvokeInst *II = dyn_cast<InvokeInst>(I))
+ EnumerateAttributes(II->getAttributes());
+
+ // Enumerate metadata attached with this instruction.
+ MDs.clear();
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i)
+ EnumerateMetadata(MDs[i].second);
+
+ if (!I->getDebugLoc().isUnknown()) {
+ MDNode *Scope, *IA;
+ I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext());
+ if (Scope) EnumerateMetadata(Scope);
+ if (IA) EnumerateMetadata(IA);
+ }
+ }
+ }
+
+ // Optimize constant ordering.
+ OptimizeConstants(FirstConstant, Values.size());
+}
+
+unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
+ InstructionMapType::const_iterator I = InstructionMap.find(Inst);
+ assert(I != InstructionMap.end() && "Instruction is not mapped!");
+ return I->second;
+}
+
+void ValueEnumerator::setInstructionID(const Instruction *I) {
+ InstructionMap[I] = InstructionCount++;
+}
+
+unsigned ValueEnumerator::getValueID(const Value *V) const {
+ if (isa<MDNode>(V) || isa<MDString>(V)) {
+ ValueMapType::const_iterator I = MDValueMap.find(V);
+ assert(I != MDValueMap.end() && "Value not in slotcalculator!");
+ return I->second-1;
+ }
+
+ ValueMapType::const_iterator I = ValueMap.find(V);
+ assert(I != ValueMap.end() && "Value not in slotcalculator!");
+ return I->second-1;
+}
+
+void ValueEnumerator::dump() const {
+ print(dbgs(), ValueMap, "Default");
+ dbgs() << '\n';
+ print(dbgs(), MDValueMap, "MetaData");
+ dbgs() << '\n';
+}
+
+void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
+ const char *Name) const {
+
+ OS << "Map Name: " << Name << "\n";
+ OS << "Size: " << Map.size() << "\n";
+ for (ValueMapType::const_iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+
+ const Value *V = I->first;
+ if (V->hasName())
+ OS << "Value: " << V->getName();
+ else
+ OS << "Value: [null]\n";
+ V->dump();
+
+ OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):";
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ if (UI != V->use_begin())
+ OS << ",";
+ if((*UI)->hasName())
+ OS << " " << (*UI)->getName();
+ else
+ OS << " [null]";
+
+ }
+ OS << "\n\n";
+ }
+}
+
+// Optimize constant ordering.
+namespace {
+ struct CstSortPredicate {
+ ValueEnumerator &VE;
+ explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {}
+ bool operator()(const std::pair<const Value*, unsigned> &LHS,
+ const std::pair<const Value*, unsigned> &RHS) {
+ // Sort by plane.
+ if (LHS.first->getType() != RHS.first->getType())
+ return VE.getTypeID(LHS.first->getType()) <
+ VE.getTypeID(RHS.first->getType());
+ // Then by frequency.
+ return LHS.second > RHS.second;
+ }
+ };
+}
+
+/// OptimizeConstants - Reorder constant pool for denser encoding.
+void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
+ if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
+
+ CstSortPredicate P(*this);
+ std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
+
+ // Ensure that integer and vector of integer constants are at the start of the
+ // constant pool. This is important so that GEP structure indices come before
+ // gep constant exprs.
+ std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
+ isIntOrIntVectorValue);
+
+ // Rebuild the modified portion of ValueMap.
+ for (; CstStart != CstEnd; ++CstStart)
+ ValueMap[Values[CstStart].first] = CstStart+1;
+}
+
+
+/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
+/// table into the values table.
+void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
+ for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
+ VI != VE; ++VI)
+ EnumerateValue(VI->getValue());
+}
+
+/// EnumerateNamedMetadata - Insert all of the values referenced by
+/// named metadata in the specified module.
+void ValueEnumerator::EnumerateNamedMetadata(const Module *M) {
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I)
+ EnumerateNamedMDNode(I);
+}
+
+void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
+ EnumerateMetadata(MD->getOperand(i));
+}
+
+/// EnumerateMDNodeOperands - Enumerate all non-function-local values
+/// and types referenced by the given MDNode.
+void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i)) {
+ if (isa<MDNode>(V) || isa<MDString>(V))
+ EnumerateMetadata(V);
+ else if (!isa<Instruction>(V) && !isa<Argument>(V))
+ EnumerateValue(V);
+ } else
+ EnumerateType(Type::getVoidTy(N->getContext()));
+ }
+}
+
+void ValueEnumerator::EnumerateMetadata(const Value *MD) {
+ assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind");
+
+ // Enumerate the type of this value.
+ EnumerateType(MD->getType());
+
+ const MDNode *N = dyn_cast<MDNode>(MD);
+
+ // In the module-level pass, skip function-local nodes themselves, but
+ // do walk their operands.
+ if (N && N->isFunctionLocal() && N->getFunction()) {
+ EnumerateMDNodeOperands(N);
+ return;
+ }
+
+ // Check to see if it's already in!
+ unsigned &MDValueID = MDValueMap[MD];
+ if (MDValueID) {
+ // Increment use count.
+ MDValues[MDValueID-1].second++;
+ return;
+ }
+ MDValues.push_back(std::make_pair(MD, 1U));
+ MDValueID = MDValues.size();
+
+ // Enumerate all non-function-local operands.
+ if (N)
+ EnumerateMDNodeOperands(N);
+}
+
+/// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata
+/// information reachable from the given MDNode.
+void ValueEnumerator::EnumerateFunctionLocalMetadata(const MDNode *N) {
+ assert(N->isFunctionLocal() && N->getFunction() &&
+ "EnumerateFunctionLocalMetadata called on non-function-local mdnode!");
+
+ // Enumerate the type of this value.
+ EnumerateType(N->getType());
+
+ // Check to see if it's already in!
+ unsigned &MDValueID = MDValueMap[N];
+ if (MDValueID) {
+ // Increment use count.
+ MDValues[MDValueID-1].second++;
+ return;
+ }
+ MDValues.push_back(std::make_pair(N, 1U));
+ MDValueID = MDValues.size();
+
+ // To incoroporate function-local information visit all function-local
+ // MDNodes and all function-local values they reference.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (Value *V = N->getOperand(i)) {
+ if (MDNode *O = dyn_cast<MDNode>(V)) {
+ if (O->isFunctionLocal() && O->getFunction())
+ EnumerateFunctionLocalMetadata(O);
+ } else if (isa<Instruction>(V) || isa<Argument>(V))
+ EnumerateValue(V);
+ }
+
+ // Also, collect all function-local MDNodes for easy access.
+ FunctionLocalMDs.push_back(N);
+}
+
+void ValueEnumerator::EnumerateValue(const Value *V) {
+ assert(!V->getType()->isVoidTy() && "Can't insert void values!");
+ assert(!isa<MDNode>(V) && !isa<MDString>(V) &&
+ "EnumerateValue doesn't handle Metadata!");
+
+ // Check to see if it's already in!
+ unsigned &ValueID = ValueMap[V];
+ if (ValueID) {
+ // Increment use count.
+ Values[ValueID-1].second++;
+ return;
+ }
+
+ // Enumerate the type of this value.
+ EnumerateType(V->getType());
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<GlobalValue>(C)) {
+ // Initializers for globals are handled explicitly elsewhere.
+ } else if (C->getNumOperands()) {
+ // If a constant has operands, enumerate them. This makes sure that if a
+ // constant has uses (for example an array of const ints), that they are
+ // inserted also.
+
+ // We prefer to enumerate them with values before we enumerate the user
+ // itself. This makes it more likely that we can avoid forward references
+ // in the reader. We know that there can be no cycles in the constants
+ // graph that don't go through a global variable.
+ for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
+ I != E; ++I)
+ if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
+ EnumerateValue(*I);
+
+ // Finally, add the value. Doing this could make the ValueID reference be
+ // dangling, don't reuse it.
+ Values.push_back(std::make_pair(V, 1U));
+ ValueMap[V] = Values.size();
+ return;
+ }
+ }
+
+ // Add the value.
+ Values.push_back(std::make_pair(V, 1U));
+ ValueID = Values.size();
+}
+
+
+void ValueEnumerator::EnumerateType(Type *Ty) {
+ unsigned *TypeID = &TypeMap[Ty];
+
+ // We've already seen this type.
+ if (*TypeID)
+ return;
+
+ // If it is a non-anonymous struct, mark the type as being visited so that we
+ // don't recursively visit it. This is safe because we allow forward
+ // references of these in the bitcode reader.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isLiteral())
+ *TypeID = ~0U;
+
+ // Enumerate all of the subtypes before we enumerate this type. This ensures
+ // that the type will be enumerated in an order that can be directly built.
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ EnumerateType(*I);
+
+ // Refresh the TypeID pointer in case the table rehashed.
+ TypeID = &TypeMap[Ty];
+
+ // Check to see if we got the pointer another way. This can happen when
+ // enumerating recursive types that hit the base case deeper than they start.
+ //
+ // If this is actually a struct that we are treating as forward ref'able,
+ // then emit the definition now that all of its contents are available.
+ if (*TypeID && *TypeID != ~0U)
+ return;
+
+ // Add this type now that its contents are all happily enumerated.
+ Types.push_back(Ty);
+
+ *TypeID = Types.size();
+}
+
+// Enumerate the types for the specified value. If the value is a constant,
+// walk through it, enumerating the types of the constant.
+void ValueEnumerator::EnumerateOperandType(const Value *V) {
+ EnumerateType(V->getType());
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ // If this constant is already enumerated, ignore it, we know its type must
+ // be enumerated.
+ if (ValueMap.count(V)) return;
+
+ // This constant may have operands, make sure to enumerate the types in
+ // them.
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ const Value *Op = C->getOperand(i);
+
+ // Don't enumerate basic blocks here, this happens as operands to
+ // blockaddress.
+ if (isa<BasicBlock>(Op)) continue;
+
+ EnumerateOperandType(Op);
+ }
+
+ if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (Value *Elem = N->getOperand(i))
+ EnumerateOperandType(Elem);
+ }
+ } else if (isa<MDString>(V) || isa<MDNode>(V))
+ EnumerateMetadata(V);
+}
+
+void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) {
+ if (PAL.isEmpty()) return; // null is always 0.
+
+ // Do a lookup.
+ unsigned &Entry = AttributeMap[PAL];
+ if (Entry == 0) {
+ // Never saw this before, add it.
+ Attribute.push_back(PAL);
+ Entry = Attribute.size();
+ }
+
+ // Do lookups for all attribute groups.
+ for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) {
+ AttributeSet AS = PAL.getSlotAttributes(i);
+ unsigned &Entry = AttributeGroupMap[AS];
+ if (Entry == 0) {
+ AttributeGroups.push_back(AS);
+ Entry = AttributeGroups.size();
+ }
+ }
+}
+
+void ValueEnumerator::incorporateFunction(const Function &F) {
+ InstructionCount = 0;
+ NumModuleValues = Values.size();
+ NumModuleMDValues = MDValues.size();
+
+ // Adding function arguments to the value table.
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I)
+ EnumerateValue(I);
+
+ FirstFuncConstantID = Values.size();
+
+ // Add all function-level constants to the value table.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
+ isa<InlineAsm>(*OI))
+ EnumerateValue(*OI);
+ }
+ BasicBlocks.push_back(BB);
+ ValueMap[BB] = BasicBlocks.size();
+ }
+
+ // Optimize the constant layout.
+ OptimizeConstants(FirstFuncConstantID, Values.size());
+
+ // Add the function's parameter attributes so they are available for use in
+ // the function's instruction.
+ EnumerateAttributes(F.getAttributes());
+
+ FirstInstID = Values.size();
+
+ SmallVector<MDNode *, 8> FnLocalMDVector;
+ // Add all of the instructions.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if (MDNode *MD = dyn_cast<MDNode>(*OI))
+ if (MD->isFunctionLocal() && MD->getFunction())
+ // Enumerate metadata after the instructions they might refer to.
+ FnLocalMDVector.push_back(MD);
+ }
+
+ SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+ MDNode *N = MDs[i].second;
+ if (N->isFunctionLocal() && N->getFunction())
+ FnLocalMDVector.push_back(N);
+ }
+
+ if (!I->getType()->isVoidTy())
+ EnumerateValue(I);
+ }
+ }
+
+ // Add all of the function-local metadata.
+ for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i)
+ EnumerateFunctionLocalMetadata(FnLocalMDVector[i]);
+}
+
+void ValueEnumerator::purgeFunction() {
+ /// Remove purged values from the ValueMap.
+ for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
+ ValueMap.erase(Values[i].first);
+ for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i)
+ MDValueMap.erase(MDValues[i].first);
+ for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
+ ValueMap.erase(BasicBlocks[i]);
+
+ Values.resize(NumModuleValues);
+ MDValues.resize(NumModuleMDValues);
+ BasicBlocks.clear();
+ FunctionLocalMDs.clear();
+}
+
+static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
+ DenseMap<const BasicBlock*, unsigned> &IDMap) {
+ unsigned Counter = 0;
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ IDMap[BB] = ++Counter;
+}
+
+/// getGlobalBasicBlockID - This returns the function-specific ID for the
+/// specified basic block. This is relatively expensive information, so it
+/// should only be used by rare constructs such as address-of-label.
+unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
+ unsigned &Idx = GlobalBasicBlockIDs[BB];
+ if (Idx != 0)
+ return Idx-1;
+
+ IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
+ return getGlobalBasicBlockID(BB);
+}
+
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
new file mode 100644
index 000000000000..0af6164c944f
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
@@ -0,0 +1,171 @@
+//===-- Bitcode/Writer/ValueEnumerator.h - Number values --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class gives values and types Unique ID's.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef VALUE_ENUMERATOR_H
+#define VALUE_ENUMERATOR_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Attributes.h"
+#include <vector>
+
+namespace llvm {
+
+class Type;
+class Value;
+class Instruction;
+class BasicBlock;
+class Function;
+class Module;
+class MDNode;
+class NamedMDNode;
+class AttributeSet;
+class ValueSymbolTable;
+class MDSymbolTable;
+class raw_ostream;
+
+class ValueEnumerator {
+public:
+ typedef std::vector<Type*> TypeList;
+
+ // For each value, we remember its Value* and occurrence frequency.
+ typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
+private:
+ typedef DenseMap<Type*, unsigned> TypeMapType;
+ TypeMapType TypeMap;
+ TypeList Types;
+
+ typedef DenseMap<const Value*, unsigned> ValueMapType;
+ ValueMapType ValueMap;
+ ValueList Values;
+ ValueList MDValues;
+ SmallVector<const MDNode *, 8> FunctionLocalMDs;
+ ValueMapType MDValueMap;
+
+ typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType;
+ AttributeGroupMapType AttributeGroupMap;
+ std::vector<AttributeSet> AttributeGroups;
+
+ typedef DenseMap<AttributeSet, unsigned> AttributeMapType;
+ AttributeMapType AttributeMap;
+ std::vector<AttributeSet> Attribute;
+
+ /// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by
+ /// the "getGlobalBasicBlockID" method.
+ mutable DenseMap<const BasicBlock*, unsigned> GlobalBasicBlockIDs;
+
+ typedef DenseMap<const Instruction*, unsigned> InstructionMapType;
+ InstructionMapType InstructionMap;
+ unsigned InstructionCount;
+
+ /// BasicBlocks - This contains all the basic blocks for the currently
+ /// incorporated function. Their reverse mapping is stored in ValueMap.
+ std::vector<const BasicBlock*> BasicBlocks;
+
+ /// When a function is incorporated, this is the size of the Values list
+ /// before incorporation.
+ unsigned NumModuleValues;
+
+ /// When a function is incorporated, this is the size of the MDValues list
+ /// before incorporation.
+ unsigned NumModuleMDValues;
+
+ unsigned FirstFuncConstantID;
+ unsigned FirstInstID;
+
+ ValueEnumerator(const ValueEnumerator &) LLVM_DELETED_FUNCTION;
+ void operator=(const ValueEnumerator &) LLVM_DELETED_FUNCTION;
+public:
+ ValueEnumerator(const Module *M);
+
+ void dump() const;
+ void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
+
+ unsigned getValueID(const Value *V) const;
+
+ unsigned getTypeID(Type *T) const {
+ TypeMapType::const_iterator I = TypeMap.find(T);
+ assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
+ return I->second-1;
+ }
+
+ unsigned getInstructionID(const Instruction *I) const;
+ void setInstructionID(const Instruction *I);
+
+ unsigned getAttributeID(AttributeSet PAL) const {
+ if (PAL.isEmpty()) return 0; // Null maps to zero.
+ AttributeMapType::const_iterator I = AttributeMap.find(PAL);
+ assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
+ return I->second;
+ }
+
+ unsigned getAttributeGroupID(AttributeSet PAL) const {
+ if (PAL.isEmpty()) return 0; // Null maps to zero.
+ AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(PAL);
+ assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!");
+ return I->second;
+ }
+
+ /// getFunctionConstantRange - Return the range of values that corresponds to
+ /// function-local constants.
+ void getFunctionConstantRange(unsigned &Start, unsigned &End) const {
+ Start = FirstFuncConstantID;
+ End = FirstInstID;
+ }
+
+ const ValueList &getValues() const { return Values; }
+ const ValueList &getMDValues() const { return MDValues; }
+ const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const {
+ return FunctionLocalMDs;
+ }
+ const TypeList &getTypes() const { return Types; }
+ const std::vector<const BasicBlock*> &getBasicBlocks() const {
+ return BasicBlocks;
+ }
+ const std::vector<AttributeSet> &getAttributes() const {
+ return Attribute;
+ }
+ const std::vector<AttributeSet> &getAttributeGroups() const {
+ return AttributeGroups;
+ }
+
+ /// getGlobalBasicBlockID - This returns the function-specific ID for the
+ /// specified basic block. This is relatively expensive information, so it
+ /// should only be used by rare constructs such as address-of-label.
+ unsigned getGlobalBasicBlockID(const BasicBlock *BB) const;
+
+ /// incorporateFunction/purgeFunction - If you'd like to deal with a function,
+ /// use these two methods to get its data into the ValueEnumerator!
+ ///
+ void incorporateFunction(const Function &F);
+ void purgeFunction();
+
+private:
+ void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
+
+ void EnumerateMDNodeOperands(const MDNode *N);
+ void EnumerateMetadata(const Value *MD);
+ void EnumerateFunctionLocalMetadata(const MDNode *N);
+ void EnumerateNamedMDNode(const NamedMDNode *NMD);
+ void EnumerateValue(const Value *V);
+ void EnumerateType(Type *T);
+ void EnumerateOperandType(const Value *V);
+ void EnumerateAttributes(AttributeSet PAL);
+
+ void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
+ void EnumerateNamedMetadata(const Module *M);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
new file mode 100644
index 000000000000..c50f8b5a42ad
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -0,0 +1,943 @@
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AggressiveAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("agg-antidep-debugdiv",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("agg-antidep-debugmod",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+
+AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
+ MachineBasicBlock *BB) :
+ NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+ GroupNodeIndices(TargetRegs, 0),
+ KillIndices(TargetRegs, 0),
+ DefIndices(TargetRegs, 0)
+{
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0; i < NumTargetRegs; ++i) {
+ // Initialize all registers to be in their own group. Initially we
+ // assign the register to the same-indexed GroupNode.
+ GroupNodeIndices[i] = i;
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+}
+
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) {
+ unsigned Node = GroupNodeIndices[Reg];
+ while (GroupNodes[Node] != Node)
+ Node = GroupNodes[Node];
+
+ return Node;
+}
+
+void AggressiveAntiDepState::GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
+{
+ for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) {
+ if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
+ Regs.push_back(Reg);
+ }
+}
+
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
+{
+ assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
+ assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
+
+ // find group for each register
+ unsigned Group1 = GetGroup(Reg1);
+ unsigned Group2 = GetGroup(Reg2);
+
+ // if either group is 0, then that must become the parent
+ unsigned Parent = (Group1 == 0) ? Group1 : Group2;
+ unsigned Other = (Parent == Group1) ? Group2 : Group1;
+ GroupNodes.at(Other) = Parent;
+ return Parent;
+}
+
+unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
+{
+ // Create a new GroupNode for Reg. Reg's existing GroupNode must
+ // stay as is because there could be other GroupNodes referring to
+ // it.
+ unsigned idx = GroupNodes.size();
+ GroupNodes.push_back(idx);
+ GroupNodeIndices[Reg] = idx;
+ return idx;
+}
+
+bool AggressiveAntiDepState::IsLive(unsigned Reg)
+{
+ // KillIndex must be defined and DefIndex not defined for a register
+ // to be live.
+ return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
+}
+
+
+
+AggressiveAntiDepBreaker::
+AggressiveAntiDepBreaker(MachineFunction& MFi,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ RegClassInfo(RCI),
+ State(NULL) {
+ /* Collect a bitset of all registers that are only broken if they
+ are on the critical path. */
+ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ if (CriticalPathSet.none())
+ CriticalPathSet = CPSet;
+ else
+ CriticalPathSet |= CPSet;
+ }
+
+ DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
+ DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
+ r = CriticalPathSet.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << '\n');
+}
+
+AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
+ delete State;
+}
+
+void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ assert(State == NULL);
+ State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In non-return this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::FinishBlock() {
+ delete State;
+ State = NULL;
+}
+
+void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+ PrescanInstruction(MI, Count, PassthruRegs);
+ ScanInstruction(MI, Count);
+
+ DEBUG(dbgs() << "Observe: ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << "\tRegs:");
+
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ // If Reg is current live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled). If it is not live but was defined in the
+ // previous schedule region, then set its def index to the most
+ // conservative location (i.e. the beginning of the previous
+ // schedule region).
+ if (State->IsLive(Reg)) {
+ DEBUG(if (State->GetGroup(Reg) != 0)
+ dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg) << "->g0(region live-out)");
+ State->UnionGroups(Reg, 0);
+ } else if ((DefIndices[Reg] < InsertPosIndex)
+ && (DefIndices[Reg] >= Count)) {
+ DefIndices[Reg] = Count;
+ }
+ }
+ DEBUG(dbgs() << '\n');
+}
+
+bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
+ MachineOperand& MO)
+{
+ if (!MO.isReg() || !MO.isImplicit())
+ return false;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ return false;
+
+ MachineOperand *Op = NULL;
+ if (MO.isDef())
+ Op = MI->findRegisterUseOperand(Reg, true);
+ else
+ Op = MI->findRegisterDefOperand(Reg);
+
+ return((Op != NULL) && Op->isImplicit());
+}
+
+void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
+ std::set<unsigned>& PassthruRegs) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
+ IsImplicitDefUse(MI, MO)) {
+ const unsigned Reg = MO.getReg();
+ PassthruRegs.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ PassthruRegs.insert(*SubRegs);
+ }
+ }
+}
+
+/// AntiDepEdges - Return in Edges the anti- and output- dependencies
+/// in SU that we want to consider for breaking.
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
+ SmallSet<unsigned, 4> RegSet;
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
+ unsigned Reg = P->getReg();
+ if (RegSet.count(Reg) == 0) {
+ Edges.push_back(&*P);
+ RegSet.insert(Reg);
+ }
+ }
+ }
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SUnit *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ if (SU != 0) {
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ const SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ }
+
+ return (Next) ? Next->getSUnit() : 0;
+}
+
+void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
+ const char *tag,
+ const char *header,
+ const char *footer) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ if (!State->IsLive(Reg)) {
+ KillIndices[Reg] = KillIdx;
+ DefIndices[Reg] = ~0u;
+ RegRefs.erase(Reg);
+ State->LeaveGroup(Reg);
+ DEBUG(if (header != NULL) {
+ dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
+ }
+ // Repeat for subregisters.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubregReg = *SubRegs;
+ if (!State->IsLive(SubregReg)) {
+ KillIndices[SubregReg] = KillIdx;
+ DefIndices[SubregReg] = ~0u;
+ RegRefs.erase(SubregReg);
+ State->LeaveGroup(SubregReg);
+ DEBUG(if (header != NULL) {
+ dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
+ State->GetGroup(SubregReg) << tag);
+ }
+ }
+
+ DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer);
+}
+
+void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
+ unsigned Count,
+ std::set<unsigned>& PassthruRegs) {
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Handle dead defs by simulating a last-use of the register just
+ // after the def. A dead def can occur because the def is truly
+ // dead, or because only a subregister is live at the def. If we
+ // don't do this the dead def will be incorrectly merged into the
+ // previous def.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
+ }
+
+ DEBUG(dbgs() << "\tDef Groups:");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI)) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Any aliased that are live at this point are completely or
+ // partially defined here, so group those aliases with Reg.
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (State->IsLive(AliasReg)) {
+ State->UnionGroups(Reg, AliasReg);
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
+ TRI->getName(AliasReg) << ")");
+ }
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Scan the register defs for this instruction and update
+ // live-ranges.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ // Ignore KILLs and passthru registers for liveness...
+ if (MI->isKill() || (PassthruRegs.count(Reg) != 0))
+ continue;
+
+ // Update def for Reg and aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ DefIndices[*AI] = Count;
+ }
+}
+
+void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ DEBUG(dbgs() << "\tUse Groups:");
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // If MI's uses have special allocation requirement, don't allow
+ // any use registers to be changed. Also assume all registers
+ // used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instruction is more complex. We are being
+ // conservatively here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
+ // Scan the register uses for this instruction and update
+ // live-ranges, groups and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg));
+
+ // It wasn't previously live but now it is, this is a kill. Forget
+ // the previous live-range information and start a new live-range
+ // for the register.
+ HandleLastUse(Reg, Count, "(last-use)");
+
+ if (Special) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Form a group of all defs and uses of a KILL instruction to ensure
+ // that all registers are renamed as a group.
+ if (MI->isKill()) {
+ DEBUG(dbgs() << "\tKill Group:");
+
+ unsigned FirstReg = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (FirstReg != 0) {
+ DEBUG(dbgs() << "=" << TRI->getName(Reg));
+ State->UnionGroups(FirstReg, Reg);
+ } else {
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ FirstReg = Reg;
+ }
+ }
+
+ DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
+ }
+}
+
+BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
+ BitVector BV(TRI->getNumRegs(), false);
+ bool first = true;
+
+ // Check all references that need rewriting for Reg. For each, use
+ // the corresponding register class to narrow the set of registers
+ // that are appropriate for renaming.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = State->GetRegRefs().equal_range(Reg);
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first,
+ QE = Range.second; Q != QE; ++Q) {
+ const TargetRegisterClass *RC = Q->second.RC;
+ if (RC == NULL) continue;
+
+ BitVector RCBV = TRI->getAllocatableSet(MF, RC);
+ if (first) {
+ BV |= RCBV;
+ first = false;
+ } else {
+ BV &= RCBV;
+ }
+
+ DEBUG(dbgs() << " " << RC->getName());
+ }
+
+ return BV;
+}
+
+bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
+ unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Collect all referenced registers in the same group as
+ // AntiDepReg. These all need to be renamed together if we are to
+ // break the anti-dependence.
+ std::vector<unsigned> Regs;
+ State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
+ assert(Regs.size() > 0 && "Empty register group!");
+ if (Regs.size() == 0)
+ return false;
+
+ // Find the "superest" register in the group. At the same time,
+ // collect the BitVector of registers that can be used to rename
+ // each register.
+ DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
+ << ":\n");
+ std::map<unsigned, BitVector> RenameRegisterMap;
+ unsigned SuperReg = 0;
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
+ SuperReg = Reg;
+
+ // If Reg has any references, then collect possible rename regs
+ if (RegRefs.count(Reg) > 0) {
+ DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
+
+ BitVector BV = GetRenameRegisters(Reg);
+ RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV));
+
+ DEBUG(dbgs() << " ::");
+ DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+
+ // All group registers should be a subreg of SuperReg.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if (Reg == SuperReg) continue;
+ bool IsSub = TRI->isSubRegister(SuperReg, Reg);
+ assert(IsSub && "Expecting group subregister");
+ if (!IsSub)
+ return false;
+ }
+
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int renamecnt = 0;
+ if (renamecnt++ % DebugDiv != DebugMod)
+ return false;
+
+ dbgs() << "*** Performing rename " << TRI->getName(SuperReg) <<
+ " for debug ***\n";
+ }
+#endif
+
+ // Check each possible rename register for SuperReg in round-robin
+ // order. If that register is available, and the corresponding
+ // registers are available for the other group subregisters, then we
+ // can use those registers to rename.
+
+ // FIXME: Using getMinimalPhysRegClass is very conservative. We should
+ // check every use of the register and find the largest register class
+ // that can be used in all of them.
+ const TargetRegisterClass *SuperRC =
+ TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
+
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC);
+ if (Order.empty()) {
+ DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tFind Registers:");
+
+ if (RenameOrder.count(SuperRC) == 0)
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
+
+ unsigned OrigR = RenameOrder[SuperRC];
+ unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
+ unsigned R = OrigR;
+ do {
+ if (R == 0) R = Order.size();
+ --R;
+ const unsigned NewSuperReg = Order[R];
+ // Don't consider non-allocatable registers
+ if (!MRI.isAllocatable(NewSuperReg)) continue;
+ // Don't replace a register with itself.
+ if (NewSuperReg == SuperReg) continue;
+
+ DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':');
+ RenameMap.clear();
+
+ // For each referenced group register (which must be a SuperReg or
+ // a subregister of SuperReg), find the corresponding subregister
+ // of NewSuperReg and make sure it is free to be renamed.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ unsigned NewReg = 0;
+ if (Reg == SuperReg) {
+ NewReg = NewSuperReg;
+ } else {
+ unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg);
+ if (NewSubRegIdx != 0)
+ NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
+ }
+
+ DEBUG(dbgs() << " " << TRI->getName(NewReg));
+
+ // Check if Reg can be renamed to NewReg.
+ BitVector BV = RenameRegisterMap[Reg];
+ if (!BV.test(NewReg)) {
+ DEBUG(dbgs() << "(no rename)");
+ goto next_super_reg;
+ }
+
+ // If NewReg is dead and NewReg's most recent def is not before
+ // Regs's kill, it's safe to replace Reg with NewReg. We
+ // must also check all aliases of NewReg, because we can't define a
+ // register when any sub or super is already live.
+ if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
+ DEBUG(dbgs() << "(live)");
+ goto next_super_reg;
+ } else {
+ bool found = false;
+ for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (State->IsLive(AliasReg) ||
+ (KillIndices[Reg] > DefIndices[AliasReg])) {
+ DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ goto next_super_reg;
+ }
+
+ // Record that 'Reg' can be renamed to 'NewReg'.
+ RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
+ }
+
+ // If we fall-out here, then every register in the group can be
+ // renamed, as recorded in RenameMap.
+ RenameOrder.erase(SuperRC);
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
+ DEBUG(dbgs() << "]\n");
+ return true;
+
+ next_super_reg:
+ DEBUG(dbgs() << ']');
+ } while (R != EndR);
+
+ DEBUG(dbgs() << '\n');
+
+ // No registers are free and available!
+ return false;
+}
+
+/// BreakAntiDependencies - Identifiy anti-dependencies within the
+/// ScheduleDAG and break them by renaming registers.
+///
+unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
+ const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // For each regclass the next register to use for renaming.
+ RenameOrderType RenameOrder;
+
+ // ...need a map from MI to SUnit.
+ std::map<MachineInstr *, const SUnit *> MISUnitMap;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
+ SU));
+ }
+
+ // Track progress along the critical path through the SUnit graph as
+ // we walk the instructions. This is needed for regclasses that only
+ // break critical-path anti-dependencies.
+ const SUnit *CriticalPathSU = 0;
+ MachineInstr *CriticalPathMI = 0;
+ if (CriticalPathSet.any()) {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ if (!CriticalPathSU ||
+ ((SU->getDepth() + SU->Latency) >
+ (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+ CriticalPathSU = SU;
+ }
+ }
+
+ CriticalPathMI = CriticalPathSU->getInstr();
+ }
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (!State->IsLive(Reg))
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // Attempt to break anti-dependence edges. Walk the instructions
+ // from the bottom up, tracking information about liveness as we go
+ // to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ if (MI->isDebugValue())
+ continue;
+
+ DEBUG(dbgs() << "Anti: ");
+ DEBUG(MI->dump());
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+
+ // Process the defs in MI...
+ PrescanInstruction(MI, Count, PassthruRegs);
+
+ // The dependence edges that represent anti- and output-
+ // dependencies that are candidates for breaking.
+ std::vector<const SDep *> Edges;
+ const SUnit *PathSU = MISUnitMap[MI];
+ AntiDepEdges(PathSU, Edges);
+
+ // If MI is not on the critical path, then we don't rename
+ // registers in the CriticalPathSet.
+ BitVector *ExcludeRegs = NULL;
+ if (MI == CriticalPathMI) {
+ CriticalPathSU = CriticalPathStep(CriticalPathSU);
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
+ } else {
+ ExcludeRegs = &CriticalPathSet;
+ }
+
+ // Ignore KILL instructions (they form a group in ScanInstruction
+ // but don't cause any anti-dependence breaking themselves)
+ if (!MI->isKill()) {
+ // Attempt to break each anti-dependency...
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
+ const SDep *Edge = Edges[i];
+ SUnit *NextSU = Edge->getSUnit();
+
+ if ((Edge->getKind() != SDep::Anti) &&
+ (Edge->getKind() != SDep::Output)) continue;
+
+ unsigned AntiDepReg = Edge->getReg();
+ DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+
+ if (!MRI.isAllocatable(AntiDepReg)) {
+ // Don't break anti-dependencies on non-allocatable registers.
+ DEBUG(dbgs() << " (non-allocatable)\n");
+ continue;
+ } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) {
+ // Don't break anti-dependencies for critical path registers
+ // if not on the critical path
+ DEBUG(dbgs() << " (not critical-path)\n");
+ continue;
+ } else if (PassthruRegs.count(AntiDepReg) != 0) {
+ // If the anti-dep register liveness "passes-thru", then
+ // don't try to change it. It will be changed along with
+ // the use if required to break an earlier antidep.
+ DEBUG(dbgs() << " (passthru)\n");
+ continue;
+ } else {
+ // No anti-dep breaking for implicit deps
+ MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
+ assert(AntiDepOp != NULL &&
+ "Can't find index for defined register operand");
+ if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) {
+ DEBUG(dbgs() << " (implicit)\n");
+ continue;
+ }
+
+ // If the SUnit has other dependencies on the SUnit that
+ // it anti-depends on, don't bother breaking the
+ // anti-dependency since those edges would prevent such
+ // units from being scheduled past each other
+ // regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
+ (P->getKind() != SDep::Output)) {
+ DEBUG(dbgs() << " (real dependency)\n");
+ AntiDepReg = 0;
+ break;
+ } else if ((P->getSUnit() != NextSU) &&
+ (P->getKind() == SDep::Data) &&
+ (P->getReg() == AntiDepReg)) {
+ DEBUG(dbgs() << " (other dependency)\n");
+ AntiDepReg = 0;
+ break;
+ }
+ }
+
+ if (AntiDepReg == 0) continue;
+ }
+
+ assert(AntiDepReg != 0);
+ if (AntiDepReg == 0) continue;
+
+ // Determine AntiDepReg's register group.
+ const unsigned GroupIndex = State->GetGroup(AntiDepReg);
+ if (GroupIndex == 0) {
+ DEBUG(dbgs() << " (zero group)\n");
+ continue;
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Look for a suitable register to use to break the anti-dependence.
+ std::map<unsigned, unsigned> RenameMap;
+ if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
+ DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg) << ":");
+
+ // Handle each group register...
+ for (std::map<unsigned, unsigned>::iterator
+ S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
+ unsigned CurrReg = S->first;
+ unsigned NewReg = S->second;
+
+ DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
+ TRI->getName(NewReg) << "(" <<
+ RegRefs.count(CurrReg) << " refs)");
+
+ // Update the references to the old register CurrReg to
+ // refer to the new register NewReg.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = RegRefs.equal_range(CurrReg);
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second.Operand->setReg(NewReg);
+ // If the SU for the instruction being updated has debug
+ // information related to the anti-dependency register, make
+ // sure to update that as well.
+ const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()];
+ if (!SU) continue;
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second.Operand->getParent())
+ UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for CurrReg is now inconsistent. Set
+ // the state as if it were dead.
+ State->UnionGroups(NewReg, 0);
+ RegRefs.erase(NewReg);
+ DefIndices[NewReg] = DefIndices[CurrReg];
+ KillIndices[NewReg] = KillIndices[CurrReg];
+
+ State->UnionGroups(CurrReg, 0);
+ RegRefs.erase(CurrReg);
+ DefIndices[CurrReg] = KillIndices[CurrReg];
+ KillIndices[CurrReg] = ~0u;
+ assert(((KillIndices[CurrReg] == ~0u) !=
+ (DefIndices[CurrReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ }
+
+ ++Broken;
+ DEBUG(dbgs() << '\n');
+ }
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
new file mode 100644
index 000000000000..6683630fba6d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -0,0 +1,184 @@
+//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <map>
+
+namespace llvm {
+class RegisterClassInfo;
+
+ /// Class AggressiveAntiDepState
+ /// Contains all the state necessary for anti-dep breaking.
+ class AggressiveAntiDepState {
+ public:
+ /// RegisterReference - Information about a register reference
+ /// within a liverange
+ typedef struct {
+ /// Operand - The registers operand
+ MachineOperand *Operand;
+ /// RC - The register class
+ const TargetRegisterClass *RC;
+ } RegisterReference;
+
+ private:
+ /// NumTargetRegs - Number of non-virtual target registers
+ /// (i.e. TRI->getNumRegs()).
+ const unsigned NumTargetRegs;
+
+ /// GroupNodes - Implements a disjoint-union data structure to
+ /// form register groups. A node is represented by an index into
+ /// the vector. A node can "point to" itself to indicate that it
+ /// is the parent of a group, or point to another node to indicate
+ /// that it is a member of the same group as that node.
+ std::vector<unsigned> GroupNodes;
+
+ /// GroupNodeIndices - For each register, the index of the GroupNode
+ /// currently representing the group that the register belongs to.
+ /// Register 0 is always represented by the 0 group, a group
+ /// composed of registers that are not eligible for anti-aliasing.
+ std::vector<unsigned> GroupNodeIndices;
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, RegisterReference> RegRefs;
+
+ /// KillIndices - The index of the most recent kill (proceding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// DefIndices - The index of the most recent complete def (proceding bottom
+ /// up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ public:
+ AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
+
+ /// GetKillIndices - Return the kill indices.
+ std::vector<unsigned> &GetKillIndices() { return KillIndices; }
+
+ /// GetDefIndices - Return the define indices.
+ std::vector<unsigned> &GetDefIndices() { return DefIndices; }
+
+ /// GetRegRefs - Return the RegRefs map.
+ std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+ // GetGroup - Get the group for a register. The returned value is
+ // the index of the GroupNode representing the group.
+ unsigned GetGroup(unsigned Reg);
+
+ // GetGroupRegs - Return a vector of the registers belonging to a
+ // group. If RegRefs is non-NULL then only included referenced registers.
+ void GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference> *RegRefs);
+
+ // UnionGroups - Union Reg1's and Reg2's groups to form a new
+ // group. Return the index of the GroupNode representing the
+ // group.
+ unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+ // LeaveGroup - Remove a register from its current group and place
+ // it alone in its own group. Return the index of the GroupNode
+ // representing the registers new group.
+ unsigned LeaveGroup(unsigned Reg);
+
+ /// IsLive - Return true if Reg is live
+ bool IsLive(unsigned Reg);
+ };
+
+
+ /// Class AggressiveAntiDepBreaker
+ class AggressiveAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// CriticalPathSet - The set of registers that should only be
+ /// renamed if they are on the critical path.
+ BitVector CriticalPathSet;
+
+ /// State - The state used to identify and rename anti-dependence
+ /// registers.
+ AggressiveAntiDepState *State;
+
+ public:
+ AggressiveAntiDepBreaker(MachineFunction& MFi,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
+ ~AggressiveAntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identifiy anti-dependencies along the critical
+ /// path
+ /// of the ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ /// Keep track of a position in the allocation order for each regclass.
+ typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
+
+ /// IsImplicitDefUse - Return true if MO represents a register
+ /// that is both implicitly used and defined in MI
+ bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
+
+ /// GetPassthruRegs - If MI implicitly def/uses a register, then
+ /// return that register and all subregisters.
+ void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
+
+ void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+ const char *header =NULL, const char *footer =NULL);
+
+ void PrescanInstruction(MachineInstr *MI, unsigned Count,
+ std::set<unsigned>& PassthruRegs);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ BitVector GetRenameRegisters(unsigned Reg);
+ bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 000000000000..3fa1f8ff206c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,52 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder::AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo)
+ : Pos(0) {
+ const MachineFunction &MF = VRM.getMachineFunction();
+ const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
+ Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
+ TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM);
+ rewind();
+
+ DEBUG({
+ if (!Hints.empty()) {
+ dbgs() << "hints:";
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ dbgs() << ' ' << PrintReg(Hints[I], TRI);
+ dbgs() << '\n';
+ }
+ });
+#ifndef NDEBUG
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() &&
+ "Target hint is outside allocation order.");
+#endif
+}
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 000000000000..aed461a7ed02
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,85 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_CODEGEN_ALLOCATIONORDER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
+namespace llvm {
+
+class RegisterClassInfo;
+class VirtRegMap;
+
+class AllocationOrder {
+ SmallVector<MCPhysReg, 16> Hints;
+ ArrayRef<MCPhysReg> Order;
+ int Pos;
+
+public:
+ /// Create a new AllocationOrder for VirtReg.
+ /// @param VirtReg Virtual register to allocate for.
+ /// @param VRM Virtual register map for function.
+ /// @param RegClassInfo Information about reserved and allocatable registers.
+ AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo);
+
+ /// Get the allocation order without reordered hints.
+ ArrayRef<MCPhysReg> getOrder() const { return Order; }
+
+ /// Return the next physical register in the allocation order, or 0.
+ /// It is safe to call next() again after it returned 0, it will keep
+ /// returning 0 until rewind() is called.
+ unsigned next() {
+ if (Pos < 0)
+ return Hints.end()[Pos++];
+ while (Pos < int(Order.size())) {
+ unsigned Reg = Order[Pos++];
+ if (!isHint(Reg))
+ return Reg;
+ }
+ return 0;
+ }
+
+ /// As next(), but allow duplicates to be returned, and stop before the
+ /// Limit'th register in the RegisterClassInfo allocation order.
+ ///
+ /// This can produce more than Limit registers if there are hints.
+ unsigned nextWithDups(unsigned Limit) {
+ if (Pos < 0)
+ return Hints.end()[Pos++];
+ if (Pos < int(Limit))
+ return Order[Pos++];
+ return 0;
+ }
+
+ /// Start over from the beginning.
+ void rewind() { Pos = -int(Hints.size()); }
+
+ /// Return true if the last register returned from next() was a preferred register.
+ bool isHint() const { return Pos <= 0; }
+
+ /// Return true if PhysReg is a preferred register.
+ bool isHint(unsigned PhysReg) const {
+ return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end();
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
new file mode 100644
index 000000000000..dd7282c0ad97
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -0,0 +1,349 @@
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines several CodeGen-specific LLVM IR analysis utilties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned llvm::ComputeLinearIndex(Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ if (Indices && *Indices == unsigned(EI - EB))
+ return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ if (Indices && *Indices == i)
+ return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // We haven't found the type we're looking for, so keep searching.
+ return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy);
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI)
+ ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(EI - EB));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty->isVoidTy())
+ return;
+ // Base case: we can get an EVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+
+ if (GV && GV->getName() == "llvm.eh.catch.all.value") {
+ assert(GV->hasInitializer() &&
+ "The EH catch-all value must have an initializer");
+ Value *Init = GV->getInitializer();
+ GV = dyn_cast<GlobalVariable>(Init);
+ if (!GV) V = cast<ConstantPointerNull>(Init);
+ }
+
+ assert((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+bool
+llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
+ const TargetLowering &TLI) {
+ for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+ InlineAsm::ConstraintInfo &CI = CInfos[i];
+ for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+ if (CType == TargetLowering::C_Memory)
+ return true;
+ }
+
+ // Indirect operand accesses access memory.
+ if (CI.isIndirect)
+ return true;
+ }
+
+ return false;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
+ switch (Pred) {
+ case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
+ case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
+ case FCmpInst::FCMP_OGT: return ISD::SETOGT;
+ case FCmpInst::FCMP_OGE: return ISD::SETOGE;
+ case FCmpInst::FCMP_OLT: return ISD::SETOLT;
+ case FCmpInst::FCMP_OLE: return ISD::SETOLE;
+ case FCmpInst::FCMP_ONE: return ISD::SETONE;
+ case FCmpInst::FCMP_ORD: return ISD::SETO;
+ case FCmpInst::FCMP_UNO: return ISD::SETUO;
+ case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
+ case FCmpInst::FCMP_UGT: return ISD::SETUGT;
+ case FCmpInst::FCMP_UGE: return ISD::SETUGE;
+ case FCmpInst::FCMP_ULT: return ISD::SETULT;
+ case FCmpInst::FCMP_ULE: return ISD::SETULE;
+ case FCmpInst::FCMP_UNE: return ISD::SETUNE;
+ case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
+ default: llvm_unreachable("Invalid FCmp predicate opcode!");
+ }
+}
+
+ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
+ case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
+ case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
+ case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
+ case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
+ case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
+ default: return CC;
+ }
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ: return ISD::SETEQ;
+ case ICmpInst::ICMP_NE: return ISD::SETNE;
+ case ICmpInst::ICMP_SLE: return ISD::SETLE;
+ case ICmpInst::ICMP_ULE: return ISD::SETULE;
+ case ICmpInst::ICMP_SGE: return ISD::SETGE;
+ case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+ case ICmpInst::ICMP_SLT: return ISD::SETLT;
+ case ICmpInst::ICMP_ULT: return ISD::SETULT;
+ case ICmpInst::ICMP_SGT: return ISD::SETGT;
+ case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+ default:
+ llvm_unreachable("Invalid ICmp predicate opcode!");
+ }
+}
+
+
+/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look
+/// through it (and any transitive noop operands to it) and return its input
+/// value. This is used to determine if a tail call can be formed.
+///
+static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
+ // If V is not an instruction, it can't be looked through.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V;
+
+ Value *Op = I->getOperand(0);
+
+ // Look through truly no-op truncates.
+ if (isa<TruncInst>(I) &&
+ TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType()))
+ return getNoopInput(I->getOperand(0), TLI);
+
+ // Look through truly no-op bitcasts.
+ if (isa<BitCastInst>(I)) {
+ // No type change at all.
+ if (Op->getType() == I->getType())
+ return getNoopInput(Op, TLI);
+
+ // Pointer to pointer cast.
+ if (Op->getType()->isPointerTy() && I->getType()->isPointerTy())
+ return getNoopInput(Op, TLI);
+
+ if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) &&
+ TLI.isTypeLegal(EVT::getEVT(Op->getType())) &&
+ TLI.isTypeLegal(EVT::getEVT(I->getType())))
+ return getNoopInput(Op, TLI);
+ }
+
+ // Look through inttoptr.
+ if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) {
+ // Make sure this isn't a truncating or extending cast. We could support
+ // this eventually, but don't bother for now.
+ if (TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(Op->getType())->getBitWidth())
+ return getNoopInput(Op, TLI);
+ }
+
+ // Look through ptrtoint.
+ if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) {
+ // Make sure this isn't a truncating or extending cast. We could support
+ // this eventually, but don't bother for now.
+ if (TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(I->getType())->getBitWidth())
+ return getNoopInput(Op, TLI);
+ }
+
+
+ // Otherwise it's not something we can look through.
+ return V;
+}
+
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
+ const Instruction *I = CS.getInstruction();
+ const BasicBlock *ExitBB = I->getParent();
+ const TerminatorInst *Term = ExitBB->getTerminator();
+ const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+
+ // The block must end in a return statement or unreachable.
+ //
+ // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+ // an unreachable, for now. The way tailcall optimization is currently
+ // implemented means it will add an epilogue followed by a jump. That is
+ // not profitable. Also, if the callee is a special function (e.g.
+ // longjmp on x86), it can end up causing miscompilation that has not
+ // been fully understood.
+ if (!Ret &&
+ (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+ !isa<UnreachableInst>(Term)))
+ return false;
+
+ // If I will have a chain, make sure no other instruction that will have a
+ // chain interposes between I and the return.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+ !isSafeToSpeculativelyExecute(I))
+ for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+ --BBI) {
+ if (&*BBI == I)
+ break;
+ // Debug info intrinsics do not get in the way of tail call optimization.
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+ !isSafeToSpeculativelyExecute(BBI))
+ return false;
+ }
+
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ const Function *F = ExitBB->getParent();
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias))
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ return false;
+
+ // Otherwise, make sure the unmodified return value of I is the return value.
+ // We handle two cases: multiple return values + scalars.
+ Value *RetVal = Ret->getOperand(0);
+ if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
+ // Handle scalars first.
+ return getNoopInput(Ret->getOperand(0), TLI) == I;
+
+ // If this is an aggregate return, look through the insert/extract values and
+ // see if each is transparent.
+ for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements();
+ i != e; ++i) {
+ const Value *InScalar = FindInsertedValue(RetVal, i);
+ if (InScalar == 0) return false;
+ InScalar = getNoopInput(InScalar, TLI);
+
+ // If the scalar value being inserted is an extractvalue of the right index
+ // from the call, then everything is good.
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
+ if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
+ EVI->getIndices()[0] != i)
+ return false;
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
new file mode 100644
index 000000000000..df47f984d578
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
@@ -0,0 +1,71 @@
+//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <vector>
+
+namespace llvm {
+
+/// AntiDepBreaker - This class works into conjunction with the
+/// post-RA scheduler to rename registers to break register
+/// anti-dependencies.
+class AntiDepBreaker {
+public:
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ DbgValueVector;
+
+ virtual ~AntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ virtual void StartBlock(MachineBasicBlock *BB) =0;
+
+ /// BreakAntiDependencies - Identifiy anti-dependencies within a
+ /// basic-block region and break them by renaming registers. Return
+ /// the number of anti-dependencies broken.
+ ///
+ virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) = 0;
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ virtual void Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) =0;
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ virtual void FinishBlock() =0;
+
+ /// UpdateDbgValue - Update DBG_VALUE if dependency breaker is updating
+ /// other machine instruction to use NewReg.
+ void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
+ assert (MI->isDebugValue() && "MI is not DBG_VALUE!");
+ if (MI && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg)
+ MI->getOperand(0).setReg(NewReg);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
new file mode 100644
index 000000000000..188047d94f48
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -0,0 +1,136 @@
+//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
+ cl::desc("Generate ARM EHABI tables with unwinding descriptors"),
+ cl::init(false));
+
+
+ARMException::ARMException(AsmPrinter *A)
+ : DwarfException(A) {}
+
+ARMException::~ARMException() {}
+
+void ARMException::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void ARMException::BeginFunction(const MachineFunction *MF) {
+ Asm->OutStreamer.EmitFnStart();
+ if (Asm->MF->getFunction()->needsUnwindTableEntry())
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void ARMException::EndFunction() {
+ if (!Asm->MF->getFunction()->needsUnwindTableEntry())
+ Asm->OutStreamer.EmitCantUnwind();
+ else {
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ if (EnableARMEHABIDescriptors) {
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (!MMI->getLandingPads().empty()) {
+ // Emit references to personality.
+ if (const Function * Personality =
+ MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+ MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+ Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+ Asm->OutStreamer.EmitPersonality(PerSym);
+ }
+
+ // Emit .handlerdata directive.
+ Asm->OutStreamer.EmitHandlerData();
+
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
+ }
+ }
+
+ Asm->OutStreamer.EmitFnEnd();
+}
+
+void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ int Entry = 0;
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
+ }
+
+ Asm->EmitTTypeReference((TypeID == 0 ? 0 : TypeInfos[TypeID - 1]),
+ TTypeEncoding);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 000000000000..d4a745d985e8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,2170 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static const char *DWARFGroupName = "DWARF Emission";
+static const char *DbgTimerName = "DWARF Debug Writer";
+static const char *EHTimerName = "DWARF Exception Writer";
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+char AsmPrinter::ID = 0;
+
+typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type;
+static gcp_map_type &getGCMap(void *&P) {
+ if (P == 0)
+ P = new gcp_map_type();
+ return *(gcp_map_type*)P;
+}
+
+
+/// getGVAlignmentLog2 - Return the alignment to use for the specified global
+/// value in log2 form. This rounds up to the preferred alignment if possible
+/// and legal.
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
+ unsigned InBits = 0) {
+ unsigned NumBits = 0;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ NumBits = TD.getPreferredAlignmentLog(GVar);
+
+ // If InBits is specified, round it to it.
+ if (InBits > NumBits)
+ NumBits = InBits;
+
+ // If the GV has a specified alignment, take it into account.
+ if (GV->getAlignment() == 0)
+ return NumBits;
+
+ unsigned GVAlign = Log2_32(GV->getAlignment());
+
+ // If the GVAlign is larger than NumBits, or if we are required to obey
+ // NumBits because the GV has an assigned section, obey it.
+ if (GVAlign > NumBits || GV->hasSection())
+ NumBits = GVAlign;
+ return NumBits;
+}
+
+AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+ : MachineFunctionPass(ID),
+ TM(tm), MAI(tm.getMCAsmInfo()),
+ OutContext(Streamer.getContext()),
+ OutStreamer(Streamer),
+ LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
+ DD = 0; DE = 0; MMI = 0; LI = 0;
+ CurrentFnSym = CurrentFnSymForSize = 0;
+ GCMetadataPrinters = 0;
+ VerboseAsm = Streamer.isVerboseAsm();
+}
+
+AsmPrinter::~AsmPrinter() {
+ assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized");
+
+ if (GCMetadataPrinters != 0) {
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+
+ for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I)
+ delete I->second;
+ delete &GCMap;
+ GCMetadataPrinters = 0;
+ }
+
+ delete &OutStreamer;
+}
+
+/// getFunctionNumber - Return a unique ID for the current function.
+///
+unsigned AsmPrinter::getFunctionNumber() const {
+ return MF->getFunctionNumber();
+}
+
+const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+ return TM.getTargetLowering()->getObjFileLowering();
+}
+
+/// getDataLayout - Return information about data layout.
+const DataLayout &AsmPrinter::getDataLayout() const {
+ return *TM.getDataLayout();
+}
+
+/// getCurrentSection() - Return the current section we are emitting to.
+const MCSection *AsmPrinter::getCurrentSection() const {
+ return OutStreamer.getCurrentSection();
+}
+
+
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+ if (isVerbose())
+ AU.addRequired<MachineLoopInfo>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+ OutStreamer.InitStreamer();
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MMI->AnalyzeModule(M);
+
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getDataLayout());
+
+ // Allow the target to emit any magic that it wants at the start of the file.
+ EmitStartOfAsmFile(M);
+
+ // Very minimal debug info. It is ignored if we emit actual debug info. If we
+ // don't, this at least helps the user find where a global came from.
+ if (MAI->hasSingleParameterDotFile()) {
+ // .file "foo.c"
+ OutStreamer.EmitFileDirective(M.getModuleIdentifier());
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ MP->beginAssembly(*this);
+
+ // Emit module-level inline asm if it exists.
+ if (!M.getModuleInlineAsm().empty()) {
+ OutStreamer.AddComment("Start of file scope inline assembly");
+ OutStreamer.AddBlankLine();
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n");
+ OutStreamer.AddComment("End of file scope inline assembly");
+ OutStreamer.AddBlankLine();
+ }
+
+ if (MAI->doesSupportDebugInformation())
+ DD = new DwarfDebug(this, &M);
+
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::None:
+ return false;
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ DE = new DwarfCFIException(this);
+ return false;
+ case ExceptionHandling::ARM:
+ DE = new ARMException(this);
+ return false;
+ case ExceptionHandling::Win64:
+ DE = new Win64Exception(this);
+ return false;
+ }
+
+ llvm_unreachable("Unknown exception type.");
+}
+
+void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
+ switch ((GlobalValue::LinkageTypes)Linkage) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ if (MAI->getWeakDefDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+ if ((GlobalValue::LinkageTypes)Linkage !=
+ GlobalValue::LinkOnceODRAutoHideLinkage)
+ // .weak_definition _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ else
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
+ } else if (MAI->getLinkOnceDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ //NOTE: linkonce is handled by the section the symbol was assigned to.
+ } else {
+ // .weak _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ }
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol.
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ break;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ break;
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+}
+
+
+/// EmitGlobalVariable - Emit the specified global variable to the .s file.
+void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GV))
+ return;
+
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+ }
+
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+
+ if (!GV->hasInitializer()) // External globals require no extra code.
+ return;
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+
+ const DataLayout *TD = TM.getDataLayout();
+ uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+
+ // If the alignment is specified, we *must* obey it. Overaligning a global
+ // with a specified alignment is a prompt way to break globals emitted to
+ // sections and expected to be contiguous (e.g. ObjC metadata).
+ unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
+
+ // Handle common and BSS local symbols (.lcomm).
+ if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ unsigned Align = 1 << AlignLog;
+
+ // Handle common symbols.
+ if (GVKind.isCommon()) {
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ // Handle local BSS symbols.
+ if (MAI->hasMachoZeroFillDirective()) {
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+ // .zerofill __DATA, __bss, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align);
+ return;
+ }
+
+ // Use .lcomm only if it supports user-specified alignment.
+ // Otherwise, while it would still be correct to use .lcomm in some
+ // cases (e.g. when Align == 1), the external assembler might enfore
+ // some -unknown- default alignment behavior, which could cause
+ // spurious differences between external and integrated assembler.
+ // Prefer to simply fall back to .local / .comm in this case.
+ if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
+ // .lcomm _foo, 42
+ OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .local _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+
+ // Handle the zerofill directive on darwin, which is a special form of BSS
+ // emission.
+ if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+ if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
+
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ // .zerofill __DATA, __common, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ // Handle thread local data for mach-o which requires us to output an
+ // additional structure of data and mangle the original symbol so that we
+ // can reference it later.
+ //
+ // TODO: This should become an "emit thread local global" method on TLOF.
+ // All of this macho specific stuff should be sunk down into TLOFMachO and
+ // stuff like "TLSExtraDataSection" should no longer be part of the parent
+ // TLOF class. This will also make it more obvious that stuff like
+ // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+ // specific code.
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+ // Emit the .tbss symbol
+ MCSymbol *MangSym =
+ OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+
+ if (GVKind.isThreadBSS())
+ OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+ else if (GVKind.isThreadData()) {
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitAlignment(AlignLog, GV);
+ OutStreamer.EmitLabel(MangSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // Emit the variable struct for the runtime.
+ const MCSection *TLVSect
+ = getObjFileLowering().getTLSExtraDataSection();
+
+ OutStreamer.SwitchSection(TLVSect);
+ // Emit the linkage here.
+ EmitLinkage(GV->getLinkage(), GVSym);
+ OutStreamer.EmitLabel(GVSym);
+
+ // Three pointers in size:
+ // - __tlv_bootstrap - used to make sure support exists
+ // - spare pointer, used when mapped by the runtime
+ // - pointer to mangled symbol above with initializer
+ unsigned PtrSize = TD->getPointerSizeInBits()/8;
+ OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
+ PtrSize);
+ OutStreamer.EmitIntValue(0, PtrSize);
+ OutStreamer.EmitSymbolValue(MangSym, PtrSize);
+
+ OutStreamer.AddBlankLine();
+ return;
+ }
+
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitLinkage(GV->getLinkage(), GVSym);
+ EmitAlignment(AlignLog, GV);
+
+ OutStreamer.EmitLabel(GVSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ // .size foo, 42
+ OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+
+ OutStreamer.AddBlankLine();
+}
+
+/// EmitFunctionHeader - This method emits the header for the current
+/// function.
+void AsmPrinter::EmitFunctionHeader() {
+ // Print out constants referenced by the function
+ EmitConstantPool();
+
+ // Print the 'header' of function.
+ const Function *F = MF->getFunction();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+ EmitVisibility(CurrentFnSym, F->getVisibility());
+
+ EmitLinkage(F->getLinkage(), CurrentFnSym);
+ EmitAlignment(MF->getAlignment(), F);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), F,
+ /*PrintType=*/false, F->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+
+ // Emit the CurrentFnSym. This is a virtual function to allow targets to
+ // do their wild and crazy things as required.
+ EmitFunctionEntryLabel();
+
+ // If the function had address-taken blocks that got deleted, then we have
+ // references to the dangling symbols. Emit them at the start of the function
+ // so that we don't get references to undefined symbols.
+ std::vector<MCSymbol*> DeadBlockSyms;
+ MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+ for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+ OutStreamer.AddComment("Address taken block that was later removed");
+ OutStreamer.EmitLabel(DeadBlockSyms[i]);
+ }
+
+ // Add some workaround for linkonce linkage on Cygwin\MinGW.
+ if (MAI->getLinkOnceDirective() != 0 &&
+ (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) {
+ // FIXME: What is this?
+ MCSymbol *FakeStub =
+ OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+
+ CurrentFnSym->getName());
+ OutStreamer.EmitLabel(FakeStub);
+ }
+
+ // Emit pre-function debug and/or EH information.
+ if (DE) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->BeginFunction(MF);
+ }
+ if (DD) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginFunction(MF);
+ }
+}
+
+/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
+/// function. This can be overridden by targets as required to do custom stuff.
+void AsmPrinter::EmitFunctionEntryLabel() {
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (CurrentFnSym->isUndefined())
+ return OutStreamer.EmitLabel(CurrentFnSym);
+
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+}
+
+/// emitComments - Pretty-print comments for instructions.
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetMachine &TM = MF->getTarget();
+
+ // Check for spills and reloads
+ int FI;
+
+ const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ const MachineMemOperand *MMO;
+ if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Reload\n";
+ }
+ } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Spill\n";
+ }
+ } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ }
+
+ // Check for spill-induced copies
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
+}
+
+/// emitImplicitDef - This method emits the specified machine instruction
+/// that is an implicit def.
+static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+ unsigned RegNo = MI->getOperand(0).getReg();
+ AP.OutStreamer.AddComment(Twine("implicit-def: ") +
+ AP.TM.getRegisterInfo()->getName(RegNo));
+ AP.OutStreamer.AddBlankLine();
+}
+
+static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
+ std::string Str = "kill:";
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+ assert(Op.isReg() && "KILL instruction must have only register operands");
+ Str += ' ';
+ Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
+ Str += (Op.isDef() ? "<def>" : "<kill>");
+ }
+ AP.OutStreamer.AddComment(Str);
+ AP.OutStreamer.AddBlankLine();
+}
+
+/// emitDebugValueComment - This method handles the target-independent form
+/// of DBG_VALUE, returning true if it was able to do so. A false return
+/// means the target will need to handle MI in EmitInstruction.
+static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+ // This code handles only the 3-operand target-independent form.
+ if (MI->getNumOperands() != 3)
+ return false;
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
+
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
+ if (V.getContext().isSubprogram())
+ OS << DISubprogram(V.getContext()).getDisplayName() << ":";
+ OS << V.getName() << " <- ";
+
+ // Register or immediate value. Register 0 means undef.
+ if (MI->getOperand(0).isFPImm()) {
+ APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
+ if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
+ OS << (double)APF.convertToFloat();
+ } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
+ OS << APF.convertToDouble();
+ } else {
+ // There is no good way to print long double. Convert a copy to
+ // double. Ah well, it's only a comment.
+ bool ignored;
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ OS << "(long double) " << APF.convertToDouble();
+ }
+ } else if (MI->getOperand(0).isImm()) {
+ OS << MI->getOperand(0).getImm();
+ } else if (MI->getOperand(0).isCImm()) {
+ MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
+ } else {
+ assert(MI->getOperand(0).isReg() && "Unknown operand type");
+ if (MI->getOperand(0).getReg() == 0) {
+ // Suppress offset, it is not meaningful here.
+ OS << "undef";
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+ }
+ OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
+ }
+
+ OS << '+' << MI->getOperand(1).getImm();
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+}
+
+AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
+ MF->getFunction()->needsUnwindTableEntry())
+ return CFI_M_EH;
+
+ if (MMI->hasDebugInfo())
+ return CFI_M_Debug;
+
+ return CFI_M_None;
+}
+
+bool AsmPrinter::needsSEHMoves() {
+ return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 &&
+ MF->getFunction()->needsUnwindTableEntry();
+}
+
+bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
+ return MAI->doesDwarfUseRelocationsAcrossSections();
+}
+
+void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
+ MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+
+ if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+ return;
+
+ if (needsCFIMoves() == CFI_M_None)
+ return;
+
+ if (MMI->getCompactUnwindEncoding() != 0)
+ OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding());
+
+ MachineModuleInfo &MMI = MF->getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ bool FoundOne = false;
+ (void)FoundOne;
+ for (std::vector<MachineMove>::iterator I = Moves.begin(),
+ E = Moves.end(); I != E; ++I) {
+ if (I->getLabel() == Label) {
+ EmitCFIFrameMove(*I);
+ FoundOne = true;
+ }
+ }
+ assert(FoundOne);
+}
+
+/// EmitFunctionBody - This method emits the body and trailer for a
+/// function.
+void AsmPrinter::EmitFunctionBody() {
+ // Emit target-specific gunk before the function body.
+ EmitFunctionBodyStart();
+
+ bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo();
+
+ // Print out code for the function.
+ bool HasAnyRealCode = false;
+ const MachineInstr *LastMI = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ LastMI = II;
+
+ // Print the assembly for the instruction.
+ if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
+ !II->isDebugValue()) {
+ HasAnyRealCode = true;
+ ++EmittedInsts;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginInstruction(II);
+ }
+
+ if (isVerbose())
+ emitComments(*II, OutStreamer.GetCommentOS());
+
+ switch (II->getOpcode()) {
+ case TargetOpcode::PROLOG_LABEL:
+ emitPrologLabel(*II);
+ break;
+
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::INLINEASM:
+ EmitInlineAsm(II);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose()) {
+ if (!emitDebugValueComment(II, *this))
+ EmitInstruction(II);
+ }
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ if (isVerbose()) emitImplicitDef(II, *this);
+ break;
+ case TargetOpcode::KILL:
+ if (isVerbose()) emitKill(II, *this);
+ break;
+ default:
+ if (!TM.hasMCUseLoc())
+ MCLineEntry::Make(&OutStreamer, getCurrentSection());
+
+ EmitInstruction(II);
+ break;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endInstruction(II);
+ }
+ }
+ }
+
+ // If the last instruction was a prolog label, then we have a situation where
+ // we emitted a prolog but no function body. This results in the ending prolog
+ // label equaling the end of function label and an invalid "row" in the
+ // FDE. We need to emit a noop in this situation so that the FDE's rows are
+ // valid.
+ bool RequiresNoop = LastMI && LastMI->isPrologLabel();
+
+ // If the function is empty and the object file uses .subsections_via_symbols,
+ // then we need to emit *something* to the function body to prevent the
+ // labels from collapsing together. Just emit a noop.
+ if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
+ MCInst Noop;
+ TM.getInstrInfo()->getNoopForMachoTarget(Noop);
+ if (Noop.getOpcode()) {
+ OutStreamer.AddComment("avoids zero-length function");
+ OutStreamer.EmitInstruction(Noop);
+ } else // Target not mc-ized yet.
+ OutStreamer.EmitRawText(StringRef("\tnop\n"));
+ }
+
+ const Function *F = MF->getFunction();
+ for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) {
+ const BasicBlock *BB = i;
+ if (!BB->hasAddressTaken())
+ continue;
+ MCSymbol *Sym = GetBlockAddressSymbol(BB);
+ if (Sym->isDefined())
+ continue;
+ OutStreamer.AddComment("Address of block that was removed by CodeGen");
+ OutStreamer.EmitLabel(Sym);
+ }
+
+ // Emit target-specific gunk after the function body.
+ EmitFunctionBodyEnd();
+
+ // If the target wants a .size directive for the size of the function, emit
+ // it.
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ // Create a symbol for the end of function, so we can get the size as
+ // difference between the function label and the temp label.
+ MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(FnEndLabel);
+
+ const MCExpr *SizeExp =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+ MCSymbolRefExpr::Create(CurrentFnSymForSize,
+ OutContext),
+ OutContext);
+ OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
+ }
+
+ // Emit post-function debug information.
+ if (DD) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endFunction(MF);
+ }
+ if (DE) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndFunction();
+ }
+ MMI->EndFunction();
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo();
+
+ OutStreamer.AddBlankLine();
+}
+
+/// getDebugValueLocation - Get location information encoded by DBG_VALUE
+/// operands.
+MachineLocation AsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ // Target specific DBG_VALUE instructions are handled by each target.
+ return MachineLocation();
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+
+ for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0;
+ ++SR) {
+ Reg = TRI->getDwarfRegNum(*SR, false);
+ // FIXME: Get the bit range this register uses of the superregister
+ // so that we can produce a DW_OP_bit_piece
+ }
+
+ // FIXME: Handle cases like a super register being encoded as
+ // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33
+
+ // FIXME: We have no reasonable way of handling errors in here. The
+ // caller might be in the middle of an dwarf expression. We should
+ // probably assert that Reg >= 0 once debug info generation is more mature.
+
+ if (int Offset = MLoc.getOffset()) {
+ if (Reg < 32) {
+ OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
+ EmitInt8(dwarf::DW_OP_breg0 + Reg);
+ } else {
+ OutStreamer.AddComment("DW_OP_bregx");
+ EmitInt8(dwarf::DW_OP_bregx);
+ OutStreamer.AddComment(Twine(Reg));
+ EmitULEB128(Reg);
+ }
+ EmitSLEB128(Offset);
+ } else {
+ if (Reg < 32) {
+ OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+ EmitInt8(dwarf::DW_OP_reg0 + Reg);
+ } else {
+ OutStreamer.AddComment("DW_OP_regx");
+ EmitInt8(dwarf::DW_OP_regx);
+ OutStreamer.AddComment(Twine(Reg));
+ EmitULEB128(Reg);
+ }
+ }
+
+ // FIXME: Produce a DW_OP_bit_piece if we used a superregister
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+ // Emit global variables.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobalVariable(I);
+
+ // Emit visibility info for declarations
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ const Function &F = *I;
+ if (!F.isDeclaration())
+ continue;
+ GlobalValue::VisibilityTypes V = F.getVisibility();
+ if (V == GlobalValue::DefaultVisibility)
+ continue;
+
+ MCSymbol *Name = Mang->getSymbol(&F);
+ EmitVisibility(Name, V, false);
+ }
+
+ // Emit module flags.
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+ if (!ModuleFlags.empty())
+ getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM);
+
+ // Finalize debug and EH information.
+ if (DE) {
+ {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndModule();
+ }
+ delete DE; DE = 0;
+ }
+ if (DD) {
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endModule();
+ }
+ delete DD; DD = 0;
+ }
+
+ // If the target wants to know about weak references, print them all.
+ if (MAI->getWeakRefDirective()) {
+ // FIXME: This is not lazy, it would be nice to only print weak references
+ // to stuff that is actually used. Note that doing so would require targets
+ // to notice uses in operands (due to constant exprs etc). This should
+ // happen with the MC stuff eventually.
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ }
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ }
+ }
+
+ if (MAI->hasSetDirective()) {
+ OutStreamer.AddBlankLine();
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ MCSymbol *Name = Mang->getSymbol(I);
+
+ const GlobalValue *GV = I->getAliasedGlobal();
+ MCSymbol *Target = Mang->getSymbol(GV);
+
+ if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+ else if (I->hasWeakLinkage())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(I->hasLocalLinkage() && "Invalid alias linkage");
+
+ EmitVisibility(Name, I->getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ OutStreamer.EmitAssignment(Name,
+ MCSymbolRefExpr::Create(Target, OutContext));
+ }
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+ MP->finishAssembly(*this);
+
+ // If we don't have any trampolines, then we don't require stack memory
+ // to be executable. Some targets have a directive to declare this.
+ Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+ if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+ if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+ OutStreamer.SwitchSection(S);
+
+ // Allow the target to emit any magic that it wants at the end of the file,
+ // after everything else has gone out.
+ EmitEndOfAsmFile(M);
+
+ delete Mang; Mang = 0;
+ MMI = 0;
+
+ OutStreamer.Finish();
+ OutStreamer.reset();
+
+ return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+ // Get the function symbol.
+ CurrentFnSym = Mang->getSymbol(MF.getFunction());
+ CurrentFnSymForSize = CurrentFnSym;
+
+ if (isVerbose())
+ LI = &getAnalysis<MachineLoopInfo>();
+}
+
+namespace {
+ // SectionCPs - Keep track the alignment, constpool entries per Section.
+ struct SectionCPs {
+ const MCSection *S;
+ unsigned Alignment;
+ SmallVector<unsigned, 4> CPEs;
+ SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
+ };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool() {
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // Calculate sections for constant pool entries. We collect entries to go into
+ // the same section together to reduce amount of section switch statements.
+ SmallVector<SectionCPs, 4> CPSections;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const MachineConstantPoolEntry &CPE = CP[i];
+ unsigned Align = CPE.getAlignment();
+
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+ case 16: Kind = SectionKind::getMergeableConst16();break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+
+ // The number of sections are small, just do a linear search from the
+ // last section to the first.
+ bool Found = false;
+ unsigned SecIdx = CPSections.size();
+ while (SecIdx != 0) {
+ if (CPSections[--SecIdx].S == S) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ SecIdx = CPSections.size();
+ CPSections.push_back(SectionCPs(S, Align));
+ }
+
+ if (Align > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Align;
+ CPSections[SecIdx].CPEs.push_back(i);
+ }
+
+ // Now print stuff into the calculated sections.
+ for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+ unsigned Offset = 0;
+ for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+ unsigned CPI = CPSections[i].CPEs[j];
+ MachineConstantPoolEntry CPE = CP[CPI];
+
+ // Emit inter-object padding for alignment.
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+ OutStreamer.EmitZeros(NewOffset - Offset);
+
+ Type *Ty = CPE.getType();
+ Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
+ OutStreamer.EmitLabel(GetCPISymbol(CPI));
+
+ if (CPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(CPE.Val.ConstVal);
+ }
+ }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo() {
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (MJTI == 0) return;
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ const Function *F = MF->getFunction();
+ bool JTInDiffSection = false;
+ if (// In PIC mode, we need to emit the jump table to the same section as the
+ // function body itself, otherwise the label differences won't make sense.
+ // FIXME: Need a better predicate for this: what about custom entries?
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+ // We should also do if the section name is NULL or function is declared
+ // in discardable section
+ // FIXME: this isn't the right predicate, should be based on the MCSection
+ // for the function.
+ F->isWeakForLinker()) {
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
+ } else {
+ // Otherwise, drop it in the readonly section.
+ const MCSection *ReadOnlySection =
+ getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(ReadOnlySection);
+ JTInDiffSection = true;
+ }
+
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout())));
+
+ // Jump tables in code sections are marked with a data_region directive
+ // where that's supported.
+ if (!JTInDiffSection)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
+
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ // For the EK_LabelDifference32 entry, if the target supports .set, emit a
+ // .set directive for each unique entry. This reduces the number of
+ // relocations the assembler will generate for the jump table.
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+ MAI->hasSetDirective()) {
+ SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+ const TargetLowering *TLI = TM.getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+ const MachineBasicBlock *MBB = JTBBs[ii];
+ if (!EmittedSets.insert(MBB)) continue;
+
+ // .set LJTSet, LBB32-base
+ const MCExpr *LHS =
+ MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+ MCBinaryExpr::CreateSub(LHS, Base, OutContext));
+ }
+ }
+
+ // On some targets (e.g. Darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
+ // FIXME: This doesn't have to have any specific name, just any randomly
+ // named and numbered 'l' label would work. Simplify GetJTISymbol.
+ OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
+
+ OutStreamer.EmitLabel(GetJTISymbol(JTI));
+
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+ EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ }
+ if (!JTInDiffSection)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+}
+
+/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
+/// current stream.
+void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned UID) const {
+ assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
+ const MCExpr *Value = 0;
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Inline:
+ llvm_unreachable("Cannot emit EK_Inline jump table entry");
+ case MachineJumpTableInfo::EK_Custom32:
+ Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
+ OutContext);
+ break;
+ case MachineJumpTableInfo::EK_BlockAddress:
+ // EK_BlockAddress - Each entry is a plain address of block, e.g.:
+ // .word LBB123
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ break;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
+ // EK_GPRel32BlockAddress - Each entry is an address of block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gprel32 LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress: {
+ // EK_GPRel64BlockAddress - Each entry is an address of block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gpdword LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ // EK_LabelDifference32 - Each entry is the address of the block minus
+ // the address of the jump table. This is used for PIC jump tables where
+ // gprel32 is not supported. e.g.:
+ // .word LBB123 - LJTI1_2
+ // If the .set directive is supported, this is emitted as:
+ // .set L4_5_set_123, LBB123 - LJTI1_2
+ // .word L4_5_set_123
+
+ // If we have emitted set directives for the jump table entries, print
+ // them rather than the entries themselves. If we're emitting PIC, then
+ // emit the table entries as differences between two text section labels.
+ if (MAI->hasSetDirective()) {
+ // If we used .set, reference the .set's symbol.
+ Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
+ OutContext);
+ break;
+ }
+ // Otherwise, use the difference as the jump table entry.
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
+ Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
+ break;
+ }
+ }
+
+ assert(Value && "Unknown entry kind!");
+
+ unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
+ OutStreamer.EmitValue(Value, EntrySize);
+}
+
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used") {
+ if (MAI->hasNoDeadStrip()) // No need to emit this at all.
+ EmitLLVMUsedList(GV->getInitializer());
+ return true;
+ }
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ if (GV->getName() == "llvm.global_ctors") {
+ EmitXXStructorList(GV->getInitializer(), /* isCtor */ true);
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".constructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ EmitXXStructorList(GV->getInitializer(), /* isCtor */ false);
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".destructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
+/// global in the specified llvm.used list for which emitUsedDirectiveFor
+/// is true, as being used with this directive.
+void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ const GlobalValue *GV =
+ dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+ if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
+ }
+}
+
+typedef std::pair<unsigned, Constant*> Structor;
+
+static bool priority_order(const Structor& lhs, const Structor& rhs) {
+ return lhs.first < rhs.first;
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
+/// priority.
+void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority.
+ if (!isa<ConstantArray>(List)) return;
+
+ // Sanity check the structors list.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (!InitList) return; // Not an array!
+ StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+ if (!ETy || ETy->getNumElements() != 2) return; // Not an array of pairs!
+ if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
+ !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
+
+ // Gather the structors in a form that's convenient for sorting by priority.
+ SmallVector<Structor, 8> Structors;
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
+ if (!CS) continue; // Malformed.
+ if (CS->getOperand(1)->isNullValue())
+ break; // Found a null terminator, skip the rest.
+ ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!Priority) continue; // Malformed.
+ Structors.push_back(std::make_pair(Priority->getLimitedValue(65535),
+ CS->getOperand(1)));
+ }
+
+ // Emit the function pointers in the target-specific order
+ const DataLayout *TD = TM.getDataLayout();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ std::stable_sort(Structors.begin(), Structors.end(), priority_order);
+ for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
+ const MCSection *OutputSection =
+ (isCtor ?
+ getObjFileLowering().getStaticCtorSection(Structors[i].first) :
+ getObjFileLowering().getStaticDtorSection(Structors[i].first));
+ OutStreamer.SwitchSection(OutputSection);
+ if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
+ EmitAlignment(Align);
+ EmitXXStructor(Structors[i].second);
+ }
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+ OutStreamer.EmitIntValue(Value, 1);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+ OutStreamer.EmitIntValue(Value, 2);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+ OutStreamer.EmitIntValue(Value, 4);
+}
+
+/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
+/// in bytes of the directive is specified by Size and Hi/Lo specify the
+/// labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Size) const {
+ // Get the Hi-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+ if (!MAI->hasSetDirective()) {
+ OutStreamer.EmitValue(Diff, Size);
+ return;
+ }
+
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ OutStreamer.EmitAssignment(SetLabel, Diff);
+ OutStreamer.EmitSymbolValue(SetLabel, Size);
+}
+
+/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
+/// where the size in bytes of the directive is specified by Size and Hi/Lo
+/// specify the labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+ const MCSymbol *Lo, unsigned Size)
+ const {
+
+ // Emit Hi+Offset - Lo
+ // Get the Hi+Offset expression.
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ // Get the Hi+Offset-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(Plus,
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+ if (!MAI->hasSetDirective())
+ OutStreamer.EmitValue(Diff, 4);
+ else {
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ OutStreamer.EmitAssignment(SetLabel, Diff);
+ OutStreamer.EmitSymbolValue(SetLabel, 4);
+ }
+}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ unsigned Size)
+ const {
+
+ // Emit Label+Offset (or just Label if Offset is zero)
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext);
+ if (Offset)
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ OutStreamer.EmitValue(Expr, Size);
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment. If a global value is specified, and if that global has
+// an explicit alignment requested, it will override the alignment request
+// if required for correctness.
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
+ if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits);
+
+ if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
+
+ if (getCurrentSection()->getKind().isText())
+ OutStreamer.EmitCodeAlignment(1 << NumBits);
+ else
+ OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Constant emission.
+//===----------------------------------------------------------------------===//
+
+/// lowerConstant - Lower the specified LLVM Constant to an MCExpr.
+///
+static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (CE == 0) {
+ llvm_unreachable("Unknown constant value to lower!");
+ }
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using DataLayout as a
+ // last resort before giving up.
+ if (Constant *C =
+ ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+ if (C != CE)
+ return lowerConstant(C, AP);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ case Instruction::GetElementPtr: {
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ // Generate a symbolic expression for the byte address
+ APInt OffsetAI(TD.getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
+
+ const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
+ if (!OffsetAI)
+ return Base;
+
+ int64_t Offset = OffsetAI.getSExtValue();
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return lowerConstant(CE->getOperand(0), AP);
+
+ case Instruction::IntToPtr: {
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return lowerConstant(Op, AP);
+ }
+
+ case Instruction::PtrToInt: {
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = lowerConstant(Op, AP);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP);
+ const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
+ case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
+static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
+ AsmPrinter &AP);
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const ConstantDataSequential *V) {
+ StringRef Data = V->getRawDataValues();
+ assert(!Data.empty() && "Empty aggregates should be CAZ node");
+ char C = Data[0];
+ for (unsigned i = 1, e = Data.size(); i != e; ++i)
+ if (Data[i] != C) return -1;
+ return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
+}
+
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64) return -1;
+
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType());
+ uint64_t Value = CI->getZExtValue();
+
+ // Make sure the constant is at least 8 bits long and has a power
+ // of 2 bit width. This guarantees the constant bit width is
+ // always a multiple of 8 bits, avoiding issues with padding out
+ // to Size and other such corner cases.
+ if (CI->getBitWidth() < 8 || !isPowerOf2_64(CI->getBitWidth())) return -1;
+
+ uint8_t Byte = static_cast<uint8_t>(Value);
+
+ for (unsigned i = 1; i < Size; ++i) {
+ Value >>= 8;
+ if (static_cast<uint8_t>(Value) != Byte) return -1;
+ }
+ return Byte;
+ }
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+ // Make sure all array elements are sequences of the same repeated
+ // byte.
+ assert(CA->getNumOperands() != 0 && "Should be a CAZ");
+ int Byte = isRepeatedByteSequence(CA->getOperand(0), TM);
+ if (Byte == -1) return -1;
+
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ int ThisByte = isRepeatedByteSequence(CA->getOperand(i), TM);
+ if (ThisByte == -1) return -1;
+ if (Byte != ThisByte) return -1;
+ }
+ return Byte;
+ }
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
+ return isRepeatedByteSequence(CDS);
+
+ return -1;
+}
+
+static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
+ unsigned AddrSpace,AsmPrinter &AP){
+
+ // See if we can aggregate this into a .fill, if so, emit it as such.
+ int Value = isRepeatedByteSequence(CDS, AP.TM);
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType());
+ // Don't emit a 1-byte object as a .fill.
+ if (Bytes > 1)
+ return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+
+ // If this can be emitted with .ascii/.asciz, emit it as such.
+ if (CDS->isString())
+ return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace);
+
+ // Otherwise, emit the values in successive locations.
+ unsigned ElementByteSize = CDS->getElementByteSize();
+ if (isa<IntegerType>(CDS->getElementType())) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+ CDS->getElementAsInteger(i));
+ AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i),
+ ElementByteSize, AddrSpace);
+ }
+ } else if (ElementByteSize == 4) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ assert(CDS->getElementType()->isFloatTy());
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ float F;
+ uint32_t I;
+ };
+
+ F = CDS->getElementAsFloat(i);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
+ AP.OutStreamer.EmitIntValue(I, 4, AddrSpace);
+ }
+ } else {
+ assert(CDS->getElementType()->isDoubleTy());
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ double F;
+ uint64_t I;
+ };
+
+ F = CDS->getElementAsDouble(i);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
+ AP.OutStreamer.EmitIntValue(I, 8, AddrSpace);
+ }
+ }
+
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ unsigned Size = TD.getTypeAllocSize(CDS->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) *
+ CDS->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
+
+}
+
+static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // See if we can aggregate some values. Make sure it can be
+ // represented as a series of bytes of the constant value.
+ int Value = isRepeatedByteSequence(CA, AP.TM);
+
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType());
+ AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+ else {
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ }
+}
+
+static void emitGlobalConstantVector(const ConstantVector *CV,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ unsigned Size = TD.getTypeAllocSize(CV->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+ CV->getType()->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
+}
+
+static void emitGlobalConstantStruct(const ConstantStruct *CS,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ // Print the fields in successive locations. Pad to align if needed!
+ const DataLayout *TD = AP.TM.getDataLayout();
+ unsigned Size = TD->getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = TD->getStructLayout(CS->getType());
+ uint64_t SizeSoFar = 0;
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
+ const Constant *Field = CS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t FieldSize = TD->getTypeAllocSize(Field->getType());
+ uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
+ - Layout->getElementOffset(i)) - FieldSize;
+ SizeSoFar += FieldSize + PadSize;
+
+ // Now print the actual field value.
+ emitGlobalConstantImpl(Field, AddrSpace, AP);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ AP.OutStreamer.EmitZeros(PadSize, AddrSpace);
+ }
+ assert(SizeSoFar == Layout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
+
+static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+
+ // First print a comment with what we think the original floating-point value
+ // should have been.
+ if (AP.isVerbose()) {
+ SmallString<8> StrVal;
+ CFP->getValueAPF().toString(StrVal);
+
+ CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+ AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
+ }
+
+ // Now iterate through the APInt chunks, emitting them in endian-correct
+ // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit
+ // floats).
+ unsigned NumBytes = API.getBitWidth() / 8;
+ unsigned TrailingBytes = NumBytes % sizeof(uint64_t);
+ const uint64_t *p = API.getRawData();
+
+ // PPC's long double has odd notions of endianness compared to how LLVM
+ // handles it: p[0] goes first for *big* endian on PPC.
+ if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) {
+ int Chunk = API.getNumWords() - 1;
+
+ if (TrailingBytes)
+ AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace);
+
+ for (; Chunk >= 0; --Chunk)
+ AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
+ } else {
+ unsigned Chunk;
+ for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
+ AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
+
+ if (TrailingBytes)
+ AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace);
+ }
+
+ // Emit the tail padding for the long double.
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
+ TD.getTypeStoreSize(CFP->getType()), AddrSpace);
+}
+
+static void emitGlobalConstantLargeInt(const ConstantInt *CI,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ const DataLayout *TD = AP.TM.getDataLayout();
+ unsigned BitWidth = CI->getBitWidth();
+ assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
+
+ // We don't expect assemblers to support integer data directives
+ // for more than 64 bits, so we emit the data in at most 64-bit
+ // quantities at a time.
+ const uint64_t *RawData = CI->getValue().getRawData();
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ }
+}
+
+static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ const DataLayout *TD = AP.TM.getDataLayout();
+ uint64_t Size = TD->getTypeAllocSize(CV->getType());
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
+ return AP.OutStreamer.EmitZeros(Size, AddrSpace);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ switch (Size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+ CI->getZExtValue());
+ AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+ return;
+ default:
+ emitGlobalConstantLargeInt(CI, AddrSpace, AP);
+ return;
+ }
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+ return emitGlobalConstantFP(CFP, AddrSpace, AP);
+
+ if (isa<ConstantPointerNull>(CV)) {
+ AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
+ return;
+ }
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
+ return emitGlobalConstantDataSequential(CDS, AddrSpace, AP);
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return emitGlobalConstantArray(CVA, AddrSpace, AP);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return emitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
+ // vectors).
+ if (CE->getOpcode() == Instruction::BitCast)
+ return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
+
+ if (Size > 8) {
+ // If the constant expression's size is greater than 64-bits, then we have
+ // to emit the value in chunks. Try to constant fold the value and emit it
+ // that way.
+ Constant *New = ConstantFoldConstantExpression(CE, TD);
+ if (New && New != CE)
+ return emitGlobalConstantImpl(New, AddrSpace, AP);
+ }
+ }
+
+ if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+ return emitGlobalConstantVector(V, AddrSpace, AP);
+
+ // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
+ // thread the streamer with EmitValue.
+ AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+ if (Size)
+ emitGlobalConstantImpl(CV, AddrSpace, *this);
+ else if (MAI->hasSubsectionsViaSymbols()) {
+ // If the global has zero size, emit a single byte so that two labels don't
+ // look like they are at the same location.
+ OutStreamer.EmitIntValue(0, 1, AddrSpace);
+ }
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+ if (Offset > 0)
+ OS << '+' << Offset;
+ else if (Offset < 0)
+ OS << Offset;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
+/// temporary label with the specified stem and unique ID.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const {
+ return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ Name + Twine(ID));
+}
+
+/// GetTempSymbol - Return an assembler temporary label with the specified
+/// stem.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const {
+ return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+
+ Name);
+}
+
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
+ return MMI->getAddrLabelSymbol(BA->getBasicBlock());
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
+ return MMI->getAddrLabelSymbol(BB);
+}
+
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
+MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ return OutContext.GetOrCreateSymbol
+ (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ + "_" + Twine(CPID));
+}
+
+/// GetJTISymbol - Return the symbol for the specified jump table entry.
+MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
+ return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate);
+}
+
+/// GetJTSetSymbol - Return the symbol for the specified jump table .set
+/// FIXME: privatize to AsmPrinter.
+MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
+ return OutContext.GetOrCreateSymbol
+ (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
+ Twine(UID) + "_set_" + Twine(MBBID));
+}
+
+/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
+/// global value name as its base, with the specified suffix, and where the
+/// symbol is forced to have private linkage if ForcePrivate is true.
+MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV,
+ StringRef Suffix,
+ bool ForcePrivate) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, ForcePrivate);
+ NameStr.append(Suffix.begin(), Suffix.end());
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
+/// ExternalSymbol.
+MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, Sym);
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+
+
+/// PrintParentLoopComment - Print comments about parent loops of this one.
+static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ if (Loop == 0) return;
+ PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
+ OS.indent(Loop->getLoopDepth()*2)
+ << "Parent Loop BB" << FunctionNumber << "_"
+ << Loop->getHeader()->getNumber()
+ << " Depth=" << Loop->getLoopDepth() << '\n';
+}
+
+
+/// PrintChildLoopComment - Print comments about child loops within
+/// the loop for this basic block, with nesting.
+static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ // Add child loop information
+ for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();CL != E; ++CL){
+ OS.indent((*CL)->getLoopDepth()*2)
+ << "Child Loop BB" << FunctionNumber << "_"
+ << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth()
+ << '\n';
+ PrintChildLoopComment(OS, *CL, FunctionNumber);
+ }
+}
+
+/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+ const MachineLoopInfo *LI,
+ const AsmPrinter &AP) {
+ // Add loop depth information
+ const MachineLoop *Loop = LI->getLoopFor(&MBB);
+ if (Loop == 0) return;
+
+ MachineBasicBlock *Header = Loop->getHeader();
+ assert(Header && "No header for loop");
+
+ // If this block is not a loop header, just print out what is the loop header
+ // and return.
+ if (Header != &MBB) {
+ AP.OutStreamer.AddComment(" in Loop: Header=BB" +
+ Twine(AP.getFunctionNumber())+"_" +
+ Twine(Loop->getHeader()->getNumber())+
+ " Depth="+Twine(Loop->getLoopDepth()));
+ return;
+ }
+
+ // Otherwise, it is a loop header. Print out information about child and
+ // parent loops.
+ raw_ostream &OS = AP.OutStreamer.GetCommentOS();
+
+ PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+ OS << "=>";
+ OS.indent(Loop->getLoopDepth()*2-2);
+
+ OS << "This ";
+ if (Loop->empty())
+ OS << "Inner ";
+ OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+ PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
+
+
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+ // Emit an alignment directive for this block, if needed.
+ if (unsigned Align = MBB->getAlignment())
+ EmitAlignment(Align);
+
+ // If the block has its address taken, emit any labels that were used to
+ // reference the block. It is possible that there is more than one label
+ // here, because multiple LLVM BB's may have been RAUW'd to this block after
+ // the references were generated.
+ if (MBB->hasAddressTaken()) {
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (isVerbose())
+ OutStreamer.AddComment("Block address taken");
+
+ std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
+
+ for (unsigned i = 0, e = Syms.size(); i != e; ++i)
+ OutStreamer.EmitLabel(Syms[i]);
+ }
+
+ // Print some verbose block comments.
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+ emitBasicBlockLoopComments(*MBB, LI, *this);
+ }
+
+ // Print the main label for the block.
+ if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
+ Twine(MBB->getNumber()) + ":");
+ }
+ } else {
+ OutStreamer.EmitLabel(MBB->getSymbol());
+ }
+}
+
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+ bool IsDefinition) const {
+ MCSymbolAttr Attr = MCSA_Invalid;
+
+ switch (Visibility) {
+ default: break;
+ case GlobalValue::HiddenVisibility:
+ if (IsDefinition)
+ Attr = MAI->getHiddenVisibilityAttr();
+ else
+ Attr = MAI->getHiddenDeclarationVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Attr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
+
+ if (Attr != MCSA_Invalid)
+ OutStreamer.EmitSymbolAttribute(Sym, Attr);
+}
+
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check the terminators in the previous blocks
+ for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
+ IE = Pred->end(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+
+ // If it is not a simple branch, we are in a table somewhere.
+ if (!MI.isBranch() || MI.isIndirectBranch())
+ return false;
+
+ // If we are the operands of one of the branches, this is not
+ // a fall through.
+ for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+ OE = MI.operands_end(); OI != OE; ++OI) {
+ const MachineOperand& OP = *OI;
+ if (OP.isJTI())
+ return false;
+ if (OP.isMBB() && OP.getMBB() == MBB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
+ if (!S->usesMetadata())
+ return 0;
+
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+ gcp_map_type::iterator GCPI = GCMap.find(S);
+ if (GCPI != GCMap.end())
+ return GCPI->second;
+
+ const char *Name = S->getName().c_str();
+
+ for (GCMetadataPrinterRegistry::iterator
+ I = GCMetadataPrinterRegistry::begin(),
+ E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+ if (strcmp(Name, I->getName()) == 0) {
+ GCMetadataPrinter *GMP = I->instantiate();
+ GMP->S = S;
+ GCMap.insert(std::make_pair(S, GMP));
+ return GMP;
+ }
+
+ report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
new file mode 100644
index 000000000000..156acace553d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -0,0 +1,196 @@
+//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dwarf emissions parts of AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Dwarf Emission Helper Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
+ if (isVerbose() && Desc)
+ OutStreamer.AddComment(Desc);
+
+ OutStreamer.EmitSLEB128IntValue(Value);
+}
+
+/// EmitULEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
+ unsigned PadTo) const {
+ if (isVerbose() && Desc)
+ OutStreamer.AddComment(Desc);
+
+ OutStreamer.EmitULEB128IntValue(Value, PadTo);
+}
+
+/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+void AsmPrinter::EmitCFAByte(unsigned Val) const {
+ if (isVerbose()) {
+ if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+ OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+ Twine(Val-dwarf::DW_CFA_offset) + ")");
+ else
+ OutStreamer.AddComment(dwarf::CallFrameString(Val));
+ }
+ OutStreamer.EmitIntValue(Val, 1);
+}
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+ switch (Encoding) {
+ case dwarf::DW_EH_PE_absptr: return "absptr";
+ case dwarf::DW_EH_PE_omit: return "omit";
+ case dwarf::DW_EH_PE_pcrel: return "pcrel";
+ case dwarf::DW_EH_PE_udata4: return "udata4";
+ case dwarf::DW_EH_PE_udata8: return "udata8";
+ case dwarf::DW_EH_PE_sdata4: return "sdata4";
+ case dwarf::DW_EH_PE_sdata8: return "sdata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+ return "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+ return "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+ return "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+ return "indirect pcrel sdata8";
+ }
+
+ return "<unknown encoding>";
+}
+
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding. If verbose assembly output is enabled, we output comments
+/// describing the encoding. Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
+void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
+ if (isVerbose()) {
+ if (Desc != 0)
+ OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
+ Twine(DecodeDWARFEncoding(Val)));
+ else
+ OutStreamer.AddComment(Twine("Encoding = ") +
+ DecodeDWARFEncoding(Val));
+ }
+
+ OutStreamer.EmitIntValue(Val, 1);
+}
+
+/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
+unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return 0;
+
+ switch (Encoding & 0x07) {
+ default: llvm_unreachable("Invalid encoded value.");
+ case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
+ case dwarf::DW_EH_PE_udata2: return 2;
+ case dwarf::DW_EH_PE_udata4: return 4;
+ case dwarf::DW_EH_PE_udata8: return 8;
+ }
+}
+
+void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
+ unsigned Encoding) const {
+ if (GV) {
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ const MCExpr *Exp =
+ TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+ OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
+ } else
+ OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
+}
+
+/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
+/// section. This can be done with a special directive if the target supports
+/// it (e.g. cygwin) or by emitting it as an offset from a label at the start
+/// of the section.
+///
+/// SectionLabel is a temporary label emitted at the start of the section that
+/// Label lives in.
+void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
+ const MCSymbol *SectionLabel) const {
+ // On COFF targets, we have to emit the special .secrel32 directive.
+ if (MAI->getDwarfSectionOffsetDirective()) {
+ OutStreamer.EmitCOFFSecRel32(Label);
+ return;
+ }
+
+ // Get the section that we're referring to, based on SectionLabel.
+ const MCSection &Section = SectionLabel->getSection();
+
+ // If Label has already been emitted, verify that it is in the same section as
+ // section label for sanity.
+ assert((!Label->isInSection() || &Label->getSection() == &Section) &&
+ "Section offset using wrong section base for label");
+
+ // If the section in question will end up with an address of 0 anyway, we can
+ // just emit an absolute reference to save a relocation.
+ if (Section.isBaseAddressKnownZero()) {
+ OutStreamer.EmitSymbolValue(Label, 4);
+ return;
+ }
+
+ // Otherwise, emit it as a label difference from the start of the section.
+ EmitLabelDifference(Label, SectionLabel, 4);
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Lowering Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitCFIFrameMove - Emit a frame instruction.
+void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
+ } else {
+ // Reg + Offset
+ OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true),
+ Src.getOffset());
+ }
+ } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ assert(Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ assert(!Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
+ Dst.getOffset());
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
new file mode 100644
index 000000000000..abfa330fa29d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -0,0 +1,553 @@
+//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the inline assembler pieces of the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+namespace {
+ struct SrcMgrDiagInfo {
+ const MDNode *LocInfo;
+ LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+ void *DiagContext;
+ };
+}
+
+/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
+/// struct above.
+static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+ SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+ assert(DiagInfo && "Diagnostic context not passed down?");
+
+ // If the inline asm had metadata associated with it, pull out a location
+ // cookie corresponding to which line the error occurred on.
+ unsigned LocCookie = 0;
+ if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ unsigned ErrorLine = Diag.getLineNo()-1;
+ if (ErrorLine >= LocInfo->getNumOperands())
+ ErrorLine = 0;
+
+ if (LocInfo->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ LocCookie = CI->getZExtValue();
+ }
+
+ DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
+}
+
+/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
+ InlineAsm::AsmDialect Dialect) const {
+ assert(!Str.empty() && "Can't emit empty inline asm block");
+
+ // Remember if the buffer is nul terminated or not so we can avoid a copy.
+ bool isNullTerminated = Str.back() == 0;
+ if (isNullTerminated)
+ Str = Str.substr(0, Str.size()-1);
+
+ // If the output streamer is actually a .s file, just emit the blob textually.
+ // This is useful in case the asm parser doesn't handle something but the
+ // system assembler does.
+ if (OutStreamer.hasRawTextSupport()) {
+ OutStreamer.EmitRawText(Str);
+ return;
+ }
+
+ SourceMgr SrcMgr;
+ SrcMgrDiagInfo DiagInfo;
+
+ // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ bool HasDiagHandler = false;
+ if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+ // If the source manager has an issue, we arrange for srcMgrDiagHandler
+ // to be invoked, getting DiagInfo passed into it.
+ DiagInfo.LocInfo = LocMDNode;
+ DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
+ HasDiagHandler = true;
+ }
+
+ MemoryBuffer *Buffer;
+ if (isNullTerminated)
+ Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
+ else
+ Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+
+ // Tell SrcMgr about this buffer, it takes ownership of the buffer.
+ SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr,
+ OutContext, OutStreamer,
+ *MAI));
+
+ // FIXME: It would be nice if we can avoid createing a new instance of
+ // MCSubtargetInfo here given TargetSubtargetInfo is available. However,
+ // we have to watch out for asm directives which can change subtarget
+ // state. e.g. .code 16, .code 32.
+ OwningPtr<MCSubtargetInfo>
+ STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
+ TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+ OwningPtr<MCTargetAsmParser>
+ TAP(TM.getTarget().createMCAsmParser(*STI, *Parser));
+ if (!TAP)
+ report_fatal_error("Inline asm not supported by this streamer because"
+ " we don't have an asm parser for this target\n");
+ Parser->setAssemblerDialect(Dialect);
+ Parser->setTargetParser(*TAP.get());
+
+ // Don't implicitly switch to the text section before the asm.
+ int Res = Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
+ if (Res && !HasDiagHandler)
+ report_fatal_error("Error parsing inline asm\n");
+}
+
+static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, int InlineAsmVariant,
+ AsmPrinter *AP, unsigned LocCookie,
+ raw_ostream &OS) {
+ // Switch to the inline assembly variant.
+ OS << "\t.intel_syntax\n\t";
+
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+ unsigned NumOperands = MI->getNumOperands();
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$':
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ }
+ if (Done) break;
+
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
+
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
+ /*Modifier*/ 0, OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+ /*Modifier*/ 0, OS);
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ break;
+ }
+ }
+ }
+ OS << "\n\t.att_syntax\n" << (char)0; // null terminate string.
+}
+
+static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, int InlineAsmVariant,
+ int AsmPrinterVariant, AsmPrinter *AP,
+ unsigned LocCookie, raw_ostream &OS) {
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+ unsigned NumOperands = MI->getNumOperands();
+
+ OS << '\t';
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // consume '|' character.
+ if (CurVariant == -1)
+ OS << '|'; // this is gcc's behavior for | outside a variant
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // consume ')' character.
+ if (CurVariant == -1)
+ OS << '}'; // this is gcc's behavior for } outside a variant
+ else
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (HasCurlyBraces && *LastEmitted == ':') {
+ ++LastEmitted;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (StrEnd == 0)
+ report_fatal_error("Unterminated ${:foo} operand in inline asm"
+ " string: '" + Twine(AsmStr) + "'");
+
+ std::string Val(StrStart, StrEnd);
+ AP->PrintSpecial(MI, OS, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+ // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0)
+ report_fatal_error("Bad ${:} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}')
+ report_fatal_error("Bad ${} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (Modifier[0] == 'l') // labels are target independent
+ // FIXME: What if the operand isn't an MBB, report error?
+ OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
+ else {
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
+ Modifier[0] ? Modifier : 0,
+ OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+ Modifier[0] ? Modifier : 0, OS);
+ }
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ }
+ break;
+ }
+ }
+ }
+ OS << '\n' << (char)0; // null terminate string.
+}
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+ assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+ // If this asmstr is empty, just print the #APP/#NOAPP markers.
+ // These are useful to see where empty asm's wound up.
+ if (AsmStr[0] == 0) {
+ // Don't emit the comments if writing to a .o file.
+ if (!OutStreamer.hasRawTextSupport()) return;
+
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+ return;
+ }
+
+ // Emit the #APP start marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+
+ // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+ // it.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+ if (MI->getOperand(i-1).isMetadata() &&
+ (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ // Emit the inline asm to a temporary string so we can emit it through
+ // EmitInlineAsm.
+ SmallString<256> StringData;
+ raw_svector_ostream OS(StringData);
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+ InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect();
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if (InlineAsmVariant == InlineAsm::AD_ATT)
+ EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant,
+ AP, LocCookie, OS);
+ else
+ EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
+
+ EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect());
+
+ // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+}
+
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+ const char *Code) const {
+ if (!strcmp(Code, "private")) {
+ OS << MAI->getPrivateGlobalPrefix();
+ } else if (!strcmp(Code, "comment")) {
+ OS << MAI->getCommentString();
+ } else if (!strcmp(Code, "uid")) {
+ // Comparing the address of MI isn't sufficient, because machineinstrs may
+ // be allocated to the same address across functions.
+
+ // If this is a new LastFn instruction, bump the counter.
+ if (LastMI != MI || LastFn != getFunctionNumber()) {
+ ++Counter;
+ LastMI = MI;
+ LastFn = getFunctionNumber();
+ }
+ OS << Counter;
+ } else {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+}
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (ExtraCode[0]) {
+ default:
+ return true; // Unknown modifier.
+ case 'c': // Substitute immediate value without immediate syntax
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << MO.getImm();
+ return false;
+ case 'n': // Negate the immediate constant.
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << -MO.getImm();
+ return false;
+ }
+ }
+ return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Target doesn't support this yet!
+ return true;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 000000000000..57e0acda890f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,368 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Attribute);
+ ID.AddInteger(Form);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Tag);
+ ID.AddInteger(ChildrenFlag);
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
+///
+void DIEAbbrev::Emit(AsmPrinter *AP) const {
+ // Emit its Dwarf tag type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(Tag, dwarf::TagString(Tag));
+
+ // Emit whether it has children DIEs.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(AttrData.getAttribute(),
+ dwarf::AttributeString(AttrData.getAttribute()));
+
+ // Emit form type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(AttrData.getForm(),
+ dwarf::FormEncodingString(AttrData.getForm()));
+ }
+
+ // Mark end of abbreviation.
+ AP->EmitULEB128(0, "EOM(1)");
+ AP->EmitULEB128(0, "EOM(2)");
+}
+
+#ifndef NDEBUG
+void DIEAbbrev::print(raw_ostream &O) {
+ O << "Abbreviation @"
+ << format("0x%lx", (long)(intptr_t)this)
+ << " "
+ << dwarf::TagString(Tag)
+ << " "
+ << dwarf::ChildrenString(ChildrenFlag)
+ << '\n';
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << dwarf::AttributeString(Data[i].getAttribute())
+ << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << '\n';
+ }
+}
+void DIEAbbrev::dump() { print(dbgs()); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE::~DIE() {
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ delete Children[i];
+}
+
+/// Climb up the parent chain to get the compile unit DIE this DIE belongs to.
+DIE *DIE::getCompileUnit() const{
+ DIE *p = getParent();
+ while (p) {
+ if (p->getTag() == dwarf::DW_TAG_compile_unit)
+ return p;
+ p = p->getParent();
+ }
+ llvm_unreachable("We should not have orphaned DIEs.");
+}
+
+#ifndef NDEBUG
+void DIE::print(raw_ostream &O, unsigned IncIndent) {
+ IndentCount += IncIndent;
+ const std::string Indent(IndentCount, ' ');
+ bool isBlock = Abbrev.getTag() == 0;
+
+ if (!isBlock) {
+ O << Indent
+ << "Die: "
+ << format("0x%lx", (long)(intptr_t)this)
+ << ", Offset: " << Offset
+ << ", Size: " << Size << "\n";
+
+ O << Indent
+ << dwarf::TagString(Abbrev.getTag())
+ << " "
+ << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n";
+ } else {
+ O << "Size: " << Size << "\n";
+ }
+
+ const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData();
+
+ IndentCount += 2;
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << Indent;
+
+ if (!isBlock)
+ O << dwarf::AttributeString(Data[i].getAttribute());
+ else
+ O << "Blk[" << i << "]";
+
+ O << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << " ";
+ Values[i]->print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ Children[j]->print(O, 4);
+ }
+
+ if (!isBlock) O << "\n";
+ IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+ print(dbgs());
+}
+#endif
+
+void DIEValue::anchor() { }
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+ unsigned Size = ~0U;
+ switch (Form) {
+ case dwarf::DW_FORM_flag_present:
+ // Emit something to keep the lines and comments in sync.
+ // FIXME: Is there a better way to do this?
+ if (Asm->OutStreamer.hasRawTextSupport())
+ Asm->OutStreamer.EmitRawText(StringRef(""));
+ return;
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: Size = 1; break;
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: Size = 2; break;
+ case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: Size = 4; break;
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: Size = 8; break;
+ case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+ case dwarf::DW_FORM_addr:
+ Size = Asm->getDataLayout().getPointerSize(); break;
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+ Asm->OutStreamer.EmitIntValue(Integer, Size);
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_flag_present: return 0;
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
+ case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+}
+
+#ifndef NDEBUG
+void DIEInteger::print(raw_ostream &O) {
+ O << "Int: " << (int64_t)Integer << " 0x";
+ O.write_hex(Integer);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIELabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form));
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_sec_offset) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
+ return AP->getDataLayout().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIELabel::print(raw_ostream &O) {
+ O << "Lbl: " << Label->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDelta Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
+ return AP->getDataLayout().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIEDelta::print(raw_ostream &O) {
+ O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->EmitInt32(Entry->getOffset());
+}
+
+#ifndef NDEBUG
+void DIEEntry::print(raw_ostream &O) {
+ O << format("Die: 0x%lx", (long)(intptr_t)Entry);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
+ if (!Size) {
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+ switch (Form) {
+ default: llvm_unreachable("Improper form for block");
+ case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
+ }
+
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
+ default: llvm_unreachable("Improper form for block");
+ }
+}
+
+#ifndef NDEBUG
+void DIEBlock::print(raw_ostream &O) {
+ O << "Blk: ";
+ DIE::print(O, 5);
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
new file mode 100644
index 000000000000..c332aa2a7db6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
@@ -0,0 +1,392 @@
+//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include <vector>
+
+namespace llvm {
+ class AsmPrinter;
+ class MCSymbol;
+ class raw_ostream;
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
+ /// Dwarf abbreviation.
+ class DIEAbbrevData {
+ /// Attribute - Dwarf attribute code.
+ ///
+ uint16_t Attribute;
+
+ /// Form - Dwarf form code.
+ ///
+ uint16_t Form;
+ public:
+ DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {}
+
+ // Accessors.
+ uint16_t getAttribute() const { return Attribute; }
+ uint16_t getForm() const { return Form; }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+ /// information object.
+ class DIEAbbrev : public FoldingSetNode {
+ /// Tag - Dwarf tag code.
+ ///
+ uint16_t Tag;
+
+ /// ChildrenFlag - Dwarf children flag.
+ ///
+ uint16_t ChildrenFlag;
+
+ /// Unique number for node.
+ ///
+ unsigned Number;
+
+ /// Data - Raw data bytes for abbreviation.
+ ///
+ SmallVector<DIEAbbrevData, 12> Data;
+
+ public:
+ DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
+
+ // Accessors.
+ uint16_t getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+ uint16_t getChildrenFlag() const { return ChildrenFlag; }
+ const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
+ void setTag(uint16_t T) { Tag = T; }
+ void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
+ void setNumber(unsigned N) { Number = N; }
+
+ /// AddAttribute - Adds another set of attribute information to the
+ /// abbreviation.
+ void AddAttribute(uint16_t Attribute, uint16_t Form) {
+ Data.push_back(DIEAbbrevData(Attribute, Form));
+ }
+
+ /// AddFirstAttribute - Adds a set of attribute information to the front
+ /// of the abbreviation.
+ void AddFirstAttribute(uint16_t Attribute, uint16_t Form) {
+ Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+ }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+
+ /// Emit - Print the abbreviation using the specified asm printer.
+ ///
+ void Emit(AsmPrinter *AP) const;
+
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIE - A structured debug information entry. Has an abbreviation which
+ /// describes its organization.
+ class DIEValue;
+
+ class DIE {
+ protected:
+ /// Offset - Offset in debug info section.
+ ///
+ unsigned Offset;
+
+ /// Size - Size of instance + children.
+ ///
+ unsigned Size;
+
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
+ /// Children DIEs.
+ ///
+ std::vector<DIE *> Children;
+
+ DIE *Parent;
+
+ /// Attribute values.
+ ///
+ SmallVector<DIEValue*, 12> Values;
+
+ // Private data for print()
+ mutable unsigned IndentCount;
+ public:
+ explicit DIE(unsigned Tag)
+ : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0),
+ IndentCount(0) {}
+ virtual ~DIE();
+
+ // Accessors.
+ DIEAbbrev &getAbbrev() { return Abbrev; }
+ unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+ unsigned getTag() const { return Abbrev.getTag(); }
+ unsigned getOffset() const { return Offset; }
+ unsigned getSize() const { return Size; }
+ const std::vector<DIE *> &getChildren() const { return Children; }
+ const SmallVectorImpl<DIEValue*> &getValues() const { return Values; }
+ DIE *getParent() const { return Parent; }
+ /// Climb up the parent chain to get the compile unit DIE this DIE belongs
+ /// to.
+ DIE *getCompileUnit() const;
+ void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+ void setOffset(unsigned O) { Offset = O; }
+ void setSize(unsigned S) { Size = S; }
+
+ /// addValue - Add a value and attributes to a DIE.
+ ///
+ void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ Abbrev.AddAttribute(Attribute, Form);
+ Values.push_back(Value);
+ }
+
+ /// addChild - Add a child to the DIE.
+ ///
+ void addChild(DIE *Child) {
+ if (Child->getParent()) {
+ assert (Child->getParent() == this && "Unexpected DIE Parent!");
+ return;
+ }
+ Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+ Children.push_back(Child);
+ Child->Parent = this;
+ }
+
+#ifndef NDEBUG
+ void print(raw_ostream &O, unsigned IncIndent = 0);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEValue - A debug information entry value.
+ ///
+ class DIEValue {
+ virtual void anchor();
+ public:
+ enum {
+ isInteger,
+ isString,
+ isLabel,
+ isDelta,
+ isEntry,
+ isBlock
+ };
+ protected:
+ /// Type - Type of data stored in the value.
+ ///
+ unsigned Type;
+ public:
+ explicit DIEValue(unsigned T) : Type(T) {}
+ virtual ~DIEValue() {}
+
+ // Accessors
+ unsigned getType() const { return Type; }
+
+ /// EmitValue - Emit value via the Dwarf writer.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0;
+
+ /// SizeOf - Return the size of a value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O) = 0;
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEInteger - An integer value DIE.
+ ///
+ class DIEInteger : public DIEValue {
+ uint64_t Integer;
+ public:
+ explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+ /// BestForm - Choose the best form for integer.
+ ///
+ static unsigned BestForm(bool IsSigned, uint64_t Int) {
+ if (IsSigned) {
+ const int64_t SignedInt = Int;
+ if ((char)Int == SignedInt) return dwarf::DW_FORM_data1;
+ if ((short)Int == SignedInt) return dwarf::DW_FORM_data2;
+ if ((int)Int == SignedInt) return dwarf::DW_FORM_data4;
+ } else {
+ if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
+ if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
+ if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4;
+ }
+ return dwarf::DW_FORM_data8;
+ }
+
+ /// EmitValue - Emit integer of appropriate size.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ uint64_t getValue() const { return Integer; }
+
+ /// SizeOf - Determine size of integer value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIELabel - A label expression DIE.
+ //
+ class DIELabel : public DIEValue {
+ const MCSymbol *Label;
+ public:
+ explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// getValue - Get MCSymbol.
+ ///
+ const MCSymbol *getValue() const { return Label; }
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEDelta - A simple label difference DIE.
+ ///
+ class DIEDelta : public DIEValue {
+ const MCSymbol *LabelHi;
+ const MCSymbol *LabelLo;
+ public:
+ DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
+ : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+ /// EmitValue - Emit delta value.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of delta value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEEntry - A pointer to another debug information entry. An instance of
+ /// this class can also be used as a proxy for a debug information entry not
+ /// yet defined (ie. types.)
+ class DIEEntry : public DIEValue {
+ DIE *const Entry;
+ public:
+ explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+
+ DIE *getEntry() const { return Entry; }
+
+ /// EmitValue - Emit debug information entry offset.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of debug information entry in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const {
+ return sizeof(int32_t);
+ }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEBlock - A block of values. Primarily used for location expressions.
+ //
+ class DIEBlock : public DIEValue, public DIE {
+ unsigned Size; // Size in bytes excluding size header.
+ public:
+ DIEBlock()
+ : DIEValue(isBlock), DIE(0), Size(0) {}
+ virtual ~DIEBlock() {}
+
+ /// ComputeSize - calculate the size of the block.
+ ///
+ unsigned ComputeSize(AsmPrinter *AP);
+
+ /// BestForm - Choose the best form for data.
+ ///
+ unsigned BestForm() const {
+ if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1;
+ if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
+ if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4;
+ return dwarf::DW_FORM_block;
+ }
+
+ /// EmitValue - Emit block data.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of block data in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
new file mode 100644
index 000000000000..f58ec9b4bf46
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -0,0 +1,264 @@
+//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfAccelTable.h"
+#include "DIE.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
+ switch (AT) {
+ case eAtomTypeNULL: return "eAtomTypeNULL";
+ case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
+ case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
+ case eAtomTypeTag: return "eAtomTypeTag";
+ case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
+ case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
+ }
+ llvm_unreachable("invalid AtomType!");
+}
+
+// The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
+DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) :
+ Header(8 + (atomList.size() * 4)),
+ HeaderData(atomList),
+ Entries(Allocator) { }
+
+DwarfAccelTable::~DwarfAccelTable() { }
+
+void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) {
+ assert(Data.empty() && "Already finalized!");
+ // If the string is in the list already then add this die to the list
+ // otherwise add a new one.
+ DataArray &DIEs = Entries[Name];
+ DIEs.push_back(new (Allocator) HashDataContents(die, Flags));
+}
+
+void DwarfAccelTable::ComputeBucketCount(void) {
+ // First get the number of unique hashes.
+ std::vector<uint32_t> uniques(Data.size());
+ for (size_t i = 0, e = Data.size(); i < e; ++i)
+ uniques[i] = Data[i]->HashValue;
+ array_pod_sort(uniques.begin(), uniques.end());
+ std::vector<uint32_t>::iterator p =
+ std::unique(uniques.begin(), uniques.end());
+ uint32_t num = std::distance(uniques.begin(), p);
+
+ // Then compute the bucket size, minimum of 1 bucket.
+ if (num > 1024) Header.bucket_count = num/4;
+ if (num > 16) Header.bucket_count = num/2;
+ else Header.bucket_count = num > 0 ? num : 1;
+
+ Header.hashes_count = num;
+}
+
+// compareDIEs - comparison predicate that sorts DIEs by their offset.
+static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
+ const DwarfAccelTable::HashDataContents *B) {
+ return A->Die->getOffset() < B->Die->getOffset();
+}
+
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
+ // Create the individual hash data outputs.
+ for (StringMap<DataArray>::iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+
+ // Unique the entries.
+ std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs);
+ EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
+ EI->second.end());
+
+ HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second);
+ Data.push_back(Entry);
+ }
+
+ // Figure out how many buckets we need, then compute the bucket
+ // contents and the final ordering. We'll emit the hashes and offsets
+ // by doing a walk during the emission phase. We add temporary
+ // symbols to the data so that we can reference them during the offset
+ // later, we'll emit them when we emit the data.
+ ComputeBucketCount();
+
+ // Compute bucket contents and final ordering.
+ Buckets.resize(Header.bucket_count);
+ for (size_t i = 0, e = Data.size(); i < e; ++i) {
+ uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
+ Buckets[bucket].push_back(Data[i]);
+ Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+ }
+}
+
+// Emits the header for the table via the AsmPrinter.
+void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
+ Asm->OutStreamer.AddComment("Header Magic");
+ Asm->EmitInt32(Header.magic);
+ Asm->OutStreamer.AddComment("Header Version");
+ Asm->EmitInt16(Header.version);
+ Asm->OutStreamer.AddComment("Header Hash Function");
+ Asm->EmitInt16(Header.hash_function);
+ Asm->OutStreamer.AddComment("Header Bucket Count");
+ Asm->EmitInt32(Header.bucket_count);
+ Asm->OutStreamer.AddComment("Header Hash Count");
+ Asm->EmitInt32(Header.hashes_count);
+ Asm->OutStreamer.AddComment("Header Data Length");
+ Asm->EmitInt32(Header.header_data_len);
+ Asm->OutStreamer.AddComment("HeaderData Die Offset Base");
+ Asm->EmitInt32(HeaderData.die_offset_base);
+ Asm->OutStreamer.AddComment("HeaderData Atom Count");
+ Asm->EmitInt32(HeaderData.Atoms.size());
+ for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
+ Atom A = HeaderData.Atoms[i];
+ Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type));
+ Asm->EmitInt16(A.type);
+ Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
+ Asm->EmitInt16(A.form);
+ }
+}
+
+// Walk through and emit the buckets for the table. Each index is
+// an offset into the list of hashes.
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
+ unsigned index = 0;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ Asm->OutStreamer.AddComment("Bucket " + Twine(i));
+ if (Buckets[i].size() != 0)
+ Asm->EmitInt32(index);
+ else
+ Asm->EmitInt32(UINT32_MAX);
+ index += Buckets[i].size();
+ }
+}
+
+// Walk through the buckets and emit the individual hashes for each
+// bucket.
+void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
+ Asm->EmitInt32((*HI)->HashValue);
+ }
+ }
+}
+
+// Walk through the buckets and emit the individual offsets for each
+// element in each bucket. This is done via a symbol subtraction from the
+// beginning of the section. The non-section symbol will be output later
+// when we emit the actual data.
+void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
+ MCContext &Context = Asm->OutStreamer.getContext();
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
+ MCSymbolRefExpr::Create(SecBegin, Context),
+ Context);
+ Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t));
+ }
+ }
+}
+
+// Walk through the buckets and emit the full data for each element in
+// the bucket. For the string case emit the dies and the various offsets.
+// Terminate each HashData bucket with 0.
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
+ uint64_t PrevHash = UINT64_MAX;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer.EmitLabel((*HI)->Sym);
+ Asm->OutStreamer.AddComment((*HI)->Str);
+ Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
+ D->getStringPoolSym());
+ Asm->OutStreamer.AddComment("Num DIEs");
+ Asm->EmitInt32((*HI)->Data.size());
+ for (ArrayRef<HashDataContents*>::const_iterator
+ DI = (*HI)->Data.begin(), DE = (*HI)->Data.end();
+ DI != DE; ++DI) {
+ // Emit the DIE offset
+ Asm->EmitInt32((*DI)->Die->getOffset());
+ // If we have multiple Atoms emit that info too.
+ // FIXME: A bit of a hack, we either emit only one atom or all info.
+ if (HeaderData.Atoms.size() > 1) {
+ Asm->EmitInt16((*DI)->Die->getTag());
+ Asm->EmitInt8((*DI)->Flags);
+ }
+ }
+ // Emit a 0 to terminate the data unless we have a hash collision.
+ if (PrevHash != (*HI)->HashValue)
+ Asm->EmitInt32(0);
+ PrevHash = (*HI)->HashValue;
+ }
+ }
+}
+
+// Emit the entire data structure to the output file.
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
+ DwarfUnits *D) {
+ // Emit the header.
+ EmitHeader(Asm);
+
+ // Emit the buckets.
+ EmitBuckets(Asm);
+
+ // Emit the hashes.
+ EmitHashes(Asm);
+
+ // Emit the offsets.
+ EmitOffsets(Asm, SecBegin);
+
+ // Emit the hash data.
+ EmitData(Asm, D);
+}
+
+#ifndef NDEBUG
+void DwarfAccelTable::print(raw_ostream &O) {
+
+ Header.print(O);
+ HeaderData.print(O);
+
+ O << "Entries: \n";
+ for (StringMap<DataArray>::const_iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ O << "Name: " << EI->getKeyData() << "\n";
+ for (DataArray::const_iterator DI = EI->second.begin(),
+ DE = EI->second.end();
+ DI != DE; ++DI)
+ (*DI)->print(O);
+ }
+
+ O << "Buckets and Hashes: \n";
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i)
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI)
+ (*HI)->print(O);
+
+ O << "Data: \n";
+ for (std::vector<HashData*>::const_iterator
+ DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
+ (*DI)->print(O);
+
+
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
new file mode 100644
index 000000000000..9915bcaa9b69
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -0,0 +1,283 @@
+//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+
+#include "DIE.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include <map>
+#include <vector>
+
+// The dwarf accelerator tables are an indirect hash table optimized
+// for null lookup rather than access to known data. They are output into
+// an on-disk format that looks like this:
+//
+// .-------------.
+// | HEADER |
+// |-------------|
+// | BUCKETS |
+// |-------------|
+// | HASHES |
+// |-------------|
+// | OFFSETS |
+// |-------------|
+// | DATA |
+// `-------------'
+//
+// where the header contains a magic number, version, type of hash function,
+// the number of buckets, total number of hashes, and room for a special
+// struct of data and the length of that struct.
+//
+// The buckets contain an index (e.g. 6) into the hashes array. The hashes
+// section contains all of the 32-bit hash values in contiguous memory, and
+// the offsets contain the offset into the data area for the particular
+// hash.
+//
+// For a lookup example, we could hash a function name and take it modulo the
+// number of buckets giving us our bucket. From there we take the bucket value
+// as an index into the hashes table and look at each successive hash as long
+// as the hash value is still the same modulo result (bucket value) as earlier.
+// If we have a match we look at that same entry in the offsets table and
+// grab the offset in the data for our final match.
+
+namespace llvm {
+
+class AsmPrinter;
+class DIE;
+class DwarfUnits;
+
+class DwarfAccelTable {
+
+ enum HashFunctionType {
+ eHashFunctionDJB = 0u
+ };
+
+ static uint32_t HashDJB (StringRef Str) {
+ uint32_t h = 5381;
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ h = ((h << 5) + h) + Str[i];
+ return h;
+ }
+
+ // Helper function to compute the number of buckets needed based on
+ // the number of unique hashes.
+ void ComputeBucketCount (void);
+
+ struct TableHeader {
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number.
+ uint16_t hash_function; // The hash function enumeration that was used.
+ uint32_t bucket_count; // The number of buckets in this hash table.
+ uint32_t hashes_count; // The total number of unique hash values
+ // and hash data offsets in this table.
+ uint32_t header_data_len; // The bytes to skip to get to the hash
+ // indexes (buckets) for correct alignment.
+ // Also written to disk is the implementation specific header data.
+
+ static const uint32_t MagicHash = 0x48415348;
+
+ TableHeader (uint32_t data_len) :
+ magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
+ bucket_count (0), hashes_count (0), header_data_len (data_len)
+ {}
+
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Magic: " << format("0x%x", magic) << "\n"
+ << "Version: " << version << "\n"
+ << "Hash Function: " << hash_function << "\n"
+ << "Bucket Count: " << bucket_count << "\n"
+ << "Header Data Length: " << header_data_len << "\n";
+ }
+ void dump() { print(dbgs()); }
+#endif
+ };
+
+public:
+ // The HeaderData describes the form of each set of data. In general this
+ // is as a list of atoms (atom_count) where each atom contains a type
+ // (AtomType type) of data, and an encoding form (form). In the case of
+ // data that is referenced via DW_FORM_ref_* the die_offset_base is
+ // used to describe the offset for all forms in the list of atoms.
+ // This also serves as a public interface of sorts.
+ // When written to disk this will have the form:
+ //
+ // uint32_t die_offset_base
+ // uint32_t atom_count
+ // atom_count Atoms
+ enum AtomType {
+ eAtomTypeNULL = 0u,
+ eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
+ eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that
+ // contains the item in question
+ eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as
+ // DW_FORM_data1 (if no tags exceed 255) or
+ // DW_FORM_data2.
+ eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
+ eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags
+ };
+
+ enum TypeFlags {
+ eTypeFlagClassMask = 0x0000000fu,
+
+ // Always set for C++, only set for ObjC if this is the
+ // @implementation for a class.
+ eTypeFlagClassIsImplementation = ( 1u << 1 )
+ };
+
+ // Make these public so that they can be used as a general interface to
+ // the class.
+ struct Atom {
+ AtomType type; // enum AtomType
+ uint16_t form; // DWARF DW_FORM_ defines
+
+ Atom(AtomType type, uint16_t form) : type(type), form(form) {}
+ static const char * AtomTypeString(enum AtomType);
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Type: " << AtomTypeString(type) << "\n"
+ << "Form: " << dwarf::FormEncodingString(form) << "\n";
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ private:
+ struct TableHeaderData {
+ uint32_t die_offset_base;
+ SmallVector<Atom, 1> Atoms;
+
+ TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
+ : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { }
+
+#ifndef NDEBUG
+ void print (raw_ostream &O) {
+ O << "die_offset_base: " << die_offset_base << "\n";
+ for (size_t i = 0; i < Atoms.size(); i++)
+ Atoms[i].print(O);
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ // The data itself consists of a str_offset, a count of the DIEs in the
+ // hash and the offsets to the DIEs themselves.
+ // On disk each data section is ended with a 0 KeyType as the end of the
+ // hash chain.
+ // On output this looks like:
+ // uint32_t str_offset
+ // uint32_t hash_data_count
+ // HashData[hash_data_count]
+public:
+ struct HashDataContents {
+ DIE *Die; // Offsets
+ char Flags; // Specific flags to output
+
+ HashDataContents(DIE *D, char Flags) :
+ Die(D),
+ Flags(Flags) { }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) const {
+ O << " Offset: " << Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n";
+ O << " Flags: " << Flags << "\n";
+ }
+ #endif
+ };
+private:
+ struct HashData {
+ StringRef Str;
+ uint32_t HashValue;
+ MCSymbol *Sym;
+ ArrayRef<HashDataContents*> Data; // offsets
+ HashData(StringRef S, ArrayRef<HashDataContents*> Data)
+ : Str(S), Data(Data) {
+ HashValue = DwarfAccelTable::HashDJB(S);
+ }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Name: " << Str << "\n";
+ O << " Hash Value: " << format("0x%x", HashValue) << "\n";
+ O << " Symbol: " ;
+ if (Sym) Sym->print(O);
+ else O << "<none>";
+ O << "\n";
+ for (size_t i = 0; i < Data.size(); i++) {
+ O << " Offset: " << Data[i]->Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n";
+ O << " Flags: " << Data[i]->Flags << "\n";
+ }
+ }
+ void dump() {
+ print(dbgs());
+ }
+ #endif
+ };
+
+ DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
+ void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
+
+ // Internal Functions
+ void EmitHeader(AsmPrinter *);
+ void EmitBuckets(AsmPrinter *);
+ void EmitHashes(AsmPrinter *);
+ void EmitOffsets(AsmPrinter *, MCSymbol *);
+ void EmitData(AsmPrinter *, DwarfUnits *D);
+
+ // Allocator for HashData and HashDataContents.
+ BumpPtrAllocator Allocator;
+
+ // Output Variables
+ TableHeader Header;
+ TableHeaderData HeaderData;
+ std::vector<HashData*> Data;
+
+ // String Data
+ typedef std::vector<HashDataContents*> DataArray;
+ typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries;
+ StringEntries Entries;
+
+ // Buckets/Hashes/Offsets
+ typedef std::vector<HashData*> HashList;
+ typedef std::vector<HashList> BucketList;
+ BucketList Buckets;
+ HashList Hashes;
+
+ // Public Implementation
+ public:
+ DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
+ ~DwarfAccelTable();
+ void AddName(StringRef, DIE*, char = 0);
+ void FinalizeTable(AsmPrinter *, const char *);
+ void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *);
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump() { print(dbgs()); }
+#endif
+};
+
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 000000000000..fec5cedc684b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,156 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false),
+ moveTypeModule(AsmPrinter::CFI_M_None) {}
+
+DwarfCFIException::~DwarfCFIException() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::EndModule() {
+ if (moveTypeModule == AsmPrinter::CFI_M_Debug)
+ Asm->OutStreamer.EmitCFISections(false, true);
+
+ if (!Asm->MAI->isExceptionHandlingDwarf())
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel)
+ return;
+
+ // Emit references to all used personality functions
+ bool AtLeastOne = false;
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
+ if (!Personalities[i])
+ continue;
+ MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]);
+ TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
+ AtLeastOne = true;
+ }
+
+ if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) {
+ // This is a temporary hack to keep sections in the same order they
+ // were before. This lets us produce bit identical outputs while
+ // transitioning to CFI.
+ Asm->OutStreamer.SwitchSection(
+ const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection());
+ }
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ // See if we need frame move info.
+ AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
+ if (MoveType == AsmPrinter::CFI_M_EH ||
+ (MoveType == AsmPrinter::CFI_M_Debug &&
+ moveTypeModule == AsmPrinter::CFI_M_None))
+ moveTypeModule = MoveType;
+
+ shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+
+ shouldEmitPersonality = hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitCFIStartProc();
+
+ // Indicate personality routine, if any.
+ if (!shouldEmitPersonality)
+ return;
+
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+ Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
+
+ Asm->OutStreamer.EmitDebugLabel
+ (Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+
+ // Provide LSDA information.
+ if (!shouldEmitLSDA)
+ return;
+
+ Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()),
+ LSDAEncoding);
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::EndFunction() {
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitCFIEndProc();
+
+ if (!shouldEmitPersonality)
+ return;
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ EmitExceptionTable();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
new file mode 100644
index 000000000000..f9b6f9472141
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -0,0 +1,1711 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing a dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+
+#include "DwarfCompileUnit.h"
+#include "DwarfAccelTable.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+/// CompileUnit - Compile unit constructor.
+CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A,
+ DwarfDebug *DW, DwarfUnits *DWU)
+ : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU),
+ IndexTyDie(0), DebugInfoOffset(0) {
+ DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+}
+
+/// ~CompileUnit - Destructor for compile unit.
+CompileUnit::~CompileUnit() {
+ for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+ DIEBlocks[j]->~DIEBlock();
+}
+
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
+ DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
+ return Value;
+}
+
+/// getDefaultLowerBound - Return the default lower bound for an array. If the
+/// DWARF version doesn't handle the language, return -1.
+int64_t CompileUnit::getDefaultLowerBound() const {
+ switch (Language) {
+ default:
+ break;
+
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_ObjC:
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ return 0;
+
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran95:
+ return 1;
+
+ // The languages below have valid values only if the DWARF version >= 4.
+ case dwarf::DW_LANG_Java:
+ case dwarf::DW_LANG_Python:
+ case dwarf::DW_LANG_UPC:
+ case dwarf::DW_LANG_D:
+ if (dwarf::DWARF_VERSION >= 4)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Ada83:
+ case dwarf::DW_LANG_Ada95:
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ case dwarf::DW_LANG_Modula2:
+ case dwarf::DW_LANG_Pascal83:
+ case dwarf::DW_LANG_PLI:
+ if (dwarf::DWARF_VERSION >= 4)
+ return 1;
+ break;
+ }
+
+ return -1;
+}
+
+/// addFlag - Add a flag that is true.
+void CompileUnit::addFlag(DIE *Die, unsigned Attribute) {
+ if (!DD->useDarwinGDBCompat())
+ Die->addValue(Attribute, dwarf::DW_FORM_flag_present,
+ DIEIntegerOne);
+ else
+ addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1);
+}
+
+/// addUInt - Add an unsigned integer attribute data and value.
+///
+void CompileUnit::addUInt(DIE *Die, unsigned Attribute,
+ unsigned Form, uint64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(false, Integer);
+ DIEValue *Value = Integer == 1 ?
+ DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addSInt - Add an signed integer attribute data and value.
+///
+void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
+ unsigned Form, int64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(true, Integer);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addString - Add a string attribute data and value. We always emit a
+/// reference to the string pool instead of immediate strings so that DIEs have
+/// more predictable sizes. In the case of split dwarf we emit an index
+/// into another table which gets us the static offset into the string
+/// table.
+void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) {
+ if (!DD->useSplitDwarf()) {
+ MCSymbol *Symb = DU->getStringPoolEntry(String);
+ DIEValue *Value;
+ if (Asm->needsRelocationsForDwarfStringPool())
+ Value = new (DIEValueAllocator) DIELabel(Symb);
+ else {
+ MCSymbol *StringPool = DU->getStringPoolSym();
+ Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
+ }
+ Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
+ } else {
+ unsigned idx = DU->getStringPoolIndex(String);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value);
+ }
+}
+
+/// addLocalString - Add a string attribute data and value. This is guaranteed
+/// to be in the local string pool instead of indirected.
+void CompileUnit::addLocalString(DIE *Die, unsigned Attribute,
+ StringRef String) {
+ MCSymbol *Symb = DU->getStringPoolEntry(String);
+ DIEValue *Value;
+ if (Asm->needsRelocationsForDwarfStringPool())
+ Value = new (DIEValueAllocator) DIELabel(Symb);
+ else {
+ MCSymbol *StringPool = DU->getStringPoolSym();
+ Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
+ }
+ Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
+}
+
+/// addLabel - Add a Dwarf label attribute data and value.
+///
+void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label) {
+ DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addLabelAddress - Add a dwarf label attribute data and value using
+/// DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute,
+ MCSymbol *Label) {
+ if (!DD->useSplitDwarf()) {
+ if (Label != NULL) {
+ DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+ Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
+ } else {
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(0);
+ Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
+ }
+ } else {
+ unsigned idx = DU->getAddrPoolIndex(Label);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+ }
+}
+
+/// addOpAddress - Add a dwarf op address data and value using the
+/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) {
+
+ if (!DD->useSplitDwarf()) {
+ addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Die, 0, dwarf::DW_FORM_udata, Sym);
+ } else {
+ unsigned idx = DU->getAddrPoolIndex(Sym);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+ Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value);
+ }
+}
+
+/// addDelta - Add a label delta attribute data and value.
+///
+void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addDIEEntry - Add a DIE attribute data and value.
+///
+void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
+ DIE *Entry) {
+ Die->addValue(Attribute, Form, createDIEEntry(Entry));
+}
+
+/// addBlock - Add block data.
+///
+void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
+ DIEBlock *Block) {
+ Block->ComputeSize(Asm);
+ DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
+ Die->addValue(Attribute, Block->BestForm(), Block);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
+ // Verify variable.
+ if (!V.Verify())
+ return;
+
+ unsigned Line = V.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(),
+ V.getContext().getDirectory(),
+ getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
+ // Verify global variable.
+ if (!G.Verify())
+ return;
+
+ unsigned Line = G.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(),
+ getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
+ // Verify subprogram.
+ if (!SP.Verify())
+ return;
+
+ // If the line number is 0, don't add it.
+ unsigned Line = SP.getLineNumber();
+ if (Line == 0)
+ return;
+
+ unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(),
+ SP.getDirectory(), getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
+ // Verify type.
+ if (!Ty.Verify())
+ return;
+
+ unsigned Line = Ty.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(),
+ Ty.getDirectory(), getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
+ // Verify type.
+ if (!Ty.Verify())
+ return;
+
+ unsigned Line = Ty.getLineNumber();
+ if (Line == 0)
+ return;
+ DIFile File = Ty.getFile();
+ unsigned FileID = DD->getOrCreateSourceID(File.getFilename(),
+ File.getDirectory(), getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
+ // Verify namespace.
+ if (!NS.Verify())
+ return;
+
+ unsigned Line = NS.getLineNumber();
+ if (Line == 0)
+ return;
+ StringRef FN = NS.getFilename();
+
+ unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(),
+ getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addVariableAddress - Add DW_AT_location attribute for a
+/// DbgVariable based on provided MachineLocation.
+void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
+ MachineLocation Location) {
+ if (DV->variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV->isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
+}
+
+/// addRegisterOp - Add register operand.
+void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+ if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+}
+
+/// addRegisterOffset - Add register offset.
+void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg,
+ int64_t Offset) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (Reg == TRI->getFrameRegister(*Asm->MF))
+ // If variable offset is based in frame register then use fbreg.
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+ else if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+ addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset);
+}
+
+/// addAddress - Add an address attribute to a die based on the location
+/// provided.
+void CompileUnit::addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// addComplexAddress - Start with the address based on the location provided,
+/// and generate the DWARF information necessary to find the actual variable
+/// given the extra address information encoded in the DIVariable, starting from
+/// the starting location. Add the DWARF information to the die.
+///
+void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ unsigned N = DV->getNumAddrElements();
+ unsigned i = 0;
+ if (Location.isReg()) {
+ if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) {
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1));
+ i = 2;
+ } else
+ addRegisterOp(Block, Location.getReg());
+ }
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ for (;i < N; ++i) {
+ uint64_t Element = DV->getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref) {
+ if (!Location.isReg())
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ } else llvm_unreachable("unknown DIBuilder Opcode");
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/* Byref variables, in Blocks, are declared by the programmer as "SomeType
+ VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
+ gives the variable VarName either the struct, or a pointer to the struct, as
+ its type. This is necessary for various behind-the-scenes things the
+ compiler needs to do with by-reference variables in Blocks.
+
+ However, as far as the original *programmer* is concerned, the variable
+ should still have type 'SomeType', as originally declared.
+
+ The function getBlockByrefType dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable, which is then assigned to
+ the variable's Debug Information Entry as its real type. So far, so good.
+ However now the debugger will expect the variable VarName to have the type
+ SomeType. So we need the location attribute for the variable to be an
+ expression that explains to the debugger how to navigate through the
+ pointers and struct to find the actual variable of type SomeType.
+
+ The following function does just that. We start by getting
+ the "normal" location for the variable. This will be the location
+ of either the struct __Block_byref_x_VarName or the pointer to the
+ struct __Block_byref_x_VarName.
+
+ The struct will look something like:
+
+ struct __Block_byref_x_VarName {
+ ... <various fields>
+ struct __Block_byref_x_VarName *forwarding;
+ ... <various other fields>
+ SomeType VarName;
+ ... <maybe more fields>
+ };
+
+ If we are given the struct directly (as our starting point) we
+ need to tell the debugger to:
+
+ 1). Add the offset of the forwarding field.
+
+ 2). Follow that pointer to get the real __Block_byref_x_VarName
+ struct to use (the real one may have been copied onto the heap).
+
+ 3). Add the offset for the field VarName, to find the actual variable.
+
+ If we started with a pointer to the struct, then we need to
+ dereference that pointer first, before the other steps.
+ Translating this into DWARF ops, we will need to append the following
+ to the current location description for the variable:
+
+ DW_OP_deref -- optional, if we start with a pointer
+ DW_OP_plus_uconst <forward_fld_offset>
+ DW_OP_deref
+ DW_OP_plus_uconst <varName_fld_offset>
+
+ That is what this function does. */
+
+/// addBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location. Add the DWARF information to the die. For
+/// more information, read large comment just above here.
+///
+void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIType Ty = DV->getType();
+ DIType TmpTy = Ty;
+ unsigned Tag = Ty.getTag();
+ bool isPointer = false;
+
+ StringRef varName = DV->getName();
+
+ if (Tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ TmpTy = DTy.getTypeDerivedFrom();
+ isPointer = true;
+ }
+
+ DICompositeType blockStruct = DICompositeType(TmpTy);
+
+ // Find the __forwarding field and the variable field in the __Block_byref
+ // struct.
+ DIArray Fields = blockStruct.getTypeArray();
+ DIDescriptor varField = DIDescriptor();
+ DIDescriptor forwardingField = DIDescriptor();
+
+ for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Fields.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ StringRef fieldName = DT.getName();
+ if (fieldName == "__forwarding")
+ forwardingField = Element;
+ else if (fieldName == varName)
+ varField = Element;
+ }
+
+ // Get the offsets for the forwarding field and the variable field.
+ unsigned forwardingFieldOffset =
+ DIDerivedType(forwardingField).getOffsetInBits() >> 3;
+ unsigned varFieldOffset =
+ DIDerivedType(varField).getOffsetInBits() >> 3;
+
+ // Decode the original location, and use that as the start of the byref
+ // variable's location.
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ // If we started with a pointer to the __Block_byref... struct, then
+ // the first thing we need to do is dereference the pointer (DW_OP_deref).
+ if (isPointer)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Next add the offset for the '__forwarding' field:
+ // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
+ // adding the offset if it's 0.
+ if (forwardingFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ }
+
+ // Now dereference the __forwarding field to get to the real __Block_byref
+ // struct: DW_OP_deref.
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Now that we've got the real __Block_byref... struct, add the offset
+ // for the variable's field to get to the location of the actual variable:
+ // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
+ if (varFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// isTypeSigned - Return true if the type is signed.
+static bool isTypeSigned(DIType Ty, int *SizeInBits) {
+ if (Ty.isDerivedType())
+ return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits);
+ if (Ty.isBasicType())
+ if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed
+ || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) {
+ *SizeInBits = Ty.getSizeInBits();
+ return true;
+ }
+ return false;
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
+ DIType Ty) {
+ assert(MO.isImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ int SizeInBits = -1;
+ bool SignedConstant = isTypeSigned(Ty, &SizeInBits);
+ unsigned Form = SignedConstant ? dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata;
+ switch (SizeInBits) {
+ case 8: Form = dwarf::DW_FORM_data1; break;
+ case 16: Form = dwarf::DW_FORM_data2; break;
+ case 32: Form = dwarf::DW_FORM_data4; break;
+ case 64: Form = dwarf::DW_FORM_data8; break;
+ default: break;
+ }
+ SignedConstant ? addSInt(Block, 0, Form, MO.getImm())
+ : addUInt(Block, 0, Form, MO.getImm());
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
+ assert (MO.isFPImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ APFloat FPImm = MO.getFPImm()->getValueAPF();
+
+ // Get the raw data form of the floating point.
+ const APInt FltVal = FPImm.bitcastToAPInt();
+ const char *FltPtr = (const char*)FltVal.getRawData();
+
+ int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
+ for (; Start != Stop; Start += Incr)
+ addUInt(Block, 0, dwarf::DW_FORM_data1,
+ (unsigned char)0xFF & FltPtr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) {
+ return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false);
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
+ bool Unsigned) {
+ return addConstantValue(Die, CI->getValue(), Unsigned);
+}
+
+// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val,
+ bool Unsigned) {
+ unsigned CIBitWidth = Val.getBitWidth();
+ if (CIBitWidth <= 64) {
+ unsigned form = 0;
+ switch (CIBitWidth) {
+ case 8: form = dwarf::DW_FORM_data1; break;
+ case 16: form = dwarf::DW_FORM_data2; break;
+ case 32: form = dwarf::DW_FORM_data4; break;
+ case 64: form = dwarf::DW_FORM_data8; break;
+ default:
+ form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
+ }
+ if (Unsigned)
+ addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue());
+ else
+ addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue());
+ return true;
+ }
+
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ // Get the raw data form of the large APInt.
+ const uint64_t *Ptr64 = Val.getRawData();
+
+ int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
+
+ // Output the constant to DWARF one byte at a time.
+ for (int i = 0; i < NumBytes; i++) {
+ uint8_t c;
+ if (LittleEndian)
+ c = Ptr64[i / 8] >> (8 * (i & 7));
+ else
+ c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, c);
+ }
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addTemplateParams - Add template parameters in buffer.
+void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
+ // Add template parameters.
+ for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
+ DIDescriptor Element = TParams.getElement(i);
+ if (Element.isTemplateTypeParameter())
+ Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
+ DITemplateTypeParameter(Element)));
+ else if (Element.isTemplateValueParameter())
+ Buffer.addChild(getOrCreateTemplateValueParameterDIE(
+ DITemplateValueParameter(Element)));
+ }
+}
+
+/// getOrCreateContextDIE - Get context owner's DIE.
+DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) {
+ if (Context.isType())
+ return getOrCreateTypeDIE(DIType(Context));
+ else if (Context.isNameSpace())
+ return getOrCreateNameSpace(DINameSpace(Context));
+ else if (Context.isSubprogram())
+ return getOrCreateSubprogramDIE(DISubprogram(Context));
+ else
+ return getDIE(Context);
+}
+
+/// addToContextOwner - Add Die into the list of its context owner's children.
+void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
+ if (DIE *ContextDIE = getOrCreateContextDIE(Context))
+ ContextDIE->addChild(Die);
+ else
+ addDie(Die);
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
+ DIType Ty(TyNode);
+ if (!Ty.Verify())
+ return NULL;
+ DIE *TyDIE = getDIE(Ty);
+ if (TyDIE)
+ return TyDIE;
+
+ // Create new type.
+ TyDIE = new DIE(dwarf::DW_TAG_base_type);
+ insertDIE(Ty, TyDIE);
+ if (Ty.isBasicType())
+ constructTypeDIE(*TyDIE, DIBasicType(Ty));
+ else if (Ty.isCompositeType())
+ constructTypeDIE(*TyDIE, DICompositeType(Ty));
+ else {
+ assert(Ty.isDerivedType() && "Unknown kind of DIType");
+ constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+ }
+ // If this is a named finished type then include it in the list of types
+ // for the accelerator tables.
+ if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
+ bool IsImplementation = 0;
+ if (Ty.isCompositeType()) {
+ DICompositeType CT(Ty);
+ // A runtime language of 0 actually means C/C++ and that any
+ // non-negative value is some version of Objective-C/C++.
+ IsImplementation = (CT.getRunTimeLang() == 0) ||
+ CT.isObjcClassComplete();
+ }
+ unsigned Flags = IsImplementation ?
+ DwarfAccelTable::eTypeFlagClassIsImplementation : 0;
+ addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
+ }
+
+ addToContextOwner(TyDIE, Ty.getContext());
+ return TyDIE;
+}
+
+/// addType - Add a new type attribute to the specified entity.
+void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
+ if (!Ty.Verify())
+ return;
+
+ // Check for pre-existence.
+ DIEEntry *Entry = getDIEEntry(Ty);
+ // If it exists then use the existing value.
+ if (Entry) {
+ Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
+ return;
+ }
+
+ // Construct type.
+ DIE *Buffer = getOrCreateTypeDIE(Ty);
+
+ // Set up proxy.
+ Entry = createDIEEntry(Buffer);
+ insertDIEEntry(Ty, Entry);
+ Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
+
+ // If this is a complete composite type then include it in the
+ // list of global types.
+ addGlobalType(Ty);
+}
+
+/// addGlobalType - Add a new global type to the compile unit.
+///
+void CompileUnit::addGlobalType(DIType Ty) {
+ DIDescriptor Context = Ty.getContext();
+ if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
+ && (!Context || Context.isCompileUnit() || Context.isFile()
+ || Context.isNameSpace()))
+ if (DIEEntry *Entry = getDIEEntry(Ty))
+ GlobalTypes[Ty.getName()] = Entry->getEntry();
+}
+
+/// addPubTypes - Add type for pubtypes section.
+void CompileUnit::addPubTypes(DISubprogram SP) {
+ DICompositeType SPTy = SP.getType();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag != dwarf::DW_TAG_subroutine_type)
+ return;
+
+ DIArray Args = SPTy.getTypeArray();
+ for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
+ DIType ATy(Args.getElement(i));
+ if (!ATy.Verify())
+ continue;
+ addGlobalType(ATy);
+ }
+}
+
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+ // Get core information.
+ StringRef Name = BTy.getName();
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, Name);
+
+ if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) {
+ Buffer.setTag(dwarf::DW_TAG_unspecified_type);
+ // Unspecified types has only name, nothing else.
+ return;
+ }
+
+ Buffer.setTag(dwarf::DW_TAG_base_type);
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
+
+ uint64_t Size = BTy.getSizeInBits() >> 3;
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+ // Get core information.
+ StringRef Name = DTy.getName();
+ uint64_t Size = DTy.getSizeInBits() >> 3;
+ unsigned Tag = DTy.getTag();
+
+ // FIXME - Workaround for templates.
+ if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
+
+ Buffer.setTag(Tag);
+
+ // Map to main type, void will not have a type.
+ DIType FromTy = DTy.getTypeDerivedFrom();
+ addType(&Buffer, FromTy);
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, Name);
+
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size && Tag != dwarf::DW_TAG_pointer_type)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+
+ if (Tag == dwarf::DW_TAG_ptr_to_member_type)
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ getOrCreateTypeDIE(DTy.getClassType()));
+ // Add source line info if available and TyDesc is not a forward declaration.
+ if (!DTy.isForwardDecl())
+ addSourceLine(&Buffer, DTy);
+}
+
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+ // Get core information.
+ StringRef Name = CTy.getName();
+
+ uint64_t Size = CTy.getSizeInBits() >> 3;
+ unsigned Tag = CTy.getTag();
+ Buffer.setTag(Tag);
+
+ switch (Tag) {
+ case dwarf::DW_TAG_array_type:
+ constructArrayTypeDIE(Buffer, &CTy);
+ break;
+ case dwarf::DW_TAG_enumeration_type: {
+ DIArray Elements = CTy.getTypeArray();
+
+ // Add enumerators to enumeration type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *ElemDie = NULL;
+ DIDescriptor Enum(Elements.getElement(i));
+ if (Enum.isEnumerator()) {
+ ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
+ Buffer.addChild(ElemDie);
+ }
+ }
+ DIType DTy = CTy.getTypeDerivedFrom();
+ if (DTy.Verify()) {
+ addType(&Buffer, DTy);
+ addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1);
+ }
+ }
+ break;
+ case dwarf::DW_TAG_subroutine_type: {
+ // Add return type.
+ DIArray Elements = CTy.getTypeArray();
+ DIDescriptor RTy = Elements.getElement(0);
+ addType(&Buffer, DIType(RTy));
+
+ bool isPrototyped = true;
+ // Add arguments.
+ for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Ty = Elements.getElement(i);
+ if (Ty.isUnspecifiedParameter()) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
+ Buffer.addChild(Arg);
+ isPrototyped = false;
+ } else {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ addType(Arg, DIType(Ty));
+ if (DIType(Ty).isArtificial())
+ addFlag(Arg, dwarf::DW_AT_artificial);
+ Buffer.addChild(Arg);
+ }
+ }
+ // Add prototype flag if we're dealing with a C language and the
+ // function has been prototyped.
+ if (isPrototyped &&
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
+ addFlag(&Buffer, dwarf::DW_AT_prototyped);
+ }
+ break;
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_class_type: {
+ // Add elements to structure type.
+ DIArray Elements = CTy.getTypeArray();
+
+ // A forward struct declared type may not have elements available.
+ unsigned N = Elements.getNumElements();
+ if (N == 0)
+ break;
+
+ // Add elements to structure type.
+ for (unsigned i = 0; i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIE *ElemDie = NULL;
+ if (Element.isSubprogram()) {
+ DISubprogram SP(Element);
+ ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
+ if (SP.isProtected())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (SP.isPrivate())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+ if (SP.isExplicit())
+ addFlag(ElemDie, dwarf::DW_AT_explicit);
+ } else if (Element.isDerivedType()) {
+ DIDerivedType DDTy(Element);
+ if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+ ElemDie = new DIE(dwarf::DW_TAG_friend);
+ addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
+ } else if (DDTy.isStaticMember())
+ ElemDie = createStaticMemberDIE(DDTy);
+ else
+ ElemDie = createMemberDIE(DDTy);
+ } else if (Element.isObjCProperty()) {
+ DIObjCProperty Property(Element);
+ ElemDie = new DIE(Property.getTag());
+ StringRef PropertyName = Property.getObjCPropertyName();
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
+ addType(ElemDie, Property.getType());
+ addSourceLine(ElemDie, Property);
+ StringRef GetterName = Property.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
+ StringRef SetterName = Property.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (Property.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (Property.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (Property.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (Property.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (Property.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (Property.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+
+ DIEEntry *Entry = getDIEEntry(Element);
+ if (!Entry) {
+ Entry = createDIEEntry(ElemDie);
+ insertDIEEntry(Element, Entry);
+ }
+ } else
+ continue;
+ Buffer.addChild(ElemDie);
+ }
+
+ if (CTy.isAppleBlockExtension())
+ addFlag(&Buffer, dwarf::DW_AT_APPLE_block);
+
+ DICompositeType ContainingType = CTy.getContainingType();
+ if (DIDescriptor(ContainingType).isCompositeType())
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ getOrCreateTypeDIE(DIType(ContainingType)));
+ else {
+ DIDescriptor Context = CTy.getContext();
+ addToContextOwner(&Buffer, Context);
+ }
+
+ if (CTy.isObjcClassComplete())
+ addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
+
+ // Add template parameters to a class, structure or union types.
+ // FIXME: The support isn't in the metadata for this yet.
+ if (Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type)
+ addTemplateParams(Buffer, CTy.getTemplateParams());
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, Name);
+
+ if (Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type) {
+ // Add size if non-zero (derived types might be zero-sized.)
+ // TODO: Do we care about size for enum forward declarations?
+ if (Size)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ else if (!CTy.isForwardDecl())
+ // Add zero size if it is not a forward declaration.
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+
+ // If we're a forward decl, say so.
+ if (CTy.isForwardDecl())
+ addFlag(&Buffer, dwarf::DW_AT_declaration);
+
+ // Add source line info if available.
+ if (!CTy.isForwardDecl())
+ addSourceLine(&Buffer, CTy);
+
+ // No harm in adding the runtime language to the declaration.
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
+ }
+}
+
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateTypeParameter.
+DIE *
+CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+ DIE *ParamDIE = getDIE(TP);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+ addType(ParamDIE, TP.getType());
+ addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
+ return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateValueParameter.
+DIE *
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
+ DIE *ParamDIE = getDIE(TPV);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+ addType(ParamDIE, TPV.getType());
+ if (!TPV.getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ TPV.getValue());
+ return ParamDIE;
+}
+
+/// getOrCreateNameSpace - Create a DIE for DINameSpace.
+DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
+ DIE *NDie = getDIE(NS);
+ if (NDie)
+ return NDie;
+ NDie = new DIE(dwarf::DW_TAG_namespace);
+ insertDIE(NS, NDie);
+ if (!NS.getName().empty()) {
+ addString(NDie, dwarf::DW_AT_name, NS.getName());
+ addAccelNamespace(NS.getName(), NDie);
+ } else
+ addAccelNamespace("(anonymous namespace)", NDie);
+ addSourceLine(NDie, NS);
+ addToContextOwner(NDie, NS.getContext());
+ return NDie;
+}
+
+/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
+/// printer to not emit usual symbol prefix before the symbol name is used then
+/// return linkage name after skipping this special LLVM prefix.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+/// getOrCreateSubprogramDIE - Create new DIE using SP.
+DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
+ DIE *SPDie = getDIE(SP);
+ if (SPDie)
+ return SPDie;
+
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ insertDIE(SP, SPDie);
+
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ DIE *DeclDie = NULL;
+ if (SPDecl.isSubprogram()) {
+ DeclDie = getOrCreateSubprogramDIE(SPDecl);
+ }
+
+ // Add to context owner.
+ addToContextOwner(SPDie, SP.getContext());
+
+ // Add function template parameters.
+ addTemplateParams(*SPDie, SP.getTemplateParams());
+
+ // Unfortunately this code needs to stay here instead of below the
+ // AT_specification code in order to work around a bug in older
+ // gdbs that requires the linkage name to resolve multiple template
+ // functions.
+ // TODO: Remove this set of code when we get rid of the old gdb
+ // compatibility.
+ StringRef LinkageName = SP.getLinkageName();
+ if (!LinkageName.empty() && DD->useDarwinGDBCompat())
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+
+ // If this DIE is going to refer declaration info using AT_specification
+ // then there is no need to add other attributes.
+ if (DeclDie) {
+ // Refer function declaration directly.
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ DeclDie);
+
+ return SPDie;
+ }
+
+ // Add the linkage name if we have one.
+ if (!LinkageName.empty() && !DD->useDarwinGDBCompat())
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+
+ // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP.getName().empty())
+ addString(SPDie, dwarf::DW_AT_name, SP.getName());
+
+ addSourceLine(SPDie, SP);
+
+ // Add the prototype if we have a prototype and we have a C like
+ // language.
+ if (SP.isPrototyped() &&
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
+ addFlag(SPDie, dwarf::DW_AT_prototyped);
+
+ // Add Return Type.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
+ addType(SPDie, SPTy);
+ else
+ addType(SPDie, DIType(Args.getElement(0)));
+
+ unsigned VK = SP.getVirtuality();
+ if (VK) {
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
+ DIEBlock *Block = getDIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+ addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+ ContainingTypeMap.insert(std::make_pair(SPDie,
+ SP.getContainingType()));
+ }
+
+ if (!SP.isDefinition()) {
+ addFlag(SPDie, dwarf::DW_AT_declaration);
+
+ // Add arguments. Do not add arguments for subprogram definition. They will
+ // be handled while processing variables.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(Args.getElement(i));
+ addType(Arg, ATy);
+ if (ATy.isArtificial())
+ addFlag(Arg, dwarf::DW_AT_artificial);
+ SPDie->addChild(Arg);
+ }
+ }
+
+ if (SP.isArtificial())
+ addFlag(SPDie, dwarf::DW_AT_artificial);
+
+ if (!SP.isLocalToUnit())
+ addFlag(SPDie, dwarf::DW_AT_external);
+
+ if (SP.isOptimized())
+ addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
+
+ if (unsigned isa = Asm->getISAEncoding()) {
+ addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
+ return SPDie;
+}
+
+// Return const expression if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return NULL;
+
+ // First operand points to a global struct.
+ Value *Ptr = CE->getOperand(0);
+ if (!isa<GlobalValue>(Ptr) ||
+ !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
+ return NULL;
+
+ // Second operand is zero.
+ const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return NULL;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return NULL;
+
+ return CE;
+}
+
+/// createGlobalVariableDIE - create global variable DIE.
+void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
+ // Check for pre-existence.
+ if (getDIE(N))
+ return;
+
+ DIGlobalVariable GV(N);
+ if (!GV.Verify())
+ return;
+
+ DIDescriptor GVContext = GV.getContext();
+ DIType GTy = GV.getType();
+
+ // If this is a static data member definition, some attributes belong
+ // to the declaration DIE.
+ DIE *VariableDIE = NULL;
+ bool IsStaticMember = false;
+ DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration();
+ if (SDMDecl.Verify()) {
+ assert(SDMDecl.isStaticMember() && "Expected static member decl");
+ // We need the declaration DIE that is in the static member's class.
+ // But that class might not exist in the DWARF yet.
+ // Creating the class will create the static member decl DIE.
+ getOrCreateContextDIE(SDMDecl.getContext());
+ VariableDIE = getDIE(SDMDecl);
+ assert(VariableDIE && "Static member decl has no context?");
+ IsStaticMember = true;
+ }
+
+ // If this is not a static data member definition, create the variable
+ // DIE and add the initial set of attributes to it.
+ if (!VariableDIE) {
+ VariableDIE = new DIE(GV.getTag());
+ // Add to map.
+ insertDIE(N, VariableDIE);
+
+ // Add name and type.
+ addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+ addType(VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ addFlag(VariableDIE, dwarf::DW_AT_external);
+ addGlobalName(GV.getName(), VariableDIE);
+ }
+
+ // Add line number info.
+ addSourceLine(VariableDIE, GV);
+ // Add to context owner.
+ addToContextOwner(VariableDIE, GVContext);
+ }
+
+ // Add location.
+ bool addToAccelTable = false;
+ DIE *VariableSpecDIE = NULL;
+ bool isGlobalVariable = GV.getGlobal() != NULL;
+ if (isGlobalVariable) {
+ addToAccelTable = true;
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal()));
+ // Do not create specification DIE if context is either compile unit
+ // or a subprogram.
+ if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
+ !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+ // Create specification DIE.
+ VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, VariableDIE);
+ addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ // A static member's declaration is already flagged as such.
+ if (!SDMDecl.Verify())
+ addFlag(VariableDIE, dwarf::DW_AT_declaration);
+ addDie(VariableSpecDIE);
+ } else {
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+ // Add linkage name.
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty()) {
+ // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
+ // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
+ // TAG_variable.
+ addString(IsStaticMember && VariableSpecDIE ?
+ VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+ // In compatibility mode with older gdbs we put the linkage name on both
+ // the TAG_variable DIE and on the TAG_member DIE.
+ if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat())
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+ // AT_const_value was added when the static member was created. To avoid
+ // emitting AT_const_value multiple times, we only add AT_const_value when
+ // it is not a static member.
+ if (!IsStaticMember)
+ addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
+ } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ addToAccelTable = true;
+ // GV is a merged global.
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ Value *Ptr = CE->getOperand(0);
+ addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
+ addUInt(Block, 0, dwarf::DW_FORM_udata,
+ Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+
+ if (addToAccelTable) {
+ DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE;
+ addAccelName(GV.getName(), AddrDIE);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
+ addAccelName(GV.getLinkageName(), AddrDIE);
+ }
+
+ return;
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
+ DIE *IndexTy) {
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+
+ // The LowerBound value defines the lower bounds which is typically zero for
+ // C/C++. The Count value is the number of elements. Values are 64 bit. If
+ // Count == -1 then the array is unbounded and we do not emit
+ // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and
+ // Count == 0, then the array has zero elements in which case we do not emit
+ // an upper bound.
+ int64_t LowerBound = SR.getLo();
+ int64_t DefaultLowerBound = getDefaultLowerBound();
+ int64_t Count = SR.getCount();
+
+ if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
+ addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound);
+
+ if (Count != -1 && Count != 0)
+ // FIXME: An unbounded array should reference the expression that defines
+ // the array.
+ addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1);
+
+ Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->isVector())
+ addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
+
+ // Emit derived type.
+ addType(&Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Get an anonymous type for index type.
+ // FIXME: This type should be passed down from the front end
+ // as different languages may have different sizes for indexes.
+ DIE *IdxTy = getIndexTyDie();
+ if (!IdxTy) {
+ // Construct an anonymous type for index type.
+ IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addString(IdxTy, dwarf::DW_AT_name, "int");
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ addDie(IdxTy);
+ setIndexTyDie(IdxTy);
+ }
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+ }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ StringRef Name = ETy.getName();
+ addString(Enumerator, dwarf::DW_AT_name, Name);
+ int64_t Value = ETy.getEnumValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
+
+/// constructContainingTypeDIEs - Construct DIEs for types that contain
+/// vtables.
+void CompileUnit::constructContainingTypeDIEs() {
+ for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
+ CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+ DIE *SPDie = CI->first;
+ const MDNode *N = CI->second;
+ if (!N) continue;
+ DIE *NDie = getDIE(N);
+ if (!NDie) continue;
+ addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+ }
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
+ StringRef Name = DV->getName();
+
+ // Translate tag to proper Dwarf tag.
+ unsigned Tag = DV->getTag();
+
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
+ DbgVariable *AbsVar = DV->getAbstractVariable();
+ DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL;
+ if (AbsDIE)
+ addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsDIE);
+ else {
+ addString(VariableDie, dwarf::DW_AT_name, Name);
+ addSourceLine(VariableDie, DV->getVariable());
+ addType(VariableDie, DV->getType());
+ }
+
+ if (DV->isArtificial())
+ addFlag(VariableDie, dwarf::DW_AT_artificial);
+
+ if (isScopeAbstract) {
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // Add variable address.
+
+ unsigned Offset = DV->getDotDebugLocOffset();
+ if (Offset != ~0U) {
+ addLabel(VariableDie, dwarf::DW_AT_location,
+ dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("debug_loc", Offset));
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // Check if variable is described by a DBG_VALUE instruction.
+ if (const MachineInstr *DVInsn = DV->getMInsn()) {
+ bool updated = false;
+ if (DVInsn->getNumOperands() == 3) {
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (DVInsn->getOperand(1).isImm() &&
+ TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF,
+ DVInsn->getOperand(1).getImm(),
+ FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, VariableDie, Location);
+
+ } else if (RegOp.getReg())
+ addVariableAddress(DV, VariableDie,
+ MachineLocation(RegOp.getReg()));
+ updated = true;
+ }
+ else if (DVInsn->getOperand(0).isImm())
+ updated =
+ addConstantValue(VariableDie, DVInsn->getOperand(0),
+ DV->getType());
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated =
+ addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ updated =
+ addConstantValue(VariableDie,
+ DVInsn->getOperand(0).getCImm(),
+ DV->getType().isUnsignedDIType());
+ } else {
+ addVariableAddress(DV, VariableDie,
+ Asm->getDebugValueLocation(DVInsn));
+ updated = true;
+ }
+ if (!updated) {
+ // If variableDie is not updated then DBG_VALUE instruction does not
+ // have valid variable info.
+ delete VariableDie;
+ return NULL;
+ }
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ } else {
+ // .. else use frame index.
+ int FI = DV->getFrameIndex();
+ if (FI != ~0) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, VariableDie, Location);
+ }
+ }
+
+ DV->setDIE(VariableDie);
+ return VariableDie;
+}
+
+/// createMemberDIE - Create new member DIE.
+DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
+ DIE *MemberDie = new DIE(DT.getTag());
+ StringRef Name = DT.getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, Name);
+
+ addType(MemberDie, DT.getTypeDerivedFrom());
+
+ addSourceLine(MemberDie, DT);
+
+ DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (Asm->getDataLayout().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+ // Here WD_AT_data_member_location points to the anonymous
+ // field that includes this bit field.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+ } else
+ // This is not a bitfield.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+ if (DT.getTag() == dwarf::DW_TAG_inheritance
+ && DT.isVirtual()) {
+
+ // For C++, virtual base classes are not at fixed offset. Use following
+ // expression to extract appropriate offset from vtable.
+ // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+ DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ VBaseLocationDie);
+ } else
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+ if (DT.isProtected())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ // Otherwise C++ member and base classes are considered public.
+ else
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+ if (DT.isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
+ dwarf::DW_VIRTUALITY_virtual);
+
+ // Objective-C properties.
+ if (MDNode *PNode = DT.getObjCProperty())
+ if (DIEEntry *PropertyDie = getDIEEntry(PNode))
+ MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+ PropertyDie);
+
+ if (DT.isArtificial())
+ addFlag(MemberDie, dwarf::DW_AT_artificial);
+
+ return MemberDie;
+}
+
+/// createStaticMemberDIE - Create new DIE for C++ static member.
+DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) {
+ if (!DT.Verify())
+ return NULL;
+
+ DIE *StaticMemberDIE = new DIE(DT.getTag());
+ DIType Ty = DT.getTypeDerivedFrom();
+
+ addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName());
+ addType(StaticMemberDIE, Ty);
+ addSourceLine(StaticMemberDIE, DT);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_external);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
+
+ // FIXME: We could omit private if the parent is a class_type, and
+ // public if the parent is something else.
+ if (DT.isProtected())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+
+ if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
+ addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType());
+ if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
+ addConstantFPValue(StaticMemberDIE, CFP);
+
+ insertDIE(DT, StaticMemberDIE);
+ return StaticMemberDIE;
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 000000000000..2b180c6cc3f4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,383 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo.h"
+
+namespace llvm {
+
+class DwarfDebug;
+class DwarfUnits;
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class ConstantFP;
+class DbgVariable;
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information associated
+/// with a source file.
+class CompileUnit {
+ /// UniqueID - a numeric ID unique among all CUs in the module
+ ///
+ unsigned UniqueID;
+
+ /// Language - The DW_AT_language of the compile unit
+ ///
+ unsigned Language;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ const OwningPtr<DIE> CUDie;
+
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ // Holders for some common dwarf information.
+ DwarfDebug *DD;
+ DwarfUnits *DU;
+
+ /// IndexTyDie - An anonymous type for index type. Owned by CUDie.
+ DIE *IndexTyDie;
+
+ /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton
+ /// variables to debug information entries.
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+ /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton
+ /// descriptors to debug information entries using a DIEEntry proxy.
+ DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+ /// GlobalNames - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> GlobalNames;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ ///
+ StringMap<DIE*> GlobalTypes;
+
+ /// AccelNames - A map of names for the name accelerator table.
+ ///
+ StringMap<std::vector<DIE*> > AccelNames;
+ StringMap<std::vector<DIE*> > AccelObjC;
+ StringMap<std::vector<DIE*> > AccelNamespace;
+ StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes;
+
+ /// DIEBlocks - A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+ /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
+ /// need DW_AT_containing_type attribute. This attribute points to a DIE that
+ /// corresponds to the MDNode mapped with the subprogram DIE.
+ DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
+ /// Offset of the CUDie from beginning of debug info section.
+ unsigned DebugInfoOffset;
+
+ /// getLowerBoundDefault - Return the default lower bound for an array. If the
+ /// DWARF version doesn't handle the language, return -1.
+ int64_t getDefaultLowerBound() const;
+
+ /// getOrCreateContextDIE - Get context owner's DIE.
+ DIE *getOrCreateContextDIE(DIDescriptor Context);
+
+public:
+ CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
+ DwarfUnits *);
+ ~CompileUnit();
+
+ // Accessors.
+ unsigned getUniqueID() const { return UniqueID; }
+ unsigned getLanguage() const { return Language; }
+ DIE* getCUDie() const { return CUDie.get(); }
+ unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
+ const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
+ const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+ const StringMap<std::vector<DIE*> > &getAccelNames() const {
+ return AccelNames;
+ }
+ const StringMap<std::vector<DIE*> > &getAccelObjC() const {
+ return AccelObjC;
+ }
+ const StringMap<std::vector<DIE*> > &getAccelNamespace() const {
+ return AccelNamespace;
+ }
+ const StringMap<std::vector<std::pair<DIE*, unsigned > > >
+ &getAccelTypes() const {
+ return AccelTypes;
+ }
+
+ void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+ /// addGlobalName - Add a new global entity to the compile unit.
+ ///
+ void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; }
+
+ /// addGlobalType - Add a new global type to the compile unit.
+ ///
+ void addGlobalType(DIType Ty);
+
+
+ /// addAccelName - Add a new name to the name accelerator table.
+ void addAccelName(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelNames[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelObjC(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelObjC[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelNamespace(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelNamespace[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) {
+ std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name];
+ DIEs.push_back(Die);
+ }
+
+ /// getDIE - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+ DIEBlock *getDIEBlock() {
+ return new (DIEValueAllocator) DIEBlock();
+ }
+
+ /// insertDIE - Insert DIE into the map.
+ void insertDIE(const MDNode *N, DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(N, D));
+ }
+
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
+ if (I == MDNodeToDIEEntryMap.end())
+ return NULL;
+ return I->second;
+ }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ /// addDie - Adds or interns the DIE to the compile unit.
+ ///
+ void addDie(DIE *Buffer) {
+ this->CUDie->addChild(Buffer);
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() {
+ return IndexTyDie;
+ }
+
+ // setIndexTyDie - Set D as anonymous type for index which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) {
+ IndexTyDie = D;
+ }
+
+ /// addFlag - Add a flag that is true to the DIE.
+ void addFlag(DIE *Die, unsigned Attribute);
+
+ /// addUInt - Add an unsigned integer attribute data and value.
+ ///
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+ /// addSInt - Add an signed integer attribute data and value.
+ ///
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// addString - Add a string attribute data and value.
+ ///
+ void addString(DIE *Die, unsigned Attribute, const StringRef Str);
+
+ /// addLocalString - Add a string attribute data and value.
+ ///
+ void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str);
+
+ /// addLabel - Add a Dwarf label attribute data and value.
+ ///
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label);
+
+ /// addLabelAddress - Add a dwarf label attribute data and value using
+ /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ ///
+ void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label);
+
+ /// addOpAddress - Add a dwarf op address data and value using the
+ /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ ///
+ void addOpAddress(DIE *Die, MCSymbol *Label);
+
+ /// addDelta - Add a label delta attribute data and value.
+ ///
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+
+ /// addBlock - Add block data.
+ ///
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// addSourceLine - Add location information to specified debug information
+ /// entry.
+ void addSourceLine(DIE *Die, DIVariable V);
+ void addSourceLine(DIE *Die, DIGlobalVariable G);
+ void addSourceLine(DIE *Die, DISubprogram SP);
+ void addSourceLine(DIE *Die, DIType Ty);
+ void addSourceLine(DIE *Die, DINameSpace NS);
+ void addSourceLine(DIE *Die, DIObjCProperty Ty);
+
+ /// addAddress - Add an address attribute to a die based on the location
+ /// provided.
+ void addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addConstantValue - Add constant value entry in variable DIE.
+ bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
+ bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
+ bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned);
+
+ /// addConstantFPValue - Add constant value entry in variable DIE.
+ bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
+ bool addConstantFPValue(DIE *Die, const ConstantFP *CFP);
+
+ /// addTemplateParams - Add template parameters in buffer.
+ void addTemplateParams(DIE &Buffer, DIArray TParams);
+
+ /// addRegisterOp - Add register operand.
+ void addRegisterOp(DIE *TheDie, unsigned Reg);
+
+ /// addRegisterOffset - Add register offset.
+ void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset);
+
+ /// addComplexAddress - Start with the address based on the location provided,
+ /// and generate the DWARF information necessary to find the actual variable
+ /// (navigating the extra location information encoded in the type) based on
+ /// the starting location. Add the DWARF information to the die.
+ ///
+ void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ // FIXME: Should be reformulated in terms of addComplexAddress.
+ /// addBlockByrefAddress - Start with the address based on the location
+ /// provided, and generate the DWARF information necessary to find the
+ /// actual Block variable (navigating the Block struct) based on the
+ /// starting location. Add the DWARF information to the die. Obsolete,
+ /// please use addComplexAddress instead.
+ ///
+ void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addVariableAddress - Add DW_AT_location attribute for a
+ /// DbgVariable based on provided MachineLocation.
+ void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location);
+
+ /// addToContextOwner - Add Die into the list of its context owner's children.
+ void addToContextOwner(DIE *Die, DIDescriptor Context);
+
+ /// addType - Add a new type attribute to the specified entity. This takes
+ /// and attribute parameter because DW_AT_friend attributes are also
+ /// type references.
+ void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type);
+
+ /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+ DIE *getOrCreateNameSpace(DINameSpace NS);
+
+ /// getOrCreateSubprogramDIE - Create new DIE using SP.
+ DIE *getOrCreateSubprogramDIE(DISubprogram SP);
+
+ /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+ /// given DIType.
+ DIE *getOrCreateTypeDIE(const MDNode *N);
+
+ /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateTypeParameter.
+ DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+ /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create
+ /// new DIE for the given DITemplateValueParameter.
+ DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
+ /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// information entry.
+ DIEEntry *createDIEEntry(DIE *Entry);
+
+ /// createGlobalVariableDIE - create global variable DIE.
+ void createGlobalVariableDIE(const MDNode *N);
+
+ void addPubTypes(DISubprogram SP);
+
+ /// constructTypeDIE - Construct basic type die from DIBasicType.
+ void constructTypeDIE(DIE &Buffer,
+ DIBasicType BTy);
+
+ /// constructTypeDIE - Construct derived type die from DIDerivedType.
+ void constructTypeDIE(DIE &Buffer,
+ DIDerivedType DTy);
+
+ /// constructTypeDIE - Construct type DIE from DICompositeType.
+ void constructTypeDIE(DIE &Buffer,
+ DICompositeType CTy);
+
+ /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+ void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+ /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+ void constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy);
+
+ /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+ DIE *constructEnumTypeDIE(DIEnumerator ETy);
+
+ /// constructContainingTypeDIEs - Construct DIEs for types that contain
+ /// vtables.
+ void constructContainingTypeDIEs();
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract);
+
+ /// createMemberDIE - Create new member DIE.
+ DIE *createMemberDIE(DIDerivedType DT);
+
+ /// createStaticMemberDIE - Create new static data member DIE.
+ DIE *createStaticMemberDIE(DIDerivedType DT);
+
+private:
+
+ // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+ DIEInteger *DIEIntegerOne;
+};
+
+} // end llvm namespace
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 000000000000..d3cb4f9c1c0e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,2570 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfDebug.h"
+#include "DIE.h"
+#include "DwarfAccelTable.h"
+#include "DwarfCompileUnit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+ cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
+static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
+ cl::desc("Make an absence of debug location information explicit."),
+ cl::init(false));
+
+static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames",
+ cl::Hidden, cl::init(false),
+ cl::desc("Generate DWARF pubnames section"));
+
+namespace {
+ enum DefaultOnOff {
+ Default, Enable, Disable
+ };
+}
+
+static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
+ cl::desc("Output prototype dwarf accelerator tables."),
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
+
+static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden,
+ cl::desc("Compatibility with Darwin gdb."),
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
+
+static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden,
+ cl::desc("Output prototype dwarf split debug info."),
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
+
+namespace {
+ const char *DWARFGroupName = "DWARF Emission";
+ const char *DbgTimerName = "DWARF Debug Writer";
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+// Configuration values for initial hash set sizes (log2).
+//
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+
+namespace llvm {
+
+DIType DbgVariable::getType() const {
+ DIType Ty = Var.getType();
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (Var.isBlockByrefVariable()) {
+ /* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function addBlockByrefType does this. */
+ DIType subType = Ty;
+ unsigned tag = Ty.getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ subType = DTy.getTypeDerivedFrom();
+ }
+
+ DICompositeType blockStruct = DICompositeType(subType);
+ DIArray Elements = blockStruct.getTypeArray();
+
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ if (getName() == DT.getName())
+ return (DT.getTypeDerivedFrom());
+ }
+ }
+ return Ty;
+}
+
+} // end llvm namespace
+
+DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+ : Asm(A), MMI(Asm->MMI), FirstCU(0),
+ AbbreviationsSet(InitAbbreviationsSetSize),
+ SourceIdMap(DIEValueAllocator),
+ PrevLabel(NULL), GlobalCUIndexCount(0),
+ InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string",
+ DIEValueAllocator),
+ SkeletonAbbrevSet(InitAbbreviationsSetSize),
+ SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string",
+ DIEValueAllocator) {
+
+ DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
+ DwarfStrSectionSym = TextSectionSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
+ DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
+ FunctionBeginSym = FunctionEndSym = 0;
+
+ // Turn on accelerator tables and older gdb compatibility
+ // for Darwin.
+ bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+ if (DarwinGDBCompat == Default) {
+ if (IsDarwin)
+ IsDarwinGDBCompat = true;
+ else
+ IsDarwinGDBCompat = false;
+ } else
+ IsDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
+
+ if (DwarfAccelTables == Default) {
+ if (IsDarwin)
+ HasDwarfAccelTables = true;
+ else
+ HasDwarfAccelTables = false;
+ } else
+ HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+
+ if (SplitDwarf == Default)
+ HasSplitDwarf = false;
+ else
+ HasSplitDwarf = SplitDwarf == Enable ? true : false;
+
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ beginModule();
+ }
+}
+DwarfDebug::~DwarfDebug() {
+}
+
+// Switch to the specified MCSection and emit an assembler
+// temporary label to it if SymbolStem is specified.
+static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+ const char *SymbolStem = 0) {
+ Asm->OutStreamer.SwitchSection(Section);
+ if (!SymbolStem) return 0;
+
+ MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+ Asm->OutStreamer.EmitLabel(TmpSym);
+ return TmpSym;
+}
+
+MCSymbol *DwarfUnits::getStringPoolSym() {
+ return Asm->GetTempSymbol(StringPref);
+}
+
+MCSymbol *DwarfUnits::getStringPoolEntry(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry =
+ StringPool.GetOrCreateValue(Str).getValue();
+ if (Entry.first) return Entry.first;
+
+ Entry.second = NextStringPoolNumber++;
+ return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
+}
+
+unsigned DwarfUnits::getStringPoolIndex(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry =
+ StringPool.GetOrCreateValue(Str).getValue();
+ if (Entry.first) return Entry.second;
+
+ Entry.second = NextStringPoolNumber++;
+ Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
+ return Entry.second;
+}
+
+unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) {
+ std::pair<MCSymbol*, unsigned> &Entry = AddressPool[Sym];
+ if (Entry.first) return Entry.second;
+
+ Entry.second = NextAddrPoolNumber++;
+ Entry.first = Sym;
+ return Entry.second;
+}
+
+// Define a unique number for the abbreviation.
+//
+void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+ // Profile the node so that we can make it unique.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+
+ // Check the set for priors.
+ DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev);
+
+ // If it's newly added.
+ if (InSet == &Abbrev) {
+ // Add to abbreviation list.
+ Abbreviations->push_back(&Abbrev);
+
+ // Assign the vector position + 1 as its number.
+ Abbrev.setNumber(Abbreviations->size());
+ } else {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ }
+}
+
+// If special LLVM prefix that is used to inform the asm
+// printer to not emit usual symbol prefix before the symbol name is used then
+// return linkage name after skipping this special LLVM prefix.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+static bool isObjCClass(StringRef Name) {
+ return Name.startswith("+") || Name.startswith("-");
+}
+
+static bool hasObjCCategory(StringRef Name) {
+ if (!isObjCClass(Name)) return false;
+
+ size_t pos = Name.find(')');
+ if (pos != std::string::npos) {
+ if (Name[pos+1] != ' ') return false;
+ return true;
+ }
+ return false;
+}
+
+static void getObjCClassCategory(StringRef In, StringRef &Class,
+ StringRef &Category) {
+ if (!hasObjCCategory(In)) {
+ Class = In.slice(In.find('[') + 1, In.find(' '));
+ Category = "";
+ return;
+ }
+
+ Class = In.slice(In.find('[') + 1, In.find('('));
+ Category = In.slice(In.find('[') + 1, In.find(' '));
+ return;
+}
+
+static StringRef getObjCMethodName(StringRef In) {
+ return In.slice(In.find(' ') + 1, In.find(']'));
+}
+
+// Add the various names to the Dwarf accelerator table names.
+static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
+ DIE* Die) {
+ if (!SP.isDefinition()) return;
+
+ TheCU->addAccelName(SP.getName(), Die);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
+ TheCU->addAccelName(SP.getLinkageName(), Die);
+
+ // If this is an Objective-C selector name add it to the ObjC accelerator
+ // too.
+ if (isObjCClass(SP.getName())) {
+ StringRef Class, Category;
+ getObjCClassCategory(SP.getName(), Class, Category);
+ TheCU->addAccelObjC(Class, Die);
+ if (Category != "")
+ TheCU->addAccelObjC(Category, Die);
+ // Also add the base method name to the name table.
+ TheCU->addAccelName(getObjCMethodName(SP.getName()), Die);
+ }
+}
+
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
+ const MDNode *SPNode) {
+ DIE *SPDie = SPCU->getDIE(SPNode);
+
+ assert(SPDie && "Unable to find subprogram DIE!");
+ DISubprogram SP(SPNode);
+
+ // If we're updating an abstract DIE, then we will be adding the children and
+ // object pointer later on. But what we don't want to do is process the
+ // concrete DIE twice.
+ DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode);
+ if (AbsSPDIE) {
+ bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie());
+ // Pick up abstract subprogram DIE.
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of
+ // DW_FORM_ref4.
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+ InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ AbsSPDIE);
+ SPCU->addDie(SPDie);
+ } else {
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ if (!SPDecl.isSubprogram()) {
+ // There is not any need to generate specification DIE for a function
+ // defined at compile unit level. If a function is defined inside another
+ // function then gdb prefers the definition at top level and but does not
+ // expect specification DIE in parent function. So avoid creating
+ // specification DIE for a function defined inside a function.
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+ !SP.getContext().isFile() &&
+ !isSubprogramContext(SP.getContext())) {
+ SPCU->addFlag(SPDie, dwarf::DW_AT_declaration);
+
+ // Add arguments.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(Args.getElement(i));
+ SPCU->addType(Arg, ATy);
+ if (ATy.isArtificial())
+ SPCU->addFlag(Arg, dwarf::DW_AT_artificial);
+ if (ATy.isObjectPointer())
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer,
+ dwarf::DW_FORM_ref4, Arg);
+ SPDie->addChild(Arg);
+ }
+ DIE *SPDeclDie = SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, SPDeclDie);
+ SPCU->addDie(SPDie);
+ }
+ }
+ }
+
+ SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc,
+ Asm->GetTempSymbol("func_begin",
+ Asm->getFunctionNumber()));
+ SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc,
+ Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber()));
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+ SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+ // Add name to the name table, we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_subprogram nodes.
+ addSubprogramNames(SPCU, SP, SPDie);
+
+ return SPDie;
+}
+
+// Construct new DW_TAG_lexical_block for this scope and attach
+// DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
+ LexicalScope *Scope) {
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
+
+ const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
+ if (Ranges.empty())
+ return 0;
+
+ SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
+ if (Ranges.size() > 1) {
+ // .debug_range section has not been laid out yet. Emit offset in
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // DW_AT_ranges appropriately.
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+ DebugRangeSymbols.size()
+ * Asm->getDataLayout().getPointerSize());
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+ DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+ }
+ DebugRangeSymbols.push_back(NULL);
+ DebugRangeSymbols.push_back(NULL);
+ return ScopeDIE;
+ }
+
+ MCSymbol *Start = getLabelBeforeInsn(RI->first);
+ MCSymbol *End = getLabelAfterInsn(RI->second);
+
+ if (End == 0) return 0;
+
+ assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
+ assert(End->isDefined() && "Invalid end label for an inlined scope!");
+
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End);
+
+ return ScopeDIE;
+}
+
+// This scope represents inlined body of a function. Construct DIE to
+// represent this concrete inlined copy of the function.
+DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
+ LexicalScope *Scope) {
+ const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
+ assert(Ranges.empty() == false &&
+ "LexicalScope does not have instruction markers!");
+
+ if (!Scope->getScopeNode())
+ return NULL;
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS);
+ DIE *OriginDIE = TheCU->getDIE(InlinedSP);
+ if (!OriginDIE) {
+ DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram.");
+ return NULL;
+ }
+
+ SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
+ MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+ MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+
+ if (StartLabel == 0 || EndLabel == 0) {
+ llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
+ }
+ assert(StartLabel->isDefined() &&
+ "Invalid starting label for an inlined scope!");
+ assert(EndLabel->isDefined() &&
+ "Invalid end label for an inlined scope!");
+
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, OriginDIE);
+
+ if (Ranges.size() > 1) {
+ // .debug_range section has not been laid out yet. Emit offset in
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // DW_AT_ranges appropriately.
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+ DebugRangeSymbols.size()
+ * Asm->getDataLayout().getPointerSize());
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+ DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+ }
+ DebugRangeSymbols.push_back(NULL);
+ DebugRangeSymbols.push_back(NULL);
+ } else {
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel);
+ }
+
+ InlinedSubprogramDIEs.insert(OriginDIE);
+
+ // Track the start label for this inlined function.
+ //.debug_inlined section specification does not clearly state how
+ // to emit inlined scope that is split into multiple instruction ranges.
+ // For now, use first instruction range and emit low_pc/high_pc pair and
+ // corresponding .debug_inlined section entry for this pair.
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ I = InlineInfo.find(InlinedSP);
+
+ if (I == InlineInfo.end()) {
+ InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
+ InlinedSPNodes.push_back(InlinedSP);
+ } else
+ I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
+
+ DILocation DL(Scope->getInlinedAt());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
+ getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
+ TheCU->getUniqueID()));
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+ // Add name to the name table, we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
+ addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
+
+ return ScopeDIE;
+}
+
+// Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
+ if (!Scope || !Scope->getScopeNode())
+ return NULL;
+
+ DIScope DS(Scope->getScopeNode());
+ // Early return to avoid creating dangling variable|scope DIEs.
+ if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() &&
+ !TheCU->getDIE(DS))
+ return NULL;
+
+ SmallVector<DIE *, 8> Children;
+ DIE *ObjectPointer = NULL;
+
+ // Collect arguments for current function.
+ if (LScopes.isCurrentFunctionScope(Scope))
+ for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
+ if (DbgVariable *ArgDV = CurrentFnArguments[i])
+ if (DIE *Arg =
+ TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) {
+ Children.push_back(Arg);
+ if (ArgDV->isObjectPointer()) ObjectPointer = Arg;
+ }
+
+ // Collect lexical scope children first.
+ const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope);
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i)
+ if (DIE *Variable =
+ TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) {
+ Children.push_back(Variable);
+ if (Variables[i]->isObjectPointer()) ObjectPointer = Variable;
+ }
+ const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
+ if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
+ Children.push_back(Nested);
+ DIE *ScopeDIE = NULL;
+ if (Scope->getInlinedAt())
+ ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
+ else if (DS.isSubprogram()) {
+ ProcessedSPNodes.insert(DS);
+ if (Scope->isAbstractScope()) {
+ ScopeDIE = TheCU->getDIE(DS);
+ // Note down abstract DIE.
+ if (ScopeDIE)
+ AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
+ }
+ else
+ ScopeDIE = updateSubprogramScopeDIE(TheCU, DS);
+ }
+ else {
+ // There is no need to emit empty lexical block DIE.
+ if (Children.empty())
+ return NULL;
+ ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
+ }
+
+ if (!ScopeDIE) return NULL;
+
+ // Add children
+ for (SmallVector<DIE *, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ ScopeDIE->addChild(*I);
+
+ if (DS.isSubprogram() && ObjectPointer != NULL)
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer,
+ dwarf::DW_FORM_ref4, ObjectPointer);
+
+ if (DS.isSubprogram())
+ TheCU->addPubTypes(DISubprogram(DS));
+
+ return ScopeDIE;
+}
+
+// Look up the source id with the given directory and source file names.
+// If none currently exists, create a new id and insert it in the
+// SourceIds map. This can update DirectoryNames and SourceFileNames maps
+// as well.
+unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
+ StringRef DirName, unsigned CUID) {
+ // If we use .loc in assembly, we can't separate .file entries according to
+ // compile units. Thus all files will belong to the default compile unit.
+ if (Asm->TM.hasMCUseLoc() &&
+ Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+ CUID = 0;
+
+ // If FE did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return getOrCreateSourceID("<stdin>", StringRef(), CUID);
+
+ // TODO: this might not belong here. See if we can factor this better.
+ if (DirName == CompilationDir)
+ DirName = "";
+
+ // FileIDCUMap stores the current ID for the given compile unit.
+ unsigned SrcId = FileIDCUMap[CUID] + 1;
+
+ // We look up the CUID/file/dir by concatenating them with a zero byte.
+ SmallString<128> NamePair;
+ NamePair += CUID;
+ NamePair += '\0';
+ NamePair += DirName;
+ NamePair += '\0'; // Zero bytes are not allowed in paths.
+ NamePair += FileName;
+
+ StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
+ if (Ent.getValue() != SrcId)
+ return Ent.getValue();
+
+ FileIDCUMap[CUID] = SrcId;
+ // Print out a .file directive to specify files for .loc directives.
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID);
+
+ return SrcId;
+}
+
+// Create new CompileUnit for the given metadata node with tag
+// DW_TAG_compile_unit.
+CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
+ DICompileUnit DIUnit(N);
+ StringRef FN = DIUnit.getFilename();
+ CompilationDir = DIUnit.getDirectory();
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+ DIUnit.getLanguage(), Die, Asm,
+ this, &InfoHolder);
+
+ FileIDCUMap[NewCU->getUniqueID()] = 0;
+ // Call this to emit a .file directive if it wasn't emitted for the source
+ // file this CU comes from yet.
+ getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID());
+
+ NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
+ NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ DIUnit.getLanguage());
+ NewCU->addString(Die, dwarf::DW_AT_name, FN);
+ // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
+ // into an entity. We're using 0 (or a NULL label) for this.
+ NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+
+ // Define start line table label for each Compile Unit.
+ MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
+ NewCU->getUniqueID());
+ Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
+ NewCU->getUniqueID());
+
+ // DW_AT_stmt_list is a offset of line number information for this
+ // compile unit in debug_line section.
+ // The line table entries are not always emitted in assembly, so it
+ // is not okay to use line_table_start here.
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ NewCU->getUniqueID() == 0 ?
+ Asm->GetTempSymbol("section_line") : LineTableStartSym);
+ else if (NewCU->getUniqueID() == 0)
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ else
+ NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ LineTableStartSym, DwarfLineSectionSym);
+
+ if (!CompilationDir.empty())
+ NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+ if (DIUnit.isOptimized())
+ NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
+
+ StringRef Flags = DIUnit.getFlags();
+ if (!Flags.empty())
+ NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
+
+ if (unsigned RVer = DIUnit.getRunTimeVersion())
+ NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+
+ if (!FirstCU)
+ FirstCU = NewCU;
+
+ InfoHolder.addUnit(NewCU);
+
+ CUMap.insert(std::make_pair(N, NewCU));
+ return NewCU;
+}
+
+// Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
+ const MDNode *N) {
+ CompileUnit *&CURef = SPMap[N];
+ if (CURef)
+ return;
+ CURef = TheCU;
+
+ DISubprogram SP(N);
+ if (!SP.isDefinition())
+ // This is a method declaration which will be handled while constructing
+ // class type.
+ return;
+
+ DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP);
+
+ // Add to map.
+ TheCU->insertDIE(N, SubprogramDie);
+
+ // Add to context owner.
+ TheCU->addToContextOwner(SubprogramDie, SP.getContext());
+
+ // Expose as global, if requested.
+ if (GenerateDwarfPubNamesSection)
+ TheCU->addGlobalName(SP.getName(), SubprogramDie);
+}
+
+// Emit all Dwarf sections that should come prior to the content. Create
+// global DIEs and emit initial debug info sections. This is invoked by
+// the target AsmPrinter.
+void DwarfDebug::beginModule() {
+ if (DisableDebugInfoPrinting)
+ return;
+
+ const Module *M = MMI->getModule();
+
+ // If module has named metadata anchors then use them, otherwise scan the
+ // module using debug info finder to collect debug info.
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes)
+ return;
+
+ // Emit initial sections so we can reference labels later.
+ emitSectionLabels();
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CUNode(CU_Nodes->getOperand(i));
+ CompileUnit *CU = constructCompileUnit(CUNode);
+ DIArray GVs = CUNode.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
+ CU->createGlobalVariableDIE(GVs.getElement(i));
+ DIArray SPs = CUNode.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ constructSubprogramDIE(CU, SPs.getElement(i));
+ DIArray EnumTypes = CUNode.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
+ DIArray RetainedTypes = CUNode.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+ // If we're splitting the dwarf out now that we've got the entire
+ // CU then construct a skeleton CU based upon it.
+ if (useSplitDwarf()) {
+ // This should be a unique identifier when we want to build .dwp files.
+ CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+ // Now construct the skeleton CU associated.
+ constructSkeletonCU(CUNode);
+ }
+ }
+
+ // Tell MMI that we have debug info.
+ MMI->setDebugInfoAvailability(true);
+
+ // Prime section data.
+ SectionMap.insert(Asm->getObjFileLowering().getTextSection());
+}
+
+// Attach DW_AT_inline attribute with inlined subprogram DIEs.
+void DwarfDebug::computeInlinedDIEs() {
+ // Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+ for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
+ AE = AbstractSPDies.end(); AI != AE; ++AI) {
+ DIE *ISP = AI->second;
+ if (InlinedSubprogramDIEs.count(ISP))
+ continue;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+}
+
+// Collect info for variables that were optimized out.
+void DwarfDebug::collectDeadVariables() {
+ const Module *M = MMI->getModule();
+ DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap;
+
+ if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit TheCU(CU_Nodes->getOperand(i));
+ DIArray Subprograms = TheCU.getSubprograms();
+ for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
+ DISubprogram SP(Subprograms.getElement(i));
+ if (ProcessedSPNodes.count(SP) != 0) continue;
+ if (!SP.Verify()) continue;
+ if (!SP.isDefinition()) continue;
+ DIArray Variables = SP.getVariables();
+ if (Variables.getNumElements() == 0) continue;
+
+ LexicalScope *Scope =
+ new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
+ DeadFnScopeMap[SP] = Scope;
+
+ // Construct subprogram DIE and add variables DIEs.
+ CompileUnit *SPCU = CUMap.lookup(TheCU);
+ assert(SPCU && "Unable to find Compile Unit!");
+ constructSubprogramDIE(SPCU, SP);
+ DIE *ScopeDIE = SPCU->getDIE(SP);
+ for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
+ DIVariable DV(Variables.getElement(vi));
+ if (!DV.Verify()) continue;
+ DbgVariable *NewVar = new DbgVariable(DV, NULL);
+ if (DIE *VariableDIE =
+ SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
+ ScopeDIE->addChild(VariableDIE);
+ }
+ }
+ }
+ }
+ DeleteContainerSeconds(DeadFnScopeMap);
+}
+
+void DwarfDebug::finalizeModuleInfo() {
+ // Collect info for variables that were optimized out.
+ collectDeadVariables();
+
+ // Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ computeInlinedDIEs();
+
+ // Emit DW_AT_containing_type attribute to connect types with their
+ // vtable holding type.
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(),
+ CUE = CUMap.end(); CUI != CUE; ++CUI) {
+ CompileUnit *TheCU = CUI->second;
+ TheCU->constructContainingTypeDIEs();
+ }
+
+ // Compute DIE offsets and sizes.
+ InfoHolder.computeSizeAndOffsets();
+ if (useSplitDwarf())
+ SkeletonHolder.computeSizeAndOffsets();
+}
+
+void DwarfDebug::endSections() {
+ // Standard sections final addresses.
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
+
+ // End text sections.
+ for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) {
+ Asm->OutStreamer.SwitchSection(SectionMap[I]);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1));
+ }
+}
+
+// Emit all Dwarf sections that should come after the content.
+void DwarfDebug::endModule() {
+
+ if (!FirstCU) return;
+
+ // End any existing sections.
+ // TODO: Does this need to happen?
+ endSections();
+
+ // Finalize the debug info for the module.
+ finalizeModuleInfo();
+
+ if (!useSplitDwarf()) {
+ // Emit all the DIEs into a debug info section.
+ emitDebugInfo();
+
+ // Corresponding abbreviations into a abbrev section.
+ emitAbbreviations();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ emitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit inline info.
+ // TODO: When we don't need the option anymore we
+ // can remove all of the code that this section
+ // depends upon.
+ if (useDarwinGDBCompat())
+ emitDebugInlineInfo();
+ } else {
+ // TODO: Fill this in for separated debug sections and separate
+ // out information into new sections.
+
+ // Emit the debug info section and compile units.
+ emitDebugInfo();
+ emitDebugInfoDWO();
+
+ // Corresponding abbreviations into a abbrev section.
+ emitAbbreviations();
+ emitDebugAbbrevDWO();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ emitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit DWO addresses.
+ InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
+
+ // Emit inline info.
+ // TODO: When we don't need the option anymore we
+ // can remove all of the code that this section
+ // depends upon.
+ if (useDarwinGDBCompat())
+ emitDebugInlineInfo();
+ }
+
+ // Emit info into the dwarf accelerator table sections.
+ if (useDwarfAccelTables()) {
+ emitAccelNames();
+ emitAccelObjC();
+ emitAccelNamespaces();
+ emitAccelTypes();
+ }
+
+ // Emit info into a debug pubnames section, if requested.
+ if (GenerateDwarfPubNamesSection)
+ emitDebugPubnames();
+
+ // Emit info into a debug pubtypes section.
+ // TODO: When we don't need the option anymore we can
+ // remove all of the code that adds to the table.
+ if (useDarwinGDBCompat())
+ emitDebugPubTypes();
+
+ // Finally emit string information into a string table.
+ emitDebugStr();
+ if (useSplitDwarf())
+ emitDebugStrDWO();
+
+ // clean up.
+ SPMap.clear();
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I)
+ delete I->second;
+
+ for (SmallVector<CompileUnit *, 1>::iterator I = SkeletonCUs.begin(),
+ E = SkeletonCUs.end(); I != E; ++I)
+ delete *I;
+
+ // Reset these for the next Module if we have one.
+ FirstCU = NULL;
+}
+
+// Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
+ DebugLoc ScopeLoc) {
+ LLVMContext &Ctx = DV->getContext();
+ // More then one inlined variable corresponds to one abstract variable.
+ DIVariable Var = cleanseInlinedVariable(DV, Ctx);
+ DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
+ if (AbsDbgVariable)
+ return AbsDbgVariable;
+
+ LexicalScope *Scope = LScopes.findAbstractScope(ScopeLoc.getScope(Ctx));
+ if (!Scope)
+ return NULL;
+
+ AbsDbgVariable = new DbgVariable(Var, NULL);
+ addScopeVariable(Scope, AbsDbgVariable);
+ AbstractVariables[Var] = AbsDbgVariable;
+ return AbsDbgVariable;
+}
+
+// If Var is a current function argument then add it to CurrentFnArguments list.
+bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
+ DbgVariable *Var, LexicalScope *Scope) {
+ if (!LScopes.isCurrentFunctionScope(Scope))
+ return false;
+ DIVariable DV = Var->getVariable();
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ return false;
+ unsigned ArgNo = DV.getArgNumber();
+ if (ArgNo == 0)
+ return false;
+
+ size_t Size = CurrentFnArguments.size();
+ if (Size == 0)
+ CurrentFnArguments.resize(MF->getFunction()->arg_size());
+ // llvm::Function argument size is not good indicator of how many
+ // arguments does the function have at source level.
+ if (ArgNo > Size)
+ CurrentFnArguments.resize(ArgNo * 2);
+ CurrentFnArguments[ArgNo - 1] = Var;
+ return true;
+}
+
+// Collect variable information from side table maintained by MMI.
+void
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ const MDNode *Var = VI->first;
+ if (!Var) continue;
+ Processed.insert(Var);
+ DIVariable DV(Var);
+ const std::pair<unsigned, DebugLoc> &VP = VI->second;
+
+ LexicalScope *Scope = LScopes.findLexicalScope(VP.second);
+
+ // If variable scope is not found then skip this variable.
+ if (Scope == 0)
+ continue;
+
+ DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+ DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable);
+ RegVar->setFrameIndex(VP.first);
+ if (!addCurrentFnArgument(MF, RegVar, Scope))
+ addScopeVariable(Scope, RegVar);
+ if (AbsDbgVariable)
+ AbsDbgVariable->setFrameIndex(VP.first);
+ }
+}
+
+// Return true if debug value, encoded by DBG_VALUE instruction, is in a
+// defined reg.
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+ assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ return MI->getNumOperands() == 3 &&
+ MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
+ MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
+}
+
+// Get .debug_loc entry for the instruction range starting at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+ const MCSymbol *FLabel,
+ const MCSymbol *SLabel,
+ const MachineInstr *MI) {
+ const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ if (MI->getNumOperands() != 3) {
+ MachineLocation MLoc = Asm->getDebugValueLocation(MI);
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
+ MachineLocation MLoc;
+ MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+ if (MI->getOperand(0).isFPImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+ if (MI->getOperand(0).isCImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+
+ llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
+}
+
+// Find variables for each lexical scope.
+void
+DwarfDebug::collectVariableInfo(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
+
+ // collection info from MMI table.
+ collectVariableInfoFromMMITable(MF, Processed);
+
+ for (SmallVectorImpl<const MDNode*>::const_iterator
+ UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE;
+ ++UVI) {
+ const MDNode *Var = *UVI;
+ if (Processed.count(Var))
+ continue;
+
+ // History contains relevant DBG_VALUE instructions for Var and instructions
+ // clobbering it.
+ SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ if (History.empty())
+ continue;
+ const MachineInstr *MInsn = History.front();
+
+ DIVariable DV(Var);
+ LexicalScope *Scope = NULL;
+ if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(DV.getContext()).describes(MF->getFunction()))
+ Scope = LScopes.getCurrentFunctionScope();
+ else if (MDNode *IA = DV.getInlinedAt())
+ Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
+ else
+ Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ Processed.insert(DV);
+ assert(MInsn->isDebugValue() && "History must begin with debug value");
+ DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
+ DbgVariable *RegVar = new DbgVariable(DV, AbsVar);
+ if (!addCurrentFnArgument(MF, RegVar, Scope))
+ addScopeVariable(Scope, RegVar);
+ if (AbsVar)
+ AbsVar->setMInsn(MInsn);
+
+ // Simplify ranges that are fully coalesced.
+ if (History.size() <= 1 || (History.size() == 2 &&
+ MInsn->isIdenticalTo(History.back()))) {
+ RegVar->setMInsn(MInsn);
+ continue;
+ }
+
+ // Handle multiple DBG_VALUE instructions describing one variable.
+ RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
+
+ for (SmallVectorImpl<const MachineInstr*>::const_iterator
+ HI = History.begin(), HE = History.end(); HI != HE; ++HI) {
+ const MachineInstr *Begin = *HI;
+ assert(Begin->isDebugValue() && "Invalid History entry");
+
+ // Check if DBG_VALUE is truncating a range.
+ if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg()
+ && !Begin->getOperand(0).getReg())
+ continue;
+
+ // Compute the range for a register location.
+ const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
+ const MCSymbol *SLabel = 0;
+
+ if (HI + 1 == HE)
+ // If Begin is the last instruction in History then its value is valid
+ // until the end of the function.
+ SLabel = FunctionEndSym;
+ else {
+ const MachineInstr *End = HI[1];
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+ << "\t" << *Begin << "\t" << *End << "\n");
+ if (End->isDebugValue())
+ SLabel = getLabelBeforeInsn(End);
+ else {
+ // End is a normal instruction clobbering the range.
+ SLabel = getLabelAfterInsn(End);
+ assert(SLabel && "Forgot label after clobber instruction");
+ ++HI;
+ }
+ }
+
+ // The value is valid until the next DBG_VALUE or clobber.
+ DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel,
+ Begin));
+ }
+ DotDebugLocEntries.push_back(DotDebugLocEntry());
+ }
+
+ // Collect info for variables that were optimized out.
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables();
+ for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
+ DIVariable DV(Variables.getElement(i));
+ if (!DV || !DV.Verify() || !Processed.insert(DV))
+ continue;
+ if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
+ addScopeVariable(Scope, new DbgVariable(DV, NULL));
+ }
+}
+
+// Return Label preceding the instruction.
+MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+ MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
+ assert(Label && "Didn't insert label before instruction");
+ return Label;
+}
+
+// Return Label immediately following the instruction.
+MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+ return LabelsAfterInsn.lookup(MI);
+}
+
+// Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
+ // Check if source location changes, but ignore DBG_VALUE locations.
+ if (!MI->isDebugValue()) {
+ DebugLoc DL = MI->getDebugLoc();
+ if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
+ unsigned Flags = 0;
+ PrevInstLoc = DL;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ PrologEndLoc = DebugLoc();
+ }
+ if (PrologEndLoc.isUnknown())
+ Flags |= DWARF2_FLAG_IS_STMT;
+
+ if (!DL.isUnknown()) {
+ const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+ } else
+ recordSourceLine(0, 0, 0, 0);
+ }
+ }
+
+ // Insert labels where requested.
+ DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+ LabelsBeforeInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsBeforeInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+// Process end of an instruction.
+void DwarfDebug::endInstruction(const MachineInstr *MI) {
+ // Don't create a new label after DBG_VALUE instructions.
+ // They don't generate code.
+ if (!MI->isDebugValue())
+ PrevLabel = 0;
+
+ DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+ LabelsAfterInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsAfterInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ // We need a label after this instruction.
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+// Each LexicalScope has first instruction and last instruction to mark
+// beginning and end of a scope respectively. Create an inverse map that list
+// scopes starts (and ends) with an instruction. One instruction may start (or
+// end) multiple scopes. Ignore scopes that are not reachable.
+void DwarfDebug::identifyScopeMarkers() {
+ SmallVector<LexicalScope *, 4> WorkList;
+ WorkList.push_back(LScopes.getCurrentFunctionScope());
+ while (!WorkList.empty()) {
+ LexicalScope *S = WorkList.pop_back_val();
+
+ const SmallVector<LexicalScope *, 4> &Children = S->getChildren();
+ if (!Children.empty())
+ for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+
+ if (S->isAbstractScope())
+ continue;
+
+ const SmallVector<InsnRange, 4> &Ranges = S->getRanges();
+ if (Ranges.empty())
+ continue;
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ assert(RI->first && "InsnRange does not have first instruction!");
+ assert(RI->second && "InsnRange does not have second instruction!");
+ requestLabelBeforeInsn(RI->first);
+ requestLabelAfterInsn(RI->second);
+ }
+ }
+}
+
+// Get MDNode for DebugLoc's scope.
+static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
+ if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
+ return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
+ return DL.getScope(Ctx);
+}
+
+// Walk up the scope chain of given debug loc and find line number info
+// for the function.
+static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
+ const MDNode *Scope = getScopeNode(DL, Ctx);
+ DISubprogram SP = getDISubprogram(Scope);
+ if (SP.Verify()) {
+ // Check for number of operands since the compatibility is
+ // cheap here.
+ if (SP->getNumOperands() > 19)
+ return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
+ else
+ return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ }
+
+ return DebugLoc();
+}
+
+// Gather pre-function debug information. Assumes being called immediately
+// after the function entry point has been emitted.
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo()) return;
+ LScopes.initialize(*MF);
+ if (LScopes.empty()) return;
+ identifyScopeMarkers();
+
+ // Set DwarfCompileUnitID in MCContext to the Compile Unit this function
+ // belongs to.
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+ assert(TheCU && "Unable to find compile unit!");
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+
+ FunctionBeginSym = Asm->GetTempSymbol("func_begin",
+ Asm->getFunctionNumber());
+ // Assumes in correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(FunctionBeginSym);
+
+ assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
+
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ // LiveUserVar - Map physreg numbers to the MDNode they contain.
+ std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ bool AtBlockEntry = true;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MI = II;
+
+ if (MI->isDebugValue()) {
+ assert(MI->getNumOperands() > 1 && "Invalid machine instruction!");
+
+ // Keep track of user variables.
+ const MDNode *Var =
+ MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ // Variable is in a register, we need to check for clobbers.
+ if (isDbgValueInDefinedReg(MI))
+ LiveUserVar[MI->getOperand(0).getReg()] = Var;
+
+ // Check the history of this variable.
+ SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ if (History.empty()) {
+ UserVariables.push_back(Var);
+ // The first mention of a function argument gets the FunctionBeginSym
+ // label, so arguments are visible when breaking at function entry.
+ DIVariable DV(Var);
+ if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(getDISubprogram(DV.getContext()))
+ .describes(MF->getFunction()))
+ LabelsBeforeInsn[MI] = FunctionBeginSym;
+ } else {
+ // We have seen this variable before. Try to coalesce DBG_VALUEs.
+ const MachineInstr *Prev = History.back();
+ if (Prev->isDebugValue()) {
+ // Coalesce identical entries at the end of History.
+ if (History.size() >= 2 &&
+ Prev->isIdenticalTo(History[History.size() - 2])) {
+ DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
+ << "\t" << *Prev
+ << "\t" << *History[History.size() - 2] << "\n");
+ History.pop_back();
+ }
+
+ // Terminate old register assignments that don't reach MI;
+ MachineFunction::const_iterator PrevMBB = Prev->getParent();
+ if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) &&
+ isDbgValueInDefinedReg(Prev)) {
+ // Previous register assignment needs to terminate at the end of
+ // its basic block.
+ MachineBasicBlock::const_iterator LastMI =
+ PrevMBB->getLastNonDebugInstr();
+ if (LastMI == PrevMBB->end()) {
+ // Drop DBG_VALUE for empty range.
+ DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n"
+ << "\t" << *Prev << "\n");
+ History.pop_back();
+ }
+ else {
+ // Terminate after LastMI.
+ History.push_back(LastMI);
+ }
+ }
+ }
+ }
+ History.push_back(MI);
+ } else {
+ // Not a DBG_VALUE instruction.
+ if (!MI->isLabel())
+ AtBlockEntry = false;
+
+ // First known non-DBG_VALUE and non-frame setup location marks
+ // the beginning of the function body.
+ if (!MI->getFlag(MachineInstr::FrameSetup) &&
+ (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()))
+ PrologEndLoc = MI->getDebugLoc();
+
+ // Check if the instruction clobbers any registers with debug vars.
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
+ continue;
+ for (MCRegAliasIterator AI(MOI->getReg(), TRI, true);
+ AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ const MDNode *Var = LiveUserVar[Reg];
+ if (!Var)
+ continue;
+ // Reg is now clobbered.
+ LiveUserVar[Reg] = 0;
+
+ // Was MD last defined by a DBG_VALUE referring to Reg?
+ DbgValueHistoryMap::iterator HistI = DbgValues.find(Var);
+ if (HistI == DbgValues.end())
+ continue;
+ SmallVectorImpl<const MachineInstr*> &History = HistI->second;
+ if (History.empty())
+ continue;
+ const MachineInstr *Prev = History.back();
+ // Sanity-check: Register assignments are terminated at the end of
+ // their block.
+ if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent())
+ continue;
+ // Is the variable still in Reg?
+ if (!isDbgValueInDefinedReg(Prev) ||
+ Prev->getOperand(0).getReg() != Reg)
+ continue;
+ // Var is clobbered. Make sure the next instruction gets a label.
+ History.push_back(MI);
+ }
+ }
+ }
+ }
+ }
+
+ for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end();
+ I != E; ++I) {
+ SmallVectorImpl<const MachineInstr*> &History = I->second;
+ if (History.empty())
+ continue;
+
+ // Make sure the final register assignments are terminated.
+ const MachineInstr *Prev = History.back();
+ if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
+ const MachineBasicBlock *PrevMBB = Prev->getParent();
+ MachineBasicBlock::const_iterator LastMI =
+ PrevMBB->getLastNonDebugInstr();
+ if (LastMI == PrevMBB->end())
+ // Drop DBG_VALUE for empty range.
+ History.pop_back();
+ else {
+ // Terminate after LastMI.
+ History.push_back(LastMI);
+ }
+ }
+ // Request labels for the full history.
+ for (unsigned i = 0, e = History.size(); i != e; ++i) {
+ const MachineInstr *MI = History[i];
+ if (MI->isDebugValue())
+ requestLabelBeforeInsn(MI);
+ else
+ requestLabelAfterInsn(MI);
+ }
+ }
+
+ PrevInstLoc = DebugLoc();
+ PrevLabel = FunctionBeginSym;
+
+ // Record beginning of function.
+ if (!PrologEndLoc.isUnknown()) {
+ DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc,
+ MF->getFunction()->getContext());
+ recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
+ FnStartDL.getScope(MF->getFunction()->getContext()),
+ // We'd like to list the prologue as "not statements" but GDB behaves
+ // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+ DWARF2_FLAG_IS_STMT);
+ }
+}
+
+void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+// SmallVector<DbgVariable *, 8> &Vars = ScopeVariables.lookup(LS);
+ ScopeVariables[LS].push_back(Var);
+// Vars.push_back(Var);
+}
+
+// Gather and emit post-function debug information.
+void DwarfDebug::endFunction(const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo() || LScopes.empty()) return;
+
+ // Define end label for subprogram.
+ FunctionEndSym = Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber());
+ // Assumes in correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(FunctionEndSym);
+ // Set DwarfCompileUnitID in MCContext to default value.
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+
+ SmallPtrSet<const MDNode *, 16> ProcessedVars;
+ collectVariableInfo(MF, ProcessedVars);
+
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+ assert(TheCU && "Unable to find compile unit!");
+
+ // Construct abstract scopes.
+ ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList();
+ for (unsigned i = 0, e = AList.size(); i != e; ++i) {
+ LexicalScope *AScope = AList[i];
+ DISubprogram SP(AScope->getScopeNode());
+ if (SP.Verify()) {
+ // Collect info for variables that were optimized out.
+ DIArray Variables = SP.getVariables();
+ for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
+ DIVariable DV(Variables.getElement(i));
+ if (!DV || !DV.Verify() || !ProcessedVars.insert(DV))
+ continue;
+ // Check that DbgVariable for DV wasn't created earlier, when
+ // findAbstractVariable() was called for inlined instance of DV.
+ LLVMContext &Ctx = DV->getContext();
+ DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx);
+ if (AbstractVariables.lookup(CleanDV))
+ continue;
+ if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
+ addScopeVariable(Scope, new DbgVariable(DV, NULL));
+ }
+ }
+ if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0)
+ constructScopeDIE(TheCU, AScope);
+ }
+
+ DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
+
+ if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
+ TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
+ MMI->getFrameMoves()));
+
+ // Clear debug info
+ for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator
+ I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I)
+ DeleteContainerPointers(I->second);
+ ScopeVariables.clear();
+ DeleteContainerPointers(CurrentFnArguments);
+ UserVariables.clear();
+ DbgValues.clear();
+ AbstractVariables.clear();
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
+ PrevLabel = NULL;
+}
+
+// Register a source line with debug info. Returns the unique label that was
+// emitted and which provides correspondence to the source line list.
+void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
+ unsigned Flags) {
+ StringRef Fn;
+ StringRef Dir;
+ unsigned Src = 1;
+ if (S) {
+ DIDescriptor Scope(S);
+
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Fn = CU.getFilename();
+ Dir = CU.getDirectory();
+ } else if (Scope.isFile()) {
+ DIFile F(S);
+ Fn = F.getFilename();
+ Dir = F.getDirectory();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Fn = SP.getFilename();
+ Dir = SP.getDirectory();
+ } else if (Scope.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF(S);
+ Fn = DBF.getFilename();
+ Dir = DBF.getDirectory();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Fn = DB.getFilename();
+ Dir = DB.getDirectory();
+ } else
+ llvm_unreachable("Unexpected scope info");
+
+ Src = getOrCreateSourceID(Fn, Dir,
+ Asm->OutStreamer.getContext().getDwarfCompileUnitID());
+ }
+ Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+// Compute the size and offset of a DIE.
+unsigned
+DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
+ // Get the children.
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ // Record the abbreviation.
+ assignAbbrevNumber(Die->getAbbrev());
+
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1);
+
+ // Set DIE offset
+ Die->setOffset(Offset);
+
+ // Start the size with the size of abbreviation code.
+ Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
+
+ const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+
+ // Size the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ // Size attribute value.
+ Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
+
+ // Size the DIE children if any.
+ if (!Children.empty()) {
+ assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+ "Children flag not set");
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ Offset = computeSizeAndOffset(Children[j], Offset);
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die->setSize(Offset - Die->getOffset());
+ return Offset;
+}
+
+// Compute the size and offset of all the DIEs.
+void DwarfUnits::computeSizeAndOffsets() {
+ // Offset from the beginning of debug info section.
+ unsigned AccuOffset = 0;
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ (*I)->setDebugInfoOffset(AccuOffset);
+ unsigned Offset =
+ sizeof(int32_t) + // Length of Compilation Unit Info
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+
+ unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
+ AccuOffset += EndOffset;
+ }
+}
+
+// Emit initial Dwarf sections with a label at the start of each one.
+void DwarfDebug::emitSectionLabels() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Dwarf sections base addresses.
+ DwarfInfoSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+ DwarfAbbrevSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+ if (useSplitDwarf())
+ DwarfAbbrevDWOSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfAbbrevDWOSection(),
+ "section_abbrev_dwo");
+ emitSectionSym(Asm, TLOF.getDwarfARangesSection());
+
+ if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
+ emitSectionSym(Asm, MacroInfo);
+
+ DwarfLineSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ emitSectionSym(Asm, TLOF.getDwarfLocSection());
+ if (GenerateDwarfPubNamesSection)
+ emitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
+ emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+ DwarfStrSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
+ if (useSplitDwarf())
+ DwarfStrDWOSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+ DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+ "debug_range");
+
+ DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(),
+ "section_debug_loc");
+
+ TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+ emitSectionSym(Asm, TLOF.getDataSection());
+}
+
+// Recursively emits a debug information entry.
+void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1);
+
+ // Emit the code (index) for the abbreviation.
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
+ Twine::utohexstr(Die->getOffset()) + ":0x" +
+ Twine::utohexstr(Die->getSize()) + " " +
+ dwarf::TagString(Abbrev->getTag()));
+ Asm->EmitULEB128(AbbrevNumber);
+
+ const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ unsigned Attr = AbbrevData[i].getAttribute();
+ unsigned Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+
+ switch (Attr) {
+ case dwarf::DW_AT_abstract_origin: {
+ DIEEntry *E = cast<DIEEntry>(Values[i]);
+ DIE *Origin = E->getEntry();
+ unsigned Addr = Origin->getOffset();
+ if (Form == dwarf::DW_FORM_ref_addr) {
+ // For DW_FORM_ref_addr, output the offset from beginning of debug info
+ // section. Origin->getOffset() returns the offset from start of the
+ // compile unit.
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Addr += Holder.getCUOffset(Origin->getCompileUnit());
+ }
+ Asm->EmitInt32(Addr);
+ break;
+ }
+ case dwarf::DW_AT_ranges: {
+ // DW_AT_range Value encodes offset in debug_range section.
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) {
+ Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ 4);
+ } else {
+ Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ DwarfDebugRangeSectionSym,
+ 4);
+ }
+ break;
+ }
+ case dwarf::DW_AT_location: {
+ if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ Asm->EmitLabelReference(L->getValue(), 4);
+ else
+ Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+ } else {
+ Values[i]->EmitValue(Asm, Form);
+ }
+ break;
+ }
+ case dwarf::DW_AT_accessibility: {
+ if (Asm->isVerbose()) {
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+ Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue()));
+ }
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ default:
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ emitDIE(Children[j], Abbrevs);
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("End Of Children Mark");
+ Asm->EmitInt8(0);
+ }
+}
+
+// Emit the various dwarf units to the unit section USection with
+// the abbreviations going into ASection.
+void DwarfUnits::emitUnits(DwarfDebug *DD,
+ const MCSection *USection,
+ const MCSection *ASection,
+ const MCSymbol *ASectionSym) {
+ Asm->OutStreamer.SwitchSection(USection);
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
+ DIE *Die = TheCU->getCUDie();
+
+ // Emit the compile units header.
+ Asm->OutStreamer
+ .EmitLabel(Asm->GetTempSymbol(USection->getLabelBeginName(),
+ TheCU->getUniqueID()));
+
+ // Emit size of content not including length itself
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+
+ Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+ Asm->EmitInt32(ContentSize);
+ Asm->OutStreamer.AddComment("DWARF version number");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()),
+ ASectionSym);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+
+ DD->emitDIE(Die, Abbreviations);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(),
+ TheCU->getUniqueID()));
+ }
+}
+
+/// For a given compile unit DIE, returns offset from beginning of debug info.
+unsigned DwarfUnits::getCUOffset(DIE *Die) {
+ assert(Die->getTag() == dwarf::DW_TAG_compile_unit &&
+ "Input DIE should be compile unit in getCUOffset.");
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
+ if (TheCU->getCUDie() == Die)
+ return TheCU->getDebugInfoOffset();
+ }
+ llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits.");
+}
+
+// Emit the debug info section.
+void DwarfDebug::emitDebugInfo() {
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+
+ Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoSection(),
+ Asm->getObjFileLowering().getDwarfAbbrevSection(),
+ DwarfAbbrevSectionSym);
+}
+
+// Emit the abbreviation section.
+void DwarfDebug::emitAbbreviations() {
+ if (!useSplitDwarf())
+ emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection(),
+ &Abbreviations);
+ else
+ emitSkeletonAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
+}
+
+void DwarfDebug::emitAbbrevs(const MCSection *Section,
+ std::vector<DIEAbbrev *> *Abbrevs) {
+ // Check to see if it is worth the effort.
+ if (!Abbrevs->empty()) {
+ // Start the debug abbrev section.
+ Asm->OutStreamer.SwitchSection(Section);
+
+ MCSymbol *Begin = Asm->GetTempSymbol(Section->getLabelBeginName());
+ Asm->OutStreamer.EmitLabel(Begin);
+
+ // For each abbrevation.
+ for (unsigned i = 0, N = Abbrevs->size(); i < N; ++i) {
+ // Get abbreviation data
+ const DIEAbbrev *Abbrev = Abbrevs->at(i);
+
+ // Emit the abbrevations code (base 1 index.)
+ Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(Asm);
+ }
+
+ // Mark end of abbreviations.
+ Asm->EmitULEB128(0, "EOM(3)");
+
+ MCSymbol *End = Asm->GetTempSymbol(Section->getLabelEndName());
+ Asm->OutStreamer.EmitLabel(End);
+ }
+}
+
+// Emit the last address of the section and the end of the line matrix.
+void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
+ // Define last address of section.
+ Asm->OutStreamer.AddComment("Extended Op");
+ Asm->EmitInt8(0);
+
+ Asm->OutStreamer.AddComment("Op size");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
+ Asm->OutStreamer.AddComment("DW_LNE_set_address");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address);
+
+ Asm->OutStreamer.AddComment("Section end label");
+
+ Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
+ Asm->getDataLayout().getPointerSize());
+
+ // Mark end of matrix.
+ Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
+ Asm->EmitInt8(0);
+ Asm->EmitInt8(1);
+ Asm->EmitInt8(1);
+}
+
+// Emit visible names into a hashed accelerator table section.
+void DwarfDebug::emitAccelNames() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
+
+ AT.FinalizeTable(Asm, "Names");
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAccelNamesSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
+}
+
+// Emit objective C classes and categories into a hashed accelerator table
+// section.
+void DwarfDebug::emitAccelObjC() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
+
+ AT.FinalizeTable(Asm, "ObjC");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelObjCSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
+}
+
+// Emit namespace dies into a hashed accelerator table.
+void DwarfDebug::emitAccelNamespaces() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
+
+ AT.FinalizeTable(Asm, "namespac");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelNamespaceSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
+}
+
+// Emit type dies into a hashed accelerator table.
+void DwarfDebug::emitAccelTypes() {
+ std::vector<DwarfAccelTable::Atom> Atoms;
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag,
+ dwarf::DW_FORM_data2));
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags,
+ dwarf::DW_FORM_data1));
+ DwarfAccelTable AT(Atoms);
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names
+ = TheCU->getAccelTypes();
+ for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second;
+ for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI
+ = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI)
+ AT.AddName(Name, (*DI).first, (*DI).second);
+ }
+ }
+
+ AT.FinalizeTable(Asm, "types");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelTypesSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
+}
+
+/// emitDebugPubnames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubnames() {
+ const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+
+ typedef DenseMap<const MDNode*, CompileUnit*> CUMapType;
+ for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ unsigned ID = TheCU->getUniqueID();
+
+ if (TheCU->getGlobalNames().empty())
+ continue;
+
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+ Asm->OutStreamer.AddComment("Length of Public Names Info");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID),
+ Asm->GetTempSymbol("pubnames_begin", ID), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID));
+
+ Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID),
+ Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalNames();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const DIE *Entity = GI->second;
+
+ Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID));
+ }
+}
+
+void DwarfDebug::emitDebugPubTypes() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubtypes section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubTypesSection());
+ Asm->OutStreamer.AddComment("Length of Public Types Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()),
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+ TheCU->getUniqueID()));
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(),
+ TheCU->getUniqueID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(),
+ TheCU->getUniqueID()),
+ Asm->GetTempSymbol(ISec->getLabelBeginName(),
+ TheCU->getUniqueID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE *Entity = GI->second;
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ // Emit the name with a terminating null byte.
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1));
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+ TheCU->getUniqueID()));
+ }
+}
+
+// Emit strings into a string section.
+void DwarfUnits::emitStrings(const MCSection *StrSection,
+ const MCSection *OffsetSection = NULL,
+ const MCSymbol *StrSecSym = NULL) {
+
+ if (StringPool.empty()) return;
+
+ // Start the dwarf str section.
+ Asm->OutStreamer.SwitchSection(StrSection);
+
+ // Get all of the string pool entries and put them in an array by their ID so
+ // we can sort them.
+ SmallVector<std::pair<unsigned,
+ StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+
+ for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
+ I = StringPool.begin(), E = StringPool.end();
+ I != E; ++I)
+ Entries.push_back(std::make_pair(I->second.second, &*I));
+
+ array_pod_sort(Entries.begin(), Entries.end());
+
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ // Emit a label for reference from debug information entries.
+ Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
+
+ // Emit the string itself with a terminating null byte.
+ Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
+ Entries[i].second->getKeyLength()+1));
+ }
+
+ // If we've got an offset section go ahead and emit that now as well.
+ if (OffsetSection) {
+ Asm->OutStreamer.SwitchSection(OffsetSection);
+ unsigned offset = 0;
+ unsigned size = 4; // FIXME: DWARF64 is 8.
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ Asm->OutStreamer.EmitIntValue(offset, size);
+ offset += Entries[i].second->getKeyLength() + 1;
+ }
+ }
+}
+
+// Emit strings into a string section.
+void DwarfUnits::emitAddresses(const MCSection *AddrSection) {
+
+ if (AddressPool.empty()) return;
+
+ // Start the dwarf addr section.
+ Asm->OutStreamer.SwitchSection(AddrSection);
+
+ // Get all of the string pool entries and put them in an array by their ID so
+ // we can sort them.
+ SmallVector<std::pair<unsigned,
+ std::pair<MCSymbol*, unsigned>* >, 64> Entries;
+
+ for (DenseMap<MCSymbol*, std::pair<MCSymbol*, unsigned> >::iterator
+ I = AddressPool.begin(), E = AddressPool.end();
+ I != E; ++I)
+ Entries.push_back(std::make_pair(I->second.second, &(I->second)));
+
+ array_pod_sort(Entries.begin(), Entries.end());
+
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ // Emit a label for reference from debug information entries.
+ MCSymbol *Sym = Entries[i].second->first;
+ if (Sym)
+ Asm->EmitLabelReference(Entries[i].second->first,
+ Asm->getDataLayout().getPointerSize());
+ else
+ Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize());
+ }
+
+}
+
+// Emit visible names into a debug str section.
+void DwarfDebug::emitDebugStr() {
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
+}
+
+// Emit visible names into a debug loc section.
+void DwarfDebug::emitDebugLoc() {
+ if (DotDebugLocEntries.empty())
+ return;
+
+ for (SmallVectorImpl<DotDebugLocEntry>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I) {
+ DotDebugLocEntry &Entry = *I;
+ if (I + 1 != DotDebugLocEntries.end())
+ Entry.Merge(I+1);
+ }
+
+ // Start the dwarf loc section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ unsigned char Size = Asm->getDataLayout().getPointerSize();
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
+ unsigned index = 1;
+ for (SmallVectorImpl<DotDebugLocEntry>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I, ++index) {
+ DotDebugLocEntry &Entry = *I;
+ if (Entry.isMerged()) continue;
+ if (Entry.isEmpty()) {
+ Asm->OutStreamer.EmitIntValue(0, Size);
+ Asm->OutStreamer.EmitIntValue(0, Size);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
+ } else {
+ Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size);
+ Asm->OutStreamer.EmitSymbolValue(Entry.End, Size);
+ DIVariable DV(Entry.Variable);
+ Asm->OutStreamer.AddComment("Loc expr size");
+ MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
+ MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
+ Asm->EmitLabelDifference(end, begin, 2);
+ Asm->OutStreamer.EmitLabel(begin);
+ if (Entry.isInt()) {
+ DIBasicType BTy(DV.getType());
+ if (BTy.Verify() &&
+ (BTy.getEncoding() == dwarf::DW_ATE_signed
+ || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
+ Asm->OutStreamer.AddComment("DW_OP_consts");
+ Asm->EmitInt8(dwarf::DW_OP_consts);
+ Asm->EmitSLEB128(Entry.getInt());
+ } else {
+ Asm->OutStreamer.AddComment("DW_OP_constu");
+ Asm->EmitInt8(dwarf::DW_OP_constu);
+ Asm->EmitULEB128(Entry.getInt());
+ }
+ } else if (Entry.isLocation()) {
+ if (!DV.hasComplexAddress())
+ // Regular entry.
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ else {
+ // Complex address entry.
+ unsigned N = DV.getNumAddrElements();
+ unsigned i = 0;
+ if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+ if (Entry.Loc.getOffset()) {
+ i = 2;
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ Asm->OutStreamer.AddComment("DW_OP_deref");
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitSLEB128(DV.getAddrElement(1));
+ } else {
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+ Asm->EmitDwarfRegOp(Loc);
+ i = 2;
+ }
+ } else {
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ }
+
+ // Emit remaining complex address elements.
+ for (; i < N; ++i) {
+ uint64_t Element = DV.getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitULEB128(DV.getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref) {
+ if (!Entry.Loc.isReg())
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ } else
+ llvm_unreachable("unknown Opcode found in complex address");
+ }
+ }
+ }
+ // else ... ignore constant fp. There is not any good way to
+ // to represent them here in dwarf.
+ Asm->OutStreamer.EmitLabel(end);
+ }
+ }
+}
+
+// Emit visible names into a debug aranges section.
+void DwarfDebug::emitDebugARanges() {
+ // Start the dwarf aranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
+}
+
+// Emit visible names into a debug ranges section.
+void DwarfDebug::emitDebugRanges() {
+ // Start the dwarf ranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfRangesSection());
+ unsigned char Size = Asm->getDataLayout().getPointerSize();
+ for (SmallVectorImpl<const MCSymbol *>::iterator
+ I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
+ I != E; ++I) {
+ if (*I)
+ Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size);
+ else
+ Asm->OutStreamer.EmitIntValue(0, Size);
+ }
+}
+
+// Emit visible names into a debug macinfo section.
+void DwarfDebug::emitDebugMacInfo() {
+ if (const MCSection *LineInfo =
+ Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
+ // Start the dwarf macinfo section.
+ Asm->OutStreamer.SwitchSection(LineInfo);
+ }
+}
+
+// Emit inline info using following format.
+// Section Header:
+// 1. length of section
+// 2. Dwarf version number
+// 3. address size.
+//
+// Entries (one "entry" for each function that was inlined):
+//
+// 1. offset into __debug_str section for MIPS linkage name, if exists;
+// otherwise offset into __debug_str for regular function name.
+// 2. offset into __debug_str section for regular function name.
+// 3. an unsigned LEB128 number indicating the number of distinct inlining
+// instances for the function.
+//
+// The rest of the entry consists of a {die_offset, low_pc} pair for each
+// inlined instance; the die_offset points to the inlined_subroutine die in the
+// __debug_info section, and the low_pc is the starting address for the
+// inlining instance.
+void DwarfDebug::emitDebugInlineInfo() {
+ if (!Asm->MAI->doesDwarfUseInlineInfoSection())
+ return;
+
+ if (!FirstCU)
+ return;
+
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugInlineSection());
+
+ Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1),
+ Asm->GetTempSymbol("debug_inlined_begin", 1), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));
+
+ Asm->OutStreamer.AddComment("Dwarf Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+
+ for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(),
+ E = InlinedSPNodes.end(); I != E; ++I) {
+
+ const MDNode *Node = *I;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+ = InlineInfo.find(Node);
+ SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
+ DISubprogram SP(Node);
+ StringRef LName = SP.getLinkageName();
+ StringRef Name = SP.getName();
+
+ Asm->OutStreamer.AddComment("MIPS linkage name");
+ if (LName.empty())
+ Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
+ DwarfStrSectionSym);
+ else
+ Asm->EmitSectionOffset(InfoHolder
+ .getStringPoolEntry(getRealLinkageName(LName)),
+ DwarfStrSectionSym);
+
+ Asm->OutStreamer.AddComment("Function name");
+ Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
+ DwarfStrSectionSym);
+ Asm->EmitULEB128(Labels.size(), "Inline count");
+
+ for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(),
+ LE = Labels.end(); LI != LE; ++LI) {
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(LI->second->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
+ Asm->OutStreamer.EmitSymbolValue(LI->first,
+ Asm->getDataLayout().getPointerSize());
+ }
+ }
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
+}
+
+// DWARF5 Experimental Separate Dwarf emitters.
+
+// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
+// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
+// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present,
+// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa.
+CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
+ DICompileUnit DIUnit(N);
+ CompilationDir = DIUnit.getDirectory();
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+ DIUnit.getLanguage(), Die, Asm,
+ this, &SkeletonHolder);
+
+ NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
+ DIUnit.getSplitDebugFilename());
+
+ // This should be a unique identifier when we want to build .dwp files.
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+
+ // FIXME: The addr base should be relative for each compile unit, however,
+ // this one is going to be 0 anyhow.
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
+
+ // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
+ // into an entity. We're using 0, or a NULL label for this.
+ NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+
+ // DW_AT_stmt_list is a offset of line number information for this
+ // compile unit in debug_line section.
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
+ DwarfLineSectionSym);
+ else
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0);
+
+ if (!CompilationDir.empty())
+ NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+
+ SkeletonHolder.addUnit(NewCU);
+ SkeletonCUs.push_back(NewCU);
+
+ return NewCU;
+}
+
+void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) {
+ assert(useSplitDwarf() && "No split dwarf debug info?");
+ emitAbbrevs(Section, &SkeletonAbbrevs);
+}
+
+// Emit the .debug_info.dwo section for separated dwarf. This contains the
+// compile units that would normally be in debug_info.
+void DwarfDebug::emitDebugInfoDWO() {
+ assert(useSplitDwarf() && "No split dwarf debug info?");
+ InfoHolder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoDWOSection(),
+ Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
+ DwarfAbbrevDWOSectionSym);
+}
+
+// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
+// abbreviations for the .debug_info.dwo section.
+void DwarfDebug::emitDebugAbbrevDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
+ &Abbreviations);
+}
+
+// Emit the .debug_str.dwo section for separated dwarf. This contains the
+// string section and is identical in format to traditional .debug_str
+// sections.
+void DwarfDebug::emitDebugStrDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ const MCSection *OffSec = Asm->getObjFileLowering()
+ .getDwarfStrOffDWOSection();
+ const MCSymbol *StrSym = DwarfStrSectionSym;
+ InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
+ OffSec, StrSym);
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 000000000000..81e345e6281d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,649 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class CompileUnit;
+class ConstantInt;
+class ConstantFP;
+class DbgVariable;
+class MachineFrameInfo;
+class MachineModuleInfo;
+class MachineOperand;
+class MCAsmInfo;
+class DIEAbbrev;
+class DIE;
+class DIEBlock;
+class DIEEntry;
+class DwarfDebug;
+
+//===----------------------------------------------------------------------===//
+/// \brief This class is used to record source line correspondence.
+class SrcLineInfo {
+ unsigned Line; // Source line number.
+ unsigned Column; // Source column.
+ unsigned SourceID; // Source ID number.
+ MCSymbol *Label; // Label in code ID number.
+public:
+ SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
+ : Line(L), Column(C), SourceID(S), Label(label) {}
+
+ // Accessors
+ unsigned getLine() const { return Line; }
+ unsigned getColumn() const { return Column; }
+ unsigned getSourceID() const { return SourceID; }
+ MCSymbol *getLabel() const { return Label; }
+};
+
+/// \brief This struct describes location entries emitted in the .debug_loc
+/// section.
+typedef struct DotDebugLocEntry {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ MachineLocation Loc;
+ const MDNode *Variable;
+ bool Merged;
+ bool Constant;
+ enum EntryType {
+ E_Location,
+ E_Integer,
+ E_ConstantFP,
+ E_ConstantInt
+ };
+ enum EntryType EntryKind;
+
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constants;
+ DotDebugLocEntry()
+ : Begin(0), End(0), Variable(0), Merged(false),
+ Constant(false) { Constants.Int = 0;}
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
+ const MDNode *V)
+ : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
+ Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
+ const ConstantInt *IPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
+
+ /// \brief Empty entries are also used as a trigger to emit temp label. Such
+ /// labels are referenced is used to find debug_loc offset for a given DIE.
+ bool isEmpty() { return Begin == 0 && End == 0; }
+ bool isMerged() { return Merged; }
+ void Merge(DotDebugLocEntry *Next) {
+ if (!(Begin && Loc == Next->Loc && End == Next->Begin))
+ return;
+ Next->Begin = Begin;
+ Merged = true;
+ }
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() { return Constants.Int; }
+ const ConstantFP *getConstantFP() { return Constants.CFP; }
+ const ConstantInt *getConstantInt() { return Constants.CIP; }
+} DotDebugLocEntry;
+
+//===----------------------------------------------------------------------===//
+/// \brief This class is used to track local variable information.
+class DbgVariable {
+ DIVariable Var; // Variable Descriptor.
+ DIE *TheDIE; // Variable DIE.
+ unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
+ DbgVariable *AbsVar; // Corresponding Abstract variable, if any.
+ const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
+ int FrameIndex;
+public:
+ // AbsVar may be NULL.
+ DbgVariable(DIVariable V, DbgVariable *AV)
+ : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
+ FrameIndex(~0) {}
+
+ // Accessors.
+ DIVariable getVariable() const { return Var; }
+ void setDIE(DIE *D) { TheDIE = D; }
+ DIE *getDIE() const { return TheDIE; }
+ void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
+ unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+ StringRef getName() const { return Var.getName(); }
+ DbgVariable *getAbstractVariable() const { return AbsVar; }
+ const MachineInstr *getMInsn() const { return MInsn; }
+ void setMInsn(const MachineInstr *M) { MInsn = M; }
+ int getFrameIndex() const { return FrameIndex; }
+ void setFrameIndex(int FI) { FrameIndex = FI; }
+ // Translate tag to proper Dwarf tag.
+ unsigned getTag() const {
+ if (Var.getTag() == dwarf::DW_TAG_arg_variable)
+ return dwarf::DW_TAG_formal_parameter;
+
+ return dwarf::DW_TAG_variable;
+ }
+ /// \brief Return true if DbgVariable is artificial.
+ bool isArtificial() const {
+ if (Var.isArtificial())
+ return true;
+ if (getType().isArtificial())
+ return true;
+ return false;
+ }
+
+ bool isObjectPointer() const {
+ if (Var.isObjectPointer())
+ return true;
+ if (getType().isObjectPointer())
+ return true;
+ return false;
+ }
+
+ bool variableHasComplexAddress() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.hasComplexAddress();
+ }
+ bool isBlockByrefVariable() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.isBlockByrefVariable();
+ }
+ unsigned getNumAddrElements() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.getNumAddrElements();
+ }
+ uint64_t getAddrElement(unsigned i) const {
+ return Var.getAddrElement(i);
+ }
+ DIType getType() const;
+};
+
+
+// A String->Symbol mapping of strings used by indirect
+// references.
+typedef StringMap<std::pair<MCSymbol*, unsigned>,
+ BumpPtrAllocator&> StrPool;
+
+// A Symbol->pair<Symbol, unsigned> mapping of addresses used by indirect
+// references.
+typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool;
+
+/// \brief Collects and handles information specific to a particular
+/// collection of units.
+class DwarfUnits {
+ // Target of Dwarf emission, used for sizing of abbreviations.
+ AsmPrinter *Asm;
+
+ // Used to uniquely define abbreviations.
+ FoldingSet<DIEAbbrev> *AbbreviationsSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> *Abbreviations;
+
+ // A pointer to all units in the section.
+ SmallVector<CompileUnit *, 1> CUs;
+
+ // Collection of strings for this unit and assorted symbols.
+ StrPool StringPool;
+ unsigned NextStringPoolNumber;
+ std::string StringPref;
+
+ // Collection of addresses for this unit and assorted labels.
+ AddrPool AddressPool;
+ unsigned NextAddrPoolNumber;
+
+public:
+ DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
+ std::vector<DIEAbbrev *> *A, const char *Pref,
+ BumpPtrAllocator &DA) :
+ Asm(AP), AbbreviationsSet(AS), Abbreviations(A),
+ StringPool(DA), NextStringPoolNumber(0), StringPref(Pref),
+ AddressPool(), NextAddrPoolNumber(0) {}
+
+ /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
+
+ /// \brief Compute the size and offset of all the DIEs.
+ void computeSizeAndOffsets();
+
+ /// \brief Define a unique number for the abbreviation.
+ void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// \brief Add a unit to the list of CUs.
+ void addUnit(CompileUnit *CU) { CUs.push_back(CU); }
+
+ /// \brief Emit all of the units to the section listed with the given
+ /// abbreviation section.
+ void emitUnits(DwarfDebug *, const MCSection *, const MCSection *,
+ const MCSymbol *);
+
+ /// \brief Emit all of the strings to the section given.
+ void emitStrings(const MCSection *, const MCSection *, const MCSymbol *);
+
+ /// \brief Emit all of the addresses to the section given.
+ void emitAddresses(const MCSection *);
+
+ /// \brief Returns the entry into the start of the pool.
+ MCSymbol *getStringPoolSym();
+
+ /// \brief Returns an entry into the string pool with the given
+ /// string text.
+ MCSymbol *getStringPoolEntry(StringRef Str);
+
+ /// \brief Returns the index into the string pool with the given
+ /// string text.
+ unsigned getStringPoolIndex(StringRef Str);
+
+ /// \brief Returns the string pool.
+ StrPool *getStringPool() { return &StringPool; }
+
+ /// \brief Returns the index into the address pool with the given
+ /// label/symbol.
+ unsigned getAddrPoolIndex(MCSymbol *);
+
+ /// \brief Returns the address pool.
+ AddrPool *getAddrPool() { return &AddressPool; }
+
+ /// \brief for a given compile unit DIE, returns offset from beginning of
+ /// debug info.
+ unsigned getCUOffset(DIE *Die);
+};
+
+/// \brief Collects and handles dwarf debug information.
+class DwarfDebug {
+ // Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ // Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ // All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
+ //===--------------------------------------------------------------------===//
+ // Attribute used to construct specific Dwarf sections.
+ //
+
+ CompileUnit *FirstCU;
+
+ // Maps MDNode with its corresponding CompileUnit.
+ DenseMap <const MDNode *, CompileUnit *> CUMap;
+
+ // Maps subprogram MDNode with its corresponding CompileUnit.
+ DenseMap <const MDNode *, CompileUnit *> SPMap;
+
+ // Used to uniquely define abbreviations.
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ // Stores the current file ID for a given compile unit.
+ DenseMap <unsigned, unsigned> FileIDCUMap;
+ // Source id map, i.e. CUID, source filename and directory,
+ // separated by a zero byte, mapped to a unique id.
+ StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
+
+ // Provides a unique id per text section.
+ SetVector<const MCSection*> SectionMap;
+
+ // List of Arguments (DbgValues) for current function.
+ SmallVector<DbgVariable *, 8> CurrentFnArguments;
+
+ LexicalScopes LScopes;
+
+ // Collection of abstract subprogram DIEs.
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
+ // Collection of dbg variables of a scope.
+ DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables;
+
+ // Collection of abstract variables.
+ DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
+
+ // Collection of DotDebugLocEntry.
+ SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
+
+ // Collection of subprogram DIEs that are marked (at the end of the module)
+ // as DW_AT_inline.
+ SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+
+ // Keep track of inlined functions and their location. This
+ // information is used to populate the debug_inlined section.
+ typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+ SmallVector<const MDNode *, 4> InlinedSPNodes;
+
+ // This is a collection of subprogram MDNodes that are processed to
+ // create DIEs.
+ SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
+ // Maps instruction with label emitted before instruction.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+ // Maps instruction with label emitted after instruction.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+ // Every user variable mentioned by a DBG_VALUE instruction in order of
+ // appearance.
+ SmallVector<const MDNode*, 8> UserVariables;
+
+ // For each user variable, keep a list of DBG_VALUE instructions in order.
+ // The list can also contain normal instructions that clobber the previous
+ // DBG_VALUE.
+ typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
+ DbgValueHistoryMap;
+ DbgValueHistoryMap DbgValues;
+
+ SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
+
+ // Previous instruction's location information. This is used to determine
+ // label location to indicate scope boundries in dwarf debug info.
+ DebugLoc PrevInstLoc;
+ MCSymbol *PrevLabel;
+
+ // This location indicates end of function prologue and beginning of function
+ // body.
+ DebugLoc PrologEndLoc;
+
+ struct FunctionDebugFrameInfo {
+ unsigned Number;
+ std::vector<MachineMove> Moves;
+
+ FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+ : Number(Num), Moves(M) {}
+ };
+
+ std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+ // Section Symbols: these are assembler temporary labels that are emitted at
+ // the beginning of each supported dwarf section. These are used to form
+ // section offsets and are created by EmitSectionLabels.
+ MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
+ MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
+ MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym;
+ MCSymbol *FunctionBeginSym, *FunctionEndSym;
+ MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
+
+ // As an optimization, there is no need to emit an entry in the directory
+ // table for the same directory as DW_at_comp_dir.
+ StringRef CompilationDir;
+
+ // Counter for assigning globally unique IDs for CUs.
+ unsigned GlobalCUIndexCount;
+
+ // Holder for the file specific debug information.
+ DwarfUnits InfoHolder;
+
+ // Holders for the various debug information flags that we might need to
+ // have exposed. See accessor functions below for description.
+
+ // Whether or not we're emitting info for older versions of gdb on darwin.
+ bool IsDarwinGDBCompat;
+
+ // DWARF5 Experimental Options
+ bool HasDwarfAccelTables;
+ bool HasSplitDwarf;
+
+ // Separated Dwarf Variables
+ // In general these will all be for bits that are left in the
+ // original object file, rather than things that are meant
+ // to be in the .dwo sections.
+
+ // The CUs left in the original object file for separated debug info.
+ SmallVector<CompileUnit *, 1> SkeletonCUs;
+
+ // Used to uniquely define abbreviations for the skeleton emission.
+ FoldingSet<DIEAbbrev> SkeletonAbbrevSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> SkeletonAbbrevs;
+
+ // Holder for the skeleton information.
+ DwarfUnits SkeletonHolder;
+
+private:
+
+ void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+
+ /// \brief Find abstract variable associated with Var.
+ DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
+
+ /// \brief Find DIE for the given subprogram and attach appropriate
+ /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
+ /// variables in this scope then create and insert DIEs for these
+ /// variables.
+ DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode);
+
+ /// \brief Construct new DW_TAG_lexical_block for this scope and
+ /// attach DW_AT_low_pc/DW_AT_high_pc labels.
+ DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
+
+ /// \brief This scope represents inlined body of a function. Construct
+ /// DIE to represent this concrete inlined copy of the function.
+ DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
+
+ /// \brief Construct a DIE for this scope.
+ DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
+
+ /// \brief Emit initial Dwarf sections with a label at the start of each one.
+ void emitSectionLabels();
+
+ /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
+
+ /// \brief Compute the size and offset of all the DIEs.
+ void computeSizeAndOffsets();
+
+ /// \brief Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ void computeInlinedDIEs();
+
+ /// \brief Collect info for variables that were optimized out.
+ void collectDeadVariables();
+
+ /// \brief Finish off debug information after all functions have been
+ /// processed.
+ void finalizeModuleInfo();
+
+ /// \brief Emit labels to close any remaining sections that have been left
+ /// open.
+ void endSections();
+
+ /// \brief Emit a set of abbreviations to the specific section.
+ void emitAbbrevs(const MCSection *, std::vector<DIEAbbrev*> *);
+
+ /// \brief Emit the debug info section.
+ void emitDebugInfo();
+
+ /// \brief Emit the abbreviation section.
+ void emitAbbreviations();
+
+ /// \brief Emit the last address of the section and the end of
+ /// the line matrix.
+ void emitEndOfLineMatrix(unsigned SectionEnd);
+
+ /// \brief Emit visible names into a hashed accelerator table section.
+ void emitAccelNames();
+
+ /// \brief Emit objective C classes and categories into a hashed
+ /// accelerator table section.
+ void emitAccelObjC();
+
+ /// \brief Emit namespace dies into a hashed accelerator table.
+ void emitAccelNamespaces();
+
+ /// \brief Emit type dies into a hashed accelerator table.
+ void emitAccelTypes();
+
+ /// \brief Emit visible names into a debug pubnames section.
+ void emitDebugPubnames();
+
+ /// \brief Emit visible types into a debug pubtypes section.
+ void emitDebugPubTypes();
+
+ /// \brief Emit visible names into a debug str section.
+ void emitDebugStr();
+
+ /// \brief Emit visible names into a debug loc section.
+ void emitDebugLoc();
+
+ /// \brief Emit visible names into a debug aranges section.
+ void emitDebugARanges();
+
+ /// \brief Emit visible names into a debug ranges section.
+ void emitDebugRanges();
+
+ /// \brief Emit visible names into a debug macinfo section.
+ void emitDebugMacInfo();
+
+ /// \brief Emit inline info using custom format.
+ void emitDebugInlineInfo();
+
+ /// DWARF 5 Experimental Split Dwarf Emitters
+
+ /// \brief Construct the split debug info compile unit for the debug info
+ /// section.
+ CompileUnit *constructSkeletonCU(const MDNode *);
+
+ /// \brief Emit the local split abbreviations.
+ void emitSkeletonAbbrevs(const MCSection *);
+
+ /// \brief Emit the debug info dwo section.
+ void emitDebugInfoDWO();
+
+ /// \brief Emit the debug abbrev dwo section.
+ void emitDebugAbbrevDWO();
+
+ /// \brief Emit the debug str dwo section.
+ void emitDebugStrDWO();
+
+ /// \brief Create new CompileUnit for the given metadata node with tag
+ /// DW_TAG_compile_unit.
+ CompileUnit *constructCompileUnit(const MDNode *N);
+
+ /// \brief Construct subprogram DIE.
+ void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
+
+ /// \brief Register a source line with debug info. Returns the unique
+ /// label that was emitted and which provides correspondence to the
+ /// source line list.
+ void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+ unsigned Flags);
+
+ /// \brief Indentify instructions that are marking the beginning of or
+ /// ending of a scope.
+ void identifyScopeMarkers();
+
+ /// \brief If Var is an current function argument that add it in
+ /// CurrentFnArguments list.
+ bool addCurrentFnArgument(const MachineFunction *MF,
+ DbgVariable *Var, LexicalScope *Scope);
+
+ /// \brief Populate LexicalScope entries with variables' info.
+ void collectVariableInfo(const MachineFunction *,
+ SmallPtrSet<const MDNode *, 16> &ProcessedVars);
+
+ /// \brief Collect variable information from the side table maintained
+ /// by MMI.
+ void collectVariableInfoFromMMITable(const MachineFunction * MF,
+ SmallPtrSet<const MDNode *, 16> &P);
+
+ /// \brief Ensure that a label will be emitted before MI.
+ void requestLabelBeforeInsn(const MachineInstr *MI) {
+ LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+ }
+
+ /// \brief Return Label preceding the instruction.
+ MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// \brief Ensure that a label will be emitted after MI.
+ void requestLabelAfterInsn(const MachineInstr *MI) {
+ LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+ }
+
+ /// \brief Return Label immediately following the instruction.
+ MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(AsmPrinter *A, Module *M);
+ ~DwarfDebug();
+
+ /// \brief Emit all Dwarf sections that should come prior to the
+ /// content.
+ void beginModule();
+
+ /// \brief Emit all Dwarf sections that should come after the content.
+ void endModule();
+
+ /// \brief Gather pre-function debug information.
+ void beginFunction(const MachineFunction *MF);
+
+ /// \brief Gather and emit post-function debug information.
+ void endFunction(const MachineFunction *MF);
+
+ /// \brief Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI);
+
+ /// \brief Process end of an instruction.
+ void endInstruction(const MachineInstr *MI);
+
+ /// \brief Look up the source id with the given directory and source file
+ /// names. If none currently exists, create a new id and insert it in the
+ /// SourceIds map.
+ unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName,
+ unsigned CUID);
+
+ /// \brief Recursively Emits a debug information entry.
+ void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs);
+
+ /// \brief Returns whether or not to limit some of our debug
+ /// output to the limitations of darwin gdb.
+ bool useDarwinGDBCompat() { return IsDarwinGDBCompat; }
+
+ // Experimental DWARF5 features.
+
+ /// \brief Returns whether or not to emit tables that dwarf consumers can
+ /// use to accelerate lookup.
+ bool useDwarfAccelTables() { return HasDwarfAccelTables; }
+
+ /// \brief Returns whether or not to change the current debug info for the
+ /// split dwarf proposal support.
+ bool useSplitDwarf() { return HasSplitDwarf; }
+};
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
new file mode 100644
index 000000000000..7133458129cc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -0,0 +1,736 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+DwarfException::DwarfException(AsmPrinter *A)
+ : Asm(A), MMI(Asm->MMI) {}
+
+DwarfException::~DwarfException() {}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+/// ComputeActionsTable - Compute the actions table and gather the first action
+/// index for each landing pad site.
+unsigned DwarfException::
+ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
+
+ // The action table follows the call-site table in the LSDA. The individual
+ // records are of two types:
+ //
+ // * Catch clause
+ // * Exception specification
+ //
+ // The two record kinds have the same format, with only small differences.
+ // They are distinguished by the "switch value" field: Catch clauses
+ // (TypeInfos) have strictly positive switch values, and exception
+ // specifications (FilterIds) have strictly negative switch values. Value 0
+ // indicates a catch-all clause.
+ //
+ // Negative type IDs index into FilterIds. Positive type IDs index into
+ // TypeInfos. The value written for a positive type ID is just the type ID
+ // itself. For a negative type ID, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type ID (because the FilterIds entries are
+ // written using a variable width encoding, which outputs one byte per entry
+ // as long as the value written is not too large) but can differ. This kind
+ // of complication does not occur for positive type IDs because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
+
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
+ }
+
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ const LandingPadInfo *PrevLPI = 0;
+
+ for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+ I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+ const LandingPadInfo *LPI = *I;
+ const std::vector<int> &TypeIds = LPI->TypeIds;
+ unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ unsigned PrevAction = (unsigned)-1;
+
+ if (NumShared) {
+ unsigned SizePrevIds = PrevLPI->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = Actions.size() - 1;
+ SizeAction =
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) +
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
+ SizeAction -=
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeAction += -Actions[PrevAction].NextAction;
+ PrevAction = Actions[PrevAction].Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+ int TypeID = TypeIds[J];
+ assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+ Actions.push_back(Action);
+ PrevAction = Actions.size() - 1;
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ // Information used when created the call-site table. The action record
+ // field of the call site record is the offset of the first associated
+ // action record, relative to the start of the actions table. This value is
+ // biased by 1 (1 indicating the start of the actions table), and 0
+ // indicates that there are no actions.
+ FirstActions.push_back(FirstAction);
+
+ // Compute this sites contribution to size.
+ SizeActions += SizeSiteActions;
+
+ PrevLPI = LPI;
+ }
+
+ return SizeActions;
+}
+
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (!MO.isGlobal()) continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (F == 0) continue;
+
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+
+ return MarkedNoUnwind;
+}
+
+/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
+/// has a try-range containing the call, a non-zero landing pad, and an
+/// appropriate action. The entry for an ordinary call has a try-range
+/// containing the call and zero for the landing pad and the action. Calls
+/// marked 'nounwind' have no entry and must not be contained in the try-range
+/// of any entry - they form gaps in the table. Entries must be ordered by
+/// try-range address.
+void DwarfException::
+ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = 0;
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last CallSite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ // Visit all instructions in order of address.
+ for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ if (MI->isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+ continue;
+ }
+
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // For Dwarf exception handling (SjLj handling doesn't use this). If some
+ // instruction between the previous try-range and this one may throw,
+ // create a call-site entry with no landing pad for the region between the
+ // try-ranges.
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+ CallSites.push_back(Site);
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (!LandingPad->LandingPadLabel) {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ } else {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {
+ BeginLabel,
+ LastLabel,
+ LandingPad->LandingPadLabel,
+ FirstActions[P.PadIndex]
+ };
+
+ // Try to merge with the previous call-site. SJLJ doesn't do this
+ if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ if (Asm->MAI->isExceptionHandlingDwarf())
+ CallSites.push_back(Site);
+ else {
+ // SjLj EH must maintain the call sites in the order assigned
+ // to them by the SjLjPrepare pass.
+ unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+ if (CallSites.size() < SiteNo)
+ CallSites.resize(SiteNo);
+ CallSites[SiteNo - 1] = Site;
+ }
+ PreviousIsInvoke = true;
+ }
+ }
+ }
+
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+ CallSites.push_back(Site);
+ }
+}
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+/// handles the exception once processed. Finally an index into the actions
+/// table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+/// action offset. Starting with the action index from the landing pad
+/// site, each type ID is checked for a match to the current exception. If
+/// it matches then the exception and type id are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type id is found then the frame is
+/// unwound and handling continues.
+/// 3. Type ID table contains references to all the C++ typeinfo for all
+/// catches in the function. This tables is reverse indexed base 1.
+void DwarfException::EmitExceptionTable() {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions);
+
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need be deduced when using DWARF exception handling.
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ // Compute the call-site table.
+ SmallVector<CallSiteEntry, 64> CallSites;
+ ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+
+ // Final tallies.
+
+ // Call sites.
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+
+ unsigned CallSiteTableLength;
+ if (IsSJLJ)
+ CallSiteTableLength = 0;
+ else {
+ unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
+ CallSiteTableLength =
+ CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
+ }
+
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+ if (IsSJLJ)
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
+ }
+
+ // Type infos.
+ const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ unsigned TTypeEncoding;
+ unsigned TypeFormatSize;
+
+ if (!HaveTTData) {
+ // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
+ // that we're omitting that bit.
+ TTypeEncoding = dwarf::DW_EH_PE_omit;
+ // dwarf::DW_EH_PE_absptr
+ TypeFormatSize = Asm->getDataLayout().getPointerSize();
+ } else {
+ // Okay, we have actual filters or typeinfos to emit. As such, we need to
+ // pick a type encoding for them. We're about to emit a list of pointers to
+ // typeinfo objects at the end of the LSDA. However, unless we're in static
+ // mode, this reference will require a relocation by the dynamic linker.
+ //
+ // Because of this, we have a couple of options:
+ //
+ // 1) If we are in -static mode, we can always use an absolute reference
+ // from the LSDA, because the static linker will resolve it.
+ //
+ // 2) Otherwise, if the LSDA section is writable, we can output the direct
+ // reference to the typeinfo and allow the dynamic linker to relocate
+ // it. Since it is in a writable section, the dynamic linker won't
+ // have a problem.
+ //
+ // 3) Finally, if we're in PIC mode and the LDSA section isn't writable,
+ // we need to use some form of indirection. For example, on Darwin,
+ // we can output a statically-relocatable reference to a dyld stub. The
+ // offset to the stub is constant, but the contents are in a section
+ // that is updated by the dynamic linker. This is easy enough, but we
+ // need to tell the personality function of the unwinder to indirect
+ // through the dyld stub.
+ //
+ // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+ // somewhere. This predicate should be moved to a shared location that is
+ // in target-independent code.
+ //
+ TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
+ TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
+ }
+
+ // Begin the exception table.
+ // Sometimes we want not to emit the data into separate section (e.g. ARM
+ // EHABI). In this case LSDASection will be NULL.
+ if (LSDASection)
+ Asm->OutStreamer.SwitchSection(LSDASection);
+ Asm->EmitAlignment(2);
+
+ // Emit the LSDA.
+ MCSymbol *GCCETSym =
+ Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
+ Twine(Asm->getFunctionNumber()));
+ Asm->OutStreamer.EmitLabel(GCCETSym);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()));
+
+ if (IsSJLJ)
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
+ Asm->getFunctionNumber()));
+
+ // Emit the LSDA header.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ Asm->EmitEncodingByte(TTypeEncoding, "@TType");
+
+ // The type infos need to be aligned. GCC does this by inserting padding just
+ // before the type infos. However, this changes the size of the exception
+ // table, so you need to take this into account when you output the exception
+ // table size. However, the size is output using a variable length encoding.
+ // So by increasing the size by inserting padding, you may increase the number
+ // of bytes used for writing the size. If it increases, say by one byte, then
+ // you now need to output one less byte of padding to get the type infos
+ // aligned. However this decreases the size of the exception table. This
+ // changes the value you have to output for the exception table size. Due to
+ // the variable length encoding, the number of bytes used for writing the
+ // length may decrease. If so, you then have to increase the amount of
+ // padding. And so on. If you look carefully at the GCC code you will see that
+ // it indeed does this in a loop, going on and on until the values stabilize.
+ // We chose another solution: don't output padding inside the table like GCC
+ // does, instead output it before the table.
+ unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
+ unsigned CallSiteTableLengthSize =
+ MCAsmInfo::getULEB128Size(CallSiteTableLength);
+ unsigned TTypeBaseOffset =
+ sizeof(int8_t) + // Call site format
+ CallSiteTableLengthSize + // Call site table length size
+ CallSiteTableLength + // Call site table length
+ SizeActions + // Actions size
+ SizeTypes;
+ unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
+ unsigned TotalSize =
+ sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
+ TTypeBaseOffset; // TType base offset
+ unsigned SizeAlign = (4 - TotalSize) & 3;
+
+ if (HaveTTData) {
+ // Account for any extra padding that will be added to the call site table
+ // length.
+ Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+ SizeAlign = 0;
+ }
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ // SjLj Exception handling
+ if (IsSJLJ) {
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ // Emit the landing pad site information.
+ unsigned idx = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+ const CallSiteEntry &S = *I;
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (VerboseAsm) {
+ Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<");
+ Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx));
+ }
+ Asm->EmitULEB128(idx);
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" Action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(" Action: " +
+ Twine((S.Action - 1) / 2 + 1));
+ }
+ Asm->EmitULEB128(S.Action);
+ }
+ } else {
+ // DWARF Exception handling
+ assert(Asm->MAI->isExceptionHandlingDwarf());
+
+ // The call-site table is a list of all call sites that may throw an
+ // exception (including C++ 'throw' statements) in the procedure
+ // fragment. It immediately follows the LSDA header. Each entry indicates,
+ // for a given call, the first corresponding action record and corresponding
+ // landing pad.
+ //
+ // The table begins with the number of bytes, stored as an LEB128
+ // compressed, unsigned integer. The records immediately follow the record
+ // count. They are sorted in increasing call-site address. Each record
+ // indicates:
+ //
+ // * The position of the call-site.
+ // * The position of the landing pad.
+ // * The first action record for that call site.
+ //
+ // A missing entry in the call-site table indicates that a call is not
+ // supposed to throw.
+
+ // Emit the landing pad call site table.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ unsigned Entry = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
+ const CallSiteEntry &S = *I;
+
+ MCSymbol *EHFuncBeginSym =
+ Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+
+ MCSymbol *BeginLabel = S.BeginLabel;
+ if (BeginLabel == 0)
+ BeginLabel = EHFuncBeginSym;
+ MCSymbol *EndLabel = S.EndLabel;
+ if (EndLabel == 0)
+ EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+
+
+ // Offset of the call site relative to the previous call site, counted in
+ // number of 16-byte bundles. The first call site is counted relative to
+ // the start of the procedure fragment.
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<");
+ Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (!S.PadLabel) {
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(" has no landing pad");
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/);
+ } else {
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine(" jumps to ") +
+ S.PadLabel->getName());
+ Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+ }
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(" On action: " +
+ Twine((S.Action - 1) / 2 + 1));
+ }
+ Asm->EmitULEB128(S.Action);
+ }
+ }
+
+ // Emit the Action Table.
+ int Entry = 0;
+ for (SmallVectorImpl<ActionEntry>::const_iterator
+ I = Actions.begin(), E = Actions.end(); I != E; ++I) {
+ const ActionEntry &Action = *I;
+
+ if (VerboseAsm) {
+ // Emit comments that decode the action table.
+ Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<");
+ }
+
+ // Type Filter
+ //
+ // Used by the runtime to match the type of the thrown exception to the
+ // type of the catch clauses or the types in the exception specification.
+ if (VerboseAsm) {
+ if (Action.ValueForTypeID > 0)
+ Asm->OutStreamer.AddComment(" Catch TypeInfo " +
+ Twine(Action.ValueForTypeID));
+ else if (Action.ValueForTypeID < 0)
+ Asm->OutStreamer.AddComment(" Filter TypeInfo " +
+ Twine(Action.ValueForTypeID));
+ else
+ Asm->OutStreamer.AddComment(" Cleanup");
+ }
+ Asm->EmitSLEB128(Action.ValueForTypeID);
+
+ // Action Record
+ //
+ // Self-relative signed displacement in bytes of the next action record,
+ // or 0 if there is no next action record.
+ if (VerboseAsm) {
+ if (Action.NextAction == 0) {
+ Asm->OutStreamer.AddComment(" No further actions");
+ } else {
+ unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
+ Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction));
+ }
+ }
+ Asm->EmitSLEB128(Action.NextAction);
+ }
+
+ EmitTypeInfos(TTypeEncoding);
+
+ Asm->EmitAlignment(2);
+}
+
+void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ int Entry = 0;
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
+ }
+
+ Asm->EmitULEB128(TypeID);
+ }
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfException::EndModule() {
+ llvm_unreachable("Should be implemented");
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfException::BeginFunction(const MachineFunction *MF) {
+ llvm_unreachable("Should be implemented");
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfException::EndFunction() {
+ llvm_unreachable("Should be implemented");
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 000000000000..74b1b13367a2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,234 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include <vector>
+
+namespace llvm {
+
+template <typename T> class SmallVectorImpl;
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MachineMove;
+class MachineInstr;
+class MachineFunction;
+class MCAsmInfo;
+class MCExpr;
+class MCSymbol;
+class Function;
+class AsmPrinter;
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives.
+///
+class DwarfException {
+protected:
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ /// MMI - Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// SharedTypeIds - How many leading type ids two landing pads have in common.
+ static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// PadLT - Order landing pads lexicographically by type id.
+ static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
+ /// PadRange - Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+ /// ActionEntry - Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ unsigned Previous;
+ };
+
+ /// CallSiteEntry - Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ MCSymbol *BeginLabel; // zero indicates the start of the function.
+ MCSymbol *EndLabel; // zero indicates the end of the function.
+
+ // The landing pad starts at PadLabel.
+ MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+ };
+
+ /// ComputeActionsTable - Compute the actions table and gather the first
+ /// action index for each landing pad site.
+ unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
+
+ /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+ /// marked `nounwind'. Return `false' otherwise.
+ bool CallToNoUnwindFunction(const MachineInstr *MI);
+
+ /// ComputeCallSiteTable - Compute the call-site table. The entry for an
+ /// invoke has a try-range containing the call, a non-zero landing pad and an
+ /// appropriate action. The entry for an ordinary call has a try-range
+ /// containing the call and zero for the landing pad and the action. Calls
+ /// marked 'nounwind' have no entry and must not be contained in the try-range
+ /// of any entry - they form gaps in the table. Entries must be ordered by
+ /// try-range address.
+ void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LPs,
+ const SmallVectorImpl<unsigned> &FirstActions);
+
+ /// EmitExceptionTable - Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally an index into
+ /// the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. Type id table contains references to all the C++ typeinfo for all
+ /// catches in the function. This tables is reversed indexed base 1.
+ void EmitExceptionTable();
+
+ virtual void EmitTypeInfos(unsigned TTypeEncoding);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfException(AsmPrinter *A);
+ virtual ~DwarfException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class DwarfCFIException : public DwarfException {
+ /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
+ /// should be emitted.
+ bool shouldEmitPersonality;
+
+ /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda
+ /// should be emitted.
+ bool shouldEmitLSDA;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ AsmPrinter::CFIMoveType moveTypeModule;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfCFIException(AsmPrinter *A);
+ virtual ~DwarfCFIException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class ARMException : public DwarfException {
+ void EmitTypeInfos(unsigned TTypeEncoding);
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ ARMException(AsmPrinter *A);
+ virtual ~ARMException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class Win64Exception : public DwarfException {
+ /// shouldEmitPersonality - Per-function flag to indicate if personality
+ /// info should be emitted.
+ bool shouldEmitPersonality;
+
+ /// shouldEmitLSDA - Per-function flag to indicate if the LSDA
+ /// should be emitted.
+ bool shouldEmitLSDA;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ Win64Exception(AsmPrinter *A);
+ virtual ~Win64Exception();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
new file mode 100644
index 000000000000..a8fb66dcf17b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -0,0 +1,120 @@
+//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the compiler plugin that is used in order to emit
+// garbage collection information in a convenient layout for parsing and
+// loading in the Erlang/OTP runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ErlangGCPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(AsmPrinter &AP);
+ void finishAssembly(AsmPrinter &AP);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGCPrinter() { }
+
+void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { }
+
+void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
+ MCStreamer &OS = AP.OutStreamer;
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+
+ // Put this in a custom .note section.
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
+ .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0,
+ SectionKind::getDataRel()));
+
+ // For each function...
+ for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+ GCFunctionInfo &MD = **FI;
+
+ /** A compact GC layout. Emit this data structure:
+ *
+ * struct {
+ * int16_t PointCount;
+ * void *SafePointAddress[PointCount];
+ * int16_t StackFrameSize; (in words)
+ * int16_t StackArity;
+ * int16_t LiveCount;
+ * int16_t LiveOffsets[LiveCount];
+ * } __gcmap_<FUNCTIONNAME>;
+ **/
+
+ // Align to address width.
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ // Emit PointCount.
+ OS.AddComment("safe point count");
+ AP.EmitInt16(MD.size());
+
+ // And each safe point...
+ for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE;
+ ++PI) {
+ // Emit the address of the safe point.
+ OS.AddComment("safe point address");
+ MCSymbol *Label = PI->Label;
+ AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/);
+ }
+
+ // Stack information never change in safe points! Only print info from the
+ // first call-site.
+ GCFunctionInfo::iterator PI = MD.begin();
+
+ // Emit the stack frame size.
+ OS.AddComment("stack frame size (in words)");
+ AP.EmitInt16(MD.getFrameSize() / IntPtrSize);
+
+ // Emit stack arity, i.e. the number of stacked arguments.
+ unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
+ unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ?
+ MD.getFunction().arg_size() - RegisteredArgs : 0;
+ OS.AddComment("stack arity");
+ AP.EmitInt16(StackArity);
+
+ // Emit the number of live roots in the function.
+ OS.AddComment("live root count");
+ AP.EmitInt16(MD.live_size(PI));
+
+ // And for each live root...
+ for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+ LE = MD.live_end(PI);
+ LI != LE; ++LI) {
+ // Emit live root's offset within the stack frame.
+ OS.AddComment("stack index (offset / wordsize)");
+ AP.EmitInt16(LI->StackOffset / IntPtrSize);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 000000000000..98177c0ba1cf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,166 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+ class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(AsmPrinter &AP);
+ void finishAssembly(AsmPrinter &AP);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
+static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
+ const std::string &MId = M.getModuleIdentifier();
+
+ std::string SymName;
+ SymName += "caml";
+ size_t Letter = SymName.size();
+ SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ SymName += "__";
+ SymName += Id;
+
+ // Capitalize the first letter of the module name.
+ SymName[Letter] = toupper(SymName[Letter]);
+
+ SmallString<128> TmpStr;
+ AP.Mang->getNameWithPrefix(TmpStr, SymName);
+
+ MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
+
+ AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ AP.OutStreamer.EmitLabel(Sym);
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), AP, "code_begin");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "data_begin");
+}
+
+/// emitAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+/// extern "C" struct align(sizeof(intptr_t)) {
+/// uint16_t NumDescriptors;
+/// struct align(sizeof(intptr_t)) {
+/// void *ReturnAddress;
+/// uint16_t FrameSize;
+/// uint16_t NumLiveOffsets;
+/// uint16_t LiveOffsets[NumLiveOffsets];
+/// } Descriptors[NumDescriptors];
+/// } caml${module}__frametable;
+///
+/// Note that this precludes programs from stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if
+/// either condition is detected in a function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), AP, "code_end");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "data_end");
+
+ // FIXME: Why does ocaml emit this??
+ AP.OutStreamer.EmitIntValue(0, IntPtrSize);
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "frametable");
+
+ int NumDescriptors = 0;
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ NumDescriptors++;
+ }
+ }
+
+ if (NumDescriptors >= 1<<16) {
+ // Very rude!
+ report_fatal_error(" Too much descriptor for ocaml GC");
+ }
+ AP.EmitInt16(NumDescriptors);
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+
+ uint64_t FrameSize = FI.getFrameSize();
+ if (FrameSize >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Frame size " + Twine(FrameSize) + ">= 65536.\n"
+ "(" + Twine(uintptr_t(&FI)) + ")");
+ }
+
+ AP.OutStreamer.AddComment("live roots for " +
+ Twine(FI.getFunction().getName()));
+ AP.OutStreamer.AddBlankLine();
+
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ size_t LiveCount = FI.live_size(J);
+ if (LiveCount >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Live root count "+Twine(LiveCount)+" >= 65536.");
+ }
+
+ AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize);
+ AP.EmitInt16(FrameSize);
+ AP.EmitInt16(LiveCount);
+
+ for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+ KE = FI.live_end(J); K != KE; ++K) {
+ if (K->StackOffset >= 1<<16) {
+ // Very rude!
+ report_fatal_error(
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+ }
+ AP.EmitInt16(K->StackOffset);
+ }
+
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
new file mode 100644
index 000000000000..156101286b75
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -0,0 +1,114 @@
+//===-- CodeGen/AsmPrinter/Win64Exception.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Win64 exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+Win64Exception::Win64Exception(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false)
+ {}
+
+Win64Exception::~Win64Exception() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void Win64Exception::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void Win64Exception::BeginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ shouldEmitMoves = Asm->needsSEHMoves();
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+
+ shouldEmitPersonality = hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym);
+
+ if (!shouldEmitPersonality)
+ return;
+
+ MCSymbol *GCCHandlerSym =
+ Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
+ Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void Win64Exception::EndFunction() {
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (shouldEmitPersonality) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+
+ Asm->OutStreamer.PushSection();
+ Asm->OutStreamer.EmitWin64EHHandlerData();
+ Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
+ 4);
+ EmitExceptionTable();
+ Asm->OutStreamer.PopSection();
+ }
+ Asm->OutStreamer.EmitWin64EHEndProc();
+}
diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
new file mode 100644
index 000000000000..012ff8ad8339
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -0,0 +1,466 @@
+//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the implementation of a basic TargetTransformInfo pass
+/// predicated on the target abstractions present in the target independent
+/// code generator. It uses these (primarily TargetLowering) to model as much
+/// of the TTI query interface as possible. It is included by most targets so
+/// that they can specialize only a small subset of the query space.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "basictti"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <utility>
+
+using namespace llvm;
+
+namespace {
+
+class BasicTTI : public ImmutablePass, public TargetTransformInfo {
+ const TargetLoweringBase *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ BasicTTI() : ImmutablePass(ID), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) {
+ initializeBasicTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ virtual bool isLegalAddImmediate(int64_t imm) const;
+ virtual bool isLegalICmpImmediate(int64_t imm) const;
+ virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) const;
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTypeLegal(Type *Ty) const;
+ virtual unsigned getJumpBufAlignment() const;
+ virtual unsigned getJumpBufSize() const;
+ virtual bool shouldBuildLookupTables() const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCFInstrCost(unsigned Opcode) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+ virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+ ArrayRef<Type*> Tys) const;
+ virtual unsigned getNumberOfParts(Type *Tp) const;
+ virtual unsigned getAddressComputationCost(Type *Ty) const;
+
+ /// @}
+};
+
+}
+
+INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
+ "Target independent code generator's TTI", true, true, false)
+char BasicTTI::ID = 0;
+
+ImmutablePass *
+llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) {
+ return new BasicTTI(TLI);
+}
+
+
+bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
+ return TLI->isLegalAddImmediate(imm);
+}
+
+bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
+ return TLI->isLegalICmpImmediate(imm);
+}
+
+bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) const {
+ TargetLoweringBase::AddrMode AM;
+ AM.BaseGV = BaseGV;
+ AM.BaseOffs = BaseOffset;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Scale;
+ return TLI->isLegalAddressingMode(AM, Ty);
+}
+
+bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return TLI->isTruncateFree(Ty1, Ty2);
+}
+
+bool BasicTTI::isTypeLegal(Type *Ty) const {
+ EVT T = TLI->getValueType(Ty);
+ return TLI->isTypeLegal(T);
+}
+
+unsigned BasicTTI::getJumpBufAlignment() const {
+ return TLI->getJumpBufAlignment();
+}
+
+unsigned BasicTTI::getJumpBufSize() const {
+ return TLI->getJumpBufSize();
+}
+
+bool BasicTTI::shouldBuildLookupTables() const {
+ return TLI->supportJumpTables() &&
+ (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Calls used by the vectorizers.
+//
+//===----------------------------------------------------------------------===//
+
+unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert,
+ bool Extract) const {
+ assert (Ty->isVectorTy() && "Can only scalarize vectors");
+ unsigned Cost = 0;
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ if (Insert)
+ Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ if (Extract)
+ Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+
+ return Cost;
+}
+
+unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
+ return 1;
+}
+
+unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
+ return 32;
+}
+
+unsigned BasicTTI::getMaximumUnrollFactor() const {
+ return 1;
+}
+
+unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const {
+ // Check if any of the operands are vector operands.
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+ // If the type is split to multiple registers, assume that thre is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector cost we need to use them.
+ if (LT.first > 1)
+ return LT.first * 2;
+ return LT.first * 1;
+ }
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+ // thare the code is twice as expensive.
+ return LT.first * 2;
+ }
+
+ // Else, assume that we need to scalarize this op.
+ if (Ty->isVectorTy()) {
+ unsigned Num = Ty->getVectorNumElements();
+ unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ // return the cost of multiple scalar invocation plus the cost of inserting
+ // and extracting the values.
+ return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+ }
+
+ // We don't know anything about this scalar instruction.
+ return 1;
+}
+
+unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return 1;
+}
+
+unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);
+
+ // Check for NOOP conversions.
+ if (SrcLT.first == DstLT.first &&
+ SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+
+ // Bitcast between types that are legalized to the same type are free.
+ if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
+ return 0;
+ }
+
+ if (Opcode == Instruction::Trunc &&
+ TLI->isTruncateFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ if (Opcode == Instruction::ZExt &&
+ TLI->isZExtFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ // If the cast is marked as legal (or promote) then assume low cost.
+ if (TLI->isOperationLegalOrPromote(ISD, DstLT.second))
+ return 1;
+
+ // Handle scalar conversions.
+ if (!Src->isVectorTy() && !Dst->isVectorTy()) {
+
+ // Scalar bitcasts are usually free.
+ if (Opcode == Instruction::BitCast)
+ return 0;
+
+ // Just check the op cost. If the operation is legal then assume it costs 1.
+ if (!TLI->isOperationExpand(ISD, DstLT.second))
+ return 1;
+
+ // Assume that illegal scalar instruction are expensive.
+ return 4;
+ }
+
+ // Check vector-to-vector casts.
+ if (Dst->isVectorTy() && Src->isVectorTy()) {
+
+ // If the cast is between same-sized registers, then the check is simple.
+ if (SrcLT.first == DstLT.first &&
+ SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+
+ // Assume that Zext is done using AND.
+ if (Opcode == Instruction::ZExt)
+ return 1;
+
+ // Assume that sext is done using SHL and SRA.
+ if (Opcode == Instruction::SExt)
+ return 2;
+
+ // Just check the op cost. If the operation is legal then assume it costs
+ // 1 and multiply by the type-legalization overhead.
+ if (!TLI->isOperationExpand(ISD, DstLT.second))
+ return SrcLT.first * 1;
+ }
+
+ // If we are converting vectors and the operation is illegal, or
+ // if the vectors are legalized to different types, estimate the
+ // scalarization costs.
+ unsigned Num = Dst->getVectorNumElements();
+ unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(),
+ Src->getScalarType());
+
+ // Return the cost of multiple scalar invocation plus the cost of
+ // inserting and extracting the values.
+ return getScalarizationOverhead(Dst, true, true) + Num * Cost;
+ }
+
+ // We already handled vector-to-vector and scalar-to-scalar conversions. This
+ // is where we handle bitcast between vectors and scalars. We need to assume
+ // that the conversion is scalarized in one way or another.
+ if (Opcode == Instruction::BitCast)
+ // Illegal bitcasts are done by storing and loading from a stack slot.
+ return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
+ (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
+
+ llvm_unreachable("Unhandled cast");
+ }
+
+unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const {
+ // Branches are assumed to be predicted.
+ return 0;
+}
+
+unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Selects on vectors are actually vector selects.
+ if (ISD == ISD::SELECT) {
+ assert(CondTy && "CondTy must exist");
+ if (CondTy->isVectorTy())
+ ISD = ISD::VSELECT;
+ }
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1. Multiply
+ // by the type-legalization overhead.
+ return LT.first * 1;
+ }
+
+ // Otherwise, assume that the cast is scalarized.
+ if (ValTy->isVectorTy()) {
+ unsigned Num = ValTy->getVectorNumElements();
+ if (CondTy)
+ CondTy = CondTy->getScalarType();
+ unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
+ CondTy);
+
+ // Return the cost of multiple scalar invocation plus the cost of inserting
+ // and extracting the values.
+ return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
+ }
+
+ // Unknown scalar opcode.
+ return 1;
+}
+
+unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ return 1;
+}
+
+unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ assert(!Src->isVoidTy() && "Invalid type");
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+ // Assume that all loads of legal types cost 1.
+ return LT.first;
+}
+
+unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ unsigned ISD = 0;
+ switch (IID) {
+ default: {
+ // Assume that we need to scalarize this intrinsic.
+ unsigned ScalarizationCost = 0;
+ unsigned ScalarCalls = 1;
+ if (RetTy->isVectorTy()) {
+ ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
+ ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+ }
+ for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+ if (Tys[i]->isVectorTy()) {
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+ ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+ }
+ }
+
+ return ScalarCalls + ScalarizationCost;
+ }
+ // Look for intrinsics that can be lowered directly or turned into a scalar
+ // intrinsic call.
+ case Intrinsic::sqrt: ISD = ISD::FSQRT; break;
+ case Intrinsic::sin: ISD = ISD::FSIN; break;
+ case Intrinsic::cos: ISD = ISD::FCOS; break;
+ case Intrinsic::exp: ISD = ISD::FEXP; break;
+ case Intrinsic::exp2: ISD = ISD::FEXP2; break;
+ case Intrinsic::log: ISD = ISD::FLOG; break;
+ case Intrinsic::log10: ISD = ISD::FLOG10; break;
+ case Intrinsic::log2: ISD = ISD::FLOG2; break;
+ case Intrinsic::fabs: ISD = ISD::FABS; break;
+ case Intrinsic::floor: ISD = ISD::FFLOOR; break;
+ case Intrinsic::ceil: ISD = ISD::FCEIL; break;
+ case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
+ case Intrinsic::rint: ISD = ISD::FRINT; break;
+ case Intrinsic::pow: ISD = ISD::FPOW; break;
+ case Intrinsic::fma: ISD = ISD::FMA; break;
+ case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
+ }
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+ // If the type is split to multiple registers, assume that thre is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector cost we need to use them.
+ if (LT.first > 1)
+ return LT.first * 2;
+ return LT.first * 1;
+ }
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+ // thare the code is twice as expensive.
+ return LT.first * 2;
+ }
+
+ // Else, assume that we need to scalarize this intrinsic. For math builtins
+ // this will emit a costly libcall, adding call overhead and spills. Make it
+ // very expensive.
+ if (RetTy->isVectorTy()) {
+ unsigned Num = RetTy->getVectorNumElements();
+ unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
+ Tys);
+ return 10 * Cost * Num;
+ }
+
+ // This is going to be turned into a library call, make it expensive.
+ return 10;
+}
+
+unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ return LT.first;
+}
+
+unsigned BasicTTI::getAddressComputationCost(Type *Ty) const {
+ return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 000000000000..f8cc3b3999e8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1725 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBB's, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation, it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+ cl::desc("Max number of predecessors to consider tail merging"),
+ cl::init(150), cl::Hidden);
+
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
+
+namespace {
+ /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass {
+ public:
+ static char ID;
+ explicit BranchFolderPass(): MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char BranchFolderPass::ID = 0;
+char &llvm::BranchFolderPassID = BranchFolderPass::ID;
+
+INITIALIZE_PASS(BranchFolderPass, "branch-folder",
+ "Control Flow Optimizer", false, false)
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true);
+ return Folder.OptimizeFunction(MF,
+ MF.getTarget().getInstrInfo(),
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+}
+
+
+BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) {
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+
+ EnableHoistCommonCode = CommonHoist;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ MachineFunction *MF = MBB->getParent();
+ // drop all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // Avoid matching if this pointer gets reused.
+ TriedMerging.erase(MBB);
+
+ // Remove the block.
+ MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+/// r0 = implicit_def
+/// r1 = implicit_def
+/// br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+ SmallSet<unsigned, 4> ImpDefRegs;
+ MachineBasicBlock::iterator I = MBB->begin();
+ while (I != MBB->end()) {
+ if (!I->isImplicitDef())
+ break;
+ unsigned Reg = I->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ ImpDefRegs.insert(*SubRegs);
+ ++I;
+ }
+ if (ImpDefRegs.empty())
+ return false;
+
+ MachineBasicBlock::iterator FirstTerm = I;
+ while (I != MBB->end()) {
+ if (!TII->isUnpredicatedTerminator(I))
+ return false;
+ // See if it uses any of the implicitly defined registers.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (ImpDefRegs.count(Reg))
+ return false;
+ }
+ ++I;
+ }
+
+ I = MBB->begin();
+ while (I != FirstTerm) {
+ MachineInstr *ImpDefMI = &*I;
+ ++I;
+ MBB->erase(ImpDefMI);
+ }
+
+ return true;
+}
+
+/// OptimizeFunction - Perhaps branch folding, tail merging and other
+/// CFG optimizations on the given function.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi) {
+ if (!tii) return false;
+
+ TriedMerging.clear();
+
+ TII = tii;
+ TRI = tri;
+ MMI = mmi;
+ RS = NULL;
+
+ // Use a RegScavenger to help update liveness when required.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
+ RS = new RegScavenger();
+ else
+ MRI.invalidateLiveness();
+
+ // Fix CFG. The later algorithms expect it to be right.
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+ MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+ MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ MadeChange |= OptimizeImpDefsBlock(MBB);
+ }
+
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = TailMergeBlocks(MF);
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ if (EnableHoistCommonCode)
+ MadeChangeThisIteration |= HoistCommonCode(MF);
+ MadeChange |= MadeChangeThisIteration;
+ }
+
+ // See if any jump tables have become dead as the code generator
+ // did its thing.
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ if (JTI == 0) {
+ delete RS;
+ return MadeChange;
+ }
+
+ // Walk the function to find jump tables that are live.
+ BitVector JTIsLive(JTI->getJumpTables().size());
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I)
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ MachineOperand &Op = I->getOperand(op);
+ if (!Op.isJTI()) continue;
+
+ // Remember that this JT is live.
+ JTIsLive.set(Op.getIndex());
+ }
+ }
+
+ // Finally, remove dead jump tables. This happens when the
+ // indirect jump was unreachable (and thus deleted).
+ for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+ if (!JTIsLive.test(i)) {
+ JTI->RemoveJumpTable(i);
+ MadeChange = true;
+ }
+
+ delete RS;
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+ unsigned Hash = MI->getOpcode();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+
+ // Merge in bits from the operand if easy.
+ unsigned OperandHash = 0;
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register: OperandHash = Op.getReg(); break;
+ case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OperandHash = Op.getMBB()->getNumber();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ OperandHash = Op.getIndex();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ // Global address / external symbol are too hard, don't bother, but do
+ // pull in the offset.
+ OperandHash = Op.getOffset();
+ break;
+ default: break;
+ }
+
+ Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+ }
+ return Hash;
+}
+
+/// HashEndOfMBB - Hash the last instruction in the MBB.
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) {
+ MachineBasicBlock::const_iterator I = MBB->end();
+ if (I == MBB->begin())
+ return 0; // Empty MBB.
+
+ --I;
+ // Skip debug info so it will not affect codegen.
+ while (I->isDebugValue()) {
+ if (I==MBB->begin())
+ return 0; // MBB empty except for debug info.
+ --I;
+ }
+
+ return HashMachineInstr(I);
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common together at their end. Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ I1 = MBB1->end();
+ I2 = MBB2->end();
+
+ unsigned TailLen = 0;
+ while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+ --I1; --I2;
+ // Skip debugging pseudos; necessary to avoid changing the code.
+ while (I1->isDebugValue()) {
+ if (I1==MBB1->begin()) {
+ while (I2->isDebugValue()) {
+ if (I2==MBB2->begin())
+ // I1==DBG at begin; I2==DBG at begin
+ return TailLen;
+ --I2;
+ }
+ ++I2;
+ // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
+ return TailLen;
+ }
+ --I1;
+ }
+ // I1==first (untested) non-DBG preceding known match
+ while (I2->isDebugValue()) {
+ if (I2==MBB2->begin()) {
+ ++I1;
+ // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
+ return TailLen;
+ }
+ --I2;
+ }
+ // I1, I2==first (untested) non-DBGs preceding known match
+ if (!I1->isIdenticalTo(I2) ||
+ // FIXME: This check is dubious. It's used to get around a problem where
+ // people incorrectly expect inline asm directives to remain in the same
+ // relative order. This is untenable because normal compiler
+ // optimizations (like this one) may reorder and/or merge these
+ // directives.
+ I1->isInlineAsm()) {
+ ++I1; ++I2;
+ break;
+ }
+ ++TailLen;
+ }
+ // Back past possible debugging pseudos at beginning of block. This matters
+ // when one block differs from the other only by whether debugging pseudos
+ // are present at the beginning. (This way, the various checks later for
+ // I1==MBB1->begin() work as expected.)
+ if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
+ --I2;
+ while (I2->isDebugValue()) {
+ if (I2 == MBB2->begin())
+ return TailLen;
+ --I2;
+ }
+ ++I2;
+ }
+ if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
+ --I1;
+ while (I1->isDebugValue()) {
+ if (I1 == MBB1->begin())
+ return TailLen;
+ --I1;
+ }
+ ++I1;
+ }
+ return TailLen;
+}
+
+void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB) {
+ if (RS) {
+ RS->enterBasicBlock(CurMBB);
+ if (!CurMBB->empty())
+ RS->forward(prior(CurMBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest) {
+ MachineBasicBlock *CurMBB = OldInst->getParent();
+
+ TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(CurMBB, NewDest);
+
+ ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls into the part starting at the
+/// iterator. This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1) {
+ if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
+ return 0;
+
+ MachineFunction &MF = *CurMBB.getParent();
+
+ // Create the fall-through block.
+ MachineFunction::iterator MBBI = &CurMBB;
+ MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
+ CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+ // Move all the successors of this block to the specified block.
+ NewMBB->transferSuccessors(&CurMBB);
+
+ // Add an edge from CurMBB to NewMBB for the fall-through.
+ CurMBB.addSuccessor(NewMBB);
+
+ // Splice the code over.
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(&CurMBB, NewMBB);
+
+ return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ unsigned Time = 0;
+ for (; I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ if (I->isCall())
+ Time += 10;
+ else if (I->mayLoad() || I->mayStore())
+ Time += 2;
+ else
+ ++Time;
+ }
+ return Time;
+}
+
+// CurMBB needs to add an unconditional branch to SuccMBB (we removed these
+// branches temporarily for tail merging). In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccMBB instead.
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = CurMBB->getParent();
+ MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB));
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
+ if (I != MF->end() &&
+ !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ MachineBasicBlock *NextBB = I;
+ if (TBB == NextBB && !Cond.empty() && !FBB) {
+ if (!TII->ReverseBranchCondition(Cond)) {
+ TII->RemoveBranch(*CurMBB);
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl);
+ return;
+ }
+ }
+ }
+ TII->InsertBranch(*CurMBB, SuccBB, NULL,
+ SmallVector<MachineOperand, 0>(), dl);
+}
+
+bool
+BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
+ if (getHash() < o.getHash())
+ return true;
+ if (getHash() > o.getHash())
+ return false;
+ if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ return true;
+ if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ return false;
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+ llvm_unreachable("Predecessor appears twice");
+#else
+ return false;
+#endif
+}
+
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &I) {
+ I = MBB->end();
+ unsigned NumTerms = 0;
+ for (;;) {
+ if (I == MBB->begin()) {
+ I = MBB->end();
+ break;
+ }
+ --I;
+ if (!I->isTerminator()) break;
+ ++NumTerms;
+ }
+ return NumTerms;
+}
+
+/// ProfitableToMerge - Check if two machine basic blocks have a common tail
+/// and decide if it would be profitable to merge those tails. Return the
+/// length of the common tail and iterators to the first common instruction
+/// in each block.
+static bool ProfitableToMerge(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ unsigned minCommonTailLength,
+ unsigned &CommonTailLen,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
+ if (CommonTailLen == 0)
+ return false;
+ DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
+ << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
+ << '\n');
+
+ // It's almost always profitable to merge any number of non-terminator
+ // instructions with the block that falls through into the common successor.
+ if (MBB1 == PredBB || MBB2 == PredBB) {
+ MachineBasicBlock::iterator I;
+ unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+ if (CommonTailLen > NumTerms)
+ return true;
+ }
+
+ // If one of the blocks can be completely merged and happens to be in
+ // a position where the other could fall through into it, merge any number
+ // of instructions, because it can be done without a branch.
+ // TODO: If the blocks are not adjacent, move one of them so that they are?
+ if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ return true;
+ if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ return true;
+
+ // If both blocks have an unconditional branch temporarily stripped out,
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ !MBB1->back().isBarrier() &&
+ !MBB2->back().isBarrier())
+ ++EffectiveTailLen;
+
+ // Check if the common tail is long enough to be worthwhile.
+ if (EffectiveTailLen >= minCommonTailLength)
+ return true;
+
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ MachineFunction *MF = MBB1->getParent();
+ if (EffectiveTailLen >= 2 &&
+ MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ (I1 == MBB1->begin() || I2 == MBB2->begin()))
+ return true;
+
+ return false;
+}
+
+/// ComputeSameTails - Look through all the blocks in MergePotentials that have
+/// hash CurHash (guaranteed to match the last element). Build the vector
+/// SameTails of all those that have the (same) largest number of instructions
+/// in common of any pair of these blocks. SameTails entries contain an
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// instruction where the matching code sequence begins.
+/// Order of elements in SameTails is the reverse of the order in which
+/// those blocks appear in MergePotentials (where they are not necessarily
+/// consecutive).
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ unsigned maxCommonTailLength = 0U;
+ SameTails.clear();
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ MPIterator HighestMPIter = prior(MergePotentials.end());
+ for (MPIterator CurMPIter = prior(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter != B && CurMPIter->getHash() == CurHash;
+ --CurMPIter) {
+ for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) {
+ unsigned CommonTailLen;
+ if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
+ minCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2,
+ SuccBB, PredBB)) {
+ if (CommonTailLen > maxCommonTailLength) {
+ SameTails.clear();
+ maxCommonTailLength = CommonTailLen;
+ HighestMPIter = CurMPIter;
+ SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
+ }
+ if (HighestMPIter == CurMPIter &&
+ CommonTailLen == maxCommonTailLength)
+ SameTails.push_back(SameTailElt(I, TrialBBI2));
+ }
+ if (I == B)
+ break;
+ }
+ }
+ return maxCommonTailLength;
+}
+
+/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
+/// MergePotentials, restoring branches at ends of blocks as appropriate.
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ MPIterator CurMPIter, B;
+ for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+ CurMPIter->getHash() == CurHash;
+ --CurMPIter) {
+ // Put the unconditional branch back, if we need one.
+ MachineBasicBlock *CurMBB = CurMPIter->getBlock();
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ if (CurMPIter == B)
+ break;
+ }
+ if (CurMPIter->getHash() != CurHash)
+ CurMPIter++;
+ MergePotentials.erase(CurMPIter, MergePotentials.end());
+}
+
+/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
+/// only of the common tail. Create a block that does by splitting one.
+bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex) {
+ commonTailIndex = 0;
+ unsigned TimeEstimate = ~0U;
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ // Use PredBB if possible; that doesn't require a new branch.
+ if (SameTails[i].getBlock() == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ // Otherwise, make a (fairly bogus) choice based on estimate of
+ // how long it will take the various blocks to execute.
+ unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
+ SameTails[i].getTailStartPos());
+ if (t <= TimeEstimate) {
+ TimeEstimate = t;
+ commonTailIndex = i;
+ }
+ }
+
+ MachineBasicBlock::iterator BBI =
+ SameTails[commonTailIndex].getTailStartPos();
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ // If the common tail includes any debug info we will take it pretty
+ // randomly from one of the inputs. Might be better to remove it?
+ DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
+ << maxCommonTailLength);
+
+ MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
+ if (!newMBB) {
+ DEBUG(dbgs() << "... failed!");
+ return false;
+ }
+
+ SameTails[commonTailIndex].setBlock(newMBB);
+ SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
+
+ // If we split PredBB, newMBB is the new predecessor.
+ if (PredBB == MBB)
+ PredBB = newMBB;
+
+ return true;
+}
+
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged. If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ bool MadeChange = false;
+
+ // Except for the special cases below, tail-merge if there are at least
+ // this many instructions in common.
+ unsigned minCommonTailLength = TailMergeSize;
+
+ DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", ");
+ dbgs() << "\n";
+ if (SuccBB) {
+ dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n';
+ if (PredBB)
+ dbgs() << " which has fall-through from BB#"
+ << PredBB->getNumber() << "\n";
+ }
+ dbgs() << "Looking for common tails of at least "
+ << minCommonTailLength << " instruction"
+ << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ );
+
+ // Sort by hash value so that blocks with identical end sequences sort
+ // together.
+ std::stable_sort(MergePotentials.begin(), MergePotentials.end());
+
+ // Walk through equivalence sets looking for actual exact matches.
+ while (MergePotentials.size() > 1) {
+ unsigned CurHash = MergePotentials.back().getHash();
+
+ // Build SameTails, identifying the set of blocks with this hash code
+ // and with the maximum number of instructions in common.
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength,
+ SuccBB, PredBB);
+
+ // If we didn't find any pair that has at least minCommonTailLength
+ // instructions in common, remove all blocks with this hash code and retry.
+ if (SameTails.empty()) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+
+ // If one of the blocks is the entire common tail (and not the entry
+ // block, which we can't jump to), we can treat all blocks with this same
+ // tail at once. Use PredBB if that is one of the possibilities, as that
+ // will not introduce any extra branches.
+ MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
+ getParent()->begin();
+ unsigned commonTailIndex = SameTails.size();
+ // If there are two blocks, check to see if one can be made to fall through
+ // into the other.
+ if (SameTails.size() == 2 &&
+ SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
+ SameTails[1].tailIsWholeBlock())
+ commonTailIndex = 1;
+ else if (SameTails.size() == 2 &&
+ SameTails[1].getBlock()->isLayoutSuccessor(
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock())
+ commonTailIndex = 0;
+ else {
+ // Otherwise just pick one, favoring the fall-through predecessor if
+ // there is one.
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = SameTails[i].getBlock();
+ if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
+ continue;
+ if (MBB == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ if (SameTails[i].tailIsWholeBlock())
+ commonTailIndex = i;
+ }
+ }
+
+ if (commonTailIndex == SameTails.size() ||
+ (SameTails[commonTailIndex].getBlock() == PredBB &&
+ !SameTails[commonTailIndex].tailIsWholeBlock())) {
+ // None of the blocks consist entirely of the common tail.
+ // Split a block so that one does.
+ if (!CreateCommonTailOnlyBlock(PredBB,
+ maxCommonTailLength, commonTailIndex)) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+ }
+
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+ // MBB is common tail. Adjust all other BB's to jump to this one.
+ // Traversal must be forwards so erases work.
+ DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
+ << " for ");
+ for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
+ if (commonTailIndex == i)
+ continue;
+ DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", "));
+ // Hack the end off BB i, making it jump to BB commonTailIndex instead.
+ ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
+ // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
+ MergePotentials.erase(SameTails[i].getMPIter());
+ }
+ DEBUG(dbgs() << "\n");
+ // We leave commonTailIndex in the worklist in case there are other blocks
+ // that match it with a smaller number of instructions.
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+ if (!EnableTailMerge) return MadeChange;
+
+ // First find blocks with no successors.
+ MergePotentials.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
+ if (TriedMerging.count(I))
+ continue;
+ if (I->succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
+ }
+
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(NULL, NULL);
+
+ // Look at blocks (IBB) with multiple predecessors (PBB).
+ // We change each predecessor to a canonical form, by
+ // (1) temporarily removing any unconditional branch from the predecessor
+ // to IBB, and
+ // (2) alter conditional branches so they branch to the other block
+ // not IBB; this may require adding back an unconditional branch to IBB
+ // later, where there wasn't one coming in. E.g.
+ // Bcc IBB
+ // fallthrough to QBB
+ // here becomes
+ // Bncc QBB
+ // with a conceptual B to IBB after that, which never actually exists.
+ // With those changes, we see whether the predecessors' tails match,
+ // and merge them if so. We change things out of canonical form and
+ // back to the way they were later in the process. (OptimizeBranches
+ // would undo some of this, but we can't use it, because we'd get into
+ // a compile-time infinite loop repeatedly doing and undoing the same
+ // transformations.)
+
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I) {
+ if (I->pred_size() < 2) continue;
+ SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ E2 = I->pred_end();
+ P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
+ MachineBasicBlock *PBB = *P;
+ if (TriedMerging.count(PBB))
+ continue;
+
+ // Skip blocks that loop to themselves, can't tail merge these.
+ if (PBB == IBB)
+ continue;
+
+ // Visit each predecessor only once.
+ if (!UniquePreds.insert(PBB))
+ continue;
+
+ // Skip blocks which may jump to a landing pad. Can't tail merge these.
+ if (PBB->getLandingPadSuccessor())
+ continue;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and we cannot reverse the
+ // branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB == IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB)
+ FBB = llvm::next(MachineFunction::iterator(PBB));
+ }
+
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice to have
+ // a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
+ PredNextBB = IP;
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB != IBB) // ubr
+ continue;
+ } else {
+ if (TBB != IBB && IBB != PredNextBB) // cbr
+ continue;
+ }
+ }
+
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
+ }
+
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
+ }
+ }
+
+ // If this is a large problem, avoid visiting the same basic blocks multiple
+ // times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+
+ // Reinsert an unconditional branch if needed. The 1 below can occur as a
+ // result of removing blocks in TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= OptimizeBlock(MBB);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+// Blocks should be considered empty if they contain only debug info;
+// else the debug info would affect codegen.
+static bool IsEmptyBlock(MachineBasicBlock *MBB) {
+ if (MBB->empty())
+ return true;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI!=MBBE; ++MBBI) {
+ if (!MBBI->isDebugValue())
+ return false;
+ }
+ return true;
+}
+
+// Blocks with only debug info and branches should be considered the same
+// as blocks with only branches.
+static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator MBBI, MBBE;
+ for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) {
+ if (!MBBI->isDebugValue())
+ break;
+ }
+ return (MBBI->isBranch());
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
+/// result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+ // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+ // optimize branches that branch to either a return block or an assert block
+ // into a fallthrough to the return.
+ if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false;
+
+ // If there is a clear successor ordering we make sure that one block
+ // will fall through to the next
+ if (MBB1->isSuccessor(MBB2)) return true;
+ if (MBB2->isSuccessor(MBB1)) return false;
+
+ // Neither block consists entirely of debug info (per IsEmptyBlock check),
+ // so we needn't test for falling off the beginning here.
+ MachineBasicBlock::iterator MBB1I = --MBB1->end();
+ while (MBB1I->isDebugValue())
+ --MBB1I;
+ MachineBasicBlock::iterator MBB2I = --MBB2->end();
+ while (MBB2I->isDebugValue())
+ --MBB2I;
+ return MBB2I->isCall() && !MBB1I->isCall();
+}
+
+/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
+/// instructions on the block. Always use the DebugLoc of the first
+/// branching instruction found unless its absent, in which case use the
+/// DebugLoc of the second if present.
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return DebugLoc();
+ --I;
+ while (I->isDebugValue() && I != MBB.begin())
+ --I;
+ if (I->isBranch())
+ return I->getDebugLoc();
+ return DebugLoc();
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+ MachineFunction &MF = *MBB->getParent();
+ReoptimizeBlock:
+
+ MachineFunction::iterator FallThrough = MBB;
+ ++FallThrough;
+
+ // If this block is empty, make everyone use its fall-through, not the block
+ // explicitly. Landing pads should not do this since the landing-pad table
+ // points to this block. Blocks with their addresses taken shouldn't be
+ // optimized away.
+ if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
+ // Dead block? Leave for cleanup later.
+ if (MBB->pred_empty()) return MadeChange;
+
+ if (FallThrough == MF.end()) {
+ // TODO: Simplify preds to not branch here if possible!
+ } else {
+ // Rewrite all predecessors of the old block to go to the fallthrough
+ // instead.
+ while (!MBB->pred_empty()) {
+ MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+ Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+ }
+ // If MBB was the target of a jump table, update jump tables to go to the
+ // fallthrough instead.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, FallThrough);
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
+
+ // Check to see if we can simplify the terminator of the block before this
+ // one.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ if (!PriorUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+ !PriorCond.empty());
+
+ // If the previous branch is conditional and both conditions go to the same
+ // destination, remove the branch, replacing it with an unconditional one or
+ // a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ PriorCond.clear();
+ if (PriorTBB != MBB)
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the previous block unconditionally falls through to this block and
+ // this block has no other predecessors, move the contents of this block
+ // into the prior block. This doesn't usually happen when SimplifyCFG
+ // has been used, but it can happen if tail merging splits a fall-through
+ // predecessor of a block.
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+ PrevBB.succ_size() == 1 &&
+ !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
+ DEBUG(dbgs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
+ // Remove redundant DBG_VALUEs first.
+ if (PrevBB.begin() != PrevBB.end()) {
+ MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
+ --PrevBBIter;
+ MachineBasicBlock::iterator MBBIter = MBB->begin();
+ // Check if DBG_VALUE at the end of PrevBB is identical to the
+ // DBG_VALUE at the beginning of MBB.
+ while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
+ && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
+ if (!MBBIter->isIdenticalTo(PrevBBIter))
+ break;
+ MachineInstr *DuplicateDbg = MBBIter;
+ ++MBBIter; -- PrevBBIter;
+ DuplicateDbg->eraseFromParent();
+ }
+ }
+ PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+
+ // If the previous branch *only* branches to *this* block (conditional or
+ // not) remove the branch.
+ if (PriorTBB == MBB && PriorFBB == 0) {
+ TII->RemoveBranch(PrevBB);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches somewhere else on the condition and here if
+ // the condition is false, remove the uncond second branch.
+ if (PriorFBB == MBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches here on true and somewhere else on false, and
+ // if the branch condition is reversible, reverse the branch to create a
+ // fall-through.
+ if (PriorTBB == MBB) {
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this block has no successors (e.g. it is a return block or ends with
+ // a call to a no-return function like abort or __cxa_throw) and if the pred
+ // falls through into this block, and if it would otherwise fall through
+ // into the block after this, move this block to the end of the function.
+ //
+ // We consider it more likely that execution will stay in the function (e.g.
+ // due to loops) than it is to exit it. This asserts in loops etc, moving
+ // the assert condition out of the loop body.
+ if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 &&
+ MachineFunction::iterator(PriorTBB) == FallThrough &&
+ !MBB->canFallThrough()) {
+ bool DoTransform = true;
+
+ // We have to be careful that the succs of PredBB aren't both no-successor
+ // blocks. If neither have successors and if PredBB is the second from
+ // last block in the function, we'd just keep swapping the two blocks for
+ // last. Only do the swap if one is clearly better to fall through than
+ // the other.
+ if (FallThrough == --MF.end() &&
+ !IsBetterFallthrough(PriorTBB, MBB))
+ DoTransform = false;
+
+ if (DoTransform) {
+ // Reverse the branch so we will fall through on the previous true cond.
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DEBUG(dbgs() << "\nMoving MBB: " << *MBB
+ << "To make fallthrough to: " << *PriorTBB << "\n");
+
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
+
+ // Move this block to the end of the function.
+ MBB->moveAfter(--MF.end());
+ MadeChange = true;
+ ++NumBranchOpts;
+ return MadeChange;
+ }
+ }
+ }
+ }
+
+ // Analyze the branch in the current block.
+ MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+ SmallVector<MachineOperand, 4> CurCond;
+ bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+ if (!CurUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+ // If this is a two-way branch, and the FBB branches to this block, reverse
+ // the condition so the single-basic-block loop is faster. Instead of:
+ // Loop: xxx; jcc Out; jmp Loop
+ // we want:
+ // Loop: xxx; jncc Loop; jmp Out
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ SmallVector<MachineOperand, 4> NewCond(CurCond);
+ if (!TII->ReverseBranchCondition(NewCond)) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
+ TII->RemoveBranch(*MBB);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this branch is the only thing in its block, see if we can forward
+ // other blocks across it.
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
+ !MBB->hasAddressTaken()) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
+ // This block may contain just an unconditional branch. Because there can
+ // be 'non-branch terminators' in the block, try removing the branch and
+ // then seeing if the block is empty.
+ TII->RemoveBranch(*MBB);
+ // If the only things remaining in the block are debug info, remove these
+ // as well, so this will behave the same as an empty block in non-debug
+ // mode.
+ if (!MBB->empty()) {
+ bool NonDebugInfoFound = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (!I->isDebugValue()) {
+ NonDebugInfoFound = true;
+ break;
+ }
+ }
+ if (!NonDebugInfoFound)
+ // Make the block empty, losing the debug info (we could probably
+ // improve this in some cases.)
+ MBB->erase(MBB->begin(), MBB->end());
+ }
+ // If this block is just an unconditional branch to CurTBB, we can
+ // usually completely eliminate the block. The only case we cannot
+ // completely eliminate the block is when the block before this one
+ // falls through into MBB and we can't understand the prior block's branch
+ // condition.
+ if (MBB->empty()) {
+ bool PredHasNoFallThrough = !PrevBB.canFallThrough();
+ if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+ !PrevBB.isSuccessor(MBB)) {
+ // If the prior block falls through into us, turn it into an
+ // explicit branch to us to make updates simpler.
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ PriorTBB != MBB && PriorFBB != MBB) {
+ if (PriorTBB == 0) {
+ assert(PriorCond.empty() && PriorFBB == 0 &&
+ "Bad branch analysis");
+ PriorTBB = MBB;
+ } else {
+ assert(PriorFBB == 0 && "Machine CFG out of date!");
+ PriorFBB = MBB;
+ }
+ DebugLoc pdl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
+ }
+
+ // Iterate through all the predecessors, revectoring each in-turn.
+ size_t PI = 0;
+ bool DidChange = false;
+ bool HasBranchToSelf = false;
+ while(PI != MBB->pred_size()) {
+ MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+ if (PMBB == MBB) {
+ // If this block has an uncond branch to itself, leave it.
+ ++PI;
+ HasBranchToSelf = true;
+ } else {
+ DidChange = true;
+ PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+ // If this change resulted in PMBB ending in a conditional
+ // branch where both conditions go to the same destination,
+ // change this to an unconditional branch (and fix the CFG).
+ MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+ SmallVector<MachineOperand, 4> NewCurCond;
+ bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
+ NewCurFBB, NewCurCond, true);
+ if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ DebugLoc pdl = getBranchDebugLoc(*PMBB);
+ TII->RemoveBranch(*PMBB);
+ NewCurCond.clear();
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
+ }
+ }
+ }
+
+ // Change any jumptables to go to the new MBB.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);
+ if (DidChange) {
+ ++NumBranchOpts;
+ MadeChange = true;
+ if (!HasBranchToSelf) return MadeChange;
+ }
+ }
+ }
+
+ // Add the branch back if the block is more than just an uncond branch.
+ TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl);
+ }
+ }
+
+ // If the prior block doesn't fall through into this block, and if this
+ // block doesn't fall through into some other block, see if we can find a
+ // place to move this block where a fall-through will happen.
+ if (!PrevBB.canFallThrough()) {
+
+ // Now we know that there was no fall-through into this block, check to
+ // see if it has a fall-through into its successor.
+ bool CurFallsThru = MBB->canFallThrough();
+
+ if (!MBB->isLandingPad()) {
+ // Check all the predecessors of this block. If one of them has no fall
+ // throughs, move this block right after it.
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI) {
+ // Analyze the branch at the end of the pred.
+ MachineBasicBlock *PredBB = *PI;
+ MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+ MachineBasicBlock *PredTBB = 0, *PredFBB = 0;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (PredBB != MBB && !PredBB->canFallThrough() &&
+ !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
+ && (!CurFallsThru || !CurTBB || !CurFBB)
+ && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ // If the current block doesn't fall through, just move it.
+ // If the current block can fall through and does not end with a
+ // conditional branch, we need to append an unconditional jump to
+ // the (current) next block. To avoid a possible compile-time
+ // infinite loop, move blocks only backward in this case.
+ // Also, if there are already 2 branches here, we cannot add a third;
+ // this means we have the case
+ // Bcc next
+ // B elsewhere
+ // next:
+ if (CurFallsThru) {
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+ CurCond.clear();
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc());
+ }
+ MBB->moveAfter(PredBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+ }
+
+ if (!CurFallsThru) {
+ // Check all successors to see if we can move this block before it.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ // Analyze the branch at the end of the block before the succ.
+ MachineBasicBlock *SuccBB = *SI;
+ MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+
+ // If this block doesn't already fall-through to that successor, and if
+ // the succ doesn't already have a block that can fall through into it,
+ // and if the successor isn't an EH destination, we can arrange for the
+ // fallthrough to happen.
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
+ !SuccBB->isLandingPad()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // Okay, there is no really great place to put this block. If, however,
+ // the block before this one would be a fall-through if this block were
+ // removed, move this block to the end of the function.
+ MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0;
+ SmallVector<MachineOperand, 4> PrevCond;
+ if (FallThrough != MF.end() &&
+ !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
+ PrevBB.isSuccessor(FallThrough)) {
+ MBB->moveAfter(--MF.end());
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+/// HoistCommonCode - Hoist common instruction sequences at the start of basic
+/// blocks to their common predecessor.
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= HoistCommonCodeInSuccs(MBB);
+ }
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// findHoistingInsertPosAndDeps - Find the location to move common instructions
+/// in successors to. The location is usually just before the terminator,
+/// however if the terminator is a conditional branch and its previous
+/// instruction is the flag setting instruction, the previous instruction is
+/// the preferred location. This function also gathers uses and defs of the
+/// instructions from the insertion point to the end of the block. The data is
+/// used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ SmallSet<unsigned,4> &Uses,
+ SmallSet<unsigned,4> &Defs) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ if (!TII->isUnpredicatedTerminator(Loc))
+ return MBB->end();
+
+ for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Loc->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
+ } else if (!MO.isDead())
+ // Don't try to hoist code in the rare case the terminator defines a
+ // register that is later used.
+ return MBB->end();
+ }
+
+ if (Uses.empty())
+ return Loc;
+ if (Loc == MBB->begin())
+ return MBB->end();
+
+ // The terminator is probably a conditional branch, try not to separate the
+ // branch from condition setting instruction.
+ MachineBasicBlock::iterator PI = Loc;
+ --PI;
+ while (PI != MBB->begin() && Loc->isDebugValue())
+ --PI;
+
+ bool IsDef = false;
+ for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ // If PI has a regmask operand, it is probably a call. Separate away.
+ if (MO.isRegMask())
+ return Loc;
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Uses.count(Reg))
+ IsDef = true;
+ }
+ if (!IsDef)
+ // The condition setting instruction is not just before the conditional
+ // branch.
+ return Loc;
+
+ // Be conservative, don't insert instruction above something that may have
+ // side-effects. And since it's potentially bad to separate flag setting
+ // instruction from the conditional branch, just abort the optimization
+ // completely.
+ // Also avoid moving code above predicated instruction since it's hard to
+ // reason about register liveness with predicated instruction.
+ bool DontMoveAcrossStore = true;
+ if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
+ TII->isPredicated(PI))
+ return MBB->end();
+
+
+ // Find out what registers are live. Note this routine is ignoring other live
+ // registers which are only used by instructions in successor blocks.
+ for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
+ } else {
+ if (Uses.erase(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ }
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Defs.insert(*AI);
+ }
+ }
+
+ return PI;
+}
+
+/// HoistCommonCodeInSuccs - If the successors of MBB has common instruction
+/// sequence at the start of the function, move the instructions before MBB
+/// terminator if it's legal.
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+ return false;
+
+ if (!FBB) FBB = findFalseBlock(MBB, TBB);
+ if (!FBB)
+ // Malformed bcc? True and false blocks are the same?
+ return false;
+
+ // Restrict the optimization to cases where MBB is the only predecessor,
+ // it is an obvious win.
+ if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+ return false;
+
+ // Find a suitable position to hoist the common instructions to. Also figure
+ // out which registers are used or defined by instructions from the insertion
+ // point to the end of the block.
+ SmallSet<unsigned, 4> Uses, Defs;
+ MachineBasicBlock::iterator Loc =
+ findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+ if (Loc == MBB->end())
+ return false;
+
+ bool HasDups = false;
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallSet<unsigned, 4> LocalDefsSet;
+ MachineBasicBlock::iterator TIB = TBB->begin();
+ MachineBasicBlock::iterator FIB = FBB->begin();
+ MachineBasicBlock::iterator TIE = TBB->end();
+ MachineBasicBlock::iterator FIE = FBB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead))
+ break;
+
+ if (TII->isPredicated(TIB))
+ // Hard to reason about register liveness with predicated instruction.
+ break;
+
+ bool IsSafe = true;
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ // Don't attempt to hoist instructions with register masks.
+ if (MO.isRegMask()) {
+ IsSafe = false;
+ break;
+ }
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(Reg)) {
+ // Avoid clobbering a register that's used by the instruction at
+ // the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (Defs.count(Reg) && !MO.isDead()) {
+ // Don't hoist the instruction if the def would be clobber by the
+ // instruction at the point insertion. FIXME: This is overly
+ // conservative. It should be possible to hoist the instructions
+ // in BB2 in the following example:
+ // BB1:
+ // r1, eflag = op1 r2, r3
+ // brcc eflag
+ //
+ // BB2:
+ // r1 = op2, ...
+ // = op3, r1<kill>
+ IsSafe = false;
+ break;
+ }
+ } else if (!LocalDefsSet.count(Reg)) {
+ if (Defs.count(Reg)) {
+ // Use is defined by the instruction at the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (MO.isKill() && Uses.count(Reg))
+ // Kills a register that's read by the instruction at the point of
+ // insertion. Remove the kill marker.
+ MO.setIsKill(false);
+ }
+ }
+ if (!IsSafe)
+ break;
+
+ bool DontMoveAcrossStore = true;
+ if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
+ break;
+
+ // Remove kills from LocalDefsSet, these registers had short live ranges.
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !LocalDefsSet.count(Reg))
+ continue;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.erase(*AI);
+ }
+
+ // Track local defs so we can update liveins.
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ LocalDefs.push_back(Reg);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.insert(*AI);
+ }
+
+ HasDups = true;
+ ++TIB;
+ ++FIB;
+ }
+
+ if (!HasDups)
+ return false;
+
+ MBB->splice(Loc, TBB, TBB->begin(), TIB);
+ FBB->erase(FBB->begin(), FIB);
+
+ // Update livein's.
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Def = LocalDefs[i];
+ if (LocalDefsSet.count(Def)) {
+ TBB->addLiveIn(Def);
+ FBB->addLiveIn(Def);
+ }
+ }
+
+ ++NumHoist;
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
new file mode 100644
index 000000000000..df795dfc248e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -0,0 +1,123 @@
+//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
+#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <vector>
+
+namespace llvm {
+ class MachineFunction;
+ class MachineModuleInfo;
+ class RegScavenger;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+
+ class BranchFolder {
+ public:
+ explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist);
+
+ bool OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi);
+ private:
+ class MergePotentialsElt {
+ unsigned Hash;
+ MachineBasicBlock *Block;
+ public:
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+ : Hash(h), Block(b) {}
+
+ unsigned getHash() const { return Hash; }
+ MachineBasicBlock *getBlock() const { return Block; }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ Block = MBB;
+ }
+
+ bool operator<(const MergePotentialsElt &) const;
+ };
+ typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+ std::vector<MergePotentialsElt> MergePotentials;
+ SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
+
+ class SameTailElt {
+ MPIterator MPIter;
+ MachineBasicBlock::iterator TailStartPos;
+ public:
+ SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+ : MPIter(mp), TailStartPos(tsp) {}
+
+ MPIterator getMPIter() const {
+ return MPIter;
+ }
+ MergePotentialsElt &getMergePotentialsElt() const {
+ return *getMPIter();
+ }
+ MachineBasicBlock::iterator getTailStartPos() const {
+ return TailStartPos;
+ }
+ unsigned getHash() const {
+ return getMergePotentialsElt().getHash();
+ }
+ MachineBasicBlock *getBlock() const {
+ return getMergePotentialsElt().getBlock();
+ }
+ bool tailIsWholeBlock() const {
+ return TailStartPos == getBlock()->begin();
+ }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ getMergePotentialsElt().setBlock(MBB);
+ }
+ void setTailStartPos(MachineBasicBlock::iterator Pos) {
+ TailStartPos = Pos;
+ }
+ };
+ std::vector<SameTailElt> SameTails;
+
+ bool EnableTailMerge;
+ bool EnableHoistCommonCode;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineModuleInfo *MMI;
+ RegScavenger *RS;
+
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ void MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB);
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest);
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB);
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex);
+
+ bool OptimizeBranches(MachineFunction &MF);
+ bool OptimizeBlock(MachineBasicBlock *MBB);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+ bool HoistCommonCode(MachineFunction &MF);
+ bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
+ };
+}
+
+#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
new file mode 100644
index 000000000000..dee339a45863
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -0,0 +1,202 @@
+//===------------------------ CalcSpillWeights.cpp ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "calcspillweights"
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+char CalculateSpillWeights::ID = 0;
+INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+
+void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<LiveIntervals>();
+ au.addRequired<MachineLoopInfo>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>());
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ VRAI.CalculateWeightAndHint(LIS.getInterval(Reg));
+ }
+ return false;
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+ const TargetRegisterInfo &tri,
+ const MachineRegisterInfo &mri) {
+ unsigned sub, hreg, hsub;
+ if (mi->getOperand(0).getReg() == reg) {
+ sub = mi->getOperand(0).getSubReg();
+ hreg = mi->getOperand(1).getReg();
+ hsub = mi->getOperand(1).getSubReg();
+ } else {
+ sub = mi->getOperand(1).getSubReg();
+ hreg = mi->getOperand(0).getReg();
+ hsub = mi->getOperand(0).getSubReg();
+ }
+
+ if (!hreg)
+ return 0;
+
+ if (TargetRegisterInfo::isVirtualRegister(hreg))
+ return sub == hsub ? hreg : 0;
+
+ const TargetRegisterClass *rc = mri.getRegClass(reg);
+
+ // Only allow physreg hints in rc.
+ if (sub == 0)
+ return rc->contains(hreg) ? hreg : 0;
+
+ // reg:sub should match the physreg hreg.
+ return tri.getMatchingSuperReg(hreg, sub, rc);
+}
+
+// Check if all values in LI are rematerializable
+static bool isRematerializable(const LiveInterval &LI,
+ const LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ if (VNI->isPHIDef())
+ return false;
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
+
+ if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis()))
+ return false;
+ }
+ return true;
+}
+
+void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
+ MachineRegisterInfo &mri = MF.getRegInfo();
+ const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
+ MachineBasicBlock *mbb = 0;
+ MachineLoop *loop = 0;
+ unsigned loopDepth = 0;
+ bool isExiting = false;
+ float totalWeight = 0;
+ SmallPtrSet<MachineInstr*, 8> visited;
+
+ // Find the best physreg hist and the best virtreg hint.
+ float bestPhys = 0, bestVirt = 0;
+ unsigned hintPhys = 0, hintVirt = 0;
+
+ // Don't recompute a target specific hint.
+ bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+
+ // Don't recompute spill weight for an unspillable register.
+ bool Spillable = li.isSpillable();
+
+ for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
+ MachineInstr *mi = I.skipInstruction();) {
+ if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+ continue;
+ if (!visited.insert(mi))
+ continue;
+
+ float weight = 1.0f;
+ if (Spillable) {
+ // Get loop info for mi.
+ if (mi->getParent() != mbb) {
+ mbb = mi->getParent();
+ loop = Loops.getLoopFor(mbb);
+ loopDepth = loop ? loop->getLoopDepth() : 0;
+ isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ }
+
+ // Calculate instr weight.
+ bool reads, writes;
+ tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+ weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+ // Give extra weight to what looks like a loop induction variable update.
+ if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
+ weight *= 3;
+
+ totalWeight += weight;
+ }
+
+ // Get allocation hints from copies.
+ if (noHint || !mi->isCopy())
+ continue;
+ unsigned hint = copyHint(mi, li.reg, tri, mri);
+ if (!hint)
+ continue;
+ float hweight = Hint[hint] += weight;
+ if (TargetRegisterInfo::isPhysicalRegister(hint)) {
+ if (hweight > bestPhys && mri.isAllocatable(hint))
+ bestPhys = hweight, hintPhys = hint;
+ } else {
+ if (hweight > bestVirt)
+ bestVirt = hweight, hintVirt = hint;
+ }
+ }
+
+ Hint.clear();
+
+ // Always prefer the physreg hint.
+ if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
+ mri.setRegAllocationHint(li.reg, 0, hint);
+ // Weakly boost the spill weight of hinted registers.
+ totalWeight *= 1.01F;
+ }
+
+ // If the live interval was already unspillable, leave it that way.
+ if (!Spillable)
+ return;
+
+ // Mark li as unspillable if all live ranges are tiny.
+ if (li.isZeroLength(LIS.getSlotIndexes())) {
+ li.markNotSpillable();
+ return;
+ }
+
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo()))
+ totalWeight *= 0.5F;
+
+ li.weight = normalizeSpillWeight(totalWeight, li.getSize());
+}
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
new file mode 100644
index 000000000000..f1d4ace92273
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -0,0 +1,180 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
+ const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &C)
+ : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(C),
+ CallOrPrologue(Unknown) {
+ // No stack is used.
+ StackOffset = 0;
+
+ clearFirstByValReg();
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate space on the stack large enough to pass an argument
+// by value. The size and alignment information of the argument is encoded in
+// its parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ MF.getFrameInfo()->ensureMaxAlignment(Align);
+ TM.getTargetLowering()->HandleByVal(this, Size, Align);
+ unsigned Offset = AllocateStack(Size, Align);
+ addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI/32] |= 1 << (*AI&31);
+}
+
+/// AnalyzeFormalArguments - Analyze an array of argument values,
+/// incorporating info about the formals into this state.
+void
+CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ unsigned NumArgs = Ins.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// CheckReturn - Analyze the return values of a function, returning true if
+/// the return can be performed without sret-demotion, and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ return false;
+ }
+ return true;
+}
+
+/// AnalyzeReturn - Analyze the returned values of a return,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ unsigned NumOps = Outs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ MVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
new file mode 100644
index 000000000000..35ec68d00cec
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -0,0 +1,77 @@
+//===-- CodeGen.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the
+// CodeGen library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeBasicTTIPass(Registry);
+ initializeBranchFolderPassPass(Registry);
+ initializeCalculateSpillWeightsPass(Registry);
+ initializeDeadMachineInstructionElimPass(Registry);
+ initializeEarlyIfConverterPass(Registry);
+ initializeExpandPostRAPass(Registry);
+ initializeExpandISelPseudosPass(Registry);
+ initializeFinalizeMachineBundlesPass(Registry);
+ initializeGCMachineCodeAnalysisPass(Registry);
+ initializeGCModuleInfoPass(Registry);
+ initializeIfConverterPass(Registry);
+ initializeLiveDebugVariablesPass(Registry);
+ initializeLiveIntervalsPass(Registry);
+ initializeLiveStacksPass(Registry);
+ initializeLiveVariablesPass(Registry);
+ initializeLocalStackSlotPassPass(Registry);
+ initializeMachineBlockFrequencyInfoPass(Registry);
+ initializeMachineBlockPlacementPass(Registry);
+ initializeMachineBlockPlacementStatsPass(Registry);
+ initializeMachineCopyPropagationPass(Registry);
+ initializeMachineCSEPass(Registry);
+ initializeMachineDominatorTreePass(Registry);
+ initializeMachinePostDominatorTreePass(Registry);
+ initializeMachineLICMPass(Registry);
+ initializeMachineLoopInfoPass(Registry);
+ initializeMachineModuleInfoPass(Registry);
+ initializeMachineSchedulerPass(Registry);
+ initializeMachineSinkingPass(Registry);
+ initializeMachineVerifierPassPass(Registry);
+ initializeOptimizePHIsPass(Registry);
+ initializePHIEliminationPass(Registry);
+ initializePeepholeOptimizerPass(Registry);
+ initializePostRASchedulerPass(Registry);
+ initializeProcessImplicitDefsPass(Registry);
+ initializePEIPass(Registry);
+ initializeRegisterCoalescerPass(Registry);
+ initializeSlotIndexesPass(Registry);
+ initializeStackProtectorPass(Registry);
+ initializeStackColoringPass(Registry);
+ initializeStackSlotColoringPass(Registry);
+ initializeStrongPHIEliminationPass(Registry);
+ initializeTailDuplicatePassPass(Registry);
+ initializeTargetPassConfigPass(Registry);
+ initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnpackMachineBundlesPass(Registry);
+ initializeUnreachableBlockElimPass(Registry);
+ initializeUnreachableMachineBlockElimPass(Registry);
+ initializeVirtRegMapPass(Registry);
+ initializeVirtRegRewriterPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
+ initializeMachineFunctionPrinterPassPass(Registry);
+}
+
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
+ initializeCodeGen(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
new file mode 100644
index 000000000000..0eb74a40d589
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -0,0 +1,658 @@
+//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a blocks
+// critical path during post-RA scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+CriticalAntiDepBreaker::
+CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ RegClassInfo(RCI),
+ Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
+ KillIndices(TRI->getNumRegs(), 0),
+ DefIndices(TRI->getNumRegs(), 0),
+ KeepRegs(TRI->getNumRegs(), false) {}
+
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
+}
+
+void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ // Clear out the register class data.
+ Classes[i] = static_cast<const TargetRegisterClass *>(0);
+
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+
+ // Clear "do not change" set.
+ KeepRegs.reset();
+
+ bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In non-return this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ if (!IsReturnBlock && !Pristine.test(*I)) continue;
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::FinishBlock() {
+ RegRefs.clear();
+ KeepRegs.reset();
+}
+
+void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ if (MI->isDebugValue())
+ return;
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] != ~0u) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled).
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = Count;
+ } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Move the def index to the end of the previous region, to reflect
+ // that the def could theoretically have been scheduled at the end.
+ DefIndices[Reg] = InsertPosIndex;
+ }
+ }
+
+ PrescanInstruction(MI);
+ ScanInstruction(MI, Count);
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SDep *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ const SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ return Next;
+}
+
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+ // It's not safe to change register allocation for source operands of
+ // that have special allocation requirements. Also assume all registers
+ // used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instruction is more complex. We are being
+ // conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
+ // Scan the register operands for this instruction and update
+ // Classes and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ const TargetRegisterClass *NewRC = 0;
+
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Now check for aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ // If an alias of the reg is used during the live range, give up.
+ // Note that this allows us to skip checking if AntiDepReg
+ // overlaps with any of the aliases, among other things.
+ unsigned AliasReg = *AI;
+ if (Classes[AliasReg]) {
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+
+ // If we're still willing to consider this register, note the reference.
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ if (MO.isUse() && Special) {
+ if (!KeepRegs.test(Reg)) {
+ KeepRegs.set(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ KeepRegs.set(*SubRegs);
+ }
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ // Update liveness.
+ // Proceeding upwards, registers that are defed but not used in this
+ // instruction are now dead.
+
+ if (!TII->isPredicated(MI)) {
+ // Predicated defs are modeled as read + write, i.e. similar to two
+ // address updates.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (MO.isRegMask())
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+ if (MO.clobbersPhysReg(i)) {
+ DefIndices[i] = Count;
+ KillIndices[i] = ~0u;
+ KeepRegs.reset(i);
+ Classes[i] = 0;
+ RegRefs.erase(i);
+ }
+
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ DefIndices[Reg] = Count;
+ KillIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ KeepRegs.reset(Reg);
+ Classes[Reg] = 0;
+ RegRefs.erase(Reg);
+ // Repeat, for all subregs.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubregReg = *SubRegs;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ KeepRegs.reset(SubregReg);
+ Classes[SubregReg] = 0;
+ RegRefs.erase(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isUse()) continue;
+
+ const TargetRegisterClass *NewRC = 0;
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // It wasn't previously live but now it is, this is a kill.
+ if (KillIndices[Reg] == ~0u) {
+ KillIndices[Reg] = Count;
+ DefIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ }
+ // Repeat, for all aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (KillIndices[AliasReg] == ~0u) {
+ KillIndices[AliasReg] = Count;
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+}
+
+// Check all machine operands that reference the antidependent register and must
+// be replaced by NewReg. Return true if any of their parent instructions may
+// clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// it's use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/postincrement loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instructions with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg)
+{
+ for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
+ MachineOperand *RefOper = I->second;
+
+ // Don't allow the instruction defining AntiDepReg to earlyclobber its
+ // operands, in case they may be assigned to NewReg. In this case antidep
+ // breaking must fail, but it's too rare to bother optimizing.
+ if (RefOper->isDef() && RefOper->isEarlyClobber())
+ return true;
+
+ // Handle cases in which this instructions defines NewReg.
+ MachineInstr *MI = RefOper->getParent();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &CheckOper = MI->getOperand(i);
+
+ if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
+ return true;
+
+ if (!CheckOper.isReg() || !CheckOper.isDef() ||
+ CheckOper.getReg() != NewReg)
+ continue;
+
+ // Don't allow the instruction to define NewReg and AntiDepReg.
+ // When AntiDepReg is renamed it will be an illegal op.
+ if (RefOper->isDef())
+ return true;
+
+ // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+ // NewReg
+ if (CheckOper.isEarlyClobber())
+ return true;
+
+ // Don't allow inline asm to define NewReg at all. Who know what it's
+ // doing with it.
+ if (MI->isInlineAsm())
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned CriticalAntiDepBreaker::
+findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC,
+ SmallVector<unsigned, 2> &Forbid)
+{
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned NewReg = Order[i];
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg) continue;
+ // If any instructions that define AntiDepReg also define the NewReg, it's
+ // not suitable. For example, Instruction with multiple definitions can
+ // result in this condition.
+ if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+ && "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+ && "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] != ~0u ||
+ Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+ KillIndices[AntiDepReg] > DefIndices[NewReg])
+ continue;
+ // If NewReg overlaps any of the forbidden registers, we can't use it.
+ bool Forbidden = false;
+ for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(),
+ ite = Forbid.end(); it != ite; ++it)
+ if (TRI->regsOverlap(NewReg, *it)) {
+ Forbidden = true;
+ break;
+ }
+ if (Forbidden) continue;
+ return NewReg;
+ }
+
+ // No registers are free and available!
+ return 0;
+}
+
+unsigned CriticalAntiDepBreaker::
+BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // Keep a map of the MachineInstr*'s back to the SUnit representing them.
+ // This is used for updating debug information.
+ //
+ // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
+ DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
+
+ // Find the node at the bottom of the critical path.
+ const SUnit *Max = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap[SU->getInstr()] = SU;
+ if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+ Max = SU;
+ }
+
+#ifndef NDEBUG
+ {
+ DEBUG(dbgs() << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] == ~0u)
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+#endif
+
+ // Track progress along the critical path through the SUnit graph as we walk
+ // the instructions.
+ const SUnit *CriticalPathSU = Max;
+ MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+ // Consider this pattern:
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // There are three anti-dependencies here, and without special care,
+ // we'd break all of them using the same register:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // because at each anti-dependence, B is the first register that
+ // isn't A which is free. This re-introduces anti-dependencies
+ // at all but one of the original anti-dependencies that we were
+ // trying to break. To avoid this, keep track of the most recent
+ // register that each register was replaced with, avoid
+ // using it to repair an anti-dependence on the same register.
+ // This lets us produce this:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // C = ...
+ // ... = C
+ // B = ...
+ // ... = B
+ // This still has an anti-dependence on B, but at least it isn't on the
+ // original critical path.
+ //
+ // TODO: If we tracked more than one register here, we could potentially
+ // fix that remaining critical edge too. This is a little more involved,
+ // because unlike the most recent register, less recent registers should
+ // still be considered, though only if no other registers are available.
+ std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
+
+ // Attempt to break anti-dependence edges on the critical path. Walk the
+ // instructions from the bottom up, tracking information about liveness
+ // as we go to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
+ // Check if this instruction has a dependence on the critical path that
+ // is an anti-dependence that we may be able to break. If it is, set
+ // AntiDepReg to the non-zero register associated with the anti-dependence.
+ //
+ // We limit our attention to the critical path as a heuristic to avoid
+ // breaking anti-dependence edges that aren't going to significantly
+ // impact the overall schedule. There are a limited number of registers
+ // and we want to save them for the important edges.
+ //
+ // TODO: Instructions with multiple defs could have multiple
+ // anti-dependencies. The current code here only knows how to break one
+ // edge per instruction. Note that we'd have to be able to break all of
+ // the anti-dependencies in an instruction in order to be effective.
+ unsigned AntiDepReg = 0;
+ if (MI == CriticalPathMI) {
+ if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ const SUnit *NextSU = Edge->getSUnit();
+
+ // Only consider anti-dependence edges.
+ if (Edge->getKind() == SDep::Anti) {
+ AntiDepReg = Edge->getReg();
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+ if (!MRI.isAllocatable(AntiDepReg))
+ // Don't break anti-dependencies on non-allocatable registers.
+ AntiDepReg = 0;
+ else if (KeepRegs.test(AntiDepReg))
+ // Don't break anti-dependencies if an use down below requires
+ // this exact register.
+ AntiDepReg = 0;
+ else {
+ // If the SUnit has other dependencies on the SUnit that it
+ // anti-depends on, don't bother breaking the anti-dependency
+ // since those edges would prevent such units from being
+ // scheduled past each other regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
+ PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+ CriticalPathSU = NextSU;
+ CriticalPathMI = CriticalPathSU->getInstr();
+ } else {
+ // We've reached the end of the critical path.
+ CriticalPathSU = 0;
+ CriticalPathMI = 0;
+ }
+ }
+
+ PrescanInstruction(MI);
+
+ SmallVector<unsigned, 2> ForbidRegs;
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI))
+ // If this instruction's defs have special allocation requirement, don't
+ // break this anti-dependency.
+ AntiDepReg = 0;
+ else if (AntiDepReg) {
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid. If the instruction defines other registers,
+ // save a list of them so that we don't pick a new register
+ // that overlaps any of them.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ if (MO.isDef() && Reg != AntiDepReg)
+ ForbidRegs.push_back(Reg);
+ }
+ }
+
+ // Determine AntiDepReg's register class, if it is live and is
+ // consistently used within a single class.
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
+ assert((AntiDepReg == 0 || RC != NULL) &&
+ "Register should be live if it's causing an anti-dependence!");
+ if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+ AntiDepReg = 0;
+
+ // Look for a suitable register to use to break the anti-depenence.
+ //
+ // TODO: Instead of picking the first free register, consider which might
+ // be the best.
+ if (AntiDepReg != 0) {
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+ AntiDepReg,
+ LastNewReg[AntiDepReg],
+ RC, ForbidRegs)) {
+ DEBUG(dbgs() << "Breaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg)
+ << " with " << RegRefs.count(AntiDepReg) << " references"
+ << " using " << TRI->getName(NewReg) << "!\n");
+
+ // Update the references to the old register to refer to the new
+ // register.
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second->setReg(NewReg);
+ // If the SU for the instruction being updated has debug information
+ // related to the anti-dependency register, make sure to update that
+ // as well.
+ const SUnit *SU = MISUnitMap[Q->second->getParent()];
+ if (!SU) continue;
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second->getParent())
+ UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = 0;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ LastNewReg[AntiDepReg] = NewReg;
+ ++Broken;
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
new file mode 100644
index 000000000000..df13dd31f6b2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -0,0 +1,110 @@
+//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a blocks
+// critical path during post-RA scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include <map>
+
+namespace llvm {
+class RegisterClassInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+ class CriticalAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// AllocatableSet - The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// Classes - For live regs that are only used in one register class in a
+ /// live range, the register class. If the register is not live, the
+ /// corresponding value is null. If the register is live but used in
+ /// multiple register classes, the corresponding value is -1 casted to a
+ /// pointer.
+ std::vector<const TargetRegisterClass*> Classes;
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, MachineOperand *> RegRefs;
+ typedef std::multimap<unsigned, MachineOperand *>::const_iterator
+ RegRefIter;
+
+ /// KillIndices - The index of the most recent kill (proceding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// DefIndices - The index of the most recent complete def (proceding bottom
+ /// up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ /// KeepRegs - A set of registers which are live and cannot be changed to
+ /// break anti-dependencies.
+ BitVector KeepRegs;
+
+ public:
+ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
+ ~CriticalAntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identifiy anti-dependencies along the critical
+ /// path
+ /// of the ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ void PrescanInstruction(MachineInstr *MI);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg);
+ unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC,
+ SmallVector<unsigned, 2> &Forbid);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
new file mode 100644
index 000000000000..840a10128daf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -0,0 +1,225 @@
+//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+ const unsigned *SET):
+ InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
+ DFAStateEntryTable(SET) {}
+
+
+//
+// ReadTable - Read the DFA transition table and update CachedTable.
+//
+// Format of the transition tables:
+// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
+// transitions
+// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
+// for the ith state
+//
+void DFAPacketizer::ReadTable(unsigned int state) {
+ unsigned ThisState = DFAStateEntryTable[state];
+ unsigned NextStateInTable = DFAStateEntryTable[state+1];
+ // Early exit in case CachedTable has already contains this
+ // state's transitions.
+ if (CachedTable.count(UnsignPair(state,
+ DFAStateInputTable[ThisState][0])))
+ return;
+
+ for (unsigned i = ThisState; i < NextStateInTable; i++)
+ CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
+ DFAStateInputTable[i][1];
+}
+
+
+// canReserveResources - Check if the resources occupied by a MCInstrDesc
+// are available in the current state.
+bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ return (CachedTable.count(StateTrans) != 0);
+}
+
+
+// reserveResources - Reserve the resources occupied by a MCInstrDesc and
+// change the current state to reflect that change.
+void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ assert(CachedTable.count(StateTrans) != 0);
+ CurrentState = CachedTable[StateTrans];
+}
+
+
+// canReserveResources - Check if the resources occupied by a machine
+// instruction are available in the current state.
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ return canReserveResources(&MID);
+}
+
+// reserveResources - Reserve the resources occupied by a machine
+// instruction and change the current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ reserveResources(&MID);
+}
+
+namespace llvm {
+// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
+// Schedule method to build the dependence graph.
+class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+public:
+ DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT, bool IsPostRA);
+ // Schedule - Actual scheduling work.
+ void schedule();
+};
+}
+
+DefaultVLIWScheduler::DefaultVLIWScheduler(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA) :
+ ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+ CanHandleTerminators = true;
+}
+
+void DefaultVLIWScheduler::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(0);
+}
+
+// VLIWPacketizerList Ctor
+VLIWPacketizerList::VLIWPacketizerList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
+ TII = TM.getInstrInfo();
+ ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+}
+
+// VLIWPacketizerList Dtor
+VLIWPacketizerList::~VLIWPacketizerList() {
+ if (VLIWScheduler)
+ delete VLIWScheduler;
+
+ if (ResourceTracker)
+ delete ResourceTracker;
+}
+
+// endPacket - End the current packet, bundle packet instructions and reset
+// DFA state.
+void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ if (CurrentPacketMIs.size() > 1) {
+ MachineInstr *MIFirst = CurrentPacketMIs.front();
+ finalizeBundle(*MBB, MIFirst, MI);
+ }
+ CurrentPacketMIs.clear();
+ ResourceTracker->clearResources();
+}
+
+// PacketizeMIs - Bundle machine instructions into packets.
+void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator BeginItr,
+ MachineBasicBlock::iterator EndItr) {
+ assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+ VLIWScheduler->startBlock(MBB);
+ VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+ VLIWScheduler->schedule();
+
+ // Generate MI -> SU map.
+ MIToSUnit.clear();
+ for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
+ SUnit *SU = &VLIWScheduler->SUnits[i];
+ MIToSUnit[SU->getInstr()] = SU;
+ }
+
+ // The main packetizer loop.
+ for (; BeginItr != EndItr; ++BeginItr) {
+ MachineInstr *MI = BeginItr;
+
+ this->initPacketizerState();
+
+ // End the current packet if needed.
+ if (this->isSoloInstruction(MI)) {
+ endPacket(MBB, MI);
+ continue;
+ }
+
+ // Ignore pseudo instructions.
+ if (this->ignorePseudoInstruction(MI, MBB))
+ continue;
+
+ SUnit *SUI = MIToSUnit[MI];
+ assert(SUI && "Missing SUnit Info!");
+
+ // Ask DFA if machine resource is available for MI.
+ bool ResourceAvail = ResourceTracker->canReserveResources(MI);
+ if (ResourceAvail) {
+ // Dependency check for MI with instructions in CurrentPacketMIs.
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
+ MachineInstr *MJ = *VI;
+ SUnit *SUJ = MIToSUnit[MJ];
+ assert(SUJ && "Missing SUnit Info!");
+
+ // Is it legal to packetize SUI and SUJ together.
+ if (!this->isLegalToPacketizeTogether(SUI, SUJ)) {
+ // Allow packetization if dependency can be pruned.
+ if (!this->isLegalToPruneDependencies(SUI, SUJ)) {
+ // End the packet if dependency cannot be pruned.
+ endPacket(MBB, MI);
+ break;
+ } // !isLegalToPruneDependencies.
+ } // !isLegalToPacketizeTogether.
+ } // For all instructions in CurrentPacketMIs.
+ } else {
+ // End the packet if resource is not available.
+ endPacket(MBB, MI);
+ }
+
+ // Add MI to the current packet.
+ BeginItr = this->addToPacket(MI);
+ } // For all instructions in BB.
+
+ // End any packet left behind.
+ endPacket(MBB, EndItr);
+ VLIWScheduler->exitRegion();
+ VLIWScheduler->finishBlock();
+}
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 000000000000..a54217f5b2fb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,190 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-dce"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class DeadMachineInstructionElim : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ BitVector LivePhysRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+ initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ bool isDead(const MachineInstr *MI) const;
+ };
+}
+char DeadMachineInstructionElim::ID = 0;
+char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
+
+INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
+ "Remove dead machine instructions", false, false)
+
+bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+ // Technically speaking inline asm without side effects and no defs can still
+ // be deleted. But there is so much bad inline asm code out there, we should
+ // let them be.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Don't delete instructions with side effects.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
+ return false;
+
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Don't delete live physreg defs, or any reserved register defs.
+ if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
+ return false;
+ } else {
+ if (!MRI->use_nodbg_empty(Reg))
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+ bool AnyChanges = false;
+ MRI = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Loop over all instructions in all blocks, from bottom to top, so that it's
+ // more likely that chains of dependent but ultimately dead instructions will
+ // be cleaned up.
+ for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
+ I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ // Start out assuming that reserved registers are live out of this block.
+ LivePhysRegs = MRI->getReservedRegs();
+
+ // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not
+ // live across blocks, but some targets (x86) can have flags live out of a
+ // block.
+ for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+ E = MBB->succ_end(); S != E; S++)
+ for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
+ LI != (*S)->livein_end(); LI++)
+ LivePhysRegs.set(*LI);
+
+ // Now scan the instructions and delete dead ones, tracking physreg
+ // liveness as we go.
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
+ MIE = MBB->rend(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // If the instruction is dead, delete it!
+ if (isDead(MI)) {
+ DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I!=E; I=nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand& Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI==MI)
+ continue;
+ assert(Use.isDebug());
+ UseMI->getOperand(0).setReg(0U);
+ }
+ }
+ AnyChanges = true;
+ MI->eraseFromParent();
+ ++NumDeletes;
+ MIE = MBB->rend();
+ // MII is now pointing to the next instruction to process,
+ // so don't increment it.
+ continue;
+ }
+
+ // Record the physreg defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.reset(Reg);
+ // Check the subreg set, not the alias set, because a def
+ // of a super-register may still be partially live after
+ // this def.
+ for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ LivePhysRegs.reset(*SR);
+ }
+ } else if (MO.isRegMask()) {
+ // Register mask of preserved registers. All clobbers are dead.
+ LivePhysRegs.clearBitsNotInMask(MO.getRegMask());
+ }
+ }
+ // Record the physreg uses, after the defs, in case a physreg is
+ // both defined and used in the same instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LivePhysRegs.set(*AI);
+ }
+ }
+ }
+
+ // We didn't delete the current instruction, so increment MII to
+ // the next one.
+ ++MII;
+ }
+ }
+
+ LivePhysRegs.clear();
+ return AnyChanges;
+}
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
new file mode 100644
index 000000000000..f27ec770ebad
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -0,0 +1,183 @@
+//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mulches exception handling code into a form adapted to code
+// generation. Required if using dwarf exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumResumesLowered, "Number of resume calls lowered");
+
+namespace {
+ class DwarfEHPrepare : public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLoweringBase *TLI;
+
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ Constant *RewindFunction;
+
+ bool InsertUnwindResumeCalls(Function &Fn);
+ Value *GetExceptionObject(ResumeInst *RI);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ DwarfEHPrepare(const TargetMachine *tm) :
+ FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
+ RewindFunction(0) {
+ initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+
+ const char *getPassName() const {
+ return "Exception handling preparation";
+ }
+ };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
+ return new DwarfEHPrepare(tm);
+}
+
+/// GetExceptionObject - Return the exception object from the value passed into
+/// the 'resume' instruction (typically an aggregate). Clean up any dead
+/// instructions, including the 'resume' instruction.
+Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
+ Value *V = RI->getOperand(0);
+ Value *ExnObj = 0;
+ InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V);
+ LoadInst *SelLoad = 0;
+ InsertValueInst *ExcIVI = 0;
+ bool EraseIVIs = false;
+
+ if (SelIVI) {
+ if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) {
+ ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0));
+ if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) &&
+ ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) {
+ ExnObj = ExcIVI->getOperand(1);
+ SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1));
+ EraseIVIs = true;
+ }
+ }
+ }
+
+ if (!ExnObj)
+ ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI);
+
+ RI->eraseFromParent();
+
+ if (EraseIVIs) {
+ if (SelIVI->getNumUses() == 0)
+ SelIVI->eraseFromParent();
+ if (ExcIVI->getNumUses() == 0)
+ ExcIVI->eraseFromParent();
+ if (SelLoad && SelLoad->getNumUses() == 0)
+ SelLoad->eraseFromParent();
+ }
+
+ return ExnObj;
+}
+
+/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
+/// into calls to the appropriate _Unwind_Resume function.
+bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
+ bool UsesNewEH = false;
+ SmallVector<ResumeInst*, 16> Resumes;
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(TI))
+ Resumes.push_back(RI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(TI))
+ UsesNewEH = II->getUnwindDest()->isLandingPad();
+ }
+
+ if (Resumes.empty())
+ return UsesNewEH;
+
+ // Find the rewind function if we didn't already.
+ if (!RewindFunction) {
+ LLVMContext &Ctx = Resumes[0]->getContext();
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+ Type::getInt8PtrTy(Ctx), false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ // Create the basic block where the _Unwind_Resume call will live.
+ LLVMContext &Ctx = Fn.getContext();
+ unsigned ResumesSize = Resumes.size();
+
+ if (ResumesSize == 1) {
+ // Instead of creating a new BB and PHI node, just append the call to
+ // _Unwind_Resume to the end of the single resume block.
+ ResumeInst *RI = Resumes.front();
+ BasicBlock *UnwindBB = RI->getParent();
+ Value *ExnObj = GetExceptionObject(RI);
+
+ // Call the _Unwind_Resume function.
+ CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+ return true;
+ }
+
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize,
+ "exn.obj", UnwindBB);
+
+ // Extract the exception object from the ResumeInst and add it to the PHI node
+ // that feeds the _Unwind_Resume call.
+ for (SmallVectorImpl<ResumeInst*>::iterator
+ I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
+ ResumeInst *RI = *I;
+ BasicBlock *Parent = RI->getParent();
+ BranchInst::Create(UnwindBB, Parent);
+
+ Value *ExnObj = GetExceptionObject(RI);
+ PN->addIncoming(ExnObj, Parent);
+
+ ++NumResumesLowered;
+ }
+
+ // Call the function.
+ CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+ return true;
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ bool Changed = InsertUnwindResumeCalls(Fn);
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
new file mode 100644
index 000000000000..5447df09cbb2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -0,0 +1,801 @@
+//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Early if-conversion is for out-of-order CPUs that don't have a lot of
+// predicable instructions. The goal is to eliminate conditional branches that
+// may mispredict.
+//
+// Instructions from both sides of the branch are executed specutatively, and a
+// cmov instruction selects the result.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-ifcvt"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+// Absolute maximum number of instructions allowed per speculated block.
+// This bypasses all other heuristics, so it should be set fairly high.
+static cl::opt<unsigned>
+BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
+ cl::desc("Maximum number of instructions per speculated block."));
+
+// Stress testing mode - disable heuristics.
+static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
+ cl::desc("Turn all knobs to 11"));
+
+STATISTIC(NumDiamondsSeen, "Number of diamonds");
+STATISTIC(NumDiamondsConv, "Number of diamonds converted");
+STATISTIC(NumTrianglesSeen, "Number of triangles");
+STATISTIC(NumTrianglesConv, "Number of triangles converted");
+
+//===----------------------------------------------------------------------===//
+// SSAIfConv
+//===----------------------------------------------------------------------===//
+//
+// The SSAIfConv class performs if-conversion on SSA form machine code after
+// determining if it is possible. The class contains no heuristics; external
+// code should be used to determine when if-conversion is a good idea.
+//
+// SSAIfConv can convert both triangles and diamonds:
+//
+// Triangle: Head Diamond: Head
+// | \ / \_
+// | \ / |
+// | [TF]BB FBB TBB
+// | / \ /
+// | / \ /
+// Tail Tail
+//
+// Instructions in the conditional blocks TBB and/or FBB are spliced into the
+// Head block, and phis in the Tail block are converted to select instructions.
+//
+namespace {
+class SSAIfConv {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+public:
+ /// The block containing the conditional branch.
+ MachineBasicBlock *Head;
+
+ /// The block containing phis after the if-then-else.
+ MachineBasicBlock *Tail;
+
+ /// The 'true' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *TBB;
+
+ /// The 'false' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *FBB;
+
+ /// isTriangle - When there is no 'else' block, either TBB or FBB will be
+ /// equal to Tail.
+ bool isTriangle() const { return TBB == Tail || FBB == Tail; }
+
+ /// Returns the Tail predecessor for the True side.
+ MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }
+
+ /// Returns the Tail predecessor for the False side.
+ MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }
+
+ /// Information about each phi in the Tail block.
+ struct PHIInfo {
+ MachineInstr *PHI;
+ unsigned TReg, FReg;
+ // Latencies from Cond+Branch, TReg, and FReg to DstReg.
+ int CondCycles, TCycles, FCycles;
+
+ PHIInfo(MachineInstr *phi)
+ : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {}
+ };
+
+ SmallVector<PHIInfo, 8> PHIs;
+
+private:
+ /// The branch condition determined by AnalyzeBranch.
+ SmallVector<MachineOperand, 4> Cond;
+
+ /// Instructions in Head that define values used by the conditional blocks.
+ /// The hoisted instructions must be inserted after these instructions.
+ SmallPtrSet<MachineInstr*, 8> InsertAfter;
+
+ /// Register units clobbered by the conditional blocks.
+ BitVector ClobberedRegUnits;
+
+ // Scratch pad for findInsertionPoint.
+ SparseSet<unsigned> LiveRegUnits;
+
+ /// Insertion point in Head for speculatively executed instructions form TBB
+ /// and FBB.
+ MachineBasicBlock::iterator InsertionPoint;
+
+ /// Return true if all non-terminator instructions in MBB can be safely
+ /// speculated.
+ bool canSpeculateInstrs(MachineBasicBlock *MBB);
+
+ /// Find a valid insertion point in Head.
+ bool findInsertionPoint();
+
+ /// Replace PHI instructions in Tail with selects.
+ void replacePHIInstrs();
+
+ /// Insert selects and rewrite PHI operands to use them.
+ void rewritePHIOperands();
+
+public:
+ /// runOnMachineFunction - Initialize per-function data structures.
+ void runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LiveRegUnits.clear();
+ LiveRegUnits.setUniverse(TRI->getNumRegUnits());
+ ClobberedRegUnits.clear();
+ ClobberedRegUnits.resize(TRI->getNumRegUnits());
+ }
+
+ /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
+ /// initialize the internal state, and return true.
+ bool canConvertIf(MachineBasicBlock *MBB);
+
+ /// convertIf - If-convert the last block passed to canConvertIf(), assuming
+ /// it is possible. Add any erased blocks to RemovedBlocks.
+ void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
+};
+} // end anonymous namespace
+
+
+/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely
+/// be speculated. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
+ // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // get right.
+ if (!MBB->livein_empty()) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+ return false;
+ }
+
+ unsigned InstrCount = 0;
+
+ // Check all instructions, except the terminators. It is assumed that
+ // terminators never have side effects or define any used register values.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator(); I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ if (++InstrCount > BlockInstrLimit && !Stress) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+ << BlockInstrLimit << " instructions.\n");
+ return false;
+ }
+
+ // There shouldn't normally be any phis in a single-predecessor block.
+ if (I->isPHI()) {
+ DEBUG(dbgs() << "Can't hoist: " << *I);
+ return false;
+ }
+
+ // Don't speculate loads. Note that it may be possible and desirable to
+ // speculate GOT or constant pool loads that are guaranteed not to trap,
+ // but we don't support that for now.
+ if (I->mayLoad()) {
+ DEBUG(dbgs() << "Won't speculate load: " << *I);
+ return false;
+ }
+
+ // We never speculate stores, so an AA pointer isn't necessary.
+ bool DontMoveAcrossStore = true;
+ if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) {
+ DEBUG(dbgs() << "Can't speculate: " << *I);
+ return false;
+ }
+
+ // Check for any dependencies on Head instructions.
+ for (MIOperands MO(I); MO.isValid(); ++MO) {
+ if (MO->isRegMask()) {
+ DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+ return false;
+ }
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+
+ // Remember clobbered regunits.
+ if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ ClobberedRegUnits.set(*Units);
+
+ if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI || DefMI->getParent() != Head)
+ continue;
+ if (InsertAfter.insert(DefMI))
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
+ if (DefMI->isTerminator()) {
+ DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+
+/// Find an insertion point in Head for the speculated instructions. The
+/// insertion point must be:
+///
+/// 1. Before any terminators.
+/// 2. After any instructions in InsertAfter.
+/// 3. Not have any clobbered regunits live.
+///
+/// This function sets InsertionPoint and returns true when successful, it
+/// returns false if no valid insertion point could be found.
+///
+bool SSAIfConv::findInsertionPoint() {
+ // Keep track of live regunits before the current position.
+ // Only track RegUnits that are also in ClobberedRegUnits.
+ LiveRegUnits.clear();
+ SmallVector<unsigned, 8> Reads;
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ MachineBasicBlock::iterator I = Head->end();
+ MachineBasicBlock::iterator B = Head->begin();
+ while (I != B) {
+ --I;
+ // Some of the conditional code depends in I.
+ if (InsertAfter.count(I)) {
+ DEBUG(dbgs() << "Can't insert code after " << *I);
+ return false;
+ }
+
+ // Update live regunits.
+ for (MIOperands MO(I); MO.isValid(); ++MO) {
+ // We're ignoring regmask operands. That is conservatively correct.
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // I clobbers Reg, so it isn't live before I.
+ if (MO->isDef())
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ LiveRegUnits.erase(*Units);
+ // Unless I reads Reg.
+ if (MO->readsReg())
+ Reads.push_back(Reg);
+ }
+ // Anything read by I is live before I.
+ while (!Reads.empty())
+ for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
+ ++Units)
+ if (ClobberedRegUnits.test(*Units))
+ LiveRegUnits.insert(*Units);
+
+ // We can't insert before a terminator.
+ if (I != FirstTerm && I->isTerminator())
+ continue;
+
+ // Some of the clobbered registers are live before I, not a valid insertion
+ // point.
+ if (!LiveRegUnits.empty()) {
+ DEBUG({
+ dbgs() << "Would clobber";
+ for (SparseSet<unsigned>::const_iterator
+ i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
+ dbgs() << ' ' << PrintRegUnit(*i, TRI);
+ dbgs() << " live before " << *I;
+ });
+ continue;
+ }
+
+ // This is a valid insertion point.
+ InsertionPoint = I;
+ DEBUG(dbgs() << "Can insert before " << *I);
+ return true;
+ }
+ DEBUG(dbgs() << "No legal insertion point found.\n");
+ return false;
+}
+
+
+
+/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is
+/// a potential candidate for if-conversion. Fill out the internal state.
+///
+bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
+ Head = MBB;
+ TBB = FBB = Tail = 0;
+
+ if (Head->succ_size() != 2)
+ return false;
+ MachineBasicBlock *Succ0 = Head->succ_begin()[0];
+ MachineBasicBlock *Succ1 = Head->succ_begin()[1];
+
+ // Canonicalize so Succ0 has MBB as its single predecessor.
+ if (Succ0->pred_size() != 1)
+ std::swap(Succ0, Succ1);
+
+ if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
+ return false;
+
+ Tail = Succ0->succ_begin()[0];
+
+ // This is not a triangle.
+ if (Tail != Succ1) {
+ // Check for a diamond. We won't deal with any critical edges.
+ if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
+ Succ1->succ_begin()[0] != Tail)
+ return false;
+ DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << "/BB#" << Succ1->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+
+ // Live-in physregs are tricky to get right when speculating code.
+ if (!Tail->livein_empty()) {
+ DEBUG(dbgs() << "Tail has live-ins.\n");
+ return false;
+ }
+ } else {
+ DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+ }
+
+ // This is a triangle or a diamond.
+ // If Tail doesn't have any phis, there must be side effects.
+ if (Tail->empty() || !Tail->front().isPHI()) {
+ DEBUG(dbgs() << "No phis in tail.\n");
+ return false;
+ }
+
+ // The branch we're looking to eliminate must be analyzable.
+ Cond.clear();
+ if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) {
+ DEBUG(dbgs() << "Branch not analyzable.\n");
+ return false;
+ }
+
+ // This is weird, probably some sort of degenerate CFG.
+ if (!TBB) {
+ DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");
+ return false;
+ }
+
+ // AnalyzeBranch doesn't set FBB on a fall-through branch.
+ // Make sure it is always set.
+ FBB = TBB == Succ0 ? Succ1 : Succ0;
+
+ // Any phis in the tail block must be convertible to selects.
+ PHIs.clear();
+ MachineBasicBlock *TPred = getTPred();
+ MachineBasicBlock *FPred = getFPred();
+ for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
+ I != E && I->isPHI(); ++I) {
+ PHIs.push_back(&*I);
+ PHIInfo &PI = PHIs.back();
+ // Find PHI operands corresponding to TPred and FPred.
+ for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) {
+ if (PI.PHI->getOperand(i+1).getMBB() == TPred)
+ PI.TReg = PI.PHI->getOperand(i).getReg();
+ if (PI.PHI->getOperand(i+1).getMBB() == FPred)
+ PI.FReg = PI.PHI->getOperand(i).getReg();
+ }
+ assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI");
+ assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI");
+
+ // Get target information.
+ if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
+ PI.CondCycles, PI.TCycles, PI.FCycles)) {
+ DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
+ return false;
+ }
+ }
+
+ // Check that the conditional instructions can be speculated.
+ InsertAfter.clear();
+ ClobberedRegUnits.reset();
+ if (TBB != Tail && !canSpeculateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canSpeculateInstrs(FBB))
+ return false;
+
+ // Try to find a valid insertion point for the speculated instructions in the
+ // head basic block.
+ if (!findInsertionPoint())
+ return false;
+
+ if (isTriangle())
+ ++NumTrianglesSeen;
+ else
+ ++NumDiamondsSeen;
+ return true;
+}
+
+/// replacePHIInstrs - Completely replace PHI instructions with selects.
+/// This is possible when the only Tail predecessors are the if-converted
+/// blocks.
+void SSAIfConv::replacePHIInstrs() {
+ assert(Tail->pred_size() == 2 && "Cannot replace PHIs");
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ unsigned DstReg = PI.PHI->getOperand(0).getReg();
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+ PI.PHI->eraseFromParent();
+ PI.PHI = 0;
+ }
+}
+
+/// rewritePHIOperands - When there are additional Tail predecessors, insert
+/// select instructions in Head and rewrite PHI operands to use the selects.
+/// Keep the PHI instructions in Tail to handle the other predecessors.
+void SSAIfConv::rewritePHIOperands() {
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ unsigned PHIDst = PI.PHI->getOperand(0).getReg();
+ unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+
+ // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
+ for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
+ MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();
+ if (MBB == getTPred()) {
+ PI.PHI->getOperand(i-1).setMBB(Head);
+ PI.PHI->getOperand(i-2).setReg(DstReg);
+ } else if (MBB == getFPred()) {
+ PI.PHI->RemoveOperand(i-1);
+ PI.PHI->RemoveOperand(i-2);
+ }
+ }
+ DEBUG(dbgs() << " --> " << *PI.PHI);
+ }
+}
+
+/// convertIf - Execute the if conversion after canConvertIf has determined the
+/// feasibility.
+///
+/// Any basic blocks erased will be added to RemovedBlocks.
+///
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
+ assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
+
+ // Update statistics.
+ if (isTriangle())
+ ++NumTrianglesConv;
+ else
+ ++NumDiamondsConv;
+
+ // Move all instructions into Head, except for the terminators.
+ if (TBB != Tail)
+ Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
+ if (FBB != Tail)
+ Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
+
+ // Are there extra Tail predecessors?
+ bool ExtraPreds = Tail->pred_size() != 2;
+ if (ExtraPreds)
+ rewritePHIOperands();
+ else
+ replacePHIInstrs();
+
+ // Fix up the CFG, temporarily leave Head without any successors.
+ Head->removeSuccessor(TBB);
+ Head->removeSuccessor(FBB);
+ if (TBB != Tail)
+ TBB->removeSuccessor(Tail);
+ if (FBB != Tail)
+ FBB->removeSuccessor(Tail);
+
+ // Fix up Head's terminators.
+ // It should become a single branch or a fallthrough.
+ DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
+ TII->RemoveBranch(*Head);
+
+ // Erase the now empty conditional blocks. It is likely that Head can fall
+ // through to Tail, and we can join the two blocks.
+ if (TBB != Tail) {
+ RemovedBlocks.push_back(TBB);
+ TBB->eraseFromParent();
+ }
+ if (FBB != Tail) {
+ RemovedBlocks.push_back(FBB);
+ FBB->eraseFromParent();
+ }
+
+ assert(Head->succ_empty() && "Additional head successors?");
+ if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
+ // Splice Tail onto the end of Head.
+ DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
+ << " into head BB#" << Head->getNumber() << '\n');
+ Head->splice(Head->end(), Tail,
+ Tail->begin(), Tail->end());
+ Head->transferSuccessorsAndUpdatePHIs(Tail);
+ RemovedBlocks.push_back(Tail);
+ Tail->eraseFromParent();
+ } else {
+ // We need a branch to Tail, let code placement work it out later.
+ DEBUG(dbgs() << "Converting to unconditional branch.\n");
+ SmallVector<MachineOperand, 0> EmptyCond;
+ TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL);
+ Head->addSuccessor(Tail);
+ }
+ DEBUG(dbgs() << *Head);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyIfConverter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfConverter : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MCSchedModel *SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
+ SSAIfConv IfConv;
+
+public:
+ static char ID;
+ EarlyIfConverter() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &MF);
+ const char *getPassName() const { return "Early If-Conversion"; }
+
+private:
+ bool tryConvertIf(MachineBasicBlock*);
+ void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
+ void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
+ void invalidateTraces();
+ bool shouldConvertIf();
+};
+} // end anonymous namespace
+
+char EarlyIfConverter::ID = 0;
+char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+
+void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Update the dominator tree after if-conversion erased some blocks.
+void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) {
+ // convertIf can remove TBB, FBB, and Tail can be merged into Head.
+ // TBB and FBB should not dominate any blocks.
+ // Tail children should be transferred to Head.
+ MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+ assert(Node != HeadNode && "Cannot erase the head node");
+ while (Node->getNumChildren()) {
+ assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
+ DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
+ }
+ DomTree->eraseNode(Removed[i]);
+ }
+}
+
+/// Update LoopInfo after if-conversion.
+void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
+ if (!Loops)
+ return;
+ // If-conversion doesn't change loop structure, and it doesn't mess with back
+ // edges, so updating LoopInfo is simply removing the dead blocks.
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i)
+ Loops->removeBlock(Removed[i]);
+}
+
+/// Invalidate MachineTraceMetrics before if-conversion.
+void EarlyIfConverter::invalidateTraces() {
+ Traces->verifyAnalysis();
+ Traces->invalidate(IfConv.Head);
+ Traces->invalidate(IfConv.Tail);
+ Traces->invalidate(IfConv.TBB);
+ Traces->invalidate(IfConv.FBB);
+ Traces->verifyAnalysis();
+}
+
+// Adjust cycles with downward saturation.
+static unsigned adjCycles(unsigned Cyc, int Delta) {
+ if (Delta < 0 && Cyc + Delta > Cyc)
+ return 0;
+ return Cyc + Delta;
+}
+
+/// Apply cost model and heuristics to the if-conversion in IfConv.
+/// Return true if the conversion is a good idea.
+///
+bool EarlyIfConverter::shouldConvertIf() {
+ // Stress testing mode disables all cost considerations.
+ if (Stress)
+ return true;
+
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+ MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
+ MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
+ DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
+ unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
+ FBBTrace.getCriticalPath());
+
+ // Set a somewhat arbitrary limit on the critical path extension we accept.
+ unsigned CritLimit = SchedModel->MispredictPenalty/2;
+
+ // If-conversion only makes sense when there is unexploited ILP. Compute the
+ // maximum-ILP resource length of the trace after if-conversion. Compare it
+ // to the shortest critical path.
+ SmallVector<const MachineBasicBlock*, 1> ExtraBlocks;
+ if (IfConv.TBB != IfConv.Tail)
+ ExtraBlocks.push_back(IfConv.TBB);
+ unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
+ DEBUG(dbgs() << "Resource length " << ResLength
+ << ", minimal critical path " << MinCrit << '\n');
+ if (ResLength > MinCrit + CritLimit) {
+ DEBUG(dbgs() << "Not enough available ILP.\n");
+ return false;
+ }
+
+ // Assume that the depth of the first head terminator will also be the depth
+ // of the select instruction inserted, as determined by the flag dependency.
+ // TBB / FBB data dependencies may delay the select even more.
+ MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
+ unsigned BranchDepth =
+ HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth;
+ DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
+
+ // Look at all the tail phis, and compute the critical path extension caused
+ // by inserting select instructions.
+ MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
+ for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
+ SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
+ unsigned Slack = TailTrace.getInstrSlack(PI.PHI);
+ unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth;
+ DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
+
+ // The condition is pulled into the critical path.
+ unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
+ if (CondDepth > MaxDepth) {
+ unsigned Extra = CondDepth - MaxDepth;
+ DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The TBB value is pulled into the critical path.
+ unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles);
+ if (TDepth > MaxDepth) {
+ unsigned Extra = TDepth - MaxDepth;
+ DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The FBB value is pulled into the critical path.
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles);
+ if (FDepth > MaxDepth) {
+ unsigned Extra = FDepth - MaxDepth;
+ DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/// Attempt repeated if-conversion on MBB, return true if successful.
+///
+bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {
+ // If-convert MBB and update analyses.
+ invalidateTraces();
+ SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
+ IfConv.convertIf(RemovedBlocks);
+ Changed = true;
+ updateDomTree(RemovedBlocks);
+ updateLoops(RemovedBlocks);
+ }
+ return Changed;
+}
+
+bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ SchedModel =
+ MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
+ MRI = &MF.getRegInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = 0;
+
+ bool Changed = false;
+ IfConv.runOnMachineFunction(MF);
+
+ // Visit blocks in dominator tree post-order. The post-order enables nested
+ // if-conversion in a single pass. The tryConvertIf() function may erase
+ // blocks, but only blocks dominated by the head block. This makes it safe to
+ // update the dominator tree while the post-order iterator is still active.
+ for (po_iterator<MachineDominatorTree*>
+ I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I)
+ if (tryConvertIf(I->getBlock()))
+ Changed = true;
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 000000000000..3bb04657b58a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,97 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+ cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+ /* cfg = */true, /* analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ EC.clear();
+ EC.grow(2 * MF->getNumBlockIDs());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+ const MachineBasicBlock &MBB = *I;
+ unsigned OutE = 2 * MBB.getNumber() + 1;
+ // Join the outgoing bundle with the ingoing bundles of all successors.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ EC.join(OutE, 2 * (*SI)->getNumber());
+ }
+ EC.compress();
+ if (ViewEdgeBundles)
+ view();
+
+ // Compute the reverse mapping.
+ Blocks.clear();
+ Blocks.resize(getNumBundles());
+
+ for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) {
+ unsigned b0 = getBundle(i, 0);
+ unsigned b1 = getBundle(i, 1);
+ Blocks[b0].push_back(i);
+ if (b1 != b0)
+ Blocks[b1].push_back(i);
+ }
+
+ return false;
+}
+
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+ ViewGraph(*this, "EdgeBundles");
+}
+
+/// Specialize WriteGraph, the standard implementation won't work.
+raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames,
+ const Twine &Title) {
+ const MachineFunction *MF = G.getMachineFunction();
+
+ O << "digraph {\n";
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ unsigned BB = I->getNumber();
+ O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
+ << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
+ << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+ for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end(); SI != SE; ++SI)
+ O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
+ << "\" [ color=lightgray ]\n";
+ }
+ O << "}\n";
+ return O;
+}
diff --git a/contrib/llvm/lib/CodeGen/ErlangGC.cpp b/contrib/llvm/lib/CodeGen/ErlangGC.cpp
new file mode 100644
index 000000000000..8a1e2d9c99a8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ErlangGC.cpp
@@ -0,0 +1,81 @@
+//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Erlang/OTP runtime-compatible garbage collector
+// (e.g. defines safe points, root initialization etc.)
+//
+// The frametable emitter is in ErlangGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ErlangGC : public GCStrategy {
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+ public:
+ ErlangGC();
+ bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF);
+ };
+
+}
+
+static GCRegistry::Add<ErlangGC>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGC() { }
+
+ErlangGC::ErlangGC() {
+ InitRoots = false;
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+ CustomRoots = false;
+ CustomSafePoints = true;
+}
+
+MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
+ ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ MI != ME; ++MI)
+
+ if (MI->getDesc().isCall()) {
+
+ // Do not treat tail call sites as safe points.
+ if (MI->getDesc().isTerminator())
+ continue;
+
+ /* Code copied from VisitCallPoint(...) */
+ MachineBasicBlock::iterator RAI = MI; ++RAI;
+ MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc());
+ FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc());
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
new file mode 100644
index 000000000000..9b0e76fa20cb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -0,0 +1,725 @@
+//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the execution dependency fix pass.
+//
+// Some X86 SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer cpus there is extra latency
+// transferring data between integer and floating point domains. ARM cores
+// have similar issues when they are configured with both VFP and NEON
+// pipelines.
+//
+// This pass changes the variant instructions to minimize domain crossings.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "execution-fix"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one of more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
+namespace {
+struct DomainValue {
+ // Basic reference counting.
+ unsigned Refs;
+
+ // Bitmask of available domains. For an open DomainValue, it is the still
+ // possible domains for collapsing. For a collapsed DomainValue it is the
+ // domains where the register is available for free.
+ unsigned AvailableDomains;
+
+ // Pointer to the next DomainValue in a chain. When two DomainValues are
+ // merged, Victim.Next is set to point to Victor, so old DomainValue
+ // references can be updated by following the chain.
+ DomainValue *Next;
+
+ // Twiddleable instructions using or defining these registers.
+ SmallVector<MachineInstr*, 8> Instrs;
+
+ // A collapsed DomainValue has no instructions to twiddle - it simply keeps
+ // track of the domains where the registers are already available.
+ bool isCollapsed() const { return Instrs.empty(); }
+
+ // Is domain available?
+ bool hasDomain(unsigned domain) const {
+ return AvailableDomains & (1u << domain);
+ }
+
+ // Mark domain as available.
+ void addDomain(unsigned domain) {
+ AvailableDomains |= 1u << domain;
+ }
+
+ // Restrict to a single domain available.
+ void setSingleDomain(unsigned domain) {
+ AvailableDomains = 1u << domain;
+ }
+
+ // Return bitmask of domains that are available and in mask.
+ unsigned getCommonDomains(unsigned mask) const {
+ return AvailableDomains & mask;
+ }
+
+ // First domain available.
+ unsigned getFirstDomain() const {
+ return CountTrailingZeros_32(AvailableDomains);
+ }
+
+ DomainValue() : Refs(0) { clear(); }
+
+ // Clear this DomainValue and point to next which has all its data.
+ void clear() {
+ AvailableDomains = 0;
+ Next = 0;
+ Instrs.clear();
+ }
+};
+}
+
+namespace {
+/// LiveReg - Information about a live register.
+struct LiveReg {
+ /// Value currently in this register, or NULL when no value is being tracked.
+ /// This counts as a DomainValue reference.
+ DomainValue *Value;
+
+ /// Instruction that defined this register, relative to the beginning of the
+ /// current basic block. When a LiveReg is used to represent a live-out
+ /// register, this value is relative to the end of the basic block, so it
+ /// will be a negative number.
+ int Def;
+};
+} // anonynous namespace
+
+namespace {
+class ExeDepsFix : public MachineFunctionPass {
+ static char ID;
+ SpecificBumpPtrAllocator<DomainValue> Allocator;
+ SmallVector<DomainValue*,16> Avail;
+
+ const TargetRegisterClass *const RC;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ std::vector<int> AliasMap;
+ const unsigned NumRegs;
+ LiveReg *LiveRegs;
+ typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
+ LiveOutMap LiveOuts;
+
+ /// Current instruction number.
+ /// The first instruction in each basic block is 0.
+ int CurInstr;
+
+ /// True when the current block has a predecessor that hasn't been visited
+ /// yet.
+ bool SeenUnknownBackEdge;
+
+public:
+ ExeDepsFix(const TargetRegisterClass *rc)
+ : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Execution dependency fix";
+ }
+
+private:
+ // Register mapping.
+ int regIndex(unsigned Reg);
+
+ // DomainValue allocation.
+ DomainValue *alloc(int domain = -1);
+ DomainValue *retain(DomainValue *DV) {
+ if (DV) ++DV->Refs;
+ return DV;
+ }
+ void release(DomainValue*);
+ DomainValue *resolve(DomainValue*&);
+
+ // LiveRegs manipulations.
+ void setLiveReg(int rx, DomainValue *DV);
+ void kill(int rx);
+ void force(int rx, unsigned domain);
+ void collapse(DomainValue *dv, unsigned domain);
+ bool merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock(MachineBasicBlock*);
+ void leaveBasicBlock(MachineBasicBlock*);
+ void visitInstr(MachineInstr*);
+ void processDefs(MachineInstr*, bool Kill);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
+};
+}
+
+char ExeDepsFix::ID = 0;
+
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
+int ExeDepsFix::regIndex(unsigned Reg) {
+ assert(Reg < AliasMap.size() && "Invalid register");
+ return AliasMap[Reg];
+}
+
+DomainValue *ExeDepsFix::alloc(int domain) {
+ DomainValue *dv = Avail.empty() ?
+ new(Allocator.Allocate()) DomainValue :
+ Avail.pop_back_val();
+ if (domain >= 0)
+ dv->addDomain(domain);
+ assert(dv->Refs == 0 && "Reference count wasn't cleared");
+ assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
+ return dv;
+}
+
+/// release - Release a reference to DV. When the last reference is released,
+/// collapse if needed.
+void ExeDepsFix::release(DomainValue *DV) {
+ while (DV) {
+ assert(DV->Refs && "Bad DomainValue");
+ if (--DV->Refs)
+ return;
+
+ // There are no more DV references. Collapse any contained instructions.
+ if (DV->AvailableDomains && !DV->isCollapsed())
+ collapse(DV, DV->getFirstDomain());
+
+ DomainValue *Next = DV->Next;
+ DV->clear();
+ Avail.push_back(DV);
+ // Also release the next DomainValue in the chain.
+ DV = Next;
+ }
+}
+
+/// resolve - Follow the chain of dead DomainValues until a live DomainValue is
+/// reached. Update the referenced pointer when necessary.
+DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
+ DomainValue *DV = DVRef;
+ if (!DV || !DV->Next)
+ return DV;
+
+ // DV has a chain. Find the end.
+ do DV = DV->Next;
+ while (DV->Next);
+
+ // Update DVRef to point to DV.
+ retain(DV);
+ release(DVRef);
+ DVRef = DV;
+ return DV;
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(LiveRegs && "Must enter basic block first.");
+
+ if (LiveRegs[rx].Value == dv)
+ return;
+ if (LiveRegs[rx].Value)
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = retain(dv);
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void ExeDepsFix::kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(LiveRegs && "Must enter basic block first.");
+ if (!LiveRegs[rx].Value)
+ return;
+
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = 0;
+}
+
+/// Force register rx into domain.
+void ExeDepsFix::force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(LiveRegs && "Must enter basic block first.");
+ if (DomainValue *dv = LiveRegs[rx].Value) {
+ if (dv->isCollapsed())
+ dv->addDomain(domain);
+ else if (dv->hasDomain(domain))
+ collapse(dv, domain);
+ else {
+ // This is an incompatible open DomainValue. Collapse it to whatever and
+ // force the new value into domain. This costs a domain crossing.
+ collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx].Value && "Not live after collapse?");
+ LiveRegs[rx].Value->addDomain(domain);
+ }
+ } else {
+ // Set up basic collapsed DomainValue.
+ setLiveReg(rx, alloc(domain));
+ }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->hasDomain(domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty())
+ TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain);
+ dv->setSingleDomain(domain);
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (LiveRegs && dv->Refs > 1)
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx].Value == dv)
+ setLiveReg(rx, alloc(domain));
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
+bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
+ assert(!A->isCollapsed() && "Cannot merge into collapsed");
+ assert(!B->isCollapsed() && "Cannot merge from collapsed");
+ if (A == B)
+ return true;
+ // Restrict to the domains that A and B have in common.
+ unsigned common = A->getCommonDomains(B->AvailableDomains);
+ if (!common)
+ return false;
+ A->AvailableDomains = common;
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+
+ // Clear the old DomainValue so we won't try to swizzle instructions twice.
+ B->clear();
+ // All uses of B are referred to A.
+ B->Next = retain(A);
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx].Value == B)
+ setLiveReg(rx, A);
+ return true;
+}
+
+// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
+void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
+ // Detect back-edges from predecessors we haven't processed yet.
+ SeenUnknownBackEdge = false;
+
+ // Reset instruction counter in each basic block.
+ CurInstr = 0;
+
+ // Set up LiveRegs to represent registers entering MBB.
+ if (!LiveRegs)
+ LiveRegs = new LiveReg[NumRegs];
+
+ // Default values are 'nothing happened a long time ago'.
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ LiveRegs[rx].Value = 0;
+ LiveRegs[rx].Def = -(1 << 20);
+ }
+
+ // This is the entry block.
+ if (MBB->pred_empty()) {
+ for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
+ e = MBB->livein_end(); i != e; ++i) {
+ int rx = regIndex(*i);
+ if (rx < 0)
+ continue;
+ // Treat function live-ins as if they were defined just before the first
+ // instruction. Usually, function arguments are set up immediately
+ // before the call.
+ LiveRegs[rx].Def = -1;
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
+ return;
+ }
+
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) {
+ SeenUnknownBackEdge = true;
+ continue;
+ }
+ assert(fi->second && "Can't have NULL entries");
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ // Use the most recent predecessor def for each register.
+ LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def);
+
+ DomainValue *pdv = resolve(fi->second[rx].Value);
+ if (!pdv)
+ continue;
+ if (!LiveRegs[rx].Value) {
+ setLiveReg(rx, pdv);
+ continue;
+ }
+
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx].Value->isCollapsed()) {
+ // We are already collapsed, but predecessor is not. Force him.
+ unsigned Domain = LiveRegs[rx].Value->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
+ collapse(pdv, Domain);
+ continue;
+ }
+
+ // Currently open, merge in predecessor.
+ if (!pdv->isCollapsed())
+ merge(LiveRegs[rx].Value, pdv);
+ else
+ force(rx, pdv->getFirstDomain());
+ }
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber()
+ << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n"));
+}
+
+void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
+ assert(LiveRegs && "Must enter basic block first.");
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ // Also use LiveOuts as a visited set to detect back-edges.
+ bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second;
+
+ if (First) {
+ // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to
+ // the end of this block instead of the beginning.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ LiveRegs[i].Def -= CurInstr;
+ } else {
+ // Insertion failed, this must be the second pass.
+ // Release all the DomainValues instead of keeping them.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ release(LiveRegs[i].Value);
+ delete[] LiveRegs;
+ }
+ LiveRegs = 0;
+}
+
+void ExeDepsFix::visitInstr(MachineInstr *MI) {
+ if (MI->isDebugValue())
+ return;
+
+ // Update instructions with explicit execution domains.
+ std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI);
+ if (DomP.first) {
+ if (DomP.second)
+ visitSoftInstr(MI, DomP.second);
+ else
+ visitHardInstr(MI, DomP.first);
+ }
+
+ // Process defs to track register ages, and kill values clobbered by generic
+ // instructions.
+ processDefs(MI, !DomP.first);
+}
+
+// Update def-ages for registers defined by MI.
+// If Kill is set, also kill off DomainValues clobbered by the defs.
+void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
+ assert(!MI->isDebugValue() && "Won't process debug values");
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isImplicit())
+ break;
+ if (MO.isUse())
+ continue;
+ int rx = regIndex(MO.getReg());
+ if (rx < 0)
+ continue;
+
+ // This instruction explicitly defines rx.
+ DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
+ << '\t' << *MI);
+
+ // How many instructions since rx was last written?
+ unsigned Clearance = CurInstr - LiveRegs[rx].Def;
+ LiveRegs[rx].Def = CurInstr;
+
+ // Kill off domains redefined by generic instructions.
+ if (Kill)
+ kill(rx);
+
+ // Verify clearance before partial register updates.
+ unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
+ if (!Pref)
+ continue;
+ DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+ if (Pref > Clearance) {
+ DEBUG(dbgs() << ": Break dependency.\n");
+ TII->breakPartialRegDependency(MI, i, TRI);
+ continue;
+ }
+
+ // The current clearance seems OK, but we may be ignoring a def from a
+ // back-edge.
+ if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
+ DEBUG(dbgs() << ": OK.\n");
+ continue;
+ }
+
+ // A def from an unprocessed back-edge may make us break this dependency.
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
+ }
+
+ ++CurInstr;
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = regIndex(mo.getReg());
+ if (rx < 0) continue;
+ force(rx, domain);
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = regIndex(mo.getReg());
+ if (rx < 0) continue;
+ kill(rx);
+ force(rx, domain);
+ }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Bitmask of available domains for this instruction after taking collapsed
+ // operands into account.
+ unsigned available = mask;
+
+ // Scan the explicit use operands for incoming domains.
+ SmallVector<int, 4> used;
+ if (LiveRegs)
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = regIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (DomainValue *dv = LiveRegs[rx].Value) {
+ // Bitmask of domains that dv and available have in common.
+ unsigned common = dv->getCommonDomains(available);
+ // Is it possible to use this collapsed register for free?
+ if (dv->isCollapsed()) {
+ // Restrict available domains to the ones in common with the operand.
+ // If there are no common domains, we must pay the cross-domain
+ // penalty for this operand.
+ if (common) available = common;
+ } else if (common)
+ // Open DomainValue is compatible, save it for merging.
+ used.push_back(rx);
+ else
+ // Open DomainValue is not compatible with instruction. It is useless
+ // now.
+ kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(available)) {
+ unsigned domain = CountTrailingZeros_32(available);
+ TII->setExecutionDomain(mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match available, and build a list of
+ // incoming DomainValues that we want to merge.
+ SmallVector<LiveReg, 4> Regs;
+ for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ const LiveReg &LR = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!LR.Value->getCommonDomains(available)) {
+ kill(rx);
+ continue;
+ }
+ // Sorted insertion.
+ bool Inserted = false;
+ for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end();
+ i != e && !Inserted; ++i) {
+ if (LR.Def < i->Def) {
+ Inserted = true;
+ Regs.insert(i, LR);
+ }
+ }
+ if (!Inserted)
+ Regs.push_back(LR);
+ }
+
+ // doms are now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = 0;
+ while (!Regs.empty()) {
+ if (!dv) {
+ dv = Regs.pop_back_val().Value;
+ // Force the first dv to match the current instruction.
+ dv->AvailableDomains = dv->getCommonDomains(available);
+ assert(dv->AvailableDomains && "Domain should have been filtered");
+ continue;
+ }
+
+ DomainValue *Latest = Regs.pop_back_val().Value;
+ // Skip already merged values.
+ if (Latest == dv || Latest->Next)
+ continue;
+ if (merge(dv, Latest))
+ continue;
+
+ // If latest didn't merge, it is useless now. Kill all registers using it.
+ for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
+ if (LiveRegs[*i].Value == Latest)
+ kill(*i);
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv) {
+ dv = alloc();
+ dv->AvailableDomains = available;
+ }
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv. We must iterate through
+ // all the operators, including imp-def ones.
+ for (MachineInstr::mop_iterator ii = mi->operands_begin(),
+ ee = mi->operands_end();
+ ii != ee; ++ii) {
+ MachineOperand &mo = *ii;
+ if (!mo.isReg()) continue;
+ int rx = regIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
+ kill(rx);
+ setLiveReg(rx, dv);
+ }
+ }
+}
+
+bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = MF->getTarget().getInstrInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+ LiveRegs = 0;
+ assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+
+ DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
+ << RC->getName() << " **********\n");
+
+ // If no relevant registers are used in the function, we can skip it
+ // completely.
+ bool anyregs = false;
+ for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ anyregs = true;
+ break;
+ }
+ if (!anyregs) return false;
+
+ // Initialize the AliasMap on the first use.
+ if (AliasMap.empty()) {
+ // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC,
+ // or -1.
+ AliasMap.resize(TRI->getNumRegs(), -1);
+ for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
+ for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
+ AI.isValid(); ++AI)
+ AliasMap[*AI] = i;
+ }
+
+ MachineBasicBlock *Entry = MF->begin();
+ ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
+ SmallVector<MachineBasicBlock*, 16> Loops;
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock *MBB = *MBBI;
+ enterBasicBlock(MBB);
+ if (SeenUnknownBackEdge)
+ Loops.push_back(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I)
+ visitInstr(I);
+ leaveBasicBlock(MBB);
+ }
+
+ // Visit all the loop blocks again in order to merge DomainValues from
+ // back-edges.
+ for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Loops[i];
+ enterBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I)
+ if (!I->isDebugValue())
+ processDefs(I, false);
+ leaveBasicBlock(MBB);
+ }
+
+ // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+ if (FI == LiveOuts.end() || !FI->second)
+ continue;
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ if (FI->second[i].Value)
+ release(FI->second[i].Value);
+ delete[] FI->second;
+ }
+ LiveOuts.clear();
+ Avail.clear();
+ Allocator.DestroyAll();
+
+ return false;
+}
+
+FunctionPass *
+llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
+ return new ExeDepsFix(RC);
+}
diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
new file mode 100644
index 000000000000..b2b68828a226
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
@@ -0,0 +1,74 @@
+//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand Pseudo-instructions produced by ISel. These are usually to allow
+// the expansion to contain control flow, such as a conditional move
+// implemented with a conditional branch and a phi, or an atomic operation
+// implemented with a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+namespace {
+ class ExpandISelPseudos : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandISelPseudos() : MachineFunctionPass(ID) {}
+
+ private:
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char ExpandISelPseudos::ID = 0;
+char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
+INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+ "Expand ISel Pseudo-instructions", false, false)
+
+bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ const TargetLowering *TLI = MF.getTarget().getTargetLowering();
+
+ // Iterate through each instruction in the function, looking for pseudos.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE; ) {
+ MachineInstr *MI = MBBI++;
+
+ // If MI is a pseudo, expand it.
+ if (MI->usesCustomInsertionHook()) {
+ Changed = true;
+ MachineBasicBlock *NewMBB =
+ TLI->EmitInstrWithCustomInserter(MI, MBB);
+ // The expansion may involve new basic blocks.
+ if (NewMBB != MBB) {
+ MBB = NewMBB;
+ I = NewMBB;
+ MBBI = NewMBB->begin();
+ MBBE = NewMBB->end();
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
new file mode 100644
index 000000000000..1611db8d91a3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -0,0 +1,225 @@
+//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass that expands COPY and SUBREG_TO_REG pseudo
+// instructions after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postrapseudos"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+namespace {
+struct ExpandPostRA : public MachineFunctionPass {
+private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandPostRA() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
+
+private:
+ bool LowerSubregToReg(MachineInstr *MI);
+ bool LowerCopy(MachineInstr *MI);
+
+ void TransferImplicitDefs(MachineInstr *MI);
+};
+} // end anonymous namespace
+
+char ExpandPostRA::ID = 0;
+char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
+
+INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
+ "Post-RA pseudo instruction expansion pass", false, false)
+
+/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit-def
+/// operands from MI to the replacement instruction.
+void
+ExpandPostRA::TransferImplicitDefs(MachineInstr *MI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+ continue;
+ CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
+ }
+}
+
+bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned InsReg = MI->getOperand(2).getReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Insert destination must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (MI->allDefsAreDead()) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+ return true;
+ }
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identify copy instruction.
+ // Watch out for case like this:
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+ // We must leave %RAX live.
+ if (DstReg != InsReg) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
+ DEBUG(dbgs() << "subreg: replace by: " << *MI);
+ return true;
+ }
+ DEBUG(dbgs() << "subreg: eliminated!");
+ } else {
+ TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+ MI->getOperand(2).isKill());
+
+ // Implicitly define DstReg for subsequent uses.
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+ CopyMI->addRegisterDefined(DstReg);
+ DEBUG(dbgs() << "subreg: " << *CopyMI);
+ }
+
+ DEBUG(dbgs() << '\n');
+ MBB->erase(MI);
+ return true;
+}
+
+bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
+
+ if (MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "dead copy: " << *MI);
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if (SrcMO.getReg() == DstMO.getReg()) {
+ DEBUG(dbgs() << "identity copy: " << *MI);
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return true;
+ }
+
+ DEBUG(dbgs() << "real copy: " << *MI);
+ TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+ DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
+
+ if (MI->getNumOperands() > 2)
+ TransferImplicitDefs(MI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "replaced by: " << *(--dMI);
+ });
+ MI->eraseFromParent();
+ return true;
+}
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+///
+bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Machine Function\n"
+ << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ bool MadeChange = false;
+
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me;) {
+ MachineInstr *MI = mi;
+ // Advance iterator here because MI may be erased.
+ ++mi;
+
+ // Only expand pseudos.
+ if (!MI->isPseudo())
+ continue;
+
+ // Give targets a chance to expand even standard pseudos.
+ if (TII->expandPostRAPseudo(MI)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Expand standard pseudos.
+ switch (MI->getOpcode()) {
+ case TargetOpcode::SUBREG_TO_REG:
+ MadeChange |= LowerSubregToReg(MI);
+ break;
+ case TargetOpcode::COPY:
+ MadeChange |= LowerCopy(MI);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ continue;
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::EXTRACT_SUBREG:
+ llvm_unreachable("Sub-register pseudos should have been eliminated.");
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 000000000000..ef5247c2edff
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,178 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class Printer : public FunctionPass {
+ static char ID;
+ raw_ostream &OS;
+
+ public:
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
+
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+
+}
+
+INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
+ "Create Garbage Collector Module Metadata", false, false)
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+ : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() {}
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo()
+ : ImmutablePass(ID) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+GCModuleInfo::~GCModuleInfo() {
+ clear();
+}
+
+GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
+ const std::string &Name) {
+ strategy_map_type::iterator NMI = StrategyMap.find(Name);
+ if (NMI != StrategyMap.end())
+ return NMI->getValue();
+
+ for (GCRegistry::iterator I = GCRegistry::begin(),
+ E = GCRegistry::end(); I != E; ++I) {
+ if (Name == I->getName()) {
+ GCStrategy *S = I->instantiate();
+ S->M = M;
+ S->Name = Name;
+ StrategyMap.GetOrCreateValue(Name).setValue(S);
+ StrategyList.push_back(S);
+ return S;
+ }
+ }
+
+ dbgs() << "unsupported GC: " << Name << "\n";
+ llvm_unreachable(0);
+}
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC());
+
+ finfo_map_type::iterator I = FInfoMap.find(&F);
+ if (I != FInfoMap.end())
+ return *I->second;
+
+ GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+ GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+ FInfoMap[&F] = GFI;
+ return *GFI;
+}
+
+void GCModuleInfo::clear() {
+ FInfoMap.clear();
+ StrategyMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
+ return new Printer(OS);
+}
+
+
+const char *Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+ switch (Kind) {
+ case GC::Loop: return "loop";
+ case GC::Return: return "return";
+ case GC::PreCall: return "pre-call";
+ case GC::PostCall: return "post-call";
+ }
+ llvm_unreachable("Invalid point kind");
+}
+
+bool Printer::runOnFunction(Function &F) {
+ if (F.hasGC()) return false;
+
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getName() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end(); RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(),
+ PE = FD->end(); PI != PE; ++PI) {
+
+ OS << "\t" << PI->Label->getName() << ": "
+ << DescKind(PI->Kind) << ", live = {";
+
+ for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+ RE = FD->live_end(PI);;) {
+ OS << " " << RI->Num;
+ if (++RI == RE)
+ break;
+ OS << ",";
+ }
+
+ OS << " }\n";
+ }
+
+ return false;
+}
+
+bool Printer::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Printer didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 000000000000..f80e9ced0bc9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,27 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 000000000000..1173d1102125
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,430 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// GCMachineCodeAnalysis identifies the GC safe points in the machine code.
+// Roots are identified in SelectionDAGISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+ /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// directed by the GCStrategy. It also performs automatic root initialization
+ /// and custom intrinsic lowering.
+ class LowerIntrinsics : public FunctionPass {
+ static bool NeedsDefaultLoweringPass(const GCStrategy &C);
+ static bool NeedsCustomLoweringPass(const GCStrategy &C);
+ static bool CouldBecomeSafePoint(Instruction *I);
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+ static bool InsertRootInitializers(Function &F,
+ AllocaInst **Roots, unsigned Count);
+
+ public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+ };
+
+
+ /// GCMachineCodeAnalysis - This is a target-independent pass over the machine
+ /// function representation to identify safe points for the garbage collector
+ /// in the machine code. It inserts labels at safe points and populates a
+ /// GCMetadata record for each function.
+ class GCMachineCodeAnalysis : public MachineFunctionPass {
+ const TargetMachine *TM;
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+ public:
+ static char ID;
+
+ GCMachineCodeAnalysis();
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ };
+
+}
+
+// -----------------------------------------------------------------------------
+
+GCStrategy::GCStrategy() :
+ NeededSafePoints(0),
+ CustomReadBarriers(false),
+ CustomWriteBarriers(false),
+ CustomRoots(false),
+ CustomSafePoints(false),
+ InitRoots(true),
+ UsesMetadata(false)
+{}
+
+GCStrategy::~GCStrategy() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+
+ Functions.clear();
+}
+
+bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
+
+bool GCStrategy::performCustomLowering(Function &F) {
+ dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
+ llvm_unreachable("must override performCustomLowering");
+}
+
+
+bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) {
+ dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n";
+ llvm_unreachable(0);
+}
+
+
+GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
+ GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
+ Functions.push_back(FI);
+ return FI;
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() {
+ return new LowerIntrinsics();
+}
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics()
+ : FunctionPass(ID) {
+ initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+ }
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTree>();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ // FIXME: This is rather antisocial in the context of a JIT since it performs
+ // work against the entire module. But this cannot be done at
+ // runFunction time (initializeCustomLowering likely needs to change
+ // the module).
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ bool MadeChange = false;
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (NeedsCustomLoweringPass(**I))
+ if ((*I)->initializeCustomLowering(M))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst*,16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I);
+ SI->insertAfter(*I);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action. The default for roots is no action.
+ return !C.customWriteBarrier()
+ || !C.customReadBarrier()
+ || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+ // Custom lowering is only necessary if enabled for some action.
+ return C.customWriteBarrier()
+ || C.customReadBarrier()
+ || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+ // However, instructions as seemingly inoccuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I) || isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
+ if (UseCustomLoweringPass)
+ MadeChange |= S.performCustomLowering(F);
+
+ // Custom lowering may modify the CFG, so dominators must be recomputed.
+ if (UseCustomLoweringPass) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->DT->recalculate(F);
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
+
+ SmallVector<AllocaInst*, 32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(cast<AllocaInst>(
+ CI->getArgOperand(0)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
+
+// -----------------------------------------------------------------------------
+
+char GCMachineCodeAnalysis::ID = 0;
+char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
+
+INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
+ "Analyze Machine Code For Garbage Collection", false, false)
+
+GCMachineCodeAnalysis::GCMachineCodeAnalysis()
+ : MachineFunctionPass(ID) {}
+
+void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
+}
+
+void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(),
+ BBE = MF.end(); BBI != BBE; ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(),
+ ME = BBI->end(); MI != ME; ++MI)
+ if (MI->isCall())
+ VisitCallPoint(MI);
+}
+
+void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+ assert(TFI && "TargetRegisterInfo not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
+ RI != FI->roots_end();) {
+ // If the root references a dead object, no need to keep it.
+ if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+ RI = FI->removeStackRoot(RI);
+ } else {
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ ++RI;
+ }
+ }
+}
+
+bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ if (!FI->getStrategy().needsSafePoints())
+ return false;
+
+ TM = &MF.getTarget();
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = TM->getInstrInfo();
+
+ // Find the size of the stack frame.
+ FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+ // Find all safe points.
+ if (FI->getStrategy().customSafePoints()) {
+ FI->getStrategy().findCustomSafePoints(*FI, MF);
+ } else {
+ FindSafePoints(MF);
+ }
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 000000000000..9958d7daada8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1583 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "llvm/CodeGen/Passes.h"
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
+
+namespace {
+ class IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+ ICSimple, // BB is entry of an one split, no rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
+
+ /// BBInfo - One per MachineBasicBlock, this is used to cache the result
+ /// if-conversion feasibility analysis. This includes results from
+ /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its
+ /// classification, and common tail block of its successors (if it's a
+ /// diamond shape), its size, whether it's predicable, and whether any
+ /// instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+ /// IsBrAnalyzable - True if AnalyzeBranch() returns false.
+ /// HasFallThrough - True if BB may fallthrough to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// ExtraCost - Extra cost for multi-cycle instructions.
+ /// ExtraCost2 - Some instructions are slower when predicated
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB- See AnalyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize;
+ unsigned ExtraCost;
+ unsigned ExtraCost2;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *TrueBB;
+ MachineBasicBlock *FalseBB;
+ SmallVector<MachineOperand, 4> BrCond;
+ SmallVector<MachineOperand, 4> Predicate;
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ HasFallThrough(false), IsUnpredicable(false),
+ CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+ ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
+ };
+
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumption - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ bool NeedSubsumption;
+ unsigned NumDups;
+ unsigned NumDups2;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+ : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ };
+
+ /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+ /// basic block number.
+ std::vector<BBInfo> BBAnalysis;
+
+ const TargetLoweringBase *TLI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ const MachineBranchProbabilityInfo *MBPI;
+ MachineRegisterInfo *MRI;
+
+ bool PreRegAlloc;
+ bool MadeChange;
+ int FnNum;
+ public:
+ static char ID;
+ IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+ initializeIfConverterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ bool ReverseBranchCondition(BBInfo &BBI);
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ const BranchProbability &Prediction) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ const BranchProbability &Prediction) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const;
+ void ScanInstructions(BBInfo &BBI);
+ BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+ bool isTriangle = false, bool RevBranch = false);
+ void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
+ void InvalidatePreds(MachineBasicBlock *BB);
+ void RemoveExtraEdges(BBInfo &BBI);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ SmallSet<unsigned, 4> *LaterRedefs = 0);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+ unsigned Cycle, unsigned Extra,
+ const BranchProbability &Prediction) const {
+ return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+ Prediction);
+ }
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+ unsigned TCycle, unsigned TExtra,
+ MachineBasicBlock &FBB,
+ unsigned FCycle, unsigned FExtra,
+ const BranchProbability &Prediction) const {
+ return TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+ Prediction);
+ }
+
+ // blockAlwaysFallThrough - Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+ }
+
+ // IfcvtTokenCmp - Used to sort if-conversion candidates.
+ static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+ if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+ return true;
+ else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+ char IfConverter::ID = 0;
+}
+
+char &llvm::IfConverterID = IfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MRI = &MF.getRegInfo();
+ InstrItins = MF.getTarget().getInstrItineraryData();
+ if (!TII) return false;
+
+ PreRegAlloc = MRI->isSSA();
+
+ bool BFChange = false;
+ if (!PreRegAlloc) {
+ // Tail merge tend to expose more if-conversion opportunities.
+ BranchFolder BF(true, false);
+ BFChange = BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getName() << "\'");
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DEBUG(dbgs() << " skipped\n");
+ return false;
+ }
+ DEBUG(dbgs() << "\n");
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ std::vector<IfcvtToken*> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ delete Token;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected!");
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
+ " false" : "")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ");
+ RetVal = IfConvertSimple(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) ++NumSimpleFalse;
+ else ++NumSimple;
+ }
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ if (DisableTriangleFR && isFalse && isRev) break;
+ DEBUG(dbgs() << "Ifcvt (Triangle");
+ if (isFalse)
+ DEBUG(dbgs() << " false");
+ if (isRev)
+ DEBUG(dbgs() << " rev");
+ DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertTriangle(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) ++NumTriangleFRev;
+ else ++NumTriangleFalse;
+ } else {
+ if (isRev) ++NumTriangleRev;
+ else ++NumTriangle;
+ }
+ }
+ break;
+ }
+ case ICDiamond: {
+ if (DisableDiamond) break;
+ DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumDiamonds;
+ break;
+ }
+ }
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ // Delete tokens in case of early exit.
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ delete Token;
+ }
+
+ Tokens.clear();
+ BBAnalysis.clear();
+
+ if (MadeChange && IfCvtBranchFold) {
+ BranchFolder BF(false, false);
+ BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ MadeChange |= BFChange;
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the end of the block
+/// branch. Swap block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ DebugLoc dl; // FIXME: this is nowhere
+ if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+ TII->RemoveBranch(*BBI.BB);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
+
+/// getNextBlock - Returns the next block in the function blocks ordering. If
+/// it is the end, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ if (++I == E)
+ return NULL;
+ return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns the
+/// number of instructions that the ifcvt would need to duplicate if performed
+/// in Dups.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ const BranchProbability &Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
+ Prediction))
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) forms a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it checks if 'true' block's false branch
+/// branches to the 'false' block rather than the other way around. It also
+/// returns the number of instructions that the ifcvt would need to duplicate
+/// if performed in 'Dups'.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ const BranchProbability &Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+ // Ends with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // Require a conditional branch
+ ++Size;
+ }
+ }
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB;
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) forms a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB))
+ break;
+ ++Dups1;
+ ++TIB;
+ ++FIB;
+ }
+
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
+ while (TIE != TIB) {
+ --TIE;
+ if (!TIE->isBranch())
+ break;
+ }
+ while (FIE != FIB) {
+ --FIE;
+ if (!FIE->isBranch())
+ break;
+ }
+
+ // If Dups1 includes all of a block, then don't count duplicate
+ // instructions at the end of the blocks.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+
+ // Count duplicate instructions at the ends of the blocks.
+ while (TIE != TIB && FIE != FIB) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIE->isDebugValue()) {
+ while (TIE != TIB && TIE->isDebugValue())
+ --TIE;
+ if (TIE == TIB)
+ break;
+ }
+ if (FIE->isDebugValue()) {
+ while (FIE != FIB && FIE->isDebugValue())
+ --FIE;
+ if (FIE == FIB)
+ break;
+ }
+ if (!TIE->isIdenticalTo(FIE))
+ break;
+ ++Dups2;
+ --TIE;
+ --FIE;
+ }
+
+ return true;
+}
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless it's the last instruction.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = !BBI.Predicate.empty();
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ExtraCost = 0;
+ BBI.ExtraCost2 = 0;
+ BBI.ClobbersPred = false;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ if (I->isNotDuplicable())
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(I);
+ bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
+
+ if (!isCondBr) {
+ if (!isPredicated) {
+ BBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+ &ExtraPredCost);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // Predicate modification instruction should end the block (except for
+ // already predicated instructions and end of block branches).
+ if (isCondBr) {
+ // A conditional branch is not predicable, but it may be eliminated.
+ continue;
+ }
+
+ // Predicate may have been modified, the subsequent (currently)
+ // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(I, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if (!TII->isPredicable(I)) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated, check if its predicate subsumes the new
+ // predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens) {
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+ return BBI;
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+ // Unanalyzable or ends with fallthrough or unconditional branch, or if is not
+ // considered for ifcvt anymore.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if either path is a back edge to the entry block.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
+ BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = !TrueBBI.Predicate.empty();
+ bool FNeedSub = !FalseBBI.Predicate.empty();
+ bool Enqueued = false;
+
+ BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
+
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+ TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+ *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+ FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
+ Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+ Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates.
+void IfConverter::AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ AnalyzeBlock(BB, Tokens);
+ }
+
+ // Sort to favor more complex ifcvt scheme.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+}
+
+/// canFallThroughTo - Returns true either if ToBB is the next block after BB or
+/// that all the intervening blocks are empty (given BB can fall through to its
+/// next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator PI = BB;
+ MachineFunction::iterator I = llvm::next(PI);
+ MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (I != TI) {
+ // Check isSuccessor to avoid case where the next block is empty, but
+ // it's not a successor.
+ if (I == E || !I->empty() || !PI->isSuccessor(I))
+ return false;
+ PI = I++;
+ }
+ return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed
+/// to determine if it can be if-converted. If predecessor is already enqueued,
+/// dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc dl; // FIXME: this is nowhere
+ SmallVector<MachineOperand, 0> NoCond;
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
+/// modeled as read + write (sort like two-address instructions). These
+/// routines track register liveness and add implicit uses to if-converted
+/// instructions to conform to the model.
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Redefs.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.insert(*SubRegs);
+ }
+}
+
+static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI,
+ bool AddImpUse = false) {
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (MO.isKill()) {
+ Redefs.erase(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.erase(*SubRegs);
+ }
+ }
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (!Redefs.insert(Reg)) {
+ if (AddImpUse)
+ // Treat predicated update as read + write.
+ MIB.addReg(Reg, RegState::Implicit | RegState::Undef);
+ } else {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.insert(*SubRegs);
+ }
+ }
+}
+
+static void UpdatePredRedefs(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ while (I != E) {
+ UpdatePredRedefs(I, Redefs, TRI);
+ ++I;
+ }
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICSimpleFalse)
+ if (TII->ReverseBranchCondition(Cond))
+ llvm_unreachable("Unable to reverse branch condition!");
+
+ // Initialize liveins to the first BB. These are potentiall redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+ } else {
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ // Now ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, that will not be
+ // available if cmp executes.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+ DebugLoc dl; // FIXME: this is nowhere
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ if (TII->ReverseBranchCondition(Cond))
+ llvm_unreachable("Unable to reverse branch condition!");
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ if (ReverseBranchCondition(*CvtBBI)) {
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on bad intel.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+ }
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+ bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI, false);
+ }
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+ CvtBBI->BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ llvm_unreachable("Unable to reverse branch condition!");
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to 'false'.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+ // Put the predicated instructions from the 'true' block before the
+ // instructions from the 'false' block, unless the true block would clobber
+ // the predicate, in which case, do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ llvm_unreachable("Unable to reverse branch condition!");
+ SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+ SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(BBI1->BB, Redefs, TRI);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ MachineBasicBlock::iterator DIE1 = BBI1->BB->end();
+ MachineBasicBlock::iterator DIE2 = BBI2->BB->end();
+ // Skip dbg_value instructions
+ while (DI1 != DIE1 && DI1->isDebugValue())
+ ++DI1;
+ while (DI2 != DIE2 && DI2->isDebugValue())
+ ++DI2;
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+
+ // Skip past the dups on each side separately since there may be
+ // differing dbg_value entries.
+ for (unsigned i = 0; i < NumDups1; ++DI1) {
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ while (NumDups1 != 0) {
+ ++DI2;
+ if (!DI2->isDebugValue())
+ --NumDups1;
+ }
+
+ UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI);
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Remove branch from 'true' block and remove duplicated instructions.
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI1 != BBI1->BB->begin());
+ --DI1;
+ // skip dbg_value instructions
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+
+ // Remove 'false' block branch and find the last instruction to predicate.
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI2 != BBI2->BB->begin());
+ --DI2;
+ // skip dbg_value instructions
+ if (!DI2->isDebugValue())
+ --NumDups2;
+ }
+
+ // Remember which registers would later be defined by the false block.
+ // This allows us not to predicate instructions in the true block that would
+ // later be re-defined. That is, rather than
+ // subeq r0, r1, #1
+ // addne r0, r1, #1
+ // generate:
+ // sub r0, r1, #1
+ // addne r0, r1, #1
+ SmallSet<unsigned, 4> RedefsByFalse;
+ SmallSet<unsigned, 4> ExtUses;
+ if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) {
+ for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) {
+ if (FI->isDebugValue())
+ continue;
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = FI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ } else if (!RedefsByFalse.count(Reg)) {
+ // These are defined before ctrl flow reach the 'false' instructions.
+ // They cannot be modified by the 'true' instructions.
+ ExtUses.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ ExtUses.insert(*SubRegs);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (!ExtUses.count(Reg)) {
+ RedefsByFalse.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RedefsByFalse.insert(*SubRegs);
+ }
+ }
+ }
+ }
+
+ // Predicate the 'true' block.
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse);
+
+ // Predicate the 'false' block.
+ PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1, TailBB == 0);
+ MergeBlocks(BBI, *BBI2, TailBB == 0);
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+ // tail, add a unconditional branch to it.
+ if (TailBB) {
+ BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
+ bool CanMergeTail = !TailBBI.HasFallThrough;
+ // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+ // check if there are any other predecessors besides those.
+ unsigned NumPreds = TailBB->pred_size();
+ if (NumPreds > 1)
+ CanMergeTail = false;
+ else if (NumPreds == 1 && CanMergeTail) {
+ MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+ if (*PI != BBI1->BB && *PI != BBI2->BB)
+ CanMergeTail = false;
+ }
+ if (CanMergeTail) {
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ BBI.BB->addSuccessor(TailBB);
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch,
+ // which can happen here if TailBB is unanalyzable and is merged, so
+ // explicitly remove BBI1 and BBI2 as successors.
+ BBI.BB->removeSuccessor(BBI1->BB);
+ BBI.BB->removeSuccessor(BBI2->BB);
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+static bool MaySpeculate(const MachineInstr *MI,
+ SmallSet<unsigned, 4> &LaterRedefs,
+ const TargetInstrInfo *TII) {
+ bool SawStore = true;
+ if (!MI->isSafeToMove(TII, 0, SawStore))
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef() && !LaterRedefs.count(Reg))
+ return false;
+ }
+
+ return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ SmallSet<unsigned, 4> *LaterRedefs) {
+ bool AnyUnpred = false;
+ bool MaySpec = LaterRedefs != 0;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+ if (I->isDebugValue() || TII->isPredicated(I))
+ continue;
+ // It may be possible not to predicate an instruction if it's the 'true'
+ // side of a diamond and the 'false' side may re-define the instruction's
+ // defs.
+ if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) {
+ AnyUnpred = true;
+ continue;
+ }
+ // If any instruction is predicated, then every instruction after it must
+ // be predicated.
+ MaySpec = false;
+ if (!TII->PredicateInstruction(I, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(I, Redefs, TRI, true);
+ }
+
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ ++NumIfConvBBs;
+ if (AnyUnpred)
+ ++NumUnpred;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ bool IgnoreBr) {
+ MachineFunction &MF = *ToBBI.BB->getParent();
+
+ for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+ E = FromBBI.BB->end(); I != E; ++I) {
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && I->isBranch())
+ break;
+
+ MachineInstr *MI = MF.CloneMachineInstr(I);
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+ if (NumCycles > 1)
+ ToBBI.ExtraCost += NumCycles-1;
+ ToBBI.ExtraCost2 += ExtraPredCost;
+
+ if (!TII->isPredicated(I) && !MI->isDebugValue()) {
+ if (!TII->PredicateInstruction(MI, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(MI, Redefs, TRI, true);
+ }
+
+ if (!IgnoreBr) {
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
+ }
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ ++NumDupBBs;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+/// This will leave FromBB as an empty block, so remove all of its
+/// successor edges except for the fall-through edge. If AddEdges is true,
+/// i.e., when FromBBI's branch is being moved, add those successor edges to
+/// ToBBI.
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+ ToBBI.BB->splice(ToBBI.BB->end(),
+ FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ FromBBI.BB->removeSuccessor(Succ);
+ if (AddEdges && !ToBBI.BB->isSuccessor(Succ))
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ // Now FromBBI always falls through to the next block!
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ ToBBI.ExtraCost += FromBBI.ExtraCost;
+ ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
+ FromBBI.NonPredSize = 0;
+ FromBBI.ExtraCost = 0;
+ FromBBI.ExtraCost2 = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
new file mode 100644
index 000000000000..c6d1a18dbd06
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -0,0 +1,1295 @@
+//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The inline spiller modifies the machine function directly instead of
+// inserting spills and restores in VirtRegMap.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "Spiller.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
+STATISTIC(NumSnippets, "Number of spilled snippets");
+STATISTIC(NumSpills, "Number of spills inserted");
+STATISTIC(NumSpillsRemoved, "Number of spills removed");
+STATISTIC(NumReloads, "Number of reloads inserted");
+STATISTIC(NumReloadsRemoved, "Number of reloads removed");
+STATISTIC(NumFolded, "Number of folded stack accesses");
+STATISTIC(NumFoldedLoads, "Number of folded loads");
+STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
+STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads");
+STATISTIC(NumHoists, "Number of hoisted spills");
+
+static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
+ cl::desc("Disable inline spill hoisting"));
+
+namespace {
+class InlineSpiller : public Spiller {
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ AliasAnalysis *AA;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineFrameInfo &MFI;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+ // Variables that are valid during spill(), but used by multiple methods.
+ LiveRangeEdit *Edit;
+ LiveInterval *StackInt;
+ int StackSlot;
+ unsigned Original;
+
+ // All registers to spill to StackSlot, including the main register.
+ SmallVector<unsigned, 8> RegsToSpill;
+
+ // All COPY instructions to/from snippets.
+ // They are ignored since both operands refer to the same stack slot.
+ SmallPtrSet<MachineInstr*, 8> SnippetCopies;
+
+ // Values that failed to remat at some point.
+ SmallPtrSet<VNInfo*, 8> UsedValues;
+
+public:
+ // Information about a value that was defined by a copy from a sibling
+ // register.
+ struct SibValueInfo {
+ // True when all reaching defs were reloads: No spill is necessary.
+ bool AllDefsAreReloads;
+
+ // True when value is defined by an original PHI not from splitting.
+ bool DefByOrigPHI;
+
+ // True when the COPY defining this value killed its source.
+ bool KillsSource;
+
+ // The preferred register to spill.
+ unsigned SpillReg;
+
+ // The value of SpillReg that should be spilled.
+ VNInfo *SpillVNI;
+
+ // The block where SpillVNI should be spilled. Currently, this must be the
+ // block containing SpillVNI->def.
+ MachineBasicBlock *SpillMBB;
+
+ // A defining instruction that is not a sibling copy or a reload, or NULL.
+ // This can be used as a template for rematerialization.
+ MachineInstr *DefMI;
+
+ // List of values that depend on this one. These values are actually the
+ // same, but live range splitting has placed them in different registers,
+ // or SSA update needed to insert PHI-defs to preserve SSA form. This is
+ // copies of the current value and phi-kills. Usually only phi-kills cause
+ // more than one dependent value.
+ TinyPtrVector<VNInfo*> Deps;
+
+ SibValueInfo(unsigned Reg, VNInfo *VNI)
+ : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false),
+ SpillReg(Reg), SpillVNI(VNI), SpillMBB(0), DefMI(0) {}
+
+ // Returns true when a def has been found.
+ bool hasDef() const { return DefByOrigPHI || DefMI; }
+ };
+
+private:
+ // Values in RegsToSpill defined by sibling copies.
+ typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
+ SibValueMap SibValues;
+
+ // Dead defs generated during spilling.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ ~InlineSpiller() {}
+
+public:
+ InlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm)
+ : MF(mf),
+ LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AliasAnalysis>()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()),
+ VRM(vrm),
+ MFI(*mf.getFrameInfo()),
+ MRI(mf.getRegInfo()),
+ TII(*mf.getTarget().getInstrInfo()),
+ TRI(*mf.getTarget().getRegisterInfo()) {}
+
+ void spill(LiveRangeEdit &);
+
+private:
+ bool isSnippet(const LiveInterval &SnipLI);
+ void collectRegsToSpill();
+
+ bool isRegToSpill(unsigned Reg) {
+ return std::find(RegsToSpill.begin(),
+ RegsToSpill.end(), Reg) != RegsToSpill.end();
+ }
+
+ bool isSibling(unsigned Reg);
+ MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
+ void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = 0);
+ void analyzeSiblingValues();
+
+ bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI);
+ void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
+
+ void markValueUsed(LiveInterval*, VNInfo*);
+ bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI);
+ void reMaterializeAll();
+
+ bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
+ bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >,
+ MachineInstr *LoadMI = 0);
+ void insertReload(LiveInterval &NewLI, SlotIndex,
+ MachineBasicBlock::iterator MI);
+ void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+ SlotIndex, MachineBasicBlock::iterator MI);
+
+ void spillAroundUses(unsigned Reg);
+ void spillAll();
+};
+}
+
+namespace llvm {
+Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ return new InlineSpiller(pass, mf, vrm);
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Snippets
+//===----------------------------------------------------------------------===//
+
+// When spilling a virtual register, we also spill any snippets it is connected
+// to. The snippets are small live ranges that only have a single real use,
+// leftovers from live range splitting. Spilling them enables memory operand
+// folding or tightens the live range around the single use.
+//
+// This minimizes register pressure and maximizes the store-to-load distance for
+// spill slots which can be important in tight loops.
+
+/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
+/// otherwise return 0.
+static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
+ if (!MI->isFullCopy())
+ return 0;
+ if (MI->getOperand(0).getReg() == Reg)
+ return MI->getOperand(1).getReg();
+ if (MI->getOperand(1).getReg() == Reg)
+ return MI->getOperand(0).getReg();
+ return 0;
+}
+
+/// isSnippet - Identify if a live interval is a snippet that should be spilled.
+/// It is assumed that SnipLI is a virtual register with the same original as
+/// Edit->getReg().
+bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
+ unsigned Reg = Edit->getReg();
+
+ // A snippet is a tiny live range with only a single instruction using it
+ // besides copies to/from Reg or spills/fills. We accept:
+ //
+ // %snip = COPY %Reg / FILL fi#
+ // %snip = USE %snip
+ // %Reg = COPY %snip / SPILL %snip, fi#
+ //
+ if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI))
+ return false;
+
+ MachineInstr *UseMI = 0;
+
+ // Check that all uses satisfy our criteria.
+ for (MachineRegisterInfo::reg_nodbg_iterator
+ RI = MRI.reg_nodbg_begin(SnipLI.reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+
+ // Allow copies to/from Reg.
+ if (isFullCopyOf(MI, Reg))
+ continue;
+
+ // Allow stack slot loads.
+ int FI;
+ if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow stack slot stores.
+ if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow a single additional instruction.
+ if (UseMI && MI != UseMI)
+ return false;
+ UseMI = MI;
+ }
+ return true;
+}
+
+/// collectRegsToSpill - Collect live range snippets that only have a single
+/// real use.
+void InlineSpiller::collectRegsToSpill() {
+ unsigned Reg = Edit->getReg();
+
+ // Main register always spills.
+ RegsToSpill.assign(1, Reg);
+ SnippetCopies.clear();
+
+ // Snippets all have the same original, so there can't be any for an original
+ // register.
+ if (Original == Reg)
+ return;
+
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+ unsigned SnipReg = isFullCopyOf(MI, Reg);
+ if (!isSibling(SnipReg))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(SnipReg);
+ if (!isSnippet(SnipLI))
+ continue;
+ SnippetCopies.insert(MI);
+ if (isRegToSpill(SnipReg))
+ continue;
+ RegsToSpill.push_back(SnipReg);
+ DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
+ ++NumSnippets;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Sibling Values
+//===----------------------------------------------------------------------===//
+
+// After live range splitting, some values to be spilled may be defined by
+// copies from sibling registers. We trace the sibling copies back to the
+// original value if it still exists. We need it for rematerialization.
+//
+// Even when the value can't be rematerialized, we still want to determine if
+// the value has already been spilled, or we may want to hoist the spill from a
+// loop.
+
+bool InlineSpiller::isSibling(unsigned Reg) {
+ return TargetRegisterInfo::isVirtualRegister(Reg) &&
+ VRM.getOriginal(Reg) == Original;
+}
+
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS,
+ const InlineSpiller::SibValueInfo &SVI) {
+ OS << "spill " << PrintReg(SVI.SpillReg) << ':'
+ << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def;
+ if (SVI.SpillMBB)
+ OS << " in BB#" << SVI.SpillMBB->getNumber();
+ if (SVI.AllDefsAreReloads)
+ OS << " all-reloads";
+ if (SVI.DefByOrigPHI)
+ OS << " orig-phi";
+ if (SVI.KillsSource)
+ OS << " kill";
+ OS << " deps[";
+ for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i)
+ OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def;
+ OS << " ]";
+ if (SVI.DefMI)
+ OS << " def: " << *SVI.DefMI;
+ else
+ OS << '\n';
+ return OS;
+}
+#endif
+
+/// propagateSiblingValue - Propagate the value in SVI to dependents if it is
+/// known. Otherwise remember the dependency for later.
+///
+/// @param SVI SibValues entry to propagate.
+/// @param VNI Dependent value, or NULL to propagate to all saved dependents.
+void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI,
+ VNInfo *VNI) {
+ // When VNI is non-NULL, add it to SVI's deps, and only propagate to that.
+ TinyPtrVector<VNInfo*> FirstDeps;
+ if (VNI) {
+ FirstDeps.push_back(VNI);
+ SVI->second.Deps.push_back(VNI);
+ }
+
+ // Has the value been completely determined yet? If not, defer propagation.
+ if (!SVI->second.hasDef())
+ return;
+
+ // Work list of values to propagate. It would be nice to use a SetVector
+ // here, but then we would be forced to use a SmallSet.
+ SmallVector<SibValueMap::iterator, 8> WorkList(1, SVI);
+ SmallPtrSet<VNInfo*, 8> WorkSet;
+
+ do {
+ SVI = WorkList.pop_back_val();
+ WorkSet.erase(SVI->first);
+ TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
+ VNI = 0;
+
+ SibValueInfo &SV = SVI->second;
+ if (!SV.SpillMBB)
+ SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def);
+
+ DEBUG(dbgs() << " prop to " << Deps->size() << ": "
+ << SVI->first->id << '@' << SVI->first->def << ":\t" << SV);
+
+ assert(SV.hasDef() && "Propagating undefined value");
+
+ // Should this value be propagated as a preferred spill candidate? We don't
+ // propagate values of registers that are about to spill.
+ bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
+ unsigned SpillDepth = ~0u;
+
+ for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(),
+ DepE = Deps->end(); DepI != DepE; ++DepI) {
+ SibValueMap::iterator DepSVI = SibValues.find(*DepI);
+ assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
+ SibValueInfo &DepSV = DepSVI->second;
+ if (!DepSV.SpillMBB)
+ DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def);
+
+ bool Changed = false;
+
+ // Propagate defining instruction.
+ if (!DepSV.hasDef()) {
+ Changed = true;
+ DepSV.DefMI = SV.DefMI;
+ DepSV.DefByOrigPHI = SV.DefByOrigPHI;
+ }
+
+ // Propagate AllDefsAreReloads. For PHI values, this computes an AND of
+ // all predecessors.
+ if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) {
+ Changed = true;
+ DepSV.AllDefsAreReloads = false;
+ }
+
+ // Propagate best spill value.
+ if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) {
+ if (SV.SpillMBB == DepSV.SpillMBB) {
+ // DepSV is in the same block. Hoist when dominated.
+ if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) {
+ // This is an alternative def earlier in the same MBB.
+ // Hoist the spill as far as possible in SpillMBB. This can ease
+ // register pressure:
+ //
+ // x = def
+ // y = use x
+ // s = copy x
+ //
+ // Hoisting the spill of s to immediately after the def removes the
+ // interference between x and y:
+ //
+ // x = def
+ // spill x
+ // y = use x<kill>
+ //
+ // This hoist only helps when the DepSV copy kills its source.
+ Changed = true;
+ DepSV.SpillReg = SV.SpillReg;
+ DepSV.SpillVNI = SV.SpillVNI;
+ DepSV.SpillMBB = SV.SpillMBB;
+ }
+ } else {
+ // DepSV is in a different block.
+ if (SpillDepth == ~0u)
+ SpillDepth = Loops.getLoopDepth(SV.SpillMBB);
+
+ // Also hoist spills to blocks with smaller loop depth, but make sure
+ // that the new value dominates. Non-phi dependents are always
+ // dominated, phis need checking.
+ if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
+ (!DepSVI->first->isPHIDef() ||
+ MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
+ Changed = true;
+ DepSV.SpillReg = SV.SpillReg;
+ DepSV.SpillVNI = SV.SpillVNI;
+ DepSV.SpillMBB = SV.SpillMBB;
+ }
+ }
+ }
+
+ if (!Changed)
+ continue;
+
+ // Something changed in DepSVI. Propagate to dependents.
+ if (WorkSet.insert(DepSVI->first))
+ WorkList.push_back(DepSVI);
+
+ DEBUG(dbgs() << " update " << DepSVI->first->id << '@'
+ << DepSVI->first->def << " to:\t" << DepSV);
+ }
+ } while (!WorkList.empty());
+}
+
+/// traceSiblingValue - Trace a value that is about to be spilled back to the
+/// real defining instructions by looking through sibling copies. Always stay
+/// within the range of OrigVNI so the registers are known to carry the same
+/// value.
+///
+/// Determine if the value is defined by all reloads, so spilling isn't
+/// necessary - the value is already in the stack slot.
+///
+/// Return a defining instruction that may be a candidate for rematerialization.
+///
+MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
+ VNInfo *OrigVNI) {
+ // Check if a cached value already exists.
+ SibValueMap::iterator SVI;
+ bool Inserted;
+ tie(SVI, Inserted) =
+ SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
+ if (!Inserted) {
+ DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
+ return SVI->second.DefMI;
+ }
+
+ DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << '\n');
+
+ // List of (Reg, VNI) that have been inserted into SibValues, but need to be
+ // processed.
+ SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(UseReg, UseVNI));
+
+ do {
+ unsigned Reg;
+ VNInfo *VNI;
+ tie(Reg, VNI) = WorkList.pop_back_val();
+ DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+ << ":\t");
+
+ // First check if this value has already been computed.
+ SVI = SibValues.find(VNI);
+ assert(SVI != SibValues.end() && "Missing SibValues entry");
+
+ // Trace through PHI-defs created by live range splitting.
+ if (VNI->isPHIDef()) {
+ // Stop at original PHIs. We don't know the value at the predecessors.
+ if (VNI->def == OrigVNI->def) {
+ DEBUG(dbgs() << "orig phi value\n");
+ SVI->second.DefByOrigPHI = true;
+ SVI->second.AllDefsAreReloads = false;
+ propagateSiblingValue(SVI);
+ continue;
+ }
+
+ // This is a PHI inserted by live range splitting. We could trace the
+ // live-out value from predecessor blocks, but that search can be very
+ // expensive if there are many predecessors and many more PHIs as
+ // generated by tail-dup when it sees an indirectbr. Instead, look at
+ // all the non-PHI defs that have the same value as OrigVNI. They must
+ // jointly dominate VNI->def. This is not optimal since VNI may actually
+ // be jointly dominated by a smaller subset of defs, so there is a change
+ // we will miss a AllDefsAreReloads optimization.
+
+ // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
+ SmallVector<VNInfo*, 8> PHIs, NonPHIs;
+ LiveInterval &LI = LIS.getInterval(Reg);
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+
+ for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI2 = *VI;
+ if (VNI2->isUnused())
+ continue;
+ if (!OrigLI.containsOneValue() &&
+ OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
+ continue;
+ if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
+ PHIs.push_back(VNI2);
+ else
+ NonPHIs.push_back(VNI2);
+ }
+ DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
+ << " phi-defs, and " << NonPHIs.size()
+ << " non-phi/orig defs\n");
+
+ // Create entries for all the PHIs. Don't add them to the worklist, we
+ // are processing all of them in one go here.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i])));
+
+ // Add every PHI as a dependent of all the non-PHIs.
+ for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) {
+ VNInfo *NonPHI = NonPHIs[i];
+ // Known value? Try an insertion.
+ tie(SVI, Inserted) =
+ SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
+ // Add all the PHIs as dependents of NonPHI.
+ for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi)
+ SVI->second.Deps.push_back(PHIs[pi]);
+ // This is the first time we see NonPHI, add it to the worklist.
+ if (Inserted)
+ WorkList.push_back(std::make_pair(Reg, NonPHI));
+ else
+ // Propagate to all inserted PHIs, not just VNI.
+ propagateSiblingValue(SVI);
+ }
+
+ // Next work list item.
+ continue;
+ }
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+
+ // Trace through sibling copies.
+ if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(SrcReg)) {
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ LiveRangeQuery SrcQ(SrcLI, VNI->def);
+ assert(SrcQ.valueIn() && "Copy from non-existing value");
+ // Check if this COPY kills its source.
+ SVI->second.KillsSource = SrcQ.isKill();
+ VNInfo *SrcVNI = SrcQ.valueIn();
+ DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
+ << SrcVNI->id << '@' << SrcVNI->def
+ << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
+ // Known sibling source value? Try an insertion.
+ tie(SVI, Inserted) = SibValues.insert(std::make_pair(SrcVNI,
+ SibValueInfo(SrcReg, SrcVNI)));
+ // This is the first time we see Src, add it to the worklist.
+ if (Inserted)
+ WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
+ propagateSiblingValue(SVI, VNI);
+ // Next work list item.
+ continue;
+ }
+ }
+
+ // Track reachable reloads.
+ SVI->second.DefMI = MI;
+ SVI->second.SpillMBB = MI->getParent();
+ int FI;
+ if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << "reload\n");
+ propagateSiblingValue(SVI);
+ // Next work list item.
+ continue;
+ }
+
+ // Potential remat candidate.
+ DEBUG(dbgs() << "def " << *MI);
+ SVI->second.AllDefsAreReloads = false;
+ propagateSiblingValue(SVI);
+ } while (!WorkList.empty());
+
+ // Look up the value we were looking for. We already did this lookup at the
+ // top of the function, but SibValues may have been invalidated.
+ SVI = SibValues.find(UseVNI);
+ assert(SVI != SibValues.end() && "Didn't compute requested info");
+ DEBUG(dbgs() << " traced to:\t" << SVI->second);
+ return SVI->second.DefMI;
+}
+
+/// analyzeSiblingValues - Trace values defined by sibling copies back to
+/// something that isn't a sibling copy.
+///
+/// Keep track of values that may be rematerializable.
+void InlineSpiller::analyzeSiblingValues() {
+ SibValues.clear();
+
+ // No siblings at all?
+ if (Edit->getReg() == Original)
+ return;
+
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
+ VE = LI.vni_end(); VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = 0;
+ if (!VNI->isPHIDef()) {
+ DefMI = LIS.getInstructionFromIndex(VNI->def);
+ assert(DefMI && "No defining instruction");
+ }
+ // Check possible sibling copies.
+ if (VNI->isPHIDef() || DefMI->isCopy()) {
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ assert(OrigVNI && "Def outside original live range");
+ if (OrigVNI->def != VNI->def)
+ DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
+ }
+ if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) {
+ DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
+ << VNI->def << " may remat from " << *DefMI);
+ }
+ }
+ }
+}
+
+/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
+/// a spill at a better location.
+bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
+ SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
+ VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
+ assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
+ SibValueMap::iterator I = SibValues.find(VNI);
+ if (I == SibValues.end())
+ return false;
+
+ const SibValueInfo &SVI = I->second;
+
+ // Let the normal folding code deal with the boring case.
+ if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
+ return false;
+
+ // SpillReg may have been deleted by remat and DCE.
+ if (!LIS.hasInterval(SVI.SpillReg)) {
+ DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
+
+ LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
+ if (!SibLI.containsValue(SVI.SpillVNI)) {
+ DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
+
+ // Conservatively extend the stack slot range to the range of the original
+ // value. We may be able to do better with stack slot coloring by being more
+ // careful here.
+ assert(StackInt && "No stack slot assigned yet.");
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
+ << *StackInt << '\n');
+
+ // Already spilled everywhere.
+ if (SVI.AllDefsAreReloads) {
+ DEBUG(dbgs() << "\tno spill needed: " << SVI);
+ ++NumOmitReloadSpill;
+ return true;
+ }
+ // We are going to spill SVI.SpillVNI immediately after its def, so clear out
+ // any later spills of the same value.
+ eliminateRedundantSpills(SibLI, SVI.SpillVNI);
+
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
+ MachineBasicBlock::iterator MII;
+ if (SVI.SpillVNI->isPHIDef())
+ MII = MBB->SkipPHIsAndLabels(MBB->begin());
+ else {
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
+ assert(DefMI && "Defining instruction disappeared");
+ MII = DefMI;
+ ++MII;
+ }
+ // Insert spill without kill flag immediately after def.
+ TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
+ MRI.getRegClass(SVI.SpillReg), &TRI);
+ --MII; // Point to store instruction.
+ LIS.InsertMachineInstrInMaps(MII);
+ DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
+
+ ++NumSpills;
+ ++NumHoists;
+ return true;
+}
+
+/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any
+/// redundant spills of this value in SLI.reg and sibling copies.
+void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
+ assert(VNI && "Missing value");
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(&SLI, VNI));
+ assert(StackInt && "No stack slot assigned yet.");
+
+ do {
+ LiveInterval *LI;
+ tie(LI, VNI) = WorkList.pop_back_val();
+ unsigned Reg = LI->reg;
+ DEBUG(dbgs() << "Checking redundant spills for "
+ << VNI->id << '@' << VNI->def << " in " << *LI << '\n');
+
+ // Regs to spill are taken care of.
+ if (isRegToSpill(Reg))
+ continue;
+
+ // Add all of VNI's live range to StackInt.
+ StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
+
+ // Find all spills and copies of VNI.
+ for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = UI.skipInstruction();) {
+ if (!MI->isCopy() && !MI->mayStore())
+ continue;
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (LI->getVNInfoAt(Idx) != VNI)
+ continue;
+
+ // Follow sibling copies down the dominator tree.
+ if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(DstReg)) {
+ LiveInterval &DstLI = LIS.getInterval(DstReg);
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && "Missing defined value");
+ assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
+ WorkList.push_back(std::make_pair(&DstLI, DstVNI));
+ }
+ continue;
+ }
+
+ // Erase spills.
+ int FI;
+ if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI);
+ // eliminateDeadDefs won't normally remove stores, so switch opcode.
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ DeadDefs.push_back(MI);
+ ++NumSpillsRemoved;
+ --NumSpills;
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Rematerialization
+//===----------------------------------------------------------------------===//
+
+/// markValueUsed - Remember that VNI failed to rematerialize, so its defining
+/// instruction cannot be eliminated. See through snippet copies
+void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(LI, VNI));
+ do {
+ tie(LI, VNI) = WorkList.pop_back_val();
+ if (!UsedValues.insert(VNI))
+ continue;
+
+ if (VNI->isPHIDef()) {
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI));
+ if (PVNI)
+ WorkList.push_back(std::make_pair(LI, PVNI));
+ }
+ continue;
+ }
+
+ // Follow snippet copies.
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ if (!SnippetCopies.count(MI))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
+ assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
+ assert(SnipVNI && "Snippet undefined before copy");
+ WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
+ } while (!WorkList.empty());
+}
+
+/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
+ MachineBasicBlock::iterator MI) {
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
+ VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
+
+ if (!ParentVNI) {
+ DEBUG(dbgs() << "\tadding <undef> flags: ");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
+ MO.setIsUndef();
+ }
+ DEBUG(dbgs() << UseIdx << '\t' << *MI);
+ return true;
+ }
+
+ if (SnippetCopies.count(MI))
+ return false;
+
+ // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
+ if (SibI != SibValues.end())
+ RM.OrigMI = SibI->second.DefMI;
+ if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // If the instruction also writes VirtReg.reg, it had better not require the
+ // same register for uses and defs.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::VirtRegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+ if (RI.Tied) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
+ if (RM.OrigMI->canFoldAsLoad() &&
+ foldMemoryOperand(Ops, RM.OrigMI)) {
+ Edit->markRematerialized(RM.ParentVNI);
+ ++NumFoldedLoads;
+ return true;
+ }
+
+ // Alocate a new register for the remat.
+ LiveInterval &NewLI = Edit->createFrom(Original);
+ NewLI.markNotSpillable();
+
+ // Finally we can rematerialize OrigMI before MI.
+ SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+ TRI);
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *LIS.getInstructionFromIndex(DefIdx));
+
+ // Replace operands
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i].second);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
+ MO.setReg(NewLI.reg);
+ MO.setIsKill();
+ }
+ }
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
+
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI));
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ ++NumRemats;
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ // analyzeSiblingValues has already tested all relevant defining instructions.
+ if (!Edit->anyRematerializable(AA))
+ return;
+
+ UsedValues.clear();
+
+ // Try to remat before all uses of snippets.
+ bool anyRemat = false;
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ RI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = RI.skipBundle();)
+ anyRemat |= reMaterializeFor(LI, MI);
+ }
+ if (!anyRemat)
+ return;
+
+ // Remove any values that were completely rematted.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ MI->addRegisterDead(Reg, &TRI);
+ if (!MI->allDefsAreDead())
+ continue;
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ DeadDefs.push_back(MI);
+ }
+ }
+
+ // Eliminate dead code after remat. Note that some snippet copies may be
+ // deleted here.
+ if (DeadDefs.empty())
+ return;
+ DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
+
+ // Get rid of deleted and empty intervals.
+ for (unsigned i = RegsToSpill.size(); i != 0; --i) {
+ unsigned Reg = RegsToSpill[i-1];
+ if (!LIS.hasInterval(Reg)) {
+ RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ continue;
+ }
+ LiveInterval &LI = LIS.getInterval(Reg);
+ if (!LI.empty())
+ continue;
+ Edit->eraseVirtReg(Reg);
+ RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ }
+ DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
+}
+
+
+//===----------------------------------------------------------------------===//
+// Spilling
+//===----------------------------------------------------------------------===//
+
+/// If MI is a load or store of StackSlot, it can be removed.
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
+ int FI = 0;
+ unsigned InstrReg = TII.isLoadFromStackSlot(MI, FI);
+ bool IsLoad = InstrReg;
+ if (!IsLoad)
+ InstrReg = TII.isStoreToStackSlot(MI, FI);
+
+ // We have a stack access. Is it the right register and slot?
+ if (InstrReg != Reg || FI != StackSlot)
+ return false;
+
+ DEBUG(dbgs() << "Coalescing stack access: " << *MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+
+ if (IsLoad) {
+ ++NumReloadsRemoved;
+ --NumReloads;
+ } else {
+ ++NumSpillsRemoved;
+ --NumSpills;
+ }
+
+ return true;
+}
+
+/// foldMemoryOperand - Try folding stack slot references in Ops into their
+/// instructions.
+///
+/// @param Ops Operand indices from analyzeVirtReg().
+/// @param LoadMI Load instruction to use instead of stack slot when non-null.
+/// @return True on success.
+bool InlineSpiller::
+foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
+ MachineInstr *LoadMI) {
+ if (Ops.empty())
+ return false;
+ // Don't attempt folding in bundles.
+ MachineInstr *MI = Ops.front().first;
+ if (Ops.back().first != MI || MI->isBundled())
+ return false;
+
+ bool WasCopy = MI->isCopy();
+ unsigned ImpReg = 0;
+
+ // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
+ // operands.
+ SmallVector<unsigned, 8> FoldOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned Idx = Ops[i].second;
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (MO.isImplicit()) {
+ ImpReg = MO.getReg();
+ continue;
+ }
+ // FIXME: Teach targets to deal with subregs.
+ if (MO.getSubReg())
+ return false;
+ // We cannot fold a load instruction into a def.
+ if (LoadMI && MO.isDef())
+ return false;
+ // Tied use operands should not be passed to foldMemoryOperand.
+ if (!MI->isRegTiedToDefOperand(Idx))
+ FoldOps.push_back(Idx);
+ }
+
+ MachineInstr *FoldMI =
+ LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
+ : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
+ if (!FoldMI)
+ return false;
+ LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
+ MI->eraseFromParent();
+
+ // TII.foldMemoryOperand may have left some implicit operands on the
+ // instruction. Strip them.
+ if (ImpReg)
+ for (unsigned i = FoldMI->getNumOperands(); i; --i) {
+ MachineOperand &MO = FoldMI->getOperand(i - 1);
+ if (!MO.isReg() || !MO.isImplicit())
+ break;
+ if (MO.getReg() == ImpReg)
+ FoldMI->RemoveOperand(i - 1);
+ }
+
+ DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t'
+ << *FoldMI);
+ if (!WasCopy)
+ ++NumFolded;
+ else if (Ops.front().second == 0)
+ ++NumSpills;
+ else
+ ++NumReloads;
+ return true;
+}
+
+/// insertReload - Insert a reload of NewLI.reg before MI.
+void InlineSpiller::insertReload(LiveInterval &NewLI,
+ SlotIndex Idx,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
+ MRI.getRegClass(NewLI.reg), &TRI);
+ --MI; // Point to load instruction.
+ SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
+ // Some (out-of-tree) targets have EC reload instructions.
+ if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg))
+ if (MO->isEarlyClobber())
+ LoadIdx = LoadIdx.getRegSlot(true);
+ DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
+ ++NumReloads;
+}
+
+/// insertSpill - Insert a spill of NewLI.reg after MI.
+void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+ SlotIndex Idx, MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot,
+ MRI.getRegClass(NewLI.reg), &TRI);
+ --MI; // Point to store instruction.
+ SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
+ DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
+ ++NumSpills;
+}
+
+/// spillAroundUses - insert spill code around each use of Reg.
+void InlineSpiller::spillAroundUses(unsigned Reg) {
+ DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n');
+ LiveInterval &OldLI = LIS.getInterval(Reg);
+
+ // Iterate over instructions using Reg.
+ for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RegI.skipBundle();) {
+
+ // Debug values are not allowed to affect codegen.
+ if (MI->isDebugValue()) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot,
+ Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ } else {
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ MI->eraseFromParent();
+ }
+ continue;
+ }
+
+ // Ignore copies to/from snippets. We'll delete them.
+ if (SnippetCopies.count(MI))
+ continue;
+
+ // Stack slot accesses may coalesce away.
+ if (coalesceStackAccess(MI, Reg))
+ continue;
+
+ // Analyze instruction.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::VirtRegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);
+
+ // Find the slot index where this instruction reads and writes OldLI.
+ // This is usually the def slot, except for tied early clobbers.
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
+ if (SlotIndex::isSameInstr(Idx, VNI->def))
+ Idx = VNI->def;
+
+ // Check for a sibling copy.
+ unsigned SibReg = isFullCopyOf(MI, Reg);
+ if (SibReg && isSibling(SibReg)) {
+ // This may actually be a copy between snippets.
+ if (isRegToSpill(SibReg)) {
+ DEBUG(dbgs() << "Found new snippet copy: " << *MI);
+ SnippetCopies.insert(MI);
+ continue;
+ }
+ if (RI.Writes) {
+ // Hoist the spill of a sib-reg copy.
+ if (hoistSpill(OldLI, MI)) {
+ // This COPY is now dead, the value is already in the stack slot.
+ MI->getOperand(0).setIsDead();
+ DeadDefs.push_back(MI);
+ continue;
+ }
+ } else {
+ // This is a reload for a sib-reg copy. Drop spills downstream.
+ LiveInterval &SibLI = LIS.getInterval(SibReg);
+ eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
+ // The COPY will fold to a reload below.
+ }
+ }
+
+ // Attempt to fold memory ops.
+ if (foldMemoryOperand(Ops))
+ continue;
+
+ // Allocate interval around instruction.
+ // FIXME: Infer regclass from instruction alone.
+ LiveInterval &NewLI = Edit->createFrom(Reg);
+ NewLI.markNotSpillable();
+
+ if (RI.Reads)
+ insertReload(NewLI, Idx, MI);
+
+ // Rewrite instruction operands.
+ bool hasLiveDef = false;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
+ MO.setReg(NewLI.reg);
+ if (MO.isUse()) {
+ if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second))
+ MO.setIsKill();
+ } else {
+ if (!MO.isDead())
+ hasLiveDef = true;
+ }
+ }
+ DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
+
+ // FIXME: Use a second vreg if instruction has no tied ops.
+ if (RI.Writes) {
+ if (hasLiveDef)
+ insertSpill(NewLI, OldLI, Idx, MI);
+ else {
+ // This instruction defines a dead value. We don't need to spill it,
+ // but do create a live range for the dead value.
+ VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI));
+ }
+ }
+
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ }
+}
+
+/// spillAll - Spill all registers remaining after rematerialization.
+void InlineSpiller::spillAll() {
+ // Update LiveStacks now that we are committed to spilling.
+ if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
+ StackSlot = VRM.assignVirt2StackSlot(Original);
+ StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
+ StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator());
+ } else
+ StackInt = &LSS.getInterval(StackSlot);
+
+ if (Original != Edit->getReg())
+ VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
+
+ assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]),
+ StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
+
+ // Spill around uses of all RegsToSpill.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ spillAroundUses(RegsToSpill[i]);
+
+ // Hoisted spills may cause dead code.
+ if (!DeadDefs.empty()) {
+ DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
+ }
+
+ // Finally delete the SnippetCopies.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]);
+ MachineInstr *MI = RI.skipInstruction();) {
+ assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
+ // FIXME: Do this with a LiveRangeEdit callback.
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
+ }
+
+ // Delete all spilled registers.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ Edit->eraseVirtReg(RegsToSpill[i]);
+}
+
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+ ++NumSpilledRanges;
+ Edit = &edit;
+ assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
+ && "Trying to spill a stack slot.");
+ // Share a stack slot among all descendants of Original.
+ Original = VRM.getOriginal(edit.getReg());
+ StackSlot = VRM.getStackSlot(Original);
+ StackInt = 0;
+
+ DEBUG(dbgs() << "Inline spilling "
+ << MRI.getRegClass(edit.getReg())->getName()
+ << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent()
+ << "\nFrom original " << LIS.getInterval(Original) << '\n');
+ assert(edit.getParent().isSpillable() &&
+ "Attempting to spill already spilled value.");
+ assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
+
+ collectRegsToSpill();
+ analyzeSiblingValues();
+ reMaterializeAll();
+
+ // Remat may handle everything.
+ if (!RegsToSpill.empty())
+ spillAll();
+
+ Edit->calculateRegClassAndHint(MF, Loops);
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
new file mode 100644
index 000000000000..a8e711e33bdf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -0,0 +1,231 @@
+//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "InterferenceCache.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+// Static member used for null interference cursors.
+InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
+
+void InterferenceCache::init(MachineFunction *mf,
+ LiveIntervalUnion *liuarray,
+ SlotIndexes *indexes,
+ LiveIntervals *lis,
+ const TargetRegisterInfo *tri) {
+ MF = mf;
+ LIUArray = liuarray;
+ TRI = tri;
+ PhysRegEntries.assign(TRI->getNumRegs(), 0);
+ for (unsigned i = 0; i != CacheEntries; ++i)
+ Entries[i].clear(mf, indexes, lis);
+}
+
+InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
+ unsigned E = PhysRegEntries[PhysReg];
+ if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
+ if (!Entries[E].valid(LIUArray, TRI))
+ Entries[E].revalidate(LIUArray, TRI);
+ return &Entries[E];
+ }
+ // No valid entry exists, pick the next round-robin entry.
+ E = RoundRobin;
+ if (++RoundRobin == CacheEntries)
+ RoundRobin = 0;
+ for (unsigned i = 0; i != CacheEntries; ++i) {
+ // Skip entries that are in use.
+ if (Entries[E].hasRefs()) {
+ if (++E == CacheEntries)
+ E = 0;
+ continue;
+ }
+ Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+ PhysRegEntries[PhysReg] = E;
+ return &Entries[E];
+ }
+ llvm_unreachable("Ran out of interference cache entries.");
+}
+
+/// revalidate - LIU contents have changed, update tags.
+void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
+ // Invalidate all block entries.
+ ++Tag;
+ // Invalidate all iterators.
+ PrevPos = SlotIndex();
+ unsigned i = 0;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i)
+ RegUnits[i].VirtTag = LIUArray[*Units].getTag();
+}
+
+void InterferenceCache::Entry::reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF) {
+ assert(!hasRefs() && "Cannot reset cache entry with references");
+ // LIU's changed, invalidate cache.
+ ++Tag;
+ PhysReg = physReg;
+ Blocks.resize(MF->getNumBlockIDs());
+
+ // Reset iterators.
+ PrevPos = SlotIndex();
+ RegUnits.clear();
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ RegUnits.push_back(LIUArray[*Units]);
+ RegUnits.back().Fixed = &LIS->getRegUnit(*Units);
+ }
+}
+
+bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
+ unsigned i = 0, e = RegUnits.size();
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) {
+ if (i == e)
+ return false;
+ if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag))
+ return false;
+ }
+ return i == e;
+}
+
+void InterferenceCache::Entry::update(unsigned MBBNum) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+
+ // Use advanceTo only when possible.
+ if (PrevPos != Start) {
+ if (!PrevPos.isValid() || Start < PrevPos) {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.find(Start);
+ RUI.FixedI = RUI.Fixed->find(Start);
+ }
+ } else {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.advanceTo(Start);
+ if (RUI.FixedI != RUI.Fixed->end())
+ RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start);
+ }
+ }
+ PrevPos = Start;
+ }
+
+ MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
+ BlockInterference *BI = &Blocks[MBBNum];
+ ArrayRef<SlotIndex> RegMaskSlots;
+ ArrayRef<const uint32_t*> RegMaskBits;
+ for (;;) {
+ BI->Tag = Tag;
+ BI->First = BI->Last = SlotIndex();
+
+ // Check for first interference from virtregs.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
+ if (!I.valid())
+ continue;
+ SlotIndex StartI = I.start();
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ // Same thing for fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::const_iterator I = RegUnits[i].FixedI;
+ LiveInterval::const_iterator E = RegUnits[i].Fixed->end();
+ if (I == E)
+ continue;
+ SlotIndex StartI = I->start;
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ // Also check for register mask interference.
+ RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum);
+ RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum);
+ SlotIndex Limit = BI->First.isValid() ? BI->First : Stop;
+ for (unsigned i = 0, e = RegMaskSlots.size();
+ i != e && RegMaskSlots[i] < Limit; ++i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) {
+ // Register mask i clobbers PhysReg before the LIU interference.
+ BI->First = RegMaskSlots[i];
+ break;
+ }
+
+ PrevPos = Stop;
+ if (BI->First.isValid())
+ break;
+
+ // No interference in this block? Go ahead and precompute the next block.
+ if (++MFI == MF->end())
+ return;
+ MBBNum = MFI->getNumber();
+ BI = &Blocks[MBBNum];
+ if (BI->Tag == Tag)
+ return;
+ tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+ }
+
+ // Check for last interference in block.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
+ if (!I.valid() || I.start() >= Stop)
+ continue;
+ I.advanceTo(Stop);
+ bool Backup = !I.valid() || I.start() >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I.stop();
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+
+ // Fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::iterator &I = RegUnits[i].FixedI;
+ LiveInterval *LI = RegUnits[i].Fixed;
+ if (I == LI->end() || I->start >= Stop)
+ continue;
+ I = LI->advanceTo(I, Stop);
+ bool Backup = I == LI->end() || I->start >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I->end;
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+
+ // Also check for register mask interference.
+ SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start;
+ for (unsigned i = RegMaskSlots.size();
+ i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) {
+ // Register mask i-1 clobbers PhysReg after the LIU interference.
+ // Model the regmask clobber as a dead def.
+ BI->Last = RegMaskSlots[i-1].getDeadSlot();
+ break;
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h
new file mode 100644
index 000000000000..c02fb9a1ee24
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h
@@ -0,0 +1,228 @@
+//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference from LiveIntervalUnions,
+// fixed RegUnit interference, and register masks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INTERFERENCECACHE
+#define LLVM_CODEGEN_INTERFERENCECACHE
+
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+
+namespace llvm {
+
+class LiveIntervals;
+
+class InterferenceCache {
+ const TargetRegisterInfo *TRI;
+ LiveIntervalUnion *LIUArray;
+ MachineFunction *MF;
+
+ /// BlockInterference - information about the interference in a single basic
+ /// block.
+ struct BlockInterference {
+ BlockInterference() : Tag(0) {}
+ unsigned Tag;
+ SlotIndex First;
+ SlotIndex Last;
+ };
+
+ /// Entry - A cache entry containing interference information for all aliases
+ /// of PhysReg in all basic blocks.
+ class Entry {
+ /// PhysReg - The register currently represented.
+ unsigned PhysReg;
+
+ /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
+ /// change.
+ unsigned Tag;
+
+ /// RefCount - The total number of Cursor instances referring to this Entry.
+ unsigned RefCount;
+
+ /// MF - The current function.
+ MachineFunction *MF;
+
+ /// Indexes - Mapping block numbers to SlotIndex ranges.
+ SlotIndexes *Indexes;
+
+ /// LIS - Used for accessing register mask interference maps.
+ LiveIntervals *LIS;
+
+ /// PrevPos - The previous position the iterators were moved to.
+ SlotIndex PrevPos;
+
+ /// RegUnitInfo - Information tracked about each RegUnit in PhysReg.
+ /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos)
+ /// had just been called.
+ struct RegUnitInfo {
+ /// Iterator pointing into the LiveIntervalUnion containing virtual
+ /// register interference.
+ LiveIntervalUnion::SegmentIter VirtI;
+
+ /// Tag of the LIU last time we looked.
+ unsigned VirtTag;
+
+ /// Fixed interference in RegUnit.
+ LiveInterval *Fixed;
+
+ /// Iterator pointing into the fixed RegUnit interference.
+ LiveInterval::iterator FixedI;
+
+ RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) {
+ VirtI.setMap(LIU.getMap());
+ }
+ };
+
+ /// Info for each RegUnit in PhysReg. It is very rare ofr a PHysReg to have
+ /// more than 4 RegUnits.
+ SmallVector<RegUnitInfo, 4> RegUnits;
+
+ /// Blocks - Interference for each block in the function.
+ SmallVector<BlockInterference, 8> Blocks;
+
+ /// update - Recompute Blocks[MBBNum]
+ void update(unsigned MBBNum);
+
+ public:
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {}
+
+ void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
+ assert(!hasRefs() && "Cannot clear cache entry with references");
+ PhysReg = 0;
+ MF = mf;
+ Indexes = indexes;
+ LIS = lis;
+ }
+
+ unsigned getPhysReg() const { return PhysReg; }
+
+ void addRef(int Delta) { RefCount += Delta; }
+
+ bool hasRefs() const { return RefCount > 0; }
+
+ void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// valid - Return true if this is a valid entry for physReg.
+ bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// reset - Initialize entry to represent physReg's aliases.
+ void reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF);
+
+ /// get - Return an up to date BlockInterference.
+ BlockInterference *get(unsigned MBBNum) {
+ if (Blocks[MBBNum].Tag != Tag)
+ update(MBBNum);
+ return &Blocks[MBBNum];
+ }
+ };
+
+ // We don't keep a cache entry for every physical register, that would use too
+ // much memory. Instead, a fixed number of cache entries are used in a round-
+ // robin manner.
+ enum { CacheEntries = 32 };
+
+ // Point to an entry for each physreg. The entry pointed to may not be up to
+ // date, and it may have been reused for a different physreg.
+ SmallVector<unsigned char, 2> PhysRegEntries;
+
+ // Next round-robin entry to be picked.
+ unsigned RoundRobin;
+
+ // The actual cache entries.
+ Entry Entries[CacheEntries];
+
+ // get - Get a valid entry for PhysReg.
+ Entry *get(unsigned PhysReg);
+
+public:
+ InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {}
+
+ /// init - Prepare cache for a new function.
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
+ const TargetRegisterInfo *);
+
+ /// getMaxCursors - Return the maximum number of concurrent cursors that can
+ /// be supported.
+ unsigned getMaxCursors() const { return CacheEntries; }
+
+ /// Cursor - The primary query interface for the block interference cache.
+ class Cursor {
+ Entry *CacheEntry;
+ BlockInterference *Current;
+ static BlockInterference NoInterference;
+
+ void setEntry(Entry *E) {
+ Current = 0;
+ // Update reference counts. Nothing happens when RefCount reaches 0, so
+ // we don't have to check for E == CacheEntry etc.
+ if (CacheEntry)
+ CacheEntry->addRef(-1);
+ CacheEntry = E;
+ if (CacheEntry)
+ CacheEntry->addRef(+1);
+ }
+
+ public:
+ /// Cursor - Create a dangling cursor.
+ Cursor() : CacheEntry(0), Current(0) {}
+ ~Cursor() { setEntry(0); }
+
+ Cursor(const Cursor &O) : CacheEntry(0), Current(0) {
+ setEntry(O.CacheEntry);
+ }
+
+ Cursor &operator=(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ return *this;
+ }
+
+ /// setPhysReg - Point this cursor to PhysReg's interference.
+ void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+ // Release reference before getting a new one. That guarantees we can
+ // actually have CacheEntries live cursors.
+ setEntry(0);
+ if (PhysReg)
+ setEntry(Cache.get(PhysReg));
+ }
+
+ /// moveTo - Move cursor to basic block MBBNum.
+ void moveToBlock(unsigned MBBNum) {
+ Current = CacheEntry ? CacheEntry->get(MBBNum) : &NoInterference;
+ }
+
+ /// hasInterference - Return true if the current block has any interference.
+ bool hasInterference() {
+ return Current->First.isValid();
+ }
+
+ /// first - Return the starting index of the first interfering range in the
+ /// current block.
+ SlotIndex first() {
+ return Current->First;
+ }
+
+ /// last - Return the ending index of the last interfering range in the
+ /// current block.
+ SlotIndex last() {
+ return Current->Last;
+ }
+ };
+
+ friend class Cursor;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 000000000000..07f0ccf52f8c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,565 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+ const char *FName,
+ const char *DName, const char *LDName) {
+ // Insert definitions for all the floating point types.
+ switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getFloatTy(M.getContext()));
+ break;
+ case Type::DoubleTyID:
+ EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getDoubleTy(M.getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+ Fn->arg_begin()->getType());
+ break;
+ }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, and if that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ Type *RetTy) {
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = CI->getParent()->getParent()->getParent();
+ // Get or insert the definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ Constant* FCache = M->getOrInsertFunction(NewFn,
+ FunctionType::get(RetTy, ParamTys, false));
+
+ IRBuilder<> Builder(CI->getParent(), CI);
+ SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ NewCI->setName(CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+ LLVMContext &Context = M.getContext();
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration() && !I->use_empty())
+ switch (I->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::setjmp:
+ EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ Type::getInt32Ty(M.getContext()));
+ break;
+ case Intrinsic::longjmp:
+ EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::siglongjmp:
+ EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::memcpy:
+ M.getOrInsertFunction("memcpy",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memmove:
+ M.getOrInsertFunction("memmove",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memset:
+ M.getOrInsertFunction("memset",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(M.getContext()),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::sqrt:
+ EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+ break;
+ case Intrinsic::sin:
+ EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+ break;
+ case Intrinsic::cos:
+ EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+ break;
+ case Intrinsic::pow:
+ EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+ break;
+ case Intrinsic::log:
+ EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+ break;
+ case Intrinsic::log2:
+ EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+ break;
+ case Intrinsic::log10:
+ EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+ break;
+ case Intrinsic::exp:
+ EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+ break;
+ case Intrinsic::exp2:
+ EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+ break;
+ }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ switch(BitSize) {
+ default: llvm_unreachable("Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.1");
+ V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
+ break;
+ }
+ case 32: {
+ Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.4");
+ Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.3");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
+ "bswap.1");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
+ "bswap.and2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
+ V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
+ break;
+ }
+ case 64: {
+ Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
+ "bswap.8");
+ Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
+ "bswap.7");
+ Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.6");
+ Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.5");
+ Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.4");
+ Value* Tmp3 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 24),
+ "bswap.3");
+ Value* Tmp2 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 40),
+ "bswap.2");
+ Value* Tmp1 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 56),
+ "bswap.1");
+ Tmp7 = Builder.CreateAnd(Tmp7,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000000000ULL),
+ "bswap.and7");
+ Tmp6 = Builder.CreateAnd(Tmp6,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000000000ULL),
+ "bswap.and6");
+ Tmp5 = Builder.CreateAnd(Tmp5,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00000000ULL),
+ "bswap.and5");
+ Tmp4 = Builder.CreateAnd(Tmp4,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000ULL),
+ "bswap.and4");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000ULL),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00ULL),
+ "bswap.and2");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
+ Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+ Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+ Value *RHS = Builder.CreateAnd(VShift, MaskCst, "cppop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(Context, V, IP);
+}
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+ const char *Dname,
+ const char *LDname) {
+ CallSite CS(CI);
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid type in intrinsic");
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getFloatTy(CI->getContext()));
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getDoubleTy(CI->getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
+ CI->getArgOperand(0)->getType());
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+ LLVMContext &Context = CI->getContext();
+
+ const Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ CallSite CS(CI);
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
+ Callee->getName() + "'!");
+ default:
+ report_fatal_error("Code generator does not support intrinsic function '"+
+ Callee->getName()+"'!");
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ Value *V = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+
+ // The setjmp/longjmp intrinsics should only exist in the code if it was
+ // never optimized (ie, right out of the CFE), or if it has been hacked on
+ // by the lowerinvoke pass. In both cases, the right thing to do is to
+ // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: {
+ Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getInt32Ty(Context));
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+ case Intrinsic::sigsetjmp:
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::longjmp: {
+ ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+
+ case Intrinsic::siglongjmp: {
+ // Insert the call to abort
+ ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
+ Value *Src = CI->getArgOperand(0);
+ Value *NotSrc = Builder.CreateNot(Src);
+ NotSrc->setName(Src->getName() + ".not");
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = Builder.CreateSub(Src, SrcM1);
+ Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ if (!Warned)
+ errs() << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ errs() << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ errs() << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
+ break;
+ }
+
+ case Intrinsic::dbg_declare:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_typeid_for:
+ // Return something different to eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::var_annotation:
+ break; // Strip out annotate intrinsic
+
+ case Intrinsic::memcpy: {
+ Type *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memmove: {
+ Type *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memset: {
+ Type *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ // Extend the amount to i32.
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt32Ty(Context),
+ /* isSigned */ false);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::sqrt: {
+ ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
+ break;
+ }
+ case Intrinsic::log: {
+ ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
+ break;
+ }
+ case Intrinsic::log2: {
+ ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
+ break;
+ }
+ case Intrinsic::log10: {
+ ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
+ break;
+ }
+ case Intrinsic::exp: {
+ ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
+ break;
+ }
+ case Intrinsic::exp2: {
+ ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
+ break;
+ }
+ case Intrinsic::pow: {
+ ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
+ break;
+ }
+ case Intrinsic::flt_rounds:
+ // Lower to "round to the nearest"
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
+
+bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
+ // Verify this is a simple bswap.
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
+ !CI->getType()->isIntegerTy())
+ return false;
+
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
+
+ Value *Op = CI->getArgOperand(0);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp
new file mode 100644
index 000000000000..96a53892f6d3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/JITCodeEmitter.h"
+
+using namespace llvm;
+
+void JITCodeEmitter::anchor() { }
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 000000000000..1a0983783484
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,296 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
+
+static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
+ cl::desc("Show encoding in .s output"));
+static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
+ cl::desc("Show instruction structure in .s output"));
+
+static cl::opt<cl::boolOrDefault>
+AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
+ cl::init(cl::BOU_UNSET));
+
+static bool getVerboseAsm() {
+ switch (AsmVerbose) {
+ case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+ llvm_unreachable("Invalid verbose asm state");
+}
+
+LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, Triple, CPU, FS, Options) {
+ CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
+ AsmInfo = T.createMCAsmInfo(Triple);
+ // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
+ // and if the old one gets included then MCAsmInfo will be NULL and
+ // we'll crash later.
+ // Provide the user with a useful error message about what's wrong.
+ assert(AsmInfo && "MCAsmInfo not initialized."
+ "Make sure you include the correct TargetSelect.h"
+ "and that InitializeAllTargetMCs() is being invoked!");
+}
+
+void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+}
+
+/// addPassesToX helper drives creation and initialization of TargetPassConfig.
+static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
+ PassManagerBase &PM,
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
+ // Targets may override createPassConfig to provide a target-specific sublass.
+ TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+ PassConfig->setStartStopPasses(StartAfter, StopAfter);
+
+ // Set PassConfig options provided by TargetMachine.
+ PassConfig->setDisableVerify(DisableVerify);
+
+ PM.add(PassConfig);
+
+ PassConfig->addIRPasses();
+
+ PassConfig->addCodeGenPrepare();
+
+ PassConfig->addPassesToHandleExceptions();
+
+ PassConfig->addISelPrepare();
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI =
+ new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(),
+ &TM->getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*TM));
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (TM->getOptLevel() == CodeGenOpt::None &&
+ EnableFastISelOption != cl::BOU_FALSE))
+ TM->setFastISel(true);
+
+ // Ask the target for an isel.
+ if (PassConfig->addInstSelector())
+ return NULL;
+
+ PassConfig->addMachinePasses();
+
+ PassConfig->setInitialized();
+
+ return Context;
+}
+
+bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
+ // Add common CodeGen passes.
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
+ StartAfter, StopAfter);
+ if (!Context)
+ return true;
+
+ if (StopAfter) {
+ // FIXME: The intent is that this should eventually write out a YAML file,
+ // containing the LLVM IR, the machine-level IR (when stopping after a
+ // machine-level pass), and whatever other information is needed to
+ // deserialize the code and resume compilation. For now, just write the
+ // LLVM IR.
+ PM.add(createPrintModulePass(&Out));
+ return false;
+ }
+
+ if (hasMCSaveTempLabels())
+ Context->setAllowTemporaryLabels(false);
+
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCRegisterInfo &MRI = *getRegisterInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI,
+ *getInstrInfo(),
+ Context->getRegisterInfo(), STI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = 0;
+ MCAsmBackend *MAB = 0;
+ if (ShowMCEncoding) {
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI,
+ *Context);
+ MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
+ }
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ getVerboseAsm(),
+ hasMCUseLoc(),
+ hasMCUseCFI(),
+ hasMCUseDwarfDirectory(),
+ InstPrinter,
+ MCE, MAB,
+ ShowMCInst);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+ STI, *Context);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(),
+ TargetCPU);
+ if (MCE == 0 || MAB == 0)
+ return true;
+
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(),
+ *Context, *MAB, Out,
+ MCE, hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->setAutoInitSections(true);
+ break;
+ }
+ case CGFT_Null:
+ // The Null output is intended for use for performance analysis and testing,
+ // not real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ break;
+ }
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ return false;
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a JITCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method should returns true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+ JITCodeEmitter &JCE,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
+ if (!Context)
+ return true;
+
+ addCodeEmitter(PM, JCE);
+
+ return false; // success!
+}
+
+/// addPassesToEmitMC - Add passes to the specified pass manager to get
+/// machine code emitted with the MCJIT. This method returns true if machine
+/// code is not supported. It fills the MCContext Ctx pointer which can be
+/// used to build custom MCStreamer.
+///
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
+ MCContext *&Ctx,
+ raw_ostream &Out,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
+ if (!Ctx)
+ return true;
+
+ if (hasMCSaveTempLabels())
+ Ctx->setAllowTemporaryLabels(false);
+
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ const MCRegisterInfo &MRI = *getRegisterInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+ STI, *Ctx);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
+ if (MCE == 0 || MAB == 0)
+ return true;
+
+ OwningPtr<MCStreamer> AsmStreamer;
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), *Ctx,
+ *MAB, Out, MCE,
+ hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->InitSections();
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ return false; // success!
+}
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
new file mode 100644
index 000000000000..deab05a412c9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -0,0 +1,152 @@
+//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return RHSNum < LHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+
+ return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ }
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+ Queue.push_back(SU);
+}
+
+
+// scheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor. If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ AdjustPriorityOfUnscheduledPreds(I->getSUnit());
+ }
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+SUnit *LatencyPriorityQueue::pop() {
+ if (empty()) return NULL;
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+ Queue.pop_back();
+ return V;
+}
+
+void LatencyPriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+}
+
+#ifdef NDEBUG
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+ LatencyPriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
new file mode 100644
index 000000000000..81721541cd89
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -0,0 +1,335 @@
+//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LexicalScopes analysis.
+//
+// This pass collects lexical scope information and maps machine instructions
+// to respective lexical scopes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lexicalscopes"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+LexicalScopes::~LexicalScopes() {
+ releaseMemory();
+}
+
+/// releaseMemory - release memory.
+void LexicalScopes::releaseMemory() {
+ MF = NULL;
+ CurrentFnLexicalScope = NULL;
+ DeleteContainerSeconds(LexicalScopeMap);
+ DeleteContainerSeconds(AbstractScopeMap);
+ InlinedLexicalScopeMap.clear();
+ AbstractScopesList.clear();
+}
+
+/// initialize - Scan machine function and constuct lexical scope nest.
+void LexicalScopes::initialize(const MachineFunction &Fn) {
+ releaseMemory();
+ MF = &Fn;
+ SmallVector<InsnRange, 4> MIRanges;
+ DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap;
+ extractLexicalScopes(MIRanges, MI2ScopeMap);
+ if (CurrentFnLexicalScope) {
+ constructScopeNest(CurrentFnLexicalScope);
+ assignInstructionRanges(MIRanges, MI2ScopeMap);
+ }
+}
+
+/// extractLexicalScopes - Extract instruction ranges for each lexical scopes
+/// for the given machine function.
+void LexicalScopes::
+extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+
+ // Scan each instruction and create scopes. First build working set of scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ const MachineInstr *RangeBeginMI = NULL;
+ const MachineInstr *PrevMI = NULL;
+ DebugLoc PrevDL;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+
+ // Check if instruction has valid location information.
+ const DebugLoc MIDL = MInsn->getDebugLoc();
+ if (MIDL.isUnknown()) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // If scope has not changed then skip this instruction.
+ if (MIDL == PrevDL) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // Ignore DBG_VALUE. It does not contribute to any instruction in output.
+ if (MInsn->isDebugValue())
+ continue;
+
+ if (RangeBeginMI) {
+ // If we have already seen a beginning of an instruction range and
+ // current instruction scope does not match scope of first instruction
+ // in this range then create a new instruction range.
+ InsnRange R(RangeBeginMI, PrevMI);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ MIRanges.push_back(R);
+ }
+
+ // This is a beginning of a new instruction range.
+ RangeBeginMI = MInsn;
+
+ // Reset previous markers.
+ PrevMI = MInsn;
+ PrevDL = MIDL;
+ }
+
+ // Create last instruction range.
+ if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
+ InsnRange R(RangeBeginMI, PrevMI);
+ MIRanges.push_back(R);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ }
+ }
+}
+
+/// findLexicalScope - Find lexical scope, either regular or inlined, for the
+/// given DebugLoc. Return NULL if not found.
+LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
+ MDNode *Scope = NULL;
+ MDNode *IA = NULL;
+ DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
+ if (!Scope) return NULL;
+
+ // The scope that we were created with could have an extra file - which
+ // isn't what we care about in this case.
+ DIDescriptor D = DIDescriptor(Scope);
+ if (D.isLexicalBlockFile())
+ Scope = DILexicalBlockFile(Scope).getScope();
+
+ if (IA)
+ return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA));
+ return LexicalScopeMap.lookup(Scope);
+}
+
+/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
+/// not available then create new lexical scope.
+LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) {
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+ DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext());
+
+ if (InlinedAt) {
+ // Create an abstract scope for inlined function.
+ getOrCreateAbstractScope(Scope);
+ // Create an inlined scope for inlined function.
+ return getOrCreateInlinedScope(Scope, InlinedAt);
+ }
+
+ return getOrCreateRegularScope(Scope);
+}
+
+/// getOrCreateRegularScope - Find or create a regular lexical scope.
+LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
+ DIDescriptor D = DIDescriptor(Scope);
+ if (D.isLexicalBlockFile()) {
+ Scope = DILexicalBlockFile(Scope).getScope();
+ D = DIDescriptor(Scope);
+ }
+
+ LexicalScope *WScope = LexicalScopeMap.lookup(Scope);
+ if (WScope)
+ return WScope;
+
+ LexicalScope *Parent = NULL;
+ if (D.isLexicalBlock())
+ Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope));
+ WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false);
+ LexicalScopeMap.insert(std::make_pair(Scope, WScope));
+ if (!Parent && DIDescriptor(Scope).isSubprogram()
+ && DISubprogram(Scope).describes(MF->getFunction()))
+ CurrentFnLexicalScope = WScope;
+
+ return WScope;
+}
+
+/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
+LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope,
+ MDNode *InlinedAt) {
+ LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt);
+ if (InlinedScope)
+ return InlinedScope;
+
+ DebugLoc InlinedLoc = DebugLoc::getFromDILocation(InlinedAt);
+ InlinedScope = new LexicalScope(getOrCreateLexicalScope(InlinedLoc),
+ DIDescriptor(Scope), InlinedAt, false);
+ InlinedLexicalScopeMap[InlinedLoc] = InlinedScope;
+ LexicalScopeMap[InlinedAt] = InlinedScope;
+ return InlinedScope;
+}
+
+/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
+LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) {
+ assert(N && "Invalid Scope encoding!");
+
+ DIDescriptor Scope(N);
+ if (Scope.isLexicalBlockFile())
+ Scope = DILexicalBlockFile(Scope).getScope();
+ LexicalScope *AScope = AbstractScopeMap.lookup(N);
+ if (AScope)
+ return AScope;
+
+ LexicalScope *Parent = NULL;
+ if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ Parent = getOrCreateAbstractScope(ParentDesc);
+ }
+ AScope = new LexicalScope(Parent, DIDescriptor(N), NULL, true);
+ AbstractScopeMap[N] = AScope;
+ if (DIDescriptor(N).isSubprogram())
+ AbstractScopesList.push_back(AScope);
+ return AScope;
+}
+
+/// constructScopeNest
+void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
+ assert (Scope && "Unable to calculate scop edominance graph!");
+ SmallVector<LexicalScope *, 4> WorkStack;
+ WorkStack.push_back(Scope);
+ unsigned Counter = 0;
+ while (!WorkStack.empty()) {
+ LexicalScope *WS = WorkStack.back();
+ const SmallVector<LexicalScope *, 4> &Children = WS->getChildren();
+ bool visitedChildren = false;
+ for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI) {
+ LexicalScope *ChildScope = *SI;
+ if (!ChildScope->getDFSOut()) {
+ WorkStack.push_back(ChildScope);
+ visitedChildren = true;
+ ChildScope->setDFSIn(++Counter);
+ break;
+ }
+ }
+ if (!visitedChildren) {
+ WorkStack.pop_back();
+ WS->setDFSOut(++Counter);
+ }
+ }
+}
+
+/// assignInstructionRanges - Find ranges of instructions covered by each
+/// lexical scope.
+void LexicalScopes::
+assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap)
+{
+
+ LexicalScope *PrevLexicalScope = NULL;
+ for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(),
+ RE = MIRanges.end(); RI != RE; ++RI) {
+ const InsnRange &R = *RI;
+ LexicalScope *S = MI2ScopeMap.lookup(R.first);
+ assert (S && "Lost LexicalScope for a machine instruction!");
+ if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
+ PrevLexicalScope->closeInsnRange(S);
+ S->openInsnRange(R.first);
+ S->extendInsnRange(R.second);
+ PrevLexicalScope = S;
+ }
+
+ if (PrevLexicalScope)
+ PrevLexicalScope->closeInsnRange();
+}
+
+/// getMachineBasicBlocks - Populate given set using machine basic blocks which
+/// have machine instructions that belong to lexical scope identified by
+/// DebugLoc.
+void LexicalScopes::
+getMachineBasicBlocks(DebugLoc DL,
+ SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) {
+ MBBs.clear();
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return;
+
+ if (Scope == CurrentFnLexicalScope) {
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I)
+ MBBs.insert(I);
+ return;
+ }
+
+ SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges();
+ for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(),
+ E = InsnRanges.end(); I != E; ++I) {
+ InsnRange &R = *I;
+ MBBs.insert(R.first->getParent());
+ }
+}
+
+/// dominates - Return true if DebugLoc's lexical scope dominates at least one
+/// machine instruction's lexical scope in a given machine basic block.
+bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return false;
+
+ // Current function scope covers all basic blocks in the function.
+ if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF)
+ return true;
+
+ bool Result = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ DebugLoc IDL = I->getDebugLoc();
+ if (IDL.isUnknown())
+ continue;
+ if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
+ if (Scope->dominates(IScope))
+ return true;
+ }
+ return Result;
+}
+
+void LexicalScope::anchor() { }
+
+/// dump - Print data structures.
+void LexicalScope::dump(unsigned Indent) const {
+#ifndef NDEBUG
+ raw_ostream &err = dbgs();
+ err.indent(Indent);
+ err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
+ const MDNode *N = Desc;
+ err.indent(Indent);
+ N->dump();
+ if (AbstractScope)
+ err << std::string(Indent, ' ') << "Abstract Scope\n";
+
+ if (!Children.empty())
+ err << std::string(Indent + 2, ' ') << "Children ...\n";
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ if (Children[i] != this)
+ Children[i]->dump(Indent + 2);
+#endif
+}
+
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
new file mode 100644
index 000000000000..0b117ac6566b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -0,0 +1,995 @@
+//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveDebugVariables analysis.
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept - in a
+// virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when values
+// are moved between registers and stack slots. Finally emit new DBG_VALUE
+// instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livedebug"
+#include "LiveDebugVariables.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+ cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+/// LocMap - Map of where a user value is live, and its location.
+typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+
+namespace {
+/// UserValueScopes - Keeps track of lexical scopes associated with an
+/// user value's source location.
+class UserValueScopes {
+ DebugLoc DL;
+ LexicalScopes &LS;
+ SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
+
+public:
+ UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(D), LS(L) {}
+
+ /// dominates - Return true if current scope dominates at least one machine
+ /// instruction in a given machine basic block.
+ bool dominates(MachineBasicBlock *MBB) {
+ if (LBlocks.empty())
+ LS.getMachineBasicBlocks(DL, LBlocks);
+ if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB))
+ return true;
+ return false;
+ }
+};
+} // end anonymous namespace
+
+/// UserValue - A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
+namespace {
+class LDVImpl;
+class UserValue {
+ const MDNode *variable; ///< The debug info variable we are part of.
+ unsigned offset; ///< Byte offset into variable.
+ DebugLoc dl; ///< The debug location for the variable. This is
+ ///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next; ///< Next value in equivalence class, or null.
+
+ /// Numbered locations referenced by locmap.
+ SmallVector<MachineOperand, 4> locations;
+
+ /// Map of slot indices where this value is live.
+ LocMap locInts;
+
+ /// coalesceLocation - After LocNo was changed, check if it has become
+ /// identical to another location, and coalesce them. This may cause LocNo or
+ /// a later location to be erased, but no earlier location will be erased.
+ void coalesceLocation(unsigned LocNo);
+
+ /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
+ /// is live. Returns true if any changes were made.
+ bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
+
+public:
+ /// UserValue - Create a new UserValue.
+ UserValue(const MDNode *var, unsigned o, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+ {}
+
+ /// getLeader - Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// getNext - Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// match - Does this UserValue match the parameters?
+ bool match(const MDNode *Var, unsigned Offset) const {
+ return Var == variable && Offset == offset;
+ }
+
+ /// merge - Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next)
+ End->leader = L1, End = End->next;
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
+
+ /// getLocationNo - Return the location number that matches Loc.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg()) {
+ if (LocMO.getReg() == 0)
+ return ~0u;
+ // For register locations we dont care about use/def and other flags.
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ locations[i].getReg() == LocMO.getReg() &&
+ locations[i].getSubReg() == LocMO.getSubReg())
+ return i;
+ } else
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ // Don't store def operands.
+ if (locations.back().isReg())
+ locations.back().setIsUse();
+ return locations.size() - 1;
+ }
+
+ /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+ void mapVirtRegs(LDVImpl *LDV);
+
+ /// addDef - Add a definition point to this value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ // Add a singular (Idx,Idx) -> Loc mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ else
+ // A later DBG_VALUE at the same SlotIndex overrides the old location.
+ I.setValue(getLocationNo(LocMO));
+ }
+
+ /// extendDef - Extend the current definition as far as possible down the
+ /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// range of VNI.
+ /// End points where VNI is no longer live are added to Kills.
+ /// @param Idx Starting point for the definition.
+ /// @param LocNo Location number to propagate.
+ /// @param LI Restrict liveness to where LI has the value VNI. May be null.
+ /// @param VNI When LI is not null, this is the value to restrict to.
+ /// @param Kills Append end points of VNI's live range to Kills.
+ /// @param LIS Live intervals analysis.
+ /// @param MDT Dominator tree.
+ void extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS);
+
+ /// addDefsFromCopies - The value in LI/LocNo may be copies to other
+ /// registers. Determine if any of the copies are available at the kill
+ /// points, and add defs if possible.
+ /// @param LI Scan for copies of the value in LI->reg.
+ /// @param LocNo Location number of LI->reg.
+ /// @param Kills Points where the range of LocNo could be extended.
+ /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+ void addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS);
+
+ /// computeIntervals - Compute the live intervals of all locations after
+ /// collecting all their def points.
+ void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS);
+
+ /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
+ /// live. Returns true if any changes were made.
+ bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
+
+ /// rewriteLocations - Rewrite virtual register locations according to the
+ /// provided virtual register map.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+ /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+ void emitDebugValues(VirtRegMap *VRM,
+ LiveIntervals &LIS, const TargetInstrInfo &TRI);
+
+ /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A
+ /// variable may have more than one corresponding DBG_VALUE instructions.
+ /// Only first one needs DebugLoc to identify variable's lexical scope
+ /// in source file.
+ DebugLoc findDebugLoc();
+
+ /// getDebugLoc - Return DebugLoc of this UserValue.
+ DebugLoc getDebugLoc() { return dl;}
+ void print(raw_ostream&, const TargetMachine*);
+};
+} // namespace
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF;
+ LiveIntervals *LIS;
+ LexicalScopes LS;
+ MachineDominatorTree *MDT;
+ const TargetRegisterInfo *TRI;
+
+ /// Whether emitDebugValues is called.
+ bool EmitDone;
+ /// Whether the machine function is modified during the pass.
+ bool ModifiedMF;
+
+ /// userValues - All allocated UserValue instances.
+ SmallVector<UserValue*, 8> userValues;
+
+ /// Map virtual register to eq class leader.
+ typedef DenseMap<unsigned, UserValue*> VRMap;
+ VRMap virtRegToEqClass;
+
+ /// Map user variable to eq class leader.
+ typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ UVMap userVarMap;
+
+ /// getUserValue - Find or create a UserValue.
+ UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+ /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
+
+ /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+ /// @param MI DBG_VALUE instruction
+ /// @param Idx Last valid SLotIndex before instruction.
+ /// @return True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+ /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+ /// a UserValue def for each instruction.
+ /// @param mf MachineFunction to be scanned.
+ /// @return True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf);
+
+ /// computeIntervals - Compute the live intervals of all user values after
+ /// collecting all their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false),
+ ModifiedMF(false) {}
+ bool runOnMachineFunction(MachineFunction &mf);
+
+ /// clear - Release all memory.
+ void clear() {
+ DeleteContainerPointers(userValues);
+ userValues.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ // Make sure we call emitDebugValues if the machine function was modified.
+ assert((!ModifiedMF || EmitDone) &&
+ "Dbg values are not emitted in LDV");
+ EmitDone = false;
+ ModifiedMF = false;
+ }
+
+ /// mapVirtReg - Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+ /// splitRegister - Replace all references to OldReg with NewRegs.
+ void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
+ DIVariable DV(variable);
+ OS << "!\"";
+ DV.printExtendedName(OS);
+ OS << "\"\t";
+ if (offset)
+ OS << '+' << offset;
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value() == ~0u)
+ OS << "undef";
+ else
+ OS << I.value();
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i) {
+ OS << " Loc" << i << '=';
+ locations[i].print(OS, TM);
+ }
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->print(OS, &MF->getTarget());
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+ unsigned KeepLoc = 0;
+ for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+ if (KeepLoc == LocNo)
+ continue;
+ if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+ break;
+ }
+ // No matches.
+ if (KeepLoc == locations.size())
+ return;
+
+ // Keep the smaller location, erase the larger one.
+ unsigned EraseLoc = LocNo;
+ if (KeepLoc > EraseLoc)
+ std::swap(KeepLoc, EraseLoc);
+ locations.erase(locations.begin() + EraseLoc);
+
+ // Rewrite values.
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ unsigned v = I.value();
+ if (v == EraseLoc)
+ I.setValue(KeepLoc); // Coalesce when possible.
+ else if (v > EraseLoc)
+ I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
+ }
+}
+
+void UserValue::mapVirtRegs(LDVImpl *LDV) {
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+ LDV->mapVirtReg(locations[i].getReg(), this);
+}
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
+ DebugLoc DL) {
+ UserValue *&Leader = userVarMap[Var];
+ if (Leader) {
+ UserValue *UV = Leader->getLeader();
+ Leader = UV;
+ for (; UV; UV = UV->getNext())
+ if (UV->match(Var, Offset))
+ return UV;
+ }
+
+ UserValue *UV = new UserValue(Var, Offset, DL, allocator);
+ userValues.push_back(UV);
+ Leader = UserValue::merge(Leader, UV);
+ return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
+ Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
+ return 0;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+ // DBG_VALUE loc, offset, variable
+ if (MI->getNumOperands() != 3 ||
+ !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) {
+ DEBUG(dbgs() << "Can't handle " << *MI);
+ return false;
+ }
+
+ // Get or create the UserValue for (variable,offset).
+ unsigned Offset = MI->getOperand(1).getImm();
+ const MDNode *Var = MI->getOperand(2).getMetadata();
+ UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc());
+ UV->addDef(Idx, MI->getOperand(0));
+ return true;
+}
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE;) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ // DBG_VALUE has no slot index, use the previous instruction instead.
+ SlotIndex Idx = MBBI == MBB->begin() ?
+ LIS->getMBBStartIdx(MBB) :
+ LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot();
+ // Handle consecutive DBG_VALUE instructions with the same slot index.
+ do {
+ if (handleDebugValue(MBBI, Idx)) {
+ MBBI = MBB->erase(MBBI);
+ Changed = true;
+ } else
+ ++MBBI;
+ } while (MBBI != MBBE && MBBI->isDebugValue());
+ }
+ }
+ return Changed;
+}
+
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS) {
+ SmallVector<SlotIndex, 16> Todo;
+ Todo.push_back(Idx);
+ do {
+ SlotIndex Start = Todo.pop_back_val();
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to VNI's live range.
+ bool ToEnd = true;
+ if (LI && VNI) {
+ LiveRange *Range = LI->getLiveRangeContaining(Start);
+ if (!Range || Range->valno != VNI) {
+ if (Kills)
+ Kills->push_back(Start);
+ continue;
+ }
+ if (Range->end < Stop)
+ Stop = Range->end, ToEnd = false;
+ }
+
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != LocNo || I.stop() != Start)
+ continue;
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
+
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop)
+ Stop = I.start(), ToEnd = false;
+ // Limited by VNI's live range.
+ else if (!ToEnd && Kills)
+ Kills->push_back(Stop);
+
+ if (Start >= Stop)
+ continue;
+
+ I.insert(Start, Stop, LocNo);
+
+ // If we extended to the MBB end, propagate down the dominator tree.
+ if (!ToEnd)
+ continue;
+ const std::vector<MachineDomTreeNode*> &Children =
+ MDT.getNode(MBB)->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Children[i]->getBlock();
+ if (UVS.dominates(MBB))
+ Todo.push_back(LIS.getMBBStartIdx(MBB));
+ }
+ } while (!Todo.empty());
+}
+
+void
+UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI, LiveIntervals &LIS) {
+ if (Kills.empty())
+ return;
+ // Don't track copies from physregs, there are too many uses.
+ if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
+ return;
+
+ // Collect all the (vreg, valno) pairs that are copies of LI.
+ SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(LI->reg),
+ UE = MRI.use_nodbg_end(); UI != UE; ++UI) {
+ // Copies of the full value.
+ if (UI.getOperand().getSubReg() || !UI->isCopy())
+ continue;
+ MachineInstr *MI = &*UI;
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // Don't follow copies to physregs. These are usually setting up call
+ // arguments, and the argument registers are always call clobbered. We are
+ // better off in the source register which could be a callee-saved register,
+ // or it could be spilled.
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ continue;
+
+ // Is LocNo extended to reach this copy? If not, another def may be blocking
+ // it, or we are looking at a wrong value of LI.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
+ if (!I.valid() || I.value() != LocNo)
+ continue;
+
+ if (!LIS.hasInterval(DstReg))
+ continue;
+ LiveInterval *DstLI = &LIS.getInterval(DstReg);
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value");
+ CopyValues.push_back(std::make_pair(DstLI, DstVNI));
+ }
+
+ if (CopyValues.empty())
+ return;
+
+ DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n');
+
+ // Try to add defs of the copied values for each kill point.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ SlotIndex Idx = Kills[i];
+ for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) {
+ LiveInterval *DstLI = CopyValues[j].first;
+ const VNInfo *DstVNI = CopyValues[j].second;
+ if (DstLI->getVNInfoAt(Idx) != DstVNI)
+ continue;
+ // Check that there isn't already a def at Idx
+ LocMap::iterator I = locInts.find(Idx);
+ if (I.valid() && I.start() <= Idx)
+ continue;
+ DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
+ << DstVNI->id << " in " << *DstLI << '\n');
+ MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
+ assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
+ unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
+ I.insert(Idx, Idx.getNextSlot(), LocNo);
+ NewDefs.push_back(std::make_pair(Idx, LocNo));
+ break;
+ }
+ }
+}
+
+void
+UserValue::computeIntervals(MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ LiveIntervals &LIS,
+ MachineDominatorTree &MDT,
+ UserValueScopes &UVS) {
+ SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+
+ // Collect all defs to be extended (Skipping undefs).
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+ if (I.value() != ~0u)
+ Defs.push_back(std::make_pair(I.start(), I.value()));
+
+ // Extend all defs, and possibly add new ones along the way.
+ for (unsigned i = 0; i != Defs.size(); ++i) {
+ SlotIndex Idx = Defs[i].first;
+ unsigned LocNo = Defs[i].second;
+ const MachineOperand &Loc = locations[LocNo];
+
+ if (!Loc.isReg()) {
+ extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS);
+ continue;
+ }
+
+ // Register locations are constrained to where the register value is live.
+ if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
+ LiveInterval *LI = 0;
+ const VNInfo *VNI = 0;
+ if (LIS.hasInterval(Loc.getReg())) {
+ LI = &LIS.getInterval(Loc.getReg());
+ VNI = LI->getVNInfoAt(Idx);
+ }
+ SmallVector<SlotIndex, 16> Kills;
+ extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS);
+ if (LI)
+ addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
+ continue;
+ }
+
+ // For physregs, use the live range of the first regunit as a guide.
+ unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI);
+ LiveInterval *LI = &LIS.getRegUnit(Unit);
+ const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ // Don't track copies from physregs, it is too expensive.
+ extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS);
+ }
+
+ // Finally, erase all the undefs.
+ for (LocMap::iterator I = locInts.begin(); I.valid();)
+ if (I.value() == ~0u)
+ I.erase();
+ else
+ ++I;
+}
+
+void LDVImpl::computeIntervals() {
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ UserValueScopes UVS(userValues[i]->getDebugLoc(), LS);
+ userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS);
+ userValues[i]->mapVirtRegs(this);
+ }
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ LIS = &pass.getAnalysis<LiveIntervals>();
+ MDT = &pass.getAnalysis<MachineDominatorTree>();
+ TRI = mf.getTarget().getRegisterInfo();
+ clear();
+ LS.initialize(mf);
+ DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+ << mf.getName() << " **********\n");
+
+ bool Changed = collectDebugValues(mf);
+ computeIntervals();
+ DEBUG(print(dbgs()));
+ LS.releaseMemory();
+ ModifiedMF = Changed;
+ return Changed;
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+ if (!EnableLDV)
+ return false;
+ if (!pImpl)
+ pImpl = new LDVImpl(this);
+ return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+}
+
+void LiveDebugVariables::releaseMemory() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+ if (pImpl)
+ delete static_cast<LDVImpl*>(pImpl);
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+bool
+UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) {
+ DEBUG({
+ dbgs() << "Splitting Loc" << OldLocNo << '\t';
+ print(dbgs(), 0);
+ });
+ bool DidChange = false;
+ LocMap::iterator LocMapI;
+ LocMapI.setMap(locInts);
+ for (unsigned i = 0; i != NewRegs.size(); ++i) {
+ LiveInterval *LI = NewRegs[i];
+ if (LI->empty())
+ continue;
+
+ // Don't allocate the new LocNo until it is needed.
+ unsigned NewLocNo = ~0u;
+
+ // Iterate over the overlaps between locInts and LI.
+ LocMapI.find(LI->beginIndex());
+ if (!LocMapI.valid())
+ continue;
+ LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start());
+ LiveInterval::iterator LIE = LI->end();
+ while (LocMapI.valid() && LII != LIE) {
+ // At this point, we know that LocMapI.stop() > LII->start.
+ LII = LI->advanceTo(LII, LocMapI.start());
+ if (LII == LIE)
+ break;
+
+ // Now LII->end > LocMapI.start(). Do we have an overlap?
+ if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) {
+ // Overlapping correct location. Allocate NewLocNo now.
+ if (NewLocNo == ~0u) {
+ MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
+ MO.setSubReg(locations[OldLocNo].getSubReg());
+ NewLocNo = getLocationNo(MO);
+ DidChange = true;
+ }
+
+ SlotIndex LStart = LocMapI.start();
+ SlotIndex LStop = LocMapI.stop();
+
+ // Trim LocMapI down to the LII overlap.
+ if (LStart < LII->start)
+ LocMapI.setStartUnchecked(LII->start);
+ if (LStop > LII->end)
+ LocMapI.setStopUnchecked(LII->end);
+
+ // Change the value in the overlap. This may trigger coalescing.
+ LocMapI.setValue(NewLocNo);
+
+ // Re-insert any removed OldLocNo ranges.
+ if (LStart < LocMapI.start()) {
+ LocMapI.insert(LStart, LocMapI.start(), OldLocNo);
+ ++LocMapI;
+ assert(LocMapI.valid() && "Unexpected coalescing");
+ }
+ if (LStop > LocMapI.stop()) {
+ ++LocMapI;
+ LocMapI.insert(LII->end, LStop, OldLocNo);
+ --LocMapI;
+ }
+ }
+
+ // Advance to the next overlap.
+ if (LII->end < LocMapI.stop()) {
+ if (++LII == LIE)
+ break;
+ LocMapI.advanceTo(LII->start);
+ } else {
+ ++LocMapI;
+ if (!LocMapI.valid())
+ break;
+ LII = LI->advanceTo(LII, LocMapI.start());
+ }
+ }
+ }
+
+ // Finally, remove any remaining OldLocNo intervals and OldLocNo itself.
+ locations.erase(locations.begin() + OldLocNo);
+ LocMapI.goToBegin();
+ while (LocMapI.valid()) {
+ unsigned v = LocMapI.value();
+ if (v == OldLocNo) {
+ DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
+ << LocMapI.stop() << ")\n");
+ LocMapI.erase();
+ } else {
+ if (v > OldLocNo)
+ LocMapI.setValueUnchecked(v-1);
+ ++LocMapI;
+ }
+ }
+
+ DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);});
+ return DidChange;
+}
+
+bool
+UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ bool DidChange = false;
+ // Split locations referring to OldReg. Iterate backwards so splitLocation can
+ // safely erase unused locations.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ const MachineOperand *Loc = &locations[LocNo];
+ if (!Loc->isReg() || Loc->getReg() != OldReg)
+ continue;
+ DidChange |= splitLocation(LocNo, NewRegs);
+ }
+ return DidChange;
+}
+
+void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ bool DidChange = false;
+ for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
+ DidChange |= UV->splitRegister(OldReg, NewRegs);
+
+ if (!DidChange)
+ return;
+
+ // Map all of the new virtual registers.
+ UserValue *UV = lookupVirtReg(OldReg);
+ for (unsigned i = 0; i != NewRegs.size(); ++i)
+ mapVirtReg(NewRegs[i]->reg, UV);
+}
+
+void LiveDebugVariables::
+splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+ // Iterate over locations in reverse makes it easier to handle coalescing.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ MachineOperand &Loc = locations[LocNo];
+ // Only virtual registers are rewritten.
+ if (!Loc.isReg() || !Loc.getReg() ||
+ !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+ continue;
+ unsigned VirtReg = Loc.getReg();
+ if (VRM.isAssignedReg(VirtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ // This can create a %noreg operand in rare cases when the sub-register
+ // index is no longer available. That means the user value is in a
+ // non-existent sub-register, and %noreg is exactly what we want.
+ Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
+ // FIXME: Translate SubIdx to a stackslot offset.
+ Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+ } else {
+ Loc.setReg(0);
+ Loc.setSubReg(0);
+ }
+ coalesceLocation(LocNo);
+ }
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS) {
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ Idx = Idx.getBaseIndex();
+
+ // Try to find an insert location by going backwards from Idx.
+ MachineInstr *MI;
+ while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+ // We've reached the beginning of MBB.
+ if (Idx == Start) {
+ MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+ return I;
+ }
+ Idx = Idx.getPrevIndex();
+ }
+
+ // Don't insert anything after the first terminator, though.
+ return MI->isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
+}
+
+DebugLoc UserValue::findDebugLoc() {
+ DebugLoc D = dl;
+ dl = DebugLoc();
+ return D;
+}
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
+ unsigned LocNo,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ MachineOperand &Loc = locations[LocNo];
+ ++NumInsertedDebugValues;
+
+ // Frame index locations may require a target callback.
+ if (Loc.isFI()) {
+ MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(),
+ Loc.getIndex(), offset, variable,
+ findDebugLoc());
+ if (MI) {
+ MBB->insert(I, MI);
+ return;
+ }
+ }
+ // This is not a frame index, or the target is happy with a standard FI.
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(Loc).addImm(offset).addMetadata(variable);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+ for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+ SlotIndex Start = I.start();
+ SlotIndex Stop = I.stop();
+ unsigned LocNo = I.value();
+ DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ // This interval may span multiple basic blocks.
+ // Insert a DBG_VALUE into each one.
+ while(Stop > MBBEnd) {
+ // Move to the next block.
+ Start = MBBEnd;
+ if (++MBB == MFEnd)
+ break;
+ MBBEnd = LIS.getMBBEndIdx(MBB);
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ }
+ DEBUG(dbgs() << '\n');
+ if (MBB == MFEnd)
+ break;
+
+ ++I;
+ }
+}
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+ DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
+ userValues[i]->rewriteLocations(*VRM, *TRI);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+ }
+ EmitDone = true;
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+
+#ifndef NDEBUG
+void LiveDebugVariables::dump() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 000000000000..3ce3c398bd4b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,70 @@
+//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class LiveInterval;
+class VirtRegMap;
+
+class LiveDebugVariables : public MachineFunctionPass {
+ void *pImpl;
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ LiveDebugVariables();
+ ~LiveDebugVariables();
+
+ /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+ /// @param OldReg Old virtual register that is going away.
+ /// @param NewReg New register holding the user variables.
+ /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+ /// register.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// splitRegister - Move any user variables in OldReg to the live ranges in
+ /// NewRegs where they are live. Mark the values as unavailable where no new
+ /// register is live.
+ void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+ /// that happened during register allocation.
+ /// @param VRM Rename virtual registers according to map.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ /// dump - Print data structures to dbgs().
+ void dump();
+
+private:
+
+ virtual bool runOnMachineFunction(MachineFunction &);
+ virtual void releaseMemory();
+ virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 000000000000..dccd847d070c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,951 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each the machine instructions an interval [i, j) is said to be a
+// live interval for register v if there is no instruction with number j' > j
+// such that v is live at j' and there is no instruction with number i' < i such
+// that v is live at i'. In this implementation intervals can have holes,
+// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
+ // This algorithm is basically std::upper_bound.
+ // Unfortunately, std::upper_bound cannot be used with mixed types until we
+ // adopt C++0x. Many libraries can do it, but not all.
+ if (empty() || Pos >= endIndex())
+ return end();
+ iterator I = begin();
+ size_t Len = ranges.size();
+ do {
+ size_t Mid = Len >> 1;
+ if (Pos < I[Mid].end)
+ Len = Mid;
+ else
+ I += Mid + 1, Len -= Mid + 1;
+ } while (Len);
+ return I;
+}
+
+VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
+ VNInfo::Allocator &VNInfoAllocator) {
+ assert(!Def.isDead() && "Cannot define a value at the dead slot");
+ iterator I = find(Def);
+ if (I == end()) {
+ VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
+ ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+ if (SlotIndex::isSameInstr(Def, I->start)) {
+ assert(I->valno->def == I->start && "Inconsistent existing value def");
+
+ // It is possible to have both normal and early-clobber defs of the same
+ // register on an instruction. It doesn't make a lot of sense, but it is
+ // possible to specify in inline assembly.
+ //
+ // Just convert everything to early-clobber.
+ Def = std::min(Def, I->start);
+ if (Def != I->start)
+ I->start = I->valno->def = Def;
+ return I->valno;
+ }
+ assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
+ VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
+ ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live intervals should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveInterval::overlapsFrom(const LiveInterval& other,
+ const_iterator StartPos) const {
+ assert(!empty() && "empty interval");
+ const_iterator i = begin();
+ const_iterator ie = end();
+ const_iterator j = StartPos;
+ const_iterator je = other.end();
+
+ assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+ StartPos != other.end() && "Bogus start position hint!");
+
+ if (i->start < j->start) {
+ i = std::upper_bound(i, ie, j->start);
+ if (i != ranges.begin()) --i;
+ } else if (j->start < i->start) {
+ ++StartPos;
+ if (StartPos != other.end() && StartPos->start <= i->start) {
+ assert(StartPos < other.end() && i < end());
+ j = std::upper_bound(j, je, i->start);
+ if (j != other.ranges.begin()) --j;
+ }
+ } else {
+ return true;
+ }
+
+ if (j == je) return false;
+
+ while (i != ie) {
+ if (i->start > j->start) {
+ std::swap(i, j);
+ std::swap(ie, je);
+ }
+
+ if (i->end > j->start)
+ return true;
+ ++i;
+ }
+
+ return false;
+}
+
+bool LiveInterval::overlaps(const LiveInterval &Other,
+ const CoalescerPair &CP,
+ const SlotIndexes &Indexes) const {
+ assert(!empty() && "empty interval");
+ if (Other.empty())
+ return false;
+
+ // Use binary searches to find initial positions.
+ const_iterator I = find(Other.beginIndex());
+ const_iterator IE = end();
+ if (I == IE)
+ return false;
+ const_iterator J = Other.find(I->start);
+ const_iterator JE = Other.end();
+ if (J == JE)
+ return false;
+
+ for (;;) {
+ // J has just been advanced to satisfy:
+ assert(J->end >= I->start);
+ // Check for an overlap.
+ if (J->start < I->end) {
+ // I and J are overlapping. Find the later start.
+ SlotIndex Def = std::max(I->start, J->start);
+ // Allow the overlap if Def is a coalescable copy.
+ if (Def.isBlock() ||
+ !CP.isCoalescable(Indexes.getInstructionFromIndex(Def)))
+ return true;
+ }
+ // Advance the iterator that ends first to check for more overlaps.
+ if (J->end > I->end) {
+ std::swap(I, J);
+ std::swap(IE, JE);
+ }
+ // Advance J until J->end >= I->start.
+ do
+ if (++J == JE)
+ return false;
+ while (J->end < I->start);
+ }
+}
+
+/// overlaps - Return true if the live interval overlaps a range specified
+/// by [Start, End).
+bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
+ assert(Start < End && "Invalid range");
+ const_iterator I = std::lower_bound(begin(), end(), End);
+ return I != begin() && (--I)->end > Start;
+}
+
+
+/// ValNo is dead, remove it. If it is the largest value number, just nuke it
+/// (and any other deleted values neighboring it), otherwise mark it as ~1U so
+/// it can be nuked later.
+void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ valnos.pop_back();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ ValNo->markUnused();
+ }
+}
+
+/// RenumberValues - Renumber all values in order of appearance and delete the
+/// remaining unused values.
+void LiveInterval::RenumberValues(LiveIntervals &lis) {
+ SmallPtrSet<VNInfo*, 8> Seen;
+ valnos.clear();
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ VNInfo *VNI = I->valno;
+ if (!Seen.insert(VNI))
+ continue;
+ assert(!VNI->isUnused() && "Unused valno used by live range");
+ VNI->id = (unsigned)valnos.size();
+ valnos.push_back(VNI);
+ }
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with. The iterator is
+/// not invalidated.
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = llvm::next(I);
+ for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ }
+
+ // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+ I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+ // If the newly formed range now touches the range after it and if they have
+ // the same value number, merge the two ranges into one range.
+ if (MergeTo != ranges.end() && MergeTo->start <= I->end &&
+ MergeTo->valno == ValNo) {
+ I->end = MergeTo->end;
+ ++MergeTo;
+ }
+
+ // Erase any dead ranges.
+ ranges.erase(llvm::next(I), MergeTo);
+}
+
+
+/// extendIntervalStartTo - This method is used when we want to extend the range
+/// specified by I to start at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with.
+LiveInterval::Ranges::iterator
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = I;
+ do {
+ if (MergeTo == ranges.begin()) {
+ I->start = NewStart;
+ ranges.erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another interval, just delete a range and
+ // extend that interval.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ MergeTo->end = I->end;
+ } else {
+ // Otherwise, extend the interval right after.
+ ++MergeTo;
+ MergeTo->start = NewStart;
+ MergeTo->end = I->end;
+ }
+
+ ranges.erase(llvm::next(MergeTo), llvm::next(I));
+ return MergeTo;
+}
+
+LiveInterval::iterator
+LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
+ SlotIndex Start = LR.start, End = LR.end;
+ iterator it = std::upper_bound(From, ranges.end(), Start);
+
+ // If the inserted interval starts in the middle or right at the end of
+ // another interval, just extend that interval to contain the range of LR.
+ if (it != ranges.begin()) {
+ iterator B = prior(it);
+ if (LR.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendIntervalEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two LiveRanges with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this range ends in the middle of, or right next to, another
+ // interval, merge it into that interval.
+ if (it != ranges.end()) {
+ if (LR.valno == it->valno) {
+ if (it->start <= End) {
+ it = extendIntervalStartTo(it, Start);
+
+ // If LR is a complete superset of an interval, we may need to grow its
+ // endpoint as well.
+ if (End > it->end)
+ extendIntervalEndTo(it, End);
+ return it;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(it->start >= End &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new range that doesn't interact with anything.
+ // Insert it.
+ return ranges.insert(it, LR);
+}
+
+/// extendInBlock - If this interval is live before Kill in the basic
+/// block that starts at StartIdx, extend it to be live up to Kill and return
+/// the value. If there is no live range before Kill, return NULL.
+VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
+ if (empty())
+ return 0;
+ iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot());
+ if (I == begin())
+ return 0;
+ --I;
+ if (I->end <= StartIdx)
+ return 0;
+ if (I->end < Kill)
+ extendIntervalEndTo(I, Kill);
+ return I->valno;
+}
+
+/// removeRange - Remove the specified range from this interval. Note that
+/// the range must be in a single LiveRange in its entirety.
+void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
+ bool RemoveDeadValNo) {
+ // Find the LiveRange containing this span.
+ Ranges::iterator I = find(Start);
+ assert(I != ranges.end() && "Range is not in interval!");
+ assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
+
+ // If the span we are removing is at the start of the LiveRange, adjust it.
+ VNInfo *ValNo = I->valno;
+ if (I->start == Start) {
+ if (I->end == End) {
+ if (RemoveDeadValNo) {
+ // Check if val# is dead.
+ bool isDead = true;
+ for (const_iterator II = begin(), EE = end(); II != EE; ++II)
+ if (II != I && II->valno == ValNo) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+ }
+ }
+
+ ranges.erase(I); // Removed the whole LiveRange.
+ } else
+ I->start = End;
+ return;
+ }
+
+ // Otherwise if the span we are removing is at the end of the LiveRange,
+ // adjust the other way.
+ if (I->end == End) {
+ I->end = Start;
+ return;
+ }
+
+ // Otherwise, we are splitting the LiveRange into two pieces.
+ SlotIndex OldEnd = I->end;
+ I->end = Start; // Trim the old interval.
+
+ // Insert the new one.
+ ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo));
+}
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+ if (empty()) return;
+ Ranges::iterator I = ranges.end();
+ Ranges::iterator E = ranges.begin();
+ do {
+ --I;
+ if (I->valno == ValNo)
+ ranges.erase(I);
+ } while (I != E);
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+}
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other,
+ const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ MachineRegisterInfo *MRI) {
+ verify();
+
+ // Determine if any of our live range values are mapped. This is uncommon, so
+ // we want to avoid the interval scan if not.
+ bool MustMapCurValNos = false;
+ unsigned NumVals = getNumValNums();
+ unsigned NumNewVals = NewVNInfo.size();
+ for (unsigned i = 0; i != NumVals; ++i) {
+ unsigned LHSValID = LHSValNoAssignments[i];
+ if (i != LHSValID ||
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) {
+ MustMapCurValNos = true;
+ break;
+ }
+ }
+
+ // If we have to apply a mapping to our base interval assignment, rewrite it
+ // now.
+ if (MustMapCurValNos && !empty()) {
+ // Map the first live range.
+
+ iterator OutIt = begin();
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+ for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) {
+ VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+ assert(nextValNo != 0 && "Huh?");
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one LiveRange. This happens when we
+ // have [0,4:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == nextValNo && OutIt->end == I->start) {
+ OutIt->end = I->end;
+ } else {
+ // Didn't merge. Move OutIt to the next interval,
+ ++OutIt;
+ OutIt->valno = nextValNo;
+ if (OutIt != I) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+ }
+ }
+ // If we merge some live ranges, chop off the end.
+ ++OutIt;
+ ranges.erase(OutIt, end());
+ }
+
+ // Rewrite Other values before changing the VNInfo ids.
+ // This can leave Other in an invalid state because we're not coalescing
+ // touching segments that now have identical values. That's OK since Other is
+ // not supposed to be valid after calling join();
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]];
+
+ // Update val# info. Renumber them and make sure they all belong to this
+ // LiveInterval now. Also remove dead val#'s.
+ unsigned NumValNos = 0;
+ for (unsigned i = 0; i < NumNewVals; ++i) {
+ VNInfo *VNI = NewVNInfo[i];
+ if (VNI) {
+ if (NumValNos >= NumVals)
+ valnos.push_back(VNI);
+ else
+ valnos[NumValNos] = VNI;
+ VNI->id = NumValNos++; // Renumber val#.
+ }
+ }
+ if (NumNewVals < NumVals)
+ valnos.resize(NumNewVals); // shrinkify
+
+ // Okay, now insert the RHS live ranges into the LHS.
+ LiveRangeUpdater Updater(this);
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ Updater.add(*I);
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+ VNInfo *LHSValNo) {
+ LiveRangeUpdater Updater(this);
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+ Updater.add(I->start, I->end, LHSValNo);
+}
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval, it will replace the value numbers of the overlaped
+/// live ranges with the specified value number.
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+ const VNInfo *RHSValNo,
+ VNInfo *LHSValNo) {
+ LiveRangeUpdater Updater(this);
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+ if (I->valno == RHSValNo)
+ Updater.add(I->start, I->end, LHSValNo);
+}
+
+/// MergeValueNumberInto - This method is called when two value nubmers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 values numbers and compaction of the value space.
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1->id < V2->id) {
+ V1->copyFrom(*V2);
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 live ranges into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator LR = I++;
+ if (LR->valno != V1) continue; // Not a V1 LiveRange.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (LR != begin()) {
+ iterator Prev = LR-1;
+ if (Prev->valno == V2 && Prev->end == LR->start) {
+ Prev->end = LR->end;
+
+ // Erase this live-range.
+ ranges.erase(LR);
+ I = Prev+1;
+ LR = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ LR->valno = V2;
+
+ // If we can merge it into later V2 live ranges, do so now. We ignore any
+ // following V1 live ranges, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == LR->end && I->valno == V2) {
+ LR->end = I->end;
+ ranges.erase(I);
+ I = LR+1;
+ }
+ }
+ }
+
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
+
+ return V2;
+}
+
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ Sum += I->start.distance(I->end);
+ return Sum;
+}
+
+raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
+ return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LiveRange::dump() const {
+ dbgs() << *this << "\n";
+}
+#endif
+
+void LiveInterval::print(raw_ostream &OS) const {
+ if (empty())
+ OS << "EMPTY";
+ else {
+ for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+ E = ranges.end(); I != E; ++I) {
+ OS << *I;
+ assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+ }
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << " ";
+ unsigned vnum = 0;
+ for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+ ++i, ++vnum) {
+ const VNInfo *vni = *i;
+ if (vnum) OS << " ";
+ OS << vnum << "@";
+ if (vni->isUnused()) {
+ OS << "x";
+ } else {
+ OS << vni->def;
+ if (vni->isPHIDef())
+ OS << "-phi";
+ }
+ }
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LiveInterval::dump() const {
+ dbgs() << *this << "\n";
+}
+#endif
+
+#ifndef NDEBUG
+void LiveInterval::verify() const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ assert(I->start.isValid());
+ assert(I->end.isValid());
+ assert(I->start < I->end);
+ assert(I->valno != 0);
+ assert(I->valno == valnos[I->valno->id]);
+ if (llvm::next(I) != E) {
+ assert(I->end <= llvm::next(I)->start);
+ if (I->end == llvm::next(I)->start)
+ assert(I->valno != llvm::next(I)->valno);
+ }
+ }
+}
+#endif
+
+
+void LiveRange::print(raw_ostream &os) const {
+ os << *this;
+}
+
+//===----------------------------------------------------------------------===//
+// LiveRangeUpdater class
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeUpdater class always maintains these invariants:
+//
+// - When LastStart is invalid, Spills is empty and the iterators are invalid.
+// This is the initial state, and the state created by flush().
+// In this state, isDirty() returns false.
+//
+// Otherwise, segments are kept in three separate areas:
+//
+// 1. [begin; WriteI) at the front of LI.
+// 2. [ReadI; end) at the back of LI.
+// 3. Spills.
+//
+// - LI.begin() <= WriteI <= ReadI <= LI.end().
+// - Segments in all three areas are fully ordered and coalesced.
+// - Segments in area 1 precede and can't coalesce with segments in area 2.
+// - Segments in Spills precede and can't coalesce with segments in area 2.
+// - No coalescing is possible between segments in Spills and segments in area
+// 1, and there are no overlapping segments.
+//
+// The segments in Spills are not ordered with respect to the segments in area
+// 1. They need to be merged.
+//
+// When they exist, Spills.back().start <= LastStart,
+// and WriteI[-1].start <= LastStart.
+
+void LiveRangeUpdater::print(raw_ostream &OS) const {
+ if (!isDirty()) {
+ if (LI)
+ OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n';
+ else
+ OS << "Null updater.\n";
+ return;
+ }
+ assert(LI && "Can't have null LI in dirty updater.");
+ OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI)
+ << ", last start = " << LastStart
+ << ":\n Area 1:";
+ for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I)
+ OS << ' ' << *I;
+ OS << "\n Spills:";
+ for (unsigned I = 0, E = Spills.size(); I != E; ++I)
+ OS << ' ' << Spills[I];
+ OS << "\n Area 2:";
+ for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I)
+ OS << ' ' << *I;
+ OS << '\n';
+}
+
+void LiveRangeUpdater::dump() const
+{
+ print(errs());
+}
+
+// Determine if A and B should be coalesced.
+static inline bool coalescable(const LiveRange &A, const LiveRange &B) {
+ assert(A.start <= B.start && "Unordered live ranges.");
+ if (A.end == B.start)
+ return A.valno == B.valno;
+ if (A.end < B.start)
+ return false;
+ assert(A.valno == B.valno && "Cannot overlap different values");
+ return true;
+}
+
+void LiveRangeUpdater::add(LiveRange Seg) {
+ assert(LI && "Cannot add to a null destination");
+
+ // Flush the state if Start moves backwards.
+ if (!LastStart.isValid() || LastStart > Seg.start) {
+ if (isDirty())
+ flush();
+ // This brings us to an uninitialized state. Reinitialize.
+ assert(Spills.empty() && "Leftover spilled segments");
+ WriteI = ReadI = LI->begin();
+ }
+
+ // Remember start for next time.
+ LastStart = Seg.start;
+
+ // Advance ReadI until it ends after Seg.start.
+ LiveInterval::iterator E = LI->end();
+ if (ReadI != E && ReadI->end <= Seg.start) {
+ // First try to close the gap between WriteI and ReadI with spills.
+ if (ReadI != WriteI)
+ mergeSpills();
+ // Then advance ReadI.
+ if (ReadI == WriteI)
+ ReadI = WriteI = LI->find(Seg.start);
+ else
+ while (ReadI != E && ReadI->end <= Seg.start)
+ *WriteI++ = *ReadI++;
+ }
+
+ assert(ReadI == E || ReadI->end > Seg.start);
+
+ // Check if the ReadI segment begins early.
+ if (ReadI != E && ReadI->start <= Seg.start) {
+ assert(ReadI->valno == Seg.valno && "Cannot overlap different values");
+ // Bail if Seg is completely contained in ReadI.
+ if (ReadI->end >= Seg.end)
+ return;
+ // Coalesce into Seg.
+ Seg.start = ReadI->start;
+ ++ReadI;
+ }
+
+ // Coalesce as much as possible from ReadI into Seg.
+ while (ReadI != E && coalescable(Seg, *ReadI)) {
+ Seg.end = std::max(Seg.end, ReadI->end);
+ ++ReadI;
+ }
+
+ // Try coalescing Spills.back() into Seg.
+ if (!Spills.empty() && coalescable(Spills.back(), Seg)) {
+ Seg.start = Spills.back().start;
+ Seg.end = std::max(Spills.back().end, Seg.end);
+ Spills.pop_back();
+ }
+
+ // Try coalescing Seg into WriteI[-1].
+ if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) {
+ WriteI[-1].end = std::max(WriteI[-1].end, Seg.end);
+ return;
+ }
+
+ // Seg doesn't coalesce with anything, and needs to be inserted somewhere.
+ if (WriteI != ReadI) {
+ *WriteI++ = Seg;
+ return;
+ }
+
+ // Finally, append to LI or Spills.
+ if (WriteI == E) {
+ LI->ranges.push_back(Seg);
+ WriteI = ReadI = LI->ranges.end();
+ } else
+ Spills.push_back(Seg);
+}
+
+// Merge as many spilled segments as possible into the gap between WriteI
+// and ReadI. Advance WriteI to reflect the inserted instructions.
+void LiveRangeUpdater::mergeSpills() {
+ // Perform a backwards merge of Spills and [SpillI;WriteI).
+ size_t GapSize = ReadI - WriteI;
+ size_t NumMoved = std::min(Spills.size(), GapSize);
+ LiveInterval::iterator Src = WriteI;
+ LiveInterval::iterator Dst = Src + NumMoved;
+ LiveInterval::iterator SpillSrc = Spills.end();
+ LiveInterval::iterator B = LI->begin();
+
+ // This is the new WriteI position after merging spills.
+ WriteI = Dst;
+
+ // Now merge Src and Spills backwards.
+ while (Src != Dst) {
+ if (Src != B && Src[-1].start > SpillSrc[-1].start)
+ *--Dst = *--Src;
+ else
+ *--Dst = *--SpillSrc;
+ }
+ assert(NumMoved == size_t(Spills.end() - SpillSrc));
+ Spills.erase(SpillSrc, Spills.end());
+}
+
+void LiveRangeUpdater::flush() {
+ if (!isDirty())
+ return;
+ // Clear the dirty state.
+ LastStart = SlotIndex();
+
+ assert(LI && "Cannot add to a null destination");
+
+ // Nothing to merge?
+ if (Spills.empty()) {
+ LI->ranges.erase(WriteI, ReadI);
+ LI->verify();
+ return;
+ }
+
+ // Resize the WriteI - ReadI gap to match Spills.
+ size_t GapSize = ReadI - WriteI;
+ if (GapSize < Spills.size()) {
+ // The gap is too small. Make some room.
+ size_t WritePos = WriteI - LI->begin();
+ LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange());
+ // This also invalidated ReadI, but it is recomputed below.
+ WriteI = LI->ranges.begin() + WritePos;
+ } else {
+ // Shrink the gap if necessary.
+ LI->ranges.erase(WriteI + Spills.size(), ReadI);
+ }
+ ReadI = WriteI + Spills.size();
+ mergeSpills();
+ LI->verify();
+}
+
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
+ // Create initial equivalence classes.
+ EqClass.clear();
+ EqClass.grow(LI->getNumValNums());
+
+ const VNInfo *used = 0, *unused = 0;
+
+ // Determine connections.
+ for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ // Group all unused values into one class.
+ if (VNI->isUnused()) {
+ if (unused)
+ EqClass.join(unused->id, VNI->id);
+ unused = VNI;
+ continue;
+ }
+ used = VNI;
+ if (VNI->isPHIDef()) {
+ const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ assert(MBB && "Phi-def has no defining MBB");
+ // Connect to values live out of predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI)
+ if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
+ EqClass.join(VNI->id, PVNI->id);
+ } else {
+ // Normal value defined by an instruction. Check for two-addr redef.
+ // FIXME: This could be coincidental. Should we really check for a tied
+ // operand constraint?
+ // Note that VNI->def may be a use slot for an early clobber def.
+ if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def))
+ EqClass.join(VNI->id, UVNI->id);
+ }
+ }
+
+ // Lump all the unused values in with the last used value.
+ if (used && unused)
+ EqClass.join(used->id, unused->id);
+
+ EqClass.compress();
+ return EqClass.getNumClasses();
+}
+
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
+ MachineRegisterInfo &MRI) {
+ assert(LIV[0] && "LIV[0] must be set");
+ LiveInterval &LI = *LIV[0];
+
+ // Rewrite instructions.
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ // DBG_VALUE instructions should have been eliminated earlier.
+ LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI));
+ const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ // In the case of an <undef> use that isn't tied to any def, VNI will be
+ // NULL. If the use is tied to a def, VNI will be the defined value.
+ if (!VNI)
+ continue;
+ MO.setReg(LIV[getEqClass(VNI)]->reg);
+ }
+
+ // Move runs to new intervals.
+ LiveInterval::iterator J = LI.begin(), E = LI.end();
+ while (J != E && EqClass[J->valno->id] == 0)
+ ++J;
+ for (LiveInterval::iterator I = J; I != E; ++I) {
+ if (unsigned eq = EqClass[I->valno->id]) {
+ assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ LIV[eq]->ranges.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LI.ranges.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LI.getNumValNums();
+ while (j != e && EqClass[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LI.getValNumInfo(i);
+ if (unsigned eq = EqClass[i]) {
+ VNI->id = LIV[eq]->getNumValNums();
+ LIV[eq]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LI.valnos[j++] = VNI;
+ }
+ }
+ LI.valnos.resize(j);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 000000000000..f1b839481131
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,1172 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cmath>
+#include <limits>
+using namespace llvm;
+
+char LiveIntervals::ID = 0;
+char &llvm::LiveIntervalsID = LiveIntervals::ID;
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ // LiveVariables isn't really required by this analysis, it is only required
+ // here to make sure it is live during TwoAddressInstructionPass and
+ // PHIElimination. This is temporary.
+ AU.addRequired<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequiredTransitiveID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveIntervals::LiveIntervals() : MachineFunctionPass(ID),
+ DomTree(0), LRCalc(0) {
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+}
+
+LiveIntervals::~LiveIntervals() {
+ delete LRCalc;
+}
+
+void LiveIntervals::releaseMemory() {
+ // Free the live intervals themselves.
+ for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
+ delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)];
+ VirtRegIntervals.clear();
+ RegMaskSlots.clear();
+ RegMaskBits.clear();
+ RegMaskBlocks.clear();
+
+ for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+ delete RegUnitIntervals[i];
+ RegUnitIntervals.clear();
+
+ // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ MRI = &MF->getRegInfo();
+ TM = &fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ if (!LRCalc)
+ LRCalc = new LiveRangeCalc();
+
+ // Allocate space for all virtual registers.
+ VirtRegIntervals.resize(MRI->getNumVirtRegs());
+
+ computeVirtRegs();
+ computeRegMasks();
+ computeLiveInRegUnits();
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
+ OS << "********** INTERVALS **********\n";
+
+ // Dump the regunits.
+ for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+ if (LiveInterval *LI = RegUnitIntervals[i])
+ OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n';
+
+ // Dump the virtregs.
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (hasInterval(Reg))
+ OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
+ }
+
+ OS << "RegMasks:";
+ for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
+ OS << ' ' << RegMaskSlots[i];
+ OS << '\n';
+
+ printInstrs(OS);
+}
+
+void LiveIntervals::printInstrs(raw_ostream &OS) const {
+ OS << "********** MACHINEINSTRS **********\n";
+ MF->print(OS, Indexes);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LiveIntervals::dumpInstrs() const {
+ printInstrs(dbgs());
+}
+#endif
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+ return new LiveInterval(reg, Weight);
+}
+
+
+/// computeVirtRegInterval - Compute the live interval of a virtual register,
+/// based on defs and uses.
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) {
+ assert(LRCalc && "LRCalc not initialized.");
+ assert(LI->empty() && "Should only compute empty intervals.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LRCalc->createDeadDefs(LI);
+ LRCalc->extendToUses(LI);
+}
+
+void LiveIntervals::computeVirtRegs() {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = createInterval(Reg);
+ VirtRegIntervals[Reg] = LI;
+ computeVirtRegInterval(LI);
+ }
+}
+
+void LiveIntervals::computeRegMasks() {
+ RegMaskBlocks.resize(MF->getNumBlockIDs());
+
+ // Find all instructions with regmask operands.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+ RMB.first = RegMaskSlots.size();
+ for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end();
+ MI != ME; ++MI)
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isRegMask())
+ continue;
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
+ RegMaskBits.push_back(MO->getRegMask());
+ }
+ // Compute the number of register mask instructions in this block.
+ RMB.second = RegMaskSlots.size() - RMB.first;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Register Unit Liveness
+//===----------------------------------------------------------------------===//
+//
+// Fixed interference typically comes from ABI boundaries: Function arguments
+// and return values are passed in fixed registers, and so are exception
+// pointers entering landing pads. Certain instructions require values to be
+// present in specific registers. That is also represented through fixed
+// interference.
+//
+
+/// computeRegUnitInterval - Compute the live interval of a register unit, based
+/// on the uses and defs of aliasing registers. The interval should be empty,
+/// or contain only dead phi-defs from ABI blocks.
+void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
+ unsigned Unit = LI->reg;
+
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+
+ // The physregs aliasing Unit are the roots and their super-registers.
+ // Create all values as dead defs before extending to uses. Note that roots
+ // may share super-registers. That's OK because createDeadDefs() is
+ // idempotent. It is very rare for a register unit to have multiple roots, so
+ // uniquing super-registers is probably not worthwhile.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+ unsigned Root = *Roots;
+ if (!MRI->reg_empty(Root))
+ LRCalc->createDeadDefs(LI, Root);
+ for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
+ if (!MRI->reg_empty(*Supers))
+ LRCalc->createDeadDefs(LI, *Supers);
+ }
+ }
+
+ // Now extend LI to reach all uses.
+ // Ignore uses of reserved registers. We only track defs of those.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+ unsigned Root = *Roots;
+ if (!MRI->isReserved(Root) && !MRI->reg_empty(Root))
+ LRCalc->extendToUses(LI, Root);
+ for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
+ unsigned Reg = *Supers;
+ if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
+ LRCalc->extendToUses(LI, Reg);
+ }
+ }
+}
+
+
+/// computeLiveInRegUnits - Precompute the live ranges of any register units
+/// that are live-in to an ABI block somewhere. Register values can appear
+/// without a corresponding def when entering the entry block or a landing pad.
+///
+void LiveIntervals::computeLiveInRegUnits() {
+ RegUnitIntervals.resize(TRI->getNumRegUnits());
+ DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
+
+ // Keep track of the intervals allocated.
+ SmallVector<LiveInterval*, 8> NewIntvs;
+
+ // Check all basic blocks for live-ins.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock *MBB = MFI;
+
+ // We only care about ABI blocks: Entry + landing pads.
+ if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty())
+ continue;
+
+ // Create phi-defs at Begin for all live-in registers.
+ SlotIndex Begin = Indexes->getMBBStartIdx(MBB);
+ DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber());
+ for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(),
+ LIE = MBB->livein_end(); LII != LIE; ++LII) {
+ for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = *Units;
+ LiveInterval *Intv = RegUnitIntervals[Unit];
+ if (!Intv) {
+ Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF);
+ NewIntvs.push_back(Intv);
+ }
+ VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator());
+ (void)VNI;
+ DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n");
+
+ // Compute the 'normal' part of the intervals.
+ for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i)
+ computeRegUnitInterval(NewIntvs[i]);
+}
+
+
+/// shrinkToUses - After removing some uses of a register, shrink its live
+/// range to just the remaining uses. This method does not compute reaching
+/// defs for new uses, and it doesn't remove dead defs.
+bool LiveIntervals::shrinkToUses(LiveInterval *li,
+ SmallVectorImpl<MachineInstr*> *dead) {
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+ && "Can only shrink virtual registers");
+ // Find all the values used, including PHI kills.
+ SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+
+ // Blocks that have already been added to WorkList as live-out.
+ SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+
+ // Visit all instructions reading li->reg.
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(li->reg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ continue;
+ SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+ LiveRangeQuery LRQ(*li, Idx);
+ VNInfo *VNI = LRQ.valueIn();
+ if (!VNI) {
+ // This shouldn't happen: readsVirtualRegister returns true, but there is
+ // no live value. It is likely caused by a target getting <undef> flags
+ // wrong.
+ DEBUG(dbgs() << Idx << '\t' << *UseMI
+ << "Warning: Instr claims to read non-existent value in "
+ << *li << '\n');
+ continue;
+ }
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ if (VNInfo *DefVNI = LRQ.valueDefined())
+ Idx = DefVNI->def;
+
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create a new live interval with only minimal live segments per def.
+ LiveInterval NewLI(li->reg, 0);
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI));
+ }
+
+ // Keep track of the PHIs that are in use.
+ SmallPtrSet<VNInfo*, 8> UsedPHIs;
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot());
+ SlotIndex BlockStart = getMBBStartIdx(MBB);
+
+ // Extend the live range for VNI to be live at Idx.
+ if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
+ (void)ExtVNI;
+ assert(ExtVNI == VNI && "Unexpected existing value number");
+ // Is this a PHIDef we haven't seen before?
+ if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI))
+ continue;
+ // The PHI is live, make sure the predecessors are live-out.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ if (!LiveOut.insert(*PI))
+ continue;
+ SlotIndex Stop = getMBBEndIdx(*PI);
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (VNInfo *PVNI = li->getVNInfoBefore(Stop))
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ NewLI.addRange(LiveRange(BlockStart, Idx, VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ if (!LiveOut.insert(*PI))
+ continue;
+ SlotIndex Stop = getMBBEndIdx(*PI);
+ assert(li->getVNInfoBefore(Stop) == VNI &&
+ "Wrong value out of predecessor");
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ }
+ }
+
+ // Handle dead values.
+ bool CanSeparate = false;
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
+ assert(LII != NewLI.end() && "Missing live range for PHI");
+ if (LII->end != VNI->def.getDeadSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->markUnused();
+ NewLI.removeRange(*LII);
+ DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+ CanSeparate = true;
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(VNI->def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(li->reg, TRI);
+ if (dead && MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
+ dead->push_back(MI);
+ }
+ }
+ }
+
+ // Move the trimmed ranges back.
+ li->ranges.swap(NewLI.ranges);
+ DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ return CanSeparate;
+}
+
+void LiveIntervals::extendToIndices(LiveInterval *LI,
+ ArrayRef<SlotIndex> Indices) {
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ LRCalc->extend(LI, Indices[i]);
+}
+
+void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
+ SmallVectorImpl<SlotIndex> *EndPoints) {
+ LiveRangeQuery LRQ(*LI, Kill);
+ VNInfo *VNI = LRQ.valueOut();
+ if (!VNI)
+ return;
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
+ SlotIndex MBBStart, MBBEnd;
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
+
+ // If VNI isn't live out from KillMBB, the value is trivially pruned.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(Kill, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ return;
+ }
+
+ // VNI is live out of KillMBB.
+ LI->removeRange(Kill, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+
+ // Find all blocks that are reachable from KillMBB without leaving VNI's live
+ // range. It is possible that KillMBB itself is reachable, so start a DFS
+ // from each successor.
+ typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+ VisitedTy Visited;
+ for (MachineBasicBlock::succ_iterator
+ SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
+ SuccI != SuccE; ++SuccI) {
+ for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
+ I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+ I != E;) {
+ MachineBasicBlock *MBB = *I;
+
+ // Check if VNI is live in to MBB.
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+ LiveRangeQuery LRQ(*LI, MBBStart);
+ if (LRQ.valueIn() != VNI) {
+ // This block isn't part of the VNI live range. Prune the search.
+ I.skipChildren();
+ continue;
+ }
+
+ // Prune the search if VNI is killed in MBB.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(MBBStart, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ I.skipChildren();
+ continue;
+ }
+
+ // VNI is live through MBB.
+ LI->removeRange(MBBStart, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+ ++I;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
+ // Keep track of regunit ranges.
+ SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU;
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = &getInterval(Reg);
+ if (LI->empty())
+ continue;
+
+ // Find the regunit intervals for the assigned register. They may overlap
+ // the virtual register live range, cancelling any kills.
+ RU.clear();
+ for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
+ ++Units) {
+ LiveInterval *RUInt = &getRegUnit(*Units);
+ if (RUInt->empty())
+ continue;
+ RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end)));
+ }
+
+ // Every instruction that kills Reg corresponds to a live range end point.
+ for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+ ++RI) {
+ // A block index indicates an MBB edge.
+ if (RI->end.isBlock())
+ continue;
+ MachineInstr *MI = getInstructionFromIndex(RI->end);
+ if (!MI)
+ continue;
+
+ // Check if any of the reguints are live beyond the end of RI. That could
+ // happen when a physreg is defined as a copy of a virtreg:
+ //
+ // %EAX = COPY %vreg5
+ // FOO %vreg5 <--- MI, cancel kill because %EAX is live.
+ // BAR %EAX<kill>
+ //
+ // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
+ bool CancelKill = false;
+ for (unsigned u = 0, e = RU.size(); u != e; ++u) {
+ LiveInterval *RInt = RU[u].first;
+ LiveInterval::iterator &I = RU[u].second;
+ if (I == RInt->end())
+ continue;
+ I = RInt->advanceTo(I, RI->end);
+ if (I == RInt->end() || I->start >= RI->end)
+ continue;
+ // I is overlapping RI.
+ CancelKill = true;
+ break;
+ }
+ if (CancelKill)
+ MI->clearRegisterKills(Reg, NULL);
+ else
+ MI->addRegisterKilled(Reg, NULL);
+ }
+ }
+}
+
+MachineBasicBlock*
+LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+ // A local live range must be fully contained inside the block, meaning it is
+ // defined and killed at instructions, not at block boundaries. It is not
+ // live in or or out of any block.
+ //
+ // It is technically possible to have a PHI-defined live range identical to a
+ // single block, but we are going to return false in that case.
+
+ SlotIndex Start = LI.beginIndex();
+ if (Start.isBlock())
+ return NULL;
+
+ SlotIndex Stop = LI.endIndex();
+ if (Stop.isBlock())
+ return NULL;
+
+ // getMBBFromIndex doesn't need to search the MBB table when both indexes
+ // belong to proper instructions.
+ MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop);
+ return MBB1 == MBB2 ? MBB1 : NULL;
+}
+
+bool
+LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *PHI = *I;
+ if (PHI->isUnused() || !PHI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
+ // Conservatively return true instead of scanning huge predecessor lists.
+ if (PHIMBB->pred_size() > 100)
+ return true;
+ for (MachineBasicBlock::const_pred_iterator
+ PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+ return true;
+ }
+ return false;
+}
+
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;
+
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
+ // By the way, powf() might be unavailable here. For consistency,
+ // We may take pow(double,double).
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
+
+ return (isDef + isUse) * lc;
+}
+
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getVNInfoAllocator());
+ LiveRange LR(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getMBBEndIdx(startInst->getParent()), VN);
+ Interval.addRange(LR);
+
+ return LR;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register mask functions
+//===----------------------------------------------------------------------===//
+
+bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+ BitVector &UsableRegs) {
+ if (LI.empty())
+ return false;
+ LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+
+ // Use a smaller arrays for local live ranges.
+ ArrayRef<SlotIndex> Slots;
+ ArrayRef<const uint32_t*> Bits;
+ if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) {
+ Slots = getRegMaskSlotsInBlock(MBB->getNumber());
+ Bits = getRegMaskBitsInBlock(MBB->getNumber());
+ } else {
+ Slots = getRegMaskSlots();
+ Bits = getRegMaskBits();
+ }
+
+ // We are going to enumerate all the register mask slots contained in LI.
+ // Start with a binary search of RegMaskSlots to find a starting point.
+ ArrayRef<SlotIndex>::iterator SlotI =
+ std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // No slots in range, LI begins after the last call.
+ if (SlotI == SlotE)
+ return false;
+
+ bool Found = false;
+ for (;;) {
+ assert(*SlotI >= LiveI->start);
+ // Loop over all slots overlapping this segment.
+ while (*SlotI < LiveI->end) {
+ // *SlotI overlaps LI. Collect mask bits.
+ if (!Found) {
+ // This is the first overlap. Initialize UsableRegs to all ones.
+ UsableRegs.clear();
+ UsableRegs.resize(TRI->getNumRegs(), true);
+ Found = true;
+ }
+ // Remove usable registers clobbered by this mask.
+ UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]);
+ if (++SlotI == SlotE)
+ return Found;
+ }
+ // *SlotI is beyond the current LI segment.
+ LiveI = LI.advanceTo(LiveI, *SlotI);
+ if (LiveI == LiveE)
+ return Found;
+ // Advance SlotI until it overlaps.
+ while (*SlotI < LiveI->start)
+ if (++SlotI == SlotE)
+ return Found;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// IntervalUpdate class.
+//===----------------------------------------------------------------------===//
+
+// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+class LiveIntervals::HMEditor {
+private:
+ LiveIntervals& LIS;
+ const MachineRegisterInfo& MRI;
+ const TargetRegisterInfo& TRI;
+ SlotIndex OldIdx;
+ SlotIndex NewIdx;
+ SmallPtrSet<LiveInterval*, 8> Updated;
+ bool UpdateFlags;
+
+public:
+ HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
+ const TargetRegisterInfo& TRI,
+ SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags)
+ : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx),
+ UpdateFlags(UpdateFlags) {}
+
+ // FIXME: UpdateFlags is a workaround that creates live intervals for all
+ // physregs, even those that aren't needed for regalloc, in order to update
+ // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
+ // flags, and postRA passes will use a live register utility instead.
+ LiveInterval *getRegUnitLI(unsigned Unit) {
+ if (UpdateFlags)
+ return &LIS.getRegUnit(Unit);
+ return LIS.getCachedRegUnit(Unit);
+ }
+
+ /// Update all live ranges touched by MI, assuming a move from OldIdx to
+ /// NewIdx.
+ void updateAllRanges(MachineInstr *MI) {
+ DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
+ bool hasRegMask = false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (MO->isRegMask())
+ hasRegMask = true;
+ if (!MO->isReg())
+ continue;
+ // Aggressively clear all kill flags.
+ // They are reinserted by VirtRegRewriter.
+ if (MO->isUse())
+ MO->setIsKill(false);
+
+ unsigned Reg = MO->getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ updateRange(LIS.getInterval(Reg));
+ continue;
+ }
+
+ // For physregs, only update the regunits that actually have a
+ // precomputed live range.
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = getRegUnitLI(*Units))
+ updateRange(*LI);
+ }
+ if (hasRegMask)
+ updateRegMaskSlots();
+ }
+
+private:
+ /// Update a single live range, assuming an instruction has been moved from
+ /// OldIdx to NewIdx.
+ void updateRange(LiveInterval &LI) {
+ if (!Updated.insert(&LI))
+ return;
+ DEBUG({
+ dbgs() << " ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ dbgs() << PrintReg(LI.reg);
+ else
+ dbgs() << PrintRegUnit(LI.reg, &TRI);
+ dbgs() << ":\t" << LI << '\n';
+ });
+ if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
+ handleMoveDown(LI);
+ else
+ handleMoveUp(LI);
+ DEBUG(dbgs() << " -->\t" << LI << '\n');
+ LI.verify();
+ }
+
+ /// Update LI to reflect an instruction has been moved downwards from OldIdx
+ /// to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Move def to NewIdx, assert endpoint after NewIdx.
+ ///
+ /// 2. Live def at OldIdx, killed at NewIdx:
+ /// Change to dead def at NewIdx.
+ /// (Happens when bundling def+kill together).
+ ///
+ /// 3. Dead def at OldIdx:
+ /// Move def to NewIdx, possibly across another live value.
+ ///
+ /// 4. Def at OldIdx AND at NewIdx:
+ /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value read at OldIdx, killed before NewIdx:
+ /// Extend kill to NewIdx.
+ ///
+ void handleMoveDown(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ return;
+
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ bool isKill = SlotIndex::isSameInstr(OldIdx, I->end);
+ // If the live-in value already extends to NewIdx, there is nothing to do.
+ if (!SlotIndex::isEarlierInstr(I->end, NewIdx))
+ return;
+ // Aggressively remove all kill flags from the old kill point.
+ // Kill flags shouldn't be used while live intervals exist, they will be
+ // reinserted by VirtRegRewriter.
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end))
+ for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse())
+ MO->setIsKill(false);
+ // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by
+ // overlapping ranges. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ // If this was a kill, there may also be a def. Otherwise we're done.
+ if (!isKill)
+ return;
+ ++I;
+ }
+
+ // Check for a def at OldIdx.
+ if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start))
+ return;
+ // We have a def at OldIdx.
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+ // If the defined value extends beyond NewIdx, just move the def down.
+ // This is case 1 above.
+ if (SlotIndex::isEarlierInstr(NewIdx, I->end)) {
+ I->start = DefVNI->def;
+ return;
+ }
+ // The remaining possibilities are now:
+ // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx).
+ // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot().
+ // In either case, it is possible that there is an existing def at NewIdx.
+ assert((I->end == OldIdx.getDeadSlot() ||
+ SlotIndex::isSameInstr(I->end, NewIdx)) &&
+ "Cannot move def below kill");
+ LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot());
+ if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ // There is an existing def at NewIdx, case 4 above. The def at OldIdx is
+ // coalesced into that value.
+ assert(NewI->valno != DefVNI && "Multiple defs of value?");
+ LI.removeValNo(DefVNI);
+ return;
+ }
+ // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
+ // If the def at OldIdx was dead, we allow it to be moved across other LI
+ // values. The new range should be placed immediately before NewI, move any
+ // intermediate ranges up.
+ assert(NewI != I && "Inconsistent iterators");
+ std::copy(llvm::next(I), NewI, I);
+ *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
+ }
+
+ /// Update LI to reflect an instruction has been moved upwards from OldIdx
+ /// to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Hoist def to NewIdx.
+ ///
+ /// 2. Dead def at OldIdx:
+ /// Hoist def+end to NewIdx, possibly move across other values.
+ ///
+ /// 3. Dead def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at OldIdx, coalescing it with existing value.
+ ///
+ /// 4. Live def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value killed at OldIdx:
+ /// Hoist kill to NewIdx, then scan for last kill between NewIdx and
+ /// OldIdx.
+ ///
+ void handleMoveUp(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ return;
+
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // If the live-in value isn't killed here, there is nothing to do.
+ if (!SlotIndex::isSameInstr(OldIdx, I->end))
+ return;
+ // Adjust I->end to end at NewIdx. If we are hoisting a kill above
+ // another use, we need to search for that use. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ ++I;
+ // If OldIdx also defines a value, there couldn't have been another use.
+ if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // No def, search for the new kill.
+ // This can never be an early clobber kill since there is no def.
+ llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot();
+ return;
+ }
+ }
+
+ // Now deal with the def at OldIdx.
+ assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?");
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+
+ // Check for an existing def at NewIdx.
+ LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot());
+ if (SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ assert(NewI->valno != DefVNI && "Same value defined more than once?");
+ // There is an existing def at NewIdx.
+ if (I->end.isDead()) {
+ // Case 3: Remove the dead def at OldIdx.
+ LI.removeValNo(DefVNI);
+ return;
+ }
+ // Case 4: Replace def at NewIdx with live def at OldIdx.
+ I->start = DefVNI->def;
+ LI.removeValNo(NewI->valno);
+ return;
+ }
+
+ // There is no existing def at NewIdx. Hoist DefVNI.
+ if (!I->end.isDead()) {
+ // Leave the end point of a live def.
+ I->start = DefVNI->def;
+ return;
+ }
+
+ // DefVNI is a dead def. It may have been moved across other values in LI,
+ // so move I up to NewI. Slide [NewI;I) down one position.
+ std::copy_backward(NewI, I, llvm::next(I));
+ *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
+ }
+
+ void updateRegMaskSlots() {
+ SmallVectorImpl<SlotIndex>::iterator RI =
+ std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
+ OldIdx);
+ assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
+ "No RegMask at OldIdx.");
+ *RI = NewIdx.getRegSlot();
+ assert((RI == LIS.RegMaskSlots.begin() ||
+ SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) &&
+ "Cannot move regmask instruction above another call");
+ assert((llvm::next(RI) == LIS.RegMaskSlots.end() ||
+ SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) &&
+ "Cannot move regmask instruction below another call");
+ }
+
+ // Return the last use of reg between NewIdx and OldIdx.
+ SlotIndex findLastUseBefore(unsigned Reg) {
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ SlotIndex LastUse = NewIdx;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(Reg),
+ UE = MRI.use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ if (InstSlot > LastUse && InstSlot < OldIdx)
+ LastUse = InstSlot;
+ }
+ return LastUse;
+ }
+
+ // This is a regunit interval, so scanning the use list could be very
+ // expensive. Scan upwards from OldIdx instead.
+ assert(NewIdx < OldIdx && "Expected upwards move");
+ SlotIndexes *Indexes = LIS.getSlotIndexes();
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx);
+
+ // OldIdx may not correspond to an instruction any longer, so set MII to
+ // point to the next instruction after OldIdx, or MBB->end().
+ MachineBasicBlock::iterator MII = MBB->end();
+ if (MachineInstr *MI = Indexes->getInstructionFromIndex(
+ Indexes->getNextNonNullIndex(OldIdx)))
+ if (MI->getParent() == MBB)
+ MII = MI;
+
+ MachineBasicBlock::iterator Begin = MBB->begin();
+ while (MII != Begin) {
+ if ((--MII)->isDebugValue())
+ continue;
+ SlotIndex Idx = Indexes->getInstructionIndex(MII);
+
+ // Stop searching when NewIdx is reached.
+ if (!SlotIndex::isEarlierInstr(NewIdx, Idx))
+ return NewIdx;
+
+ // Check if MII uses Reg.
+ for (MIBundleOperands MO(MII); MO.isValid(); ++MO)
+ if (MO->isReg() &&
+ TargetRegisterInfo::isPhysicalRegister(MO->getReg()) &&
+ TRI.hasRegUnit(MO->getReg(), Reg))
+ return Idx;
+ }
+ // Didn't reach NewIdx. It must be the first instruction in the block.
+ return NewIdx;
+ }
+};
+
+void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) {
+ assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
+ Indexes->removeMachineInstrFromMaps(MI);
+ SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
+ assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
+ OldIndex < getMBBEndIdx(MI->getParent()) &&
+ "Cannot handle moves across basic block boundaries.");
+
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
+}
+
+void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
+ MachineInstr* BundleStart,
+ bool UpdateFlags) {
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
+ SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
+}
+
+void
+LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ ArrayRef<unsigned> OrigRegs) {
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !Indexes->hasIndex(Begin))
+ --Begin;
+ while (End != MBB->end() && !Indexes->hasIndex(End))
+ ++End;
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB).getPrevSlot();
+ else
+ endIdx = getInstructionIndex(End);
+
+ Indexes->repairIndexesInRange(MBB, Begin, End);
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MOI->getReg()) &&
+ !hasInterval(MOI->getReg())) {
+ LiveInterval &LI = getOrCreateInterval(MOI->getReg());
+ computeVirtRegInterval(&LI);
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) {
+ unsigned Reg = OrigRegs[i];
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ LiveInterval &LI = getInterval(Reg);
+ // FIXME: Should we support undefs that gain defs?
+ if (!LI.hasAtLeastOneValue())
+ continue;
+
+ LiveInterval::iterator LII = LI.find(endIdx);
+ SlotIndex lastUseIdx;
+ if (LII != LI.end() && LII->start < endIdx)
+ lastUseIdx = LII->end;
+ else
+ --LII;
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+
+ SlotIndex instrIdx = getInstructionIndex(MI);
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ bool isEndValid = getInstructionFromIndex(LII->end);
+
+ // FIXME: This doesn't currently handle early-clobber or multiple removed
+ // defs inside of the region to repair.
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ const MachineOperand &MO = *OI;
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ if (MO.isDef()) {
+ if (!isStartValid) {
+ if (LII->end.isDead()) {
+ SlotIndex prevStart;
+ if (LII != LI.begin())
+ prevStart = llvm::prior(LII)->start;
+
+ // FIXME: This could be more efficient if there was a removeRange
+ // method that returned an iterator.
+ LI.removeRange(*LII, true);
+ if (prevStart.isValid())
+ LII = LI.find(prevStart);
+ else
+ LII = LI.begin();
+ } else {
+ LII->start = instrIdx.getRegSlot();
+ LII->valno->def = instrIdx.getRegSlot();
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ continue;
+ }
+ }
+
+ if (!lastUseIdx.isValid()) {
+ VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
+ VNInfoAllocator);
+ LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI);
+ LII = LI.addRange(LR);
+ } else if (LII->start != instrIdx.getRegSlot()) {
+ VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
+ VNInfoAllocator);
+ LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI);
+ LII = LI.addRange(LR);
+ }
+
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ } else if (MO.isUse()) {
+ // FIXME: This should probably be handled outside of this branch,
+ // either as part of the def case (for defs inside of the region) or
+ // after the loop over the region.
+ if (!isEndValid && !LII->end.isBlock())
+ LII->end = instrIdx.getRegSlot();
+ if (!lastUseIdx.isValid())
+ lastUseIdx = instrIdx.getRegSlot();
+ }
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 000000000000..d5a81a311c64
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,204 @@
+//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
+void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Insert each of the virtual register's live segments into the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ while (SegPos.valid()) {
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+ if (++RegPos == RegEnd)
+ return;
+ SegPos.advanceTo(RegPos->start);
+ }
+
+ // We have reached the end of Segments, so it is no longer necessary to search
+ // for the insertion position.
+ // It is faster to insert the end first.
+ --RegEnd;
+ SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg);
+ for (; RegPos != RegEnd; ++RegPos, ++SegPos)
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Remove each of the virtual register's live segments from the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ for (;;) {
+ assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+ SegPos.erase();
+ if (!SegPos.valid())
+ return;
+
+ // Skip all segments that may have been coalesced.
+ RegPos = VirtReg.advanceTo(RegPos, SegPos.start());
+ if (RegPos == RegEnd)
+ return;
+
+ SegPos.advanceTo(RegPos->start);
+ }
+}
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ if (empty()) {
+ OS << " empty\n";
+ return;
+ }
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ OS << " [" << SI.start() << ' ' << SI.stop() << "):"
+ << PrintReg(SI.value()->reg, TRI);
+ }
+ OS << '\n';
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+ for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+ VisitedVRegs.set(SI.value()->reg);
+}
+#endif //!NDEBUG
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+ SmallVectorImpl<LiveInterval*>::const_iterator I =
+ std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
+ return I != InterferingVRegs.end();
+}
+
+// Collect virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The query state is one of:
+//
+// 1. CheckedFirstInterference == false: Iterators are uninitialized.
+// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused.
+// 3. Iterators left at the last seen intersection.
+//
+unsigned LiveIntervalUnion::Query::
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
+ // Fast path return if we already have the desired information.
+ if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+
+ // Set up iterators on the first call.
+ if (!CheckedFirstInterference) {
+ CheckedFirstInterference = true;
+
+ // Quickly skip interference check for empty sets.
+ if (VirtReg->empty() || LiveUnion->empty()) {
+ SeenAllInterferences = true;
+ return 0;
+ }
+
+ // In most cases, the union will start before VirtReg.
+ VirtRegI = VirtReg->begin();
+ LiveUnionI.setMap(LiveUnion->getMap());
+ LiveUnionI.find(VirtRegI->start);
+ }
+
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveInterval *RecentReg = 0;
+ while (LiveUnionI.valid()) {
+ assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg");
+
+ // Check for overlapping interference.
+ while (VirtRegI->start < LiveUnionI.stop() &&
+ VirtRegI->end > LiveUnionI.start()) {
+ // This is an overlap, record the interfering register.
+ LiveInterval *VReg = LiveUnionI.value();
+ if (VReg != RecentReg && !isSeenInterference(VReg)) {
+ RecentReg = VReg;
+ InterferingVRegs.push_back(VReg);
+ if (InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+ }
+ // This LiveUnion segment is no longer interesting.
+ if (!(++LiveUnionI).valid()) {
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+ }
+ }
+
+ // The iterators are now not overlapping, LiveUnionI has been advanced
+ // beyond VirtRegI.
+ assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap");
+
+ // Advance the iterator that ends first.
+ VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start());
+ if (VirtRegI == VirtRegEnd)
+ break;
+
+ // Detect overlap, handle above.
+ if (VirtRegI->start < LiveUnionI.stop())
+ continue;
+
+ // Still not overlapping. Catch up LiveUnionI.
+ LiveUnionI.advanceTo(VirtRegI->start);
+ }
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+}
+
+void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
+ unsigned NSize) {
+ // Reuse existing allocation.
+ if (NSize == Size)
+ return;
+ clear();
+ Size = NSize;
+ LIUs = static_cast<LiveIntervalUnion*>(
+ malloc(sizeof(LiveIntervalUnion)*NSize));
+ for (unsigned i = 0; i != Size; ++i)
+ new(LIUs + i) LiveIntervalUnion(Alloc);
+}
+
+void LiveIntervalUnion::Array::clear() {
+ if (!LIUs)
+ return;
+ for (unsigned i = 0; i != Size; ++i)
+ LIUs[i].~LiveIntervalUnion();
+ free(LIUs);
+ Size = 0;
+ LIUs = 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
new file mode 100644
index 000000000000..dede490d91ba
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -0,0 +1,380 @@
+//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the LiveRangeCalc class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveRangeCalc.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+void LiveRangeCalc::reset(const MachineFunction *mf,
+ SlotIndexes *SI,
+ MachineDominatorTree *MDT,
+ VNInfo::Allocator *VNIA) {
+ MF = mf;
+ MRI = &MF->getRegInfo();
+ Indexes = SI;
+ DomTree = MDT;
+ Alloc = VNIA;
+
+ unsigned N = MF->getNumBlockIDs();
+ Seen.clear();
+ Seen.resize(N);
+ LiveOut.resize(N);
+ LiveIn.clear();
+}
+
+
+void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) {
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // LI->createDeadDef() will deduplicate.
+ for (MachineRegisterInfo::def_iterator
+ I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ // Find the corresponding slot index.
+ SlotIndex Idx;
+ if (MI->isPHI())
+ // PHI defs begin at the basic block start index.
+ Idx = Indexes->getMBBStartIdx(MI->getParent());
+ else
+ // Instructions are either normal 'r', or early clobber 'e'.
+ Idx = Indexes->getInstructionIndex(MI)
+ .getRegSlot(I.getOperand().isEarlyClobber());
+
+ // Create the def in LI. This may find an existing def.
+ LI->createDeadDef(Idx, *Alloc);
+ }
+}
+
+
+void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all operands that read Reg. This may include partial defs.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
+ E = MRI->reg_nodbg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ // Clear all kill flags. They will be reinserted after register allocation
+ // by LiveIntervalAnalysis::addKillFlags().
+ if (MO.isUse())
+ MO.setIsKill(false);
+ if (!MO.readsReg())
+ continue;
+ // MI is reading Reg. We may have visited MI before if it happens to be
+ // reading Reg multiple times. That is OK, extend() is idempotent.
+ const MachineInstr *MI = &*I;
+
+ // Find the SlotIndex being read.
+ SlotIndex Idx;
+ if (MI->isPHI()) {
+ assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
+ // PHI operands are paired: (Reg, PredMBB).
+ // Extend the live range to be live-out from PredMBB.
+ Idx = Indexes->getMBBEndIdx(MI->getOperand(I.getOperandNo()+1).getMBB());
+ } else {
+ // This is a normal instruction.
+ Idx = Indexes->getInstructionIndex(MI).getRegSlot();
+ // Check for early-clobber redefs.
+ unsigned DefIdx;
+ if (MO.isDef()) {
+ if (MO.isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ } else if (MI->isRegTiedToDefOperand(I.getOperandNo(), &DefIdx)) {
+ // FIXME: This would be a lot easier if tied early-clobber uses also
+ // had an early-clobber flag.
+ if (MI->getOperand(DefIdx).isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ }
+ }
+ extend(LI, Idx, Reg);
+ }
+}
+
+
+// Transfer information from the LiveIn vector to the live ranges.
+void LiveRangeCalc::updateLiveIns() {
+ LiveRangeUpdater Updater;
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
+ E = LiveIn.end(); I != E; ++I) {
+ if (!I->DomNode)
+ continue;
+ MachineBasicBlock *MBB = I->DomNode->getBlock();
+ assert(I->Value && "No live-in value found");
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(MBB);
+
+ if (I->Kill.isValid())
+ // Value is killed inside this block.
+ End = I->Kill;
+ else {
+ // The value is live-through, update LiveOut as well.
+ // Defer the Domtree lookup until it is needed.
+ assert(Seen.test(MBB->getNumber()));
+ LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0);
+ }
+ Updater.setDest(I->LI);
+ Updater.add(Start, End, I->Value);
+ }
+ LiveIn.clear();
+}
+
+
+void LiveRangeCalc::extend(LiveInterval *LI,
+ SlotIndex Kill,
+ unsigned PhysReg) {
+ assert(LI && "Missing live range");
+ assert(Kill.isValid() && "Invalid SlotIndex");
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot());
+ assert(KillMBB && "No MBB at Kill");
+
+ // Is there a def in the same MBB we can extend?
+ if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
+ return;
+
+ // Find the single reaching def, or determine if Kill is jointly dominated by
+ // multiple values, and we may need to create even more phi-defs to preserve
+ // VNInfo SSA form. Perform a search for all predecessor blocks where we
+ // know the dominating VNInfo.
+ if (findReachingDefs(LI, KillMBB, Kill, PhysReg))
+ return;
+
+ // When there were multiple different values, we may need new PHIs.
+ calculateValues();
+}
+
+
+// This function is called by a client after using the low-level API to add
+// live-out and live-in blocks. The unique value optimization is not
+// available, SplitEditor::transferValues handles that case directly anyway.
+void LiveRangeCalc::calculateValues() {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+ updateSSA();
+ updateLiveIns();
+}
+
+
+bool LiveRangeCalc::findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ unsigned PhysReg) {
+ unsigned KillMBBNum = KillMBB->getNumber();
+
+ // Block numbers where LI should be live-in.
+ SmallVector<unsigned, 16> WorkList(1, KillMBBNum);
+
+ // Remember if we have seen more than one value.
+ bool UniqueVNI = true;
+ VNInfo *TheVNI = 0;
+
+ // Using Seen as a visited set, perform a BFS for all reaching defs.
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]);
+
+#ifndef NDEBUG
+ if (MBB->pred_empty()) {
+ MBB->getParent()->verify();
+ llvm_unreachable("Use not jointly dominated by defs.");
+ }
+
+ if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ !MBB->isLiveIn(PhysReg)) {
+ MBB->getParent()->verify();
+ errs() << "The register needs to be live in to BB#" << MBB->getNumber()
+ << ", but is missing from the live-in list.\n";
+ llvm_unreachable("Invalid global physical register");
+ }
+#endif
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+
+ // Is this a known live-out block?
+ if (Seen.test(Pred->getNumber())) {
+ if (VNInfo *VNI = LiveOut[Pred].first) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ }
+ continue;
+ }
+
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(Pred);
+
+ // First time we see Pred. Try to determine the live-out value, but set
+ // it as null if Pred is live-through with an unknown value.
+ VNInfo *VNI = LI->extendInBlock(Start, End);
+ setLiveOutValue(Pred, VNI);
+ if (VNI) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ continue;
+ }
+
+ // No, we need a live-in value for Pred as well
+ if (Pred != KillMBB)
+ WorkList.push_back(Pred->getNumber());
+ else
+ // Loopback to KillMBB, so value is really live through.
+ Kill = SlotIndex();
+ }
+ }
+
+ LiveIn.clear();
+
+ // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
+ // neither require it. Skip the sorting overhead for small updates.
+ if (WorkList.size() > 4)
+ array_pod_sort(WorkList.begin(), WorkList.end());
+
+ // If a unique reaching def was found, blit in the live ranges immediately.
+ if (UniqueVNI) {
+ LiveRangeUpdater Updater(LI);
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(*I);
+ // Trim the live range in KillMBB.
+ if (*I == KillMBBNum && Kill.isValid())
+ End = Kill;
+ else
+ LiveOut[MF->getBlockNumbered(*I)] =
+ LiveOutPair(TheVNI, (MachineDomTreeNode *)0);
+ Updater.add(Start, End, TheVNI);
+ }
+ return true;
+ }
+
+ // Multiple values were found, so transfer the work list to the LiveIn array
+ // where UpdateSSA will use it as a work list.
+ LiveIn.reserve(WorkList.size());
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
+ addLiveInBlock(LI, DomTree->getNode(MBB));
+ if (MBB == KillMBB)
+ LiveIn.back().Kill = Kill;
+ }
+
+ return false;
+}
+
+
+// This is essentially the same iterative algorithm that SSAUpdater uses,
+// except we already have a dominator tree, so we don't have to recompute it.
+void LiveRangeCalc::updateSSA() {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ // Interate until convergence.
+ unsigned Changes;
+ do {
+ Changes = 0;
+ // Propagate live-out values down the dominator tree, inserting phi-defs
+ // when necessary.
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
+ E = LiveIn.end(); I != E; ++I) {
+ MachineDomTreeNode *Node = I->DomNode;
+ // Skip block if the live-in value has already been determined.
+ if (!Node)
+ continue;
+ MachineBasicBlock *MBB = Node->getBlock();
+ MachineDomTreeNode *IDom = Node->getIDom();
+ LiveOutPair IDomValue;
+
+ // We need a live-in value to a block with no immediate dominator?
+ // This is probably an unreachable block that has survived somehow.
+ bool needPHI = !IDom || !Seen.test(IDom->getBlock()->getNumber());
+
+ // IDom dominates all of our predecessors, but it may not be their
+ // immediate dominator. Check if any of them have live-out values that are
+ // properly dominated by IDom. If so, we need a phi-def here.
+ if (!needPHI) {
+ IDomValue = LiveOut[IDom->getBlock()];
+
+ // Cache the DomTree node that defined the value.
+ if (IDomValue.first && !IDomValue.second)
+ LiveOut[IDom->getBlock()].second = IDomValue.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ LiveOutPair &Value = LiveOut[*PI];
+ if (!Value.first || Value.first == IDomValue.first)
+ continue;
+
+ // Cache the DomTree node that defined the value.
+ if (!Value.second)
+ Value.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(Value.first->def));
+
+ // This predecessor is carrying something other than IDomValue.
+ // It could be because IDomValue hasn't propagated yet, or it could be
+ // because MBB is in the dominance frontier of that value.
+ if (DomTree->dominates(IDom, Value.second)) {
+ needPHI = true;
+ break;
+ }
+ }
+ }
+
+ // The value may be live-through even if Kill is set, as can happen when
+ // we are called from extendRange. In that case LiveOutSeen is true, and
+ // LiveOut indicates a foreign or missing value.
+ LiveOutPair &LOP = LiveOut[MBB];
+
+ // Create a phi-def if required.
+ if (needPHI) {
+ ++Changes;
+ assert(Alloc && "Need VNInfo allocator to create PHI-defs");
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(MBB);
+ VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
+ I->Value = VNI;
+ // This block is done, we know the final value.
+ I->DomNode = 0;
+
+ // Add liveness since updateLiveIns now skips this node.
+ if (I->Kill.isValid())
+ I->LI->addRange(LiveRange(Start, I->Kill, VNI));
+ else {
+ I->LI->addRange(LiveRange(Start, End, VNI));
+ LOP = LiveOutPair(VNI, Node);
+ }
+ } else if (IDomValue.first) {
+ // No phi-def here. Remember incoming value.
+ I->Value = IDomValue.first;
+
+ // If the IDomValue is killed in the block, don't propagate through.
+ if (I->Kill.isValid())
+ continue;
+
+ // Propagate IDomValue if it isn't killed:
+ // MBB is live-out and doesn't define its own value.
+ if (LOP.first == IDomValue.first)
+ continue;
+ ++Changes;
+ LOP = IDomValue;
+ }
+ }
+ } while (Changes);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
new file mode 100644
index 000000000000..57cab7b34220
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -0,0 +1,242 @@
+//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeCalc class can be used to compute live ranges from scratch. It
+// caches information about values in the CFG to speed up repeated operations
+// on the same live range. The cache can be shared by non-overlapping live
+// ranges. SplitKit uses that when computing the live range of split products.
+//
+// A low-level interface is available to clients that know where a variable is
+// live, but don't know which value it has as every point. LiveRangeCalc will
+// propagate values down the dominator tree, and even insert PHI-defs where
+// needed. SplitKit uses this faster interface when possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGECALC_H
+#define LLVM_CODEGEN_LIVERANGECALC_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+/// Forward declarations for MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
+class LiveRangeCalc {
+ const MachineFunction *MF;
+ const MachineRegisterInfo *MRI;
+ SlotIndexes *Indexes;
+ MachineDominatorTree *DomTree;
+ VNInfo::Allocator *Alloc;
+
+ /// Seen - Bit vector of active entries in LiveOut, also used as a visited
+ /// set by findReachingDefs. One entry per basic block, indexed by block
+ /// number. This is kept as a separate bit vector because it can be cleared
+ /// quickly when switching live ranges.
+ BitVector Seen;
+
+ /// LiveOutPair - A value and the block that defined it. The domtree node is
+ /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
+ typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+
+ /// LiveOutMap - Map basic blocks to the value leaving the block.
+ typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+ /// LiveOut - Map each basic block where a live range is live out to the
+ /// live-out value and its defining block.
+ ///
+ /// For every basic block, MBB, one of these conditions shall be true:
+ ///
+ /// 1. !Seen.count(MBB->getNumber())
+ /// Blocks without a Seen bit are ignored.
+ /// 2. LiveOut[MBB].second.getNode() == MBB
+ /// The live-out value is defined in MBB.
+ /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB]
+ /// The live-out value passses through MBB. All predecessors must carry
+ /// the same value.
+ ///
+ /// The domtree node may be null, it can be computed.
+ ///
+ /// The map can be shared by multiple live ranges as long as no two are
+ /// live-out of the same block.
+ LiveOutMap LiveOut;
+
+ /// LiveInBlock - Information about a basic block where a live range is known
+ /// to be live-in, but the value has not yet been determined.
+ struct LiveInBlock {
+ // LI - The live range that is live-in to this block. The algorithms can
+ // handle multiple non-overlapping live ranges simultaneously.
+ LiveInterval *LI;
+
+ // DomNode - Dominator tree node for the block.
+ // Cleared when the final value has been determined and LI has been updated.
+ MachineDomTreeNode *DomNode;
+
+ // Position in block where the live-in range ends, or SlotIndex() if the
+ // range passes through the block. When the final value has been
+ // determined, the range from the block start to Kill will be added to LI.
+ SlotIndex Kill;
+
+ // Live-in value filled in by updateSSA once it is known.
+ VNInfo *Value;
+
+ LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill)
+ : LI(li), DomNode(node), Kill(kill), Value(0) {}
+ };
+
+ /// LiveIn - Work list of blocks where the live-in value has yet to be
+ /// determined. This list is typically computed by findReachingDefs() and
+ /// used as a work list by updateSSA(). The low-level interface may also be
+ /// used to add entries directly.
+ SmallVector<LiveInBlock, 16> LiveIn;
+
+ /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set
+ /// of defs that can reach it.
+ ///
+ /// If only one def can reach Kill, all paths from the def to kill are added
+ /// to LI, and the function returns true.
+ ///
+ /// If multiple values can reach Kill, the blocks that need LI to be live in
+ /// are added to the LiveIn array, and the function returns false.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
+ bool findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ unsigned PhysReg);
+
+ /// updateSSA - Compute the values that will be live in to all requested
+ /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
+ ///
+ /// Every live-in block must be jointly dominated by the added live-out
+ /// blocks. No values are read from the live ranges.
+ void updateSSA();
+
+ /// Add liveness as specified in the LiveIn vector.
+ void updateLiveIns();
+
+public:
+ LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {}
+
+ //===--------------------------------------------------------------------===//
+ // High-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Calculate live ranges from scratch.
+ //
+
+ /// reset - Prepare caches for a new set of non-overlapping live ranges. The
+ /// caches must be reset before attempting calculations with a live range
+ /// that may overlap a previously computed live range, and before the first
+ /// live range in a function. If live ranges are not known to be
+ /// non-overlapping, call reset before each.
+ void reset(const MachineFunction *MF,
+ SlotIndexes*,
+ MachineDominatorTree*,
+ VNInfo::Allocator*);
+
+ /// calculate - Calculate the live range of a virtual register from its defs
+ /// and uses. LI must be empty with no values.
+ void calculate(LiveInterval *LI);
+
+ //===--------------------------------------------------------------------===//
+ // Mid-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Modify existing live ranges.
+ //
+
+ /// extend - Extend the live range of LI to reach Kill.
+ ///
+ /// The existing values in LI must be live so they jointly dominate Kill. If
+ /// Kill is not dominated by a single existing value, PHI-defs are inserted
+ /// as required to preserve SSA form. If Kill is known to be dominated by a
+ /// single existing value, Alloc may be null.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
+ void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0);
+
+ /// createDeadDefs - Create a dead def in LI for every def operand of Reg.
+ /// Each instruction defining Reg gets a new VNInfo with a corresponding
+ /// minimal live range.
+ void createDeadDefs(LiveInterval *LI, unsigned Reg);
+
+ /// createDeadDefs - Create a dead def in LI for every def of LI->reg.
+ void createDeadDefs(LiveInterval *LI) {
+ createDeadDefs(LI, LI->reg);
+ }
+
+ /// extendToUses - Extend the live range of LI to reach all uses of Reg.
+ ///
+ /// All uses must be jointly dominated by existing liveness. PHI-defs are
+ /// inserted as needed to preserve SSA form.
+ void extendToUses(LiveInterval *LI, unsigned Reg);
+
+ /// extendToUses - Extend the live range of LI to reach all uses of LI->reg.
+ void extendToUses(LiveInterval *LI) {
+ extendToUses(LI, LI->reg);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Low-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // These functions can be used to compute live ranges where the live-in and
+ // live-out blocks are already known, but the SSA value in each block is
+ // unknown.
+ //
+ // After calling reset(), add known live-out values and known live-in blocks.
+ // Then call calculateValues() to compute the actual value that is
+ // live-in to each block, and add liveness to the live ranges.
+ //
+
+ /// setLiveOutValue - Indicate that VNI is live out from MBB. The
+ /// calculateValues() function will not add liveness for MBB, the caller
+ /// should take care of that.
+ ///
+ /// VNI may be null only if MBB is a live-through block also passed to
+ /// addLiveInBlock().
+ void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
+ Seen.set(MBB->getNumber());
+ LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0);
+ }
+
+ /// addLiveInBlock - Add a block with an unknown live-in value. This
+ /// function can only be called once per basic block. Once the live-in value
+ /// has been determined, calculateValues() will add liveness to LI.
+ ///
+ /// @param LI The live range that is live-in to the block.
+ /// @param DomNode The domtree node for the block.
+ /// @param Kill Index in block where LI is killed. If the value is
+ /// live-through, set Kill = SLotIndex() and also call
+ /// setLiveOutValue(MBB, 0).
+ void addLiveInBlock(LiveInterval *LI,
+ MachineDomTreeNode *DomNode,
+ SlotIndex Kill = SlotIndex()) {
+ LiveIn.push_back(LiveInBlock(LI, DomNode, Kill));
+ }
+
+ /// calculateValues - Calculate the value that will be live-in to each block
+ /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA
+ /// form. Add liveness to all live-in blocks up to the Kill point, or the
+ /// whole block for live-through blocks.
+ ///
+ /// Every predecessor of a live-in block must have been given a value with
+ /// setLiveOutValue, the value may be null for live-trough blocks.
+ void calculateValues();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 000000000000..7793e96c3540
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,387 @@
+//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
+STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
+STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
+
+void LiveRangeEdit::Delegate::anchor() { }
+
+LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) {
+ unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ if (VRM) {
+ VRM->grow();
+ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+ }
+ LiveInterval &LI = LIS.getOrCreateInterval(VReg);
+ NewRegs.push_back(&LI);
+ return LI;
+}
+
+bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
+ const MachineInstr *DefMI,
+ AliasAnalysis *aa) {
+ assert(DefMI && "Missing instruction");
+ ScannedRemattable = true;
+ if (!TII.isTriviallyReMaterializable(DefMI, aa))
+ return false;
+ Remattable.insert(VNI);
+ return true;
+}
+
+void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
+ for (LiveInterval::vni_iterator I = getParent().vni_begin(),
+ E = getParent().vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
+ if (!DefMI)
+ continue;
+ checkRematerializable(VNI, DefMI, aa);
+ }
+ ScannedRemattable = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
+ if (!ScannedRemattable)
+ scanRemattable(aa);
+ return !Remattable.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx) const {
+ OrigIdx = OrigIdx.getRegSlot(true);
+ UseIdx = UseIdx.getRegSlot(true);
+ for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OrigMI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+ continue;
+
+ // We can't remat physreg uses, unless it is a constant.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
+ continue;
+ return false;
+ }
+
+ LiveInterval &li = LIS.getInterval(MO.getReg());
+ const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+
+ // Don't allow rematerialization immediately after the original def.
+ // It would be incorrect if OrigMI redefines the register.
+ // See PR14098.
+ if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
+ return false;
+
+ if (OVNI != li.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove) {
+ assert(ScannedRemattable && "Call anyRematerializable first");
+
+ // Use scanRemattable info.
+ if (!Remattable.count(RM.ParentVNI))
+ return false;
+
+ // No defining instruction provided.
+ SlotIndex DefIdx;
+ if (RM.OrigMI)
+ DefIdx = LIS.getInstructionIndex(RM.OrigMI);
+ else {
+ DefIdx = RM.ParentVNI->def;
+ RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
+ assert(RM.OrigMI && "No defining instruction for remattable value");
+ }
+
+ // If only cheap remats were requested, bail out early.
+ if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove())
+ return false;
+
+ // Verify that all used registers are available with the same values.
+ if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx))
+ return false;
+
+ return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ const TargetRegisterInfo &tri,
+ bool Late) {
+ assert(RM.OrigMI && "Invalid remat");
+ TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ Rematted.insert(RM.ParentVNI);
+ return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
+ .getRegSlot();
+}
+
+void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
+ if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg))
+ LIS.removeInterval(Reg);
+}
+
+bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr*> &Dead) {
+ MachineInstr *DefMI = 0, *UseMI = 0;
+
+ // Check that there is a single def and a single use.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg),
+ E = MRI.reg_nodbg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MO.isDef()) {
+ if (DefMI && DefMI != MI)
+ return false;
+ if (!MI->canFoldAsLoad())
+ return false;
+ DefMI = MI;
+ } else if (!MO.isUndef()) {
+ if (UseMI && UseMI != MI)
+ return false;
+ // FIXME: Targets don't know how to fold subreg uses.
+ if (MO.getSubReg())
+ return false;
+ UseMI = MI;
+ }
+ }
+ if (!DefMI || !UseMI)
+ return false;
+
+ // Since we're moving the DefMI load, make sure we're not extending any live
+ // ranges.
+ if (!allUsesAvailableAt(DefMI,
+ LIS.getInstructionIndex(DefMI),
+ LIS.getInstructionIndex(UseMI)))
+ return false;
+
+ // We also need to make sure it is safe to move the load.
+ // Assume there are stores between DefMI and UseMI.
+ bool SawStore = true;
+ if (!DefMI->isSafeToMove(&TII, 0, SawStore))
+ return false;
+
+ DEBUG(dbgs() << "Try to fold single def: " << *DefMI
+ << " into single use: " << *UseMI);
+
+ SmallVector<unsigned, 8> Ops;
+ if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
+ return false;
+
+ MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI);
+ if (!FoldMI)
+ return false;
+ DEBUG(dbgs() << " folded: " << *FoldMI);
+ LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI);
+ UseMI->eraseFromParent();
+ DefMI->addRegisterDead(LI->reg, 0);
+ Dead.push_back(DefMI);
+ ++NumDCEFoldedLoads;
+ return true;
+}
+
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ ArrayRef<unsigned> RegsBeingSpilled) {
+ SetVector<LiveInterval*,
+ SmallVector<LiveInterval*, 8>,
+ SmallPtrSet<LiveInterval*, 8> > ToShrink;
+
+ for (;;) {
+ // Erase all dead defs.
+ while (!Dead.empty()) {
+ MachineInstr *MI = Dead.pop_back_val();
+ assert(MI->allDefsAreDead() && "Def isn't really dead");
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+
+ // Never delete inline asm.
+ if (MI->isInlineAsm()) {
+ DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
+ continue;
+ }
+
+ // Use the same criteria as DeadMachineInstructionElim.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(&TII, 0, SawStore)) {
+ DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
+ continue;
+ }
+
+ DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+
+ // Collect virtual registers to be erased after MI is gone.
+ SmallVector<unsigned, 8> RegsToErase;
+ bool ReadsPhysRegs = false;
+
+ // Check for live intervals that may shrink
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg())
+ continue;
+ unsigned Reg = MOI->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Check if MI reads any unreserved physregs.
+ if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
+ ReadsPhysRegs = true;
+ continue;
+ }
+ LiveInterval &LI = LIS.getInterval(Reg);
+
+ // Shrink read registers, unless it is likely to be expensive and
+ // unlikely to change anything. We typically don't want to shrink the
+ // PIC base register that has lots of uses everywhere.
+ // Always shrink COPY uses that probably come from live range splitting.
+ if (MI->readsVirtualRegister(Reg) &&
+ (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) ||
+ LI.killedAt(Idx)))
+ ToShrink.insert(&LI);
+
+ // Remove defined value.
+ if (MOI->isDef()) {
+ if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
+ if (TheDelegate)
+ TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
+ LI.removeValNo(VNI);
+ if (LI.empty())
+ RegsToErase.push_back(Reg);
+ }
+ }
+ }
+
+ // Currently, we don't support DCE of physreg live ranges. If MI reads
+ // any unreserved physregs, don't erase the instruction, but turn it into
+ // a KILL instead. This way, the physreg live ranges don't end up
+ // dangling.
+ // FIXME: It would be better to have something like shrinkToUses() for
+ // physregs. That could potentially enable more DCE and it would free up
+ // the physreg. It would not happen often, though.
+ if (ReadsPhysRegs) {
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ // Remove all operands that aren't physregs.
+ for (unsigned i = MI->getNumOperands(); i; --i) {
+ const MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+ MI->RemoveOperand(i-1);
+ }
+ DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
+ } else {
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
+
+ // Erase any virtregs that are now empty and unused. There may be <undef>
+ // uses around. Keep the empty live range in that case.
+ for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) {
+ unsigned Reg = RegsToErase[i];
+ if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) {
+ ToShrink.remove(&LIS.getInterval(Reg));
+ eraseVirtReg(Reg);
+ }
+ }
+ }
+
+ if (ToShrink.empty())
+ break;
+
+ // Shrink just one live interval. Then delete new dead defs.
+ LiveInterval *LI = ToShrink.back();
+ ToShrink.pop_back();
+ if (foldAsLoad(LI, Dead))
+ continue;
+ if (TheDelegate)
+ TheDelegate->LRE_WillShrinkVirtReg(LI->reg);
+ if (!LIS.shrinkToUses(LI, &Dead))
+ continue;
+
+ // Don't create new intervals for a register being spilled.
+ // The new intervals would have to be spilled anyway so its not worth it.
+ // Also they currently aren't spilled so creating them and not spilling
+ // them results in incorrect code.
+ bool BeingSpilled = false;
+ for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
+ if (LI->reg == RegsBeingSpilled[i]) {
+ BeingSpilled = true;
+ break;
+ }
+ }
+
+ if (BeingSpilled) continue;
+
+ // LI may have been separated, create new intervals.
+ LI->RenumberValues(LIS);
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ unsigned NumComp = ConEQ.Classify(LI);
+ if (NumComp <= 1)
+ continue;
+ ++NumFracRanges;
+ bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg;
+ DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
+ SmallVector<LiveInterval*, 8> Dups(1, LI);
+ for (unsigned i = 1; i != NumComp; ++i) {
+ Dups.push_back(&createFrom(LI->reg));
+ // If LI is an original interval that hasn't been split yet, make the new
+ // intervals their own originals instead of referring to LI. The original
+ // interval must contain all the split products, and LI doesn't.
+ if (IsOriginal)
+ VRM->setIsSplitFromReg(Dups.back()->reg, 0);
+ if (TheDelegate)
+ TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
+ }
+ ConEQ.Distribute(&Dups[0], MRI);
+ DEBUG({
+ for (unsigned i = 0; i != NumComp; ++i)
+ dbgs() << '\t' << *Dups[i] << '\n';
+ });
+ }
+}
+
+void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
+ const MachineLoopInfo &Loops) {
+ VirtRegAuxInfo VRAI(MF, LIS, Loops);
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ LiveInterval &LI = **I;
+ if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
+ DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
+ << MRI.getRegClass(LI.reg)->getName() << '\n');
+ VRAI.CalculateWeightAndHint(LI);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
new file mode 100644
index 000000000000..0ef069f47827
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -0,0 +1,154 @@
+//===-- LiveRegMatrix.cpp - Track register interference -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LiveRegMatrix analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+
+char LiveRegMatrix::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+
+LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID),
+ UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {}
+
+void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.addRequiredTransitive<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ unsigned NumRegUnits = TRI->getNumRegUnits();
+ if (NumRegUnits != Matrix.size())
+ Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]);
+ Matrix.init(LIUAlloc, NumRegUnits);
+
+ // Make sure no stale queries get reused.
+ invalidateVirtRegs();
+ return false;
+}
+
+void LiveRegMatrix::releaseMemory() {
+ for (unsigned i = 0, e = Matrix.size(); i != e; ++i) {
+ Matrix[i].clear();
+ Queries[i].clear();
+ }
+}
+
+void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << ':');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ MRI->setPhysRegUsed(PhysReg);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI));
+ Matrix[*Units].unify(VirtReg);
+ }
+ ++NumAssigned;
+ DEBUG(dbgs() << '\n');
+}
+
+void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
+ unsigned PhysReg = VRM->getPhys(VirtReg.reg);
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << ':');
+ VRM->clearVirt(VirtReg.reg);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI));
+ Matrix[*Units].extract(VirtReg);
+ }
+ ++NumUnassigned;
+ DEBUG(dbgs() << '\n');
+}
+
+bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ // Check if the cached information is valid.
+ // The same BitVector can be reused for all PhysRegs.
+ // We could cache multiple VirtRegs if it becomes necessary.
+ if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) {
+ RegMaskVirtReg = VirtReg.reg;
+ RegMaskTag = UserTag;
+ RegMaskUsable.clear();
+ LIS->checkRegMaskInterference(VirtReg, RegMaskUsable);
+ }
+
+ // The BitVector is indexed by PhysReg, not register unit.
+ // Regmask interference is more fine grained than regunits.
+ // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8.
+ return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg));
+}
+
+bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ if (VirtReg.empty())
+ return false;
+ CoalescerPair CP(VirtReg.reg, PhysReg, *TRI);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes()))
+ return true;
+ return false;
+}
+
+LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
+ unsigned RegUnit) {
+ LiveIntervalUnion::Query &Q = Queries[RegUnit];
+ Q.init(UserTag, &VirtReg, &Matrix[RegUnit]);
+ return Q;
+}
+
+LiveRegMatrix::InterferenceKind
+LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
+ if (VirtReg.empty())
+ return IK_Free;
+
+ // Regmask interference is the fastest check.
+ if (checkRegMaskInterference(VirtReg, PhysReg))
+ return IK_RegMask;
+
+ // Check for fixed interference.
+ if (checkRegUnitInterference(VirtReg, PhysReg))
+ return IK_RegUnit;
+
+ // Check the matrix for virtual register interference.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (query(VirtReg, *Units).checkInterference())
+ return IK_VirtReg;
+
+ return IK_Free;
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 000000000000..be11a8fa86ef
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,86 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livestacks"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <limits>
+using namespace llvm;
+
+char LiveStacks::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+ // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+ S2IMap.clear();
+ S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getTarget().getRegisterInfo();
+ // FIXME: No analysis is being done right now. We are relying on the
+ // register allocators to provide the information.
+ return false;
+}
+
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
+ assert(Slot >= 0 && "Spill slot indice must be >= 0");
+ SS2IntervalMap::iterator I = S2IMap.find(Slot);
+ if (I == S2IMap.end()) {
+ I = S2IMap.insert(I, std::make_pair(Slot,
+ LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+ S2RCMap.insert(std::make_pair(Slot, RC));
+ } else {
+ // Use the largest common subclass register class.
+ const TargetRegisterClass *OldRC = S2RCMap[Slot];
+ S2RCMap[Slot] = TRI->getCommonSubClass(OldRC, RC);
+ }
+ return I->second;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(raw_ostream &OS, const Module*) const {
+
+ OS << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(OS);
+ int Slot = I->first;
+ const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+ if (RC)
+ OS << " [" << RC->getName() << "]\n";
+ else
+ OS << " [Unknown]\n";
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 000000000000..789eddc42774
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,826 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+char &llvm::LiveVariablesID = LiveVariables::ID;
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(UnreachableMachineBlockElimID);
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ if (Kills[i]->getParent() == MBB)
+ return Kills[i];
+ return NULL;
+}
+
+void LiveVariables::VarInfo::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << " Alive in blocks: ";
+ for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+ E = AliveBlocks.end(); I != E; ++I)
+ dbgs() << *I << ", ";
+ dbgs() << "\n Killed by:";
+ if (Kills.empty())
+ dbgs() << " No instructions.\n";
+ else {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ dbgs() << "\n #" << i << ": " << *Kills[i];
+ dbgs() << "\n";
+ }
+#endif
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+ "getVarInfo: not a virtual register!");
+ VirtRegInfo.grow(RegIdx);
+ return VirtRegInfo[RegIdx];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB,
+ std::vector<MachineBasicBlock*> &WorkList) {
+ unsigned BBNum = MBB->getNumber();
+
+ // Check to see if this basic block is one of the killing blocks. If so,
+ // remove it.
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ if (VRInfo.Kills[i]->getParent() == MBB) {
+ VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+ break;
+ }
+
+ if (MBB == DefBlock) return; // Terminate recursion
+
+ if (VRInfo.AliveBlocks.test(BBNum))
+ return; // We already know the block is live
+
+ // Mark the variable known alive in this bb
+ VRInfo.AliveBlocks.set(BBNum);
+
+ assert(MBB != &MF->front() && "Can't find reaching def for virtreg");
+ WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB) {
+ std::vector<MachineBasicBlock*> WorkList;
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *Pred = WorkList.back();
+ WorkList.pop_back();
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+ }
+}
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+ unsigned BBNum = MBB->getNumber();
+
+ VarInfo& VRInfo = getVarInfo(reg);
+
+ // Check to see if this basic block is already a kill block.
+ if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+ // Yes, this register is killed in this basic block already. Increase the
+ // live range by updating the kill instruction.
+ VRInfo.Kills.back() = MI;
+ return;
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+ // This situation can occur:
+ //
+ // ,------.
+ // | |
+ // | v
+ // | t2 = phi ... t1 ...
+ // | |
+ // | v
+ // | t1 = ...
+ // | ... = ... t1 ...
+ // | |
+ // `------'
+ //
+ // where there is a use in a PHI node that's a predecessor to the defining
+ // block. We don't want to mark all predecessors as having the value "alive"
+ // in this case.
+ if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+ // Add a new kill entry for this basic block. If this virtual register is
+ // already marked as alive in this basic block, that means it is alive in at
+ // least one of the successor blocks, it's not a kill.
+ if (!VRInfo.AliveBlocks.test(BBNum))
+ VRInfo.Kills.push_back(MI);
+
+ // Update all dominating blocks to mark them as "known live".
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI)
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+ VarInfo &VRInfo = getVarInfo(Reg);
+
+ if (VRInfo.AliveBlocks.empty())
+ // If vr is not alive in any block, then defaults to dead.
+ VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-registers that're defined by the instruction.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+ SmallSet<unsigned,4> &PartDefRegs) {
+ unsigned LastDefReg = 0;
+ unsigned LastDefDist = 0;
+ MachineInstr *LastDef = NULL;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (!Def)
+ continue;
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastDefDist) {
+ LastDefReg = SubReg;
+ LastDef = Def;
+ LastDefDist = Dist;
+ }
+ }
+
+ if (!LastDef)
+ return 0;
+
+ PartDefRegs.insert(LastDefReg);
+ for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastDef->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ continue;
+ unsigned DefReg = MO.getReg();
+ if (TRI->isSubRegister(Reg, DefReg)) {
+ PartDefRegs.insert(DefReg);
+ for (MCSubRegIterator SubRegs(DefReg, TRI); SubRegs.isValid(); ++SubRegs)
+ PartDefRegs.insert(*SubRegs);
+ }
+ }
+ return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ // If there was a previous use or a "full" def all is well.
+ if (!LastDef && !PhysRegUse[Reg]) {
+ // Otherwise, the last sub-register def implicitly defines this register.
+ // e.g.
+ // AH =
+ // AL = ... <imp-def EAX>, <imp-kill AH>
+ // = AH
+ // ...
+ // = EAX
+ // All of the sub-registers must have been defined before the use of Reg!
+ SmallSet<unsigned, 4> PartDefRegs;
+ MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
+ // If LastPartialDef is NULL, it must be using a livein register.
+ if (LastPartialDef) {
+ LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[Reg] = LastPartialDef;
+ SmallSet<unsigned, 8> Processed;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (Processed.count(SubReg))
+ continue;
+ if (PartDefRegs.count(SubReg))
+ continue;
+ // This part of Reg was defined before the last partial def. It's killed
+ // here.
+ LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+ false/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SubReg] = LastPartialDef;
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ Processed.insert(*SS);
+ }
+ }
+ } else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
+ // Last def defines the super register, add an implicit def of reg.
+ LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+
+ // Remember this use.
+ PhysRegUse[Reg] = MI;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ PhysRegUse[*SubRegs] = MI;
+}
+
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return 0;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ unsigned LastPartDefDist = 0;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist)
+ LastPartDefDist = Dist;
+ } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ return LastRefOrPartRef;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return false;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ // The whole register is used.
+ // AL =
+ // AH =
+ //
+ // = AX
+ // = AL, AX<imp-use, kill>
+ // AX =
+ //
+ // Or whole register is defined, but not used at all.
+ // AX<dead> =
+ // ...
+ // AX =
+ //
+ // Or whole register is defined, but only partly used.
+ // AX<dead> = AL<imp-def>
+ // = AL<kill>
+ // AX =
+ MachineInstr *LastPartDef = 0;
+ unsigned LastPartDefDist = 0;
+ SmallSet<unsigned, 8> PartUses;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist) {
+ LastPartDefDist = Dist;
+ LastPartDef = Def;
+ }
+ continue;
+ }
+ if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ PartUses.insert(SubReg);
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ PartUses.insert(*SS);
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ if (!PhysRegUse[Reg]) {
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+ // That is, EAX def is dead but AL def extends pass it.
+ PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (!PartUses.count(SubReg))
+ continue;
+ bool NeedDef = true;
+ if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+ MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+ if (MO) {
+ NeedDef = false;
+ assert(!MO->isDead());
+ }
+ }
+ if (NeedDef)
+ PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+ true/*IsDef*/, true/*IsImp*/));
+ MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+ if (LastSubRef)
+ LastSubRef->addRegisterKilled(SubReg, TRI, true);
+ else {
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ PhysRegUse[SubReg] = LastRefOrPartRef;
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ PhysRegUse[*SS] = LastRefOrPartRef;
+ }
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ PartUses.erase(*SS);
+ }
+ } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+ if (LastPartDef)
+ // The last partial def kills the register.
+ LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/, true/*IsKill*/));
+ else {
+ MachineOperand *MO =
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+ bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+ if (NeedEC) {
+ // If we are adding a subreg def and the superreg def is marked early
+ // clobber, add an early clobber marker to the subreg def.
+ MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+ if (MO)
+ MO->setIsEarlyClobber();
+ }
+ }
+ } else
+ LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+ return true;
+}
+
+void LiveVariables::HandleRegMask(const MachineOperand &MO) {
+ // Call HandlePhysRegKill() for all live registers clobbered by Mask.
+ // Clobbered registers are always dead, sp there is no need to use
+ // HandlePhysRegDef().
+ for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) {
+ // Skip dead regs.
+ if (!PhysRegDef[Reg] && !PhysRegUse[Reg])
+ continue;
+ // Skip mask-preserved regs.
+ if (!MO.clobbersPhysReg(Reg))
+ continue;
+ // Kill the largest clobbered super-register.
+ // This avoids needless implicit operands.
+ unsigned Super = Reg;
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
+ Super = *SR;
+ HandlePhysRegKill(Super, 0);
+ }
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ // What parts of the register are previously defined?
+ SmallSet<unsigned, 32> Live;
+ if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+ Live.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Live.insert(*SubRegs);
+ } else {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ // If a register isn't itself defined, but all parts that make up of it
+ // are defined, then consider it also defined.
+ // e.g.
+ // AL =
+ // AH =
+ // = AX
+ if (Live.count(SubReg))
+ continue;
+ if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+ Live.insert(SubReg);
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ Live.insert(*SS);
+ }
+ }
+ }
+
+ // Start from the largest piece, find the last time any part of the register
+ // is referenced.
+ HandlePhysRegKill(Reg, MI);
+ // Only some of the sub-registers are used.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (!Live.count(SubReg))
+ // Skip if this sub-register isn't defined.
+ continue;
+ HandlePhysRegKill(SubReg, MI);
+ }
+
+ if (MI)
+ Defs.push_back(Reg); // Remember this def.
+}
+
+void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.back();
+ Defs.pop_back();
+ PhysRegDef[Reg] = MI;
+ PhysRegUse[Reg] = NULL;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ PhysRegDef[SubReg] = MI;
+ PhysRegUse[SubReg] = NULL;
+ }
+ }
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+
+ unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef = new MachineInstr*[NumRegs];
+ PhysRegUse = new MachineInstr*[NumRegs];
+ PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ PHIJoins.clear();
+
+ // FIXME: LiveIntervals will be updated to remove its dependence on
+ // LiveVariables to improve compilation time and eliminate bizarre pass
+ // dependencies. Until then, we can't change much in -O0.
+ if (!MRI->isSSA())
+ report_fatal_error("regalloc=... not currently supported with -O0");
+
+ analyzePHINodes(mf);
+
+ // Calculate live variable information in depth first order on the CFG of the
+ // function. This guarantees that we will see the definition of a virtual
+ // register before its uses due to dominance properties of SSA (except for PHI
+ // nodes, which are treated as a special case).
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+
+ // Mark live-in registers as live-in.
+ SmallVector<unsigned, 4> Defs;
+ for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
+ EE = MBB->livein_end(); II != EE; ++II) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(*II, 0, Defs);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ DistanceMap.insert(std::make_pair(MI, Dist++));
+
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI->getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI->isPHI())
+ NumOperandsToProcess = 1;
+
+ // Clear kill and dead markers. LV will recompute them.
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ SmallVector<unsigned, 1> RegMasks;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask()) {
+ RegMasks.push_back(i);
+ continue;
+ }
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ MO.setIsKill(false);
+ if (MO.readsReg())
+ UseRegs.push_back(MOReg);
+ } else /*MO.isDef()*/ {
+ MO.setIsDead(false);
+ DefRegs.push_back(MOReg);
+ }
+ }
+
+ // Process all uses.
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+ unsigned MOReg = UseRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all masked registers. (Call clobbers).
+ for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
+ HandleRegMask(MI->getOperand(RegMasks[i]));
+
+ // Process all defs.
+ for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+ unsigned MOReg = DefRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegDef(MOReg, MI, Defs);
+ }
+ UpdatePhysRegDefs(MI, Defs);
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MBB);
+ }
+
+ // MachineCSE may CSE instructions which write to non-allocatable physical
+ // registers across MBBs. Remember if any reserved register is liveout.
+ SmallSet<unsigned, 4> LiveOuts;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->isLandingPad())
+ continue;
+ for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
+ LE = SuccMBB->livein_end(); LI != LE; ++LI) {
+ unsigned LReg = *LI;
+ if (!TRI->isInAllocatableClass(LReg))
+ // Ignore other live-ins, e.g. those that are live into landing pads.
+ LiveOuts.insert(LReg);
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
+ HandlePhysRegDef(i, 0, Defs);
+
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ }
+
+ // Convert and transfer the dead / killed information we have gathered into
+ // VirtRegInfo onto MI's.
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+ const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+ VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
+ else
+ VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+ }
+
+ // Check to make sure there are no unreachable blocks in the MC CFG for the
+ // function. If so, it is due to a bug in the instruction selector or some
+ // other part of the code generator if this happens.
+#ifndef NDEBUG
+ for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+ assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+ delete[] PhysRegDef;
+ delete[] PhysRegUse;
+ delete[] PHIVarInfo;
+
+ return false;
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+ MachineInstr *NewMI) {
+ VarInfo &VI = getVarInfo(Reg);
+ std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ MO.setIsKill(false);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
+ (void)removed;
+ }
+ }
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, we want to map the variable information of a virtual register
+/// which is used in a PHI node. We map that to the BB the vreg is coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i).readsReg())
+ PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI->getOperand(i).getReg());
+}
+
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+ unsigned Reg,
+ MachineRegisterInfo &MRI) {
+ unsigned Num = MBB.getNumber();
+
+ // Reg is live-through.
+ if (AliveBlocks.test(Num))
+ return true;
+
+ // Registers defined in MBB cannot be live in.
+ const MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // Reg was not defined in MBB, was it killed here?
+ return findKill(&MBB);
+}
+
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+ LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ SmallVector<MachineBasicBlock*, 8> OpSuccBlocks;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB)
+ return true;
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB1 ||
+ VI.Kills[i]->getParent() == SuccMBB2)
+ return true;
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ VI.Kills[i]->getParent()))
+ return true;
+ }
+ return false;
+}
+
+/// addNewBlock - Add a new basic block BB as an empty succcessor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB) {
+ const unsigned NumNew = BB->getNumber();
+
+ SmallSet<unsigned, 16> Defs, Kills;
+
+ MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end();
+ for (; BBI != BBE && BBI->isPHI(); ++BBI) {
+ // Record the def of the PHI node.
+ Defs.insert(BBI->getOperand(0).getReg());
+
+ // All registers used by PHI nodes in SuccBB must be live through BB.
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i+1).getMBB() == BB)
+ getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+ }
+
+ // Record all vreg defs and kills of all instructions in SuccBB.
+ for (; BBI != BBE; ++BBI) {
+ for (MachineInstr::mop_iterator I = BBI->operands_begin(),
+ E = BBI->operands_end(); I != E; ++I) {
+ if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) {
+ if (I->isDef())
+ Defs.insert(I->getReg());
+ else if (I->isKill())
+ Kills.insert(I->getReg());
+ }
+ }
+ }
+
+ // Update info for all live variables
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+ // If the Defs is defined in the successor it can't be live in BB.
+ if (Defs.count(Reg))
+ continue;
+
+ // If the register is either killed in or live through SuccBB it's also live
+ // through BB.
+ VarInfo &VI = getVarInfo(Reg);
+ if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber()))
+ VI.AliveBlocks.set(NumNew);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 000000000000..352ef942591f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,356 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "localstackalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame indices references replaced");
+
+namespace {
+ class FrameRef {
+ MachineBasicBlock::iterator MI; // Instr referencing the frame
+ int64_t LocalOffset; // Local offset of the frame idx referenced
+ public:
+ FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
+ MI(I), LocalOffset(Offset) {}
+ bool operator<(const FrameRef &RHS) const {
+ return LocalOffset < RHS.LocalOffset;
+ }
+ MachineBasicBlock::iterator getMachineInstr() { return MI; }
+ };
+
+ class LocalStackSlotPass: public MachineFunctionPass {
+ SmallVector<int64_t,16> LocalOffsets;
+
+ void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown, unsigned &MaxAlign);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ bool insertFrameReferenceRegisters(MachineFunction &Fn);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) { }
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ };
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
+INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc",
+ "Local Stack Slot Allocation", false, false)
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+
+ // If the target doesn't want/need this pass, or if there are no locals
+ // to consider, early exit.
+ if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+ return true;
+
+ // Make sure we have enough space to store the local offsets.
+ LocalOffsets.resize(MFI->getObjectIndexEnd());
+
+ // Lay out the local blob.
+ calculateFrameObjectOffsets(MF);
+
+ // Insert virtual base registers to resolve frame index references.
+ bool UsedBaseRegs = insertFrameReferenceRegisters(MF);
+
+ // Tell MFI whether any base registers were allocated. PEI will only
+ // want to use the local block allocations from this pass if there were any.
+ // Otherwise, PEI can do a bit better job of getting the alignment right
+ // without a hole at the start since it knows the alignment of the stack
+ // at the start of local allocation, and this pass doesn't.
+ MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+
+ return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+ int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
+ DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+ << LocalOffset << "\n");
+ // Keep the offset available for base register allocation
+ LocalOffsets[FrameIdx] = LocalOffset;
+ // And tell MFI about it for PEI to use later
+ MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+
+ if (!StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ ++NumAllocations;
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int64_t Offset = 0;
+ unsigned MaxAlign = 0;
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+ StackGrowsDown, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ }
+
+ // Remember how big this blob of stack space is
+ MFI->setLocalFrameSize(Offset);
+ MFI->setLocalFrameMaxAlign(MaxAlign);
+}
+
+static inline bool
+lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
+ std::pair<unsigned, int64_t> &RegOffset,
+ int64_t FrameSizeAdjust,
+ int64_t LocalFrameOffset,
+ const MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ unsigned e = Regs.size();
+ for (unsigned i = 0; i < e; ++i) {
+ RegOffset = Regs[i];
+ // Check if the relative offset from the where the base register references
+ // to the target address is in range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
+ if (TRI->isFrameOffsetLegal(MI, Offset))
+ return true;
+ }
+ return false;
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+ // Scan the function's instructions looking for frame index references.
+ // For each, ask the target if it wants a virtual base register for it
+ // based on what we can tell it about where the local will end up in the
+ // stack frame. If it wants one, re-use a suitable one we've previously
+ // allocated, or if there isn't one that fits the bill, allocate a new one
+ // and ask the target to create a defining instruction for it.
+ bool UsedBaseReg = false;
+
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Collect all of the instructions in the block that reference
+ // a frame index. Also store the frame index referenced to ease later
+ // lookup. (For any insn that has more than one FI reference, we arbitrarily
+ // choose the first one).
+ SmallVector<FrameRef, 64> FrameReferenceInsns;
+
+ // A base register definition is a register + offset pair.
+ SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ MachineInstr *MI = I;
+
+ // Debug value instructions can't be out of range, so they don't need
+ // any updates.
+ if (MI->isDebugValue())
+ continue;
+
+ // For now, allocate the base register(s) within the basic block
+ // where they're used, and don't try to keep them around outside
+ // of that. It may be beneficial to try sharing them more broadly
+ // than that, but the increased register pressure makes that a
+ // tricky thing to balance. Investigate if re-materializing these
+ // becomes an issue.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(i).isFI()) {
+ // Don't try this with values not in the local block.
+ if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+ break;
+ FrameReferenceInsns.
+ push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+ break;
+ }
+ }
+ }
+ }
+
+ // Sort the frame references by local offset
+ array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+
+ MachineBasicBlock *Entry = Fn.begin();
+
+ // Loop through the frame references and allocate for them as necessary.
+ for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
+ MachineBasicBlock::iterator I =
+ FrameReferenceInsns[ref].getMachineInstr();
+ MachineInstr *MI = I;
+ for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(idx).isFI()) {
+ int FrameIdx = MI->getOperand(idx).getIndex();
+
+ assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ DEBUG(dbgs() << "Considering: " << *MI);
+ if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
+ unsigned BaseReg = 0;
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust =
+ StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+ DEBUG(dbgs() << " Replacing FI in: " << *MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ std::pair<unsigned, int64_t> RegOffset;
+ if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
+ FrameSizeAdjust,
+ LocalOffsets[FrameIdx],
+ MI, TRI)) {
+ DEBUG(dbgs() << " Reusing base register " <<
+ RegOffset.first << "\n");
+ // We found a register to reuse.
+ BaseReg = RegOffset.first;
+ Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
+ RegOffset.second;
+ } else {
+ // No previously defined register was in range, so create a
+ // new one.
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
+ BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+ DEBUG(dbgs() << " Materializing base register " << BaseReg <<
+ " at frame local offset " <<
+ LocalOffsets[FrameIdx] + InstrOffset << "\n");
+
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+ InstrOffset);
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
+ InstrOffset;
+ BaseRegisters.push_back(
+ std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
+ ++NumBaseRegisters;
+ UsedBaseReg = true;
+ }
+ assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(I, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << *MI);
+
+ ++NumReplacements;
+ }
+ }
+ }
+ }
+ return UsedBaseReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 000000000000..898e165feeab
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,1181 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
+ : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
+ AddressTaken(false) {
+ Insts.Parent = this;
+}
+
+MachineBasicBlock::~MachineBasicBlock() {
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getSymbol - Return the MCSymbol for this basic block.
+///
+MCSymbol *MachineBasicBlock::getSymbol() const {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
+ return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
+ Twine(MF->getFunctionNumber()) + "_" +
+ Twine(getNumber()));
+}
+
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
+ MBB.print(OS);
+ return OS;
+}
+
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
+/// parent pointer of the MBB, the MBB numbering, and any instructions in the
+/// MBB to be on the right operand list for registers.
+///
+/// MBBs start out as #-1. When a MBB is added to a MachineFunction, it
+/// gets the next available unique MBB number. If it is removed from a
+/// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
+ MachineFunction &MF = *N->getParent();
+ N->Number = MF.addToMBBNumbering(N);
+
+ // Make sure the instructions have their operands in the reginfo lists.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (MachineBasicBlock::instr_iterator
+ I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
+ I->AddRegOperandsToUseLists(RegInfo);
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
+ N->getParent()->removeFromMBBNumbering(N->Number);
+ N->Number = -1;
+ LeakDetector::addGarbageObject(N);
+}
+
+
+/// addNodeToList (MI) - When we add an instruction to a basic block
+/// list, we update its parent pointer and add its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
+ assert(N->getParent() == 0 && "machine instruction already in a basic block");
+ N->setParent(Parent);
+
+ // Add the instruction's register operands to their corresponding
+ // use/def lists.
+ MachineFunction *MF = Parent->getParent();
+ N->AddRegOperandsToUseLists(MF->getRegInfo());
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+/// removeNodeFromList (MI) - When we remove an instruction from a basic block
+/// list, we update its parent pointer and remove its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
+ assert(N->getParent() != 0 && "machine instruction not in a basic block");
+
+ // Remove from the use/def lists.
+ if (MachineFunction *MF = N->getParent()->getParent())
+ N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
+
+ N->setParent(0);
+
+ LeakDetector::addGarbageObject(N);
+}
+
+/// transferNodesFromList (MI) - When moving a range of instructions from one
+/// MBB list to another, we need to update the parent pointers and the use/def
+/// lists.
+void ilist_traits<MachineInstr>::
+transferNodesFromList(ilist_traits<MachineInstr> &fromList,
+ ilist_iterator<MachineInstr> first,
+ ilist_iterator<MachineInstr> last) {
+ assert(Parent->getParent() == fromList.Parent->getParent() &&
+ "MachineInstr parent mismatch!");
+
+ // Splice within the same MBB -> no change.
+ if (Parent == fromList.Parent) return;
+
+ // If splicing between two blocks within the same function, just update the
+ // parent pointers.
+ for (; first != last; ++first)
+ first->setParent(Parent);
+}
+
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
+ assert(!MI->getParent() && "MI is still in a block!");
+ Parent->getParent()->DeleteMachineInstr(MI);
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
+ instr_iterator I = instr_begin(), E = instr_end();
+ while (I != E && I->isPHI())
+ ++I;
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi MI cannot be inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ iterator E = end();
+ while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+ ++I;
+ // FIXME: This needs to change if we wish to bundle labels / dbg_values
+ // inside the bundle.
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi / non-label instruction is inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+ iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getFirstTerminator() const {
+ const_iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
+ instr_iterator B = instr_begin(), E = instr_end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
+ // Skip over end-of-block dbg_value instructions.
+ instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getLastNonDebugInstr() const {
+ // Skip over end-of-block dbg_value instructions.
+ const_instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
+ // A block with a landing pad successor only has one other successor.
+ if (succ_size() > 2)
+ return 0;
+ for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+ if ((*I)->isLandingPad())
+ return *I;
+ return 0;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineBasicBlock::dump() const {
+ print(dbgs());
+}
+#endif
+
+StringRef MachineBasicBlock::getName() const {
+ if (const BasicBlock *LBB = getBasicBlock())
+ return LBB->getName();
+ else
+ return "(null)";
+}
+
+/// Return a hopefully unique identifier for this block.
+std::string MachineBasicBlock::getFullName() const {
+ std::string Name;
+ if (getParent())
+ Name = (getParent()->getName() + ":").str();
+ if (getBasicBlock())
+ Name += getBasicBlock()->getName();
+ else
+ Name += (Twine("BB") + Twine(getNumber())).str();
+ return Name;
+}
+
+void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+ const MachineFunction *MF = getParent();
+ if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+
+ if (Indexes)
+ OS << Indexes->getMBBStartIdx(this) << '\t';
+
+ OS << "BB#" << getNumber() << ": ";
+
+ const char *Comma = "";
+ if (const BasicBlock *LBB = getBasicBlock()) {
+ OS << Comma << "derived from LLVM BB ";
+ WriteAsOperand(OS, LBB, /*PrintType=*/false);
+ Comma = ", ";
+ }
+ if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+ if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ if (Alignment)
+ OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
+ << " bytes)";
+
+ OS << '\n';
+
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!livein_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Live Ins:";
+ for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+ OS << ' ' << PrintReg(*I, TRI);
+ OS << '\n';
+ }
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Predecessors according to CFG:";
+ for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+ OS << " BB#" << (*PI)->getNumber();
+ OS << '\n';
+ }
+
+ for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
+ if (Indexes) {
+ if (Indexes->hasIndex(I))
+ OS << Indexes->getInstructionIndex(I);
+ OS << '\t';
+ }
+ OS << '\t';
+ if (I->isInsideBundle())
+ OS << " * ";
+ I->print(OS, &getParent()->getTarget());
+ }
+
+ // Print the successors of this block according to the CFG.
+ if (!succ_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Successors according to CFG:";
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
+ OS << " BB#" << (*SI)->getNumber();
+ if (!Weights.empty())
+ OS << '(' << *getWeightIterator(SI) << ')';
+ }
+ OS << '\n';
+ }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+ std::vector<unsigned>::iterator I =
+ std::find(LiveIns.begin(), LiveIns.end(), Reg);
+ if (I != LiveIns.end())
+ LiveIns.erase(I);
+}
+
+bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
+ livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ return I != livein_end();
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+ getParent()->splice(NewAfter, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+ MachineFunction::iterator BBI = NewBefore;
+ getParent()->splice(++BBI, this);
+}
+
+void MachineBasicBlock::updateTerminator() {
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ // A block with no successors has no concerns with fall-through edges.
+ if (this->succ_empty()) return;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
+ bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
+ (void) B;
+ assert(!B && "UpdateTerminators requires analyzable predecessors!");
+ if (Cond.empty()) {
+ if (TBB) {
+ // The block has an unconditional branch. If its successor is now
+ // its layout successor, delete the branch.
+ if (isLayoutSuccessor(TBB))
+ TII->RemoveBranch(*this);
+ } else {
+ // The block has an unconditional fallthrough. If its successor is not
+ // its layout successor, insert a branch. First we have to locate the
+ // only non-landing-pad successor, as that is the fallthrough block.
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ assert(!TBB && "Found more than one non-landing-pad successor!");
+ TBB = *SI;
+ }
+
+ // If there is no non-landing-pad successor, the block has no
+ // fall-through edges to be concerned with.
+ if (!TBB)
+ return;
+
+ // Finally update the unconditional successor to be reached via a branch
+ // if it would not be reached by fallthrough.
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ }
+ } else {
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If one of its
+ // successors is its layout successor, rewrite it to a fallthrough
+ // conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond))
+ return;
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FBB, 0, Cond, dl);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ }
+ } else {
+ // Walk through the successors and find the successor which is not
+ // a landing pad and is not the conditional branch destination (in TBB)
+ // as the fallthrough successor.
+ MachineBasicBlock *FallthroughBB = 0;
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isLandingPad() || *SI == TBB)
+ continue;
+ assert(!FallthroughBB && "Found more than one fallthrough successor.");
+ FallthroughBB = *SI;
+ }
+ if (!FallthroughBB && canFallThrough()) {
+ // We fallthrough to the same basic block as the conditional jump
+ // targets. Remove the conditional jump, leaving unconditional
+ // fallthrough.
+ // FIXME: This does not seem like a reasonable pattern to support, but it
+ // has been seen in the wild coming out of degenerate ARM test cases.
+ TII->RemoveBranch(*this);
+
+ // Finally update the unconditional successor to be reached via a branch
+ // if it would not be reached by fallthrough.
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ return;
+ }
+
+ // The block has a fallthrough conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond)) {
+ // We can't reverse the condition, add an unconditional branch.
+ Cond.clear();
+ TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
+ return;
+ }
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
+ } else if (!isLayoutSuccessor(FallthroughBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl);
+ }
+ }
+ }
+}
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) {
+
+ // If we see non-zero value for the first time it means we actually use Weight
+ // list, so we fill all Weights with 0's.
+ if (weight != 0 && Weights.empty())
+ Weights.resize(Successors.size());
+
+ if (weight != 0 || !Weights.empty())
+ Weights.push_back(weight);
+
+ Successors.push_back(succ);
+ succ->addPredecessor(this);
+ }
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+ succ->removePredecessor(this);
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ assert(I != Successors.end() && "Not a current successor!");
+
+ // If Weight list is empty it means we don't use it (disabled optimization).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
+ Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+ assert(I != Successors.end() && "Not a current successor!");
+
+ // If Weight list is empty it means we don't use it (disabled optimization).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
+ (*I)->removePredecessor(this);
+ return Successors.erase(I);
+}
+
+void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ if (Old == New)
+ return;
+
+ succ_iterator E = succ_end();
+ succ_iterator NewI = E;
+ succ_iterator OldI = E;
+ for (succ_iterator I = succ_begin(); I != E; ++I) {
+ if (*I == Old) {
+ OldI = I;
+ if (NewI != E)
+ break;
+ }
+ if (*I == New) {
+ NewI = I;
+ if (OldI != E)
+ break;
+ }
+ }
+ assert(OldI != E && "Old is not a successor of this block");
+ Old->removePredecessor(this);
+
+ // If New isn't already a successor, let it take Old's place.
+ if (NewI == E) {
+ New->addPredecessor(this);
+ *OldI = New;
+ return;
+ }
+
+ // New is already a successor.
+ // Update its weight instead of adding a duplicate edge.
+ if (!Weights.empty()) {
+ weight_iterator OldWI = getWeightIterator(OldI);
+ *getWeightIterator(NewI) += *OldWI;
+ Weights.erase(OldWI);
+ }
+ Successors.erase(OldI);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+ Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+ pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred);
+ assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+ Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
+
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ uint32_t Weight = 0;
+
+ // If Weight list is empty it means we don't use it (disabled optimization).
+ if (!fromMBB->Weights.empty())
+ Weight = *fromMBB->Weights.begin();
+
+ addSuccessor(Succ, Weight);
+ fromMBB->removeSuccessor(Succ);
+ }
+}
+
+void
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
+
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ uint32_t Weight = 0;
+ if (!fromMBB->Weights.empty())
+ Weight = *fromMBB->Weights.begin();
+ addSuccessor(Succ, Weight);
+ fromMBB->removeSuccessor(Succ);
+
+ // Fix up any PHI nodes in the successor.
+ for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
+ ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
+ for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.getMBB() == fromMBB)
+ MO.setMBB(this);
+ }
+ }
+}
+
+bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
+ return std::find(pred_begin(), pred_end(), MBB) != pred_end();
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+ return std::find(succ_begin(), succ_end(), MBB) != succ_end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+ MachineFunction::const_iterator I(this);
+ return llvm::next(I) == MachineFunction::const_iterator(MBB);
+}
+
+bool MachineBasicBlock::canFallThrough() {
+ MachineFunction::iterator Fallthrough = this;
+ ++Fallthrough;
+ // If FallthroughBlock is off the end of the function, it can't fall through.
+ if (Fallthrough == getParent()->end())
+ return false;
+
+ // If FallthroughBlock isn't a successor, no fallthrough is possible.
+ if (!isSuccessor(Fallthrough))
+ return false;
+
+ // Analyze the branches, if any, at the end of the block.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicated check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier.
+ return empty() || !back().isBarrier() || TII->isPredicated(&back());
+ }
+
+ // If there is no branch, control always falls through.
+ if (TBB == 0) return true;
+
+ // If there is some explicit branch to the fallthrough block, it can obviously
+ // reach, even though the branch should get folded to fall through implicitly.
+ if (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough)
+ return true;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return false;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return FBB == 0;
+}
+
+MachineBasicBlock *
+MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+ // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (Succ->isLandingPad())
+ return NULL;
+
+ MachineFunction *MF = getParent();
+ DebugLoc dl; // FIXME: this is nowhere
+
+ // We may need to update this's terminator, but we can't do that if
+ // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
+ return NULL;
+
+ // Avoid bugpoint weirdness: A block may end with a conditional branch but
+ // jumps to the same MBB is either case. We have duplicate CFG edges in that
+ // case that we can't handle. Since this never happens in properly optimized
+ // code, just skip those edges.
+ if (TBB && TBB == FBB) {
+ DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+ << getNumber() << '\n');
+ return NULL;
+ }
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
+ DEBUG(dbgs() << "Splitting critical edge:"
+ " BB#" << getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << Succ->getNumber() << '\n');
+
+ LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
+ SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
+ if (LIS)
+ LIS->insertMBBInMaps(NMBB);
+ else if (Indexes)
+ Indexes->insertMBBInMaps(NMBB);
+
+ // On some targets like Mips, branches may kill virtual registers. Make sure
+ // that LiveVariables is properly updated after updateTerminator replaces the
+ // terminators.
+ LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();
+
+ // Collect a list of virtual registers killed by the terminators.
+ SmallVector<unsigned, 4> KilledRegs;
+ if (LV)
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || OI->getReg() == 0 ||
+ !OI->isUse() || !OI->isKill() || OI->isUndef())
+ continue;
+ unsigned Reg = OI->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ LV->getVarInfo(Reg).removeKill(MI)) {
+ KilledRegs.push_back(Reg);
+ DEBUG(dbgs() << "Removing terminator kill: " << *MI);
+ OI->setIsKill(false);
+ }
+ }
+ }
+
+ SmallVector<unsigned, 4> UsedRegs;
+ if (LIS) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || OI->getReg() == 0)
+ continue;
+
+ unsigned Reg = OI->getReg();
+ if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
+ ReplaceUsesOfBlockWith(Succ, NMBB);
+
+ // If updateTerminator() removes instructions, we need to remove them from
+ // SlotIndexes.
+ SmallVector<MachineInstr*, 4> Terminators;
+ if (Indexes) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ Terminators.push_back(I);
+ }
+
+ updateTerminator();
+
+ if (Indexes) {
+ SmallVector<MachineInstr*, 4> NewTerminators;
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ NewTerminators.push_back(I);
+
+ for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
+ E = Terminators.end(); I != E; ++I) {
+ if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
+ NewTerminators.end())
+ Indexes->removeMachineInstrFromMaps(*I);
+ }
+ }
+
+ // Insert unconditional "jump Succ" instruction in NMBB if necessary.
+ NMBB->addSuccessor(Succ);
+ if (!NMBB->isLayoutSuccessor(Succ)) {
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+
+ if (Indexes) {
+ for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
+ I != E; ++I) {
+ // Some instructions may have been moved to NMBB by updateTerminator(),
+ // so we first remove any instruction that already has an index.
+ if (Indexes->hasIndex(I))
+ Indexes->removeMachineInstrFromMaps(I);
+ Indexes->insertMachineInstrInMaps(I);
+ }
+ }
+ }
+
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this
+ for (MachineBasicBlock::instr_iterator
+ i = Succ->instr_begin(),e = Succ->instr_end();
+ i != e && i->isPHI(); ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == this)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ // Inherit live-ins from the successor
+ for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
+ E = Succ->livein_end(); I != E; ++I)
+ NMBB->addLiveIn(*I);
+
+ // Update LiveVariables.
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (LV) {
+ // Restore kills of virtual registers that were killed by the terminators.
+ while (!KilledRegs.empty()) {
+ unsigned Reg = KilledRegs.pop_back_val();
+ for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
+ if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ LV->getVarInfo(Reg).Kills.push_back(I);
+ DEBUG(dbgs() << "Restored terminator kill: " << *I);
+ break;
+ }
+ }
+ // Update relevant live-through information.
+ LV->addNewBlock(NMBB, this, Succ);
+ }
+
+ if (LIS) {
+ // After splitting the edge and updating SlotIndexes, live intervals may be
+ // in one of two situations, depending on whether this block was the last in
+ // the function. If the original block was the last in the function, all live
+ // intervals will end prior to the beginning of the new split block. If the
+ // original block was not at the end of the function, all live intervals will
+ // extend to the end of the new split block.
+
+ bool isLastMBB =
+ llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end();
+
+ SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
+ SlotIndex PrevIndex = StartIndex.getPrevSlot();
+ SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);
+
+ // Find the registers used from NMBB in PHIs in Succ.
+ SmallSet<unsigned, 8> PHISrcRegs;
+ for (MachineBasicBlock::instr_iterator
+ I = Succ->instr_begin(), E = Succ->instr_end();
+ I != E && I->isPHI(); ++I) {
+ for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
+ if (I->getOperand(ni+1).getMBB() == NMBB) {
+ MachineOperand &MO = I->getOperand(ni);
+ unsigned Reg = MO.getReg();
+ PHISrcRegs.insert(Reg);
+ if (MO.isUndef())
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "PHI sources should be live out of their predecessors.");
+ LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ }
+ }
+ }
+
+ MachineRegisterInfo *MRI = &getParent()->getRegInfo();
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (!LI.liveAt(PrevIndex))
+ continue;
+
+ bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
+ if (isLiveOut && isLastMBB) {
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "LiveInterval should have VNInfo where it is live.");
+ LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ } else if (!isLiveOut && !isLastMBB) {
+ LI.removeRange(StartIndex, EndIndex);
+ }
+ }
+
+ // Update all intervals for registers whose uses may have been modified by
+ // updateTerminator().
+ LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
+ }
+
+ if (MachineDominatorTree *MDT =
+ P->getAnalysisIfAvailable<MachineDominatorTree>()) {
+ // Update dominator information.
+ MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ);
+
+ bool IsNewIDom = true;
+ for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
+ PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (PredBB == NMBB)
+ continue;
+ if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) {
+ IsNewIDom = false;
+ break;
+ }
+ }
+
+ // We know "this" dominates the newly created basic block.
+ MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom)
+ MDT->changeImmediateDominator(SucccDTNode, NewDTNode);
+ }
+
+ if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+ if (TIL == DestLoop) {
+ // Both in the same loop, the NMBB joins loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (MachineLoop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NMBB, MLI->getBase());
+ }
+ }
+ }
+
+ return NMBB;
+}
+
+/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
+/// neighboring instructions so the bundle won't be broken by removing MI.
+static void unbundleSingleMI(MachineInstr *MI) {
+ // Removing the first instruction in a bundle.
+ if (MI->isBundledWithSucc() && !MI->isBundledWithPred())
+ MI->unbundleFromSucc();
+ // Removing the last instruction in a bundle.
+ if (MI->isBundledWithPred() && !MI->isBundledWithSucc())
+ MI->unbundleFromPred();
+ // If MI is not bundled, or if it is internal to a bundle, the neighbor flags
+ // are already fine.
+}
+
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) {
+ unbundleSingleMI(I);
+ return Insts.erase(I);
+}
+
+MachineInstr *MachineBasicBlock::remove_instr(MachineInstr *MI) {
+ unbundleSingleMI(MI);
+ MI->clearFlag(MachineInstr::BundledPred);
+ MI->clearFlag(MachineInstr::BundledSucc);
+ return Insts.remove(MI);
+}
+
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) {
+ assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+ "Cannot insert instruction with bundle flags");
+ // Set the bundle flags when inserting inside a bundle.
+ if (I != instr_end() && I->isBundledWithPred()) {
+ MI->setFlag(MachineInstr::BundledPred);
+ MI->setFlag(MachineInstr::BundledSucc);
+ }
+ return Insts.insert(I, MI);
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->erase(this);
+}
+
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Cannot replace self with self!");
+
+ MachineBasicBlock::instr_iterator I = instr_end();
+ while (I != instr_begin()) {
+ --I;
+ if (!I->isTerminator()) break;
+
+ // Scan the operands of this machine instruction, replacing any uses of Old
+ // with New.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isMBB() &&
+ I->getOperand(i).getMBB() == Old)
+ I->getOperand(i).setMBB(New);
+ }
+
+ // Update the successor information.
+ replaceSuccessor(Old, New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
+/// null.
+///
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+ MachineBasicBlock *DestB,
+ bool isCond) {
+ // The values of DestA and DestB frequently come from a call to the
+ // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial
+ // values from there.
+ //
+ // 1. If both DestA and DestB are null, then the block ends with no branches
+ // (it falls through to its successor).
+ // 2. If DestA is set, DestB is null, and isCond is false, then the block ends
+ // with only an unconditional branch.
+ // 3. If DestA is set, DestB is null, and isCond is true, then the block ends
+ // with a conditional branch that falls through to a successor (DestB).
+ // 4. If DestA and DestB is set and isCond is true, then the block ends with a
+ // conditional branch followed by an unconditional branch. DestA is the
+ // 'true' destination and DestB is the 'false' destination.
+
+ bool Changed = false;
+
+ MachineFunction::iterator FallThru =
+ llvm::next(MachineFunction::iterator(this));
+
+ if (DestA == 0 && DestB == 0) {
+ // Block falls through to successor.
+ DestA = FallThru;
+ DestB = FallThru;
+ } else if (DestA != 0 && DestB == 0) {
+ if (isCond)
+ // Block ends in conditional jump that falls through to successor.
+ DestB = FallThru;
+ } else {
+ assert(DestA && DestB && isCond &&
+ "CFG in a bad state. Cannot correct CFG edges");
+ }
+
+ // Remove superfluous edges. I.e., those which aren't destinations of this
+ // basic block, duplicate edges, or landing pads.
+ SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs;
+ MachineBasicBlock::succ_iterator SI = succ_begin();
+ while (SI != succ_end()) {
+ const MachineBasicBlock *MBB = *SI;
+ if (!SeenMBBs.insert(MBB) ||
+ (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
+ // This is a superfluous edge, remove it.
+ SI = removeSuccessor(SI);
+ Changed = true;
+ } else {
+ ++SI;
+ }
+ }
+
+ return Changed;
+}
+
+/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
+/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
+ DebugLoc DL;
+ instr_iterator E = instr_end();
+ if (MBBI == E)
+ return DL;
+
+ // Skip debug declarations, we don't want a DebugLoc from them.
+ while (MBBI != E && MBBI->isDebugValue())
+ MBBI++;
+ if (MBBI != E)
+ DL = MBBI->getDebugLoc();
+ return DL;
+}
+
+/// getSuccWeight - Return weight of the edge from this block to MBB.
+///
+uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
+ if (Weights.empty())
+ return 0;
+
+ return *getWeightIterator(Succ);
+}
+
+/// getWeightIterator - Return wight iterator corresonding to the I successor
+/// iterator
+MachineBasicBlock::weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::succ_iterator I) {
+ assert(Weights.size() == Successors.size() && "Async weight list!");
+ size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
+/// getWeightIterator - Return wight iterator corresonding to the I successor
+/// iterator
+MachineBasicBlock::const_weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
+ assert(Weights.size() == Successors.size() && "Async weight list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
+/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
+/// as of just before "MI".
+///
+/// Search is localised to a neighborhood of
+/// Neighborhood instructions before (searching for defs or kills) and N
+/// instructions after (searching just for defs) MI.
+MachineBasicBlock::LivenessQueryResult
+MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
+ unsigned Reg, MachineInstr *MI,
+ unsigned Neighborhood) {
+ unsigned N = Neighborhood;
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Start by searching backwards from MI, looking for kills, reads or defs.
+
+ MachineBasicBlock::iterator I(MI);
+ // If this is the first insn in the block, don't search backwards.
+ if (I != MBB->begin()) {
+ do {
+ --I;
+
+ MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MIOperands(I).analyzePhysReg(Reg, TRI);
+
+ if (Analysis.Defines)
+ // Outputs happen after inputs so they take precedence if both are
+ // present.
+ return Analysis.DefinesDead ? LQR_Dead : LQR_Live;
+
+ if (Analysis.Kills || Analysis.Clobbers)
+ // Register killed, so isn't live.
+ return LQR_Dead;
+
+ else if (Analysis.ReadsOverlap)
+ // Defined or read without a previous kill - live.
+ return Analysis.Reads ? LQR_Live : LQR_OverlappingLive;
+
+ } while (I != MBB->begin() && --N > 0);
+ }
+
+ // Did we get to the start of the block?
+ if (I == MBB->begin()) {
+ // If so, the register's state is definitely defined by the live-in state.
+ for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true);
+ RAI.isValid(); ++RAI) {
+ if (MBB->isLiveIn(*RAI))
+ return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive;
+ }
+
+ return LQR_Dead;
+ }
+
+ N = Neighborhood;
+
+ // Try searching forwards from MI, looking for reads or defs.
+ I = MachineBasicBlock::iterator(MI);
+ // If this is the last insn in the block, don't search forwards.
+ if (I != MBB->end()) {
+ for (++I; I != MBB->end() && N > 0; ++I, --N) {
+ MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MIOperands(I).analyzePhysReg(Reg, TRI);
+
+ if (Analysis.ReadsOverlap)
+ // Used, therefore must have been live.
+ return (Analysis.Reads) ?
+ LQR_Live : LQR_OverlappingLive;
+
+ else if (Analysis.Clobbers || Analysis.Defines)
+ // Defined (but not read) therefore cannot have been live.
+ return LQR_Dead;
+ }
+ }
+
+ // At this point we have no idea of the liveness of the register.
+ return LQR_Unknown;
+}
+
+void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
+ bool t) {
+ OS << "BB#" << MBB->getNumber();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
new file mode 100644
index 000000000000..070daf2e2ba2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -0,0 +1,61 @@
+//====----- MachineBlockFrequencyInfo.cpp - Machine Block Frequency Analysis ----====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequencyInfo::ID = 0;
+
+
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() : MachineFunctionPass(ID) {
+ initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+ MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
+ MachineBranchProbabilityInfo>();
+}
+
+MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {
+ delete MBFI;
+}
+
+void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI->doFunction(&F, &MBPI);
+ return false;
+}
+
+/// getblockFreq - Return block frequency. Return 0 if we don't have the
+/// information. Please note that initial frequency is equal to 1024. It means
+/// that we should not rely on the value itself, but only on the comparison to
+/// the other block frequencies. We do this to avoid using of floating points.
+///
+BlockFrequency MachineBlockFrequencyInfo::
+getBlockFreq(const MachineBasicBlock *MBB) const {
+ return MBFI->getBlockFreq(MBB);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
new file mode 100644
index 000000000000..cd948e24a6b2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -0,0 +1,1165 @@
+//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations using the CFG
+// structure and branch probability estimates.
+//
+// The pass strives to preserve the structure of the CFG (that is, retain
+// a topological ordering of basic blocks) in the absence of a *strong* signal
+// to the contrary from probabilities. However, within the CFG structure, it
+// attempts to choose an ordering which favors placing more likely sequences of
+// blocks adjacent to each other.
+//
+// The algorithm works from the inner-most loop within a function outward, and
+// at each stage walks through the basic blocks, trying to coalesce them into
+// sequential chains where allowed by the CFG (or demanded by heavy
+// probabilities). Finally, it walks the blocks in topological order, and the
+// first time it reaches a chain of basic blocks, it schedules them in the
+// function in-order.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "block-placement2"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCondBranches, "Number of conditional branches");
+STATISTIC(NumUncondBranches, "Number of uncondittional branches");
+STATISTIC(CondBranchTakenFreq,
+ "Potential frequency of taking conditional branches");
+STATISTIC(UncondBranchTakenFreq,
+ "Potential frequency of taking unconditional branches");
+
+namespace {
+class BlockChain;
+/// \brief Type for our function-wide basic block -> block chain mapping.
+typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
+}
+
+namespace {
+/// \brief A chain of blocks which will be laid out contiguously.
+///
+/// This is the datastructure representing a chain of consecutive blocks that
+/// are profitable to layout together in order to maximize fallthrough
+/// probabilities and code locality. We also can use a block chain to represent
+/// a sequence of basic blocks which have some external (correctness)
+/// requirement for sequential layout.
+///
+/// Chains can be built around a single basic block and can be merged to grow
+/// them. They participate in a block-to-chain mapping, which is updated
+/// automatically as chains are merged together.
+class BlockChain {
+ /// \brief The sequence of blocks belonging to this chain.
+ ///
+ /// This is the sequence of blocks for a particular chain. These will be laid
+ /// out in-order within the function.
+ SmallVector<MachineBasicBlock *, 4> Blocks;
+
+ /// \brief A handle to the function-wide basic block to block chain mapping.
+ ///
+ /// This is retained in each block chain to simplify the computation of child
+ /// block chains for SCC-formation and iteration. We store the edges to child
+ /// basic blocks, and map them back to their associated chains using this
+ /// structure.
+ BlockToChainMapType &BlockToChain;
+
+public:
+ /// \brief Construct a new BlockChain.
+ ///
+ /// This builds a new block chain representing a single basic block in the
+ /// function. It also registers itself as the chain that block participates
+ /// in with the BlockToChain mapping.
+ BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
+ : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ assert(BB && "Cannot create a chain with a null basic block");
+ BlockToChain[BB] = this;
+ }
+
+ /// \brief Iterator over blocks within the chain.
+ typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
+
+ /// \brief Beginning of blocks within the chain.
+ iterator begin() { return Blocks.begin(); }
+
+ /// \brief End of blocks within the chain.
+ iterator end() { return Blocks.end(); }
+
+ /// \brief Merge a block chain into this one.
+ ///
+ /// This routine merges a block chain into this one. It takes care of forming
+ /// a contiguous sequence of basic blocks, updating the edge list, and
+ /// updating the block -> chain mapping. It does not free or tear down the
+ /// old chain, but the old chain's block list is no longer valid.
+ void merge(MachineBasicBlock *BB, BlockChain *Chain) {
+ assert(BB);
+ assert(!Blocks.empty());
+
+ // Fast path in case we don't have a chain already.
+ if (!Chain) {
+ assert(!BlockToChain[BB]);
+ Blocks.push_back(BB);
+ BlockToChain[BB] = this;
+ return;
+ }
+
+ assert(BB == *Chain->begin());
+ assert(Chain->begin() != Chain->end());
+
+ // Update the incoming blocks to point to this chain, and add them to the
+ // chain structure.
+ for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
+ BI != BE; ++BI) {
+ Blocks.push_back(*BI);
+ assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
+ BlockToChain[*BI] = this;
+ }
+ }
+
+#ifndef NDEBUG
+ /// \brief Dump the blocks in this chain.
+ void dump() LLVM_ATTRIBUTE_USED {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ (*I)->dump();
+ }
+#endif // NDEBUG
+
+ /// \brief Count of predecessors within the loop currently being processed.
+ ///
+ /// This count is updated at each loop we process to represent the number of
+ /// in-loop predecessors of this chain.
+ unsigned LoopPredecessors;
+};
+}
+
+namespace {
+class MachineBlockPlacement : public MachineFunctionPass {
+ /// \brief A typedef for a block filter set.
+ typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
+
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+ /// \brief A handle to the loop info.
+ const MachineLoopInfo *MLI;
+
+ /// \brief A handle to the target's instruction info.
+ const TargetInstrInfo *TII;
+
+ /// \brief A handle to the target's lowering info.
+ const TargetLoweringBase *TLI;
+
+ /// \brief Allocator and owner of BlockChain structures.
+ ///
+ /// We build BlockChains lazily while processing the loop structure of
+ /// a function. To reduce malloc traffic, we allocate them using this
+ /// slab-like allocator, and destroy them after the pass completes. An
+ /// important guarantee is that this allocator produces stable pointers to
+ /// the chains.
+ SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
+
+ /// \brief Function wide BasicBlock to BlockChain mapping.
+ ///
+ /// This mapping allows efficiently moving from any given basic block to the
+ /// BlockChain it participates in, if any. We use it to, among other things,
+ /// allow implicitly defining edges between chains as the existing edges
+ /// between basic blocks.
+ DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+
+ void markChainSuccessors(BlockChain &Chain,
+ MachineBasicBlock *LoopHeaderBB,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ MachineFunction &F,
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
+ void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *findBestLoopTop(MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopExit(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ void buildLoopChains(MachineFunction &F, MachineLoop &L);
+ void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet);
+ void buildCFGChains(MachineFunction &F);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacement() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacement::ID = 0;
+char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
+ "Branch Probability Basic Block Placement", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
+ "Branch Probability Basic Block Placement", false, false)
+
+#ifndef NDEBUG
+/// \brief Helper to print the name of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockName(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber()
+ << " (derived from LLVM BB '" << BB->getName() << "')";
+ OS.flush();
+ return Result;
+}
+
+/// \brief Helper to print the number of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockNum(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber();
+ OS.flush();
+ return Result;
+}
+#endif
+
+/// \brief Mark a chain's successors as having one fewer preds.
+///
+/// When a chain is being merged into the "placed" chain, this routine will
+/// quickly walk the successors of each block in the chain and mark them as
+/// having one fewer active predecessor. It also adds any successors of this
+/// chain which reach the zero-predecessor state to the worklist passed in.
+void MachineBlockPlacement::markChainSuccessors(
+ BlockChain &Chain,
+ MachineBasicBlock *LoopHeaderBB,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter) {
+ // Walk all the blocks in this chain, marking their successors as having
+ // a predecessor placed.
+ for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
+ CBI != CBE; ++CBI) {
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
+ SE = (*CBI)->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || *SI == LoopHeaderBB)
+ continue;
+
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*SuccChain.begin());
+ }
+ }
+}
+
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
+ MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ const BranchProbability HotProb(4, 5); // 80%
+
+ MachineBasicBlock *BestSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we manually compute probabilities using the edge
+ // weights. This is suboptimal as it means that the somewhat subtle
+ // definition of edge weight semantics is encoded here as well. We should
+ // improve the MBPI interface to efficiently support query patterns such as
+ // this.
+ uint32_t BestWeight = 0;
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
+ continue;
+ }
+ if (*SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+
+ // Only consider successors which are either "hot", or wouldn't violate
+ // any CFG constraints.
+ if (SuccChain.LoopPredecessors != 0) {
+ if (SuccProb < HotProb) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n");
+ continue;
+ }
+
+ // Make sure that a hot successor doesn't have a globally more important
+ // predecessor.
+ BlockFrequency CandidateEdgeFreq
+ = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+ bool BadCFGConflict = false;
+ for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
+ PE = (*SI)->pred_end();
+ PI != PE; ++PI) {
+ if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
+ BlockToChain[*PI] == &Chain)
+ continue;
+ BlockFrequency PredEdgeFreq
+ = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+ if (PredEdgeFreq >= CandidateEdgeFreq) {
+ BadCFGConflict = true;
+ break;
+ }
+ }
+ if (BadCFGConflict) {
+ DEBUG(dbgs() << " " << getBlockName(*SI)
+ << " -> non-cold CFG conflict\n");
+ continue;
+ }
+ }
+
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ << " (prob)"
+ << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
+ << "\n");
+ if (BestSucc && BestWeight >= SuccWeight)
+ continue;
+ BestSucc = *SI;
+ BestWeight = SuccWeight;
+ }
+ return BestSucc;
+}
+
+namespace {
+/// \brief Predicate struct to detect blocks already placed.
+class IsBlockPlaced {
+ const BlockChain &PlacedChain;
+ const BlockToChainMapType &BlockToChain;
+
+public:
+ IsBlockPlaced(const BlockChain &PlacedChain,
+ const BlockToChainMapType &BlockToChain)
+ : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
+
+ bool operator()(MachineBasicBlock *BB) const {
+ return BlockToChain.lookup(BB) == &PlacedChain;
+ }
+};
+}
+
+/// \brief Select the best block from a worklist.
+///
+/// This looks through the provided worklist as a list of candidate basic
+/// blocks and select the most profitable one to place. The definition of
+/// profitable only really makes sense in the context of a loop. This returns
+/// the most frequently visited block in the worklist, which in the case of
+/// a loop, is the one most desirable to be physically close to the rest of the
+/// loop body in order to improve icache behavior.
+///
+/// \returns The best block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter) {
+ // Once we need to walk the worklist looking for a candidate, cleanup the
+ // worklist of already placed entries.
+ // FIXME: If this shows up on profiles, it could be folded (at the cost of
+ // some code complexity) into the loop below.
+ WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
+ IsBlockPlaced(Chain, BlockToChain)),
+ WorkList.end());
+
+ MachineBasicBlock *BestBlock = 0;
+ BlockFrequency BestFreq;
+ for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
+ WBE = WorkList.end();
+ WBI != WBE; ++WBI) {
+ BlockChain &SuccChain = *BlockToChain[*WBI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*WBI)
+ << " -> Already merged!\n");
+ continue;
+ }
+ assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
+ DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq
+ << " (freq)\n");
+ if (BestBlock && BestFreq >= CandidateFreq)
+ continue;
+ BestBlock = *WBI;
+ BestFreq = CandidateFreq;
+ }
+ return BestBlock;
+}
+
+/// \brief Retrieve the first unplaced basic block.
+///
+/// This routine is called when we are unable to use the CFG to walk through
+/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
+/// We walk through the function's blocks in order, starting from the
+/// LastUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+ MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter) {
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+ ++I) {
+ if (BlockFilter && !BlockFilter->count(I))
+ continue;
+ if (BlockToChain[I] != &PlacedChain) {
+ PrevUnplacedBlockIt = I;
+ // Now select the head of the chain to which the unplaced block belongs
+ // as the block to place. This will force the entire chain to be placed,
+ // and satisfies the requirements of merging chains.
+ return *BlockToChain[I]->begin();
+ }
+ }
+ return 0;
+}
+
+void MachineBlockPlacement::buildChain(
+ MachineBasicBlock *BB,
+ BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter) {
+ assert(BB);
+ assert(BlockToChain[BB] == &Chain);
+ MachineFunction &F = *BB->getParent();
+ MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
+
+ MachineBasicBlock *LoopHeaderBB = BB;
+ markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ BB = *llvm::prior(Chain.end());
+ for (;;) {
+ assert(BB);
+ assert(BlockToChain[BB] == &Chain);
+ assert(*llvm::prior(Chain.end()) == BB);
+
+ // Look for the best viable successor if there is one to place immediately
+ // after this block.
+ MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+
+ // If an immediate successor isn't available, look for the best viable
+ // block among those we've identified as not violating the loop's CFG at
+ // this point. This won't be a fallthrough, but it will increase locality.
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
+
+ if (!BestSucc) {
+ BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
+ BlockFilter);
+ if (!BestSucc)
+ break;
+
+ DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+ "layout successor until the CFG reduces\n");
+ }
+
+ // Place this block, updating the datastructures to reflect its placement.
+ BlockChain &SuccChain = *BlockToChain[BestSucc];
+ // Zero out LoopPredecessors for the successor we're about to merge in case
+ // we selected a successor that didn't fit naturally into the CFG.
+ SuccChain.LoopPredecessors = 0;
+ DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
+ << " to " << getBlockNum(BestSucc) << "\n");
+ markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ Chain.merge(BestSucc, &SuccChain);
+ BB = *llvm::prior(Chain.end());
+ }
+
+ DEBUG(dbgs() << "Finished forming chain for header block "
+ << getBlockNum(*Chain.begin()) << "\n");
+}
+
+/// \brief Find the best loop top block for layout.
+///
+/// Look for a block which is strictly better than the loop header for laying
+/// out at the top of the loop. This looks for one and only one pattern:
+/// a latch block with no conditional exit. This block will cause a conditional
+/// jump around it or will be the bottom of the loop if we lay it out in place,
+/// but if it it doesn't end up at the bottom of the loop for any reason,
+/// rotation alone won't fix it. Because such a block will always result in an
+/// unconditional jump (for the backedge) rotating it in front of the loop
+/// header is always profitable.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // Check that the header hasn't been fused with a preheader block due to
+ // crazy branches. If it has, we need to start with the header at the top to
+ // prevent pulling the preheader into the loop body.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return L.getHeader();
+
+ DEBUG(dbgs() << "Finding best loop top for: "
+ << getBlockName(L.getHeader()) << "\n");
+
+ BlockFrequency BestPredFreq;
+ MachineBasicBlock *BestPred = 0;
+ for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
+ PE = L.getHeader()->pred_end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+ if (!LoopBlockSet.count(Pred))
+ continue;
+ DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
+ << Pred->succ_size() << " successors, "
+ << MBFI->getBlockFreq(Pred) << " freq\n");
+ if (Pred->succ_size() > 1)
+ continue;
+
+ BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ if (!BestPred || PredFreq > BestPredFreq ||
+ (!(PredFreq < BestPredFreq) &&
+ Pred->isLayoutSuccessor(L.getHeader()))) {
+ BestPred = Pred;
+ BestPredFreq = PredFreq;
+ }
+ }
+
+ // If no direct predecessor is fine, just use the loop header.
+ if (!BestPred)
+ return L.getHeader();
+
+ // Walk backwards through any straight line of predecessors.
+ while (BestPred->pred_size() == 1 &&
+ (*BestPred->pred_begin())->succ_size() == 1 &&
+ *BestPred->pred_begin() != L.getHeader())
+ BestPred = *BestPred->pred_begin();
+
+ DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n");
+ return BestPred;
+}
+
+
+/// \brief Find the best loop exiting block for layout.
+///
+/// This routine implements the logic to analyze the loop looking for the best
+/// block to layout at the top of the loop. Typically this is done to maximize
+/// fallthrough opportunities.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // We don't want to layout the loop linearly in all cases. If the loop header
+ // is just a normal basic block in the loop, we want to look for what block
+ // within the loop is the best one to layout at the top. However, if the loop
+ // header has be pre-merged into a chain due to predecessors not having
+ // analyzable branches, *and* the predecessor it is merged with is *not* part
+ // of the loop, rotating the header into the middle of the loop will create
+ // a non-contiguous range of blocks which is Very Bad. So start with the
+ // header and only rotate if safe.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return 0;
+
+ BlockFrequency BestExitEdgeFreq;
+ unsigned BestExitLoopDepth = 0;
+ MachineBasicBlock *ExitingBB = 0;
+ // If there are exits to outer loops, loop rotation can severely limit
+ // fallthrough opportunites unless it selects such an exit. Keep a set of
+ // blocks where rotating to exit with that block will reach an outer loop.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
+
+ DEBUG(dbgs() << "Finding best loop exit for: "
+ << getBlockName(L.getHeader()) << "\n");
+ for (MachineLoop::block_iterator I = L.block_begin(),
+ E = L.block_end();
+ I != E; ++I) {
+ BlockChain &Chain = *BlockToChain[*I];
+ // Ensure that this block is at the end of a chain; otherwise it could be
+ // mid-way through an inner loop or a successor of an analyzable branch.
+ if (*I != *llvm::prior(Chain.end()))
+ continue;
+
+ // Now walk the successors. We need to establish whether this has a viable
+ // exiting successor and whether it has a viable non-exiting successor.
+ // We store the old exiting state and restore it if a viable looping
+ // successor isn't found.
+ MachineBasicBlock *OldExitingBB = ExitingBB;
+ BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
+ bool HasLoopingSucc = false;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we use the internal weights and manually compute the
+ // probabilities to avoid quadratic behavior.
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
+ for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
+ SE = (*I)->succ_end();
+ SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ if (*SI == *I)
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Don't split chains, either this chain or the successor's chain.
+ if (&Chain == &SuccChain) {
+ DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (chain conflict)\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
+ if (LoopBlockSet.count(*SI)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ HasLoopingSucc = true;
+ continue;
+ }
+
+ unsigned SuccLoopDepth = 0;
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) {
+ SuccLoopDepth = ExitLoop->getLoopDepth();
+ if (ExitLoop->contains(&L))
+ BlocksExitingToOuterLoop.insert(*I);
+ }
+
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " [L:" << SuccLoopDepth
+ << "] (" << ExitEdgeFreq << ")\n");
+ // Note that we slightly bias this toward an existing layout successor to
+ // retain incoming order in the absence of better information.
+ // FIXME: Should we bias this more strongly? It's pretty weak.
+ if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
+ ExitEdgeFreq > BestExitEdgeFreq ||
+ ((*I)->isLayoutSuccessor(*SI) &&
+ !(ExitEdgeFreq < BestExitEdgeFreq))) {
+ BestExitEdgeFreq = ExitEdgeFreq;
+ ExitingBB = *I;
+ }
+ }
+
+ // Restore the old exiting state, no viable looping successor was found.
+ if (!HasLoopingSucc) {
+ ExitingBB = OldExitingBB;
+ BestExitEdgeFreq = OldBestExitEdgeFreq;
+ continue;
+ }
+ }
+ // Without a candidate exiting block or with only a single block in the
+ // loop, just use the loop header to layout the loop.
+ if (!ExitingBB || L.getNumBlocks() == 1)
+ return 0;
+
+ // Also, if we have exit blocks which lead to outer loops but didn't select
+ // one of them as the exiting block we are rotating toward, disable loop
+ // rotation altogether.
+ if (!BlocksExitingToOuterLoop.empty() &&
+ !BlocksExitingToOuterLoop.count(ExitingBB))
+ return 0;
+
+ DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
+ return ExitingBB;
+}
+
+/// \brief Attempt to rotate an exiting block to the bottom of the loop.
+///
+/// Once we have built a chain, try to rotate it to line up the hot exit block
+/// with fallthrough out of the loop if doing so doesn't introduce unnecessary
+/// branches. For example, if the loop has fallthrough into its header and out
+/// of its bottom already, don't rotate it.
+void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
+ MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet) {
+ if (!ExitingBB)
+ return;
+
+ MachineBasicBlock *Top = *LoopChain.begin();
+ bool ViableTopFallthrough = false;
+ for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(),
+ PE = Top->pred_end();
+ PI != PE; ++PI) {
+ BlockChain *PredChain = BlockToChain[*PI];
+ if (!LoopBlockSet.count(*PI) &&
+ (!PredChain || *PI == *llvm::prior(PredChain->end()))) {
+ ViableTopFallthrough = true;
+ break;
+ }
+ }
+
+ // If the header has viable fallthrough, check whether the current loop
+ // bottom is a viable exiting block. If so, bail out as rotating will
+ // introduce an unnecessary branch.
+ if (ViableTopFallthrough) {
+ MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end());
+ for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
+ SE = Bottom->succ_end();
+ SI != SE; ++SI) {
+ BlockChain *SuccChain = BlockToChain[*SI];
+ if (!LoopBlockSet.count(*SI) &&
+ (!SuccChain || *SI == *SuccChain->begin()))
+ return;
+ }
+ }
+
+ BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(),
+ ExitingBB);
+ if (ExitIt == LoopChain.end())
+ return;
+
+ std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end());
+}
+
+/// \brief Forms basic block chains from the natural loop structures.
+///
+/// These chains are designed to preserve the existing *structure* of the code
+/// as much as possible. We can then stitch the chains together in a way which
+/// both preserves the topological structure and minimizes taken conditional
+/// branches.
+void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
+ MachineLoop &L) {
+ // First recurse through any nested loops, building chains for those inner
+ // loops.
+ for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
+ buildLoopChains(F, **LI);
+
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
+
+ // First check to see if there is an obviously preferable top block for the
+ // loop. This will default to the header, but may end up as one of the
+ // predecessors to the header if there is one which will result in strictly
+ // fewer branches in the loop body.
+ MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
+
+ // If we selected just the header for the loop top, look for a potentially
+ // profitable exit block in the event that rotating the loop can eliminate
+ // branches by placing an exit edge at the bottom.
+ MachineBasicBlock *ExitingBB = 0;
+ if (LoopTop == L.getHeader())
+ ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
+
+ BlockChain &LoopChain = *BlockToChain[LoopTop];
+
+ // FIXME: This is a really lame way of walking the chains in the loop: we
+ // walk the blocks, and use a set to prevent visiting a particular chain
+ // twice.
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ assert(LoopChain.LoopPredecessors == 0);
+ UpdatedPreds.insert(&LoopChain);
+ for (MachineLoop::block_iterator BI = L.block_begin(),
+ BE = L.block_end();
+ BI != BE; ++BI) {
+ BlockChain &Chain = *BlockToChain[*BI];
+ if (!UpdatedPreds.insert(&Chain))
+ continue;
+
+ assert(Chain.LoopPredecessors == 0);
+ for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
+ BCI != BCE; ++BCI) {
+ assert(BlockToChain[*BCI] == &Chain);
+ for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
+ PE = (*BCI)->pred_end();
+ PI != PE; ++PI) {
+ if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
+ continue;
+ ++Chain.LoopPredecessors;
+ }
+ }
+
+ if (Chain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*Chain.begin());
+ }
+
+ buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
+ rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
+
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadLoop = false;
+ if (LoopChain.LoopPredecessors) {
+ BadLoop = true;
+ dbgs() << "Loop chain contains a block without its preds placed!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
+ }
+ for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
+ BCI != BCE; ++BCI) {
+ dbgs() << " ... " << getBlockName(*BCI) << "\n";
+ if (!LoopBlockSet.erase(*BCI)) {
+ // We don't mark the loop as bad here because there are real situations
+ // where this can occur. For example, with an unanalyzable fallthrough
+ // from a loop block to a non-loop block or vice versa.
+ dbgs() << "Loop chain contains a block not contained by the loop!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(*BCI) << "\n";
+ }
+ }
+
+ if (!LoopBlockSet.empty()) {
+ BadLoop = true;
+ for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
+ LBE = LoopBlockSet.end();
+ LBI != LBE; ++LBI)
+ dbgs() << "Loop contains blocks never placed into a chain!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(*LBI) << "\n";
+ }
+ assert(!BadLoop && "Detected problems with the placement of this loop.");
+ });
+}
+
+void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
+ // Ensure that every BB in the function has an associated chain to simplify
+ // the assumptions of the remaining algorithm.
+ SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = FI;
+ BlockChain *Chain
+ = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ // Also, merge any blocks which we cannot reason about and must preserve
+ // the exact fallthrough behavior for.
+ for (;;) {
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+ break;
+
+ MachineFunction::iterator NextFI(llvm::next(FI));
+ MachineBasicBlock *NextBB = NextFI;
+ // Ensure that the layout successor is a viable block, as we know that
+ // fallthrough is a possibility.
+ assert(NextFI != FE && "Can't fallthrough past the last block.");
+ DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+ << getBlockName(BB) << " -> " << getBlockName(NextBB)
+ << "\n");
+ Chain->merge(NextBB, 0);
+ FI = NextFI;
+ BB = NextBB;
+ }
+ }
+
+ // Build any loop-based chains.
+ for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
+ ++LI)
+ buildLoopChains(F, **LI);
+
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = &*FI;
+ BlockChain &Chain = *BlockToChain[BB];
+ if (!UpdatedPreds.insert(&Chain))
+ continue;
+
+ assert(Chain.LoopPredecessors == 0);
+ for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
+ BCI != BCE; ++BCI) {
+ assert(BlockToChain[*BCI] == &Chain);
+ for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
+ PE = (*BCI)->pred_end();
+ PI != PE; ++PI) {
+ if (BlockToChain[*PI] == &Chain)
+ continue;
+ ++Chain.LoopPredecessors;
+ }
+ }
+
+ if (Chain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*Chain.begin());
+ }
+
+ BlockChain &FunctionChain = *BlockToChain[&F.front()];
+ buildChain(&F.front(), FunctionChain, BlockWorkList);
+
+ typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadFunc = false;
+ FunctionBlockSetType FunctionBlockSet;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ FunctionBlockSet.insert(FI);
+
+ for (BlockChain::iterator BCI = FunctionChain.begin(),
+ BCE = FunctionChain.end();
+ BCI != BCE; ++BCI)
+ if (!FunctionBlockSet.erase(*BCI)) {
+ BadFunc = true;
+ dbgs() << "Function chain contains a block not in the function!\n"
+ << " Bad block: " << getBlockName(*BCI) << "\n";
+ }
+
+ if (!FunctionBlockSet.empty()) {
+ BadFunc = true;
+ for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
+ FBE = FunctionBlockSet.end();
+ FBI != FBE; ++FBI)
+ dbgs() << "Function contains blocks never placed into a chain!\n"
+ << " Bad block: " << getBlockName(*FBI) << "\n";
+ }
+ assert(!BadFunc && "Detected problems with the block placement.");
+ });
+
+ // Splice the blocks into place.
+ MachineFunction::iterator InsertPos = F.begin();
+ for (BlockChain::iterator BI = FunctionChain.begin(),
+ BE = FunctionChain.end();
+ BI != BE; ++BI) {
+ DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(*BI) << "\n");
+ if (InsertPos != MachineFunction::iterator(*BI))
+ F.splice(InsertPos, *BI);
+ else
+ ++InsertPos;
+
+ // Update the terminator of the previous block.
+ if (BI == FunctionChain.begin())
+ continue;
+ MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
+
+ // FIXME: It would be awesome of updateTerminator would just return rather
+ // than assert when the branch cannot be analyzed in order to remove this
+ // boiler plate.
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // If PrevBB has a two-way branch, try to re-order the branches
+ // such that we branch to the successor with higher weight first.
+ if (TBB && !Cond.empty() && FBB &&
+ MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
+ !TII->ReverseBranchCondition(Cond)) {
+ DEBUG(dbgs() << "Reverse order of the two branches: "
+ << getBlockName(PrevBB) << "\n");
+ DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
+ << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PrevBB);
+ TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
+ }
+ PrevBB->updateTerminator();
+ }
+ }
+
+ // Fixup the last block.
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
+ F.back().updateTerminator();
+
+ // Walk through the backedges of the function now that we have fully laid out
+ // the basic blocks and align the destination of each backedge. We don't rely
+ // exclusively on the loop info here so that we can align backedges in
+ // unnatural CFGs and backedges that were introduced purely because of the
+ // loop rotations done during this layout pass.
+ if (F.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
+ return;
+ unsigned Align = TLI->getPrefLoopAlignment();
+ if (!Align)
+ return; // Don't care about loop alignment.
+ if (FunctionChain.begin() == FunctionChain.end())
+ return; // Empty chain.
+
+ const BranchProbability ColdProb(1, 5); // 20%
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
+ BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
+ for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
+ BE = FunctionChain.end();
+ BI != BE; ++BI) {
+ // Don't align non-looping basic blocks. These are unlikely to execute
+ // enough times to matter in practice. Note that we'll still handle
+ // unnatural CFGs inside of a natural outer loop (the common case) and
+ // rotated loops.
+ MachineLoop *L = MLI->getLoopFor(*BI);
+ if (!L)
+ continue;
+
+ // If the block is cold relative to the function entry don't waste space
+ // aligning it.
+ BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ if (Freq < WeightedEntryFreq)
+ continue;
+
+ // If the block is cold relative to its loop header, don't align it
+ // regardless of what edges into the block exist.
+ MachineBasicBlock *LoopHeader = L->getHeader();
+ BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
+ if (Freq < (LoopHeaderFreq * ColdProb))
+ continue;
+
+ // Check for the existence of a non-layout predecessor which would benefit
+ // from aligning this block.
+ MachineBasicBlock *LayoutPred = *llvm::prior(BI);
+
+ // Force alignment if all the predecessors are jumps. We already checked
+ // that the block isn't cold above.
+ if (!LayoutPred->isSuccessor(*BI)) {
+ (*BI)->setAlignment(Align);
+ continue;
+ }
+
+ // Align this block if the layout predecessor's edge into this block is
+ // cold relative to the block. When this is true, other predecessors make up
+ // all of the hot entries into the block and thus alignment is likely to be
+ // important.
+ BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
+ if (LayoutEdgeFreq <= (Freq * ColdProb))
+ (*BI)->setAlignment(Align);
+ }
+}
+
+bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (llvm::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ TII = F.getTarget().getInstrInfo();
+ TLI = F.getTarget().getTargetLowering();
+ assert(BlockToChain.empty());
+
+ buildCFGChains(F);
+
+ BlockToChain.clear();
+ ChainAllocator.DestroyAll();
+
+ // We always return true as we have no way to track whether the final order
+ // differs from the original order.
+ return true;
+}
+
+namespace {
+/// \brief A pass to compute block placement statistics.
+///
+/// A separate pass to compute interesting statistics for evaluating block
+/// placement. This is separate from the actual placement pass so that they can
+/// be computed in the absence of any placement transformations or when using
+/// alternative placement strategies.
+class MachineBlockPlacementStats : public MachineFunctionPass {
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacementStats() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacementStats::ID = 0;
+char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+
+bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (llvm::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
+ Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
+ : NumUncondBranches;
+ Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
+ : UncondBranchTakenFreq;
+ for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end();
+ SI != SE; ++SI) {
+ // Skip if this successor is a fallthrough.
+ if (I->isLayoutSuccessor(*SI))
+ continue;
+
+ BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
+ ++NumBranches;
+ BranchTakenFreq += EdgeFreq.getFrequency();
+ }
+ }
+
+ return false;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..ae70912b6c69
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -0,0 +1,126 @@
+//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis uses probability info stored in Machine Basic Blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+
+char MachineBranchProbabilityInfo::ID = 0;
+
+void MachineBranchProbabilityInfo::anchor() { }
+
+uint32_t MachineBranchProbabilityInfo::
+getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
+ // First we compute the sum with 64-bits of precision, ensuring that cannot
+ // overflow by bounding the number of weights considered. Hopefully no one
+ // actually needs 2^32 successors.
+ assert(MBB->succ_size() < UINT32_MAX);
+ uint64_t Sum = 0;
+ Scale = 1;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ uint32_t Weight = getEdgeWeight(MBB, I);
+ Sum += Weight;
+ }
+
+ // If the computed sum fits in 32-bits, we're done.
+ if (Sum <= UINT32_MAX)
+ return Sum;
+
+ // Otherwise, compute the scale necessary to cause the weights to fit, and
+ // re-sum with that scale applied.
+ assert((Sum / UINT32_MAX) < UINT32_MAX);
+ Scale = (Sum / UINT32_MAX) + 1;
+ Sum = 0;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ uint32_t Weight = getEdgeWeight(MBB, I);
+ Sum += Weight / Scale;
+ }
+ assert(Sum <= UINT32_MAX);
+ return Sum;
+}
+
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const {
+ uint32_t Weight = Src->getSuccWeight(Dst);
+ if (!Weight)
+ return DEFAULT_WEIGHT;
+ return Weight;
+}
+
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ // This is a linear search. Try to use the const_succ_iterator version when
+ // possible.
+ return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst));
+}
+
+bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ // Hot probability is at least 4/5 = 80%
+ // FIXME: Compare against a static "hot" BranchProbability.
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
+}
+
+MachineBasicBlock *
+MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
+ uint32_t MaxWeight = 0;
+ MachineBasicBlock *MaxSucc = 0;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ uint32_t Weight = getEdgeWeight(MBB, I);
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxSucc = *I;
+ }
+ }
+
+ if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5))
+ return MaxSucc;
+
+ return 0;
+}
+
+BranchProbability
+MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ uint32_t Scale = 1;
+ uint32_t D = getSumForBlock(Src, Scale);
+ uint32_t N = getEdgeWeight(Src, Dst) / Scale;
+
+ return BranchProbability(N, D);
+}
+
+raw_ostream &MachineBranchProbabilityInfo::
+printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
new file mode 100644
index 000000000000..61d8d384cd38
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -0,0 +1,661 @@
+//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global common subexpression elimination on machine
+// instructions using a scoped hash table based value numbering scheme. It
+// must be run while the machine function is still in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-cse"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumCSEs, "Number of common subexpression eliminated");
+STATISTIC(NumPhysCSEs,
+ "Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCrossBBCSEs,
+ "Number of cross-MBB physreg referencing CS eliminated");
+STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
+
+namespace {
+ class MachineCSE : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ AliasAnalysis *AA;
+ MachineDominatorTree *DT;
+ MachineRegisterInfo *MRI;
+ public:
+ static char ID; // Pass identification
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
+ initializeMachineCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+
+ virtual void releaseMemory() {
+ ScopeMap.clear();
+ Exps.clear();
+ }
+
+ private:
+ const unsigned LookAheadLimit;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
+ typedef ScopedHashTable<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
+ typedef ScopedHTType::ScopeTy ScopeType;
+ DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
+ ScopedHTType VNT;
+ SmallVector<MachineInstr*, 64> Exps;
+ unsigned CurrVN;
+
+ bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const;
+ bool hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &PhysUseDef) const;
+ bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &NonLocal) const;
+ bool isCSECandidate(MachineInstr *MI);
+ bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ MachineInstr *CSMI, MachineInstr *MI);
+ void EnterScope(MachineBasicBlock *MBB);
+ void ExitScope(MachineBasicBlock *MBB);
+ bool ProcessBlock(MachineBasicBlock *MBB);
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
+ bool PerformCSE(MachineDomTreeNode *Node);
+ };
+} // end anonymous namespace
+
+char MachineCSE::ID = 0;
+char &llvm::MachineCSEID = MachineCSE::ID;
+INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+
+bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (!MRI->hasOneNonDBGUse(Reg))
+ // Only coalesce single use copies. This ensure the copy will be
+ // deleted.
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI->isCopy())
+ continue;
+ unsigned SrcReg = DefMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ continue;
+ if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+ continue;
+ if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
+ continue;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MO.setReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool
+MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const {
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I->isDebugValue())
+ ++I;
+
+ if (I == E)
+ // Reached end of block, register is obviously dead.
+ return true;
+
+ bool SeenDef = false;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ SeenDef = true;
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (!TRI->regsOverlap(MO.getReg(), Reg))
+ continue;
+ if (MO.isUse())
+ // Found a use!
+ return false;
+ SeenDef = true;
+ }
+ if (SeenDef)
+ // See a def of Reg (or an alias) before encountering any use, it's
+ // trivially dead.
+ return true;
+
+ --LookAheadLeft;
+ ++I;
+ }
+ return false;
+}
+
+/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
+/// physical registers (except for dead defs of physical registers). It also
+/// returns the physical register def by reference if it's the only one and the
+/// instruction does not uses a physical register.
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &PhysUseDef) const{
+ // First, add all uses to PhysRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // Reading constant physregs is ok.
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
+ }
+
+ // Next, collect all defs into PhysDefs. If any is already in PhysRefs
+ // (which currently contains only uses), set the PhysUseDef flag.
+ PhysUseDef = false;
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // Check against PhysRefs even if the def is "dead".
+ if (PhysRefs.count(Reg))
+ PhysUseDef = true;
+ // If the def is dead, it's ok. But the def may not marked "dead". That's
+ // common since this pass is run before livevariables. We can scan
+ // forward a few instructions and check if it is obviously dead.
+ if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
+ PhysDefs.push_back(Reg);
+ }
+
+ // Finally, add all defs to PhysRefs as well.
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
+ for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
+
+ return !PhysRefs.empty();
+}
+
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &NonLocal) const {
+ // For now conservatively returns false if the common subexpression is
+ // not in the same basic block as the given instruction. The only exception
+ // is if the common subexpression is in the sole predecessor block.
+ const MachineBasicBlock *MBB = MI->getParent();
+ const MachineBasicBlock *CSMBB = CSMI->getParent();
+
+ bool CrossMBB = false;
+ if (CSMBB != MBB) {
+ if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
+ return false;
+
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
+ if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
+ // Avoid extending live range of physical registers if they are
+ //allocatable or reserved.
+ return false;
+ }
+ CrossMBB = true;
+ }
+ MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
+ MachineBasicBlock::const_iterator E = MI;
+ MachineBasicBlock::const_iterator EE = CSMBB->end();
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I != EE && I->isDebugValue())
+ ++I;
+
+ if (I == EE) {
+ assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
+ (void)CrossMBB;
+ CrossMBB = false;
+ NonLocal = true;
+ I = MBB->begin();
+ EE = MBB->end();
+ continue;
+ }
+
+ if (I == E)
+ return true;
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ // RegMasks go on instructions like calls that clobber lots of physregs.
+ // Don't attempt to CSE across such an instruction.
+ if (MO.isRegMask())
+ return false;
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ continue;
+ if (PhysRefs.count(MOReg))
+ return false;
+ }
+
+ --LookAheadLeft;
+ ++I;
+ }
+
+ return false;
+}
+
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
+ return false;
+
+ // Ignore copies.
+ if (MI->isCopyLike())
+ return false;
+
+ // Ignore stuff that we obviously can't move.
+ if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ if (MI->mayLoad()) {
+ // Okay, this instruction does a load. As a refinement, we allow the target
+ // to decide whether the loaded value is actually a constant. If so, we can
+ // actually use it as a load.
+ if (!MI->isInvariantLoad(AA))
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
+ return false;
+ }
+ return true;
+}
+
+/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
+/// common expression that defines Reg.
+bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ MachineInstr *CSMI, MachineInstr *MI) {
+ // FIXME: Heuristics that works around the lack the live range splitting.
+
+ // If CSReg is used at all uses of Reg, CSE should not increase register
+ // pressure of CSReg.
+ bool MayIncreasePressure = true;
+ if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MayIncreasePressure = false;
+ SmallPtrSet<MachineInstr*, 8> CSUses;
+ for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ CSUses.insert(Use);
+ }
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ if (!CSUses.count(Use)) {
+ MayIncreasePressure = true;
+ break;
+ }
+ }
+ }
+ if (!MayIncreasePressure) return true;
+
+ // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
+ // an immediate predecessor. We don't want to increase register pressure and
+ // end up causing other computation to be spilled.
+ if (MI->isAsCheapAsAMove()) {
+ MachineBasicBlock *CSBB = CSMI->getParent();
+ MachineBasicBlock *BB = MI->getParent();
+ if (CSBB != BB && !CSBB->isSuccessor(BB))
+ return false;
+ }
+
+ // Heuristics #2: If the expression doesn't not use a vr and the only use
+ // of the redundant computation are copies, do not cse.
+ bool HasVRegUse = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ HasVRegUse = true;
+ break;
+ }
+ }
+ if (!HasVRegUse) {
+ bool HasNonCopyUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ // Ignore copies.
+ if (!Use->isCopyLike()) {
+ HasNonCopyUse = true;
+ break;
+ }
+ }
+ if (!HasNonCopyUse)
+ return false;
+ }
+
+ // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
+ // it unless the defined value is already used in the BB of the new use.
+ bool HasPHI = false;
+ SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ HasPHI |= Use->isPHI();
+ CSBBs.insert(Use->getParent());
+ }
+
+ if (!HasPHI)
+ return true;
+ return CSBBs.count(MI->getParent());
+}
+
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+ ScopeType *Scope = new ScopeType(VNT);
+ ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+ assert(SI != ScopeMap.end());
+ delete SI->second;
+ ScopeMap.erase(SI);
+}
+
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ SmallVector<unsigned, 2> ImplicitDefsToUpdate;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (!isCSECandidate(MI))
+ continue;
+
+ bool FoundCSE = VNT.count(MI);
+ if (!FoundCSE) {
+ // Look for trivial copy coalescing opportunities.
+ if (PerformTrivialCoalescing(MI, MBB)) {
+ Changed = true;
+
+ // After coalescing MI itself may become a copy.
+ if (MI->isCopyLike())
+ continue;
+ FoundCSE = VNT.count(MI);
+ }
+ }
+
+ // Commute commutable instructions.
+ bool Commuted = false;
+ if (!FoundCSE && MI->isCommutable()) {
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+ if (NewMI) {
+ Commuted = true;
+ FoundCSE = VNT.count(NewMI);
+ if (NewMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ Changed = true;
+ } else if (!FoundCSE)
+ // MI was changed but it didn't help, commute it back!
+ (void)TII->commuteInstruction(MI);
+ }
+ }
+
+ // If the instruction defines physical registers and the values *may* be
+ // used, then it's not safe to replace it with a common subexpression.
+ // It's also not safe if the instruction uses physical registers.
+ bool CrossMBBPhysDef = false;
+ SmallSet<unsigned, 8> PhysRefs;
+ SmallVector<unsigned, 2> PhysDefs;
+ bool PhysUseDef = false;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
+ PhysDefs, PhysUseDef)) {
+ FoundCSE = false;
+
+ // ... Unless the CS is local or is in the sole predecessor block
+ // and it also defines the physical register which is not clobbered
+ // in between and the physical register uses were not clobbered.
+ // This can never be the case if the instruction both uses and
+ // defines the same physical register, which was detected above.
+ if (!PhysUseDef) {
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+ FoundCSE = true;
+ }
+ }
+
+ if (!FoundCSE) {
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ continue;
+ }
+
+ // Found a common subexpression, eliminate it.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ DEBUG(dbgs() << "Examining: " << *MI);
+ DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+
+ // Check if it's profitable to perform this CSE.
+ bool DoCSE = true;
+ unsigned NumDefs = MI->getDesc().getNumDefs() +
+ MI->getDesc().getNumImplicitDefs();
+
+ for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned OldReg = MO.getReg();
+ unsigned NewReg = CSMI->getOperand(i).getReg();
+
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
+ ImplicitDefsToUpdate.push_back(i);
+ if (OldReg == NewReg) {
+ --NumDefs;
+ continue;
+ }
+
+ assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
+ TargetRegisterInfo::isVirtualRegister(NewReg) &&
+ "Do not CSE physical register defs!");
+
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
+ DoCSE = false;
+ break;
+ }
+
+ // Don't perform CSE if the result of the old instruction cannot exist
+ // within the register class of the new instruction.
+ const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
+ if (!MRI->constrainRegClass(NewReg, OldRC)) {
+ DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
+ DoCSE = false;
+ break;
+ }
+
+ CSEPairs.push_back(std::make_pair(OldReg, NewReg));
+ --NumDefs;
+ }
+
+ // Actually perform the elimination.
+ if (DoCSE) {
+ for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
+ MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
+ MRI->clearKillFlags(CSEPairs[i].second);
+ }
+
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
+ CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+
+ if (CrossMBBPhysDef) {
+ // Add physical register defs now coming in from a predecessor to MBB
+ // livein list.
+ while (!PhysDefs.empty()) {
+ unsigned LiveIn = PhysDefs.pop_back_val();
+ if (!MBB->isLiveIn(LiveIn))
+ MBB->addLiveIn(LiveIn);
+ }
+ ++NumCrossBBCSEs;
+ }
+
+ MI->eraseFromParent();
+ ++NumCSEs;
+ if (!PhysRefs.empty())
+ ++NumPhysCSEs;
+ if (Commuted)
+ ++NumCommutes;
+ Changed = true;
+ } else {
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ }
+ CSEPairs.clear();
+ ImplicitDefsToUpdate.clear();
+ }
+
+ return Changed;
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if its a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
+ if (OpenChildren[Node])
+ return;
+
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose offsprings are all done.
+ while (MachineDomTreeNode *Parent = Node->getIDom()) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ CurrVN = 0;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(Node);
+ do {
+ Node = WorkList.pop_back_val();
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+ OpenChildren[Node] = NumChildren;
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ WorkList.push_back(Child);
+ }
+ } while (!WorkList.empty());
+
+ // Now perform CSE.
+ bool Changed = false;
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = Scopes[i];
+ MachineBasicBlock *MBB = Node->getBlock();
+ EnterScope(MBB);
+ Changed |= ProcessBlock(MBB);
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren);
+ }
+
+ return Changed;
+}
+
+bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ return PerformCSE(DT->getRootNode());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp
new file mode 100644
index 000000000000..81b49784c052
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+
+using namespace llvm;
+
+void MachineCodeEmitter::anchor() { }
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
new file mode 100644
index 000000000000..dc8a2241c7e1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -0,0 +1,334 @@
+//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level copy propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-cp"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead copies deleted");
+
+namespace {
+ class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineCopyPropagation() : MachineFunctionPass(ID) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef SmallVector<unsigned, 4> DestList;
+ typedef DenseMap<unsigned, DestList> SourceMap;
+
+ void SourceNoLongerAvailable(unsigned Reg,
+ SourceMap &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
+ bool CopyPropagateBlock(MachineBasicBlock &MBB);
+ void removeCopy(MachineInstr *MI);
+ };
+}
+char MachineCopyPropagation::ID = 0;
+char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
+
+INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+ "Machine Copy Propagation Pass", false, false)
+
+void
+MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
+ SourceMap &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ SourceMap::iterator SI = SrcMap.find(*AI);
+ if (SI != SrcMap.end()) {
+ const DestList& Defs = SI->second;
+ for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ unsigned MappedDef = *I;
+ // Source of copy is no longer available for propagation.
+ if (AvailCopyMap.erase(MappedDef)) {
+ for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
+ AvailCopyMap.erase(*SR);
+ }
+ }
+ }
+ }
+}
+
+static bool NoInterveningSideEffect(const MachineInstr *CopyMI,
+ const MachineInstr *MI) {
+ const MachineBasicBlock *MBB = CopyMI->getParent();
+ if (MI->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_iterator I = CopyMI;
+ MachineBasicBlock::const_iterator E = MBB->end();
+ MachineBasicBlock::const_iterator E2 = MI;
+
+ ++I;
+ while (I != E && I != E2) {
+ if (I->hasUnmodeledSideEffects() || I->isCall() ||
+ I->isTerminator())
+ return false;
+ ++I;
+ }
+ return true;
+}
+
+/// isNopCopy - Return true if the specified copy is really a nop. That is
+/// if the source of the copy is the same of the definition of the copy that
+/// supplied the source. If the source of the copy is a sub-register than it
+/// must check the sub-indices match. e.g.
+/// ecx = mov eax
+/// al = mov cl
+/// But not
+/// ecx = mov eax
+/// al = mov ch
+static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
+ const TargetRegisterInfo *TRI) {
+ unsigned SrcSrc = CopyMI->getOperand(1).getReg();
+ if (Def == SrcSrc)
+ return true;
+ if (TRI->isSubRegister(SrcSrc, Def)) {
+ unsigned SrcDef = CopyMI->getOperand(0).getReg();
+ unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def);
+ if (!SubIdx)
+ return false;
+ return SubIdx == TRI->getSubRegIndex(SrcDef, Src);
+ }
+
+ return false;
+}
+
+// Remove MI from the function because it has been determined it is dead.
+// Turn it into a noop KILL instruction if it has super-register liveness
+// adjustments.
+void MachineCopyPropagation::removeCopy(MachineInstr *MI) {
+ if (MI->getNumOperands() == 2)
+ MI->eraseFromParent();
+ else
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+}
+
+bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
+ SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
+ DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
+ DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map
+ SourceMap SrcMap; // Src -> Def map
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (MI->isCopy()) {
+ unsigned Def = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Def) ||
+ TargetRegisterInfo::isVirtualRegister(Src))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
+ if (CI != AvailCopyMap.end()) {
+ MachineInstr *CopyMI = CI->second;
+ if (!MRI->isReserved(Def) &&
+ (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
+ isNopCopy(CopyMI, Def, Src, TRI)) {
+ // The two copies cancel out and the source of the first copy
+ // hasn't been overridden, eliminate the second one. e.g.
+ // %ECX<def> = COPY %EAX<kill>
+ // ... nothing clobbered EAX.
+ // %EAX<def> = COPY %ECX
+ // =>
+ // %ECX<def> = COPY %EAX
+ //
+ // Also avoid eliminating a copy from reserved registers unless the
+ // definition is proven not clobbered. e.g.
+ // %RSP<def> = COPY %RAX
+ // CALL
+ // %RAX<def> = COPY %RSP
+
+ // Clear any kills of Def between CopyMI and MI. This extends the
+ // live range.
+ for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
+ I->clearRegisterKills(Def, TRI);
+
+ removeCopy(MI);
+ Changed = true;
+ ++NumDeletes;
+ continue;
+ }
+ }
+
+ // If Src is defined by a previous copy, it cannot be eliminated.
+ for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
+ CI = CopyMap.find(*AI);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+
+ // Copy is now a candidate for deletion.
+ MaybeDeadCopies.insert(MI);
+
+ // If 'Src' is previously source of another copy, then this earlier copy's
+ // source is no longer available. e.g.
+ // %xmm9<def> = copy %xmm2
+ // ...
+ // %xmm2<def> = copy %xmm0
+ // ...
+ // %xmm2<def> = copy %xmm9
+ SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);
+
+ // Remember Def is defined by the copy.
+ // ... Make sure to clear the def maps of aliases first.
+ for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) {
+ CopyMap.erase(*AI);
+ AvailCopyMap.erase(*AI);
+ }
+ CopyMap[Def] = MI;
+ AvailCopyMap[Def] = MI;
+ for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) {
+ CopyMap[*SR] = MI;
+ AvailCopyMap[*SR] = MI;
+ }
+
+ // Remember source that's copied to Def. Once it's clobbered, then
+ // it's no longer available for copy propagation.
+ if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) ==
+ SrcMap[Src].end()) {
+ SrcMap[Src].push_back(Def);
+ }
+
+ continue;
+ }
+
+ // Not a copy.
+ SmallVector<unsigned, 2> Defs;
+ int RegMaskOpNum = -1;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ RegMaskOpNum = i;
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ continue;
+ }
+
+ // If 'Reg' is defined by a copy, the copy is no longer a candidate
+ // for elimination.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+ }
+
+ // The instruction has a register mask operand which means that it clobbers
+ // a large set of registers. It is possible to use the register mask to
+ // prune the available copies, but treat it like a basic block boundary for
+ // now.
+ if (RegMaskOpNum >= 0) {
+ // Erase any MaybeDeadCopies whose destination register is clobbered.
+ const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum);
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ unsigned Reg = (*DI)->getOperand(0).getReg();
+ if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
+ continue;
+ removeCopy(*DI);
+ Changed = true;
+ ++NumDeletes;
+ }
+
+ // Clear all data structures as if we were beginning a new basic block.
+ MaybeDeadCopies.clear();
+ AvailCopyMap.clear();
+ CopyMap.clear();
+ SrcMap.clear();
+ continue;
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+
+ // No longer defined by a copy.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ CopyMap.erase(*AI);
+ AvailCopyMap.erase(*AI);
+ }
+
+ // If 'Reg' is previously source of a copy, it is no longer available for
+ // copy propagation.
+ SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
+ }
+ }
+
+ // If MBB doesn't have successors, delete the copies whose defs are not used.
+ // If MBB does have successors, then conservative assume the defs are live-out
+ // since we don't want to trust live-in lists.
+ if (MBB.succ_empty()) {
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
+ removeCopy(*DI);
+ Changed = true;
+ ++NumDeletes;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= CopyPropagateBlock(*I);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 000000000000..04c8ecbf9bdc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,59 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+namespace llvm {
+TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
+TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
+}
+
+char MachineDominatorTree::ID = 0;
+
+INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
+ "MachineDominator Tree Construction", true, true)
+
+char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
+
+void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+
+ return false;
+}
+
+MachineDominatorTree::MachineDominatorTree()
+ : MachineFunctionPass(ID) {
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ DT = new DominatorTreeBase<MachineBasicBlock>(false);
+}
+
+MachineDominatorTree::~MachineDominatorTree() {
+ delete DT;
+}
+
+void MachineDominatorTree::releaseMemory() {
+ DT->releaseMemory();
+}
+
+void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
+ DT->print(OS);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 000000000000..04321f329282
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,898 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function. This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineFunction implementation
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+MachineFunctionInfo::~MachineFunctionInfo() {}
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+ MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
+ unsigned FunctionNum, MachineModuleInfo &mmi,
+ GCModuleInfo* gmi)
+ : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
+ if (TM.getRegisterInfo())
+ RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
+ else
+ RegInfo = 0;
+ MFInfo = 0;
+ FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(),
+ TM.Options.RealignStack);
+ if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment))
+ FrameInfo->ensureMaxAlignment(Fn->getAttributes().
+ getStackAlignment(AttributeSet::FunctionIndex));
+ ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout());
+ Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
+ // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
+ if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
+ Alignment = std::max(Alignment,
+ TM.getTargetLowering()->getPrefFunctionAlignment());
+ FunctionNumber = FunctionNum;
+ JumpTableInfo = 0;
+}
+
+MachineFunction::~MachineFunction() {
+ // Don't call destructors on MachineInstr and MachineOperand. All of their
+ // memory comes from the BumpPtrAllocator which is about to be purged.
+ //
+ // Do call MachineBasicBlock destructors, it contains std::vectors.
+ for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I))
+ I->Insts.clearAndLeakNodesUnsafely();
+
+ InstructionRecycler.clear(Allocator);
+ OperandRecycler.clear(Allocator);
+ BasicBlockRecycler.clear(Allocator);
+ if (RegInfo) {
+ RegInfo->~MachineRegisterInfo();
+ Allocator.Deallocate(RegInfo);
+ }
+ if (MFInfo) {
+ MFInfo->~MachineFunctionInfo();
+ Allocator.Deallocate(MFInfo);
+ }
+
+ FrameInfo->~MachineFrameInfo();
+ Allocator.Deallocate(FrameInfo);
+
+ ConstantPool->~MachineConstantPool();
+ Allocator.Deallocate(ConstantPool);
+
+ if (JumpTableInfo) {
+ JumpTableInfo->~MachineJumpTableInfo();
+ Allocator.Deallocate(JumpTableInfo);
+ }
+}
+
+/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it
+/// does already exist, allocate one.
+MachineJumpTableInfo *MachineFunction::
+getOrCreateJumpTableInfo(unsigned EntryKind) {
+ if (JumpTableInfo) return JumpTableInfo;
+
+ JumpTableInfo = new (Allocator)
+ MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
+ return JumpTableInfo;
+}
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+ if (empty()) { MBBNumbering.clear(); return; }
+ MachineFunction::iterator MBBI, E = end();
+ if (MBB == 0)
+ MBBI = begin();
+ else
+ MBBI = MBB;
+
+ // Figure out the block number this should have.
+ unsigned BlockNo = 0;
+ if (MBBI != begin())
+ BlockNo = prior(MBBI)->getNumber()+1;
+
+ for (; MBBI != E; ++MBBI, ++BlockNo) {
+ if (MBBI->getNumber() != (int)BlockNo) {
+ // Remove use of the old number.
+ if (MBBI->getNumber() != -1) {
+ assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+ "MBB number mismatch!");
+ MBBNumbering[MBBI->getNumber()] = 0;
+ }
+
+ // If BlockNo is already taken, set that block's number to -1.
+ if (MBBNumbering[BlockNo])
+ MBBNumbering[BlockNo]->setNumber(-1);
+
+ MBBNumbering[BlockNo] = MBBI;
+ MBBI->setNumber(BlockNo);
+ }
+ }
+
+ // Okay, all the blocks are renumbered. If we have compactified the block
+ // numbering, shrink MBBNumbering now.
+ assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+ MBBNumbering.resize(BlockNo);
+}
+
+/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
+ DebugLoc DL, bool NoImp) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, MCID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+/// This function also serves as the MachineInstr destructor - the real
+/// ~MachineInstr() destructor must be empty.
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+ // Strip it for parts. The operand array and the MI object itself are
+ // independently recyclable.
+ if (MI->Operands)
+ deallocateOperandArray(MI->CapOperands, MI->Operands);
+ // Don't call ~MachineInstr() which must be trivial anyway because
+ // ~MachineFunction drops whole lists of MachineInstrs wihout calling their
+ // destructors.
+ InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+ return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+ MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+ assert(MBB->getParent() == this && "MBB parent mismatch!");
+ MBB->~MachineBasicBlock();
+ BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
+ uint64_t s, unsigned base_alignment,
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
+ return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
+ TBAAInfo, Ranges);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ int64_t Offset, uint64_t Size) {
+ return new (Allocator)
+ MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+ MMO->getOffset()+Offset),
+ MMO->getFlags(), Size,
+ MMO->getBaseAlignment(), 0);
+}
+
+MachineInstr::mmo_iterator
+MachineFunction::allocateMemRefsArray(unsigned long Num) {
+ return Allocator.Allocate<MachineMemOperand *>(Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of load mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isLoad())
+ ++Num;
+
+ // Allocate a new array and populate it with the load information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isLoad()) {
+ if (!(*I)->isStore())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the store flag.
+ MachineMemOperand *JustLoad =
+ getMachineMemOperand((*I)->getPointerInfo(),
+ (*I)->getFlags() & ~MachineMemOperand::MOStore,
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
+ Result[Index] = JustLoad;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of load mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isStore())
+ ++Num;
+
+ // Allocate a new array and populate it with the store information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isStore()) {
+ if (!(*I)->isLoad())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the load flag.
+ MachineMemOperand *JustStore =
+ getMachineMemOperand((*I)->getPointerInfo(),
+ (*I)->getFlags() & ~MachineMemOperand::MOLoad,
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
+ Result[Index] = JustStore;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineFunction::dump() const {
+ print(dbgs());
+}
+#endif
+
+StringRef MachineFunction::getName() const {
+ assert(getFunction() && "No function!");
+ return getFunction()->getName();
+}
+
+void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+ OS << "# Machine code for function " << getName() << ": ";
+ if (RegInfo) {
+ OS << (RegInfo->isSSA() ? "SSA" : "Post SSA");
+ if (!RegInfo->tracksLiveness())
+ OS << ", not tracking liveness";
+ }
+ OS << '\n';
+
+ // Print Frame Information
+ FrameInfo->print(*this, OS);
+
+ // Print JumpTable Information
+ if (JumpTableInfo)
+ JumpTableInfo->print(OS);
+
+ // Print Constant Pool
+ ConstantPool->print(OS);
+
+ const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+
+ if (RegInfo && !RegInfo->livein_empty()) {
+ OS << "Function Live Ins: ";
+ for (MachineRegisterInfo::livein_iterator
+ I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+ OS << PrintReg(I->first, TRI);
+ if (I->second)
+ OS << " in " << PrintReg(I->second, TRI);
+ if (llvm::next(I) != E)
+ OS << ", ";
+ }
+ OS << '\n';
+ }
+
+ for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
+ OS << '\n';
+ BB->print(OS, Indexes);
+ }
+
+ OS << "\n# End machine code for function " << getName() << ".\n\n";
+}
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const MachineFunction *F) {
+ return "CFG for '" + F->getName().str() + "' function";
+ }
+
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ std::string OutStr;
+ {
+ raw_string_ostream OSS(OutStr);
+
+ if (isSimple()) {
+ OSS << "BB#" << Node->getNumber();
+ if (const BasicBlock *BB = Node->getBasicBlock())
+ OSS << ": " << BB->getName();
+ } else
+ Node->print(OSS);
+ }
+
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ }
+ return OutStr;
+ }
+ };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getName());
+#else
+ errs() << "MachineFunction::viewCFG is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getName(), true);
+#else
+ errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// addLiveIn - Add the specified physical register as a live-in value and
+/// create a corresponding virtual register for it.
+unsigned MachineFunction::addLiveIn(unsigned PReg,
+ const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = getRegInfo();
+ unsigned VReg = MRI.getLiveInVirtReg(PReg);
+ if (VReg) {
+ assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!");
+ return VReg;
+ }
+ VReg = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
+/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
+/// normal 'L' label is returned.
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
+ bool isLinkerPrivate) const {
+ assert(JumpTableInfo && "No jump tables");
+ assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
+ const MCAsmInfo &MAI = *getTarget().getMCAsmInfo();
+
+ const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() :
+ MAI.getPrivateGlobalPrefix();
+ SmallString<60> Name;
+ raw_svector_ostream(Name)
+ << Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
+/// base.
+MCSymbol *MachineFunction::getPICBaseSymbol() const {
+ const MCAsmInfo &MAI = *Target.getMCAsmInfo();
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(getFunctionNumber())+"$pb");
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// ensureMaxAlignment - Make sure the function is at least Align bytes
+/// aligned.
+void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
+ if (!TFI.isStackRealignable() || !RealignOption)
+ assert(Align <= TFI.getStackAlignment() &&
+ "For targets without stack realignment, Align is out of limit!");
+ if (MaxAlignment < Align) MaxAlignment = Align;
+}
+
+/// clampStackAlignment - Clamp the alignment if requested and emit a warning.
+static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
+ unsigned StackAlign) {
+ if (!ShouldClamp || Align <= StackAlign)
+ return Align;
+ DEBUG(dbgs() << "Warning: requested alignment " << Align
+ << " exceeds the stack alignment " << StackAlign
+ << " when stack realignment is off" << '\n');
+ return StackAlign;
+}
+
+/// CreateStackObject - Create a new statically sized stack object, returning
+/// a nonnegative identifier to represent it.
+///
+int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
+ bool isSS, bool MayNeedSP, const AllocaInst *Alloca) {
+ assert(Size != 0 && "Cannot allocate zero size stack objects!");
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
+ Alloca));
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ assert(Index >= 0 && "Bad frame index!");
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+/// CreateSpillStackObject - Create a new statically sized stack object that
+/// represents a spill slot, returning a nonnegative identifier to represent
+/// it.
+///
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
+ unsigned Alignment) {
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ CreateStackObject(Size, Alignment, true, false);
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+/// CreateVariableSizedObject - Notify the MachineFrameInfo object that a
+/// variable sized object has been created. This must be created whenever a
+/// variable sized object is created, whether or not the index returned is
+/// actually used.
+///
+int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) {
+ HasVarSizedObjects = true;
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0));
+ ensureMaxAlignment(Alignment);
+ return (int)Objects.size()-NumFixedObjects-1;
+}
+
+/// CreateFixedObject - Create a new object at a fixed location on the stack.
+/// All fixed objects should be created before other objects are created for
+/// efficiency. By default, fixed objects are immutable. This returns an
+/// index with a negative value.
+///
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+ bool Immutable) {
+ assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = TFI.getStackAlignment();
+ unsigned Align = MinAlign(SPOffset, StackAlign);
+ Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Align, TFI.getStackAlignment());
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+ /*isSS*/ false,
+ /*NeedSP*/ false,
+ /*Alloca*/ 0));
+ return -++NumFixedObjects;
+}
+
+
+BitVector
+MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
+ assert(MBB && "MBB must be valid");
+ const MachineFunction *MF = MBB->getParent();
+ assert(MF && "MBB must be part of a MachineFunction");
+ const TargetMachine &TM = MF->getTarget();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ BitVector BV(TRI->getNumRegs());
+
+ // Before CSI is calculated, no registers are considered pristine. They can be
+ // freely used and PEI will make sure they are saved.
+ if (!isCalleeSavedInfoValid())
+ return BV;
+
+ for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ BV.set(*CSR);
+
+ // The entry MBB always has all CSRs pristine.
+ if (MBB == &MF->front())
+ return BV;
+
+ // On other MBBs the saved CSRs are not pristine.
+ const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo();
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I)
+ BV.reset(I->getReg());
+
+ return BV;
+}
+
+unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+ if (isDeadObjectIndex(i))
+ continue;
+ Offset += getObjectSize(i);
+ unsigned Align = getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (adjustsStack() || hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
+
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
+ if (Objects.empty()) return;
+
+ const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
+ int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ OS << "Frame Objects:\n";
+
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
+ if (SO.Size == ~0ULL) {
+ OS << "dead\n";
+ continue;
+ }
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size=" << SO.Size;
+ OS << ", align=" << SO.Alignment;
+
+ if (i < NumFixedObjects)
+ OS << ", fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << ", at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getEntrySize - Return the size of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
+ // The size of a jump table entry is 4 bytes unless the entry is just the
+ // address of a block, in which case it is the pointer size.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerSize();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return 8;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return 4;
+ case MachineJumpTableInfo::EK_Inline:
+ return 0;
+ }
+ llvm_unreachable("Unknown jump table encoding!");
+}
+
+/// getEntryAlignment - Return the alignment of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
+ // The alignment of a jump table entry is the alignment of int32 unless the
+ // entry is just the address of a block, in which case it is the pointer
+ // alignment.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerABIAlignment();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return TD.getABIIntegerTypeAlignment(64);
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return TD.getABIIntegerTypeAlignment(32);
+ case MachineJumpTableInfo::EK_Inline:
+ return 1;
+ }
+ llvm_unreachable("Unknown jump table encoding!");
+}
+
+/// createJumpTableIndex - Create a new jump table entry in the jump table info.
+///
+unsigned MachineJumpTableInfo::createJumpTableIndex(
+ const std::vector<MachineBasicBlock*> &DestBBs) {
+ assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+ JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+ return JumpTables.size()-1;
+}
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+ ReplaceMBBInJumpTable(i, Old, New);
+ return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+ MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ MachineJumpTableEntry &JTE = JumpTables[Idx];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
+ if (JumpTables.empty()) return;
+
+ OS << "Jump Tables:\n";
+
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+ OS << " jt#" << i << ": ";
+ for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
+ OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+ }
+
+ OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
+#endif
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+void MachineConstantPoolValue::anchor() { }
+
+Type *MachineConstantPoolEntry::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+
+unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getRelocationInfo();
+ return Val.ConstVal->getRelocationInfo();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].isMachineConstantPoolEntry())
+ delete Constants[i].Val.MachineCPVal;
+ for (DenseSet<MachineConstantPoolValue*>::iterator I =
+ MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
+ I != E; ++I)
+ delete *I;
+}
+
+/// CanShareConstantPoolEntry - Test whether the given two constants
+/// can be allocated the same constant pool entry.
+static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
+ const DataLayout *TD) {
+ // Handle the trivial case quickly.
+ if (A == B) return true;
+
+ // If they have the same type but weren't the same constant, quickly
+ // reject them.
+ if (A->getType() == B->getType()) return false;
+
+ // We can't handle structs or arrays.
+ if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
+ isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
+ return false;
+
+ // For now, only support constants with the same size.
+ uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
+ if (StoreSize != TD->getTypeStoreSize(B->getType()) ||
+ StoreSize > 128)
+ return false;
+
+ Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
+
+ // Try constant folding a bitcast of both instructions to an integer. If we
+ // get two identical ConstantInt's, then we are good to share them. We use
+ // the constant folding APIs to do this so that we get the benefit of
+ // DataLayout.
+ if (isa<PointerType>(A->getType()))
+ A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
+ const_cast<Constant*>(A), TD);
+ else if (A->getType() != IntTy)
+ A = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
+ const_cast<Constant*>(A), TD);
+ if (isa<PointerType>(B->getType()))
+ B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
+ const_cast<Constant*>(B), TD);
+ else if (B->getType() != IntTy)
+ B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
+ const_cast<Constant*>(B), TD);
+
+ return A == B;
+}
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one. User must specify the log2 of the minimum required
+/// alignment for the object.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (!Constants[i].isMachineConstantPoolEntry() &&
+ CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) {
+ if ((unsigned)Constants[i].getAlignment() < Alignment)
+ Constants[i].Alignment = Alignment;
+ return i;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+ return Constants.size()-1;
+}
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ int Idx = V->getExistingMachineCPValue(this, Alignment);
+ if (Idx != -1) {
+ MachineCPVsSharingEntries.insert(V);
+ return (unsigned)Idx;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+ return Constants.size()-1;
+}
+
+void MachineConstantPool::print(raw_ostream &OS) const {
+ if (Constants.empty()) return;
+
+ OS << "Constant Pool:\n";
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ OS << " cp#" << i << ": ";
+ if (Constants[i].isMachineConstantPoolEntry())
+ Constants[i].Val.MachineCPVal->print(OS);
+ else
+ OS << *(const Value*)Constants[i].Val.ConstVal;
+ OS << ", align=" << Constants[i].getAlignment();
+ OS << "\n";
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineConstantPool::dump() const { print(dbgs()); }
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
new file mode 100644
index 000000000000..35591e1649d3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -0,0 +1,57 @@
+//===-- MachineFunctionAnalysis.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionAnalysis members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+using namespace llvm;
+
+char MachineFunctionAnalysis::ID = 0;
+
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) :
+ FunctionPass(ID), TM(tm), MF(0) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineFunctionAnalysis::~MachineFunctionAnalysis() {
+ releaseMemory();
+ assert(!MF && "MachineFunctionAnalysis left initialized!");
+}
+
+void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+}
+
+bool MachineFunctionAnalysis::doInitialization(Module &M) {
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI && "MMI not around yet??");
+ MMI->setModule(&M);
+ NextFnNum = 0;
+ return false;
+}
+
+
+bool MachineFunctionAnalysis::runOnFunction(Function &F) {
+ assert(!MF && "MachineFunctionAnalysis already initialized!");
+ MF = new MachineFunction(&F, TM, NextFnNum++,
+ getAnalysis<MachineModuleInfo>(),
+ getAnalysisIfAvailable<GCModuleInfo>());
+ return false;
+}
+
+void MachineFunctionAnalysis::releaseMemory() {
+ delete MF;
+ MF = 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
new file mode 100644
index 000000000000..674cc80a006c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -0,0 +1,56 @@
+//===-- MachineFunctionPass.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionPass members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Function.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+using namespace llvm;
+
+Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createMachineFunctionPrinterPass(O, Banner);
+}
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
+ return runOnMachineFunction(MF);
+}
+
+void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+
+ // MachineFunctionPass preserves all LLVM IR passes, but there's no
+ // high-level way to express this. Instead, just list a bunch of
+ // passes explicitly. This does not include setPreservesCFG,
+ // because CodeGen overloads that to mean preserving the MachineBasicBlock
+ // CFG in addition to the LLVM IR CFG.
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved("scalar-evolution");
+ AU.addPreserved("iv-users");
+ AU.addPreserved("memdep");
+ AU.addPreserved("live-values");
+ AU.addPreserved("domtree");
+ AU.addPreserved("domfrontier");
+ AU.addPreserved("loops");
+ AU.addPreserved("lda");
+
+ FunctionPass::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
new file mode 100644
index 000000000000..fa9c821b2af7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -0,0 +1,67 @@
+//===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineFunctionPrinterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+/// MachineFunctionPrinterPass - This is a pass to dump the IR of a
+/// MachineFunction.
+///
+struct MachineFunctionPrinterPass : public MachineFunctionPass {
+ static char ID;
+
+ raw_ostream &OS;
+ const std::string Banner;
+
+ MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { }
+ MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
+ : MachineFunctionPass(ID), OS(os), Banner(banner) {}
+
+ const char *getPassName() const { return "MachineFunction Printer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ OS << "# " << Banner << ":\n";
+ MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
+ return false;
+ }
+};
+
+char MachineFunctionPrinterPass::ID = 0;
+}
+
+char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
+INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs",
+ "Machine Function Printer", false, false)
+
+namespace llvm {
+/// Returns a newly-created MachineFunction Printer pass. The
+/// default banner is empty.
+///
+MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
+ const std::string &Banner){
+ return new MachineFunctionPrinterPass(OS, Banner);
+}
+
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 000000000000..32d066894b5b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,1867 @@
+//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineOperand Implementation
+//===----------------------------------------------------------------------===//
+
+void MachineOperand::setReg(unsigned Reg) {
+ if (getReg() == Reg) return; // No change.
+
+ // Otherwise, we have to change the register. If this operand is embedded
+ // into a machine function, we need to update the old and new register's
+ // use/def lists.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ SmallContents.RegNo = Reg;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+
+ // Otherwise, just change the register, no problem. :)
+ SmallContents.RegNo = Reg;
+}
+
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ if (SubIdx && getSubReg())
+ SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+ setReg(Reg);
+ if (SubIdx)
+ setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (getSubReg()) {
+ Reg = TRI.getSubReg(Reg, getSubReg());
+ // Note that getSubReg() may return 0 if the sub-register doesn't exist.
+ // That won't happen in legal code.
+ setSubReg(0);
+ }
+ setReg(Reg);
+}
+
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert((!Val || !isDebug()) && "Marking a debug operation as def");
+ if (IsDef == Val)
+ return;
+ // MRI may keep uses and defs in different list positions.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ IsDef = Val;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+ IsDef = Val;
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+ // If this operand is currently a register operand, and if this is in a
+ // function, deregister the operand from the register's use/def list.
+ if (isReg() && isOnRegUseList())
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ MF->getRegInfo().removeRegOperandFromUseList(this);
+
+ OpKind = MO_Immediate;
+ Contents.ImmVal = ImmVal;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be an register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+ bool isKill, bool isDead, bool isUndef,
+ bool isDebug) {
+ MachineRegisterInfo *RegInfo = 0;
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ RegInfo = &MF->getRegInfo();
+ // If this operand is already a register operand, remove it from the
+ // register's use/def lists.
+ bool WasReg = isReg();
+ if (RegInfo && WasReg)
+ RegInfo->removeRegOperandFromUseList(this);
+
+ // Change this to a register and set the reg#.
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+ SubReg_TargetFlags = 0;
+ IsDef = isDef;
+ IsImp = isImp;
+ IsKill = isKill;
+ IsDead = isDead;
+ IsUndef = isUndef;
+ IsInternalRead = false;
+ IsEarlyClobber = false;
+ IsDebug = isDebug;
+ // Ensure isOnRegUseList() returns false.
+ Contents.Reg.Prev = 0;
+ // Preserve the tie when the operand was already a register.
+ if (!WasReg)
+ TiedTo = 0;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(this);
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand. Note that this should stay in sync with the hash_value overload
+/// below.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType() ||
+ getTargetFlags() != Other.getTargetFlags())
+ return false;
+
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+ getSubReg() == Other.getSubReg();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_CImmediate:
+ return getCImm() == Other.getCImm();
+ case MachineOperand::MO_FPImmediate:
+ return getFPImm() == Other.getFPImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_BlockAddress:
+ return getBlockAddress() == Other.getBlockAddress() &&
+ getOffset() == Other.getOffset();
+ case MO_RegisterMask:
+ return getRegMask() == Other.getRegMask();
+ case MachineOperand::MO_MCSymbol:
+ return getMCSymbol() == Other.getMCSymbol();
+ case MachineOperand::MO_Metadata:
+ return getMetadata() == Other.getMetadata();
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+// Note: this must stay exactly in sync with isIdenticalTo above.
+hash_code llvm::hash_value(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ // Register operands don't have target flags.
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
+ case MachineOperand::MO_FrameIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
+ MO.getOffset());
+ case MachineOperand::MO_JumpTableIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ MO.getSymbolName());
+ case MachineOperand::MO_GlobalAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
+ MO.getOffset());
+ case MachineOperand::MO_BlockAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getBlockAddress(), MO.getOffset());
+ case MachineOperand::MO_RegisterMask:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+ case MachineOperand::MO_Metadata:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
+ case MachineOperand::MO_MCSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+/// print - Print the specified machine operand.
+///
+void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // If the instruction is embedded into a basic block, we can find the
+ // target info for the instruction.
+ if (!TM)
+ if (const MachineInstr *MI = getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ TM = &MF->getTarget();
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ OS << PrintReg(getReg(), TRI, getSubReg());
+
+ if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
+ isInternalRead() || isEarlyClobber() || isTied()) {
+ OS << '<';
+ bool NeedComma = false;
+ if (isDef()) {
+ if (NeedComma) OS << ',';
+ if (isEarlyClobber())
+ OS << "earlyclobber,";
+ if (isImplicit())
+ OS << "imp-";
+ OS << "def";
+ NeedComma = true;
+ // <def,read-undef> only makes sense when getSubReg() is set.
+ // Don't clutter the output otherwise.
+ if (isUndef() && getSubReg())
+ OS << ",read-undef";
+ } else if (isImplicit()) {
+ OS << "imp-use";
+ NeedComma = true;
+ }
+
+ if (isKill()) {
+ if (NeedComma) OS << ',';
+ OS << "kill";
+ NeedComma = true;
+ }
+ if (isDead()) {
+ if (NeedComma) OS << ',';
+ OS << "dead";
+ NeedComma = true;
+ }
+ if (isUndef() && isUse()) {
+ if (NeedComma) OS << ',';
+ OS << "undef";
+ NeedComma = true;
+ }
+ if (isInternalRead()) {
+ if (NeedComma) OS << ',';
+ OS << "internal";
+ NeedComma = true;
+ }
+ if (isTied()) {
+ if (NeedComma) OS << ',';
+ OS << "tied";
+ if (TiedTo != 15)
+ OS << unsigned(TiedTo - 1);
+ NeedComma = true;
+ }
+ OS << '>';
+ }
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << getImm();
+ break;
+ case MachineOperand::MO_CImmediate:
+ getCImm()->getValue().print(OS, false);
+ break;
+ case MachineOperand::MO_FPImmediate:
+ if (getFPImm()->getType()->isFloatTy())
+ OS << getFPImm()->getValueAPF().convertToFloat();
+ else
+ OS << getFPImm()->getValueAPF().convertToDouble();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << "<BB#" << getMBB()->getNumber() << ">";
+ break;
+ case MachineOperand::MO_FrameIndex:
+ OS << "<fi#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "<cp#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_TargetIndex:
+ OS << "<ti#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "<jt#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << "<ga:";
+ WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << "<es:" << getSymbolName();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_BlockAddress:
+ OS << '<';
+ WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_RegisterMask:
+ OS << "<regmask>";
+ break;
+ case MachineOperand::MO_Metadata:
+ OS << '<';
+ WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+ OS << '>';
+ break;
+ case MachineOperand::MO_MCSymbol:
+ OS << "<MCSym=" << *getMCSymbol() << '>';
+ break;
+ }
+
+ if (unsigned TF = getTargetFlags())
+ OS << "[TF=" << TF << ']';
+}
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+ if (V == 0) return 0;
+ return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool() {
+ return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// the specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
+ return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable() {
+ return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT() {
+ return MachinePointerInfo(PseudoSourceValue::getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
+ return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
+ uint64_t s, unsigned int a,
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges)
+ : PtrInfo(ptrinfo), Size(s),
+ Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
+ TBAAInfo(TBAAInfo), Ranges(Ranges) {
+ assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
+ "invalid pointer value");
+ assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+ assert((isLoad() || isStore()) && "Not a load/store!");
+}
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(getOffset());
+ ID.AddInteger(Size);
+ ID.AddPointer(getValue());
+ ID.AddInteger(Flags);
+}
+
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+ // The Value and Offset may differ due to CSE. But the flags and size
+ // should be the same.
+ assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+ assert(MMO->getSize() == getSize() && "Size mismatch!");
+
+ if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+ // Update the alignment value.
+ Flags = (Flags & ((1 << MOMaxBits) - 1)) |
+ ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
+ // Also update the base and offset, because the new alignment may
+ // not be applicable with the old ones.
+ PtrInfo = MMO->PtrInfo;
+ }
+}
+
+/// getAlignment - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+uint64_t MachineMemOperand::getAlignment() const {
+ return MinAlign(getBaseAlignment(), getOffset());
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
+ assert((MMO.isLoad() || MMO.isStore()) &&
+ "SV has to be a load, store or both.");
+
+ if (MMO.isVolatile())
+ OS << "Volatile ";
+
+ if (MMO.isLoad())
+ OS << "LD";
+ if (MMO.isStore())
+ OS << "ST";
+ OS << MMO.getSize();
+
+ // Print the address information.
+ OS << "[";
+ if (!MMO.getValue())
+ OS << "<unknown>";
+ else
+ WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false);
+
+ // If the alignment of the memory reference itself differs from the alignment
+ // of the base pointer, print the base alignment explicitly, next to the base
+ // pointer.
+ if (MMO.getBaseAlignment() != MMO.getAlignment())
+ OS << "(align=" << MMO.getBaseAlignment() << ")";
+
+ if (MMO.getOffset() != 0)
+ OS << "+" << MMO.getOffset();
+ OS << "]";
+
+ // Print the alignment of the reference.
+ if (MMO.getBaseAlignment() != MMO.getAlignment() ||
+ MMO.getBaseAlignment() != MMO.getSize())
+ OS << "(align=" << MMO.getAlignment() << ")";
+
+ // Print TBAA info.
+ if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+ OS << "(tbaa=";
+ if (TBAAInfo->getNumOperands() > 0)
+ WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false);
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print nontemporal info.
+ if (MMO.isNonTemporal())
+ OS << "(nontemporal)";
+
+ return OS;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
+ if (MCID->ImplicitDefs)
+ for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (MCID->ImplicitUses)
+ for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
+ addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified by
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
+ const DebugLoc dl, bool NoImp)
+ : MCID(&tid), Parent(0), Operands(0), NumOperands(0),
+ Flags(0), AsmPrinterFlags(0),
+ NumMemRefs(0), MemRefs(0), debugLoc(dl) {
+ // Reserve space for the expected number of operands.
+ if (unsigned NumOps = MCID->getNumOperands() +
+ MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) {
+ CapOperands = OperandCapacity::get(NumOps);
+ Operands = MF.allocateOperandArray(CapOperands);
+ }
+
+ if (!NoImp)
+ addImplicitDefUseOperands(MF);
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+ : MCID(&MI.getDesc()), Parent(0), Operands(0), NumOperands(0),
+ Flags(0), AsmPrinterFlags(0),
+ NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
+ debugLoc(MI.getDebugLoc()) {
+ CapOperands = OperandCapacity::get(MI.getNumOperands());
+ Operands = MF.allocateOperandArray(CapOperands);
+
+ // Copy operands.
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+ addOperand(MF, MI.getOperand(i));
+
+ // Copy all the sensible flags.
+ setFlags(MI.Flags);
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+ if (MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return 0;
+}
+
+/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands already be on their use lists.
+void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (Operands[i].isReg())
+ MRI.removeRegOperandFromUseList(&Operands[i]);
+}
+
+/// AddRegOperandsToUseLists - Add all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands not be on their use lists yet.
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (Operands[i].isReg())
+ MRI.addRegOperandToUseList(&Operands[i]);
+}
+
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ MachineBasicBlock *MBB = getParent();
+ assert(MBB && "Use MachineInstrBuilder to add operands to dangling instrs");
+ MachineFunction *MF = MBB->getParent();
+ assert(MF && "Use MachineInstrBuilder to add operands to dangling instrs");
+ addOperand(*MF, Op);
+}
+
+/// Move NumOps MachineOperands from Src to Dst, with support for overlapping
+/// ranges. If MRI is non-null also update use-def chains.
+static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
+ unsigned NumOps, MachineRegisterInfo *MRI) {
+ if (MRI)
+ return MRI->moveOperands(Dst, Src, NumOps);
+
+ // Here it would be convenient to call memmove, so that isn't allowed because
+ // MachineOperand has a constructor and so isn't a POD type.
+ if (Dst < Src)
+ for (unsigned i = 0; i != NumOps; ++i)
+ new (Dst + i) MachineOperand(Src[i]);
+ else
+ for (unsigned i = NumOps; i ; --i)
+ new (Dst + i - 1) MachineOperand(Src[i - 1]);
+}
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
+ assert(MCID && "Cannot add operands before providing an instr descriptor");
+
+ // Check if we're adding one of our existing operands.
+ if (&Op >= Operands && &Op < Operands + NumOperands) {
+ // This is unusual: MI->addOperand(MI->getOperand(i)).
+ // If adding Op requires reallocating or moving existing operands around,
+ // the Op reference could go stale. Support it by copying Op.
+ MachineOperand CopyOp(Op);
+ return addOperand(MF, CopyOp);
+ }
+
+ // Find the insert location for the new operand. Implicit registers go at
+ // the end, everything else goes before the implicit regs.
+ //
+ // FIXME: Allow mixed explicit and implicit operands on inline asm.
+ // InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as
+ // implicit-defs, but they must not be moved around. See the FIXME in
+ // InstrEmitter.cpp.
+ unsigned OpNo = getNumOperands();
+ bool isImpReg = Op.isReg() && Op.isImplicit();
+ if (!isImpReg && !isInlineAsm()) {
+ while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
+ --OpNo;
+ assert(!Operands[OpNo].isTied() && "Cannot move tied operands");
+ }
+ }
+
+ // OpNo now points as the desired insertion point. Unless this is a variadic
+ // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
+ // RegMask operands go between the explicit and implicit operands.
+ assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
+ OpNo < MCID->getNumOperands()) &&
+ "Trying to add an operand to a machine instr that is already done!");
+
+ MachineRegisterInfo *MRI = getRegInfo();
+
+ // Determine if the Operands array needs to be reallocated.
+ // Save the old capacity and operand array.
+ OperandCapacity OldCap = CapOperands;
+ MachineOperand *OldOperands = Operands;
+ if (!OldOperands || OldCap.getSize() == getNumOperands()) {
+ CapOperands = OldOperands ? OldCap.getNext() : OldCap.get(1);
+ Operands = MF.allocateOperandArray(CapOperands);
+ // Move the operands before the insertion point.
+ if (OpNo)
+ moveOperands(Operands, OldOperands, OpNo, MRI);
+ }
+
+ // Move the operands following the insertion point.
+ if (OpNo != NumOperands)
+ moveOperands(Operands + OpNo + 1, OldOperands + OpNo, NumOperands - OpNo,
+ MRI);
+ ++NumOperands;
+
+ // Deallocate the old operand array.
+ if (OldOperands != Operands && OldOperands)
+ MF.deallocateOperandArray(OldCap, OldOperands);
+
+ // Copy Op into place. It still needs to be inserted into the MRI use lists.
+ MachineOperand *NewMO = new (Operands + OpNo) MachineOperand(Op);
+ NewMO->ParentMI = this;
+
+ // When adding a register operand, tell MRI about it.
+ if (NewMO->isReg()) {
+ // Ensure isOnRegUseList() returns false, regardless of Op's status.
+ NewMO->Contents.Reg.Prev = 0;
+ // Ignore existing ties. This is not a property that can be copied.
+ NewMO->TiedTo = 0;
+ // Add the new operand to MRI, but only for instructions in an MBB.
+ if (MRI)
+ MRI->addRegOperandToUseList(NewMO);
+ // The MCID operand information isn't accurate until we start adding
+ // explicit operands. The implicit operands are added first, then the
+ // explicits are inserted before them.
+ if (!isImpReg) {
+ // Tie uses to defs as indicated in MCInstrDesc.
+ if (NewMO->isUse()) {
+ int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO);
+ if (DefIdx != -1)
+ tieOperands(DefIdx, OpNo);
+ }
+ // If the register operand is flagged as early, mark the operand as such.
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ NewMO->setIsEarlyClobber(true);
+ }
+ }
+}
+
+/// RemoveOperand - Erase an operand from an instruction, leaving it with one
+/// fewer operand than it started with.
+///
+void MachineInstr::RemoveOperand(unsigned OpNo) {
+ assert(OpNo < getNumOperands() && "Invalid operand number");
+ untieRegOperand(OpNo);
+
+#ifndef NDEBUG
+ // Moving tied operands would break the ties.
+ for (unsigned i = OpNo + 1, e = getNumOperands(); i != e; ++i)
+ if (Operands[i].isReg())
+ assert(!Operands[i].isTied() && "Cannot move tied operands");
+#endif
+
+ MachineRegisterInfo *MRI = getRegInfo();
+ if (MRI && Operands[OpNo].isReg())
+ MRI->removeRegOperandFromUseList(Operands + OpNo);
+
+ // Don't call the MachineOperand destructor. A lot of this code depends on
+ // MachineOperand having a trivial destructor anyway, and adding a call here
+ // wouldn't make it 'destructor-correct'.
+
+ if (unsigned N = NumOperands - 1 - OpNo)
+ moveOperands(Operands + OpNo, Operands + OpNo + 1, N, MRI);
+ --NumOperands;
+}
+
+/// addMemOperand - Add a MachineMemOperand to the machine instruction.
+/// This function should be used only occasionally. The setMemRefs function
+/// is the primary method for setting up a MachineInstr's MemRefs list.
+void MachineInstr::addMemOperand(MachineFunction &MF,
+ MachineMemOperand *MO) {
+ mmo_iterator OldMemRefs = MemRefs;
+ unsigned OldNumMemRefs = NumMemRefs;
+
+ unsigned NewNum = NumMemRefs + 1;
+ mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+
+ std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
+ NewMemRefs[NewNum - 1] = MO;
+ setMemRefs(NewMemRefs, NewMemRefs + NewNum);
+}
+
+bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
+ assert(!isBundledWithPred() && "Must be called on bundle header");
+ for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) {
+ if (MII->getDesc().getFlags() & Mask) {
+ if (Type == AnyInBundle)
+ return true;
+ } else {
+ if (Type == AllInBundle && !MII->isBundle())
+ return false;
+ }
+ // This was the last instruction in the bundle.
+ if (!MII->isBundledWithSucc())
+ return Type == AllInBundle;
+ }
+}
+
+bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
+ MICheckType Check) const {
+ // If opcodes or number of operands are not the same then the two
+ // instructions are obviously not identical.
+ if (Other->getOpcode() != getOpcode() ||
+ Other->getNumOperands() != getNumOperands())
+ return false;
+
+ if (isBundle()) {
+ // Both instructions are bundles, compare MIs inside the bundle.
+ MachineBasicBlock::const_instr_iterator I1 = *this;
+ MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I2 = *Other;
+ MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end();
+ while (++I1 != E1 && I1->isInsideBundle()) {
+ ++I2;
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check))
+ return false;
+ }
+ }
+
+ // Check operands to make sure they match.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ const MachineOperand &OMO = Other->getOperand(i);
+ if (!MO.isReg()) {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ continue;
+ }
+
+ // Clients may or may not want to ignore defs when testing for equality.
+ // For example, machine CSE pass only cares about finding common
+ // subexpressions, so it's safe to ignore virtual register defs.
+ if (MO.isDef()) {
+ if (Check == IgnoreDefs)
+ continue;
+ else if (Check == IgnoreVRegDefs) {
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+ if (MO.getReg() != OMO.getReg())
+ return false;
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isDead() != OMO.isDead())
+ return false;
+ }
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isKill() != OMO.isKill())
+ return false;
+ }
+ }
+ // If DebugLoc does not match then two dbg.values are not identical.
+ if (isDebugValue())
+ if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown()
+ && getDebugLoc() != Other->getDebugLoc())
+ return false;
+ return true;
+}
+
+MachineInstr *MachineInstr::removeFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ return getParent()->remove(this);
+}
+
+MachineInstr *MachineInstr::removeFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ return getParent()->remove_instr(this);
+}
+
+void MachineInstr::eraseFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase(this);
+}
+
+void MachineInstr::eraseFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase_instr(this);
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+ unsigned NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic())
+ return NumOperands;
+
+ for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit())
+ NumOperands++;
+ }
+ return NumOperands;
+}
+
+void MachineInstr::bundleWithPred() {
+ assert(!isBundledWithPred() && "MI is already bundled with its predecessor");
+ setFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = this;
+ --Pred;
+ assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->setFlag(BundledSucc);
+}
+
+void MachineInstr::bundleWithSucc() {
+ assert(!isBundledWithSucc() && "MI is already bundled with its successor");
+ setFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = this;
+ ++Succ;
+ assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->setFlag(BundledPred);
+}
+
+void MachineInstr::unbundleFromPred() {
+ assert(isBundledWithPred() && "MI isn't bundled with its predecessor");
+ clearFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = this;
+ --Pred;
+ assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->clearFlag(BundledSucc);
+}
+
+void MachineInstr::unbundleFromSucc() {
+ assert(isBundledWithSucc() && "MI isn't bundled with its successor");
+ clearFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = this;
+ ++Succ;
+ assert(Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->clearFlag(BundledPred);
+}
+
+bool MachineInstr::isStackAligningInlineAsm() const {
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ return true;
+ }
+ return false;
+}
+
+InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const {
+ assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!");
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0);
+}
+
+int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
+ unsigned *GroupNo) const {
+ assert(isInlineAsm() && "Expected an inline asm instruction");
+ assert(OpIdx < getNumOperands() && "OpIdx out of range");
+
+ // Ignore queries about the initial operands.
+ if (OpIdx < InlineAsm::MIOp_FirstOperand)
+ return -1;
+
+ unsigned Group = 0;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ // If we reach the implicit register operands, stop looking.
+ if (!FlagMO.isImm())
+ return -1;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ if (i + NumOps > OpIdx) {
+ if (GroupNo)
+ *GroupNo = Group;
+ return i;
+ }
+ ++Group;
+ }
+ return -1;
+}
+
+const TargetRegisterClass*
+MachineInstr::getRegClassConstraint(unsigned OpIdx,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) const {
+ assert(getParent() && "Can't have an MBB reference here!");
+ assert(getParent()->getParent() && "Can't have an MF reference here!");
+ const MachineFunction &MF = *getParent()->getParent();
+
+ // Most opcodes have fixed constraints in their MCInstrDesc.
+ if (!isInlineAsm())
+ return TII->getRegClass(getDesc(), OpIdx, TRI, MF);
+
+ if (!getOperand(OpIdx).isReg())
+ return NULL;
+
+ // For tied uses on inline asm, get the constraint from the def.
+ unsigned DefIdx;
+ if (getOperand(OpIdx).isUse() && isRegTiedToDefOperand(OpIdx, &DefIdx))
+ OpIdx = DefIdx;
+
+ // Inline asm stores register class constraints in the flag word.
+ int FlagIdx = findInlineAsmFlagIdx(OpIdx);
+ if (FlagIdx < 0)
+ return NULL;
+
+ unsigned Flag = getOperand(FlagIdx).getImm();
+ unsigned RCID;
+ if (InlineAsm::hasRegClassConstraint(Flag, RCID))
+ return TRI->getRegClass(RCID);
+
+ // Assume that all registers in a memory operand are pointers.
+ if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
+ return TRI->getPointerRegClass(MF);
+
+ return NULL;
+}
+
+/// Return the number of instructions inside the MI bundle, not counting the
+/// header instruction.
+unsigned MachineInstr::getBundleSize() const {
+ MachineBasicBlock::const_instr_iterator I = this;
+ unsigned Size = 0;
+ while (I->isBundledWithSucc())
+ ++Size, ++I;
+ return Size;
+}
+
+/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
+/// the specific register or -1 if it is not found. It further tightens
+/// the search criteria to a use that kills the register if isKill is true.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isKill || MO.isKill())
+ return i;
+ }
+ return -1;
+}
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+ SmallVectorImpl<unsigned> *Ops) const {
+ bool PartDef = false; // Partial redefine.
+ bool FullDef = false; // Full define.
+ bool Use = false;
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (Ops)
+ Ops->push_back(i);
+ if (MO.isUse())
+ Use |= !MO.isUndef();
+ else if (MO.getSubReg() && !MO.isUndef())
+ // A partial <def,undef> doesn't count as reading the register.
+ PartDef = true;
+ else
+ FullDef = true;
+ }
+ // A partial redefine uses Reg unless there is also a full define.
+ return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int
+MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+ const TargetRegisterInfo *TRI) const {
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ // Accept regmask operands when Overlap is set.
+ // Ignore them when looking for a specific def operand (Overlap == false).
+ if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ return i;
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ bool Found = (MOReg == Reg);
+ if (!Found && TRI && isPhys &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ if (Overlap)
+ Found = TRI->regsOverlap(MOReg, Reg);
+ else
+ Found = TRI->isSubRegister(MOReg, Reg);
+ }
+ if (Found && (!isDead || MO.isDead()))
+ return i;
+ }
+ return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+ // Don't call MCID.findFirstPredOperandIdx() because this variant
+ // is sometimes called on an instruction that's not yet complete, and
+ // so the number of operands is less than the MCID indicates. In
+ // particular, the PTX target does this.
+ const MCInstrDesc &MCID = getDesc();
+ if (MCID.isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return i;
+ }
+
+ return -1;
+}
+
+// MachineOperand::TiedTo is 4 bits wide.
+const unsigned TiedMax = 15;
+
+/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other.
+///
+/// Use and def operands can be tied together, indicated by a non-zero TiedTo
+/// field. TiedTo can have these values:
+///
+/// 0: Operand is not tied to anything.
+/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1).
+/// TiedMax: Tied to an operand >= TiedMax-1.
+///
+/// The tied def must be one of the first TiedMax operands on a normal
+/// instruction. INLINEASM instructions allow more tied defs.
+///
+void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) {
+ MachineOperand &DefMO = getOperand(DefIdx);
+ MachineOperand &UseMO = getOperand(UseIdx);
+ assert(DefMO.isDef() && "DefIdx must be a def operand");
+ assert(UseMO.isUse() && "UseIdx must be a use operand");
+ assert(!DefMO.isTied() && "Def is already tied to another use");
+ assert(!UseMO.isTied() && "Use is already tied to another def");
+
+ if (DefIdx < TiedMax)
+ UseMO.TiedTo = DefIdx + 1;
+ else {
+ // Inline asm can use the group descriptors to find tied operands, but on
+ // normal instruction, the tied def must be within the first TiedMax
+ // operands.
+ assert(isInlineAsm() && "DefIdx out of range");
+ UseMO.TiedTo = TiedMax;
+ }
+
+ // UseIdx can be out of range, we'll search for it in findTiedOperandIdx().
+ DefMO.TiedTo = std::min(UseIdx + 1, TiedMax);
+}
+
+/// Given the index of a tied register operand, find the operand it is tied to.
+/// Defs are tied to uses and vice versa. Returns the index of the tied operand
+/// which must exist.
+unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
+ const MachineOperand &MO = getOperand(OpIdx);
+ assert(MO.isTied() && "Operand isn't tied");
+
+ // Normally TiedTo is in range.
+ if (MO.TiedTo < TiedMax)
+ return MO.TiedTo - 1;
+
+ // Uses on normal instructions can be out of range.
+ if (!isInlineAsm()) {
+ // Normal tied defs must be in the 0..TiedMax-1 range.
+ if (MO.isUse())
+ return TiedMax - 1;
+ // MO is a def. Search for the tied use.
+ for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &UseMO = getOperand(i);
+ if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1)
+ return i;
+ }
+ llvm_unreachable("Can't find tied use");
+ }
+
+ // Now deal with inline asm by parsing the operand group descriptor flags.
+ // Find the beginning of each operand group.
+ SmallVector<unsigned, 8> GroupIdx;
+ unsigned OpIdxGroup = ~0u;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ assert(FlagMO.isImm() && "Invalid tied operand on inline asm");
+ unsigned CurGroup = GroupIdx.size();
+ GroupIdx.push_back(i);
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ // OpIdx belongs to this operand group.
+ if (OpIdx > i && OpIdx < i + NumOps)
+ OpIdxGroup = CurGroup;
+ unsigned TiedGroup;
+ if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup))
+ continue;
+ // Operands in this group are tied to operands in TiedGroup which must be
+ // earlier. Find the number of operands between the two groups.
+ unsigned Delta = i - GroupIdx[TiedGroup];
+
+ // OpIdx is a use tied to TiedGroup.
+ if (OpIdxGroup == CurGroup)
+ return OpIdx - Delta;
+
+ // OpIdx is a def tied to this use group.
+ if (OpIdxGroup == TiedGroup)
+ return OpIdx + Delta;
+ }
+ llvm_unreachable("Invalid tied operand on inline asm");
+}
+
+/// clearKillInfo - Clears kill flags on all operands.
+///
+void MachineInstr::clearKillInfo() {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse())
+ MO.setIsKill(false);
+ }
+}
+
+void MachineInstr::substituteRegister(unsigned FromReg,
+ unsigned ToReg,
+ unsigned SubIdx,
+ const TargetRegisterInfo &RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (SubIdx)
+ ToReg = RegInfo.getSubReg(ToReg, SubIdx);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substPhysReg(ToReg, RegInfo);
+ }
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substVirtReg(ToReg, SubIdx, RegInfo);
+ }
+ }
+}
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
+ AliasAnalysis *AA,
+ bool &SawStore) const {
+ // Ignore stuff that we obviously can't move.
+ //
+ // Treat volatile loads as stores. This is not strictly necessary for
+ // volatiles, but it is required for atomic loads. It is not allowed to move
+ // a load across an atomic load with Ordering > Monotonic.
+ if (mayStore() || isCall() ||
+ (mayLoad() && hasOrderedMemoryRef())) {
+ SawStore = true;
+ return false;
+ }
+
+ if (isLabel() || isDebugValue() ||
+ isTerminator() || hasUnmodeledSideEffects())
+ return false;
+
+ // See if this instruction does a load. If so, we have to guarantee that the
+ // loaded value doesn't change between the load and the its intended
+ // destination. The check for isInvariantLoad gives the targe the chance to
+ // classify the load as always returning a constant, e.g. a constant pool
+ // load.
+ if (mayLoad() && !isInvariantLoad(AA))
+ // Otherwise, this is a real load. If there is a store between the load and
+ // end of block, we can't move it.
+ return !SawStore;
+
+ return true;
+}
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+ AliasAnalysis *AA,
+ unsigned DstReg) const {
+ bool SawStore = false;
+ if (!TII->isTriviallyReMaterializable(this, AA) ||
+ !isSafeToMove(TII, AA, SawStore))
+ return false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg())
+ continue;
+ // FIXME: For now, do not remat any instruction with register operands.
+ // Later on, we can loosen the restriction is the register operands have
+ // not been modified between the def and use. Note, this is different from
+ // MachineSink because the code is no longer in two-address form (at least
+ // partially).
+ if (MO.isUse())
+ return false;
+ else if (!MO.isDead() && MO.getReg() != DstReg)
+ return false;
+ }
+ return true;
+}
+
+/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
+/// or volatile memory reference, or if the information describing the memory
+/// reference is not available. Return false if it is known to have no ordered
+/// memory references.
+bool MachineInstr::hasOrderedMemoryRef() const {
+ // An instruction known never to access memory won't have a volatile access.
+ if (!mayStore() &&
+ !mayLoad() &&
+ !isCall() &&
+ !hasUnmodeledSideEffects())
+ return false;
+
+ // Otherwise, if the instruction has no memory reference information,
+ // conservatively assume it wasn't preserved.
+ if (memoperands_empty())
+ return true;
+
+ // Check the memory reference information for ordered references.
+ for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
+ if (!(*I)->isUnordered())
+ return true;
+
+ return false;
+}
+
+/// isInvariantLoad - Return true if this instruction is loading from a
+/// location whose value is invariant across the function. For example,
+/// loading a value from the constant pool or from the argument area
+/// of a function if it does not change. This should only return true of
+/// *all* loads the instruction does are invariant (if it does multiple loads).
+bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+ // If the instruction doesn't load at all, it isn't an invariant load.
+ if (!mayLoad())
+ return false;
+
+ // If the instruction has lost its memoperands, conservatively assume that
+ // it may not be an invariant load.
+ if (memoperands_empty())
+ return false;
+
+ const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+
+ for (mmo_iterator I = memoperands_begin(),
+ E = memoperands_end(); I != E; ++I) {
+ if ((*I)->isVolatile()) return false;
+ if ((*I)->isStore()) return false;
+ if ((*I)->isInvariant()) return true;
+
+ if (const Value *V = (*I)->getValue()) {
+ // A load from a constant PseudoSourceValue is invariant.
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV->isConstant(MFI))
+ continue;
+ // If we have an AliasAnalysis, ask it whether the memory is constant.
+ if (AA && AA->pointsToConstantMemory(
+ AliasAnalysis::Location(V, (*I)->getSize(),
+ (*I)->getTBAAInfo())))
+ continue;
+ }
+
+ // Otherwise assume conservatively.
+ return false;
+ }
+
+ // Everything checks out.
+ return true;
+}
+
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
+unsigned MachineInstr::isConstantValuePHI() const {
+ if (!isPHI())
+ return 0;
+ assert(getNumOperands() >= 3 &&
+ "It's illegal to have a PHI without source operands");
+
+ unsigned Reg = getOperand(1).getReg();
+ for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
+ if (getOperand(i).getReg() != Reg)
+ return 0;
+ return Reg;
+}
+
+bool MachineInstr::hasUnmodeledSideEffects() const {
+ if (hasProperty(MCID::UnmodeledSideEffects))
+ return true;
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ return true;
+ }
+
+ return false;
+}
+
+/// allDefsAreDead - Return true if all the defs of this instruction are dead.
+///
+bool MachineInstr::allDefsAreDead() const {
+ for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ if (!MO.isDead())
+ return false;
+ }
+ return true;
+}
+
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(MachineFunction &MF,
+ const MachineInstr *MI) {
+ for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ addOperand(MF, MO);
+ }
+}
+
+void MachineInstr::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << " " << *this;
+#endif
+}
+
+static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
+ raw_ostream &CommentOS) {
+ const LLVMContext &Ctx = MF->getFunction()->getContext();
+ if (!DL.isUnknown()) { // Print source line info.
+ DIScope Scope(DL.getScope(Ctx));
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (Scope.Verify())
+ CommentOS << Scope.getFilename();
+ else
+ CommentOS << "<unknown>";
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, MF, CommentOS);
+ CommentOS << " ]";
+ }
+ }
+}
+
+void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
+ bool SkipOpers) const {
+ // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+ const MachineFunction *MF = 0;
+ const MachineRegisterInfo *MRI = 0;
+ if (const MachineBasicBlock *MBB = getParent()) {
+ MF = MBB->getParent();
+ if (!TM && MF)
+ TM = &MF->getTarget();
+ if (MF)
+ MRI = &MF->getRegInfo();
+ }
+
+ // Save a list of virtual registers.
+ SmallVector<unsigned, 8> VirtRegs;
+
+ // Print explicitly defined operands on the left of an assignment syntax.
+ unsigned StartOp = 0, e = getNumOperands();
+ for (; StartOp < e && getOperand(StartOp).isReg() &&
+ getOperand(StartOp).isDef() &&
+ !getOperand(StartOp).isImplicit();
+ ++StartOp) {
+ if (StartOp != 0) OS << ", ";
+ getOperand(StartOp).print(OS, TM);
+ unsigned Reg = getOperand(StartOp).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ VirtRegs.push_back(Reg);
+ }
+
+ if (StartOp != 0)
+ OS << " = ";
+
+ // Print the opcode name.
+ if (TM && TM->getInstrInfo())
+ OS << TM->getInstrInfo()->getName(getOpcode());
+ else
+ OS << "UNKNOWN";
+
+ if (SkipOpers)
+ return;
+
+ // Print the rest of the operands.
+ bool OmittedAnyCallClobbers = false;
+ bool FirstOp = true;
+ unsigned AsmDescOp = ~0u;
+ unsigned AsmOpCount = 0;
+
+ if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
+ // Print asm string.
+ OS << " ";
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+
+ // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_MayLoad)
+ OS << " [mayload]";
+ if (ExtraInfo & InlineAsm::Extra_MayStore)
+ OS << " [maystore]";
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ OS << " [alignstack]";
+ if (getInlineAsmDialect() == InlineAsm::AD_ATT)
+ OS << " [attdialect]";
+ if (getInlineAsmDialect() == InlineAsm::AD_Intel)
+ OS << " [inteldialect]";
+
+ StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
+ FirstOp = false;
+ }
+
+
+ for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ VirtRegs.push_back(MO.getReg());
+
+ // Omit call-clobbered registers which aren't used anywhere. This makes
+ // call instructions much less noisy on targets where calls clobber lots
+ // of registers. Don't rely on MO.isDead() because we may be called before
+ // LiveVariables is run, or we may be looking at a non-allocatable reg.
+ if (MF && isCall() &&
+ MO.isReg() && MO.isImplicit() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (MRI.use_empty(Reg)) {
+ bool HasAliasLive = false;
+ for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true);
+ AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (!MRI.use_empty(AliasReg)) {
+ HasAliasLive = true;
+ break;
+ }
+ }
+ if (!HasAliasLive) {
+ OmittedAnyCallClobbers = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ if (FirstOp) FirstOp = false; else OS << ",";
+ OS << " ";
+ if (i < getDesc().NumOperands) {
+ const MCOperandInfo &MCOI = getDesc().OpInfo[i];
+ if (MCOI.isPredicate())
+ OS << "pred:";
+ if (MCOI.isOptionalDef())
+ OS << "opt:";
+ }
+ if (isDebugValue() && MO.isMetadata()) {
+ // Pretty print DBG_VALUE instructions.
+ const MDNode *MD = MO.getMetadata();
+ if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2)))
+ OS << "!\"" << MDS->getString() << '\"';
+ else
+ MO.print(OS, TM);
+ } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
+ } else if (i == AsmDescOp && MO.isImm()) {
+ // Pretty print the inline asm operand descriptor.
+ OS << '$' << AsmOpCount++;
+ unsigned Flag = MO.getImm();
+ switch (InlineAsm::getKind(Flag)) {
+ case InlineAsm::Kind_RegUse: OS << ":[reguse"; break;
+ case InlineAsm::Kind_RegDef: OS << ":[regdef"; break;
+ case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break;
+ case InlineAsm::Kind_Clobber: OS << ":[clobber"; break;
+ case InlineAsm::Kind_Imm: OS << ":[imm"; break;
+ case InlineAsm::Kind_Mem: OS << ":[mem"; break;
+ default: OS << ":[??" << InlineAsm::getKind(Flag); break;
+ }
+
+ unsigned RCID = 0;
+ if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ if (TM)
+ OS << ':' << TM->getRegisterInfo()->getRegClass(RCID)->getName();
+ else
+ OS << ":RC" << RCID;
+ }
+
+ unsigned TiedTo = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ OS << " tiedto:$" << TiedTo;
+
+ OS << ']';
+
+ // Compute the index of the next operand descriptor.
+ AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
+ } else
+ MO.print(OS, TM);
+ }
+
+ // Briefly indicate whether any call clobbers were omitted.
+ if (OmittedAnyCallClobbers) {
+ if (!FirstOp) OS << ",";
+ OS << " ...";
+ }
+
+ bool HaveSemi = false;
+ const unsigned PrintableFlags = FrameSetup;
+ if (Flags & PrintableFlags) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ OS << " flags: ";
+
+ if (Flags & FrameSetup)
+ OS << "FrameSetup";
+ }
+
+ if (!memoperands_empty()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+
+ OS << " mem:";
+ for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
+ i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ }
+
+ // Print the regclass of any virtual registers encountered.
+ if (MRI && !VirtRegs.empty()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ for (unsigned i = 0; i != VirtRegs.size(); ++i) {
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
+ OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]);
+ for (unsigned j = i+1; j != VirtRegs.size();) {
+ if (MRI->getRegClass(VirtRegs[j]) != RC) {
+ ++j;
+ continue;
+ }
+ if (VirtRegs[i] != VirtRegs[j])
+ OS << "," << PrintReg(VirtRegs[j]);
+ VirtRegs.erase(VirtRegs.begin()+j);
+ }
+ }
+ }
+
+ // Print debug location information.
+ if (isDebugValue() && getOperand(e - 1).isMetadata()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ DIVariable DV(getOperand(e - 1).getMetadata());
+ OS << " line no:" << DV.getLineNumber();
+ if (MDNode *InlinedAt = DV.getInlinedAt()) {
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
+ if (!InlinedAtDL.isUnknown()) {
+ OS << " inlined @[ ";
+ printDebugLoc(InlinedAtDL, MF, OS);
+ OS << " ]";
+ }
+ }
+ } else if (!debugLoc.isUnknown() && MF) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ OS << " dbg:";
+ printDebugLoc(debugLoc, MF, OS);
+ }
+
+ OS << '\n';
+}
+
+bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isKill())
+ // The register is already marked kill.
+ return true;
+ if (isPhysReg && isRegTiedToDefOperand(i))
+ // Two-address uses of physregs must not be marked kill.
+ return true;
+ MO.setIsKill();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isKill() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // A super-register kill already exists.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded kill operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsKill(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is killed. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ true /*IsKill*/));
+ return true;
+ }
+ return Found;
+}
+
+void MachineInstr::clearRegisterKills(unsigned Reg,
+ const TargetRegisterInfo *RegInfo) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ RegInfo = 0;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned OpReg = MO.getReg();
+ if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg)))
+ MO.setIsKill(false);
+ }
+}
+
+bool MachineInstr::addRegisterDead(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ MO.setIsDead();
+ Found = true;
+ } else if (hasAliases && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // There exists a super-register that's marked dead.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded dead operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsDead(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is dead. Add a
+ // new implicit operand if required.
+ if (Found || !AddIfNotFound)
+ return Found;
+
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ true /*IsDead*/));
+ return true;
+}
+
+void MachineInstr::addRegisterDefined(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) {
+ MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+ if (MO)
+ return;
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() &&
+ MO.getSubReg() == 0)
+ return;
+ }
+ }
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/));
+}
+
+void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
+ const TargetRegisterInfo &TRI) {
+ bool HasRegMask = false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (MO.isRegMask()) {
+ HasRegMask = true;
+ continue;
+ }
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ bool Dead = true;
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
+ if (TRI.regsOverlap(*I, Reg)) {
+ Dead = false;
+ break;
+ }
+ // If there are no uses, including partial uses, the def is dead.
+ if (Dead) MO.setIsDead();
+ }
+
+ // This is a call with a register mask operand.
+ // Mask clobbers are always dead, so add defs for the non-dead defines.
+ if (HasRegMask)
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
+ addRegisterDefined(*I, &TRI);
+}
+
+unsigned
+MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
+ // Build up a buffer of hash code components.
+ SmallVector<size_t, 8> HashComponents;
+ HashComponents.reserve(MI->getNumOperands() + 1);
+ HashComponents.push_back(MI->getOpcode());
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+
+ HashComponents.push_back(hash_value(MO));
+ }
+ return hash_combine_range(HashComponents.begin(), HashComponents.end());
+}
+
+void MachineInstr::emitError(StringRef Msg) const {
+ // Find the source location cookie.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = getNumOperands(); i != 0; --i) {
+ if (getOperand(i-1).isMetadata() &&
+ (LocMD = getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ if (const MachineBasicBlock *MBB = getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+ report_fatal_error(Msg);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
new file mode 100644
index 000000000000..77bcd1d7c8e3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -0,0 +1,330 @@
+//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+namespace {
+ class UnpackMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ UnpackMachineBundles() : MachineFunctionPass(ID) {
+ initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char UnpackMachineBundles::ID = 0;
+char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID;
+INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles",
+ "Unpack machine instruction bundles", false, false)
+
+bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
+ MIE = MBB->instr_end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // Remove BUNDLE instruction and the InsideBundle flags from bundled
+ // instructions.
+ if (MI->isBundle()) {
+ while (++MII != MIE && MII->isBundledWithPred()) {
+ MII->unbundleFromPred();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ }
+ MI->eraseFromParent();
+
+ Changed = true;
+ continue;
+ }
+
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
+
+
+namespace {
+ class FinalizeMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ FinalizeMachineBundles() : MachineFunctionPass(ID) {
+ initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char FinalizeMachineBundles::ID = 0;
+char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID;
+INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles",
+ "Finalize machine instruction bundles", false, false)
+
+bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ return llvm::finalizeBundles(MF);
+}
+
+
+/// finalizeBundle - Finalize a machine instruction bundle which includes
+/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle, it adds
+/// IsInternalRead markers to MachineOperands which are defined inside the
+/// bundle, and it copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ assert(FirstMI != LastMI && "Empty bundle?");
+ MIBundleBuilder Bundle(MBB, FirstMI, LastMI);
+
+ const TargetMachine &TM = MBB.getParent()->getTarget();
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(),
+ TII->get(TargetOpcode::BUNDLE));
+ Bundle.prepend(MIB);
+
+ SmallVector<unsigned, 32> LocalDefs;
+ SmallSet<unsigned, 32> LocalDefSet;
+ SmallSet<unsigned, 8> DeadDefSet;
+ SmallSet<unsigned, 16> KilledDefSet;
+ SmallVector<unsigned, 8> ExternUses;
+ SmallSet<unsigned, 8> ExternUseSet;
+ SmallSet<unsigned, 8> KilledUseSet;
+ SmallSet<unsigned, 8> UndefUseSet;
+ SmallVector<MachineOperand*, 4> Defs;
+ for (; FirstMI != LastMI; ++FirstMI) {
+ for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = FirstMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(&MO);
+ continue;
+ }
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (LocalDefSet.count(Reg)) {
+ MO.setIsInternalRead();
+ if (MO.isKill())
+ // Internal def is now killed.
+ KilledDefSet.insert(Reg);
+ } else {
+ if (ExternUseSet.insert(Reg)) {
+ ExternUses.push_back(Reg);
+ if (MO.isUndef())
+ UndefUseSet.insert(Reg);
+ }
+ if (MO.isKill())
+ // External def is now killed.
+ KilledUseSet.insert(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ MachineOperand &MO = *Defs[i];
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (LocalDefSet.insert(Reg)) {
+ LocalDefs.push_back(Reg);
+ if (MO.isDead()) {
+ DeadDefSet.insert(Reg);
+ }
+ } else {
+ // Re-defined inside the bundle, it's no longer killed.
+ KilledDefSet.erase(Reg);
+ if (!MO.isDead())
+ // Previously defined but dead.
+ DeadDefSet.erase(Reg);
+ }
+
+ if (!MO.isDead()) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (LocalDefSet.insert(SubReg))
+ LocalDefs.push_back(SubReg);
+ }
+ }
+ }
+
+ Defs.clear();
+ }
+
+ SmallSet<unsigned, 32> Added;
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ if (Added.insert(Reg)) {
+ // If it's not live beyond end of the bundle, mark it dead.
+ bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
+ MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
+ getImplRegState(true));
+ }
+ }
+
+ for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
+ unsigned Reg = ExternUses[i];
+ bool isKill = KilledUseSet.count(Reg);
+ bool isUndef = UndefUseSet.count(Reg);
+ MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
+ getImplRegState(true));
+ }
+}
+
+/// finalizeBundle - Same functionality as the previous finalizeBundle except
+/// the last instruction in the bundle is not provided as an input. This is
+/// used in cases where bundles are pre-determined by marking instructions
+/// with 'InsideBundle' marker. It returns the MBB instruction iterator that
+/// points to the end of the bundle.
+MachineBasicBlock::instr_iterator
+llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI) {
+ MachineBasicBlock::instr_iterator E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI);
+ while (LastMI != E && LastMI->isInsideBundle())
+ ++LastMI;
+ finalizeBundle(MBB, FirstMI, LastMI);
+ return LastMI;
+}
+
+/// finalizeBundles - Finalize instruction bundles in the specified
+/// MachineFunction. Return true if any bundles are finalized.
+bool llvm::finalizeBundles(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &MBB = *I;
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin();
+ MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
+ if (MII == MIE)
+ continue;
+ assert(!MII->isInsideBundle() &&
+ "First instr cannot be inside bundle before finalization!");
+
+ for (++MII; MII != MIE; ) {
+ if (!MII->isInsideBundle())
+ ++MII;
+ else {
+ MII = finalizeBundle(MBB, llvm::prior(MII));
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineOperand iterator
+//===----------------------------------------------------------------------===//
+
+MachineOperandIteratorBase::VirtRegInfo
+MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
+ SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
+ VirtRegInfo RI = { false, false, false };
+ for(; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ // Remember each (MI, OpNo) that refers to Reg.
+ if (Ops)
+ Ops->push_back(std::make_pair(MO.getParent(), getOperandNo()));
+
+ // Both defs and uses can read virtual registers.
+ if (MO.readsReg()) {
+ RI.Reads = true;
+ if (MO.isDef())
+ RI.Tied = true;
+ }
+
+ // Only defs can write.
+ if (MO.isDef())
+ RI.Writes = true;
+ else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo()))
+ RI.Tied = true;
+ }
+ return RI;
+}
+
+MachineOperandIteratorBase::PhysRegInfo
+MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ bool AllDefsDead = true;
+ PhysRegInfo PRI = {false, false, false, false, false, false};
+
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "analyzePhysReg not given a physical register!");
+ for (; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ PRI.Clobbers = true; // Regmask clobbers Reg.
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned MOReg = MO.getReg();
+ if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
+ continue;
+
+ bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg);
+ bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg);
+
+ if (IsRegOrSuperReg && MO.readsReg()) {
+ // Reg or a super-reg is read, and perhaps killed also.
+ PRI.Reads = true;
+ PRI.Kills = MO.isKill();
+ }
+
+ if (IsRegOrOverlapping && MO.readsReg()) {
+ PRI.ReadsOverlap = true;// Reg or an overlapping register is read.
+ }
+
+ if (!MO.isDef())
+ continue;
+
+ if (IsRegOrSuperReg) {
+ PRI.Defines = true; // Reg or a super-register is defined.
+ if (!MO.isDead())
+ AllDefsDead = false;
+ }
+ if (IsRegOrOverlapping)
+ PRI.Clobbers = true; // Reg or an overlapping reg is defined.
+ }
+
+ if (AllDefsDead && PRI.Defines)
+ PRI.DefinesDead = true; // Reg or super-register was defined and was dead.
+
+ return PRI;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
new file mode 100644
index 000000000000..ed3ed4d4d916
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -0,0 +1,1489 @@
+//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion on machine instructions. We
+// attempt to remove as much code from the body of a loop as possible.
+//
+// This pass does not attempt to throttle itself to limit register pressure.
+// The register allocation phases are expected to perform rematerialization
+// to recover when register pressure is high.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-licm"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+AvoidSpeculation("avoid-speculation",
+ cl::desc("MachineLICM should avoid speculation"),
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumHoisted,
+ "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+ "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+ "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+ "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+ "Number of machine instructions hoisted out of loops post regalloc");
+
+namespace {
+ class MachineLICM : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetLoweringBase *TLI;
+ const TargetRegisterInfo *TRI;
+ const MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const InstrItineraryData *InstrItins;
+ bool PreRegAlloc;
+
+ // Various analyses that we use...
+ AliasAnalysis *AA; // Alias analysis info.
+ MachineLoopInfo *MLI; // Current MachineLoopInfo
+ MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+
+ // State that is updated as we process loops
+ bool Changed; // True if a loop is changed.
+ bool FirstInLoop; // True if it's the first LICM in the loop.
+ MachineLoop *CurLoop; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+ // Exit blocks for CurLoop.
+ SmallVector<MachineBasicBlock*, 8> ExitBlocks;
+
+ bool isExitBlock(const MachineBasicBlock *MBB) const {
+ return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
+ ExitBlocks.end();
+ }
+
+ // Track 'estimated' register pressure.
+ SmallSet<unsigned, 32> RegSeen;
+ SmallVector<unsigned, 8> RegPressure;
+
+ // Register pressure "limit" per register class. If the pressure
+ // is higher than the limit, then it's considered high.
+ SmallVector<unsigned, 8> RegLimit;
+
+ // Register pressure on path leading from loop preheader to current BB.
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
+ // For each opcode, keep a list of potential CSE instructions.
+ DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
+ enum {
+ SpeculateFalse = 0,
+ SpeculateTrue = 1,
+ SpeculateUnknown = 2
+ };
+
+ // If a MBB does not dominate loop exiting blocks then it may not safe
+ // to hoist loads from this block.
+ // Tri-state: 0 - false, 1 - true, 2 - unknown
+ unsigned SpeculationState;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineLICM() :
+ MachineFunctionPass(ID), PreRegAlloc(true) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit MachineLICM(bool PreRA) :
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ RegSeen.clear();
+ RegPressure.clear();
+ RegLimit.clear();
+ BackTrace.clear();
+ for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
+ CI->second.clear();
+ CSEMap.clear();
+ }
+
+ private:
+ /// CandidateInfo - Keep track of information about hoisting candidates.
+ struct CandidateInfo {
+ MachineInstr *MI;
+ unsigned Def;
+ int FI;
+ CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+ : MI(mi), Def(def), FI(fi) {}
+ };
+
+ /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+ /// invariants out to the preheader.
+ void HoistRegionPostRA();
+
+ /// HoistPostRA - When an instruction is found to only use loop invariant
+ /// operands that is safe to hoist, this instruction is called to do the
+ /// dirty work.
+ void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+ /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
+ /// gather register def and frame object update information.
+ void ProcessMI(MachineInstr *MI,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVector<CandidateInfo, 32> &Candidates);
+
+ /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
+ /// current loop.
+ void AddToLiveIns(unsigned Reg);
+
+ /// IsLICMCandidate - Returns true if the instruction may be a suitable
+ /// candidate for LICM. e.g. If the instruction is a call, then it's
+ /// obviously not safe to hoist it.
+ bool IsLICMCandidate(MachineInstr &I);
+
+ /// IsLoopInvariantInst - Returns true if the instruction is loop
+ /// invariant. I.e., all virtual register operands are defined outside of
+ /// the loop, physical registers aren't accessed (explicitly or implicitly),
+ /// and the instruction is hoistable.
+ ///
+ bool IsLoopInvariantInst(MachineInstr &I);
+
+ /// HasLoopPHIUse - Return true if the specified instruction is used by any
+ /// phi node in the current loop.
+ bool HasLoopPHIUse(const MachineInstr *MI) const;
+
+ /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+ /// and an use in the current loop, return true if the target considered
+ /// it 'high'.
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ unsigned Reg) const;
+
+ bool IsCheapInstruction(MachineInstr &MI) const;
+
+ /// CanCauseHighRegPressure - Visit BBs from header to current BB,
+ /// check if hoisting an instruction of the given cost matrix can cause high
+ /// register pressure.
+ bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap);
+
+ /// UpdateBackTraceRegPressure - Traverse the back trace from header to
+ /// the current block and update their register pressures to reflect the
+ /// effect of hoisting MI from the current block to the preheader.
+ void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
+ /// IsProfitableToHoist - Return true if it is potentially profitable to
+ /// hoist the given loop invariant.
+ bool IsProfitableToHoist(MachineInstr &MI);
+
+ /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
+ /// If not then a load from this mbb may not be safe to hoist.
+ bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+
+ void EnterScope(MachineBasicBlock *MBB);
+
+ void ExitScope(MachineBasicBlock *MBB);
+
+ /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given
+ /// dominator tree node if its a leaf or all of its children are done. Walk
+ /// up the dominator tree to destroy ancestors which are now done.
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+
+ /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+ /// blocks dominated by the specified header block, and that are in the
+ /// current loop) in depth first order w.r.t the DominatorTree. This allows
+ /// us to visit definitions before uses, allowing us to hoist a loop body in
+ /// one pass without iteration.
+ ///
+ void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
+
+ /// getRegisterClassIDAndCost - For a given MI, register, and the operand
+ /// index, return the ID and cost of its representative register class by
+ /// reference.
+ void getRegisterClassIDAndCost(const MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ unsigned &RCId, unsigned &RCCost) const;
+
+ /// InitRegPressure - Find all virtual register references that are liveout
+ /// of the preheader to initialize the starting "register pressure". Note
+ /// this does not count live through (livein but not used) registers.
+ void InitRegPressure(MachineBasicBlock *BB);
+
+ /// UpdateRegPressure - Update estimate of register pressure after the
+ /// specified instruction.
+ void UpdateRegPressure(const MachineInstr *MI);
+
+ /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
+ /// the load itself could be hoisted. Return the unfolded and hoistable
+ /// load, or null if the load couldn't be unfolded or if it wouldn't
+ /// be hoistable.
+ MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
+ /// LookForDuplicate - Find an instruction amount PrevMIs that is a
+ /// duplicate of MI. Return this instruction if it's found.
+ const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs);
+
+ /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
+ /// the preheader that compute the same value. If it's found, do a RAU on
+ /// with the definition of the existing instruction rather than hoisting
+ /// the instruction to the preheader.
+ bool EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+
+ /// MayCSE - Return true if the given instruction will be CSE'd if it's
+ /// hoisted out of the loop.
+ bool MayCSE(MachineInstr *MI);
+
+ /// Hoist - When an instruction is found to only use loop invariant operands
+ /// that is safe to hoist, this instruction is called to do the dirty work.
+ /// It returns true if the instruction is hoisted.
+ bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
+
+ /// InitCSEMap - Initialize the CSE map with instructions that are in the
+ /// current loop preheader that may become duplicates of instructions that
+ /// are hoisted out of the loop.
+ void InitCSEMap(MachineBasicBlock *BB);
+
+ /// getCurPreheader - Get the preheader for the current loop, splitting
+ /// a critical edge if needed.
+ MachineBasicBlock *getCurPreheader();
+ };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+char &llvm::MachineLICMID = MachineLICM::ID;
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+
+/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
+/// loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+ // Check whether this loop even has a unique predecessor.
+ if (!CurLoop->getLoopPredecessor())
+ return false;
+ // Ok, now check to see if any of its outer loops do.
+ for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+ if (L->getLoopPredecessor())
+ return false;
+ // None of them did, so this is the outermost with a unique predecessor.
+ return true;
+}
+
+bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+ Changed = FirstInLoop = false;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TLI = TM->getTargetLowering();
+ TRI = TM->getRegisterInfo();
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ InstrItins = TM->getInstrItineraryData();
+
+ PreRegAlloc = MRI->isSSA();
+
+ if (PreRegAlloc)
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+ else
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getName() << " ********\n");
+
+ if (PreRegAlloc) {
+ // Estimate register pressure during pre-regalloc pass.
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegPressure.resize(NumRC);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ RegLimit.resize(NumRC);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
+ }
+
+ // Get our Loop information...
+ MLI = &getAnalysis<MachineLoopInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+ while (!Worklist.empty()) {
+ CurLoop = Worklist.pop_back_val();
+ CurPreheader = 0;
+ ExitBlocks.clear();
+
+ // If this is done before regalloc, only visit outer-most preheader-sporting
+ // loops.
+ if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+ Worklist.append(CurLoop->begin(), CurLoop->end());
+ continue;
+ }
+
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ if (!PreRegAlloc)
+ HoistRegionPostRA();
+ else {
+ // CSEMap is initialized for loop header when the first instruction is
+ // being hoisted.
+ MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+ FirstInLoop = true;
+ HoistOutOfLoop(N);
+ CSEMap.clear();
+ }
+ }
+
+ return Changed;
+}
+
+/// InstructionStoresToFI - Return true if instruction stores to the
+/// specified frame.
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end(); o != oe; ++o) {
+ if (!(*o)->isStore() || !(*o)->getValue())
+ continue;
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ if (Value->getFrameIndex() == FI)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// ProcessMI - Examine the instruction for potentai LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVector<CandidateInfo, 32> &Candidates) {
+ bool RuledOut = false;
+ bool HasNonInvariantUse = false;
+ unsigned Def = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isFI()) {
+ // Remember if the instruction stores to the frame index.
+ int FI = MO.getIndex();
+ if (!StoredFIs.count(FI) &&
+ MFI->isSpillSlotObjectIndex(FI) &&
+ InstructionStoresToFI(MI, FI))
+ StoredFIs.insert(FI);
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ // We can't hoist an instruction defining a physreg that is clobbered in
+ // the loop.
+ if (MO.isRegMask()) {
+ PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Not expecting virtual register!");
+
+ if (!MO.isDef()) {
+ if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
+ // If it's using a non-loop-invariant register, then it's obviously not
+ // safe to hoist.
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ if (MO.isImplicit()) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRegClobbers.set(*AI);
+ if (!MO.isDead())
+ // Non-dead implicit def? This cannot be hoisted.
+ RuledOut = true;
+ // No need to check if a dead implicit def is also defined by
+ // another instruction.
+ continue;
+ }
+
+ // FIXME: For now, avoid instructions with multiple defs, unless
+ // it's a dead implicit def.
+ if (Def)
+ RuledOut = true;
+ else
+ Def = Reg;
+
+ // If we have already seen another instruction that defines the same
+ // register, then this is not safe. Two defs is indicated by setting a
+ // PhysRegClobbers bit.
+ for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
+ if (PhysRegDefs.test(*AS))
+ PhysRegClobbers.set(*AS);
+ if (PhysRegClobbers.test(*AS))
+ // MI defined register is seen defined by another instruction in
+ // the loop, it cannot be a LICM candidate.
+ RuledOut = true;
+ PhysRegDefs.set(*AS);
+ }
+ }
+
+ // Only consider reloads for now and remats which do not have register
+ // operands. FIXME: Consider unfold load folding instructions.
+ if (Def && !RuledOut) {
+ int FI = INT_MIN;
+ if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+ (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+ Candidates.push_back(CandidateInfo(MI, Def, FI));
+ }
+}
+
+/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+/// invariants out to the preheader.
+void MachineLICM::HoistRegionPostRA() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ unsigned NumRegs = TRI->getNumRegs();
+ BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
+ BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
+
+ SmallVector<CandidateInfo, 32> Candidates;
+ SmallSet<int, 32> StoredFIs;
+
+ // Walk the entire region, count number of defs for each register, and
+ // collect potential LICM candidates.
+ const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *BB = Blocks[i];
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad()) continue;
+
+ // Conservatively treat live-in's as an external def.
+ // FIXME: That means a reload that're reused in successor block(s) will not
+ // be LICM'ed.
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRegDefs.set(*AI);
+ }
+
+ SpeculationState = SpeculateUnknown;
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
+ }
+ }
+
+ // Gather the registers read / clobbered by the terminator.
+ BitVector TermRegs(NumRegs);
+ MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
+ if (TI != Preheader->end()) {
+ for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = TI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ TermRegs.set(*AI);
+ }
+ }
+
+ // Now evaluate whether the potential candidates qualify.
+ // 1. Check if the candidate defined register is defined by another
+ // instruction in the loop.
+ // 2. If the candidate is a load from stack slot (always true for now),
+ // check if the slot is stored anywhere in the loop.
+ // 3. Make sure candidate def should not clobber
+ // registers read by the terminator. Similarly its def should not be
+ // clobbered by the terminator.
+ for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
+ if (Candidates[i].FI != INT_MIN &&
+ StoredFIs.count(Candidates[i].FI))
+ continue;
+
+ unsigned Def = Candidates[i].Def;
+ if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
+ bool Safe = true;
+ MachineInstr *MI = Candidates[i].MI;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.isDef() || !MO.getReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (PhysRegDefs.test(Reg) ||
+ PhysRegClobbers.test(Reg)) {
+ // If it's using a non-loop-invariant register, then it's obviously
+ // not safe to hoist.
+ Safe = false;
+ break;
+ }
+ }
+ if (Safe)
+ HoistPostRA(MI, Candidates[i].Def);
+ }
+ }
+}
+
+/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
+/// loop, and make sure it is not killed by any instructions in the loop.
+void MachineLICM::AddToLiveIns(unsigned Reg) {
+ const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *BB = Blocks[i];
+ if (!BB->isLiveIn(Reg))
+ BB->addLiveIn(Reg);
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+ if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+ MO.setIsKill(false);
+ }
+ }
+ }
+}
+
+/// HoistPostRA - When an instruction is found to only use loop invariant
+/// operands that is safe to hoist, this instruction is called to do the
+/// dirty work.
+void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+
+ // Now move the instructions to the predecessor, inserting it before any
+ // terminator instructions.
+ DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
+ << MI->getParent()->getNumber() << ": " << *MI);
+
+ // Splice the instruction to the preheader.
+ MachineBasicBlock *MBB = MI->getParent();
+ Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
+
+ // Add register to livein list to all the BBs in the current loop since a
+ // loop invariant must be kept live throughout the whole loop. This is
+ // important to ensure later passes do not scavenge the def register.
+ AddToLiveIns(Def);
+
+ ++NumPostRAHoisted;
+ Changed = true;
+}
+
+// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
+// If not then a load from this mbb may not be safe to hoist.
+bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+ if (SpeculationState != SpeculateUnknown)
+ return SpeculationState == SpeculateFalse;
+
+ if (BB != CurLoop->getHeader()) {
+ // Check loop exiting blocks.
+ SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
+ CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
+ for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
+ if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
+ SpeculationState = SpeculateTrue;
+ return false;
+ }
+ }
+
+ SpeculationState = SpeculateFalse;
+ return true;
+}
+
+void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+}
+
+void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ BackTrace.pop_back();
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if its a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ if (OpenChildren[Node])
+ return;
+
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose offsprings are all done.
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+/// blocks dominated by the specified header block, and that are in the
+/// current loop) in depth first order w.r.t the DominatorTree. This allows
+/// us to visit definitions before uses, allowing us to hoist a loop body in
+/// one pass without iteration.
+///
+void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(HeaderN);
+ do {
+ MachineDomTreeNode *Node = WorkList.pop_back_val();
+ assert(Node != 0 && "Null dominator tree node?");
+ MachineBasicBlock *BB = Node->getBlock();
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad())
+ continue;
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB))
+ continue;
+
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() >= 25)
+ NumChildren = 0;
+
+ OpenChildren[Node] = NumChildren;
+ // Add children in reverse order as then the next popped worklist node is
+ // the first child of this node. This means we ultimately traverse the
+ // DOM tree in exactly the same order as if we'd recursed.
+ for (int i = (int)NumChildren-1; i >= 0; --i) {
+ MachineDomTreeNode *Child = Children[i];
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
+ } while (!WorkList.empty());
+
+ if (Scopes.size() != 0) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ // Compute registers which are livein into the loop headers.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
+ }
+
+ // Now perform LICM.
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = Scopes[i];
+ MachineBasicBlock *MBB = Node->getBlock();
+
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ continue;
+
+ EnterScope(MBB);
+
+ // Process the block
+ SpeculationState = SpeculateUnknown;
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
+ MII = NextMII;
+ }
+
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ }
+}
+
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
+/// getRegisterClassIDAndCost - For a given MI, register, and the operand
+/// index, return the ID and cost of its representative register class.
+void
+MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ unsigned &RCId, unsigned &RCCost) const {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ MVT VT = *RC->vt_begin();
+ if (VT == MVT::Untyped) {
+ RCId = RC->getID();
+ RCCost = 1;
+ } else {
+ RCId = TLI->getRepRegClassFor(VT)->getID();
+ RCCost = TLI->getRepRegClassCostFor(VT);
+ }
+}
+
+/// InitRegPressure - Find all virtual register references that are liveout of
+/// the preheader to initialize the starting "register pressure". Note this
+/// does not count live through (livein but not used) registers.
+void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+
+ // If the preheader has only a single predecessor and it ends with a
+ // fallthrough or an unconditional branch, then scan its predecessor for live
+ // defs as well. This happens whenever the preheader is created by splitting
+ // the critical edge from the loop predecessor to the loop header.
+ if (BB->pred_size() == 1) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+ InitRegPressure(*BB->pred_begin());
+ }
+
+ for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
+ MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
+ if (MO.isDef())
+ RegPressure[RCId] += RCCost;
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill)
+ // Haven't seen this, it must be a livein.
+ RegPressure[RCId] += RCCost;
+ else if (!isNew && isKill)
+ RegPressure[RCId] -= RCCost;
+ }
+ }
+ }
+}
+
+/// UpdateRegPressure - Update estimate of register pressure after the
+/// specified instruction.
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (!isNew && isOperandKill(MO, MRI)) {
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
+ if (RCCost > RegPressure[RCId])
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= RCCost;
+ }
+ }
+
+ unsigned Idx = 0;
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.pop_back_val();
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost);
+ RegPressure[RCId] += RCCost;
+ ++Idx;
+ }
+}
+
+/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
+/// loads from global offset table or constant pool.
+static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
+ assert (MI.mayLoad() && "Expected MI that loads!");
+ for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
+ E = MI.memoperands_end(); I != E; ++I) {
+ if (const Value *V = (*I)->getValue()) {
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
+ return true;
+ }
+ }
+ return false;
+}
+
+/// IsLICMCandidate - Returns true if the instruction may be a suitable
+/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
+/// not safe to hoist it.
+bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+ // Check if it's safe to move the instruction.
+ bool DontMoveAcrossStore = true;
+ if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
+ return false;
+
+ // If it is load then check if it is guaranteed to execute by making sure that
+ // it dominates all exiting blocks. If it doesn't, then there is a path out of
+ // the loop which does not execute this load, so we can't hoist it. Loads
+ // from constant memory are not safe to speculate all the time, for example
+ // indexed load from a jump table.
+ // Stores and side effects are already checked by isSafeToMove.
+ if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
+ !IsGuaranteedToExecute(I.getParent()))
+ return false;
+
+ return true;
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+ if (!IsLICMCandidate(I))
+ return false;
+
+ // The instruction is loop invariant if all of its operands are.
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I.getOperand(i);
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // Don't hoist an instruction that uses or defines a physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
+ return false;
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return false;
+ } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
+ // If the reg is live into the loop, we can't hoist an instruction
+ // which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (CurLoop->contains(MRI->getVRegDef(Reg)))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
+
+/// HasLoopPHIUse - Return true if the specified instruction is used by a
+/// phi node and hoisting it could cause a copy to be inserted.
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
+ SmallVector<const MachineInstr*, 8> Work(1, MI);
+ do {
+ MI = Work.pop_back_val();
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ // A PHI may cause a copy to be inserted.
+ if (UseMI->isPHI()) {
+ // A PHI inside the loop causes a copy because the live range of Reg is
+ // extended across the PHI.
+ if (CurLoop->contains(UseMI))
+ return true;
+ // A PHI in an exit block can cause a copy to be inserted if the PHI
+ // has multiple predecessors in the loop with different values.
+ // For now, approximate by rejecting all exit blocks.
+ if (isExitBlock(UseMI->getParent()))
+ return true;
+ continue;
+ }
+ // Look past copies as well.
+ if (UseMI->isCopy() && CurLoop->contains(UseMI))
+ Work.push_back(UseMI);
+ }
+ }
+ } while (!Work.empty());
+ return false;
+}
+
+/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+/// and an use in the current loop, return true if the target considered
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+ unsigned DefIdx, unsigned Reg) const {
+ if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
+ return false;
+
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->isCopyLike())
+ continue;
+ if (!CurLoop->contains(UseMI->getParent()))
+ continue;
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+ return true;
+ }
+
+ // Only look at the first in loop use.
+ break;
+ }
+
+ return false;
+}
+
+/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
+/// the operand latency between its def and a use is one or less.
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+ if (MI.isAsCheapAsAMove() || MI.isCopyLike())
+ return true;
+ if (!InstrItins || InstrItins->isEmpty())
+ return false;
+
+ bool isCheap = false;
+ unsigned NumDefs = MI.getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &DefMO = MI.getOperand(i);
+ if (!DefMO.isReg() || !DefMO.isDef())
+ continue;
+ --NumDefs;
+ unsigned Reg = DefMO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ if (!TII->hasLowDefLatency(InstrItins, &MI, i))
+ return false;
+ isCheap = true;
+ }
+
+ return isCheap;
+}
+
+/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
+/// if hoisting an instruction of the given cost matrix can cause high
+/// register pressure.
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
+ bool CheapInstr) {
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ if (CI->second <= 0)
+ continue;
+
+ unsigned RCId = CI->first;
+ unsigned Limit = RegLimit[RCId];
+ int Cost = CI->second;
+
+ // Don't hoist cheap instructions if they would increase register pressure,
+ // even if we're under the limit.
+ if (CheapInstr)
+ return true;
+
+ for (unsigned i = BackTrace.size(); i != 0; --i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i-1];
+ if (RP[RCId] + Cost >= Limit)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
+/// current block and update their register pressures to reflect the effect
+/// of hoisting MI from the current block to the preheader.
+void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ // First compute the 'cost' of the instruction, i.e. its contribution
+ // to register pressure.
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
+ if (MO.isDef()) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Update register pressure of blocks from loop header to current block.
+ for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i];
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ unsigned RCId = CI->first;
+ RP[RCId] += CI->second;
+ }
+ }
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+ if (MI.isImplicitDef())
+ return true;
+
+ // Besides removing computation from the loop, hoisting an instruction has
+ // these effects:
+ //
+ // - The value defined by the instruction becomes live across the entire
+ // loop. This increases register pressure in the loop.
+ //
+ // - If the value is used by a PHI in the loop, a copy will be required for
+ // lowering the PHI after extending the live range.
+ //
+ // - When hoisting the last use of a value in the loop, that value no longer
+ // needs to be live in the loop. This lowers register pressure in the loop.
+
+ bool CheapInstr = IsCheapInstruction(MI);
+ bool CreatesCopy = HasLoopPHIUse(&MI);
+
+ // Don't hoist a cheap instruction if it would create a copy in the loop.
+ if (CheapInstr && CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+ return false;
+ }
+
+ // Rematerializable instructions should always be hoisted since the register
+ // allocator can just pull them down again when needed.
+ if (TII->isTriviallyReMaterializable(&MI, AA))
+ return true;
+
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In low register pressure situation, we can be more aggressive about
+ // hoisting. Also, favors hoisting long latency instructions even in
+ // moderately high pressure situation.
+ // Cheap instructions will only be hoisted if they don't increase register
+ // pressure at all.
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
+ if (MO.isDef()) {
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ DEBUG(dbgs() << "Hoist High Latency: " << MI);
+ ++NumHighLatency;
+ return true;
+ }
+ Cost[RCId] += RCCost;
+ } else if (isOperandKill(MO, MRI)) {
+ // Is a virtual register use is a kill, hoisting it out of the loop
+ // may actually reduce register pressure or be register pressure
+ // neutral.
+ Cost[RCId] -= RCCost;
+ }
+ }
+
+ // Visit BBs from header to current BB, if hoisting this doesn't cause
+ // high register pressure, then it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
+ DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+ ++NumLowRP;
+ return true;
+ }
+
+ // Don't risk increasing register pressure if it would create copies.
+ if (CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+ return false;
+ }
+
+ // Do not "speculate" in high register pressure situation. If an
+ // instruction is not guaranteed to be executed in the loop, it's best to be
+ // conservative.
+ if (AvoidSpeculation &&
+ (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
+ DEBUG(dbgs() << "Won't speculate: " << MI);
+ return false;
+ }
+
+ // High register pressure situation, only hoist if the instruction is going
+ // to be remat'ed.
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !MI.isInvariantLoad(AA)) {
+ DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
+ return false;
+ }
+
+ return true;
+}
+
+MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+ // Don't unfold simple loads.
+ if (MI->canFoldAsLoad())
+ return 0;
+
+ // If not, we may be able to unfold a load and hoist that.
+ // First test whether the instruction is loading from an amenable
+ // memory location.
+ if (!MI->isInvariantLoad(AA))
+ return 0;
+
+ // Next determine the register class for a temporary register.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc == 0) return 0;
+ const MCInstrDesc &MID = TII->get(NewOpc);
+ if (MID.getNumDefs() != 1) return 0;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
+ // Ok, we're unfolding. Create a temporary register and do the unfold.
+ unsigned Reg = MRI->createVirtualRegister(RC);
+
+ SmallVector<MachineInstr *, 2> NewMIs;
+ bool Success =
+ TII->unfoldMemoryOperand(MF, MI, Reg,
+ /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+ NewMIs);
+ (void)Success;
+ assert(Success &&
+ "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+ "succeeded!");
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator Pos = MI;
+ MBB->insert(Pos, NewMIs[0]);
+ MBB->insert(Pos, NewMIs[1]);
+ // If unfolding produced a load that wasn't loop-invariant or profitable to
+ // hoist, discard the new instructions and bail.
+ if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ return 0;
+ }
+
+ // Update register pressure for the unfolded instruction.
+ UpdateRegPressure(NewMIs[1]);
+
+ // Otherwise we successfully unfolded a load that we can hoist.
+ MI->eraseFromParent();
+ return NewMIs[0];
+}
+
+void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+}
+
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs) {
+ for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
+ const MachineInstr *PrevMI = PrevMIs[i];
+ if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
+ return PrevMI;
+ }
+ return 0;
+}
+
+bool MachineLICM::EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+
+ // Replace virtual registers defined by MI by their counterparts defined
+ // by Dup.
+ SmallVector<unsigned, 2> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ // Physical registers may not differ here.
+ assert((!MO.isReg() || MO.getReg() == 0 ||
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ MO.getReg() == Dup->getOperand(i).getReg()) &&
+ "Instructions with different phys regs are not identical!");
+
+ if (MO.isReg() && MO.isDef() &&
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ Defs.push_back(i);
+ }
+
+ SmallVector<const TargetRegisterClass*, 2> OrigRCs;
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ OrigRCs.push_back(MRI->getRegClass(DupReg));
+
+ if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
+ // Restore old RCs if more than one defs.
+ for (unsigned j = 0; j != i; ++j)
+ MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
+ return false;
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ MRI->replaceRegWith(Reg, DupReg);
+ MRI->clearKillFlags(DupReg);
+ }
+
+ MI->eraseFromParent();
+ ++NumCSEed;
+ return true;
+ }
+ return false;
+}
+
+/// MayCSE - Return true if the given instruction will be CSE'd if it's
+/// hoisted out of the loop.
+bool MachineLICM::MayCSE(MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ return LookForDuplicate(MI, CI->second) != 0;
+}
+
+/// Hoist - When an instruction is found to use only loop invariant operands
+/// that are safe to hoist, this instruction is called to do the dirty work.
+///
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+ // First check whether we should hoist this instruction.
+ if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+ // If not, try unfolding a hoistable load.
+ MI = ExtractHoistableLoad(MI);
+ if (!MI) return false;
+ }
+
+ // Now move the instructions to the predecessor, inserting it before any
+ // terminator instructions.
+ DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (Preheader->getBasicBlock())
+ dbgs() << " to MachineBasicBlock "
+ << Preheader->getName();
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from MachineBasicBlock "
+ << MI->getParent()->getName();
+ dbgs() << "\n";
+ });
+
+ // If this is the first instruction being hoisted to the preheader,
+ // initialize the CSE map with potential common expressions.
+ if (FirstInLoop) {
+ InitCSEMap(Preheader);
+ FirstInLoop = false;
+ }
+
+ // Look for opportunity to CSE the hoisted instruction.
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (!EliminateCSE(MI, CI)) {
+ // Otherwise, splice the instruction to the preheader.
+ Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
+
+ // Update register pressure for BBs from header to this block.
+ UpdateBackTraceRegPressure(MI);
+
+ // Clear the kill flags of any register this instruction defines,
+ // since they may need to be live throughout the entire loop
+ // rather than just live for part of it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && !MO.isDead())
+ MRI->clearKillFlags(MO.getReg());
+ }
+
+ // Add to the CSE map.
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+
+ ++NumHoisted;
+ Changed = true;
+
+ return true;
+}
+
+MachineBasicBlock *MachineLICM::getCurPreheader() {
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop predecessor, we can't do any hoisting.
+
+ // If we've tried to get a preheader and failed, don't try again.
+ if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+ return 0;
+
+ if (!CurPreheader) {
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader) {
+ MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+ if (!Pred) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+
+ CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
+ if (!CurPreheader) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+ }
+ }
+ return CurPreheader;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
new file mode 100644
index 000000000000..4e2cfdc4e568
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -0,0 +1,81 @@
+//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// the loops identified may actually be several natural loops that share the
+// same header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
+template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
+template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
+
+char MachineLoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+
+char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
+
+bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
+ return false;
+}
+
+void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineBasicBlock *MachineLoop::getTopBlock() {
+ MachineBasicBlock *TopMBB = getHeader();
+ MachineFunction::iterator Begin = TopMBB->getParent()->begin();
+ if (TopMBB != Begin) {
+ MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB));
+ while (contains(PriorMBB)) {
+ TopMBB = PriorMBB;
+ if (TopMBB == Begin) break;
+ PriorMBB = prior(MachineFunction::iterator(TopMBB));
+ }
+ }
+ return TopMBB;
+}
+
+MachineBasicBlock *MachineLoop::getBottomBlock() {
+ MachineBasicBlock *BotMBB = getHeader();
+ MachineFunction::iterator End = BotMBB->getParent()->end();
+ if (BotMBB != prior(End)) {
+ MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+ while (contains(NextMBB)) {
+ BotMBB = NextMBB;
+ if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break;
+ NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+ }
+ }
+ return BotMBB;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineLoop::dump() const {
+ print(dbgs());
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 000000000000..0ea9ae0fcc89
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,578 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the Pass registration stuff necessary to use DataLayout's.
+INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false)
+char MachineModuleInfo::ID = 0;
+
+// Out of line virtual method.
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
+
+namespace llvm {
+class MMIAddrLabelMapCallbackPtr : CallbackVH {
+ MMIAddrLabelMap *Map;
+public:
+ MMIAddrLabelMapCallbackPtr() : Map(0) {}
+ MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
+
+ void setPtr(BasicBlock *BB) {
+ ValueHandleBase::operator=(BB);
+ }
+
+ void setMap(MMIAddrLabelMap *map) { Map = map; }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *V2);
+};
+
+class MMIAddrLabelMap {
+ MCContext &Context;
+ struct AddrLabelSymEntry {
+ /// Symbols - The symbols for the label. This is a pointer union that is
+ /// either one symbol (the common case) or a list of symbols.
+ PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
+
+ Function *Fn; // The containing function of the BasicBlock.
+ unsigned Index; // The index in BBCallbacks for the BasicBlock.
+ };
+
+ DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+
+ /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We
+ /// use this so we get notified if a block is deleted or RAUWd.
+ std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
+
+ /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols
+ /// whose corresponding BasicBlock got deleted. These symbols need to be
+ /// emitted at some point in the file, so AsmPrinter emits them after the
+ /// function body.
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+ DeletedAddrLabelsNeedingEmission;
+public:
+
+ MMIAddrLabelMap(MCContext &context) : Context(context) {}
+ ~MMIAddrLabelMap() {
+ assert(DeletedAddrLabelsNeedingEmission.empty() &&
+ "Some labels for deleted blocks never got emitted");
+
+ // Deallocate any of the 'list of symbols' case.
+ for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
+ I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
+ if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
+ delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
+ }
+
+ MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
+ std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+ void takeDeletedSymbolsForFunction(Function *F,
+ std::vector<MCSymbol*> &Result);
+
+ void UpdateForDeletedBlock(BasicBlock *BB);
+ void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+}
+
+MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ // If we already had an entry for this block, just return it.
+ if (!Entry.Symbols.isNull()) {
+ assert(BB->getParent() == Entry.Fn && "Parent changed");
+ if (Entry.Symbols.is<MCSymbol*>())
+ return Entry.Symbols.get<MCSymbol*>();
+ return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
+ }
+
+ // Otherwise, this is a new entry, create a new symbol for it and add an
+ // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
+ BBCallbacks.push_back(BB);
+ BBCallbacks.back().setMap(this);
+ Entry.Index = BBCallbacks.size()-1;
+ Entry.Fn = BB->getParent();
+ MCSymbol *Result = Context.CreateTempSymbol();
+ Entry.Symbols = Result;
+ return Result;
+}
+
+std::vector<MCSymbol*>
+MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ std::vector<MCSymbol*> Result;
+
+ // If we already had an entry for this block, just return it.
+ if (Entry.Symbols.isNull())
+ Result.push_back(getAddrLabelSymbol(BB));
+ else if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>())
+ Result.push_back(Sym);
+ else
+ Result = *Entry.Symbols.get<std::vector<MCSymbol*>*>();
+ return Result;
+}
+
+
+/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return
+/// them.
+void MMIAddrLabelMap::
+takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
+ DeletedAddrLabelsNeedingEmission.find(F);
+
+ // If there are no entries for the function, just return.
+ if (I == DeletedAddrLabelsNeedingEmission.end()) return;
+
+ // Otherwise, take the list.
+ std::swap(Result, I->second);
+ DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
+
+void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+ // If the block got deleted, there is no need for the symbol. If the symbol
+ // was already emitted, we can just forget about it, otherwise we need to
+ // queue it up for later emission when the function is output.
+ AddrLabelSymEntry Entry = AddrLabelSymbols[BB];
+ AddrLabelSymbols.erase(BB);
+ assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+ BBCallbacks[Entry.Index] = 0; // Clear the callback.
+
+ assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) &&
+ "Block/parent mismatch");
+
+ // Handle both the single and the multiple symbols cases.
+ if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
+ if (Sym->isDefined())
+ return;
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, we have to get the function from 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ } else {
+ std::vector<MCSymbol*> *Syms = Entry.Symbols.get<std::vector<MCSymbol*>*>();
+
+ for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
+ MCSymbol *Sym = (*Syms)[i];
+ if (Sym->isDefined()) continue; // Ignore already emitted labels.
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, we have to get the function from
+ // 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ }
+
+ // The entry is deleted, free the memory associated with the symbol list.
+ delete Syms;
+ }
+}
+
+void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+ // Get the entry for the RAUW'd block and remove it from our map.
+ AddrLabelSymEntry OldEntry = AddrLabelSymbols[Old];
+ AddrLabelSymbols.erase(Old);
+ assert(!OldEntry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+
+ AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+ // If New is not address taken, just move our symbol over to it.
+ if (NewEntry.Symbols.isNull()) {
+ BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
+ NewEntry = OldEntry; // Set New's entry.
+ return;
+ }
+
+ BBCallbacks[OldEntry.Index] = 0; // Update the callback.
+
+ // Otherwise, we need to add the old symbol to the new block's set. If it is
+ // just a single entry, upgrade it to a symbol list.
+ if (MCSymbol *PrevSym = NewEntry.Symbols.dyn_cast<MCSymbol*>()) {
+ std::vector<MCSymbol*> *SymList = new std::vector<MCSymbol*>();
+ SymList->push_back(PrevSym);
+ NewEntry.Symbols = SymList;
+ }
+
+ std::vector<MCSymbol*> *SymList =
+ NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
+
+ // If the old entry was a single symbol, add it.
+ if (MCSymbol *Sym = OldEntry.Symbols.dyn_cast<MCSymbol*>()) {
+ SymList->push_back(Sym);
+ return;
+ }
+
+ // Otherwise, concatenate the list.
+ std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
+ SymList->insert(SymList->end(), Syms->begin(), Syms->end());
+ delete Syms;
+}
+
+
+void MMIAddrLabelMapCallbackPtr::deleted() {
+ Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
+}
+
+void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+ Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
+}
+
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
+ const MCRegisterInfo &MRI,
+ const MCObjectFileInfo *MOFI)
+ : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineModuleInfo::MachineModuleInfo()
+ : ImmutablePass(ID),
+ Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) {
+ llvm_unreachable("This MachineModuleInfo constructor should never be called, "
+ "MMI should always be explicitly constructed by "
+ "LLVMTargetMachine");
+}
+
+MachineModuleInfo::~MachineModuleInfo() {
+}
+
+bool MachineModuleInfo::doInitialization(Module &M) {
+
+ ObjFileMMI = 0;
+ CompactUnwindEncoding = 0;
+ CurCallSite = 0;
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+ DbgInfoAvailable = UsesVAFloatArgument = false;
+ // Always emit some info, by default "no personality" info.
+ Personalities.push_back(NULL);
+ AddrLabelSymbols = 0;
+ TheModule = 0;
+
+ return false;
+}
+
+bool MachineModuleInfo::doFinalization(Module &M) {
+
+ Personalities.clear();
+
+ delete AddrLabelSymbols;
+ AddrLabelSymbols = 0;
+
+ Context.reset();
+
+ delete ObjFileMMI;
+ ObjFileMMI = 0;
+
+ return false;
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+ // Clean up frame info.
+ FrameMoves.clear();
+
+ // Clean up exception info.
+ LandingPads.clear();
+ CallSiteMap.clear();
+ TypeInfos.clear();
+ FilterIds.clear();
+ FilterEnds.clear();
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+ CompactUnwindEncoding = 0;
+ VariableDbgInfo.clear();
+}
+
+/// AnalyzeModule - Scan the module for global debug information.
+///
+void MachineModuleInfo::AnalyzeModule(const Module &M) {
+ // Insert functions in the llvm.used array (but not llvm.compiler.used) into
+ // UsedFunctions.
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const Function *F =
+ dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
+ UsedFunctions.insert(F);
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+
+/// getAddrLabelSymbol - Return the symbol to be used for the specified basic
+/// block when its address is taken. This cannot be its normal LBB label
+/// because the block may be accessed outside its containing function.
+MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (AddrLabelSymbols == 0)
+ AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB));
+}
+
+/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
+/// basic block when its address is taken. If other blocks were RAUW'd to
+/// this one, we may have to emit them as well, return the whole set.
+std::vector<MCSymbol*> MachineModuleInfo::
+getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (AddrLabelSymbols == 0)
+ AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
+}
+
+
+/// takeDeletedSymbolsForFunction - If the specified function has had any
+/// references to address-taken blocks generated, but the block got deleted,
+/// return the symbol now so we can emit it. This prevents emitting a
+/// reference to a symbol that has no definition.
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+ std::vector<MCSymbol*> &Result) {
+ // If no blocks have had their addresses taken, we're done.
+ if (AddrLabelSymbols == 0) return;
+ return AddrLabelSymbols->
+ takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
+//===- EH -----------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+ MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+ MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+ const Function *Personality) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.Personality = Personality;
+
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ if (Personalities[i] == Personality)
+ return;
+
+ // If this is the first personality we're adding go
+ // ahead and add it at the beginning.
+ if (Personalities[0] == NULL)
+ Personalities[0] = Personality;
+ else
+ Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::
+addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalVariable *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::
+addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalVariable *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ if (LandingPad.LandingPadLabel &&
+ !LandingPad.LandingPadLabel->isDefined() &&
+ (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+ LandingPad.LandingPadLabel = 0;
+
+ // Special case: we *should* emit LPs with null LP MBB. This indicates
+ // "nounwind" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() ||
+ (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() ||
+ (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j, --e;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+ ++i;
+ }
+}
+
+/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site
+/// indexes.
+void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
+ ArrayRef<unsigned> Sites) {
+ LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
+
+/// getPersonality - Return the personality function for the current function.
+const Function *MachineModuleInfo::getPersonality() const {
+ // FIXME: Until PR1414 will be fixed, we're using 1 personality function per
+ // function
+ return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return unique index for current personality
+/// function. NULL/first personality function should always get zero index.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+ const Function* Personality = NULL;
+
+ // Scan landing pads. If there is at least one non-NULL personality - use it.
+ for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
+ if (LandingPads[i].Personality) {
+ Personality = LandingPads[i].Personality;
+ break;
+ }
+
+ for (unsigned i = 0, e = Personalities.size(); i < e; ++i) {
+ if (Personalities[i] == Personality)
+ return i;
+ }
+
+ // This will happen if the current personality function is
+ // in the zero index.
+ return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 000000000000..a1c7e9f5fb22
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void MachineModuleInfoMachO::anchor() {}
+void MachineModuleInfoELF::anchor() {}
+
+static int SortSymbolPair(const void *LHS, const void *RHS) {
+ typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
+ const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
+ const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
+ return LHSS->getName().compare(RHSS->getName());
+}
+
+/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
+/// sorted orer.
+MachineModuleInfoImpl::SymbolListTy
+MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
+ MachineModuleInfoImpl::StubValueTy>&Map) {
+ MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
+
+ if (!List.empty())
+ qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+ return List;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 000000000000..cb204fdb3492
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,55 @@
+//===-- CodeGen/MachineInstr.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+void MachinePassRegistryListener::anchor() { }
+
+/// setDefault - Set the default constructor by name.
+void MachinePassRegistry::setDefault(StringRef Name) {
+ MachinePassCtor Ctor = 0;
+ for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) {
+ if (R->getName() == Name) {
+ Ctor = R->getCtor();
+ break;
+ }
+ }
+ assert(Ctor && "Unregistered pass name");
+ setDefault(Ctor);
+}
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener) Listener->NotifyAdd(Node->getName(),
+ Node->getCtor(),
+ Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+ for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener) Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
new file mode 100644
index 000000000000..c3f6e9249e7d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -0,0 +1,55 @@
+//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// post dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePostDominators.h"
+
+using namespace llvm;
+
+char MachinePostDominatorTree::ID = 0;
+
+//declare initializeMachinePostDominatorTreePass
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+ "MachinePostDominator Tree Construction", true, true)
+
+MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+ initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+ DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
+ // postdominator
+}
+
+FunctionPass *
+MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+ return new MachinePostDominatorTree();
+}
+
+bool
+MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+ delete DT;
+}
+
+void
+MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void
+MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
+ DT->print(OS);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 000000000000..1af00e84a6ed
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,360 @@
+//===-- lib/Codegen/MachineRegisterInfo.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
+ : TRI(&TRI), IsSSA(true), TracksLiveness(true) {
+ VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
+ UsedRegUnits.resize(TRI.getNumRegUnits());
+ UsedPhysRegMask.resize(TRI.getNumRegs());
+
+ // Create the physreg use/def lists.
+ PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
+ memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
+}
+
+MachineRegisterInfo::~MachineRegisterInfo() {
+ delete [] PhysRegUseDefLists;
+}
+
+/// setRegClass - Set the register class of the specified virtual register.
+///
+void
+MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ assert(RC && RC->isAllocatable() && "Invalid RC for virtual register");
+ VRegInfo[Reg].first = RC;
+}
+
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+ const TargetRegisterClass *RC,
+ unsigned MinNumRegs) {
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ if (OldRC == RC)
+ return RC;
+ const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC);
+ if (!NewRC || NewRC == OldRC)
+ return NewRC;
+ if (NewRC->getNumRegs() < MinNumRegs)
+ return 0;
+ setRegClass(Reg, NewRC);
+ return NewRC;
+}
+
+bool
+MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
+
+ // Stop early if there is no room to grow.
+ if (NewRC == OldRC)
+ return false;
+
+ // Accumulate constraints from all uses.
+ for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E;
+ ++I) {
+ const TargetRegisterClass *OpRC =
+ I->getRegClassConstraint(I.getOperandNo(), TII, TRI);
+ if (unsigned SubIdx = I.getOperand().getSubReg()) {
+ if (OpRC)
+ NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx);
+ else
+ NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx);
+ } else if (OpRC)
+ NewRC = TRI->getCommonSubClass(NewRC, OpRC);
+ if (!NewRC || NewRC == OldRC)
+ return false;
+ }
+ setRegClass(Reg, NewRC);
+ return true;
+}
+
+/// createVirtualRegister - Create and return a new virtual register in the
+/// function with the specified register class.
+///
+unsigned
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
+ assert(RegClass && "Cannot create register without RegClass!");
+ assert(RegClass->isAllocatable() &&
+ "Virtual register RegClass must be allocatable.");
+
+ // New virtual register number.
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+ VRegInfo.grow(Reg);
+ VRegInfo[Reg].first = RegClass;
+ RegAllocHints.grow(Reg);
+ return Reg;
+}
+
+/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
+void MachineRegisterInfo::clearVirtRegs() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+ "Vreg use list non-empty still?");
+#endif
+ VRegInfo.clear();
+}
+
+/// Add MO to the linked list of operands for its register.
+void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
+ assert(!MO->isOnRegUseList() && "Already on list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+
+ // Head points to the first list element.
+ // Next is NULL on the last list element.
+ // Prev pointers are circular, so Head->Prev == Last.
+
+ // Head is NULL for an empty list.
+ if (!Head) {
+ MO->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Next = 0;
+ HeadRef = MO;
+ return;
+ }
+ assert(MO->getReg() == Head->getReg() && "Different regs on the same list!");
+
+ // Insert MO between Last and Head in the circular Prev chain.
+ MachineOperand *Last = Head->Contents.Reg.Prev;
+ assert(Last && "Inconsistent use list");
+ assert(MO->getReg() == Last->getReg() && "Different regs on the same list!");
+ Head->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Prev = Last;
+
+ // Def operands always precede uses. This allows def_iterator to stop early.
+ // Insert def operands at the front, and use operands at the back.
+ if (MO->isDef()) {
+ // Insert def at the front.
+ MO->Contents.Reg.Next = Head;
+ HeadRef = MO;
+ } else {
+ // Insert use at the end.
+ MO->Contents.Reg.Next = 0;
+ Last->Contents.Reg.Next = MO;
+ }
+}
+
+/// Remove MO from its use-def list.
+void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
+ assert(MO->isOnRegUseList() && "Operand not on use list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+ assert(Head && "List already empty");
+
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *Next = MO->Contents.Reg.Next;
+ MachineOperand *Prev = MO->Contents.Reg.Prev;
+
+ // Prev links are circular, next link is NULL instead of looping back to Head.
+ if (MO == Head)
+ HeadRef = Next;
+ else
+ Prev->Contents.Reg.Next = Next;
+
+ (Next ? Next : Head)->Contents.Reg.Prev = Prev;
+
+ MO->Contents.Reg.Prev = 0;
+ MO->Contents.Reg.Next = 0;
+}
+
+/// Move NumOps operands from Src to Dst, updating use-def lists as needed.
+///
+/// The Dst range is assumed to be uninitialized memory. (Or it may contain
+/// operands that won't be destroyed, which is OK because the MO destructor is
+/// trivial anyway).
+///
+/// The Src and Dst ranges may overlap.
+void MachineRegisterInfo::moveOperands(MachineOperand *Dst,
+ MachineOperand *Src,
+ unsigned NumOps) {
+ assert(Src != Dst && NumOps && "Noop moveOperands");
+
+ // Copy backwards if Dst is within the Src range.
+ int Stride = 1;
+ if (Dst >= Src && Dst < Src + NumOps) {
+ Stride = -1;
+ Dst += NumOps - 1;
+ Src += NumOps - 1;
+ }
+
+ // Copy one operand at a time.
+ do {
+ new (Dst) MachineOperand(*Src);
+
+ // Dst takes Src's place in the use-def chain.
+ if (Src->isReg()) {
+ MachineOperand *&Head = getRegUseDefListHead(Src->getReg());
+ MachineOperand *Prev = Src->Contents.Reg.Prev;
+ MachineOperand *Next = Src->Contents.Reg.Next;
+ assert(Head && "List empty, but operand is chained");
+ assert(Prev && "Operand was not on use-def list");
+
+ // Prev links are circular, next link is NULL instead of looping back to
+ // Head.
+ if (Src == Head)
+ Head = Dst;
+ else
+ Prev->Contents.Reg.Next = Dst;
+
+ // Update Prev pointer. This also works when Src was pointing to itself
+ // in a 1-element list. In that case Head == Dst.
+ (Next ? Next : Head)->Contents.Reg.Prev = Dst;
+ }
+
+ Dst += Stride;
+ Src += Stride;
+ } while (--NumOps);
+}
+
+/// replaceRegWith - Replace all instances of FromReg with ToReg in the
+/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
+/// except that it also changes any definitions of the register as well.
+void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
+ assert(FromReg != ToReg && "Cannot replace a reg with itself");
+
+ // TODO: This could be more efficient by bulk changing the operands.
+ for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ ++I;
+ O.setReg(ToReg);
+ }
+}
+
+
+/// getVRegDef - Return the machine instr that defines the specified virtual
+/// register or null if none is found. This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+ // Since we are in SSA form, we can use the first definition.
+ def_iterator I = def_begin(Reg);
+ assert((I.atEnd() || llvm::next(I) == def_end()) &&
+ "getVRegDef assumes a single definition or no definition");
+ return !I.atEnd() ? &*I : 0;
+}
+
+/// getUniqueVRegDef - Return the unique machine instr that defines the
+/// specified virtual register or null if none is found. If there are
+/// multiple definitions or no definition, return null.
+MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
+ if (def_empty(Reg)) return 0;
+ def_iterator I = def_begin(Reg);
+ if (llvm::next(I) != def_end())
+ return 0;
+ return &*I;
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+ use_nodbg_iterator UI = use_nodbg_begin(RegNo);
+ if (UI == use_nodbg_end())
+ return false;
+ return ++UI == use_nodbg_end();
+}
+
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+ for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI)
+ UI.getOperand().setIsKill(false);
+}
+
+bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == Reg || I->second == Reg)
+ return true;
+ return false;
+}
+
+/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->second == VReg)
+ return I->first;
+ return 0;
+}
+
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == PReg)
+ return I->second;
+ return 0;
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ // Emit the copies into the top of the block.
+ for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+ if (LiveIns[i].second) {
+ if (use_empty(LiveIns[i].second)) {
+ // The livein has no uses. Drop it.
+ //
+ // It would be preferable to have isel avoid creating live-in
+ // records for unused arguments in the first place, but it's
+ // complicated by the debug info code for arguments.
+ LiveIns.erase(LiveIns.begin() + i);
+ --i; --e;
+ } else {
+ // Emit a copy.
+ BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::COPY), LiveIns[i].second)
+ .addReg(LiveIns[i].first);
+
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+ } else {
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+}
+
+#ifndef NDEBUG
+void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+ for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
+ I.getOperand().getParent()->dump();
+}
+#endif
+
+void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
+ ReservedRegs = TRI->getReservedRegs(MF);
+ assert(ReservedRegs.size() == TRI->getNumRegs() &&
+ "Invalid ReservedRegs vector from target");
+}
+
+bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
+ const MachineFunction &MF) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+
+ // Check if any overlapping register is modified, or allocatable so it may be
+ // used later.
+ for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
+ if (!def_empty(*AI) || isAllocatable(*AI))
+ return false;
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 000000000000..bb6aad7f948e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,364 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It's based on SSAUpdater
+// class in lib/Transforms/Utils.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+using namespace llvm;
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+ SmallVectorImpl<MachineInstr*> *NewPHI)
+ : AV(0), InsertedPHIs(NewPHI) {
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+ delete static_cast<AvailableValsTy*>(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates. ProtoValue is the value used to name PHI nodes.
+void MachineSSAUpdater::Initialize(unsigned V) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+
+ VR = V;
+ VRC = MRI->getRegClass(VR);
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
+/// the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+ return GetValueAtEndOfBlockInternal(BB);
+}
+
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) {
+ if (BB->empty())
+ return 0;
+
+ MachineBasicBlock::iterator I = BB->begin();
+ if (!I->isPHI())
+ return 0;
+
+ AvailableValsTy AVals;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ AVals[PredValues[i].first] = PredValues[i].second;
+ while (I != BB->end() && I->isPHI()) {
+ bool Same = true;
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = I->getOperand(i).getReg();
+ MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+ if (AVals[SrcBB] != SrcReg) {
+ Same = false;
+ break;
+ }
+ }
+ if (Same)
+ return I->getOperand(0).getReg();
+ ++I;
+ }
+ return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which define
+/// a value of the given register class at the start of the specified basic
+/// block. It returns the virtual register defined by the instruction.
+static
+MachineInstrBuilder InsertNewDef(unsigned Opcode,
+ MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlockInternal(BB);
+
+ // If there are no predecessors, just return undef.
+ if (BB->pred_empty()) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues;
+ unsigned SingularValue = 0;
+
+ bool isFirstPred = true;
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // If an identical PHI is already in BB, just reuse it.
+ unsigned DupPHI = LookForIdenticalPHI(BB, PredValues);
+ if (DupPHI)
+ return DupPHI;
+
+ // Otherwise, we do need a PHI: insert one now.
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
+ MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
+ Loc, VRC, MRI, TII);
+
+ // Fill in all the predecessors of the PHI.
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+ InsertedPHI->eraseFromParent();
+ return ConstVal;
+ }
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI->getOperand(0).getReg();
+}
+
+static
+MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
+ MachineOperand *U) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ if (&MI->getOperand(i) == U)
+ return MI->getOperand(i+1).getMBB();
+ }
+
+ llvm_unreachable("MachineOperand::getParent() failure?");
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
+ MachineInstr *UseMI = U.getParent();
+ unsigned NewVR = 0;
+ if (UseMI->isPHI()) {
+ MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
+ NewVR = GetValueAtEndOfBlockInternal(SourceBB);
+ } else {
+ NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
+ }
+
+ U.setReg(NewVR);
+}
+
+void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
+ MRI->replaceRegWith(OldReg, NewReg);
+
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ for (DenseMap<MachineBasicBlock*, unsigned>::iterator
+ I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I)
+ if (I->second == OldReg)
+ I->second = NewReg;
+}
+
+/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
+/// template, specialized for MachineSSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<MachineSSAUpdater> {
+public:
+ typedef MachineBasicBlock BlkT;
+ typedef unsigned ValT;
+ typedef MachineInstr PhiT;
+
+ typedef MachineBasicBlock::succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+ /// Iterator for PHI operands.
+ class PHI_iterator {
+ private:
+ MachineInstr *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(MachineInstr *P) // begin iterator
+ : PHI(P), idx(1) {}
+ PHI_iterator(MachineInstr *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumOperands()) {}
+
+ PHI_iterator &operator++() { idx += 2; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+ unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+ MachineBasicBlock *getIncomingBlock() {
+ return PHI->getOperand(idx+1).getMBB();
+ }
+ };
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+ /// vector.
+ static void FindPredecessorBlocks(MachineBasicBlock *BB,
+ SmallVectorImpl<MachineBasicBlock*> *Preds){
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+
+ /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned GetUndefVal(MachineBasicBlock *BB,
+ MachineSSAUpdater *Updater) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ /// CreateEmptyPHI - Create a PHI instruction that defines a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
+ MachineSSAUpdater *Updater) {
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
+ MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return PHI->getOperand(0).getReg();
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
+ MachineBasicBlock *Pred) {
+ MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred);
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static MachineInstr *InstrIsPHI(MachineInstr *I) {
+ if (I && I->isPHI())
+ return I;
+ return 0;
+ }
+
+ /// ValueIsPHI - Check if the instruction that defines the specified register
+ /// is a PHI instruction.
+ static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ return InstrIsPHI(Updater->MRI->getVRegDef(Val));
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ MachineInstr *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumOperands() <= 1)
+ return PHI;
+ return 0;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the register
+ /// that it defines.
+ static unsigned GetPHIValue(MachineInstr *PHI) {
+ return PHI->getOperand(0).getReg();
+ }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (unsigned V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
new file mode 100644
index 000000000000..5bd2349b50f6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -0,0 +1,2396 @@
+//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <queue>
+
+using namespace llvm;
+
+namespace llvm {
+cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+}
+
+#ifndef NDEBUG
+static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+
+static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
+ cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+#else
+static bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
+// Experimental heuristics
+static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable load clustering."), cl::init(true));
+
+// Experimental heuristics
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
+ cl::desc("Verify machine instrs before and after machine scheduling"));
+
+// DAG subtrees must have at least this many nodes.
+static const unsigned MinSubtreeSize = 8;
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Scheduling Pass and Registry
+//===----------------------------------------------------------------------===//
+
+MachineSchedContext::MachineSchedContext():
+ MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
+ RegClassInfo = new RegisterClassInfo();
+}
+
+MachineSchedContext::~MachineSchedContext() {
+ delete RegClassInfo;
+}
+
+namespace {
+/// MachineScheduler runs after coalescing and before register allocation.
+class MachineScheduler : public MachineSchedContext,
+ public MachineFunctionPass {
+public:
+ MachineScheduler();
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory() {}
+
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+
+ static char ID; // Class identification, replacement for typeinfo
+};
+} // namespace
+
+char MachineScheduler::ID = 0;
+
+char &llvm::MachineSchedulerID = MachineScheduler::ID;
+
+INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
+ "Machine Instruction Scheduler", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(MachineScheduler, "misched",
+ "Machine Instruction Scheduler", false, false)
+
+MachineScheduler::MachineScheduler()
+: MachineFunctionPass(ID) {
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+}
+
+void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachinePassRegistry MachineSchedRegistry::Registry;
+
+/// A dummy default scheduler factory indicates whether the scheduler
+/// is overridden on the command line.
+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
+ return 0;
+}
+
+/// MachineSchedOpt allows command line selection of the scheduler.
+static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
+ RegisterPassParser<MachineSchedRegistry> >
+MachineSchedOpt("misched",
+ cl::init(&useDefaultMachineSched), cl::Hidden,
+ cl::desc("Machine instruction scheduler to use"));
+
+static MachineSchedRegistry
+DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
+ useDefaultMachineSched);
+
+/// Forward declare the standard machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+
+
+/// Decrement this iterator until reaching the top or a non-debug instr.
+static MachineBasicBlock::iterator
+priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
+ assert(I != Beg && "reached the top of the region, cannot decrement");
+ while (--I != Beg) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// If this iterator is a debug value, increment until reaching the End or a
+/// non-debug instruction.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
+ for(; I != End; ++I) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// Top-level MachineScheduler pass driver.
+///
+/// Visit blocks in function order. Divide each block into scheduling regions
+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
+/// consistent with the DAG builder, which traverses the interior of the
+/// scheduling regions bottom-up.
+///
+/// This design avoids exposing scheduling boundaries to the DAG builder,
+/// simplifying the DAG builder's support for "special" target instructions.
+/// At the same time the design allows target schedulers to operate across
+/// scheduling boundaries, for example to bundle the boudary instructions
+/// without reordering them. This creates complexity, because the target
+/// scheduler must update the RegionBegin and RegionEnd positions cached by
+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
+/// design would be to split blocks at scheduling boundaries, but LLVM has a
+/// general bias against block splitting purely for implementation simplicity.
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
+
+ // Initialize the context of the pass.
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ LIS = &getAnalysis<LiveIntervals>();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ if (VerifyScheduling) {
+ DEBUG(LIS->print(dbgs()));
+ MF->verify(this, "Before machine scheduling.");
+ }
+ RegClassInfo->runOnMachineFunction(*MF);
+
+ // Select the scheduler, or set the default.
+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+ if (Ctor == useDefaultMachineSched) {
+ // Get the default scheduler set by the target.
+ Ctor = MachineSchedRegistry::getDefault();
+ if (!Ctor) {
+ Ctor = createConvergingSched;
+ MachineSchedRegistry::setDefault(Ctor);
+ }
+ }
+ // Instantiate the selected scheduler.
+ OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+
+ // Visit all machine basic blocks.
+ //
+ // TODO: Visit blocks in global postorder or postorder within the bottom-up
+ // loop tree. Then we can optionally compute global RegPressure.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+ MBB != MBBEnd; ++MBB) {
+
+ Scheduler->startBlock(MBB);
+
+ // Break the block into scheduling regions [I, RegionEnd), and schedule each
+ // region as soon as it is discovered. RegionEnd points the scheduling
+ // boundary at the bottom of the region. The DAG does not include RegionEnd,
+ // but the region does (i.e. the next RegionEnd is above the previous
+ // RegionBegin). If the current block has no terminator then RegionEnd ==
+ // MBB->end() for the bottom region.
+ //
+ // The Scheduler may insert instructions during either schedule() or
+ // exitRegion(), even for empty regions. So the local iterators 'I' and
+ // 'RegionEnd' are invalid across these calls.
+ unsigned RemainingInstrs = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+
+ // Avoid decrementing RegionEnd for blocks with no terminator.
+ if (RegionEnd != MBB->end()
+ || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+ --RegionEnd;
+ // Count the boundary instruction.
+ --RemainingInstrs;
+ }
+
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingInstrs) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+ break;
+ }
+ // Notify the scheduler of the region, even if we may skip scheduling
+ // it. Perhaps it still needs to be bundled.
+ Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
+ // Close the current region. Bundle the terminator if needed.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->exitRegion();
+ continue;
+ }
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ DEBUG(dbgs() << MF->getName()
+ << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+ << "\n From: " << *I << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " Remaining: " << RemainingInstrs << "\n");
+
+ // Schedule a region: possibly reorder instructions.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->schedule();
+
+ // Close the current region.
+ Scheduler->exitRegion();
+
+ // Scheduling has invalidated the current iterator 'I'. Ask the
+ // scheduler for the top of it's scheduled region.
+ RegionEnd = Scheduler->begin();
+ }
+ assert(RemainingInstrs == 0 && "Instruction count mismatch!");
+ Scheduler->finishBlock();
+ }
+ Scheduler->finalizeSchedule();
+ DEBUG(LIS->print(dbgs()));
+ if (VerifyScheduling)
+ MF->verify(this, "After machine scheduling.");
+ return true;
+}
+
+void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+ // unimplemented
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ReadyQueue::dump() {
+ dbgs() << " " << Name << ": ";
+ for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+ dbgs() << Queue[i]->NodeNum << " ";
+ dbgs() << "\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
+//===----------------------------------------------------------------------===//
+
+ScheduleDAGMI::~ScheduleDAGMI() {
+ delete DFSResult;
+ DeleteContainerPointers(Mutations);
+ delete SchedImpl;
+}
+
+bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+ if (SuccSU != &ExitSU) {
+ // Do not use WillCreateCycle, it assumes SD scheduling.
+ // If Pred is reachable from Succ, then the edge creates a cycle.
+ if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+ return false;
+ Topo.AddPred(SuccSU, PredDep.getSUnit());
+ }
+ SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+ // Return true regardless of whether a new edge needed to be inserted.
+ return true;
+}
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
+/// NumPredsLeft reaches zero, release the successor node.
+///
+/// FIXME: Adjust SuccSU height based on MinLatency.
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ if (SuccEdge->isCluster())
+ NextClusterSucc = SuccSU;
+ return;
+ }
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ SchedImpl->releaseTopNode(SuccSU);
+}
+
+/// releaseSuccessors - Call releaseSucc on each of SU's successors.
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ releaseSucc(SU, &*I);
+ }
+}
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+///
+/// FIXME: Adjust PredSU height based on MinLatency.
+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+ if (PredEdge->isWeak()) {
+ --PredSU->WeakSuccsLeft;
+ if (PredEdge->isCluster())
+ NextClusterPred = PredSU;
+ return;
+ }
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+ SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ releasePred(SU, &*I);
+ }
+}
+
+void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPos) {
+ // Advance RegionBegin if the first instruction moves down.
+ if (&*RegionBegin == MI)
+ ++RegionBegin;
+
+ // Update the instruction stream.
+ BB->splice(InsertPos, BB, MI);
+
+ // Update LiveIntervals
+ LIS->handleMove(MI, /*UpdateFlags=*/true);
+
+ // Recede RegionBegin if an instruction moves above the first.
+ if (RegionBegin == InsertPos)
+ RegionBegin = MI;
+}
+
+bool ScheduleDAGMI::checkSchedLimit() {
+#ifndef NDEBUG
+ if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+ CurrentTop = CurrentBottom;
+ return false;
+ }
+ ++NumInstrsScheduled;
+#endif
+ return true;
+}
+
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount)
+{
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+
+ // For convenience remember the end of the liveness region.
+ LiveRegionEnd =
+ (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
+}
+
+// Setup the register pressure trackers for the top scheduled top and bottom
+// scheduled regions.
+void ScheduleDAGMI::initRegPressure() {
+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Close the RPTracker to finalize live ins.
+ RPTracker.closeRegion();
+
+ DEBUG(RPTracker.getPressure().dump(TRI));
+
+ // Initialize the live ins and live outs.
+ TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+ BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+ // Close one end of the tracker so we can call
+ // getMaxUpward/DownwardPressureDelta before advancing across any
+ // instructions. This converts currently live regs into live ins/outs.
+ TopRPTracker.closeTop();
+ BotRPTracker.closeBottom();
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ BotRPTracker.recede();
+
+ assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+
+ // Cache the list of excess pressure sets in this region. This will also track
+ // the max pressure in the scheduled code for these sets.
+ RegionCriticalPSets.clear();
+ const std::vector<unsigned> &RegionPressure =
+ RPTracker.getPressure().MaxSetPressure;
+ for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+ << "Limit " << Limit
+ << " Actual " << RegionPressure[i] << "\n");
+ if (RegionPressure[i] > Limit)
+ RegionCriticalPSets.push_back(PressureElement(i, 0));
+ }
+ DEBUG(dbgs() << "Excess PSets: ";
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+ dbgs() << TRI->getRegPressureSetName(
+ RegionCriticalPSets[i].PSetID) << " ";
+ dbgs() << "\n");
+}
+
+// FIXME: When the pressure tracker deals in pressure differences then we won't
+// iterate over all RegionCriticalPSets[i].
+void ScheduleDAGMI::
+updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
+ unsigned ID = RegionCriticalPSets[i].PSetID;
+ int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
+ if ((int)NewMaxPressure[ID] > MaxUnits)
+ MaxUnits = NewMaxPressure[ID];
+ }
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+///
+/// This is a skeletal driver, with all the functionality pushed into helpers,
+/// so that it can be easilly extended by experimental schedulers. Generally,
+/// implementing MachineSchedStrategy should be sufficient to implement a new
+/// scheduling algorithm. However, if a scheduler further subclasses
+/// ScheduleDAGMI then it will want to override this virtual method in order to
+/// update any specialized state.
+void ScheduleDAGMI::schedule() {
+ buildDAGWithRegPressure();
+
+ Topo.InitDAGTopologicalSorting();
+
+ postprocessDAG();
+
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ // This may initialize a DFSResult to be used for queue priority.
+ SchedImpl->initialize(this);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+ if (ViewMISchedDAGs) viewGraph();
+
+ // Initialize ready queues now that the DAG and priority data are finalized.
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ assert(!SU->isScheduled && "Node already scheduled");
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ DEBUG({
+ unsigned BBNum = begin()->getParent()->getNumber();
+ dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// Build the DAG and setup three register pressure trackers.
+void ScheduleDAGMI::buildDAGWithRegPressure() {
+ // Initialize the register pressure tracker used by buildSchedGraph.
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Account for liveness generate by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ RPTracker.recede();
+
+ // Build the DAG, and compute current register pressure.
+ buildSchedGraph(AA, &RPTracker);
+
+ // Initialize top/bottom trackers after computing region pressure.
+ initRegPressure();
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void ScheduleDAGMI::postprocessDAG() {
+ for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
+ Mutations[i]->apply(this);
+ }
+}
+
+void ScheduleDAGMI::computeDFSResult() {
+ if (!DFSResult)
+ DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
+ DFSResult->clear();
+ ScheduledTrees.clear();
+ DFSResult->resize(SUnits.size());
+ DFSResult->compute(SUnits);
+ ScheduledTrees.resize(DFSResult->getNumSubtrees());
+}
+
+void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+ SmallVectorImpl<SUnit*> &BotRoots) {
+ for (std::vector<SUnit>::iterator
+ I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ SUnit *SU = &(*I);
+ assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+
+ // Order predecessors so DFSResult follows the critical path.
+ SU->biasCriticalPath();
+
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (!I->NumPredsLeft)
+ TopRoots.push_back(SU);
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (!I->NumSuccsLeft)
+ BotRoots.push_back(SU);
+ }
+ ExitSU.biasCriticalPath();
+}
+
+/// Identify DAG roots and setup scheduler queues.
+void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
+ NextClusterSucc = NULL;
+ NextClusterPred = NULL;
+
+ // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
+ //
+ // Nodes with unreleased weak edges can still be roots.
+ // Release top roots in forward order.
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
+ SchedImpl->releaseTopNode(*I);
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
+ SchedImpl->releaseBottomNode(*I);
+ }
+
+ releaseSuccessors(&EntrySU);
+ releasePredecessors(&ExitSU);
+
+ SchedImpl->registerRoots();
+
+ // Advance past initial DebugValues.
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+ CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+ TopRPTracker.setPos(CurrentTop);
+
+ CurrentBottom = RegionEnd;
+}
+
+/// Move an instruction and update register pressure.
+void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
+ // Move the instruction to its new location in the instruction stream.
+ MachineInstr *MI = SU->getInstr();
+
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else {
+ moveInstruction(MI, CurrentTop);
+ TopRPTracker.setPos(MI);
+ }
+
+ // Update top scheduled pressure.
+ TopRPTracker.advance();
+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+ }
+ else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
+ else {
+ if (&*CurrentTop == MI) {
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ TopRPTracker.setPos(CurrentTop);
+ }
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ }
+ // Update bottom scheduled pressure.
+ BotRPTracker.recede();
+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
+ }
+}
+
+/// Update scheduler queues after scheduling an instruction.
+void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
+ // Release dependent instructions for scheduling.
+ if (IsTopNode)
+ releaseSuccessors(SU);
+ else
+ releasePredecessors(SU);
+
+ SU->isScheduled = true;
+
+ if (DFSResult) {
+ unsigned SubtreeID = DFSResult->getSubtreeID(SU);
+ if (!ScheduledTrees.test(SubtreeID)) {
+ ScheduledTrees.set(SubtreeID);
+ DFSResult->scheduleTree(SubtreeID);
+ SchedImpl->scheduleTree(SubtreeID);
+ }
+ }
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+void ScheduleDAGMI::placeDebugValues() {
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue) {
+ BB->splice(RegionBegin, BB, FirstDbgValue);
+ RegionBegin = FirstDbgValue;
+ }
+
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrevMI = P.second;
+ if (&*RegionBegin == DbgValue)
+ ++RegionBegin;
+ BB->splice(++OrigPrevMI, BB, DbgValue);
+ if (OrigPrevMI == llvm::prior(RegionEnd))
+ RegionEnd = DbgValue;
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGMI::dumpSchedule() const {
+ for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
+ if (SUnit *SU = getSUnit(&(*MI)))
+ SU->dump(this);
+ else
+ dbgs() << "Missing SUnit\n";
+ }
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// LoadClusterMutation - DAG post-processing to cluster loads.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between neighboring
+/// loads.
+class LoadClusterMutation : public ScheduleDAGMutation {
+ struct LoadInfo {
+ SUnit *SU;
+ unsigned BaseReg;
+ unsigned Offset;
+ LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
+ : SU(su), BaseReg(reg), Offset(ofs) {}
+ };
+ static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
+ const LoadClusterMutation::LoadInfo &RHS);
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+public:
+ LoadClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : TII(tii), TRI(tri) {}
+
+ virtual void apply(ScheduleDAGMI *DAG);
+protected:
+ void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+bool LoadClusterMutation::LoadInfoLess(
+ const LoadClusterMutation::LoadInfo &LHS,
+ const LoadClusterMutation::LoadInfo &RHS) {
+ if (LHS.BaseReg != RHS.BaseReg)
+ return LHS.BaseReg < RHS.BaseReg;
+ return LHS.Offset < RHS.Offset;
+}
+
+void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
+ ScheduleDAGMI *DAG) {
+ SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
+ for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
+ SUnit *SU = Loads[Idx];
+ unsigned BaseReg;
+ unsigned Offset;
+ if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
+ LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+ }
+ if (LoadRecords.size() < 2)
+ return;
+ std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
+ unsigned ClusterLength = 1;
+ for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
+ if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+ ClusterLength = 1;
+ continue;
+ }
+
+ SUnit *SUa = LoadRecords[Idx].SU;
+ SUnit *SUb = LoadRecords[Idx+1].SU;
+ if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
+ && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+
+ DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+ << SUb->NodeNum << ")\n");
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since nearby
+ // loads should have effectively the same inputs.
+ for (SUnit::const_succ_iterator
+ SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
+ if (SI->getSUnit() == SUb)
+ continue;
+ DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
+ DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+ }
+ ++ClusterLength;
+ }
+ else
+ ClusterLength = 1;
+ }
+}
+
+/// \brief Callback from DAG postProcessing to create cluster edges for loads.
+void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
+ // Map DAG NodeNum to store chain ID.
+ DenseMap<unsigned, unsigned> StoreChainIDs;
+ // Map each store chain to a set of dependent loads.
+ SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
+ for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+ SUnit *SU = &DAG->SUnits[Idx];
+ if (!SU->getInstr()->mayLoad())
+ continue;
+ unsigned ChainPredID = DAG->SUnits.size();
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->isCtrl()) {
+ ChainPredID = PI->getSUnit()->NodeNum;
+ break;
+ }
+ }
+ // Check if this chain-like pred has been seen
+ // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
+ unsigned NumChains = StoreChainDependents.size();
+ std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
+ StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
+ if (Result.second)
+ StoreChainDependents.resize(NumChains + 1);
+ StoreChainDependents[Result.first->second].push_back(SU);
+ }
+ // Iterate over the store chains.
+ for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
+ clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+}
+
+//===----------------------------------------------------------------------===//
+// MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between instructions
+/// that may be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ const TargetInstrInfo *TII;
+public:
+ MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
+
+ virtual void apply(ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// \brief Callback from DAG postProcessing to create cluster edges to encourage
+/// fused operations.
+void MacroFusion::apply(ScheduleDAGMI *DAG) {
+ // For now, assume targets can only fuse with the branch.
+ MachineInstr *Branch = DAG->ExitSU.getInstr();
+ if (!Branch)
+ return;
+
+ for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
+ SUnit *SU = &DAG->SUnits[--Idx];
+ if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+ continue;
+
+ // Create a single weak edge from SU to ExitSU. The only effect is to cause
+ // bottom-up scheduling to heavily prioritize the clustered SU. There is no
+ // need to copy predecessor edges from ExitSU to SU, since top-down
+ // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
+ // of SU, we could create an artificial edge from the deepest root, but it
+ // hasn't been needed yet.
+ bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+ (void)Success;
+ assert(Success && "No DAG nodes should be reachable from ExitSU");
+
+ DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class ConvergingScheduler : public MachineSchedStrategy {
+public:
+ /// Represent the type of SchedCandidate found within a single queue.
+ /// pickNodeBidirectional depends on these listed by decreasing priority.
+ enum CandReason {
+ NoCand, SingleExcess, SingleCritical, Cluster,
+ ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
+ TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
+ NodeOrder};
+
+#ifndef NDEBUG
+ static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
+#endif
+
+ /// Policy for scheduling the next instruction in the candidate's zone.
+ struct CandPolicy {
+ bool ReduceLatency;
+ unsigned ReduceResIdx;
+ unsigned DemandResIdx;
+
+ CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
+ };
+
+ /// Status of an instruction's critical resource consumption.
+ struct SchedResourceDelta {
+ // Count critical resources in the scheduled region required by SU.
+ unsigned CritResources;
+
+ // Count critical resources from another region consumed by SU.
+ unsigned DemandedResources;
+
+ SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
+
+ bool operator==(const SchedResourceDelta &RHS) const {
+ return CritResources == RHS.CritResources
+ && DemandedResources == RHS.DemandedResources;
+ }
+ bool operator!=(const SchedResourceDelta &RHS) const {
+ return !operator==(RHS);
+ }
+ };
+
+ /// Store the state used by ConvergingScheduler heuristics, required for the
+ /// lifetime of one invocation of pickNode().
+ struct SchedCandidate {
+ CandPolicy Policy;
+
+ // The best SUnit candidate.
+ SUnit *SU;
+
+ // The reason for this candidate.
+ CandReason Reason;
+
+ // Register pressure values for the best candidate.
+ RegPressureDelta RPDelta;
+
+ // Critical resource consumption of the best candidate.
+ SchedResourceDelta ResDelta;
+
+ SchedCandidate(const CandPolicy &policy)
+ : Policy(policy), SU(NULL), Reason(NoCand) {}
+
+ bool isValid() const { return SU; }
+
+ // Copy the status of another candidate without changing policy.
+ void setBest(SchedCandidate &Best) {
+ assert(Best.Reason != NoCand && "uninitialized Sched candidate");
+ SU = Best.SU;
+ Reason = Best.Reason;
+ RPDelta = Best.RPDelta;
+ ResDelta = Best.ResDelta;
+ }
+
+ void initResourceDelta(const ScheduleDAGMI *DAG,
+ const TargetSchedModel *SchedModel);
+ };
+
+ /// Summarize the unscheduled region.
+ struct SchedRemainder {
+ // Critical path through the DAG in expected latency.
+ unsigned CriticalPath;
+
+ // Unscheduled resources
+ SmallVector<unsigned, 16> RemainingCounts;
+ // Critical resource for the unscheduled zone.
+ unsigned CritResIdx;
+ // Number of micro-ops left to schedule.
+ unsigned RemainingMicroOps;
+
+ void reset() {
+ CriticalPath = 0;
+ RemainingCounts.clear();
+ CritResIdx = 0;
+ RemainingMicroOps = 0;
+ }
+
+ SchedRemainder() { reset(); }
+
+ void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
+
+ unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
+ if (!SchedModel->hasInstrSchedModel())
+ return 0;
+
+ return std::max(
+ RemainingMicroOps * SchedModel->getMicroOpFactor(),
+ RemainingCounts[CritResIdx]);
+ }
+ };
+
+ /// Each Scheduling boundary is associated with ready queues. It tracks the
+ /// current cycle in the direction of movement, and maintains the state
+ /// of "hazards" and other interlocks at the current cycle.
+ struct SchedBoundary {
+ ScheduleDAGMI *DAG;
+ const TargetSchedModel *SchedModel;
+ SchedRemainder *Rem;
+
+ ReadyQueue Available;
+ ReadyQueue Pending;
+ bool CheckPending;
+
+ // For heuristics, keep a list of the nodes that immediately depend on the
+ // most recently scheduled node.
+ SmallPtrSet<const SUnit*, 8> NextSUs;
+
+ ScheduleHazardRecognizer *HazardRec;
+
+ unsigned CurrCycle;
+ unsigned IssueCount;
+
+ /// MinReadyCycle - Cycle of the soonest available instruction.
+ unsigned MinReadyCycle;
+
+ // The expected latency of the critical path in this scheduled zone.
+ unsigned ExpectedLatency;
+
+ // Resources used in the scheduled zone beyond this boundary.
+ SmallVector<unsigned, 16> ResourceCounts;
+
+ // Cache the critical resources ID in this scheduled zone.
+ unsigned CritResIdx;
+
+ // Is the scheduled region resource limited vs. latency limited.
+ bool IsResourceLimited;
+
+ unsigned ExpectedCount;
+
+#ifndef NDEBUG
+ // Remember the greatest min operand latency.
+ unsigned MaxMinLatency;
+#endif
+
+ void reset() {
+ // A new HazardRec is created for each DAG and owned by SchedBoundary.
+ delete HazardRec;
+
+ Available.clear();
+ Pending.clear();
+ CheckPending = false;
+ NextSUs.clear();
+ HazardRec = 0;
+ CurrCycle = 0;
+ IssueCount = 0;
+ MinReadyCycle = UINT_MAX;
+ ExpectedLatency = 0;
+ ResourceCounts.resize(1);
+ assert(!ResourceCounts[0] && "nonzero count for bad resource");
+ CritResIdx = 0;
+ IsResourceLimited = false;
+ ExpectedCount = 0;
+#ifndef NDEBUG
+ MaxMinLatency = 0;
+#endif
+ // Reserve a zero-count for invalid CritResIdx.
+ ResourceCounts.resize(1);
+ }
+
+ /// Pending queues extend the ready queues with the same ID and the
+ /// PendingFlag set.
+ SchedBoundary(unsigned ID, const Twine &Name):
+ DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
+ Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
+ HazardRec(0) {
+ reset();
+ }
+
+ ~SchedBoundary() { delete HazardRec; }
+
+ void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
+ SchedRemainder *rem);
+
+ bool isTop() const {
+ return Available.getID() == ConvergingScheduler::TopQID;
+ }
+
+ unsigned getUnscheduledLatency(SUnit *SU) const {
+ if (isTop())
+ return SU->getHeight();
+ return SU->getDepth() + SU->Latency;
+ }
+
+ unsigned getCriticalCount() const {
+ return ResourceCounts[CritResIdx];
+ }
+
+ bool checkHazard(SUnit *SU);
+
+ void setLatencyPolicy(CandPolicy &Policy);
+
+ void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+ void bumpCycle();
+
+ void countResource(unsigned PIdx, unsigned Cycles);
+
+ void bumpNode(SUnit *SU);
+
+ void releasePending();
+
+ void removeReady(SUnit *SU);
+
+ SUnit *pickOnlyChoice();
+ };
+
+private:
+ ScheduleDAGMI *DAG;
+ const TargetSchedModel *SchedModel;
+ const TargetRegisterInfo *TRI;
+
+ // State of the top and bottom scheduled instruction boundaries.
+ SchedRemainder Rem;
+ SchedBoundary Top;
+ SchedBoundary Bot;
+
+public:
+ /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+ enum {
+ TopQID = 1,
+ BotQID = 2,
+ LogMaxQID = 2
+ };
+
+ ConvergingScheduler():
+ DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+ virtual void initialize(ScheduleDAGMI *dag);
+
+ virtual SUnit *pickNode(bool &IsTopNode);
+
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+
+ virtual void releaseTopNode(SUnit *SU);
+
+ virtual void releaseBottomNode(SUnit *SU);
+
+ virtual void registerRoots();
+
+protected:
+ void balanceZones(
+ ConvergingScheduler::SchedBoundary &CriticalZone,
+ ConvergingScheduler::SchedCandidate &CriticalCand,
+ ConvergingScheduler::SchedBoundary &OppositeZone,
+ ConvergingScheduler::SchedCandidate &OppositeCand);
+
+ void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
+ ConvergingScheduler::SchedCandidate &BotCand);
+
+ void tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker);
+
+ SUnit *pickNodeBidirectional(bool &IsTopNode);
+
+ void pickNodeFromQueue(SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate);
+
+#ifndef NDEBUG
+ void traceCandidate(const SchedCandidate &Cand);
+#endif
+};
+} // namespace
+
+void ConvergingScheduler::SchedRemainder::
+init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
+ reset();
+ if (!SchedModel->hasInstrSchedModel())
+ return;
+ RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
+ for (std::vector<SUnit>::iterator
+ I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
+ RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned PIdx = PI->ProcResourceIdx;
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ RemainingCounts[PIdx] += (Factor * PI->Cycles);
+ }
+ }
+ for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
+ PIdx != PEnd; ++PIdx) {
+ if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
+ >= (int)SchedModel->getLatencyFactor()) {
+ CritResIdx = PIdx;
+ }
+ }
+}
+
+void ConvergingScheduler::SchedBoundary::
+init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
+ reset();
+ DAG = dag;
+ SchedModel = smodel;
+ Rem = rem;
+ if (SchedModel->hasInstrSchedModel())
+ ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
+}
+
+void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ SchedModel = DAG->getSchedModel();
+ TRI = DAG->TRI;
+
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ Bot.init(DAG, SchedModel, &Rem);
+
+ // Initialize resource counts.
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+ const TargetMachine &TM = DAG->MF.getTarget();
+ Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+}
+
+void ConvergingScheduler::releaseTopNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+ unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+ SU->TopReadyCycle = PredReadyCycle + MinLatency;
+ }
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isWeak())
+ continue;
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;
+ }
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+void ConvergingScheduler::registerRoots() {
+ Rem.CriticalPath = DAG->ExitSU.getDepth();
+ // Some roots may not feed into ExitSU. Check all of them in case.
+ for (std::vector<SUnit*>::const_iterator
+ I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
+ if ((*I)->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = (*I)->getDepth();
+ }
+ DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
+ << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
+ return true;
+ }
+ return false;
+}
+
+/// Compute the remaining latency to determine whether ILP should be increased.
+void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
+ // FIXME: compile time. In all, we visit four queues here one we should only
+ // need to visit the one that was last popped if we cache the result.
+ unsigned RemLatency = 0;
+ for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
+ I != E; ++I) {
+ unsigned L = getUnscheduledLatency(*I);
+ if (L > RemLatency)
+ RemLatency = L;
+ }
+ for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
+ I != E; ++I) {
+ unsigned L = getUnscheduledLatency(*I);
+ if (L > RemLatency)
+ RemLatency = L;
+ }
+ unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+ if (RemLatency + ExpectedLatency >= CriticalPathLimit
+ && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
+ Policy.ReduceLatency = true;
+ DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
+ }
+}
+
+void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
+ unsigned ReadyCycle) {
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+ if (ReadyCycle > CurrCycle || checkHazard(SU))
+ Pending.push(SU);
+ else
+ Available.push(SU);
+
+ // Record this node as an immediate dependent of the scheduled node.
+ NextSUs.insert(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingScheduler::SchedBoundary::bumpCycle() {
+ unsigned Width = SchedModel->getIssueWidth();
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ unsigned NextCycle = CurrCycle + 1;
+ assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ if (MinReadyCycle > NextCycle) {
+ IssueCount = 0;
+ NextCycle = MinReadyCycle;
+ }
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ }
+ else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+ IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+
+ DEBUG(dbgs() << " " << Available.getName()
+ << " Cycle: " << CurrCycle << '\n');
+}
+
+/// Add the given processor resource to this scheduled zone.
+void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
+ unsigned Cycles) {
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name
+ << " +(" << Cycles << "x" << Factor
+ << ") / " << SchedModel->getLatencyFactor() << '\n');
+
+ unsigned Count = Factor * Cycles;
+ ResourceCounts[PIdx] += Count;
+ assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
+ Rem->RemainingCounts[PIdx] -= Count;
+
+ // Check if this resource exceeds the current critical resource by a full
+ // cycle. If so, it becomes the critical resource.
+ if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
+ >= (int)SchedModel->getLatencyFactor()) {
+ CritResIdx = PIdx;
+ DEBUG(dbgs() << " *** Critical resource "
+ << SchedModel->getProcResource(PIdx)->Name << " x"
+ << ResourceCounts[PIdx] << '\n');
+ }
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+ // Update resource counts and critical resource.
+ if (SchedModel->hasInstrSchedModel()) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ countResource(PI->ProcResourceIdx, PI->Cycles);
+ }
+ }
+ if (isTop()) {
+ if (SU->getDepth() > ExpectedLatency)
+ ExpectedLatency = SU->getDepth();
+ }
+ else {
+ if (SU->getHeight() > ExpectedLatency)
+ ExpectedLatency = SU->getHeight();
+ }
+
+ IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+
+ // checkHazard prevents scheduling multiple instructions per cycle that exceed
+ // issue width. However, we commonly reach the maximum. In this case
+ // opportunistically bump the cycle to avoid uselessly checking everything in
+ // the readyQ. Furthermore, a single instruction may produce more than one
+ // cycle's worth of micro-ops.
+ if (IssueCount >= SchedModel->getIssueWidth()) {
+ DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ }
+}
+
+/// Release pending ready nodes in to the available queue. This makes them
+/// visible to heuristics.
+void ConvergingScheduler::SchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin()+i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin()+i);
+ --i; --e;
+ }
+ DEBUG(if (!Pending.empty()) Pending.dump());
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// defer any nodes that now hit a hazard, and advance the cycle until at least
+/// one node is ready. If multiple instructions are ready, return NULL.
+SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ if (IssueCount > 0) {
+ // Defer any ready instrs that now have a hazard.
+ for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+ if (checkHazard(*I)) {
+ Pending.push(*I);
+ I = Available.remove(I);
+ continue;
+ }
+ ++I;
+ }
+ }
+ for (unsigned i = 0; Available.empty(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard"); (void)i;
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return NULL;
+}
+
+/// Record the candidate policy for opposite zones with different critical
+/// resources.
+///
+/// If the CriticalZone is latency limited, don't force a policy for the
+/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
+void ConvergingScheduler::balanceZones(
+ ConvergingScheduler::SchedBoundary &CriticalZone,
+ ConvergingScheduler::SchedCandidate &CriticalCand,
+ ConvergingScheduler::SchedBoundary &OppositeZone,
+ ConvergingScheduler::SchedCandidate &OppositeCand) {
+
+ if (!CriticalZone.IsResourceLimited)
+ return;
+ assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
+
+ SchedRemainder *Rem = CriticalZone.Rem;
+
+ // If the critical zone is overconsuming a resource relative to the
+ // remainder, try to reduce it.
+ unsigned RemainingCritCount =
+ Rem->RemainingCounts[CriticalZone.CritResIdx];
+ if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
+ > (int)SchedModel->getLatencyFactor()) {
+ CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
+ DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+ << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
+ << '\n');
+ }
+ // If the other zone is underconsuming a resource relative to the full zone,
+ // try to increase it.
+ unsigned OppositeCount =
+ OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
+ if ((int)(OppositeZone.ExpectedCount - OppositeCount)
+ > (int)SchedModel->getLatencyFactor()) {
+ OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
+ DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+ << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
+ << '\n');
+ }
+}
+
+/// Determine if the scheduled zones exceed resource limits or critical path and
+/// set each candidate's ReduceHeight policy accordingly.
+void ConvergingScheduler::checkResourceLimits(
+ ConvergingScheduler::SchedCandidate &TopCand,
+ ConvergingScheduler::SchedCandidate &BotCand) {
+
+ // Set ReduceLatency to true if needed.
+ Bot.setLatencyPolicy(BotCand.Policy);
+ Top.setLatencyPolicy(TopCand.Policy);
+
+ // Handle resource-limited regions.
+ if (Top.IsResourceLimited && Bot.IsResourceLimited
+ && Top.CritResIdx == Bot.CritResIdx) {
+ // If the scheduled critical resource in both zones is no longer the
+ // critical remaining resource, attempt to reduce resource height both ways.
+ if (Top.CritResIdx != Rem.CritResIdx) {
+ TopCand.Policy.ReduceResIdx = Top.CritResIdx;
+ BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
+ DEBUG(dbgs() << "Reduce scheduled "
+ << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
+ }
+ return;
+ }
+ // Handle latency-limited regions.
+ if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
+ // If the total scheduled expected latency exceeds the region's critical
+ // path then reduce latency both ways.
+ //
+ // Just because a zone is not resource limited does not mean it is latency
+ // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle
+ // to exceed expected latency.
+ if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
+ && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
+ TopCand.Policy.ReduceLatency = true;
+ BotCand.Policy.ReduceLatency = true;
+ DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+ << " + " << Bot.ExpectedLatency << '\n');
+ }
+ return;
+ }
+ // The critical resource is different in each zone, so request balancing.
+
+ // Compute the cost of each zone.
+ Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
+ Top.ExpectedCount = std::max(
+ Top.getCriticalCount(),
+ Top.ExpectedCount * SchedModel->getLatencyFactor());
+ Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
+ Bot.ExpectedCount = std::max(
+ Bot.getCriticalCount(),
+ Bot.ExpectedCount * SchedModel->getLatencyFactor());
+
+ balanceZones(Top, TopCand, Bot, BotCand);
+ balanceZones(Bot, BotCand, Top, TopCand);
+}
+
+void ConvergingScheduler::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+ const TargetSchedModel *SchedModel) {
+ if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+ return;
+
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+ ResDelta.CritResources += PI->Cycles;
+ if (PI->ProcResourceIdx == Policy.DemandResIdx)
+ ResDelta.DemandedResources += PI->Cycles;
+ }
+}
+
+/// Return true if this heuristic determines order.
+static bool tryLess(int TryVal, int CandVal,
+ ConvergingScheduler::SchedCandidate &TryCand,
+ ConvergingScheduler::SchedCandidate &Cand,
+ ConvergingScheduler::CandReason Reason) {
+ if (TryVal < CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal > CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
+
+static bool tryGreater(int TryVal, int CandVal,
+ ConvergingScheduler::SchedCandidate &TryCand,
+ ConvergingScheduler::SchedCandidate &Cand,
+ ConvergingScheduler::CandReason Reason) {
+ if (TryVal > CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal < CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
+
+static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+ return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
+/// Apply a set of heursitics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending.
+/// \param RPTracker describes reg pressure within the scheduled zone.
+/// \param TempTracker is a scratch pressure tracker to reuse in queries.
+void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker) {
+
+ // Always initialize TryCand's RPDelta.
+ TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+ // Avoid exceeding the target's limit.
+ if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
+ Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
+ return;
+ if (Cand.Reason == SingleExcess)
+ Cand.Reason = MultiPressure;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
+ Cand.RPDelta.CriticalMax.UnitIncrease,
+ TryCand, Cand, SingleCritical))
+ return;
+ if (Cand.Reason == SingleCritical)
+ Cand.Reason = MultiPressure;
+
+ // Keep clustered nodes together to encourage downstream peephole
+ // optimizations which may reduce resource requirements.
+ //
+ // This is a best effort to set things up for a post-RA pass. Optimizations
+ // like generating loads of multiple registers should ideally be done within
+ // the scheduler pass by combining the loads during DAG postprocessing.
+ const SUnit *NextClusterSU =
+ Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+ TryCand, Cand, Cluster))
+ return;
+ // Currently, weak edges are for clustering, so we hard-code that reason.
+ // However, deferring the current TryCand will not change Cand's reason.
+ CandReason OrigReason = Cand.Reason;
+ if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
+ getWeakLeft(Cand.SU, Zone.isTop()),
+ TryCand, Cand, Cluster)) {
+ Cand.Reason = OrigReason;
+ return;
+ }
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ if (Cand.Policy.ReduceLatency) {
+ if (Zone.isTop()) {
+ if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
+ > Zone.ExpectedCount) {
+ if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, TopDepthReduce))
+ return;
+ }
+ if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, TopPathReduce))
+ return;
+ }
+ else {
+ if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
+ > Zone.ExpectedCount) {
+ if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, BotHeightReduce))
+ return;
+ }
+ if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, BotPathReduce))
+ return;
+ }
+ }
+
+ // Avoid increasing the max pressure of the entire region.
+ if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
+ Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
+ return;
+ if (Cand.Reason == SingleMax)
+ Cand.Reason = MultiPressure;
+
+ // Prefer immediate defs/users of the last scheduled instruction. This is a
+ // nice pressure avoidance strategy that also conserves the processor's
+ // register renaming resources and keeps the machine code readable.
+ if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
+ TryCand, Cand, NextDefUse))
+ return;
+
+ // Fall through to original instruction order.
+ if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
+}
+
+/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
+/// more desirable than RHS from scheduling standpoint.
+static bool compareRPDelta(const RegPressureDelta &LHS,
+ const RegPressureDelta &RHS) {
+ // Compare each component of pressure in decreasing order of importance
+ // without checking if any are valid. Invalid PressureElements are assumed to
+ // have UnitIncrease==0, so are neutral.
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
+ DEBUG(dbgs() << "RP excess top - bot: "
+ << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
+ return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
+ }
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
+ DEBUG(dbgs() << "RP critical top - bot: "
+ << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
+ << '\n');
+ return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
+ }
+ // Avoid increasing the max pressure of the entire region.
+ if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
+ DEBUG(dbgs() << "RP current top - bot: "
+ << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
+ << '\n');
+ return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
+ }
+ return false;
+}
+
+#ifndef NDEBUG
+const char *ConvergingScheduler::getReasonStr(
+ ConvergingScheduler::CandReason Reason) {
+ switch (Reason) {
+ case NoCand: return "NOCAND ";
+ case SingleExcess: return "REG-EXCESS";
+ case SingleCritical: return "REG-CRIT ";
+ case Cluster: return "CLUSTER ";
+ case SingleMax: return "REG-MAX ";
+ case MultiPressure: return "REG-MULTI ";
+ case ResourceReduce: return "RES-REDUCE";
+ case ResourceDemand: return "RES-DEMAND";
+ case TopDepthReduce: return "TOP-DEPTH ";
+ case TopPathReduce: return "TOP-PATH ";
+ case BotHeightReduce:return "BOT-HEIGHT";
+ case BotPathReduce: return "BOT-PATH ";
+ case NextDefUse: return "DEF-USE ";
+ case NodeOrder: return "ORDER ";
+ };
+ llvm_unreachable("Unknown reason!");
+}
+
+void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
+ PressureElement P;
+ unsigned ResIdx = 0;
+ unsigned Latency = 0;
+ switch (Cand.Reason) {
+ default:
+ break;
+ case SingleExcess:
+ P = Cand.RPDelta.Excess;
+ break;
+ case SingleCritical:
+ P = Cand.RPDelta.CriticalMax;
+ break;
+ case SingleMax:
+ P = Cand.RPDelta.CurrentMax;
+ break;
+ case ResourceReduce:
+ ResIdx = Cand.Policy.ReduceResIdx;
+ break;
+ case ResourceDemand:
+ ResIdx = Cand.Policy.DemandResIdx;
+ break;
+ case TopDepthReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ case TopPathReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotHeightReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotPathReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ }
+ dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+ if (P.isValid())
+ dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
+ << ":" << P.UnitIncrease << " ";
+ else
+ dbgs() << " ";
+ if (ResIdx)
+ dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
+ else
+ dbgs() << " ";
+ if (Latency)
+ dbgs() << " " << Latency << " cycles ";
+ else
+ dbgs() << " ";
+ dbgs() << '\n';
+}
+#endif
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Cand) {
+ ReadyQueue &Q = Zone.Available;
+
+ DEBUG(Q.dump());
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+
+ SchedCandidate TryCand(Cand.Policy);
+ TryCand.SU = *I;
+ tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
+ if (TryCand.Reason != NoCand) {
+ // Initialize resource delta if needed in case future heuristics query it.
+ if (TryCand.ResDelta == SchedResourceDelta())
+ TryCand.initResourceDelta(DAG, SchedModel);
+ Cand.setBest(TryCand);
+ DEBUG(traceCandidate(Cand));
+ }
+ }
+}
+
+static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
+ bool IsTop) {
+ DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot")
+ << " SU(" << Cand.SU->NodeNum << ") "
+ << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ return SU;
+ }
+ CandPolicy NoPolicy;
+ SchedCandidate BotCand(NoPolicy);
+ SchedCandidate TopCand(NoPolicy);
+ checkResourceLimits(TopCand, BotCand);
+
+ // Prefer bottom scheduling when heuristics are silent.
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
+ IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure pick it.
+ if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
+ if (TopCand.Reason < BotCand.Reason) {
+ IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
+ return TopCand.SU;
+ }
+ IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
+ return BotCand.SU;
+ }
+ // Check for a salient pressure difference and pick the best from either side.
+ if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
+ IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate, in node order if all else failed.
+ if (TopCand.Reason < BotCand.Reason) {
+ IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
+ return TopCand.SU;
+ }
+ IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
+ return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return NULL;
+ }
+ SUnit *SU;
+ do {
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ SchedCandidate TopCand(NoPolicy);
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ }
+ else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ SchedCandidate BotCand(NoPolicy);
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ }
+ else {
+ SU = pickNodeBidirectional(IsTopNode);
+ }
+ } while (SU->isScheduled);
+
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ DEBUG(dbgs() << "Scheduling " << *SU->getInstr());
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
+/// it's state based on the current cycle before MachineSchedStrategy does.
+void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ SU->TopReadyCycle = Top.CurrCycle;
+ Top.bumpNode(SU);
+ }
+ else {
+ SU->BotReadyCycle = Bot.CurrCycle;
+ Bot.bumpNode(SU);
+ }
+}
+
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
+ // Register DAG post-processors.
+ if (EnableLoadCluster)
+ DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
+ if (EnableMacroFusion)
+ DAG->addMutation(new MacroFusion(DAG->TII));
+ return DAG;
+}
+static MachineSchedRegistry
+ConvergingSchedRegistry("converge", "Standard converging scheduler.",
+ createConvergingSched);
+
+//===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+ const SchedDFSResult *DFSResult;
+ const BitVector *ScheduledTrees;
+ bool MaximizeILP;
+
+ ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
+
+ /// \brief Apply a less-than relation on node priority.
+ ///
+ /// (Return true if A comes after B in the Q.)
+ bool operator()(const SUnit *A, const SUnit *B) const {
+ unsigned SchedTreeA = DFSResult->getSubtreeID(A);
+ unsigned SchedTreeB = DFSResult->getSubtreeID(B);
+ if (SchedTreeA != SchedTreeB) {
+ // Unscheduled trees have lower priority.
+ if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
+ return ScheduledTrees->test(SchedTreeB);
+
+ // Trees with shallower connections have have lower priority.
+ if (DFSResult->getSubtreeLevel(SchedTreeA)
+ != DFSResult->getSubtreeLevel(SchedTreeB)) {
+ return DFSResult->getSubtreeLevel(SchedTreeA)
+ < DFSResult->getSubtreeLevel(SchedTreeB);
+ }
+ }
+ if (MaximizeILP)
+ return DFSResult->getILP(A) < DFSResult->getILP(B);
+ else
+ return DFSResult->getILP(A) > DFSResult->getILP(B);
+ }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+ /// In case all subtrees are eventually connected to a common root through
+ /// data dependence (e.g. reduction), place an upper limit on their size.
+ ///
+ /// FIXME: A subtree limit is generally good, but in the situation commented
+ /// above, where multiple similar subtrees feed a common root, we should
+ /// only split at a point where the resulting subtrees will be balanced.
+ /// (a motivating test case must be found).
+ static const unsigned SubtreeLimit = 16;
+
+ ScheduleDAGMI *DAG;
+ ILPOrder Cmp;
+
+ std::vector<SUnit*> ReadyQ;
+public:
+ ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
+
+ virtual void initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ DAG->computeDFSResult();
+ Cmp.DFSResult = DAG->getDFSResult();
+ Cmp.ScheduledTrees = &DAG->getScheduledTrees();
+ ReadyQ.clear();
+ }
+
+ virtual void registerRoots() {
+ // Restore the heap in ReadyQ with the updated DFS results.
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ /// Callback to select the highest priority node from the ready Q.
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ if (ReadyQ.empty()) return NULL;
+ std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ SUnit *SU = ReadyQ.back();
+ ReadyQ.pop_back();
+ IsTopNode = false;
+ DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
+ << *SU->getInstr()
+ << " ILP: " << DAG->getDFSResult()->getILP(SU)
+ << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
+ << DAG->getDFSResult()->getSubtreeLevel(
+ DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
+ return SU;
+ }
+
+ /// \brief Scheduler callback to notify that a new subtree is scheduled.
+ virtual void scheduleTree(unsigned SubtreeID) {
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
+ /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
+ /// DFSResults, and resort the priority Q.
+ virtual void schedNode(SUnit *SU, bool IsTopNode) {
+ assert(!IsTopNode && "SchedDFSResult needs bottom-up");
+ }
+
+ virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
+
+ virtual void releaseBottomNode(SUnit *SU) {
+ ReadyQ.push_back(SU);
+ std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(true));
+}
+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(false));
+}
+static MachineSchedRegistry ILPMaxRegistry(
+ "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
+static MachineSchedRegistry ILPMinRegistry(
+ "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Shuffler for Correctness Testing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace {
+/// Apply a less-than relation on the node order, which corresponds to the
+/// instruction order prior to scheduling. IsReverse implements greater-than.
+template<bool IsReverse>
+struct SUnitOrder {
+ bool operator()(SUnit *A, SUnit *B) const {
+ if (IsReverse)
+ return A->NodeNum > B->NodeNum;
+ else
+ return A->NodeNum < B->NodeNum;
+ }
+};
+
+/// Reorder instructions as much as possible.
+class InstructionShuffler : public MachineSchedStrategy {
+ bool IsAlternating;
+ bool IsTopDown;
+
+ // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
+ // gives nodes with a higher number higher priority causing the latest
+ // instructions to be scheduled first.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+ TopQ;
+ // When scheduling bottom-up, use greater-than as the queue priority.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+ BottomQ;
+public:
+ InstructionShuffler(bool alternate, bool topdown)
+ : IsAlternating(alternate), IsTopDown(topdown) {}
+
+ virtual void initialize(ScheduleDAGMI *) {
+ TopQ.clear();
+ BottomQ.clear();
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ SUnit *SU;
+ if (IsTopDown) {
+ do {
+ if (TopQ.empty()) return NULL;
+ SU = TopQ.top();
+ TopQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = true;
+ }
+ else {
+ do {
+ if (BottomQ.empty()) return NULL;
+ SU = BottomQ.top();
+ BottomQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = false;
+ }
+ if (IsAlternating)
+ IsTopDown = !IsTopDown;
+ return SU;
+ }
+
+ virtual void schedNode(SUnit *SU, bool IsTopNode) {}
+
+ virtual void releaseTopNode(SUnit *SU) {
+ TopQ.push(SU);
+ }
+ virtual void releaseBottomNode(SUnit *SU) {
+ BottomQ.push(SU);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
+ bool Alternate = !ForceTopDown && !ForceBottomUp;
+ bool TopDown = !ForceBottomUp;
+ assert((TopDown || !ForceTopDown) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
+}
+static MachineSchedRegistry ShufflerRegistry(
+ "shuffle", "Shuffle machine instructions alternating directions",
+ createInstructionShuffler);
+#endif // !NDEBUG
+
+//===----------------------------------------------------------------------===//
+// GraphWriter support for ScheduleDAGMI.
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace llvm {
+
+template<> struct GraphTraits<
+ ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
+
+template<>
+struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool isNodeHidden(const SUnit *Node) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return false;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+ static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
+ std::string Str;
+ raw_string_ostream SS(Str);
+ SS << "SU(" << SU->NodeNum << ')';
+ return SS.str();
+ }
+ static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+ }
+
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ std::string Str("shape=Mrecord");
+ const SchedDFSResult *DFS =
+ static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
+ if (DFS) {
+ Str += ",style=filled,fillcolor=\"#";
+ Str += DOT::getColorString(DFS->getSubtreeID(N));
+ Str += '"';
+ }
+ return Str;
+ }
+};
+} // namespace llvm
+#endif // NDEBUG
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAGMI::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
new file mode 100644
index 000000000000..4dafbe5a3e3a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -0,0 +1,712 @@
+//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for an LLVM-IR-level sinking pass. It is only designed to sink simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-sink"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+SplitEdges("machine-sink-split",
+ cl::desc("Split critical edges during machine sinking"),
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+
+namespace {
+ class MachineSinking : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI; // Machine register information
+ MachineDominatorTree *DT; // Machine dominator tree
+ MachineLoopInfo *LI;
+ AliasAnalysis *AA;
+
+ // Remember which edges have been considered for breaking.
+ SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+ CEBCandidates;
+
+ public:
+ static char ID; // Pass identification
+ MachineSinking() : MachineFunctionPass(ID) {
+ initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ }
+
+ virtual void releaseMemory() {
+ CEBCandidates.clear();
+ }
+
+ private:
+ bool ProcessBlock(MachineBasicBlock &MBB);
+ bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+ MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
+ bool SinkInstruction(MachineInstr *MI, bool &SawStore);
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge, bool &LocalUse) const;
+ MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge);
+ bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo);
+
+ bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+ };
+
+ // SuccessorSorter - Sort Successors according to their loop depth.
+ struct SuccessorSorter {
+ SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {}
+ bool operator()(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) const {
+ return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
+ }
+ MachineLoopInfo *LI;
+ };
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+char &llvm::MachineSinkingID = MachineSinking::ID;
+INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (!MI->isCopy())
+ return false;
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ !MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
+ if (SRC != DRC)
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isCopyLike())
+ return false;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MRI->replaceRegWith(DstReg, SrcReg);
+ MI->eraseFromParent();
+ ++NumCoalesces;
+ return true;
+}
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block. If any use is in the
+/// definition block, then return false since it is never legal to move def
+/// after uses.
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Only makes sense for vregs");
+
+ // Ignore debug uses because debug info doesn't affect the code.
+ if (MRI->use_nodbg_empty(Reg))
+ return true;
+
+ // BreakPHIEdge is true if all the uses are in the successor MBB being sunken
+ // into and they are all PHI nodes. In this case, machine-sink must break
+ // the critical edge first. e.g.
+ //
+ // BB#1: derived from LLVM BB %bb4.preheader
+ // Predecessors according to CFG: BB#0
+ // ...
+ // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+ // ...
+ // JE_4 <BB#37>, %EFLAGS<imp-use>
+ // Successors according to CFG: BB#37 BB#2
+ //
+ // BB#2: derived from LLVM BB %bb.nph
+ // Predecessors according to CFG: BB#0 BB#1
+ // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+ BreakPHIEdge = true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (!(UseBlock == MBB && UseInst->isPHI() &&
+ UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+ BreakPHIEdge = false;
+ break;
+ }
+ }
+ if (BreakPHIEdge)
+ return true;
+
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ // Determine the block of the use.
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseInst->isPHI()) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+ } else if (UseBlock == DefMBB) {
+ LocalUse = true;
+ return false;
+ }
+
+ // Check that it dominates.
+ if (!DT->dominates(MBB, UseBlock))
+ return false;
+ }
+
+ return true;
+}
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "******** Machine Sinking ********\n");
+
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<MachineLoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ CEBCandidates.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+ // Can't sink anything out of a block that has less than two successors.
+ if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&MBB)) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ MachineBasicBlock::iterator I = MBB.end();
+ --I;
+ bool ProcessedBegin, SawStore = false;
+ do {
+ MachineInstr *MI = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == MBB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (MI->isDebugValue())
+ continue;
+
+ bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
+ if (Joined) {
+ MadeChange = true;
+ continue;
+ }
+
+ if (SinkInstruction(MI, SawStore))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+ // FIXME: Need much better heuristics.
+
+ // If the pass has already considered breaking this edge (during this pass
+ // through the function), then let's go ahead and break it. This means
+ // sinking multiple "cheap" instructions into the same block.
+ if (!CEBCandidates.insert(std::make_pair(From, To)))
+ return true;
+
+ if (!MI->isCopy() && !MI->isAsCheapAsAMove())
+ return true;
+
+ // MI is cheap, we probably don't want to break the critical edge for it.
+ // However, if this would allow some definitions of its source operands
+ // to be sunk then it's probably worth it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MRI->hasOneNonDBGUse(Reg))
+ return true;
+ }
+
+ return false;
+}
+
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+ return 0;
+
+ // Avoid breaking back edge. From == To means backedge for single BB loop.
+ if (!SplitEdges || FromBB == ToBB)
+ return 0;
+
+ // Check for backedges of more "complex" loops.
+ if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+ LI->isLoopHeader(ToBB))
+ return 0;
+
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // BB#1:
+ // v1024
+ // Beq BB#3
+ // <fallthrough>
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ //
+ // BB#1:
+ // ...
+ // Bne BB#2
+ // BB#4:
+ // v1024 =
+ // B BB#3
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+ // flow. We need to ensure the new basic block where the computation is
+ // sunk to dominates all the uses.
+ // It's only legal to break critical edge and sink the computation to the
+ // new block if all the predecessors of "To", except for "From", are
+ // not dominated by "From". Given SSA property, this means these
+ // predecessors are dominated by "To".
+ //
+ // There is no need to do this check if all the uses are PHI nodes. PHI
+ // sources are only defined on the specific predecessor edges.
+ if (!BreakPHIEdge) {
+ for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+ E = ToBB->pred_end(); PI != E; ++PI) {
+ if (*PI == FromBB)
+ continue;
+ if (!DT->dominates(ToBB, *PI))
+ return 0;
+ }
+ }
+
+ return FromBB->SplitCriticalEdge(ToBB, this);
+}
+
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+ return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
+}
+
+/// collectDebgValues - Scan instructions following MI and collect any
+/// matching DBG_VALUEs.
+static void collectDebugValues(MachineInstr *MI,
+ SmallVector<MachineInstr *, 2> & DbgValues) {
+ DbgValues.clear();
+ if (!MI->getOperand(0).isReg())
+ return;
+
+ MachineBasicBlock::iterator DI = MI; ++DI;
+ for (MachineBasicBlock::iterator DE = MI->getParent()->end();
+ DI != DE; ++DI) {
+ if (!DI->isDebugValue())
+ return;
+ if (DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() == MI->getOperand(0).getReg())
+ DbgValues.push_back(DI);
+ }
+}
+
+/// isPostDominatedBy - Return true if A is post dominated by B.
+static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) {
+
+ // FIXME - Use real post dominator.
+ if (A->succ_size() != 2)
+ return false;
+ MachineBasicBlock::succ_iterator I = A->succ_begin();
+ if (B == *I)
+ ++I;
+ MachineBasicBlock *OtherSuccBlock = *I;
+ if (OtherSuccBlock->succ_size() != 1 ||
+ *(OtherSuccBlock->succ_begin()) != B)
+ return false;
+
+ return true;
+}
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo) {
+ assert (MI && "Invalid MachineInstr!");
+ assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+ if (MBB == SuccToSinkTo)
+ return false;
+
+ // It is profitable if SuccToSinkTo does not post dominate current block.
+ if (!isPostDominatedBy(MBB, SuccToSinkTo))
+ return true;
+
+ // Check if only use in post dominated block is PHI instruction.
+ bool NonPHIUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseBlock == SuccToSinkTo && !UseInst->isPHI())
+ NonPHIUse = true;
+ }
+ if (!NonPHIUse)
+ return true;
+
+ // If SuccToSinkTo post dominates then also it may be profitable if MI
+ // can further profitably sinked into another block in next round.
+ bool BreakPHIEdge = false;
+ // FIXME - If finding successor is compile time expensive then catch results.
+ if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge))
+ return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2);
+
+ // If SuccToSinkTo is final destination and it is a post dominator of current
+ // block then it is not profitable to sink MI into SuccToSinkTo block.
+ return false;
+}
+
+/// FindSuccToSinkTo - Find a successor to sink this instruction to.
+MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool &BreakPHIEdge) {
+
+ assert (MI && "Invalid MachineInstr!");
+ assert (MBB && "Invalid MachineBasicBlock!");
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ MachineBasicBlock *SuccToSinkTo = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue; // Ignore non-register operands.
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ return NULL;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return NULL;
+ }
+ } else {
+ // Virtual register uses are always safe to sink.
+ if (MO.isUse()) continue;
+
+ // If it's not safe to move defs of the register class, then abort.
+ if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
+ return NULL;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ //
+ // x = computation
+ // if () {} else {}
+ // use x
+ //
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Virtual register defs can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ if (SuccToSinkTo) {
+ // If a previous operand picked a block to sink to, then this operand
+ // must be sinkable to the same block.
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
+ BreakPHIEdge, LocalUse))
+ return NULL;
+
+ continue;
+ }
+
+ // Otherwise, we should look at all the successors and decide which one
+ // we should sink to.
+ // We give successors with smaller loop depth higher priority.
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
+ std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI));
+ for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(),
+ E = Succs.end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBlock = *SI;
+ bool LocalUse = false;
+ if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
+ BreakPHIEdge, LocalUse)) {
+ SuccToSinkTo = SuccBlock;
+ break;
+ }
+ if (LocalUse)
+ // Def is used locally, it's never safe to move this def.
+ return NULL;
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return NULL;
+ else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo))
+ return NULL;
+ }
+ }
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MBB == SuccToSinkTo)
+ return NULL;
+
+ // It's not safe to sink instructions to EH landing pad. Control flow into
+ // landing pad is implicitly defined.
+ if (SuccToSinkTo && SuccToSinkTo->isLandingPad())
+ return NULL;
+
+ return SuccToSinkTo;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
+ return false;
+
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, AA, SawStore))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ bool BreakPHIEdge = false;
+ MachineBasicBlock *ParentBlock = MI->getParent();
+ MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
+
+ // If the instruction to move defines a dead physical register which is live
+ // when leaving the basic block, don't move it because it could turn into a
+ // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (SuccToSinkTo->isLiveIn(Reg))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+
+ // If the block has multiple predecessors, this would introduce computation on
+ // a path that it doesn't already exist. We could split the critical edge,
+ // but for now we just punt.
+ if (SuccToSinkTo->pred_size() > 1) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ bool TryBreak = false;
+ bool store = true;
+ if (!MI->isSafeToMove(TII, AA, store)) {
+ DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+ TryBreak = true;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+ TryBreak = true;
+ }
+
+ // Don't sink instructions into a loop.
+ if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ TryBreak = true;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ if (!TryBreak)
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ else {
+ MachineBasicBlock *NewSucc =
+ SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ } else {
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ BreakPHIEdge = false;
+ }
+ }
+ }
+
+ if (BreakPHIEdge) {
+ // BreakPHIEdge is true if all the uses are in the successor MBB being
+ // sunken into and they are all PHI nodes. In this case, machine-sink must
+ // break the critical edge first.
+ MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ }
+
+ // Determine where to insert into. Skip phi nodes.
+ MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
+ ++InsertPos;
+
+ // collect matching debug values.
+ SmallVector<MachineInstr *, 2> DbgValuesToSink;
+ collectDebugValues(MI, DbgValuesToSink);
+
+ // Move the instruction.
+ SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+ ++MachineBasicBlock::iterator(MI));
+
+ // Move debug values.
+ for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(),
+ DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
+ MachineInstr *DbgMI = *DBI;
+ SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI,
+ ++MachineBasicBlock::iterator(DbgMI));
+ }
+
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
+ MI->clearKillInfo();
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
new file mode 100644
index 000000000000..49d8c4e9470d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -0,0 +1,1290 @@
+//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-trace-metrics"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+char MachineTraceMetrics::ID = 0;
+char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
+
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+
+MachineTraceMetrics::MachineTraceMetrics()
+ : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) {
+ std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0);
+}
+
+void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ TII = MF->getTarget().getInstrInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+ MRI = &MF->getRegInfo();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ const TargetSubtargetInfo &ST =
+ MF->getTarget().getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ BlockInfo.resize(MF->getNumBlockIDs());
+ ProcResourceCycles.resize(MF->getNumBlockIDs() *
+ SchedModel.getNumProcResourceKinds());
+ return false;
+}
+
+void MachineTraceMetrics::releaseMemory() {
+ MF = 0;
+ BlockInfo.clear();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i) {
+ delete Ensembles[i];
+ Ensembles[i] = 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed block information
+//===----------------------------------------------------------------------===//
+//
+// The number of instructions in a basic block and the CPU resources used by
+// those instructions don't depend on any given trace strategy.
+
+/// Compute the resource usage in basic block MBB.
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+ assert(MBB && "No basic block");
+ FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+ if (FBI->hasResources())
+ return FBI;
+
+ // Compute resource usage in the block.
+ FBI->HasCalls = false;
+ unsigned InstrCount = 0;
+
+ // Add up per-processor resource cycles as well.
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ SmallVector<unsigned, 32> PRCycles(PRKinds);
+
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *MI = I;
+ if (MI->isTransient())
+ continue;
+ ++InstrCount;
+ if (MI->isCall())
+ FBI->HasCalls = true;
+
+ // Count processor resources used.
+ if (!SchedModel.hasInstrSchedModel())
+ continue;
+ const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);
+ if (!SC->isValid())
+ continue;
+
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
+ PRCycles[PI->ProcResourceIdx] += PI->Cycles;
+ }
+ }
+ FBI->InstrCount = InstrCount;
+
+ // Scale the resource cycles so they are comparable.
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceCycles[PROffset + K] =
+ PRCycles[K] * SchedModel.getResourceFactor(K);
+
+ return FBI;
+}
+
+ArrayRef<unsigned>
+MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
+ assert(BlockInfo[MBBNum].hasResources() &&
+ "getResources() must be called before getProcResourceCycles()");
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
+ return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+ : MTM(*ct) {
+ BlockInfo.resize(MTM.BlockInfo.size());
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
+ ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() {}
+
+const MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
+ return MTM.Loops->getLoopFor(MBB);
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB.
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+
+ // Compute resources from trace above. The top block is simple.
+ if (!TBI->Pred) {
+ TBI->InstrDepth = 0;
+ TBI->Head = MBB->getNumber();
+ std::fill(ProcResourceDepths.begin() + PROffset,
+ ProcResourceDepths.begin() + PROffset + PRKinds, 0);
+ return;
+ }
+
+ // Compute from the block above. A post-order traversal ensures the
+ // predecessor is always computed first.
+ unsigned PredNum = TBI->Pred->getNumber();
+ TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
+ assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+ const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
+ TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+ TBI->Head = PredTBI->Head;
+
+ // Compute per-resource depths.
+ ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
+ ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB.
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+
+ // Compute resources for the current block.
+ TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+ ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
+
+ // The trace tail is done.
+ if (!TBI->Succ) {
+ TBI->Tail = MBB->getNumber();
+ std::copy(PRCycles.begin(), PRCycles.end(),
+ ProcResourceHeights.begin() + PROffset);
+ return;
+ }
+
+ // Compute from the block below. A post-order traversal ensures the
+ // predecessor is always computed first.
+ unsigned SuccNum = TBI->Succ->getNumber();
+ TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
+ assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+ TBI->InstrHeight += SuccTBI->InstrHeight;
+ TBI->Tail = SuccTBI->Tail;
+
+ // Compute per-resource heights.
+ ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidDepth() ? TBI : 0;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidHeight() ? TBI : 0;
+}
+
+/// Get an array of processor resource depths for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by all blocks preceding MBB in its trace. It does not include instructions
+/// in MBB.
+///
+/// Compare TraceBlockInfo::InstrDepth.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceDepths(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
+ return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
+/// Get an array of processor resource heights for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by this block and all blocks following it in its trace.
+///
+/// Compare TraceBlockInfo::InstrHeight.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceHeights(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
+ return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
+//===----------------------------------------------------------------------===//
+// Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is called in a post-order.
+//
+
+// We never allow traces that leave loops, but we do allow traces to enter
+// nested loops. We also never allow traces to contain back-edges.
+//
+// This means that a loop header can never appear above the center block of a
+// trace, except as the trace head. Below the center block, loop exiting edges
+// are banned.
+//
+// Return true if an edge from the From loop to the To loop is leaving a loop.
+// Either of To and From can be null.
+static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
+ return From && !From->contains(To);
+}
+
+// MinInstrCountEnsemble - Pick the trace that executes the least number of
+// instructions.
+namespace {
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const { return "MinInstr"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+
+public:
+ MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+ : MachineTraceMetrics::Ensemble(mtm) {}
+};
+}
+
+// Select the preferred predecessor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ // Don't leave loops, and never follow back-edges.
+ if (CurLoop && MBB == CurLoop->getHeader())
+ return 0;
+ unsigned CurCount = MTM.getResources(MBB)->InstrCount;
+ const MachineBasicBlock *Best = 0;
+ unsigned BestDepth = 0;
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ const MachineBasicBlock *Pred = *I;
+ const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+ getDepthResources(Pred);
+ // Ignore cycles that aren't natural loops.
+ if (!PredTBI)
+ continue;
+ // Pick the predecessor that would give this block the smallest InstrDepth.
+ unsigned Depth = PredTBI->InstrDepth + CurCount;
+ if (!Best || Depth < BestDepth)
+ Best = Pred, BestDepth = Depth;
+ }
+ return Best;
+}
+
+// Select the preferred successor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ const MachineBasicBlock *Best = 0;
+ unsigned BestHeight = 0;
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ const MachineBasicBlock *Succ = *I;
+ // Don't consider back-edges.
+ if (CurLoop && Succ == CurLoop->getHeader())
+ continue;
+ // Don't consider successors exiting CurLoop.
+ if (isExitingLoop(CurLoop, getLoopFor(Succ)))
+ continue;
+ const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
+ getHeightResources(Succ);
+ // Ignore cycles that aren't natural loops.
+ if (!SuccTBI)
+ continue;
+ // Pick the successor that would give this block the smallest InstrHeight.
+ unsigned Height = SuccTBI->InstrHeight;
+ if (!Best || Height < BestHeight)
+ Best = Succ, BestHeight = Height;
+ }
+ return Best;
+}
+
+// Get an Ensemble sub-class for the requested trace strategy.
+MachineTraceMetrics::Ensemble *
+MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
+ assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
+ Ensemble *&E = Ensembles[strategy];
+ if (E)
+ return E;
+
+ // Allocate new Ensemble on demand.
+ switch (strategy) {
+ case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
+ default: llvm_unreachable("Invalid trace strategy enum");
+ }
+}
+
+void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
+ BlockInfo[MBB->getNumber()].invalidate();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->invalidate(MBB);
+}
+
+void MachineTraceMetrics::verifyAnalysis() const {
+ if (!MF)
+ return;
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->verify();
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Trace building
+//===----------------------------------------------------------------------===//
+//
+// Traces are built by two CFG traversals. To avoid recomputing too much, use a
+// set abstraction that confines the search to the current loop, and doesn't
+// revisit blocks.
+
+namespace {
+struct LoopBounds {
+ MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
+ SmallPtrSet<const MachineBasicBlock*, 8> Visited;
+ const MachineLoopInfo *Loops;
+ bool Downward;
+ LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
+ const MachineLoopInfo *loops)
+ : Blocks(blocks), Loops(loops), Downward(false) {}
+};
+}
+
+// Specialize po_iterator_storage in order to prune the post-order traversal so
+// it is limited to the current loop and doesn't traverse the loop back edges.
+namespace llvm {
+template<>
+class po_iterator_storage<LoopBounds, true> {
+ LoopBounds &LB;
+public:
+ po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+ void finishPostorder(const MachineBasicBlock*) {}
+
+ bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
+ // Skip already visited To blocks.
+ MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
+ if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
+ return false;
+ // From is null once when To is the trace center block.
+ if (From) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
+ // Don't follow backedges, don't leave FromLoop when going upwards.
+ if ((LB.Downward ? To : From) == FromLoop->getHeader())
+ return false;
+ // Don't leave FromLoop.
+ if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
+ return false;
+ }
+ }
+ // To is a new block. Mark the block as visited in case the CFG has cycles
+ // that MachineLoopInfo didn't recognize as a natural loop.
+ return LB.Visited.insert(To);
+ }
+};
+}
+
+/// Compute the trace through MBB.
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
+ << MBB->getNumber() << '\n');
+ // Set up loop bounds for the backwards post-order traversal.
+ LoopBounds Bounds(BlockInfo, MTM.Loops);
+
+ // Run an upwards post-order search for the trace start.
+ Bounds.Downward = false;
+ Bounds.Visited.clear();
+ typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
+ for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the predecessors have been visited, pick the preferred one.
+ TBI.Pred = pickTracePred(*I);
+ DEBUG({
+ if (TBI.Pred)
+ dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leading to I is now known, compute the depth resources.
+ computeDepthResources(*I);
+ }
+
+ // Run a downwards post-order search for the trace end.
+ Bounds.Downward = true;
+ Bounds.Visited.clear();
+ typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
+ for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the successors have been visited, pick the preferred one.
+ TBI.Succ = pickTraceSucc(*I);
+ DEBUG({
+ if (TBI.Succ)
+ dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leaving I is now known, compute the height resources.
+ computeHeightResources(*I);
+ }
+}
+
+/// Invalidate traces through BadMBB.
+void
+MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
+ SmallVector<const MachineBasicBlock*, 16> WorkList;
+ TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
+
+ // Invalidate height resources of blocks above MBB.
+ if (BadTBI.hasValidHeight()) {
+ BadTBI.invalidateHeight();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " height.\n");
+ // Find any MBB predecessors that have MBB as their preferred successor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidHeight())
+ continue;
+ if (TBI.Succ == MBB) {
+ TBI.invalidateHeight();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Succ is actually a *I successor.
+ assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Invalidate depth resources of blocks below MBB.
+ if (BadTBI.hasValidDepth()) {
+ BadTBI.invalidateDepth();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " depth.\n");
+ // Find any MBB successors that have MBB as their preferred predecessor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidDepth())
+ continue;
+ if (TBI.Pred == MBB) {
+ TBI.invalidateDepth();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Pred is actually a *I predecessor.
+ assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Clear any per-instruction data. We only have to do this for BadMBB itself
+ // because the instructions in that block may change. Other blocks may be
+ // invalidated, but their instructions will stay the same, so there is no
+ // need to erase the Cycle entries. They will be overwritten when we
+ // recompute.
+ for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end();
+ I != E; ++I)
+ Cycles.erase(I);
+}
+
+void MachineTraceMetrics::Ensemble::verify() const {
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
+ "Outdated BlockInfo size");
+ for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
+ const TraceBlockInfo &TBI = BlockInfo[Num];
+ if (TBI.hasValidDepth() && TBI.Pred) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
+ "Trace is broken, depth should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
+ }
+ if (TBI.hasValidHeight() && TBI.Succ) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
+ "Trace is broken, height should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
+ assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
+ "Trace contains backedge");
+ }
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Data Dependencies
+//===----------------------------------------------------------------------===//
+//
+// Compute the depth and height of each instruction based on data dependencies
+// and instruction latencies. These cycle numbers assume that the CPU can issue
+// an infinite number of instructions per cycle as long as their dependencies
+// are ready.
+
+// A data dependency is represented as a defining MI and operand numbers on the
+// defining and using MI.
+namespace {
+struct DataDep {
+ const MachineInstr *DefMI;
+ unsigned DefOp;
+ unsigned UseOp;
+
+ DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
+ : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
+
+ /// Create a DataDep from an SSA form virtual register.
+ DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
+ : UseOp(UseOp) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
+ MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
+ assert(!DefI.atEnd() && "Register has no defs");
+ DefMI = &*DefI;
+ DefOp = DefI.getOperandNo();
+ assert((++DefI).atEnd() && "Register has multiple defs");
+ }
+};
+}
+
+// Get the input data dependencies that must be ready before UseMI can issue.
+// Return true if UseMI has any physreg operands.
+static bool getDataDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineRegisterInfo *MRI) {
+ bool HasPhysRegs = false;
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ HasPhysRegs = true;
+ continue;
+ }
+ // Collect virtual register reads.
+ if (MO->readsReg())
+ Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
+ }
+ return HasPhysRegs;
+}
+
+// Get the input data dependencies of a PHI instruction, using Pred as the
+// preferred predecessor.
+// This will add at most one dependency to Deps.
+static void getPHIDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineBasicBlock *Pred,
+ const MachineRegisterInfo *MRI) {
+ // No predecessor at the beginning of a trace. Ignore dependencies.
+ if (!Pred)
+ return;
+ assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
+ for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
+ if (UseMI->getOperand(i + 1).getMBB() == Pred) {
+ unsigned Reg = UseMI->getOperand(i).getReg();
+ Deps.push_back(DataDep(MRI, Reg, i));
+ return;
+ }
+ }
+}
+
+// Keep track of physreg data dependencies by recording each live register unit.
+// Associate each regunit with an instruction operand. Depending on the
+// direction instructions are scanned, it could be the operand that defined the
+// regunit, or the highest operand to read the regunit.
+namespace {
+struct LiveRegUnit {
+ unsigned RegUnit;
+ unsigned Cycle;
+ const MachineInstr *MI;
+ unsigned Op;
+
+ unsigned getSparseSetIndex() const { return RegUnit; }
+
+ LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {}
+};
+}
+
+// Identify physreg dependencies for UseMI, and update the live regunit
+// tracking set when scanning instructions downwards.
+static void updatePhysDepsDownwards(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> Kills;
+ SmallVector<unsigned, 8> LiveDefOps;
+
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // Track live defs and kills for updating RegUnits.
+ if (MO->isDef()) {
+ if (MO->isDead())
+ Kills.push_back(Reg);
+ else
+ LiveDefOps.push_back(MO.getOperandNo());
+ } else if (MO->isKill())
+ Kills.push_back(Reg);
+ // Identify dependencies.
+ if (!MO->readsReg())
+ continue;
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
+ break;
+ }
+ }
+
+ // Update RegUnits to reflect live registers after UseMI.
+ // First kills.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
+ RegUnits.erase(*Units);
+
+ // Second, live defs.
+ for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
+ unsigned DefOp = LiveDefOps[i];
+ for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
+ Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ LRU.MI = UseMI;
+ LRU.Op = DefOp;
+ }
+ }
+}
+
+/// The length of the critical path through a trace is the maximum of two path
+/// lengths:
+///
+/// 1. The maximum height+depth over all instructions in the trace center block.
+///
+/// 2. The longest cross-block dependency chain. For small blocks, it is
+/// possible that the critical path through the trace doesn't include any
+/// instructions in the block.
+///
+/// This function computes the second number from the live-in list of the
+/// center block.
+unsigned MachineTraceMetrics::Ensemble::
+computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
+ assert(TBI.HasValidInstrDepths && "Missing depth info");
+ assert(TBI.HasValidInstrHeights && "Missing height info");
+ unsigned MaxLen = 0;
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ const LiveInReg &LIR = TBI.LiveIns[i];
+ if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
+ continue;
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ // Ignore dependencies outside the current trace.
+ const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
+ if (!DefTBI.isUsefulDominator(TBI))
+ continue;
+ unsigned Len = LIR.Height + Cycles[DefMI].Depth;
+ MaxLen = std::max(MaxLen, Len);
+ }
+ return MaxLen;
+}
+
+/// Compute instruction depths for all instructions above or in MBB in its
+/// trace. This assumes that the trace through MBB has already been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrDepths(const MachineBasicBlock *MBB) {
+ // The top of the trace may already be computed, and HasValidInstrDepths
+ // implies Head->HasValidInstrDepths, so we only need to start from the first
+ // block in the trace that needs to be recomputed.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidDepth() && "Incomplete trace");
+ if (TBI.HasValidInstrDepths)
+ break;
+ Stack.push_back(MBB);
+ MBB = TBI.Pred;
+ } while (MBB);
+
+ // FIXME: If MBB is non-null at this point, it is the last pre-computed block
+ // in the trace. We should track any live-out physregs that were defined in
+ // the trace. This is quite rare in SSA form, typically created by CSE
+ // hoisting a compare.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // Go through trace blocks in top-down order, stopping after the center block.
+ SmallVector<DataDep, 8> Deps;
+ while (!Stack.empty()) {
+ MBB = Stack.pop_back_val();
+ DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrDepths = true;
+ TBI.CriticalPath = 0;
+
+ // Print out resource depths here as well.
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
+ ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ if (PRDepths[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
+ // Also compute the critical path length through MBB when possible.
+ if (TBI.HasValidInstrHeights)
+ TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
+
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *UseMI = I;
+
+ // Collect all data dependencies.
+ Deps.clear();
+ if (UseMI->isPHI())
+ getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(UseMI, Deps, MTM.MRI))
+ updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI);
+
+ // Filter and process dependencies, computing the earliest issue cycle.
+ unsigned Cycle = 0;
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ const DataDep &Dep = Deps[i];
+ const TraceBlockInfo&DepTBI =
+ BlockInfo[Dep.DefMI->getParent()->getNumber()];
+ // Ignore dependencies from outside the current trace.
+ if (!DepTBI.isUsefulDominator(TBI))
+ continue;
+ assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
+ unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp,
+ /* FindMin = */ false);
+ Cycle = std::max(Cycle, DepCycle);
+ }
+ // Remember the instruction depth.
+ InstrCycles &MICycles = Cycles[UseMI];
+ MICycles.Depth = Cycle;
+
+ if (!TBI.HasValidInstrHeights) {
+ DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI);
+ }
+ }
+}
+
+// Identify physreg dependencies for MI when scanning instructions upwards.
+// Return the issue height of MI after considering any live regunits.
+// Height is the issue height computed from virtual register dependencies alone.
+static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetSchedModel &SchedModel,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> ReadOps;
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MO->readsReg())
+ ReadOps.push_back(MO.getOperandNo());
+ if (!MO->isDef())
+ continue;
+ // This is a def of Reg. Remove corresponding entries from RegUnits, and
+ // update MI Height to consider the physreg dependencies.
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ unsigned DepHeight = I->Cycle;
+ if (!MI->isTransient()) {
+ // We may not know the UseMI of this dependency, if it came from the
+ // live-in list. SchedModel can handle a NULL UseMI.
+ DepHeight += SchedModel
+ .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op,
+ /* FindMin = */ false);
+ }
+ Height = std::max(Height, DepHeight);
+ // This regunit is dead above MI.
+ RegUnits.erase(I);
+ }
+ }
+
+ // Now we know the height of MI. Update any regunits read.
+ for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
+ unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ // Set the height to the highest reader of the unit.
+ if (LRU.Cycle <= Height && LRU.MI != MI) {
+ LRU.Cycle = Height;
+ LRU.MI = MI;
+ LRU.Op = ReadOps[i];
+ }
+ }
+ }
+
+ return Height;
+}
+
+
+typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
+
+// Push the height of DefMI upwards if required to match UseMI.
+// Return true if this is the first time DefMI was seen.
+static bool pushDepHeight(const DataDep &Dep,
+ const MachineInstr *UseMI, unsigned UseHeight,
+ MIHeightMap &Heights,
+ const TargetSchedModel &SchedModel,
+ const TargetInstrInfo *TII) {
+ // Adjust height by Dep.DefMI latency.
+ if (!Dep.DefMI->isTransient())
+ UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp, false);
+
+ // Update Heights[DefMI] to be the maximum height seen.
+ MIHeightMap::iterator I;
+ bool New;
+ tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+ if (New)
+ return true;
+
+ // DefMI has been pushed before. Give it the max height.
+ if (I->second < UseHeight)
+ I->second = UseHeight;
+ return false;
+}
+
+/// Assuming that the virtual register defined by DefMI:DefOp was used by
+/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
+/// when reaching the block that contains DefMI.
+void MachineTraceMetrics::Ensemble::
+addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
+ ArrayRef<const MachineBasicBlock*> Trace) {
+ assert(!Trace.empty() && "Trace should contain at least one block");
+ unsigned Reg = DefMI->getOperand(DefOp).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ const MachineBasicBlock *DefMBB = DefMI->getParent();
+
+ // Reg is live-in to all blocks in Trace that follow DefMBB.
+ for (unsigned i = Trace.size(); i; --i) {
+ const MachineBasicBlock *MBB = Trace[i-1];
+ if (MBB == DefMBB)
+ return;
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ // Just add the register. The height will be updated later.
+ TBI.LiveIns.push_back(Reg);
+ }
+}
+
+/// Compute instruction heights in the trace through MBB. This updates MBB and
+/// the blocks below it in the trace. It is assumed that the trace has already
+/// been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrHeights(const MachineBasicBlock *MBB) {
+ // The bottom of the trace may already be computed.
+ // Find the blocks that need updating.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidHeight() && "Incomplete trace");
+ if (TBI.HasValidInstrHeights)
+ break;
+ Stack.push_back(MBB);
+ TBI.LiveIns.clear();
+ MBB = TBI.Succ;
+ } while (MBB);
+
+ // As we move upwards in the trace, keep track of instructions that are
+ // required by deeper trace instructions. Map MI -> height required so far.
+ MIHeightMap Heights;
+
+ // For physregs, the def isn't known when we see the use.
+ // Instead, keep track of the highest use of each regunit.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // If the bottom of the trace was already precomputed, initialize heights
+ // from its live-in list.
+ // MBB is the highest precomputed block in the trace.
+ if (MBB) {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg LI = TBI.LiveIns[i];
+ if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
+ // For virtual registers, the def latency is included.
+ unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
+ if (Height < LI.Height)
+ Height = LI.Height;
+ } else {
+ // For register units, the def latency is not included because we don't
+ // know the def yet.
+ RegUnits[LI.Reg].Cycle = LI.Height;
+ }
+ }
+ }
+
+ // Go through the trace blocks in bottom-up order.
+ SmallVector<DataDep, 8> Deps;
+ for (;!Stack.empty(); Stack.pop_back()) {
+ MBB = Stack.back();
+ DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrHeights = true;
+ TBI.CriticalPath = 0;
+
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
+ ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
+ for (unsigned K = 0; K != PRHeights.size(); ++K)
+ if (PRHeights[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
+ // Get dependencies from PHIs in the trace successor.
+ const MachineBasicBlock *Succ = TBI.Succ;
+ // If MBB is the last block in the trace, and it has a back-edge to the
+ // loop header, get loop-carried dependencies from PHIs in the header. For
+ // that purpose, pretend that all the loop header PHIs have height 0.
+ if (!Succ)
+ if (const MachineLoop *Loop = getLoopFor(MBB))
+ if (MBB->isSuccessor(Loop->getHeader()))
+ Succ = Loop->getHeader();
+
+ if (Succ) {
+ for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *PHI = I;
+ Deps.clear();
+ getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+ if (!Deps.empty()) {
+ // Loop header PHI heights are all 0.
+ unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
+ DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
+ if (pushDepHeight(Deps.front(), PHI, Height,
+ Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
+ }
+ }
+ }
+
+ // Go through the block backwards.
+ for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
+ BI != BB;) {
+ const MachineInstr *MI = --BI;
+
+ // Find the MI height as determined by virtual register uses in the
+ // trace below.
+ unsigned Cycle = 0;
+ MIHeightMap::iterator HeightI = Heights.find(MI);
+ if (HeightI != Heights.end()) {
+ Cycle = HeightI->second;
+ // We won't be seeing any more MI uses.
+ Heights.erase(HeightI);
+ }
+
+ // Don't process PHI deps. They depend on the specific predecessor, and
+ // we'll get them when visiting the predecessor.
+ Deps.clear();
+ bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+
+ // There may also be regunit dependencies to include in the height.
+ if (HasPhysRegs)
+ Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
+ MTM.SchedModel, MTM.TII, MTM.TRI);
+
+ // Update the required height of any virtual registers read by MI.
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i)
+ if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);
+
+ InstrCycles &MICycles = Cycles[MI];
+ MICycles.Height = Cycle;
+ if (!TBI.HasValidInstrDepths) {
+ DEBUG(dbgs() << Cycle << '\t' << *MI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
+ }
+
+ // Update virtual live-in heights. They were added by addLiveIns() with a 0
+ // height because the final height isn't known until now.
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg &LIR = TBI.LiveIns[i];
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ LIR.Height = Heights.lookup(DefMI);
+ DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
+ }
+
+ // Transfer the live regunits to the live-in list.
+ for (SparseSet<LiveRegUnit>::const_iterator
+ RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
+ TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
+ DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
+ << '@' << RI->Cycle);
+ }
+ DEBUG(dbgs() << '\n');
+
+ if (!TBI.HasValidInstrDepths)
+ continue;
+ // Add live-ins to the critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath,
+ computeCrossBlockCriticalPath(TBI));
+ DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
+ }
+}
+
+MachineTraceMetrics::Trace
+MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
+ // FIXME: Check cache tags, recompute as needed.
+ computeTrace(MBB);
+ computeInstrDepths(MBB);
+ computeInstrHeights(MBB);
+ return Trace(*this, BlockInfo[MBB->getNumber()]);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
+ assert(MI && "Not an instruction.");
+ assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
+ "MI must be in the trace center block");
+ InstrCycles Cyc = getInstrCycles(MI);
+ return getCriticalPath() - (Cyc.Depth + Cyc.Height);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
+ const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
+ SmallVector<DataDep, 1> Deps;
+ getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
+ assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
+ DataDep &Dep = Deps.front();
+ unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += TE.MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false);
+ return DepCycle;
+}
+
+unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
+ // Find the limiting processor resource.
+ // Numbers have been pre-scaled to be comparable.
+ unsigned PRMax = 0;
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ if (Bottom) {
+ ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
+ } else {
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K]);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
+ unsigned Instrs = TBI.InstrDepth;
+ if (Bottom)
+ Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
+ // Assume issue width 1 without a schedule model.
+ return std::max(Instrs, PRMax);
+}
+
+unsigned MachineTraceMetrics::Trace::
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+ // Add up resources above and below the center block.
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
+ unsigned PRMax = 0;
+ for (unsigned K = 0; K != PRDepths.size(); ++K) {
+ unsigned PRCycles = PRDepths[K] + PRHeights[K];
+ for (unsigned I = 0; I != Extrablocks.size(); ++I)
+ PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
+ PRMax = std::max(PRMax, PRCycles);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
+ unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+ for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
+ Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
+ // Assume issue width 1 without a schedule model.
+ return std::max(Instrs, PRMax);
+}
+
+void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
+ OS << getName() << " ensemble:\n";
+ for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
+ OS << " BB#" << i << '\t';
+ BlockInfo[i].print(OS);
+ OS << '\n';
+ }
+}
+
+void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
+ if (hasValidDepth()) {
+ OS << "depth=" << InstrDepth;
+ if (Pred)
+ OS << " pred=BB#" << Pred->getNumber();
+ else
+ OS << " pred=null";
+ OS << " head=BB#" << Head;
+ if (HasValidInstrDepths)
+ OS << " +instrs";
+ } else
+ OS << "depth invalid";
+ OS << ", ";
+ if (hasValidHeight()) {
+ OS << "height=" << InstrHeight;
+ if (Succ)
+ OS << " succ=BB#" << Succ->getNumber();
+ else
+ OS << " succ=null";
+ OS << " tail=BB#" << Tail;
+ if (HasValidInstrHeights)
+ OS << " +instrs";
+ } else
+ OS << "height invalid";
+ if (HasValidInstrDepths && HasValidInstrHeights)
+ OS << ", crit=" << CriticalPath;
+}
+
+void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
+ unsigned MBBNum = &TBI - &TE.BlockInfo[0];
+
+ OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
+ << " --> BB#" << TBI.Tail << ':';
+ if (TBI.hasValidHeight() && TBI.hasValidDepth())
+ OS << ' ' << getInstrCount() << " instrs.";
+ if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
+ OS << ' ' << TBI.CriticalPath << " cycles.";
+
+ const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
+ OS << "\nBB#" << MBBNum;
+ while (Block->hasValidDepth() && Block->Pred) {
+ unsigned Num = Block->Pred->getNumber();
+ OS << " <- BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+
+ Block = &TBI;
+ OS << "\n ";
+ while (Block->hasValidHeight() && Block->Succ) {
+ unsigned Num = Block->Succ->getNumber();
+ OS << " -> BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+ OS << '\n';
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 000000000000..4b1230029a74
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,1605 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+namespace {
+ struct MachineVerifier {
+
+ MachineVerifier(Pass *pass, const char *b) :
+ PASS(pass),
+ Banner(b),
+ OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+ {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ Pass *const PASS;
+ const char *Banner;
+ const char *const OutFileName;
+ raw_ostream *OS;
+ const MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+ unsigned foundErrors;
+
+ typedef SmallVector<unsigned, 16> RegVector;
+ typedef SmallVector<const uint32_t*, 4> RegMaskVector;
+ typedef DenseSet<unsigned> RegSet;
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+ typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet;
+
+ const MachineInstr *FirstTerminator;
+ BlockSet FunctionBlocks;
+
+ BitVector regsReserved;
+ RegSet regsLive;
+ RegVector regsDefined, regsDead, regsKilled;
+ RegMaskVector regMasks;
+ RegSet regsLiveInButUnused;
+
+ SlotIndex lastIndex;
+
+ // Add Reg and any sub-registers to RV
+ void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ RV.push_back(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RV.push_back(*SubRegs);
+ }
+
+ struct BBInfo {
+ // Is this MBB reachable from the MF entry point?
+ bool reachable;
+
+ // Vregs that must be live in because they are used without being
+ // defined. Map value is the user.
+ RegMap vregsLiveIn;
+
+ // Regs killed in MBB. They may be defined again, and will then be in both
+ // regsKilled and regsLiveOut.
+ RegSet regsKilled;
+
+ // Regs defined in MBB and live out. Note that vregs passing through may
+ // be live out without being mentioned here.
+ RegSet regsLiveOut;
+
+ // Vregs that pass through MBB untouched. This set is disjoint from
+ // regsKilled and regsLiveOut.
+ RegSet vregsPassed;
+
+ // Vregs that must pass through MBB because they are needed by a successor
+ // block. This set is disjoint from regsLiveOut.
+ RegSet vregsRequired;
+
+ // Set versions of block's predecessor and successor lists.
+ BlockSet Preds, Succs;
+
+ BBInfo() : reachable(false) {}
+
+ // Add register to vregsPassed if it belongs there. Return true if
+ // anything changed.
+ bool addPassed(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
+ return false;
+ return vregsPassed.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addPassed(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addPassed(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Add register to vregsRequired if it belongs there. Return true if
+ // anything changed.
+ bool addRequired(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsLiveOut.count(Reg))
+ return false;
+ return vregsRequired.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addRequired(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addRequired(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Same for a full map.
+ bool addRequired(const RegMap &RM) {
+ bool changed = false;
+ for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I)
+ if (addRequired(I->first))
+ changed = true;
+ return changed;
+ }
+
+ // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(unsigned Reg) const {
+ return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+ }
+ };
+
+ // Extra register info per MBB.
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+ bool isReserved(unsigned Reg) {
+ return Reg < regsReserved.size() && regsReserved.test(Reg);
+ }
+
+ bool isAllocatable(unsigned Reg) {
+ return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg);
+ }
+
+ // Analysis information if available
+ LiveVariables *LiveVars;
+ LiveIntervals *LiveInts;
+ LiveStacks *LiveStks;
+ SlotIndexes *Indexes;
+
+ void visitMachineFunctionBefore();
+ void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineBundleBefore(const MachineInstr *MI);
+ void visitMachineInstrBefore(const MachineInstr *MI);
+ void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+ void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBundleAfter(const MachineInstr *MI);
+ void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+ void visitMachineFunctionAfter();
+
+ void report(const char *msg, const MachineFunction *MF);
+ void report(const char *msg, const MachineBasicBlock *MBB);
+ void report(const char *msg, const MachineInstr *MI);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+ void report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI);
+ void report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI);
+
+ void verifyInlineAsm(const MachineInstr *MI);
+
+ void checkLiveness(const MachineOperand *MO, unsigned MONum);
+ void markReachable(const MachineBasicBlock *MBB);
+ void calcRegsPassed();
+ void checkPHIOps(const MachineBasicBlock *MBB);
+
+ void calcRegsRequired();
+ void verifyLiveVariables();
+ void verifyLiveIntervals();
+ void verifyLiveInterval(const LiveInterval&);
+ void verifyLiveIntervalValue(const LiveInterval&, VNInfo*);
+ void verifyLiveIntervalSegment(const LiveInterval&,
+ LiveInterval::const_iterator);
+ };
+
+ struct MachineVerifierPass : public MachineFunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+ const char *const Banner;
+
+ MachineVerifierPass(const char *b = 0)
+ : MachineFunctionPass(ID), Banner(b) {
+ initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ MF.verify(this, Banner);
+ return false;
+ }
+ };
+
+}
+
+char MachineVerifierPass::ID = 0;
+INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+ "Verify generated machine code", false, false)
+
+FunctionPass *llvm::createMachineVerifierPass(const char *Banner) {
+ return new MachineVerifierPass(Banner);
+}
+
+void MachineFunction::verify(Pass *p, const char *Banner) const {
+ MachineVerifier(p, Banner)
+ .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+}
+
+bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
+ raw_ostream *OutFile = 0;
+ if (OutFileName) {
+ std::string ErrorInfo;
+ OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+ raw_fd_ostream::F_Append);
+ if (!ErrorInfo.empty()) {
+ errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
+ exit(1);
+ }
+
+ OS = OutFile;
+ } else {
+ OS = &errs();
+ }
+
+ foundErrors = 0;
+
+ this->MF = &MF;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ LiveVars = NULL;
+ LiveInts = NULL;
+ LiveStks = NULL;
+ Indexes = NULL;
+ if (PASS) {
+ LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+ // We don't want to verify LiveVariables if LiveIntervals is available.
+ if (!LiveInts)
+ LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+ Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
+ }
+
+ visitMachineFunctionBefore();
+ for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+ MFI!=MFE; ++MFI) {
+ visitMachineBasicBlockBefore(MFI);
+ // Keep track of the current bundle header.
+ const MachineInstr *CurBundle = 0;
+ // Do we expect the next instruction to be part of the same bundle?
+ bool InBundle = false;
+
+ for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
+ if (MBBI->getParent() != MFI) {
+ report("Bad instruction parent pointer", MFI);
+ *OS << "Instruction: " << *MBBI;
+ continue;
+ }
+
+ // Check for consistent bundle flags.
+ if (InBundle && !MBBI->isBundledWithPred())
+ report("Missing BundledPred flag, "
+ "BundledSucc was set on predecessor", MBBI);
+ if (!InBundle && MBBI->isBundledWithPred())
+ report("BundledPred flag is set, "
+ "but BundledSucc not set on predecessor", MBBI);
+
+ // Is this a bundle header?
+ if (!MBBI->isInsideBundle()) {
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
+ CurBundle = MBBI;
+ visitMachineBundleBefore(CurBundle);
+ } else if (!CurBundle)
+ report("No bundle header", MBBI);
+ visitMachineInstrBefore(MBBI);
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
+ visitMachineOperand(&MBBI->getOperand(I), I);
+ visitMachineInstrAfter(MBBI);
+
+ // Was this the last bundled instruction?
+ InBundle = MBBI->isBundledWithSucc();
+ }
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
+ if (InBundle)
+ report("BundledSucc flag set on last instruction in block", &MFI->back());
+ visitMachineBasicBlockAfter(MFI);
+ }
+ visitMachineFunctionAfter();
+
+ if (OutFile)
+ delete OutFile;
+ else if (foundErrors)
+ report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
+
+ // Clean up.
+ regsLive.clear();
+ regsDefined.clear();
+ regsDead.clear();
+ regsKilled.clear();
+ regMasks.clear();
+ regsLiveInButUnused.clear();
+ MBBInfoMap.clear();
+
+ return false; // no changes
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
+ assert(MF);
+ *OS << '\n';
+ if (!foundErrors++) {
+ if (Banner)
+ *OS << "# " << Banner << '\n';
+ MF->print(*OS, Indexes);
+ }
+ *OS << "*** Bad machine code: " << msg << " ***\n"
+ << "- function: " << MF->getName() << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
+ assert(MBB);
+ report(msg, MBB->getParent());
+ *OS << "- basic block: BB#" << MBB->getNumber()
+ << ' ' << MBB->getName()
+ << " (" << (const void*)MBB << ')';
+ if (Indexes)
+ *OS << " [" << Indexes->getMBBStartIdx(MBB)
+ << ';' << Indexes->getMBBEndIdx(MBB) << ')';
+ *OS << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
+ assert(MI);
+ report(msg, MI->getParent());
+ *OS << "- instruction: ";
+ if (Indexes && Indexes->hasIndex(MI))
+ *OS << Indexes->getInstructionIndex(MI) << '\t';
+ MI->print(*OS, TM);
+}
+
+void MachineVerifier::report(const char *msg,
+ const MachineOperand *MO, unsigned MONum) {
+ assert(MO);
+ report(msg, MO->getParent());
+ *OS << "- operand " << MONum << ": ";
+ MO->print(*OS, TM);
+ *OS << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI) {
+ report(msg, MF);
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI) {
+ report(msg, MBB);
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n';
+}
+
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ if (!MInfo.reachable) {
+ MInfo.reachable = true;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+ markReachable(*SuI);
+ }
+}
+
+void MachineVerifier::visitMachineFunctionBefore() {
+ lastIndex = SlotIndex();
+ regsReserved = MRI->getReservedRegs();
+
+ // A sub-register of a reserved register is also reserved
+ for (int Reg = regsReserved.find_first(); Reg>=0;
+ Reg = regsReserved.find_next(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ // FIXME: This should probably be:
+ // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register");
+ regsReserved.set(*SubRegs);
+ }
+ }
+
+ markReachable(&MF->front());
+
+ // Build a set of the basic blocks in the function.
+ FunctionBlocks.clear();
+ for (MachineFunction::const_iterator
+ I = MF->begin(), E = MF->end(); I != E; ++I) {
+ FunctionBlocks.insert(I);
+ BBInfo &MInfo = MBBInfoMap[I];
+
+ MInfo.Preds.insert(I->pred_begin(), I->pred_end());
+ if (MInfo.Preds.size() != I->pred_size())
+ report("MBB has duplicate entries in its predecessor list.", I);
+
+ MInfo.Succs.insert(I->succ_begin(), I->succ_end());
+ if (MInfo.Succs.size() != I->succ_size())
+ report("MBB has duplicate entries in its successor list.", I);
+ }
+}
+
+// Does iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
+ if (*i == a)
+ return *++i == b;
+ if (*i == b)
+ return *++i == a;
+ return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+ FirstTerminator = 0;
+
+ if (MRI->isSSA()) {
+ // If this block has allocatable physical registers live-in, check that
+ // it is an entry block or landing pad.
+ for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end();
+ LI != LE; ++LI) {
+ unsigned reg = *LI;
+ if (isAllocatable(reg) && !MBB->isLandingPad() &&
+ MBB != MBB->getParent()->begin()) {
+ report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
+ }
+ }
+ }
+
+ // Count the number of landing pad successors.
+ SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ if ((*I)->isLandingPad())
+ LandingPadSuccs.insert(*I);
+ if (!FunctionBlocks.count(*I))
+ report("MBB has successor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Preds.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the predecessor list of the successor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
+ }
+
+ // Check the predecessor list.
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+ E = MBB->pred_end(); I != E; ++I) {
+ if (!FunctionBlocks.count(*I))
+ report("MBB has predecessor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Succs.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the successor list of the predecessor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
+ }
+
+ const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (LandingPadSuccs.size() > 1 &&
+ !(AsmInfo &&
+ AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+ BB && isa<SwitchInst>(BB->getTerminator())))
+ report("MBB has more than one landing pad successor", MBB);
+
+ // Call AnalyzeBranch. If it succeeds, there several more conditions to check.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+ TBB, FBB, Cond)) {
+ // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+ // check whether its answers match up with reality.
+ if (!TBB && !FBB) {
+ // Block falls through to its successor.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out the bottom of the function.
+ } else if (MBB->succ_size() == LandingPadSuccs.size()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actuall fall
+ // out of the block.
+ } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional fall-through but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(MBBI)) {
+ report("MBB exits via unconditional fall-through but its successor "
+ "differs from its CFG successor!", MBB);
+ }
+ if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() &&
+ !TII->isPredicated(getBundleStart(&MBB->back()))) {
+ report("MBB exits via unconditional fall-through but ends with a "
+ "barrier instruction!", MBB);
+ }
+ if (!Cond.empty()) {
+ report("MBB exits via unconditional fall-through but has a condition!",
+ MBB);
+ }
+ } else if (TBB && !FBB && Cond.empty()) {
+ // Block unconditionally branches somewhere.
+ if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional branch but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(TBB)) {
+ report("MBB exits via unconditional branch but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via unconditional branch but doesn't contain "
+ "any instructions!", MBB);
+ } else if (!getBundleStart(&MBB->back())->isBarrier()) {
+ report("MBB exits via unconditional branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+ report("MBB exits via unconditional branch but the branch isn't a "
+ "terminator instruction!", MBB);
+ }
+ } else if (TBB && !FBB && !Cond.empty()) {
+ // Block conditionally branches somewhere, otherwise falls through.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ report("MBB conditionally falls through out of function!", MBB);
+ } else if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (&*MBBI != TBB)
+ report("MBB exits via conditional branch/fall-through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successor don't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/fall-through but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/fall-through but doesn't "
+ "contain any instructions!", MBB);
+ } else if (getBundleStart(&MBB->back())->isBarrier()) {
+ report("MBB exits via conditional branch/fall-through but ends with a "
+ "barrier instruction!", MBB);
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+ report("MBB exits via conditional branch/fall-through but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ } else if (TBB && FBB) {
+ // Block conditionally branches somewhere, otherwise branches
+ // somewhere else.
+ if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (FBB != TBB)
+ report("MBB exits via conditional branch/branch through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/branch through but the CFG "
+ "successor don't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/branch but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+ report("MBB exits via conditional branch/branch but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/branch but doesn't "
+ "contain any instructions!", MBB);
+ } else if (!getBundleStart(&MBB->back())->isBarrier()) {
+ report("MBB exits via conditional branch/branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+ report("MBB exits via conditional branch/branch but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ if (Cond.empty()) {
+ report("MBB exits via conditinal branch/branch but there's no "
+ "condition!", MBB);
+ }
+ } else {
+ report("AnalyzeBranch returned invalid data!", MBB);
+ }
+ }
+
+ regsLive.clear();
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ regsLive.insert(*I);
+ for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
+ }
+ regsLiveInButUnused = regsLive;
+
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ assert(MFI && "Function has no frame info");
+ BitVector PR = MFI->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+ regsLive.insert(I);
+ for (MCSubRegIterator SubRegs(I, TRI); SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
+ }
+
+ regsKilled.clear();
+ regsDefined.clear();
+
+ if (Indexes)
+ lastIndex = Indexes->getMBBStartIdx(MBB);
+}
+
+// This function gets called for all bundle headers, including normal
+// stand-alone unbundled instructions.
+void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
+ if (Indexes && Indexes->hasIndex(MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(MI);
+ if (!(idx > lastIndex)) {
+ report("Instruction index out of order", MI);
+ *OS << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx;
+ }
+
+ // Ensure non-terminators don't follow terminators.
+ // Ignore predicated terminators formed by if conversion.
+ // FIXME: If conversion shouldn't need to violate this rule.
+ if (MI->isTerminator() && !TII->isPredicated(MI)) {
+ if (!FirstTerminator)
+ FirstTerminator = MI;
+ } else if (FirstTerminator) {
+ report("Non-terminator instruction after the first terminator", MI);
+ *OS << "First terminator was:\t" << *FirstTerminator;
+ }
+}
+
+// The operands on an INLINEASM instruction must follow a template.
+// Verify that the flag operands make sense.
+void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
+ // The first two operands on INLINEASM are the asm string and global flags.
+ if (MI->getNumOperands() < 2) {
+ report("Too few operands on inline asm", MI);
+ return;
+ }
+ if (!MI->getOperand(0).isSymbol())
+ report("Asm string must be an external symbol", MI);
+ if (!MI->getOperand(1).isImm())
+ report("Asm flags must be an immediate", MI);
+ // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2,
+ // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16.
+ if (!isUInt<5>(MI->getOperand(1).getImm()))
+ report("Unknown asm flags", &MI->getOperand(1), 1);
+
+ assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed");
+
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+ unsigned NumOps;
+ for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ // There may be implicit ops after the fixed operands.
+ if (!MO.isImm())
+ break;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm());
+ }
+
+ if (OpNo > MI->getNumOperands())
+ report("Missing operands in last group", MI);
+
+ // An optional MDNode follows the groups.
+ if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata())
+ ++OpNo;
+
+ // All trailing operands must be implicit registers.
+ for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (!MO.isReg() || !MO.isImplicit())
+ report("Expected implicit register after groups", &MO, OpNo);
+ }
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->getNumOperands() < MCID.getNumOperands()) {
+ report("Too few operands", MI);
+ *OS << MCID.getNumOperands() << " operands expected, but "
+ << MI->getNumExplicitOperands() << " given.\n";
+ }
+
+ // Check the tied operands.
+ if (MI->isInlineAsm())
+ verifyInlineAsm(MI);
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ if ((*I)->isLoad() && !MI->mayLoad())
+ report("Missing mayLoad flag", MI);
+ if ((*I)->isStore() && !MI->mayStore())
+ report("Missing mayStore flag", MI);
+ }
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one, unless they are inside a bundle.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(MI);
+ if (MI->isDebugValue()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else if (MI->isInsideBundle()) {
+ if (mapped)
+ report("Instruction inside bundle has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
+ StringRef ErrorInfo;
+ if (!TII->verifyInstruction(MI, ErrorInfo))
+ report(ErrorInfo.data(), MI);
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ // The first MCID.NumDefs operands must be explicit register defines
+ if (MONum < MCID.getNumDefs()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+ if (!MO->isReg())
+ report("Explicit definition must be a register", MO, MONum);
+ else if (!MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit definition marked as use", MO, MONum);
+ else if (MO->isImplicit())
+ report("Explicit definition marked as implicit", MO, MONum);
+ } else if (MONum < MCID.getNumOperands()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+ // Don't check if it's the last operand in a variadic instruction. See,
+ // e.g., LDM_RET in the arm back end.
+ if (MO->isReg() &&
+ !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ if (MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+
+ int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO);
+ if (TiedTo != -1) {
+ if (!MO->isReg())
+ report("Tied use must be a register", MO, MONum);
+ else if (!MO->isTied())
+ report("Operand should be tied", MO, MONum);
+ else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
+ report("Tied def doesn't match MCInstrDesc", MO, MONum);
+ } else if (MO->isReg() && MO->isTied())
+ report("Explicit operand should not be tied", MO, MONum);
+ } else {
+ // ARM adds %reg0 operands to indicate predicates. We'll allow that.
+ if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
+ report("Extra explicit operand on non-variadic instruction", MO, MONum);
+ }
+
+ switch (MO->getType()) {
+ case MachineOperand::MO_Register: {
+ const unsigned Reg = MO->getReg();
+ if (!Reg)
+ return;
+ if (MRI->tracksLiveness() && !MI->isDebugValue())
+ checkLiveness(MO, MONum);
+
+ // Verify the consistency of tied operands.
+ if (MO->isTied()) {
+ unsigned OtherIdx = MI->findTiedOperandIdx(MONum);
+ const MachineOperand &OtherMO = MI->getOperand(OtherIdx);
+ if (!OtherMO.isReg())
+ report("Must be tied to a register", MO, MONum);
+ if (!OtherMO.isTied())
+ report("Missing tie flags on tied operand", MO, MONum);
+ if (MI->findTiedOperandIdx(OtherIdx) != MONum)
+ report("Inconsistent tie links", MO, MONum);
+ if (MONum < MCID.getNumDefs()) {
+ if (OtherIdx < MCID.getNumOperands()) {
+ if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO))
+ report("Explicit def tied to explicit use without tie constraint",
+ MO, MONum);
+ } else {
+ if (!OtherMO.isImplicit())
+ report("Explicit def should be tied to implicit use", MO, MONum);
+ }
+ }
+ }
+
+ // Verify two-address constraints after leaving SSA form.
+ unsigned DefIdx;
+ if (!MRI->isSSA() && MO->isUse() &&
+ MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ Reg != MI->getOperand(DefIdx).getReg())
+ report("Two-address instruction operands must be identical", MO, MONum);
+
+ // Check register classes.
+ if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
+ unsigned SubIdx = MO->getSubReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (SubIdx) {
+ report("Illegal subregister index for physical register", MO, MONum);
+ return;
+ }
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ if (!DRC->contains(Reg)) {
+ report("Illegal physical register for instruction", MO, MONum);
+ *OS << TRI->getName(Reg) << " is not a "
+ << DRC->getName() << " register.\n";
+ }
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (SubIdx) {
+ const TargetRegisterClass *SRC =
+ TRI->getSubClassWithSubReg(RC, SubIdx);
+ if (!SRC) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ *OS << "Register class " << RC->getName()
+ << " does not support subreg index " << SubIdx << "\n";
+ return;
+ }
+ if (RC != SRC) {
+ report("Invalid register class for subregister index", MO, MONum);
+ *OS << "Register class " << RC->getName()
+ << " does not fully support subreg index " << SubIdx << "\n";
+ return;
+ }
+ }
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ if (SubIdx) {
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(RC);
+ if (!SuperRC) {
+ report("No largest legal super class exists.", MO, MONum);
+ return;
+ }
+ DRC = TRI->getMatchingSuperRegClass(SuperRC, DRC, SubIdx);
+ if (!DRC) {
+ report("No matching super-reg register class.", MO, MONum);
+ return;
+ }
+ }
+ if (!RC->hasSuperClassEq(DRC)) {
+ report("Illegal virtual register for instruction", MO, MONum);
+ *OS << "Expected a " << DRC->getName() << " register, but got a "
+ << RC->getName() << " register\n";
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case MachineOperand::MO_RegisterMask:
+ regMasks.push_back(MO->getRegMask());
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
+ report("PHI operand is not in the CFG", MO, MONum);
+ break;
+
+ case MachineOperand::MO_FrameIndex:
+ if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+ SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+ if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
+ report("Instruction loads from dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
+ report("Instruction stores to dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const unsigned Reg = MO->getReg();
+
+ // Both use and def operands can read a register.
+ if (MO->readsReg()) {
+ regsLiveInButUnused.erase(Reg);
+
+ if (MO->isKill())
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI);
+ // Check the cached regunit intervals.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) {
+ LiveRangeQuery LRQ(*LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
+ << ' ' << *LI << '\n';
+ }
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n';
+ }
+ }
+ }
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (LiveInts->hasInterval(Reg)) {
+ // This is a virtual register interval.
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ LiveRangeQuery LRQ(LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
+ }
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else if (MRI->def_empty(Reg)) {
+ report("Reading virtual register without a def", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ }
+
+ if (MO->isDef()) {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ // Verify SSA form.
+ if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
+ report("Multiple virtual register defs in SSA form", MO, MONum);
+
+ // Check LiveInts for a live range, but only for virtual registers.
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI);
+ DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx) {
+ report("Inconsistent valno->def", MO, MONum);
+ *OS << "Valno " << VNI->id << " is not defined at "
+ << DefIdx << " in " << LI << '\n';
+ }
+ } else {
+ report("No live range at def", MO, MONum);
+ *OS << DefIdx << " is not live in " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+ }
+}
+
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+}
+
+// This function gets called after visiting all instructions in a bundle. The
+// argument points to the bundle header.
+// Normal stand-alone instructions are also considered 'bundles', and this
+// function is called for all of them.
+void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ set_union(MInfo.regsKilled, regsKilled);
+ set_subtract(regsLive, regsKilled); regsKilled.clear();
+ // Kill any masked registers.
+ while (!regMasks.empty()) {
+ const uint32_t *Mask = regMasks.pop_back_val();
+ for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I)
+ if (TargetRegisterInfo::isPhysicalRegister(*I) &&
+ MachineOperand::clobbersPhysReg(Mask, *I))
+ regsDead.push_back(*I);
+ }
+ set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
+ MBBInfoMap[MBB].regsLiveOut = regsLive;
+ regsLive.clear();
+
+ if (Indexes) {
+ SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+ if (!(stop > lastIndex)) {
+ report("Block ends before last instruction index", MBB);
+ *OS << "Block ends at " << stop
+ << " last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = stop;
+ }
+}
+
+// Calculate the largest possible vregsPassed sets. These are the registers that
+// can pass through an MBB live, but may not be live every time. It is assumed
+// that all vregsPassed sets are empty before the call.
+void MachineVerifier::calcRegsPassed() {
+ // First push live-out regs to successors' vregsPassed. Remember the MBBs that
+ // have any vregsPassed.
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ if (!MInfo.reachable)
+ continue;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
+ SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.regsLiveOut))
+ todo.insert(*SuI);
+ }
+ }
+
+ // Iteratively push vregsPassed to successors. This will converge to the same
+ // final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
+ if (*SuI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.vregsPassed))
+ todo.insert(*SuI);
+ }
+ }
+}
+
+// Calculate the set of virtual registers that must be passed through each basic
+// block in order to satisfy the requirements of successor blocks. This is very
+// similar to calcRegsPassed, only backwards.
+void MachineVerifier::calcRegsRequired() {
+ // First push live-in regs to predecessors' vregsRequired.
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
+ PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PInfo = MBBInfoMap[*PrI];
+ if (PInfo.addRequired(MInfo.vregsLiveIn))
+ todo.insert(*PrI);
+ }
+ }
+
+ // Iteratively push vregsRequired to predecessors. This will converge to the
+ // same final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (*PrI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*PrI];
+ if (SInfo.addRequired(MInfo.vregsRequired))
+ todo.insert(*PrI);
+ }
+ }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
+void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ SmallPtrSet<const MachineBasicBlock*, 8> seen;
+ for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ seen.clear();
+
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
+ if (!Pre->isSuccessor(MBB))
+ continue;
+ seen.insert(Pre);
+ BBInfo &PrInfo = MBBInfoMap[Pre];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
+ report("PHI operand is not live-out from predecessor",
+ &BBI->getOperand(i), i);
+ }
+
+ // Did we see all predecessors?
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (!seen.count(*PrI)) {
+ report("Missing PHI operand", BBI);
+ *OS << "BB#" << (*PrI)->getNumber()
+ << " is a predecessor according to the CFG.\n";
+ }
+ }
+ }
+}
+
+void MachineVerifier::visitMachineFunctionAfter() {
+ calcRegsPassed();
+
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ checkPHIOps(MFI);
+ }
+
+ // Now check liveness info if available
+ calcRegsRequired();
+
+ // Check for killed virtual registers that should be live out.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ if (MInfo.regsKilled.count(*I)) {
+ report("Virtual register killed in block, but needed live out.", MFI);
+ *OS << "Virtual register " << PrintReg(*I)
+ << " is used after the block.\n";
+ }
+ }
+
+ if (!MF->empty()) {
+ BBInfo &MInfo = MBBInfoMap[&MF->front()];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ report("Virtual register def doesn't dominate all uses.",
+ MRI->getVRegDef(*I));
+ }
+
+ if (LiveVars)
+ verifyLiveVariables();
+ if (LiveInts)
+ verifyLiveIntervals();
+}
+
+void MachineVerifier::verifyLiveVariables() {
+ assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Our vregsRequired should be identical to LiveVariables' AliveBlocks
+ if (MInfo.vregsRequired.count(Reg)) {
+ if (!VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block missing from AliveBlocks", MFI);
+ *OS << "Virtual register " << PrintReg(Reg)
+ << " must be live through the block.\n";
+ }
+ } else {
+ if (VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block should not be in AliveBlocks", MFI);
+ *OS << "Virtual register " << PrintReg(Reg)
+ << " is not needed live through the block.\n";
+ }
+ }
+ }
+ }
+}
+
+void MachineVerifier::verifyLiveIntervals() {
+ assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+ // Spilling and splitting may leave unused registers around. Skip them.
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+
+ if (!LiveInts->hasInterval(Reg)) {
+ report("Missing live interval for virtual register", MF);
+ *OS << PrintReg(Reg, TRI) << " still has defs or uses\n";
+ continue;
+ }
+
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ assert(Reg == LI.reg && "Invalid reg to interval mapping");
+ verifyLiveInterval(LI);
+ }
+
+ // Verify all the cached regunit intervals.
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i))
+ verifyLiveInterval(*LI);
+}
+
+void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
+ VNInfo *VNI) {
+ if (VNI->isUnused())
+ return;
+
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+
+ if (!DefVNI) {
+ report("Valno not live at def and not marked unused", MF, LI);
+ *OS << "Valno #" << VNI->id << '\n';
+ return;
+ }
+
+ if (DefVNI != VNI) {
+ report("Live range at def has different valno", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live\n";
+ return;
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ return;
+ }
+
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber() << '\n';
+ }
+ return;
+ }
+
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ return;
+ }
+
+ bool hasDef = false;
+ bool isEarlyClobber = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ if (MOI->getReg() != LI.reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->hasRegUnit(MOI->getReg(), LI.reg))
+ continue;
+ }
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
+
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot",
+ MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+}
+
+void
+MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
+ LiveInterval::const_iterator I) {
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
+
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live range", MF, LI);
+ *OS << *I << " has a bad valno\n";
+ }
+
+ if (VNI->isUnused()) {
+ report("Live range valno is marked unused", MF, LI);
+ *OS << *I << '\n';
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB, LI);
+ *OS << *I << '\n';
+ }
+
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
+
+ // No more checks for live-out segments.
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+ return;
+
+ // RegUnit intervals are allowed dead phis.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() &&
+ I->start == VNI->def && I->end == VNI->def.getDeadSlot())
+ return;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ return;
+ }
+
+ // The block slot must refer to a basic block boundary.
+ if (I->end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+
+ if (I->end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(I->start, I->end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // A live segment can only end at an early-clobber slot if it is being
+ // redefined by an early-clobber def.
+ if (I->end.isEarlyClobber()) {
+ if (I+1 == LI.end() || (I+1)->start != I->end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != LI.reg)
+ continue;
+ if (MOI->readsReg())
+ hasRead = true;
+ if (MOI->isDef() && MOI->isDead())
+ hasDeadDef = true;
+ }
+
+ if (I->end.isDead()) {
+ if (!hasDeadDef) {
+ report("Instruction doesn't have a dead def operand", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ } else {
+ if (!hasRead) {
+ report("Instruction ending live range doesn't read the register", MI);
+ *OS << *I << " in " << LI << '\n';
+ }
+ }
+ }
+
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ return;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
+
+ // Is VNI a PHI-def in the current block?
+ bool IsPHI = VNI->isPHIDef() &&
+ VNI->def == LiveInts->getMBBStartIdx(MFI);
+
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
+
+ // All predecessors must have a live-out value.
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI, LI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
+ << PEnd << '\n';
+ continue;
+ }
+
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
+ report("Different value live out of predecessor", *PI, LI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
+ }
+ }
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+}
+
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I)
+ verifyLiveIntervalValue(LI, *I);
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I)
+ verifyLiveIntervalSegment(LI, I);
+
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
new file mode 100644
index 000000000000..48db200c513c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
@@ -0,0 +1,37 @@
+//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics compatible with
+// Objective Caml 3.10.0, which uses a liveness-accurate static stack map.
+//
+// The frametable emitter is in OcamlGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+
+using namespace llvm;
+
+namespace {
+ class OcamlGC : public GCStrategy {
+ public:
+ OcamlGC();
+ };
+}
+
+static GCRegistry::Add<OcamlGC>
+X("ocaml", "ocaml 3.10-compatible GC");
+
+void llvm::linkOcamlGC() { }
+
+OcamlGC::OcamlGC() {
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+}
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
new file mode 100644
index 000000000000..3982612e8c11
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -0,0 +1,193 @@
+//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes machine instruction PHIs to take advantage of
+// opportunities created during DAG legalization.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phi-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
+STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
+
+namespace {
+ class OptimizePHIs : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification
+ OptimizePHIs() : MachineFunctionPass(ID) {
+ initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ typedef SmallPtrSet<MachineInstr*, 16> InstrSet;
+ typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator;
+
+ bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
+ InstrSet &PHIsInCycle);
+ bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
+ bool OptimizeBB(MachineBasicBlock &MBB);
+ };
+}
+
+char OptimizePHIs::ID = 0;
+char &llvm::OptimizePHIsID = OptimizePHIs::ID;
+INITIALIZE_PASS(OptimizePHIs, "opt-phis",
+ "Optimize machine instruction PHIs", false, false)
+
+bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
+ MRI = &Fn.getRegInfo();
+ TII = Fn.getTarget().getInstrInfo();
+
+ // Find dead PHI cycles and PHI cycles that can be replaced by a single
+ // value. InstCombine does these optimizations, but DAG legalization may
+ // introduce new opportunities, e.g., when i64 values are split up for
+ // 32-bit targets.
+ bool Changed = false;
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= OptimizeBB(*I);
+
+ return Changed;
+}
+
+/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// SingleValReg is zero on entry, it is set to the register with the single
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+/// have been scanned.
+bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
+ unsigned &SingleValReg,
+ InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ // Scan the PHI operands.
+ for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ if (SrcReg == DstReg)
+ continue;
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+ // Skip over register-to-register moves.
+ if (SrcMI && SrcMI->isCopy() &&
+ !SrcMI->getOperand(0).getSubReg() &&
+ !SrcMI->getOperand(1).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ if (!SrcMI)
+ return false;
+
+ if (SrcMI->isPHI()) {
+ if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle))
+ return false;
+ } else {
+ // Fail if there is more than one non-phi/non-move register.
+ if (SingleValReg != 0)
+ return false;
+ SingleValReg = SrcReg;
+ }
+ }
+ return true;
+}
+
+/// IsDeadPHICycle - Check if the register defined by a PHI is only used by
+/// other PHIs in a cycle.
+bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ "PHI destination is not a virtual register");
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg),
+ E = MRI->use_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle))
+ return false;
+ }
+
+ return true;
+}
+
+/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by
+/// a single value.
+bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator
+ MII = MBB.begin(), E = MBB.end(); MII != E; ) {
+ MachineInstr *MI = &*MII++;
+ if (!MI->isPHI())
+ break;
+
+ // Check for single-value PHI cycles.
+ unsigned SingleValReg = 0;
+ InstrSet PHIsInCycle;
+ if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
+ SingleValReg != 0) {
+ unsigned OldReg = MI->getOperand(0).getReg();
+ if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
+ continue;
+
+ MRI->replaceRegWith(OldReg, SingleValReg);
+ MI->eraseFromParent();
+ ++NumPHICycles;
+ Changed = true;
+ continue;
+ }
+
+ // Check for dead PHI cycles.
+ PHIsInCycle.clear();
+ if (IsDeadPHICycle(MI, PHIsInCycle)) {
+ for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end();
+ PI != PE; ++PI) {
+ MachineInstr *PhiMI = *PI;
+ if (&*MII == PhiMI)
+ ++MII;
+ PhiMI->eraseFromParent();
+ }
+ ++NumDeadPHICycles;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 000000000000..5584708eae36
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,644 @@
+//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "llvm/CodeGen/Passes.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
+ cl::Hidden, cl::desc("Disable critical edge splitting "
+ "during PHI elimination"));
+
+static cl::opt<bool>
+SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
+ cl::Hidden, cl::desc("Split all critical edges during "
+ "PHI elimination"));
+
+namespace {
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+ LiveVariables *LV;
+ LiveIntervals *LIS;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PHIElimination() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+ /// analyzePHINodes - Gather information about the PHI nodes in
+ /// here. In particular, we want to map the number of uses of a virtual
+ /// register which is used in a PHI node. We map that to the BB the
+ /// vreg is coming from. This is used later to determine when the vreg
+ /// is killed in the BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineLoopInfo *MLI);
+
+ // These functions are temporary abstractions around LiveVariables and
+ // LiveIntervals, so they can go away when LiveVariables does.
+ bool isLiveIn(unsigned Reg, MachineBasicBlock *MBB);
+ bool isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB);
+
+ typedef std::pair<unsigned, unsigned> BBVRegPair;
+ typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+ // Map reusable lowered PHI node -> incoming join register.
+ typedef DenseMap<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> LoweredPHIMap;
+ LoweredPHIMap LoweredPHIs;
+ };
+}
+
+STATISTIC(NumLowered, "Number of phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
+STATISTIC(NumReused, "Number of reused lowered phis");
+
+char PHIElimination::ID = 0;
+char& llvm::PHIEliminationID = PHIElimination::ID;
+
+INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false)
+
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+
+ bool Changed = false;
+
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
+ // Split critical edges to help the coalescer. This does not yet support
+ // updating LiveIntervals, so we disable it.
+ if (!DisableEdgeSplitting && (LV || LIS)) {
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(MF, *I, MLI);
+ }
+
+ // Populate VRegPHIUseCount
+ analyzePHINodes(MF);
+
+ // Eliminate PHI instructions by inserting copies into predecessor blocks.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= EliminatePHINodes(MF, *I);
+
+ // Remove dead IMPLICIT_DEF instructions.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(),
+ E = ImpDefs.end(); I != E; ++I) {
+ MachineInstr *DefMI = *I;
+ unsigned DefReg = DefMI->getOperand(0).getReg();
+ if (MRI->use_nodbg_empty(DefReg)) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(DefMI);
+ DefMI->eraseFromParent();
+ }
+ }
+
+ // Clean up the lowered PHI instructions.
+ for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end();
+ I != E; ++I) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(I->first);
+ MF.DeleteMachineInstr(I->first);
+ }
+
+ LoweredPHIs.clear();
+ ImpDefs.clear();
+ VRegPHIUseCount.clear();
+
+ return Changed;
+}
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (MBB.empty() || !MBB.front().isPHI())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ // Get an iterator to the first instruction after the last PHI node (this may
+ // also be the end of the basic block).
+ MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
+
+ while (MBB.front().isPHI())
+ LowerPHINode(MBB, AfterPHIsIt);
+
+ return true;
+}
+
+/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs.
+/// This includes registers with no defs.
+static bool isImplicitlyDefined(unsigned VirtReg,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg),
+ DE = MRI->def_end(); DI != DE; ++DI)
+ if (!DI->isImplicitDef())
+ return false;
+ return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+ const MachineRegisterInfo *MRI) {
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI))
+ return false;
+ return true;
+}
+
+
+/// LowerPHINode - Lower the PHI node at the top of the specified block,
+///
+void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ ++NumLowered;
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+ unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+ assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
+ bool isDead = MPhi->getOperand(0).isDead();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ unsigned IncomingReg = 0;
+ bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
+
+ // Insert a register to register copy at the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ if (isSourceDefinedByImplicitDef(MPhi, MRI))
+ // If all sources of a PHI node are implicit_def, just emit an
+ // implicit_def instead of a copy.
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ else {
+ // Can we reuse an earlier PHI node? This only happens for critical edges,
+ // typically those created by tail duplication.
+ unsigned &entry = LoweredPHIs[MPhi];
+ if (entry) {
+ // An identical PHI node was already lowered. Reuse the incoming register.
+ IncomingReg = entry;
+ reusedIncoming = true;
+ ++NumReused;
+ DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
+ } else {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+ entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ }
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), DestReg)
+ .addReg(IncomingReg);
+ }
+
+ // Update live variable information if there is any.
+ if (LV) {
+ MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+ if (IncomingReg) {
+ LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
+ // Increment use count of the newly created virtual register.
+ LV->setPHIJoin(IncomingReg);
+
+ // When we are reusing the incoming register, it may already have been
+ // killed in this block. The old kill will also have been inserted at
+ // AfterPHIsIt, so it appears before the current PHICopy.
+ if (reusedIncoming)
+ if (MachineInstr *OldKill = VI.findKill(&MBB)) {
+ DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
+ DEBUG(MBB.dump());
+ }
+
+ // Add information to LiveVariables to know that the incoming value is
+ // killed. Note that because the value is defined in several places (once
+ // each for each incoming block), the "def" block and instruction fields
+ // for the VarInfo is not filled in.
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ }
+
+ // Since we are going to be deleting the PHI node, if it is the last use of
+ // any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ LV->removeVirtualRegistersKilled(MPhi);
+
+ // If the result is dead, update LV.
+ if (isDead) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegisterDead(DestReg, MPhi);
+ }
+ }
+
+ // Update LiveIntervals for the new copy or implicit def.
+ if (LIS) {
+ MachineInstr *NewInstr = prior(AfterPHIsIt);
+ SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr);
+
+ SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
+ if (IncomingReg) {
+ // Add the region from the beginning of MBB to the copy instruction to
+ // IncomingReg's live interval.
+ LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg);
+ VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
+ if (!IncomingVNI)
+ IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
+ LIS->getVNInfoAllocator());
+ IncomingLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getRegSlot(),
+ IncomingVNI));
+ }
+
+ LiveInterval &DestLI = LIS->getInterval(DestReg);
+ assert(DestLI.begin() != DestLI.end() &&
+ "PHIs should have nonempty LiveIntervals.");
+ if (DestLI.endIndex().isDead()) {
+ // A dead PHI's live range begins and ends at the start of the MBB, but
+ // the lowered copy, which will still be dead, needs to begin and end at
+ // the copy instruction.
+ VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
+ assert(OrigDestVNI && "PHI destination should be live at block entry.");
+ DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot());
+ DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
+ LIS->getVNInfoAllocator());
+ DestLI.removeValNo(OrigDestVNI);
+ } else {
+ // Otherwise, remove the region from the beginning of MBB to the copy
+ // instruction from DestReg's live interval.
+ DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
+ assert(DestVNI && "PHI destination should be live at its definition.");
+ DestVNI->def = DestCopyIndex.getRegSlot();
+ }
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into the
+ // IncomingReg register in the corresponding predecessor basic block.
+ SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ for (int i = NumSrcs - 1; i >= 0; --i) {
+ unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+ bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
+ isImplicitlyDefined(SrcReg, MRI);
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ // Get the MachineBasicBlock equivalent of the BasicBlock that is the source
+ // path the PHI.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the same
+ // basic block.
+ if (!MBBsInsertedInto.insert(&opBlock))
+ continue; // If the copy has already been emitted, we're done.
+
+ // Find a safe location to insert the copy, this may be the first terminator
+ // in the block (or end()).
+ MachineBasicBlock::iterator InsertPos =
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
+
+ // Insert the copy.
+ MachineInstr *NewSrcInstr = 0;
+ if (!reusedIncoming && IncomingReg) {
+ if (SrcUndef) {
+ // The source register is undefined, so there is no need for a real
+ // COPY, but we still need to ensure joint dominance by defs.
+ // Insert an IMPLICIT_DEF instruction.
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF),
+ IncomingReg);
+
+ // Clean up the old implicit-def, if there even was one.
+ if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
+ if (DefMI->isImplicitDef())
+ ImpDefs.insert(DefMI);
+ } else {
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg)
+ .addReg(SrcReg, 0, SrcSubReg);
+ }
+ }
+
+ // We only need to update the LiveVariables kill of SrcReg if this was the
+ // last PHI use of SrcReg to be lowered on this CFG edge and it is not live
+ // out of the predecessor. We can also ignore undef sources.
+ if (LV && !SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] &&
+ !LV->isLiveOut(SrcReg, opBlock)) {
+ // We want to be able to insert a kill of the register if this PHI (aka,
+ // the copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value
+ // is live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+
+ // Okay, if we now know that the value is not live out of the block, we
+ // can add a kill marker in this block saying that it kills the incoming
+ // value!
+
+ // In our final twist, we have to decide which instruction kills the
+ // register. In most cases this is the copy, however, terminator
+ // instructions at the end of the block may also use the value. In this
+ // case, we should mark the last such terminator as being the killing
+ // block, not the copy.
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
+ }
+ }
+ assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
+ }
+
+ if (LIS) {
+ if (NewSrcInstr) {
+ LIS->InsertMachineInstrInMaps(NewSrcInstr);
+ LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr);
+ }
+
+ if (!SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) {
+ LiveInterval &SrcLI = LIS->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
+ SE = opBlock.succ_end(); SI != SE; ++SI) {
+ SlotIndex startIdx = LIS->getMBBStartIdx(*SI);
+ VNInfo *VNI = SrcLI.getVNInfoAt(startIdx);
+
+ // Definitions by other PHIs are not truly live-in for our purposes.
+ if (VNI && VNI->def != startIdx) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (!isLiveOut) {
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't just insert a copy.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
+ }
+ }
+ assert(KillInst->readsRegister(SrcReg) &&
+ "Cannot find kill instruction");
+
+ SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst);
+ SrcLI.removeRange(LastUseIndex.getRegSlot(),
+ LIS->getMBBEndIdx(&opBlock));
+ }
+ }
+ }
+ }
+
+ // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
+ if (reusedIncoming || !IncomingReg) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MPhi);
+ MF.DeleteMachineInstr(MPhi);
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, we want to map the number of uses of a virtual register which is
+/// used in a PHI node. We map that to the BB the vreg is coming from. This is
+/// used later to determine when the vreg is killed in the BB.
+///
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(),
+ BBI->getOperand(i).getReg())];
+}
+
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineLoopInfo *MLI) {
+ if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0;
+ bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // Is there a critical edge from PreMBB to MBB?
+ if (PreMBB->succ_size() == 1)
+ continue;
+
+ // Avoid splitting backedges of loops. It would introduce small
+ // out-of-line blocks into the loop which is very bad for code placement.
+ if (PreMBB == &MBB && !SplitAllCriticalEdges)
+ continue;
+ const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
+ if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges)
+ continue;
+
+ // LV doesn't consider a phi use live-out, so isLiveOut only returns true
+ // when the source register is live-out for some other reason than a phi
+ // use. That means the copy we will insert in PreMBB won't be a kill, and
+ // there is a risk it may not be coalesced away.
+ //
+ // If the copy would be a kill, there is no need to split the edge.
+ if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges)
+ continue;
+
+ DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
+ << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
+ << ": " << *BBI);
+
+ // If Reg is not live-in to MBB, it means it must be live-in to some
+ // other PreMBB successor, and we can avoid the interference by splitting
+ // the edge.
+ //
+ // If Reg *is* live-in to MBB, the interference is inevitable and a copy
+ // is likely to be left after coalescing. If we are looking at a loop
+ // exiting edge, split it so we won't insert code in the loop, otherwise
+ // don't bother.
+ bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges;
+
+ // Check for a loop exiting edge.
+ if (!ShouldSplit && CurLoop != PreLoop) {
+ DEBUG({
+ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
+ if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
+ if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+ });
+ // This edge could be entering a loop, exiting a loop, or it could be
+ // both: Jumping directly form one loop to the header of a sibling
+ // loop.
+ // Split unless this edge is entering CurLoop from an outer loop.
+ ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
+ }
+ if (!ShouldSplit)
+ continue;
+ if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
+ DEBUG(dbgs() << "Failed to split ciritcal edge.\n");
+ continue;
+ }
+ Changed = true;
+ ++NumCriticalEdgesSplit;
+ }
+ }
+ return Changed;
+}
+
+bool PHIElimination::isLiveIn(unsigned Reg, MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveIn() requires either LiveVariables or LiveIntervals");
+ if (LIS)
+ return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB);
+ else
+ return LV->isLiveIn(Reg, *MBB);
+}
+
+bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
+ // LiveVariables considers uses in PHIs to be in the predecessor basic block,
+ // so that a register used only in a PHI is not live out of the block. In
+ // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than
+ // in the predecessor basic block, so that a register used only in a PHI is live
+ // out of the block.
+ if (LIS) {
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (LI.liveAt(LIS->getMBBStartIdx(*SI)))
+ return true;
+ }
+ return false;
+ } else {
+ return LV->isLiveOut(Reg, *MBB);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 000000000000..e1b56e962fa9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,61 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB->empty())
+ return MBB->begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB->isLandingPad())
+ return MBB->getFirstTerminator();
+
+ // Discover any defs/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
+ RE = MRI.reg_end(); RI != RE; ++RI) {
+ MachineInstr* DefUseMI = &*RI;
+ if (DefUseMI->getParent() == MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No defs. Insert the copy at the start of the basic block.
+ InsertPoint = MBB->begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the def/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last def/use.
+ InsertPoint = MBB->end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+ // Make sure the copy goes after any phi nodes however.
+ return MBB->SkipPHIsAndLabels(InsertPoint);
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 000000000000..9ac47fb4c505
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+ /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
+ MachineBasicBlock::iterator
+ findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg);
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
new file mode 100644
index 000000000000..1af65c88abeb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -0,0 +1,746 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+ cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+ cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> DisableBlockPlacement("disable-block-placement",
+ cl::Hidden, cl::desc("Disable probability-driven block placement"));
+static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
+ cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+ cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
+ cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden,
+ cl::desc("Disable Early If-conversion"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
+ cl::desc("Disable Machine Common Subexpression Elimination"));
+static cl::opt<cl::boolOrDefault>
+OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
+ cl::desc("Enable optimized register allocation compilation path."));
+static cl::opt<cl::boolOrDefault>
+EnableMachineSched("enable-misched", cl::Hidden,
+ cl::desc("Enable the machine instruction scheduling pass."));
+static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden,
+ cl::desc("Use strong PHI elimination."));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+ cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+ cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+ cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+ cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
+ cl::desc("Disable Copy Propagation pass"));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+static cl::opt<std::string>
+PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
+ cl::desc("Print machine instrs"),
+ cl::value_desc("pass-name"), cl::init("option-unspecified"));
+
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));
+
+/// Allow standard passes to be disabled by command line options. This supports
+/// simple binary flags that either suppress the pass or do nothing.
+/// i.e. -disable-mypass=false has no effect.
+/// These should be converted to boolOrDefault in order to use applyOverride.
+static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
+ if (Override)
+ return 0;
+ return PassID;
+}
+
+/// Allow Pass selection to be overriden by command line options. This supports
+/// flags with ternary conditions. TargetID is passed through by default. The
+/// pass is suppressed when the option is false. When the option is true, the
+/// StandardID is selected if the target provides no default.
+static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
+ AnalysisID StandardID) {
+ switch (Override) {
+ case cl::BOU_UNSET:
+ return TargetID;
+ case cl::BOU_TRUE:
+ if (TargetID)
+ return TargetID;
+ if (StandardID == 0)
+ report_fatal_error("Target cannot enable pass");
+ return StandardID;
+ case cl::BOU_FALSE:
+ return 0;
+ }
+ llvm_unreachable("Invalid command line option state");
+}
+
+/// Allow standard passes to be disabled by the command line, regardless of who
+/// is adding the pass.
+///
+/// StandardID is the pass identified in the standard pass pipeline and provided
+/// to addPass(). It may be a target-specific ID in the case that the target
+/// directly adds its own pass, but in that case we harmlessly fall through.
+///
+/// TargetID is the pass that the target has configured to override StandardID.
+///
+/// StandardID may be a pseudo ID. In that case TargetID is the name of the real
+/// pass to run. This allows multiple options to control a single pass depending
+/// on where in the pipeline that pass is added.
+static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
+ if (StandardID == &PostRASchedulerID)
+ return applyDisable(TargetID, DisablePostRA);
+
+ if (StandardID == &BranchFolderPassID)
+ return applyDisable(TargetID, DisableBranchFold);
+
+ if (StandardID == &TailDuplicateID)
+ return applyDisable(TargetID, DisableTailDuplicate);
+
+ if (StandardID == &TargetPassConfig::EarlyTailDuplicateID)
+ return applyDisable(TargetID, DisableEarlyTailDup);
+
+ if (StandardID == &MachineBlockPlacementID)
+ return applyDisable(TargetID, DisableBlockPlacement);
+
+ if (StandardID == &StackSlotColoringID)
+ return applyDisable(TargetID, DisableSSC);
+
+ if (StandardID == &DeadMachineInstructionElimID)
+ return applyDisable(TargetID, DisableMachineDCE);
+
+ if (StandardID == &EarlyIfConverterID)
+ return applyDisable(TargetID, DisableEarlyIfConversion);
+
+ if (StandardID == &MachineLICMID)
+ return applyDisable(TargetID, DisableMachineLICM);
+
+ if (StandardID == &MachineCSEID)
+ return applyDisable(TargetID, DisableMachineCSE);
+
+ if (StandardID == &MachineSchedulerID)
+ return applyOverride(TargetID, EnableMachineSched, StandardID);
+
+ if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
+ return applyDisable(TargetID, DisablePostRAMachineLICM);
+
+ if (StandardID == &MachineSinkingID)
+ return applyDisable(TargetID, DisableMachineSink);
+
+ if (StandardID == &MachineCopyPropagationID)
+ return applyDisable(TargetID, DisableCopyProp);
+
+ return TargetID;
+}
+
+//===---------------------------------------------------------------------===//
+/// TargetPassConfig
+//===---------------------------------------------------------------------===//
+
+INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
+ "Target Pass Configuration", false, false)
+char TargetPassConfig::ID = 0;
+
+// Pseudo Pass IDs.
+char TargetPassConfig::EarlyTailDuplicateID = 0;
+char TargetPassConfig::PostRAMachineLICMID = 0;
+
+namespace llvm {
+class PassConfigImpl {
+public:
+ // List of passes explicitly substituted by this target. Normally this is
+ // empty, but it is a convenient way to suppress or replace specific passes
+ // that are part of a standard pass pipeline without overridding the entire
+ // pipeline. This mechanism allows target options to inherit a standard pass's
+ // user interface. For example, a target may disable a standard pass by
+ // default by substituting a pass ID of zero, and the user may still enable
+ // that standard pass with an explicit command line option.
+ DenseMap<AnalysisID,AnalysisID> TargetPasses;
+
+ /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
+ /// is inserted after each instance of the first one.
+ SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
+};
+} // namespace llvm
+
+// Out of line virtual method.
+TargetPassConfig::~TargetPassConfig() {
+ delete Impl;
+}
+
+// Out of line constructor provides default values for pass options and
+// registers all common codegen passes.
+TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
+ : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0),
+ Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false),
+ DisableVerify(false),
+ EnableTailMerge(true) {
+
+ Impl = new PassConfigImpl();
+
+ // Register all target independent codegen passes to activate their PassIDs,
+ // including this pass itself.
+ initializeCodeGen(*PassRegistry::getPassRegistry());
+
+ // Substitute Pseudo Pass IDs for real ones.
+ substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
+ substitutePass(&PostRAMachineLICMID, &MachineLICMID);
+
+ // Temporarily disable experimental passes.
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enableMachineScheduler())
+ disablePass(&MachineSchedulerID);
+}
+
+/// Insert InsertedPassID pass after TargetPassID.
+void TargetPassConfig::insertPass(AnalysisID TargetPassID,
+ AnalysisID InsertedPassID) {
+ assert(TargetPassID != InsertedPassID && "Insert a pass after itself!");
+ std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID);
+ Impl->InsertedPasses.push_back(P);
+}
+
+/// createPassConfig - Create a pass configuration object to be used by
+/// addPassToEmitX methods for generating a pipeline of CodeGen passes.
+///
+/// Targets may override this to extend TargetPassConfig.
+TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new TargetPassConfig(this, PM);
+}
+
+TargetPassConfig::TargetPassConfig()
+ : ImmutablePass(ID), PM(0) {
+ llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
+}
+
+// Helper to verify the analysis is really immutable.
+void TargetPassConfig::setOpt(bool &Opt, bool Val) {
+ assert(!Initialized && "PassConfig is immutable");
+ Opt = Val;
+}
+
+void TargetPassConfig::substitutePass(AnalysisID StandardID,
+ AnalysisID TargetID) {
+ Impl->TargetPasses[StandardID] = TargetID;
+}
+
+AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+ DenseMap<AnalysisID, AnalysisID>::const_iterator
+ I = Impl->TargetPasses.find(ID);
+ if (I == Impl->TargetPasses.end())
+ return ID;
+ return I->second;
+}
+
+/// Add a pass to the PassManager if that pass is supposed to be run. If the
+/// Started/Stopped flags indicate either that the compilation should start at
+/// a later pass or that it should stop after an earlier pass, then do not add
+/// the pass. Finally, compare the current pass against the StartAfter
+/// and StopAfter options and change the Started/Stopped flags accordingly.
+void TargetPassConfig::addPass(Pass *P) {
+ assert(!Initialized && "PassConfig is immutable");
+
+ // Cache the Pass ID here in case the pass manager finds this pass is
+ // redundant with ones already scheduled / available, and deletes it.
+ // Fundamentally, once we add the pass to the manager, we no longer own it
+ // and shouldn't reference it.
+ AnalysisID PassID = P->getPassID();
+
+ if (Started && !Stopped)
+ PM->add(P);
+ if (StopAfter == PassID)
+ Stopped = true;
+ if (StartAfter == PassID)
+ Started = true;
+ if (Stopped && !Started)
+ report_fatal_error("Cannot stop compilation after pass that is not run");
+}
+
+/// Add a CodeGen pass at this point in the pipeline after checking for target
+/// and command line overrides.
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
+ AnalysisID TargetID = getPassSubstitution(PassID);
+ AnalysisID FinalID = overridePass(PassID, TargetID);
+ if (FinalID == 0)
+ return FinalID;
+
+ Pass *P = Pass::createPass(FinalID);
+ if (!P)
+ llvm_unreachable("Pass ID not registered");
+ addPass(P);
+ // Add the passes after the pass P if there is any.
+ for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator
+ I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
+ I != E; ++I) {
+ if ((*I).first == PassID) {
+ assert((*I).second && "Illegal Pass ID!");
+ Pass *NP = Pass::createPass((*I).second);
+ assert(NP && "Pass ID not registered");
+ addPass(NP);
+ }
+ }
+ return FinalID;
+}
+
+void TargetPassConfig::printAndVerify(const char *Banner) {
+ if (TM->shouldPrintMachineCode())
+ addPass(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ if (VerifyMachineCode)
+ addPass(createMachineVerifierPass(Banner));
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR transforms
+/// following machine independent optimization.
+void TargetPassConfig::addIRPasses() {
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ addPass(createTypeBasedAliasAnalysisPass());
+ addPass(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ addPass(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
+ addPass(createLoopStrengthReducePass());
+ if (PrintLSR)
+ addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ addPass(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ addPass(createUnreachableBlockEliminationPass());
+}
+
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+void TargetPassConfig::addPassesToHandleExceptions() {
+ switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ addPass(createSjLjEHPreparePass(TM->getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ addPass(createDwarfEHPass(TM));
+ break;
+ case ExceptionHandling::None:
+ addPass(createLowerInvokePass(TM->getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ addPass(createUnreachableBlockEliminationPass());
+ break;
+ }
+}
+
+/// Add pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+void TargetPassConfig::addCodeGenPrepare() {
+ if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
+ addPass(createCodeGenPreparePass(getTargetLowering()));
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
+ addPass(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel();
+
+ if (PrintISelInput)
+ addPass(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ addPass(createVerifierPass());
+}
+
+/// Add the complete set of target-independent postISel code generator passes.
+///
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
+///
+/// Any TargetPassConfig::addXX routine may be overriden by the Target. The
+/// addPre/Post methods with empty header implementations allow injecting
+/// target-specific fixups just before or after major stages. Additionally,
+/// targets have the flexibility to change pass order within a stage by
+/// overriding default implementation of add%Stage routines below. Each
+/// technique has maintainability tradeoffs because alternate pass orders are
+/// not well supported. addPre/Post works better if the target pass is easily
+/// tied to a common pass. But if it has subtle dependencies on multiple passes,
+/// the target should override the stage instead.
+///
+/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
+/// before/after any target-independent pass. But it's currently overkill.
+void TargetPassConfig::addMachinePasses() {
+ // Insert a machine instr printer pass after the specified pass.
+ // If -print-machineinstrs specified, print machineinstrs after all passes.
+ if (StringRef(PrintMachineInstrs.getValue()).equals(""))
+ TM->Options.PrintMachineCode = true;
+ else if (!StringRef(PrintMachineInstrs.getValue())
+ .equals("option-unspecified")) {
+ const PassRegistry *PR = PassRegistry::getPassRegistry();
+ const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
+ const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs"));
+ assert (TPI && IPI && "Pass ID not registered!");
+ const char *TID = (const char *)(TPI->getTypeInfo());
+ const char *IID = (const char *)(IPI->getTypeInfo());
+ insertPass(TID, IID);
+ }
+
+ // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ if (addPass(&ExpandISelPseudosID))
+ printAndVerify("After ExpandISelPseudos");
+
+ // Add passes that optimize machine instructions in SSA form.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addMachineSSAOptimization();
+ } else {
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID);
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc())
+ printAndVerify("After PreRegAlloc passes");
+
+ // Run register allocation and passes that are tightly coupled with it,
+ // including phi elimination and scheduling.
+ if (getOptimizeRegAlloc())
+ addOptimizedRegAlloc(createRegAllocPass(true));
+ else
+ addFastRegAlloc(createRegAllocPass(false));
+
+ // Run post-ra passes.
+ if (addPostRegAlloc())
+ printAndVerify("After PostRegAlloc passes");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ addPass(&PrologEpilogCodeInserterID);
+ printAndVerify("After PrologEpilogCodeInserter");
+
+ /// Add passes that optimize machine instructions after register allocation.
+ if (getOptLevel() != CodeGenOpt::None)
+ addMachineLateOptimization();
+
+ // Expand pseudo instructions before second scheduling pass.
+ addPass(&ExpandPostRAPseudosID);
+ printAndVerify("After ExpandPostRAPseudos");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2())
+ printAndVerify("After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(&PostRASchedulerID);
+ printAndVerify("After PostRAScheduler");
+ }
+
+ // GC
+ if (addGCPasses()) {
+ if (PrintGCInfo)
+ addPass(createGCInfoPrinter(dbgs()));
+ }
+
+ // Basic block placement.
+ if (getOptLevel() != CodeGenOpt::None)
+ addBlockPlacement();
+
+ if (addPreEmitPass())
+ printAndVerify("After PreEmit passes");
+}
+
+/// Add passes that optimize machine instructions in SSA form.
+void TargetPassConfig::addMachineSSAOptimization() {
+ // Pre-ra tail duplication.
+ if (addPass(&EarlyTailDuplicateID))
+ printAndVerify("After Pre-RegAlloc TailDuplicate");
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(&OptimizePHIsID);
+
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(&StackColoringID);
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID);
+
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(&DeadMachineInstructionElimID);
+ printAndVerify("After codegen DCE pass");
+
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ if (addILPOpts())
+ printAndVerify("After ILP optimizations");
+
+ addPass(&MachineLICMID);
+ addPass(&MachineCSEID);
+ addPass(&MachineSinkingID);
+ printAndVerify("After Machine LICM, CSE and Sinking passes");
+
+ addPass(&PeepholeOptimizerID);
+ printAndVerify("After codegen peephole optimization pass");
+}
+
+//===---------------------------------------------------------------------===//
+/// Register Allocation Pass Configuration
+//===---------------------------------------------------------------------===//
+
+bool TargetPassConfig::getOptimizeRegAlloc() const {
+ switch (OptimizeRegAlloc) {
+ case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None;
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+ llvm_unreachable("Invalid optimize-regalloc state");
+}
+
+/// RegisterRegAlloc's global Registry tracks allocator registration.
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+/// A dummy default pass factory indicates whether the register allocator is
+/// overridden on the command line.
+static FunctionPass *useDefaultRegisterAllocator() { return 0; }
+static RegisterRegAlloc
+defaultRegAlloc("default",
+ "pick register allocator based on -O option",
+ useDefaultRegisterAllocator);
+
+/// -regalloc=... command line option.
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+ cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
+
+
+/// Instantiate the default register allocator pass for this target for either
+/// the optimized or unoptimized allocation path. This will be added to the pass
+/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
+/// in the optimized case.
+///
+/// A target that uses the standard regalloc pass order for fast or optimized
+/// allocation may still override this for per-target regalloc
+/// selection. But -regalloc=... always takes precedence.
+FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) {
+ if (Optimized)
+ return createGreedyRegisterAllocator();
+ else
+ return createFastRegisterAllocator();
+}
+
+/// Find and instantiate the register allocation pass requested by this target
+/// at the current optimization level. Different register allocators are
+/// defined as separate passes because they may require different analysis.
+///
+/// This helper ensures that the regalloc= option is always available,
+/// even for targets that override the default allocator.
+///
+/// FIXME: When MachinePassRegistry register pass IDs instead of function ptrs,
+/// this can be folded into addPass.
+FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+ // Initialize the global default.
+ if (!Ctor) {
+ Ctor = RegAlloc;
+ RegisterRegAlloc::setDefault(RegAlloc);
+ }
+ if (Ctor != useDefaultRegisterAllocator)
+ return Ctor();
+
+ // With no -regalloc= override, ask the target for a regalloc pass.
+ return createTargetRegisterAllocator(Optimized);
+}
+
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&PHIEliminationID);
+ addPass(&TwoAddressInstructionPassID);
+
+ addPass(RegAllocPass);
+ printAndVerify("After Register Allocation");
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&ProcessImplicitDefsID);
+
+ // LiveVariables currently requires pure SSA form.
+ //
+ // FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
+ // LiveVariables can be removed completely, and LiveIntervals can be directly
+ // computed. (We still either need to regenerate kill flags after regalloc, or
+ // preferably fix the scavenger to not depend on them).
+ addPass(&LiveVariablesID);
+
+ // Add passes that move from transformed SSA into conventional SSA. This is a
+ // "copy coalescing" problem.
+ //
+ if (!EnableStrongPHIElim) {
+ // Edge splitting is smarter with machine loop info.
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
+ }
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (EarlyLiveIntervals)
+ addPass(&LiveIntervalsID);
+
+ addPass(&TwoAddressInstructionPassID);
+
+ if (EnableStrongPHIElim)
+ addPass(&StrongPHIEliminationID);
+
+ addPass(&RegisterCoalescerID);
+
+ // PreRA instruction scheduling.
+ if (addPass(&MachineSchedulerID))
+ printAndVerify("After Machine Scheduling");
+
+ // Add the selected register allocation pass.
+ addPass(RegAllocPass);
+ printAndVerify("After Register Allocation, before rewriter");
+
+ // Allow targets to change the register assignments before rewriting.
+ if (addPreRewrite())
+ printAndVerify("After pre-rewrite passes");
+
+ // Finally rewrite virtual registers.
+ addPass(&VirtRegRewriterID);
+ printAndVerify("After Virtual Register Rewriter");
+
+ // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
+ // but eventually, all users of it should probably be moved to addPostRA and
+ // it can go away. Currently, it's the intended place for targets to run
+ // FinalizeMachineBundles, because passes other than MachineScheduling an
+ // RegAlloc itself may not be aware of bundles.
+ if (addFinalizeRegAlloc())
+ printAndVerify("After RegAlloc finalization");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(&StackSlotColoringID);
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(&PostRAMachineLICMID);
+
+ printAndVerify("After StackSlotColoring and postra Machine LICM");
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+void TargetPassConfig::addMachineLateOptimization() {
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (addPass(&BranchFolderPassID))
+ printAndVerify("After BranchFolding");
+
+ // Tail duplication.
+ if (addPass(&TailDuplicateID))
+ printAndVerify("After TailDuplicate");
+
+ // Copy propagation.
+ if (addPass(&MachineCopyPropagationID))
+ printAndVerify("After copy propagation pass");
+}
+
+/// Add standard GC passes.
+bool TargetPassConfig::addGCPasses() {
+ addPass(&GCMachineCodeAnalysisID);
+ return true;
+}
+
+/// Add standard basic block placement passes.
+void TargetPassConfig::addBlockPlacement() {
+ if (addPass(&MachineBlockPlacementID)) {
+ // Run a separate pass to collect block placement statistics.
+ if (EnableBlockPlacementStats)
+ addPass(&MachineBlockPlacementStatsID);
+
+ printAndVerify("After machine block placement.");
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 000000000000..a7439b5129b5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,577 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+// Optimization of sign / zero extension instructions. It may be extended to
+// handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This optimization
+// will replace some uses of the pre-extension value with uses of the
+// sub-register of the results.
+//
+// - Optimize Comparisons
+//
+// Optimization of comparison instructions. For instance, in this code:
+//
+// sub r1, 1
+// cmp r1, 0
+// bz L1
+//
+// If the "sub" instruction all ready sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
+//
+// Another instance, in this code:
+//
+// sub r1, r3 | sub r1, imm
+// cmp r3, r1 or cmp r1, r3 | cmp r1, imm
+// bge L1
+//
+// If the branch instruction can use flag from "sub", then we can replace
+// "sub" with "subs" and eliminate the "cmp" instruction.
+//
+// - Optimize Bitcast pairs:
+//
+// v1 = bitcast v0
+// v2 = bitcast v1
+// = v2
+// =>
+// v1 = bitcast v0
+// = v0
+//
+// - Optimize Loads:
+//
+// Loads that can be folded into a later instruction. A load is foldable
+// if it loads to virtual registers and the virtual register defined has
+// a single use.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "peephole-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));
+
+static cl::opt<bool>
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+ cl::desc("Disable the peephole optimizer"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
+STATISTIC(NumCmps, "Number of compares eliminated");
+STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
+STATISTIC(NumSelects, "Number of selects optimized");
+
+namespace {
+ class PeepholeOptimizer : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT; // Machine dominator tree
+
+ public:
+ static char ID; // Pass identification
+ PeepholeOptimizer() : MachineFunctionPass(ID) {
+ initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ if (Aggressive) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ }
+
+ private:
+ bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool optimizeSelect(MachineInstr *MI);
+ bool isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
+ };
+}
+
+char PeepholeOptimizer::ID = 0;
+char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+
+/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
+/// a single register and writes a single register and it does not modify the
+/// source, and if the source value is preserved as a sub-register of the
+/// result, then replace all reachable uses of the source with the subreg of the
+/// result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this changes
+/// the code. Since this code does not currently share EXTRACTs, just ignore all
+/// debug uses.
+bool PeepholeOptimizer::
+optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+ unsigned SrcReg, DstReg, SubIdx;
+ if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+ return false;
+
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ if (MRI->hasOneNonDBGUse(SrcReg))
+ // No other uses.
+ return false;
+
+ // Ensure DstReg can get a register class that actually supports
+ // sub-registers. Don't change the class until we commit.
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
+ if (!DstRC)
+ return false;
+
+ // The ext instr may be operating on a sub-register of SrcReg as well.
+ // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
+ // register.
+ // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
+ // SrcReg:SubIdx should be replaced.
+ bool UseSrcSubIdx = TM->getRegisterInfo()->
+ getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;
+
+ // The source has other uses. See if we can replace the other uses with use of
+ // the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
+ ReachedBBs.insert(UI->getParent());
+
+ // Uses that are in the same BB of uses of the result of the instruction.
+ SmallVector<MachineOperand*, 8> Uses;
+
+ // Uses that the result of the instruction can reach.
+ SmallVector<MachineOperand*, 8> ExtendedUses;
+
+ bool ExtendLife = true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end();
+ UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI)
+ continue;
+
+ if (UseMI->isPHI()) {
+ ExtendLife = false;
+ continue;
+ }
+
+ // Only accept uses of SrcReg:SubIdx.
+ if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
+ continue;
+
+ // It's an error to translate this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+ //
+ // into this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1027 = COPY %reg1025:4
+ // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+ //
+ // The problem here is that SUBREG_TO_REG is there to assert that an
+ // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+ // the COPY here, it will give us the value after the <sext>, not the
+ // original value of %reg1024 before <sext>.
+ if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ continue;
+
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ // Local uses that come after the extension.
+ if (!LocalMIs.count(UseMI))
+ Uses.push_back(&UseMO);
+ } else if (ReachedBBs.count(UseMBB)) {
+ // Non-local uses where the result of the extension is used. Always
+ // replace these unless it's a PHI.
+ Uses.push_back(&UseMO);
+ } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+ // We may want to extend the live range of the extension result in order
+ // to replace these uses.
+ ExtendedUses.push_back(&UseMO);
+ } else {
+ // Both will be live out of the def MBB anyway. Don't extend live range of
+ // the extension result.
+ ExtendLife = false;
+ break;
+ }
+ }
+
+ if (ExtendLife && !ExtendedUses.empty())
+ // Extend the liveness of the extension result.
+ std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+ std::back_inserter(Uses));
+
+ // Now replace all uses.
+ bool Changed = false;
+ if (!Uses.empty()) {
+ SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+ // Look for PHI uses of the extended result, we don't want to extend the
+ // liveness of a PHI input. It breaks all kinds of assumptions down
+ // stream. A PHI use is expected to be the kill of its source values.
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
+ if (UI->isPHI())
+ PHIBBs.insert(UI->getParent());
+
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ MachineOperand *UseMO = Uses[i];
+ MachineInstr *UseMI = UseMO->getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (PHIBBs.count(UseMBB))
+ continue;
+
+ // About to add uses of DstReg, clear DstReg's kill flags.
+ if (!Changed) {
+ MRI->clearKillFlags(DstReg);
+ MRI->constrainRegClass(DstReg, DstRC);
+ }
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(DstReg, 0, SubIdx);
+ // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
+ if (UseSrcSubIdx) {
+ Copy->getOperand(0).setSubReg(SubIdx);
+ Copy->getOperand(0).setIsUndef();
+ }
+ UseMO->setReg(NewVR);
+ ++NumReuse;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that
+/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
+/// a value cross register classes), and the source is defined by another
+/// bitcast instruction B. And if the register class of source of B matches
+/// the register class of instruction A, then it is legal to replace all uses
+/// of the def of A with source of B. e.g.
+/// %vreg0<def> = VMOVSR %vreg1
+/// %vreg3<def> = VMOVRS %vreg0
+/// Replace all uses of vreg3 with vreg1.
+
+bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ unsigned NumDefs = MI->getDesc().getNumDefs();
+ unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
+ if (NumDefs != 1)
+ return false;
+
+ unsigned Def = 0;
+ unsigned Src = 0;
+ for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Def = Reg;
+ else if (Src)
+ // Multiple sources?
+ return false;
+ else
+ Src = Reg;
+ }
+
+ assert(Def && Src && "Malformed bitcast instruction!");
+
+ MachineInstr *DefMI = MRI->getVRegDef(Src);
+ if (!DefMI || !DefMI->isBitcast())
+ return false;
+
+ unsigned SrcSrc = 0;
+ NumDefs = DefMI->getDesc().getNumDefs();
+ NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
+ if (NumDefs != 1)
+ return false;
+ for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+ const MachineOperand &MO = DefMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (!MO.isDef()) {
+ if (SrcSrc)
+ // Multiple sources?
+ return false;
+ else
+ SrcSrc = Reg;
+ }
+ }
+
+ if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
+ return false;
+
+ MRI->replaceRegWith(Def, SrcSrc);
+ MRI->clearKillFlags(SrcSrc);
+ MI->eraseFromParent();
+ ++NumBitcasts;
+ return true;
+}
+
+/// optimizeCmpInstr - If the instruction is a compare and the previous
+/// instruction it's comparing against all ready sets (or could be modified to
+/// set) the same flag as the compare, then we can remove the comparison and use
+/// the flag from the previous instruction.
+bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ // If this instruction is a comparison against zero and isn't comparing a
+ // physical register, we can try to optimize it.
+ unsigned SrcReg, SrcReg2;
+ int CmpMask, CmpValue;
+ if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+ (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
+ return false;
+
+ // Attempt to optimize the comparison instruction.
+ if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
+ ++NumCmps;
+ return true;
+ }
+
+ return false;
+}
+
+/// Optimize a select instruction.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
+ unsigned TrueOp = 0;
+ unsigned FalseOp = 0;
+ bool Optimizable = false;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+ return false;
+ if (!Optimizable)
+ return false;
+ if (!TII->optimizeSelect(MI))
+ return false;
+ MI->eraseFromParent();
+ ++NumSelects;
+ return true;
+}
+
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads to virtual registers and the virtual
+/// register defined has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+ unsigned &FoldAsLoadDefReg) {
+ if (!MI->canFoldAsLoad() || !MI->mayLoad())
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+
+ unsigned Reg = MI->getOperand(0).getReg();
+ // To reduce compilation time, we check MRI->hasOneUse when inserting
+ // loads. It should be checked when processing uses of the load, since
+ // uses can be removed during peephole.
+ if (!MI->getOperand(0).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MRI->hasOneUse(Reg)) {
+ FoldAsLoadDefReg = Reg;
+ return true;
+ }
+ return false;
+}
+
+bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MI->isMoveImmediate())
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ ImmDefMIs.insert(std::make_pair(Reg, MI));
+ ImmDefRegs.insert(Reg);
+ return true;
+ }
+
+ return false;
+}
+
+/// foldImmediate - Try folding register operands that are defined by move
+/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
+bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (ImmDefRegs.count(Reg) == 0)
+ continue;
+ DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ assert(II != ImmDefMIs.end());
+ if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+ ++NumImmFold;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
+ DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
+
+ if (DisablePeephole)
+ return false;
+
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ MRI = &MF.getRegInfo();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+ bool Changed = false;
+
+ SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallSet<unsigned, 4> ImmDefRegs;
+ DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ unsigned FoldAsLoadDefReg;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ bool SeenMoveImm = false;
+ LocalMIs.clear();
+ ImmDefRegs.clear();
+ ImmDefMIs.clear();
+ FoldAsLoadDefReg = 0;
+
+ for (MachineBasicBlock::iterator
+ MII = I->begin(), MIE = I->end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+ // We may be erasing MI below, increment MII now.
+ ++MII;
+ LocalMIs.insert(MI);
+
+ // If there exists an instruction which belongs to the following
+ // categories, we will discard the load candidate.
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
+ MI->hasUnmodeledSideEffects()) {
+ FoldAsLoadDefReg = 0;
+ continue;
+ }
+ if (MI->mayStore() || MI->isCall())
+ FoldAsLoadDefReg = 0;
+
+ if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
+ (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
+ (MI->isSelect() && optimizeSelect(MI))) {
+ // MI is deleted.
+ LocalMIs.erase(MI);
+ Changed = true;
+ continue;
+ }
+
+ if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+ SeenMoveImm = true;
+ } else {
+ Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
+ // optimizeExtInstr might have created new instructions after MI
+ // and before the already incremented MII. Adjust MII so that the
+ // next iteration sees the new instructions.
+ MII = MI;
+ ++MII;
+ if (SeenMoveImm)
+ Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
+ }
+
+ // Check whether MI is a load candidate for folding into a later
+ // instruction. If MI is not a candidate, check whether we can fold an
+ // earlier load into MI.
+ if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
+ // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
+ // can enable folding by converting SUB to CMP.
+ MachineInstr *DefMI = 0;
+ MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
+ FoldAsLoadDefReg, DefMI);
+ if (FoldMI) {
+ // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+ DEBUG(dbgs() << "Replacing: " << *MI);
+ DEBUG(dbgs() << " With: " << *FoldMI);
+ LocalMIs.erase(MI);
+ LocalMIs.erase(DefMI);
+ LocalMIs.insert(FoldMI);
+ MI->eraseFromParent();
+ DefMI->eraseFromParent();
+ ++NumLoadFold;
+
+ // MI is replaced with FoldMI.
+ Changed = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 000000000000..53fe273a1032
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,776 @@
+//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "llvm/CodeGen/Passes.h"
+#include "AggressiveAntiDepBreaker.h"
+#include "AntiDepBreaker.h"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
+
+// Post-RA scheduling is enabled with
+// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
+// override the target.
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+ cl::desc("Enable scheduling after register allocation"),
+ cl::init(false), cl::Hidden);
+static cl::opt<std::string>
+EnableAntiDepBreaking("break-anti-dependencies",
+ cl::desc("Break post-RA scheduling anti-dependencies: "
+ "\"critical\", \"all\", or \"none\""),
+ cl::init("none"), cl::Hidden);
+
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+
+AntiDepBreaker::~AntiDepBreaker() { }
+
+namespace {
+ class PostRAScheduler : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
+
+ public:
+ static char ID;
+ PostRAScheduler() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char PostRAScheduler::ID = 0;
+
+ class SchedulePostRATDList : public ScheduleDAGInstrs {
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ LatencyPriorityQueue AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands becomes available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
+ AntiDepBreaker *AntiDepBreak;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+ /// LiveRegs - true if the register is live.
+ BitVector LiveRegs;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
+ public:
+ SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, const RegisterClassInfo&,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs);
+
+ ~SchedulePostRATDList();
+
+ /// startBlock - Initialize register live-range state for scheduling in
+ /// this block.
+ ///
+ void startBlock(MachineBasicBlock *BB);
+
+ /// Initialize the scheduler state for the next scheduling region.
+ virtual void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// Notify that the scheduler has finished scheduling the current region.
+ virtual void exitRegion();
+
+ /// Schedule - Schedule the instruction range using list scheduling.
+ ///
+ void schedule();
+
+ void EmitSchedule();
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count);
+
+ /// finishBlock - Clean up register live-range state.
+ ///
+ void finishBlock();
+
+ /// FixupKills - Fix register kill flags that have been made
+ /// invalid due to scheduling
+ ///
+ void FixupKills(MachineBasicBlock *MBB);
+
+ private:
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+ void StartBlockForKills(MachineBasicBlock *BB);
+
+ // ToggleKillFlag - Toggle a register operand kill flag. Other
+ // adjustments may be made to the instruction if necessary. Return
+ // true if the operand has been deleted, false if not.
+ bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+
+ void dumpSchedule() const;
+ };
+}
+
+char &llvm::PostRASchedulerID = PostRAScheduler::ID;
+
+INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
+ "Post RA top-down list latency scheduler", false, false)
+
+SchedulePostRATDList::SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA),
+ LiveRegs(TRI->getNumRegs())
+{
+ const TargetMachine &TM = MF.getTarget();
+ const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+ HazardRec =
+ TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+
+ assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
+ MRI.tracksLiveness()) &&
+ "Live-ins must be accurate for anti-dependency breaking");
+ AntiDepBreak =
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+ delete HazardRec;
+ delete AntiDepBreak;
+}
+
+/// Initialize state associated with the next scheduling region.
+void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+ Sequence.clear();
+}
+
+/// Print the schedule before exiting the region.
+void SchedulePostRATDList::exitRegion() {
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+ ScheduleDAGInstrs::exitRegion();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// dumpSchedule - dump the scheduled Sequence.
+void SchedulePostRATDList::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+#endif
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+ TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+
+ RegClassInfo.runOnMachineFunction(Fn);
+
+ // Check for explicit enable/disable of post-ra scheduling.
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+ TargetSubtargetInfo::ANTIDEP_NONE;
+ SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
+ if (EnablePostRAScheduler.getPosition() > 0) {
+ if (!EnablePostRAScheduler)
+ return false;
+ } else {
+ // Check that post-RA scheduling is enabled for this target.
+ // This may upgrade the AntiDepMode.
+ const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
+ CriticalPathRCs))
+ return false;
+ }
+
+ // Check for antidep breaking override...
+ if (EnableAntiDepBreaking.getPosition() > 0) {
+ AntiDepMode = (EnableAntiDepBreaking == "all")
+ ? TargetSubtargetInfo::ANTIDEP_ALL
+ : ((EnableAntiDepBreaking == "critical")
+ ? TargetSubtargetInfo::ANTIDEP_CRITICAL
+ : TargetSubtargetInfo::ANTIDEP_NONE);
+ }
+
+ DEBUG(dbgs() << "PostRAScheduler\n");
+
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
+ CriticalPathRCs);
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int bbcnt = 0;
+ if (bbcnt++ % DebugDiv != DebugMod)
+ continue;
+ dbgs() << "*** DEBUG scheduling " << Fn.getName()
+ << ":BB#" << MBB->getNumber() << " ***\n";
+ }
+#endif
+
+ // Initialize register live-range state for scheduling in this block.
+ Scheduler.startBlock(MBB);
+
+ // Schedule each sequence of instructions not interrupted by a label
+ // or anything else that effectively needs to shut down scheduling.
+ MachineBasicBlock::iterator Current = MBB->end();
+ unsigned Count = MBB->size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+ MachineInstr *MI = llvm::prior(I);
+ // Calls are not scheduling boundaries before register allocation, but
+ // post-ra we don't gain anything by scheduling across calls since we
+ // don't need to worry about register pressure.
+ if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
+ Scheduler.enterRegion(MBB, I, Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
+ Scheduler.EmitSchedule();
+ Current = MI;
+ CurrentCount = Count - 1;
+ Scheduler.Observe(MI, CurrentCount);
+ }
+ I = MI;
+ --Count;
+ if (MI->isBundle())
+ Count -= MI->getBundleSize();
+ }
+ assert(Count == 0 && "Instruction count mismatch!");
+ assert((MBB->begin() == Current || CurrentCount != 0) &&
+ "Instruction count mismatch!");
+ Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
+ Scheduler.EmitSchedule();
+
+ // Clean up register live-range state.
+ Scheduler.finishBlock();
+
+ // Update register kills
+ Scheduler.FixupKills(MBB);
+ }
+
+ return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
+ // Call the superclass.
+ ScheduleDAGInstrs::startBlock(BB);
+
+ // Reset the hazard recognizer and anti-dep breaker.
+ HazardRec->Reset();
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->StartBlock(BB);
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(AA);
+
+ if (AntiDepBreak != NULL) {
+ unsigned Broken =
+ AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
+ EndIndex, DbgValues);
+
+ if (Broken != 0) {
+ // We made changes. Update the dependency graph.
+ // Theoretically we could update the graph in place:
+ // When a live range is changed to use a different register, remove
+ // the def's anti-dependence *and* output-dependence edges due to
+ // that register, and add new anti-dependence and output-dependence
+ // edges based on the next live range of the register.
+ ScheduleDAG::clearDAG();
+ buildSchedGraph(AA);
+
+ NumFixedAnti += Broken;
+ }
+ }
+
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ AvailableQueue.initNodes(SUnits);
+ ListScheduleTopDown();
+ AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
+///
+void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->Observe(MI, Count, EndIndex);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::finishBlock() {
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->FinishBlock();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::finishBlock();
+}
+
+/// StartBlockForKills - Initialize register live-range state for updating kills
+///
+void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
+ // Start with no live registers.
+ LiveRegs.reset();
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ LiveRegs.set(Reg);
+ // Repeat, for all subregs.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
+ }
+ }
+}
+
+bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
+ MachineOperand &MO) {
+ // Setting kill flag...
+ if (!MO.isKill()) {
+ MO.setIsKill(true);
+ return false;
+ }
+
+ // If MO itself is live, clear the kill flag...
+ if (LiveRegs.test(MO.getReg())) {
+ MO.setIsKill(false);
+ return false;
+ }
+
+ // If any subreg of MO is live, then create an imp-def for that
+ // subreg and keep MO marked as killed.
+ MO.setIsKill(false);
+ bool AllDead = true;
+ const unsigned SuperReg = MO.getReg();
+ MachineInstrBuilder MIB(MF, MI);
+ for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
+ MIB.addReg(*SubRegs, RegState::ImplicitDefine);
+ AllDead = false;
+ }
+ }
+
+ if(AllDead)
+ MO.setIsKill(true);
+ return false;
+}
+
+/// FixupKills - Fix the register kill flags, they may have been made
+/// incorrect by instruction reordering.
+///
+void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+
+ BitVector killedRegs(TRI->getNumRegs());
+
+ StartBlockForKills(MBB);
+
+ // Examine block from end to start...
+ unsigned Count = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
+ // Update liveness. Registers that are defed but not used in this
+ // instruction are now dead. Mark register and all subregs as they
+ // are completely defined.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ LiveRegs.clearBitsNotInMask(MO.getRegMask());
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ LiveRegs.reset(Reg);
+
+ // Repeat for all subregs.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.reset(*SubRegs);
+ }
+
+ // Examine all used registers and set/clear kill flag. When a
+ // register is used multiple times we only set the kill flag on
+ // the first use.
+ killedRegs.reset();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
+
+ bool kill = false;
+ if (!killedRegs.test(Reg)) {
+ kill = true;
+ // A register is not killed if any subregs are live...
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
+ kill = false;
+ break;
+ }
+ }
+
+ // If subreg is not live, then register is killed if it became
+ // live in this instruction
+ if (kill)
+ kill = !LiveRegs.test(Reg);
+ }
+
+ if (MO.isKill() != kill) {
+ DEBUG(dbgs() << "Fixing " << MO << " in ");
+ // Warning: ToggleKillFlag may invalidate MO.
+ ToggleKillFlag(MI, MO);
+ DEBUG(MI->dump());
+ }
+
+ killedRegs.set(Reg);
+ }
+
+ // Mark any used register (that is not using undef) and subregs as
+ // now live...
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
+
+ LiveRegs.set(Reg);
+
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ return;
+ }
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ // Standard scheduler algorithms will recompute the depth of the successor
+ // here as such:
+ // SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+ //
+ // However, we lazily compute node depth instead. Note that
+ // ScheduleNodeTopDown has already updated the depth of this node which causes
+ // all descendents to be marked dirty. Setting the successor depth explicitly
+ // here would cause depth to be recomputed for all its ancestors. If the
+ // successor is not yet ready (because of a transitively redundant edge) then
+ // this causes depth computation to be quadratic in the size of the DAG.
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() &&
+ "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue.scheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // We're scheduling top-down but we're visiting the regions in
+ // bottom-up order, so we don't know the hazards at the start of a
+ // region. So assume no hazards (this should usually be ok as most
+ // blocks are a single region).
+ HazardRec->Reset();
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
+ AvailableQueue.push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // In any cycle where we can't schedule any instructions, we must
+ // stall or emit a noop, depending on the target.
+ bool CycleHasInsts = false;
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ unsigned MinDepth = ~0u;
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
+ AvailableQueue.push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
+ }
+
+ DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
+
+ SUnit *FoundSUnit = 0;
+ bool HasNoopHazards = false;
+ while (!AvailableQueue.empty()) {
+ SUnit *CurSUnit = AvailableQueue.pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue.push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule...
+ if (FoundSUnit) {
+ // ... schedule the node...
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+ CycleHasInsts = true;
+ if (HazardRec->atIssueLimit()) {
+ DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ } else {
+ if (CycleHasInsts) {
+ DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem,
+ // just advance the current cycle and try again.
+ DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ }
+
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ }
+
+#ifndef NDEBUG
+ unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+#endif // NDEBUG
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+void SchedulePostRATDList::EmitSchedule() {
+ RegionBegin = RegionEnd;
+
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->splice(RegionEnd, BB, FirstDbgValue);
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ BB->splice(RegionEnd, BB, SU->getInstr());
+ else
+ // Null SUnit* is a noop.
+ TII->insertNoop(*BB, RegionEnd);
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (i == 0)
+ RegionBegin = prior(RegionEnd);
+ }
+
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrivMI = P.second;
+ BB->splice(++OrigPrivMI, BB, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+}
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
new file mode 100644
index 000000000000..e4e18c3bb54b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -0,0 +1,170 @@
+//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "processimplicitdefs"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
+/// for each use. Add isUndef marker to implicit_def defs and their uses.
+class ProcessImplicitDefs : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ SmallSetVector<MachineInstr*, 16> WorkList;
+
+ void processImplicitDef(MachineInstr *MI);
+ bool canTurnIntoImplicitDef(MachineInstr *MI);
+
+public:
+ static char ID;
+
+ ProcessImplicitDefs() : MachineFunctionPass(ID) {
+ initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+};
+} // end anonymous namespace
+
+char ProcessImplicitDefs::ID = 0;
+char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
+
+INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+
+void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<AliasAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
+ if (!MI->isCopyLike() &&
+ !MI->isInsertSubreg() &&
+ !MI->isRegSequence() &&
+ !MI->isPHI())
+ return false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse() && MO->readsReg())
+ return false;
+ return true;
+}
+
+void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
+ DEBUG(dbgs() << "Processing " << *MI);
+ unsigned Reg = MI->getOperand(0).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // For virtual regiusters, mark all uses as <undef>, and convert users to
+ // implicit-def when possible.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &MO = UI.getOperand();
+ MO.setIsUndef();
+ MachineInstr *UserMI = MO.getParent();
+ if (!canTurnIntoImplicitDef(UserMI))
+ continue;
+ DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI);
+ UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ WorkList.insert(UserMI);
+ }
+ MI->eraseFromParent();
+ return;
+ }
+
+ // This is a physreg implicit-def.
+ // Look for the first instruction to use or define an alias.
+ MachineBasicBlock::instr_iterator UserMI = MI;
+ MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
+ bool Found = false;
+ for (++UserMI; UserMI != UserE; ++UserMI) {
+ for (MIOperands MO(UserMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned UserReg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(UserReg) ||
+ !TRI->regsOverlap(Reg, UserReg))
+ continue;
+ // UserMI uses or redefines Reg. Set <undef> flags on all uses.
+ Found = true;
+ if (MO->isUse())
+ MO->setIsUndef();
+ }
+ if (Found)
+ break;
+ }
+
+ // If we found the using MI, we can erase the IMPLICIT_DEF.
+ if (Found) {
+ DEBUG(dbgs() << "Physreg user: " << *UserMI);
+ MI->eraseFromParent();
+ return;
+ }
+
+ // Using instr wasn't found, it could be in another block.
+ // Leave the physreg IMPLICIT_DEF, but trim any extra operands.
+ for (unsigned i = MI->getNumOperands() - 1; i; --i)
+ MI->RemoveOperand(i);
+ DEBUG(dbgs() << "Keeping physreg: " << *MI);
+}
+
+/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into
+/// <undef> operands.
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ bool Changed = false;
+
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
+ assert(WorkList.empty() && "Inconsistent worklist state");
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+ MFI != MFE; ++MFI) {
+ // Scan the basic block for implicit defs.
+ for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI)
+ if (MBBI->isImplicitDef())
+ WorkList.insert(MBBI);
+
+ if (WorkList.empty())
+ continue;
+
+ DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size()
+ << " implicit defs.\n");
+ Changed = true;
+
+ // Drain the WorkList to recursively process any new implicit defs.
+ do processImplicitDef(WorkList.pop_back_val());
+ while (!WorkList.empty());
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 000000000000..e5872df731a0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,892 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the functions frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass provides an optional shrink wrapping variant of prolog/epilog
+// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pei"
+#include "PrologEpilogInserter.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+
+using namespace llvm;
+
+char PEI::ID = 0;
+char &llvm::PrologEpilogCodeInserterID = PEI::ID;
+
+INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion & Frame Finalization",
+ false, false)
+
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+STATISTIC(NumBytesStackSpace,
+ "Number of bytes used for stack in all functions");
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ const Function* F = Fn.getFunction();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+
+ assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
+
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+ // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+ // function's frame information. Also eliminates call frame pseudo
+ // instructions.
+ calculateCallsInformation(Fn);
+
+ // Allow the target machine to make some adjustments to the function
+ // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
+ TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+ // Scan the function for modified callee saved registers and insert spill code
+ // for any callee saved registers that are modified.
+ calculateCalleeSavedRegisters(Fn);
+
+ // Determine placement of CSR spill/restore code:
+ // - With shrink wrapping, place spills and restores to tightly
+ // enclose regions in the Machine CFG of the function where
+ // they are used.
+ // - Without shink wrapping (default), place all spills in the
+ // entry block, all restores in return blocks.
+ placeCSRSpillsAndRestores(Fn);
+
+ // Add the code to save and restore the callee saved registers
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
+ insertCSRSpillsAndRestores(Fn);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TFI->processFunctionBeforeFrameFinalized(Fn, RS);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters()
+ // must be called before this function in order to set the AdjustsStack
+ // and MaxCallFrameSize variables.
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+ // If register scavenging is needed, as we've enabled doing it as a
+ // post-pass, scavenge the virtual registers that frame index elimiation
+ // inserted.
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
+ scavengeFrameVirtualRegs(Fn);
+
+ // Clear any vregs created by virtual scavenging.
+ Fn.getRegInfo().clearVirtRegs();
+
+ delete RS;
+ clearAllSets();
+ return true;
+}
+
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void PEI::calculateCallsInformation(MachineFunction &Fn) {
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ unsigned MaxCallFrameSize = 0;
+ bool AdjustsStack = MFI->adjustsStack();
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ // Early exit for targets which have no call frame setup/destroy pseudo
+ // instructions.
+ if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+ return;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImm();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ AdjustsStack = true;
+ FrameSDOps.push_back(I);
+ } else if (I->isInlineAsm()) {
+ // Some inline asm's need a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
+ }
+
+ MFI->setAdjustsStack(AdjustsStack);
+ MFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (std::vector<MachineBasicBlock::iterator>::iterator
+ i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+ MachineBasicBlock::iterator I = *i;
+
+ // If call frames are not being included as part of the stack frame, and
+ // the target doesn't indicate otherwise, remove the call frame pseudos
+ // here. The sub/add sp instruction pairs are still inserted, but we don't
+ // need to track the SP adjustment for frame index elimination.
+ if (TFI->canSimplifyCallFramePseudos(Fn))
+ TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+}
+
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers.
+void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
+ const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = F.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = F.getFrameInfo();
+
+ // Get the callee saved register list...
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+
+ // These are used to keep track the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ // Early exit for targets which have no callee saved registers.
+ if (CSRegs == 0 || CSRegs[0] == 0)
+ return;
+
+ // In Naked functions we aren't going to save any registers.
+ if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
+ return;
+
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (F.getRegInfo().isPhysRegUsed(Reg)) {
+ // If the reg is modified, save it!
+ CSI.push_back(CalleeSavedInfo(Reg));
+ }
+ }
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (std::vector<CalleeSavedInfo>::iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
+ }
+
+ I->setFrameIdx(FrameIdx);
+ }
+
+ MFI->setCalleeSavedInfo(CSI);
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
+///
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+ // Get callee saved register information.
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ MFI->setCalleeSavedInfoValid(true);
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ MachineBasicBlock::iterator I;
+
+ if (!ShrinkWrapThisFunction) {
+ // Spill using target interface.
+ I = EntryBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ EntryBlock->addLiveIn(CSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*EntryBlock, I, Reg, true,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+ }
+
+ // Restore using target interface.
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+ MachineBasicBlock* MBB = ReturnBlocks[ri];
+ I = MBB->end(); --I;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
+ CSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+ return;
+ }
+
+ // Insert spills.
+ std::vector<CalleeSavedInfo> blockCSI;
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet save = BI->second;
+
+ if (save.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = save.begin(),
+ RE = save.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not collect callee saved register info");
+
+ I = MBB->begin();
+
+ // When shrink wrapping, use stack slot stores/loads.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ MBB->addLiveIn(blockCSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*MBB, I, Reg,
+ true,
+ blockCSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restore = BI->second;
+
+ if (restore.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = restore.begin(),
+ RE = restore.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not find callee saved register info");
+
+ // If MBB is empty and needs restores, insert at the _beginning_.
+ if (MBB->empty()) {
+ I = MBB->begin();
+ } else {
+ I = MBB->end();
+ --I;
+
+ // Skip over all terminator instructions, which are part of the
+ // return sequence.
+ if (! I->isTerminator()) {
+ ++I;
+ } else {
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
+ I = I2;
+ }
+ }
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
+ blockCSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, Offset);
+ Offset += MFI->getObjectSize(FrameIdx);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
+ && "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of local area.
+ // We currently don't support filling in holes in between fixed sized
+ // objects, so we adjust 'Offset' to point to the end of last fixed sized
+ // preallocated object.
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+ // The maximum distance from the stack pointer is at lower address of
+ // the object -- which is given by offset. For down growing stack
+ // the offset is negative, so we negate the offset to get the distance.
+ FixedOff = -MFI->getObjectOffset(i);
+ } else {
+ // The maximum distance from the start pointer is at the upper
+ // address of the object.
+ FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ // If the stack grows down, we need to add the size to find the lowest
+ // address of the object.
+ Offset += MFI->getObjectSize(i);
+
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else {
+ int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+ for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MFI->setObjectOffset(i, Offset);
+ Offset += MFI->getObjectSize(i);
+ }
+ }
+
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // frame pointer if a frame pointer is required.
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
+ !RegInfo->needsStackRealignment(Fn)) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // FIXME: Once this is working, then enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+ Offset, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
+ !RegInfo->useFPForScavengingIndex(Fn))) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ if (!TFI.targetHandlesStackFrameRounding()) {
+ // If we have reserved argument space for call sites in the function
+ // immediately on entry to the current function, count it as part of the
+ // overall stack size.
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlignment();
+ else
+ StackAlign = TFI.getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ int64_t StackSize = Offset - LocalAreaOffset;
+ MFI->setStackSize(StackSize);
+ NumBytesStackSpace += StackSize;
+}
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+
+ // Add prologue to the function...
+ TFI.emitPrologue(Fn);
+
+ // Add epilogue to restore the callee-save registers in each exiting block
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().isReturn())
+ TFI.emitEpilogue(Fn, *I);
+ }
+
+ // Emit additional code that is required to support segmented stacks, if
+ // we've been asked for it. This, when linked with a runtime with support
+ // for segmented stacks (libgcc is one), will result in allocating stack
+ // space in small chunks instead of one large contiguous block.
+ if (Fn.getTarget().Options.EnableSegmentedStacks)
+ TFI.adjustForSegmentedStacks(Fn);
+
+ // Emit additional code that is required to explicitly handle the stack in
+ // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+ // approach is rather similar to that of Segmented Stacks, but it uses a
+ // different conditional check and another BIF for allocating more stack
+ // space.
+ if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+ TFI.adjustForHiPEPrologue(Fn);
+}
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+ if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+
+ const TargetMachine &TM = Fn.getTarget();
+ assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+#ifndef NDEBUG
+ int SPAdjCount = 0; // frame setup / destroy count.
+#endif
+ int SPAdj = 0; // SP offset due to call frame setup / destroy.
+ if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+#ifndef NDEBUG
+ // Track whether we see even pairs of them
+ SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1;
+#endif
+ // Remember how much SP has been adjusted to create the call
+ // frame.
+ int Size = I->getOperand(0).getImm();
+
+ if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+ Size = -Size;
+
+ SPAdj += Size;
+
+ MachineBasicBlock::iterator PrevI = BB->end();
+ if (I != BB->begin()) PrevI = prior(I);
+ TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+ // Visit the instructions created by eliminateCallFramePseudoInstr().
+ if (PrevI == BB->end())
+ I = BB->begin(); // The replaced instr was the first in the block.
+ else
+ I = llvm::next(PrevI);
+ continue;
+ }
+
+ MachineInstr *MI = I;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (!MI->getOperand(i).isFI())
+ continue;
+
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+ TRI.eliminateFrameIndex(MI, SPAdj, i,
+ FrameIndexVirtualScavenging ? NULL : RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ MI = 0;
+ break;
+ }
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+ }
+
+ // If we have evenly matched pairs of frame setup / destroy instructions,
+ // make sure the adjustments come out to zero. If we don't have matched
+ // pairs, we can't be sure the missing bit isn't in another basic block
+ // due to a custom inserter playing tricks, so just asserting SPAdj==0
+ // isn't sufficient. See tMOVCC on Thumb1, for example.
+ assert((SPAdjCount || SPAdj == 0) &&
+ "Unbalanced call frame setup / destroy pairs?");
+ }
+}
+
+/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+///
+/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
+/// iterate over the vreg use list, which at this point only contains machine
+/// operands for which eliminateFrameIndex need a new scratch reg.
+void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+ // Run through the instructions and find any virtual registers.
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ RS->enterBasicBlock(BB);
+
+ int SPAdj = 0;
+
+ // The instruction stream may change in the loop, so check BB->end()
+ // directly.
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ MachineInstr *MI = I;
+ MachineBasicBlock::iterator J = llvm::next(I);
+ MachineBasicBlock::iterator P = I == BB->begin() ?
+ MachineBasicBlock::iterator(NULL) : llvm::prior(I);
+
+ // RS should process this instruction before we might scavenge at this
+ // location. This is because we might be replacing a virtual register
+ // defined by this instruction, and if so, registers killed by this
+ // instruction are available, and defined registers are not.
+ RS->forward(I);
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isReg()) {
+ MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
+ // Replace this reference to the virtual register with the
+ // scratch register.
+ assert (ScratchReg && "Missing scratch register!");
+ Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS->setUsed(ScratchReg);
+ }
+ }
+
+ // If the scavenger needed to use one of its spill slots, the
+ // spill code will have been inserted in between I and J. This is a
+ // problem because we need the spill code before I: Move I to just
+ // prior to J.
+ if (I != llvm::prior(J)) {
+ BB->splice(J, BB, I);
+
+ // Before we move I, we need to prepare the RS to visit I again.
+ // Specifically, RS will assert if it sees uses of registers that
+ // it believes are undefined. Because we have already processed
+ // register kills in I, when it visits I again, it will believe that
+ // those registers are undefined. To avoid this situation, unprocess
+ // the instruction I.
+ assert(RS->getCurrentPosition() == I &&
+ "The register scavenger has an unexpected position");
+ I = P;
+ RS->unprocess(P);
+
+ // RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I));
+ } else
+ ++I;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
new file mode 100644
index 000000000000..87fff9afb309
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
@@ -0,0 +1,173 @@
+//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the functions frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass also implements a shrink wrapping variant of prolog/epilog
+// insertion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PEI_H
+#define LLVM_CODEGEN_PEI_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class RegScavenger;
+ class MachineBasicBlock;
+
+ class PEI : public MachineFunctionPass {
+ public:
+ static char ID;
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Analysis info for spill/restore placement.
+ // "CSR": "callee saved register".
+
+ // CSRegSet contains indices into the Callee Saved Register Info
+ // vector built by calculateCalleeSavedRegisters() and accessed
+ // via MF.getFrameInfo()->getCalleeSavedInfo().
+ typedef SparseBitVector<> CSRegSet;
+
+ // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+ // saved register indices.
+ typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+ // Set and maps for computing CSR spill/restore placement:
+ // used in function (UsedCSRegs)
+ // used in a basic block (CSRUsed)
+ // anticipatable in a basic block (Antic{In,Out})
+ // available in a basic block (Avail{In,Out})
+ // to be spilled at the entry to a basic block (CSRSave)
+ // to be restored at the end of a basic block (CSRRestore)
+ CSRegSet UsedCSRegs;
+ CSRegBlockMap CSRUsed;
+ CSRegBlockMap AnticIn, AnticOut;
+ CSRegBlockMap AvailIn, AvailOut;
+ CSRegBlockMap CSRSave;
+ CSRegBlockMap CSRRestore;
+
+ // Entry and return blocks of the current function.
+ MachineBasicBlock* EntryBlock;
+ SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+ // Map of MBBs to top level MachineLoops.
+ DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
+
+ // Flag to control shrink wrapping per-function:
+ // may choose to skip shrink wrapping for certain
+ // functions.
+ bool ShrinkWrapThisFunction;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the curren function.
+ bool FrameIndexVirtualScavenging;
+
+#ifndef NDEBUG
+ // Machine function handle.
+ MachineFunction* MF;
+
+ // Flag indicating that the current function
+ // has at least one "short" path in the machine
+ // CFG from the entry block to an exit block.
+ bool HasFastExitPath;
+#endif
+
+ bool calculateSets(MachineFunction &Fn);
+ bool calcAnticInOut(MachineBasicBlock* MBB);
+ bool calcAvailInOut(MachineBasicBlock* MBB);
+ void calculateAnticAvail(MachineFunction &Fn);
+ bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks);
+ bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+ bool calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills);
+ bool calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores);
+ void placeSpillsAndRestores(MachineFunction &Fn);
+ void placeCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Initialize DFA sets, called before iterations.
+ void clearAnticAvailSets();
+ // Clear all sets constructed by shrink wrapping.
+ void clearAllSets();
+
+ // Initialize all shrink wrapping data.
+ void initShrinkWrappingInfo();
+
+ // Convienences for dealing with machine loops.
+ MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+ MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+ // Propgate CSRs used in MBB to all MBBs of loop LP.
+ void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+ // Convenience for recognizing return blocks.
+ bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+ // Debugging methods.
+
+ // Mark this function as having fast exit paths.
+ void findFastExitPath();
+
+ // Verify placement of spills/restores.
+ void verifySpillRestorePlacement();
+
+ std::string getBasicBlockName(const MachineBasicBlock* MBB);
+ std::string stringifyCSRegSet(const CSRegSet& s);
+ void dumpSet(const CSRegSet& s);
+ void dumpUsed(MachineBasicBlock* MBB);
+ void dumpAllUsed();
+ void dumpSets(MachineBasicBlock* MBB);
+ void dumpSets1(MachineBasicBlock* MBB);
+ void dumpAllSets();
+ void dumpSRSets();
+#endif
+
+ };
+} // End llvm namespace
+#endif
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 000000000000..85649111d7f1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,132 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+using namespace llvm;
+
+namespace {
+struct PSVGlobalsTy {
+ // PseudoSourceValues are immutable so don't need locking.
+ const PseudoSourceValue PSVs[4];
+ sys::Mutex Lock; // Guards FSValues, but not the values inside it.
+ std::map<int, const PseudoSourceValue *> FSValues;
+
+ PSVGlobalsTy() : PSVs() {}
+ ~PSVGlobalsTy() {
+ for (std::map<int, const PseudoSourceValue *>::iterator
+ I = FSValues.begin(), E = FSValues.end(); I != E; ++I) {
+ delete I->second;
+ }
+ }
+};
+
+static ManagedStatic<PSVGlobalsTy> PSVGlobals;
+
+} // anonymous namespace
+
+const PseudoSourceValue *PseudoSourceValue::getStack()
+{ return &PSVGlobals->PSVs[0]; }
+const PseudoSourceValue *PseudoSourceValue::getGOT()
+{ return &PSVGlobals->PSVs[1]; }
+const PseudoSourceValue *PseudoSourceValue::getJumpTable()
+{ return &PSVGlobals->PSVs[2]; }
+const PseudoSourceValue *PseudoSourceValue::getConstantPool()
+{ return &PSVGlobals->PSVs[3]; }
+
+static const char *const PSVNames[] = {
+ "Stack",
+ "GOT",
+ "JumpTable",
+ "ConstantPool"
+};
+
+// FIXME: THIS IS A HACK!!!!
+// Eventually these should be uniqued on LLVMContext rather than in a managed
+// static. For now, we can safely use the global context for the time being to
+// squeak by.
+PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
+ Value(Type::getInt8PtrTy(getGlobalContext()),
+ Subclass) {}
+
+void PseudoSourceValue::printCustom(raw_ostream &O) const {
+ O << PSVNames[this - PSVGlobals->PSVs];
+}
+
+const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
+ PSVGlobalsTy &PG = *PSVGlobals;
+ sys::ScopedLock locked(PG.Lock);
+ const PseudoSourceValue *&V = PG.FSValues[FI];
+ if (!V)
+ V = new FixedStackPseudoSourceValue(FI);
+ return V;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ if (this == getStack())
+ return false;
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return true;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+}
+
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ if (this == getStack() ||
+ this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+}
+
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ return true;
+}
+
+bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+ return MFI && MFI->isImmutableObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ // Negative frame indices are used for special things that don't
+ // appear in LLVM IR. Non-negative indices may be used for things
+ // like static allocas.
+ if (!MFI)
+ return FI >= 0;
+ // Spill slots should not alias others.
+ return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (!MFI)
+ return true;
+ // Spill slots will not alias any LLVM IR value.
+ return !MFI->isSpillSlotObjectIndex(FI);
+}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
new file mode 100644
index 000000000000..c0355903574f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -0,0 +1,145 @@
+//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class which provides comon functionality
+// for LiveIntervalUnion-based register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+void RegAllocBase::init(VirtRegMap &vrm,
+ LiveIntervals &lis,
+ LiveRegMatrix &mat) {
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ Matrix = &mat;
+ MRI->freezeReservedRegs(vrm.getMachineFunction());
+ RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+}
+
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion, otherwise push
+// them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
+ NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ enqueue(&LIS->getInterval(Reg));
+ }
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+ seedLiveRegs();
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (LiveInterval *VirtReg = dequeue()) {
+ assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+
+ // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ LIS->removeInterval(VirtReg->reg);
+ continue;
+ }
+
+ // Invalidate all interference queries, live ranges could have changed.
+ Matrix->invalidateVirtRegs();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit "
+ << MRI->getRegClass(VirtReg->reg)->getName()
+ << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n');
+ typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg == ~0u) {
+ // selectOrSplit failed to find a register!
+ const char *Msg = "ran out of registers during register allocation";
+ // Probably caused by an inline asm.
+ MachineInstr *MI;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
+ (MI = I.skipInstruction());)
+ if (MI->isInlineAsm())
+ break;
+ if (MI)
+ MI->emitError(Msg);
+ else
+ report_fatal_error(Msg);
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg,
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ continue;
+ }
+
+ if (AvailablePhysReg)
+ Matrix->assign(*VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval *SplitVirtReg = *I;
+ assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ LIS->removeInterval(SplitVirtReg->reg);
+ continue;
+ }
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ enqueue(SplitVirtReg);
+ ++NumNewQueued;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h
new file mode 100644
index 000000000000..064e40f06b7b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h
@@ -0,0 +1,108 @@
+//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which is the skeleton of a basic
+// register allocation algorithm and interface for extending it. It provides the
+// building blocks on which to construct other experimental allocators and test
+// the validity of two principles:
+//
+// - If virtual and physical register liveness is modeled using intervals, then
+// on-the-fly interference checking is cheap. Furthermore, interferences can be
+// lazily cached and reused.
+//
+// - Register allocation complexity, and generated code performance is
+// determined by the effectiveness of live range splitting rather than optimal
+// coloring.
+//
+// Following the first principle, interfering checking revolves around the
+// LiveIntervalUnion data structure.
+//
+// To fulfill the second principle, the basic allocator provides a driver for
+// incremental splitting. It essentially punts on the problem of register
+// coloring, instead driving the assignment of virtual to physical registers by
+// the cost of splitting. The basic allocator allows for heuristic reassignment
+// of registers, if a more sophisticated allocator chooses to do that.
+//
+// This framework provides a way to engineer the compile time vs. code
+// quality trade-off without relying on a particular theoretical solver.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCBASE
+#define LLVM_CODEGEN_REGALLOCBASE
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+
+namespace llvm {
+
+template<typename T> class SmallVectorImpl;
+class TargetRegisterInfo;
+class VirtRegMap;
+class LiveIntervals;
+class LiveRegMatrix;
+class Spiller;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They must also override enqueue/dequeue to provide an
+/// assignment order.
+class RegAllocBase {
+protected:
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ LiveRegMatrix *Matrix;
+ RegisterClassInfo RegClassInfo;
+
+ RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {}
+
+ virtual ~RegAllocBase() {}
+
+ // A RegAlloc pass should call this before allocatePhysRegs.
+ void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
+
+ // The top-level driver. The output is a VirtRegMap that us updated with
+ // physical register assignments.
+ void allocatePhysRegs();
+
+ // Get a temporary reference to a Spiller instance.
+ virtual Spiller &spiller() = 0;
+
+ /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+ virtual void enqueue(LiveInterval *LI) = 0;
+
+ /// dequeue - Return the next unassigned register, or NULL.
+ virtual LiveInterval *dequeue() = 0;
+
+ // A RegAlloc pass should override this to provide the allocation heuristics.
+ // Each call must guarantee forward progess by returning an available PhysReg
+ // or new set of split live virtual registers. It is up to the splitter to
+ // converge quickly toward fully spilled live ranges.
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+
+ // Use this group name for NamedRegionTimer.
+ static const char *TimerGroupName;
+
+public:
+ /// VerifyEnabled - True when -verify-regalloc is given.
+ static bool VerifyEnabled;
+
+private:
+ void seedLiveRegs();
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 000000000000..0b6dc68cdf09
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,293 @@
+//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "AllocationOrder.h"
+#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cstdlib>
+#include <queue>
+
+using namespace llvm;
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+ createBasicRegisterAllocator);
+
+namespace {
+ struct CompSpillWeight {
+ bool operator()(LiveInterval *A, LiveInterval *B) const {
+ return A->weight < B->weight;
+ }
+ };
+}
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and comparing
+/// the speed of the basic algorithm against other styles of allocators.
+class RABasic : public MachineFunctionPass, public RegAllocBase
+{
+ // context
+ MachineFunction *MF;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
+ CompSpillWeight> Queue;
+
+ // Scratch space. Allocated here to avoid repeated malloc calls in
+ // selectOrSplit().
+ BitVector UsableRegs;
+
+public:
+ RABasic();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Basic Register Allocator";
+ }
+
+ /// RABasic analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ virtual Spiller &spiller() { return *SpillerInstance; }
+
+ virtual float getPriority(LiveInterval *LI) { return LI->weight; }
+
+ virtual void enqueue(LiveInterval *LI) {
+ Queue.push(LI);
+ }
+
+ virtual LiveInterval *dequeue() {
+ if (Queue.empty())
+ return 0;
+ LiveInterval *LI = Queue.top();
+ Queue.pop();
+ return LI;
+ }
+
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ // Helper for spilling all live virtual registers currently unified under preg
+ // that interfere with the most recently queried lvr. Return true if spilling
+ // was successful, and append any new spilled/split intervals to splitLVRs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ static char ID;
+};
+
+char RABasic::ID = 0;
+
+} // end anonymous namespace
+
+RABasic::RABasic(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
+}
+
+void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RABasic::releaseMemory() {
+ SpillerInstance.reset(0);
+}
+
+
+// Spill or split all live virtual registers currently unified under PhysReg
+// that interfere with VirtReg. The newly spilled or split live intervals are
+// returned by appending them to SplitVRegs.
+bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Record each interference and determine if all are spillable before mutating
+ // either the union or live intervals.
+ SmallVector<LiveInterval*, 8> Intfs;
+
+ // Collect interferences assigned to any alias of the physical register.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ Q.collectInterferingVRegs();
+ if (Q.seenUnspillableVReg())
+ return false;
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ if (!Intf->isSpillable() || Intf->weight > VirtReg.weight)
+ return false;
+ Intfs.push_back(Intf);
+ }
+ }
+ DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
+ " interferences with " << VirtReg << "\n");
+ assert(!Intfs.empty() && "expected interference");
+
+ // Spill each interfering vreg allocated to PhysReg or an alias.
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ LiveInterval &Spill = *Intfs[i];
+
+ // Skip duplicates.
+ if (!VRM->hasPhys(Spill.reg))
+ continue;
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ Matrix->unassign(Spill);
+
+ // Spill the extracted interval.
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
+ spiller().spill(LRE);
+ }
+ return true;
+}
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do a
+// single interference test for each register the correct class until we find an
+// available register. So, the number of interference tests in the worst case is
+// |vregs| * |machineregs|. And since the number of interference tests is
+// minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
+unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Populate a list of physical register spill candidates.
+ SmallVector<unsigned, 8> PhysRegSpillCands;
+
+ // Check for an available register in this class.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ while (unsigned PhysReg = Order.next()) {
+ // Check for interference in PhysReg
+ switch (Matrix->checkInterference(VirtReg, PhysReg)) {
+ case LiveRegMatrix::IK_Free:
+ // PhysReg is available, allocate it.
+ return PhysReg;
+
+ case LiveRegMatrix::IK_VirtReg:
+ // Only virtual registers in the way, we may be able to spill them.
+ PhysRegSpillCands.push_back(PhysReg);
+ continue;
+
+ default:
+ // RegMask or RegUnit interference.
+ continue;
+ }
+ }
+
+ // Try to spill another interfering reg with less spill weight.
+ for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+ PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+ if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs))
+ continue;
+
+ assert(!Matrix->checkInterference(VirtReg, *PhysRegI) &&
+ "Interference after spill.");
+ // Tell the caller to allocate to this newly freed physical register.
+ return *PhysRegI;
+ }
+
+ // No other spill candidates were found, so spill the current VirtReg.
+ DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+ if (!VirtReg.isSpillable())
+ return ~0u;
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
+ spiller().spill(LRE);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << mf.getName() << '\n');
+
+ MF = &mf;
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+
+ allocatePhysRegs();
+
+ // Diagnostic output before rewriting
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+ releaseMemory();
+ return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator()
+{
+ return new RABasic();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
new file mode 100644
index 000000000000..bb9c05c5f42d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -0,0 +1,1117 @@
+//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCopies, "Number of copies coalesced");
+
+static RegisterRegAlloc
+ fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
+
+namespace {
+ class RAFast : public MachineFunctionPass {
+ public:
+ static char ID;
+ RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
+ isBulkSpilling(false) {}
+ private:
+ const TargetMachine *TM;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
+
+ // Basic block currently being allocated.
+ MachineBasicBlock *MBB;
+
+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ // values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ // Everything we know about a live virtual register.
+ struct LiveReg {
+ MachineInstr *LastUse; // Last instr to use reg.
+ unsigned VirtReg; // Virtual register number.
+ unsigned PhysReg; // Currently held here.
+ unsigned short LastOpNum; // OpNum on LastUse.
+ bool Dirty; // Register needs spill.
+
+ explicit LiveReg(unsigned v)
+ : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {}
+
+ unsigned getSparseSetIndex() const {
+ return TargetRegisterInfo::virtReg2Index(VirtReg);
+ }
+ };
+
+ typedef SparseSet<LiveReg> LiveRegMap;
+
+ // LiveVirtRegs - This map contains entries for each virtual register
+ // that is currently available in a physical register.
+ LiveRegMap LiveVirtRegs;
+
+ DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
+
+ // RegState - Track the state of a physical register.
+ enum RegState {
+ // A disabled register is not available for allocation, but an alias may
+ // be in use. A register can only be moved out of the disabled state if
+ // all aliases are disabled.
+ regDisabled,
+
+ // A free register is not currently in use and can be allocated
+ // immediately without checking aliases.
+ regFree,
+
+ // A reserved register has been assigned explicitly (e.g., setting up a
+ // call parameter), and it remains reserved until it is used.
+ regReserved
+
+ // A register state may also be a virtual register number, indication that
+ // the physical register is currently allocated to a virtual register. In
+ // that case, LiveVirtRegs contains the inverse mapping.
+ };
+
+ // PhysRegState - One of the RegState enums, or a virtreg.
+ std::vector<unsigned> PhysRegState;
+
+ // Set of register units.
+ typedef SparseSet<unsigned> UsedInInstrSet;
+
+ // Set of register units that are used in the current instruction, and so
+ // cannot be allocated.
+ UsedInInstrSet UsedInInstr;
+
+ // Mark a physreg as used in this instruction.
+ void markRegUsedInInstr(unsigned PhysReg) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ UsedInInstr.insert(*Units);
+ }
+
+ // Check if a physreg or any of its aliases are used in this instruction.
+ bool isRegUsedInInstr(unsigned PhysReg) const {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (UsedInInstr.count(*Units))
+ return true;
+ return false;
+ }
+
+ // SkippedInstrs - Descriptors of instructions whose clobber list was
+ // ignored because all registers were spilled. It is still necessary to
+ // mark all the clobbered registers as used by the function.
+ SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
+
+ // isBulkSpilling - This flag is set when LiveRegMap will be cleared
+ // completely after spilling all live registers. LiveRegMap entries should
+ // not be erased.
+ bool isBulkSpilling;
+
+ enum {
+ spillClean = 1,
+ spillDirty = 100,
+ spillImpossible = ~0u
+ };
+ public:
+ virtual const char *getPassName() const {
+ return "Fast Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool runOnMachineFunction(MachineFunction &Fn);
+ void AllocateBasicBlock();
+ void handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead);
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+ bool isLastUseOfLocalReg(MachineOperand&);
+
+ void addKillFlag(const LiveReg&);
+ void killVirtReg(LiveRegMap::iterator);
+ void killVirtReg(unsigned VirtReg);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+ void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+
+ void usePhysReg(MachineOperand&);
+ void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
+ unsigned calcSpillCost(unsigned PhysReg) const;
+ void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
+ LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
+ LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator,
+ unsigned Hint);
+ LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ void spillAll(MachineBasicBlock::iterator MI);
+ bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+ };
+ char RAFast::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location Reg would belong...
+ int SS = StackSlotForVirtReg[VirtReg];
+ if (SS != -1)
+ return SS; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot.
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
+/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to
+/// its virtual register, and it is guaranteed to be a block-local register.
+///
+bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
+ // If the register has ever been spilled or reloaded, we conservatively assume
+ // it is a global register used in multiple blocks.
+ if (StackSlotForVirtReg[MO.getReg()] != -1)
+ return false;
+
+ // Check that the use/def chain has exactly one operand - MO.
+ MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
+ if (&I.getOperand() != &MO)
+ return false;
+ return ++I == MRI->reg_nodbg_end();
+}
+
+/// addKillFlag - Set kill flags on last use of a virtual register.
+void RAFast::addKillFlag(const LiveReg &LR) {
+ if (!LR.LastUse) return;
+ MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
+ if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
+ if (MO.getReg() == LR.PhysReg)
+ MO.setIsKill();
+ else
+ LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true);
+ }
+}
+
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
+ addKillFlag(*LRI);
+ assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
+ "Broken RegState mapping");
+ PhysRegState[LRI->PhysReg] = regFree;
+ // Erase from LiveVirtRegs unless we're spilling in bulk.
+ if (!isBulkSpilling)
+ LiveVirtRegs.erase(LRI);
+}
+
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "killVirtReg needs a virtual register");
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ if (LRI != LiveVirtRegs.end())
+ killVirtReg(LRI);
+}
+
+/// spillVirtReg - This method spills the value specified by VirtReg into the
+/// corresponding stack slot if needed.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Spilling a physical register is illegal!");
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
+ spillVirtReg(MI, LRI);
+}
+
+/// spillVirtReg - Do the actual work of spilling.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
+ LiveRegMap::iterator LRI) {
+ LiveReg &LR = *LRI;
+ assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
+
+ if (LR.Dirty) {
+ // If this physreg is used by the instruction, we want to kill it on the
+ // instruction, not on the spill.
+ bool SpillKill = LR.LastUse != MI;
+ LR.Dirty = false;
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI)
+ << " in " << PrintReg(LR.PhysReg, TRI));
+ const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg);
+ int FI = getStackSpaceFor(LRI->VirtReg, RC);
+ DEBUG(dbgs() << " to stack slot #" << FI << "\n");
+ TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
+ ++NumStores; // Update statistics
+
+ // If this register is used by DBG_VALUE then insert new DBG_VALUE to
+ // identify spilled location as the place to find corresponding variable's
+ // value.
+ SmallVector<MachineInstr *, 4> &LRIDbgValues =
+ LiveDbgValueMap[LRI->VirtReg];
+ for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+ MachineInstr *DBG = LRIDbgValues[li];
+ const MDNode *MDPtr =
+ DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
+ int64_t Offset = 0;
+ if (DBG->getOperand(1).isImm())
+ Offset = DBG->getOperand(1).getImm();
+ DebugLoc DL;
+ if (MI == MBB->end()) {
+ // If MI is at basic block end then use last instruction's location.
+ MachineBasicBlock::iterator EI = MI;
+ DL = (--EI)->getDebugLoc();
+ }
+ else
+ DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) {
+ MachineBasicBlock *MBB = DBG->getParent();
+ MBB->insert(MI, NewDV);
+ DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
+ }
+ }
+ // Now this register is spilled there is should not be any DBG_VALUE
+ // pointing to this register because they are all pointing to spilled value
+ // now.
+ LRIDbgValues.clear();
+ if (SpillKill)
+ LR.LastUse = 0; // Don't kill register again
+ }
+ killVirtReg(LRI);
+}
+
+/// spillAll - Spill all dirty virtregs without killing them.
+void RAFast::spillAll(MachineBasicBlock::iterator MI) {
+ if (LiveVirtRegs.empty()) return;
+ isBulkSpilling = true;
+ // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
+ // of spilling here is deterministic, if arbitrary.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
+ i != e; ++i)
+ spillVirtReg(MI, i);
+ LiveVirtRegs.clear();
+ isBulkSpilling = false;
+}
+
+/// usePhysReg - Handle the direct use of a physical register.
+/// Check that the register is not used by a virtreg.
+/// Kill the physreg, marking it free.
+/// This may add implicit kills to MO->getParent() and invalidate MO.
+void RAFast::usePhysReg(MachineOperand &MO) {
+ unsigned PhysReg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ "Bad usePhysReg operand");
+ markRegUsedInInstr(PhysReg);
+ switch (PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ PhysRegState[PhysReg] = regFree;
+ // Fall through
+ case regFree:
+ MO.setIsKill();
+ return;
+ default:
+ // The physreg was allocated to a virtual register. That means the value we
+ // wanted has been clobbered.
+ llvm_unreachable("Instruction uses an allocated register");
+ }
+
+ // Maybe a superregister is reserved?
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
+ switch (PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ assert(TRI->isSuperRegister(PhysReg, Alias) &&
+ "Instruction is not using a subregister of a reserved register");
+ // Leave the superregister in the working set.
+ PhysRegState[Alias] = regFree;
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ case regFree:
+ if (TRI->isSuperRegister(PhysReg, Alias)) {
+ // Leave the superregister in the working set.
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ }
+ // Some other alias was in the working set - clear it.
+ PhysRegState[Alias] = regDisabled;
+ break;
+ default:
+ llvm_unreachable("Instruction uses an alias of an allocated register");
+ }
+ }
+
+ // All aliases are disabled, bring register into working set.
+ PhysRegState[PhysReg] = regFree;
+ MO.setIsKill();
+}
+
+/// definePhysReg - Mark PhysReg as reserved or free after spilling any
+/// virtregs. This is very similar to defineVirtReg except the physreg is
+/// reserved instead of allocated.
+void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
+ RegState NewState) {
+ markRegUsedInInstr(PhysReg);
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[PhysReg] = NewState;
+ return;
+ }
+
+ // This is a disabled register, disable all aliases.
+ PhysRegState[PhysReg] = NewState;
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[Alias] = regDisabled;
+ if (TRI->isSuperRegister(PhysReg, Alias))
+ return;
+ break;
+ }
+ }
+}
+
+
+// calcSpillCost - Return the cost of spilling clearing out PhysReg and
+// aliases so it is free for allocation.
+// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
+// can be allocated directly.
+// Returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+ if (isRegUsedInInstr(PhysReg)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
+ return spillImpossible;
+ }
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regFree:
+ return 0;
+ case regReserved:
+ DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
+ << PrintReg(PhysReg, TRI) << " is reserved already.\n");
+ return spillImpossible;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ return I->Dirty ? spillDirty : spillClean;
+ }
+ }
+
+ // This is a disabled register, add up cost of aliases.
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
+ unsigned Cost = 0;
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regFree:
+ ++Cost;
+ break;
+ case regReserved:
+ return spillImpossible;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ Cost += I->Dirty ? spillDirty : spillClean;
+ break;
+ }
+ }
+ }
+ return Cost;
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) {
+ DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to "
+ << PrintReg(PhysReg, TRI) << "\n");
+ PhysRegState[PhysReg] = LR.VirtReg;
+ assert(!LR.PhysReg && "Already assigned a physreg");
+ LR.PhysReg = PhysReg;
+}
+
+RAFast::LiveRegMap::iterator
+RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
+}
+
+/// allocVirtReg - Allocate a physical register for VirtReg.
+RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
+ LiveRegMap::iterator LRI,
+ unsigned Hint) {
+ const unsigned VirtReg = LRI->VirtReg;
+
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Can only allocate virtual registers");
+
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+ // Ignore invalid hints.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || !MRI->isAllocatable(Hint)))
+ Hint = 0;
+
+ // Take hint when possible.
+ if (Hint) {
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint);
+ if (Cost < spillDirty) {
+ if (Cost)
+ definePhysReg(MI, Hint, regFree);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, Hint);
+ }
+ }
+
+ ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC);
+
+ // First try to find a completely free register.
+ for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
+ unsigned PhysReg = *I;
+ if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
+ }
+ }
+
+ DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
+ << RC->getName() << "\n");
+
+ unsigned BestReg = 0, BestCost = spillImpossible;
+ for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
+ unsigned Cost = calcSpillCost(*I);
+ DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
+ DEBUG(dbgs() << "\tCost: " << Cost << "\n");
+ DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
+ // Cost is 0 when all aliases are already disabled.
+ if (Cost == 0) {
+ assignVirtToPhysReg(*LRI, *I);
+ return LRI;
+ }
+ if (Cost < BestCost)
+ BestReg = *I, BestCost = Cost;
+ }
+
+ if (BestReg) {
+ definePhysReg(MI, BestReg, regFree);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, BestReg);
+ }
+
+ // Nothing we can do. Report an error and keep going with a bad allocation.
+ MI->emitError("ran out of registers during register allocation");
+ definePhysReg(MI, *AO.begin(), regFree);
+ return assignVirtToPhysReg(VirtReg, *AO.begin());
+}
+
+/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
+RAFast::LiveRegMap::iterator
+RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+ if (New) {
+ // If there is no hint, peek at the only use of this register.
+ if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
+ MRI->hasOneNonDBGUse(VirtReg)) {
+ const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg);
+ // It's a copy, use the destination register as a hint.
+ if (UseMI.isCopyLike())
+ Hint = UseMI.getOperand(0).getReg();
+ }
+ LRI = allocVirtReg(MI, LRI, Hint);
+ } else if (LRI->LastUse) {
+ // Redefining a live register - kill at the last use, unless it is this
+ // instruction defining VirtReg multiple times.
+ if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
+ addKillFlag(*LRI);
+ }
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = MI;
+ LRI->LastOpNum = OpNum;
+ LRI->Dirty = true;
+ markRegUsedInInstr(LRI->PhysReg);
+ return LRI;
+}
+
+/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
+RAFast::LiveRegMap::iterator
+RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (New) {
+ LRI = allocVirtReg(MI, LRI, Hint);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
+ << PrintReg(LRI->PhysReg, TRI) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI);
+ ++NumLoads;
+ } else if (LRI->Dirty) {
+ if (isLastUseOfLocalReg(MO)) {
+ DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ if (MO.isUse())
+ MO.setIsKill();
+ else
+ MO.setIsDead();
+ } else if (MO.isKill()) {
+ DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ } else if (MO.isKill()) {
+ // We must remove kill flags from uses of reloaded registers because the
+ // register would be killed immediately, and there might be a second use:
+ // %foo = OR %x<kill>, %x
+ // This would cause a second reload of %x into a different register.
+ DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = MI;
+ LRI->LastOpNum = OpNum;
+ markRegUsedInInstr(LRI->PhysReg);
+ return LRI;
+}
+
+// setPhysReg - Change operand OpNum in MI the refer the PhysReg, considering
+// subregs. This may invalidate any operand pointers.
+// Return true if the operand kills its register.
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
+ MachineOperand &MO = MI->getOperand(OpNum);
+ bool Dead = MO.isDead();
+ if (!MO.getSubReg()) {
+ MO.setReg(PhysReg);
+ return MO.isKill() || Dead;
+ }
+
+ // Handle subregister index.
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setSubReg(0);
+
+ // A kill flag implies killing the full register. Add corresponding super
+ // register kill.
+ if (MO.isKill()) {
+ MI->addRegisterKilled(PhysReg, TRI, true);
+ return true;
+ }
+
+ // A <def,read-undef> of a sub-register requires an implicit def of the full
+ // register.
+ if (MO.isDef() && MO.isUndef())
+ MI->addRegisterDefined(PhysReg, TRI);
+
+ return Dead;
+}
+
+// Handle special instruction operand like early clobbers and tied ops when
+// there are additional physreg defines.
+void RAFast::handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead) {
+ DEBUG(dbgs() << "Scanning for through registers:");
+ SmallSet<unsigned, 8> ThroughRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
+ (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+ if (ThroughRegs.insert(Reg))
+ DEBUG(dbgs() << ' ' << PrintReg(Reg));
+ }
+ }
+
+ // If any physreg defines collide with preallocated through registers,
+ // we must spill and reallocate.
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ markRegUsedInInstr(Reg);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (ThroughRegs.count(PhysRegState[*AI]))
+ definePhysReg(MI, *AI, regFree);
+ }
+ }
+
+ SmallVector<unsigned, 8> PartialDefs;
+ DEBUG(dbgs() << "Allocating tied uses.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ unsigned DefIdx = 0;
+ if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
+ DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
+ << DefIdx << ".\n");
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->PhysReg;
+ setPhysReg(MI, i, PhysReg);
+ // Note: we don't update the def operand yet. That would cause the normal
+ // def-scan to attempt spilling.
+ } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+ DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ // Reload the register, but don't assign to the operand just yet.
+ // That would confuse the later phys-def processing pass.
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ PartialDefs.push_back(LRI->PhysReg);
+ }
+ }
+
+ DEBUG(dbgs() << "Allocating early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (!MO.isEarlyClobber())
+ continue;
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+
+ // Restore UsedInInstr to a state usable for allocating normal virtual uses.
+ UsedInInstr.clear();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
+ << " as used in instr\n");
+ markRegUsedInInstr(Reg);
+ }
+
+ // Also mark PartialDefs as used to avoid reallocation.
+ for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
+ markRegUsedInInstr(PartialDefs[i]);
+}
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
+
+ PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
+
+ MachineBasicBlock::iterator MII = MBB->begin();
+
+ // Add live-in registers as live.
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ if (MRI->isAllocatable(*I))
+ definePhysReg(MII, *I, regReserved);
+
+ SmallVector<unsigned, 8> VirtDead;
+ SmallVector<MachineInstr*, 32> Coalesced;
+
+ // Otherwise, sequentially allocate each instruction in the MBB.
+ while (MII != MBB->end()) {
+ MachineInstr *MI = MII++;
+ const MCInstrDesc &MCID = MI->getDesc();
+ DEBUG({
+ dbgs() << "\n>> " << *MI << "Regs:";
+ for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+ if (PhysRegState[Reg] == regDisabled) continue;
+ dbgs() << " " << TRI->getName(Reg);
+ switch(PhysRegState[Reg]) {
+ case regFree:
+ break;
+ case regReserved:
+ dbgs() << "*";
+ break;
+ default: {
+ dbgs() << '=' << PrintReg(PhysRegState[Reg]);
+ LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ if (I->Dirty)
+ dbgs() << "*";
+ assert(I->PhysReg == Reg && "Bad inverse map");
+ break;
+ }
+ }
+ }
+ dbgs() << '\n';
+ // Check that LiveVirtRegs is the inverse.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+ e = LiveVirtRegs.end(); i != e; ++i) {
+ assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
+ "Bad map key");
+ assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
+ "Bad map value");
+ assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
+ }
+ });
+
+ // Debug values are not allowed to change codegen in any way.
+ if (MI->isDebugValue()) {
+ bool ScanDbgValue = true;
+ while (ScanDbgValue) {
+ ScanDbgValue = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
+ if (LRI != LiveVirtRegs.end())
+ setPhysReg(MI, i, LRI->PhysReg);
+ else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS == -1) {
+ // We can't allocate a physreg for a DebugValue, sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+ else {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ int64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" <<
+ "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ // Scan NewDV operands from the beginning.
+ MI = NewDV;
+ ScanDbgValue = true;
+ break;
+ } else {
+ // We can't allocate a physreg for a DebugValue; sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+ }
+ }
+ LiveDbgValueMap[Reg].push_back(MI);
+ }
+ }
+ // Next instruction.
+ continue;
+ }
+
+ // If this is a copy, we may be able to coalesce.
+ unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0;
+ if (MI->isCopy()) {
+ CopyDst = MI->getOperand(0).getReg();
+ CopySrc = MI->getOperand(1).getReg();
+ CopyDstSub = MI->getOperand(0).getSubReg();
+ CopySrcSub = MI->getOperand(1).getSubReg();
+ }
+
+ // Track registers used by instruction.
+ UsedInInstr.clear();
+
+ // First scan.
+ // Mark physreg uses and early clobbers as used.
+ // Find the end of the virtreg operands
+ unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask()) {
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+ continue;
+ }
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg) continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+ } else {
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI->readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
+ }
+ continue;
+ }
+ if (!MRI->isAllocatable(Reg)) continue;
+ if (MO.isUse()) {
+ usePhysReg(MO);
+ } else if (MO.isEarlyClobber()) {
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
+ }
+
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDst = 0;
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
+ }
+
+ // Second scan.
+ // Allocate virtreg uses.
+ for (unsigned i = 0; i != VirtOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
+ unsigned PhysReg = LRI->PhysReg;
+ CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, i, PhysReg))
+ killVirtReg(LRI);
+ }
+ }
+
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setRegUnitUsed(*I);
+
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
+ UsedInInstr.clear();
+ if (hasEarlyClobbers) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
+ markRegUsedInInstr(Reg);
+ }
+ }
+
+ unsigned DefOpEnd = MI->getNumOperands();
+ if (MI->isCall()) {
+ // Spill all virtregs before a call. This serves two purposes: 1. If an
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots, and 2. we don't have to wade through
+ // all the <imp-def> operands on the call instruction.
+ DefOpEnd = VirtOpEnd;
+ DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI);
+
+ // The imp-defs are skipped below, but we still need to mark those
+ // registers as used by the function.
+ SkippedInstrs.insert(&MCID);
+ }
+
+ // Third scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned i = 0; i != DefOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!MRI->isAllocatable(Reg)) continue;
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ continue;
+ }
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
+ unsigned PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, i, PhysReg)) {
+ VirtDead.push_back(Reg);
+ CopyDst = 0; // cancel coalescing;
+ } else
+ CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
+ }
+
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+ // because we are crerating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
+ killVirtReg(VirtDead[i]);
+ VirtDead.clear();
+
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setRegUnitUsed(*I);
+
+ if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
+ DEBUG(dbgs() << "-- coalescing: " << *MI);
+ Coalesced.push_back(MI);
+ } else {
+ DEBUG(dbgs() << "<< " << *MI);
+ }
+ }
+
+ // Spill all physical registers holding virtual registers now.
+ DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+ spillAll(MBB->getFirstTerminator());
+
+ // Erase all the coalesced copies. We are delaying it until now because
+ // LiveVirtRegs might refer to the instrs.
+ for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
+ MBB->erase(Coalesced[i]);
+ NumCopies += Coalesced.size();
+
+ DEBUG(MBB->dump());
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
+ DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+ << "********** Function: " << Fn.getName() << '\n');
+ MF = &Fn;
+ MRI = &MF->getRegInfo();
+ TM = &Fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ MRI->freezeReservedRegs(Fn);
+ RegClassInfo.runOnMachineFunction(Fn);
+ UsedInInstr.clear();
+ UsedInInstr.setUniverse(TRI->getNumRegUnits());
+
+ assert(!MRI->isSSA() && "regalloc requires leaving SSA");
+
+ // initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers
+ StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
+ LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
+ MBBi != MBBe; ++MBBi) {
+ MBB = &*MBBi;
+ AllocateBasicBlock();
+ }
+
+ // Add the clobber lists for all the instructions we skipped earlier.
+ for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
+ I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
+ if (const uint16_t *Defs = (*I)->getImplicitDefs())
+ while (*Defs)
+ MRI->setPhysRegUsed(*Defs++);
+
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ MRI->clearVirtRegs();
+
+ SkippedInstrs.clear();
+ StackSlotForVirtReg.clear();
+ LiveDbgValueMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createFastRegisterAllocator() {
+ return new RAFast();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
new file mode 100644
index 000000000000..6d84176af261
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -0,0 +1,1791 @@
+//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "AllocationOrder.h"
+#include "InterferenceCache.h"
+#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
+#include "SpillPlacement.h"
+#include "Spiller.h"
+#include "SplitKit.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <queue>
+
+using namespace llvm;
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits, "Number of split local live ranges");
+STATISTIC(NumEvicted, "Number of interferences evicted");
+
+static cl::opt<SplitEditor::ComplementSpillMode>
+SplitSpillMode("split-spill-mode", cl::Hidden,
+ cl::desc("Spill mode for splitting live ranges"),
+ cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+ clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+ clEnumValEnd),
+ cl::init(SplitEditor::SM_Partition));
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+ createGreedyRegisterAllocator);
+
+namespace {
+class RAGreedy : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
+
+ // context
+ MachineFunction *MF;
+
+ // analyses
+ SlotIndexes *Indexes;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ EdgeBundles *Bundles;
+ SpillPlacement *SpillPlacer;
+ LiveDebugVariables *DebugVars;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+ unsigned NextCascade;
+
+ // Live ranges pass through a number of stages as we try to allocate them.
+ // Some of the stages may also create new live ranges:
+ //
+ // - Region splitting.
+ // - Per-block splitting.
+ // - Local splitting.
+ // - Spilling.
+ //
+ // Ranges produced by one of the stages skip the previous stages when they are
+ // dequeued. This improves performance because we can skip interference checks
+ // that are unlikely to give any results. It also guarantees that the live
+ // range splitting algorithm terminates, something that is otherwise hard to
+ // ensure.
+ enum LiveRangeStage {
+ /// Newly created live range that has never been queued.
+ RS_New,
+
+ /// Only attempt assignment and eviction. Then requeue as RS_Split.
+ RS_Assign,
+
+ /// Attempt live range splitting if assignment is impossible.
+ RS_Split,
+
+ /// Attempt more aggressive live range splitting that is guaranteed to make
+ /// progress. This is used for split products that may not be making
+ /// progress.
+ RS_Split2,
+
+ /// Live range will be spilled. No more splitting will be attempted.
+ RS_Spill,
+
+ /// There is nothing more we can do to this live range. Abort compilation
+ /// if it can't be assigned.
+ RS_Done
+ };
+
+ static const char *const StageName[];
+
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage;
+
+ // Cascade - Eviction loop prevention. See canEvictInterference().
+ unsigned Cascade;
+
+ RegInfo() : Stage(RS_New), Cascade(0) {}
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
+
+ LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+ return ExtraRegInfo[VirtReg.reg].Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo[VirtReg.reg].Stage = Stage;
+ }
+
+ template<typename Iterator>
+ void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ for (;Begin != End; ++Begin) {
+ unsigned Reg = (*Begin)->reg;
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = NewStage;
+ }
+ }
+
+ /// Cost of evicting interference.
+ struct EvictionCost {
+ unsigned BrokenHints; ///< Total number of broken hints.
+ float MaxWeight; ///< Maximum spill weight evicted.
+
+ EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+
+ bool operator<(const EvictionCost &O) const {
+ if (BrokenHints != O.BrokenHints)
+ return BrokenHints < O.BrokenHints;
+ return MaxWeight < O.MaxWeight;
+ }
+ };
+
+ // splitting state.
+ std::auto_ptr<SplitAnalysis> SA;
+ std::auto_ptr<SplitEditor> SE;
+
+ /// Cached per-block interference maps
+ InterferenceCache IntfCache;
+
+ /// All basic blocks where the current register has uses.
+ SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+ /// Global live range splitting candidate info.
+ struct GlobalSplitCandidate {
+ // Register intended for assignment, or 0.
+ unsigned PhysReg;
+
+ // SplitKit interval index for this candidate.
+ unsigned IntvIdx;
+
+ // Interference for PhysReg.
+ InterferenceCache::Cursor Intf;
+
+ // Bundles where this candidate should be live.
+ BitVector LiveBundles;
+ SmallVector<unsigned, 8> ActiveBlocks;
+
+ void reset(InterferenceCache &Cache, unsigned Reg) {
+ PhysReg = Reg;
+ IntvIdx = 0;
+ Intf.setPhysReg(Cache, Reg);
+ LiveBundles.clear();
+ ActiveBlocks.clear();
+ }
+
+ // Set B[i] = C for every live bundle where B[i] was NoCand.
+ unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
+ unsigned Count = 0;
+ for (int i = LiveBundles.find_first(); i >= 0;
+ i = LiveBundles.find_next(i))
+ if (B[i] == NoCand) {
+ B[i] = C;
+ Count++;
+ }
+ return Count;
+ }
+ };
+
+ /// Candidate info for for each PhysReg in AllocationOrder.
+ /// This vector never shrinks, but grows to the size of the largest register
+ /// class.
+ SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+
+ enum { NoCand = ~0u };
+
+ /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
+ /// NoCand which indicates the stack interval.
+ SmallVector<unsigned, 32> BundleCand;
+
+public:
+ RAGreedy();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Greedy Register Allocator";
+ }
+
+ /// RAGreedy analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void releaseMemory();
+ virtual Spiller &spiller() { return *SpillerInstance; }
+ virtual void enqueue(LiveInterval *LI);
+ virtual LiveInterval *dequeue();
+ virtual unsigned selectOrSplit(LiveInterval&,
+ SmallVectorImpl<LiveInterval*>&);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+
+private:
+ bool LRE_CanEraseVirtReg(unsigned);
+ void LRE_WillShrinkVirtReg(unsigned);
+ void LRE_DidCloneVirtReg(unsigned, unsigned);
+
+ float calcSpillCost();
+ bool addSplitConstraints(InterferenceCache::Cursor, float&);
+ void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ void growRegion(GlobalSplitCandidate &Cand);
+ float calcGlobalSplitCost(GlobalSplitCandidate&);
+ bool calcCompactRegion(GlobalSplitCandidate&);
+ void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
+ void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+ bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
+ bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+ void evictInterference(LiveInterval&, unsigned,
+ SmallVectorImpl<LiveInterval*>&);
+
+ unsigned tryAssign(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryEvict(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
+ unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned trySplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+};
+} // end anonymous namespace
+
+char RAGreedy::ID = 0;
+
+#ifndef NDEBUG
+const char *const RAGreedy::StageName[] = {
+ "RS_New",
+ "RS_Assign",
+ "RS_Split",
+ "RS_Split2",
+ "RS_Spill",
+ "RS_Done"
+};
+#endif
+
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = 0.98f;
+
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+ return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
+ AU.addRequired<EdgeBundles>();
+ AU.addRequired<SpillPlacement>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+
+//===----------------------------------------------------------------------===//
+// LiveRangeEdit delegate methods
+//===----------------------------------------------------------------------===//
+
+bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ if (VRM->hasPhys(VirtReg)) {
+ Matrix->unassign(LIS->getInterval(VirtReg));
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ return false;
+}
+
+void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+ if (!VRM->hasPhys(VirtReg))
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ enqueue(&LI);
+}
+
+void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+ // Cloning a register we haven't even heard about yet? Just ignore it.
+ if (!ExtraRegInfo.inBounds(Old))
+ return;
+
+ // LRE may clone a virtual register because dead code elimination causes it to
+ // be split into connected components. The new components are much smaller
+ // than the original, so they should get a new chance at being assigned.
+ // same stage as the parent.
+ ExtraRegInfo[Old].Stage = RS_Assign;
+ ExtraRegInfo.grow(New);
+ ExtraRegInfo[New] = ExtraRegInfo[Old];
+}
+
+void RAGreedy::releaseMemory() {
+ SpillerInstance.reset(0);
+ ExtraRegInfo.clear();
+ GlobalCand.clear();
+}
+
+void RAGreedy::enqueue(LiveInterval *LI) {
+ // Prioritize live ranges by size, assigning larger ranges first.
+ // The queue holds (size, reg) pairs.
+ const unsigned Size = LI->getSize();
+ const unsigned Reg = LI->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only enqueue virtual registers");
+ unsigned Prio;
+
+ ExtraRegInfo.grow(Reg);
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = RS_Assign;
+
+ if (ExtraRegInfo[Reg].Stage == RS_Split) {
+ // Unsplit ranges that couldn't be allocated immediately are deferred until
+ // everything else has been allocated.
+ Prio = Size;
+ } else {
+ // Everything is allocated in long->short order. Long ranges that don't fit
+ // should be spilled (or split) ASAP so they don't create interference.
+ Prio = (1u << 31) + Size;
+
+ // Boost ranges that have a physical register hint.
+ if (VRM->hasKnownPreference(Reg))
+ Prio |= (1u << 30);
+ }
+
+ Queue.push(std::make_pair(Prio, ~Reg));
+}
+
+LiveInterval *RAGreedy::dequeue() {
+ if (Queue.empty())
+ return 0;
+ LiveInterval *LI = &LIS->getInterval(~Queue.top().second);
+ Queue.pop();
+ return LI;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Direct Assignment
+//===----------------------------------------------------------------------===//
+
+/// tryAssign - Try to assign VirtReg to an available register.
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ Order.rewind();
+ unsigned PhysReg;
+ while ((PhysReg = Order.next()))
+ if (!Matrix->checkInterference(VirtReg, PhysReg))
+ break;
+ if (!PhysReg || Order.isHint())
+ return PhysReg;
+
+ // PhysReg is available, but there may be a better choice.
+
+ // If we missed a simple hint, try to cheaply evict interference from the
+ // preferred register.
+ if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
+ if (Order.isHint(Hint)) {
+ DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+ EvictionCost MaxCost(1);
+ if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+ evictInterference(VirtReg, Hint, NewVRegs);
+ return Hint;
+ }
+ }
+
+ // Try to evict interference from a cheaper alternative.
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+
+ // Most registers have 0 additional cost.
+ if (!Cost)
+ return PhysReg;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+ << '\n');
+ unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+ return CheapReg ? CheapReg : PhysReg;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Interference eviction
+//===----------------------------------------------------------------------===//
+
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
+///
+/// @param A The live range to be assigned.
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B, bool BreaksHint) {
+ bool CanSplit = getStage(B) < RS_Spill;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ return A.weight > B.weight;
+}
+
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted. When OnlyCheap is set, don't do anything
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ bool IsHint, EvictionCost &MaxCost) {
+ // It is only possible to evict virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
+ return false;
+
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = NextCascade;
+
+ EvictionCost Cost;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ // If there is 10 or more interferences, chances are one is heavier.
+ if (Q.collectInterferingVRegs(10) >= 10)
+ return false;
+
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
+ "Only expecting virtual register interference from query");
+ // Never evict spill products. They cannot split or spill.
+ if (getStage(*Intf) == RS_Done)
+ return false;
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ //
+ // Also allow urgent evictions of unspillable ranges from a strictly
+ // larger allocation order.
+ bool Urgent = !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
+ return false;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
+ }
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ // Finally, apply the eviction policy for non-urgent evictions.
+ if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
+ }
+ }
+ MaxCost = Cost;
+ return true;
+}
+
+/// evictInterference - Evict any interferring registers that prevent VirtReg
+/// from being assigned to Physreg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Make sure that VirtReg has a cascade number, and assign that cascade
+ // number to every evicted register. These live ranges than then only be
+ // evicted by a newer cascade, preventing infinite loops.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+ DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+ << " interference: Cascade " << Cascade << '\n');
+
+ // Collect all interfering virtregs first.
+ SmallVector<LiveInterval*, 8> Intfs;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
+ ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
+ Intfs.append(IVR.begin(), IVR.end());
+ }
+
+ // Evict them second. This will invalidate the queries.
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ LiveInterval *Intf = Intfs[i];
+ // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
+ if (!VRM->hasPhys(Intf->reg))
+ continue;
+ Matrix->unassign(*Intf);
+ assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ++NumEvicted;
+ NewVRegs.push_back(Intf);
+ }
+}
+
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs,
+ unsigned CostPerUseLimit) {
+ NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost(~0u);
+ unsigned BestPhys = 0;
+ unsigned OrderLimit = Order.getOrder().size();
+
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < ~0u) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight;
+
+ // Check of any registers in RC are below CostPerUseLimit.
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
+ unsigned MinCost = RegClassInfo.getMinCost(RC);
+ if (MinCost >= CostPerUseLimit) {
+ DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost
+ << ", no cheaper registers to be found.\n");
+ return 0;
+ }
+
+ // It is normal for register classes to have a long tail of registers with
+ // the same cost. We don't need to look at them if they're too expensive.
+ if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
+ OrderLimit = RegClassInfo.getLastCostChange(RC);
+ DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
+ }
+ }
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) {
+ if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
+ continue;
+ // The first use of a callee-saved register in a function has cost 1.
+ // Don't start using a CSR when the CostPerUseLimit is low.
+ if (CostPerUseLimit == 1)
+ if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+ if (!MRI->isPhysRegUsed(CSR)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
+ << PrintReg(CSR, TRI) << '\n');
+ continue;
+ }
+
+ if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
+ continue;
+
+ // Best so far.
+ BestPhys = PhysReg;
+
+ // Stop if the hint can be used.
+ if (Order.isHint())
+ break;
+ }
+
+ if (!BestPhys)
+ return 0;
+
+ evictInterference(VirtReg, BestPhys, NewVRegs);
+ return BestPhys;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// addSplitConstraints - Fill out the SplitConstraints vector based on the
+/// interference pattern in Physreg and its aliases. Add the constraints to
+/// SpillPlacement and return the static cost of this split in Cost, assuming
+/// that all preferences in SplitConstraints are met.
+/// Return false if there are no bundles with positive bias.
+bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
+ float &Cost) {
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+
+ // Reset interference dependent info.
+ SplitConstraints.resize(UseBlocks.size());
+ float StaticCost = 0;
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+
+ BC.Number = BI.MBB->getNumber();
+ Intf.moveToBlock(BC.Number);
+ BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.ChangesValue = BI.FirstDef;
+
+ if (!Intf.hasInterference())
+ continue;
+
+ // Number of spill code instructions to insert.
+ unsigned Ins = 0;
+
+ // Interference for the live-in value.
+ if (BI.LiveIn) {
+ if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
+ BC.Entry = SpillPlacement::MustSpill, ++Ins;
+ else if (Intf.first() < BI.FirstInstr)
+ BC.Entry = SpillPlacement::PrefSpill, ++Ins;
+ else if (Intf.first() < BI.LastInstr)
+ ++Ins;
+ }
+
+ // Interference for the live-out value.
+ if (BI.LiveOut) {
+ if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
+ BC.Exit = SpillPlacement::MustSpill, ++Ins;
+ else if (Intf.last() > BI.LastInstr)
+ BC.Exit = SpillPlacement::PrefSpill, ++Ins;
+ else if (Intf.last() > BI.FirstInstr)
+ ++Ins;
+ }
+
+ // Accumulate the total frequency of inserted spill code.
+ if (Ins)
+ StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+ }
+ Cost = StaticCost;
+
+ // Add constraints for use-blocks. Note that these are the only constraints
+ // that may add a positive bias, it is downhill from here.
+ SpillPlacer->addConstraints(SplitConstraints);
+ return SpillPlacer->scanActiveBundles();
+}
+
+
+/// addThroughConstraints - Add constraints and links to SpillPlacer from the
+/// live-through blocks in Blocks.
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+ ArrayRef<unsigned> Blocks) {
+ const unsigned GroupSize = 8;
+ SpillPlacement::BlockConstraint BCS[GroupSize];
+ unsigned TBS[GroupSize];
+ unsigned B = 0, T = 0;
+
+ for (unsigned i = 0; i != Blocks.size(); ++i) {
+ unsigned Number = Blocks[i];
+ Intf.moveToBlock(Number);
+
+ if (!Intf.hasInterference()) {
+ assert(T < GroupSize && "Array overflow");
+ TBS[T] = Number;
+ if (++T == GroupSize) {
+ SpillPlacer->addLinks(makeArrayRef(TBS, T));
+ T = 0;
+ }
+ continue;
+ }
+
+ assert(B < GroupSize && "Array overflow");
+ BCS[B].Number = Number;
+
+ // Interference for the live-in value.
+ if (Intf.first() <= Indexes->getMBBStartIdx(Number))
+ BCS[B].Entry = SpillPlacement::MustSpill;
+ else
+ BCS[B].Entry = SpillPlacement::PrefSpill;
+
+ // Interference for the live-out value.
+ if (Intf.last() >= SA->getLastSplitPoint(Number))
+ BCS[B].Exit = SpillPlacement::MustSpill;
+ else
+ BCS[B].Exit = SpillPlacement::PrefSpill;
+
+ if (++B == GroupSize) {
+ ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+ SpillPlacer->addConstraints(Array);
+ B = 0;
+ }
+ }
+
+ ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+ SpillPlacer->addConstraints(Array);
+ SpillPlacer->addLinks(makeArrayRef(TBS, T));
+}
+
+void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+ // Keep track of through blocks that have not been added to SpillPlacer.
+ BitVector Todo = SA->getThroughBlocks();
+ SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
+ unsigned AddedTo = 0;
+#ifndef NDEBUG
+ unsigned Visited = 0;
+#endif
+
+ for (;;) {
+ ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
+ // Find new through blocks in the periphery of PrefRegBundles.
+ for (int i = 0, e = NewBundles.size(); i != e; ++i) {
+ unsigned Bundle = NewBundles[i];
+ // Look at all blocks connected to Bundle in the full graph.
+ ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ unsigned Block = *I;
+ if (!Todo.test(Block))
+ continue;
+ Todo.reset(Block);
+ // This is a new through block. Add it to SpillPlacer later.
+ ActiveBlocks.push_back(Block);
+#ifndef NDEBUG
+ ++Visited;
+#endif
+ }
+ }
+ // Any new blocks to add?
+ if (ActiveBlocks.size() == AddedTo)
+ break;
+
+ // Compute through constraints from the interference, or assume that all
+ // through blocks prefer spilling when forming compact regions.
+ ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
+ if (Cand.PhysReg)
+ addThroughConstraints(Cand.Intf, NewBlocks);
+ else
+ // Provide a strong negative bias on through blocks to prevent unwanted
+ // liveness on loop backedges.
+ SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
+ AddedTo = ActiveBlocks.size();
+
+ // Perhaps iterating can enable more bundles?
+ SpillPlacer->iterate();
+ }
+ DEBUG(dbgs() << ", v=" << Visited);
+}
+
+/// calcCompactRegion - Compute the set of edge bundles that should be live
+/// when splitting the current live range into compact regions. Compact
+/// regions can be computed without looking at interference. They are the
+/// regions formed by removing all the live-through blocks from the live range.
+///
+/// Returns false if the current live range is already compact, or if the
+/// compact regions would form single block regions anyway.
+bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
+ // Without any through blocks, the live range is already compact.
+ if (!SA->getNumThroughBlocks())
+ return false;
+
+ // Compact regions don't correspond to any physreg.
+ Cand.reset(IntfCache, 0);
+
+ DEBUG(dbgs() << "Compact region bundles");
+
+ // Use the spill placer to determine the live bundles. GrowRegion pretends
+ // that all the through blocks have interference when PhysReg is unset.
+ SpillPlacer->prepare(Cand.LiveBundles);
+
+ // The static split cost will be zero since Cand.Intf reports no interference.
+ float Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ growRegion(Cand);
+ SpillPlacer->finish();
+
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ DEBUG({
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ return true;
+}
+
+/// calcSpillCost - Compute how expensive it would be to split the live range in
+/// SA around all use blocks instead of forming bundle regions.
+float RAGreedy::calcSpillCost() {
+ float Cost = 0;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ // We normally only need one spill instruction - a load or a store.
+ Cost += SpillPlacer->getBlockFrequency(Number);
+
+ // Unless the value is redefined in the block.
+ if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
+ Cost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return Cost;
+}
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SplitConstraints.
+///
+float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
+ float GlobalCost = 0;
+ const BitVector &LiveBundles = Cand.LiveBundles;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
+ unsigned Ins = 0;
+
+ if (BI.LiveIn)
+ Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
+ if (BI.LiveOut)
+ Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
+ if (Ins)
+ GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+ }
+
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+ unsigned Number = Cand.ActiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+ if (!RegIn && !RegOut)
+ continue;
+ if (RegIn && RegOut) {
+ // We need double spill code if this block has interference.
+ Cand.Intf.moveToBlock(Number);
+ if (Cand.Intf.hasInterference())
+ GlobalCost += 2*SpillPlacer->getBlockFrequency(Number);
+ continue;
+ }
+ // live-in / stack-out or stack-in live-out.
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return GlobalCost;
+}
+
+/// splitAroundRegion - Split the current live range around the regions
+/// determined by BundleCand and GlobalCand.
+///
+/// Before calling this function, GlobalCand and BundleCand must be initialized
+/// so each bundle is assigned to a valid candidate, or NoCand for the
+/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
+/// objects must be initialized for the current live range, and intervals
+/// created for the used candidates.
+///
+/// @param LREdit The LiveRangeEdit object handling the current split.
+/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
+/// must appear in this list.
+void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
+ ArrayRef<unsigned> UsedCands) {
+ // These are the intervals created for new global ranges. We may create more
+ // intervals for local ranges.
+ const unsigned NumGlobalIntvs = LREdit.size();
+ DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
+ assert(NumGlobalIntvs && "No global intervals configured");
+
+ // Isolate even single instructions when dealing with a proper sub-class.
+ // That guarantees register class inflation for the stack interval because it
+ // is all copies.
+ unsigned Reg = SA->getParent().reg;
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+
+ // First handle all the blocks with uses.
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+ if (BI.LiveIn) {
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+ }
+ if (BI.LiveOut) {
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ }
+
+ // Create separate intervals for isolated blocks with multiple uses.
+ if (!IntvIn && !IntvOut) {
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ continue;
+ }
+
+ if (IntvIn && IntvOut)
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ else if (IntvIn)
+ SE->splitRegInBlock(BI, IntvIn, IntfIn);
+ else
+ SE->splitRegOutBlock(BI, IntvOut, IntfOut);
+ }
+
+ // Handle live-through blocks. The relevant live-through blocks are stored in
+ // the ActiveBlocks list with each candidate. We need to filter out
+ // duplicates.
+ BitVector Todo = SA->getThroughBlocks();
+ for (unsigned c = 0; c != UsedCands.size(); ++c) {
+ ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ unsigned Number = Blocks[i];
+ if (!Todo.test(Number))
+ continue;
+ Todo.reset(Number);
+
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ if (!IntvIn && !IntvOut)
+ continue;
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ }
+ }
+
+ ++NumGlobalSplits;
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(Reg, LREdit.regs());
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ unsigned OrigBlocks = SA->getNumLiveBlocks();
+
+ // Sort out the new intervals created by splitting. We get four kinds:
+ // - Remainder intervals should not be split again.
+ // - Candidate intervals can be assigned to Cand.PhysReg.
+ // - Block-local splits are candidates for local splitting.
+ // - DCE leftovers should go back on the queue.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &Reg = *LREdit.get(i);
+
+ // Ignore old intervals from DCE.
+ if (getStage(Reg) != RS_New)
+ continue;
+
+ // Remainder interval. Don't try splitting again, spill if it doesn't
+ // allocate.
+ if (IntvMap[i] == 0) {
+ setStage(Reg, RS_Spill);
+ continue;
+ }
+
+ // Global intervals. Allow repeated splitting as long as the number of live
+ // blocks is strictly decreasing.
+ if (IntvMap[i] < NumGlobalIntvs) {
+ if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
+ DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+ << " blocks as original.\n");
+ // Don't allow repeated splitting as a safe guard against looping.
+ setStage(Reg, RS_Split2);
+ }
+ continue;
+ }
+
+ // Other intervals are treated as new. This includes local intervals created
+ // for blocks with multiple uses, and anything created by DCE.
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around region");
+}
+
+unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ unsigned NumCands = 0;
+ unsigned BestCand = NoCand;
+ float BestCost;
+ SmallVector<unsigned, 8> UsedCands;
+
+ // Check if we can split this live range around a compact region.
+ bool HasCompact = calcCompactRegion(GlobalCand.front());
+ if (HasCompact) {
+ // Yes, keep GlobalCand[0] as the compact region candidate.
+ NumCands = 1;
+ BestCost = HUGE_VALF;
+ } else {
+ // No benefit from the compact region, our fallback will be per-block
+ // splitting. Make sure we find a solution that is cheaper than spilling.
+ BestCost = Hysteresis * calcSpillCost();
+ DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+ }
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Discard bad candidates before we run out of interference cache cursors.
+ // This will only affect register classes with a lot of registers (>32).
+ if (NumCands == IntfCache.getMaxCursors()) {
+ unsigned WorstCount = ~0u;
+ unsigned Worst = 0;
+ for (unsigned i = 0; i != NumCands; ++i) {
+ if (i == BestCand || !GlobalCand[i].PhysReg)
+ continue;
+ unsigned Count = GlobalCand[i].LiveBundles.count();
+ if (Count < WorstCount)
+ Worst = i, WorstCount = Count;
+ }
+ --NumCands;
+ GlobalCand[Worst] = GlobalCand[NumCands];
+ if (BestCand == NumCands)
+ BestCand = Worst;
+ }
+
+ if (GlobalCand.size() <= NumCands)
+ GlobalCand.resize(NumCands+1);
+ GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+ Cand.reset(IntfCache, PhysReg);
+
+ SpillPlacer->prepare(Cand.LiveBundles);
+ float Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
+ continue;
+ }
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+ if (Cost >= BestCost) {
+ DEBUG({
+ if (BestCand == NoCand)
+ dbgs() << " worse than no bundles\n";
+ else
+ dbgs() << " worse than "
+ << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+ });
+ continue;
+ }
+ growRegion(Cand);
+
+ SpillPlacer->finish();
+
+ // No live bundles, defer to splitSingleBlocks().
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << " no bundles.\n");
+ continue;
+ }
+
+ Cost += calcGlobalSplitCost(Cand);
+ DEBUG({
+ dbgs() << ", total = " << Cost << " with bundles";
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ if (Cost < BestCost) {
+ BestCand = NumCands;
+ BestCost = Hysteresis * Cost; // Prevent rounding effects.
+ }
+ ++NumCands;
+ }
+
+ // No solutions found, fall back to single block splitting.
+ if (!HasCompact && BestCand == NoCand)
+ return 0;
+
+ // Prepare split editor.
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ SE->reset(LREdit, SplitSpillMode);
+
+ // Assign all edge bundles to the preferred candidate, or NoCand.
+ BundleCand.assign(Bundles->getNumBundles(), NoCand);
+
+ // Assign bundles for the best candidate region.
+ if (BestCand != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[BestCand];
+ if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
+ UsedCands.push_back(BestCand);
+ Cand.IntvIdx = SE->openIntv();
+ DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
+ << B << " bundles, intv " << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ // Assign bundles for the compact region.
+ if (HasCompact) {
+ GlobalSplitCandidate &Cand = GlobalCand.front();
+ assert(!Cand.PhysReg && "Compact region has no physreg");
+ if (unsigned B = Cand.getBundles(BundleCand, 0)) {
+ UsedCands.push_back(0);
+ Cand.IntvIdx = SE->openIntv();
+ DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
+ << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ splitAroundRegion(LREdit, UsedCands);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Per-Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryBlockSplit - Split a global live range around every block with uses. This
+/// creates a lot of local live ranges, that will be split by tryLocalSplit if
+/// they don't allocate.
+unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
+ unsigned Reg = VirtReg.reg;
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ SE->reset(LREdit, SplitSpillMode);
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ }
+ // No blocks were split.
+ if (LREdit.empty())
+ return 0;
+
+ // We did split for some blocks.
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+
+ // Tell LiveDebugVariables about the new ranges.
+ DebugVars->splitRegister(Reg, LREdit.regs());
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+ // Sort out the new intervals created by splitting. The remainder interval
+ // goes straight to spilling, the new local ranges get to stay RS_New.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &LI = *LREdit.get(i);
+ if (getStage(LI) == RS_New && IntvMap[i] == 0)
+ setStage(LI, RS_Spill);
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Per-Instruction Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryInstructionSplit - Split a live range around individual instructions.
+/// This is normally not worthwhile since the spiller is doing essentially the
+/// same thing. However, when the live range is in a constrained register
+/// class, it may help to insert copies such that parts of the live range can
+/// be moved to a larger register class.
+///
+/// This is similar to spilling to a larger register class.
+unsigned
+RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // There is no point to this if there are no larger sub-classes.
+ if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
+ return 0;
+
+ // Always enable split spill mode, since we're effectively spilling to a
+ // register.
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ SE->reset(LREdit, SplitEditor::SM_Size);
+
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ if (Uses.size() <= 1)
+ return 0;
+
+ DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
+
+ // Split around every non-copy instruction.
+ for (unsigned i = 0; i != Uses.size(); ++i) {
+ if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
+ if (MI->isFullCopy()) {
+ DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
+ continue;
+ }
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]);
+ SE->useIntv(SegStart, SegStop);
+ }
+
+ if (LREdit.empty()) {
+ DEBUG(dbgs() << "All uses were copies.\n");
+ return 0;
+ }
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+ // Assign all new registers to RS_Spill. This was the last chance.
+ setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local Splitting
+//===----------------------------------------------------------------------===//
+
+
+/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
+/// in order to use PhysReg between two entries in SA->UseSlots.
+///
+/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+///
+void RAGreedy::calcGapWeights(unsigned PhysReg,
+ SmallVectorImpl<float> &GapWeight) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ const unsigned NumGaps = Uses.size()-1;
+
+ // Start and end points for the interference check.
+ SlotIndex StartIdx =
+ BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
+ SlotIndex StopIdx =
+ BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
+
+ GapWeight.assign(NumGaps, 0.0f);
+
+ // Add interference from each overlapping register.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
+ .checkInterference())
+ continue;
+
+ // We know that VirtReg is a continuous interval from FirstInstr to
+ // LastInstr, so we don't need InterferenceQuery.
+ //
+ // Interference that overlaps an instruction is counted in both gaps
+ // surrounding the instruction. The exception is interference before
+ // StartIdx and after StopIdx.
+ //
+ LiveIntervalUnion::SegmentIter IntI =
+ Matrix->getLiveUnions()[*Units] .find(StartIdx);
+ for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
+ // Skip the gaps before IntI.
+ while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ // Update the gaps covered by IntI.
+ const float weight = IntI.value()->weight;
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = std::max(GapWeight[Gap], weight);
+ if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+
+ // Add fixed interference.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ const LiveInterval &LI = LIS->getRegUnit(*Units);
+ LiveInterval::const_iterator I = LI.find(StartIdx);
+ LiveInterval::const_iterator E = LI.end();
+
+ // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
+ for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
+ while (Uses[Gap+1].getBoundaryIndex() < I->start)
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = HUGE_VALF;
+ if (Uses[Gap+1].getBaseIndex() >= I->end)
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+}
+
+/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
+/// basic block.
+///
+unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+
+ // Note that it is possible to have an interval that is live-in or live-out
+ // while only covering a single block - A phi-def can use undef values from
+ // predecessors, and the block could be a single-block loop.
+ // We don't bother doing anything clever about such a case, we simply assume
+ // that the interval is continuous from FirstInstr to LastInstr. We should
+ // make sure that we don't do anything illegal to such an interval, though.
+
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ if (Uses.size() <= 2)
+ return 0;
+ const unsigned NumGaps = Uses.size()-1;
+
+ DEBUG({
+ dbgs() << "tryLocalSplit: ";
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ dbgs() << ' ' << Uses[i];
+ dbgs() << '\n';
+ });
+
+ // If VirtReg is live across any register mask operands, compute a list of
+ // gaps with register masks.
+ SmallVector<unsigned, 8> RegMaskGaps;
+ if (Matrix->checkRegMaskInterference(VirtReg)) {
+ // Get regmask slots for the whole block.
+ ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
+ DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+ // Constrain to VirtReg's live range.
+ unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
+ Uses.front().getRegSlot()) - RMS.begin();
+ unsigned re = RMS.size();
+ for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
+ // Look for Uses[i] <= RMS <= Uses[i+1].
+ assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
+ if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+ continue;
+ // Skip a regmask on the same instruction as the last use. It doesn't
+ // overlap the live range.
+ if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+ break;
+ DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
+ RegMaskGaps.push_back(i);
+ // Advance ri to the next gap. A regmask on one of the uses counts in
+ // both gaps.
+ while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
+ ++ri;
+ }
+ DEBUG(dbgs() << '\n');
+ }
+
+ // Since we allow local split results to be split again, there is a risk of
+ // creating infinite loops. It is tempting to require that the new live
+ // ranges have less instructions than the original. That would guarantee
+ // convergence, but it is too strict. A live range with 3 instructions can be
+ // split 2+3 (including the COPY), and we want to allow that.
+ //
+ // Instead we use these rules:
+ //
+ // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
+ // noop split, of course).
+ // 2. Require progress be made for ranges with getStage() == RS_Split2. All
+ // the new ranges must have fewer instructions than before the split.
+ // 3. New ranges with the same number of instructions are marked RS_Split2,
+ // smaller ranges are marked RS_New.
+ //
+ // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
+ // excessive splitting and infinite loops.
+ //
+ bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
+
+ // Best split candidate.
+ unsigned BestBefore = NumGaps;
+ unsigned BestAfter = 0;
+ float BestDiff = 0;
+
+ const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
+ SmallVector<float, 8> GapWeight;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Keep track of the largest spill weight that would need to be evicted in
+ // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+ calcGapWeights(PhysReg, GapWeight);
+
+ // Remove any gaps with regmask clobbers.
+ if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
+ for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
+ GapWeight[RegMaskGaps[i]] = HUGE_VALF;
+
+ // Try to find the best sequence of gaps to close.
+ // The new spill weight must be larger than any gap interference.
+
+ // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
+ unsigned SplitBefore = 0, SplitAfter = 1;
+
+ // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
+ // It is the spill weight that needs to be evicted.
+ float MaxGap = GapWeight[0];
+
+ for (;;) {
+ // Live before/after split?
+ const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
+ const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+ << Uses[SplitBefore] << '-' << Uses[SplitAfter]
+ << " i=" << MaxGap);
+
+ // Stop before the interval gets so big we wouldn't be making progress.
+ if (!LiveBefore && !LiveAfter) {
+ DEBUG(dbgs() << " all\n");
+ break;
+ }
+ // Should the interval be extended or shrunk?
+ bool Shrink = true;
+
+ // How many gaps would the new range have?
+ unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
+
+ // Legally, without causing looping?
+ bool Legal = !ProgressRequired || NewGaps < NumGaps;
+
+ if (Legal && MaxGap < HUGE_VALF) {
+ // Estimate the new spill weight. Each instruction reads or writes the
+ // register. Conservatively assume there are no read-modify-write
+ // instructions.
+ //
+ // Try to guess the size of the new interval.
+ const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
+ Uses[SplitBefore].distance(Uses[SplitAfter]) +
+ (LiveBefore + LiveAfter)*SlotIndex::InstrDist);
+ // Would this split be possible to allocate?
+ // Never allocate all gaps, we wouldn't be making progress.
+ DEBUG(dbgs() << " w=" << EstWeight);
+ if (EstWeight * Hysteresis >= MaxGap) {
+ Shrink = false;
+ float Diff = EstWeight - MaxGap;
+ if (Diff > BestDiff) {
+ DEBUG(dbgs() << " (best)");
+ BestDiff = Hysteresis * Diff;
+ BestBefore = SplitBefore;
+ BestAfter = SplitAfter;
+ }
+ }
+ }
+
+ // Try to shrink.
+ if (Shrink) {
+ if (++SplitBefore < SplitAfter) {
+ DEBUG(dbgs() << " shrink\n");
+ // Recompute the max when necessary.
+ if (GapWeight[SplitBefore - 1] >= MaxGap) {
+ MaxGap = GapWeight[SplitBefore];
+ for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
+ MaxGap = std::max(MaxGap, GapWeight[i]);
+ }
+ continue;
+ }
+ MaxGap = 0;
+ }
+
+ // Try to extend the interval.
+ if (SplitAfter >= NumGaps) {
+ DEBUG(dbgs() << " end\n");
+ break;
+ }
+
+ DEBUG(dbgs() << " extend\n");
+ MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
+ }
+ }
+
+ // Didn't find any candidates?
+ if (BestBefore == NumGaps)
+ return 0;
+
+ DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
+ << '-' << Uses[BestAfter] << ", " << BestDiff
+ << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
+
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ SE->reset(LREdit);
+
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
+ SE->useIntv(SegStart, SegStop);
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+ // If the new range has the same number of instructions as before, mark it as
+ // RS_Split2 so the next split will be forced to make progress. Otherwise,
+ // leave the new intervals as RS_New so they can compete.
+ bool LiveBefore = BestBefore != 0 || BI.LiveIn;
+ bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
+ unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
+ if (NewGaps >= NumGaps) {
+ DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ assert(!ProgressRequired && "Didn't make progress when it was required.");
+ for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
+ if (IntvMap[i] == 1) {
+ setStage(*LREdit.get(i), RS_Split2);
+ DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ ++NumLocalSplits;
+
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+/// trySplit - Try to split VirtReg or one of its interferences, making it
+/// assignable.
+/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
+unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*>&NewVRegs) {
+ // Ranges must be Split2 or less.
+ if (getStage(VirtReg) >= RS_Spill)
+ return 0;
+
+ // Local intervals are handled separately.
+ if (LIS->intervalIsInOneMBB(VirtReg)) {
+ NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+ SA->analyze(&VirtReg);
+ unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ return tryInstructionSplit(VirtReg, Order, NewVRegs);
+ }
+
+ NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+
+ SA->analyze(&VirtReg);
+
+ // FIXME: SplitAnalysis may repair broken live ranges coming from the
+ // coalescer. That may cause the range to become allocatable which means that
+ // tryRegionSplit won't be making progress. This check should be replaced with
+ // an assertion when the coalescer is fixed.
+ if (SA->didRepairRange()) {
+ // VirtReg has changed, so all cached queries are invalid.
+ Matrix->invalidateVirtRegs();
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+ }
+
+ // First try to split around a region spanning multiple blocks. RS_Split2
+ // ranges already made dubious progress with region splitting, so they go
+ // straight to single block splitting.
+ if (getStage(VirtReg) < RS_Split2) {
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ }
+
+ // Then isolate blocks.
+ return tryBlockSplit(VirtReg, Order, NewVRegs);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Main Entry Point
+//===----------------------------------------------------------------------===//
+
+unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // First try assigning a free register.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ LiveRangeStage Stage = getStage(VirtReg);
+ DEBUG(dbgs() << StageName[Stage]
+ << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
+
+ // Try to evict a less worthy live range, but only for ranges from the primary
+ // queue. The RS_Split ranges already failed to do this, and they should not
+ // get a second chance until they have been split.
+ if (Stage != RS_Split)
+ if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+
+ // The first time we see a live range, don't try to split or spill.
+ // Wait until the second time, when all smaller ranges have been allocated.
+ // This gives a better picture of the interference to split around.
+ if (Stage < RS_Split) {
+ setStage(VirtReg, RS_Split);
+ DEBUG(dbgs() << "wait for second round\n");
+ NewVRegs.push_back(&VirtReg);
+ return 0;
+ }
+
+ // If we couldn't allocate a register from spilling, there is probably some
+ // invalid inline assembly. The base class wil report it.
+ if (Stage >= RS_Done || !VirtReg.isSpillable())
+ return ~0u;
+
+ // Try splitting VirtReg or interferences.
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Finally spill VirtReg itself.
+ NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ spiller().spill(LRE);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+
+ if (VerifyEnabled)
+ MF->verify(this, "After spilling");
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+ << "********** Function: " << mf.getName() << '\n');
+
+ MF = &mf;
+ if (VerifyEnabled)
+ MF->verify(this, "Before greedy register allocator");
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+ Loops = &getAnalysis<MachineLoopInfo>();
+ Bundles = &getAnalysis<EdgeBundles>();
+ SpillPlacer = &getAnalysis<SpillPlacement>();
+ DebugVars = &getAnalysis<LiveDebugVariables>();
+
+ SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+ SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
+ ExtraRegInfo.clear();
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ NextCascade = 1;
+ IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
+ GlobalCand.resize(32); // This will grow as needed.
+
+ allocatePhysRegs();
+ releaseMemory();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 000000000000..607edac24bd2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,638 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+// architectures. In Proceedings of the Joint Conference on Languages,
+// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+// NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "RegisterCoalescer.h"
+#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
+#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+ createDefaultPBQPRegisterAllocator);
+
+static cl::opt<bool>
+pbqpCoalescing("pbqp-coalescing",
+ cl::desc("Attempt coalescing during PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+pbqpDumpGraphs("pbqp-dump-graphs",
+ cl::desc("Dump graphs for each function/round in the compilation unit."),
+ cl::init(false), cl::Hidden);
+#endif
+
+namespace {
+
+///
+/// PBQP based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+
+ static char ID;
+
+ /// Construct a PBQP register allocator.
+ RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
+ : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "PBQP Register Allocator";
+ }
+
+ /// PBQP analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ /// Perform register allocation
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+ typedef std::set<unsigned> RegSet;
+
+
+ std::auto_ptr<PBQPBuilder> builder;
+
+ char *customPassID;
+
+ MachineFunction *mf;
+ const TargetMachine *tm;
+ const TargetRegisterInfo *tri;
+ const TargetInstrInfo *tii;
+ const MachineLoopInfo *loopInfo;
+ MachineRegisterInfo *mri;
+
+ std::auto_ptr<Spiller> spiller;
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ VirtRegMap *vrm;
+
+ RegSet vregsToAlloc, emptyIntervalVRegs;
+
+ /// \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc();
+
+ /// \brief Given a solved PBQP problem maps this solution back to a register
+ /// assignment.
+ bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution);
+
+ /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
+ void finalizeAlloc() const;
+
+};
+
+char RegAllocPBQP::ID = 0;
+
+} // End anonymous namespace.
+
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+ Node2VReg::const_iterator vregItr = node2VReg.find(node);
+ assert(vregItr != node2VReg.end() && "No vreg for node.");
+ return vregItr->second;
+}
+
+PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+ VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
+ assert(nodeItr != vreg2Node.end() && "No node for vreg.");
+ return nodeItr->second;
+
+}
+
+const PBQPRAProblem::AllowedSet&
+ PBQPRAProblem::getAllowedSet(unsigned vreg) const {
+ AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
+ assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
+ const AllowedSet &allowedSet = allowedSetItr->second;
+ return allowedSet;
+}
+
+unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
+ assert(isPRegOption(vreg, option) && "Not a preg option.");
+
+ const AllowedSet& allowedSet = getAllowedSet(vreg);
+ assert(option <= allowedSet.size() && "Option outside allowed set.");
+ return allowedSet[option - 1];
+}
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
+
+ LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
+ MachineRegisterInfo *mri = &mf->getRegInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+
+ std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+ PBQP::Graph &g = p->getGraph();
+ RegSet pregs;
+
+ // Collect the set of preg intervals, record that they're used in the MF.
+ for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) {
+ if (mri->def_empty(Reg))
+ continue;
+ pregs.insert(Reg);
+ mri->setPhysRegUsed(Reg);
+ }
+
+ // Iterate over vregs.
+ for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
+ vregItr != vregEnd; ++vregItr) {
+ unsigned vreg = *vregItr;
+ const TargetRegisterClass *trc = mri->getRegClass(vreg);
+ LiveInterval *vregLI = &LIS->getInterval(vreg);
+
+ // Record any overlaps with regmask operands.
+ BitVector regMaskOverlaps;
+ LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps);
+
+ // Compute an initial allowed set for the current vreg.
+ typedef std::vector<unsigned> VRAllowed;
+ VRAllowed vrAllowed;
+ ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
+ for (unsigned i = 0; i != rawOrder.size(); ++i) {
+ unsigned preg = rawOrder[i];
+ if (mri->isReserved(preg))
+ continue;
+
+ // vregLI crosses a regmask operand that clobbers preg.
+ if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg))
+ continue;
+
+ // vregLI overlaps fixed regunit interference.
+ bool Interference = false;
+ for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) {
+ if (vregLI->overlaps(LIS->getRegUnit(*Units))) {
+ Interference = true;
+ break;
+ }
+ }
+ if (Interference)
+ continue;
+
+ // preg is usable for this virtual register.
+ vrAllowed.push_back(preg);
+ }
+
+ // Construct the node.
+ PBQP::Graph::NodeItr node =
+ g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
+
+ // Record the mapping and allowed set in the problem.
+ p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end());
+
+ PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
+ vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+
+ addSpillCosts(g.getNodeCosts(node), spillCost);
+ }
+
+ for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
+ vr1Itr != vrEnd; ++vr1Itr) {
+ unsigned vr1 = *vr1Itr;
+ const LiveInterval &l1 = lis->getInterval(vr1);
+ const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
+
+ for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr);
+ vr2Itr != vrEnd; ++vr2Itr) {
+ unsigned vr2 = *vr2Itr;
+ const LiveInterval &l2 = lis->getInterval(vr2);
+ const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
+
+ assert(!l2.empty() && "Empty interval in vreg set?");
+ if (l1.overlaps(l2)) {
+ PBQP::Graph::EdgeItr edge =
+ g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
+ PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
+
+ addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri);
+ }
+ }
+ }
+
+ return p;
+}
+
+void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
+ PBQP::PBQPNum spillCost) {
+ costVec[0] = spillCost;
+}
+
+void PBQPBuilder::addInterferenceCosts(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ const TargetRegisterInfo *tri) {
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
+
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (tri->regsOverlap(preg1, preg2)) {
+ costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ }
+ }
+ }
+}
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
+ MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
+
+ std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+ PBQP::Graph &g = p->getGraph();
+
+ const TargetMachine &tm = mf->getTarget();
+ CoalescerPair cp(*tm.getRegisterInfo());
+
+ // Scan the machine function and add a coalescing cost whenever CoalescerPair
+ // gives the Ok.
+ for (MachineFunction::const_iterator mbbItr = mf->begin(),
+ mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ const MachineBasicBlock *mbb = &*mbbItr;
+
+ for (MachineBasicBlock::const_iterator miItr = mbb->begin(),
+ miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ const MachineInstr *mi = &*miItr;
+
+ if (!cp.setRegisters(mi)) {
+ continue; // Not coalescable.
+ }
+
+ if (cp.getSrcReg() == cp.getDstReg()) {
+ continue; // Already coalesced.
+ }
+
+ unsigned dst = cp.getDstReg(),
+ src = cp.getSrcReg();
+
+ const float copyFactor = 0.5; // Cost of copy relative to load. Current
+ // value plucked randomly out of the air.
+
+ PBQP::PBQPNum cBenefit =
+ copyFactor * LiveIntervals::getSpillWeight(false, true,
+ loopInfo->getLoopDepth(mbb));
+
+ if (cp.isPhys()) {
+ if (!mf->getRegInfo().isAllocatable(dst)) {
+ continue;
+ }
+
+ const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
+ unsigned pregOpt = 0;
+ while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
+ ++pregOpt;
+ }
+ if (pregOpt < allowed.size()) {
+ ++pregOpt; // +1 to account for spill option.
+ PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+ addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
+ }
+ } else {
+ const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
+ const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
+ PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
+ PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
+ PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
+ if (edge == g.edgesEnd()) {
+ edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
+ allowed2->size() + 1,
+ 0));
+ } else {
+ if (g.getEdgeNode1(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(allowed1, allowed2);
+ }
+ }
+
+ addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
+ cBenefit);
+ }
+ }
+ }
+
+ return p;
+}
+
+void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
+ unsigned pregOption,
+ PBQP::PBQPNum benefit) {
+ costVec[pregOption] += -benefit;
+}
+
+void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ PBQP::PBQPNum benefit) {
+
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
+
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (preg1 == preg2) {
+ costMat[i + 1][j + 1] += -benefit;
+ }
+ }
+ }
+}
+
+
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesCFG();
+ au.addRequired<AliasAnalysis>();
+ au.addPreserved<AliasAnalysis>();
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ if (customPassID)
+ au.addRequiredID(*customPassID);
+ au.addRequired<CalculateSpillWeights>();
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addRequired<VirtRegMap>();
+ au.addPreserved<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void RegAllocPBQP::findVRegIntervalsToAlloc() {
+
+ // Iterate over all live ranges.
+ for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (mri->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *li = &lis->getInterval(Reg);
+
+ // If this live interval is non-empty we will use pbqp to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!li->empty()) {
+ vregsToAlloc.insert(li->reg);
+ } else {
+ emptyIntervalVRegs.insert(li->reg);
+ }
+ }
+}
+
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution) {
+ // Set to true if we have any spills
+ bool anotherRoundNeeded = false;
+
+ // Clear the existing allocation.
+ vrm->clearAllVirt();
+
+ const PBQP::Graph &g = problem.getGraph();
+ // Iterate over the nodes mapping the PBQP solution to a register
+ // assignment.
+ for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
+ nodeEnd = g.nodesEnd();
+ node != nodeEnd; ++node) {
+ unsigned vreg = problem.getVRegForNode(node);
+ unsigned alloc = solution.getSelection(node);
+
+ if (problem.isPRegOption(vreg, alloc)) {
+ unsigned preg = problem.getPRegForOption(vreg, alloc);
+ DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> "
+ << tri->getName(preg) << "\n");
+ assert(preg != 0 && "Invalid preg selected.");
+ vrm->assignVirt2Phys(vreg, preg);
+ } else if (problem.isSpillOption(vreg, alloc)) {
+ vregsToAlloc.erase(vreg);
+ SmallVector<LiveInterval*, 8> newSpills;
+ LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
+ spiller->spill(LRE);
+
+ DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: "
+ << LRE.getParent().weight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
+ itr != end; ++itr) {
+ assert(!(*itr)->empty() && "Empty spill range.");
+ DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " ");
+ vregsToAlloc.insert((*itr)->reg);
+ }
+
+ DEBUG(dbgs() << ")\n");
+
+ // We need another round if spill intervals were added.
+ anotherRoundNeeded |= !LRE.empty();
+ } else {
+ llvm_unreachable("Unknown allocation option.");
+ }
+ }
+
+ return !anotherRoundNeeded;
+}
+
+
+void RegAllocPBQP::finalizeAlloc() const {
+ // First allocate registers for the empty intervals.
+ for (RegSet::const_iterator
+ itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
+ itr != end; ++itr) {
+ LiveInterval *li = &lis->getInterval(*itr);
+
+ unsigned physReg = mri->getSimpleHint(li->reg);
+
+ if (physReg == 0) {
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+ physReg = liRC->getRawAllocationOrder(*mf).front();
+ }
+
+ vrm->assignVirt2Phys(li->reg, physReg);
+ }
+}
+
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+
+ mf = &MF;
+ tm = &mf->getTarget();
+ tri = tm->getRegisterInfo();
+ tii = tm->getInstrInfo();
+ mri = &mf->getRegInfo();
+
+ lis = &getAnalysis<LiveIntervals>();
+ lss = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ vrm = &getAnalysis<VirtRegMap>();
+ spiller.reset(createInlineSpiller(*this, MF, *vrm));
+
+ mri->freezeReservedRegs(MF);
+
+ DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n");
+
+ // Allocator main loop:
+ //
+ // * Map current regalloc problem to a PBQP problem
+ // * Solve the PBQP problem
+ // * Map the solution back to a register allocation
+ // * Spill if necessary
+ //
+ // This process is continued till no more spills are generated.
+
+ // Find the vreg intervals in need of allocation.
+ findVRegIntervalsToAlloc();
+
+#ifndef NDEBUG
+ const Function* func = mf->getFunction();
+ std::string fqn =
+ func->getParent()->getModuleIdentifier() + "." +
+ func->getName().str();
+#endif
+
+ // If there are non-empty intervals allocate them using pbqp.
+ if (!vregsToAlloc.empty()) {
+
+ bool pbqpAllocComplete = false;
+ unsigned round = 0;
+
+ while (!pbqpAllocComplete) {
+ DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
+
+ std::auto_ptr<PBQPRAProblem> problem =
+ builder->build(mf, lis, loopInfo, vregsToAlloc);
+
+#ifndef NDEBUG
+ if (pbqpDumpGraphs) {
+ std::ostringstream rs;
+ rs << round;
+ std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph");
+ std::string tmp;
+ raw_fd_ostream os(graphFileName.c_str(), tmp);
+ DEBUG(dbgs() << "Dumping graph for round " << round << " to \""
+ << graphFileName << "\"\n");
+ problem->getGraph().dump(os);
+ }
+#endif
+
+ PBQP::Solution solution =
+ PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
+ problem->getGraph());
+
+ pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
+
+ ++round;
+ }
+ }
+
+ // Finalise allocation, allocate empty ranges.
+ finalizeAlloc();
+ vregsToAlloc.clear();
+ emptyIntervalVRegs.clear();
+
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+
+ return true;
+}
+
+FunctionPass* llvm::createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder> builder,
+ char *customPassID) {
+ return new RegAllocPBQP(builder, customPassID);
+}
+
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+ if (pbqpCoalescing) {
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
+ } // else
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+}
+
+#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
new file mode 100644
index 000000000000..87382d8f7c42
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -0,0 +1,146 @@
+//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee saved and reserved
+// registers depends on calling conventions and other dynamic information, so
+// some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
+ cl::desc("Limit all regclasses to N registers"));
+
+RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0)
+{}
+
+void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
+ bool Update = false;
+ MF = &mf;
+
+ // Allocate new array the first time we see a new target.
+ if (MF->getTarget().getRegisterInfo() != TRI) {
+ TRI = MF->getTarget().getRegisterInfo();
+ RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
+ Update = true;
+ }
+
+ // Does this MF have different CSRs?
+ const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
+ if (Update || CSR != CalleeSaved) {
+ // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+ // overlapping CSR.
+ CSRNum.clear();
+ CSRNum.resize(TRI->getNumRegs(), 0);
+ for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ Update = true;
+ }
+ CalleeSaved = CSR;
+
+ // Different reserved registers?
+ const BitVector &RR = MF->getRegInfo().getReservedRegs();
+ if (Reserved.size() != RR.size() || RR != Reserved) {
+ Update = true;
+ Reserved = RR;
+ }
+
+ // Invalidate cached information from previous function.
+ if (Update)
+ ++Tag;
+}
+
+/// compute - Compute the preferred allocation order for RC with reserved
+/// registers filtered out. Volatile registers come first followed by CSR
+/// aliases ordered according to the CSR order specified by the target.
+void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
+ RCInfo &RCI = RegClass[RC->getID()];
+
+ // Raw register count, including all reserved regs.
+ unsigned NumRegs = RC->getNumRegs();
+
+ if (!RCI.Order)
+ RCI.Order.reset(new MCPhysReg[NumRegs]);
+
+ unsigned N = 0;
+ SmallVector<MCPhysReg, 16> CSRAlias;
+ unsigned MinCost = 0xff;
+ unsigned LastCost = ~0u;
+ unsigned LastCostChange = 0;
+
+ // FIXME: Once targets reserve registers instead of removing them from the
+ // allocation order, we can simply use begin/end here.
+ ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
+ for (unsigned i = 0; i != RawOrder.size(); ++i) {
+ unsigned PhysReg = RawOrder[i];
+ // Remove reserved registers from the allocation order.
+ if (Reserved.test(PhysReg))
+ continue;
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+ MinCost = std::min(MinCost, Cost);
+
+ if (CSRNum[PhysReg])
+ // PhysReg aliases a CSR, save it for later.
+ CSRAlias.push_back(PhysReg);
+ else {
+ if (Cost != LastCost)
+ LastCostChange = N;
+ RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
+ }
+ RCI.NumRegs = N + CSRAlias.size();
+ assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
+
+ // CSR aliases go after the volatile registers, preserve the target's order.
+ for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
+ unsigned PhysReg = CSRAlias[i];
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+ if (Cost != LastCost)
+ LastCostChange = N;
+ RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
+
+ // Register allocator stress test. Clip register class to N registers.
+ if (StressRA && RCI.NumRegs > StressRA)
+ RCI.NumRegs = StressRA;
+
+ // Check if RC is a proper sub-class.
+ if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
+ if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
+ RCI.ProperSubClass = true;
+
+ RCI.MinCost = uint8_t(MinCost);
+ RCI.LastCostChange = LastCostChange;
+
+ DEBUG({
+ dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
+ for (unsigned I = 0; I != RCI.NumRegs; ++I)
+ dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
+ dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n");
+ });
+
+ // RCI is now up-to-date.
+ RCI.Tag = Tag;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 000000000000..d85646dd3c58
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,2193 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface which
+// is used as the common interface used by all clients and
+// implementations of register coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(NumInflated , "Number of register classes inflated");
+STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
+STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
+
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+// Temporary flag to test critical edge unsplitting.
+static cl::opt<bool>
+EnableJoinSplits("join-splitedges",
+ cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
+
+// Temporary flag to test global copy optimization.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalCopies("join-globalcopies",
+ cl::desc("Coalesce copies that span blocks (default=subtarget)"),
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+ cl::desc("Verify machine instrs before and after register coalescing"),
+ cl::Hidden);
+
+namespace {
+ class RegisterCoalescer : public MachineFunctionPass,
+ private LiveRangeEdit::Delegate {
+ MachineFunction* MF;
+ MachineRegisterInfo* MRI;
+ const TargetMachine* TM;
+ const TargetRegisterInfo* TRI;
+ const TargetInstrInfo* TII;
+ LiveIntervals *LIS;
+ const MachineLoopInfo* Loops;
+ AliasAnalysis *AA;
+ RegisterClassInfo RegClassInfo;
+
+ /// \brief True if the coalescer should aggressively coalesce global copies
+ /// in favor of keeping local copies.
+ bool JoinGlobalCopies;
+
+ /// \brief True if the coalescer should aggressively coalesce fall-thru
+ /// blocks exclusively containing copies.
+ bool JoinSplitEdges;
+
+ /// WorkList - Copy instructions yet to be coalesced.
+ SmallVector<MachineInstr*, 8> WorkList;
+ SmallVector<MachineInstr*, 8> LocalWorkList;
+
+ /// ErasedInstrs - Set of instruction pointers that have been erased, and
+ /// that may be present in WorkList.
+ SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
+
+ /// Dead instructions that are about to be deleted.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ /// Virtual registers to be considered for register class inflation.
+ SmallVector<unsigned, 8> InflateRegs;
+
+ /// Recursively eliminate dead defs in DeadDefs.
+ void eliminateDeadDefs();
+
+ /// LiveRangeEdit callback.
+ void LRE_WillEraseInstruction(MachineInstr *MI);
+
+ /// coalesceLocals - coalesce the LocalWorkList.
+ void coalesceLocals();
+
+ /// joinAllIntervals - join compatible live intervals
+ void joinAllIntervals();
+
+ /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into WorkList.
+ void copyCoalesceInMBB(MachineBasicBlock *MBB);
+
+ /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return
+ /// true if any progress was made.
+ bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
+
+ /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// which are the src/dst of the copy instruction CopyMI. This returns
+ /// true if the copy was successfully coalesced away. If it is not
+ /// currently possible to coalesce this interval, but it may be possible if
+ /// other things get coalesced, then it returns true by reference in
+ /// 'Again'.
+ bool joinCopy(MachineInstr *TheCopy, bool &Again);
+
+ /// joinIntervals - Attempt to join these two intervals. On failure, this
+ /// returns false. The output "SrcInt" will not have been modified, so we
+ /// can use this information below to update aliases.
+ bool joinIntervals(CoalescerPair &CP);
+
+ /// Attempt joining two virtual registers. Return true on success.
+ bool joinVirtRegs(CoalescerPair &CP);
+
+ /// Attempt joining with a reserved physreg.
+ bool joinReservedPhysReg(CoalescerPair &CP);
+
+ /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+ /// the source value number is defined by a copy from the destination reg
+ /// see if we can merge these two destination reg valno# into a single
+ /// value number, eliminating a copy.
+ bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+ /// hasOtherReachingDefs - Return true if there are definitions of IntB
+ /// other than BValNo val# that can reach uses of AValno val# of IntA.
+ bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+
+ /// reMaterializeTrivialDef - If the source of a copy is defined by a
+ /// trivial computation, replace the copy by rematerialize the definition.
+ bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI);
+
+ /// canJoinPhys - Return true if a physreg copy should be joined.
+ bool canJoinPhys(const CoalescerPair &CP);
+
+ /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ /// update the subregister number if it is not zero. If DstReg is a
+ /// physical register and the existing subregister number of the def / use
+ /// being updated is not zero, make sure to set it to the correct physical
+ /// subregister.
+ void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+
+ /// eliminateUndefCopy - Handle copies of undef values.
+ bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ RegisterCoalescer() : MachineFunctionPass(ID) {
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ /// print - Implement the dump method.
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+ };
+} /// end anonymous namespace
+
+char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
+
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+
+char RegisterCoalescer::ID = 0;
+
+static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
+ unsigned &Src, unsigned &Dst,
+ unsigned &SrcSub, unsigned &DstSub) {
+ if (MI->isCopy()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = MI->getOperand(0).getSubReg();
+ Src = MI->getOperand(1).getReg();
+ SrcSub = MI->getOperand(1).getSubReg();
+ } else if (MI->isSubregToReg()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
+ Src = MI->getOperand(2).getReg();
+ SrcSub = MI->getOperand(2).getSubReg();
+ } else
+ return false;
+ return true;
+}
+
+// Return true if this block should be vacated by the coalescer to eliminate
+// branches. The important cases to handle in the coalescer are critical edges
+// split during phi elimination which contain only copies. Simple blocks that
+// contain non-branches should also be vacated, but this can be handled by an
+// earlier pass similar to early if-conversion.
+static bool isSplitEdge(const MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII) {
+ if (!MII->isCopyLike() && !MII->isUnconditionalBranch())
+ return false;
+ }
+ return true;
+}
+
+bool CoalescerPair::setRegisters(const MachineInstr *MI) {
+ SrcReg = DstReg = 0;
+ SrcIdx = DstIdx = 0;
+ NewRC = 0;
+ Flipped = CrossClass = false;
+
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+ Partial = SrcSub || DstSub;
+
+ // If one register is a physreg, it must be Dst.
+ if (TargetRegisterInfo::isPhysicalRegister(Src)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ Flipped = true;
+ }
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
+ // Eliminate DstSub on a physreg.
+ if (DstSub) {
+ Dst = TRI.getSubReg(Dst, DstSub);
+ if (!Dst) return false;
+ DstSub = 0;
+ }
+
+ // Eliminate SrcSub by picking a corresponding Dst superregister.
+ if (SrcSub) {
+ Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
+ if (!Dst) return false;
+ SrcSub = 0;
+ } else if (!MRI.getRegClass(Src)->contains(Dst)) {
+ return false;
+ }
+ } else {
+ // Both registers are virtual.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+
+ // Both registers have subreg indices.
+ if (SrcSub && DstSub) {
+ // Copies between different sub-registers are never coalescable.
+ if (Src == Dst && SrcSub != DstSub)
+ return false;
+
+ NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
+ SrcIdx, DstIdx);
+ if (!NewRC)
+ return false;
+ } else if (DstSub) {
+ // SrcReg will be merged with a sub-register of DstReg.
+ SrcIdx = DstSub;
+ NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ } else if (SrcSub) {
+ // DstReg will be merged with a sub-register of SrcReg.
+ DstIdx = SrcSub;
+ NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
+ } else {
+ // This is a straight copy without sub-registers.
+ NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
+ }
+
+ // The combined constraint may be impossible to satisfy.
+ if (!NewRC)
+ return false;
+
+ // Prefer SrcReg to be a sub-register of DstReg.
+ // FIXME: Coalescer should support subregs symmetrically.
+ if (DstIdx && !SrcIdx) {
+ std::swap(Src, Dst);
+ std::swap(SrcIdx, DstIdx);
+ Flipped = !Flipped;
+ }
+
+ CrossClass = NewRC != DstRC || NewRC != SrcRC;
+ }
+ // Check our invariants
+ assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
+ assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
+ "Cannot have a physical SubIdx");
+ SrcReg = Src;
+ DstReg = Dst;
+ return true;
+}
+
+bool CoalescerPair::flip() {
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return false;
+ std::swap(SrcReg, DstReg);
+ std::swap(SrcIdx, DstIdx);
+ Flipped = !Flipped;
+ return true;
+}
+
+bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+
+ // Find the virtual register that is SrcReg.
+ if (Dst == SrcReg) {
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ } else if (Src != SrcReg) {
+ return false;
+ }
+
+ // Now check that Dst matches DstReg.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
+ // DstSub could be set for a physreg from INSERT_SUBREG.
+ if (DstSub)
+ Dst = TRI.getSubReg(Dst, DstSub);
+ // Full copy of Src.
+ if (!SrcSub)
+ return DstReg == Dst;
+ // This is a partial register copy. Check that the parts match.
+ return TRI.getSubReg(DstReg, SrcSub) == Dst;
+ } else {
+ // DstReg is virtual.
+ if (DstReg != Dst)
+ return false;
+ // Registers match, do the subregisters line up?
+ return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
+ TRI.composeSubRegIndices(DstIdx, DstSub);
+ }
+}
+
+void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RegisterCoalescer::eliminateDeadDefs() {
+ SmallVector<LiveInterval*, 8> NewRegs;
+ LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs);
+}
+
+// Callback from eliminateDeadDefs().
+void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
+ // MI may be in WorkList. Make sure we don't visit it.
+ ErasedInstrs.insert(MI);
+}
+
+/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert(!CP.isPartial() && "This doesn't work for partial copies.");
+ assert(!CP.isPhys() && "This doesn't work for physreg copies.");
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B3' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ if (BLR == IntB.end()) return false;
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (BValNo->def != CopyIdx) return false;
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
+ // The live range might not exist after fun with physreg coalescing.
+ if (ALR == IntA.end()) return false;
+ VNInfo *AValNo = ALR->valno;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
+ // Don't allow any partial copies, even if isCoalescable() allows them.
+ if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
+ return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ LiveInterval::iterator ValLR =
+ IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
+ if (ValLR == IntB.end())
+ return false;
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst =
+ LIS->getInstructionFromIndex(ValLR->end.getPrevSlot());
+ if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+ return false;
+
+ // Okay, we now know that ValLR ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));
+
+ SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+ // We are about to delete CopyMI, so need to remove it as the 'instruction
+ // that defines this value #'. Update the valnum with the new defining
+ // instruction #.
+ BValNo->def = FillerStart;
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->valno)
+ IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+ DEBUG(dbgs() << " result = " << IntB << '\n');
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1) {
+ ValLREndInst->getOperand(UIdx).setIsKill(false);
+ }
+
+ // Rewrite the copy. If the copy instruction was killing the destination
+ // register before the merge, find the last use and trim the live range. That
+ // will also add the isKill marker.
+ CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
+ if (ALR->end == CopyIdx)
+ LIS->shrinkToUses(&IntA);
+
+ ++numExtends;
+ return true;
+}
+
+/// hasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValno val# of IntA.
+bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ // If AValNo has PHI kills, conservatively assume that IntB defs can reach
+ // the PHI values.
+ if (LIS->hasPHIKill(IntA, AValNo))
+ return true;
+
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ LiveInterval::Ranges::iterator BI =
+ std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+ if (BI != IntB.ranges.begin())
+ --BI;
+ for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
+ if (BI->start <= AI->start && BI->end > AI->start)
+ return true;
+ if (BI->start > AI->start && BI->start < AI->end)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+/// A3 = op A2 B0<kill>
+/// ...
+/// B1 = A3 <- this copy
+/// ...
+/// = op A3 <- more uses
+///
+/// ==>
+///
+/// B2 = op B0 A2<kill>
+/// ...
+/// B1 = B2 <- now an identify copy
+/// ...
+/// = op B2 <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert (!CP.isPhys());
+
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B3' in
+ // the example above.
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+ if (!BValNo || BValNo->def != CopyIdx)
+ return false;
+
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
+ assert(AValNo && "COPY source not live");
+ if (AValNo->isPHIDef() || AValNo->isUnused())
+ return false;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
+ if (!DefMI)
+ return false;
+ if (!DefMI->isCommutable())
+ return false;
+ // If DefMI is a two-address instruction then commuting it will change the
+ // destination register.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ assert(DefIdx != -1);
+ unsigned UseOpIdx;
+ if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+ return false;
+ unsigned Op1, Op2, NewDstIdx;
+ if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
+ return false;
+ if (Op1 == UseOpIdx)
+ NewDstIdx = Op2;
+ else if (Op2 == UseOpIdx)
+ NewDstIdx = Op1;
+ else
+ return false;
+
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill())
+ return false;
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return false;
+
+ // If some of the uses of IntA.reg is already coalesced away, return false.
+ // It's not possible to determine whether it's safe to perform the coalescing.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ MRI->use_nodbg_begin(IntA.reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ // If this use is tied to a def, we can't rewrite the register.
+ if (UseMI->isRegTiedToDefOperand(UI.getOperandNo()))
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI = TII->commuteInstruction(DefMI);
+ if (!NewMI)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
+ TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+ !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
+ return false;
+ if (NewMI != DefMI) {
+ LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ MachineBasicBlock::iterator Pos = DefMI;
+ MBB->insert(Pos, NewMI);
+ MBB->erase(DefMI);
+ }
+ unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+ NewMI->getOperand(OpIdx).setIsKill();
+
+ // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+
+ // Update uses of IntA of the specific Val# with IntB.
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
+ UE = MRI->use_end(); UI != UE;) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI->isDebugValue()) {
+ // FIXME These don't have an instruction index. Not clear we have enough
+ // info to decide whether to do this replacement or not. For now do it.
+ UseMO.setReg(NewReg);
+ continue;
+ }
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ // Kill flags are no longer accurate. They are recomputed after RA.
+ UseMO.setIsKill(false);
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ UseMO.substPhysReg(NewReg, *TRI);
+ else
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (!UseMI->isCopy())
+ continue;
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+
+ // This copy will become a noop. If it's defining a new val#, merge it into
+ // BValNo.
+ SlotIndex DefIdx = UseIdx.getRegSlot();
+ VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+ if (!DVNI)
+ continue;
+ DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ assert(DVNI->def == DefIdx);
+ BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
+ ErasedInstrs.insert(UseMI);
+ LIS->RemoveMachineInstrFromMaps(UseMI);
+ UseMI->eraseFromParent();
+ }
+
+ // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+ // is updated.
+ VNInfo *ValNo = BValNo;
+ ValNo->def = AValNo->def;
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
+ }
+ DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+
+ IntA.removeValNo(AValNo);
+ DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
+ ++numCommutes;
+ return true;
+}
+
+/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerialize the definition.
+bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+ unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ LiveInterval &SrcInt = LIS->getInterval(SrcReg);
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
+ LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+ assert(SrcLR != SrcInt.end() && "Live range not found!");
+ VNInfo *ValNo = SrcLR->valno;
+ if (ValNo->isPHIDef() || ValNo->isUnused())
+ return false;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
+ if (!DefMI)
+ return false;
+ assert(DefMI && "Defining instruction disappeared");
+ if (!DefMI->isAsCheapAsAMove())
+ return false;
+ if (!TII->isTriviallyReMaterializable(DefMI, AA))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(TII, AA, SawStore))
+ return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+ // Only support subregister destinations when the def is read-undef.
+ MachineOperand &DstOperand = CopyMI->getOperand(0);
+ if (DstOperand.getSubReg() && !DstOperand.isUndef())
+ return false;
+ if (!DefMI->isImplicitDef()) {
+ // Make sure the copy destination register class fits the instruction
+ // definition register class. The mismatch can happen as a result of earlier
+ // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ if (!MRI->constrainRegClass(DstReg, RC))
+ return false;
+ } else if (!RC->contains(DstReg))
+ return false;
+ }
+
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ MachineBasicBlock::iterator MII =
+ llvm::next(MachineBasicBlock::iterator(CopyMI));
+ TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
+ MachineInstr *NewMI = prior(MII);
+
+ // The original DefMI may have been a subregister def, but the full register
+ // class of its destination matches the destination of CopyMI, and CopyMI is
+ // either a full register def or is read-undef. Therefore we can clear the
+ // subregister index on the rematerialized instruction.
+ NewMI->getOperand(0).setSubReg(0);
+
+ // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
+ // We need to remember these so we can add intervals once we insert
+ // NewMI into SlotIndexes.
+ SmallVector<unsigned, 4> NewMIImplDefs;
+ for (unsigned i = NewMI->getDesc().getNumOperands(),
+ e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (MO.isReg()) {
+ assert(MO.isDef() && MO.isImplicit() && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
+ NewMIImplDefs.push_back(MO.getReg());
+ }
+ }
+
+ // CopyMI may have implicit operands, transfer them over to the newly
+ // rematerialized instruction. And update implicit def interval valnos.
+ for (unsigned i = CopyMI->getDesc().getNumOperands(),
+ e = CopyMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = CopyMI->getOperand(i);
+ if (MO.isReg()) {
+ assert(MO.isImplicit() && "No explicit operands after implict operands.");
+ // Discard VReg implicit defs.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ NewMI->addOperand(MO);
+ }
+ }
+ }
+
+ LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+
+ SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+ for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
+ unsigned Reg = NewMIImplDefs[i];
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
+ LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
+ }
+
+ CopyMI->eraseFromParent();
+ ErasedInstrs.insert(CopyMI);
+ DEBUG(dbgs() << "Remat: " << *NewMI);
+ ++NumReMats;
+
+ // The source interval can become smaller because we removed a use.
+ LIS->shrinkToUses(&SrcInt, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
+
+ return true;
+}
+
+/// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of <undef>
+/// values, it only removes local variables. When we have a copy like:
+///
+/// %vreg1 = COPY %vreg2<undef>
+///
+/// We delete the copy and remove the corresponding value number from %vreg1.
+/// Any uses of that value number are marked as <undef>.
+bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
+ const CoalescerPair &CP) {
+ SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
+ LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg());
+ if (SrcInt->liveAt(Idx))
+ return false;
+ LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg());
+ if (DstInt->liveAt(Idx))
+ return false;
+
+ // No intervals are live-in to CopyMI - it is undef.
+ if (CP.isFlipped())
+ DstInt = SrcInt;
+ SrcInt = 0;
+
+ VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
+ assert(DeadVNI && "No value defined in DstInt");
+ DstInt->removeValNo(DeadVNI);
+
+ // Find new undef uses.
+ for (MachineRegisterInfo::reg_nodbg_iterator
+ I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end();
+ I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ if (MO.isDef() || MO.isUndef())
+ continue;
+ MachineInstr *MI = MO.getParent();
+ SlotIndex Idx = LIS->getInstructionIndex(MI);
+ if (DstInt->liveAt(Idx))
+ continue;
+ MO.setIsUndef(true);
+ DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI);
+ }
+ return true;
+}
+
+/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx) {
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);
+
+ SmallPtrSet<MachineInstr*, 8> Visited;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ // Each instruction can only be rewritten once because sub-register
+ // composition is not always idempotent. When SrcReg != DstReg, rewriting
+ // the UseMI operands removes them from the SrcReg use-def chain, but when
+ // SrcReg is DstReg we could encounter UseMI twice if it has multiple
+ // operands mentioning the virtual register.
+ if (SrcReg == DstReg && !Visited.insert(UseMI))
+ continue;
+
+ SmallVector<unsigned,8> Ops;
+ bool Reads, Writes;
+ tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+
+ // If SrcReg wasn't read, it may still be the case that DstReg is live-in
+ // because SrcReg is a sub-register.
+ if (DstInt && !Reads && SubIdx)
+ Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI));
+
+ // Replace SrcReg with DstReg in all UseMI operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(Ops[i]);
+
+ // Adjust <undef> flags in case of sub-register joins. We don't want to
+ // turn a full def into a read-modify-write sub-register def and vice
+ // versa.
+ if (SubIdx && MO.isDef())
+ MO.setIsUndef(!Reads);
+
+ if (DstIsPhys)
+ MO.substPhysReg(DstReg, *TRI);
+ else
+ MO.substVirtReg(DstReg, SubIdx, *TRI);
+ }
+
+ DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugValue())
+ dbgs() << LIS->getInstructionIndex(UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
+ }
+}
+
+/// canJoinPhys - Return true if a copy involving a physreg should be joined.
+bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
+ /// Always join simple intervals that are defined by a single copy from a
+ /// reserved register. This doesn't increase register pressure, so it is
+ /// always beneficial.
+ if (!MRI->isReserved(CP.getDstReg())) {
+ DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
+ return false;
+ }
+
+ LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
+ if (CP.isFlipped() && JoinVInt.containsOneValue())
+ return true;
+
+ DEBUG(dbgs() << "\tCannot join defs into reserved register.\n");
+ return false;
+}
+
+/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
+
+ Again = false;
+ DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+
+ CoalescerPair CP(*TRI);
+ if (!CP.setRegisters(CopyMI)) {
+ DEBUG(dbgs() << "\tNot coalescable.\n");
+ return false;
+ }
+
+ // Dead code elimination. This really should be handled by MachineDCE, but
+ // sometimes dead copies slip through, and we can't generate invalid live
+ // ranges.
+ if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
+ DEBUG(dbgs() << "\tCopy is dead.\n");
+ DeadDefs.push_back(CopyMI);
+ eliminateDeadDefs();
+ return true;
+ }
+
+ // Eliminate undefs.
+ if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) {
+ DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n");
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ return false; // Not coalescable.
+ }
+
+ // Coalesced copies are normally removed immediately, but transformations
+ // like removeCopyByCommutingDef() can inadvertently create identity copies.
+ // When that happens, just join the values and remove the copy.
+ if (CP.getSrcReg() == CP.getDstReg()) {
+ LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
+ LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI));
+ if (VNInfo *DefVNI = LRQ.valueDefined()) {
+ VNInfo *ReadVNI = LRQ.valueIn();
+ assert(ReadVNI && "No value before copy and no <undef> flag.");
+ assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
+ LI.MergeValueNumberInto(DefVNI, ReadVNI);
+ DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
+ }
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ return true;
+ }
+
+ // Enforce policies.
+ if (CP.isPhys()) {
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
+ << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx())
+ << '\n');
+ if (!canJoinPhys(CP)) {
+ // Before giving up coalescing, if definition of source is defined by
+ // trivial computation, try rematerializing it.
+ if (reMaterializeTrivialDef(CP, CopyMI))
+ return true;
+ return false;
+ }
+ } else {
+ DEBUG({
+ dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName()
+ << " with ";
+ if (CP.getDstIdx() && CP.getSrcIdx())
+ dbgs() << PrintReg(CP.getDstReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
+ << PrintReg(CP.getSrcReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
+ else
+ dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
+ << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
+ });
+
+ // When possible, let DstReg be the larger interval.
+ if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
+ LIS->getInterval(CP.getDstReg()).ranges.size())
+ CP.flip();
+ }
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ if (!joinIntervals(CP)) {
+ // Coalescing failed.
+
+ // If definition of source is defined by trivial computation, try
+ // rematerializing it.
+ if (reMaterializeTrivialDef(CP, CopyMI))
+ return true;
+
+ // If we can eliminate the copy without merging the live ranges, do so now.
+ if (!CP.isPartial() && !CP.isPhys()) {
+ if (adjustCopiesBackFrom(CP, CopyMI) ||
+ removeCopyByCommutingDef(CP, CopyMI)) {
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
+ }
+
+ // Otherwise, we are unable to join the intervals.
+ DEBUG(dbgs() << "\tInterference!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ // Coalescing to a virtual register that is of a sub-register class of the
+ // other. Make sure the resulting register is set to the right register class.
+ if (CP.isCrossClass()) {
+ ++numCrossRCs;
+ MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }
+
+ // Removing sub-register copies can ease the register class constraints.
+ // Make sure we attempt to inflate the register class of DstReg.
+ if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
+ InflateRegs.push_back(CP.getDstReg());
+
+ // CopyMI has been erased by joinIntervals at this point. Remove it from
+ // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
+ // to the work list. This keeps ErasedInstrs from growing needlessly.
+ ErasedInstrs.erase(CopyMI);
+
+ // Rewrite all SrcReg operands to DstReg.
+ // Also update DstReg operands to include DstIdx if it is set.
+ if (CP.getDstIdx())
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+
+ // SrcReg is guaranteed to be the register whose live interval that is
+ // being merged.
+ LIS->removeInterval(CP.getSrcReg());
+
+ // Update regalloc hint.
+ TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
+
+ DEBUG({
+ dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI);
+ if (!CP.isPhys())
+ dbgs() << LIS->getInterval(CP.getDstReg());
+ dbgs() << '\n';
+ });
+
+ ++numJoins;
+ return true;
+}
+
+/// Attempt joining with a reserved physreg.
+bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
+ assert(CP.isPhys() && "Must be a physreg copy");
+ assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << '\n');
+
+ assert(CP.isFlipped() && RHS.containsOneValue() &&
+ "Invalid join with reserved register");
+
+ // Optimization for reserved registers like ESP. We can only merge with a
+ // reserved physreg if RHS has a single value that is a copy of CP.DstReg().
+ // The live range of the reserved register will look like a set of dead defs
+ // - we don't properly track the live range of reserved registers.
+
+ // Deny any overlapping intervals. This depends on all the reserved
+ // register live ranges to look like dead defs.
+ for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI)
+ if (RHS.overlaps(LIS->getRegUnit(*UI))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
+ return false;
+ }
+
+ // Skip any value computations, we are not adding new values to the
+ // reserved register. Also skip merging the live ranges, the reserved
+ // register live range doesn't need to be accurate as long as all the
+ // defs are there.
+
+ // Delete the identity copy.
+ MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+
+ // We don't track kills for reserved registers.
+ MRI->clearKillFlags(CP.getSrcReg());
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Interference checking and interval joining
+//===----------------------------------------------------------------------===//
+//
+// In the easiest case, the two live ranges being joined are disjoint, and
+// there is no interference to consider. It is quite common, though, to have
+// overlapping live ranges, and we need to check if the interference can be
+// resolved.
+//
+// The live range of a single SSA value forms a sub-tree of the dominator tree.
+// This means that two SSA values overlap if and only if the def of one value
+// is contained in the live range of the other value. As a special case, the
+// overlapping values can be defined at the same index.
+//
+// The interference from an overlapping def can be resolved in these cases:
+//
+// 1. Coalescable copies. The value is defined by a copy that would become an
+// identity copy after joining SrcReg and DstReg. The copy instruction will
+// be removed, and the value will be merged with the source value.
+//
+// There can be several copies back and forth, causing many values to be
+// merged into one. We compute a list of ultimate values in the joined live
+// range as well as a mappings from the old value numbers.
+//
+// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
+// predecessors have a live out value. It doesn't cause real interference,
+// and can be merged into the value it overlaps. Like a coalescable copy, it
+// can be erased after joining.
+//
+// 3. Copy of external value. The overlapping def may be a copy of a value that
+// is already in the other register. This is like a coalescable copy, but
+// the live range of the source register must be trimmed after erasing the
+// copy instruction:
+//
+// %src = COPY %ext
+// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
+//
+// 4. Clobbering undefined lanes. Vector registers are sometimes built by
+// defining one lane at a time:
+//
+// %dst:ssub0<def,read-undef> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// The live range of %src overlaps the %dst value defined by FOO, but
+// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
+// which was undef anyway.
+//
+// The value mapping is more complicated in this case. The final live range
+// will have different value numbers for both FOO and BAR, but there is no
+// simple mapping from old to new values. It may even be necessary to add
+// new PHI values.
+//
+// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
+// is live, but never read. This can happen because we don't compute
+// individual live ranges per lane.
+//
+// %dst<def> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// This kind of interference is only resolved locally. If the clobbered
+// lane value escapes the block, the join is aborted.
+
+namespace {
+/// Track information about values in a single virtual register about to be
+/// joined. Objects of this class are always created in pairs - one for each
+/// side of the CoalescerPair.
+class JoinVals {
+ LiveInterval &LI;
+
+ // Location of this register in the final joined register.
+ // Either CP.DstIdx or CP.SrcIdx.
+ unsigned SubIdx;
+
+ // Values that will be present in the final live range.
+ SmallVectorImpl<VNInfo*> &NewVNInfo;
+
+ const CoalescerPair &CP;
+ LiveIntervals *LIS;
+ SlotIndexes *Indexes;
+ const TargetRegisterInfo *TRI;
+
+ // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
+ // This is suitable for passing to LiveInterval::join().
+ SmallVector<int, 8> Assignments;
+
+ // Conflict resolution for overlapping values.
+ enum ConflictResolution {
+ // No overlap, simply keep this value.
+ CR_Keep,
+
+ // Merge this value into OtherVNI and erase the defining instruction.
+ // Used for IMPLICIT_DEF, coalescable copies, and copies from external
+ // values.
+ CR_Erase,
+
+ // Merge this value into OtherVNI but keep the defining instruction.
+ // This is for the special case where OtherVNI is defined by the same
+ // instruction.
+ CR_Merge,
+
+ // Keep this value, and have it replace OtherVNI where possible. This
+ // complicates value mapping since OtherVNI maps to two different values
+ // before and after this def.
+ // Used when clobbering undefined or dead lanes.
+ CR_Replace,
+
+ // Unresolved conflict. Visit later when all values have been mapped.
+ CR_Unresolved,
+
+ // Unresolvable conflict. Abort the join.
+ CR_Impossible
+ };
+
+ // Per-value info for LI. The lane bit masks are all relative to the final
+ // joined register, so they can be compared directly between SrcReg and
+ // DstReg.
+ struct Val {
+ ConflictResolution Resolution;
+
+ // Lanes written by this def, 0 for unanalyzed values.
+ unsigned WriteLanes;
+
+ // Lanes with defined values in this register. Other lanes are undef and
+ // safe to clobber.
+ unsigned ValidLanes;
+
+ // Value in LI being redefined by this def.
+ VNInfo *RedefVNI;
+
+ // Value in the other live range that overlaps this def, if any.
+ VNInfo *OtherVNI;
+
+ // Is this value an IMPLICIT_DEF that can be erased?
+ //
+ // IMPLICIT_DEF values should only exist at the end of a basic block that
+ // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
+ // safely erased if they are overlapping a live value in the other live
+ // interval.
+ //
+ // Weird control flow graphs and incomplete PHI handling in
+ // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
+ // longer live ranges. Such IMPLICIT_DEF values should be treated like
+ // normal values.
+ bool ErasableImplicitDef;
+
+ // True when the live range of this value will be pruned because of an
+ // overlapping CR_Replace value in the other live range.
+ bool Pruned;
+
+ // True once Pruned above has been computed.
+ bool PrunedComputed;
+
+ Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
+ RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false),
+ Pruned(false), PrunedComputed(false) {}
+
+ bool isAnalyzed() const { return WriteLanes != 0; }
+ };
+
+ // One entry per value number in LI.
+ SmallVector<Val, 8> Vals;
+
+ unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef);
+ VNInfo *stripCopies(VNInfo *VNI);
+ ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
+ void computeAssignment(unsigned ValNo, JoinVals &Other);
+ bool taintExtent(unsigned, unsigned, JoinVals&,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
+ bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
+ bool isPrunedValue(unsigned ValNo, JoinVals &Other);
+
+public:
+ JoinVals(LiveInterval &li, unsigned subIdx,
+ SmallVectorImpl<VNInfo*> &newVNInfo,
+ const CoalescerPair &cp,
+ LiveIntervals *lis,
+ const TargetRegisterInfo *tri)
+ : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis),
+ Indexes(LIS->getSlotIndexes()), TRI(tri),
+ Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums())
+ {}
+
+ /// Analyze defs in LI and compute a value mapping in NewVNInfo.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool mapValues(JoinVals &Other);
+
+ /// Try to resolve conflicts that require all values to be mapped.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool resolveConflicts(JoinVals &Other);
+
+ /// Prune the live range of values in Other.LI where they would conflict with
+ /// CR_Replace values in LI. Collect end points for restoring the live range
+ /// after joining.
+ void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
+
+ /// Erase any machine instructions that have been coalesced away.
+ /// Add erased instructions to ErasedInstrs.
+ /// Add foreign virtual registers to ShrinkRegs if their live range ended at
+ /// the erased instrs.
+ void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+ SmallVectorImpl<unsigned> &ShrinkRegs);
+
+ /// Get the value assignments suitable for passing to LiveInterval::join.
+ const int *getAssignments() const { return Assignments.data(); }
+};
+} // end anonymous namespace
+
+/// Compute the bitmask of lanes actually written by DefMI.
+/// Set Redef if there are any partial register definitions that depend on the
+/// previous value of the register.
+unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) {
+ unsigned L = 0;
+ for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef())
+ continue;
+ L |= TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO->getSubReg()));
+ if (MO->readsReg())
+ Redef = true;
+ }
+ return L;
+}
+
+/// Find the ultimate value that VNI was copied from.
+VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
+ while (!VNI->isPHIDef()) {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def);
+ assert(MI && "No defining instruction");
+ if (!MI->isFullCopy())
+ break;
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ break;
+ LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def);
+ if (!LRQ.valueIn())
+ break;
+ VNI = LRQ.valueIn();
+ }
+ return VNI;
+}
+
+/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
+/// Return a conflict resolution when possible, but leave the hard cases as
+/// CR_Unresolved.
+/// Recursively calls computeAssignment() on this and Other, guaranteeing that
+/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
+/// The recursion always goes upwards in the dominator tree, making loops
+/// impossible.
+JoinVals::ConflictResolution
+JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ assert(!V.isAnalyzed() && "Value has already been analyzed!");
+ VNInfo *VNI = LI.getValNumInfo(ValNo);
+ if (VNI->isUnused()) {
+ V.WriteLanes = ~0u;
+ return CR_Keep;
+ }
+
+ // Get the instruction defining this value, compute the lanes written.
+ const MachineInstr *DefMI = 0;
+ if (VNI->isPHIDef()) {
+ // Conservatively assume that all lanes in a PHI are valid.
+ V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ } else {
+ DefMI = Indexes->getInstructionFromIndex(VNI->def);
+ bool Redef = false;
+ V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
+
+ // If this is a read-modify-write instruction, there may be more valid
+ // lanes than the ones written by this instruction.
+ // This only covers partial redef operands. DefMI may have normal use
+ // operands reading the register. They don't contribute valid lanes.
+ //
+ // This adds ssub1 to the set of valid lanes in %src:
+ //
+ // %src:ssub1<def> = FOO
+ //
+ // This leaves only ssub1 valid, making any other lanes undef:
+ //
+ // %src:ssub1<def,read-undef> = FOO %src:ssub2
+ //
+ // The <read-undef> flag on the def operand means that old lane values are
+ // not important.
+ if (Redef) {
+ V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn();
+ assert(V.RedefVNI && "Instruction is reading nonexistent value");
+ computeAssignment(V.RedefVNI->id, Other);
+ V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
+ }
+
+ // An IMPLICIT_DEF writes undef values.
+ if (DefMI->isImplicitDef()) {
+ // We normally expect IMPLICIT_DEF values to be live only until the end
+ // of their block. If the value is really live longer and gets pruned in
+ // another block, this flag is cleared again.
+ V.ErasableImplicitDef = true;
+ V.ValidLanes &= ~V.WriteLanes;
+ }
+ }
+
+ // Find the value in Other that overlaps VNI->def, if any.
+ LiveRangeQuery OtherLRQ(Other.LI, VNI->def);
+
+ // It is possible that both values are defined by the same instruction, or
+ // the values are PHIs defined in the same block. When that happens, the two
+ // values should be merged into one, but not into any preceding value.
+ // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
+ if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
+ assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
+
+ // One value stays, the other is merged. Keep the earlier one, or the first
+ // one we see.
+ if (OtherVNI->def < VNI->def)
+ Other.computeAssignment(OtherVNI->id, *this);
+ else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
+ // This is an early-clobber def overlapping a live-in value in the other
+ // register. Not mergeable.
+ V.OtherVNI = OtherLRQ.valueIn();
+ return CR_Impossible;
+ }
+ V.OtherVNI = OtherVNI;
+ Val &OtherV = Other.Vals[OtherVNI->id];
+ // Keep this value, check for conflicts when analyzing OtherVNI.
+ if (!OtherV.isAnalyzed())
+ return CR_Keep;
+ // Both sides have been analyzed now.
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Merge;
+ if (V.ValidLanes & OtherV.ValidLanes)
+ // Overlapping lanes can't be resolved.
+ return CR_Impossible;
+ else
+ return CR_Merge;
+ }
+
+ // No simultaneous def. Is Other live at the def?
+ V.OtherVNI = OtherLRQ.valueIn();
+ if (!V.OtherVNI)
+ // No overlap, no conflict.
+ return CR_Keep;
+
+ assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
+
+ // We have overlapping values, or possibly a kill of Other.
+ // Recursively compute assignments up the dominator tree.
+ Other.computeAssignment(V.OtherVNI->id, *this);
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+ // WHen it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ if (OtherV.ErasableImplicitDef && DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into BB#" << DefMI->getParent()->getNumber()
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ }
+
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Replace;
+
+ // Check for simple erasable conflicts.
+ if (DefMI->isImplicitDef())
+ return CR_Erase;
+
+ // Include the non-conflict where DefMI is a coalescable copy that kills
+ // OtherVNI. We still want the copy erased and value numbers merged.
+ if (CP.isCoalescable(DefMI)) {
+ // Some of the lanes copied from OtherVNI may be undef, making them undef
+ // here too.
+ V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
+ return CR_Erase;
+ }
+
+ // This may not be a real conflict if DefMI simply kills Other and defines
+ // VNI.
+ if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
+ return CR_Keep;
+
+ // Handle the case where VNI and OtherVNI can be proven to be identical:
+ //
+ // %other = COPY %ext
+ // %this = COPY %ext <-- Erase this copy
+ //
+ if (DefMI->isFullCopy() && !CP.isPartial() &&
+ stripCopies(VNI) == stripCopies(V.OtherVNI))
+ return CR_Erase;
+
+ // If the lanes written by this instruction were all undef in OtherVNI, it is
+ // still safe to join the live ranges. This can't be done with a simple value
+ // mapping, though - OtherVNI will map to multiple values:
+ //
+ // 1 %dst:ssub0 = FOO <-- OtherVNI
+ // 2 %src = BAR <-- VNI
+ // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy.
+ // 4 BAZ %dst<kill>
+ // 5 QUUX %src<kill>
+ //
+ // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
+ // handles this complex value mapping.
+ if ((V.WriteLanes & OtherV.ValidLanes) == 0)
+ return CR_Replace;
+
+ // If the other live range is killed by DefMI and the live ranges are still
+ // overlapping, it must be because we're looking at an early clobber def:
+ //
+ // %dst<def,early-clobber> = ASM %src<kill>
+ //
+ // In this case, it is illegal to merge the two live ranges since the early
+ // clobber def would clobber %src before it was read.
+ if (OtherLRQ.isKill()) {
+ // This case where the def doesn't overlap the kill is handled above.
+ assert(VNI->def.isEarlyClobber() &&
+ "Only early clobber defs can overlap a kill");
+ return CR_Impossible;
+ }
+
+ // VNI is clobbering live lanes in OtherVNI, but there is still the
+ // possibility that no instructions actually read the clobbered lanes.
+ // If we're clobbering all the lanes in OtherVNI, at least one must be read.
+ // Otherwise Other.LI wouldn't be live here.
+ if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
+ return CR_Impossible;
+
+ // We need to verify that no instructions are reading the clobbered lanes. To
+ // save compile time, we'll only check that locally. Don't allow the tainted
+ // value to escape the basic block.
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
+ return CR_Impossible;
+
+ // There are still some things that could go wrong besides clobbered lanes
+ // being read, for example OtherVNI may be only partially redefined in MBB,
+ // and some clobbered lanes could escape the block. Save this analysis for
+ // resolveConflicts() when all values have been mapped. We need to know
+ // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
+ // that now - the recursive analyzeValue() calls must go upwards in the
+ // dominator tree.
+ return CR_Unresolved;
+}
+
+/// Compute the value assignment for ValNo in LI.
+/// This may be called recursively by analyzeValue(), but never for a ValNo on
+/// the stack.
+void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.isAnalyzed()) {
+ // Recursion should always move up the dominator tree, so ValNo is not
+ // supposed to reappear before it has been assigned.
+ assert(Assignments[ValNo] != -1 && "Bad recursion?");
+ return;
+ }
+ switch ((V.Resolution = analyzeValue(ValNo, Other))) {
+ case CR_Erase:
+ case CR_Merge:
+ // Merge this ValNo into OtherVNI.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
+ assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
+ Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
+ DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@'
+ << LI.getValNumInfo(ValNo)->def << " into "
+ << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@'
+ << V.OtherVNI->def << " --> @"
+ << NewVNInfo[Assignments[ValNo]]->def << '\n');
+ break;
+ case CR_Replace:
+ case CR_Unresolved:
+ // The other value is going to be pruned if this join is successful.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
+ Other.Vals[V.OtherVNI->id].Pruned = true;
+ // Fall through.
+ default:
+ // This value number needs to go in the final joined live range.
+ Assignments[ValNo] = NewVNInfo.size();
+ NewVNInfo.push_back(LI.getValNumInfo(ValNo));
+ break;
+ }
+}
+
+bool JoinVals::mapValues(JoinVals &Other) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ computeAssignment(i, Other);
+ if (Vals[i].Resolution == CR_Impossible) {
+ DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i
+ << '@' << LI.getValNumInfo(i)->def << '\n');
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute
+/// the extent of the tainted lanes in the block.
+///
+/// Multiple values in Other.LI can be affected since partial redefinitions can
+/// preserve previously tainted lanes.
+///
+/// 1 %dst = VLOAD <-- Define all lanes in %dst
+/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
+/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
+/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
+///
+/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
+/// entry to TaintedVals.
+///
+/// Returns false if the tainted lanes extend beyond the basic block.
+bool JoinVals::
+taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
+ VNInfo *VNI = LI.getValNumInfo(ValNo);
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
+
+ // Scan Other.LI from VNI.def to MBBEnd.
+ LiveInterval::iterator OtherI = Other.LI.find(VNI->def);
+ assert(OtherI != Other.LI.end() && "No conflict?");
+ do {
+ // OtherI is pointing to a tainted value. Abort the join if the tainted
+ // lanes escape the block.
+ SlotIndex End = OtherI->end;
+ if (End >= MBBEnd) {
+ DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start << '\n');
+ return false;
+ }
+ DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start
+ << " to " << End << '\n');
+ // A dead def is not a problem.
+ if (End.isDead())
+ break;
+ TaintExtent.push_back(std::make_pair(End, TaintedLanes));
+
+ // Check for another def in the MBB.
+ if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd)
+ break;
+
+ // Lanes written by the new def are no longer tainted.
+ const Val &OV = Other.Vals[OtherI->valno->id];
+ TaintedLanes &= ~OV.WriteLanes;
+ if (!OV.RedefVNI)
+ break;
+ } while (TaintedLanes);
+ return true;
+}
+
+/// Return true if MI uses any of the given Lanes from Reg.
+/// This does not include partial redefinitions of Reg.
+bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx,
+ unsigned Lanes) {
+ if (MI->isDebugValue())
+ return false;
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg)
+ continue;
+ if (!MO->readsReg())
+ continue;
+ if (Lanes & TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO->getSubReg())))
+ return true;
+ }
+ return false;
+}
+
+bool JoinVals::resolveConflicts(JoinVals &Other) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ assert (V.Resolution != CR_Impossible && "Unresolvable conflict");
+ if (V.Resolution != CR_Unresolved)
+ continue;
+ DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i
+ << '@' << LI.getValNumInfo(i)->def << '\n');
+ ++NumLaneConflicts;
+ assert(V.OtherVNI && "Inconsistent conflict resolution.");
+ VNInfo *VNI = LI.getValNumInfo(i);
+ const Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
+ // join, those lanes will be tainted with a wrong value. Get the extent of
+ // the tainted lanes.
+ unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
+ SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent;
+ if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
+ // Tainted lanes would extend beyond the basic block.
+ return false;
+
+ assert(!TaintExtent.empty() && "There should be at least one conflict.");
+
+ // Now look at the instructions from VNI->def to TaintExtent (inclusive).
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ MachineBasicBlock::iterator MI = MBB->begin();
+ if (!VNI->isPHIDef()) {
+ MI = Indexes->getInstructionFromIndex(VNI->def);
+ // No need to check the instruction defining VNI for reads.
+ ++MI;
+ }
+ assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
+ "Interference ends on VNI->def. Should have been handled earlier");
+ MachineInstr *LastMI =
+ Indexes->getInstructionFromIndex(TaintExtent.front().first);
+ assert(LastMI && "Range must end at a proper instruction");
+ unsigned TaintNum = 0;
+ for(;;) {
+ assert(MI != MBB->end() && "Bad LastMI");
+ if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) {
+ DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
+ return false;
+ }
+ // LastMI is the last instruction to use the current value.
+ if (&*MI == LastMI) {
+ if (++TaintNum == TaintExtent.size())
+ break;
+ LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
+ assert(LastMI && "Range must end at a proper instruction");
+ TaintedLanes = TaintExtent[TaintNum].second;
+ }
+ ++MI;
+ }
+
+ // The tainted lanes are unused.
+ V.Resolution = CR_Replace;
+ ++NumLaneResolves;
+ }
+ return true;
+}
+
+// Determine if ValNo is a copy of a value number in LI or Other.LI that will
+// be pruned:
+//
+// %dst = COPY %src
+// %src = COPY %dst <-- This value to be pruned.
+// %dst = COPY %src <-- This value is a copy of a pruned value.
+//
+bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.Pruned || V.PrunedComputed)
+ return V.Pruned;
+
+ if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
+ return V.Pruned;
+
+ // Follow copies up the dominator tree and check if any intermediate value
+ // has been pruned.
+ V.PrunedComputed = true;
+ V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
+ return V.Pruned;
+}
+
+void JoinVals::pruneValues(JoinVals &Other,
+ SmallVectorImpl<SlotIndex> &EndPoints) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ SlotIndex Def = LI.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ break;
+ case CR_Replace: {
+ // This value takes precedence over the value in Other.LI.
+ LIS->pruneValue(&Other.LI, Def, &EndPoints);
+ // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+ // instructions are only inserted to provide a live-out value for PHI
+ // predecessors, so the instruction should simply go away once its value
+ // has been replaced.
+ Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+ bool EraseImpDef = OtherV.ErasableImplicitDef &&
+ OtherV.Resolution == CR_Keep;
+ if (!Def.isBlock()) {
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Also remove <def,dead> flags since the joined live range will
+ // continue past this instruction.
+ for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
+ MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
+ MO->setIsUndef(EraseImpDef);
+ MO->setIsDead(false);
+ }
+ // This value will reach instructions below, but we need to make sure
+ // the live range also reaches the instruction at Def.
+ if (!EraseImpDef)
+ EndPoints.push_back(Def);
+ }
+ DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
+ << ": " << Other.LI << '\n');
+ break;
+ }
+ case CR_Erase:
+ case CR_Merge:
+ if (isPrunedValue(i, Other)) {
+ // This value is ultimately a copy of a pruned value in LI or Other.LI.
+ // We can no longer trust the value mapping computed by
+ // computeAssignment(), the value that was originally copied could have
+ // been replaced.
+ LIS->pruneValue(&LI, Def, &EndPoints);
+ DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
+ << Def << ": " << LI << '\n');
+ }
+ break;
+ case CR_Unresolved:
+ case CR_Impossible:
+ llvm_unreachable("Unresolved conflicts");
+ }
+ }
+}
+
+void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+ SmallVectorImpl<unsigned> &ShrinkRegs) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ // Get the def location before markUnused() below invalidates it.
+ SlotIndex Def = LI.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+ // longer. The IMPLICIT_DEF instructions are only inserted by
+ // PHIElimination to guarantee that all PHI predecessors have a value.
+ if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
+ break;
+ // Remove value number i from LI. Note that this VNInfo is still present
+ // in NewVNInfo, so it will appear as an unused value number in the final
+ // joined interval.
+ LI.getValNumInfo(i)->markUnused();
+ LI.removeValNo(LI.getValNumInfo(i));
+ DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
+ // FALL THROUGH.
+
+ case CR_Erase: {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+ assert(MI && "No instruction to erase");
+ if (MI->isCopy()) {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+ ShrinkRegs.push_back(Reg);
+ }
+ ErasedInstrs.insert(MI);
+ DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
+ SmallVector<VNInfo*, 16> NewVNInfo;
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
+ JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
+ JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);
+
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS
+ << '\n');
+
+ // First compute NewVNInfo and the simple value mappings.
+ // Detect impossible conflicts early.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
+ return false;
+
+ // Some conflicts can only be resolved after all values have been mapped.
+ if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
+ return false;
+
+ // All clear, the live ranges can be merged.
+
+ // The merging algorithm in LiveInterval::join() can't handle conflicting
+ // value mappings, so we need to remove any live ranges that overlap a
+ // CR_Replace resolution. Collect a set of end points that can be used to
+ // restore the live range after joining.
+ SmallVector<SlotIndex, 8> EndPoints;
+ LHSVals.pruneValues(RHSVals, EndPoints);
+ RHSVals.pruneValues(LHSVals, EndPoints);
+
+ // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
+ // registers to require trimming.
+ SmallVector<unsigned, 8> ShrinkRegs;
+ LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ while (!ShrinkRegs.empty())
+ LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+
+ // Join RHS into LHS.
+ LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo,
+ MRI);
+
+ // Kill flags are going to be wrong if the live ranges were overlapping.
+ // Eventually, we should simply clear all kill flags when computing live
+ // ranges. They are reinserted after register allocation.
+ MRI->clearKillFlags(LHS.reg);
+ MRI->clearKillFlags(RHS.reg);
+
+ if (EndPoints.empty())
+ return true;
+
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+ << " points: " << LHS << '\n');
+ LIS->extendToIndices(&LHS, EndPoints);
+ return true;
+}
+
+/// joinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
+ return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
+}
+
+namespace {
+// Information concerning MBB coalescing priority.
+struct MBBPriorityInfo {
+ MachineBasicBlock *MBB;
+ unsigned Depth;
+ bool IsSplit;
+
+ MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
+ : MBB(mbb), Depth(depth), IsSplit(issplit) {}
+};
+}
+
+// C-style comparator that sorts first based on the loop depth of the basic
+// block (the unsigned), and then on the MBB number.
+//
+// EnableGlobalCopies assumes that the primary sort key is loop depth.
+static int compareMBBPriority(const void *L, const void *R) {
+ const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
+ const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+ // Deeper loops first
+ if (LHS->Depth != RHS->Depth)
+ return LHS->Depth > RHS->Depth ? -1 : 1;
+
+ // Try to unsplit critical edges next.
+ if (LHS->IsSplit != RHS->IsSplit)
+ return LHS->IsSplit ? -1 : 1;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
+ unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
+ if (cl != cr)
+ return cl > cr ? -1 : 1;
+
+ // As a last resort, sort by block number.
+ return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
+}
+
+/// \returns true if the given copy uses or defines a local live range.
+static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
+ if (!Copy->isCopy())
+ return false;
+
+ unsigned SrcReg = Copy->getOperand(1).getReg();
+ unsigned DstReg = Copy->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
+ || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return false;
+
+ return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
+ || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
+}
+
+// Try joining WorkList copies starting from index From.
+// Null out any successful joins.
+bool RegisterCoalescer::
+copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
+ bool Progress = false;
+ for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
+ if (!CurrList[i])
+ continue;
+ // Skip instruction pointers that have already been erased, for example by
+ // dead code elimination.
+ if (ErasedInstrs.erase(CurrList[i])) {
+ CurrList[i] = 0;
+ continue;
+ }
+ bool Again = false;
+ bool Success = joinCopy(CurrList[i], Again);
+ Progress |= Success;
+ if (Success || !Again)
+ CurrList[i] = 0;
+ }
+ return Progress;
+}
+
+void
+RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ // Collect all copy-like instructions in MBB. Don't start coalescing anything
+ // yet, it might invalidate the iterator.
+ const unsigned PrevSize = WorkList.size();
+ if (JoinGlobalCopies) {
+ // Coalesce copies bottom-up to coalesce local defs before local uses. They
+ // are not inherently easier to resolve, but slightly preferable until we
+ // have local live range splitting. In particular this is required by
+ // cmp+jmp macro fusion.
+ for (MachineBasicBlock::reverse_iterator
+ MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) {
+ if (!MII->isCopyLike())
+ continue;
+ if (isLocalCopy(&(*MII), LIS))
+ LocalWorkList.push_back(&(*MII));
+ else
+ WorkList.push_back(&(*MII));
+ }
+ }
+ else {
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII)
+ if (MII->isCopyLike())
+ WorkList.push_back(MII);
+ }
+ // Try coalescing the collected copies immediately, and remove the nulls.
+ // This prevents the WorkList from getting too large since most copies are
+ // joinable on the first attempt.
+ MutableArrayRef<MachineInstr*>
+ CurrList(WorkList.begin() + PrevSize, WorkList.end());
+ if (copyCoalesceWorkList(CurrList))
+ WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
+ (MachineInstr*)0), WorkList.end());
+}
+
+void RegisterCoalescer::coalesceLocals() {
+ copyCoalesceWorkList(LocalWorkList);
+ for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
+ if (LocalWorkList[j])
+ WorkList.push_back(LocalWorkList[j]);
+ }
+ LocalWorkList.clear();
+}
+
+void RegisterCoalescer::joinAllIntervals() {
+ DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+ assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
+
+ std::vector<MBBPriorityInfo> MBBs;
+ MBBs.reserve(MF->size());
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
+ JoinSplitEdges && isSplitEdge(MBB)));
+ }
+ array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
+
+ // Coalesce intervals in MBB priority order.
+ unsigned CurrDepth = UINT_MAX;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ // Try coalescing the collected local copies for deeper loops.
+ if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
+ coalesceLocals();
+ CurrDepth = MBBs[i].Depth;
+ }
+ copyCoalesceInMBB(MBBs[i].MBB);
+ }
+ coalesceLocals();
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ while (copyCoalesceWorkList(WorkList))
+ /* empty */ ;
+}
+
+void RegisterCoalescer::releaseMemory() {
+ ErasedInstrs.clear();
+ WorkList.clear();
+ DeadDefs.clear();
+ InflateRegs.clear();
+}
+
+bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ MRI = &fn.getRegInfo();
+ TM = &fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ AA = &getAnalysis<AliasAnalysis>();
+ Loops = &getAnalysis<MachineLoopInfo>();
+
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (EnableGlobalCopies == cl::BOU_UNSET)
+ JoinGlobalCopies = ST.enableMachineScheduler();
+ else
+ JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
+
+ // The MachineScheduler does not currently require JoinSplitEdges. This will
+ // either be enabled unconditionally or replaced by a more general live range
+ // splitting optimization.
+ JoinSplitEdges = EnableJoinSplits;
+
+ DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: " << MF->getName() << '\n');
+
+ if (VerifyCoalescing)
+ MF->verify(this, "Before register coalescing");
+
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining)
+ joinAllIntervals();
+
+ // After deleting a lot of copies, register classes may be less constrained.
+ // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
+ // DPR inflation.
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+ InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+ InflateRegs.end());
+ DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+ for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+ unsigned Reg = InflateRegs[i];
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ if (MRI->recomputeRegClass(Reg, *TM)) {
+ DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+ << MRI->getRegClass(Reg)->getName() << '\n');
+ ++NumInflated;
+ }
+ }
+
+ DEBUG(dump());
+ if (VerifyCoalescing)
+ MF->verify(this, "After register coalescing");
+ return true;
+}
+
+/// print - Implement the dump method.
+void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
+ LIS->print(O, m);
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
new file mode 100644
index 000000000000..47c3df14606d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -0,0 +1,120 @@
+//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the abstract interface for register coalescers,
+// allowing them to interact with and query register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
+#define LLVM_CODEGEN_REGISTER_COALESCER_H
+
+namespace llvm {
+
+ class MachineInstr;
+ class TargetRegisterInfo;
+ class TargetRegisterClass;
+ class TargetInstrInfo;
+
+ /// CoalescerPair - A helper class for register coalescers. When deciding if
+ /// two registers can be coalesced, CoalescerPair can determine if a copy
+ /// instruction would become an identity copy after coalescing.
+ class CoalescerPair {
+ const TargetRegisterInfo &TRI;
+
+ /// DstReg - The register that will be left after coalescing. It can be a
+ /// virtual or physical register.
+ unsigned DstReg;
+
+ /// SrcReg - the virtual register that will be coalesced into dstReg.
+ unsigned SrcReg;
+
+ /// DstIdx - The sub-register index of the old DstReg in the new coalesced
+ /// register.
+ unsigned DstIdx;
+
+ /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced
+ /// register.
+ unsigned SrcIdx;
+
+ /// Partial - True when the original copy was a partial subregister copy.
+ bool Partial;
+
+ /// CrossClass - True when both regs are virtual, and newRC is constrained.
+ bool CrossClass;
+
+ /// Flipped - True when DstReg and SrcReg are reversed from the original
+ /// copy instruction.
+ bool Flipped;
+
+ /// NewRC - The register class of the coalesced register, or NULL if DstReg
+ /// is a physreg. This register class may be a super-register of both
+ /// SrcReg and DstReg.
+ const TargetRegisterClass *NewRC;
+
+ public:
+ CoalescerPair(const TargetRegisterInfo &tri)
+ : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0),
+ Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
+
+ /// Create a CoalescerPair representing a virtreg-to-physreg copy.
+ /// No need to call setRegisters().
+ CoalescerPair(unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterInfo &tri)
+ : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0),
+ Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
+
+ /// setRegisters - set registers to match the copy instruction MI. Return
+ /// false if MI is not a coalescable copy instruction.
+ bool setRegisters(const MachineInstr*);
+
+ /// flip - Swap SrcReg and DstReg. Return false if swapping is impossible
+ /// because DstReg is a physical register, or SubIdx is set.
+ bool flip();
+
+ /// isCoalescable - Return true if MI is a copy instruction that will become
+ /// an identity copy after coalescing.
+ bool isCoalescable(const MachineInstr*) const;
+
+ /// isPhys - Return true if DstReg is a physical register.
+ bool isPhys() const { return !NewRC; }
+
+ /// isPartial - Return true if the original copy instruction did not copy
+ /// the full register, but was a subreg operation.
+ bool isPartial() const { return Partial; }
+
+ /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return CrossClass; }
+
+ /// isFlipped - Return true when getSrcReg is the register being defined by
+ /// the original copy instruction.
+ bool isFlipped() const { return Flipped; }
+
+ /// getDstReg - Return the register (virtual or physical) that will remain
+ /// after coalescing.
+ unsigned getDstReg() const { return DstReg; }
+
+ /// getSrcReg - Return the virtual register that will be coalesced away.
+ unsigned getSrcReg() const { return SrcReg; }
+
+ /// getDstIdx - Return the subregister index that DstReg will be coalesced
+ /// into, or 0.
+ unsigned getDstIdx() const { return DstIdx; }
+
+ /// getSrcIdx - Return the subregister index that SrcReg will be coalesced
+ /// into, or 0.
+ unsigned getSrcIdx() const { return SrcIdx; }
+
+ /// getNewRC - Return the register class of the coalesced register.
+ const TargetRegisterClass *getNewRC() const { return NewRC; }
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
new file mode 100644
index 000000000000..97f22e1049f6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -0,0 +1,793 @@
+//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterPressure class which can be used to track
+// MachineInstr level register pressure.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+/// Increase pressure for each pressure set provided by TargetRegisterInfo.
+static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ std::vector<unsigned> &MaxSetPressure,
+ const int *PSet, unsigned Weight) {
+ for (; *PSet != -1; ++PSet) {
+ CurrSetPressure[*PSet] += Weight;
+ if (&CurrSetPressure != &MaxSetPressure
+ && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) {
+ MaxSetPressure[*PSet] = CurrSetPressure[*PSet];
+ }
+ }
+}
+
+/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
+static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ const int *PSet, unsigned Weight) {
+ for (; *PSet != -1; ++PSet) {
+ assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow");
+ CurrSetPressure[*PSet] -= Weight;
+ }
+}
+
+/// Directly increase pressure only within this RegisterPressure result.
+void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ increaseSetPressure(MaxSetPressure, MaxSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ increaseSetPressure(MaxSetPressure, MaxSetPressure,
+ TRI->getRegUnitPressureSets(Reg),
+ TRI->getRegUnitWeight(Reg));
+ }
+}
+
+/// Directly decrease pressure only within this RegisterPressure result.
+void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg),
+ TRI->getRegUnitWeight(Reg));
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static void dumpSetPressure(const std::vector<unsigned> &SetPressure,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) {
+ if (SetPressure[i] != 0)
+ dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n';
+ }
+}
+
+void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
+ dbgs() << "Max Pressure: ";
+ dumpSetPressure(MaxSetPressure, TRI);
+ dbgs() << "Live In: ";
+ for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
+ dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
+ dbgs() << '\n';
+ dbgs() << "Live Out: ";
+ for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
+ dbgs() << PrintReg(LiveOutRegs[i], TRI) << " ";
+ dbgs() << '\n';
+}
+
+void RegPressureTracker::dump() const {
+ dbgs() << "Curr Pressure: ";
+ dumpSetPressure(CurrSetPressure, TRI);
+ P.dump(TRI);
+}
+#endif
+
+/// Increase the current pressure as impacted by these registers and bump
+/// the high water mark if needed.
+void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
+ if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getRegUnitPressureSets(Regs[I]),
+ TRI->getRegUnitWeight(Regs[I]));
+ }
+ }
+}
+
+/// Simply decrease the current pressure as impacted by these registers.
+void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
+ if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
+ decreaseSetPressure(CurrSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]),
+ TRI->getRegUnitWeight(Regs[I]));
+ }
+ }
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void IntervalPressure::reset() {
+ TopIdx = BottomIdx = SlotIndex();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void RegionPressure::reset() {
+ TopPos = BottomPos = MachineBasicBlock::const_iterator();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// If the current top is not less than or equal to the next index, open it.
+/// We happen to need the SlotIndex for the next top for pressure update.
+void IntervalPressure::openTop(SlotIndex NextTop) {
+ if (TopIdx <= NextTop)
+ return;
+ TopIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current top is the previous instruction (before receding), open it.
+void RegionPressure::openTop(MachineBasicBlock::const_iterator PrevTop) {
+ if (TopPos != PrevTop)
+ return;
+ TopPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is not greater than the previous index, open it.
+void IntervalPressure::openBottom(SlotIndex PrevBottom) {
+ if (BottomIdx > PrevBottom)
+ return;
+ BottomIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is the previous instr (before advancing), open it.
+void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
+ if (BottomPos != PrevBottom)
+ return;
+ BottomPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return &LIS->getInterval(Reg);
+ return LIS->getCachedRegUnit(Reg);
+}
+
+/// Setup the RegPressureTracker.
+///
+/// TODO: Add support for pressure without LiveIntervals.
+void RegPressureTracker::init(const MachineFunction *mf,
+ const RegisterClassInfo *rci,
+ const LiveIntervals *lis,
+ const MachineBasicBlock *mbb,
+ MachineBasicBlock::const_iterator pos)
+{
+ MF = mf;
+ TRI = MF->getTarget().getRegisterInfo();
+ RCI = rci;
+ MRI = &MF->getRegInfo();
+ MBB = mbb;
+
+ if (RequireIntervals) {
+ assert(lis && "IntervalPressure requires LiveIntervals");
+ LIS = lis;
+ }
+
+ CurrPos = pos;
+ CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);
+
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).reset();
+ else
+ static_cast<RegionPressure&>(P).reset();
+ P.MaxSetPressure = CurrSetPressure;
+
+ LiveRegs.PhysRegs.clear();
+ LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs());
+ LiveRegs.VirtRegs.clear();
+ LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs());
+}
+
+/// Does this pressure result have a valid top position and live ins.
+bool RegPressureTracker::isTopClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).TopIdx.isValid();
+ return (static_cast<RegionPressure&>(P).TopPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+/// Does this pressure result have a valid bottom position and live outs.
+bool RegPressureTracker::isBottomClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).BottomIdx.isValid();
+ return (static_cast<RegionPressure&>(P).BottomPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+
+SlotIndex RegPressureTracker::getCurrSlot() const {
+ MachineBasicBlock::const_iterator IdxPos = CurrPos;
+ while (IdxPos != MBB->end() && IdxPos->isDebugValue())
+ ++IdxPos;
+ if (IdxPos == MBB->end())
+ return LIS->getMBBEndIdx(MBB);
+ return LIS->getInstructionIndex(IdxPos).getRegSlot();
+}
+
+/// Set the boundary for the top of the region and summarize live ins.
+void RegPressureTracker::closeTop() {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).TopIdx = getCurrSlot();
+ else
+ static_cast<RegionPressure&>(P).TopPos = CurrPos;
+
+ assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
+ P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
+ P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
+ for (SparseSet<unsigned>::const_iterator I =
+ LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
+ P.LiveInRegs.push_back(*I);
+ std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end());
+ P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()),
+ P.LiveInRegs.end());
+}
+
+/// Set the boundary for the bottom of the region and summarize live outs.
+void RegPressureTracker::closeBottom() {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).BottomIdx = getCurrSlot();
+ else
+ static_cast<RegionPressure&>(P).BottomPos = CurrPos;
+
+ assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
+ P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
+ P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
+ for (SparseSet<unsigned>::const_iterator I =
+ LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
+ P.LiveOutRegs.push_back(*I);
+ std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end());
+ P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()),
+ P.LiveOutRegs.end());
+}
+
+/// Finalize the region boundaries and record live ins and live outs.
+void RegPressureTracker::closeRegion() {
+ if (!isTopClosed() && !isBottomClosed()) {
+ assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() &&
+ "no region boundary");
+ return;
+ }
+ if (!isBottomClosed())
+ closeBottom();
+ else if (!isTopClosed())
+ closeTop();
+ // If both top and bottom are closed, do nothing.
+}
+
+/// \brief Convenient wrapper for checking membership in RegisterOperands.
+static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) {
+ return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
+}
+
+/// Collect this instruction's unique uses and defs into SmallVectors for
+/// processing defs and uses in order.
+class RegisterOperands {
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+public:
+ SmallVector<unsigned, 8> Uses;
+ SmallVector<unsigned, 8> Defs;
+ SmallVector<unsigned, 8> DeadDefs;
+
+ RegisterOperands(const TargetRegisterInfo *tri,
+ const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {}
+
+ /// Push this operand's register onto the correct vector.
+ void collect(const MachineOperand &MO) {
+ if (!MO.isReg() || !MO.getReg())
+ return;
+ if (MO.readsReg())
+ pushRegUnits(MO.getReg(), Uses);
+ if (MO.isDef()) {
+ if (MO.isDead())
+ pushRegUnits(MO.getReg(), DeadDefs);
+ else
+ pushRegUnits(MO.getReg(), Defs);
+ }
+ }
+
+protected:
+ void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &Regs) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (containsReg(Regs, Reg))
+ return;
+ Regs.push_back(Reg);
+ }
+ else if (MRI->isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (containsReg(Regs, *Units))
+ continue;
+ Regs.push_back(*Units);
+ }
+ }
+ }
+};
+
+/// Collect physical and virtual register operands.
+static void collectOperands(const MachineInstr *MI,
+ RegisterOperands &RegOpers) {
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+ RegOpers.collect(*OperI);
+
+ // Remove redundant physreg dead defs.
+ SmallVectorImpl<unsigned>::iterator I =
+ std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
+ std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
+ RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+}
+
+/// Force liveness of registers.
+void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ if (LiveRegs.insert(Regs[i]))
+ increaseRegPressure(Regs[i]);
+ }
+}
+
+/// Add Reg to the live in set and increase max pressure.
+void RegPressureTracker::discoverLiveIn(unsigned Reg) {
+ assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
+ if (containsReg(P.LiveInRegs, Reg))
+ return;
+
+ // At live in discovery, unconditionally increase the high water mark.
+ P.LiveInRegs.push_back(Reg);
+ P.increase(Reg, TRI, MRI);
+}
+
+/// Add Reg to the live out set and increase max pressure.
+void RegPressureTracker::discoverLiveOut(unsigned Reg) {
+ assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
+ if (containsReg(P.LiveOutRegs, Reg))
+ return;
+
+ // At live out discovery, unconditionally increase the high water mark.
+ P.LiveOutRegs.push_back(Reg);
+ P.increase(Reg, TRI, MRI);
+}
+
+/// Recede across the previous instruction.
+bool RegPressureTracker::recede() {
+ // Check for the top of the analyzable region.
+ if (CurrPos == MBB->begin()) {
+ closeRegion();
+ return false;
+ }
+ if (!isBottomClosed())
+ closeBottom();
+
+ // Open the top of the region using block iterators.
+ if (!RequireIntervals && isTopClosed())
+ static_cast<RegionPressure&>(P).openTop(CurrPos);
+
+ // Find the previous instruction.
+ do
+ --CurrPos;
+ while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+
+ if (CurrPos->isDebugValue()) {
+ closeRegion();
+ return false;
+ }
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+
+ // Open the top of the region using slot indexes.
+ if (RequireIntervals && isTopClosed())
+ static_cast<IntervalPressure&>(P).openTop(SlotIdx);
+
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(CurrPos, RegOpers);
+
+ // Boost pressure for all dead defs together.
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ // TODO: consider earlyclobbers?
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (LiveRegs.erase(Reg))
+ decreaseRegPressure(Reg);
+ else
+ discoverLiveOut(Reg);
+ }
+
+ // Generate liveness for uses.
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (!LiveRegs.contains(Reg)) {
+ // Adjust liveouts if LiveIntervals are available.
+ if (RequireIntervals) {
+ const LiveInterval *LI = getInterval(Reg);
+ if (LI && !LI->killedAt(SlotIdx))
+ discoverLiveOut(Reg);
+ }
+ increaseRegPressure(Reg);
+ LiveRegs.insert(Reg);
+ }
+ }
+ return true;
+}
+
+/// Advance across the current instruction.
+bool RegPressureTracker::advance() {
+ // Check for the bottom of the analyzable region.
+ if (CurrPos == MBB->end()) {
+ closeRegion();
+ return false;
+ }
+ if (!isTopClosed())
+ closeTop();
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = getCurrSlot();
+
+ // Open the bottom of the region using slot indexes.
+ if (isBottomClosed()) {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).openBottom(SlotIdx);
+ else
+ static_cast<RegionPressure&>(P).openBottom(CurrPos);
+ }
+
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(CurrPos, RegOpers);
+
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ // Discover live-ins.
+ bool isLive = LiveRegs.contains(Reg);
+ if (!isLive)
+ discoverLiveIn(Reg);
+ // Kill liveness at last uses.
+ bool lastUse = false;
+ if (RequireIntervals) {
+ const LiveInterval *LI = getInterval(Reg);
+ lastUse = LI && LI->killedAt(SlotIdx);
+ }
+ else {
+ // Allocatable physregs are always single-use before register rewriting.
+ lastUse = !TargetRegisterInfo::isVirtualRegister(Reg);
+ }
+ if (lastUse && isLive) {
+ LiveRegs.erase(Reg);
+ decreaseRegPressure(Reg);
+ }
+ else if (!lastUse && !isLive)
+ increaseRegPressure(Reg);
+ }
+
+ // Generate liveness for defs.
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (LiveRegs.insert(Reg))
+ increaseRegPressure(Reg);
+ }
+
+ // Boost pressure for all dead defs together.
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
+
+ // Find the next instruction.
+ do
+ ++CurrPos;
+ while (CurrPos != MBB->end() && CurrPos->isDebugValue());
+ return true;
+}
+
+/// Find the max change in excess pressure across all sets.
+static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
+ ArrayRef<unsigned> NewPressureVec,
+ RegPressureDelta &Delta,
+ const TargetRegisterInfo *TRI) {
+ int ExcessUnits = 0;
+ unsigned PSetID = ~0U;
+ for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldPressureVec[i];
+ unsigned PNew = NewPressureVec[i];
+ int PDiff = (int)PNew - (int)POld;
+ if (!PDiff) // No change in this set in the common case.
+ continue;
+ // Only consider change beyond the limit.
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ if (Limit > POld) {
+ if (Limit > PNew)
+ PDiff = 0; // Under the limit
+ else
+ PDiff = PNew - Limit; // Just exceeded limit.
+ }
+ else if (Limit > PNew)
+ PDiff = Limit - POld; // Just obeyed limit.
+
+ if (std::abs(PDiff) > std::abs(ExcessUnits)) {
+ ExcessUnits = PDiff;
+ PSetID = i;
+ }
+ }
+ Delta.Excess.PSetID = PSetID;
+ Delta.Excess.UnitIncrease = ExcessUnits;
+}
+
+/// Find the max change in max pressure that either surpasses a critical PSet
+/// limit or exceeds the current MaxPressureLimit.
+///
+/// FIXME: comparing each element of the old and new MaxPressure vectors here is
+/// silly. It's done now to demonstrate the concept but will go away with a
+/// RegPressureTracker API change to work with pressure differences.
+static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
+ ArrayRef<unsigned> NewMaxPressureVec,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit,
+ RegPressureDelta &Delta) {
+ Delta.CriticalMax = PressureElement();
+ Delta.CurrentMax = PressureElement();
+
+ unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+ for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldMaxPressureVec[i];
+ unsigned PNew = NewMaxPressureVec[i];
+ if (PNew == POld) // No change in this set in the common case.
+ continue;
+
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i)
+ ++CritIdx;
+
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) {
+ int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease;
+ if (PDiff > Delta.CriticalMax.UnitIncrease) {
+ Delta.CriticalMax.PSetID = i;
+ Delta.CriticalMax.UnitIncrease = PDiff;
+ }
+ }
+
+ // Find the greatest increase above MaxPressureLimit.
+ // (Ignores negative MDiff).
+ int MDiff = (int)PNew - (int)MaxPressureLimit[i];
+ if (MDiff > Delta.CurrentMax.UnitIncrease) {
+ Delta.CurrentMax.PSetID = i;
+ Delta.CurrentMax.UnitIncrease = PNew;
+ }
+ }
+}
+
+/// Record the upward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(MI, RegOpers);
+
+ // Boost max pressure for all dead defs together.
+ // Since CurrSetPressure and MaxSetPressure
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (!containsReg(RegOpers.Uses, Reg))
+ decreaseRegPressure(Reg);
+ }
+ // Generate liveness for uses.
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (!LiveRegs.contains(Reg))
+ increaseRegPressure(Reg);
+ }
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// bottom-up. Find the pressure set with the most change beyond its pressure
+/// limit based on the tracker's current pressure, and return the change in
+/// number of register units of that pressure set introduced by this
+/// instruction.
+///
+/// This assumes that the current LiveOut set is sufficient.
+///
+/// FIXME: This is expensive for an on-the-fly query. We need to cache the
+/// result per-SUnit with enough information to adjust for the current
+/// scheduling position. But this works as a proof of concept.
+void RegPressureTracker::
+getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ // FIXME: The snapshot heap space should persist. But I'm planning to
+ // summarize the pressure effect so we don't need to snapshot at all.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.UnitIncrease >= 0 &&
+ Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+}
+
+/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
+static bool findUseBetween(unsigned Reg,
+ SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
+ const MachineRegisterInfo *MRI,
+ const LiveIntervals *LIS) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ if (MI->isDebugValue())
+ continue;
+ SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
+ if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
+ return true;
+ }
+ return false;
+}
+
+/// Record the downward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(MI, RegOpers);
+
+ // Kill liveness at last uses. Assume allocatable physregs are single-use
+ // rather than checking LiveIntervals.
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
+
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (RequireIntervals) {
+ // FIXME: allow the caller to pass in the list of vreg uses that remain
+ // to be bottom-scheduled to avoid searching uses at each query.
+ SlotIndex CurrIdx = getCurrSlot();
+ const LiveInterval *LI = getInterval(Reg);
+ if (LI && LI->killedAt(SlotIdx)
+ && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
+ decreaseRegPressure(Reg);
+ }
+ }
+ else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Allocatable physregs are always single-use before register rewriting.
+ decreaseRegPressure(Reg);
+ }
+ }
+
+ // Generate liveness for defs.
+ increaseRegPressure(RegOpers.Defs);
+
+ // Boost pressure for all dead defs together.
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// top-down. Find the register class with the most change in its pressure limit
+/// based on the tracker's current pressure, and return the number of excess
+/// register units of that pressure set introduced by this instruction.
+///
+/// This assumes that the current LiveIn set is sufficient.
+void RegPressureTracker::
+getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.UnitIncrease >= 0 &&
+ Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+}
+
+/// Get the pressure of each PSet after traversing this instruction bottom-up.
+void RegPressureTracker::
+getUpwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
+
+/// Get the pressure of each PSet after traversing this instruction top-down.
+void RegPressureTracker::
+getDownwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 000000000000..07ace7a436c7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,443 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine register scavenger. It can provide
+// information, such as unused registers, at any point in a machine basic block.
+// It also provides a mechanism to make registers available by evicting them to
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg-scavenging"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+/// setUsed - Set the register and its sub-registers as being used.
+void RegScavenger::setUsed(unsigned Reg) {
+ RegsAvailable.reset(Reg);
+
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RegsAvailable.reset(*SubRegs);
+}
+
+bool RegScavenger::isAliasUsed(unsigned Reg) const {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ if (isUsed(*AI, *AI == Reg))
+ return true;
+ return false;
+}
+
+void RegScavenger::initRegState() {
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ I->Reg = 0;
+ I->Restore = NULL;
+ }
+
+ // All registers started out unused.
+ RegsAvailable.set();
+
+ if (!MBB)
+ return;
+
+ // Live-in registers are in use.
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ setUsed(*I);
+
+ // Pristine CSRs are also unavailable.
+ BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I))
+ setUsed(I);
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
+ MachineFunction &MF = *mbb->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
+ "Target changed?");
+
+ // It is not possible to use the register scavenger after late optimization
+ // passes that don't preserve accurate liveness information.
+ assert(MRI->tracksLiveness() &&
+ "Cannot use register scavenger with inaccurate liveness");
+
+ // Self-initialize.
+ if (!MBB) {
+ NumPhysRegs = TRI->getNumRegs();
+ RegsAvailable.resize(NumPhysRegs);
+ KillRegs.resize(NumPhysRegs);
+ DefRegs.resize(NumPhysRegs);
+
+ // Create callee-saved registers bitvector.
+ CalleeSavedRegs.resize(NumPhysRegs);
+ const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ if (CSRegs != NULL)
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CalleeSavedRegs.set(CSRegs[i]);
+ }
+
+ MBB = mbb;
+ initRegState();
+
+ Tracking = false;
+}
+
+void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
+ BV.set(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ BV.set(*SubRegs);
+}
+
+void RegScavenger::determineKillsAndDefs() {
+ assert(Tracking && "Must be tracking to determine kills and defs");
+
+ MachineInstr *MI = MBBI;
+ assert(!MI->isDebugValue() && "Debug values have no kills or defs");
+
+ // Find out which registers are early clobbered, killed, defined, and marked
+ // def-dead in this instruction.
+ // FIXME: The scavenger is not predication aware. If the instruction is
+ // predicated, conservatively assume "kill" markers do not actually kill the
+ // register. Similarly ignores "dead" markers.
+ bool isPred = TII->isPredicated(MI);
+ KillRegs.reset();
+ DefRegs.reset();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask());
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
+ continue;
+
+ if (MO.isUse()) {
+ // Ignore undef uses.
+ if (MO.isUndef())
+ continue;
+ if (!isPred && MO.isKill())
+ addRegWithSubRegs(KillRegs, Reg);
+ } else {
+ assert(MO.isDef());
+ if (!isPred && MO.isDead())
+ addRegWithSubRegs(KillRegs, Reg);
+ else
+ addRegWithSubRegs(DefRegs, Reg);
+ }
+ }
+}
+
+void RegScavenger::unprocess() {
+ assert(Tracking && "Cannot unprocess because we're not tracking");
+
+ MachineInstr *MI = MBBI;
+ if (MI->isDebugValue())
+ return;
+
+ determineKillsAndDefs();
+
+ // Commit the changes.
+ setUsed(KillRegs);
+ setUnused(DefRegs);
+
+ if (MBBI == MBB->begin()) {
+ MBBI = MachineBasicBlock::iterator(NULL);
+ Tracking = false;
+ } else
+ --MBBI;
+}
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already past the end of the basic block!");
+ MBBI = llvm::next(MBBI);
+ }
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+
+ MachineInstr *MI = MBBI;
+
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ if (I->Restore != MI)
+ continue;
+
+ I->Reg = 0;
+ I->Restore = NULL;
+ }
+
+ if (MI->isDebugValue())
+ return;
+
+ determineKillsAndDefs();
+
+ // Verify uses and defs.
+#ifndef NDEBUG
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
+ continue;
+ if (MO.isUse()) {
+ if (MO.isUndef())
+ continue;
+ if (!isUsed(Reg)) {
+ // Check if it's partial live: e.g.
+ // D0 = insert_subreg D0<undef>, S0
+ // ... D0
+ // The problem is the insert_subreg could be eliminated. The use of
+ // D0 is using a partially undef value. This is not *incorrect* since
+ // S1 is can be freely clobbered.
+ // Ideally we would like a way to model this, but leaving the
+ // insert_subreg around causes both correctness and performance issues.
+ bool SubUsed = false;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ if (isUsed(*SubRegs)) {
+ SubUsed = true;
+ break;
+ }
+ if (!SubUsed) {
+ MBB->getParent()->verify(NULL, "In Register Scavenger");
+ llvm_unreachable("Using an undefined register!");
+ }
+ (void)SubUsed;
+ }
+ } else {
+ assert(MO.isDef());
+#if 0
+ // FIXME: Enable this once we've figured out how to correctly transfer
+ // implicit kills during codegen passes like the coalescer.
+ assert((KillRegs.test(Reg) || isUnused(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+#endif
+ }
+ }
+#endif // NDEBUG
+
+ // Commit the changes.
+ setUnused(KillRegs);
+ setUsed(DefRegs);
+}
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ used = RegsAvailable;
+ used.flip();
+ if (includeReserved)
+ used |= MRI->getReservedRegs();
+ else
+ used.reset(MRI->getReservedRegs());
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I)) {
+ DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
+ "\n");
+ return *I;
+ }
+ return 0;
+}
+
+/// getRegsAvailable - Return all available registers in the register class
+/// in Mask.
+BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
+ BitVector Mask(TRI->getNumRegs());
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I))
+ Mask.set(*I);
+ return Mask;
+}
+
+/// findSurvivorReg - Return the candidate register that is unused for the
+/// longest after StargMII. UseMI is set to the instruction where the search
+/// stopped.
+///
+/// No more than InstrLimit instructions are inspected.
+///
+unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
+ BitVector &Candidates,
+ unsigned InstrLimit,
+ MachineBasicBlock::iterator &UseMI) {
+ int Survivor = Candidates.find_first();
+ assert(Survivor > 0 && "No candidates for scavenging");
+
+ MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
+ assert(StartMI != ME && "MI already at terminator");
+ MachineBasicBlock::iterator RestorePointMI = StartMI;
+ MachineBasicBlock::iterator MI = StartMI;
+
+ bool inVirtLiveRange = false;
+ for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ if (MI->isDebugValue()) {
+ ++InstrLimit; // Don't count debug instructions
+ continue;
+ }
+ bool isVirtKillInsn = false;
+ bool isVirtDefInsn = false;
+ // Remove any candidates touched by instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ Candidates.clearBitsNotInMask(MO.getRegMask());
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isDef())
+ isVirtDefInsn = true;
+ else if (MO.isKill())
+ isVirtKillInsn = true;
+ continue;
+ }
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
+ }
+ // If we're not in a virtual reg's live range, this is a valid
+ // restore point.
+ if (!inVirtLiveRange) RestorePointMI = MI;
+
+ // Update whether we're in the live range of a virtual register
+ if (isVirtKillInsn) inVirtLiveRange = false;
+ if (isVirtDefInsn) inVirtLiveRange = true;
+
+ // Was our survivor untouched by this instruction?
+ if (Candidates.test(Survivor))
+ continue;
+
+ // All candidates gone?
+ if (Candidates.none())
+ break;
+
+ Survivor = Candidates.find_first();
+ }
+ // If we ran off the end, that's where we want to restore.
+ if (MI == ME) RestorePointMI = ME;
+ assert (RestorePointMI != StartMI &&
+ "No available scavenger restore location!");
+
+ // We ran out of candidates, so stop the search.
+ UseMI = RestorePointMI;
+ return Survivor;
+}
+
+static unsigned getFrameIndexOperandNum(MachineInstr *MI) {
+ unsigned i = 0;
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ return i;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ // Consider all allocatable registers in the register class initially
+ BitVector Candidates =
+ TRI->getAllocatableSet(*I->getParent()->getParent(), RC);
+
+ // Exclude all the registers being used by the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (MO.isReg() && MO.getReg() != 0 &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ Candidates.reset(MO.getReg());
+ }
+
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill. Search explicitly rather than masking out based on
+ // RegsAvailable, as RegsAvailable does not take aliases into account.
+ // That's what getRegsAvailable() is for.
+ BitVector Available = getRegsAvailable(RC);
+ Available &= Candidates;
+ if (Available.any())
+ Candidates = Available;
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+ // If we found an unused register there is no reason to spill it.
+ if (!isAliasUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+ return SReg;
+ }
+
+ // Find an available scavenging slot.
+ unsigned SI;
+ for (SI = 0; SI < Scavenged.size(); ++SI)
+ if (Scavenged[SI].Reg == 0)
+ break;
+
+ if (SI == Scavenged.size()) {
+ // We need to scavenge a register but have no spill slot, the target
+ // must know how to do it (if not, we'll assert below).
+ Scavenged.push_back(ScavengedInfo());
+ }
+
+ // Avoid infinite regress
+ Scavenged[SI].Reg = SReg;
+
+ // If the target knows how to save/restore the register, let it do so;
+ // otherwise, use the emergency stack spill slot.
+ if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
+ // Spill the scavenged register before I.
+ assert(Scavenged[SI].FrameIndex >= 0 &&
+ "Cannot scavenge register without an emergency spill slot!");
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
+ RC, TRI);
+ MachineBasicBlock::iterator II = prior(I);
+
+ unsigned FIOperandNum = getFrameIndexOperandNum(II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
+ RC, TRI);
+ II = prior(UseMI);
+
+ FIOperandNum = getFrameIndexOperandNum(II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
+ }
+
+ Scavenged[SI].Restore = prior(UseMI);
+
+ // Doing this here leads to infinite regress.
+ // Scavenged[SI].Reg = SReg;
+
+ DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+ "\n");
+
+ return SReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 000000000000..07e5b470fb1e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,642 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool> StressSchedOpt(
+ "stress-sched", cl::Hidden, cl::init(false),
+ cl::desc("Stress test instruction scheduling"));
+#endif
+
+void SchedulingPriorityQueue::anchor() { }
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+ : TM(mf.getTarget()),
+ TII(TM.getInstrInfo()),
+ TRI(TM.getRegisterInfo()),
+ MF(mf), MRI(mf.getRegInfo()),
+ EntrySU(), ExitSU() {
+#ifndef NDEBUG
+ StressSched = StressSchedOpt;
+#endif
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// Clear the DAG state (e.g. between scheduling regions).
+void ScheduleDAG::clearDAG() {
+ SUnits.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+}
+
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// not already. It also adds the current node as a successor of the
+/// specified node.
+bool SUnit::addPred(const SDep &D, bool Required) {
+ // If this node already has this depenence, don't add a redundant one.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ // Zero-latency weak edges may be added purely for heuristic ordering. Don't
+ // add them if another kind of edge already exists.
+ if (!Required && I->getSUnit() == D.getSUnit())
+ return false;
+ if (I->overlaps(D)) {
+ // Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
+ if (I->getLatency() < D.getLatency()) {
+ SUnit *PredSU = I->getSUnit();
+ // Find the corresponding successor in N.
+ SDep ForwardD = *I;
+ ForwardD.setSUnit(this);
+ for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ if (*II == ForwardD) {
+ II->setLatency(D.getLatency());
+ break;
+ }
+ }
+ I->setLatency(D.getLatency());
+ }
+ return false;
+ }
+ }
+ // Now add a corresponding succ to N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ // Update the bookkeeping.
+ if (D.getKind() == SDep::Data) {
+ assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
+ assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+ ++NumPreds;
+ ++N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak()) {
+ ++WeakPredsLeft;
+ }
+ else {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
+ }
+ if (!isScheduled) {
+ if (D.isWeak()) {
+ ++N->WeakSuccsLeft;
+ }
+ else {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
+ }
+ Preds.push_back(D);
+ N->Succs.push_back(P);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return true;
+}
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+ // Find the matching predecessor.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D) {
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(),
+ N->Succs.end(), P);
+ assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
+ N->Succs.erase(Succ);
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak())
+ --WeakPredsLeft;
+ else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
+ }
+ if (!isScheduled) {
+ if (D.isWeak())
+ --N->WeakSuccsLeft;
+ else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ }
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return;
+ }
+}
+
+void SUnit::setDepthDirty() {
+ if (!isDepthCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isDepthCurrent = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(),
+ E = SU->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isDepthCurrent)
+ WorkList.push_back(SuccSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+ if (!isHeightCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isHeightCurrent = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),
+ E = SU->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isHeightCurrent)
+ WorkList.push_back(PredSU);
+ }
+ } while (!WorkList.empty());
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+ E = Cur->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the entry.
+///
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+ E = Cur->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::biasCriticalPath() {
+ if (NumPreds < 2)
+ return;
+
+ SUnit::pred_iterator BestI = Preds.begin();
+ unsigned MaxDepth = BestI->getSUnit()->getDepth();
+ for (SUnit::pred_iterator
+ I = llvm::next(BestI), E = Preds.end(); I != E; ++I) {
+ if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
+ BestI = I;
+ }
+ if (BestI != Preds.begin())
+ std::swap(*Preds.begin(), *BestI);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+ dbgs() << "SU(" << NodeNum << "): ";
+ G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+ dump(G);
+
+ dbgs() << " # preds left : " << NumPredsLeft << "\n";
+ dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ if (WeakPredsLeft)
+ dbgs() << " # weak preds left : " << WeakPredsLeft << "\n";
+ if (WeakSuccsLeft)
+ dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
+ dbgs() << " Latency : " << Latency << "\n";
+ dbgs() << " Depth : " << getDepth() << "\n";
+ dbgs() << " Height : " << getHeight() << "\n";
+
+ if (Preds.size() != 0) {
+ dbgs() << " Predecessors:\n";
+ for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << "\n";
+ }
+ }
+ if (Succs.size() != 0) {
+ dbgs() << " Successors:\n";
+ for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << "\n";
+ }
+ }
+ dbgs() << "\n";
+}
+#endif
+
+#ifndef NDEBUG
+/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+/// their state is consistent. Return the number of scheduled nodes.
+///
+unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnits[i].isScheduled &&
+ (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
+ unsigned(INT_MAX)) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has an unexpected "
+ << (isBottomUp ? "Height" : "Depth") << " value!\n";
+ AnyNotSched = true;
+ }
+ if (isBottomUp) {
+ if (SUnits[i].NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ } else {
+ if (SUnits[i].NumPredsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has predecessors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ }
+ assert(!AnyNotSched);
+ return SUnits.size() - DeadNodes;
+}
+#endif
+
+/// InitDAGTopologicalSorting - create the initial topological
+/// ordering from the DAG to be scheduled.
+///
+/// The idea of the algorithm is taken from
+/// "Online algorithms for managing the topological order of
+/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
+/// the nodes reachable from Y, and then shifts them using Shift to lie
+/// immediately after X in Index2Node.
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+ WorkList.reserve(DAGSize);
+
+ Index2Node.resize(DAGSize);
+ Node2Index.resize(DAGSize);
+
+ // Initialize the data structures.
+ if (ExitSU)
+ WorkList.push_back(ExitSU);
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ int NodeNum = SU->NodeNum;
+ unsigned Degree = SU->Succs.size();
+ // Temporarily use the Node2Index array as scratch space for degree counts.
+ Node2Index[NodeNum] = Degree;
+
+ // Is it a node without dependencies?
+ if (Degree == 0) {
+ assert(SU->Succs.empty() && "SUnit should have no successors");
+ // Collect leaf nodes.
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ if (SU->NodeNum < DAGSize)
+ Allocate(SU->NodeNum, --Id);
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit *SU = I->getSUnit();
+ if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
+ // If all dependencies of the node are processed already,
+ // then the node can be computed now.
+ WorkList.push_back(SU);
+ }
+ }
+
+ Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ "Wrong topological sorting");
+ }
+ }
+#endif
+}
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[Y->NodeNum];
+ UpperBound = Node2Index[X->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(X) < Ord(Y) ?
+ if (LowerBound < UpperBound) {
+ // Update the topological order.
+ Visited.reset();
+ DFS(Y, UpperBound, HasLoop);
+ assert(!HasLoop && "Inserted edge creates a loop!");
+ // Recompute topological indexes.
+ Shift(Visited, LowerBound, UpperBound);
+ }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate an
+/// an edge to be removed from the specified node N from the predecessors
+/// of the current node M.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+ // InitDAGTopologicalSorting();
+}
+
+/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
+/// all nodes affected by the edge insertion. These nodes will later get new
+/// topological indexes by means of the Shift method.
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+ bool &HasLoop) {
+ std::vector<const SUnit*> WorkList;
+ WorkList.reserve(SUnits.size());
+
+ WorkList.push_back(SU);
+ do {
+ SU = WorkList.back();
+ WorkList.pop_back();
+ Visited.set(SU->NodeNum);
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ unsigned s = SU->Succs[I].getSUnit()->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (s >= Node2Index.size())
+ continue;
+ if (Node2Index[s] == UpperBound) {
+ HasLoop = true;
+ return;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ WorkList.push_back(SU->Succs[I].getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+/// Shift - Renumber the nodes so that the topological ordering is
+/// preserved.
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+ int UpperBound) {
+ std::vector<int> L;
+ int shift = 0;
+ int i;
+
+ for (i = LowerBound; i <= UpperBound; ++i) {
+ // w is node at topological index i.
+ int w = Index2Node[i];
+ if (Visited.test(w)) {
+ // Unmark.
+ Visited.reset(w);
+ L.push_back(w);
+ shift = shift + 1;
+ } else {
+ Allocate(w, i - shift);
+ }
+ }
+
+ for (unsigned j = 0; j < L.size(); ++j) {
+ Allocate(L[j], i - shift);
+ i = i + 1;
+ }
+}
+
+
+/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
+/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+ // Is SU reachable from TargetSU via successor edges?
+ if (IsReachable(SU, TargetSU))
+ return true;
+ for (SUnit::pred_iterator
+ I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I)
+ if (I->isAssignedRegDep() &&
+ IsReachable(SU, I->getSUnit()))
+ return true;
+ return false;
+}
+
+/// IsReachable - Checks if SU is reachable from TargetSU.
+bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
+ const SUnit *TargetSU) {
+ // If insertion of the edge SU->TargetSU would create a cycle
+ // then there is a path from TargetSU to SU.
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[TargetSU->NodeNum];
+ UpperBound = Node2Index[SU->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(TargetSU) < Ord(SU) ?
+ if (LowerBound < UpperBound) {
+ Visited.reset();
+ // There may be a path from TargetSU to SU. Check for it.
+ DFS(TargetSU, UpperBound, HasLoop);
+ }
+ return HasLoop;
+}
+
+/// Allocate - assign the topological index to the node n.
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
+ Node2Index[n] = index;
+ Index2Node[index] = n;
+}
+
+ScheduleDAGTopologicalSort::
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
+ : SUnits(sunits), ExitSU(exitsu) {}
+
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 000000000000..71e7a21ef2bc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,1322 @@
+//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGInstrs class, which implements re-scheduling
+// of MachineInstrs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable use of AA during MI GAD construction"));
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt,
+ bool IsPostRAFlag,
+ LiveIntervals *lis)
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+ IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
+ assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
+ DbgValues.clear();
+ assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
+ "Virtual registers must be removed prior to PostRA scheduling");
+
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+ do {
+ if (const Operator *U = dyn_cast<Operator>(V)) {
+ // If we find a ptrtoint, we can transfer control back to the
+ // regular getUnderlyingObjectFromInt.
+ if (U->getOpcode() == Instruction::PtrToInt)
+ return U->getOperand(0);
+ // If we find an add of a constant, a multiplied value, or a phi, it's
+ // likely that the other operand will lead us to the base
+ // object. We don't have to worry about the case where the
+ // object address is somehow being computed by the multiply,
+ // because our callers only care when the result is an
+ // identifiable object.
+ if (U->getOpcode() != Instruction::Add ||
+ (!isa<ConstantInt>(U->getOperand(1)) &&
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
+ !isa<PHINode>(U->getOperand(1))))
+ return V;
+ V = U->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
+ } while (1);
+}
+
+/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static void getUnderlyingObjects(const Value *V,
+ SmallVectorImpl<Value *> &Objects) {
+ SmallPtrSet<const Value*, 16> Visited;
+ SmallVector<const Value *, 4> Working(1, V);
+ do {
+ V = Working.pop_back_val();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ V = *I;
+ if (!Visited.insert(V))
+ continue;
+ if (Operator::getOpcode(V) == Instruction::IntToPtr) {
+ const Value *O =
+ getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ if (O->getType()->isPointerTy()) {
+ Working.push_back(O);
+ continue;
+ }
+ }
+ Objects.push_back(const_cast<Value *>(V));
+ }
+ } while (!Working.empty());
+}
+
+/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, return the Value for that object.
+static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
+ const MachineFrameInfo *MFI,
+ SmallVectorImpl<std::pair<const Value *, bool> > &Objects) {
+ if (!MI->hasOneMemOperand() ||
+ !(*MI->memoperands_begin())->getValue() ||
+ (*MI->memoperands_begin())->isVolatile())
+ return;
+
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return;
+
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs);
+
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ bool MayAlias = true;
+ V = *I;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+
+ if (PSV->isAliased(MFI)) {
+ Objects.clear();
+ return;
+ }
+
+ MayAlias = PSV->mayAlias(MFI);
+ } else if (!isIdentifiedObject(V)) {
+ Objects.clear();
+ return;
+ }
+
+ Objects.push_back(std::make_pair(V, MayAlias));
+ }
+}
+
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
+ BB = bb;
+}
+
+void ScheduleDAGInstrs::finishBlock() {
+ // Subclasses should no longer refer to the old block.
+ BB = 0;
+}
+
+/// Initialize the DAG and common scheduler state for the current scheduling
+/// region. This does not actually create the DAG, only clears it. The
+/// scheduling driver may call BuildSchedGraph multiple times per scheduling
+/// region.
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ assert(bb == BB && "startBlock should set BB");
+ RegionBegin = begin;
+ RegionEnd = end;
+ EndIndex = endcount;
+ MISUnitMap.clear();
+
+ ScheduleDAG::clearDAG();
+}
+
+/// Close the current scheduling region. Don't clear any state in case the
+/// driver wants to refer to the previous scheduling region.
+void ScheduleDAGInstrs::exitRegion() {
+ // Nothing to do.
+}
+
+/// addSchedBarrierDeps - Add dependencies from instructions in the current
+/// list of instructions being scheduled to scheduling barrier by adding
+/// the exit SU to the register defs and use list. This is because we want to
+/// make sure instructions which define registers that are either used by
+/// the terminator or are live-out are properly scheduled. This is
+/// especially important when the definition latency of the return value(s)
+/// are too high to be hidden by the branch or when the liveout registers
+/// used by instructions in the fallthrough block.
+void ScheduleDAGInstrs::addSchedBarrierDeps() {
+ MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
+ ExitSU.setInstr(ExitMI);
+ bool AllDepKnown = ExitMI &&
+ (ExitMI->isCall() || ExitMI->isBarrier());
+ if (ExitMI && AllDepKnown) {
+ // If it's a call or a barrier, add dependencies on the defs and uses of
+ // instruction.
+ for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = ExitMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TRI->isPhysicalRegister(Reg))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+ else {
+ assert(!IsPostRA && "Virtual register encountered after regalloc.");
+ if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(&ExitSU, i);
+ }
+ }
+ } else {
+ // For others, e.g. fallthrough, conditional branch, assume the exit
+ // uses all the registers that are livein to the successor blocks.
+ assert(Uses.empty() && "Uses in set before adding deps?");
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ if (!Uses.contains(Reg))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+ }
+ }
+}
+
+/// MO is an operand of SU's instruction that defines a physical register. Add
+/// data dependencies from SU to any uses of the physical register.
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
+ assert(MO.isDef() && "expect physreg def");
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
+ if (!Uses.contains(*Alias))
+ continue;
+ for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
+ SUnit *UseSU = I->SU;
+ if (UseSU == SU)
+ continue;
+
+ // Adjust the dependence latency using operand def/use information,
+ // then allow the target to perform its own adjustments.
+ int UseOp = I->OpIdx;
+ MachineInstr *RegUse = 0;
+ SDep Dep;
+ if (UseOp < 0)
+ Dep = SDep(SU, SDep::Artificial);
+ else {
+ Dep = SDep(SU, SDep::Data, *Alias);
+ RegUse = UseSU->getInstr();
+ Dep.setMinLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp, /*FindMin=*/true));
+ }
+ Dep.setLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp, /*FindMin=*/false));
+
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ UseSU->addPred(Dep);
+ }
+ }
+}
+
+/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
+/// this SUnit to following instructions in the same scheduling region that
+/// depend the physical register referenced at OperIdx.
+void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ const MachineOperand &MO = MI->getOperand(OperIdx);
+
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
+ if (!Defs.contains(*Alias))
+ continue;
+ for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
+ SUnit *DefSU = I->SU;
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(*Alias))) {
+ if (Kind == SDep::Anti)
+ DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
+ else {
+ SDep Dep(SU, Kind, /*Reg=*/*Alias);
+ unsigned OutLatency =
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
+ Dep.setMinLatency(OutLatency);
+ Dep.setLatency(OutLatency);
+ DefSU->addPred(Dep);
+ }
+ }
+ }
+ }
+
+ if (!MO.isDef()) {
+ // Either insert a new Reg2SUnits entry with an empty SUnits list, or
+ // retrieve the existing SUnits list for this register's uses.
+ // Push this SUnit on the use list.
+ Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
+ }
+ else {
+ addPhysRegDataDeps(SU, OperIdx);
+ unsigned Reg = MO.getReg();
+
+ // clear this register's use list
+ if (Uses.contains(Reg))
+ Uses.eraseAll(Reg);
+
+ if (!MO.isDead()) {
+ Defs.eraseAll(Reg);
+ } else if (SU->isCall) {
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
+ Reg2SUnitsMap::iterator B = P.first;
+ Reg2SUnitsMap::iterator I = P.second;
+ for (bool isBegin = I == B; !isBegin; /* empty */) {
+ isBegin = (--I) == B;
+ if (!I->SU->isCall)
+ break;
+ I = Defs.erase(I);
+ }
+ }
+
+ // Defs are pushed in the order they are visited and never reordered.
+ Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
+ }
+}
+
+/// addVRegDefDeps - Add register output and data dependencies from this SUnit
+/// to instructions that occur later in the same scheduling region if they read
+/// from or write to the virtual register defined at OperIdx.
+///
+/// TODO: Hoist loop induction variable increments. This has to be
+/// reevaluated. Generally, IV scheduling should be done before coalescing.
+void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+ // Singly defined vregs do not have output/anti dependencies.
+ // The current operand is a def, so we have at least one.
+ // Check here if there are any others...
+ if (MRI.hasOneDef(Reg))
+ return;
+
+ // Add output dependence to the next nearest def of this vreg.
+ //
+ // Unless this definition is dead, the output dependence should be
+ // transitively redundant with antidependencies from this definition's
+ // uses. We're conservative for now until we have a way to guarantee the uses
+ // are not eliminated sometime during scheduling. The output dependence edge
+ // is also useful if output latency exceeds def-use latency.
+ VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
+ if (DefI == VRegDefs.end())
+ VRegDefs.insert(VReg2SUnit(Reg, SU));
+ else {
+ SUnit *DefSU = DefI->SU;
+ if (DefSU != SU && DefSU != &ExitSU) {
+ SDep Dep(SU, SDep::Output, Reg);
+ unsigned OutLatency =
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
+ Dep.setMinLatency(OutLatency);
+ Dep.setLatency(OutLatency);
+ DefSU->addPred(Dep);
+ }
+ DefI->SU = SU;
+ }
+}
+
+/// addVRegUseDeps - Add a register data dependency if the instruction that
+/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
+/// register antidependency from this SUnit to instructions that occur later in
+/// the same scheduling region if they write the virtual register.
+///
+/// TODO: Handle ExitSU "uses" properly.
+void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+ // Lookup this operand's reaching definition.
+ assert(LIS && "vreg dependencies requires LiveIntervals");
+ LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
+ VNInfo *VNI = LRQ.valueIn();
+
+ // VNI will be valid because MachineOperand::readsReg() is checked by caller.
+ assert(VNI && "No value to read by operand");
+ MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
+ // Phis and other noninstructions (after coalescing) have a NULL Def.
+ if (Def) {
+ SUnit *DefSU = getSUnit(Def);
+ if (DefSU) {
+ // The reaching Def lives within this scheduling region.
+ // Create a data dependence.
+ SDep dep(DefSU, SDep::Data, Reg);
+ // Adjust the dependence latency using operand def/use information, then
+ // allow the target to perform its own adjustments.
+ int DefOp = Def->findRegisterDefOperandIdx(Reg);
+ dep.setLatency(
+ SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
+ dep.setMinLatency(
+ SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));
+
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
+ SU->addPred(dep);
+ }
+ }
+
+ // Add antidependence to the following def of the vreg it uses.
+ VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
+ if (DefI != VRegDefs.end() && DefI->SU != SU)
+ DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
+}
+
+/// Return true if MI is an instruction we are unable to reason about
+/// (like a call or something with unmodeled side effects).
+static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasOrderedMemoryRef() &&
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
+ return true;
+ return false;
+}
+
+// This MI might have either incomplete info, or known to be unsafe
+// to deal with (i.e. volatile object).
+static inline bool isUnsafeMemoryObject(MachineInstr *MI,
+ const MachineFrameInfo *MFI) {
+ if (!MI || MI->memoperands_empty())
+ return true;
+ // We purposefully do no check for hasOneMemOperand() here
+ // in hope to trigger an assert downstream in order to
+ // finish implementation.
+ if ((*MI->memoperands_begin())->isVolatile() ||
+ MI->hasUnmodeledSideEffects())
+ return true;
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return true;
+
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs);
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(),
+ IE = Objs.end(); I != IE; ++I) {
+ V = *I;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // Similarly to getUnderlyingObjectForInstr:
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return true;
+ }
+
+ // Does this pointer refer to a distinct and identifiable object?
+ if (!isIdentifiedObject(V))
+ return true;
+ }
+
+ return false;
+}
+
+/// This returns true if the two MIs need a chain edge betwee them.
+/// If these are not even memory operations, we still may need
+/// chain deps between them. The question really is - could
+/// these two MIs be reordered during scheduling from memory dependency
+/// point of view.
+static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ MachineInstr *MIa,
+ MachineInstr *MIb) {
+ // Cover a trivial case - no edge is need to itself.
+ if (MIa == MIb)
+ return false;
+
+ if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
+ return true;
+
+ // If we are dealing with two "normal" loads, we do not need an edge
+ // between them - they could be reordered.
+ if (!MIa->mayStore() && !MIb->mayStore())
+ return false;
+
+ // To this point analysis is generic. From here on we do need AA.
+ if (!AA)
+ return true;
+
+ MachineMemOperand *MMOa = *MIa->memoperands_begin();
+ MachineMemOperand *MMOb = *MIb->memoperands_begin();
+
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+ llvm_unreachable("Multiple memory operands.");
+
+ // The following interface to AA is fashioned after DAGCombiner::isAlias
+ // and operates with MachineMemOperand offset with some important
+ // assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineOperand offset can *only* result from legalization and
+ // cannot affect queries other than the trivial case of overlap
+ // checking.
+ // - These offsets never wrap and never step outside
+ // of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // FIXME: Even before we go to AA we can reason locally about some
+ // memory objects. It can save compile time, and possibly catch some
+ // corner cases not currently covered.
+
+ assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
+ assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+
+ int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
+ int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
+ int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
+
+ AliasAnalysis::AliasResult AAResult = AA->alias(
+ AliasAnalysis::Location(MMOa->getValue(), Overlapa,
+ MMOa->getTBAAInfo()),
+ AliasAnalysis::Location(MMOb->getValue(), Overlapb,
+ MMOb->getTBAAInfo()));
+
+ return (AAResult != AliasAnalysis::NoAlias);
+}
+
+/// This recursive function iterates over chain deps of SUb looking for
+/// "latest" node that needs a chain edge to SUa.
+static unsigned
+iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
+ SmallPtrSet<const SUnit*, 16> &Visited) {
+ if (!SUa || !SUb || SUb == ExitSU)
+ return *Depth;
+
+ // Remember visited nodes.
+ if (!Visited.insert(SUb))
+ return *Depth;
+ // If there is _some_ dependency already in place, do not
+ // descend any further.
+ // TODO: Need to make sure that if that dependency got eliminated or ignored
+ // for any reason in the future, we would not violate DAG topology.
+ // Currently it does not happen, but makes an implicit assumption about
+ // future implementation.
+ //
+ // Independently, if we encounter node that is some sort of global
+ // object (like a call) we already have full set of dependencies to it
+ // and we can stop descending.
+ if (SUa->isSucc(SUb) ||
+ isGlobalMemoryObject(AA, SUb->getInstr()))
+ return *Depth;
+
+ // If we do need an edge, or we have exceeded depth budget,
+ // add that edge to the predecessors chain of SUb,
+ // and stop descending.
+ if (*Depth > 200 ||
+ MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ SUb->addPred(SDep(SUa, SDep::MayAliasMem));
+ return *Depth;
+ }
+ // Track current depth.
+ (*Depth)++;
+ // Iterate over chain dependencies only.
+ for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+ return *Depth;
+}
+
+/// This function assumes that "downward" from SU there exist
+/// tail/leaf of already constructed DAG. It iterates downward and
+/// checks whether SU can be aliasing any node dominated
+/// by it.
+static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
+ unsigned LatencyToLoad) {
+ if (!SU)
+ return;
+
+ SmallPtrSet<const SUnit*, 16> Visited;
+ unsigned Depth = 0;
+
+ for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
+ I != IE; ++I) {
+ if (SU == *I)
+ continue;
+ if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
+ SDep Dep(SU, SDep::MayAliasMem);
+ Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
+ (*I)->addPred(Dep);
+ }
+ // Now go through all the chain successors and iterate from them.
+ // Keep track of visited nodes.
+ for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
+ JE = (*I)->Succs.end(); J != JE; ++J)
+ if (J->isCtrl())
+ iterateChainSucc (AA, MFI, SU, J->getSUnit(),
+ ExitSU, &Depth, Visited);
+ }
+}
+
+/// Check whether two objects need a chain edge, if so, add it
+/// otherwise remember the rejected SU.
+static inline
+void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SUa, SUnit *SUb,
+ std::set<SUnit *> &RejectList,
+ unsigned TrueMemOrderLatency = 0,
+ bool isNormalMemory = false) {
+ // If this is a false dependency,
+ // do not add the edge, but rememeber the rejected node.
+ if (!EnableAASchedMI ||
+ MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
+ Dep.setLatency(TrueMemOrderLatency);
+ SUb->addPred(Dep);
+ }
+ else {
+ // Duplicate entries should be ignored.
+ RejectList.insert(SUb);
+ DEBUG(dbgs() << "\tReject chain dep between SU("
+ << SUa->NodeNum << ") and SU("
+ << SUb->NodeNum << ")\n");
+ }
+}
+
+/// Create an SUnit for each real instruction, numbered in top-down toplological
+/// order. The instruction order A < B, implies that no edge exists from B to A.
+///
+/// Map each real instruction to its SUnit.
+///
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
+/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
+/// instead of pointers.
+///
+/// MachineScheduler relies on initSUnits numbering the nodes by their order in
+/// the original instruction list.
+void ScheduleDAGInstrs::initSUnits() {
+ // We'll be allocating one SUnit for each real instruction in the region,
+ // which is contained within a basic block.
+ SUnits.reserve(BB->size());
+
+ for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+
+ SUnit *SU = newSUnit(MI);
+ MISUnitMap[MI] = SU;
+
+ SU->isCall = MI->isCall();
+ SU->isCommutable = MI->isCommutable();
+
+ // Assign the Latency field of SU using target-provided information.
+ SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
+ }
+}
+
+/// If RegPressure is non null, compute register pressure as a side effect. The
+/// DAG builder is an efficient place to do it because it already visits
+/// operands.
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
+ RegPressureTracker *RPTracker) {
+ // Create an SUnit for each real instruction.
+ initSUnits();
+
+ // We build scheduling units by walking a block's instruction list from bottom
+ // to top.
+
+ // Remember where a generic side-effecting instruction is as we procede.
+ SUnit *BarrierChain = 0, *AliasChain = 0;
+
+ // Memory references to specific known memory locations are tracked
+ // so that they can be given more precise dependencies. We track
+ // separately the known memory locations that may alias and those
+ // that are known not to alias
+ MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+ std::set<SUnit*> RejectMemNodes;
+
+ // Remove any stale debug info; sometimes BuildSchedGraph is called again
+ // without emitting the info from the previous call.
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+
+ assert(Defs.empty() && Uses.empty() &&
+ "Only BuildGraph should update Defs/Uses");
+ Defs.setUniverse(TRI->getNumRegs());
+ Uses.setUniverse(TRI->getNumRegs());
+
+ assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
+ // FIXME: Allow SparseSet to reserve space for the creation of virtual
+ // registers during scheduling. Don't artificially inflate the Universe
+ // because we want to assert that vregs are not created during DAG building.
+ VRegDefs.setUniverse(MRI.getNumVirtRegs());
+
+ // Model data dependencies between instructions being scheduled and the
+ // ExitSU.
+ addSchedBarrierDeps();
+
+ // Walk the list of instructions, from bottom moving up.
+ MachineInstr *DbgMI = NULL;
+ for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
+ MII != MIE; --MII) {
+ MachineInstr *MI = prior(MII);
+ if (MI && DbgMI) {
+ DbgValues.push_back(std::make_pair(DbgMI, MI));
+ DbgMI = NULL;
+ }
+
+ if (MI->isDebugValue()) {
+ DbgMI = MI;
+ continue;
+ }
+ if (RPTracker) {
+ RPTracker->recede();
+ assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
+ }
+
+ assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) &&
+ "Cannot schedule terminators or labels!");
+
+ SUnit *SU = MISUnitMap[MI];
+ assert(SU && "No SUnit mapped to this MI");
+
+ // Add register-based dependencies (data, anti, and output).
+ bool HasVRegDef = false;
+ for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TRI->isPhysicalRegister(Reg))
+ addPhysRegDeps(SU, j);
+ else {
+ assert(!IsPostRA && "Virtual register encountered!");
+ if (MO.isDef()) {
+ HasVRegDef = true;
+ addVRegDefDeps(SU, j);
+ }
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(SU, j);
+ }
+ }
+ // If we haven't seen any uses in this scheduling region, create a
+ // dependence edge to ExitSU to model the live-out latency. This is required
+ // for vreg defs with no in-region use, and prefetches with no vreg def.
+ //
+ // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
+ // check currently relies on being called before adding chain deps.
+ if (SU->NumSuccs == 0 && SU->Latency > 1
+ && (HasVRegDef || MI->mayLoad())) {
+ SDep Dep(SU, SDep::Artificial);
+ Dep.setLatency(SU->Latency - 1);
+ ExitSU.addPred(Dep);
+ }
+
+ // Add chain dependencies.
+ // Chain dependencies used to enforce memory order should have
+ // latency of 0 (except for true dependency of Store followed by
+ // aliased Load... we estimate that with a single cycle of latency
+ // assuming the hardware will bypass)
+ // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable
+ // after stack slots are lowered to actual addresses.
+ // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+ // produce more precise dependence information.
+ unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
+ if (isGlobalMemoryObject(AA, MI)) {
+ // Be conservative with these and add dependencies on all memory
+ // references, even those that are known to not alias.
+ for (MapVector<const Value *, SUnit *>::iterator I =
+ NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Barrier));
+ }
+ for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+ SDep Dep(SU, SDep::Barrier);
+ Dep.setLatency(TrueMemOrderLatency);
+ I->second[i]->addPred(Dep);
+ }
+ }
+ // Add SU to the barrier chain.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
+ BarrierChain = SU;
+ // This is a barrier event that acts as a pivotal node in the DAG,
+ // so it is safe to clear list of exposed nodes.
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ RejectMemNodes.clear();
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
+
+ // fall-through
+ new_alias_chain:
+ // Chain all possibly aliasing memory references though SU.
+ if (AliasChain) {
+ unsigned ChainLatency = 0;
+ if (AliasChain->getInstr()->mayLoad())
+ ChainLatency = TrueMemOrderLatency;
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
+ ChainLatency);
+ }
+ AliasChain = SU;
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
+ for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ E = AliasMemDefs.end(); I != E; ++I)
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
+ for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
+ TrueMemOrderLatency);
+ }
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ PendingLoads.clear();
+ AliasMemDefs.clear();
+ AliasMemUses.clear();
+ } else if (MI->mayStore()) {
+ SmallVector<std::pair<const Value *, bool>, 4> Objs;
+ getUnderlyingObjectsForInstr(MI, MFI, Objs);
+
+ if (Objs.empty()) {
+ // Treat all other stores conservatively.
+ goto new_alias_chain;
+ }
+
+ bool MayAlias = false;
+ for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+ K = Objs.begin(), KE = Objs.end(); K != KE; ++K) {
+ const Value *V = K->first;
+ bool ThisMayAlias = K->second;
+ if (ThisMayAlias)
+ MayAlias = true;
+
+ // A store to a specific PseudoSourceValue. Add precise dependencies.
+ // Record the def in MemDefs, first adding a dep if there is
+ // an existing def.
+ MapVector<const Value *, SUnit *>::iterator I =
+ ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ MapVector<const Value *, SUnit *>::iterator IE =
+ ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE) {
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+ I->second = SU;
+ } else {
+ if (ThisMayAlias)
+ AliasMemDefs[V] = SU;
+ else
+ NonAliasMemDefs[V] = SU;
+ }
+ // Handle the uses in MemUses, if there are any.
+ MapVector<const Value *, std::vector<SUnit *> >::iterator J =
+ ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ MapVector<const Value *, std::vector<SUnit *> >::iterator JE =
+ ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ if (J != JE) {
+ for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+ addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
+ TrueMemOrderLatency, true);
+ J->second.clear();
+ }
+ }
+ if (MayAlias) {
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ // But we also should check dependent instructions for the
+ // SU in question.
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ }
+ // Add dependence on barrier chain, if needed.
+ // There is no point to check aliasing on barrier event. Even if
+ // SU and barrier _could_ be reordered, they should not. In addition,
+ // we have lost all RejectMemNodes below barrier.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
+
+ if (!ExitSU.isPred(SU))
+ // Push store's up a bit to avoid them getting in between cmp
+ // and branches.
+ ExitSU.addPred(SDep(SU, SDep::Artificial));
+ } else if (MI->mayLoad()) {
+ bool MayAlias = true;
+ if (MI->isInvariantLoad(AA)) {
+ // Invariant load, no chain dependencies needed!
+ } else {
+ SmallVector<std::pair<const Value *, bool>, 4> Objs;
+ getUnderlyingObjectsForInstr(MI, MFI, Objs);
+
+ if (Objs.empty()) {
+ // A load with no underlying object. Depend on all
+ // potentially aliasing stores.
+ for (MapVector<const Value *, SUnit *>::iterator I =
+ AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
+
+ PendingLoads.push_back(SU);
+ MayAlias = true;
+ } else {
+ MayAlias = false;
+ }
+
+ for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+ J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
+ const Value *V = J->first;
+ bool ThisMayAlias = J->second;
+
+ if (ThisMayAlias)
+ MayAlias = true;
+
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ MapVector<const Value *, SUnit *>::iterator I =
+ ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ MapVector<const Value *, SUnit *>::iterator IE =
+ ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+ if (ThisMayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
+ }
+ if (MayAlias)
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
+ // Add dependencies on alias and barrier chains, if needed.
+ if (MayAlias && AliasChain)
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
+ }
+ }
+ }
+ if (DbgMI)
+ FirstDbgValue = DbgMI;
+
+ Defs.clear();
+ Uses.clear();
+ VRegDefs.clear();
+ PendingLoads.clear();
+}
+
+void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ SU->getInstr()->dump();
+#endif
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ if (SU == &EntrySU)
+ oss << "<entry>";
+ else if (SU == &ExitSU)
+ oss << "<exit>";
+ else
+ SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true);
+ return oss.str();
+}
+
+/// Return the basic block label. It is not necessarilly unique because a block
+/// contains multiple scheduling regions. But it is fine for visualization.
+std::string ScheduleDAGInstrs::getDAGName() const {
+ return "dag." + BB->getFullName();
+}
+
+//===----------------------------------------------------------------------===//
+// SchedDFSResult Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+/// \brief Internal state used to compute SchedDFSResult.
+class SchedDFSImpl {
+ SchedDFSResult &R;
+
+ /// Join DAG nodes into equivalence classes by their subtree.
+ IntEqClasses SubtreeClasses;
+ /// List PredSU, SuccSU pairs that represent data edges between subtrees.
+ std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
+
+ struct RootData {
+ unsigned NodeID;
+ unsigned ParentNodeID; // Parent node (member of the parent subtree).
+ unsigned SubInstrCount; // Instr count in this tree only, not children.
+
+ RootData(unsigned id): NodeID(id),
+ ParentNodeID(SchedDFSResult::InvalidSubtreeID),
+ SubInstrCount(0) {}
+
+ unsigned getSparseSetIndex() const { return NodeID; }
+ };
+
+ SparseSet<RootData> RootSet;
+
+public:
+ SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
+ RootSet.setUniverse(R.DFSNodeData.size());
+ }
+
+ /// Return true if this node been visited by the DFS traversal.
+ ///
+ /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
+ /// ID. Later, SubtreeID is updated but remains valid.
+ bool isVisited(const SUnit *SU) const {
+ return R.DFSNodeData[SU->NodeNum].SubtreeID
+ != SchedDFSResult::InvalidSubtreeID;
+ }
+
+ /// Initialize this node's instruction count. We don't need to flag the node
+ /// visited until visitPostorder because the DAG cannot have cycles.
+ void visitPreorder(const SUnit *SU) {
+ R.DFSNodeData[SU->NodeNum].InstrCount =
+ SU->getInstr()->isTransient() ? 0 : 1;
+ }
+
+ /// Called once for each node after all predecessors are visited. Revisit this
+ /// node's predecessors and potentially join them now that we know the ILP of
+ /// the other predecessors.
+ void visitPostorderNode(const SUnit *SU) {
+ // Mark this node as the root of a subtree. It may be joined with its
+ // successors later.
+ R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
+ RootData RData(SU->NodeNum);
+ RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
+
+ // If any predecessors are still in their own subtree, they either cannot be
+ // joined or are large enough to remain separate. If this parent node's
+ // total instruction count is not greater than a child subtree by at least
+ // the subtree limit, then try to join it now since splitting subtrees is
+ // only useful if multiple high-pressure paths are possible.
+ unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->getKind() != SDep::Data)
+ continue;
+ unsigned PredNum = PI->getSUnit()->NodeNum;
+ if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
+ joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
+
+ // Either link or merge the TreeData entry from the child to the parent.
+ if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
+ // If the predecessor's parent is invalid, this is a tree edge and the
+ // current node is the parent.
+ if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
+ RootSet[PredNum].ParentNodeID = SU->NodeNum;
+ }
+ else if (RootSet.count(PredNum)) {
+ // The predecessor is not a root, but is still in the root set. This
+ // must be the new parent that it was just joined to. Note that
+ // RootSet[PredNum].ParentNodeID may either be invalid or may still be
+ // set to the original parent.
+ RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
+ RootSet.erase(PredNum);
+ }
+ }
+ RootSet[SU->NodeNum] = RData;
+ }
+
+ /// Called once for each tree edge after calling visitPostOrderNode on the
+ /// predecessor. Increment the parent node's instruction count and
+ /// preemptively join this subtree to its parent's if it is small enough.
+ void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
+ R.DFSNodeData[Succ->NodeNum].InstrCount
+ += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
+ joinPredSubtree(PredDep, Succ);
+ }
+
+ /// Add a connection for cross edges.
+ void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
+ ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
+ }
+
+ /// Set each node's subtree ID to the representative ID and record connections
+ /// between trees.
+ void finalize() {
+ SubtreeClasses.compress();
+ R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
+ assert(SubtreeClasses.getNumClasses() == RootSet.size()
+ && "number of roots should match trees");
+ for (SparseSet<RootData>::const_iterator
+ RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
+ unsigned TreeID = SubtreeClasses[RI->NodeID];
+ if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
+ R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
+ R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
+ // Note that SubInstrCount may be greater than InstrCount if we joined
+ // subtrees across a cross edge. InstrCount will be attributed to the
+ // original parent, while SubInstrCount will be attributed to the joined
+ // parent.
+ }
+ R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
+ R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
+ DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
+ for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
+ R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
+ DEBUG(dbgs() << " SU(" << Idx << ") in tree "
+ << R.DFSNodeData[Idx].SubtreeID << '\n');
+ }
+ for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
+ I = ConnectionPairs.begin(), E = ConnectionPairs.end();
+ I != E; ++I) {
+ unsigned PredTree = SubtreeClasses[I->first->NodeNum];
+ unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
+ if (PredTree == SuccTree)
+ continue;
+ unsigned Depth = I->first->getDepth();
+ addConnection(PredTree, SuccTree, Depth);
+ addConnection(SuccTree, PredTree, Depth);
+ }
+ }
+
+protected:
+ /// Join the predecessor subtree with the successor that is its DFS
+ /// parent. Apply some heuristics before joining.
+ bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
+ bool CheckLimit = true) {
+ assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
+
+ // Check if the predecessor is already joined.
+ const SUnit *PredSU = PredDep.getSUnit();
+ unsigned PredNum = PredSU->NodeNum;
+ if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
+ return false;
+
+ // Four is the magic number of successors before a node is considered a
+ // pinch point.
+ unsigned NumDataSucs = 0;
+ for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
+ SE = PredSU->Succs.end(); SI != SE; ++SI) {
+ if (SI->getKind() == SDep::Data) {
+ if (++NumDataSucs >= 4)
+ return false;
+ }
+ }
+ if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
+ return false;
+ R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
+ SubtreeClasses.join(Succ->NodeNum, PredNum);
+ return true;
+ }
+
+ /// Called by finalize() to record a connection between trees.
+ void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
+ if (!Depth)
+ return;
+
+ do {
+ SmallVectorImpl<SchedDFSResult::Connection> &Connections =
+ R.SubtreeConnections[FromTree];
+ for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
+ I = Connections.begin(), E = Connections.end(); I != E; ++I) {
+ if (I->TreeID == ToTree) {
+ I->Level = std::max(I->Level, Depth);
+ return;
+ }
+ }
+ Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
+ FromTree = R.DFSTreeData[FromTree].ParentTreeID;
+ } while (FromTree != SchedDFSResult::InvalidSubtreeID);
+ }
+};
+} // namespace llvm
+
+namespace {
+/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+class SchedDAGReverseDFS {
+ std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+public:
+ bool isComplete() const { return DFSStack.empty(); }
+
+ void follow(const SUnit *SU) {
+ DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+ }
+ void advance() { ++DFSStack.back().second; }
+
+ const SDep *backtrack() {
+ DFSStack.pop_back();
+ return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second);
+ }
+
+ const SUnit *getCurr() const { return DFSStack.back().first; }
+
+ SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+ SUnit::const_pred_iterator getPredEnd() const {
+ return getCurr()->Preds.end();
+ }
+};
+} // anonymous
+
+static bool hasDataSucc(const SUnit *SU) {
+ for (SUnit::const_succ_iterator
+ SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
+ if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
+ return true;
+ }
+ return false;
+}
+
+/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from this root.
+void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
+ if (!IsBottomUp)
+ llvm_unreachable("Top-down ILP metric is unimplemnted");
+
+ SchedDFSImpl Impl(*this);
+ for (ArrayRef<SUnit>::const_iterator
+ SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
+ const SUnit *SU = &*SI;
+ if (Impl.isVisited(SU) || hasDataSucc(SU))
+ continue;
+
+ SchedDAGReverseDFS DFS;
+ Impl.visitPreorder(SU);
+ DFS.follow(SU);
+ for (;;) {
+ // Traverse the leftmost path as far as possible.
+ while (DFS.getPred() != DFS.getPredEnd()) {
+ const SDep &PredDep = *DFS.getPred();
+ DFS.advance();
+ // Ignore non-data edges.
+ if (PredDep.getKind() != SDep::Data
+ || PredDep.getSUnit()->isBoundaryNode()) {
+ continue;
+ }
+ // An already visited edge is a cross edge, assuming an acyclic DAG.
+ if (Impl.isVisited(PredDep.getSUnit())) {
+ Impl.visitCrossEdge(PredDep, DFS.getCurr());
+ continue;
+ }
+ Impl.visitPreorder(PredDep.getSUnit());
+ DFS.follow(PredDep.getSUnit());
+ }
+ // Visit the top of the stack in postorder and backtrack.
+ const SUnit *Child = DFS.getCurr();
+ const SDep *PredDep = DFS.backtrack();
+ Impl.visitPostorderNode(Child);
+ if (PredDep)
+ Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
+ if (DFS.isComplete())
+ break;
+ }
+ }
+ Impl.finalize();
+}
+
+/// The root of the given SubtreeID was just scheduled. For all subtrees
+/// connected to this tree, record the depth of the connection so that the
+/// nearest connected subtrees can be prioritized.
+void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
+ for (SmallVectorImpl<Connection>::const_iterator
+ I = SubtreeConnections[SubtreeID].begin(),
+ E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
+ SubtreeConnectLevels[I->TreeID] =
+ std::max(SubtreeConnectLevels[I->TreeID], I->Level);
+ DEBUG(dbgs() << " Tree: " << I->TreeID
+ << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ILPValue::print(raw_ostream &OS) const {
+ OS << InstrCount << " / " << Length << " = ";
+ if (!Length)
+ OS << "BADILP";
+ else
+ OS << format("%g", ((double)InstrCount / Length));
+}
+
+void ILPValue::dump() const {
+ dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+ Val.print(OS);
+ return OS;
+}
+
+} // namespace llvm
+#endif // !NDEBUG || LLVM_ENABLE_DUMP
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 000000000000..8ddb3e892f25
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,100 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool isNodeHidden(const SUnit *Node) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(ScheduleDAG *G,
+ GraphWriter<ScheduleDAG*> &GW) {
+ return G->addCustomGraphFeatures(GW);
+ }
+ };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+ const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) {
+ // This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAG::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
new file mode 100644
index 000000000000..2cd84d670aaa
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -0,0 +1,248 @@
+//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScoreboardHazardRecognizer class, which
+// encapsultes hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+const char *ScoreboardHazardRecognizer::DebugType = "";
+#endif
+
+ScoreboardHazardRecognizer::
+ScoreboardHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *SchedDAG,
+ const char *ParentDebugType) :
+ ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0),
+ IssueCount(0) {
+
+#ifndef NDEBUG
+ DebugType = ParentDebugType;
+#endif
+
+ // Determine the maximum depth of any itinerary. This determines the depth of
+ // the scoreboard. We always make the scoreboard at least 1 cycle deep to
+ // avoid dealing with the boundary condition.
+ unsigned ScoreboardDepth = 1;
+ if (ItinData && !ItinData->isEmpty()) {
+ for (unsigned idx = 0; ; ++idx) {
+ if (ItinData->isEndMarker(idx))
+ break;
+
+ const InstrStage *IS = ItinData->beginStage(idx);
+ const InstrStage *E = ItinData->endStage(idx);
+ unsigned CurCycle = 0;
+ unsigned ItinDepth = 0;
+ for (; IS != E; ++IS) {
+ unsigned StageDepth = CurCycle + IS->getCycles();
+ if (ItinDepth < StageDepth) ItinDepth = StageDepth;
+ CurCycle += IS->getNextCycles();
+ }
+
+ // Find the next power-of-2 >= ItinDepth
+ while (ItinDepth > ScoreboardDepth) {
+ ScoreboardDepth *= 2;
+ // Don't set MaxLookAhead until we find at least one nonzero stage.
+ // This way, an itinerary with no stages has MaxLookAhead==0, which
+ // completely bypasses the scoreboard hazard logic.
+ MaxLookAhead = ScoreboardDepth;
+ }
+ }
+ }
+
+ ReservedScoreboard.reset(ScoreboardDepth);
+ RequiredScoreboard.reset(ScoreboardDepth);
+
+ // If MaxLookAhead is not set above, then we are not enabled.
+ if (!isEnabled())
+ DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
+ else {
+ // A nonempty itinerary must have a SchedModel.
+ IssueWidth = ItinData->SchedModel->IssueWidth;
+ DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+ << ScoreboardDepth << '\n');
+ }
+}
+
+void ScoreboardHazardRecognizer::Reset() {
+ IssueCount = 0;
+ RequiredScoreboard.reset();
+ ReservedScoreboard.reset();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScoreboardHazardRecognizer::Scoreboard::dump() const {
+ dbgs() << "Scoreboard:\n";
+
+ unsigned last = Depth - 1;
+ while ((last > 0) && ((*this)[last] == 0))
+ last--;
+
+ for (unsigned i = 0; i <= last; i++) {
+ unsigned FUs = (*this)[i];
+ dbgs() << "\t";
+ for (int j = 31; j >= 0; j--)
+ dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+ dbgs() << '\n';
+ }
+}
+#endif
+
+bool ScoreboardHazardRecognizer::atIssueLimit() const {
+ if (IssueWidth == 0)
+ return false;
+
+ return IssueCount == IssueWidth;
+}
+
+ScheduleHazardRecognizer::HazardType
+ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (!ItinData || ItinData->isEmpty())
+ return NoHazard;
+
+ // Note that stalls will be negative for bottom-up scheduling.
+ int cycle = Stalls;
+
+ // Use the itinerary for the underlying instruction to check for
+ // free FU's in the scoreboard at the appropriate future cycles.
+
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID == NULL) {
+ // Don't check hazards for non-machineinstr Nodes.
+ return NoHazard;
+ }
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must find one of the stage's units free for every cycle the
+ // stage is occupied. FIXME it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ int StageCycle = cycle + (int)i;
+ if (StageCycle < 0)
+ continue;
+
+ if (StageCycle >= (int)RequiredScoreboard.getDepth()) {
+ assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() &&
+ "Scoreboard depth exceeded!");
+ // This stage was stalled beyond pipeline depth, so cannot conflict.
+ break;
+ }
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[StageCycle];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[StageCycle];
+ break;
+ }
+
+ if (!freeUnits) {
+ DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+ return Hazard;
+ }
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ return NoHazard;
+}
+
+void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (!ItinData || ItinData->isEmpty())
+ return;
+
+ // Use the itinerary for the underlying instruction to reserve FU's
+ // in the scoreboard at the appropriate future cycles.
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ assert(MCID && "The scheduler must filter non-machineinstrs");
+ if (DAG->TII->isZeroCost(MCID->Opcode))
+ return;
+
+ ++IssueCount;
+
+ unsigned cycle = 0;
+
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must reserve one of the stage's units for every cycle the
+ // stage is occupied. FIXME it would be more accurate to reserve
+ // the same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[cycle + i];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[cycle + i];
+ break;
+ }
+
+ // reduce to a single unit
+ unsigned freeUnit = 0;
+ do {
+ freeUnit = freeUnits;
+ freeUnits = freeUnit & (freeUnit - 1);
+ } while (freeUnits);
+
+ if (IS->getReservationKind() == InstrStage::Required)
+ RequiredScoreboard[cycle + i] |= freeUnit;
+ else
+ ReservedScoreboard[cycle + i] |= freeUnit;
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ DEBUG(ReservedScoreboard.dump());
+ DEBUG(RequiredScoreboard.dump());
+}
+
+void ScoreboardHazardRecognizer::AdvanceCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+ RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
+}
+
+void ScoreboardHazardRecognizer::RecedeCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0;
+ ReservedScoreboard.recede();
+ RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0;
+ RequiredScoreboard.recede();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 000000000000..eb1609575016
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,10214 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
+
+namespace {
+ static cl::opt<bool>
+ CombinerAA("combiner-alias-analysis", cl::Hidden,
+ cl::desc("Turn on alias analysis during testing"));
+
+ static cl::opt<bool>
+ CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+ class DAGCombiner {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ CombineLevel Level;
+ CodeGenOpt::Level OptLevel;
+ bool LegalOperations;
+ bool LegalTypes;
+
+ // Worklist of all of the nodes that need to be simplified.
+ //
+ // This has the semantics that when adding to the worklist,
+ // the item added must be next to be processed. It should
+ // also only appear once. The naive approach to this takes
+ // linear time.
+ //
+ // To reduce the insert/remove time to logarithmic, we use
+ // a set and a vector to maintain our worklist.
+ //
+ // The set contains the items on the worklist, but does not
+ // maintain the order they should be visited.
+ //
+ // The vector maintains the order nodes should be visited, but may
+ // contain duplicate or removed nodes. When choosing a node to
+ // visit, we pop off the order stack until we find an item that is
+ // also in the contents set. All operations are O(log N).
+ SmallPtrSet<SDNode*, 64> WorkListContents;
+ SmallVector<SDNode*, 64> WorkListOrder;
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis &AA;
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(SDNode *N) {
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ AddToWorkList(*UI);
+ }
+
+ /// visit - call the node-specific routine that knows how to fold each
+ /// particular type of node.
+ SDValue visit(SDNode *N);
+
+ public:
+ /// AddToWorkList - Add to the work list making sure its instance is at the
+ /// back (next to be processed.)
+ void AddToWorkList(SDNode *N) {
+ WorkListContents.insert(N);
+ WorkListOrder.push_back(N);
+ }
+
+ /// removeFromWorkList - remove all instances of N from the worklist.
+ ///
+ void removeFromWorkList(SDNode *N) {
+ WorkListContents.erase(N);
+ }
+
+ SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo = true);
+
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+ bool AddTo = true) {
+ SDValue To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+
+ void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+ private:
+
+ /// SimplifyDemandedBits - Check the specified integer node value to see if
+ /// it can be simplified or if things it uses can be simplified by bit
+ /// propagation. If so, return true.
+ bool SimplifyDemandedBits(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
+ return SimplifyDemandedBits(Op, Demanded);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+
+ void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
+ SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
+ SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue PromoteIntBinOp(SDValue Op);
+ SDValue PromoteIntShiftOp(SDValue Op);
+ SDValue PromoteExtend(SDValue Op);
+ bool PromoteLoad(SDValue Op);
+
+ void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType);
+
+ /// combine - call the node-specific routine that knows how to fold each
+ /// particular type of node. If that doesn't do anything, try the
+ /// target-specific DAG combines.
+ SDValue combine(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDValue.getNode() == 0 - No change was made
+ // SDValue.getNode() == N - N was replaced, is dead and has been handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
+ SDValue visitTokenFactor(SDNode *N);
+ SDValue visitMERGE_VALUES(SDNode *N);
+ SDValue visitADD(SDNode *N);
+ SDValue visitSUB(SDNode *N);
+ SDValue visitADDC(SDNode *N);
+ SDValue visitSUBC(SDNode *N);
+ SDValue visitADDE(SDNode *N);
+ SDValue visitSUBE(SDNode *N);
+ SDValue visitMUL(SDNode *N);
+ SDValue visitSDIV(SDNode *N);
+ SDValue visitUDIV(SDNode *N);
+ SDValue visitSREM(SDNode *N);
+ SDValue visitUREM(SDNode *N);
+ SDValue visitMULHU(SDNode *N);
+ SDValue visitMULHS(SDNode *N);
+ SDValue visitSMUL_LOHI(SDNode *N);
+ SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitSMULO(SDNode *N);
+ SDValue visitUMULO(SDNode *N);
+ SDValue visitSDIVREM(SDNode *N);
+ SDValue visitUDIVREM(SDNode *N);
+ SDValue visitAND(SDNode *N);
+ SDValue visitOR(SDNode *N);
+ SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N);
+ SDValue SimplifyVUnaryOp(SDNode *N);
+ SDValue visitSHL(SDNode *N);
+ SDValue visitSRA(SDNode *N);
+ SDValue visitSRL(SDNode *N);
+ SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
+ SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
+ SDValue visitCTPOP(SDNode *N);
+ SDValue visitSELECT(SDNode *N);
+ SDValue visitSELECT_CC(SDNode *N);
+ SDValue visitSETCC(SDNode *N);
+ SDValue visitSIGN_EXTEND(SDNode *N);
+ SDValue visitZERO_EXTEND(SDNode *N);
+ SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitTRUNCATE(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
+ SDValue visitBUILD_PAIR(SDNode *N);
+ SDValue visitFADD(SDNode *N);
+ SDValue visitFSUB(SDNode *N);
+ SDValue visitFMUL(SDNode *N);
+ SDValue visitFMA(SDNode *N);
+ SDValue visitFDIV(SDNode *N);
+ SDValue visitFREM(SDNode *N);
+ SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitSINT_TO_FP(SDNode *N);
+ SDValue visitUINT_TO_FP(SDNode *N);
+ SDValue visitFP_TO_SINT(SDNode *N);
+ SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitFP_ROUND(SDNode *N);
+ SDValue visitFP_ROUND_INREG(SDNode *N);
+ SDValue visitFP_EXTEND(SDNode *N);
+ SDValue visitFNEG(SDNode *N);
+ SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
+ SDValue visitBRCOND(SDNode *N);
+ SDValue visitBR_CC(SDNode *N);
+ SDValue visitLOAD(SDNode *N);
+ SDValue visitSTORE(SDNode *N);
+ SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+ SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue visitBUILD_VECTOR(SDNode *N);
+ SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
+ SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitMEMBARRIER(SDNode *N);
+
+ SDValue XformToShuffleWithZero(SDNode *N);
+ SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
+
+ SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+
+ bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+ SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans = true);
+ SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp);
+ SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue BuildSDIV(SDNode *N);
+ SDValue BuildUDIV(SDNode *N);
+ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits = true);
+ SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+ SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
+ SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+ SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
+
+ SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+
+ /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases);
+
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const;
+
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
+
+ /// FindAliasInfo - Extracts the relevant alias information from the memory
+ /// node. Returns true if the operand was a load.
+ bool FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset,
+ unsigned &SrcValueAlignment,
+ const MDNode *&TBAAInfo) const;
+
+ /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for a better chain (aliasing node.)
+ SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+ /// Merge consecutive store operations into a wide store.
+ /// This optimization uses wide integers or vectors when possible.
+ /// \return True if some memory operations were changed.
+ bool MergeConsecutiveStores(StoreSDNode *N);
+
+ public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
+
+ /// Run - runs the dag combiner on all nodes in the work list
+ void Run(CombineLevel AtLevel);
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+ /// getShiftAmountTy - Returns a type large enough to hold any valid
+ /// shift amount - before type legalization these can be huge.
+ EVT getShiftAmountTy(EVT LHSTy) {
+ return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy();
+ }
+
+ /// isTypeLegal - This method returns true if we are running before type
+ /// legalization or if the specified VT is legal.
+ bool isTypeLegal(const EVT &VT) {
+ if (!LegalTypes) return true;
+ return TLI.isTypeLegal(VT);
+ }
+ };
+}
+
+
+namespace {
+/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class WorkListRemover : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+public:
+ explicit WorkListRemover(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ DC.removeFromWorkList(N);
+ }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->removeFromWorkList(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, or 2 if we
+/// can compute the negated form more cheaply than the expression itself.
+static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ const TargetLowering &TLI,
+ const TargetOptions *Options,
+ unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return 2;
+
+ // Don't allow anything with multiple uses.
+ if (!Op.hasOneUse()) return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > 6) return 0;
+
+ switch (Op.getOpcode()) {
+ default: return false;
+ case ISD::ConstantFP:
+ // Don't invert constant FP values after legalize. The negated constant
+ // isn't necessarily legal.
+ return LegalOperations ? 0 : 1;
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ if (!Options->UnsafeFPMath) return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+ return 0;
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!Options->UnsafeFPMath) return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ if (Options->HonorSignDependentRoundingFPMath()) return 0;
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
+ return V;
+
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
+ Depth + 1);
+ }
+}
+
+/// GetNegatedExpression - If isNegatibleForFree returns true, this function
+/// returns the newly negated expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+ // Don't allow anything with multiple uses.
+ assert(Op.hasOneUse() && "Unknown reuse!");
+
+ assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown code");
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, Op.getValueType());
+ }
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ assert(DAG.getTarget().Options.UnsafeFPMath);
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(0));
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ assert(DAG.getTarget().Options.UnsafeFPMath);
+
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (N0CFP->getValueAPF().isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0));
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(0),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1));
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ }
+}
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate
+// nodes based on the type of node we are checking. This simplifies life a
+// bit for the callers.
+static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+ if (N.getOpcode() == ISD::SELECT_CC &&
+ N.getOperand(2).getOpcode() == ISD::Constant &&
+ N.getOperand(3).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
+ cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+ }
+ return false;
+}
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use. If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDValue N) {
+ SDValue N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ return true;
+ return false;
+}
+
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
+ SDValue N0, SDValue N1) {
+ EVT VT = N0.getValueType();
+ if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+ if (isa<ConstantSDNode>(N1)) {
+ // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N0.getOperand(1)),
+ cast<ConstantSDNode>(N1));
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ }
+ if (N0.hasOneUse()) {
+ // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ }
+ }
+
+ if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+ if (isa<ConstantSDNode>(N0)) {
+ // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N1.getOperand(1)),
+ cast<ConstantSDNode>(N0));
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ }
+ if (N1.hasOneUse()) {
+ // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N1.getOperand(0), N0);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.1 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ dbgs() << " and " << NumTo-1 << " other values\n";
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
+ "Cannot combine value to value of different type!"));
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesWith(N, To);
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ if (To[i].getNode()) {
+ AddToWorkList(To[i].getNode());
+ AddUsersToWorkList(To[i].getNode());
+ }
+ }
+ }
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ return SDValue(N, 0);
+}
+
+void DAGCombiner::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ // Replace all uses. If any nodes become isomorphic to other nodes and
+ // are deleted, make sure to remove them from our worklist.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorkList(TLO.New.getNode());
+ AddUsersToWorkList(TLO.New.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (TLO.Old.getNode()->use_empty()) {
+ removeFromWorkList(TLO.Old.getNode());
+
+ // If the operands of this node are only used by the node, they will now
+ // be dead. Make sure to visit them first to delete dead nodes early.
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+ if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+ AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
+ DAG.DeleteNode(TLO.Old.getNode());
+ }
+}
+
+/// SimplifyDemandedBits - Check the specified integer node value to see if
+/// it can be simplified or if things it uses can be simplified by bit
+/// propagation. If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ APInt KnownZero, KnownOne;
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ return false;
+
+ // Revisit the node.
+ AddToWorkList(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.2 ";
+ TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: ";
+ TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
+ DebugLoc dl = Load->getDebugLoc();
+ EVT VT = Load->getValueType(0);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
+
+ DEBUG(dbgs() << "\nReplacing.9 ";
+ Load->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Trunc.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
+ removeFromWorkList(Load);
+ DAG.DeleteNode(Load);
+ AddToWorkList(Trunc.getNode());
+}
+
+SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
+ Replace = false;
+ DebugLoc dl = Op.getDebugLoc();
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ Replace = true;
+ return DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ }
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default: break;
+ case ISD::AssertSext:
+ return DAG.getNode(ISD::AssertSext, dl, PVT,
+ SExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::AssertZext:
+ return DAG.getNode(ISD::AssertZext, dl, PVT,
+ ZExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::Constant: {
+ unsigned ExtOpc =
+ Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOpc, dl, PVT, Op);
+ }
+ }
+
+ if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
+ return SDValue();
+ return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
+}
+
+SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
+ if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
+ return SDValue();
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (NewOp.getNode() == 0)
+ return SDValue();
+ AddToWorkList(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
+ DAG.getValueType(OldVT));
+}
+
+SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (NewOp.getNode() == 0)
+ return SDValue();
+ AddToWorkList(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
+}
+
+/// PromoteIntBinOp - Promote the specified integer binary operation if the
+/// target indicates it is beneficial. e.g. On x86, it's usually better to
+/// promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace0 = false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
+ if (NN0.getNode() == 0)
+ return SDValue();
+
+ bool Replace1 = false;
+ SDValue N1 = Op.getOperand(1);
+ SDValue NN1;
+ if (N0 == N1)
+ NN1 = NN0;
+ else {
+ NN1 = PromoteOperand(N1, PVT, Replace1);
+ if (NN1.getNode() == 0)
+ return SDValue();
+ }
+
+ AddToWorkList(NN0.getNode());
+ if (NN1.getNode())
+ AddToWorkList(NN1.getNode());
+
+ if (Replace0)
+ ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
+ if (Replace1)
+ ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, NN0, NN1));
+ }
+ return SDValue();
+}
+
+/// PromoteIntShiftOp - Promote the specified integer shift operation if the
+/// target indicates it is beneficial. e.g. On x86, it's usually better to
+/// promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace = false;
+ SDValue N0 = Op.getOperand(0);
+ if (Opc == ISD::SRA)
+ N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+ else if (Opc == ISD::SRL)
+ N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+ else
+ N0 = PromoteOperand(N0, PVT, Replace);
+ if (N0.getNode() == 0)
+ return SDValue();
+
+ AddToWorkList(N0.getNode());
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::PromoteExtend(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
+ }
+ return SDValue();
+}
+
+bool DAGCombiner::PromoteLoad(SDValue Op) {
+ if (!LegalOperations)
+ return false;
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return false;
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return false;
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode *N = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+
+ DEBUG(dbgs() << "\nPromoting ";
+ N->dump(&DAG);
+ dbgs() << "\nTo: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ AddToWorkList(Result.getNode());
+ return true;
+ }
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+ // set the instance variables, so that the various visit routines may use it.
+ Level = AtLevel;
+ LegalOperations = Level >= AfterLegalizeVectorOps;
+ LegalTypes = Level >= AfterLegalizeTypes;
+
+ // Add all the dag nodes to the worklist.
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I)
+ AddToWorkList(I);
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // The root of the dag may dangle to deleted nodes until the dag combiner is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // while the worklist isn't empty, find a node and
+ // try and combine it.
+ while (!WorkListContents.empty()) {
+ SDNode *N;
+ // The WorkListOrder holds the SDNodes in order, but it may contain duplicates.
+ // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
+ // worklist *should* contain, and check the node we want to visit is should
+ // actually be visited.
+ do {
+ N = WorkListOrder.pop_back_val();
+ } while (!WorkListContents.erase(N));
+
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (N->use_empty() && N != &Dummy) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ DAG.DeleteNode(N);
+ continue;
+ }
+
+ SDValue RV = combine(N);
+
+ if (RV.getNode() == 0)
+ continue;
+
+ ++NodesCombined;
+
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.getNode() == N)
+ continue;
+
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ DEBUG(dbgs() << "\nReplacing.3 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ RV.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ // Transfer debug value.
+ DAG.TransferDbgValues(SDValue(N, 0), RV);
+ WorkListRemover DeadNodes(*this);
+ if (N->getNumValues() == RV.getNode()->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.getNode());
+ else {
+ assert(N->getValueType(0) == RV.getValueType() &&
+ N->getNumValues() == 1 && "Type mismatch");
+ SDValue OpV = RV;
+ DAG.ReplaceAllUsesWith(N, &OpV);
+ }
+
+ // Push the new node and any users onto the worklist
+ AddToWorkList(RV.getNode());
+ AddUsersToWorkList(RV.getNode());
+
+ // Add any uses of the old node to the worklist in case this node is the
+ // last one that uses them. They may become dead after this node is
+ // deleted.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ }
+
+ // If the root changed (e.g. it was a dead load, update the root).
+ DAG.setRoot(Dummy.getValue());
+ DAG.RemoveDeadNodes();
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::SUBC: return visitSUBC(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::SUBE: return visitSUBE(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM: return visitSREM(N);
+ case ISD::UREM: return visitUREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
+ case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SMULO: return visitSMULO(N);
+ case ISD::UMULO: return visitUMULO(N);
+ case ISD::SDIVREM: return visitSDIVREM(N);
+ case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BITCAST: return visitBITCAST(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FMA: return visitFMA(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::MEMBARRIER: return visitMEMBARRIER(N);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::combine(SDNode *N) {
+ SDValue RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (RV.getNode() == 0) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+ // Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level, false, this);
+
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+ }
+
+ // If nothing happened still, try promoting the operation.
+ if (RV.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ RV = PromoteIntBinOp(SDValue(N, 0));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ RV = PromoteIntShiftOp(SDValue(N, 0));
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ RV = PromoteExtend(SDValue(N, 0));
+ break;
+ case ISD::LOAD:
+ if (PromoteLoad(SDValue(N, 0)))
+ RV = SDValue(N, 0);
+ break;
+ }
+ }
+
+ // If N is a commutative binary node, try commuting it to enable more
+ // sdisel CSE.
+ if (RV.getNode() == 0 &&
+ SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ N->getNumValues() == 1) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Constant operands are canonicalized to RHS.
+ if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+ SDValue Ops[] = { N1, N0 };
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+ Ops, 2);
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+ }
+
+ return RV;
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null sd operand.
+static SDValue getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+ return N->getOperand(1);
+ }
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+ // Iterate through token factors. The TFs grows when new token factors are
+ // encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ SDNode *TF = TFs[i];
+
+ // Check each of the operands.
+ for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+ SDValue Op = TF->getOperand(i);
+
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+ // rededundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if (Op.hasOneUse() &&
+ std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ // Queue up for processing.
+ TFs.push_back(Op.getNode());
+ // Clean up in case the token factor is removed.
+ AddToWorkList(Op.getNode());
+ Changed = true;
+ break;
+ }
+ // Fall thru
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.getNode()))
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ SDValue Result;
+
+ // If we've change things around then replace token factor.
+ if (Changed) {
+ if (Ops.empty()) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ // New and improved token factor.
+ Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ }
+
+ // Don't add users to work list.
+ return CombineTo(N, Result, false);
+ }
+
+ return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+ WorkListRemover DeadNodes(*this);
+ // Replacing results may cause a different MERGE_VALUES to suddenly
+ // be CSE'd with N, and carry its uses with it. Iterate until no
+ // uses remain, to ensure that the node can be safely deleted.
+ // First add the users of this node to the work list so that they
+ // can be tried again once they have new operands.
+ AddUsersToWorkList(N);
+ do {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
+ } while (!N->use_empty());
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+ SelectionDAG &DAG) {
+ EVT VT = N0.getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+ if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N00.getOperand(1))) {
+ // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
+ N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
+ N00.getOperand(0), N01),
+ DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
+ N00.getOperand(1), N01));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (add x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ }
+
+ // fold (add x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (add c1, c2) -> c1+c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (add x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+ GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+ GA->getOffset() +
+ (uint64_t)N1C->getSExtValue());
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N1C && N0.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(N1C->getAPIntValue()+
+ N0C->getAPIntValue(), VT),
+ N0.getOperand(1));
+ // reassociate add
+ SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
+ if (RADD.getNode() != 0)
+ return RADD;
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+ cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+ cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+ // fold ((B-A)+A) -> B
+ if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+ return N0.getOperand(0);
+ // fold (A+(B-(A+C))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(0))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ // fold (A+(B-(C+A))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(1))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(0));
+ // fold (A+((B-A)+or-C)) to (B+or-C)
+ if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+ N1.getOperand(0).getOpcode() == ISD::SUB &&
+ N0 == N1.getOperand(0).getOperand(1))
+ return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+ // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+ DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+ }
+
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
+ if (VT.isInteger() && !VT.isVector()) {
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
+
+ // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
+ if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+ if (Result.getNode()) return Result;
+ }
+ if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+ if (Result.getNode()) return Result;
+ }
+
+ // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+ if (N1.getOpcode() == ISD::SHL &&
+ N1.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+ if (N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N0.getOperand(0).getOperand(1),
+ N0.getOperand(1)));
+
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue AndOp0 = N1.getOperand(0);
+ ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+ unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+ // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+ // and similar xforms where the inner op is either ~0 or 0.
+ if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ DebugLoc DL = N->getDebugLoc();
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ }
+ }
+
+ // add (sext i1), X -> sub X, (zext i1)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueType() == MVT::i1 &&
+ !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+
+ // canonicalize constant to RHS.
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ // fold (addc x, 0) -> x + no carry out
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+
+ // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
+ return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
+ N1, N0, CarryIn);
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
+// Since it may not be valid to emit a fold to zero for vector initializers
+// check if we can before folding.
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ }
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+ &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (sub x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
+ // fold (sub c1, c2) -> c1-c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), VT));
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+ if (N0C && N0C->isAllOnesValue())
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+ // fold C2-(A+C1) -> (C2-C1)-A
+ if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
+ VT);
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
+ N1.getOperand(0));
+ }
+ // fold ((A+(B+or-C))-B) -> A+or-C
+ if (N0.getOpcode() == ISD::ADD &&
+ (N0.getOperand(1).getOpcode() == ISD::SUB ||
+ N0.getOperand(1).getOpcode() == ISD::ADD) &&
+ N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ // fold ((A+(C+B))-B) -> A+C
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ // fold ((A-(B-C))-C) -> A-B
+ if (N0.getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+
+ // If either operand of a sub is undef, the result is undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ // If the relocation model supports it, consider symbol offsets.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // fold (sub Sym, c) -> Sym-c
+ if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+ GA->getOffset() -
+ (uint64_t)N1C->getSExtValue());
+ // fold (sub Sym+c1, Sym+c2) -> c1-c2
+ if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+ if (GA->getGlobal() == GB->getGlobal())
+ return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+ VT);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, VT),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, 0) -> x + no borrow
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (N0C && N0C->isAllOnesValue())
+ return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (sube x, y, false) -> (subc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (mul x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (mul c1, c2) -> c1*c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (mul x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mul x, -1) -> 0-x
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // fold (mul x, (1 << c)) -> x << c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy(N0.getValueType())));
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
+ unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT),
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(Log2Val,
+ getShiftAmountTy(N0.getValueType()))));
+ }
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1, N0.getOperand(1));
+ AddToWorkList(C3.getNode());
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDValue Sh(0,0), Y(0,0);
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getNode()->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getNode()->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+
+ if (Sh.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ Mul, Sh.getOperand(1));
+ }
+ }
+
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ // reassociate mul
+ SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
+ if (RMUL.getNode() != 0)
+ return RMUL;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sdiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->getAPIntValue() == 1LL)
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
+ N0, N1);
+ }
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ if (N1C && !N1C->isNullValue() &&
+ (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
+ // If dividing by powers of two is cheap, then don't perform the following
+ // fold.
+ if (TLI.isPow2DivCheap())
+ return SDValue();
+
+ unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+
+ // Splat the sign bit into the register
+ SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ AddToWorkList(SGN.getNode());
+
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
+ DAG.getConstant(VT.getSizeInBits() - lg2,
+ getShiftAmountTy(SGN.getValueType())));
+ SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
+ AddToWorkList(SRL.getNode());
+ AddToWorkList(ADD.getNode()); // Divide by pow2
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
+ DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (N1C->getAPIntValue().isNonNegative())
+ return SRA;
+
+ AddToWorkList(SRA.getNode());
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), SRA);
+ }
+
+ // if integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence.
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ SDValue Op = BuildSDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (udiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy(N0.getValueType())));
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
+ N1.getOperand(1),
+ DAG.getConstant(SHC->getAPIntValue()
+ .logBase2(),
+ ADDVT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+ // fold (udiv x, c) -> alternate
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ SDValue Op = BuildUDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (srem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (urem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue()-1,VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ VT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // fold (mulhs x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
+ DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ getShiftAmountTy(N0.getValueType())));
+ // fold (mulhs x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // fold (mulhu x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhu x, 1) -> 0
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getConstant(0, N0.getValueType());
+ // fold (mulhu x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
+/// compute two values. LoOp and HiOp give the opcodes for the two computations
+/// that are being performed. Return true if a simplification was made.
+///
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp) {
+ // If the high half is not needed, just compute the low half.
+ bool HiExists = N->hasAnyUseOfValue(1);
+ if (!HiExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+ SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If the low half is not needed, just compute the high half.
+ bool LoExists = N->hasAnyUseOfValue(0);
+ if (!LoExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If both halves are used, return as it is.
+ if (LoExists && HiExists)
+ return SDValue();
+
+ // If the two computed results can be simplified separately, separate them.
+ if (LoExists) {
+ SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Lo.getNode());
+ SDValue LoOpt = combine(Lo.getNode());
+ if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ return CombineTo(N, LoOpt, LoOpt);
+ }
+
+ if (HiExists) {
+ SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Hi.getNode());
+ SDValue HiOpt = combine(Hi.getNode());
+ if (HiOpt.getNode() && HiOpt != Hi &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ return CombineTo(N, HiOpt, HiOpt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
+ if (Res.getNode()) return Res;
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
+ if (Res.getNode()) return Res;
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMULO(SDNode *N) {
+ // (smulo x, 2) -> (saddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMULO(SDNode *N) {
+ // (umulo x, 2) -> (uaddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+ // Bail early if none of these transforms apply.
+ if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
+ // For each of OP in AND/OR/XOR:
+ // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+ // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+ // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ //
+ // do not sink logical op inside of a vector extend, since it may combine
+ // into a vsetcc.
+ EVT Op0VT = N0.getOperand(0).getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ // Avoid infinite looping with PromoteIntBinOp.
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
+ (N0.getOpcode() == ISD::TRUNCATE &&
+ (!TLI.isZExtFree(VT, Op0VT) ||
+ !TLI.isTruncateFree(Op0VT, VT)) &&
+ TLI.isTypeLegal(Op0VT))) &&
+ !VT.isVector() &&
+ Op0VT == N1.getOperand(0).getValueType() &&
+ (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
+ }
+
+ // For each of OP in SHL/SRL/SRA/AND...
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+ // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
+ // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+ N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ORNode, N0.getOperand(1));
+ }
+
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization after type legalization and before
+ // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((N0.getOpcode() == ISD::BITCAST ||
+ N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+ Level == AfterLegalizeTypes) {
+ SDValue In0 = N0.getOperand(0);
+ SDValue In1 = N1.getOperand(0);
+ EVT In0Ty = In0.getValueType();
+ EVT In1Ty = In1.getValueType();
+ DebugLoc DL = N->getDebugLoc();
+ // If both incoming values are integers, and the original types are the
+ // same.
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+ SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
+ AddToWorkList(Op.getNode());
+ return BC;
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N0.getOperand(1).getOpcode() == ISD::UNDEF &&
+ N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+
+ assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
+ "Inputs to shuffles are not the same type");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ bool SameMask = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx0 = SVN0->getMaskElt(i);
+ int Idx1 = SVN1->getMaskElt(i);
+ if (Idx0 != Idx1) {
+ SameMask = false;
+ break;
+ }
+ }
+
+ if (SameMask) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(Op.getNode());
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+ DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (and x, 0) -> 0, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+
+ // fold (and x, -1) -> x, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N0;
+ }
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (and c1, c2) -> c1&c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
+ // fold (and x, -1) -> x
+ if (N1C && N1C->isAllOnesValue())
+ return N0;
+ // if (and x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(BitWidth)))
+ return DAG.getConstant(0, VT);
+ // reassociate and
+ SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
+ if (RAND.getNode() != 0)
+ return RAND;
+ // fold (and (or x, C), D) -> D if (C & D) == D
+ if (N1C && N0.getOpcode() == ISD::OR)
+ if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ return N1;
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N0Op0 = N0.getOperand(0);
+ APInt Mask = ~N1C->getAPIntValue();
+ Mask = Mask.trunc(N0Op0.getValueSizeInBits());
+ if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
+ N0.getValueType(), N0Op0);
+
+ // Replace uses of the AND with uses of the Zero extend node.
+ CombineTo(N, Zext);
+
+ // We actually want to replace all uses of the any_extend with the
+ // zero_extend, to avoid duplicating things. This will later cause this
+ // AND to be folded.
+ CombineTo(N0.getNode(), Zext);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // the 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD) ||
+ N0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
+ N0 : N0.getOperand(0) );
+
+ // Get the constant (if applicable) the zero'th operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getNullValue(1);
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ if (IsSplat) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
+ EVT VT = Vector->getValueType(0);
+ unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+
+ // If the splat value has been compressed to a bitlength lower
+ // than the size of the vector lane, we need to re-expand it to
+ // the lane size.
+ if (BitWidth > SplatBitSize)
+ for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
+ SplatBitSize < BitWidth;
+ SplatBitSize = SplatBitSize * 2)
+ SplatValue |= SplatValue.shl(SplatBitSize);
+
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
+ }
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+ bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getMemoryVT());
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant =
+ Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default: B = false; break;
+ case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD: B = true; break;
+ }
+
+ if (B && Constant.isAllOnesValue()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
+ Load->getValueType(0), Load->getDebugLoc(),
+ Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(),
+ Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ if (Load->getNumValues() == 3) {
+ // PRE/POST_INC loads have 3 values.
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
+ NewLoad.getValue(2) };
+ CombineTo(Load, To, 3, true);
+ } else {
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+ }
+
+ // Fold the AND away, taking care not to fold to the old load node if we
+ // replaced it.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ TLI.getSetCCResultType(N0.getSimpleValueType())))))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
+ // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+ if (N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+ LoadSDNode *LN0 = HasAnyExt
+ ? cast<LoadSDNode>(N0.getOperand(0))
+ : cast<LoadSDNode>(N0);
+ if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+ LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
+ uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
+ if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT LoadedVT = LN0->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Do not change the width of a volatile load.
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT PtrType = LN0->getOperand(1).getValueType();
+
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ // For big endian targets, we need to add an offset to the pointer
+ // to load the correct bytes. For little endian systems, we merely
+ // need to read fewer bytes from the same pointer.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+ unsigned EVTStoreBytes = ExtVT.getStoreSize();
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+ NewPtr, DAG.getConstant(PtrOff, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
+
+ AddToWorkList(NewPtr.getNode());
+
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), NewPtr,
+ LN0->getPointerInfo(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ Alignment);
+ AddToWorkList(N);
+ CombineTo(LN0, Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16
+///
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+ bool LookPassAnd0 = false;
+ bool LookPassAnd1 = false;
+ if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+ std::swap(N0, N1);
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::AND) {
+ if (!N0.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01C || N01C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N0 = N0.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ if (N1.getOpcode() == ISD::AND) {
+ if (!N1.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C || N11C->getZExtValue() != 0xFF)
+ return SDValue();
+ N1 = N1.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.getNode()->hasOneUse() ||
+ !N1.getNode()->hasOneUse())
+ return SDValue();
+
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N01C || !N11C)
+ return SDValue();
+ if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+ return SDValue();
+
+ // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+ SDValue N00 = N0->getOperand(0);
+ if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+ if (!N00.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+ if (!N001C || N001C->getZExtValue() != 0xFF)
+ return SDValue();
+ N00 = N00.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ SDValue N10 = N1->getOperand(0);
+ if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+ if (!N10.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+ if (!N101C || N101C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N10 = N10.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N00 != N10)
+ return SDValue();
+
+ // Make sure everything beyond the low halfword is zero since the SRL 16
+ // will clear the top bits.
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ if (DemandHighBits && OpSizeInBits > 16 &&
+ (!LookPassAnd0 || !LookPassAnd1) &&
+ !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
+ return SDValue();
+
+ SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
+ if (OpSizeInBits > 16)
+ Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
+ DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+ return Res;
+}
+
+/// isBSwapHWordElement - Return true if the specified node is an element
+/// that makes up a 32-bit packed halfword byteswap. i.e.
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
+ if (!N.getNode()->hasOneUse())
+ return false;
+
+ unsigned Opc = N.getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+ return false;
+
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!N1C)
+ return false;
+
+ unsigned Num;
+ switch (N1C->getZExtValue()) {
+ default:
+ return false;
+ case 0xFF: Num = 0; break;
+ case 0xFF00: Num = 1; break;
+ case 0xFF0000: Num = 2; break;
+ case 0xFF000000: Num = 3; break;
+ }
+
+ // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
+ SDValue N0 = N.getOperand(0);
+ if (Opc == ISD::AND) {
+ if (Num == 0 || Num == 2) {
+ // (x >> 8) & 0xff
+ // (x >> 8) & 0xff0000
+ if (N0.getOpcode() != ISD::SRL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else {
+ // (x << 8) & 0xff00
+ // (x << 8) & 0xff000000
+ if (N0.getOpcode() != ISD::SHL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+ } else if (Opc == ISD::SHL) {
+ // (x & 0xff) << 8
+ // (x & 0xff0000) << 8
+ if (Num != 0 && Num != 2)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else { // Opc == ISD::SRL
+ // (x & 0xff00) >> 8
+ // (x & 0xff000000) >> 8
+ if (Num != 1 && Num != 3)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+
+ if (Parts[Num])
+ return false;
+
+ Parts[Num] = N0.getOperand(0).getNode();
+ return true;
+}
+
+/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// => (rotl (bswap x), 16)
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+ // Look for either
+ // (or (or (and), (and)), (or (and), (and)))
+ // (or (or (or (and), (and)), (and)), (and))
+ if (N0.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (N1.getOpcode() == ISD::OR &&
+ N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
+ // (or (or (and), (and)), (or (and), (and)))
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ SDValue N010 = N01.getOperand(0);
+ if (!isBSwapHWordElement(N010, Parts))
+ return SDValue();
+ SDValue N011 = N01.getOperand(1);
+ if (!isBSwapHWordElement(N011, Parts))
+ return SDValue();
+ } else {
+ // (or (or (or (and), (and)), (and)), (and))
+ if (!isBSwapHWordElement(N1, Parts))
+ return SDValue();
+ if (!isBSwapHWordElement(N01, Parts))
+ return SDValue();
+ if (N00.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ }
+
+ // Make sure the parts are all coming from the same node.
+ if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+ return SDValue();
+
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+ SDValue(Parts[0],0));
+
+ // Result of the bswap should be rotated by 16. If it's not legal, than
+ // do (x << 16) | (x >> 16).
+ SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
+ if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (or x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+
+ // fold (or x, -1) -> -1, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N1;
+ }
+
+ // fold (or x, undef) -> -1
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
+ // fold (or c1, c2) -> c1|c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (or x, -1) -> -1
+ if (N1C && N1C->isAllOnesValue())
+ return N1;
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+
+ // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
+ SDValue BSwap = MatchBSwapHWord(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+ BSwap = MatchBSwapHWordLow(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+
+ // reassociate or
+ SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+ if (ROR.getNode() != 0)
+ return ROR;
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) == 0.
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+ // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ TLI.getSetCCResultType(N0.getValueType())))))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ const APInt &LHSMask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &RHSMask =
+ cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+ }
+ }
+
+ // See if this is some rotate idiom.
+ if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+ return SDValue(Rot, 0);
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND) {
+ if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ Op = Op.getOperand(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+
+ return false;
+}
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
+ // Must be a legal type. Expanded 'n promoted things won't work with rotates.
+ EVT VT = LHS.getValueType();
+ if (!TLI.isTypeLegal(VT)) return 0;
+
+ // The target must have at least one rotate flavor.
+ bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
+ bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+ if (!HasROTL && !HasROTR) return 0;
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDValue LHSShift; // The shift.
+ SDValue LHSMask; // AND value if any.
+ if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+ return 0; // Not part of a rotate.
+
+ SDValue RHSShift; // The shift.
+ SDValue RHSMask; // AND value if any.
+ if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+ return 0; // Not part of a rotate.
+
+ if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+ return 0; // Not shifting the same value.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return 0; // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask , RHSMask );
+ }
+
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ SDValue LHSShiftArg = LHSShift.getOperand(0);
+ SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+ if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+ RHSShiftAmt.getOpcode() == ISD::Constant) {
+ uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != OpSizeInBits)
+ return 0;
+
+ SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
+
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.getNode() || RHSMask.getNode()) {
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
+
+ if (LHSMask.getNode()) {
+ APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
+ Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ }
+ if (RHSMask.getNode()) {
+ APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
+ Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ }
+
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ }
+
+ return Rot.getNode();
+ }
+
+ // If there is a mask here, and we have a variable shift, we can't be sure
+ // that we're masking out the right stuff.
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return 0;
+
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+ if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+ LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
+ // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
+ if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+ RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // Look for sign/zext/any-extended or truncate cases:
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
+ SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
+ if (RExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ }
+ }
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
+ LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LHS, RHS, CC;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (xor x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (xor x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (xor c1, c2) -> c1^c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold (xor x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // reassociate xor
+ SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
+ if (RXOR.getNode() != 0)
+ return RXOR;
+
+ // fold !(x cc y) -> (x !cc y)
+ if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ bool isInt = LHS.getValueType().isInteger();
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ isInt);
+
+ if (!LegalOperations ||
+ TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
+ switch (N0.getOpcode()) {
+ default:
+ llvm_unreachable("Unhandled SetCC Equivalent!");
+ case ISD::SETCC:
+ return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
+ case ISD::SELECT_CC:
+ return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
+ N0.getOperand(3), NotCC);
+ }
+ }
+ }
+
+ // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+ if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getNode()->hasOneUse() &&
+ isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+ SDValue V = N0.getOperand(0);
+ V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
+ DAG.getConstant(1, V.getValueType()));
+ AddToWorkList(V.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
+ }
+
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+ if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+ if (N1C && N1C->isAllOnesValue() &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
+ if (N1C && N0.getOpcode() == ISD::XOR) {
+ ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N00C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N00C->getAPIntValue(), VT));
+ if (N01C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N01C->getAPIntValue(), VT));
+ }
+ // fold (xor x, x) -> 0
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // Simplify the expression using non-local knowledge.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+ SDNode *LHS = N->getOperand(0).getNode();
+ if (!LHS->hasOneUse()) return SDValue();
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
+ bool HighBitSet = false; // Can we transform this if the high bit is set?
+
+ switch (LHS->getOpcode()) {
+ default: return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ case ISD::AND:
+ HighBitSet = true; // We can only transform sra if the high bit is set.
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant as well.
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
+
+ // FIXME: disable this unless the input to the binop is a shift by a constant.
+ // If it is not a shift, it pessimizes some common cases like:
+ //
+ // void foo(int *X, int i) { X[i & 1235] = 1; }
+ // int bar(int *X, int i) { return X[i & 255]; }
+ SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+ if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+ BinOpLHSVal->getOpcode() != ISD::SRA &&
+ BinOpLHSVal->getOpcode() != ISD::SRL) ||
+ !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // If this is a signed shift right, and the high bit is modified by the
+ // logical operation, do not perform the transformation. The highBitSet
+ // boolean indicates the value of the high bit of the constant which would
+ // cause it to be modified for this operation.
+ if (N->getOpcode() == ISD::SRA) {
+ bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+ if (BinOpRHSSignSet != HighBitSet)
+ return SDValue();
+ }
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+ N->getValueType(0),
+ LHS->getOperand(1), N->getOperand(1));
+
+ // Create the new shift.
+ SDValue NewShift = DAG.getNode(N->getOpcode(),
+ LHS->getOperand(0).getDebugLoc(),
+ VT, LHS->getOperand(0), N->getOperand(1));
+
+ // Create the new binop.
+ return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (shl c1, c2) -> c1<<c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ // fold (shl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (shl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (shl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (shl undef, x) -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+ // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // For this to be valid, the second form must not preserve any of the bits
+ // that are shifted out by the inner shift in the first form. This means
+ // the outer shift size must be >= the number of bits added by the ext.
+ // As a corollary, we don't care what kind of ext it is.
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+ DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+ N0.getOperand(0)->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ }
+
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
+ // (and (srl x, (sub c1, c2), MASK)
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ if (c1 < VT.getSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask = Mask.shl(c2-c1);
+ Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ } else {
+ Mask = Mask.lshr(c1-c2);
+ Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
+ DAG.getConstant(Mask, VT));
+ }
+ }
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() -
+ N1C->getZExtValue()),
+ VT);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ HiBitsMask);
+ }
+
+ if (N1C) {
+ SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSHL.getNode())
+ return NewSHL;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (sra c1, c2) -> (sra c1, c2)
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ // fold (sra 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (sra -1, x) -> -1
+ if (N0C && N0C->isAllOnesValue())
+ return N0;
+ // fold (sra x, (setge c, size(x))) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (sra x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
+ // sext_inreg.
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getValueType(ExtVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRA) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+ if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(Sum, N1C->getValueType(0)));
+ }
+ }
+
+ // fold (sra (shl X, m), (sub result_size, n))
+ // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
+ // result_size - n != m.
+ // If truncate is free for the target sext(shl) is likely to result in better
+ // code.
+ if (N0.getOpcode() == ISD::SHL) {
+ // Get the two constanst of the shifts, CN0 = m, CN = n.
+ const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N01C && N1C) {
+ // Determine what the truncate's result bitsize and type would be.
+ EVT TruncVT =
+ EVT::getIntegerVT(*DAG.getContext(),
+ OpSizeInBits - N1C->getZExtValue());
+ // Determine the residual right-shift amount.
+ signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+ // If the shift is not a no-op (in which case this should be just a sign
+ // extend already), the truncated to type is legal, sign_extend is legal
+ // on that type, and the truncate to that type is both legal and free,
+ // perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+
+ SDValue Amt = DAG.getConstant(ShiftAmt,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+ // if c1 is equal to the number of bits the trunc removes
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() &&
+ N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+ EVT LargeVT = N0.getOperand(0).getValueType();
+ ConstantSDNode *LargeShiftAmt =
+ cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+ if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+ LargeShiftAmt->getZExtValue()) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+ getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+ N0.getOperand(0).getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1C) {
+ SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRA.getNode())
+ return NewSRA;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ // fold (srl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ // This is only valid if the OpSizeInBits + c1 = size of inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
+ DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
+ N0.getOperand(0)->getOperand(0),
+ DAG.getConstant(c1 + c2, ShiftCountVT)));
+ }
+ }
+
+ // fold (srl (shl x, c), c) -> (and x, cst2)
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ N0.getValueSizeInBits() <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, VT));
+ }
+
+
+ // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
+ SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
+ AddToWorkList(SmallShift.getNode());
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ }
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+ // If all of the bits input the to ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~KnownZero;
+ if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits - 1)) == 0) {
+ // Okay, we know that only that the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+ DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
+ AddToWorkList(Op.getNode());
+ }
+
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ Op, DAG.getConstant(1, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C) {
+ SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRL.getNode())
+ return NewSRL;
+ }
+
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However when after the source operand of SRL is optimized into AND, the SRL
+ // itself may not be optimized further. Look for it and add the BRCOND into
+ // the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look pass the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+
+ // fold (select C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+ // fold (select true, X, Y) -> X
+ if (N0C && !N0C->isNullValue())
+ return N1;
+ // fold (select false, X, Y) -> Y
+ if (N0C && N0C->isNullValue())
+ return N2;
+ // fold (select C, 1, X) -> (or C, X)
+ if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select C, 0, 1) -> (xor C, 1)
+ if (VT.isInteger() &&
+ (VT0 == MVT::i1 ||
+ (VT0.isInteger() &&
+ TLI.getBooleanContents(false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
+ N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ SDValue XORNode;
+ if (VT == VT0)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ AddToWorkList(XORNode.getNode());
+ if (VT.bitsGT(VT0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
+ }
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+ }
+ // fold (select C, X, 0) -> (and C, X)
+ if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // FIXME:
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, N->getDebugLoc(), false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ }
+
+ // Fold to a simpler select_cc
+ if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(),
+ N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if extension are possible and the above
+// mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVector<SDNode*, 4> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+ // Both unextended and extended values are live out. There had better be
+ // a good reason for the transformation.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType) {
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (sext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already ready.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (OpBits < DestBits)
+ Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+ else if (OpBits > DestBits)
+ Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // fold (sext (load x)) -> (sext (truncate (sextload x)))
+ // None of the supported targets knows how to perform load and sign extend
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+ // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+ if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (sextload x), (sext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.sext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // of the same size as the compared operands. Only optimize sext(setcc())
+ // if this is the case.
+ EVT SVT = TLI.getSetCCResultType(N0VT);
+
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
+ return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ SDValue NegOne =
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ NegOne, DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ if (!VT.isVector() && (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))))
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ NegOne, DAG.getConstant(0, VT));
+ }
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+// isTruncateOf - If N is a truncate of some other value, return true, record
+// the value being truncated in Op and which of Op's bits are zero in KnownZero.
+// This function computes KnownZero to avoid a duplicated call to
+// ComputeMaskedBits in the caller.
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ APInt &KnownZero) {
+ APInt KnownOne;
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+ return true;
+ }
+
+ if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
+ cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
+ ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+ if (COp0 && COp0->isNullValue())
+ Op = Op1;
+ else if (COp1 && COp1->isNullValue())
+ Op = Op0;
+ else
+ return false;
+
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+
+ if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
+ return false;
+
+ return true;
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (zext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate x)) -> (zext x) or
+ // (zext (truncate x)) -> (truncate x)
+ // This is valid when the truncated bits of x are already zero.
+ // FIXME: We should extend this to work for vectors too.
+ SDValue Op;
+ APInt KnownZero;
+ if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
+ APInt TruncatedBits =
+ (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
+ APInt(Op.getValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getValueSizeInBits(),
+ N0.getValueSizeInBits(),
+ std::min(Op.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (TruncatedBits == (KnownZero & TruncatedBits)) {
+ if (VT.bitsGT(Op.getValueType()))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
+ if (VT.bitsLT(Op.getValueType()))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+
+ return Op;
+ }
+ }
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ SDValue Op = N0.getOperand(0);
+ if (Op.getValueType().bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
+ } else if (Op.getValueType().bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
+ }
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+ N0.getValueType().getScalarType());
+ }
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (zext (load x)) -> (zext (truncate (zextload x)))
+ // None of the supported targets knows how to perform load and vector_zext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (zextload x), (zext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+ // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+ if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
+ ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ if (!LegalOperations && VT.isVector()) {
+ // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
+ // Only do this before legalize for now.
+ EVT N0VT = N0.getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
+ DAG.getConstant(1, EltVT));
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ }
+
+ // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
+ unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+ InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ if (ShAmtVal > KnownZeroBits)
+ return SDValue();
+ }
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncOp = N0.getOperand(0);
+ if (TruncOp.getValueType() == VT)
+ return TruncOp; // x iff x size == zext size.
+ if (TruncOp.getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
+ }
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ // None of the supported targets knows how to perform load and any_ext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ANY_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD &&
+ !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode())
+ return SCC;
+ }
+
+ return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used. If so, return the
+/// simpler operand, otherwise return a null SDValue.
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ assert(CV != 0 && "Const value should be ConstSDNode.");
+ const APInt &CVal = CV->getAPIntValue();
+ APInt NewVal = CVal & Mask;
+ if (NewVal != CVal) {
+ return DAG.getConstant(NewVal, V.getValueType());
+ }
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
+ // If the LHS or RHS don't contribute bits to the or, drop them.
+ if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth()) break;
+ APInt NewMask = Mask << Amt;
+ SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+ if (SimplifyLHS.getNode())
+ return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+ SimplifyLHS, V.getOperand(1));
+ }
+ }
+ return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted to right of N
+/// bits and then truncated to a narrower type and where N is a multiple
+/// of number of bits of the narrower type, transform it to a narrower load
+/// from address + N / num of bits of new type. If the result is to be
+/// extended, also fold the extension to form a extending load.
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+ // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+ // extended to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ } else if (Opc == ISD::SRL) {
+ // Another special-case: SRL is basically zero-extending a narrower value.
+ ExtType = ISD::ZEXTLOAD;
+ N0 = SDValue(N, 0);
+ ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01) return SDValue();
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - N01->getZExtValue());
+ }
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ return SDValue();
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return SDValue();
+
+ unsigned ShAmt = 0;
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShAmt = N01->getZExtValue();
+ // Is the shift amount a multiple of size of VT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+ // Is the load width a multiple of size of VT?
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+ return SDValue();
+ }
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ // Because a SRL must be assumed to *need* to zero-extend the high bits
+ // (as opposed to anyext the high bits), we can't combine the zextload
+ // lowering of SRL and an sextload.
+ if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+ return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ return SDValue();
+ }
+ }
+
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+ // If we haven't found a load, we can't narrow it. Don't transform one with
+ // multiple uses, this would require adding a new load.
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
+ return SDValue();
+
+ // Don't change the width of a volatile load.
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (LN0->isVolatile())
+ return SDValue();
+
+ // Verify that we are actually reducing a load width here.
+ if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (LN0->getNumValues() > 2)
+ return SDValue();
+
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ // It's not possible to generate a constant of extended or untyped type.
+ return SDValue();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), NewAlign);
+ else
+ Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy(Result.getValueType());
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ // If the shift amount is as large as the result size (but, presumably,
+ // no larger than the source) then the useful bits of the result are
+ // zero; we can't simply return the shortened shift, because the result
+ // of that operation is undefined.
+ if (ShLeftAmt >= VT.getSizeInBits())
+ Result = DAG.getConstant(0, VT);
+ else
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ }
+
+ // Return the new loaded value.
+ return Result;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+
+ // fold (sext_in_reg c1) -> c1
+ if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ }
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+ if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorkList(ExtLoad.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+ if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false);
+ if (BSwap.getNode() != 0)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ BSwap, N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool isLE = TLI.isLittleEndian();
+
+ // noop truncate
+ if (N0.getValueType() == N->getValueType(0))
+ return N0;
+ // fold (truncate c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ // if the source is smaller than the dest, we still need an extend
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0));
+ if (N0.getOperand(0).getValueType().bitsGT(VT))
+ // if the source is larger than the dest, than we just need the truncate
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
+ }
+
+ // Fold extract-and-trunc into a narrow extract. For example:
+ // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+ // i32 y = TRUNCATE(i64 x)
+ // -- becomes --
+ // v16i8 b = BITCAST (v2i64 val)
+ // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+ //
+ // Note: We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse()) {
+
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ unsigned NumElem = VecTy.getVectorNumElements();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ EVT IndexTy = N0->getOperand(1).getValueType();
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NVT, N0.getOperand(0));
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ N->getDebugLoc(), TrTy, V,
+ DAG.getConstant(Index, IndexTy));
+ }
+ }
+
+ // Fold a series of buildvector, bitcast, and truncate if possible.
+ // For example fold
+ // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+ // (2xi32 (buildvector x, y)).
+ if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+ N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getOperand(0).hasOneUse()) {
+
+ SDValue BuildVect = N0.getOperand(0);
+ EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+ EVT TruncVecEltTy = VT.getVectorElementType();
+
+ // Check that the element types match.
+ if (BuildVectEltTy == TruncVecEltTy) {
+ // Now we only need to compute the offset of the truncated elements.
+ unsigned BuildVecNumElts = BuildVect.getNumOperands();
+ unsigned TruncVecNumElts = VT.getVectorNumElements();
+ unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+ assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+ "Invalid number of elements");
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ Opnds.push_back(BuildVect.getOperand(i));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0],
+ Opnds.size());
+ }
+ }
+
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used.
+ // For example "trunc (or (shl x, 8), y)" // -> trunc y
+ // Currently we only perform this optimization on scalars because vectors
+ // may have different active low bits.
+ if (!VT.isVector()) {
+ SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (Shorter.getNode())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+ }
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+ SDValue Reduced = ReduceLoadWidth(N);
+ if (Reduced.getNode())
+ return Reduced;
+ }
+ // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
+ // where ... are all 'undef'.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+ SmallVector<EVT, 8> VTs;
+ SDValue V;
+ unsigned Idx = 0;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue X = N0.getOperand(i);
+ if (X.getOpcode() != ISD::UNDEF) {
+ V = X;
+ Idx = i;
+ NumDefs++;
+ }
+ // Stop if more than one members are non-undef.
+ if (NumDefs > 1)
+ break;
+ VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ X.getValueType().getVectorNumElements()));
+ }
+
+ if (NumDefs == 0)
+ return DAG.getUNDEF(VT);
+
+ if (NumDefs == 1) {
+ assert(V.getNode() && "The single defined operand is empty!");
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (i != Idx) {
+ Opnds.push_back(DAG.getUNDEF(VTs[i]));
+ continue;
+ }
+ SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V);
+ AddToWorkList(NV.getNode());
+ Opnds.push_back(NV);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ &Opnds[0], Opnds.size());
+ }
+ }
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+ assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getPointerInfo().getAddrSpace() !=
+ LD2->getPointerInfo().getAddrSpace())
+ return SDValue();
+ EVT LD1VT = LD1->getValueType(0);
+
+ if (ISD::isNON_EXTLoad(LD2) &&
+ LD2->hasOneUse() &&
+ // If both are volatile this would reduce the number of volatile loads.
+ // If one is volatile it might be ok, but play conservative and bail out.
+ !LD1->isVolatile() &&
+ !LD2->isVolatile() &&
+ DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
+ unsigned Align = LD1->getAlignment();
+ unsigned NewAlign = TLI.getDataLayout()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign <= Align &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+ return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ false, false, false, Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize, since afterward the target may be depending
+ // on the bitconvert.
+ // First check to see if this is all constant.
+ if (!LegalTypes &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ VT.isVector()) {
+ bool isSimple = true;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+ if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+ N0.getOperand(i).getOpcode() != ISD::Constant &&
+ N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+ isSimple = false;
+ break;
+ }
+
+ EVT DestEltVT = N->getValueType(0).getVectorElementType();
+ assert(!DestEltVT.isVector() &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple)
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ }
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
+ if (Res.getNode() != N) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
+ return Res;
+
+ // Folding it resulted in an illegal node, and it's too late to
+ // do that. Clean up the old node and forego the transformation.
+ // Ideally this won't happen very often, because instcombine
+ // and the earlier dagcombine runs (where illegal nodes are
+ // permitted) should have folded most of them already.
+ DAG.DeleteNode(Res.getNode());
+ }
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned Align = TLI.getDataLayout()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ unsigned OrigAlign = LN0->getAlignment();
+
+ if (Align <= OrigAlign) {
+ SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), OrigAlign);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ N0.getValueType(), Load),
+ Load.getValue(1));
+ return Load;
+ }
+ }
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ // This often reduces constant pool loads.
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
+ N0.getNode()->hasOneUse() && VT.isInteger() &&
+ !VT.isVector() && !N0.getValueType().isVector()) {
+ SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
+ N0.getOperand(0));
+ AddToWorkList(NewConv.getNode());
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(SignBit, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(~SignBit, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+ VT.isInteger() && !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
+ if (isTypeLegal(IntXVT)) {
+ SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ IntXVT, N0.getOperand(1));
+ AddToWorkList(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
+ X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ AddToWorkList(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ }
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
+ X, DAG.getConstant(SignBit, VT));
+ AddToWorkList(X.getNode());
+
+ SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
+ Cst, DAG.getConstant(~SignBit, VT));
+ AddToWorkList(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR) {
+ SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+ if (CombineLD.getNode())
+ return CombineLD;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
+/// destination element value type.
+SDValue DAGCombiner::
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+
+ // Due to the FP element handling below calling this routine recursively,
+ // we can end up with a scalar-to-vector node here.
+ if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+ DstEltVT, BV->getOperand(0)));
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ SDValue Op = BV->getOperand(i);
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+ Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+ DstEltVT, Op));
+ AddToWorkList(Ops.back().getNode());
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+ // Convert the input float vector to a int vector where the elements are the
+ // same sizes.
+ assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+ // Now we know the input is an integer vector. If the output is a FP type,
+ // convert to integer first, then to FP of the right size.
+ if (DstEltVT.isFloatingPoint()) {
+ assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+ // Okay, we know the src/dst types are both integers of differing types.
+ // Handling growing first.
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+ if (SrcBitSize < DstBitSize) {
+ unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+ i += NumInputsPerOutput) {
+ bool isLE = TLI.isLittleEndian();
+ APInt NewBits = APInt(DstBitSize, 0);
+ bool EltIsUndef = true;
+ for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+ // Shift the previously computed bits over.
+ NewBits <<= SrcBitSize;
+ SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+ EltIsUndef = false;
+
+ NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
+ zextOrTrunc(SrcBitSize).zext(DstBitSize);
+ }
+
+ if (EltIsUndef)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ }
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Finally, this must be the case where we are shrinking elements: each input
+ // turns into multiple outputs.
+ bool isS2V = ISD::isScalarToVector(BV);
+ unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ NumOutputsPerInput*BV->getNumOperands());
+ SmallVector<SDValue, 8> Ops;
+
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ continue;
+ }
+
+ APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue().zextOrTrunc(SrcBitSize);
+
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+ APInt ThisVal = OpVal.trunc(DstBitSize);
+ Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+ if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
+ // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ Ops[0]);
+ OpVal = OpVal.lshr(DstBitSize);
+ }
+
+ // For big endian targets, swap the order of the pieces of each element.
+ if (TLI.isBigEndian())
+ std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fadd c1, c2) -> c1 + c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (fadd A, 0) -> A
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fadd A, (fneg B)) -> (fsub A, B)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ // fold (fadd (fneg A), B) -> (fsub B, A)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+ GetNegatedExpression(N0, DAG, LegalOperations));
+
+ // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ // No FP constant should be created after legalization as Instruction
+ // Selection pass has hard time in dealing with FP constant.
+ //
+ // We don't need test this condition for transformation like following, as
+ // the DAG being transformed implies it is legal to take FP constant as
+ // operand.
+ //
+ // (fadd (fmul c, x), x) -> (fmul c+1, x)
+ //
+ bool AllowNewFpConst = (Level < AfterLegalizeDAG);
+
+ // If allow, fold (fadd (fneg x), x) -> 0.0
+ if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
+ N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
+ return DAG.getConstantFP(0.0, VT);
+ }
+
+ // If allow, fold (fadd x, (fneg x)) -> 0.0
+ if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
+ N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
+ return DAG.getConstantFP(0.0, VT);
+ }
+
+ // In unsafe math mode, we can fold chains of FADD's of the same value
+ // into multiplications. This transform is not safe in general because
+ // we are reducing the number of rounding steps.
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
+ !N0CFP && !N1CFP) {
+ if (N0.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+
+ // (fadd (fmul c, x), x) -> (fmul c+1, x)
+ if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP00, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, NewCFP);
+ }
+
+ // (fadd (fmul x, c), x) -> (fmul c+1, x)
+ if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, NewCFP);
+ }
+
+ // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x)
+ if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(1) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP00, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), NewCFP);
+ }
+
+ // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x)
+ if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), NewCFP);
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+
+ // (fadd x, (fmul c, x)) -> (fmul c+1, x)
+ if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP10, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, NewCFP);
+ }
+
+ // (fadd x, (fmul x, c)) -> (fmul c+1, x)
+ if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, NewCFP);
+ }
+
+
+ // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x)
+ if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(1) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP10, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), NewCFP);
+ }
+
+ // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x)
+ if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), NewCFP);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ // (fadd (fadd x, x), x) -> (fmul 3.0, x)
+ if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ (N0.getOperand(0) == N1)) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, DAG.getConstantFP(3.0, VT));
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
+ if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, DAG.getConstantFP(3.0, VT));
+ }
+ }
+
+ // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
+ if (AllowNewFpConst &&
+ N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0),
+ DAG.getConstantFP(4.0, VT));
+ }
+ }
+
+ // FADD -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+ // fold (fsub A, 0) -> A
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fsub 0, B) -> -B
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
+ return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
+ }
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
+ return DAG.getNode(ISD::FADD, dl, VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+
+ // If 'unsafe math' is enabled, fold
+ // (fsub x, x) -> 0.0 &
+ // (fsub x, (fadd x, y)) -> (fneg y) &
+ // (fsub x, (fadd y, x)) -> (fneg y)
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N0 == N1)
+ return DAG.getConstantFP(0.0f, VT);
+
+ if (N1.getOpcode() == ISD::FADD) {
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+
+ if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N11, DAG, LegalOperations);
+ else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N10, DAG, LegalOperations);
+ }
+ }
+
+ // FSUB -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+ }
+
+ // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (fmul A, 0) -> 0
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
+ return N1;
+ // fold (fmul A, 0) -> 0, vector edition.
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+ // fold (fmul A, 1.0) -> A
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return N0;
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+ // fold (fmul X, -1.0) -> (fneg X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0))
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+ // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N0.getOpcode() == ISD::FMUL &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N0CFP && N0CFP->isZero())
+ return N2;
+ if (N1CFP && N1CFP->isZero())
+ return N2;
+ }
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2);
+
+ // Canonicalize (fma c, x, y) -> (fma x, c, y)
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);
+
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N2.getOpcode() == ISD::FMUL &&
+ N0 == N2.getOperand(0) &&
+ N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
+ }
+
+
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0.getOpcode() == ISD::FMUL && N1CFP &&
+ N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
+ N2);
+ }
+
+ // (fma x, 1, y) -> (fadd x, y)
+ // (fma x, -1, y) -> (fadd (fneg x), y)
+ if (N1CFP) {
+ if (N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
+
+ if (N1CFP->isExactlyValue(-1.0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
+ SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
+ AddToWorkList(RHSNeg.getNode());
+ return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
+ }
+ }
+
+ // (fma x, c, x) -> (fmul x, (c+1))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) {
+ return DAG.getNode(ISD::FMUL, dl, VT,
+ N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(1.0, VT)));
+ }
+
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, dl, VT,
+ N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(-1.0, VT)));
+ }
+
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+ if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
+ // Compute the reciprocal 1.0 / c2.
+ APFloat N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (eg NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
+ DAG.getConstantFP(Recip, VT));
+ }
+
+ // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ if (N0CFP && N1CFP) // Constant fold
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1CFP) {
+ const APFloat& V = N1CFP->getValueAPF();
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ if (!V.isNegative()) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ } else {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
+ }
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (N0C &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+ // but UINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ // The next optimizations are desireable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select_cc x, y, 1.0, 0.0,, cc)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+ DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
+ N0.getOperand(0).getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (N0C &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ // The next optimizations are desireable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
+
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+ // This is a value preserving truncation if both round's are.
+ bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+ N0.getNode()->getConstantOperandVal(1) == 1;
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(IsTrunc));
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+ // fold (fp_round_inreg c1fp) -> c1fp
+ if (N0CFP && isTypeLegal(EVT)) {
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
+ // value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getNode()->getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVUnaryOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N0, DAG, LegalOperations);
+
+ // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+ // constant pool values.
+ if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
+ !VT.isVector() &&
+ N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ VT, Int);
+ }
+ }
+
+ // (fneg (fmul c, x)) -> (fmul -c, x)
+ if (N0.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ if (CFP1) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ N0.getOperand(1)));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVUnaryOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fabs c1) -> fabs(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+ // constant pool values.
+ if (!TLI.isFAbsFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger() &&
+ !N0.getOperand(0).getValueType().isVector()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ N1.getOperand(0).getValueType())) {
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+ ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+ (N1.getOperand(0).hasOneUse() &&
+ N1.getOperand(0).getOpcode() == ISD::SRL))) {
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE) {
+ // Look pass the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
+
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
+ SDValue Op0 = N1.getOperand(0);
+ SDValue Op1 = N1.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND &&
+ Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDValue SetCC =
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETNE);
+
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ // Don't add the new BRCond into the worklist or else SimplifySelectCC
+ // will convert it back to (X & C1) >> C2.
+ CombineTo(N, NewBRCond, false);
+ // Truncate is dead.
+ if (Trunc) {
+ removeFromWorkList(Trunc);
+ DAG.DeleteNode(Trunc);
+ }
+ // Replace the uses of SRL with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (Trunc)
+ // Restore N1 if the above transformation doesn't match.
+ N1 = N->getOperand(1);
+ }
+
+ // Transform br(xor(x, y)) -> br(x != y)
+ // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+ SDNode *TheXor = N1.getNode();
+ SDValue Op0 = TheXor->getOperand(0);
+ SDValue Op1 = TheXor->getOperand(1);
+ if (Op0.getOpcode() == Op1.getOpcode()) {
+ // Avoid missing important xor optimizations.
+ SDValue Tmp = visitXOR(TheXor);
+ if (Tmp.getNode()) {
+ if (Tmp.getNode() != TheXor) {
+ DEBUG(dbgs() << "\nReplacing.8 ";
+ TheXor->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Tmp.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
+ removeFromWorkList(TheXor);
+ DAG.DeleteNode(TheXor);
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, Tmp, N2);
+ }
+
+ // visitXOR has changed XOR's operands or replaced the XOR completely,
+ // bail out.
+ return SDValue(N, 0);
+ }
+ }
+
+ if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+ bool Equal = false;
+ if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
+ if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
+
+ EVT SetCCVT = N1.getValueType();
+ if (LegalTypes)
+ SetCCVT = TLI.getSetCCResultType(SetCCVT);
+ SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
+ SetCCVT,
+ Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
+ // Replace the uses of XOR with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ }
+ }
+
+ return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
+ CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
+ false);
+ if (Simp.getNode()) AddToWorkList(Simp.getNode());
+
+ // fold to a simpler setcc
+ if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ N->getOperand(0), Simp.getOperand(2),
+ Simp.getOperand(0), Simp.getOperand(1),
+ N->getOperand(4));
+
+ return SDValue();
+}
+
+/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
+/// uses N as its base pointer and that N may be folded in the load / store
+/// addressing mode.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = Use->getValueType(0);
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getValue().getValueType();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+}
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.getNode()->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+
+ // Backends without true r+i pre-indexed forms may need to pass a
+ // constant base with a variable offset so that constant coercion
+ // will work with the patterns in canonical form.
+ bool Swapped = false;
+ if (isa<ConstantSDNode>(BasePtr)) {
+ std::swap(BasePtr, Offset);
+ Swapped = true;
+ }
+
+ // Don't create a indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // If the offset is a constant, there may be other adds of constants that
+ // can be folded with this one. We should do this to avoid having to keep
+ // a copy of the original base pointer.
+ SmallVector<SDNode *, 16> OtherUses;
+ if (isa<ConstantSDNode>(Offset))
+ for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
+ E = BasePtr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == Ptr.getNode())
+ continue;
+
+ if (Use->isPredecessorOf(N))
+ continue;
+
+ if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
+ OtherUses.clear();
+ break;
+ }
+
+ SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
+ if (Op1.getNode() == BasePtr.getNode())
+ std::swap(Op0, Op1);
+ assert(Op0.getNode() == BasePtr.getNode() &&
+ "Use of ADD/SUB but not an operand");
+
+ if (!isa<ConstantSDNode>(Op1)) {
+ OtherUses.clear();
+ break;
+ }
+
+ // FIXME: In some cases, we can be smarter about this.
+ if (Op1.getValueType() != Offset.getValueType()) {
+ OtherUses.clear();
+ break;
+ }
+
+ OtherUses.push_back(Use);
+ }
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == N)
+ continue;
+ if (N->hasPredecessorHelper(Use, Visited, Worklist))
+ return false;
+
+ // If Ptr may be folded in addressing mode of other use, then it's
+ // not profitable to do this transformation.
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
+ RealUse = true;
+ }
+
+ if (!RealUse)
+ return false;
+
+ SDValue Result;
+ if (isLoad)
+ Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ else
+ Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.4 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Replace other uses of BasePtr that can be updated to use Ptr
+ for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
+ unsigned OffsetIdx = 1;
+ if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
+ OffsetIdx = 0;
+ assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
+ BasePtr.getNode() && "Expected BasePtr operand");
+
+ APInt OV =
+ cast<ConstantSDNode>(Offset)->getAPIntValue();
+ if (AM == ISD::PRE_DEC)
+ OV = -OV;
+
+ ConstantSDNode *CN =
+ cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
+ APInt CNV = CN->getAPIntValue();
+ if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1)
+ CNV += OV;
+ else
+ CNV -= OV;
+
+ SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0);
+ SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0));
+ if (OffsetIdx == 0)
+ std::swap(NewOp1, NewOp2);
+
+ SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(),
+ OtherUses[i]->getDebugLoc(),
+ OtherUses[i]->getValueType(0), NewOp1, NewOp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
+ removeFromWorkList(OtherUses[i]);
+ DAG.DeleteNode(OtherUses[i]);
+ }
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
+ removeFromWorkList(Ptr.getNode());
+ DAG.DeleteNode(Ptr.getNode());
+
+ return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// The transformation folded the add / subtract into the new indexed
+/// load / store effectively and all of its uses are redirected to the
+/// new load / store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ if (Ptr.getNode()->hasOneUse())
+ return false;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Op = *I;
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+ // Don't create a indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ continue;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr (and
+ // it may be folded as addressing mmode).
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ continue;
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
+ EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
+ SDNode *Use = *II;
+ if (Use == Ptr.getNode())
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ bool RealUse = false;
+ for (SDNode::use_iterator III = Use->use_begin(),
+ EEE = Use->use_end(); III != EEE; ++III) {
+ SDNode *UseUse = *III;
+ if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+
+ if (TryNext)
+ continue;
+
+ // Check for #2
+ if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ SDValue Result = isLoad
+ ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.5 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Use with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(isLoad ? 1 : 0));
+ removeFromWorkList(Op);
+ DAG.DeleteNode(Op);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ if (!LD->isVolatile()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (!N->hasAnyUseOfValue(0)) {
+ // It's not safe to use the two value CombineTo variant here. e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ DEBUG(dbgs() << "\nReplacing.6 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith chain: ";
+ Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+
+ if (N->use_empty()) {
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+ if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ DEBUG(dbgs() << "\nReplacing.7 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ DAG.getUNDEF(N->getValueType(1)));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ // TODO: Handle store large -> read small portion.
+ // TODO: Handle TRUNCSTORE/LOADEXT
+ if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
+ if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+ StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+ if (PrevST->getBasePtr() == Ptr &&
+ PrevST->getValue().getValueType() == N->getValueType(0))
+ return CombineTo(N, Chain.getOperand(1), Chain);
+ }
+ }
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > LD->getMemOperand()->getBaseAlignment()) {
+ SDValue NewLoad =
+ DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
+ LD->isVolatile(), LD->isNonTemporal(), Align);
+ return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ }
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+ // Replace the chain to void dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+ BetterChain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
+ LD->isVolatile(),
+ LD->isNonTemporal(),
+ LD->getAlignment());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
+ // Replace uses with load result and token factor. Don't add users
+ // to work list.
+ return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
+/// load is having specific bytes cleared out. If so, return the byte size
+/// being masked out and the shift amount.
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+ std::pair<unsigned, unsigned> Result(0, 0);
+
+ // Check for the structure we're looking for.
+ if (V->getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(V->getOperand(1)) ||
+ !ISD::isNormalLoad(V->getOperand(0).getNode()))
+ return Result;
+
+ // Check the chain and pointer.
+ LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+ if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
+
+ // The store should be chained directly to the load or be an operand of a
+ // tokenfactor.
+ if (LD == Chain.getNode())
+ ; // ok.
+ else if (Chain->getOpcode() != ISD::TokenFactor)
+ return Result; // Fail.
+ else {
+ bool isOk = false;
+ for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
+ if (Chain->getOperand(i).getNode() == LD) {
+ isOk = true;
+ break;
+ }
+ if (!isOk) return Result;
+ }
+
+ // This only handles simple types.
+ if (V.getValueType() != MVT::i16 &&
+ V.getValueType() != MVT::i32 &&
+ V.getValueType() != MVT::i64)
+ return Result;
+
+ // Check the constant mask. Invert it so that the bits being masked out are
+ // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
+ // follow the sign bit for uniformity.
+ uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+ unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+ if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
+ unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+ if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
+ if (NotMaskLZ == 64) return Result; // All zero mask.
+
+ // See if we have a continuous run of bits. If so, we have 0*1+0*
+ if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+ return Result;
+
+ // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+ if (V.getValueType() != MVT::i64 && NotMaskLZ)
+ NotMaskLZ -= 64-V.getValueSizeInBits();
+
+ unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+ switch (MaskedBytes) {
+ case 1:
+ case 2:
+ case 4: break;
+ default: return Result; // All one mask, or 5-byte mask.
+ }
+
+ // Verify that the first bit starts at a multiple of mask so that the access
+ // is aligned the same as the access width.
+ if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+ Result.first = MaskedBytes;
+ Result.second = NotMaskTZ/8;
+ return Result;
+}
+
+
+/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
+/// provides a value as specified by MaskInfo. If so, replace the specified
+/// store with a narrower store of truncated IVal.
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+ SDValue IVal, StoreSDNode *St,
+ DAGCombiner *DC) {
+ unsigned NumBytes = MaskInfo.first;
+ unsigned ByteShift = MaskInfo.second;
+ SelectionDAG &DAG = DC->getDAG();
+
+ // Check to see if IVal is all zeros in the part being masked in by the 'or'
+ // that uses this. If not, this is not a replacement.
+ APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+ ByteShift*8, (ByteShift+NumBytes)*8);
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+
+ // Check that it is legal on the target to do this. It is legal if the new
+ // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+ // legalization.
+ MVT VT = MVT::getIntegerVT(NumBytes*8);
+ if (!DC->isTypeLegal(VT))
+ return 0;
+
+ // Okay, we can do this! Replace the 'St' store with a store of IVal that is
+ // shifted by ByteShift and truncated down to NumBytes.
+ if (ByteShift)
+ IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8,
+ DC->getShiftAmountTy(IVal.getValueType())));
+
+ // Figure out the offset for the store and the alignment of the access.
+ unsigned StOffset;
+ unsigned NewAlign = St->getAlignment();
+
+ if (DAG.getTargetLoweringInfo().isLittleEndian())
+ StOffset = ByteShift;
+ else
+ StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+ SDValue Ptr = St->getBasePtr();
+ if (StOffset) {
+ Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+ NewAlign = MinAlign(NewAlign, StOffset);
+ }
+
+ // Truncate down to the new size.
+ IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+
+ ++OpsNarrowed;
+ return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
+ false, false, NewAlign).getNode();
+}
+
+
+/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
+/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (ST->isVolatile())
+ return SDValue();
+
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ EVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Value.getOpcode();
+
+ // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+ // is a byte mask indicating a consecutive number of bytes, check to see if
+ // Y is known to provide just those bytes. If so, we try to replace the
+ // load + replace + store sequence with a single (narrower) store, which makes
+ // the load dead.
+ if (Opc == ISD::OR) {
+ std::pair<unsigned, unsigned> MaskedLoad;
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(1), ST,this))
+ return SDValue(NewST, 0);
+
+ // Or is commutative, so try swapping X and Y.
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(0), ST,this))
+ return SDValue(NewST, 0);
+ }
+
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ Chain == SDValue(N0.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
+ return SDValue();
+
+ // Find the type to narrow it the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ if (Imm == 0 || Imm.isAllOnesValue())
+ return SDValue();
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ while (NewBW < BitWidth &&
+ !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+ TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ }
+ if (NewBW >= BitWidth)
+ return SDValue();
+
+ // If the lsb changed does not start at the type bitwidth boundary,
+ // start at the previous one.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
+ std::min(BitWidth, ShAmt + NewBW));
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian())
+ PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+ if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
+ return SDValue();
+
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, Ptr.getValueType()));
+ SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+ LD->getChain(), NewPtr,
+ LD->getPointerInfo().getWithOffset(PtrOff),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), NewAlign);
+ SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+ DAG.getConstant(NewImm, NewVT));
+ SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+ NewVal, NewPtr,
+ ST->getPointerInfo().getWithOffset(PtrOff),
+ false, false, NewAlign);
+
+ AddToWorkList(NewPtr.getNode());
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewVal.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue();
+}
+
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ false, false, false, LDAlign);
+
+ SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+ NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ false, false, STAlign);
+
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewST.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
+/// Helper struct to parse and store a memory address as base + index + offset.
+/// We ignore sign extensions when it is safe to do so.
+/// The following two expressions are not equivalent. To differentiate we need
+/// to store whether there was a sign extension involved in the index
+/// computation.
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (add (i8 load %index)
+/// (i8 1))))
+/// vs
+///
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
+/// (i32 1)))))
+struct BaseIndexOffset {
+ SDValue Base;
+ SDValue Index;
+ int64_t Offset;
+ bool IsIndexSignExt;
+
+ BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
+
+ BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
+ bool IsIndexSignExt) :
+ Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
+
+ bool equalBaseIndex(const BaseIndexOffset &Other) {
+ return Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt;
+ }
+
+ /// Parses tree in Ptr for base, index, offset addresses.
+ static BaseIndexOffset match(SDValue Ptr) {
+ bool IsIndexSignExt = false;
+
+ // Just Base or possibly anything else.
+ if (Ptr->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Base + offset.
+ if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
+ IsIndexSignExt);
+ }
+
+ // Look at Base + Index + Offset cases.
+ SDValue Base = Ptr->getOperand(0);
+ SDValue IndexOffset = Ptr->getOperand(1);
+
+ // Skip signextends.
+ if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
+ IndexOffset = IndexOffset->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Either the case of Base + Index (no offset) or something else.
+ if (IndexOffset->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
+
+ // Now we have the case of Base + Index + offset.
+ SDValue Index = IndexOffset->getOperand(0);
+ SDValue Offset = IndexOffset->getOperand(1);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Ignore signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else IsIndexSignExt = false;
+
+ int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
+ return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
+ }
+};
+
+/// Holds a pointer to an LSBaseSDNode as well as information on where it
+/// is located in a sequence of memory operations connected by a chain.
+struct MemOpLink {
+ MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+ MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+ // What is the sequence number of this mem node.
+ // Lowest mem operand in the DAG starts at zero.
+ unsigned SequenceNum;
+};
+
+/// Sorts store nodes in a link according to their offset from a shared
+// base ptr.
+struct ConsecutiveMemoryChainSorter {
+ bool operator()(MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ }
+};
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ EVT MemVT = St->getMemoryVT();
+ int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
+ bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+
+ // Don't merge vectors into wider inputs.
+ if (MemVT.isVector() || !MemVT.isSimple())
+ return false;
+
+ // Perform an early exit check. Do not bother looking at stored values that
+ // are not constants or loads.
+ SDValue StoredVal = St->getValue();
+ bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+ if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
+ !IsLoadSrc)
+ return false;
+
+ // Only look at ends of store sequences.
+ SDValue Chain = SDValue(St, 1);
+ if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+ return false;
+
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+
+ // We must have a base and an offset.
+ if (!BasePtr.Base.getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.Base.getOpcode() == ISD::UNDEF)
+ return false;
+
+ // Save the LoadSDNodes that we find in the chain.
+ // We need to make sure that these nodes do not interfere with
+ // any of the store nodes.
+ SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
+
+ // Save the StoreSDNodes that we find in the chain.
+ SmallVector<MemOpLink, 8> StoreNodes;
+
+ // Walk up the chain and look for nodes with offsets from the same
+ // base pointer. Stop when reaching an instruction with a different kind
+ // or instruction which has a different base pointer.
+ unsigned Seq = 0;
+ StoreSDNode *Index = St;
+ while (Index) {
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (Index != St && !SDValue(Index, 1)->hasOneUse())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
+
+ // Check that the base pointer is the same as the original one.
+ if (!Ptr.equalBaseIndex(BasePtr))
+ break;
+
+ // Check that the alignment is the same.
+ if (Index->getAlignment() != St->getAlignment())
+ break;
+
+ // The memory operands must not be volatile.
+ if (Index->isVolatile() || Index->isIndexed())
+ break;
+
+ // No truncation.
+ if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
+ if (St->isTruncatingStore())
+ break;
+
+ // The stored memory type must be the same.
+ if (Index->getMemoryVT() != MemVT)
+ break;
+
+ // We do not allow unaligned stores because we want to prevent overriding
+ // stores.
+ if (Index->getAlignment()*8 != MemVT.getSizeInBits())
+ break;
+
+ // We found a potential memory operand to merge.
+ StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
+
+ // Find the next memory operand in the chain. If the next operand in the
+ // chain is a store then move up and continue the scan with the next
+ // memory operand. If the next operand is a load save it and use alias
+ // information to check if it interferes with anything.
+ SDNode *NextInChain = Index->getChain().getNode();
+ while (1) {
+ if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+ // We found a store node. Use it for the next iteration.
+ Index = STn;
+ break;
+ } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ // Save the load node for later. Continue the scan.
+ AliasLoadNodes.push_back(Ldn);
+ NextInChain = Ldn->getChain().getNode();
+ continue;
+ } else {
+ Index = NULL;
+ break;
+ }
+ }
+ }
+
+ // Check if there is anything to merge.
+ if (StoreNodes.size() < 2)
+ return false;
+
+ // Sort the memory operands according to their distance from the base pointer.
+ std::sort(StoreNodes.begin(), StoreNodes.end(),
+ ConsecutiveMemoryChainSorter());
+
+ // Scan the memory operations on the chain and find the first non-consecutive
+ // store memory address.
+ unsigned LastConsecutiveStore = 0;
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
+ for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
+
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ if (i > 0) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ }
+
+ bool Alias = false;
+ // Check if this store interferes with any of the loads that we found.
+ for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
+ if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
+ Alias = true;
+ break;
+ }
+ // We found a load that alias with this store. Stop the sequence.
+ if (Alias)
+ break;
+
+ // Mark this node as useful.
+ LastConsecutiveStore = i;
+ }
+
+ // The node with the lowest store address.
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+
+ // Store the constants into memory as one consecutive store.
+ if (!IsLoadSrc) {
+ unsigned LastLegalType = 0;
+ unsigned LastLegalVectorType = 0;
+ bool NonZero = false;
+ for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = St->getValue();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+ NonZero |= !C->isNullValue();
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
+ NonZero |= !C->getConstantFPValue()->isNullValue();
+ } else {
+ // Non constant.
+ break;
+ }
+
+ // Find a legal type for the constant store.
+ unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalType = i+1;
+ // Or check whether a truncstore is legal.
+ else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
+ LastLegalType = i+1;
+ }
+
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(Ty))
+ LastLegalVectorType = i + 1;
+ }
+
+ // We only use vectors if the constant is known to be zero and the
+ // function is not marked with the noimplicitfloat attribute.
+ if (NonZero || NoVectors)
+ LastLegalVectorType = 0;
+
+ // Check if we found a legal integer type to store.
+ if (LastLegalType == 0 && LastLegalVectorType == 0)
+ return false;
+
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+
+ // Make sure we have something to merge.
+ if (NumElem < 2)
+ return false;
+
+ unsigned EarliestNodeUsed = 0;
+ for (unsigned i=0; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // The earliest Node in the DAG.
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc();
+
+ SDValue StoredVal;
+ if (UseVector) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ StoredVal = DAG.getConstant(0, Ty);
+ } else {
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(StoreBW, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
+ bool IsLE = TLI.isLittleEndian();
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ SDValue Val = St->getValue();
+ StoreInt<<=ElementSizeBytes*8;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+ StoreInt|=C->getAPIntValue().zext(StoreBW);
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+ StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+ } else {
+ assert(false && "Invalid constant element type");
+ }
+ }
+
+ // Create the new Load and Store operations.
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ StoredVal = DAG.getConstant(StoreInt, StoreTy);
+ }
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ false, false,
+ FirstInChain->getAlignment());
+
+ // Replace the first store with the new store
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+ // When we change it's chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ removeFromWorkList(St);
+ DAG.DeleteNode(St);
+ }
+
+ return true;
+ }
+
+ // Below we handle the case of multiple consecutive stores that
+ // come from multiple consecutive loads. We merge them into a single
+ // wide load and a single wide store.
+
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
+
+ // Find acceptable loads. Loads need to have the same chain (token factor),
+ // must not be zext, volatile, indexed, and they must be consecutive.
+ BaseIndexOffset LdBasePtr;
+ for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+ if (!Ld) break;
+
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
+ break;
+
+ // Check that the alignment is the same as the stores.
+ if (Ld->getAlignment() != St->getAlignment())
+ break;
+
+ // The memory operands must not be volatile.
+ if (Ld->isVolatile() || Ld->isIndexed())
+ break;
+
+ // We do not accept ext loads.
+ if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ break;
+
+ // The stored memory type must be the same.
+ if (Ld->getMemoryVT() != MemVT)
+ break;
+
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
+ // If this is not the first ptr that we check.
+ if (LdBasePtr.Base.getNode()) {
+ // The base ptr must be the same.
+ if (!LdPtr.equalBaseIndex(LdBasePtr))
+ break;
+ } else {
+ // Check that all other base pointers are the same as this one.
+ LdBasePtr = LdPtr;
+ }
+
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
+ }
+
+ if (LoadNodes.size() < 2)
+ return false;
+
+ // Scan the memory operations on the chain and find the first non-consecutive
+ // load memory address. These variables hold the index in the store node
+ // array.
+ unsigned LastConsecutiveLoad = 0;
+ // This variable refers to the size and not index in the array.
+ unsigned LastLegalVectorType = 0;
+ unsigned LastLegalIntegerType = 0;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue FirstChain = LoadNodes[0].MemNode->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads much share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain)
+ break;
+
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
+
+ // Find a legal type for the vector store.
+ EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalVectorType = i + 1;
+
+ // Find a legal type for the integer store.
+ unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalIntegerType = i + 1;
+ // Or check whether a truncstore and extload is legal.
+ else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
+ LastLegalIntegerType = i+1;
+ }
+ }
+
+ // Only use vector types if the vector type is larger than the integer type.
+ // If they are the same, use integers.
+ bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
+ unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
+
+ // We add +1 here because the LastXXX variables refer to location while
+ // the NumElem refers to array/index size.
+ unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
+ NumElem = std::min(LastLegalType, NumElem);
+
+ if (NumElem < 2)
+ return false;
+
+ // The earliest Node in the DAG.
+ unsigned EarliestNodeUsed = 0;
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ for (unsigned i=1; i<NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ } else {
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ }
+
+ DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc();
+ DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc();
+
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
+ FirstLoad->getChain(),
+ FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(),
+ false, false, false,
+ FirstLoad->getAlignment());
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), false, false,
+ FirstInChain->getAlignment());
+
+ // Replace one of the loads with the new load.
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+
+ // Remove the rest of the load chains.
+ for (unsigned i = 1; i < NumElem ; ++i) {
+ // Replace all chain users of the old load nodes with the chain of the new
+ // load node.
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
+ }
+
+ // Replace the first store with the new store.
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ // Remove all Store nodes.
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
+ removeFromWorkList(St);
+ DAG.DeleteNode(St);
+ }
+
+ return true;
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
+ ST->isUnindexed()) {
+ unsigned OrigAlign = ST->getAlignment();
+ EVT SVT = Value.getOperand(0).getValueType();
+ unsigned Align = TLI.getDataLayout()->
+ getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
+ if (Align <= OrigAlign &&
+ ((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
+ return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), OrigAlign);
+ }
+
+ // Turn 'store undef, Ptr' -> nothing.
+ if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
+ return Chain;
+
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+ if (Value.getOpcode() != ISD::TargetConstantFP) {
+ SDValue Tmp;
+ switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP type");
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
+ case MVT::f128:
+ case MVT::ppcf128:
+ break;
+ case MVT::f32:
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), MVT::i32);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ }
+ break;
+ case MVT::f64:
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), MVT::i64);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
+ Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal,
+ ST->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, Ptr.getValueType()));
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
+ Alignment);
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ St0, St1);
+ }
+
+ break;
+ }
+ }
+ }
+
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > ST->getAlignment())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(), Align);
+ }
+ }
+
+ // Try transforming a pair floating point load / store ops to integer
+ // load / store ops.
+ SDValue NewST = TransformFPLoadStorePair(N);
+ if (NewST.getNode())
+ return NewST;
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getPointerInfo(),
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(N, Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // FIXME: is there such a thing as a truncating indexed store?
+ if (ST->isTruncatingStore() && ST->isUnindexed() &&
+ Value.getValueType().isInteger()) {
+ // See if we can simplify the input to this truncstore with knowledge that
+ // only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ SDValue Shorter =
+ GetDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits()));
+ AddToWorkList(Value.getNode());
+ if (Shorter.getNode())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+
+ // Otherwise, see if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits())))
+ return SDValue(N, 0);
+ }
+
+ // If this is a load followed by a store to the same location, then the store
+ // is dead/noop.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+ ST->isUnindexed() && !ST->isVolatile() &&
+ // There can't be any side effects between the load and store, such as
+ // a call or store.
+ Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+ // truncating store. We can do this even if this is already a truncstore.
+ if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+ && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT())) {
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+ }
+
+ // Only perform this optimization before the types are legal, because we
+ // don't want to perform this optimization on every DAGCombine invocation.
+ if (!LegalTypes) {
+ bool EverChanged = false;
+
+ do {
+ // There can be multiple store sequences on the same chain.
+ // Keep trying to merge store sequences until we are unable to do so
+ // or until we merge the last store on the chain.
+ bool Changed = MergeConsecutiveStores(ST);
+ EverChanged |= Changed;
+ if (!Changed) break;
+ } while (ST->getOpcode() != ISD::DELETED_NODE);
+
+ if (EverChanged)
+ return SDValue(N, 0);
+ }
+
+ return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (InVal.getOpcode() == ISD::UNDEF)
+ return InVec;
+
+ EVT VT = InVec.getValueType();
+
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
+ // Check that we know which element is being inserted
+ if (!isa<ConstantSDNode>(EltNo))
+ return SDValue();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
+ // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
+ // vector elements.
+ SmallVector<SDValue, 8> Ops;
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ Ops.append(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ } else if (InVec.getOpcode() == ISD::UNDEF) {
+ unsigned NElts = VT.getVectorNumElements();
+ Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
+ } else {
+ return SDValue();
+ }
+
+ // Insert the element
+ if (Elt < Ops.size()) {
+ // All the operands of BUILD_VECTOR must have the same type;
+ // we enforce that here.
+ EVT OpVT = Ops[0].getValueType();
+ if (InVal.getValueType() != OpVT)
+ InVal = OpVT.bitsGT(InVal.getValueType()) ?
+ DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ Ops[Elt] = InVal;
+ }
+
+ // Return the new vector
+ return DAG.getNode(ISD::BUILD_VECTOR, dl,
+ VT, &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+ // (vextract (scalar_to_vector val, 0) -> val
+ SDValue InVec = N->getOperand(0);
+ EVT VT = InVec.getValueType();
+ EVT NVT = N->getValueType(0);
+
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+ // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ SDValue InOp = InVec.getOperand(0);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+ }
+ return InOp;
+ }
+
+ SDValue EltNo = N->getOperand(1);
+ bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+
+ // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because
+ // we may introduce new vector instructions which are not backed by TD
+ // patterns. For example on AVX, extracting elements from a wide vector
+ // without using extract_subvector.
+ if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
+ && ConstEltNo && !LegalOperations) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int NumElem = VT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ // Find the new index to extract from.
+ int OrigElt = SVOp->getMaskElt(Elt);
+
+ // Extracting an undef index is undef.
+ if (OrigElt == -1)
+ return DAG.getUNDEF(NVT);
+
+ // Select the right vector half to extract from.
+ if (OrigElt < NumElem) {
+ InVec = InVec->getOperand(0);
+ } else {
+ InVec = InVec->getOperand(1);
+ OrigElt -= NumElem;
+ }
+
+ EVT IndexTy = N->getOperand(1).getValueType();
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
+ InVec, DAG.getConstant(OrigElt, IndexTy));
+ }
+
+ // Perform only after legalization to ensure build_vector / vector_shuffle
+ // optimizations have already been done.
+ if (!LegalOperations) return SDValue();
+
+ // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+
+ if (ConstEltNo) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ bool NewLoad = false;
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ // If the result of load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ NewLoad = true;
+ }
+
+ LoadSDNode *LN0 = NULL;
+ const ShuffleVectorSDNode *SVN = NULL;
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ InVec.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+ } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
+
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
+
+ // Select the input vector, guarding against out of range extract vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
+ InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ InVec = InVec.getOperand(0);
+ }
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+ }
+ }
+
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+ return SDValue();
+
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LVT);
+
+ unsigned Align = LN0->getAlignment();
+ if (NewLoad) {
+ // Check the resultant load doesn't need a higher alignment than the
+ // original load.
+ unsigned NewAlign =
+ TLI.getDataLayout()
+ ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+ return SDValue();
+
+ Align = NewAlign;
+ }
+
+ SDValue NewPtr = LN0->getBasePtr();
+ unsigned PtrOff = 0;
+
+ if (Elt) {
+ PtrOff = LVT.getSizeInBits() * Elt / 8;
+ EVT PtrType = NewPtr.getValueType();
+ if (TLI.isBigEndian())
+ PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+ NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+ DAG.getConstant(PtrOff, PtrType));
+ }
+
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractvalue is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (NVT.bitsGT(LVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+ ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
+ NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), Align);
+ Chain = Load.getValue(1);
+ if (NVT.bitsLT(LVT))
+ Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Since we're explcitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(N);
+ return SDValue(N, 0);
+ }
+
+ return SDValue();
+}
+
+// Simplify (build_vec (ext )) to (bitcast (build_vec ))
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization before may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return SDValue();
+
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of values
+ // which come from any_extend or zero_extend nodes. If so, we can create
+ // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+ // optimizations. We do not handle sign-extend because we can't fill the sign
+ // using shuffles.
+ EVT SourceType = MVT::Other;
+ bool AllAnyExt = true;
+
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ // Ignore undef inputs.
+ if (In.getOpcode() == ISD::UNDEF) continue;
+
+ bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+ bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+ // Abort if the element is not an extension.
+ if (!ZeroExt && !AnyExt) {
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // The input is a ZeroExt or AnyExt. Check the original type.
+ EVT InTy = In.getOperand(0).getValueType();
+
+ // Check that all of the widened source types are the same.
+ if (SourceType == MVT::Other)
+ // First time.
+ SourceType = InTy;
+ else if (InTy != SourceType) {
+ // Multiple income types. Abort.
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // Check if all of the extends are ANY_EXTENDs.
+ AllAnyExt &= AnyExt;
+ }
+
+ // In order to have valid types, all of the inputs must be extended from the
+ // same source type and all of the inputs must be any or zero extend.
+ // Scalar sizes must be a power of two.
+ EVT OutScalarTy = VT.getScalarType();
+ bool ValidTypes = SourceType != MVT::Other &&
+ isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
+ isPowerOf2_32(SourceType.getSizeInBits());
+
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+ // turn into a single shuffle instruction.
+ if (!ValidTypes)
+ return SDValue();
+
+ bool isLE = TLI.isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, SourceType);
+
+ unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.getOpcode() == ISD::UNDEF)
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT)) return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorkList(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT SrcVT = MVT::Other;
+ unsigned Opcode = ISD::DELETED_NODE;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ unsigned Opc = In.getOpcode();
+
+ if (Opc == ISD::UNDEF)
+ continue;
+
+ // If all scalar values are floats and converted from integers.
+ if (Opcode == ISD::DELETED_NODE &&
+ (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+ Opcode = Opc;
+ }
+
+ if (Opc != Opcode)
+ return SDValue();
+
+ EVT InVT = In.getOperand(0).getValueType();
+
+ // If all scalar values are typed differently, bail out. It's chosen to
+ // simplify BUILD_VECTOR of integer types.
+ if (SrcVT == MVT::Other)
+ SrcVT = InVT;
+ if (SrcVT != InVT)
+ return SDValue();
+ NumDefs++;
+ }
+
+ // If the vector has just one element defined, it's not worth to fold it into
+ // a vectorized one.
+ if (NumDefs < 2)
+ return SDValue();
+
+ assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
+ && "Should only handle conversion from integer to float.");
+ assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+
+ if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
+ return SDValue();
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+
+ if (In.getOpcode() == ISD::UNDEF)
+ Opnds.push_back(DAG.getUNDEF(SrcVT));
+ else
+ Opnds.push_back(In.getOperand(0));
+ }
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+ &Opnds[0], Opnds.size());
+ AddToWorkList(BV.getNode());
+
+ return DAG.getNode(Opcode, dl, VT, BV);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ SDValue V = reduceBuildVecExtToExtBuildVec(N);
+ if (V.getNode())
+ return V;
+
+ V = reduceBuildVecConvertToConvertBuildVec(N);
+ if (V.getNode())
+ return V;
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+ // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+ // at most two distinct vectors, turn this into a shuffle node.
+
+ // May only combine to shuffle after legalize if shuffle is legal.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+ return SDValue();
+
+ SDValue VecIn1, VecIn2;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ // Ignore undef inputs.
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ // If this input is something other than a EXTRACT_VECTOR_ELT with a
+ // constant index, bail out.
+ if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // We allow up to two distinct input vectors.
+ SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+ if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+ continue;
+
+ if (VecIn1.getNode() == 0) {
+ VecIn1 = ExtractedFromVec;
+ } else if (VecIn2.getNode() == 0) {
+ VecIn2 = ExtractedFromVec;
+ } else {
+ // Too many inputs.
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+ }
+
+ // If everything is good, we can make a shuffle operation.
+ if (VecIn1.getNode()) {
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // If extracting from the first vector, just use the index directly.
+ SDValue Extract = N->getOperand(i);
+ SDValue ExtVal = Extract.getOperand(1);
+ if (Extract.getOperand(0) == VecIn1) {
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ if (ExtIndex > VT.getVectorNumElements())
+ return SDValue();
+
+ Mask.push_back(ExtIndex);
+ continue;
+ }
+
+ // Otherwise, use InIdx + VecSize
+ unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ Mask.push_back(Idx+NumInScalars);
+ }
+
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Attempt to transform a single input vector to the correct type.
+ if ((VT != VecIn1.getValueType())) {
+ // We don't support shuffeling between TWO values of different types.
+ if (VecIn2.getNode() != 0)
+ return SDValue();
+
+ // We only support widening of vectors which are half the size of the
+ // output registers. For example XMM->YMM widening on X86 with AVX.
+ if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
+ return SDValue();
+
+ // If the input vector type has a different base type to the output
+ // vector type, bail out.
+ if (VecIn1.getValueType().getVectorElementType() !=
+ VT.getVectorElementType())
+ return SDValue();
+
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ }
+
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+
+ // Check that we were able to transform all incoming values to the same
+ // type.
+ if (VecIn2.getValueType() != VecIn1.getValueType() ||
+ VecIn1.getValueType() != VT)
+ return SDValue();
+
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ SDValue Ops[2];
+ Ops[0] = VecIn1;
+ Ops[1] = VecIn2;
+ return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+ // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
+ // inputs come from at most two distinct vectors, turn this into a shuffle
+ // node.
+
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1)
+ return N->getOperand(0);
+
+ // Check if all of the operands are undefs.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(N->getValueType(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
+ EVT NVT = N->getValueType(0);
+ SDValue V = N->getOperand(0);
+
+ if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
+ if (V->getOperand(0).getValueType() != NVT)
+ return SDValue();
+ unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
+ // Skip bitcasting
+ if (V->getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ DebugLoc dl = N->getDebugLoc();
+ // Handle only simple case where vector being inserted and vector
+ // being extracted are of same type, and are half size of larger vectors.
+ EVT BigVT = V->getOperand(0).getValueType();
+ EVT SmallVT = V->getOperand(1).getValueType();
+ if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ return SDValue();
+
+ // Only handle cases where both indexes are constants with the same type.
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+
+ if (InsIdx && ExtIdx &&
+ InsIdx->getValueType(0).getSizeInBits() <= 64 &&
+ ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal or bit offsets are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
+ ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
+ DAG.getNode(ISD::BITCAST, dl,
+ N->getOperand(0).getValueType(),
+ V->getOperand(0)), N->getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N0 == N1) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) Idx -= NumElts;
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.getOpcode() == ISD::UNDEF) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElts)
+ Idx += NumElts;
+ else
+ Idx -= NumElts;
+ }
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
+
+ // Remove references to rhs if it is undef
+ if (N1.getOpcode() == ISD::UNDEF) {
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) {
+ Idx = -1;
+ Changed = true;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
+ }
+
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector with all scalar elements the same.
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
+ SDNode *V = N0.getNode();
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+ // look though conversions that change things like v4f32 to v2f64.
+ if (V->getOpcode() == ISD::BITCAST) {
+ SDValue ConvInput = V->getOperand(0);
+ if (ConvInput.getValueType().isVector() &&
+ ConvInput.getValueType().getVectorNumElements() == NumElts)
+ V = ConvInput.getNode();
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+ }
+ }
+
+ // If this shuffle node is simply a swizzle of another shuffle node,
+ // and it reverses the swizzle of the previous shuffle then we can
+ // optimize shuffle(shuffle(x, undef), undef) -> x.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N1.getOpcode() == ISD::UNDEF) {
+
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // Shuffle nodes can only reverse shuffles with a single non-undef value.
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ assert(Idx < (int)NumElts && "Index references undef operand");
+ // Next, this index comes from the first value, which is the incoming
+ // shuffle. Adopt the incoming index.
+ if (Idx >= 0)
+ Idx = OtherSV->getMaskElt(Idx);
+
+ // The combined shuffle must map each index to itself.
+ if (Idx >= 0 && (unsigned)Idx != i)
+ return SDValue();
+ }
+
+ return OtherSV->getOperand(0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
+ if (!TLI.getShouldFoldAtomicFences())
+ return SDValue();
+
+ SDValue atomic = N->getOperand(0);
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue fence = atomic.getOperand(0);
+ if (fence.getOpcode() != ISD::MEMBARRIER)
+ return SDValue();
+
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2),
+ atomic.getOperand(3)), atomic.getResNo());
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2)),
+ atomic.getResNo());
+ default:
+ return SDValue();
+ }
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
+/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (N->getOpcode() == ISD::AND) {
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (!isa<ConstantSDNode>(Elt))
+ return SDValue();
+
+ if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ Indices.push_back(i);
+ else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ Indices.push_back(NumElts);
+ else
+ return SDValue();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ EVT EltVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ RVT, &ZeroOps[0], ZeroOps.size());
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVBinOp only works on vectors!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Shuffle = XformToShuffleWithZero(N);
+ if (Shuffle.getNode()) return Shuffle;
+
+ // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+ // this operation.
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+ RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ // If these two elements can't be folded, bail out.
+ if ((LHSOp.getOpcode() != ISD::UNDEF &&
+ LHSOp.getOpcode() != ISD::Constant &&
+ LHSOp.getOpcode() != ISD::ConstantFP) ||
+ (RHSOp.getOpcode() != ISD::UNDEF &&
+ RHSOp.getOpcode() != ISD::Constant &&
+ RHSOp.getOpcode() != ISD::ConstantFP))
+ break;
+
+ // Can't fold divide by zero.
+ if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+ N->getOpcode() == ISD::FDIV) {
+ if ((RHSOp.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
+ (RHSOp.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+ break;
+ }
+
+ EVT VT = LHSOp.getValueType();
+ EVT RVT = RHSOp.getValueType();
+ if (RVT != VT) {
+ // Integer BUILD_VECTOR operands may have types larger than the element
+ // size (e.g., when the element type is not legal). Prior to type
+ // legalization, the types may not match between the two BUILD_VECTORS.
+ // Truncate one of the operands to make them match.
+ if (RVT.getSizeInBits() > VT.getSizeInBits()) {
+ RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp);
+ } else {
+ LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp);
+ VT = RVT;
+ }
+ }
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
+ LHSOp, RHSOp);
+ if (FoldOp.getOpcode() != ISD::UNDEF &&
+ FoldOp.getOpcode() != ISD::Constant &&
+ FoldOp.getOpcode() != ISD::ConstantFP)
+ break;
+ Ops.push_back(FoldOp);
+ AddToWorkList(FoldOp.getNode());
+ }
+
+ if (Ops.size() == LHS.getNumOperands())
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ LHS.getValueType(), &Ops[0], Ops.size());
+ }
+
+ return SDValue();
+}
+
+/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG.
+SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVUnaryOp only works on vectors!");
+
+ SDValue N0 = N->getOperand(0);
+
+ if (N0.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Operand is a BUILD_VECTOR node, see if we can constant fold it.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.getOpcode() != ISD::UNDEF &&
+ Op.getOpcode() != ISD::ConstantFP)
+ break;
+ EVT EltVT = Op.getValueType();
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op);
+ if (FoldOp.getOpcode() != ISD::UNDEF &&
+ FoldOp.getOpcode() != ISD::ConstantFP)
+ break;
+ Ops.push_back(FoldOp);
+ AddToWorkList(FoldOp.getNode());
+ }
+
+ if (Ops.size() != N0.getNumOperands())
+ return SDValue();
+
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ N0.getValueType(), &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
+ SDValue N1, SDValue N2){
+ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
+
+ SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.getNode()) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
+ N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4));
+ AddToWorkList(SETCC.getNode());
+ return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
+ SCC.getOperand(2), SCC.getOperand(3), SETCC);
+ }
+
+ return SCC;
+ }
+ return SDValue();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select. Callers of this should assume that TheSelect is deleted if this
+/// returns true. As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+ SDValue RHS) {
+
+ // Cannot simplify select with vector condition
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
+
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
+ // Do not let this transformation reduce the number of volatile loads.
+ LLD->isVolatile() || RLD->isVolatile() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations. Since we are discarding
+ // src value info, don't do the transformation if the memory
+ // locations are not in the default address space.
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0 ||
+ !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
+ LLD->getBasePtr().getValueType()))
+ return false;
+
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+ (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+ return false;
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) ||
+ RLD->isPredecessorOf(LLD))
+ return false;
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ return false;
+
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->isVolatile(), LLD->isNonTemporal(),
+ LLD->isInvariant(), LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+ RLD->getExtensionType() : LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->getMemoryVT(), LLD->isVolatile(),
+ LLD->isNonTemporal(), LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3,
+ ISD::CondCode CC, bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
+ EVT VT = N2.getValueType();
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, DL, false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+ // fold select_cc true, x, y -> x
+ if (SCCC && !SCCC->isNullValue())
+ return N2;
+ // fold select_cc false, x, y -> y
+ if (SCCC && SCCC->isNullValue())
+ return N3;
+
+ // Check to see if we can simplify the select into an fabs node
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Allow either -0.0 or 0.0
+ if (CFP->getValueAPF().isZero()) {
+ // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+ if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+ N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+ N2 == N3.getOperand(0))
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+ // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+ if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+ N2.getOperand(0) == N3)
+ return DAG.getNode(ISD::FABS, DL, VT, N3);
+ }
+ }
+
+ // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
+ // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+ // in it. This is a win when the constant is not otherwise available because
+ // it replaces two constant pool loads with one. We only do this if the FP
+ // type is known to be legal, because if it isn't, then we are before legalize
+ // types an we want the other legalization to happen first (e.g. to avoid
+ // messing with soft float) and if the ConstantFP is not legal, because if
+ // it is legal, we may not need to store the FP constant in a constant pool.
+ if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+ if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+ if (TLI.isTypeLegal(N2.getValueType()) &&
+ (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+ TargetLowering::Legal) &&
+ // If both constants have multiple uses, then we won't need to do an
+ // extra load, they are likely around in registers for other users.
+ (TV->hasOneUse() || FV->hasOneUse())) {
+ Constant *Elts[] = {
+ const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue())
+ };
+ Type *FPTy = Elts[0]->getType();
+ const DataLayout &TD = *TLI.getDataLayout();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get the offsets to the 0 and 1 element of the array so that we can
+ // select between them.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize);
+
+ SDValue Cond = DAG.getSetCC(DL,
+ TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ AddToWorkList(Cond.getNode());
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+ Cond, One, Zero);
+ AddToWorkList(CstOffset.getNode());
+ CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CstOffset);
+ AddToWorkList(CPIdx.getNode());
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false,
+ false, false, Alignment);
+
+ }
+ }
+
+ // Check to see if we can perform the "gzip trick", transforming
+ // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
+ if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+ (N1C->isNullValue() || // (a < 0) ? b : 0
+ (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (XType.bitsGE(AType)) {
+ // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
+ // single-bit constant.
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ unsigned ShCtV = N2C->getAPIntValue().logBase2();
+ ShCtV = XType.getSizeInBits()-ShCtV-1;
+ SDValue ShCt = DAG.getConstant(ShCtV,
+ getShiftAmountTy(N0.getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+ XType, N0, ShCt);
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+ XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+ }
+
+ // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+ // where y is has a single bit set.
+ // A plaintext description would be, we can turn the SELECT_CC into an AND
+ // when the condition can be materialized as an all-ones register. Any
+ // single bit-test can be materialized as an all-ones register with
+ // shift-left and shift-right-arith.
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT &&
+ N1C && N1C->isNullValue() &&
+ N2C && N2C->isNullValue()) {
+ SDValue AndLHS = N0->getOperand(0);
+ ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ // Shift the tested bit over the sign bit.
+ APInt AndMask = ConstAndRHS->getAPIntValue();
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is either
+ // all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(AndMask.getBitWidth()-1,
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
+ if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ TLI.getBooleanContents(N0.getValueType().isVector()) ==
+ TargetLowering::ZeroOrOneBooleanContent) {
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->getAPIntValue() == 1)
+ return SDValue();
+
+ // Get a SetCC of the condition
+ // NOTE: Don't create a SETCC if it's not legal on this target.
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC,
+ LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) {
+ SDValue Temp, SCC;
+ // cast from setcc result type to select result type
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(),
+ N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ }
+
+ AddToWorkList(SCC.getNode());
+ AddToWorkList(Temp.getNode());
+
+ if (N2C->getAPIntValue() == 1)
+ return Temp;
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy(Temp.getValueType())));
+ }
+ }
+
+ // Check to see if this is the equivalent of setcc
+ // FIXME: Turn all of these into setcc if setcc if setcc is legal
+ // otherwise, go ahead with the folds.
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+ EVT XType = N0.getValueType();
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+ SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+ if (Res.getValueType() != VT)
+ Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+ return Res;
+ }
+
+ // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::CTLZ, XType))) {
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+ return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+ DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ getShiftAmountTy(Ctlz.getValueType())));
+ }
+ // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+ SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+ XType, DAG.getConstant(0, XType), N0);
+ SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+ return DAG.getNode(ISD::SRL, DL, XType,
+ DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(XType)));
+ }
+ // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+ SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ }
+ }
+
+ // Check to see if this is an integer abs.
+ // select_cc setg[te] X, 0, X, -X ->
+ // select_cc setgt X, -1, X, -X ->
+ // select_cc setl[te] X, 0, -X, X ->
+ // select_cc setlt X, 1, -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N1C) {
+ ConstantSDNode *SubC = NULL;
+ if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
+ else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
+ (N1C->isOne() && CC == ISD::SETLT)) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+
+ EVT XType = N0.getValueType();
+ if (SubC && SubC->isNullValue() && XType.isInteger()) {
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+ N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+ XType, N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+// to alias with anything but itself. Provides base object and offset as
+// results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+ const GlobalValue *&GV, const void *&CV) {
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0; GV = 0; CV = 0;
+
+ // If it's an adding a simple constant then integrate the offset.
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getZExtValue();
+ }
+ }
+
+ // Return the underlying GlobalValue, and update the Offset. Return false
+ // for GlobalAddressSDNode since the same GlobalAddress may be represented
+ // by multiple nodes with different offsets.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+ GV = G->getGlobal();
+ Offset += G->getOffset();
+ return false;
+ }
+
+ // Return the underlying Constant value, and update the Offset. Return false
+ // for ConstantSDNodes since the same constant pool entry may be represented
+ // by multiple nodes with different offsets.
+ if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+ CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
+ : (const void *)C->getConstVal();
+ Offset += C->getOffset();
+ return false;
+ }
+ // If it's any of the following then it can't alias with anything but itself.
+ return isa<FrameIndexSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const {
+ // If they are the same then they must be aliases.
+ if (Ptr1 == Ptr2) return true;
+
+ // Gather base node and offset information.
+ SDValue Base1, Base2;
+ int64_t Offset1, Offset2;
+ const GlobalValue *GV1, *GV2;
+ const void *CV1, *CV2;
+ bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+ bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+
+ // If they have a same base address then check to see if they overlap.
+ if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+ // It is possible for different frame indices to alias each other, mostly
+ // when tail call optimization reuses return address slots for arguments.
+ // To catch this case, look up the actual index of frame indices to compute
+ // the real alias relationship.
+ if (isFrameIndex1 && isFrameIndex2) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ }
+
+ // Otherwise, if we know what the bases are, and they aren't identical, then
+ // we know they cannot alias.
+ if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+ return false;
+
+ // If we know required SrcValue1 and SrcValue2 have relatively large alignment
+ // compared to the size and offset of the access, we may be able to prove they
+ // do not alias. This check is conservative for now to catch cases created by
+ // splitting vector types.
+ if ((SrcValueAlign1 == SrcValueAlign2) &&
+ (SrcValueOffset1 != SrcValueOffset2) &&
+ (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+ int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+ int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+
+ // There is no overlap between these relatively aligned accesses of similar
+ // size, return no alias.
+ if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+ return false;
+ }
+
+ if (CombinerGlobalAA) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+ int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+ int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+ AliasAnalysis::AliasResult AAResult =
+ AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+ AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
+ if (AAResult == AliasAnalysis::NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
+ SDValue Ptr0, Ptr1;
+ int64_t Size0, Size1;
+ const Value *SrcValue0, *SrcValue1;
+ int SrcValueOffset0, SrcValueOffset1;
+ unsigned SrcValueAlign0, SrcValueAlign1;
+ const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
+ FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ SrcValueAlign0, SrcTBAAInfo0);
+ FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ SrcValueAlign1, SrcTBAAInfo1);
+ return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ SrcValueAlign0, SrcTBAAInfo0,
+ Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ SrcValueAlign1, SrcTBAAInfo1);
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node. Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
+ LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
+
+ Ptr = LS->getBasePtr();
+ Size = LS->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LS->getSrcValue();
+ SrcValueOffset = LS->getSrcValueOffset();
+ SrcValueAlign = LS->getOriginalAlignment();
+ TBAAInfo = LS->getTBAAInfo();
+ return isa<LoadSDNode>(LS);
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
+
+ // Get alias information for node.
+ SDValue Ptr;
+ int64_t Size;
+ const Value *SrcValue;
+ int SrcValueOffset;
+ unsigned SrcValueAlign;
+ const MDNode *SrcTBAAInfo;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign, SrcTBAAInfo);
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+ unsigned Depth = 0;
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
+ // find more and revert to original chain since the xform is unlikely to be
+ // profitable.
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
+ // chain we found before we hit a tokenfactor rather than the original
+ // chain.
+ if (Depth > 6 || Aliases.size() == 2) {
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ break;
+ }
+
+ // Don't bother if we've been before.
+ if (!Visited.insert(Chain.getNode()))
+ continue;
+
+ switch (Chain.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry token is ideal chain operand, but handled in FindBetterChain.
+ break;
+
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for Chain.
+ SDValue OpPtr;
+ int64_t OpSize;
+ const Value *OpSrcValue;
+ int OpSrcValueOffset;
+ unsigned OpSrcValueAlign;
+ const MDNode *OpSrcTBAAInfo;
+ bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
+ OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign,
+ OpSrcTBAAInfo);
+
+ // If chain is alias then stop here.
+ if (!(IsLoad && IsOpLoad) &&
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ SrcTBAAInfo,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign, OpSrcTBAAInfo)) {
+ Aliases.push_back(Chain);
+ } else {
+ // Look further up the chain.
+ Chains.push_back(Chain.getOperand(0));
+ ++Depth;
+ }
+ break;
+ }
+
+ case ISD::TokenFactor:
+ // We have to check each of the operands of the token factor for "small"
+ // token factors, so we queue them up. Adding the operands to the queue
+ // (stack) in reverse order maintains the original order and increases the
+ // likelihood that getNode will find a matching token factor (CSE.)
+ if (Chain.getNumOperands() > 16) {
+ Aliases.push_back(Chain);
+ break;
+ }
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ ++Depth;
+ break;
+
+ default:
+ // For all other instructions we will just have to take what we can get.
+ Aliases.push_back(Chain);
+ break;
+ }
+ }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node.)
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+ // If no operands then chain to entry token.
+ if (Aliases.size() == 0)
+ return DAG.getEntryNode();
+
+ // If a single operand then chain to it. We don't need to revisit it.
+ if (Aliases.size() == 1)
+ return Aliases[0];
+
+ // Construct a custom tailored token factor.
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ &Aliases[0], Aliases.size());
+}
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+ CodeGenOpt::Level OptLevel) {
+ /// run - This is the main entry point to this class.
+ ///
+ DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 000000000000..9ac738e50726
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,1507 @@
+//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
+
+/// startNewBlock - Set the current block to which generated machine
+/// instructions will be appended, and clear the local CSE map.
+///
+void FastISel::startNewBlock() {
+ LocalValueMap.clear();
+
+ EmitStartPt = 0;
+
+ // Advance the emit start point past any EH_LABEL instructions.
+ MachineBasicBlock::iterator
+ I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
+ while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
+ EmitStartPt = I;
+ ++I;
+ }
+ LastLocalValue = EmitStartPt;
+}
+
+bool FastISel::LowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ // Fallback to SDISel argument lowering code to deal with sret pointer
+ // parameter.
+ return false;
+
+ if (!FastLowerArguments())
+ return false;
+
+ // Enter non-dead arguments into ValueMap for uses in non-entry BBs.
+ for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
+ E = FuncInfo.Fn->arg_end(); I != E; ++I) {
+ if (!I->use_empty()) {
+ DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
+ assert(VI != LocalValueMap.end() && "Missed an argument?");
+ FuncInfo.ValueMap[I] = VI->second;
+ }
+ }
+ return true;
+}
+
+void FastISel::flushLocalValueMap() {
+ LocalValueMap.clear();
+ LastLocalValue = EmitStartPt;
+ recomputeInsertPt();
+}
+
+bool FastISel::hasTrivialKill(const Value *V) const {
+ // Don't consider constants or arguments to have trivial kills.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // No-op casts are trivially coalesced by fast-isel.
+ if (const CastInst *Cast = dyn_cast<CastInst>(I))
+ if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+ !hasTrivialKill(Cast->getOperand(0)))
+ return false;
+
+ // GEPs with all zero indices are trivially coalesced by fast-isel.
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
+ return false;
+
+ // Only instructions with a single use in the same basic block are considered
+ // to have trivial kills.
+ return I->hasOneUse() &&
+ !(I->getOpcode() == Instruction::BitCast ||
+ I->getOpcode() == Instruction::PtrToInt ||
+ I->getOpcode() == Instruction::IntToPtr) &&
+ cast<Instruction>(*I->use_begin())->getParent() == I->getParent();
+}
+
+unsigned FastISel::getRegForValue(const Value *V) {
+ EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it.
+ unsigned Reg = lookUpRegForValue(V);
+ if (Reg != 0)
+ return Reg;
+
+ // In bottom-up mode, just create the virtual register which will be used
+ // to hold the value. It will be materialized later.
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
+
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
+}
+
+/// materializeRegForValue - Helper for getRegForValue. This function is
+/// called when the value isn't already available in a register and must
+/// be materialized with new instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V)) {
+ Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg =
+ getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ if (CF->isNullValue()) {
+ Reg = TargetMaterializeFloatZero(CF);
+ } else {
+ // Try to emit the constant directly.
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+ }
+
+ if (!Reg) {
+ // Try to emit the constant by using an integer constant with a cast.
+ const APFloat &Flt = CF->getValueAPF();
+ EVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ APInt IntVal(IntBitWidth, x);
+
+ unsigned IntegerReg =
+ getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ if (IntegerReg != 0)
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+ IntegerReg, /*Kill=*/false);
+ }
+ }
+ } else if (const Operator *Op = dyn_cast<Operator>(V)) {
+ if (!SelectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !TargetSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
+
+ // If target-independent code couldn't handle the value, give target-specific
+ // code a try.
+ if (!Reg && isa<Constant>(V))
+ Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg != 0) {
+ LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(const Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+ return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previous
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return;
+ }
+
+ unsigned &AssignedReg = FuncInfo.ValueMap[I];
+ if (AssignedReg == 0)
+ // Use the new register.
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+ for (unsigned i = 0; i < NumRegs; i++)
+ FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
+
+ AssignedReg = Reg;
+ }
+}
+
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+void FastISel::recomputeInsertPt() {
+ if (getLastLocalValue()) {
+ FuncInfo.InsertPt = getLastLocalValue();
+ FuncInfo.MBB = FuncInfo.InsertPt->getParent();
+ ++FuncInfo.InsertPt;
+ } else
+ FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+
+ // Now skip past any EH_LABELs, which must remain at the beginning.
+ while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
+ FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
+ ++FuncInfo.InsertPt;
+}
+
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ while (I != E) {
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
+FastISel::SavePoint FastISel::enterLocalValueArea() {
+ MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+ DebugLoc OldDL = DL;
+ recomputeInsertPt();
+ DL = DebugLoc();
+ SavePoint SP = { OldInsertPt, OldDL };
+ return SP;
+}
+
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
+ if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+ LastLocalValue = llvm::prior(FuncInfo.InsertPt);
+
+ // Restore the previous insert position.
+ FuncInfo.InsertPt = OldInsertPt.InsertPt;
+ DL = OldInsertPt.DL;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// which has an opcode which directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
+ EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 &&
+ (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+ else
+ return false;
+ }
+
+ // Check if the first operand is a constant, and handle it as "ri". At -O0,
+ // we don't have anything that canonicalizes operand order.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0) return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
+ Op1IsKill, CI->getZExtValue(),
+ VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t Imm = CI->getZExtValue();
+
+ // Transform "sdiv exact X, 8" -> "sra X, 3".
+ if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+ cast<BinaryOperator>(I)->isExact() &&
+ isPowerOf2_64(Imm)) {
+ Imm = Log2_64(Imm);
+ ISDOpcode = ISD::SRA;
+ }
+
+ // Transform "urem x, pow2" -> "and x, pow2-1".
+ if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+ isPowerOf2_64(Imm)) {
+ --Imm;
+ ISDOpcode = ISD::AND;
+ }
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Op0IsKill, Imm, VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Check if the second operand is a constant float.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op0IsKill, CF);
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ // Now we have both operands in registers. Emit the instruction.
+ unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode,
+ Op0, Op0IsKill,
+ Op1, Op1IsKill);
+ if (ResultReg == 0)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectGetElementPtr(const User *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ // FIXME: What's a good SWAG number for MaxOffs?
+ uint64_t MaxOffs = 2048;
+ Type *Ty = I->getOperand(0)->getType();
+ MVT VT = TLI.getPointerTy();
+ for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
+ E = I->op_end(); OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero()) continue;
+ // N = N + Offset
+ TotalOffs +=
+ TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ IdxNIsKill = true;
+ }
+ N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, N);
+ return true;
+}
+
+bool FastISel::SelectCall(const User *I) {
+ const CallInst *Call = cast<CallInst>(I);
+
+ // Handle simple inline asms.
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
+ // Don't attempt to handle constraints.
+ if (!IA->getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::INLINEASM))
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
+ return true;
+ }
+
+ MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
+ ComputeUsesVAFloatArgument(*Call, &MMI);
+
+ const Function *F = Call->getCalledFunction();
+ if (!F) return false;
+
+ // Handle selected intrinsic function calls.
+ switch (F->getIntrinsicID()) {
+ default: break;
+ // At -O0 we don't care about the lifetime intrinsics.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // The donothing intrinsic does, well, nothing.
+ case Intrinsic::donothing:
+ return true;
+
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
+ if (!DIVariable(DI->getVariable()).Verify() ||
+ !FuncInfo.MF->getMMI().hasDebugInfo()) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ const Value *Address = DI->getAddress();
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ unsigned Reg = 0;
+ unsigned Offset = 0;
+ if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+ // Some arguments' frame index is recorded during argument lowering.
+ Offset = FuncInfo.getArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ }
+ if (!Reg)
+ Reg = lookUpRegForValue(Address);
+
+ // If we have a VLA that has a "use" in a metadata node that's then used
+ // here but it has no other uses, then we have a problem. E.g.,
+ //
+ // int foo (const int *x) {
+ // char a[*x];
+ // return 0;
+ // }
+ //
+ // If we assign 'a' a vreg and fast isel later on has to use the selection
+ // DAG isel, it will want to copy the value to the vreg. However, there are
+ // no uses, which goes counter to what selection DAG isel expects.
+ if (!Reg && !Address->use_empty() && isa<Instruction>(Address) &&
+ (!isa<AllocaInst>(Address) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+ Reg = FuncInfo.InitializeRegForValue(Address);
+
+ if (Reg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset)
+ .addMetadata(DI->getVariable());
+ else
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst *DI = cast<DbgValueInst>(Call);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ const Value *V = DI->getValue();
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(0U).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addCImm(CI).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(CI->getZExtValue()).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addFPImm(CF).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (unsigned Reg = lookUpRegForValue(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Reg, RegState::Debug).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ return true;
+ }
+ case Intrinsic::objectsize: {
+ ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
+ unsigned long long Res = CI->isZero() ? -1ULL : 0;
+ Constant *ResCI = ConstantInt::get(Call->getType(), Res);
+ unsigned ResultReg = getRegForValue(ResCI);
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
+ case Intrinsic::expect: {
+ unsigned ResultReg = getRegForValue(Call->getArgOperand(0));
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
+ }
+
+ // Usually, it does not make sense to initialize a value,
+ // make an unrelated function call and use the value, because
+ // it tends to be spilled on the stack. So, we move the pointer
+ // to the last local value to the beginning of the block, so that
+ // all the values which have already been materialized,
+ // appear after the call. It also makes sense to skip intrinsics
+ // since they tend to be inlined.
+ if (!isa<IntrinsicInst>(Call))
+ flushLocalValueMap();
+
+ // An arbitrary call. Bail.
+ return false;
+}
+
+bool FastISel::SelectCast(const User *I, unsigned Opcode) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal.
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ // Check if the source operand is legal.
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
+ DstVT.getSimpleVT(),
+ Opcode,
+ InputReg, InputRegIsKill);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectBitCast(const User *I) {
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (I->getType() == I->getOperand(0)->getType()) {
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ // Bitcasts of other values become reg-reg copies or BITCAST operators.
+ EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstEVT = TLI.getValueType(I->getType());
+ if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
+ !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DstVT = DstEVT.getSimpleVT();
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // First, try to perform the bitcast by inserting a reg-reg copy.
+ unsigned ResultReg = 0;
+ if (SrcVT == DstVT) {
+ const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ // Don't attempt a cross-class copy. It will likely fail.
+ if (SrcClass == DstClass) {
+ ResultReg = createResultReg(DstClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0);
+ }
+ }
+
+ // If the reg-reg copy failed, select a BITCAST opcode.
+ if (!ResultReg)
+ ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectInstruction(const Instruction *I) {
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (isa<TerminatorInst>(I))
+ if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
+ return false;
+
+ DL = I->getDebugLoc();
+
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+
+ // As a special case, don't handle calls to builtin library functions that
+ // may be translated directly to target instructions.
+ if (const CallInst *Call = dyn_cast<CallInst>(I)) {
+ const Function *F = Call->getCalledFunction();
+ LibFunc::Func Func;
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func))
+ return false;
+ }
+
+ // First, try doing target-independent selection.
+ if (SelectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
+ DL = DebugLoc();
+ return true;
+ }
+ // Remove dead code. However, ignore call instructions since we've flushed
+ // the local value map and recomputed the insert point.
+ if (!isa<CallInst>(I)) {
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ }
+
+ // Next, try calling the target to attempt to handle the instruction.
+ SavedInsertPt = FuncInfo.InsertPt;
+ if (TargetSelectInstruction(I)) {
+ ++NumFastIselSuccessTarget;
+ DL = DebugLoc();
+ return true;
+ }
+ // Check for dead code and remove as necessary.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+
+ DL = DebugLoc();
+ return false;
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
+
+ if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
+ FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ // For more accurate line information if this is the only instruction
+ // in the block then emit it, otherwise we have the unconditional
+ // fall-through case, which needs no instructions.
+ } else {
+ // The unconditional branch case.
+ TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
+ SmallVector<MachineOperand, 0>(), DL);
+ }
+ FuncInfo.MBB->addSuccessor(MSucc);
+}
+
+/// SelectFNeg - Emit an FNeg operation.
+///
+bool
+FastISel::SelectFNeg(const User *I) {
+ unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ if (OpReg == 0) return false;
+
+ bool OpRegIsKill = hasTrivialKill(I);
+
+ // If the target has ISD::FNEG, use it.
+ EVT VT = TLI.getValueType(I->getType());
+ unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::FNEG, OpReg, OpRegIsKill);
+ if (ResultReg != 0) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Bitcast the value to integer, twiddle the sign bit with xor,
+ // and then bitcast it back to floating-point.
+ if (VT.getSizeInBits() > 64) return false;
+ EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+ if (!TLI.isTypeLegal(IntVT))
+ return false;
+
+ unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::BITCAST, OpReg, OpRegIsKill);
+ if (IntReg == 0)
+ return false;
+
+ unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
+ IntReg, /*Kill=*/true,
+ UINT64_C(1) << (VT.getSizeInBits()-1),
+ IntVT.getSimpleVT());
+ if (IntResultReg == 0)
+ return false;
+
+ ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::BITCAST, IntResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectExtractValue(const User *U) {
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI)
+ return false;
+
+ // Make sure we only try to handle extracts with a legal result. But also
+ // allow i1 because it's easy.
+ EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true);
+ if (!RealVT.isSimple())
+ return false;
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
+ return false;
+
+ const Value *Op0 = EVI->getOperand(0);
+ Type *AggTy = Op0->getType();
+
+ // Get the base result register.
+ unsigned ResultReg;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+ if (I != FuncInfo.ValueMap.end())
+ ResultReg = I->second;
+ else if (isa<Instruction>(Op0))
+ ResultReg = FuncInfo.InitializeRegForValue(Op0);
+ else
+ return false; // fast-isel can't handle aggregate constants at the moment
+
+ // Get the actual result register, which is an offset from the base register.
+ unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+
+ for (unsigned i = 0; i < VTIndex; i++)
+ ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+ UpdateValueMap(EVI, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectOperator(const User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SelectBinaryOp(I, ISD::ADD);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::Sub:
+ return SelectBinaryOp(I, ISD::SUB);
+ case Instruction::FSub:
+ // FNeg is currently represented in LLVM IR as a special case of FSub.
+ if (BinaryOperator::isFNeg(I))
+ return SelectFNeg(I);
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::Mul:
+ return SelectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return SelectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return SelectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return SelectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return SelectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return SelectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return SelectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return SelectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return SelectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return SelectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return SelectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return SelectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return SelectGetElementPtr(I);
+
+ case Instruction::Br: {
+ const BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
+ // Conditional branches are not handed yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ // Nothing to emit.
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return SelectCall(I);
+
+ case Instruction::BitCast:
+ return SelectBitCast(I);
+
+ case Instruction::FPToSI:
+ return SelectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return SelectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return SelectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return SelectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return SelectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ case Instruction::ExtractValue:
+ return SelectExtractValue(I);
+
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FuncInfo(funcInfo),
+ MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()),
+ MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()),
+ TD(*TM.getDataLayout()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()),
+ TRI(*TM.getRegisterInfo()),
+ LibInfo(libInfo) {
+}
+
+FastISel::~FastISel() {}
+
+bool FastISel::FastLowerArguments() {
+ return false;
+}
+
+unsigned FastISel::FastEmit_(MVT, MVT,
+ unsigned) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT, MVT,
+ unsigned, const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and try
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+ // Horrible hack (to be removed), check to make sure shift amounts are
+ // in-range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg != 0)
+ return ResultReg;
+ unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (MaterialReg == 0) {
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
+ VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ assert (MaterialReg != 0 && "Unable to materialize imm.");
+ if (MaterialReg == 0) return 0;
+ }
+ return FastEmit_rr(VT, VT, Opcode,
+ Op0, Op0IsKill,
+ MaterialReg, /*Kill=*/true);
+}
+
+unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ const TargetRegisterClass *RC = MRI.getRegClass(Op0);
+ MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx);
+ return ResultReg;
+}
+
+/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
+/// with all but the least significant bit set to zero.
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
+}
+
+/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+/// Emit code to ensure constants are copied into registers when needed.
+/// Remember the virtual registers that need to be added to the Machine PHI
+/// nodes as input. We cannot just directly add them, because expansion
+/// might result in multiple MBB's for one BB. As such, the start of the
+/// BB might correspond to a different MBB than the end.
+bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+
+ // Ignore dead phi's.
+ if (PN->use_empty()) continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+ // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegs to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
+ else {
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ // Set the DebugLoc for the copy. Prefer the location of the operand
+ // if there is one; use the location of the PHI otherwise.
+ DL = PN->getDebugLoc();
+ if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+ DL = Inst->getDebugLoc();
+
+ unsigned Reg = getRegForValue(PHIOp);
+ if (Reg == 0) {
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ DL = DebugLoc();
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 000000000000..b46edad7a3d4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,483 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "function-lowering-info"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+ if (I->use_empty()) return false;
+ if (isa<PHINode>(I)) return true;
+ const BasicBlock *BB = I->getParent();
+ for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
+ return true;
+ }
+ return false;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
+ : TLI(tli) {
+}
+
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
+ Fn = &fn;
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, TLI);
+ CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
+ Fn->isVarArg(),
+ Outs, Fn->getContext());
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ Function::const_iterator BB = Fn->begin(), EB = Fn->end();
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+
+ // The object may need to be placed onto the stack near the stack
+ // protector if one exists. Determine here if this object is a suitable
+ // candidate. I.e., it would trigger the creation of a stack protector.
+ bool MayNeedSP =
+ (AI->isArrayAllocation() ||
+ (TySize >= 8 && isa<ArrayType>(Ty) &&
+ cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
+ StaticAllocaMap[AI] =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
+ MayNeedSP, AI);
+ }
+
+ for (; BB != EB; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Mark values used outside their block as exported, by allocating
+ // a virtual register for them.
+ if (isUsedOutsideOfDefiningBlock(I))
+ if (!isa<AllocaInst>(I) ||
+ !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(I);
+
+ // Collect llvm.dbg.declare information. This is done now instead of
+ // during the initial isel pass through the IR so that it is done
+ // in a predictable order.
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+ MachineModuleInfo &MMI = MF->getMMI();
+ if (MMI.hasDebugInfo() &&
+ DIVariable(DI->getVariable()).Verify() &&
+ !DI->getDebugLoc().isUnknown()) {
+ // Don't handle byval struct arguments or VLAs, for example.
+ // Non-byval arguments are handled here (they refer to the stack
+ // temporary alloca at this point).
+ const Value *Address = DI->getAddress();
+ if (Address) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI != StaticAllocaMap.end()) { // Check for VLAs.
+ int FI = SI->second;
+ MMI.setVariableDbgInfo(DI->getVariable(),
+ FI, DI->getDebugLoc());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (BB = Fn->begin(); BB != EB; ++BB) {
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+ MBBMap[BB] = MBB;
+ MF->push_back(MBB);
+
+ // Transfer the address-taken flag. This is necessary because there could
+ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+ // the first one should be marked.
+ if (BB->hasAddressTaken())
+ MBB->setHasAddressTaken();
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ for (BasicBlock::const_iterator I = BB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ DebugLoc DL = PN->getDebugLoc();
+ unsigned PHIReg = ValueMap[PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT);
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+
+ // Mark landing pad blocks.
+ for (BB = Fn->begin(); BB != EB; ++BB)
+ if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
+ MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+ assert(CatchInfoFound.size() == CatchInfoLost.size() &&
+ "Not all catch info was assigned to a landing pad!");
+
+ MBBMap.clear();
+ ValueMap.clear();
+ StaticAllocaMap.clear();
+#ifndef NDEBUG
+ CatchInfoLost.clear();
+ CatchInfoFound.clear();
+#endif
+ LiveOutRegInfo.clear();
+ VisitedBBs.clear();
+ ArgDbgValues.clear();
+ ByValArgFrameIndexMap.clear();
+ RegFixups.clear();
+}
+
+/// CreateReg - Allocate a single virtual register for the given type.
+unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegs - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, Ty, ValueVTs);
+
+ unsigned FirstReg = 0;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ MVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
+
+ unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ unsigned R = CreateReg(RegisterVT);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
+
+/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
+/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
+/// the register's LiveOutInfo is for a smaller bit width, it is extended to
+/// the larger bit width by zero extension. The bit width must be no smaller
+/// than the LiveOutInfo's existing bit width.
+const FunctionLoweringInfo::LiveOutInfo *
+FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+ if (!LiveOutRegInfo.inBounds(Reg))
+ return NULL;
+
+ LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
+ if (!LOI->IsValid)
+ return NULL;
+
+ if (BitWidth > LOI->KnownZero.getBitWidth()) {
+ LOI->NumSignBits = 1;
+ LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth);
+ LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth);
+ }
+
+ return LOI;
+}
+
+/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
+/// register based on the LiveOutInfo of its operands.
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+ Type *Ty = PN->getType();
+ if (!Ty->isIntegerTy() || Ty->isVectorTy())
+ return;
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, Ty, ValueVTs);
+ assert(ValueVTs.size() == 1 &&
+ "PHIs with non-vector integer types should have a single VT.");
+ EVT IntVT = ValueVTs[0];
+
+ if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1)
+ return;
+ IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT);
+ unsigned BitWidth = IntVT.getSizeInBits();
+
+ unsigned DestReg = ValueMap[PN];
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ return;
+ LiveOutRegInfo.grow(DestReg);
+ LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+ Value *V = PN->getIncomingValue(0);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = Val.getNumSignBits();
+ DestLOI.KnownZero = ~Val;
+ DestLOI.KnownOne = Val;
+ } else {
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
+ "CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI = *SrcLOI;
+ }
+
+ assert(DestLOI.KnownZero.getBitWidth() == BitWidth &&
+ DestLOI.KnownOne.getBitWidth() == BitWidth &&
+ "Masks should have the same bit width as the type.");
+
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+ DestLOI.KnownZero &= ~Val;
+ DestLOI.KnownOne &= Val;
+ continue;
+ }
+
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+ DestLOI.KnownZero &= SrcLOI->KnownZero;
+ DestLOI.KnownOne &= SrcLOI->KnownOne;
+ }
+}
+
+/// setArgumentFrameIndex - Record frame index for the byval
+/// argument. This overrides previous frame index entry for this argument,
+/// if any.
+void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
+ int FI) {
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getArgumentFrameIndex - Get frame index for the byval argument.
+/// If the argument does not have any assigned frame index then 0 is
+/// returned.
+int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
+ DenseMap<const Argument *, int>::iterator I =
+ ByValArgFrameIndexMap.find(A);
+ if (I != ByValArgFrameIndexMap.end())
+ return I->second;
+ DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
+ return 0;
+}
+
+/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
+/// being passed to this variadic function, and set the MachineModuleInfo's
+/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
+/// reference to _fltused on Windows, which will link in MSVCRT's
+/// floating-point support.
+void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
+ MachineModuleInfo *MMI)
+{
+ FunctionType *FT = cast<FunctionType>(
+ I.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (i->isFloatingPointTy()) {
+ MMI->setUsesVAFloatArgument(true);
+ return;
+ }
+ }
+ }
+ }
+}
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB) {
+ // Inform the MachineModuleInfo of the personality for this landing pad.
+ const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
+ assert(CE->getOpcode() == Instruction::BitCast &&
+ isa<Function>(CE->getOperand(0)) &&
+ "Personality should be a function");
+ MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+ // Gather all the type infos for this landing pad and pass them along to
+ // MachineModuleInfo.
+ std::vector<const GlobalVariable *> TyInfo;
+ unsigned N = I.getNumArgOperands();
+
+ for (unsigned i = N - 1; i > 1; --i) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + !FilterLength;
+ assert(FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N) {
+ TyInfo.reserve(N - FirstCatch);
+ for (unsigned j = FirstCatch; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ if (!FilterLength) {
+ // Cleanup.
+ MMI->addCleanup(MBB);
+ } else {
+ // Filter.
+ TyInfo.reserve(FilterLength - 1);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addFilterTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ N = i;
+ }
+ }
+
+ if (N > 2) {
+ TyInfo.reserve(N - 2);
+ for (unsigned j = 2; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ }
+}
+
+/// AddLandingPadInfo - Extract the exception handling information from the
+/// landingpad instruction and add them to the specified machine module info.
+void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
+ MachineBasicBlock *MBB) {
+ MMI.addPersonality(MBB,
+ cast<Function>(I.getPersonalityFn()->stripPointerCasts()));
+
+ if (I.isCleanup())
+ MMI.addCleanup(MBB);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
+ // but we need to do it this way because of how the DWARF EH emitter
+ // processes the clauses.
+ for (unsigned i = I.getNumClauses(); i != 0; --i) {
+ Value *Val = I.getClause(i - 1);
+ if (I.isCatch(i - 1)) {
+ MMI.addCatchTypeInfo(MBB,
+ dyn_cast<GlobalVariable>(Val->stripPointerCasts()));
+ } else {
+ // Add filters in a list.
+ Constant *CVal = cast<Constant>(Val);
+ SmallVector<const GlobalVariable*, 4> FilterList;
+ for (User::op_iterator
+ II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II)
+ FilterList.push_back(cast<GlobalVariable>((*II)->stripPointerCasts()));
+
+ MMI.addFilterTypeInfo(MBB, FilterList);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 000000000000..3b1abd7c836e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,982 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instr-emitter"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// MinRCSize - Smallest register class we allow when constraining virtual
+/// registers. If satisfying all register class constraints would require
+/// using a smaller register class, emit a COPY to a new virtual register
+/// instead.
+const unsigned MinRCSize = 4;
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional glue operands (which do
+/// not go into the resulting MachineInstr).
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
+
+/// countOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional glue operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+///
+/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
+/// the chain and glue. These operands may be implicit on the machine instr.
+static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
+ unsigned &NumImpUses) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+
+ // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
+ NumImpUses = N - NumExpUses;
+ for (unsigned I = N; I > NumExpUses; --I) {
+ if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
+ continue;
+ if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
+ if (TargetRegisterInfo::isPhysicalRegister(RN->getReg()))
+ continue;
+ NumImpUses = N - I;
+ break;
+ }
+
+ return N;
+}
+
+/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+ unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = NULL;
+ MVT VT = Node->getSimpleValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT);
+
+ if (!IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ MVT VT = Node->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Glue)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = 0;
+ if (i+II.getNumDefs() < II.getNumOperands()) {
+ RC = TRI->getAllocatableClass(
+ TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
+ }
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC =
+ TRI->getCommonSubClass(UseRC, RC);
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+ SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI->getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else {
+ DstRC = TLI->getRegClassFor(VT);
+ }
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ VRBase).addReg(SrcReg);
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfCopyToRegUse - If the only use of the specified result number of
+/// node is a CopyToReg, return its destination register. Return 0 otherwise.
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const {
+ if (!Node->hasOneUse())
+ return 0;
+
+ SDNode *User = *Node->use_begin();
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return Reg;
+ }
+ return 0;
+}
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ unsigned VRBase = 0;
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
+ if (II.OpInfo[i].isOptionalDef()) {
+ // Optional def must be a physical register.
+ unsigned NumResults = CountResults(Node);
+ VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+ MIB.addReg(VRBase, RegState::Define);
+ }
+
+ if (!VRBase && !IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MIB.addReg(VRBase, RegState::Define);
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI->createVirtualRegister(RC);
+ MIB.addReg(VRBase, RegState::Define);
+ }
+
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+ // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
+ // does not include operand register class info.
+ if (!VReg) {
+ const TargetRegisterClass *RC =
+ TLI->getRegClassFor(Op.getSimpleValueType());
+ VReg = MRI->createVirtualRegister(RC);
+ }
+ BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+
+ const MCInstrDesc &MCID = MIB->getDesc();
+ bool isOptDef = IIOpNum < MCID.getNumOperands() &&
+ MCID.OpInfo[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it, but first attempt to
+ // shrink VReg's register class within reason. For example, if VReg == GR32
+ // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP.
+ if (II) {
+ const TargetRegisterClass *DstRC = 0;
+ if (IIOpNum < II->getNumOperands())
+ DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
+ if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ }
+ }
+
+ // If this value has only one use, that use is a kill. This is a
+ // conservative approximation. InstrEmitter does trivial coalescing
+ // with CopyFromReg nodes, so don't emit kill flags for them.
+ // Avoid kill flags on Schedule cloned nodes, since there will be
+ // multiple uses.
+ // Tied operands are never killed, so we need to check that. And that
+ // means we need to determine the index of the operand.
+ bool isKill = Op.hasOneUse() &&
+ Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+ !IsDebug &&
+ !(IsClone || IsCloned);
+ if (isKill) {
+ unsigned Idx = MIB->getNumOperands();
+ while (Idx > 0 &&
+ MIB->getOperand(Idx-1).isReg() &&
+ MIB->getOperand(Idx-1).isImplicit())
+ --Idx;
+ bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
+ if (isTied)
+ isKill = false;
+ }
+
+ MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) |
+ getDebugRegState(IsDebug));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding.
+void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MIB.addImm(C->getSExtValue());
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ MIB.addFPImm(F->getConstantFPValue());
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ // Turn additional physreg operands into implicit uses on non-variadic
+ // instructions. This is used by call and return instructions passing
+ // arguments in registers.
+ bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
+ MIB.addReg(R->getReg(), getImplRegState(Imp));
+ } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
+ MIB.addRegMask(RM->getRegMask());
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags());
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MIB.addMBB(BBNode->getBasicBlock());
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MIB.addFrameIndex(FI->getIndex());
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags());
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = TM->getDataLayout()->getPrefTypeAlignment(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TM->getDataLayout()->getTypeAllocSize(Type);
+ }
+ }
+
+ unsigned Idx;
+ MachineConstantPool *MCP = MF->getConstantPool();
+ if (CP->isMachineConstantPoolEntry())
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+ MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ MIB.addBlockAddress(BA->getBlockAddress(),
+ BA->getOffset(),
+ BA->getTargetFlags());
+ } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
+ MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags());
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ }
+}
+
+unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+ MVT VT, DebugLoc DL) {
+ const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
+
+ // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg
+ // within reason.
+ if (RC && RC != VRC)
+ RC = MRI->constrainRegClass(VReg, RC, MinRCSize);
+
+ // VReg has been adjusted. It can be used with SubIdx operands now.
+ if (RC)
+ return VReg;
+
+ // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
+ // register instead.
+ RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+ assert(RC && "No legal register class for VT supports that SubIdx");
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(VReg);
+ return NewReg;
+}
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned VRBase = 0;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no
+ // constraints on the %dst register, COPY can target all legal register
+ // classes.
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *TRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0));
+
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned SrcReg, DstReg, DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx &&
+ TRC == MRI->getRegClass(SrcReg)) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ } else {
+ // VReg may not support a SubIdx sub-register, and we may need to
+ // constrain its register class or issue a COPY to a compatible register
+ // class.
+ VReg = ConstrainForSubReg(VReg, SubIdx,
+ Node->getOperand(0).getSimpleValueType(),
+ Node->getDebugLoc());
+
+ // Create the destreg if it is missing.
+ if (VRBase == 0)
+ VRBase = MRI->createVirtualRegister(TRC);
+
+ // Create the extract_subreg machine instruction.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx);
+ }
+ } else if (Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Figure out the register class to create for the destreg. It should be
+ // the largest legal register class supporting SubIdx sub-registers.
+ // RegisterCoalescer will constrain it further if it decides to eliminate
+ // the INSERT_SUBREG instruction.
+ //
+ // %dst = INSERT_SUBREG %src, %sub, SubIdx
+ //
+ // is lowered by TwoAddressInstructionPass to:
+ //
+ // %dst = COPY %src
+ // %dst:SubIdx = COPY %sub
+ //
+ // There is no constraint on the %src register class.
+ //
+ const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
+ SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
+ assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
+
+ if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase)))
+ VRBase = MRI->createVirtualRegister(SRC);
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase);
+
+ // If creating a subreg_to_reg, then the first input operand
+ // is an implicit value immediate, otherwise it's a register
+ if (Opc == TargetOpcode::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MIB.addImm(SD->getZExtValue());
+ } else
+ AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ // Add the subregster being inserted
+ AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ MIB.addImm(SubIdx);
+ MBB->insert(InsertPos, MIB);
+ } else
+ llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC =
+ TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg).addReg(VReg);
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+///
+void InstrEmitter::EmitRegSequence(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
+ unsigned NumOps = Node->getNumOperands();
+ assert((NumOps & 1) == 1 &&
+ "REG_SEQUENCE must have an odd number of operands!");
+ for (unsigned i = 1; i != NumOps; ++i) {
+ SDValue Op = Node->getOperand(i);
+ if ((i & 1) == 0) {
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
+ // Skip physical registers as they don't have a vreg to get and we'll
+ // insert copies for them in TwoAddressInstructionPass anyway.
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
+ }
+ }
+ AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ }
+
+ MBB->insert(InsertPos, MIB);
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitDbgValue - Generate machine instruction for a dbg_value node.
+///
+MachineInstr *
+InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ uint64_t Offset = SD->getOffset();
+ MDNode* MDPtr = SD->getMDPtr();
+ DebugLoc DL = SD->getDebugLoc();
+
+ if (SD->getKind() == SDDbgValue::FRAMEIX) {
+ // Stack address; this needs to be lowered in target-dependent fashion.
+ // EmitTargetCodeForFrameDebugValue is responsible for allocation.
+ unsigned FrameIx = SD->getFrameIx();
+ return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL);
+ }
+ // Otherwise, we're going to create an instruction here.
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ if (SD->getKind() == SDDbgValue::SDNODE) {
+ SDNode *Node = SD->getSDNode();
+ SDValue Op = SDValue(Node, SD->getResNo());
+ // It's possible we replaced this SDNode with other(s) and therefore
+ // didn't generate code for it. It's better to catch these cases where
+ // they happen and transfer the debug info, but trying to guarantee that
+ // in all cases would be very fragile; this is a safeguard for any
+ // that were missed.
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ if (I==VRBaseMap.end())
+ MIB.addReg(0U); // undef
+ else
+ AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } else if (SD->getKind() == SDDbgValue::CONST) {
+ const Value *V = SD->getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getSExtValue());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ MIB.addFPImm(CF);
+ } else {
+ // Could be an Undef. In any case insert an Undef so we can see what we
+ // dropped.
+ MIB.addReg(0U);
+ }
+ } else {
+ // Insert an Undef so we can see what we dropped.
+ MIB.addReg(0U);
+ }
+
+ MIB.addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// EmitMachineNode - Generate machine code for a target-specific node and
+/// needed dependencies.
+///
+void InstrEmitter::
+EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle REG_SEQUENCE specially.
+ if (Opc == TargetOpcode::REG_SEQUENCE) {
+ EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ if (Opc == TargetOpcode::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const MCInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NumImpUses = 0;
+ unsigned NodeOperands =
+ countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses);
+ bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ if (II.isVariadic())
+ assert(NumMIOperands >= II.getNumOperands() &&
+ "Too few operands for a variadic node!");
+ else
+ assert(NumMIOperands >= II.getNumOperands() &&
+ NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
+ NumImpUses &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ bool HasOptPRefs = II.getNumDefs() > NumResults;
+ assert((!HasOptPRefs || !HasPhysRegOuts) &&
+ "Unable to cope with optional defs and phys regs defs!");
+ unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+ for (unsigned i = NumSkip; i != NodeOperands; ++i)
+ AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Transfer all of the memory reference descriptions of this instruction.
+ MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+ cast<MachineSDNode>(Node)->memoperands_end());
+
+ // Insert the instruction into position in the block. This needs to
+ // happen before any custom inserter hook is called so that the
+ // hook knows where in the block to insert the replacement code.
+ MBB->insert(InsertPos, MIB);
+
+ // The MachineInstr may also define physregs instead of virtregs. These
+ // physreg values can reach other instructions in different ways:
+ //
+ // 1. When there is a use of a Node value beyond the explicitly defined
+ // virtual registers, we emit a CopyFromReg for one of the implicitly
+ // defined physregs. This only happens when HasPhysRegOuts is true.
+ //
+ // 2. A CopyFromReg reading a physreg may be glued to this instruction.
+ //
+ // 3. A glued instruction may implicitly use a physreg.
+ //
+ // 4. A glued instruction may use a RegisterSDNode operand.
+ //
+ // Collect all the used physreg defs, and make sure that any unused physreg
+ // defs are marked as dead.
+ SmallVector<unsigned, 8> UsedRegs;
+
+ // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ if (!Node->hasAnyUseOfValue(i))
+ continue;
+ // This implicitly defined physreg has a use.
+ UsedRegs.push_back(Reg);
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ }
+ }
+
+ // Scan the glue chain for any used physregs.
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || II.getImplicitDefs())
+ MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
+
+ // Run post-isel target hook to adjust this instruction if needed.
+#ifdef NDEBUG
+ if (II.hasPostISelHook())
+#endif
+ TLI->AdjustInstrPostInstrSelection(MIB, Node);
+}
+
+/// EmitSpecialNode - Generate machine code for a target-independent node and
+/// needed dependencies.
+void InstrEmitter::
+EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump();
+#endif
+ llvm_unreachable("This target-independent node should have been selected!");
+ case ISD::EntryToken:
+ llvm_unreachable("EntryToken should have been excluded from the schedule!");
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned SrcReg;
+ SDValue SrcVal = Node->getOperand(2);
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DestReg).addReg(SrcReg);
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::EH_LABEL: {
+ MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+ break;
+ }
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
+ TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
+
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1));
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addFrameIndex(FI->getIndex());
+ break;
+ }
+
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ // Create the inline asm machine instruction.
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+ const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
+ MIB.addExternalSymbol(AsmStr);
+
+ // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
+ // bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
+ getZExtValue();
+ MIB.addImm(ExtraInfo);
+
+ // Remember to operand index of the group flags.
+ SmallVector<unsigned, 8> GroupIdx;
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ GroupIdx.push_back(MIB->getNumOperands());
+ MIB.addImm(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MIB.addReg(Reg, RegState::Define |
+ getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ }
+ break;
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind_Clobber:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
+ getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ }
+ break;
+ case InlineAsm::Kind_RegUse: // Use of register.
+ case InlineAsm::Kind_Imm: // Immediate.
+ case InlineAsm::Kind_Mem: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (unsigned j = 0; j != NumVals; ++j, ++i)
+ AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Manually set isTied bits.
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
+ unsigned DefGroup = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+ unsigned DefIdx = GroupIdx[DefGroup] + 1;
+ unsigned UseIdx = GroupIdx.back() + 1;
+ for (unsigned j = 0; j != NumVals; ++j)
+ MIB->tieOperands(DefIdx + j, UseIdx + j);
+ }
+ }
+ break;
+ }
+ }
+
+ // Get the mdnode from the asm if it exists and add it to the instruction.
+ SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+ const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+ if (MD)
+ MIB.addMetadata(MD);
+
+ MBB->insert(InsertPos, MIB);
+ break;
+ }
+ }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos)
+ : MF(mbb->getParent()),
+ MRI(&MF->getRegInfo()),
+ TM(&MF->getTarget()),
+ TII(TM->getInstrInfo()),
+ TRI(TM->getRegisterInfo()),
+ TLI(TM->getTargetLowering()),
+ MBB(mbb), InsertPos(insertpos) {
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 000000000000..a9c2203e8400
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,146 @@
+//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTREMITTER_H
+#define INSTREMITTER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+
+class MachineInstrBuilder;
+class MCInstrDesc;
+class SDDbgValue;
+
+class InstrEmitter {
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertPos;
+
+ /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+ bool IsClone, bool IsCloned,
+ unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getDstOfCopyToRegUse - If the only use of the specified result number of
+ /// node is a CopyToReg, return its destination register. Return 0 otherwise.
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const;
+
+ void CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ unsigned getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// AddOperand - Add the specified operand to the specified machine instr. II
+ /// specifies the instruction information for the node, and IIOpNum is the
+ /// operand number (in the II) that we are adding. IIOpNum and II are used for
+ /// assertions only.
+ void AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// ConstrainForSubReg - Try to constrain VReg to a register class that
+ /// supports SubIdx sub-registers. Emit a copy if that isn't possible.
+ /// Return the virtual register to use.
+ unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+ MVT VT, DebugLoc DL);
+
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+ /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+ /// register is constrained to be in a particular register class.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+ ///
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+public:
+ /// CountResults - The results of target nodes have register or immediate
+ /// operands first, then an optional chain, and optional flag operands
+ /// (which do not go into the machine instrs.)
+ static unsigned CountResults(SDNode *Node);
+
+ /// EmitDbgValue - Generate machine instruction for a dbg_value node.
+ ///
+ MachineInstr *EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitNode - Generate machine code for a node and needed dependencies.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Node->isMachineOpcode())
+ EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap);
+ else
+ EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap);
+ }
+
+ /// getBlock - Return the current basic block.
+ MachineBasicBlock *getBlock() { return MBB; }
+
+ /// getInsertPos - Return the current insertion position.
+ MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+ /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+ /// at the given position in the given block.
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+
+private:
+ void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+ void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 000000000000..51cc254b2c82
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,3924 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support 'brcc' instruction, this
+/// will attempt merge setcc and brc instructions into brcc's.
+///
+namespace {
+class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+
+ /// LegalizePosition - The iterator for walking through the node list.
+ SelectionDAG::allnodes_iterator LegalizePosition;
+
+ /// LegalizedNodes - The set of nodes which have already been legalized.
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+
+ // Libcall insertion helpers.
+
+public:
+ explicit SelectionDAGLegalize(SelectionDAG &DAG);
+
+ void LegalizeDAG();
+
+private:
+ /// LegalizeOp - Legalizes the given operation.
+ void LegalizeOp(SDNode *Node);
+
+ SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+ void LegalizeLoadOps(SDNode *Node);
+ void LegalizeStoreOps(SDNode *Node);
+
+ /// PerformInsertVectorEltInMemory - Some target cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+
+ /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// performs the same shuffe in terms of order or result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const;
+
+ void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ DebugLoc dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned, DebugLoc dl);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+ DebugLoc dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
+ DebugLoc dl);
+
+ SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
+ SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+ SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ void ExpandNode(SDNode *Node);
+ void PromoteNode(SDNode *Node);
+
+ void ForgetNode(SDNode *N) {
+ LegalizedNodes.erase(N);
+ if (LegalizePosition == SelectionDAG::allnodes_iterator(N))
+ ++LegalizePosition;
+ }
+
+public:
+ // DAGUpdateListener implementation.
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ ForgetNode(N);
+ }
+ virtual void NodeUpdated(SDNode *N) {}
+
+ // Node replacement helpers
+ void ReplacedNode(SDNode *N) {
+ if (N->use_empty()) {
+ DAG.RemoveDeadNode(N);
+ } else {
+ ForgetNode(N);
+ }
+ }
+ void ReplaceNode(SDNode *Old, SDNode *New) {
+ DAG.ReplaceAllUsesWith(Old, New);
+ ReplacedNode(Old);
+ }
+ void ReplaceNode(SDValue Old, SDValue New) {
+ DAG.ReplaceAllUsesWith(Old, New);
+ ReplacedNode(Old.getNode());
+ }
+ void ReplaceNode(SDNode *Old, const SDValue *New) {
+ DAG.ReplaceAllUsesWith(Old, New);
+ ReplacedNode(Old);
+ }
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffe in terms of order or result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const {
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
+ : SelectionDAG::DAGUpdateListener(dag),
+ TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ DAG(dag) {
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
+ DAG.AssignTopologicalOrder();
+
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
+ for (;;) {
+ bool AnyLegalized = false;
+ for (LegalizePosition = DAG.allnodes_end();
+ LegalizePosition != DAG.allnodes_begin(); ) {
+ --LegalizePosition;
+
+ SDNode *N = LegalizePosition;
+ if (LegalizedNodes.insert(N)) {
+ AnyLegalized = true;
+ LegalizeOp(N);
+ }
+ }
+ if (!AnyLegalized)
+ break;
+
+ }
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+SDValue
+SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
+ bool Extend = false;
+ DebugLoc dl = CFP->getDebugLoc();
+
+ // If a FP immediate is precise when represented as a float and if the
+ // target can do an extending load from float to double, we put it into
+ // the constant pool as a float, even if it's is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
+ EVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ EVT OrigVT = VT;
+ EVT SVT = VT;
+ while (SVT != MVT::f32) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+
+ SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend) {
+ SDValue Result =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
+ DAG.getEntryNode(),
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
+ return Result;
+ }
+ SDValue Result =
+ DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false, false, false,
+ Alignment);
+ return Result;
+}
+
+/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SelectionDAGLegalize *DAGLegalize) {
+ assert(ST->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed stores not implemented!");
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ EVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ST->getMemoryVT().isFloatingPoint() ||
+ ST->getMemoryVT().isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
+ }
+ // Do a (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ EVT StoredVT = ST->getMemoryVT();
+ MVT RegVT =
+ TLI.getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, MachinePointerInfo(),
+ StoredVT, false, false, 0);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ ST->isVolatile(), ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo()
+ .getWithOffset(Offset),
+ MemVT, ST->isVolatile(),
+ ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
+ }
+ assert(ST->getMemoryVT().isInteger() &&
+ !ST->getMemoryVT().isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
+ // Divide the stored value in two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Val.getValueType()));
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
+ ST->getPointerInfo(), NewStoredVT,
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
+ Alignment);
+
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+}
+
+/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+static void
+ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SDValue &ValResult, SDValue &ChainResult) {
+ assert(LD->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed loads not implemented!");
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(),
+ LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
+ if (LoadedVT != VT)
+ Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
+ ISD::ANY_EXTEND, dl, VT, Result);
+
+ ValResult = Result;
+ ChainResult = Chain;
+ return;
+ }
+
+ // Copy the value to a (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do a (aligned) load from the stack slot.
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but one copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), false, false, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), MemVT,
+ false, false, 0));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo(), LoadedVT, false, false, 0);
+
+ // Callers expect a MERGE_VALUES node.
+ ValResult = Load;
+ ChainResult = TF;
+ return;
+ }
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ EVT NewLoadedVT;
+ NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (TLI.isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ }
+
+ // aggregate the two parts
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Hi.getValueType()));
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ ValResult = Result;
+ ChainResult = TF;
+}
+
+/// PerformInsertVectorEltInMemory - Some target cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ DebugLoc dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could also load the value into a vector register (either
+ // with a "move to register" or "extload into register" instruction, then
+ // permute it into place, if the idx is a constant and if the idx is
+ // supported by the target.
+ EVT VT = Tmp1.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = Tmp3.getValueType();
+ EVT PtrVT = TLI.getPointerTy();
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ false, false, 0);
+
+ // Truncate or zero extend offset to target pointer type.
+ unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+ // Add the offset to the index.
+ unsigned EltSize = EltVT.getSizeInBits()/8;
+ Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
+ false, false, 0);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI), false, false,
+ false, 0);
+}
+
+
+SDValue SelectionDAGLegalize::
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers in which case the inserted value can be over width.
+ EVT EltVT = Vec.getValueType().getVectorElementType();
+ if (Val.getValueType() == EltVT ||
+ (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+ SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ Vec.getValueType(), Val);
+
+ unsigned NumElts = Vec.getValueType().getVectorNumElements();
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask
+ // should be 0,1,2,3,4,5... with the appropriate element replaced with
+ // elt 0 of the RHS.
+ SmallVector<int, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
+ &ShufOps[0]);
+ }
+ }
+ return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
+
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ // We generally can't do this one for long doubles.
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ TLI.isTypeLegal(MVT::i32)) {
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ MVT::i32);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (TLI.isTypeLegal(MVT::i64)) {
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), MVT::i64);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+ // stores. If the target supports neither 32- nor 64-bits, this
+ // xform is certainly not worth it.
+ const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(4));
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+ }
+ }
+ return SDValue(0, 0);
+}
+
+void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ DebugLoc dl = Node->getDebugLoc();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ if (!ST->isTruncatingStore()) {
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ ReplaceNode(ST, OptStore);
+ return;
+ }
+
+ {
+ SDValue Value = ST->getValue();
+ MVT VT = Value.getSimpleValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ ExpandUnalignedStore(cast<StoreSDNode>(Node),
+ DAG, TLI, this);
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode())
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote stores to same size type");
+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ return;
+ }
+ } else {
+ SDValue Value = ST->getValue();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+ SDValue Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ RoundVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // The order of the stores doesn't matter.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else {
+ switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(),
+ StVT.getSimpleVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode())
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Expand:
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(TLI.isTypeLegal(StVT) &&
+ "Do not know how to expand this store!");
+ Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ }
+}
+
+void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDValue Chain = LD->getChain(); // The chain.
+ SDValue Ptr = LD->getBasePtr(); // The base pointer.
+ SDValue Value; // The value returned by the load op.
+ DebugLoc dl = Node->getDebugLoc();
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ MVT VT = Node->getSimpleValueType(0);
+ SDValue RVal = SDValue(Node, 0);
+ SDValue RChain = SDValue(Node, 1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
+ }
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(RVal, DAG);
+ if (Res.getNode()) {
+ RVal = Res;
+ RChain = Res.getValue(1);
+ }
+ break;
+ }
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote loads to same size type");
+
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ RChain = Res.getValue(1);
+ break;
+ }
+ }
+ if (RChain.getNode() != Node) {
+ assert(RVal.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
+ ReplacedNode(Node);
+ }
+ return;
+ }
+
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Value = Result;
+ Chain = Ch;
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Chain = Ch;
+ } else {
+ bool isCustom = false;
+ switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal: {
+ Value = SDValue(Node, 0);
+ Chain = SDValue(Node, 1);
+
+ if (isCustom) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
+ Value = Res;
+ Chain = Res.getValue(1);
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Value, Chain);
+ }
+ }
+ }
+ break;
+ }
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
+ break;
+ }
+
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
+
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Value = ValRes;
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ if (Chain.getNode() != Node) {
+ assert(Value.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ }
+}
+
+/// LegalizeOp - Return a legal replacement for the given operation, with
+/// all legal operands.
+void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return;
+
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
+ "Unexpected illegal type!");
+
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ assert((TLI.getTypeAction(*DAG.getContext(),
+ Node->getOperand(i).getValueType()) ==
+ TargetLowering::TypeLegal ||
+ Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
+
+ // Figure out the correct action; the way to query this varies by opcode
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ bool SimpleFinishLegalizing = true;
+ switch (Node->getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::STACKSAVE:
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::VAARG:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ if (Action != TargetLowering::Promote)
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::EXTRACT_VECTOR_ELT:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(2).getValueType());
+ break;
+ }
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::BR_CC: {
+ unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+ MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal) {
+ if (Node->getOpcode() == ISD::SELECT_CC)
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ else
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ }
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ // FIXME: Model these properly. LOAD and STORE are complicated, and
+ // STORE expects the unlegalized operand in some cases.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ // FIXME: This shouldn't be necessary. These nodes have special properties
+ // dealing with the recursive nature of legalization. Removing this
+ // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ case ISD::FLT_ROUNDS_:
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::UADDO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ case ISD::FPOWI:
+ case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_SJLJ_SETJMP:
+ case ISD::EH_SJLJ_LONGJMP:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be expanded.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+ case ISD::INIT_TRAMPOLINE:
+ case ISD::ADJUST_TRAMPOLINE:
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be custom-lowered.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Custom;
+ break;
+ case ISD::DEBUGTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+ // replace ISD::DEBUGTRAP with ISD::TRAP
+ SDValue NewVal;
+ NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(),
+ Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
+
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ Action = TargetLowering::Legal;
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ }
+ break;
+ }
+
+ if (SimpleFinishLegalizing) {
+ SDNode *NewNode = Node;
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Node->getOperand(1).getValueType().isVector()) {
+ SDValue SAO =
+ DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
+ Node->getOperand(1));
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
+ Handle.getValue());
+ }
+ break;
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SHL_PARTS:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Node->getOperand(2).getValueType().isVector()) {
+ SDValue SAO =
+ DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
+ Node->getOperand(2));
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
+ Node->getOperand(1),
+ Handle.getValue());
+ }
+ break;
+ }
+
+ if (NewNode != Node) {
+ DAG.ReplaceAllUsesWith(Node, NewNode);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
+ ReplacedNode(Node);
+ Node = NewNode;
+ }
+ switch (Action) {
+ case TargetLowering::Legal:
+ return;
+ case TargetLowering::Custom: {
+ // FIXME: The handling for custom lowering with multiple results is
+ // a complete mess.
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
+ SmallVector<SDValue, 8> ResultVals;
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+ if (e == 1)
+ ResultVals.push_back(Res);
+ else
+ ResultVals.push_back(Res.getValue(i));
+ }
+ if (Res.getNode() != Node || Res.getResNo() != 0) {
+ DAG.ReplaceAllUsesWith(Node, ResultVals.data());
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
+ ReplacedNode(Node);
+ }
+ return;
+ }
+ }
+ // FALL THROUGH
+ case TargetLowering::Expand:
+ ExpandNode(Node);
+ return;
+ case TargetLowering::Promote:
+ PromoteNode(Node);
+ return;
+ }
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "NODE: ";
+ Node->dump( &DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to legalize this operator!");
+
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ break;
+ case ISD::LOAD: {
+ return LegalizeLoadOps(Node);
+ }
+ case ISD::STORE: {
+ return LegalizeStoreOps(Node);
+ }
+ }
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+ if (Op.getValueType().isVector())
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
+ false, false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(),
+ false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Then store the inserted part.
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ StackPtr);
+
+ // Store the subvector.
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+ false, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ // We can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
+ // Create the stack frame object.
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+ // If the destination vector element type is narrower than the source
+ // element type, only store the bits necessary.
+ if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ EltVT, false, false, 0));
+ } else
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, 0));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo,
+ false, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+
+ // Get the sign bit of the RHS. First obtain a value that has the same
+ // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
+ SDValue SignBit;
+ EVT FloatVT = Tmp2.getValueType();
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
+ if (TLI.isTypeLegal(IVT)) {
+ // Convert to an integer with the same sign bit.
+ SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
+ } else {
+ // Store the float to memory, then load the sign part out as an integer.
+ MVT LoadTy = TLI.getPointerTy();
+ // First create a temporary that is aligned for both the load and store.
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+ // Then store the float to it.
+ SDValue Ch =
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+ if (TLI.isBigEndian()) {
+ assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+ // Load out a legal integer with the same sign bit as the float.
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
+ false, false, false, 0);
+ } else { // Little endian
+ SDValue LoadPtr = StackPtr;
+ // The float may be wider than the integer we are going to load. Advance
+ // the pointer so that the loaded integer will contain the sign bit.
+ unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
+ unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
+ LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
+ LoadPtr, DAG.getIntPtrConstant(ByteOffset));
+ // Load a legal integer containing the sign bit.
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
+ false, false, false, 0);
+ // Move the sign bit to the top bit of the loaded integer.
+ unsigned BitShift = LoadTy.getSizeInBits() -
+ (FloatVT.getSizeInBits() - 8 * ByteOffset);
+ assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
+ if (BitShift)
+ SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
+ DAG.getConstant(BitShift,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ }
+ }
+ // Now get the sign bit proper, by seeing whether the value is negative.
+ SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+ SignBit, DAG.getConstant(0, SignBit.getValueType()),
+ ISD::SETLT);
+ // Get the absolute value of the result.
+ SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
+ // Select between the nabs and abs value based on the sign bit of
+ // the input.
+ return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
+ DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+ AbsVal);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ if (Align > StackAlign)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP,
+ DAG.getConstant(-(uint64_t)Align, VT));
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+/// condition code CC on the current target. This routine expands SETCC with
+/// illegal condition code into AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC,
+ DebugLoc dl) {
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default: llvm_unreachable("Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ ISD::CondCode InvCC = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default: llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETO:
+ assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
+ == TargetLowering::Legal
+ && "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+ case ISD::SETUO:
+ assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
+ == TargetLowering::Legal
+ && "If SETUO is expanded, SETUNE must be legal!");
+ CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break, otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fallthrough if we are unsigned integer.
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
+ // We only support using the inverted operation and not a
+ // different manner of supporting expanding these cases.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+ LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
+ RHS = SDValue();
+ CC = SDValue();
+ return;
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unorder operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+ }
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ break;
+ }
+ }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+ EVT SlotVT,
+ EVT DestVT,
+ DebugLoc dl) {
+ // Create the stack frame object.
+ unsigned SrcAlign =
+ TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType().
+ getTypeForEVT(*DAG.getContext()));
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+ unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType);
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // later than DestVT.
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ PtrInfo, SlotVT, false, false, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ PtrInfo, false, false, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+ false, false, false, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+ PtrInfo, SlotVT, false, false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+ StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ Node->getValueType(0).getVectorElementType(),
+ false, false, 0);
+ return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ false, false, false, 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue Value1, Value2;
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+ bool MoreThanTwoValues = false;
+ bool isConstant = true;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value1.getNode()) {
+ Value1 = V;
+ } else if (!Value2.getNode()) {
+ if (V != Value1)
+ Value2 = V;
+ } else if (V != Value1 && V != Value2) {
+ MoreThanTwoValues = true;
+ }
+ }
+
+ if (!Value1.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ SmallVector<Constant*, 16> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ if (OpVT==EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
+ } else {
+ assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+ Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ }
+
+ if (!MoreThanTwoValues) {
+ SmallVector<int, 8> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+ }
+ if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+ SDValue Vec2;
+ if (Value2.getNode())
+ Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+ else
+ Vec2 = DAG.getUNDEF(VT);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently.
+ return ExpandVectorBuildThroughStack(Node);
+}
+
+// ExpandLibCall - Expand a node into a call to a libcall. If the result value
+// does not fit into a register, return the lo part and set the hi part to the
+// by-reg argument. If it does fit into a single register, return the result
+// and leave the Hi part unset.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // TLI.isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ // isTailCall may be true since the callee does not reference caller stack
+ // frame. Check if it's in the right position.
+ SDValue TCChain = InChain;
+ bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain);
+ if (isTailCall)
+ InChain = TCChain;
+
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), isTailCall,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
+ return CallInfo.first;
+}
+
+/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo.first;
+}
+
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::f128: LC = Call_F128; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+ return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
+ case MVT::i128: LC = Call_I128; break;
+ }
+ return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// useDivRem - Only issue divrem libcall if both quotient and remainder are
+/// needed.
+static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+ // The other use might have been replaced with a divrem already.
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ unsigned OtherOpcode = 0;
+ if (isSigned)
+ OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
+ else
+ OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
+/// pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the return address of the remainder.
+ SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = FIPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ DebugLoc dl = Node->getDebugLoc();
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ // Remainder is loaded back from the stack frame.
+ SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
+ MachinePointerInfo(), false, false, false, 0);
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
+}
+
+/// isSinCosLibcallAvailable - Return true if sincos libcall is available.
+static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// canCombineSinCosLibcall - Return true if sincos libcall is available and
+/// can be used to combine sin and cos.
+static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
+ const TargetMachine &TM) {
+ if (!isSinCosLibcallAvailable(Node, TLI))
+ return false;
+ // GNU sin/cos functions set errno while sincos does not. Therefore
+ // combining sin and cos is only safe if unsafe-fpmath is enabled.
+ bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU;
+ if (isGNU && !TM.Options.UnsafeFPMath)
+ return false;
+ return true;
+}
+
+/// useSinCos - Only issue sincos libcall if both sin and cos are
+/// needed.
+static bool useSinCos(SDNode *Node) {
+ unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
+ ? ISD::FCOS : ISD::FSIN;
+
+ SDValue Op0 = Node->getOperand(0);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ // The other user might have been turned into sincos already.
+ if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos
+/// pairs.
+void
+SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // Pass the argument.
+ Entry.Node = Node->getOperand(0);
+ Entry.Ty = RetTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Pass the return address of sin.
+ SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = SinPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Also pass the return address of the cos.
+ SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = CosPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ DebugLoc dl = Node->getDebugLoc();
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr,
+ MachinePointerInfo(), false, false, false, 0));
+ Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr,
+ MachinePointerInfo(), false, false, false, 0));
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+ SDValue Op0,
+ EVT DestVT,
+ DebugLoc dl) {
+ if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+ // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+ // Get the stack frame index of a 8 byte buffer.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+ // word offset constant for Hi/Lo address computation
+ SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+ // set up Hi and Lo (into buffer) address based on endian
+ SDValue Hi = StackSlot;
+ SDValue Lo = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(), StackSlot, WordOff);
+ if (TLI.isLittleEndian())
+ std::swap(Hi, Lo);
+
+ // if signed map to unsigned space
+ SDValue Op0Mapped;
+ if (isSigned) {
+ // constant used to invert sign bit (signed to unsigned mapping)
+ SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+ Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+ } else {
+ Op0Mapped = Op0;
+ }
+ // store the lo of the constructed double - based on integer input
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+ Op0Mapped, Lo, MachinePointerInfo(),
+ false, false, 0);
+ // initial hi portion of constructed double
+ SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+ // store the hi of the constructed double - biased exponent
+ SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+ MachinePointerInfo(),
+ false, false, 0);
+ // load the constructed double
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+ MachinePointerInfo(), false, false, false, 0);
+ // FP constant to bias correct the final result
+ SDValue Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL) :
+ BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+ // subtract the bias
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ // final result
+ SDValue Result;
+ // handle final rounding
+ if (DestVT == MVT::f64) {
+ // do nothing
+ Result = Sub;
+ } else if (DestVT.bitsLT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+ return Result;
+ }
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ // Code below here assumes !isSigned without checking again.
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+ SDValue TwoP52 =
+ DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64);
+ SDValue TwoP84PlusTwoP52 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64);
+ SDValue TwoP84 =
+ DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64);
+
+ SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
+ DAG.getConstant(32, MVT::i64));
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
+ SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+ SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
+ TwoP84PlusTwoP52);
+ return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+ }
+
+ // Implementation of unsigned i64 to f32.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ if (!isSigned) {
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+ SDValue ShiftConst =
+ DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, MVT::i64);
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ //pseudo-op, or, even better, for whole-function isel.
+ SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+ return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+ }
+
+ // Otherwise, implement the fully general conversion.
+
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
+ DAG.getConstant(UINT64_C(0x800), MVT::i64));
+ SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
+ SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
+ SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
+ ISD::SETUGE);
+ SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+ EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
+
+ SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
+ DAG.getConstant(32, SHVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
+ SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
+ SDValue TwoP32 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64);
+ SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
+ SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
+ SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+ return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
+ DAG.getIntPtrConstant(0));
+ }
+
+ SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+ SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETLT);
+ SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
+ uint64_t FF;
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported integer type!");
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
+ if (TLI.isLittleEndian()) FF <<= 32;
+ Constant *FudgeFactor = ConstantInt::get(
+ Type::getInt64Ty(*DAG.getContext()), FF);
+
+ SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+ Alignment = std::min(Alignment, 4u);
+ SDValue FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ else {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, Alignment);
+ HandleSDNode Handle(Load);
+ LegalizeOp(Load.getNode());
+ FudgeInReg = Handle.getValue();
+ }
+
+ return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
+ EVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ EVT NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
+ assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::SINT_TO_FP;
+ break;
+ }
+ if (isSigned) continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::UINT_TO_FP;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use. Zero extend our input to the
+ // desired type then run the operation on it.
+ return DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+ EVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ EVT NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_SINT;
+ break;
+ }
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_UINT;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+
+ // Okay, we found the operation and type to use.
+ SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+ // Truncate the result of the extended FP_TO_*INT operation to the desired
+ // size.
+ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
+///
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
+ case MVT::i16:
+ Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ case MVT::i32:
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+ DebugLoc dl) {
+ switch (Opc) {
+ default: llvm_unreachable("Cannot expand this yet!");
+ case ISD::CTPOP: {
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
+ unsigned Len = VT.getSizeInBits();
+
+ assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+ "CTPOP not implemented for this type.");
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+ SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT);
+ SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT);
+ SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT);
+ SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ Op = DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, ShVT));
+
+ return Op;
+ }
+ case ISD::CTLZ_ZERO_UNDEF:
+ // This trivially expands to CTLZ.
+ return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
+ case ISD::CTLZ: {
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
+ unsigned len = VT.getSizeInBits();
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ }
+ case ISD::CTTZ_ZERO_UNDEF:
+ // This trivially expands to CTTZ.
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
+ case ISD::CTTZ: {
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // see also http://www.hackersdelight.org/HDcode/ntz.cc
+ EVT VT = Op.getValueType();
+ SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getConstant(1, VT)));
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(VT.getSizeInBits(), VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+ }
+ }
+}
+
+std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
+void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ switch (Node->getOpcode()) {
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BSWAP:
+ Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ Results.push_back(DAG.getConstant(0, Node->getValueType(0)));
+ break;
+ case ISD::FLT_ROUNDS_:
+ Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
+ break;
+ case ISD::EH_RETURN:
+ case ISD::EH_LABEL:
+ case ISD::PREFETCH:
+ case ISD::VAEND:
+ case ISD::EH_SJLJ_LONGJMP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.push_back(DAG.getConstant(0, MVT::i32));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::ATOMIC_FENCE:
+ case ISD::MEMBARRIER: {
+ // If the target didn't lower this, lower it to '__sync_synchronize()' call
+ // FIXME: handle "fence singlethread" more efficiently.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::
+ CallLoweringInfo CLI(Node->getOperand(0),
+ Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy()),
+ Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::ATOMIC_LOAD: {
+ // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
+ SDValue Zero = DAG.getConstant(0, Node->getValueType(0));
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ // There is no libcall for atomic store; fake it with ATOMIC_SWAP.
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::DYNAMIC_STACKALLOC:
+ ExpandDYNAMIC_STACKALLOC(Node, Results);
+ break;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0; i < Node->getNumValues(); i++)
+ Results.push_back(Node->getOperand(i));
+ break;
+ case ISD::UNDEF: {
+ EVT VT = Node->getValueType(0);
+ if (VT.isInteger())
+ Results.push_back(DAG.getConstant(0, VT));
+ else {
+ assert(VT.isFloatingPoint() && "Unknown value type!");
+ Results.push_back(DAG.getConstantFP(0, VT));
+ }
+ break;
+ }
+ case ISD::TRAP: {
+ // If this operation is not supported, lower it to 'abort()' call
+ TargetLowering::ArgListTy Args;
+ TargetLowering::
+ CallLoweringInfo CLI(Node->getOperand(0),
+ Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+ Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FP_ROUND:
+ case ISD::BITCAST:
+ Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ // NOTE: we could fall back on load/store here too for targets without
+ // SAR. However, it is doubtful that any exist.
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+ EVT ShiftAmountTy = TLI.getShiftAmountTy(VT);
+ if (VT.isVector())
+ ShiftAmountTy = VT;
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
+ Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_ROUND_INREG: {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+ Node->getOperand(0), Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_TO_UINT: {
+ SDValue True, False;
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+ APFloat apf(DAG.EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
+ APInt x = APInt::getSignBit(NVT.getSizeInBits());
+ (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+ Tmp1 = DAG.getConstantFP(apf, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ Node->getOperand(0),
+ Tmp1, ISD::SETLT);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+ DAG.getNode(ISD::FSUB, dl, VT,
+ Node->getOperand(0), Tmp1));
+ False = DAG.getNode(ISD::XOR, dl, NVT, False,
+ DAG.getConstant(x, NVT));
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VAARG: {
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ unsigned Align = Node->getConstantOperandVal(3);
+
+ SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V),
+ false, false, false, 0);
+ SDValue VAList = VAListLoad;
+
+ if (Align > TLI.getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
+ VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(Align - 1,
+ TLI.getPointerTy()));
+
+ VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(-(int64_t)Align,
+ TLI.getPointerTy()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(TLI.getDataLayout()->
+ getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
+ TLI.getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+ false, false, false, 0));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::VACOPY: {
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(2), MachinePointerInfo(VS),
+ false, false, false, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ // This must be an access of the only element. Return it.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ else
+ Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::CONCAT_VECTORS: {
+ Results.push_back(ExpandVectorBuildThroughStack(Node));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR:
+ Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getOperand(2), dl));
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 32> NewMask;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!TLI.isTypeLegal(EltVT)) {
+
+ EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+ // BUILD_VECTOR operands are allowed to be wider than the element type.
+ // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it
+ if (NewEltVT.bitsLT(EltVT)) {
+
+ // Convert shuffle node.
+ // If original node was v4i64 and the new EltVT is i32,
+ // cast operands to v8i32 and re-build the mask.
+
+ // Calculate new VT, the size of the new VT should be equal to original.
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits()/NewEltVT.getSizeInBits());
+ assert(NewVT.bitsEq(VT));
+
+ // cast operands to new VT
+ Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+ // Convert the shuffle mask
+ unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements();
+
+ // EltVT gets smaller
+ assert(factor > 0);
+
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0) {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]);
+ }
+ else {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]*factor+fi);
+ }
+ }
+ Mask = NewMask;
+ VT = NewVT;
+ }
+ EltVT = NewEltVT;
+ }
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ unsigned Idx = Mask[i];
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Op0,
+ DAG.getIntPtrConstant(Idx)));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Op1,
+ DAG.getIntPtrConstant(Idx - NumElems)));
+ }
+
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ EVT OpTy = Node->getOperand(0).getValueType();
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ // 1 -> Hi
+ Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+ DAG.getConstant(OpTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy(Node->getOperand(0).getValueType())));
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+ } else {
+ // 0 -> Lo
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::STACKSAVE:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ } else {
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::STACKRESTORE:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+ Node->getOperand(1)));
+ } else {
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ break;
+ case ISD::FNEG:
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+ Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+ Node->getOperand(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FABS: {
+ // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
+ EVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = DAG.getConstantFP(0.0, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, ISD::SETUGT);
+ Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ EVT VT = Node->getValueType(0);
+ bool isSIN = Node->getOpcode() == ISD::FSIN;
+ // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin /
+ // fcos which share the same operand and both are used.
+ if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
+ canCombineSinCosLibcall(Node, TLI, TM))
+ && useSinCos(Node)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
+ if (!isSIN)
+ Tmp1 = Tmp1.getValue(1);
+ Results.push_back(Tmp1);
+ } else if (isSIN) {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
+ } else {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
+ }
+ break;
+ }
+ case ISD::FSINCOS:
+ // Expand into sincos libcall.
+ ExpandSinCosLibCall(Node, Results);
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
+ break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128));
+ break;
+ case ISD::FP16_TO_FP32:
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ break;
+ case ISD::FP32_TO_FP16:
+ Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
+ break;
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ // Check to see if this FP immediate is already legal.
+ // If this is a legal constant, turn it into a TargetConstantFP node.
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ Results.push_back(ExpandConstantFP(CFP, true));
+ break;
+ }
+ case ISD::EHSELECTION: {
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::EXCEPTIONADDR: {
+ unsigned Reg = TLI.getExceptionPointerRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::FSUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ "Don't know how to expand this FP subtraction!");
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ "Don't know how to expand this subtraction!");
+ Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+ break;
+ }
+ case ISD::UREM:
+ case ISD::SREM: {
+ EVT VT = Node->getValueType(0);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ Tmp2 = Node->getOperand(0);
+ Tmp3 = Node->getOperand(1);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ // If div is legal, it's better to do the normal expansion
+ !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
+ useDivRem(Node, isSigned, false))) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X-X/Y*Y
+ Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ } else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::UDIV:
+ case ISD::SDIV: {
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ useDivRem(Node, isSigned, true)))
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
+ ISD::SMUL_LOHI;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
+ "If this wasn't legal, it shouldn't have been created!");
+ Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ }
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
+ case ISD::MUL: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ // See if multiply or divide can be lowered using two-result operations.
+ // We just need the low half of the multiply; try both the signed
+ // and unsigned forms. If the target supports both SMUL_LOHI and
+ // UMUL_LOHI, form a preference by checking which forms of plain
+ // MULH it supports.
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+ unsigned OpToUse = 0;
+ if (HasSMUL_LOHI && !HasMULHS) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI && !HasMULHU) {
+ OpToUse = ISD::UMUL_LOHI;
+ } else if (HasSMUL_LOHI) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI) {
+ OpToUse = ISD::UMUL_LOHI;
+ }
+ if (OpToUse) {
+ Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1)));
+ break;
+ }
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ EVT OType = Node->getValueType(1);
+
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Results.push_back(Cmp);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
+ Node->getOpcode () == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT));
+ break;
+ }
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ EVT VT = Node->getValueType(0);
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static const unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+ if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() * 2))) {
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(1));
+ } else {
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+ // The high part is obtained by SRA'ing all but one of the bits of low
+ // part.
+ unsigned LoSize = VT.getSizeInBits();
+ SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+ // pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the architecture and thus has to be split to
+ // two arguments.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(1));
+ // Ret is a node with an illegal type. Because such things are not
+ // generally permitted during this phase of legalization, delete the
+ // node. The above EXTRACT_ELEMENT nodes should have been folded.
+ DAG.DeleteNode(Ret.getNode());
+ }
+
+ if (isSigned) {
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
+ TLI.getShiftAmountTy(BottomHalf.getValueType()));
+ Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+ ISD::SETNE);
+ } else {
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ }
+ Results.push_back(BottomHalf);
+ Results.push_back(TopHalf);
+ break;
+ }
+ case ISD::BUILD_PAIR: {
+ EVT PairTy = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+ Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy(PairTy)));
+ Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+ break;
+ }
+ case ISD::SELECT:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1,
+ DAG.getConstant(0, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BR_JT: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Table = Node->getOperand(1);
+ SDValue Index = Node->getOperand(2);
+
+ EVT PTy = TLI.getPointerTy();
+
+ const DataLayout &TD = *TLI.getDataLayout();
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
+
+ Index = DAG.getNode(ISD::MUL, dl, PTy,
+ Index, DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT,
+ false, false, 0);
+ Addr = LD;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+ TLI.getPICJumpTableRelocBase(Table, DAG));
+ }
+ Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BRCOND:
+ // Expand brcond's setcc into its constituent parts and create a BR_CC
+ // Node.
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ if (Tmp2.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+ Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+ // We test only the i1 bit. Skip the AND if UNDEF.
+ Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 :
+ DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, Tmp2.getValueType()));
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp3,
+ DAG.getConstant(0, Tmp3.getValueType()),
+ Node->getOperand(2));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SETCC: {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+
+ // If we expanded the SETCC into an AND/OR, return the new node
+ if (Tmp2.getNode() == 0) {
+ Results.push_back(Tmp1);
+ break;
+ }
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ int TrueValue;
+ switch (TLI.getBooleanContents(VT.isVector())) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ TrueValue = 1;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ TrueValue = -1;
+ break;
+ }
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+ DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT),
+ Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = Node->getOperand(2); // True
+ Tmp4 = Node->getOperand(3); // False
+ SDValue CC = Node->getOperand(4);
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, CC, dl);
+
+ assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, Tmp4, CC);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BR_CC: {
+ Tmp1 = Node->getOperand(0); // Chain
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
+ Tmp2, Tmp3, Tmp4, dl);
+
+ assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+ Tmp3, Node->getOperand(4));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BUILD_VECTOR:
+ Results.push_back(ExpandBUILD_VECTOR(Node));
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+ assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(0), DAG.getIntPtrConstant(Idx));
+ SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(1), DAG.getIntPtrConstant(Idx));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+ SDValue Result =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Scalars[0], Scalars.size());
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // FIXME: Custom lowering for these operations shouldn't return null!
+ break;
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
+}
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
+ MVT OVT = Node->getSimpleValueType(0);
+ if (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::SETCC) {
+ OVT = Node->getOperand(0).getSimpleValueType();
+ }
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3;
+ switch (Node->getOpcode()) {
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ // Zero extend the argument.
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // FIXME: This should set a bit in the zero extended value instead.
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
+ Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+ ISD::SETEQ);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
+ DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+ } else if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+ }
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ case ISD::BSWAP: {
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::SINT_TO_FP, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::VAARG: {
+ SDValue Chain = Node->getOperand(0); // Get the chain.
+ SDValue Ptr = Node->getOperand(1); // Get the pointer.
+
+ unsigned TruncOp;
+ if (OVT.isVector()) {
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger()
+ && "VAARG promotion is supported only for vectors or integer types");
+ TruncOp = ISD::TRUNCATE;
+ }
+
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2),
+ Node->getConstantOperandVal(3));
+ Chain = Tmp1.getValue(1);
+
+ Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1);
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ break;
+ }
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ unsigned ExtOp, TruncOp;
+ if (OVT.isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger() && "Cannot promote logic operation");
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ }
+ // Promote each of the values to the new type.
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
+ break;
+ }
+ case ISD::SELECT: {
+ unsigned ExtOp, TruncOp;
+ if (Node->getValueType(0).isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else if (Node->getValueType(0).isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ Tmp1 = Node->getOperand(0);
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ // Perform the larger operation, then round down.
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3);
+ if (TruncOp != ISD::FP_ROUND)
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+ else
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+ DAG.getIntPtrConstant(0));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SETCC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Node->getOperand(2)));
+ break;
+ }
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FPOW: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0)));
+ break;
+ }
+ case ISD::FLOG2:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FEXP: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp2, DAG.getIntPtrConstant(0)));
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
+}
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize() {
+ /// run - This is the main entry point to this class.
+ ///
+ SelectionDAGLegalize(*this).LegalizeDAG();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 000000000000..de217d8571ff
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,1451 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float". For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+static RTLIB::Libcall GetFPLibCall(EVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::f128 ? Call_F128 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Result Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften the result of this operator!");
+
+ case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
+ case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
+ case ISD::ConstantFP:
+ R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
+ case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+ case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
+ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
+ case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetSoftenedFloat(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return BitConvertToInteger(Op);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+ // Convert the inputs to integers, and build a new pair out of them.
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)),
+ BitConvertToInteger(N->getOperand(0)),
+ BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
+ return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ NewOp.getValueType().getVectorElementType(),
+ NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Size = NVT.getSizeInBits();
+
+ // Mask = ~(1 << (Size-1))
+ APInt API = APInt::getAllOnesValue(Size);
+ API.clearBit(Size-1);
+ SDValue Mask = DAG.getConstant(API, NVT);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of second operand.
+ SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
+ DAG.getConstant(RSize - 1,
+ TLI.getShiftAmountTy(RVT)));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Shift right or sign-extend it if the two operands have different types.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
+ DAG.getConstant(LSize - 1,
+ TLI.getShiftAmountTy(LVT)));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)),
+ GetSoftenedFloat(N->getOperand(2)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, 3, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
+ GetSoftenedFloat(N->getOperand(0)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
+ N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ "Unsupported power type!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewL;
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+ NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
+ NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), NVT, L->isVolatile(),
+ L->isNonTemporal(), false, L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ // Do a non-extending load followed by FP_EXTEND.
+ NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
+ L->getMemoryVT(), dl, L->getChain(),
+ L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
+ L->getMemoryVT(), L->isVolatile(),
+ L->isNonTemporal(), false, L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewVAARG;
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to
+ // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly
+ // match. Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+ NVT = (MVT::SimpleValueType)t;
+ // The source needs to big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT):RTLIB::getUINTTOFP (NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC,
+ TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ &Op, 1, false, dl);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Float to Integer Conversion..
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften this operator's operand!");
+
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
+ case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
+ case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (ST->isTruncatingStore())
+ // Do an FP_ROUND followed by a non-truncating store.
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+ Val, DAG.getIntPtrConstant(0)));
+ else
+ Val = GetSoftenedFloat(Val);
+
+ return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion, we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+ case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
+ case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+ case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.getSizeInBits() == integerPartWidth &&
+ "Do not know how to expand this float constant!");
+ APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(integerPartWidth, C.getRawData()[1])),
+ NVT);
+ Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(integerPartWidth, C.getRawData()[0])),
+ NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Tmp;
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+ Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+ // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+ DAG.getCondCode(ISD::SETEQ));
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COPYSIGN_F32,
+ RTLIB::COPYSIGN_F64,
+ RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_F128,
+ RTLIB::COPYSIGN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, 3, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+
+ // Remember the chain.
+ Chain = Hi.getValue(1);
+
+ // The low part is zero.
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
+
+ // Modified the chain - switch anything that used the old chain to use the
+ // new one.
+ ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ DebugLoc dl = N->getDebugLoc();
+
+ // First do an SINT_TO_FP, whether the original was signed or unsigned.
+ // When promoting partial word types to i32 we must honor the signedness,
+ // though.
+ if (SrcVT.bitsLE(MVT::i32)) {
+ // The integer can be represented exactly in an f64.
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i32, Src);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
+ Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ } else {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (SrcVT.bitsLE(MVT::i64)) {
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i64, Src);
+ LC = RTLIB::SINTTOFP_I64_PPCF128;
+ } else if (SrcVT.bitsLE(MVT::i128)) {
+ Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+ LC = RTLIB::SINTTOFP_I128_PPCF128;
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl);
+ GetPairElements(Hi, Lo, Hi);
+ }
+
+ if (isSigned)
+ return;
+
+ // Unsigned - fix up the SINT_TO_FP value just calculated.
+ Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+ SrcVT = Src.getValueType();
+
+ // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
+ static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
+ static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+ ArrayRef<uint64_t> Parts;
+
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unsupported UINT_TO_FP!");
+ case MVT::i32:
+ Parts = TwoE32;
+ break;
+ case MVT::i64:
+ Parts = TwoE64;
+ break;
+ case MVT::i128:
+ Parts = TwoE128;
+ break;
+ }
+
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+ DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
+ APInt(128, Parts)),
+ MVT::ppcf128));
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
+ Lo, Hi, DAG.getCondCode(ISD::SETLT));
+ GetPairElements(Lo, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion. At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+ GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+ assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!");
+
+ // FIXME: This generated code sucks. We want to generate
+ // FCMPU crN, hi1, hi2
+ // BNE crN, L:
+ // FCMPU crN, lo1, lo2
+ // The following can be improved, but not that much.
+ SDValue Tmp1, Tmp2, Tmp3;
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETOEQ);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, CCCode);
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETUNE);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode);
+ Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+ NewRHS = SDValue(); // LHS is the result, not a compare.
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+ N->getOperand(0), DAG.getValueType(MVT::f64));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+ APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31));
+ SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+ DAG.getNode(ISD::FSUB, dl,
+ MVT::ppcf128,
+ N->getOperand(0),
+ Tmp)),
+ DAG.getConstant(0x80000000, MVT::i32)),
+ DAG.getNode(ISD::FP_TO_SINT, dl,
+ MVT::i32, N->getOperand(0)),
+ DAG.getCondCode(ISD::SETGE));
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1,
+ false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ ST->getValue().getValueType());
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+ (void)NVT;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(ST->getValue(), Lo, Hi);
+
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+ ST->getPointerInfo(),
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 000000000000..d19c13b8ff13
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,3042 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on powerpc).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion, we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to promote this operator!");
+ case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
+ case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
+ case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
+ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SIGN_EXTEND_INREG:
+ Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
+ case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
+ case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
+ case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+
+ case ISD::EXTRACT_SUBVECTOR:
+ Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::BUILD_VECTOR:
+ Res = PromoteIntRes_BUILD_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS:
+ Res = PromoteIntRes_CONCAT_VECTORS(N); break;
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+ case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+ case ISD::SDIV:
+ case ISD::SREM: Res = PromoteIntRes_SDIV(N); break;
+
+ case ISD::UDIV:
+ case ISD::UREM: Res = PromoteIntRes_UDIV(N); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+ case ISD::UADDO:
+ case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+ case ISD::SMULO:
+ case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+ case ISD::ATOMIC_LOAD:
+ Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+ }
+
+ // If the result is null then the sub-method took care of registering it.
+ if (Res.getNode())
+ SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetPromotedInteger(Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+ // Sign-extend the new bits, and continue the assertion.
+ SDValue Op = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertSext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+ // Zero the new bits, and continue the assertion.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertZext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
+ EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), ResVT,
+ N->getChain(), N->getBasePtr(),
+ N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(),
+ Op2, N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), N->getChain(), N->getBasePtr(),
+ Op2, Op3, N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
+ // The input promotes to the same size. Convert the promoted value.
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element to an integer and promote it by hand.
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ break;
+ case TargetLowering::TypeSplitVector: {
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ }
+ case TargetLowering::TypeWidenVector:
+ // The input is widened to the same size. Convert to the widened value.
+ // Make sure that the outgoing value is not a vector, because this would
+ // make us bitcast between two vectors which are legalized in different ways.
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+ }
+
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ CreateStackStoreLoad(InOp, OutVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, TLI.getPointerTy()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+ // The pair element type may be legal, or may not promote to the same type as
+ // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)), JoinIntegers(N->getOperand(0),
+ N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = N->getDebugLoc();
+ // Zero extend things like i1, sign extend everything else. It shouldn't
+ // matter in theory which one we pick, but this tends to give better code?
+ unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Result = DAG.getNode(Opc, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+ SDValue(N, 0));
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+ "can only promote integers");
+ EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ // Subtract off the extra leading bits in the bigger type.
+ return DAG.getNode(ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.setBit(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NewOpc = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+ // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
+ // and SINT conversions are Custom, there is no way to tell which is
+ // preferable. We choose SINT because that's the right thing on PPC.)
+ if (N->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (eg: because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+ DAG.getValueType(N->getValueType(0).getScalarType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+
+ return DAG.getNode(ISD::AssertZext, dl,
+ NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+ // If the result and operand types are the same after promotion, simplify
+ // to an in-register extension.
+ if (NVT == Res.getValueType()) {
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(N->getOperand(0).getValueType()));
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendInReg(Res, dl,
+ N->getOperand(0).getValueType().getScalarType());
+ assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+ return Res;
+ }
+ }
+
+ // Otherwise, just extend the original operand all the way to the larger type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ISD::LoadExtType ExtType =
+ ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getPointerInfo(),
+ N->getMemoryVT(), N->isVolatile(),
+ N->isNonTemporal(), N->getAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+/// Promote the overflow flag of an overflowing arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+ // Simply change the return type of the boolean result.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT ValueVTs[] = { N->getValueType(0), NVT };
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ DAG.getVTList(ValueVTs, 2), Ops, 2);
+
+ // Modified the sum result - switch anything that used the old sum to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 0), Res);
+
+ return SDValue(Res.getNode(), 1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // sign extension of its truncation to the original type.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: sign extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(OVT));
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
+ SDValue Mask = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy));
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::VSELECT, N->getDebugLoc(),
+ LHS.getValueType(), Mask, LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(2));
+ SDValue RHS = GetPromotedInteger(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+ EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ // Only use the result of getSetCCResultType if it is legal,
+ // otherwise just use the promoted result type (NVT).
+ if (!TLI.isTypeLegal(SVT))
+ SVT = NVT;
+
+ DebugLoc dl = N->getDebugLoc();
+ assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
+ "Vector compare must return a vector result!");
+
+ // Get the SETCC result using the canonical SETCC type.
+ SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+
+ assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+ // Convert to the expected type.
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = GetPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ // The input value must be properly sign extended.
+ SDValue Res = SExtPromotedInteger(N->getOperand(0));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ // The input value must be properly zero extended.
+ SDValue Res = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res;
+ SDValue InOp = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InOp.getValueType())) {
+ default: llvm_unreachable("Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypeExpandInteger:
+ Res = InOp;
+ break;
+ case TargetLowering::TypePromoteInteger:
+ Res = GetPromotedInteger(InOp);
+ break;
+ case TargetLowering::TypeSplitVector:
+ EVT InVT = InOp.getValueType();
+ assert(InVT.isVector() && "Cannot split scalar types");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert(NumElts == NVT.getVectorNumElements() &&
+ "Dst and Src must have the same number of elements");
+ assert(isPowerOf2_32(NumElts) &&
+ "Promoted vector type must be a power of two");
+
+ SDValue EOp1, EOp2;
+ GetSplitVector(InOp, EOp1, EOp2);
+
+ EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+ NumElts/2);
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
+ }
+
+ // Truncate to NVT instead of VT
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // zero extension of its truncation to the original type.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: zero extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine if the result overflowed in a larger type, we extend the
+ // input to the larger type, do the multiply (checking if it overflows),
+ // then also check the high bits of the result to see if overflow happened
+ // there.
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1));
+ SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS);
+
+ // Overflow occurred if it occurred in the larger type, or if the high part
+ // of the result does not zero/sign-extend the low part. Check this second
+ // possibility first.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred if the high part is non-zero.
+ SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+ } else {
+ // Signed overflow occurred if the high part does not sign extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // The only other way for overflow to occur is if the multiplication in the
+ // larger type itself overflowed.
+ Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow,
+ SDValue(Mul.getNode(), 1));
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
+ // The argument is passed as NumRegs registers of type RegVT.
+
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+ Chain = Parts[i].getValue(1);
+ }
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::reverse(Parts.begin(), Parts.end());
+
+ // Assemble the parts in the promoted type.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+ for (unsigned i = 1; i < NumRegs; ++i) {
+ SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+ // Shift it to the right position and "or" it in.
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+ DAG.getConstant(i * RegVT.getSizeInBits(),
+ TLI.getPointerTy()));
+ Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+ }
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to promote this operator's operand!");
+
+ case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::ATOMIC_STORE:
+ Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
+ break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
+ case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
+ case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
+ case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
+ case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::FP16_TO_FP32:
+ case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions, but zero extend is cheaper on
+ // many machines (an AND instead of two shifts), so prefer it.
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer comparison!");
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGE:
+ case ISD::SETUGT:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ // ALL of these operations will work if we either sign or zero extend
+ // the operands (including the unsigned comparisons!). Zero extend is
+ // usually a simpler/cheaper operation, so prefer it.
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ break;
+ case ISD::SETGE:
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ NewLHS = SExtPromotedInteger(NewLHS);
+ NewRHS = SExtPromotedInteger(NewRHS);
+ break;
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ return DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(),
+ N->getOrdering(), N->getSynchScope());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+ // This should only occur in unusual situations like bitcasting to an
+ // x86_fp80, so just turn it into a store+load
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+ // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
+ // legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT SVT = TLI.getSetCCResultType(MVT::Other);
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+
+ // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+ // Since the result type is legal, the operands must promote to it.
+ EVT OVT = N->getOperand(0).getValueType();
+ SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Hi = GetPromotedInteger(N->getOperand(1));
+ assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+ DebugLoc dl = N->getDebugLoc();
+
+ Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+ DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type is not. This implies
+ // that the vector is a power-of-two in length and that the element
+ // type does not have a strange size (eg: it is not i1).
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+
+ // Promote the inserted value. The type does not need to match the
+ // vector element type. Check that any extra bits introduced will be
+ // truncated away.
+ assert(N->getOperand(0).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+
+ SmallVector<SDValue, 16> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i)
+ NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+ return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+ "can only promote integer arguments");
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+ unsigned OpNo) {
+ if (OpNo == 1) {
+ // Promote the inserted value. This is valid because the type does not
+ // have to match the vector element type.
+
+ // Check that any extra bits introduced will be truncated away.
+ assert(N->getOperand(1).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ GetPromotedInteger(N->getOperand(1)),
+ N->getOperand(2)),
+ 0);
+ }
+
+ assert(OpNo == 2 && "Different operand and result vector types?");
+
+ // Promote the index.
+ SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), Idx), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+ SDValue NewOps[6];
+ DebugLoc dl = N->getDebugLoc();
+ NewOps[0] = N->getOperand(0);
+ for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+ SDValue Flag = GetPromotedInteger(N->getOperand(i));
+ NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+ }
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+ // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+ // the operand in place.
+ return SDValue(DAG.UpdateNodeOperands(N,
+ GetPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Only know how to promote the condition!");
+ SDValue Cond = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT SVT = TLI.getSetCCResultType(N->getOpcode() == ISD::SELECT ?
+ OpTy.getScalarType() : OpTy);
+ Cond = PromoteTargetBoolean(Cond, SVT);
+
+ return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+ // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+ // The CC (#2) is always legal.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+ Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
+
+ // Truncate the value and store the result.
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(),
+ isVolatile, isNonTemporal, Alignment);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getZeroExtendInReg(Op, dl,
+ N->getOperand(0).getValueType().getScalarType());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion, we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+ case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+ case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+ case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+ case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+ case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+ case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+ case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+ case ISD::ADDC:
+ case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
+ case ISD::UADDO:
+ case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// Lower an atomic node to the appropriate builtin call.
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
+/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc DL = N->getDebugLoc();
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ EVT NVT = InL.getValueType();
+ unsigned VTBits = N->getValueType(0).getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ EVT ShTy = N->getOperand(1).getValueType();
+
+ if (N->getOpcode() == ISD::SHL) {
+ if (Amt > VTBits) {
+ Lo = Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = InL;
+ } else if (Amt == 1 &&
+ TLI.isOperationLegalOrCustom(ISD::ADDC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
+ // Emit this X << 1 as X+X.
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ SDValue LoOps[2] = { InL, InL };
+ Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);
+ SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
+ Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::SRL) {
+ if (Amt > VTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRL, DL,
+ NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+ return;
+ }
+
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ if (Amt > VTBits) {
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(Amt-NVTBits, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bit of the shift amount. If we
+/// can tell this, we know that it is >= 32 or < 32, without knowing the actual
+/// shift amount.
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned ShBits = ShTy.getScalarType().getSizeInBits();
+ unsigned NVTBits = NVT.getScalarType().getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne);
+
+ // If we don't know anything about the high bits, exit.
+ if (((KnownZero|KnownOne) & HighBitMask) == 0)
+ return false;
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // If we know that any of the high bits of the shift amount are one, then we
+ // can do this as a couple of simple shifts.
+ if (KnownOne.intersects(HighBitMask)) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+ DAG.getConstant(~HighBitMask, ShTy));
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+ // If we know that all of the high bits of the shift amount are zero, then we
+ // can do this as a couple of simple shifts.
+ if ((KnownZero & HighBitMask) == HighBitMask) {
+ // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined
+ // shift if x is zero. We can use XOR here because x is known to be smaller
+ // than 32.
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
+ DAG.getConstant(NVTBits-1, ShTy));
+
+ unsigned Op1, Op2;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+ case ISD::SRL:
+ case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+ }
+
+ // When shifting right the arithmetic for Lo and Hi is swapped.
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(InL, InH);
+
+ // Use a little trick to get the bits that move from Lo to Hi. First
+ // shift by one bit.
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy));
+ // Then compute the remaining shift with amount-1.
+ SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
+
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(Hi, Lo);
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
+ SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+ SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
+
+ SDValue LoS, HiS, LoL, HiL;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // Short: ShAmt < NVTBits
+ LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+ HiS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ LoL = DAG.getConstant(0, NVT); // Lo part is zero.
+ HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRL:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getConstant(0, NVT); // Hi part is zero.
+ LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRA:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ EVT NVT = LHSL.getValueType();
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+ // them. TODO: Teach operation legalization how to expand unsupported
+ // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
+ // generate a value of this type in the expanded code sequence.
+ bool hasCarry =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::ADDC : ISD::SUBC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+
+ if (hasCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ } else {
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ if (N->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3);
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Res = DisintegrateMERGE_VALUES(N, ResNo);
+ SplitInteger(Res, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is any extension of the input (which degenerates to a copy).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+ Hi = DAG.getUNDEF(NVT); // The high part is undefined.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part replicates the sign bit of Lo, make it explicit.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part must be zero, make it explicit.
+ Hi = DAG.getConstant(0, NVT);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+ const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl),
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl),
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ bool isInvariant = N->isInvariant();
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ EVT MemVT = N->getMemoryVT();
+
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ MemVT, isVolatile, isNonTemporal, Alignment);
+
+ // Remember the chain.
+ Ch = Lo.getValue(1);
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // lo part.
+ unsigned LoSize = Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+ // The high part is undefined.
+ Hi = DAG.getUNDEF(NVT);
+ }
+ } else if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
+ isVolatile, isNonTemporal, isInvariant, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned loads at
+ // the cost of some bit-fiddling.
+ EVT MemVT = N->getMemoryVT();
+ unsigned EBytes = MemVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+
+ // Load both the high bits and maybe some of the low bits.
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ MemVT.getSizeInBits() - ExcessBits),
+ isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Load the rest of the low bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer low bits from the bottom of Hi to the top of Lo.
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ // Move high bits to the right position in Hi.
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl,
+ NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ }
+ }
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT);
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT);
+ if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ unsigned OuterBitSize = VT.getSizeInBits();
+ unsigned InnerBitSize = NVT.getSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ if (HasUMUL_LOHI) {
+ // We can emit a umul_lohi.
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHU) {
+ // We can emit a mulhu+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ if (HasSMUL_LOHI) {
+ // We can emit a smul_lohi.
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHS) {
+ // We can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (HasUMUL_LOHI) {
+ // Lo,Hi = umul LHS, RHS.
+ SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(NVT, NVT), LL, RL);
+ Lo = UMulLOHI;
+ Hi = UMulLOHI.getValue(1);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ if (HasMULHU) {
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ }
+
+ // If nothing else, we can make a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl),
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(Node, 1), Cmp);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we can emit an efficient shift operation, do so now. Check to see if
+ // the RHS is a constant.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+
+ // If we can determine that the high bit of the shift is zero or one, even if
+ // the low bits are variable, emit this shift in an optimized form.
+ if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+ return;
+
+ // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+ unsigned PartsOpc;
+ if (N->getOpcode() == ISD::SHL) {
+ PartsOpc = ISD::SHL_PARTS;
+ } else if (N->getOpcode() == ISD::SRL) {
+ PartsOpc = ISD::SRL_PARTS;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ PartsOpc = ISD::SRA_PARTS;
+ }
+
+ // Next check to see if the target supports this SHL_PARTS operation or if it
+ // will custom expand it.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ EVT VT = LHSL.getValueType();
+
+ // If the shift amount operand is coming from a vector legalization it may
+ // have an illegal type. Fix that first by casting the operand, otherwise
+ // the new SHL_PARTS operation would need further legalization.
+ SDValue ShiftOp = N->getOperand(1);
+ EVT ShiftTy = TLI.getShiftAmountTy(VT);
+ assert(ShiftTy.getScalarType().getSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarType().getSizeInBits()) &&
+ "ShiftAmountTy is too small to cover the range of this type!");
+ if (ShiftOp.getValueType() != ShiftTy)
+ ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
+
+ SDValue Ops[] = { LHSL, LHSH, ShiftOp };
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
+ Hi = Lo.getValue(1);
+ return;
+ }
+
+ // Otherwise, emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ bool isSigned;
+ if (N->getOpcode() == ISD::SHL) {
+ isSigned = false; /*sign irrelevant*/
+ if (VT == MVT::i16)
+ LC = RTLIB::SHL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SHL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SHL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SHL_I128;
+ } else if (N->getOpcode() == ISD::SRL) {
+ isSigned = false;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRL_I128;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ isSigned = true;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRA_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRA_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRA_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRA_I128;
+ }
+
+ if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ return;
+ }
+
+ if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+ llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is sign extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+ // The high part is obtained by SRA'ing all but one of the bits of low part.
+ unsigned LoSize = NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ if (EVT.bitsLE(Lo.getValueType())) {
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+ N->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+ DAG.getConstant(Hi.getValueType().getSizeInBits()-1,
+ TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. Leave the low part alone,
+ // sext_inreg the high part.
+ unsigned ExcessBits =
+ EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+ Hi = DAG.getNode(ISD::SRL, dl,
+ N->getOperand(0).getValueType(), N->getOperand(0),
+ DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Calculate the overflow: addition overflows iff a + b < a, and subtraction
+ // overflows iff a - b > a.
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS,
+ N->getOpcode () == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // A divide for UMULO should be faster than a function call.
+ if (N->getOpcode() == ISD::UMULO) {
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+
+ SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
+ SplitInteger(MUL, Lo, Hi);
+
+ // A divide for UMULO will be faster than a function call. Select to
+ // make sure we aren't using 0.
+ SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ RHS, DAG.getConstant(0, VT), ISD::SETEQ);
+ SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
+ DAG.getConstant(1, VT), RHS);
+ SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
+ SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
+ ISD::SETNE);
+ Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero,
+ DAG.getConstant(0, N->getValueType(1)),
+ Overflow);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
+ Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy();
+ Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+
+ // Replace this with a libcall that will check overflow.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::MULO_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MULO_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MULO_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+ // Temporary for the overflow value, default it to zero.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
+ DAG.getConstant(0, PtrVT), Temp,
+ MachinePointerInfo(), false, false, 0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = N->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = N->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the overflow check.
+ Entry.Node = Temp;
+ Entry.Ty = PtrTy->getPointerTo();
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, RetTy, true, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Func, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ SplitInteger(CallInfo.first, Lo, Hi);
+ SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
+ MachinePointerInfo(), false, false, false, 0);
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+ DAG.getConstant(0, PtrVT),
+ ISD::SETNE);
+ // Use the overflow from the libcall everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is zero extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+ Hi = DAG.getConstant(0, NVT); // The high part is just a zero.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getZeroExtendInReg(Hi, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
+ N->getOperand(0),
+ N->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+ ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(1));
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ case ISD::RETURNADDR:
+ case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+
+ case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+ GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+ if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+ if (RHSLo == RHSHi) {
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+ if (RHSCST->isAllOnesValue()) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl,
+ LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
+ }
+ }
+ }
+
+ NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+ NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ return;
+ }
+
+ // If this is a comparison of the sign bit, just look at the top part.
+ // X > -1, x < 0
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+ if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ NewLHS = LHSHi;
+ NewRHS = RHSHi;
+ return;
+ }
+
+ // FIXME: This generated code sucks.
+ ISD::CondCode LowCC;
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
+ // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+ // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL);
+ SDValue Tmp1, Tmp2;
+ Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!Tmp1.getNode())
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC);
+ Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!Tmp2.getNode())
+ Tmp2 = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+ ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+ if ((Tmp1C && Tmp1C->isNullValue()) ||
+ (Tmp2C && Tmp2C->isNullValue() &&
+ (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+ (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+ (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+ CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+ // low part is known false, returns high part.
+ // For LE / GE, if high part is known false, ignore the low part.
+ // For LT / GT, if high part is known true, ignore the low part.
+ NewLHS = Tmp2;
+ NewRHS = SDValue();
+ return;
+ }
+
+ NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ, false,
+ DagCombineInfo, dl);
+ if (!NewLHS.getNode())
+ NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+ NewLHS, Tmp1, Tmp2);
+ NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+ // The value being shifted is legal, but the shift amount is too big.
+ // It follows that either the result of the shift is undefined, or the
+ // upper half of the shift amount is zero. Just use the lower half.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(1), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+ // The argument of RETURNADDR / FRAMEADDR builtin is 32 bit contant. This
+ // surely makes pretty nice problems on 8/16 bit targets. Just truncate this
+ // constant to valid type.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this SINT_TO_FP!");
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+
+ EVT VT = N->getOperand(1).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(), isVolatile, isNonTemporal,
+ Alignment);
+ }
+
+ if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+ HiVT, isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Just truncate the low part of the source.
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // The following optimization is valid only if every value in SrcVT (when
+ // treated as signed) is representable in DstVT. Check that the mantissa
+ // size of DstVT is >= than the number of bits in SrcVT -1.
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT);
+ if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 &&
+ TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+ // Do a signed conversion then adjust the result.
+ SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+ SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+ // The result of the signed conversion needs adjusting if the 'sign bit' of
+ // the incoming integer was set. To handle this, we dynamically test to see
+ // if it is set, and, if so, add a fudge factor.
+
+ const uint64_t F32TwoE32 = 0x4F800000ULL;
+ const uint64_t F32TwoE64 = 0x5F800000ULL;
+ const uint64_t F32TwoE128 = 0x7F800000ULL;
+
+ APInt FF(32, 0);
+ if (SrcVT == MVT::i32)
+ FF = APInt(32, F32TwoE32);
+ else if (SrcVT == MVT::i64)
+ FF = APInt(32, F32TwoE64);
+ else if (SrcVT == MVT::i128)
+ FF = APInt(32, F32TwoE128);
+ else
+ llvm_unreachable("Unsupported UINT_TO_FP!");
+
+ // Check whether the sign bit is set.
+ SDValue Lo, Hi;
+ GetExpandedInteger(Op, Lo, Hi);
+ SDValue SignSet = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(Hi.getValueType()),
+ Hi, DAG.getConstant(0, Hi.getValueType()),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ TLI.getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ if (TLI.isBigEndian()) std::swap(Zero, Four);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+ FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+ Alignment = std::min(Alignment, 4u);
+
+ // Load the value out, extending it from f32 to the destination float type.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32,
+ false, false, Alignment);
+ return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+ }
+
+ // Otherwise, use a libcall.
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this UINT_TO_FP!");
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(),
+ N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+ return Swap.getValue(1);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned OutNumElems = OutVT.getVectorNumElements();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue BaseIdx = N->getOperand(1);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(OutNumElems);
+ for (unsigned i = 0; i != OutNumElems; ++i) {
+
+ // Extract the element from the original vector.
+ SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
+ BaseIdx, DAG.getIntPtrConstant(i));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InVT.getVectorElementType(), N->getOperand(0), Index);
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext);
+ // Insert the converted element to the new vector.
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ NewMask.push_back(SV->getMaskElt(i));
+ }
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumElems = N->getNumOperands();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(NumElems);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(!N->getOperand(0).getValueType().isVector() &&
+ "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT InElemTy = OutVT.getVectorElementType();
+ EVT OutElemTy = NOutVT.getVectorElementType();
+
+ unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
+ unsigned NumOutElem = NOutVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+ assert(NumElem * NumOperands == NumOutElem &&
+ "Unexpected number of elements");
+
+ // Take the elements from the first vector.
+ SmallVector<SDValue, 8> Ops(NumOutElem);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ SDValue Op = N->getOperand(i);
+ for (unsigned j = 0; j < NumElem; ++j) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InElemTy, Op, DAG.getIntPtrConstant(j));
+ Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+
+ SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+ NOutVTElem, N->getOperand(1));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+ V0, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = N->getOperand(1);
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ V0->getValueType(0).getScalarType(), V0, V1);
+
+ // EXTRACT_VECTOR_ELT can return types which are wider than the incoming
+ // element types. If this is the case then we need to expand the outgoing
+ // value and not truncate it.
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElems = N->getNumOperands();
+
+ EVT RetSclrTy = N->getValueType(0).getVectorElementType();
+
+ SmallVector<SDValue, 8> NewOps;
+ NewOps.reserve(NumElems);
+
+ // For each incoming vector
+ for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) {
+ SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
+ EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
+ unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
+
+ for (unsigned i=0; i<NumElem; ++i) {
+ // Extract element from incoming vector
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy,
+ Incoming, DAG.getIntPtrConstant(i));
+ SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
+ NewOps.push_back(Tr);
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0),
+ &NewOps[0], NewOps.size());
+ }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 000000000000..b6436bf42741
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1145 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// PerformExpensiveChecks - Do extensive, expensive, sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+ // If a node is not processed, then none of its values should be mapped by any
+ // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+ // If a node is processed, then each value with an illegal type must be mapped
+ // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+ // Values with a legal type may be mapped by ReplacedValues, but not by any of
+ // the other maps.
+
+ // Note that these invariants may not hold momentarily when processing a node:
+ // the node being processed may be put in a map before being marked Processed.
+
+ // Note that it is possible to have nodes marked NewNode in the DAG. This can
+ // occur in two ways. Firstly, a node may be created during legalization but
+ // never passed to the legalization core. This is usually due to the implicit
+ // folding that occurs when using the DAG.getNode operators. Secondly, a new
+ // node may be passed to the legalization core, but when analyzed may morph
+ // into a different node, leaving the original node as a NewNode in the DAG.
+ // A node may morph if one of its operands changes during analysis. Whether
+ // it actually morphs or not depends on whether, after updating its operands,
+ // it is equivalent to an existing node: if so, it morphs into that existing
+ // node (CSE). An operand can change during analysis if the operand is a new
+ // node that morphs, or it is a processed value that was mapped to some other
+ // value (as recorded in ReplacedValues) in which case the operand is turned
+ // into that other value. If a node morphs then the node it morphed into will
+ // be used instead of it for legalization, however the original node continues
+ // to live on in the DAG.
+ // The conclusion is that though there may be nodes marked NewNode in the DAG,
+ // all uses of such nodes are also marked NewNode: the result is a fungus of
+ // NewNodes growing on top of the useful nodes, and perhaps using them, but
+ // not used by them.
+
+ // If a value is mapped by ReplacedValues, then it must have no uses, except
+ // by nodes marked NewNode (see above).
+
+ // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+ // Note that ReplacedValues should be applied iteratively.
+
+ // Note that the ReplacedValues map may also map deleted nodes (by iterating
+ // over the DAG we never dereference deleted nodes). This means that it may
+ // also map nodes marked NewNode if the deallocated memory was reallocated as
+ // another node, and that new node was not seen by the LegalizeTypes machinery
+ // (for example because it was created but not used). In general, we cannot
+ // distinguish between new nodes and deleted nodes.
+ SmallVector<SDNode*, 16> NewNodes;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ // Remember nodes marked NewNode - they are subject to extra checking below.
+ if (I->getNodeId() == NewNode)
+ NewNodes.push_back(I);
+
+ for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
+ SDValue Res(I, i);
+ bool Failed = false;
+
+ unsigned Mapped = 0;
+ if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ SDValue NewVal = ReplacedValues[Res];
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+ while (I != ReplacedValues.end()) {
+ NewVal = I->second;
+ I = ReplacedValues.find(NewVal);
+ }
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
+ }
+ if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+ Mapped |= 2;
+ if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+ Mapped |= 4;
+ if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+ Mapped |= 8;
+ if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+ Mapped |= 16;
+ if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+ Mapped |= 32;
+ if (SplitVectors.find(Res) != SplitVectors.end())
+ Mapped |= 64;
+ if (WidenedVectors.find(Res) != WidenedVectors.end())
+ Mapped |= 128;
+
+ if (I->getNodeId() != Processed) {
+ // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+ // marked NewNode too, since a deleted node may have been reallocated as
+ // another node that has not been seen by the LegalizeTypes machinery.
+ if ((I->getNodeId() == NewNode && Mapped > 1) ||
+ (I->getNodeId() != NewNode && Mapped != 0)) {
+ dbgs() << "Unprocessed value in a map!";
+ Failed = true;
+ }
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+ if (Mapped > 1) {
+ dbgs() << "Value with legal type was transformed!";
+ Failed = true;
+ }
+ } else {
+ if (Mapped == 0) {
+ dbgs() << "Processed value not in any map!";
+ Failed = true;
+ } else if (Mapped & (Mapped - 1)) {
+ dbgs() << "Value in multiple maps!";
+ Failed = true;
+ }
+ }
+
+ if (Failed) {
+ if (Mapped & 1)
+ dbgs() << " ReplacedValues";
+ if (Mapped & 2)
+ dbgs() << " PromotedIntegers";
+ if (Mapped & 4)
+ dbgs() << " SoftenedFloats";
+ if (Mapped & 8)
+ dbgs() << " ScalarizedVectors";
+ if (Mapped & 16)
+ dbgs() << " ExpandedIntegers";
+ if (Mapped & 32)
+ dbgs() << " ExpandedFloats";
+ if (Mapped & 64)
+ dbgs() << " SplitVectors";
+ if (Mapped & 128)
+ dbgs() << " WidenedVectors";
+ dbgs() << "\n";
+ llvm_unreachable(0);
+ }
+ }
+ }
+
+ // Checked that NewNodes are only used by other NewNodes.
+ for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+ SDNode *N = NewNodes[i];
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+ bool Changed = false;
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+ Dummy.setNodeId(Unanalyzed);
+
+ // The root of the dag may dangle to deleted nodes until the type legalizer is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+ // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+ // non-leaves.
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ if (I->getNumOperands() == 0) {
+ I->setNodeId(ReadyToProcess);
+ Worklist.push_back(I);
+ } else {
+ I->setNodeId(Unanalyzed);
+ }
+ }
+
+ // Now that we have a set of nodes to process, handle them all.
+ while (!Worklist.empty()) {
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+ assert(N->getNodeId() == ReadyToProcess &&
+ "Node should be ready if on worklist!");
+
+ if (IgnoreNodeResults(N))
+ goto ScanOperands;
+
+ // Scan the values produced by the node, checking to see if any result
+ // types are illegal.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+ EVT ResultVT = N->getValueType(i);
+ switch (getTypeAction(ResultVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ // The following calls must take care of *all* of the node's results,
+ // not just the illegal result they were passed (this includes results
+ // with a legal type). Results can be remapped using ReplaceValueWith,
+ // or their promoted/expanded/etc values registered in PromotedIntegers,
+ // ExpandedIntegers etc.
+ case TargetLowering::TypePromoteInteger:
+ PromoteIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandInteger:
+ ExpandIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSoftenFloat:
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandFloat:
+ ExpandFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeScalarizeVector:
+ ScalarizeVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSplitVector:
+ SplitVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeWidenVector:
+ WidenVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ }
+ }
+
+ScanOperands:
+ // Scan the operand list for the node, handling any nodes with operands that
+ // are illegal.
+ {
+ unsigned NumOperands = N->getNumOperands();
+ bool NeedsReanalyzing = false;
+ unsigned i;
+ for (i = 0; i != NumOperands; ++i) {
+ if (IgnoreNodeResults(N->getOperand(i).getNode()))
+ continue;
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ switch (getTypeAction(OpVT)) {
+ case TargetLowering::TypeLegal:
+ continue;
+ // The following calls must either replace all of the node's results
+ // using ReplaceValueWith, and return "false"; or update the node's
+ // operands in place, and return "true".
+ case TargetLowering::TypePromoteInteger:
+ NeedsReanalyzing = PromoteIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandInteger:
+ NeedsReanalyzing = ExpandIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ NeedsReanalyzing = SoftenFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandFloat:
+ NeedsReanalyzing = ExpandFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSplitVector:
+ NeedsReanalyzing = SplitVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeWidenVector:
+ NeedsReanalyzing = WidenVectorOperand(N, i);
+ Changed = true;
+ break;
+ }
+ break;
+ }
+
+ // The sub-method updated N in place. Check to see if any operands are new,
+ // and if so, mark them. If the node needs revisiting, don't add all users
+ // to the worklist etc.
+ if (NeedsReanalyzing) {
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(NewNode);
+ // Recompute the NodeId and correct processed operands, adding the node to
+ // the worklist if ready.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M == N)
+ // The node didn't morph - nothing special to do, it will be revisited.
+ continue;
+
+ // The node morphed - this is equivalent to legalizing by replacing every
+ // value of N with the corresponding value of M. So do that now.
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ // Replacing the value takes care of remapping the new value.
+ ReplaceValueWith(SDValue(N, i), SDValue(M, i));
+ assert(N->getNodeId() == NewNode && "Unexpected node state!");
+ // The node continues to live on as part of the NewNode fungus that
+ // grows on top of the useful nodes. Nothing more needs to be done
+ // with it - move on to the next node.
+ continue;
+ }
+
+ if (i == NumOperands) {
+ DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
+ }
+ }
+NodeDone:
+
+ // If we reach here, the node was processed, potentially creating new nodes.
+ // Mark it as processed and add its users to the worklist as appropriate.
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(Processed);
+
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ int NodeId = User->getNodeId();
+
+ // This node has two options: it can either be a new node or its Node ID
+ // may be a count of the number of operands it has that are not ready.
+ if (NodeId > 0) {
+ User->setNodeId(NodeId-1);
+
+ // If this was the last use it was waiting on, add it to the ready list.
+ if (NodeId-1 == ReadyToProcess)
+ Worklist.push_back(User);
+ continue;
+ }
+
+ // If this is an unreachable new node, then ignore it. If it ever becomes
+ // reachable by being used by a newly created node then it will be handled
+ // by AnalyzeNewNode.
+ if (NodeId == NewNode)
+ continue;
+
+ // Otherwise, this node is new: this is the first operand of it that
+ // became ready. Its new NodeId is the number of operands it has minus 1
+ // (as this node is now processed).
+ assert(NodeId == Unanalyzed && "Unknown node ID!");
+ User->setNodeId(User->getNumOperands() - 1);
+
+ // If the node only has a single operand, it is now ready.
+ if (User->getNumOperands() == 1)
+ Worklist.push_back(User);
+ }
+ }
+
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ // If the root changed (e.g. it was a dead load) update the root.
+ DAG.setRoot(Dummy.getValue());
+
+ // Remove dead nodes. This is important to do for cleanliness but also before
+ // the checking loop below. Implicit folding by the DAG.getNode operators and
+ // node morphing can cause unreachable nodes to be around with their flags set
+ // to new.
+ DAG.RemoveDeadNodes();
+
+ // In a debug build, scan all the nodes to make sure we found them all. This
+ // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ bool Failed = false;
+
+ // Check that all result types are legal.
+ if (!IgnoreNodeResults(I))
+ for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(I->getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ // Check that all operand types are legal.
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
+ !isTypeLegal(I->getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ if (I->getNodeId() != Processed) {
+ if (I->getNodeId() == NewNode)
+ dbgs() << "New node not analyzed?\n";
+ else if (I->getNodeId() == Unanalyzed)
+ dbgs() << "Unanalyzed node not noticed?\n";
+ else if (I->getNodeId() > 0)
+ dbgs() << "Operand not processed?\n";
+ else if (I->getNodeId() == ReadyToProcess)
+ dbgs() << "Not added to worklist?\n";
+ Failed = true;
+ }
+
+ if (Failed) {
+ I->dump(&DAG); dbgs() << "\n";
+ llvm_unreachable(0);
+ }
+ }
+#endif
+
+ return Changed;
+}
+
+/// AnalyzeNewNode - The specified node is the root of a subtree of potentially
+/// new nodes. Correct any processed operands (this may change the node) and
+/// calculate the NodeId. If the node itself changes to a processed node, it
+/// is not remapped - the caller needs to take care of this.
+/// Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+ // If this was an existing node that is already done, we're done.
+ if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+ return N;
+
+ // Remove any stale map entries.
+ ExpungeNode(N);
+
+ // Okay, we know that this node is new. Recursively walk all of its operands
+ // to see if they are new also. The depth of this walk is bounded by the size
+ // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+ // about revisiting of nodes.
+ //
+ // As we walk the operands, keep track of the number of nodes that are
+ // processed. If non-zero, this will become the new nodeid of this node.
+ // Operands may morph when they are analyzed. If so, the node will be
+ // updated after all operands have been analyzed. Since this is rare,
+ // the code tries to minimize overhead in the non-morphing case.
+
+ SmallVector<SDValue, 8> NewOps;
+ unsigned NumProcessed = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue OrigOp = N->getOperand(i);
+ SDValue Op = OrigOp;
+
+ AnalyzeNewValue(Op); // Op may morph.
+
+ if (Op.getNode()->getNodeId() == Processed)
+ ++NumProcessed;
+
+ if (!NewOps.empty()) {
+ // Some previous operand changed. Add this one to the list.
+ NewOps.push_back(Op);
+ } else if (Op != OrigOp) {
+ // This is the first operand to change - add all operands so far.
+ NewOps.append(N->op_begin(), N->op_begin() + i);
+ NewOps.push_back(Op);
+ }
+ }
+
+ // Some operands changed - update the node.
+ if (!NewOps.empty()) {
+ SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size());
+ if (M != N) {
+ // The node morphed into a different node. Normally for this to happen
+ // the original node would have to be marked NewNode. However this can
+ // in theory momentarily not be the case while ReplaceValueWith is doing
+ // its stuff. Mark the original node NewNode to help sanity checking.
+ N->setNodeId(NewNode);
+ if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+ // It morphed into a previously analyzed node - nothing more to do.
+ return M;
+
+ // It morphed into a different new node. Do the equivalent of passing
+ // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+ // to remap the operands, since they are the same as the operands we
+ // remapped above.
+ N = M;
+ ExpungeNode(N);
+ }
+ }
+
+ // Calculate the NodeId.
+ N->setNodeId(N->getNumOperands() - NumProcessed);
+ if (N->getNodeId() == ReadyToProcess)
+ Worklist.push_back(N);
+
+ return N;
+}
+
+/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+ Val.setNode(AnalyzeNewNode(Val.getNode()));
+ if (Val.getNode()->getNodeId() == Processed)
+ // We were passed a processed node, or it morphed into one - remap it.
+ RemapValue(Val);
+}
+
+/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always performs correct mappings. In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+ if (N->getNodeId() != NewNode)
+ return;
+
+ // If N is not remapped by ReplacedValues then there is nothing to do.
+ unsigned i, e;
+ for (i = 0, e = N->getNumValues(); i != e; ++i)
+ if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+ break;
+
+ if (i == e)
+ return;
+
+ // Remove N from all maps - this is expensive but rare.
+
+ for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+ E = PromotedIntegers.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+ E = SoftenedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+ E = ScalarizedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+ E = WidenedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+ E = ReplacedValues.end(); I != E; ++I)
+ RemapValue(I->second);
+
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ ReplacedValues.erase(SDValue(N, i));
+}
+
+/// RemapValue - If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+ if (I != ReplacedValues.end()) {
+ // Use path compression to speed up future lookups if values get multiply
+ // replaced with other values.
+ RemapValue(I->second);
+ N = I->second;
+ assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!");
+ }
+}
+
+namespace {
+ /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
+ /// updates to nodes and recomputes their ready state.
+ class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+ DAGTypeLegalizer &DTL;
+ SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+ public:
+ explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+ SmallSetVector<SDNode*, 16> &nta)
+ : SelectionDAG::DAGUpdateListener(dtl.getDAG()),
+ DTL(dtl), NodesToAnalyze(nta) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ // It is possible, though rare, for the deleted node N to occur as a
+ // target in a map, so note the replacement N -> E in ReplacedValues.
+ assert(E && "Node not replaced?");
+ DTL.NoteDeletion(N, E);
+
+ // In theory the deleted node could also have been scheduled for analysis.
+ // So remove it from the set of nodes which will be analyzed.
+ NodesToAnalyze.remove(N);
+
+ // In general nothing needs to be done for E, since it didn't change but
+ // only gained new uses. However N -> E was just added to ReplacedValues,
+ // and the result of a ReplacedValues mapping is not allowed to be marked
+ // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+ NodesToAnalyze.insert(E);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Node updates can mean pretty much anything. It is possible that an
+ // operand was set to something already processed (f.e.) in which case
+ // this node could become ready. Recompute its flags.
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ N->setNodeId(DAGTypeLegalizer::NewNode);
+ NodesToAnalyze.insert(N);
+ }
+ };
+}
+
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+ assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ ExpungeNode(From.getNode());
+ AnalyzeNewValue(To); // Expunges To.
+
+ // Anything that used the old node should now use the new one. Note that this
+ // can potentially cause recursive merging.
+ SmallSetVector<SDNode*, 16> NodesToAnalyze;
+ NodeUpdateListener NUL(*this, NodesToAnalyze);
+ do {
+ DAG.ReplaceAllUsesOfValueWith(From, To);
+
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe
+ // to skip. Note that this is not a morphing node - otherwise it would
+ // still be marked NewNode.
+ continue;
+
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new
+ // node instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ ReplacedValues[OldVal] = NewVal;
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
+ }
+ }
+ // When recursively update nodes with new nodes, it is possible to have
+ // new uses of From due to CSE. If this happens, replace the new uses of
+ // From with To.
+ } while (!From.use_empty());
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted integer");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = PromotedIntegers[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already promoted!");
+ OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for softened float");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = SoftenedFloats[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
+ OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+ // Note that in some cases vector operation operands may be greater than
+ // the vector element type. For example BUILD_VECTOR of type <1 x i1> with
+ // a constant i8 operand.
+ assert(Result.getValueType().getSizeInBits() >=
+ Op.getValueType().getVectorElementType().getSizeInBits() &&
+ "Invalid type for scalarized vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = ScalarizedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
+ OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded integer");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded float");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't split");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType().getVectorElementType() ==
+ Op.getValueType().getVectorElementType() &&
+ 2*Lo.getValueType().getVectorNumElements() ==
+ Op.getValueType().getVectorNumElements() &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for split vector");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ assert(Entry.first.getNode() == 0 && "Node already split");
+ Entry.first = Lo;
+ Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for widened vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = WidenedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node already widened!");
+ OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// BitConvertToInteger - Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getSizeInBits();
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
+ EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Only applies to vectors!");
+ unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+ EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+ EVT DestVT) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// The last parameter is FALSE if we are dealing with a node with legal
+/// result types and illegal operand. The second parameter denotes the type of
+/// illegal OperandNo in that case.
+/// The last parameter being TRUE means we are dealing with a
+/// node with illegal result types. The second parameter denotes the type of
+/// illegal ResNo in that case.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ if (LegalizeResult)
+ TLI.ReplaceNodeResults(N, Results, DAG);
+ else
+ TLI.LowerOperationWrapper(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom lower it after all.
+ return false;
+
+ // Make everything that once used N's values now use those in Results instead.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ // Propagate node ordering
+ DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N));
+ }
+ return true;
+}
+
+
+/// CustomWidenLowerNode - Widen the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ TLI.ReplaceNodeResults(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom widen lower its result after all.
+ return false;
+
+ // Update the widening map.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ return true;
+}
+
+SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ if (i != ResNo)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+ return SDValue(N->getOperand(ResNo));
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
+void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
+ // Currently all types are split in half.
+ if (!InVT.isVector()) {
+ LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ } else {
+ unsigned NumElements = InVT.getVectorNumElements();
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+ LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElements/2);
+ }
+}
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Pair.getDebugLoc();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(1));
+}
+
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
+ SDValue Index) {
+ DebugLoc dl = Index.getDebugLoc();
+ // Make sure the index type is big enough to compute in.
+ if (Index.getValueType().bitsGT(TLI.getPointerTy()))
+ Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
+ else
+ Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EltSize, Index.getValueType()));
+ return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
+}
+
+/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+ // Arbitrarily use dlHi for result DebugLoc
+ DebugLoc dlHi = Hi.getDebugLoc();
+ DebugLoc dlLo = Lo.getDebugLoc();
+ EVT LVT = Lo.getValueType();
+ EVT HVT = Hi.getValueType();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ LVT.getSizeInBits() + HVT.getSizeInBits());
+
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+ DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
+
+/// LibCallify - Convert the node into a libcall with the same prototype.
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+ bool isSigned) {
+ unsigned NumOps = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ if (NumOps == 0) {
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl);
+ } else if (NumOps == 1) {
+ SDValue Op = N->getOperand(0);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ } else if (NumOps == 2) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ }
+ SmallVector<SDValue, 8> Ops(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ Ops[i] = N->getOperand(i);
+
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0),
+ &Ops[0], NumOps, isSigned, dl);
+}
+
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo;
+}
+
+/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
+/// of the given type. A target boolean is an integer value, not necessarily of
+/// type i1, the bits of which conform to getBooleanContents.
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
+ DebugLoc dl = Bool.getDebugLoc();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector()));
+ return DAG.getNode(ExtendCode, dl, VT, Bool);
+}
+
+/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
+/// bits in Hi.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+ Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+ DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// SplitInteger - Return the lower and upper halves of Op's bits in a value
+/// type half the size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ SDValue &Lo, SDValue &Hi) {
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+ Op.getValueType().getSizeInBits()/2);
+ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
+/// only uses types natively supported by the target. Returns "true" if it made
+/// any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+ return DAGTypeLegalizer(*this).run();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 000000000000..54ea926241cf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,750 @@
+//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// shared between the code that implements the SelectionDAG::LegalizeTypes
+// method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAG_LEGALIZETYPES_H
+#define SELECTIONDAG_LEGALIZETYPES_H
+
+#define DEBUG_TYPE "legalize-types"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
+/// on it until only value types the target machine can handle are left. This
+/// involves promoting small sizes to large sizes or splitting up large values
+/// into small values.
+///
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+public:
+ // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
+ // about the state of the node. The enum has all the values.
+ enum NodeIdFlags {
+ /// ReadyToProcess - All operands have been processed, so this node is ready
+ /// to be handled.
+ ReadyToProcess = 0,
+
+ /// NewNode - This is a new node, not before seen, that was created in the
+ /// process of legalizing some other node.
+ NewNode = -1,
+
+ /// Unanalyzed - This node's ID needs to be set to the number of its
+ /// unprocessed operands.
+ Unanalyzed = -2,
+
+ /// Processed - This is a node that has already been processed.
+ Processed = -3
+
+ // 1+ - This is a node which has this many unprocessed operands.
+ };
+private:
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// simple value type, where the two bits correspond to the LegalizeAction
+ /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// getTypeAction - Return how we should legalize values of this type.
+ TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ bool isTypeLegal(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
+ }
+
+ /// IgnoreNodeResults - Pretend all of this node's results are legal.
+ bool IgnoreNodeResults(SDNode *N) const {
+ return N->getOpcode() == ISD::TargetConstant;
+ }
+
+ /// PromotedIntegers - For integer nodes that are below legal width, this map
+ /// indicates what promoted value to use.
+ SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;
+
+ /// ExpandedIntegers - For integer nodes that need to be expanded this map
+ /// indicates which operands are the expanded version of the input.
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;
+
+ /// SoftenedFloats - For floating point nodes converted to integers of
+ /// the same size, this map indicates the converted value to use.
+ SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
+
+ /// ExpandedFloats - For float nodes that need to be expanded this map
+ /// indicates which operands are the expanded version of the input.
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
+
+ /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
+ /// scalar value of type 'ty' to use.
+ SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;
+
+ /// SplitVectors - For nodes that need to be split this map indicates
+ /// which operands are the expanded version of the input.
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;
+
+ /// WidenedVectors - For vector nodes that need to be widened, indicates
+ /// the widened value to use.
+ SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;
+
+ /// ReplacedValues - For values that have been replaced with another,
+ /// indicates the replacement value to use.
+ SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;
+
+ /// Worklist - This defines a worklist of nodes to process. In order to be
+ /// pushed onto this worklist, all operands of a node must have already been
+ /// processed.
+ SmallVector<SDNode*, 128> Worklist;
+
+public:
+ explicit DAGTypeLegalizer(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+ }
+
+ /// run - This is the main entry point for the type legalizer. This does a
+ /// top-down traversal of the dag, legalizing types as it goes. Returns
+ /// "true" if it made any changes.
+ bool run();
+
+ void NoteDeletion(SDNode *Old, SDNode *New) {
+ ExpungeNode(Old);
+ ExpungeNode(New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+ ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+ }
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+private:
+ SDNode *AnalyzeNewNode(SDNode *N);
+ void AnalyzeNewValue(SDValue &Val);
+ void ExpungeNode(SDNode *N);
+ void PerformExpensiveChecks();
+ void RemapValue(SDValue &N);
+
+ // Common routines.
+ SDValue BitConvertToInteger(SDValue Op);
+ SDValue BitConvertVectorToIntegerVector(SDValue Op);
+ SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+ bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+ bool CustomWidenLowerNode(SDNode *N, EVT VT);
+
+ /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES
+ /// node with the corresponding input operand, except for the result 'ResNo',
+ /// for which the corresponding input operand is returned.
+ SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
+
+ SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
+ SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+ SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
+ void ReplaceValueWith(SDValue From, SDValue To);
+ void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Promotion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetPromotedInteger - Given a processed operand Op which was promoted to a
+ /// larger integer type, this returns the promoted value. The low bits of the
+ /// promoted value corresponding to the original type are exactly equal to Op.
+ /// The extra bits contain rubbish, so the promoted value may need to be zero-
+ /// or sign-extended from the original type before it is usable (the helpers
+ /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+ /// For example, if Op is an i16 and was promoted to an i32, then this method
+ /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+ /// 16 bits of which contain rubbish.
+ SDValue GetPromotedInteger(SDValue Op) {
+ SDValue &PromotedOp = PromotedIntegers[Op];
+ RemapValue(PromotedOp);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedInteger(SDValue Op, SDValue Result);
+
+ /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
+ /// final size.
+ SDValue SExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ }
+
+ /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
+ /// final size.
+ SDValue ZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
+ }
+
+ // Integer Result Promotion.
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_AssertSext(SDNode *N);
+ SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+ SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
+ SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntRes_Constant(SDNode *N);
+ SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntRes_CTLZ(SDNode *N);
+ SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+ SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SDIV(SDNode *N);
+ SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_VSELECT(SDNode *N);
+ SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+ SDValue PromoteIntRes_SETCC(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+ SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UDIV(SDNode *N);
+ SDValue PromoteIntRes_UNDEF(SDNode *N);
+ SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+
+ // Integer Operand Promotion.
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
+ SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
+ SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+
+ void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Expansion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedInteger - Given a processed operand Op which was expanded into
+ /// two integers of half the size, this returns the two halves. The low bits
+ /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi.
+ /// For example, if Op is an i64 which was expanded into two i32's, then this
+ /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+ /// Op, and Hi being equal to the upper 32 bits.
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Integer Result Expansion.
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Integer Operand Expansion.
+ bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandIntOp_BITCAST(SDNode *N);
+ SDValue ExpandIntOp_BR_CC(SDNode *N);
+ SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+ SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+ SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+ SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
+
+ void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+ /// integer of the same size, this returns the integer. The integer contains
+ /// exactly the same bits as Op - only the type changed. For example, if Op
+ /// is an f32 which was softened to an i32, then this method returns an i32,
+ /// the bits of which coincide with those of Op.
+ SDValue GetSoftenedFloat(SDValue Op) {
+ SDValue &SoftenedOp = SoftenedFloats[Op];
+ RemapValue(SoftenedOp);
+ assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
+ return SoftenedOp;
+ }
+ void SetSoftenedFloat(SDValue Op, SDValue Result);
+
+ // Result Float to Integer Conversion.
+ void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
+ SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCEIL(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FDIV(SDNode *N);
+ SDValue SoftenFloatRes_FEXP(SDNode *N);
+ SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+ SDValue SoftenFloatRes_FLOG(SDNode *N);
+ SDValue SoftenFloatRes_FLOG2(SDNode *N);
+ SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMA(SDNode *N);
+ SDValue SoftenFloatRes_FMUL(SDNode *N);
+ SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
+ SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatRes_FPOW(SDNode *N);
+ SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREM(SDNode *N);
+ SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSQRT(SDNode *N);
+ SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_UNDEF(SDNode *N);
+ SDValue SoftenFloatRes_VAARG(SDNode *N);
+ SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+
+ // Operand Float to Integer Conversion.
+ bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
+ SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
+ SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatOp_SETCC(SDNode *N);
+ SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Float Expansion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedFloat - Given a processed operand Op which was expanded into
+ /// two floating point values of half the size, this returns the two halves.
+ /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+ /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
+ /// into two f64's, then this method returns the two f64's, with Lo being
+ /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Float Result Expansion.
+ void ExpandFloatResult(SDNode *N, unsigned ResNo);
+ void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Float Operand Expansion.
+ bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+ SDValue ExpandFloatOp_SETCC(SDNode *N);
+ SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Scalarization Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetScalarizedVector - Given a processed one-element vector Op which was
+ /// scalarized to its element type, this returns the element. For example,
+ /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32.
+ SDValue GetScalarizedVector(SDValue Op) {
+ SDValue &ScalarizedOp = ScalarizedVectors[Op];
+ RemapValue(ScalarizedOp);
+ assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+ return ScalarizedOp;
+ }
+ void SetScalarizedVector(SDValue Op, SDValue Result);
+
+ // Vector Result Scalarization: <1 x ty> -> ty.
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_InregOp(SDNode *N);
+
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue ScalarizeVecRes_VSELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_SETCC(SDNode *N);
+ SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+ SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_VSETCC(SDNode *N);
+
+ // Vector Operand Scalarization: <1 x ty> -> ty.
+ bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+ SDValue ScalarizeVecOp_EXTEND(SDNode *N);
+ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Splitting Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSplitVector - Given a processed vector Op which was split into vectors
+ /// of half the size, this method returns the halves. The first elements of
+ /// Op coincide with the elements of Lo; the remaining elements of Op coincide
+ /// with the elements of Hi: Op is what you would get by concatenating Lo and
+ /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then
+ /// this method returns the two v4i32's, with Lo corresponding to the first 4
+ /// elements of Op, and Hi to the last 4 elements.
+ void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+ void SplitVectorResult(SDNode *N, unsigned OpNo);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ SDValue &Hi);
+
+ // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+ SDValue SplitVecOp_BITCAST(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_VSETCC(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetWidenedVector - Given a processed vector Op which was widened into a
+ /// larger vector, this method returns the larger vector. The elements of
+ /// the returned vector consist of the elements of Op followed by elements
+ /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+ /// v4i32, then this method returns a v4i32 for which the first two elements
+ /// are the same as those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ SDValue &WidenedOp = WidenedVectors[Op];
+ RemapValue(WidenedOp);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+ // Widen Vector Result Promotion.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VSETCC(SDNode* N);
+
+ SDValue WidenVecRes_Ternary(SDNode *N);
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_POWI(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+ SDValue WidenVecRes_InregOp(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_SETCC(SDNode* N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Helper GenWidenVectorLoads - Helper function to generate a set of
+ /// loads to load a vector with a resulting wider type. It takes
+ /// LdChain: list of chains for the load to be generated.
+ /// Ld: load to widen
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD);
+
+ /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+ /// loads to load a ector with a resulting wider type. It takes
+ /// LdChain: list of chains for the load to be generated.
+ /// Ld: load to widen
+ /// ExtType: extension element type
+ SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+ /// Helper genWidenVectorStores - Helper function to generate a set of
+ /// stores to store a widen vector into non widen memory
+ /// StChain: list of chains for the stores we have generated
+ /// ST: store of a widen value
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST);
+
+ /// Helper genWidenVectorTruncStores - Helper function to generate a set of
+ /// stores to store a truncate widen vector into non widen memory
+ /// StChain: list of chains for the stores we have generated
+ /// ST: store of a widen value
+ void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST);
+
+ /// Modifies a vector input (widen or narrows) to a vector of NVT. The
+ /// input vector must have the same element type as NVT.
+ SDValue ModifyToType(SDValue InOp, EVT WidenVT);
+
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // not necessarily identical types. As such they can be used for splitting
+ // vectors and expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+ /// which is split (or expanded) into two not necessarily identical pieces.
+ void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
+
+ /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+ /// high parts of the given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // identical types of half the size, and that the Lo/Hi part is stored first
+ // in memory on little/big-endian machines, followed by the Hi/Lo part. As
+ // such they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ // Generic Result Expansion.
+ void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Generic Operand Expansion.
+ SDValue ExpandOp_BITCAST (SDNode *N);
+ SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
+ SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
+ SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+ SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 000000000000..222d1c043a63
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,525 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. The Lo/Hi part
+// is required to be stored first in memory on little/big-endian machines.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+// There are no requirements on how the type is represented in memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DataLayout.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part. This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
+void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetExpandedOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Convert the integer operand instead.
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // Convert the expanded pieces of the input.
+ GetExpandedOp(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeSplitVector:
+ GetSplitVector(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element instead.
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeWidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
+ InOp = GetWidenedVector(InOp);
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ InVT.getVectorNumElements()/2);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ }
+
+ if (InVT.isVector() && OutVT.isInteger()) {
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
+ // is legal but the result is not.
+ unsigned NumElems = 2;
+ EVT ElemVT = NOutVT;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+ // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+ while (!isTypeLegal(NVT)) {
+ unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+ // If the element size is smaller than byte, bail.
+ if (NewSizeInBits < 8)
+ break;
+ NumElems *= 2;
+ ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits);
+ NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+ }
+
+ if (isTypeLegal(NVT)) {
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
+
+ SmallVector<SDValue, 8> Vals;
+ for (unsigned i = 0; i < NumElems; ++i)
+ Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT,
+ CastInOp, DAG.getIntPtrConstant(i)));
+
+ // Build Lo, Hi pair by pairing extracted elements if needed.
+ unsigned Slot = 0;
+ for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
+ // Each iteration will BUILD_PAIR two nodes and append the result until
+ // there are only two nodes left, i.e. Lo and Hi.
+ SDValue LHS = Vals[Slot];
+ SDValue RHS = Vals[Slot + 1];
+
+ if (TLI.isBigEndian())
+ std::swap(LHS, RHS);
+
+ Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
+ EVT::getIntegerVT(
+ *DAG.getContext(),
+ LHS.getValueType().getSizeInBits() << 1),
+ LHS, RHS));
+ }
+ Lo = Vals[Slot++];
+ Hi = Vals[Slot++];
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return;
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack.
+ assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and expanded destination types.
+ unsigned Alignment =
+ TLI.getDataLayout()->getPrefTypeAlignment(NOutVT.
+ getTypeForEVT(*DAG.getContext()));
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Load the first half from the stack slot.
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo,
+ false, false, false, 0);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the second half from the stack slot.
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), false,
+ false, false, MinAlign(Alignment, IncrementSize));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+ Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ EVT OldEltVT = OldVec.getValueType().getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ EVT OldVT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+ if (OldVT != OldEltVT) {
+ // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+ // the input vector. If so, extend the elements of the input vector to the
+ // same bitwidth as the result before expanding.
+ assert(OldEltVT.bitsLT(OldVT) && "Result type smaller then element type!");
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, 2*OldElts),
+ OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ // Make sure the type of Idx is big enough to hold the new values.
+ if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+ DebugLoc dl = N->getDebugLoc();
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ isVolatile, isNonTemporal, isInvariant, Alignment);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ isVolatile, isNonTemporal, isInvariant,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ const unsigned Align = N->getConstantOperandVal(3);
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0).isVector()) {
+ // An illegal expanding type is being converted to a legal vector type.
+ // Make a two element vector out of the expanded parts and convert that
+ // instead, but only if the new vector type is legal (otherwise there
+ // is no point, and it might create expansion loops). For example, on
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+ 2);
+
+ if (isTypeLegal(NVT)) {
+ SDValue Parts[2];
+ GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+
+ if (TLI.isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
+ }
+ }
+
+ // Otherwise, store to a temporary and load out again as the new type.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(OldVT == VecVT.getVectorElementType() &&
+ "BUILD_VECTOR operand type doesn't match vector element type!");
+
+ // Build a vector of twice the length out of the expanded elements.
+ // For example <3 x i64> -> <6 x i32>.
+ std::vector<SDValue> NewElts;
+ NewElts.reserve(NumElts*2);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(i), Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ NewElts.push_back(Lo);
+ NewElts.push_back(Hi);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, NewElts.size()),
+ &NewElts[0], NewElts.size());
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = N->getOperand(1);
+ EVT OldEVT = Val.getValueType();
+ EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
+
+ assert(OldEVT == VecVT.getVectorElementType() &&
+ "Inserted element type doesn't match vector element type!");
+
+ // Bitconvert to a vector of twice the length with elements of the expanded
+ // type, insert the expanded vector elements, and then convert back.
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ NewVecVT, N->getOperand(0));
+
+ SDValue Lo, Hi;
+ GetExpandedOp(Val, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue Idx = N->getOperand(2);
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl,
+ Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+ "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ Ops[0] = N->getOperand(0);
+ SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+ for (unsigned i = 1; i < NumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+ assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ DebugLoc dl = N->getDebugLoc();
+
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ St->getValue().getValueType());
+ SDValue Chain = St->getChain();
+ SDValue Ptr = St->getBasePtr();
+ unsigned Alignment = St->getAlignment();
+ bool isVolatile = St->isVolatile();
+ bool isNonTemporal = St->isNonTemporal();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(St->getValue(), Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ St->getPointerInfo().getWithOffset(IncrementSize),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetSplitOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH, CL, CH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(1), LL, LH);
+ GetSplitOp(N->getOperand(2), RL, RH);
+
+ SDValue Cond = N->getOperand(0);
+ CL = CH = Cond;
+ if (Cond.getValueType().isVector()) {
+ assert(Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ "Condition legalized before result?");
+ unsigned NumElements = Cond.getValueType().getVectorNumElements();
+ EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2);
+ CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+ DAG.getIntPtrConstant(0));
+ CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+ DAG.getIntPtrConstant(NumElements / 2));
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(2), LL, LH);
+ GetSplitOp(N->getOperand(3), RL, RH);
+
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+ N->getOperand(1), LL, RL, N->getOperand(4));
+ Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+ N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getUNDEF(LoVT);
+ Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 000000000000..c6e066e2709b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,770 @@
+//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types. For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
+// or operations that happen to take a vector which are custom-lowered;
+// the legalization for such operations never produces nodes
+// with illegal types, so it's okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+class VectorLegalizer {
+ SelectionDAG& DAG;
+ const TargetLowering &TLI;
+ bool Changed; // Keep track of whether anything changed
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
+
+ // Adds a node to the translation cache
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+ // Legalizes the given node
+ SDValue LegalizeOp(SDValue Op);
+ // Assuming the node is legal, "legalize" the results
+ SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+ // Implements unrolling a VSETCC.
+ SDValue UnrollVSETCC(SDValue Op);
+ // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+ // isn't legal.
+ // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+ // SINT_TO_FLOAT and SHR on vectors isn't legal.
+ SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+ // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+ SDValue ExpandSEXTINREG(SDValue Op);
+ // Implement vselect in terms of XOR, AND, OR when blend is not supported
+ // by the target.
+ SDValue ExpandVSELECT(SDValue Op);
+ SDValue ExpandSELECT(SDValue Op);
+ SDValue ExpandLoad(SDValue Op);
+ SDValue ExpandStore(SDValue Op);
+ SDValue ExpandFNEG(SDValue Op);
+ // Implements vector promotion; this is essentially just bitcasting the
+ // operands to a different type and bitcasting the result back to the
+ // original type.
+ SDValue PromoteVectorOp(SDValue Op);
+ // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
+ // operand to the next size up.
+ SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
+
+ public:
+ bool Run();
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+ // Before we start legalizing vector nodes, check if there are any vectors.
+ bool HasVectors = false;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) {
+ // Check if the values of the nodes contain vectors. We don't need to check
+ // the operands because we are going to check their values at some point.
+ for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
+ J != E; ++J)
+ HasVectors |= J->isVector();
+
+ // If we found a vector node we can start the legalization.
+ if (HasVectors)
+ break;
+ }
+
+ // If this basic block has no vectors then no need to legalize vectors.
+ if (!HasVectors)
+ return false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+
+ return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+ // Generic legalization: just pass the operand through.
+ for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+ return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDNode* Node = Op.getNode();
+
+ // Legalize the operands
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+ SDValue Result =
+ SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
+
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+ if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT()))
+ return TranslateLegalizeResults(Op, Result);
+ Changed = true;
+ return LegalizeOp(ExpandLoad(Op));
+ }
+ } else if (Op.getOpcode() == ISD::STORE) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT StVT = ST->getMemoryVT();
+ MVT ValVT = ST->getValue().getSimpleValueType();
+ if (StVT.isVector() && ST->isTruncatingStore())
+ switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ return TranslateLegalizeResults(Op, Result);
+ case TargetLowering::Custom:
+ Changed = true;
+ return LegalizeOp(TLI.LowerOperation(Result, DAG));
+ case TargetLowering::Expand:
+ Changed = true;
+ return LegalizeOp(ExpandStore(Op));
+ }
+ }
+
+ bool HasVectorValue = false;
+ for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+ J != E;
+ ++J)
+ HasVectorValue |= J->isVector();
+ if (!HasVectorValue)
+ return TranslateLegalizeResults(Op, Result);
+
+ EVT QueryType;
+ switch (Op.getOpcode()) {
+ default:
+ return TranslateLegalizeResults(Op, Result);
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ case ISD::FMA:
+ case ISD::SIGN_EXTEND_INREG:
+ QueryType = Node->getValueType(0);
+ break;
+ case ISD::FP_ROUND_INREG:
+ QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ QueryType = Node->getOperand(0).getValueType();
+ break;
+ }
+
+ switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ case TargetLowering::Promote:
+ switch (Op.getOpcode()) {
+ default:
+ // "Promote" the operation by bitcasting
+ Result = PromoteVectorOp(Op);
+ Changed = true;
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // "Promote" the operation by extending the operand.
+ Result = PromoteVectorOpINT_TO_FP(Op);
+ Changed = true;
+ break;
+ }
+ break;
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom: {
+ SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
+ if (Tmp1.getNode()) {
+ Result = Tmp1;
+ break;
+ }
+ // FALL THROUGH
+ }
+ case TargetLowering::Expand:
+ if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
+ Result = ExpandSEXTINREG(Op);
+ else if (Node->getOpcode() == ISD::VSELECT)
+ Result = ExpandVSELECT(Op);
+ else if (Node->getOpcode() == ISD::SELECT)
+ Result = ExpandSELECT(Op);
+ else if (Node->getOpcode() == ISD::UINT_TO_FP)
+ Result = ExpandUINT_TO_FLOAT(Op);
+ else if (Node->getOpcode() == ISD::FNEG)
+ Result = ExpandFNEG(Op);
+ else if (Node->getOpcode() == ISD::SETCC)
+ Result = UnrollVSETCC(Op);
+ else
+ Result = DAG.UnrollVectorOp(Op.getNode());
+ break;
+ }
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op) {
+ Result = LegalizeOp(Result);
+ Changed = true;
+ }
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
+ // Vector "promotion" is basically just bitcasting and doing the operation
+ // in a different type. For example, x86 promotes ISD::AND on v2i32 to
+ // v1i64.
+ MVT VT = Op.getSimpleValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+}
+
+SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
+ // INT_TO_FP operations may require the input operand be promoted even
+ // when the type is otherwise legal.
+ EVT VT = Op.getOperand(0).getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+
+ // Normal getTypeToPromoteTo() doesn't work here, as that will promote
+ // by widening the vector w/ the same element width and twice the number
+ // of elements. We want the other way around, the same number of elements,
+ // each twice the width.
+ //
+ // Increase the bitwidth of the element to the next pow-of-two
+ // (which is greater than 8 bits).
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
+ assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
+
+ // Build a new vector type and check if it is legal.
+ MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0],
+ Operands.size());
+}
+
+
+SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+
+ EVT SrcEltVT = SrcVT.getScalarType();
+ EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
+
+ if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+ // When elements in a vector is not byte-addressable, we cannot directly
+ // load each element by advancing pointer, which could only address bytes.
+ // Instead, we load all significant words, mask bits off, and concatenate
+ // them to form each element. Finally, they are extended to destination
+ // scalar type to build the destination vector.
+ EVT WideVT = TLI.getPointerTy();
+
+ assert(WideVT.isRound() &&
+ "Could not handle the sophisticated case when the widest integer is"
+ " not power of 2.");
+ assert(WideVT.bitsGE(SrcEltVT) &&
+ "Type is not legalized?");
+
+ unsigned WideBytes = WideVT.getStoreSize();
+ unsigned Offset = 0;
+ unsigned RemainingBytes = SrcVT.getStoreSize();
+ SmallVector<SDValue, 8> LoadVals;
+
+ while (RemainingBytes > 0) {
+ SDValue ScalarLoad;
+ unsigned LoadBytes = WideBytes;
+
+ if (RemainingBytes >= LoadBytes) {
+ ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ } else {
+ EVT LoadVT = WideVT;
+ while (RemainingBytes < LoadBytes) {
+ LoadBytes >>= 1; // Reduce the load size by half.
+ LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
+ }
+ ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LoadVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ }
+
+ RemainingBytes -= LoadBytes;
+ Offset += LoadBytes;
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(LoadBytes));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ // Extract bits, pack and extend/trunc them into destination type.
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT);
+
+ unsigned BitOffset = 0;
+ unsigned WideIdx = 0;
+ unsigned WideBits = WideVT.getSizeInBits();
+
+ for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
+ SDValue Lo, Hi, ShAmt;
+
+ if (BitOffset < WideBits) {
+ ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT));
+ Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
+ }
+
+ BitOffset += SrcEltBits;
+ if (BitOffset >= WideBits) {
+ WideIdx++;
+ Offset -= WideBits;
+ if (Offset > 0) {
+ ShAmt = DAG.getConstant(SrcEltBits - Offset,
+ TLI.getShiftAmountTy(WideVT));
+ Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
+ }
+ }
+
+ if (Hi.getNode())
+ Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
+
+ switch (ExtType) {
+ default: llvm_unreachable("Unknown extended-load op!");
+ case ISD::EXTLOAD:
+ Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::ZEXTLOAD:
+ Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::SEXTLOAD:
+ ShAmt = DAG.getConstant(WideBits - SrcEltBits,
+ TLI.getShiftAmountTy(WideVT));
+ Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ }
+ Vals.push_back(Lo);
+ }
+ } else {
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
+ Op.getNode()->getValueType(0).getScalarType(),
+ Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ Vals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getNode()->getValueType(0), &Vals[0], Vals.size());
+
+ AddLegalizedOperand(Op.getValue(0), Value);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return (Op.getResNo() ? NewChain : Value);
+}
+
+SDValue VectorLegalizer::ExpandStore(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ SDValue Chain = ST->getChain();
+ SDValue BasePTR = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ unsigned NumElem = StVT.getVectorNumElements();
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ // Cast floats into integers
+ unsigned ScalarSize = MemSclVT.getSizeInBits();
+
+ // Round odd types to the next pow of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+
+ // Store Stride in bytes
+ unsigned Stride = ScalarSize/8;
+ // Extract each of the elements from the original vector
+ // and save them into memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ RegSclVT, Value, DAG.getIntPtrConstant(Idx));
+
+ // This scalar TruncStore may be illegal, but we legalize it later.
+ SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
+ ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ Stores.push_back(Store);
+ }
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ AddLegalizedOperand(Op, TF);
+ return TF;
+}
+
+SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+ // Lower a select instruction where the condition is a scalar and the
+ // operands are vectors. Lower this select to VSELECT and implement it
+ // using XOR AND OR. The selector bit is broadcasted.
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ assert(VT.isVector() && !Mask.getValueType().isVector()
+ && Op1.getValueType() == Op2.getValueType() && "Invalid type");
+
+ unsigned NumElem = VT.getVectorNumElements();
+
+ // If we can't even use the basic vector operations of
+ // AND,OR,XOR, we will have to scalarize the op.
+ // Notice that the operation may be 'promoted' which means that it is
+ // 'bitcasted' to another type which is handled.
+ // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Generate a mask operand.
+ EVT MaskTy = TLI.getSetCCResultType(VT);
+ assert(MaskTy.isVector() && "Invalid CC type");
+ assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits()
+ && "Invalid mask size");
+
+ // What is the size of each element in the vector mask.
+ EVT BitTy = MaskTy.getScalarType();
+
+ Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask,
+ DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
+ DAG.getConstant(0, BitTy));
+
+ // Broadcast the mask so that the entire vector is all-one or all zero.
+ SmallVector<SDValue, 8> Ops(NumElem, Mask);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size());
+
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
+SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Make sure that the SRA and SHL instructions are available.
+ if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+
+ unsigned BW = VT.getScalarType().getSizeInBits();
+ unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
+ SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT);
+
+ Op = Op.getOperand(0);
+ Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
+ return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
+}
+
+SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
+ // Implement VSELECT in terms of XOR, AND, OR
+ // on platforms which do not support blend natively.
+ EVT VT = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ // If we can't even use the basic vector operations of
+ // AND,OR,XOR, we will have to scalarize the op.
+ // Notice that the operation may be 'promoted' which means that it is
+ // 'bitcasted' to another type which is handled.
+ // This operation also isn't safe with AND, OR, XOR when the boolean
+ // type is 0/1 as we need an all ones vector constant to mask with.
+ // FIXME: Sign extend 1 to all ones if thats legal on the target.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getBooleanContents(true) !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits()
+ && "Invalid mask size");
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+ EVT VT = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Make sure that the SINT_TO_FP and SRL instructions are available.
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ EVT SVT = VT.getScalarType();
+ assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+ unsigned BW = SVT.getSizeInBits();
+ SDValue HalfWord = DAG.getConstant(BW/2, VT);
+
+ // Constants to clear the upper part of the word.
+ // Notice that we can also use SHL+SHR, but using a constant is slightly
+ // faster on x86.
+ uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF;
+ SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
+
+ // Two to the power of half-word-size.
+ SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType());
+
+ // Clear upper part of LO, lower HI
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+ // Convert hi and lo to floats
+ // Convert the hi part back to the upper values
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+ // Add the two halves
+ return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
+
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+ SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Zero, Op.getOperand(0));
+ }
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 8> Ops(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getIntPtrConstant(i));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+ LHSElem, RHSElem, CC);
+ Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+ DAG.getConstant(APInt::getAllOnesValue
+ (EltVT.getSizeInBits()), EltVT),
+ DAG.getConstant(0, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
+
+}
+
+bool SelectionDAG::LegalizeVectors() {
+ return VectorLegalizer(*this).Run();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 000000000000..5ec853563888
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2795 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to scalarize the result of this "
+ "operator!\n");
+
+ case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
+ case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
+ case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
+ case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::ANY_EXTEND:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ R = ScalarizeVecRes_UnaryOp(N);
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ R = ScalarizeVecRes_BinOp(N);
+ break;
+ case ISD::FMA:
+ R = ScalarizeVecRes_TernaryOp(N);
+ break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ Op0.getValueType(), Op0, Op1, Op2);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetScalarizedVector(Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ // The BUILD_VECTOR operands may be of wider element types and
+ // we may need to truncate them back to the requested return type.
+ if (EltVT.isInteger())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
+ Op0, DAG.getValueType(NewVT),
+ DAG.getValueType(Op0.getValueType()),
+ N->getOperand(3),
+ N->getOperand(4),
+ cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0).getVectorElementType(),
+ N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ NewVT, Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ // The value to insert may have a wider type than the vector element type,
+ // so be sure to truncate it to the element type if necessary.
+ SDValue Op = N->getOperand(1);
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ if (Op.getValueType() != EltVT)
+ // FIXME: Can this happen for floating point types?
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
+ return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+ assert(N->isUnindexed() && "Indexed vector load?");
+
+ SDValue Result = DAG.getLoad(ISD::UNINDEXED,
+ N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(),
+ N->getDebugLoc(),
+ N->getChain(), N->getBasePtr(),
+ DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->isInvariant(), N->getOriginalAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+ // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+ EVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT,
+ LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+ SDValue Cond = GetScalarizedVector(N->getOperand(0));
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true);
+ if (ScalarBool != VecBool) {
+ EVT CondVT = Cond.getValueType();
+ switch (ScalarBool) {
+ case TargetLowering::UndefinedBooleanContent:
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
+ // Vector read from all ones, scalar expects a single 1 so mask.
+ Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT,
+ Cond, DAG.getConstant(1, CondVT));
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrOneBooleanContent);
+ // Vector reads from a one, scalar from all ones so sign extend.
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT,
+ Cond, DAG.getValueType(MVT::i1));
+ break;
+ }
+ }
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), Cond, LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0), LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(),
+ N->getOperand(0), N->getOperand(1),
+ LHS, GetScalarizedVector(N->getOperand(3)),
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() ==
+ N->getOperand(0).getValueType().isVector() &&
+ "Scalar/Vector type mismatch");
+
+ if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
+
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+ // Figure out if the scalar is the LHS or RHS and return it.
+ SDValue Arg = N->getOperand(2).getOperand(0);
+ if (Arg.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ EVT NVT = N->getValueType(0).getVectorElementType();
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+ // Vectors may have a different boolean contents to scalars. Promote the
+ // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(true));
+ return DAG.getNode(ExtendCode, DL, NVT, Res);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to scalarize this operator's operand!");
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ Res = ScalarizeVecOp_EXTEND(N);
+ break;
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Extend the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SmallVector<SDValue, 1> Ops(1);
+ Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ N->getValueType(0).getScalarType(), Elt);
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], 1);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
+/// use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands());
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ Ops[i] = GetScalarizedVector(N->getOperand(i));
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], Ops.size());
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0),
+ Res);
+ return Res;
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(N->getChain(), dl,
+ GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getAlignment());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getOriginalAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization, we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Split node result: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Lo, Hi;
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split the result of this "
+ "operator!\n");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::LOAD:
+ SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::SETCC:
+ SplitVecRes_SETCC(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::CONVERT_RNDSAT:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+ break;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::FPOW:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM:
+ SplitVecRes_BinOp(N, Lo, Hi);
+ break;
+ case ISD::FMA:
+ SplitVecRes_TernaryOp(N, Lo, Hi);
+ break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Op0Lo, Op0Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ SDValue Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ SDValue Op2Lo, Op2Hi;
+ GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
+ Op0Lo, Op1Lo, Op2Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
+ Op0Hi, Op1Hi, Op2Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeWidenVector:
+ break;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+ break;
+ case TargetLowering::TypeSplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+ EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+ if (TLI.isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ unsigned LoNumElts = LoVT.getVectorNumElements();
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+ Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+ Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumSubvectors = N->getNumOperands() / 2;
+ if (NumSubvectors == 1) {
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+ return;
+ }
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));
+}
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+ DAG.getValueType(LoVT));
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+ DAG.getValueType(HiVT));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(Vec, Lo, Hi);
+
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ if (IdxVal < LoNumElts)
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ else
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getIntPtrConstant(IdxVal - LoNumElts));
+ return;
+ }
+
+ // Spill the vector to the stack.
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
+ SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ unsigned Alignment =
+ TLI.getDataLayout()->getPrefTypeAlignment(VecType);
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
+ false, false, 0);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, MinAlign(Alignment, IncrementSize));
+}
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+ Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+ EVT LoVT, HiVT;
+ DebugLoc dl = LD->getDebugLoc();
+ GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ EVT MemoryVT = LD->getMemoryVT();
+ unsigned Alignment = LD->getOriginalAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
+ isInvariant, Alignment);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
+ EVT LoVT, HiVT;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ EVT InVT = N->getOperand(0).getValueType();
+ SDValue LL, LH, RL, RH;
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(0));
+ RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Get the dest types - they may not match the input types, e.g. int_to_fp.
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
+ EVT InVT = N->getOperand(0).getValueType();
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ } else {
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ }
+
+ if (N->getOpcode() == ISD::FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) {
+ SDValue DTyOpLo = DAG.getValueType(LoVT);
+ SDValue DTyOpHi = DAG.getValueType(HiVT);
+ SDValue STyOpLo = DAG.getValueType(Lo.getValueType());
+ SDValue STyOpHi = DAG.getValueType(Hi.getValueType());
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp,
+ CvtCode);
+ Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp,
+ CvtCode);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ EVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool useBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ useBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (useBuildVector) {
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Inputs[Input], DAG.getIntPtrConstant(Idx)));
+ }
+
+ // Construct the Lo/Hi output using a BUILD_VECTOR.
+ Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size());
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = DAG.getUNDEF(NewVT);
+ } else {
+ SDValue Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = InputUsed[1] == -1U ?
+ DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+ }
+
+ Ops.clear();
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorOperand - This method is called when the specified operand of the
+/// specified node is found to need vector splitting. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Split node operand: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
+ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::VSELECT:
+ Res = SplitVecOp_VSELECT(N, OpNo);
+ break;
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FTRUNC:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
+ // The only possibility for an illegal operand is the mask, since result type
+ // legalization would have handled this node already otherwise.
+ assert(OpNo == 0 && "Illegal operand must be mask");
+
+ SDValue Mask = N->getOperand(0);
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ DebugLoc DL = N->getDebugLoc();
+ EVT MaskVT = Mask.getValueType();
+ assert(MaskVT.isVector() && "VSELECT without a vector mask?");
+
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ assert(Lo.getValueType() == Hi.getValueType() &&
+ "Lo and Hi have differing types");;
+
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ unsigned HiNumElts = Hi.getValueType().getVectorNumElements();
+ assert(LoNumElts == HiNumElts && "Asymmetric vector split?");
+
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue LoElts = DAG.getIntPtrConstant(LoNumElts);
+ EVT Src0VT = Src0.getValueType();
+ EVT Src0EltTy = Src0VT.getVectorElementType();
+ EVT MaskEltTy = MaskVT.getVectorElementType();
+
+ EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts);
+ EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts);
+ EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts);
+ EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts);
+
+ SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero);
+ SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero);
+
+ SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts);
+ SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts);
+
+ SDValue LoMask =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero);
+ SDValue HiMask =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts);
+
+ SDValue LoSelect =
+ DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
+ SDValue HiSelect =
+ DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+ // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
+ // end up being split all the way down to individual components. Convert the
+ // split pieces into integers and reassemble.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+ JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ // We know that the extracted result type is legal.
+ EVT SubVT = N->getValueType(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ if (IdxVal < LoElts) {
+ assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+ "Extracted subvector crosses vector split!");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ } else {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+ }
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec.getValueType();
+
+ if (isa<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+ if (IdxVal < LoElts)
+ return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
+ DAG.getConstant(IdxVal - LoElts,
+ Idx.getValueType())), 0);
+ }
+
+ // Store the vector to the stack.
+ EVT EltVT = VecVT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Load back the required element.
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo(), EltVT, false, false, 0);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ DebugLoc DL = N->getDebugLoc();
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ bool isVol = N->isVolatile();
+ bool isNT = N->isNonTemporal();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+ LoMemVT, isVol, isNT, Alignment);
+ else
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+ isVol, isNT, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVol, isNT, Alignment);
+ else
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ isVol, isNT, Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // The input operands all must have the same type, and we know the result the
+ // result type is valid. Convert this to a buildvector which extracts all the
+ // input elements.
+ // TODO: If the input elements are power-two vectors, we could convert this to
+ // a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ SDValue Op = N->getOperand(op);
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ Op, DAG.getIntPtrConstant(i)));
+
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
+ &Elts[0], Elts.size());
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ // The result has a legal vector type, but the input needs splitting.
+ SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo0, Hi0);
+ GetSplitVector(N->getOperand(1), Lo1, Hi1);
+ unsigned PartElements = Lo0.getValueType().getVectorNumElements();
+ EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
+ EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+
+ LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
+ HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
+ return PromoteTargetBoolean(Con, N->getValueType(0));
+}
+
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+
+ // See if the target wants to custom widen this node.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+ return;
+
+ SDValue Res = SDValue();
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen the result of this operator!");
+
+ case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+ break;
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::BSWAP:
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::SUB:
+ case ISD::XOR:
+ Res = WidenVecRes_Binary(N);
+ break;
+
+ case ISD::FPOWI:
+ Res = WidenVecRes_POWI(N);
+ break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ Res = WidenVecRes_Shift(N);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecRes_Convert(N);
+ break;
+
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ Res = WidenVecRes_Unary(N);
+ break;
+ case ISD::FMA:
+ Res = WidenVecRes_Ternary(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
+ // Ternary op widening.
+ DebugLoc dl = N->getDebugLoc();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = GetWidenedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+ // Binary op widening.
+ unsigned Opcode = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+ // Operation doesn't trap so just widen as normal.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
+ }
+ CurNumElts = 0;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd ) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ }
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+ &Ops[0], NumConcat);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0));
+ // Extract the input and convert the shorten input vector.
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+
+ EVT ShVT = ShOp.getValueType();
+ if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
+ ShOp = GetWidenedVector(ShOp);
+ ShVT = ShOp.getValueType();
+ }
+ EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ ShVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ if (ShVT != ShWidenVT)
+ ShOp = ModifyToType(ShOp, ShWidenVT);
+
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+ // Unary op widening.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ cast<VTSDNode>(N->getOperand(1))->getVT()
+ .getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
+ SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetWidenedVector(WidenVec);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ InOp = GetPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeSplitVector:
+ break;
+ case TargetLowering::TypeWidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ // The input widens to the same size. Convert to the widen value.
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ }
+
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+ // Determine new input vector type. The new input vector type will use
+ // the same element type (if its a vector) or use the input type as a
+ // vector. It is the same size as the type to widen to.
+ EVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ }
+
+ if (TLI.isTypeLegal(NewInVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i < NewNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ SDValue NewVec;
+ if (InVT.isVector())
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ else
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // Build a vector with undefined for the new nodes.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ NewOps.reserve(WidenNumElts);
+ for (unsigned i = NumElts; i < WidenNumElts; ++i)
+ NewOps.push_back(DAG.getUNDEF(EltVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+ // Add undef vectors to widen to correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ // The inputs and the result are widen to the same value.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ break;
+
+ if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
+ SmallVector<int, 16> MaskOps(WidenNumElts, -1);
+ for (unsigned i = 0; i < NumInElts; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i + NumInElts] = i + WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ &MaskOps[0]);
+ }
+ }
+ }
+
+ // Fall back to use extracts and build vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue InOp = N->getOperand(0);
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ SDValue DTyOp = DAG.getValueType(WidenVT);
+ SDValue STyOp = DAG.getValueType(InWidenVT);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ // Extract the input and convert the shorten input vector.
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+ DAG.getIntPtrConstant(0));
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ DTyOp = DAG.getValueType(EltVT);
+ STyOp = DAG.getValueType(InEltVT);
+
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+
+ EVT InVT = InOp.getValueType();
+
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+
+ // We could try widening the input to the right length but for now, extract
+ // the original elements, fill the rest with undefs and build a vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i;
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(IdxVal+i));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SDValue Result;
+ SmallVector<SDValue, 16> LdChain; // Chain for the series of load
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+ &LdChain[0], LdChain.size());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+ WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ EVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ EVT CondEltVT = CondVT.getVectorElementType();
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ CondEltVT, WidenNumElts);
+ if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() ==
+ N->getOperand(0).getValueType().isVector() &&
+ "Scalar/Vector type mismatch");
+ if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT,
+ InOp1, InOp2, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operands must be vectors");
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ EVT InVT = InOp1.getValueType();
+ assert(InVT.isVector() && "can not widen non vector type");
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), WidenNumElts);
+ InOp1 = GetWidenedVector(InOp1);
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Assume that the input and output will be widen appropriately. If not,
+ // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ (void)WidenInVT;
+ return DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen this operator's operand!");
+
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = WidenVecOp_Convert(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+ // Since the result is legal and the input is illegal, it is unlikely
+ // that we can fix the input to a legal type so unroll the convert
+ // into some scalar code and create a nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ for (unsigned i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i)));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InWidenVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check if we can convert between two legal vector types and extract.
+ unsigned InWidenSize = InWidenVT.getSizeInBits();
+ unsigned Size = VT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
+ unsigned NewNumElts = InWidenSize / Size;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+ // If the input vector is not legal, it is likely that we will not find a
+ // legal vector of the same size. Replace the concatenate vector with a
+ // nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ EVT InVT = N->getOperand(0).getValueType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ unsigned NumOperands = N->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+ // We have to widen the value but we want only to store the original
+ // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SmallVector<SDValue, 16> StChain;
+ if (ST->isTruncatingStore())
+ GenWidenVectorTruncStores(StChain, ST);
+ else
+ GenWidenVectorStores(StChain, ST);
+
+ if (StChain.size() == 1)
+ return StChain[0];
+ else
+ return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
+ MVT::Other,&StChain[0],StChain.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+ SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ // WARNING: In this code we widen the compare instruction with garbage.
+ // This garbage may contain denormal floats which may be slow. Is this a real
+ // concern ? Should we zero the unused lanes if this is a float compare ?
+
+ // Get a new SETCC node to compare the newly widened operands.
+ // Only some of the compared elements are legal.
+ EVT SVT = TLI.getSetCCResultType(InOp0.getValueType());
+ SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SVT, InOp0, InOp1, N->getOperand(2));
+
+ // Extract the needed results from the result vector.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+ SVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ ResVT, WideSETCC, DAG.getIntPtrConstant(0));
+
+ return PromoteTargetBoolean(CC, N->getValueType(0));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widen vector for load/store
+// TLI: Target lowering used to determine legal types.
+// Width: Width left need to load/store.
+// WidenVT: The widen vector type to load to/store from
+// Align: If 0, don't allow use of a wider type
+// WidenEx: If Align is not 0, the amount additional we can load/store from.
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT;
+ if (Width == WidenEltWidth)
+ return RetVT;
+
+ // See if there is larger legal integer than the element type to load/store
+ unsigned VT;
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ RetVT = MemVT;
+ break;
+ }
+ }
+
+ // See if there is a larger vector type to load/store that has the same vector
+ // element type and is evenly divisible with the WidenVT.
+ for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+ EVT MemVT = (MVT::SimpleValueType) VT;
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ return RetVT;
+}
+
+// Builds a vector type from scalar loads
+// VecTy: Resulting Vector type
+// LDOps: Load operators to build a vector type
+// [Start,End) the list of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVector<SDValue, 16>& LdOps,
+ unsigned Start, unsigned End) {
+ DebugLoc dl = LdOps[Start].getDebugLoc();
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
+ // Readjust position and vector position based on new load type
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getIntPtrConstant(Idx++));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+ LoadSDNode *LD) {
+ // The strategy assumes that we can efficiently load powers of two widths.
+ // The routines chops the vector into the largest vector loads with the same
+ // element type or scalar loads and then recombines it to the widen vector
+ // type.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
+
+ int LdWidth = LdVT.getSizeInBits();
+ int WidthDiff = WidenWidth - LdWidth; // Difference
+ unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads
+
+ // Find the vector type that can load from.
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ int NewVTWidth = NewVT.getSizeInBits();
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ isVolatile, isNonTemporal, isInvariant, Align);
+ LdChain.push_back(LdOp.getValue(1));
+
+ // Check if we can load the element with one instruction
+ if (LdWidth <= NewVTWidth) {
+ if (!NewVT.isVector()) {
+ unsigned NumElts = WidenWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+ }
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+ NumConcat);
+ }
+
+ // Load vector by using multiple loads from largest vector to scalar
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ unsigned Offset = 0;
+
+ while (LdWidth > 0) {
+ unsigned Increment = NewVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+
+ SDValue L;
+ if (LdWidth < NewVTWidth) {
+ // Our current type we are using is too large, find a better size
+ NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVTWidth = NewVT.getSizeInBits();
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ if (L->getValueType(0).isVector()) {
+ SmallVector<SDValue, 16> Loads;
+ Loads.push_back(L);
+ unsigned size = L->getValueSizeInBits(0);
+ while (size < LdOp->getValueSizeInBits(0)) {
+ Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+ size += L->getValueSizeInBits(0);
+ }
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0),
+ &Loads[0], Loads.size());
+ }
+ } else {
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ }
+
+ LdOps.push_back(L);
+
+
+ LdWidth -= NewVTWidth;
+ }
+
+ // Build the vector from the loads operations
+ unsigned End = LdOps.size();
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the load contains vectors, build the vector using concat vector.
+ // All of the vectors used to loads are power of 2 and the scalars load
+ // can be combined to make a power of 2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads to a vector
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ &ConcatOps[Idx], End - Idx);
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
+
+ if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ &ConcatOps[Idx], End - Idx);
+
+ // We need to fill the rest with undefs to build the vector
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode * LD,
+ ISD::LoadExtType ExtType) {
+ // For extension loads, it may not be more efficient to chop up the vector
+ // and then extended it. Instead, we unroll the load and build a new vector.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
+ LD->getPointerInfo(),
+ LdEltVT, isVolatile, isNonTemporal, Align);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ isVolatile, isNonTemporal, Align);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+}
+
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store powers of two widths.
+ // The routines chops the vector into the largest vector stores with the same
+ // element type or scalar stores.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ unsigned ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+
+ int Idx = 0; // current index to store
+ unsigned Offset = 0; // offset from base to store
+ while (StWidth != 0) {
+ // Find the largest vector type we can store with
+ EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+ unsigned NewVTWidth = NewVT.getSizeInBits();
+ unsigned Increment = NewVTWidth / 8;
+ if (NewVT.isVector()) {
+ unsigned NumVTElts = NewVT.getVectorNumElements();
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getIntPtrConstant(Idx));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ Idx += NumVTElts;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } else {
+ // Cast the vector to the scalar type we can store
+ unsigned NumElts = ValWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
+ // Readjust index position based on new vector type
+ Idx = Idx * ValEltWidth / NewVTWidth;
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getIntPtrConstant(Idx++));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ // Restore index back to be relative to the original widen element type
+ Idx = Idx * NewVTWidth / ValEltWidth;
+ }
+ }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // For extension loads, it may not be more efficient to truncate the vector
+ // and then store it. Instead, we extract each element and then store it.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ValOp.getValueType();
+
+ // It must be true that we the widen vector type is bigger than where
+ // we need to store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+ // For truncating stores, we can not play the tricks of chopping legal
+ // vector types and bit cast it to the right type. Instead, we unroll
+ // the store.
+ EVT StEltVT = StVT.getVectorElementType();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned Increment = ValEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo(), StEltVT,
+ isVolatile, isNonTemporal, Align));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ }
+}
+
+/// Modifies a vector input (widen or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+ // Note that InOp might have been widened so it might already have
+ // the right width or it might need be narrowed.
+ EVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ DebugLoc dl = InOp.getDebugLoc();
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
+ if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+ unsigned NumConcat = WidenNumElts / InNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+ }
+
+ if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getIntPtrConstant(0));
+
+ // Fall back to extract and build.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(Idx));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for ( ; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 000000000000..473e1384e399
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,655 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// DFA is queried as a state machine to model "packets/bundles" during
+// schedule. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable use of DFA during scheduling"));
+
+static cl::opt<signed> RegPressureThreshold(
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
+
+
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
+ Picker(this),
+ InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData())
+{
+ TII = IS->getTargetLowering().getTargetMachine().getInstrInfo();
+ TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo();
+ TLI = &IS->getTargetLowering();
+
+ const TargetMachine &tm = (*IS->MF).getTarget();
+ ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
+ // This hard requirement could be relaxed, but for now
+ // do not let it procede.
+ assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
+}
+
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *PredSU = I->getSUnit();
+ const SDNode *ScegN = PredSU->getNode();
+
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: NumberDeps++; break;
+ case ISD::CopyToReg: break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ MVT VT = ScegN->getSimpleValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+ unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *SuccSU = I->getSUnit();
+ const SDNode *ScegN = SuccSU->getNode();
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: break;
+ case ISD::CopyToReg: NumberDeps++; break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+static unsigned numberCtrlDepsInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+static unsigned numberCtrlPredInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+///
+/// Initialize nodes.
+///
+void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ initNumRegDefsLeft(SU);
+ SU->NodeQueueId = 0;
+ }
+}
+
+/// This heuristic is used if DFA scheduling is not desired
+/// for some VLIW platform.
+bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+ // Now see if there are no other dependencies
+ // to instructions alredy in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, might as well
+ // ignor order deps.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
+
+signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ MVT VT = SU->getNode()->getSimpleValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates change in reg pressure from this SU.
+/// It is achieved by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function to ignore
+/// existing reg file sizes, and report raw def/use
+/// balance.
+signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+ else {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 15;
+static const unsigned PriorityFive = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns single number reflecting benefit of scheduling SU
+/// in the current cycle.
+signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ signed ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Adaptable scheduling
+ // A small, but very parallel
+ // region, where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+ // Default heuristic, greeady and
+ // critical path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // Now see how many instructions is blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+ // These are platform specific things.
+ // Will need to go into the back end
+ // and accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFive;
+ break;
+
+ case ISD::INLINEASM:
+ ResCount += PriorityFour;
+ break;
+ }
+ }
+ return ResCount;
+}
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+ // Use NULL entry as an event marker to reset
+ // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ MVT VT = ScegN->getSimpleValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --I->getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+ // Heuristic is simple - node with no data successors reduces
+ // number of live ranges. All others, increase it.
+ unsigned NumberNonControlDeps = 0;
+
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ adjustPriorityOfUnscheduledPreds(I->getSUnit());
+ if (!I->isCtrl())
+ NumberNonControlDeps++;
+ }
+
+ if (!NumberNonControlDeps) {
+ if (ParallelLiveRanges >= SU->NumPreds)
+ ParallelLiveRanges -= SU->NumPreds;
+ else
+ ParallelLiveRanges = 0;
+
+ }
+ else
+ ParallelLiveRanges += SU->NumRegDefsLeft;
+
+ // Track parallel live chains.
+ HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+ HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+ unsigned NodeNumDefs = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ // No register need be allocated for this.
+ if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ NodeNumDefs = 0;
+ break;
+ }
+ NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
+ }
+ else
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::CopyFromReg:
+ NodeNumDefs++;
+ break;
+ case ISD::INLINEASM:
+ NodeNumDefs++;
+ break;
+ }
+
+ SU->NumRegDefsLeft = NodeNumDefs;
+}
+
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable)
+ return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+
+/// Main access point - returns next instructions
+/// to be placed in scheduling sequence.
+SUnit *ResourcePriorityQueue::pop() {
+ if (empty())
+ return 0;
+
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ if (!DisableDFASched) {
+ signed BestCost = SUSchedulingCost(*Best);
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I) {
+
+ if (SUSchedulingCost(*I) > BestCost) {
+ BestCost = SUSchedulingCost(*I);
+ Best = I;
+ }
+ }
+ }
+ // Use default TD scheduling mechanism.
+ else {
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ }
+
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+
+ Queue.pop_back();
+
+ return V;
+}
+
+
+void ResourcePriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+
+ Queue.pop_back();
+}
+
+
+#ifdef NDEBUG
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
+ ResourcePriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
new file mode 100644
index 000000000000..4af7172847d7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -0,0 +1,114 @@
+//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H
+#define LLVM_CODEGEN_SDNODEDBGVALUE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+
+class SDDbgValue {
+public:
+ enum DbgValueKind {
+ SDNODE = 0, // value is the result of an expression
+ CONST = 1, // value is a constant
+ FRAMEIX = 2 // value is contents of a stack location
+ };
+private:
+ enum DbgValueKind kind;
+ union {
+ struct {
+ SDNode *Node; // valid for expressions
+ unsigned ResNo; // valid for expressions
+ } s;
+ const Value *Const; // valid for constants
+ unsigned FrameIx; // valid for stack objects
+ } u;
+ MDNode *mdPtr;
+ uint64_t Offset;
+ DebugLoc DL;
+ unsigned Order;
+ bool Invalid;
+public:
+ // Constructor for non-constants.
+ SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl,
+ unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
+ kind = SDNODE;
+ u.s.Node = N;
+ u.s.ResNo = R;
+ }
+
+ // Constructor for constants.
+ SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl,
+ unsigned O) :
+ mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+ kind = CONST;
+ u.Const = C;
+ }
+
+ // Constructor for frame indices.
+ SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) :
+ mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+ kind = FRAMEIX;
+ u.FrameIx = FI;
+ }
+
+ // Returns the kind.
+ DbgValueKind getKind() { return kind; }
+
+ // Returns the MDNode pointer.
+ MDNode *getMDPtr() { return mdPtr; }
+
+ // Returns the SDNode* for a register ref
+ SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; }
+
+ // Returns the ResNo for a register ref
+ unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; }
+
+ // Returns the Value* for a constant
+ const Value *getConst() { assert (kind==CONST); return u.Const; }
+
+ // Returns the FrameIx for a stack object
+ unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; }
+
+ // Returns the offset.
+ uint64_t getOffset() { return Offset; }
+
+ // Returns the DebugLoc.
+ DebugLoc getDebugLoc() { return DL; }
+
+ // Returns the SDNodeOrder. This is the order of the preceding node in the
+ // input.
+ unsigned getOrder() { return Order; }
+
+ // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+ // property. A SDDbgValue is invalid if the SDNode that produces the value is
+ // deleted.
+ void setIsInvalidated() { Invalid = true; }
+ bool isInvalidated() { return Invalid; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
new file mode 100644
index 000000000000..7e7b8974be48
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -0,0 +1,56 @@
+//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDNodeOrdering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEORDERING_H
+#define LLVM_CODEGEN_SDNODEORDERING_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class SDNode;
+
+/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each
+/// SDNode that roughly corresponds to the ordering of the original LLVM
+/// instruction. This is used for turning off scheduling, because we'll forgo
+/// the normal scheduling algorithms and output the instructions according to
+/// this ordering.
+class SDNodeOrdering {
+ DenseMap<const SDNode*, unsigned> OrderMap;
+
+ void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
+ SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
+public:
+ SDNodeOrdering() {}
+
+ void add(const SDNode *Node, unsigned NewOrder) {
+ unsigned &OldOrder = OrderMap[Node];
+ if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder))
+ OldOrder = NewOrder;
+ }
+ void remove(const SDNode *Node) {
+ DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
+ if (Itr != OrderMap.end())
+ OrderMap.erase(Itr);
+ }
+ void clear() {
+ OrderMap.clear();
+ }
+ unsigned getOrder(const SDNode *Node) {
+ return OrderMap[Node];
+ }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 000000000000..d1f36cb647dc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,799 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "InstrEmitter.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+static RegisterScheduler
+ linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+ createDAGLinearizer);
+
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.back();
+ Queue.pop_back();
+ return V;
+ }
+ };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+ /// LiveRegDefs - A set of physical registers and their definition
+ /// that are "live". These nodes must be scheduled before any other nodes that
+ /// modifies the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+ /// AddPred - adds a predecessor edge to SUnit SU.
+ /// This returns true if this is a new predecessor.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+ /// RemovePred - removes a predecessor edge from SUnit SU.
+ /// This returns true if an edge was removed.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleBottomUp();
+
+ /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool forceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue.push(PredSU);
+ }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ SUnit *NewSU = newSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location and producing the same type of value
+ // but it has different alignment or volatileness.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = newSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ }
+
+ SDep ChainPred;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPred = *I;
+ else if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ if (ChainPred.getSUnit()) {
+ RemovePred(SU, ChainPred);
+ if (isNewLoad)
+ AddPred(LoadSU, ChainPred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad) {
+ AddPred(LoadSU, Pred);
+ }
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+ if (isNewLoad) {
+ SDep D(LoadSU, SDep::Barrier);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+ }
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ NewSU = Clone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL));
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+ }
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
+
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
+ if (RegAdded.insert(*AI)) {
+ LRegs.push_back(*AI);
+ Added = true;
+ }
+ }
+ }
+ return Added;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specific node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVector<unsigned, 4> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+ if (!Node->isMachineOpcode())
+ continue;
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue.push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue.pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue.pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try code duplication or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ if (!CurSU) {
+ // Try duplicating the nodes that produces these
+ // "expensive to copy" values to break the dependency. In case even
+ // that doesn't work, insert cross class copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If cross copy register class is the same as RC, then it must be
+ // possible copy the value directly. Do not try duplicate the def.
+ // If cross copy register class is not the same as RC, then it's
+ // possible to copy the value but it require cross register class copies
+ // and it is expensive.
+ // If cross copy register class is null, then it's not possible to copy
+ // the value at all.
+ SUnit *NewDef = 0;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical "
+ "register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ if (!CurSU) {
+ llvm_unreachable("Unable to resolve live physical register dependencies!");
+ }
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue.push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+ // Reverse the order since it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+
+namespace {
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the
+// DAG in topological order.
+// IMPORTANT: this may not work for targets with phyreg dependency.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+ ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+private:
+ std::vector<SDNode*> Sequence;
+ DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user
+
+ void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+ if (N->getNodeId() != 0)
+ llvm_unreachable(0);
+
+ if (!N->isMachineOpcode() &&
+ (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+ // These nodes do not need to be translated into MIs.
+ return;
+
+ DEBUG(dbgs() << "\n*** Scheduling: ");
+ DEBUG(N->dump(DAG));
+ Sequence.push_back(N);
+
+ unsigned NumOps = N->getNumOperands();
+ if (unsigned NumLeft = NumOps) {
+ SDNode *GluedOpN = 0;
+ do {
+ const SDValue &Op = N->getOperand(NumLeft-1);
+ SDNode *OpN = Op.getNode();
+
+ if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+ // Schedule glue operand right above N.
+ GluedOpN = OpN;
+ assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+ OpN->setNodeId(0);
+ ScheduleNode(OpN);
+ continue;
+ }
+
+ if (OpN == GluedOpN)
+ // Glue operand is already scheduled.
+ continue;
+
+ DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+ if (DI != GluedMap.end() && DI->second != N)
+ // Users of glues are counted against the glued users.
+ OpN = DI->second;
+
+ unsigned Degree = OpN->getNodeId();
+ assert(Degree > 0 && "Predecessor over-released!");
+ OpN->setNodeId(--Degree);
+ if (Degree == 0)
+ ScheduleNode(OpN);
+ } while (--NumLeft);
+ }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) {
+ while (SDNode *Glued = N->getGluedUser())
+ N = Glued;
+ return N;
+}
+
+void ScheduleDAGLinearize::Schedule() {
+ DEBUG(dbgs() << "********** DAG Linearization **********\n");
+
+ SmallVector<SDNode*, 8> Glues;
+ unsigned DAGSize = 0;
+ for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+
+ // Use node id to record degree.
+ unsigned Degree = N->use_size();
+ N->setNodeId(Degree);
+ unsigned NumVals = N->getNumValues();
+ if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
+ N->hasAnyUseOfValue(NumVals-1)) {
+ SDNode *User = findGluedUser(N);
+ if (User) {
+ Glues.push_back(N);
+ GluedMap.insert(std::make_pair(N, User));
+ }
+ }
+
+ if (N->isMachineOpcode() ||
+ (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
+ ++DAGSize;
+ }
+
+ for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
+ SDNode *Glue = Glues[i];
+ SDNode *GUser = GluedMap[Glue];
+ unsigned Degree = Glue->getNodeId();
+ unsigned UDegree = GUser->getNodeId();
+
+ // Glue user must be scheduled together with the glue operand. So other
+ // users of the glue operand must be treated as its users.
+ SDNode *ImmGUser = Glue->getGluedUser();
+ for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
+ ui != ue; ++ui)
+ if (*ui == ImmGUser)
+ --Degree;
+ GUser->setNodeId(UDegree + Degree);
+ Glue->setNodeId(1);
+ }
+
+ Sequence.reserve(DAGSize);
+ ScheduleNode(DAG->getRoot().getNode());
+}
+
+MachineBasicBlock*
+ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+
+ DEBUG({
+ dbgs() << "\n*** Final schedule ***\n";
+ });
+
+ // FIXME: Handle dbg_values.
+ unsigned NumNodes = Sequence.size();
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ SDNode *N = Sequence[NumNodes-i-1];
+ DEBUG(N->dump(DAG));
+ Emitter.EmitNode(N, false, false, VRBaseMap);
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGFast(*IS->MF);
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGLinearize(*IS->MF);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 000000000000..c009cfcc516d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,3039 @@
+//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ "Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+static RegisterScheduler
+ sourceListDAGScheduler("source",
+ "Similar to list-burr but schedules in source "
+ "order when possible",
+ createSourceListDAGScheduler);
+
+static RegisterScheduler
+ hybridListDAGScheduler("list-hybrid",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance latency and register pressure",
+ createHybridListDAGScheduler);
+
+static RegisterScheduler
+ ILPListDAGScheduler("list-ilp",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance ILP and register pressure",
+ createILPListDAGScheduler);
+
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
+// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
+static cl::opt<bool> DisableSchedRegPressure(
+ "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+ cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+ "disable-sched-live-uses", cl::Hidden, cl::init(true),
+ cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+ "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+ cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+ "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+ cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+ "disable-sched-stalls", cl::Hidden, cl::init(true),
+ cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+ "disable-sched-critical-path", cl::Hidden, cl::init(false),
+ cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+ "disable-sched-height", cl::Hidden, cl::init(false),
+ cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+ "disable-2addr-hack", cl::Hidden, cl::init(true),
+ cl::desc("Disable scheduler's two-address hack"));
+
+static cl::opt<int> MaxReorderWindow(
+ "max-sched-reorder", cl::Hidden, cl::init(6),
+ cl::desc("Number of instructions to allow ahead of the critical path "
+ "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+ "sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle whan no target itinerary exists."));
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+ /// NeedLatency - True if the scheduler will make use of latency information.
+ ///
+ bool NeedLatency;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands becomes available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle;
+
+ /// IssueCount - Count instructions issued in this cycle
+ /// Currently valid only for bottom-up scheduling.
+ unsigned IssueCount;
+
+ /// LiveRegDefs - A set of physical registers and their definition
+ /// that are "live". These nodes must be scheduled before any other nodes that
+ /// modifies the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<SUnit*> LiveRegGens;
+
+ // Collect interferences between physical register use/defs.
+ // Each interference is an SUnit and set of physical registers.
+ SmallVector<SUnit*, 4> Interferences;
+ typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
+ LRegsMapT LRegsMap;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
+public:
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf),
+ NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ Topo(SUnits, NULL) {
+
+ const TargetMachine &tm = mf.getTarget();
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
+
+ ~ScheduleDAGRRList() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
+ /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+ /// create a cycle.
+ bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ return Topo.WillCreateCycle(SU, TargetSU);
+ }
+
+ /// AddPred - adds a predecessor edge to SUnit SU.
+ /// This returns true if this is a new predecessor.
+ /// Updates the topological ordering if required.
+ void AddPred(SUnit *SU, const SDep &D) {
+ Topo.AddPred(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+ /// RemovePred - removes a predecessor edge from SUnit SU.
+ /// This returns true if an edge was removed.
+ /// Updates the topological ordering if required.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ Topo.RemovePred(SU, D.getSUnit());
+ SU->removePred(D);
+ }
+
+private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
+ void ReleasePred(SUnit *SU, const SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
+ void CapturePred(SDep *PredEdge);
+ void UnscheduleNodeBottomUp(SUnit*);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+
+ void releaseInterferences(unsigned Reg = 0);
+
+ SUnit *PickNodeToScheduleBottomUp();
+ void ListScheduleBottomUp();
+
+ /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+ /// Updates the topological ordering if required.
+ SUnit *CreateNewSUnit(SDNode *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = newSUnit(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// CreateClone - Creates a new SUnit from an existing one.
+ /// Updates the topological ordering if required.
+ SUnit *CreateClone(SUnit *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = Clone(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// need actual latency information but the hybrid scheduler does.
+ bool forceUnitLatencies() const {
+ return !NeedLatency;
+ }
+};
+} // end anonymous namespace
+
+/// GetCostForDef - Looks up the register class and cost for a given definition.
+/// Typically this just means looking up the representative register class,
+/// but for untyped values (MVT::Untyped) it means inspecting the node's
+/// opcode to determine what register class is being generated.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ unsigned &RegClass, unsigned &Cost,
+ const MachineFunction &MF) {
+ MVT VT = RegDefPos.GetValue();
+
+ // Special handling for untyped values. These values can only come from
+ // the expansion of custom DAG-to-DAG patterns.
+ if (VT == MVT::Untyped) {
+ const SDNode *Node = RegDefPos.GetNode();
+
+ // Special handling for CopyFromReg of untyped values.
+ if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Opcode = Node->getMachineOpcode();
+ if (Opcode == TargetOpcode::REG_SEQUENCE) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Idx = RegDefPos.GetIdx();
+ const MCInstrDesc Desc = TII->get(Opcode);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
+ RegClass = RC->getID();
+ // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+ // better way to determine it.
+ Cost = 1;
+ } else {
+ RegClass = TLI->getRepRegClassFor(VT)->getID();
+ Cost = TLI->getRepRegClassCostFor(VT);
+ }
+}
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
+
+ CurCycle = 0;
+ IssueCount = 0;
+ MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
+ NumLiveRegs = 0;
+ // Allocate slots for each physical register, plus one for a special register
+ // to track the virtual resource of a calling sequence.
+ LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
+ LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
+ CallSeqEndForStart.clear();
+ assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+ Topo.InitDAGTopologicalSorting();
+
+ AvailableQueue->initNodes(SUnits);
+
+ HazardRec->Reset();
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+
+ AvailableQueue->releaseState();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ if (!forceUnitLatencies()) {
+ // Updating predecessor's height. This is now the cycle when the
+ // predecessor can be scheduled without causing a pipeline stall.
+ PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+ }
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+ if (isReady(PredSU)) {
+ AvailableQueue->push(PredSU);
+ }
+ // CapturePred and others may have left the node in the pending queue, avoid
+ // adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
+ }
+}
+
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+ unsigned NestLevel,
+ const TargetInstrInfo *TII) {
+ SDNode *N = Outer;
+ for (;;) {
+ if (N == Inner)
+ return true;
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
+ return true;
+ return false;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ if (NestLevel == 0)
+ return false;
+ --NestLevel;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return false;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return false;
+ }
+}
+
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+/// NestLevel and MaxNested are used in recursion to indcate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+ const TargetInstrInfo *TII) {
+ for (;;) {
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ SDNode *Best = 0;
+ unsigned BestMaxNest = MaxNest;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ unsigned MyNestLevel = NestLevel;
+ unsigned MyMaxNest = MaxNest;
+ if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
+ MyNestLevel, MyMaxNest, TII))
+ if (!Best || (MyMaxNest > BestMaxNest)) {
+ Best = New;
+ BestMaxNest = MyMaxNest;
+ }
+ }
+ assert(Best);
+ MaxNest = BestMaxNest;
+ return Best;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ MaxNest = std::max(MaxNest, NestLevel);
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NestLevel != 0);
+ --NestLevel;
+ if (NestLevel == 0)
+ return N;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return 0;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return 0;
+ }
+}
+
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively create one large live range
+/// across a sequence of two-address node. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ if (!LiveRegGens[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegGens[I->getReg()] = SU;
+ }
+ }
+ }
+
+ // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+ // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+ // these nodes, to prevent other calls from being interscheduled with them.
+ unsigned CallResource = TRI->getNumRegs();
+ if (!LiveRegDefs[CallResource])
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ unsigned NestLevel = 0;
+ unsigned MaxNest = 0;
+ SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+
+ SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = Def;
+ LiveRegGens[CallResource] = SU;
+ break;
+ }
+}
+
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle = PendingQueue[i]->getHeight();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ IssueCount = 0;
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ HazardRec->RecedeCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ // FIXME: Nodes such as CopyFromReg probably should not advance the current
+ // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+ // has predecessors the cycle will be advanced when they are scheduled.
+ // But given the crude nature of modeling latency though such nodes, we
+ // currently need to treat these nodes like real instructions.
+ // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
+ unsigned ReadyCycle = SU->getHeight();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, -Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+}
+
+static void resetVRegCycle(SUnit *SU);
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+#ifndef NDEBUG
+ if (CurCycle < SU->getHeight())
+ DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
+#endif
+
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+ // node its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
+ SU->setHeightToAtLeast(CurCycle);
+
+ // Reserve resources for the scheduled intruction.
+ EmitNode(SU);
+
+ Sequence.push_back(SU);
+
+ AvailableQueue->scheduledNode(SU);
+
+ // If HazardRec is disabled, and each inst counts as one cycle, then
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+ // PendingQueue for schedulers that implement HasReadyFilter.
+ if (!HazardRec->isEnabled() && AvgIPC < 2)
+ AdvanceToCycle(CurCycle + 1);
+
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ // LiveRegDegs[I->getReg()] != SU when SU is a two-address node.
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
+ releaseInterferences(I->getReg());
+ }
+ }
+ // Release the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ releaseInterferences(CallResource);
+ }
+ }
+
+ resetVRegCycle(SU);
+
+ SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, the cycle was pre-advanced before calling
+ // ReleasePredecessors. In that case, IssueCount should remain 0.
+ //
+ // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+ if (HazardRec->isEnabled() || AvgIPC > 1) {
+ if (SU->getNode() && SU->getNode()->isMachineOpcode())
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+ AdvanceToCycle(CurCycle + 1);
+ }
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, incrcease the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ if (PredSU->isAvailable) {
+ PredSU->isAvailable = false;
+ if (!PredSU->isPending)
+ AvailableQueue->remove(PredSU);
+ }
+
+ assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
+/// its predecessor states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ DEBUG(SU->dump(this));
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ CapturePred(&*I);
+ if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
+ releaseInterferences(I->getReg());
+ }
+ }
+
+ // Reclaim the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = SU;
+ LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+ }
+ }
+
+ // Release the special call resource dependence, if this is the end
+ // of a call.
+ if (LiveRegGens[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ releaseInterferences(CallResource);
+ }
+ }
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()])
+ ++NumLiveRegs;
+ // This becomes the nearest def. Note that an earlier def may still be
+ // pending if this is a two-address node.
+ LiveRegDefs[I->getReg()] = SU;
+ if (LiveRegGens[I->getReg()] == NULL ||
+ I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+ LiveRegGens[I->getReg()] = I->getSUnit();
+ }
+ }
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
+
+ SU->setHeightDirty();
+ SU->isScheduled = false;
+ SU->isAvailable = true;
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
+ AvailableQueue->unscheduledNode(SU);
+}
+
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
+ std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
+
+/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
+/// BTCycle in order to schedule a specific node.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
+ Sequence.pop_back();
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
+ UnscheduleNodeBottomUp(OldSU);
+ AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
+ }
+
+ assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
+ ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isOperandOf(N))
+ return true;
+ }
+ return false;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ // unfolding an x86 DEC64m operation results in store, dec, load which
+ // can't be handled here so quit
+ if (NewNodes.size() == 3)
+ return NULL;
+
+ DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location and producing the same type of value
+ // but it has different alignment or volatileness.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
+ computeLatency(LoadSU);
+ }
+
+ SUnit *NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ computeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPreds.push_back(*I);
+ else if (isOperandOf(I->getSUnit(), LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
+ const SDep &Pred = ChainPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+ && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ SDep D(LoadSU, SDep::Data, 0);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
+ NewSU = CreateClone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = CreateNewSUnit(NULL);
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = CreateNewSUnit(NULL);
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ else {
+ // Avoid scheduling the def-side copy before other successors. Otherwise
+ // we could introduce another physreg interference on the copy and
+ // continue inserting copies indefinitely.
+ AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(CopyFromSU);
+ AvailableQueue->addNode(CopyToSU);
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
+
+ // Check if Ref is live.
+ if (!LiveRegDefs[*AliasI]) continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AliasI] == SU) continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AliasI)) {
+ LRegs.push_back(*AliasI);
+ }
+ }
+}
+
+/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
+/// by RegMask, and add them to LRegs.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs) {
+ // Look at all live registers. Skip Reg0 and the special CallResource.
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
+ if (!LiveRegDefs[i]) continue;
+ if (LiveRegDefs[i] == SU) continue;
+ if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
+ if (RegAdded.insert(i))
+ LRegs.push_back(i);
+ }
+}
+
+/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
+static const uint32_t *getNodeRegMask(const SDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (const RegisterMaskSDNode *Op =
+ dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
+ return Op->getRegMask();
+ return NULL;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specific node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ // If we're in the middle of scheduling a call, don't begin scheduling
+ // another call. Also, don't allow any physical registers to be live across
+ // the call.
+ if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ // Check the special calling-sequence resource.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource]) {
+ SDNode *Gen = LiveRegGens[CallResource]->getNode();
+ while (SDNode *Glued = Gen->getGluedNode())
+ Gen = Glued;
+ if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
+ LRegs.push_back(CallResource);
+ }
+ }
+ if (const uint32_t *RegMask = getNodeRegMask(Node))
+ CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+
+ return !LRegs.empty();
+}
+
+void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = Interferences.size(); i > 0; --i) {
+ SUnit *SU = Interferences[i-1];
+ LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
+ if (Reg) {
+ SmallVector<unsigned, 4> &LRegs = LRegsPos->second;
+ if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
+ continue;
+ }
+ SU->isPending = false;
+ // The interfering node may no longer be available due to backtracking.
+ // Furthermore, it may have been made available again, in which case it is
+ // now already in the AvailableQueue.
+ if (SU->isAvailable && !SU->NodeQueueId) {
+ DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
+ AvailableQueue->push(SU);
+ }
+ if (i < Interferences.size())
+ Interferences[i-1] = Interferences.back();
+ Interferences.pop_back();
+ LRegsMap.erase(LRegsPos);
+ }
+}
+
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ DEBUG(dbgs() << " Interfering reg " <<
+ (LRegs[0] == TRI->getNumRegs() ? "CallResource"
+ : TRI->getName(LRegs[0]))
+ << " SU #" << CurSU->NodeNum << '\n');
+ std::pair<LRegsMapT::iterator, bool> LRegsPair =
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ if (LRegsPair.second) {
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ }
+ else {
+ assert(CurSU->isPending && "Intereferences are pending");
+ // Update the interference with current live regs.
+ LRegsPair.first->second = LRegs;
+ }
+ CurSU = AvailableQueue->pop();
+ }
+ if (CurSU)
+ return CurSU;
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ SUnit *TrySU = Interferences[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = NULL;
+ unsigned LiveCycle = UINT_MAX;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ // BacktrackBottomUp mutates Interferences!
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
+ << TrySU->NodeNum << ")\n");
+ AddPred(TrySU, SDep(BtSU, SDep::Artificial));
+
+ // If one or more successors has been unscheduled, then the current
+ // node is no longer available.
+ if (!TrySU->isAvailable)
+ CurSU = AvailableQueue->pop();
+ else {
+ AvailableQueue->remove(TrySU);
+ CurSU = TrySU;
+ }
+ // Interferences has been mutated. We must break.
+ break;
+ }
+ }
+
+ if (!CurSU) {
+ // Can't backtrack. If it's too expensive to copy the value, then try
+ // duplicate the nodes that produces these "too expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If cross copy register class is the same as RC, then it must be possible
+ // copy the value directly. Do not try duplicate the def.
+ // If cross copy register class is not the same as RC, then it's possible to
+ // copy the value but it require cross register class copies and it is
+ // expensive.
+ // If cross copy register class is null, then it's not possible to copy
+ // the value at all.
+ SUnit *NewDef = 0;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+ return CurSU;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue->push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !Interferences.empty()) {
+ DEBUG(dbgs() << "\nExamining Available:\n";
+ AvailableQueue->dump(this));
+
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
+
+ AdvancePastStalls(SU);
+
+ ScheduleNodeBottomUp(SU);
+
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
+ }
+ }
+
+ // Reverse the order if it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+namespace {
+class RegReductionPQBase;
+
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+ reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
+/// bu_ls_rr_sort - Priority function for bottom up register pressure
+// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ src_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ src_ls_rr_sort(const src_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ hybrid_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ ilp_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit*> Queue;
+ unsigned CurQueueId;
+ bool TracksRegPressure;
+ bool SrcOrder;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ bool srcorder,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter),
+ CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits);
+
+ void addNode(const SUnit *SU);
+
+ void updateNode(const SUnit *SU);
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ if (!SU->getNode()) return 0;
+
+ return scheduleDAG->DAG->GetOrdering(SU->getNode());
+ }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU) const;
+
+ int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
+
+ void scheduledNode(SUnit *SU);
+
+ void unscheduledNode(SUnit *SU);
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
+ }
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
+
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ bool srcorder,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
+ tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() {
+ if (Queue.empty()) return NULL;
+
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump(ScheduleDAG *DAG) const {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit*> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ SU->dump(DAG);
+ }
+ }
+#endif
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+ bool LSchedLow = left->isScheduleLow;
+ bool RSchedLow = right->isScheduleLow;
+ if (LSchedLow != RSchedLow)
+ return LSchedLow < RSchedLow ? 1 : -1;
+ return 0;
+}
+
+/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
+/// Smaller number is the higher priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Extra = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+
+ return SethiUllmanNumber;
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors that it doesn't lengthen
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+#if 1
+ return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
+
+void RegReductionPQBase::dumpRegPressure() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+#endif
+}
+
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ }
+ return false;
+}
+
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
+ const SDNode *N = SU->getNode();
+
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
+
+// Compute the register pressure contribution by this instruction by count up
+// for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+ LiveUses = 0;
+ int PDiff = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ if (PredSU->getNode()->isMachineOpcode())
+ ++LiveUses;
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ MVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ ++PDiff;
+ }
+ }
+ const SDNode *N = SU->getNode();
+
+ if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+ return PDiff;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ --PDiff;
+ }
+ return PDiff;
+}
+
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ if (!SU->getNode())
+ return;
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+ // defs in PredSU. The can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+ RegPressure[RCId] += Cost;
+ break;
+ }
+ }
+
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+ if (RegPressure[RCId] < Cost) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= Cost;
+ }
+ }
+ dumpRegPressure();
+}
+
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N) return;
+
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ MVT VT = PN->getSimpleValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+ POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ MVT VT = PN->getSimpleValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = PN->getSimpleValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain succs
+ unsigned Height = I->getSUnit()->getHeight();
+ // If there are bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns an cost estimate of the worse case requirement
+/// for scratch registers, i.e. number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *PredSU = I->getSUnit();
+ if (PredSU->getNode() &&
+ PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+// Set isVRegCycle for a node with only live in opers and live out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+ if (DisableSchedVRegCycle)
+ return;
+
+ if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+ return;
+
+ DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+ SU->isVRegCycle = true;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ I->getSUnit()->isVRegCycle = true;
+ }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+ if (!SU->isVRegCycle)
+ return;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isVRegCycle) {
+ assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+ "VRegCycle def must be CopyFromReg");
+ I->getSUnit()->isVRegCycle = 0;
+ }
+ }
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+ // If this SU also defines the VReg, don't hoist it as a "use".
+ if (SU->isVRegCycle)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->isVRegCycle &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ return true;
+ }
+ }
+ return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+ int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+ int LHeight = (int)left->getHeight() + LPenalty;
+ int RHeight = (int)right->getHeight() + RPenalty;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
+ BUHasStall(right, RHeight, SPQ);
+
+ // If scheduling one of the node will cause a pipeline stall, delay it.
+ // If scheduling either one of the node will cause a pipeline stall, sort
+ // them according to their height.
+ if (LStall) {
+ if (!RStall)
+ return 1;
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ } else if (RStall)
+ return -1;
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+ right->SchedulingPref == Sched::ILP)) {
+ // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
+ // is enabled, grouping instructions by cycle, then its height is already
+ // covered so only its depth matters. We also reach this point if both stall
+ // but have the same height.
+ if (!SPQ->getHazardRec()->isEnabled()) {
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ }
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
+ if (LDepth != RDepth) {
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+ // Schedule physical register definitions close to their use. This is
+ // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+ // long as shortening physreg live ranges is generally good, we can defer
+ // creating a subtarget hook.
+ if (!DisableSchedPhysRegJoin) {
+ bool LHasPhysReg = left->hasPhysRegDefs;
+ bool RHasPhysReg = right->hasPhysRegDefs;
+ if (LHasPhysReg != RHasPhysReg) {
+ #ifndef NDEBUG
+ const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"};
+ #endif
+ DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
+ << PhysRegMsg[RHasPhysReg] << "\n");
+ return LHasPhysReg < RHasPhysReg;
+ }
+ }
+
+ // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+
+ // Be really careful about hoisting call operands above previous calls.
+ // Only allows it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+ // One or both of the nodes are calls and their sethi-ullman numbers are the
+ // same, then keep source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
+
+ // Try schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist)
+ return LDist < RDist;
+
+ // How many registers becomes live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch)
+ return LScratch > RScratch;
+
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
+
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ else {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+ return BURRSort(left, right, SPQ);
+}
+
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
+ return true;
+ }
+ else if (!LHigh && RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
+ return false;
+ }
+ if (!LHigh && !RHigh) {
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ return BURRSort(left, right, SPQ);
+}
+
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+static bool canEnableCoalescing(SUnit *SU) {
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return true;
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return true;
+
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return true;
+
+ return false;
+}
+
+// list-ilp is currently an experimental scheduler that allows various
+// heuristics to be enabled prior to the normal register reduction logic.
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ unsigned LLiveUses = 0, RLiveUses = 0;
+ int LPDiff = 0, RPDiff = 0;
+ if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
+ LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
+ RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
+ }
+ if (!DisableSchedRegPressure && LPDiff != RPDiff) {
+ DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
+ << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
+ return LPDiff > RPDiff;
+ }
+
+ if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
+ bool LReduce = canEnableCoalescing(left);
+ bool RReduce = canEnableCoalescing(right);
+ if (LReduce && !RReduce) return false;
+ if (RReduce && !LReduce) return true;
+ }
+
+ if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
+ DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
+ return LLiveUses < RLiveUses;
+ }
+
+ if (!DisableSchedStalls) {
+ bool LStall = BUHasStall(left, left->getHeight(), SPQ);
+ bool RStall = BUHasStall(right, right->getHeight(), SPQ);
+ if (LStall != RStall)
+ return left->getHeight() > right->getHeight();
+ }
+
+ if (!DisableSchedCriticalPath) {
+ int spread = (int)left->getDepth() - (int)right->getDepth();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum << "): "
+ << right->getDepth() << "\n");
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+ int spread = (int)left->getHeight() - (int)right->getHeight();
+ if (std::abs(spread) > MaxReorderWindow)
+ return left->getHeight() > right->getHeight();
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ if (!Disable2AddrHack)
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure && !SrcOrder)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+
+ // For single block loops, mark nodes that look like canonical IV increments.
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
+ for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
+ initVRegCycle(&sunits[i]);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// canClobberReachingPhysRegUse - True if SU would clobber one of it's
+/// successor's explicit physregs whose definition can reach DepSU.
+/// i.e. DepSU should not be scheduled above SU.
+static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
+ ScheduleDAGRRList *scheduleDAG,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ const uint16_t *ImpDefs
+ = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
+ const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+ if(!ImpDefs && !RegMask)
+ return false;
+
+ for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
+ SI != SE; ++SI) {
+ SUnit *SuccSU = SI->getSUnit();
+ for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
+ PE = SuccSU->Preds.end(); PI != PE; ++PI) {
+ if (!PI->isAssignedRegDep())
+ continue;
+
+ if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+
+ if (ImpDefs)
+ for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+ }
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ const uint16_t *SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ const uint32_t *SURegMask = getNodeRegMask(SUNode);
+ if (!SUImpDefs && !SURegMask)
+ continue;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+ return true;
+ if (!SUImpDefs)
+ continue;
+ for (;*SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+/// N
+/// / |
+/// / |
+/// U store
+/// |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU->NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU->NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = 0;
+ for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+ EE = SU->Preds.end(); II != EE; ++II)
+ if (!II->isCtrl()) {
+ PredSU = II->getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ SUnit *PredSuccSU = II->getSUnit();
+ if (PredSuccSU == SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
+ << " next to PredSU #" << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPred(SU, Edge);
+ Edge.setSUnit(SU);
+ scheduleDAG->AddPred(SuccSU, Edge);
+ --i;
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand. Add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ if (!SU->isTwoAddress)
+ continue;
+
+ SDNode *Node = SU->getNode();
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
+ continue;
+
+ bool isLiveOut = hasOnlyLiveOutUses(SU);
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU->getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU) continue;
+ for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+ E = DUSU->Succs.end(); I != E; ++I) {
+ if (I->isCtrl()) continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU == SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->getHeight() < SU->getHeight() &&
+ (SU->getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+ // preserves the intent of the pseudo two-address heurietics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetOpcode::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
+ SuccOpc == TargetOpcode::INSERT_SUBREG ||
+ SuccOpc == TargetOpcode::SUBREG_TO_REG)
+ continue;
+ if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
+ (!canClobber(SuccSU, DUSU) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
+ (!SU->isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, SU)) {
+ DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
+ << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial));
+ }
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ BURegReductionPriorityQueue *PQ =
+ new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ SrcRegReductionPriorityQueue *PQ =
+ new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
+
+ HybridBURRPriorityQueue *PQ =
+ new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
+
+ ILPBURRPriorityQueue *PQ =
+ new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 000000000000..b22440daf16d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,914 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+// This allows latency based scheduler to notice high latency instructions
+// without a target itinerary. The choise if number here has more to do with
+// balancing scheduler heursitics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+ cl::desc("Roughly estimate the number of cycles that 'long latency'"
+ "instructions take for targets with no itinerary"));
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+ : ScheduleDAG(mf), BB(0), DAG(0),
+ InstrItins(mf.getTarget().getInstrItineraryData()) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+ BB = bb;
+ DAG = dag;
+
+ // Clear the scheduler's SUnit DAG.
+ ScheduleDAG::clearDAG();
+ Sequence.clear();
+
+ // Invoke the target's selection of scheduler.
+ Schedule();
+}
+
+/// NewSUnit - Creates a new SUnit and return a ptr to it.
+///
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = 0;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ SUnit *SU = &SUnits.back();
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ if (!N ||
+ (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
+ SU->SchedulingPref = Sched::None;
+ else
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ return SU;
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+ SUnit *SU = newSUnit(Old->getNode());
+ SU->OrigNode = Old->OrigNode;
+ SU->Latency = Old->Latency;
+ SU->isVRegCycle = Old->isVRegCycle;
+ SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
+ SU->isTwoAddress = Old->isTwoAddress;
+ SU->isCommutable = Old->isCommutable;
+ SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+ SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->isScheduleHigh = Old->isScheduleHigh;
+ SU->isScheduleLow = Old->isScheduleLow;
+ SU->SchedulingPref = Old->SchedulingPref;
+ Old->isCloned = true;
+ return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ unsigned &PhysReg, int &Cost) {
+ if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+ return;
+
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ unsigned ResNo = User->getOperand(2).getResNo();
+ if (Def->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (ResNo >= II.getNumDefs() &&
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+ PhysReg = Reg;
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
+ }
+}
+
+// Helper for AddGlue to clone node operands.
+static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
+ SmallVectorImpl<EVT> &VTs,
+ SDValue ExtraOper = SDValue()) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+ Ops.push_back(N->getOperand(I));
+
+ if (ExtraOper.getNode())
+ Ops.push_back(ExtraOper);
+
+ SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
+ MachineSDNode::mmo_iterator Begin = 0, End = 0;
+ MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+ // Store memory references.
+ if (MN) {
+ Begin = MN->memoperands_begin();
+ End = MN->memoperands_end();
+ }
+
+ DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+
+ // Reset the memory references
+ if (MN)
+ MN->setMemRefs(Begin, End);
+}
+
+static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
+ SmallVector<EVT, 4> VTs;
+ SDNode *GlueDestNode = Glue.getNode();
+
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return false;
+
+ // Don't add a glue operand to something that already uses glue.
+ if (GlueDestNode &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ return false;
+ }
+ // Don't add glue to something that already has a glue value.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
+
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
+
+ CloneNodeWithValues(N, DAG, VTs, Glue);
+
+ return true;
+}
+
+// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
+// node even though simply shrinking the value list is sufficient.
+static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
+ assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
+ !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
+ "expected an unused glue value");
+
+ SmallVector<EVT, 4> VTs;
+ for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
+ CloneNodeWithValues(N, DAG, VTs);
+}
+
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
+/// This function finds loads of the same base and different offsets. If the
+/// offsets are not far apart (target specific), it add MVT::Glue inputs and
+/// outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDNode *Chain = 0;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ return;
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
+ SmallPtrSet<SDNode*, 16> Visited;
+ SmallVector<int64_t, 4> Offsets;
+ DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
+ bool Cluster = false;
+ SDNode *Base = Node;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E; ++I) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User))
+ continue;
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2)
+ // FIXME: Should be ok if they addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
+ continue;
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ }
+
+ if (!Cluster)
+ return;
+
+ // Sort them in increasing order.
+ std::sort(Offsets.begin(), Offsets.end());
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
+
+ if (NumLoads == 0)
+ return;
+
+ // Cluster loads by adding MVT::Glue outputs and inputs. This also
+ // ensure they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ SDValue InGlue = SDValue(0, 0);
+ if (AddGlue(Lead, InGlue, true, DAG))
+ InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutGlue = I < E - 1;
+ SDNode *Load = Loads[I];
+
+ // If AddGlue fails, we could leave an unsused glue value. This should not
+ // cause any
+ if (AddGlue(Load, InGlue, OutGlue, DAG)) {
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+ else if (!OutGlue && InGlue.getNode())
+ RemoveUnusedGlue(InGlue.getNode(), DAG);
+ }
+}
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ SDNode *Node = &*NI;
+ if (!Node || !Node->isMachineOpcode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
+ }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ NI->setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+ // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
+ // invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Add all nodes in depth first order.
+ SmallVector<SDNode*, 64> Worklist;
+ SmallPtrSet<SDNode*, 64> Visited;
+ Worklist.push_back(DAG->getRoot().getNode());
+ Visited.insert(DAG->getRoot().getNode());
+
+ SmallVector<SUnit*, 8> CallSUnits;
+ while (!Worklist.empty()) {
+ SDNode *NI = Worklist.pop_back_val();
+
+ // Add all operands to the worklist unless they've already been added.
+ for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
+ if (Visited.insert(NI->getOperand(i).getNode()))
+ Worklist.push_back(NI->getOperand(i).getNode());
+
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = newSUnit(NI);
+
+ // See if anything is glued to this node, if so, add them to glued
+ // nodes. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
+
+ // Scan up to find glued preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ }
+
+ // Scan down to find any glued succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (GlueVal.isOperandOf(*UI)) {
+ HasGlueUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = *UI;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ break;
+ }
+ if (!HasGlueUse) break;
+ }
+
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
+ // Schedule zero-latency TokenFactor below any nodes that may increase the
+ // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+ // have false stalls.
+ if (NI->getOpcode() == ISD::TokenFactor)
+ NodeSUnit->isScheduleLow = true;
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ computeLatency(NodeSUnit);
+ }
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = forceUnitLatencies();
+
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode() &&
+ TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ SU->hasPhysRegClobbers = true;
+ unsigned NumUsed = InstrEmitter::CountResults(N);
+ while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+ --NumUsed; // Skip over unused values at the end.
+ if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+ SU->hasPhysRegDefs = true;
+ }
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).getNode();
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = &SUnits[OpN->getNodeId()];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == SU) continue; // In the same group.
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ unsigned PhysReg = 0;
+ int Cost = 1;
+ // Determine if this is a physical register dependency.
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ assert((PhysReg == 0 || !isChain) &&
+ "Chain dependence via physreg data?");
+ // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
+ // emits a copy from the physical register to a virtual register unless
+ // it requires a cross class copy (cost < 0). That means we are only
+ // treating "expensive to copy" register dependency as physical register
+ // dependency. This may change in the future though.
+ if (Cost >= 0 && !StressSched)
+ PhysReg = 0;
+
+ // If this is a ctrl dep, latency is 1.
+ unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ // Special-case TokenFactor chains as zero-latency.
+ if(isChain && OpN->getOpcode() == ISD::TokenFactor)
+ OpLatency = 0;
+
+ SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
+ : SDep(OpSU, SDep::Data, PhysReg);
+ Dep.setLatency(OpLatency);
+ if (!isChain && !UnitLatencies) {
+ computeOperandLatency(OpN, N, i, Dep);
+ ST.adjustSchedDependency(OpSU, SU, Dep);
+ }
+
+ if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ //
+ // We can't tell (without more book-keeping) if this results from
+ // glued nodes or duplicate operands. As long as we don't reduce
+ // NumRegDefsLeft to zero, we handle the common cases well.
+ --OpSU->NumRegDefsLeft;
+ }
+ }
+ }
+ }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
+/// are input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// glued together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+ // Cluster certain nodes which should be scheduled together.
+ ClusterNodes();
+ // Populate the SUnits array.
+ BuildSchedUnits();
+ // Compute all the scheduling dependencies between nodes.
+ AddSchedEdges();
+}
+
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ // Check for phys reg copy.
+ if (!Node)
+ return;
+
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ ValueType = Node->getSimpleValueType(DefIdx);
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (Node == NULL) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
+
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
+ SDNode *N = SU->getNode();
+
+ // TokenFactor operands are considered zero latency, and some schedulers
+ // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+ // whenever node latency is nonzero.
+ if (N && N->getOpcode() == ISD::TokenFactor) {
+ SU->Latency = 0;
+ return;
+ }
+
+ // Check to see if the scheduler cares about latencies.
+ if (forceUnitLatencies()) {
+ SU->Latency = 1;
+ return;
+ }
+
+ if (!InstrItins || InstrItins->isEmpty()) {
+ if (N && N->isMachineOpcode() &&
+ TII->isHighLatencyDef(N->getMachineOpcode()))
+ SU->Latency = HighLatencyCycles;
+ else
+ SU->Latency = 1;
+ return;
+ }
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes glued together into this SUnit.
+ SU->Latency = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
+}
+
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const{
+ // Check to see if the scheduler cares about latencies.
+ if (forceUnitLatencies())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+ // latency so not to penalize the def.
+ // FIXME: need target specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
+ }
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+}
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (!SU->getNode()) {
+ dbgs() << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU->getNode()->dump(DAG);
+ dbgs() << "\n";
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ dbgs() << " ";
+ GluedNodes.back()->dump(DAG);
+ dbgs() << "\n";
+ GluedNodes.pop_back();
+ }
+#endif
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGSDNodes::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+#endif
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+ unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
+namespace {
+ struct OrderSorter {
+ bool operator()(const std::pair<unsigned, MachineInstr*> &A,
+ const std::pair<unsigned, MachineInstr*> &B) {
+ return A.first < B.first;
+ }
+ };
+}
+
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ unsigned Order) {
+ if (!N->getHasDebugValue())
+ return;
+
+ // Opportunistically insert immediate dbg_value uses, i.e. those with source
+ // order number right after the N.
+ MachineBasicBlock *BB = Emitter.getBlock();
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
+ for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
+ if (DVs[i]->isInvalidated())
+ continue;
+ unsigned DVOrder = DVs[i]->getOrder();
+ if (!Order || DVOrder == ++Order) {
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ if (DbgMI) {
+ Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ BB->insert(InsertPos, DbgMI);
+ }
+ DVs[i]->setIsInvalidated();
+ }
+ }
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ SmallSet<unsigned, 8> &Seen) {
+ unsigned Order = DAG->GetOrdering(N);
+ if (!Order || !Seen.insert(Order)) {
+ // Process any valid SDDbgValues even if node does not have any order
+ // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+ return;
+ }
+
+ MachineBasicBlock *BB = Emitter.getBlock();
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+ // Did not insert any instruction.
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+ return;
+ }
+
+ Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain preds
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+ SmallSet<unsigned, 8> Seen;
+ bool HasDbg = DAG->hasDebugValues();
+
+ // If this is the first BB, emit byval parameter dbg_value's.
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+ SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+ SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+ for (; PDI != PDE; ++PDI) {
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
+ if (DbgMI)
+ BB->insert(InsertPos, DbgMI);
+ }
+ }
+
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ TII->insertNoop(*Emitter.getBlock(), InsertPos);
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any glued SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
+ GluedNodes.pop_back();
+ }
+ Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
+ Seen);
+ }
+
+ // Insert all the dbg_values which have not already been inserted in source
+ // order sequence.
+ if (HasDbg) {
+ MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
+
+ // Sort the source order instructions and use the order to insert debug
+ // values.
+ std::sort(Orders.begin(), Orders.end(), OrderSorter());
+
+ SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
+ SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
+ // Now emit the rest according to source order.
+ unsigned LastOrder = 0;
+ for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
+ unsigned Order = Orders[i].first;
+ MachineInstr *MI = Orders[i].second;
+ // Insert all SDDbgValue's whose order(s) are before "Order".
+ if (!MI)
+ continue;
+ for (; DI != DE &&
+ (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+ if ((*DI)->isInvalidated())
+ continue;
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(llvm::next(Pos), DbgMI);
+ }
+ }
+ }
+ LastOrder = Order;
+ }
+ // Add trailing DbgValue's before the terminator. FIXME: May want to add
+ // some of them before one or more conditional branches?
+ SmallVector<MachineInstr*, 8> DbgMIs;
+ while (DI != DE) {
+ if (!(*DI)->isInvalidated())
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
+ ++DI;
+ }
+
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+ InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
+ }
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
+/// Return the basic block label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+ return "sunit-dag." + BB->getFullName();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 000000000000..2ff37e0a15e1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,185 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGSDNODES_H
+#define SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+
+namespace llvm {
+ /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+ ///
+ /// Edges between SUnits are initially based on edges in the SelectionDAG,
+ /// and additional edges can be added by the schedulers as heuristics.
+ /// SDNodes such as Constants, Registers, and a few others that are not
+ /// interesting to schedulers are not allocated SUnits.
+ ///
+ /// SDNodes with MVT::Glue operands are grouped along with the flagged
+ /// nodes into a single SUnit so that they are scheduled together.
+ ///
+ /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+ /// edges. Physical register dependence information is not carried in
+ /// the DAG and must be handled explicitly by schedulers.
+ ///
+ class ScheduleDAGSDNodes : public ScheduleDAG {
+ public:
+ MachineBasicBlock *BB;
+ SelectionDAG *DAG; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
+ explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+ virtual ~ScheduleDAGSDNodes() {}
+
+ /// Run - perform scheduling.
+ ///
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb);
+
+ /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+ ///
+ static bool isPassiveNode(SDNode *Node) {
+ if (isa<ConstantSDNode>(Node)) return true;
+ if (isa<ConstantFPSDNode>(Node)) return true;
+ if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<RegisterMaskSDNode>(Node)) return true;
+ if (isa<GlobalAddressSDNode>(Node)) return true;
+ if (isa<BasicBlockSDNode>(Node)) return true;
+ if (isa<FrameIndexSDNode>(Node)) return true;
+ if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<TargetIndexSDNode>(Node)) return true;
+ if (isa<JumpTableSDNode>(Node)) return true;
+ if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<BlockAddressSDNode>(Node)) return true;
+ if (Node->getOpcode() == ISD::EntryToken ||
+ isa<MDNodeSDNode>(Node)) return true;
+ return false;
+ }
+
+ /// NewSUnit - Creates a new SUnit and return a ptr to it.
+ ///
+ SUnit *newSUnit(SDNode *N);
+
+ /// Clone - Creates a clone of the specified SUnit. It does not copy the
+ /// predecessors / successors info nor the temporary scheduling states.
+ ///
+ SUnit *Clone(SUnit *N);
+
+ /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
+ /// are input. This SUnit graph is similar to the SelectionDAG, but
+ /// excludes nodes that aren't interesting to scheduling, and represents
+ /// flagged together nodes with a single SUnit.
+ void BuildSchedGraph(AliasAnalysis *AA);
+
+ /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
+ /// CopyToReg and its only active data operands are CopyFromReg within a
+ /// single block loop.
+ ///
+ void InitVRegCycleFlag(SUnit *SU);
+
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
+ /// computeLatency - Compute node latency.
+ ///
+ virtual void computeLatency(SUnit *SU);
+
+ virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const;
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+ /// consistent with the Sequence of scheduled instructions.
+ void VerifyScheduledSequence(bool isBottomUp);
+
+ /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+ /// according to the order specified in Sequence.
+ ///
+ virtual MachineBasicBlock*
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ void dumpSchedule() const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+
+ virtual std::string getDAGName() const;
+
+ virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ MVT ValueType;
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != NULL; }
+
+ MVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ const SDNode *GetNode() const {
+ return Node;
+ }
+
+ unsigned GetIdx() const {
+ return DefIdx-1;
+ }
+
+ void Advance();
+ private:
+ void InitNodeNumDefs();
+ };
+
+ protected:
+ /// ForceUnitLatencies - Return true if all scheduling edges should be given
+ /// a latency value of one. The default is to return false; schedulers may
+ /// override this as needed.
+ virtual bool forceUnitLatencies() const { return false; }
+
+ private:
+ /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+ /// combined SUnits.
+ void ClusterNeighboringLoads(SDNode *Node);
+ /// ClusterNodes - Cluster certain nodes which should be scheduled together.
+ ///
+ void ClusterNodes();
+
+ /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+ void BuildSchedUnits();
+ void AddSchedEdges();
+
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
new file mode 100644
index 000000000000..58aa1fe0ebbe
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -0,0 +1,278 @@
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ VLIWScheduler("vliw-td", "VLIW scheduler",
+ createVLIWDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports / top-down scheduling.
+///
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+public:
+ ScheduleDAGVLIW(MachineFunction &mf,
+ AliasAnalysis *aa,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+
+ const TargetMachine &tm = mf.getTarget();
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
+
+ ~ScheduleDAGVLIW() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+private:
+ void releaseSucc(SUnit *SU, const SDep &D);
+ void releaseSuccessors(SUnit *SU);
+ void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void listScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGVLIW::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(AA);
+
+ AvailableQueue->initNodes(SUnits);
+
+ listScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ assert(!D.isWeak() && "unexpected artificial DAG edge");
+
+ --SuccSU->NumPredsLeft;
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ PendingQueue.push_back(SuccSU);
+ }
+}
+
+void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ releaseSucc(SU, *I);
+ }
+}
+
+/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ releaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->scheduledNode(SU);
+}
+
+/// listScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGVLIW::listScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ releaseSuccessors(&EntrySU);
+
+ // All leaves to AvailableQueue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While AvailableQueue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+ else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ // Reset DFA state.
+ AvailableQueue->scheduledNode(0);
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ scheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem, just advance
+ // the current cycle and try again.
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop\n");
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createVLIWDAGScheduler - This creates a top-down list scheduler.
+ScheduleDAGSDNodes *
+llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 000000000000..64244313a326
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,6382 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SDNodeDbgValue.h"
+#include "SDNodeOrdering.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+// Default null implementations of the callbacks.
+void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+ return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(EVT VT,
+ const APFloat& Val) {
+ assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ return !losesInfo;
+}
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target and
+ // a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all ones, not whether the individual
+ // constants are.
+ SDValue NotZero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
+ if (CN->getAPIntValue().countTrailingOnes() < EltSize)
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs. Even with the above element type twiddling, this should be OK, as
+ // the same type legalization should have applied to all the elements.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-0
+ // elements.
+ SDValue Zero = N->getOperand(i);
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
+ if (!CN->isNullValue())
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
+ if (!CFPN->getValueAPF().isPosZero())
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one 0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != Zero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+/// isScalarToVector - Return true if the specified node is a
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+ return false;
+ unsigned NumElems = N->getNumOperands();
+ if (NumElems == 1)
+ return false;
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = N->getOperand(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ return false;
+ }
+ return true;
+}
+
+/// allOperandsUndef - Return true if the node has at least one operand
+/// and all operands of the specified node are ISD::UNDEF.
+bool ISD::allOperandsUndef(const SDNode *N) {
+ // Return false if the node has no operands.
+ // This is "logically inconsistent" with the definition of "all" but
+ // is probably the desired behavior.
+ if (N->getNumOperands() == 0)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e ; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+
+ return true;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+ unsigned Operation = Op;
+ if (isInteger)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+
+ return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if the result is an unsigned comparison. Return zero
+/// if the operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set then the resultant comparison DOES suddenly
+ // care about orderedness, and is true when ordered.
+ if (Op > ISD::SETTRUE2)
+ Op &= ~16; // Clear the U bit if the N bit is set.
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
+/// function returns zero if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDValue *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDUse *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+ unsigned short OpC, SDVTList VTList,
+ const SDValue *OpList, unsigned N) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
+/// the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::TargetExternalSymbol:
+ case ISD::ExternalSymbol:
+ llvm_unreachable("Should only be used on nodes with operands");
+ default: break; // Normal nodes don't need extra info.
+ case ISD::TargetConstant:
+ case ISD::Constant:
+ ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+ break;
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+ break;
+ }
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ ID.AddInteger(GA->getTargetFlags());
+ ID.AddInteger(GA->getAddressSpace());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+ case ISD::RegisterMask:
+ ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
+ break;
+ case ISD::SRCVALUE:
+ ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+ break;
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ ID.AddInteger(CP->getTargetFlags());
+ break;
+ }
+ case ISD::TargetIndex: {
+ const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
+ ID.AddInteger(TI->getIndex());
+ ID.AddInteger(TI->getOffset());
+ ID.AddInteger(TI->getTargetFlags());
+ break;
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::STORE: {
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: {
+ const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+ ID.AddInteger(AT->getMemoryVT().getRawBits());
+ ID.AddInteger(AT->getRawSubclassData());
+ ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::PREFETCH: {
+ const MemSDNode *PF = cast<MemSDNode>(N);
+ ID.AddInteger(PF->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ i != e; ++i)
+ ID.AddInteger(SVN->getMaskElt(i));
+ break;
+ }
+ case ISD::TargetBlockAddress:
+ case ISD::BlockAddress: {
+ const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
+ ID.AddPointer(BA->getBlockAddress());
+ ID.AddInteger(BA->getOffset());
+ ID.AddInteger(BA->getTargetFlags());
+ break;
+ }
+ } // end switch (N->getOpcode())
+
+ // Target specific memory nodes could also have address spaces to check.
+ if (N->isTargetMemoryOpcode())
+ ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
+}
+
+/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+ // Handle SDNode leafs with special info.
+ AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries volatility, temporalness, indexing mode, and
+/// extension/truncation information.
+///
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
+ bool isNonTemporal, bool isInvariant) {
+ assert((ConvType & 3) == ConvType &&
+ "ConvType may not require more than 2 bits!");
+ assert((AM & 7) == AM &&
+ "AM may not require more than 3 bits!");
+ return ConvType |
+ (AM << 2) |
+ (isVolatile << 5) |
+ (isNonTemporal << 6) |
+ (isInvariant << 7);
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+ if (N->getValueType(0) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::HANDLENODE:
+ case ISD::EH_LABEL:
+ return true; // Never CSE these nodes.
+ }
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+ if (I->use_empty())
+ DeadNodes.push_back(I);
+
+ RemoveDeadNodes(DeadNodes);
+
+ // If the root changed (e.g. it was a dead load, update the root).
+ setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.pop_back_val();
+
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, 0);
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Operand = Use.getNode();
+ Use.set(SDValue());
+
+ // Now that we removed this operand, see if there are no uses of it left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+
+ DeallocateNode(N);
+ }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N){
+ SmallVector<SDNode*, 16> DeadNodes(1, N);
+
+ // Create a dummy node that adds a reference to the root node, preventing
+ // it from being deleted. (This matters if the root is an operand of the
+ // dead node.)
+ HandleSDNode Dummy(getRoot());
+
+ RemoveDeadNodes(DeadNodes);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Finally, remove uses due to operands of this node, remove from the
+ // AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+ assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // Drop all of the operands and decrement used node's use counts.
+ N->DropOperands();
+
+ DeallocateNode(N);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+
+ // Set the opcode to DELETED_NODE to help catch bugs when node
+ // memory is reallocated.
+ N->NodeType = ISD::DELETED_NODE;
+
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+
+ // Remove the ordering of this node.
+ Ordering->remove(N);
+
+ // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
+ ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
+ for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
+ DbgVals[i]->setIsInvalidated();
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// correspond to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future request for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::HANDLENODE: return false; // noop.
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol: {
+ ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+ Erased = TargetExternalSymbols.erase(
+ std::pair<std::string,unsigned char>(ESN->getSymbol(),
+ ESN->getTargetFlags()));
+ break;
+ }
+ case ISD::VALUETYPE: {
+ EVT VT = cast<VTSDNode>(N)->getVT();
+ if (VT.isExtended()) {
+ Erased = ExtendedValueTypeNodes.erase(VT);
+ } else {
+ Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0;
+ ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0;
+ }
+ break;
+ }
+ default:
+ // Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
+ !N->isMachineOpcode() && !doNotCSE(N)) {
+ N->dump(this);
+ dbgs() << "\n";
+ llvm_unreachable("Node is not in map!");
+ }
+#endif
+ return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
+ // For node types that aren't CSE'd, just act as if no identical node
+ // already exists.
+ if (!doNotCSE(N)) {
+ SDNode *Existing = CSEMap.GetOrInsertNode(N);
+ if (Existing != N) {
+ // If there was already an existing matching node, use ReplaceAllUsesWith
+ // to replace the dead one with the existing one. This can cause
+ // recursive merging of other unrelated nodes down the line.
+ ReplaceAllUsesWith(N, Existing);
+
+ // N is now dead. Inform the listeners and delete it.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, Existing);
+ DeleteNodeNotInCSEMaps(N);
+ return;
+ }
+ }
+
+ // If the node doesn't already exist, we updated it. Inform listeners.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDValue Op1, SDValue Op2,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ const SDValue *Ops,unsigned NumOps,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+#ifndef NDEBUG
+/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid.
+static void VerifyNodeCommon(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ EVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ assert(I->getValueType() == N->getOperand(0).getValueType() &&
+ "Operands must all have the same type");
+ }
+ break;
+ }
+ }
+}
+
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ // The SDNode allocators cannot be used to allocate nodes with fields that are
+ // not present in an SDNode!
+ assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+ assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+ assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+ assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+ assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+ assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+ assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+ assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+ assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+ assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+ assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+ assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+ assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+ assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+ assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+ assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+ assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+ assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+ assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+ VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+ // The MachineNode allocators cannot be used to allocate nodes with fields
+ // that are not present in a MachineNode!
+ // Currently there are no such nodes.
+
+ VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
+/// getEVTAlignment - Compute the default alignment value for the
+/// given type.
+///
+unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+ Type *Ty = VT == MVT::iPTR ?
+ PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+ VT.getTypeForEVT(*getContext());
+
+ return TLI.getDataLayout()->getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
+ : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
+ TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(),
+ getVTList(MVT::Other)),
+ Root(getEntryNode()), Ordering(0), UpdateListeners(0) {
+ AllNodes.push_back(&EntryNode);
+ Ordering = new SDNodeOrdering();
+ DbgInfo = new SDDbgInfo();
+}
+
+void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) {
+ MF = &mf;
+ TTI = tti;
+ Context = &mf.getFunction()->getContext();
+}
+
+SelectionDAG::~SelectionDAG() {
+ assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
+ allnodes_clear();
+ delete Ordering;
+ delete DbgInfo;
+}
+
+void SelectionDAG::allnodes_clear() {
+ assert(&*AllNodes.begin() == &EntryNode);
+ AllNodes.remove(AllNodes.begin());
+ while (!AllNodes.empty())
+ DeallocateNode(AllNodes.begin());
+}
+
+void SelectionDAG::clear() {
+ allnodes_clear();
+ OperandAllocator.Reset();
+ CSEMap.clear();
+
+ ExtendedValueTypeNodes.clear();
+ ExternalSymbols.clear();
+ TargetExternalSymbols.clear();
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+ static_cast<CondCodeSDNode*>(0));
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+ static_cast<SDNode*>(0));
+
+ EntryNode.UseList = 0;
+ AllNodes.push_back(&EntryNode);
+ Root = getEntryNode();
+ Ordering->clear();
+ DbgInfo->clear();
+}
+
+SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ANY_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
+ assert(!VT.isVector() &&
+ "getZeroExtendInReg should use the vector element type instead of "
+ "the vector type!");
+ if (Op.getValueType() == VT) return Op;
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Imm = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ return getNode(ISD::AND, DL, Op.getValueType(), Op,
+ getConstant(Imm, Op.getValueType()));
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+///
+SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ SDValue NegOne =
+ getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
+
+SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
+ EVT EltVT = VT.getScalarType();
+ assert((EltVT.getSizeInBits() >= 64 ||
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ "getConstant with a uint64_t value that doesn't fit in the type!");
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
+ return getConstant(*ConstantInt::get(*Context, Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
+ assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+ EVT EltVT = VT.getScalarType();
+ const ConstantInt *Elt = &Val;
+
+ // In some cases the vector type is legal but the element type is illegal and
+ // needs to be promoted, for example v8i8 on ARM. In this case, promote the
+ // inserted value (the type does not need to match the vector element type).
+ // Any extra bits introduced will be truncated away.
+ if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypePromoteInteger) {
+ EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT);
+ APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits());
+ Elt = ConstantInt::get(*getContext(), NewVal);
+ }
+
+ assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(Elt);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
+ return getConstant(Val, TLI.getPointerTy(), isTarget);
+}
+
+
+SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) {
+ return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
+ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+ EVT EltVT = VT.getScalarType();
+
+ // Do the map lookup using the actual bit pattern for the floating point
+ // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+ // we don't have issues with SNANs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&V);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ // FIXME DebugLoc info might be appropriate here
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
+ EVT EltVT = VT.getScalarType();
+ if (EltVT==MVT::f32)
+ return getConstantFP(APFloat((float)Val), VT, isTarget);
+ else if (EltVT==MVT::f64)
+ return getConstantFP(APFloat(Val), VT, isTarget);
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 ||
+ EltVT==MVT::f16) {
+ bool ignored;
+ APFloat apf = APFloat(Val);
+ apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ &ignored);
+ return getConstantFP(apf, VT, isTarget);
+ } else
+ llvm_unreachable("Unsupported type in getConstantFP");
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
+ EVT VT, int64_t Offset,
+ bool isTargetGA,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTargetGA) &&
+ "Cannot set target flags on target-independent globals");
+
+ // Truncate (with sign-extension) the offset value to the pointer size.
+ unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
+ if (BitWidth < 64)
+ Offset = SignExtend64(Offset, BitWidth);
+
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee for determining thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ unsigned Opc;
+ if (GVar && GVar->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ ID.AddInteger(GV->getType()->getAddressSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT,
+ Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(FI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent jump tables");
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(JTI);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+ Alignment, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ C->addSelectionDAGCSEId(ID);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+ Alignment, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
+ unsigned char TargetFlags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0);
+ ID.AddInteger(Index);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MBB);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(EVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
+ ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
+
+ SDNode *&N = VT.isExtended() ?
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) VTSDNode(VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+ unsigned char TargetFlags) {
+ SDNode *&N =
+ TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+ TargetFlags)];
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (CondCodeNodes[Cond] == 0) {
+ CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
+ CondCodeNodes[Cond] = N;
+ AllNodes.push_back(N);
+ }
+
+ return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in
+// the shuffle mask M that point at N1 to point at N2, and indices that point
+// N2 to point at N1.
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+ std::swap(N1, N2);
+ int NElts = M.size();
+ for (int i = 0; i != NElts; ++i) {
+ if (M[i] >= NElts)
+ M[i] -= NElts;
+ else if (M[i] >= 0)
+ M[i] += NElts;
+ }
+}
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
+ SDValue N2, const int *Mask) {
+ assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Vector Shuffle VTs must be a vectors");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+ && "Vector Shuffle VTs must have same element type");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // Validate that all indices in Mask are within the range of the elements
+ // input to the shuffle.
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i != NElts; ++i) {
+ assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+ MaskVec.push_back(Mask[i]);
+ }
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N1 == N2) {
+ N2 = getUNDEF(VT);
+ for (unsigned i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N1.getOpcode() == ISD::UNDEF)
+ commuteShuffle(N1, N2, MaskVec);
+
+ // Canonicalize all index into lhs, -> shuffle lhs, undef
+ // Canonicalize all index into rhs, -> shuffle rhs, undef
+ bool AllLHS = true, AllRHS = true;
+ bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= (int)NElts) {
+ if (N2Undef)
+ MaskVec[i] = -1;
+ else
+ AllLHS = false;
+ } else if (MaskVec[i] >= 0) {
+ AllRHS = false;
+ }
+ }
+ if (AllLHS && AllRHS)
+ return getUNDEF(VT);
+ if (AllLHS && !N2Undef)
+ N2 = getUNDEF(VT);
+ if (AllRHS) {
+ N1 = getUNDEF(VT);
+ commuteShuffle(N1, N2, MaskVec);
+ }
+
+ // If Identity shuffle, or all shuffle in to undef, return that node.
+ bool AllUndef = true;
+ bool Identity = true;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] >= 0) AllUndef = false;
+ }
+ if (Identity && NElts == N1.getValueType().getVectorNumElements())
+ return N1;
+ if (AllUndef)
+ return getUNDEF(VT);
+
+ FoldingSetNodeID ID;
+ SDValue Ops[2] = { N1, N2 };
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+ for (unsigned i = 0; i != NElts; ++i)
+ ID.AddInteger(MaskVec[i]);
+
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ // Allocate the mask array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+ // the node is deallocated, but recovered when the NodeAllocator is released.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+ memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+
+ ShuffleVectorSDNode *N =
+ new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
+ SDValue Val, SDValue DTy,
+ SDValue STy, SDValue Rnd, SDValue Sat,
+ ISD::CvtCode Code) {
+ // If the src and dest types are the same and the conversion is between
+ // integer types of the same sign or two floats, no conversion is necessary.
+ if (DTy == STy &&
+ (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+ return Val;
+
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5,
+ Code);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+ ID.AddInteger(RegNo);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0);
+ ID.AddPointer(RegMask);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1);
+ ID.AddPointer(Label);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
+ int64_t Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(BA);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+ assert((!V || V->getType()->isPointerTy()) &&
+ "SrcValue is not a pointer?");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(V);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) SrcValueSDNode(V);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getMDNode - Return an MDNodeSDNode which holds an MDNode.
+SDValue SelectionDAG::getMDNode(const MDNode *MD) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MD);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+/// getShiftAmountOperand - Return the specified value casted to
+/// the target's desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
+ EVT OpTy = Op.getValueType();
+ EVT ShTy = TLI.getShiftAmountTy(LHSTy);
+ if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+ ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ return getNode(Opcode, Op.getDebugLoc(), ShTy, Op);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ unsigned ByteSize = VT.getStoreSize();
+ Type *Ty = VT.getTypeForEVT(*getContext());
+ unsigned StackAlign =
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign);
+
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+/// CreateStackTemporary - Create a stack temporary suitable for holding
+/// either of the specified value types.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
+ unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
+ VT2.getStoreSizeInBits())/8;
+ Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ Type *Ty2 = VT2.getTypeForEVT(*getContext());
+ const DataLayout *TD = TLI.getDataLayout();
+ unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
+ TD->getPrefTypeAlignment(Ty2));
+
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
+ SDValue N2, ISD::CondCode Cond, DebugLoc dl) {
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return getConstant(1, VT);
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+ const APInt &C2 = N2C->getAPIntValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: return getConstant(C1 == C2, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, VT);
+ case ISD::SETULT: return getConstant(C1.ult(C2), VT);
+ case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
+ case ISD::SETULE: return getConstant(C1.ule(C2), VT);
+ case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
+ case ISD::SETLT: return getConstant(C1.slt(C2), VT);
+ case ISD::SETGT: return getConstant(C1.sgt(C2), VT);
+ case ISD::SETLE: return getConstant(C1.sle(C2), VT);
+ case ISD::SETGE: return getConstant(C1.sge(C2), VT);
+ }
+ }
+ }
+ if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+ APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT);
+ case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT);
+ case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT);
+ case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, VT);
+ case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT);
+ case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT);
+ }
+ } else {
+ // Ensure that the constant occurs on the RHS.
+ return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+ }
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ // This predicate is not safe for vector operations.
+ if (Op.getValueType().isVector())
+ return false;
+
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+ unsigned Depth) const {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth) const {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+ if (Depth == 6)
+ return; // Limit search depth.
+
+ APInt KnownZero2, KnownOne2;
+
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
+ return;
+ case ISD::AND:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ return;
+ case ISD::OR:
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ return;
+ case ISD::XOR: {
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case ISD::MUL: {
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conserative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ return;
+ }
+ case ISD::UDIV: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+ return;
+ }
+ case ISD::SELECT:
+ ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SELECT_CC:
+ ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ return;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI.getBooleanContents(Op.getValueType().isVector()) ==
+ TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+ case ISD::SHL:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShAmt;
+ KnownOne <<= ShAmt;
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ }
+ return;
+ case ISD::SRL:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ return;
+ case ISD::SRA:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bits.
+ APInt SignBit = APInt::getSignBit(BitWidth);
+ SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask.
+
+ if (KnownZero.intersects(SignBit)) {
+ KnownZero |= HighBits; // New bits are known zero.
+ } else if (KnownOne.intersects(SignBit)) {
+ KnownOne |= HighBits; // New bits are known one.
+ }
+ }
+ return;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned EBits = EVT.getScalarType().getSizeInBits();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
+
+ APInt InSignBit = APInt::getSignBit(EBits);
+ APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InSignBit = InSignBit.zext(BitWidth);
+ if (NewBits.getBoolValue())
+ InputDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownOne &= InputDemandedBits;
+ KnownZero &= InputDemandedBits;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ return;
+ }
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownOne.clearAllBits();
+ return;
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ EVT VT = LD->getMemoryVT();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ } else if (const MDNode *Ranges = LD->getRanges()) {
+ computeMaskedBitsLoad(*Ranges, KnownZero);
+ }
+ return;
+ }
+ case ISD::ZERO_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ return;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InSignBit = APInt::getSignBit(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
+
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+
+ // Note if the sign bit is known to be zero or one.
+ bool SignBitKnownZero = KnownZero.isNegative();
+ bool SignBitKnownOne = KnownOne.isNegative();
+ assert(!(SignBitKnownZero && SignBitKnownOne) &&
+ "Sign bit can't be known to be both zero and one!");
+
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero or one, the top bits match.
+ if (SignBitKnownZero)
+ KnownZero |= NewBits;
+ else if (SignBitKnownOne)
+ KnownOne |= NewBits;
+ return;
+ }
+ case ISD::ANY_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ return;
+ }
+ case ISD::TRUNCATE: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ KnownZero = KnownZero.zext(InBits);
+ KnownOne = KnownOne.zext(InBits);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ break;
+ }
+ case ISD::AssertZext: {
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero |= (~InMask);
+ KnownOne &= (~KnownZero);
+ return;
+ }
+ case ISD::FGETSIGN:
+ // All bits are zero except the low bit.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+
+ case ISD::SUB: {
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+ // We know that the top bits of C-X are clear if X contains less bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (CLHS->getAPIntValue().isNonNegative()) {
+ unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
+ }
+ }
+ }
+ }
+ // fall through
+ case ISD::ADD:
+ case ISD::ADDE: {
+ // Output known-0 bits are known if clear or set in both the low clear bits
+ // common to both LHS & RHS. For example, 8+(X<<3) is known to have the
+ // low 3 bits clear.
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ KnownZeroOut = std::min(KnownZeroOut,
+ KnownZero2.countTrailingOnes());
+
+ if (Op.getOpcode() == ISD::ADD) {
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+
+ // With ADDE, a carry bit may be added in, so we can only use this
+ // information if we know (at least) that the low two bits are clear. We
+ // then return to the caller that the low bit is unknown but that other bits
+ // are known zero.
+ if (KnownZeroOut >= 2) // ADDE
+ KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
+ return;
+ }
+ case ISD::SREM:
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
+
+ // The low bits of the first operand are unchanged by the srem.
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ }
+ }
+ return;
+ case ISD::UREM: {
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ KnownZero |= ~LowBits;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1);
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+
+ uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
+ return;
+ }
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ if (unsigned Align = InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ return;
+ }
+ break;
+
+ default:
+ if (Op.getOpcode() < ISD::BUILTIN_OP_END)
+ break;
+ // Fallthrough
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // Allow the target to implement this method for its nodes.
+ TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
+ return;
+ }
+}
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
+ EVT VT = Op.getValueType();
+ assert(VT.isInteger() && "Invalid VT!");
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp;
+
+ case ISD::Constant: {
+ const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+ return Val.getNumSignBits();
+ }
+
+ case ISD::SIGN_EXTEND:
+ Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp =
+ cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
+ Tmp = VTBits-Tmp+1;
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ return std::max(Tmp, Tmp2);
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ // SRA X, C -> adds C sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > VTBits) Tmp = VTBits;
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (C->getZExtValue() >= VTBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case ISD::SELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI.getBooleanContents(Op.getValueType().isVector()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+ // Handle rotate right by N like a rotate left by 32-N.
+ if (Op.getOpcode() == ISD::ROTR)
+ RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ }
+ break;
+ case ISD::ADD:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ case ISD::TRUNCATE:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // If we are looking at the loaded value of the SDNode.
+ if (Op.getResNo() == 0) {
+ // Handle LOADX separately here. EXTLOAD case will fallthrough.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // '17' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // '16' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+ if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+
+ APInt Mask;
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-VTBits;
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
+
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD. This handles the equivalence:
+/// X|Cst == X+Cst iff X&Cst = 0.
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0),
+ cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ return false;
+
+ return true;
+}
+
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+ // If we're told that NaNs won't happen, assume they won't.
+ if (getTarget().Options.NoNaNsFPMath)
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->getValueAPF().isNaN();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+
+ // TODO: Recognize more cases here.
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return !C->isNullValue();
+ break;
+ }
+
+ return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+ // For for negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ EVT VT, SDValue Operand) {
+ // Constant fold unary operations with an integer constant operand.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+ const APInt &Val = C->getAPIntValue();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND:
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
+ (void)apf.convertFromAPInt(Val,
+ Opcode==ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);
+ else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);
+ break;
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), VT);
+ case ISD::CTPOP:
+ return getConstant(Val.countPopulation(), VT);
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return getConstant(Val.countLeadingZeros(), VT);
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return getConstant(Val.countTrailingZeros(), VT);
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+ APFloat V = C->getValueAPF(); // make copy
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ integerPart x[2];
+ bool ignored;
+ assert(integerPartWidth >= 64);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+ Opcode==ISD::FP_TO_SINT,
+ APFloat::rmTowardZero, &ignored);
+ if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ APInt api(VT.getSizeInBits(), x);
+ return getConstant(api, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
+ }
+ }
+
+ unsigned OpOpcode = Operand.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ case ISD::MERGE_VALUES:
+ case ISD::CONCAT_VECTORS:
+ return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+ case ISD::FP_EXTEND:
+ assert(VT.isFloatingPoint() &&
+ Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+ if (Operand.getValueType() == VT) return Operand; // noop conversion.
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (Operand.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid SIGN_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid sext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // sext(undef) = 0, because the top bits will all be the same.
+ return getConstant(0, VT);
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ZERO_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid zext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT,
+ Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // zext(undef) = 0, because the top bits will be zero.
+ return getConstant(0, VT);
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid anyext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // (ext (trunx x)) -> x
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = Operand.getNode()->getOperand(0);
+ if (OpOp.getValueType() == VT)
+ return OpOp;
+ }
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+ "Invalid truncate node, src < dst!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+ .bitsLT(VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ return Operand.getNode()->getOperand(0);
+ }
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::BITCAST:
+ // Basic sanity checking.
+ assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+ && "Cannot BITCAST between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !Operand.getValueType().isVector() &&
+ (VT.getVectorElementType() == Operand.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ Operand.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Operand.getOperand(1)) &&
+ Operand.getConstantOperandVal(1) == 0 &&
+ Operand.getOperand(0).getValueType() == VT)
+ return Operand.getOperand(0);
+ break;
+ case ISD::FNEG:
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
+ return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+ Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return Operand.getNode()->getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ SDValue Ops[1] = { Operand };
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
+ SDNode *Cst1, SDNode *Cst2) {
+ SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
+ SmallVector<SDValue, 4> Outputs;
+ EVT SVT = VT.getScalarType();
+
+ ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
+ ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
+ if (Scalar1 && Scalar2) {
+ // Scalar instruction.
+ Inputs.push_back(std::make_pair(Scalar1, Scalar2));
+ } else {
+ // For vectors extract each constant element into Inputs so we can constant
+ // fold them individually.
+ BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV1 || !BV2)
+ return SDValue();
+
+ assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+
+ for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+ ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
+ ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
+ if (!V1 || !V2) // Not a constant, bail.
+ return SDValue();
+
+ // Avoid BUILD_VECTOR nodes that perform implicit truncation.
+ // FIXME: This is valid and could be handled by truncating the APInts.
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ return SDValue();
+
+ Inputs.push_back(std::make_pair(V1, V2));
+ }
+ }
+
+ // We have a number of constant values, constant fold them element by element.
+ for (unsigned I = 0, E = Inputs.size(); I != E; ++I) {
+ const APInt &C1 = Inputs[I].first->getAPIntValue();
+ const APInt &C2 = Inputs[I].second->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD:
+ Outputs.push_back(getConstant(C1 + C2, SVT));
+ break;
+ case ISD::SUB:
+ Outputs.push_back(getConstant(C1 - C2, SVT));
+ break;
+ case ISD::MUL:
+ Outputs.push_back(getConstant(C1 * C2, SVT));
+ break;
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.udiv(C2), SVT));
+ break;
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.urem(C2), SVT));
+ break;
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.sdiv(C2), SVT));
+ break;
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.srem(C2), SVT));
+ break;
+ case ISD::AND:
+ Outputs.push_back(getConstant(C1 & C2, SVT));
+ break;
+ case ISD::OR:
+ Outputs.push_back(getConstant(C1 | C2, SVT));
+ break;
+ case ISD::XOR:
+ Outputs.push_back(getConstant(C1 ^ C2, SVT));
+ break;
+ case ISD::SHL:
+ Outputs.push_back(getConstant(C1 << C2, SVT));
+ break;
+ case ISD::SRL:
+ Outputs.push_back(getConstant(C1.lshr(C2), SVT));
+ break;
+ case ISD::SRA:
+ Outputs.push_back(getConstant(C1.ashr(C2), SVT));
+ break;
+ case ISD::ROTL:
+ Outputs.push_back(getConstant(C1.rotl(C2), SVT));
+ break;
+ case ISD::ROTR:
+ Outputs.push_back(getConstant(C1.rotr(C2), SVT));
+ break;
+ default:
+ return SDValue();
+ }
+ }
+
+ // Handle the scalar case first.
+ if (Outputs.size() == 1)
+ return Outputs.back();
+
+ // Otherwise build a big vector out of the scalar elements we generated.
+ return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(),
+ Outputs.size());
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1,
+ SDValue N2) {
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ switch (Opcode) {
+ default: break;
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ if (N1 == N2) return N1;
+ break;
+ case ISD::CONCAT_VECTORS:
+ // Concat of UNDEFs is UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF &&
+ N2.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+ // one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::AND:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N2;
+ if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (getTarget().Options.UnsafeFPMath) {
+ if (Opcode == ISD::FADD) {
+ // 0+x --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+ if (CFP->getValueAPF().isZero())
+ return N2;
+ // x+0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FSUB) {
+ // x-0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FMUL) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1);
+ SDValue V = N2;
+
+ // If the first operand isn't the constant, try the second
+ if (!CFP) {
+ CFP = dyn_cast<ConstantFPSDNode>(N2);
+ V = N1;
+ }
+
+ if (CFP) {
+ // 0*x --> 0
+ if (CFP->isZero())
+ return SDValue(CFP,0);
+ // 1*x --> x
+ if (CFP->isExactlyValue(1.0))
+ return V;
+ }
+ }
+ }
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(VT == N1.getValueType() &&
+ "Shift operators return type must be the same as their first arg");
+ assert(VT.isInteger() && N2.getValueType().isInteger() &&
+ "Shifts only work on integers");
+ assert((!VT.isVector() || VT == N2.getValueType()) &&
+ "Vector shift amounts must be in the same as their first arg");
+ // Verify that the shift amount VT is bit enough to hold valid shift
+ // amounts. This catches things like trying to shift an i1024 value by an
+ // i8, which is easy to fall into in generic code that uses
+ // TLI.getShiftAmount().
+ assert(N2.getValueType().getSizeInBits() >=
+ Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
+ if (VT == MVT::i1)
+ return N1;
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::FP_ROUND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg round!");
+ assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+ "Cannot FP_ROUND_INREG integer types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "FP_ROUND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in FP_ROUND_INREG");
+ assert(EVT.bitsLE(VT) && "Not rounding down!");
+ (void)EVT;
+ if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
+ break;
+ }
+ case ISD::FP_ROUND:
+ assert(VT.isFloatingPoint() &&
+ N1.getValueType().isFloatingPoint() &&
+ VT.bitsLE(N1.getValueType()) &&
+ isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(!EVT.isVector() &&
+ "AssertSExt/AssertZExt type should be the vector element type "
+ "rather than the vector type!");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (VT == EVT) return N1; // noop assertion.
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "SIGN_EXTEND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in SIGN_EXTEND_INREG");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (EVT == VT) return N1; // Not actually extending
+
+ if (N1C) {
+ APInt Val = N1C->getAPIntValue();
+ unsigned FromBits = EVT.getScalarType().getSizeInBits();
+ Val <<= Val.getBitWidth()-FromBits;
+ Val = Val.ashr(Val.getBitWidth()-FromBits);
+ return getConstant(Val, VT);
+ }
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers.
+ if (N2C &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0) {
+ unsigned Factor =
+ N1.getOperand(0).getValueType().getVectorNumElements();
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ N1.getOperand(N2C->getZExtValue() / Factor),
+ getConstant(N2C->getZExtValue() % Factor,
+ N2.getValueType()));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+ // expanding large vector constants.
+ if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue Elt = N1.getOperand(N2C->getZExtValue());
+
+ if (VT != Elt.getValueType())
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated, and the result implicitly
+ // extended. Make that explicit here.
+ Elt = getAnyExtOrTrunc(Elt, DL, VT);
+
+ return Elt;
+ }
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ // If the indices are the same, return the inserted element else
+ // if the indices are known different, extract the element from
+ // the original vector.
+ SDValue N1Op2 = N1.getOperand(2);
+ ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode());
+
+ if (N1Op2C && N2C) {
+ if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+ if (VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ else
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ }
+
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+ assert(!N1.getValueType().isVector() && !VT.isVector() &&
+ (N1.getValueType().isInteger() == VT.isInteger()) &&
+ N1.getValueType() != VT &&
+ "Wrong types for EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getZExtValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ unsigned ElementSize = VT.getSizeInBits();
+ unsigned Shift = ElementSize * N2C->getZExtValue();
+ APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+ return getConstant(ShiftedVal.trunc(ElementSize), VT);
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Index = N2;
+ if (VT.isSimple() && N1.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Extract subvector VTs must be a vectors!");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Extract subvector must be from larger vector to smaller vector!");
+
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((VT.getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= N1.getValueType().getVectorNumElements())
+ && "Extract subvector overflow!");
+ }
+
+ // Trivial extraction.
+ if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+ return N1;
+ }
+ break;
+ }
+ }
+
+ // Perform trivial constant folding.
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());
+ if (SV.getNode()) return SV;
+
+ // Canonicalize constant to RHS if commutative.
+ if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ }
+
+ // Constant fold FP operations.
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ if (N1CFP) {
+ if (!N2CFP && isCommutativeBinOp(Opcode)) {
+ // Canonicalize constant to RHS if commutative.
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ } else if (N2CFP) {
+ APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+ APFloat::opStatus s;
+ switch (Opcode) {
+ case ISD::FADD:
+ s = V1.add(V2, APFloat::rmNearestTiesToEven);
+ if (s != APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FSUB:
+ s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FMUL:
+ s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FDIV:
+ s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FREM :
+ s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FCOPYSIGN:
+ V1.copySign(V2);
+ return getConstantFP(V1, VT);
+ default: break;
+ }
+ }
+
+ if (Opcode == ISD::FP_ROUND) {
+ APFloat V = N1CFP->getValueAPF(); // make copy
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ }
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.getOpcode() == ISD::UNDEF) {
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SRA:
+ return N1; // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(undef, arg2) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N2;
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.getOpcode() == ISD::UNDEF) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.getOpcode() == ISD::UNDEF)
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, VT);
+ // fallthrough
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return N2; // fold op(arg1, undef) -> undef
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (getTarget().Options.UnsafeFPMath)
+ return N2;
+ break;
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(arg1, undef) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N1;
+ case ISD::OR:
+ if (!VT.isVector())
+ return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ // For vectors, we can't easily build an all one vector, just return
+ // the LHS.
+ return N1;
+ case ISD::SRA:
+ return N1;
+ }
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) {
+ SDValue Ops[] = { N1, N2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ // Perform various simplifications.
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ switch (Opcode) {
+ case ISD::CONCAT_VECTORS:
+ // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+ // one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR &&
+ N3.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::SETCC: {
+ // Use FoldSetCC to simplify SETCC's.
+ SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
+ if (Simp.getNode()) return Simp;
+ break;
+ }
+ case ISD::SELECT:
+ if (N1C) {
+ if (N1C->getZExtValue())
+ return N2; // select true, X, Y -> X
+ return N3; // select false, X, Y -> Y
+ }
+
+ if (N2 == N3) return N2; // select C, X, X -> X
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Index = N3;
+ if (VT.isSimple() && N1.getValueType().isSimple()
+ && N2.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ N2.getValueType().isVector() &&
+ "Insert subvector VTs must be a vectors");
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((N2.getValueType().getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= VT.getVectorNumElements())
+ && "Insert subvector overflow!");
+ }
+
+ // Trivial insertion.
+ if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+ return N2;
+ }
+ break;
+ }
+ case ISD::BITCAST:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) {
+ SDValue Ops[] = { N1, N2, N3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VT, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VT, Ops, 5);
+}
+
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
+ SmallVector<SDValue, 8> ArgChains;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each stack argument.
+ for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
+ UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0)
+ ArgChains.push_back(SDValue(L, 1));
+
+ // Build a tokenfactor for all the chains.
+ return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(Value.getOpcode() != ISD::UNDEF);
+
+ unsigned NumBits = VT.getScalarType().getSizeInBits();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ assert(C->getAPIntValue().getBitWidth() == 8);
+ APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
+ if (VT.isInteger())
+ return DAG.getConstant(Val, VT);
+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT);
+ }
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
+ }
+
+ return Value;
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue. Except this is only
+/// used when a memcpy is turned into a memset when the source is a constant
+/// string ptr.
+static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
+ const TargetLowering &TLI, StringRef Str) {
+ // Handle vector with all elements zero.
+ if (Str.empty()) {
+ if (VT.isInteger())
+ return DAG.getConstant(0, VT);
+ else if (VT == MVT::f32 || VT == MVT::f64)
+ return DAG.getConstantFP(0.0, VT);
+ else if (VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElts)));
+ } else
+ llvm_unreachable("Expected type!");
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumVTBits = VT.getSizeInBits();
+ unsigned NumVTBytes = NumVTBits / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+
+ APInt Val(NumVTBits, 0);
+ if (TLI.isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
+ }
+
+ // If the "cost" of materializing the integer immediate is 1 or free, then
+ // it is cost effective to turn the load into the immediate.
+ const TargetTransformInfo *TTI = DAG.getTargetTransformInfo();
+ if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2)
+ return DAG.getConstant(Val, VT);
+ return SDValue(0, 0);
+}
+
+/// getMemBasePlusOffset - Returns base and offset node for the
+///
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ SelectionDAG &DAG) {
+ EVT VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+ VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false);
+}
+
+/// FindOptimalMemOpLowering - Determines the optimial series memory ops
+/// to replace the memset / memcpy. Return true if the number of memory ops
+/// is below the threshold. It returns the types of the sequence of
+/// memory ops to perform memset / memcpy by reference.
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ bool AllowOverlap,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+ "Expecting memcpy / memset source to meet alignment requirement!");
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
+ EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
+ IsMemset, ZeroMemset, MemcpyStrSrc,
+ DAG.getMachineFunction());
+
+ if (VT == MVT::Other) {
+ if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
+ TLI.allowsUnalignedMemoryAccesses(VT)) {
+ VT = TLI.getPointerTy();
+ } else {
+ switch (DstAlign & 7) {
+ case 0: VT = MVT::i64; break;
+ case 4: VT = MVT::i32; break;
+ case 2: VT = MVT::i16; break;
+ default: VT = MVT::i8; break;
+ }
+ }
+
+ MVT LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+ assert(LVT.isInteger());
+
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+ // For now, only use non-vector load / store's for the left-over pieces.
+ EVT NewVT = VT;
+ unsigned NewVTSize;
+
+ bool Found = false;
+ if (VT.isVector() || VT.isFloatingPoint()) {
+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+ if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+ TLI.isSafeMemOpType(NewVT.getSimpleVT()))
+ Found = true;
+ else if (NewVT == MVT::i64 &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+ TLI.isSafeMemOpType(MVT::f64)) {
+ // i64 is usually not legal on 32-bit targets, but f64 may be.
+ NewVT = MVT::f64;
+ Found = true;
+ }
+ }
+
+ if (!Found) {
+ do {
+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+ if (NewVT == MVT::i8)
+ break;
+ } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
+ }
+ NewVTSize = NewVT.getSizeInBits() / 8;
+
+ // If the new VT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ // FIXME: Only does this for 64-bit or more since we don't have proper
+ // cost model for unaligned load / store.
+ bool Fast;
+ if (NumMemOps && AllowOverlap &&
+ VTSize >= 8 && NewVTSize < Size &&
+ TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)
+ VTSize = Size;
+ else {
+ VT = NewVT;
+ VTSize = NewVTSize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Turn a memcpy of undef to nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ // TODO: In the AlwaysInline case, if the size is big then generate a loop
+ // rather than maybe a humongous number of loads and stores.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize =
+ MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ StringRef Str;
+ bool CopyFromStr = isMemSrcFromString(Src, Str);
+ bool isZeroStr = CopyFromStr && Str.empty();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align),
+ (isZeroStr ? 0 : SrcAlign),
+ false, false, CopyFromStr, true, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Align &&
+ TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))
+ NewAlign /= 2;
+
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ uint64_t SrcOff = 0, DstOff = 0;
+ for (unsigned i = 0; i != NumMemOps; ++i) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ SrcOff -= VTSize - Size;
+ DstOff -= VTSize - Size;
+ }
+
+ if (CopyFromStr &&
+ (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
+ // It's unlikely a store of a vector immediate can be done in a single
+ // instruction. It would require a load from a constantpool first.
+ // We only handle zero vectors here.
+ // FIXME: Handle other cases where store of vector immediate is done in
+ // a single instruction.
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
+ if (Value.getNode())
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
+ }
+
+ if (!Store.getNode()) {
+ // The type might not be legal for the target. This should only happen
+ // if the type is smaller than a legal type, as on PPC, so the right
+ // thing to do is generate a LoadExt/StoreTrunc pair. These simplify
+ // to Load/Store if NVT==VT.
+ // FIXME does the case above also need this?
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(NVT.bitsGE(VT));
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
+ MinAlign(SrcAlign, SrcOff));
+ Store = DAG.getTruncStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), VT, isVol,
+ false, Align);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ Size -= VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Turn a memmove of undef to nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align), SrcAlign,
+ false, false, false, false, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ uint64_t SrcOff = 0, DstOff = 0;
+ SmallVector<SDValue, 8> LoadValues;
+ SmallVector<SDValue, 8> LoadChains;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Value = DAG.getLoad(VT, dl, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcPtrInfo.getWithOffset(SrcOff), isVol,
+ false, false, SrcAlign);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ OutChains.clear();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Store = DAG.getStore(Chain, dl, LoadValues[i],
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+ // Turn a memset of undef to nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+ // Expand memset to a series of load/store ops if the size operand
+ // falls below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ bool IsZeroVal =
+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
+ Size, (DstAlignCanChange ? 0 : Align), 0,
+ true, IsZeroVal, false, true, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+ unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ DstOff -= VTSize - Size;
+ }
+
+ // If this store is smaller than the largest store see whether we can get
+ // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
+ SDValue Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVol, false, Align);
+ OutChains.push_back(Store);
+ DstOff += VT.getSizeInBits() / 8;
+ Size -= VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),Align,
+ isVol, false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ true, DstPtrInfo, SrcPtrInfo);
+ }
+
+ // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
+ // memcpy is not guaranteed to be safe. libc memcpys aren't required to
+ // respect volatile, so they may do things like read or write memory
+ // beyond the given memory regions. But fixing this isn't easy, and most
+ // people don't care.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
+ // not be safe. See memcpy above for more details.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Align, isVol, DstPtrInfo);
+
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*getContext());
+ Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET),
+ /*isTailCall=*/false,
+ /*doesNotReturn*/false, /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachinePointerInfo PtrInfo,
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+
+ // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE.
+ // For now, atomics are considered to be volatile always.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
+ unsigned Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ EVT VT = Cmp.getValueType();
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, Cmp, Swp, MMO, Ordering,
+ SynchScope);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ const Value* PtrVal,
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ // An atomic store does not load. An atomic load does not store.
+ // (An atomicrmw obviously both loads and stores.)
+ // For now, atomics are considered to be volatile always, and they are
+ // chained as such.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
+ unsigned Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+ Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_OR ||
+ Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND ||
+ Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX ||
+ Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_SWAP ||
+ Opcode == ISD::ATOMIC_STORE) &&
+ "Invalid Atomic Op");
+
+ EVT VT = Val.getValueType();
+
+ SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
+ getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Val};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, Val, MMO,
+ Ordering, SynchScope);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ EVT VT, SDValue Chain,
+ SDValue Ptr,
+ const Value* PtrVal,
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ // An atomic store does not load. An atomic load does not store.
+ // (An atomicrmw obviously both loads and stores.)
+ // For now, atomics are considered to be volatile always, and they are
+ // chained as such.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
+ unsigned Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, VT, Chain, Ptr, MMO,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ EVT VT, SDValue Chain,
+ SDValue Ptr,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, MMO, Ordering, SynchScope);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
+ DebugLoc dl) {
+ if (NumOps == 1)
+ return Ops[0];
+
+ SmallVector<EVT, 4> VTs;
+ VTs.reserve(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ VTs.push_back(Ops[i].getValueType());
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps),
+ Ops, NumOps);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
+ const EVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachinePointerInfo PtrInfo,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
+ MemVT, PtrInfo, Align, Vol,
+ ReadMem, WriteMem);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachinePointerInfo PtrInfo,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ if (Align == 0) // Ensure that codegen never sees alignment 0
+ Align = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = 0;
+ if (WriteMem)
+ Flags |= MachineMemOperand::MOStore;
+ if (ReadMem)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Vol)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
+
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert((Opcode == ISD::INTRINSIC_VOID ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
+ Opcode == ISD::LIFETIME_START ||
+ Opcode == ISD::LIFETIME_END ||
+ (Opcode <= INT_MAX &&
+ (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
+ "Opcode is not a memory-accessing opcode!");
+
+ // Memoize the node unless it returns a flag.
+ MemIntrinsicSDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+ MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+ MemVT, MMO);
+ }
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return MachinePointerInfo();
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(FI, Offset+
+ cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+ // If the 'Offset' value isn't a constant, we can't handle this.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.getOpcode() == ISD::UNDEF)
+ return InferPointerInfo(Ptr);
+ return MachinePointerInfo();
+}
+
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ bool isVolatile, bool isNonTemporal, bool isInvariant,
+ unsigned Alignment, const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(VT);
+
+ unsigned Flags = MachineMemOperand::MOLoad;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+ if (isInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
+
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr, Offset);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+ TBAAInfo, Ranges);
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset, EVT MemVT,
+ MachineMemOperand *MMO) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ assert(VT.isVector() == MemVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+ "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ?
+ getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<LoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType,
+ MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo,
+ bool isVolatile, bool isNonTemporal,
+ bool isInvariant, unsigned Alignment,
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo, Ranges);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+ SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment,
+ TBAAInfo);
+}
+
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+ assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+ "Load is already a indexed load!");
+ return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
+ false, LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(Val.getValueType());
+
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags,
+ Val.getValueType().getStoreSize(), Alignment,
+ TBAAInfo);
+
+ return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+ false, VT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT,bool isVolatile, bool isNonTemporal,
+ unsigned Alignment,
+ const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(SVT);
+
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+ TBAAInfo);
+
+ return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, EVT SVT,
+ MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (VT == SVT)
+ return getStore(Chain, dl, Val, Ptr, MMO);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() &&
+ "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+ true, SVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+ assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+ "Store is already a indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM,
+ ST->isTruncatingStore(),
+ ST->getMemoryVT(),
+ ST->getMemOperand());
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ SDValue SV,
+ unsigned Align) {
+ SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ const SDUse *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ // Copy from an SDUse array into an SDValue array for use with
+ // the regular getNode logic.
+ SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+ return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SELECT_CC: {
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ break;
+ }
+ case ISD::BR_CC: {
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ }
+ }
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+ void *IP = 0;
+
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ ArrayRef<EVT> ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+ Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const EVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps) {
+ if (NumVTs == 1)
+ return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+ return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+#if 0
+ switch (Opcode) {
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+ // If the and is only masking out bits that cannot effect the shift,
+ // eliminate the and.
+ unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+ }
+#endif
+
+ // Memoize the node unless it returns a flag.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ if (NumOps == 1) {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+ Ops[2]);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ CSEMap.InsertNode(N, IP);
+ } else {
+ if (NumOps == 1) {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+ Ops[2]);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ }
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) {
+ return getNode(Opcode, DL, VTList, 0, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1) {
+ SDValue Ops[] = { N1 };
+ return getNode(Opcode, DL, VTList, Ops, 1);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2) {
+ SDValue Ops[] = { N1, N2 };
+ return getNode(Opcode, DL, VTList, Ops, 2);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDValue Ops[] = { N1, N2, N3 };
+ return getNode(Opcode, DL, VTList, Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VTList, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VTList, Ops, 5);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT) {
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ SDVTList Result = makeVTList(Array, 2);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ SDVTList Result = makeVTList(Array, 3);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3 && I->VTs[3] == VT4)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ SDVTList Result = makeVTList(Array, 4);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
+ switch (NumVTs) {
+ case 0: llvm_unreachable("Cannot have nodes without results!");
+ case 1: return getVTList(VTs[0]);
+ case 2: return getVTList(VTs[0], VTs[1]);
+ case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+ case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]);
+ default: break;
+ }
+
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I) {
+ if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+ continue;
+
+ if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2]))
+ return *I;
+ }
+
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+ std::copy(VTs, VTs+NumVTs, Array);
+ SDVTList Result = makeVTList(Array, NumVTs);
+ VTList.push_back(Result);
+ return Result;
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return N; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4, SDValue Op5) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops, 5);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) {
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ bool AnyChange = false;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Ops[i] != N->getOperand(i)) {
+ AnyChange = true;
+ break;
+ }
+ }
+
+ // No operands changed, just return the input node.
+ if (!AnyChange) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (N->OperandList[i] != Ops[i])
+ N->OperandList[i].set(Ops[i]);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+ // Unlike the code in MorphNodeTo that does this, we don't need to
+ // watch for dead nodes here.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ Use.set(SDValue());
+ }
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+ // Reset the NodeID to -1.
+ N->setNodeId(-1);
+ return N;
+}
+
+/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the information that operation is associated with multiple lines.
+/// This will make the debugger working better at -O0, were there is a higher
+/// probability having other instructions associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) {
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+}
+
+/// MorphNodeTo - This *mutates* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the DebugLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ // If an identical node already exists, use it.
+ void *IP = 0;
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
+ }
+
+ if (!RemoveNodeFromCSEMaps(N))
+ IP = 0;
+
+ // Start the morphing.
+ N->NodeType = Opc;
+ N->ValueList = VTs.VTs;
+ N->NumValues = VTs.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list. Keep track of any operands that become dead as a result.
+ SmallPtrSet<SDNode*, 16> DeadNodeSet;
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Used = Use.getNode();
+ Use.set(SDValue());
+ if (Used->use_empty())
+ DeadNodeSet.insert(Used);
+ }
+
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
+ // Initialize the memory references information.
+ MN->setMemRefs(0, 0);
+ // If NumOps is larger than the # of operands we can have in a
+ // MachineSDNode, reallocate the operand list.
+ if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
+ if (MN->OperandsNeedDelete)
+ delete[] MN->OperandList;
+ if (NumOps > array_lengthof(MN->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ MN->InitOperands(MN->LocalOperands, Ops, NumOps);
+ MN->OperandsNeedDelete = false;
+ } else
+ MN->InitOperands(MN->OperandList, Ops, NumOps);
+ } else {
+ // If NumOps is larger than the # of operands we currently have, reallocate
+ // the operand list.
+ if (NumOps > N->NumOperands) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+ N->InitOperands(new SDUse[NumOps], Ops, NumOps);
+ N->OperandsNeedDelete = true;
+ } else
+ N->InitOperands(N->OperandList, Ops, NumOps);
+ }
+
+ // Delete any nodes that are still dead after adding the uses for the
+ // new operands.
+ if (!DeadNodeSet.empty()) {
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+ E = DeadNodeSet.end(); I != E; ++I)
+ if ((*I)->use_empty())
+ DeadNodes.push_back(*I);
+ RemoveDeadNodes(DeadNodes);
+ }
+
+ if (IP)
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
+///
+/// Note that getMachineNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+ EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ ArrayRef<EVT> ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps) {
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
+ MachineSDNode *N;
+ void *IP = 0;
+
+ if (DoCSE) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
+ IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL));
+ }
+ }
+
+ // Allocate a new MachineSDNode.
+ N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs);
+
+ // Initialize the operands list.
+ if (NumOps > array_lengthof(N->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ N->InitOperands(N->LocalOperands, Ops, NumOps);
+ N->OperandsNeedDelete = false;
+
+ if (DoCSE)
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyMachineNode(N);
+#endif
+ return N;
+}
+
+/// getTargetExtractSubreg - A convenience function for creating
+/// TargetOpcode::EXTRACT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ VT, Operand, SRIdxVal);
+ return SDValue(Subreg, 0);
+}
+
+/// getTargetInsertSubreg - A convenience function for creating
+/// TargetOpcode::INSERT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand, SDValue Subreg) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ VT, Operand, Subreg, SRIdxVal);
+ return SDValue(Result, 0);
+}
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return E;
+ }
+ return NULL;
+}
+
+/// getDbgValue - Creates a SDDbgValue node.
+///
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O);
+}
+
+namespace {
+
+/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
+/// pointed to by a use iterator is deleted, increment the use iterator
+/// so that it doesn't dangle.
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SDNode::use_iterator &UI;
+ SDNode::use_iterator &UE;
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ // Increment the iterator as needed.
+ while (UI != UE && N == *UI)
+ ++UI;
+ }
+
+public:
+ RAUWUpdateListener(SelectionDAG &d,
+ SDNode::use_iterator &ui,
+ SDNode::use_iterator &ue)
+ : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
+};
+
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
+ SDNode *From = FromN.getNode();
+ assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+ "Cannot replace with this method!");
+ assert(From != To.getNode() && "Cannot replace uses of with self");
+
+ // Iterate over all the existing uses of From. New uses will be added
+ // to the beginning of the use list, which we avoid visiting.
+ // This specifically avoids visiting uses of From that arise while the
+ // replacement is happening, because any such uses would be the result
+ // of CSE: If an existing node looks like From after one of its operands
+ // is replaced by To, we don't want to replace of all its users with To
+ // too. See PR3018 for more info.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (FromN == getRoot())
+ setRoot(To);
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ assert((!From->hasAnyUseOfValue(i) ||
+ From->getValueType(i) == To->getValueType(i)) &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+ // Handle the trivial case.
+ if (From == To)
+ return;
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.setNode(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To, getRoot().getResNo()));
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
+ if (From->getNumValues() == 1) // Handle the simple case efficiently.
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ const SDValue &ToOp = To[Use.getResNo()];
+ ++UI;
+ Use.set(ToOp);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To[getRoot().getResNo()]));
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The Deleted
+/// vector is handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
+ // Handle the really simple, really trivial case efficiently.
+ if (From == To) return;
+
+ // Handle the simple, trivial, case efficiently.
+ if (From.getNode()->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To);
+ return;
+ }
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From.getNode()->use_begin(),
+ UE = From.getNode()->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+ bool UserRemovedFromCSEMaps = false;
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+
+ // Skip uses of different values from the same node.
+ if (Use.getResNo() != From.getResNo()) {
+ ++UI;
+ continue;
+ }
+
+ // If this node hasn't been modified yet, it's still in the CSE maps,
+ // so remove its old self from the CSE maps.
+ if (!UserRemovedFromCSEMaps) {
+ RemoveNodeFromCSEMaps(User);
+ UserRemovedFromCSEMaps = true;
+ }
+
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // We are iterating over all uses of the From node, so if a use
+ // doesn't use the specific value, no changes are made.
+ if (!UserRemovedFromCSEMaps)
+ continue;
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot())
+ setRoot(To);
+}
+
+namespace {
+ /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+ /// to record information about a use.
+ struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+ };
+
+ /// operator< - Sort Memos by User.
+ bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+ }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list. The Deleted vector is
+/// handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+ const SDValue *To,
+ unsigned Num){
+ // Handle the simple, trivial case efficiently.
+ if (Num == 1)
+ return ReplaceAllUsesOfValueWith(*From, *To);
+
+ // Read up all the uses and make records of them. This helps
+ // processing new uses that are introduced during the
+ // replacement process.
+ SmallVector<UseMemo, 4> Uses;
+ for (unsigned i = 0; i != Num; ++i) {
+ unsigned FromResNo = From[i].getResNo();
+ SDNode *FromNode = From[i].getNode();
+ for (SDNode::use_iterator UI = FromNode->use_begin(),
+ E = FromNode->use_end(); UI != E; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == FromResNo) {
+ UseMemo Memo = { *UI, i, &Use };
+ Uses.push_back(Memo);
+ }
+ }
+ }
+
+ // Sort the uses, so that all the uses from a given User are together.
+ std::sort(Uses.begin(), Uses.end());
+
+ for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+ UseIndex != UseIndexEnd; ) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Uses[UseIndex].User;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // The Uses array is sorted, so all the uses for a given User
+ // are next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ unsigned i = Uses[UseIndex].Index;
+ SDUse &Use = *Uses[UseIndex].Use;
+ ++UseIndex;
+
+ Use.set(To[i]);
+ } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the maximum id and a vector
+/// of the SDNodes* in assigned order by reference.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+
+ unsigned DAGSize = 0;
+
+ // SortedPos tracks the progress of the algorithm. Nodes before it are
+ // sorted, nodes after it are unsorted. When the algorithm completes
+ // it is at the end of the list.
+ allnodes_iterator SortedPos = allnodes_begin();
+
+ // Visit all the nodes. Move nodes with no operands to the front of
+ // the list immediately. Annotate nodes that do have operands with their
+ // operand count. Before we do this, the Node Id fields of the nodes
+ // may contain arbitrary values. After, the Node Id fields for nodes
+ // before SortedPos will contain the topological sort index, and the
+ // Node Id fields for nodes At SortedPos and after will contain the
+ // count of outstanding operands.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+ SDNode *N = I++;
+ checkForCycles(N);
+ unsigned Degree = N->getNumOperands();
+ if (Degree == 0) {
+ // A node with no uses, add it to the result array immediately.
+ N->setNodeId(DAGSize++);
+ allnodes_iterator Q = N;
+ if (Q != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Temporarily use the Node Id as scratch space for the degree count.
+ N->setNodeId(Degree);
+ }
+ }
+
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
+ // such that by the time the end is reached all nodes will be sorted.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+ checkForCycles(N);
+ // N is in sorted position, so all its uses have one less operand
+ // that needs to be sorted.
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *P = *UI;
+ unsigned Degree = P->getNodeId();
+ assert(Degree != 0 && "Invalid node degree");
+ --Degree;
+ if (Degree == 0) {
+ // All of P's operands are sorted, so P may sorted now.
+ P->setNodeId(DAGSize++);
+ if (P != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Update P's outstanding operand count.
+ P->setNodeId(Degree);
+ }
+ }
+ if (I == SortedPos) {
+#ifndef NDEBUG
+ SDNode *S = ++I;
+ dbgs() << "Overran sorted position:\n";
+ S->dumprFull();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ assert(SortedPos == AllNodes.end() &&
+ "Topological sort incomplete!");
+ assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token!");
+ assert(AllNodes.front().getNodeId() == 0 &&
+ "First node in topological sort has non-zero id!");
+ assert(AllNodes.front().getNumOperands() == 0 &&
+ "First node in topological sort has operands!");
+ assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+ "Last node in topologic sort has unexpected id!");
+ assert(AllNodes.back().use_empty() &&
+ "Last node in topologic sort has users!");
+ assert(DAGSize == allnodes_size() && "Node count mismatch!");
+ return DAGSize;
+}
+
+/// AssignOrdering - Assign an order to the SDNode.
+void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) {
+ assert(SD && "Trying to assign an order to a null node!");
+ Ordering->add(SD, Order);
+}
+
+/// GetOrdering - Get the order for the SDNode.
+unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
+ assert(SD && "Trying to get the order of a null node!");
+ return Ordering->getOrder(SD);
+}
+
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+ DbgInfo->add(DB, SD, isParameter);
+ if (SD)
+ SD->setHasDebugValue(true);
+}
+
+/// TransferDbgValues - Transfer SDDbgValues.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+ if (From == To || !From.getNode()->getHasDebugValue())
+ return;
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
+ I != E; ++I) {
+ SDDbgValue *Dbg = *I;
+ if (Dbg->getKind() == SDDbgValue::SDNODE) {
+ SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+ Dbg->getOffset(), Dbg->getDebugLoc(),
+ Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+ }
+ }
+ for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(),
+ E = ClonedDVs.end(); I != E; ++I)
+ AddDbgValue(*I, ToNode, false);
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+ DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL,
+ const GlobalValue *GA,
+ EVT VT, int64_t o, unsigned char TF)
+ : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+ TheGlobal = GA;
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(isNonTemporal() == MMO->isNonTemporal() &&
+ "Non-temporal encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs, Ops, NumOps),
+ MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+ AddNodeIDNode(ID, this);
+}
+
+namespace {
+ struct EVTArray {
+ std::vector<EVT> VTs;
+
+ EVTArray() {
+ VTs.reserve(MVT::LAST_VALUETYPE);
+ for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+ VTs.push_back(MVT((MVT::SimpleValueType)i));
+ }
+ };
+}
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+static ManagedStatic<EVTArray> SimpleVTArray;
+static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const EVT *SDNode::getValueTypeList(EVT VT) {
+ if (VT.isExtended()) {
+ sys::SmartScopedLock<true> Lock(*VTMutex);
+ return &(*EVTs->insert(VT).first);
+ } else {
+ assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ "Value type out of range!");
+ return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+ }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // TODO: Only iterate over uses of a given value of the node
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ if (UI.getUse().getResNo() == Value) {
+ if (NUses == 0)
+ return false;
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
+
+
+/// hasAnyUseOfValue - Return true if there are any use of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+ if (UI.getUse().getResNo() == Value)
+ return true;
+
+ return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperand - Return true if this node is an operand of N.
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (*this == N->getOperand(i))
+ return true;
+ return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+ if (this == N->OperandList[i].getNode())
+ return true;
+ return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in, it does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+ unsigned Depth) const {
+ if (*this == Dest) return true;
+
+ // Don't search too deeply, we just want to be able to see through
+ // TokenFactor's etc.
+ if (Depth == 0) return false;
+
+ // If this is a token factor, all inputs to the TF happen in parallel. If any
+ // of the operands of the TF does not reach dest, then we cannot do the xform.
+ if (getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return false;
+ return true;
+ }
+
+ // Loads don't have side effects, look through them.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+ if (!Ld->isVolatile())
+ return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+ }
+ return false;
+}
+
+/// hasPredecessor - Return true if N is a predecessor of this node.
+/// N is either an operand of this node, or can be reached by recursively
+/// traversing up the operands.
+/// NOTE: This is an expensive method. Use it carefully.
+bool SDNode::hasPredecessor(const SDNode *N) const {
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ return hasPredecessorHelper(N, Visited, Worklist);
+}
+
+bool SDNode::hasPredecessorHelper(const SDNode *N,
+ SmallPtrSet<const SDNode *, 32> &Visited,
+ SmallVector<const SDNode *, 16> &Worklist) const {
+ if (Visited.empty()) {
+ Worklist.push_back(this);
+ } else {
+ // Take a look in the visited set. If we've already encountered this node
+ // we needn't search further.
+ if (Visited.count(N))
+ return true;
+ }
+
+ // Haven't visited N yet. Continue the search.
+ while (!Worklist.empty()) {
+ const SDNode *M = Worklist.pop_back_val();
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ SDNode *Op = M->getOperand(i).getNode();
+ if (Visited.insert(Op))
+ Worklist.push_back(Op);
+ if (Op == N)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+ assert(Num < NumOperands && "Invalid child # of SDNode!");
+ return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ unsigned i;
+ for (i= 0; i != NE; ++i) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperandEltVT,
+ Operand,
+ getConstant(i, TLI.getPointerTy()));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (N->getOpcode()) {
+ default:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
+ case ISD::VSELECT:
+ Scalars.push_back(getNode(ISD::SELECT, dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
+ break;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::FP_ROUND_INREG: {
+ EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ Operands[0],
+ getValueType(ExtVT)));
+ }
+ }
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(getUNDEF(EltVT));
+
+ return getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*getContext(), EltVT, ResNE),
+ &Scalars[0], Scalars.size());
+}
+
+
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
+/// is loading from.
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+ unsigned Bytes, int Dist) const {
+ if (LD->getChain() != Base->getChain())
+ return false;
+ EVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ // Handle X+C
+ if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
+
+ const GlobalValue *GV1 = NULL;
+ const GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+ // If this is a GlobalAddress + cst, return the alignment.
+ const GlobalValue *GV;
+ int64_t GVOffset = 0;
+ if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+ unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
+ APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
+ llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
+ TLI.getDataLayout());
+ unsigned AlignBits = KnownZero.countTrailingOnes();
+ unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
+ if (Align)
+ return MinAlign(Align, GVOffset);
+ }
+
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+ int FrameIdx = 1 << 31;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != (1 << 31)) {
+ const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ return FIInfoAlign;
+ }
+
+ return 0;
+}
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+ return getGlobal()->getType()->getAddressSpace();
+}
+
+
+Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
+ APInt &SplatUndef,
+ unsigned &SplatBitSize,
+ bool &HasAnyUndefs,
+ unsigned MinSplatBits,
+ bool isBigEndian) {
+ EVT VT = getValueType(0);
+ assert(VT.isVector() && "Expected a vector type");
+ unsigned sz = VT.getSizeInBits();
+ if (MinSplatBits > sz)
+ return false;
+
+ SplatValue = APInt(sz, 0);
+ SplatUndef = APInt(sz, 0);
+
+ // Get the bits. Bits with undefined values (when the corresponding element
+ // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+ // in SplatValue. If any of the values are not constant, give up and return
+ // false.
+ unsigned int nOps = getNumOperands();
+ assert(nOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+
+ for (unsigned j = 0; j < nOps; ++j) {
+ unsigned i = isBigEndian ? nOps-1-j : j;
+ SDValue OpVal = getOperand(i);
+ unsigned BitPos = j * EltBitSize;
+
+ if (OpVal.getOpcode() == ISD::UNDEF)
+ SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+ else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
+ zextOrTrunc(sz) << BitPos;
+ else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
+ else
+ return false;
+ }
+
+ // The build_vector is all constants or undefs. Find the smallest element
+ // size that splats the vector.
+
+ HasAnyUndefs = (SplatUndef != 0);
+ while (sz > 8) {
+
+ unsigned HalfSize = sz / 2;
+ APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatValue.trunc(HalfSize);
+ APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = SplatUndef.trunc(HalfSize);
+
+ // If the two halves do not match (ignoring undef bits), stop here.
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+ MinSplatBits > HalfSize)
+ break;
+
+ SplatValue = HighValue | LowValue;
+ SplatUndef = HighUndef & LowUndef;
+
+ sz = HalfSize;
+ }
+
+ SplatBitSize = sz;
+ return true;
+}
+
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i, e;
+ for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+ /* search */;
+
+ assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+
+ // Make sure all remaining elements are either undef or the same as the first
+ // non-undef value.
+ for (int Idx = Mask[i]; i != e; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Idx)
+ return false;
+ return true;
+}
+
+#ifdef XDEBUG
+static void checkForCyclesHelper(const SDNode *N,
+ SmallPtrSet<const SDNode*, 32> &Visited,
+ SmallPtrSet<const SDNode*, 32> &Checked) {
+ // If this node has already been checked, don't check it again.
+ if (Checked.count(N))
+ return;
+
+ // If a node has already been visited on this depth-first walk, reject it as
+ // a cycle.
+ if (!Visited.insert(N)) {
+ dbgs() << "Offending node:\n";
+ N->dumprFull();
+ errs() << "Detected cycle in SelectionDAG\n";
+ abort();
+ }
+
+ for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+
+ Checked.insert(N);
+ Visited.erase(N);
+}
+#endif
+
+void llvm::checkForCycles(const llvm::SDNode *N) {
+#ifdef XDEBUG
+ assert(N && "Checking nonexistant SDNode");
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked);
+#endif
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
+ checkForCycles(DAG->getRoot().getNode());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 000000000000..ce40cd6a0c9c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,6873 @@
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision),
+ cl::init(0));
+
+// Limit the width of DAG chains. This is important in general to prevent
+// prevent DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static const unsigned MaxParallelChains = 64;
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V);
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger then ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts,
+ unsigned NumParts, MVT PartVT, EVT ValueVT,
+ const Value *V,
+ ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ if (ValueVT.isVector())
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, V);
+
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ if (NumParts > 1) {
+ // Assemble the value from multiple parts.
+ if (ValueVT.isInteger()) {
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned ValueBits = ValueVT.getSizeInBits();
+
+ // Assemble the power of 2 part.
+ unsigned RoundParts = NumParts & (NumParts - 1) ?
+ 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundBits = PartBits * RoundParts;
+ EVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+ SDValue Lo, Hi;
+
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+ if (RoundParts > 2) {
+ Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
+ PartVT, HalfVT, V);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
+ RoundParts / 2, PartVT, HalfVT, V);
+ } else {
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
+ }
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
+
+ if (RoundParts < NumParts) {
+ // Assemble the trailing non-power-of-2 part.
+ unsigned OddParts = NumParts - RoundParts;
+ EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+ Hi = getCopyFromParts(DAG, DL,
+ Parts + RoundParts, OddParts, PartVT, OddVT, V);
+
+ // Combine the round and odd parts.
+ Lo = Val;
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueType().getSizeInBits(),
+ TLI.getPointerTy()));
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
+ }
+ } else if (PartVT.isFloatingPoint()) {
+ // FP split into multiple FP parts (for ppcf128)
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
+ "Unexpected split");
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
+ } else {
+ // FP split into integer parts (soft fp)
+ assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+ !PartVT.isVector() && "Unexpected split");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
+ }
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ EVT PartEVT = Val.getValueType();
+
+ if (PartEVT == ValueVT)
+ return Val;
+
+ if (PartEVT.isInteger() && ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartEVT)) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+ // zero or sign-extension.
+ if (AssertOp != ISD::DELETED_NODE)
+ Val = DAG.getNode(AssertOp, DL, PartEVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ // FP_ROUND's are always exact here.
+ if (ValueVT.bitsLT(Val.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
+ DAG.getTargetConstant(1, TLI.getPointerTy()));
+
+ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ llvm_unreachable("Unknown mismatch!");
+}
+
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent. If the parts combine to a
+/// type larger then ValueVT then AssertOp can be used to specify whether the
+/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
+/// ValueVT (ISD::AssertSext).
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V) {
+ assert(ValueVT.isVector() && "Not a vector value");
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ // Handle a multi-element vector.
+ if (NumParts > 1) {
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getSimpleValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+ PartVT, IntermediateVT, V);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT, V);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ?
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
+ ValueVT, &Ops[0], NumIntermediates);
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ EVT PartEVT = Val.getValueType();
+
+ if (PartEVT == ValueVT)
+ return Val;
+
+ if (PartEVT.isVector()) {
+ // If the element type of the source/dest vectors are the same, but the
+ // parts vector has more elements than the value vector, then we have a
+ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+ // elements we want.
+ if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ "Cannot narrow, it would be a lossy transformation");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ "Cannot handle this kind of promotion");
+ // Promoted vector extract
+ bool Smaller = ValueVT.bitsLE(PartEVT);
+ return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT, Val);
+
+ }
+
+ // Trivial bitcast if the types are the same size and the destination
+ // vector type is legal.
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ TLI.isTypeLegal(ValueVT))
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle cases such as i8 -> <1 x i1>
+ if (ValueVT.getVectorNumElements() != 1) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("non-trivial scalar-to-vector conversion");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ report_fatal_error("Cannot handle scalar-to-vector conversion!");
+ }
+
+ if (ValueVT.getVectorNumElements() == 1 &&
+ ValueVT.getVectorElementType() != PartEVT) {
+ bool Smaller = ValueVT.bitsLE(PartEVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT.getScalarType(), Val);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+}
+
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V);
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts. If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ EVT ValueVT = Val.getValueType();
+
+ // Handle the vector case separately.
+ if (ValueVT.isVector())
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned OrigNumParts = NumParts;
+ assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
+
+ if (NumParts == 0)
+ return;
+
+ assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
+
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else {
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartEVT != ValueVT);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+ // If the parts cover less bits than value has, truncate the value.
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
+ if (NumParts == 1) {
+ if (PartEVT != ValueVT) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("scalar-to-vector conversion failed");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(RoundBits));
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
+
+ if (TLI.isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(1));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(0));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
+ }
+ }
+ }
+
+ if (TLI.isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+}
+
+
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V) {
+ EVT ValueVT = Val.getValueType();
+ assert(ValueVT.isVector() && "Not a vector");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (NumParts == 1) {
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (PartVT.isVector() &&
+ PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+ PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ ElementVT, Val, DAG.getIntPtrConstant(i)));
+
+ for (unsigned i = ValueVT.getVectorNumElements(),
+ e = PartVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ElementVT));
+
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+
+ //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+ //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else if (PartVT.isVector() &&
+ PartEVT.getVectorElementType().bitsGE(
+ ValueVT.getVectorElementType()) &&
+ PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+ // Promoted vector extract
+ bool Smaller = PartEVT.bitsLE(ValueVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
+ } else{
+ // Vector -> scalar conversion.
+ assert(ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ PartVT, Val, DAG.getIntPtrConstant(0));
+
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
+ IntermediateVT,
+ NumIntermediates, RegisterVT);
+ unsigned NumElements = ValueVT.getVectorNumElements();
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i) {
+ if (IntermediateVT.isVector())
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ IntermediateVT, Val,
+ DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
+ else
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ IntermediateVT, Val, DAG.getIntPtrConstant(i));
+ }
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, split each the value into
+ // legal parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
+ }
+}
+
+namespace {
+ /// RegsForValue - This struct represents the registers (physical or virtual)
+ /// that a particular set of values is assigned, and the type information
+ /// about the value. The most common situation is to represent one value at a
+ /// time, but struct or array values are handled element-wise as multiple
+ /// values. The splitting of aggregates is performed recursively, so that we
+ /// never have aggregate-typed registers. The values at this point do not
+ /// necessarily have legal types, so each value may require one or more
+ /// registers of some legal type.
+ ///
+ struct RegsForValue {
+ /// ValueVTs - The value types of the values, which may not be legal, and
+ /// may need be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+ /// With virtual registers, the contents of RegVTs is redundant with TLI's
+ /// getRegisterType member function, however when with physical registers
+ /// it is necessary to have a separate record of the types.
+ ///
+ SmallVector<MVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue() {}
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs,
+ MVT regvt, EVT valuevt)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+ unsigned Reg, Type *Ty) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
+
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
+ }
+
+ /// areValueTypesLegal - Return true if types of all the values are legal.
+ bool areValueTypesLegal(const TargetLowering &TLI) {
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ MVT RegisterVT = RegVTs[Value];
+ if (!TLI.isTypeLegal(RegisterVT))
+ return false;
+ }
+ return true;
+ }
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+ /// this value and returns the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag,
+ const Value *V = 0) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+ /// specified value into the registers specified by this object. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag, const Value *V) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind,
+ bool HasMatching, unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const;
+ };
+}
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+ FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
+ // A Value with type {} or [0 x %t] needs no registers.
+ if (ValueVTs.empty())
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ MVT RegisterVT = RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (Flag == 0) {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ } else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+
+ Chain = P.getValue(1);
+ Parts[i] = P;
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ !RegisterVT.isInteger() || RegisterVT.isVector())
+ continue;
+
+ const FunctionLoweringInfo::LiveOutInfo *LOI =
+ FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+ if (!LOI)
+ continue;
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI->NumSignBits;
+ unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+ else
+ continue;
+
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+ NumRegs, RegisterVT, ValueVT, V);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Values[0], ValueVTs.size());
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Get the list of the values's legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ MVT RegisterVT = RegVTs[Value];
+ ISD::NodeType ExtendKind =
+ TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND;
+
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
+ &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (Flag == 0) {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ } else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+ // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+ // flagged to it. That is the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ if (HasMatching)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ else if (!Regs.empty() &&
+ TargetRegisterInfo::isVirtualRegister(Regs.front())) {
+ // Put the register class of the virtual registers in the flag word. That
+ // way, later passes can recompute register class constraints for inline
+ // assembly as well as normal instructions.
+ // Don't do this for tied operands that can use the regclass information
+ // from the def.
+ const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
+
+ SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+ Ops.push_back(Res);
+
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+ MVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ }
+ }
+}
+
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li) {
+ AA = &aa;
+ GFI = gfi;
+ LibInfo = li;
+ TD = DAG.getTarget().getDataLayout();
+ Context = DAG.getContext();
+ LPadToCallSiteMap.clear();
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGBuilder object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGBuilder::clear() {
+ NodeMap.clear();
+ UnusedArgNodeMap.clear();
+ PendingLoads.clear();
+ PendingExports.clear();
+ CurDebugLoc = DebugLoc();
+ HasTailCall = false;
+}
+
+/// clearDanglingDebugInfo - Clear the dangling debug information
+/// map. This function is separated from the clear so that debug
+/// information that is dangling in a basic block can be properly
+/// resolved in a different basic block. This allows the
+/// SelectionDAG to resolve dangling debug information attached
+/// to PHI nodes.
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+ DanglingDebugInfoMap.clear();
+}
+
+/// getRoot - Return the current virtual root of the Selection DAG,
+/// flushing any PendingLoad items. This must be done before emitting
+/// a store or any other node that may need to be ordered after any
+/// prior load instructions.
+///
+SDValue SelectionDAGBuilder::getRoot() {
+ if (PendingLoads.empty())
+ return DAG.getRoot();
+
+ if (PendingLoads.size() == 1) {
+ SDValue Root = PendingLoads[0];
+ DAG.setRoot(Root);
+ PendingLoads.clear();
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
+ SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingLoads[0], PendingLoads.size());
+ PendingLoads.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+/// getControlRoot - Similar to getRoot, but instead of flushing all the
+/// PendingLoad items, flush all the PendingExports items. It is necessary
+/// to do this before emitting a terminator instruction.
+///
+SDValue SelectionDAGBuilder::getControlRoot() {
+ SDValue Root = DAG.getRoot();
+
+ if (PendingExports.empty())
+ return Root;
+
+ // Turn all of the CopyToReg chains into one factored node.
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = PendingExports.size();
+ for (; i != e; ++i) {
+ assert(PendingExports[i].getNode()->getNumOperands() > 1);
+ if (PendingExports[i].getNode()->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ PendingExports.push_back(Root);
+ }
+
+ Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingExports[0],
+ PendingExports.size());
+ PendingExports.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
+ if (DAG.GetOrdering(Node) != 0) return; // Already has ordering.
+ DAG.AssignOrdering(Node, SDNodeOrder);
+
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
+ AssignOrderingToNode(Node->getOperand(I).getNode());
+}
+
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (isa<TerminatorInst>(&I))
+ HandlePHINodesInSuccessorBlocks(I.getParent());
+
+ CurDebugLoc = I.getDebugLoc();
+
+ visit(I.getOpcode(), I);
+
+ if (!isa<TerminatorInst>(&I) && !HasTailCall)
+ CopyToExportRegsIfNeeded(&I);
+
+ CurDebugLoc = DebugLoc();
+}
+
+void SelectionDAGBuilder::visitPHI(const PHINode &) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+ // ConstantExpr's in addition to instructions.
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown instruction type encountered!");
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
+#include "llvm/IR/Instruction.def"
+ }
+
+ // Assign the ordering to the freshly created DAG nodes.
+ if (NodeMap.count(&I)) {
+ ++SDNodeOrder;
+ AssignOrderingToNode(getValue(&I).getNode());
+ }
+}
+
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+ SDValue Val) {
+ DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
+ if (DDI.getDI()) {
+ const DbgValueInst *DI = DDI.getDI();
+ DebugLoc dl = DDI.getdl();
+ unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+ MDNode *Variable = DI->getVariable();
+ uint64_t Offset = DI->getOffset();
+ SDDbgValue *SDV;
+ if (Val.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
+ SDV = DAG.getDbgValue(Variable, Val.getNode(),
+ Val.getResNo(), Offset, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, Val.getNode(), false);
+ }
+ } else
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ DanglingDebugInfoMap[V] = DanglingDebugInfo();
+ }
+}
+
+/// getValue - Return an SDValue for the given Value.
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+ // If we already have an SDValue for this value, use it. It's important
+ // to do this first, so that we don't create a CopyFromReg if we already
+ // have a regular SDValue.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // If there's a virtual register allocated and initialized for this
+ // value, use it.
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
+ resolveDanglingDebugInfo(V, N);
+ return N;
+ }
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+ // If we already have an SDValue for this value, use it.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
+/// Create an SDValue for the given value.
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ EVT VT = TLI.getValueType(V->getType(), true);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return DAG.getConstant(*CI, VT);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
+
+ if (isa<ConstantPointerNull>(C))
+ return DAG.getConstant(0, TLI.getPointerTy());
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return DAG.getConstantFP(*CFP, VT);
+
+ if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+ return DAG.getUNDEF(VT);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDValue N1 = NodeMap[V];
+ assert(N1.getNode() && "visit didn't populate the NodeMap!");
+ return N1;
+ }
+
+ if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+ SmallVector<SDValue, 4> Constants;
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI) {
+ SDNode *Val = getValue(*OI).getNode();
+ // If the operand is an empty aggregate, there are no values.
+ if (!Val) continue;
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Constants.push_back(SDValue(Val, i));
+ }
+
+ return DAG.getMergeValues(&Constants[0], Constants.size(),
+ getCurDebugLoc());
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Ops.push_back(SDValue(Val, i));
+ }
+
+ if (isa<ArrayType>(CDS->getType()))
+ return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc());
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+
+ if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
+ assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+ "Unknown struct or array constant!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, C->getType(), ValueVTs);
+ unsigned NumElts = ValueVTs.size();
+ if (NumElts == 0)
+ return SDValue(); // empty struct
+ SmallVector<SDValue, 4> Constants(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ EVT EltVT = ValueVTs[i];
+ if (isa<UndefValue>(C))
+ Constants[i] = DAG.getUNDEF(EltVT);
+ else if (EltVT.isFloatingPoint())
+ Constants[i] = DAG.getConstantFP(0, EltVT);
+ else
+ Constants[i] = DAG.getConstant(0, EltVT);
+ }
+
+ return DAG.getMergeValues(&Constants[0], NumElts,
+ getCurDebugLoc());
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return DAG.getBlockAddress(BA, VT);
+
+ VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = VecTy->getNumElements();
+
+ // Now that we know the number and type of the elements, get that number of
+ // elements into the Ops array based on what kind of constant it is.
+ SmallVector<SDValue, 16> Ops;
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CV->getOperand(i)));
+ } else {
+ assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+ EVT EltVT = TLI.getValueType(VecTy->getElementType());
+
+ SDValue Op;
+ if (EltVT.isFloatingPoint())
+ Op = DAG.getConstantFP(0, EltVT);
+ else
+ Op = DAG.getConstant(0, EltVT);
+ Ops.assign(NumElements, Op);
+ }
+
+ // Create a BUILD_VECTOR node.
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+
+ // If this is a static alloca, generate it as the frameindex instead of
+ // computation.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+ }
+
+ // If this is an instruction which fast-isel has deferred, select it now.
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
+ }
+
+ llvm_unreachable("Can't get register for value!");
+}
+
+void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
+ SDValue Chain = getControlRoot();
+ SmallVector<ISD::OutputArg, 8> Outs;
+ SmallVector<SDValue, 8> OutVals;
+
+ if (!FuncInfo.CanLowerReturn) {
+ unsigned DemoteReg = FuncInfo.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
+ PtrValueVTs);
+
+ SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+
+ SmallVector<SDValue, 4> Chains(NumValues);
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ RetPtr.getValueType(), RetPtr,
+ DAG.getIntPtrConstant(Offsets[i]));
+ Chains[i] =
+ DAG.getStore(Chain, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ // FIXME: better loc info would be nice.
+ Add, MachinePointerInfo(), false, false, 0);
+ }
+
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ } else if (I.getNumOperands() != 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues) {
+ SDValue RetOp = getValue(I.getOperand(0));
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ const Function *F = I.getParent()->getParent();
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+ VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind);
+
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, &I, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (ExtendKind == ISD::SIGN_EXTEND)
+ Flags.setSExt();
+ else if (ExtendKind == ISD::ZERO_EXTEND)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ /*isfixed=*/true, 0, 0));
+ OutVals.push_back(Parts[i]);
+ }
+ }
+ }
+ }
+
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CallingConv::ID CallConv =
+ DAG.getMachineFunction().getFunction()->getCallingConv();
+ Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
+ Outs, OutVals, getCurDebugLoc(), DAG);
+
+ // Verify that the target's LowerReturn behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerReturn didn't return a valid chain!");
+
+ // Update the DAG with the new chain value resulting from return lowering.
+ DAG.setRoot(Chain);
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ return;
+
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ assert(!V->use_empty() && "Unused value assigned virtual registers!");
+ CopyValueToVirtualRegister(V, VMI->second);
+ }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (const Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB == &FromBB->getParent()->getEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
+uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (!BPI)
+ return 0;
+ const BasicBlock *SrcBB = Src->getBasicBlock();
+ const BasicBlock *DstBB = Dst->getBasicBlock();
+ return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::
+addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ uint32_t Weight /* = 0 */) {
+ if (!Weight)
+ Weight = getEdgeWeight(Src, Dst);
+ Src->addSuccessor(Dst, Weight);
+}
+
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+///
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ if (CurBB == SwitchBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+ ISD::CondCode Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = getICmpCondCode(IC->getPredicate());
+ } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ Condition = getFCmpCondCode(FC->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ } else {
+ Condition = ISD::SETEQ; // silence warning.
+ llvm_unreachable("Unknown compare instruction");
+ }
+
+ CaseBlock CB(Condition, BOp->getOperand(0),
+ BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+ return;
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression like
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ unsigned Opc) {
+ // If this node is not part of the or/and tree, emit it as a branch.
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+ (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+ !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI = CurBB;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].CC == Cases[1].CC &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+ MachineBasicBlock *BrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = BrMBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ if (I.isUnconditional()) {
+ // Update machine-CFG edges.
+ BrMBB->addSuccessor(Succ0MBB);
+
+ // If this is not a fall-through branch, emit the branch.
+ if (Succ0MBB != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Succ0MBB)));
+
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ const Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive, this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ //
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ if (!TLI.isJumpExpensive() &&
+ BOp->hasOneUse() &&
+ (BOp->getOpcode() == Instruction::And ||
+ BOp->getOpcode() == Instruction::Or)) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
+ BOp->getOpcode());
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SwitchCases)) {
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SwitchCases[0], BrMBB);
+ SwitchCases.erase(SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+ FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+
+ SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
+ NULL, Succ0MBB, Succ1MBB, BrMBB);
+
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB, BrMBB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB) {
+ SDValue Cond;
+ SDValue CondLHS = getValue(CB.CmpLHS);
+ DebugLoc dl = getCurDebugLoc();
+
+ // Build the setcc now.
+ if (CB.CmpMHS == NULL) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ) {
+ SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+ } else
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+ assert(CB.CC == ISD::SETCC_INVALID &&
+ "Condition is undefined for to-the-range belonging check.");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ SDValue CmpOp = getValue(CB.CmpMHS);
+ EVT VT = CmpOp.getValueType();
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+ ISD::SETULE);
+ } else {
+ SDValue SUB = DAG.getNode(ISD::SUB, dl,
+ VT, CmpOp, DAG.getConstant(Low, VT));
+ Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+ DAG.getConstant(High-Low, VT), ISD::SETULE);
+ }
+ }
+
+ // Update successor info
+ addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDValue True = DAG.getConstant(1, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+ }
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+
+ // Insert the false branch. Do this even if it's a fall through branch,
+ // this makes it easier to do DAG optimizations which require inverting
+ // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ EVT PTy = TLI.getPointerTy();
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ JT.Reg, PTy);
+ SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+ SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+ MVT::Other, Index.getValue(1),
+ Table, Index);
+ DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits necessary code to produce index
+/// in the JumpTable from switch case.
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+ JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB) {
+ // Subtract the lowest switch case value from the value being switched on and
+ // conditional branch to default mbb if the result is greater than the
+ // difference between smallest and largest cases.
+ SDValue SwitchOp = getValue(JTH.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(JTH.First, VT));
+
+ // The SDNode we just created, which holds the value being switched on minus
+ // the smallest case value, needs to be copied to a virtual register so it
+ // can be used as an index into the jump table in a subsequent basic block.
+ // This value may be smaller or larger than the target's pointer type, and
+ // therefore require extension or truncating.
+ SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
+
+ unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the largest
+ // case in the switch.
+ SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(Sub.getValueType()), Sub,
+ DAG.getConstant(JTH.Last-JTH.First,VT),
+ ISD::SETUGT);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ if (JT.MBB != NextBlock)
+ BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitBitTestHeader - This function emits necessary code to produce value
+/// suitable for "bit tests"
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ // Subtract the minimum value
+ SDValue SwitchOp = getValue(B.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(B.First, VT));
+
+ // Check range
+ SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(Sub.getValueType()),
+ Sub, DAG.getConstant(B.Range, VT),
+ ISD::SETUGT);
+
+ // Determine the type of the test operands.
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT))
+ UsePtrType = true;
+ else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
+ // Switch table case range are encoded into series of masks.
+ // Just use pointer type, it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ if (UsePtrType) {
+ VT = TLI.getPointerTy();
+ Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+ }
+
+ B.RegVT = VT.getSimpleVT();
+ B.Reg = FuncInfo.CreateReg(B.RegVT);
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ B.Reg, Sub);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+ addSuccessorWithWeight(SwitchBB, B.Default);
+ addSuccessorWithWeight(SwitchBB, MBB);
+
+ SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+
+ if (MBB != NextBlock)
+ BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
+ DAG.getBasicBlock(MBB));
+
+ DAG.setRoot(BrRange);
+}
+
+/// visitBitTestCase - this function produces one "bit test"
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ uint32_t BranchWeightToNext,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ MVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ Reg, VT);
+ SDValue Cmp;
+ unsigned PopCount = CountPopulation_64(B.Mask);
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ ShiftOp,
+ DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
+ ISD::SETEQ);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ ShiftOp,
+ DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
+ ISD::SETNE);
+ } else {
+ // Make desired shift
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
+ DAG.getConstant(1, VT), ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ VT, SwitchVal, DAG.getConstant(B.Mask, VT));
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ AndOp, DAG.getConstant(0, VT),
+ ISD::SETNE);
+ }
+
+ // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
+ addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
+ // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
+ addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
+
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ Cmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ if (NextMBB != NextBlock)
+ BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB));
+
+ DAG.setRoot(BrAnd);
+}
+
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
+
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ const Value *Callee(I.getCalledValue());
+ const Function *Fn = dyn_cast<Function>(Callee);
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(&I);
+ else if (Fn && Fn->isIntrinsic()) {
+ assert(Fn->getIntrinsicID() == Intrinsic::donothing);
+ // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ } else
+ LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ CopyToExportRegsIfNeeded(&I);
+
+ // Update successor info
+ addSuccessorWithWeight(InvokeMBB, Return);
+ addSuccessorWithWeight(InvokeMBB, LandingPad);
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to landingpad not in landing pad!");
+
+ MachineBasicBlock *MBB = FuncInfo.MBB;
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ AddLandingPadInfo(LP, MMI, MBB);
+
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother to create these DAG nodes.
+ if (TLI.getExceptionPointerRegister() == 0 &&
+ TLI.getExceptionSelectorRegister() == 0)
+ return;
+
+ SmallVector<EVT, 2> ValueVTs;
+ ComputeValueVTs(TLI, LP.getType(), ValueVTs);
+
+ // Insert the EXCEPTIONADDR instruction.
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = DAG.getRoot();
+ SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1);
+ SDValue Chain = Op1.getValue(1);
+
+ // Insert the EHSELECTION instruction.
+ VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ Ops[0] = Op1;
+ Ops[1] = Chain;
+ SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2);
+ Chain = Op2.getValue(1);
+ Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32);
+
+ Ops[0] = Op1;
+ Ops[1] = Op2;
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Ops[0], 2);
+
+ std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain);
+ setValue(&LP, RetPair.first);
+ DAG.setRoot(RetPair.second);
+}
+
+/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
+ // Size is the number of Cases represented by this range.
+ size_t Size = CR.Range.second - CR.Range.first;
+ if (Size > 3)
+ return false;
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ // If any two of the cases has the same destination, and if one value
+ // is the same as the other, but has one bit unset that the other has set,
+ // use bit manipulation to do two compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+ // TODO: Handle cases where CR.CaseBB != SwitchBB.
+ if (Size == 2 && CR.CaseBB == SwitchBB) {
+ Case &Small = *CR.Range.first;
+ Case &Big = *(CR.Range.second-1);
+
+ if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+ const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+ const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+ // Check that there is only one bit different.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+ SDValue CondLHS = getValue(SV);
+ EVT VT = CondLHS.getValueType();
+ DebugLoc DL = getCurDebugLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+ Or, DAG.getConstant(BigValue, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ // Both Small and Big will jump to Small.BB, so we sum up the weights.
+ addSuccessorWithWeight(SwitchBB, Small.BB,
+ Small.ExtraWeight + Big.ExtraWeight);
+ addSuccessorWithWeight(SwitchBB, Default,
+ // The default destination is the first successor in IR.
+ BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
+
+ // Insert the true branch.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+ getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.BB));
+
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(Default));
+
+ DAG.setRoot(BrCond);
+ return true;
+ }
+ }
+ }
+
+ // Order cases by weight so the most likely case will be checked first.
+ uint32_t UnhandledWeights = 0;
+ if (BPI) {
+ for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
+ uint32_t IWeight = I->ExtraWeight;
+ UnhandledWeights += IWeight;
+ for (CaseItr J = CR.Range.first; J < I; ++J) {
+ uint32_t JWeight = J->ExtraWeight;
+ if (IWeight > JWeight)
+ std::swap(*I, *J);
+ }
+ }
+ }
+ // Rearrange the case blocks so that the last one falls through if possible.
+ Case &BackCase = *(CR.Range.second-1);
+ if (Size > 1 &&
+ NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+ // The last case block won't fall through into 'NextBlock' if we emit the
+ // branches in this order. See if rearranging a case value would help.
+ // We start at the bottom as it's the case with the least weight.
+ for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){
+ if (I->BB == NextBlock) {
+ std::swap(*I, BackCase);
+ break;
+ }
+ }
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the Case's target mbb if the value being switched on SV is equal
+ // to C.
+ MachineBasicBlock *CurBlock = CR.CaseBB;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ MachineBasicBlock *FallThrough;
+ if (I != E-1) {
+ FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+ CurMF->insert(BBI, FallThrough);
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ } else {
+ // If the last case doesn't match, go to the default block.
+ FallThrough = Default;
+ }
+
+ const Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->High == I->Low) {
+ // This is just small small case range :) containing exactly 1 case
+ CC = ISD::SETEQ;
+ LHS = SV; RHS = I->High; MHS = NULL;
+ } else {
+ CC = ISD::SETCC_INVALID;
+ LHS = I->Low; MHS = SV; RHS = I->High;
+ }
+
+ // The false weight should be sum of all un-handled cases.
+ UnhandledWeights -= I->ExtraWeight;
+ CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
+ /* me */ CurBlock,
+ /* trueweight */ I->ExtraWeight,
+ /* falseweight */ UnhandledWeights);
+
+ // If emitting the first comparison, just call visitSwitchCase to emit the
+ // code into the current block. Otherwise, push the CaseBlock onto the
+ // vector to be later processed by SDISel, and insert the node's MBB
+ // before the next MBB.
+ if (CurBlock == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
+ else
+ SwitchCases.push_back(CB);
+
+ CurBlock = FallThrough;
+ }
+
+ return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+ return TLI.supportJumpTables() &&
+ (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+ uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+ APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
+ return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit jumptable for current switch case range
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
+ CaseRecVector &WorkList,
+ const Value *SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+
+ APInt TSize(First.getBitWidth(), 0);
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
+ TSize += I->size();
+
+ if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
+ return false;
+
+ APInt Range = ComputeRange(First, Last);
+ // The density is TSize / Range. Require at least 40%.
+ // It should not be possible for IntTSize to saturate for sane code, but make
+ // sure we handle Range saturation correctly.
+ uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
+ uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
+ if (IntTSize * 10 < IntRange * 4)
+ return false;
+
+ DEBUG(dbgs() << "Lowering jump table\n"
+ << "First entry: " << First << ". Last entry: " << Last << '\n'
+ << "Range: " << Range << ". Size: " << TSize << ".\n\n");
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Create a new basic block to hold the code for loading the address
+ // of the jump table, and jumping to it. Update successor information;
+ // we will either branch to the default case for the switch, or the jump
+ // table.
+ MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, JumpTableBB);
+
+ addSuccessorWithWeight(CR.CaseBB, Default);
+ addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
+
+ // Build a vector of destination BBs, corresponding to each target
+ // of the jump table. If the value of the jump table slot corresponds to
+ // a case statement, push the case's BB onto the vector, otherwise, push
+ // the default BB.
+ std::vector<MachineBasicBlock*> DestBBs;
+ APInt TEI = First;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+ const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
+ const APInt &High = cast<ConstantInt>(I->High)->getValue();
+
+ if (Low.ule(TEI) && TEI.ule(High)) {
+ DestBBs.push_back(I->BB);
+ if (TEI==High)
+ ++I;
+ } else {
+ DestBBs.push_back(Default);
+ }
+ }
+
+ // Calculate weight for each unique destination in CR.
+ DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
+ if (FuncInfo.BPI)
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+ DestWeights.find(I->BB);
+ if (Itr != DestWeights.end())
+ Itr->second += I->ExtraWeight;
+ else
+ DestWeights[I->BB] = I->ExtraWeight;
+ }
+
+ // Update successor info. Add one edge to each unique successor.
+ BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+ for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+ E = DestBBs.end(); I != E; ++I) {
+ if (!SuccsHandled[(*I)->getNumber()]) {
+ SuccsHandled[(*I)->getNumber()] = true;
+ DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+ DestWeights.find(*I);
+ addSuccessorWithWeight(JumpTableBB, *I,
+ Itr != DestWeights.end() ? Itr->second : 0);
+ }
+ }
+
+ // Create a jump table index for this jump table.
+ unsigned JTEncoding = TLI.getJumpTableEncoding();
+ unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
+ ->createJumpTableIndex(DestBBs);
+
+ // Set the jump table information so that we can codegen it as a second
+ // MachineBasicBlock
+ JumpTable JT(-1U, JTI, JumpTableBB, Default);
+ JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
+ if (CR.CaseBB == SwitchBB)
+ visitJumpTableHeader(JT, JTH, SwitchBB);
+
+ JTCases.push_back(JumpTableBlock(JTH, JT));
+ return true;
+}
+
+/// handleBTSplitSwitchCase - emit comparison and split binary search tree into
+/// 2 subtrees.
+bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Size is the number of Cases represented by this range.
+ unsigned Size = CR.Range.second - CR.Range.first;
+
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+ double FMetric = 0;
+ CaseItr Pivot = CR.Range.first + Size/2;
+
+ // Select optimal pivot, maximizing sum density of LHS and RHS. This will
+ // (heuristically) allow us to emit JumpTable's later.
+ APInt TSize(First.getBitWidth(), 0);
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ APInt LSize = FrontCase.size();
+ APInt RSize = TSize-LSize;
+ DEBUG(dbgs() << "Selecting best pivot: \n"
+ << "First: " << First << ", Last: " << Last <<'\n'
+ << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+ for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+ J!=E; ++I, ++J) {
+ const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+ const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+ APInt Range = ComputeRange(LEnd, RBegin);
+ assert((Range - 2ULL).isNonNegative() &&
+ "Invalid case distance");
+ // Use volatile double here to avoid excess precision issues on some hosts,
+ // e.g. that use 80-bit X87 registers.
+ volatile double LDensity =
+ (double)LSize.roundToDouble() /
+ (LEnd - First + 1ULL).roundToDouble();
+ volatile double RDensity =
+ (double)RSize.roundToDouble() /
+ (Last - RBegin + 1ULL).roundToDouble();
+ double Metric = Range.logBase2()*(LDensity+RDensity);
+ // Should always split in some non-trivial place
+ DEBUG(dbgs() <<"=>Step\n"
+ << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+ << "LDensity: " << LDensity
+ << ", RDensity: " << RDensity << '\n'
+ << "Metric: " << Metric << '\n');
+ if (FMetric < Metric) {
+ Pivot = J;
+ FMetric = Metric;
+ DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
+ }
+
+ LSize += J->size();
+ RSize -= J->size();
+ }
+ if (areJTsAllowed(TLI)) {
+ // If our case is dense we *really* should handle it earlier!
+ assert((FMetric > 0) && "Should handle dense range earlier!");
+ } else {
+ Pivot = CR.Range.first + Size/2;
+ }
+
+ CaseRange LHSR(CR.Range.first, Pivot);
+ CaseRange RHSR(Pivot, CR.Range.second);
+ const Constant *C = Pivot->Low;
+ MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+ // We know that we branch to the LHS if the Value being switched on is
+ // less than the Pivot value, C. We use this to optimize our binary
+ // tree a bit, by recognizing that if SV is greater than or equal to the
+ // LHS's Case Value, and that Case Value is exactly one less than the
+ // Pivot's Value, then we can branch directly to the LHS's Target,
+ // rather than creating a leaf node for it.
+ if ((LHSR.second - LHSR.first) == 1 &&
+ LHSR.first->High == CR.GE &&
+ cast<ConstantInt>(C)->getValue() ==
+ (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ TrueBB = LHSR.first->BB;
+ } else {
+ TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, TrueBB);
+ WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Similar to the optimization above, if the Value being switched on is
+ // known to be less than the Constant CR.LT, and the current Case Value
+ // is CR.LT - 1, then we can branch directly to the target block for
+ // the current Case Value, rather than emitting a RHS leaf node for it.
+ if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+ cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+ (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ FalseBB = RHSR.first->BB;
+ } else {
+ FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, FalseBB);
+ WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the LHS node if the value being switched on SV is less than C.
+ // Otherwise, branch to LHS.
+ CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+ if (CR.CaseBB == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
+ else
+ SwitchCases.push_back(CB);
+
+ return true;
+}
+
+/// handleBitTestsSwitchCase - if current case range has few destination and
+/// range span less, than machine word bitwidth, encode case range into series
+/// of masks and emit bit tests with these masks.
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB){
+ EVT PTy = TLI.getPointerTy();
+ unsigned IntPtrBits = PTy.getSizeInBits();
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // If target does not have legal shift left, do not emit bit tests at all.
+ if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+ return false;
+
+ size_t numCmps = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I) {
+ // Single case counts one, case range - two.
+ numCmps += (I->Low == I->High ? 1 : 2);
+ }
+
+ // Count unique destinations
+ SmallSet<MachineBasicBlock*, 4> Dests;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ Dests.insert(I->BB);
+ if (Dests.size() > 3)
+ // Don't bother the code below, if there are too much unique destinations
+ return false;
+ }
+ DEBUG(dbgs() << "Total number of unique destinations: "
+ << Dests.size() << '\n'
+ << "Total number of comparisons: " << numCmps << '\n');
+
+ // Compute span of values.
+ const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+ APInt cmpRange = maxValue - minValue;
+
+ DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
+ << "Low bound: " << minValue << '\n'
+ << "High bound: " << maxValue << '\n');
+
+ if (cmpRange.uge(IntPtrBits) ||
+ (!(Dests.size() == 1 && numCmps >= 3) &&
+ !(Dests.size() == 2 && numCmps >= 5) &&
+ !(Dests.size() >= 3 && numCmps >= 6)))
+ return false;
+
+ DEBUG(dbgs() << "Emitting bit tests\n");
+ APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+ // Optimize the case where all the case values fit in a
+ // word without having to subtract minValue. In this case,
+ // we can optimize away the subtraction.
+ if (maxValue.ult(IntPtrBits)) {
+ cmpRange = maxValue;
+ } else {
+ lowBound = minValue;
+ }
+
+ CaseBitsVector CasesBits;
+ unsigned i, count = 0;
+
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ MachineBasicBlock* Dest = I->BB;
+ for (i = 0; i < count; ++i)
+ if (Dest == CasesBits[i].BB)
+ break;
+
+ if (i == count) {
+ assert((count < 3) && "Too much destinations to test!");
+ CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
+ count++;
+ }
+
+ const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+ const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+ uint64_t lo = (lowValue - lowBound).getZExtValue();
+ uint64_t hi = (highValue - lowBound).getZExtValue();
+ CasesBits[i].ExtraWeight += I->ExtraWeight;
+
+ for (uint64_t j = lo; j <= hi; j++) {
+ CasesBits[i].Mask |= 1ULL << j;
+ CasesBits[i].Bits++;
+ }
+
+ }
+ std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+ BitTestInfo BTC;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ DEBUG(dbgs() << "Cases:\n");
+ for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+ DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
+ << ", Bits: " << CasesBits[i].Bits
+ << ", BB: " << CasesBits[i].BB << '\n');
+
+ MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, CaseBB);
+ BTC.push_back(BitTestCase(CasesBits[i].Mask,
+ CaseBB,
+ CasesBits[i].BB, CasesBits[i].ExtraWeight));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ BitTestBlock BTB(lowBound, cmpRange, SV,
+ -1U, MVT::Other, (CR.CaseBB == SwitchBB),
+ CR.CaseBB, Default, BTC);
+
+ if (CR.CaseBB == SwitchBB)
+ visitBitTestHeader(BTB, SwitchBB);
+
+ BitTestCases.push_back(BTB);
+
+ return true;
+}
+
+/// Clusterify - Transform simple list of Cases into list of CaseRange's
+size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
+
+ /// Use a shorter form of declaration, and also
+ /// show the we want to use CRSBuilder as Clusterifier.
+ typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
+
+ Clusterifier TheClusterifier;
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ // Start with "simple" cases
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ const BasicBlock *SuccBB = i.getCaseSuccessor();
+ MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
+
+ TheClusterifier.add(i.getCaseValueEx(), SMBB,
+ BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
+ }
+
+ TheClusterifier.optimize();
+
+ size_t numCmps = 0;
+ for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
+ e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
+ Clusterifier::Cluster &C = *i;
+ // Update edge weight for the cluster.
+ unsigned W = C.first.Weight;
+
+ // FIXME: Currently work with ConstantInt based numbers.
+ // Changing it to APInt based is a pretty heavy for this commit.
+ Cases.push_back(Case(C.first.getLow().toConstantInt(),
+ C.first.getHigh().toConstantInt(), C.second, W));
+
+ if (C.first.getLow() != C.first.getHigh())
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+ if (JTCases[i].first.HeaderBB == First)
+ JTCases[i].first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+ if (BitTestCases[i].Parent == First)
+ BitTestCases[i].Parent = Last;
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // If there is only the default destination, branch to it if it is not the
+ // next basic block. Otherwise, just fall through.
+ if (!SI.getNumCases()) {
+ // Update machine-CFG edges.
+
+ // If this is not a fall-through branch, emit the branch.
+ SwitchMBB->addSuccessor(Default);
+ if (Default != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Default)));
+
+ return;
+ }
+
+ // If there are any non-default case statements, create a vector of Cases
+ // representing each one, and sort the vector so that we can efficiently
+ // create a binary search tree from them.
+ CaseVector Cases;
+ size_t numCmps = Clusterify(Cases, SI);
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << '\n');
+ (void)numCmps;
+
+ // Get the Value to be switched on and default basic blocks, which will be
+ // inserted into CaseBlock records, representing basic blocks in the binary
+ // search tree.
+ const Value *SV = SI.getCondition();
+
+ // Push the initial CaseRec onto the worklist
+ CaseRecVector WorkList;
+ WorkList.push_back(CaseRec(SwitchMBB,0,0,
+ CaseRange(Cases.begin(),Cases.end())));
+
+ while (!WorkList.empty()) {
+ // Grab a record representing a case range to process off the worklist
+ CaseRec CR = WorkList.back();
+ WorkList.pop_back();
+
+ if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+ // If the range has few cases (two or less) emit a series of specific
+ // tests.
+ if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+ // If the switch has more than N blocks, and is at least 40% dense, and the
+ // target supports indirect branches, then emit a jump table rather than
+ // lowering the switch to a binary tree of conditional branches.
+ // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries().
+ if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+ // Emit binary tree. We need to pick a pivot, and push left and right ranges
+ // onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
+ handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
+ }
+}
+
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges with unique successors.
+ SmallSet<BasicBlock*, 32> Done;
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
+ BasicBlock *BB = I.getSuccessor(i);
+ bool Inserted = Done.insert(BB);
+ if (!Inserted)
+ continue;
+
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
+ addSuccessorWithWeight(IndirectBrMBB, Succ);
+ }
+
+ DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ getValue(I.getAddress())));
+}
+
+void SelectionDAGBuilder::visitFSub(const User &I) {
+ // -0.0 - X --> fneg
+ Type *Ty = I.getType();
+ if (isa<Constant>(I.getOperand(0)) &&
+ I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+
+ visitBinary(I, ISD::FSUB);
+}
+
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+
+ // Coerce the shift amount to the right type if we can.
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ unsigned ShiftSize = ShiftTy.getSizeInBits();
+ unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ DebugLoc DL = getCurDebugLoc();
+
+ // If the operand is smaller than the shift count type, promote it.
+ if (ShiftSize > Op2Size)
+ Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
+ // If the operand is larger than the shift count type but the shift
+ // count type has enough bits to represent any shift value, truncate
+ // it now. This is a common case and it exposes the truncate to
+ // optimization early.
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+ // Otherwise we'll need to temporarily settle for some other convenient
+ // type. Type legalization will make adjustments once the shiftee is split.
+ else
+ Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
+ }
+
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ // Turn exact SDivs into multiplications.
+ // FIXME: This should be in DAGCombiner, but it doesn't have access to the
+ // exact bit.
+ if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
+ !isa<ConstantSDNode>(Op1) &&
+ isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
+ setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+ else
+ setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+ Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitICmp(const User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(const User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+}
+
+void SelectionDAGBuilder::visitSelect(const User &I) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Cond = getValue(I.getOperand(0));
+ SDValue TrueVal = getValue(I.getOperand(1));
+ SDValue FalseVal = getValue(I.getOperand(2));
+ ISD::NodeType OpCode = Cond.getValueType().isVector() ?
+ ISD::VSELECT : ISD::SELECT;
+
+ for (unsigned i = 0; i != NumValues; ++i)
+ Values[i] = DAG.getNode(OpCode, getCurDebugLoc(),
+ TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+ Cond,
+ SDValue(TrueVal.getNode(),
+ TrueVal.getResNo() + i),
+ SDValue(FalseVal.getNode(),
+ FalseVal.getResNo() + i));
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitTrunc(const User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitZExt(const User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+ // ZExt also can't be a cast to bool for same reason. So, nothing much to do
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(const User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+ // SExt also can't be a cast to bool for same reason. So, nothing much to do
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+ DestVT, N,
+ DAG.getTargetConstant(0, TLI.getPointerTy())));
+}
+
+void SelectionDAGBuilder::visitFPExt(const User &I){
+ // FPExt is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(const User &I){
+ // SIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitBitCast(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+
+ // BitCast assures us that source and destination are the same size so this is
+ // either a BITCAST or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ DestVT, N)); // convert types.
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InVal = getValue(I.getOperand(1));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(2)));
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(1)));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// beginning from position Pos and ending in Pos+Size, falls within the
+// specified sequential range [L, L+Pos). or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+ unsigned Pos, unsigned Size, int Low) {
+ for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (Mask[i] >= 0 && Mask[i] != Low)
+ return false;
+ return true;
+}
+
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
+ SDValue Src1 = getValue(I.getOperand(0));
+ SDValue Src2 = getValue(I.getOperand(1));
+
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+ unsigned MaskNumElts = Mask.size();
+
+ EVT VT = TLI.getValueType(I.getType());
+ EVT SrcVT = Src1.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &Mask[0]));
+ return;
+ }
+
+ // Normalize the shuffle vector since mask and vector length don't match.
+ if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+ // Mask is longer than the source vectors and is a multiple of the source
+ // vectors. We can use concatenate vector to make the mask and vectors
+ // lengths match.
+ if (SrcNumElts*2 == MaskNumElts) {
+ // First check for Src1 in low and Src2 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+ // Then check for Src2 in low and Src1 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src2, Src1));
+ return;
+ }
+ }
+
+ // Pad both vectors with undefs to make them the same length as the mask.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+ bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+ SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+ SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+ SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+ MOps1[0] = Src1;
+ MOps2[0] = Src2;
+
+ Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps1[0], NumConcat);
+ Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps2[0], NumConcat);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts - MaskNumElts;
+ MappedOps.push_back(Idx);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+
+ if (SrcNumElts > MaskNumElts) {
+ // Analyze the access pattern of the vector to see if we can extract
+ // two subvectors and do the shuffle. The analysis is done by calculating
+ // the range of elements the mask access on both vectors.
+ int MinRange[2] = { static_cast<int>(SrcNumElts),
+ static_cast<int>(SrcNumElts)};
+ int MaxRange[2] = {-1, -1};
+
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ unsigned Input = 0;
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= (int)SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+ if (Idx > MaxRange[Input])
+ MaxRange[Input] = Idx;
+ if (Idx < MinRange[Input])
+ MinRange[Input] = Idx;
+ }
+
+ // Check if the access is smaller than the vector size and can we find
+ // a reasonable extract index.
+ int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not
+ // Extract.
+ int StartIdx[2]; // StartIdx to extract from
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
+ RangeUse[Input] = 0; // Unused
+ StartIdx[Input] = 0;
+ continue;
+ }
+
+ // Find a good start index that is a multiple of the mask length. Then
+ // see if the rest of the elements are in range.
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ }
+
+ if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+ return;
+ }
+ if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ SDValue &Src = Input == 0 ? Src1 : Src2;
+ if (RangeUse[Input] == 0)
+ Src = DAG.getUNDEF(VT);
+ else
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+ Src, DAG.getIntPtrConstant(StartIdx[Input]));
+ }
+
+ // Calculate new mask.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx >= 0) {
+ if (Idx < (int)SrcNumElts)
+ Idx -= StartIdx[0];
+ else
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ }
+ MappedOps.push_back(Idx);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+ }
+
+ // We can't use either concat vectors or extract subvectors so fall back to
+ // replacing the shuffle with extract and build vector.
+ // to insert and build vector.
+ EVT EltVT = VT.getVectorElementType();
+ EVT PtrVT = TLI.getPointerTy();
+ SmallVector<SDValue,8> Ops;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ SDValue Res;
+
+ if (Idx < 0) {
+ Res = DAG.getUNDEF(EltVT);
+ } else {
+ SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+ if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
+
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src, DAG.getConstant(Idx, PtrVT));
+ }
+
+ Ops.push_back(Res);
+ }
+
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Value *Op1 = I.getOperand(1);
+ Type *AggTy = I.getType();
+ Type *ValTy = Op1->getType();
+ bool IntoUndef = isa<UndefValue>(Op0);
+ bool FromUndef = isa<UndefValue>(Op1);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumAggValues = AggValueVTs.size();
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumAggValues);
+
+ SDValue Agg = getValue(Op0);
+ unsigned i = 0;
+ // Copy the beginning value(s) from the original aggregate.
+ for (; i != LinearIndex; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+ // Copy values from the inserted value(s).
+ if (NumValValues) {
+ SDValue Val = getValue(Op1);
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ }
+ // Copy remaining value(s) from the original aggregate.
+ for (; i != NumAggValues; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&AggValueVTs[0], NumAggValues),
+ &Values[0], NumAggValues));
+}
+
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ Type *AggTy = Op0->getType();
+ Type *ValTy = I.getType();
+ bool OutOfUndef = isa<UndefValue>(Op0);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumValValues = ValValueVTs.size();
+
+ // Ignore a extractvalue that produces an empty object
+ if (!NumValValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
+ SmallVector<SDValue, 4> Values(NumValValues);
+
+ SDValue Agg = getValue(Op0);
+ // Copy out the selected value(s).
+ for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+ Values[i - LinearIndex] =
+ OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValValueVTs[0], NumValValues),
+ &Values[0], NumValValues));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ // Note that the pointer operand may be a vector of pointers. Take the scalar
+ // element which holds a pointer.
+ Type *Ty = I.getOperand(0)->getType()->getScalarType();
+
+ for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
+ OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ DAG.getConstant(Offset, N.getValueType()));
+ }
+
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero()) continue;
+ uint64_t Offs =
+ TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ SDValue OffsVal;
+ EVT PTy = TLI.getPointerTy();
+ unsigned PtrBits = PTy.getSizeInBits();
+ if (PtrBits < 64)
+ OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(Offs, MVT::i64));
+ else
+ OffsVal = DAG.getIntPtrConstant(Offs);
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ OffsVal);
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+ TD->getTypeAllocSize(Ty));
+ SDValue IdxN = getValue(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (ElementSize != 1) {
+ if (ElementSize.isPowerOf2()) {
+ unsigned Amt = ElementSize.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, IdxN.getValueType()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType());
+ IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+ N.getValueType(), IdxN, Scale);
+ }
+ }
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ N.getValueType(), N, IdxN);
+ }
+ }
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
+ // If this is a fixed sized alloca in the entry block of the function,
+ // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ Type *Ty = I.getAllocatedType();
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
+ I.getAlignment());
+
+ SDValue AllocSize = getValue(I.getArraySize());
+
+ EVT IntPtr = TLI.getPointerTy();
+ if (AllocSize.getValueType() != IntPtr)
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+ AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
+ AllocSize,
+ DAG.getConstant(TySize, IntPtr));
+
+ // Handle alignment. If the requested alignment is less than or equal to
+ // the stack alignment, ignore it. If the size is greater than or equal to
+ // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ if (Align <= StackAlign)
+ Align = 0;
+
+ // Round the size of the allocation up to the stack alignment size
+ // by add SA-1 to the size.
+ AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(StackAlign-1));
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+
+ SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+ SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+ VTs, Ops, 3);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ // Inform the Frame Information that we have just allocated a variable-sized
+ // object.
+ FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
+}
+
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+ if (I.isAtomic())
+ return visitAtomicLoad(I);
+
+ const Value *SV = I.getOperand(0);
+ SDValue Ptr = getValue(SV);
+
+ Type *Ty = I.getType();
+
+ bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ bool isInvariant = I.getMetadata("invariant.load") != 0;
+ unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ SDValue Root;
+ bool ConstantMemory = false;
+ if (I.isVolatile() || NumValues > MaxParallelChains)
+ // Serialize volatile loads with other side effects.
+ Root = getRoot();
+ else if (AA->pointsToConstantMemory(
+ AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
+ EVT PtrVT = Ptr.getValueType();
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // Serializing loads here may result in excessive register pressure, and
+ // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+ // could recover a bit by hoisting nodes upward in the chain by recognizing
+ // they are side-effect free or do not alias. The optimizer should really
+ // avoid this case by converting large object/array copies to llvm.memcpy
+ // (MaxParallelChains should always remain as failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+ A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+ isNonTemporal, isInvariant, Alignment, TBAAInfo,
+ Ranges);
+
+ Values[i] = L;
+ Chains[ChainI] = L.getValue(1);
+ }
+
+ if (!ConstantMemory) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ if (isVolatile)
+ DAG.setRoot(Chain);
+ else
+ PendingLoads.push_back(Chain);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+ if (I.isAtomic())
+ return visitAtomicStore(I);
+
+ const Value *SrcV = I.getOperand(0);
+ const Value *PtrV = I.getOperand(1);
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ // Get the lowered operands. Note that we do this after
+ // checking if NumResults is zero, because with zero results
+ // the operands won't have values in the map.
+ SDValue Src = getValue(SrcV);
+ SDValue Ptr = getValue(PtrV);
+
+ SDValue Root = getRoot();
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
+ EVT PtrVT = Ptr.getValueType();
+ bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ Add, MachinePointerInfo(PtrV, Offsets[i]),
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ Chains[ChainI] = St;
+ }
+
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ ++SDNodeOrder;
+ AssignOrderingToNode(StoreNode.getNode());
+ DAG.setRoot(StoreNode);
+}
+
+static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
+ SynchronizationScope Scope,
+ bool Before, DebugLoc dl,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // Fence, if necessary
+ if (Before) {
+ if (Order == AcquireRelease || Order == SequentiallyConsistent)
+ Order = Release;
+ else if (Order == Acquire || Order == Monotonic)
+ return Chain;
+ } else {
+ if (Order == AcquireRelease)
+ Order = Acquire;
+ else if (Order == Release || Order == Monotonic)
+ return Chain;
+ }
+ SDValue Ops[3];
+ Ops[0] = Chain;
+ Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
+ return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
+}
+
+void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+ getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()),
+ MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ ISD::NodeType NT;
+ switch (I.getOperation()) {
+ default: llvm_unreachable("Unknown atomicrmw operation");
+ case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
+ case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
+ case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
+ case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
+ case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
+ case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
+ case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
+ case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
+ case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
+ case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
+ case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+ }
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue L =
+ DAG.getAtomic(NT, dl,
+ getValue(I.getValOperand()).getValueType().getSimpleVT(),
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getValOperand()),
+ I.getPointerOperand(), 0 /* Alignment */,
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitFence(const FenceInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
+ DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3));
+}
+
+void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ EVT VT = TLI.getValueType(I.getType());
+
+ if (I.getAlignment() < VT.getSizeInBits() / 8)
+ report_fatal_error("Cannot generate unaligned atomic load");
+
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+ getValue(I.getPointerOperand()),
+ I.getPointerOperand(), I.getAlignment(),
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ EVT VT = TLI.getValueType(I.getValueOperand()->getType());
+
+ if (I.getAlignment() < VT.getSizeInBits() / 8)
+ report_fatal_error("Cannot generate unaligned atomic store");
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue OutChain =
+ DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getValueOperand()),
+ I.getPointerOperand(), I.getAlignment(),
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ DAG.setRoot(OutChain);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ bool HasChain = !I.doesNotAccessMemory();
+ bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+ // Build the operand list.
+ SmallVector<SDValue, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+ // Info is set by getTgtMemInstrinsic
+ TargetLowering::IntrinsicInfo Info;
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+ // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ SDValue Op = getValue(I.getArgOperand(i));
+ Ops.push_back(Op);
+ }
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+
+ if (HasChain)
+ ValueVTs.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+
+ // Create the node.
+ SDValue Result;
+ if (IsTgtIntrinsic) {
+ // This is target intrinsic that touches memory
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size(),
+ Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align, Info.vol,
+ Info.readMem, Info.writeMem);
+ } else if (!HasChain) {
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ } else if (!I.getType()->isVoidTy()) {
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ } else {
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ }
+
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (!I.getType()->isVoidTy()) {
+ if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ EVT VT = TLI.getValueType(PTy);
+ Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
+ }
+
+ setValue(&I, Result);
+ } else {
+ // Assign order to result here. If the intrinsic does not produce a result,
+ // it won't be mapped to a SDNode and visit() will not assign it an order
+ // number.
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.getNode());
+ }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with exponent of 1:
+///
+/// Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the hexadecimal representation of floating point value.
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+ SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x007fffff, MVT::i32));
+ SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+ DAG.getConstant(0x3f800000, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the hexadecimal representation of floating point value.
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+ DebugLoc dl) {
+ SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x7f800000, MVT::i32));
+ SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+ DAG.getConstant(127, MVT::i32));
+ return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get 32-bit floating point constant.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+ return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)),
+ MVT::f32);
+}
+
+/// expandExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OFe 1.4426950f
+ // IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x3fb8aa3b));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ SDValue TwoToFracPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // 0.000107046256 error, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ //
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ }
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
+}
+
+/// expandLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log(2) [0.69314718f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3f317218));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ SDValue LogOfMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // LogofMantissa =
+ // -1.1609546f +
+ // (1.4034025f - 0.23903021f * x) * x;
+ //
+ // error 0.0034276066, which is better than 8 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbe74c456));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3fb3a2b1));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // LogOfMantissa =
+ // -1.7417939f +
+ // (2.8212026f +
+ // (-1.4699568f +
+ // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+ //
+ // error 0.000061011436, which is 14 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbd67b6d6));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ee4f4b8));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fbc278b));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40348e95));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // LogOfMantissa =
+ // -2.1072184f +
+ // (4.2372794f +
+ // (-3.7029485f +
+ // (2.2781945f +
+ // (-0.87823314f +
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+ //
+ // error 0.0000023660568, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbc91e5ac));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e4350aa));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f60d3e3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x4011cdf0));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x406cfd1c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x408797cb));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
+}
+
+/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Get the exponent.
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ // Different possible minimax approximations of significand in
+ // floating-point for various degrees of accuracy over [1,2].
+ SDValue Log2ofMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+ //
+ // error 0.0049451742, which is more than 7 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbeb08fe0));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x40019463));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log2ofMantissa =
+ // -2.51285454f +
+ // (4.07009056f +
+ // (-2.12067489f +
+ // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+ //
+ // error 0.0000876136000, which is better than 13 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbda7262e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f25280b));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x4007b923));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40823e2f));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log2ofMantissa =
+ // -3.0400495f +
+ // (6.1129976f +
+ // (-5.3420409f +
+ // (3.2865683f +
+ // (-1.2669343f +
+ // (0.27515199f -
+ // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+ //
+ // error 0.0000018516, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbcd2769e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e8ce0b9));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fa22ae7));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40525723));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x40aaf200));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x40c39dad));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
+}
+
+/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log10(2) [0.30102999f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3e9a209a));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ SDValue Log10ofMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log10ofMantissa =
+ // -0.50419619f +
+ // (0.60948995f - 0.10380950f * x) * x;
+ //
+ // error 0.0014886165, which is 6 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbdd49a13));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f1c0789));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log10ofMantissa =
+ // -0.64831180f +
+ // (0.91751397f +
+ // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+ //
+ // error 0.00019228036, which is better than 12 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3d431f31));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ea21fb2));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f6ae232));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log10ofMantissa =
+ // -0.84299375f +
+ // (1.5327582f +
+ // (-1.0688956f +
+ // (0.49102474f +
+ // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+ //
+ // error 0.0000037995730, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3c5d51ce));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e00685a));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3efb6798));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f88d192));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fc4316c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
+}
+
+/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ SDValue TwoToFractionalPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ }
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
+}
+
+/// visitPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode with x == 10.0f.
+static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ bool IsExp10 = false;
+ if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
+ APFloat Ten(10.0f);
+ IsExp10 = LHSC->isExactlyValue(Ten);
+ }
+ }
+
+ if (IsExp10) {
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OF10 3.3219281f
+ // IntegerPartOfX = (int32_t)(x * LOG2OF10);
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
+ getF32Constant(DAG, 0x40549a78));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ SDValue TwoToFractionalPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // twoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ }
+
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
+}
+
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG) {
+ // If RHS is a constant, we can expand this out to a multiplication tree,
+ // otherwise we end up lowering to a call to __powidf2 (for example). When
+ // optimizing for size, we only want to do this if the expansion would produce
+ // a small number of multiplies, otherwise we do the full expansion.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ // Get the exponent as a positive value.
+ unsigned Val = RHSC->getSExtValue();
+ if ((int)Val < 0) Val = -Val;
+
+ // powi(x, 0) -> 1.0
+ if (Val == 0)
+ return DAG.getConstantFP(1.0, LHS.getValueType());
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
+ // If optimizing for size, don't insert too many multiplies. This
+ // inserts up to 5 multiplies.
+ CountPopulation_32(Val)+Log2_32(Val) < 7) {
+ // We use the simple binary decomposition method to generate the multiply
+ // sequence. There are more optimal ways to do this (for example,
+ // powi(x,15) generates one more multiply than it should), but this has
+ // the benefit of being both really simple and much better than a libcall.
+ SDValue Res; // Logically starts equal to 1.0
+ SDValue CurSquare = LHS;
+ while (Val) {
+ if (Val & 1) {
+ if (Res.getNode())
+ Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
+ else
+ Res = CurSquare; // 1.0*CurSquare.
+ }
+
+ CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+ CurSquare, CurSquare);
+ Val >>= 1;
+ }
+
+ // If the original was negative, invert the result, producing 1/(x*x*x).
+ if (RHSC->getSExtValue() < 0)
+ Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+ DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+ return Res;
+ }
+ }
+
+ // Otherwise, expand to a libcall.
+ return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
+
+// getTruncatedArgReg - Find underlying register used for an truncated
+// argument.
+static unsigned getTruncatedArgReg(const SDValue &N) {
+ if (N.getOpcode() != ISD::TRUNCATE)
+ return 0;
+
+ const SDValue &Ext = N.getOperand(0);
+ if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){
+ const SDValue &CFR = Ext.getOperand(0);
+ if (CFR.getOpcode() == ISD::CopyFromReg)
+ return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
+ if (CFR.getOpcode() == ISD::TRUNCATE)
+ return getTruncatedArgReg(CFR);
+ }
+ return 0;
+}
+
+/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
+/// argument, create the corresponding DBG_VALUE machine instruction for it now.
+/// At the end of instruction selection, they will be inserted to the entry BB.
+bool
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset,
+ const SDValue &N) {
+ const Argument *Arg = dyn_cast<Argument>(V);
+ if (!Arg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
+ // Ignore inlined function arguments here.
+ DIVariable DV(Variable);
+ if (DV.isInlinedFnArgument(MF.getFunction()))
+ return false;
+
+ unsigned Reg = 0;
+ // Some arguments' frame index is recorded during argument lowering.
+ Offset = FuncInfo.getArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI->getFrameRegister(MF);
+
+ if (!Reg && N.getNode()) {
+ if (N.getOpcode() == ISD::CopyFromReg)
+ Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ else
+ Reg = getTruncatedArgReg(N);
+ if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+ if (PR)
+ Reg = PR;
+ }
+ }
+
+ if (!Reg) {
+ // Check if ValueMap has reg number.
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end())
+ Reg = VMI->second;
+ }
+
+ if (!Reg && N.getNode()) {
+ // Check if frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FINode->getIndex();
+ }
+ }
+
+ if (!Reg)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
+ TII->get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
+ FuncInfo.ArgDbgValues.push_back(&*MIB);
+ return true;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
+/// we want to emit this as a call to a named external function, return the name
+/// otherwise lower it and return null.
+const char *
+SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Res;
+
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ case Intrinsic::vastart: visitVAStart(I); return 0;
+ case Intrinsic::vaend: visitVAEnd(I); return 0;
+ case Intrinsic::vacopy: visitVACopy(I); return 0;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::setjmp:
+ return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
+ case Intrinsic::longjmp:
+ return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
+ case Intrinsic::memcpy: {
+ // Assert for address < 256 since we support only user defined address
+ // spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
+ return 0;
+ }
+ case Intrinsic::memset: {
+ // Assert for address < 256 since we support only user defined address
+ // spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+ MachinePointerInfo(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::memmove: {
+ // Assert for address < 256 since we support only user defined address
+ // spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
+ return 0;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ MDNode *Variable = DI.getVariable();
+ const Value *Address = DI.getAddress();
+ if (!Address || !DIVariable(Variable).Verify()) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return 0;
+ }
+
+ // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
+ // but do not always have a corresponding SDNode built. The SDNodeOrder
+ // absolute, but not relative, values are different depending on whether
+ // debug info exists.
+ ++SDNodeOrder;
+
+ // Check if address has undef value.
+ if (isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return 0;
+ }
+
+ SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
+ SDDbgValue *SDV;
+ if (N.getNode()) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ bool isParameter =
+ (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
+ isa<Argument>(Address));
+
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
+ if (isParameter && !AI) {
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (FINode)
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
+ 0, dl, SDNodeOrder);
+ else {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, 0, N);
+ return 0;
+ }
+ } else if (AI)
+ SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
+ 0, dl, SDNodeOrder);
+ else {
+ // Can't do anything with other non-AI cases yet.
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
+ DEBUG(Address->dump());
+ return 0;
+ }
+ DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+ } else {
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+ // If variable is pinned by a alloca in dominating bb then
+ // use StaticAllocaMap.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ if (AI->getParent() != DI.getParent()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ SDV = DAG.getDbgValue(Variable, SI->second,
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ return 0;
+ }
+ }
+ }
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ }
+ }
+ return 0;
+ }
+ case Intrinsic::dbg_value: {
+ const DbgValueInst &DI = cast<DbgValueInst>(I);
+ if (!DIVariable(DI.getVariable()).Verify())
+ return 0;
+
+ MDNode *Variable = DI.getVariable();
+ uint64_t Offset = DI.getOffset();
+ const Value *V = DI.getValue();
+ if (!V)
+ return 0;
+
+ // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
+ // but do not always have a corresponding SDNode built. The SDNodeOrder
+ // absolute, but not relative, values are different depending on whether
+ // debug info exists.
+ ++SDNodeOrder;
+ SDDbgValue *SDV;
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
+ SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ } else {
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
+ SDV = DAG.getDbgValue(Variable, N.getNode(),
+ N.getResNo(), Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ }
+ } else if (!V->use_empty() ) {
+ // Do not call getValue(V) yet, as we don't want to generate code.
+ // Remember it for later.
+ DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+ DanglingDebugInfoMap[V] = DDI;
+ } else {
+ // We may expand this to cover more cases. One case where we have no
+ // data available is an unreferenced parameter.
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ }
+ }
+
+ // Build a debug info table entry.
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ V = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI) {
+ DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
+ DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
+ return 0;
+ }
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return 0; // VLAs.
+ int FI = SI->second;
+
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
+ MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
+ return 0;
+ }
+
+ case Intrinsic::eh_typeid_for: {
+ // Find the type id for the given typeinfo.
+ GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
+ unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
+ Res = DAG.getConstant(TypeID, MVT::i32);
+ setValue(&I, Res);
+ return 0;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return 0;
+ case Intrinsic::eh_unwind_init:
+ DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
+ return 0;
+ case Intrinsic::eh_dwarf_cfa: {
+ SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
+ TLI.getPointerTy());
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ TLI.getPointerTy()),
+ CfaArg);
+ SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
+ TLI.getPointerTy(),
+ DAG.getConstant(0, TLI.getPointerTy()));
+ setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ FA, Offset));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_callsite: {
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
+ assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+ assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+ MMI.setCurrentCallSite(CI->getZExtValue());
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_functioncontext: {
+ // Get and store the index of the function context.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ AllocaInst *FnCtx =
+ cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+ int FI = FuncInfo.StaticAllocaMap[FnCtx];
+ MFI->setFunctionContextIndex(FI);
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_setjmp: {
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Ops, 2);
+ setValue(&I, Op.getValue(0));
+ DAG.setRoot(Op.getValue(1));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_longjmp: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = getValue(I.getArgOperand(1));
+ if (isa<ConstantSDNode>(ShAmt)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ }
+ unsigned NewIntrinsic = 0;
+ EVT ShAmtVT = MVT::v2i32;
+ switch (Intrinsic) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+
+ // The vector shift intrinsics with scalars uses 32b shift amounts but
+ // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
+ // to be zero.
+ // We must do this early because v2i32 is not a legal type.
+ SDValue ShOps[2];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ EVT DestVT = TLI.getValueType(I.getType());
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(NewIntrinsic, MVT::i32),
+ getValue(I.getArgOperand(0)), ShAmt);
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::x86_avx_vinsertf128_pd_256:
+ case Intrinsic::x86_avx_vinsertf128_ps_256:
+ case Intrinsic::x86_avx_vinsertf128_si_256:
+ case Intrinsic::x86_avx2_vinserti128: {
+ EVT DestVT = TLI.getValueType(I.getType());
+ EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
+ ElVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getIntPtrConstant(Idx));
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::x86_avx_vextractf128_pd_256:
+ case Intrinsic::x86_avx_vextractf128_ps_256:
+ case Intrinsic::x86_avx_vextractf128_si_256:
+ case Intrinsic::x86_avx2_vextracti128: {
+ EVT DestVT = TLI.getValueType(I.getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
+ DestVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ DAG.getIntPtrConstant(Idx));
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::convertff:
+ case Intrinsic::convertfsi:
+ case Intrinsic::convertfui:
+ case Intrinsic::convertsif:
+ case Intrinsic::convertuif:
+ case Intrinsic::convertss:
+ case Intrinsic::convertsu:
+ case Intrinsic::convertus:
+ case Intrinsic::convertuu: {
+ ISD::CvtCode Code = ISD::CVT_INVALID;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::convertff: Code = ISD::CVT_FF; break;
+ case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+ case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+ case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+ case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+ case Intrinsic::convertss: Code = ISD::CVT_SS; break;
+ case Intrinsic::convertsu: Code = ISD::CVT_SU; break;
+ case Intrinsic::convertus: Code = ISD::CVT_US; break;
+ case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
+ }
+ EVT DestVT = TLI.getValueType(I.getType());
+ const Value *Op1 = I.getArgOperand(0);
+ Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1),
+ DAG.getValueType(DestVT),
+ DAG.getValueType(getValue(Op1).getValueType()),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
+ Code);
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::powi:
+ setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG));
+ return 0;
+ case Intrinsic::log:
+ setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return 0;
+ case Intrinsic::log2:
+ setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return 0;
+ case Intrinsic::log10:
+ setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return 0;
+ case Intrinsic::exp:
+ setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return 0;
+ case Intrinsic::exp2:
+ setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return 0;
+ case Intrinsic::pow:
+ setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG, TLI));
+ return 0;
+ case Intrinsic::sqrt:
+ case Intrinsic::fabs:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::fabs: Opcode = ISD::FABS; break;
+ case Intrinsic::sin: Opcode = ISD::FSIN; break;
+ case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ }
+
+ setValue(&I, DAG.getNode(Opcode, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::fma:
+ setValue(&I, DAG.getNode(ISD::FMA, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ return 0;
+ case Intrinsic::fmuladd: {
+ EVT VT = TLI.getValueType(I.getType());
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isFMAFasterThanMulAndAdd(VT)){
+ setValue(&I, DAG.getNode(ISD::FMA, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ } else {
+ SDValue Mul = DAG.getNode(ISD::FMUL, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)));
+ SDValue Add = DAG.getNode(ISD::FADD, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ Mul,
+ getValue(I.getArgOperand(2)));
+ setValue(&I, Add);
+ }
+ return 0;
+ }
+ case Intrinsic::convert_to_fp16:
+ setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
+ MVT::i16, getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::convert_from_fp16:
+ setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
+ MVT::f32, getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::pcmarker: {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+ DAG.getVTList(MVT::i64, MVT::Other),
+ &Op, 1);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::cttz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::ctlz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::ctpop: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::stacksave: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::STACKSAVE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::stackrestore: {
+ Res = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
+ return 0;
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code into the DAG to store the stack guard onto the stack.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ EVT PtrTy = TLI.getPointerTy();
+
+ SDValue Src = getValue(I.getArgOperand(0)); // The guard's value.
+ AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MFI->setStackProtectorIndex(FI);
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+ // Store the stack protector onto the stack.
+ Res = DAG.getStore(getRoot(), dl, Src, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ true, false, 0);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return 0;
+ }
+ case Intrinsic::objectsize: {
+ // If we don't know by now, we're never going to know.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+
+ assert(CI && "Non-constant type in __builtin_object_size?");
+
+ SDValue Arg = getValue(I.getCalledValue());
+ EVT Ty = Arg.getValueType();
+
+ if (CI->isZero())
+ Res = DAG.getConstant(-1ULL, Ty);
+ else
+ Res = DAG.getConstant(0, Ty);
+
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::var_annotation:
+ // Discard annotate attributes
+ return 0;
+
+ case Intrinsic::init_trampoline: {
+ const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
+
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
+ Ops[5] = DAG.getSrcValue(F);
+
+ Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6);
+
+ DAG.setRoot(Res);
+ return 0;
+ }
+ case Intrinsic::adjust_trampoline: {
+ setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl,
+ TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::gcroot:
+ if (GFI) {
+ const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ }
+ return 0;
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+ case Intrinsic::flt_rounds:
+ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
+ return 0;
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ setValue(&I, getValue(I.getArgOperand(0)));
+ return 0;
+ }
+
+ case Intrinsic::debugtrap:
+ case Intrinsic::trap: {
+ StringRef TrapFuncName = TM.Options.getTrapFunctionName();
+ if (TrapFuncName.empty()) {
+ ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
+ ISD::TRAP : ISD::DEBUGTRAP;
+ DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot()));
+ return 0;
+ }
+ TargetLowering::ArgListTy Args;
+ TargetLowering::
+ CallLoweringInfo CLI(getRoot(), I.getType(),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
+ Args, DAG, dl);
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+ DAG.setRoot(Result.second);
+ return 0;
+ }
+
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow: {
+ ISD::NodeType Op;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+ case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+ case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+ case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+ case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+ }
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2));
+ return 0;
+ }
+ case Intrinsic::prefetch: {
+ SDValue Ops[5];
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = getValue(I.getArgOperand(3));
+ DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+ DAG.getVTList(MVT::Other),
+ &Ops[0], 5,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ false, /* volatile */
+ rw==0, /* read */
+ rw==1)); /* write */
+ return 0;
+ }
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end: {
+ bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
+ // Stack coloring is not enabled in O0, discard region information.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return 0;
+
+ SmallVector<Value *, 4> Allocas;
+ GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD);
+
+ for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(),
+ E = Allocas.end(); Object != E; ++Object) {
+ AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
+
+ // Could not find an Alloca.
+ if (!LifetimeObject)
+ continue;
+
+ int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
+
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
+ unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
+
+ Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
+ DAG.setRoot(Res);
+ }
+ return 0;
+ }
+ case Intrinsic::invariant_start:
+ // Discard region information.
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return 0;
+ case Intrinsic::invariant_end:
+ // Discard region information.
+ return 0;
+ case Intrinsic::donothing:
+ // ignore
+ return 0;
+ }
+}
+
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+ bool isTailCall,
+ MachineBasicBlock *LandingPad) {
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ Type *RetTy = FTy->getReturnType();
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ MCSymbol *BeginLabel = 0;
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI);
+
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ DAG.getMachineFunction(),
+ FTy->isVarArg(), Outs,
+ FTy->getContext());
+
+ SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
+
+ if (!CanLowerReturn) {
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(
+ FTy->getReturnType());
+ unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(
+ FTy->getReturnType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+ DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.Alignment = Align;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(FTy->getContext());
+ }
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ const Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode; Entry.Ty = V->getType();
+
+ unsigned attrInd = i - CS.arg_begin() + 1;
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+ Entry.Alignment = CS.getParamAlignment(attrInd);
+ Args.push_back(Entry);
+ }
+
+ if (LandingPad) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI.getContext().CreateTempSymbol();
+
+ // For SjLj, keep track of which landing pads go with which invokes
+ // so as to maintain the ordering of pads in the LSDA.
+ unsigned CallSiteIndex = MMI.getCurrentCallSite();
+ if (CallSiteIndex) {
+ MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+
+ // Now that the call site is handled, stop tracking it.
+ MMI.setCurrentCallSite(0);
+ }
+
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI.LowerCallTo.
+ if (isTailCall && !isInTailCallPosition(CS, TLI))
+ isTailCall = false;
+
+ TargetLowering::
+ CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG,
+ getCurDebugLoc(), CS);
+ std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI);
+ assert((isTailCall || Result.second.getNode()) &&
+ "Non-null chain expected with non-tail call!");
+ assert((Result.second.getNode() || !Result.first.getNode()) &&
+ "Null value expected with tail call!");
+ if (Result.first.getNode()) {
+ setValue(CS.getInstruction(), Result.first);
+ } else if (!CanLowerReturn && Result.second.getNode()) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+ ComputeValueVTs(TLI, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+
+ SmallVector<EVT, 4> RetTys;
+ SmallVector<uint64_t, 4> Offsets;
+ RetTy = FTy->getReturnType();
+ ComputeValueVTs(TLI, RetTy, RetTys, &Offsets);
+
+ unsigned NumValues = RetTys.size();
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
+ DemoteStackSlot,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add,
+ MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+ false, false, false, 1);
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ PendingLoads.push_back(Chain);
+
+ setValue(CS.getInstruction(),
+ DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &Values[0], Values.size()));
+ }
+
+ // Assign order to nodes here. If the call does not produce a result, it won't
+ // be mapped to a SDNode and visit() will not assign it an order number.
+ if (!Result.second.getNode()) {
+ // As a special case, a null chain means that a tail call has been emitted and
+ // the DAG root is already updated.
+ HasTailCall = true;
+ ++SDNodeOrder;
+ AssignOrderingToNode(DAG.getRoot().getNode());
+ } else {
+ DAG.setRoot(Result.second);
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.second.getNode());
+ }
+
+ if (LandingPad) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
+ DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
+
+ // Inform MachineModuleInfo of range.
+ MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+ }
+}
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
+ Type *LoadTy,
+ SelectionDAGBuilder &Builder) {
+
+ // Check to see if this load can be trivially constant folded, e.g. if the
+ // input is from a string literal.
+ if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+ // Cast pointer to the type we really want to load.
+ LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
+ PointerType::getUnqual(LoadTy));
+
+ if (const Constant *LoadCst =
+ ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
+ Builder.TD))
+ return Builder.getValue(LoadCst);
+ }
+
+ // Otherwise, we have to emit the load. If the pointer is to unfoldable but
+ // still constant memory, the input chain can be the entry node.
+ SDValue Root;
+ bool ConstantMemory = false;
+
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+ Root = Builder.DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = Builder.DAG.getRoot();
+ }
+
+ SDValue Ptr = Builder.getValue(PtrVal);
+ SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
+ Ptr, MachinePointerInfo(PtrVal),
+ false /*volatile*/,
+ false /*nontemporal*/,
+ false /*isinvariant*/, 1 /* align=1 */);
+
+ if (!ConstantMemory)
+ Builder.PendingLoads.push_back(LoadVal.getValue(1));
+ return LoadVal;
+}
+
+
+/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
+/// If so, return true and lower it, otherwise return false and it will be
+/// lowered like a normal call.
+bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+ // Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
+ if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
+ !I.getArgOperand(2)->getType()->isIntegerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+
+ const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
+
+ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
+ // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
+ if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
+ bool ActuallyDoIt = true;
+ MVT LoadVT;
+ Type *LoadTy;
+ switch (Size->getZExtValue()) {
+ default:
+ LoadVT = MVT::Other;
+ LoadTy = 0;
+ ActuallyDoIt = false;
+ break;
+ case 2:
+ LoadVT = MVT::i16;
+ LoadTy = Type::getInt16Ty(Size->getContext());
+ break;
+ case 4:
+ LoadVT = MVT::i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ break;
+ case 8:
+ LoadVT = MVT::i64;
+ LoadTy = Type::getInt64Ty(Size->getContext());
+ break;
+ /*
+ case 16:
+ LoadVT = MVT::v4i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ LoadTy = VectorType::get(LoadTy, 4);
+ break;
+ */
+ }
+
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+
+ // Require that we can find a legal MVT, and only do this if the target
+ // supports unaligned loads of that type. Expanding into byte loads would
+ // bloat the code.
+ if (ActuallyDoIt && Size->getZExtValue() > 4) {
+ // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+ // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+ if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT))
+ ActuallyDoIt = false;
+ }
+
+ if (ActuallyDoIt) {
+ SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
+ SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+
+ SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
+ ISD::SETNE);
+ EVT CallVT = TLI.getValueType(I.getType(), true);
+ setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
+ return true;
+ }
+ }
+
+
+ return false;
+}
+
+/// visitUnaryFloatCall - If a call instruction is a unary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // Sanity check that it really is a unary floating-point call.
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ !I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp));
+ return true;
+}
+
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
+ // Handle inline assembly differently.
+ if (isa<InlineAsm>(I.getCalledValue())) {
+ visitInlineAsm(&I);
+ return;
+ }
+
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ComputeUsesVAFloatArgument(I, &MMI);
+
+ const char *RenameFn = 0;
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration()) {
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
+ if (unsigned IID = II->getIntrinsicID(F)) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+ if (unsigned IID = F->getIntrinsicID()) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+
+ // Check for well-known libc/libm calls. If the function is internal, it
+ // can't be a library call.
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ switch (Func) {
+ default: break;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
+ if (I.getNumArgOperands() == 2 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.getType() == I.getArgOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue LHS = getValue(I.getArgOperand(0));
+ SDValue RHS = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+ LHS.getValueType(), LHS, RHS));
+ return;
+ }
+ break;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ if (visitUnaryFloatCall(I, ISD::FABS))
+ return;
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ if (visitUnaryFloatCall(I, ISD::FSIN))
+ return;
+ break;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ if (visitUnaryFloatCall(I, ISD::FCOS))
+ return;
+ break;
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ if (visitUnaryFloatCall(I, ISD::FSQRT))
+ return;
+ break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ if (visitUnaryFloatCall(I, ISD::FFLOOR))
+ return;
+ break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
+ return;
+ break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ if (visitUnaryFloatCall(I, ISD::FCEIL))
+ return;
+ break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ if (visitUnaryFloatCall(I, ISD::FRINT))
+ return;
+ break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ if (visitUnaryFloatCall(I, ISD::FTRUNC))
+ return;
+ break;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ if (visitUnaryFloatCall(I, ISD::FLOG2))
+ return;
+ break;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ if (visitUnaryFloatCall(I, ISD::FEXP2))
+ return;
+ break;
+ case LibFunc::memcmp:
+ if (visitMemCmpCall(I))
+ return;
+ break;
+ }
+ }
+ }
+
+ SDValue Callee;
+ if (!RenameFn)
+ Callee = getValue(I.getCalledValue());
+ else
+ Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+ // Check if we can potentially perform a tail call. More detailed checking is
+ // be done within LowerCallTo, after more information about the call is known.
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+namespace {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+ /// CallOperand - If this is the result output operand or a clobber
+ /// this is null, otherwise it is the incoming operand to the CallInst.
+ /// This gets modified as the asm is processed.
+ SDValue CallOperand;
+
+ /// AssignedRegs - If this is a register or register class operand, this
+ /// contains the set of register corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+ : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+ }
+
+ /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+ /// corresponds to. If there is no Value* for this operand, it returns
+ /// MVT::Other.
+ EVT getCallOperandValEVT(LLVMContext &Context,
+ const TargetLowering &TLI,
+ const DataLayout *TD) const {
+ if (CallOperandVal == 0) return MVT::Other;
+
+ if (isa<BasicBlock>(CallOperandVal))
+ return TLI.getPointerTy();
+
+ llvm::Type *OpTy = CallOperandVal->getType();
+
+ // FIXME: code duplicated from TargetLowering::ParseConstraints().
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (isIndirect) {
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpTy = IntegerType::get(Context, BitSize);
+ break;
+ }
+ }
+
+ return TLI.getValueType(OpTy, true);
+ }
+};
+
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+
+} // end anonymous namespace
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand.
+///
+static void GetRegistersForValue(SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ DebugLoc DL,
+ SDISelAsmOperandInfo &OpInfo) {
+ LLVMContext &Context = *DAG.getContext();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<unsigned, 4> Regs;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other) {
+ // If this is a FP input in an integer register (or visa versa) insert a bit
+ // cast of the input value. More generally, handle any case where the input
+ // value disagrees with the register class we plan to stick this in.
+ if (OpInfo.Type == InlineAsm::isInput &&
+ PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+ // Try to convert to the first EVT that the reg class contains. If the
+ // types are identical size, use a bitcast to convert (e.g. two differing
+ // vector types).
+ MVT RegVT = *PhysReg.second->vt_begin();
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+ // If the input is a FP value and we want it in FP registers, do a
+ // bitcast to the corresponding integer type. This turns an f64 value
+ // into i64, which can be passed with two i32 values on a 32-bit
+ // machine.
+ RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ }
+ }
+
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+ }
+
+ MVT RegVT;
+ EVT ValueVT = OpInfo.ConstraintVT;
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+ if (unsigned AssignedReg = PhysReg.first) {
+ const TargetRegisterClass *RC = PhysReg.second;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = *RC->vt_begin();
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ RegVT = *RC->vt_begin();
+
+ // This is a explicit reference to a physical register.
+ Regs.push_back(AssignedReg);
+
+ // If this is an expanded reference, add the rest of the regs to Regs.
+ if (NumRegs != 1) {
+ TargetRegisterClass::iterator I = RC->begin();
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "Didn't find reg!");
+
+ // Already added the first reg.
+ --NumRegs; ++I;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Regs.push_back(*I);
+ }
+ }
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return;
+ }
+
+ // Otherwise, if this was a reference to an LLVM register class, create vregs
+ // for this reference.
+ if (const TargetRegisterClass *RC = PhysReg.second) {
+ RegVT = *RC->vt_begin();
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Create the appropriate number of virtual registers.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (; NumRegs; --NumRegs)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return;
+ }
+
+ // Otherwise, we couldn't allocate enough registers for this.
+}
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ SDISelAsmOperandInfoVector ConstraintOperands;
+
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI.ParseConstraints(CS);
+
+ bool hasMemory = false;
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ MVT OpVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpVT = TLI.getSimpleValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // If this is an input or an indirect output, process the call argument.
+ // BasicBlocks are labels, currently appearing only in asm's.
+ if (OpInfo.CallOperandVal) {
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ }
+
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD).
+ getSimpleVT();
+ }
+
+ OpInfo.ConstraintVT = OpVT;
+
+ // Indirect operand accesses access memory.
+ if (OpInfo.isIndirect)
+ hasMemory = true;
+ else {
+ for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType
+ CType = TLI.getConstraintType(OpInfo.Codes[j]);
+ if (CType == TargetLowering::C_Memory) {
+ hasMemory = true;
+ break;
+ }
+ }
+ }
+ }
+
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
+ // Second pass over the constraints: compute which constraint option to use
+ // and assign registers to constraints that want a specific physreg.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+ Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ Input.ConstraintVT = OpInfo.ConstraintVT;
+ }
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is a memory input, and if the operand is not indirect, do what we
+ // need to to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert((OpInfo.isMultipleAlternative ||
+ (OpInfo.Type == InlineAsm::isInput)) &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value. If we don't have
+ // an indirect input, put it in the constpool if we can, otherwise spill
+ // it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ const Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+ TLI.getPointerTy());
+ } else {
+ // Otherwise, create a stack slot and emit a store to it before the
+ // asm.
+ Type *Ty = OpVal->getType();
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Chain = DAG.getStore(Chain, getCurDebugLoc(),
+ OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ OpInfo.CallOperand = StackSlot;
+ }
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = 0;
+
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+
+ // If this constraint is for a specific register, allocate it before
+ // anything else.
+ if (OpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
+ }
+
+ // Second pass - Loop over all of the operands, assigning virtual or physregs
+ // to register class operands.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // C_Register operands have already been allocated, Other/Memory don't need
+ // to be.
+ if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(
+ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+ TLI.getPointerTy()));
+
+ // If we have a !srcloc metadata node associated with it, we want to attach
+ // this to the ultimately generated inline asm machineinstr. To do this, we
+ // pass in the third operand as this (potentially null) inline asm MDNode.
+ const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+ AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+ // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+ // bits as operand 3.
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ // Set the asm dialect.
+ ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+ // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+ // Ideally, we would only check against memory constraints. However, the
+ // meaning of an other constraint can be target-specific and we can't easily
+ // reason about it. Therefore, be conservative and set MayLoad/MayStore
+ // for other constriants as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ ExtraInfo |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ ExtraInfo |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+ }
+ }
+
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+ TLI.getPointerTy()));
+
+ // Loop over all of the inputs, copying the operand values into the
+ // appropriate registers and processing the output regs.
+ RegsForValue RetValRegs;
+
+ // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+ std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput: {
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ // Memory output, or 'other' output (e.g. 'X' constraint).
+ assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ }
+
+ // Otherwise, this is a register or register class output.
+
+ // Copy the output from the appropriate register. Find a register that
+ // we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
+
+ // If this is an indirect operand, store through the pointer after the
+ // asm.
+ if (OpInfo.isIndirect) {
+ IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+ OpInfo.CallOperandVal));
+ } else {
+ // This is the result value of the call.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ // Concatenate this output onto the outputs list.
+ RetValRegs.append(OpInfo.AssignedRegs);
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+ InlineAsm::Kind_RegDefEarlyClobber :
+ InlineAsm::Kind_RegDef,
+ false,
+ 0,
+ DAG,
+ AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isInput: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ unsigned OperandNo = OpInfo.getMatchedOperand();
+
+ // Scan until we find the definition we already emitted of this operand.
+ // When we find it, create a RegsForValue operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+ }
+
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if (InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ if (OpInfo.isIndirect) {
+ // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
+ " don't know how to handle tied "
+ "indirect register inputs");
+ report_fatal_error("Cannot handle indirect register inputs!");
+ }
+
+ RegsForValue MatchedRegs;
+ MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+ MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
+ MatchedRegs.RegVTs.push_back(RegVT);
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+ i != e; ++i)
+ MatchedRegs.Regs.push_back
+ (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+ // Use the produced MatchedRegs object to
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag, CS.getInstruction());
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+ true, OpInfo.getMatchedOperand(),
+ DAG, AsmNodeOperands);
+ break;
+ }
+
+ assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+ assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+ OpInfo.getMatchedOperand());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+
+ // Treat indirect 'X' constraint as memory.
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
+ OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ Ops, DAG);
+ if (Ops.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ "Memory operands expect pointer values");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+
+ // TODO: Support this.
+ if (OpInfo.isIndirect) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "Don't know how to handle indirect register inputs yet "
+ "for constraint '" + Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag, CS.getInstruction());
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
+ DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber: {
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
+ false, 0, DAG,
+ AsmNodeOperands);
+ break;
+ }
+ }
+ }
+
+ // Finish up input operands. Set the input chain and add the flag last.
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+ Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+ DAG.getVTList(MVT::Other, MVT::Glue),
+ &AsmNodeOperands[0], AsmNodeOperands.size());
+ Flag = Chain.getValue(1);
+
+ // If this asm returns a register value, copy the result from that register
+ // and set it as the value of the call.
+ if (!RetValRegs.Regs.empty()) {
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+ Chain, &Flag, CS.getInstruction());
+
+ // FIXME: Why don't we do this for inline asms with MRVs?
+ if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+ EVT ResultType = TLI.getValueType(CS.getType());
+
+ // If any of the results of the inline asm is a vector, it may have the
+ // wrong width/num elts. This can happen for register classes that can
+ // contain multiple different value types. The preg or vreg allocated may
+ // not have the same VT as was expected. Convert it to the right type
+ // with bit_convert.
+ if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+ Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ ResultType, Val);
+
+ } else if (ResultType != Val.getValueType() &&
+ ResultType.isInteger() && Val.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result may
+ // have a wider width than the expected result. Extract the relevant
+ // portion.
+ Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+ }
+
+ assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+ }
+
+ setValue(CS.getInstruction(), Val);
+ // Don't need to use this as a chain in this case.
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+ return;
+ }
+
+ std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+
+ // Process indirect outputs, first output all of the flagged copies out of
+ // physregs.
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+ RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+ const Value *Ptr = IndirectStoresToEmit[i].second;
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+ Chain, &Flag, IA);
+ StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+ }
+
+ // Emit the non-flagged stores from the physregs.
+ SmallVector<SDValue, 8> OutChains;
+ for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
+ SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
+ StoresToEmit[i].first,
+ getValue(StoresToEmit[i].second),
+ MachinePointerInfo(StoresToEmit[i].second),
+ false, false, 0);
+ OutChains.push_back(Val);
+ }
+
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &OutChains[0], OutChains.size());
+
+ DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+ const DataLayout &TD = *TLI.getDataLayout();
+ SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+ getRoot(), getValue(I.getOperand(0)),
+ DAG.getSrcValue(I.getOperand(0)),
+ TD.getABITypeAlignment(I.getType()));
+ setValue(&I, V);
+ DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getSrcValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(1))));
+}
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
+ // Handle all of the outgoing arguments.
+ CLI.Outs.clear();
+ CLI.OutVals.clear();
+ ArgListTy &Args = CLI.Args;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
+ SDValue Op = SDValue(Args[i].Node.getNode(),
+ Args[i].Node.getResNo() + Value);
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ getDataLayout()->getABITypeAlignment(ArgTy);
+
+ if (Args[i].isZExt)
+ Flags.setZExt();
+ if (Args[i].isSExt)
+ Flags.setSExt();
+ if (Args[i].isInReg)
+ Flags.setInReg();
+ if (Args[i].isSRet)
+ Flags.setSRet();
+ if (Args[i].isByVal) {
+ Flags.setByVal();
+ PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
+ // For ByVal, alignment should come from FE. BE will guess if this
+ // info is not there but there are cases it cannot get right.
+ unsigned FrameAlign;
+ if (Args[i].Alignment)
+ FrameAlign = Args[i].Alignment;
+ else
+ FrameAlign = getByValTypeAlignment(ElementTy);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (Args[i].isNest)
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
+ PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
+
+ for (unsigned j = 0; j != NumParts; ++j) {
+ // if it isn't first piece, alignment must be 1
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+ i < CLI.NumFixedArgs,
+ i, j*Parts[j].getValueType().getStoreSize());
+ if (NumParts > 1 && j == 0)
+ MyFlags.Flags.setSplit();
+ else if (j != 0)
+ MyFlags.Flags.setOrigAlign(1);
+
+ CLI.Outs.push_back(MyFlags);
+ CLI.OutVals.push_back(Parts[j]);
+ }
+ }
+ }
+
+ // Handle the incoming return values from the call.
+ CLI.Ins.clear();
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(*this, CLI.RetTy, RetTys);
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+
+ SmallVector<SDValue, 4> InVals;
+ CLI.Chain = LowerCall(CLI, InVals);
+
+ // Verify that the target's LowerCall behaved as expected.
+ assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
+ "LowerCall didn't return a valid chain!");
+ assert((!CLI.IsTailCall || InVals.empty()) &&
+ "LowerCall emitted a return value for a tail call!");
+ assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
+ "LowerCall didn't emit the correct number of values!");
+
+ // For a tail call, the return value is merely live-out and there aren't
+ // any nodes in the DAG representing it. Return a special value to
+ // indicate that a tail call has been emitted and no more Instructions
+ // should be processed in the current block.
+ if (CLI.IsTailCall) {
+ CLI.DAG.setRoot(CLI.Chain);
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+ DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerCall emitted a null value!");
+ assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerCall emitted a value with the wrong type!");
+ });
+
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (CLI.RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (CLI.RetZExt)
+ AssertOp = ISD::AssertZext;
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+
+ ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+ NumRegs, RegisterVT, VT, NULL,
+ AssertOp));
+ CurReg += NumRegs;
+ }
+
+ // For a function returning void, there is no return value. We can't create
+ // such a node, so we just return a null return value in that case. In
+ // that case, nothing will actually look at the value.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), CLI.Chain);
+
+ SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
+ CLI.DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size());
+ return std::make_pair(Res, CLI.Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("LowerOperation not implemented for this target!");
+}
+
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+ SDValue Op = getNonRegisterValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+ RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. This includes arguments used by switches, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ if (FastISel)
+ return A->use_empty();
+
+ const BasicBlock *Entry = A->getParent()->begin();
+ for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+ return false; // Use not in entry block.
+ }
+ return true;
+}
+
+void SelectionDAGISel::LowerArguments(const Function &F) {
+ SelectionDAG &DAG = SDB->DAG;
+ DebugLoc dl = SDB->getCurDebugLoc();
+ const DataLayout *TD = TLI.getDataLayout();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ if (!FuncInfo->CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+ // NOTE: Assuming that a pointer will never break down to more than one VT
+ // or one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ MVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
+ Ins.push_back(RetArg);
+ }
+
+ // Set up the incoming argument description vector.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++Idx) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ bool isArgValueUsed = !I->use_empty();
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ TD->getABITypeAlignment(ArgTy);
+
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
+ Flags.setZExt();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
+ Flags.setSExt();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
+ Flags.setInReg();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
+ Flags.setSRet();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
+ Flags.setByVal();
+ PointerType *Ty = cast<PointerType>(I->getType());
+ Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
+ // For ByVal, alignment should be passed from FE. BE will guess if
+ // this info is not there but there are cases it cannot get right.
+ unsigned FrameAlign;
+ if (F.getParamAlignment(Idx))
+ FrameAlign = F.getParamAlignment(Idx);
+ else
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
+ Idx-1, i*RegisterVT.getStoreSize());
+ if (NumRegs > 1 && i == 0)
+ MyFlags.Flags.setSplit();
+ // if it isn't first piece, alignment must be 1
+ else if (i > 0)
+ MyFlags.Flags.setOrigAlign(1);
+ Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // Call the target to set up the argument values.
+ SmallVector<SDValue, 8> InVals;
+ SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
+ F.isVarArg(), Ins,
+ dl, DAG, InVals);
+
+ // Verify that the target's LowerFormalArguments behaved as expected.
+ assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+ "LowerFormalArguments didn't return a valid chain!");
+ assert(InVals.size() == Ins.size() &&
+ "LowerFormalArguments didn't emit the correct number of values!");
+ DEBUG({
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ }
+ });
+
+ // Update the DAG with the new chain value resulting from argument lowering.
+ DAG.setRoot(NewRoot);
+
+ // Set up the argument values.
+ unsigned i = 0;
+ Idx = 1;
+ if (!FuncInfo->CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ MVT VT = ValueVTs[0].getSimpleVT();
+ MVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
+ RegVT, VT, NULL, AssertOp);
+
+ MachineFunction& MF = SDB->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+ FuncInfo->DemoteRegister = SRetReg;
+ NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
+ SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ // Idx indexes LLVM arguments. Don't touch it.
+ ++i;
+ }
+
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ ++I, ++Idx) {
+ SmallVector<SDValue, 4> ArgValues;
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+
+ // If this argument is unused then remember its value. It is used to generate
+ // debugging information.
+ if (I->use_empty() && NumValues)
+ SDB->setUnusedArgValue(I, InVals[i]);
+
+ for (unsigned Val = 0; Val != NumValues; ++Val) {
+ EVT VT = ValueVTs[Val];
+ MVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+
+ if (!I->use_empty()) {
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
+ NumParts, PartVT, VT,
+ NULL, AssertOp));
+ }
+
+ i += NumParts;
+ }
+
+ // We don't need to do anything else for unused arguments.
+ if (ArgValues.empty())
+ continue;
+
+ // Note down frame index.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+
+ SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDB->getCurDebugLoc());
+
+ SDB->setValue(I, Res);
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (LoadSDNode *LNode =
+ dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ }
+
+ // If this argument is live outside of the entry block, insert a copy from
+ // wherever we got it to the vreg that other BB's will reference it as.
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ // If we can, though, try to skip creating an unnecessary vreg.
+ // FIXME: This isn't very clean... it would be nice to make this more
+ // general. It's also subtly incompatible with the hacks FastISel
+ // uses with vregs.
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ FuncInfo->ValueMap[I] = Reg;
+ continue;
+ }
+ }
+ if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(I);
+ SDB->CopyToExportRegsIfNeeded(I);
+ }
+ }
+
+ assert(i == InVals.size() && "Argument register count mismatch!");
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ // FIXME: this should insert code into the DAG!
+ EmitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ // Ignore dead phi's.
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ unsigned Reg;
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo.CreateRegs(C->getType());
+ CopyValueToVirtualRegister(C, RegOut);
+ }
+ Reg = RegOut;
+ } else {
+ DenseMap<const Value *, unsigned>::iterator I =
+ FuncInfo.ValueMap.find(PHIOp);
+ if (I != FuncInfo.ValueMap.end())
+ Reg = I->second;
+ else {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo.CreateRegs(PHIOp->getType());
+ CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+ // Remember that this register needs to added to the machine PHI node as
+ // the input for this MBB.
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+ ConstantsOut.clear();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 000000000000..9188945bd906
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,559 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class DbgValueInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class MDNode;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDDbgValue;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class DataLayout;
+class TargetLibraryInfo;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+///
+class SelectionDAGBuilder {
+ /// CurDebugLoc - current file + line number. Changes as we build the DAG.
+ DebugLoc CurDebugLoc;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+ /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used
+ /// to preserve debug information for incoming arguments.
+ DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+
+ /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ const DbgValueInst* DI;
+ DebugLoc dl;
+ unsigned SDNodeOrder;
+ public:
+ DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
+ DI(di), dl(DL), SDNodeOrder(SDNO) { }
+ const DbgValueInst* getDI() { return DI; }
+ DebugLoc getdl() { return dl; }
+ unsigned getSDNodeOrder() { return SDNodeOrder; }
+ };
+
+ /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+ /// yet seen the referent. We defer handling these until we do see it.
+ DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
+public:
+ /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+ /// them up and then emit token factor nodes when possible. This allows us to
+ /// get simple disambiguation between loads without worrying about alias
+ /// analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+private:
+
+ /// PendingExports - CopyToReg nodes that copy values to virtual registers
+ /// for export to other blocks need to be emitted before any terminator
+ /// instruction, but they have no other ordering requirements. We bunch them
+ /// up and the emit a single tokenfactor for them just before terminator
+ /// instructions.
+ SmallVector<SDValue, 8> PendingExports;
+
+ /// SDNodeOrder - A unique monotonically increasing number used to order the
+ /// SDNodes we create.
+ unsigned SDNodeOrder;
+
+ /// Case - A struct to record the Value for a switch case, and the
+ /// case's target basic block.
+ struct Case {
+ const Constant *Low;
+ const Constant *High;
+ MachineBasicBlock* BB;
+ uint32_t ExtraWeight;
+
+ Case() : Low(0), High(0), BB(0), ExtraWeight(0) { }
+ Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
+ uint32_t extraweight) : Low(low), High(high), BB(bb),
+ ExtraWeight(extraweight) { }
+
+ APInt size() const {
+ const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+ const APInt &rLow = cast<ConstantInt>(Low)->getValue();
+ return (rHigh - rLow + 1ULL);
+ }
+ };
+
+ struct CaseBits {
+ uint64_t Mask;
+ MachineBasicBlock* BB;
+ unsigned Bits;
+ uint32_t ExtraWeight;
+
+ CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
+ uint32_t Weight):
+ Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
+ };
+
+ typedef std::vector<Case> CaseVector;
+ typedef std::vector<CaseBits> CaseBitsVector;
+ typedef CaseVector::iterator CaseItr;
+ typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+ /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+ /// of conditional branches.
+ struct CaseRec {
+ CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge,
+ CaseRange r) :
+ CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+ /// CaseBB - The MBB in which to emit the compare and branch
+ MachineBasicBlock *CaseBB;
+ /// LT, GE - If nonzero, we know the current case value must be less-than or
+ /// greater-than-or-equal-to these Constants.
+ const Constant *LT;
+ const Constant *GE;
+ /// Range - A pair of iterators representing the range of case values to be
+ /// processed at this point in the binary search tree.
+ CaseRange Range;
+ };
+
+ typedef std::vector<CaseRec> CaseRecVector;
+
+ struct CaseBitsCmp {
+ bool operator()(const CaseBits &C1, const CaseBits &C2) {
+ return C1.Bits > C2.Bits;
+ }
+ };
+
+ size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
+
+ /// CaseBlock - This structure is used to communicate between
+ /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+ /// blocks needed by multi-case switch statements.
+ struct CaseBlock {
+ CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+ const Value *cmpmiddle,
+ MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+ MachineBasicBlock *me,
+ uint32_t trueweight = 0, uint32_t falseweight = 0)
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me),
+ TrueWeight(trueweight), FalseWeight(falseweight) { }
+
+ // CC - the condition code to use for the case block's setcc node
+ ISD::CondCode CC;
+
+ // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
+ // Emit by default LHS op RHS. MHS is used for range comparisons:
+ // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+ const Value *CmpLHS, *CmpMHS, *CmpRHS;
+
+ // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+ MachineBasicBlock *TrueBB, *FalseBB;
+
+ // ThisBB - the block into which to emit the code for the setcc and branches
+ MachineBasicBlock *ThisBB;
+
+ // TrueWeight/FalseWeight - branch weights.
+ uint32_t TrueWeight, FalseWeight;
+ };
+
+ struct JumpTable {
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+ /// Reg - the virtual register containing the index of the jump table entry
+ //. to jump to.
+ unsigned Reg;
+ /// JTI - the JumpTableIndex for this jump table in the function.
+ unsigned JTI;
+ /// MBB - the MBB into which to emit the code for the indirect jump.
+ MachineBasicBlock *MBB;
+ /// Default - the MBB of the default bb, which is a successor of the range
+ /// check MBB. This is when updating PHI nodes in successors.
+ MachineBasicBlock *Default;
+ };
+ struct JumpTableHeader {
+ JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+ bool E = false):
+ First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+ APInt First;
+ APInt Last;
+ const Value *SValue;
+ MachineBasicBlock *HeaderBB;
+ bool Emitted;
+ };
+ typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+ struct BitTestCase {
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+ uint32_t Weight):
+ Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { }
+ uint64_t Mask;
+ MachineBasicBlock *ThisBB;
+ MachineBasicBlock *TargetBB;
+ uint32_t ExtraWeight;
+ };
+
+ typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+ struct BitTestBlock {
+ BitTestBlock(APInt F, APInt R, const Value* SV,
+ unsigned Rg, MVT RgVT, bool E,
+ MachineBasicBlock* P, MachineBasicBlock* D,
+ const BitTestInfo& C):
+ First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
+ Parent(P), Default(D), Cases(C) { }
+ APInt First;
+ APInt Range;
+ const Value *SValue;
+ unsigned Reg;
+ MVT RegVT;
+ bool Emitted;
+ MachineBasicBlock *Parent;
+ MachineBasicBlock *Default;
+ BitTestInfo Cases;
+ };
+
+public:
+ // TLI - This is information that describes the available target features we
+ // need for lowering. This indicates when operations are unavailable,
+ // implemented with a libcall, etc.
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+ const DataLayout *TD;
+ AliasAnalysis *AA;
+ const TargetLibraryInfo *LibInfo;
+
+ /// SwitchCases - Vector of CaseBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<CaseBlock> SwitchCases;
+ /// JTCases - Vector of JumpTable structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<JumpTableBlock> JTCases;
+ /// BitTestCases - Vector of BitTestBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<BitTestBlock> BitTestCases;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<const Constant *, unsigned> ConstantsOut;
+
+ /// FuncInfo - Information about the function as a whole.
+ ///
+ FunctionLoweringInfo &FuncInfo;
+
+ /// OptLevel - What optimization level we're generating code for.
+ ///
+ CodeGenOpt::Level OptLevel;
+
+ /// GFI - Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
+ DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
+
+ /// HasTailCall - This is set to true if a call in the current
+ /// block has been translated as a tail call. In this case,
+ /// no subsequent DAG nodes should be created.
+ ///
+ bool HasTailCall;
+
+ LLVMContext *Context;
+
+ SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
+ CodeGenOpt::Level ol)
+ : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+ HasTailCall(false) {
+ }
+
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li);
+
+ /// clear - Clear out the current SelectionDAG and the associated
+ /// state and prepare this SelectionDAGBuilder object to be used
+ /// for a new block. This doesn't clear out information about
+ /// additional blocks that are needed to complete switch lowering
+ /// or PHI node updating; that information is cleared out as it is
+ /// consumed.
+ void clear();
+
+ /// clearDanglingDebugInfo - Clear the dangling debug information
+ /// map. This function is separated from the clear so that debug
+ /// information that is dangling in a basic block can be properly
+ /// resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached
+ /// to PHI nodes.
+ void clearDanglingDebugInfo();
+
+ /// getRoot - Return the current virtual root of the Selection DAG,
+ /// flushing any PendingLoad items. This must be done before emitting
+ /// a store or any other node that may need to be ordered after any
+ /// prior load instructions.
+ ///
+ SDValue getRoot();
+
+ /// getControlRoot - Similar to getRoot, but instead of flushing all the
+ /// PendingLoad items, flush all the PendingExports items. It is necessary
+ /// to do this before emitting a terminator instruction.
+ ///
+ SDValue getControlRoot();
+
+ DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+
+ unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+ void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
+
+ /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten
+ /// from how the code appeared in the source. The ordering is used by the
+ /// scheduler to effectively turn off scheduling.
+ void AssignOrderingToNode(const SDNode *Node);
+
+ void visit(const Instruction &I);
+
+ void visit(unsigned Opcode, const User &I);
+
+ // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+ // generate the debug data structures now that we've seen its definition.
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+ SDValue getValue(const Value *V);
+ SDValue getNonRegisterValue(const Value *V);
+ SDValue getValueImpl(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void setUnusedArgValue(const Value *V, SDValue NewN) {
+ SDValue &N = UnusedArgNodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB, unsigned Opc);
+ void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB);
+ bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(const Value *V);
+ void ExportFromCurrentBlock(const Value *V);
+ void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+ MachineBasicBlock *LandingPad = NULL);
+
+ /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// references that ned to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
+private:
+ // Terminator instructions.
+ void visitRet(const ReturnInst &I);
+ void visitBr(const BranchInst &I);
+ void visitSwitch(const SwitchInst &I);
+ void visitIndirectBr(const IndirectBrInst &I);
+ void visitUnreachable(const UnreachableInst &I) { /* noop */ }
+
+ // Helpers for visitSwitch
+ bool handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+
+ uint32_t getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+ void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ uint32_t Weight = 0);
+public:
+ void visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB);
+ void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
+ void visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ uint32_t BranchWeightToNext,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB);
+ void visitJumpTable(JumpTable &JT);
+ void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(const InvokeInst &I);
+ void visitResume(const ResumeInst &I);
+
+ void visitBinary(const User &I, unsigned OpCode);
+ void visitShift(const User &I, unsigned Opcode);
+ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(const User &I);
+ void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+ void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(const User &I);
+ void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (const User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(const User &I);
+ void visitFCmp(const User &I);
+ // Visit the conversion instructions
+ void visitTrunc(const User &I);
+ void visitZExt(const User &I);
+ void visitSExt(const User &I);
+ void visitFPTrunc(const User &I);
+ void visitFPExt(const User &I);
+ void visitFPToUI(const User &I);
+ void visitFPToSI(const User &I);
+ void visitUIToFP(const User &I);
+ void visitSIToFP(const User &I);
+ void visitPtrToInt(const User &I);
+ void visitIntToPtr(const User &I);
+ void visitBitCast(const User &I);
+
+ void visitExtractElement(const User &I);
+ void visitInsertElement(const User &I);
+ void visitShuffleVector(const User &I);
+
+ void visitExtractValue(const ExtractValueInst &I);
+ void visitInsertValue(const InsertValueInst &I);
+ void visitLandingPad(const LandingPadInst &I);
+
+ void visitGetElementPtr(const User &I);
+ void visitSelect(const User &I);
+
+ void visitAlloca(const AllocaInst &I);
+ void visitLoad(const LoadInst &I);
+ void visitStore(const StoreInst &I);
+ void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
+ void visitAtomicRMW(const AtomicRMWInst &I);
+ void visitFence(const FenceInst &I);
+ void visitPHI(const PHINode &I);
+ void visitCall(const CallInst &I);
+ bool visitMemCmpCall(const CallInst &I);
+ bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
+ void visitAtomicLoad(const LoadInst &I);
+ void visitAtomicStore(const StoreInst &I);
+
+ void visitInlineAsm(ImmutableCallSite CS);
+ const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+
+ void visitVAStart(const CallInst &I);
+ void visitVAArg(const VAArgInst &I);
+ void visitVAEnd(const CallInst &I);
+ void visitVACopy(const CallInst &I);
+
+ void visitUserOp1(const Instruction &I) {
+ llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+ }
+ void visitUserOp2(const Instruction &I) {
+ llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+ }
+
+ void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ /// EmitFuncArgumentDbgValue - If V is an function argument then create
+ /// corresponding DBG_VALUE machine instruction for it now. At the end of
+ /// instruction selection, they will be inserted to the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset, const SDValue &N);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 000000000000..3b5823bfb277
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,645 @@
+//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->getName(getMachineOpcode());
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE: return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+ case ISD::RegisterMask: return "RegisterMask";
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::TargetIndex: return "TargetIndex";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FSINCOS: return "fsincos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+ case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+ case ISD::DEBUGTRAP: return "debugtrap";
+ case ISD::LIFETIME_START: return "lifetime.start";
+ case ISD::LIFETIME_END: return "lifetime.end";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+
+ // Trampolines
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setue";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default: return "";
+ case ISD::PRE_INC: return "<pre-inc>";
+ case ISD::PRE_DEC: return "<pre-dec>";
+ case ISD::POST_INC: return "<post-inc>";
+ case ISD::POST_DEC: return "<post-dec>";
+ }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (const void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) {
+ OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">";
+ if (unsigned TF = TI->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ }
+ else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ int64_t offset = BA->getOffset();
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ }
+
+ if (G)
+ if (unsigned Order = G->GetOrdering(this))
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+ dbgs() << '\n';
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+ dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS.indent(indent);
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+
+ if (i) OS << ",";
+ OS << " ";
+
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode.
+ OS << (const void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 000000000000..eeea9e4cfcff
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,3019 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
+STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+
+#ifndef NDEBUG
+static cl::opt<bool>
+EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
+ cl::desc("Enable extra verbose messages in the \"fast\" "
+ "instruction selector"));
+ // Terminators
+STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
+STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
+STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
+STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
+STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
+STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
+STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
+
+ // Standard binary operators...
+STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
+STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
+STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
+STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
+STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
+STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
+STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
+STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
+STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
+STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
+STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
+STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
+
+ // Logical operators...
+STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
+STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
+STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
+
+ // Memory instructions...
+STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
+STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
+STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
+STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
+STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRWM");
+STATISTIC(NumFastIselFailFence,"Fast isel fails on Frence");
+STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
+
+ // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
+STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
+STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
+STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
+STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
+STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
+STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
+STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
+STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
+STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
+STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
+STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
+
+ // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
+STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
+STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
+STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
+STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
+STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
+STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
+STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
+STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
+STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
+STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
+STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
+STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
+STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
+STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+#endif
+
+static cl::opt<bool>
+EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
+ cl::desc("Enable verbose messages in the \"fast\" "
+ "instruction selector"));
+static cl::opt<bool>
+EnableFastISelAbort("fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction"));
+static cl::opt<bool>
+EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower a formal argument"));
+
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+ cl::desc("use Machine Branch Probability Info"),
+ cl::init(true), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post legalize types"
+ " dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+ cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false,
+ ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false,
+ ViewDAGCombineLT = false,
+ ViewISelDAGs = false, ViewSchedDAGs = false,
+ ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler> >
+ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler),
+ cl::desc("Instruction schedulers available (before register"
+ " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+ createDefaultScheduler);
+
+namespace llvm {
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetLowering &TLI = IS->getTargetLowering();
+ const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>();
+
+ if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() ||
+ TLI.getSchedulingPreference() == Sched::Source)
+ return createSourceListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::RegPressure)
+ return createBURRListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::Hybrid)
+ return createHybridListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::VLIW)
+ return createVLIWDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() == Sched::ILP &&
+ "Unknown sched type!");
+ return createILPListDAGScheduler(IS, OptLevel);
+ }
+}
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and this method is called to expand it into a sequence of
+// instructions, potentially also creating new basic blocks and control flow.
+// When new basic blocks are inserted and the edges from MBB to its successors
+// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
+// DenseMap.
+MachineBasicBlock *
+TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+#ifndef NDEBUG
+ dbgs() << "If a target marks an instruction with "
+ "'usesCustomInserter', it must implement "
+ "TargetLowering::EmitInstrWithCustomInserter!";
+#endif
+ llvm_unreachable(0);
+}
+
+void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ assert(!MI->hasPostISelHook() &&
+ "If a target marks an instruction with 'hasPostISelHook', "
+ "it must implement TargetLowering::AdjustInstrPostInstrSelection!");
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
+ FuncInfo(new FunctionLoweringInfo(TLI)),
+ CurDAG(new SelectionDAG(tm, OL)),
+ SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+ GFI(),
+ OptLevel(OL),
+ DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+SelectionDAGISel::~SelectionDAGISel() {
+ delete SDB;
+ delete CurDAG;
+ delete FuncInfo;
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfo>();
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
+/// may trap on it. In this case we have to split the edge so that the path
+/// through the predecessor block that doesn't go to the phi block doesn't
+/// execute the possibly trapping instruction.
+///
+/// This is required for correctness, so it must be done at -O0.
+///
+static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+ // Loop for blocks with phi nodes.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) continue;
+
+ ReprocessBlock:
+ // For each block with a PHI node, check to see if any of the input values
+ // are potentially trapping constant expressions. Constant expressions are
+ // the only potentially trapping value that can occur as the argument to a
+ // PHI.
+ for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+ if (CE == 0 || !CE->canTrap()) continue;
+
+ // The only case we have to worry about is when the edge is critical.
+ // Since this block has a PHI Node, we assume it has multiple input
+ // edges: check to see if the pred has multiple successors.
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // Okay, we have to split this edge.
+ SplitCriticalEdge(Pred->getTerminator(),
+ GetSuccessorNumber(Pred, BB), SDISel, true);
+ goto ReprocessBlock;
+ }
+ }
+}
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
+ "-fast-isel-verbose requires -fast-isel");
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
+ "-fast-isel-abort requires -fast-isel");
+
+ const Function &Fn = *mf.getFunction();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ LibInfo = &getAnalysis<TargetLibraryInfo>();
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+
+ TargetSubtargetInfo &ST =
+ const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
+ ST.resetSubtargetFeatures(MF);
+ TM.resetTargetOptions(MF);
+
+ DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
+ CurDAG->init(*MF, TTI);
+ FuncInfo->set(Fn, *MF);
+
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+ else
+ FuncInfo->BPI = 0;
+
+ SDB->init(GFI, *AA, LibInfo);
+
+ MF->setHasMSInlineAsm(false);
+ SelectAllBasicBlocks(Fn);
+
+ // If the first basic block in the function has live ins that need to be
+ // copied into vregs, emit the copies into the top of the block before
+ // emitting the code for the block.
+ MachineBasicBlock *EntryMBB = MF->begin();
+ RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+
+ DenseMap<unsigned, unsigned> LiveInMap;
+ if (!FuncInfo->ArgDbgValues.empty())
+ for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+ E = RegInfo->livein_end(); LI != E; ++LI)
+ if (LI->second)
+ LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
+ // Insert DBG_VALUE instructions for function arguments to the entry block.
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
+ MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ EntryMBB->insert(EntryMBB->begin(), MI);
+ else {
+ MachineInstr *Def = RegInfo->getVRegDef(Reg);
+ MachineBasicBlock::iterator InsertPos = Def;
+ // FIXME: VR def may not be in entry block.
+ Def->getParent()->insert(llvm::next(InsertPos), MI);
+ }
+
+ // If Reg is live-in then update debug info to track its copy in a vreg.
+ DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+ if (LDI != LiveInMap.end()) {
+ MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+ MachineBasicBlock::iterator InsertPos = Def;
+ const MDNode *Variable =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ unsigned Offset = MI->getOperand(1).getImm();
+ // Def is never a terminator here, so it is ok to increment InsertPos.
+ BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(LDI->second, RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+
+ // If this vreg is directly copied into an exported register then
+ // that COPY instructions also need DBG_VALUE, if it is the only
+ // user of LDI->second.
+ MachineInstr *CopyUseMI = NULL;
+ for (MachineRegisterInfo::use_iterator
+ UI = RegInfo->use_begin(LDI->second);
+ MachineInstr *UseMI = UI.skipInstruction();) {
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = NULL; break;
+ }
+ if (CopyUseMI) {
+ MachineInstr *NewMI =
+ BuildMI(*MF, CopyUseMI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
+ }
+ }
+ }
+
+ // Determine if there are any calls in this machine function.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+
+ if (MFI->hasCalls() && MF->hasMSInlineAsm())
+ break;
+
+ const MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end();
+ II != IE; ++II) {
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
+ if ((MCID.isCall() && !MCID.isReturn()) ||
+ II->isStackAligningInlineAsm()) {
+ MFI->setHasCalls(true);
+ }
+ if (II->isMSInlineAsm()) {
+ MF->setHasMSInlineAsm(true);
+ }
+ }
+ }
+
+ // Determine if there is a call to setjmp in the machine function.
+ MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
+
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<unsigned, unsigned>::iterator
+ I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ for (;;) {
+ DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+ if (J == E) break;
+ To = J->second;
+ }
+ // Replace it.
+ MRI.replaceRegWith(From, To);
+ }
+
+ // Freeze the set of reserved registers now that MachineFrameInfo has been
+ // set up. All the information required by getReservedRegs() should be
+ // available now.
+ MRI.freezeReservedRegs(*MF);
+
+ // Release function-specific state. SDB and CurDAG are already cleared
+ // at this point.
+ FuncInfo->clear();
+
+ return true;
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
+ // Lower all of the non-terminator instructions. If a call is emitted
+ // as a tail call, cease emitting nodes for this block. Terminators
+ // are handled below.
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
+ SDB->visit(*I);
+
+ // Make sure the root of the DAG is up-to-date.
+ CurDAG->setRoot(SDB->getControlRoot());
+ HadTailCall = SDB->HasTailCall;
+ SDB->clear();
+
+ // Final step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG();
+}
+
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+ SmallPtrSet<SDNode*, 128> VisitedNodes;
+ SmallVector<SDNode*, 128> Worklist;
+
+ Worklist.push_back(CurDAG->getRoot().getNode());
+
+ APInt KnownZero;
+ APInt KnownOne;
+
+ do {
+ SDNode *N = Worklist.pop_back_val();
+
+ // If we've already seen this node, ignore it.
+ if (!VisitedNodes.insert(N))
+ continue;
+
+ // Otherwise, add all chain operands to the worklist.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ Worklist.push_back(N->getOperand(i).getNode());
+
+ // If this is a CopyToReg with a vreg dest, process it.
+ if (N->getOpcode() != ISD::CopyToReg)
+ continue;
+
+ unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // Ignore non-scalar or non-integer values.
+ SDValue Src = N->getOperand(2);
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isInteger() || SrcVT.isVector())
+ continue;
+
+ unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+ CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne);
+ FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
+ } while (!Worklist.empty());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+ std::string GroupName;
+ if (TimePassesIsEnabled)
+ GroupName = "Instruction Selection and Scheduling";
+ std::string BlockName;
+ int BlockNumber = -1;
+ (void)BlockNumber;
+#ifdef NDEBUG
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+ ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ ViewSUnitDAGs)
+#endif
+ {
+ BlockNumber = FuncInfo->MBB->getNumber();
+ BlockName = MF->getName().str() + ":" +
+ FuncInfo->MBB->getBasicBlock()->getName().str();
+ }
+ DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+ // Run the DAG combiner in pre-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
+ CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+ BlockName);
+
+ bool Changed;
+ {
+ NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (Changed) {
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName,
+ TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+ }
+
+ {
+ NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeVectors();
+ }
+
+ if (Changed) {
+ {
+ NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
+ CurDAG->LegalizeTypes();
+ }
+
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
+ TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
+ << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
+ }
+
+ if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+
+ {
+ NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
+ CurDAG->Legalize();
+ }
+
+ DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+ // Run the DAG combiner in post-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (OptLevel != CodeGenOpt::None)
+ ComputeLiveOutVRegInfo();
+
+ if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ {
+ NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled);
+ DoInstructionSelection();
+ }
+
+ DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+
+ // Schedule machine code.
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+ {
+ NamedRegionTimer T("Instruction Scheduling", GroupName,
+ TimePassesIsEnabled);
+ Scheduler->Run(CurDAG, FuncInfo->MBB);
+ }
+
+ if (ViewSUnitDAGs) Scheduler->viewGraph();
+
+ // Emit machine code to BB. This can change 'BB' to the last block being
+ // inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
+ {
+ NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
+
+ // FuncInfo->InsertPt is passed by reference and set to the end of the
+ // scheduled instructions.
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
+ }
+
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
+ // Free the scheduler state.
+ {
+ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
+ TimePassesIsEnabled);
+ delete Scheduler;
+ }
+
+ // Free the SelectionDAG state, now that we're finished with it.
+ CurDAG->clear();
+}
+
+namespace {
+/// ISelUpdater - helper class to handle updates of the instruction selection
+/// graph.
+class ISelUpdater : public SelectionDAG::DAGUpdateListener {
+ SelectionDAG::allnodes_iterator &ISelPosition;
+public:
+ ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
+ : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
+
+ /// NodeDeleted - Handle nodes deleted from the graph. If the node being
+ /// deleted is the current ISelPosition node, update ISelPosition.
+ ///
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ if (ISelPosition == SelectionDAG::allnodes_iterator(N))
+ ++ISelPosition;
+ }
+};
+} // end anonymous namespace
+
+void SelectionDAGISel::DoInstructionSelection() {
+ DEBUG(errs() << "===== Instruction selection begins: BB#"
+ << FuncInfo->MBB->getNumber()
+ << " '" << FuncInfo->MBB->getName() << "'\n");
+
+ PreprocessISelDAG();
+
+ // Select target instructions for the DAG.
+ {
+ // Number all nodes with a topological order and set DAGSize.
+ DAGSize = CurDAG->AssignTopologicalOrder();
+
+ // Create a dummy node (which is not added to allnodes), that adds
+ // a reference to the root node, preventing it from being deleted,
+ // and tracking any changes of the root.
+ HandleSDNode Dummy(CurDAG->getRoot());
+ SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode());
+ ++ISelPosition;
+
+ // Make sure that ISelPosition gets properly updated when nodes are deleted
+ // in calls made from this function.
+ ISelUpdater ISU(*CurDAG, ISelPosition);
+
+ // The AllNodes list is now topological-sorted. Visit the
+ // nodes by starting at the end of the list (the root of the
+ // graph) and preceding back toward the beginning (the entry
+ // node).
+ while (ISelPosition != CurDAG->allnodes_begin()) {
+ SDNode *Node = --ISelPosition;
+ // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+ // but there are currently some corner cases that it misses. Also, this
+ // makes it theoretically possible to disable the DAGCombiner.
+ if (Node->use_empty())
+ continue;
+
+ SDNode *ResNode = Select(Node);
+
+ // FIXME: This is pretty gross. 'Select' should be changed to not return
+ // anything at all and this code should be nuked with a tactical strike.
+
+ // If node should not be replaced, continue with the next one.
+ if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
+ continue;
+ // Replace node.
+ if (ResNode) {
+ // Propagate ordering
+ CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node));
+
+ ReplaceUses(Node, ResNode);
+ }
+
+ // If after the replacement this node is not used any more,
+ // remove this dead node.
+ if (Node->use_empty()) // Don't delete EntryToken, etc.
+ CurDAG->RemoveDeadNode(Node);
+ }
+
+ CurDAG->setRoot(Dummy.getValue());
+ }
+
+ DEBUG(errs() << "===== Instruction selection ends:\n");
+
+ PostprocessISelDAG();
+}
+
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+void SelectionDAGISel::PrepareEHLandingPad() {
+ MachineBasicBlock *MBB = FuncInfo->MBB;
+
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
+
+ // Assign the call site to the landing pad's begin label.
+ MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+
+ const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ .addSym(Label);
+
+ // Mark exception register as live in.
+ unsigned Reg = TLI.getExceptionPointerRegister();
+ if (Reg) MBB->addLiveIn(Reg);
+
+ // Mark exception selector register as live in.
+ Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) MBB->addLiveIn(Reg);
+}
+
+/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
+/// load into the specified FoldInst. Note that we could have a sequence where
+/// multiple LLVM IR instructions are folded into the same machineinstr. For
+/// example we could have:
+/// A: x = load i32 *P
+/// B: y = icmp A, 42
+/// C: br y, ...
+///
+/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
+/// any other folded instructions) because it is between A and C.
+///
+/// If we succeed in folding the load into the operation, return true.
+///
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+ const Instruction *FoldInst,
+ FastISel *FastIS) {
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->use_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->use_back();
+ }
+
+ // If we didn't find the fold instruction, then we failed to collapse the
+ // sequence.
+ if (TheUser != FoldInst)
+ return false;
+
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile()) return false;
+
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
+ unsigned LoadReg = FastIS->getRegForValue(LI);
+ if (LoadReg == 0)
+ return false;
+
+ // Check to see what the uses of this vreg are. If it has no uses, or more
+ // than one use (at the machine instr level) then we can't fold it.
+ MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+ if (RI == RegInfo->reg_end())
+ return false;
+
+ // See if there is exactly one use of the vreg. If there are multiple uses,
+ // then the instruction got lowered to multiple machine instructions or the
+ // use of the loaded value ended up being multiple operands of the result, in
+ // either case, we can't fold this.
+ MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+ if (PostRI != RegInfo->reg_end())
+ return false;
+
+ assert(RI.getOperand().isUse() &&
+ "The only use of the vreg must be a use, we haven't emitted the def!");
+
+ MachineInstr *User = &*RI;
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes, make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo->InsertPt = User;
+ FuncInfo->MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
+}
+
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+ FunctionLoweringInfo *FuncInfo) {
+ return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded.
+ !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+}
+
+#ifndef NDEBUG
+// Collect per Instruction statistics for fast-isel misses. Only those
+// instructions that cause the bail are accounted for. It does not account for
+// instructions higher in the block. Thus, summing the per instructions stats
+// will not add up to what is reported by NumFastIselFailures.
+static void collectFailStats(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: assert (0 && "<Invalid operator> ");
+
+ // Terminators
+ case Instruction::Ret: NumFastIselFailRet++; return;
+ case Instruction::Br: NumFastIselFailBr++; return;
+ case Instruction::Switch: NumFastIselFailSwitch++; return;
+ case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
+ case Instruction::Invoke: NumFastIselFailInvoke++; return;
+ case Instruction::Resume: NumFastIselFailResume++; return;
+ case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
+
+ // Standard binary operators...
+ case Instruction::Add: NumFastIselFailAdd++; return;
+ case Instruction::FAdd: NumFastIselFailFAdd++; return;
+ case Instruction::Sub: NumFastIselFailSub++; return;
+ case Instruction::FSub: NumFastIselFailFSub++; return;
+ case Instruction::Mul: NumFastIselFailMul++; return;
+ case Instruction::FMul: NumFastIselFailFMul++; return;
+ case Instruction::UDiv: NumFastIselFailUDiv++; return;
+ case Instruction::SDiv: NumFastIselFailSDiv++; return;
+ case Instruction::FDiv: NumFastIselFailFDiv++; return;
+ case Instruction::URem: NumFastIselFailURem++; return;
+ case Instruction::SRem: NumFastIselFailSRem++; return;
+ case Instruction::FRem: NumFastIselFailFRem++; return;
+
+ // Logical operators...
+ case Instruction::And: NumFastIselFailAnd++; return;
+ case Instruction::Or: NumFastIselFailOr++; return;
+ case Instruction::Xor: NumFastIselFailXor++; return;
+
+ // Memory instructions...
+ case Instruction::Alloca: NumFastIselFailAlloca++; return;
+ case Instruction::Load: NumFastIselFailLoad++; return;
+ case Instruction::Store: NumFastIselFailStore++; return;
+ case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
+ case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
+ case Instruction::Fence: NumFastIselFailFence++; return;
+ case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
+
+ // Convert instructions...
+ case Instruction::Trunc: NumFastIselFailTrunc++; return;
+ case Instruction::ZExt: NumFastIselFailZExt++; return;
+ case Instruction::SExt: NumFastIselFailSExt++; return;
+ case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
+ case Instruction::FPExt: NumFastIselFailFPExt++; return;
+ case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
+ case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
+ case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
+ case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
+ case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
+ case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
+ case Instruction::BitCast: NumFastIselFailBitCast++; return;
+
+ // Other instructions...
+ case Instruction::ICmp: NumFastIselFailICmp++; return;
+ case Instruction::FCmp: NumFastIselFailFCmp++; return;
+ case Instruction::PHI: NumFastIselFailPHI++; return;
+ case Instruction::Select: NumFastIselFailSelect++; return;
+ case Instruction::Call: NumFastIselFailCall++; return;
+ case Instruction::Shl: NumFastIselFailShl++; return;
+ case Instruction::LShr: NumFastIselFailLShr++; return;
+ case Instruction::AShr: NumFastIselFailAShr++; return;
+ case Instruction::VAArg: NumFastIselFailVAArg++; return;
+ case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
+ case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
+ case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
+ case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
+ case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
+ case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
+ }
+}
+#endif
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = 0;
+ if (TM.Options.EnableFastISel)
+ FastIS = TLI.createFastISel(*FuncInfo, LibInfo);
+
+ // Iterate over all basic blocks in the function.
+ ReversePostOrderTraversal<const Function*> RPOT(&Fn);
+ for (ReversePostOrderTraversal<const Function*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+ const BasicBlock *LLVMBB = *I;
+
+ if (OptLevel != CodeGenOpt::None) {
+ bool AllPredsVisited = true;
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ if (!FuncInfo->VisitedBBs.count(*PI)) {
+ AllPredsVisited = false;
+ break;
+ }
+ }
+
+ if (AllPredsVisited) {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ FuncInfo->ComputePHILiveOutRegInfo(PN);
+ } else {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ FuncInfo->InvalidatePHILiveOutRegInfo(PN);
+ }
+
+ FuncInfo->VisitedBBs.insert(LLVMBB);
+ }
+
+ BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
+ BasicBlock::const_iterator const End = LLVMBB->end();
+ BasicBlock::const_iterator BI = End;
+
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+
+ // Setup an EH landing-pad block.
+ if (FuncInfo->MBB->isLandingPad())
+ PrepareEHLandingPad();
+
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS) {
+ FastIS->startNewBlock();
+
+ // Emit code for any incoming arguments. This must happen before
+ // beginning FastISel on the entry block.
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ // Lower any arguments needed in this block if this is the entry block.
+ if (!FastIS->LowerArguments()) {
+ // Fast isel failed to lower these arguments
+ if (EnableFastISelAbortArgs)
+ llvm_unreachable("FastISel didn't lower all arguments");
+
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(0);
+ }
+
+ unsigned NumFastIselRemaining = std::distance(Begin, End);
+ // Do FastISel on as many instructions as possible.
+ for (; BI != Begin; --BI) {
+ const Instruction *Inst = llvm::prior(BI);
+
+ // If we no longer require this instruction, skip it.
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ --NumFastIselRemaining;
+ continue;
+ }
+
+ // Bottom-up: reset the insert pos at the top, after any local-value
+ // instructions.
+ FastIS->recomputeInsertPt();
+
+ // Try to select the instruction with FastISel.
+ if (FastIS->SelectInstruction(Inst)) {
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ // If fast isel succeeded, skip over all the folded instructions, and
+ // then see if there is a load right before the selected instructions.
+ // Try to fold the load if so.
+ const Instruction *BeforeInst = Inst;
+ while (BeforeInst != Begin) {
+ BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+ if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ break;
+ }
+ if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() &&
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) {
+ // If we succeeded, don't re-select the load.
+ BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ }
+ continue;
+ }
+
+#ifndef NDEBUG
+ if (EnableFastISelVerbose2)
+ collectFailStats(Inst);
+#endif
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ if (isa<CallInst>(Inst)) {
+
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed call: ";
+ Inst->dump();
+ }
+
+ if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+ unsigned &R = FuncInfo->ValueMap[Inst];
+ if (!R)
+ R = FuncInfo->CreateRegs(Inst->getType());
+ }
+
+ bool HadTailCall = false;
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
+ SelectBasicBlock(Inst, BI, HadTailCall);
+
+ // If the call was emitted as a tail call, we're done with the block.
+ // We also need to delete any previously emitted instructions.
+ if (HadTailCall) {
+ FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end());
+ --BI;
+ break;
+ }
+
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+ NumFastIselRemaining = RemainingNow;
+ continue;
+ }
+
+ if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
+ // Don't abort, and use a different message for terminator misses.
+ NumFastIselFailures += NumFastIselRemaining;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed terminator: ";
+ Inst->dump();
+ }
+ } else {
+ NumFastIselFailures += NumFastIselRemaining;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel miss: ";
+ Inst->dump();
+ }
+ if (EnableFastISelAbort)
+ // The "fast" selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ llvm_unreachable("FastISel didn't select the entire block");
+ }
+ break;
+ }
+
+ FastIS->recomputeInsertPt();
+ } else {
+ // Lower any arguments needed in this block if this is the entry block.
+ if (LLVMBB == &Fn.getEntryBlock())
+ LowerArguments(Fn);
+ }
+
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
+ if (Begin != BI) {
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
+ }
+
+ FinishBasicBlock();
+ FuncInfo->PHINodesToUpdate.clear();
+ }
+
+ delete FastIS;
+ SDB->clearDanglingDebugInfo();
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+
+ DEBUG(dbgs() << "Total amount of phi nodes to update: "
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
+ dbgs() << "Node " << i << " : ("
+ << FuncInfo->PHINodesToUpdate[i].first
+ << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+
+ // Next, now that we know what the last MBB the LLVM BB expanded is, update
+ // PHI nodes in successors.
+ if (SDB->SwitchCases.empty() &&
+ SDB->JTCases.empty() &&
+ SDB->BitTestCases.empty()) {
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ continue;
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
+ }
+ return;
+ }
+
+ for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->BitTestCases[i].Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->BitTestCases[i].Parent;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ uint32_t UnhandledWeight = 0;
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j)
+ UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight;
+
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
+ UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight;
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ if (j+1 != ej)
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ UnhandledWeight,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
+ else
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Default,
+ UnhandledWeight,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
+
+
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // This is "default" BB. We have two jumps to it. From "header" BB and
+ // from last "case" BB.
+ if (PHIBB == SDB->BitTestCases[i].Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->BitTestCases[i].Parent)
+ .addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB);
+ // One of "cases" BB.
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
+ j != ej; ++j) {
+ MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ if (cBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB);
+ }
+ }
+ }
+ SDB->BitTestCases.clear();
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+ // whether the PHI is a successor of the range check MBB or the jump table MBB
+ for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->JTCases[i].first.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
+ FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTable(SDB->JTCases[i].second);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // "default" BB. We can go there only from header BB.
+ if (PHIBB == SDB->JTCases[i].second.Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->JTCases[i].first.HeaderBB);
+ // JT BB. Just iterate over successors here
+ if (FuncInfo->MBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
+ }
+ }
+ SDB->JTCases.clear();
+
+ // If the switch block involved a branch to one of the actual successors, we
+ // need to update PHI nodes in that block.
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
+ }
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+
+ // Determine the unique successors.
+ SmallVector<MachineBasicBlock *, 2> Succs;
+ Succs.push_back(SDB->SwitchCases[i].TrueBB);
+ if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
+ Succs.push_back(SDB->SwitchCases[i].FalseBB);
+
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
+ SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ FuncInfo->MBB = Succs[i];
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // FuncInfo->MBB may have been removed from the CFG if a branch was
+ // constant folded.
+ if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+ for (MachineBasicBlock::iterator
+ MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end();
+ MBBI != MBBE && MBBI->isPHI(); ++MBBI) {
+ MachineInstrBuilder PHI(*MF, MBBI);
+ // This value for this PHI node is recorded in PHINodesToUpdate.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != FuncInfo->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (FuncInfo->PHINodesToUpdate[pn].first == PHI) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ SDB->SwitchCases.clear();
+}
+
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it, otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+ RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+
+ if (!Ctor) {
+ Ctor = ISHeuristic;
+ RegisterScheduler::setDefault(Ctor);
+ }
+
+ return Ctor(this, OptLevel);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+ // If the actual AND mask is allowing unallowed bits, this doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+ // If the actual AND mask is allowing unallowed bits, this doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+
+ APInt KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if ((NeededMask & KnownOne) == NeededMask)
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+ std::vector<SDValue> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+ Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
+ Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
+
+ unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
+
+ while (i != e) {
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+ if (!InlineAsm::isMemKind(Flags)) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i,
+ InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+ i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ } else {
+ assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ "Memory operand with multiple values?");
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDValue> SelOps;
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+ report_fatal_error("Could not match memory address. Inline asm"
+ " failure!");
+
+ // Add this to the output node.
+ unsigned NewFlags =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+ Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32));
+ Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ i += 2;
+ }
+ }
+
+ // Add the glue input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
+
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
+/// SDNode.
+///
+static SDNode *findGlueUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDUse &Use = I.getUse();
+ if (Use.getResNo() == FlagResNo)
+ return Use.getUser();
+ }
+ return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+ SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+ bool IgnoreChains) {
+ // The NodeID's are given uniques ID's where a node ID is guaranteed to be
+ // greater than all of its (recursive) operands. If we scan to a point where
+ // 'use' is smaller than the node we're scanning for, then we know we will
+ // never find it.
+ //
+ // The Use may be -1 (unassigned) if it is a newly allocated node. This can
+ // happen because we scan down to newly selected nodes in the case of glue
+ // uses.
+ if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
+ return false;
+
+ // Don't revisit nodes if we already scanned it and didn't fail, we know we
+ // won't fail if we scan it again.
+ if (!Visited.insert(Use))
+ return false;
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ // Ignore chain uses, they are validated by HandleMergeInputChains.
+ if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+ continue;
+
+ SDNode *N = Use->getOperand(i).getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
+ return true;
+ }
+ return false;
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+ return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+ CodeGenOpt::Level OptLevel,
+ bool IgnoreChains) {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ // If Root use can somehow reach N through a path that that doesn't contain
+ // U then folding N would create a cycle. e.g. In the following
+ // diagram, Root can reach N through X. If N is folded into into Root, then
+ // X is both a predecessor and a successor of U.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ / //
+ // \ / //
+ // [Root*] //
+ //
+ // * indicates nodes to be folded together.
+ //
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
+ // check if it might reach N.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ \ //
+ // \ | //
+ // [Root*] | //
+ // ^ | //
+ // f | //
+ // | / //
+ // [Y] / //
+ // ^ / //
+ // f / //
+ // | / //
+ // [GU] //
+ //
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
+ // a cycle in the scheduling graph.
+
+ // If the node has glue, walk down the graph to the "lowest" node in the
+ // glueged set.
+ EVT VT = Root->getValueType(Root->getNumValues()-1);
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (GU == NULL)
+ break;
+ Root = GU;
+ VT = Root->getValueType(Root->getNumValues()-1);
+
+ // If our query node has a glue result with a use, we've walked up it. If
+ // the user (which has already been selected) has a chain or indirectly uses
+ // the chain, our WalkChainUsers predicate will not consider it. Because of
+ // this, we cannot ignore chains in this predicate.
+ IgnoreChains = false;
+ }
+
+
+ SmallPtrSet<SDNode*, 16> Visited;
+ return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
+}
+
+SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+ std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+ SelectInlineAsmMemoryOperands(Ops);
+
+ EVT VTs[] = { MVT::Other, MVT::Glue };
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
+SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
+ return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
+}
+
+/// GetVBR - decode a vbr encoding whose top bit is set.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+ assert(Val >= 128 && "Not a VBR");
+ Val &= 127; // Remove first vbr bit.
+
+ unsigned Shift = 7;
+ uint64_t NextBits;
+ do {
+ NextBits = MatcherTable[Idx++];
+ Val |= (NextBits&127) << Shift;
+ Shift += 7;
+ } while (NextBits & 128);
+
+ return Val;
+}
+
+
+/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
+/// interior glue and chain results to use the new glue and chain results.
+void SelectionDAGISel::
+UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SDValue InputGlue,
+ const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
+ bool isMorphNodeTo) {
+ SmallVector<SDNode*, 4> NowDeadNodes;
+
+ // Now that all the normal results are replaced, we replace the chain and
+ // glue results if present.
+ if (!ChainNodesMatched.empty()) {
+ assert(InputChain.getNode() != 0 &&
+ "Matched input chains but didn't produce a chain");
+ // Loop over all of the nodes we matched that produced a chain result.
+ // Replace all the chain results with the final chain we ended up with.
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ SDNode *ChainNode = ChainNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (ChainNode->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ // Don't replace the results of the root node if we're doing a
+ // MorphNodeTo.
+ if (ChainNode == NodeToMatch && isMorphNodeTo)
+ continue;
+
+ SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+ if (ChainVal.getValueType() == MVT::Glue)
+ ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+ assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (ChainNode->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
+ NowDeadNodes.push_back(ChainNode);
+ }
+ }
+
+ // If the result produces glue, update any glue results in the matched
+ // pattern with the glue result.
+ if (InputGlue.getNode() != 0) {
+ // Handle any interior nodes explicitly marked.
+ for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
+ SDNode *FRN = GlueResultNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (FRN->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
+ "Doesn't have a glue result");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
+ InputGlue);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (FRN->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
+ NowDeadNodes.push_back(FRN);
+ }
+ }
+
+ if (!NowDeadNodes.empty())
+ CurDAG->RemoveDeadNodes(NowDeadNodes);
+
+ DEBUG(errs() << "ISEL: Match complete!\n");
+}
+
+enum ChainResult {
+ CR_Simple,
+ CR_InducesCycle,
+ CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevent folding because
+/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
+/// between pattern nodes (in which case the TF becomes part of the pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down to
+/// already selected nodes "below" us.
+static ChainResult
+WalkChainUsers(const SDNode *ChainedNode,
+ SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
+ SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+ ChainResult Result = CR_Simple;
+
+ for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+ E = ChainedNode->use_end(); UI != E; ++UI) {
+ // Make sure the use is of the chain, not some other value we produce.
+ if (UI.getUse().getValueType() != MVT::Other) continue;
+
+ SDNode *User = *UI;
+
+ // If we see an already-selected machine node, then we've gone beyond the
+ // pattern that we're selecting down into the already selected chunk of the
+ // DAG.
+ if (User->isMachineOpcode() ||
+ User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
+ continue;
+
+ unsigned UserOpcode = User->getOpcode();
+ if (UserOpcode == ISD::CopyToReg ||
+ UserOpcode == ISD::CopyFromReg ||
+ UserOpcode == ISD::INLINEASM ||
+ UserOpcode == ISD::EH_LABEL ||
+ UserOpcode == ISD::LIFETIME_START ||
+ UserOpcode == ISD::LIFETIME_END) {
+ // If their node ID got reset to -1 then they've already been selected.
+ // Treat them like a MachineOpcode.
+ if (User->getNodeId() == -1)
+ continue;
+ }
+
+ // If we have a TokenFactor, we handle it specially.
+ if (User->getOpcode() != ISD::TokenFactor) {
+ // If the node isn't a token factor and isn't part of our pattern, then it
+ // must be a random chained node in between two nodes we're selecting.
+ // This happens when we have something like:
+ // x = load ptr
+ // call
+ // y = x+4
+ // store y -> ptr
+ // Because we structurally match the load/store as a read/modify/write,
+ // but the call is chained between them. We cannot fold in this case
+ // because it would induce a cycle in the graph.
+ if (!std::count(ChainedNodesInPattern.begin(),
+ ChainedNodesInPattern.end(), User))
+ return CR_InducesCycle;
+
+ // Otherwise we found a node that is part of our pattern. For example in:
+ // x = load ptr
+ // y = x+4
+ // store y -> ptr
+ // This would happen when we're scanning down from the load and see the
+ // store as a user. Record that there is a use of ChainedNode that is
+ // part of the pattern and keep scanning uses.
+ Result = CR_LeadsToInteriorNode;
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ // If we found a TokenFactor, there are two cases to consider: first if the
+ // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+ // uses of the TF are in our pattern) we just want to ignore it. Second,
+ // the TokenFactor can be sandwiched in between two chained nodes, like so:
+ // [Load chain]
+ // ^
+ // |
+ // [Load]
+ // ^ ^
+ // | \ DAG's like cheese
+ // / \ do you?
+ // / |
+ // [TokenFactor] [Op]
+ // ^ ^
+ // | |
+ // \ /
+ // \ /
+ // [Store]
+ //
+ // In this case, the TokenFactor becomes part of our match and we rewrite it
+ // as a new TokenFactor.
+ //
+ // To distinguish these two cases, do a recursive walk down the uses.
+ switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+ case CR_Simple:
+ // If the uses of the TokenFactor are just already-selected nodes, ignore
+ // it, it is "below" our pattern.
+ continue;
+ case CR_InducesCycle:
+ // If the uses of the TokenFactor lead to nodes that are not part of our
+ // pattern that are not selected, folding would turn this into a cycle,
+ // bail out now.
+ return CR_InducesCycle;
+ case CR_LeadsToInteriorNode:
+ break; // Otherwise, keep processing.
+ }
+
+ // Okay, we know we're in the interesting interior case. The TokenFactor
+ // is now going to be considered part of the pattern so that we rewrite its
+ // uses (it may have uses that are not part of the pattern) with the
+ // ultimate chain result of the generated code. We will also add its chain
+ // inputs as inputs to the ultimate TokenFactor we create.
+ Result = CR_LeadsToInteriorNode;
+ ChainedNodesInPattern.push_back(User);
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chains. The
+/// input vector contains a list of all of the chained nodes that we match. We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and if so, creating a TokenFactor node. that will
+/// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SelectionDAG *CurDAG) {
+ // Walk all of the chained nodes we've matched, recursively scanning down the
+ // users of the chain result. This adds any TokenFactor nodes that are caught
+ // in between chained nodes to the chained and interior nodes list.
+ SmallVector<SDNode*, 3> InteriorChainedNodes;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+ InteriorChainedNodes) == CR_InducesCycle)
+ return SDValue(); // Would induce a cycle.
+ }
+
+ // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+ // that we are interested in. Form our input TokenFactor node.
+ SmallVector<SDValue, 3> InputChains;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ // Add the input chain of this node to the InputChains list (which will be
+ // the operands of the generated TokenFactor) if it's not an interior node.
+ SDNode *N = ChainNodesMatched[i];
+ if (N->getOpcode() != ISD::TokenFactor) {
+ if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+ continue;
+
+ // Otherwise, add the input chain.
+ SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+ assert(InChain.getValueType() == MVT::Other && "Not a chain");
+ InputChains.push_back(InChain);
+ continue;
+ }
+
+ // If we have a token factor, we want to add all inputs of the token factor
+ // that are not part of the pattern we're matching.
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+ N->getOperand(op).getNode()))
+ InputChains.push_back(N->getOperand(op));
+ }
+ }
+
+ SDValue Res;
+ if (InputChains.size() == 1)
+ return InputChains[0];
+ return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
+ MVT::Other, &InputChains[0], InputChains.size());
+}
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+ // It is possible we're using MorphNodeTo to replace a node with no
+ // normal results with one that has a normal result (or we could be
+ // adding a chain) and the input could have glue and chains as well.
+ // In this case we need to shift the operands down.
+ // FIXME: This is a horrible hack and broken in obscure cases, no worse
+ // than the old isel though.
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
+
+ unsigned NTMNumResults = Node->getNumValues();
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
+ if (NTMNumResults != 1 &&
+ Node->getValueType(NTMNumResults-2) == MVT::Other)
+ OldChainResultNo = NTMNumResults-2;
+ } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+ OldChainResultNo = NTMNumResults-1;
+
+ // Call the underlying SelectionDAG routine to do the transmogrification. Note
+ // that this deletes operands of the old node that become dead.
+ SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ }
+
+ unsigned ResNumResults = Res->getNumValues();
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
+ --ResNumResults;
+
+ // Move the chain reference if needed.
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+ (unsigned)OldChainResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ // Otherwise, no replacement happened because the node already exists. Replace
+ // Uses of the old node with the new one.
+ if (Res != Node)
+ CurDAG->ReplaceAllUsesWith(Node, Res);
+
+ return Res;
+}
+
+/// CheckSame - Implements OP_CheckSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ // Accept if it is exactly the same as a previously recorded node.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ return N == RecordedNodes[RecNo].first;
+}
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ const SelectionDAGISel &SDISel) {
+ return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OP_CheckNodePredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ const SelectionDAGISel &SDISel, SDNode *N) {
+ return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDNode *N) {
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ return N->getOpcode() == Opc;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (N.getValueType() == VT) return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
+}
+
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ return cast<CondCodeSDNode>(N)->get() ==
+ (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (cast<VTSDNode>(N)->getVT() == VT)
+ return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ return C != 0 && C->getSExtValue() == Val;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::AND) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::OR) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current node. If the current predicate is known to
+/// fail, set Result=true and return anything. If the current predicate is
+/// known to pass, set Result=false and return the MatcherIndex to continue
+/// with. If the current predicate is unknown, set Result=false and return the
+/// MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+ unsigned Index, SDValue N,
+ bool &Result,
+ const SelectionDAGISel &SDISel,
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ switch (Table[Index++]) {
+ default:
+ Result = false;
+ return Index-1; // Could not evaluate this predicate.
+ case SelectionDAGISel::OPC_CheckSame:
+ Result = !::CheckSame(Table, Index, N, RecordedNodes);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPatternPredicate:
+ Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPredicate:
+ Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckOpcode:
+ Result = !::CheckOpcode(Table, Index, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckType:
+ Result = !::CheckType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Type:
+ case SelectionDAGISel::OPC_CheckChild1Type:
+ case SelectionDAGISel::OPC_CheckChild2Type:
+ case SelectionDAGISel::OPC_CheckChild3Type:
+ case SelectionDAGISel::OPC_CheckChild4Type:
+ case SelectionDAGISel::OPC_CheckChild5Type:
+ case SelectionDAGISel::OPC_CheckChild6Type:
+ case SelectionDAGISel::OPC_CheckChild7Type:
+ Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+ return Index;
+ case SelectionDAGISel::OPC_CheckCondCode:
+ Result = !::CheckCondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckValueType:
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckInteger:
+ Result = !::CheckInteger(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckAndImm:
+ Result = !::CheckAndImm(Table, Index, N, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckOrImm:
+ Result = !::CheckOrImm(Table, Index, N, SDISel);
+ return Index;
+ }
+}
+
+namespace {
+
+struct MatchScope {
+ /// FailIndex - If this match fails, this is the index to continue with.
+ unsigned FailIndex;
+
+ /// NodeStack - The node stack when the scope was formed.
+ SmallVector<SDValue, 4> NodeStack;
+
+ /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+ unsigned NumRecordedNodes;
+
+ /// NumMatchedMemRefs - The number of matched memref entries.
+ unsigned NumMatchedMemRefs;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
+
+ /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+ bool HasChainNodesMatched, HasGlueResultNodesMatched;
+};
+
+}
+
+SDNode *SelectionDAGISel::
+SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
+ unsigned TableSize) {
+ // FIXME: Should these even be selected? Handle these cases in the caller?
+ switch (NodeToMatch->getOpcode()) {
+ default:
+ break;
+ case ISD::EntryToken: // These nodes remain the same.
+ case ISD::BasicBlock:
+ case ISD::Register:
+ case ISD::RegisterMask:
+ //case ISD::VALUETYPE:
+ //case ISD::CONDCODE:
+ case ISD::HANDLENODE:
+ case ISD::MDNODE_SDNODE:
+ case ISD::TargetConstant:
+ case ISD::TargetConstantFP:
+ case ISD::TargetConstantPool:
+ case ISD::TargetFrameIndex:
+ case ISD::TargetExternalSymbol:
+ case ISD::TargetBlockAddress:
+ case ISD::TargetJumpTable:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::TargetGlobalAddress:
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::EH_LABEL:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ NodeToMatch->setNodeId(-1); // Mark selected.
+ return 0;
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
+ NodeToMatch->getOperand(0));
+ return 0;
+ case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
+ case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
+ }
+
+ assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+ // Set up the node stack with NodeToMatch as the only node on the stack.
+ SmallVector<SDValue, 8> NodeStack;
+ SDValue N = SDValue(NodeToMatch, 0);
+ NodeStack.push_back(N);
+
+ // MatchScopes - Scopes used when matching, if a match failure happens, this
+ // indicates where to continue checking.
+ SmallVector<MatchScope, 8> MatchScopes;
+
+ // RecordedNodes - This is the set of nodes that have been recorded by the
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
+ // MatchedMemRefs - This is the set of MemRef's we've seen in the input
+ // pattern.
+ SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+
+ // These are the current input chain and glue for use when generating nodes.
+ // Various Emit operations change these. For example, emitting a copytoreg
+ // uses and updates these.
+ SDValue InputChain, InputGlue;
+
+ // ChainNodesMatched - If a pattern matches nodes that have input/output
+ // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
+ // which ones they are. The result is captured into this list so that we can
+ // update the chain results when the pattern is complete.
+ SmallVector<SDNode*, 3> ChainNodesMatched;
+ SmallVector<SDNode*, 3> GlueResultNodesMatched;
+
+ DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+ NodeToMatch->dump(CurDAG);
+ errs() << '\n');
+
+ // Determine where to start the interpreter. Normally we start at opcode #0,
+ // but if the state machine starts with an OPC_SwitchOpcode, then we
+ // accelerate the first lookup (which is guaranteed to be hot) with the
+ // OpcodeOffset table.
+ unsigned MatcherIndex = 0;
+
+ if (!OpcodeOffset.empty()) {
+ // Already computed the OpcodeOffset table, just index into it.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n");
+
+ } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+ // Otherwise, the table isn't computed, but the state machine does start
+ // with an OPC_SwitchOpcode instruction. Populate the table now, since this
+ // is the first time we're selecting an instruction.
+ unsigned Idx = 1;
+ while (1) {
+ // Get the size of this case.
+ unsigned CaseSize = MatcherTable[Idx++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+ if (CaseSize == 0) break;
+
+ // Get the opcode, add the index to the table.
+ uint16_t Opc = MatcherTable[Idx++];
+ Opc |= (unsigned short)MatcherTable[Idx++] << 8;
+ if (Opc >= OpcodeOffset.size())
+ OpcodeOffset.resize((Opc+1)*2);
+ OpcodeOffset[Opc] = Idx;
+ Idx += CaseSize;
+ }
+
+ // Okay, do the lookup for the first opcode.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ }
+
+ while (1) {
+ assert(MatcherIndex < TableSize && "Invalid index");
+#ifndef NDEBUG
+ unsigned CurrentOpcodeIndex = MatcherIndex;
+#endif
+ BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ switch (Opcode) {
+ case OPC_Scope: {
+ // Okay, the semantics of this operation are that we should push a scope
+ // then evaluate the first child. However, pushing a scope only to have
+ // the first check fail (which then pops it) is inefficient. If we can
+ // determine immediately that the first check (or first several) will
+ // immediately fail, don't even bother pushing a scope for them.
+ unsigned FailIndex;
+
+ while (1) {
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+ // Found the end of the scope with no match.
+ if (NumToSkip == 0) {
+ FailIndex = 0;
+ break;
+ }
+
+ FailIndex = MatcherIndex+NumToSkip;
+
+ unsigned MatcherIndexOfPredicate = MatcherIndex;
+ (void)MatcherIndexOfPredicate; // silence warning.
+
+ // If we can't evaluate this predicate without pushing a scope (e.g. if
+ // it is a 'MoveParent') or if the predicate succeeds on this node, we
+ // push the scope and evaluate the full predicate chain.
+ bool Result;
+ MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
+ Result, *this, RecordedNodes);
+ if (!Result)
+ break;
+
+ DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
+ << "index " << MatcherIndexOfPredicate
+ << ", continuing at " << FailIndex << "\n");
+ ++NumDAGIselRetries;
+
+ // Otherwise, we know that this case of the Scope is guaranteed to fail,
+ // move to the next case.
+ MatcherIndex = FailIndex;
+ }
+
+ // If the whole scope failed to match, bail.
+ if (FailIndex == 0) break;
+
+ // Push a MatchScope which indicates where to go if the first child fails
+ // to match.
+ MatchScope NewEntry;
+ NewEntry.FailIndex = FailIndex;
+ NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
+ NewEntry.NumRecordedNodes = RecordedNodes.size();
+ NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
+ NewEntry.InputChain = InputChain;
+ NewEntry.InputGlue = InputGlue;
+ NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
+ NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();
+ MatchScopes.push_back(NewEntry);
+ continue;
+ }
+ case OPC_RecordNode: {
+ // Remember this node, it may end up being an operand in the pattern.
+ SDNode *Parent = 0;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
+ continue;
+ }
+
+ case OPC_RecordChild0: case OPC_RecordChild1:
+ case OPC_RecordChild2: case OPC_RecordChild3:
+ case OPC_RecordChild4: case OPC_RecordChild5:
+ case OPC_RecordChild6: case OPC_RecordChild7: {
+ unsigned ChildNo = Opcode-OPC_RecordChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
+ continue;
+ }
+ case OPC_RecordMemRef:
+ MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
+ continue;
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
+ if (N->getNumOperands() != 0 &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
+ continue;
+
+ case OPC_MoveChild: {
+ unsigned ChildNo = MatcherTable[MatcherIndex++];
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveParent:
+ // Pop the current node off the NodeStack.
+ NodeStack.pop_back();
+ assert(!NodeStack.empty() && "Node stack imbalance!");
+ N = NodeStack.back();
+ continue;
+
+ case OPC_CheckSame:
+ if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+ continue;
+ case OPC_CheckPatternPredicate:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ continue;
+ case OPC_CheckPredicate:
+ if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+ N.getNode()))
+ break;
+ continue;
+ case OPC_CheckComplexPat: {
+ unsigned CPNum = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
+ RecordedNodes))
+ break;
+ continue;
+ }
+ case OPC_CheckOpcode:
+ if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+ continue;
+
+ case OPC_CheckType:
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+
+ case OPC_SwitchOpcode: {
+ unsigned CurNodeOpcode = N.getOpcode();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+
+ // If the opcode matches, then we will execute this case.
+ if (CurNodeOpcode == Opc)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
+ << " to " << MatcherIndex << "\n");
+ continue;
+ }
+
+ case OPC_SwitchType: {
+ MVT CurNodeVT = N.getValueType().getSimpleVT();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (CaseVT == MVT::iPTR)
+ CaseVT = TLI.getPointerTy();
+
+ // If the VT matches, then we will execute this case.
+ if (CurNodeVT == CaseVT)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+ continue;
+ }
+ case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+ case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+ case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+ case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+ Opcode-OPC_CheckChild0Type))
+ break;
+ continue;
+ case OPC_CheckCondCode:
+ if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckValueType:
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+ case OPC_CheckInteger:
+ if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckAndImm:
+ if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckOrImm:
+ if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+
+ case OPC_CheckFoldableChainNode: {
+ assert(NodeStack.size() != 1 && "No parent node");
+ // Verify that all intermediate nodes between the root and this one have
+ // a single use.
+ bool HasMultipleUses = false;
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
+ if (!NodeStack[i].hasOneUse()) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+
+ // Check to see that the target thinks this is profitable to fold and that
+ // we can fold it without inducing cycles in the graph.
+ if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch) ||
+ !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch, OptLevel,
+ true/*We validate our own chains*/))
+ break;
+
+ continue;
+ }
+ case OPC_EmitInteger: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
+ continue;
+ }
+ case OPC_EmitRegister: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ continue;
+ }
+ case OPC_EmitRegister2: {
+ // For targets w/ more than 256 register names, the register enum
+ // values are stored in two bytes in the matcher table (just like
+ // opcodes).
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RegNo |= MatcherTable[MatcherIndex++] << 8;
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ continue;
+ }
+
+ case OPC_EmitConvertToTarget: {
+ // Convert from IMM/FPIMM to target version.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ SDValue Imm = RecordedNodes[RecNo].first;
+
+ if (Imm->getOpcode() == ISD::Constant) {
+ const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
+ Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true);
+ } else if (Imm->getOpcode() == ISD::ConstantFP) {
+ const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+ Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true);
+ }
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
+ case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
+ // These are space-optimized forms of OPC_EmitMergeInputChains.
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains: {
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ // This node gets a list of nodes we matched in the input that have
+ // chains. We want to token factor all of the input chains to these nodes
+ // together. However, if any of the input chains is actually one of the
+ // nodes matched in this pattern, then we have an intra-match reference.
+ // Ignore these because the newly token factored chain should not refer to
+ // the old nodes.
+ unsigned NumChains = MatcherTable[MatcherIndex++];
+ assert(NumChains != 0 && "Can't TF zero chains");
+
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ for (unsigned i = 0; i != NumChains; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+ }
+
+ // If the inner loop broke out, the match fails.
+ if (ChainNodesMatched.empty())
+ break;
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+
+ continue;
+ }
+
+ case OPC_EmitCopyToReg: {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+
+ if (InputChain.getNode() == 0)
+ InputChain = CurDAG->getEntryNode();
+
+ InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
+ continue;
+ }
+
+ case OPC_EmitNodeXForm: {
+ unsigned XFormNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
+ continue;
+ }
+
+ case OPC_EmitNode:
+ case OPC_MorphNodeTo: {
+ uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+ TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+ // Get the result VT list.
+ unsigned NumVTs = MatcherTable[MatcherIndex++];
+ SmallVector<EVT, 4> VTs;
+ for (unsigned i = 0; i != NumVTs; ++i) {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
+ VTs.push_back(VT);
+ }
+
+ if (EmitNodeInfo & OPFL_Chain)
+ VTs.push_back(MVT::Other);
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
+ // This is hot code, so optimize the two most common cases of 1 and 2
+ // results.
+ SDVTList VTList;
+ if (VTs.size() == 1)
+ VTList = CurDAG->getVTList(VTs[0]);
+ else if (VTs.size() == 2)
+ VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+ else
+ VTList = CurDAG->getVTList(VTs.data(), VTs.size());
+
+ // Get the operand list.
+ unsigned NumOps = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+ Ops.push_back(RecordedNodes[RecNo].first);
+ }
+
+ // If there are variadic operands to add, handle them now.
+ if (EmitNodeInfo & OPFL_VariadicInfo) {
+ // Determine the start index to copy from.
+ unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+ FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+ assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+ "Invalid variadic node");
+ // Copy all of the variadic operands, not including a potential glue
+ // input.
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+ i != e; ++i) {
+ SDValue V = NodeToMatch->getOperand(i);
+ if (V.getValueType() == MVT::Glue) break;
+ Ops.push_back(V);
+ }
+ }
+
+ // If this has chain/glue inputs, add them.
+ if (EmitNodeInfo & OPFL_Chain)
+ Ops.push_back(InputChain);
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+ Ops.push_back(InputGlue);
+
+ // Create the node.
+ SDNode *Res = 0;
+ if (Opcode != OPC_MorphNodeTo) {
+ // If this is a normal EmitNode command, just create the new node and
+ // add the results to the RecordedNodes list.
+ Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
+ VTList, Ops.data(), Ops.size());
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ (SDNode*) 0));
+ }
+
+ } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) {
+ Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
+ EmitNodeInfo);
+ } else {
+ // NodeToMatch was eliminated by CSE when the target changed the DAG.
+ // We will visit the equivalent node later.
+ DEBUG(dbgs() << "Node was eliminated by CSE\n");
+ return 0;
+ }
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
+ if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-2);
+ } else if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-1);
+
+ // If the OPFL_MemRefs glue is set on this node, slap all of the
+ // accumulated memrefs onto it.
+ //
+ // FIXME: This is vastly incorrect for patterns with multiple outputs
+ // instructions that access memory and for ComplexPatterns that match
+ // loads.
+ if (EmitNodeInfo & OPFL_MemRefs) {
+ // Only attach load or store memory operands if the generated
+ // instruction may load or store.
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+ bool mayLoad = MCID.mayLoad();
+ bool mayStore = MCID.mayStore();
+
+ unsigned NumMemRefs = 0;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ ++NumMemRefs;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ ++NumMemRefs;
+ } else {
+ ++NumMemRefs;
+ }
+ }
+
+ MachineSDNode::mmo_iterator MemRefs =
+ MF->allocateMemRefsArray(NumMemRefs);
+
+ MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ *MemRefsPos++ = *I;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ *MemRefsPos++ = *I;
+ } else {
+ *MemRefsPos++ = *I;
+ }
+ }
+
+ cast<MachineSDNode>(Res)
+ ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
+ }
+
+ DEBUG(errs() << " "
+ << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
+ << " node: "; Res->dump(CurDAG); errs() << "\n");
+
+ // If this was a MorphNodeTo then we're completely done!
+ if (Opcode == OPC_MorphNodeTo) {
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, true);
+ return Res;
+ }
+
+ continue;
+ }
+
+ case OPC_MarkGlueResults: {
+ unsigned NumNodes = MatcherTable[MatcherIndex++];
+
+ // Read and remember all the glue-result nodes.
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+ }
+ continue;
+ }
+
+ case OPC_CompleteMatch: {
+ // The match has been completed, and any new nodes (if any) have been
+ // created. Patch up references to the matched dag to use the newly
+ // created nodes.
+ unsigned NumResults = MatcherTable[MatcherIndex++];
+
+ for (unsigned i = 0; i != NumResults; ++i) {
+ unsigned ResSlot = MatcherTable[MatcherIndex++];
+ if (ResSlot & 128)
+ ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+
+ assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
+ SDValue Res = RecordedNodes[ResSlot].first;
+
+ assert(i < NodeToMatch->getNumValues() &&
+ NodeToMatch->getValueType(i) != MVT::Other &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
+ "Invalid number of results to complete!");
+ assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+ NodeToMatch->getValueType(i) == MVT::iPTR ||
+ Res.getValueType() == MVT::iPTR ||
+ NodeToMatch->getValueType(i).getSizeInBits() ==
+ Res.getValueType().getSizeInBits()) &&
+ "invalid replacement");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+ }
+
+ // If the root node defines glue, add it to the glue nodes to update list.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
+ GlueResultNodesMatched.push_back(NodeToMatch);
+
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, false);
+
+ assert(NodeToMatch->use_empty() &&
+ "Didn't replace all uses of the node?");
+
+ // FIXME: We just return here, which interacts correctly with SelectRoot
+ // above. We should fix this to not return an SDNode* anymore.
+ return 0;
+ }
+ }
+
+ // If the code reached this point, then the match failed. See if there is
+ // another child to try in the current 'Scope', otherwise pop it until we
+ // find a case to check.
+ DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
+ ++NumDAGIselRetries;
+ while (1) {
+ if (MatchScopes.empty()) {
+ CannotYetSelect(NodeToMatch);
+ return 0;
+ }
+
+ // Restore the interpreter state back to the point where the scope was
+ // formed.
+ MatchScope &LastScope = MatchScopes.back();
+ RecordedNodes.resize(LastScope.NumRecordedNodes);
+ NodeStack.clear();
+ NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+ N = NodeStack.back();
+
+ if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+ MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+ MatcherIndex = LastScope.FailIndex;
+
+ DEBUG(errs() << " Continuing at " << MatcherIndex << "\n");
+
+ InputChain = LastScope.InputChain;
+ InputGlue = LastScope.InputGlue;
+ if (!LastScope.HasChainNodesMatched)
+ ChainNodesMatched.clear();
+ if (!LastScope.HasGlueResultNodesMatched)
+ GlueResultNodesMatched.clear();
+
+ // Check to see what the offset is at the new MatcherIndex. If it is zero
+ // we have reached the end of this scope, otherwise we have another child
+ // in the current scope to try.
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+ // If we have another child in this scope to match, update FailIndex and
+ // try it.
+ if (NumToSkip != 0) {
+ LastScope.FailIndex = MatcherIndex+NumToSkip;
+ break;
+ }
+
+ // End of this scope, pop it and try the next child in the containing
+ // scope.
+ MatchScopes.pop_back();
+ }
+ }
+}
+
+
+
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot select: ";
+
+ if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_VOID) {
+ N->printrFull(Msg, CurDAG);
+ Msg << "\nIn function: " << MF->getName();
+ } else {
+ bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+ unsigned iid =
+ cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+ if (iid < Intrinsic::num_intrinsics)
+ Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+ else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+ Msg << "target intrinsic %" << TII->getName(iid);
+ else
+ Msg << "unknown intrinsic #" << iid;
+ }
+ report_fatal_error(Msg.str());
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 000000000000..b752b482e3a1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,299 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static bool hasEdgeDestLabels() {
+ return true;
+ }
+
+ static unsigned numEdgeDestLabels(const void *Node) {
+ return ((const SDNode *) Node)->getNumValues();
+ }
+
+ static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ return ((const SDNode *) Node)->getValueType(i).getEVTString();
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+ return itostr(I - SDNodeIterator::begin((const SDNode *) Node));
+ }
+
+ /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+ /// should actually target another edge source, not a node. If this method
+ /// is implemented, getEdgeTarget should be implemented.
+ template<typename EdgeIter>
+ static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ return true;
+ }
+
+ /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+ /// called to determine which outgoing edge of Node is the target of this
+ /// edge.
+ template<typename EdgeIter>
+ static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ SDNode *TargetNode = *I;
+ SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+ std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+ return NI;
+ }
+
+ static std::string getGraphName(const SelectionDAG *G) {
+ return G->getMachineFunction().getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+ const SelectionDAG *Graph) {
+ SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+ EVT VT = Op.getValueType();
+ if (VT == MVT::Glue)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getSimpleNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Result = Node->getOperationName(G);
+ {
+ raw_string_ostream OS(Result);
+ Node->print_details(OS, G);
+ }
+ return Result;
+ }
+ std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().getNode())
+ GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ "color=blue,style=dashed");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getName(),
+ false, Title);
+#else
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+ viewGraph("");
+}
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (eg. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+ NodeGraphAttrs.clear();
+#else
+ errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = Attrs;
+#else
+ errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+ return std::string();
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ errs() << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor. Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited,
+ int level, bool &printed) {
+ bool hit_limit = false;
+
+#ifndef NDEBUG
+ if (level >= 20) {
+ if (!printed) {
+ printed = true;
+ DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+ }
+ return true;
+ }
+
+ unsigned oldSize = visited.size();
+ visited.insert(N);
+ if (visited.size() != oldSize) {
+ setGraphColor(N, Color);
+ for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N);
+ i != iend;
+ ++i) {
+ hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit;
+ }
+ }
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+ return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ DenseSet<SDNode *> visited;
+ bool printed = false;
+ if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+ // Visually mark that we hit the limit
+ if (strcmp(Color, "red") == 0) {
+ setSubgraphColorHelper(N, "blue", visited, 0, printed);
+ } else if (strcmp(Color, "yellow") == 0) {
+ setSubgraphColorHelper(N, "green", visited, 0, printed);
+ }
+ }
+
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream O(s);
+ O << "SU(" << SU->NodeNum << "): ";
+ if (SU->getNode()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ O << DOTGraphTraits<SelectionDAG*>
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
+ O << "\n ";
+ }
+ } else {
+ O << "CROSS RC COPY";
+ }
+ return O.str();
+}
+
+void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+ if (DAG) {
+ // Draw a special "GraphRoot" node to indicate the root of the graph.
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ const SDNode *N = DAG->getRoot().getNode();
+ if (N && N->getNodeId() != -1)
+ GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+ "color=blue,style=dashed");
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 000000000000..f5fc66c4d3da
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,2593 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLowering::TargetLowering(const TargetMachine &tm,
+ const TargetLoweringObjectFile *tlof)
+ : TargetLoweringBase(tm, tlof) {}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return NULL;
+}
+
+/// Check whether a given call node is in tail position within its function. If
+/// so, it sets Chain to the input chain of the tail call.
+bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ SDValue &Chain) const {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias).hasAttributes())
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return isUsedByReturnOnly(Node, Chain);
+}
+
+
+/// Generate a libcall taking the given operands as arguments and returning a
+/// result of type RetVT.
+SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
+ RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) const {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI);
+
+ return CallInfo.first;
+}
+
+
+/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) const {
+ assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
+ && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
+ (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
+ (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128;
+ break;
+ default:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ switch (CCCode) {
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ // Fallthrough
+ case ISD::SETUGT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUGE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+ break;
+ case ISD::SETULT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ break;
+ case ISD::SETULE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+ break;
+ case ISD::SETUEQ:
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ default: llvm_unreachable("Do not know how to soften this setcc!");
+ }
+ }
+
+ // Use the target specific return value for comparions lib calls.
+ EVT RetVT = getCmpLibcallReturnType();
+ SDValue Ops[2] = { NewLHS, NewRHS };
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewRHS = DAG.getConstant(0, RetVT);
+ CCCode = getCmpLibcallCC(LC1);
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS,
+ NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
+ }
+}
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+ // In non-pic modes, just use the address of a block.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return MachineJumpTableInfo::EK_BlockAddress;
+
+ // In PIC mode, if the target supports a GPRel32 directive, use it.
+ if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
+ return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+
+ // Otherwise, use a label difference.
+ return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ // If our PIC model is GP relative, use the global offset table as the base.
+ unsigned JTEncoding = getJumpTableEncoding();
+
+ if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+ (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0));
+
+ return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI,MCContext &Ctx) const{
+ // The normal PIC reloc base is the label at the start of the jump table.
+ return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // Assume that everything is safe in static mode.
+ if (getTargetMachine().getRelocationModel() == Reloc::Static)
+ return true;
+
+ // In dynamic-no-pic mode, assume that known defined values are safe.
+ if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+ GA &&
+ !GA->getGlobal()->isDeclaration() &&
+ !GA->getGlobal()->isWeakForLinker())
+ return true;
+
+ // Otherwise assume nothing is safe.
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+ const APInt &Demanded) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C) return false;
+
+ if (Op.getOpcode() == ISD::XOR &&
+ (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ return false;
+
+ // if we can expand it to have all bits set, do it
+ if (C->getAPIntValue().intersects(~Demanded)) {
+ EVT VT = Op.getValueType();
+ SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+ DAG.getConstant(Demanded &
+ C->getAPIntValue(),
+ VT));
+ return CombineTo(Op, New);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+ unsigned BitWidth,
+ const APInt &Demanded,
+ DebugLoc dl) {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros();
+ unsigned SmallVTBits = DemandedSize;
+ if (!isPowerOf2_32(SmallVTBits))
+ SmallVTBits = NextPowerOf2(SmallVTBits);
+ for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+ if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ // We found a type with free casts.
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(1)));
+ bool NeedZext = DemandedSize > SmallVTBits;
+ SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
+ dl, Op.getValueType(), X);
+ return CombineTo(Op, Z);
+ }
+ }
+ return false;
+}
+
+/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream. If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, returning the original and new nodes in Old and New. Otherwise,
+/// analyze the expression and return a mask of KnownOne and KnownZero bits for
+/// the expression (used to simplify the caller). The KnownZero/One bits may
+/// only be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ unsigned BitWidth = DemandedMask.getBitWidth();
+ assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
+ "Mask size mismatches value type size!");
+ APInt NewMask = DemandedMask;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+
+ // Other users may use these bits.
+ if (!Op.getNode()->hasOneUse()) {
+ if (Depth != 0) {
+ // If not at the root, Just compute the KnownZero/KnownOne bits to
+ // simplify things downstream.
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ return false;
+ }
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the NewMask to all bits.
+ NewMask = APInt::getAllOnesValue(BitWidth);
+ } else if (DemandedMask == 0) {
+ // Not demanding any bits from Op.
+ if (Op.getOpcode() != ISD::UNDEF)
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+ return false;
+ } else if (Depth == 6) { // Limit search depth.
+ return false;
+ }
+
+ APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::AND:
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+ // simplify the LHS, here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt LHSZero, LHSOne;
+ // Do not increment Depth here; that can cause an infinite loop.
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
+ // If the LHS already has zeros where RHSC does, this and is dead.
+ if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+ return true;
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ break;
+ case ISD::OR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ break;
+ case ISD::XOR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((KnownZero & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((KnownZero2 & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // If all of the unknown bits are known to be zero on one side or the other
+ // (but not both) turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1)));
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ // NB: it is okay if more bits are known than are requested
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side
+ if (KnownOne == KnownOne2) { // set bits are the same on both sides
+ EVT VT = Op.getValueType();
+ SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ Op.getOperand(0), ANDC));
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // for XOR, we prefer to force bits to 1 if they will make a -1.
+ // if we can't force bits, try to shrink constant
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt Expanded = C->getAPIntValue() | (~NewMask);
+ // if we can expand it to have all bits set, do it
+ if (Expanded.isAllOnesValue()) {
+ if (Expanded != C->getAPIntValue()) {
+ EVT VT = Op.getValueType();
+ SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
+ TLO.DAG.getConstant(Expanded, VT));
+ return TLO.CombineTo(Op, New);
+ }
+ // if it already has all the bits set, nothing to change
+ // but don't shrink either!
+ } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+ return true;
+ }
+ }
+
+ KnownZero = KnownZeroOut;
+ KnownOne = KnownOneOut;
+ break;
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SHL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (InOp.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+
+ // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
+ // are not demanded. This will likely allow the anyext to be folded away.
+ if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = InOp.getNode()->getOperand(0);
+ EVT InnerVT = InnerOp.getValueType();
+ unsigned InnerBits = InnerVT.getSizeInBits();
+ if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
+ isTypeDesirableForOp(ISD::SHL, InnerVT)) {
+ EVT ShTy = getShiftAmountTy(InnerVT);
+ if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
+ ShTy = InnerVT;
+ SDValue NarrowShl =
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, ShTy));
+ return
+ TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
+ NarrowShl));
+ }
+ }
+
+ KnownZero <<= SA->getZExtValue();
+ KnownOne <<= SA->getZExtValue();
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ }
+ break;
+ case ISD::SRL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+ unsigned VTSize = VT.getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
+ if (InOp.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ break;
+ case ISD::SRA:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (NewMask == 1)
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1)));
+
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ APInt InDemandedMask = (NewMask << ShAmt);
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ if (HighBits.intersects(NewMask))
+ InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bit, adjusted to where it is now in the mask.
+ APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ Op.getOperand(0),
+ Op.getOperand(1)));
+ } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
+ // If we only care about the highest bit, don't bother shifting right.
+ if (MsbMask == DemandedMask) {
+ unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp, ShiftAmt));
+ }
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth - ExVT.getScalarType().getSizeInBits());
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if ((NewBits & NewMask) == 0)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ APInt InSignBit =
+ APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt InputDemandedBits =
+ APInt::getLowBitsSet(BitWidth,
+ ExVT.getScalarType().getSizeInBits()) &
+ NewMask;
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
+
+ if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
+ if (!NewBits.intersects(NewMask))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
+ APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt NewBits = ~InMask & NewMask;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ APInt InDemandedBits = InMask & NewMask;
+ InDemandedBits |= InSignBit;
+ InDemandedBits = InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // If the sign bit is known one, the top bits match.
+ if (KnownOne.intersects(InSignBit)) {
+ KnownOne |= NewBits;
+ assert((KnownZero & NewBits) == 0);
+ } else { // Otherwise, top bits aren't known.
+ assert((KnownOne & NewBits) == 0);
+ assert((KnownZero & NewBits) == 0);
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt TruncMask = NewMask.zext(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Op.getOperand(0).getNode()->hasOneUse()) {
+ SDValue In = Op.getOperand(0);
+ switch (In.getOpcode()) {
+ default: break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (TLO.LegalTypes() &&
+ !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
+ // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+ // undesirable.
+ break;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ if (!ShAmt)
+ break;
+ SDValue Shift = In.getOperand(1);
+ if (TLO.LegalTypes()) {
+ uint64_t ShVal = ShAmt->getZExtValue();
+ Shift =
+ TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType()));
+ }
+
+ APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+ OperandBitWidth - BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
+
+ if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ Shift));
+ }
+ break;
+ }
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::AssertZext: {
+ // AssertZext demands all of the high bits, plus any of the low bits
+ // demanded by its users.
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ KnownZero |= ~InMask & NewMask;
+ break;
+ }
+ case ISD::BITCAST:
+ // If this is an FP->Int bitcast and if the sign bit is the only
+ // thing demanded, turn this into a FGETSIGN.
+ if (!TLO.LegalOperations() &&
+ !Op.getValueType().isVector() &&
+ !Op.getOperand(0).getValueType().isVector() &&
+ NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+ Op.getOperand(0).getValueType().isFloatingPoint()) {
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
+ EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
+ // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+ // place. We expect the SHL to be eliminated by other optimizations.
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
+ unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits();
+ if (!OpVTLegal && OpVTSizeInBits > 32)
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
+ unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(),
+ Sign, ShAmt));
+ }
+ }
+ break;
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SUB: {
+ // Add, Sub, and Mul don't demand any bits in positions beyond that
+ // of the highest bit demanded of them.
+ APInt LoMask = APInt::getLowBitsSet(BitWidth,
+ BitWidth - NewMask.countLeadingZeros());
+ if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ // See if the operation should be performed at a smaller bit width.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+ }
+ // FALL THROUGH
+ default:
+ // Just use ComputeMaskedBits to compute output bits.
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
+ return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
+/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// determine which bit is set.
+///
+static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+ // A left-shift of a constant one will have exactly one bit set, because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue() == 1)
+ return true;
+
+ // Similarly, a right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue().isSignBit())
+ return true;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to ComputeMaskedBits to catch other known cases.
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(Val, KnownZero, KnownOne);
+ return (KnownZero.countPopulation() == BitWidth - 1) &&
+ (KnownOne.countPopulation() == 1);
+}
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// and cc. If it is unable to simplify it, return a null SDValue.
+SDValue
+TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI, DebugLoc dl) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return DAG.getConstant(1, VT);
+ }
+
+ // Ensure that the constant occurs on the RHS, and fold constant
+ // comparisons.
+ if (isa<ConstantSDNode>(N0.getNode()))
+ return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ const APInt &ShAmt
+ = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
+ }
+ }
+
+ SDValue CTPOP = N0;
+ // Look through truncs that don't change the value of a ctpop.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+ (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+ Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+ DAG.getConstant(1, CTVT));
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+ }
+
+ // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ }
+
+ // (zext x) == C --> x == (trunc C)
+ if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreZExt;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreZExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countTrailingOnes();
+ PreZExt = N0->getOperand(0);
+ }
+ } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreZExt = N0;
+ }
+ }
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+ }
+ }
+
+ // If the LHS is '(and load, const)', the RHS is 0,
+ // the test is for equality or unsigned, and all 1 bits of the const are
+ // in the same partial word, see if we can shorten the load.
+ if (DCI.isBeforeLegalize() &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ APInt bestMask;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (!Lod->isVolatile() && Lod->isUnindexed()) {
+ unsigned origWidth = N0.getValueType().getSizeInBits();
+ unsigned maskWidth = origWidth;
+ // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
+ // 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ const APInt &Mask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+ APInt newMask = APInt::getLowBitsSet(maskWidth, width);
+ for (unsigned offset=0; offset<origWidth/width; offset++) {
+ if ((newMask & Mask) == Mask) {
+ if (!getDataLayout()->isLittleEndian())
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ else
+ bestOffset = (uint64_t)offset * (width/8);
+ bestMask = Mask.lshr(offset * (width/8) * 8);
+ bestWidth = width;
+ break;
+ }
+ newMask = newMask << width;
+ }
+ }
+ }
+ if (bestWidth) {
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
+ if (newVT.isRound()) {
+ EVT PtrType = Lod->getOperand(1).getValueType();
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+ DAG.getConstant(bestOffset, PtrType));
+ unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+ SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
+ false, false, false, NewAlign);
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask.trunc(bestWidth),
+ newVT)),
+ DAG.getConstant(0LL, newVT), Cond);
+ }
+ }
+ }
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ: return DAG.getConstant(0, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE: return DAG.getConstant(1, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ EVT newVT = N0.getOperand(0).getValueType();
+ if (DCI.isBeforeLegalizeOps() ||
+ (isOperationLegal(ISD::SETCC, newVT) &&
+ getCondCodeAction(Cond, newVT.getSimpleVT())==Legal))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(C1.trunc(InSize), newVT),
+ Cond);
+ break;
+ }
+ default:
+ break; // todo, be more careful with signed comparisons
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ EVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the constant doesn't fit into the number of bits for the source of
+ // the sign extension, it is impossible for both sides to be equal.
+ if (C1.getMinSignedBits() > ExtSrcTyBits)
+ return DAG.getConstant(Cond == ISD::SETNE, VT);
+
+ SDValue ZextOp;
+ EVT Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
+ } else {
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, Op0Ty));
+ }
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & APInt::getLowBitsSet(
+ ExtDstTyBits,
+ ExtSrcTyBits),
+ ExtDstTy),
+ Cond);
+ } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC &&
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+ if (TrueWhenTrue)
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ N0.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ } else if (N1C->getAPIntValue() == 1 &&
+ (VT == MVT::i1 ||
+ getBooleanContents(false) == ZeroOrOneBooleanContent)) {
+ SDValue Op0 = N0;
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+
+ if ((Op0.getOpcode() == ISD::XOR) &&
+ Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+ Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+ Cond);
+ }
+ if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+ if (Op0.getValueType().bitsGT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+ else if (Op0.getValueType().bitsLT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ if (Op0.getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ // X >= C0 --> X > (C0-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1-1, N1.getValueType()),
+ (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ // X <= C0 --> X < (C0+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1+1, N1.getValueType()),
+ (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, N0.getValueType()),
+ ISD::SETEQ);
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (VT == N0.getValueType() ||
+ (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ N0.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (AndRHS->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (C1.isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy)));
+ }
+ }
+ }
+
+ if (C1.getMinSignedBits() <= 64 &&
+ !isLegalICmpImmediate(C1.getSExtValue())) {
+ // (X & -256) == 256 -> (X >> 8) == 1
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ const APInt &AndRHSC = AndRHS->getAPIntValue();
+ if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ unsigned ShiftBits = AndRHSC.countTrailingZeros();
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
+ }
+ } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
+ Cond == ISD::SETULE || Cond == ISD::SETUGT) {
+ bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
+ // X < 0x100000000 -> (X >> 32) < 1
+ // X >= 0x100000000 -> (X >> 32) >= 1
+ // X <= 0x0ffffffff -> (X >> 32) < 1
+ // X > 0x0ffffffff -> (X >> 32) >= 1
+ unsigned ShiftBits;
+ APInt NewC = C1;
+ ISD::CondCode NewCond = Cond;
+ if (AdjOne) {
+ ShiftBits = C1.countTrailingOnes();
+ NewC = NewC + 1;
+ NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ } else {
+ ShiftBits = C1.countTrailingZeros();
+ }
+ NewC = NewC.lshr(ShiftBits);
+ if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
+ DAG.getConstant(ShiftBits, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(NewC, CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
+ }
+ }
+ }
+ }
+
+ if (isa<ConstantFPSDNode>(N0.getNode())) {
+ // Constant fold or commute setcc.
+ SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ if (O.getNode()) return O;
+ } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ // If the RHS of an FP comparison is a constant, simplify it away in
+ // some cases.
+ if (CFP->getValueAPF().isNaN()) {
+ // If an operand is known to be a nan, we can fold it.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default: llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return DAG.getConstant(0, VT);
+ case 1: // Known true.
+ return DAG.getConstant(1, VT);
+ case 2: // Undefined.
+ return DAG.getUNDEF(VT);
+ }
+ }
+
+ // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
+ // constant if knowing that the operand is non-nan is enough. We prefer to
+ // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+ // materialize 0.0.
+ if (Cond == ISD::SETO || Cond == ISD::SETUO)
+ return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+ // If the condition is not legal, see if we can find an equivalent one
+ // which is legal.
+ if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
+ // If the comparison was an awkward floating-point == or != and one of
+ // the comparison operands is infinity or negative infinity, convert the
+ // condition to a less-awkward <= or >=.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+ } else {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ }
+ }
+ }
+ }
+
+ if (N0 == N1) {
+ // The sext(setcc()) => setcc() optimization relies on the appropriate
+ // constant being emitted.
+ uint64_t EqVal = 0;
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond);
+ break;
+ case ZeroOrNegativeOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+ break;
+ }
+
+ // We can always fold X == X for integer setcc's.
+ if (N0.getValueType().isInteger()) {
+ return DAG.getConstant(EqVal, VT);
+ }
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getConstant(EqVal, VT);
+ if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+ return DAG.getConstant(EqVal, VT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
+ getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal))
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly.
+ bool LegalRHSImm = false;
+
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue()-
+ LHSR->getAPIntValue(),
+ N0.getValueType()), Cond);
+ }
+
+ // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+ if (N0.getOpcode() == ISD::XOR)
+ // If we know that all of the inverted bits are zero, don't bother
+ // performing the inversion.
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() -
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+ }
+
+ // Could RHSC fold directly into a compare?
+ if (RHSC->getValueType(0).getSizeInBits() <= 64)
+ LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
+ }
+
+ // Simplify (X+Z) == X --> Z == 0
+ // Don't do this if X is an immediate that can fold into a cmp
+ // instruction and X+Z has other uses. It could be an induction variable
+ // chain, and the transform would increase register pressure.
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
+ DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
+ }
+ }
+ }
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR) {
+ // Simplify X == (X+Z) --> Z == 0
+ if (N1.getOperand(0) == N0)
+ return DAG.getSetCC(dl, VT, N1.getOperand(1),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode()))
+ return DAG.getSetCC(dl, VT, N1.getOperand(0),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ if (N1.getNode()->hasOneUse()) {
+ assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // X == (Z-X) --> X<<1 == Z
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ DAG.getConstant(1, getShiftAmountTy(N0.getValueType())));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+ }
+ }
+ }
+
+ // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+ // Note that where y is variable and is known to have at most
+ // one bit set (for example, if it is z&1) we cannot do this;
+ // the expressions are not equivalent when y==0.
+ if (N0.getOpcode() == ISD::AND)
+ if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+ if (ValueHasExactlyOneBitSet(N1, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ }
+ }
+ if (N1.getOpcode() == ISD::AND)
+ if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+ if (ValueHasExactlyOneBitSet(N0, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ }
+ }
+ }
+
+ // Fold away ALL boolean setcc's.
+ SDValue Temp;
+ if (N0.getValueType() == MVT::i1 && foldBooleans) {
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, MVT::i1);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+ break;
+ }
+ if (VT != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.getNode());
+ // FIXME: If running after legalize, we probably can't do this.
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+ }
+ return N0;
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
+ int64_t &Offset) const {
+ if (isa<GlobalAddressSDNode>(N)) {
+ GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+ GA = GASD->getGlobal();
+ Offset += GASD->getOffset();
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue N1 = N->getOperand(0);
+ SDValue N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+ unsigned S = Constraint.size();
+
+ if (S == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
+ case 's': // Relocatable Constant
+ case 'p': // Address.
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case '<':
+ case '>':
+ return C_Other;
+ }
+ }
+
+ if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
+ if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}"
+ return C_Memory;
+ return C_Register;
+ }
+ return C_Unknown;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock) {
+ Ops.push_back(Op);
+ return;
+ }
+ // fall through
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C are missing.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getZExtValue();
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ C ? C->getDebugLoc() : DebugLoc(),
+ Op.getValueType(), Offs));
+ return;
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ MVT::i64));
+ return;
+ }
+ }
+ break;
+ }
+ }
+}
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint[0] != '{')
+ return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+
+ // Figure out which register class contains this reg.
+ const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo();
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ if (!isLegalRC(RC))
+ continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (RegName.equals_lower(RI->getName(*I))) {
+ std::pair<unsigned, const TargetRegisterClass*> S =
+ std::make_pair(*I, RC);
+
+ // If this register class has the requested value type, return it,
+ // otherwise keep searching and return the first class found
+ // if no other is found which explicitly has the requested type.
+ if (RC->hasType(VT))
+ return S;
+ else if (!R.second)
+ R = S;
+ }
+ }
+ }
+
+ return R;
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true of this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+ ImmutableCallSite CS) const {
+ /// ConstraintOperands - Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ InlineAsm::ConstraintInfoVector
+ ConstraintInfos = IA->ParseConstraints();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT = getSimpleValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = getSimpleValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpInfo.ConstraintVT =
+ MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ break;
+ }
+ } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
+ OpInfo.ConstraintVT = MVT::getIntegerVT(
+ 8*getDataLayout()->getPointerSize(PT->getAddressSpace()));
+ } else {
+ OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
+ }
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (ConstraintInfos.size()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch, e.g. one
+ // is an integer, the other is floating point, or their sizes are
+ // different, flag it as an maCantMatch.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ if (cInfo.Type == InlineAsm::isClobber)
+ continue;
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ getRegForInlineAsmConstraint(Input.ConstraintCode,
+ Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+
+ }
+ }
+
+ return ConstraintOperands;
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+ llvm_unreachable("Invalid constraint type");
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 's': // non-explicit intregal immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float if host format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory that use of the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory. Because of
+/// this the heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // If this is an 'other' constraint, see if the operand is valid for it.
+ // For example, on X86 we might have an 'rI' constraint. If the operand
+ // is an integer in the range [0..31] we want to use I (saving a load
+ // of a register), otherwise we must use 'r'.
+ if (CType == TargetLowering::C_Other && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
+ // This constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *v = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
+ OpInfo.CallOperandVal = v;
+ return;
+ }
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+/// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+ SelectionDAG &DAG) const {
+ ConstantSDNode *C = cast<ConstantSDNode>(Op2);
+ APInt d = C->getAPIntValue();
+ assert(d != 0 && "Division by zero!");
+
+ // Shift the value upfront if it is even, so the LSB is one.
+ unsigned ShAmt = d.countTrailingZeros();
+ if (ShAmt) {
+ // TODO: For UDIV use SRL instead of SRA.
+ SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+ d = d.ashr(ShAmt);
+ }
+
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t, xn = d;
+ while ((t = d*xn) != 1)
+ xn *= APInt(d.getBitWidth(), 2) - t;
+
+ Op2 = DAG.getConstant(xn, Op1.getValueType());
+ return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::
+BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*> *Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl= N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::ms magics = d.magic();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
+ isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhs or equvialent
+ // If d > 0 and m < 0, add the numerator
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::
+BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*> *Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::mu magics = N1C.magicu();
+
+ SDValue Q = N->getOperand(0);
+
+ // If the divisor is even, we can avoid using the expensive fixup by shifting
+ // the divided value upfront.
+ if (magics.a != 0 && !N1C[0]) {
+ unsigned Shift = N1C.countTrailingZeros();
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ // Get magic number for the shifted divisor.
+ magics = N1C.lshr(Shift).magicu(Shift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
+ isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
+ else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhu or equvialent
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < N1C.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..f769b44efbb3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+ : TD(TM.getDataLayout()) {
+}
+
+TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 000000000000..10f64c709c7a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,452 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with
+// ShadowStackGC.
+//
+// In order to support this particular transformation, all stack roots are
+// coallocated in the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CallSite.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ShadowStackGC : public GCStrategy {
+ /// RootChain - This is the global linked-list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ StructType *StackEntryTy;
+ StructType *FrameMapTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+ /// 'yield return' enumerators, This transform allows it to be non-allocating.
+ class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return', 'resume', and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind, resume, and return
+ // do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction*,16> Calls;
+ for (Function::iterator BB = F.begin(),
+ E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(),
+ EE = BB->end(); II != EE; ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return 0;
+
+ // Create a cleanup block.
+ LLVMContext &C = F.getContext();
+ BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
+ Type *ExnTy = StructType::get(Type::getInt8PtrTy(C),
+ Type::getInt32Ty(C), NULL);
+ Constant *PersFn =
+ F.getParent()->
+ getOrInsertFunction("__gcc_personality_v0",
+ FunctionType::get(Type::getInt32Ty(C), true));
+ LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1,
+ "cleanup.lpad",
+ CleanupBB);
+ LPad->setCleanup(true);
+ ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value*,16> Args;
+ for (unsigned I = Calls.size(); I != 0; ) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
+
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
+ NewBB, CleanupBB,
+ Args, CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(RI->getParent(), RI);
+ return &Builder;
+ }
+ }
+ };
+}
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ InitRoots = true;
+ CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+ // doInitialization creates the abstract type of this value.
+ Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant*, 16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+ Metadata.resize(NumMeta);
+
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Int32Ty, Roots.size(), false),
+ ConstantInt::get(Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(FrameMapTy, BaseElts),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)
+ };
+
+ Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()};
+ StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta));
+
+ Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage,
+ FrameMap, "__gc_" + F.getName());
+
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)
+ };
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices);
+}
+
+Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+ // doInitialization creates the generic version of this type.
+ std::vector<Type*> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+
+ return StructType::create(EltTys, "gc_stackentry."+F.getName().str());
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now. If
+/// not, exit fast.
+bool ShadowStackGC::initializeCustomLowering(Module &M) {
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<Type*> EltTys;
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ FrameMapTy = StructType::create(EltTys, "gc_map");
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
+
+ StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(StackEntryTy));
+ EltTys.push_back(FrameMapPtrTy);
+ StackEntryTy->setBody(EltTys);
+ PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(M, StackEntryPtrTy, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy),
+ "llvm_gc_root_chain");
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
+
+bool ShadowStackGC::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGC::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst*, AllocaInst*> Pair = std::make_pair(
+ CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, int Idx2, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2) };
+ Value* Val = B.CreateGEP(BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx) };
+ Value *Val = B.CreateGEP(BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// runOnFunction - Insert code to maintain the shadow stack.
+bool ShadowStackGC::performCustomLowering(Function &F) {
+ LLVMContext &Context = F.getContext();
+
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
+ "gc_frame");
+
+ while (isa<AllocaInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
+ 0,1,"gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
+ StackEntry,0,0,"gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry,
+ StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 000000000000..9ab491808fe5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1152 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+// machine CFG to tightly surround their uses so that execution paths that
+// do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placment of spills/restores in loops: if a CSR is used inside a
+// loop the spills are placed in the loop preheader, and restores are
+// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+// the use info for the CSRs inside the region is propagated outward in the
+// CFG to ensure validity of the spill/restore placements. This decreases
+// the effectiveness of shrink wrapping but does not require edge splitting
+// in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+ cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+ cl::desc("Shrink wrap the specified function"),
+ cl::value_desc("funcname"),
+ cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+ None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+ cl::desc("Print shrink wrapping debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(BasicInfo , "print basic DF sets"),
+ clEnumVal(Iterations, "print SR sets for each iteration"),
+ clEnumVal(Details , "print all DF sets"),
+ clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Convienences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+ assert(LP && "Machine loop is NULL.");
+ MachineBasicBlock* PHDR = LP->getLoopPreheader();
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ PHDR = PLP->getLoopPreheader();
+ PLP = PLP->getParentLoop();
+ }
+ return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+ if (LP == 0)
+ return 0;
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ LP = PLP;
+ PLP = PLP->getParentLoop();
+ }
+ return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+ return (MBB && !MBB->empty() && MBB->back().isReturn());
+}
+
+// Initialize shrink wrapping DFA sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+ AnticIn.clear();
+ AnticOut.clear();
+ AvailIn.clear();
+ AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+ ReturnBlocks.clear();
+ clearAnticAvailSets();
+ UsedCSRegs.clear();
+ CSRUsed.clear();
+ TLLoops.clear();
+ CSRSave.clear();
+ CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() {
+ clearAllSets();
+ EntryBlock = 0;
+#ifndef NDEBUG
+ HasFastExitPath = false;
+#endif
+ ShrinkWrapThisFunction = ShrinkWrapping;
+ // DEBUG: enable or disable shrink wrapping for the current function
+ // via --shrink-wrap-func=<funcname>.
+#ifndef NDEBUG
+ if (ShrinkWrapFunc != "") {
+ std::string MFName = MF->getName().str();
+ ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
+ }
+#endif
+}
+
+
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save, restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+ DEBUG(MF = &Fn);
+
+ initShrinkWrappingInfo();
+
+ DEBUG(if (ShrinkWrapThisFunction) {
+ dbgs() << "Place CSR spills/restores for "
+ << MF->getName() << "\n";
+ });
+
+ if (calculateSets(Fn))
+ placeSpillsAndRestores(Fn);
+}
+
+/// calcAnticInOut - calculate the anticipated in/out reg sets
+/// for the given MBB by looking forward in the MCFG at MBB's
+/// successors.
+///
+bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ CSRegSet prevAnticOut = AnticOut[MBB];
+ MachineBasicBlock* SUCC = successors[i];
+
+ AnticOut[MBB] = AnticIn[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ AnticOut[MBB] &= AnticIn[SUCC];
+ }
+ if (prevAnticOut != AnticOut[MBB])
+ changed = true;
+ }
+
+ // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
+ CSRegSet prevAnticIn = AnticIn[MBB];
+ AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+ if (prevAnticIn != AnticIn[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calcAvailInOut - calculate the available in/out reg sets
+/// for the given MBB by looking backward in the MCFG at MBB's
+/// predecessors.
+///
+bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ CSRegSet prevAvailIn = AvailIn[MBB];
+ MachineBasicBlock* PRED = predecessors[i];
+
+ AvailIn[MBB] = AvailOut[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ AvailIn[MBB] &= AvailOut[PRED];
+ }
+ if (prevAvailIn != AvailIn[MBB])
+ changed = true;
+ }
+
+ // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
+ CSRegSet prevAvailOut = AvailOut[MBB];
+ AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+ if (prevAvailOut != AvailOut[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calculateAnticAvail - build the sets anticipated and available
+/// registers in the MCFG of the current function iteratively,
+/// doing a combined forward and backward analysis.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+ // Initialize data flow sets.
+ clearAnticAvailSets();
+
+ // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ bool changed = true;
+ unsigned iterations = 0;
+ while (changed) {
+ changed = false;
+ ++iterations;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Calculate anticipated in, out regs at MBB from
+ // anticipated at successors of MBB.
+ changed |= calcAnticInOut(MBB);
+
+ // Calculate available in, out regs at MBB from
+ // available at predecessors of MBB.
+ changed |= calcAvailInOut(MBB);
+ }
+ }
+
+ DEBUG({
+ if (ShrinkWrapDebugging >= Details) {
+ dbgs()
+ << "-----------------------------------------------------------\n"
+ << " Antic/Avail Sets:\n"
+ << "-----------------------------------------------------------\n"
+ << "iterations = " << iterations << "\n"
+ << "-----------------------------------------------------------\n"
+ << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
+ << "-----------------------------------------------------------\n";
+
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets(MBB);
+ }
+
+ dbgs()
+ << "-----------------------------------------------------------\n";
+ }
+ });
+}
+
+/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
+/// of the loop given by LP and its parent loops. This prevents spills/restores
+/// from being placed in the bodies of loops.
+///
+void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
+ if (! MBB || !LP)
+ return;
+
+ std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
+ for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* LBB = loopBlocks[i];
+ if (LBB == MBB)
+ continue;
+ if (CSRUsed[LBB].contains(CSRUsed[MBB]))
+ continue;
+ CSRUsed[LBB] |= CSRUsed[MBB];
+ }
+}
+
+/// calculateSets - collect the CSRs used in this function, compute
+/// the DF sets that describe the initial minimal regions in the
+/// Machine CFG around which CSR spills and restores must be placed.
+///
+/// Additionally, this function decides if shrink wrapping should
+/// be disabled for the current function, checking the following:
+/// 1. the current function has more than 500 MBBs: heuristic limit
+/// on function size to reduce compile time impact of the current
+/// iterative algorithm.
+/// 2. all CSRs are used in the entry block.
+/// 3. all CSRs are used in all immediate successors of the entry block.
+/// 4. all CSRs are used in a subset of blocks, each of which dominates
+/// all return blocks. These blocks, taken as a subgraph of the MCFG,
+/// are equivalent to the entry block since all execution paths pass
+/// through them.
+///
+bool PEI::calculateSets(MachineFunction &Fn) {
+ // Sets used to compute spill, restore placement sets.
+ const std::vector<CalleeSavedInfo> CSI =
+ Fn.getFrameInfo()->getCalleeSavedInfo();
+
+ // If no CSRs used, we are done.
+ if (CSI.empty()) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ dbgs() << "DISABLED: " << Fn.getName()
+ << ": uses no callee-saved registers\n");
+ return false;
+ }
+
+ // Save refs to entry and return blocks.
+ EntryBlock = Fn.begin();
+ for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+ MBB != E; ++MBB)
+ if (isReturnBlock(MBB))
+ ReturnBlocks.push_back(MBB);
+
+ // Determine if this function has fast exit paths.
+ DEBUG(if (ShrinkWrapThisFunction)
+ findFastExitPath());
+
+ // Limit shrink wrapping via the current iterative bit vector
+ // implementation to functions with <= 500 MBBs.
+ if (Fn.size() > 500) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ dbgs() << "DISABLED: " << Fn.getName()
+ << ": too large (" << Fn.size() << " MBBs)\n");
+ ShrinkWrapThisFunction = false;
+ }
+
+ // Return now if not shrink wrapping.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ // Collect set of used CSRs.
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ UsedCSRegs.set(inx);
+ }
+
+ // Walk instructions in all MBBs, create CSRUsed[] sets, choose
+ // whether or not to shrink wrap this function.
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+
+ bool allCSRUsesInEntryBlock = true;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ unsigned Reg = CSI[inx].getReg();
+ // If instruction I reads or modifies Reg, add it to UsedCSRegs,
+ // CSRUsed map for the current block.
+ for (unsigned opInx = 0, opEnd = I->getNumOperands();
+ opInx != opEnd; ++opInx) {
+ const MachineOperand &MO = I->getOperand(opInx);
+ if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(Reg, MOReg))) {
+ // CSR Reg is defined/used in block MBB.
+ CSRUsed[MBB].set(inx);
+ // Check for uses in EntryBlock.
+ if (MBB != EntryBlock)
+ allCSRUsesInEntryBlock = false;
+ }
+ }
+ }
+ }
+
+ if (CSRUsed[MBB].empty())
+ continue;
+
+ // Propagate CSRUsed[MBB] in loops
+ if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+ // Add top level loop to work list.
+ MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
+ MachineLoop* PLP = getTopLevelLoopParent(LP);
+
+ if (! HDR) {
+ HDR = PLP->getHeader();
+ assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
+ MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
+ HDR = *PI;
+ }
+ TLLoops[HDR] = PLP;
+
+ // Push uses from inside loop to its parent loops,
+ // or to all other MBBs in its loop.
+ if (LP->getLoopDepth() > 1) {
+ for (MachineLoop* PLP = LP->getParentLoop(); PLP;
+ PLP = PLP->getParentLoop()) {
+ propagateUsesAroundLoop(MBB, PLP);
+ }
+ } else {
+ propagateUsesAroundLoop(MBB, LP);
+ }
+ }
+ }
+
+ if (allCSRUsesInEntryBlock) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getName()
+ << ": all CSRs used in EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ } else {
+ bool allCSRsUsedInEntryFanout = true;
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (CSRUsed[SUCC] != UsedCSRegs)
+ allCSRsUsedInEntryFanout = false;
+ }
+ if (allCSRsUsedInEntryFanout) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getName()
+ << ": all CSRs used in imm successors of EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ }
+ }
+
+ if (ShrinkWrapThisFunction) {
+ // Check if MBB uses CSRs and dominates all exit nodes.
+ // Such nodes are equiv. to the entry node w.r.t.
+ // CSR uses: every path through the function must
+ // pass through this node. If each CSR is used at least
+ // once by these nodes, shrink wrapping is disabled.
+ CSRegSet CSRUsedInChokePoints;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
+ continue;
+ bool dominatesExitNodes = true;
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ if (! DT.dominates(MBB, ReturnBlocks[ri])) {
+ dominatesExitNodes = false;
+ break;
+ }
+ if (dominatesExitNodes) {
+ CSRUsedInChokePoints |= CSRUsed[MBB];
+ if (CSRUsedInChokePoints == UsedCSRegs) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getName()
+ << ": all CSRs used in choke point(s) at "
+ << getBasicBlockName(MBB) << "\n");
+ ShrinkWrapThisFunction = false;
+ break;
+ }
+ }
+ }
+ }
+
+ // Return now if we have decided not to apply shrink wrapping
+ // to the current function.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ DEBUG({
+ dbgs() << "ENABLED: " << Fn.getName();
+ if (HasFastExitPath)
+ dbgs() << " (fast exit path)";
+ dbgs() << "\n";
+ if (ShrinkWrapDebugging >= BasicInfo) {
+ dbgs() << "------------------------------"
+ << "-----------------------------\n";
+ dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
+ if (ShrinkWrapDebugging >= Details) {
+ dbgs() << "------------------------------"
+ << "-----------------------------\n";
+ dumpAllUsed();
+ }
+ }
+ });
+
+ // Build initial DF sets to determine minimal regions in the
+ // Machine CFG around which CSRs must be spilled and restored.
+ calculateAnticAvail(Fn);
+
+ return true;
+}
+
+/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
+/// multi-entry, multi-exit (MEME) regions so spill and restore
+/// placement will not break code that enters or leaves a
+/// shrink-wrapped region by inducing spills with no matching
+/// restores or restores with no matching spills. A MEME region
+/// is a subgraph of the MCFG with multiple entry edges, multiple
+/// exit edges, or both. This code propagates use information
+/// through the MCFG until all paths requiring spills and restores
+/// _outside_ the computed minimal placement regions have been covered.
+///
+bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks) {
+ if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
+ bool processThisBlock = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC->pred_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED->succ_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ }
+ if (! processThisBlock)
+ return false;
+ }
+
+ CSRegSet prop;
+ if (!CSRSave[MBB].empty())
+ prop = CSRSave[MBB];
+ else if (!CSRRestore[MBB].empty())
+ prop = CSRRestore[MBB];
+ else
+ prop = CSRUsed[MBB];
+ if (prop.empty())
+ return false;
+
+ // Propagate selected bits to successors, predecessors of MBB.
+ bool addedUses = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ // Self-loop
+ if (SUCC == MBB)
+ continue;
+ if (! CSRUsed[SUCC].contains(prop)) {
+ CSRUsed[SUCC] |= prop;
+ addedUses = true;
+ blks.push_back(SUCC);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "successor " << getBasicBlockName(SUCC) << "\n");
+ }
+ }
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ // Self-loop
+ if (PRED == MBB)
+ continue;
+ if (! CSRUsed[PRED].contains(prop)) {
+ CSRUsed[PRED] |= prop;
+ addedUses = true;
+ blks.push_back(PRED);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "predecessor " << getBasicBlockName(PRED) << "\n");
+ }
+ }
+ return addedUses;
+}
+
+/// addUsesForTopLevelLoops - add uses for CSRs used inside top
+/// level loops to the exit blocks of those loops.
+///
+bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
+ bool addedUses = false;
+
+ // Place restores for top level loops where needed.
+ for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
+ I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
+ MachineBasicBlock* MBB = I->first;
+ MachineLoop* LP = I->second;
+ MachineBasicBlock* HDR = LP->getHeader();
+ SmallVector<MachineBasicBlock*, 4> exitBlocks;
+ CSRegSet loopSpills;
+
+ loopSpills = CSRSave[MBB];
+ if (CSRSave[MBB].empty()) {
+ loopSpills = CSRUsed[HDR];
+ assert(!loopSpills.empty() && "No CSRs used in loop?");
+ } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
+ continue;
+
+ LP->getExitBlocks(exitBlocks);
+ assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
+ for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* EXB = exitBlocks[i];
+ if (! CSRUsed[EXB].contains(loopSpills)) {
+ CSRUsed[EXB] |= loopSpills;
+ addedUses = true;
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << "LOOP " << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(loopSpills) << ")->"
+ << getBasicBlockName(EXB) << "\n");
+ if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
+ blks.push_back(EXB);
+ }
+ }
+ }
+ return addedUses;
+}
+
+/// calcSpillPlacements - determine which CSRs should be spilled
+/// in MBB using AnticIn sets of MBB's predecessors, keeping track
+/// of changes to spilled reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills) {
+ bool placedSpills = false;
+ // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
+ CSRegSet anticInPreds;
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ MachineBasicBlock* PRED = predecessors[i];
+ anticInPreds = UsedCSRegs - AnticIn[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+ }
+ } else {
+ // Handle uses in entry blocks (which have no predecessors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ anticInPreds = UsedCSRegs;
+ }
+ // Compute spills required at MBB:
+ CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+ if (! CSRSave[MBB].empty()) {
+ if (MBB == EntryBlock) {
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
+ } else {
+ // Reset all regs spilled in MBB that are also spilled in EntryBlock.
+ if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+ CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+ }
+ }
+ }
+ placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+ prevSpills[MBB] = CSRSave[MBB];
+ // Remember this block for adding restores to successor
+ // blocks for multi-entry region.
+ if (placedSpills)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+ return placedSpills;
+}
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's succcessors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores) {
+ bool placedRestores = false;
+ // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
+ CSRegSet availOutSucc;
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ MachineBasicBlock* SUCC = successors[i];
+ availOutSucc = UsedCSRegs - AvailOut[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+ }
+ } else {
+ if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
+ // Handle uses in return blocks (which have no successors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ availOutSucc = UsedCSRegs;
+ }
+ }
+ // Compute restores required at MBB:
+ CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+ // Postprocess restore placements at MBB.
+ // Remove the CSRs that are restored in the return blocks.
+ // Lest this be confusing, note that:
+ // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+ if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
+ if (! CSRSave[EntryBlock].empty())
+ CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+ }
+ placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
+ prevRestores[MBB] = CSRRestore[MBB];
+ // Remember this block for adding saves to predecessor
+ // blocks for multi-entry region.
+ if (placedRestores)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ return placedRestores;
+}
+
+/// placeSpillsAndRestores - place spills and restores of CSRs
+/// used in MBBs in minimal regions that contain the uses.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+ CSRegBlockMap prevCSRSave;
+ CSRegBlockMap prevCSRRestore;
+ SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
+ bool changed = true;
+ unsigned iterations = 0;
+
+ // Iterate computation of spill and restore placements in the MCFG until:
+ // 1. CSR use info has been fully propagated around the MCFG, and
+ // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
+ while (changed) {
+ changed = false;
+ ++iterations;
+
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << "iter " << iterations
+ << " --------------------------------------------------\n");
+
+ // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
+ // which determines the placements of spills and restores.
+ // Keep track of changes to spills, restores in each iteration to
+ // minimize the total iterations.
+ bool SRChanged = false;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Place spills for CSRs in MBB.
+ SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
+
+ // Place restores for CSRs in MBB.
+ SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
+ }
+
+ // Add uses of CSRs used inside loops where needed.
+ changed |= addUsesForTopLevelLoops(cvBlocks);
+
+ // Add uses for CSRs spilled or restored at branch, join points.
+ if (changed || SRChanged) {
+ while (! cvBlocks.empty()) {
+ MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+ changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+ }
+ if (! ncvBlocks.empty()) {
+ cvBlocks = ncvBlocks;
+ ncvBlocks.clear();
+ }
+ }
+
+ if (changed) {
+ calculateAnticAvail(Fn);
+ CSRSave.clear();
+ CSRRestore.clear();
+ }
+ }
+
+ // Check for effectiveness:
+ // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+ // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+ // Gives a measure of how many CSR spills have been moved from EntryBlock
+ // to minimal regions enclosing their uses.
+ CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+ unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+ numSRReduced += numSRReducedThisFunc;
+ DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+ dbgs() << "-----------------------------------------------------------\n";
+ dbgs() << "total iterations = " << iterations << " ( "
+ << Fn.getName()
+ << " " << numSRReducedThisFunc
+ << " " << Fn.size()
+ << " )\n";
+ dbgs() << "-----------------------------------------------------------\n";
+ dumpSRSets();
+ dbgs() << "-----------------------------------------------------------\n";
+ if (numSRReducedThisFunc)
+ verifySpillRestorePlacement();
+ });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// with at least one path from the entry block to a return block
+/// directly or which has a very small number of edges.
+///
+void PEI::findFastExitPath() {
+ if (! EntryBlock)
+ return;
+ // Fina a path from EntryBlock to any return block that does not branch:
+ // Entry
+ // | ...
+ // v |
+ // B1<-----+
+ // |
+ // v
+ // Return
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+
+ // Assume positive, disprove existence of fast path.
+ HasFastExitPath = true;
+
+ // Check the immediate successors.
+ if (isReturnBlock(SUCC)) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << getBasicBlockName(SUCC) << "\n";
+ break;
+ }
+ // Traverse df from SUCC, look for a branch block.
+ std::string exitPath = getBasicBlockName(SUCC);
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+ BE = df_end(SUCC); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ // Reject paths with branch nodes.
+ if (SBB->succ_size() > 1) {
+ HasFastExitPath = false;
+ break;
+ }
+ exitPath += "->" + getBasicBlockName(SBB);
+ }
+ if (HasFastExitPath) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << exitPath << "\n";
+ break;
+ }
+ }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+/// all CSRs spilled at MMBB are restored on all paths
+/// from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+/// all CSRs restored at MBB are spilled on all paths
+/// reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+ unsigned numReturnBlocks = 0;
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+ ++numReturnBlocks;
+ }
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet spilled = BI->second;
+ CSRegSet restored;
+
+ if (spilled.empty())
+ continue;
+
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(spilled)
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ if (CSRRestore[MBB].intersects(spilled)) {
+ restored |= (CSRRestore[MBB] & spilled);
+ }
+
+ // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+ // we must find restores for all spills w/no intervening spills on all
+ // paths from MBB to all return blocks.
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+ BE = df_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ if (SBB == MBB)
+ continue;
+ // Stop when we encounter spills of any CSRs spilled at MBB that
+ // have not yet been seen to be restored.
+ if (CSRSave[SBB].intersects(spilled) &&
+ !restored.contains(CSRSave[SBB] & spilled))
+ break;
+ // Collect the CSRs spilled at MBB that are restored
+ // at this DF successor of MBB.
+ if (CSRRestore[SBB].intersects(spilled))
+ restored |= (CSRRestore[SBB] & spilled);
+ // If we are at a retun block, check that the restores
+ // we have seen so far exhaust the spills at MBB, then
+ // reset the restores.
+ if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+ if (restored != spilled) {
+ CSRegSet notRestored = (spilled - restored);
+ DEBUG(dbgs() << MF->getName() << ": "
+ << stringifyCSRegSet(notRestored)
+ << " spilled at " << getBasicBlockName(MBB)
+ << " are never restored on path to return "
+ << getBasicBlockName(SBB) << "\n");
+ }
+ restored.clear();
+ }
+ }
+ }
+
+ // Check restore placements.
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restored = BI->second;
+ CSRegSet spilled;
+
+ if (restored.empty())
+ continue;
+
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB])
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(restored) << "\n");
+
+ if (CSRSave[MBB].intersects(restored)) {
+ spilled |= (CSRSave[MBB] & restored);
+ }
+ // Walk inverse depth first from MBB to find spills of all
+ // CSRs restored at MBB:
+ for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
+ BE = idf_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* PBB = *BI;
+ if (PBB == MBB)
+ continue;
+ // Stop when we encounter restores of any CSRs restored at MBB that
+ // have not yet been seen to be spilled.
+ if (CSRRestore[PBB].intersects(restored) &&
+ !spilled.contains(CSRRestore[PBB] & restored))
+ break;
+ // Collect the CSRs restored at MBB that are spilled
+ // at this DF predecessor of MBB.
+ if (CSRSave[PBB].intersects(restored))
+ spilled |= (CSRSave[PBB] & restored);
+ }
+ if (spilled != restored) {
+ CSRegSet notSpilled = (restored - spilled);
+ DEBUG(dbgs() << MF->getName() << ": "
+ << stringifyCSRegSet(notSpilled)
+ << " restored at " << getBasicBlockName(MBB)
+ << " are never spilled\n");
+ }
+ }
+}
+
+// Debugging print methods.
+std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
+ if (!MBB)
+ return "";
+
+ if (MBB->getBasicBlock())
+ return MBB->getBasicBlock()->getName().str();
+
+ std::ostringstream name;
+ name << "_MBB_" << MBB->getNumber();
+ return name.str();
+}
+
+std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const std::vector<CalleeSavedInfo> CSI =
+ MF->getFrameInfo()->getCalleeSavedInfo();
+
+ std::ostringstream srep;
+ if (CSI.size() == 0) {
+ srep << "[]";
+ return srep.str();
+ }
+ srep << "[";
+ CSRegSet::iterator I = s.begin(), E = s.end();
+ if (I != E) {
+ unsigned reg = CSI[*I].getReg();
+ srep << TRI->getName(reg);
+ for (++I; I != E; ++I) {
+ reg = CSI[*I].getReg();
+ srep << ",";
+ srep << TRI->getName(reg);
+ }
+ }
+ srep << "]";
+ return srep.str();
+}
+
+void PEI::dumpSet(const CSRegSet& s) {
+ DEBUG(dbgs() << stringifyCSRegSet(s) << "\n");
+}
+
+void PEI::dumpUsed(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpAllUsed() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpUsed(MBB);
+ }
+}
+
+void PEI::dumpSets(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpSets1(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << " | "
+ << stringifyCSRegSet(CSRSave[MBB]) << " | "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpAllSets() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets1(MBB);
+ }
+}
+
+void PEI::dumpSRSets() {
+ DEBUG({
+ for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
+ MBB != E; ++MBB) {
+ if (!CSRSave[MBB].empty()) {
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]);
+ if (CSRRestore[MBB].empty())
+ dbgs() << '\n';
+ }
+
+ if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty())
+ dbgs() << " "
+ << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ }
+ });
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 000000000000..3903743878b4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,502 @@
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sjljehprepare"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+ class SjLjEHPrepare : public FunctionPass {
+ const TargetLoweringBase *TLI;
+ Type *FunctionContextTy;
+ Constant *RegisterFn;
+ Constant *UnregisterFn;
+ Constant *BuiltinSetjmpFn;
+ Constant *FrameAddrFn;
+ Constant *StackAddrFn;
+ Constant *StackRestoreFn;
+ Constant *LSDAAddrFn;
+ Value *PersonalityFn;
+ Constant *CallSiteFn;
+ Constant *FuncCtxFn;
+ AllocaInst *FuncCtx;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL)
+ : FunctionPass(ID), TLI(tli) { }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
+ const char *getPassName() const {
+ return "SJLJ Exception Handling preparation";
+ }
+
+ private:
+ bool setupEntryBlockAndCallSites(Function &F);
+ void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal);
+ Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
+ void lowerIncomingArguments(Function &F);
+ void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
+ void insertCallSiteStore(Instruction *I, int Number);
+ };
+} // end anonymous namespace
+
+char SjLjEHPrepare::ID = 0;
+
+// Public Interface To the SjLjEHPrepare pass.
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) {
+ return new SjLjEHPrepare(TLI);
+}
+// doInitialization - Set up decalarations and types needed to process
+// exceptions.
+bool SjLjEHPrepare::doInitialization(Module &M) {
+ // Build the function context structure.
+ // builtin_setjmp uses a five word jbuf
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ FunctionContextTy =
+ StructType::get(VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ ArrayType::get(Int32Ty, 4), // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ ArrayType::get(VoidPtrTy, 5), // __jbuf
+ NULL);
+ RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ UnregisterFn =
+ M.getOrInsertFunction("_Unwind_SjLj_Unregister",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
+ PersonalityFn = 0;
+
+ return true;
+}
+
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context
+void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
+ IRBuilder<> Builder(I);
+
+ // Get a reference to the call_site field.
+ Type *Int32Ty = Type::getInt32Ty(I->getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ Value *One = ConstantInt::get(Int32Ty, 1);
+ Value *Idxs[2] = { Zero, One };
+ Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
+
+ // Insert a store of the call-site number
+ ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Number);
+ Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/);
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &LiveBBs) {
+ if (!LiveBBs.insert(BB)) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+/// substituteLPadValues - Substitute the values returned by the landingpad
+/// instruction with those returned by the personality function.
+void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
+ SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
+ while (!UseWorkList.empty()) {
+ Value *Val = UseWorkList.pop_back_val();
+ ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
+ if (!EVI) continue;
+ if (EVI->getNumIndices() != 1) continue;
+ if (*EVI->idx_begin() == 0)
+ EVI->replaceAllUsesWith(ExnVal);
+ else if (*EVI->idx_begin() == 1)
+ EVI->replaceAllUsesWith(SelVal);
+ if (EVI->getNumUses() == 0)
+ EVI->eraseFromParent();
+ }
+
+ if (LPI->getNumUses() == 0) return;
+
+ // There are still some uses of LPI. Construct an aggregate with the exception
+ // values and replace the LPI with that aggregate.
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = UndefValue::get(LPadType);
+ IRBuilder<>
+ Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+
+ LPI->replaceAllUsesWith(LPadVal);
+}
+
+/// setupFunctionContext - Allocate the function context on the stack and fill
+/// it with all of the data that we know at this point.
+Value *SjLjEHPrepare::
+setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an alloca
+ // because the value needs to be added to the global context list.
+ unsigned Align =
+ TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
+ FuncCtx =
+ new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
+
+ // Fill in the function context structure.
+ for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
+ LandingPadInst *LPI = LPads[I];
+ IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
+
+ // Reference the __data field.
+ Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data");
+
+ // The exception values come back in context->__data[0].
+ Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0,
+ "exception_gep");
+ Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
+ ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
+
+ Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1,
+ "exn_selector_gep");
+ Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
+
+ substituteLPadValues(LPI, ExnVal, SelVal);
+ }
+
+ // Personality function
+ IRBuilder<> Builder(EntryBB->getTerminator());
+ if (!PersonalityFn)
+ PersonalityFn = LPads[0]->getPersonalityFn();
+ Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3,
+ "pers_fn_gep");
+ Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true);
+
+ // LSDA address
+ Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr");
+ Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep");
+ Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true);
+
+ return FuncCtx;
+}
+
+/// lowerIncomingArguments - To avoid having to handle incoming arguments
+/// specially, we lower each arg to a copy instruction in the entry block. This
+/// ensures that the argument value itself cannot be live out of the entry
+/// block.
+void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
+ BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
+ ++AfterAllocaInsPt;
+
+ for (Function::arg_iterator
+ AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
+ Type *Ty = AI->getType();
+
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+
+ // Set the operand of the instructions back to the AllocaInst.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC =
+ new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp",
+ AfterAllocaInsPt);
+ AI->replaceAllUsesWith(NC);
+
+ // Set the operand of the cast instruction back to the AllocaInst.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However, we're
+ // replacing it here with the same value it was constructed with. We do
+ // this because the above replaceAllUsesWith() clobbered the operand, but
+ // we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
+}
+
+/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
+/// edge and spill them.
+void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst*> Invokes) {
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator
+ BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+ for (BasicBlock::iterator
+ II = BB->begin(), IIE = BB->end(); II != IIE; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*, 16> Users;
+ for (Value::use_iterator
+ UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Find all of the blocks that this value is live in.
+ SmallPtrSet<BasicBlock*, 64> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around "
+ << UnwindBlock->getName() << "\n");
+ NeedsSpill = true;
+ break;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
+ if (NeedsSpill) {
+ DemoteRegToStack(*Inst, true);
+ ++NumSpilled;
+ }
+ }
+ }
+
+ // Go through the landing pads and remove any PHIs there.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ LandingPadInst *LPI = UnwindBlock->getLandingPadInst();
+
+ // Place PHIs into a set to avoid invalidating the iterator.
+ SmallPtrSet<PHINode*, 8> PHIsToDemote;
+ for (BasicBlock::iterator
+ PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
+ PHIsToDemote.insert(cast<PHINode>(PN));
+ if (PHIsToDemote.empty()) continue;
+
+ // Demote the PHIs to the stack.
+ for (SmallPtrSet<PHINode*, 8>::iterator
+ I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I)
+ DemotePHIToStack(*I);
+
+ // Move the landingpad instruction back to the top of the landing pad block.
+ LPI->moveBefore(UnwindBlock->begin());
+ }
+}
+
+/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
+/// the function context and marking the call sites with the appropriate
+/// values. These values are used by the DWARF EH emitter.
+bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
+ SmallVector<ReturnInst*, 16> Returns;
+ SmallVector<InvokeInst*, 16> Invokes;
+ SmallSetVector<LandingPadInst*, 16> LPads;
+
+ // Look through the terminators of the basic blocks to find invokes.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ if (Function *Callee = II->getCalledFunction())
+ if (Callee->isIntrinsic() &&
+ Callee->getIntrinsicID() == Intrinsic::donothing) {
+ // Remove the NOP invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->eraseFromParent();
+ continue;
+ }
+
+ Invokes.push_back(II);
+ LPads.insert(II->getUnwindDest()->getLandingPadInst());
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ Returns.push_back(RI);
+ }
+
+ if (Invokes.empty()) return false;
+
+ NumInvokes += Invokes.size();
+
+ lowerIncomingArguments(F);
+ lowerAcrossUnwindEdges(F, Invokes);
+
+ Value *FuncCtx =
+ setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
+ BasicBlock *EntryBB = F.begin();
+ IRBuilder<> Builder(EntryBB->getTerminator());
+
+ // Get a reference to the jump buffer.
+ Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep");
+
+ // Save the frame pointer.
+ Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep");
+
+ Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
+ Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);
+
+ // Save the stack pointer.
+ Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep");
+
+ Val = Builder.CreateCall(StackAddrFn, "sp");
+ Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
+
+ // Call the setjmp instrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy());
+ Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg);
+
+ // Store a pointer to the function context so that the back-end will know
+ // where to look for it.
+ Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
+ Builder.CreateCall(FuncCtxFn, FuncCtxArg);
+
+ // At this point, we are all set up, update the invoke instructions to mark
+ // their call_site values.
+ for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
+ insertCallSiteStore(Invokes[I], I + 1);
+
+ ConstantInt *CallSiteNum =
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
+
+ // Record the call site value for the back end so it stays associated with
+ // the invoke.
+ CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
+ }
+
+ // Mark call instructions that aren't nounwind as no-action (call_site ==
+ // -1). Skip the entry block, as prior to then, no function context has been
+ // created for this function and any unexpected exceptions thrown will go
+ // directly to the caller's context, which is what we want anyway, so no need
+ // to do anything here.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;)
+ for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!CI->doesNotThrow())
+ insertCallSiteStore(CI, -1);
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
+ insertCallSiteStore(RI, -1);
+ }
+
+ // Register the function context and make sure it's known to not throw
+ CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "",
+ EntryBB->getTerminator());
+ Register->setDoesNotThrow();
+
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (BB == F.begin())
+ continue;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->getCalledFunction() != StackRestoreFn)
+ continue;
+ } else if (!isa<AllocaInst>(I)) {
+ continue;
+ }
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(I);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned I = 0, E = Returns.size(); I != E; ++I)
+ CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]);
+
+ return true;
+}
+
+bool SjLjEHPrepare::runOnFunction(Function &F) {
+ bool Res = setupEntryBlockAndCallSites(F);
+ return Res;
+}
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
new file mode 100644
index 000000000000..20049a89d15d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -0,0 +1,250 @@
+//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "slotindexes"
+
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char SlotIndexes::ID = 0;
+INITIALIZE_PASS(SlotIndexes, "slotindexes",
+ "Slot index numbering", false, false)
+
+STATISTIC(NumLocalRenum, "Number of local renumberings");
+STATISTIC(NumGlobalRenum, "Number of global renumberings");
+
+void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void SlotIndexes::releaseMemory() {
+ mi2iMap.clear();
+ MBBRanges.clear();
+ idx2MBBMap.clear();
+ indexList.clear();
+ ileAllocator.Reset();
+}
+
+bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+
+ // Compute numbering as follows:
+ // Grab an iterator to the start of the index list.
+ // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+ // iterator in lock-step (though skipping it over indexes which have
+ // null pointers in the instruction field).
+ // At each iteration assert that the instruction pointed to in the index
+ // is the same one pointed to by the MI iterator. This
+
+ // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+ // only need to be set up once after the first numbering is computed.
+
+ mf = &fn;
+
+ // Check that the list contains only the sentinal.
+ assert(indexList.empty() && "Index list non-empty at initial numbering?");
+ assert(idx2MBBMap.empty() &&
+ "Index -> MBB mapping non-empty at initial numbering?");
+ assert(MBBRanges.empty() &&
+ "MBB -> Index mapping non-empty at initial numbering?");
+ assert(mi2iMap.empty() &&
+ "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+ unsigned index = 0;
+ MBBRanges.resize(mf->getNumBlockIDs());
+ idx2MBBMap.reserve(mf->size());
+
+ indexList.push_back(createEntry(0, index));
+
+ // Iterate over the function.
+ for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ MachineBasicBlock *mbb = &*mbbItr;
+
+ // Insert an index for the MBB start.
+ SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
+
+ for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ MachineInstr *mi = miItr;
+ if (mi->isDebugValue())
+ continue;
+
+ // Insert a store index for the instr.
+ indexList.push_back(createEntry(mi, index += SlotIndex::InstrDist));
+
+ // Save this base index in the maps.
+ mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(),
+ SlotIndex::Slot_Block)));
+ }
+
+ // We insert one blank instructions between basic blocks.
+ indexList.push_back(createEntry(0, index += SlotIndex::InstrDist));
+
+ MBBRanges[mbb->getNumber()].first = blockStartIndex;
+ MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(),
+ SlotIndex::Slot_Block);
+ idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
+ }
+
+ // Sort the Idx2MBBMap
+ std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+
+ DEBUG(mf->print(dbgs(), this));
+
+ // And we're done!
+ return false;
+}
+
+void SlotIndexes::renumberIndexes() {
+ // Renumber updates the index of every element of the index list.
+ DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
+ ++NumGlobalRenum;
+
+ unsigned index = 0;
+
+ for (IndexList::iterator I = indexList.begin(), E = indexList.end();
+ I != E; ++I) {
+ I->setIndex(index);
+ index += SlotIndex::InstrDist;
+ }
+}
+
+// Renumber indexes locally after curItr was inserted, but failed to get a new
+// index.
+void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
+ // Number indexes with half the default spacing so we can catch up quickly.
+ const unsigned Space = SlotIndex::InstrDist/2;
+ assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+
+ IndexList::iterator startItr = prior(curItr);
+ unsigned index = startItr->getIndex();
+ do {
+ curItr->setIndex(index += Space);
+ ++curItr;
+ // If the next index is bigger, we have caught up.
+ } while (curItr != indexList.end() && curItr->getIndex() <= index);
+
+ DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-'
+ << index << " ***\n");
+ ++NumLocalRenum;
+}
+
+// Repair indexes after adding and removing instructions.
+void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End) {
+ // FIXME: Is this really necessary? The only caller repairIntervalsForRange()
+ // does the same thing.
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !hasIndex(Begin))
+ --Begin;
+ while (End != MBB->end() && !hasIndex(End))
+ ++End;
+
+ bool includeStart = (Begin == MBB->begin());
+ SlotIndex startIdx;
+ if (includeStart)
+ startIdx = getMBBStartIdx(MBB);
+ else
+ startIdx = getInstructionIndex(Begin);
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB);
+ else
+ endIdx = getInstructionIndex(End);
+
+ // FIXME: Conceptually, this code is implementing an iterator on MBB that
+ // optionally includes an additional position prior to MBB->begin(), indicated
+ // by the includeStart flag. This is done so that we can iterate MIs in a MBB
+ // in parallel with SlotIndexes, but there should be a better way to do this.
+ IndexList::iterator ListB = startIdx.listEntry();
+ IndexList::iterator ListI = endIdx.listEntry();
+ MachineBasicBlock::iterator MBBI = End;
+ bool pastStart = false;
+ while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
+ assert(ListI->getIndex() >= startIdx.getIndex() &&
+ (includeStart || !pastStart) &&
+ "Decremented past the beginning of region to repair.");
+
+ MachineInstr *SlotMI = ListI->getInstr();
+ MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0;
+ bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
+
+ if (SlotMI == MI && !MBBIAtBegin) {
+ --ListI;
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) {
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else {
+ --ListI;
+ if (SlotMI)
+ removeMachineInstrFromMaps(SlotMI);
+ }
+ }
+
+ // In theory this could be combined with the previous loop, but it is tricky
+ // to update the IndexList while we are iterating it.
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end())
+ insertMachineInstrInMaps(MI);
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void SlotIndexes::dump() const {
+ for (IndexList::const_iterator itr = indexList.begin();
+ itr != indexList.end(); ++itr) {
+ dbgs() << itr->getIndex() << " ";
+
+ if (itr->getInstr() != 0) {
+ dbgs() << *itr->getInstr();
+ } else {
+ dbgs() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
+ dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
+ << MBBRanges[i].second << ")\n";
+}
+#endif
+
+// Print a SlotIndex to a raw_ostream.
+void SlotIndex::print(raw_ostream &os) const {
+ if (isValid())
+ os << listEntry()->getIndex() << "Berd"[getSlot()];
+ else
+ os << "invalid";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// Dump a SlotIndex to stderr.
+void SlotIndex::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 000000000000..c5bbba3ffccc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,381 @@
+//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the node
+// bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if it
+// is spilled or in a register. These blocks become connections in the Hopfield
+// network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spillplacement"
+#include "SpillPlacement.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+char SpillPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<EdgeBundles>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+///
+struct SpillPlacement::Node {
+ /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle.
+ /// Ideally, these two numbers should be identical, but inaccuracies in the
+ /// block frequency estimates means that we need to normalize ingoing and
+ /// outgoing frequencies separately so they are commensurate.
+ float Scale[2];
+
+ /// Bias - Normalized contributions from non-transparent blocks.
+ /// A bundle connected to a MustSpill block has a huge negative bias,
+ /// otherwise it is a number in the range [-2;2].
+ float Bias;
+
+ /// Value - Output value of this node computed from the Bias and links.
+ /// This is always in the range [-1;1]. A positive number means the variable
+ /// should go in a register through this bundle.
+ float Value;
+
+ typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+
+ /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+ /// bundles. The weights are all positive and add up to at most 2, weights
+ /// from ingoing and outgoing nodes separately add up to a most 1. The weight
+ /// sum can be less than 2 when the variable is not live into / out of some
+ /// connected basic blocks.
+ LinkVector Links;
+
+ /// preferReg - Return true when this node prefers to be in a register.
+ bool preferReg() const {
+ // Undecided nodes (Value==0) go on the stack.
+ return Value > 0;
+ }
+
+ /// mustSpill - Return True if this node is so biased that it must spill.
+ bool mustSpill() const {
+ // Actually, we must spill if Bias < sum(weights).
+ // It may be worth it to compute the weight sum here?
+ return Bias < -2.0f;
+ }
+
+ /// Node - Create a blank Node.
+ Node() {
+ Scale[0] = Scale[1] = 0;
+ }
+
+ /// clear - Reset per-query data, but preserve frequencies that only depend on
+ // the CFG.
+ void clear() {
+ Bias = Value = 0;
+ Links.clear();
+ }
+
+ /// addLink - Add a link to bundle b with weight w.
+ /// out=0 for an ingoing link, and 1 for an outgoing link.
+ void addLink(unsigned b, float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w *= Scale[out];
+
+ // There can be multiple links to the same bundle, add them up.
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ if (I->second == b) {
+ I->first += w;
+ return;
+ }
+ // This must be the first link to b.
+ Links.push_back(std::make_pair(w, b));
+ }
+
+ /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+ /// Return the change to the total number of positive biases.
+ void addBias(float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w *= Scale[out];
+ Bias += w;
+ }
+
+ /// update - Recompute Value from Bias and Links. Return true when node
+ /// preference changes.
+ bool update(const Node nodes[]) {
+ // Compute the weighted sum of inputs.
+ float Sum = Bias;
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ Sum += I->first * nodes[I->second].Value;
+
+ // The weighted sum is going to be in the range [-2;2]. Ideally, we should
+ // simply set Value = sign(Sum), but we will add a dead zone around 0 for
+ // two reasons:
+ // 1. It avoids arbitrary bias when all links are 0 as is possible during
+ // initial iterations.
+ // 2. It helps tame rounding errors when the links nominally sum to 0.
+ const float Thres = 1e-4f;
+ bool Before = preferReg();
+ if (Sum < -Thres)
+ Value = -1;
+ else if (Sum > Thres)
+ Value = 1;
+ else
+ Value = 0;
+ return Before != preferReg();
+ }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ bundles = &getAnalysis<EdgeBundles>();
+ loops = &getAnalysis<MachineLoopInfo>();
+
+ assert(!nodes && "Leaking node array");
+ nodes = new Node[bundles->getNumBundles()];
+
+ // Compute total ingoing and outgoing block frequencies for all bundles.
+ BlockFrequency.resize(mf.getNumBlockIDs());
+ for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
+ float Freq = LiveIntervals::getSpillWeight(true, false,
+ loops->getLoopDepth(I));
+ unsigned Num = I->getNumber();
+ BlockFrequency[Num] = Freq;
+ nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq;
+ nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq;
+ }
+
+ // Scales are reciprocal frequencies.
+ for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i)
+ for (unsigned d = 0; d != 2; ++d)
+ if (nodes[i].Scale[d] > 0)
+ nodes[i].Scale[d] = 1 / nodes[i].Scale[d];
+
+ // We never change the function.
+ return false;
+}
+
+void SpillPlacement::releaseMemory() {
+ delete[] nodes;
+ nodes = 0;
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+ if (ActiveNodes->test(n))
+ return;
+ ActiveNodes->set(n);
+ nodes[n].clear();
+
+ // Very large bundles usually come from big switches, indirect branches,
+ // landing pads, or loops with many 'continue' statements. It is difficult to
+ // allocate registers when so many different blocks are involved.
+ //
+ // Give a small negative bias to large bundles such that 1/32 of the
+ // connected blocks need to be interested before we consider expanding the
+ // region through the bundle. This helps compile time by limiting the number
+ // of blocks visited and the number of links in the Hopfield network.
+ if (bundles->getBlocks(n).size() > 100)
+ nodes[n].Bias = -0.0625f;
+}
+
+
+/// addConstraints - Compute node biases and weights from a set of constraints.
+/// Set a bit in NodeMask for each active node.
+void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
+ for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(),
+ E = LiveBlocks.end(); I != E; ++I) {
+ float Freq = getBlockFrequency(I->Number);
+ const float Bias[] = {
+ 0, // DontCare,
+ 1, // PrefReg,
+ -1, // PrefSpill
+ 0, // PrefBoth
+ -HUGE_VALF // MustSpill
+ };
+
+ // Live-in to block?
+ if (I->Entry != DontCare) {
+ unsigned ib = bundles->getBundle(I->Number, 0);
+ activate(ib);
+ nodes[ib].addBias(Freq * Bias[I->Entry], 1);
+ }
+
+ // Live-out from block?
+ if (I->Exit != DontCare) {
+ unsigned ob = bundles->getBundle(I->Number, 1);
+ activate(ob);
+ nodes[ob].addBias(Freq * Bias[I->Exit], 0);
+ }
+ }
+}
+
+/// addPrefSpill - Same as addConstraints(PrefSpill)
+void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ float Freq = getBlockFrequency(*I);
+ if (Strong)
+ Freq += Freq;
+ unsigned ib = bundles->getBundle(*I, 0);
+ unsigned ob = bundles->getBundle(*I, 1);
+ activate(ib);
+ activate(ob);
+ nodes[ib].addBias(-Freq, 1);
+ nodes[ob].addBias(-Freq, 0);
+ }
+}
+
+void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
+ for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
+ ++I) {
+ unsigned Number = *I;
+ unsigned ib = bundles->getBundle(Number, 0);
+ unsigned ob = bundles->getBundle(Number, 1);
+
+ // Ignore self-loops.
+ if (ib == ob)
+ continue;
+ activate(ib);
+ activate(ob);
+ if (nodes[ib].Links.empty() && !nodes[ib].mustSpill())
+ Linked.push_back(ib);
+ if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
+ Linked.push_back(ob);
+ float Freq = getBlockFrequency(Number);
+ nodes[ib].addLink(ob, Freq, 1);
+ nodes[ob].addLink(ib, Freq, 0);
+ }
+}
+
+bool SpillPlacement::scanActiveBundles() {
+ Linked.clear();
+ RecentPositive.clear();
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+ nodes[n].update(nodes);
+ // A node that must spill, or a node without any links is not going to
+ // change its value ever again, so exclude it from iterations.
+ if (nodes[n].mustSpill())
+ continue;
+ if (!nodes[n].Links.empty())
+ Linked.push_back(n);
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ return !RecentPositive.empty();
+}
+
+/// iterate - Repeatedly update the Hopfield nodes until stability or the
+/// maximum number of iterations is reached.
+/// @param Linked - Numbers of linked nodes that need updating.
+void SpillPlacement::iterate() {
+ // First update the recently positive nodes. They have likely received new
+ // negative bias that will turn them off.
+ while (!RecentPositive.empty())
+ nodes[RecentPositive.pop_back_val()].update(nodes);
+
+ if (Linked.empty())
+ return;
+
+ // Run up to 10 iterations. The edge bundle numbering is closely related to
+ // basic block numbering, so there is a strong tendency towards chains of
+ // linked nodes with sequential numbers. By scanning the linked nodes
+ // backwards and forwards, we make it very likely that a single node can
+ // affect the entire network in a single iteration. That means very fast
+ // convergence, usually in a single iteration.
+ for (unsigned iteration = 0; iteration != 10; ++iteration) {
+ // Scan backwards, skipping the last node which was just updated.
+ bool Changed = false;
+ for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
+ llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+ unsigned n = *I;
+ if (nodes[n].update(nodes)) {
+ Changed = true;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ }
+ if (!Changed || !RecentPositive.empty())
+ return;
+
+ // Scan forwards, skipping the first node which was just updated.
+ Changed = false;
+ for (SmallVectorImpl<unsigned>::const_iterator I =
+ llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+ unsigned n = *I;
+ if (nodes[n].update(nodes)) {
+ Changed = true;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ }
+ if (!Changed || !RecentPositive.empty())
+ return;
+ }
+}
+
+void SpillPlacement::prepare(BitVector &RegBundles) {
+ Linked.clear();
+ RecentPositive.clear();
+ // Reuse RegBundles as our ActiveNodes vector.
+ ActiveNodes = &RegBundles;
+ ActiveNodes->clear();
+ ActiveNodes->resize(bundles->getNumBundles());
+}
+
+bool
+SpillPlacement::finish() {
+ assert(ActiveNodes && "Call prepare() first");
+
+ // Write preferences back to ActiveNodes.
+ bool Perfect = true;
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n))
+ if (!nodes[n].preferReg()) {
+ ActiveNodes->reset(n);
+ Perfect = false;
+ }
+ ActiveNodes = 0;
+ return Perfect;
+}
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 000000000000..fc412f817cdb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,156 @@
+//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling information
+// about the CFG. The real work is done by prepare(), addConstraints(), and
+// finish() which are called by the register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBasicBlock;
+class MachineLoopInfo;
+
+class SpillPlacement : public MachineFunctionPass {
+ struct Node;
+ const MachineFunction *MF;
+ const EdgeBundles *bundles;
+ const MachineLoopInfo *loops;
+ Node *nodes;
+
+ // Nodes that are active in the current computation. Owned by the prepare()
+ // caller.
+ BitVector *ActiveNodes;
+
+ // Nodes with active links. Populated by scanActiveBundles.
+ SmallVector<unsigned, 8> Linked;
+
+ // Nodes that went positive during the last call to scanActiveBundles or
+ // iterate.
+ SmallVector<unsigned, 8> RecentPositive;
+
+ // Block frequencies are computed once. Indexed by block number.
+ SmallVector<float, 4> BlockFrequency;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ SpillPlacement() : MachineFunctionPass(ID), nodes(0) {}
+ ~SpillPlacement() { releaseMemory(); }
+
+ /// BorderConstraint - A basic block has separate constraints for entry and
+ /// exit.
+ enum BorderConstraint {
+ DontCare, ///< Block doesn't care / variable not live.
+ PrefReg, ///< Block entry/exit prefers a register.
+ PrefSpill, ///< Block entry/exit prefers a stack slot.
+ PrefBoth, ///< Block entry prefers both register and stack.
+ MustSpill ///< A register is impossible, variable must be spilled.
+ };
+
+ /// BlockConstraint - Entry and exit constraints for a basic block.
+ struct BlockConstraint {
+ unsigned Number; ///< Basic block number (from MBB::getNumber()).
+ BorderConstraint Entry : 8; ///< Constraint on block entry.
+ BorderConstraint Exit : 8; ///< Constraint on block exit.
+
+ /// True when this block changes the value of the live range. This means
+ /// the block has a non-PHI def. When this is false, a live-in value on
+ /// the stack can be live-out on the stack without inserting a spill.
+ bool ChangesValue;
+ };
+
+ /// prepare - Reset state and prepare for a new spill placement computation.
+ /// @param RegBundles Bit vector to receive the edge bundles where the
+ /// variable should be kept in a register. Each bit
+ /// corresponds to an edge bundle, a set bit means the
+ /// variable should be kept in a register through the
+ /// bundle. A clear bit means the variable should be
+ /// spilled. This vector is retained.
+ void prepare(BitVector &RegBundles);
+
+ /// addConstraints - Add constraints and biases. This method may be called
+ /// more than once to accumulate constraints.
+ /// @param LiveBlocks Constraints for blocks that have the variable live in or
+ /// live out.
+ void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+
+ /// addPrefSpill - Add PrefSpill constraints to all blocks listed. This is
+ /// equivalent to calling addConstraint with identical BlockConstraints with
+ /// Entry = Exit = PrefSpill, and ChangesValue = false.
+ ///
+ /// @param Blocks Array of block numbers that prefer to spill in and out.
+ /// @param Strong When true, double the negative bias for these blocks.
+ void addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong);
+
+ /// addLinks - Add transparent blocks with the given numbers.
+ void addLinks(ArrayRef<unsigned> Links);
+
+ /// scanActiveBundles - Perform an initial scan of all bundles activated by
+ /// addConstraints and addLinks, updating their state. Add all the bundles
+ /// that now prefer a register to RecentPositive.
+ /// Prepare internal data structures for iterate.
+ /// Return true is there are any positive nodes.
+ bool scanActiveBundles();
+
+ /// iterate - Update the network iteratively until convergence, or new bundles
+ /// are found.
+ void iterate();
+
+ /// getRecentPositive - Return an array of bundles that became positive during
+ /// the previous call to scanActiveBundles or iterate.
+ ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
+
+ /// finish - Compute the optimal spill code placement given the
+ /// constraints. No MustSpill constraints will be violated, and the smallest
+ /// possible number of PrefX constraints will be violated, weighted by
+ /// expected execution frequencies.
+ /// The selected bundles are returned in the bitvector passed to prepare().
+ /// @return True if a perfect solution was found, allowing the variable to be
+ /// in a register through all relevant bundles.
+ bool finish();
+
+ /// getBlockFrequency - Return the estimated block execution frequency per
+ /// function invocation.
+ float getBlockFrequency(unsigned Number) const {
+ return BlockFrequency[Number];
+ }
+
+private:
+ virtual bool runOnMachineFunction(MachineFunction&);
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ virtual void releaseMemory();
+
+ void activate(unsigned);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp
new file mode 100644
index 000000000000..209792fd407b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Spiller.cpp
@@ -0,0 +1,194 @@
+//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+
+#include "Spiller.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+ enum SpillerName { trivial, inline_ };
+}
+
+static cl::opt<SpillerName>
+spillerOpt("spiller",
+ cl::desc("Spiller to use: (default: standard)"),
+ cl::Prefix,
+ cl::values(clEnumVal(trivial, "trivial spiller"),
+ clEnumValN(inline_, "inline", "inline spiller"),
+ clEnumValEnd),
+ cl::init(trivial));
+
+// Spiller virtual destructor implementation.
+Spiller::~Spiller() {}
+
+namespace {
+
+/// Utility class for spillers.
+class SpillerBase : public Spiller {
+protected:
+ MachineFunctionPass *pass;
+ MachineFunction *mf;
+ VirtRegMap *vrm;
+ LiveIntervals *lis;
+ MachineFrameInfo *mfi;
+ MachineRegisterInfo *mri;
+ const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
+
+ /// Construct a spiller base.
+ SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+ : pass(&pass), mf(&mf), vrm(&vrm)
+ {
+ lis = &pass.getAnalysis<LiveIntervals>();
+ mfi = mf.getFrameInfo();
+ mri = &mf.getRegInfo();
+ tii = mf.getTarget().getInstrInfo();
+ tri = mf.getTarget().getRegisterInfo();
+ }
+
+ /// Add spill ranges for every use/def of the live interval, inserting loads
+ /// immediately before each use, and stores after each def. No folding or
+ /// remat is attempted.
+ void trivialSpillEverywhere(LiveRangeEdit& LRE) {
+ LiveInterval* li = &LRE.getParent();
+
+ DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
+
+ assert(li->weight != HUGE_VALF &&
+ "Attempting to spill already spilled value.");
+
+ assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
+ "Trying to spill a stack slot.");
+
+ DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
+
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+ unsigned ss = vrm->assignVirt2StackSlot(li->reg);
+
+ // Iterate over reg uses/defs.
+ for (MachineRegisterInfo::reg_iterator
+ regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
+
+ // Grab the use/def instr.
+ MachineInstr *mi = &*regItr;
+
+ DEBUG(dbgs() << " Processing " << *mi);
+
+ // Step regItr to the next use/def instr.
+ do {
+ ++regItr;
+ } while (regItr != mri->reg_end() && (&*regItr == mi));
+
+ // Collect uses & defs for this instr.
+ SmallVector<unsigned, 2> indices;
+ bool hasUse = false;
+ bool hasDef = false;
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
+ MachineOperand &op = mi->getOperand(i);
+ if (!op.isReg() || op.getReg() != li->reg)
+ continue;
+ hasUse |= mi->getOperand(i).isUse();
+ hasDef |= mi->getOperand(i).isDef();
+ indices.push_back(i);
+ }
+
+ // Create a new vreg & interval for this instr.
+ LiveInterval *newLI = &LRE.create();
+ newLI->weight = HUGE_VALF;
+
+ // Update the reg operands & kill flags.
+ for (unsigned i = 0; i < indices.size(); ++i) {
+ unsigned mopIdx = indices[i];
+ MachineOperand &mop = mi->getOperand(mopIdx);
+ mop.setReg(newLI->reg);
+ if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
+ mop.setIsKill(true);
+ }
+ }
+ assert(hasUse || hasDef);
+
+ // Insert reload if necessary.
+ MachineBasicBlock::iterator miItr(mi);
+ if (hasUse) {
+ tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc,
+ tri);
+ MachineInstr *loadInstr(prior(miItr));
+ SlotIndex loadIndex =
+ lis->InsertMachineInstrInMaps(loadInstr).getRegSlot();
+ SlotIndex endIndex = loadIndex.getNextIndex();
+ VNInfo *loadVNI =
+ newLI->getNextValue(loadIndex, lis->getVNInfoAllocator());
+ newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
+ }
+
+ // Insert store if necessary.
+ if (hasDef) {
+ tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg,
+ true, ss, trc, tri);
+ MachineInstr *storeInstr(llvm::next(miItr));
+ SlotIndex storeIndex =
+ lis->InsertMachineInstrInMaps(storeInstr).getRegSlot();
+ SlotIndex beginIndex = storeIndex.getPrevIndex();
+ VNInfo *storeVNI =
+ newLI->getNextValue(beginIndex, lis->getVNInfoAllocator());
+ newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
+ }
+ }
+ }
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Spills any live range using the spill-everywhere method with no attempt at
+/// folding.
+class TrivialSpiller : public SpillerBase {
+public:
+
+ TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : SpillerBase(pass, mf, vrm) {}
+
+ void spill(LiveRangeEdit &LRE) {
+ // Ignore spillIs - we don't use it.
+ trivialSpillEverywhere(LRE);
+ }
+};
+
+} // end anonymous namespace
+
+void Spiller::anchor() { }
+
+llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ switch (spillerOpt) {
+ case trivial: return new TrivialSpiller(pass, mf, vrm);
+ case inline_: return createInlineSpiller(pass, mf, vrm);
+ }
+ llvm_unreachable("Invalid spiller optimization");
+}
diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h
new file mode 100644
index 000000000000..b7d5beaab1b2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Spiller.h
@@ -0,0 +1,47 @@
+//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLER_H
+#define LLVM_CODEGEN_SPILLER_H
+
+namespace llvm {
+
+ class LiveRangeEdit;
+ class MachineFunction;
+ class MachineFunctionPass;
+ class VirtRegMap;
+
+ /// Spiller interface.
+ ///
+ /// Implementations are utility classes which insert spill or remat code on
+ /// demand.
+ class Spiller {
+ virtual void anchor();
+ public:
+ virtual ~Spiller() = 0;
+
+ /// spill - Spill the LRE.getParent() live interval.
+ virtual void spill(LiveRangeEdit &LRE) = 0;
+
+ };
+
+ /// Create and return a spiller object, as specified on the command line.
+ Spiller* createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
+
+ /// Create and return a spiller that will insert spill code directly instead
+ /// of deferring though VirtRegMap.
+ Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
new file mode 100644
index 000000000000..0a3818e43ff9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -0,0 +1,1432 @@
+//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "SplitKit.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+STATISTIC(NumFinished, "Number of splits finished");
+STATISTIC(NumSimple, "Number of splits that were simple");
+STATISTIC(NumCopies, "Number of copies inserted for splitting");
+STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
+STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
+
+//===----------------------------------------------------------------------===//
+// Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
+ const LiveIntervals &lis,
+ const MachineLoopInfo &mli)
+ : MF(vrm.getMachineFunction()),
+ VRM(vrm),
+ LIS(lis),
+ Loops(mli),
+ TII(*MF.getTarget().getInstrInfo()),
+ CurLI(0),
+ LastSplitPoint(MF.getNumBlockIDs()) {}
+
+void SplitAnalysis::clear() {
+ UseSlots.clear();
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ CurLI = 0;
+ DidRepairRange = false;
+}
+
+SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
+ const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
+ const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
+ std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+ // Compute split points on the first call. The pair is independent of the
+ // current live interval.
+ if (!LSP.first.isValid()) {
+ MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
+ if (FirstTerm == MBB->end())
+ LSP.first = MBBEnd;
+ else
+ LSP.first = LIS.getInstructionIndex(FirstTerm);
+
+ // If there is a landing pad successor, also find the call instruction.
+ if (!LPad)
+ return LSP.first;
+ // There may not be a call instruction (?) in which case we ignore LPad.
+ LSP.second = LSP.first;
+ for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
+ I != E;) {
+ --I;
+ if (I->isCall()) {
+ LSP.second = LIS.getInstructionIndex(I);
+ break;
+ }
+ }
+ }
+
+ // If CurLI is live into a landing pad successor, move the last split point
+ // back to the call that may throw.
+ if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad))
+ return LSP.first;
+
+ // Find the value leaving MBB.
+ const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd);
+ if (!VNI)
+ return LSP.first;
+
+ // If the value leaving MBB was defined after the call in MBB, it can't
+ // really be live-in to the landing pad. This can happen if the landing pad
+ // has a PHI, and this register is undef on the exceptional edge.
+ // <rdar://problem/10664933>
+ if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd)
+ return LSP.first;
+
+ // Value is properly live-in to the landing pad.
+ // Only allow splits before the call.
+ return LSP.second;
+}
+
+MachineBasicBlock::iterator
+SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) {
+ SlotIndex LSP = getLastSplitPoint(MBB->getNumber());
+ if (LSP == LIS.getMBBEndIdx(MBB))
+ return MBB->end();
+ return LIS.getInstructionFromIndex(LSP);
+}
+
+/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
+void SplitAnalysis::analyzeUses() {
+ assert(UseSlots.empty() && "Call clear first");
+
+ // First get all the defs from the interval values. This provides the correct
+ // slots for early clobbers.
+ for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(),
+ E = CurLI->vni_end(); I != E; ++I)
+ if (!(*I)->isPHIDef() && !(*I)->isUnused())
+ UseSlots.push_back((*I)->def);
+
+ // Get use slots form the use-def chain.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
+ ++I)
+ if (!I.getOperand().isUndef())
+ UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot());
+
+ array_pod_sort(UseSlots.begin(), UseSlots.end());
+
+ // Remove duplicates, keeping the smaller slot for each instruction.
+ // That is what we want for early clobbers.
+ UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(),
+ SlotIndex::isSameInstr),
+ UseSlots.end());
+
+ // Compute per-live block info.
+ if (!calcLiveBlockInfo()) {
+ // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
+ // I am looking at you, RegisterCoalescer!
+ DidRepairRange = true;
+ ++NumRepairs;
+ DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
+ const_cast<LiveIntervals&>(LIS)
+ .shrinkToUses(const_cast<LiveInterval*>(CurLI));
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ bool fixed = calcLiveBlockInfo();
+ (void)fixed;
+ assert(fixed && "Couldn't fix broken live interval");
+ }
+
+ DEBUG(dbgs() << "Analyze counted "
+ << UseSlots.size() << " instrs in "
+ << UseBlocks.size() << " blocks, through "
+ << NumThroughBlocks << " blocks.\n");
+}
+
+/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
+/// where CurLI is live.
+bool SplitAnalysis::calcLiveBlockInfo() {
+ ThroughBlocks.resize(MF.getNumBlockIDs());
+ NumThroughBlocks = NumGapBlocks = 0;
+ if (CurLI->empty())
+ return true;
+
+ LiveInterval::const_iterator LVI = CurLI->begin();
+ LiveInterval::const_iterator LVE = CurLI->end();
+
+ SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+ UseI = UseSlots.begin();
+ UseE = UseSlots.end();
+
+ // Loop over basic blocks where CurLI is live.
+ MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ for (;;) {
+ BlockInfo BI;
+ BI.MBB = MFI;
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ // If the block contains no uses, the range must be live through. At one
+ // point, RegisterCoalescer could create dangling ranges that ended
+ // mid-block.
+ if (UseI == UseE || *UseI >= Stop) {
+ ++NumThroughBlocks;
+ ThroughBlocks.set(BI.MBB->getNumber());
+ // The range shouldn't end mid-block if there are no uses. This shouldn't
+ // happen.
+ if (LVI->end < Stop)
+ return false;
+ } else {
+ // This block has uses. Find the first and last uses in the block.
+ BI.FirstInstr = *UseI;
+ assert(BI.FirstInstr >= Start);
+ do ++UseI;
+ while (UseI != UseE && *UseI < Stop);
+ BI.LastInstr = UseI[-1];
+ assert(BI.LastInstr < Stop);
+
+ // LVI is the first live segment overlapping MBB.
+ BI.LiveIn = LVI->start <= Start;
+
+ // When not live in, the first use should be a def.
+ if (!BI.LiveIn) {
+ assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+ assert(LVI->start == BI.FirstInstr && "First instr should be a def");
+ BI.FirstDef = BI.FirstInstr;
+ }
+
+ // Look for gaps in the live range.
+ BI.LiveOut = true;
+ while (LVI->end < Stop) {
+ SlotIndex LastStop = LVI->end;
+ if (++LVI == LVE || LVI->start >= Stop) {
+ BI.LiveOut = false;
+ BI.LastInstr = LastStop;
+ break;
+ }
+
+ if (LastStop < LVI->start) {
+ // There is a gap in the live range. Create duplicate entries for the
+ // live-in snippet and the live-out snippet.
+ ++NumGapBlocks;
+
+ // Push the Live-in part.
+ BI.LiveOut = false;
+ UseBlocks.push_back(BI);
+ UseBlocks.back().LastInstr = LastStop;
+
+ // Set up BI for the live-out part.
+ BI.LiveIn = false;
+ BI.LiveOut = true;
+ BI.FirstInstr = BI.FirstDef = LVI->start;
+ }
+
+ // A LiveRange that starts in the middle of the block must be a def.
+ assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+ if (!BI.FirstDef)
+ BI.FirstDef = LVI->start;
+ }
+
+ UseBlocks.push_back(BI);
+
+ // LVI is now at LVE or LVI->end >= Stop.
+ if (LVI == LVE)
+ break;
+ }
+
+ // Live segment ends exactly at Stop. Move to the next segment.
+ if (LVI->end == Stop && ++LVI == LVE)
+ break;
+
+ // Pick the next basic block.
+ if (LVI->start < Stop)
+ ++MFI;
+ else
+ MFI = LIS.getMBBFromIndex(LVI->start);
+ }
+
+ assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
+ return true;
+}
+
+unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
+ if (cli->empty())
+ return 0;
+ LiveInterval *li = const_cast<LiveInterval*>(cli);
+ LiveInterval::iterator LVI = li->begin();
+ LiveInterval::iterator LVE = li->end();
+ unsigned Count = 0;
+
+ // Loop over basic blocks where li is live.
+ MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MFI);
+ for (;;) {
+ ++Count;
+ LVI = li->advanceTo(LVI, Stop);
+ if (LVI == LVE)
+ return Count;
+ do {
+ ++MFI;
+ Stop = LIS.getMBBEndIdx(MFI);
+ } while (Stop <= LVI->start);
+ }
+}
+
+bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
+ unsigned OrigReg = VRM.getOriginal(CurLI->reg);
+ const LiveInterval &Orig = LIS.getInterval(OrigReg);
+ assert(!Orig.empty() && "Splitting empty interval?");
+ LiveInterval::const_iterator I = Orig.find(Idx);
+
+ // Range containing Idx should begin at Idx.
+ if (I != Orig.end() && I->start <= Idx)
+ return I->start == Idx;
+
+ // Range does not contain Idx, previous must end at Idx.
+ return I != Orig.begin() && (--I)->end == Idx;
+}
+
+void SplitAnalysis::analyze(const LiveInterval *li) {
+ clear();
+ CurLI = li;
+ analyzeUses();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Split Editor
+//===----------------------------------------------------------------------===//
+
+/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+SplitEditor::SplitEditor(SplitAnalysis &sa,
+ LiveIntervals &lis,
+ VirtRegMap &vrm,
+ MachineDominatorTree &mdt)
+ : SA(sa), LIS(lis), VRM(vrm),
+ MRI(vrm.getMachineFunction().getRegInfo()),
+ MDT(mdt),
+ TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+ TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
+ Edit(0),
+ OpenIdx(0),
+ SpillMode(SM_Partition),
+ RegAssign(Allocator)
+{}
+
+void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
+ Edit = &LRE;
+ SpillMode = SM;
+ OpenIdx = 0;
+ RegAssign.clear();
+ Values.clear();
+
+ // Reset the LiveRangeCalc instances needed for this spill mode.
+ LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ if (SpillMode)
+ LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+
+ // We don't need an AliasAnalysis since we will only be performing
+ // cheap-as-a-copy remats anyway.
+ Edit->anyRematerializable(0);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void SplitEditor::dump() const {
+ if (RegAssign.empty()) {
+ dbgs() << " empty\n";
+ return;
+ }
+
+ for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+ dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+ dbgs() << '\n';
+}
+#endif
+
+VNInfo *SplitEditor::defValue(unsigned RegIdx,
+ const VNInfo *ParentVNI,
+ SlotIndex Idx) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Create a new value.
+ VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
+
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator, bool> InsP =
+ Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id),
+ ValueForcePair(VNI, false)));
+
+ // This was the first time (RegIdx, ParentVNI) was mapped.
+ // Keep it as a simple def without any liveness.
+ if (InsP.second)
+ return VNI;
+
+ // If the previous value was a simple mapping, add liveness for it now.
+ if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
+ SlotIndex Def = OldVNI->def;
+ LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI));
+ // No longer a simple mapping. Switch to a complex, non-forced mapping.
+ InsP.first->second = ValueForcePair();
+ }
+
+ // This is a complex mapping, add liveness for VNI
+ SlotIndex Def = VNI->def;
+ LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
+
+ return VNI;
+}
+
+void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
+ assert(ParentVNI && "Mapping NULL value");
+ ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)];
+ VNInfo *VNI = VFP.getPointer();
+
+ // ParentVNI was either unmapped or already complex mapped. Either way, just
+ // set the force bit.
+ if (!VNI) {
+ VFP.setInt(true);
+ return;
+ }
+
+ // This was previously a single mapping. Make sure the old def is represented
+ // by a trivial live range.
+ SlotIndex Def = VNI->def;
+ Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
+ // Mark as complex mapped, forced.
+ VFP = ValueForcePair(0, true);
+}
+
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineInstr *CopyMI = 0;
+ SlotIndex Def;
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // We may be trying to avoid interference that ends at a deleted instruction,
+ // so always begin RegIdx 0 early and all others late.
+ bool Late = RegIdx != 0;
+
+ // Attempt cheap-as-a-copy rematerialization.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ if (Edit->canRematerializeAt(RM, UseIdx, true)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+ ++NumRemats;
+ } else {
+ // Can't remat, just insert a copy from parent.
+ CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
+ .addReg(Edit->getReg());
+ Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late)
+ .getRegSlot();
+ ++NumCopies;
+ }
+
+ // Define the value in Reg.
+ return defValue(RegIdx, ParentVNI, Def);
+}
+
+/// Create a new virtual register and live interval.
+unsigned SplitEditor::openIntv() {
+ // Create the complement as index 0.
+ if (Edit->empty())
+ Edit->create();
+
+ // Create the open interval.
+ OpenIdx = Edit->size();
+ Edit->create();
+ return OpenIdx;
+}
+
+void SplitEditor::selectIntv(unsigned Idx) {
+ assert(Idx != 0 && "Cannot select the complement interval");
+ assert(Idx < Edit->size() && "Can only select previously opened interval");
+ DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n');
+ OpenIdx = Idx;
+}
+
+SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvBefore");
+ DEBUG(dbgs() << " enterIntvBefore " << Idx);
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvAfter");
+ DEBUG(dbgs() << " enterIntvAfter " << Idx);
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvAfter called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
+ SlotIndex End = LIS.getMBBEndIdx(&MBB);
+ SlotIndex Last = End.getPrevSlot();
+ DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return End;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id);
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
+ SA.getLastSplitPointIter(&MBB));
+ RegAssign.insert(VNI->def, End, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+/// useIntv - indicate that all instructions in MBB should use OpenLI.
+void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
+ useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB));
+}
+
+void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before useIntv");
+ DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAfter");
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx);
+
+ // The interval must be live beyond the instruction at Idx.
+ SlotIndex Boundary = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Boundary.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Boundary);
+ assert(MI && "No instruction at index");
+
+ // In spill mode, make live ranges as short as possible by inserting the copy
+ // before MI. This is only possible if that instruction doesn't redefine the
+ // value. The inserted COPY is not a kill, and we don't need to recompute
+ // the source live range. The spiller also won't try to hoist this copy.
+ if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) &&
+ MI->readsVirtualRegister(Edit->getReg())) {
+ forceRecompute(0, ParentVNI);
+ defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return Idx;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvBefore");
+ DEBUG(dbgs() << " leaveIntvBefore " << Idx);
+
+ // The interval must be live into the instruction at Idx.
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
+ SlotIndex Start = LIS.getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
+
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Start;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
+ MBB.SkipPHIsAndLabels(MBB.begin()));
+ RegAssign.insert(Start, VNI->def, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before overlapIntv");
+ const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) &&
+ "Parent changes value in extended range");
+ assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
+ "Range cannot span basic blocks");
+
+ // The complement interval will be extended as needed by LRCalc.extend().
+ if (ParentVNI)
+ forceRecompute(0, ParentVNI);
+ DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+//===----------------------------------------------------------------------===//
+// Spill modes
+//===----------------------------------------------------------------------===//
+
+void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
+ LiveInterval *LI = Edit->get(0);
+ DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");
+ RegAssignMap::iterator AssignI;
+ AssignI.setMap(RegAssign);
+
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ VNInfo *VNI = Copies[i];
+ SlotIndex Def = VNI->def;
+ MachineInstr *MI = LIS.getInstructionFromIndex(Def);
+ assert(MI && "No instruction for back-copy");
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ bool AtBegin;
+ do AtBegin = MBBI == MBB->begin();
+ while (!AtBegin && (--MBBI)->isDebugValue());
+
+ DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
+ LI->removeValNo(VNI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+
+ // Adjust RegAssign if a register assignment is killed at VNI->def. We
+ // want to avoid calculating the live range of the source register if
+ // possible.
+ AssignI.find(Def.getPrevSlot());
+ if (!AssignI.valid() || AssignI.start() >= Def)
+ continue;
+ // If MI doesn't kill the assigned register, just leave it.
+ if (AssignI.stop() != Def)
+ continue;
+ unsigned RegIdx = AssignI.value();
+ if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) {
+ DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
+ forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
+ } else {
+ SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot();
+ DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
+ AssignI.setStop(Kill);
+ }
+ }
+}
+
+MachineBasicBlock*
+SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB) {
+ if (MBB == DefMBB)
+ return MBB;
+ assert(MDT.dominates(DefMBB, MBB) && "MBB must be dominated by the def.");
+
+ const MachineLoopInfo &Loops = SA.Loops;
+ const MachineLoop *DefLoop = Loops.getLoopFor(DefMBB);
+ MachineDomTreeNode *DefDomNode = MDT[DefMBB];
+
+ // Best candidate so far.
+ MachineBasicBlock *BestMBB = MBB;
+ unsigned BestDepth = UINT_MAX;
+
+ for (;;) {
+ const MachineLoop *Loop = Loops.getLoopFor(MBB);
+
+ // MBB isn't in a loop, it doesn't get any better. All dominators have a
+ // higher frequency by definition.
+ if (!Loop) {
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " at depth 0\n");
+ return MBB;
+ }
+
+ // We'll never be able to exit the DefLoop.
+ if (Loop == DefLoop) {
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " in the same loop\n");
+ return MBB;
+ }
+
+ // Least busy dominator seen so far.
+ unsigned Depth = Loop->getLoopDepth();
+ if (Depth < BestDepth) {
+ BestMBB = MBB;
+ BestDepth = Depth;
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " at depth " << Depth << '\n');
+ }
+
+ // Leave loop by going to the immediate dominator of the loop header.
+ // This is a bigger stride than simply walking up the dominator tree.
+ MachineDomTreeNode *IDom = MDT[Loop->getHeader()]->getIDom();
+
+ // Too far up the dominator tree?
+ if (!IDom || !MDT.dominates(DefDomNode, IDom))
+ return BestMBB;
+
+ MBB = IDom->getBlock();
+ }
+}
+
+void SplitEditor::hoistCopiesForSize() {
+ // Get the complement interval, always RegIdx 0.
+ LiveInterval *LI = Edit->get(0);
+ LiveInterval *Parent = &Edit->getParent();
+
+ // Track the nearest common dominator for all back-copies for each ParentVNI,
+ // indexed by ParentVNI->id.
+ typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
+ SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
+
+ // Find the nearest common dominator for parent values with multiple
+ // back-copies. If a single back-copy dominates, put it in DomPair.second.
+ for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ assert(ParentVNI && "Parent not live at complement def");
+
+ // Don't hoist remats. The complement is probably going to disappear
+ // completely anyway.
+ if (Edit->didRematerialize(ParentVNI))
+ continue;
+
+ MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
+ DomPair &Dom = NearestDom[ParentVNI->id];
+
+ // Keep directly defined parent values. This is either a PHI or an
+ // instruction in the complement range. All other copies of ParentVNI
+ // should be eliminated.
+ if (VNI->def == ParentVNI->def) {
+ DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n');
+ Dom = DomPair(ValMBB, VNI->def);
+ continue;
+ }
+ // Skip the singly mapped values. There is nothing to gain from hoisting a
+ // single back-copy.
+ if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) {
+ DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n');
+ continue;
+ }
+
+ if (!Dom.first) {
+ // First time we see ParentVNI. VNI dominates itself.
+ Dom = DomPair(ValMBB, VNI->def);
+ } else if (Dom.first == ValMBB) {
+ // Two defs in the same block. Pick the earlier def.
+ if (!Dom.second.isValid() || VNI->def < Dom.second)
+ Dom.second = VNI->def;
+ } else {
+ // Different basic blocks. Check if one dominates.
+ MachineBasicBlock *Near =
+ MDT.findNearestCommonDominator(Dom.first, ValMBB);
+ if (Near == ValMBB)
+ // Def ValMBB dominates.
+ Dom = DomPair(ValMBB, VNI->def);
+ else if (Near != Dom.first)
+ // None dominate. Hoist to common dominator, need new def.
+ Dom = DomPair(Near, SlotIndex());
+ }
+
+ DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
+ << " for parent " << ParentVNI->id << '@' << ParentVNI->def
+ << " hoist to BB#" << Dom.first->getNumber() << ' '
+ << Dom.second << '\n');
+ }
+
+ // Insert the hoisted copies.
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
+ DomPair &Dom = NearestDom[i];
+ if (!Dom.first || Dom.second.isValid())
+ continue;
+ // This value needs a hoisted copy inserted at the end of Dom.first.
+ VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
+ // Get a less loopy dominator than Dom.first.
+ Dom.first = findShallowDominator(Dom.first, DefMBB);
+ SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
+ Dom.second =
+ defFromParent(0, ParentVNI, Last, *Dom.first,
+ SA.getLastSplitPointIter(Dom.first))->def;
+ }
+
+ // Remove redundant back-copies that are now known to be dominated by another
+ // def with the same value.
+ SmallVector<VNInfo*, 8> BackCopies;
+ for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ const DomPair &Dom = NearestDom[ParentVNI->id];
+ if (!Dom.first || Dom.second == VNI->def)
+ continue;
+ BackCopies.push_back(VNI);
+ forceRecompute(0, ParentVNI);
+ }
+ removeBackCopies(BackCopies);
+}
+
+
+/// transferValues - Transfer all possible values to the new live ranges.
+/// Values that were rematerialized are left alone, they need LRCalc.extend().
+bool SplitEditor::transferValues() {
+ bool Skipped = false;
+ RegAssignMap::const_iterator AssignI = RegAssign.begin();
+ for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(),
+ ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) {
+ DEBUG(dbgs() << " blit " << *ParentI << ':');
+ VNInfo *ParentVNI = ParentI->valno;
+ // RegAssign has holes where RegIdx 0 should be used.
+ SlotIndex Start = ParentI->start;
+ AssignI.advanceTo(Start);
+ do {
+ unsigned RegIdx;
+ SlotIndex End = ParentI->end;
+ if (!AssignI.valid()) {
+ RegIdx = 0;
+ } else if (AssignI.start() <= Start) {
+ RegIdx = AssignI.value();
+ if (AssignI.stop() < End) {
+ End = AssignI.stop();
+ ++AssignI;
+ }
+ } else {
+ RegIdx = 0;
+ End = std::min(End, AssignI.start());
+ }
+
+ // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
+ DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Check for a simply defined value that can be blitted directly.
+ ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
+ if (VNInfo *VNI = VFP.getPointer()) {
+ DEBUG(dbgs() << ':' << VNI->id);
+ LI->addRange(LiveRange(Start, End, VNI));
+ Start = End;
+ continue;
+ }
+
+ // Skip values with forced recomputation.
+ if (VFP.getInt()) {
+ DEBUG(dbgs() << "(recalc)");
+ Skipped = true;
+ Start = End;
+ continue;
+ }
+
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
+
+ // This value has multiple defs in RegIdx, but it wasn't rematerialized,
+ // so the live range is accurate. Add live-in blocks in [Start;End) to the
+ // LiveInBlocks.
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex BlockStart, BlockEnd;
+ tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB);
+
+ // The first block may be live-in, or it may have its own def.
+ if (Start != BlockStart) {
+ VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
+ assert(VNI && "Missing def for complex mapped value");
+ DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
+ // MBB has its own def. Is it also live-out?
+ if (BlockEnd <= End)
+ LRC.setLiveOutValue(MBB, VNI);
+
+ // Skip to the next block for live-in.
+ ++MBB;
+ BlockStart = BlockEnd;
+ }
+
+ // Handle the live-in blocks covered by [Start;End).
+ assert(Start <= BlockStart && "Expected live-in block");
+ while (BlockStart < End) {
+ DEBUG(dbgs() << ">BB#" << MBB->getNumber());
+ BlockEnd = LIS.getMBBEndIdx(MBB);
+ if (BlockStart == ParentVNI->def) {
+ // This block has the def of a parent PHI, so it isn't live-in.
+ assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
+ VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
+ assert(VNI && "Missing def for complex mapped parent PHI");
+ if (End >= BlockEnd)
+ LRC.setLiveOutValue(MBB, VNI); // Live-out as well.
+ } else {
+ // This block needs a live-in value. The last block covered may not
+ // be live-out.
+ if (End < BlockEnd)
+ LRC.addLiveInBlock(LI, MDT[MBB], End);
+ else {
+ // Live-through, and we don't know the value.
+ LRC.addLiveInBlock(LI, MDT[MBB]);
+ LRC.setLiveOutValue(MBB, 0);
+ }
+ }
+ BlockStart = BlockEnd;
+ ++MBB;
+ }
+ Start = End;
+ } while (Start != ParentI->end);
+ DEBUG(dbgs() << '\n');
+ }
+
+ LRCalc[0].calculateValues();
+ if (SpillMode)
+ LRCalc[1].calculateValues();
+
+ return Skipped;
+}
+
+void SplitEditor::extendPHIKillRanges() {
+ // Extend live ranges to be live-out for successor PHI values.
+ for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+ E = Edit->getParent().vni_end(); I != E; ++I) {
+ const VNInfo *PHIVNI = *I;
+ if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+ LiveInterval *LI = Edit->get(RegIdx);
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex End = LIS.getMBBEndIdx(*PI);
+ SlotIndex LastUse = End.getPrevSlot();
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ if (Edit->getParent().liveAt(LastUse)) {
+ assert(RegAssign.lookup(LastUse) == RegIdx &&
+ "Different register assignment in phi predecessor");
+ LRC.extend(LI, End);
+ }
+ }
+ }
+}
+
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ // LiveDebugVariables should have handled all DBG_VALUE instructions.
+ if (MI->isDebugValue()) {
+ DEBUG(dbgs() << "Zapping " << *MI);
+ MO.setReg(0);
+ continue;
+ }
+
+ // <undef> operands don't really read the register, so it doesn't matter
+ // which register we choose. When the use operand is tied to a def, we must
+ // use the same register as the def, so just do that always.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (MO.isDef() || MO.isUndef())
+ Idx = Idx.getRegSlot(MO.isEarlyClobber());
+
+ // Rewrite to the mapped register at Idx.
+ unsigned RegIdx = RegAssign.lookup(Idx);
+ LiveInterval *LI = Edit->get(RegIdx);
+ MO.setReg(LI->reg);
+ DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ << Idx << ':' << RegIdx << '\t' << *MI);
+
+ // Extend liveness to Idx if the instruction reads reg.
+ if (!ExtendRanges || MO.isUndef())
+ continue;
+
+ // Skip instructions that don't read Reg.
+ if (MO.isDef()) {
+ if (!MO.getSubReg() && !MO.isEarlyClobber())
+ continue;
+ // We may wan't to extend a live range for a partial redef, or for a use
+ // tied to an early clobber.
+ Idx = Idx.getPrevSlot();
+ if (!Edit->getParent().liveAt(Idx))
+ continue;
+ } else
+ Idx = Idx.getRegSlot(true);
+
+ getLRCalc(RegIdx).extend(LI, Idx.getNextSlot());
+ }
+}
+
+void SplitEditor::deleteRematVictims() {
+ SmallVector<MachineInstr*, 8> Dead;
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
+ LiveInterval *LI = *I;
+ for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
+ LII != LIE; ++LII) {
+ // Dead defs end at the dead slot.
+ if (LII->end != LII->valno->def.getDeadSlot())
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def);
+ assert(MI && "Missing instruction for dead def");
+ MI->addRegisterDead(LI->reg, &TRI);
+
+ if (!MI->allDefsAreDead())
+ continue;
+
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ Dead.push_back(MI);
+ }
+ }
+
+ if (Dead.empty())
+ return;
+
+ Edit->eliminateDeadDefs(Dead);
+}
+
+void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
+ ++NumFinished;
+
+ // At this point, the live intervals in Edit contain VNInfos corresponding to
+ // the inserted copies.
+
+ // Add the original defs from the parent interval.
+ for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+ E = Edit->getParent().vni_end(); I != E; ++I) {
+ const VNInfo *ParentVNI = *I;
+ if (ParentVNI->isUnused())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
+ defValue(RegIdx, ParentVNI, ParentVNI->def);
+
+ // Force rematted values to be recomputed everywhere.
+ // The new live ranges may be truncated.
+ if (Edit->didRematerialize(ParentVNI))
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ forceRecompute(i, ParentVNI);
+ }
+
+ // Hoist back-copies to the complement interval when in spill mode.
+ switch (SpillMode) {
+ case SM_Partition:
+ // Leave all back-copies as is.
+ break;
+ case SM_Size:
+ hoistCopiesForSize();
+ break;
+ case SM_Speed:
+ llvm_unreachable("Spill mode 'speed' not implemented yet");
+ }
+
+ // Transfer the simply mapped values, check if any are skipped.
+ bool Skipped = transferValues();
+ if (Skipped)
+ extendPHIKillRanges();
+ else
+ ++NumSimple;
+
+ // Rewrite virtual registers, possibly extending ranges.
+ rewriteAssigned(Skipped);
+
+ // Delete defs that were rematted everywhere.
+ if (Skipped)
+ deleteRematVictims();
+
+ // Get rid of unused values and set phi-kill flags.
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
+ (*I)->RenumberValues(LIS);
+
+ // Provide a reverse mapping from original indices to Edit ranges.
+ if (LRMap) {
+ LRMap->clear();
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ LRMap->push_back(i);
+ }
+
+ // Now check if any registers were separated into multiple components.
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
+ // Don't use iterators, they are invalidated by create() below.
+ LiveInterval *li = Edit->get(i);
+ unsigned NumComp = ConEQ.Classify(li);
+ if (NumComp <= 1)
+ continue;
+ DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n');
+ SmallVector<LiveInterval*, 8> dups;
+ dups.push_back(li);
+ for (unsigned j = 1; j != NumComp; ++j)
+ dups.push_back(&Edit->create());
+ ConEQ.Distribute(&dups[0], MRI);
+ // The new intervals all map back to i.
+ if (LRMap)
+ LRMap->resize(Edit->size(), i);
+ }
+
+ // Calculate spill weight and allocation hints for new intervals.
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops);
+
+ assert(!LRMap || LRMap->size() == Edit->size());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Single Block Splitting
+//===----------------------------------------------------------------------===//
+
+bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI,
+ bool SingleInstrs) const {
+ // Always split for multiple instructions.
+ if (!BI.isOneInstr())
+ return true;
+ // Don't split for single instructions unless explicitly requested.
+ if (!SingleInstrs)
+ return false;
+ // Splitting a live-through range always makes progress.
+ if (BI.LiveIn && BI.LiveOut)
+ return true;
+ // No point in isolating a copy. It has no register class constraints.
+ if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike())
+ return false;
+ // Finally, don't isolate an end point that was created by earlier splits.
+ return isOriginalEndpoint(BI.FirstInstr);
+}
+
+void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
+ openIntv();
+ SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber());
+ SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr,
+ LastSplitPoint));
+ if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastInstr));
+ } else {
+ // The last use is after the last valid split point.
+ SlotIndex SegStop = leaveIntvBefore(LastSplitPoint);
+ useIntv(SegStart, SegStop);
+ overlapIntv(SegStop, BI.LastInstr);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Global Live Range Splitting Support
+//===----------------------------------------------------------------------===//
+
+// These methods support a method of global live range splitting that uses a
+// global algorithm to decide intervals for CFG edges. They will insert split
+// points and color intervals in basic blocks while avoiding interference.
+//
+// Note that splitSingleBlock is also useful for blocks where both CFG edges
+// are on the stack.
+
+void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter){
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
+
+ DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
+ << ") intf " << LeaveBefore << '-' << EnterAfter
+ << ", live-through " << IntvIn << " -> " << IntvOut);
+
+ assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
+
+ assert((!LeaveBefore || LeaveBefore < Stop) && "Interference after block");
+ assert((!IntvIn || !LeaveBefore || LeaveBefore > Start) && "Impossible intf");
+ assert((!EnterAfter || EnterAfter >= Start) && "Interference before block");
+
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+
+ if (!IntvOut) {
+ DEBUG(dbgs() << ", spill on entry.\n");
+ //
+ // <<<<<<<<< Possible LeaveBefore interference.
+ // |-----------| Live through.
+ // -____________ Spill on entry.
+ //
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAtTop(*MBB);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (!IntvIn) {
+ DEBUG(dbgs() << ", reload on exit.\n");
+ //
+ // >>>>>>> Possible EnterAfter interference.
+ // |-----------| Live through.
+ // ___________-- Reload on exit.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAtEnd(*MBB);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) {
+ DEBUG(dbgs() << ", straight through.\n");
+ //
+ // |-----------| Live through.
+ // ------------- Straight through, same intv, no interference.
+ //
+ selectIntv(IntvOut);
+ useIntv(Start, Stop);
+ return;
+ }
+
+ // We cannot legally insert splits after LSP.
+ SlotIndex LSP = SA.getLastSplitPoint(MBBNum);
+ assert((!IntvOut || !EnterAfter || EnterAfter < LSP) && "Impossible intf");
+
+ if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
+ LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
+ DEBUG(dbgs() << ", switch avoiding interference.\n");
+ //
+ // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ------======= Switch intervals between interference.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx;
+ if (LeaveBefore && LeaveBefore < LSP) {
+ Idx = enterIntvBefore(LeaveBefore);
+ useIntv(Idx, Stop);
+ } else {
+ Idx = enterIntvAtEnd(*MBB);
+ }
+ selectIntv(IntvIn);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ DEBUG(dbgs() << ", create local intv for interference.\n");
+ //
+ // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ==---------== Switch intervals before/after interference.
+ //
+ assert(LeaveBefore <= EnterAfter && "Missed case");
+
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ selectIntv(IntvIn);
+ Idx = leaveIntvBefore(LeaveBefore);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+}
+
+
+void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
+ << ", reg-in " << IntvIn << ", leave before " << LeaveBefore
+ << (BI.LiveOut ? ", stack-out" : ", killed in block"));
+
+ assert(IntvIn && "Must have register in");
+ assert(BI.LiveIn && "Must be live-in");
+ assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
+
+ if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) {
+ DEBUG(dbgs() << " before interference.\n");
+ //
+ // <<< Interference after kill.
+ // |---o---x | Killed in block.
+ // ========= Use IntvIn everywhere.
+ //
+ selectIntv(IntvIn);
+ useIntv(Start, BI.LastInstr);
+ return;
+ }
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) {
+ //
+ // <<< Possible interference after last use.
+ // |---o---o---| Live-out on stack.
+ // =========____ Leave IntvIn after last use.
+ //
+ // < Interference after last use.
+ // |---o---o--o| Live-out on stack, late last use.
+ // ============ Copy to stack after LSP, overlap IntvIn.
+ // \_____ Stack interval is live-out.
+ //
+ if (BI.LastInstr < LSP) {
+ DEBUG(dbgs() << ", spill after last use before interference.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAfter(BI.LastInstr);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ } else {
+ DEBUG(dbgs() << ", spill before last split point.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvBefore(LSP);
+ overlapIntv(Idx, BI.LastInstr);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ }
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvIn. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ unsigned LocalIntv = openIntv();
+ (void)LocalIntv;
+ DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
+
+ if (!BI.LiveOut || BI.LastInstr < LSP) {
+ //
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o---| Live-out on stack.
+ // =====----____ Leave IntvIn before interference, then spill.
+ //
+ SlotIndex To = leaveIntvAfter(BI.LastInstr);
+ SlotIndex From = enterIntvBefore(LeaveBefore);
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+ return;
+ }
+
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o--o| Live-out on stack, late last use.
+ // =====------- Copy to stack before LSP, overlap LocalIntv.
+ // \_____ Stack interval is live-out.
+ //
+ SlotIndex To = leaveIntvBefore(LSP);
+ overlapIntv(To, BI.LastInstr);
+ SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore));
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+}
+
+void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
+ << ", reg-out " << IntvOut << ", enter after " << EnterAfter
+ << (BI.LiveIn ? ", stack-in" : ", defined in block"));
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ assert(IntvOut && "Must have register out");
+ assert(BI.LiveOut && "Must be live-out");
+ assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
+
+ if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) {
+ DEBUG(dbgs() << " after interference.\n");
+ //
+ // >>>> Interference before def.
+ // | o---o---| Defined in block.
+ // ========= Use IntvOut everywhere.
+ //
+ selectIntv(IntvOut);
+ useIntv(BI.FirstInstr, Stop);
+ return;
+ }
+
+ if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) {
+ DEBUG(dbgs() << ", reload after interference.\n");
+ //
+ // >>>> Interference before def.
+ // |---o---o---| Live-through, stack-in.
+ // ____========= Enter IntvOut before first use.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstInstr));
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvOut. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ DEBUG(dbgs() << ", interference overlaps uses.\n");
+ //
+ // >>>>>>> Interference overlapping uses.
+ // |---o---o---| Live-through, stack-in.
+ // ____---====== Create local interval for interference range.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ openIntv();
+ SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr));
+ useIntv(From, Idx);
+}
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
new file mode 100644
index 000000000000..4005a3d5cbbf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -0,0 +1,469 @@
+//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPLITKIT_H
+#define LLVM_CODEGEN_SPLITKIT_H
+
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class ConnectedVNInfoEqClasses;
+class LiveInterval;
+class LiveIntervals;
+class LiveRangeEdit;
+class MachineInstr;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+class VNInfo;
+class raw_ostream;
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
+class SplitAnalysis {
+public:
+ const MachineFunction &MF;
+ const VirtRegMap &VRM;
+ const LiveIntervals &LIS;
+ const MachineLoopInfo &Loops;
+ const TargetInstrInfo &TII;
+
+ /// Additional information about basic blocks where the current variable is
+ /// live. Such a block will look like one of these templates:
+ ///
+ /// 1. | o---x | Internal to block. Variable is only live in this block.
+ /// 2. |---x | Live-in, kill.
+ /// 3. | o---| Def, live-out.
+ /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks.
+ /// 5. |---o---o---| Live-through with uses or defs.
+ /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks.
+ ///
+ /// Two BlockInfo entries are created for template 4. One for the live-in
+ /// segment, and one for the live-out segment. These entries look as if the
+ /// block were split in the middle where the live range isn't live.
+ ///
+ /// Live-through blocks without any uses don't get BlockInfo entries. They
+ /// are simply listed in ThroughBlocks instead.
+ ///
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ SlotIndex FirstInstr; ///< First instr accessing current reg.
+ SlotIndex LastInstr; ///< Last instr accessing current reg.
+ SlotIndex FirstDef; ///< First non-phi valno->def, or SlotIndex().
+ bool LiveIn; ///< Current reg is live in.
+ bool LiveOut; ///< Current reg is live out.
+
+ /// isOneInstr - Returns true when this BlockInfo describes a single
+ /// instruction.
+ bool isOneInstr() const {
+ return SlotIndex::isSameInstr(FirstInstr, LastInstr);
+ }
+ };
+
+private:
+ // Current live interval.
+ const LiveInterval *CurLI;
+
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
+
+ /// LastSplitPoint - Last legal split point in each basic block in the current
+ /// function. The first entry is the first terminator, the second entry is the
+ /// last valid split point for a variable that is live in to a landing pad
+ /// successor.
+ SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint;
+
+ /// UseBlocks - Blocks where CurLI has uses.
+ SmallVector<BlockInfo, 8> UseBlocks;
+
+ /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where
+ /// the live range has a gap.
+ unsigned NumGapBlocks;
+
+ /// ThroughBlocks - Block numbers where CurLI is live through without uses.
+ BitVector ThroughBlocks;
+
+ /// NumThroughBlocks - Number of live-through blocks.
+ unsigned NumThroughBlocks;
+
+ /// DidRepairRange - analyze was forced to shrinkToUses().
+ bool DidRepairRange;
+
+ SlotIndex computeLastSplitPoint(unsigned Num);
+
+ // Sumarize statistics by counting instructions using CurLI.
+ void analyzeUses();
+
+ /// calcLiveBlockInfo - Compute per-block information about CurLI.
+ bool calcLiveBlockInfo();
+
+public:
+ SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+ const MachineLoopInfo &mli);
+
+ /// analyze - set CurLI to the specified interval, and analyze how it may be
+ /// split.
+ void analyze(const LiveInterval *li);
+
+ /// didRepairRange() - Returns true if CurLI was invalid and has been repaired
+ /// by analyze(). This really shouldn't happen, but sometimes the coalescer
+ /// can create live ranges that end in mid-air.
+ bool didRepairRange() const { return DidRepairRange; }
+
+ /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+ /// new interval.
+ void clear();
+
+ /// getParent - Return the last analyzed interval.
+ const LiveInterval &getParent() const { return *CurLI; }
+
+ /// getLastSplitPoint - Return the base index of the last valid split point
+ /// in the basic block numbered Num.
+ SlotIndex getLastSplitPoint(unsigned Num) {
+ // Inline the common simple case.
+ if (LastSplitPoint[Num].first.isValid() &&
+ !LastSplitPoint[Num].second.isValid())
+ return LastSplitPoint[Num].first;
+ return computeLastSplitPoint(Num);
+ }
+
+ /// getLastSplitPointIter - Returns the last split point as an iterator.
+ MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*);
+
+ /// isOriginalEndpoint - Return true if the original live range was killed or
+ /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
+ /// and 'use' for an early-clobber def.
+ /// This can be used to recognize code inserted by earlier live range
+ /// splitting.
+ bool isOriginalEndpoint(SlotIndex Idx) const;
+
+ /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI.
+ /// This include both use and def operands, at most one entry per instruction.
+ ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; }
+
+ /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
+ /// where CurLI has uses.
+ ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
+
+ /// getNumThroughBlocks - Return the number of through blocks.
+ unsigned getNumThroughBlocks() const { return NumThroughBlocks; }
+
+ /// isThroughBlock - Return true if CurLI is live through MBB without uses.
+ bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); }
+
+ /// getThroughBlocks - Return the set of through blocks.
+ const BitVector &getThroughBlocks() const { return ThroughBlocks; }
+
+ /// getNumLiveBlocks - Return the number of blocks where CurLI is live.
+ unsigned getNumLiveBlocks() const {
+ return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks();
+ }
+
+ /// countLiveBlocks - Return the number of blocks where li is live. This is
+ /// guaranteed to return the same number as getNumLiveBlocks() after calling
+ /// analyze(li).
+ unsigned countLiveBlocks(const LiveInterval *li) const;
+
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+
+ /// shouldSplitSingleBlock - Returns true if it would help to create a local
+ /// live range for the instructions in BI. There is normally no benefit to
+ /// creating a live range for a single instruction, but it does enable
+ /// register class inflation if the instruction has a restricted register
+ /// class.
+ ///
+ /// @param BI The block to be isolated.
+ /// @param SingleInstrs True when single instructions should be isolated.
+ bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const;
+};
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is exited with exitIntv*.
+/// - Finish the current interval with closeIntv and repeat from 2.
+/// - Rewrite instructions with finish().
+///
+class SplitEditor {
+ SplitAnalysis &SA;
+ LiveIntervals &LIS;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ MachineDominatorTree &MDT;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+public:
+
+ /// ComplementSpillMode - Select how the complement live range should be
+ /// created. SplitEditor automatically creates interval 0 to contain
+ /// anything that isn't added to another interval. This complement interval
+ /// can get quite complicated, and it can sometimes be an advantage to allow
+ /// it to overlap the other intervals. If it is going to spill anyway, no
+ /// registers are wasted by keeping a value in two places at the same time.
+ enum ComplementSpillMode {
+ /// SM_Partition(Default) - Try to create the complement interval so it
+ /// doesn't overlap any other intervals, and the original interval is
+ /// partitioned. This may require a large number of back copies and extra
+ /// PHI-defs. Only segments marked with overlapIntv will be overlapping.
+ SM_Partition,
+
+ /// SM_Size - Overlap intervals to minimize the number of inserted COPY
+ /// instructions. Copies to the complement interval are hoisted to their
+ /// common dominator, so only one COPY is required per value in the
+ /// complement interval. This also means that no extra PHI-defs need to be
+ /// inserted in the complement interval.
+ SM_Size,
+
+ /// SM_Speed - Overlap intervals to minimize the expected execution
+ /// frequency of the inserted copies. This is very similar to SM_Size, but
+ /// the complement interval may get some extra PHI-defs.
+ SM_Speed
+ };
+
+private:
+
+ /// Edit - The current parent register and new intervals created.
+ LiveRangeEdit *Edit;
+
+ /// Index into Edit of the currently open interval.
+ /// The index 0 is used for the complement, so the first interval started by
+ /// openIntv will be 1.
+ unsigned OpenIdx;
+
+ /// The current spill mode, selected by reset().
+ ComplementSpillMode SpillMode;
+
+ typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+
+ /// Allocator for the interval map. This will eventually be shared with
+ /// SlotIndexes and LiveIntervals.
+ RegAssignMap::Allocator Allocator;
+
+ /// RegAssign - Map of the assigned register indexes.
+ /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+ /// Idx.
+ RegAssignMap RegAssign;
+
+ typedef PointerIntPair<VNInfo*, 1> ValueForcePair;
+ typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap;
+
+ /// Values - keep track of the mapping from parent values to values in the new
+ /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+ ///
+ /// 1. No entry - the value is not mapped to Edit.get(RegIdx).
+ /// 2. (Null, false) - the value is mapped to multiple values in
+ /// Edit.get(RegIdx). Each value is represented by a minimal live range at
+ /// its def. The full live range can be inferred exactly from the range
+ /// of RegIdx in RegAssign.
+ /// 3. (Null, true). As above, but the ranges in RegAssign are too large, and
+ /// the live range must be recomputed using LiveRangeCalc::extend().
+ /// 4. (VNI, false) The value is mapped to a single new value.
+ /// The new value has no live ranges anywhere.
+ ValueMap Values;
+
+ /// LRCalc - Cache for computing live ranges and SSA update. Each instance
+ /// can only handle non-overlapping live ranges, so use a separate
+ /// LiveRangeCalc instance for the complement interval when in spill mode.
+ LiveRangeCalc LRCalc[2];
+
+ /// getLRCalc - Return the LRCalc to use for RegIdx. In spill mode, the
+ /// complement interval can overlap the other intervals, so it gets its own
+ /// LRCalc instance. When not in spill mode, all intervals can share one.
+ LiveRangeCalc &getLRCalc(unsigned RegIdx) {
+ return LRCalc[SpillMode != SM_Partition && RegIdx != 0];
+ }
+
+ /// defValue - define a value in RegIdx from ParentVNI at Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in ParentLI. The new value is added to the value
+ /// map.
+ /// Return the new LI value.
+ VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// forceRecompute - Force the live range of ParentVNI in RegIdx to be
+ /// recomputed by LiveRangeCalc::extend regardless of the number of defs.
+ /// This is used for values whose live range doesn't match RegAssign exactly.
+ /// They could have rematerialized, or back-copies may have been moved.
+ void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI);
+
+ /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+ /// rematerialization or a COPY from parent. Return the new value.
+ VNInfo *defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+ /// removeBackCopies - Remove the copy instructions that defines the values
+ /// in the vector in the complement interval.
+ void removeBackCopies(SmallVectorImpl<VNInfo*> &Copies);
+
+ /// getShallowDominator - Returns the least busy dominator of MBB that is
+ /// also dominated by DefMBB. Busy is measured by loop depth.
+ MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB);
+
+ /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
+ /// way that minimizes code size. This implements the SM_Size spill mode.
+ void hoistCopiesForSize();
+
+ /// transferValues - Transfer values to the new ranges.
+ /// Return true if any ranges were skipped.
+ bool transferValues();
+
+ /// extendPHIKillRanges - Extend the ranges of all values killed by original
+ /// parent PHIDefs.
+ void extendPHIKillRanges();
+
+ /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+ void rewriteAssigned(bool ExtendRanges);
+
+ /// deleteRematVictims - Delete defs that are dead after rematerializing.
+ void deleteRematVictims();
+
+public:
+ /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+ /// Newly created intervals will be appended to newIntervals.
+ SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
+ MachineDominatorTree&);
+
+ /// reset - Prepare for a new split.
+ void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
+
+ /// Create a new virtual register and live interval.
+ /// Return the interval index, starting from 1. Interval index 0 is the
+ /// implicit complement interval.
+ unsigned openIntv();
+
+ /// currentIntv - Return the current interval index.
+ unsigned currentIntv() const { return OpenIdx; }
+
+ /// selectIntv - Select a previously opened interval index.
+ void selectIntv(unsigned Idx);
+
+ /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+ /// If the parent interval is not live before Idx, a COPY is not inserted.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvBefore(SlotIndex Idx);
+
+ /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAfter(SlotIndex Idx);
+
+ /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+ /// Use the open interval from he inserted copy to the MBB end.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in MBB should use OpenLI.
+ void useIntv(const MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in range should use OpenLI.
+ void useIntv(SlotIndex Start, SlotIndex End);
+
+ /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvBefore(SlotIndex Idx);
+
+ /// leaveIntvAtTop - Leave the interval at the top of MBB.
+ /// Add liveness from the MBB top to the copy.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+ /// overlapIntv - Indicate that all instructions in range should use the open
+ /// interval, but also let the complement interval be live.
+ ///
+ /// This doubles the register pressure, but is sometimes required to deal with
+ /// register uses after the last valid split point.
+ ///
+ /// The Start index should be a return value from a leaveIntv* call, and End
+ /// should be in the same basic block. The parent interval must have the same
+ /// value across the range.
+ ///
+ void overlapIntv(SlotIndex Start, SlotIndex End);
+
+ /// finish - after all the new live ranges have been created, compute the
+ /// remaining live range, and rewrite instructions to use the new registers.
+ /// @param LRMap When not null, this vector will map each live range in Edit
+ /// back to the indices returned by openIntv.
+ /// There may be extra indices created by dead code elimination.
+ void finish(SmallVectorImpl<unsigned> *LRMap = 0);
+
+ /// dump - print the current interval maping to dbgs().
+ void dump() const;
+
+ // ===--- High level methods ---===
+
+ /// splitSingleBlock - Split CurLI into a separate live interval around the
+ /// uses in a single block. This is intended to be used as part of a larger
+ /// split, and doesn't call finish().
+ void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
+
+ /// splitLiveThroughBlock - Split CurLI in the given block such that it
+ /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+ /// the block, but they will be ignored when placing split points.
+ ///
+ /// @param MBBNum Block number.
+ /// @param IntvIn Interval index entering the block.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter);
+
+ /// splitRegInBlock - Split CurLI in the given block such that it enters the
+ /// block in IntvIn and leaves it on the stack (or not at all). Split points
+ /// are placed in a way that avoids putting uses in the stack interval. This
+ /// may require creating a local interval when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvIn Interval index entering the block. Not 0.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore);
+
+ /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+ /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+ /// Split points are placed to avoid interference and such that the uses are
+ /// not in the stack interval. This may require creating a local interval
+ /// when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
new file mode 100644
index 000000000000..a789a2596dbf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -0,0 +1,802 @@
+//===-- StackColoring.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the stack-coloring optimization that looks for
+// lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
+// which represent the possible lifetime of stack slots. It attempts to
+// merge disjoint stack slots and reduce the used stack space.
+// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
+//
+// TODO: In the future we plan to improve stack coloring in the following ways:
+// 1. Allow merging multiple small slots into a single larger slot at different
+// offsets.
+// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableColoring("no-stack-coloring",
+ cl::init(false), cl::Hidden,
+ cl::desc("Disable stack coloring"));
+
+/// The user may write code that uses allocas outside of the declared lifetime
+/// zone. This can happen when the user returns a reference to a local
+/// data-structure. We can detect these cases and decide not to optimize the
+/// code. If this flag is enabled, we try to save the user.
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+ cl::init(false), cl::Hidden,
+ cl::desc("Do not optimize lifetime zones that "
+ "are broken"));
+
+STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
+STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
+STATISTIC(StackSlotMerged, "Number of stack slot merged.");
+STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
+
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// StackColoring - A machine pass for merging disjoint stack allocations,
+/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+class StackColoring : public MachineFunctionPass {
+ MachineFrameInfo *MFI;
+ MachineFunction *MF;
+
+ /// A class representing liveness information for a single basic block.
+ /// Each bit in the BitVector represents the liveness property
+ /// for a different stack slot.
+ struct BlockLifetimeInfo {
+ /// Which slots BEGINs in each basic block.
+ BitVector Begin;
+ /// Which slots ENDs in each basic block.
+ BitVector End;
+ /// Which slots are marked as LIVE_IN, coming into each basic block.
+ BitVector LiveIn;
+ /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+ BitVector LiveOut;
+ };
+
+ /// Maps active slots (per bit) for each basic block.
+ typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
+ LivenessMap BlockLiveness;
+
+ /// Maps serial numbers to basic blocks.
+ DenseMap<const MachineBasicBlock*, int> BasicBlocks;
+ /// Maps basic blocks to a serial number.
+ SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
+
+ /// Maps liveness intervals for each slot.
+ SmallVector<LiveInterval*, 16> Intervals;
+ /// VNInfo is used for the construction of LiveIntervals.
+ VNInfo::Allocator VNInfoAllocator;
+ /// SlotIndex analysis object.
+ SlotIndexes *Indexes;
+
+ /// The list of lifetime markers found. These markers are to be removed
+ /// once the coloring is done.
+ SmallVector<MachineInstr*, 8> Markers;
+
+ /// SlotSizeSorter - A Sort utility for arranging stack slots according
+ /// to their size.
+ struct SlotSizeSorter {
+ MachineFrameInfo *MFI;
+ SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { }
+ bool operator()(int LHS, int RHS) {
+ // We use -1 to denote a uninteresting slot. Place these slots at the end.
+ if (LHS == -1) return false;
+ if (RHS == -1) return true;
+ // Sort according to size.
+ return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+ }
+};
+
+public:
+ static char ID;
+ StackColoring() : MachineFunctionPass(ID) {
+ initializeStackColoringPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+ /// Debug.
+ void dump() const;
+
+ /// Removes all of the lifetime marker instructions from the function.
+ /// \returns true if any markers were removed.
+ bool removeAllMarkers();
+
+ /// Scan the machine function and find all of the lifetime markers.
+ /// Record the findings in the BEGIN and END vectors.
+ /// \returns the number of markers found.
+ unsigned collectMarkers(unsigned NumSlot);
+
+ /// Perform the dataflow calculation and calculate the lifetime for each of
+ /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
+ /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
+ /// in and out blocks.
+ void calculateLocalLiveness();
+
+ /// Construct the LiveIntervals for the slots.
+ void calculateLiveIntervals(unsigned NumSlots);
+
+ /// Go over the machine function and change instructions which use stack
+ /// slots to use the joint slots.
+ void remapInstructions(DenseMap<int, int> &SlotRemap);
+
+ /// The input program may contain intructions which are not inside lifetime
+ /// markers. This can happen due to a bug in the compiler or due to a bug in
+ /// user code (for example, returning a reference to a local variable).
+ /// This procedure checks all of the instructions in the function and
+ /// invalidates lifetime ranges which do not contain all of the instructions
+ /// which access that frame slot.
+ void removeInvalidSlotRanges();
+
+ /// Map entries which point to other entries to their destination.
+ /// A->B->C becomes A->C.
+ void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+};
+} // end anonymous namespace
+
+char StackColoring::ID = 0;
+char &llvm::StackColoringID = StackColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackColoring,
+ "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(StackColoring,
+ "stack-coloring", "Merge disjoint stack slots", false, false)
+
+void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void StackColoring::dump() const {
+ for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+ FI != FE; ++FI) {
+ DEBUG(dbgs()<<"Inspecting block #"<<BasicBlocks.lookup(*FI)<<
+ " ["<<FI->getName()<<"]\n");
+
+ LivenessMap::const_iterator BI = BlockLiveness.find(*FI);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ const BlockLifetimeInfo &BlockInfo = BI->second;
+
+ DEBUG(dbgs()<<"BEGIN : {");
+ for (unsigned i=0; i < BlockInfo.Begin.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" ");
+ DEBUG(dbgs()<<"}\n");
+
+ DEBUG(dbgs()<<"END : {");
+ for (unsigned i=0; i < BlockInfo.End.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.End.test(i)<<" ");
+
+ DEBUG(dbgs()<<"}\n");
+
+ DEBUG(dbgs()<<"LIVE_IN: {");
+ for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" ");
+
+ DEBUG(dbgs()<<"}\n");
+ DEBUG(dbgs()<<"LIVEOUT: {");
+ for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" ");
+ DEBUG(dbgs()<<"}\n");
+ }
+}
+
+unsigned StackColoring::collectMarkers(unsigned NumSlot) {
+ unsigned MarkersFound = 0;
+ // Scan the function to find all lifetime markers.
+ // NOTE: We use the a reverse-post-order iteration to ensure that we obtain a
+ // deterministic numbering, and because we'll need a post-order iteration
+ // later for solving the liveness dataflow problem.
+ for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+ FI != FE; ++FI) {
+
+ // Assign a serial number to this basic block.
+ BasicBlocks[*FI] = BasicBlockNumbering.size();
+ BasicBlockNumbering.push_back(*FI);
+
+ // Keep a reference to avoid repeated lookups.
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI];
+
+ BlockInfo.Begin.resize(NumSlot);
+ BlockInfo.End.resize(NumSlot);
+
+ for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
+ BI != BE; ++BI) {
+
+ if (BI->getOpcode() != TargetOpcode::LIFETIME_START &&
+ BI->getOpcode() != TargetOpcode::LIFETIME_END)
+ continue;
+
+ Markers.push_back(BI);
+
+ bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
+ const MachineOperand &MI = BI->getOperand(0);
+ unsigned Slot = MI.getIndex();
+
+ MarkersFound++;
+
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+ if (Allocation) {
+ DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
+ " with allocation: "<< Allocation->getName()<<"\n");
+ }
+
+ if (IsStart) {
+ BlockInfo.Begin.set(Slot);
+ } else {
+ if (BlockInfo.Begin.test(Slot)) {
+ // Allocas that start and end within a single block are handled
+ // specially when computing the LiveIntervals to avoid pessimizing
+ // the liveness propagation.
+ BlockInfo.Begin.reset(Slot);
+ } else {
+ BlockInfo.End.set(Slot);
+ }
+ }
+ }
+ }
+
+ // Update statistics.
+ NumMarkerSeen += MarkersFound;
+ return MarkersFound;
+}
+
+void StackColoring::calculateLocalLiveness() {
+ // Perform a standard reverse dataflow computation to solve for
+ // global liveness. The BEGIN set here is equivalent to KILL in the standard
+ // formulation, and END is equivalent to GEN. The result of this computation
+ // is a map from blocks to bitvectors where the bitvectors represent which
+ // allocas are live in/out of that block.
+ SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
+ BasicBlockNumbering.end());
+ unsigned NumSSMIters = 0;
+ bool changed = true;
+ while (changed) {
+ changed = false;
+ ++NumSSMIters;
+
+ SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;
+
+ for (SmallVector<const MachineBasicBlock*, 8>::iterator
+ PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
+ PI != PE; ++PI) {
+
+ const MachineBasicBlock *BB = *PI;
+ if (!BBSet.count(BB)) continue;
+
+ // Use an iterator to avoid repeated lookups.
+ LivenessMap::iterator BI = BlockLiveness.find(BB);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ BlockLifetimeInfo &BlockInfo = BI->second;
+
+ BitVector LocalLiveIn;
+ BitVector LocalLiveOut;
+
+ // Forward propagation from begins to ends.
+ for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
+ PE = BB->pred_end(); PI != PE; ++PI) {
+ LivenessMap::const_iterator I = BlockLiveness.find(*PI);
+ assert(I != BlockLiveness.end() && "Predecessor not found");
+ LocalLiveIn |= I->second.LiveOut;
+ }
+ LocalLiveIn |= BlockInfo.End;
+ LocalLiveIn.reset(BlockInfo.Begin);
+
+ // Reverse propagation from ends to begins.
+ for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ LivenessMap::const_iterator I = BlockLiveness.find(*SI);
+ assert(I != BlockLiveness.end() && "Successor not found");
+ LocalLiveOut |= I->second.LiveIn;
+ }
+ LocalLiveOut |= BlockInfo.Begin;
+ LocalLiveOut.reset(BlockInfo.End);
+
+ LocalLiveIn |= LocalLiveOut;
+ LocalLiveOut |= LocalLiveIn;
+
+ // After adopting the live bits, we need to turn-off the bits which
+ // are de-activated in this block.
+ LocalLiveOut.reset(BlockInfo.End);
+ LocalLiveIn.reset(BlockInfo.Begin);
+
+ // If we have both BEGIN and END markers in the same basic block then
+ // we know that the BEGIN marker comes after the END, because we already
+ // handle the case where the BEGIN comes before the END when collecting
+ // the markers (and building the BEGIN/END vectore).
+ // Want to enable the LIVE_IN and LIVE_OUT of slots that have both
+ // BEGIN and END because it means that the value lives before and after
+ // this basic block.
+ BitVector LocalEndBegin = BlockInfo.End;
+ LocalEndBegin &= BlockInfo.Begin;
+ LocalLiveIn |= LocalEndBegin;
+ LocalLiveOut |= LocalEndBegin;
+
+ if (LocalLiveIn.test(BlockInfo.LiveIn)) {
+ changed = true;
+ BlockInfo.LiveIn |= LocalLiveIn;
+
+ for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
+ PE = BB->pred_end(); PI != PE; ++PI)
+ NextBBSet.insert(*PI);
+ }
+
+ if (LocalLiveOut.test(BlockInfo.LiveOut)) {
+ changed = true;
+ BlockInfo.LiveOut |= LocalLiveOut;
+
+ for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ NextBBSet.insert(*SI);
+ }
+ }
+
+ BBSet = NextBBSet;
+ }// while changed.
+}
+
+void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
+ SmallVector<SlotIndex, 16> Starts;
+ SmallVector<SlotIndex, 16> Finishes;
+
+ // For each block, find which slots are active within this block
+ // and update the live intervals.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end();
+ MBB != MBBe; ++MBB) {
+ Starts.clear();
+ Starts.resize(NumSlots);
+ Finishes.clear();
+ Finishes.resize(NumSlots);
+
+ // Create the interval for the basic blocks with lifetime markers in them.
+ for (SmallVectorImpl<MachineInstr*>::const_iterator it = Markers.begin(),
+ e = Markers.end(); it != e; ++it) {
+ const MachineInstr *MI = *it;
+ if (MI->getParent() != MBB)
+ continue;
+
+ assert((MI->getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI->getOpcode() == TargetOpcode::LIFETIME_END) &&
+ "Invalid Lifetime marker");
+
+ bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
+ const MachineOperand &Mo = MI->getOperand(0);
+ int Slot = Mo.getIndex();
+ assert(Slot >= 0 && "Invalid slot");
+
+ SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
+
+ if (IsStart) {
+ if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+ Starts[Slot] = ThisIndex;
+ } else {
+ if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
+ Finishes[Slot] = ThisIndex;
+ }
+ }
+
+ // Create the interval of the blocks that we previously found to be 'alive'.
+ BitVector Alive = BlockLiveness[MBB].LiveIn;
+ Alive |= BlockLiveness[MBB].LiveOut;
+
+ if (Alive.any()) {
+ for (int pos = Alive.find_first(); pos != -1;
+ pos = Alive.find_next(pos)) {
+ if (!Starts[pos].isValid())
+ Starts[pos] = Indexes->getMBBStartIdx(MBB);
+ if (!Finishes[pos].isValid())
+ Finishes[pos] = Indexes->getMBBEndIdx(MBB);
+ }
+ }
+
+ for (unsigned i = 0; i < NumSlots; ++i) {
+ assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range");
+ if (!Starts[i].isValid())
+ continue;
+
+ assert(Starts[i] && Finishes[i] && "Invalid interval");
+ VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
+ SlotIndex S = Starts[i];
+ SlotIndex F = Finishes[i];
+ if (S < F) {
+ // We have a single consecutive region.
+ Intervals[i]->addRange(LiveRange(S, F, ValNum));
+ } else {
+ // We have two non consecutive regions. This happens when
+ // LIFETIME_START appears after the LIFETIME_END marker.
+ SlotIndex NewStart = Indexes->getMBBStartIdx(MBB);
+ SlotIndex NewFin = Indexes->getMBBEndIdx(MBB);
+ Intervals[i]->addRange(LiveRange(NewStart, F, ValNum));
+ Intervals[i]->addRange(LiveRange(S, NewFin, ValNum));
+ }
+ }
+ }
+}
+
+bool StackColoring::removeAllMarkers() {
+ unsigned Count = 0;
+ for (unsigned i = 0; i < Markers.size(); ++i) {
+ Markers[i]->eraseFromParent();
+ Count++;
+ }
+ Markers.clear();
+
+ DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
+ return Count;
+}
+
+void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
+ unsigned FixedInstr = 0;
+ unsigned FixedMemOp = 0;
+ unsigned FixedDbg = 0;
+ MachineModuleInfo *MMI = &MF->getMMI();
+
+ // Remap debug information that refers to stack slots.
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ const MDNode *Var = VI->first;
+ if (!Var) continue;
+ std::pair<unsigned, DebugLoc> &VP = VI->second;
+ if (SlotRemap.count(VP.first)) {
+ DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n");
+ VP.first = SlotRemap[VP.first];
+ FixedDbg++;
+ }
+ }
+
+ // Keep a list of *allocas* which need to be remapped.
+ DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
+ for (DenseMap<int, int>::const_iterator it = SlotRemap.begin(),
+ e = SlotRemap.end(); it != e; ++it) {
+ const AllocaInst *From = MFI->getObjectAllocation(it->first);
+ const AllocaInst *To = MFI->getObjectAllocation(it->second);
+ assert(To && From && "Invalid allocation object");
+ Allocas[From] = To;
+ }
+
+ // Remap all instructions to the new stack slots.
+ MachineFunction::iterator BB, BBE;
+ MachineBasicBlock::iterator I, IE;
+ for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+ for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+ // Skip lifetime markers. We'll remove them soon.
+ if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+ I->getOpcode() == TargetOpcode::LIFETIME_END)
+ continue;
+
+ // Update the MachineMemOperand to use the new alloca.
+ for (MachineInstr::mmo_iterator MM = I->memoperands_begin(),
+ E = I->memoperands_end(); MM != E; ++MM) {
+ MachineMemOperand *MMO = *MM;
+
+ const Value *V = MMO->getValue();
+
+ if (!V)
+ continue;
+
+ // Climb up and find the original alloca.
+ V = GetUnderlyingObject(V);
+ // If we did not find one, or if the one that we found is not in our
+ // map, then move on.
+ if (!V || !isa<AllocaInst>(V)) {
+ // Clear mem operand since we don't know for sure that it doesn't
+ // alias a merged alloca.
+ MMO->setValue(0);
+ continue;
+ }
+ const AllocaInst *AI= cast<AllocaInst>(V);
+ if (!Allocas.count(AI))
+ continue;
+
+ MMO->setValue(Allocas[AI]);
+ FixedMemOp++;
+ }
+
+ // Update all of the machine instruction operands.
+ for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
+ MachineOperand &MO = I->getOperand(i);
+
+ if (!MO.isFI())
+ continue;
+ int FromSlot = MO.getIndex();
+
+ // Don't touch arguments.
+ if (FromSlot<0)
+ continue;
+
+ // Only look at mapped slots.
+ if (!SlotRemap.count(FromSlot))
+ continue;
+
+ // In a debug build, check that the instruction that we are modifying is
+ // inside the expected live range. If the instruction is not inside
+ // the calculated range then it means that the alloca usage moved
+ // outside of the lifetime markers, or that the user has a bug.
+ // NOTE: Alloca address calculations which happen outside the lifetime
+ // zone are are okay, despite the fact that we don't have a good way
+ // for validating all of the usages of the calculation.
+#ifndef NDEBUG
+ bool TouchesMemory = I->mayLoad() || I->mayStore();
+ // If we *don't* protect the user from escaped allocas, don't bother
+ // validating the instructions.
+ if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ LiveInterval *Interval = Intervals[FromSlot];
+ assert(Interval->find(Index) != Interval->end() &&
+ "Found instruction usage outside of live range.");
+ }
+#endif
+
+ // Fix the machine instructions.
+ int ToSlot = SlotRemap[FromSlot];
+ MO.setIndex(ToSlot);
+ FixedInstr++;
+ }
+ }
+
+ DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
+ DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
+ DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
+}
+
+void StackColoring::removeInvalidSlotRanges() {
+ MachineFunction::const_iterator BB, BBE;
+ MachineBasicBlock::const_iterator I, IE;
+ for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+ for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+ if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+ I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue())
+ continue;
+
+ // Some intervals are suspicious! In some cases we find address
+ // calculations outside of the lifetime zone, but not actual memory
+ // read or write. Memory accesses outside of the lifetime zone are a clear
+ // violation, but address calculations are okay. This can happen when
+ // GEPs are hoisted outside of the lifetime zone.
+ // So, in here we only check instructions which can read or write memory.
+ if (!I->mayLoad() && !I->mayStore())
+ continue;
+
+ // Check all of the machine operands.
+ for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+
+ if (!MO.isFI())
+ continue;
+
+ int Slot = MO.getIndex();
+
+ if (Slot<0)
+ continue;
+
+ if (Intervals[Slot]->empty())
+ continue;
+
+ // Check that the used slot is inside the calculated lifetime range.
+ // If it is not, warn about it and invalidate the range.
+ LiveInterval *Interval = Intervals[Slot];
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ if (Interval->find(Index) == Interval->end()) {
+ Intervals[Slot]->clear();
+ DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
+ EscapedAllocas++;
+ }
+ }
+ }
+}
+
+void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
+ unsigned NumSlots) {
+ // Expunge slot remap map.
+ for (unsigned i=0; i < NumSlots; ++i) {
+ // If we are remapping i
+ if (SlotRemap.count(i)) {
+ int Target = SlotRemap[i];
+ // As long as our target is mapped to something else, follow it.
+ while (SlotRemap.count(Target)) {
+ Target = SlotRemap[Target];
+ SlotRemap[i] = Target;
+ }
+ }
+ }
+}
+
+bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
+ DEBUG(dbgs() << "********** Stack Coloring **********\n"
+ << "********** Function: "
+ << ((const Value*)Func.getFunction())->getName() << '\n');
+ MF = &Func;
+ MFI = MF->getFrameInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ BlockLiveness.clear();
+ BasicBlocks.clear();
+ BasicBlockNumbering.clear();
+ Markers.clear();
+ Intervals.clear();
+ VNInfoAllocator.Reset();
+
+ unsigned NumSlots = MFI->getObjectIndexEnd();
+
+ // If there are no stack slots then there are no markers to remove.
+ if (!NumSlots)
+ return false;
+
+ SmallVector<int, 8> SortedSlots;
+
+ SortedSlots.reserve(NumSlots);
+ Intervals.reserve(NumSlots);
+
+ unsigned NumMarkers = collectMarkers(NumSlots);
+
+ unsigned TotalSize = 0;
+ DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
+ DEBUG(dbgs()<<"Slot structure:\n");
+
+ for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
+ DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
+ TotalSize += MFI->getObjectSize(i);
+ }
+
+ DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
+
+ // Don't continue because there are not enough lifetime markers, or the
+ // stack is too small, or we are told not to optimize the slots.
+ if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
+ DEBUG(dbgs()<<"Will not try to merge slots.\n");
+ return removeAllMarkers();
+ }
+
+ for (unsigned i=0; i < NumSlots; ++i) {
+ LiveInterval *LI = new LiveInterval(i, 0);
+ Intervals.push_back(LI);
+ LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
+ SortedSlots.push_back(i);
+ }
+
+ // Calculate the liveness of each block.
+ calculateLocalLiveness();
+
+ // Propagate the liveness information.
+ calculateLiveIntervals(NumSlots);
+
+ // Search for allocas which are used outside of the declared lifetime
+ // markers.
+ if (ProtectFromEscapedAllocas)
+ removeInvalidSlotRanges();
+
+ // Maps old slots to new slots.
+ DenseMap<int, int> SlotRemap;
+ unsigned RemovedSlots = 0;
+ unsigned ReducedSize = 0;
+
+ // Do not bother looking at empty intervals.
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (Intervals[SortedSlots[I]]->empty())
+ SortedSlots[I] = -1;
+ }
+
+ // This is a simple greedy algorithm for merging allocas. First, sort the
+ // slots, placing the largest slots first. Next, perform an n^2 scan and look
+ // for disjoint slots. When you find disjoint slots, merge the samller one
+ // into the bigger one and update the live interval. Remove the small alloca
+ // and continue.
+
+ // Sort the slots according to their size. Place unused slots at the end.
+ // Use stable sort to guarantee deterministic code generation.
+ std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
+ SlotSizeSorter(MFI));
+
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (SortedSlots[I] == -1)
+ continue;
+
+ for (unsigned J=I+1; J < NumSlots; ++J) {
+ if (SortedSlots[J] == -1)
+ continue;
+
+ int FirstSlot = SortedSlots[I];
+ int SecondSlot = SortedSlots[J];
+ LiveInterval *First = Intervals[FirstSlot];
+ LiveInterval *Second = Intervals[SecondSlot];
+ assert (!First->empty() && !Second->empty() && "Found an empty range");
+
+ // Merge disjoint slots.
+ if (!First->overlaps(*Second)) {
+ Changed = true;
+ First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
+ SlotRemap[SecondSlot] = FirstSlot;
+ SortedSlots[J] = -1;
+ DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
+ SecondSlot<<" together.\n");
+ unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
+ MFI->getObjectAlignment(SecondSlot));
+
+ assert(MFI->getObjectSize(FirstSlot) >=
+ MFI->getObjectSize(SecondSlot) &&
+ "Merging a small object into a larger one");
+
+ RemovedSlots+=1;
+ ReducedSize += MFI->getObjectSize(SecondSlot);
+ MFI->setObjectAlignment(FirstSlot, MaxAlignment);
+ MFI->RemoveStackObject(SecondSlot);
+ }
+ }
+ }
+ }// While changed.
+
+ // Record statistics.
+ StackSpaceSaved += ReducedSize;
+ StackSlotMerged += RemovedSlots;
+ DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
+ ReducedSize<<" bytes\n");
+
+ // Scan the entire function and update all machine operands that use frame
+ // indices to use the remapped frame index.
+ expungeSlotMap(SlotRemap, NumSlots);
+ remapInstructions(SlotRemap);
+
+ // Release the intervals.
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ delete Intervals[I];
+ }
+
+ return removeAllMarkers();
+}
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
new file mode 100644
index 000000000000..fbef34772b08
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -0,0 +1,370 @@
+//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts stack protectors into functions which need them. A variable
+// with a random value in it is stored onto the stack before the local variables
+// are allocated. Upon exiting the block, the stored value is checked. If it's
+// changed, then there was some sort of violation and the program aborts.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stack-protector"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+STATISTIC(NumFunProtected, "Number of functions protected");
+STATISTIC(NumAddrTaken, "Number of local variables that have their address"
+ " taken.");
+
+namespace {
+ class StackProtector : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLoweringBase *TLI;
+
+ Function *F;
+ Module *M;
+
+ DominatorTree *DT;
+
+ /// VisitedPHIs - The set of PHI nodes visited when determining
+ /// if a variable's reference has been taken. This set
+ /// is maintained to ensure we don't visit the same PHI node multiple
+ /// times.
+ SmallPtrSet<const PHINode*, 16> VisitedPHIs;
+
+ /// InsertStackProtectors - Insert code into the prologue and epilogue of
+ /// the function.
+ ///
+ /// - The prologue code loads and stores the stack guard onto the stack.
+ /// - The epilogue checks the value stored in the prologue against the
+ /// original value. It calls __stack_chk_fail if they differ.
+ bool InsertStackProtectors();
+
+ /// CreateFailBB - Create a basic block to jump to when the stack protector
+ /// check fails.
+ BasicBlock *CreateFailBB();
+
+ /// ContainsProtectableArray - Check whether the type either is an array or
+ /// contains an array of sufficient size so that we need stack protectors
+ /// for it.
+ bool ContainsProtectableArray(Type *Ty, bool Strong = false,
+ bool InStruct = false) const;
+
+ /// \brief Check whether a stack allocation has its address taken.
+ bool HasAddressTaken(const Instruction *AI);
+
+ /// RequiresStackProtector - Check whether or not this function needs a
+ /// stack protector based upon the stack protector level.
+ bool RequiresStackProtector();
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ StackProtector() : FunctionPass(ID), TLI(0) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+ StackProtector(const TargetLoweringBase *tli)
+ : FunctionPass(ID), TLI(tli) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ }
+
+ virtual bool runOnFunction(Function &Fn);
+ };
+} // end anonymous namespace
+
+char StackProtector::ID = 0;
+INITIALIZE_PASS(StackProtector, "stack-protector",
+ "Insert stack protectors", false, false)
+
+FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) {
+ return new StackProtector(tli);
+}
+
+bool StackProtector::runOnFunction(Function &Fn) {
+ F = &Fn;
+ M = F->getParent();
+ DT = getAnalysisIfAvailable<DominatorTree>();
+
+ if (!RequiresStackProtector()) return false;
+
+ ++NumFunProtected;
+ return InsertStackProtectors();
+}
+
+/// ContainsProtectableArray - Check whether the type either is an array or
+/// contains a char array of sufficient size so that we need stack protectors
+/// for it.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong,
+ bool InStruct) const {
+ if (!Ty) return false;
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ // In strong mode any array, regardless of type and size, triggers a
+ // protector
+ if (Strong)
+ return true;
+ const TargetMachine &TM = TLI->getTargetMachine();
+ if (!AT->getElementType()->isIntegerTy(8)) {
+ Triple Trip(TM.getTargetTriple());
+
+ // If we're on a non-Darwin platform or we're inside of a structure, don't
+ // add stack protectors unless the array is a character array.
+ if (InStruct || !Trip.isOSDarwin())
+ return false;
+ }
+
+ // If an array has more than SSPBufferSize bytes of allocated space, then we
+ // emit stack protectors.
+ if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT))
+ return true;
+ }
+
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ if (!ST) return false;
+
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end(); I != E; ++I)
+ if (ContainsProtectableArray(*I, Strong, true))
+ return true;
+
+ return false;
+}
+
+bool StackProtector::HasAddressTaken(const Instruction *AI) {
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (AI == SI->getValueOperand())
+ return true;
+ } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {
+ if (AI == SI->getOperand(0))
+ return true;
+ } else if (isa<CallInst>(U)) {
+ return true;
+ } else if (isa<InvokeInst>(U)) {
+ return true;
+ } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
+ if (HasAddressTaken(SI))
+ return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Keep track of what PHI nodes we have already visited to ensure
+ // they are only visited once.
+ if (VisitedPHIs.insert(PN))
+ if (HasAddressTaken(PN))
+ return true;
+ } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (HasAddressTaken(GEP))
+ return true;
+ } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
+ if (HasAddressTaken(BI))
+ return true;
+ }
+ }
+ return false;
+}
+
+/// \brief Check whether or not this function needs a stack protector based
+/// upon the stack protector level.
+///
+/// We use two heuristics: a standard (ssp) and strong (sspstrong).
+/// The standard heuristic which will add a guard variable to functions that
+/// call alloca with a either a variable size or a size >= SSPBufferSize,
+/// functions with character buffers larger than SSPBufferSize, and functions
+/// with aggregates containing character buffers larger than SSPBufferSize. The
+/// strong heuristic will add a guard variables to functions that call alloca
+/// regardless of size, functions with any buffer regardless of type and size,
+/// functions with aggregates that contain any buffer regardless of type and
+/// size, and functions that contain stack-based variables that have had their
+/// address taken.
+bool StackProtector::RequiresStackProtector() {
+ bool Strong = false;
+ if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq))
+ return true;
+ else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong))
+ Strong = true;
+ else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtect))
+ return false;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ for (BasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (AI->isArrayAllocation()) {
+ // SSP-Strong: Enable protectors for any call to alloca, regardless
+ // of size.
+ if (Strong)
+ return true;
+
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize;
+ if (CI->getLimitedValue(BufferSize) >= BufferSize)
+ // A call to alloca with size >= SSPBufferSize requires
+ // stack protectors.
+ return true;
+ } else // A call to alloca with a variable size requires protectors.
+ return true;
+ }
+
+ if (ContainsProtectableArray(AI->getAllocatedType(), Strong))
+ return true;
+
+ if (Strong && HasAddressTaken(AI)) {
+ ++NumAddrTaken;
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+/// - The prologue code loads and stores the stack guard onto the stack.
+/// - The epilogue checks the value stored in the prologue against the original
+/// value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+ BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
+ BasicBlock *FailBBDom = 0; // FailBB's dominator.
+ AllocaInst *AI = 0; // Place on stack that stores the stack guard.
+ Value *StackGuardVar = 0; // The stack guard variable.
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
+ BasicBlock *BB = I++;
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI) continue;
+
+ if (!FailBB) {
+ // Insert code into the entry block that stores the __stack_chk_guard
+ // variable onto the stack:
+ //
+ // entry:
+ // StackGuardSlot = alloca i8*
+ // StackGuard = load __stack_chk_guard
+ // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
+ //
+ PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ unsigned AddressSpace, Offset;
+ if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
+ Constant *OffsetVal =
+ ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
+
+ StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
+ PointerType::get(PtrTy, AddressSpace));
+ } else {
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ }
+
+ BasicBlock &Entry = F->getEntryBlock();
+ Instruction *InsPt = &Entry.front();
+
+ AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
+ LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
+
+ Value *Args[] = { LI, AI };
+ CallInst::
+ Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ Args, "", InsPt);
+
+ // Create the basic block to jump to when the guard check fails.
+ FailBB = CreateFailBB();
+ }
+
+ // For each block with a return instruction, convert this:
+ //
+ // return:
+ // ...
+ // ret ...
+ //
+ // into this:
+ //
+ // return:
+ // ...
+ // %1 = load __stack_chk_guard
+ // %2 = load StackGuardSlot
+ // %3 = cmp i1 %1, %2
+ // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+ //
+ // SP_return:
+ // ret ...
+ //
+ // CallStackCheckFailBlk:
+ // call void @__stack_chk_fail()
+ // unreachable
+
+ // Split the basic block before the return instruction.
+ BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+
+ if (DT && DT->isReachableFromEntry(BB)) {
+ DT->addNewBlock(NewBB, BB);
+ FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB;
+ }
+
+ // Remove default branch instruction to the new BB.
+ BB->getTerminator()->eraseFromParent();
+
+ // Move the newly created basic block to the point right after the old basic
+ // block so that it's in the "fall through" position.
+ NewBB->moveAfter(BB);
+
+ // Generate the stack protector instructions in the old basic block.
+ LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
+ LoadInst *LI2 = new LoadInst(AI, "", true, BB);
+ ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
+ BranchInst::Create(NewBB, FailBB, Cmp, BB);
+ }
+
+ // Return if we didn't modify any basic blocks. I.e., there are no return
+ // statements in the function.
+ if (!FailBB) return false;
+
+ if (DT && FailBBDom)
+ DT->addNewBlock(FailBB, FailBBDom);
+
+ return true;
+}
+
+/// CreateFailBB - Create a basic block to jump to when the stack protector
+/// check fails.
+BasicBlock *StackProtector::CreateFailBB() {
+ BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
+ "CallStackCheckFailBlk", F);
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_chk_fail",
+ Type::getVoidTy(F->getContext()), NULL);
+ CallInst::Create(StackChkFail, "", FailBB);
+ new UnreachableInst(F->getContext(), FailBB);
+ return FailBB;
+}
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 000000000000..f9515610d7e9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,439 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackslotcoloring"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+ cl::init(false), cl::Hidden,
+ cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+ class StackSlotColoring : public MachineFunctionPass {
+ LiveStacks* LS;
+ MachineFrameInfo *MFI;
+ const TargetInstrInfo *TII;
+ const MachineLoopInfo *loopInfo;
+
+ // SSIntervals - Spill slot intervals.
+ std::vector<LiveInterval*> SSIntervals;
+
+ // SSRefs - Keep a list of frame index references for each spill slot.
+ SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+ // OrigAlignments - Alignments of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigAlignments;
+
+ // OrigSizes - Sizess of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigSizes;
+
+ // AllColors - If index is set, it's a spill slot, i.e. color.
+ // FIXME: This assumes PEI locate spill slot with smaller indices
+ // closest to stack pointer / frame pointer. Therefore, smaller
+ // index == better color.
+ BitVector AllColors;
+
+ // NextColor - Next "color" that's not yet used.
+ int NextColor;
+
+ // UsedColors - "Colors" that have been assigned.
+ BitVector UsedColors;
+
+ // Assignments - Color to intervals mapping.
+ SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+
+ public:
+ static char ID; // Pass identification
+ StackSlotColoring() :
+ MachineFunctionPass(ID), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveStacks>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ void InitializeSlots();
+ void ScanForSpillSlotRefs(MachineFunction &MF);
+ bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+ int ColorSlot(LiveInterval *li);
+ bool ColorSlots(MachineFunction &MF);
+ void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+ MachineFunction &MF);
+ bool RemoveDeadStores(MachineBasicBlock* MBB);
+ };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+char &llvm::StackSlotColoringID = StackSlotColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+
+namespace {
+ // IntervalSorter - Comparison predicate that sort live intervals by
+ // their weight.
+ struct IntervalSorter {
+ bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+ return LHS->weight > RHS->weight;
+ }
+ };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+ SSRefs.resize(MFI->getObjectIndexEnd());
+
+ // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+ MII != EE; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI < 0)
+ continue;
+ if (!LS->hasInterval(FI))
+ continue;
+ LiveInterval &li = LS->getInterval(FI);
+ if (!MI->isDebugValue())
+ li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+ SSRefs[FI].push_back(MI);
+ }
+ }
+ }
+}
+
+/// InitializeSlots - Process all spill stack slot liveintervals and add them
+/// to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+ int LastFI = MFI->getObjectIndexEnd();
+ OrigAlignments.resize(LastFI);
+ OrigSizes.resize(LastFI);
+ AllColors.resize(LastFI);
+ UsedColors.resize(LastFI);
+ Assignments.resize(LastFI);
+
+ // Gather all spill slots into a list.
+ DEBUG(dbgs() << "Spill slot intervals:\n");
+ for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
+ LiveInterval &li = i->second;
+ DEBUG(li.dump());
+ int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
+ if (MFI->isDeadObjectIndex(FI))
+ continue;
+ SSIntervals.push_back(&li);
+ OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+ OrigSizes[FI] = MFI->getObjectSize(FI);
+ AllColors.set(FI);
+ }
+ DEBUG(dbgs() << '\n');
+
+ // Sort them by weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+ // Get first "color".
+ NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+ const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color];
+ for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+ LiveInterval *OtherLI = OtherLIs[i];
+ if (OtherLI->overlaps(*li))
+ return true;
+ }
+ return false;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+ int Color = -1;
+ bool Share = false;
+ if (!DisableSharing) {
+ // Check if it's possible to reuse any of the used colors.
+ Color = UsedColors.find_first();
+ while (Color != -1) {
+ if (!OverlapWithAssignments(li, Color)) {
+ Share = true;
+ ++NumEliminated;
+ break;
+ }
+ Color = UsedColors.find_next(Color);
+ }
+ }
+
+ // Assign it to the first available color (assumed to be the best) if it's
+ // not possible to share a used color with other objects.
+ if (!Share) {
+ assert(NextColor != -1 && "No more spill slots?");
+ Color = NextColor;
+ UsedColors.set(Color);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ // Record the assignment.
+ Assignments[Color].push_back(li);
+ int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+ DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+
+ // Change size and alignment of the allocated slot. If there are multiple
+ // objects sharing the same slot, then make sure the size and alignment
+ // are large enough for all.
+ unsigned Align = OrigAlignments[FI];
+ if (!Share || Align > MFI->getObjectAlignment(Color))
+ MFI->setObjectAlignment(Color, Align);
+ int64_t Size = OrigSizes[FI];
+ if (!Share || Size > MFI->getObjectSize(Color))
+ MFI->setObjectSize(Color, Size);
+ return Color;
+}
+
+/// Colorslots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+ unsigned NumObjs = MFI->getObjectIndexEnd();
+ SmallVector<int, 16> SlotMapping(NumObjs, -1);
+ SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+ SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+ BitVector UsedColors(NumObjs);
+
+ DEBUG(dbgs() << "Color spill slot intervals:\n");
+ bool Changed = false;
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int NewSS = ColorSlot(li);
+ assert(NewSS >= 0 && "Stack coloring failed?");
+ SlotMapping[SS] = NewSS;
+ RevMap[NewSS].push_back(SS);
+ SlotWeights[NewSS] += li->weight;
+ UsedColors.set(NewSS);
+ Changed |= (SS != NewSS);
+ }
+
+ DEBUG(dbgs() << "\nSpill slots after coloring:\n");
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ li->weight = SlotWeights[SS];
+ }
+ // Sort them by new weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+ DEBUG(SSIntervals[i]->dump());
+ DEBUG(dbgs() << '\n');
+#endif
+
+ if (!Changed)
+ return false;
+
+ // Rewrite all MO_FrameIndex operands.
+ SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+ for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+ int NewFI = SlotMapping[SS];
+ if (NewFI == -1 || (NewFI == (int)SS))
+ continue;
+
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+ RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+ }
+
+ // Delete unused stack slots.
+ while (NextColor != -1) {
+ DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
+ MFI->RemoveStackObject(NextColor);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ return true;
+}
+
+/// RewriteInstruction - Rewrite specified instruction by replacing references
+/// to old frame index with new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+ int NewFI, MachineFunction &MF) {
+ // Update the operands.
+ for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI != OldFI)
+ continue;
+ MO.setIndex(NewFI);
+ }
+
+ // Update the memory references. This changes the MachineMemOperands
+ // directly. They may be in use by multiple instructions, however all
+ // instructions using OldFI are being rewritten to use NewFI.
+ const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+ const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I)
+ if ((*I)->getValue() == OldSV)
+ (*I)->setValue(NewSV);
+}
+
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+ // FIXME: This could be much more aggressive, but we need to investigate
+ // the compile time impact of doing so.
+ bool changed = false;
+
+ SmallVector<MachineInstr*, 4> toErase;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (DCELimit != -1 && (int)NumDead >= DCELimit)
+ break;
+
+ MachineBasicBlock::iterator NextMI = llvm::next(I);
+ if (NextMI == MBB->end()) continue;
+
+ int FirstSS, SecondSS;
+ unsigned LoadReg = 0;
+ unsigned StoreReg = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
+ if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+ ++NumDead;
+ changed = true;
+
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+ ++NumDead;
+ toErase.push_back(I);
+ }
+
+ toErase.push_back(NextMI);
+ ++I;
+ }
+
+ for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return changed;
+}
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Stack Slot Coloring **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ MFI = MF.getFrameInfo();
+ TII = MF.getTarget().getInstrInfo();
+ LS = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ bool Changed = false;
+
+ unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots == 0)
+ // Nothing to do!
+ return false;
+
+ // If there are calls to setjmp or sigsetjmp, don't perform stack slot
+ // coloring. The stack could be modified before the longjmp is executed,
+ // resulting in the wrong value being used afterwards. (See
+ // <rdar://problem/8007500>.)
+ if (MF.exposesReturnsTwice())
+ return false;
+
+ // Gather spill slot references
+ ScanForSpillSlotRefs(MF);
+ InitializeSlots();
+ Changed = ColorSlots(MF);
+
+ NextColor = -1;
+ SSIntervals.clear();
+ for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+ SSRefs[i].clear();
+ SSRefs.clear();
+ OrigAlignments.clear();
+ OrigSizes.clear();
+ AllColors.clear();
+ UsedColors.clear();
+ for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+ Assignments[i].clear();
+ Assignments.clear();
+
+ if (Changed) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= RemoveDeadStores(I);
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 000000000000..b337c5393343
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,825 @@
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates PHI instructions by aggressively coalescing the copies
+// that would be inserted by a naive algorithm and only inserting the copies
+// that are necessary. The coalescing technique initially assumes that all
+// registers appearing in a PHI instruction do not interfere. It then eliminates
+// proven interferences, using dominators to only perform a linear number of
+// interference tests instead of the quadratic number of interference tests
+// that this would naively require. This is a technique derived from:
+//
+// Budimlic, et al. Fast copy coalescing and live-range identification.
+// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+// PLDI '02. ACM, New York, NY, 25-32.
+//
+// The original implementation constructs a data structure they call a dominance
+// forest for this purpose. The dominance forest was shown to be unnecessary,
+// as it is possible to emulate the creation and traversal of a dominance forest
+// by directly using the dominator tree, rather than actually constructing the
+// dominance forest. This technique is explained in:
+//
+// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
+// Quality and Efficiency,
+// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
+// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
+// CGO '09. IEEE, Washington, DC, 114-125.
+//
+// Careful implementation allows for all of the dominator forest interference
+// checks to be performed at once in a single depth-first traversal of the
+// dominator tree, which is what is implemented here.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "llvm/CodeGen/Passes.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+namespace {
+ class StrongPHIElimination : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ StrongPHIElimination() : MachineFunctionPass(ID) {
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ /// This struct represents a single node in the union-find data structure
+ /// representing the variable congruence classes. There is one difference
+ /// from a normal union-find data structure. We steal two bits from the parent
+ /// pointer . One of these bits is used to represent whether the register
+ /// itself has been isolated, and the other is used to represent whether the
+ /// PHI with that register as its destination has been isolated.
+ ///
+ /// Note that this leads to the strange situation where the leader of a
+ /// congruence class may no longer logically be a member, due to being
+ /// isolated.
+ struct Node {
+ enum Flags {
+ kRegisterIsolatedFlag = 1,
+ kPHIIsolatedFlag = 2
+ };
+ Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
+
+ Node *getLeader();
+
+ PointerIntPair<Node*, 2> parent;
+ unsigned value;
+ unsigned rank;
+ };
+
+ /// Add a register in a new congruence class containing only itself.
+ void addReg(unsigned);
+
+ /// Join the congruence classes of two registers. This function is biased
+ /// towards the left argument, i.e. after
+ ///
+ /// addReg(r2);
+ /// unionRegs(r1, r2);
+ ///
+ /// the leader of the unioned congruence class is the same as the leader of
+ /// r1's congruence class prior to the union. This is actually relied upon
+ /// in the copy insertion code.
+ void unionRegs(unsigned, unsigned);
+
+ /// Get the color of a register. The color is 0 if the register has been
+ /// isolated.
+ unsigned getRegColor(unsigned);
+
+ // Isolate a register.
+ void isolateReg(unsigned);
+
+ /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
+ /// isolated. Otherwise, it is the original color of its destination and
+ /// all of its operands (before they were isolated, if they were).
+ unsigned getPHIColor(MachineInstr*);
+
+ /// Isolate a PHI.
+ void isolatePHI(MachineInstr*);
+
+ /// Traverses a basic block, splitting any interferences found between
+ /// registers in the same congruence class. It takes two DenseMaps as
+ /// arguments that it also updates: CurrentDominatingParent, which maps
+ /// a color to the register in that congruence class whose definition was
+ /// most recently seen, and ImmediateDominatingParent, which maps a register
+ /// to the register in the same congruence class that most immediately
+ /// dominates it.
+ ///
+ /// This function assumes that it is being called in a depth-first traversal
+ /// of the dominator tree.
+ void SplitInterferencesForBasicBlock(
+ MachineBasicBlock&,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
+
+ // Lowers a PHI instruction, inserting copies of the source and destination
+ // registers as necessary.
+ void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
+
+ // Merges the live interval of Reg into NewReg and renames Reg to NewReg
+ // everywhere that Reg appears. Requires Reg and NewReg to have non-
+ // overlapping lifetimes.
+ void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
+
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DT;
+ LiveIntervals *LI;
+
+ BumpPtrAllocator Allocator;
+
+ DenseMap<unsigned, Node*> RegNodeMap;
+
+ // Maps a basic block to a list of its defs of registers that appear as PHI
+ // sources.
+ DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
+
+ // Maps a color to a pair of a MachineInstr* and a virtual register, which
+ // is the operand of that PHI corresponding to the current basic block.
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
+
+ // FIXME: Can these two data structures be combined? Would a std::multimap
+ // be any better?
+
+ // Stores pairs of predecessor basic blocks and the source registers of
+ // inserted copy instructions.
+ typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
+ SrcCopySet InsertedSrcCopySet;
+
+ // Maps pairs of predecessor basic blocks and colors to their defining copy
+ // instructions.
+ typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
+ SrcCopyMap;
+ SrcCopyMap InsertedSrcCopyMap;
+
+ // Maps inserted destination copy registers to their defining copy
+ // instructions.
+ typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
+ DestCopyMap InsertedDestCopies;
+ };
+
+ struct MIIndexCompare {
+ MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
+
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
+ }
+
+ LiveIntervals *LI;
+ };
+} // namespace
+
+STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
+STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
+STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
+
+char StrongPHIElimination::ID = 0;
+INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+
+void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
+ // FIXME: This only needs to check from the first terminator, as only the
+ // first terminator can use a virtual register.
+ for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
+ assert (RI != MBB->rend());
+ MachineInstr *MI = &*RI;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ MachineOperand &MO = *OI;
+ if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+ return &MO;
+ }
+ }
+}
+
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<LiveIntervals>();
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+ PHISrcDefs[I].push_back(BBI);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = BBI->getOperand(i);
+ unsigned SrcReg = SrcMO.getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI)
+ PHISrcDefs[DefMI->getParent()].push_back(DefMI);
+ }
+ }
+ }
+
+ // Perform a depth-first traversal of the dominator tree, splitting
+ // interferences amongst PHI-congruence classes.
+ DenseMap<unsigned, unsigned> CurrentDominatingParent;
+ DenseMap<unsigned, unsigned> ImmediateDominatingParent;
+ for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+ SplitInterferencesForBasicBlock(*DI->getBlock(),
+ CurrentDominatingParent,
+ ImmediateDominatingParent);
+ }
+
+ // Insert copies for all PHI source and destination registers.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ InsertCopiesForPHI(BBI, I);
+ }
+ }
+
+ // FIXME: Preserve the equivalence classes during copy insertion and use
+ // the preversed equivalence classes instead of recomputing them.
+ RegNodeMap.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ unsigned SrcReg = BBI->getOperand(i).getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+ }
+ }
+ }
+
+ DenseMap<unsigned, unsigned> RegRenamingMap;
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ while (BBI != BBE && BBI->isPHI()) {
+ MachineInstr *PHI = BBI;
+
+ assert(PHI->getNumOperands() > 0);
+
+ unsigned SrcReg = PHI->getOperand(1).getReg();
+ unsigned SrcColor = getRegColor(SrcReg);
+ unsigned NewReg = RegRenamingMap[SrcColor];
+ if (!NewReg) {
+ NewReg = SrcReg;
+ RegRenamingMap[SrcColor] = SrcReg;
+ }
+ MergeLIsAndRename(SrcReg, NewReg);
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ if (!InsertedDestCopies.count(DestReg))
+ MergeLIsAndRename(DestReg, NewReg);
+
+ for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcReg = PHI->getOperand(i).getReg();
+ MergeLIsAndRename(SrcReg, NewReg);
+ }
+
+ ++BBI;
+ LI->RemoveMachineInstrFromMaps(PHI);
+ PHI->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ // Due to the insertion of copies to split live ranges, the live intervals are
+ // guaranteed to not overlap, except in one case: an original PHI source and a
+ // PHI destination copy. In this case, they have the same value and thus don't
+ // truly intersect, so we merge them into the value live at that point.
+ // FIXME: Is there some better way we can handle this?
+ for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
+ E = InsertedDestCopies.end(); I != E; ++I) {
+ unsigned DestReg = I->first;
+ unsigned DestColor = getRegColor(DestReg);
+ unsigned NewReg = RegRenamingMap[DestColor];
+
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ assert(DestLI.ranges.size() == 1
+ && "PHI destination copy's live interval should be a single live "
+ "range from the beginning of the BB to the copy instruction.");
+ LiveRange *DestLR = DestLI.begin();
+ VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
+ if (!NewVNI) {
+ NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
+ MachineInstr *CopyInstr = I->second;
+ CopyInstr->getOperand(1).setIsKill(true);
+ }
+
+ LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
+ NewLI.addRange(NewLR);
+
+ LI->removeInterval(DestReg);
+ MRI->replaceRegWith(DestReg, NewReg);
+ }
+
+ // Adjust the live intervals of all PHI source registers to handle the case
+ // where the PHIs in successor blocks were the only later uses of the source
+ // register.
+ for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
+ E = InsertedSrcCopySet.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I->first;
+ unsigned SrcReg = I->second;
+ if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
+ SrcReg = RenamedRegister;
+
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (isLiveOut)
+ continue;
+
+ MachineOperand *LastUse = findLastUse(MBB, SrcReg);
+ assert(LastUse);
+ SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
+ SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB));
+ LastUse->setIsKill(true);
+ }
+
+ Allocator.Reset();
+ RegNodeMap.clear();
+ PHISrcDefs.clear();
+ InsertedSrcCopySet.clear();
+ InsertedSrcCopyMap.clear();
+ InsertedDestCopies.clear();
+
+ return Changed;
+}
+
+void StrongPHIElimination::addReg(unsigned Reg) {
+ Node *&N = RegNodeMap[Reg];
+ if (!N)
+ N = new (Allocator) Node(Reg);
+}
+
+StrongPHIElimination::Node*
+StrongPHIElimination::Node::getLeader() {
+ Node *N = this;
+ Node *Parent = parent.getPointer();
+ Node *Grandparent = Parent->parent.getPointer();
+
+ while (Parent != Grandparent) {
+ N->parent.setPointer(Grandparent);
+ N = Grandparent;
+ Parent = Parent->parent.getPointer();
+ Grandparent = Parent->parent.getPointer();
+ }
+
+ return Parent;
+}
+
+unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
+ DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
+ if (RI == RegNodeMap.end())
+ return 0;
+ Node *Node = RI->second;
+ if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
+ return 0;
+ return Node->getLeader()->value;
+}
+
+void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
+ Node *Node1 = RegNodeMap[Reg1]->getLeader();
+ Node *Node2 = RegNodeMap[Reg2]->getLeader();
+
+ if (Node1->rank > Node2->rank) {
+ Node2->parent.setPointer(Node1->getLeader());
+ } else if (Node1->rank < Node2->rank) {
+ Node1->parent.setPointer(Node2->getLeader());
+ } else if (Node1 != Node2) {
+ Node2->parent.setPointer(Node1->getLeader());
+ Node1->rank++;
+ }
+}
+
+void StrongPHIElimination::isolateReg(unsigned Reg) {
+ Node *Node = RegNodeMap[Reg];
+ Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
+}
+
+unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ Node *DestNode = RegNodeMap[DestReg];
+ if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
+ return 0;
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
+ if (SrcColor)
+ return SrcColor;
+ }
+ return 0;
+}
+
+void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+ Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
+ Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
+}
+
+/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
+/// interferences found between registers in the same congruence class. It
+/// takes two DenseMaps as arguments that it also updates:
+///
+/// 1) CurrentDominatingParent, which maps a color to the register in that
+/// congruence class whose definition was most recently seen.
+///
+/// 2) ImmediateDominatingParent, which maps a register to the register in the
+/// same congruence class that most immediately dominates it.
+///
+/// This function assumes that it is being called in a depth-first traversal
+/// of the dominator tree.
+///
+/// The algorithm used here is a generalization of the dominance-based SSA test
+/// for two variables. If there are variables a_1, ..., a_n such that
+///
+/// def(a_1) dom ... dom def(a_n),
+///
+/// then we can test for an interference between any two a_i by only using O(n)
+/// interference tests between pairs of variables. If i < j and a_i and a_j
+/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
+/// Thus, in order to test for an interference involving a_i, we need only check
+/// for a potential interference with a_i+1.
+///
+/// This method can be generalized to arbitrary sets of variables by performing
+/// a depth-first traversal of the dominator tree. As we traverse down a branch
+/// of the dominator tree, we keep track of the current dominating variable and
+/// only perform an interference test with that variable. However, when we go to
+/// another branch of the dominator tree, the definition of the current dominating
+/// variable may no longer dominate the current block. In order to correct this,
+/// we need to use a stack of past choices of the current dominating variable
+/// and pop from this stack until we find a variable whose definition actually
+/// dominates the current block.
+///
+/// There will be one push on this stack for each variable that has become the
+/// current dominating variable, so instead of using an explicit stack we can
+/// simply associate the previous choice for a current dominating variable with
+/// the new choice. This works better in our implementation, where we test for
+/// interference in multiple distinct sets at once.
+void
+StrongPHIElimination::SplitInterferencesForBasicBlock(
+ MachineBasicBlock &MBB,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
+ // Sort defs by their order in the original basic block, as the code below
+ // assumes that it is processing definitions in dominance order.
+ std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
+ std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
+
+ for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
+ BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
+ for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
+ E = (*BBI)->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+
+ // FIXME: This would be faster if it were possible to bail out of checking
+ // an instruction's operands after the explicit defs, but this is incorrect
+ // for variadic instructions, which may appear before register allocation
+ // in the future.
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned DestReg = MO.getReg();
+ if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // If the virtual register being defined is not used in any PHI or has
+ // already been isolated, then there are no more interferences to check.
+ unsigned DestColor = getRegColor(DestReg);
+ if (!DestColor)
+ continue;
+
+ // The input to this pass sometimes is not in SSA form in every basic
+ // block, as some virtual registers have redefinitions. We could eliminate
+ // this by fixing the passes that generate the non-SSA code, or we could
+ // handle it here by tracking defining machine instructions rather than
+ // virtual registers. For now, we just handle the situation conservatively
+ // in a way that will possibly lead to false interferences.
+ unsigned &CurrentParent = CurrentDominatingParent[DestColor];
+ unsigned NewParent = CurrentParent;
+ if (NewParent == DestReg)
+ continue;
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+
+ // If NewParent is nonzero, then its definition dominates the current
+ // instruction, so it is only necessary to check for the liveness of
+ // NewParent in order to check for an interference.
+ if (NewParent
+ && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
+ // If there is an interference, always isolate the new register. This
+ // could be improved by using a heuristic that decides which of the two
+ // registers to isolate.
+ isolateReg(DestReg);
+ CurrentParent = NewParent;
+ } else {
+ // If there is no interference, update ImmediateDominatingParent and set
+ // the CurrentDominatingParent for this color to the current register.
+ ImmediateDominatingParent[DestReg] = NewParent;
+ CurrentParent = DestReg;
+ }
+ }
+ }
+
+ // We now walk the PHIs in successor blocks and check for interferences. This
+ // is necessary because the use of a PHI's operands are logically contained in
+ // the predecessor block. The def of a PHI's destination register is processed
+ // along with the other defs in a basic block.
+
+ CurrentPHIForColor.clear();
+
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ MachineInstr *PHI = BBI;
+
+ // If a PHI is already isolated, either by being isolated directly or
+ // having all of its operands isolated, ignore it.
+ unsigned Color = getPHIColor(PHI);
+ if (!Color)
+ continue;
+
+ // Find the index of the PHI operand that corresponds to this basic block.
+ unsigned PredIndex;
+ for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
+ if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
+ break;
+ }
+ assert(PredIndex < PHI->getNumOperands());
+ unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ unsigned &CurrentParent = CurrentDominatingParent[Color];
+ unsigned NewParent = CurrentParent;
+ while (NewParent
+ && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+ CurrentParent = NewParent;
+
+ // If there is an interference with a register, always isolate the
+ // register rather than the PHI. It is also possible to isolate the
+ // PHI, but that introduces copies for all of the registers involved
+ // in that PHI.
+ if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
+ && NewParent != PredOperandReg)
+ isolateReg(NewParent);
+
+ std::pair<MachineInstr*, unsigned>
+ &CurrentPHI = CurrentPHIForColor[Color];
+
+ // If two PHIs have the same operand from every shared predecessor, then
+ // they don't actually interfere. Otherwise, isolate the current PHI. This
+ // could possibly be improved, e.g. we could isolate the PHI with the
+ // fewest operands.
+ if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
+ isolatePHI(PHI);
+ else
+ CurrentPHI = std::make_pair(PHI, PredOperandReg);
+ }
+ }
+}
+
+void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
+ MachineBasicBlock *MBB) {
+ assert(PHI->isPHI());
+ ++NumPHIsLowered;
+ unsigned PHIColor = getPHIColor(PHI);
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = PHI->getOperand(i);
+
+ // If a source is defined by an implicit def, there is no need to insert a
+ // copy in the predecessor.
+ if (SrcMO.isUndef())
+ continue;
+
+ unsigned SrcReg = SrcMO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
+ unsigned SrcColor = getRegColor(SrcReg);
+
+ // If neither the PHI nor the operand were isolated, then we only need to
+ // set the phi-kill flag on the VNInfo at this PHI.
+ if (PHIColor && SrcColor == PHIColor) {
+ LiveInterval &SrcInterval = LI->getInterval(SrcReg);
+ SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
+ VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
+ (void)SrcVNI;
+ assert(SrcVNI);
+ continue;
+ }
+
+ unsigned CopyReg = 0;
+ if (PHIColor) {
+ SrcCopyMap::const_iterator I
+ = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
+ CopyReg
+ = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
+ }
+
+ if (!CopyReg) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineBasicBlock::iterator
+ CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
+ unsigned SrcSubReg = SrcMO.getSubReg();
+ MachineInstr *CopyInstr = BuildMI(*PredBB,
+ CopyInsertPoint,
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyReg).addReg(SrcReg, 0, SrcSubReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ ++NumSrcCopiesInserted;
+
+ // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
+ // the newly added range.
+ LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
+ InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
+
+ addReg(CopyReg);
+ if (PHIColor) {
+ unionRegs(PHIColor, CopyReg);
+ assert(getRegColor(CopyReg) != CopyReg);
+ } else {
+ PHIColor = CopyReg;
+ assert(getRegColor(CopyReg) == CopyReg);
+ }
+
+ // Insert into map if not already there.
+ InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor),
+ CopyInstr));
+ }
+
+ SrcMO.setReg(CopyReg);
+
+ // If SrcReg is not live beyond the PHI, trim its interval so that it is no
+ // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
+ // processed later, but this is still correct to do at this point because we
+ // never rely on LiveIntervals being correct while inserting copies.
+ // FIXME: Should this just count uses at PHIs like the normal PHIElimination
+ // pass does?
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
+ if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
+ SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
+ }
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ unsigned DestColor = getRegColor(DestReg);
+
+ if (PHIColor && DestColor == PHIColor) {
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+
+ // Set the phi-def flag for the VN at this PHI.
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
+ assert(DestVNI);
+
+ // Prior to PHI elimination, the live ranges of PHIs begin at their defining
+ // instruction. After PHI elimination, PHI instructions are replaced by VNs
+ // with the phi-def flag set, and the live ranges of these VNs start at the
+ // beginning of the basic block.
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ DestVNI->def = MBBStartIndex;
+ DestLI.addRange(LiveRange(MBBStartIndex,
+ PHIIndex.getRegSlot(),
+ DestVNI));
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
+ unsigned CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineInstr *CopyInstr = BuildMI(*MBB,
+ MBB->SkipPHIsAndLabels(MBB->begin()),
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DestReg).addReg(CopyReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ PHI->getOperand(0).setReg(CopyReg);
+ ++NumDestCopiesInserted;
+
+ // Add the region from the beginning of MBB to the copy instruction to
+ // CopyReg's live interval, and give the VNInfo the phidef flag.
+ LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
+ VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
+ LI->getVNInfoAllocator());
+ CopyLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getRegSlot(),
+ CopyVNI));
+
+ // Adjust DestReg's live interval to adjust for its new definition at
+ // CopyInstr.
+ LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot());
+
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
+ assert(DestVNI);
+ DestVNI->def = DestCopyIndex.getRegSlot();
+
+ InsertedDestCopies[CopyReg] = CopyInstr;
+}
+
+void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
+ if (Reg == NewReg)
+ return;
+
+ LiveInterval &OldLI = LI->getInterval(Reg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ // Merge the live ranges of the two registers.
+ DenseMap<VNInfo*, VNInfo*> VNMap;
+ for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
+ LRI != LRE; ++LRI) {
+ LiveRange OldLR = *LRI;
+ VNInfo *OldVN = OldLR.valno;
+
+ VNInfo *&NewVN = VNMap[OldVN];
+ if (!NewVN) {
+ NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
+ VNMap[OldVN] = NewVN;
+ }
+
+ LiveRange LR(OldLR.start, OldLR.end, NewVN);
+ NewLI.addRange(LR);
+ }
+
+ // Remove the LiveInterval for the register being renamed and replace all
+ // of its defs and uses with the new register.
+ LI->removeInterval(Reg);
+ MRI->replaceRegWith(Reg, NewReg);
+}
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
new file mode 100644
index 000000000000..1ec88172a0b0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -0,0 +1,970 @@
+//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass duplicates basic blocks ending in unconditional branches into
+// the tails of their predecessors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplication"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumTails , "Number of tails duplicated");
+STATISTIC(NumTailDups , "Number of tail duplicated blocks");
+STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumAddedPHIs , "Number of phis added");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned>
+TailDuplicateSize("tail-dup-size",
+ cl::desc("Maximum instructions to consider tail duplicating"),
+ cl::init(2), cl::Hidden);
+
+static cl::opt<bool>
+TailDupVerify("tail-dup-verify",
+ cl::desc("Verify sanity of PHI instructions during taildup"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned>
+TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
+
+typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
+
+namespace {
+ /// TailDuplicatePass - Perform tail duplication.
+ class TailDuplicatePass : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineModuleInfo *MMI;
+ MachineRegisterInfo *MRI;
+ OwningPtr<RegScavenger> RS;
+ bool PreRegAlloc;
+
+ // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
+ SmallVector<unsigned, 16> SSAUpdateVRs;
+
+ // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
+ // source virtual registers.
+ DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
+
+ public:
+ static char ID;
+ explicit TailDuplicatePass() :
+ MachineFunctionPass(ID), PreRegAlloc(false) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB);
+ void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+ const DenseSet<unsigned> &UsedByPhi,
+ bool Remove);
+ void DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi);
+ void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*, 8> &Succs);
+ bool TailDuplicateBlocks(MachineFunction &MF);
+ bool shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple, MachineBasicBlock &TailBB);
+ bool isSimpleBB(MachineBasicBlock *TailBB);
+ bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
+ bool duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF);
+
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ };
+
+ char TailDuplicatePass::ID = 0;
+}
+
+char &llvm::TailDuplicateID = TailDuplicatePass::ID;
+
+INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication",
+ false, false)
+
+bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ PreRegAlloc = MRI->isSSA();
+ RS.reset();
+ if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
+ RS.reset(new RegScavenger());
+
+ bool MadeChange = false;
+ while (TailDuplicateBlocks(MF))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
+ MBB->pred_end());
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != MBB->end()) {
+ if (!MI->isPHI())
+ break;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ bool Found = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (PHIBB == PredBB) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " missing input from predecessor BB#"
+ << PredBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (CheckExtra && !Preds.count(PHIBB)) {
+ dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
+ << ": " << *MI;
+ dbgs() << " extra input from predecessor BB#"
+ << PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ if (PHIBB->getNumber() < 0) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+ ++MI;
+ }
+ }
+}
+
+/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
+bool
+TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF) {
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock*, 8> TDBBs;
+ SmallVector<MachineInstr*, 16> Copies;
+ if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
+ return false;
+
+ ++NumTails;
+
+ SmallVector<MachineInstr*, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+ if (PreRegAlloc)
+ UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumInstrDups -= MBB->size();
+ RemoveDeadBlock(MBB);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = 0;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ // Rewrite uses that are outside of the original def's block.
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ while (UI != MRI->use_end()) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI->isDebugValue()) {
+ // SSAUpdate can replace the use with an undef. That creates
+ // a debug instruction that is a kill.
+ // FIXME: Should it SSAUpdate job to delete debug instructions
+ // instead of replacing the use with undef?
+ UseMI->eraseFromParent();
+ continue;
+ }
+ if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ if (!Copy->isCopy())
+ continue;
+ unsigned Dst = Copy->getOperand(0).getReg();
+ unsigned Src = Copy->getOperand(1).getReg();
+ if (MRI->hasOneNonDBGUse(Src) &&
+ MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+
+ if (NewPHIs.size())
+ NumAddedPHIs += NewPHIs.size();
+
+ return true;
+}
+
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through. Tail-duplicate their instructions
+/// into their predecessors to eliminate (dynamic) branches.
+bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ if (PreRegAlloc && TailDupVerify) {
+ DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
+ VerifyPHIs(MF, true);
+ }
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+
+ if (NumTails == TailDupLimit)
+ break;
+
+ bool IsSimple = isSimpleBB(MBB);
+
+ if (!shouldTailDuplicate(MF, IsSimple, *MBB))
+ continue;
+
+ MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
+ }
+
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(MF, false);
+
+ return MadeChange;
+}
+
+static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->isDebugValue())
+ continue;
+ if (UseMI->getParent() != BB)
+ return true;
+ }
+ return false;
+}
+
+static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
+ if (MI->getOperand(i+1).getMBB() == SrcBB)
+ return i;
+ return 0;
+}
+
+
+// Remember which registers are used by phis in this block. This is
+// used to determine which registers are liveout while modifying the
+// block (which is why we need to copy the information).
+static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
+ DenseSet<unsigned> *UsedByPhi) {
+ for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end();
+ I != E; ++I) {
+ const MachineInstr &MI = *I;
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ UsedByPhi->insert(SrcReg);
+ }
+ }
+}
+
+/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
+/// SSA update.
+void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB) {
+ DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg);
+ if (LI != SSAUpdateVals.end())
+ LI->second.push_back(std::make_pair(BB, NewReg));
+ else {
+ AvailableValsTy Vals;
+ Vals.push_back(std::make_pair(BB, NewReg));
+ SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
+ SSAUpdateVRs.push_back(OrigReg);
+ }
+}
+
+/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
+/// Remember the source register that's contributed by PredBB and update SSA
+/// update map.
+void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ bool Remove) {
+ unsigned DefReg = MI->getOperand(0).getReg();
+ unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
+ assert(SrcOpIdx && "Unable to find matching PHI source?");
+ unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
+
+ // Insert a copy from source to the end of the block. The def register is the
+ // available value liveout of the block.
+ unsigned NewDef = MRI->createVirtualRegister(RC);
+ Copies.push_back(std::make_pair(NewDef, SrcReg));
+ if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
+ AddSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+ if (!Remove)
+ return;
+
+ // Remove PredBB from the PHI node.
+ MI->RemoveOperand(SrcOpIdx+1);
+ MI->RemoveOperand(SrcOpIdx);
+ if (MI->getNumOperands() == 1)
+ MI->eraseFromParent();
+}
+
+/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
+/// the source operands due to earlier PHI translation.
+void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi) {
+ MachineInstr *NewMI = TII->duplicate(MI, MF);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ LocalVRMap.insert(std::make_pair(Reg, NewReg));
+ if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
+ AddSSAUpdateEntry(Reg, NewReg, PredBB);
+ } else {
+ DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
+ if (VI != LocalVRMap.end()) {
+ MO.setReg(VI->second);
+ MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
+ }
+ }
+ }
+ PredBB->insert(PredBB->instr_end(), NewMI);
+}
+
+/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
+/// blocks, the successors have gained new predecessors. Update the PHI
+/// instructions in them accordingly.
+void
+TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*,8> &Succs) {
+ for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
+ SE = Succs.end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+ II != EE; ++II) {
+ if (!II->isPHI())
+ break;
+ MachineInstrBuilder MIB(*FromBB->getParent(), II);
+ unsigned Idx = 0;
+ for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ Idx = i;
+ break;
+ }
+ }
+
+ assert(Idx != 0);
+ MachineOperand &MO0 = II->getOperand(Idx);
+ unsigned Reg = MO0.getReg();
+ if (isDead) {
+ // Folded into the previous BB.
+ // There could be duplicate phi source entries. FIXME: Should sdisel
+ // or earlier pass fixed this?
+ for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ II->RemoveOperand(i+1);
+ II->RemoveOperand(i);
+ }
+ }
+ } else
+ Idx = 0;
+
+ // If Idx is set, the operands at Idx and Idx+1 must be removed.
+ // We reuse the location to avoid expensive RemoveOperand calls.
+
+ DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
+ if (LI != SSAUpdateVals.end()) {
+ // This register is defined in the tail block.
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ // If we didn't duplicate a bb into a particular predecessor, we
+ // might still have added an entry to SSAUpdateVals to correcly
+ // recompute SSA. If that case, avoid adding a dummy extra argument
+ // this PHI.
+ if (!SrcBB->isSuccessor(SuccBB))
+ continue;
+
+ unsigned SrcReg = LI->second[j].second;
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(SrcReg);
+ II->getOperand(Idx+1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ MIB.addReg(SrcReg).addMBB(SrcBB);
+ }
+ }
+ } else {
+ // Live in tail block, must also be live in predecessors.
+ for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = TDBBs[j];
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(Reg);
+ II->getOperand(Idx+1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ MIB.addReg(Reg).addMBB(SrcBB);
+ }
+ }
+ }
+ if (Idx != 0) {
+ II->RemoveOperand(Idx+1);
+ II->RemoveOperand(Idx);
+ }
+ }
+ }
+}
+
+/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
+bool
+TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple,
+ MachineBasicBlock &TailBB) {
+ // Only duplicate blocks that end with unconditional branches.
+ if (TailBB.canFallThrough())
+ return false;
+
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB.isSuccessor(&TailBB))
+ return false;
+
+ // Set the limit on the cost to duplicate. When optimizing for size,
+ // duplicate only one, because one branch instruction can be eliminated to
+ // compensate for the duplication.
+ unsigned MaxDuplicateCount;
+ if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
+ MaxDuplicateCount = 1;
+ else
+ MaxDuplicateCount = TailDuplicateSize;
+
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
+
+ bool HasIndirectbr = false;
+ if (!TailBB.empty())
+ HasIndirectbr = TailBB.back().isIndirectBranch();
+
+ if (HasIndirectbr && PreRegAlloc)
+ MaxDuplicateCount = 20;
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned InstrCount = 0;
+ for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (I->isNotDuplicable())
+ return false;
+
+ // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+ // A return may expand into a lot more instructions (e.g. reload of callee
+ // saved registers) after PEI.
+ if (PreRegAlloc && I->isReturn())
+ return false;
+
+ // Avoid duplicating calls before register allocation. Calls presents a
+ // barrier to register allocation so duplicating them may end up increasing
+ // spills.
+ if (PreRegAlloc && I->isCall())
+ return false;
+
+ if (!I->isPHI() && !I->isDebugValue())
+ InstrCount += 1;
+
+ if (InstrCount > MaxDuplicateCount)
+ return false;
+ }
+
+ if (HasIndirectbr && PreRegAlloc)
+ return true;
+
+ if (IsSimple)
+ return true;
+
+ if (!PreRegAlloc)
+ return true;
+
+ return canCompletelyDuplicateBB(TailBB);
+}
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+ if (TailBB->succ_size() != 1)
+ return false;
+ if (TailBB->pred_empty())
+ return false;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ MachineBasicBlock::iterator E = TailBB->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I == E)
+ return true;
+ return I->isUnconditionalBranch();
+}
+
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+ SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
+ for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+ SE = A.succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *BB = *SI;
+ if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+ return true;
+ }
+
+ return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+ for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+ PE = BB.pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ return false;
+
+ if (!PredCond.empty())
+ return false;
+ }
+ return true;
+}
+
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &UsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
+ TailBB->succ_end());
+ SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ bool Changed = false;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->getLandingPadSuccessor())
+ continue;
+
+ if (bothUsedInPHI(*PredBB, Succs))
+ continue;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+
+ Changed = true;
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
+
+ MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
+ // Make PredFBB explicit.
+ if (PredCond.empty())
+ PredFBB = PredTBB;
+
+ // Make fall through explicit.
+ if (!PredTBB)
+ PredTBB = NextBB;
+ if (!PredFBB)
+ PredFBB = NextBB;
+
+ // Redirect
+ if (PredFBB == TailBB)
+ PredFBB = NewTarget;
+ if (PredTBB == TailBB)
+ PredTBB = NewTarget;
+
+ // Make the branch unconditional if possible
+ if (PredTBB == PredFBB) {
+ PredCond.clear();
+ PredFBB = NULL;
+ }
+
+ // Avoid adding fall through branches.
+ if (PredFBB == NextBB)
+ PredFBB = NULL;
+ if (PredTBB == NextBB && PredFBB == NULL)
+ PredTBB = NULL;
+
+ TII->RemoveBranch(*PredBB);
+
+ if (PredTBB)
+ TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+ PredBB->removeSuccessor(TailBB);
+ unsigned NumSuccessors = PredBB->succ_size();
+ assert(NumSuccessors <= 1);
+ if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
+ PredBB->addSuccessor(NewTarget);
+
+ TDBBs.push_back(PredBB);
+ }
+ return Changed;
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+
+ DenseSet<unsigned> UsedByPhi;
+ getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+ if (IsSimple)
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() > 1)
+ continue;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ continue;
+
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ TDBBs.push_back(PredBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+
+ if (RS && !TailBB->livein_empty()) {
+ // Update PredBB livein.
+ RS->enterBasicBlock(PredBB);
+ if (!PredBB->empty())
+ RS->forward(prior(PredBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
+ E = TailBB->livein_end(); I != E; ++I) {
+ if (!RegsLiveAtExit[*I])
+ // If a register is previously livein to the tail but it's not live
+ // at the end of predecessor BB, then it should be added to its
+ // livein list.
+ PredBB->addLiveIn(*I);
+ }
+ }
+
+ // Clone the contents of TailBB into PredBB.
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ // Use instr_iterator here to properly handle bundles, e.g.
+ // ARM Thumb2 IT block.
+ MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
+ while (I != TailBB->instr_end()) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ } else {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
+ }
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
+ }
+
+ // Simplify
+ TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
+ NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end(); I != E; ++I)
+ PredBB->addSuccessor(*I);
+
+ Changed = true;
+ ++NumTailDups;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PrevBB->succ_size() == 1 &&
+ !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
+ PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
+ << "From MBB: " << *TailBB);
+ if (PreRegAlloc) {
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ if (MI->getParent())
+ MI->eraseFromParent();
+ }
+
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
+ DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
+ MI->eraseFromParent();
+ }
+ MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first)
+ .addReg(CopyInfos[i].second));
+ }
+ } else {
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+ }
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ }
+
+ // If this is after register allocation, there are no phis to fix.
+ if (!PreRegAlloc)
+ return Changed;
+
+ // If we made no changes so far, we are safe.
+ if (!Changed)
+ return Changed;
+
+
+ // Handle the nasty case in that we duplicated a block that is part of a loop
+ // into some but not all of its predecessors. For example:
+ // 1 -> 2 <-> 3 |
+ // \ |
+ // \---> rest |
+ // if we duplicate 2 into 1 but not into 3, we end up with
+ // 12 -> 3 <-> 2 -> rest |
+ // \ / |
+ // \----->-----/ |
+ // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
+ // with a phi in 3 (which now dominates 2).
+ // What we do here is introduce a copy in 3 of the register defined by the
+ // phi, just like when we are duplicating 2 into 3, but we don't copy any
+ // real instructions or remove the 3 -> 2 edge from the phi in 2.
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
+ continue;
+
+ // EH edges
+ if (PredBB->succ_size() != 1)
+ continue;
+
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
+ }
+ }
+
+ return Changed;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ // Remove all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // Remove the block.
+ MBB->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
new file mode 100644
index 000000000000..883e9d1846d9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -0,0 +1,44 @@
+//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() {
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is overridden for some targets.
+int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
+
+int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // By default, assume all frame indices are referenced via whatever
+ // getFrameRegister() says. The target can override this if it's doing
+ // something different.
+ FrameReg = RI->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
new file mode 100644
index 000000000000..20eb91879317
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -0,0 +1,739 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+static cl::opt<bool> DisableHazardRecognizer(
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
+ if (OpNum >= MCID.getNumOperands())
+ return 0;
+
+ short RegClass = MCID.OpInfo[OpNum].RegClass;
+ if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+ return TRI->getPointerRegClass(MF, RegClass);
+
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return 0;
+
+ // Otherwise just look it up normally.
+ return TRI->getRegClass(RegClass);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overloaded in the target code to do that.
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+
+
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ }
+
+ return Length;
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+
+ // Remove all the old successors of MBB from the CFG.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Remove all the dead instructions from the end of MBB.
+ MBB->erase(Tail, MBB->end());
+
+ // If MBB isn't immediately before MBB, insert a branch to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+ Tail->getDebugLoc());
+ MBB->addSuccessor(NewDest);
+}
+
+// commuteInstruction - The default implementation of this method just exchanges
+// the two operands returned by findCommutedOpIndices.
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool HasDef = MCID.getNumDefs();
+ if (HasDef && !MI->getOperand(0).isReg())
+ // No idea how to commute this instruction. Target should implement its own.
+ return 0;
+ unsigned Idx1, Idx2;
+ if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Don't know how to commute: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+
+ assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
+ "This only knows how to commute register operands so far");
+ unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
+ unsigned Reg1 = MI->getOperand(Idx1).getReg();
+ unsigned Reg2 = MI->getOperand(Idx2).getReg();
+ unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0;
+ unsigned SubReg1 = MI->getOperand(Idx1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
+ bool Reg1IsKill = MI->getOperand(Idx1).isKill();
+ bool Reg2IsKill = MI->getOperand(Idx2).isKill();
+ // If destination is tied to either of the commuted source register, then
+ // it must be updated.
+ if (HasDef && Reg0 == Reg1 &&
+ MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
+ Reg2IsKill = false;
+ Reg0 = Reg2;
+ SubReg0 = SubReg2;
+ } else if (HasDef && Reg0 == Reg2 &&
+ MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+ Reg1IsKill = false;
+ Reg0 = Reg1;
+ SubReg0 = SubReg1;
+ }
+
+ if (NewMI) {
+ // Create a new instruction.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ }
+
+ if (HasDef) {
+ MI->getOperand(0).setReg(Reg0);
+ MI->getOperand(0).setSubReg(SubReg0);
+ }
+ MI->getOperand(Idx2).setReg(Reg1);
+ MI->getOperand(Idx1).setReg(Reg2);
+ MI->getOperand(Idx2).setSubReg(SubReg1);
+ MI->getOperand(Idx1).setSubReg(SubReg2);
+ MI->getOperand(Idx2).setIsKill(Reg1IsKill);
+ MI->getOperand(Idx1).setIsKill(Reg2IsKill);
+ return MI;
+}
+
+/// findCommutedOpIndices - If specified MI is commutable, return the two
+/// operand indices that would swap value. Return true if the instruction
+/// is not in a form which this routine understands.
+bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ assert(!MI->isBundle() &&
+ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+ // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+ // is not true, then the target must implement this.
+ SrcOpIdx1 = MCID.getNumDefs();
+ SrcOpIdx2 = SrcOpIdx1 + 1;
+ if (!MI->getOperand(SrcOpIdx1).isReg() ||
+ !MI->getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
+}
+
+
+bool
+TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+ if (!MI->isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
+bool TargetInstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ bool MadeChange = false;
+
+ assert(!MI->isBundle() &&
+ "TargetInstrInfo::PredicateInstruction() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MI->isPredicable())
+ return false;
+
+ for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate()) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImm());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMBB(Pred[j].getMBB());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ return MadeChange;
+}
+
+bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+}
+
+bool
+TargetInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+MachineInstr *TargetInstrInfo::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const {
+ assert(!Orig->isNotDuplicable() &&
+ "Instruction cannot be duplicated");
+ return MF.CloneMachineInstr(Orig);
+}
+
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+ unsigned FoldIdx) {
+ assert(MI->isCopy() && "MI must be a COPY instruction");
+ if (MI->getNumOperands() != 2)
+ return 0;
+ assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand");
+
+ const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+ if (FoldOp.getSubReg() || LiveOp.getSubReg())
+ return 0;
+
+ unsigned FoldReg = FoldOp.getReg();
+ unsigned LiveReg = LiveOp.getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+ "Cannot fold physregs");
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+ if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+ if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
+ return RC;
+
+ // FIXME: Allow folding when register classes are memory compatible.
+ return 0;
+}
+
+bool TargetInstrInfo::
+canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ unsigned Flags = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (MI->getOperand(Ops[i]).isDef())
+ Flags |= MachineMemOperand::MOStore;
+ else
+ Flags |= MachineMemOperand::MOLoad;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(MBB && "foldMemoryOperand needs an inserted instruction");
+ MachineFunction &MF = *MBB->getParent();
+
+ // Ask the target to do the actual folding.
+ if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+ // Add a memory operand, foldMemoryOperandImpl doesn't do that.
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FI) != -1);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ NewMI->addMemOperand(MF, MMO);
+
+ // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
+ return MBB->insert(MI, NewMI);
+ }
+
+ // Straight COPY may fold as load/store.
+ if (!MI->isCopy() || Ops.size() != 1)
+ return 0;
+
+ const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ if (!RC)
+ return 0;
+
+ const MachineOperand &MO = MI->getOperand(1-Ops[0]);
+ MachineBasicBlock::iterator Pos = MI;
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ if (Flags == MachineMemOperand::MOStore)
+ storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
+ else
+ loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
+ return --Pos;
+}
+
+/// foldMemoryOperand - Same as the previous version except it allows folding
+/// of any load and store from / to any address, not just from a specific
+/// stack slot.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
+#endif
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+ if (!NewMI) return 0;
+
+ NewMI = MBB.insert(MI, NewMI);
+
+ // Copy the memoperands from the load to the folded instruction.
+ NewMI->setMemRefs(LoadMI->memoperands_begin(),
+ LoadMI->memoperands_end());
+
+ return NewMI;
+}
+
+bool TargetInstrInfo::
+isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+
+ // Remat clients assume operand 0 is the defined register.
+ if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
+ return false;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ // A sub-register definition can only be rematerialized if the instruction
+ // doesn't read the other parts of the register. Otherwise it is really a
+ // read-modify-write operation on the full virtual register which cannot be
+ // moved safely.
+ if (TargetRegisterInfo::isVirtualRegister(DefReg) &&
+ MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg))
+ return false;
+
+ // A load from a fixed stack slot can be rematerialized. This may be
+ // redundant with subsequent checks, but it's target-independent,
+ // simple, and a common case.
+ int FrameIdx = 0;
+ if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+ MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ return true;
+
+ // Avoid instructions obviously unsafe for remat.
+ if (MI->isNotDuplicable() || MI->mayStore() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ // Don't remat inline asm. We have no idea how expensive it is
+ // even if it's side effect free.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Avoid instructions which load from potentially varying memory.
+ if (MI->mayLoad() && !MI->isInvariantLoad(AA))
+ return false;
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Check for a well-behaved physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI.isConstantPhysReg(Reg, MF))
+ return false;
+ } else {
+ // A physreg def. We can't remat it.
+ return false;
+ }
+ continue;
+ }
+
+ // Only allow one virtual-register def. There may be multiple defs of the
+ // same virtual register, though.
+ if (MO.isDef() && Reg != DefReg)
+ return false;
+
+ // Don't allow any virtual-register uses. Rematting an instruction with
+ // virtual register uses would length the live ranges of the uses, which
+ // is not necessarily a good idea, certainly not "trivial".
+ if (MO.isUse())
+ return false;
+ }
+
+ // Everything checked out.
+ return true;
+}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Terminators and labels can't be scheduled around.
+ if (MI->isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI))
+ return true;
+
+ return false;
+}
+
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfo::usePreRAHazardRecognizer() const {
+ return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Dummy hazard recognizer allows all instructions to issue.
+ return new ScheduleHazardRecognizer();
+}
+
+// Default implementation of CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "misched");
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG latency interface.
+//===----------------------------------------------------------------------===//
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr latency interface.
+//===----------------------------------------------------------------------===//
+
+unsigned
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Class = MI->getDesc().getSchedClass();
+ int UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps >= 0)
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+/// Return the default expected latency for a def based on it's opcode.
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
+ const MachineInstr *DefMI) const {
+ if (DefMI->isTransient())
+ return 0;
+ if (DefMI->mayLoad())
+ return SchedModel->LoadLatency;
+ if (isHighLatencyDef(DefMI->getOpcode()))
+ return SchedModel->HighLatency;
+ return 1;
+}
+
+unsigned TargetInstrInfo::
+getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ // Default to one cycle for no itinerary. However, an "empty" itinerary may
+ // still have a MinLatency property, which getStageLatency checks.
+ if (!ItinData)
+ return MI->mayLoad() ? 2 : 1;
+
+ return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1);
+}
+
+/// Both DefMI and UseMI must be valid. By default, call directly to the
+/// itinerary. This may be overriden by the target.
+int TargetInstrInfo::
+getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+/// If we can determine the operand latency from the def only, without itinerary
+/// lookup, do so. Otherwise return -1.
+int TargetInstrInfo::computeDefOperandLatency(
+ const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, bool FindMin) const {
+
+ // Let the target hook getInstrLatency handle missing itineraries.
+ if (!ItinData)
+ return getInstrLatency(ItinData, DefMI);
+
+ // Return a latency based on the itinerary properties and defining instruction
+ // if possible. Some common subtargets don't require per-operand latency,
+ // especially for minimum latencies.
+ if (FindMin) {
+ // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
+ // it exists before defaulting to MinLatency.
+ if (ItinData->SchedModel->MinLatency >= 0)
+ return getInstrLatency(ItinData, DefMI);
+
+ // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
+ // For empty itineraries, short-cirtuit the check and default to one cycle.
+ if (ItinData->isEmpty())
+ return 1;
+ }
+ else if(ItinData->isEmpty())
+ return defaultDefLatency(ItinData->SchedModel, DefMI);
+
+ // ...operand lookup required
+ return -1;
+}
+
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use when the operand indices are already known. UseMI may
+/// be NULL for an unknown use.
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
+///
+/// Depending on the subtarget's itinerary properties, this may or may not need
+/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or
+/// UseIdx to compute min latency.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx,
+ bool FindMin) const {
+
+ int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin);
+ if (DefLatency >= 0)
+ return DefLatency;
+
+ assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+ int OperLatency = 0;
+ if (UseMI)
+ OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ else {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+ }
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
+ return InstrLatency;
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
new file mode 100644
index 000000000000..f42bdbd27643
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -0,0 +1,1305 @@
+//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLoweringBase class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
+ Names[RTLIB::SHL_I16] = "__ashlhi3";
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SHL_I128] = "__ashlti3";
+ Names[RTLIB::SRL_I16] = "__lshrhi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRL_I128] = "__lshrti3";
+ Names[RTLIB::SRA_I16] = "__ashrhi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
+ Names[RTLIB::MUL_I16] = "__mulhi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::MULO_I32] = "__mulosi4";
+ Names[RTLIB::MULO_I64] = "__mulodi4";
+ Names[RTLIB::MULO_I128] = "__muloti4";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
+ Names[RTLIB::SDIV_I16] = "__divhi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
+ Names[RTLIB::UDIV_I16] = "__udivhi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
+ Names[RTLIB::SREM_I16] = "__modhi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
+ Names[RTLIB::UREM_I16] = "__umodhi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::UREM_I128] = "__umodti3";
+
+ // These are generally not available.
+ Names[RTLIB::SDIVREM_I8] = 0;
+ Names[RTLIB::SDIVREM_I16] = 0;
+ Names[RTLIB::SDIVREM_I32] = 0;
+ Names[RTLIB::SDIVREM_I64] = 0;
+ Names[RTLIB::SDIVREM_I128] = 0;
+ Names[RTLIB::UDIVREM_I8] = 0;
+ Names[RTLIB::UDIVREM_I16] = 0;
+ Names[RTLIB::UDIVREM_I32] = 0;
+ Names[RTLIB::UDIVREM_I64] = 0;
+ Names[RTLIB::UDIVREM_I128] = 0;
+
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_F128] = "__addtf3";
+ Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_F128] = "__subtf3";
+ Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_F128] = "__multf3";
+ Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_F128] = "__divtf3";
+ Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_F128] = "fmodl";
+ Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::FMA_F32] = "fmaf";
+ Names[RTLIB::FMA_F64] = "fma";
+ Names[RTLIB::FMA_F80] = "fmal";
+ Names[RTLIB::FMA_F128] = "fmal";
+ Names[RTLIB::FMA_PPCF128] = "fmal";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_F128] = "__powitf2";
+ Names[RTLIB::POWI_PPCF128] = "__powitf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_F128] = "sqrtl";
+ Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+ Names[RTLIB::LOG_F32] = "logf";
+ Names[RTLIB::LOG_F64] = "log";
+ Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_F128] = "logl";
+ Names[RTLIB::LOG_PPCF128] = "logl";
+ Names[RTLIB::LOG2_F32] = "log2f";
+ Names[RTLIB::LOG2_F64] = "log2";
+ Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_F128] = "log2l";
+ Names[RTLIB::LOG2_PPCF128] = "log2l";
+ Names[RTLIB::LOG10_F32] = "log10f";
+ Names[RTLIB::LOG10_F64] = "log10";
+ Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_F128] = "log10l";
+ Names[RTLIB::LOG10_PPCF128] = "log10l";
+ Names[RTLIB::EXP_F32] = "expf";
+ Names[RTLIB::EXP_F64] = "exp";
+ Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_F128] = "expl";
+ Names[RTLIB::EXP_PPCF128] = "expl";
+ Names[RTLIB::EXP2_F32] = "exp2f";
+ Names[RTLIB::EXP2_F64] = "exp2";
+ Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_F128] = "exp2l";
+ Names[RTLIB::EXP2_PPCF128] = "exp2l";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_F128] = "sinl";
+ Names[RTLIB::SIN_PPCF128] = "sinl";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_F128] = "cosl";
+ Names[RTLIB::COS_PPCF128] = "cosl";
+ Names[RTLIB::POW_F32] = "powf";
+ Names[RTLIB::POW_F64] = "pow";
+ Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_F128] = "powl";
+ Names[RTLIB::POW_PPCF128] = "powl";
+ Names[RTLIB::CEIL_F32] = "ceilf";
+ Names[RTLIB::CEIL_F64] = "ceil";
+ Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_F128] = "ceill";
+ Names[RTLIB::CEIL_PPCF128] = "ceill";
+ Names[RTLIB::TRUNC_F32] = "truncf";
+ Names[RTLIB::TRUNC_F64] = "trunc";
+ Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_F128] = "truncl";
+ Names[RTLIB::TRUNC_PPCF128] = "truncl";
+ Names[RTLIB::RINT_F32] = "rintf";
+ Names[RTLIB::RINT_F64] = "rint";
+ Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_F128] = "rintl";
+ Names[RTLIB::RINT_PPCF128] = "rintl";
+ Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+ Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+ Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::FLOOR_F32] = "floorf";
+ Names[RTLIB::FLOOR_F64] = "floor";
+ Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_F128] = "floorl";
+ Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::COPYSIGN_F32] = "copysignf";
+ Names[RTLIB::COPYSIGN_F64] = "copysign";
+ Names[RTLIB::COPYSIGN_F80] = "copysignl";
+ Names[RTLIB::COPYSIGN_F128] = "copysignl";
+ Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
+ Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2";
+ Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
+ Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
+ Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
+ Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+ Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+ Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+ Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
+ Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
+ Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+ Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+ Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+ Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti";
+ Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+ Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+ Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf";
+ Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+ Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+ Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf";
+ Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::OEQ_F128] = "__eqtf2";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::UNE_F128] = "__netf2";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OGE_F128] = "__getf2";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLT_F128] = "__lttf2";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OLE_F128] = "__letf2";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::OGT_F128] = "__gttf2";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::UO_F128] = "__unordtf2";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::O_F128] = "__unordtf2";
+ Names[RTLIB::MEMCPY] = "memcpy";
+ Names[RTLIB::MEMMOVE] = "memmove";
+ Names[RTLIB::MEMSET] = "memset";
+ Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+ Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+ Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+ Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+ Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+ Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+ Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+
+ if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) {
+ Names[RTLIB::SINCOS_F32] = "sincosf";
+ Names[RTLIB::SINCOS_F64] = "sincos";
+ Names[RTLIB::SINCOS_F80] = "sincosl";
+ Names[RTLIB::SINCOS_F128] = "sincosl";
+ Names[RTLIB::SINCOS_PPCF128] = "sincosl";
+ } else {
+ // These are generally not available.
+ Names[RTLIB::SINCOS_F32] = 0;
+ Names[RTLIB::SINCOS_F64] = 0;
+ Names[RTLIB::SINCOS_F80] = 0;
+ Names[RTLIB::SINCOS_F128] = 0;
+ Names[RTLIB::SINCOS_PPCF128] = 0;
+ }
+}
+
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+ CCs[i] = CallingConv::C;
+ }
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ if (RetVT == MVT::f128)
+ return FPEXT_F32_F128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::f128)
+ return FPEXT_F64_F128;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+ if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::UNE_F128] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OGE_F128] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLT_F128] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OLE_F128] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::OGT_F128] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::UO_F128] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+ CCs[RTLIB::O_F128] = ISD::SETEQ;
+}
+
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
+ const TargetLoweringObjectFile *tlof)
+ : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+
+ // Set default actions for various operations.
+ for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
+ setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // These operations default to expand.
+ setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use isFPImmLegal()
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FLOG , MVT::f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f16, Expand);
+ setOperationAction(ISD::FEXP , MVT::f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f128, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f128, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f128, Expand);
+ setOperationAction(ISD::FEXP , MVT::f128, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f128, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+ // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
+ // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
+ //
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
+ IsLittleEndian = TD->isLittleEndian();
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
+ memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
+ = MaxStoresPerMemmoveOptSize = 4;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
+ PredictableSelectIsExpensive = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ BooleanVectorContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = Sched::ILP;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
+ PrefLoopAlignment = 0;
+ MinStackArgumentAlignment = 1;
+ ShouldFoldAtomicFences = false;
+ InsertFencesForAtomic = false;
+ SupportJumpTables = true;
+ MinimumJumpTableEntries = 4;
+
+ InitLibcallNames(LibcallRoutineNames, TM);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLoweringBase::~TargetLoweringBase() {
+ delete &TLOF;
+}
+
+MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
+ return MVT::getIntegerVT(8*TD->getPointerSize(0));
+}
+
+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
+ assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+ if (LHSTy.isVector())
+ return LHSTy;
+ return getScalarShiftAmountTy(LHSTy);
+}
+
+/// canOpTrap - Returns true if the operation can trap for the value type.
+/// VT must be a legal type.
+bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
+ assert(isTypeLegal(VT));
+ switch (Op) {
+ default:
+ return false;
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return true;
+ }
+}
+
+
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT,
+ TargetLoweringBase *TLI) {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!TLI->isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ MVT DestVT = TLI->getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+/// isLegalRC - Return true if the value types that can be represented by the
+/// specified register class are all legal.
+bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I))
+ return true;
+ }
+ return false;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLoweringBase::findRepresentativeClass(MVT VT) const {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
+ if (!RC)
+ return std::make_pair(RC, 0);
+
+ // Compute the set of all super-register classes.
+ BitVector SuperRegRC(TRI->getNumRegClasses());
+ for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
+ SuperRegRC.setBitsInMask(RCI.getMask());
+
+ // Find the first legal register class with the largest spill size.
+ const TargetRegisterClass *BestRC = RC;
+ for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+ const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
+ // We want the largest possible spill size.
+ if (SuperRC->getSize() <= BestRC->getSize())
+ continue;
+ if (!isLegalRC(SuperRC))
+ continue;
+ BestRC = SuperRC;
+ }
+ return std::make_pair(BestRC, 1);
+}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLoweringBase::computeRegisterProperties() {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
+ for (unsigned ExpandedReg = LargestIntReg + 1;
+ ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+ TypeExpandInteger);
+ }
+
+ // Inspect all of the ValueType's smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ MVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (const MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ }
+
+ // Decide how to handle f128. If the target does not have native f128 support,
+ // expand it to i128 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f128)) {
+ NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::f128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (isTypeLegal(VT)) continue;
+
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
+ bool IsLegalWiderType = false;
+ // First try to promote the elements of integer vectors. If no legal
+ // promotion was found, fallback to the widen-vector method.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
+ && SVT.getVectorNumElements() == NElts &&
+ isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+
+ if (IsLegalWiderType) continue;
+
+ // Try to widen the vector.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
+ MVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ unsigned NumElts = VT.getVectorNumElements();
+ ValueTypeActions.setTypeAction(VT,
+ NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ }
+
+ // Determine the 'representative' register class for each value type.
+ // An representative register class is the largest (meaning one which is
+ // not a sub-register class / subreg register class) legal register class for
+ // a group of value types. For example, on i386, i8, i16, and i32
+ // representative would be GR32; while on x86_64 it's GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
+}
+
+EVT TargetLoweringBase::getSetCCResultType(EVT VT) const {
+ assert(!VT.isVector() && "No default SetCC type for vectors!");
+ return getPointerTy(0).SimpleTy;
+}
+
+MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // or a promoted vector type that has the same number of elements which
+ // are wider, then we should convert to that legal vector type.
+ // This handles things like <2 x float> -> <4 x float> and
+ // <4 x i1> -> <4 x i32>.
+ LegalizeTypeAction TA = getTypeAction(Context, VT);
+ if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+ EVT RegisterEVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterEVT)) {
+ IntermediateVT = RegisterEVT;
+ RegisterVT = RegisterEVT.getSimpleVT();
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
+ EVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(
+ EVT::getVectorVT(Context, EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ MVT DestVT = getRegisterType(Context, NewVT);
+ RegisterVT = DestVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+/// TODO: Move this out of TargetLowering.cpp.
+void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const TargetLowering &TLI) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ Flags.setSExt();
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetTransformInfo Helpers
+//===----------------------------------------------------------------------===//
+
+int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/IR/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+ case SDiv: return ISD::UDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad: return 0;
+ }
+
+ llvm_unreachable("Unknown instruction type encountered!");
+}
+
+std::pair<unsigned, MVT>
+TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
+ LLVMContext &C = Ty->getContext();
+ EVT MTy = getValueType(Ty);
+
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ LegalizeKind LK = getTypeConversion(C, MTy);
+
+ if (LK.first == TypeLegal)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ MTy = LK.second;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // The default implementation of this implements a conservative RISCy, r+r and
+ // r+i addr mode.
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
new file mode 100644
index 000000000000..3bdca4c64078
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -0,0 +1,784 @@
+//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF
+//===----------------------------------------------------------------------===//
+
+MCSymbol *
+TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
+ Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ unsigned Encoding = getPersonalityEncoding();
+ switch (Encoding & 0x70) {
+ default:
+ report_fatal_error("We do not support this DWARF encoding yet!");
+ case dwarf::DW_EH_PE_absptr:
+ return Mang->getSymbol(GV);
+ case dwarf::DW_EH_PE_pcrel: {
+ return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
+ Mang->getSymbol(GV)->getName());
+ }
+ }
+}
+
+void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+ SmallString<64> NameData("DW.ref.");
+ NameData += Sym->getName();
+ MCSymbol *Label = getContext().GetOrCreateSymbol(NameData);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
+ StringRef Prefix = ".data.";
+ NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end());
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
+ const MCSection *Sec = getContext().getELFSection(NameData,
+ ELF::SHT_PROGBITS,
+ Flags,
+ SectionKind::getDataRel(),
+ 0, Label->getName());
+ unsigned Size = TM.getDataLayout()->getPointerSize();
+ Streamer.SwitchSection(Sec);
+ Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
+ Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
+ const MCExpr *E = MCConstantExpr::Create(Size, getContext());
+ Streamer.EmitELFSize(Label, E);
+ Streamer.EmitLabel(Label);
+
+ Streamer.EmitSymbolValue(Sym, Size);
+}
+
+const MCExpr *TargetLoweringObjectFileELF::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+
+ if (Encoding & dwarf::DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += ".DW.stub";
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+static SectionKind
+getELFKindForNamedSection(StringRef Name, SectionKind K) {
+ // N.B.: The defaults used in here are no the same ones used in MC.
+ // We follow gcc, MC follows gas. For example, given ".section .eh_frame",
+ // both gas and MC will produce a section with no flags. Given
+ // section(".eh_frame") gcc will produce:
+ //
+ // .section .eh_frame,"a",@progbits
+ if (Name.empty() || Name[0] != '.') return K;
+
+ // Some lame default implementation based on some magic section names.
+ if (Name == ".bss" ||
+ Name.startswith(".bss.") ||
+ Name.startswith(".gnu.linkonce.b.") ||
+ Name.startswith(".llvm.linkonce.b.") ||
+ Name == ".sbss" ||
+ Name.startswith(".sbss.") ||
+ Name.startswith(".gnu.linkonce.sb.") ||
+ Name.startswith(".llvm.linkonce.sb."))
+ return SectionKind::getBSS();
+
+ if (Name == ".tdata" ||
+ Name.startswith(".tdata.") ||
+ Name.startswith(".gnu.linkonce.td.") ||
+ Name.startswith(".llvm.linkonce.td."))
+ return SectionKind::getThreadData();
+
+ if (Name == ".tbss" ||
+ Name.startswith(".tbss.") ||
+ Name.startswith(".gnu.linkonce.tb.") ||
+ Name.startswith(".llvm.linkonce.tb."))
+ return SectionKind::getThreadBSS();
+
+ return K;
+}
+
+
+static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+
+ if (Name == ".init_array")
+ return ELF::SHT_INIT_ARRAY;
+
+ if (Name == ".fini_array")
+ return ELF::SHT_FINI_ARRAY;
+
+ if (Name == ".preinit_array")
+ return ELF::SHT_PREINIT_ARRAY;
+
+ if (K.isBSS() || K.isThreadBSS())
+ return ELF::SHT_NOBITS;
+
+ return ELF::SHT_PROGBITS;
+}
+
+
+static unsigned
+getELFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (!K.isMetadata())
+ Flags |= ELF::SHF_ALLOC;
+
+ if (K.isText())
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (K.isWriteable())
+ Flags |= ELF::SHF_WRITE;
+
+ if (K.isThreadLocal())
+ Flags |= ELF::SHF_TLS;
+
+ // K.isMergeableConst() is left out to honour PR4650
+ if (K.isMergeableCString() || K.isMergeableConst4() ||
+ K.isMergeableConst8() || K.isMergeableConst16())
+ Flags |= ELF::SHF_MERGE;
+
+ if (K.isMergeableCString())
+ Flags |= ELF::SHF_STRINGS;
+
+ return Flags;
+}
+
+
+const MCSection *TargetLoweringObjectFileELF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ StringRef SectionName = GV->getSection();
+
+ // Infer section flags from the section name if we can.
+ Kind = getELFKindForNamedSection(SectionName, Kind);
+
+ return getContext().getELFSection(SectionName,
+ getELFSectionType(SectionName, Kind),
+ getELFSectionFlags(Kind), Kind);
+}
+
+/// getSectionPrefixForGlobal - Return the section prefix name used by options
+/// FunctionsSections and DataSections.
+static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+ if (Kind.isText()) return ".text.";
+ if (Kind.isReadOnly()) return ".rodata.";
+ if (Kind.isBSS()) return ".bss.";
+
+ if (Kind.isThreadData()) return ".tdata.";
+ if (Kind.isThreadBSS()) return ".tbss.";
+
+ if (Kind.isDataNoRel()) return ".data.";
+ if (Kind.isDataRelLocal()) return ".data.rel.local.";
+ if (Kind.isDataRel()) return ".data.rel.";
+ if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local.";
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return ".data.rel.ro.";
+}
+
+
+const MCSection *TargetLoweringObjectFileELF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // If we have -ffunction-section or -fdata-section then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniquedSection;
+ if (Kind.isText())
+ EmitUniquedSection = TM.getFunctionSections();
+ else
+ EmitUniquedSection = TM.getDataSections();
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+ !Kind.isCommon()) {
+ const char *Prefix;
+ Prefix = getSectionPrefixForGlobal(Kind);
+
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append(Sym->getName().begin(), Sym->getName().end());
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (GV->isWeakForLinker()) {
+ Group = Sym->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+
+ return getContext().getELFSection(Name.str(),
+ getELFSectionType(Name.str(), Kind),
+ Flags, Kind, 0, Group);
+ }
+
+ if (Kind.isText()) return TextSection;
+
+ if (Kind.isMergeable1ByteCString() ||
+ Kind.isMergeable2ByteCString() ||
+ Kind.isMergeable4ByteCString()) {
+
+ // We also need alignment here.
+ // FIXME: this is getting the alignment of the character, not the
+ // alignment of the global!
+ unsigned Align =
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
+
+ const char *SizeSpec = ".rodata.str1.";
+ if (Kind.isMergeable2ByteCString())
+ SizeSpec = ".rodata.str2.";
+ else if (Kind.isMergeable4ByteCString())
+ SizeSpec = ".rodata.str4.";
+ else
+ assert(Kind.isMergeable1ByteCString() && "unknown string width");
+
+
+ std::string Name = SizeSpec + utostr(Align);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
+ Kind);
+ }
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ return ReadOnlySection; // .const
+ }
+
+ if (Kind.isReadOnly()) return ReadOnlySection;
+
+ if (Kind.isThreadData()) return TLSDataSection;
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+
+ // Note: we claim that common symbols are put in BSSSection, but they are
+ // really emitted with the magic .comm directive, which creates a symbol table
+ // entry but not a section.
+ if (Kind.isBSS() || Kind.isCommon()) return BSSSection;
+
+ if (Kind.isDataNoRel()) return DataSection;
+ if (Kind.isDataRelLocal()) return DataRelLocalSection;
+ if (Kind.isDataRel()) return DataRelSection;
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+/// getSectionForConstant - Given a mergeable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *TargetLoweringObjectFileELF::
+getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+const MCSection *
+TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (Priority == 65535)
+ return StaticCtorSection;
+
+ if (UseInitArray) {
+ std::string Name = std::string(".init_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ } else {
+ std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+}
+
+const MCSection *
+TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (Priority == 65535)
+ return StaticDtorSection;
+
+ if (UseInitArray) {
+ std::string Name = std::string(".fini_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ } else {
+ std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+}
+
+void
+TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
+ UseInitArray = UseInitArray_;
+ if (!UseInitArray)
+ return;
+
+ StaticCtorSection =
+ getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+}
+
+//===----------------------------------------------------------------------===//
+// MachO
+//===----------------------------------------------------------------------===//
+
+/// emitModuleFlags - Perform code emission for module flags.
+void TargetLoweringObjectFileMachO::
+emitModuleFlags(MCStreamer &Streamer,
+ ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ Mangler *Mang, const TargetMachine &TM) const {
+ unsigned VersionVal = 0;
+ unsigned ImageInfoFlags = 0;
+ MDNode *LinkerOptions = 0;
+ StringRef SectionVal;
+
+ for (ArrayRef<Module::ModuleFlagEntry>::iterator
+ i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) {
+ const Module::ModuleFlagEntry &MFE = *i;
+
+ // Ignore flags with 'Require' behavior.
+ if (MFE.Behavior == Module::Require)
+ continue;
+
+ StringRef Key = MFE.Key->getString();
+ Value *Val = MFE.Val;
+
+ if (Key == "Objective-C Image Info Version") {
+ VersionVal = cast<ConstantInt>(Val)->getZExtValue();
+ } else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only" ||
+ Key == "Objective-C Is Simulated") {
+ ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue();
+ } else if (Key == "Objective-C Image Info Section") {
+ SectionVal = cast<MDString>(Val)->getString();
+ } else if (Key == "Linker Options") {
+ LinkerOptions = cast<MDNode>(Val);
+ }
+ }
+
+ // Emit the linker options if present.
+ if (LinkerOptions) {
+ for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
+ MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
+ SmallVector<std::string, 4> StrOptions;
+
+ // Convert to strings.
+ for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
+ MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
+ StrOptions.push_back(MDOption->getString());
+ }
+
+ Streamer.EmitLinkerOptions(StrOptions);
+ }
+ }
+
+ // The section is mandatory. If we don't have it, then we don't have GC info.
+ if (SectionVal.empty()) return;
+
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty())
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Invalid section specifier '" + Section + "': " +
+ ErrorCode + ".");
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize,
+ SectionKind::getDataNoRel());
+ Streamer.SwitchSection(S);
+ Streamer.EmitLabel(getContext().
+ GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
+ Streamer.EmitIntValue(VersionVal, 4);
+ Streamer.EmitIntValue(ImageInfoFlags, 4);
+ Streamer.AddBlankLine();
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // Parse the section specifier and create it if valid.
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty()) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' has an invalid section specifier '" +
+ GV->getSection() + "': " + ErrorCode + ".");
+ }
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
+
+ // If TAA wasn't set by ParseSectionSpecifier() above,
+ // use the value returned by getMachOSection() as a default.
+ if (!TAAParsed)
+ TAA = S->getTypeAndAttributes();
+
+ // Okay, now that we got the section, verify that the TAA & StubSize agree.
+ // If the user declared multiple globals with different section flags, we need
+ // to reject it here.
+ if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' section type or attributes does not match previous"
+ " section specifier");
+ }
+
+ return S;
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
+
+ if (Kind.isText())
+ return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+
+ // If this is weak/linkonce, put this in a coalescable section, either in text
+ // or data depending on if it is writable.
+ if (GV->isWeakForLinker()) {
+ if (Kind.isReadOnly())
+ return ConstTextCoalSection;
+ return DataCoalSection;
+ }
+
+ // FIXME: Alignment check should be handled by section classifier.
+ if (Kind.isMergeable1ByteCString() &&
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return CStringSection;
+
+ // Do not put 16-bit arrays in the UString section if they have an
+ // externally visible label, this runs into issues with certain linker
+ // versions.
+ if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return UStringSection;
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ }
+
+ // Otherwise, if it is readonly, but not something we can specially optimize,
+ // just drop it in .const.
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ // If this is marked const, put it into a const section. But if the dynamic
+ // linker needs to write to it, put it in the data segment.
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ // Put zero initialized globals with strong external linkage in the
+ // DATA, __common section with the .zerofill directive.
+ if (Kind.isBSSExtern())
+ return DataCommonSection;
+
+ // Put zero initialized globals with local linkage in __DATA,__bss directive
+ // with the .zerofill directive (aka .lcomm).
+ if (Kind.isBSSLocal())
+ return DataBSSSection;
+
+ // Otherwise, just drop the variable in the normal data section.
+ return DataSection;
+}
+
+const MCSection *
+TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
+ // If this constant requires a relocation, we have to put it in the data
+ // segment, not in the text segment.
+ if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ return ReadOnlySection; // .const
+}
+
+/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide
+/// not to emit the UsedDirective for some symbols in llvm.used.
+// FIXME: REMOVE this (rdar://7071300)
+bool TargetLoweringObjectFileMachO::
+shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
+ /// On Darwin, internally linked data beginning with "L" or "l" does not have
+ /// the directive emitted (this occurs in ObjC metadata).
+ if (!GV) return false;
+
+ // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix.
+ if (GV->hasLocalLinkage() && !isa<Function>(GV)) {
+ // FIXME: ObjC metadata is currently emitted as internal symbols that have
+ // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and
+ // this horrible hack can go away.
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
+ return false;
+ }
+
+ return true;
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+
+ if (Encoding & DW_EH_PE_indirect) {
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
+ MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+MCSymbol *TargetLoweringObjectFileMachO::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return SSym;
+}
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+static unsigned
+getCOFFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (K.isMetadata())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isText())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_CNT_CODE;
+ else if (K.isBSS ())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isThreadLocal())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isReadOnly())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ;
+ else if (K.isWriteable())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ return Flags;
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ int Selection = 0;
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+ SmallString<128> Name(GV->getSection().c_str());
+ if (GV->isWeakForLinker()) {
+ Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append("$");
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+ }
+ return getContext().getCOFFSection(Name,
+ Characteristics,
+ Selection,
+ Kind);
+}
+
+static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text$";
+ if (Kind.isBSS ())
+ return ".bss$";
+ if (Kind.isThreadLocal()) {
+ // 'LLVM' is just an arbitary string to ensure that the section name gets
+ // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
+ return ".tls$LLVM";
+ }
+ if (Kind.isWriteable())
+ return ".data$";
+ return ".rdata$";
+}
+
+
+const MCSection *TargetLoweringObjectFileCOFF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if (GV->isWeakForLinker()) {
+ const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+ return getContext().getCOFFSection(Name.str(), Characteristics,
+ COFF::IMAGE_COMDAT_SELECT_ANY, Kind);
+ }
+
+ if (Kind.isText())
+ return getTextSection();
+
+ if (Kind.isThreadLocal())
+ return getTLSDataSection();
+
+ return getDataSection();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
new file mode 100644
index 000000000000..0f59d0169e18
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -0,0 +1,52 @@
+//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the methods in the TargetOptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+/// DisableFramePointerElim - This returns true if frame pointer elimination
+/// optimization should be disabled for the given machine function.
+bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+
+ return NoFramePointerElim;
+}
+
+/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
+/// is specified on the command line. When this flag is off(default), the
+/// code generator is not allowed to generate mad (multiply add) if the
+/// result is "less precise" than doing those operations individually.
+bool TargetOptions::LessPreciseFPMAD() const {
+ return UnsafeFPMath || LessPreciseFPMADOption;
+}
+
+/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+/// that the rounding mode of the FPU can change from its default.
+bool TargetOptions::HonorSignDependentRoundingFPMath() const {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+}
+
+/// getTrapFunctionName - If this returns a non-empty string, this means isel
+/// should lower Intrinsic::trap to a call to the specified function name
+/// instead of an ISD::TRAP node.
+StringRef TargetOptions::getTrapFunctionName() const {
+ return TrapFuncName;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
new file mode 100644
index 000000000000..84b4bfc33221
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -0,0 +1,285 @@
+//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
+ regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *SRINames,
+ const unsigned *SRILaneMasks)
+ : InfoDesc(ID), SubRegIndexNames(SRINames),
+ SubRegIndexLaneMasks(SRILaneMasks),
+ RegClassBegin(RCB), RegClassEnd(RCE) {
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+void PrintReg::print(raw_ostream &OS) const {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
+ else
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+}
+
+void PrintRegUnit::print(raw_ostream &OS) const {
+ // Generic printout when TRI is missing.
+ if (!TRI) {
+ OS << "Unit~" << Unit;
+ return;
+ }
+
+ // Check for invalid register units.
+ if (Unit >= TRI->getNumRegUnits()) {
+ OS << "BadUnit~" << Unit;
+ return;
+ }
+
+ // Normal units have at least one root.
+ MCRegUnitRootIterator Roots(Unit, TRI);
+ assert(Roots.isValid() && "Unit has no roots.");
+ OS << TRI->getName(*Roots);
+ for (++Roots; Roots.isValid(); ++Roots)
+ OS << '~' << TRI->getName(*Roots);
+}
+
+/// getAllocatableClass - Return the maximal subclass of the given register
+/// class that is alloctable, or NULL.
+const TargetRegisterClass *
+TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
+ if (!RC || RC->isAllocatable())
+ return RC;
+
+ const unsigned *SubClass = RC->getSubClassMask();
+ for (unsigned Base = 0, BaseE = getNumRegClasses();
+ Base < BaseE; Base += 32) {
+ unsigned Idx = Base;
+ for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
+ unsigned Offset = CountTrailingZeros_32(Mask);
+ const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
+ if (SubRC->isAllocatable())
+ return SubRC;
+ Mask >>= Offset;
+ Idx += Offset + 1;
+ }
+ }
+ return NULL;
+}
+
+/// getMinimalPhysRegClass - Returns the Register Class of a physical
+/// register of the given type, picking the most sub register class of
+/// the right type that contains this physreg.
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+ // Pick the most sub register class of the right type that contains
+ // this physreg.
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Toggle the bits that represent allocatable
+/// registers for the specific register class.
+static void getAllocatableSetForRC(const MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ assert(RC->isAllocatable() && "invalid for nonallocatable sets");
+ ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(getNumRegs());
+ if (RC) {
+ // A register class with no allocatable subclass returns an empty set.
+ const TargetRegisterClass *SubClass = getAllocatableClass(RC);
+ if (SubClass)
+ getAllocatableSetForRC(MF, SubClass, Allocatable);
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ if ((*I)->isAllocatable())
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ }
+
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable &= Reserved.flip();
+
+ return Allocatable;
+}
+
+static inline
+const TargetRegisterClass *firstCommonClass(const uint32_t *A,
+ const uint32_t *B,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
+ if (unsigned Common = *A++ & *B++)
+ return TRI->getRegClass(I + CountTrailingZeros_32(Common));
+ return 0;
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) const {
+ // First take care of the trivial cases.
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return 0;
+
+ // Register classes are ordered topologically, so the largest common
+ // sub-class it the common sub-class with the smallest ID.
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ assert(A && B && "Missing register class");
+ assert(Idx && "Bad sub-register index");
+
+ // Find Idx in the list of super-register indices.
+ for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI)
+ if (RCI.getSubReg() == Idx)
+ // The bit mask contains all register classes that are projected into B
+ // by Idx. Find a class that is also a sub-class of A.
+ return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
+ return 0;
+}
+
+const TargetRegisterClass *TargetRegisterInfo::
+getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
+ const TargetRegisterClass *RCB, unsigned SubB,
+ unsigned &PreA, unsigned &PreB) const {
+ assert(RCA && SubA && RCB && SubB && "Invalid arguments");
+
+ // Search all pairs of sub-register indices that project into RCA and RCB
+ // respectively. This is quadratic, but usually the sets are very small. On
+ // most targets like X86, there will only be a single sub-register index
+ // (e.g., sub_16bit projecting into GR16).
+ //
+ // The worst case is a register class like DPR on ARM.
+ // We have indices dsub_0..dsub_7 projecting into that class.
+ //
+ // It is very common that one register class is a sub-register of the other.
+ // Arrange for RCA to be the larger register so the answer will be found in
+ // the first iteration. This makes the search linear for the most common
+ // case.
+ const TargetRegisterClass *BestRC = 0;
+ unsigned *BestPreA = &PreA;
+ unsigned *BestPreB = &PreB;
+ if (RCA->getSize() < RCB->getSize()) {
+ std::swap(RCA, RCB);
+ std::swap(SubA, SubB);
+ std::swap(BestPreA, BestPreB);
+ }
+
+ // Also terminate the search one we have found a register class as small as
+ // RCA.
+ unsigned MinSize = RCA->getSize();
+
+ for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
+ unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
+ for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) {
+ // Check if a common super-register class exists for this index pair.
+ const TargetRegisterClass *RC =
+ firstCommonClass(IA.getMask(), IB.getMask(), this);
+ if (!RC || RC->getSize() < MinSize)
+ continue;
+
+ // The indexes must compose identically: PreA+SubA == PreB+SubB.
+ unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB);
+ if (FinalA != FinalB)
+ continue;
+
+ // Is RC a better candidate than BestRC?
+ if (BestRC && RC->getSize() >= BestRC->getSize())
+ continue;
+
+ // Yes, RC is the smallest super-register seen so far.
+ BestRC = RC;
+ *BestPreA = IA.getSubReg();
+ *BestPreB = IB.getSubReg();
+
+ // Bail early if we reached MinSize. We won't find a better candidate.
+ if (BestRC->getSize() == MinSize)
+ return BestRC;
+ }
+ }
+ return BestRC;
+}
+
+// Compute target-independent register allocator hints to help eliminate copies.
+void
+TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ // Hints with HintType != 0 were set by target-dependent code.
+ // Such targets must provide their own implementation of
+ // TRI::getRegAllocationHints to interpret those hint types.
+ assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
+
+ // Target-independent hints are either a physical or a virtual register.
+ unsigned Phys = Hint.second;
+ if (VRM && isVirtualRegister(Phys))
+ Phys = VRM->getPhys(Phys);
+
+ // Check that Phys is a valid hint in VirtReg's register class.
+ if (!isPhysicalRegister(Phys))
+ return;
+ if (MRI.isReserved(Phys))
+ return;
+ // Check that Phys is in the allocation order. We shouldn't heed hints
+ // from VirtReg's register class if they aren't in the allocation order. The
+ // target probably has a reason for removing the register.
+ if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
+ return;
+
+ // All clear, tell the register allocator to prefer this register.
+ Hints.push_back(Phys);
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
new file mode 100644
index 000000000000..783bfa1c1a18
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -0,0 +1,309 @@
+//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a wrapper around MCSchedModel that allows the interface
+// to benefit from information currently only available in TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
+ cl::desc("Use TargetSchedModel for latency lookup"));
+
+static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
+ cl::desc("Use InstrItineraryData for latency lookup"));
+
+bool TargetSchedModel::hasInstrSchedModel() const {
+ return EnableSchedModel && SchedModel.hasInstrSchedModel();
+}
+
+bool TargetSchedModel::hasInstrItineraries() const {
+ return EnableSchedItins && !InstrItins.isEmpty();
+}
+
+static unsigned gcd(unsigned Dividend, unsigned Divisor) {
+ // Dividend and Divisor will be naturally swapped as needed.
+ while(Divisor) {
+ unsigned Rem = Dividend % Divisor;
+ Dividend = Divisor;
+ Divisor = Rem;
+ };
+ return Dividend;
+}
+static unsigned lcm(unsigned A, unsigned B) {
+ unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
+ assert((LCM >= A && LCM >= B) && "LCM overflow");
+ return LCM;
+}
+
+void TargetSchedModel::init(const MCSchedModel &sm,
+ const TargetSubtargetInfo *sti,
+ const TargetInstrInfo *tii) {
+ SchedModel = sm;
+ STI = sti;
+ TII = tii;
+ STI->initInstrItins(InstrItins);
+
+ unsigned NumRes = SchedModel.getNumProcResourceKinds();
+ ResourceFactors.resize(NumRes);
+ ResourceLCM = SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ if (NumUnits > 0)
+ ResourceLCM = lcm(ResourceLCM, NumUnits);
+ }
+ MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
+ }
+}
+
+unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrItineraries()) {
+ int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
+ return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI);
+ }
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->NumMicroOps;
+ }
+ return MI->isTransient() ? 0 : 1;
+}
+
+// The machine model may explicitly specify an invalid latency, which
+// effectively means infinite latency. Since users of the TargetSchedule API
+// don't know how to handle this, we convert it to a very large latency that is
+// easy to distinguish when debugging the DAG but won't induce overflow.
+static unsigned convertLatency(int Cycles) {
+ return Cycles >= 0 ? Cycles : 1000;
+}
+
+/// If we can determine the operand latency from the def only, without machine
+/// model or itinerary lookup, do so. Otherwise return -1.
+int TargetSchedModel::getDefLatency(const MachineInstr *DefMI,
+ bool FindMin) const {
+
+ // Return a latency based on the itinerary properties and defining instruction
+ // if possible. Some common subtargets don't require per-operand latency,
+ // especially for minimum latencies.
+ if (FindMin) {
+ // If MinLatency is invalid, then use the itinerary for MinLatency. If no
+ // itinerary exists either, then use single cycle latency.
+ if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) {
+ return 1;
+ }
+ return SchedModel.MinLatency;
+ }
+ else if (!hasInstrSchedModel() && !hasInstrItineraries()) {
+ return TII->defaultDefLatency(&SchedModel, DefMI);
+ }
+ // ...operand lookup required
+ return -1;
+}
+
+/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
+/// evaluation of predicates that depend on instruction operands or flags.
+const MCSchedClassDesc *TargetSchedModel::
+resolveSchedClass(const MachineInstr *MI) const {
+
+ // Get the definition's scheduling class descriptor from this machine model.
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+
+#ifndef NDEBUG
+ unsigned NIter = 0;
+#endif
+ while (SCDesc->isVariant()) {
+ assert(++NIter < 6 && "Variants are nested deeper than the magic number");
+
+ SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
+ SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ }
+ return SCDesc;
+}
+
+/// Find the def index of this operand. This index maps to the machine model and
+/// is independent of use operands. Def operands may be reordered with uses or
+/// merged with uses without affecting the def index (e.g. before/after
+/// regalloc). However, an instruction's def operands must never be reordered
+/// with respect to each other.
+static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
+ unsigned DefIdx = 0;
+ for (unsigned i = 0; i != DefOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ ++DefIdx;
+ }
+ return DefIdx;
+}
+
+/// Find the use index of this operand. This is independent of the instruction's
+/// def operands.
+///
+/// Note that uses are not determined by the operand's isUse property, which
+/// is simply the inverse of isDef. Here we consider any readsReg operand to be
+/// a "use". The machine model allows an operand to be both a Def and Use.
+static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
+ unsigned UseIdx = 0;
+ for (unsigned i = 0; i != UseOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.readsReg())
+ ++UseIdx;
+ }
+ return UseIdx;
+}
+
+// Top-level API for clients that know the operand indices.
+unsigned TargetSchedModel::computeOperandLatency(
+ const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *UseMI, unsigned UseOperIdx,
+ bool FindMin) const {
+
+ int DefLatency = getDefLatency(DefMI, FindMin);
+ if (DefLatency >= 0)
+ return DefLatency;
+
+ if (hasInstrItineraries()) {
+ int OperLatency = 0;
+ if (UseMI) {
+ OperLatency =
+ TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx);
+ }
+ else {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
+ }
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ // Rather than directly querying InstrItins stage latency, we call a TII
+ // hook to allow subtargets to specialize latency. This hook is only
+ // applicable to the InstrItins model. InstrSchedModel should model all
+ // special cases without TII hooks.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency,
+ TII->defaultDefLatency(&SchedModel, DefMI));
+ return InstrLatency;
+ }
+ assert(!FindMin && hasInstrSchedModel() &&
+ "Expected a SchedModel for this cpu");
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
+ if (DefIdx < SCDesc->NumWriteLatencyEntries) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ unsigned WriteID = WLEntry->WriteResourceID;
+ unsigned Latency = convertLatency(WLEntry->Cycles);
+ if (!UseMI)
+ return Latency;
+
+ // Lookup the use's latency adjustment in SubtargetInfo.
+ const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
+ if (UseDesc->NumReadAdvanceEntries == 0)
+ return Latency;
+ unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
+ return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+ }
+ // If DefIdx does not exist in the model (e.g. implicit defs), then return
+ // unit latency (defaultDefLatency may be too conservative).
+#ifndef NDEBUG
+ if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
+ && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) {
+ std::string Err;
+ raw_string_ostream ss(Err);
+ ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
+ << *DefMI;
+ report_fatal_error(ss.str());
+ }
+#endif
+ // FIXME: Automatically giving all implicit defs defaultDefLatency is
+ // undesirable. We should only do it for defs that are known to the MC
+ // desc like flags. Truly implicit defs should get 1 cycle latency.
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
+}
+
+unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
+ // For the itinerary model, fall back to the old subtarget hook.
+ // Allow subtargets to compute Bundle latencies outside the machine model.
+ if (hasInstrItineraries() || MI->isBundle())
+ return TII->getInstrLatency(&InstrItins, MI);
+
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
+ if (SCDesc->isValid()) {
+ unsigned Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ Latency = std::max(Latency, convertLatency(WLEntry->Cycles));
+ }
+ return Latency;
+ }
+ }
+ return TII->defaultDefLatency(&SchedModel, MI);
+}
+
+unsigned TargetSchedModel::
+computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *DepMI) const {
+ // MinLatency == -1 is for in-order processors that always have unit
+ // MinLatency. MinLatency > 0 is for in-order processors with varying min
+ // latencies, but since this is not a RAW dep, we always use unit latency.
+ if (SchedModel.MinLatency != 0)
+ return 1;
+
+ // MinLatency == 0 indicates an out-of-order processor that can dispatch
+ // WAW dependencies in the same cycle.
+
+ // Treat predication as a data dependency for out-of-order cpus. In-order
+ // cpus do not need to treat predicated writes specially.
+ //
+ // TODO: The following hack exists because predication passes do not
+ // correctly append imp-use operands, and readsReg() strangely returns false
+ // for predicated defs.
+ unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
+ const MachineFunction &MF = *DefMI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
+ return computeInstrLatency(DefMI);
+
+ // If we have a per operand scheduling model, check if this def is writing
+ // an unbuffered resource. If so, it treated like an in-order cpu.
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ if (SCDesc->isValid()) {
+ for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
+ *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
+ if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 000000000000..e6dfe104c82f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,1678 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, that it
+// has to be capable of handling the non-SSA nature of these rewritten
+// virtual registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
+STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
+STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
+
+namespace {
+class TwoAddressInstructionPass : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+ LiveIntervals *LIS;
+ AliasAnalysis *AA;
+ CodeGenOpt::Level OptLevel;
+
+ // The current basic block being processed.
+ MachineBasicBlock *MBB;
+
+ // DistanceMap - Keep track the distance of a MI from the start of the
+ // current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // Set of already processed instructions in the current block.
+ SmallPtrSet<MachineInstr*, 8> Processed;
+
+ // SrcRegMap - A map from virtual registers to physical registers which are
+ // likely targets to be coalesced to due to copies from physical registers to
+ // virtual registers. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
+
+ // DstRegMap - A map from virtual registers to physical registers which are
+ // likely targets to be coalesced to due to copies to physical registers from
+ // virtual registers. e.g. r1 = move v1024.
+ DenseMap<unsigned, unsigned> DstRegMap;
+
+ bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
+
+ bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
+
+ bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ MachineInstr *MI, unsigned Dist);
+
+ bool commuteInstruction(MachineBasicBlock::iterator &mi,
+ unsigned RegB, unsigned RegC, unsigned Dist);
+
+ bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+
+ bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned RegA, unsigned RegB, unsigned Dist);
+
+ bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI);
+
+ bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+ bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+
+ bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist, bool shouldOnlyCommute);
+
+ void scanUses(unsigned DstReg);
+
+ void processCopy(MachineInstr *MI);
+
+ typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
+ typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+ bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+ void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+ void eliminateRegSequence(MachineBasicBlock::iterator&);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - Pass entry point.
+ bool runOnMachineFunction(MachineFunction&);
+};
+} // end anonymous namespace
+
+char TwoAddressInstructionPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
+
+/// sink3AddrInstruction - A two-address instruction has been converted to a
+/// three-address instruction to avoid clobbering a register. Try to sink it
+/// past the instruction that would kill the above mentioned register to reduce
+/// register pressure.
+bool TwoAddressInstructionPass::
+sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
+ // FIXME: Shouldn't we be trying to do this before we three-addressify the
+ // instruction? After this transformation is done, we no longer need
+ // the instruction to be in three-address form.
+
+ // Check if it's safe to move this instruction.
+ bool SeenStore = true; // Be conservative.
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ unsigned DefReg = 0;
+ SmallSet<unsigned, 4> UseRegs;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse() && MOReg != SavedReg)
+ UseRegs.insert(MO.getReg());
+ if (!MO.isDef())
+ continue;
+ if (MO.isImplicit())
+ // Don't try to move it if it implicitly defines a register.
+ return false;
+ if (DefReg)
+ // For now, don't move any instructions that define multiple registers.
+ return false;
+ DefReg = MO.getReg();
+ }
+
+ // Find the instruction that kills SavedReg.
+ MachineInstr *KillMI = NULL;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(SavedReg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ }
+ if (!KillMI) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SavedReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
+ }
+
+ // If we find the instruction that kills SavedReg, and it is in an
+ // appropriate location, we can try to sink the current instruction
+ // past it.
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
+ KillMI == OldPos || KillMI->isTerminator())
+ return false;
+
+ // If any of the definitions are used by another instruction between the
+ // position and the kill use, then it's not safe to sink it.
+ //
+ // FIXME: This can be sped up if there is an easy way to query whether an
+ // instruction is before or after another instruction. Then we can use
+ // MachineRegisterInfo def / use instead.
+ MachineOperand *KillMO = NULL;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+
+ unsigned NumVisited = 0;
+ for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (DefReg == MOReg)
+ return false;
+
+ if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) {
+ if (OtherMI == KillMI && MOReg == SavedReg)
+ // Save the operand that kills the register. We want to unset the kill
+ // marker if we can sink MI past it.
+ KillMO = &MO;
+ else if (UseRegs.count(MOReg))
+ // One of the uses is killed before the destination.
+ return false;
+ }
+ }
+ }
+ assert(KillMO && "Didn't find kill");
+
+ if (!LIS) {
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
+
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+ }
+
+ // Move instruction to its destination.
+ MBB->remove(MI);
+ MBB->insert(KillPos, MI);
+
+ if (LIS)
+ LIS->handleMove(MI);
+
+ ++Num3AddrSunk;
+ return true;
+}
+
+/// noUseAfterLastDef - Return true if there are no intervening uses between the
+/// last instruction in the MBB that defines the specified register and the
+/// two-address instruction which is being processed. It also returns the last
+/// def location by reference
+bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
+ unsigned &LastDef) {
+ LastDef = 0;
+ unsigned LastUse = Dist;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB || MI->isDebugValue())
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (MO.isUse() && DI->second < LastUse)
+ LastUse = DI->second;
+ if (MO.isDef() && DI->second > LastDef)
+ LastDef = DI->second;
+ }
+
+ return !(LastUse > LastDef && LastUse < Dist);
+}
+
+/// isCopyToReg - Return true if the specified MI is a copy instruction or
+/// a extract_subreg instruction. It also returns the source and destination
+/// registers and whether they are physical registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+ unsigned &SrcReg, unsigned &DstReg,
+ bool &IsSrcPhys, bool &IsDstPhys) {
+ SrcReg = 0;
+ DstReg = 0;
+ if (MI.isCopy()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else
+ return false;
+
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
+}
+
+/// isPLainlyKilled - Test if the given register value, which is used by the
+// given instruction, is killed by the given instruction.
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
+ LiveIntervals *LIS) {
+ if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LIS->isNotInMIMap(MI)) {
+ // FIXME: Sometimes tryInstructionTransform() will add instructions and
+ // test whether they can be folded before keeping them. In this case it
+ // sets a kill before recursively calling tryInstructionTransform() again.
+ // If there is no interval available, we assume that this instruction is
+ // one of those. A kill flag is manually inserted on the operand so the
+ // check below will handle it.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ // This is to match the kill flag version where undefs don't have kill
+ // flags.
+ if (!LI.hasAtLeastOneValue())
+ return false;
+
+ SlotIndex useIdx = LIS->getInstructionIndex(MI);
+ LiveInterval::const_iterator I = LI.find(useIdx);
+ assert(I != LI.end() && "Reg must be live-in to use.");
+ return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+ }
+
+ return MI->killsRegister(Reg);
+}
+
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+/// %reg1034 = copy %reg1024
+/// %reg1035 = copy %reg1025<kill>
+/// %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+/// If allowFalsePositives is true then likely kills are treated as kills even
+/// if it can't be proven that they are kills.
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+ const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ LiveIntervals *LIS,
+ bool allowFalsePositives) {
+ MachineInstr *DefMI = &MI;
+ for (;;) {
+ // All uses of physical registers are likely to be kills.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ (allowFalsePositives || MRI->hasOneUse(Reg)))
+ return true;
+ if (!isPlainlyKilled(DefMI, Reg, LIS))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return true;
+ MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+ // If there are multiple defs, we can't do a simple analysis, so just
+ // go with what the kill flag says.
+ if (llvm::next(Begin) != MRI->def_end())
+ return true;
+ DefMI = &*Begin;
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ // If the def is something other than a copy, then it isn't going to
+ // be coalesced, so follow the kill flag.
+ if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return true;
+ Reg = SrcReg;
+ }
+}
+
+/// isTwoAddrUse - Return true if the specified MI uses the specified register
+/// as a two-address use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned NumOps = MI.isInlineAsm()
+ ? MI.getNumOperands() : MCID.getNumOperands();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ unsigned ti;
+ if (MI.isRegTiedToDefOperand(i, &ti)) {
+ DstReg = MI.getOperand(ti).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// findOnlyInterestingUse - Given a register, if has a single in-basic block
+/// use, return the use instruction if it's a copy or a two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ bool &IsCopy,
+ unsigned &DstReg, bool &IsDstPhys) {
+ if (!MRI->hasOneNonDBGUse(Reg))
+ // None or more than one use.
+ return 0;
+ MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg);
+ if (UseMI.getParent() != MBB)
+ return 0;
+ unsigned SrcReg;
+ bool IsSrcPhys;
+ if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ IsCopy = true;
+ return &UseMI;
+ }
+ IsDstPhys = false;
+ if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return &UseMI;
+ }
+ return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+ while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+ if (SI == RegMap.end())
+ return 0;
+ Reg = SI->second;
+ }
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ return 0;
+}
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+ if (RegA == RegB)
+ return true;
+ if (!RegA || !RegB)
+ return false;
+ return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to commute
+/// the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::
+isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ MachineInstr *MI, unsigned Dist) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ // Determine if it's profitable to commute this two address instruction. In
+ // general, we want no uses between this instruction and the definition of
+ // the two-address register.
+ // e.g.
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1028
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // In this case, it might not be possible to coalesce the second MOV8rr
+ // instruction if the first one is coalesced. So it would be profitable to
+ // commute it:
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1029
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+
+ if (!isPlainlyKilled(MI, regC, LIS))
+ return false;
+
+ // Ok, we have something like:
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // let's see if it's worth commuting it.
+
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r0 = MOV %reg1026
+ // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+ unsigned ToRegA = getMappedReg(regA, DstRegMap);
+ if (ToRegA) {
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI);
+ bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI);
+ if (BComp != CComp)
+ return !BComp && CComp;
+ }
+
+ // If there is a use of regC between its last def (could be livein) and this
+ // instruction, then bail.
+ unsigned LastDefC = 0;
+ if (!noUseAfterLastDef(regC, Dist, LastDefC))
+ return false;
+
+ // If there is a use of regB between its last def (could be livein) and this
+ // instruction, then go ahead and make this transformation.
+ unsigned LastDefB = 0;
+ if (!noUseAfterLastDef(regB, Dist, LastDefB))
+ return true;
+
+ // Since there are no intervening uses for both registers, then commute
+ // if the def of regC is closer. Its live interval is shorter.
+ return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// commuteInstruction - Commute a two-address instruction and update the basic
+/// block, distance map, and live variables if needed. Return true if it is
+/// successful.
+bool TwoAddressInstructionPass::
+commuteInstruction(MachineBasicBlock::iterator &mi,
+ unsigned RegB, unsigned RegC, unsigned Dist) {
+ MachineInstr *MI = mi;
+ DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+ if (NewMI == 0) {
+ DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+ assert(NewMI == MI &&
+ "TargetInstrInfo::commuteInstruction() should not return a new "
+ "instruction unless it was requested.");
+
+ // Update source register map.
+ unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ if (FromRegC) {
+ unsigned RegA = MI->getOperand(0).getReg();
+ SrcRegMap[RegA] = FromRegC;
+ }
+
+ return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r2 = MOV %reg1026
+ // Turn ADD into a 3-address instruction to avoid a copy.
+ unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+ if (!FromRegB)
+ return false;
+ unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
+}
+
+/// convertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned RegA, unsigned RegB,
+ unsigned Dist) {
+ // FIXME: Why does convertToThreeAddress() need an iterator reference?
+ MachineFunction::iterator MFI = MBB;
+ MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV);
+ assert(MBB == MFI && "convertToThreeAddress changed iterator reference");
+ if (!NewMI)
+ return false;
+
+ DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+ bool Sunk = false;
+
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(mi, NewMI);
+
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+ // FIXME: Temporary workaround. If the new instruction doesn't
+ // uses RegB, convertToThreeAddress must have created more
+ // then one instruction.
+ Sunk = sink3AddrInstruction(NewMI, RegB, mi);
+
+ MBB->erase(mi); // Nuke the old inst.
+
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = llvm::next(mi);
+ }
+
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
+ return true;
+}
+
+/// scanUses - Scan forward recursively for only uses, update maps if the use
+/// is a copy or a two-address instruction.
+void
+TwoAddressInstructionPass::scanUses(unsigned DstReg) {
+ SmallVector<unsigned, 4> VirtRegPairs;
+ bool IsDstPhys;
+ bool IsCopy = false;
+ unsigned NewReg = 0;
+ unsigned Reg = DstReg;
+ while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
+ NewReg, IsDstPhys)) {
+ if (IsCopy && !Processed.insert(UseMI))
+ break;
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+ // Earlier in the same MBB.Reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
+ if (!isNew)
+ assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+ VirtRegPairs.push_back(NewReg);
+ Reg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
+ ToReg = FromReg;
+ }
+ bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!");
+ }
+}
+
+/// processCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1 = mov r1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
+ if (Processed.count(MI))
+ return;
+
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return;
+
+ if (IsDstPhys && !IsSrcPhys)
+ DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+ else if (!IsDstPhys && IsSrcPhys) {
+ bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[DstReg] == SrcReg &&
+ "Can't map to two src physical registers!");
+
+ scanUses(DstReg);
+ }
+
+ Processed.insert(MI);
+ return;
+}
+
+/// rescheduleMIBelowKill - If there is one more local instruction that reads
+/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill
+/// instruction in order to eliminate the need for the copy.
+bool TwoAddressInstructionPass::
+rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
+ return false;
+
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = 0;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() ||
+ KillMI->isBranch() || KillMI->isTerminator())
+ // Don't move pass calls, etc.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ if (TII->getInstrLatency(InstrItins, MI) > 1)
+ // FIXME: Needs more sophisticated heuristics.
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef())
+ Defs.insert(MOReg);
+ else {
+ Uses.insert(MOReg);
+ if (MOReg != Reg && (MO.isKill() ||
+ (LIS && isPlainlyKilled(MI, MOReg, LIS))))
+ Kills.insert(MOReg);
+ }
+ }
+
+ // Move the copies connected to MI down as well.
+ MachineBasicBlock::iterator Begin = MI;
+ MachineBasicBlock::iterator AfterMI = llvm::next(Begin);
+
+ MachineBasicBlock::iterator End = AfterMI;
+ while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
+ Defs.insert(End->getOperand(0).getReg());
+ ++End;
+ }
+
+ // Check if the reschedule will not break depedencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+ for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
+ // Don't move pass calls, etc.
+ return false;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(MOReg))
+ // Physical register use would be clobbered.
+ return false;
+ if (!MO.isDead() && Defs.count(MOReg))
+ // May clobber a physical register def.
+ // FIXME: This may be too conservative. It's ok if the instruction
+ // is sunken completely below the use.
+ return false;
+ } else {
+ if (Defs.count(MOReg))
+ return false;
+ bool isKill = MO.isKill() ||
+ (LIS && isPlainlyKilled(OtherMI, MOReg, LIS));
+ if (MOReg != Reg &&
+ ((isKill && Uses.count(MOReg)) || Kills.count(MOReg)))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ if (MOReg == Reg && !isKill)
+ // We can't schedule across a use of the register in question.
+ return false;
+ // Ensure that if this is register in question, its the kill we expect.
+ assert((MOReg != Reg || OtherMI == KillMI) &&
+ "Found multiple kills of a register in a basic block");
+ }
+ }
+ }
+
+ // Move debug info as well.
+ while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue())
+ --Begin;
+
+ nmi = End;
+ MachineBasicBlock::iterator InsertPos = KillPos;
+ if (LIS) {
+ // We have to move the copies first so that the MBB is still well-formed
+ // when calling handleMove().
+ for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
+ MachineInstr *CopyMI = MBBI;
+ ++MBBI;
+ MBB->splice(InsertPos, MBB, CopyMI);
+ LIS->handleMove(CopyMI);
+ InsertPos = CopyMI;
+ }
+ End = llvm::next(MachineBasicBlock::iterator(MI));
+ }
+
+ // Copies following MI may have been moved as well.
+ MBB->splice(InsertPos, MBB, Begin, End);
+ DistanceMap.erase(DI);
+
+ // Update live variables
+ if (LIS) {
+ LIS->handleMove(MI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ }
+
+ DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
+ return true;
+}
+
+/// isDefTooClose - Return true if the re-scheduling will put the given
+/// instruction too close to the defs of its register dependencies.
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
+ MachineInstr *MI) {
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ MachineInstr *DefMI = &*DI;
+ if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike())
+ continue;
+ if (DefMI == MI)
+ return true; // MI is defining something KillMI uses
+ DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI);
+ if (DDI == DistanceMap.end())
+ return true; // Below MI
+ unsigned DefDist = DDI->second;
+ assert(Dist > DefDist && "Visited def already?");
+ if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
+ return true;
+ }
+ return false;
+}
+
+/// rescheduleKillAboveMI - If there is one more local instruction that reads
+/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the
+/// current two-address instruction in order to eliminate the need for the
+/// copy.
+bool TwoAddressInstructionPass::
+rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
+ return false;
+
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = 0;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!KillMI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ SmallSet<unsigned, 2> LiveDefs;
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ if (!MOReg)
+ continue;
+ if (isDefTooClose(MOReg, DI->second, MI))
+ return false;
+ bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS));
+ if (MOReg == Reg && !isKill)
+ return false;
+ Uses.insert(MOReg);
+ if (isKill && MOReg != Reg)
+ Kills.insert(MOReg);
+ } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ Defs.insert(MOReg);
+ if (!MO.isDead())
+ LiveDefs.insert(MOReg);
+ }
+ }
+
+ // Check if the reschedule will not break depedencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
+ // Don't move pass calls, etc.
+ return false;
+ SmallVector<unsigned, 2> OtherDefs;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse()) {
+ if (Defs.count(MOReg))
+ // Moving KillMI can clobber the physical register if the def has
+ // not been seen.
+ return false;
+ if (Kills.count(MOReg))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ if (OtherMI != MI && MOReg == Reg &&
+ !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))))
+ // We can't schedule across a use of the register in question.
+ return false;
+ } else {
+ OtherDefs.push_back(MOReg);
+ }
+ }
+
+ for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
+ unsigned MOReg = OtherDefs[i];
+ if (Uses.count(MOReg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ LiveDefs.count(MOReg))
+ return false;
+ // Physical register def is seen.
+ Defs.erase(MOReg);
+ }
+ }
+
+ // Move the old kill above MI, don't forget to move debug info as well.
+ MachineBasicBlock::iterator InsertPos = mi;
+ while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue())
+ --InsertPos;
+ MachineBasicBlock::iterator From = KillMI;
+ MachineBasicBlock::iterator To = llvm::next(From);
+ while (llvm::prior(From)->isDebugValue())
+ --From;
+ MBB->splice(InsertPos, MBB, From, To);
+
+ nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr.
+ DistanceMap.erase(DI);
+
+ // Update live variables
+ if (LIS) {
+ LIS->handleMove(KillMI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ }
+
+ DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
+ return true;
+}
+
+/// tryInstructionTransform - For the case where an instruction has a single
+/// pair of tied register operands, attempt some transformations that may
+/// either eliminate the tied operands or improve the opportunities for
+/// coalescing away the register copy. Returns true if no copy needs to be
+/// inserted to untie mi's operands (either because they were untied, or
+/// because mi was rescheduled, and will be visited again later). If the
+/// shouldOnlyCommute flag is true, only instruction commutation is attempted.
+bool TwoAddressInstructionPass::
+tryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist, bool shouldOnlyCommute) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ MachineInstr &MI = *mi;
+ unsigned regA = MI.getOperand(DstIdx).getReg();
+ unsigned regB = MI.getOperand(SrcIdx).getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot make instruction into two-address form");
+ bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
+
+ if (TargetRegisterInfo::isVirtualRegister(regA))
+ scanUses(regA);
+
+ // Check if it is profitable to commute the operands.
+ unsigned SrcOp1, SrcOp2;
+ unsigned regC = 0;
+ unsigned regCIdx = ~0U;
+ bool TryCommute = false;
+ bool AggressiveCommute = false;
+ if (MI.isCommutable() && MI.getNumOperands() >= 3 &&
+ TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) {
+ if (SrcIdx == SrcOp1)
+ regCIdx = SrcOp2;
+ else if (SrcIdx == SrcOp2)
+ regCIdx = SrcOp1;
+
+ if (regCIdx != ~0U) {
+ regC = MI.getOperand(regCIdx).getReg();
+ if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false))
+ // If C dies but B does not, swap the B and C operands.
+ // This makes the live ranges of A and C joinable.
+ TryCommute = true;
+ else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) {
+ TryCommute = true;
+ AggressiveCommute = true;
+ }
+ }
+ }
+
+ // If it's profitable to commute, try to do so.
+ if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) {
+ ++NumCommuted;
+ if (AggressiveCommute)
+ ++NumAggrCommuted;
+ return false;
+ }
+
+ if (shouldOnlyCommute)
+ return false;
+
+ // If there is one more use of regB later in the same MBB, consider
+ // re-schedule this MI below it.
+ if (rescheduleMIBelowKill(mi, nmi, regB)) {
+ ++NumReSchedDowns;
+ return true;
+ }
+
+ if (MI.isConvertibleTo3Addr()) {
+ // This instruction is potentially convertible to a true
+ // three-address instruction. Check if it is profitable.
+ if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
+ // Try to convert it.
+ if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ return true; // Done with this instruction.
+ }
+ }
+ }
+
+ // If there is one more use of regB later in the same MBB, consider
+ // re-schedule it before this MI if it's legal.
+ if (rescheduleKillAboveMI(mi, nmi, regB)) {
+ ++NumReSchedUps;
+ return true;
+ }
+
+ // If this is an instruction with a load folded into it, try unfolding
+ // the load, e.g. avoid this:
+ // movq %rdx, %rcx
+ // addq (%rax), %rcx
+ // in favor of this:
+ // movq (%rax), %rcx
+ // addq %rdx, %rcx
+ // because it's preferable to schedule a load than a register copy.
+ if (MI.mayLoad() && !regBKilled) {
+ // Determine if a load can be unfolded.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc != 0) {
+ const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
+ if (UnfoldMCID.getNumDefs() == 1) {
+ // Unfold the load.
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
+ unsigned Reg = MRI->createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 2> NewMIs;
+ if (!TII->unfoldMemoryOperand(*MF, &MI, Reg,
+ /*UnfoldLoad=*/true,/*UnfoldStore=*/false,
+ NewMIs)) {
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ return false;
+ }
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ // The load was previously folded, so this is the only use.
+ NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+ // Tentatively insert the instructions into the block so that they
+ // look "normal" to the transformation logic.
+ MBB->insert(mi, NewMIs[0]);
+ MBB->insert(mi, NewMIs[1]);
+
+ DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+ << "2addr: NEW INST: " << *NewMIs[1]);
+
+ // Transform the instruction, now that it no longer has a load.
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+ unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+ MachineBasicBlock::iterator NewMI = NewMIs[1];
+ bool TransformResult =
+ tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true);
+ (void)TransformResult;
+ assert(!TransformResult &&
+ "tryInstructionTransform() should return false.");
+ if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ // Success, or at least we made an improvement. Keep the unfolded
+ // instructions and discard the original.
+ if (LV) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isUse()) {
+ if (MO.isKill()) {
+ if (NewMIs[0]->killsRegister(MO.getReg()))
+ LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]);
+ else {
+ assert(NewMIs[1]->killsRegister(MO.getReg()) &&
+ "Kill missing after load unfold!");
+ LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]);
+ }
+ }
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) {
+ if (NewMIs[1]->registerDefIsDead(MO.getReg()))
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
+ else {
+ assert(NewMIs[0]->registerDefIsDead(MO.getReg()) &&
+ "Dead flag missing after load unfold!");
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]);
+ }
+ }
+ }
+ }
+ LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+ }
+
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg())
+ OrigRegs.push_back(MOI->getReg());
+ }
+ }
+
+ MI.eraseFromParent();
+
+ // Update LiveIntervals.
+ if (LIS) {
+ MachineBasicBlock::iterator Begin(NewMIs[0]);
+ MachineBasicBlock::iterator End(NewMIs[1]);
+ LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs);
+ }
+
+ mi = NewMIs[1];
+ } else {
+ // Transforming didn't eliminate the tie and didn't lead to an
+ // improvement. Clean up the unfolded instructions and keep the
+ // original.
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+// Collect tied operands of MI that need to be handled.
+// Rewrite trivial cases immediately.
+// Return true if any tied operands where found, including the trivial ones.
+bool TwoAddressInstructionPass::
+collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool AnyOps = false;
+ unsigned NumOps = MI->getNumOperands();
+
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+ AnyOps = true;
+ MachineOperand &SrcMO = MI->getOperand(SrcIdx);
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned SrcReg = SrcMO.getReg();
+ unsigned DstReg = DstMO.getReg();
+ // Tied constraint already satisfied?
+ if (SrcReg == DstReg)
+ continue;
+
+ assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
+
+ // Deal with <undef> uses immediately - simply rewrite the src operand.
+ if (SrcMO.isUndef()) {
+ // Constrain the DstReg register class if required.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
+ TRI, *MF))
+ MRI->constrainRegClass(DstReg, RC);
+ SrcMO.setReg(DstReg);
+ DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
+ continue;
+ }
+ TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+ return AnyOps;
+}
+
+// Process a list of tied MI operands that all use the same source register.
+// The tied pairs are of the form (SrcIdx, DstIdx).
+void
+TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
+ TiedPairList &TiedPairs,
+ unsigned &Dist) {
+ bool IsEarlyClobber = false;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second);
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+ }
+
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ SlotIndex LastCopyIdx;
+ unsigned RegB = 0;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+
+ const MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned RegA = DstMO.getReg();
+
+ // Grab RegB from the instruction because it may have changed if the
+ // instruction was commuted.
+ RegB = MI->getOperand(SrcIdx).getReg();
+
+ if (RegA == RegB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = RegA;
+
+ assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !MI->getOperand(i).isReg() ||
+ MI->getOperand(i).getReg() != RegA);
+#endif
+
+ // Emit a copy.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+
+ // Update DistanceMap.
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ DistanceMap.insert(std::make_pair(PrevMI, Dist));
+ DistanceMap[MI] = ++Dist;
+
+ if (LIS) {
+ LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ LiveInterval &LI = LIS->getInterval(RegA);
+ VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ SlotIndex endIdx =
+ LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber);
+ LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI));
+ }
+ }
+
+ DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+
+ // Make sure regA is a legal regclass for the SrcIdx operand.
+ if (TargetRegisterInfo::isVirtualRegister(RegA) &&
+ TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
+
+ MO.setReg(RegA);
+
+ // Propagate SrcRegMap.
+ SrcRegMap[RegA] = RegB;
+ }
+
+
+ if (AllUsesCopied) {
+ if (!IsEarlyClobber) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ }
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) {
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ LV->addVirtualRegisterKilled(RegB, PrevMI);
+ }
+
+ // Update LiveIntervals.
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(RegB);
+ SlotIndex MIIdx = LIS->getInstructionIndex(MI);
+ LiveInterval::const_iterator I = LI.find(MIIdx);
+ assert(I != LI.end() && "RegB must be live-in to use.");
+
+ SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
+ if (I->end == UseIdx)
+ LI.removeRange(LastCopyIdx, UseIdx);
+ }
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ const TargetMachine &TM = MF->getTarget();
+ MRI = &MF->getRegInfo();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ InstrItins = TM.getInstrItineraryData();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+ AA = &getAnalysis<AliasAnalysis>();
+ OptLevel = TM.getOptLevel();
+
+ bool MadeChange = false;
+
+ DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+ DEBUG(dbgs() << "********** Function: "
+ << MF->getName() << '\n');
+
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
+ TiedOperandMap TiedOperands;
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MBB = MBBI;
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ SrcRegMap.clear();
+ DstRegMap.clear();
+ Processed.clear();
+ for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end();
+ mi != me; ) {
+ MachineBasicBlock::iterator nmi = llvm::next(mi);
+ if (mi->isDebugValue()) {
+ mi = nmi;
+ continue;
+ }
+
+ // Expand REG_SEQUENCE instructions. This will position mi at the first
+ // expanded instruction.
+ if (mi->isRegSequence())
+ eliminateRegSequence(mi);
+
+ DistanceMap.insert(std::make_pair(mi, ++Dist));
+
+ processCopy(&*mi);
+
+ // First scan through all the tied register uses in this instruction
+ // and record a list of pairs of tied operands for each register.
+ if (!collectTiedOperands(mi, TiedOperands)) {
+ mi = nmi;
+ continue;
+ }
+
+ ++NumTwoAddressInstrs;
+ MadeChange = true;
+ DEBUG(dbgs() << '\t' << *mi);
+
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs
+ = TiedOperands.begin()->second;
+ if (TiedPairs.size() == 1) {
+ unsigned SrcIdx = TiedPairs[0].first;
+ unsigned DstIdx = TiedPairs[0].second;
+ unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
+ unsigned DstReg = mi->getOperand(DstIdx).getReg();
+ if (SrcReg != DstReg &&
+ tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
+ // The tied operands have been eliminated or shifted further down the
+ // block to ease elimination. Continue processing with 'nmi'.
+ TiedOperands.clear();
+ mi = nmi;
+ continue;
+ }
+ }
+ }
+
+ // Now iterate over the information collected above.
+ for (TiedOperandMap::iterator OI = TiedOperands.begin(),
+ OE = TiedOperands.end(); OI != OE; ++OI) {
+ processTiedPairs(mi, OI->second, Dist);
+ DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ }
+
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ }
+
+ // Clear TiedOperands here instead of at the top of the loop
+ // since most instructions do not have tied operands.
+ TiedOperands.clear();
+ mi = nmi;
+ }
+ }
+
+ if (LIS)
+ MF->verify(this, "After two-address instruction pass");
+
+ return MadeChange;
+}
+
+/// Eliminate a REG_SEQUENCE instruction as part of the de-ssa process.
+///
+/// The instruction is turned into a sequence of sub-register copies:
+///
+/// %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1
+///
+/// Becomes:
+///
+/// %dst:ssub0<def,undef> = COPY %v1
+/// %dst:ssub1<def> = COPY %v2
+///
+void TwoAddressInstructionPass::
+eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = MBBI;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (MI->getOperand(0).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !(MI->getNumOperands() & 1)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ llvm_unreachable(0);
+ }
+
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ OrigRegs.push_back(MI->getOperand(0).getReg());
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2)
+ OrigRegs.push_back(MI->getOperand(i).getReg());
+ }
+
+ bool DefEmitted = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ MachineOperand &UseMO = MI->getOperand(i);
+ unsigned SrcReg = UseMO.getReg();
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
+ // Nothing needs to be inserted for <undef> operands.
+ if (UseMO.isUndef())
+ continue;
+
+ // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+ // might insert a COPY that uses SrcReg after is was killed.
+ bool isKill = UseMO.isKill();
+ if (isKill)
+ for (unsigned j = i + 2; j < e; j += 2)
+ if (MI->getOperand(j).getReg() == SrcReg) {
+ MI->getOperand(j).setIsKill();
+ UseMO.setIsKill(false);
+ isKill = false;
+ break;
+ }
+
+ // Insert the sub-register copy.
+ MachineInstr *CopyMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, SubIdx)
+ .addOperand(UseMO);
+
+ // The first def needs an <undef> flag because there is no live register
+ // before it.
+ if (!DefEmitted) {
+ CopyMI->getOperand(0).setIsUndef(true);
+ // Return an iterator pointing to the first inserted instr.
+ MBBI = CopyMI;
+ }
+ DefEmitted = true;
+
+ // Update LiveVariables' kill info.
+ if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ LV->replaceKillInstruction(SrcReg, MI, CopyMI);
+
+ DEBUG(dbgs() << "Inserted: " << *CopyMI);
+ }
+
+ MachineBasicBlock::iterator EndMBBI =
+ llvm::next(MachineBasicBlock::iterator(MI));
+
+ if (!DefEmitted) {
+ DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ } else {
+ DEBUG(dbgs() << "Eliminated: " << *MI);
+ MI->eraseFromParent();
+ }
+
+ // Udpate LiveIntervals.
+ if (LIS)
+ LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs);
+}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 000000000000..a95ebcd16da8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,215 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+namespace {
+ class UnreachableBlockElim : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElim() : FunctionPass(ID) {
+ initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ProfileInfo>();
+ }
+ };
+}
+char UnreachableBlockElim::ID = 0;
+INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
+ "Remove unreachable blocks from the CFG", false, false)
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElim();
+}
+
+bool UnreachableBlockElim::runOnFunction(Function &F) {
+ SmallPtrSet<BasicBlock*, 8> Reachable;
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
+ df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(I)) {
+ BasicBlock *BB = I;
+ DeadBlocks.push_back(BB);
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+ BB->getInstList().pop_front();
+ }
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ // Actually remove the blocks now.
+ ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ if (PI) PI->removeBlock(DeadBlocks[i]);
+ DeadBlocks[i]->eraseFromParent();
+ }
+
+ return DeadBlocks.size();
+}
+
+
+namespace {
+ class UnreachableMachineBlockElim : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ MachineModuleInfo *MMI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
+ };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks", false, false)
+
+char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
+
+void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ bool ModifiedPHI = false;
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
+ I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<MachineBasicBlock*> DeadBlocks;
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+
+ // Test for deadness.
+ if (!Reachable.count(BB)) {
+ DeadBlocks.push_back(BB);
+
+ // Update dominator and loop info.
+ if (MLI) MLI->removeBlock(BB);
+ if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);
+
+ while (BB->succ_begin() != BB->succ_end()) {
+ MachineBasicBlock* succ = *BB->succ_begin();
+
+ MachineBasicBlock::iterator start = succ->begin();
+ while (start != succ->end() && start->isPHI()) {
+ for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+ if (start->getOperand(i).isMBB() &&
+ start->getOperand(i).getMBB() == BB) {
+ start->RemoveOperand(i);
+ start->RemoveOperand(i-1);
+ }
+
+ start++;
+ }
+
+ BB->removeSuccessor(BB->succ_begin());
+ }
+ }
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ DeadBlocks[i]->eraseFromParent();
+
+ // Cleanup PHI nodes.
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ // Prune unneeded PHI entries.
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
+ BB->pred_end());
+ MachineBasicBlock::iterator phi = BB->begin();
+ while (phi != BB->end() && phi->isPHI()) {
+ for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
+ if (!preds.count(phi->getOperand(i).getMBB())) {
+ phi->RemoveOperand(i);
+ phi->RemoveOperand(i-1);
+ ModifiedPHI = true;
+ }
+
+ if (phi->getNumOperands() == 3) {
+ unsigned Input = phi->getOperand(1).getReg();
+ unsigned Output = phi->getOperand(0).getReg();
+
+ MachineInstr* temp = phi;
+ ++phi;
+ temp->eraseFromParent();
+ ModifiedPHI = true;
+
+ if (Input != Output) {
+ MachineRegisterInfo &MRI = F.getRegInfo();
+ MRI.constrainRegClass(Input, MRI.getRegClass(Output));
+ MRI.replaceRegWith(Output, Input);
+ }
+
+ continue;
+ }
+
+ ++phi;
+ }
+ }
+
+ F.RenumberBlocks();
+
+ return (DeadBlocks.size() || ModifiedPHI);
+}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 000000000000..cd012d297489
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,359 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "LiveDebugVariables.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSpillSlots, "Number of spill slots allocated");
+STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting");
+
+//===----------------------------------------------------------------------===//
+// VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+char VirtRegMap::ID = 0;
+
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
+
+bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ MRI = &mf.getRegInfo();
+ TII = mf.getTarget().getInstrInfo();
+ TRI = mf.getTarget().getRegisterInfo();
+ MF = &mf;
+
+ Virt2PhysMap.clear();
+ Virt2StackSlotMap.clear();
+ Virt2SplitMap.clear();
+
+ grow();
+ return false;
+}
+
+void VirtRegMap::grow() {
+ unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
+ Virt2PhysMap.resize(NumRegs);
+ Virt2StackSlotMap.resize(NumRegs);
+ Virt2SplitMap.resize(NumRegs);
+}
+
+unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+ ++NumSpillSlots;
+ return SS;
+}
+
+bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
+ unsigned Hint = MRI->getSimpleHint(VirtReg);
+ if (!Hint)
+ return 0;
+ if (TargetRegisterInfo::isVirtualRegister(Hint))
+ Hint = getPhys(Hint);
+ return getPhys(VirtReg) == Hint;
+}
+
+bool VirtRegMap::hasKnownPreference(unsigned VirtReg) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
+ if (TargetRegisterInfo::isPhysicalRegister(Hint.second))
+ return true;
+ if (TargetRegisterInfo::isVirtualRegister(Hint.second))
+ return hasPhys(Hint.second);
+ return false;
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+ return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ assert((SS >= 0 ||
+ (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+ "illegal fixed frame index");
+ Virt2StackSlotMap[virtReg] = SS;
+}
+
+void VirtRegMap::print(raw_ostream &OS, const Module*) const {
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> "
+ << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ << MRI->getRegClass(Reg)->getName() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << MRI->getRegClass(Reg)->getName() << "\n";
+ }
+ }
+ OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VirtRegMap::dump() const {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// VirtRegRewriter
+//===----------------------------------------------------------------------===//
+//
+// The VirtRegRewriter is the last of the register allocator passes.
+// It rewrites virtual registers to physical registers as specified in the
+// VirtRegMap analysis. It also updates live-in information on basic blocks
+// according to LiveIntervals.
+//
+namespace {
+class VirtRegRewriter : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
+ VirtRegMap *VRM;
+
+ void rewrite();
+ void addMBBLiveIns();
+public:
+ static char ID;
+ VirtRegRewriter() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual bool runOnMachineFunction(MachineFunction&);
+};
+} // end anonymous namespace
+
+char &llvm::VirtRegRewriterID = VirtRegRewriter::ID;
+
+INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+
+char VirtRegRewriter::ID = 0;
+
+void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ TM = &MF->getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ MRI = &MF->getRegInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
+ DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+ << "********** Function: "
+ << MF->getName() << '\n');
+ DEBUG(VRM->dump());
+
+ // Add kill flags while we still have virtual registers.
+ LIS->addKillFlags(VRM);
+
+ // Live-in lists on basic blocks are required for physregs.
+ addMBBLiveIns();
+
+ // Rewrite virtual registers.
+ rewrite();
+
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
+ return true;
+}
+
+// Compute MBB live-in lists from virtual register live ranges and their
+// assignments.
+void VirtRegRewriter::addMBBLiveIns() {
+ SmallVector<MachineBasicBlock*, 16> LiveIn;
+ for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
+ unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);
+ if (MRI->reg_nodbg_empty(VirtReg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ if (LI.empty() || LIS->intervalIsInOneMBB(LI))
+ continue;
+ // This is a virtual register that is live across basic blocks. Its
+ // assigned PhysReg must be marked as live-in to those blocks.
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
+
+ // Scan the segments of LI.
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I != E;
+ ++I) {
+ if (!Indexes->findLiveInMBBs(I->start, I->end, LiveIn))
+ continue;
+ for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
+ if (!LiveIn[i]->isLiveIn(PhysReg))
+ LiveIn[i]->addLiveIn(PhysReg);
+ LiveIn.clear();
+ }
+ }
+}
+
+void VirtRegRewriter::rewrite() {
+ SmallVector<unsigned, 8> SuperDeads;
+ SmallVector<unsigned, 8> SuperDefs;
+ SmallVector<unsigned, 8> SuperKills;
+
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ DEBUG(MBBI->print(dbgs(), Indexes));
+ for (MachineBasicBlock::instr_iterator
+ MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
+ MachineInstr *MI = MII;
+ ++MII;
+
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask())
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned VirtReg = MO.getReg();
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
+ "Instruction uses unmapped VirtReg");
+ assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
+
+ // Preserve semantics of sub-register operands.
+ if (MO.getSubReg()) {
+ // A virtual register kill refers to the whole register, so we may
+ // have to add <imp-use,kill> operands for the super-register. A
+ // partial redef always kills and redefines the super-register.
+ if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+ SuperKills.push_back(PhysReg);
+
+ if (MO.isDef()) {
+ // The <def,undef> flag only makes sense for sub-register defs, and
+ // we are substituting a full physreg. An <imp-use,kill> operand
+ // from the SuperKills list will represent the partial read of the
+ // super-register.
+ MO.setIsUndef(false);
+
+ // Also add implicit defs for the super-register.
+ if (MO.isDead())
+ SuperDeads.push_back(PhysReg);
+ else
+ SuperDefs.push_back(PhysReg);
+ }
+
+ // PhysReg operands cannot have subregister indexes.
+ PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
+ assert(PhysReg && "Invalid SubReg for physical register");
+ MO.setSubReg(0);
+ }
+ // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+ // we need the inlining here.
+ MO.setReg(PhysReg);
+ }
+
+ // Add any missing super-register kills after rewriting the whole
+ // instruction.
+ while (!SuperKills.empty())
+ MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+ while (!SuperDeads.empty())
+ MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+
+ while (!SuperDefs.empty())
+ MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+
+ DEBUG(dbgs() << "> " << *MI);
+
+ // Finally, remove any identity copies.
+ if (MI->isIdentityCopy()) {
+ ++NumIdCopies;
+ if (MI->getNumOperands() == 2) {
+ DEBUG(dbgs() << "Deleting identity copy.\n");
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ // It's safe to erase MI because MII has already been incremented.
+ MI->eraseFromParent();
+ } else {
+ // Transform identity copy to a KILL to deal with subregisters.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "Identity copy: " << *MI);
+ }
+ }
+ }
+ }
+
+ // Tell MRI about physical registers in use.
+ for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+ if (!MRI->reg_nodbg_empty(Reg))
+ MRI->setPhysRegUsed(Reg);
+}
diff --git a/contrib/llvm/lib/DebugInfo/DIContext.cpp b/contrib/llvm/lib/DebugInfo/DIContext.cpp
new file mode 100644
index 000000000000..49a44097d3e2
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DIContext.cpp
@@ -0,0 +1,18 @@
+//===-- DIContext.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DIContext.h"
+#include "DWARFContext.h"
+using namespace llvm;
+
+DIContext::~DIContext() {}
+
+DIContext *DIContext::getDWARFContext(object::ObjectFile *Obj) {
+ return new DWARFContextInMemory(Obj);
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
new file mode 100644
index 000000000000..2de62ab9380d
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
@@ -0,0 +1,83 @@
+//===-- DWARFAbbreviationDeclaration.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFAbbreviationDeclaration.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace dwarf;
+
+bool
+DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr){
+ return extract(data, offset_ptr, data.getULEB128(offset_ptr));
+}
+
+bool
+DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr,
+ uint32_t code) {
+ Code = code;
+ Attribute.clear();
+ if (Code) {
+ Tag = data.getULEB128(offset_ptr);
+ HasChildren = data.getU8(offset_ptr);
+
+ while (data.isValidOffset(*offset_ptr)) {
+ uint16_t attr = data.getULEB128(offset_ptr);
+ uint16_t form = data.getULEB128(offset_ptr);
+
+ if (attr && form)
+ Attribute.push_back(DWARFAttribute(attr, form));
+ else
+ break;
+ }
+
+ return Tag != 0;
+ } else {
+ Tag = 0;
+ HasChildren = false;
+ }
+
+ return false;
+}
+
+void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
+ const char *tagString = TagString(getTag());
+ OS << '[' << getCode() << "] ";
+ if (tagString)
+ OS << tagString;
+ else
+ OS << format("DW_TAG_Unknown_%x", getTag());
+ OS << "\tDW_CHILDREN_" << (hasChildren() ? "yes" : "no") << '\n';
+ for (unsigned i = 0, e = Attribute.size(); i != e; ++i) {
+ OS << '\t';
+ const char *attrString = AttributeString(Attribute[i].getAttribute());
+ if (attrString)
+ OS << attrString;
+ else
+ OS << format("DW_AT_Unknown_%x", Attribute[i].getAttribute());
+ OS << '\t';
+ const char *formString = FormEncodingString(Attribute[i].getForm());
+ if (formString)
+ OS << formString;
+ else
+ OS << format("DW_FORM_Unknown_%x", Attribute[i].getForm());
+ OS << '\n';
+ }
+ OS << '\n';
+}
+
+uint32_t
+DWARFAbbreviationDeclaration::findAttributeIndex(uint16_t attr) const {
+ for (uint32_t i = 0, e = Attribute.size(); i != e; ++i) {
+ if (Attribute[i].getAttribute() == attr)
+ return i;
+ }
+ return -1U;
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h
new file mode 100644
index 000000000000..9a3fcd8a783c
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h
@@ -0,0 +1,54 @@
+//===-- DWARFAbbreviationDeclaration.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
+#define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
+
+#include "DWARFAttribute.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataExtractor.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+class DWARFAbbreviationDeclaration {
+ uint32_t Code;
+ uint32_t Tag;
+ bool HasChildren;
+ SmallVector<DWARFAttribute, 8> Attribute;
+public:
+ enum { InvalidCode = 0 };
+ DWARFAbbreviationDeclaration()
+ : Code(InvalidCode), Tag(0), HasChildren(0) {}
+
+ uint32_t getCode() const { return Code; }
+ uint32_t getTag() const { return Tag; }
+ bool hasChildren() const { return HasChildren; }
+ uint32_t getNumAttributes() const { return Attribute.size(); }
+ uint16_t getAttrByIndex(uint32_t idx) const {
+ return Attribute.size() > idx ? Attribute[idx].getAttribute() : 0;
+ }
+ uint16_t getFormByIndex(uint32_t idx) const {
+ return Attribute.size() > idx ? Attribute[idx].getForm() : 0;
+ }
+
+ uint32_t findAttributeIndex(uint16_t attr) const;
+ bool extract(DataExtractor data, uint32_t* offset_ptr);
+ bool extract(DataExtractor data, uint32_t* offset_ptr, uint32_t code);
+ bool isValid() const { return Code != 0 && Tag != 0; }
+ void dump(raw_ostream &OS) const;
+ const SmallVectorImpl<DWARFAttribute> &getAttributes() const {
+ return Attribute;
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFAttribute.h b/contrib/llvm/lib/DebugInfo/DWARFAttribute.h
new file mode 100644
index 000000000000..6f49b637c6a6
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFAttribute.h
@@ -0,0 +1,30 @@
+//===-- DWARFAttribute.h ----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFATTRIBUTE_H
+#define LLVM_DEBUGINFO_DWARFATTRIBUTE_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class DWARFAttribute {
+ uint16_t Attribute;
+ uint16_t Form;
+ public:
+ DWARFAttribute(uint16_t attr, uint16_t form)
+ : Attribute(attr), Form(form) {}
+
+ uint16_t getAttribute() const { return Attribute; }
+ uint16_t getForm() const { return Form; }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp
new file mode 100644
index 000000000000..e3e4ccd7d9e1
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp
@@ -0,0 +1,272 @@
+//===-- DWARFCompileUnit.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFCompileUnit.h"
+#include "DWARFContext.h"
+#include "DWARFFormValue.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace dwarf;
+
+DataExtractor DWARFCompileUnit::getDebugInfoExtractor() const {
+ return DataExtractor(InfoSection, isLittleEndian, AddrSize);
+}
+
+bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
+ clear();
+
+ Offset = *offset_ptr;
+
+ if (debug_info.isValidOffset(*offset_ptr)) {
+ uint64_t abbrOffset;
+ Length = debug_info.getU32(offset_ptr);
+ Version = debug_info.getU16(offset_ptr);
+ abbrOffset = debug_info.getU32(offset_ptr);
+ AddrSize = debug_info.getU8(offset_ptr);
+
+ bool lengthOK = debug_info.isValidOffset(getNextCompileUnitOffset()-1);
+ bool versionOK = DWARFContext::isSupportedVersion(Version);
+ bool abbrOffsetOK = AbbrevSection.size() > abbrOffset;
+ bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
+
+ if (lengthOK && versionOK && addrSizeOK && abbrOffsetOK && Abbrev != NULL) {
+ Abbrevs = Abbrev->getAbbreviationDeclarationSet(abbrOffset);
+ return true;
+ }
+
+ // reset the offset to where we tried to parse from if anything went wrong
+ *offset_ptr = Offset;
+ }
+
+ return false;
+}
+
+uint32_t
+DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data,
+ const DWARFAbbreviationDeclarationSet *abbrevs) {
+ clear();
+
+ Offset = offset;
+
+ if (debug_info_data.isValidOffset(offset)) {
+ Length = debug_info_data.getU32(&offset);
+ Version = debug_info_data.getU16(&offset);
+ bool abbrevsOK = debug_info_data.getU32(&offset) == abbrevs->getOffset();
+ Abbrevs = abbrevs;
+ AddrSize = debug_info_data.getU8(&offset);
+
+ bool versionOK = DWARFContext::isSupportedVersion(Version);
+ bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
+
+ if (versionOK && addrSizeOK && abbrevsOK &&
+ debug_info_data.isValidOffset(offset))
+ return offset;
+ }
+ return 0;
+}
+
+bool DWARFCompileUnit::extractRangeList(uint32_t RangeListOffset,
+ DWARFDebugRangeList &RangeList) const {
+ // Require that compile unit is extracted.
+ assert(DieArray.size() > 0);
+ DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize);
+ return RangeList.extract(RangesData, &RangeListOffset);
+}
+
+void DWARFCompileUnit::clear() {
+ Offset = 0;
+ Length = 0;
+ Version = 0;
+ Abbrevs = 0;
+ AddrSize = 0;
+ BaseAddr = 0;
+ clearDIEs(false);
+}
+
+void DWARFCompileUnit::dump(raw_ostream &OS) {
+ OS << format("0x%08x", Offset) << ": Compile Unit:"
+ << " length = " << format("0x%08x", Length)
+ << " version = " << format("0x%04x", Version)
+ << " abbr_offset = " << format("0x%04x", Abbrevs->getOffset())
+ << " addr_size = " << format("0x%02x", AddrSize)
+ << " (next CU at " << format("0x%08x", getNextCompileUnitOffset())
+ << ")\n";
+
+ const DWARFDebugInfoEntryMinimal *CU = getCompileUnitDIE(false);
+ assert(CU && "Null Compile Unit?");
+ CU->dump(OS, this, -1U);
+}
+
+const char *DWARFCompileUnit::getCompilationDir() {
+ extractDIEsIfNeeded(true);
+ if (DieArray.empty())
+ return 0;
+ return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0);
+}
+
+void DWARFCompileUnit::setDIERelations() {
+ if (DieArray.empty())
+ return;
+ DWARFDebugInfoEntryMinimal *die_array_begin = &DieArray.front();
+ DWARFDebugInfoEntryMinimal *die_array_end = &DieArray.back();
+ DWARFDebugInfoEntryMinimal *curr_die;
+ // We purposely are skipping the last element in the array in the loop below
+ // so that we can always have a valid next item
+ for (curr_die = die_array_begin; curr_die < die_array_end; ++curr_die) {
+ // Since our loop doesn't include the last element, we can always
+ // safely access the next die in the array.
+ DWARFDebugInfoEntryMinimal *next_die = curr_die + 1;
+
+ const DWARFAbbreviationDeclaration *curr_die_abbrev =
+ curr_die->getAbbreviationDeclarationPtr();
+
+ if (curr_die_abbrev) {
+ // Normal DIE
+ if (curr_die_abbrev->hasChildren())
+ next_die->setParent(curr_die);
+ else
+ curr_die->setSibling(next_die);
+ } else {
+ // NULL DIE that terminates a sibling chain
+ DWARFDebugInfoEntryMinimal *parent = curr_die->getParent();
+ if (parent)
+ parent->setSibling(next_die);
+ }
+ }
+
+ // Since we skipped the last element, we need to fix it up!
+ if (die_array_begin < die_array_end)
+ curr_die->setParent(die_array_begin);
+}
+
+size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) {
+ const size_t initial_die_array_size = DieArray.size();
+ if ((cu_die_only && initial_die_array_size > 0) ||
+ initial_die_array_size > 1)
+ return 0; // Already parsed
+
+ // Set the offset to that of the first DIE and calculate the start of the
+ // next compilation unit header.
+ uint32_t offset = getFirstDIEOffset();
+ uint32_t next_cu_offset = getNextCompileUnitOffset();
+
+ DWARFDebugInfoEntryMinimal die;
+ // Keep a flat array of the DIE for binary lookup by DIE offset
+ uint32_t depth = 0;
+ // We are in our compile unit, parse starting at the offset
+ // we were told to parse
+
+ const uint8_t *fixed_form_sizes =
+ DWARFFormValue::getFixedFormSizesForAddressSize(getAddressByteSize());
+
+ while (offset < next_cu_offset &&
+ die.extractFast(this, fixed_form_sizes, &offset)) {
+
+ if (depth == 0) {
+ uint64_t base_addr =
+ die.getAttributeValueAsUnsigned(this, DW_AT_low_pc, -1U);
+ if (base_addr == -1U)
+ base_addr = die.getAttributeValueAsUnsigned(this, DW_AT_entry_pc, 0);
+ setBaseAddress(base_addr);
+ }
+
+ if (cu_die_only) {
+ addDIE(die);
+ return 1;
+ }
+ else if (depth == 0 && initial_die_array_size == 1)
+ // Don't append the CU die as we already did that
+ ;
+ else
+ addDIE(die);
+
+ const DWARFAbbreviationDeclaration *abbrDecl =
+ die.getAbbreviationDeclarationPtr();
+ if (abbrDecl) {
+ // Normal DIE
+ if (abbrDecl->hasChildren())
+ ++depth;
+ } else {
+ // NULL DIE.
+ if (depth > 0)
+ --depth;
+ if (depth == 0)
+ break; // We are done with this compile unit!
+ }
+
+ }
+
+ // Give a little bit of info if we encounter corrupt DWARF (our offset
+ // should always terminate at or before the start of the next compilation
+ // unit header).
+ if (offset > next_cu_offset)
+ fprintf(stderr, "warning: DWARF compile unit extends beyond its "
+ "bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), offset);
+
+ setDIERelations();
+ return DieArray.size();
+}
+
+void DWARFCompileUnit::clearDIEs(bool keep_compile_unit_die) {
+ if (DieArray.size() > (unsigned)keep_compile_unit_die) {
+ // std::vectors never get any smaller when resized to a smaller size,
+ // or when clear() or erase() are called, the size will report that it
+ // is smaller, but the memory allocated remains intact (call capacity()
+ // to see this). So we need to create a temporary vector and swap the
+ // contents which will cause just the internal pointers to be swapped
+ // so that when "tmp_array" goes out of scope, it will destroy the
+ // contents.
+
+ // Save at least the compile unit DIE
+ std::vector<DWARFDebugInfoEntryMinimal> tmpArray;
+ DieArray.swap(tmpArray);
+ if (keep_compile_unit_die)
+ DieArray.push_back(tmpArray.front());
+ }
+}
+
+void
+DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
+ bool clear_dies_if_already_not_parsed){
+ // This function is usually called if there in no .debug_aranges section
+ // in order to produce a compile unit level set of address ranges that
+ // is accurate. If the DIEs weren't parsed, then we don't want all dies for
+ // all compile units to stay loaded when they weren't needed. So we can end
+ // up parsing the DWARF and then throwing them all away to keep memory usage
+ // down.
+ const bool clear_dies = extractDIEsIfNeeded(false) > 1 &&
+ clear_dies_if_already_not_parsed;
+ DieArray[0].buildAddressRangeTable(this, debug_aranges);
+
+ // Keep memory down by clearing DIEs if this generate function
+ // caused them to be parsed.
+ if (clear_dies)
+ clearDIEs(true);
+}
+
+DWARFDebugInfoEntryMinimal::InlinedChain
+DWARFCompileUnit::getInlinedChainForAddress(uint64_t Address) {
+ // First, find a subprogram that contains the given address (the root
+ // of inlined chain).
+ extractDIEsIfNeeded(false);
+ const DWARFDebugInfoEntryMinimal *SubprogramDIE = 0;
+ for (size_t i = 0, n = DieArray.size(); i != n; i++) {
+ if (DieArray[i].isSubprogramDIE() &&
+ DieArray[i].addressRangeContainsAddress(this, Address)) {
+ SubprogramDIE = &DieArray[i];
+ break;
+ }
+ }
+ // Get inlined chain rooted at this subprogram DIE.
+ if (!SubprogramDIE)
+ return DWARFDebugInfoEntryMinimal::InlinedChain();
+ return SubprogramDIE->getInlinedChainForAddress(this, Address);
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h
new file mode 100644
index 000000000000..2a74605fcb2d
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h
@@ -0,0 +1,143 @@
+//===-- DWARFCompileUnit.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H
+#define LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H
+
+#include "DWARFDebugAbbrev.h"
+#include "DWARFDebugInfoEntry.h"
+#include "DWARFDebugRangeList.h"
+#include "DWARFRelocMap.h"
+#include <vector>
+
+namespace llvm {
+
+class DWARFDebugAbbrev;
+class StringRef;
+class raw_ostream;
+
+class DWARFCompileUnit {
+ const DWARFDebugAbbrev *Abbrev;
+ StringRef InfoSection;
+ StringRef AbbrevSection;
+ StringRef RangeSection;
+ StringRef StringSection;
+ StringRef StringOffsetSection;
+ StringRef AddrOffsetSection;
+ const RelocAddrMap *RelocMap;
+ bool isLittleEndian;
+
+ uint32_t Offset;
+ uint32_t Length;
+ uint16_t Version;
+ const DWARFAbbreviationDeclarationSet *Abbrevs;
+ uint8_t AddrSize;
+ uint64_t BaseAddr;
+ // The compile unit debug information entry item.
+ std::vector<DWARFDebugInfoEntryMinimal> DieArray;
+public:
+
+ DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
+ StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
+ const RelocAddrMap *M, bool LE) :
+ Abbrev(DA), InfoSection(IS), AbbrevSection(AS),
+ RangeSection(RS), StringSection(SS), StringOffsetSection(SOS),
+ AddrOffsetSection(AOS), RelocMap(M), isLittleEndian(LE) {
+ clear();
+ }
+
+ StringRef getStringSection() const { return StringSection; }
+ StringRef getStringOffsetSection() const { return StringOffsetSection; }
+ StringRef getAddrOffsetSection() const { return AddrOffsetSection; }
+ const RelocAddrMap *getRelocMap() const { return RelocMap; }
+ DataExtractor getDebugInfoExtractor() const;
+
+ bool extract(DataExtractor debug_info, uint32_t* offset_ptr);
+ uint32_t extract(uint32_t offset, DataExtractor debug_info_data,
+ const DWARFAbbreviationDeclarationSet *abbrevs);
+
+ /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
+ /// hasn't already been done. Returns the number of DIEs parsed at this call.
+ size_t extractDIEsIfNeeded(bool cu_die_only);
+ /// extractRangeList - extracts the range list referenced by this compile
+ /// unit from .debug_ranges section. Returns true on success.
+ /// Requires that compile unit is already extracted.
+ bool extractRangeList(uint32_t RangeListOffset,
+ DWARFDebugRangeList &RangeList) const;
+ void clear();
+ void dump(raw_ostream &OS);
+ uint32_t getOffset() const { return Offset; }
+ /// Size in bytes of the compile unit header.
+ uint32_t getSize() const { return 11; }
+ bool containsDIEOffset(uint32_t die_offset) const {
+ return die_offset >= getFirstDIEOffset() &&
+ die_offset < getNextCompileUnitOffset();
+ }
+ uint32_t getFirstDIEOffset() const { return Offset + getSize(); }
+ uint32_t getNextCompileUnitOffset() const { return Offset + Length + 4; }
+ /// Size in bytes of the .debug_info data associated with this compile unit.
+ size_t getDebugInfoSize() const { return Length + 4 - getSize(); }
+ uint32_t getLength() const { return Length; }
+ uint16_t getVersion() const { return Version; }
+ const DWARFAbbreviationDeclarationSet *getAbbreviations() const {
+ return Abbrevs;
+ }
+ uint8_t getAddressByteSize() const { return AddrSize; }
+ uint64_t getBaseAddress() const { return BaseAddr; }
+
+ void setBaseAddress(uint64_t base_addr) {
+ BaseAddr = base_addr;
+ }
+
+ const DWARFDebugInfoEntryMinimal *
+ getCompileUnitDIE(bool extract_cu_die_only = true) {
+ extractDIEsIfNeeded(extract_cu_die_only);
+ if (DieArray.empty())
+ return NULL;
+ return &DieArray[0];
+ }
+
+ const char *getCompilationDir();
+
+ /// setDIERelations - We read in all of the DIE entries into our flat list
+ /// of DIE entries and now we need to go back through all of them and set the
+ /// parent, sibling and child pointers for quick DIE navigation.
+ void setDIERelations();
+
+ void addDIE(DWARFDebugInfoEntryMinimal &die) {
+ // The average bytes per DIE entry has been seen to be
+ // around 14-20 so lets pre-reserve the needed memory for
+ // our DIE entries accordingly. Search forward for "Compute
+ // average bytes per DIE" to see #if'ed out code that does
+ // that determination.
+
+ // Only reserve the memory if we are adding children of
+ // the main compile unit DIE. The compile unit DIE is always
+ // the first entry, so if our size is 1, then we are adding
+ // the first compile unit child DIE and should reserve
+ // the memory.
+ if (DieArray.empty())
+ DieArray.reserve(getDebugInfoSize() / 14);
+ DieArray.push_back(die);
+ }
+
+ void clearDIEs(bool keep_compile_unit_die);
+
+ void buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
+ bool clear_dies_if_already_not_parsed);
+
+ /// getInlinedChainForAddress - fetches inlined chain for a given address.
+ /// Returns empty chain if there is no subprogram containing address.
+ DWARFDebugInfoEntryMinimal::InlinedChain getInlinedChainForAddress(
+ uint64_t Address);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp
new file mode 100644
index 000000000000..9e19310a99c0
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp
@@ -0,0 +1,596 @@
+//===-- DWARFContext.cpp --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+using namespace dwarf;
+
+typedef DWARFDebugLine::LineTable DWARFLineTable;
+
+void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
+ if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) {
+ OS << ".debug_abbrev contents:\n";
+ getDebugAbbrev()->dump(OS);
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_Info) {
+ OS << "\n.debug_info contents:\n";
+ for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i)
+ getCompileUnitAtIndex(i)->dump(OS);
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_Frames) {
+ OS << "\n.debug_frame contents:\n";
+ getDebugFrame()->dump(OS);
+ }
+
+ uint32_t offset = 0;
+ if (DumpType == DIDT_All || DumpType == DIDT_Aranges) {
+ OS << "\n.debug_aranges contents:\n";
+ DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0);
+ DWARFDebugArangeSet set;
+ while (set.extract(arangesData, &offset))
+ set.dump(OS);
+ }
+
+ uint8_t savedAddressByteSize = 0;
+ if (DumpType == DIDT_All || DumpType == DIDT_Line) {
+ OS << "\n.debug_line contents:\n";
+ for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) {
+ DWARFCompileUnit *cu = getCompileUnitAtIndex(i);
+ savedAddressByteSize = cu->getAddressByteSize();
+ unsigned stmtOffset =
+ cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list,
+ -1U);
+ if (stmtOffset != -1U) {
+ DataExtractor lineData(getLineSection(), isLittleEndian(),
+ savedAddressByteSize);
+ DWARFDebugLine::DumpingState state(OS);
+ DWARFDebugLine::parseStatementTable(lineData, &lineRelocMap(), &stmtOffset, state);
+ }
+ }
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_Str) {
+ OS << "\n.debug_str contents:\n";
+ DataExtractor strData(getStringSection(), isLittleEndian(), 0);
+ offset = 0;
+ uint32_t strOffset = 0;
+ while (const char *s = strData.getCStr(&offset)) {
+ OS << format("0x%8.8x: \"%s\"\n", strOffset, s);
+ strOffset = offset;
+ }
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_Ranges) {
+ OS << "\n.debug_ranges contents:\n";
+ // In fact, different compile units may have different address byte
+ // sizes, but for simplicity we just use the address byte size of the last
+ // compile unit (there is no easy and fast way to associate address range
+ // list and the compile unit it describes).
+ DataExtractor rangesData(getRangeSection(), isLittleEndian(),
+ savedAddressByteSize);
+ offset = 0;
+ DWARFDebugRangeList rangeList;
+ while (rangeList.extract(rangesData, &offset))
+ rangeList.dump(OS);
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_Pubnames) {
+ OS << "\n.debug_pubnames contents:\n";
+ DataExtractor pubNames(getPubNamesSection(), isLittleEndian(), 0);
+ offset = 0;
+ OS << "Length: " << pubNames.getU32(&offset) << "\n";
+ OS << "Version: " << pubNames.getU16(&offset) << "\n";
+ OS << "Offset in .debug_info: " << pubNames.getU32(&offset) << "\n";
+ OS << "Size: " << pubNames.getU32(&offset) << "\n";
+ OS << "\n Offset Name\n";
+ while (offset < getPubNamesSection().size()) {
+ uint32_t n = pubNames.getU32(&offset);
+ if (n == 0)
+ break;
+ OS << format("%8x ", n);
+ OS << pubNames.getCStr(&offset) << "\n";
+ }
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) {
+ OS << "\n.debug_abbrev.dwo contents:\n";
+ getDebugAbbrevDWO()->dump(OS);
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo) {
+ OS << "\n.debug_info.dwo contents:\n";
+ for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i)
+ getDWOCompileUnitAtIndex(i)->dump(OS);
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_StrDwo) {
+ OS << "\n.debug_str.dwo contents:\n";
+ DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0);
+ offset = 0;
+ uint32_t strDWOOffset = 0;
+ while (const char *s = strDWOData.getCStr(&offset)) {
+ OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s);
+ strDWOOffset = offset;
+ }
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) {
+ OS << "\n.debug_str_offsets.dwo contents:\n";
+ DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0);
+ offset = 0;
+ while (offset < getStringOffsetDWOSection().size()) {
+ OS << format("0x%8.8x: ", offset);
+ OS << format("%8.8x\n", strOffsetExt.getU32(&offset));
+ }
+ }
+}
+
+const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
+ if (Abbrev)
+ return Abbrev.get();
+
+ DataExtractor abbrData(getAbbrevSection(), isLittleEndian(), 0);
+
+ Abbrev.reset(new DWARFDebugAbbrev());
+ Abbrev->parse(abbrData);
+ return Abbrev.get();
+}
+
+const DWARFDebugAbbrev *DWARFContext::getDebugAbbrevDWO() {
+ if (AbbrevDWO)
+ return AbbrevDWO.get();
+
+ DataExtractor abbrData(getAbbrevDWOSection(), isLittleEndian(), 0);
+ AbbrevDWO.reset(new DWARFDebugAbbrev());
+ AbbrevDWO->parse(abbrData);
+ return AbbrevDWO.get();
+}
+
+const DWARFDebugAranges *DWARFContext::getDebugAranges() {
+ if (Aranges)
+ return Aranges.get();
+
+ DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0);
+
+ Aranges.reset(new DWARFDebugAranges());
+ Aranges->extract(arangesData);
+ // Generate aranges from DIEs: even if .debug_aranges section is present,
+ // it may describe only a small subset of compilation units, so we need to
+ // manually build aranges for the rest of them.
+ Aranges->generate(this);
+ return Aranges.get();
+}
+
+const DWARFDebugFrame *DWARFContext::getDebugFrame() {
+ if (DebugFrame)
+ return DebugFrame.get();
+
+ // There's a "bug" in the DWARFv3 standard with respect to the target address
+ // size within debug frame sections. While DWARF is supposed to be independent
+ // of its container, FDEs have fields with size being "target address size",
+ // which isn't specified in DWARF in general. It's only specified for CUs, but
+ // .eh_frame can appear without a .debug_info section. Follow the example of
+ // other tools (libdwarf) and extract this from the container (ObjectFile
+ // provides this information). This problem is fixed in DWARFv4
+ // See this dwarf-discuss discussion for more details:
+ // http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html
+ DataExtractor debugFrameData(getDebugFrameSection(), isLittleEndian(),
+ getAddressSize());
+ DebugFrame.reset(new DWARFDebugFrame());
+ DebugFrame->parse(debugFrameData);
+ return DebugFrame.get();
+}
+
+const DWARFLineTable *
+DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
+ if (!Line)
+ Line.reset(new DWARFDebugLine(&lineRelocMap()));
+
+ unsigned stmtOffset =
+ cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list,
+ -1U);
+ if (stmtOffset == -1U)
+ return 0; // No line table for this compile unit.
+
+ // See if the line table is cached.
+ if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
+ return lt;
+
+ // We have to parse it first.
+ DataExtractor lineData(getLineSection(), isLittleEndian(),
+ cu->getAddressByteSize());
+ return Line->getOrParseLineTable(lineData, stmtOffset);
+}
+
+void DWARFContext::parseCompileUnits() {
+ uint32_t offset = 0;
+ const DataExtractor &DIData = DataExtractor(getInfoSection(),
+ isLittleEndian(), 0);
+ while (DIData.isValidOffset(offset)) {
+ CUs.push_back(DWARFCompileUnit(getDebugAbbrev(), getInfoSection(),
+ getAbbrevSection(), getRangeSection(),
+ getStringSection(), StringRef(),
+ getAddrSection(),
+ &infoRelocMap(),
+ isLittleEndian()));
+ if (!CUs.back().extract(DIData, &offset)) {
+ CUs.pop_back();
+ break;
+ }
+
+ offset = CUs.back().getNextCompileUnitOffset();
+ }
+}
+
+void DWARFContext::parseDWOCompileUnits() {
+ uint32_t offset = 0;
+ const DataExtractor &DIData = DataExtractor(getInfoDWOSection(),
+ isLittleEndian(), 0);
+ while (DIData.isValidOffset(offset)) {
+ DWOCUs.push_back(DWARFCompileUnit(getDebugAbbrevDWO(), getInfoDWOSection(),
+ getAbbrevDWOSection(),
+ getRangeDWOSection(),
+ getStringDWOSection(),
+ getStringOffsetDWOSection(),
+ getAddrSection(),
+ &infoDWORelocMap(),
+ isLittleEndian()));
+ if (!DWOCUs.back().extract(DIData, &offset)) {
+ DWOCUs.pop_back();
+ break;
+ }
+
+ offset = DWOCUs.back().getNextCompileUnitOffset();
+ }
+}
+
+namespace {
+ struct OffsetComparator {
+ bool operator()(const DWARFCompileUnit &LHS,
+ const DWARFCompileUnit &RHS) const {
+ return LHS.getOffset() < RHS.getOffset();
+ }
+ bool operator()(const DWARFCompileUnit &LHS, uint32_t RHS) const {
+ return LHS.getOffset() < RHS;
+ }
+ bool operator()(uint32_t LHS, const DWARFCompileUnit &RHS) const {
+ return LHS < RHS.getOffset();
+ }
+ };
+}
+
+DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) {
+ if (CUs.empty())
+ parseCompileUnits();
+
+ DWARFCompileUnit *CU = std::lower_bound(CUs.begin(), CUs.end(), Offset,
+ OffsetComparator());
+ if (CU != CUs.end())
+ return &*CU;
+ return 0;
+}
+
+DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
+ // First, get the offset of the compile unit.
+ uint32_t CUOffset = getDebugAranges()->findAddress(Address);
+ // Retrieve the compile unit.
+ return getCompileUnitForOffset(CUOffset);
+}
+
+static bool getFileNameForCompileUnit(DWARFCompileUnit *CU,
+ const DWARFLineTable *LineTable,
+ uint64_t FileIndex,
+ bool NeedsAbsoluteFilePath,
+ std::string &FileName) {
+ if (CU == 0 ||
+ LineTable == 0 ||
+ !LineTable->getFileNameByIndex(FileIndex, NeedsAbsoluteFilePath,
+ FileName))
+ return false;
+ if (NeedsAbsoluteFilePath && sys::path::is_relative(FileName)) {
+ // We may still need to append compilation directory of compile unit.
+ SmallString<16> AbsolutePath;
+ if (const char *CompilationDir = CU->getCompilationDir()) {
+ sys::path::append(AbsolutePath, CompilationDir);
+ }
+ sys::path::append(AbsolutePath, FileName);
+ FileName = AbsolutePath.str();
+ }
+ return true;
+}
+
+static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU,
+ const DWARFLineTable *LineTable,
+ uint64_t Address,
+ bool NeedsAbsoluteFilePath,
+ std::string &FileName,
+ uint32_t &Line, uint32_t &Column) {
+ if (CU == 0 || LineTable == 0)
+ return false;
+ // Get the index of row we're looking for in the line table.
+ uint32_t RowIndex = LineTable->lookupAddress(Address);
+ if (RowIndex == -1U)
+ return false;
+ // Take file number and line/column from the row.
+ const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex];
+ if (!getFileNameForCompileUnit(CU, LineTable, Row.File,
+ NeedsAbsoluteFilePath, FileName))
+ return false;
+ Line = Row.Line;
+ Column = Row.Column;
+ return true;
+}
+
+DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
+ DILineInfoSpecifier Specifier) {
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+ if (!CU)
+ return DILineInfo();
+ std::string FileName = "<invalid>";
+ std::string FunctionName = "<invalid>";
+ uint32_t Line = 0;
+ uint32_t Column = 0;
+ if (Specifier.needs(DILineInfoSpecifier::FunctionName)) {
+ // The address may correspond to instruction in some inlined function,
+ // so we have to build the chain of inlined functions and take the
+ // name of the topmost function in it.
+ const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain =
+ CU->getInlinedChainForAddress(Address);
+ if (InlinedChain.size() > 0) {
+ const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain[0];
+ if (const char *Name = TopFunctionDIE.getSubroutineName(CU))
+ FunctionName = Name;
+ }
+ }
+ if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
+ const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU);
+ const bool NeedsAbsoluteFilePath =
+ Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
+ getFileLineInfoForCompileUnit(CU, LineTable, Address,
+ NeedsAbsoluteFilePath,
+ FileName, Line, Column);
+ }
+ return DILineInfo(StringRef(FileName), StringRef(FunctionName),
+ Line, Column);
+}
+
+DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address,
+ uint64_t Size,
+ DILineInfoSpecifier Specifier) {
+ DILineInfoTable Lines;
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+ if (!CU)
+ return Lines;
+
+ std::string FunctionName = "<invalid>";
+ if (Specifier.needs(DILineInfoSpecifier::FunctionName)) {
+ // The address may correspond to instruction in some inlined function,
+ // so we have to build the chain of inlined functions and take the
+ // name of the topmost function in it.
+ const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain =
+ CU->getInlinedChainForAddress(Address);
+ if (InlinedChain.size() > 0) {
+ const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain[0];
+ if (const char *Name = TopFunctionDIE.getSubroutineName(CU))
+ FunctionName = Name;
+ }
+ }
+
+ StringRef FuncNameRef = StringRef(FunctionName);
+
+ // If the Specifier says we don't need FileLineInfo, just
+ // return the top-most function at the starting address.
+ if (!Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
+ Lines.push_back(std::make_pair(Address,
+ DILineInfo(StringRef("<invalid>"),
+ FuncNameRef, 0, 0)));
+ return Lines;
+ }
+
+ const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU);
+ const bool NeedsAbsoluteFilePath =
+ Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
+
+ // Get the index of row we're looking for in the line table.
+ std::vector<uint32_t> RowVector;
+ if (!LineTable->lookupAddressRange(Address, Size, RowVector))
+ return Lines;
+
+ uint32_t NumRows = RowVector.size();
+ for (uint32_t i = 0; i < NumRows; ++i) {
+ uint32_t RowIndex = RowVector[i];
+ // Take file number and line/column from the row.
+ const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex];
+ std::string FileName = "<invalid>";
+ getFileNameForCompileUnit(CU, LineTable, Row.File,
+ NeedsAbsoluteFilePath, FileName);
+ Lines.push_back(std::make_pair(Row.Address,
+ DILineInfo(StringRef(FileName),
+ FuncNameRef, Row.Line, Row.Column)));
+ }
+
+ return Lines;
+}
+
+DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
+ DILineInfoSpecifier Specifier) {
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+ if (!CU)
+ return DIInliningInfo();
+
+ const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain =
+ CU->getInlinedChainForAddress(Address);
+ if (InlinedChain.size() == 0)
+ return DIInliningInfo();
+
+ DIInliningInfo InliningInfo;
+ uint32_t CallFile = 0, CallLine = 0, CallColumn = 0;
+ const DWARFLineTable *LineTable = 0;
+ for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) {
+ const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain[i];
+ std::string FileName = "<invalid>";
+ std::string FunctionName = "<invalid>";
+ uint32_t Line = 0;
+ uint32_t Column = 0;
+ // Get function name if necessary.
+ if (Specifier.needs(DILineInfoSpecifier::FunctionName)) {
+ if (const char *Name = FunctionDIE.getSubroutineName(CU))
+ FunctionName = Name;
+ }
+ if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
+ const bool NeedsAbsoluteFilePath =
+ Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
+ if (i == 0) {
+ // For the topmost frame, initialize the line table of this
+ // compile unit and fetch file/line info from it.
+ LineTable = getLineTableForCompileUnit(CU);
+ // For the topmost routine, get file/line info from line table.
+ getFileLineInfoForCompileUnit(CU, LineTable, Address,
+ NeedsAbsoluteFilePath,
+ FileName, Line, Column);
+ } else {
+ // Otherwise, use call file, call line and call column from
+ // previous DIE in inlined chain.
+ getFileNameForCompileUnit(CU, LineTable, CallFile,
+ NeedsAbsoluteFilePath, FileName);
+ Line = CallLine;
+ Column = CallColumn;
+ }
+ // Get call file/line/column of a current DIE.
+ if (i + 1 < n) {
+ FunctionDIE.getCallerFrame(CU, CallFile, CallLine, CallColumn);
+ }
+ }
+ DILineInfo Frame(StringRef(FileName), StringRef(FunctionName),
+ Line, Column);
+ InliningInfo.addFrame(Frame);
+ }
+ return InliningInfo;
+}
+
+DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
+ IsLittleEndian(Obj->isLittleEndian()),
+ AddressSize(Obj->getBytesInAddress()) {
+ error_code ec;
+ for (object::section_iterator i = Obj->begin_sections(),
+ e = Obj->end_sections();
+ i != e; i.increment(ec)) {
+ StringRef name;
+ i->getName(name);
+ StringRef data;
+ i->getContents(data);
+
+ name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
+ if (name == "debug_info")
+ InfoSection = data;
+ else if (name == "debug_abbrev")
+ AbbrevSection = data;
+ else if (name == "debug_line")
+ LineSection = data;
+ else if (name == "debug_aranges")
+ ARangeSection = data;
+ else if (name == "debug_frame")
+ DebugFrameSection = data;
+ else if (name == "debug_str")
+ StringSection = data;
+ else if (name == "debug_ranges") {
+ // FIXME: Use the other dwo range section when we emit it.
+ RangeDWOSection = data;
+ RangeSection = data;
+ }
+ else if (name == "debug_pubnames")
+ PubNamesSection = data;
+ else if (name == "debug_info.dwo")
+ InfoDWOSection = data;
+ else if (name == "debug_abbrev.dwo")
+ AbbrevDWOSection = data;
+ else if (name == "debug_str.dwo")
+ StringDWOSection = data;
+ else if (name == "debug_str_offsets.dwo")
+ StringOffsetDWOSection = data;
+ else if (name == "debug_addr")
+ AddrSection = data;
+ // Any more debug info sections go here.
+ else
+ continue;
+
+ // TODO: Add support for relocations in other sections as needed.
+ // Record relocations for the debug_info and debug_line sections.
+ RelocAddrMap *Map;
+ if (name == "debug_info")
+ Map = &InfoRelocMap;
+ else if (name == "debug_info.dwo")
+ Map = &InfoDWORelocMap;
+ else if (name == "debug_line")
+ Map = &LineRelocMap;
+ else
+ continue;
+
+ if (i->begin_relocations() != i->end_relocations()) {
+ uint64_t SectionSize;
+ i->getSize(SectionSize);
+ for (object::relocation_iterator reloc_i = i->begin_relocations(),
+ reloc_e = i->end_relocations();
+ reloc_i != reloc_e; reloc_i.increment(ec)) {
+ uint64_t Address;
+ reloc_i->getAddress(Address);
+ uint64_t Type;
+ reloc_i->getType(Type);
+ uint64_t SymAddr = 0;
+ // ELF relocations may need the symbol address
+ if (Obj->isELF()) {
+ object::SymbolRef Sym;
+ reloc_i->getSymbol(Sym);
+ Sym.getAddress(SymAddr);
+ }
+
+ object::RelocVisitor V(Obj->getFileFormatName());
+ // The section address is always 0 for debug sections.
+ object::RelocToApply R(V.visit(Type, *reloc_i, 0, SymAddr));
+ if (V.error()) {
+ SmallString<32> Name;
+ error_code ec(reloc_i->getTypeName(Name));
+ if (ec) {
+ errs() << "Aaaaaa! Nameless relocation! Aaaaaa!\n";
+ }
+ errs() << "error: failed to compute relocation: "
+ << Name << "\n";
+ continue;
+ }
+
+ if (Address + R.Width > SectionSize) {
+ errs() << "error: " << R.Width << "-byte relocation starting "
+ << Address << " bytes into section " << name << " which is "
+ << SectionSize << " bytes long.\n";
+ continue;
+ }
+ if (R.Width > 8) {
+ errs() << "error: can't handle a relocation of more than 8 bytes at "
+ "a time.\n";
+ continue;
+ }
+ DEBUG(dbgs() << "Writing " << format("%p", R.Value)
+ << " at " << format("%p", Address)
+ << " with width " << format("%d", R.Width)
+ << "\n");
+ Map->insert(std::make_pair(Address, std::make_pair(R.Width, R.Value)));
+ }
+ }
+ }
+}
+
+void DWARFContextInMemory::anchor() { }
diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.h b/contrib/llvm/lib/DebugInfo/DWARFContext.h
new file mode 100644
index 000000000000..37b272993f37
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFContext.h
@@ -0,0 +1,197 @@
+//===-- DWARFContext.h ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===/
+
+#ifndef LLVM_DEBUGINFO_DWARFCONTEXT_H
+#define LLVM_DEBUGINFO_DWARFCONTEXT_H
+
+#include "DWARFCompileUnit.h"
+#include "DWARFDebugAranges.h"
+#include "DWARFDebugFrame.h"
+#include "DWARFDebugLine.h"
+#include "DWARFDebugRangeList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/DIContext.h"
+
+namespace llvm {
+
+/// DWARFContext
+/// This data structure is the top level entity that deals with dwarf debug
+/// information parsing. The actual data is supplied through pure virtual
+/// methods that a concrete implementation provides.
+class DWARFContext : public DIContext {
+ SmallVector<DWARFCompileUnit, 1> CUs;
+ OwningPtr<DWARFDebugAbbrev> Abbrev;
+ OwningPtr<DWARFDebugAranges> Aranges;
+ OwningPtr<DWARFDebugLine> Line;
+ OwningPtr<DWARFDebugFrame> DebugFrame;
+
+ SmallVector<DWARFCompileUnit, 1> DWOCUs;
+ OwningPtr<DWARFDebugAbbrev> AbbrevDWO;
+
+ DWARFContext(DWARFContext &) LLVM_DELETED_FUNCTION;
+ DWARFContext &operator=(DWARFContext &) LLVM_DELETED_FUNCTION;
+
+ /// Read compile units from the debug_info section and store them in CUs.
+ void parseCompileUnits();
+
+ /// Read compile units from the debug_info.dwo section and store them in
+ /// DWOCUs.
+ void parseDWOCompileUnits();
+
+public:
+ DWARFContext() {}
+ virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All);
+
+ /// Get the number of compile units in this context.
+ unsigned getNumCompileUnits() {
+ if (CUs.empty())
+ parseCompileUnits();
+ return CUs.size();
+ }
+
+ /// Get the number of compile units in the DWO context.
+ unsigned getNumDWOCompileUnits() {
+ if (DWOCUs.empty())
+ parseDWOCompileUnits();
+ return DWOCUs.size();
+ }
+
+ /// Get the compile unit at the specified index for this compile unit.
+ DWARFCompileUnit *getCompileUnitAtIndex(unsigned index) {
+ if (CUs.empty())
+ parseCompileUnits();
+ return &CUs[index];
+ }
+
+ /// Get the compile unit at the specified index for the DWO compile units.
+ DWARFCompileUnit *getDWOCompileUnitAtIndex(unsigned index) {
+ if (DWOCUs.empty())
+ parseDWOCompileUnits();
+ return &DWOCUs[index];
+ }
+
+ /// Get a pointer to the parsed DebugAbbrev object.
+ const DWARFDebugAbbrev *getDebugAbbrev();
+
+ /// Get a pointer to the parsed dwo abbreviations object.
+ const DWARFDebugAbbrev *getDebugAbbrevDWO();
+
+ /// Get a pointer to the parsed DebugAranges object.
+ const DWARFDebugAranges *getDebugAranges();
+
+ /// Get a pointer to the parsed frame information object.
+ const DWARFDebugFrame *getDebugFrame();
+
+ /// Get a pointer to a parsed line table corresponding to a compile unit.
+ const DWARFDebugLine::LineTable *
+ getLineTableForCompileUnit(DWARFCompileUnit *cu);
+
+ virtual DILineInfo getLineInfoForAddress(uint64_t Address,
+ DILineInfoSpecifier Specifier = DILineInfoSpecifier());
+ virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address,
+ uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier());
+ virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address,
+ DILineInfoSpecifier Specifier = DILineInfoSpecifier());
+
+ virtual bool isLittleEndian() const = 0;
+ virtual uint8_t getAddressSize() const = 0;
+ virtual const RelocAddrMap &infoRelocMap() const = 0;
+ virtual const RelocAddrMap &lineRelocMap() const = 0;
+ virtual StringRef getInfoSection() = 0;
+ virtual StringRef getAbbrevSection() = 0;
+ virtual StringRef getARangeSection() = 0;
+ virtual StringRef getDebugFrameSection() = 0;
+ virtual StringRef getLineSection() = 0;
+ virtual StringRef getStringSection() = 0;
+ virtual StringRef getRangeSection() = 0;
+ virtual StringRef getPubNamesSection() = 0;
+
+ // Sections for DWARF5 split dwarf proposal.
+ virtual StringRef getInfoDWOSection() = 0;
+ virtual StringRef getAbbrevDWOSection() = 0;
+ virtual StringRef getStringDWOSection() = 0;
+ virtual StringRef getStringOffsetDWOSection() = 0;
+ virtual StringRef getRangeDWOSection() = 0;
+ virtual StringRef getAddrSection() = 0;
+ virtual const RelocAddrMap &infoDWORelocMap() const = 0;
+
+ static bool isSupportedVersion(unsigned version) {
+ return version == 2 || version == 3;
+ }
+private:
+ /// Return the compile unit that includes an offset (relative to .debug_info).
+ DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset);
+
+ /// Return the compile unit which contains instruction with provided
+ /// address.
+ DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address);
+};
+
+/// DWARFContextInMemory is the simplest possible implementation of a
+/// DWARFContext. It assumes all content is available in memory and stores
+/// pointers to it.
+class DWARFContextInMemory : public DWARFContext {
+ virtual void anchor();
+ bool IsLittleEndian;
+ uint8_t AddressSize;
+ RelocAddrMap InfoRelocMap;
+ RelocAddrMap LineRelocMap;
+ StringRef InfoSection;
+ StringRef AbbrevSection;
+ StringRef ARangeSection;
+ StringRef DebugFrameSection;
+ StringRef LineSection;
+ StringRef StringSection;
+ StringRef RangeSection;
+ StringRef PubNamesSection;
+
+ // Sections for DWARF5 split dwarf proposal.
+ RelocAddrMap InfoDWORelocMap;
+ StringRef InfoDWOSection;
+ StringRef AbbrevDWOSection;
+ StringRef StringDWOSection;
+ StringRef StringOffsetDWOSection;
+ StringRef RangeDWOSection;
+ StringRef AddrSection;
+
+public:
+ DWARFContextInMemory(object::ObjectFile *);
+ virtual bool isLittleEndian() const { return IsLittleEndian; }
+ virtual uint8_t getAddressSize() const { return AddressSize; }
+ virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; }
+ virtual const RelocAddrMap &lineRelocMap() const { return LineRelocMap; }
+ virtual StringRef getInfoSection() { return InfoSection; }
+ virtual StringRef getAbbrevSection() { return AbbrevSection; }
+ virtual StringRef getARangeSection() { return ARangeSection; }
+ virtual StringRef getDebugFrameSection() { return DebugFrameSection; }
+ virtual StringRef getLineSection() { return LineSection; }
+ virtual StringRef getStringSection() { return StringSection; }
+ virtual StringRef getRangeSection() { return RangeSection; }
+ virtual StringRef getPubNamesSection() { return PubNamesSection; }
+
+ // Sections for DWARF5 split dwarf proposal.
+ virtual StringRef getInfoDWOSection() { return InfoDWOSection; }
+ virtual StringRef getAbbrevDWOSection() { return AbbrevDWOSection; }
+ virtual StringRef getStringDWOSection() { return StringDWOSection; }
+ virtual StringRef getStringOffsetDWOSection() {
+ return StringOffsetDWOSection;
+ }
+ virtual StringRef getRangeDWOSection() { return RangeDWOSection; }
+ virtual StringRef getAddrSection() {
+ return AddrSection;
+ }
+ virtual const RelocAddrMap &infoDWORelocMap() const {
+ return InfoDWORelocMap;
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp
new file mode 100644
index 000000000000..6e6c37e30945
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp
@@ -0,0 +1,106 @@
+//===-- DWARFDebugAbbrev.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugAbbrev.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool DWARFAbbreviationDeclarationSet::extract(DataExtractor data,
+ uint32_t* offset_ptr) {
+ const uint32_t beginOffset = *offset_ptr;
+ Offset = beginOffset;
+ clear();
+ DWARFAbbreviationDeclaration abbrevDeclaration;
+ uint32_t prevAbbrAode = 0;
+ while (abbrevDeclaration.extract(data, offset_ptr)) {
+ Decls.push_back(abbrevDeclaration);
+ if (IdxOffset == 0) {
+ IdxOffset = abbrevDeclaration.getCode();
+ } else {
+ if (prevAbbrAode + 1 != abbrevDeclaration.getCode())
+ IdxOffset = UINT32_MAX;// Out of order indexes, we can't do O(1) lookups
+ }
+ prevAbbrAode = abbrevDeclaration.getCode();
+ }
+ return beginOffset != *offset_ptr;
+}
+
+void DWARFAbbreviationDeclarationSet::dump(raw_ostream &OS) const {
+ for (unsigned i = 0, e = Decls.size(); i != e; ++i)
+ Decls[i].dump(OS);
+}
+
+const DWARFAbbreviationDeclaration*
+DWARFAbbreviationDeclarationSet::getAbbreviationDeclaration(uint32_t abbrCode)
+ const {
+ if (IdxOffset == UINT32_MAX) {
+ DWARFAbbreviationDeclarationCollConstIter pos;
+ DWARFAbbreviationDeclarationCollConstIter end = Decls.end();
+ for (pos = Decls.begin(); pos != end; ++pos) {
+ if (pos->getCode() == abbrCode)
+ return &(*pos);
+ }
+ } else {
+ uint32_t idx = abbrCode - IdxOffset;
+ if (idx < Decls.size())
+ return &Decls[idx];
+ }
+ return NULL;
+}
+
+DWARFDebugAbbrev::DWARFDebugAbbrev() :
+ AbbrevCollMap(),
+ PrevAbbrOffsetPos(AbbrevCollMap.end()) {}
+
+
+void DWARFDebugAbbrev::parse(DataExtractor data) {
+ uint32_t offset = 0;
+
+ while (data.isValidOffset(offset)) {
+ uint32_t initial_cu_offset = offset;
+ DWARFAbbreviationDeclarationSet abbrevDeclSet;
+
+ if (abbrevDeclSet.extract(data, &offset))
+ AbbrevCollMap[initial_cu_offset] = abbrevDeclSet;
+ else
+ break;
+ }
+ PrevAbbrOffsetPos = AbbrevCollMap.end();
+}
+
+void DWARFDebugAbbrev::dump(raw_ostream &OS) const {
+ if (AbbrevCollMap.empty()) {
+ OS << "< EMPTY >\n";
+ return;
+ }
+
+ DWARFAbbreviationDeclarationCollMapConstIter pos;
+ for (pos = AbbrevCollMap.begin(); pos != AbbrevCollMap.end(); ++pos) {
+ OS << format("Abbrev table for offset: 0x%8.8" PRIx64 "\n", pos->first);
+ pos->second.dump(OS);
+ }
+}
+
+const DWARFAbbreviationDeclarationSet*
+DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t cu_abbr_offset) const {
+ DWARFAbbreviationDeclarationCollMapConstIter end = AbbrevCollMap.end();
+ DWARFAbbreviationDeclarationCollMapConstIter pos;
+ if (PrevAbbrOffsetPos != end &&
+ PrevAbbrOffsetPos->first == cu_abbr_offset) {
+ return &(PrevAbbrOffsetPos->second);
+ } else {
+ pos = AbbrevCollMap.find(cu_abbr_offset);
+ PrevAbbrOffsetPos = pos;
+ }
+
+ if (pos != AbbrevCollMap.end())
+ return &(pos->second);
+ return NULL;
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h
new file mode 100644
index 000000000000..c7c0436866c4
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h
@@ -0,0 +1,73 @@
+//===-- DWARFDebugAbbrev.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGABBREV_H
+#define LLVM_DEBUGINFO_DWARFDEBUGABBREV_H
+
+#include "DWARFAbbreviationDeclaration.h"
+#include <list>
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+typedef std::vector<DWARFAbbreviationDeclaration>
+ DWARFAbbreviationDeclarationColl;
+typedef DWARFAbbreviationDeclarationColl::iterator
+ DWARFAbbreviationDeclarationCollIter;
+typedef DWARFAbbreviationDeclarationColl::const_iterator
+ DWARFAbbreviationDeclarationCollConstIter;
+
+class DWARFAbbreviationDeclarationSet {
+ uint32_t Offset;
+ uint32_t IdxOffset;
+ std::vector<DWARFAbbreviationDeclaration> Decls;
+ public:
+ DWARFAbbreviationDeclarationSet()
+ : Offset(0), IdxOffset(0) {}
+
+ DWARFAbbreviationDeclarationSet(uint32_t offset, uint32_t idxOffset)
+ : Offset(offset), IdxOffset(idxOffset) {}
+
+ void clear() {
+ IdxOffset = 0;
+ Decls.clear();
+ }
+ uint32_t getOffset() const { return Offset; }
+ void dump(raw_ostream &OS) const;
+ bool extract(DataExtractor data, uint32_t* offset_ptr);
+
+ const DWARFAbbreviationDeclaration *
+ getAbbreviationDeclaration(uint32_t abbrCode) const;
+};
+
+class DWARFDebugAbbrev {
+public:
+ typedef std::map<uint64_t, DWARFAbbreviationDeclarationSet>
+ DWARFAbbreviationDeclarationCollMap;
+ typedef DWARFAbbreviationDeclarationCollMap::iterator
+ DWARFAbbreviationDeclarationCollMapIter;
+ typedef DWARFAbbreviationDeclarationCollMap::const_iterator
+ DWARFAbbreviationDeclarationCollMapConstIter;
+
+private:
+ DWARFAbbreviationDeclarationCollMap AbbrevCollMap;
+ mutable DWARFAbbreviationDeclarationCollMapConstIter PrevAbbrOffsetPos;
+
+public:
+ DWARFDebugAbbrev();
+ const DWARFAbbreviationDeclarationSet *
+ getAbbreviationDeclarationSet(uint64_t cu_abbr_offset) const;
+ void dump(raw_ostream &OS) const;
+ void parse(DataExtractor data);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp
new file mode 100644
index 000000000000..7dff9ff49a62
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp
@@ -0,0 +1,151 @@
+//===-- DWARFDebugArangeSet.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugArangeSet.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+using namespace llvm;
+
+void DWARFDebugArangeSet::clear() {
+ Offset = -1U;
+ std::memset(&HeaderData, 0, sizeof(Header));
+ ArangeDescriptors.clear();
+}
+
+void DWARFDebugArangeSet::compact() {
+ if (ArangeDescriptors.empty())
+ return;
+
+ // Iterate through all arange descriptors and combine any ranges that
+ // overlap or have matching boundaries. The ArangeDescriptors are assumed
+ // to be in ascending order.
+ uint32_t i = 0;
+ while (i + 1 < ArangeDescriptors.size()) {
+ if (ArangeDescriptors[i].getEndAddress() >= ArangeDescriptors[i+1].Address){
+ // The current range ends at or exceeds the start of the next address
+ // range. Compute the max end address between the two and use that to
+ // make the new length.
+ const uint64_t max_end_addr =
+ std::max(ArangeDescriptors[i].getEndAddress(),
+ ArangeDescriptors[i+1].getEndAddress());
+ ArangeDescriptors[i].Length = max_end_addr - ArangeDescriptors[i].Address;
+ // Now remove the next entry as it was just combined with the previous one
+ ArangeDescriptors.erase(ArangeDescriptors.begin()+i+1);
+ } else {
+ // Discontiguous address range, just proceed to the next one.
+ ++i;
+ }
+ }
+}
+
+bool
+DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
+ if (data.isValidOffset(*offset_ptr)) {
+ ArangeDescriptors.clear();
+ Offset = *offset_ptr;
+
+ // 7.20 Address Range Table
+ //
+ // Each set of entries in the table of address ranges contained in
+ // the .debug_aranges section begins with a header consisting of: a
+ // 4-byte length containing the length of the set of entries for this
+ // compilation unit, not including the length field itself; a 2-byte
+ // version identifier containing the value 2 for DWARF Version 2; a
+ // 4-byte offset into the.debug_infosection; a 1-byte unsigned integer
+ // containing the size in bytes of an address (or the offset portion of
+ // an address for segmented addressing) on the target system; and a
+ // 1-byte unsigned integer containing the size in bytes of a segment
+ // descriptor on the target system. This header is followed by a series
+ // of tuples. Each tuple consists of an address and a length, each in
+ // the size appropriate for an address on the target architecture.
+ HeaderData.Length = data.getU32(offset_ptr);
+ HeaderData.Version = data.getU16(offset_ptr);
+ HeaderData.CuOffset = data.getU32(offset_ptr);
+ HeaderData.AddrSize = data.getU8(offset_ptr);
+ HeaderData.SegSize = data.getU8(offset_ptr);
+
+ // Perform basic validation of the header fields.
+ if (!data.isValidOffsetForDataOfSize(Offset, HeaderData.Length) ||
+ (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)) {
+ clear();
+ return false;
+ }
+
+ // The first tuple following the header in each set begins at an offset
+ // that is a multiple of the size of a single tuple (that is, twice the
+ // size of an address). The header is padded, if necessary, to the
+ // appropriate boundary.
+ const uint32_t header_size = *offset_ptr - Offset;
+ const uint32_t tuple_size = HeaderData.AddrSize * 2;
+ uint32_t first_tuple_offset = 0;
+ while (first_tuple_offset < header_size)
+ first_tuple_offset += tuple_size;
+
+ *offset_ptr = Offset + first_tuple_offset;
+
+ Descriptor arangeDescriptor;
+
+ assert(sizeof(arangeDescriptor.Address) == sizeof(arangeDescriptor.Length));
+ assert(sizeof(arangeDescriptor.Address) >= HeaderData.AddrSize);
+
+ while (data.isValidOffset(*offset_ptr)) {
+ arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
+ arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
+
+ // Each set of tuples is terminated by a 0 for the address and 0
+ // for the length.
+ if (arangeDescriptor.Address || arangeDescriptor.Length)
+ ArangeDescriptors.push_back(arangeDescriptor);
+ else
+ break; // We are done if we get a zero address and length
+ }
+
+ return !ArangeDescriptors.empty();
+ }
+ return false;
+}
+
+void DWARFDebugArangeSet::dump(raw_ostream &OS) const {
+ OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ",
+ HeaderData.Length, HeaderData.Version)
+ << format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n",
+ HeaderData.CuOffset, HeaderData.AddrSize, HeaderData.SegSize);
+
+ const uint32_t hex_width = HeaderData.AddrSize * 2;
+ for (DescriptorConstIter pos = ArangeDescriptors.begin(),
+ end = ArangeDescriptors.end(); pos != end; ++pos)
+ OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address)
+ << format(" 0x%*.*" PRIx64 ")\n",
+ hex_width, hex_width, pos->getEndAddress());
+}
+
+
+namespace {
+ class DescriptorContainsAddress {
+ const uint64_t Address;
+ public:
+ DescriptorContainsAddress(uint64_t address) : Address(address) {}
+ bool operator()(const DWARFDebugArangeSet::Descriptor &desc) const {
+ return Address >= desc.Address && Address < (desc.Address + desc.Length);
+ }
+ };
+}
+
+uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const {
+ DescriptorConstIter end = ArangeDescriptors.end();
+ DescriptorConstIter pos =
+ std::find_if(ArangeDescriptors.begin(), end, // Range
+ DescriptorContainsAddress(address)); // Predicate
+ if (pos != end)
+ return HeaderData.CuOffset;
+
+ return -1U;
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h
new file mode 100644
index 000000000000..d76867615aa1
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h
@@ -0,0 +1,75 @@
+//===-- DWARFDebugArangeSet.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGESET_H
+#define LLVM_DEBUGINFO_DWARFDEBUGARANGESET_H
+
+#include "llvm/Support/DataExtractor.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+class DWARFDebugArangeSet {
+public:
+ struct Header {
+ // The total length of the entries for that set, not including the length
+ // field itself.
+ uint32_t Length;
+ // The offset from the beginning of the .debug_info section of the
+ // compilation unit entry referenced by the table.
+ uint32_t CuOffset;
+ // The DWARF version number.
+ uint16_t Version;
+ // The size in bytes of an address on the target architecture. For segmented
+ // addressing, this is the size of the offset portion of the address.
+ uint8_t AddrSize;
+ // The size in bytes of a segment descriptor on the target architecture.
+ // If the target system uses a flat address space, this value is 0.
+ uint8_t SegSize;
+ };
+
+ struct Descriptor {
+ uint64_t Address;
+ uint64_t Length;
+ uint64_t getEndAddress() const { return Address + Length; }
+ };
+
+private:
+ typedef std::vector<Descriptor> DescriptorColl;
+ typedef DescriptorColl::iterator DescriptorIter;
+ typedef DescriptorColl::const_iterator DescriptorConstIter;
+
+ uint32_t Offset;
+ Header HeaderData;
+ DescriptorColl ArangeDescriptors;
+
+public:
+ DWARFDebugArangeSet() { clear(); }
+ void clear();
+ void compact();
+ bool extract(DataExtractor data, uint32_t *offset_ptr);
+ void dump(raw_ostream &OS) const;
+
+ uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }
+ uint32_t getOffsetOfNextEntry() const { return Offset + HeaderData.Length + 4; }
+ uint32_t findAddress(uint64_t address) const;
+ uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); }
+ const struct Header &getHeader() const { return HeaderData; }
+ const Descriptor *getDescriptor(uint32_t i) const {
+ if (i < ArangeDescriptors.size())
+ return &ArangeDescriptors[i];
+ return NULL;
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp
new file mode 100644
index 000000000000..f79862d606f5
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -0,0 +1,230 @@
+//===-- DWARFDebugAranges.cpp -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugAranges.h"
+#include "DWARFCompileUnit.h"
+#include "DWARFContext.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+using namespace llvm;
+
+// Compare function DWARFDebugAranges::Range structures
+static bool RangeLessThan(const DWARFDebugAranges::Range &range1,
+ const DWARFDebugAranges::Range &range2) {
+ return range1.LoPC < range2.LoPC;
+}
+
+namespace {
+ class CountArangeDescriptors {
+ public:
+ CountArangeDescriptors(uint32_t &count_ref) : Count(count_ref) {}
+ void operator()(const DWARFDebugArangeSet &Set) {
+ Count += Set.getNumDescriptors();
+ }
+ uint32_t &Count;
+ };
+
+ class AddArangeDescriptors {
+ public:
+ AddArangeDescriptors(DWARFDebugAranges::RangeColl &Ranges,
+ DWARFDebugAranges::ParsedCUOffsetColl &CUOffsets)
+ : RangeCollection(Ranges),
+ CUOffsetCollection(CUOffsets) {}
+ void operator()(const DWARFDebugArangeSet &Set) {
+ DWARFDebugAranges::Range Range;
+ Range.Offset = Set.getCompileUnitDIEOffset();
+ CUOffsetCollection.insert(Range.Offset);
+
+ for (uint32_t i = 0, n = Set.getNumDescriptors(); i < n; ++i) {
+ const DWARFDebugArangeSet::Descriptor *ArangeDescPtr =
+ Set.getDescriptor(i);
+ Range.LoPC = ArangeDescPtr->Address;
+ Range.Length = ArangeDescPtr->Length;
+
+ // Insert each item in increasing address order so binary searching
+ // can later be done!
+ DWARFDebugAranges::RangeColl::iterator InsertPos =
+ std::lower_bound(RangeCollection.begin(), RangeCollection.end(),
+ Range, RangeLessThan);
+ RangeCollection.insert(InsertPos, Range);
+ }
+
+ }
+ DWARFDebugAranges::RangeColl &RangeCollection;
+ DWARFDebugAranges::ParsedCUOffsetColl &CUOffsetCollection;
+ };
+}
+
+bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) {
+ if (debug_aranges_data.isValidOffset(0)) {
+ uint32_t offset = 0;
+
+ typedef std::vector<DWARFDebugArangeSet> SetCollection;
+ SetCollection sets;
+
+ DWARFDebugArangeSet set;
+ Range range;
+ while (set.extract(debug_aranges_data, &offset))
+ sets.push_back(set);
+
+ uint32_t count = 0;
+
+ std::for_each(sets.begin(), sets.end(), CountArangeDescriptors(count));
+
+ if (count > 0) {
+ Aranges.reserve(count);
+ AddArangeDescriptors range_adder(Aranges, ParsedCUOffsets);
+ std::for_each(sets.begin(), sets.end(), range_adder);
+ }
+ }
+ return false;
+}
+
+bool DWARFDebugAranges::generate(DWARFContext *ctx) {
+ if (ctx) {
+ const uint32_t num_compile_units = ctx->getNumCompileUnits();
+ for (uint32_t cu_idx = 0; cu_idx < num_compile_units; ++cu_idx) {
+ if (DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx)) {
+ uint32_t CUOffset = cu->getOffset();
+ if (ParsedCUOffsets.insert(CUOffset).second)
+ cu->buildAddressRangeTable(this, true);
+ }
+ }
+ }
+ sort(true, /* overlap size */ 0);
+ return !isEmpty();
+}
+
+void DWARFDebugAranges::dump(raw_ostream &OS) const {
+ const uint32_t num_ranges = getNumRanges();
+ for (uint32_t i = 0; i < num_ranges; ++i) {
+ const Range &range = Aranges[i];
+ OS << format("0x%8.8x: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n",
+ range.Offset, (uint64_t)range.LoPC, (uint64_t)range.HiPC());
+ }
+}
+
+void DWARFDebugAranges::Range::dump(raw_ostream &OS) const {
+ OS << format("{0x%8.8x}: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n",
+ Offset, LoPC, HiPC());
+}
+
+void DWARFDebugAranges::appendRange(uint32_t offset, uint64_t low_pc,
+ uint64_t high_pc) {
+ if (!Aranges.empty()) {
+ if (Aranges.back().Offset == offset && Aranges.back().HiPC() == low_pc) {
+ Aranges.back().setHiPC(high_pc);
+ return;
+ }
+ }
+ Aranges.push_back(Range(low_pc, high_pc, offset));
+}
+
+void DWARFDebugAranges::sort(bool minimize, uint32_t n) {
+ const size_t orig_arange_size = Aranges.size();
+ // Size of one? If so, no sorting is needed
+ if (orig_arange_size <= 1)
+ return;
+ // Sort our address range entries
+ std::stable_sort(Aranges.begin(), Aranges.end(), RangeLessThan);
+
+ if (!minimize)
+ return;
+
+ // Most address ranges are contiguous from function to function
+ // so our new ranges will likely be smaller. We calculate the size
+ // of the new ranges since although std::vector objects can be resized,
+ // the will never reduce their allocated block size and free any excesss
+ // memory, so we might as well start a brand new collection so it is as
+ // small as possible.
+
+ // First calculate the size of the new minimal arange vector
+ // so we don't have to do a bunch of re-allocations as we
+ // copy the new minimal stuff over to the new collection.
+ size_t minimal_size = 1;
+ for (size_t i = 1; i < orig_arange_size; ++i) {
+ if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i], n))
+ ++minimal_size;
+ }
+
+ // If the sizes are the same, then no consecutive aranges can be
+ // combined, we are done.
+ if (minimal_size == orig_arange_size)
+ return;
+
+ // Else, make a new RangeColl that _only_ contains what we need.
+ RangeColl minimal_aranges;
+ minimal_aranges.resize(minimal_size);
+ uint32_t j = 0;
+ minimal_aranges[j] = Aranges[0];
+ for (size_t i = 1; i < orig_arange_size; ++i) {
+ if(Range::SortedOverlapCheck (minimal_aranges[j], Aranges[i], n)) {
+ minimal_aranges[j].setHiPC (Aranges[i].HiPC());
+ } else {
+ // Only increment j if we aren't merging
+ minimal_aranges[++j] = Aranges[i];
+ }
+ }
+ assert (j+1 == minimal_size);
+
+ // Now swap our new minimal aranges into place. The local
+ // minimal_aranges will then contian the old big collection
+ // which will get freed.
+ minimal_aranges.swap(Aranges);
+}
+
+uint32_t DWARFDebugAranges::findAddress(uint64_t address) const {
+ if (!Aranges.empty()) {
+ Range range(address);
+ RangeCollIterator begin = Aranges.begin();
+ RangeCollIterator end = Aranges.end();
+ RangeCollIterator pos = std::lower_bound(begin, end, range, RangeLessThan);
+
+ if (pos != end && pos->LoPC <= address && address < pos->HiPC()) {
+ return pos->Offset;
+ } else if (pos != begin) {
+ --pos;
+ if (pos->LoPC <= address && address < pos->HiPC())
+ return (*pos).Offset;
+ }
+ }
+ return -1U;
+}
+
+bool
+DWARFDebugAranges::allRangesAreContiguous(uint64_t &LoPC, uint64_t &HiPC) const{
+ if (Aranges.empty())
+ return false;
+
+ uint64_t next_addr = 0;
+ RangeCollIterator begin = Aranges.begin();
+ for (RangeCollIterator pos = begin, end = Aranges.end(); pos != end;
+ ++pos) {
+ if (pos != begin && pos->LoPC != next_addr)
+ return false;
+ next_addr = pos->HiPC();
+ }
+ // We checked for empty at the start of function so front() will be valid.
+ LoPC = Aranges.front().LoPC;
+ // We checked for empty at the start of function so back() will be valid.
+ HiPC = Aranges.back().HiPC();
+ return true;
+}
+
+bool DWARFDebugAranges::getMaxRange(uint64_t &LoPC, uint64_t &HiPC) const {
+ if (Aranges.empty())
+ return false;
+ // We checked for empty at the start of function so front() will be valid.
+ LoPC = Aranges.front().LoPC;
+ // We checked for empty at the start of function so back() will be valid.
+ HiPC = Aranges.back().HiPC();
+ return true;
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h
new file mode 100644
index 000000000000..1509ffad41f1
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h
@@ -0,0 +1,104 @@
+//===-- DWARFDebugAranges.h -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGES_H
+#define LLVM_DEBUGINFO_DWARFDEBUGARANGES_H
+
+#include "DWARFDebugArangeSet.h"
+#include "llvm/ADT/DenseSet.h"
+#include <list>
+
+namespace llvm {
+
+class DWARFContext;
+
+class DWARFDebugAranges {
+public:
+ struct Range {
+ explicit Range(uint64_t lo = -1ULL, uint64_t hi = -1ULL,
+ uint32_t off = -1U)
+ : LoPC(lo), Length(hi-lo), Offset(off) {}
+
+ void clear() {
+ LoPC = -1ULL;
+ Length = 0;
+ Offset = -1U;
+ }
+
+ void setHiPC(uint64_t HiPC) {
+ if (HiPC == -1ULL || HiPC <= LoPC)
+ Length = 0;
+ else
+ Length = HiPC - LoPC;
+ }
+ uint64_t HiPC() const {
+ if (Length)
+ return LoPC + Length;
+ return -1ULL;
+ }
+ bool isValidRange() const { return Length > 0; }
+
+ static bool SortedOverlapCheck(const Range &curr_range,
+ const Range &next_range, uint32_t n) {
+ if (curr_range.Offset != next_range.Offset)
+ return false;
+ return curr_range.HiPC() + n >= next_range.LoPC;
+ }
+
+ bool contains(const Range &range) const {
+ return LoPC <= range.LoPC && range.HiPC() <= HiPC();
+ }
+
+ void dump(raw_ostream &OS) const;
+ uint64_t LoPC; // Start of address range
+ uint32_t Length; // End of address range (not including this address)
+ uint32_t Offset; // Offset of the compile unit or die
+ };
+
+ void clear() {
+ Aranges.clear();
+ ParsedCUOffsets.clear();
+ }
+ bool allRangesAreContiguous(uint64_t& LoPC, uint64_t& HiPC) const;
+ bool getMaxRange(uint64_t& LoPC, uint64_t& HiPC) const;
+ bool extract(DataExtractor debug_aranges_data);
+ bool generate(DWARFContext *ctx);
+
+ // Use append range multiple times and then call sort
+ void appendRange(uint32_t cu_offset, uint64_t low_pc, uint64_t high_pc);
+ void sort(bool minimize, uint32_t n);
+
+ const Range *rangeAtIndex(uint32_t idx) const {
+ if (idx < Aranges.size())
+ return &Aranges[idx];
+ return NULL;
+ }
+ void dump(raw_ostream &OS) const;
+ uint32_t findAddress(uint64_t address) const;
+ bool isEmpty() const { return Aranges.empty(); }
+ uint32_t getNumRanges() const { return Aranges.size(); }
+
+ uint32_t offsetAtIndex(uint32_t idx) const {
+ if (idx < Aranges.size())
+ return Aranges[idx].Offset;
+ return -1U;
+ }
+
+ typedef std::vector<Range> RangeColl;
+ typedef RangeColl::const_iterator RangeCollIterator;
+ typedef DenseSet<uint32_t> ParsedCUOffsetColl;
+
+private:
+ RangeColl Aranges;
+ ParsedCUOffsetColl ParsedCUOffsets;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugFrame.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugFrame.cpp
new file mode 100644
index 000000000000..3efe6a1ebd30
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugFrame.cpp
@@ -0,0 +1,391 @@
+//===-- DWARFDebugFrame.h - Parsing of .debug_frame -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugFrame.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <vector>
+
+using namespace llvm;
+using namespace dwarf;
+
+
+/// \brief Abstract frame entry defining the common interface concrete
+/// entries implement.
+class llvm::FrameEntry {
+public:
+ enum FrameKind {FK_CIE, FK_FDE};
+ FrameEntry(FrameKind K, DataExtractor D, uint64_t Offset, uint64_t Length)
+ : Kind(K), Data(D), Offset(Offset), Length(Length) {}
+
+ virtual ~FrameEntry() {
+ }
+
+ FrameKind getKind() const { return Kind; }
+ virtual uint64_t getOffset() const { return Offset; }
+
+ /// \brief Parse and store a sequence of CFI instructions from our data
+ /// stream, starting at *Offset and ending at EndOffset. If everything
+ /// goes well, *Offset should be equal to EndOffset when this method
+ /// returns. Otherwise, an error occurred.
+ virtual void parseInstructions(uint32_t *Offset, uint32_t EndOffset);
+
+ /// \brief Dump the entry header to the given output stream.
+ virtual void dumpHeader(raw_ostream &OS) const = 0;
+
+ /// \brief Dump the entry's instructions to the given output stream.
+ virtual void dumpInstructions(raw_ostream &OS) const;
+
+protected:
+ const FrameKind Kind;
+
+ /// \brief The data stream holding the section from which the entry was
+ /// parsed.
+ DataExtractor Data;
+
+ /// \brief Offset of this entry in the section.
+ uint64_t Offset;
+
+ /// \brief Entry length as specified in DWARF.
+ uint64_t Length;
+
+ /// An entry may contain CFI instructions. An instruction consists of an
+ /// opcode and an optional sequence of operands.
+ typedef std::vector<uint64_t> Operands;
+ struct Instruction {
+ Instruction(uint8_t Opcode)
+ : Opcode(Opcode)
+ {}
+
+ uint8_t Opcode;
+ Operands Ops;
+ };
+
+ std::vector<Instruction> Instructions;
+
+ /// Convenience methods to add a new instruction with the given opcode and
+ /// operands to the Instructions vector.
+ void addInstruction(uint8_t Opcode) {
+ Instructions.push_back(Instruction(Opcode));
+ }
+
+ void addInstruction(uint8_t Opcode, uint64_t Operand1) {
+ Instructions.push_back(Instruction(Opcode));
+ Instructions.back().Ops.push_back(Operand1);
+ }
+
+ void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2) {
+ Instructions.push_back(Instruction(Opcode));
+ Instructions.back().Ops.push_back(Operand1);
+ Instructions.back().Ops.push_back(Operand2);
+ }
+};
+
+
+// See DWARF standard v3, section 7.23
+const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
+const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f;
+
+
+void FrameEntry::parseInstructions(uint32_t *Offset, uint32_t EndOffset) {
+ while (*Offset < EndOffset) {
+ uint8_t Opcode = Data.getU8(Offset);
+ // Some instructions have a primary opcode encoded in the top bits.
+ uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK;
+
+ if (Primary) {
+ // If it's a primary opcode, the first operand is encoded in the bottom
+ // bits of the opcode itself.
+ uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK;
+ switch (Primary) {
+ default: llvm_unreachable("Impossible primary CFI opcode");
+ case DW_CFA_advance_loc:
+ case DW_CFA_restore:
+ addInstruction(Primary, Op1);
+ break;
+ case DW_CFA_offset:
+ addInstruction(Primary, Op1, Data.getULEB128(Offset));
+ break;
+ }
+ } else {
+ // Extended opcode - its value is Opcode itself.
+ switch (Opcode) {
+ default: llvm_unreachable("Invalid extended CFI opcode");
+ case DW_CFA_nop:
+ case DW_CFA_remember_state:
+ case DW_CFA_restore_state:
+ // No operands
+ addInstruction(Opcode);
+ break;
+ case DW_CFA_set_loc:
+ // Operands: Address
+ addInstruction(Opcode, Data.getAddress(Offset));
+ break;
+ case DW_CFA_advance_loc1:
+ // Operands: 1-byte delta
+ addInstruction(Opcode, Data.getU8(Offset));
+ break;
+ case DW_CFA_advance_loc2:
+ // Operands: 2-byte delta
+ addInstruction(Opcode, Data.getU16(Offset));
+ break;
+ case DW_CFA_advance_loc4:
+ // Operands: 4-byte delta
+ addInstruction(Opcode, Data.getU32(Offset));
+ break;
+ case DW_CFA_restore_extended:
+ case DW_CFA_undefined:
+ case DW_CFA_same_value:
+ case DW_CFA_def_cfa_register:
+ case DW_CFA_def_cfa_offset:
+ // Operands: ULEB128
+ addInstruction(Opcode, Data.getULEB128(Offset));
+ break;
+ case DW_CFA_def_cfa_offset_sf:
+ // Operands: SLEB128
+ addInstruction(Opcode, Data.getSLEB128(Offset));
+ break;
+ case DW_CFA_offset_extended:
+ case DW_CFA_register:
+ case DW_CFA_def_cfa:
+ case DW_CFA_val_offset:
+ // Operands: ULEB128, ULEB128
+ addInstruction(Opcode, Data.getULEB128(Offset),
+ Data.getULEB128(Offset));
+ break;
+ case DW_CFA_offset_extended_sf:
+ case DW_CFA_def_cfa_sf:
+ case DW_CFA_val_offset_sf:
+ // Operands: ULEB128, SLEB128
+ addInstruction(Opcode, Data.getULEB128(Offset),
+ Data.getSLEB128(Offset));
+ break;
+ case DW_CFA_def_cfa_expression:
+ case DW_CFA_expression:
+ case DW_CFA_val_expression:
+ // TODO: implement this
+ report_fatal_error("Values with expressions not implemented yet!");
+ }
+ }
+ }
+}
+
+
+void FrameEntry::dumpInstructions(raw_ostream &OS) const {
+ // TODO: at the moment only instruction names are dumped. Expand this to
+ // dump operands as well.
+ for (std::vector<Instruction>::const_iterator I = Instructions.begin(),
+ E = Instructions.end();
+ I != E; ++I) {
+ uint8_t Opcode = I->Opcode;
+ if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
+ Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
+ OS << " " << CallFrameString(Opcode) << ":\n";
+ }
+}
+
+
+namespace {
+/// \brief DWARF Common Information Entry (CIE)
+class CIE : public FrameEntry {
+public:
+ // CIEs (and FDEs) are simply container classes, so the only sensible way to
+ // create them is by providing the full parsed contents in the constructor.
+ CIE(DataExtractor D, uint64_t Offset, uint64_t Length, uint8_t Version,
+ SmallString<8> Augmentation, uint64_t CodeAlignmentFactor,
+ int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister)
+ : FrameEntry(FK_CIE, D, Offset, Length), Version(Version),
+ Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor),
+ DataAlignmentFactor(DataAlignmentFactor),
+ ReturnAddressRegister(ReturnAddressRegister) {}
+
+ ~CIE() {
+ }
+
+ void dumpHeader(raw_ostream &OS) const {
+ OS << format("%08x %08x %08x CIE",
+ (uint32_t)Offset, (uint32_t)Length, DW_CIE_ID)
+ << "\n";
+ OS << format(" Version: %d\n", Version);
+ OS << " Augmentation: \"" << Augmentation << "\"\n";
+ OS << format(" Code alignment factor: %u\n",
+ (uint32_t)CodeAlignmentFactor);
+ OS << format(" Data alignment factor: %d\n",
+ (int32_t)DataAlignmentFactor);
+ OS << format(" Return address column: %d\n",
+ (int32_t)ReturnAddressRegister);
+ OS << "\n";
+ }
+
+ static bool classof(const FrameEntry *FE) {
+ return FE->getKind() == FK_CIE;
+ }
+
+private:
+ /// The following fields are defined in section 6.4.1 of the DWARF standard v3
+ uint8_t Version;
+ SmallString<8> Augmentation;
+ uint64_t CodeAlignmentFactor;
+ int64_t DataAlignmentFactor;
+ uint64_t ReturnAddressRegister;
+};
+
+
+/// \brief DWARF Frame Description Entry (FDE)
+class FDE : public FrameEntry {
+public:
+ // Each FDE has a CIE it's "linked to". Our FDE contains is constructed with
+ // an offset to the CIE (provided by parsing the FDE header). The CIE itself
+ // is obtained lazily once it's actually required.
+ FDE(DataExtractor D, uint64_t Offset, uint64_t Length,
+ int64_t LinkedCIEOffset, uint64_t InitialLocation, uint64_t AddressRange)
+ : FrameEntry(FK_FDE, D, Offset, Length), LinkedCIEOffset(LinkedCIEOffset),
+ InitialLocation(InitialLocation), AddressRange(AddressRange),
+ LinkedCIE(NULL) {}
+
+ ~FDE() {
+ }
+
+ void dumpHeader(raw_ostream &OS) const {
+ OS << format("%08x %08x %08x FDE ",
+ (uint32_t)Offset, (uint32_t)Length, (int32_t)LinkedCIEOffset);
+ OS << format("cie=%08x pc=%08x...%08x\n",
+ (int32_t)LinkedCIEOffset,
+ (uint32_t)InitialLocation,
+ (uint32_t)InitialLocation + (uint32_t)AddressRange);
+ if (LinkedCIE) {
+ OS << format("%p\n", LinkedCIE);
+ }
+ }
+
+ static bool classof(const FrameEntry *FE) {
+ return FE->getKind() == FK_FDE;
+ }
+private:
+
+ /// The following fields are defined in section 6.4.1 of the DWARF standard v3
+ uint64_t LinkedCIEOffset;
+ uint64_t InitialLocation;
+ uint64_t AddressRange;
+ CIE *LinkedCIE;
+};
+} // end anonymous namespace
+
+
+DWARFDebugFrame::DWARFDebugFrame() {
+}
+
+
+DWARFDebugFrame::~DWARFDebugFrame() {
+ for (EntryVector::iterator I = Entries.begin(), E = Entries.end();
+ I != E; ++I) {
+ delete *I;
+ }
+}
+
+
+static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
+ uint32_t Offset, int Length) {
+ errs() << "DUMP: ";
+ for (int i = 0; i < Length; ++i) {
+ uint8_t c = Data.getU8(&Offset);
+ errs().write_hex(c); errs() << " ";
+ }
+ errs() << "\n";
+}
+
+
+void DWARFDebugFrame::parse(DataExtractor Data) {
+ uint32_t Offset = 0;
+
+ while (Data.isValidOffset(Offset)) {
+ uint32_t StartOffset = Offset;
+
+ bool IsDWARF64 = false;
+ uint64_t Length = Data.getU32(&Offset);
+ uint64_t Id;
+
+ if (Length == UINT32_MAX) {
+ // DWARF-64 is distinguished by the first 32 bits of the initial length
+ // field being 0xffffffff. Then, the next 64 bits are the actual entry
+ // length.
+ IsDWARF64 = true;
+ Length = Data.getU64(&Offset);
+ }
+
+ // At this point, Offset points to the next field after Length.
+ // Length is the structure size excluding itself. Compute an offset one
+ // past the end of the structure (needed to know how many instructions to
+ // read).
+ // TODO: For honest DWARF64 support, DataExtractor will have to treat
+ // offset_ptr as uint64_t*
+ uint32_t EndStructureOffset = Offset + static_cast<uint32_t>(Length);
+
+ // The Id field's size depends on the DWARF format
+ Id = Data.getUnsigned(&Offset, IsDWARF64 ? 8 : 4);
+ bool IsCIE = ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID);
+
+ FrameEntry *Entry = 0;
+ if (IsCIE) {
+ // Note: this is specifically DWARFv3 CIE header structure. It was
+ // changed in DWARFv4. We currently don't support reading DWARFv4
+ // here because LLVM itself does not emit it (and LLDB doesn't
+ // support it either).
+ uint8_t Version = Data.getU8(&Offset);
+ const char *Augmentation = Data.getCStr(&Offset);
+ uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset);
+ int64_t DataAlignmentFactor = Data.getSLEB128(&Offset);
+ uint64_t ReturnAddressRegister = Data.getULEB128(&Offset);
+
+ Entry = new CIE(Data, StartOffset, Length, Version,
+ StringRef(Augmentation), CodeAlignmentFactor,
+ DataAlignmentFactor, ReturnAddressRegister);
+ } else {
+ // FDE
+ uint64_t CIEPointer = Id;
+ uint64_t InitialLocation = Data.getAddress(&Offset);
+ uint64_t AddressRange = Data.getAddress(&Offset);
+
+ Entry = new FDE(Data, StartOffset, Length, CIEPointer,
+ InitialLocation, AddressRange);
+ }
+
+ assert(Entry && "Expected Entry to be populated with CIE or FDE");
+ Entry->parseInstructions(&Offset, EndStructureOffset);
+
+ if (Offset == EndStructureOffset) {
+ // Entry instrucitons parsed successfully.
+ Entries.push_back(Entry);
+ } else {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << format("Parsing entry instructions at %lx failed",
+ Entry->getOffset());
+ report_fatal_error(Str);
+ }
+ }
+}
+
+
+void DWARFDebugFrame::dump(raw_ostream &OS) const {
+ OS << "\n";
+ for (EntryVector::const_iterator I = Entries.begin(), E = Entries.end();
+ I != E; ++I) {
+ FrameEntry *Entry = *I;
+ Entry->dumpHeader(OS);
+ Entry->dumpInstructions(OS);
+ OS << "\n";
+ }
+}
+
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugFrame.h b/contrib/llvm/lib/DebugInfo/DWARFDebugFrame.h
new file mode 100644
index 000000000000..48b8d63a5a64
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugFrame.h
@@ -0,0 +1,46 @@
+//===-- DWARFDebugFrame.h - Parsing of .debug_frame -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGFRAME_H
+#define LLVM_DEBUGINFO_DWARFDEBUGFRAME_H
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+
+namespace llvm {
+
+class FrameEntry;
+
+
+/// \brief A parsed .debug_frame section
+///
+class DWARFDebugFrame {
+public:
+ DWARFDebugFrame();
+ ~DWARFDebugFrame();
+
+ /// \brief Dump the section data into the given stream.
+ void dump(raw_ostream &OS) const;
+
+ /// \brief Parse the section from raw data.
+ /// data is assumed to be pointing to the beginning of the section.
+ void parse(DataExtractor Data);
+
+private:
+ typedef std::vector<FrameEntry *> EntryVector;
+ EntryVector Entries;
+};
+
+
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp
new file mode 100644
index 000000000000..02b15d69043f
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -0,0 +1,589 @@
+//===-- DWARFDebugInfoEntry.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugInfoEntry.h"
+#include "DWARFCompileUnit.h"
+#include "DWARFContext.h"
+#include "DWARFDebugAbbrev.h"
+#include "DWARFFormValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace dwarf;
+
+void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS,
+ const DWARFCompileUnit *cu,
+ unsigned recurseDepth,
+ unsigned indent) const {
+ DataExtractor debug_info_data = cu->getDebugInfoExtractor();
+ uint32_t offset = Offset;
+
+ if (debug_info_data.isValidOffset(offset)) {
+ uint32_t abbrCode = debug_info_data.getULEB128(&offset);
+
+ OS << format("\n0x%8.8x: ", Offset);
+ if (abbrCode) {
+ if (AbbrevDecl) {
+ const char *tagString = TagString(getTag());
+ if (tagString)
+ OS.indent(indent) << tagString;
+ else
+ OS.indent(indent) << format("DW_TAG_Unknown_%x", getTag());
+ OS << format(" [%u] %c\n", abbrCode,
+ AbbrevDecl->hasChildren() ? '*' : ' ');
+
+ // Dump all data in the DIE for the attributes.
+ const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
+ for (uint32_t i = 0; i != numAttributes; ++i) {
+ uint16_t attr = AbbrevDecl->getAttrByIndex(i);
+ uint16_t form = AbbrevDecl->getFormByIndex(i);
+ dumpAttribute(OS, cu, &offset, attr, form, indent);
+ }
+
+ const DWARFDebugInfoEntryMinimal *child = getFirstChild();
+ if (recurseDepth > 0 && child) {
+ while (child) {
+ child->dump(OS, cu, recurseDepth-1, indent+2);
+ child = child->getSibling();
+ }
+ }
+ } else {
+ OS << "Abbreviation code not found in 'debug_abbrev' class for code: "
+ << abbrCode << '\n';
+ }
+ } else {
+ OS.indent(indent) << "NULL\n";
+ }
+ }
+}
+
+void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
+ const DWARFCompileUnit *cu,
+ uint32_t* offset_ptr,
+ uint16_t attr,
+ uint16_t form,
+ unsigned indent) const {
+ OS << format("0x%8.8x: ", *offset_ptr);
+ OS.indent(indent+2);
+ const char *attrString = AttributeString(attr);
+ if (attrString)
+ OS << attrString;
+ else
+ OS << format("DW_AT_Unknown_%x", attr);
+ const char *formString = FormEncodingString(form);
+ if (formString)
+ OS << " [" << formString << ']';
+ else
+ OS << format(" [DW_FORM_Unknown_%x]", form);
+
+ DWARFFormValue formValue(form);
+
+ if (!formValue.extractValue(cu->getDebugInfoExtractor(), offset_ptr, cu))
+ return;
+
+ OS << "\t(";
+ formValue.dump(OS, cu);
+ OS << ")\n";
+}
+
+bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
+ const uint8_t *fixed_form_sizes,
+ uint32_t *offset_ptr) {
+ Offset = *offset_ptr;
+
+ DataExtractor debug_info_data = cu->getDebugInfoExtractor();
+ uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr);
+
+ assert(fixed_form_sizes); // For best performance this should be specified!
+
+ if (abbrCode) {
+ uint32_t offset = *offset_ptr;
+
+ AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode);
+
+ // Skip all data in the .debug_info for the attributes
+ const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
+ uint32_t i;
+ uint16_t form;
+ for (i=0; i<numAttributes; ++i) {
+
+ form = AbbrevDecl->getFormByIndex(i);
+
+ // FIXME: Currently we're checking if this is less than the last
+ // entry in the fixed_form_sizes table, but this should be changed
+ // to use dynamic dispatch.
+ const uint8_t fixed_skip_size = (form < DW_FORM_ref_sig8) ?
+ fixed_form_sizes[form] : 0;
+ if (fixed_skip_size)
+ offset += fixed_skip_size;
+ else {
+ bool form_is_indirect = false;
+ do {
+ form_is_indirect = false;
+ uint32_t form_size = 0;
+ switch (form) {
+ // Blocks if inlined data that have a length field and the data bytes
+ // inlined in the .debug_info.
+ case DW_FORM_exprloc:
+ case DW_FORM_block:
+ form_size = debug_info_data.getULEB128(&offset);
+ break;
+ case DW_FORM_block1:
+ form_size = debug_info_data.getU8(&offset);
+ break;
+ case DW_FORM_block2:
+ form_size = debug_info_data.getU16(&offset);
+ break;
+ case DW_FORM_block4:
+ form_size = debug_info_data.getU32(&offset);
+ break;
+
+ // Inlined NULL terminated C-strings
+ case DW_FORM_string:
+ debug_info_data.getCStr(&offset);
+ break;
+
+ // Compile unit address sized values
+ case DW_FORM_addr:
+ case DW_FORM_ref_addr:
+ form_size = cu->getAddressByteSize();
+ break;
+
+ // 0 sized form.
+ case DW_FORM_flag_present:
+ form_size = 0;
+ break;
+
+ // 1 byte values
+ case DW_FORM_data1:
+ case DW_FORM_flag:
+ case DW_FORM_ref1:
+ form_size = 1;
+ break;
+
+ // 2 byte values
+ case DW_FORM_data2:
+ case DW_FORM_ref2:
+ form_size = 2;
+ break;
+
+ // 4 byte values
+ case DW_FORM_strp:
+ case DW_FORM_data4:
+ case DW_FORM_ref4:
+ form_size = 4;
+ break;
+
+ // 8 byte values
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_sig8:
+ form_size = 8;
+ break;
+
+ // signed or unsigned LEB 128 values
+ case DW_FORM_sdata:
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ case DW_FORM_GNU_str_index:
+ case DW_FORM_GNU_addr_index:
+ debug_info_data.getULEB128(&offset);
+ break;
+
+ case DW_FORM_indirect:
+ form_is_indirect = true;
+ form = debug_info_data.getULEB128(&offset);
+ break;
+
+ // FIXME: 64-bit for DWARF64
+ case DW_FORM_sec_offset:
+ debug_info_data.getU32(offset_ptr);
+ break;
+
+ default:
+ *offset_ptr = Offset;
+ return false;
+ }
+ offset += form_size;
+ } while (form_is_indirect);
+ }
+ }
+ *offset_ptr = offset;
+ return true;
+ } else {
+ AbbrevDecl = NULL;
+ return true; // NULL debug tag entry
+ }
+}
+
+bool
+DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu,
+ uint32_t *offset_ptr) {
+ DataExtractor debug_info_data = cu->getDebugInfoExtractor();
+ const uint32_t cu_end_offset = cu->getNextCompileUnitOffset();
+ const uint8_t cu_addr_size = cu->getAddressByteSize();
+ uint32_t offset = *offset_ptr;
+ if ((offset < cu_end_offset) && debug_info_data.isValidOffset(offset)) {
+ Offset = offset;
+
+ uint64_t abbrCode = debug_info_data.getULEB128(&offset);
+
+ if (abbrCode) {
+ AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode);
+
+ if (AbbrevDecl) {
+ uint16_t tag = AbbrevDecl->getTag();
+
+ bool isCompileUnitTag = tag == DW_TAG_compile_unit;
+ if(cu && isCompileUnitTag)
+ const_cast<DWARFCompileUnit*>(cu)->setBaseAddress(0);
+
+ // Skip all data in the .debug_info for the attributes
+ const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
+ for (uint32_t i = 0; i != numAttributes; ++i) {
+ uint16_t attr = AbbrevDecl->getAttrByIndex(i);
+ uint16_t form = AbbrevDecl->getFormByIndex(i);
+
+ if (isCompileUnitTag &&
+ ((attr == DW_AT_entry_pc) || (attr == DW_AT_low_pc))) {
+ DWARFFormValue form_value(form);
+ if (form_value.extractValue(debug_info_data, &offset, cu)) {
+ if (attr == DW_AT_low_pc || attr == DW_AT_entry_pc)
+ const_cast<DWARFCompileUnit*>(cu)
+ ->setBaseAddress(form_value.getUnsigned());
+ }
+ } else {
+ bool form_is_indirect = false;
+ do {
+ form_is_indirect = false;
+ register uint32_t form_size = 0;
+ switch (form) {
+ // Blocks if inlined data that have a length field and the data
+ // bytes // inlined in the .debug_info
+ case DW_FORM_exprloc:
+ case DW_FORM_block:
+ form_size = debug_info_data.getULEB128(&offset);
+ break;
+ case DW_FORM_block1:
+ form_size = debug_info_data.getU8(&offset);
+ break;
+ case DW_FORM_block2:
+ form_size = debug_info_data.getU16(&offset);
+ break;
+ case DW_FORM_block4:
+ form_size = debug_info_data.getU32(&offset);
+ break;
+
+ // Inlined NULL terminated C-strings
+ case DW_FORM_string:
+ debug_info_data.getCStr(&offset);
+ break;
+
+ // Compile unit address sized values
+ case DW_FORM_addr:
+ case DW_FORM_ref_addr:
+ form_size = cu_addr_size;
+ break;
+
+ // 0 byte value
+ case DW_FORM_flag_present:
+ form_size = 0;
+ break;
+
+ // 1 byte values
+ case DW_FORM_data1:
+ case DW_FORM_flag:
+ case DW_FORM_ref1:
+ form_size = 1;
+ break;
+
+ // 2 byte values
+ case DW_FORM_data2:
+ case DW_FORM_ref2:
+ form_size = 2;
+ break;
+
+ // 4 byte values
+ case DW_FORM_strp:
+ form_size = 4;
+ break;
+
+ case DW_FORM_data4:
+ case DW_FORM_ref4:
+ form_size = 4;
+ break;
+
+ // 8 byte values
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_sig8:
+ form_size = 8;
+ break;
+
+ // signed or unsigned LEB 128 values
+ case DW_FORM_sdata:
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ case DW_FORM_GNU_str_index:
+ case DW_FORM_GNU_addr_index:
+ debug_info_data.getULEB128(&offset);
+ break;
+
+ case DW_FORM_indirect:
+ form = debug_info_data.getULEB128(&offset);
+ form_is_indirect = true;
+ break;
+
+ // FIXME: 64-bit for DWARF64.
+ case DW_FORM_sec_offset:
+ debug_info_data.getU32(offset_ptr);
+ break;
+
+ default:
+ *offset_ptr = offset;
+ return false;
+ }
+
+ offset += form_size;
+ } while (form_is_indirect);
+ }
+ }
+ *offset_ptr = offset;
+ return true;
+ }
+ } else {
+ AbbrevDecl = NULL;
+ *offset_ptr = offset;
+ return true; // NULL debug tag entry
+ }
+ }
+
+ return false;
+}
+
+bool DWARFDebugInfoEntryMinimal::isSubprogramDIE() const {
+ return getTag() == DW_TAG_subprogram;
+}
+
+bool DWARFDebugInfoEntryMinimal::isSubroutineDIE() const {
+ uint32_t Tag = getTag();
+ return Tag == DW_TAG_subprogram ||
+ Tag == DW_TAG_inlined_subroutine;
+}
+
+uint32_t
+DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFCompileUnit *cu,
+ const uint16_t attr,
+ DWARFFormValue &form_value,
+ uint32_t *end_attr_offset_ptr)
+ const {
+ if (AbbrevDecl) {
+ uint32_t attr_idx = AbbrevDecl->findAttributeIndex(attr);
+
+ if (attr_idx != -1U) {
+ uint32_t offset = getOffset();
+
+ DataExtractor debug_info_data = cu->getDebugInfoExtractor();
+
+ // Skip the abbreviation code so we are at the data for the attributes
+ debug_info_data.getULEB128(&offset);
+
+ uint32_t idx = 0;
+ while (idx < attr_idx)
+ DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(idx++),
+ debug_info_data, &offset, cu);
+
+ const uint32_t attr_offset = offset;
+ form_value = DWARFFormValue(AbbrevDecl->getFormByIndex(idx));
+ if (form_value.extractValue(debug_info_data, &offset, cu)) {
+ if (end_attr_offset_ptr)
+ *end_attr_offset_ptr = offset;
+ return attr_offset;
+ }
+ }
+ }
+
+ return 0;
+}
+
+const char*
+DWARFDebugInfoEntryMinimal::getAttributeValueAsString(
+ const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ const char* fail_value)
+ const {
+ DWARFFormValue form_value;
+ if (getAttributeValue(cu, attr, form_value)) {
+ DataExtractor stringExtractor(cu->getStringSection(), false, 0);
+ return form_value.getAsCString(&stringExtractor);
+ }
+ return fail_value;
+}
+
+uint64_t
+DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsigned(
+ const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ uint64_t fail_value) const {
+ DWARFFormValue form_value;
+ if (getAttributeValue(cu, attr, form_value))
+ return form_value.getUnsigned();
+ return fail_value;
+}
+
+int64_t
+DWARFDebugInfoEntryMinimal::getAttributeValueAsSigned(
+ const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ int64_t fail_value) const {
+ DWARFFormValue form_value;
+ if (getAttributeValue(cu, attr, form_value))
+ return form_value.getSigned();
+ return fail_value;
+}
+
+uint64_t
+DWARFDebugInfoEntryMinimal::getAttributeValueAsReference(
+ const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ uint64_t fail_value)
+ const {
+ DWARFFormValue form_value;
+ if (getAttributeValue(cu, attr, form_value))
+ return form_value.getReference(cu);
+ return fail_value;
+}
+
+bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFCompileUnit *CU,
+ uint64_t &LowPC,
+ uint64_t &HighPC) const {
+ HighPC = -1ULL;
+ LowPC = getAttributeValueAsUnsigned(CU, DW_AT_low_pc, -1ULL);
+ if (LowPC != -1ULL)
+ HighPC = getAttributeValueAsUnsigned(CU, DW_AT_high_pc, -1ULL);
+ return (HighPC != -1ULL);
+}
+
+void
+DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *CU,
+ DWARFDebugAranges *DebugAranges)
+ const {
+ if (AbbrevDecl) {
+ if (isSubprogramDIE()) {
+ uint64_t LowPC, HighPC;
+ if (getLowAndHighPC(CU, LowPC, HighPC)) {
+ DebugAranges->appendRange(CU->getOffset(), LowPC, HighPC);
+ }
+ // FIXME: try to append ranges from .debug_ranges section.
+ }
+
+ const DWARFDebugInfoEntryMinimal *child = getFirstChild();
+ while (child) {
+ child->buildAddressRangeTable(CU, DebugAranges);
+ child = child->getSibling();
+ }
+ }
+}
+
+bool
+DWARFDebugInfoEntryMinimal::addressRangeContainsAddress(
+ const DWARFCompileUnit *CU,
+ const uint64_t Address)
+ const {
+ if (isNULL())
+ return false;
+ uint64_t LowPC, HighPC;
+ if (getLowAndHighPC(CU, LowPC, HighPC))
+ return (LowPC <= Address && Address <= HighPC);
+ // Try to get address ranges from .debug_ranges section.
+ uint32_t RangesOffset = getAttributeValueAsReference(CU, DW_AT_ranges, -1U);
+ if (RangesOffset != -1U) {
+ DWARFDebugRangeList RangeList;
+ if (CU->extractRangeList(RangesOffset, RangeList))
+ return RangeList.containsAddress(CU->getBaseAddress(), Address);
+ }
+ return false;
+}
+
+const char*
+DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFCompileUnit *CU)
+ const {
+ if (!isSubroutineDIE())
+ return 0;
+ // Try to get mangled name if possible.
+ if (const char *name =
+ getAttributeValueAsString(CU, DW_AT_MIPS_linkage_name, 0))
+ return name;
+ if (const char *name = getAttributeValueAsString(CU, DW_AT_linkage_name, 0))
+ return name;
+ if (const char *name = getAttributeValueAsString(CU, DW_AT_name, 0))
+ return name;
+ // Try to get name from specification DIE.
+ uint32_t spec_ref =
+ getAttributeValueAsReference(CU, DW_AT_specification, -1U);
+ if (spec_ref != -1U) {
+ DWARFDebugInfoEntryMinimal spec_die;
+ if (spec_die.extract(CU, &spec_ref)) {
+ if (const char *name = spec_die.getSubroutineName(CU))
+ return name;
+ }
+ }
+ // Try to get name from abstract origin DIE.
+ uint32_t abs_origin_ref =
+ getAttributeValueAsReference(CU, DW_AT_abstract_origin, -1U);
+ if (abs_origin_ref != -1U) {
+ DWARFDebugInfoEntryMinimal abs_origin_die;
+ if (abs_origin_die.extract(CU, &abs_origin_ref)) {
+ if (const char *name = abs_origin_die.getSubroutineName(CU))
+ return name;
+ }
+ }
+ return 0;
+}
+
+void DWARFDebugInfoEntryMinimal::getCallerFrame(const DWARFCompileUnit *CU,
+ uint32_t &CallFile,
+ uint32_t &CallLine,
+ uint32_t &CallColumn) const {
+ CallFile = getAttributeValueAsUnsigned(CU, DW_AT_call_file, 0);
+ CallLine = getAttributeValueAsUnsigned(CU, DW_AT_call_line, 0);
+ CallColumn = getAttributeValueAsUnsigned(CU, DW_AT_call_column, 0);
+}
+
+DWARFDebugInfoEntryMinimal::InlinedChain
+DWARFDebugInfoEntryMinimal::getInlinedChainForAddress(
+ const DWARFCompileUnit *CU,
+ const uint64_t Address)
+ const {
+ DWARFDebugInfoEntryMinimal::InlinedChain InlinedChain;
+ if (isNULL())
+ return InlinedChain;
+ for (const DWARFDebugInfoEntryMinimal *DIE = this; DIE; ) {
+ // Append current DIE to inlined chain only if it has correct tag
+ // (e.g. it is not a lexical block).
+ if (DIE->isSubroutineDIE()) {
+ InlinedChain.push_back(*DIE);
+ }
+ // Try to get child which also contains provided address.
+ const DWARFDebugInfoEntryMinimal *Child = DIE->getFirstChild();
+ while (Child) {
+ if (Child->addressRangeContainsAddress(CU, Address)) {
+ // Assume there is only one such child.
+ break;
+ }
+ Child = Child->getSibling();
+ }
+ DIE = Child;
+ }
+ // Reverse the obtained chain to make the root of inlined chain last.
+ std::reverse(InlinedChain.begin(), InlinedChain.end());
+ return InlinedChain;
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h
new file mode 100644
index 000000000000..9c1b2be0a71f
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -0,0 +1,176 @@
+//===-- DWARFDebugInfoEntry.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H
+#define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H
+
+#include "DWARFAbbreviationDeclaration.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class DWARFDebugAranges;
+class DWARFCompileUnit;
+class DWARFContext;
+class DWARFFormValue;
+class DWARFInlinedSubroutineChain;
+
+/// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data.
+class DWARFDebugInfoEntryMinimal {
+ /// Offset within the .debug_info of the start of this entry.
+ uint32_t Offset;
+
+ /// How many to subtract from "this" to get the parent.
+ /// If zero this die has no parent.
+ uint32_t ParentIdx;
+
+ /// How many to add to "this" to get the sibling.
+ uint32_t SiblingIdx;
+
+ const DWARFAbbreviationDeclaration *AbbrevDecl;
+public:
+ DWARFDebugInfoEntryMinimal()
+ : Offset(0), ParentIdx(0), SiblingIdx(0), AbbrevDecl(0) {}
+
+ void dump(raw_ostream &OS, const DWARFCompileUnit *cu,
+ unsigned recurseDepth, unsigned indent = 0) const;
+ void dumpAttribute(raw_ostream &OS, const DWARFCompileUnit *cu,
+ uint32_t *offset_ptr, uint16_t attr, uint16_t form,
+ unsigned indent = 0) const;
+
+ bool extractFast(const DWARFCompileUnit *cu, const uint8_t *fixed_form_sizes,
+ uint32_t *offset_ptr);
+
+ /// Extract a debug info entry for a given compile unit from the
+ /// .debug_info and .debug_abbrev data starting at the given offset.
+ bool extract(const DWARFCompileUnit *cu, uint32_t *offset_ptr);
+
+ uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
+ bool isNULL() const { return AbbrevDecl == 0; }
+
+ /// Returns true if DIE represents a subprogram (not inlined).
+ bool isSubprogramDIE() const;
+ /// Returns true if DIE represents a subprogram or an inlined
+ /// subroutine.
+ bool isSubroutineDIE() const;
+
+ uint32_t getOffset() const { return Offset; }
+ uint32_t getNumAttributes() const {
+ return !isNULL() ? AbbrevDecl->getNumAttributes() : 0;
+ }
+ bool hasChildren() const { return !isNULL() && AbbrevDecl->hasChildren(); }
+
+ // We know we are kept in a vector of contiguous entries, so we know
+ // our parent will be some index behind "this".
+ DWARFDebugInfoEntryMinimal *getParent() {
+ return ParentIdx > 0 ? this - ParentIdx : 0;
+ }
+ const DWARFDebugInfoEntryMinimal *getParent() const {
+ return ParentIdx > 0 ? this - ParentIdx : 0;
+ }
+ // We know we are kept in a vector of contiguous entries, so we know
+ // our sibling will be some index after "this".
+ DWARFDebugInfoEntryMinimal *getSibling() {
+ return SiblingIdx > 0 ? this + SiblingIdx : 0;
+ }
+ const DWARFDebugInfoEntryMinimal *getSibling() const {
+ return SiblingIdx > 0 ? this + SiblingIdx : 0;
+ }
+ // We know we are kept in a vector of contiguous entries, so we know
+ // we don't need to store our child pointer, if we have a child it will
+ // be the next entry in the list...
+ DWARFDebugInfoEntryMinimal *getFirstChild() {
+ return hasChildren() ? this + 1 : 0;
+ }
+ const DWARFDebugInfoEntryMinimal *getFirstChild() const {
+ return hasChildren() ? this + 1 : 0;
+ }
+
+ void setParent(DWARFDebugInfoEntryMinimal *parent) {
+ if (parent) {
+ // We know we are kept in a vector of contiguous entries, so we know
+ // our parent will be some index behind "this".
+ ParentIdx = this - parent;
+ } else
+ ParentIdx = 0;
+ }
+ void setSibling(DWARFDebugInfoEntryMinimal *sibling) {
+ if (sibling) {
+ // We know we are kept in a vector of contiguous entries, so we know
+ // our sibling will be some index after "this".
+ SiblingIdx = sibling - this;
+ sibling->setParent(getParent());
+ } else
+ SiblingIdx = 0;
+ }
+
+ const DWARFAbbreviationDeclaration *getAbbreviationDeclarationPtr() const {
+ return AbbrevDecl;
+ }
+
+ uint32_t getAttributeValue(const DWARFCompileUnit *cu,
+ const uint16_t attr, DWARFFormValue &formValue,
+ uint32_t *end_attr_offset_ptr = 0) const;
+
+ const char* getAttributeValueAsString(const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ const char *fail_value) const;
+
+ uint64_t getAttributeValueAsUnsigned(const DWARFCompileUnit *cu,
+ const uint16_t attr,
+ uint64_t fail_value) const;
+
+ uint64_t getAttributeValueAsReference(const DWARFCompileUnit *cu,
+ const uint16_t attr,
+ uint64_t fail_value) const;
+
+ int64_t getAttributeValueAsSigned(const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ int64_t fail_value) const;
+
+ /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU.
+ /// Returns true if both attributes are present.
+ bool getLowAndHighPC(const DWARFCompileUnit *CU,
+ uint64_t &LowPC, uint64_t &HighPC) const;
+
+ void buildAddressRangeTable(const DWARFCompileUnit *CU,
+ DWARFDebugAranges *DebugAranges) const;
+
+ bool addressRangeContainsAddress(const DWARFCompileUnit *CU,
+ const uint64_t Address) const;
+
+ /// If a DIE represents a subprogram (or inlined subroutine),
+ /// returns its mangled name (or short name, if mangled is missing).
+ /// This name may be fetched from specification or abstract origin
+ /// for this subprogram. Returns null if no name is found.
+ const char* getSubroutineName(const DWARFCompileUnit *CU) const;
+
+ /// Retrieves values of DW_AT_call_file, DW_AT_call_line and
+ /// DW_AT_call_column from DIE (or zeroes if they are missing).
+ void getCallerFrame(const DWARFCompileUnit *CU, uint32_t &CallFile,
+ uint32_t &CallLine, uint32_t &CallColumn) const;
+
+ /// InlinedChain - represents a chain of inlined_subroutine
+ /// DIEs, (possibly ending with subprogram DIE), all of which are contained
+ /// in some concrete inlined instance tree. Address range for each DIE
+ /// (except the last DIE) in this chain is contained in address
+ /// range for next DIE in the chain.
+ typedef SmallVector<DWARFDebugInfoEntryMinimal, 4> InlinedChain;
+
+ /// Get inlined chain for a given address, rooted at the current DIE.
+ /// Returns empty chain if address is not contained in address range
+ /// of current DIE.
+ InlinedChain getInlinedChainForAddress(const DWARFCompileUnit *CU,
+ const uint64_t Address) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp
new file mode 100644
index 000000000000..192381c6f7c6
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp
@@ -0,0 +1,628 @@
+//===-- DWARFDebugLine.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugLine.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+using namespace dwarf;
+
+void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
+ OS << "Line table prologue:\n"
+ << format(" total_length: 0x%8.8x\n", TotalLength)
+ << format(" version: %u\n", Version)
+ << format("prologue_length: 0x%8.8x\n", PrologueLength)
+ << format("min_inst_length: %u\n", MinInstLength)
+ << format("default_is_stmt: %u\n", DefaultIsStmt)
+ << format(" line_base: %i\n", LineBase)
+ << format(" line_range: %u\n", LineRange)
+ << format(" opcode_base: %u\n", OpcodeBase);
+
+ for (uint32_t i = 0; i < StandardOpcodeLengths.size(); ++i)
+ OS << format("standard_opcode_lengths[%s] = %u\n", LNStandardString(i+1),
+ StandardOpcodeLengths[i]);
+
+ if (!IncludeDirectories.empty())
+ for (uint32_t i = 0; i < IncludeDirectories.size(); ++i)
+ OS << format("include_directories[%3u] = '", i+1)
+ << IncludeDirectories[i] << "'\n";
+
+ if (!FileNames.empty()) {
+ OS << " Dir Mod Time File Len File Name\n"
+ << " ---- ---------- ---------- -----------"
+ "----------------\n";
+ for (uint32_t i = 0; i < FileNames.size(); ++i) {
+ const FileNameEntry& fileEntry = FileNames[i];
+ OS << format("file_names[%3u] %4" PRIu64 " ", i+1, fileEntry.DirIdx)
+ << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ",
+ fileEntry.ModTime, fileEntry.Length)
+ << fileEntry.Name << '\n';
+ }
+ }
+}
+
+void DWARFDebugLine::Row::postAppend() {
+ BasicBlock = false;
+ PrologueEnd = false;
+ EpilogueBegin = false;
+}
+
+void DWARFDebugLine::Row::reset(bool default_is_stmt) {
+ Address = 0;
+ Line = 1;
+ Column = 0;
+ File = 1;
+ Isa = 0;
+ IsStmt = default_is_stmt;
+ BasicBlock = false;
+ EndSequence = false;
+ PrologueEnd = false;
+ EpilogueBegin = false;
+}
+
+void DWARFDebugLine::Row::dump(raw_ostream &OS) const {
+ OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column)
+ << format(" %6u %3u ", File, Isa)
+ << (IsStmt ? " is_stmt" : "")
+ << (BasicBlock ? " basic_block" : "")
+ << (PrologueEnd ? " prologue_end" : "")
+ << (EpilogueBegin ? " epilogue_begin" : "")
+ << (EndSequence ? " end_sequence" : "")
+ << '\n';
+}
+
+void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const {
+ Prologue.dump(OS);
+ OS << '\n';
+
+ if (!Rows.empty()) {
+ OS << "Address Line Column File ISA Flags\n"
+ << "------------------ ------ ------ ------ --- -------------\n";
+ for (std::vector<Row>::const_iterator pos = Rows.begin(),
+ end = Rows.end(); pos != end; ++pos)
+ pos->dump(OS);
+ }
+}
+
+DWARFDebugLine::State::~State() {}
+
+void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) {
+ if (Sequence::Empty) {
+ // Record the beginning of instruction sequence.
+ Sequence::Empty = false;
+ Sequence::LowPC = Address;
+ Sequence::FirstRowIndex = row;
+ }
+ ++row; // Increase the row number.
+ LineTable::appendRow(*this);
+ if (EndSequence) {
+ // Record the end of instruction sequence.
+ Sequence::HighPC = Address;
+ Sequence::LastRowIndex = row;
+ if (Sequence::isValid())
+ LineTable::appendSequence(*this);
+ Sequence::reset();
+ }
+ Row::postAppend();
+}
+
+void DWARFDebugLine::State::finalize() {
+ row = DoneParsingLineTable;
+ if (!Sequence::Empty) {
+ fprintf(stderr, "warning: last sequence in debug line table is not"
+ "terminated!\n");
+ }
+ // Sort all sequences so that address lookup will work faster.
+ if (!Sequences.empty()) {
+ std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC);
+ // Note: actually, instruction address ranges of sequences should not
+ // overlap (in shared objects and executables). If they do, the address
+ // lookup would still work, though, but result would be ambiguous.
+ // We don't report warning in this case. For example,
+ // sometimes .so compiled from multiple object files contains a few
+ // rudimentary sequences for address ranges [0x0, 0xsomething).
+ }
+}
+
+DWARFDebugLine::DumpingState::~DumpingState() {}
+
+void DWARFDebugLine::DumpingState::finalize() {
+ LineTable::dump(OS);
+}
+
+const DWARFDebugLine::LineTable *
+DWARFDebugLine::getLineTable(uint32_t offset) const {
+ LineTableConstIter pos = LineTableMap.find(offset);
+ if (pos != LineTableMap.end())
+ return &pos->second;
+ return 0;
+}
+
+const DWARFDebugLine::LineTable *
+DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data,
+ uint32_t offset) {
+ std::pair<LineTableIter, bool> pos =
+ LineTableMap.insert(LineTableMapTy::value_type(offset, LineTable()));
+ if (pos.second) {
+ // Parse and cache the line table for at this offset.
+ State state;
+ if (!parseStatementTable(debug_line_data, RelocMap, &offset, state))
+ return 0;
+ pos.first->second = state;
+ }
+ return &pos.first->second;
+}
+
+bool
+DWARFDebugLine::parsePrologue(DataExtractor debug_line_data,
+ uint32_t *offset_ptr, Prologue *prologue) {
+ const uint32_t prologue_offset = *offset_ptr;
+
+ prologue->clear();
+ prologue->TotalLength = debug_line_data.getU32(offset_ptr);
+ prologue->Version = debug_line_data.getU16(offset_ptr);
+ if (prologue->Version != 2)
+ return false;
+
+ prologue->PrologueLength = debug_line_data.getU32(offset_ptr);
+ const uint32_t end_prologue_offset = prologue->PrologueLength + *offset_ptr;
+ prologue->MinInstLength = debug_line_data.getU8(offset_ptr);
+ prologue->DefaultIsStmt = debug_line_data.getU8(offset_ptr);
+ prologue->LineBase = debug_line_data.getU8(offset_ptr);
+ prologue->LineRange = debug_line_data.getU8(offset_ptr);
+ prologue->OpcodeBase = debug_line_data.getU8(offset_ptr);
+
+ prologue->StandardOpcodeLengths.reserve(prologue->OpcodeBase-1);
+ for (uint32_t i = 1; i < prologue->OpcodeBase; ++i) {
+ uint8_t op_len = debug_line_data.getU8(offset_ptr);
+ prologue->StandardOpcodeLengths.push_back(op_len);
+ }
+
+ while (*offset_ptr < end_prologue_offset) {
+ const char *s = debug_line_data.getCStr(offset_ptr);
+ if (s && s[0])
+ prologue->IncludeDirectories.push_back(s);
+ else
+ break;
+ }
+
+ while (*offset_ptr < end_prologue_offset) {
+ const char *name = debug_line_data.getCStr(offset_ptr);
+ if (name && name[0]) {
+ FileNameEntry fileEntry;
+ fileEntry.Name = name;
+ fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
+ fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
+ fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
+ prologue->FileNames.push_back(fileEntry);
+ } else {
+ break;
+ }
+ }
+
+ if (*offset_ptr != end_prologue_offset) {
+ fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should"
+ " have ended at 0x%8.8x but it ended ad 0x%8.8x\n",
+ prologue_offset, end_prologue_offset, *offset_ptr);
+ return false;
+ }
+ return true;
+}
+
+bool
+DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
+ const RelocAddrMap *RMap,
+ uint32_t *offset_ptr, State &state) {
+ const uint32_t debug_line_offset = *offset_ptr;
+
+ Prologue *prologue = &state.Prologue;
+
+ if (!parsePrologue(debug_line_data, offset_ptr, prologue)) {
+ // Restore our offset and return false to indicate failure!
+ *offset_ptr = debug_line_offset;
+ return false;
+ }
+
+ const uint32_t end_offset = debug_line_offset + prologue->TotalLength +
+ sizeof(prologue->TotalLength);
+
+ state.reset();
+
+ while (*offset_ptr < end_offset) {
+ uint8_t opcode = debug_line_data.getU8(offset_ptr);
+
+ if (opcode == 0) {
+ // Extended Opcodes always start with a zero opcode followed by
+ // a uleb128 length so you can skip ones you don't know about
+ uint32_t ext_offset = *offset_ptr;
+ uint64_t len = debug_line_data.getULEB128(offset_ptr);
+ uint32_t arg_size = len - (*offset_ptr - ext_offset);
+
+ uint8_t sub_opcode = debug_line_data.getU8(offset_ptr);
+ switch (sub_opcode) {
+ case DW_LNE_end_sequence:
+ // Set the end_sequence register of the state machine to true and
+ // append a row to the matrix using the current values of the
+ // state-machine registers. Then reset the registers to the initial
+ // values specified above. Every statement program sequence must end
+ // with a DW_LNE_end_sequence instruction which creates a row whose
+ // address is that of the byte after the last target machine instruction
+ // of the sequence.
+ state.EndSequence = true;
+ state.appendRowToMatrix(*offset_ptr);
+ state.reset();
+ break;
+
+ case DW_LNE_set_address:
+ // Takes a single relocatable address as an operand. The size of the
+ // operand is the size appropriate to hold an address on the target
+ // machine. Set the address register to the value given by the
+ // relocatable address. All of the other statement program opcodes
+ // that affect the address register add a delta to it. This instruction
+ // stores a relocatable value into it instead.
+ {
+ // If this address is in our relocation map, apply the relocation.
+ RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr);
+ if (AI != RMap->end()) {
+ const std::pair<uint8_t, int64_t> &R = AI->second;
+ state.Address = debug_line_data.getAddress(offset_ptr) + R.second;
+ } else
+ state.Address = debug_line_data.getAddress(offset_ptr);
+ }
+ break;
+
+ case DW_LNE_define_file:
+ // Takes 4 arguments. The first is a null terminated string containing
+ // a source file name. The second is an unsigned LEB128 number
+ // representing the directory index of the directory in which the file
+ // was found. The third is an unsigned LEB128 number representing the
+ // time of last modification of the file. The fourth is an unsigned
+ // LEB128 number representing the length in bytes of the file. The time
+ // and length fields may contain LEB128(0) if the information is not
+ // available.
+ //
+ // The directory index represents an entry in the include_directories
+ // section of the statement program prologue. The index is LEB128(0)
+ // if the file was found in the current directory of the compilation,
+ // LEB128(1) if it was found in the first directory in the
+ // include_directories section, and so on. The directory index is
+ // ignored for file names that represent full path names.
+ //
+ // The files are numbered, starting at 1, in the order in which they
+ // appear; the names in the prologue come before names defined by
+ // the DW_LNE_define_file instruction. These numbers are used in the
+ // the file register of the state machine.
+ {
+ FileNameEntry fileEntry;
+ fileEntry.Name = debug_line_data.getCStr(offset_ptr);
+ fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
+ fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
+ fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
+ prologue->FileNames.push_back(fileEntry);
+ }
+ break;
+
+ default:
+ // Length doesn't include the zero opcode byte or the length itself, but
+ // it does include the sub_opcode, so we have to adjust for that below
+ (*offset_ptr) += arg_size;
+ break;
+ }
+ } else if (opcode < prologue->OpcodeBase) {
+ switch (opcode) {
+ // Standard Opcodes
+ case DW_LNS_copy:
+ // Takes no arguments. Append a row to the matrix using the
+ // current values of the state-machine registers. Then set
+ // the basic_block register to false.
+ state.appendRowToMatrix(*offset_ptr);
+ break;
+
+ case DW_LNS_advance_pc:
+ // Takes a single unsigned LEB128 operand, multiplies it by the
+ // min_inst_length field of the prologue, and adds the
+ // result to the address register of the state machine.
+ state.Address += debug_line_data.getULEB128(offset_ptr) *
+ prologue->MinInstLength;
+ break;
+
+ case DW_LNS_advance_line:
+ // Takes a single signed LEB128 operand and adds that value to
+ // the line register of the state machine.
+ state.Line += debug_line_data.getSLEB128(offset_ptr);
+ break;
+
+ case DW_LNS_set_file:
+ // Takes a single unsigned LEB128 operand and stores it in the file
+ // register of the state machine.
+ state.File = debug_line_data.getULEB128(offset_ptr);
+ break;
+
+ case DW_LNS_set_column:
+ // Takes a single unsigned LEB128 operand and stores it in the
+ // column register of the state machine.
+ state.Column = debug_line_data.getULEB128(offset_ptr);
+ break;
+
+ case DW_LNS_negate_stmt:
+ // Takes no arguments. Set the is_stmt register of the state
+ // machine to the logical negation of its current value.
+ state.IsStmt = !state.IsStmt;
+ break;
+
+ case DW_LNS_set_basic_block:
+ // Takes no arguments. Set the basic_block register of the
+ // state machine to true
+ state.BasicBlock = true;
+ break;
+
+ case DW_LNS_const_add_pc:
+ // Takes no arguments. Add to the address register of the state
+ // machine the address increment value corresponding to special
+ // opcode 255. The motivation for DW_LNS_const_add_pc is this:
+ // when the statement program needs to advance the address by a
+ // small amount, it can use a single special opcode, which occupies
+ // a single byte. When it needs to advance the address by up to
+ // twice the range of the last special opcode, it can use
+ // DW_LNS_const_add_pc followed by a special opcode, for a total
+ // of two bytes. Only if it needs to advance the address by more
+ // than twice that range will it need to use both DW_LNS_advance_pc
+ // and a special opcode, requiring three or more bytes.
+ {
+ uint8_t adjust_opcode = 255 - prologue->OpcodeBase;
+ uint64_t addr_offset = (adjust_opcode / prologue->LineRange) *
+ prologue->MinInstLength;
+ state.Address += addr_offset;
+ }
+ break;
+
+ case DW_LNS_fixed_advance_pc:
+ // Takes a single uhalf operand. Add to the address register of
+ // the state machine the value of the (unencoded) operand. This
+ // is the only extended opcode that takes an argument that is not
+ // a variable length number. The motivation for DW_LNS_fixed_advance_pc
+ // is this: existing assemblers cannot emit DW_LNS_advance_pc or
+ // special opcodes because they cannot encode LEB128 numbers or
+ // judge when the computation of a special opcode overflows and
+ // requires the use of DW_LNS_advance_pc. Such assemblers, however,
+ // can use DW_LNS_fixed_advance_pc instead, sacrificing compression.
+ state.Address += debug_line_data.getU16(offset_ptr);
+ break;
+
+ case DW_LNS_set_prologue_end:
+ // Takes no arguments. Set the prologue_end register of the
+ // state machine to true
+ state.PrologueEnd = true;
+ break;
+
+ case DW_LNS_set_epilogue_begin:
+ // Takes no arguments. Set the basic_block register of the
+ // state machine to true
+ state.EpilogueBegin = true;
+ break;
+
+ case DW_LNS_set_isa:
+ // Takes a single unsigned LEB128 operand and stores it in the
+ // column register of the state machine.
+ state.Isa = debug_line_data.getULEB128(offset_ptr);
+ break;
+
+ default:
+ // Handle any unknown standard opcodes here. We know the lengths
+ // of such opcodes because they are specified in the prologue
+ // as a multiple of LEB128 operands for each opcode.
+ {
+ assert(opcode - 1U < prologue->StandardOpcodeLengths.size());
+ uint8_t opcode_length = prologue->StandardOpcodeLengths[opcode - 1];
+ for (uint8_t i=0; i<opcode_length; ++i)
+ debug_line_data.getULEB128(offset_ptr);
+ }
+ break;
+ }
+ } else {
+ // Special Opcodes
+
+ // A special opcode value is chosen based on the amount that needs
+ // to be added to the line and address registers. The maximum line
+ // increment for a special opcode is the value of the line_base
+ // field in the header, plus the value of the line_range field,
+ // minus 1 (line base + line range - 1). If the desired line
+ // increment is greater than the maximum line increment, a standard
+ // opcode must be used instead of a special opcode. The "address
+ // advance" is calculated by dividing the desired address increment
+ // by the minimum_instruction_length field from the header. The
+ // special opcode is then calculated using the following formula:
+ //
+ // opcode = (desired line increment - line_base) +
+ // (line_range * address advance) + opcode_base
+ //
+ // If the resulting opcode is greater than 255, a standard opcode
+ // must be used instead.
+ //
+ // To decode a special opcode, subtract the opcode_base from the
+ // opcode itself to give the adjusted opcode. The amount to
+ // increment the address register is the result of the adjusted
+ // opcode divided by the line_range multiplied by the
+ // minimum_instruction_length field from the header. That is:
+ //
+ // address increment = (adjusted opcode / line_range) *
+ // minimum_instruction_length
+ //
+ // The amount to increment the line register is the line_base plus
+ // the result of the adjusted opcode modulo the line_range. That is:
+ //
+ // line increment = line_base + (adjusted opcode % line_range)
+
+ uint8_t adjust_opcode = opcode - prologue->OpcodeBase;
+ uint64_t addr_offset = (adjust_opcode / prologue->LineRange) *
+ prologue->MinInstLength;
+ int32_t line_offset = prologue->LineBase +
+ (adjust_opcode % prologue->LineRange);
+ state.Line += line_offset;
+ state.Address += addr_offset;
+ state.appendRowToMatrix(*offset_ptr);
+ }
+ }
+
+ state.finalize();
+
+ return end_offset;
+}
+
+uint32_t
+DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
+ uint32_t unknown_index = UINT32_MAX;
+ if (Sequences.empty())
+ return unknown_index;
+ // First, find an instruction sequence containing the given address.
+ DWARFDebugLine::Sequence sequence;
+ sequence.LowPC = address;
+ SequenceIter first_seq = Sequences.begin();
+ SequenceIter last_seq = Sequences.end();
+ SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence,
+ DWARFDebugLine::Sequence::orderByLowPC);
+ DWARFDebugLine::Sequence found_seq;
+ if (seq_pos == last_seq) {
+ found_seq = Sequences.back();
+ } else if (seq_pos->LowPC == address) {
+ found_seq = *seq_pos;
+ } else {
+ if (seq_pos == first_seq)
+ return unknown_index;
+ found_seq = *(seq_pos - 1);
+ }
+ if (!found_seq.containsPC(address))
+ return unknown_index;
+ // Search for instruction address in the rows describing the sequence.
+ // Rows are stored in a vector, so we may use arithmetical operations with
+ // iterators.
+ DWARFDebugLine::Row row;
+ row.Address = address;
+ RowIter first_row = Rows.begin() + found_seq.FirstRowIndex;
+ RowIter last_row = Rows.begin() + found_seq.LastRowIndex;
+ RowIter row_pos = std::lower_bound(first_row, last_row, row,
+ DWARFDebugLine::Row::orderByAddress);
+ if (row_pos == last_row) {
+ return found_seq.LastRowIndex - 1;
+ }
+ uint32_t index = found_seq.FirstRowIndex + (row_pos - first_row);
+ if (row_pos->Address > address) {
+ if (row_pos == first_row)
+ return unknown_index;
+ else
+ index--;
+ }
+ return index;
+}
+
+bool
+DWARFDebugLine::LineTable::lookupAddressRange(uint64_t address,
+ uint64_t size,
+ std::vector<uint32_t>& result) const {
+ if (Sequences.empty())
+ return false;
+ uint64_t end_addr = address + size;
+ // First, find an instruction sequence containing the given address.
+ DWARFDebugLine::Sequence sequence;
+ sequence.LowPC = address;
+ SequenceIter first_seq = Sequences.begin();
+ SequenceIter last_seq = Sequences.end();
+ SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence,
+ DWARFDebugLine::Sequence::orderByLowPC);
+ if (seq_pos == last_seq || seq_pos->LowPC != address) {
+ if (seq_pos == first_seq)
+ return false;
+ seq_pos--;
+ }
+ if (!seq_pos->containsPC(address))
+ return false;
+
+ SequenceIter start_pos = seq_pos;
+
+ // Add the rows from the first sequence to the vector, starting with the
+ // index we just calculated
+
+ while (seq_pos != last_seq && seq_pos->LowPC < end_addr) {
+ DWARFDebugLine::Sequence cur_seq = *seq_pos;
+ uint32_t first_row_index;
+ uint32_t last_row_index;
+ if (seq_pos == start_pos) {
+ // For the first sequence, we need to find which row in the sequence is the
+ // first in our range. Rows are stored in a vector, so we may use
+ // arithmetical operations with iterators.
+ DWARFDebugLine::Row row;
+ row.Address = address;
+ RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex;
+ RowIter last_row = Rows.begin() + cur_seq.LastRowIndex;
+ RowIter row_pos = std::upper_bound(first_row, last_row, row,
+ DWARFDebugLine::Row::orderByAddress);
+ // The 'row_pos' iterator references the first row that is greater than
+ // our start address. Unless that's the first row, we want to start at
+ // the row before that.
+ first_row_index = cur_seq.FirstRowIndex + (row_pos - first_row);
+ if (row_pos != first_row)
+ --first_row_index;
+ } else
+ first_row_index = cur_seq.FirstRowIndex;
+
+ // For the last sequence in our range, we need to figure out the last row in
+ // range. For all other sequences we can go to the end of the sequence.
+ if (cur_seq.HighPC > end_addr) {
+ DWARFDebugLine::Row row;
+ row.Address = end_addr;
+ RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex;
+ RowIter last_row = Rows.begin() + cur_seq.LastRowIndex;
+ RowIter row_pos = std::upper_bound(first_row, last_row, row,
+ DWARFDebugLine::Row::orderByAddress);
+ // The 'row_pos' iterator references the first row that is greater than
+ // our end address. The row before that is the last row we want.
+ last_row_index = cur_seq.FirstRowIndex + (row_pos - first_row) - 1;
+ } else
+ // Contrary to what you might expect, DWARFDebugLine::SequenceLastRowIndex
+ // isn't a valid index within the current sequence. It's that plus one.
+ last_row_index = cur_seq.LastRowIndex - 1;
+
+ for (uint32_t i = first_row_index; i <= last_row_index; ++i) {
+ result.push_back(i);
+ }
+
+ ++seq_pos;
+ }
+
+ return true;
+}
+
+bool
+DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
+ bool NeedsAbsoluteFilePath,
+ std::string &Result) const {
+ if (FileIndex == 0 || FileIndex > Prologue.FileNames.size())
+ return false;
+ const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
+ const char *FileName = Entry.Name;
+ if (!NeedsAbsoluteFilePath ||
+ sys::path::is_absolute(FileName)) {
+ Result = FileName;
+ return true;
+ }
+ SmallString<16> FilePath;
+ uint64_t IncludeDirIndex = Entry.DirIdx;
+ // Be defensive about the contents of Entry.
+ if (IncludeDirIndex > 0 &&
+ IncludeDirIndex <= Prologue.IncludeDirectories.size()) {
+ const char *IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1];
+ sys::path::append(FilePath, IncludeDir);
+ }
+ sys::path::append(FilePath, FileName);
+ Result = FilePath.str();
+ return true;
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugLine.h b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.h
new file mode 100644
index 000000000000..2990756bd7c9
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.h
@@ -0,0 +1,254 @@
+//===-- DWARFDebugLine.h ----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H
+#define LLVM_DEBUGINFO_DWARFDEBUGLINE_H
+
+#include "DWARFRelocMap.h"
+#include "llvm/Support/DataExtractor.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+class DWARFDebugLine {
+public:
+ DWARFDebugLine(const RelocAddrMap* LineInfoRelocMap) : RelocMap(LineInfoRelocMap) {}
+ struct FileNameEntry {
+ FileNameEntry() : Name(0), DirIdx(0), ModTime(0), Length(0) {}
+
+ const char *Name;
+ uint64_t DirIdx;
+ uint64_t ModTime;
+ uint64_t Length;
+ };
+
+ struct Prologue {
+ Prologue()
+ : TotalLength(0), Version(0), PrologueLength(0), MinInstLength(0),
+ DefaultIsStmt(0), LineBase(0), LineRange(0), OpcodeBase(0) {}
+
+ // The size in bytes of the statement information for this compilation unit
+ // (not including the total_length field itself).
+ uint32_t TotalLength;
+ // Version identifier for the statement information format.
+ uint16_t Version;
+ // The number of bytes following the prologue_length field to the beginning
+ // of the first byte of the statement program itself.
+ uint32_t PrologueLength;
+ // The size in bytes of the smallest target machine instruction. Statement
+ // program opcodes that alter the address register first multiply their
+ // operands by this value.
+ uint8_t MinInstLength;
+ // The initial value of theis_stmtregister.
+ uint8_t DefaultIsStmt;
+ // This parameter affects the meaning of the special opcodes. See below.
+ int8_t LineBase;
+ // This parameter affects the meaning of the special opcodes. See below.
+ uint8_t LineRange;
+ // The number assigned to the first special opcode.
+ uint8_t OpcodeBase;
+ std::vector<uint8_t> StandardOpcodeLengths;
+ std::vector<const char*> IncludeDirectories;
+ std::vector<FileNameEntry> FileNames;
+
+ // Length of the prologue in bytes.
+ uint32_t getLength() const {
+ return PrologueLength + sizeof(TotalLength) + sizeof(Version) +
+ sizeof(PrologueLength);
+ }
+ // Length of the line table data in bytes (not including the prologue).
+ uint32_t getStatementTableLength() const {
+ return TotalLength + sizeof(TotalLength) - getLength();
+ }
+ int32_t getMaxLineIncrementForSpecialOpcode() const {
+ return LineBase + (int8_t)LineRange - 1;
+ }
+ void dump(raw_ostream &OS) const;
+ void clear() {
+ TotalLength = Version = PrologueLength = 0;
+ MinInstLength = LineBase = LineRange = OpcodeBase = 0;
+ StandardOpcodeLengths.clear();
+ IncludeDirectories.clear();
+ FileNames.clear();
+ }
+ };
+
+ // Standard .debug_line state machine structure.
+ struct Row {
+ Row(bool default_is_stmt = false) { reset(default_is_stmt); }
+ /// Called after a row is appended to the matrix.
+ void postAppend();
+ void reset(bool default_is_stmt);
+ void dump(raw_ostream &OS) const;
+
+ static bool orderByAddress(const Row& LHS, const Row& RHS) {
+ return LHS.Address < RHS.Address;
+ }
+
+ // The program-counter value corresponding to a machine instruction
+ // generated by the compiler.
+ uint64_t Address;
+ // An unsigned integer indicating a source line number. Lines are numbered
+ // beginning at 1. The compiler may emit the value 0 in cases where an
+ // instruction cannot be attributed to any source line.
+ uint32_t Line;
+ // An unsigned integer indicating a column number within a source line.
+ // Columns are numbered beginning at 1. The value 0 is reserved to indicate
+ // that a statement begins at the 'left edge' of the line.
+ uint16_t Column;
+ // An unsigned integer indicating the identity of the source file
+ // corresponding to a machine instruction.
+ uint16_t File;
+ // An unsigned integer whose value encodes the applicable instruction set
+ // architecture for the current instruction.
+ uint8_t Isa;
+ // A boolean indicating that the current instruction is the beginning of a
+ // statement.
+ uint8_t IsStmt:1,
+ // A boolean indicating that the current instruction is the
+ // beginning of a basic block.
+ BasicBlock:1,
+ // A boolean indicating that the current address is that of the
+ // first byte after the end of a sequence of target machine
+ // instructions.
+ EndSequence:1,
+ // A boolean indicating that the current address is one (of possibly
+ // many) where execution should be suspended for an entry breakpoint
+ // of a function.
+ PrologueEnd:1,
+ // A boolean indicating that the current address is one (of possibly
+ // many) where execution should be suspended for an exit breakpoint
+ // of a function.
+ EpilogueBegin:1;
+ };
+
+ // Represents a series of contiguous machine instructions. Line table for each
+ // compilation unit may consist of multiple sequences, which are not
+ // guaranteed to be in the order of ascending instruction address.
+ struct Sequence {
+ // Sequence describes instructions at address range [LowPC, HighPC)
+ // and is described by line table rows [FirstRowIndex, LastRowIndex).
+ uint64_t LowPC;
+ uint64_t HighPC;
+ unsigned FirstRowIndex;
+ unsigned LastRowIndex;
+ bool Empty;
+
+ Sequence() { reset(); }
+ void reset() {
+ LowPC = 0;
+ HighPC = 0;
+ FirstRowIndex = 0;
+ LastRowIndex = 0;
+ Empty = true;
+ }
+ static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) {
+ return LHS.LowPC < RHS.LowPC;
+ }
+ bool isValid() const {
+ return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
+ }
+ bool containsPC(uint64_t pc) const {
+ return (LowPC <= pc && pc < HighPC);
+ }
+ };
+
+ struct LineTable {
+ void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); }
+ void appendSequence(const DWARFDebugLine::Sequence &sequence) {
+ Sequences.push_back(sequence);
+ }
+ void clear() {
+ Prologue.clear();
+ Rows.clear();
+ Sequences.clear();
+ }
+
+ // Returns the index of the row with file/line info for a given address,
+ // or -1 if there is no such row.
+ uint32_t lookupAddress(uint64_t address) const;
+
+ bool lookupAddressRange(uint64_t address,
+ uint64_t size,
+ std::vector<uint32_t>& result) const;
+
+ // Extracts filename by its index in filename table in prologue.
+ // Returns true on success.
+ bool getFileNameByIndex(uint64_t FileIndex,
+ bool NeedsAbsoluteFilePath,
+ std::string &Result) const;
+
+ void dump(raw_ostream &OS) const;
+
+ struct Prologue Prologue;
+ typedef std::vector<Row> RowVector;
+ typedef RowVector::const_iterator RowIter;
+ typedef std::vector<Sequence> SequenceVector;
+ typedef SequenceVector::const_iterator SequenceIter;
+ RowVector Rows;
+ SequenceVector Sequences;
+ };
+
+ struct State : public Row, public Sequence, public LineTable {
+ // Special row codes.
+ enum {
+ StartParsingLineTable = 0,
+ DoneParsingLineTable = -1
+ };
+
+ State() : row(StartParsingLineTable) {}
+ virtual ~State();
+
+ virtual void appendRowToMatrix(uint32_t offset);
+ virtual void finalize();
+ virtual void reset() {
+ Row::reset(Prologue.DefaultIsStmt);
+ Sequence::reset();
+ }
+
+ // The row number that starts at zero for the prologue, and increases for
+ // each row added to the matrix.
+ unsigned row;
+ };
+
+ struct DumpingState : public State {
+ DumpingState(raw_ostream &OS) : OS(OS) {}
+ virtual ~DumpingState();
+ virtual void finalize();
+ private:
+ raw_ostream &OS;
+ };
+
+ static bool parsePrologue(DataExtractor debug_line_data, uint32_t *offset_ptr,
+ Prologue *prologue);
+ /// Parse a single line table (prologue and all rows).
+ static bool parseStatementTable(DataExtractor debug_line_data,
+ const RelocAddrMap *RMap,
+ uint32_t *offset_ptr, State &state);
+
+ const LineTable *getLineTable(uint32_t offset) const;
+ const LineTable *getOrParseLineTable(DataExtractor debug_line_data,
+ uint32_t offset);
+
+private:
+ typedef std::map<uint32_t, LineTable> LineTableMapTy;
+ typedef LineTableMapTy::iterator LineTableIter;
+ typedef LineTableMapTy::const_iterator LineTableConstIter;
+
+ const RelocAddrMap *RelocMap;
+ LineTableMapTy LineTableMap;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.cpp
new file mode 100644
index 000000000000..1806beee7285
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.cpp
@@ -0,0 +1,67 @@
+//===-- DWARFDebugRangesList.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugRangeList.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void DWARFDebugRangeList::clear() {
+ Offset = -1U;
+ AddressSize = 0;
+ Entries.clear();
+}
+
+bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) {
+ clear();
+ if (!data.isValidOffset(*offset_ptr))
+ return false;
+ AddressSize = data.getAddressSize();
+ if (AddressSize != 4 && AddressSize != 8)
+ return false;
+ Offset = *offset_ptr;
+ while (true) {
+ RangeListEntry entry;
+ uint32_t prev_offset = *offset_ptr;
+ entry.StartAddress = data.getAddress(offset_ptr);
+ entry.EndAddress = data.getAddress(offset_ptr);
+ // Check that both values were extracted correctly.
+ if (*offset_ptr != prev_offset + 2 * AddressSize) {
+ clear();
+ return false;
+ }
+ if (entry.isEndOfListEntry())
+ break;
+ Entries.push_back(entry);
+ }
+ return true;
+}
+
+void DWARFDebugRangeList::dump(raw_ostream &OS) const {
+ for (int i = 0, n = Entries.size(); i != n; ++i) {
+ const char *format_str = (AddressSize == 4
+ ? "%08x %08" PRIx64 " %08" PRIx64 "\n"
+ : "%08x %016" PRIx64 " %016" PRIx64 "\n");
+ OS << format(format_str, Offset, Entries[i].StartAddress,
+ Entries[i].EndAddress);
+ }
+ OS << format("%08x <End of list>\n", Offset);
+}
+
+bool DWARFDebugRangeList::containsAddress(uint64_t BaseAddress,
+ uint64_t Address) const {
+ for (int i = 0, n = Entries.size(); i != n; ++i) {
+ if (Entries[i].isBaseAddressSelectionEntry(AddressSize))
+ BaseAddress = Entries[i].EndAddress;
+ else if (Entries[i].containsAddress(BaseAddress, Address))
+ return true;
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.h b/contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.h
new file mode 100644
index 000000000000..4e34a916f4a3
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.h
@@ -0,0 +1,78 @@
+//===-- DWARFDebugRangeList.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H
+#define LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H
+
+#include "llvm/Support/DataExtractor.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+class DWARFDebugRangeList {
+public:
+ struct RangeListEntry {
+ // A beginning address offset. This address offset has the size of an
+ // address and is relative to the applicable base address of the
+ // compilation unit referencing this range list. It marks the beginning
+ // of an address range.
+ uint64_t StartAddress;
+ // An ending address offset. This address offset again has the size of
+ // an address and is relative to the applicable base address of the
+ // compilation unit referencing this range list. It marks the first
+ // address past the end of the address range. The ending address must
+ // be greater than or equal to the beginning address.
+ uint64_t EndAddress;
+ // The end of any given range list is marked by an end of list entry,
+ // which consists of a 0 for the beginning address offset
+ // and a 0 for the ending address offset.
+ bool isEndOfListEntry() const {
+ return (StartAddress == 0) && (EndAddress == 0);
+ }
+ // A base address selection entry consists of:
+ // 1. The value of the largest representable address offset
+ // (for example, 0xffffffff when the size of an address is 32 bits).
+ // 2. An address, which defines the appropriate base address for
+ // use in interpreting the beginning and ending address offsets of
+ // subsequent entries of the location list.
+ bool isBaseAddressSelectionEntry(uint8_t AddressSize) const {
+ assert(AddressSize == 4 || AddressSize == 8);
+ if (AddressSize == 4)
+ return StartAddress == -1U;
+ else
+ return StartAddress == -1ULL;
+ }
+ bool containsAddress(uint64_t BaseAddress, uint64_t Address) const {
+ return (BaseAddress + StartAddress <= Address) &&
+ (Address < BaseAddress + EndAddress);
+ }
+ };
+
+private:
+ // Offset in .debug_ranges section.
+ uint32_t Offset;
+ uint8_t AddressSize;
+ std::vector<RangeListEntry> Entries;
+
+public:
+ DWARFDebugRangeList() { clear(); }
+ void clear();
+ void dump(raw_ostream &OS) const;
+ bool extract(DataExtractor data, uint32_t *offset_ptr);
+ /// containsAddress - Returns true if range list contains the given
+ /// address. Has to be passed base address of the compile unit that
+ /// references this range list.
+ bool containsAddress(uint64_t BaseAddress, uint64_t Address) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H
diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp
new file mode 100644
index 000000000000..9f807aac5fd4
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp
@@ -0,0 +1,534 @@
+//===-- DWARFFormValue.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFFormValue.h"
+#include "DWARFCompileUnit.h"
+#include "DWARFContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+using namespace llvm;
+using namespace dwarf;
+
+static const uint8_t form_sizes_addr4[] = {
+ 0, // 0x00 unused
+ 4, // 0x01 DW_FORM_addr
+ 0, // 0x02 unused
+ 0, // 0x03 DW_FORM_block2
+ 0, // 0x04 DW_FORM_block4
+ 2, // 0x05 DW_FORM_data2
+ 4, // 0x06 DW_FORM_data4
+ 8, // 0x07 DW_FORM_data8
+ 0, // 0x08 DW_FORM_string
+ 0, // 0x09 DW_FORM_block
+ 0, // 0x0a DW_FORM_block1
+ 1, // 0x0b DW_FORM_data1
+ 1, // 0x0c DW_FORM_flag
+ 0, // 0x0d DW_FORM_sdata
+ 4, // 0x0e DW_FORM_strp
+ 0, // 0x0f DW_FORM_udata
+ 4, // 0x10 DW_FORM_ref_addr
+ 1, // 0x11 DW_FORM_ref1
+ 2, // 0x12 DW_FORM_ref2
+ 4, // 0x13 DW_FORM_ref4
+ 8, // 0x14 DW_FORM_ref8
+ 0, // 0x15 DW_FORM_ref_udata
+ 0, // 0x16 DW_FORM_indirect
+ 4, // 0x17 DW_FORM_sec_offset
+ 0, // 0x18 DW_FORM_exprloc
+ 0, // 0x19 DW_FORM_flag_present
+ 8, // 0x20 DW_FORM_ref_sig8
+};
+
+static const uint8_t form_sizes_addr8[] = {
+ 0, // 0x00 unused
+ 8, // 0x01 DW_FORM_addr
+ 0, // 0x02 unused
+ 0, // 0x03 DW_FORM_block2
+ 0, // 0x04 DW_FORM_block4
+ 2, // 0x05 DW_FORM_data2
+ 4, // 0x06 DW_FORM_data4
+ 8, // 0x07 DW_FORM_data8
+ 0, // 0x08 DW_FORM_string
+ 0, // 0x09 DW_FORM_block
+ 0, // 0x0a DW_FORM_block1
+ 1, // 0x0b DW_FORM_data1
+ 1, // 0x0c DW_FORM_flag
+ 0, // 0x0d DW_FORM_sdata
+ 4, // 0x0e DW_FORM_strp
+ 0, // 0x0f DW_FORM_udata
+ 8, // 0x10 DW_FORM_ref_addr
+ 1, // 0x11 DW_FORM_ref1
+ 2, // 0x12 DW_FORM_ref2
+ 4, // 0x13 DW_FORM_ref4
+ 8, // 0x14 DW_FORM_ref8
+ 0, // 0x15 DW_FORM_ref_udata
+ 0, // 0x16 DW_FORM_indirect
+ 4, // 0x17 DW_FORM_sec_offset
+ 0, // 0x18 DW_FORM_exprloc
+ 0, // 0x19 DW_FORM_flag_present
+ 8, // 0x20 DW_FORM_ref_sig8
+};
+
+const uint8_t *
+DWARFFormValue::getFixedFormSizesForAddressSize(uint8_t addr_size) {
+ switch (addr_size) {
+ case 4: return form_sizes_addr4;
+ case 8: return form_sizes_addr8;
+ }
+ return NULL;
+}
+
+bool
+DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
+ const DWARFCompileUnit *cu) {
+ bool indirect = false;
+ bool is_block = false;
+ Value.data = NULL;
+ // Read the value for the form into value and follow and DW_FORM_indirect
+ // instances we run into
+ do {
+ indirect = false;
+ switch (Form) {
+ case DW_FORM_addr:
+ case DW_FORM_ref_addr: {
+ RelocAddrMap::const_iterator AI
+ = cu->getRelocMap()->find(*offset_ptr);
+ if (AI != cu->getRelocMap()->end()) {
+ const std::pair<uint8_t, int64_t> &R = AI->second;
+ Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()) +
+ R.second;
+ } else
+ Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize());
+ break;
+ }
+ case DW_FORM_exprloc:
+ case DW_FORM_block:
+ Value.uval = data.getULEB128(offset_ptr);
+ is_block = true;
+ break;
+ case DW_FORM_block1:
+ Value.uval = data.getU8(offset_ptr);
+ is_block = true;
+ break;
+ case DW_FORM_block2:
+ Value.uval = data.getU16(offset_ptr);
+ is_block = true;
+ break;
+ case DW_FORM_block4:
+ Value.uval = data.getU32(offset_ptr);
+ is_block = true;
+ break;
+ case DW_FORM_data1:
+ case DW_FORM_ref1:
+ case DW_FORM_flag:
+ Value.uval = data.getU8(offset_ptr);
+ break;
+ case DW_FORM_data2:
+ case DW_FORM_ref2:
+ Value.uval = data.getU16(offset_ptr);
+ break;
+ case DW_FORM_data4:
+ case DW_FORM_ref4:
+ Value.uval = data.getU32(offset_ptr);
+ break;
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ Value.uval = data.getU64(offset_ptr);
+ break;
+ case DW_FORM_sdata:
+ Value.sval = data.getSLEB128(offset_ptr);
+ break;
+ case DW_FORM_strp: {
+ RelocAddrMap::const_iterator AI
+ = cu->getRelocMap()->find(*offset_ptr);
+ if (AI != cu->getRelocMap()->end()) {
+ const std::pair<uint8_t, int64_t> &R = AI->second;
+ Value.uval = data.getU32(offset_ptr) + R.second;
+ } else
+ Value.uval = data.getU32(offset_ptr);
+ break;
+ }
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ Value.uval = data.getULEB128(offset_ptr);
+ break;
+ case DW_FORM_string:
+ Value.cstr = data.getCStr(offset_ptr);
+ // Set the string value to also be the data for inlined cstr form
+ // values only so we can tell the differnence between DW_FORM_string
+ // and DW_FORM_strp form values
+ Value.data = (const uint8_t*)Value.cstr;
+ break;
+ case DW_FORM_indirect:
+ Form = data.getULEB128(offset_ptr);
+ indirect = true;
+ break;
+ case DW_FORM_sec_offset:
+ // FIXME: This is 64-bit for DWARF64.
+ Value.uval = data.getU32(offset_ptr);
+ break;
+ case DW_FORM_flag_present:
+ Value.uval = 1;
+ break;
+ case DW_FORM_ref_sig8:
+ Value.uval = data.getU64(offset_ptr);
+ break;
+ case DW_FORM_GNU_addr_index:
+ Value.uval = data.getULEB128(offset_ptr);
+ break;
+ case DW_FORM_GNU_str_index:
+ Value.uval = data.getULEB128(offset_ptr);
+ break;
+ default:
+ return false;
+ }
+ } while (indirect);
+
+ if (is_block) {
+ StringRef str = data.getData().substr(*offset_ptr, Value.uval);
+ Value.data = NULL;
+ if (!str.empty()) {
+ Value.data = reinterpret_cast<const uint8_t *>(str.data());
+ *offset_ptr += Value.uval;
+ }
+ }
+
+ return true;
+}
+
+bool
+DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr,
+ const DWARFCompileUnit *cu) const {
+ return DWARFFormValue::skipValue(Form, debug_info_data, offset_ptr, cu);
+}
+
+bool
+DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
+ uint32_t *offset_ptr, const DWARFCompileUnit *cu) {
+ bool indirect = false;
+ do {
+ indirect = false;
+ switch (form) {
+ // Blocks if inlined data that have a length field and the data bytes
+ // inlined in the .debug_info
+ case DW_FORM_exprloc:
+ case DW_FORM_block: {
+ uint64_t size = debug_info_data.getULEB128(offset_ptr);
+ *offset_ptr += size;
+ return true;
+ }
+ case DW_FORM_block1: {
+ uint8_t size = debug_info_data.getU8(offset_ptr);
+ *offset_ptr += size;
+ return true;
+ }
+ case DW_FORM_block2: {
+ uint16_t size = debug_info_data.getU16(offset_ptr);
+ *offset_ptr += size;
+ return true;
+ }
+ case DW_FORM_block4: {
+ uint32_t size = debug_info_data.getU32(offset_ptr);
+ *offset_ptr += size;
+ return true;
+ }
+
+ // Inlined NULL terminated C-strings
+ case DW_FORM_string:
+ debug_info_data.getCStr(offset_ptr);
+ return true;
+
+ // Compile unit address sized values
+ case DW_FORM_addr:
+ case DW_FORM_ref_addr:
+ *offset_ptr += cu->getAddressByteSize();
+ return true;
+
+ // 0 byte values - implied from the form.
+ case DW_FORM_flag_present:
+ return true;
+
+ // 1 byte values
+ case DW_FORM_data1:
+ case DW_FORM_flag:
+ case DW_FORM_ref1:
+ *offset_ptr += 1;
+ return true;
+
+ // 2 byte values
+ case DW_FORM_data2:
+ case DW_FORM_ref2:
+ *offset_ptr += 2;
+ return true;
+
+ // 4 byte values
+ case DW_FORM_strp:
+ case DW_FORM_data4:
+ case DW_FORM_ref4:
+ *offset_ptr += 4;
+ return true;
+
+ // 8 byte values
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_sig8:
+ *offset_ptr += 8;
+ return true;
+
+ // signed or unsigned LEB 128 values
+ // case DW_FORM_APPLE_db_str:
+ case DW_FORM_sdata:
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ case DW_FORM_GNU_str_index:
+ case DW_FORM_GNU_addr_index:
+ debug_info_data.getULEB128(offset_ptr);
+ return true;
+
+ case DW_FORM_indirect:
+ indirect = true;
+ form = debug_info_data.getULEB128(offset_ptr);
+ break;
+
+ // FIXME: 4 for DWARF32, 8 for DWARF64.
+ case DW_FORM_sec_offset:
+ *offset_ptr += 4;
+ return true;
+
+ default:
+ return false;
+ }
+ } while (indirect);
+ return true;
+}
+
+void
+DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
+ DataExtractor debug_str_data(cu->getStringSection(), true, 0);
+ DataExtractor debug_str_offset_data(cu->getStringOffsetSection(), true, 0);
+ uint64_t uvalue = getUnsigned();
+ bool cu_relative_offset = false;
+
+ switch (Form) {
+ case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break;
+ case DW_FORM_GNU_addr_index: {
+ StringRef AddrOffsetSec = cu->getAddrOffsetSection();
+ OS << format(" indexed (%8.8x) address = ", (uint32_t)uvalue);
+ if (AddrOffsetSec.size() != 0) {
+ DataExtractor DA(AddrOffsetSec, true, cu->getAddressByteSize());
+ OS << format("0x%016" PRIx64, getIndirectAddress(&DA, cu));
+ } else
+ OS << "<no .debug_addr section>";
+ break;
+ }
+ case DW_FORM_flag_present: OS << "true"; break;
+ case DW_FORM_flag:
+ case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break;
+ case DW_FORM_data2: OS << format("0x%04x", (uint16_t)uvalue); break;
+ case DW_FORM_data4: OS << format("0x%08x", (uint32_t)uvalue); break;
+ case DW_FORM_ref_sig8:
+ case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break;
+ case DW_FORM_string:
+ OS << '"';
+ OS.write_escaped(getAsCString(NULL));
+ OS << '"';
+ break;
+ case DW_FORM_exprloc:
+ case DW_FORM_block:
+ case DW_FORM_block1:
+ case DW_FORM_block2:
+ case DW_FORM_block4:
+ if (uvalue > 0) {
+ switch (Form) {
+ case DW_FORM_exprloc:
+ case DW_FORM_block: OS << format("<0x%" PRIx64 "> ", uvalue); break;
+ case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break;
+ case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); break;
+ case DW_FORM_block4: OS << format("<0x%8.8x> ", (uint32_t)uvalue); break;
+ default: break;
+ }
+
+ const uint8_t* data_ptr = Value.data;
+ if (data_ptr) {
+ // uvalue contains size of block
+ const uint8_t* end_data_ptr = data_ptr + uvalue;
+ while (data_ptr < end_data_ptr) {
+ OS << format("%2.2x ", *data_ptr);
+ ++data_ptr;
+ }
+ }
+ else
+ OS << "NULL";
+ }
+ break;
+
+ case DW_FORM_sdata: OS << getSigned(); break;
+ case DW_FORM_udata: OS << getUnsigned(); break;
+ case DW_FORM_strp: {
+ OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)uvalue);
+ const char* dbg_str = getAsCString(&debug_str_data);
+ if (dbg_str) {
+ OS << '"';
+ OS.write_escaped(dbg_str);
+ OS << '"';
+ }
+ break;
+ }
+ case DW_FORM_GNU_str_index: {
+ OS << format(" indexed (%8.8x) string = ", (uint32_t)uvalue);
+ const char *dbg_str = getIndirectCString(&debug_str_data,
+ &debug_str_offset_data);
+ if (dbg_str) {
+ OS << '"';
+ OS.write_escaped(dbg_str);
+ OS << '"';
+ }
+ break;
+ }
+ case DW_FORM_ref_addr:
+ OS << format("0x%016" PRIx64, uvalue);
+ break;
+ case DW_FORM_ref1:
+ cu_relative_offset = true;
+ OS << format("cu + 0x%2.2x", (uint8_t)uvalue);
+ break;
+ case DW_FORM_ref2:
+ cu_relative_offset = true;
+ OS << format("cu + 0x%4.4x", (uint16_t)uvalue);
+ break;
+ case DW_FORM_ref4:
+ cu_relative_offset = true;
+ OS << format("cu + 0x%4.4x", (uint32_t)uvalue);
+ break;
+ case DW_FORM_ref8:
+ cu_relative_offset = true;
+ OS << format("cu + 0x%8.8" PRIx64, uvalue);
+ break;
+ case DW_FORM_ref_udata:
+ cu_relative_offset = true;
+ OS << format("cu + 0x%" PRIx64, uvalue);
+ break;
+
+ // All DW_FORM_indirect attributes should be resolved prior to calling
+ // this function
+ case DW_FORM_indirect:
+ OS << "DW_FORM_indirect";
+ break;
+
+ // Should be formatted to 64-bit for DWARF64.
+ case DW_FORM_sec_offset:
+ OS << format("0x%08x", (uint32_t)uvalue);
+ break;
+
+ default:
+ OS << format("DW_FORM(0x%4.4x)", Form);
+ break;
+ }
+
+ if (cu_relative_offset)
+ OS << format(" => {0x%8.8" PRIx64 "}", uvalue + (cu ? cu->getOffset() : 0));
+}
+
+const char*
+DWARFFormValue::getAsCString(const DataExtractor *debug_str_data_ptr) const {
+ if (isInlinedCStr()) {
+ return Value.cstr;
+ } else if (debug_str_data_ptr) {
+ uint32_t offset = Value.uval;
+ return debug_str_data_ptr->getCStr(&offset);
+ }
+ return NULL;
+}
+
+const char*
+DWARFFormValue::getIndirectCString(const DataExtractor *DS,
+ const DataExtractor *DSO) const {
+ if (!DS || !DSO) return NULL;
+
+ uint32_t offset = Value.uval * 4;
+ uint32_t soffset = DSO->getU32(&offset);
+ return DS->getCStr(&soffset);
+}
+
+uint64_t
+DWARFFormValue::getIndirectAddress(const DataExtractor *DA,
+ const DWARFCompileUnit *cu) const {
+ if (!DA) return 0;
+
+ uint32_t offset = Value.uval * cu->getAddressByteSize();
+ return DA->getAddress(&offset);
+}
+
+uint64_t DWARFFormValue::getReference(const DWARFCompileUnit *cu) const {
+ uint64_t die_offset = Value.uval;
+ switch (Form) {
+ case DW_FORM_ref1:
+ case DW_FORM_ref2:
+ case DW_FORM_ref4:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_udata:
+ die_offset += (cu ? cu->getOffset() : 0);
+ break;
+ default:
+ break;
+ }
+
+ return die_offset;
+}
+
+bool
+DWARFFormValue::resolveCompileUnitReferences(const DWARFCompileUnit *cu) {
+ switch (Form) {
+ case DW_FORM_ref1:
+ case DW_FORM_ref2:
+ case DW_FORM_ref4:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_udata:
+ Value.uval += cu->getOffset();
+ Form = DW_FORM_ref_addr;
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+const uint8_t *DWARFFormValue::BlockData() const {
+ if (!isInlinedCStr())
+ return Value.data;
+ return NULL;
+}
+
+bool DWARFFormValue::isBlockForm(uint16_t form) {
+ switch (form) {
+ case DW_FORM_exprloc:
+ case DW_FORM_block:
+ case DW_FORM_block1:
+ case DW_FORM_block2:
+ case DW_FORM_block4:
+ return true;
+ }
+ return false;
+}
+
+bool DWARFFormValue::isDataForm(uint16_t form) {
+ switch (form) {
+ case DW_FORM_sdata:
+ case DW_FORM_udata:
+ case DW_FORM_data1:
+ case DW_FORM_data2:
+ case DW_FORM_data4:
+ case DW_FORM_data8:
+ return true;
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.h b/contrib/llvm/lib/DebugInfo/DWARFFormValue.h
new file mode 100644
index 000000000000..b863001e4af8
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFFormValue.h
@@ -0,0 +1,82 @@
+//===-- DWARFFormValue.h ----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFFORMVALUE_H
+#define LLVM_DEBUGINFO_DWARFFORMVALUE_H
+
+#include "llvm/Support/DataExtractor.h"
+
+namespace llvm {
+
+class DWARFCompileUnit;
+class raw_ostream;
+
+class DWARFFormValue {
+public:
+ struct ValueType {
+ ValueType() : data(NULL) {
+ uval = 0;
+ }
+
+ union {
+ uint64_t uval;
+ int64_t sval;
+ const char* cstr;
+ };
+ const uint8_t* data;
+ };
+
+ enum {
+ eValueTypeInvalid = 0,
+ eValueTypeUnsigned,
+ eValueTypeSigned,
+ eValueTypeCStr,
+ eValueTypeBlock
+ };
+
+private:
+ uint16_t Form; // Form for this value.
+ ValueType Value; // Contains all data for the form.
+
+public:
+ DWARFFormValue(uint16_t form = 0) : Form(form) {}
+ uint16_t getForm() const { return Form; }
+ const ValueType& value() const { return Value; }
+ void dump(raw_ostream &OS, const DWARFCompileUnit* cu) const;
+ bool extractValue(DataExtractor data, uint32_t *offset_ptr,
+ const DWARFCompileUnit *cu);
+ bool isInlinedCStr() const {
+ return Value.data != NULL && Value.data == (const uint8_t*)Value.cstr;
+ }
+ const uint8_t *BlockData() const;
+ uint64_t getReference(const DWARFCompileUnit* cu) const;
+
+ /// Resolve any compile unit specific references so that we don't need
+ /// the compile unit at a later time in order to work with the form
+ /// value.
+ bool resolveCompileUnitReferences(const DWARFCompileUnit* cu);
+ uint64_t getUnsigned() const { return Value.uval; }
+ int64_t getSigned() const { return Value.sval; }
+ const char *getAsCString(const DataExtractor *debug_str_data_ptr) const;
+ const char *getIndirectCString(const DataExtractor *,
+ const DataExtractor *) const;
+ uint64_t getIndirectAddress(const DataExtractor *,
+ const DWARFCompileUnit *) const;
+ bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr,
+ const DWARFCompileUnit *cu) const;
+ static bool skipValue(uint16_t form, DataExtractor debug_info_data,
+ uint32_t *offset_ptr, const DWARFCompileUnit *cu);
+ static bool isBlockForm(uint16_t form);
+ static bool isDataForm(uint16_t form);
+ static const uint8_t *getFixedFormSizesForAddressSize(uint8_t addr_size);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFRelocMap.h b/contrib/llvm/lib/DebugInfo/DWARFRelocMap.h
new file mode 100644
index 000000000000..6929e367b84c
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARFRelocMap.h
@@ -0,0 +1,22 @@
+//===-- DWARFRelocMap.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFRELOCMAP_H
+#define LLVM_DEBUGINFO_DWARFRELOCMAP_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+typedef DenseMap<uint64_t, std::pair<uint8_t, int64_t> > RelocAddrMap;
+
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARFRELOCMAP_H
+
diff --git a/contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h b/contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h
new file mode 100644
index 000000000000..314db8bd84c2
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h
@@ -0,0 +1,67 @@
+//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for JITEventListener implementations
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EVENT_LISTENER_COMMON_H
+#define EVENT_LISTENER_COMMON_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ValueHandle.h"
+
+namespace llvm {
+
+namespace jitprofiling {
+
+class FilenameCache {
+ // Holds the filename of each Scope, so that we can pass a null-terminated
+ // string into oprofile. Use an AssertingVH rather than a ValueMap because we
+ // shouldn't be modifying any MDNodes while this map is alive.
+ DenseMap<AssertingVH<MDNode>, std::string> Filenames;
+ DenseMap<AssertingVH<MDNode>, std::string> Paths;
+
+ public:
+ const char *getFilename(MDNode *Scope) {
+ std::string &Filename = Filenames[Scope];
+ if (Filename.empty()) {
+ DIScope DIScope(Scope);
+ Filename = DIScope.getFilename();
+ }
+ return Filename.c_str();
+ }
+
+ const char *getFullPath(MDNode *Scope) {
+ std::string &P = Paths[Scope];
+ if (P.empty()) {
+ DIScope DIScope(Scope);
+ StringRef DirName = DIScope.getDirectory();
+ StringRef FileName = DIScope.getFilename();
+ SmallString<256> FullPath;
+ if (DirName != "." && DirName != "") {
+ FullPath = DirName;
+ }
+ if (FileName != "") {
+ sys::path::append(FullPath, FileName);
+ }
+ P = FullPath.str();
+ }
+ return P.c_str();
+ }
+};
+
+} // namespace jitprofiling
+
+} // namespace llvm
+
+#endif //EVENT_LISTENER_COMMON_H
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
new file mode 100644
index 000000000000..906a3a3fda7f
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -0,0 +1,1308 @@
+//===-- ExecutionEngine.cpp - Common Implementation shared by EEs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common interface used by the various execution engine
+// subclasses.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cmath>
+#include <cstring>
+using namespace llvm;
+
+STATISTIC(NumInitBytes, "Number of bytes of global vars initialized");
+STATISTIC(NumGlobals , "Number of global vars initialized");
+
+ExecutionEngine *(*ExecutionEngine::JITCtor)(
+ Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ bool GVsWithCode,
+ TargetMachine *TM) = 0;
+ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
+ Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ bool GVsWithCode,
+ TargetMachine *TM) = 0;
+ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
+ std::string *ErrorStr) = 0;
+
+ExecutionEngine::ExecutionEngine(Module *M)
+ : EEState(*this),
+ LazyFunctionCreator(0),
+ ExceptionTableRegister(0),
+ ExceptionTableDeregister(0) {
+ CompilingLazily = false;
+ GVCompilationDisabled = false;
+ SymbolSearchingDisabled = false;
+ Modules.push_back(M);
+ assert(M && "Module is null?");
+}
+
+ExecutionEngine::~ExecutionEngine() {
+ clearAllGlobalMappings();
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+ delete Modules[i];
+}
+
+void ExecutionEngine::DeregisterAllTables() {
+ if (ExceptionTableDeregister) {
+ DenseMap<const Function*, void*>::iterator it = AllExceptionTables.begin();
+ DenseMap<const Function*, void*>::iterator ite = AllExceptionTables.end();
+ for (; it != ite; ++it)
+ ExceptionTableDeregister(it->second);
+ AllExceptionTables.clear();
+ }
+}
+
+namespace {
+/// \brief Helper class which uses a value handler to automatically deletes the
+/// memory block when the GlobalVariable is destroyed.
+class GVMemoryBlock : public CallbackVH {
+ GVMemoryBlock(const GlobalVariable *GV)
+ : CallbackVH(const_cast<GlobalVariable*>(GV)) {}
+
+public:
+ /// \brief Returns the address the GlobalVariable should be written into. The
+ /// GVMemoryBlock object prefixes that.
+ static char *Create(const GlobalVariable *GV, const DataLayout& TD) {
+ Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy);
+ void *RawMemory = ::operator new(
+ DataLayout::RoundUpAlignment(sizeof(GVMemoryBlock),
+ TD.getPreferredAlignment(GV))
+ + GVSize);
+ new(RawMemory) GVMemoryBlock(GV);
+ return static_cast<char*>(RawMemory) + sizeof(GVMemoryBlock);
+ }
+
+ virtual void deleted() {
+ // We allocated with operator new and with some extra memory hanging off the
+ // end, so don't just delete this. I'm not sure if this is actually
+ // required.
+ this->~GVMemoryBlock();
+ ::operator delete(this);
+ }
+};
+} // anonymous namespace
+
+char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) {
+ return GVMemoryBlock::Create(GV, *getDataLayout());
+}
+
+bool ExecutionEngine::removeModule(Module *M) {
+ for(SmallVector<Module *, 1>::iterator I = Modules.begin(),
+ E = Modules.end(); I != E; ++I) {
+ Module *Found = *I;
+ if (Found == M) {
+ Modules.erase(I);
+ clearGlobalMappingsFromModule(M);
+ return true;
+ }
+ }
+ return false;
+}
+
+Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
+ if (Function *F = Modules[i]->getFunction(FnName))
+ return F;
+ }
+ return 0;
+}
+
+
+void *ExecutionEngineState::RemoveMapping(const MutexGuard &,
+ const GlobalValue *ToUnmap) {
+ GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap);
+ void *OldVal;
+
+ // FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the
+ // GlobalAddressMap.
+ if (I == GlobalAddressMap.end())
+ OldVal = 0;
+ else {
+ OldVal = I->second;
+ GlobalAddressMap.erase(I);
+ }
+
+ GlobalAddressReverseMap.erase(OldVal);
+ return OldVal;
+}
+
+void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
+ MutexGuard locked(lock);
+
+ DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
+ << "\' to [" << Addr << "]\n";);
+ void *&CurVal = EEState.getGlobalAddressMap(locked)[GV];
+ assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!");
+ CurVal = Addr;
+
+ // If we are using the reverse mapping, add it too.
+ if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
+ AssertingVH<const GlobalValue> &V =
+ EEState.getGlobalAddressReverseMap(locked)[Addr];
+ assert((V == 0 || GV == 0) && "GlobalMapping already established!");
+ V = GV;
+ }
+}
+
+void ExecutionEngine::clearAllGlobalMappings() {
+ MutexGuard locked(lock);
+
+ EEState.getGlobalAddressMap(locked).clear();
+ EEState.getGlobalAddressReverseMap(locked).clear();
+}
+
+void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
+ MutexGuard locked(lock);
+
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
+ EEState.RemoveMapping(locked, FI);
+ for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
+ GI != GE; ++GI)
+ EEState.RemoveMapping(locked, GI);
+}
+
+void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
+ MutexGuard locked(lock);
+
+ ExecutionEngineState::GlobalAddressMapTy &Map =
+ EEState.getGlobalAddressMap(locked);
+
+ // Deleting from the mapping?
+ if (Addr == 0)
+ return EEState.RemoveMapping(locked, GV);
+
+ void *&CurVal = Map[GV];
+ void *OldVal = CurVal;
+
+ if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty())
+ EEState.getGlobalAddressReverseMap(locked).erase(CurVal);
+ CurVal = Addr;
+
+ // If we are using the reverse mapping, add it too.
+ if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
+ AssertingVH<const GlobalValue> &V =
+ EEState.getGlobalAddressReverseMap(locked)[Addr];
+ assert((V == 0 || GV == 0) && "GlobalMapping already established!");
+ V = GV;
+ }
+ return OldVal;
+}
+
+void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
+ MutexGuard locked(lock);
+
+ ExecutionEngineState::GlobalAddressMapTy::iterator I =
+ EEState.getGlobalAddressMap(locked).find(GV);
+ return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0;
+}
+
+const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
+ MutexGuard locked(lock);
+
+ // If we haven't computed the reverse mapping yet, do so first.
+ if (EEState.getGlobalAddressReverseMap(locked).empty()) {
+ for (ExecutionEngineState::GlobalAddressMapTy::iterator
+ I = EEState.getGlobalAddressMap(locked).begin(),
+ E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I)
+ EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(
+ I->second, I->first));
+ }
+
+ std::map<void *, AssertingVH<const GlobalValue> >::iterator I =
+ EEState.getGlobalAddressReverseMap(locked).find(Addr);
+ return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : 0;
+}
+
+namespace {
+class ArgvArray {
+ char *Array;
+ std::vector<char*> Values;
+public:
+ ArgvArray() : Array(NULL) {}
+ ~ArgvArray() { clear(); }
+ void clear() {
+ delete[] Array;
+ Array = NULL;
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ delete[] Values[I];
+ }
+ Values.clear();
+ }
+ /// Turn a vector of strings into a nice argv style array of pointers to null
+ /// terminated strings.
+ void *reset(LLVMContext &C, ExecutionEngine *EE,
+ const std::vector<std::string> &InputArgv);
+};
+} // anonymous namespace
+void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
+ const std::vector<std::string> &InputArgv) {
+ clear(); // Free the old contents.
+ unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+ Array = new char[(InputArgv.size()+1)*PtrSize];
+
+ DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n");
+ Type *SBytePtr = Type::getInt8PtrTy(C);
+
+ for (unsigned i = 0; i != InputArgv.size(); ++i) {
+ unsigned Size = InputArgv[i].size()+1;
+ char *Dest = new char[Size];
+ Values.push_back(Dest);
+ DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n");
+
+ std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest);
+ Dest[Size-1] = 0;
+
+ // Endian safe: Array[i] = (PointerTy)Dest;
+ EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Array+i*PtrSize),
+ SBytePtr);
+ }
+
+ // Null terminate it
+ EE->StoreValueToMemory(PTOGV(0),
+ (GenericValue*)(Array+InputArgv.size()*PtrSize),
+ SBytePtr);
+ return Array;
+}
+
+void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
+ bool isDtors) {
+ const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors";
+ GlobalVariable *GV = module->getNamedGlobal(Name);
+
+ // If this global has internal linkage, or if it has a use, then it must be
+ // an old-style (llvmgcc3) static ctor with __main linked in and in use. If
+ // this is the case, don't execute any of the global ctors, __main will do
+ // it.
+ if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return;
+
+ // Should be an array of '{ i32, void ()* }' structs. The first value is
+ // the init priority, which we ignore.
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0)
+ return;
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
+ if (CS == 0) continue;
+
+ Constant *FP = CS->getOperand(1);
+ if (FP->isNullValue())
+ continue; // Found a sentinal value, ignore.
+
+ // Strip off constant expression casts.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+
+ // Execute the ctor/dtor function!
+ if (Function *F = dyn_cast<Function>(FP))
+ runFunction(F, std::vector<GenericValue>());
+
+ // FIXME: It is marginally lame that we just do nothing here if we see an
+ // entry we don't recognize. It might not be unreasonable for the verifier
+ // to not even allow this and just assert here.
+ }
+}
+
+void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
+ // Execute global ctors/dtors for each module in the program.
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+ runStaticConstructorsDestructors(Modules[i], isDtors);
+}
+
+#ifndef NDEBUG
+/// isTargetNullPtr - Return whether the target pointer stored at Loc is null.
+static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
+ unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+ for (unsigned i = 0; i < PtrSize; ++i)
+ if (*(i + (uint8_t*)Loc))
+ return false;
+ return true;
+}
+#endif
+
+int ExecutionEngine::runFunctionAsMain(Function *Fn,
+ const std::vector<std::string> &argv,
+ const char * const * envp) {
+ std::vector<GenericValue> GVArgs;
+ GenericValue GVArgc;
+ GVArgc.IntVal = APInt(32, argv.size());
+
+ // Check main() type
+ unsigned NumArgs = Fn->getFunctionType()->getNumParams();
+ FunctionType *FTy = Fn->getFunctionType();
+ Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo();
+
+ // Check the argument types.
+ if (NumArgs > 3)
+ report_fatal_error("Invalid number of arguments of main() supplied");
+ if (NumArgs >= 3 && FTy->getParamType(2) != PPInt8Ty)
+ report_fatal_error("Invalid type for third argument of main() supplied");
+ if (NumArgs >= 2 && FTy->getParamType(1) != PPInt8Ty)
+ report_fatal_error("Invalid type for second argument of main() supplied");
+ if (NumArgs >= 1 && !FTy->getParamType(0)->isIntegerTy(32))
+ report_fatal_error("Invalid type for first argument of main() supplied");
+ if (!FTy->getReturnType()->isIntegerTy() &&
+ !FTy->getReturnType()->isVoidTy())
+ report_fatal_error("Invalid return type of main() supplied");
+
+ ArgvArray CArgv;
+ ArgvArray CEnv;
+ if (NumArgs) {
+ GVArgs.push_back(GVArgc); // Arg #0 = argc.
+ if (NumArgs > 1) {
+ // Arg #1 = argv.
+ GVArgs.push_back(PTOGV(CArgv.reset(Fn->getContext(), this, argv)));
+ assert(!isTargetNullPtr(this, GVTOP(GVArgs[1])) &&
+ "argv[0] was null after CreateArgv");
+ if (NumArgs > 2) {
+ std::vector<std::string> EnvVars;
+ for (unsigned i = 0; envp[i]; ++i)
+ EnvVars.push_back(envp[i]);
+ // Arg #2 = envp.
+ GVArgs.push_back(PTOGV(CEnv.reset(Fn->getContext(), this, EnvVars)));
+ }
+ }
+ }
+
+ return runFunction(Fn, GVArgs).IntVal.getZExtValue();
+}
+
+ExecutionEngine *ExecutionEngine::create(Module *M,
+ bool ForceInterpreter,
+ std::string *ErrorStr,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode) {
+ EngineBuilder EB = EngineBuilder(M)
+ .setEngineKind(ForceInterpreter
+ ? EngineKind::Interpreter
+ : EngineKind::JIT)
+ .setErrorStr(ErrorStr)
+ .setOptLevel(OptLevel)
+ .setAllocateGVsWithCode(GVsWithCode);
+
+ return EB.create();
+}
+
+/// createJIT - This is the factory method for creating a JIT for the current
+/// machine, it does not fall back to the interpreter. This takes ownership
+/// of the module.
+ExecutionEngine *ExecutionEngine::createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OL,
+ bool GVsWithCode,
+ Reloc::Model RM,
+ CodeModel::Model CMM) {
+ if (ExecutionEngine::JITCtor == 0) {
+ if (ErrorStr)
+ *ErrorStr = "JIT has not been linked in.";
+ return 0;
+ }
+
+ // Use the defaults for extra parameters. Users can use EngineBuilder to
+ // set them.
+ EngineBuilder EB(M);
+ EB.setEngineKind(EngineKind::JIT);
+ EB.setErrorStr(ErrorStr);
+ EB.setRelocationModel(RM);
+ EB.setCodeModel(CMM);
+ EB.setAllocateGVsWithCode(GVsWithCode);
+ EB.setOptLevel(OL);
+ EB.setJITMemoryManager(JMM);
+
+ // TODO: permit custom TargetOptions here
+ TargetMachine *TM = EB.selectTarget();
+ if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+
+ return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM);
+}
+
+ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
+ OwningPtr<TargetMachine> TheTM(TM); // Take ownership.
+
+ // Make sure we can resolve symbols in the program as well. The zero arg
+ // to the function tells DynamicLibrary to load the program, not a library.
+ if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
+ return 0;
+
+ // If the user specified a memory manager but didn't specify which engine to
+ // create, we assume they only want the JIT, and we fail if they only want
+ // the interpreter.
+ if (JMM) {
+ if (WhichEngine & EngineKind::JIT)
+ WhichEngine = EngineKind::JIT;
+ else {
+ if (ErrorStr)
+ *ErrorStr = "Cannot create an interpreter with a memory manager.";
+ return 0;
+ }
+ }
+
+ // Unless the interpreter was explicitly selected or the JIT is not linked,
+ // try making a JIT.
+ if ((WhichEngine & EngineKind::JIT) && TheTM) {
+ Triple TT(M->getTargetTriple());
+ if (!TM->getTarget().hasJIT()) {
+ errs() << "WARNING: This target JIT is not designed for the host"
+ << " you are running. If bad things happen, please choose"
+ << " a different -march switch.\n";
+ }
+
+ if (UseMCJIT && ExecutionEngine::MCJITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::MCJITCtor(M, ErrorStr, JMM,
+ AllocateGVsWithCode, TheTM.take());
+ if (EE) return EE;
+ } else if (ExecutionEngine::JITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::JITCtor(M, ErrorStr, JMM,
+ AllocateGVsWithCode, TheTM.take());
+ if (EE) return EE;
+ }
+ }
+
+ // If we can't make a JIT and we didn't request one specifically, try making
+ // an interpreter instead.
+ if (WhichEngine & EngineKind::Interpreter) {
+ if (ExecutionEngine::InterpCtor)
+ return ExecutionEngine::InterpCtor(M, ErrorStr);
+ if (ErrorStr)
+ *ErrorStr = "Interpreter has not been linked in.";
+ return 0;
+ }
+
+ if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0 &&
+ ExecutionEngine::MCJITCtor == 0) {
+ if (ErrorStr)
+ *ErrorStr = "JIT has not been linked in.";
+ }
+
+ return 0;
+}
+
+void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
+ if (Function *F = const_cast<Function*>(dyn_cast<Function>(GV)))
+ return getPointerToFunction(F);
+
+ MutexGuard locked(lock);
+ if (void *P = EEState.getGlobalAddressMap(locked)[GV])
+ return P;
+
+ // Global variable might have been added since interpreter started.
+ if (GlobalVariable *GVar =
+ const_cast<GlobalVariable *>(dyn_cast<GlobalVariable>(GV)))
+ EmitGlobalVariable(GVar);
+ else
+ llvm_unreachable("Global hasn't had an address allocated yet!");
+
+ return EEState.getGlobalAddressMap(locked)[GV];
+}
+
+/// \brief Converts a Constant* into a GenericValue, including handling of
+/// ConstantExpr values.
+GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
+ // If its undefined, return the garbage.
+ if (isa<UndefValue>(C)) {
+ GenericValue Result;
+ switch (C->getType()->getTypeID()) {
+ default:
+ break;
+ case Type::IntegerTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ // Although the value is undefined, we still have to construct an APInt
+ // with the correct bit width.
+ Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0);
+ break;
+ case Type::VectorTyID:
+ // if the whole vector is 'undef' just reserve memory for the value.
+ const VectorType* VTy = dyn_cast<VectorType>(C->getType());
+ const Type *ElemTy = VTy->getElementType();
+ unsigned int elemNum = VTy->getNumElements();
+ Result.AggregateVal.resize(elemNum);
+ if (ElemTy->isIntegerTy())
+ for (unsigned int i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].IntVal =
+ APInt(ElemTy->getPrimitiveSizeInBits(), 0);
+ break;
+ }
+ return Result;
+ }
+
+ // Otherwise, if the value is a ConstantExpr...
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ Constant *Op0 = CE->getOperand(0);
+ switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr: {
+ // Compute the index
+ GenericValue Result = getConstantValue(Op0);
+ APInt Offset(TD->getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(*TD, Offset);
+
+ char* tmp = (char*) Result.PointerVal;
+ Result = PTOGV(tmp + Offset.getSExtValue());
+ return Result;
+ }
+ case Instruction::Trunc: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.trunc(BitWidth);
+ return GV;
+ }
+ case Instruction::ZExt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.zext(BitWidth);
+ return GV;
+ }
+ case Instruction::SExt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.sext(BitWidth);
+ return GV;
+ }
+ case Instruction::FPTrunc: {
+ // FIXME long double
+ GenericValue GV = getConstantValue(Op0);
+ GV.FloatVal = float(GV.DoubleVal);
+ return GV;
+ }
+ case Instruction::FPExt:{
+ // FIXME long double
+ GenericValue GV = getConstantValue(Op0);
+ GV.DoubleVal = double(GV.FloatVal);
+ return GV;
+ }
+ case Instruction::UIToFP: {
+ GenericValue GV = getConstantValue(Op0);
+ if (CE->getType()->isFloatTy())
+ GV.FloatVal = float(GV.IntVal.roundToDouble());
+ else if (CE->getType()->isDoubleTy())
+ GV.DoubleVal = GV.IntVal.roundToDouble();
+ else if (CE->getType()->isX86_FP80Ty()) {
+ APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+ (void)apf.convertFromAPInt(GV.IntVal,
+ false,
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apf.bitcastToAPInt();
+ }
+ return GV;
+ }
+ case Instruction::SIToFP: {
+ GenericValue GV = getConstantValue(Op0);
+ if (CE->getType()->isFloatTy())
+ GV.FloatVal = float(GV.IntVal.signedRoundToDouble());
+ else if (CE->getType()->isDoubleTy())
+ GV.DoubleVal = GV.IntVal.signedRoundToDouble();
+ else if (CE->getType()->isX86_FP80Ty()) {
+ APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+ (void)apf.convertFromAPInt(GV.IntVal,
+ true,
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apf.bitcastToAPInt();
+ }
+ return GV;
+ }
+ case Instruction::FPToUI: // double->APInt conversion handles sign
+ case Instruction::FPToSI: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ if (Op0->getType()->isFloatTy())
+ GV.IntVal = APIntOps::RoundFloatToAPInt(GV.FloatVal, BitWidth);
+ else if (Op0->getType()->isDoubleTy())
+ GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth);
+ else if (Op0->getType()->isX86_FP80Ty()) {
+ APFloat apf = APFloat(APFloat::x87DoubleExtended, GV.IntVal);
+ uint64_t v;
+ bool ignored;
+ (void)apf.convertToInteger(&v, BitWidth,
+ CE->getOpcode()==Instruction::FPToSI,
+ APFloat::rmTowardZero, &ignored);
+ GV.IntVal = v; // endian?
+ }
+ return GV;
+ }
+ case Instruction::PtrToInt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t PtrWidth = TD->getTypeSizeInBits(Op0->getType());
+ assert(PtrWidth <= 64 && "Bad pointer width");
+ GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal));
+ uint32_t IntWidth = TD->getTypeSizeInBits(CE->getType());
+ GV.IntVal = GV.IntVal.zextOrTrunc(IntWidth);
+ return GV;
+ }
+ case Instruction::IntToPtr: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t PtrWidth = TD->getTypeSizeInBits(CE->getType());
+ GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth);
+ assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width");
+ GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue()));
+ return GV;
+ }
+ case Instruction::BitCast: {
+ GenericValue GV = getConstantValue(Op0);
+ Type* DestTy = CE->getType();
+ switch (Op0->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid bitcast operand");
+ case Type::IntegerTyID:
+ assert(DestTy->isFloatingPointTy() && "invalid bitcast");
+ if (DestTy->isFloatTy())
+ GV.FloatVal = GV.IntVal.bitsToFloat();
+ else if (DestTy->isDoubleTy())
+ GV.DoubleVal = GV.IntVal.bitsToDouble();
+ break;
+ case Type::FloatTyID:
+ assert(DestTy->isIntegerTy(32) && "Invalid bitcast");
+ GV.IntVal = APInt::floatToBits(GV.FloatVal);
+ break;
+ case Type::DoubleTyID:
+ assert(DestTy->isIntegerTy(64) && "Invalid bitcast");
+ GV.IntVal = APInt::doubleToBits(GV.DoubleVal);
+ break;
+ case Type::PointerTyID:
+ assert(DestTy->isPointerTy() && "Invalid bitcast");
+ break; // getConstantValue(Op0) above already converted it
+ }
+ return GV;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ GenericValue LHS = getConstantValue(Op0);
+ GenericValue RHS = getConstantValue(CE->getOperand(1));
+ GenericValue GV;
+ switch (CE->getOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Bad add type!");
+ case Type::IntegerTyID:
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid integer opcode");
+ case Instruction::Add: GV.IntVal = LHS.IntVal + RHS.IntVal; break;
+ case Instruction::Sub: GV.IntVal = LHS.IntVal - RHS.IntVal; break;
+ case Instruction::Mul: GV.IntVal = LHS.IntVal * RHS.IntVal; break;
+ case Instruction::UDiv:GV.IntVal = LHS.IntVal.udiv(RHS.IntVal); break;
+ case Instruction::SDiv:GV.IntVal = LHS.IntVal.sdiv(RHS.IntVal); break;
+ case Instruction::URem:GV.IntVal = LHS.IntVal.urem(RHS.IntVal); break;
+ case Instruction::SRem:GV.IntVal = LHS.IntVal.srem(RHS.IntVal); break;
+ case Instruction::And: GV.IntVal = LHS.IntVal & RHS.IntVal; break;
+ case Instruction::Or: GV.IntVal = LHS.IntVal | RHS.IntVal; break;
+ case Instruction::Xor: GV.IntVal = LHS.IntVal ^ RHS.IntVal; break;
+ }
+ break;
+ case Type::FloatTyID:
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid float opcode");
+ case Instruction::FAdd:
+ GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break;
+ case Instruction::FSub:
+ GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break;
+ case Instruction::FMul:
+ GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break;
+ case Instruction::FDiv:
+ GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break;
+ case Instruction::FRem:
+ GV.FloatVal = std::fmod(LHS.FloatVal,RHS.FloatVal); break;
+ }
+ break;
+ case Type::DoubleTyID:
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid double opcode");
+ case Instruction::FAdd:
+ GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break;
+ case Instruction::FSub:
+ GV.DoubleVal = LHS.DoubleVal - RHS.DoubleVal; break;
+ case Instruction::FMul:
+ GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break;
+ case Instruction::FDiv:
+ GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break;
+ case Instruction::FRem:
+ GV.DoubleVal = std::fmod(LHS.DoubleVal,RHS.DoubleVal); break;
+ }
+ break;
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: {
+ const fltSemantics &Sem = CE->getOperand(0)->getType()->getFltSemantics();
+ APFloat apfLHS = APFloat(Sem, LHS.IntVal);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid long double opcode");
+ case Instruction::FAdd:
+ apfLHS.add(APFloat(Sem, RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FSub:
+ apfLHS.subtract(APFloat(Sem, RHS.IntVal),
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FMul:
+ apfLHS.multiply(APFloat(Sem, RHS.IntVal),
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FDiv:
+ apfLHS.divide(APFloat(Sem, RHS.IntVal),
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FRem:
+ apfLHS.mod(APFloat(Sem, RHS.IntVal),
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ }
+ }
+ break;
+ }
+ return GV;
+ }
+ default:
+ break;
+ }
+
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ConstantExpr not handled: " << *CE;
+ report_fatal_error(OS.str());
+ }
+
+ // Otherwise, we have a simple constant.
+ GenericValue Result;
+ switch (C->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ Result.FloatVal = cast<ConstantFP>(C)->getValueAPF().convertToFloat();
+ break;
+ case Type::DoubleTyID:
+ Result.DoubleVal = cast<ConstantFP>(C)->getValueAPF().convertToDouble();
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ Result.IntVal = cast <ConstantFP>(C)->getValueAPF().bitcastToAPInt();
+ break;
+ case Type::IntegerTyID:
+ Result.IntVal = cast<ConstantInt>(C)->getValue();
+ break;
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(C))
+ Result.PointerVal = 0;
+ else if (const Function *F = dyn_cast<Function>(C))
+ Result = PTOGV(getPointerToFunctionOrStub(const_cast<Function*>(F)));
+ else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ Result = PTOGV(getOrEmitGlobalVariable(const_cast<GlobalVariable*>(GV)));
+ else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ Result = PTOGV(getPointerToBasicBlock(const_cast<BasicBlock*>(
+ BA->getBasicBlock())));
+ else
+ llvm_unreachable("Unknown constant pointer type!");
+ break;
+ case Type::VectorTyID: {
+ unsigned elemNum;
+ Type* ElemTy;
+ const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+ const ConstantVector *CV = dyn_cast<ConstantVector>(C);
+ const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(C);
+
+ if (CDV) {
+ elemNum = CDV->getNumElements();
+ ElemTy = CDV->getElementType();
+ } else if (CV || CAZ) {
+ VectorType* VTy = dyn_cast<VectorType>(C->getType());
+ elemNum = VTy->getNumElements();
+ ElemTy = VTy->getElementType();
+ } else {
+ llvm_unreachable("Unknown constant vector type!");
+ }
+
+ Result.AggregateVal.resize(elemNum);
+ // Check if vector holds floats.
+ if(ElemTy->isFloatTy()) {
+ if (CAZ) {
+ GenericValue floatZero;
+ floatZero.FloatVal = 0.f;
+ std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+ floatZero);
+ break;
+ }
+ if(CV) {
+ for (unsigned i = 0; i < elemNum; ++i)
+ if (!isa<UndefValue>(CV->getOperand(i)))
+ Result.AggregateVal[i].FloatVal = cast<ConstantFP>(
+ CV->getOperand(i))->getValueAPF().convertToFloat();
+ break;
+ }
+ if(CDV)
+ for (unsigned i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].FloatVal = CDV->getElementAsFloat(i);
+
+ break;
+ }
+ // Check if vector holds doubles.
+ if (ElemTy->isDoubleTy()) {
+ if (CAZ) {
+ GenericValue doubleZero;
+ doubleZero.DoubleVal = 0.0;
+ std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+ doubleZero);
+ break;
+ }
+ if(CV) {
+ for (unsigned i = 0; i < elemNum; ++i)
+ if (!isa<UndefValue>(CV->getOperand(i)))
+ Result.AggregateVal[i].DoubleVal = cast<ConstantFP>(
+ CV->getOperand(i))->getValueAPF().convertToDouble();
+ break;
+ }
+ if(CDV)
+ for (unsigned i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].DoubleVal = CDV->getElementAsDouble(i);
+
+ break;
+ }
+ // Check if vector holds integers.
+ if (ElemTy->isIntegerTy()) {
+ if (CAZ) {
+ GenericValue intZero;
+ intZero.IntVal = APInt(ElemTy->getScalarSizeInBits(), 0ull);
+ std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+ intZero);
+ break;
+ }
+ if(CV) {
+ for (unsigned i = 0; i < elemNum; ++i)
+ if (!isa<UndefValue>(CV->getOperand(i)))
+ Result.AggregateVal[i].IntVal = cast<ConstantInt>(
+ CV->getOperand(i))->getValue();
+ else {
+ Result.AggregateVal[i].IntVal =
+ APInt(CV->getOperand(i)->getType()->getPrimitiveSizeInBits(), 0);
+ }
+ break;
+ }
+ if(CDV)
+ for (unsigned i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].IntVal = APInt(
+ CDV->getElementType()->getPrimitiveSizeInBits(),
+ CDV->getElementAsInteger(i));
+
+ break;
+ }
+ llvm_unreachable("Unknown constant pointer type!");
+ }
+ break;
+
+ default:
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ERROR: Constant unimplemented for type: " << *C->getType();
+ report_fatal_error(OS.str());
+ }
+
+ return Result;
+}
+
+/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
+/// with the integer held in IntVal.
+static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
+ unsigned StoreBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
+ const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
+
+ if (sys::isLittleEndianHost()) {
+ // Little-endian host - the source is ordered from LSB to MSB. Order the
+ // destination from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, StoreBytes);
+ } else {
+ // Big-endian host - the source is an array of 64 bit words ordered from
+ // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
+ // from MSB to LSB: Reverse the word order, but not the bytes in a word.
+ while (StoreBytes > sizeof(uint64_t)) {
+ StoreBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
+ Src += sizeof(uint64_t);
+ }
+
+ memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
+ }
+}
+
+void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
+ GenericValue *Ptr, Type *Ty) {
+ const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty);
+
+ switch (Ty->getTypeID()) {
+ default:
+ dbgs() << "Cannot store value of type " << *Ty << "!\n";
+ break;
+ case Type::IntegerTyID:
+ StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes);
+ break;
+ case Type::FloatTyID:
+ *((float*)Ptr) = Val.FloatVal;
+ break;
+ case Type::DoubleTyID:
+ *((double*)Ptr) = Val.DoubleVal;
+ break;
+ case Type::X86_FP80TyID:
+ memcpy(Ptr, Val.IntVal.getRawData(), 10);
+ break;
+ case Type::PointerTyID:
+ // Ensure 64 bit target pointers are fully initialized on 32 bit hosts.
+ if (StoreBytes != sizeof(PointerTy))
+ memset(&(Ptr->PointerVal), 0, StoreBytes);
+
+ *((PointerTy*)Ptr) = Val.PointerVal;
+ break;
+ case Type::VectorTyID:
+ for (unsigned i = 0; i < Val.AggregateVal.size(); ++i) {
+ if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())
+ *(((double*)Ptr)+i) = Val.AggregateVal[i].DoubleVal;
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy())
+ *(((float*)Ptr)+i) = Val.AggregateVal[i].FloatVal;
+ if (cast<VectorType>(Ty)->getElementType()->isIntegerTy()) {
+ unsigned numOfBytes =(Val.AggregateVal[i].IntVal.getBitWidth()+7)/8;
+ StoreIntToMemory(Val.AggregateVal[i].IntVal,
+ (uint8_t*)Ptr + numOfBytes*i, numOfBytes);
+ }
+ }
+ break;
+ }
+
+ if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian())
+ // Host and target are different endian - reverse the stored bytes.
+ std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
+}
+
+/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
+/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
+static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
+ uint8_t *Dst = reinterpret_cast<uint8_t *>(
+ const_cast<uint64_t *>(IntVal.getRawData()));
+
+ if (sys::isLittleEndianHost())
+ // Little-endian host - the destination must be ordered from LSB to MSB.
+ // The source is ordered from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, LoadBytes);
+ else {
+ // Big-endian - the destination is an array of 64 bit words ordered from
+ // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
+ // ordered from MSB to LSB: Reverse the word order, but not the bytes in
+ // a word.
+ while (LoadBytes > sizeof(uint64_t)) {
+ LoadBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
+ Dst += sizeof(uint64_t);
+ }
+
+ memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
+ }
+}
+
+/// FIXME: document
+///
+void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
+ GenericValue *Ptr,
+ Type *Ty) {
+ const unsigned LoadBytes = getDataLayout()->getTypeStoreSize(Ty);
+
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ // An APInt with all words initially zero.
+ Result.IntVal = APInt(cast<IntegerType>(Ty)->getBitWidth(), 0);
+ LoadIntFromMemory(Result.IntVal, (uint8_t*)Ptr, LoadBytes);
+ break;
+ case Type::FloatTyID:
+ Result.FloatVal = *((float*)Ptr);
+ break;
+ case Type::DoubleTyID:
+ Result.DoubleVal = *((double*)Ptr);
+ break;
+ case Type::PointerTyID:
+ Result.PointerVal = *((PointerTy*)Ptr);
+ break;
+ case Type::X86_FP80TyID: {
+ // This is endian dependent, but it will only work on x86 anyway.
+ // FIXME: Will not trap if loading a signaling NaN.
+ uint64_t y[2];
+ memcpy(y, Ptr, 10);
+ Result.IntVal = APInt(80, y);
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType *VT = cast<VectorType>(Ty);
+ const Type *ElemT = VT->getElementType();
+ const unsigned numElems = VT->getNumElements();
+ if (ElemT->isFloatTy()) {
+ Result.AggregateVal.resize(numElems);
+ for (unsigned i = 0; i < numElems; ++i)
+ Result.AggregateVal[i].FloatVal = *((float*)Ptr+i);
+ }
+ if (ElemT->isDoubleTy()) {
+ Result.AggregateVal.resize(numElems);
+ for (unsigned i = 0; i < numElems; ++i)
+ Result.AggregateVal[i].DoubleVal = *((double*)Ptr+i);
+ }
+ if (ElemT->isIntegerTy()) {
+ GenericValue intZero;
+ const unsigned elemBitWidth = cast<IntegerType>(ElemT)->getBitWidth();
+ intZero.IntVal = APInt(elemBitWidth, 0);
+ Result.AggregateVal.resize(numElems, intZero);
+ for (unsigned i = 0; i < numElems; ++i)
+ LoadIntFromMemory(Result.AggregateVal[i].IntVal,
+ (uint8_t*)Ptr+((elemBitWidth+7)/8)*i, (elemBitWidth+7)/8);
+ }
+ break;
+ }
+ default:
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "Cannot load value of type " << *Ty << "!";
+ report_fatal_error(OS.str());
+ }
+}
+
+void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
+ DEBUG(dbgs() << "JIT: Initializing " << Addr << " ");
+ DEBUG(Init->dump());
+ if (isa<UndefValue>(Init))
+ return;
+
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
+ unsigned ElementSize =
+ getDataLayout()->getTypeAllocSize(CP->getType()->getElementType());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize);
+ return;
+ }
+
+ if (isa<ConstantAggregateZero>(Init)) {
+ memset(Addr, 0, (size_t)getDataLayout()->getTypeAllocSize(Init->getType()));
+ return;
+ }
+
+ if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
+ unsigned ElementSize =
+ getDataLayout()->getTypeAllocSize(CPA->getType()->getElementType());
+ for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+ InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize);
+ return;
+ }
+
+ if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
+ const StructLayout *SL =
+ getDataLayout()->getStructLayout(cast<StructType>(CPS->getType()));
+ for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+ InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i));
+ return;
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(Init)) {
+ // CDS is already laid out in host memory order.
+ StringRef Data = CDS->getRawDataValues();
+ memcpy(Addr, Data.data(), Data.size());
+ return;
+ }
+
+ if (Init->getType()->isFirstClassType()) {
+ GenericValue Val = getConstantValue(Init);
+ StoreValueToMemory(Val, (GenericValue*)Addr, Init->getType());
+ return;
+ }
+
+ DEBUG(dbgs() << "Bad Type: " << *Init->getType() << "\n");
+ llvm_unreachable("Unknown constant type to initialize memory with!");
+}
+
+/// EmitGlobals - Emit all of the global variables to memory, storing their
+/// addresses into GlobalAddress. This must make sure to copy the contents of
+/// their initializers into the memory.
+void ExecutionEngine::emitGlobals() {
+ // Loop over all of the global variables in the program, allocating the memory
+ // to hold them. If there is more than one module, do a prepass over globals
+ // to figure out how the different modules should link together.
+ std::map<std::pair<std::string, Type*>,
+ const GlobalValue*> LinkedGlobalsMap;
+
+ if (Modules.size() != 1) {
+ for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
+ Module &M = *Modules[m];
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ const GlobalValue *GV = I;
+ if (GV->hasLocalLinkage() || GV->isDeclaration() ||
+ GV->hasAppendingLinkage() || !GV->hasName())
+ continue;// Ignore external globals and globals with internal linkage.
+
+ const GlobalValue *&GVEntry =
+ LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
+
+ // If this is the first time we've seen this global, it is the canonical
+ // version.
+ if (!GVEntry) {
+ GVEntry = GV;
+ continue;
+ }
+
+ // If the existing global is strong, never replace it.
+ if (GVEntry->hasExternalLinkage() ||
+ GVEntry->hasDLLImportLinkage() ||
+ GVEntry->hasDLLExportLinkage())
+ continue;
+
+ // Otherwise, we know it's linkonce/weak, replace it if this is a strong
+ // symbol. FIXME is this right for common?
+ if (GV->hasExternalLinkage() || GVEntry->hasExternalWeakLinkage())
+ GVEntry = GV;
+ }
+ }
+ }
+
+ std::vector<const GlobalValue*> NonCanonicalGlobals;
+ for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
+ Module &M = *Modules[m];
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ // In the multi-module case, see what this global maps to.
+ if (!LinkedGlobalsMap.empty()) {
+ if (const GlobalValue *GVEntry =
+ LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) {
+ // If something else is the canonical global, ignore this one.
+ if (GVEntry != &*I) {
+ NonCanonicalGlobals.push_back(I);
+ continue;
+ }
+ }
+ }
+
+ if (!I->isDeclaration()) {
+ addGlobalMapping(I, getMemoryForGV(I));
+ } else {
+ // External variable reference. Try to use the dynamic loader to
+ // get a pointer to it.
+ if (void *SymAddr =
+ sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName()))
+ addGlobalMapping(I, SymAddr);
+ else {
+ report_fatal_error("Could not resolve external global address: "
+ +I->getName());
+ }
+ }
+ }
+
+ // If there are multiple modules, map the non-canonical globals to their
+ // canonical location.
+ if (!NonCanonicalGlobals.empty()) {
+ for (unsigned i = 0, e = NonCanonicalGlobals.size(); i != e; ++i) {
+ const GlobalValue *GV = NonCanonicalGlobals[i];
+ const GlobalValue *CGV =
+ LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
+ void *Ptr = getPointerToGlobalIfAvailable(CGV);
+ assert(Ptr && "Canonical global wasn't codegen'd!");
+ addGlobalMapping(GV, Ptr);
+ }
+ }
+
+ // Now that all of the globals are set up in memory, loop through them all
+ // and initialize their contents.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ if (!LinkedGlobalsMap.empty()) {
+ if (const GlobalValue *GVEntry =
+ LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())])
+ if (GVEntry != &*I) // Not the canonical variable.
+ continue;
+ }
+ EmitGlobalVariable(I);
+ }
+ }
+ }
+}
+
+// EmitGlobalVariable - This method emits the specified global variable to the
+// address specified in GlobalAddresses, or allocates new memory if it's not
+// already in the map.
+void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
+ void *GA = getPointerToGlobalIfAvailable(GV);
+
+ if (GA == 0) {
+ // If it's not already specified, allocate memory for the global.
+ GA = getMemoryForGV(GV);
+ addGlobalMapping(GV, GA);
+ }
+
+ // Don't initialize if it's thread local, let the client do it.
+ if (!GV->isThreadLocal())
+ InitializeMemory(GV->getInitializer(), GA);
+
+ Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)getDataLayout()->getTypeAllocSize(ElTy);
+ NumInitBytes += (unsigned)GVSize;
+ ++NumGlobals;
+}
+
+ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE)
+ : EE(EE), GlobalAddressMap(this) {
+}
+
+sys::Mutex *
+ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) {
+ return &EES->EE.lock;
+}
+
+void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES,
+ const GlobalValue *Old) {
+ void *OldVal = EES->GlobalAddressMap.lookup(Old);
+ EES->GlobalAddressReverseMap.erase(OldVal);
+}
+
+void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *,
+ const GlobalValue *,
+ const GlobalValue *) {
+ llvm_unreachable("The ExecutionEngine doesn't know how to handle a"
+ " RAUW on a value it has a global mapping for.");
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
new file mode 100644
index 000000000000..f4e8246476a5
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -0,0 +1,252 @@
+//===-- ExecutionEngineBindings.cpp - C bindings for EEs ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the C bindings for the ExecutionEngine library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm-c/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstring>
+
+using namespace llvm;
+
+/*===-- Operations on generic values --------------------------------------===*/
+
+LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty,
+ unsigned long long N,
+ LLVMBool IsSigned) {
+ GenericValue *GenVal = new GenericValue();
+ GenVal->IntVal = APInt(unwrap<IntegerType>(Ty)->getBitWidth(), N, IsSigned);
+ return wrap(GenVal);
+}
+
+LLVMGenericValueRef LLVMCreateGenericValueOfPointer(void *P) {
+ GenericValue *GenVal = new GenericValue();
+ GenVal->PointerVal = P;
+ return wrap(GenVal);
+}
+
+LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef TyRef, double N) {
+ GenericValue *GenVal = new GenericValue();
+ switch (unwrap(TyRef)->getTypeID()) {
+ case Type::FloatTyID:
+ GenVal->FloatVal = N;
+ break;
+ case Type::DoubleTyID:
+ GenVal->DoubleVal = N;
+ break;
+ default:
+ llvm_unreachable("LLVMGenericValueToFloat supports only float and double.");
+ }
+ return wrap(GenVal);
+}
+
+unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef) {
+ return unwrap(GenValRef)->IntVal.getBitWidth();
+}
+
+unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenValRef,
+ LLVMBool IsSigned) {
+ GenericValue *GenVal = unwrap(GenValRef);
+ if (IsSigned)
+ return GenVal->IntVal.getSExtValue();
+ else
+ return GenVal->IntVal.getZExtValue();
+}
+
+void *LLVMGenericValueToPointer(LLVMGenericValueRef GenVal) {
+ return unwrap(GenVal)->PointerVal;
+}
+
+double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) {
+ switch (unwrap(TyRef)->getTypeID()) {
+ case Type::FloatTyID:
+ return unwrap(GenVal)->FloatVal;
+ case Type::DoubleTyID:
+ return unwrap(GenVal)->DoubleVal;
+ default:
+ llvm_unreachable("LLVMGenericValueToFloat supports only float and double.");
+ }
+}
+
+void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) {
+ delete unwrap(GenVal);
+}
+
+/*===-- Operations on execution engines -----------------------------------===*/
+
+LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleRef M,
+ char **OutError) {
+ std::string Error;
+ EngineBuilder builder(unwrap(M));
+ builder.setEngineKind(EngineKind::Either)
+ .setErrorStr(&Error);
+ if (ExecutionEngine *EE = builder.create()){
+ *OutEE = wrap(EE);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleRef M,
+ char **OutError) {
+ std::string Error;
+ EngineBuilder builder(unwrap(M));
+ builder.setEngineKind(EngineKind::Interpreter)
+ .setErrorStr(&Error);
+ if (ExecutionEngine *Interp = builder.create()) {
+ *OutInterp = wrap(Interp);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleRef M,
+ unsigned OptLevel,
+ char **OutError) {
+ std::string Error;
+ EngineBuilder builder(unwrap(M));
+ builder.setEngineKind(EngineKind::JIT)
+ .setErrorStr(&Error)
+ .setOptLevel((CodeGenOpt::Level)OptLevel);
+ if (ExecutionEngine *JIT = builder.create()) {
+ *OutJIT = wrap(JIT);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateExecutionEngineForModule(OutEE,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OutError);
+}
+
+LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateInterpreterForModule(OutInterp,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OutError);
+}
+
+LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleProviderRef MP,
+ unsigned OptLevel,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateJITCompilerForModule(OutJIT,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OptLevel, OutError);
+}
+
+
+void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) {
+ delete unwrap(EE);
+}
+
+void LLVMRunStaticConstructors(LLVMExecutionEngineRef EE) {
+ unwrap(EE)->runStaticConstructorsDestructors(false);
+}
+
+void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) {
+ unwrap(EE)->runStaticConstructorsDestructors(true);
+}
+
+int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F,
+ unsigned ArgC, const char * const *ArgV,
+ const char * const *EnvP) {
+ std::vector<std::string> ArgVec;
+ for (unsigned I = 0; I != ArgC; ++I)
+ ArgVec.push_back(ArgV[I]);
+
+ return unwrap(EE)->runFunctionAsMain(unwrap<Function>(F), ArgVec, EnvP);
+}
+
+LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F,
+ unsigned NumArgs,
+ LLVMGenericValueRef *Args) {
+ std::vector<GenericValue> ArgVec;
+ ArgVec.reserve(NumArgs);
+ for (unsigned I = 0; I != NumArgs; ++I)
+ ArgVec.push_back(*unwrap(Args[I]));
+
+ GenericValue *Result = new GenericValue();
+ *Result = unwrap(EE)->runFunction(unwrap<Function>(F), ArgVec);
+ return wrap(Result);
+}
+
+void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) {
+ unwrap(EE)->freeMachineCodeForFunction(unwrap<Function>(F));
+}
+
+void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){
+ unwrap(EE)->addModule(unwrap(M));
+}
+
+void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){
+ /* The module provider is now actually a module. */
+ LLVMAddModule(EE, reinterpret_cast<LLVMModuleRef>(MP));
+}
+
+LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M,
+ LLVMModuleRef *OutMod, char **OutError) {
+ Module *Mod = unwrap(M);
+ unwrap(EE)->removeModule(Mod);
+ *OutMod = wrap(Mod);
+ return 0;
+}
+
+LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
+ LLVMModuleProviderRef MP,
+ LLVMModuleRef *OutMod, char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMRemoveModule(EE, reinterpret_cast<LLVMModuleRef>(MP), OutMod,
+ OutError);
+}
+
+LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
+ LLVMValueRef *OutFn) {
+ if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) {
+ *OutFn = wrap(F);
+ return 0;
+ }
+ return 1;
+}
+
+void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) {
+ return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn));
+}
+
+LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) {
+ return wrap(unwrap(EE)->getDataLayout());
+}
+
+void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
+ void* Addr) {
+ unwrap(EE)->addGlobalMapping(unwrap<GlobalValue>(Global), Addr);
+}
+
+void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) {
+ return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global));
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
new file mode 100644
index 000000000000..7dc295fcbf73
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -0,0 +1,302 @@
+//===-- IntelJITEventListener.cpp - Tell Intel profiler about JITed code --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object to tell Intel(R) VTune(TM)
+// Amplifier XE 2011 about JITted functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
+#define DEBUG_TYPE "amplifier-jit-event-listener"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/ValueHandle.h"
+#include "EventListenerCommon.h"
+#include "IntelJITEventsWrapper.h"
+
+using namespace llvm;
+using namespace llvm::jitprofiling;
+
+namespace {
+
+class IntelJITEventListener : public JITEventListener {
+ typedef DenseMap<void*, unsigned int> MethodIDMap;
+
+ OwningPtr<IntelJITEventsWrapper> Wrapper;
+ MethodIDMap MethodIDs;
+ FilenameCache Filenames;
+
+ typedef SmallVector<const void *, 64> MethodAddressVector;
+ typedef DenseMap<const void *, MethodAddressVector> ObjectMap;
+
+ ObjectMap LoadedObjectMap;
+
+public:
+ IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) {
+ Wrapper.reset(libraryWrapper);
+ }
+
+ ~IntelJITEventListener() {
+ }
+
+ virtual void NotifyFunctionEmitted(const Function &F,
+ void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details);
+
+ virtual void NotifyFreeingMachineCode(void *OldPtr);
+
+ virtual void NotifyObjectEmitted(const ObjectImage &Obj);
+
+ virtual void NotifyFreeingObject(const ObjectImage &Obj);
+};
+
+static LineNumberInfo LineStartToIntelJITFormat(
+ uintptr_t StartAddress,
+ uintptr_t Address,
+ DebugLoc Loc) {
+ LineNumberInfo Result;
+
+ Result.Offset = Address - StartAddress;
+ Result.LineNumber = Loc.getLine();
+
+ return Result;
+}
+
+static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress,
+ uintptr_t Address,
+ DILineInfo Line) {
+ LineNumberInfo Result;
+
+ Result.Offset = Address - StartAddress;
+ Result.LineNumber = Line.getLine();
+
+ return Result;
+}
+
+static iJIT_Method_Load FunctionDescToIntelJITFormat(
+ IntelJITEventsWrapper& Wrapper,
+ const char* FnName,
+ uintptr_t FnStart,
+ size_t FnSize) {
+ iJIT_Method_Load Result;
+ memset(&Result, 0, sizeof(iJIT_Method_Load));
+
+ Result.method_id = Wrapper.iJIT_GetNewMethodID();
+ Result.method_name = const_cast<char*>(FnName);
+ Result.method_load_address = reinterpret_cast<void*>(FnStart);
+ Result.method_size = FnSize;
+
+ Result.class_id = 0;
+ Result.class_file_name = NULL;
+ Result.user_data = NULL;
+ Result.user_data_size = 0;
+ Result.env = iJDE_JittingAPI;
+
+ return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void IntelJITEventListener::NotifyFunctionEmitted(
+ const Function &F, void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details) {
+ iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper,
+ F.getName().data(),
+ reinterpret_cast<uint64_t>(FnStart),
+ FnSize);
+
+ std::vector<LineNumberInfo> LineInfo;
+
+ if (!Details.LineStarts.empty()) {
+ // Now convert the line number information from the address/DebugLoc
+ // format in Details to the offset/lineno in Intel JIT API format.
+
+ LineInfo.reserve(Details.LineStarts.size() + 1);
+
+ DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+ assert(!FirstLoc.isUnknown()
+ && "LineStarts should not contain unknown DebugLocs");
+
+ MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+ DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+ if (FunctionDI.Verify()) {
+ FunctionMessage.source_file_name = const_cast<char*>(
+ Filenames.getFullPath(FirstLocScope));
+
+ LineNumberInfo FirstLine;
+ FirstLine.Offset = 0;
+ FirstLine.LineNumber = FunctionDI.getLineNumber();
+ LineInfo.push_back(FirstLine);
+ }
+
+ for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator I =
+ Details.LineStarts.begin(), E = Details.LineStarts.end();
+ I != E; ++I) {
+ // This implementation ignores the DebugLoc filename because the Intel
+ // JIT API does not support multiple source files associated with a single
+ // JIT function
+ LineInfo.push_back(LineStartToIntelJITFormat(
+ reinterpret_cast<uintptr_t>(FnStart),
+ I->Address,
+ I->Loc));
+
+ // If we have no file name yet for the function, use the filename from
+ // the first instruction that has one
+ if (FunctionMessage.source_file_name == 0) {
+ MDNode *scope = I->Loc.getScope(
+ Details.MF->getFunction()->getContext());
+ FunctionMessage.source_file_name = const_cast<char*>(
+ Filenames.getFullPath(scope));
+ }
+ }
+
+ FunctionMessage.line_number_size = LineInfo.size();
+ FunctionMessage.line_number_table = &*LineInfo.begin();
+ } else {
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+ }
+
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
+ MethodIDs[FnStart] = FunctionMessage.method_id;
+}
+
+void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
+ MethodIDMap::iterator I = MethodIDs.find(FnStart);
+ if (I != MethodIDs.end()) {
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
+ MethodIDs.erase(I);
+ }
+}
+
+void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
+ // Get the address of the object image for use as a unique identifier
+ const void* ObjData = Obj.getData().data();
+ DIContext* Context = DIContext::getDWARFContext(Obj.getObjectFile());
+ MethodAddressVector Functions;
+
+ // Use symbol info to iterate functions in the object.
+ error_code ec;
+ for (object::symbol_iterator I = Obj.begin_symbols(),
+ E = Obj.end_symbols();
+ I != E && !ec;
+ I.increment(ec)) {
+ std::vector<LineNumberInfo> LineInfo;
+ std::string SourceFileName;
+
+ object::SymbolRef::Type SymType;
+ if (I->getType(SymType)) continue;
+ if (SymType == object::SymbolRef::ST_Function) {
+ StringRef Name;
+ uint64_t Addr;
+ uint64_t Size;
+ if (I->getName(Name)) continue;
+ if (I->getAddress(Addr)) continue;
+ if (I->getSize(Size)) continue;
+
+ // Record this address in a local vector
+ Functions.push_back((void*)Addr);
+
+ // Build the function loaded notification message
+ iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper,
+ Name.data(),
+ Addr,
+ Size);
+ if (Context) {
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
+ DILineInfoTable::iterator Begin = Lines.begin();
+ DILineInfoTable::iterator End = Lines.end();
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ LineInfo.push_back(DILineInfoToIntelJITFormat((uintptr_t)Addr,
+ It->first,
+ It->second));
+ }
+ if (LineInfo.size() == 0) {
+ FunctionMessage.source_file_name = 0;
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+ } else {
+ SourceFileName = Lines.front().second.getFileName();
+ FunctionMessage.source_file_name = (char *)SourceFileName.c_str();
+ FunctionMessage.line_number_size = LineInfo.size();
+ FunctionMessage.line_number_table = &*LineInfo.begin();
+ }
+ } else {
+ FunctionMessage.source_file_name = 0;
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+ }
+
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
+ MethodIDs[(void*)Addr] = FunctionMessage.method_id;
+ }
+ }
+
+ // To support object unload notification, we need to keep a list of
+ // registered function addresses for each loaded object. We will
+ // use the MethodIDs map to get the registered ID for each function.
+ LoadedObjectMap[ObjData] = Functions;
+}
+
+void IntelJITEventListener::NotifyFreeingObject(const ObjectImage &Obj) {
+ // Get the address of the object image for use as a unique identifier
+ const void* ObjData = Obj.getData().data();
+
+ // Get the object's function list from LoadedObjectMap
+ ObjectMap::iterator OI = LoadedObjectMap.find(ObjData);
+ if (OI == LoadedObjectMap.end())
+ return;
+ MethodAddressVector& Functions = OI->second;
+
+ // Walk the function list, unregistering each function
+ for (MethodAddressVector::iterator FI = Functions.begin(),
+ FE = Functions.end();
+ FI != FE;
+ ++FI) {
+ void* FnStart = const_cast<void*>(*FI);
+ MethodIDMap::iterator MI = MethodIDs.find(FnStart);
+ if (MI != MethodIDs.end()) {
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
+ &MI->second);
+ MethodIDs.erase(MI);
+ }
+ }
+
+ // Erase the object from LoadedObjectMap
+ LoadedObjectMap.erase(OI);
+}
+
+} // anonymous namespace.
+
+namespace llvm {
+JITEventListener *JITEventListener::createIntelJITEventListener() {
+ return new IntelJITEventListener(new IntelJITEventsWrapper);
+}
+
+// for testing
+JITEventListener *JITEventListener::createIntelJITEventListener(
+ IntelJITEventsWrapper* TestImpl) {
+ return new IntelJITEventListener(TestImpl);
+}
+
+} // namespace llvm
+
diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
new file mode 100644
index 000000000000..3d9ff5351610
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
@@ -0,0 +1,96 @@
+//===-- IntelJITEventsWrapper.h - Intel JIT Events API Wrapper --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a wrapper for the Intel JIT Events API. It allows for the
+// implementation of the jitprofiling library to be swapped with an alternative
+// implementation (for testing). To include this file, you must have the
+// jitprofiling.h header available; it is available in Intel(R) VTune(TM)
+// Amplifier XE 2011.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INTEL_JIT_EVENTS_WRAPPER_H
+#define INTEL_JIT_EVENTS_WRAPPER_H
+
+#include "jitprofiling.h"
+
+namespace llvm {
+
+class IntelJITEventsWrapper {
+ // Function pointer types for testing implementation of Intel jitprofiling
+ // library
+ typedef int (*NotifyEventPtr)(iJIT_JVM_EVENT, void*);
+ typedef void (*RegisterCallbackExPtr)(void *, iJIT_ModeChangedEx );
+ typedef iJIT_IsProfilingActiveFlags (*IsProfilingActivePtr)(void);
+ typedef void (*FinalizeThreadPtr)(void);
+ typedef void (*FinalizeProcessPtr)(void);
+ typedef unsigned int (*GetNewMethodIDPtr)(void);
+
+ NotifyEventPtr NotifyEventFunc;
+ RegisterCallbackExPtr RegisterCallbackExFunc;
+ IsProfilingActivePtr IsProfilingActiveFunc;
+ GetNewMethodIDPtr GetNewMethodIDFunc;
+
+public:
+ bool isAmplifierRunning() {
+ return iJIT_IsProfilingActive() == iJIT_SAMPLING_ON;
+ }
+
+ IntelJITEventsWrapper()
+ : NotifyEventFunc(::iJIT_NotifyEvent),
+ RegisterCallbackExFunc(::iJIT_RegisterCallbackEx),
+ IsProfilingActiveFunc(::iJIT_IsProfilingActive),
+ GetNewMethodIDFunc(::iJIT_GetNewMethodID) {
+ }
+
+ IntelJITEventsWrapper(NotifyEventPtr NotifyEventImpl,
+ RegisterCallbackExPtr RegisterCallbackExImpl,
+ IsProfilingActivePtr IsProfilingActiveImpl,
+ FinalizeThreadPtr FinalizeThreadImpl,
+ FinalizeProcessPtr FinalizeProcessImpl,
+ GetNewMethodIDPtr GetNewMethodIDImpl)
+ : NotifyEventFunc(NotifyEventImpl),
+ RegisterCallbackExFunc(RegisterCallbackExImpl),
+ IsProfilingActiveFunc(IsProfilingActiveImpl),
+ GetNewMethodIDFunc(GetNewMethodIDImpl) {
+ }
+
+ // Sends an event anncouncing that a function has been emitted
+ // return values are event-specific. See Intel documentation for details.
+ int iJIT_NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) {
+ if (!NotifyEventFunc)
+ return -1;
+ return NotifyEventFunc(EventType, EventSpecificData);
+ }
+
+ // Registers a callback function to receive notice of profiling state changes
+ void iJIT_RegisterCallbackEx(void *UserData,
+ iJIT_ModeChangedEx NewModeCallBackFuncEx) {
+ if (RegisterCallbackExFunc)
+ RegisterCallbackExFunc(UserData, NewModeCallBackFuncEx);
+ }
+
+ // Returns the current profiler mode
+ iJIT_IsProfilingActiveFlags iJIT_IsProfilingActive(void) {
+ if (!IsProfilingActiveFunc)
+ return iJIT_NOTHING_RUNNING;
+ return IsProfilingActiveFunc();
+ }
+
+ // Generates a locally unique method ID for use in code registration
+ unsigned int iJIT_GetNewMethodID(void) {
+ if (!GetNewMethodIDFunc)
+ return -1;
+ return GetNewMethodIDFunc();
+ }
+};
+
+} //namespace llvm
+
+#endif //INTEL_JIT_EVENTS_WRAPPER_H
diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
new file mode 100644
index 000000000000..1f029fb1c45b
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
@@ -0,0 +1,454 @@
+/*===-- ittnotify_config.h - JIT Profiling API internal config-----*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * Profiling API internal config.
+ *
+ * NOTE: This file comes in a style different from the rest of LLVM
+ * source base since this is a piece of code shared from Intel(R)
+ * products. Please do not reformat / re-style this code to make
+ * subsequent merges and contributions from the original source base eaiser.
+ *
+ *===----------------------------------------------------------------------===*/
+#ifndef _ITTNOTIFY_CONFIG_H_
+#define _ITTNOTIFY_CONFIG_H_
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# else
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# else
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif /* _WIN32 */
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef CDECL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define CDECL /* not actual on x86_64 platform */
+# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# define CDECL __attribute__ ((cdecl))
+# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* CDECL */
+
+#ifndef STDCALL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define STDCALL /* not supported on x86_64 platform */
+# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# define STDCALL __attribute__ ((stdcall))
+# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI CDECL
+#define LIBITTAPI CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL CDECL
+#define LIBITTAPI_CALL CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#endif /* __STRICT_ANSI__ */
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifndef ITT_ARCH_IA32
+# define ITT_ARCH_IA32 1
+#endif /* ITT_ARCH_IA32 */
+
+#ifndef ITT_ARCH_IA32E
+# define ITT_ARCH_IA32E 2
+#endif /* ITT_ARCH_IA32E */
+
+#ifndef ITT_ARCH_IA64
+# define ITT_ARCH_IA64 3
+#endif /* ITT_ARCH_IA64 */
+
+#ifndef ITT_ARCH
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define ITT_ARCH ITT_ARCH_IA32E
+# elif defined _M_IA64 || defined __ia64
+# define ITT_ARCH ITT_ARCH_IA64
+# else
+# define ITT_ARCH ITT_ARCH_IA32
+# endif
+#endif
+
+#ifdef __cplusplus
+# define ITT_EXTERN_C extern "C"
+#else
+# define ITT_EXTERN_C /* nothing */
+#endif /* __cplusplus */
+
+#define ITT_TO_STR_AUX(x) #x
+#define ITT_TO_STR(x) ITT_TO_STR_AUX(x)
+
+#define __ITT_BUILD_ASSERT(expr, suffix) do { \
+ static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \
+ __itt_build_check_##suffix[0] = 0; \
+} while(0)
+#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix)
+#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__)
+
+#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 }
+
+/* Replace with snapshot date YYYYMMDD for promotion build. */
+#define API_VERSION_BUILD 20111111
+
+#ifndef API_VERSION_NUM
+#define API_VERSION_NUM 0.0.0
+#endif /* API_VERSION_NUM */
+
+#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \
+ " (" ITT_TO_STR(API_VERSION_BUILD) ")"
+
+/* OS communication functions */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <windows.h>
+typedef HMODULE lib_t;
+typedef DWORD TIDT;
+typedef CRITICAL_SECTION mutex_t;
+#define MUTEX_INITIALIZER { 0 }
+#define strong_alias(name, aliasname) /* empty for Windows */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <dlfcn.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */
+#endif /* _GNU_SOURCE */
+#include <pthread.h>
+typedef void* lib_t;
+typedef pthread_t TIDT;
+typedef pthread_mutex_t mutex_t;
+#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_get_proc(lib, name) GetProcAddress(lib, name)
+#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex)
+#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex)
+#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex)
+#define __itt_load_lib(name) LoadLibraryA(name)
+#define __itt_unload_lib(handle) FreeLibrary(handle)
+#define __itt_system_error() (int)GetLastError()
+#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2)
+#define __itt_fstrlen(s) lstrlenA(s)
+#define __itt_fstrcpyn(s1, s2, l) lstrcpynA(s1, s2, l)
+#define __itt_fstrdup(s) _strdup(s)
+#define __itt_thread_id() GetCurrentThreadId()
+#define __itt_thread_yield() SwitchToThread()
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+ return InterlockedIncrement(ptr);
+}
+#endif /* ITT_SIMPLE_INIT */
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+#define __itt_get_proc(lib, name) dlsym(lib, name)
+#define __itt_mutex_init(mutex) {\
+ pthread_mutexattr_t mutex_attr; \
+ int error_code = pthread_mutexattr_init(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \
+ error_code); \
+ error_code = pthread_mutexattr_settype(&mutex_attr, \
+ PTHREAD_MUTEX_RECURSIVE); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \
+ error_code); \
+ error_code = pthread_mutex_init(mutex, &mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutex_init", \
+ error_code); \
+ error_code = pthread_mutexattr_destroy(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \
+ error_code); \
+}
+#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex)
+#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex)
+#define __itt_load_lib(name) dlopen(name, RTLD_LAZY)
+#define __itt_unload_lib(handle) dlclose(handle)
+#define __itt_system_error() errno
+#define __itt_fstrcmp(s1, s2) strcmp(s1, s2)
+#define __itt_fstrlen(s) strlen(s)
+#define __itt_fstrcpyn(s1, s2, l) strncpy(s1, s2, l)
+#define __itt_fstrdup(s) strdup(s)
+#define __itt_thread_id() pthread_self()
+#define __itt_thread_yield() sched_yield()
+#if ITT_ARCH==ITT_ARCH_IA64
+#ifdef __INTEL_COMPILER
+#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val)
+#else /* __INTEL_COMPILER */
+/* TODO: Add Support for not Intel compilers for IA64 */
+#endif /* __INTEL_COMPILER */
+#else /* ITT_ARCH!=ITT_ARCH_IA64 */
+ITT_INLINE long
+__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
+{
+ long result;
+ __asm__ __volatile__("lock\nxadd %0,%1"
+ : "=r"(result),"=m"(*(long*)ptr)
+ : "0"(addend), "m"(*(long*)ptr)
+ : "memory");
+ return result;
+}
+#endif /* ITT_ARCH==ITT_ARCH_IA64 */
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+ return __TBB_machine_fetchadd4(ptr, 1) + 1L;
+}
+#endif /* ITT_SIMPLE_INIT */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+typedef enum {
+ __itt_collection_normal = 0,
+ __itt_collection_paused = 1
+} __itt_collection_state;
+
+typedef enum {
+ __itt_thread_normal = 0,
+ __itt_thread_ignored = 1
+} __itt_thread_state;
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_thread_info
+{
+ const char* nameA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* nameW;
+#endif /* UNICODE || _UNICODE */
+ TIDT tid;
+ __itt_thread_state state; /*!< Thread state (paused or normal) */
+ int extra1; /*!< Reserved to the runtime */
+ void* extra2; /*!< Reserved to the runtime */
+ struct ___itt_thread_info* next;
+} __itt_thread_info;
+
+#include "ittnotify_types.h" /* For __itt_group_id definition */
+
+typedef struct ___itt_api_info_20101001
+{
+ const char* name;
+ void** func_ptr;
+ void* init_func;
+ __itt_group_id group;
+} __itt_api_info_20101001;
+
+typedef struct ___itt_api_info
+{
+ const char* name;
+ void** func_ptr;
+ void* init_func;
+ void* null_func;
+ __itt_group_id group;
+} __itt_api_info;
+
+struct ___itt_domain;
+struct ___itt_string_handle;
+
+typedef struct ___itt_global
+{
+ unsigned char magic[8];
+ unsigned long version_major;
+ unsigned long version_minor;
+ unsigned long version_build;
+ volatile long api_initialized;
+ volatile long mutex_initialized;
+ volatile long atomic_counter;
+ mutex_t mutex;
+ lib_t lib;
+ void* error_handler;
+ const char** dll_path_ptr;
+ __itt_api_info* api_list_ptr;
+ struct ___itt_global* next;
+ /* Joinable structures below */
+ __itt_thread_info* thread_list;
+ struct ___itt_domain* domain_list;
+ struct ___itt_string_handle* string_list;
+ __itt_collection_state state;
+} __itt_global;
+
+#pragma pack(pop)
+
+#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \
+ h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+ if (h != NULL) { \
+ h->tid = t; \
+ h->nameA = NULL; \
+ h->nameW = n ? _wcsdup(n) : NULL; \
+ h->state = s; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->thread_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \
+ h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+ if (h != NULL) { \
+ h->tid = t; \
+ h->nameA = n ? __itt_fstrdup(n) : NULL; \
+ h->nameW = NULL; \
+ h->state = s; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->thread_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \
+ h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+ if (h != NULL) { \
+ h->flags = 0; /* domain is disabled by default */ \
+ h->nameA = NULL; \
+ h->nameW = name ? _wcsdup(name) : NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->domain_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \
+ h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+ if (h != NULL) { \
+ h->flags = 0; /* domain is disabled by default */ \
+ h->nameA = name ? __itt_fstrdup(name) : NULL; \
+ h->nameW = NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->domain_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \
+ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+ if (h != NULL) { \
+ h->strA = NULL; \
+ h->strW = name ? _wcsdup(name) : NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->string_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \
+ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+ if (h != NULL) { \
+ h->strA = name ? __itt_fstrdup(name) : NULL; \
+ h->strW = NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->string_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#endif /* _ITTNOTIFY_CONFIG_H_ */
diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
new file mode 100644
index 000000000000..5df752f66f10
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
@@ -0,0 +1,70 @@
+/*===-- ittnotify_types.h - JIT Profiling API internal types--------*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * NOTE: This file comes in a style different from the rest of LLVM
+ * source base since this is a piece of code shared from Intel(R)
+ * products. Please do not reformat / re-style this code to make
+ * subsequent merges and contributions from the original source base eaiser.
+ *
+ *===----------------------------------------------------------------------===*/
+#ifndef _ITTNOTIFY_TYPES_H_
+#define _ITTNOTIFY_TYPES_H_
+
+typedef enum ___itt_group_id
+{
+ __itt_group_none = 0,
+ __itt_group_legacy = 1<<0,
+ __itt_group_control = 1<<1,
+ __itt_group_thread = 1<<2,
+ __itt_group_mark = 1<<3,
+ __itt_group_sync = 1<<4,
+ __itt_group_fsync = 1<<5,
+ __itt_group_jit = 1<<6,
+ __itt_group_model = 1<<7,
+ __itt_group_splitter_min = 1<<7,
+ __itt_group_counter = 1<<8,
+ __itt_group_frame = 1<<9,
+ __itt_group_stitch = 1<<10,
+ __itt_group_heap = 1<<11,
+ __itt_group_splitter_max = 1<<12,
+ __itt_group_structure = 1<<12,
+ __itt_group_suppress = 1<<13,
+ __itt_group_all = -1
+} __itt_group_id;
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_group_list
+{
+ __itt_group_id id;
+ const char* name;
+} __itt_group_list;
+
+#pragma pack(pop)
+
+#define ITT_GROUP_LIST(varname) \
+ static __itt_group_list varname[] = { \
+ { __itt_group_all, "all" }, \
+ { __itt_group_control, "control" }, \
+ { __itt_group_thread, "thread" }, \
+ { __itt_group_mark, "mark" }, \
+ { __itt_group_sync, "sync" }, \
+ { __itt_group_fsync, "fsync" }, \
+ { __itt_group_jit, "jit" }, \
+ { __itt_group_model, "model" }, \
+ { __itt_group_counter, "counter" }, \
+ { __itt_group_frame, "frame" }, \
+ { __itt_group_stitch, "stitch" }, \
+ { __itt_group_heap, "heap" }, \
+ { __itt_group_structure, "structure" }, \
+ { __itt_group_suppress, "suppress" }, \
+ { __itt_group_none, NULL } \
+ }
+
+#endif /* _ITTNOTIFY_TYPES_H_ */
diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
new file mode 100644
index 000000000000..7b507de864cd
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
@@ -0,0 +1,481 @@
+/*===-- jitprofiling.c - JIT (Just-In-Time) Profiling API----------*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * Profiling API implementation.
+ *
+ * NOTE: This file comes in a style different from the rest of LLVM
+ * source base since this is a piece of code shared from Intel(R)
+ * products. Please do not reformat / re-style this code to make
+ * subsequent merges and contributions from the original source base eaiser.
+ *
+ *===----------------------------------------------------------------------===*/
+#include "ittnotify_config.h"
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <windows.h>
+#pragma optimize("", off)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <pthread.h>
+#include <dlfcn.h>
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <malloc.h>
+#include <stdlib.h>
+
+#include "jitprofiling.h"
+
+static const char rcsid[] = "\n@(#) $Revision: 243501 $\n";
+
+#define DLL_ENVIRONMENT_VAR "VS_PROFILER"
+
+#ifndef NEW_DLL_ENVIRONMENT_VAR
+#if ITT_ARCH==ITT_ARCH_IA32
+#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER32"
+#else
+#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER64"
+#endif
+#endif /* NEW_DLL_ENVIRONMENT_VAR */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define DEFAULT_DLLNAME "JitPI.dll"
+HINSTANCE m_libHandle = NULL;
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define DEFAULT_DLLNAME "libJitPI.so"
+void* m_libHandle = NULL;
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/* default location of JIT profiling agent on Android */
+#define ANDROID_JIT_AGENT_PATH "/data/intel/libittnotify.so"
+
+/* the function pointers */
+typedef unsigned int(*TPInitialize)(void);
+static TPInitialize FUNC_Initialize=NULL;
+
+typedef unsigned int(*TPNotify)(unsigned int, void*);
+static TPNotify FUNC_NotifyEvent=NULL;
+
+static iJIT_IsProfilingActiveFlags executionMode = iJIT_NOTHING_RUNNING;
+
+/* end collector dll part. */
+
+/* loadiJIT_Funcs() : this function is called just in the beginning
+ * and is responsible to load the functions from BistroJavaCollector.dll
+ * result:
+ * on success: the functions loads, iJIT_DLL_is_missing=0, return value = 1
+ * on failure: the functions are NULL, iJIT_DLL_is_missing=1, return value = 0
+ */
+static int loadiJIT_Funcs(void);
+
+/* global representing whether the BistroJavaCollector can't be loaded */
+static int iJIT_DLL_is_missing = 0;
+
+/* Virtual stack - the struct is used as a virtual stack for each thread.
+ * Every thread initializes with a stack of size INIT_TOP_STACK.
+ * Every method entry decreases from the current stack point,
+ * and when a thread stack reaches its top of stack (return from the global
+ * function), the top of stack and the current stack increase. Notice that
+ * when returning from a function the stack pointer is the address of
+ * the function return.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static DWORD threadLocalStorageHandle = 0;
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+static pthread_key_t threadLocalStorageHandle = (pthread_key_t)0;
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#define INIT_TOP_Stack 10000
+
+typedef struct
+{
+ unsigned int TopStack;
+ unsigned int CurrentStack;
+} ThreadStack, *pThreadStack;
+
+/* end of virtual stack. */
+
+/*
+ * The function for reporting virtual-machine related events to VTune.
+ * Note: when reporting iJVM_EVENT_TYPE_ENTER_NIDS, there is no need to fill
+ * in the stack_id field in the iJIT_Method_NIDS structure, as VTune fills it.
+ * The return value in iJVM_EVENT_TYPE_ENTER_NIDS &&
+ * iJVM_EVENT_TYPE_LEAVE_NIDS events will be 0 in case of failure.
+ * in iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED event
+ * it will be -1 if EventSpecificData == 0 otherwise it will be 0.
+*/
+
+ITT_EXTERN_C int JITAPI
+iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData)
+{
+ int ReturnValue;
+
+ /*
+ * This section is for debugging outside of VTune.
+ * It creates the environment variables that indicates call graph mode.
+ * If running outside of VTune remove the remark.
+ *
+ *
+ * static int firstTime = 1;
+ * char DoCallGraph[12] = "DoCallGraph";
+ * if (firstTime)
+ * {
+ * firstTime = 0;
+ * SetEnvironmentVariable( "BISTRO_COLLECTORS_DO_CALLGRAPH", DoCallGraph);
+ * }
+ *
+ * end of section.
+ */
+
+ /* initialization part - the functions have not been loaded yet. This part
+ * will load the functions, and check if we are in Call Graph mode.
+ * (for special treatment).
+ */
+ if (!FUNC_NotifyEvent)
+ {
+ if (iJIT_DLL_is_missing)
+ return 0;
+
+ /* load the Function from the DLL */
+ if (!loadiJIT_Funcs())
+ return 0;
+
+ /* Call Graph initialization. */
+ }
+
+ /* If the event is method entry/exit, check that in the current mode
+ * VTune is allowed to receive it
+ */
+ if ((event_type == iJVM_EVENT_TYPE_ENTER_NIDS ||
+ event_type == iJVM_EVENT_TYPE_LEAVE_NIDS) &&
+ (executionMode != iJIT_CALLGRAPH_ON))
+ {
+ return 0;
+ }
+ /* This section is performed when method enter event occurs.
+ * It updates the virtual stack, or creates it if this is the first
+ * method entry in the thread. The stack pointer is decreased.
+ */
+ if (event_type == iJVM_EVENT_TYPE_ENTER_NIDS)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ pThreadStack threadStack =
+ (pThreadStack)TlsGetValue (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pThreadStack threadStack =
+ (pThreadStack)pthread_getspecific(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ /* check for use of reserved method IDs */
+ if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 )
+ return 0;
+
+ if (!threadStack)
+ {
+ /* initialize the stack. */
+ threadStack = (pThreadStack) calloc (sizeof(ThreadStack), 1);
+ threadStack->TopStack = INIT_TOP_Stack;
+ threadStack->CurrentStack = INIT_TOP_Stack;
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ TlsSetValue(threadLocalStorageHandle,(void*)threadStack);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_setspecific(threadLocalStorageHandle,(void*)threadStack);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+
+ /* decrease the stack. */
+ ((piJIT_Method_NIDS) EventSpecificData)->stack_id =
+ (threadStack->CurrentStack)--;
+ }
+
+ /* This section is performed when method leave event occurs
+ * It updates the virtual stack.
+ * Increases the stack pointer.
+ * If the stack pointer reached the top (left the global function)
+ * increase the pointer and the top pointer.
+ */
+ if (event_type == iJVM_EVENT_TYPE_LEAVE_NIDS)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ pThreadStack threadStack =
+ (pThreadStack)TlsGetValue (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pThreadStack threadStack =
+ (pThreadStack)pthread_getspecific(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ /* check for use of reserved method IDs */
+ if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 )
+ return 0;
+
+ if (!threadStack)
+ {
+ /* Error: first report in this thread is method exit */
+ exit (1);
+ }
+
+ ((piJIT_Method_NIDS) EventSpecificData)->stack_id =
+ ++(threadStack->CurrentStack) + 1;
+
+ if (((piJIT_Method_NIDS) EventSpecificData)->stack_id
+ > threadStack->TopStack)
+ ((piJIT_Method_NIDS) EventSpecificData)->stack_id =
+ (unsigned int)-1;
+ }
+
+ if (event_type == iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED)
+ {
+ /* check for use of reserved method IDs */
+ if ( ((piJIT_Method_Load) EventSpecificData)->method_id <= 999 )
+ return 0;
+ }
+
+ ReturnValue = (int)FUNC_NotifyEvent(event_type, EventSpecificData);
+
+ return ReturnValue;
+}
+
+/* The new mode call back routine */
+ITT_EXTERN_C void JITAPI
+iJIT_RegisterCallbackEx(void *userdata, iJIT_ModeChangedEx
+ NewModeCallBackFuncEx)
+{
+ /* is it already missing... or the load of functions from the DLL failed */
+ if (iJIT_DLL_is_missing || !loadiJIT_Funcs())
+ {
+ /* then do not bother with notifications */
+ NewModeCallBackFuncEx(userdata, iJIT_NO_NOTIFICATIONS);
+ /* Error: could not load JIT functions. */
+ return;
+ }
+ /* nothing to do with the callback */
+}
+
+/*
+ * This function allows the user to query in which mode, if at all,
+ *VTune is running
+ */
+ITT_EXTERN_C iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive()
+{
+ if (!iJIT_DLL_is_missing)
+ {
+ loadiJIT_Funcs();
+ }
+
+ return executionMode;
+}
+
+/* this function loads the collector dll (BistroJavaCollector)
+ * and the relevant functions.
+ * on success: all functions load, iJIT_DLL_is_missing = 0, return value = 1
+ * on failure: all functions are NULL, iJIT_DLL_is_missing = 1, return value = 0
+ */
+static int loadiJIT_Funcs()
+{
+ static int bDllWasLoaded = 0;
+ char *dllName = (char*)rcsid; /* !! Just to avoid unused code elimination */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ DWORD dNameLength = 0;
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ if(bDllWasLoaded)
+ {
+ /* dll was already loaded, no need to do it for the second time */
+ return 1;
+ }
+
+ /* Assumes that the DLL will not be found */
+ iJIT_DLL_is_missing = 1;
+ FUNC_NotifyEvent = NULL;
+
+ if (m_libHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FreeLibrary(m_libHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ dlclose(m_libHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ m_libHandle = NULL;
+ }
+
+ /* Try to get the dll name from the environment */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ dNameLength = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR, NULL, 0);
+ if (dNameLength)
+ {
+ DWORD envret = 0;
+ dllName = (char*)malloc(sizeof(char) * (dNameLength + 1));
+ envret = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR,
+ dllName, dNameLength);
+ if (envret)
+ {
+ /* Try to load the dll from the PATH... */
+ m_libHandle = LoadLibraryExA(dllName,
+ NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
+ }
+ free(dllName);
+ } else {
+ /* Try to use old VS_PROFILER variable */
+ dNameLength = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, NULL, 0);
+ if (dNameLength)
+ {
+ DWORD envret = 0;
+ dllName = (char*)malloc(sizeof(char) * (dNameLength + 1));
+ envret = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR,
+ dllName, dNameLength);
+ if (envret)
+ {
+ /* Try to load the dll from the PATH... */
+ m_libHandle = LoadLibraryA(dllName);
+ }
+ free(dllName);
+ }
+ }
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ dllName = getenv(NEW_DLL_ENVIRONMENT_VAR);
+ if (!dllName)
+ dllName = getenv(DLL_ENVIRONMENT_VAR);
+#ifdef ANDROID
+ if (!dllName)
+ dllName = ANDROID_JIT_AGENT_PATH;
+#endif
+ if (dllName)
+ {
+ /* Try to load the dll from the PATH... */
+ m_libHandle = dlopen(dllName, RTLD_LAZY);
+ }
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ if (!m_libHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ m_libHandle = LoadLibraryA(DEFAULT_DLLNAME);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ m_libHandle = dlopen(DEFAULT_DLLNAME, RTLD_LAZY);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+
+ /* if the dll wasn't loaded - exit. */
+ if (!m_libHandle)
+ {
+ iJIT_DLL_is_missing = 1; /* don't try to initialize
+ * JIT agent the second time
+ */
+ return 0;
+ }
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FUNC_NotifyEvent = (TPNotify)GetProcAddress(m_libHandle, "NotifyEvent");
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ FUNC_NotifyEvent = (TPNotify)dlsym(m_libHandle, "NotifyEvent");
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (!FUNC_NotifyEvent)
+ {
+ FUNC_Initialize = NULL;
+ return 0;
+ }
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FUNC_Initialize = (TPInitialize)GetProcAddress(m_libHandle, "Initialize");
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ FUNC_Initialize = (TPInitialize)dlsym(m_libHandle, "Initialize");
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (!FUNC_Initialize)
+ {
+ FUNC_NotifyEvent = NULL;
+ return 0;
+ }
+
+ executionMode = (iJIT_IsProfilingActiveFlags)FUNC_Initialize();
+
+ bDllWasLoaded = 1;
+ iJIT_DLL_is_missing = 0; /* DLL is ok. */
+
+ /*
+ * Call Graph mode: init the thread local storage
+ * (need to store the virtual stack there).
+ */
+ if ( executionMode == iJIT_CALLGRAPH_ON )
+ {
+ /* Allocate a thread local storage slot for the thread "stack" */
+ if (!threadLocalStorageHandle)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ threadLocalStorageHandle = TlsAlloc();
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_key_create(&threadLocalStorageHandle, NULL);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+
+ return 1;
+}
+
+/*
+ * This function should be called by the user whenever a thread ends,
+ * to free the thread "virtual stack" storage
+ */
+ITT_EXTERN_C void JITAPI FinalizeThread()
+{
+ if (threadLocalStorageHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ pThreadStack threadStack =
+ (pThreadStack)TlsGetValue (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pThreadStack threadStack =
+ (pThreadStack)pthread_getspecific(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (threadStack)
+ {
+ free (threadStack);
+ threadStack = NULL;
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ TlsSetValue (threadLocalStorageHandle, threadStack);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_setspecific(threadLocalStorageHandle, threadStack);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+ }
+}
+
+/*
+ * This function should be called by the user when the process ends,
+ * to free the local storage index
+*/
+ITT_EXTERN_C void JITAPI FinalizeProcess()
+{
+ if (m_libHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FreeLibrary(m_libHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ dlclose(m_libHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ m_libHandle = NULL;
+ }
+
+ if (threadLocalStorageHandle)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ TlsFree (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_key_delete(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+}
+
+/*
+ * This function should be called by the user for any method once.
+ * The function will return a unique method ID, the user should maintain
+ * the ID for each method
+ */
+ITT_EXTERN_C unsigned int JITAPI iJIT_GetNewMethodID()
+{
+ static unsigned int methodID = 0x100000;
+
+ if (methodID == 0)
+ return 0; /* ERROR : this is not a valid value */
+
+ return methodID++;
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
new file mode 100644
index 000000000000..f08e2870dcef
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
@@ -0,0 +1,259 @@
+/*===-- jitprofiling.h - JIT Profiling API-------------------------*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * Profiling API declaration.
+ *
+ * NOTE: This file comes in a style different from the rest of LLVM
+ * source base since this is a piece of code shared from Intel(R)
+ * products. Please do not reformat / re-style this code to make
+ * subsequent merges and contributions from the original source base eaiser.
+ *
+ *===----------------------------------------------------------------------===*/
+#ifndef __JITPROFILING_H__
+#define __JITPROFILING_H__
+
+/*
+ * Various constants used by functions
+ */
+
+/* event notification */
+typedef enum iJIT_jvm_event
+{
+
+ /* shutdown */
+
+ /*
+ * Program exiting EventSpecificData NA
+ */
+ iJVM_EVENT_TYPE_SHUTDOWN = 2,
+
+ /* JIT profiling */
+
+ /*
+ * issued after method code jitted into memory but before code is executed
+ * EventSpecificData is an iJIT_Method_Load
+ */
+ iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13,
+
+ /* issued before unload. Method code will no longer be executed, but code
+ * and info are still in memory. The VTune profiler may capture method
+ * code only at this point EventSpecificData is iJIT_Method_Id
+ */
+ iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
+
+ /* Method Profiling */
+
+ /* method name, Id and stack is supplied
+ * issued when a method is about to be entered EventSpecificData is
+ * iJIT_Method_NIDS
+ */
+ iJVM_EVENT_TYPE_ENTER_NIDS = 19,
+
+ /* method name, Id and stack is supplied
+ * issued when a method is about to be left EventSpecificData is
+ * iJIT_Method_NIDS
+ */
+ iJVM_EVENT_TYPE_LEAVE_NIDS
+} iJIT_JVM_EVENT;
+
+typedef enum _iJIT_ModeFlags
+{
+ /* No need to Notify VTune, since VTune is not running */
+ iJIT_NO_NOTIFICATIONS = 0x0000,
+
+ /* when turned on the jit must call
+ * iJIT_NotifyEvent
+ * (
+ * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ * )
+ * for all the method already jitted
+ */
+ iJIT_BE_NOTIFY_ON_LOAD = 0x0001,
+
+ /* when turned on the jit must call
+ * iJIT_NotifyEvent
+ * (
+ * iJVM_EVENT_TYPE_METHOD_UNLOAD_FINISHED,
+ * ) for all the method that are unloaded
+ */
+ iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002,
+
+ /* when turned on the jit must instrument all
+ * the currently jited code with calls on
+ * method entries
+ */
+ iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004,
+
+ /* when turned on the jit must instrument all
+ * the currently jited code with calls
+ * on method exit
+ */
+ iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008
+
+} iJIT_ModeFlags;
+
+
+ /* Flags used by iJIT_IsProfilingActive() */
+typedef enum _iJIT_IsProfilingActiveFlags
+{
+ /* No profiler is running. Currently not used */
+ iJIT_NOTHING_RUNNING = 0x0000,
+
+ /* Sampling is running. This is the default value
+ * returned by iJIT_IsProfilingActive()
+ */
+ iJIT_SAMPLING_ON = 0x0001,
+
+ /* Call Graph is running */
+ iJIT_CALLGRAPH_ON = 0x0002
+
+} iJIT_IsProfilingActiveFlags;
+
+/* Enumerator for the environment of methods*/
+typedef enum _iJDEnvironmentType
+{
+ iJDE_JittingAPI = 2
+} iJDEnvironmentType;
+
+/**********************************
+ * Data structures for the events *
+ **********************************/
+
+/* structure for the events:
+ * iJVM_EVENT_TYPE_METHOD_UNLOAD_START
+ */
+
+typedef struct _iJIT_Method_Id
+{
+ /* Id of the method (same as the one passed in
+ * the iJIT_Method_Load struct
+ */
+ unsigned int method_id;
+
+} *piJIT_Method_Id, iJIT_Method_Id;
+
+
+/* structure for the events:
+ * iJVM_EVENT_TYPE_ENTER_NIDS,
+ * iJVM_EVENT_TYPE_LEAVE_NIDS,
+ * iJVM_EVENT_TYPE_EXCEPTION_OCCURRED_NIDS
+ */
+
+typedef struct _iJIT_Method_NIDS
+{
+ /* unique method ID */
+ unsigned int method_id;
+
+ /* NOTE: no need to fill this field, it's filled by VTune */
+ unsigned int stack_id;
+
+ /* method name (just the method, without the class) */
+ char* method_name;
+} *piJIT_Method_NIDS, iJIT_Method_NIDS;
+
+/* structures for the events:
+ * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED
+ */
+
+typedef struct _LineNumberInfo
+{
+ /* x86 Offset from the begining of the method*/
+ unsigned int Offset;
+
+ /* source line number from the begining of the source file */
+ unsigned int LineNumber;
+
+} *pLineNumberInfo, LineNumberInfo;
+
+typedef struct _iJIT_Method_Load
+{
+ /* unique method ID - can be any unique value, (except 0 - 999) */
+ unsigned int method_id;
+
+ /* method name (can be with or without the class and signature, in any case
+ * the class name will be added to it)
+ */
+ char* method_name;
+
+ /* virtual address of that method - This determines the method range for the
+ * iJVM_EVENT_TYPE_ENTER/LEAVE_METHOD_ADDR events
+ */
+ void* method_load_address;
+
+ /* Size in memory - Must be exact */
+ unsigned int method_size;
+
+ /* Line Table size in number of entries - Zero if none */
+ unsigned int line_number_size;
+
+ /* Pointer to the begining of the line numbers info array */
+ pLineNumberInfo line_number_table;
+
+ /* unique class ID */
+ unsigned int class_id;
+
+ /* class file name */
+ char* class_file_name;
+
+ /* source file name */
+ char* source_file_name;
+
+ /* bits supplied by the user for saving in the JIT file */
+ void* user_data;
+
+ /* the size of the user data buffer */
+ unsigned int user_data_size;
+
+ /* NOTE: no need to fill this field, it's filled by VTune */
+ iJDEnvironmentType env;
+
+} *piJIT_Method_Load, iJIT_Method_Load;
+
+/* API Functions */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef CDECL
+# if defined WIN32 || defined _WIN32
+# define CDECL __cdecl
+# else /* defined WIN32 || defined _WIN32 */
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define CDECL /* not actual on x86_64 platform */
+# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# define CDECL __attribute__ ((cdecl))
+# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# endif /* defined WIN32 || defined _WIN32 */
+#endif /* CDECL */
+
+#define JITAPI CDECL
+
+/* called when the settings are changed with new settings */
+typedef void (*iJIT_ModeChangedEx)(void *UserData, iJIT_ModeFlags Flags);
+
+int JITAPI iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData);
+
+/* The new mode call back routine */
+void JITAPI iJIT_RegisterCallbackEx(void *userdata,
+ iJIT_ModeChangedEx NewModeCallBackFuncEx);
+
+iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive(void);
+
+void JITAPI FinalizeThread(void);
+
+void JITAPI FinalizeProcess(void);
+
+unsigned int JITAPI iJIT_GetNewMethodID(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __JITPROFILING_H__ */
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
new file mode 100644
index 000000000000..526c04e082d2
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -0,0 +1,1396 @@
+//===-- Execution.cpp - Implement code to simulate the program ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the actual instruction interpreter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "interpreter"
+#include "Interpreter.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed");
+
+static cl::opt<bool> PrintVolatile("interpreter-print-volatile", cl::Hidden,
+ cl::desc("make the interpreter print every volatile load and store"));
+
+//===----------------------------------------------------------------------===//
+// Various Helper Functions
+//===----------------------------------------------------------------------===//
+
+static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) {
+ SF.Values[V] = Val;
+}
+
+//===----------------------------------------------------------------------===//
+// Binary Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+#define IMPLEMENT_BINARY_OPERATOR(OP, TY) \
+ case Type::TY##TyID: \
+ Dest.TY##Val = Src1.TY##Val OP Src2.TY##Val; \
+ break
+
+static void executeFAddInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(+, Float);
+ IMPLEMENT_BINARY_OPERATOR(+, Double);
+ default:
+ dbgs() << "Unhandled type for FAdd instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFSubInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(-, Float);
+ IMPLEMENT_BINARY_OPERATOR(-, Double);
+ default:
+ dbgs() << "Unhandled type for FSub instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(*, Float);
+ IMPLEMENT_BINARY_OPERATOR(*, Double);
+ default:
+ dbgs() << "Unhandled type for FMul instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(/, Float);
+ IMPLEMENT_BINARY_OPERATOR(/, Double);
+ default:
+ dbgs() << "Unhandled type for FDiv instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+ Dest.FloatVal = fmod(Src1.FloatVal, Src2.FloatVal);
+ break;
+ case Type::DoubleTyID:
+ Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal);
+ break;
+ default:
+ dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+#define IMPLEMENT_INTEGER_ICMP(OP, TY) \
+ case Type::IntegerTyID: \
+ Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \
+ break;
+
+// Handle pointers specially because they must be compared with only as much
+// width as the host has. We _do not_ want to be comparing 64 bit values when
+// running on a 32-bit target, otherwise the upper 32 bits might mess up
+// comparisons if they contain garbage.
+#define IMPLEMENT_POINTER_ICMP(OP) \
+ case Type::PointerTyID: \
+ Dest.IntVal = APInt(1,(void*)(intptr_t)Src1.PointerVal OP \
+ (void*)(intptr_t)Src2.PointerVal); \
+ break;
+
+static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(eq,Ty);
+ IMPLEMENT_POINTER_ICMP(==);
+ default:
+ dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ne,Ty);
+ IMPLEMENT_POINTER_ICMP(!=);
+ default:
+ dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ult,Ty);
+ IMPLEMENT_POINTER_ICMP(<);
+ default:
+ dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(slt,Ty);
+ IMPLEMENT_POINTER_ICMP(<);
+ default:
+ dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ugt,Ty);
+ IMPLEMENT_POINTER_ICMP(>);
+ default:
+ dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sgt,Ty);
+ IMPLEMENT_POINTER_ICMP(>);
+ default:
+ dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ule,Ty);
+ IMPLEMENT_POINTER_ICMP(<=);
+ default:
+ dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sle,Ty);
+ IMPLEMENT_POINTER_ICMP(<=);
+ default:
+ dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(uge,Ty);
+ IMPLEMENT_POINTER_ICMP(>=);
+ default:
+ dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sge,Ty);
+ IMPLEMENT_POINTER_ICMP(>=);
+ default:
+ dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+void Interpreter::visitICmpInst(ICmpInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: R = executeICMP_EQ(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_NE: R = executeICMP_NE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_ULT: R = executeICMP_ULT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SLT: R = executeICMP_SLT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_UGT: R = executeICMP_UGT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SGT: R = executeICMP_SGT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_ULE: R = executeICMP_ULE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SLE: R = executeICMP_SLE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_UGE: R = executeICMP_UGE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break;
+ default:
+ dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I;
+ llvm_unreachable(0);
+ }
+
+ SetValue(&I, R, SF);
+}
+
+#define IMPLEMENT_FCMP(OP, TY) \
+ case Type::TY##TyID: \
+ Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \
+ break
+
+static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(==, Float);
+ IMPLEMENT_FCMP(==, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(!=, Float);
+ IMPLEMENT_FCMP(!=, Double);
+
+ default:
+ dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(<=, Float);
+ IMPLEMENT_FCMP(<=, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(>=, Float);
+ IMPLEMENT_FCMP(>=, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(<, Float);
+ IMPLEMENT_FCMP(<, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(>, Float);
+ IMPLEMENT_FCMP(>, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+#define IMPLEMENT_UNORDERED(TY, X,Y) \
+ if (TY->isFloatTy()) { \
+ if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \
+ Dest.IntVal = APInt(1,true); \
+ return Dest; \
+ } \
+ } else if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \
+ Dest.IntVal = APInt(1,true); \
+ return Dest; \
+ }
+
+
+static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OEQ(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_ONE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OLE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OGE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OLT(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OGT(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ if (Ty->isFloatTy())
+ Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
+ Src2.FloatVal == Src2.FloatVal));
+ else
+ Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal &&
+ Src2.DoubleVal == Src2.DoubleVal));
+ return Dest;
+}
+
+static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
+ Type *Ty) {
+ GenericValue Dest;
+ if (Ty->isFloatTy())
+ Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
+ Src2.FloatVal != Src2.FloatVal));
+ else
+ Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal ||
+ Src2.DoubleVal != Src2.DoubleVal));
+ return Dest;
+}
+
+void Interpreter::visitFCmpInst(FCmpInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getPredicate()) {
+ case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break;
+ case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break;
+ case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OEQ: R = executeFCMP_OEQ(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UNE: R = executeFCMP_UNE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ONE: R = executeFCMP_ONE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ULT: R = executeFCMP_ULT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OLT: R = executeFCMP_OLT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UGT: R = executeFCMP_UGT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OGT: R = executeFCMP_OGT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ULE: R = executeFCMP_ULE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break;
+ default:
+ dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
+ llvm_unreachable(0);
+ }
+
+ SetValue(&I, R, SF);
+}
+
+static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
+ GenericValue Src2, Type *Ty) {
+ GenericValue Result;
+ switch (predicate) {
+ case ICmpInst::ICMP_EQ: return executeICMP_EQ(Src1, Src2, Ty);
+ case ICmpInst::ICMP_NE: return executeICMP_NE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_UGT: return executeICMP_UGT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SGT: return executeICMP_SGT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_ULT: return executeICMP_ULT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SLT: return executeICMP_SLT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_UGE: return executeICMP_UGE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SGE: return executeICMP_SGE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_ULE: return executeICMP_ULE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SLE: return executeICMP_SLE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ORD: return executeFCMP_ORD(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UNO: return executeFCMP_UNO(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OEQ: return executeFCMP_OEQ(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UEQ: return executeFCMP_UEQ(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ONE: return executeFCMP_ONE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UNE: return executeFCMP_UNE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OLT: return executeFCMP_OLT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ULT: return executeFCMP_ULT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OGT: return executeFCMP_OGT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UGT: return executeFCMP_UGT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OLE: return executeFCMP_OLE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_FALSE: {
+ GenericValue Result;
+ Result.IntVal = APInt(1, false);
+ return Result;
+ }
+ case FCmpInst::FCMP_TRUE: {
+ GenericValue Result;
+ Result.IntVal = APInt(1, true);
+ return Result;
+ }
+ default:
+ dbgs() << "Unhandled Cmp predicate\n";
+ llvm_unreachable(0);
+ }
+}
+
+void Interpreter::visitBinaryOperator(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getOpcode()) {
+ case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break;
+ case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break;
+ case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break;
+ case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break;
+ case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break;
+ case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break;
+ case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break;
+ case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break;
+ case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
+ case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
+ case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
+ case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
+ case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break;
+ case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break;
+ case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
+ default:
+ dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+ llvm_unreachable(0);
+ }
+
+ SetValue(&I, R, SF);
+}
+
+static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2,
+ GenericValue Src3) {
+ return Src1.IntVal == 0 ? Src3 : Src2;
+}
+
+void Interpreter::visitSelectInst(SelectInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
+ GenericValue R = executeSelectInst(Src1, Src2, Src3);
+ SetValue(&I, R, SF);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Terminator Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::exitCalled(GenericValue GV) {
+ // runAtExitHandlers() assumes there are no stack frames, but
+ // if exit() was called, then it had a stack frame. Blow away
+ // the stack before interpreting atexit handlers.
+ ECStack.clear();
+ runAtExitHandlers();
+ exit(GV.IntVal.zextOrTrunc(32).getZExtValue());
+}
+
+/// Pop the last stack frame off of ECStack and then copy the result
+/// back into the result variable if we are not returning void. The
+/// result variable may be the ExitValue, or the Value of the calling
+/// CallInst if there was a previous stack frame. This method may
+/// invalidate any ECStack iterators you have. This method also takes
+/// care of switching to the normal destination BB, if we are returning
+/// from an invoke.
+///
+void Interpreter::popStackAndReturnValueToCaller(Type *RetTy,
+ GenericValue Result) {
+ // Pop the current stack frame.
+ ECStack.pop_back();
+
+ if (ECStack.empty()) { // Finished main. Put result into exit code...
+ if (RetTy && !RetTy->isVoidTy()) { // Nonvoid return type?
+ ExitValue = Result; // Capture the exit value of the program
+ } else {
+ memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
+ }
+ } else {
+ // If we have a previous stack frame, and we have a previous call,
+ // fill in the return value...
+ ExecutionContext &CallingSF = ECStack.back();
+ if (Instruction *I = CallingSF.Caller.getInstruction()) {
+ // Save result...
+ if (!CallingSF.Caller.getType()->isVoidTy())
+ SetValue(I, Result, CallingSF);
+ if (InvokeInst *II = dyn_cast<InvokeInst> (I))
+ SwitchToNewBasicBlock (II->getNormalDest (), CallingSF);
+ CallingSF.Caller = CallSite(); // We returned from the call...
+ }
+ }
+}
+
+void Interpreter::visitReturnInst(ReturnInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ Type *RetTy = Type::getVoidTy(I.getContext());
+ GenericValue Result;
+
+ // Save away the return value... (if we are not 'ret void')
+ if (I.getNumOperands()) {
+ RetTy = I.getReturnValue()->getType();
+ Result = getOperandValue(I.getReturnValue(), SF);
+ }
+
+ popStackAndReturnValueToCaller(RetTy, Result);
+}
+
+void Interpreter::visitUnreachableInst(UnreachableInst &I) {
+ report_fatal_error("Program executed an 'unreachable' instruction!");
+}
+
+void Interpreter::visitBranchInst(BranchInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ BasicBlock *Dest;
+
+ Dest = I.getSuccessor(0); // Uncond branches have a fixed dest...
+ if (!I.isUnconditional()) {
+ Value *Cond = I.getCondition();
+ if (getOperandValue(Cond, SF).IntVal == 0) // If false cond...
+ Dest = I.getSuccessor(1);
+ }
+ SwitchToNewBasicBlock(Dest, SF);
+}
+
+void Interpreter::visitSwitchInst(SwitchInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ Value* Cond = I.getCondition();
+ Type *ElTy = Cond->getType();
+ GenericValue CondVal = getOperandValue(Cond, SF);
+
+ // Check to see if any of the cases match...
+ BasicBlock *Dest = 0;
+ for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) {
+ IntegersSubset& Case = i.getCaseValueEx();
+ if (Case.isSingleNumber()) {
+ // FIXME: Currently work with ConstantInt based numbers.
+ const ConstantInt *CI = Case.getSingleNumber(0).toConstantInt();
+ GenericValue Val = getOperandValue(const_cast<ConstantInt*>(CI), SF);
+ if (executeICMP_EQ(Val, CondVal, ElTy).IntVal != 0) {
+ Dest = cast<BasicBlock>(i.getCaseSuccessor());
+ break;
+ }
+ }
+ if (Case.isSingleNumbersOnly()) {
+ for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) {
+ // FIXME: Currently work with ConstantInt based numbers.
+ const ConstantInt *CI = Case.getSingleNumber(n).toConstantInt();
+ GenericValue Val = getOperandValue(const_cast<ConstantInt*>(CI), SF);
+ if (executeICMP_EQ(Val, CondVal, ElTy).IntVal != 0) {
+ Dest = cast<BasicBlock>(i.getCaseSuccessor());
+ break;
+ }
+ }
+ } else
+ for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) {
+ IntegersSubset::Range r = Case.getItem(n);
+ // FIXME: Currently work with ConstantInt based numbers.
+ const ConstantInt *LowCI = r.getLow().toConstantInt();
+ const ConstantInt *HighCI = r.getHigh().toConstantInt();
+ GenericValue Low = getOperandValue(const_cast<ConstantInt*>(LowCI), SF);
+ GenericValue High = getOperandValue(const_cast<ConstantInt*>(HighCI), SF);
+ if (executeICMP_ULE(Low, CondVal, ElTy).IntVal != 0 &&
+ executeICMP_ULE(CondVal, High, ElTy).IntVal != 0) {
+ Dest = cast<BasicBlock>(i.getCaseSuccessor());
+ break;
+ }
+ }
+ }
+ if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default
+ SwitchToNewBasicBlock(Dest, SF);
+}
+
+void Interpreter::visitIndirectBrInst(IndirectBrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ void *Dest = GVTOP(getOperandValue(I.getAddress(), SF));
+ SwitchToNewBasicBlock((BasicBlock*)Dest, SF);
+}
+
+
+// SwitchToNewBasicBlock - This method is used to jump to a new basic block.
+// This function handles the actual updating of block and instruction iterators
+// as well as execution of all of the PHI nodes in the destination block.
+//
+// This method does this because all of the PHI nodes must be executed
+// atomically, reading their inputs before any of the results are updated. Not
+// doing this can cause problems if the PHI nodes depend on other PHI nodes for
+// their inputs. If the input PHI node is updated before it is read, incorrect
+// results can happen. Thus we use a two phase approach.
+//
+void Interpreter::SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF){
+ BasicBlock *PrevBB = SF.CurBB; // Remember where we came from...
+ SF.CurBB = Dest; // Update CurBB to branch destination
+ SF.CurInst = SF.CurBB->begin(); // Update new instruction ptr...
+
+ if (!isa<PHINode>(SF.CurInst)) return; // Nothing fancy to do
+
+ // Loop over all of the PHI nodes in the current block, reading their inputs.
+ std::vector<GenericValue> ResultValues;
+
+ for (; PHINode *PN = dyn_cast<PHINode>(SF.CurInst); ++SF.CurInst) {
+ // Search for the value corresponding to this previous bb...
+ int i = PN->getBasicBlockIndex(PrevBB);
+ assert(i != -1 && "PHINode doesn't contain entry for predecessor??");
+ Value *IncomingValue = PN->getIncomingValue(i);
+
+ // Save the incoming value for this PHI node...
+ ResultValues.push_back(getOperandValue(IncomingValue, SF));
+ }
+
+ // Now loop over all of the PHI nodes setting their values...
+ SF.CurInst = SF.CurBB->begin();
+ for (unsigned i = 0; isa<PHINode>(SF.CurInst); ++SF.CurInst, ++i) {
+ PHINode *PN = cast<PHINode>(SF.CurInst);
+ SetValue(PN, ResultValues[i], SF);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::visitAllocaInst(AllocaInst &I) {
+ ExecutionContext &SF = ECStack.back();
+
+ Type *Ty = I.getType()->getElementType(); // Type to be allocated
+
+ // Get the number of elements being allocated by the array...
+ unsigned NumElements =
+ getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue();
+
+ unsigned TypeSize = (size_t)TD.getTypeAllocSize(Ty);
+
+ // Avoid malloc-ing zero bytes, use max()...
+ unsigned MemToAlloc = std::max(1U, NumElements * TypeSize);
+
+ // Allocate enough memory to hold the type...
+ void *Memory = malloc(MemToAlloc);
+
+ DEBUG(dbgs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x "
+ << NumElements << " (Total: " << MemToAlloc << ") at "
+ << uintptr_t(Memory) << '\n');
+
+ GenericValue Result = PTOGV(Memory);
+ assert(Result.PointerVal != 0 && "Null pointer returned by malloc!");
+ SetValue(&I, Result, SF);
+
+ if (I.getOpcode() == Instruction::Alloca)
+ ECStack.back().Allocas.add(Memory);
+}
+
+// getElementOffset - The workhorse for getelementptr.
+//
+GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E,
+ ExecutionContext &SF) {
+ assert(Ptr->getType()->isPointerTy() &&
+ "Cannot getElementOffset of a nonpointer type!");
+
+ uint64_t Total = 0;
+
+ for (; I != E; ++I) {
+ if (StructType *STy = dyn_cast<StructType>(*I)) {
+ const StructLayout *SLO = TD.getStructLayout(STy);
+
+ const ConstantInt *CPU = cast<ConstantInt>(I.getOperand());
+ unsigned Index = unsigned(CPU->getZExtValue());
+
+ Total += SLO->getElementOffset(Index);
+ } else {
+ SequentialType *ST = cast<SequentialType>(*I);
+ // Get the index number for the array... which must be long type...
+ GenericValue IdxGV = getOperandValue(I.getOperand(), SF);
+
+ int64_t Idx;
+ unsigned BitWidth =
+ cast<IntegerType>(I.getOperand()->getType())->getBitWidth();
+ if (BitWidth == 32)
+ Idx = (int64_t)(int32_t)IdxGV.IntVal.getZExtValue();
+ else {
+ assert(BitWidth == 64 && "Invalid index type for getelementptr");
+ Idx = (int64_t)IdxGV.IntVal.getZExtValue();
+ }
+ Total += TD.getTypeAllocSize(ST->getElementType())*Idx;
+ }
+ }
+
+ GenericValue Result;
+ Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total;
+ DEBUG(dbgs() << "GEP Index " << Total << " bytes.\n");
+ return Result;
+}
+
+void Interpreter::visitGetElementPtrInst(GetElementPtrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeGEPOperation(I.getPointerOperand(),
+ gep_type_begin(I), gep_type_end(I), SF), SF);
+}
+
+void Interpreter::visitLoadInst(LoadInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue SRC = getOperandValue(I.getPointerOperand(), SF);
+ GenericValue *Ptr = (GenericValue*)GVTOP(SRC);
+ GenericValue Result;
+ LoadValueFromMemory(Result, Ptr, I.getType());
+ SetValue(&I, Result, SF);
+ if (I.isVolatile() && PrintVolatile)
+ dbgs() << "Volatile load " << I;
+}
+
+void Interpreter::visitStoreInst(StoreInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Val = getOperandValue(I.getOperand(0), SF);
+ GenericValue SRC = getOperandValue(I.getPointerOperand(), SF);
+ StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC),
+ I.getOperand(0)->getType());
+ if (I.isVolatile() && PrintVolatile)
+ dbgs() << "Volatile store: " << I;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::visitCallSite(CallSite CS) {
+ ExecutionContext &SF = ECStack.back();
+
+ // Check to see if this is an intrinsic function call...
+ Function *F = CS.getCalledFunction();
+ if (F && F->isDeclaration())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ break;
+ case Intrinsic::vastart: { // va_start
+ GenericValue ArgIndex;
+ ArgIndex.UIntPairVal.first = ECStack.size() - 1;
+ ArgIndex.UIntPairVal.second = 0;
+ SetValue(CS.getInstruction(), ArgIndex, SF);
+ return;
+ }
+ case Intrinsic::vaend: // va_end is a noop for the interpreter
+ return;
+ case Intrinsic::vacopy: // va_copy: dest = src
+ SetValue(CS.getInstruction(), getOperandValue(*CS.arg_begin(), SF), SF);
+ return;
+ default:
+ // If it is an unknown intrinsic function, use the intrinsic lowering
+ // class to transform it into hopefully tasty LLVM code.
+ //
+ BasicBlock::iterator me(CS.getInstruction());
+ BasicBlock *Parent = CS.getInstruction()->getParent();
+ bool atBegin(Parent->begin() == me);
+ if (!atBegin)
+ --me;
+ IL->LowerIntrinsicCall(cast<CallInst>(CS.getInstruction()));
+
+ // Restore the CurInst pointer to the first instruction newly inserted, if
+ // any.
+ if (atBegin) {
+ SF.CurInst = Parent->begin();
+ } else {
+ SF.CurInst = me;
+ ++SF.CurInst;
+ }
+ return;
+ }
+
+
+ SF.Caller = CS;
+ std::vector<GenericValue> ArgVals;
+ const unsigned NumArgs = SF.Caller.arg_size();
+ ArgVals.reserve(NumArgs);
+ uint16_t pNum = 1;
+ for (CallSite::arg_iterator i = SF.Caller.arg_begin(),
+ e = SF.Caller.arg_end(); i != e; ++i, ++pNum) {
+ Value *V = *i;
+ ArgVals.push_back(getOperandValue(V, SF));
+ }
+
+ // To handle indirect calls, we must get the pointer value from the argument
+ // and treat it as a function pointer.
+ GenericValue SRC = getOperandValue(SF.Caller.getCalledValue(), SF);
+ callFunction((Function*)GVTOP(SRC), ArgVals);
+}
+
+void Interpreter::visitShl(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.shl(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitLShr(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.lshr(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitAShr(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.ashr(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+GenericValue Interpreter::executeTruncInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.trunc(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.sext(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.zext(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() &&
+ "Invalid FPTrunc instruction");
+ Dest.FloatVal = (float) Src.DoubleVal;
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPExtInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() &&
+ "Invalid FPTrunc instruction");
+ Dest.DoubleVal = (double) Src.FloatVal;
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ Type *SrcTy = SrcVal->getType();
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction");
+
+ if (SrcTy->getTypeID() == Type::FloatTyID)
+ Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
+ else
+ Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ Type *SrcTy = SrcVal->getType();
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction");
+
+ if (SrcTy->getTypeID() == Type::FloatTyID)
+ Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
+ else
+ Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction");
+
+ if (DstTy->getTypeID() == Type::FloatTyID)
+ Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal);
+ else
+ Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal);
+ return Dest;
+}
+
+GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction");
+
+ if (DstTy->getTypeID() == Type::FloatTyID)
+ Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal);
+ else
+ Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal);
+ return Dest;
+
+}
+
+GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType()->isPointerTy() && "Invalid PtrToInt instruction");
+
+ Dest.IntVal = APInt(DBitWidth, (intptr_t) Src.PointerVal);
+ return Dest;
+}
+
+GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction");
+
+ uint32_t PtrSize = TD.getPointerSizeInBits();
+ if (PtrSize != Src.IntVal.getBitWidth())
+ Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize);
+
+ Dest.PointerVal = PointerTy(intptr_t(Src.IntVal.getZExtValue()));
+ return Dest;
+}
+
+GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF) {
+
+ Type *SrcTy = SrcVal->getType();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ if (DstTy->isPointerTy()) {
+ assert(SrcTy->isPointerTy() && "Invalid BitCast");
+ Dest.PointerVal = Src.PointerVal;
+ } else if (DstTy->isIntegerTy()) {
+ if (SrcTy->isFloatTy()) {
+ Dest.IntVal = APInt::floatToBits(Src.FloatVal);
+ } else if (SrcTy->isDoubleTy()) {
+ Dest.IntVal = APInt::doubleToBits(Src.DoubleVal);
+ } else if (SrcTy->isIntegerTy()) {
+ Dest.IntVal = Src.IntVal;
+ } else
+ llvm_unreachable("Invalid BitCast");
+ } else if (DstTy->isFloatTy()) {
+ if (SrcTy->isIntegerTy())
+ Dest.FloatVal = Src.IntVal.bitsToFloat();
+ else
+ Dest.FloatVal = Src.FloatVal;
+ } else if (DstTy->isDoubleTy()) {
+ if (SrcTy->isIntegerTy())
+ Dest.DoubleVal = Src.IntVal.bitsToDouble();
+ else
+ Dest.DoubleVal = Src.DoubleVal;
+ } else
+ llvm_unreachable("Invalid Bitcast");
+
+ return Dest;
+}
+
+void Interpreter::visitTruncInst(TruncInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeTruncInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitSExtInst(SExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeSExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitZExtInst(ZExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeZExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPTruncInst(FPTruncInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPTruncInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPExtInst(FPExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitUIToFPInst(UIToFPInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeUIToFPInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitSIToFPInst(SIToFPInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeSIToFPInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPToUIInst(FPToUIInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPToUIInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPToSIInst(FPToSIInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPToSIInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitPtrToIntInst(PtrToIntInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executePtrToIntInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitIntToPtrInst(IntToPtrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeIntToPtrInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitBitCastInst(BitCastInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeBitCastInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+#define IMPLEMENT_VAARG(TY) \
+ case Type::TY##TyID: Dest.TY##Val = Src.TY##Val; break
+
+void Interpreter::visitVAArgInst(VAArgInst &I) {
+ ExecutionContext &SF = ECStack.back();
+
+ // Get the incoming valist parameter. LLI treats the valist as a
+ // (ec-stack-depth var-arg-index) pair.
+ GenericValue VAList = getOperandValue(I.getOperand(0), SF);
+ GenericValue Dest;
+ GenericValue Src = ECStack[VAList.UIntPairVal.first]
+ .VarArgs[VAList.UIntPairVal.second];
+ Type *Ty = I.getType();
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ Dest.IntVal = Src.IntVal;
+ break;
+ IMPLEMENT_VAARG(Pointer);
+ IMPLEMENT_VAARG(Float);
+ IMPLEMENT_VAARG(Double);
+ default:
+ dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+
+ // Set the Value of this Instruction.
+ SetValue(&I, Dest, SF);
+
+ // Move the pointer to the next vararg.
+ ++VAList.UIntPairVal.second;
+}
+
+void Interpreter::visitExtractElementInst(ExtractElementInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+
+ Type *Ty = I.getType();
+ const unsigned indx = unsigned(Src2.IntVal.getZExtValue());
+
+ if(Src1.AggregateVal.size() > indx) {
+ switch (Ty->getTypeID()) {
+ default:
+ dbgs() << "Unhandled destination type for extractelement instruction: "
+ << *Ty << "\n";
+ llvm_unreachable(0);
+ break;
+ case Type::IntegerTyID:
+ Dest.IntVal = Src1.AggregateVal[indx].IntVal;
+ break;
+ case Type::FloatTyID:
+ Dest.FloatVal = Src1.AggregateVal[indx].FloatVal;
+ break;
+ case Type::DoubleTyID:
+ Dest.DoubleVal = Src1.AggregateVal[indx].DoubleVal;
+ break;
+ }
+ } else {
+ dbgs() << "Invalid index in extractelement instruction\n";
+ }
+
+ SetValue(&I, Dest, SF);
+}
+
+GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
+ ExecutionContext &SF) {
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ return executeTruncInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::ZExt:
+ return executeZExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::SExt:
+ return executeSExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPTrunc:
+ return executeFPTruncInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPExt:
+ return executeFPExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::UIToFP:
+ return executeUIToFPInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::SIToFP:
+ return executeSIToFPInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPToUI:
+ return executeFPToUIInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPToSI:
+ return executeFPToSIInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::PtrToInt:
+ return executePtrToIntInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::IntToPtr:
+ return executeIntToPtrInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::BitCast:
+ return executeBitCastInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::GetElementPtr:
+ return executeGEPOperation(CE->getOperand(0), gep_type_begin(CE),
+ gep_type_end(CE), SF);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ return executeCmpInst(CE->getPredicate(),
+ getOperandValue(CE->getOperand(0), SF),
+ getOperandValue(CE->getOperand(1), SF),
+ CE->getOperand(0)->getType());
+ case Instruction::Select:
+ return executeSelectInst(getOperandValue(CE->getOperand(0), SF),
+ getOperandValue(CE->getOperand(1), SF),
+ getOperandValue(CE->getOperand(2), SF));
+ default :
+ break;
+ }
+
+ // The cases below here require a GenericValue parameter for the result
+ // so we initialize one, compute it and then return it.
+ GenericValue Op0 = getOperandValue(CE->getOperand(0), SF);
+ GenericValue Op1 = getOperandValue(CE->getOperand(1), SF);
+ GenericValue Dest;
+ Type * Ty = CE->getOperand(0)->getType();
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Dest.IntVal = Op0.IntVal + Op1.IntVal; break;
+ case Instruction::Sub: Dest.IntVal = Op0.IntVal - Op1.IntVal; break;
+ case Instruction::Mul: Dest.IntVal = Op0.IntVal * Op1.IntVal; break;
+ case Instruction::FAdd: executeFAddInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FSub: executeFSubInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FMul: executeFMulInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FDiv: executeFDivInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FRem: executeFRemInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::SDiv: Dest.IntVal = Op0.IntVal.sdiv(Op1.IntVal); break;
+ case Instruction::UDiv: Dest.IntVal = Op0.IntVal.udiv(Op1.IntVal); break;
+ case Instruction::URem: Dest.IntVal = Op0.IntVal.urem(Op1.IntVal); break;
+ case Instruction::SRem: Dest.IntVal = Op0.IntVal.srem(Op1.IntVal); break;
+ case Instruction::And: Dest.IntVal = Op0.IntVal & Op1.IntVal; break;
+ case Instruction::Or: Dest.IntVal = Op0.IntVal | Op1.IntVal; break;
+ case Instruction::Xor: Dest.IntVal = Op0.IntVal ^ Op1.IntVal; break;
+ case Instruction::Shl:
+ Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue());
+ break;
+ case Instruction::LShr:
+ Dest.IntVal = Op0.IntVal.lshr(Op1.IntVal.getZExtValue());
+ break;
+ case Instruction::AShr:
+ Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue());
+ break;
+ default:
+ dbgs() << "Unhandled ConstantExpr: " << *CE << "\n";
+ llvm_unreachable("Unhandled ConstantExpr");
+ }
+ return Dest;
+}
+
+GenericValue Interpreter::getOperandValue(Value *V, ExecutionContext &SF) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ return getConstantExprValue(CE, SF);
+ } else if (Constant *CPV = dyn_cast<Constant>(V)) {
+ return getConstantValue(CPV);
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ return PTOGV(getPointerToGlobal(GV));
+ } else {
+ return SF.Values[V];
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Dispatch and Execution Code
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// callFunction - Execute the specified function...
+//
+void Interpreter::callFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals) {
+ assert((ECStack.empty() || ECStack.back().Caller.getInstruction() == 0 ||
+ ECStack.back().Caller.arg_size() == ArgVals.size()) &&
+ "Incorrect number of arguments passed into function call!");
+ // Make a new stack frame... and fill it in.
+ ECStack.push_back(ExecutionContext());
+ ExecutionContext &StackFrame = ECStack.back();
+ StackFrame.CurFunction = F;
+
+ // Special handling for external functions.
+ if (F->isDeclaration()) {
+ GenericValue Result = callExternalFunction (F, ArgVals);
+ // Simulate a 'ret' instruction of the appropriate type.
+ popStackAndReturnValueToCaller (F->getReturnType (), Result);
+ return;
+ }
+
+ // Get pointers to first LLVM BB & Instruction in function.
+ StackFrame.CurBB = F->begin();
+ StackFrame.CurInst = StackFrame.CurBB->begin();
+
+ // Run through the function arguments and initialize their values...
+ assert((ArgVals.size() == F->arg_size() ||
+ (ArgVals.size() > F->arg_size() && F->getFunctionType()->isVarArg()))&&
+ "Invalid number of values passed to function invocation!");
+
+ // Handle non-varargs arguments...
+ unsigned i = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++i)
+ SetValue(AI, ArgVals[i], StackFrame);
+
+ // Handle varargs arguments...
+ StackFrame.VarArgs.assign(ArgVals.begin()+i, ArgVals.end());
+}
+
+
+void Interpreter::run() {
+ while (!ECStack.empty()) {
+ // Interpret a single instruction & increment the "PC".
+ ExecutionContext &SF = ECStack.back(); // Current stack frame
+ Instruction &I = *SF.CurInst++; // Increment before execute
+
+ // Track the number of dynamic instructions executed.
+ ++NumDynamicInsts;
+
+ DEBUG(dbgs() << "About to interpret: " << I);
+ visit(I); // Dispatch to one of the visit* methods...
+#if 0
+ // This is not safe, as visiting the instruction could lower it and free I.
+DEBUG(
+ if (!isa<CallInst>(I) && !isa<InvokeInst>(I) &&
+ I.getType() != Type::VoidTy) {
+ dbgs() << " --> ";
+ const GenericValue &Val = SF.Values[&I];
+ switch (I.getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid GenericValue Type");
+ case Type::VoidTyID: dbgs() << "void"; break;
+ case Type::FloatTyID: dbgs() << "float " << Val.FloatVal; break;
+ case Type::DoubleTyID: dbgs() << "double " << Val.DoubleVal; break;
+ case Type::PointerTyID: dbgs() << "void* " << intptr_t(Val.PointerVal);
+ break;
+ case Type::IntegerTyID:
+ dbgs() << "i" << Val.IntVal.getBitWidth() << " "
+ << Val.IntVal.toStringUnsigned(10)
+ << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n";
+ break;
+ }
+ });
+#endif
+ }
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
new file mode 100644
index 000000000000..bef4bbf66023
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -0,0 +1,484 @@
+//===-- ExternalFunctions.cpp - Implement External Functions --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains both code to deal with invoking "external" functions, but
+// also contains code that implements "exported" external functions.
+//
+// There are currently two mechanisms for handling external functions in the
+// Interpreter. The first is to implement lle_* wrapper functions that are
+// specific to well-known library functions which manually translate the
+// arguments from GenericValues and make the call. If such a wrapper does
+// not exist, and libffi is available, then the Interpreter will attempt to
+// invoke the function using libffi, after finding its address.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Interpreter.h"
+#include "llvm/Config/config.h" // Detect libffi
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include <cmath>
+#include <csignal>
+#include <cstdio>
+#include <cstring>
+#include <map>
+
+#ifdef HAVE_FFI_CALL
+#ifdef HAVE_FFI_H
+#include <ffi.h>
+#define USE_LIBFFI
+#elif HAVE_FFI_FFI_H
+#include <ffi/ffi.h>
+#define USE_LIBFFI
+#endif
+#endif
+
+using namespace llvm;
+
+static ManagedStatic<sys::Mutex> FunctionsLock;
+
+typedef GenericValue (*ExFunc)(FunctionType *,
+ const std::vector<GenericValue> &);
+static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions;
+static std::map<std::string, ExFunc> FuncNames;
+
+#ifdef USE_LIBFFI
+typedef void (*RawFunc)();
+static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions;
+#endif
+
+static Interpreter *TheInterpreter;
+
+static char getTypeID(Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return 'V';
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 1: return 'o';
+ case 8: return 'B';
+ case 16: return 'S';
+ case 32: return 'I';
+ case 64: return 'L';
+ default: return 'N';
+ }
+ case Type::FloatTyID: return 'F';
+ case Type::DoubleTyID: return 'D';
+ case Type::PointerTyID: return 'P';
+ case Type::FunctionTyID:return 'M';
+ case Type::StructTyID: return 'T';
+ case Type::ArrayTyID: return 'A';
+ default: return 'U';
+ }
+}
+
+// Try to find address of external function given a Function object.
+// Please note, that interpreter doesn't know how to assemble a
+// real call in general case (this is JIT job), that's why it assumes,
+// that all external functions has the same (and pretty "general") signature.
+// The typical example of such functions are "lle_X_" ones.
+static ExFunc lookupFunction(const Function *F) {
+ // Function not found, look it up... start by figuring out what the
+ // composite function name should be.
+ std::string ExtName = "lle_";
+ FunctionType *FT = F->getFunctionType();
+ for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
+ ExtName += getTypeID(FT->getContainedType(i));
+ ExtName += "_" + F->getName().str();
+
+ sys::ScopedLock Writer(*FunctionsLock);
+ ExFunc FnPtr = FuncNames[ExtName];
+ if (FnPtr == 0)
+ FnPtr = FuncNames["lle_X_" + F->getName().str()];
+ if (FnPtr == 0) // Try calling a generic function... if it exists...
+ FnPtr = (ExFunc)(intptr_t)
+ sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" +
+ F->getName().str());
+ if (FnPtr != 0)
+ ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later
+ return FnPtr;
+}
+
+#ifdef USE_LIBFFI
+static ffi_type *ffiTypeFor(Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return &ffi_type_void;
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 8: return &ffi_type_sint8;
+ case 16: return &ffi_type_sint16;
+ case 32: return &ffi_type_sint32;
+ case 64: return &ffi_type_sint64;
+ }
+ case Type::FloatTyID: return &ffi_type_float;
+ case Type::DoubleTyID: return &ffi_type_double;
+ case Type::PointerTyID: return &ffi_type_pointer;
+ default: break;
+ }
+ // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
+ report_fatal_error("Type could not be mapped for use with libffi.");
+ return NULL;
+}
+
+static void *ffiValueFor(Type *Ty, const GenericValue &AV,
+ void *ArgDataPtr) {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 8: {
+ int8_t *I8Ptr = (int8_t *) ArgDataPtr;
+ *I8Ptr = (int8_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 16: {
+ int16_t *I16Ptr = (int16_t *) ArgDataPtr;
+ *I16Ptr = (int16_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 32: {
+ int32_t *I32Ptr = (int32_t *) ArgDataPtr;
+ *I32Ptr = (int32_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 64: {
+ int64_t *I64Ptr = (int64_t *) ArgDataPtr;
+ *I64Ptr = (int64_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ }
+ case Type::FloatTyID: {
+ float *FloatPtr = (float *) ArgDataPtr;
+ *FloatPtr = AV.FloatVal;
+ return ArgDataPtr;
+ }
+ case Type::DoubleTyID: {
+ double *DoublePtr = (double *) ArgDataPtr;
+ *DoublePtr = AV.DoubleVal;
+ return ArgDataPtr;
+ }
+ case Type::PointerTyID: {
+ void **PtrPtr = (void **) ArgDataPtr;
+ *PtrPtr = GVTOP(AV);
+ return ArgDataPtr;
+ }
+ default: break;
+ }
+ // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
+ report_fatal_error("Type value could not be mapped for use with libffi.");
+ return NULL;
+}
+
+static bool ffiInvoke(RawFunc Fn, Function *F,
+ const std::vector<GenericValue> &ArgVals,
+ const DataLayout *TD, GenericValue &Result) {
+ ffi_cif cif;
+ FunctionType *FTy = F->getFunctionType();
+ const unsigned NumArgs = F->arg_size();
+
+ // TODO: We don't have type information about the remaining arguments, because
+ // this information is never passed into ExecutionEngine::runFunction().
+ if (ArgVals.size() > NumArgs && F->isVarArg()) {
+ report_fatal_error("Calling external var arg function '" + F->getName()
+ + "' is not supported by the Interpreter.");
+ }
+
+ unsigned ArgBytes = 0;
+
+ std::vector<ffi_type*> args(NumArgs);
+ for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ const unsigned ArgNo = A->getArgNo();
+ Type *ArgTy = FTy->getParamType(ArgNo);
+ args[ArgNo] = ffiTypeFor(ArgTy);
+ ArgBytes += TD->getTypeStoreSize(ArgTy);
+ }
+
+ SmallVector<uint8_t, 128> ArgData;
+ ArgData.resize(ArgBytes);
+ uint8_t *ArgDataPtr = ArgData.data();
+ SmallVector<void*, 16> values(NumArgs);
+ for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ const unsigned ArgNo = A->getArgNo();
+ Type *ArgTy = FTy->getParamType(ArgNo);
+ values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr);
+ ArgDataPtr += TD->getTypeStoreSize(ArgTy);
+ }
+
+ Type *RetTy = FTy->getReturnType();
+ ffi_type *rtype = ffiTypeFor(RetTy);
+
+ if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) {
+ SmallVector<uint8_t, 128> ret;
+ if (RetTy->getTypeID() != Type::VoidTyID)
+ ret.resize(TD->getTypeStoreSize(RetTy));
+ ffi_call(&cif, Fn, ret.data(), values.data());
+ switch (RetTy->getTypeID()) {
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(RetTy)->getBitWidth()) {
+ case 8: Result.IntVal = APInt(8 , *(int8_t *) ret.data()); break;
+ case 16: Result.IntVal = APInt(16, *(int16_t*) ret.data()); break;
+ case 32: Result.IntVal = APInt(32, *(int32_t*) ret.data()); break;
+ case 64: Result.IntVal = APInt(64, *(int64_t*) ret.data()); break;
+ }
+ break;
+ case Type::FloatTyID: Result.FloatVal = *(float *) ret.data(); break;
+ case Type::DoubleTyID: Result.DoubleVal = *(double*) ret.data(); break;
+ case Type::PointerTyID: Result.PointerVal = *(void **) ret.data(); break;
+ default: break;
+ }
+ return true;
+ }
+
+ return false;
+}
+#endif // USE_LIBFFI
+
+GenericValue Interpreter::callExternalFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals) {
+ TheInterpreter = this;
+
+ FunctionsLock->acquire();
+
+ // Do a lookup to see if the function is in our cache... this should just be a
+ // deferred annotation!
+ std::map<const Function *, ExFunc>::iterator FI = ExportedFunctions->find(F);
+ if (ExFunc Fn = (FI == ExportedFunctions->end()) ? lookupFunction(F)
+ : FI->second) {
+ FunctionsLock->release();
+ return Fn(F->getFunctionType(), ArgVals);
+ }
+
+#ifdef USE_LIBFFI
+ std::map<const Function *, RawFunc>::iterator RF = RawFunctions->find(F);
+ RawFunc RawFn;
+ if (RF == RawFunctions->end()) {
+ RawFn = (RawFunc)(intptr_t)
+ sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName());
+ if (!RawFn)
+ RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F);
+ if (RawFn != 0)
+ RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later
+ } else {
+ RawFn = RF->second;
+ }
+
+ FunctionsLock->release();
+
+ GenericValue Result;
+ if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getDataLayout(), Result))
+ return Result;
+#endif // USE_LIBFFI
+
+ if (F->getName() == "__main")
+ errs() << "Tried to execute an unknown external function: "
+ << *F->getType() << " __main\n";
+ else
+ report_fatal_error("Tried to execute an unknown external function: " +
+ F->getName());
+#ifndef USE_LIBFFI
+ errs() << "Recompiling LLVM with --enable-libffi might help.\n";
+#endif
+ return GenericValue();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Functions "exported" to the running application...
+//
+
+// void atexit(Function*)
+static
+GenericValue lle_X_atexit(FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ assert(Args.size() == 1);
+ TheInterpreter->addAtExitHandler((Function*)GVTOP(Args[0]));
+ GenericValue GV;
+ GV.IntVal = 0;
+ return GV;
+}
+
+// void exit(int)
+static
+GenericValue lle_X_exit(FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ TheInterpreter->exitCalled(Args[0]);
+ return GenericValue();
+}
+
+// void abort(void)
+static
+GenericValue lle_X_abort(FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ //FIXME: should we report or raise here?
+ //report_fatal_error("Interpreted program raised SIGABRT");
+ raise (SIGABRT);
+ return GenericValue();
+}
+
+// int sprintf(char *, const char *, ...) - a very rough implementation to make
+// output useful.
+static
+GenericValue lle_X_sprintf(FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ char *OutputBuffer = (char *)GVTOP(Args[0]);
+ const char *FmtStr = (const char *)GVTOP(Args[1]);
+ unsigned ArgNo = 2;
+
+ // printf should return # chars printed. This is completely incorrect, but
+ // close enough for now.
+ GenericValue GV;
+ GV.IntVal = APInt(32, strlen(FmtStr));
+ while (1) {
+ switch (*FmtStr) {
+ case 0: return GV; // Null terminator...
+ default: // Normal nonspecial character
+ sprintf(OutputBuffer++, "%c", *FmtStr++);
+ break;
+ case '\\': { // Handle escape codes
+ sprintf(OutputBuffer, "%c%c", *FmtStr, *(FmtStr+1));
+ FmtStr += 2; OutputBuffer += 2;
+ break;
+ }
+ case '%': { // Handle format specifiers
+ char FmtBuf[100] = "", Buffer[1000] = "";
+ char *FB = FmtBuf;
+ *FB++ = *FmtStr++;
+ char Last = *FB++ = *FmtStr++;
+ unsigned HowLong = 0;
+ while (Last != 'c' && Last != 'd' && Last != 'i' && Last != 'u' &&
+ Last != 'o' && Last != 'x' && Last != 'X' && Last != 'e' &&
+ Last != 'E' && Last != 'g' && Last != 'G' && Last != 'f' &&
+ Last != 'p' && Last != 's' && Last != '%') {
+ if (Last == 'l' || Last == 'L') HowLong++; // Keep track of l's
+ Last = *FB++ = *FmtStr++;
+ }
+ *FB = 0;
+
+ switch (Last) {
+ case '%':
+ memcpy(Buffer, "%", 2); break;
+ case 'c':
+ sprintf(Buffer, FmtBuf, uint32_t(Args[ArgNo++].IntVal.getZExtValue()));
+ break;
+ case 'd': case 'i':
+ case 'u': case 'o':
+ case 'x': case 'X':
+ if (HowLong >= 1) {
+ if (HowLong == 1 &&
+ TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 &&
+ sizeof(long) < sizeof(int64_t)) {
+ // Make sure we use %lld with a 64 bit argument because we might be
+ // compiling LLI on a 32 bit compiler.
+ unsigned Size = strlen(FmtBuf);
+ FmtBuf[Size] = FmtBuf[Size-1];
+ FmtBuf[Size+1] = 0;
+ FmtBuf[Size-1] = 'l';
+ }
+ sprintf(Buffer, FmtBuf, Args[ArgNo++].IntVal.getZExtValue());
+ } else
+ sprintf(Buffer, FmtBuf,uint32_t(Args[ArgNo++].IntVal.getZExtValue()));
+ break;
+ case 'e': case 'E': case 'g': case 'G': case 'f':
+ sprintf(Buffer, FmtBuf, Args[ArgNo++].DoubleVal); break;
+ case 'p':
+ sprintf(Buffer, FmtBuf, (void*)GVTOP(Args[ArgNo++])); break;
+ case 's':
+ sprintf(Buffer, FmtBuf, (char*)GVTOP(Args[ArgNo++])); break;
+ default:
+ errs() << "<unknown printf code '" << *FmtStr << "'!>";
+ ArgNo++; break;
+ }
+ size_t Len = strlen(Buffer);
+ memcpy(OutputBuffer, Buffer, Len + 1);
+ OutputBuffer += Len;
+ }
+ break;
+ }
+ }
+}
+
+// int printf(const char *, ...) - a very rough implementation to make output
+// useful.
+static
+GenericValue lle_X_printf(FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ char Buffer[10000];
+ std::vector<GenericValue> NewArgs;
+ NewArgs.push_back(PTOGV((void*)&Buffer[0]));
+ NewArgs.insert(NewArgs.end(), Args.begin(), Args.end());
+ GenericValue GV = lle_X_sprintf(FT, NewArgs);
+ outs() << Buffer;
+ return GV;
+}
+
+// int sscanf(const char *format, ...);
+static
+GenericValue lle_X_sscanf(FunctionType *FT,
+ const std::vector<GenericValue> &args) {
+ assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!");
+
+ char *Args[10];
+ for (unsigned i = 0; i < args.size(); ++i)
+ Args[i] = (char*)GVTOP(args[i]);
+
+ GenericValue GV;
+ GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4],
+ Args[5], Args[6], Args[7], Args[8], Args[9]));
+ return GV;
+}
+
+// int scanf(const char *format, ...);
+static
+GenericValue lle_X_scanf(FunctionType *FT,
+ const std::vector<GenericValue> &args) {
+ assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!");
+
+ char *Args[10];
+ for (unsigned i = 0; i < args.size(); ++i)
+ Args[i] = (char*)GVTOP(args[i]);
+
+ GenericValue GV;
+ GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4],
+ Args[5], Args[6], Args[7], Args[8], Args[9]));
+ return GV;
+}
+
+// int fprintf(FILE *, const char *, ...) - a very rough implementation to make
+// output useful.
+static
+GenericValue lle_X_fprintf(FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ assert(Args.size() >= 2);
+ char Buffer[10000];
+ std::vector<GenericValue> NewArgs;
+ NewArgs.push_back(PTOGV(Buffer));
+ NewArgs.insert(NewArgs.end(), Args.begin()+1, Args.end());
+ GenericValue GV = lle_X_sprintf(FT, NewArgs);
+
+ fputs(Buffer, (FILE *) GVTOP(Args[0]));
+ return GV;
+}
+
+void Interpreter::initializeExternalFunctions() {
+ sys::ScopedLock Writer(*FunctionsLock);
+ FuncNames["lle_X_atexit"] = lle_X_atexit;
+ FuncNames["lle_X_exit"] = lle_X_exit;
+ FuncNames["lle_X_abort"] = lle_X_abort;
+
+ FuncNames["lle_X_printf"] = lle_X_printf;
+ FuncNames["lle_X_sprintf"] = lle_X_sprintf;
+ FuncNames["lle_X_sscanf"] = lle_X_sscanf;
+ FuncNames["lle_X_scanf"] = lle_X_scanf;
+ FuncNames["lle_X_fprintf"] = lle_X_fprintf;
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
new file mode 100644
index 000000000000..9ee9d9456d1d
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -0,0 +1,98 @@
+//===- Interpreter.cpp - Top-Level LLVM Interpreter Implementation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the top-level functionality for the LLVM interpreter.
+// This interpreter is designed to be a very simple, portable, inefficient
+// interpreter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Interpreter.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include <cstring>
+using namespace llvm;
+
+namespace {
+
+static struct RegisterInterp {
+ RegisterInterp() { Interpreter::Register(); }
+} InterpRegistrator;
+
+}
+
+extern "C" void LLVMLinkInInterpreter() { }
+
+/// create - Create a new interpreter object. This can never fail.
+///
+ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) {
+ // Tell this Module to materialize everything and release the GVMaterializer.
+ if (M->MaterializeAllPermanently(ErrStr))
+ // We got an error, just return 0
+ return 0;
+
+ return new Interpreter(M);
+}
+
+//===----------------------------------------------------------------------===//
+// Interpreter ctor - Initialize stuff
+//
+Interpreter::Interpreter(Module *M)
+ : ExecutionEngine(M), TD(M) {
+
+ memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
+ setDataLayout(&TD);
+ // Initialize the "backend"
+ initializeExecutionEngine();
+ initializeExternalFunctions();
+ emitGlobals();
+
+ IL = new IntrinsicLowering(TD);
+}
+
+Interpreter::~Interpreter() {
+ delete IL;
+}
+
+void Interpreter::runAtExitHandlers () {
+ while (!AtExitHandlers.empty()) {
+ callFunction(AtExitHandlers.back(), std::vector<GenericValue>());
+ AtExitHandlers.pop_back();
+ run();
+ }
+}
+
+/// run - Start execution with the specified function and arguments.
+///
+GenericValue
+Interpreter::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert (F && "Function *F was null at entry to run()");
+
+ // Try extra hard not to pass extra args to a function that isn't
+ // expecting them. C programmers frequently bend the rules and
+ // declare main() with fewer parameters than it actually gets
+ // passed, and the interpreter barfs if you pass a function more
+ // parameters than it is declared to take. This does not attempt to
+ // take into account gratuitous differences in declared types,
+ // though.
+ std::vector<GenericValue> ActualArgs;
+ const unsigned ArgCount = F->getFunctionType()->getNumParams();
+ for (unsigned i = 0; i < ArgCount; ++i)
+ ActualArgs.push_back(ArgValues[i]);
+
+ // Set up the function call.
+ callFunction(F, ActualArgs);
+
+ // Start executing the function.
+ run();
+
+ return ExitValue;
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
new file mode 100644
index 000000000000..2952d7eabe2b
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -0,0 +1,248 @@
+//===-- Interpreter.h ------------------------------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines the interpreter structure
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLI_INTERPRETER_H
+#define LLI_INTERPRETER_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+namespace llvm {
+
+class IntrinsicLowering;
+struct FunctionInfo;
+template<typename T> class generic_gep_type_iterator;
+class ConstantExpr;
+typedef generic_gep_type_iterator<User::const_op_iterator> gep_type_iterator;
+
+
+// AllocaHolder - Object to track all of the blocks of memory allocated by
+// alloca. When the function returns, this object is popped off the execution
+// stack, which causes the dtor to be run, which frees all the alloca'd memory.
+//
+class AllocaHolder {
+ friend class AllocaHolderHandle;
+ std::vector<void*> Allocations;
+ unsigned RefCnt;
+public:
+ AllocaHolder() : RefCnt(0) {}
+ void add(void *mem) { Allocations.push_back(mem); }
+ ~AllocaHolder() {
+ for (unsigned i = 0; i < Allocations.size(); ++i)
+ free(Allocations[i]);
+ }
+};
+
+// AllocaHolderHandle gives AllocaHolder value semantics so we can stick it into
+// a vector...
+//
+class AllocaHolderHandle {
+ AllocaHolder *H;
+public:
+ AllocaHolderHandle() : H(new AllocaHolder()) { H->RefCnt++; }
+ AllocaHolderHandle(const AllocaHolderHandle &AH) : H(AH.H) { H->RefCnt++; }
+ ~AllocaHolderHandle() { if (--H->RefCnt == 0) delete H; }
+
+ void add(void *mem) { H->add(mem); }
+};
+
+typedef std::vector<GenericValue> ValuePlaneTy;
+
+// ExecutionContext struct - This struct represents one stack frame currently
+// executing.
+//
+struct ExecutionContext {
+ Function *CurFunction;// The currently executing function
+ BasicBlock *CurBB; // The currently executing BB
+ BasicBlock::iterator CurInst; // The next instruction to execute
+ std::map<Value *, GenericValue> Values; // LLVM values used in this invocation
+ std::vector<GenericValue> VarArgs; // Values passed through an ellipsis
+ CallSite Caller; // Holds the call that called subframes.
+ // NULL if main func or debugger invoked fn
+ AllocaHolderHandle Allocas; // Track memory allocated by alloca
+};
+
+// Interpreter - This class represents the entirety of the interpreter.
+//
+class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
+ GenericValue ExitValue; // The return value of the called function
+ DataLayout TD;
+ IntrinsicLowering *IL;
+
+ // The runtime stack of executing code. The top of the stack is the current
+ // function record.
+ std::vector<ExecutionContext> ECStack;
+
+ // AtExitHandlers - List of functions to call when the program exits,
+ // registered with the atexit() library function.
+ std::vector<Function*> AtExitHandlers;
+
+public:
+ explicit Interpreter(Module *M);
+ ~Interpreter();
+
+ /// runAtExitHandlers - Run any functions registered by the program's calls to
+ /// atexit(3), which we intercept and store in AtExitHandlers.
+ ///
+ void runAtExitHandlers();
+
+ static void Register() {
+ InterpCtor = create;
+ }
+
+ /// create - Create an interpreter ExecutionEngine. This can never fail.
+ ///
+ static ExecutionEngine *create(Module *M, std::string *ErrorStr = 0);
+
+ /// run - Start execution with the specified function and arguments.
+ ///
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) {
+ // FIXME: not implemented.
+ return 0;
+ }
+
+ /// recompileAndRelinkFunction - For the interpreter, functions are always
+ /// up-to-date.
+ ///
+ virtual void *recompileAndRelinkFunction(Function *F) {
+ return getPointerToFunction(F);
+ }
+
+ /// freeMachineCodeForFunction - The interpreter does not generate any code.
+ ///
+ void freeMachineCodeForFunction(Function *F) { }
+
+ // Methods used to execute code:
+ // Place a call on the stack
+ void callFunction(Function *F, const std::vector<GenericValue> &ArgVals);
+ void run(); // Execute instructions until nothing left to do
+
+ // Opcode Implementations
+ void visitReturnInst(ReturnInst &I);
+ void visitBranchInst(BranchInst &I);
+ void visitSwitchInst(SwitchInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+
+ void visitBinaryOperator(BinaryOperator &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitFCmpInst(FCmpInst &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitPHINode(PHINode &PN) {
+ llvm_unreachable("PHI nodes already handled!");
+ }
+ void visitTruncInst(TruncInst &I);
+ void visitZExtInst(ZExtInst &I);
+ void visitSExtInst(SExtInst &I);
+ void visitFPTruncInst(FPTruncInst &I);
+ void visitFPExtInst(FPExtInst &I);
+ void visitUIToFPInst(UIToFPInst &I);
+ void visitSIToFPInst(SIToFPInst &I);
+ void visitFPToUIInst(FPToUIInst &I);
+ void visitFPToSIInst(FPToSIInst &I);
+ void visitPtrToIntInst(PtrToIntInst &I);
+ void visitIntToPtrInst(IntToPtrInst &I);
+ void visitBitCastInst(BitCastInst &I);
+ void visitSelectInst(SelectInst &I);
+
+
+ void visitCallSite(CallSite CS);
+ void visitCallInst(CallInst &I) { visitCallSite (CallSite (&I)); }
+ void visitInvokeInst(InvokeInst &I) { visitCallSite (CallSite (&I)); }
+ void visitUnreachableInst(UnreachableInst &I);
+
+ void visitShl(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+
+ void visitVAArgInst(VAArgInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInstruction(Instruction &I) {
+ errs() << I << "\n";
+ llvm_unreachable("Instruction not interpretable yet!");
+ }
+
+ GenericValue callExternalFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals);
+ void exitCalled(GenericValue GV);
+
+ void addAtExitHandler(Function *F) {
+ AtExitHandlers.push_back(F);
+ }
+
+ GenericValue *getFirstVarArg () {
+ return &(ECStack.back ().VarArgs[0]);
+ }
+
+private: // Helper functions
+ GenericValue executeGEPOperation(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, ExecutionContext &SF);
+
+ // SwitchToNewBasicBlock - Start execution in a new basic block and run any
+ // PHI nodes in the top of the block. This is used for intraprocedural
+ // control flow.
+ //
+ void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF);
+
+ void *getPointerToFunction(Function *F) { return (void*)F; }
+ void *getPointerToBasicBlock(BasicBlock *BB) { return (void*)BB; }
+
+ void initializeExecutionEngine() { }
+ void initializeExternalFunctions();
+ GenericValue getConstantExprValue(ConstantExpr *CE, ExecutionContext &SF);
+ GenericValue getOperandValue(Value *V, ExecutionContext &SF);
+ GenericValue executeTruncInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeSExtInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeZExtInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPTruncInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPExtInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPToUIInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPToSIInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeUIToFPInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeSIToFPInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executePtrToIntInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeIntToPtrInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeBitCastInst(Value *SrcVal, Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
+ Type *Ty, ExecutionContext &SF);
+ void popStackAndReturnValueToCaller(Type *RetTy, GenericValue Result);
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp
new file mode 100644
index 000000000000..53ea0a260087
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp
@@ -0,0 +1,848 @@
+//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tool implements a just-in-time compiler for LLVM, allowing direct
+// execution of LLVM bitcode in an efficient manner.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#ifdef __APPLE__
+// Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead
+// of atexit). It passes the address of linker generated symbol __dso_handle
+// to the function.
+// This configuration change happened at version 5330.
+# include <AvailabilityMacros.h>
+# if defined(MAC_OS_X_VERSION_10_4) && \
+ ((MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4) || \
+ (MAC_OS_X_VERSION_MIN_REQUIRED == MAC_OS_X_VERSION_10_4 && \
+ __APPLE_CC__ >= 5330))
+# ifndef HAVE___DSO_HANDLE
+# define HAVE___DSO_HANDLE 1
+# endif
+# endif
+#endif
+
+#if HAVE___DSO_HANDLE
+extern void *__dso_handle __attribute__ ((__visibility__ ("hidden")));
+#endif
+
+namespace {
+
+static struct RegisterJIT {
+ RegisterJIT() { JIT::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInJIT() {
+}
+
+// Determine whether we can register EH tables.
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \
+ !defined(__USING_SJLJ_EXCEPTIONS__))
+#define HAVE_EHTABLE_SUPPORT 1
+#else
+#define HAVE_EHTABLE_SUPPORT 0
+#endif
+
+#if HAVE_EHTABLE_SUPPORT
+
+// libgcc defines the __register_frame function to dynamically register new
+// dwarf frames for exception handling. This functionality is not portable
+// across compilers and is only provided by GCC. We use the __register_frame
+// function here so that code generated by the JIT cooperates with the unwinding
+// runtime of libgcc. When JITting with exception handling enable, LLVM
+// generates dwarf frames and registers it to libgcc with __register_frame.
+//
+// The __register_frame function works with Linux.
+//
+// Unfortunately, this functionality seems to be in libgcc after the unwinding
+// library of libgcc for darwin was written. The code for darwin overwrites the
+// value updated by __register_frame with a value fetched with "keymgr".
+// "keymgr" is an obsolete functionality, which should be rewritten some day.
+// In the meantime, since "keymgr" is on all libgccs shipped with apple-gcc, we
+// need a workaround in LLVM which uses the "keymgr" to dynamically modify the
+// values of an opaque key, used by libgcc to find dwarf tables.
+
+extern "C" void __register_frame(void*);
+extern "C" void __deregister_frame(void*);
+
+#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED <= 1050
+# define USE_KEYMGR 1
+#else
+# define USE_KEYMGR 0
+#endif
+
+#if USE_KEYMGR
+
+namespace {
+
+// LibgccObject - This is the structure defined in libgcc. There is no #include
+// provided for this structure, so we also define it here. libgcc calls it
+// "struct object". The structure is undocumented in libgcc.
+struct LibgccObject {
+ void *unused1;
+ void *unused2;
+ void *unused3;
+
+ /// frame - Pointer to the exception table.
+ void *frame;
+
+ /// encoding - The encoding of the object?
+ union {
+ struct {
+ unsigned long sorted : 1;
+ unsigned long from_array : 1;
+ unsigned long mixed_encoding : 1;
+ unsigned long encoding : 8;
+ unsigned long count : 21;
+ } b;
+ size_t i;
+ } encoding;
+
+ /// fde_end - libgcc defines this field only if some macro is defined. We
+ /// include this field even if it may not there, to make libgcc happy.
+ char *fde_end;
+
+ /// next - At least we know it's a chained list!
+ struct LibgccObject *next;
+};
+
+// "kemgr" stuff. Apparently, all frame tables are stored there.
+extern "C" void _keymgr_set_and_unlock_processwide_ptr(int, void *);
+extern "C" void *_keymgr_get_and_lock_processwide_ptr(int);
+#define KEYMGR_GCC3_DW2_OBJ_LIST 302 /* Dwarf2 object list */
+
+/// LibgccObjectInfo - libgcc defines this struct as km_object_info. It
+/// probably contains all dwarf tables that are loaded.
+struct LibgccObjectInfo {
+
+ /// seenObjects - LibgccObjects already parsed by the unwinding runtime.
+ ///
+ struct LibgccObject* seenObjects;
+
+ /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime.
+ ///
+ struct LibgccObject* unseenObjects;
+
+ unsigned unused[2];
+};
+
+/// darwin_register_frame - Since __register_frame does not work with darwin's
+/// libgcc,we provide our own function, which "tricks" libgcc by modifying the
+/// "Dwarf2 object list" key.
+void DarwinRegisterFrame(void* FrameBegin) {
+ // Get the key.
+ LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
+ _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+ assert(LOI && "This should be preallocated by the runtime");
+
+ // Allocate a new LibgccObject to represent this frame. Deallocation of this
+ // object may be impossible: since darwin code in libgcc was written after
+ // the ability to dynamically register frames, things may crash if we
+ // deallocate it.
+ struct LibgccObject* ob = (struct LibgccObject*)
+ malloc(sizeof(struct LibgccObject));
+
+ // Do like libgcc for the values of the field.
+ ob->unused1 = (void *)-1;
+ ob->unused2 = 0;
+ ob->unused3 = 0;
+ ob->frame = FrameBegin;
+ ob->encoding.i = 0;
+ ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit;
+
+ // Put the info on both places, as libgcc uses the first or the second
+ // field. Note that we rely on having two pointers here. If fde_end was a
+ // char, things would get complicated.
+ ob->fde_end = (char*)LOI->unseenObjects;
+ ob->next = LOI->unseenObjects;
+
+ // Update the key's unseenObjects list.
+ LOI->unseenObjects = ob;
+
+ // Finally update the "key". Apparently, libgcc requires it.
+ _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST,
+ LOI);
+
+}
+
+}
+#endif // __APPLE__
+#endif // HAVE_EHTABLE_SUPPORT
+
+/// createJIT - This is the factory method for creating a JIT for the current
+/// machine, it does not fall back to the interpreter. This takes ownership
+/// of the module.
+ExecutionEngine *JIT::createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ bool GVsWithCode,
+ TargetMachine *TM) {
+ // Try to register the program as a source of symbols to resolve against.
+ //
+ // FIXME: Don't do this here.
+ sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+
+ // If the target supports JIT code generation, create the JIT.
+ if (TargetJITInfo *TJ = TM->getJITInfo()) {
+ return new JIT(M, *TM, *TJ, JMM, GVsWithCode);
+ } else {
+ if (ErrorStr)
+ *ErrorStr = "target does not support JIT code generation";
+ return 0;
+ }
+}
+
+namespace {
+/// This class supports the global getPointerToNamedFunction(), which allows
+/// bugpoint or gdb users to search for a function by name without any context.
+class JitPool {
+ SmallPtrSet<JIT*, 1> JITs; // Optimize for process containing just 1 JIT.
+ mutable sys::Mutex Lock;
+public:
+ void Add(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.insert(jit);
+ }
+ void Remove(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.erase(jit);
+ }
+ void *getPointerToNamedFunction(const char *Name) const {
+ MutexGuard guard(Lock);
+ assert(JITs.size() != 0 && "No Jit registered");
+ //search function in every instance of JIT
+ for (SmallPtrSet<JIT*, 1>::const_iterator Jit = JITs.begin(),
+ end = JITs.end();
+ Jit != end; ++Jit) {
+ if (Function *F = (*Jit)->FindFunctionNamed(Name))
+ return (*Jit)->getPointerToFunction(F);
+ }
+ // The function is not available : fallback on the first created (will
+ // search in symbol of the current program/library)
+ return (*JITs.begin())->getPointerToNamedFunction(Name);
+ }
+};
+ManagedStatic<JitPool> AllJits;
+}
+extern "C" {
+ // getPointerToNamedFunction - This function is used as a global wrapper to
+ // JIT::getPointerToNamedFunction for the purpose of resolving symbols when
+ // bugpoint is debugging the JIT. In that scenario, we are loading an .so and
+ // need to resolve function(s) that are being mis-codegenerated, so we need to
+ // resolve their addresses at runtime, and this is the way to do it.
+ void *getPointerToNamedFunction(const char *Name) {
+ return AllJits->getPointerToNamedFunction(Name);
+ }
+}
+
+JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *jmm, bool GVsWithCode)
+ : ExecutionEngine(M), TM(tm), TJI(tji),
+ JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()),
+ AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) {
+ setDataLayout(TM.getDataLayout());
+
+ jitstate = new JITState(M);
+
+ // Initialize JCE
+ JCE = createEmitter(*this, JMM, TM);
+
+ // Register in global list of all JITs.
+ AllJits->Add(this);
+
+ // Add target data
+ MutexGuard locked(lock);
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new DataLayout(*TM.getDataLayout()));
+
+ // Turn the machine code intermediate representation into bytes in memory that
+ // may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
+ report_fatal_error("Target does not support machine code emission!");
+ }
+
+ // Register routine for informing unwinding runtime about new EH frames
+#if HAVE_EHTABLE_SUPPORT
+#if USE_KEYMGR
+ struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
+ _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+
+ // The key is created on demand, and libgcc creates it the first time an
+ // exception occurs. Since we need the key to register frames, we create
+ // it now.
+ if (!LOI)
+ LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1);
+ _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI);
+ InstallExceptionTableRegister(DarwinRegisterFrame);
+ // Not sure about how to deregister on Darwin.
+#else
+ InstallExceptionTableRegister(__register_frame);
+ InstallExceptionTableDeregister(__deregister_frame);
+#endif // __APPLE__
+#endif // HAVE_EHTABLE_SUPPORT
+
+ // Initialize passes.
+ PM.doInitialization();
+}
+
+JIT::~JIT() {
+ // Unregister all exception tables registered by this JIT.
+ DeregisterAllTables();
+ // Cleanup.
+ AllJits->Remove(this);
+ delete jitstate;
+ delete JCE;
+ // JMM is a ownership of JCE, so we no need delete JMM here.
+ delete &TM;
+}
+
+/// addModule - Add a new Module to the JIT. If we previously removed the last
+/// Module, we need re-initialize jitstate with a valid Module.
+void JIT::addModule(Module *M) {
+ MutexGuard locked(lock);
+
+ if (Modules.empty()) {
+ assert(!jitstate && "jitstate should be NULL if Modules vector is empty!");
+
+ jitstate = new JITState(M);
+
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new DataLayout(*TM.getDataLayout()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
+ report_fatal_error("Target does not support machine code emission!");
+ }
+
+ // Initialize passes.
+ PM.doInitialization();
+ }
+
+ ExecutionEngine::addModule(M);
+}
+
+/// removeModule - If we are removing the last Module, invalidate the jitstate
+/// since the PassManager it contains references a released Module.
+bool JIT::removeModule(Module *M) {
+ bool result = ExecutionEngine::removeModule(M);
+
+ MutexGuard locked(lock);
+
+ if (jitstate && jitstate->getModule() == M) {
+ delete jitstate;
+ jitstate = 0;
+ }
+
+ if (!jitstate && !Modules.empty()) {
+ jitstate = new JITState(Modules[0]);
+
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new DataLayout(*TM.getDataLayout()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
+ report_fatal_error("Target does not support machine code emission!");
+ }
+
+ // Initialize passes.
+ PM.doInitialization();
+ }
+ return result;
+}
+
+/// run - Start execution with the specified function and arguments.
+///
+GenericValue JIT::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert(F && "Function *F was null at entry to run()");
+
+ void *FPtr = getPointerToFunction(F);
+ assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
+ FunctionType *FTy = F->getFunctionType();
+ Type *RetTy = FTy->getReturnType();
+
+ assert((FTy->getNumParams() == ArgValues.size() ||
+ (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
+ "Wrong number of arguments passed into function!");
+ assert(FTy->getNumParams() == ArgValues.size() &&
+ "This doesn't support passing arguments through varargs (yet)!");
+
+ // Handle some common cases first. These cases correspond to common `main'
+ // prototypes.
+ if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
+ switch (ArgValues.size()) {
+ case 3:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy() &&
+ FTy->getParamType(2)->isPointerTy()) {
+ int (*PF)(int, char **, const char **) =
+ (int(*)(int, char **, const char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1]),
+ (const char **)GVTOP(ArgValues[2])));
+ return rv;
+ }
+ break;
+ case 2:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy()) {
+ int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1])));
+ return rv;
+ }
+ break;
+ case 1:
+ if (FTy->getParamType(0)->isIntegerTy(32)) {
+ GenericValue rv;
+ int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
+ return rv;
+ }
+ if (FTy->getParamType(0)->isPointerTy()) {
+ GenericValue rv;
+ int (*PF)(char *) = (int(*)(char *))(intptr_t)FPtr;
+ rv.IntVal = APInt(32, PF((char*)GVTOP(ArgValues[0])));
+ return rv;
+ }
+ break;
+ }
+ }
+
+ // Handle cases where no arguments are passed first.
+ if (ArgValues.empty()) {
+ GenericValue rv;
+ switch (RetTy->getTypeID()) {
+ default: llvm_unreachable("Unknown return type for function call!");
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
+ if (BitWidth == 1)
+ rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 8)
+ rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 16)
+ rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 32)
+ rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 64)
+ rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
+ else
+ llvm_unreachable("Integer types > 64 bits not supported");
+ return rv;
+ }
+ case Type::VoidTyID:
+ rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
+ return rv;
+ case Type::FloatTyID:
+ rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::DoubleTyID:
+ rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ llvm_unreachable("long double not supported yet");
+ case Type::PointerTyID:
+ return PTOGV(((void*(*)())(intptr_t)FPtr)());
+ }
+ }
+
+ // Okay, this is not one of our quick and easy cases. Because we don't have a
+ // full FFI, we have to codegen a nullary stub function that just calls the
+ // function we are interested in, passing in constants for all of the
+ // arguments. Make this function and return.
+
+ // First, create the function.
+ FunctionType *STy=FunctionType::get(RetTy, false);
+ Function *Stub = Function::Create(STy, Function::InternalLinkage, "",
+ F->getParent());
+
+ // Insert a basic block.
+ BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub);
+
+ // Convert all of the GenericValue arguments over to constants. Note that we
+ // currently don't support varargs.
+ SmallVector<Value*, 8> Args;
+ for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) {
+ Constant *C = 0;
+ Type *ArgTy = FTy->getParamType(i);
+ const GenericValue &AV = ArgValues[i];
+ switch (ArgTy->getTypeID()) {
+ default: llvm_unreachable("Unknown argument type for function call!");
+ case Type::IntegerTyID:
+ C = ConstantInt::get(F->getContext(), AV.IntVal);
+ break;
+ case Type::FloatTyID:
+ C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal));
+ break;
+ case Type::DoubleTyID:
+ C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal));
+ break;
+ case Type::PPC_FP128TyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ C = ConstantFP::get(F->getContext(), APFloat(ArgTy->getFltSemantics(),
+ AV.IntVal));
+ break;
+ case Type::PointerTyID:
+ void *ArgPtr = GVTOP(AV);
+ if (sizeof(void*) == 4)
+ C = ConstantInt::get(Type::getInt32Ty(F->getContext()),
+ (int)(intptr_t)ArgPtr);
+ else
+ C = ConstantInt::get(Type::getInt64Ty(F->getContext()),
+ (intptr_t)ArgPtr);
+ // Cast the integer to pointer
+ C = ConstantExpr::getIntToPtr(C, ArgTy);
+ break;
+ }
+ Args.push_back(C);
+ }
+
+ CallInst *TheCall = CallInst::Create(F, Args, "", StubBB);
+ TheCall->setCallingConv(F->getCallingConv());
+ TheCall->setTailCall();
+ if (!TheCall->getType()->isVoidTy())
+ // Return result of the call.
+ ReturnInst::Create(F->getContext(), TheCall, StubBB);
+ else
+ ReturnInst::Create(F->getContext(), StubBB); // Just return void.
+
+ // Finally, call our nullary stub function.
+ GenericValue Result = runFunction(Stub, std::vector<GenericValue>());
+ // Erase it, since no other function can have a reference to it.
+ Stub->eraseFromParent();
+ // And return the result.
+ return Result;
+}
+
+void JIT::RegisterJITEventListener(JITEventListener *L) {
+ if (L == NULL)
+ return;
+ MutexGuard locked(lock);
+ EventListeners.push_back(L);
+}
+void JIT::UnregisterJITEventListener(JITEventListener *L) {
+ if (L == NULL)
+ return;
+ MutexGuard locked(lock);
+ std::vector<JITEventListener*>::reverse_iterator I=
+ std::find(EventListeners.rbegin(), EventListeners.rend(), L);
+ if (I != EventListeners.rend()) {
+ std::swap(*I, EventListeners.back());
+ EventListeners.pop_back();
+ }
+}
+void JIT::NotifyFunctionEmitted(
+ const Function &F,
+ void *Code, size_t Size,
+ const JITEvent_EmittedFunctionDetails &Details) {
+ MutexGuard locked(lock);
+ for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+ EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details);
+ }
+}
+
+void JIT::NotifyFreeingMachineCode(void *OldPtr) {
+ MutexGuard locked(lock);
+ for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+ EventListeners[I]->NotifyFreeingMachineCode(OldPtr);
+ }
+}
+
+/// runJITOnFunction - Run the FunctionPassManager full of
+/// just-in-time compilation passes on F, hopefully filling in
+/// GlobalAddress[F] with the address of F's machine code.
+///
+void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
+ MutexGuard locked(lock);
+
+ class MCIListener : public JITEventListener {
+ MachineCodeInfo *const MCI;
+ public:
+ MCIListener(MachineCodeInfo *mci) : MCI(mci) {}
+ virtual void NotifyFunctionEmitted(const Function &,
+ void *Code, size_t Size,
+ const EmittedFunctionDetails &) {
+ MCI->setAddress(Code);
+ MCI->setSize(Size);
+ }
+ };
+ MCIListener MCIL(MCI);
+ if (MCI)
+ RegisterJITEventListener(&MCIL);
+
+ runJITOnFunctionUnlocked(F, locked);
+
+ if (MCI)
+ UnregisterJITEventListener(&MCIL);
+}
+
+void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
+ assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!");
+
+ jitTheFunction(F, locked);
+
+ // If the function referred to another function that had not yet been
+ // read from bitcode, and we are jitting non-lazily, emit it now.
+ while (!jitstate->getPendingFunctions(locked).empty()) {
+ Function *PF = jitstate->getPendingFunctions(locked).back();
+ jitstate->getPendingFunctions(locked).pop_back();
+
+ assert(!PF->hasAvailableExternallyLinkage() &&
+ "Externally-defined function should not be in pending list.");
+
+ jitTheFunction(PF, locked);
+
+ // Now that the function has been jitted, ask the JITEmitter to rewrite
+ // the stub with real address of the function.
+ updateFunctionStub(PF);
+ }
+}
+
+void JIT::jitTheFunction(Function *F, const MutexGuard &locked) {
+ isAlreadyCodeGenerating = true;
+ jitstate->getPM(locked).run(*F);
+ isAlreadyCodeGenerating = false;
+
+ // clear basic block addresses after this function is done
+ getBasicBlockAddressMap(locked).clear();
+}
+
+/// getPointerToFunction - This method is used to get the address of the
+/// specified function, compiling it if necessary.
+///
+void *JIT::getPointerToFunction(Function *F) {
+
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr; // Check if function already code gen'd
+
+ MutexGuard locked(lock);
+
+ // Now that this thread owns the lock, make sure we read in the function if it
+ // exists in this Module.
+ std::string ErrorMsg;
+ if (F->Materialize(&ErrorMsg)) {
+ report_fatal_error("Error reading function '" + F->getName()+
+ "' from bitcode file: " + ErrorMsg);
+ }
+
+ // ... and check if another thread has already code gen'd the function.
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr;
+
+ if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
+ bool AbortOnFailure = !F->hasExternalWeakLinkage();
+ void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
+ addGlobalMapping(F, Addr);
+ return Addr;
+ }
+
+ runJITOnFunctionUnlocked(F, locked);
+
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr && "Code generation didn't add function to GlobalAddress table!");
+ return Addr;
+}
+
+void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) {
+ MutexGuard locked(lock);
+
+ BasicBlockAddressMapTy::iterator I =
+ getBasicBlockAddressMap(locked).find(BB);
+ if (I == getBasicBlockAddressMap(locked).end()) {
+ getBasicBlockAddressMap(locked)[BB] = Addr;
+ } else {
+ // ignore repeats: some BBs can be split into few MBBs?
+ }
+}
+
+void JIT::clearPointerToBasicBlock(const BasicBlock *BB) {
+ MutexGuard locked(lock);
+ getBasicBlockAddressMap(locked).erase(BB);
+}
+
+void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
+ // make sure it's function is compiled by JIT
+ (void)getPointerToFunction(BB->getParent());
+
+ // resolve basic block address
+ MutexGuard locked(lock);
+
+ BasicBlockAddressMapTy::iterator I =
+ getBasicBlockAddressMap(locked).find(BB);
+ if (I != getBasicBlockAddressMap(locked).end()) {
+ return I->second;
+ } else {
+ llvm_unreachable("JIT does not have BB address for address-of-label, was"
+ " it eliminated by optimizer?");
+ }
+}
+
+void *JIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure){
+ if (!isSymbolSearchingDisabled()) {
+ void *ptr = JMM->getPointerToNamedFunction(Name, false);
+ if (ptr)
+ return ptr;
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
+
+
+/// getOrEmitGlobalVariable - Return the address of the specified global
+/// variable, possibly emitting it to memory if needed. This is used by the
+/// Emitter.
+void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
+ MutexGuard locked(lock);
+
+ void *Ptr = getPointerToGlobalIfAvailable(GV);
+ if (Ptr) return Ptr;
+
+ // If the global is external, just remember the address.
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) {
+#if HAVE___DSO_HANDLE
+ if (GV->getName() == "__dso_handle")
+ return (void*)&__dso_handle;
+#endif
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
+ if (Ptr == 0) {
+ report_fatal_error("Could not resolve external global address: "
+ +GV->getName());
+ }
+ addGlobalMapping(GV, Ptr);
+ } else {
+ // If the global hasn't been emitted to memory yet, allocate space and
+ // emit it into memory.
+ Ptr = getMemoryForGV(GV);
+ addGlobalMapping(GV, Ptr);
+ EmitGlobalVariable(GV); // Initialize the variable.
+ }
+ return Ptr;
+}
+
+/// recompileAndRelinkFunction - This method is used to force a function
+/// which has already been compiled, to be compiled again, possibly
+/// after it has been modified. Then the entry to the old copy is overwritten
+/// with a branch to the new copy. If there was no old copy, this acts
+/// just like JIT::getPointerToFunction().
+///
+void *JIT::recompileAndRelinkFunction(Function *F) {
+ void *OldAddr = getPointerToGlobalIfAvailable(F);
+
+ // If it's not already compiled there is no reason to patch it up.
+ if (OldAddr == 0) { return getPointerToFunction(F); }
+
+ // Delete the old function mapping.
+ addGlobalMapping(F, 0);
+
+ // Recodegen the function
+ runJITOnFunction(F);
+
+ // Update state, forward the old function to the new function.
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr && "Code generation didn't add function to GlobalAddress table!");
+ TJI.replaceMachineCodeForFunction(OldAddr, Addr);
+ return Addr;
+}
+
+/// getMemoryForGV - This method abstracts memory allocation of global
+/// variable so that the JIT can allocate thread local variables depending
+/// on the target.
+///
+char* JIT::getMemoryForGV(const GlobalVariable* GV) {
+ char *Ptr;
+
+ // GlobalVariable's which are not "constant" will cause trouble in a server
+ // situation. It's returned in the same block of memory as code which may
+ // not be writable.
+ if (isGVCompilationDisabled() && !GV->isConstant()) {
+ report_fatal_error("Compilation of non-internal GlobalValue is disabled!");
+ }
+
+ // Some applications require globals and code to live together, so they may
+ // be allocated into the same buffer, but in general globals are allocated
+ // through the memory manager which puts them near the code but not in the
+ // same buffer.
+ Type *GlobalType = GV->getType()->getElementType();
+ size_t S = getDataLayout()->getTypeAllocSize(GlobalType);
+ size_t A = getDataLayout()->getPreferredAlignment(GV);
+ if (GV->isThreadLocal()) {
+ MutexGuard locked(lock);
+ Ptr = TJI.allocateThreadLocalMemory(S);
+ } else if (TJI.allocateSeparateGVMemory()) {
+ if (A <= 8) {
+ Ptr = (char*)malloc(S);
+ } else {
+ // Allocate S+A bytes of memory, then use an aligned pointer within that
+ // space.
+ Ptr = (char*)malloc(S+A);
+ unsigned MisAligned = ((intptr_t)Ptr & (A-1));
+ Ptr = Ptr + (MisAligned ? (A-MisAligned) : 0);
+ }
+ } else if (AllocateGVsWithCode) {
+ Ptr = (char*)JCE->allocateSpace(S, A);
+ } else {
+ Ptr = (char*)JCE->allocateGlobal(S, A);
+ }
+ return Ptr;
+}
+
+void JIT::addPendingFunction(Function *F) {
+ MutexGuard locked(lock);
+ jitstate->getPendingFunctions(locked).push_back(F);
+}
+
+
+JITEventListener::~JITEventListener() {}
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h
new file mode 100644
index 000000000000..2ae155bebf40
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h
@@ -0,0 +1,226 @@
+//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the top-level JIT data structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef JIT_H
+#define JIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/ValueHandle.h"
+
+namespace llvm {
+
+class Function;
+struct JITEvent_EmittedFunctionDetails;
+class MachineCodeEmitter;
+class MachineCodeInfo;
+class TargetJITInfo;
+class TargetMachine;
+
+class JITState {
+private:
+ FunctionPassManager PM; // Passes to compile a function
+ Module *M; // Module used to create the PM
+
+ /// PendingFunctions - Functions which have not been code generated yet, but
+ /// were called from a function being code generated.
+ std::vector<AssertingVH<Function> > PendingFunctions;
+
+public:
+ explicit JITState(Module *M) : PM(M), M(M) {}
+
+ FunctionPassManager &getPM(const MutexGuard &L) {
+ return PM;
+ }
+
+ Module *getModule() const { return M; }
+ std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){
+ return PendingFunctions;
+ }
+};
+
+
+class JIT : public ExecutionEngine {
+ /// types
+ typedef ValueMap<const BasicBlock *, void *>
+ BasicBlockAddressMapTy;
+ /// data
+ TargetMachine &TM; // The current target we are compiling to
+ TargetJITInfo &TJI; // The JITInfo for the target we are compiling to
+ JITCodeEmitter *JCE; // JCE object
+ JITMemoryManager *JMM;
+ std::vector<JITEventListener*> EventListeners;
+
+ /// AllocateGVsWithCode - Some applications require that global variables and
+ /// code be allocated into the same region of memory, in which case this flag
+ /// should be set to true. Doing so breaks freeMachineCodeForFunction.
+ bool AllocateGVsWithCode;
+
+ /// True while the JIT is generating code. Used to assert against recursive
+ /// entry.
+ bool isAlreadyCodeGenerating;
+
+ JITState *jitstate;
+
+ /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their
+ /// actualized version, only filled for basic blocks that have their address
+ /// taken.
+ BasicBlockAddressMapTy BasicBlockAddressMap;
+
+
+ JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, bool AllocateGVsWithCode);
+public:
+ ~JIT();
+
+ static void Register() {
+ JITCtor = createJIT;
+ }
+
+ /// getJITInfo - Return the target JIT information structure.
+ ///
+ TargetJITInfo &getJITInfo() const { return TJI; }
+
+ /// create - Create an return a new JIT compiler if there is one available
+ /// for the current target. Otherwise, return null.
+ ///
+ static ExecutionEngine *create(Module *M,
+ std::string *Err,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel =
+ CodeGenOpt::Default,
+ bool GVsWithCode = true,
+ Reloc::Model RM = Reloc::Default,
+ CodeModel::Model CMM = CodeModel::JITDefault) {
+ return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode,
+ RM, CMM);
+ }
+
+ virtual void addModule(Module *M);
+
+ /// removeModule - Remove a Module from the list of modules. Returns true if
+ /// M is found.
+ virtual bool removeModule(Module *M);
+
+ /// runFunction - Start execution with the specified function and arguments.
+ ///
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the MemoryManager. As such it is only
+ /// useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
+ // CompilationCallback - Invoked the first time that a call site is found,
+ // which causes lazy compilation of the target function.
+ //
+ static void CompilationCallback();
+
+ /// getPointerToFunction - This returns the address of the specified function,
+ /// compiling it if necessary.
+ ///
+ void *getPointerToFunction(Function *F);
+
+ /// addPointerToBasicBlock - Adds address of the specific basic block.
+ void addPointerToBasicBlock(const BasicBlock *BB, void *Addr);
+
+ /// clearPointerToBasicBlock - Removes address of specific basic block.
+ void clearPointerToBasicBlock(const BasicBlock *BB);
+
+ /// getPointerToBasicBlock - This returns the address of the specified basic
+ /// block, assuming function is compiled.
+ void *getPointerToBasicBlock(BasicBlock *BB);
+
+ /// getOrEmitGlobalVariable - Return the address of the specified global
+ /// variable, possibly emitting it to memory if needed. This is used by the
+ /// Emitter.
+ void *getOrEmitGlobalVariable(const GlobalVariable *GV);
+
+ /// getPointerToFunctionOrStub - If the specified function has been
+ /// code-gen'd, return a pointer to the function. If not, compile it, or use
+ /// a stub to implement lazy compilation if available.
+ ///
+ void *getPointerToFunctionOrStub(Function *F);
+
+ /// recompileAndRelinkFunction - This method is used to force a function
+ /// which has already been compiled, to be compiled again, possibly
+ /// after it has been modified. Then the entry to the old copy is overwritten
+ /// with a branch to the new copy. If there was no old copy, this acts
+ /// just like JIT::getPointerToFunction().
+ ///
+ void *recompileAndRelinkFunction(Function *F);
+
+ /// freeMachineCodeForFunction - deallocate memory used to code-generate this
+ /// Function.
+ ///
+ void freeMachineCodeForFunction(Function *F);
+
+ /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd
+ /// function was encountered. Add it to a pending list to be processed after
+ /// the current function.
+ ///
+ void addPendingFunction(Function *F);
+
+ /// getCodeEmitter - Return the code emitter this JIT is emitting into.
+ ///
+ JITCodeEmitter *getCodeEmitter() const { return JCE; }
+
+ static ExecutionEngine *createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ bool GVsWithCode,
+ TargetMachine *TM);
+
+ // Run the JIT on F and return information about the generated code
+ void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
+
+ virtual void RegisterJITEventListener(JITEventListener *L);
+ virtual void UnregisterJITEventListener(JITEventListener *L);
+ /// These functions correspond to the methods on JITEventListener. They
+ /// iterate over the registered listeners and call the corresponding method on
+ /// each.
+ void NotifyFunctionEmitted(
+ const Function &F, void *Code, size_t Size,
+ const JITEvent_EmittedFunctionDetails &Details);
+ void NotifyFreeingMachineCode(void *OldPtr);
+
+ BasicBlockAddressMapTy &
+ getBasicBlockAddressMap(const MutexGuard &) {
+ return BasicBlockAddressMap;
+ }
+
+
+private:
+ static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
+ TargetMachine &tm);
+ void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
+ void updateFunctionStub(Function *F);
+ void jitTheFunction(Function *F, const MutexGuard &locked);
+
+protected:
+
+ /// getMemoryforGV - Allocate memory for a global variable.
+ virtual char* getMemoryForGV(const GlobalVariable* GV);
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
new file mode 100644
index 000000000000..35d2b8b1e9f2
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -0,0 +1,596 @@
+//===----- JITDwarfEmitter.cpp - Write dwarf tables into memory -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDwarfEmitter object that is used by the JIT to
+// write dwarf tables to memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITDwarfEmitter.h"
+#include "JIT.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {}
+
+
+unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
+ JITCodeEmitter& jce,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* &EHFramePtr) {
+ assert(MMI && "MachineModuleInfo not registered!");
+
+ const TargetMachine& TM = F.getTarget();
+ TD = TM.getDataLayout();
+ stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection();
+ RI = TM.getRegisterInfo();
+ MAI = TM.getMCAsmInfo();
+ JCE = &jce;
+
+ unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction,
+ EndFunction);
+
+ unsigned char* Result = 0;
+
+ const std::vector<const Function *> Personalities = MMI->getPersonalities();
+ EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]);
+
+ Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr,
+ StartFunction, EndFunction, ExceptionTable);
+
+ return Result;
+}
+
+
+void
+JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
+ const std::vector<MachineMove> &Moves) const {
+ unsigned PointerSize = TD->getPointerSize();
+ int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
+ PointerSize : -PointerSize;
+ MCSymbol *BaseLabel = 0;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ MCSymbol *Label = Move.getLabel();
+
+ // Throw out move if the label is invalid.
+ if (Label && (*JCE->getLabelLocations())[Label] == 0)
+ continue;
+
+ intptr_t LabelPtr = 0;
+ if (Label) LabelPtr = JCE->getLabelAddress(Label);
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // Advance row if new location.
+ if (BaseLabelPtr && Label && BaseLabel != Label) {
+ JCE->emitByte(dwarf::DW_CFA_advance_loc4);
+ JCE->emitInt32(LabelPtr - BaseLabelPtr);
+
+ BaseLabel = Label;
+ BaseLabelPtr = LabelPtr;
+ }
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (!Src.isReg()) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa_offset);
+ } else {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa);
+ JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true));
+ }
+
+ JCE->emitULEB128Bytes(-Src.getOffset());
+ } else {
+ llvm_unreachable("Machine move not supported yet.");
+ }
+ } else if (Src.isReg() &&
+ Src.getReg() == MachineLocation::VirtualFP) {
+ if (Dst.isReg()) {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa_register);
+ JCE->emitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ llvm_unreachable("Machine move not supported yet.");
+ }
+ } else {
+ unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
+ int Offset = Dst.getOffset() / stackGrowth;
+
+ if (Offset < 0) {
+ JCE->emitByte(dwarf::DW_CFA_offset_extended_sf);
+ JCE->emitULEB128Bytes(Reg);
+ JCE->emitSLEB128Bytes(Offset);
+ } else if (Reg < 64) {
+ JCE->emitByte(dwarf::DW_CFA_offset + Reg);
+ JCE->emitULEB128Bytes(Offset);
+ } else {
+ JCE->emitByte(dwarf::DW_CFA_offset_extended);
+ JCE->emitULEB128Bytes(Reg);
+ JCE->emitULEB128Bytes(Offset);
+ }
+ }
+ }
+}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+
+/// PadLT - Order landing pads lexicographically by type id.
+static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+namespace {
+
+/// ActionEntry - Structure describing an entry in the actions table.
+struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ struct ActionEntry *Previous;
+};
+
+/// PadRange - Structure holding a try-range and the associated landing pad.
+struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+};
+
+typedef DenseMap<MCSymbol*, PadRange> RangeMapType;
+
+/// CallSiteEntry - Structure describing an entry in the call-site table.
+struct CallSiteEntry {
+ MCSymbol *BeginLabel; // zero indicates the start of the function.
+ MCSymbol *EndLabel; // zero indicates the end of the function.
+ MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+};
+
+}
+
+unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction) const {
+ assert(MMI && "MachineModuleInfo not registered!");
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads(JCE->getLabelLocations());
+
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ if (PadInfos.empty()) return 0;
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Negative type ids index into FilterIds, positive type ids index into
+ // TypeInfos. The value written for a positive type id is just the type
+ // id itself. For a negative type id, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type id, because the FilterIds entries
+ // are written using a variable width encoding which outputs one byte per
+ // entry as long as the value written is not too large, but can differ.
+ // This kind of complication does not occur for positive type ids because
+ // type infos are output using a fixed width encoding.
+ // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+ for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+ E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
+ }
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LP = LandingPads[i];
+ const std::vector<int> &TypeIds = LP->TypeIds;
+ const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ ActionEntry *PrevAction = 0;
+
+ if (NumShared) {
+ const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = &Actions.back();
+ SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction += -PrevAction->NextAction;
+ PrevAction = PrevAction->Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+ int TypeID = TypeIds[I];
+ assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+ Actions.push_back(Action);
+
+ PrevAction = &Actions.back();
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ FirstActions.push_back(FirstAction);
+
+ // Compute this sites contribution to size.
+ SizeActions += SizeSiteActions;
+ }
+
+ // Compute the call-site table. Entries must be ordered by address.
+ SmallVector<CallSiteEntry, 64> CallSites;
+
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ bool MayThrow = false;
+ MCSymbol *LastLabel = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ MayThrow |= MI->isCall();
+ continue;
+ }
+
+ MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+ assert(BeginLabel && "Invalid label!");
+
+ if (BeginLabel == LastLabel)
+ MayThrow = false;
+
+ RangeMapType::iterator L = PadMap.find(BeginLabel);
+
+ if (L == PadMap.end())
+ continue;
+
+ PadRange P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // If some instruction between the previous try-range and this one may
+ // throw, create a call-site entry with no landing pad for the region
+ // between the try-ranges.
+ if (MayThrow) {
+ CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ CallSiteEntry Site = {BeginLabel, LastLabel,
+ LandingPad->LandingPadLabel, FirstActions[P.PadIndex]};
+
+ assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel &&
+ "Invalid landing pad!");
+
+ // Try to merge with the previous call-site.
+ if (CallSites.size()) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ CallSites.push_back(Site);
+ }
+ }
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (MayThrow) {
+ CallSiteEntry Site = {LastLabel, 0, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ // Final tallies.
+ unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start.
+ sizeof(int32_t) + // Site length.
+ sizeof(int32_t)); // Landing pad.
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
+ SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+
+ unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
+
+ unsigned TypeOffset = sizeof(int8_t) + // Call site format
+ // Call-site table length
+ MCAsmInfo::getULEB128Size(SizeSites) +
+ SizeSites + SizeActions + SizeTypes;
+
+ // Begin the exception table.
+ JCE->emitAlignmentWithFill(4, 0);
+ // Asm->EOL("Padding");
+
+ unsigned char* DwarfExceptionTable = (unsigned char*)JCE->getCurrentPCValue();
+
+ // Emit the header.
+ JCE->emitByte(dwarf::DW_EH_PE_omit);
+ // Asm->EOL("LPStart format (DW_EH_PE_omit)");
+ JCE->emitByte(dwarf::DW_EH_PE_absptr);
+ // Asm->EOL("TType format (DW_EH_PE_absptr)");
+ JCE->emitULEB128Bytes(TypeOffset);
+ // Asm->EOL("TType base offset");
+ JCE->emitByte(dwarf::DW_EH_PE_udata4);
+ // Asm->EOL("Call site format (DW_EH_PE_udata4)");
+ JCE->emitULEB128Bytes(SizeSites);
+ // Asm->EOL("Call-site table length");
+
+ // Emit the landing pad site information.
+ for (unsigned i = 0; i < CallSites.size(); ++i) {
+ CallSiteEntry &S = CallSites[i];
+ intptr_t BeginLabelPtr = 0;
+ intptr_t EndLabelPtr = 0;
+
+ if (!S.BeginLabel) {
+ BeginLabelPtr = (intptr_t)StartFunction;
+ JCE->emitInt32(0);
+ } else {
+ BeginLabelPtr = JCE->getLabelAddress(S.BeginLabel);
+ JCE->emitInt32(BeginLabelPtr - (intptr_t)StartFunction);
+ }
+
+ // Asm->EOL("Region start");
+
+ if (!S.EndLabel)
+ EndLabelPtr = (intptr_t)EndFunction;
+ else
+ EndLabelPtr = JCE->getLabelAddress(S.EndLabel);
+
+ JCE->emitInt32(EndLabelPtr - BeginLabelPtr);
+ //Asm->EOL("Region length");
+
+ if (!S.PadLabel) {
+ JCE->emitInt32(0);
+ } else {
+ unsigned PadLabelPtr = JCE->getLabelAddress(S.PadLabel);
+ JCE->emitInt32(PadLabelPtr - (intptr_t)StartFunction);
+ }
+ // Asm->EOL("Landing pad");
+
+ JCE->emitULEB128Bytes(S.Action);
+ // Asm->EOL("Action");
+ }
+
+ // Emit the actions.
+ for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+ ActionEntry &Action = Actions[I];
+
+ JCE->emitSLEB128Bytes(Action.ValueForTypeID);
+ //Asm->EOL("TypeInfo index");
+ JCE->emitSLEB128Bytes(Action.NextAction);
+ //Asm->EOL("Next action");
+ }
+
+ // Emit the type ids.
+ for (unsigned M = TypeInfos.size(); M; --M) {
+ const GlobalVariable *GV = TypeInfos[M - 1];
+
+ if (GV) {
+ if (TD->getPointerSize() == sizeof(int32_t))
+ JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
+ else
+ JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV));
+ } else {
+ if (TD->getPointerSize() == sizeof(int32_t))
+ JCE->emitInt32(0);
+ else
+ JCE->emitInt64(0);
+ }
+ // Asm->EOL("TypeInfo");
+ }
+
+ // Emit the filter typeids.
+ for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+ unsigned TypeID = FilterIds[j];
+ JCE->emitULEB128Bytes(TypeID);
+ //Asm->EOL("Filter TypeInfo index");
+ }
+
+ JCE->emitAlignmentWithFill(4, 0);
+
+ return DwarfExceptionTable;
+}
+
+unsigned char*
+JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
+ unsigned PointerSize = TD->getPointerSize();
+ int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
+ PointerSize : -PointerSize;
+
+ unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue();
+ // EH Common Frame header
+ JCE->allocateSpace(4, 0);
+ unsigned char* FrameCommonBeginPtr = (unsigned char*)JCE->getCurrentPCValue();
+ JCE->emitInt32((int)0);
+ JCE->emitByte(dwarf::DW_CIE_VERSION);
+ JCE->emitString(Personality ? "zPLR" : "zR");
+ JCE->emitULEB128Bytes(1);
+ JCE->emitSLEB128Bytes(stackGrowth);
+ JCE->emitByte(RI->getDwarfRegNum(RI->getRARegister(), true));
+
+ if (Personality) {
+ // Augmentation Size: 3 small ULEBs of one byte each, and the personality
+ // function which size is PointerSize.
+ JCE->emitULEB128Bytes(3 + PointerSize);
+
+ // We set the encoding of the personality as direct encoding because we use
+ // the function pointer. The encoding is not relative because the current
+ // PC value may be bigger than the personality function pointer.
+ if (PointerSize == 4) {
+ JCE->emitByte(dwarf::DW_EH_PE_sdata4);
+ JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality)));
+ } else {
+ JCE->emitByte(dwarf::DW_EH_PE_sdata8);
+ JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality)));
+ }
+
+ // LSDA encoding: This must match the encoding used in EmitEHFrame ()
+ if (PointerSize == 4)
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ else
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8);
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ } else {
+ JCE->emitULEB128Bytes(1);
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ }
+
+ EmitFrameMoves(0, MAI->getInitialFrameState());
+
+ JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
+ JCE->emitInt32At((uintptr_t*)StartCommonPtr,
+ (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+ FrameCommonBeginPtr));
+
+ return StartCommonPtr;
+}
+
+
+unsigned char*
+JITDwarfEmitter::EmitEHFrame(const Function* Personality,
+ unsigned char* StartCommonPtr,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* ExceptionTable) const {
+ unsigned PointerSize = TD->getPointerSize();
+
+ // EH frame header.
+ unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue();
+ JCE->allocateSpace(4, 0);
+ unsigned char* FrameBeginPtr = (unsigned char*)JCE->getCurrentPCValue();
+ // FDE CIE Offset
+ JCE->emitInt32(FrameBeginPtr - StartCommonPtr);
+ JCE->emitInt32(StartFunction - (unsigned char*)JCE->getCurrentPCValue());
+ JCE->emitInt32(EndFunction - StartFunction);
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (Personality) {
+ JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8);
+
+ if (PointerSize == 4) {
+ if (!MMI->getLandingPads().empty())
+ JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+ else
+ JCE->emitInt32((int)0);
+ } else {
+ if (!MMI->getLandingPads().empty())
+ JCE->emitInt64(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+ else
+ JCE->emitInt64((int)0);
+ }
+ } else {
+ JCE->emitULEB128Bytes(0);
+ }
+
+ // Indicate locations of function specific callee saved registers in
+ // frame.
+ EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves());
+
+ JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
+ // Indicate the size of the table
+ JCE->emitInt32At((uintptr_t*)StartEHPtr,
+ (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+ StartEHPtr));
+
+ // Double zeroes for the unwind runtime
+ if (PointerSize == 8) {
+ JCE->emitInt64(0);
+ JCE->emitInt64(0);
+ } else {
+ JCE->emitInt32(0);
+ JCE->emitInt32(0);
+ }
+
+ return StartEHPtr;
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
new file mode 100644
index 000000000000..98ac34049176
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -0,0 +1,77 @@
+//===------ JITDwarfEmitter.h - Write dwarf tables into memory ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDwarfEmitter object that is used by the JIT to
+// write dwarf tables to memory.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+#define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <vector>
+
+namespace llvm {
+
+class Function;
+class JIT;
+class JITCodeEmitter;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineMove;
+class MCAsmInfo;
+class DataLayout;
+class TargetMachine;
+class TargetRegisterInfo;
+
+class JITDwarfEmitter {
+ const DataLayout* TD;
+ JITCodeEmitter* JCE;
+ const TargetRegisterInfo* RI;
+ const MCAsmInfo *MAI;
+ MachineModuleInfo* MMI;
+ JIT& Jit;
+ bool stackGrowthDirection;
+
+ unsigned char* EmitExceptionTable(MachineFunction* MF,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction) const;
+
+ void EmitFrameMoves(intptr_t BaseLabelPtr,
+ const std::vector<MachineMove> &Moves) const;
+
+ unsigned char* EmitCommonEHFrame(const Function* Personality) const;
+
+ unsigned char* EmitEHFrame(const Function* Personality,
+ unsigned char* StartBufferPtr,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* ExceptionTable) const;
+
+public:
+
+ JITDwarfEmitter(JIT& jit);
+
+ unsigned char* EmitDwarfTable(MachineFunction& F,
+ JITCodeEmitter& JCE,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* &EHFramePtr);
+
+
+ void setModuleInfo(MachineModuleInfo* Info) {
+ MMI = Info;
+ }
+};
+
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
new file mode 100644
index 000000000000..c27387699ab6
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -0,0 +1,1301 @@
+//===-- JITEmitter.cpp - Write machine code to executable memory ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineCodeEmitter object that is used by the JIT to
+// write machine code to memory and remember where relocatable values are.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "JIT.h"
+#include "JITDwarfEmitter.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumBytes, "Number of bytes of machine code compiled");
+STATISTIC(NumRelos, "Number of relocations applied");
+STATISTIC(NumRetries, "Number of retries with more memory");
+
+
+// A declaration may stop being a declaration once it's fully read from bitcode.
+// This function returns true if F is fully read and is still a declaration.
+static bool isNonGhostDeclaration(const Function *F) {
+ return F->isDeclaration() && !F->isMaterializable();
+}
+
+//===----------------------------------------------------------------------===//
+// JIT lazy compilation code.
+//
+namespace {
+ class JITEmitter;
+ class JITResolverState;
+
+ template<typename ValueTy>
+ struct NoRAUWValueMapConfig : public ValueMapConfig<ValueTy> {
+ typedef JITResolverState *ExtraData;
+ static void onRAUW(JITResolverState *, Value *Old, Value *New) {
+ llvm_unreachable("The JIT doesn't know how to handle a"
+ " RAUW on a value it has emitted.");
+ }
+ };
+
+ struct CallSiteValueMapConfig : public NoRAUWValueMapConfig<Function*> {
+ typedef JITResolverState *ExtraData;
+ static void onDelete(JITResolverState *JRS, Function *F);
+ };
+
+ class JITResolverState {
+ public:
+ typedef ValueMap<Function*, void*, NoRAUWValueMapConfig<Function*> >
+ FunctionToLazyStubMapTy;
+ typedef std::map<void*, AssertingVH<Function> > CallSiteToFunctionMapTy;
+ typedef ValueMap<Function *, SmallPtrSet<void*, 1>,
+ CallSiteValueMapConfig> FunctionToCallSitesMapTy;
+ typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy;
+ private:
+ /// FunctionToLazyStubMap - Keep track of the lazy stub created for a
+ /// particular function so that we can reuse them if necessary.
+ FunctionToLazyStubMapTy FunctionToLazyStubMap;
+
+ /// CallSiteToFunctionMap - Keep track of the function that each lazy call
+ /// site corresponds to, and vice versa.
+ CallSiteToFunctionMapTy CallSiteToFunctionMap;
+ FunctionToCallSitesMapTy FunctionToCallSitesMap;
+
+ /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a
+ /// particular GlobalVariable so that we can reuse them if necessary.
+ GlobalToIndirectSymMapTy GlobalToIndirectSymMap;
+
+#ifndef NDEBUG
+ /// Instance of the JIT this ResolverState serves.
+ JIT *TheJIT;
+#endif
+
+ public:
+ JITResolverState(JIT *jit) : FunctionToLazyStubMap(this),
+ FunctionToCallSitesMap(this) {
+#ifndef NDEBUG
+ TheJIT = jit;
+#endif
+ }
+
+ FunctionToLazyStubMapTy& getFunctionToLazyStubMap(
+ const MutexGuard& locked) {
+ assert(locked.holds(TheJIT->lock));
+ return FunctionToLazyStubMap;
+ }
+
+ GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& lck) {
+ assert(lck.holds(TheJIT->lock));
+ return GlobalToIndirectSymMap;
+ }
+
+ std::pair<void *, Function *> LookupFunctionFromCallSite(
+ const MutexGuard &locked, void *CallSite) const {
+ assert(locked.holds(TheJIT->lock));
+
+ // The address given to us for the stub may not be exactly right, it
+ // might be a little bit after the stub. As such, use upper_bound to
+ // find it.
+ CallSiteToFunctionMapTy::const_iterator I =
+ CallSiteToFunctionMap.upper_bound(CallSite);
+ assert(I != CallSiteToFunctionMap.begin() &&
+ "This is not a known call site!");
+ --I;
+ return *I;
+ }
+
+ void AddCallSite(const MutexGuard &locked, void *CallSite, Function *F) {
+ assert(locked.holds(TheJIT->lock));
+
+ bool Inserted = CallSiteToFunctionMap.insert(
+ std::make_pair(CallSite, F)).second;
+ (void)Inserted;
+ assert(Inserted && "Pair was already in CallSiteToFunctionMap");
+ FunctionToCallSitesMap[F].insert(CallSite);
+ }
+
+ void EraseAllCallSitesForPrelocked(Function *F);
+
+ // Erases _all_ call sites regardless of their function. This is used to
+ // unregister the stub addresses from the StubToResolverMap in
+ // ~JITResolver().
+ void EraseAllCallSitesPrelocked();
+ };
+
+ /// JITResolver - Keep track of, and resolve, call sites for functions that
+ /// have not yet been compiled.
+ class JITResolver {
+ typedef JITResolverState::FunctionToLazyStubMapTy FunctionToLazyStubMapTy;
+ typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy;
+ typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy;
+
+ /// LazyResolverFn - The target lazy resolver function that we actually
+ /// rewrite instructions to use.
+ TargetJITInfo::LazyResolverFn LazyResolverFn;
+
+ JITResolverState state;
+
+ /// ExternalFnToStubMap - This is the equivalent of FunctionToLazyStubMap
+ /// for external functions. TODO: Of course, external functions don't need
+ /// a lazy stub. It's actually here to make it more likely that far calls
+ /// succeed, but no single stub can guarantee that. I'll remove this in a
+ /// subsequent checkin when I actually fix far calls.
+ std::map<void*, void*> ExternalFnToStubMap;
+
+ /// revGOTMap - map addresses to indexes in the GOT
+ std::map<void*, unsigned> revGOTMap;
+ unsigned nextGOTIndex;
+
+ JITEmitter &JE;
+
+ /// Instance of JIT corresponding to this Resolver.
+ JIT *TheJIT;
+
+ public:
+ explicit JITResolver(JIT &jit, JITEmitter &je)
+ : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) {
+ LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn);
+ }
+
+ ~JITResolver();
+
+ /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's
+ /// lazy-compilation stub if it has already been created.
+ void *getLazyFunctionStubIfAvailable(Function *F);
+
+ /// getLazyFunctionStub - This returns a pointer to a function's
+ /// lazy-compilation stub, creating one on demand as needed.
+ void *getLazyFunctionStub(Function *F);
+
+ /// getExternalFunctionStub - Return a stub for the function at the
+ /// specified address, created lazily on demand.
+ void *getExternalFunctionStub(void *FnAddr);
+
+ /// getGlobalValueIndirectSym - Return an indirect symbol containing the
+ /// specified GV address.
+ void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
+
+ /// getGOTIndexForAddress - Return a new or existing index in the GOT for
+ /// an address. This function only manages slots, it does not manage the
+ /// contents of the slots or the memory associated with the GOT.
+ unsigned getGOTIndexForAddr(void *addr);
+
+ /// JITCompilerFn - This function is called to resolve a stub to a compiled
+ /// address. If the LLVM Function corresponding to the stub has not yet
+ /// been compiled, this function compiles it first.
+ static void *JITCompilerFn(void *Stub);
+ };
+
+ class StubToResolverMapTy {
+ /// Map a stub address to a specific instance of a JITResolver so that
+ /// lazily-compiled functions can find the right resolver to use.
+ ///
+ /// Guarded by Lock.
+ std::map<void*, JITResolver*> Map;
+
+ /// Guards Map from concurrent accesses.
+ mutable sys::Mutex Lock;
+
+ public:
+ /// Registers a Stub to be resolved by Resolver.
+ void RegisterStubResolver(void *Stub, JITResolver *Resolver) {
+ MutexGuard guard(Lock);
+ Map.insert(std::make_pair(Stub, Resolver));
+ }
+ /// Unregisters the Stub when it's invalidated.
+ void UnregisterStubResolver(void *Stub) {
+ MutexGuard guard(Lock);
+ Map.erase(Stub);
+ }
+ /// Returns the JITResolver instance that owns the Stub.
+ JITResolver *getResolverFromStub(void *Stub) const {
+ MutexGuard guard(Lock);
+ // The address given to us for the stub may not be exactly right, it might
+ // be a little bit after the stub. As such, use upper_bound to find it.
+ // This is the same trick as in LookupFunctionFromCallSite from
+ // JITResolverState.
+ std::map<void*, JITResolver*>::const_iterator I = Map.upper_bound(Stub);
+ assert(I != Map.begin() && "This is not a known stub!");
+ --I;
+ return I->second;
+ }
+ /// True if any stubs refer to the given resolver. Only used in an assert().
+ /// O(N)
+ bool ResolverHasStubs(JITResolver* Resolver) const {
+ MutexGuard guard(Lock);
+ for (std::map<void*, JITResolver*>::const_iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+ if (I->second == Resolver)
+ return true;
+ }
+ return false;
+ }
+ };
+ /// This needs to be static so that a lazy call stub can access it with no
+ /// context except the address of the stub.
+ ManagedStatic<StubToResolverMapTy> StubToResolverMap;
+
+ /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
+ /// used to output functions to memory for execution.
+ class JITEmitter : public JITCodeEmitter {
+ JITMemoryManager *MemMgr;
+
+ // When outputting a function stub in the context of some other function, we
+ // save BufferBegin/BufferEnd/CurBufferPtr here.
+ uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+
+ // When reattempting to JIT a function after running out of space, we store
+ // the estimated size of the function we're trying to JIT here, so we can
+ // ask the memory manager for at least this much space. When we
+ // successfully emit the function, we reset this back to zero.
+ uintptr_t SizeEstimate;
+
+ /// Relocations - These are the relocations that the function needs, as
+ /// emitted.
+ std::vector<MachineRelocation> Relocations;
+
+ /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+ /// It is filled in by the StartMachineBasicBlock callback and queried by
+ /// the getMachineBasicBlockAddress callback.
+ std::vector<uintptr_t> MBBLocations;
+
+ /// ConstantPool - The constant pool for the current function.
+ ///
+ MachineConstantPool *ConstantPool;
+
+ /// ConstantPoolBase - A pointer to the first entry in the constant pool.
+ ///
+ void *ConstantPoolBase;
+
+ /// ConstPoolAddresses - Addresses of individual constant pool entries.
+ ///
+ SmallVector<uintptr_t, 8> ConstPoolAddresses;
+
+ /// JumpTable - The jump tables for the current function.
+ ///
+ MachineJumpTableInfo *JumpTable;
+
+ /// JumpTableBase - A pointer to the first entry in the jump table.
+ ///
+ void *JumpTableBase;
+
+ /// Resolver - This contains info about the currently resolved functions.
+ JITResolver Resolver;
+
+ /// DE - The dwarf emitter for the jit.
+ OwningPtr<JITDwarfEmitter> DE;
+
+ /// LabelLocations - This vector is a mapping from Label ID's to their
+ /// address.
+ DenseMap<MCSymbol*, uintptr_t> LabelLocations;
+
+ /// MMI - Machine module info for exception informations
+ MachineModuleInfo* MMI;
+
+ // CurFn - The llvm function being emitted. Only valid during
+ // finishFunction().
+ const Function *CurFn;
+
+ /// Information about emitted code, which is passed to the
+ /// JITEventListeners. This is reset in startFunction and used in
+ /// finishFunction.
+ JITEvent_EmittedFunctionDetails EmissionDetails;
+
+ struct EmittedCode {
+ void *FunctionBody; // Beginning of the function's allocation.
+ void *Code; // The address the function's code actually starts at.
+ void *ExceptionTable;
+ EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {}
+ };
+ struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
+ typedef JITEmitter *ExtraData;
+ static void onDelete(JITEmitter *, const Function*);
+ static void onRAUW(JITEmitter *, const Function*, const Function*);
+ };
+ ValueMap<const Function *, EmittedCode,
+ EmittedFunctionConfig> EmittedFunctions;
+
+ DebugLoc PrevDL;
+
+ /// Instance of the JIT
+ JIT *TheJIT;
+
+ bool JITExceptionHandling;
+
+ public:
+ JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
+ : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
+ EmittedFunctions(this), TheJIT(&jit),
+ JITExceptionHandling(TM.Options.JITExceptionHandling) {
+ MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
+ if (jit.getJITInfo().needsGOT()) {
+ MemMgr->AllocateGOT();
+ DEBUG(dbgs() << "JIT is managing a GOT\n");
+ }
+
+ if (JITExceptionHandling) {
+ DE.reset(new JITDwarfEmitter(jit));
+ }
+ }
+ ~JITEmitter() {
+ delete MemMgr;
+ }
+
+ JITResolver &getJITResolver() { return Resolver; }
+
+ virtual void startFunction(MachineFunction &F);
+ virtual bool finishFunction(MachineFunction &F);
+
+ void emitConstantPool(MachineConstantPool *MCP);
+ void initJumpTableInfo(MachineJumpTableInfo *MJTI);
+ void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
+
+ void startGVStub(const GlobalValue* GV,
+ unsigned StubSize, unsigned Alignment = 1);
+ void startGVStub(void *Buffer, unsigned StubSize);
+ void finishGVStub();
+ virtual void *allocIndirectGV(const GlobalValue *GV,
+ const uint8_t *Buffer, size_t Size,
+ unsigned Alignment);
+
+ /// allocateSpace - Reserves space in the current block if any, or
+ /// allocate a new one of the given size.
+ virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
+
+ /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace,
+ /// this method does not allocate memory in the current output buffer,
+ /// because a global may live longer than the current function.
+ virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment);
+
+ virtual void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCValue();
+ if (MBB->hasAddressTaken())
+ TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(),
+ (void*)getCurrentPCValue());
+ DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+ << (void*) getCurrentPCValue() << "]\n");
+ }
+
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
+
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const{
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+ }
+
+ /// retryWithMoreMemory - Log a retry and deallocate all memory for the
+ /// given function. Increase the minimum allocation size so that we get
+ /// more memory next time.
+ void retryWithMoreMemory(MachineFunction &F);
+
+ /// deallocateMemForFunction - Deallocate all memory for the specified
+ /// function body.
+ void deallocateMemForFunction(const Function *F);
+
+ virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
+
+ virtual void emitLabel(MCSymbol *Label) {
+ LabelLocations[Label] = getCurrentPCValue();
+ }
+
+ virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() {
+ return &LabelLocations;
+ }
+
+ virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
+ assert(LabelLocations.count(Label) && "Label not emitted!");
+ return LabelLocations.find(Label)->second;
+ }
+
+ virtual void setModuleInfo(MachineModuleInfo* Info) {
+ MMI = Info;
+ if (DE.get()) DE->setModuleInfo(Info);
+ }
+
+ private:
+ void *getPointerToGlobal(GlobalValue *GV, void *Reference,
+ bool MayNeedFarStub);
+ void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
+ };
+}
+
+void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
+ JRS->EraseAllCallSitesForPrelocked(F);
+}
+
+void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) {
+ FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
+ if (F2C == FunctionToCallSitesMap.end())
+ return;
+ StubToResolverMapTy &S2RMap = *StubToResolverMap;
+ for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(),
+ E = F2C->second.end(); I != E; ++I) {
+ S2RMap.UnregisterStubResolver(*I);
+ bool Erased = CallSiteToFunctionMap.erase(*I);
+ (void)Erased;
+ assert(Erased && "Missing call site->function mapping");
+ }
+ FunctionToCallSitesMap.erase(F2C);
+}
+
+void JITResolverState::EraseAllCallSitesPrelocked() {
+ StubToResolverMapTy &S2RMap = *StubToResolverMap;
+ for (CallSiteToFunctionMapTy::const_iterator
+ I = CallSiteToFunctionMap.begin(),
+ E = CallSiteToFunctionMap.end(); I != E; ++I) {
+ S2RMap.UnregisterStubResolver(I->first);
+ }
+ CallSiteToFunctionMap.clear();
+ FunctionToCallSitesMap.clear();
+}
+
+JITResolver::~JITResolver() {
+ // No need to lock because we're in the destructor, and state isn't shared.
+ state.EraseAllCallSitesPrelocked();
+ assert(!StubToResolverMap->ResolverHasStubs(this) &&
+ "Resolver destroyed with stubs still alive.");
+}
+
+/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub
+/// if it has already been created.
+void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a stub for this function, recycle it.
+ return state.getFunctionToLazyStubMap(locked).lookup(F);
+}
+
+/// getFunctionStub - This returns a pointer to a function stub, creating
+/// one on demand as needed.
+void *JITResolver::getLazyFunctionStub(Function *F) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a lazy stub for this function, recycle it.
+ void *&Stub = state.getFunctionToLazyStubMap(locked)[F];
+ if (Stub) return Stub;
+
+ // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we
+ // must resolve the symbol now.
+ void *Actual = TheJIT->isCompilingLazily()
+ ? (void *)(intptr_t)LazyResolverFn : (void *)0;
+
+ // If this is an external declaration, attempt to resolve the address now
+ // to place in the stub.
+ if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) {
+ Actual = TheJIT->getPointerToFunction(F);
+
+ // If we resolved the symbol to a null address (eg. a weak external)
+ // don't emit a stub. Return a null pointer to the application.
+ if (!Actual) return 0;
+ }
+
+ TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
+ JE.startGVStub(F, SL.Size, SL.Alignment);
+ // Codegen a new stub, calling the lazy resolver or the actual address of the
+ // external function, if it was resolved.
+ Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE);
+ JE.finishGVStub();
+
+ if (Actual != (void*)(intptr_t)LazyResolverFn) {
+ // If we are getting the stub for an external function, we really want the
+ // address of the stub in the GlobalAddressMap for the JIT, not the address
+ // of the external function.
+ TheJIT->updateGlobalMapping(F, Stub);
+ }
+
+ DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '"
+ << F->getName() << "'\n");
+
+ if (TheJIT->isCompilingLazily()) {
+ // Register this JITResolver as the one corresponding to this call site so
+ // JITCompilerFn will be able to find it.
+ StubToResolverMap->RegisterStubResolver(Stub, this);
+
+ // Finally, keep track of the stub-to-Function mapping so that the
+ // JITCompilerFn knows which function to compile!
+ state.AddCallSite(locked, Stub, F);
+ } else if (!Actual) {
+ // If we are JIT'ing non-lazily but need to call a function that does not
+ // exist yet, add it to the JIT's work list so that we can fill in the
+ // stub address later.
+ assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() &&
+ "'Actual' should have been set above.");
+ TheJIT->addPendingFunction(F);
+ }
+
+ return Stub;
+}
+
+/// getGlobalValueIndirectSym - Return a lazy pointer containing the specified
+/// GV address.
+void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a stub for this global variable, recycle it.
+ void *&IndirectSym = state.getGlobalToIndirectSymMap(locked)[GV];
+ if (IndirectSym) return IndirectSym;
+
+ // Otherwise, codegen a new indirect symbol.
+ IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
+ JE);
+
+ DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym
+ << "] for GV '" << GV->getName() << "'\n");
+
+ return IndirectSym;
+}
+
+/// getExternalFunctionStub - Return a stub for the function at the
+/// specified address, created lazily on demand.
+void *JITResolver::getExternalFunctionStub(void *FnAddr) {
+ // If we already have a stub for this function, recycle it.
+ void *&Stub = ExternalFnToStubMap[FnAddr];
+ if (Stub) return Stub;
+
+ TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
+ JE.startGVStub(0, SL.Size, SL.Alignment);
+ Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE);
+ JE.finishGVStub();
+
+ DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub
+ << "] for external function at '" << FnAddr << "'\n");
+ return Stub;
+}
+
+unsigned JITResolver::getGOTIndexForAddr(void* addr) {
+ unsigned idx = revGOTMap[addr];
+ if (!idx) {
+ idx = ++nextGOTIndex;
+ revGOTMap[addr] = idx;
+ DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr ["
+ << addr << "]\n");
+ }
+ return idx;
+}
+
+/// JITCompilerFn - This function is called when a lazy compilation stub has
+/// been entered. It looks up which function this stub corresponds to, compiles
+/// it if necessary, then returns the resultant function pointer.
+void *JITResolver::JITCompilerFn(void *Stub) {
+ JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub);
+ assert(JR && "Unable to find the corresponding JITResolver to the call site");
+
+ Function* F = 0;
+ void* ActualPtr = 0;
+
+ {
+ // Only lock for getting the Function. The call getPointerToFunction made
+ // in this function might trigger function materializing, which requires
+ // JIT lock to be unlocked.
+ MutexGuard locked(JR->TheJIT->lock);
+
+ // The address given to us for the stub may not be exactly right, it might
+ // be a little bit after the stub. As such, use upper_bound to find it.
+ std::pair<void*, Function*> I =
+ JR->state.LookupFunctionFromCallSite(locked, Stub);
+ F = I.second;
+ ActualPtr = I.first;
+ }
+
+ // If we have already code generated the function, just return the address.
+ void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F);
+
+ if (!Result) {
+ // Otherwise we don't have it, do lazy compilation now.
+
+ // If lazy compilation is disabled, emit a useful error message and abort.
+ if (!JR->TheJIT->isCompilingLazily()) {
+ report_fatal_error("LLVM JIT requested to do lazy compilation of"
+ " function '"
+ + F->getName() + "' when lazy compiles are disabled!");
+ }
+
+ DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName()
+ << "' In stub ptr = " << Stub << " actual ptr = "
+ << ActualPtr << "\n");
+ (void)ActualPtr;
+
+ Result = JR->TheJIT->getPointerToFunction(F);
+ }
+
+ // Reacquire the lock to update the GOT map.
+ MutexGuard locked(JR->TheJIT->lock);
+
+ // We might like to remove the call site from the CallSiteToFunction map, but
+ // we can't do that! Multiple threads could be stuck, waiting to acquire the
+ // lock above. As soon as the 1st function finishes compiling the function,
+ // the next one will be released, and needs to be able to find the function it
+ // needs to call.
+
+ // FIXME: We could rewrite all references to this stub if we knew them.
+
+ // What we will do is set the compiled function address to map to the
+ // same GOT entry as the stub so that later clients may update the GOT
+ // if they see it still using the stub address.
+ // Note: this is done so the Resolver doesn't have to manage GOT memory
+ // Do this without allocating map space if the target isn't using a GOT
+ if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end())
+ JR->revGOTMap[Result] = JR->revGOTMap[Stub];
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// JITEmitter code.
+//
+void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
+ bool MayNeedFarStub) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return TheJIT->getOrEmitGlobalVariable(GV);
+
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return TheJIT->getPointerToGlobal(GA->resolveAliasedGlobal(false));
+
+ // If we have already compiled the function, return a pointer to its body.
+ Function *F = cast<Function>(V);
+
+ void *FnStub = Resolver.getLazyFunctionStubIfAvailable(F);
+ if (FnStub) {
+ // Return the function stub if it's already created. We do this first so
+ // that we're returning the same address for the function as any previous
+ // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be
+ // close enough to call.
+ return FnStub;
+ }
+
+ // If we know the target can handle arbitrary-distance calls, try to
+ // return a direct pointer.
+ if (!MayNeedFarStub) {
+ // If we have code, go ahead and return that.
+ void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
+ if (ResultPtr) return ResultPtr;
+
+ // If this is an external function pointer, we can force the JIT to
+ // 'compile' it, which really just adds it to the map.
+ if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage())
+ return TheJIT->getPointerToFunction(F);
+ }
+
+ // Otherwise, we may need a to emit a stub, and, conservatively, we always do
+ // so. Note that it's possible to return null from getLazyFunctionStub in the
+ // case of a weak extern that fails to resolve.
+ return Resolver.getLazyFunctionStub(F);
+}
+
+void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
+ // Make sure GV is emitted first, and create a stub containing the fully
+ // resolved address.
+ void *GVAddress = getPointerToGlobal(V, Reference, false);
+ void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
+ return StubAddr;
+}
+
+void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
+ if (DL.isUnknown()) return;
+ if (!BeforePrintingInsn) return;
+
+ const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext();
+
+ if (DL.getScope(Context) != 0 && PrevDL != DL) {
+ JITEvent_EmittedFunctionDetails::LineStart NextLine;
+ NextLine.Address = getCurrentPCValue();
+ NextLine.Loc = DL;
+ EmissionDetails.LineStarts.push_back(NextLine);
+ }
+
+ PrevDL = DL;
+}
+
+static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
+ const DataLayout *TD) {
+ const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+ if (Constants.empty()) return 0;
+
+ unsigned Size = 0;
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = Constants[i];
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ Size = (Size + AlignMask) & ~AlignMask;
+ Type *Ty = CPE.getType();
+ Size += TD->getTypeAllocSize(Ty);
+ }
+ return Size;
+}
+
+void JITEmitter::startFunction(MachineFunction &F) {
+ DEBUG(dbgs() << "JIT: Starting CodeGen of Function "
+ << F.getName() << "\n");
+
+ uintptr_t ActualSize = 0;
+ // Set the memory writable, if it's not already
+ MemMgr->setMemoryWritable();
+
+ if (SizeEstimate > 0) {
+ // SizeEstimate will be non-zero on reallocation attempts.
+ ActualSize = SizeEstimate;
+ }
+
+ BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(),
+ ActualSize);
+ BufferEnd = BufferBegin+ActualSize;
+ EmittedFunctions[F.getFunction()].FunctionBody = BufferBegin;
+
+ // Ensure the constant pool/jump table info is at least 4-byte aligned.
+ emitAlignment(16);
+
+ emitConstantPool(F.getConstantPool());
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
+ initJumpTableInfo(MJTI);
+
+ // About to start emitting the machine code for the function.
+ emitAlignment(std::max(F.getFunction()->getAlignment(), 8U));
+ TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr);
+ EmittedFunctions[F.getFunction()].Code = CurBufferPtr;
+
+ MBBLocations.clear();
+
+ EmissionDetails.MF = &F;
+ EmissionDetails.LineStarts.clear();
+}
+
+bool JITEmitter::finishFunction(MachineFunction &F) {
+ if (CurBufferPtr == BufferEnd) {
+ // We must call endFunctionBody before retrying, because
+ // deallocateMemForFunction requires it.
+ MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
+ retryWithMoreMemory(F);
+ return true;
+ }
+
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
+ emitJumpTableInfo(MJTI);
+
+ // FnStart is the start of the text, not the start of the constant pool and
+ // other per-function data.
+ uint8_t *FnStart =
+ (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction());
+
+ // FnEnd is the end of the function's machine code.
+ uint8_t *FnEnd = CurBufferPtr;
+
+ if (!Relocations.empty()) {
+ CurFn = F.getFunction();
+ NumRelos += Relocations.size();
+
+ // Resolve the relocations to concrete pointers.
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ void *ResultPtr = 0;
+ if (!MR.letTargetResolve()) {
+ if (MR.isExternalSymbol()) {
+ ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(),
+ false);
+ DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
+ << ResultPtr << "]\n");
+
+ // If the target REALLY wants a stub for this function, emit it now.
+ if (MR.mayNeedFarStub()) {
+ ResultPtr = Resolver.getExternalFunctionStub(ResultPtr);
+ }
+ } else if (MR.isGlobalValue()) {
+ ResultPtr = getPointerToGlobal(MR.getGlobalValue(),
+ BufferBegin+MR.getMachineCodeOffset(),
+ MR.mayNeedFarStub());
+ } else if (MR.isIndirectSymbol()) {
+ ResultPtr = getPointerToGVIndirectSym(
+ MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset());
+ } else if (MR.isBasicBlock()) {
+ ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock());
+ } else if (MR.isConstantPoolIndex()) {
+ ResultPtr =
+ (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ } else {
+ assert(MR.isJumpTableIndex());
+ ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex());
+ }
+
+ MR.setResultPointer(ResultPtr);
+ }
+
+ // if we are managing the GOT and the relocation wants an index,
+ // give it one
+ if (MR.isGOTRelative() && MemMgr->isManagingGOT()) {
+ unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr);
+ MR.setGOTIndex(idx);
+ if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) {
+ DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr
+ << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
+ << "\n");
+ ((void**)MemMgr->getGOTBase())[idx] = ResultPtr;
+ }
+ }
+ }
+
+ CurFn = 0;
+ TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0],
+ Relocations.size(), MemMgr->getGOTBase());
+ }
+
+ // Update the GOT entry for F to point to the new code.
+ if (MemMgr->isManagingGOT()) {
+ unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin);
+ if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) {
+ DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin
+ << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
+ << "\n");
+ ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin;
+ }
+ }
+
+ // CurBufferPtr may have moved beyond FnEnd, due to memory allocation for
+ // global variables that were referenced in the relocations.
+ MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
+
+ if (CurBufferPtr == BufferEnd) {
+ retryWithMoreMemory(F);
+ return true;
+ } else {
+ // Now that we've succeeded in emitting the function, reset the
+ // SizeEstimate back down to zero.
+ SizeEstimate = 0;
+ }
+
+ BufferBegin = CurBufferPtr = 0;
+ NumBytes += FnEnd-FnStart;
+
+ // Invalidate the icache if necessary.
+ sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart);
+
+ TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart,
+ EmissionDetails);
+
+ // Reset the previous debug location.
+ PrevDL = DebugLoc();
+
+ DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart
+ << "] Function: " << F.getName()
+ << ": " << (FnEnd-FnStart) << " bytes of text, "
+ << Relocations.size() << " relocations\n");
+
+ Relocations.clear();
+ ConstPoolAddresses.clear();
+
+ // Mark code region readable and executable if it's not so already.
+ MemMgr->setMemoryExecutable();
+
+ DEBUG({
+ if (sys::hasDisassembler()) {
+ dbgs() << "JIT: Disassembled code:\n";
+ dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart,
+ (uintptr_t)FnStart);
+ } else {
+ dbgs() << "JIT: Binary code:\n";
+ uint8_t* q = FnStart;
+ for (int i = 0; q < FnEnd; q += 4, ++i) {
+ if (i == 4)
+ i = 0;
+ if (i == 0)
+ dbgs() << "JIT: " << (long)(q - FnStart) << ": ";
+ bool Done = false;
+ for (int j = 3; j >= 0; --j) {
+ if (q + j >= FnEnd)
+ Done = true;
+ else
+ dbgs() << (unsigned short)q[j];
+ }
+ if (Done)
+ break;
+ dbgs() << ' ';
+ if (i == 3)
+ dbgs() << '\n';
+ }
+ dbgs()<< '\n';
+ }
+ });
+
+ if (JITExceptionHandling) {
+ uintptr_t ActualSize = 0;
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+ uint8_t *FrameRegister;
+
+ while (true) {
+ BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
+ ActualSize);
+ BufferEnd = BufferBegin+ActualSize;
+ EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin;
+ uint8_t *EhStart;
+ FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, EhStart);
+
+ // If the buffer was large enough to hold the table then we are done.
+ if (CurBufferPtr != BufferEnd)
+ break;
+
+ // Try again with twice as much space.
+ ActualSize = (CurBufferPtr - BufferBegin) * 2;
+ MemMgr->deallocateExceptionTable(BufferBegin);
+ }
+ MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
+ FrameRegister);
+ BufferBegin = SavedBufferBegin;
+ BufferEnd = SavedBufferEnd;
+ CurBufferPtr = SavedCurBufferPtr;
+
+ if (JITExceptionHandling) {
+ TheJIT->RegisterTable(F.getFunction(), FrameRegister);
+ }
+ }
+
+ if (MMI)
+ MMI->EndFunction();
+
+ return false;
+}
+
+void JITEmitter::retryWithMoreMemory(MachineFunction &F) {
+ DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n");
+ Relocations.clear(); // Clear the old relocations or we'll reapply them.
+ ConstPoolAddresses.clear();
+ ++NumRetries;
+ deallocateMemForFunction(F.getFunction());
+ // Try again with at least twice as much free space.
+ SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin));
+
+ for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){
+ if (MBB->hasAddressTaken())
+ TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock());
+ }
+}
+
+/// deallocateMemForFunction - Deallocate all memory for the specified
+/// function body. Also drop any references the function has to stubs.
+/// May be called while the Function is being destroyed inside ~Value().
+void JITEmitter::deallocateMemForFunction(const Function *F) {
+ ValueMap<const Function *, EmittedCode, EmittedFunctionConfig>::iterator
+ Emitted = EmittedFunctions.find(F);
+ if (Emitted != EmittedFunctions.end()) {
+ MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody);
+ MemMgr->deallocateExceptionTable(Emitted->second.ExceptionTable);
+ TheJIT->NotifyFreeingMachineCode(Emitted->second.Code);
+
+ EmittedFunctions.erase(Emitted);
+ }
+
+ if (JITExceptionHandling) {
+ TheJIT->DeregisterTable(F);
+ }
+}
+
+
+void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
+ if (BufferBegin)
+ return JITCodeEmitter::allocateSpace(Size, Alignment);
+
+ // create a new memory block if there is no active one.
+ // care must be taken so that BufferBegin is invalidated when a
+ // block is trimmed
+ BufferBegin = CurBufferPtr = MemMgr->allocateSpace(Size, Alignment);
+ BufferEnd = BufferBegin+Size;
+ return CurBufferPtr;
+}
+
+void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
+ // Delegate this call through the memory manager.
+ return MemMgr->allocateGlobal(Size, Alignment);
+}
+
+void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ if (TheJIT->getJITInfo().hasCustomConstantPool())
+ return;
+
+ const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+ if (Constants.empty()) return;
+
+ unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getDataLayout());
+ unsigned Align = MCP->getConstantPoolAlignment();
+ ConstantPoolBase = allocateSpace(Size, Align);
+ ConstantPool = MCP;
+
+ if (ConstantPoolBase == 0) return; // Buffer overflow.
+
+ DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase
+ << "] (size: " << Size << ", alignment: " << Align << ")\n");
+
+ // Initialize the memory for all of the constant pool entries.
+ unsigned Offset = 0;
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = Constants[i];
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ Offset = (Offset + AlignMask) & ~AlignMask;
+
+ uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset;
+ ConstPoolAddresses.push_back(CAddr);
+ if (CPE.isMachineConstantPoolEntry()) {
+ // FIXME: add support to lower machine constant pool values into bytes!
+ report_fatal_error("Initialize memory with machine specific constant pool"
+ "entry has not been implemented!");
+ }
+ TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
+ DEBUG(dbgs() << "JIT: CP" << i << " at [0x";
+ dbgs().write_hex(CAddr) << "]\n");
+
+ Type *Ty = CPE.Val.ConstVal->getType();
+ Offset += TheJIT->getDataLayout()->getTypeAllocSize(Ty);
+ }
+}
+
+void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
+ if (TheJIT->getJITInfo().hasCustomJumpTables())
+ return;
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline)
+ return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ unsigned NumEntries = 0;
+ for (unsigned i = 0, e = JT.size(); i != e; ++i)
+ NumEntries += JT[i].MBBs.size();
+
+ unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getDataLayout());
+
+ // Just allocate space for all the jump tables now. We will fix up the actual
+ // MBB entries in the tables after we emit the code for each block, since then
+ // we will know the final locations of the MBBs in memory.
+ JumpTable = MJTI;
+ JumpTableBase = allocateSpace(NumEntries * EntrySize,
+ MJTI->getEntryAlignment(*TheJIT->getDataLayout()));
+}
+
+void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
+ if (TheJIT->getJITInfo().hasCustomJumpTables())
+ return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty() || JumpTableBase == 0) return;
+
+
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Inline:
+ return;
+ case MachineJumpTableInfo::EK_BlockAddress: {
+ // EK_BlockAddress - Each entry is a plain address of block, e.g.:
+ // .word LBB123
+ assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == sizeof(void*) &&
+ "Cross JIT'ing?");
+
+ // For each jump table, map each target in the jump table to the address of
+ // an emitted MachineBasicBlock.
+ intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ // Store the address of the basic block for this jump table slot in the
+ // memory we allocated for the jump table in 'initJumpTableInfo'
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi)
+ *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]);
+ }
+ break;
+ }
+
+ case MachineJumpTableInfo::EK_Custom32:
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == 4&&"Cross JIT'ing?");
+ // For each jump table, place the offset from the beginning of the table
+ // to the target address.
+ int *SlotPtr = (int*)JumpTableBase;
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ // Store the offset of the basic block for this jump table slot in the
+ // memory we allocated for the jump table in 'initJumpTableInfo'
+ uintptr_t Base = (uintptr_t)SlotPtr;
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]);
+ /// FIXME: USe EntryKind instead of magic "getPICJumpTableEntry" hook.
+ *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base);
+ }
+ }
+ break;
+ }
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ llvm_unreachable(
+ "JT Info emission not implemented for GPRel64BlockAddress yet.");
+ }
+}
+
+void JITEmitter::startGVStub(const GlobalValue* GV,
+ unsigned StubSize, unsigned Alignment) {
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment);
+ BufferEnd = BufferBegin+StubSize+1;
+}
+
+void JITEmitter::startGVStub(void *Buffer, unsigned StubSize) {
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
+ BufferEnd = BufferBegin+StubSize+1;
+}
+
+void JITEmitter::finishGVStub() {
+ assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space.");
+ NumBytes += getCurrentPCOffset();
+ BufferBegin = SavedBufferBegin;
+ BufferEnd = SavedBufferEnd;
+ CurBufferPtr = SavedCurBufferPtr;
+}
+
+void *JITEmitter::allocIndirectGV(const GlobalValue *GV,
+ const uint8_t *Buffer, size_t Size,
+ unsigned Alignment) {
+ uint8_t *IndGV = MemMgr->allocateStub(GV, Size, Alignment);
+ memcpy(IndGV, Buffer, Size);
+ return IndGV;
+}
+
+// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry
+// in the constant pool that was last emitted with the 'emitConstantPool'
+// method.
+//
+uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const {
+ assert(ConstantNum < ConstantPool->getConstants().size() &&
+ "Invalid ConstantPoolIndex!");
+ return ConstPoolAddresses[ConstantNum];
+}
+
+// getJumpTableEntryAddress - Return the address of the JumpTable with index
+// 'Index' in the jumpp table that was last initialized with 'initJumpTableInfo'
+//
+uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
+ const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables();
+ assert(Index < JT.size() && "Invalid jump table index!");
+
+ unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getDataLayout());
+
+ unsigned Offset = 0;
+ for (unsigned i = 0; i < Index; ++i)
+ Offset += JT[i].MBBs.size();
+
+ Offset *= EntrySize;
+
+ return (uintptr_t)((char *)JumpTableBase + Offset);
+}
+
+void JITEmitter::EmittedFunctionConfig::onDelete(
+ JITEmitter *Emitter, const Function *F) {
+ Emitter->deallocateMemForFunction(F);
+}
+void JITEmitter::EmittedFunctionConfig::onRAUW(
+ JITEmitter *, const Function*, const Function*) {
+ llvm_unreachable("The JIT doesn't know how to handle a"
+ " RAUW on a value it has emitted.");
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public interface to this file
+//===----------------------------------------------------------------------===//
+
+JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM,
+ TargetMachine &tm) {
+ return new JITEmitter(jit, JMM, tm);
+}
+
+// getPointerToFunctionOrStub - If the specified function has been
+// code-gen'd, return a pointer to the function. If not, compile it, or use
+// a stub to implement lazy compilation if available.
+//
+void *JIT::getPointerToFunctionOrStub(Function *F) {
+ // If we have already code generated the function, just return the address.
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr;
+
+ // Get a stub if the target supports it.
+ JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter());
+ return JE->getJITResolver().getLazyFunctionStub(F);
+}
+
+void JIT::updateFunctionStub(Function *F) {
+ // Get the empty stub we generated earlier.
+ JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter());
+ void *Stub = JE->getJITResolver().getLazyFunctionStub(F);
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr != Stub && "Function must have non-stub address to be updated.");
+
+ // Tell the target jit info to rewrite the stub at the specified address,
+ // rather than creating a new one.
+ TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout();
+ JE->startGVStub(Stub, layout.Size);
+ getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter());
+ JE->finishGVStub();
+}
+
+/// freeMachineCodeForFunction - release machine code memory for given Function.
+///
+void JIT::freeMachineCodeForFunction(Function *F) {
+ // Delete translation for this from the ExecutionEngine, so it will get
+ // retranslated next time it is used.
+ updateGlobalMapping(F, 0);
+
+ // Free the actual memory for the function body and related stuff.
+ static_cast<JITEmitter*>(JCE)->deallocateMemForFunction(F);
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
new file mode 100644
index 000000000000..66aeb772ddc3
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -0,0 +1,932 @@
+//===-- JITMemoryManager.cpp - Memory Allocator for JIT'd code ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DefaultJITMemoryManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <climits>
+#include <cstring>
+#include <vector>
+
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+
+using namespace llvm;
+
+STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT");
+
+JITMemoryManager::~JITMemoryManager() {}
+
+//===----------------------------------------------------------------------===//
+// Memory Block Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// MemoryRangeHeader - For a range of memory, this is the header that we put
+ /// on the block of memory. It is carefully crafted to be one word of memory.
+ /// Allocated blocks have just this header, free'd blocks have FreeRangeHeader
+ /// which starts with this.
+ struct FreeRangeHeader;
+ struct MemoryRangeHeader {
+ /// ThisAllocated - This is true if this block is currently allocated. If
+ /// not, this can be converted to a FreeRangeHeader.
+ unsigned ThisAllocated : 1;
+
+ /// PrevAllocated - Keep track of whether the block immediately before us is
+ /// allocated. If not, the word immediately before this header is the size
+ /// of the previous block.
+ unsigned PrevAllocated : 1;
+
+ /// BlockSize - This is the size in bytes of this memory block,
+ /// including this header.
+ uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2);
+
+
+ /// getBlockAfter - Return the memory block immediately after this one.
+ ///
+ MemoryRangeHeader &getBlockAfter() const {
+ return *reinterpret_cast<MemoryRangeHeader *>(
+ reinterpret_cast<char*>(
+ const_cast<MemoryRangeHeader *>(this))+BlockSize);
+ }
+
+ /// getFreeBlockBefore - If the block before this one is free, return it,
+ /// otherwise return null.
+ FreeRangeHeader *getFreeBlockBefore() const {
+ if (PrevAllocated) return 0;
+ intptr_t PrevSize = reinterpret_cast<intptr_t *>(
+ const_cast<MemoryRangeHeader *>(this))[-1];
+ return reinterpret_cast<FreeRangeHeader *>(
+ reinterpret_cast<char*>(
+ const_cast<MemoryRangeHeader *>(this))-PrevSize);
+ }
+
+ /// FreeBlock - Turn an allocated block into a free block, adjusting
+ /// bits in the object headers, and adding an end of region memory block.
+ FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList);
+
+ /// TrimAllocationToSize - If this allocated block is significantly larger
+ /// than NewSize, split it into two pieces (where the former is NewSize
+ /// bytes, including the header), and add the new block to the free list.
+ FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList,
+ uint64_t NewSize);
+ };
+
+ /// FreeRangeHeader - For a memory block that isn't already allocated, this
+ /// keeps track of the current block and has a pointer to the next free block.
+ /// Free blocks are kept on a circularly linked list.
+ struct FreeRangeHeader : public MemoryRangeHeader {
+ FreeRangeHeader *Prev;
+ FreeRangeHeader *Next;
+
+ /// getMinBlockSize - Get the minimum size for a memory block. Blocks
+ /// smaller than this size cannot be created.
+ static unsigned getMinBlockSize() {
+ return sizeof(FreeRangeHeader)+sizeof(intptr_t);
+ }
+
+ /// SetEndOfBlockSizeMarker - The word at the end of every free block is
+ /// known to be the size of the free block. Set it for this block.
+ void SetEndOfBlockSizeMarker() {
+ void *EndOfBlock = (char*)this + BlockSize;
+ ((intptr_t *)EndOfBlock)[-1] = BlockSize;
+ }
+
+ FreeRangeHeader *RemoveFromFreeList() {
+ assert(Next->Prev == this && Prev->Next == this && "Freelist broken!");
+ Next->Prev = Prev;
+ return Prev->Next = Next;
+ }
+
+ void AddToFreeList(FreeRangeHeader *FreeList) {
+ Next = FreeList;
+ Prev = FreeList->Prev;
+ Prev->Next = this;
+ Next->Prev = this;
+ }
+
+ /// GrowBlock - The block after this block just got deallocated. Merge it
+ /// into the current block.
+ void GrowBlock(uintptr_t NewSize);
+
+ /// AllocateBlock - Mark this entire block allocated, updating freelists
+ /// etc. This returns a pointer to the circular free-list.
+ FreeRangeHeader *AllocateBlock();
+ };
+}
+
+
+/// AllocateBlock - Mark this entire block allocated, updating freelists
+/// etc. This returns a pointer to the circular free-list.
+FreeRangeHeader *FreeRangeHeader::AllocateBlock() {
+ assert(!ThisAllocated && !getBlockAfter().PrevAllocated &&
+ "Cannot allocate an allocated block!");
+ // Mark this block allocated.
+ ThisAllocated = 1;
+ getBlockAfter().PrevAllocated = 1;
+
+ // Remove it from the free list.
+ return RemoveFromFreeList();
+}
+
+/// FreeBlock - Turn an allocated block into a free block, adjusting
+/// bits in the object headers, and adding an end of region memory block.
+/// If possible, coalesce this block with neighboring blocks. Return the
+/// FreeRangeHeader to allocate from.
+FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
+ MemoryRangeHeader *FollowingBlock = &getBlockAfter();
+ assert(ThisAllocated && "This block is already free!");
+ assert(FollowingBlock->PrevAllocated && "Flags out of sync!");
+
+ FreeRangeHeader *FreeListToReturn = FreeList;
+
+ // If the block after this one is free, merge it into this block.
+ if (!FollowingBlock->ThisAllocated) {
+ FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock;
+ // "FreeList" always needs to be a valid free block. If we're about to
+ // coalesce with it, update our notion of what the free list is.
+ if (&FollowingFreeBlock == FreeList) {
+ FreeList = FollowingFreeBlock.Next;
+ FreeListToReturn = 0;
+ assert(&FollowingFreeBlock != FreeList && "No tombstone block?");
+ }
+ FollowingFreeBlock.RemoveFromFreeList();
+
+ // Include the following block into this one.
+ BlockSize += FollowingFreeBlock.BlockSize;
+ FollowingBlock = &FollowingFreeBlock.getBlockAfter();
+
+ // Tell the block after the block we are coalescing that this block is
+ // allocated.
+ FollowingBlock->PrevAllocated = 1;
+ }
+
+ assert(FollowingBlock->ThisAllocated && "Missed coalescing?");
+
+ if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) {
+ PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize);
+ return FreeListToReturn ? FreeListToReturn : PrevFreeBlock;
+ }
+
+ // Otherwise, mark this block free.
+ FreeRangeHeader &FreeBlock = *(FreeRangeHeader*)this;
+ FollowingBlock->PrevAllocated = 0;
+ FreeBlock.ThisAllocated = 0;
+
+ // Link this into the linked list of free blocks.
+ FreeBlock.AddToFreeList(FreeList);
+
+ // Add a marker at the end of the block, indicating the size of this free
+ // block.
+ FreeBlock.SetEndOfBlockSizeMarker();
+ return FreeListToReturn ? FreeListToReturn : &FreeBlock;
+}
+
+/// GrowBlock - The block after this block just got deallocated. Merge it
+/// into the current block.
+void FreeRangeHeader::GrowBlock(uintptr_t NewSize) {
+ assert(NewSize > BlockSize && "Not growing block?");
+ BlockSize = NewSize;
+ SetEndOfBlockSizeMarker();
+ getBlockAfter().PrevAllocated = 0;
+}
+
+/// TrimAllocationToSize - If this allocated block is significantly larger
+/// than NewSize, split it into two pieces (where the former is NewSize
+/// bytes, including the header), and add the new block to the free list.
+FreeRangeHeader *MemoryRangeHeader::
+TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) {
+ assert(ThisAllocated && getBlockAfter().PrevAllocated &&
+ "Cannot deallocate part of an allocated block!");
+
+ // Don't allow blocks to be trimmed below minimum required size
+ NewSize = std::max<uint64_t>(FreeRangeHeader::getMinBlockSize(), NewSize);
+
+ // Round up size for alignment of header.
+ unsigned HeaderAlign = __alignof(FreeRangeHeader);
+ NewSize = (NewSize+ (HeaderAlign-1)) & ~(HeaderAlign-1);
+
+ // Size is now the size of the block we will remove from the start of the
+ // current block.
+ assert(NewSize <= BlockSize &&
+ "Allocating more space from this block than exists!");
+
+ // If splitting this block will cause the remainder to be too small, do not
+ // split the block.
+ if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize())
+ return FreeList;
+
+ // Otherwise, we splice the required number of bytes out of this block, form
+ // a new block immediately after it, then mark this block allocated.
+ MemoryRangeHeader &FormerNextBlock = getBlockAfter();
+
+ // Change the size of this block.
+ BlockSize = NewSize;
+
+ // Get the new block we just sliced out and turn it into a free block.
+ FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter();
+ NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock;
+ NewNextBlock.ThisAllocated = 0;
+ NewNextBlock.PrevAllocated = 1;
+ NewNextBlock.SetEndOfBlockSizeMarker();
+ FormerNextBlock.PrevAllocated = 0;
+ NewNextBlock.AddToFreeList(FreeList);
+ return &NewNextBlock;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Block Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+ class DefaultJITMemoryManager;
+
+ class JITSlabAllocator : public SlabAllocator {
+ DefaultJITMemoryManager &JMM;
+ public:
+ JITSlabAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { }
+ virtual ~JITSlabAllocator() { }
+ virtual MemSlab *Allocate(size_t Size);
+ virtual void Deallocate(MemSlab *Slab);
+ };
+
+ /// DefaultJITMemoryManager - Manage memory for the JIT code generation.
+ /// This splits a large block of MAP_NORESERVE'd memory into two
+ /// sections, one for function stubs, one for the functions themselves. We
+ /// have to do this because we may need to emit a function stub while in the
+ /// middle of emitting a function, and we don't know how large the function we
+ /// are emitting is.
+ class DefaultJITMemoryManager : public JITMemoryManager {
+
+ // Whether to poison freed memory.
+ bool PoisonMemory;
+
+ /// LastSlab - This points to the last slab allocated and is used as the
+ /// NearBlock parameter to AllocateRWX so that we can attempt to lay out all
+ /// stubs, data, and code contiguously in memory. In general, however, this
+ /// is not possible because the NearBlock parameter is ignored on Windows
+ /// platforms and even on Unix it works on a best-effort pasis.
+ sys::MemoryBlock LastSlab;
+
+ // Memory slabs allocated by the JIT. We refer to them as slabs so we don't
+ // confuse them with the blocks of memory described above.
+ std::vector<sys::MemoryBlock> CodeSlabs;
+ JITSlabAllocator BumpSlabAllocator;
+ BumpPtrAllocator StubAllocator;
+ BumpPtrAllocator DataAllocator;
+
+ // Circular list of free blocks.
+ FreeRangeHeader *FreeMemoryList;
+
+ // When emitting code into a memory block, this is the block.
+ MemoryRangeHeader *CurBlock;
+
+ uint8_t *GOTBase; // Target Specific reserved memory
+ public:
+ DefaultJITMemoryManager();
+ ~DefaultJITMemoryManager();
+
+ /// allocateNewSlab - Allocates a new MemoryBlock and remembers it as the
+ /// last slab it allocated, so that subsequent allocations follow it.
+ sys::MemoryBlock allocateNewSlab(size_t size);
+
+ /// DefaultCodeSlabSize - When we have to go map more memory, we allocate at
+ /// least this much unless more is requested.
+ static const size_t DefaultCodeSlabSize;
+
+ /// DefaultSlabSize - Allocate data into slabs of this size unless we get
+ /// an allocation above SizeThreshold.
+ static const size_t DefaultSlabSize;
+
+ /// DefaultSizeThreshold - For any allocation larger than this threshold, we
+ /// should allocate a separate slab.
+ static const size_t DefaultSizeThreshold;
+
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call.
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
+ void AllocateGOT();
+
+ // Testing methods.
+ virtual bool CheckInvariants(std::string &ErrorStr);
+ size_t GetDefaultCodeSlabSize() { return DefaultCodeSlabSize; }
+ size_t GetDefaultDataSlabSize() { return DefaultSlabSize; }
+ size_t GetDefaultStubSlabSize() { return DefaultSlabSize; }
+ unsigned GetNumCodeSlabs() { return CodeSlabs.size(); }
+ unsigned GetNumDataSlabs() { return DataAllocator.GetNumSlabs(); }
+ unsigned GetNumStubSlabs() { return StubAllocator.GetNumSlabs(); }
+
+ /// startFunctionBody - When a function starts, allocate a block of free
+ /// executable memory, returning a pointer to it and its actual size.
+ uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) {
+
+ FreeRangeHeader* candidateBlock = FreeMemoryList;
+ FreeRangeHeader* head = FreeMemoryList;
+ FreeRangeHeader* iter = head->Next;
+
+ uintptr_t largest = candidateBlock->BlockSize;
+
+ // Search for the largest free block
+ while (iter != head) {
+ if (iter->BlockSize > largest) {
+ largest = iter->BlockSize;
+ candidateBlock = iter;
+ }
+ iter = iter->Next;
+ }
+
+ largest = largest - sizeof(MemoryRangeHeader);
+
+ // If this block isn't big enough for the allocation desired, allocate
+ // another block of memory and add it to the free list.
+ if (largest < ActualSize ||
+ largest <= FreeRangeHeader::getMinBlockSize()) {
+ DEBUG(dbgs() << "JIT: Allocating another slab of memory for function.");
+ candidateBlock = allocateNewCodeSlab((size_t)ActualSize);
+ }
+
+ // Select this candidate block for allocation
+ CurBlock = candidateBlock;
+
+ // Allocate the entire memory block.
+ FreeMemoryList = candidateBlock->AllocateBlock();
+ ActualSize = CurBlock->BlockSize - sizeof(MemoryRangeHeader);
+ return (uint8_t *)(CurBlock + 1);
+ }
+
+ /// allocateNewCodeSlab - Helper method to allocate a new slab of code
+ /// memory from the OS and add it to the free list. Returns the new
+ /// FreeRangeHeader at the base of the slab.
+ FreeRangeHeader *allocateNewCodeSlab(size_t MinSize) {
+ // If the user needs at least MinSize free memory, then we account for
+ // two MemoryRangeHeaders: the one in the user's block, and the one at the
+ // end of the slab.
+ size_t PaddedMin = MinSize + 2 * sizeof(MemoryRangeHeader);
+ size_t SlabSize = std::max(DefaultCodeSlabSize, PaddedMin);
+ sys::MemoryBlock B = allocateNewSlab(SlabSize);
+ CodeSlabs.push_back(B);
+ char *MemBase = (char*)(B.base());
+
+ // Put a tiny allocated block at the end of the memory chunk, so when
+ // FreeBlock calls getBlockAfter it doesn't fall off the end.
+ MemoryRangeHeader *EndBlock =
+ (MemoryRangeHeader*)(MemBase + B.size()) - 1;
+ EndBlock->ThisAllocated = 1;
+ EndBlock->PrevAllocated = 0;
+ EndBlock->BlockSize = sizeof(MemoryRangeHeader);
+
+ // Start out with a vast new block of free memory.
+ FreeRangeHeader *NewBlock = (FreeRangeHeader*)MemBase;
+ NewBlock->ThisAllocated = 0;
+ // Make sure getFreeBlockBefore doesn't look into unmapped memory.
+ NewBlock->PrevAllocated = 1;
+ NewBlock->BlockSize = (uintptr_t)EndBlock - (uintptr_t)NewBlock;
+ NewBlock->SetEndOfBlockSizeMarker();
+ NewBlock->AddToFreeList(FreeMemoryList);
+
+ assert(NewBlock->BlockSize - sizeof(MemoryRangeHeader) >= MinSize &&
+ "The block was too small!");
+ return NewBlock;
+ }
+
+ /// endFunctionBody - The function F is now allocated, and takes the memory
+ /// in the range [FunctionStart,FunctionEnd).
+ void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
+ assert(FunctionEnd > FunctionStart);
+ assert(FunctionStart == (uint8_t *)(CurBlock+1) &&
+ "Mismatched function start/end!");
+
+ uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock;
+
+ // Release the memory at the end of this block that isn't needed.
+ FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+ }
+
+ /// allocateSpace - Allocate a memory block of the given size. This method
+ /// cannot be called between calls to startFunctionBody and endFunctionBody.
+ uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
+ CurBlock = FreeMemoryList;
+ FreeMemoryList = FreeMemoryList->AllocateBlock();
+
+ uint8_t *result = (uint8_t *)(CurBlock + 1);
+
+ if (Alignment == 0) Alignment = 1;
+ result = (uint8_t*)(((intptr_t)result+Alignment-1) &
+ ~(intptr_t)(Alignment-1));
+
+ uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock;
+ FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+
+ return result;
+ }
+
+ /// allocateStub - Allocate memory for a function stub.
+ uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment) {
+ return (uint8_t*)StubAllocator.Allocate(StubSize, Alignment);
+ }
+
+ /// allocateGlobal - Allocate memory for a global.
+ uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
+ return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ }
+
+ /// allocateCodeSection - Allocate memory for a code section.
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ // Grow the required block size to account for the block header
+ Size += sizeof(*CurBlock);
+
+ // FIXME: Alignement handling.
+ FreeRangeHeader* candidateBlock = FreeMemoryList;
+ FreeRangeHeader* head = FreeMemoryList;
+ FreeRangeHeader* iter = head->Next;
+
+ uintptr_t largest = candidateBlock->BlockSize;
+
+ // Search for the largest free block.
+ while (iter != head) {
+ if (iter->BlockSize > largest) {
+ largest = iter->BlockSize;
+ candidateBlock = iter;
+ }
+ iter = iter->Next;
+ }
+
+ largest = largest - sizeof(MemoryRangeHeader);
+
+ // If this block isn't big enough for the allocation desired, allocate
+ // another block of memory and add it to the free list.
+ if (largest < Size || largest <= FreeRangeHeader::getMinBlockSize()) {
+ DEBUG(dbgs() << "JIT: Allocating another slab of memory for function.");
+ candidateBlock = allocateNewCodeSlab((size_t)Size);
+ }
+
+ // Select this candidate block for allocation
+ CurBlock = candidateBlock;
+
+ // Allocate the entire memory block.
+ FreeMemoryList = candidateBlock->AllocateBlock();
+ // Release the memory at the end of this block that isn't needed.
+ FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, Size);
+ return (uint8_t *)(CurBlock + 1);
+ }
+
+ /// allocateDataSection - Allocate memory for a data section.
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, bool IsReadOnly) {
+ return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ }
+
+ bool applyPermissions(std::string *ErrMsg) {
+ return false;
+ }
+
+ /// startExceptionTable - Use startFunctionBody to allocate memory for the
+ /// function's exception table.
+ uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
+ return startFunctionBody(F, ActualSize);
+ }
+
+ /// endExceptionTable - The exception table of F is now allocated,
+ /// and takes the memory in the range [TableStart,TableEnd).
+ void endExceptionTable(const Function *F, uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister) {
+ assert(TableEnd > TableStart);
+ assert(TableStart == (uint8_t *)(CurBlock+1) &&
+ "Mismatched table start/end!");
+
+ uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock;
+
+ // Release the memory at the end of this block that isn't needed.
+ FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+ }
+
+ uint8_t *getGOTBase() const {
+ return GOTBase;
+ }
+
+ void deallocateBlock(void *Block) {
+ // Find the block that is allocated for this function.
+ MemoryRangeHeader *MemRange = static_cast<MemoryRangeHeader*>(Block) - 1;
+ assert(MemRange->ThisAllocated && "Block isn't allocated!");
+
+ // Fill the buffer with garbage!
+ if (PoisonMemory) {
+ memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
+ }
+
+ // Free the memory.
+ FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
+ }
+
+ /// deallocateFunctionBody - Deallocate all memory for the specified
+ /// function body.
+ void deallocateFunctionBody(void *Body) {
+ if (Body) deallocateBlock(Body);
+ }
+
+ /// deallocateExceptionTable - Deallocate memory for the specified
+ /// exception table.
+ void deallocateExceptionTable(void *ET) {
+ if (ET) deallocateBlock(ET);
+ }
+
+ /// setMemoryWritable - When code generation is in progress,
+ /// the code pages may need permissions changed.
+ void setMemoryWritable()
+ {
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::setWritable(CodeSlabs[i]);
+ }
+ /// setMemoryExecutable - When code generation is done and we're ready to
+ /// start execution, the code pages may need permissions changed.
+ void setMemoryExecutable()
+ {
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::setExecutable(CodeSlabs[i]);
+ }
+
+ /// setPoisonMemory - Controls whether we write garbage over freed memory.
+ ///
+ void setPoisonMemory(bool poison) {
+ PoisonMemory = poison;
+ }
+ };
+}
+
+MemSlab *JITSlabAllocator::Allocate(size_t Size) {
+ sys::MemoryBlock B = JMM.allocateNewSlab(Size);
+ MemSlab *Slab = (MemSlab*)B.base();
+ Slab->Size = B.size();
+ Slab->NextPtr = 0;
+ return Slab;
+}
+
+void JITSlabAllocator::Deallocate(MemSlab *Slab) {
+ sys::MemoryBlock B(Slab, Slab->Size);
+ sys::Memory::ReleaseRWX(B);
+}
+
+DefaultJITMemoryManager::DefaultJITMemoryManager()
+ :
+#ifdef NDEBUG
+ PoisonMemory(false),
+#else
+ PoisonMemory(true),
+#endif
+ LastSlab(0, 0),
+ BumpSlabAllocator(*this),
+ StubAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator),
+ DataAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator) {
+
+ // Allocate space for code.
+ sys::MemoryBlock MemBlock = allocateNewSlab(DefaultCodeSlabSize);
+ CodeSlabs.push_back(MemBlock);
+ uint8_t *MemBase = (uint8_t*)MemBlock.base();
+
+ // We set up the memory chunk with 4 mem regions, like this:
+ // [ START
+ // [ Free #0 ] -> Large space to allocate functions from.
+ // [ Allocated #1 ] -> Tiny space to separate regions.
+ // [ Free #2 ] -> Tiny space so there is always at least 1 free block.
+ // [ Allocated #3 ] -> Tiny space to prevent looking past end of block.
+ // END ]
+ //
+ // The last three blocks are never deallocated or touched.
+
+ // Add MemoryRangeHeader to the end of the memory region, indicating that
+ // the space after the block of memory is allocated. This is block #3.
+ MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1;
+ Mem3->ThisAllocated = 1;
+ Mem3->PrevAllocated = 0;
+ Mem3->BlockSize = sizeof(MemoryRangeHeader);
+
+ /// Add a tiny free region so that the free list always has one entry.
+ FreeRangeHeader *Mem2 =
+ (FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize());
+ Mem2->ThisAllocated = 0;
+ Mem2->PrevAllocated = 1;
+ Mem2->BlockSize = FreeRangeHeader::getMinBlockSize();
+ Mem2->SetEndOfBlockSizeMarker();
+ Mem2->Prev = Mem2; // Mem2 *is* the free list for now.
+ Mem2->Next = Mem2;
+
+ /// Add a tiny allocated region so that Mem2 is never coalesced away.
+ MemoryRangeHeader *Mem1 = (MemoryRangeHeader*)Mem2-1;
+ Mem1->ThisAllocated = 1;
+ Mem1->PrevAllocated = 0;
+ Mem1->BlockSize = sizeof(MemoryRangeHeader);
+
+ // Add a FreeRangeHeader to the start of the function body region, indicating
+ // that the space is free. Mark the previous block allocated so we never look
+ // at it.
+ FreeRangeHeader *Mem0 = (FreeRangeHeader*)MemBase;
+ Mem0->ThisAllocated = 0;
+ Mem0->PrevAllocated = 1;
+ Mem0->BlockSize = (char*)Mem1-(char*)Mem0;
+ Mem0->SetEndOfBlockSizeMarker();
+ Mem0->AddToFreeList(Mem2);
+
+ // Start out with the freelist pointing to Mem0.
+ FreeMemoryList = Mem0;
+
+ GOTBase = NULL;
+}
+
+void DefaultJITMemoryManager::AllocateGOT() {
+ assert(GOTBase == 0 && "Cannot allocate the got multiple times");
+ GOTBase = new uint8_t[sizeof(void*) * 8192];
+ HasGOT = true;
+}
+
+DefaultJITMemoryManager::~DefaultJITMemoryManager() {
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::ReleaseRWX(CodeSlabs[i]);
+
+ delete[] GOTBase;
+}
+
+sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) {
+ // Allocate a new block close to the last one.
+ std::string ErrMsg;
+ sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : 0;
+ sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg);
+ if (B.base() == 0) {
+ report_fatal_error("Allocation failed when allocating new memory in the"
+ " JIT\n" + Twine(ErrMsg));
+ }
+ LastSlab = B;
+ ++NumSlabs;
+ // Initialize the slab to garbage when debugging.
+ if (PoisonMemory) {
+ memset(B.base(), 0xCD, B.size());
+ }
+ return B;
+}
+
+/// CheckInvariants - For testing only. Return "" if all internal invariants
+/// are preserved, and a helpful error message otherwise. For free and
+/// allocated blocks, make sure that adding BlockSize gives a valid block.
+/// For free blocks, make sure they're in the free list and that their end of
+/// block size marker is correct. This function should return an error before
+/// accessing bad memory. This function is defined here instead of in
+/// JITMemoryManagerTest.cpp so that we don't have to expose all of the
+/// implementation details of DefaultJITMemoryManager.
+bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
+ raw_string_ostream Err(ErrorStr);
+
+ // Construct a the set of FreeRangeHeader pointers so we can query it
+ // efficiently.
+ llvm::SmallPtrSet<MemoryRangeHeader*, 16> FreeHdrSet;
+ FreeRangeHeader* FreeHead = FreeMemoryList;
+ FreeRangeHeader* FreeRange = FreeHead;
+
+ do {
+ // Check that the free range pointer is in the blocks we've allocated.
+ bool Found = false;
+ for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
+ E = CodeSlabs.end(); I != E && !Found; ++I) {
+ char *Start = (char*)I->base();
+ char *End = Start + I->size();
+ Found = (Start <= (char*)FreeRange && (char*)FreeRange < End);
+ }
+ if (!Found) {
+ Err << "Corrupt free list; points to " << FreeRange;
+ return false;
+ }
+
+ if (FreeRange->Next->Prev != FreeRange) {
+ Err << "Next and Prev pointers do not match.";
+ return false;
+ }
+
+ // Otherwise, add it to the set.
+ FreeHdrSet.insert(FreeRange);
+ FreeRange = FreeRange->Next;
+ } while (FreeRange != FreeHead);
+
+ // Go over each block, and look at each MemoryRangeHeader.
+ for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
+ E = CodeSlabs.end(); I != E; ++I) {
+ char *Start = (char*)I->base();
+ char *End = Start + I->size();
+
+ // Check each memory range.
+ for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = NULL;
+ Start <= (char*)Hdr && (char*)Hdr < End;
+ Hdr = &Hdr->getBlockAfter()) {
+ if (Hdr->ThisAllocated == 0) {
+ // Check that this range is in the free list.
+ if (!FreeHdrSet.count(Hdr)) {
+ Err << "Found free header at " << Hdr << " that is not in free list.";
+ return false;
+ }
+
+ // Now make sure the size marker at the end of the block is correct.
+ uintptr_t *Marker = ((uintptr_t*)&Hdr->getBlockAfter()) - 1;
+ if (!(Start <= (char*)Marker && (char*)Marker < End)) {
+ Err << "Block size in header points out of current MemoryBlock.";
+ return false;
+ }
+ if (Hdr->BlockSize != *Marker) {
+ Err << "End of block size marker (" << *Marker << ") "
+ << "and BlockSize (" << Hdr->BlockSize << ") don't match.";
+ return false;
+ }
+ }
+
+ if (LastHdr && LastHdr->ThisAllocated != Hdr->PrevAllocated) {
+ Err << "Hdr->PrevAllocated (" << Hdr->PrevAllocated << ") != "
+ << "LastHdr->ThisAllocated (" << LastHdr->ThisAllocated << ")";
+ return false;
+ } else if (!LastHdr && !Hdr->PrevAllocated) {
+ Err << "The first header should have PrevAllocated true.";
+ return false;
+ }
+
+ // Remember the last header.
+ LastHdr = Hdr;
+ }
+ }
+
+ // All invariants are preserved.
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// getPointerToNamedFunction() implementation.
+//===----------------------------------------------------------------------===//
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+/* stat functions are redirecting to __xstat with a version number. On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+ // We should not invoke parent's ctors/dtors from generated main()!
+ // On Mingw and Cygwin, the symbol __main is resolved to
+ // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
+ // (and register wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors()
+ // is called before ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+ // If this is an asm specifier, skip the sentinal.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // try again without the underscore.
+ if (NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
+
+
+
+JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() {
+ return new DefaultJITMemoryManager();
+}
+
+// Allocate memory for code in 512K slabs.
+const size_t DefaultJITMemoryManager::DefaultCodeSlabSize = 512 * 1024;
+
+// Allocate globals and stubs in slabs of 64K. (probably 16 pages)
+const size_t DefaultJITMemoryManager::DefaultSlabSize = 64 * 1024;
+
+// Waste at most 16K at the end of each bump slab. (probably 4 pages)
+const size_t DefaultJITMemoryManager::DefaultSizeThreshold = 16 * 1024;
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
new file mode 100644
index 000000000000..fee10e194355
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -0,0 +1,336 @@
+//===-- MCJIT.cpp - MC-based Just-in-Time Compiler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MutexGuard.h"
+
+using namespace llvm;
+
+namespace {
+
+static struct RegisterJIT {
+ RegisterJIT() { MCJIT::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInMCJIT() {
+}
+
+ExecutionEngine *MCJIT::createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ bool GVsWithCode,
+ TargetMachine *TM) {
+ // Try to register the program as a source of symbols to resolve against.
+ //
+ // FIXME: Don't do this here.
+ sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+
+ return new MCJIT(M, TM, JMM, GVsWithCode);
+}
+
+MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM,
+ bool AllocateGVsWithCode)
+ : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM),
+ isCompiled(false), M(m) {
+
+ setDataLayout(TM->getDataLayout());
+}
+
+MCJIT::~MCJIT() {
+ if (LoadedObject)
+ NotifyFreeingObject(*LoadedObject.get());
+ delete MemMgr;
+ delete TM;
+}
+
+void MCJIT::emitObject(Module *m) {
+ /// Currently, MCJIT only supports a single module and the module passed to
+ /// this function call is expected to be the contained module. The module
+ /// is passed as a parameter here to prepare for multiple module support in
+ /// the future.
+ assert(M == m);
+
+ // Get a thread lock to make sure we aren't trying to compile multiple times
+ MutexGuard locked(lock);
+
+ // FIXME: Track compilation state on a per-module basis when multiple modules
+ // are supported.
+ // Re-compilation is not supported
+ if (isCompiled)
+ return;
+
+ PassManager PM;
+
+ PM.add(new DataLayout(*TM->getDataLayout()));
+
+ // The RuntimeDyld will take ownership of this shortly
+ OwningPtr<ObjectBufferStream> Buffer(new ObjectBufferStream());
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM->addPassesToEmitMC(PM, Ctx, Buffer->getOStream(), false)) {
+ report_fatal_error("Target does not support MC emission!");
+ }
+
+ // Initialize passes.
+ PM.run(*m);
+ // Flush the output buffer to get the generated code into memory
+ Buffer->flush();
+
+ // Load the object into the dynamic linker.
+ // handing off ownership of the buffer
+ LoadedObject.reset(Dyld.loadObject(Buffer.take()));
+ if (!LoadedObject)
+ report_fatal_error(Dyld.getErrorString());
+
+ // Resolve any relocations.
+ Dyld.resolveRelocations();
+
+ // FIXME: Make this optional, maybe even move it to a JIT event listener
+ LoadedObject->registerWithDebugger();
+
+ NotifyObjectEmitted(*LoadedObject);
+
+ // FIXME: Add support for per-module compilation state
+ isCompiled = true;
+}
+
+// FIXME: Add a parameter to identify which object is being finalized when
+// MCJIT supports multiple modules.
+// FIXME: Provide a way to separate code emission, relocations and page
+// protection in the interface.
+void MCJIT::finalizeObject() {
+ // If the module hasn't been compiled, just do that.
+ if (!isCompiled) {
+ // If the call to Dyld.resolveRelocations() is removed from emitObject()
+ // we'll need to do that here.
+ emitObject(M);
+
+ // Set page permissions.
+ MemMgr->applyPermissions();
+
+ return;
+ }
+
+ // Resolve any relocations.
+ Dyld.resolveRelocations();
+
+ // Set page permissions.
+ MemMgr->applyPermissions();
+}
+
+void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
+ report_fatal_error("not yet implemented");
+}
+
+void *MCJIT::getPointerToFunction(Function *F) {
+ // FIXME: This should really return a uint64_t since it's a pointer in the
+ // target address space, not our local address space. That's part of the
+ // ExecutionEngine interface, though. Fix that when the old JIT finally
+ // dies.
+
+ // FIXME: Add support for per-module compilation state
+ if (!isCompiled)
+ emitObject(M);
+
+ if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
+ bool AbortOnFailure = !F->hasExternalWeakLinkage();
+ void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
+ addGlobalMapping(F, Addr);
+ return Addr;
+ }
+
+ // FIXME: Should the Dyld be retaining module information? Probably not.
+ // FIXME: Should we be using the mangler for this? Probably.
+ //
+ // This is the accessor for the target address, so make sure to check the
+ // load address of the symbol, not the local address.
+ StringRef BaseName = F->getName();
+ if (BaseName[0] == '\1')
+ return (void*)Dyld.getSymbolLoadAddress(BaseName.substr(1));
+ return (void*)Dyld.getSymbolLoadAddress((TM->getMCAsmInfo()->getGlobalPrefix()
+ + BaseName).str());
+}
+
+void *MCJIT::recompileAndRelinkFunction(Function *F) {
+ report_fatal_error("not yet implemented");
+}
+
+void MCJIT::freeMachineCodeForFunction(Function *F) {
+ report_fatal_error("not yet implemented");
+}
+
+GenericValue MCJIT::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert(F && "Function *F was null at entry to run()");
+
+ void *FPtr = getPointerToFunction(F);
+ assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
+ FunctionType *FTy = F->getFunctionType();
+ Type *RetTy = FTy->getReturnType();
+
+ assert((FTy->getNumParams() == ArgValues.size() ||
+ (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
+ "Wrong number of arguments passed into function!");
+ assert(FTy->getNumParams() == ArgValues.size() &&
+ "This doesn't support passing arguments through varargs (yet)!");
+
+ // Handle some common cases first. These cases correspond to common `main'
+ // prototypes.
+ if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
+ switch (ArgValues.size()) {
+ case 3:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy() &&
+ FTy->getParamType(2)->isPointerTy()) {
+ int (*PF)(int, char **, const char **) =
+ (int(*)(int, char **, const char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1]),
+ (const char **)GVTOP(ArgValues[2])));
+ return rv;
+ }
+ break;
+ case 2:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy()) {
+ int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1])));
+ return rv;
+ }
+ break;
+ case 1:
+ if (FTy->getNumParams() == 1 &&
+ FTy->getParamType(0)->isIntegerTy(32)) {
+ GenericValue rv;
+ int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
+ return rv;
+ }
+ break;
+ }
+ }
+
+ // Handle cases where no arguments are passed first.
+ if (ArgValues.empty()) {
+ GenericValue rv;
+ switch (RetTy->getTypeID()) {
+ default: llvm_unreachable("Unknown return type for function call!");
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
+ if (BitWidth == 1)
+ rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 8)
+ rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 16)
+ rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 32)
+ rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 64)
+ rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
+ else
+ llvm_unreachable("Integer types > 64 bits not supported");
+ return rv;
+ }
+ case Type::VoidTyID:
+ rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
+ return rv;
+ case Type::FloatTyID:
+ rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::DoubleTyID:
+ rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ llvm_unreachable("long double not supported yet");
+ case Type::PointerTyID:
+ return PTOGV(((void*(*)())(intptr_t)FPtr)());
+ }
+ }
+
+ llvm_unreachable("Full-featured argument passing not supported yet!");
+}
+
+void *MCJIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ // FIXME: Add support for per-module compilation state
+ if (!isCompiled)
+ emitObject(M);
+
+ if (!isSymbolSearchingDisabled() && MemMgr) {
+ void *ptr = MemMgr->getPointerToNamedFunction(Name, false);
+ if (ptr)
+ return ptr;
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
+
+void MCJIT::RegisterJITEventListener(JITEventListener *L) {
+ if (L == NULL)
+ return;
+ MutexGuard locked(lock);
+ EventListeners.push_back(L);
+}
+void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
+ if (L == NULL)
+ return;
+ MutexGuard locked(lock);
+ SmallVector<JITEventListener*, 2>::reverse_iterator I=
+ std::find(EventListeners.rbegin(), EventListeners.rend(), L);
+ if (I != EventListeners.rend()) {
+ std::swap(*I, EventListeners.back());
+ EventListeners.pop_back();
+ }
+}
+void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) {
+ MutexGuard locked(lock);
+ for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+ EventListeners[I]->NotifyObjectEmitted(Obj);
+ }
+}
+void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) {
+ MutexGuard locked(lock);
+ for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+ EventListeners[I]->NotifyFreeingObject(Obj);
+ }
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
new file mode 100644
index 000000000000..283a8e528118
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -0,0 +1,113 @@
+//===-- MCJIT.h - Class definition for the MCJIT ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/PassManager.h"
+
+namespace llvm {
+
+class ObjectImage;
+
+// FIXME: This makes all kinds of horrible assumptions for the time being,
+// like only having one module, not needing to worry about multi-threading,
+// blah blah. Purely in get-it-up-and-limping mode for now.
+
+class MCJIT : public ExecutionEngine {
+ MCJIT(Module *M, TargetMachine *tm, RTDyldMemoryManager *MemMgr,
+ bool AllocateGVsWithCode);
+
+ TargetMachine *TM;
+ MCContext *Ctx;
+ RTDyldMemoryManager *MemMgr;
+ RuntimeDyld Dyld;
+ SmallVector<JITEventListener*, 2> EventListeners;
+
+ // FIXME: Add support for multiple modules
+ bool isCompiled;
+ Module *M;
+ OwningPtr<ObjectImage> LoadedObject;
+
+public:
+ ~MCJIT();
+
+ /// @name ExecutionEngine interface implementation
+ /// @{
+
+ virtual void finalizeObject();
+
+ virtual void *getPointerToBasicBlock(BasicBlock *BB);
+
+ virtual void *getPointerToFunction(Function *F);
+
+ virtual void *recompileAndRelinkFunction(Function *F);
+
+ virtual void freeMachineCodeForFunction(Function *F);
+
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call. As such it is only
+ /// useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
+ /// mapSectionAddress - map a section to its target address space value.
+ /// Map the address of a JIT section as returned from the memory manager
+ /// to the address in the target process as the running code will see it.
+ /// This is the address which will be used for relocation resolution.
+ virtual void mapSectionAddress(const void *LocalAddress,
+ uint64_t TargetAddress) {
+ Dyld.mapSectionAddress(LocalAddress, TargetAddress);
+ }
+
+ virtual void RegisterJITEventListener(JITEventListener *L);
+ virtual void UnregisterJITEventListener(JITEventListener *L);
+
+ /// @}
+ /// @name (Private) Registration Interfaces
+ /// @{
+
+ static void Register() {
+ MCJITCtor = createJIT;
+ }
+
+ static ExecutionEngine *createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ bool GVsWithCode,
+ TargetMachine *TM);
+
+ // @}
+
+protected:
+ /// emitObject -- Generate a JITed object in memory from the specified module
+ /// Currently, MCJIT only supports a single module and the module passed to
+ /// this function call is expected to be the contained module. The module
+ /// is passed as a parameter here to prepare for multiple module support in
+ /// the future.
+ void emitObject(Module *M);
+
+ void NotifyObjectEmitted(const ObjectImage& Obj);
+ void NotifyFreeingObject(const ObjectImage& Obj);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
new file mode 100644
index 000000000000..fa35acd389ae
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
@@ -0,0 +1,226 @@
+//===- SectionMemoryManager.cpp - Memory manager for MCJIT/RtDyld *- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the section-based memory manager used by the MCJIT
+// execution engine and RuntimeDyld
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/MathExtras.h"
+
+#ifdef __linux__
+ // These includes used by SectionMemoryManager::getPointerToNamedFunction()
+ // for Glibc trickery. See comments in this function for more information.
+ #ifdef HAVE_SYS_STAT_H
+ #include <sys/stat.h>
+ #endif
+ #include <fcntl.h>
+ #include <unistd.h>
+#endif
+
+namespace llvm {
+
+uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID,
+ bool IsReadOnly) {
+ if (IsReadOnly)
+ return allocateSection(RODataMem, Size, Alignment);
+ return allocateSection(RWDataMem, Size, Alignment);
+}
+
+uint8_t *SectionMemoryManager::allocateCodeSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID) {
+ return allocateSection(CodeMem, Size, Alignment);
+}
+
+uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup,
+ uintptr_t Size,
+ unsigned Alignment) {
+ if (!Alignment)
+ Alignment = 16;
+
+ assert(!(Alignment & (Alignment - 1)) && "Alignment must be a power of two.");
+
+ uintptr_t RequiredSize = Alignment * ((Size + Alignment - 1)/Alignment + 1);
+ uintptr_t Addr = 0;
+
+ // Look in the list of free memory regions and use a block there if one
+ // is available.
+ for (int i = 0, e = MemGroup.FreeMem.size(); i != e; ++i) {
+ sys::MemoryBlock &MB = MemGroup.FreeMem[i];
+ if (MB.size() >= RequiredSize) {
+ Addr = (uintptr_t)MB.base();
+ uintptr_t EndOfBlock = Addr + MB.size();
+ // Align the address.
+ Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
+ // Store cutted free memory block.
+ MemGroup.FreeMem[i] = sys::MemoryBlock((void*)(Addr + Size),
+ EndOfBlock - Addr - Size);
+ return (uint8_t*)Addr;
+ }
+ }
+
+ // No pre-allocated free block was large enough. Allocate a new memory region.
+ // Note that all sections get allocated as read-write. The permissions will
+ // be updated later based on memory group.
+ //
+ // FIXME: It would be useful to define a default allocation size (or add
+ // it as a constructor parameter) to minimize the number of allocations.
+ //
+ // FIXME: Initialize the Near member for each memory group to avoid
+ // interleaving.
+ error_code ec;
+ sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(RequiredSize,
+ &MemGroup.Near,
+ sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE,
+ ec);
+ if (ec) {
+ // FIXME: Add error propogation to the interface.
+ return NULL;
+ }
+
+ // Save this address as the basis for our next request
+ MemGroup.Near = MB;
+
+ MemGroup.AllocatedMem.push_back(MB);
+ Addr = (uintptr_t)MB.base();
+ uintptr_t EndOfBlock = Addr + MB.size();
+
+ // Align the address.
+ Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
+
+ // The allocateMappedMemory may allocate much more memory than we need. In
+ // this case, we store the unused memory as a free memory block.
+ unsigned FreeSize = EndOfBlock-Addr-Size;
+ if (FreeSize > 16)
+ MemGroup.FreeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize));
+
+ // Return aligned address
+ return (uint8_t*)Addr;
+}
+
+bool SectionMemoryManager::applyPermissions(std::string *ErrMsg)
+{
+ // FIXME: Should in-progress permissions be reverted if an error occurs?
+ error_code ec;
+
+ // Make code memory executable.
+ ec = applyMemoryGroupPermissions(CodeMem,
+ sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+ if (ec) {
+ if (ErrMsg) {
+ *ErrMsg = ec.message();
+ }
+ return true;
+ }
+
+ // Make read-only data memory read-only.
+ ec = applyMemoryGroupPermissions(RODataMem,
+ sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+ if (ec) {
+ if (ErrMsg) {
+ *ErrMsg = ec.message();
+ }
+ return true;
+ }
+
+ // Read-write data memory already has the correct permissions
+
+ return false;
+}
+
+error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
+ unsigned Permissions) {
+
+ for (int i = 0, e = MemGroup.AllocatedMem.size(); i != e; ++i) {
+ error_code ec;
+ ec = sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i],
+ Permissions);
+ if (ec) {
+ return ec;
+ }
+ }
+
+ return error_code::success();
+}
+
+void SectionMemoryManager::invalidateInstructionCache() {
+ for (int i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i)
+ sys::Memory::InvalidateInstructionCache(CodeMem.AllocatedMem[i].base(),
+ CodeMem.AllocatedMem[i].size());
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+void *SectionMemoryManager::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+#if defined(__linux__)
+ //===--------------------------------------------------------------------===//
+ // Function stubs that are invoked instead of certain library calls
+ //
+ // Force the following functions to be linked in to anything that uses the
+ // JIT. This is a hack designed to work around the all-too-clever Glibc
+ // strategy of making these functions work differently when inlined vs. when
+ // not inlined, and hiding their real definitions in a separate archive file
+ // that the dynamic linker can't see. For more info, search for
+ // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+ if (Name == "stat") return (void*)(intptr_t)&stat;
+ if (Name == "fstat") return (void*)(intptr_t)&fstat;
+ if (Name == "lstat") return (void*)(intptr_t)&lstat;
+ if (Name == "stat64") return (void*)(intptr_t)&stat64;
+ if (Name == "fstat64") return (void*)(intptr_t)&fstat64;
+ if (Name == "lstat64") return (void*)(intptr_t)&lstat64;
+ if (Name == "atexit") return (void*)(intptr_t)&atexit;
+ if (Name == "mknod") return (void*)(intptr_t)&mknod;
+#endif // __linux__
+
+ // We should not invoke parent's ctors/dtors from generated main()!
+ // On Mingw and Cygwin, the symbol __main is resolved to
+ // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
+ // (and register wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors()
+ // is called before ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // try again without the underscore.
+ if (NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ if (AbortOnFailure)
+ report_fatal_error("Program used external function '" + Name +
+ "' which could not be resolved!");
+ return 0;
+}
+
+SectionMemoryManager::~SectionMemoryManager() {
+ for (unsigned i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i)
+ sys::Memory::releaseMappedMemory(CodeMem.AllocatedMem[i]);
+ for (unsigned i = 0, e = RWDataMem.AllocatedMem.size(); i != e; ++i)
+ sys::Memory::releaseMappedMemory(RWDataMem.AllocatedMem[i]);
+ for (unsigned i = 0, e = RODataMem.AllocatedMem.size(); i != e; ++i)
+ sys::Memory::releaseMappedMemory(RODataMem.AllocatedMem[i]);
+}
+
+} // namespace llvm
+
diff --git a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
new file mode 100644
index 000000000000..38867ecca591
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -0,0 +1,177 @@
+//===-- OProfileJITEventListener.cpp - Tell OProfile about JITted code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that uses OProfileWrapper to tell
+// oprofile about JITted functions, including source line information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
+#define DEBUG_TYPE "oprofile-jit-event-listener"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Errno.h"
+#include "EventListenerCommon.h"
+
+#include <dirent.h>
+#include <fcntl.h>
+
+using namespace llvm;
+using namespace llvm::jitprofiling;
+
+namespace {
+
+class OProfileJITEventListener : public JITEventListener {
+ OProfileWrapper& Wrapper;
+
+ void initialize();
+
+public:
+ OProfileJITEventListener(OProfileWrapper& LibraryWrapper)
+ : Wrapper(LibraryWrapper) {
+ initialize();
+ }
+
+ ~OProfileJITEventListener();
+
+ virtual void NotifyFunctionEmitted(const Function &F,
+ void *FnStart, size_t FnSize,
+ const JITEvent_EmittedFunctionDetails &Details);
+
+ virtual void NotifyFreeingMachineCode(void *OldPtr);
+};
+
+void OProfileJITEventListener::initialize() {
+ if (!Wrapper.op_open_agent()) {
+ const std::string err_str = sys::StrError();
+ DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n");
+ } else {
+ DEBUG(dbgs() << "Connected to OProfile agent.\n");
+ }
+}
+
+OProfileJITEventListener::~OProfileJITEventListener() {
+ if (Wrapper.isAgentAvailable()) {
+ if (Wrapper.op_close_agent() == -1) {
+ const std::string err_str = sys::StrError();
+ DEBUG(dbgs() << "Failed to disconnect from OProfile agent: "
+ << err_str << "\n");
+ } else {
+ DEBUG(dbgs() << "Disconnected from OProfile agent.\n");
+ }
+ }
+}
+
+static debug_line_info LineStartToOProfileFormat(
+ const MachineFunction &MF, FilenameCache &Filenames,
+ uintptr_t Address, DebugLoc Loc) {
+ debug_line_info Result;
+ Result.vma = Address;
+ Result.lineno = Loc.getLine();
+ Result.filename = Filenames.getFilename(
+ Loc.getScope(MF.getFunction()->getContext()));
+ DEBUG(dbgs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to "
+ << Result.filename << ":" << Result.lineno << "\n");
+ return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void OProfileJITEventListener::NotifyFunctionEmitted(
+ const Function &F, void *FnStart, size_t FnSize,
+ const JITEvent_EmittedFunctionDetails &Details) {
+ assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
+ if (Wrapper.op_write_native_code(F.getName().data(),
+ reinterpret_cast<uint64_t>(FnStart),
+ FnStart, FnSize) == -1) {
+ DEBUG(dbgs() << "Failed to tell OProfile about native function "
+ << F.getName() << " at ["
+ << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+ return;
+ }
+
+ if (!Details.LineStarts.empty()) {
+ // Now we convert the line number information from the address/DebugLoc
+ // format in Details to the address/filename/lineno format that OProfile
+ // expects. Note that OProfile 0.9.4 has a bug that causes it to ignore
+ // line numbers for addresses above 4G.
+ FilenameCache Filenames;
+ std::vector<debug_line_info> LineInfo;
+ LineInfo.reserve(1 + Details.LineStarts.size());
+
+ DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+ assert(!FirstLoc.isUnknown()
+ && "LineStarts should not contain unknown DebugLocs");
+ MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+ DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+ if (FunctionDI.Verify()) {
+ // If we have debug info for the function itself, use that as the line
+ // number of the first several instructions. Otherwise, after filling
+ // LineInfo, we'll adjust the address of the first line number to point at
+ // the start of the function.
+ debug_line_info line_info;
+ line_info.vma = reinterpret_cast<uintptr_t>(FnStart);
+ line_info.lineno = FunctionDI.getLineNumber();
+ line_info.filename = Filenames.getFilename(FirstLocScope);
+ LineInfo.push_back(line_info);
+ }
+
+ for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator
+ I = Details.LineStarts.begin(), E = Details.LineStarts.end();
+ I != E; ++I) {
+ LineInfo.push_back(LineStartToOProfileFormat(
+ *Details.MF, Filenames, I->Address, I->Loc));
+ }
+
+ // In case the function didn't have line info of its own, adjust the first
+ // line info's address to include the start of the function.
+ LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
+
+ if (Wrapper.op_write_debug_line_info(FnStart, LineInfo.size(),
+ &*LineInfo.begin()) == -1) {
+ DEBUG(dbgs()
+ << "Failed to tell OProfile about line numbers for native function "
+ << F.getName() << " at ["
+ << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+ }
+ }
+}
+
+// Removes the being-deleted function from the symbol table.
+void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
+ assert(FnStart && "Invalid function pointer");
+ if (Wrapper.op_unload_native_code(reinterpret_cast<uint64_t>(FnStart)) == -1) {
+ DEBUG(dbgs()
+ << "Failed to tell OProfile about unload of native function at "
+ << FnStart << "\n");
+ }
+}
+
+} // anonymous namespace.
+
+namespace llvm {
+JITEventListener *JITEventListener::createOProfileJITEventListener() {
+ static OwningPtr<OProfileWrapper> JITProfilingWrapper(new OProfileWrapper);
+ return new OProfileJITEventListener(*JITProfilingWrapper);
+}
+
+// for testing
+JITEventListener *JITEventListener::createOProfileJITEventListener(
+ OProfileWrapper* TestImpl) {
+ return new OProfileJITEventListener(*TestImpl);
+}
+
+} // namespace llvm
+
diff --git a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
new file mode 100644
index 000000000000..7c0d39518595
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -0,0 +1,264 @@
+//===-- OProfileWrapper.cpp - OProfile JIT API Wrapper implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface in OProfileWrapper.h. It is responsible
+// for loading the opagent dynamic library when the first call to an op_
+// function occurs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
+
+#define DEBUG_TYPE "oprofile-wrapper"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/ADT/SmallString.h"
+
+#include <sstream>
+#include <cstring>
+#include <stddef.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+namespace {
+
+// Global mutex to ensure a single thread initializes oprofile agent.
+llvm::sys::Mutex OProfileInitializationMutex;
+
+} // anonymous namespace
+
+namespace llvm {
+
+OProfileWrapper::OProfileWrapper()
+: Agent(0),
+ OpenAgentFunc(0),
+ CloseAgentFunc(0),
+ WriteNativeCodeFunc(0),
+ WriteDebugLineInfoFunc(0),
+ UnloadNativeCodeFunc(0),
+ MajorVersionFunc(0),
+ MinorVersionFunc(0),
+ IsOProfileRunningFunc(0),
+ Initialized(false) {
+}
+
+bool OProfileWrapper::initialize() {
+ using namespace llvm;
+ using namespace llvm::sys;
+
+ MutexGuard Guard(OProfileInitializationMutex);
+
+ if (Initialized)
+ return OpenAgentFunc != 0;
+
+ Initialized = true;
+
+ // If the oprofile daemon is not running, don't load the opagent library
+ if (!isOProfileRunning()) {
+ DEBUG(dbgs() << "OProfile daemon is not detected.\n");
+ return false;
+ }
+
+ std::string error;
+ if(!DynamicLibrary::LoadLibraryPermanently("libopagent.so", &error)) {
+ DEBUG(dbgs()
+ << "OProfile connector library libopagent.so could not be loaded: "
+ << error << "\n");
+ }
+
+ // Get the addresses of the opagent functions
+ OpenAgentFunc = (op_open_agent_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_open_agent");
+ CloseAgentFunc = (op_close_agent_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_close_agent");
+ WriteNativeCodeFunc = (op_write_native_code_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_write_native_code");
+ WriteDebugLineInfoFunc = (op_write_debug_line_info_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_write_debug_line_info");
+ UnloadNativeCodeFunc = (op_unload_native_code_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_unload_native_code");
+ MajorVersionFunc = (op_major_version_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_major_version");
+ MinorVersionFunc = (op_major_version_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_minor_version");
+
+ // With missing functions, we can do nothing
+ if (!OpenAgentFunc
+ || !CloseAgentFunc
+ || !WriteNativeCodeFunc
+ || !WriteDebugLineInfoFunc
+ || !UnloadNativeCodeFunc) {
+ OpenAgentFunc = 0;
+ CloseAgentFunc = 0;
+ WriteNativeCodeFunc = 0;
+ WriteDebugLineInfoFunc = 0;
+ UnloadNativeCodeFunc = 0;
+ return false;
+ }
+
+ return true;
+}
+
+bool OProfileWrapper::isOProfileRunning() {
+ if (IsOProfileRunningFunc != 0)
+ return IsOProfileRunningFunc();
+ return checkForOProfileProcEntry();
+}
+
+bool OProfileWrapper::checkForOProfileProcEntry() {
+ DIR* ProcDir;
+
+ ProcDir = opendir("/proc");
+ if (!ProcDir)
+ return false;
+
+ // Walk the /proc tree looking for the oprofile daemon
+ struct dirent* Entry;
+ while (0 != (Entry = readdir(ProcDir))) {
+ if (Entry->d_type == DT_DIR) {
+ // Build a path from the current entry name
+ SmallString<256> CmdLineFName;
+ raw_svector_ostream(CmdLineFName) << "/proc/" << Entry->d_name
+ << "/cmdline";
+
+ // Open the cmdline file
+ int CmdLineFD = open(CmdLineFName.c_str(), S_IRUSR);
+ if (CmdLineFD != -1) {
+ char ExeName[PATH_MAX+1];
+ char* BaseName = 0;
+
+ // Read the cmdline file
+ ssize_t NumRead = read(CmdLineFD, ExeName, PATH_MAX+1);
+ close(CmdLineFD);
+ ssize_t Idx = 0;
+
+ // Find the terminator for the first string
+ while (Idx < NumRead-1 && ExeName[Idx] != 0) {
+ Idx++;
+ }
+
+ // Go back to the last non-null character
+ Idx--;
+
+ // Find the last path separator in the first string
+ while (Idx > 0) {
+ if (ExeName[Idx] == '/') {
+ BaseName = ExeName + Idx + 1;
+ break;
+ }
+ Idx--;
+ }
+
+ // Test this to see if it is the oprofile daemon
+ if (BaseName != 0 && !strcmp("oprofiled", BaseName)) {
+ // If it is, we're done
+ closedir(ProcDir);
+ return true;
+ }
+ }
+ }
+ }
+
+ // We've looked through all the files and didn't find the daemon
+ closedir(ProcDir);
+ return false;
+}
+
+bool OProfileWrapper::op_open_agent() {
+ if (!Initialized)
+ initialize();
+
+ if (OpenAgentFunc != 0) {
+ Agent = OpenAgentFunc();
+ return Agent != 0;
+ }
+
+ return false;
+}
+
+int OProfileWrapper::op_close_agent() {
+ if (!Initialized)
+ initialize();
+
+ int ret = -1;
+ if (Agent && CloseAgentFunc) {
+ ret = CloseAgentFunc(Agent);
+ if (ret == 0) {
+ Agent = 0;
+ }
+ }
+ return ret;
+}
+
+bool OProfileWrapper::isAgentAvailable() {
+ return Agent != 0;
+}
+
+int OProfileWrapper::op_write_native_code(const char* Name,
+ uint64_t Addr,
+ void const* Code,
+ const unsigned int Size) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && WriteNativeCodeFunc)
+ return WriteNativeCodeFunc(Agent, Name, Addr, Code, Size);
+
+ return -1;
+}
+
+int OProfileWrapper::op_write_debug_line_info(
+ void const* Code,
+ size_t NumEntries,
+ struct debug_line_info const* Info) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && WriteDebugLineInfoFunc)
+ return WriteDebugLineInfoFunc(Agent, Code, NumEntries, Info);
+
+ return -1;
+}
+
+int OProfileWrapper::op_major_version() {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && MajorVersionFunc)
+ return MajorVersionFunc();
+
+ return -1;
+}
+
+int OProfileWrapper::op_minor_version() {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && MinorVersionFunc)
+ return MinorVersionFunc();
+
+ return -1;
+}
+
+int OProfileWrapper::op_unload_native_code(uint64_t Addr) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && UnloadNativeCodeFunc)
+ return UnloadNativeCodeFunc(Agent, Addr);
+
+ return -1;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
new file mode 100644
index 000000000000..603c526d06e3
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
@@ -0,0 +1,214 @@
+//===-- GDBRegistrar.cpp - Registers objects with GDB ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITRegistrar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+
+using namespace llvm;
+
+// This must be kept in sync with gdb/gdb/jit.h .
+extern "C" {
+
+ typedef enum {
+ JIT_NOACTION = 0,
+ JIT_REGISTER_FN,
+ JIT_UNREGISTER_FN
+ } jit_actions_t;
+
+ struct jit_code_entry {
+ struct jit_code_entry *next_entry;
+ struct jit_code_entry *prev_entry;
+ const char *symfile_addr;
+ uint64_t symfile_size;
+ };
+
+ struct jit_descriptor {
+ uint32_t version;
+ // This should be jit_actions_t, but we want to be specific about the
+ // bit-width.
+ uint32_t action_flag;
+ struct jit_code_entry *relevant_entry;
+ struct jit_code_entry *first_entry;
+ };
+
+ // We put information about the JITed function in this global, which the
+ // debugger reads. Make sure to specify the version statically, because the
+ // debugger checks the version before we can set it during runtime.
+ struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+
+ // Debuggers puts a breakpoint in this function.
+ LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { }
+
+}
+
+namespace {
+
+// Buffer for an in-memory object file in executable memory
+typedef llvm::DenseMap< const char*,
+ std::pair<std::size_t, jit_code_entry*> >
+ RegisteredObjectBufferMap;
+
+/// Global access point for the JIT debugging interface designed for use with a
+/// singleton toolbox. Handles thread-safe registration and deregistration of
+/// object files that are in executable memory managed by the client of this
+/// class.
+class GDBJITRegistrar : public JITRegistrar {
+ /// A map of in-memory object files that have been registered with the
+ /// JIT interface.
+ RegisteredObjectBufferMap ObjectBufferMap;
+
+public:
+ /// Instantiates the JIT service.
+ GDBJITRegistrar() : ObjectBufferMap() {}
+
+ /// Unregisters each object that was previously registered and releases all
+ /// internal resources.
+ virtual ~GDBJITRegistrar();
+
+ /// Creates an entry in the JIT registry for the buffer @p Object,
+ /// which must contain an object file in executable memory with any
+ /// debug information for the debugger.
+ void registerObject(const ObjectBuffer &Object);
+
+ /// Removes the internal registration of @p Object, and
+ /// frees associated resources.
+ /// Returns true if @p Object was found in ObjectBufferMap.
+ bool deregisterObject(const ObjectBuffer &Object);
+
+private:
+ /// Deregister the debug info for the given object file from the debugger
+ /// and delete any temporary copies. This private method does not remove
+ /// the function from Map so that it can be called while iterating over Map.
+ void deregisterObjectInternal(RegisteredObjectBufferMap::iterator I);
+};
+
+/// Lock used to serialize all jit registration events, since they
+/// modify global variables.
+llvm::sys::Mutex JITDebugLock;
+
+/// Acquire the lock and do the registration.
+void NotifyDebugger(jit_code_entry* JITCodeEntry) {
+ llvm::MutexGuard locked(JITDebugLock);
+ __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+
+ // Insert this entry at the head of the list.
+ JITCodeEntry->prev_entry = NULL;
+ jit_code_entry* NextEntry = __jit_debug_descriptor.first_entry;
+ JITCodeEntry->next_entry = NextEntry;
+ if (NextEntry != NULL) {
+ NextEntry->prev_entry = JITCodeEntry;
+ }
+ __jit_debug_descriptor.first_entry = JITCodeEntry;
+ __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+ __jit_debug_register_code();
+}
+
+GDBJITRegistrar::~GDBJITRegistrar() {
+ // Free all registered object files.
+ for (RegisteredObjectBufferMap::iterator I = ObjectBufferMap.begin(), E = ObjectBufferMap.end();
+ I != E; ++I) {
+ // Call the private method that doesn't update the map so our iterator
+ // doesn't break.
+ deregisterObjectInternal(I);
+ }
+ ObjectBufferMap.clear();
+}
+
+void GDBJITRegistrar::registerObject(const ObjectBuffer &Object) {
+
+ const char *Buffer = Object.getBufferStart();
+ size_t Size = Object.getBufferSize();
+
+ assert(Buffer && "Attempt to register a null object with a debugger.");
+ assert(ObjectBufferMap.find(Buffer) == ObjectBufferMap.end() &&
+ "Second attempt to perform debug registration.");
+ jit_code_entry* JITCodeEntry = new jit_code_entry();
+
+ if (JITCodeEntry == 0) {
+ llvm::report_fatal_error(
+ "Allocation failed when registering a JIT entry!\n");
+ }
+ else {
+ JITCodeEntry->symfile_addr = Buffer;
+ JITCodeEntry->symfile_size = Size;
+
+ ObjectBufferMap[Buffer] = std::make_pair(Size, JITCodeEntry);
+ NotifyDebugger(JITCodeEntry);
+ }
+}
+
+bool GDBJITRegistrar::deregisterObject(const ObjectBuffer& Object) {
+ const char *Buffer = Object.getBufferStart();
+ RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(Buffer);
+
+ if (I != ObjectBufferMap.end()) {
+ deregisterObjectInternal(I);
+ ObjectBufferMap.erase(I);
+ return true;
+ }
+ return false;
+}
+
+void GDBJITRegistrar::deregisterObjectInternal(
+ RegisteredObjectBufferMap::iterator I) {
+
+ jit_code_entry*& JITCodeEntry = I->second.second;
+
+ // Acquire the lock and do the unregistration.
+ {
+ llvm::MutexGuard locked(JITDebugLock);
+ __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
+
+ // Remove the jit_code_entry from the linked list.
+ jit_code_entry* PrevEntry = JITCodeEntry->prev_entry;
+ jit_code_entry* NextEntry = JITCodeEntry->next_entry;
+
+ if (NextEntry) {
+ NextEntry->prev_entry = PrevEntry;
+ }
+ if (PrevEntry) {
+ PrevEntry->next_entry = NextEntry;
+ }
+ else {
+ assert(__jit_debug_descriptor.first_entry == JITCodeEntry);
+ __jit_debug_descriptor.first_entry = NextEntry;
+ }
+
+ // Tell the debugger which entry we removed, and unregister the code.
+ __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+ __jit_debug_register_code();
+ }
+
+ delete JITCodeEntry;
+ JITCodeEntry = NULL;
+}
+
+} // end namespace
+
+namespace llvm {
+
+JITRegistrar& JITRegistrar::getGDBRegistrar() {
+ static GDBJITRegistrar* sRegistrar = NULL;
+ if (sRegistrar == NULL) {
+ // The mutex is here so that it won't slow down access once the registrar
+ // is instantiated
+ llvm::MutexGuard locked(JITDebugLock);
+ // Check again to be sure another thread didn't create this while we waited
+ if (sRegistrar == NULL) {
+ sRegistrar = new GDBJITRegistrar;
+ }
+ }
+ return *sRegistrar;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h
new file mode 100644
index 000000000000..69e9dbe490d6
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h
@@ -0,0 +1,43 @@
+//===-- JITRegistrar.h - Registers objects with a debugger ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H
+#define LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H
+
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+
+namespace llvm {
+
+/// Global access point for the JIT debugging interface.
+class JITRegistrar {
+public:
+ /// Instantiates the JIT service.
+ JITRegistrar() {}
+
+ /// Unregisters each object that was previously registered and releases all
+ /// internal resources.
+ virtual ~JITRegistrar() {}
+
+ /// Creates an entry in the JIT registry for the buffer @p Object,
+ /// which must contain an object file in executable memory with any
+ /// debug information for the debugger.
+ virtual void registerObject(const ObjectBuffer &Object) = 0;
+
+ /// Removes the internal registration of @p Object, and
+ /// frees associated resources.
+ /// Returns true if @p Object was previously registered.
+ virtual bool deregisterObject(const ObjectBuffer &Object) = 0;
+
+ /// Returns a reference to a GDB JIT registrar singleton
+ static JITRegistrar& getGDBRegistrar();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
new file mode 100644
index 000000000000..89350cc5b621
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
@@ -0,0 +1,78 @@
+//===-- ObjectImageCommon.h - Format independent executuable object image -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a file format independent ObjectImage class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+#define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/Object/ObjectFile.h"
+
+namespace llvm {
+
+class ObjectImageCommon : public ObjectImage {
+ ObjectImageCommon(); // = delete
+ ObjectImageCommon(const ObjectImageCommon &other); // = delete
+
+protected:
+ object::ObjectFile *ObjFile;
+
+ // This form of the constructor allows subclasses to use
+ // format-specific subclasses of ObjectFile directly
+ ObjectImageCommon(ObjectBuffer *Input, object::ObjectFile *Obj)
+ : ObjectImage(Input), // saves Input as Buffer and takes ownership
+ ObjFile(Obj)
+ {
+ }
+
+public:
+ ObjectImageCommon(ObjectBuffer* Input)
+ : ObjectImage(Input) // saves Input as Buffer and takes ownership
+ {
+ ObjFile = object::ObjectFile::createObjectFile(Buffer->getMemBuffer());
+ }
+ virtual ~ObjectImageCommon() { delete ObjFile; }
+
+ virtual object::symbol_iterator begin_symbols() const
+ { return ObjFile->begin_symbols(); }
+ virtual object::symbol_iterator end_symbols() const
+ { return ObjFile->end_symbols(); }
+
+ virtual object::section_iterator begin_sections() const
+ { return ObjFile->begin_sections(); }
+ virtual object::section_iterator end_sections() const
+ { return ObjFile->end_sections(); }
+
+ virtual /* Triple::ArchType */ unsigned getArch() const
+ { return ObjFile->getArch(); }
+
+ virtual StringRef getData() const { return ObjFile->getData(); }
+
+ virtual object::ObjectFile* getObjectFile() const { return ObjFile; }
+
+ // Subclasses can override these methods to update the image with loaded
+ // addresses for sections and common symbols
+ virtual void updateSectionAddress(const object::SectionRef &Sec,
+ uint64_t Addr) {}
+ virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr)
+ {}
+
+ // Subclasses can override these methods to provide JIT debugging support
+ virtual void registerWithDebugger() {}
+ virtual void deregisterWithDebugger() {}
+};
+
+} // end namespace llvm
+
+#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H
+
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
new file mode 100644
index 000000000000..409b25fef3af
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -0,0 +1,537 @@
+//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "ObjectImageCommon.h"
+#include "RuntimeDyldELF.h"
+#include "RuntimeDyldImpl.h"
+#include "RuntimeDyldMachO.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+// Empty out-of-line virtual destructor as the key function.
+RTDyldMemoryManager::~RTDyldMemoryManager() {}
+RuntimeDyldImpl::~RuntimeDyldImpl() {}
+
+namespace llvm {
+
+// Resolve the relocations for all symbols we currently know about.
+void RuntimeDyldImpl::resolveRelocations() {
+ // First, resolve relocations associated with external symbols.
+ resolveExternalSymbols();
+
+ // Just iterate over the sections we have and resolve all the relocations
+ // in them. Gross overkill, but it gets the job done.
+ for (int i = 0, e = Sections.size(); i != e; ++i) {
+ uint64_t Addr = Sections[i].LoadAddress;
+ DEBUG(dbgs() << "Resolving relocations Section #" << i
+ << "\t" << format("%p", (uint8_t *)Addr)
+ << "\n");
+ resolveRelocationList(Relocations[i], Addr);
+ }
+}
+
+void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
+ uint64_t TargetAddress) {
+ for (unsigned i = 0, e = Sections.size(); i != e; ++i) {
+ if (Sections[i].Address == LocalAddress) {
+ reassignSectionAddress(i, TargetAddress);
+ return;
+ }
+ }
+ llvm_unreachable("Attempting to remap address of unknown section!");
+}
+
+// Subclasses can implement this method to create specialized image instances.
+// The caller owns the pointer that is returned.
+ObjectImage *RuntimeDyldImpl::createObjectImage(ObjectBuffer *InputBuffer) {
+ return new ObjectImageCommon(InputBuffer);
+}
+
+ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
+ OwningPtr<ObjectImage> obj(createObjectImage(InputBuffer));
+ if (!obj)
+ report_fatal_error("Unable to create object image from memory buffer!");
+
+ Arch = (Triple::ArchType)obj->getArch();
+
+ // Symbols found in this object
+ StringMap<SymbolLoc> LocalSymbols;
+ // Used sections from the object file
+ ObjSectionToIDMap LocalSections;
+
+ // Common symbols requiring allocation, with their sizes and alignments
+ CommonSymbolMap CommonSymbols;
+ // Maximum required total memory to allocate all common symbols
+ uint64_t CommonSize = 0;
+
+ error_code err;
+ // Parse symbols
+ DEBUG(dbgs() << "Parse symbols:\n");
+ for (symbol_iterator i = obj->begin_symbols(), e = obj->end_symbols();
+ i != e; i.increment(err)) {
+ Check(err);
+ object::SymbolRef::Type SymType;
+ StringRef Name;
+ Check(i->getType(SymType));
+ Check(i->getName(Name));
+
+ uint32_t flags;
+ Check(i->getFlags(flags));
+
+ bool isCommon = flags & SymbolRef::SF_Common;
+ if (isCommon) {
+ // Add the common symbols to a list. We'll allocate them all below.
+ uint64_t Align = getCommonSymbolAlignment(*i);
+ uint64_t Size = 0;
+ Check(i->getSize(Size));
+ CommonSize += Size + Align;
+ CommonSymbols[*i] = CommonSymbolInfo(Size, Align);
+ } else {
+ if (SymType == object::SymbolRef::ST_Function ||
+ SymType == object::SymbolRef::ST_Data ||
+ SymType == object::SymbolRef::ST_Unknown) {
+ uint64_t FileOffset;
+ StringRef SectionData;
+ bool IsCode;
+ section_iterator si = obj->end_sections();
+ Check(i->getFileOffset(FileOffset));
+ Check(i->getSection(si));
+ if (si == obj->end_sections()) continue;
+ Check(si->getContents(SectionData));
+ Check(si->isText(IsCode));
+ const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() +
+ (uintptr_t)FileOffset;
+ uintptr_t SectOffset = (uintptr_t)(SymPtr -
+ (const uint8_t*)SectionData.begin());
+ unsigned SectionID = findOrEmitSection(*obj, *si, IsCode, LocalSections);
+ LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset);
+ DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset)
+ << " flags: " << flags
+ << " SID: " << SectionID
+ << " Offset: " << format("%p", SectOffset));
+ GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
+ }
+ }
+ DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n");
+ }
+
+ // Allocate common symbols
+ if (CommonSize != 0)
+ emitCommonSymbols(*obj, CommonSymbols, CommonSize, LocalSymbols);
+
+ // Parse and process relocations
+ DEBUG(dbgs() << "Parse relocations:\n");
+ for (section_iterator si = obj->begin_sections(),
+ se = obj->end_sections(); si != se; si.increment(err)) {
+ Check(err);
+ bool isFirstRelocation = true;
+ unsigned SectionID = 0;
+ StubMap Stubs;
+
+ for (relocation_iterator i = si->begin_relocations(),
+ e = si->end_relocations(); i != e; i.increment(err)) {
+ Check(err);
+
+ // If it's the first relocation in this section, find its SectionID
+ if (isFirstRelocation) {
+ SectionID = findOrEmitSection(*obj, *si, true, LocalSections);
+ DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n");
+ isFirstRelocation = false;
+ }
+
+ ObjRelocationInfo RI;
+ RI.SectionID = SectionID;
+ Check(i->getAdditionalInfo(RI.AdditionalInfo));
+ Check(i->getOffset(RI.Offset));
+ Check(i->getSymbol(RI.Symbol));
+ Check(i->getType(RI.Type));
+
+ DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo
+ << " Offset: " << format("%p", (uintptr_t)RI.Offset)
+ << " Type: " << (uint32_t)(RI.Type & 0xffffffffL)
+ << "\n");
+ processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs);
+ }
+ }
+
+ return obj.take();
+}
+
+void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
+ const CommonSymbolMap &CommonSymbols,
+ uint64_t TotalSize,
+ SymbolTableMap &SymbolTable) {
+ // Allocate memory for the section
+ unsigned SectionID = Sections.size();
+ uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*),
+ SectionID, false);
+ if (!Addr)
+ report_fatal_error("Unable to allocate memory for common symbols!");
+ uint64_t Offset = 0;
+ Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, TotalSize, 0));
+ memset(Addr, 0, TotalSize);
+
+ DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID
+ << " new addr: " << format("%p", Addr)
+ << " DataSize: " << TotalSize
+ << "\n");
+
+ // Assign the address of each symbol
+ for (CommonSymbolMap::const_iterator it = CommonSymbols.begin(),
+ itEnd = CommonSymbols.end(); it != itEnd; it++) {
+ uint64_t Size = it->second.first;
+ uint64_t Align = it->second.second;
+ StringRef Name;
+ it->first.getName(Name);
+ if (Align) {
+ // This symbol has an alignment requirement.
+ uint64_t AlignOffset = OffsetToAlignment((uint64_t)Addr, Align);
+ Addr += AlignOffset;
+ Offset += AlignOffset;
+ DEBUG(dbgs() << "Allocating common symbol " << Name << " address " <<
+ format("%p\n", Addr));
+ }
+ Obj.updateSymbolAddress(it->first, (uint64_t)Addr);
+ SymbolTable[Name.data()] = SymbolLoc(SectionID, Offset);
+ Offset += Size;
+ Addr += Size;
+ }
+}
+
+unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
+ const SectionRef &Section,
+ bool IsCode) {
+
+ unsigned StubBufSize = 0,
+ StubSize = getMaxStubSize();
+ error_code err;
+ if (StubSize > 0) {
+ for (relocation_iterator i = Section.begin_relocations(),
+ e = Section.end_relocations(); i != e; i.increment(err), Check(err))
+ StubBufSize += StubSize;
+ }
+ StringRef data;
+ uint64_t Alignment64;
+ Check(Section.getContents(data));
+ Check(Section.getAlignment(Alignment64));
+
+ unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
+ bool IsRequired;
+ bool IsVirtual;
+ bool IsZeroInit;
+ bool IsReadOnly;
+ uint64_t DataSize;
+ StringRef Name;
+ Check(Section.isRequiredForExecution(IsRequired));
+ Check(Section.isVirtual(IsVirtual));
+ Check(Section.isZeroInit(IsZeroInit));
+ Check(Section.isReadOnlyData(IsReadOnly));
+ Check(Section.getSize(DataSize));
+ Check(Section.getName(Name));
+
+ unsigned Allocate;
+ unsigned SectionID = Sections.size();
+ uint8_t *Addr;
+ const char *pData = 0;
+
+ // Some sections, such as debug info, don't need to be loaded for execution.
+ // Leave those where they are.
+ if (IsRequired) {
+ Allocate = DataSize + StubBufSize;
+ Addr = IsCode
+ ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID)
+ : MemMgr->allocateDataSection(Allocate, Alignment, SectionID, IsReadOnly);
+ if (!Addr)
+ report_fatal_error("Unable to allocate section memory!");
+
+ // Virtual sections have no data in the object image, so leave pData = 0
+ if (!IsVirtual)
+ pData = data.data();
+
+ // Zero-initialize or copy the data from the image
+ if (IsZeroInit || IsVirtual)
+ memset(Addr, 0, DataSize);
+ else
+ memcpy(Addr, pData, DataSize);
+
+ DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+ << " Name: " << Name
+ << " obj addr: " << format("%p", pData)
+ << " new addr: " << format("%p", Addr)
+ << " DataSize: " << DataSize
+ << " StubBufSize: " << StubBufSize
+ << " Allocate: " << Allocate
+ << "\n");
+ Obj.updateSectionAddress(Section, (uint64_t)Addr);
+ }
+ else {
+ // Even if we didn't load the section, we need to record an entry for it
+ // to handle later processing (and by 'handle' I mean don't do anything
+ // with these sections).
+ Allocate = 0;
+ Addr = 0;
+ DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+ << " Name: " << Name
+ << " obj addr: " << format("%p", data.data())
+ << " new addr: 0"
+ << " DataSize: " << DataSize
+ << " StubBufSize: " << StubBufSize
+ << " Allocate: " << Allocate
+ << "\n");
+ }
+
+ Sections.push_back(SectionEntry(Name, Addr, Allocate, DataSize,
+ (uintptr_t)pData));
+ return SectionID;
+}
+
+unsigned RuntimeDyldImpl::findOrEmitSection(ObjectImage &Obj,
+ const SectionRef &Section,
+ bool IsCode,
+ ObjSectionToIDMap &LocalSections) {
+
+ unsigned SectionID = 0;
+ ObjSectionToIDMap::iterator i = LocalSections.find(Section);
+ if (i != LocalSections.end())
+ SectionID = i->second;
+ else {
+ SectionID = emitSection(Obj, Section, IsCode);
+ LocalSections[Section] = SectionID;
+ }
+ return SectionID;
+}
+
+void RuntimeDyldImpl::addRelocationForSection(const RelocationEntry &RE,
+ unsigned SectionID) {
+ Relocations[SectionID].push_back(RE);
+}
+
+void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
+ StringRef SymbolName) {
+ // Relocation by symbol. If the symbol is found in the global symbol table,
+ // create an appropriate section relocation. Otherwise, add it to
+ // ExternalSymbolRelocations.
+ SymbolTableMap::const_iterator Loc =
+ GlobalSymbolTable.find(SymbolName);
+ if (Loc == GlobalSymbolTable.end()) {
+ ExternalSymbolRelocations[SymbolName].push_back(RE);
+ } else {
+ // Copy the RE since we want to modify its addend.
+ RelocationEntry RECopy = RE;
+ RECopy.Addend += Loc->second.second;
+ Relocations[Loc->second.first].push_back(RECopy);
+ }
+}
+
+uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
+ if (Arch == Triple::arm) {
+ // TODO: There is only ARM far stub now. We should add the Thumb stub,
+ // and stubs for branches Thumb - ARM and ARM - Thumb.
+ uint32_t *StubAddr = (uint32_t*)Addr;
+ *StubAddr = 0xe51ff004; // ldr pc,<label>
+ return (uint8_t*)++StubAddr;
+ } else if (Arch == Triple::mipsel || Arch == Triple::mips) {
+ uint32_t *StubAddr = (uint32_t*)Addr;
+ // 0: 3c190000 lui t9,%hi(addr).
+ // 4: 27390000 addiu t9,t9,%lo(addr).
+ // 8: 03200008 jr t9.
+ // c: 00000000 nop.
+ const unsigned LuiT9Instr = 0x3c190000, AdduiT9Instr = 0x27390000;
+ const unsigned JrT9Instr = 0x03200008, NopInstr = 0x0;
+
+ *StubAddr = LuiT9Instr;
+ StubAddr++;
+ *StubAddr = AdduiT9Instr;
+ StubAddr++;
+ *StubAddr = JrT9Instr;
+ StubAddr++;
+ *StubAddr = NopInstr;
+ return Addr;
+ } else if (Arch == Triple::ppc64) {
+ // PowerPC64 stub: the address points to a function descriptor
+ // instead of the function itself. Load the function address
+ // on r11 and sets it to control register. Also loads the function
+ // TOC in r2 and environment pointer to r11.
+ writeInt32BE(Addr, 0x3D800000); // lis r12, highest(addr)
+ writeInt32BE(Addr+4, 0x618C0000); // ori r12, higher(addr)
+ writeInt32BE(Addr+8, 0x798C07C6); // sldi r12, r12, 32
+ writeInt32BE(Addr+12, 0x658C0000); // oris r12, r12, h(addr)
+ writeInt32BE(Addr+16, 0x618C0000); // ori r12, r12, l(addr)
+ writeInt32BE(Addr+20, 0xF8410028); // std r2, 40(r1)
+ writeInt32BE(Addr+24, 0xE96C0000); // ld r11, 0(r12)
+ writeInt32BE(Addr+28, 0xE84C0008); // ld r2, 0(r12)
+ writeInt32BE(Addr+32, 0x7D6903A6); // mtctr r11
+ writeInt32BE(Addr+36, 0xE96C0010); // ld r11, 16(r2)
+ writeInt32BE(Addr+40, 0x4E800420); // bctr
+
+ return Addr;
+ }
+ return Addr;
+}
+
+// Assign an address to a symbol name and resolve all the relocations
+// associated with it.
+void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
+ uint64_t Addr) {
+ // The address to use for relocation resolution is not
+ // the address of the local section buffer. We must be doing
+ // a remote execution environment of some sort. Relocations can't
+ // be applied until all the sections have been moved. The client must
+ // trigger this with a call to MCJIT::finalize() or
+ // RuntimeDyld::resolveRelocations().
+ //
+ // Addr is a uint64_t because we can't assume the pointer width
+ // of the target is the same as that of the host. Just use a generic
+ // "big enough" type.
+ Sections[SectionID].LoadAddress = Addr;
+}
+
+void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
+ uint64_t Value) {
+ // Ignore relocations for sections that were not loaded
+ if (Sections[RE.SectionID].Address != 0) {
+ DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
+ << " + " << RE.Offset << " ("
+ << format("%p", Sections[RE.SectionID].Address + RE.Offset) << ")"
+ << " RelType: " << RE.RelType
+ << " Addend: " << RE.Addend
+ << "\n");
+
+ resolveRelocation(Sections[RE.SectionID], RE.Offset,
+ Value, RE.RelType, RE.Addend);
+ }
+}
+
+void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
+ uint64_t Value) {
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ resolveRelocationEntry(Relocs[i], Value);
+ }
+}
+
+void RuntimeDyldImpl::resolveExternalSymbols() {
+ StringMap<RelocationList>::iterator i = ExternalSymbolRelocations.begin(),
+ e = ExternalSymbolRelocations.end();
+ for (; i != e; i++) {
+ StringRef Name = i->first();
+ RelocationList &Relocs = i->second;
+ SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(Name);
+ if (Loc == GlobalSymbolTable.end()) {
+ if (Name.size() == 0) {
+ // This is an absolute symbol, use an address of zero.
+ DEBUG(dbgs() << "Resolving absolute relocations." << "\n");
+ resolveRelocationList(Relocs, 0);
+ } else {
+ // This is an external symbol, try to get its address from
+ // MemoryManager.
+ uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(),
+ true);
+ DEBUG(dbgs() << "Resolving relocations Name: " << Name
+ << "\t" << format("%p", Addr)
+ << "\n");
+ resolveRelocationList(Relocs, (uintptr_t)Addr);
+ }
+ } else {
+ report_fatal_error("Expected external symbol");
+ }
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// RuntimeDyld class implementation
+RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
+ // FIXME: There's a potential issue lurking here if a single instance of
+ // RuntimeDyld is used to load multiple objects. The current implementation
+ // associates a single memory manager with a RuntimeDyld instance. Even
+ // though the public class spawns a new 'impl' instance for each load,
+ // they share a single memory manager. This can become a problem when page
+ // permissions are applied.
+ Dyld = 0;
+ MM = mm;
+}
+
+RuntimeDyld::~RuntimeDyld() {
+ delete Dyld;
+}
+
+ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) {
+ if (!Dyld) {
+ sys::LLVMFileType type = sys::IdentifyFileType(
+ InputBuffer->getBufferStart(),
+ static_cast<unsigned>(InputBuffer->getBufferSize()));
+ switch (type) {
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType:
+ Dyld = new RuntimeDyldELF(MM);
+ break;
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::Mach_O_DSYMCompanion_FileType:
+ Dyld = new RuntimeDyldMachO(MM);
+ break;
+ case sys::Unknown_FileType:
+ case sys::Bitcode_FileType:
+ case sys::Archive_FileType:
+ case sys::COFF_FileType:
+ report_fatal_error("Incompatible object format!");
+ }
+ } else {
+ if (!Dyld->isCompatibleFormat(InputBuffer))
+ report_fatal_error("Incompatible object format!");
+ }
+
+ return Dyld->loadObject(InputBuffer);
+}
+
+void *RuntimeDyld::getSymbolAddress(StringRef Name) {
+ return Dyld->getSymbolAddress(Name);
+}
+
+uint64_t RuntimeDyld::getSymbolLoadAddress(StringRef Name) {
+ return Dyld->getSymbolLoadAddress(Name);
+}
+
+void RuntimeDyld::resolveRelocations() {
+ Dyld->resolveRelocations();
+}
+
+void RuntimeDyld::reassignSectionAddress(unsigned SectionID,
+ uint64_t Addr) {
+ Dyld->reassignSectionAddress(SectionID, Addr);
+}
+
+void RuntimeDyld::mapSectionAddress(const void *LocalAddress,
+ uint64_t TargetAddress) {
+ Dyld->mapSectionAddress(LocalAddress, TargetAddress);
+}
+
+StringRef RuntimeDyld::getErrorString() {
+ return Dyld->getErrorString();
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
new file mode 100644
index 000000000000..b8537b1f2f9c
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -0,0 +1,851 @@
+//===-- RuntimeDyldELF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of ELF support for the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "RuntimeDyldELF.h"
+#include "JITRegistrar.h"
+#include "ObjectImageCommon.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+
+static inline
+error_code check(error_code Err) {
+ if (Err) {
+ report_fatal_error(Err.message());
+ }
+ return Err;
+}
+
+template<class ELFT>
+class DyldELFObject
+ : public ELFObjectFile<ELFT> {
+ LLVM_ELF_IMPORT_TYPES(ELFT)
+
+ typedef Elf_Shdr_Impl<ELFT> Elf_Shdr;
+ typedef Elf_Sym_Impl<ELFT> Elf_Sym;
+ typedef
+ Elf_Rel_Impl<ELFT, false> Elf_Rel;
+ typedef
+ Elf_Rel_Impl<ELFT, true> Elf_Rela;
+
+ typedef Elf_Ehdr_Impl<ELFT> Elf_Ehdr;
+
+ typedef typename ELFDataTypeTypedefHelper<
+ ELFT>::value_type addr_type;
+
+public:
+ DyldELFObject(MemoryBuffer *Wrapper, error_code &ec);
+
+ void updateSectionAddress(const SectionRef &Sec, uint64_t Addr);
+ void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr);
+
+ // Methods for type inquiry through isa, cast and dyn_cast
+ static inline bool classof(const Binary *v) {
+ return (isa<ELFObjectFile<ELFT> >(v)
+ && classof(cast<ELFObjectFile
+ <ELFT> >(v)));
+ }
+ static inline bool classof(
+ const ELFObjectFile<ELFT> *v) {
+ return v->isDyldType();
+ }
+};
+
+template<class ELFT>
+class ELFObjectImage : public ObjectImageCommon {
+ protected:
+ DyldELFObject<ELFT> *DyldObj;
+ bool Registered;
+
+ public:
+ ELFObjectImage(ObjectBuffer *Input,
+ DyldELFObject<ELFT> *Obj)
+ : ObjectImageCommon(Input, Obj),
+ DyldObj(Obj),
+ Registered(false) {}
+
+ virtual ~ELFObjectImage() {
+ if (Registered)
+ deregisterWithDebugger();
+ }
+
+ // Subclasses can override these methods to update the image with loaded
+ // addresses for sections and common symbols
+ virtual void updateSectionAddress(const SectionRef &Sec, uint64_t Addr)
+ {
+ DyldObj->updateSectionAddress(Sec, Addr);
+ }
+
+ virtual void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr)
+ {
+ DyldObj->updateSymbolAddress(Sym, Addr);
+ }
+
+ virtual void registerWithDebugger()
+ {
+ JITRegistrar::getGDBRegistrar().registerObject(*Buffer);
+ Registered = true;
+ }
+ virtual void deregisterWithDebugger()
+ {
+ JITRegistrar::getGDBRegistrar().deregisterObject(*Buffer);
+ }
+};
+
+// The MemoryBuffer passed into this constructor is just a wrapper around the
+// actual memory. Ultimately, the Binary parent class will take ownership of
+// this MemoryBuffer object but not the underlying memory.
+template<class ELFT>
+DyldELFObject<ELFT>::DyldELFObject(MemoryBuffer *Wrapper, error_code &ec)
+ : ELFObjectFile<ELFT>(Wrapper, ec) {
+ this->isDyldELFObject = true;
+}
+
+template<class ELFT>
+void DyldELFObject<ELFT>::updateSectionAddress(const SectionRef &Sec,
+ uint64_t Addr) {
+ DataRefImpl ShdrRef = Sec.getRawDataRefImpl();
+ Elf_Shdr *shdr = const_cast<Elf_Shdr*>(
+ reinterpret_cast<const Elf_Shdr *>(ShdrRef.p));
+
+ // This assumes the address passed in matches the target address bitness
+ // The template-based type cast handles everything else.
+ shdr->sh_addr = static_cast<addr_type>(Addr);
+}
+
+template<class ELFT>
+void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef,
+ uint64_t Addr) {
+
+ Elf_Sym *sym = const_cast<Elf_Sym*>(
+ ELFObjectFile<ELFT>::getSymbol(SymRef.getRawDataRefImpl()));
+
+ // This assumes the address passed in matches the target address bitness
+ // The template-based type cast handles everything else.
+ sym->st_value = static_cast<addr_type>(Addr);
+}
+
+} // namespace
+
+namespace llvm {
+
+ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) {
+ if (Buffer->getBufferSize() < ELF::EI_NIDENT)
+ llvm_unreachable("Unexpected ELF object size");
+ std::pair<unsigned char, unsigned char> Ident = std::make_pair(
+ (uint8_t)Buffer->getBufferStart()[ELF::EI_CLASS],
+ (uint8_t)Buffer->getBufferStart()[ELF::EI_DATA]);
+ error_code ec;
+
+ if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) {
+ DyldELFObject<ELFType<support::little, 4, false> > *Obj =
+ new DyldELFObject<ELFType<support::little, 4, false> >(
+ Buffer->getMemBuffer(), ec);
+ return new ELFObjectImage<ELFType<support::little, 4, false> >(Buffer, Obj);
+ }
+ else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) {
+ DyldELFObject<ELFType<support::big, 4, false> > *Obj =
+ new DyldELFObject<ELFType<support::big, 4, false> >(
+ Buffer->getMemBuffer(), ec);
+ return new ELFObjectImage<ELFType<support::big, 4, false> >(Buffer, Obj);
+ }
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) {
+ DyldELFObject<ELFType<support::big, 8, true> > *Obj =
+ new DyldELFObject<ELFType<support::big, 8, true> >(
+ Buffer->getMemBuffer(), ec);
+ return new ELFObjectImage<ELFType<support::big, 8, true> >(Buffer, Obj);
+ }
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) {
+ DyldELFObject<ELFType<support::little, 8, true> > *Obj =
+ new DyldELFObject<ELFType<support::little, 8, true> >(
+ Buffer->getMemBuffer(), ec);
+ return new ELFObjectImage<ELFType<support::little, 8, true> >(Buffer, Obj);
+ }
+ else
+ llvm_unreachable("Unexpected ELF format");
+}
+
+RuntimeDyldELF::~RuntimeDyldELF() {
+}
+
+void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ switch (Type) {
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ case ELF::R_X86_64_64: {
+ uint64_t *Target = reinterpret_cast<uint64_t*>(Section.Address + Offset);
+ *Target = Value + Addend;
+ DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend))
+ << " at " << format("%p\n",Target));
+ break;
+ }
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S: {
+ Value += Addend;
+ assert((Type == ELF::R_X86_64_32 && (Value <= UINT32_MAX)) ||
+ (Type == ELF::R_X86_64_32S &&
+ ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN)));
+ uint32_t TruncatedAddr = (Value & 0xFFFFFFFF);
+ uint32_t *Target = reinterpret_cast<uint32_t*>(Section.Address + Offset);
+ *Target = TruncatedAddr;
+ DEBUG(dbgs() << "Writing " << format("%p", TruncatedAddr)
+ << " at " << format("%p\n",Target));
+ break;
+ }
+ case ELF::R_X86_64_PC32: {
+ // Get the placeholder value from the generated object since
+ // a previous relocation attempt may have overwritten the loaded version
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(Section.ObjAddress
+ + Offset);
+ uint32_t *Target = reinterpret_cast<uint32_t*>(Section.Address + Offset);
+ uint64_t FinalAddress = Section.LoadAddress + Offset;
+ int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
+ assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN);
+ int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
+ *Target = TruncOffset;
+ break;
+ }
+ }
+}
+
+void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend) {
+ switch (Type) {
+ case ELF::R_386_32: {
+ // Get the placeholder value from the generated object since
+ // a previous relocation attempt may have overwritten the loaded version
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(Section.ObjAddress
+ + Offset);
+ uint32_t *Target = reinterpret_cast<uint32_t*>(Section.Address + Offset);
+ *Target = *Placeholder + Value + Addend;
+ break;
+ }
+ case ELF::R_386_PC32: {
+ // Get the placeholder value from the generated object since
+ // a previous relocation attempt may have overwritten the loaded version
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(Section.ObjAddress
+ + Offset);
+ uint32_t *Target = reinterpret_cast<uint32_t*>(Section.Address + Offset);
+ uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF);
+ uint32_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
+ *Target = RealOffset;
+ break;
+ }
+ default:
+ // There are other relocation types, but it appears these are the
+ // only ones currently used by the LLVM ELF object writer
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ }
+}
+
+void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend) {
+ // TODO: Add Thumb relocations.
+ uint32_t* TargetPtr = (uint32_t*)(Section.Address + Offset);
+ uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF);
+ Value += Addend;
+
+ DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: "
+ << Section.Address + Offset
+ << " FinalAddress: " << format("%p",FinalAddress)
+ << " Value: " << format("%x",Value)
+ << " Type: " << format("%x",Type)
+ << " Addend: " << format("%x",Addend)
+ << "\n");
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Not implemented relocation type!");
+
+ // Write a 32bit value to relocation address, taking into account the
+ // implicit addend encoded in the target.
+ case ELF::R_ARM_TARGET1 :
+ case ELF::R_ARM_ABS32 :
+ *TargetPtr += Value;
+ break;
+
+ // Write first 16 bit of 32 bit value to the mov instruction.
+ // Last 4 bit should be shifted.
+ case ELF::R_ARM_MOVW_ABS_NC :
+ // We are not expecting any other addend in the relocation address.
+ // Using 0x000F0FFF because MOVW has its 16 bit immediate split into 2
+ // non-contiguous fields.
+ assert((*TargetPtr & 0x000F0FFF) == 0);
+ Value = Value & 0xFFFF;
+ *TargetPtr |= Value & 0xFFF;
+ *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+ break;
+
+ // Write last 16 bit of 32 bit value to the mov instruction.
+ // Last 4 bit should be shifted.
+ case ELF::R_ARM_MOVT_ABS :
+ // We are not expecting any other addend in the relocation address.
+ // Use 0x000F0FFF for the same reason as R_ARM_MOVW_ABS_NC.
+ assert((*TargetPtr & 0x000F0FFF) == 0);
+ Value = (Value >> 16) & 0xFFFF;
+ *TargetPtr |= Value & 0xFFF;
+ *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+ break;
+
+ // Write 24 bit relative value to the branch instruction.
+ case ELF::R_ARM_PC24 : // Fall through.
+ case ELF::R_ARM_CALL : // Fall through.
+ case ELF::R_ARM_JUMP24 :
+ int32_t RelValue = static_cast<int32_t>(Value - FinalAddress - 8);
+ RelValue = (RelValue & 0x03FFFFFC) >> 2;
+ *TargetPtr &= 0xFF000000;
+ *TargetPtr |= RelValue;
+ break;
+ }
+}
+
+void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend) {
+ uint32_t* TargetPtr = (uint32_t*)(Section.Address + Offset);
+ Value += Addend;
+
+ DEBUG(dbgs() << "resolveMipselocation, LocalAddress: "
+ << Section.Address + Offset
+ << " FinalAddress: "
+ << format("%p",Section.LoadAddress + Offset)
+ << " Value: " << format("%x",Value)
+ << " Type: " << format("%x",Type)
+ << " Addend: " << format("%x",Addend)
+ << "\n");
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Not implemented relocation type!");
+ break;
+ case ELF::R_MIPS_32:
+ *TargetPtr = Value + (*TargetPtr);
+ break;
+ case ELF::R_MIPS_26:
+ *TargetPtr = ((*TargetPtr) & 0xfc000000) | (( Value & 0x0fffffff) >> 2);
+ break;
+ case ELF::R_MIPS_HI16:
+ // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ Value += ((*TargetPtr) & 0x0000ffff) << 16;
+ *TargetPtr = ((*TargetPtr) & 0xffff0000) |
+ (((Value + 0x8000) >> 16) & 0xffff);
+ break;
+ case ELF::R_MIPS_LO16:
+ Value += ((*TargetPtr) & 0x0000ffff);
+ *TargetPtr = ((*TargetPtr) & 0xffff0000) | (Value & 0xffff);
+ break;
+ }
+}
+
+// Return the .TOC. section address to R_PPC64_TOC relocations.
+uint64_t RuntimeDyldELF::findPPC64TOC() const {
+ // The TOC consists of sections .got, .toc, .tocbss, .plt in that
+ // order. The TOC starts where the first of these sections starts.
+ SectionList::const_iterator it = Sections.begin();
+ SectionList::const_iterator ite = Sections.end();
+ for (; it != ite; ++it) {
+ if (it->Name == ".got" ||
+ it->Name == ".toc" ||
+ it->Name == ".tocbss" ||
+ it->Name == ".plt")
+ break;
+ }
+ if (it == ite) {
+ // This may happen for
+ // * references to TOC base base (sym@toc, .odp relocation) without
+ // a .toc directive.
+ // In this case just use the first section (which is usually
+ // the .odp) since the code won't reference the .toc base
+ // directly.
+ it = Sections.begin();
+ }
+ assert (it != ite);
+ // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000
+ // thus permitting a full 64 Kbytes segment.
+ return it->LoadAddress + 0x8000;
+}
+
+// Returns the sections and offset associated with the ODP entry referenced
+// by Symbol.
+void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj,
+ ObjSectionToIDMap &LocalSections,
+ RelocationValueRef &Rel) {
+ // Get the ELF symbol value (st_value) to compare with Relocation offset in
+ // .opd entries
+
+ error_code err;
+ for (section_iterator si = Obj.begin_sections(),
+ se = Obj.end_sections(); si != se; si.increment(err)) {
+ StringRef SectionName;
+ check(si->getName(SectionName));
+ if (SectionName != ".opd")
+ continue;
+
+ for (relocation_iterator i = si->begin_relocations(),
+ e = si->end_relocations(); i != e;) {
+ check(err);
+
+ // The R_PPC64_ADDR64 relocation indicates the first field
+ // of a .opd entry
+ uint64_t TypeFunc;
+ check(i->getType(TypeFunc));
+ if (TypeFunc != ELF::R_PPC64_ADDR64) {
+ i.increment(err);
+ continue;
+ }
+
+ SymbolRef TargetSymbol;
+ uint64_t TargetSymbolOffset;
+ int64_t TargetAdditionalInfo;
+ check(i->getSymbol(TargetSymbol));
+ check(i->getOffset(TargetSymbolOffset));
+ check(i->getAdditionalInfo(TargetAdditionalInfo));
+
+ i = i.increment(err);
+ if (i == e)
+ break;
+ check(err);
+
+ // Just check if following relocation is a R_PPC64_TOC
+ uint64_t TypeTOC;
+ check(i->getType(TypeTOC));
+ if (TypeTOC != ELF::R_PPC64_TOC)
+ continue;
+
+ // Finally compares the Symbol value and the target symbol offset
+ // to check if this .opd entry refers to the symbol the relocation
+ // points to.
+ if (Rel.Addend != (intptr_t)TargetSymbolOffset)
+ continue;
+
+ section_iterator tsi(Obj.end_sections());
+ check(TargetSymbol.getSection(tsi));
+ Rel.SectionID = findOrEmitSection(Obj, (*tsi), true, LocalSections);
+ Rel.Addend = (intptr_t)TargetAdditionalInfo;
+ return;
+ }
+ }
+ llvm_unreachable("Attempting to get address of ODP entry!");
+}
+
+// Relocation masks following the #lo(value), #hi(value), #higher(value),
+// and #highest(value) macros defined in section 4.5.1. Relocation Types
+// in PPC-elf64abi document.
+//
+static inline
+uint16_t applyPPClo (uint64_t value)
+{
+ return value & 0xffff;
+}
+
+static inline
+uint16_t applyPPChi (uint64_t value)
+{
+ return (value >> 16) & 0xffff;
+}
+
+static inline
+uint16_t applyPPChigher (uint64_t value)
+{
+ return (value >> 32) & 0xffff;
+}
+
+static inline
+uint16_t applyPPChighest (uint64_t value)
+{
+ return (value >> 48) & 0xffff;
+}
+
+void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ uint8_t* LocalAddress = Section.Address + Offset;
+ switch (Type) {
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ case ELF::R_PPC64_ADDR16_LO :
+ writeInt16BE(LocalAddress, applyPPClo (Value + Addend));
+ break;
+ case ELF::R_PPC64_ADDR16_HI :
+ writeInt16BE(LocalAddress, applyPPChi (Value + Addend));
+ break;
+ case ELF::R_PPC64_ADDR16_HIGHER :
+ writeInt16BE(LocalAddress, applyPPChigher (Value + Addend));
+ break;
+ case ELF::R_PPC64_ADDR16_HIGHEST :
+ writeInt16BE(LocalAddress, applyPPChighest (Value + Addend));
+ break;
+ case ELF::R_PPC64_ADDR14 : {
+ assert(((Value + Addend) & 3) == 0);
+ // Preserve the AA/LK bits in the branch instruction
+ uint8_t aalk = *(LocalAddress+3);
+ writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc));
+ } break;
+ case ELF::R_PPC64_ADDR32 : {
+ int32_t Result = static_cast<int32_t>(Value + Addend);
+ if (SignExtend32<32>(Result) != Result)
+ llvm_unreachable("Relocation R_PPC64_ADDR32 overflow");
+ writeInt32BE(LocalAddress, Result);
+ } break;
+ case ELF::R_PPC64_REL24 : {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend);
+ if (SignExtend32<24>(delta) != delta)
+ llvm_unreachable("Relocation R_PPC64_REL24 overflow");
+ // Generates a 'bl <address>' instruction
+ writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC));
+ } break;
+ case ELF::R_PPC64_REL32 : {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend);
+ if (SignExtend32<32>(delta) != delta)
+ llvm_unreachable("Relocation R_PPC64_REL32 overflow");
+ writeInt32BE(LocalAddress, delta);
+ } break;
+ case ELF::R_PPC64_ADDR64 :
+ writeInt64BE(LocalAddress, Value + Addend);
+ break;
+ case ELF::R_PPC64_TOC :
+ writeInt64BE(LocalAddress, findPPC64TOC());
+ break;
+ case ELF::R_PPC64_TOC16 : {
+ uint64_t TOCStart = findPPC64TOC();
+ Value = applyPPClo((Value + Addend) - TOCStart);
+ writeInt16BE(LocalAddress, applyPPClo(Value));
+ } break;
+ case ELF::R_PPC64_TOC16_DS : {
+ uint64_t TOCStart = findPPC64TOC();
+ Value = ((Value + Addend) - TOCStart);
+ writeInt16BE(LocalAddress, applyPPClo(Value));
+ } break;
+ }
+}
+
+void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ switch (Arch) {
+ case Triple::x86_64:
+ resolveX86_64Relocation(Section, Offset, Value, Type, Addend);
+ break;
+ case Triple::x86:
+ resolveX86Relocation(Section, Offset,
+ (uint32_t)(Value & 0xffffffffL), Type,
+ (uint32_t)(Addend & 0xffffffffL));
+ break;
+ case Triple::arm: // Fall through.
+ case Triple::thumb:
+ resolveARMRelocation(Section, Offset,
+ (uint32_t)(Value & 0xffffffffL), Type,
+ (uint32_t)(Addend & 0xffffffffL));
+ break;
+ case Triple::mips: // Fall through.
+ case Triple::mipsel:
+ resolveMIPSRelocation(Section, Offset,
+ (uint32_t)(Value & 0xffffffffL), Type,
+ (uint32_t)(Addend & 0xffffffffL));
+ break;
+ case Triple::ppc64:
+ resolvePPC64Relocation(Section, Offset, Value, Type, Addend);
+ break;
+ default: llvm_unreachable("Unsupported CPU type!");
+ }
+}
+
+void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
+ ObjectImage &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols,
+ StubMap &Stubs) {
+
+ uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL);
+ intptr_t Addend = (intptr_t)Rel.AdditionalInfo;
+ const SymbolRef &Symbol = Rel.Symbol;
+
+ // Obtain the symbol name which is referenced in the relocation
+ StringRef TargetName;
+ Symbol.getName(TargetName);
+ DEBUG(dbgs() << "\t\tRelType: " << RelType
+ << " Addend: " << Addend
+ << " TargetName: " << TargetName
+ << "\n");
+ RelocationValueRef Value;
+ // First search for the symbol in the local symbol table
+ SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data());
+ SymbolRef::Type SymType;
+ Symbol.getType(SymType);
+ if (lsi != Symbols.end()) {
+ Value.SectionID = lsi->second.first;
+ Value.Addend = lsi->second.second;
+ } else {
+ // Search for the symbol in the global symbol table
+ SymbolTableMap::const_iterator gsi =
+ GlobalSymbolTable.find(TargetName.data());
+ if (gsi != GlobalSymbolTable.end()) {
+ Value.SectionID = gsi->second.first;
+ Value.Addend = gsi->second.second;
+ } else {
+ switch (SymType) {
+ case SymbolRef::ST_Debug: {
+ // TODO: Now ELF SymbolRef::ST_Debug = STT_SECTION, it's not obviously
+ // and can be changed by another developers. Maybe best way is add
+ // a new symbol type ST_Section to SymbolRef and use it.
+ section_iterator si(Obj.end_sections());
+ Symbol.getSection(si);
+ if (si == Obj.end_sections())
+ llvm_unreachable("Symbol section not found, bad object file format!");
+ DEBUG(dbgs() << "\t\tThis is section symbol\n");
+ // Default to 'true' in case isText fails (though it never does).
+ bool isCode = true;
+ si->isText(isCode);
+ Value.SectionID = findOrEmitSection(Obj,
+ (*si),
+ isCode,
+ ObjSectionToID);
+ Value.Addend = Addend;
+ break;
+ }
+ case SymbolRef::ST_Unknown: {
+ Value.SymbolName = TargetName.data();
+ Value.Addend = Addend;
+ break;
+ }
+ default:
+ llvm_unreachable("Unresolved symbol type!");
+ break;
+ }
+ }
+ }
+ DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID
+ << " Rel.Offset: " << Rel.Offset
+ << "\n");
+ if (Arch == Triple::arm &&
+ (RelType == ELF::R_ARM_PC24 ||
+ RelType == ELF::R_ARM_CALL ||
+ RelType == ELF::R_ARM_JUMP24)) {
+ // This is an ARM branch relocation, need to use a stub function.
+ DEBUG(dbgs() << "\t\tThis is an ARM branch relocation.");
+ SectionEntry &Section = Sections[Rel.SectionID];
+
+ // Look for an existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ resolveRelocation(Section, Rel.Offset,
+ (uint64_t)Section.Address + i->second, RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+ RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+ ELF::R_ARM_ABS32, Value.Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+
+ resolveRelocation(Section, Rel.Offset,
+ (uint64_t)Section.Address + Section.StubOffset,
+ RelType, 0);
+ Section.StubOffset += getMaxStubSize();
+ }
+ } else if ((Arch == Triple::mipsel || Arch == Triple::mips) &&
+ RelType == ELF::R_MIPS_26) {
+ // This is an Mips branch relocation, need to use a stub function.
+ DEBUG(dbgs() << "\t\tThis is a Mips branch relocation.");
+ SectionEntry &Section = Sections[Rel.SectionID];
+ uint8_t *Target = Section.Address + Rel.Offset;
+ uint32_t *TargetAddress = (uint32_t *)Target;
+
+ // Extract the addend from the instruction.
+ uint32_t Addend = ((*TargetAddress) & 0x03ffffff) << 2;
+
+ Value.Addend += Addend;
+
+ // Look up for existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ resolveRelocation(Section, Rel.Offset,
+ (uint64_t)Section.Address + i->second, RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+
+ // Creating Hi and Lo relocations for the filled stub instructions.
+ RelocationEntry REHi(Rel.SectionID,
+ StubTargetAddr - Section.Address,
+ ELF::R_MIPS_HI16, Value.Addend);
+ RelocationEntry RELo(Rel.SectionID,
+ StubTargetAddr - Section.Address + 4,
+ ELF::R_MIPS_LO16, Value.Addend);
+
+ if (Value.SymbolName) {
+ addRelocationForSymbol(REHi, Value.SymbolName);
+ addRelocationForSymbol(RELo, Value.SymbolName);
+ } else {
+ addRelocationForSection(REHi, Value.SectionID);
+ addRelocationForSection(RELo, Value.SectionID);
+ }
+
+ resolveRelocation(Section, Rel.Offset,
+ (uint64_t)Section.Address + Section.StubOffset,
+ RelType, 0);
+ Section.StubOffset += getMaxStubSize();
+ }
+ } else if (Arch == Triple::ppc64) {
+ if (RelType == ELF::R_PPC64_REL24) {
+ // A PPC branch relocation will need a stub function if the target is
+ // an external symbol (Symbol::ST_Unknown) or if the target address
+ // is not within the signed 24-bits branch address.
+ SectionEntry &Section = Sections[Rel.SectionID];
+ uint8_t *Target = Section.Address + Rel.Offset;
+ bool RangeOverflow = false;
+ if (SymType != SymbolRef::ST_Unknown) {
+ // A function call may points to the .opd entry, so the final symbol value
+ // in calculated based in the relocation values in .opd section.
+ findOPDEntrySection(Obj, ObjSectionToID, Value);
+ uint8_t *RelocTarget = Sections[Value.SectionID].Address + Value.Addend;
+ int32_t delta = static_cast<int32_t>(Target - RelocTarget);
+ // If it is within 24-bits branch range, just set the branch target
+ if (SignExtend32<24>(delta) == delta) {
+ RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ } else {
+ RangeOverflow = true;
+ }
+ }
+ if (SymType == SymbolRef::ST_Unknown || RangeOverflow == true) {
+ // It is an external symbol (SymbolRef::ST_Unknown) or within a range
+ // larger than 24-bits.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ // Symbol function stub already created, just relocate to it
+ resolveRelocation(Section, Rel.Offset,
+ (uint64_t)Section.Address + i->second, RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+ RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+ ELF::R_PPC64_ADDR64, Value.Addend);
+
+ // Generates the 64-bits address loads as exemplified in section
+ // 4.5.1 in PPC64 ELF ABI.
+ RelocationEntry REhst(Rel.SectionID,
+ StubTargetAddr - Section.Address + 2,
+ ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend);
+ RelocationEntry REhr(Rel.SectionID,
+ StubTargetAddr - Section.Address + 6,
+ ELF::R_PPC64_ADDR16_HIGHER, Value.Addend);
+ RelocationEntry REh(Rel.SectionID,
+ StubTargetAddr - Section.Address + 14,
+ ELF::R_PPC64_ADDR16_HI, Value.Addend);
+ RelocationEntry REl(Rel.SectionID,
+ StubTargetAddr - Section.Address + 18,
+ ELF::R_PPC64_ADDR16_LO, Value.Addend);
+
+ if (Value.SymbolName) {
+ addRelocationForSymbol(REhst, Value.SymbolName);
+ addRelocationForSymbol(REhr, Value.SymbolName);
+ addRelocationForSymbol(REh, Value.SymbolName);
+ addRelocationForSymbol(REl, Value.SymbolName);
+ } else {
+ addRelocationForSection(REhst, Value.SectionID);
+ addRelocationForSection(REhr, Value.SectionID);
+ addRelocationForSection(REh, Value.SectionID);
+ addRelocationForSection(REl, Value.SectionID);
+ }
+
+ resolveRelocation(Section, Rel.Offset,
+ (uint64_t)Section.Address + Section.StubOffset,
+ RelType, 0);
+ if (SymType == SymbolRef::ST_Unknown)
+ // Restore the TOC for external calls
+ writeInt32BE(Target+4, 0xE8410028); // ld r2,40(r1)
+ Section.StubOffset += getMaxStubSize();
+ }
+ }
+ } else {
+ RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ // Extra check to avoid relocation againt empty symbols (usually
+ // the R_PPC64_TOC).
+ if (Value.SymbolName && !TargetName.empty())
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ }
+ } else {
+ RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ }
+}
+
+unsigned RuntimeDyldELF::getCommonSymbolAlignment(const SymbolRef &Sym) {
+ // In ELF, the value of an SHN_COMMON symbol is its alignment requirement.
+ uint64_t Align;
+ Check(Sym.getValue(Align));
+ return Align;
+}
+
+bool RuntimeDyldELF::isCompatibleFormat(const ObjectBuffer *Buffer) const {
+ if (Buffer->getBufferSize() < strlen(ELF::ElfMagic))
+ return false;
+ return (memcmp(Buffer->getBufferStart(), ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0;
+}
+} // namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
new file mode 100644
index 000000000000..07e704b45930
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -0,0 +1,97 @@
+//===-- RuntimeDyldELF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ELF support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_ELF_H
+#define LLVM_RUNTIME_DYLD_ELF_H
+
+#include "RuntimeDyldImpl.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+namespace {
+ // Helper for extensive error checking in debug builds.
+ error_code Check(error_code Err) {
+ if (Err) {
+ report_fatal_error(Err.message());
+ }
+ return Err;
+ }
+} // end anonymous namespace
+
+class RuntimeDyldELF : public RuntimeDyldImpl {
+protected:
+ void resolveX86_64Relocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ void resolveX86Relocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend);
+
+ void resolveARMRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend);
+
+ void resolveMIPSRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend);
+
+ void resolvePPC64Relocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ virtual void resolveRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ ObjectImage &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols,
+ StubMap &Stubs);
+
+ unsigned getCommonSymbolAlignment(const SymbolRef &Sym);
+
+ virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
+
+ uint64_t findPPC64TOC() const;
+ void findOPDEntrySection(ObjectImage &Obj,
+ ObjSectionToIDMap &LocalSections,
+ RelocationValueRef &Rel);
+
+public:
+ RuntimeDyldELF(RTDyldMemoryManager *mm)
+ : RuntimeDyldImpl(mm) {}
+
+ virtual ~RuntimeDyldELF();
+
+ bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
new file mode 100644
index 000000000000..f1009945775c
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -0,0 +1,344 @@
+//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the implementations of runtime dynamic linker facilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_IMPL_H
+#define LLVM_RUNTIME_DYLD_IMPL_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <map>
+
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+
+class ObjectBuffer;
+class Twine;
+
+
+/// SectionEntry - represents a section emitted into memory by the dynamic
+/// linker.
+class SectionEntry {
+public:
+ /// Name - section name.
+ StringRef Name;
+
+ /// Address - address in the linker's memory where the section resides.
+ uint8_t *Address;
+
+ /// Size - section size.
+ size_t Size;
+
+ /// LoadAddress - the address of the section in the target process's memory.
+ /// Used for situations in which JIT-ed code is being executed in the address
+ /// space of a separate process. If the code executes in the same address
+ /// space where it was JIT-ed, this just equals Address.
+ uint64_t LoadAddress;
+
+ /// StubOffset - used for architectures with stub functions for far
+ /// relocations (like ARM).
+ uintptr_t StubOffset;
+
+ /// ObjAddress - address of the section in the in-memory object file. Used
+ /// for calculating relocations in some object formats (like MachO).
+ uintptr_t ObjAddress;
+
+ SectionEntry(StringRef name, uint8_t *address, size_t size,
+ uintptr_t stubOffset, uintptr_t objAddress)
+ : Name(name), Address(address), Size(size), LoadAddress((uintptr_t)address),
+ StubOffset(stubOffset), ObjAddress(objAddress) {}
+};
+
+/// RelocationEntry - used to represent relocations internally in the dynamic
+/// linker.
+class RelocationEntry {
+public:
+ /// SectionID - the section this relocation points to.
+ unsigned SectionID;
+
+ /// Offset - offset into the section.
+ uintptr_t Offset;
+
+ /// RelType - relocation type.
+ uint32_t RelType;
+
+ /// Addend - the relocation addend encoded in the instruction itself. Also
+ /// used to make a relocation section relative instead of symbol relative.
+ intptr_t Addend;
+
+ RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend)
+ : SectionID(id), Offset(offset), RelType(type), Addend(addend) {}
+};
+
+/// ObjRelocationInfo - relocation information as read from the object file.
+/// Used to pass around data taken from object::RelocationRef, together with
+/// the section to which the relocation points (represented by a SectionID).
+class ObjRelocationInfo {
+public:
+ unsigned SectionID;
+ uint64_t Offset;
+ SymbolRef Symbol;
+ uint64_t Type;
+ int64_t AdditionalInfo;
+};
+
+class RelocationValueRef {
+public:
+ unsigned SectionID;
+ intptr_t Addend;
+ const char *SymbolName;
+ RelocationValueRef(): SectionID(0), Addend(0), SymbolName(0) {}
+
+ inline bool operator==(const RelocationValueRef &Other) const {
+ return std::memcmp(this, &Other, sizeof(RelocationValueRef)) == 0;
+ }
+ inline bool operator <(const RelocationValueRef &Other) const {
+ return std::memcmp(this, &Other, sizeof(RelocationValueRef)) < 0;
+ }
+};
+
+class RuntimeDyldImpl {
+protected:
+ // The MemoryManager to load objects into.
+ RTDyldMemoryManager *MemMgr;
+
+ // A list of all sections emitted by the dynamic linker. These sections are
+ // referenced in the code by means of their index in this list - SectionID.
+ typedef SmallVector<SectionEntry, 64> SectionList;
+ SectionList Sections;
+
+ // Keep a map of sections from object file to the SectionID which
+ // references it.
+ typedef std::map<SectionRef, unsigned> ObjSectionToIDMap;
+
+ // A global symbol table for symbols from all loaded modules. Maps the
+ // symbol name to a (SectionID, offset in section) pair.
+ typedef std::pair<unsigned, uintptr_t> SymbolLoc;
+ typedef StringMap<SymbolLoc> SymbolTableMap;
+ SymbolTableMap GlobalSymbolTable;
+
+ // Pair representing the size and alignment requirement for a common symbol.
+ typedef std::pair<unsigned, unsigned> CommonSymbolInfo;
+ // Keep a map of common symbols to their info pairs
+ typedef std::map<SymbolRef, CommonSymbolInfo> CommonSymbolMap;
+
+ // For each symbol, keep a list of relocations based on it. Anytime
+ // its address is reassigned (the JIT re-compiled the function, e.g.),
+ // the relocations get re-resolved.
+ // The symbol (or section) the relocation is sourced from is the Key
+ // in the relocation list where it's stored.
+ typedef SmallVector<RelocationEntry, 64> RelocationList;
+ // Relocations to sections already loaded. Indexed by SectionID which is the
+ // source of the address. The target where the address will be written is
+ // SectionID/Offset in the relocation itself.
+ DenseMap<unsigned, RelocationList> Relocations;
+
+ // Relocations to external symbols that are not yet resolved. Symbols are
+ // external when they aren't found in the global symbol table of all loaded
+ // modules. This map is indexed by symbol name.
+ StringMap<RelocationList> ExternalSymbolRelocations;
+
+ typedef std::map<RelocationValueRef, uintptr_t> StubMap;
+
+ Triple::ArchType Arch;
+
+ inline unsigned getMaxStubSize() {
+ if (Arch == Triple::arm || Arch == Triple::thumb)
+ return 8; // 32-bit instruction and 32-bit address
+ else if (Arch == Triple::mipsel || Arch == Triple::mips)
+ return 16;
+ else if (Arch == Triple::ppc64)
+ return 44;
+ else
+ return 0;
+ }
+
+ bool HasError;
+ std::string ErrorStr;
+
+ // Set the error state and record an error string.
+ bool Error(const Twine &Msg) {
+ ErrorStr = Msg.str();
+ HasError = true;
+ return true;
+ }
+
+ uint64_t getSectionLoadAddress(unsigned SectionID) {
+ return Sections[SectionID].LoadAddress;
+ }
+
+ uint8_t *getSectionAddress(unsigned SectionID) {
+ return (uint8_t*)Sections[SectionID].Address;
+ }
+
+ // Subclasses can override this method to get the alignment requirement of
+ // a common symbol. Returns no alignment requirement if not implemented.
+ virtual unsigned getCommonSymbolAlignment(const SymbolRef &Sym) {
+ return 0;
+ }
+
+
+ void writeInt16BE(uint8_t *Addr, uint16_t Value) {
+ if (sys::isLittleEndianHost())
+ Value = sys::SwapByteOrder(Value);
+ *Addr = (Value >> 8) & 0xFF;
+ *(Addr+1) = Value & 0xFF;
+ }
+
+ void writeInt32BE(uint8_t *Addr, uint32_t Value) {
+ if (sys::isLittleEndianHost())
+ Value = sys::SwapByteOrder(Value);
+ *Addr = (Value >> 24) & 0xFF;
+ *(Addr+1) = (Value >> 16) & 0xFF;
+ *(Addr+2) = (Value >> 8) & 0xFF;
+ *(Addr+3) = Value & 0xFF;
+ }
+
+ void writeInt64BE(uint8_t *Addr, uint64_t Value) {
+ if (sys::isLittleEndianHost())
+ Value = sys::SwapByteOrder(Value);
+ *Addr = (Value >> 56) & 0xFF;
+ *(Addr+1) = (Value >> 48) & 0xFF;
+ *(Addr+2) = (Value >> 40) & 0xFF;
+ *(Addr+3) = (Value >> 32) & 0xFF;
+ *(Addr+4) = (Value >> 24) & 0xFF;
+ *(Addr+5) = (Value >> 16) & 0xFF;
+ *(Addr+6) = (Value >> 8) & 0xFF;
+ *(Addr+7) = Value & 0xFF;
+ }
+
+ /// \brief Given the common symbols discovered in the object file, emit a
+ /// new section for them and update the symbol mappings in the object and
+ /// symbol table.
+ void emitCommonSymbols(ObjectImage &Obj,
+ const CommonSymbolMap &CommonSymbols,
+ uint64_t TotalSize,
+ SymbolTableMap &SymbolTable);
+
+ /// \brief Emits section data from the object file to the MemoryManager.
+ /// \param IsCode if it's true then allocateCodeSection() will be
+ /// used for emits, else allocateDataSection() will be used.
+ /// \return SectionID.
+ unsigned emitSection(ObjectImage &Obj,
+ const SectionRef &Section,
+ bool IsCode);
+
+ /// \brief Find Section in LocalSections. If the secton is not found - emit
+ /// it and store in LocalSections.
+ /// \param IsCode if it's true then allocateCodeSection() will be
+ /// used for emmits, else allocateDataSection() will be used.
+ /// \return SectionID.
+ unsigned findOrEmitSection(ObjectImage &Obj,
+ const SectionRef &Section,
+ bool IsCode,
+ ObjSectionToIDMap &LocalSections);
+
+ // \brief Add a relocation entry that uses the given section.
+ void addRelocationForSection(const RelocationEntry &RE, unsigned SectionID);
+
+ // \brief Add a relocation entry that uses the given symbol. This symbol may
+ // be found in the global symbol table, or it may be external.
+ void addRelocationForSymbol(const RelocationEntry &RE, StringRef SymbolName);
+
+ /// \brief Emits long jump instruction to Addr.
+ /// \return Pointer to the memory area for emitting target address.
+ uint8_t* createStubFunction(uint8_t *Addr);
+
+ /// \brief Resolves relocations from Relocs list with address from Value.
+ void resolveRelocationList(const RelocationList &Relocs, uint64_t Value);
+ void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value);
+
+ /// \brief A object file specific relocation resolver
+ /// \param Section The section where the relocation is being applied
+ /// \param Offset The offset into the section for this relocation
+ /// \param Value Target symbol address to apply the relocation action
+ /// \param Type object file specific relocation type
+ /// \param Addend A constant addend used to compute the value to be stored
+ /// into the relocatable field
+ virtual void resolveRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) = 0;
+
+ /// \brief Parses the object file relocation and stores it to Relocations
+ /// or SymbolRelocations (this depends on the object file type).
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ ObjectImage &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols,
+ StubMap &Stubs) = 0;
+
+ /// \brief Resolve relocations to external symbols.
+ void resolveExternalSymbols();
+ virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
+public:
+ RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
+
+ virtual ~RuntimeDyldImpl();
+
+ ObjectImage *loadObject(ObjectBuffer *InputBuffer);
+
+ void *getSymbolAddress(StringRef Name) {
+ // FIXME: Just look up as a function for now. Overly simple of course.
+ // Work in progress.
+ if (GlobalSymbolTable.find(Name) == GlobalSymbolTable.end())
+ return 0;
+ SymbolLoc Loc = GlobalSymbolTable.lookup(Name);
+ return getSectionAddress(Loc.first) + Loc.second;
+ }
+
+ uint64_t getSymbolLoadAddress(StringRef Name) {
+ // FIXME: Just look up as a function for now. Overly simple of course.
+ // Work in progress.
+ if (GlobalSymbolTable.find(Name) == GlobalSymbolTable.end())
+ return 0;
+ SymbolLoc Loc = GlobalSymbolTable.lookup(Name);
+ return getSectionLoadAddress(Loc.first) + Loc.second;
+ }
+
+ void resolveRelocations();
+
+ void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
+
+ void mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress);
+
+ // Is the linker in an error state?
+ bool hasError() { return HasError; }
+
+ // Mark the error condition as handled and continue.
+ void clearError() { HasError = false; }
+
+ // Get the error message.
+ StringRef getErrorString() { return ErrorStr; }
+
+ virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const = 0;
+};
+
+} // end namespace llvm
+
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
new file mode 100644
index 000000000000..bcc3df1b4e7c
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -0,0 +1,311 @@
+//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "RuntimeDyldMachO.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+
+void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ uint8_t *LocalAddress = Section.Address + Offset;
+ uint64_t FinalAddress = Section.LoadAddress + Offset;
+ bool isPCRel = (Type >> 24) & 1;
+ unsigned MachoType = (Type >> 28) & 0xf;
+ unsigned Size = 1 << ((Type >> 25) & 3);
+
+ DEBUG(dbgs() << "resolveRelocation LocalAddress: "
+ << format("%p", LocalAddress)
+ << " FinalAddress: " << format("%p", FinalAddress)
+ << " Value: " << format("%p", Value)
+ << " Addend: " << Addend
+ << " isPCRel: " << isPCRel
+ << " MachoType: " << MachoType
+ << " Size: " << Size
+ << "\n");
+
+ // This just dispatches to the proper target specific routine.
+ switch (Arch) {
+ default: llvm_unreachable("Unsupported CPU type!");
+ case Triple::x86_64:
+ resolveX86_64Relocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ MachoType,
+ Size,
+ Addend);
+ break;
+ case Triple::x86:
+ resolveI386Relocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ MachoType,
+ Size,
+ Addend);
+ break;
+ case Triple::arm: // Fall through.
+ case Triple::thumb:
+ resolveARMRelocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ MachoType,
+ Size,
+ Addend);
+ break;
+ }
+}
+
+bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
+ if (isPCRel)
+ Value -= FinalAddress + 4; // see resolveX86_64Relocation
+
+ switch (Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_Vanilla: {
+ uint8_t *p = LocalAddress;
+ uint64_t ValueToWrite = Value + Addend;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)(ValueToWrite & 0xff);
+ ValueToWrite >>= 8;
+ }
+ return false;
+ }
+ case macho::RIT_Difference:
+ case macho::RIT_Generic_LocalDifference:
+ case macho::RIT_Generic_PreboundLazyPointer:
+ return Error("Relocation type not implemented yet!");
+ }
+}
+
+bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
+ // If the relocation is PC-relative, the value to be encoded is the
+ // pointer difference.
+ if (isPCRel)
+ // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
+ // address. Is that expected? Only for branches, perhaps?
+ Value -= FinalAddress + 4;
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_X86_64_Signed1:
+ case macho::RIT_X86_64_Signed2:
+ case macho::RIT_X86_64_Signed4:
+ case macho::RIT_X86_64_Signed:
+ case macho::RIT_X86_64_Unsigned:
+ case macho::RIT_X86_64_Branch: {
+ Value += Addend;
+ // Mask in the target value a byte at a time (we don't have an alignment
+ // guarantee for the target address, so this is safest).
+ uint8_t *p = (uint8_t*)LocalAddress;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)Value;
+ Value >>= 8;
+ }
+ return false;
+ }
+ case macho::RIT_X86_64_GOTLoad:
+ case macho::RIT_X86_64_GOT:
+ case macho::RIT_X86_64_Subtractor:
+ case macho::RIT_X86_64_TLV:
+ return Error("Relocation type not implemented yet!");
+ }
+}
+
+bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
+ // If the relocation is PC-relative, the value to be encoded is the
+ // pointer difference.
+ if (isPCRel) {
+ Value -= FinalAddress;
+ // ARM PCRel relocations have an effective-PC offset of two instructions
+ // (four bytes in Thumb mode, 8 bytes in ARM mode).
+ // FIXME: For now, assume ARM mode.
+ Value -= 8;
+ }
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_Vanilla: {
+ // Mask in the target value a byte at a time (we don't have an alignment
+ // guarantee for the target address, so this is safest).
+ uint8_t *p = (uint8_t*)LocalAddress;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)Value;
+ Value >>= 8;
+ }
+ break;
+ }
+ case macho::RIT_ARM_Branch24Bit: {
+ // Mask the value into the target address. We know instructions are
+ // 32-bit aligned, so we can do it all at once.
+ uint32_t *p = (uint32_t*)LocalAddress;
+ // The low two bits of the value are not encoded.
+ Value >>= 2;
+ // Mask the value to 24 bits.
+ Value &= 0xffffff;
+ // FIXME: If the destination is a Thumb function (and the instruction
+ // is a non-predicated BL instruction), we need to change it to a BLX
+ // instruction instead.
+
+ // Insert the value into the instruction.
+ *p = (*p & ~0xffffff) | Value;
+ break;
+ }
+ case macho::RIT_ARM_ThumbBranch22Bit:
+ case macho::RIT_ARM_ThumbBranch32Bit:
+ case macho::RIT_ARM_Half:
+ case macho::RIT_ARM_HalfDifference:
+ case macho::RIT_Pair:
+ case macho::RIT_Difference:
+ case macho::RIT_ARM_LocalDifference:
+ case macho::RIT_ARM_PreboundLazyPointer:
+ return Error("Relocation type not implemented yet!");
+ }
+ return false;
+}
+
+void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel,
+ ObjectImage &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols,
+ StubMap &Stubs) {
+
+ uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL);
+ RelocationValueRef Value;
+ SectionEntry &Section = Sections[Rel.SectionID];
+
+ bool isExtern = (RelType >> 27) & 1;
+ if (isExtern) {
+ // Obtain the symbol name which is referenced in the relocation
+ StringRef TargetName;
+ const SymbolRef &Symbol = Rel.Symbol;
+ Symbol.getName(TargetName);
+ // First search for the symbol in the local symbol table
+ SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data());
+ if (lsi != Symbols.end()) {
+ Value.SectionID = lsi->second.first;
+ Value.Addend = lsi->second.second;
+ } else {
+ // Search for the symbol in the global symbol table
+ SymbolTableMap::const_iterator gsi = GlobalSymbolTable.find(TargetName.data());
+ if (gsi != GlobalSymbolTable.end()) {
+ Value.SectionID = gsi->second.first;
+ Value.Addend = gsi->second.second;
+ } else
+ Value.SymbolName = TargetName.data();
+ }
+ } else {
+ error_code err;
+ uint8_t sectionIndex = static_cast<uint8_t>(RelType & 0xFF);
+ section_iterator si = Obj.begin_sections(),
+ se = Obj.end_sections();
+ for (uint8_t i = 1; i < sectionIndex; i++) {
+ error_code err;
+ si.increment(err);
+ if (si == se)
+ break;
+ }
+ assert(si != se && "No section containing relocation!");
+ Value.SectionID = findOrEmitSection(Obj, *si, true, ObjSectionToID);
+ Value.Addend = 0;
+ // FIXME: The size and type of the relocation determines if we can
+ // encode an Addend in the target location itself, and if so, how many
+ // bytes we should read in order to get it. We don't yet support doing
+ // that, and just assuming it's sizeof(intptr_t) is blatantly wrong.
+ //Value.Addend = *(const intptr_t *)Target;
+ if (Value.Addend) {
+ // The MachO addend is an offset from the current section. We need it
+ // to be an offset from the destination section
+ Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress;
+ }
+ }
+
+ if (Arch == Triple::arm && (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) {
+ // This is an ARM branch relocation, need to use a stub function.
+
+ // Look up for existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end())
+ resolveRelocation(Section, Rel.Offset,
+ (uint64_t)Section.Address + i->second,
+ RelType, 0);
+ else {
+ // Create a new stub function.
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+ RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+ macho::RIT_Vanilla, Value.Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ resolveRelocation(Section, Rel.Offset,
+ (uint64_t)Section.Address + Section.StubOffset,
+ RelType, 0);
+ Section.StubOffset += getMaxStubSize();
+ }
+ } else {
+ RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ }
+}
+
+
+bool RuntimeDyldMachO::isCompatibleFormat(
+ const ObjectBuffer *InputBuffer) const {
+ if (InputBuffer->getBufferSize() < 4)
+ return false;
+ StringRef Magic(InputBuffer->getBufferStart(), 4);
+ if (Magic == "\xFE\xED\xFA\xCE") return true;
+ if (Magic == "\xCE\xFA\xED\xFE") return true;
+ if (Magic == "\xFE\xED\xFA\xCF") return true;
+ if (Magic == "\xCF\xFA\xED\xFE") return true;
+ return false;
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
new file mode 100644
index 000000000000..62d84870780c
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -0,0 +1,71 @@
+//===-- RuntimeDyldMachO.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachO support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_MACHO_H
+#define LLVM_RUNTIME_DYLD_MACHO_H
+
+#include "RuntimeDyldImpl.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+
+namespace llvm {
+class RuntimeDyldMachO : public RuntimeDyldImpl {
+protected:
+ bool resolveI386Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+ bool resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+ bool resolveARMRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ ObjectImage &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols,
+ StubMap &Stubs);
+
+public:
+ virtual void resolveRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+ bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
new file mode 100644
index 000000000000..ca4330fa22b0
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
@@ -0,0 +1,99 @@
+//===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This just asks the TargetRegistry for the appropriate target to use, and
+// allows the user to specify a specific one on the commandline with -march=x,
+// -mcpu=y, and -mattr=a,-b,+c. Clients should initialize targets prior to
+// calling selectTarget().
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+TargetMachine *EngineBuilder::selectTarget() {
+ Triple TT;
+
+ // MCJIT can generate code for remote targets, but the old JIT and Interpreter
+ // must use the host architecture.
+ if (UseMCJIT && WhichEngine != EngineKind::Interpreter && M)
+ TT.setTriple(M->getTargetTriple());
+
+ return selectTarget(TT, MArch, MCPU, MAttrs);
+}
+
+/// selectTarget - Pick a target either via -march or by guessing the native
+/// arch. Add any CPU features specified via -mcpu or -mattr.
+TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs) {
+ Triple TheTriple(TargetTriple);
+ if (TheTriple.getTriple().empty())
+ TheTriple.setTriple(sys::getProcessTriple());
+
+ // Adjust the triple to match what the user requested.
+ const Target *TheTarget = 0;
+ if (!MArch.empty()) {
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ ie = TargetRegistry::end(); it != ie; ++it) {
+ if (MArch == it->getName()) {
+ TheTarget = &*it;
+ break;
+ }
+ }
+
+ if (!TheTarget) {
+ if (ErrorStr)
+ *ErrorStr = "No available targets are compatible with this -march, "
+ "see -version for the available targets.\n";
+ return 0;
+ }
+
+ // Adjust the triple to match (if known), otherwise stick with the
+ // requested/host triple.
+ Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
+ if (Type != Triple::UnknownArch)
+ TheTriple.setArch(Type);
+ } else {
+ std::string Error;
+ TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
+ if (TheTarget == 0) {
+ if (ErrorStr)
+ *ErrorStr = Error;
+ return 0;
+ }
+ }
+
+ // Package up features to be passed to target/subtarget
+ std::string FeaturesStr;
+ if (!MAttrs.empty()) {
+ SubtargetFeatures Features;
+ for (unsigned i = 0; i != MAttrs.size(); ++i)
+ Features.AddFeature(MAttrs[i]);
+ FeaturesStr = Features.getString();
+ }
+
+ // Allocate a target...
+ TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(),
+ MCPU, FeaturesStr,
+ Options,
+ RelocModel, CMModel,
+ OptLevel);
+ assert(Target && "Could not allocate target machine!");
+ return Target;
+}
diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp
new file mode 100644
index 000000000000..fb591a891dae
--- /dev/null
+++ b/contrib/llvm/lib/IR/AsmWriter.cpp
@@ -0,0 +1,2236 @@
+//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Writer.h
+//
+// Note that these routines must be extremely tolerant of various errors in the
+// LLVM code, because it can be used for debugging transformations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+// Make virtual table appear in this compilation unit.
+AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+static const Module *getModuleFromVal(const Value *V) {
+ if (const Argument *MA = dyn_cast<Argument>(V))
+ return MA->getParent() ? MA->getParent()->getParent() : 0;
+
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return BB->getParent() ? BB->getParent()->getParent() : 0;
+
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ const Function *M = I->getParent() ? I->getParent()->getParent() : 0;
+ return M ? M->getParent() : 0;
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return GV->getParent();
+ return 0;
+}
+
+static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
+ switch (cc) {
+ default: Out << "cc" << cc; break;
+ case CallingConv::Fast: Out << "fastcc"; break;
+ case CallingConv::Cold: Out << "coldcc"; break;
+ case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
+ case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
+ case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
+ case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
+ case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
+ case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
+ case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
+ case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break;
+ case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << "ptx_device"; break;
+ }
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(StringRef Name, raw_ostream &Out) {
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ unsigned char C = Name[i];
+ if (isprint(C) && C != '\\' && C != '"')
+ Out << C;
+ else
+ Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+}
+
+enum PrefixType {
+ GlobalPrefix,
+ LabelPrefix,
+ LocalPrefix,
+ NoPrefix
+};
+
+/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
+/// prefixed with % (if the string only contains simple characters) or is
+/// surrounded with ""'s (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
+ assert(!Name.empty() && "Cannot get empty name!");
+ switch (Prefix) {
+ case NoPrefix: break;
+ case GlobalPrefix: OS << '@'; break;
+ case LabelPrefix: break;
+ case LocalPrefix: OS << '%'; break;
+ }
+
+ // Scan the name to see if it needs quotes first.
+ bool NeedsQuotes = isdigit(static_cast<unsigned char>(Name[0]));
+ if (!NeedsQuotes) {
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ // By making this unsigned, the value passed in to isalnum will always be
+ // in the range 0-255. This is important when building with MSVC because
+ // its implementation will assert. This situation can arise when dealing
+ // with UTF-8 multibyte characters.
+ unsigned char C = Name[i];
+ if (!isalnum(static_cast<unsigned char>(C)) && C != '-' && C != '.' &&
+ C != '_') {
+ NeedsQuotes = true;
+ break;
+ }
+ }
+ }
+
+ // If we didn't need any quotes, just write out the name in one blast.
+ if (!NeedsQuotes) {
+ OS << Name;
+ return;
+ }
+
+ // Okay, we need quotes. Output the quotes and escape any scary characters as
+ // needed.
+ OS << '"';
+ PrintEscapedString(Name, OS);
+ OS << '"';
+}
+
+/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
+/// prefixed with % (if the string only contains simple characters) or is
+/// surrounded with ""'s (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, const Value *V) {
+ PrintLLVMName(OS, V->getName(),
+ isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
+}
+
+//===----------------------------------------------------------------------===//
+// TypePrinting Class: Type printing machinery
+//===----------------------------------------------------------------------===//
+
+/// TypePrinting - Type printing machinery.
+namespace {
+class TypePrinting {
+ TypePrinting(const TypePrinting &) LLVM_DELETED_FUNCTION;
+ void operator=(const TypePrinting&) LLVM_DELETED_FUNCTION;
+public:
+
+ /// NamedTypes - The named types that are used by the current module.
+ TypeFinder NamedTypes;
+
+ /// NumberedTypes - The numbered types, along with their value.
+ DenseMap<StructType*, unsigned> NumberedTypes;
+
+
+ TypePrinting() {}
+ ~TypePrinting() {}
+
+ void incorporateTypes(const Module &M);
+
+ void print(Type *Ty, raw_ostream &OS);
+
+ void printStructBody(StructType *Ty, raw_ostream &OS);
+};
+} // end anonymous namespace.
+
+
+void TypePrinting::incorporateTypes(const Module &M) {
+ NamedTypes.run(M, false);
+
+ // The list of struct types we got back includes all the struct types, split
+ // the unnamed ones out to a numbering and remove the anonymous structs.
+ unsigned NextNumber = 0;
+
+ std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E;
+ for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) {
+ StructType *STy = *I;
+
+ // Ignore anonymous types.
+ if (STy->isLiteral())
+ continue;
+
+ if (STy->getName().empty())
+ NumberedTypes[STy] = NextNumber++;
+ else
+ *NextToUse++ = STy;
+ }
+
+ NamedTypes.erase(NextToUse, NamedTypes.end());
+}
+
+
+/// CalcTypeName - Write the specified type to the specified raw_ostream, making
+/// use of type names or up references to shorten the type name where possible.
+void TypePrinting::print(Type *Ty, raw_ostream &OS) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: OS << "void"; break;
+ case Type::HalfTyID: OS << "half"; break;
+ case Type::FloatTyID: OS << "float"; break;
+ case Type::DoubleTyID: OS << "double"; break;
+ case Type::X86_FP80TyID: OS << "x86_fp80"; break;
+ case Type::FP128TyID: OS << "fp128"; break;
+ case Type::PPC_FP128TyID: OS << "ppc_fp128"; break;
+ case Type::LabelTyID: OS << "label"; break;
+ case Type::MetadataTyID: OS << "metadata"; break;
+ case Type::X86_MMXTyID: OS << "x86_mmx"; break;
+ case Type::IntegerTyID:
+ OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
+ return;
+
+ case Type::FunctionTyID: {
+ FunctionType *FTy = cast<FunctionType>(Ty);
+ print(FTy->getReturnType(), OS);
+ OS << " (";
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ if (I != FTy->param_begin())
+ OS << ", ";
+ print(*I, OS);
+ }
+ if (FTy->isVarArg()) {
+ if (FTy->getNumParams()) OS << ", ";
+ OS << "...";
+ }
+ OS << ')';
+ return;
+ }
+ case Type::StructTyID: {
+ StructType *STy = cast<StructType>(Ty);
+
+ if (STy->isLiteral())
+ return printStructBody(STy, OS);
+
+ if (!STy->getName().empty())
+ return PrintLLVMName(OS, STy->getName(), LocalPrefix);
+
+ DenseMap<StructType*, unsigned>::iterator I = NumberedTypes.find(STy);
+ if (I != NumberedTypes.end())
+ OS << '%' << I->second;
+ else // Not enumerated, print the hex address.
+ OS << "%\"type " << STy << '\"';
+ return;
+ }
+ case Type::PointerTyID: {
+ PointerType *PTy = cast<PointerType>(Ty);
+ print(PTy->getElementType(), OS);
+ if (unsigned AddressSpace = PTy->getAddressSpace())
+ OS << " addrspace(" << AddressSpace << ')';
+ OS << '*';
+ return;
+ }
+ case Type::ArrayTyID: {
+ ArrayType *ATy = cast<ArrayType>(Ty);
+ OS << '[' << ATy->getNumElements() << " x ";
+ print(ATy->getElementType(), OS);
+ OS << ']';
+ return;
+ }
+ case Type::VectorTyID: {
+ VectorType *PTy = cast<VectorType>(Ty);
+ OS << "<" << PTy->getNumElements() << " x ";
+ print(PTy->getElementType(), OS);
+ OS << '>';
+ return;
+ }
+ default:
+ OS << "<unrecognized-type>";
+ return;
+ }
+}
+
+void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
+ if (STy->isOpaque()) {
+ OS << "opaque";
+ return;
+ }
+
+ if (STy->isPacked())
+ OS << '<';
+
+ if (STy->getNumElements() == 0) {
+ OS << "{}";
+ } else {
+ StructType::element_iterator I = STy->element_begin();
+ OS << "{ ";
+ print(*I++, OS);
+ for (StructType::element_iterator E = STy->element_end(); I != E; ++I) {
+ OS << ", ";
+ print(*I, OS);
+ }
+
+ OS << " }";
+ }
+ if (STy->isPacked())
+ OS << '>';
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// SlotTracker Class: Enumerate slot numbers for unnamed values
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// This class provides computation of slot numbers for LLVM Assembly writing.
+///
+class SlotTracker {
+public:
+ /// ValueMap - A mapping of Values to slot numbers.
+ typedef DenseMap<const Value*, unsigned> ValueMap;
+
+private:
+ /// TheModule - The module for which we are holding slot numbers.
+ const Module* TheModule;
+
+ /// TheFunction - The function for which we are holding slot numbers.
+ const Function* TheFunction;
+ bool FunctionProcessed;
+
+ /// mMap - The slot map for the module level data.
+ ValueMap mMap;
+ unsigned mNext;
+
+ /// fMap - The slot map for the function level data.
+ ValueMap fMap;
+ unsigned fNext;
+
+ /// mdnMap - Map for MDNodes.
+ DenseMap<const MDNode*, unsigned> mdnMap;
+ unsigned mdnNext;
+
+ /// asMap - The slot map for attribute sets.
+ DenseMap<AttributeSet, unsigned> asMap;
+ unsigned asNext;
+public:
+ /// Construct from a module
+ explicit SlotTracker(const Module *M);
+ /// Construct from a function, starting out in incorp state.
+ explicit SlotTracker(const Function *F);
+
+ /// Return the slot number of the specified value in it's type
+ /// plane. If something is not in the SlotTracker, return -1.
+ int getLocalSlot(const Value *V);
+ int getGlobalSlot(const GlobalValue *V);
+ int getMetadataSlot(const MDNode *N);
+ int getAttributeGroupSlot(AttributeSet AS);
+
+ /// If you'd like to deal with a function instead of just a module, use
+ /// this method to get its data into the SlotTracker.
+ void incorporateFunction(const Function *F) {
+ TheFunction = F;
+ FunctionProcessed = false;
+ }
+
+ /// After calling incorporateFunction, use this method to remove the
+ /// most recently incorporated function from the SlotTracker. This
+ /// will reset the state of the machine back to just the module contents.
+ void purgeFunction();
+
+ /// MDNode map iterators.
+ typedef DenseMap<const MDNode*, unsigned>::iterator mdn_iterator;
+ mdn_iterator mdn_begin() { return mdnMap.begin(); }
+ mdn_iterator mdn_end() { return mdnMap.end(); }
+ unsigned mdn_size() const { return mdnMap.size(); }
+ bool mdn_empty() const { return mdnMap.empty(); }
+
+ /// AttributeSet map iterators.
+ typedef DenseMap<AttributeSet, unsigned>::iterator as_iterator;
+ as_iterator as_begin() { return asMap.begin(); }
+ as_iterator as_end() { return asMap.end(); }
+ unsigned as_size() const { return asMap.size(); }
+ bool as_empty() const { return asMap.empty(); }
+
+ /// This function does the actual initialization.
+ inline void initialize();
+
+ // Implementation Details
+private:
+ /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
+ void CreateModuleSlot(const GlobalValue *V);
+
+ /// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
+ void CreateMetadataSlot(const MDNode *N);
+
+ /// CreateFunctionSlot - Insert the specified Value* into the slot table.
+ void CreateFunctionSlot(const Value *V);
+
+ /// \brief Insert the specified AttributeSet into the slot table.
+ void CreateAttributeSetSlot(AttributeSet AS);
+
+ /// Add all of the module level global variables (and their initializers)
+ /// and function declarations, but not the contents of those functions.
+ void processModule();
+
+ /// Add all of the functions arguments, basic blocks, and instructions.
+ void processFunction();
+
+ SlotTracker(const SlotTracker &) LLVM_DELETED_FUNCTION;
+ void operator=(const SlotTracker &) LLVM_DELETED_FUNCTION;
+};
+
+} // end anonymous namespace
+
+
+static SlotTracker *createSlotTracker(const Value *V) {
+ if (const Argument *FA = dyn_cast<Argument>(V))
+ return new SlotTracker(FA->getParent());
+
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getParent())
+ return new SlotTracker(I->getParent()->getParent());
+
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return new SlotTracker(BB->getParent());
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return new SlotTracker(GV->getParent());
+
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return new SlotTracker(GA->getParent());
+
+ if (const Function *Func = dyn_cast<Function>(V))
+ return new SlotTracker(Func);
+
+ if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ if (!MD->isFunctionLocal())
+ return new SlotTracker(MD->getFunction());
+
+ return new SlotTracker((Function *)0);
+ }
+
+ return 0;
+}
+
+#if 0
+#define ST_DEBUG(X) dbgs() << X
+#else
+#define ST_DEBUG(X)
+#endif
+
+// Module level constructor. Causes the contents of the Module (sans functions)
+// to be added to the slot table.
+SlotTracker::SlotTracker(const Module *M)
+ : TheModule(M), TheFunction(0), FunctionProcessed(false),
+ mNext(0), fNext(0), mdnNext(0), asNext(0) {
+}
+
+// Function level constructor. Causes the contents of the Module and the one
+// function provided to be added to the slot table.
+SlotTracker::SlotTracker(const Function *F)
+ : TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false),
+ mNext(0), fNext(0), mdnNext(0), asNext(0) {
+}
+
+inline void SlotTracker::initialize() {
+ if (TheModule) {
+ processModule();
+ TheModule = 0; ///< Prevent re-processing next time we're called.
+ }
+
+ if (TheFunction && !FunctionProcessed)
+ processFunction();
+}
+
+// Iterate through all the global variables, functions, and global
+// variable initializers and create slots for them.
+void SlotTracker::processModule() {
+ ST_DEBUG("begin processModule!\n");
+
+ // Add all of the unnamed global variables to the value table.
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (!I->hasName())
+ CreateModuleSlot(I);
+ }
+
+ // Add metadata used by named metadata.
+ for (Module::const_named_metadata_iterator
+ I = TheModule->named_metadata_begin(),
+ E = TheModule->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ CreateMetadataSlot(NMD->getOperand(i));
+ }
+
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I) {
+ if (!I->hasName())
+ // Add all the unnamed functions to the table.
+ CreateModuleSlot(I);
+
+ // Add all the function attributes to the table.
+ // FIXME: Add attributes of other objects?
+ AttributeSet FnAttrs = I->getAttributes().getFnAttributes();
+ if (FnAttrs.hasAttributes(AttributeSet::FunctionIndex))
+ CreateAttributeSetSlot(FnAttrs);
+ }
+
+ ST_DEBUG("end processModule!\n");
+}
+
+// Process the arguments, basic blocks, and instructions of a function.
+void SlotTracker::processFunction() {
+ ST_DEBUG("begin processFunction!\n");
+ fNext = 0;
+
+ // Add all the function arguments with no names.
+ for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
+ AE = TheFunction->arg_end(); AI != AE; ++AI)
+ if (!AI->hasName())
+ CreateFunctionSlot(AI);
+
+ ST_DEBUG("Inserting Instructions:\n");
+
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+
+ // Add all of the basic blocks and instructions with no names.
+ for (Function::const_iterator BB = TheFunction->begin(),
+ E = TheFunction->end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ CreateFunctionSlot(BB);
+
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ if (!I->getType()->isVoidTy() && !I->hasName())
+ CreateFunctionSlot(I);
+
+ // Intrinsics can directly use metadata. We allow direct calls to any
+ // llvm.foo function here, because the target may not be linked into the
+ // optimizer.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (Function *F = CI->getCalledFunction())
+ if (F->getName().startswith("llvm."))
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
+ CreateMetadataSlot(N);
+
+ // Add all the call attributes to the table.
+ AttributeSet Attrs = CI->getAttributes().getFnAttributes();
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ CreateAttributeSetSlot(Attrs);
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ // Add all the call attributes to the table.
+ AttributeSet Attrs = II->getAttributes().getFnAttributes();
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ CreateAttributeSetSlot(Attrs);
+ }
+
+ // Process metadata attached with this instruction.
+ I->getAllMetadata(MDForInst);
+ for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+ CreateMetadataSlot(MDForInst[i].second);
+ MDForInst.clear();
+ }
+ }
+
+ FunctionProcessed = true;
+
+ ST_DEBUG("end processFunction!\n");
+}
+
+/// Clean up after incorporating a function. This is the only way to get out of
+/// the function incorporation state that affects get*Slot/Create*Slot. Function
+/// incorporation state is indicated by TheFunction != 0.
+void SlotTracker::purgeFunction() {
+ ST_DEBUG("begin purgeFunction!\n");
+ fMap.clear(); // Simply discard the function level map
+ TheFunction = 0;
+ FunctionProcessed = false;
+ ST_DEBUG("end purgeFunction!\n");
+}
+
+/// getGlobalSlot - Get the slot number of a global value.
+int SlotTracker::getGlobalSlot(const GlobalValue *V) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ // Find the value in the module map
+ ValueMap::iterator MI = mMap.find(V);
+ return MI == mMap.end() ? -1 : (int)MI->second;
+}
+
+/// getMetadataSlot - Get the slot number of a MDNode.
+int SlotTracker::getMetadataSlot(const MDNode *N) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ // Find the MDNode in the module map
+ mdn_iterator MI = mdnMap.find(N);
+ return MI == mdnMap.end() ? -1 : (int)MI->second;
+}
+
+
+/// getLocalSlot - Get the slot number for a value that is local to a function.
+int SlotTracker::getLocalSlot(const Value *V) {
+ assert(!isa<Constant>(V) && "Can't get a constant or global slot with this!");
+
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ ValueMap::iterator FI = fMap.find(V);
+ return FI == fMap.end() ? -1 : (int)FI->second;
+}
+
+int SlotTracker::getAttributeGroupSlot(AttributeSet AS) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ // Find the AttributeSet in the module map.
+ as_iterator AI = asMap.find(AS);
+ return AI == asMap.end() ? -1 : (int)AI->second;
+}
+
+/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
+void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
+ assert(V && "Can't insert a null Value into SlotTracker!");
+ assert(!V->getType()->isVoidTy() && "Doesn't need a slot!");
+ assert(!V->hasName() && "Doesn't need a slot!");
+
+ unsigned DestSlot = mNext++;
+ mMap[V] = DestSlot;
+
+ ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
+ DestSlot << " [");
+ // G = Global, F = Function, A = Alias, o = other
+ ST_DEBUG((isa<GlobalVariable>(V) ? 'G' :
+ (isa<Function>(V) ? 'F' :
+ (isa<GlobalAlias>(V) ? 'A' : 'o'))) << "]\n");
+}
+
+/// CreateSlot - Create a new slot for the specified value if it has no name.
+void SlotTracker::CreateFunctionSlot(const Value *V) {
+ assert(!V->getType()->isVoidTy() && !V->hasName() && "Doesn't need a slot!");
+
+ unsigned DestSlot = fNext++;
+ fMap[V] = DestSlot;
+
+ // G = Global, F = Function, o = other
+ ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
+ DestSlot << " [o]\n");
+}
+
+/// CreateModuleSlot - Insert the specified MDNode* into the slot table.
+void SlotTracker::CreateMetadataSlot(const MDNode *N) {
+ assert(N && "Can't insert a null Value into SlotTracker!");
+
+ // Don't insert if N is a function-local metadata, these are always printed
+ // inline.
+ if (!N->isFunctionLocal()) {
+ mdn_iterator I = mdnMap.find(N);
+ if (I != mdnMap.end())
+ return;
+
+ unsigned DestSlot = mdnNext++;
+ mdnMap[N] = DestSlot;
+ }
+
+ // Recursively add any MDNodes referenced by operands.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (const MDNode *Op = dyn_cast_or_null<MDNode>(N->getOperand(i)))
+ CreateMetadataSlot(Op);
+}
+
+void SlotTracker::CreateAttributeSetSlot(AttributeSet AS) {
+ assert(AS.hasAttributes(AttributeSet::FunctionIndex) &&
+ "Doesn't need a slot!");
+
+ as_iterator I = asMap.find(AS);
+ if (I != asMap.end())
+ return;
+
+ unsigned DestSlot = asNext++;
+ asMap[AS] = DestSlot;
+}
+
+//===----------------------------------------------------------------------===//
+// AsmWriter Implementation
+//===----------------------------------------------------------------------===//
+
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context);
+
+
+
+static const char *getPredicateText(unsigned predicate) {
+ const char * pred = "unknown";
+ switch (predicate) {
+ case FCmpInst::FCMP_FALSE: pred = "false"; break;
+ case FCmpInst::FCMP_OEQ: pred = "oeq"; break;
+ case FCmpInst::FCMP_OGT: pred = "ogt"; break;
+ case FCmpInst::FCMP_OGE: pred = "oge"; break;
+ case FCmpInst::FCMP_OLT: pred = "olt"; break;
+ case FCmpInst::FCMP_OLE: pred = "ole"; break;
+ case FCmpInst::FCMP_ONE: pred = "one"; break;
+ case FCmpInst::FCMP_ORD: pred = "ord"; break;
+ case FCmpInst::FCMP_UNO: pred = "uno"; break;
+ case FCmpInst::FCMP_UEQ: pred = "ueq"; break;
+ case FCmpInst::FCMP_UGT: pred = "ugt"; break;
+ case FCmpInst::FCMP_UGE: pred = "uge"; break;
+ case FCmpInst::FCMP_ULT: pred = "ult"; break;
+ case FCmpInst::FCMP_ULE: pred = "ule"; break;
+ case FCmpInst::FCMP_UNE: pred = "une"; break;
+ case FCmpInst::FCMP_TRUE: pred = "true"; break;
+ case ICmpInst::ICMP_EQ: pred = "eq"; break;
+ case ICmpInst::ICMP_NE: pred = "ne"; break;
+ case ICmpInst::ICMP_SGT: pred = "sgt"; break;
+ case ICmpInst::ICMP_SGE: pred = "sge"; break;
+ case ICmpInst::ICMP_SLT: pred = "slt"; break;
+ case ICmpInst::ICMP_SLE: pred = "sle"; break;
+ case ICmpInst::ICMP_UGT: pred = "ugt"; break;
+ case ICmpInst::ICMP_UGE: pred = "uge"; break;
+ case ICmpInst::ICMP_ULT: pred = "ult"; break;
+ case ICmpInst::ICMP_ULE: pred = "ule"; break;
+ }
+ return pred;
+}
+
+static void writeAtomicRMWOperation(raw_ostream &Out,
+ AtomicRMWInst::BinOp Op) {
+ switch (Op) {
+ default: Out << " <unknown operation " << Op << ">"; break;
+ case AtomicRMWInst::Xchg: Out << " xchg"; break;
+ case AtomicRMWInst::Add: Out << " add"; break;
+ case AtomicRMWInst::Sub: Out << " sub"; break;
+ case AtomicRMWInst::And: Out << " and"; break;
+ case AtomicRMWInst::Nand: Out << " nand"; break;
+ case AtomicRMWInst::Or: Out << " or"; break;
+ case AtomicRMWInst::Xor: Out << " xor"; break;
+ case AtomicRMWInst::Max: Out << " max"; break;
+ case AtomicRMWInst::Min: Out << " min"; break;
+ case AtomicRMWInst::UMax: Out << " umax"; break;
+ case AtomicRMWInst::UMin: Out << " umin"; break;
+ }
+}
+
+static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
+ if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U)) {
+ // Unsafe algebra implies all the others, no need to write them all out
+ if (FPO->hasUnsafeAlgebra())
+ Out << " fast";
+ else {
+ if (FPO->hasNoNaNs())
+ Out << " nnan";
+ if (FPO->hasNoInfs())
+ Out << " ninf";
+ if (FPO->hasNoSignedZeros())
+ Out << " nsz";
+ if (FPO->hasAllowReciprocal())
+ Out << " arcp";
+ }
+ }
+
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(U)) {
+ if (OBO->hasNoUnsignedWrap())
+ Out << " nuw";
+ if (OBO->hasNoSignedWrap())
+ Out << " nsw";
+ } else if (const PossiblyExactOperator *Div =
+ dyn_cast<PossiblyExactOperator>(U)) {
+ if (Div->isExact())
+ Out << " exact";
+ } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+ if (GEP->isInBounds())
+ Out << " inbounds";
+ }
+}
+
+static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
+ TypePrinting &TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ if (CI->getType()->isIntegerTy(1)) {
+ Out << (CI->getZExtValue() ? "true" : "false");
+ return;
+ }
+ Out << CI->getValue();
+ return;
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle ||
+ &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble) {
+ // We would like to output the FP constant value in exponential notation,
+ // but we cannot do this if doing so will lose precision. Check here to
+ // make sure that we only output it in exponential format if we can parse
+ // the value back and get the same value.
+ //
+ bool ignored;
+ bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf;
+ bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
+ bool isInf = CFP->getValueAPF().isInfinity();
+ bool isNaN = CFP->getValueAPF().isNaN();
+ if (!isHalf && !isInf && !isNaN) {
+ double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
+ CFP->getValueAPF().convertToFloat();
+ SmallString<128> StrVal;
+ raw_svector_ostream(StrVal) << Val;
+
+ // Check to make sure that the stringized number is not some string like
+ // "Inf" or NaN, that atof will accept, but the lexer will not. Check
+ // that the string matches the "[-+]?[0-9]" regex.
+ //
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
+ // Reparse stringized version!
+ if (APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val) {
+ Out << StrVal.str();
+ return;
+ }
+ }
+ }
+ // Otherwise we could not reparse it to exactly the same value, so we must
+ // output the string in hexadecimal format! Note that loading and storing
+ // floating point types changes the bits of NaNs on some hosts, notably
+ // x86, so we must not use these types.
+ assert(sizeof(double) == sizeof(uint64_t) &&
+ "assuming that double is 64 bits!");
+ char Buffer[40];
+ APFloat apf = CFP->getValueAPF();
+ // Halves and floats are represented in ASCII IR as double, convert.
+ if (!isDouble)
+ apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ Out << "0x" <<
+ utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
+ Buffer+40);
+ return;
+ }
+
+ // Either half, or some form of long double.
+ // These appear as a magic letter identifying the type, then a
+ // fixed number of hex digits.
+ Out << "0x";
+ // Bit position, in the current word, of the next nibble to print.
+ int shiftcount;
+
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
+ Out << 'K';
+ // api needed to prevent premature destruction
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t* p = api.getRawData();
+ uint64_t word = p[1];
+ shiftcount = 12;
+ int width = api.getBitWidth();
+ for (int j=0; j<width; j+=4, shiftcount-=4) {
+ unsigned int nibble = (word>>shiftcount) & 15;
+ if (nibble < 10)
+ Out << (unsigned char)(nibble + '0');
+ else
+ Out << (unsigned char)(nibble - 10 + 'A');
+ if (shiftcount == 0 && j+4 < width) {
+ word = *p;
+ shiftcount = 64;
+ if (width-j-4 < 64)
+ shiftcount = width-j-4;
+ }
+ }
+ return;
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) {
+ shiftcount = 60;
+ Out << 'L';
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) {
+ shiftcount = 60;
+ Out << 'M';
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) {
+ shiftcount = 12;
+ Out << 'H';
+ } else
+ llvm_unreachable("Unsupported floating point type");
+ // api needed to prevent premature destruction
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t* p = api.getRawData();
+ uint64_t word = *p;
+ int width = api.getBitWidth();
+ for (int j=0; j<width; j+=4, shiftcount-=4) {
+ unsigned int nibble = (word>>shiftcount) & 15;
+ if (nibble < 10)
+ Out << (unsigned char)(nibble + '0');
+ else
+ Out << (unsigned char)(nibble - 10 + 'A');
+ if (shiftcount == 0 && j+4 < width) {
+ word = *(++p);
+ shiftcount = 64;
+ if (width-j-4 < 64)
+ shiftcount = width-j-4;
+ }
+ }
+ return;
+ }
+
+ if (isa<ConstantAggregateZero>(CV)) {
+ Out << "zeroinitializer";
+ return;
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "blockaddress(";
+ WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
+ Context);
+ Out << ", ";
+ WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
+ Context);
+ Out << ")";
+ return;
+ }
+
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ Type *ETy = CA->getType()->getElementType();
+ Out << '[';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(0),
+ &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ Out << ']';
+ return;
+ }
+
+ if (const ConstantDataArray *CA = dyn_cast<ConstantDataArray>(CV)) {
+ // As a special case, print the array as a string if it is an array of
+ // i8 with ConstantInt values.
+ if (CA->isString()) {
+ Out << "c\"";
+ PrintEscapedString(CA->getAsString(), Out);
+ Out << '"';
+ return;
+ }
+
+ Type *ETy = CA->getType()->getElementType();
+ Out << '[';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(0),
+ &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CA->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(i), &TypePrinter,
+ Machine, Context);
+ }
+ Out << ']';
+ return;
+ }
+
+
+ if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ if (CS->getType()->isPacked())
+ Out << '<';
+ Out << '{';
+ unsigned N = CS->getNumOperands();
+ if (N) {
+ Out << ' ';
+ TypePrinter.print(CS->getOperand(0)->getType(), Out);
+ Out << ' ';
+
+ WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
+ Context);
+
+ for (unsigned i = 1; i < N; i++) {
+ Out << ", ";
+ TypePrinter.print(CS->getOperand(i)->getType(), Out);
+ Out << ' ';
+
+ WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ Out << ' ';
+ }
+
+ Out << '}';
+ if (CS->getType()->isPacked())
+ Out << '>';
+ return;
+ }
+
+ if (isa<ConstantVector>(CV) || isa<ConstantDataVector>(CV)) {
+ Type *ETy = CV->getType()->getVectorElementType();
+ Out << '<';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CV->getAggregateElement(0U), &TypePrinter,
+ Machine, Context);
+ for (unsigned i = 1, e = CV->getType()->getVectorNumElements(); i != e;++i){
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CV->getAggregateElement(i), &TypePrinter,
+ Machine, Context);
+ }
+ Out << '>';
+ return;
+ }
+
+ if (isa<ConstantPointerNull>(CV)) {
+ Out << "null";
+ return;
+ }
+
+ if (isa<UndefValue>(CV)) {
+ Out << "undef";
+ return;
+ }
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ Out << CE->getOpcodeName();
+ WriteOptimizationInfo(Out, CE);
+ if (CE->isCompare())
+ Out << ' ' << getPredicateText(CE->getPredicate());
+ Out << " (";
+
+ for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
+ TypePrinter.print((*OI)->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
+ if (OI+1 != CE->op_end())
+ Out << ", ";
+ }
+
+ if (CE->hasIndices()) {
+ ArrayRef<unsigned> Indices = CE->getIndices();
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ Out << ", " << Indices[i];
+ }
+
+ if (CE->isCast()) {
+ Out << " to ";
+ TypePrinter.print(CE->getType(), Out);
+ }
+
+ Out << ')';
+ return;
+ }
+
+ Out << "<placeholder or erroneous Constant>";
+}
+
+static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!{";
+ for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
+ const Value *V = Node->getOperand(mi);
+ if (V == 0)
+ Out << "null";
+ else {
+ TypePrinter->print(V->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, Node->getOperand(mi),
+ TypePrinter, Machine, Context);
+ }
+ if (mi + 1 != me)
+ Out << ", ";
+ }
+
+ Out << "}";
+}
+
+
+/// WriteAsOperand - Write the name of the specified value out to the specified
+/// ostream. This can be useful when you just want to print int %reg126, not
+/// the whole instruction that generated it.
+///
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ if (V->hasName()) {
+ PrintLLVMName(Out, V);
+ return;
+ }
+
+ const Constant *CV = dyn_cast<Constant>(V);
+ if (CV && !isa<GlobalValue>(CV)) {
+ assert(TypePrinter && "Constants require TypePrinting!");
+ WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
+ return;
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ Out << "asm ";
+ if (IA->hasSideEffects())
+ Out << "sideeffect ";
+ if (IA->isAlignStack())
+ Out << "alignstack ";
+ // We don't emit the AD_ATT dialect as it's the assumed default.
+ if (IA->getDialect() == InlineAsm::AD_Intel)
+ Out << "inteldialect ";
+ Out << '"';
+ PrintEscapedString(IA->getAsmString(), Out);
+ Out << "\", \"";
+ PrintEscapedString(IA->getConstraintString(), Out);
+ Out << '"';
+ return;
+ }
+
+ if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ if (N->isFunctionLocal()) {
+ // Print metadata inline, not via slot reference number.
+ WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context);
+ return;
+ }
+
+ if (!Machine) {
+ if (N->isFunctionLocal())
+ Machine = new SlotTracker(N->getFunction());
+ else
+ Machine = new SlotTracker(Context);
+ }
+ int Slot = Machine->getMetadataSlot(N);
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
+ return;
+ }
+
+ if (const MDString *MDS = dyn_cast<MDString>(V)) {
+ Out << "!\"";
+ PrintEscapedString(MDS->getString(), Out);
+ Out << '"';
+ return;
+ }
+
+ if (V->getValueID() == Value::PseudoSourceValueVal ||
+ V->getValueID() == Value::FixedStackPseudoSourceValueVal) {
+ V->print(Out);
+ return;
+ }
+
+ char Prefix = '%';
+ int Slot;
+ // If we have a SlotTracker, use it.
+ if (Machine) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
+ } else {
+ Slot = Machine->getLocalSlot(V);
+
+ // If the local value didn't succeed, then we may be referring to a value
+ // from a different function. Translate it, as this can happen when using
+ // address of blocks.
+ if (Slot == -1)
+ if ((Machine = createSlotTracker(V))) {
+ Slot = Machine->getLocalSlot(V);
+ delete Machine;
+ }
+ }
+ } else if ((Machine = createSlotTracker(V))) {
+ // Otherwise, create one to get the # and then destroy it.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
+ } else {
+ Slot = Machine->getLocalSlot(V);
+ }
+ delete Machine;
+ Machine = 0;
+ } else {
+ Slot = -1;
+ }
+
+ if (Slot != -1)
+ Out << Prefix << Slot;
+ else
+ Out << "<badref>";
+}
+
+void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
+ bool PrintType, const Module *Context) {
+
+ // Fast path: Don't construct and populate a TypePrinting object if we
+ // won't be needing any types printed.
+ if (!PrintType &&
+ ((!isa<Constant>(V) && !isa<MDNode>(V)) ||
+ V->hasName() || isa<GlobalValue>(V))) {
+ WriteAsOperandInternal(Out, V, 0, 0, Context);
+ return;
+ }
+
+ if (Context == 0) Context = getModuleFromVal(V);
+
+ TypePrinting TypePrinter;
+ if (Context)
+ TypePrinter.incorporateTypes(*Context);
+ if (PrintType) {
+ TypePrinter.print(V->getType(), Out);
+ Out << ' ';
+ }
+
+ WriteAsOperandInternal(Out, V, &TypePrinter, 0, Context);
+}
+
+namespace {
+
+class AssemblyWriter {
+ formatted_raw_ostream &Out;
+ SlotTracker &Machine;
+ const Module *TheModule;
+ TypePrinting TypePrinter;
+ AssemblyAnnotationWriter *AnnotationWriter;
+
+public:
+ inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
+ const Module *M,
+ AssemblyAnnotationWriter *AAW)
+ : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
+ if (M)
+ TypePrinter.incorporateTypes(*M);
+ }
+
+ void printMDNodeBody(const MDNode *MD);
+ void printNamedMDNode(const NamedMDNode *NMD);
+
+ void printModule(const Module *M);
+
+ void writeOperand(const Value *Op, bool PrintType);
+ void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
+ void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
+
+ void writeAllMDNodes();
+ void writeAllAttributeGroups();
+
+ void printTypeIdentities();
+ void printGlobal(const GlobalVariable *GV);
+ void printAlias(const GlobalAlias *GV);
+ void printFunction(const Function *F);
+ void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx);
+ void printBasicBlock(const BasicBlock *BB);
+ void printInstruction(const Instruction &I);
+
+private:
+ // printInfoComment - Print a little comment after the instruction indicating
+ // which slot it occupies.
+ void printInfoComment(const Value &V);
+};
+} // end of anonymous namespace
+
+void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
+ if (Operand == 0) {
+ Out << "<null operand!>";
+ return;
+ }
+ if (PrintType) {
+ TypePrinter.print(Operand->getType(), Out);
+ Out << ' ';
+ }
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+}
+
+void AssemblyWriter::writeAtomic(AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Ordering == NotAtomic)
+ return;
+
+ switch (SynchScope) {
+ case SingleThread: Out << " singlethread"; break;
+ case CrossThread: break;
+ }
+
+ switch (Ordering) {
+ default: Out << " <bad ordering " << int(Ordering) << ">"; break;
+ case Unordered: Out << " unordered"; break;
+ case Monotonic: Out << " monotonic"; break;
+ case Acquire: Out << " acquire"; break;
+ case Release: Out << " release"; break;
+ case AcquireRelease: Out << " acq_rel"; break;
+ case SequentiallyConsistent: Out << " seq_cst"; break;
+ }
+}
+
+void AssemblyWriter::writeParamOperand(const Value *Operand,
+ AttributeSet Attrs, unsigned Idx) {
+ if (Operand == 0) {
+ Out << "<null operand!>";
+ return;
+ }
+
+ // Print the type
+ TypePrinter.print(Operand->getType(), Out);
+ // Print parameter attributes list
+ if (Attrs.hasAttributes(Idx))
+ Out << ' ' << Attrs.getAsString(Idx);
+ Out << ' ';
+ // Print the operand
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+}
+
+void AssemblyWriter::printModule(const Module *M) {
+ Machine.initialize();
+
+ if (!M->getModuleIdentifier().empty() &&
+ // Don't print the ID if it will start a new line (which would
+ // require a comment char before it).
+ M->getModuleIdentifier().find('\n') == std::string::npos)
+ Out << "; ModuleID = '" << M->getModuleIdentifier() << "'\n";
+
+ if (!M->getDataLayout().empty())
+ Out << "target datalayout = \"" << M->getDataLayout() << "\"\n";
+ if (!M->getTargetTriple().empty())
+ Out << "target triple = \"" << M->getTargetTriple() << "\"\n";
+
+ if (!M->getModuleInlineAsm().empty()) {
+ // Split the string into lines, to make it easier to read the .ll file.
+ std::string Asm = M->getModuleInlineAsm();
+ size_t CurPos = 0;
+ size_t NewLine = Asm.find_first_of('\n', CurPos);
+ Out << '\n';
+ while (NewLine != std::string::npos) {
+ // We found a newline, print the portion of the asm string from the
+ // last newline up to this newline.
+ Out << "module asm \"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
+ Out);
+ Out << "\"\n";
+ CurPos = NewLine+1;
+ NewLine = Asm.find_first_of('\n', CurPos);
+ }
+ std::string rest(Asm.begin()+CurPos, Asm.end());
+ if (!rest.empty()) {
+ Out << "module asm \"";
+ PrintEscapedString(rest, Out);
+ Out << "\"\n";
+ }
+ }
+
+ printTypeIdentities();
+
+ // Output all globals.
+ if (!M->global_empty()) Out << '\n';
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ printGlobal(I); Out << '\n';
+ }
+
+ // Output all aliases.
+ if (!M->alias_empty()) Out << "\n";
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ printAlias(I);
+
+ // Output all of the functions.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
+ printFunction(I);
+
+ // Output all attribute groups.
+ if (!Machine.as_empty()) {
+ Out << '\n';
+ writeAllAttributeGroups();
+ }
+
+ // Output named metadata.
+ if (!M->named_metadata_empty()) Out << '\n';
+
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I)
+ printNamedMDNode(I);
+
+ // Output metadata.
+ if (!Machine.mdn_empty()) {
+ Out << '\n';
+ writeAllMDNodes();
+ }
+}
+
+void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
+ Out << '!';
+ StringRef Name = NMD->getName();
+ if (Name.empty()) {
+ Out << "<empty name> ";
+ } else {
+ if (isalpha(static_cast<unsigned char>(Name[0])) ||
+ Name[0] == '-' || Name[0] == '$' ||
+ Name[0] == '.' || Name[0] == '_')
+ Out << Name[0];
+ else
+ Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F);
+ for (unsigned i = 1, e = Name.size(); i != e; ++i) {
+ unsigned char C = Name[i];
+ if (isalnum(static_cast<unsigned char>(C)) || C == '-' || C == '$' ||
+ C == '.' || C == '_')
+ Out << C;
+ else
+ Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+ }
+ Out << " = !{";
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ if (i) Out << ", ";
+ int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
+ }
+ Out << "}\n";
+}
+
+
+static void PrintLinkage(GlobalValue::LinkageTypes LT,
+ formatted_raw_ostream &Out) {
+ switch (LT) {
+ case GlobalValue::ExternalLinkage: break;
+ case GlobalValue::PrivateLinkage: Out << "private "; break;
+ case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "linker_private_weak ";
+ break;
+ case GlobalValue::InternalLinkage: Out << "internal "; break;
+ case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
+ case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ Out << "linkonce_odr_auto_hide ";
+ break;
+ case GlobalValue::WeakAnyLinkage: Out << "weak "; break;
+ case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break;
+ case GlobalValue::CommonLinkage: Out << "common "; break;
+ case GlobalValue::AppendingLinkage: Out << "appending "; break;
+ case GlobalValue::DLLImportLinkage: Out << "dllimport "; break;
+ case GlobalValue::DLLExportLinkage: Out << "dllexport "; break;
+ case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "available_externally ";
+ break;
+ }
+}
+
+
+static void PrintVisibility(GlobalValue::VisibilityTypes Vis,
+ formatted_raw_ostream &Out) {
+ switch (Vis) {
+ case GlobalValue::DefaultVisibility: break;
+ case GlobalValue::HiddenVisibility: Out << "hidden "; break;
+ case GlobalValue::ProtectedVisibility: Out << "protected "; break;
+ }
+}
+
+static void PrintThreadLocalModel(GlobalVariable::ThreadLocalMode TLM,
+ formatted_raw_ostream &Out) {
+ switch (TLM) {
+ case GlobalVariable::NotThreadLocal:
+ break;
+ case GlobalVariable::GeneralDynamicTLSModel:
+ Out << "thread_local ";
+ break;
+ case GlobalVariable::LocalDynamicTLSModel:
+ Out << "thread_local(localdynamic) ";
+ break;
+ case GlobalVariable::InitialExecTLSModel:
+ Out << "thread_local(initialexec) ";
+ break;
+ case GlobalVariable::LocalExecTLSModel:
+ Out << "thread_local(localexec) ";
+ break;
+ }
+}
+
+void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
+ if (GV->isMaterializable())
+ Out << "; Materializable\n";
+
+ WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent());
+ Out << " = ";
+
+ if (!GV->hasInitializer() && GV->hasExternalLinkage())
+ Out << "external ";
+
+ PrintLinkage(GV->getLinkage(), Out);
+ PrintVisibility(GV->getVisibility(), Out);
+ PrintThreadLocalModel(GV->getThreadLocalMode(), Out);
+
+ if (unsigned AddressSpace = GV->getType()->getAddressSpace())
+ Out << "addrspace(" << AddressSpace << ") ";
+ if (GV->hasUnnamedAddr()) Out << "unnamed_addr ";
+ if (GV->isExternallyInitialized()) Out << "externally_initialized ";
+ Out << (GV->isConstant() ? "constant " : "global ");
+ TypePrinter.print(GV->getType()->getElementType(), Out);
+
+ if (GV->hasInitializer()) {
+ Out << ' ';
+ writeOperand(GV->getInitializer(), false);
+ }
+
+ if (GV->hasSection()) {
+ Out << ", section \"";
+ PrintEscapedString(GV->getSection(), Out);
+ Out << '"';
+ }
+ if (GV->getAlignment())
+ Out << ", align " << GV->getAlignment();
+
+ printInfoComment(*GV);
+}
+
+void AssemblyWriter::printAlias(const GlobalAlias *GA) {
+ if (GA->isMaterializable())
+ Out << "; Materializable\n";
+
+ // Don't crash when dumping partially built GA
+ if (!GA->hasName())
+ Out << "<<nameless>> = ";
+ else {
+ PrintLLVMName(Out, GA);
+ Out << " = ";
+ }
+ PrintVisibility(GA->getVisibility(), Out);
+
+ Out << "alias ";
+
+ PrintLinkage(GA->getLinkage(), Out);
+
+ const Constant *Aliasee = GA->getAliasee();
+
+ if (Aliasee == 0) {
+ TypePrinter.print(GA->getType(), Out);
+ Out << " <<NULL ALIASEE>>";
+ } else {
+ writeOperand(Aliasee, !isa<ConstantExpr>(Aliasee));
+ }
+
+ printInfoComment(*GA);
+ Out << '\n';
+}
+
+void AssemblyWriter::printTypeIdentities() {
+ if (TypePrinter.NumberedTypes.empty() &&
+ TypePrinter.NamedTypes.empty())
+ return;
+
+ Out << '\n';
+
+ // We know all the numbers that each type is used and we know that it is a
+ // dense assignment. Convert the map to an index table.
+ std::vector<StructType*> NumberedTypes(TypePrinter.NumberedTypes.size());
+ for (DenseMap<StructType*, unsigned>::iterator I =
+ TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end();
+ I != E; ++I) {
+ assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?");
+ NumberedTypes[I->second] = I->first;
+ }
+
+ // Emit all numbered types.
+ for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
+ Out << '%' << i << " = type ";
+
+ // Make sure we print out at least one level of the type structure, so
+ // that we do not get %2 = type %2
+ TypePrinter.printStructBody(NumberedTypes[i], Out);
+ Out << '\n';
+ }
+
+ for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) {
+ PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix);
+ Out << " = type ";
+
+ // Make sure we print out at least one level of the type structure, so
+ // that we do not get %FILE = type %FILE
+ TypePrinter.printStructBody(TypePrinter.NamedTypes[i], Out);
+ Out << '\n';
+ }
+}
+
+/// printFunction - Print all aspects of a function.
+///
+void AssemblyWriter::printFunction(const Function *F) {
+ // Print out the return type and name.
+ Out << '\n';
+
+ if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
+
+ if (F->isMaterializable())
+ Out << "; Materializable\n";
+
+ if (F->isDeclaration())
+ Out << "declare ";
+ else
+ Out << "define ";
+
+ PrintLinkage(F->getLinkage(), Out);
+ PrintVisibility(F->getVisibility(), Out);
+
+ // Print the calling convention.
+ if (F->getCallingConv() != CallingConv::C) {
+ PrintCallingConv(F->getCallingConv(), Out);
+ Out << " ";
+ }
+
+ FunctionType *FT = F->getFunctionType();
+ const AttributeSet &Attrs = F->getAttributes();
+ if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
+ Out << Attrs.getAsString(AttributeSet::ReturnIndex) << ' ';
+ TypePrinter.print(F->getReturnType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
+ Out << '(';
+ Machine.incorporateFunction(F);
+
+ // Loop over the arguments, printing them...
+
+ unsigned Idx = 1;
+ if (!F->isDeclaration()) {
+ // If this isn't a declaration, print the argument names as well.
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ // Insert commas as we go... the first arg doesn't get a comma
+ if (I != F->arg_begin()) Out << ", ";
+ printArgument(I, Attrs, Idx);
+ Idx++;
+ }
+ } else {
+ // Otherwise, print the types from the function type.
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ // Insert commas as we go... the first arg doesn't get a comma
+ if (i) Out << ", ";
+
+ // Output type...
+ TypePrinter.print(FT->getParamType(i), Out);
+
+ if (Attrs.hasAttributes(i+1))
+ Out << ' ' << Attrs.getAsString(i+1);
+ }
+ }
+
+ // Finish printing arguments...
+ if (FT->isVarArg()) {
+ if (FT->getNumParams()) Out << ", ";
+ Out << "..."; // Output varargs portion of signature!
+ }
+ Out << ')';
+ if (F->hasUnnamedAddr())
+ Out << " unnamed_addr";
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes());
+ if (F->hasSection()) {
+ Out << " section \"";
+ PrintEscapedString(F->getSection(), Out);
+ Out << '"';
+ }
+ if (F->getAlignment())
+ Out << " align " << F->getAlignment();
+ if (F->hasGC())
+ Out << " gc \"" << F->getGC() << '"';
+ if (F->isDeclaration()) {
+ Out << '\n';
+ } else {
+ Out << " {";
+ // Output all of the function's basic blocks.
+ for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
+ printBasicBlock(I);
+
+ Out << "}\n";
+ }
+
+ Machine.purgeFunction();
+}
+
+/// printArgument - This member is called for every argument that is passed into
+/// the function. Simply print it out
+///
+void AssemblyWriter::printArgument(const Argument *Arg,
+ AttributeSet Attrs, unsigned Idx) {
+ // Output type...
+ TypePrinter.print(Arg->getType(), Out);
+
+ // Output parameter attributes list
+ if (Attrs.hasAttributes(Idx))
+ Out << ' ' << Attrs.getAsString(Idx);
+
+ // Output name, if available...
+ if (Arg->hasName()) {
+ Out << ' ';
+ PrintLLVMName(Out, Arg);
+ }
+}
+
+/// printBasicBlock - This member is called for each basic block in a method.
+///
+void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
+ if (BB->hasName()) { // Print out the label if it exists...
+ Out << "\n";
+ PrintLLVMName(Out, BB->getName(), LabelPrefix);
+ Out << ':';
+ } else if (!BB->use_empty()) { // Don't print block # of no uses...
+ Out << "\n; <label>:";
+ int Slot = Machine.getLocalSlot(BB);
+ if (Slot != -1)
+ Out << Slot;
+ else
+ Out << "<badref>";
+ }
+
+ if (BB->getParent() == 0) {
+ Out.PadToColumn(50);
+ Out << "; Error: Block without parent!";
+ } else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
+ // Output predecessors for the block.
+ Out.PadToColumn(50);
+ Out << ";";
+ const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ if (PI == PE) {
+ Out << " No predecessors!";
+ } else {
+ Out << " preds = ";
+ writeOperand(*PI, false);
+ for (++PI; PI != PE; ++PI) {
+ Out << ", ";
+ writeOperand(*PI, false);
+ }
+ }
+ }
+
+ Out << "\n";
+
+ if (AnnotationWriter) AnnotationWriter->emitBasicBlockStartAnnot(BB, Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ printInstruction(*I);
+ Out << '\n';
+ }
+
+ if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
+}
+
+/// printInfoComment - Print a little comment after the instruction indicating
+/// which slot it occupies.
+///
+void AssemblyWriter::printInfoComment(const Value &V) {
+ if (AnnotationWriter) {
+ AnnotationWriter->printInfoComment(V, Out);
+ return;
+ }
+}
+
+// This member is called for each Instruction in a function..
+void AssemblyWriter::printInstruction(const Instruction &I) {
+ if (AnnotationWriter) AnnotationWriter->emitInstructionAnnot(&I, Out);
+
+ // Print out indentation for an instruction.
+ Out << " ";
+
+ // Print out name if it exists...
+ if (I.hasName()) {
+ PrintLLVMName(Out, &I);
+ Out << " = ";
+ } else if (!I.getType()->isVoidTy()) {
+ // Print out the def slot taken.
+ int SlotNum = Machine.getLocalSlot(&I);
+ if (SlotNum == -1)
+ Out << "<badref> = ";
+ else
+ Out << '%' << SlotNum << " = ";
+ }
+
+ if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall())
+ Out << "tail ";
+
+ // Print out the opcode...
+ Out << I.getOpcodeName();
+
+ // If this is an atomic load or store, print out the atomic marker.
+ if ((isa<LoadInst>(I) && cast<LoadInst>(I).isAtomic()) ||
+ (isa<StoreInst>(I) && cast<StoreInst>(I).isAtomic()))
+ Out << " atomic";
+
+ // If this is a volatile operation, print out the volatile marker.
+ if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) ||
+ (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) ||
+ (isa<AtomicCmpXchgInst>(I) && cast<AtomicCmpXchgInst>(I).isVolatile()) ||
+ (isa<AtomicRMWInst>(I) && cast<AtomicRMWInst>(I).isVolatile()))
+ Out << " volatile";
+
+ // Print out optimization information.
+ WriteOptimizationInfo(Out, &I);
+
+ // Print out the compare instruction predicates
+ if (const CmpInst *CI = dyn_cast<CmpInst>(&I))
+ Out << ' ' << getPredicateText(CI->getPredicate());
+
+ // Print out the atomicrmw operation
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I))
+ writeAtomicRMWOperation(Out, RMWI->getOperation());
+
+ // Print out the type of the operands...
+ const Value *Operand = I.getNumOperands() ? I.getOperand(0) : 0;
+
+ // Special case conditional branches to swizzle the condition out to the front
+ if (isa<BranchInst>(I) && cast<BranchInst>(I).isConditional()) {
+ const BranchInst &BI(cast<BranchInst>(I));
+ Out << ' ';
+ writeOperand(BI.getCondition(), true);
+ Out << ", ";
+ writeOperand(BI.getSuccessor(0), true);
+ Out << ", ";
+ writeOperand(BI.getSuccessor(1), true);
+
+ } else if (isa<SwitchInst>(I)) {
+ const SwitchInst& SI(cast<SwitchInst>(I));
+ // Special case switch instruction to get formatting nice and correct.
+ Out << ' ';
+ writeOperand(SI.getCondition(), true);
+ Out << ", ";
+ writeOperand(SI.getDefaultDest(), true);
+ Out << " [";
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ Out << "\n ";
+ writeOperand(i.getCaseValue(), true);
+ Out << ", ";
+ writeOperand(i.getCaseSuccessor(), true);
+ }
+ Out << "\n ]";
+ } else if (isa<IndirectBrInst>(I)) {
+ // Special case indirectbr instruction to get formatting nice and correct.
+ Out << ' ';
+ writeOperand(Operand, true);
+ Out << ", [";
+
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+ if (i != 1)
+ Out << ", ";
+ writeOperand(I.getOperand(i), true);
+ }
+ Out << ']';
+ } else if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
+ Out << ' ';
+ TypePrinter.print(I.getType(), Out);
+ Out << ' ';
+
+ for (unsigned op = 0, Eop = PN->getNumIncomingValues(); op < Eop; ++op) {
+ if (op) Out << ", ";
+ Out << "[ ";
+ writeOperand(PN->getIncomingValue(op), false); Out << ", ";
+ writeOperand(PN->getIncomingBlock(op), false); Out << " ]";
+ }
+ } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&I)) {
+ Out << ' ';
+ writeOperand(I.getOperand(0), true);
+ for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+ Out << ", " << *i;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&I)) {
+ Out << ' ';
+ writeOperand(I.getOperand(0), true); Out << ", ";
+ writeOperand(I.getOperand(1), true);
+ for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+ Out << ", " << *i;
+ } else if (const LandingPadInst *LPI = dyn_cast<LandingPadInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(I.getType(), Out);
+ Out << " personality ";
+ writeOperand(I.getOperand(0), true); Out << '\n';
+
+ if (LPI->isCleanup())
+ Out << " cleanup";
+
+ for (unsigned i = 0, e = LPI->getNumClauses(); i != e; ++i) {
+ if (i != 0 || LPI->isCleanup()) Out << "\n";
+ if (LPI->isCatch(i))
+ Out << " catch ";
+ else
+ Out << " filter ";
+
+ writeOperand(LPI->getClause(i), true);
+ }
+ } else if (isa<ReturnInst>(I) && !Operand) {
+ Out << " void";
+ } else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // Print the calling convention being used.
+ if (CI->getCallingConv() != CallingConv::C) {
+ Out << " ";
+ PrintCallingConv(CI->getCallingConv(), Out);
+ }
+
+ Operand = CI->getCalledValue();
+ PointerType *PTy = cast<PointerType>(Operand->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Type *RetTy = FTy->getReturnType();
+ const AttributeSet &PAL = CI->getAttributes();
+
+ if (PAL.hasAttributes(AttributeSet::ReturnIndex))
+ Out << ' ' << PAL.getAsString(AttributeSet::ReturnIndex);
+
+ // If possible, print out the short form of the call instruction. We can
+ // only do this if the first argument is a pointer to a nonvararg function,
+ // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ if (!FTy->isVarArg() &&
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
+ TypePrinter.print(RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ } else {
+ writeOperand(Operand, true);
+ }
+ Out << '(';
+ for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
+ if (op > 0)
+ Out << ", ";
+ writeParamOperand(CI->getArgOperand(op), PAL, op + 1);
+ }
+ Out << ')';
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ Operand = II->getCalledValue();
+ PointerType *PTy = cast<PointerType>(Operand->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Type *RetTy = FTy->getReturnType();
+ const AttributeSet &PAL = II->getAttributes();
+
+ // Print the calling convention being used.
+ if (II->getCallingConv() != CallingConv::C) {
+ Out << " ";
+ PrintCallingConv(II->getCallingConv(), Out);
+ }
+
+ if (PAL.hasAttributes(AttributeSet::ReturnIndex))
+ Out << ' ' << PAL.getAsString(AttributeSet::ReturnIndex);
+
+ // If possible, print out the short form of the invoke instruction. We can
+ // only do this if the first argument is a pointer to a nonvararg function,
+ // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ if (!FTy->isVarArg() &&
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
+ TypePrinter.print(RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ } else {
+ writeOperand(Operand, true);
+ }
+ Out << '(';
+ for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
+ if (op)
+ Out << ", ";
+ writeParamOperand(II->getArgOperand(op), PAL, op + 1);
+ }
+
+ Out << ')';
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+
+ Out << "\n to ";
+ writeOperand(II->getNormalDest(), true);
+ Out << " unwind ";
+ writeOperand(II->getUnwindDest(), true);
+
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(AI->getAllocatedType(), Out);
+ if (!AI->getArraySize() || AI->isArrayAllocation()) {
+ Out << ", ";
+ writeOperand(AI->getArraySize(), true);
+ }
+ if (AI->getAlignment()) {
+ Out << ", align " << AI->getAlignment();
+ }
+ } else if (isa<CastInst>(I)) {
+ if (Operand) {
+ Out << ' ';
+ writeOperand(Operand, true); // Work with broken code
+ }
+ Out << " to ";
+ TypePrinter.print(I.getType(), Out);
+ } else if (isa<VAArgInst>(I)) {
+ if (Operand) {
+ Out << ' ';
+ writeOperand(Operand, true); // Work with broken code
+ }
+ Out << ", ";
+ TypePrinter.print(I.getType(), Out);
+ } else if (Operand) { // Print the normal way.
+
+ // PrintAllTypes - Instructions who have operands of all the same type
+ // omit the type from all but the first operand. If the instruction has
+ // different type operands (for example br), then they are all printed.
+ bool PrintAllTypes = false;
+ Type *TheType = Operand->getType();
+
+ // Select, Store and ShuffleVector always print all types.
+ if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I)
+ || isa<ReturnInst>(I)) {
+ PrintAllTypes = true;
+ } else {
+ for (unsigned i = 1, E = I.getNumOperands(); i != E; ++i) {
+ Operand = I.getOperand(i);
+ // note that Operand shouldn't be null, but the test helps make dump()
+ // more tolerant of malformed IR
+ if (Operand && Operand->getType() != TheType) {
+ PrintAllTypes = true; // We have differing types! Print them all!
+ break;
+ }
+ }
+ }
+
+ if (!PrintAllTypes) {
+ Out << ' ';
+ TypePrinter.print(TheType, Out);
+ }
+
+ Out << ' ';
+ for (unsigned i = 0, E = I.getNumOperands(); i != E; ++i) {
+ if (i) Out << ", ";
+ writeOperand(I.getOperand(i), PrintAllTypes);
+ }
+ }
+
+ // Print atomic ordering/alignment for memory operations
+ if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isAtomic())
+ writeAtomic(LI->getOrdering(), LI->getSynchScope());
+ if (LI->getAlignment())
+ Out << ", align " << LI->getAlignment();
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ if (SI->isAtomic())
+ writeAtomic(SI->getOrdering(), SI->getSynchScope());
+ if (SI->getAlignment())
+ Out << ", align " << SI->getAlignment();
+ } else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+ writeAtomic(CXI->getOrdering(), CXI->getSynchScope());
+ } else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) {
+ writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope());
+ } else if (const FenceInst *FI = dyn_cast<FenceInst>(&I)) {
+ writeAtomic(FI->getOrdering(), FI->getSynchScope());
+ }
+
+ // Print Metadata info.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD;
+ I.getAllMetadata(InstMD);
+ if (!InstMD.empty()) {
+ SmallVector<StringRef, 8> MDNames;
+ I.getType()->getContext().getMDKindNames(MDNames);
+ for (unsigned i = 0, e = InstMD.size(); i != e; ++i) {
+ unsigned Kind = InstMD[i].first;
+ if (Kind < MDNames.size()) {
+ Out << ", !" << MDNames[Kind];
+ } else {
+ Out << ", !<unknown kind #" << Kind << ">";
+ }
+ Out << ' ';
+ WriteAsOperandInternal(Out, InstMD[i].second, &TypePrinter, &Machine,
+ TheModule);
+ }
+ }
+ printInfoComment(I);
+}
+
+static void WriteMDNodeComment(const MDNode *Node,
+ formatted_raw_ostream &Out) {
+ if (Node->getNumOperands() < 1)
+ return;
+
+ Value *Op = Node->getOperand(0);
+ if (!Op || !isa<ConstantInt>(Op) || cast<ConstantInt>(Op)->getBitWidth() < 32)
+ return;
+
+ DIDescriptor Desc(Node);
+ if (!Desc.Verify())
+ return;
+
+ unsigned Tag = Desc.getTag();
+ Out.PadToColumn(50);
+ if (dwarf::TagString(Tag)) {
+ Out << "; ";
+ Desc.print(Out);
+ } else if (Tag == dwarf::DW_TAG_user_base) {
+ Out << "; [ DW_TAG_user_base ]";
+ }
+}
+
+void AssemblyWriter::writeAllMDNodes() {
+ SmallVector<const MDNode *, 16> Nodes;
+ Nodes.resize(Machine.mdn_size());
+ for (SlotTracker::mdn_iterator I = Machine.mdn_begin(), E = Machine.mdn_end();
+ I != E; ++I)
+ Nodes[I->second] = cast<MDNode>(I->first);
+
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+ Out << '!' << i << " = metadata ";
+ printMDNodeBody(Nodes[i]);
+ }
+}
+
+void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
+ WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
+ WriteMDNodeComment(Node, Out);
+ Out << "\n";
+}
+
+void AssemblyWriter::writeAllAttributeGroups() {
+ std::vector<std::pair<AttributeSet, unsigned> > asVec;
+ asVec.resize(Machine.as_size());
+
+ for (SlotTracker::as_iterator I = Machine.as_begin(), E = Machine.as_end();
+ I != E; ++I)
+ asVec[I->second] = *I;
+
+ for (std::vector<std::pair<AttributeSet, unsigned> >::iterator
+ I = asVec.begin(), E = asVec.end(); I != E; ++I)
+ Out << "attributes #" << I->second << " = { "
+ << I->first.getAsString(AttributeSet::FunctionIndex, true) << " }\n";
+}
+
+//===----------------------------------------------------------------------===//
+// External Interface declarations
+//===----------------------------------------------------------------------===//
+
+void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(this);
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, this, AAW);
+ W.printModule(this);
+}
+
+void NamedMDNode::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(getParent());
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, getParent(), AAW);
+ W.printNamedMDNode(this);
+}
+
+void Type::print(raw_ostream &OS) const {
+ if (this == 0) {
+ OS << "<null Type>";
+ return;
+ }
+ TypePrinting TP;
+ TP.print(const_cast<Type*>(this), OS);
+
+ // If the type is a named struct type, print the body as well.
+ if (StructType *STy = dyn_cast<StructType>(const_cast<Type*>(this)))
+ if (!STy->isLiteral()) {
+ OS << " = type ";
+ TP.printStructBody(STy, OS);
+ }
+}
+
+void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ if (this == 0) {
+ ROS << "printing a <null> value\n";
+ return;
+ }
+ formatted_raw_ostream OS(ROS);
+ if (const Instruction *I = dyn_cast<Instruction>(this)) {
+ const Function *F = I->getParent() ? I->getParent()->getParent() : 0;
+ SlotTracker SlotTable(F);
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), AAW);
+ W.printInstruction(*I);
+ } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
+ SlotTracker SlotTable(BB->getParent());
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), AAW);
+ W.printBasicBlock(BB);
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
+ SlotTracker SlotTable(GV->getParent());
+ AssemblyWriter W(OS, SlotTable, GV->getParent(), AAW);
+ if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
+ W.printGlobal(V);
+ else if (const Function *F = dyn_cast<Function>(GV))
+ W.printFunction(F);
+ else
+ W.printAlias(cast<GlobalAlias>(GV));
+ } else if (const MDNode *N = dyn_cast<MDNode>(this)) {
+ const Function *F = N->getFunction();
+ SlotTracker SlotTable(F);
+ AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
+ W.printMDNodeBody(N);
+ } else if (const Constant *C = dyn_cast<Constant>(this)) {
+ TypePrinting TypePrinter;
+ TypePrinter.print(C->getType(), OS);
+ OS << ' ';
+ WriteConstantInternal(OS, C, TypePrinter, 0, 0);
+ } else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
+ isa<Argument>(this)) {
+ WriteAsOperand(OS, this, true, 0);
+ } else {
+ // Otherwise we don't know what it is. Call the virtual function to
+ // allow a subclass to print itself.
+ printCustom(OS);
+ }
+}
+
+// Value::printCustom - subclasses should override this to implement printing.
+void Value::printCustom(raw_ostream &OS) const {
+ llvm_unreachable("Unknown value to print out!");
+}
+
+// Value::dump - allow easy printing of Values from the debugger.
+void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
+
+// Type::dump - allow easy printing of Types from the debugger.
+void Type::dump() const { print(dbgs()); }
+
+// Module::dump() - Allow printing of Modules from the debugger.
+void Module::dump() const { print(dbgs(), 0); }
+
+// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
+void NamedMDNode::dump() const { print(dbgs(), 0); }
diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h
new file mode 100644
index 000000000000..ad2670dade12
--- /dev/null
+++ b/contrib/llvm/lib/IR/AttributeImpl.h
@@ -0,0 +1,278 @@
+//===-- AttributeImpl.h - Attribute Internals -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines various helper methods and classes used by
+/// LLVMContextImpl for creating and managing attributes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ATTRIBUTESIMPL_H
+#define LLVM_ATTRIBUTESIMPL_H
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/IR/Attributes.h"
+#include <string>
+
+namespace llvm {
+
+class Constant;
+class LLVMContext;
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief A set of classes that contain the kind and (optional) value of the
+/// attribute object. There are three main categories: enum attribute entries,
+/// represented by Attribute::AttrKind; alignment attribute entries; and string
+/// attribute enties, which are for target-dependent attributes.
+class AttributeEntry {
+ unsigned char KindID;
+protected:
+ enum AttrEntryKind {
+ EnumAttrEntry,
+ AlignAttrEntry,
+ StringAttrEntry
+ };
+public:
+ AttributeEntry(AttrEntryKind Kind)
+ : KindID(Kind) {}
+ virtual ~AttributeEntry() {}
+
+ unsigned getKindID() const { return KindID; }
+
+ static inline bool classof(const AttributeEntry *) { return true; }
+};
+
+class EnumAttributeEntry : public AttributeEntry {
+ Attribute::AttrKind Kind;
+public:
+ EnumAttributeEntry(Attribute::AttrKind Kind)
+ : AttributeEntry(EnumAttrEntry), Kind(Kind) {}
+
+ Attribute::AttrKind getEnumKind() const { return Kind; }
+
+ static inline bool classof(const AttributeEntry *AE) {
+ return AE->getKindID() == EnumAttrEntry;
+ }
+ static inline bool classof(const EnumAttributeEntry *) { return true; }
+};
+
+class AlignAttributeEntry : public AttributeEntry {
+ Attribute::AttrKind Kind;
+ unsigned Align;
+public:
+ AlignAttributeEntry(Attribute::AttrKind Kind, unsigned Align)
+ : AttributeEntry(AlignAttrEntry), Kind(Kind), Align(Align) {}
+
+ Attribute::AttrKind getEnumKind() const { return Kind; }
+ unsigned getAlignment() const { return Align; }
+
+ static inline bool classof(const AttributeEntry *AE) {
+ return AE->getKindID() == AlignAttrEntry;
+ }
+ static inline bool classof(const AlignAttributeEntry *) { return true; }
+};
+
+class StringAttributeEntry : public AttributeEntry {
+ std::string Kind;
+ std::string Val;
+public:
+ StringAttributeEntry(StringRef Kind, StringRef Val = StringRef())
+ : AttributeEntry(StringAttrEntry), Kind(Kind), Val(Val) {}
+
+ StringRef getStringKind() const { return Kind; }
+ StringRef getStringValue() const { return Val; }
+
+ static inline bool classof(const AttributeEntry *AE) {
+ return AE->getKindID() == StringAttrEntry;
+ }
+ static inline bool classof(const StringAttributeEntry *) { return true; }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief This class represents a single, uniqued attribute. That attribute
+/// could be a single enum, a tuple, or a string.
+class AttributeImpl : public FoldingSetNode {
+ LLVMContext &Context; ///< Global context for uniquing objects
+
+ AttributeEntry *Entry; ///< Holds the kind and value of the attribute
+
+ // AttributesImpl is uniqued, these should not be publicly available.
+ void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION;
+ AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION;
+public:
+ AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind);
+ AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind, unsigned Align);
+ AttributeImpl(LLVMContext &C, StringRef Kind, StringRef Val = StringRef());
+ ~AttributeImpl();
+
+ LLVMContext &getContext() { return Context; }
+
+ bool isEnumAttribute() const;
+ bool isAlignAttribute() const;
+ bool isStringAttribute() const;
+
+ bool hasAttribute(Attribute::AttrKind A) const;
+ bool hasAttribute(StringRef Kind) const;
+
+ Attribute::AttrKind getKindAsEnum() const;
+ uint64_t getValueAsInt() const;
+
+ StringRef getKindAsString() const;
+ StringRef getValueAsString() const;
+
+ /// \brief Used when sorting the attributes.
+ bool operator<(const AttributeImpl &AI) const;
+
+ void Profile(FoldingSetNodeID &ID) const {
+ if (isEnumAttribute())
+ Profile(ID, getKindAsEnum(), 0);
+ else if (isAlignAttribute())
+ Profile(ID, getKindAsEnum(), getValueAsInt());
+ else
+ Profile(ID, getKindAsString(), getValueAsString());
+ }
+ static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+ uint64_t Val) {
+ ID.AddInteger(Kind);
+ if (Val) ID.AddInteger(Val);
+ }
+ static void Profile(FoldingSetNodeID &ID, StringRef Kind, StringRef Values) {
+ ID.AddString(Kind);
+ if (!Values.empty()) ID.AddString(Values);
+ }
+
+ // FIXME: Remove this!
+ static uint64_t getAttrMask(Attribute::AttrKind Val);
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief This class represents a group of attributes that apply to one
+/// element: function, return type, or parameter.
+class AttributeSetNode : public FoldingSetNode {
+ SmallVector<Attribute, 4> AttrList;
+
+ AttributeSetNode(ArrayRef<Attribute> Attrs)
+ : AttrList(Attrs.begin(), Attrs.end()) {}
+
+ // AttributesSetNode is uniqued, these should not be publicly available.
+ void operator=(const AttributeSetNode &) LLVM_DELETED_FUNCTION;
+ AttributeSetNode(const AttributeSetNode &) LLVM_DELETED_FUNCTION;
+public:
+ static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
+
+ bool hasAttribute(Attribute::AttrKind Kind) const;
+ bool hasAttribute(StringRef Kind) const;
+ bool hasAttributes() const { return !AttrList.empty(); }
+
+ Attribute getAttribute(Attribute::AttrKind Kind) const;
+ Attribute getAttribute(StringRef Kind) const;
+
+ unsigned getAlignment() const;
+ unsigned getStackAlignment() const;
+ std::string getAsString(bool InAttrGrp) const;
+
+ typedef SmallVectorImpl<Attribute>::iterator iterator;
+ typedef SmallVectorImpl<Attribute>::const_iterator const_iterator;
+
+ iterator begin() { return AttrList.begin(); }
+ iterator end() { return AttrList.end(); }
+
+ const_iterator begin() const { return AttrList.begin(); }
+ const_iterator end() const { return AttrList.end(); }
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, AttrList);
+ }
+ static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
+ for (unsigned I = 0, E = AttrList.size(); I != E; ++I)
+ AttrList[I].Profile(ID);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief This class represents a set of attributes that apply to the function,
+/// return type, and parameters.
+class AttributeSetImpl : public FoldingSetNode {
+ friend class AttributeSet;
+
+ LLVMContext &Context;
+
+ typedef std::pair<unsigned, AttributeSetNode*> IndexAttrPair;
+ SmallVector<IndexAttrPair, 4> AttrNodes;
+
+ // AttributesSet is uniqued, these should not be publicly available.
+ void operator=(const AttributeSetImpl &) LLVM_DELETED_FUNCTION;
+ AttributeSetImpl(const AttributeSetImpl &) LLVM_DELETED_FUNCTION;
+public:
+ AttributeSetImpl(LLVMContext &C,
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> > attrs)
+ : Context(C), AttrNodes(attrs.begin(), attrs.end()) {}
+
+ /// \brief Get the context that created this AttributeSetImpl.
+ LLVMContext &getContext() { return Context; }
+
+ /// \brief Return the number of attributes this AttributeSet contains.
+ unsigned getNumAttributes() const { return AttrNodes.size(); }
+
+ /// \brief Get the index of the given "slot" in the AttrNodes list. This index
+ /// is the index of the return, parameter, or function object that the
+ /// attributes are applied to, not the index into the AttrNodes list where the
+ /// attributes reside.
+ uint64_t getSlotIndex(unsigned Slot) const {
+ return AttrNodes[Slot].first;
+ }
+
+ /// \brief Retrieve the attributes for the given "slot" in the AttrNode list.
+ /// \p Slot is an index into the AttrNodes list, not the index of the return /
+ /// parameter/ function which the attributes apply to.
+ AttributeSet getSlotAttributes(unsigned Slot) const {
+ return AttributeSet::get(Context, AttrNodes[Slot]);
+ }
+
+ /// \brief Retrieve the attribute set node for the given "slot" in the
+ /// AttrNode list.
+ AttributeSetNode *getSlotNode(unsigned Slot) const {
+ return AttrNodes[Slot].second;
+ }
+
+ typedef AttributeSetNode::iterator iterator;
+ typedef AttributeSetNode::const_iterator const_iterator;
+
+ iterator begin(unsigned Idx)
+ { return AttrNodes[Idx].second->begin(); }
+ iterator end(unsigned Idx)
+ { return AttrNodes[Idx].second->end(); }
+
+ const_iterator begin(unsigned Idx) const
+ { return AttrNodes[Idx].second->begin(); }
+ const_iterator end(unsigned Idx) const
+ { return AttrNodes[Idx].second->end(); }
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, AttrNodes);
+ }
+ static void Profile(FoldingSetNodeID &ID,
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> > Nodes) {
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+ ID.AddInteger(Nodes[i].first);
+ ID.AddPointer(Nodes[i].second);
+ }
+ }
+
+ // FIXME: This atrocity is temporary.
+ uint64_t Raw(uint64_t Index) const;
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp
new file mode 100644
index 000000000000..2d828914cdca
--- /dev/null
+++ b/contrib/llvm/lib/IR/Attributes.cpp
@@ -0,0 +1,1180 @@
+//===-- Attributes.cpp - Implement AttributesList -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// \brief This file implements the Attribute, AttributeImpl, AttrBuilder,
+// AttributeSetImpl, and AttributeSet classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Attributes.h"
+#include "AttributeImpl.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Attribute Construction Methods
+//===----------------------------------------------------------------------===//
+
+Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
+ uint64_t Val) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ FoldingSetNodeID ID;
+ ID.AddInteger(Kind);
+ if (Val) ID.AddInteger(Val);
+
+ void *InsertPoint;
+ AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (!PA) {
+ // If we didn't find any existing attributes of the same shape then create a
+ // new one and insert it.
+ PA = !Val ?
+ new AttributeImpl(Context, Kind) :
+ new AttributeImpl(Context, Kind, Val);
+ pImpl->AttrsSet.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the Attribute that we found or created.
+ return Attribute(PA);
+}
+
+Attribute Attribute::get(LLVMContext &Context, StringRef Kind, StringRef Val) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ FoldingSetNodeID ID;
+ ID.AddString(Kind);
+ if (!Val.empty()) ID.AddString(Val);
+
+ void *InsertPoint;
+ AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (!PA) {
+ // If we didn't find any existing attributes of the same shape then create a
+ // new one and insert it.
+ PA = new AttributeImpl(Context, Kind, Val);
+ pImpl->AttrsSet.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the Attribute that we found or created.
+ return Attribute(PA);
+}
+
+Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) {
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x40000000 && "Alignment too large.");
+ return get(Context, Alignment, Align);
+}
+
+Attribute Attribute::getWithStackAlignment(LLVMContext &Context,
+ uint64_t Align) {
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x100 && "Alignment too large.");
+ return get(Context, StackAlignment, Align);
+}
+
+//===----------------------------------------------------------------------===//
+// Attribute Accessor Methods
+//===----------------------------------------------------------------------===//
+
+bool Attribute::isEnumAttribute() const {
+ return pImpl && pImpl->isEnumAttribute();
+}
+
+bool Attribute::isAlignAttribute() const {
+ return pImpl && pImpl->isAlignAttribute();
+}
+
+bool Attribute::isStringAttribute() const {
+ return pImpl && pImpl->isStringAttribute();
+}
+
+Attribute::AttrKind Attribute::getKindAsEnum() const {
+ assert((isEnumAttribute() || isAlignAttribute()) &&
+ "Invalid attribute type to get the kind as an enum!");
+ return pImpl ? pImpl->getKindAsEnum() : None;
+}
+
+uint64_t Attribute::getValueAsInt() const {
+ assert(isAlignAttribute() &&
+ "Expected the attribute to be an alignment attribute!");
+ return pImpl ? pImpl->getValueAsInt() : 0;
+}
+
+StringRef Attribute::getKindAsString() const {
+ assert(isStringAttribute() &&
+ "Invalid attribute type to get the kind as a string!");
+ return pImpl ? pImpl->getKindAsString() : StringRef();
+}
+
+StringRef Attribute::getValueAsString() const {
+ assert(isStringAttribute() &&
+ "Invalid attribute type to get the value as a string!");
+ return pImpl ? pImpl->getValueAsString() : StringRef();
+}
+
+bool Attribute::hasAttribute(AttrKind Kind) const {
+ return (pImpl && pImpl->hasAttribute(Kind)) || (!pImpl && Kind == None);
+}
+
+bool Attribute::hasAttribute(StringRef Kind) const {
+ if (!isStringAttribute()) return false;
+ return pImpl && pImpl->hasAttribute(Kind);
+}
+
+/// This returns the alignment field of an attribute as a byte alignment value.
+unsigned Attribute::getAlignment() const {
+ assert(hasAttribute(Attribute::Alignment) &&
+ "Trying to get alignment from non-alignment attribute!");
+ return pImpl->getValueAsInt();
+}
+
+/// This returns the stack alignment field of an attribute as a byte alignment
+/// value.
+unsigned Attribute::getStackAlignment() const {
+ assert(hasAttribute(Attribute::StackAlignment) &&
+ "Trying to get alignment from non-alignment attribute!");
+ return pImpl->getValueAsInt();
+}
+
+std::string Attribute::getAsString(bool InAttrGrp) const {
+ if (!pImpl) return "";
+
+ if (hasAttribute(Attribute::SanitizeAddress))
+ return "sanitize_address";
+ if (hasAttribute(Attribute::AlwaysInline))
+ return "alwaysinline";
+ if (hasAttribute(Attribute::ByVal))
+ return "byval";
+ if (hasAttribute(Attribute::InlineHint))
+ return "inlinehint";
+ if (hasAttribute(Attribute::InReg))
+ return "inreg";
+ if (hasAttribute(Attribute::MinSize))
+ return "minsize";
+ if (hasAttribute(Attribute::Naked))
+ return "naked";
+ if (hasAttribute(Attribute::Nest))
+ return "nest";
+ if (hasAttribute(Attribute::NoAlias))
+ return "noalias";
+ if (hasAttribute(Attribute::NoBuiltin))
+ return "nobuiltin";
+ if (hasAttribute(Attribute::NoCapture))
+ return "nocapture";
+ if (hasAttribute(Attribute::NoDuplicate))
+ return "noduplicate";
+ if (hasAttribute(Attribute::NoImplicitFloat))
+ return "noimplicitfloat";
+ if (hasAttribute(Attribute::NoInline))
+ return "noinline";
+ if (hasAttribute(Attribute::NonLazyBind))
+ return "nonlazybind";
+ if (hasAttribute(Attribute::NoRedZone))
+ return "noredzone";
+ if (hasAttribute(Attribute::NoReturn))
+ return "noreturn";
+ if (hasAttribute(Attribute::NoUnwind))
+ return "nounwind";
+ if (hasAttribute(Attribute::OptimizeForSize))
+ return "optsize";
+ if (hasAttribute(Attribute::ReadNone))
+ return "readnone";
+ if (hasAttribute(Attribute::ReadOnly))
+ return "readonly";
+ if (hasAttribute(Attribute::ReturnsTwice))
+ return "returns_twice";
+ if (hasAttribute(Attribute::SExt))
+ return "signext";
+ if (hasAttribute(Attribute::StackProtect))
+ return "ssp";
+ if (hasAttribute(Attribute::StackProtectReq))
+ return "sspreq";
+ if (hasAttribute(Attribute::StackProtectStrong))
+ return "sspstrong";
+ if (hasAttribute(Attribute::StructRet))
+ return "sret";
+ if (hasAttribute(Attribute::SanitizeThread))
+ return "sanitize_thread";
+ if (hasAttribute(Attribute::SanitizeMemory))
+ return "sanitize_memory";
+ if (hasAttribute(Attribute::UWTable))
+ return "uwtable";
+ if (hasAttribute(Attribute::ZExt))
+ return "zeroext";
+
+ // FIXME: These should be output like this:
+ //
+ // align=4
+ // alignstack=8
+ //
+ if (hasAttribute(Attribute::Alignment)) {
+ std::string Result;
+ Result += "align";
+ Result += (InAttrGrp) ? "=" : " ";
+ Result += utostr(getValueAsInt());
+ return Result;
+ }
+
+ if (hasAttribute(Attribute::StackAlignment)) {
+ std::string Result;
+ Result += "alignstack";
+ if (InAttrGrp) {
+ Result += "=";
+ Result += utostr(getValueAsInt());
+ } else {
+ Result += "(";
+ Result += utostr(getValueAsInt());
+ Result += ")";
+ }
+ return Result;
+ }
+
+ // Convert target-dependent attributes to strings of the form:
+ //
+ // "kind"
+ // "kind" = "value"
+ //
+ if (isStringAttribute()) {
+ std::string Result;
+ Result += '\"' + getKindAsString().str() + '"';
+
+ StringRef Val = pImpl->getValueAsString();
+ if (Val.empty()) return Result;
+
+ Result += "=\"" + Val.str() + '"';
+ return Result;
+ }
+
+ llvm_unreachable("Unknown attribute");
+}
+
+bool Attribute::operator<(Attribute A) const {
+ if (!pImpl && !A.pImpl) return false;
+ if (!pImpl) return true;
+ if (!A.pImpl) return false;
+ return *pImpl < *A.pImpl;
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeImpl Definition
+//===----------------------------------------------------------------------===//
+
+AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind)
+ : Context(C), Entry(new EnumAttributeEntry(Kind)) {}
+
+AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind,
+ unsigned Align)
+ : Context(C) {
+ assert((Kind == Attribute::Alignment || Kind == Attribute::StackAlignment) &&
+ "Wrong kind for alignment attribute!");
+ Entry = new AlignAttributeEntry(Kind, Align);
+}
+
+AttributeImpl::AttributeImpl(LLVMContext &C, StringRef Kind, StringRef Val)
+ : Context(C), Entry(new StringAttributeEntry(Kind, Val)) {}
+
+AttributeImpl::~AttributeImpl() {
+ delete Entry;
+}
+
+bool AttributeImpl::isEnumAttribute() const {
+ return isa<EnumAttributeEntry>(Entry);
+}
+
+bool AttributeImpl::isAlignAttribute() const {
+ return isa<AlignAttributeEntry>(Entry);
+}
+
+bool AttributeImpl::isStringAttribute() const {
+ return isa<StringAttributeEntry>(Entry);
+}
+
+bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
+ if (isStringAttribute()) return false;
+ return getKindAsEnum() == A;
+}
+
+bool AttributeImpl::hasAttribute(StringRef Kind) const {
+ if (!isStringAttribute()) return false;
+ return getKindAsString() == Kind;
+}
+
+Attribute::AttrKind AttributeImpl::getKindAsEnum() const {
+ if (EnumAttributeEntry *E = dyn_cast<EnumAttributeEntry>(Entry))
+ return E->getEnumKind();
+ return cast<AlignAttributeEntry>(Entry)->getEnumKind();
+}
+
+uint64_t AttributeImpl::getValueAsInt() const {
+ return cast<AlignAttributeEntry>(Entry)->getAlignment();
+}
+
+StringRef AttributeImpl::getKindAsString() const {
+ return cast<StringAttributeEntry>(Entry)->getStringKind();
+}
+
+StringRef AttributeImpl::getValueAsString() const {
+ return cast<StringAttributeEntry>(Entry)->getStringValue();
+}
+
+bool AttributeImpl::operator<(const AttributeImpl &AI) const {
+ // This sorts the attributes with Attribute::AttrKinds coming first (sorted
+ // relative to their enum value) and then strings.
+ if (isEnumAttribute()) {
+ if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
+ if (AI.isAlignAttribute()) return true;
+ if (AI.isStringAttribute()) return true;
+ }
+
+ if (isAlignAttribute()) {
+ if (AI.isEnumAttribute()) return false;
+ if (AI.isAlignAttribute()) return getValueAsInt() < AI.getValueAsInt();
+ if (AI.isStringAttribute()) return true;
+ }
+
+ if (AI.isEnumAttribute()) return false;
+ if (AI.isAlignAttribute()) return false;
+ if (getKindAsString() == AI.getKindAsString())
+ return getValueAsString() < AI.getValueAsString();
+ return getKindAsString() < AI.getKindAsString();
+}
+
+uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
+ // FIXME: Remove this.
+ switch (Val) {
+ case Attribute::EndAttrKinds:
+ llvm_unreachable("Synthetic enumerators which should never get here");
+
+ case Attribute::None: return 0;
+ case Attribute::ZExt: return 1 << 0;
+ case Attribute::SExt: return 1 << 1;
+ case Attribute::NoReturn: return 1 << 2;
+ case Attribute::InReg: return 1 << 3;
+ case Attribute::StructRet: return 1 << 4;
+ case Attribute::NoUnwind: return 1 << 5;
+ case Attribute::NoAlias: return 1 << 6;
+ case Attribute::ByVal: return 1 << 7;
+ case Attribute::Nest: return 1 << 8;
+ case Attribute::ReadNone: return 1 << 9;
+ case Attribute::ReadOnly: return 1 << 10;
+ case Attribute::NoInline: return 1 << 11;
+ case Attribute::AlwaysInline: return 1 << 12;
+ case Attribute::OptimizeForSize: return 1 << 13;
+ case Attribute::StackProtect: return 1 << 14;
+ case Attribute::StackProtectReq: return 1 << 15;
+ case Attribute::Alignment: return 31 << 16;
+ case Attribute::NoCapture: return 1 << 21;
+ case Attribute::NoRedZone: return 1 << 22;
+ case Attribute::NoImplicitFloat: return 1 << 23;
+ case Attribute::Naked: return 1 << 24;
+ case Attribute::InlineHint: return 1 << 25;
+ case Attribute::StackAlignment: return 7 << 26;
+ case Attribute::ReturnsTwice: return 1 << 29;
+ case Attribute::UWTable: return 1 << 30;
+ case Attribute::NonLazyBind: return 1U << 31;
+ case Attribute::SanitizeAddress: return 1ULL << 32;
+ case Attribute::MinSize: return 1ULL << 33;
+ case Attribute::NoDuplicate: return 1ULL << 34;
+ case Attribute::StackProtectStrong: return 1ULL << 35;
+ case Attribute::SanitizeThread: return 1ULL << 36;
+ case Attribute::SanitizeMemory: return 1ULL << 37;
+ case Attribute::NoBuiltin: return 1ULL << 38;
+ }
+ llvm_unreachable("Unsupported attribute type");
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeSetNode Definition
+//===----------------------------------------------------------------------===//
+
+AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
+ ArrayRef<Attribute> Attrs) {
+ if (Attrs.empty())
+ return 0;
+
+ // Otherwise, build a key to look up the existing attributes.
+ LLVMContextImpl *pImpl = C.pImpl;
+ FoldingSetNodeID ID;
+
+ SmallVector<Attribute, 8> SortedAttrs(Attrs.begin(), Attrs.end());
+ array_pod_sort(SortedAttrs.begin(), SortedAttrs.end());
+
+ for (SmallVectorImpl<Attribute>::iterator I = SortedAttrs.begin(),
+ E = SortedAttrs.end(); I != E; ++I)
+ I->Profile(ID);
+
+ void *InsertPoint;
+ AttributeSetNode *PA =
+ pImpl->AttrsSetNodes.FindNodeOrInsertPos(ID, InsertPoint);
+
+ // If we didn't find any existing attributes of the same shape then create a
+ // new one and insert it.
+ if (!PA) {
+ PA = new AttributeSetNode(SortedAttrs);
+ pImpl->AttrsSetNodes.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the AttributesListNode that we found or created.
+ return PA;
+}
+
+bool AttributeSetNode::hasAttribute(Attribute::AttrKind Kind) const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Kind))
+ return true;
+ return false;
+}
+
+bool AttributeSetNode::hasAttribute(StringRef Kind) const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Kind))
+ return true;
+ return false;
+}
+
+Attribute AttributeSetNode::getAttribute(Attribute::AttrKind Kind) const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Kind))
+ return *I;
+ return Attribute();
+}
+
+Attribute AttributeSetNode::getAttribute(StringRef Kind) const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Kind))
+ return *I;
+ return Attribute();
+}
+
+unsigned AttributeSetNode::getAlignment() const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Attribute::Alignment))
+ return I->getAlignment();
+ return 0;
+}
+
+unsigned AttributeSetNode::getStackAlignment() const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Attribute::StackAlignment))
+ return I->getStackAlignment();
+ return 0;
+}
+
+std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
+ std::string Str = "";
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ) {
+ Str += I->getAsString(InAttrGrp);
+ if (++I != E) Str += " ";
+ }
+ return Str;
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeSetImpl Definition
+//===----------------------------------------------------------------------===//
+
+uint64_t AttributeSetImpl::Raw(uint64_t Index) const {
+ for (unsigned I = 0, E = getNumAttributes(); I != E; ++I) {
+ if (getSlotIndex(I) != Index) continue;
+ const AttributeSetNode *ASN = AttrNodes[I].second;
+ uint64_t Mask = 0;
+
+ for (AttributeSetNode::const_iterator II = ASN->begin(),
+ IE = ASN->end(); II != IE; ++II) {
+ Attribute Attr = *II;
+
+ // This cannot handle string attributes.
+ if (Attr.isStringAttribute()) continue;
+
+ Attribute::AttrKind Kind = Attr.getKindAsEnum();
+
+ if (Kind == Attribute::Alignment)
+ Mask |= (Log2_32(ASN->getAlignment()) + 1) << 16;
+ else if (Kind == Attribute::StackAlignment)
+ Mask |= (Log2_32(ASN->getStackAlignment()) + 1) << 26;
+ else
+ Mask |= AttributeImpl::getAttrMask(Kind);
+ }
+
+ return Mask;
+ }
+
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeSet Construction and Mutation Methods
+//===----------------------------------------------------------------------===//
+
+AttributeSet
+AttributeSet::getImpl(LLVMContext &C,
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> > Attrs) {
+ LLVMContextImpl *pImpl = C.pImpl;
+ FoldingSetNodeID ID;
+ AttributeSetImpl::Profile(ID, Attrs);
+
+ void *InsertPoint;
+ AttributeSetImpl *PA = pImpl->AttrsLists.FindNodeOrInsertPos(ID, InsertPoint);
+
+ // If we didn't find any existing attributes of the same shape then
+ // create a new one and insert it.
+ if (!PA) {
+ PA = new AttributeSetImpl(C, Attrs);
+ pImpl->AttrsLists.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the AttributesList that we found or created.
+ return AttributeSet(PA);
+}
+
+AttributeSet AttributeSet::get(LLVMContext &C,
+ ArrayRef<std::pair<unsigned, Attribute> > Attrs){
+ // If there are no attributes then return a null AttributesList pointer.
+ if (Attrs.empty())
+ return AttributeSet();
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
+ assert((!i || Attrs[i-1].first <= Attrs[i].first) &&
+ "Misordered Attributes list!");
+ assert(!Attrs[i].second.hasAttribute(Attribute::None) &&
+ "Pointless attribute!");
+ }
+#endif
+
+ // Create a vector if (unsigned, AttributeSetNode*) pairs from the attributes
+ // list.
+ SmallVector<std::pair<unsigned, AttributeSetNode*>, 8> AttrPairVec;
+ for (ArrayRef<std::pair<unsigned, Attribute> >::iterator I = Attrs.begin(),
+ E = Attrs.end(); I != E; ) {
+ unsigned Index = I->first;
+ SmallVector<Attribute, 4> AttrVec;
+ while (I != E && I->first == Index) {
+ AttrVec.push_back(I->second);
+ ++I;
+ }
+
+ AttrPairVec.push_back(std::make_pair(Index,
+ AttributeSetNode::get(C, AttrVec)));
+ }
+
+ return getImpl(C, AttrPairVec);
+}
+
+AttributeSet AttributeSet::get(LLVMContext &C,
+ ArrayRef<std::pair<unsigned,
+ AttributeSetNode*> > Attrs) {
+ // If there are no attributes then return a null AttributesList pointer.
+ if (Attrs.empty())
+ return AttributeSet();
+
+ return getImpl(C, Attrs);
+}
+
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) {
+ if (!B.hasAttributes())
+ return AttributeSet();
+
+ // Add target-independent attributes.
+ SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
+ for (Attribute::AttrKind Kind = Attribute::None;
+ Kind != Attribute::EndAttrKinds; Kind = Attribute::AttrKind(Kind + 1)) {
+ if (!B.contains(Kind))
+ continue;
+
+ if (Kind == Attribute::Alignment)
+ Attrs.push_back(std::make_pair(Idx, Attribute::
+ getWithAlignment(C, B.getAlignment())));
+ else if (Kind == Attribute::StackAlignment)
+ Attrs.push_back(std::make_pair(Idx, Attribute::
+ getWithStackAlignment(C, B.getStackAlignment())));
+ else
+ Attrs.push_back(std::make_pair(Idx, Attribute::get(C, Kind)));
+ }
+
+ // Add target-dependent (string) attributes.
+ for (AttrBuilder::td_iterator I = B.td_begin(), E = B.td_end();
+ I != E; ++I)
+ Attrs.push_back(std::make_pair(Idx, Attribute::get(C, I->first,I->second)));
+
+ return get(C, Attrs);
+}
+
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx,
+ ArrayRef<Attribute::AttrKind> Kind) {
+ SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
+ for (ArrayRef<Attribute::AttrKind>::iterator I = Kind.begin(),
+ E = Kind.end(); I != E; ++I)
+ Attrs.push_back(std::make_pair(Idx, Attribute::get(C, *I)));
+ return get(C, Attrs);
+}
+
+AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
+ if (Attrs.empty()) return AttributeSet();
+
+ SmallVector<std::pair<unsigned, AttributeSetNode*>, 8> AttrNodeVec;
+ for (unsigned I = 0, E = Attrs.size(); I != E; ++I) {
+ AttributeSet AS = Attrs[I];
+ if (!AS.pImpl) continue;
+ AttrNodeVec.append(AS.pImpl->AttrNodes.begin(), AS.pImpl->AttrNodes.end());
+ }
+
+ return getImpl(C, AttrNodeVec);
+}
+
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+ Attribute::AttrKind Attr) const {
+ if (hasAttribute(Idx, Attr)) return *this;
+ return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
+}
+
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+ StringRef Kind) const {
+ llvm::AttrBuilder B;
+ B.addAttribute(Kind);
+ return addAttributes(C, Idx, AttributeSet::get(C, Idx, B));
+}
+
+AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
+ AttributeSet Attrs) const {
+ if (!pImpl) return Attrs;
+ if (!Attrs.pImpl) return *this;
+
+#ifndef NDEBUG
+ // FIXME it is not obvious how this should work for alignment. For now, say
+ // we can't change a known alignment.
+ unsigned OldAlign = getParamAlignment(Idx);
+ unsigned NewAlign = Attrs.getParamAlignment(Idx);
+ assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
+ "Attempt to change alignment!");
+#endif
+
+ // Add the attribute slots before the one we're trying to add.
+ SmallVector<AttributeSet, 4> AttrSet;
+ uint64_t NumAttrs = pImpl->getNumAttributes();
+ AttributeSet AS;
+ uint64_t LastIndex = 0;
+ for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
+ if (getSlotIndex(I) >= Idx) {
+ if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++);
+ break;
+ }
+ LastIndex = I + 1;
+ AttrSet.push_back(getSlotAttributes(I));
+ }
+
+ // Now add the attribute into the correct slot. There may already be an
+ // AttributeSet there.
+ AttrBuilder B(AS, Idx);
+
+ for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I)
+ if (Attrs.getSlotIndex(I) == Idx) {
+ for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I),
+ IE = Attrs.pImpl->end(I); II != IE; ++II)
+ B.addAttribute(*II);
+ break;
+ }
+
+ AttrSet.push_back(AttributeSet::get(C, Idx, B));
+
+ // Add the remaining attribute slots.
+ for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
+ AttrSet.push_back(getSlotAttributes(I));
+
+ return get(C, AttrSet);
+}
+
+AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx,
+ Attribute::AttrKind Attr) const {
+ if (!hasAttribute(Idx, Attr)) return *this;
+ return removeAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
+}
+
+AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
+ AttributeSet Attrs) const {
+ if (!pImpl) return AttributeSet();
+ if (!Attrs.pImpl) return *this;
+
+#ifndef NDEBUG
+ // FIXME it is not obvious how this should work for alignment.
+ // For now, say we can't pass in alignment, which no current use does.
+ assert(!Attrs.hasAttribute(Idx, Attribute::Alignment) &&
+ "Attempt to change alignment!");
+#endif
+
+ // Add the attribute slots before the one we're trying to add.
+ SmallVector<AttributeSet, 4> AttrSet;
+ uint64_t NumAttrs = pImpl->getNumAttributes();
+ AttributeSet AS;
+ uint64_t LastIndex = 0;
+ for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
+ if (getSlotIndex(I) >= Idx) {
+ if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++);
+ break;
+ }
+ LastIndex = I + 1;
+ AttrSet.push_back(getSlotAttributes(I));
+ }
+
+ // Now remove the attribute from the correct slot. There may already be an
+ // AttributeSet there.
+ AttrBuilder B(AS, Idx);
+
+ for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I)
+ if (Attrs.getSlotIndex(I) == Idx) {
+ B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Idx);
+ break;
+ }
+
+ AttrSet.push_back(AttributeSet::get(C, Idx, B));
+
+ // Add the remaining attribute slots.
+ for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
+ AttrSet.push_back(getSlotAttributes(I));
+
+ return get(C, AttrSet);
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeSet Accessor Methods
+//===----------------------------------------------------------------------===//
+
+LLVMContext &AttributeSet::getContext() const {
+ return pImpl->getContext();
+}
+
+AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const {
+ return pImpl && hasAttributes(Idx) ?
+ AttributeSet::get(pImpl->getContext(),
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
+ std::make_pair(Idx, getAttributes(Idx)))) :
+ AttributeSet();
+}
+
+AttributeSet AttributeSet::getRetAttributes() const {
+ return pImpl && hasAttributes(ReturnIndex) ?
+ AttributeSet::get(pImpl->getContext(),
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
+ std::make_pair(ReturnIndex,
+ getAttributes(ReturnIndex)))) :
+ AttributeSet();
+}
+
+AttributeSet AttributeSet::getFnAttributes() const {
+ return pImpl && hasAttributes(FunctionIndex) ?
+ AttributeSet::get(pImpl->getContext(),
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
+ std::make_pair(FunctionIndex,
+ getAttributes(FunctionIndex)))) :
+ AttributeSet();
+}
+
+bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->hasAttribute(Kind) : false;
+}
+
+bool AttributeSet::hasAttribute(unsigned Index, StringRef Kind) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->hasAttribute(Kind) : false;
+}
+
+bool AttributeSet::hasAttributes(unsigned Index) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->hasAttributes() : false;
+}
+
+/// \brief Return true if the specified attribute is set for at least one
+/// parameter or for the return value.
+bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const {
+ if (pImpl == 0) return false;
+
+ for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I)
+ for (AttributeSetImpl::const_iterator II = pImpl->begin(I),
+ IE = pImpl->end(I); II != IE; ++II)
+ if (II->hasAttribute(Attr))
+ return true;
+
+ return false;
+}
+
+Attribute AttributeSet::getAttribute(unsigned Index,
+ Attribute::AttrKind Kind) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getAttribute(Kind) : Attribute();
+}
+
+Attribute AttributeSet::getAttribute(unsigned Index,
+ StringRef Kind) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getAttribute(Kind) : Attribute();
+}
+
+unsigned AttributeSet::getParamAlignment(unsigned Index) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getAlignment() : 0;
+}
+
+unsigned AttributeSet::getStackAlignment(unsigned Index) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getStackAlignment() : 0;
+}
+
+std::string AttributeSet::getAsString(unsigned Index,
+ bool InAttrGrp) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getAsString(InAttrGrp) : std::string("");
+}
+
+/// \brief The attributes for the specified index are returned.
+AttributeSetNode *AttributeSet::getAttributes(unsigned Idx) const {
+ if (!pImpl) return 0;
+
+ // Loop through to find the attribute node we want.
+ for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I)
+ if (pImpl->getSlotIndex(I) == Idx)
+ return pImpl->getSlotNode(I);
+
+ return 0;
+}
+
+AttributeSet::iterator AttributeSet::begin(unsigned Idx) const {
+ if (!pImpl)
+ return ArrayRef<Attribute>().begin();
+ return pImpl->begin(Idx);
+}
+
+AttributeSet::iterator AttributeSet::end(unsigned Idx) const {
+ if (!pImpl)
+ return ArrayRef<Attribute>().end();
+ return pImpl->end(Idx);
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeSet Introspection Methods
+//===----------------------------------------------------------------------===//
+
+/// \brief Return the number of slots used in this attribute list. This is the
+/// number of arguments that have an attribute set on them (including the
+/// function itself).
+unsigned AttributeSet::getNumSlots() const {
+ return pImpl ? pImpl->getNumAttributes() : 0;
+}
+
+uint64_t AttributeSet::getSlotIndex(unsigned Slot) const {
+ assert(pImpl && Slot < pImpl->getNumAttributes() &&
+ "Slot # out of range!");
+ return pImpl->getSlotIndex(Slot);
+}
+
+AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const {
+ assert(pImpl && Slot < pImpl->getNumAttributes() &&
+ "Slot # out of range!");
+ return pImpl->getSlotAttributes(Slot);
+}
+
+uint64_t AttributeSet::Raw(unsigned Index) const {
+ // FIXME: Remove this.
+ return pImpl ? pImpl->Raw(Index) : 0;
+}
+
+void AttributeSet::dump() const {
+ dbgs() << "PAL[\n";
+
+ for (unsigned i = 0, e = getNumSlots(); i < e; ++i) {
+ uint64_t Index = getSlotIndex(i);
+ dbgs() << " { ";
+ if (Index == ~0U)
+ dbgs() << "~0U";
+ else
+ dbgs() << Index;
+ dbgs() << " => " << getAsString(Index) << " }\n";
+ }
+
+ dbgs() << "]\n";
+}
+
+//===----------------------------------------------------------------------===//
+// AttrBuilder Method Implementations
+//===----------------------------------------------------------------------===//
+
+AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx)
+ : Attrs(0), Alignment(0), StackAlignment(0) {
+ AttributeSetImpl *pImpl = AS.pImpl;
+ if (!pImpl) return;
+
+ for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) {
+ if (pImpl->getSlotIndex(I) != Idx) continue;
+
+ for (AttributeSetImpl::const_iterator II = pImpl->begin(I),
+ IE = pImpl->end(I); II != IE; ++II)
+ addAttribute(*II);
+
+ break;
+ }
+}
+
+void AttrBuilder::clear() {
+ Attrs.reset();
+ Alignment = StackAlignment = 0;
+}
+
+AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
+ assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!");
+ assert(Val != Attribute::Alignment && Val != Attribute::StackAlignment &&
+ "Adding alignment attribute without adding alignment value!");
+ Attrs[Val] = true;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
+ if (Attr.isStringAttribute()) {
+ addAttribute(Attr.getKindAsString(), Attr.getValueAsString());
+ return *this;
+ }
+
+ Attribute::AttrKind Kind = Attr.getKindAsEnum();
+ Attrs[Kind] = true;
+
+ if (Kind == Attribute::Alignment)
+ Alignment = Attr.getAlignment();
+ else if (Kind == Attribute::StackAlignment)
+ StackAlignment = Attr.getStackAlignment();
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addAttribute(StringRef A, StringRef V) {
+ TargetDepAttrs[A] = V;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
+ assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!");
+ Attrs[Val] = false;
+
+ if (Val == Attribute::Alignment)
+ Alignment = 0;
+ else if (Val == Attribute::StackAlignment)
+ StackAlignment = 0;
+
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
+ unsigned Idx = ~0U;
+ for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
+ if (A.getSlotIndex(I) == Index) {
+ Idx = I;
+ break;
+ }
+
+ assert(Idx != ~0U && "Couldn't find index in AttributeSet!");
+
+ for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); I != E; ++I) {
+ Attribute Attr = *I;
+ if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
+ Attribute::AttrKind Kind = I->getKindAsEnum();
+ Attrs[Kind] = false;
+
+ if (Kind == Attribute::Alignment)
+ Alignment = 0;
+ else if (Kind == Attribute::StackAlignment)
+ StackAlignment = 0;
+ } else {
+ assert(Attr.isStringAttribute() && "Invalid attribute type!");
+ std::map<std::string, std::string>::iterator
+ Iter = TargetDepAttrs.find(Attr.getKindAsString());
+ if (Iter != TargetDepAttrs.end())
+ TargetDepAttrs.erase(Iter);
+ }
+ }
+
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::removeAttribute(StringRef A) {
+ std::map<std::string, std::string>::iterator I = TargetDepAttrs.find(A);
+ if (I != TargetDepAttrs.end())
+ TargetDepAttrs.erase(I);
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) {
+ if (Align == 0) return *this;
+
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x40000000 && "Alignment too large.");
+
+ Attrs[Attribute::Alignment] = true;
+ Alignment = Align;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) {
+ // Default alignment, allow the target to define how to align it.
+ if (Align == 0) return *this;
+
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x100 && "Alignment too large.");
+
+ Attrs[Attribute::StackAlignment] = true;
+ StackAlignment = Align;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
+ // FIXME: What if both have alignments, but they don't match?!
+ if (!Alignment)
+ Alignment = B.Alignment;
+
+ if (!StackAlignment)
+ StackAlignment = B.StackAlignment;
+
+ Attrs |= B.Attrs;
+
+ for (td_const_iterator I = B.TargetDepAttrs.begin(),
+ E = B.TargetDepAttrs.end(); I != E; ++I)
+ TargetDepAttrs[I->first] = I->second;
+
+ return *this;
+}
+
+bool AttrBuilder::contains(StringRef A) const {
+ return TargetDepAttrs.find(A) != TargetDepAttrs.end();
+}
+
+bool AttrBuilder::hasAttributes() const {
+ return !Attrs.none() || !TargetDepAttrs.empty();
+}
+
+bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
+ unsigned Idx = ~0U;
+ for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
+ if (A.getSlotIndex(I) == Index) {
+ Idx = I;
+ break;
+ }
+
+ assert(Idx != ~0U && "Couldn't find the index!");
+
+ for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx);
+ I != E; ++I) {
+ Attribute Attr = *I;
+ if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
+ if (Attrs[I->getKindAsEnum()])
+ return true;
+ } else {
+ assert(Attr.isStringAttribute() && "Invalid attribute kind!");
+ return TargetDepAttrs.find(Attr.getKindAsString())!=TargetDepAttrs.end();
+ }
+ }
+
+ return false;
+}
+
+bool AttrBuilder::hasAlignmentAttr() const {
+ return Alignment != 0;
+}
+
+bool AttrBuilder::operator==(const AttrBuilder &B) {
+ if (Attrs != B.Attrs)
+ return false;
+
+ for (td_const_iterator I = TargetDepAttrs.begin(),
+ E = TargetDepAttrs.end(); I != E; ++I)
+ if (B.TargetDepAttrs.find(I->first) == B.TargetDepAttrs.end())
+ return false;
+
+ return Alignment == B.Alignment && StackAlignment == B.StackAlignment;
+}
+
+void AttrBuilder::removeFunctionOnlyAttrs() {
+ removeAttribute(Attribute::NoReturn)
+ .removeAttribute(Attribute::NoUnwind)
+ .removeAttribute(Attribute::ReadNone)
+ .removeAttribute(Attribute::ReadOnly)
+ .removeAttribute(Attribute::NoInline)
+ .removeAttribute(Attribute::AlwaysInline)
+ .removeAttribute(Attribute::OptimizeForSize)
+ .removeAttribute(Attribute::StackProtect)
+ .removeAttribute(Attribute::StackProtectReq)
+ .removeAttribute(Attribute::StackProtectStrong)
+ .removeAttribute(Attribute::NoRedZone)
+ .removeAttribute(Attribute::NoImplicitFloat)
+ .removeAttribute(Attribute::Naked)
+ .removeAttribute(Attribute::InlineHint)
+ .removeAttribute(Attribute::StackAlignment)
+ .removeAttribute(Attribute::UWTable)
+ .removeAttribute(Attribute::NonLazyBind)
+ .removeAttribute(Attribute::ReturnsTwice)
+ .removeAttribute(Attribute::SanitizeAddress)
+ .removeAttribute(Attribute::SanitizeThread)
+ .removeAttribute(Attribute::SanitizeMemory)
+ .removeAttribute(Attribute::MinSize)
+ .removeAttribute(Attribute::NoDuplicate)
+ .removeAttribute(Attribute::NoBuiltin);
+}
+
+AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
+ // FIXME: Remove this in 4.0.
+ if (!Val) return *this;
+
+ for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
+ I = Attribute::AttrKind(I + 1)) {
+ if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) {
+ Attrs[I] = true;
+
+ if (I == Attribute::Alignment)
+ Alignment = 1ULL << ((A >> 16) - 1);
+ else if (I == Attribute::StackAlignment)
+ StackAlignment = 1ULL << ((A >> 26)-1);
+ }
+ }
+
+ return *this;
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeFuncs Function Defintions
+//===----------------------------------------------------------------------===//
+
+/// \brief Which attributes cannot be applied to a type.
+AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) {
+ AttrBuilder Incompatible;
+
+ if (!Ty->isIntegerTy())
+ // Attribute that only apply to integers.
+ Incompatible.addAttribute(Attribute::SExt)
+ .addAttribute(Attribute::ZExt);
+
+ if (!Ty->isPointerTy())
+ // Attribute that only apply to pointers.
+ Incompatible.addAttribute(Attribute::ByVal)
+ .addAttribute(Attribute::Nest)
+ .addAttribute(Attribute::NoAlias)
+ .addAttribute(Attribute::NoCapture)
+ .addAttribute(Attribute::StructRet);
+
+ return AttributeSet::get(Ty->getContext(), Index, Incompatible);
+}
diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp
new file mode 100644
index 000000000000..f2375374e356
--- /dev/null
+++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp
@@ -0,0 +1,393 @@
+//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the auto-upgrade helper functions
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/AutoUpgrade.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstring>
+using namespace llvm;
+
+// Upgrade the declarations of the SSE4.1 functions whose arguments have
+// changed their type from v4f32 to v2i64.
+static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
+ Function *&NewFn) {
+ // Check whether this is an old version of the function, which received
+ // v4f32 arguments.
+ Type *Arg0Type = F->getFunctionType()->getParamType(0);
+ if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
+ return false;
+
+ // Yes, it's old, replace it with new version.
+ F->setName(F->getName() + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+ return true;
+}
+
+static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
+ assert(F && "Illegal to upgrade a non-existent Function.");
+
+ // Quickly eliminate it, if it's not a candidate.
+ StringRef Name = F->getName();
+ if (Name.size() <= 8 || !Name.startswith("llvm."))
+ return false;
+ Name = Name.substr(5); // Strip off "llvm."
+
+ switch (Name[0]) {
+ default: break;
+ case 'a': {
+ if (Name.startswith("arm.neon.vclz")) {
+ Type* args[2] = {
+ F->arg_begin()->getType(),
+ Type::getInt1Ty(F->getContext())
+ };
+ // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
+ // the end of the name. Change name from llvm.arm.neon.vclz.* to
+ // llvm.ctlz.*
+ FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
+ NewFn = Function::Create(fType, F->getLinkage(),
+ "llvm.ctlz." + Name.substr(14), F->getParent());
+ return true;
+ }
+ if (Name.startswith("arm.neon.vcnt")) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
+ F->arg_begin()->getType());
+ return true;
+ }
+ break;
+ }
+ case 'c': {
+ if (Name.startswith("ctlz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ F->arg_begin()->getType());
+ return true;
+ }
+ if (Name.startswith("cttz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
+ F->arg_begin()->getType());
+ return true;
+ }
+ break;
+ }
+ case 'x': {
+ if (Name.startswith("x86.sse2.pcmpeq.") ||
+ Name.startswith("x86.sse2.pcmpgt.") ||
+ Name.startswith("x86.avx2.pcmpeq.") ||
+ Name.startswith("x86.avx2.pcmpgt.") ||
+ Name.startswith("x86.avx.vpermil.") ||
+ Name == "x86.avx.movnt.dq.256" ||
+ Name == "x86.avx.movnt.pd.256" ||
+ Name == "x86.avx.movnt.ps.256" ||
+ (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
+ NewFn = 0;
+ return true;
+ }
+ // SSE4.1 ptest functions may have an old signature.
+ if (Name.startswith("x86.sse41.ptest")) {
+ if (Name == "x86.sse41.ptestc")
+ return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
+ if (Name == "x86.sse41.ptestz")
+ return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
+ if (Name == "x86.sse41.ptestnzc")
+ return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
+ }
+ // frcz.ss/sd may need to have an argument dropped
+ if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_xop_vfrcz_ss);
+ return true;
+ }
+ if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_xop_vfrcz_sd);
+ return true;
+ }
+ // Fix the FMA4 intrinsics to remove the 4
+ if (Name.startswith("x86.fma4.")) {
+ F->setName("llvm.x86.fma" + Name.substr(8));
+ NewFn = F;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // This may not belong here. This function is effectively being overloaded
+ // to both detect an intrinsic which needs upgrading, and to provide the
+ // upgraded form of the intrinsic. We should perhaps have two separate
+ // functions for this.
+ return false;
+}
+
+bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
+ NewFn = 0;
+ bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
+
+ // Upgrade intrinsic attributes. This does not change the function.
+ if (NewFn)
+ F = NewFn;
+ if (unsigned id = F->getIntrinsicID())
+ F->setAttributes(Intrinsic::getAttributes(F->getContext(),
+ (Intrinsic::ID)id));
+ return Upgraded;
+}
+
+bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
+ // Nothing to do yet.
+ return false;
+}
+
+// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
+// upgraded intrinsic. All argument and return casting must be provided in
+// order to seamlessly integrate with existing context.
+void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
+ Function *F = CI->getCalledFunction();
+ LLVMContext &C = CI->getContext();
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ assert(F && "Intrinsic call is not direct?");
+
+ if (!NewFn) {
+ // Get the Function's name.
+ StringRef Name = F->getName();
+
+ Value *Rep;
+ // Upgrade packed integer vector compares intrinsics to compare instructions
+ if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
+ Name.startswith("llvm.x86.avx2.pcmpeq.")) {
+ Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpeq");
+ // need to sign extend since icmp returns vector of i1
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
+ Name.startswith("llvm.x86.avx2.pcmpgt.")) {
+ Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpgt");
+ // need to sign extend since icmp returns vector of i1
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
+ Name == "llvm.x86.avx.movnt.ps.256" ||
+ Name == "llvm.x86.avx.movnt.pd.256") {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ Module *M = F->getParent();
+ SmallVector<Value *, 1> Elts;
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
+ MDNode *Node = MDNode::get(C, Elts);
+
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Value *BC = Builder.CreateBitCast(Arg0,
+ PointerType::getUnqual(Arg1->getType()),
+ "cast");
+ StoreInst *SI = Builder.CreateStore(Arg1, BC);
+ SI->setMetadata(M->getMDKindID("nontemporal"), Node);
+ SI->setAlignment(16);
+
+ // Remove intrinsic.
+ CI->eraseFromParent();
+ return;
+ } else if (Name.startswith("llvm.x86.xop.vpcom")) {
+ Intrinsic::ID intID;
+ if (Name.endswith("ub"))
+ intID = Intrinsic::x86_xop_vpcomub;
+ else if (Name.endswith("uw"))
+ intID = Intrinsic::x86_xop_vpcomuw;
+ else if (Name.endswith("ud"))
+ intID = Intrinsic::x86_xop_vpcomud;
+ else if (Name.endswith("uq"))
+ intID = Intrinsic::x86_xop_vpcomuq;
+ else if (Name.endswith("b"))
+ intID = Intrinsic::x86_xop_vpcomb;
+ else if (Name.endswith("w"))
+ intID = Intrinsic::x86_xop_vpcomw;
+ else if (Name.endswith("d"))
+ intID = Intrinsic::x86_xop_vpcomd;
+ else if (Name.endswith("q"))
+ intID = Intrinsic::x86_xop_vpcomq;
+ else
+ llvm_unreachable("Unknown suffix");
+
+ Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
+ unsigned Imm;
+ if (Name.startswith("lt"))
+ Imm = 0;
+ else if (Name.startswith("le"))
+ Imm = 1;
+ else if (Name.startswith("gt"))
+ Imm = 2;
+ else if (Name.startswith("ge"))
+ Imm = 3;
+ else if (Name.startswith("eq"))
+ Imm = 4;
+ else if (Name.startswith("ne"))
+ Imm = 5;
+ else if (Name.startswith("true"))
+ Imm = 6;
+ else if (Name.startswith("false"))
+ Imm = 7;
+ else
+ llvm_unreachable("Unknown condition");
+
+ Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
+ Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
+ CI->getArgOperand(1), Builder.getInt8(Imm));
+ } else {
+ bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
+ if (Name == "llvm.x86.avx.vpermil.pd.256")
+ PD256 = true;
+ else if (Name == "llvm.x86.avx.vpermil.pd")
+ PD128 = true;
+ else if (Name == "llvm.x86.avx.vpermil.ps.256")
+ PS256 = true;
+ else if (Name == "llvm.x86.avx.vpermil.ps")
+ PS128 = true;
+
+ if (PD256 || PD128 || PS256 || PS128) {
+ Value *Op0 = CI->getArgOperand(0);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ SmallVector<Constant*, 8> Idxs;
+
+ if (PD128)
+ for (unsigned i = 0; i != 2; ++i)
+ Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
+ else if (PD256)
+ for (unsigned l = 0; l != 4; l+=2)
+ for (unsigned i = 0; i != 2; ++i)
+ Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
+ else if (PS128)
+ for (unsigned i = 0; i != 4; ++i)
+ Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
+ else if (PS256)
+ for (unsigned l = 0; l != 8; l+=4)
+ for (unsigned i = 0; i != 4; ++i)
+ Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
+ else
+ llvm_unreachable("Unexpected function");
+
+ Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
+ } else {
+ llvm_unreachable("Unknown function for CallInst upgrade.");
+ }
+ }
+
+ CI->replaceAllUsesWith(Rep);
+ CI->eraseFromParent();
+ return;
+ }
+
+ std::string Name = CI->getName().str();
+ CI->setName(Name + ".old");
+
+ switch (NewFn->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unknown function for CallInst upgrade.");
+
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ assert(CI->getNumArgOperands() == 1 &&
+ "Mismatch between function args and call args");
+ CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
+ Builder.getFalse(), Name));
+ CI->eraseFromParent();
+ return;
+
+ case Intrinsic::arm_neon_vclz: {
+ // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
+ CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
+ Builder.getFalse(),
+ "llvm.ctlz." + Name.substr(14)));
+ CI->eraseFromParent();
+ return;
+ }
+ case Intrinsic::ctpop: {
+ CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
+ CI->eraseFromParent();
+ return;
+ }
+
+ case Intrinsic::x86_xop_vfrcz_ss:
+ case Intrinsic::x86_xop_vfrcz_sd:
+ CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
+ Name));
+ CI->eraseFromParent();
+ return;
+
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_sse41_ptestnzc: {
+ // The arguments for these intrinsics used to be v4f32, and changed
+ // to v2i64. This is purely a nop, since those are bitwise intrinsics.
+ // So, the only thing required is a bitcast for both arguments.
+ // First, check the arguments have the old type.
+ Value *Arg0 = CI->getArgOperand(0);
+ if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
+ return;
+
+ // Old intrinsic, add bitcasts
+ Value *Arg1 = CI->getArgOperand(1);
+
+ Value *BC0 =
+ Builder.CreateBitCast(Arg0,
+ VectorType::get(Type::getInt64Ty(C), 2),
+ "cast");
+ Value *BC1 =
+ Builder.CreateBitCast(Arg1,
+ VectorType::get(Type::getInt64Ty(C), 2),
+ "cast");
+
+ CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
+ return;
+ }
+ }
+}
+
+// This tests each Function to determine if it needs upgrading. When we find
+// one we are interested in, we then upgrade all calls to reflect the new
+// function.
+void llvm::UpgradeCallsToIntrinsic(Function* F) {
+ assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
+
+ // Upgrade the function and check if it is a totaly new function.
+ Function *NewFn;
+ if (UpgradeIntrinsicFunction(F, NewFn)) {
+ if (NewFn != F) {
+ // Replace all uses to the old function with the new one if necessary.
+ for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
+ UI != UE; ) {
+ if (CallInst *CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, NewFn);
+ }
+ // Remove old function, no longer used, from the module.
+ F->eraseFromParent();
+ }
+ }
+}
+
diff --git a/contrib/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm/lib/IR/BasicBlock.cpp
new file mode 100644
index 000000000000..41e58ec5da2d
--- /dev/null
+++ b/contrib/llvm/lib/IR/BasicBlock.cpp
@@ -0,0 +1,371 @@
+//===-- BasicBlock.cpp - Implement BasicBlock related methods -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BasicBlock class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/BasicBlock.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/LeakDetector.h"
+#include <algorithm>
+using namespace llvm;
+
+ValueSymbolTable *BasicBlock::getValueSymbolTable() {
+ if (Function *F = getParent())
+ return &F->getValueSymbolTable();
+ return 0;
+}
+
+LLVMContext &BasicBlock::getContext() const {
+ return getType()->getContext();
+}
+
+// Explicit instantiation of SymbolTableListTraits since some of the methods
+// are not in the public header file...
+template class llvm::SymbolTableListTraits<Instruction, BasicBlock>;
+
+
+BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
+ BasicBlock *InsertBefore)
+ : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(0) {
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (InsertBefore) {
+ assert(NewParent &&
+ "Cannot insert block before another block with no function!");
+ NewParent->getBasicBlockList().insert(InsertBefore, this);
+ } else if (NewParent) {
+ NewParent->getBasicBlockList().push_back(this);
+ }
+
+ setName(Name);
+}
+
+
+BasicBlock::~BasicBlock() {
+ // If the address of the block is taken and it is being deleted (e.g. because
+ // it is dead), this means that there is either a dangling constant expr
+ // hanging off the block, or an undefined use of the block (source code
+ // expecting the address of a label to keep the block alive even though there
+ // is no indirect branch). Handle these cases by zapping the BlockAddress
+ // nodes. There are no other possible uses at this point.
+ if (hasAddressTaken()) {
+ assert(!use_empty() && "There should be at least one blockaddress!");
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(getContext()), 1);
+ while (!use_empty()) {
+ BlockAddress *BA = cast<BlockAddress>(use_back());
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+ }
+
+ assert(getParent() == 0 && "BasicBlock still linked into the program!");
+ dropAllReferences();
+ InstList.clear();
+}
+
+void BasicBlock::setParent(Function *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+
+ // Set Parent=parent, updating instruction symtab entries as appropriate.
+ InstList.setSymTabObject(&Parent, parent);
+
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void BasicBlock::removeFromParent() {
+ getParent()->getBasicBlockList().remove(this);
+}
+
+void BasicBlock::eraseFromParent() {
+ getParent()->getBasicBlockList().erase(this);
+}
+
+/// moveBefore - Unlink this basic block from its current function and
+/// insert it into the function that MovePos lives in, right before MovePos.
+void BasicBlock::moveBefore(BasicBlock *MovePos) {
+ MovePos->getParent()->getBasicBlockList().splice(MovePos,
+ getParent()->getBasicBlockList(), this);
+}
+
+/// moveAfter - Unlink this basic block from its current function and
+/// insert it into the function that MovePos lives in, right after MovePos.
+void BasicBlock::moveAfter(BasicBlock *MovePos) {
+ Function::iterator I = MovePos;
+ MovePos->getParent()->getBasicBlockList().splice(++I,
+ getParent()->getBasicBlockList(), this);
+}
+
+
+TerminatorInst *BasicBlock::getTerminator() {
+ if (InstList.empty()) return 0;
+ return dyn_cast<TerminatorInst>(&InstList.back());
+}
+
+const TerminatorInst *BasicBlock::getTerminator() const {
+ if (InstList.empty()) return 0;
+ return dyn_cast<TerminatorInst>(&InstList.back());
+}
+
+Instruction* BasicBlock::getFirstNonPHI() {
+ BasicBlock::iterator i = begin();
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ while (isa<PHINode>(i)) ++i;
+ return &*i;
+}
+
+Instruction* BasicBlock::getFirstNonPHIOrDbg() {
+ BasicBlock::iterator i = begin();
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ while (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) ++i;
+ return &*i;
+}
+
+Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ BasicBlock::iterator i = begin();
+ for (;; ++i) {
+ if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i))
+ continue;
+
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i);
+ if (!II)
+ break;
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ break;
+ }
+ return &*i;
+}
+
+BasicBlock::iterator BasicBlock::getFirstInsertionPt() {
+ iterator InsertPt = getFirstNonPHI();
+ if (isa<LandingPadInst>(InsertPt)) ++InsertPt;
+ return InsertPt;
+}
+
+void BasicBlock::dropAllReferences() {
+ for(iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+}
+
+/// getSinglePredecessor - If this basic block has a single predecessor block,
+/// return the block, otherwise return a null pointer.
+BasicBlock *BasicBlock::getSinglePredecessor() {
+ pred_iterator PI = pred_begin(this), E = pred_end(this);
+ if (PI == E) return 0; // No preds.
+ BasicBlock *ThePred = *PI;
+ ++PI;
+ return (PI == E) ? ThePred : 0 /*multiple preds*/;
+}
+
+/// getUniquePredecessor - If this basic block has a unique predecessor block,
+/// return the block, otherwise return a null pointer.
+/// Note that unique predecessor doesn't mean single edge, there can be
+/// multiple edges from the unique predecessor to this block (for example
+/// a switch statement with multiple cases having the same destination).
+BasicBlock *BasicBlock::getUniquePredecessor() {
+ pred_iterator PI = pred_begin(this), E = pred_end(this);
+ if (PI == E) return 0; // No preds.
+ BasicBlock *PredBB = *PI;
+ ++PI;
+ for (;PI != E; ++PI) {
+ if (*PI != PredBB)
+ return 0;
+ // The same predecessor appears multiple times in the predecessor list.
+ // This is OK.
+ }
+ return PredBB;
+}
+
+/// removePredecessor - This method is used to notify a BasicBlock that the
+/// specified Predecessor of the block is no longer able to reach it. This is
+/// actually not used to update the Predecessor list, but is actually used to
+/// update the PHI nodes that reside in the block. Note that this should be
+/// called while the predecessor still refers to this block.
+///
+void BasicBlock::removePredecessor(BasicBlock *Pred,
+ bool DontDeleteUselessPHIs) {
+ assert((hasNUsesOrMore(16)||// Reduce cost of this assertion for complex CFGs.
+ find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) &&
+ "removePredecessor: BB is not a predecessor!");
+
+ if (InstList.empty()) return;
+ PHINode *APN = dyn_cast<PHINode>(&front());
+ if (!APN) return; // Quick exit.
+
+ // If there are exactly two predecessors, then we want to nuke the PHI nodes
+ // altogether. However, we cannot do this, if this in this case:
+ //
+ // Loop:
+ // %x = phi [X, Loop]
+ // %x2 = add %x, 1 ;; This would become %x2 = add %x2, 1
+ // br Loop ;; %x2 does not dominate all uses
+ //
+ // This is because the PHI node input is actually taken from the predecessor
+ // basic block. The only case this can happen is with a self loop, so we
+ // check for this case explicitly now.
+ //
+ unsigned max_idx = APN->getNumIncomingValues();
+ assert(max_idx != 0 && "PHI Node in block with 0 predecessors!?!?!");
+ if (max_idx == 2) {
+ BasicBlock *Other = APN->getIncomingBlock(APN->getIncomingBlock(0) == Pred);
+
+ // Disable PHI elimination!
+ if (this == Other) max_idx = 3;
+ }
+
+ // <= Two predecessors BEFORE I remove one?
+ if (max_idx <= 2 && !DontDeleteUselessPHIs) {
+ // Yup, loop through and nuke the PHI nodes
+ while (PHINode *PN = dyn_cast<PHINode>(&front())) {
+ // Remove the predecessor first.
+ PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs);
+
+ // If the PHI _HAD_ two uses, replace PHI node with its now *single* value
+ if (max_idx == 2) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ // We are left with an infinite loop with no entries: kill the PHI.
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ getInstList().pop_front(); // Remove the PHI node
+ }
+
+ // If the PHI node already only had one entry, it got deleted by
+ // removeIncomingValue.
+ }
+ } else {
+ // Okay, now we know that we need to remove predecessor #pred_idx from all
+ // PHI nodes. Iterate over each PHI node fixing them up
+ PHINode *PN;
+ for (iterator II = begin(); (PN = dyn_cast<PHINode>(II)); ) {
+ ++II;
+ PN->removeIncomingValue(Pred, false);
+ // If all incoming values to the Phi are the same, we can replace the Phi
+ // with that value.
+ Value* PNV = 0;
+ if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
+ if (PNV != PN) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+ }
+}
+
+
+/// splitBasicBlock - This splits a basic block into two at the specified
+/// instruction. Note that all instructions BEFORE the specified iterator stay
+/// as part of the original basic block, an unconditional branch is added to
+/// the new BB, and the rest of the instructions in the BB are moved to the new
+/// BB, including the old terminator. This invalidates the iterator.
+///
+/// Note that this only works on well formed basic blocks (must have a
+/// terminator), and 'I' must not be the end of instruction list (which would
+/// cause a degenerate basic block to be formed, having a terminator inside of
+/// the basic block).
+///
+BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
+ assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
+ assert(I != InstList.end() &&
+ "Trying to get me to create degenerate basic block!");
+
+ BasicBlock *InsertBefore = llvm::next(Function::iterator(this))
+ .getNodePtrUnchecked();
+ BasicBlock *New = BasicBlock::Create(getContext(), BBName,
+ getParent(), InsertBefore);
+
+ // Move all of the specified instructions from the original basic block into
+ // the new basic block.
+ New->getInstList().splice(New->end(), this->getInstList(), I, end());
+
+ // Add a branch instruction to the newly formed basic block.
+ BranchInst::Create(New, this);
+
+ // Now we must loop through all of the successors of the New block (which
+ // _were_ the successors of the 'this' block), and update any PHI nodes in
+ // successors. If there were PHI nodes in the successors, then they need to
+ // know that incoming branches will be from New, not from Old.
+ //
+ for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) {
+ // Loop over any phi nodes in the basic block, updating the BB field of
+ // incoming values...
+ BasicBlock *Successor = *I;
+ PHINode *PN;
+ for (BasicBlock::iterator II = Successor->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ int IDX = PN->getBasicBlockIndex(this);
+ while (IDX != -1) {
+ PN->setIncomingBlock((unsigned)IDX, New);
+ IDX = PN->getBasicBlockIndex(this);
+ }
+ }
+ }
+ return New;
+}
+
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+ TerminatorInst *TI = getTerminator();
+ if (!TI)
+ // Cope with being called on a BasicBlock that doesn't have a terminator
+ // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
+ return;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ // N.B. Succ might not be a complete BasicBlock, so don't assume
+ // that it ends with a non-phi instruction.
+ for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) {
+ PHINode *PN = dyn_cast<PHINode>(II);
+ if (!PN)
+ break;
+ int i;
+ while ((i = PN->getBasicBlockIndex(this)) >= 0)
+ PN->setIncomingBlock(i, New);
+ }
+ }
+}
+
+/// isLandingPad - Return true if this basic block is a landing pad. I.e., it's
+/// the destination of the 'unwind' edge of an invoke instruction.
+bool BasicBlock::isLandingPad() const {
+ return isa<LandingPadInst>(getFirstNonPHI());
+}
+
+/// getLandingPadInst() - Return the landingpad instruction associated with
+/// the landing pad.
+LandingPadInst *BasicBlock::getLandingPadInst() {
+ return dyn_cast<LandingPadInst>(getFirstNonPHI());
+}
+const LandingPadInst *BasicBlock::getLandingPadInst() const {
+ return dyn_cast<LandingPadInst>(getFirstNonPHI());
+}
diff --git a/contrib/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm/lib/IR/ConstantFold.cpp
new file mode 100644
index 000000000000..bf93d4f95663
--- /dev/null
+++ b/contrib/llvm/lib/IR/ConstantFold.cpp
@@ -0,0 +1,2074 @@
+//===- ConstantFold.cpp - LLVM constant folder ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements folding of constants for LLVM. This implements the
+// (internal) ConstantFold.h interface, which is used by the
+// ConstantExpr::get* methods to automatically fold constants when possible.
+//
+// The current constant folding implementation is implemented in two pieces: the
+// pieces that don't need DataLayout, and the pieces that do. This is to avoid
+// a dependence in IR on Target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConstantFold.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include <limits>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// ConstantFold*Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+/// BitCastConstantVector - Convert the specified vector Constant node to the
+/// specified vector type. At this point, we know that the elements of the
+/// input vector constant are all simple integer or FP values.
+static Constant *BitCastConstantVector(Constant *CV, VectorType *DstTy) {
+
+ if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
+ if (CV->isNullValue()) return Constant::getNullValue(DstTy);
+
+ // If this cast changes element count then we can't handle it here:
+ // doing so requires endianness information. This should be handled by
+ // Analysis/ConstantFolding.cpp
+ unsigned NumElts = DstTy->getNumElements();
+ if (NumElts != CV->getType()->getVectorNumElements())
+ return 0;
+
+ Type *DstEltTy = DstTy->getElementType();
+
+ SmallVector<Constant*, 16> Result;
+ Type *Ty = IntegerType::get(CV->getContext(), 32);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C =
+ ConstantExpr::getExtractElement(CV, ConstantInt::get(Ty, i));
+ C = ConstantExpr::getBitCast(C, DstEltTy);
+ Result.push_back(C);
+ }
+
+ return ConstantVector::get(Result);
+}
+
+/// This function determines which opcode to use to fold two constant cast
+/// expressions together. It uses CastInst::isEliminableCastPair to determine
+/// the opcode. Consequently its just a wrapper around that function.
+/// @brief Determine if it is valid to fold a cast of a cast
+static unsigned
+foldConstantCastPair(
+ unsigned opc, ///< opcode of the second cast constant expression
+ ConstantExpr *Op, ///< the first cast constant expression
+ Type *DstTy ///< desintation type of the first cast
+) {
+ assert(Op && Op->isCast() && "Can't fold cast of cast without a cast!");
+ assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type");
+ assert(CastInst::isCast(opc) && "Invalid cast opcode");
+
+ // The the types and opcodes for the two Cast constant expressions
+ Type *SrcTy = Op->getOperand(0)->getType();
+ Type *MidTy = Op->getType();
+ Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opc);
+
+ // Assume that pointers are never more than 64 bits wide.
+ IntegerType *FakeIntPtrTy = Type::getInt64Ty(DstTy->getContext());
+
+ // Let CastInst::isEliminableCastPair do the heavy lifting.
+ return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy,
+ FakeIntPtrTy, FakeIntPtrTy,
+ FakeIntPtrTy);
+}
+
+static Constant *FoldBitCast(Constant *V, Type *DestTy) {
+ Type *SrcTy = V->getType();
+ if (SrcTy == DestTy)
+ return V; // no-op cast
+
+ // Check to see if we are casting a pointer to an aggregate to a pointer to
+ // the first element. If so, return the appropriate GEP instruction.
+ if (PointerType *PTy = dyn_cast<PointerType>(V->getType()))
+ if (PointerType *DPTy = dyn_cast<PointerType>(DestTy))
+ if (PTy->getAddressSpace() == DPTy->getAddressSpace()
+ && DPTy->getElementType()->isSized()) {
+ SmallVector<Value*, 8> IdxList;
+ Value *Zero =
+ Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
+ IdxList.push_back(Zero);
+ Type *ElTy = PTy->getElementType();
+ while (ElTy != DPTy->getElementType()) {
+ if (StructType *STy = dyn_cast<StructType>(ElTy)) {
+ if (STy->getNumElements() == 0) break;
+ ElTy = STy->getElementType(0);
+ IdxList.push_back(Zero);
+ } else if (SequentialType *STy =
+ dyn_cast<SequentialType>(ElTy)) {
+ if (ElTy->isPointerTy()) break; // Can't index into pointers!
+ ElTy = STy->getElementType();
+ IdxList.push_back(Zero);
+ } else {
+ break;
+ }
+ }
+
+ if (ElTy == DPTy->getElementType())
+ // This GEP is inbounds because all indices are zero.
+ return ConstantExpr::getInBoundsGetElementPtr(V, IdxList);
+ }
+
+ // Handle casts from one vector constant to another. We know that the src
+ // and dest type have the same size (otherwise its an illegal cast).
+ if (VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
+ if (VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
+ assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() &&
+ "Not cast between same sized vectors!");
+ SrcTy = NULL;
+ // First, check for null. Undef is already handled.
+ if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(DestTy);
+
+ // Handle ConstantVector and ConstantAggregateVector.
+ return BitCastConstantVector(V, DestPTy);
+ }
+
+ // Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts
+ // This allows for other simplifications (although some of them
+ // can only be handled by Analysis/ConstantFolding.cpp).
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
+ return ConstantExpr::getBitCast(ConstantVector::get(V), DestPTy);
+ }
+
+ // Finally, implement bitcast folding now. The code below doesn't handle
+ // bitcast right.
+ if (isa<ConstantPointerNull>(V)) // ptr->ptr cast.
+ return ConstantPointerNull::get(cast<PointerType>(DestTy));
+
+ // Handle integral constant input.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (DestTy->isIntegerTy())
+ // Integral -> Integral. This is a no-op because the bit widths must
+ // be the same. Consequently, we just fold to V.
+ return V;
+
+ if (DestTy->isFloatingPointTy())
+ return ConstantFP::get(DestTy->getContext(),
+ APFloat(DestTy->getFltSemantics(),
+ CI->getValue()));
+
+ // Otherwise, can't fold this (vector?)
+ return 0;
+ }
+
+ // Handle ConstantFP input: FP -> Integral.
+ if (ConstantFP *FP = dyn_cast<ConstantFP>(V))
+ return ConstantInt::get(FP->getContext(),
+ FP->getValueAPF().bitcastToAPInt());
+
+ return 0;
+}
+
+
+/// ExtractConstantBytes - V is an integer constant which only has a subset of
+/// its bytes used. The bytes used are indicated by ByteStart (which is the
+/// first byte used, counting from the least significant byte) and ByteSize,
+/// which is the number of bytes used.
+///
+/// This function analyzes the specified constant to see if the specified byte
+/// range can be returned as a simplified constant. If so, the constant is
+/// returned, otherwise null is returned.
+///
+static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
+ unsigned ByteSize) {
+ assert(C->getType()->isIntegerTy() &&
+ (cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 &&
+ "Non-byte sized integer input");
+ unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8;
+ assert(ByteSize && "Must be accessing some piece");
+ assert(ByteStart+ByteSize <= CSize && "Extracting invalid piece from input");
+ assert(ByteSize != CSize && "Should not extract everything");
+
+ // Constant Integers are simple.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ APInt V = CI->getValue();
+ if (ByteStart)
+ V = V.lshr(ByteStart*8);
+ V = V.trunc(ByteSize*8);
+ return ConstantInt::get(CI->getContext(), V);
+ }
+
+ // In the input is a constant expr, we might be able to recursively simplify.
+ // If not, we definitely can't do anything.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (CE == 0) return 0;
+
+ switch (CE->getOpcode()) {
+ default: return 0;
+ case Instruction::Or: {
+ Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
+ if (RHS == 0)
+ return 0;
+
+ // X | -1 -> -1.
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS))
+ if (RHSC->isAllOnesValue())
+ return RHSC;
+
+ Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
+ if (LHS == 0)
+ return 0;
+ return ConstantExpr::getOr(LHS, RHS);
+ }
+ case Instruction::And: {
+ Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
+ if (RHS == 0)
+ return 0;
+
+ // X & 0 -> 0.
+ if (RHS->isNullValue())
+ return RHS;
+
+ Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
+ if (LHS == 0)
+ return 0;
+ return ConstantExpr::getAnd(LHS, RHS);
+ }
+ case Instruction::LShr: {
+ ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
+ if (Amt == 0)
+ return 0;
+ unsigned ShAmt = Amt->getZExtValue();
+ // Cannot analyze non-byte shifts.
+ if ((ShAmt & 7) != 0)
+ return 0;
+ ShAmt >>= 3;
+
+ // If the extract is known to be all zeros, return zero.
+ if (ByteStart >= CSize-ShAmt)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+ // If the extract is known to be fully in the input, extract it.
+ if (ByteStart+ByteSize+ShAmt <= CSize)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart+ShAmt, ByteSize);
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+
+ case Instruction::Shl: {
+ ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
+ if (Amt == 0)
+ return 0;
+ unsigned ShAmt = Amt->getZExtValue();
+ // Cannot analyze non-byte shifts.
+ if ((ShAmt & 7) != 0)
+ return 0;
+ ShAmt >>= 3;
+
+ // If the extract is known to be all zeros, return zero.
+ if (ByteStart+ByteSize <= ShAmt)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+ // If the extract is known to be fully in the input, extract it.
+ if (ByteStart >= ShAmt)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart-ShAmt, ByteSize);
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+
+ case Instruction::ZExt: {
+ unsigned SrcBitSize =
+ cast<IntegerType>(CE->getOperand(0)->getType())->getBitWidth();
+
+ // If extracting something that is completely zero, return 0.
+ if (ByteStart*8 >= SrcBitSize)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+
+ // If exactly extracting the input, return it.
+ if (ByteStart == 0 && ByteSize*8 == SrcBitSize)
+ return CE->getOperand(0);
+
+ // If extracting something completely in the input, if if the input is a
+ // multiple of 8 bits, recurse.
+ if ((SrcBitSize&7) == 0 && (ByteStart+ByteSize)*8 <= SrcBitSize)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart, ByteSize);
+
+ // Otherwise, if extracting a subset of the input, which is not multiple of
+ // 8 bits, do a shift and trunc to get the bits.
+ if ((ByteStart+ByteSize)*8 < SrcBitSize) {
+ assert((SrcBitSize&7) && "Shouldn't get byte sized case here");
+ Constant *Res = CE->getOperand(0);
+ if (ByteStart)
+ Res = ConstantExpr::getLShr(Res,
+ ConstantInt::get(Res->getType(), ByteStart*8));
+ return ConstantExpr::getTrunc(Res, IntegerType::get(C->getContext(),
+ ByteSize*8));
+ }
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+ }
+}
+
+/// getFoldedSizeOf - Return a ConstantExpr with type DestTy for sizeof
+/// on Ty, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
+static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy,
+ bool Folded) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
+ Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
+ return ConstantExpr::getNUWMul(E, N);
+ }
+
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has size zero.
+ if (NumElems == 0)
+ return ConstantExpr::getNullValue(DestTy);
+ // Check for a struct with all members having the same size.
+ Constant *MemberSize =
+ getFoldedSizeOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberSize !=
+ getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame) {
+ Constant *N = ConstantInt::get(DestTy, NumElems);
+ return ConstantExpr::getNUWMul(MemberSize, N);
+ }
+ }
+
+ // Pointer size doesn't depend on the pointee type, so canonicalize them
+ // to an arbitrary pointee.
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (!PTy->getElementType()->isIntegerTy(1))
+ return
+ getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace()),
+ DestTy, true);
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular sizeof expression.
+ Constant *C = ConstantExpr::getSizeOf(Ty);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+/// getFoldedAlignOf - Return a ConstantExpr with type DestTy for alignof
+/// on Ty, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
+static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy,
+ bool Folded) {
+ // The alignment of an array is equal to the alignment of the
+ // array element. Note that this is not always true for vectors.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *C = ConstantExpr::getAlignOf(ATy->getElementType());
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy,
+ false),
+ C, DestTy);
+ return C;
+ }
+
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ // Packed structs always have an alignment of 1.
+ if (STy->isPacked())
+ return ConstantInt::get(DestTy, 1);
+
+ // Otherwise, struct alignment is the maximum alignment of any member.
+ // Without target data, we can't compare much, but we can check to see
+ // if all the members have the same alignment.
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has minimal alignment.
+ if (NumElems == 0)
+ return ConstantInt::get(DestTy, 1);
+ // Check for a struct with all members having the same alignment.
+ Constant *MemberAlign =
+ getFoldedAlignOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame)
+ return MemberAlign;
+ }
+
+ // Pointer alignment doesn't depend on the pointee type, so canonicalize them
+ // to an arbitrary pointee.
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (!PTy->getElementType()->isIntegerTy(1))
+ return
+ getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
+ 1),
+ PTy->getAddressSpace()),
+ DestTy, true);
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular alignof expression.
+ Constant *C = ConstantExpr::getAlignOf(Ty);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+/// getFoldedOffsetOf - Return a ConstantExpr with type DestTy for offsetof
+/// on Ty and FieldNo, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
+static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo,
+ Type *DestTy,
+ bool Folded) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
+ DestTy, false),
+ FieldNo, DestTy);
+ Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
+ return ConstantExpr::getNUWMul(E, N);
+ }
+
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has no members.
+ if (NumElems == 0)
+ return 0;
+ // Check for a struct with all members having the same size.
+ Constant *MemberSize =
+ getFoldedSizeOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberSize !=
+ getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame) {
+ Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo,
+ false,
+ DestTy,
+ false),
+ FieldNo, DestTy);
+ return ConstantExpr::getNUWMul(MemberSize, N);
+ }
+ }
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular offsetof expression.
+ Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
+ Type *DestTy) {
+ if (isa<UndefValue>(V)) {
+ // zext(undef) = 0, because the top bits will be zero.
+ // sext(undef) = 0, because the top bits will all be the same.
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (opc == Instruction::ZExt || opc == Instruction::SExt ||
+ opc == Instruction::UIToFP || opc == Instruction::SIToFP)
+ return Constant::getNullValue(DestTy);
+ return UndefValue::get(DestTy);
+ }
+
+ if (V->isNullValue() && !DestTy->isX86_MMXTy())
+ return Constant::getNullValue(DestTy);
+
+ // If the cast operand is a constant expression, there's a few things we can
+ // do to try to simplify it.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->isCast()) {
+ // Try hard to fold cast of cast because they are often eliminable.
+ if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy))
+ return ConstantExpr::getCast(newOpc, CE->getOperand(0), DestTy);
+ } else if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // If all of the indexes in the GEP are null values, there is no pointer
+ // adjustment going on. We might as well cast the source pointer.
+ bool isAllNull = true;
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!CE->getOperand(i)->isNullValue()) {
+ isAllNull = false;
+ break;
+ }
+ if (isAllNull)
+ // This is casting one pointer type to another, always BitCast
+ return ConstantExpr::getPointerCast(CE->getOperand(0), DestTy);
+ }
+ }
+
+ // If the cast operand is a constant vector, perform the cast by
+ // operating on each element. In the cast of bitcasts, the element
+ // count may be mismatched; don't attempt to handle that here.
+ if ((isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) &&
+ DestTy->isVectorTy() &&
+ DestTy->getVectorNumElements() == V->getType()->getVectorNumElements()) {
+ SmallVector<Constant*, 16> res;
+ VectorType *DestVecTy = cast<VectorType>(DestTy);
+ Type *DstEltTy = DestVecTy->getElementType();
+ Type *Ty = IntegerType::get(V->getContext(), 32);
+ for (unsigned i = 0, e = V->getType()->getVectorNumElements(); i != e; ++i) {
+ Constant *C =
+ ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i));
+ res.push_back(ConstantExpr::getCast(opc, C, DstEltTy));
+ }
+ return ConstantVector::get(res);
+ }
+
+ // We actually have to do a cast now. Perform the cast according to the
+ // opcode specified.
+ switch (opc) {
+ default:
+ llvm_unreachable("Failed to cast constant expression");
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ bool ignored;
+ APFloat Val = FPC->getValueAPF();
+ Val.convert(DestTy->isHalfTy() ? APFloat::IEEEhalf :
+ DestTy->isFloatTy() ? APFloat::IEEEsingle :
+ DestTy->isDoubleTy() ? APFloat::IEEEdouble :
+ DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended :
+ DestTy->isFP128Ty() ? APFloat::IEEEquad :
+ DestTy->isPPC_FP128Ty() ? APFloat::PPCDoubleDouble :
+ APFloat::Bogus,
+ APFloat::rmNearestTiesToEven, &ignored);
+ return ConstantFP::get(V->getContext(), Val);
+ }
+ return 0; // Can't fold.
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ const APFloat &V = FPC->getValueAPF();
+ bool ignored;
+ uint64_t x[2];
+ uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ (void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
+ APFloat::rmTowardZero, &ignored);
+ APInt Val(DestBitWidth, x);
+ return ConstantInt::get(FPC->getContext(), Val);
+ }
+ return 0; // Can't fold.
+ case Instruction::IntToPtr: //always treated as unsigned
+ if (V->isNullValue()) // Is it an integral null value?
+ return ConstantPointerNull::get(cast<PointerType>(DestTy));
+ return 0; // Other pointer types cannot be casted
+ case Instruction::PtrToInt: // always treated as unsigned
+ // Is it a null pointer value?
+ if (V->isNullValue())
+ return ConstantInt::get(DestTy, 0);
+ // If this is a sizeof-like expression, pull out multiplications by
+ // known factors to expose them to subsequent folding. If it's an
+ // alignof-like expression, factor out known factors.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue()) {
+ Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ if (CE->getNumOperands() == 2) {
+ // Handle a sizeof-like expression.
+ Constant *Idx = CE->getOperand(1);
+ bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne();
+ if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) {
+ Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true,
+ DestTy, false),
+ Idx, DestTy);
+ return ConstantExpr::getMul(C, Idx);
+ }
+ } else if (CE->getNumOperands() == 3 &&
+ CE->getOperand(1)->isNullValue()) {
+ // Handle an alignof-like expression.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
+ if (CI->isOne() &&
+ STy->getNumElements() == 2 &&
+ STy->getElementType(0)->isIntegerTy(1)) {
+ return getFoldedAlignOf(STy->getElementType(1), DestTy, false);
+ }
+ }
+ // Handle an offsetof-like expression.
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
+ if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
+ DestTy, false))
+ return C;
+ }
+ }
+ }
+ // Other pointer types cannot be casted
+ return 0;
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt api = CI->getValue();
+ APFloat apf(DestTy->getFltSemantics(),
+ APInt::getNullValue(DestTy->getPrimitiveSizeInBits()));
+ (void)apf.convertFromAPInt(api,
+ opc==Instruction::SIToFP,
+ APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(V->getContext(), apf);
+ }
+ return 0;
+ case Instruction::ZExt:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().zext(BitWidth));
+ }
+ return 0;
+ case Instruction::SExt:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().sext(BitWidth));
+ }
+ return 0;
+ case Instruction::Trunc: {
+ uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().trunc(DestBitWidth));
+ }
+
+ // The input must be a constantexpr. See if we can simplify this based on
+ // the bytes we are demanding. Only do this if the source and dest are an
+ // even multiple of a byte.
+ if ((DestBitWidth & 7) == 0 &&
+ (cast<IntegerType>(V->getType())->getBitWidth() & 7) == 0)
+ if (Constant *Res = ExtractConstantBytes(V, 0, DestBitWidth / 8))
+ return Res;
+
+ return 0;
+ }
+ case Instruction::BitCast:
+ return FoldBitCast(V, DestTy);
+ }
+}
+
+Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
+ Constant *V1, Constant *V2) {
+ // Check for i1 and vector true/false conditions.
+ if (Cond->isNullValue()) return V2;
+ if (Cond->isAllOnesValue()) return V1;
+
+ // If the condition is a vector constant, fold the result elementwise.
+ if (ConstantVector *CondV = dyn_cast<ConstantVector>(Cond)) {
+ SmallVector<Constant*, 16> Result;
+ Type *Ty = IntegerType::get(CondV->getContext(), 32);
+ for (unsigned i = 0, e = V1->getType()->getVectorNumElements(); i != e;++i){
+ ConstantInt *Cond = dyn_cast<ConstantInt>(CondV->getOperand(i));
+ if (Cond == 0) break;
+
+ Constant *V = Cond->isNullValue() ? V2 : V1;
+ Constant *Res = ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i));
+ Result.push_back(Res);
+ }
+
+ // If we were able to build the vector, return it.
+ if (Result.size() == V1->getType()->getVectorNumElements())
+ return ConstantVector::get(Result);
+ }
+
+ if (isa<UndefValue>(Cond)) {
+ if (isa<UndefValue>(V1)) return V1;
+ return V2;
+ }
+ if (isa<UndefValue>(V1)) return V2;
+ if (isa<UndefValue>(V2)) return V1;
+ if (V1 == V2) return V1;
+
+ if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
+ if (TrueVal->getOpcode() == Instruction::Select)
+ if (TrueVal->getOperand(0) == Cond)
+ return ConstantExpr::getSelect(Cond, TrueVal->getOperand(1), V2);
+ }
+ if (ConstantExpr *FalseVal = dyn_cast<ConstantExpr>(V2)) {
+ if (FalseVal->getOpcode() == Instruction::Select)
+ if (FalseVal->getOperand(0) == Cond)
+ return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2));
+ }
+
+ return 0;
+}
+
+Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
+ Constant *Idx) {
+ if (isa<UndefValue>(Val)) // ee(undef, x) -> undef
+ return UndefValue::get(Val->getType()->getVectorElementType());
+ if (Val->isNullValue()) // ee(zero, x) -> zero
+ return Constant::getNullValue(Val->getType()->getVectorElementType());
+ // ee({w,x,y,z}, undef) -> undef
+ if (isa<UndefValue>(Idx))
+ return UndefValue::get(Val->getType()->getVectorElementType());
+
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
+ uint64_t Index = CIdx->getZExtValue();
+ // ee({w,x,y,z}, wrong_value) -> undef
+ if (Index >= Val->getType()->getVectorNumElements())
+ return UndefValue::get(Val->getType()->getVectorElementType());
+ return Val->getAggregateElement(Index);
+ }
+ return 0;
+}
+
+Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
+ Constant *Elt,
+ Constant *Idx) {
+ ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
+ if (!CIdx) return 0;
+ const APInt &IdxVal = CIdx->getValue();
+
+ SmallVector<Constant*, 16> Result;
+ Type *Ty = IntegerType::get(Val->getContext(), 32);
+ for (unsigned i = 0, e = Val->getType()->getVectorNumElements(); i != e; ++i){
+ if (i == IdxVal) {
+ Result.push_back(Elt);
+ continue;
+ }
+
+ Constant *C =
+ ConstantExpr::getExtractElement(Val, ConstantInt::get(Ty, i));
+ Result.push_back(C);
+ }
+
+ return ConstantVector::get(Result);
+}
+
+Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
+ Constant *V2,
+ Constant *Mask) {
+ unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+ Type *EltTy = V1->getType()->getVectorElementType();
+
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(Mask))
+ return UndefValue::get(VectorType::get(EltTy, MaskNumElts));
+
+ // Don't break the bitcode reader hack.
+ if (isa<ConstantExpr>(Mask)) return 0;
+
+ unsigned SrcNumElts = V1->getType()->getVectorNumElements();
+
+ // Loop over the shuffle mask, evaluating each element.
+ SmallVector<Constant*, 32> Result;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Elt = ShuffleVectorInst::getMaskValue(Mask, i);
+ if (Elt == -1) {
+ Result.push_back(UndefValue::get(EltTy));
+ continue;
+ }
+ Constant *InElt;
+ if (unsigned(Elt) >= SrcNumElts*2)
+ InElt = UndefValue::get(EltTy);
+ else if (unsigned(Elt) >= SrcNumElts) {
+ Type *Ty = IntegerType::get(V2->getContext(), 32);
+ InElt =
+ ConstantExpr::getExtractElement(V2,
+ ConstantInt::get(Ty, Elt - SrcNumElts));
+ } else {
+ Type *Ty = IntegerType::get(V1->getContext(), 32);
+ InElt = ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, Elt));
+ }
+ Result.push_back(InElt);
+ }
+
+ return ConstantVector::get(Result);
+}
+
+Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
+ ArrayRef<unsigned> Idxs) {
+ // Base case: no indices, so return the entire value.
+ if (Idxs.empty())
+ return Agg;
+
+ if (Constant *C = Agg->getAggregateElement(Idxs[0]))
+ return ConstantFoldExtractValueInstruction(C, Idxs.slice(1));
+
+ return 0;
+}
+
+Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
+ Constant *Val,
+ ArrayRef<unsigned> Idxs) {
+ // Base case: no indices, so replace the entire value.
+ if (Idxs.empty())
+ return Val;
+
+ unsigned NumElts;
+ if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
+ NumElts = ST->getNumElements();
+ else if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
+ NumElts = AT->getNumElements();
+ else
+ NumElts = Agg->getType()->getVectorNumElements();
+
+ SmallVector<Constant*, 32> Result;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C = Agg->getAggregateElement(i);
+ if (C == 0) return 0;
+
+ if (Idxs[0] == i)
+ C = ConstantFoldInsertValueInstruction(C, Val, Idxs.slice(1));
+
+ Result.push_back(C);
+ }
+
+ if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
+ return ConstantStruct::get(ST, Result);
+ if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
+ return ConstantArray::get(AT, Result);
+ return ConstantVector::get(Result);
+}
+
+
+Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
+ Constant *C1, Constant *C2) {
+ // Handle UndefValue up front.
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ switch (Opcode) {
+ case Instruction::Xor:
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2))
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return Constant::getNullValue(C1->getType());
+ // Fallthrough
+ case Instruction::Add:
+ case Instruction::Sub:
+ return UndefValue::get(C1->getType());
+ case Instruction::And:
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef & undef -> undef
+ return C1;
+ return Constant::getNullValue(C1->getType()); // undef & X -> 0
+ case Instruction::Mul: {
+ ConstantInt *CI;
+ // X * undef -> undef if X is odd or undef
+ if (((CI = dyn_cast<ConstantInt>(C1)) && CI->getValue()[0]) ||
+ ((CI = dyn_cast<ConstantInt>(C2)) && CI->getValue()[0]) ||
+ (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ return UndefValue::get(C1->getType());
+
+ // X * undef -> 0 otherwise
+ return Constant::getNullValue(C1->getType());
+ }
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ // undef / 1 -> undef
+ if (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv)
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2))
+ if (CI2->isOne())
+ return C1;
+ // FALL THROUGH
+ case Instruction::URem:
+ case Instruction::SRem:
+ if (!isa<UndefValue>(C2)) // undef / X -> 0
+ return Constant::getNullValue(C1->getType());
+ return C2; // X / undef -> undef
+ case Instruction::Or: // X | undef -> -1
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef | undef -> undef
+ return C1;
+ return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0
+ case Instruction::LShr:
+ if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+ return C1; // undef lshr undef -> undef
+ return Constant::getNullValue(C1->getType()); // X lshr undef -> 0
+ // undef lshr X -> 0
+ case Instruction::AShr:
+ if (!isa<UndefValue>(C2)) // undef ashr X --> all ones
+ return Constant::getAllOnesValue(C1->getType());
+ else if (isa<UndefValue>(C1))
+ return C1; // undef ashr undef -> undef
+ else
+ return C1; // X ashr undef --> X
+ case Instruction::Shl:
+ if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+ return C1; // undef shl undef -> undef
+ // undef << X -> 0 or X << undef -> 0
+ return Constant::getNullValue(C1->getType());
+ }
+ }
+
+ // Handle simplifications when the RHS is a constant int.
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ switch (Opcode) {
+ case Instruction::Add:
+ if (CI2->equalsInt(0)) return C1; // X + 0 == X
+ break;
+ case Instruction::Sub:
+ if (CI2->equalsInt(0)) return C1; // X - 0 == X
+ break;
+ case Instruction::Mul:
+ if (CI2->equalsInt(0)) return C2; // X * 0 == 0
+ if (CI2->equalsInt(1))
+ return C1; // X * 1 == X
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ if (CI2->equalsInt(1))
+ return C1; // X / 1 == X
+ if (CI2->equalsInt(0))
+ return UndefValue::get(CI2->getType()); // X / 0 == undef
+ break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ if (CI2->equalsInt(1))
+ return Constant::getNullValue(CI2->getType()); // X % 1 == 0
+ if (CI2->equalsInt(0))
+ return UndefValue::get(CI2->getType()); // X % 0 == undef
+ break;
+ case Instruction::And:
+ if (CI2->isZero()) return C2; // X & 0 == 0
+ if (CI2->isAllOnesValue())
+ return C1; // X & -1 == X
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ // (zext i32 to i64) & 4294967295 -> (zext i32 to i64)
+ if (CE1->getOpcode() == Instruction::ZExt) {
+ unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned SrcWidth =
+ CE1->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt PossiblySetBits(APInt::getLowBitsSet(DstWidth, SrcWidth));
+ if ((PossiblySetBits & CI2->getValue()) == PossiblySetBits)
+ return C1;
+ }
+
+ // If and'ing the address of a global with a constant, fold it.
+ if (CE1->getOpcode() == Instruction::PtrToInt &&
+ isa<GlobalValue>(CE1->getOperand(0))) {
+ GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
+
+ // Functions are at least 4-byte aligned.
+ unsigned GVAlign = GV->getAlignment();
+ if (isa<Function>(GV))
+ GVAlign = std::max(GVAlign, 4U);
+
+ if (GVAlign > 1) {
+ unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned SrcWidth = std::min(DstWidth, Log2_32(GVAlign));
+ APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth));
+
+ // If checking bits we know are clear, return zero.
+ if ((CI2->getValue() & BitsNotSet) == CI2->getValue())
+ return Constant::getNullValue(CI2->getType());
+ }
+ }
+ }
+ break;
+ case Instruction::Or:
+ if (CI2->equalsInt(0)) return C1; // X | 0 == X
+ if (CI2->isAllOnesValue())
+ return C2; // X | -1 == -1
+ break;
+ case Instruction::Xor:
+ if (CI2->equalsInt(0)) return C1; // X ^ 0 == X
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ switch (CE1->getOpcode()) {
+ default: break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ // cmp pred ^ true -> cmp !pred
+ assert(CI2->equalsInt(1));
+ CmpInst::Predicate pred = (CmpInst::Predicate)CE1->getPredicate();
+ pred = CmpInst::getInversePredicate(pred);
+ return ConstantExpr::getCompare(pred, CE1->getOperand(0),
+ CE1->getOperand(1));
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // ashr (zext C to Ty), C2 -> lshr (zext C, CSA), C2
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1))
+ if (CE1->getOpcode() == Instruction::ZExt) // Top bits known zero.
+ return ConstantExpr::getLShr(C1, C2);
+ break;
+ }
+ } else if (isa<ConstantInt>(C1)) {
+ // If C1 is a ConstantInt and C2 is not, swap the operands.
+ if (Instruction::isCommutative(Opcode))
+ return ConstantExpr::get(Opcode, C2, C1);
+ }
+
+ // At this point we know neither constant is an UndefValue.
+ if (ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ const APInt &C1V = CI1->getValue();
+ const APInt &C2V = CI2->getValue();
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::Add:
+ return ConstantInt::get(CI1->getContext(), C1V + C2V);
+ case Instruction::Sub:
+ return ConstantInt::get(CI1->getContext(), C1V - C2V);
+ case Instruction::Mul:
+ return ConstantInt::get(CI1->getContext(), C1V * C2V);
+ case Instruction::UDiv:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V));
+ case Instruction::SDiv:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef
+ return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
+ case Instruction::URem:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ return ConstantInt::get(CI1->getContext(), C1V.urem(C2V));
+ case Instruction::SRem:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef
+ return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
+ case Instruction::And:
+ return ConstantInt::get(CI1->getContext(), C1V & C2V);
+ case Instruction::Or:
+ return ConstantInt::get(CI1->getContext(), C1V | C2V);
+ case Instruction::Xor:
+ return ConstantInt::get(CI1->getContext(), C1V ^ C2V);
+ case Instruction::Shl: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ case Instruction::LShr: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ case Instruction::AShr: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ }
+ }
+
+ switch (Opcode) {
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ if (CI1->equalsInt(0)) return C1;
+ break;
+ default:
+ break;
+ }
+ } else if (ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) {
+ if (ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) {
+ APFloat C1V = CFP1->getValueAPF();
+ APFloat C2V = CFP2->getValueAPF();
+ APFloat C3V = C1V; // copy for modification
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::FAdd:
+ (void)C3V.add(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FSub:
+ (void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FMul:
+ (void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FDiv:
+ (void)C3V.divide(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FRem:
+ (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ }
+ }
+ } else if (VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
+ // Perform elementwise folding.
+ SmallVector<Constant*, 16> Result;
+ Type *Ty = IntegerType::get(VTy->getContext(), 32);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *LHS =
+ ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i));
+ Constant *RHS =
+ ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i));
+
+ Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
+ }
+
+ return ConstantVector::get(Result);
+ }
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ // There are many possible foldings we could do here. We should probably
+ // at least fold add of a pointer with an integer into the appropriate
+ // getelementptr. This will improve alias analysis a bit.
+
+ // Given ((a + b) + c), if (b + c) folds to something interesting, return
+ // (a + (b + c)).
+ if (Instruction::isAssociative(Opcode) && CE1->getOpcode() == Opcode) {
+ Constant *T = ConstantExpr::get(Opcode, CE1->getOperand(1), C2);
+ if (!isa<ConstantExpr>(T) || cast<ConstantExpr>(T)->getOpcode() != Opcode)
+ return ConstantExpr::get(Opcode, CE1->getOperand(0), T);
+ }
+ } else if (isa<ConstantExpr>(C2)) {
+ // If C2 is a constant expr and C1 isn't, flop them around and fold the
+ // other way if possible.
+ if (Instruction::isCommutative(Opcode))
+ return ConstantFoldBinaryInstruction(Opcode, C2, C1);
+ }
+
+ // i1 can be simplified in many cases.
+ if (C1->getType()->isIntegerTy(1)) {
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ return ConstantExpr::getXor(C1, C2);
+ case Instruction::Mul:
+ return ConstantExpr::getAnd(C1, C2);
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // We can assume that C2 == 0. If it were one the result would be
+ // undefined because the shift value is as large as the bitwidth.
+ return C1;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // We can assume that C2 == 1. If it were zero the result would be
+ // undefined through division by zero.
+ return C1;
+ case Instruction::URem:
+ case Instruction::SRem:
+ // We can assume that C2 == 1. If it were zero the result would be
+ // undefined through division by zero.
+ return ConstantInt::getFalse(C1->getContext());
+ default:
+ break;
+ }
+ }
+
+ // We don't know how to fold this.
+ return 0;
+}
+
+/// isZeroSizedType - This type is zero sized if its an array or structure of
+/// zero sized types. The only leaf zero sized type is an empty structure.
+static bool isMaybeZeroSizedType(Type *Ty) {
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (STy->isOpaque()) return true; // Can't say.
+
+ // If all of elements have zero size, this does too.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ if (!isMaybeZeroSizedType(STy->getElementType(i))) return false;
+ return true;
+
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ return isMaybeZeroSizedType(ATy->getElementType());
+ }
+ return false;
+}
+
+/// IdxCompare - Compare the two constants as though they were getelementptr
+/// indices. This allows coersion of the types to be the same thing.
+///
+/// If the two constants are the "same" (after coersion), return 0. If the
+/// first is less than the second, return -1, if the second is less than the
+/// first, return 1. If the constants are not integral, return -2.
+///
+static int IdxCompare(Constant *C1, Constant *C2, Type *ElTy) {
+ if (C1 == C2) return 0;
+
+ // Ok, we found a different index. If they are not ConstantInt, we can't do
+ // anything with them.
+ if (!isa<ConstantInt>(C1) || !isa<ConstantInt>(C2))
+ return -2; // don't know!
+
+ // Ok, we have two differing integer indices. Sign extend them to be the same
+ // type. Long is always big enough, so we use it.
+ if (!C1->getType()->isIntegerTy(64))
+ C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext()));
+
+ if (!C2->getType()->isIntegerTy(64))
+ C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext()));
+
+ if (C1 == C2) return 0; // They are equal
+
+ // If the type being indexed over is really just a zero sized type, there is
+ // no pointer difference being made here.
+ if (isMaybeZeroSizedType(ElTy))
+ return -2; // dunno.
+
+ // If they are really different, now that they are the same type, then we
+ // found a difference!
+ if (cast<ConstantInt>(C1)->getSExtValue() <
+ cast<ConstantInt>(C2)->getSExtValue())
+ return -1;
+ else
+ return 1;
+}
+
+/// evaluateFCmpRelation - This function determines if there is anything we can
+/// decide about the two constants provided. This doesn't need to handle simple
+/// things like ConstantFP comparisons, but should instead handle ConstantExprs.
+/// If we can determine that the two constants have a particular relation to
+/// each other, we should return the corresponding FCmpInst predicate,
+/// otherwise return FCmpInst::BAD_FCMP_PREDICATE. This is used below in
+/// ConstantFoldCompareInstruction.
+///
+/// To simplify this code we canonicalize the relation so that the first
+/// operand is always the most "complex" of the two. We consider ConstantFP
+/// to be the simplest, and ConstantExprs to be the most complex.
+static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
+ assert(V1->getType() == V2->getType() &&
+ "Cannot compare values of different types!");
+
+ // Handle degenerate case quickly
+ if (V1 == V2) return FCmpInst::FCMP_OEQ;
+
+ if (!isa<ConstantExpr>(V1)) {
+ if (!isa<ConstantExpr>(V2)) {
+ // We distilled thisUse the standard constant folder for a few cases
+ ConstantInt *R = 0;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OEQ;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OLT, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OLT;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OGT, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OGT;
+
+ // Nothing more we can do
+ return FCmpInst::BAD_FCMP_PREDICATE;
+ }
+
+ // If the first operand is simple and second is ConstantExpr, swap operands.
+ FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(V2, V1);
+ if (SwappedRelation != FCmpInst::BAD_FCMP_PREDICATE)
+ return FCmpInst::getSwappedPredicate(SwappedRelation);
+ } else {
+ // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
+ // constantexpr or a simple constant.
+ ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ switch (CE1->getOpcode()) {
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ // We might be able to do something with these but we don't right now.
+ break;
+ default:
+ break;
+ }
+ }
+ // There are MANY other foldings that we could perform here. They will
+ // probably be added on demand, as they seem needed.
+ return FCmpInst::BAD_FCMP_PREDICATE;
+}
+
+/// evaluateICmpRelation - This function determines if there is anything we can
+/// decide about the two constants provided. This doesn't need to handle simple
+/// things like integer comparisons, but should instead handle ConstantExprs
+/// and GlobalValues. If we can determine that the two constants have a
+/// particular relation to each other, we should return the corresponding ICmp
+/// predicate, otherwise return ICmpInst::BAD_ICMP_PREDICATE.
+///
+/// To simplify this code we canonicalize the relation so that the first
+/// operand is always the most "complex" of the two. We consider simple
+/// constants (like ConstantInt) to be the simplest, followed by
+/// GlobalValues, followed by ConstantExpr's (the most complex).
+///
+static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
+ bool isSigned) {
+ assert(V1->getType() == V2->getType() &&
+ "Cannot compare different types of values!");
+ if (V1 == V2) return ICmpInst::ICMP_EQ;
+
+ if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1) &&
+ !isa<BlockAddress>(V1)) {
+ if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2) &&
+ !isa<BlockAddress>(V2)) {
+ // We distilled this down to a simple case, use the standard constant
+ // folder.
+ ConstantInt *R = 0;
+ ICmpInst::Predicate pred = ICmpInst::ICMP_EQ;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+ pred = isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+ pred = isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+
+ // If we couldn't figure it out, bail.
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // If the first operand is simple, swap operands.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V1)) {
+ if (isa<ConstantExpr>(V2)) { // Swap as necessary.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Now we know that the RHS is a GlobalValue, BlockAddress or simple
+ // constant (which, since the types must match, means that it's a
+ // ConstantPointerNull).
+ if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
+ // Don't try to decide equality of aliases.
+ if (!isa<GlobalAlias>(GV) && !isa<GlobalAlias>(GV2))
+ if (!GV->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage())
+ return ICmpInst::ICMP_NE;
+ } else if (isa<BlockAddress>(V2)) {
+ return ICmpInst::ICMP_NE; // Globals never equal labels.
+ } else {
+ assert(isa<ConstantPointerNull>(V2) && "Canonicalization guarantee!");
+ // GlobalVals can never be null unless they have external weak linkage.
+ // We don't try to evaluate aliases here.
+ if (!GV->hasExternalWeakLinkage() && !isa<GlobalAlias>(GV))
+ return ICmpInst::ICMP_NE;
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(V1)) {
+ if (isa<ConstantExpr>(V2)) { // Swap as necessary.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Now we know that the RHS is a GlobalValue, BlockAddress or simple
+ // constant (which, since the types must match, means that it is a
+ // ConstantPointerNull).
+ if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(V2)) {
+ // Block address in another function can't equal this one, but block
+ // addresses in the current function might be the same if blocks are
+ // empty.
+ if (BA2->getFunction() != BA->getFunction())
+ return ICmpInst::ICMP_NE;
+ } else {
+ // Block addresses aren't null, don't equal the address of globals.
+ assert((isa<ConstantPointerNull>(V2) || isa<GlobalValue>(V2)) &&
+ "Canonicalization guarantee!");
+ return ICmpInst::ICMP_NE;
+ }
+ } else {
+ // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
+ // constantexpr, a global, block address, or a simple constant.
+ ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ Constant *CE1Op0 = CE1->getOperand(0);
+
+ switch (CE1->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ break; // We can't evaluate floating point casts or truncations.
+
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::BitCast:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the cast is not actually changing bits, and the second operand is a
+ // null pointer, do the comparison with the pre-casted value.
+ if (V2->isNullValue() &&
+ (CE1->getType()->isPointerTy() || CE1->getType()->isIntegerTy())) {
+ if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
+ if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
+ return evaluateICmpRelation(CE1Op0,
+ Constant::getNullValue(CE1Op0->getType()),
+ isSigned);
+ }
+ break;
+
+ case Instruction::GetElementPtr:
+ // Ok, since this is a getelementptr, we know that the constant has a
+ // pointer type. Check the various cases.
+ if (isa<ConstantPointerNull>(V2)) {
+ // If we are comparing a GEP to a null pointer, check to see if the base
+ // of the GEP equals the null pointer.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
+ if (GV->hasExternalWeakLinkage())
+ // Weak linkage GVals could be zero or not. We're comparing that
+ // to null pointer so its greater-or-equal
+ return isSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ else
+ // If its not weak linkage, the GVal must have a non-zero address
+ // so the result is greater-than
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ } else if (isa<ConstantPointerNull>(CE1Op0)) {
+ // If we are indexing from a null pointer, check to see if we have any
+ // non-zero indices.
+ for (unsigned i = 1, e = CE1->getNumOperands(); i != e; ++i)
+ if (!CE1->getOperand(i)->isNullValue())
+ // Offsetting from null, must not be equal.
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ // Only zero indexes from null, must still be zero.
+ return ICmpInst::ICMP_EQ;
+ }
+ // Otherwise, we can't really say if the first operand is null or not.
+ } else if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
+ if (isa<ConstantPointerNull>(CE1Op0)) {
+ if (GV2->hasExternalWeakLinkage())
+ // Weak linkage GVals could be zero or not. We're comparing it to
+ // a null pointer, so its less-or-equal
+ return isSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ else
+ // If its not weak linkage, the GVal must have a non-zero address
+ // so the result is less-than
+ return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
+ if (GV == GV2) {
+ // If this is a getelementptr of the same global, then it must be
+ // different. Because the types must match, the getelementptr could
+ // only have at most one index, and because we fold getelementptr's
+ // with a single zero index, it must be nonzero.
+ assert(CE1->getNumOperands() == 2 &&
+ !CE1->getOperand(1)->isNullValue() &&
+ "Surprising getelementptr!");
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ } else {
+ // If they are different globals, we don't know what the value is.
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+ }
+ } else {
+ ConstantExpr *CE2 = cast<ConstantExpr>(V2);
+ Constant *CE2Op0 = CE2->getOperand(0);
+
+ // There are MANY other foldings that we could perform here. They will
+ // probably be added on demand, as they seem needed.
+ switch (CE2->getOpcode()) {
+ default: break;
+ case Instruction::GetElementPtr:
+ // By far the most common case to handle is when the base pointers are
+ // obviously to the same global.
+ if (isa<GlobalValue>(CE1Op0) && isa<GlobalValue>(CE2Op0)) {
+ if (CE1Op0 != CE2Op0) // Don't know relative ordering.
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ // Ok, we know that both getelementptr instructions are based on the
+ // same global. From this, we can precisely determine the relative
+ // ordering of the resultant pointers.
+ unsigned i = 1;
+
+ // The logic below assumes that the result of the comparison
+ // can be determined by finding the first index that differs.
+ // This doesn't work if there is over-indexing in any
+ // subsequent indices, so check for that case first.
+ if (!CE1->isGEPWithNoNotionalOverIndexing() ||
+ !CE2->isGEPWithNoNotionalOverIndexing())
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+
+ // Compare all of the operands the GEP's have in common.
+ gep_type_iterator GTI = gep_type_begin(CE1);
+ for (;i != CE1->getNumOperands() && i != CE2->getNumOperands();
+ ++i, ++GTI)
+ switch (IdxCompare(CE1->getOperand(i),
+ CE2->getOperand(i), GTI.getIndexedType())) {
+ case -1: return isSigned ? ICmpInst::ICMP_SLT:ICmpInst::ICMP_ULT;
+ case 1: return isSigned ? ICmpInst::ICMP_SGT:ICmpInst::ICMP_UGT;
+ case -2: return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Ok, we ran out of things they have in common. If any leftovers
+ // are non-zero then we have a difference, otherwise we are equal.
+ for (; i < CE1->getNumOperands(); ++i)
+ if (!CE1->getOperand(i)->isNullValue()) {
+ if (isa<ConstantInt>(CE1->getOperand(i)))
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+ }
+
+ for (; i < CE2->getNumOperands(); ++i)
+ if (!CE2->getOperand(i)->isNullValue()) {
+ if (isa<ConstantInt>(CE2->getOperand(i)))
+ return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+ }
+ return ICmpInst::ICMP_EQ;
+ }
+ }
+ }
+ default:
+ break;
+ }
+ }
+
+ return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
+ Constant *C1, Constant *C2) {
+ Type *ResultTy;
+ if (VectorType *VT = dyn_cast<VectorType>(C1->getType()))
+ ResultTy = VectorType::get(Type::getInt1Ty(C1->getContext()),
+ VT->getNumElements());
+ else
+ ResultTy = Type::getInt1Ty(C1->getContext());
+
+ // Fold FCMP_FALSE/FCMP_TRUE unconditionally.
+ if (pred == FCmpInst::FCMP_FALSE)
+ return Constant::getNullValue(ResultTy);
+
+ if (pred == FCmpInst::FCMP_TRUE)
+ return Constant::getAllOnesValue(ResultTy);
+
+ // Handle some degenerate cases first
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Also, if both operands are undef, we can return undef.
+ if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) ||
+ (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ return UndefValue::get(ResultTy);
+ // Otherwise, pick the same value as the non-undef operand, and fold
+ // it to true or false.
+ return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+ }
+
+ // icmp eq/ne(null,GV) -> false/true
+ if (C1->isNullValue()) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C2))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
+ if (pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(C1->getContext());
+ else if (pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(C1->getContext());
+ }
+ // icmp eq/ne(GV,null) -> false/true
+ } else if (C2->isNullValue()) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C1))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
+ if (pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(C1->getContext());
+ else if (pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(C1->getContext());
+ }
+ }
+
+ // If the comparison is a comparison between two i1's, simplify it.
+ if (C1->getType()->isIntegerTy(1)) {
+ switch(pred) {
+ case ICmpInst::ICMP_EQ:
+ if (isa<ConstantInt>(C2))
+ return ConstantExpr::getXor(C1, ConstantExpr::getNot(C2));
+ return ConstantExpr::getXor(ConstantExpr::getNot(C1), C2);
+ case ICmpInst::ICMP_NE:
+ return ConstantExpr::getXor(C1, C2);
+ default:
+ break;
+ }
+ }
+
+ if (isa<ConstantInt>(C1) && isa<ConstantInt>(C2)) {
+ APInt V1 = cast<ConstantInt>(C1)->getValue();
+ APInt V2 = cast<ConstantInt>(C2)->getValue();
+ switch (pred) {
+ default: llvm_unreachable("Invalid ICmp Predicate");
+ case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2);
+ case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2);
+ case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2));
+ case ICmpInst::ICMP_SGT: return ConstantInt::get(ResultTy, V1.sgt(V2));
+ case ICmpInst::ICMP_SLE: return ConstantInt::get(ResultTy, V1.sle(V2));
+ case ICmpInst::ICMP_SGE: return ConstantInt::get(ResultTy, V1.sge(V2));
+ case ICmpInst::ICMP_ULT: return ConstantInt::get(ResultTy, V1.ult(V2));
+ case ICmpInst::ICMP_UGT: return ConstantInt::get(ResultTy, V1.ugt(V2));
+ case ICmpInst::ICMP_ULE: return ConstantInt::get(ResultTy, V1.ule(V2));
+ case ICmpInst::ICMP_UGE: return ConstantInt::get(ResultTy, V1.uge(V2));
+ }
+ } else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) {
+ APFloat C1V = cast<ConstantFP>(C1)->getValueAPF();
+ APFloat C2V = cast<ConstantFP>(C2)->getValueAPF();
+ APFloat::cmpResult R = C1V.compare(C2V);
+ switch (pred) {
+ default: llvm_unreachable("Invalid FCmp Predicate");
+ case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy);
+ case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy);
+ case FCmpInst::FCMP_UNO:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered);
+ case FCmpInst::FCMP_ORD:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered);
+ case FCmpInst::FCMP_UEQ:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_OEQ:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_UNE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual);
+ case FCmpInst::FCMP_ONE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
+ R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_ULT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan);
+ case FCmpInst::FCMP_OLT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan);
+ case FCmpInst::FCMP_UGT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_OGT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_ULE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_OLE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_UGE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan);
+ case FCmpInst::FCMP_OGE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual);
+ }
+ } else if (C1->getType()->isVectorTy()) {
+ // If we can constant fold the comparison of each element, constant fold
+ // the whole vector comparison.
+ SmallVector<Constant*, 4> ResElts;
+ Type *Ty = IntegerType::get(C1->getContext(), 32);
+ // Compare the elements, producing an i1 result or constant expr.
+ for (unsigned i = 0, e = C1->getType()->getVectorNumElements(); i != e;++i){
+ Constant *C1E =
+ ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i));
+ Constant *C2E =
+ ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i));
+
+ ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E));
+ }
+
+ return ConstantVector::get(ResElts);
+ }
+
+ if (C1->getType()->isFloatingPointTy()) {
+ int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
+ switch (evaluateFCmpRelation(C1, C2)) {
+ default: llvm_unreachable("Unknown relation!");
+ case FCmpInst::FCMP_UNO:
+ case FCmpInst::FCMP_ORD:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_TRUE:
+ case FCmpInst::FCMP_FALSE:
+ case FCmpInst::BAD_FCMP_PREDICATE:
+ break; // Couldn't determine anything about these constants.
+ case FCmpInst::FCMP_OEQ: // We know that C1 == C2
+ Result = (pred == FCmpInst::FCMP_UEQ || pred == FCmpInst::FCMP_OEQ ||
+ pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE ||
+ pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
+ break;
+ case FCmpInst::FCMP_OLT: // We know that C1 < C2
+ Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
+ pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT ||
+ pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE);
+ break;
+ case FCmpInst::FCMP_OGT: // We know that C1 > C2
+ Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
+ pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT ||
+ pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
+ break;
+ case FCmpInst::FCMP_OLE: // We know that C1 <= C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
+ Result = 1;
+ break;
+ case FCmpInst::FCMP_OGE: // We known that C1 >= C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
+ Result = 1;
+ break;
+ case FCmpInst::FCMP_ONE: // We know that C1 != C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_OEQ || pred == FCmpInst::FCMP_UEQ)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_ONE || pred == FCmpInst::FCMP_UNE)
+ Result = 1;
+ break;
+ }
+
+ // If we evaluated the result, return it now.
+ if (Result != -1)
+ return ConstantInt::get(ResultTy, Result);
+
+ } else {
+ // Evaluate the relation between the two constants, per the predicate.
+ int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
+ switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) {
+ default: llvm_unreachable("Unknown relational!");
+ case ICmpInst::BAD_ICMP_PREDICATE:
+ break; // Couldn't determine anything about these constants.
+ case ICmpInst::ICMP_EQ: // We know the constants are equal!
+ // If we know the constants are equal, we can decide the result of this
+ // computation precisely.
+ Result = ICmpInst::isTrueWhenEqual((ICmpInst::Predicate)pred);
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (pred) {
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULE:
+ Result = 1; break;
+ case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SLE:
+ Result = 1; break;
+ case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SGE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (pred) {
+ case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGE:
+ Result = 1; break;
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (pred) {
+ case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SGE:
+ Result = 1; break;
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SLE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (pred == ICmpInst::ICMP_UGT) Result = 0;
+ if (pred == ICmpInst::ICMP_ULT || pred == ICmpInst::ICMP_ULE) Result = 1;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if (pred == ICmpInst::ICMP_SGT) Result = 0;
+ if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE) Result = 1;
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (pred == ICmpInst::ICMP_ULT) Result = 0;
+ if (pred == ICmpInst::ICMP_UGT || pred == ICmpInst::ICMP_UGE) Result = 1;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (pred == ICmpInst::ICMP_SLT) Result = 0;
+ if (pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE) Result = 1;
+ break;
+ case ICmpInst::ICMP_NE:
+ if (pred == ICmpInst::ICMP_EQ) Result = 0;
+ if (pred == ICmpInst::ICMP_NE) Result = 1;
+ break;
+ }
+
+ // If we evaluated the result, return it now.
+ if (Result != -1)
+ return ConstantInt::get(ResultTy, Result);
+
+ // If the right hand side is a bitcast, try using its inverse to simplify
+ // it by moving it to the left hand side. We can't do this if it would turn
+ // a vector compare into a scalar compare or visa versa.
+ if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
+ Constant *CE2Op0 = CE2->getOperand(0);
+ if (CE2->getOpcode() == Instruction::BitCast &&
+ CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) {
+ Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
+ return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
+ }
+ }
+
+ // If the left hand side is an extension, try eliminating it.
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) ||
+ (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){
+ Constant *CE1Op0 = CE1->getOperand(0);
+ Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType());
+ if (CE1Inverse == CE1Op0) {
+ // Check whether we can safely truncate the right hand side.
+ Constant *C2Inverse = ConstantExpr::getTrunc(C2, CE1Op0->getType());
+ if (ConstantExpr::getZExt(C2Inverse, C2->getType()) == C2) {
+ return ConstantExpr::getICmp(pred, CE1Inverse, C2Inverse);
+ }
+ }
+ }
+ }
+
+ if ((!isa<ConstantExpr>(C1) && isa<ConstantExpr>(C2)) ||
+ (C1->isNullValue() && !C2->isNullValue())) {
+ // If C2 is a constant expr and C1 isn't, flip them around and fold the
+ // other way if possible.
+ // Also, if C1 is null and C2 isn't, flip them around.
+ pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
+ return ConstantExpr::getICmp(pred, C2, C1);
+ }
+ }
+ return 0;
+}
+
+/// isInBoundsIndices - Test whether the given sequence of *normalized* indices
+/// is "inbounds".
+template<typename IndexTy>
+static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) {
+ // No indices means nothing that could be out of bounds.
+ if (Idxs.empty()) return true;
+
+ // If the first index is zero, it's in bounds.
+ if (cast<Constant>(Idxs[0])->isNullValue()) return true;
+
+ // If the first index is one and all the rest are zero, it's in bounds,
+ // by the one-past-the-end rule.
+ if (!cast<ConstantInt>(Idxs[0])->isOne())
+ return false;
+ for (unsigned i = 1, e = Idxs.size(); i != e; ++i)
+ if (!cast<Constant>(Idxs[i])->isNullValue())
+ return false;
+ return true;
+}
+
+template<typename IndexTy>
+static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
+ bool inBounds,
+ ArrayRef<IndexTy> Idxs) {
+ if (Idxs.empty()) return C;
+ Constant *Idx0 = cast<Constant>(Idxs[0]);
+ if ((Idxs.size() == 1 && Idx0->isNullValue()))
+ return C;
+
+ if (isa<UndefValue>(C)) {
+ PointerType *Ptr = cast<PointerType>(C->getType());
+ Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
+ assert(Ty != 0 && "Invalid indices for GEP!");
+ return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
+ }
+
+ if (C->isNullValue()) {
+ bool isNull = true;
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
+ if (!cast<Constant>(Idxs[i])->isNullValue()) {
+ isNull = false;
+ break;
+ }
+ if (isNull) {
+ PointerType *Ptr = cast<PointerType>(C->getType());
+ Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
+ assert(Ty != 0 && "Invalid indices for GEP!");
+ return ConstantPointerNull::get(PointerType::get(Ty,
+ Ptr->getAddressSpace()));
+ }
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
+ //
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Type *LastTy = 0;
+ for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+ I != E; ++I)
+ LastTy = *I;
+
+ if ((LastTy && isa<SequentialType>(LastTy)) || Idx0->isNullValue()) {
+ SmallVector<Value*, 16> NewIndices;
+ NewIndices.reserve(Idxs.size() + CE->getNumOperands());
+ for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
+ NewIndices.push_back(CE->getOperand(i));
+
+ // Add the last index of the source with the first index of the new GEP.
+ // Make sure to handle the case when they are actually different types.
+ Constant *Combined = CE->getOperand(CE->getNumOperands()-1);
+ // Otherwise it must be an array.
+ if (!Idx0->isNullValue()) {
+ Type *IdxTy = Combined->getType();
+ if (IdxTy != Idx0->getType()) {
+ Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext());
+ Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Int64Ty);
+ Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, Int64Ty);
+ Combined = ConstantExpr::get(Instruction::Add, C1, C2);
+ } else {
+ Combined =
+ ConstantExpr::get(Instruction::Add, Idx0, Combined);
+ }
+ }
+
+ NewIndices.push_back(Combined);
+ NewIndices.append(Idxs.begin() + 1, Idxs.end());
+ return
+ ConstantExpr::getGetElementPtr(CE->getOperand(0), NewIndices,
+ inBounds &&
+ cast<GEPOperator>(CE)->isInBounds());
+ }
+ }
+
+ // Attempt to fold casts to the same type away. For example, folding:
+ //
+ // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*),
+ // i64 0, i64 0)
+ // into:
+ //
+ // i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
+ //
+ // Don't fold if the cast is changing address spaces.
+ if (CE->isCast() && Idxs.size() > 1 && Idx0->isNullValue()) {
+ PointerType *SrcPtrTy =
+ dyn_cast<PointerType>(CE->getOperand(0)->getType());
+ PointerType *DstPtrTy = dyn_cast<PointerType>(CE->getType());
+ if (SrcPtrTy && DstPtrTy) {
+ ArrayType *SrcArrayTy =
+ dyn_cast<ArrayType>(SrcPtrTy->getElementType());
+ ArrayType *DstArrayTy =
+ dyn_cast<ArrayType>(DstPtrTy->getElementType());
+ if (SrcArrayTy && DstArrayTy
+ && SrcArrayTy->getElementType() == DstArrayTy->getElementType()
+ && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
+ return ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0),
+ Idxs, inBounds);
+ }
+ }
+ }
+
+ // Check to see if any array indices are not within the corresponding
+ // notional array bounds. If so, try to determine if they can be factored
+ // out into preceding dimensions.
+ bool Unknown = false;
+ SmallVector<Constant *, 8> NewIdxs;
+ Type *Ty = C->getType();
+ Type *Prev = 0;
+ for (unsigned i = 0, e = Idxs.size(); i != e;
+ Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ if (ATy->getNumElements() <= INT64_MAX &&
+ ATy->getNumElements() != 0 &&
+ CI->getSExtValue() >= (int64_t)ATy->getNumElements()) {
+ if (isa<SequentialType>(Prev)) {
+ // It's out of range, but we can factor it into the prior
+ // dimension.
+ NewIdxs.resize(Idxs.size());
+ ConstantInt *Factor = ConstantInt::get(CI->getType(),
+ ATy->getNumElements());
+ NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
+
+ Constant *PrevIdx = cast<Constant>(Idxs[i-1]);
+ Constant *Div = ConstantExpr::getSDiv(CI, Factor);
+
+ // Before adding, extend both operands to i64 to avoid
+ // overflow trouble.
+ if (!PrevIdx->getType()->isIntegerTy(64))
+ PrevIdx = ConstantExpr::getSExt(PrevIdx,
+ Type::getInt64Ty(Div->getContext()));
+ if (!Div->getType()->isIntegerTy(64))
+ Div = ConstantExpr::getSExt(Div,
+ Type::getInt64Ty(Div->getContext()));
+
+ NewIdxs[i-1] = ConstantExpr::getAdd(PrevIdx, Div);
+ } else {
+ // It's out of range, but the prior dimension is a struct
+ // so we can't do anything about it.
+ Unknown = true;
+ }
+ }
+ } else {
+ // We don't know if it's in range or not.
+ Unknown = true;
+ }
+ }
+
+ // If we did any factoring, start over with the adjusted indices.
+ if (!NewIdxs.empty()) {
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
+ if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
+ return ConstantExpr::getGetElementPtr(C, NewIdxs, inBounds);
+ }
+
+ // If all indices are known integers and normalized, we can do a simple
+ // check for the "inbounds" property.
+ if (!Unknown && !inBounds &&
+ isa<GlobalVariable>(C) && isInBoundsIndices(Idxs))
+ return ConstantExpr::getInBoundsGetElementPtr(C, Idxs);
+
+ return 0;
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+ bool inBounds,
+ ArrayRef<Constant *> Idxs) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs);
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+ bool inBounds,
+ ArrayRef<Value *> Idxs) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs);
+}
diff --git a/contrib/llvm/lib/IR/ConstantFold.h b/contrib/llvm/lib/IR/ConstantFold.h
new file mode 100644
index 000000000000..e12f27a7cb1e
--- /dev/null
+++ b/contrib/llvm/lib/IR/ConstantFold.h
@@ -0,0 +1,56 @@
+//===-- ConstantFolding.h - Internal Constant Folding Interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the (internal) constant folding interfaces for LLVM. These
+// interfaces are used by the ConstantExpr::get* methods to automatically fold
+// constants when possible.
+//
+// These operators may return a null object if they don't know how to perform
+// the specified operation on the specified constant types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CONSTANTFOLDING_H
+#define CONSTANTFOLDING_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+ class Value;
+ class Constant;
+ class Type;
+
+ // Constant fold various types of instruction...
+ Constant *ConstantFoldCastInstruction(
+ unsigned opcode, ///< The opcode of the cast
+ Constant *V, ///< The source constant
+ Type *DestTy ///< The destination type
+ );
+ Constant *ConstantFoldSelectInstruction(Constant *Cond,
+ Constant *V1, Constant *V2);
+ Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx);
+ Constant *ConstantFoldInsertElementInstruction(Constant *Val, Constant *Elt,
+ Constant *Idx);
+ Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
+ Constant *Mask);
+ Constant *ConstantFoldExtractValueInstruction(Constant *Agg,
+ ArrayRef<unsigned> Idxs);
+ Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
+ ArrayRef<unsigned> Idxs);
+ Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1,
+ Constant *V2);
+ Constant *ConstantFoldCompareInstruction(unsigned short predicate,
+ Constant *C1, Constant *C2);
+ Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
+ ArrayRef<Constant *> Idxs);
+ Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
+ ArrayRef<Value *> Idxs);
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp
new file mode 100644
index 000000000000..1abb65643559
--- /dev/null
+++ b/contrib/llvm/lib/IR/Constants.cpp
@@ -0,0 +1,2779 @@
+//===-- Constants.cpp - Implement Constant nodes --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Constant* classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Constants.h"
+#include "ConstantFold.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Class
+//===----------------------------------------------------------------------===//
+
+void Constant::anchor() { }
+
+bool Constant::isNegativeZeroValue() const {
+ // Floating point values have an explicit -0.0 value.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero() && CFP->isNegative();
+
+ // Equivalent for a vector of -0.0's.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
+ if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative())
+ return true;
+
+ // We've already handled true FP case; any other FP vectors can't represent -0.0.
+ if (getType()->isFPOrFPVectorTy())
+ return false;
+
+ // Otherwise, just use +0.0.
+ return isNullValue();
+}
+
+// Return true iff this constant is positive zero (floating point), negative
+// zero (floating point), or a null value.
+bool Constant::isZeroValue() const {
+ // Floating point values have an explicit -0.0 value.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero();
+
+ // Otherwise, just use +0.0.
+ return isNullValue();
+}
+
+bool Constant::isNullValue() const {
+ // 0 is null.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->isZero();
+
+ // +0.0 is null.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero() && !CFP->isNegative();
+
+ // constant zero is zero for aggregates and cpnull is null for pointers.
+ return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this);
+}
+
+bool Constant::isAllOnesValue() const {
+ // Check for -1 integers
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->isMinusOne();
+
+ // Check for FP which are bitcasted from -1 integers
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue();
+
+ // Check for constant vectors which are splats of -1 values.
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isAllOnesValue();
+
+ // Check for constant vectors which are splats of -1 values.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isAllOnesValue();
+
+ return false;
+}
+
+// Constructor to create a '0' constant of arbitrary type...
+Constant *Constant::getNullValue(Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ return ConstantInt::get(Ty, 0);
+ case Type::HalfTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEhalf));
+ case Type::FloatTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEsingle));
+ case Type::DoubleTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEdouble));
+ case Type::X86_FP80TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::x87DoubleExtended));
+ case Type::FP128TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEquad));
+ case Type::PPC_FP128TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat(APFloat::PPCDoubleDouble,
+ APInt::getNullValue(128)));
+ case Type::PointerTyID:
+ return ConstantPointerNull::get(cast<PointerType>(Ty));
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ return ConstantAggregateZero::get(Ty);
+ default:
+ // Function, Label, or Opaque type?
+ llvm_unreachable("Cannot create a null constant of that type!");
+ }
+}
+
+Constant *Constant::getIntegerValue(Type *Ty, const APInt &V) {
+ Type *ScalarTy = Ty->getScalarType();
+
+ // Create the base integer constant.
+ Constant *C = ConstantInt::get(Ty->getContext(), V);
+
+ // Convert an integer to a pointer, if necessary.
+ if (PointerType *PTy = dyn_cast<PointerType>(ScalarTy))
+ C = ConstantExpr::getIntToPtr(C, PTy);
+
+ // Broadcast a scalar to a vector, if necessary.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ C = ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+Constant *Constant::getAllOnesValue(Type *Ty) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ConstantInt::get(Ty->getContext(),
+ APInt::getAllOnesValue(ITy->getBitWidth()));
+
+ if (Ty->isFloatingPointTy()) {
+ APFloat FL = APFloat::getAllOnesValue(Ty->getPrimitiveSizeInBits(),
+ !Ty->isPPC_FP128Ty());
+ return ConstantFP::get(Ty->getContext(), FL);
+ }
+
+ VectorType *VTy = cast<VectorType>(Ty);
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ getAllOnesValue(VTy->getElementType()));
+}
+
+/// getAggregateElement - For aggregates (struct/array/vector) return the
+/// constant that corresponds to the specified element if possible, or null if
+/// not. This can return null if the element index is a ConstantExpr, or if
+/// 'this' is a constant expr.
+Constant *Constant::getAggregateElement(unsigned Elt) const {
+ if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(this))
+ return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : 0;
+
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(this))
+ return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : 0;
+
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : 0;
+
+ if (const ConstantAggregateZero *CAZ =dyn_cast<ConstantAggregateZero>(this))
+ return CAZ->getElementValue(Elt);
+
+ if (const UndefValue *UV = dyn_cast<UndefValue>(this))
+ return UV->getElementValue(Elt);
+
+ if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
+ return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt) : 0;
+ return 0;
+}
+
+Constant *Constant::getAggregateElement(Constant *Elt) const {
+ assert(isa<IntegerType>(Elt->getType()) && "Index must be an integer");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt))
+ return getAggregateElement(CI->getZExtValue());
+ return 0;
+}
+
+
+void Constant::destroyConstantImpl() {
+ // When a Constant is destroyed, there may be lingering
+ // references to the constant by other constants in the constant pool. These
+ // constants are implicitly dependent on the module that is being deleted,
+ // but they don't know that. Because we only find out when the CPV is
+ // deleted, we must now notify all of our users (that should only be
+ // Constants) that they are, in fact, invalid now and should be deleted.
+ //
+ while (!use_empty()) {
+ Value *V = use_back();
+#ifndef NDEBUG // Only in -g mode...
+ if (!isa<Constant>(V)) {
+ dbgs() << "While deleting: " << *this
+ << "\n\nUse still stuck around after Def is destroyed: "
+ << *V << "\n\n";
+ }
+#endif
+ assert(isa<Constant>(V) && "References remain to Constant being destroyed");
+ cast<Constant>(V)->destroyConstant();
+
+ // The constant should remove itself from our use list...
+ assert((use_empty() || use_back() != V) && "Constant not removed!");
+ }
+
+ // Value has no outstanding references it is safe to delete it now...
+ delete this;
+}
+
+/// canTrap - Return true if evaluation of this constant could trap. This is
+/// true for things like constant expressions that could divide by zero.
+bool Constant::canTrap() const {
+ assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
+ // The only thing that could possibly trap are constant exprs.
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
+ if (!CE) return false;
+
+ // ConstantExpr traps if any operands can trap.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (CE->getOperand(i)->canTrap())
+ return true;
+
+ // Otherwise, only specific operations can trap.
+ switch (CE->getOpcode()) {
+ default:
+ return false;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ // Div and rem can trap if the RHS is not known to be non-zero.
+ if (!isa<ConstantInt>(CE->getOperand(1)) ||CE->getOperand(1)->isNullValue())
+ return true;
+ return false;
+ }
+}
+
+/// isThreadDependent - Return true if the value can vary between threads.
+bool Constant::isThreadDependent() const {
+ SmallPtrSet<const Constant*, 64> Visited;
+ SmallVector<const Constant*, 64> WorkList;
+ WorkList.push_back(this);
+ Visited.insert(this);
+
+ while (!WorkList.empty()) {
+ const Constant *C = WorkList.pop_back_val();
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->isThreadLocal())
+ return true;
+ }
+
+ for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
+ const Constant *D = dyn_cast<Constant>(C->getOperand(I));
+ if (!D)
+ continue;
+ if (Visited.insert(D))
+ WorkList.push_back(D);
+ }
+ }
+
+ return false;
+}
+
+/// isConstantUsed - Return true if the constant has users other than constant
+/// exprs and other dangling things.
+bool Constant::isConstantUsed() const {
+ for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ const Constant *UC = dyn_cast<Constant>(*UI);
+ if (UC == 0 || isa<GlobalValue>(UC))
+ return true;
+
+ if (UC->isConstantUsed())
+ return true;
+ }
+ return false;
+}
+
+
+
+/// getRelocationInfo - This method classifies the entry according to
+/// whether or not it may generate a relocation entry. This must be
+/// conservative, so if it might codegen to a relocatable entry, it should say
+/// so. The return values are:
+///
+/// NoRelocation: This constant pool entry is guaranteed to never have a
+/// relocation applied to it (because it holds a simple constant like
+/// '4').
+/// LocalRelocation: This entry has relocations, but the entries are
+/// guaranteed to be resolvable by the static linker, so the dynamic
+/// linker will never see them.
+/// GlobalRelocations: This entry may have arbitrary relocations.
+///
+/// FIXME: This really should not be in IR.
+Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return LocalRelocation; // Local to this file/library.
+ return GlobalRelocations; // Global reference.
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(this))
+ return BA->getFunction()->getRelocationInfo();
+
+ // While raw uses of blockaddress need to be relocated, differences between
+ // two of them don't when they are for labels in the same function. This is a
+ // common idiom when creating a table for the indirect goto extension, so we
+ // handle it efficiently here.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(this))
+ if (CE->getOpcode() == Instruction::Sub) {
+ ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0));
+ ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1));
+ if (LHS && RHS &&
+ LHS->getOpcode() == Instruction::PtrToInt &&
+ RHS->getOpcode() == Instruction::PtrToInt &&
+ isa<BlockAddress>(LHS->getOperand(0)) &&
+ isa<BlockAddress>(RHS->getOperand(0)) &&
+ cast<BlockAddress>(LHS->getOperand(0))->getFunction() ==
+ cast<BlockAddress>(RHS->getOperand(0))->getFunction())
+ return NoRelocation;
+ }
+
+ PossibleRelocationsTy Result = NoRelocation;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ Result = std::max(Result,
+ cast<Constant>(getOperand(i))->getRelocationInfo());
+
+ return Result;
+}
+
+/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
+/// it. This involves recursively eliminating any dead users of the
+/// constantexpr.
+static bool removeDeadUsersOfConstant(const Constant *C) {
+ if (isa<GlobalValue>(C)) return false; // Cannot remove this
+
+ while (!C->use_empty()) {
+ const Constant *User = dyn_cast<Constant>(C->use_back());
+ if (!User) return false; // Non-constant usage;
+ if (!removeDeadUsersOfConstant(User))
+ return false; // Constant wasn't dead
+ }
+
+ const_cast<Constant*>(C)->destroyConstant();
+ return true;
+}
+
+
+/// removeDeadConstantUsers - If there are any dead constant users dangling
+/// off of this constant, remove them. This method is useful for clients
+/// that want to check to see if a global is unused, but don't want to deal
+/// with potentially dead constants hanging off of the globals.
+void Constant::removeDeadConstantUsers() const {
+ Value::const_use_iterator I = use_begin(), E = use_end();
+ Value::const_use_iterator LastNonDeadUser = E;
+ while (I != E) {
+ const Constant *User = dyn_cast<Constant>(*I);
+ if (User == 0) {
+ LastNonDeadUser = I;
+ ++I;
+ continue;
+ }
+
+ if (!removeDeadUsersOfConstant(User)) {
+ // If the constant wasn't dead, remember that this was the last live use
+ // and move on to the next constant.
+ LastNonDeadUser = I;
+ ++I;
+ continue;
+ }
+
+ // If the constant was dead, then the iterator is invalidated.
+ if (LastNonDeadUser == E) {
+ I = use_begin();
+ if (I == E) break;
+ } else {
+ I = LastNonDeadUser;
+ ++I;
+ }
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ConstantInt
+//===----------------------------------------------------------------------===//
+
+void ConstantInt::anchor() { }
+
+ConstantInt::ConstantInt(IntegerType *Ty, const APInt& V)
+ : Constant(Ty, ConstantIntVal, 0, 0), Val(V) {
+ assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
+}
+
+ConstantInt *ConstantInt::getTrue(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ if (!pImpl->TheTrueVal)
+ pImpl->TheTrueVal = ConstantInt::get(Type::getInt1Ty(Context), 1);
+ return pImpl->TheTrueVal;
+}
+
+ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ if (!pImpl->TheFalseVal)
+ pImpl->TheFalseVal = ConstantInt::get(Type::getInt1Ty(Context), 0);
+ return pImpl->TheFalseVal;
+}
+
+Constant *ConstantInt::getTrue(Type *Ty) {
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy) {
+ assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1.");
+ return ConstantInt::getTrue(Ty->getContext());
+ }
+ assert(VTy->getElementType()->isIntegerTy(1) &&
+ "True must be vector of i1 or i1.");
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ ConstantInt::getTrue(Ty->getContext()));
+}
+
+Constant *ConstantInt::getFalse(Type *Ty) {
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy) {
+ assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1.");
+ return ConstantInt::getFalse(Ty->getContext());
+ }
+ assert(VTy->getElementType()->isIntegerTy(1) &&
+ "False must be vector of i1 or i1.");
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ ConstantInt::getFalse(Ty->getContext()));
+}
+
+
+// Get a ConstantInt from an APInt. Note that the value stored in the DenseMap
+// as the key, is a DenseMapAPIntKeyInfo::KeyTy which has provided the
+// operator== and operator!= to ensure that the DenseMap doesn't attempt to
+// compare APInt's of different widths, which would violate an APInt class
+// invariant which generates an assertion.
+ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
+ // Get the corresponding integer type for the bit width of the value.
+ IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
+ // get an existing value or the insertion position
+ DenseMapAPIntKeyInfo::KeyTy Key(V, ITy);
+ ConstantInt *&Slot = Context.pImpl->IntConstants[Key];
+ if (!Slot) Slot = new ConstantInt(ITy, V);
+ return Slot;
+}
+
+Constant *ConstantInt::get(Type *Ty, uint64_t V, bool isSigned) {
+ Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned);
+
+ // For vectors, broadcast the value.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V,
+ bool isSigned) {
+ return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
+}
+
+ConstantInt *ConstantInt::getSigned(IntegerType *Ty, int64_t V) {
+ return get(Ty, V, true);
+}
+
+Constant *ConstantInt::getSigned(Type *Ty, int64_t V) {
+ return get(Ty, V, true);
+}
+
+Constant *ConstantInt::get(Type *Ty, const APInt& V) {
+ ConstantInt *C = get(Ty->getContext(), V);
+ assert(C->getType() == Ty->getScalarType() &&
+ "ConstantInt type doesn't match the type implied by its value!");
+
+ // For vectors, broadcast the value.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+ConstantInt *ConstantInt::get(IntegerType* Ty, StringRef Str,
+ uint8_t radix) {
+ return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantFP
+//===----------------------------------------------------------------------===//
+
+static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
+ if (Ty->isHalfTy())
+ return &APFloat::IEEEhalf;
+ if (Ty->isFloatTy())
+ return &APFloat::IEEEsingle;
+ if (Ty->isDoubleTy())
+ return &APFloat::IEEEdouble;
+ if (Ty->isX86_FP80Ty())
+ return &APFloat::x87DoubleExtended;
+ else if (Ty->isFP128Ty())
+ return &APFloat::IEEEquad;
+
+ assert(Ty->isPPC_FP128Ty() && "Unknown FP format");
+ return &APFloat::PPCDoubleDouble;
+}
+
+void ConstantFP::anchor() { }
+
+/// get() - This returns a constant fp for the specified value in the
+/// specified type. This should only be used for simple constant values like
+/// 2.0/1.0 etc, that are known-valid both as double and as the target format.
+Constant *ConstantFP::get(Type *Ty, double V) {
+ LLVMContext &Context = Ty->getContext();
+
+ APFloat FV(V);
+ bool ignored;
+ FV.convert(*TypeToFloatSemantics(Ty->getScalarType()),
+ APFloat::rmNearestTiesToEven, &ignored);
+ Constant *C = get(Context, FV);
+
+ // For vectors, broadcast the value.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+
+Constant *ConstantFP::get(Type *Ty, StringRef Str) {
+ LLVMContext &Context = Ty->getContext();
+
+ APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
+ Constant *C = get(Context, FV);
+
+ // For vectors, broadcast the value.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+
+ConstantFP *ConstantFP::getNegativeZero(Type *Ty) {
+ LLVMContext &Context = Ty->getContext();
+ APFloat apf = cast<ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
+ apf.changeSign();
+ return get(Context, apf);
+}
+
+
+Constant *ConstantFP::getZeroValueForNegation(Type *Ty) {
+ Type *ScalarTy = Ty->getScalarType();
+ if (ScalarTy->isFloatingPointTy()) {
+ Constant *C = getNegativeZero(ScalarTy);
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+ return C;
+ }
+
+ return Constant::getNullValue(Ty);
+}
+
+
+// ConstantFP accessors.
+ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
+ DenseMapAPFloatKeyInfo::KeyTy Key(V);
+
+ LLVMContextImpl* pImpl = Context.pImpl;
+
+ ConstantFP *&Slot = pImpl->FPConstants[Key];
+
+ if (!Slot) {
+ Type *Ty;
+ if (&V.getSemantics() == &APFloat::IEEEhalf)
+ Ty = Type::getHalfTy(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEsingle)
+ Ty = Type::getFloatTy(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEdouble)
+ Ty = Type::getDoubleTy(Context);
+ else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
+ Ty = Type::getX86_FP80Ty(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEquad)
+ Ty = Type::getFP128Ty(Context);
+ else {
+ assert(&V.getSemantics() == &APFloat::PPCDoubleDouble &&
+ "Unknown FP format");
+ Ty = Type::getPPC_FP128Ty(Context);
+ }
+ Slot = new ConstantFP(Ty, V);
+ }
+
+ return Slot;
+}
+
+ConstantFP *ConstantFP::getInfinity(Type *Ty, bool Negative) {
+ const fltSemantics &Semantics = *TypeToFloatSemantics(Ty);
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getInf(Semantics, Negative));
+}
+
+ConstantFP::ConstantFP(Type *Ty, const APFloat& V)
+ : Constant(Ty, ConstantFPVal, 0, 0), Val(V) {
+ assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
+ "FP type Mismatch");
+}
+
+bool ConstantFP::isExactlyValue(const APFloat &V) const {
+ return Val.bitwiseIsEqual(V);
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantAggregateZero Implementation
+//===----------------------------------------------------------------------===//
+
+/// getSequentialElement - If this CAZ has array or vector type, return a zero
+/// with the right element type.
+Constant *ConstantAggregateZero::getSequentialElement() const {
+ return Constant::getNullValue(getType()->getSequentialElementType());
+}
+
+/// getStructElement - If this CAZ has struct type, return a zero with the
+/// right element type for the specified element.
+Constant *ConstantAggregateZero::getStructElement(unsigned Elt) const {
+ return Constant::getNullValue(getType()->getStructElementType(Elt));
+}
+
+/// getElementValue - Return a zero of the right value for the specified GEP
+/// index if we can, otherwise return null (e.g. if C is a ConstantExpr).
+Constant *ConstantAggregateZero::getElementValue(Constant *C) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(cast<ConstantInt>(C)->getZExtValue());
+}
+
+/// getElementValue - Return a zero of the right value for the specified GEP
+/// index.
+Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(Idx);
+}
+
+
+//===----------------------------------------------------------------------===//
+// UndefValue Implementation
+//===----------------------------------------------------------------------===//
+
+/// getSequentialElement - If this undef has array or vector type, return an
+/// undef with the right element type.
+UndefValue *UndefValue::getSequentialElement() const {
+ return UndefValue::get(getType()->getSequentialElementType());
+}
+
+/// getStructElement - If this undef has struct type, return a zero with the
+/// right element type for the specified element.
+UndefValue *UndefValue::getStructElement(unsigned Elt) const {
+ return UndefValue::get(getType()->getStructElementType(Elt));
+}
+
+/// getElementValue - Return an undef of the right value for the specified GEP
+/// index if we can, otherwise return null (e.g. if C is a ConstantExpr).
+UndefValue *UndefValue::getElementValue(Constant *C) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(cast<ConstantInt>(C)->getZExtValue());
+}
+
+/// getElementValue - Return an undef of the right value for the specified GEP
+/// index.
+UndefValue *UndefValue::getElementValue(unsigned Idx) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(Idx);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ConstantXXX Classes
+//===----------------------------------------------------------------------===//
+
+template <typename ItTy, typename EltTy>
+static bool rangeOnlyContains(ItTy Start, ItTy End, EltTy Elt) {
+ for (; Start != End; ++Start)
+ if (*Start != Elt)
+ return false;
+ return true;
+}
+
+ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
+ : Constant(T, ConstantArrayVal,
+ OperandTraits<ConstantArray>::op_end(this) - V.size(),
+ V.size()) {
+ assert(V.size() == T->getNumElements() &&
+ "Invalid initializer vector for constant array");
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ assert(V[i]->getType() == T->getElementType() &&
+ "Initializer for array element doesn't match array element type!");
+ std::copy(V.begin(), V.end(), op_begin());
+}
+
+Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
+ // Empty arrays are canonicalized to ConstantAggregateZero.
+ if (V.empty())
+ return ConstantAggregateZero::get(Ty);
+
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ assert(V[i]->getType() == Ty->getElementType() &&
+ "Wrong type in array element initializer");
+ }
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+
+ // If this is an all-zero array, return a ConstantAggregateZero object. If
+ // all undef, return an UndefValue, if "all simple", then return a
+ // ConstantDataArray.
+ Constant *C = V[0];
+ if (isa<UndefValue>(C) && rangeOnlyContains(V.begin(), V.end(), C))
+ return UndefValue::get(Ty);
+
+ if (C->isNullValue() && rangeOnlyContains(V.begin(), V.end(), C))
+ return ConstantAggregateZero::get(Ty);
+
+ // Check to see if all of the elements are ConstantFP or ConstantInt and if
+ // the element type is compatible with ConstantDataVector. If so, use it.
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
+ // We speculatively build the elements here even if it turns out that there
+ // is a constantexpr or something else weird in the array, since it is so
+ // uncommon for that to happen.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ }
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToFloat());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToDouble());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ }
+ }
+ }
+
+ // Otherwise, we really do want to create a ConstantArray.
+ return pImpl->ArrayConstants.getOrCreate(Ty, V);
+}
+
+/// getTypeForElements - Return an anonymous struct type to use for a constant
+/// with the specified set of elements. The list must not be empty.
+StructType *ConstantStruct::getTypeForElements(LLVMContext &Context,
+ ArrayRef<Constant*> V,
+ bool Packed) {
+ unsigned VecSize = V.size();
+ SmallVector<Type*, 16> EltTypes(VecSize);
+ for (unsigned i = 0; i != VecSize; ++i)
+ EltTypes[i] = V[i]->getType();
+
+ return StructType::get(Context, EltTypes, Packed);
+}
+
+
+StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V,
+ bool Packed) {
+ assert(!V.empty() &&
+ "ConstantStruct::getTypeForElements cannot be called on empty list");
+ return getTypeForElements(V[0]->getContext(), V, Packed);
+}
+
+
+ConstantStruct::ConstantStruct(StructType *T, ArrayRef<Constant *> V)
+ : Constant(T, ConstantStructVal,
+ OperandTraits<ConstantStruct>::op_end(this) - V.size(),
+ V.size()) {
+ assert(V.size() == T->getNumElements() &&
+ "Invalid initializer vector for constant structure");
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ assert((T->isOpaque() || V[i]->getType() == T->getElementType(i)) &&
+ "Initializer for struct element doesn't match struct element type!");
+ std::copy(V.begin(), V.end(), op_begin());
+}
+
+// ConstantStruct accessors.
+Constant *ConstantStruct::get(StructType *ST, ArrayRef<Constant*> V) {
+ assert((ST->isOpaque() || ST->getNumElements() == V.size()) &&
+ "Incorrect # elements specified to ConstantStruct::get");
+
+ // Create a ConstantAggregateZero value if all elements are zeros.
+ bool isZero = true;
+ bool isUndef = false;
+
+ if (!V.empty()) {
+ isUndef = isa<UndefValue>(V[0]);
+ isZero = V[0]->isNullValue();
+ if (isUndef || isZero) {
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ if (!V[i]->isNullValue())
+ isZero = false;
+ if (!isa<UndefValue>(V[i]))
+ isUndef = false;
+ }
+ }
+ }
+ if (isZero)
+ return ConstantAggregateZero::get(ST);
+ if (isUndef)
+ return UndefValue::get(ST);
+
+ return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
+}
+
+Constant *ConstantStruct::get(StructType *T, ...) {
+ va_list ap;
+ SmallVector<Constant*, 8> Values;
+ va_start(ap, T);
+ while (Constant *Val = va_arg(ap, llvm::Constant*))
+ Values.push_back(Val);
+ va_end(ap);
+ return get(T, Values);
+}
+
+ConstantVector::ConstantVector(VectorType *T, ArrayRef<Constant *> V)
+ : Constant(T, ConstantVectorVal,
+ OperandTraits<ConstantVector>::op_end(this) - V.size(),
+ V.size()) {
+ for (size_t i = 0, e = V.size(); i != e; i++)
+ assert(V[i]->getType() == T->getElementType() &&
+ "Initializer for vector element doesn't match vector element type!");
+ std::copy(V.begin(), V.end(), op_begin());
+}
+
+// ConstantVector accessors.
+Constant *ConstantVector::get(ArrayRef<Constant*> V) {
+ assert(!V.empty() && "Vectors can't be empty");
+ VectorType *T = VectorType::get(V.front()->getType(), V.size());
+ LLVMContextImpl *pImpl = T->getContext().pImpl;
+
+ // If this is an all-undef or all-zero vector, return a
+ // ConstantAggregateZero or UndefValue.
+ Constant *C = V[0];
+ bool isZero = C->isNullValue();
+ bool isUndef = isa<UndefValue>(C);
+
+ if (isZero || isUndef) {
+ for (unsigned i = 1, e = V.size(); i != e; ++i)
+ if (V[i] != C) {
+ isZero = isUndef = false;
+ break;
+ }
+ }
+
+ if (isZero)
+ return ConstantAggregateZero::get(T);
+ if (isUndef)
+ return UndefValue::get(T);
+
+ // Check to see if all of the elements are ConstantFP or ConstantInt and if
+ // the element type is compatible with ConstantDataVector. If so, use it.
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
+ // We speculatively build the elements here even if it turns out that there
+ // is a constantexpr or something else weird in the array, since it is so
+ // uncommon for that to happen.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ }
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToFloat());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToDouble());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ }
+ }
+ }
+
+ // Otherwise, the element type isn't compatible with ConstantDataVector, or
+ // the operand list constants a ConstantExpr or something else strange.
+ return pImpl->VectorConstants.getOrCreate(T, V);
+}
+
+Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) {
+ // If this splat is compatible with ConstantDataVector, use it instead of
+ // ConstantVector.
+ if ((isa<ConstantFP>(V) || isa<ConstantInt>(V)) &&
+ ConstantDataSequential::isElementTypeCompatible(V->getType()))
+ return ConstantDataVector::getSplat(NumElts, V);
+
+ SmallVector<Constant*, 32> Elts(NumElts, V);
+ return get(Elts);
+}
+
+
+// Utility function for determining if a ConstantExpr is a CastOp or not. This
+// can't be inline because we don't want to #include Instruction.h into
+// Constant.h
+bool ConstantExpr::isCast() const {
+ return Instruction::isCast(getOpcode());
+}
+
+bool ConstantExpr::isCompare() const {
+ return getOpcode() == Instruction::ICmp || getOpcode() == Instruction::FCmp;
+}
+
+bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
+ if (getOpcode() != Instruction::GetElementPtr) return false;
+
+ gep_type_iterator GEPI = gep_type_begin(this), E = gep_type_end(this);
+ User::const_op_iterator OI = llvm::next(this->op_begin());
+
+ // Skip the first index, as it has no static limit.
+ ++GEPI;
+ ++OI;
+
+ // The remaining indices must be compile-time known integers within the
+ // bounds of the corresponding notional static array types.
+ for (; GEPI != E; ++GEPI, ++OI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(*OI);
+ if (!CI) return false;
+ if (ArrayType *ATy = dyn_cast<ArrayType>(*GEPI))
+ if (CI->getValue().getActiveBits() > 64 ||
+ CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+
+ // All the indices checked out.
+ return true;
+}
+
+bool ConstantExpr::hasIndices() const {
+ return getOpcode() == Instruction::ExtractValue ||
+ getOpcode() == Instruction::InsertValue;
+}
+
+ArrayRef<unsigned> ConstantExpr::getIndices() const {
+ if (const ExtractValueConstantExpr *EVCE =
+ dyn_cast<ExtractValueConstantExpr>(this))
+ return EVCE->Indices;
+
+ return cast<InsertValueConstantExpr>(this)->Indices;
+}
+
+unsigned ConstantExpr::getPredicate() const {
+ assert(isCompare());
+ return ((const CompareConstantExpr*)this)->predicate;
+}
+
+/// getWithOperandReplaced - Return a constant expression identical to this
+/// one, but with the specified operand set to the specified value.
+Constant *
+ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
+ assert(Op->getType() == getOperand(OpNo)->getType() &&
+ "Replacing operand with value of different type!");
+ if (getOperand(OpNo) == Op)
+ return const_cast<ConstantExpr*>(this);
+
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ NewOps.push_back(i == OpNo ? Op : getOperand(i));
+
+ return getWithOperands(NewOps);
+}
+
+/// getWithOperands - This returns the current constant expression with the
+/// operands replaced with the specified values. The specified array must
+/// have the same number of operands as our current one.
+Constant *ConstantExpr::
+getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
+ assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
+ bool AnyChange = Ty != getType();
+ for (unsigned i = 0; i != Ops.size(); ++i)
+ AnyChange |= Ops[i] != getOperand(i);
+
+ if (!AnyChange) // No operands changed, return self.
+ return const_cast<ConstantExpr*>(this);
+
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return ConstantExpr::getCast(getOpcode(), Ops[0], Ty);
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::InsertValue:
+ return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices());
+ case Instruction::ExtractValue:
+ return ConstantExpr::getExtractValue(Ops[0], getIndices());
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr:
+ return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
+ cast<GEPOperator>(this)->isInBounds());
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ return ConstantExpr::get(getOpcode(), Ops[0], Ops[1], SubclassOptionalData);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// isValueValidForType implementations
+
+bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) {
+ unsigned NumBits = Ty->getIntegerBitWidth(); // assert okay
+ if (Ty->isIntegerTy(1))
+ return Val == 0 || Val == 1;
+ if (NumBits >= 64)
+ return true; // always true, has to fit in largest type
+ uint64_t Max = (1ll << NumBits) - 1;
+ return Val <= Max;
+}
+
+bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) {
+ unsigned NumBits = Ty->getIntegerBitWidth();
+ if (Ty->isIntegerTy(1))
+ return Val == 0 || Val == 1 || Val == -1;
+ if (NumBits >= 64)
+ return true; // always true, has to fit in largest type
+ int64_t Min = -(1ll << (NumBits-1));
+ int64_t Max = (1ll << (NumBits-1)) - 1;
+ return (Val >= Min && Val <= Max);
+}
+
+bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ switch (Ty->getTypeID()) {
+ default:
+ return false; // These can't be represented as floating point!
+
+ // FIXME rounding mode needs to be more flexible
+ case Type::HalfTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEhalf)
+ return true;
+ Val2.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::FloatTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEsingle)
+ return true;
+ Val2.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::DoubleTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble)
+ return true;
+ Val2.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::X86_FP80TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::x87DoubleExtended;
+ case Type::FP128TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::IEEEquad;
+ case Type::PPC_FP128TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::PPCDoubleDouble;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Factory Function Implementation
+
+ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) {
+ assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
+ "Cannot create an aggregate zero of non-aggregate type!");
+
+ ConstantAggregateZero *&Entry = Ty->getContext().pImpl->CAZConstants[Ty];
+ if (Entry == 0)
+ Entry = new ConstantAggregateZero(Ty);
+
+ return Entry;
+}
+
+/// destroyConstant - Remove the constant from the constant table.
+///
+void ConstantAggregateZero::destroyConstant() {
+ getContext().pImpl->CAZConstants.erase(getType());
+ destroyConstantImpl();
+}
+
+/// destroyConstant - Remove the constant from the constant table...
+///
+void ConstantArray::destroyConstant() {
+ getType()->getContext().pImpl->ArrayConstants.remove(this);
+ destroyConstantImpl();
+}
+
+
+//---- ConstantStruct::get() implementation...
+//
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantStruct::destroyConstant() {
+ getType()->getContext().pImpl->StructConstants.remove(this);
+ destroyConstantImpl();
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantVector::destroyConstant() {
+ getType()->getContext().pImpl->VectorConstants.remove(this);
+ destroyConstantImpl();
+}
+
+/// getSplatValue - If this is a splat vector constant, meaning that all of
+/// the elements have the same value, return that value. Otherwise return 0.
+Constant *Constant::getSplatValue() const {
+ assert(this->getType()->isVectorTy() && "Only valid for vectors!");
+ if (isa<ConstantAggregateZero>(this))
+ return getNullValue(this->getType()->getVectorElementType());
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ return CV->getSplatValue();
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ return CV->getSplatValue();
+ return 0;
+}
+
+/// getSplatValue - If this is a splat constant, where all of the
+/// elements have the same value, return that value. Otherwise return null.
+Constant *ConstantVector::getSplatValue() const {
+ // Check out first element.
+ Constant *Elt = getOperand(0);
+ // Then make sure all remaining elements point to the same value.
+ for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
+ if (getOperand(I) != Elt)
+ return 0;
+ return Elt;
+}
+
+/// If C is a constant integer then return its value, otherwise C must be a
+/// vector of constant integers, all equal, and the common value is returned.
+const APInt &Constant::getUniqueInteger() const {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->getValue();
+ assert(this->getSplatValue() && "Doesn't contain a unique integer!");
+ const Constant *C = this->getAggregateElement(0U);
+ assert(C && isa<ConstantInt>(C) && "Not a vector of numbers!");
+ return cast<ConstantInt>(C)->getValue();
+}
+
+
+//---- ConstantPointerNull::get() implementation.
+//
+
+ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) {
+ ConstantPointerNull *&Entry = Ty->getContext().pImpl->CPNConstants[Ty];
+ if (Entry == 0)
+ Entry = new ConstantPointerNull(Ty);
+
+ return Entry;
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantPointerNull::destroyConstant() {
+ getContext().pImpl->CPNConstants.erase(getType());
+ // Free the constant and any dangling references to it.
+ destroyConstantImpl();
+}
+
+
+//---- UndefValue::get() implementation.
+//
+
+UndefValue *UndefValue::get(Type *Ty) {
+ UndefValue *&Entry = Ty->getContext().pImpl->UVConstants[Ty];
+ if (Entry == 0)
+ Entry = new UndefValue(Ty);
+
+ return Entry;
+}
+
+// destroyConstant - Remove the constant from the constant table.
+//
+void UndefValue::destroyConstant() {
+ // Free the constant and any dangling references to it.
+ getContext().pImpl->UVConstants.erase(getType());
+ destroyConstantImpl();
+}
+
+//---- BlockAddress::get() implementation.
+//
+
+BlockAddress *BlockAddress::get(BasicBlock *BB) {
+ assert(BB->getParent() != 0 && "Block must have a parent");
+ return get(BB->getParent(), BB);
+}
+
+BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) {
+ BlockAddress *&BA =
+ F->getContext().pImpl->BlockAddresses[std::make_pair(F, BB)];
+ if (BA == 0)
+ BA = new BlockAddress(F, BB);
+
+ assert(BA->getFunction() == F && "Basic block moved between functions");
+ return BA;
+}
+
+BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
+: Constant(Type::getInt8PtrTy(F->getContext()), Value::BlockAddressVal,
+ &Op<0>(), 2) {
+ setOperand(0, F);
+ setOperand(1, BB);
+ BB->AdjustBlockAddressRefCount(1);
+}
+
+
+// destroyConstant - Remove the constant from the constant table.
+//
+void BlockAddress::destroyConstant() {
+ getFunction()->getType()->getContext().pImpl
+ ->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
+ destroyConstantImpl();
+}
+
+void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+ // This could be replacing either the Basic Block or the Function. In either
+ // case, we have to remove the map entry.
+ Function *NewF = getFunction();
+ BasicBlock *NewBB = getBasicBlock();
+
+ if (U == &Op<0>())
+ NewF = cast<Function>(To);
+ else
+ NewBB = cast<BasicBlock>(To);
+
+ // See if the 'new' entry already exists, if not, just update this in place
+ // and return early.
+ BlockAddress *&NewBA =
+ getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)];
+ if (NewBA == 0) {
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
+
+ // Remove the old entry, this can't cause the map to rehash (just a
+ // tombstone will get added).
+ getContext().pImpl->BlockAddresses.erase(std::make_pair(getFunction(),
+ getBasicBlock()));
+ NewBA = this;
+ setOperand(0, NewF);
+ setOperand(1, NewBB);
+ getBasicBlock()->AdjustBlockAddressRefCount(1);
+ return;
+ }
+
+ // Otherwise, I do need to replace this with an existing value.
+ assert(NewBA != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(NewBA);
+
+ destroyConstant();
+}
+
+//---- ConstantExpr::get() implementations.
+//
+
+/// This is a utility function to handle folding of casts and lookup of the
+/// cast in the ExprConstants map. It is used by the various get* methods below.
+static inline Constant *getFoldedCast(
+ Instruction::CastOps opc, Constant *C, Type *Ty) {
+ assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!");
+ // Fold a few common cases
+ if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty))
+ return FC;
+
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+
+ // Look up the constant in the table first to ensure uniqueness.
+ ExprMapKeyType Key(opc, C);
+
+ return pImpl->ExprConstants.getOrCreate(Ty, Key);
+}
+
+Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
+ Instruction::CastOps opc = Instruction::CastOps(oc);
+ assert(Instruction::isCast(opc) && "opcode out of range");
+ assert(C && Ty && "Null arguments to getCast");
+ assert(CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!");
+
+ switch (opc) {
+ default:
+ llvm_unreachable("Invalid cast opcode");
+ case Instruction::Trunc: return getTrunc(C, Ty);
+ case Instruction::ZExt: return getZExt(C, Ty);
+ case Instruction::SExt: return getSExt(C, Ty);
+ case Instruction::FPTrunc: return getFPTrunc(C, Ty);
+ case Instruction::FPExt: return getFPExtend(C, Ty);
+ case Instruction::UIToFP: return getUIToFP(C, Ty);
+ case Instruction::SIToFP: return getSIToFP(C, Ty);
+ case Instruction::FPToUI: return getFPToUI(C, Ty);
+ case Instruction::FPToSI: return getFPToSI(C, Ty);
+ case Instruction::PtrToInt: return getPtrToInt(C, Ty);
+ case Instruction::IntToPtr: return getIntToPtr(C, Ty);
+ case Instruction::BitCast: return getBitCast(C, Ty);
+ }
+}
+
+Constant *ConstantExpr::getZExtOrBitCast(Constant *C, Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getZExt(C, Ty);
+}
+
+Constant *ConstantExpr::getSExtOrBitCast(Constant *C, Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getSExt(C, Ty);
+}
+
+Constant *ConstantExpr::getTruncOrBitCast(Constant *C, Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getTrunc(C, Ty);
+}
+
+Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) {
+ assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast");
+ assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) &&
+ "Invalid cast");
+
+ if (Ty->isIntOrIntVectorTy())
+ return getPtrToInt(S, Ty);
+ return getBitCast(S, Ty);
+}
+
+Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty,
+ bool isSigned) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ Ty->isIntOrIntVectorTy() && "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return getCast(opcode, C, Ty);
+}
+
+Constant *ConstantExpr::getFPCast(Constant *C, Type *Ty) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ if (SrcBits == DstBits)
+ return C; // Avoid a useless cast
+ Instruction::CastOps opcode =
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
+ return getCast(opcode, C, Ty);
+}
+
+Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer");
+ assert(Ty->isIntOrIntVectorTy() && "Trunc produces only integral");
+ assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
+ "SrcTy must be larger than DestTy for Trunc!");
+
+ return getFoldedCast(Instruction::Trunc, C, Ty);
+}
+
+Constant *ConstantExpr::getSExt(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral");
+ assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer");
+ assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
+ "SrcTy must be smaller than DestTy for SExt!");
+
+ return getFoldedCast(Instruction::SExt, C, Ty);
+}
+
+Constant *ConstantExpr::getZExt(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "ZEXt operand must be integral");
+ assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer");
+ assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
+ "SrcTy must be smaller than DestTy for ZExt!");
+
+ return getFoldedCast(Instruction::ZExt, C, Ty);
+}
+
+Constant *ConstantExpr::getFPTrunc(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
+ "This is an illegal floating point truncation!");
+ return getFoldedCast(Instruction::FPTrunc, C, Ty);
+}
+
+Constant *ConstantExpr::getFPExtend(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
+ "This is an illegal floating point extension!");
+ return getFoldedCast(Instruction::FPExt, C, Ty);
+}
+
+Constant *ConstantExpr::getUIToFP(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "This is an illegal uint to floating point cast!");
+ return getFoldedCast(Instruction::UIToFP, C, Ty);
+}
+
+Constant *ConstantExpr::getSIToFP(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "This is an illegal sint to floating point cast!");
+ return getFoldedCast(Instruction::SIToFP, C, Ty);
+}
+
+Constant *ConstantExpr::getFPToUI(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "This is an illegal floating point to uint cast!");
+ return getFoldedCast(Instruction::FPToUI, C, Ty);
+}
+
+Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "This is an illegal floating point to sint cast!");
+ return getFoldedCast(Instruction::FPToSI, C, Ty);
+}
+
+Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) {
+ assert(C->getType()->getScalarType()->isPointerTy() &&
+ "PtrToInt source must be pointer or pointer vector");
+ assert(DstTy->getScalarType()->isIntegerTy() &&
+ "PtrToInt destination must be integer or integer vector");
+ assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
+ if (isa<VectorType>(C->getType()))
+ assert(C->getType()->getVectorNumElements()==DstTy->getVectorNumElements()&&
+ "Invalid cast between a different number of vector elements");
+ return getFoldedCast(Instruction::PtrToInt, C, DstTy);
+}
+
+Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) {
+ assert(C->getType()->getScalarType()->isIntegerTy() &&
+ "IntToPtr source must be integer or integer vector");
+ assert(DstTy->getScalarType()->isPointerTy() &&
+ "IntToPtr destination must be a pointer or pointer vector");
+ assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
+ if (isa<VectorType>(C->getType()))
+ assert(C->getType()->getVectorNumElements()==DstTy->getVectorNumElements()&&
+ "Invalid cast between a different number of vector elements");
+ return getFoldedCast(Instruction::IntToPtr, C, DstTy);
+}
+
+Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy) {
+ assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) &&
+ "Invalid constantexpr bitcast!");
+
+ // It is common to ask for a bitcast of a value to its own type, handle this
+ // speedily.
+ if (C->getType() == DstTy) return C;
+
+ return getFoldedCast(Instruction::BitCast, C, DstTy);
+}
+
+Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags) {
+ // Check the operands for consistency first.
+ assert(Opcode >= Instruction::BinaryOpsBegin &&
+ Opcode < Instruction::BinaryOpsEnd &&
+ "Invalid opcode in binary constant expression");
+ assert(C1->getType() == C2->getType() &&
+ "Operand types in binary constant expression should match");
+
+#ifndef NDEBUG
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an integer operation on a non-integer type!");
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::FDiv:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::FRem:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create a logical operation on a non-integral type!");
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create a shift operation on a non-integer type!");
+ break;
+ default:
+ break;
+ }
+#endif
+
+ if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
+ return FC; // Fold a few common cases.
+
+ Constant *ArgVec[] = { C1, C2 };
+ ExprMapKeyType Key(Opcode, ArgVec, 0, Flags);
+
+ LLVMContextImpl *pImpl = C1->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
+}
+
+Constant *ConstantExpr::getSizeOf(Type* Ty) {
+ // sizeof is implemented as: (i64) gep (Ty*)null, 1
+ // Note that a non-inbounds gep is used, as null isn't within any object.
+ Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
+ Constant *GEP = getGetElementPtr(
+ Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getAlignOf(Type* Ty) {
+ // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
+ // Note that a non-inbounds gep is used, as null isn't within any object.
+ Type *AligningTy =
+ StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
+ Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
+ Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
+ Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
+ Constant *Indices[2] = { Zero, One };
+ Constant *GEP = getGetElementPtr(NullPtr, Indices);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getOffsetOf(StructType* STy, unsigned FieldNo) {
+ return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()),
+ FieldNo));
+}
+
+Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) {
+ // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
+ // Note that a non-inbounds gep is used, as null isn't within any object.
+ Constant *GEPIdx[] = {
+ ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0),
+ FieldNo
+ };
+ Constant *GEP = getGetElementPtr(
+ Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getCompare(unsigned short Predicate,
+ Constant *C1, Constant *C2) {
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid CmpInst predicate");
+ case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
+ case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ return getFCmp(Predicate, C1, C2);
+
+ case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return getICmp(Predicate, C1, C2);
+ }
+}
+
+Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
+ assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands");
+
+ if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
+ return SC; // Fold common cases
+
+ Constant *ArgVec[] = { C, V1, V2 };
+ ExprMapKeyType Key(Instruction::Select, ArgVec);
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
+}
+
+Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
+ bool InBounds) {
+ assert(C->getType()->isPtrOrPtrVectorTy() &&
+ "Non-pointer type for constant GetElementPtr expression");
+
+ if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs))
+ return FC; // Fold a few common cases.
+
+ // Get the result type of the getelementptr!
+ Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs);
+ assert(Ty && "GEP indices invalid!");
+ unsigned AS = C->getType()->getPointerAddressSpace();
+ Type *ReqTy = Ty->getPointerTo(AS);
+ if (VectorType *VecTy = dyn_cast<VectorType>(C->getType()))
+ ReqTy = VectorType::get(ReqTy, VecTy->getNumElements());
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.reserve(1 + Idxs.size());
+ ArgVec.push_back(C);
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
+ assert(Idxs[i]->getType()->isVectorTy() == ReqTy->isVectorTy() &&
+ "getelementptr index type missmatch");
+ assert((!Idxs[i]->getType()->isVectorTy() ||
+ ReqTy->getVectorNumElements() ==
+ Idxs[i]->getType()->getVectorNumElements()) &&
+ "getelementptr index type missmatch");
+ ArgVec.push_back(cast<Constant>(Idxs[i]));
+ }
+ const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
+ InBounds ? GEPOperator::IsInBounds : 0);
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+}
+
+Constant *
+ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
+ assert(LHS->getType() == RHS->getType());
+ assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
+ pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate");
+
+ if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ return FC; // Fold a few common cases...
+
+ // Look up the constant in the table first to ensure uniqueness
+ Constant *ArgVec[] = { LHS, RHS };
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
+
+ Type *ResultTy = Type::getInt1Ty(LHS->getContext());
+ if (VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
+ ResultTy = VectorType::get(ResultTy, VT->getNumElements());
+
+ LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
+}
+
+Constant *
+ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
+ assert(LHS->getType() == RHS->getType());
+ assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate");
+
+ if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ return FC; // Fold a few common cases...
+
+ // Look up the constant in the table first to ensure uniqueness
+ Constant *ArgVec[] = { LHS, RHS };
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
+
+ Type *ResultTy = Type::getInt1Ty(LHS->getContext());
+ if (VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
+ ResultTy = VectorType::get(ResultTy, VT->getNumElements());
+
+ LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
+}
+
+Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
+ assert(Val->getType()->isVectorTy() &&
+ "Tried to create extractelement operation on non-vector type!");
+ assert(Idx->getType()->isIntegerTy(32) &&
+ "Extractelement index must be i32 type!");
+
+ if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
+ return FC; // Fold a few common cases.
+
+ // Look up the constant in the table first to ensure uniqueness
+ Constant *ArgVec[] = { Val, Idx };
+ const ExprMapKeyType Key(Instruction::ExtractElement, ArgVec);
+
+ LLVMContextImpl *pImpl = Val->getContext().pImpl;
+ Type *ReqTy = Val->getType()->getVectorElementType();
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
+ Constant *Idx) {
+ assert(Val->getType()->isVectorTy() &&
+ "Tried to create insertelement operation on non-vector type!");
+ assert(Elt->getType() == Val->getType()->getVectorElementType() &&
+ "Insertelement types must match!");
+ assert(Idx->getType()->isIntegerTy(32) &&
+ "Insertelement index must be i32 type!");
+
+ if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
+ return FC; // Fold a few common cases.
+ // Look up the constant in the table first to ensure uniqueness
+ Constant *ArgVec[] = { Val, Elt, Idx };
+ const ExprMapKeyType Key(Instruction::InsertElement, ArgVec);
+
+ LLVMContextImpl *pImpl = Val->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(Val->getType(), Key);
+}
+
+Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
+ Constant *Mask) {
+ assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector constant expr operands!");
+
+ if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
+ return FC; // Fold a few common cases.
+
+ unsigned NElts = Mask->getType()->getVectorNumElements();
+ Type *EltTy = V1->getType()->getVectorElementType();
+ Type *ShufTy = VectorType::get(EltTy, NElts);
+
+ // Look up the constant in the table first to ensure uniqueness
+ Constant *ArgVec[] = { V1, V2, Mask };
+ const ExprMapKeyType Key(Instruction::ShuffleVector, ArgVec);
+
+ LLVMContextImpl *pImpl = ShufTy->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ShufTy, Key);
+}
+
+Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
+ ArrayRef<unsigned> Idxs) {
+ assert(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs) == Val->getType() &&
+ "insertvalue indices invalid!");
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant insertvalue expression");
+ Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs);
+ assert(FC && "insertvalue constant expr couldn't be folded!");
+ return FC;
+}
+
+Constant *ConstantExpr::getExtractValue(Constant *Agg,
+ ArrayRef<unsigned> Idxs) {
+ assert(Agg->getType()->isFirstClassType() &&
+ "Tried to create extractelement operation on non-first-class type!");
+
+ Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs);
+ (void)ReqTy;
+ assert(ReqTy && "extractvalue indices invalid!");
+
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant extractvalue expression");
+ Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs);
+ assert(FC && "ExtractValue constant expr couldn't be folded!");
+ return FC;
+}
+
+Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ "Cannot NEG a nonintegral value!");
+ return getSub(ConstantFP::getZeroValueForNegation(C->getType()),
+ C, HasNUW, HasNSW);
+}
+
+Constant *ConstantExpr::getFNeg(Constant *C) {
+ assert(C->getType()->isFPOrFPVectorTy() &&
+ "Cannot FNEG a non-floating-point value!");
+ return getFSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
+}
+
+Constant *ConstantExpr::getNot(Constant *C) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ "Cannot NOT a nonintegral value!");
+ return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType()));
+}
+
+Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Add, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
+ return get(Instruction::FAdd, C1, C2);
+}
+
+Constant *ConstantExpr::getSub(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Sub, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
+ return get(Instruction::FSub, C1, C2);
+}
+
+Constant *ConstantExpr::getMul(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Mul, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
+ return get(Instruction::FMul, C1, C2);
+}
+
+Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::UDiv, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::SDiv, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
+ return get(Instruction::FDiv, C1, C2);
+}
+
+Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
+ return get(Instruction::URem, C1, C2);
+}
+
+Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
+ return get(Instruction::SRem, C1, C2);
+}
+
+Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
+ return get(Instruction::FRem, C1, C2);
+}
+
+Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
+ return get(Instruction::And, C1, C2);
+}
+
+Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
+ return get(Instruction::Or, C1, C2);
+}
+
+Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
+ return get(Instruction::Xor, C1, C2);
+}
+
+Constant *ConstantExpr::getShl(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Shl, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::LShr, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::AShr, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+/// getBinOpIdentity - Return the identity for the given binary operation,
+/// i.e. a constant C such that X op C = X and C op X = X for every X. It
+/// returns null if the operator doesn't have an identity.
+Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty) {
+ switch (Opcode) {
+ default:
+ // Doesn't have an identity.
+ return 0;
+
+ case Instruction::Add:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return Constant::getNullValue(Ty);
+
+ case Instruction::Mul:
+ return ConstantInt::get(Ty, 1);
+
+ case Instruction::And:
+ return Constant::getAllOnesValue(Ty);
+ }
+}
+
+/// getBinOpAbsorber - Return the absorbing element for the given binary
+/// operation, i.e. a constant C such that X op C = C and C op X = C for
+/// every X. For example, this returns zero for integer multiplication.
+/// It returns null if the operator doesn't have an absorbing element.
+Constant *ConstantExpr::getBinOpAbsorber(unsigned Opcode, Type *Ty) {
+ switch (Opcode) {
+ default:
+ // Doesn't have an absorber.
+ return 0;
+
+ case Instruction::Or:
+ return Constant::getAllOnesValue(Ty);
+
+ case Instruction::And:
+ case Instruction::Mul:
+ return Constant::getNullValue(Ty);
+ }
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantExpr::destroyConstant() {
+ getType()->getContext().pImpl->ExprConstants.remove(this);
+ destroyConstantImpl();
+}
+
+const char *ConstantExpr::getOpcodeName() const {
+ return Instruction::getOpcodeName(getOpcode());
+}
+
+
+
+GetElementPtrConstantExpr::
+GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
+ Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::GetElementPtr,
+ OperandTraits<GetElementPtrConstantExpr>::op_end(this)
+ - (IdxList.size()+1), IdxList.size()+1) {
+ OperandList[0] = C;
+ for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
+ OperandList[i+1] = IdxList[i];
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantData* implementations
+
+void ConstantDataArray::anchor() {}
+void ConstantDataVector::anchor() {}
+
+/// getElementType - Return the element type of the array/vector.
+Type *ConstantDataSequential::getElementType() const {
+ return getType()->getElementType();
+}
+
+StringRef ConstantDataSequential::getRawDataValues() const {
+ return StringRef(DataElements, getNumElements()*getElementByteSize());
+}
+
+/// isElementTypeCompatible - Return true if a ConstantDataSequential can be
+/// formed with a vector or array of the specified element type.
+/// ConstantDataArray only works with normal float and int types that are
+/// stored densely in memory, not with things like i42 or x86_f80.
+bool ConstantDataSequential::isElementTypeCompatible(const Type *Ty) {
+ if (Ty->isFloatTy() || Ty->isDoubleTy()) return true;
+ if (const IntegerType *IT = dyn_cast<IntegerType>(Ty)) {
+ switch (IT->getBitWidth()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return true;
+ default: break;
+ }
+ }
+ return false;
+}
+
+/// getNumElements - Return the number of elements in the array or vector.
+unsigned ConstantDataSequential::getNumElements() const {
+ if (ArrayType *AT = dyn_cast<ArrayType>(getType()))
+ return AT->getNumElements();
+ return getType()->getVectorNumElements();
+}
+
+
+/// getElementByteSize - Return the size in bytes of the elements in the data.
+uint64_t ConstantDataSequential::getElementByteSize() const {
+ return getElementType()->getPrimitiveSizeInBits()/8;
+}
+
+/// getElementPointer - Return the start of the specified element.
+const char *ConstantDataSequential::getElementPointer(unsigned Elt) const {
+ assert(Elt < getNumElements() && "Invalid Elt");
+ return DataElements+Elt*getElementByteSize();
+}
+
+
+/// isAllZeros - return true if the array is empty or all zeros.
+static bool isAllZeros(StringRef Arr) {
+ for (StringRef::iterator I = Arr.begin(), E = Arr.end(); I != E; ++I)
+ if (*I != 0)
+ return false;
+ return true;
+}
+
+/// getImpl - This is the underlying implementation of all of the
+/// ConstantDataSequential::get methods. They all thunk down to here, providing
+/// the correct element type. We take the bytes in as a StringRef because
+/// we *want* an underlying "char*" to avoid TBAA type punning violations.
+Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) {
+ assert(isElementTypeCompatible(Ty->getSequentialElementType()));
+ // If the elements are all zero or there are no elements, return a CAZ, which
+ // is more dense and canonical.
+ if (isAllZeros(Elements))
+ return ConstantAggregateZero::get(Ty);
+
+ // Do a lookup to see if we have already formed one of these.
+ StringMap<ConstantDataSequential*>::MapEntryTy &Slot =
+ Ty->getContext().pImpl->CDSConstants.GetOrCreateValue(Elements);
+
+ // The bucket can point to a linked list of different CDS's that have the same
+ // body but different types. For example, 0,0,0,1 could be a 4 element array
+ // of i8, or a 1-element array of i32. They'll both end up in the same
+ /// StringMap bucket, linked up by their Next pointers. Walk the list.
+ ConstantDataSequential **Entry = &Slot.getValue();
+ for (ConstantDataSequential *Node = *Entry; Node != 0;
+ Entry = &Node->Next, Node = *Entry)
+ if (Node->getType() == Ty)
+ return Node;
+
+ // Okay, we didn't get a hit. Create a node of the right class, link it in,
+ // and return it.
+ if (isa<ArrayType>(Ty))
+ return *Entry = new ConstantDataArray(Ty, Slot.getKeyData());
+
+ assert(isa<VectorType>(Ty));
+ return *Entry = new ConstantDataVector(Ty, Slot.getKeyData());
+}
+
+void ConstantDataSequential::destroyConstant() {
+ // Remove the constant from the StringMap.
+ StringMap<ConstantDataSequential*> &CDSConstants =
+ getType()->getContext().pImpl->CDSConstants;
+
+ StringMap<ConstantDataSequential*>::iterator Slot =
+ CDSConstants.find(getRawDataValues());
+
+ assert(Slot != CDSConstants.end() && "CDS not found in uniquing table");
+
+ ConstantDataSequential **Entry = &Slot->getValue();
+
+ // Remove the entry from the hash table.
+ if ((*Entry)->Next == 0) {
+ // If there is only one value in the bucket (common case) it must be this
+ // entry, and removing the entry should remove the bucket completely.
+ assert((*Entry) == this && "Hash mismatch in ConstantDataSequential");
+ getContext().pImpl->CDSConstants.erase(Slot);
+ } else {
+ // Otherwise, there are multiple entries linked off the bucket, unlink the
+ // node we care about but keep the bucket around.
+ for (ConstantDataSequential *Node = *Entry; ;
+ Entry = &Node->Next, Node = *Entry) {
+ assert(Node && "Didn't find entry in its uniquing hash table!");
+ // If we found our entry, unlink it from the list and we're done.
+ if (Node == this) {
+ *Entry = Node->Next;
+ break;
+ }
+ }
+ }
+
+ // If we were part of a list, make sure that we don't delete the list that is
+ // still owned by the uniquing map.
+ Next = 0;
+
+ // Finally, actually delete it.
+ destroyConstantImpl();
+}
+
+/// get() constructors - Return a constant with array type with an element
+/// count and element type matching the ArrayRef passed in. Note that this
+/// can return a ConstantAggregateZero object.
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint8_t> Elts) {
+ Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<float> Elts) {
+ Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<double> Elts) {
+ Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+}
+
+/// getString - This method constructs a CDS and initializes it with a text
+/// string. The default behavior (AddNull==true) causes a null terminator to
+/// be placed at the end of the array (increasing the length of the string by
+/// one more than the StringRef would normally indicate. Pass AddNull=false
+/// to disable this behavior.
+Constant *ConstantDataArray::getString(LLVMContext &Context,
+ StringRef Str, bool AddNull) {
+ if (!AddNull) {
+ const uint8_t *Data = reinterpret_cast<const uint8_t *>(Str.data());
+ return get(Context, ArrayRef<uint8_t>(const_cast<uint8_t *>(Data),
+ Str.size()));
+ }
+
+ SmallVector<uint8_t, 64> ElementVals;
+ ElementVals.append(Str.begin(), Str.end());
+ ElementVals.push_back(0);
+ return get(Context, ElementVals);
+}
+
+/// get() constructors - Return a constant with vector type with an element
+/// count and element type matching the ArrayRef passed in. Note that this
+/// can return a ConstantAggregateZero object.
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint8_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<float> Elts) {
+ Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<double> Elts) {
+ Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+}
+
+Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
+ assert(isElementTypeCompatible(V->getType()) &&
+ "Element type not compatible with ConstantData");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ assert(CI->getType()->isIntegerTy(64) && "Unsupported ConstantData type");
+ SmallVector<uint64_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts(NumElts, CFP->getValueAPF().convertToFloat());
+ return get(V->getContext(), Elts);
+ }
+ if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts(NumElts,
+ CFP->getValueAPF().convertToDouble());
+ return get(V->getContext(), Elts);
+ }
+ }
+ return ConstantVector::getSplat(NumElts, V);
+}
+
+
+/// getElementAsInteger - If this is a sequential container of integers (of
+/// any size), return the specified element in the low bits of a uint64_t.
+uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const {
+ assert(isa<IntegerType>(getElementType()) &&
+ "Accessor can only be used when element is an integer");
+ const char *EltPtr = getElementPointer(Elt);
+
+ // The data is stored in host byte order, make sure to cast back to the right
+ // type to load with the right endianness.
+ switch (getElementType()->getIntegerBitWidth()) {
+ default: llvm_unreachable("Invalid bitwidth for CDS");
+ case 8:
+ return *const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(EltPtr));
+ case 16:
+ return *const_cast<uint16_t *>(reinterpret_cast<const uint16_t *>(EltPtr));
+ case 32:
+ return *const_cast<uint32_t *>(reinterpret_cast<const uint32_t *>(EltPtr));
+ case 64:
+ return *const_cast<uint64_t *>(reinterpret_cast<const uint64_t *>(EltPtr));
+ }
+}
+
+/// getElementAsAPFloat - If this is a sequential container of floating point
+/// type, return the specified element as an APFloat.
+APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const {
+ const char *EltPtr = getElementPointer(Elt);
+
+ switch (getElementType()->getTypeID()) {
+ default:
+ llvm_unreachable("Accessor can only be used when element is float/double!");
+ case Type::FloatTyID: {
+ const float *FloatPrt = reinterpret_cast<const float *>(EltPtr);
+ return APFloat(*const_cast<float *>(FloatPrt));
+ }
+ case Type::DoubleTyID: {
+ const double *DoublePtr = reinterpret_cast<const double *>(EltPtr);
+ return APFloat(*const_cast<double *>(DoublePtr));
+ }
+ }
+}
+
+/// getElementAsFloat - If this is an sequential container of floats, return
+/// the specified element as a float.
+float ConstantDataSequential::getElementAsFloat(unsigned Elt) const {
+ assert(getElementType()->isFloatTy() &&
+ "Accessor can only be used when element is a 'float'");
+ const float *EltPtr = reinterpret_cast<const float *>(getElementPointer(Elt));
+ return *const_cast<float *>(EltPtr);
+}
+
+/// getElementAsDouble - If this is an sequential container of doubles, return
+/// the specified element as a float.
+double ConstantDataSequential::getElementAsDouble(unsigned Elt) const {
+ assert(getElementType()->isDoubleTy() &&
+ "Accessor can only be used when element is a 'float'");
+ const double *EltPtr =
+ reinterpret_cast<const double *>(getElementPointer(Elt));
+ return *const_cast<double *>(EltPtr);
+}
+
+/// getElementAsConstant - Return a Constant for a specified index's element.
+/// Note that this has to compute a new constant to return, so it isn't as
+/// efficient as getElementAsInteger/Float/Double.
+Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const {
+ if (getElementType()->isFloatTy() || getElementType()->isDoubleTy())
+ return ConstantFP::get(getContext(), getElementAsAPFloat(Elt));
+
+ return ConstantInt::get(getElementType(), getElementAsInteger(Elt));
+}
+
+/// isString - This method returns true if this is an array of i8.
+bool ConstantDataSequential::isString() const {
+ return isa<ArrayType>(getType()) && getElementType()->isIntegerTy(8);
+}
+
+/// isCString - This method returns true if the array "isString", ends with a
+/// nul byte, and does not contains any other nul bytes.
+bool ConstantDataSequential::isCString() const {
+ if (!isString())
+ return false;
+
+ StringRef Str = getAsString();
+
+ // The last value must be nul.
+ if (Str.back() != 0) return false;
+
+ // Other elements must be non-nul.
+ return Str.drop_back().find(0) == StringRef::npos;
+}
+
+/// getSplatValue - If this is a splat constant, meaning that all of the
+/// elements have the same value, return that value. Otherwise return NULL.
+Constant *ConstantDataVector::getSplatValue() const {
+ const char *Base = getRawDataValues().data();
+
+ // Compare elements 1+ to the 0'th element.
+ unsigned EltSize = getElementByteSize();
+ for (unsigned i = 1, e = getNumElements(); i != e; ++i)
+ if (memcmp(Base, Base+i*EltSize, EltSize))
+ return 0;
+
+ // If they're all the same, return the 0th one as a representative.
+ return getElementAsConstant(0);
+}
+
+//===----------------------------------------------------------------------===//
+// replaceUsesOfWithOnConstant implementations
+
+/// replaceUsesOfWithOnConstant - Update this constant array to change uses of
+/// 'From' to be uses of 'To'. This must update the uniquing data structures
+/// etc.
+///
+/// Note that we intentionally replace all uses of From with To here. Consider
+/// a large array that uses 'From' 1000 times. By handling this case all here,
+/// ConstantArray::replaceUsesOfWithOnConstant is only invoked once, and that
+/// single invocation handles all 1000 uses. Handling them one at a time would
+/// work, but would be really slow because it would have to unique each updated
+/// array instance.
+///
+void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+
+ SmallVector<Constant*, 8> Values;
+ LLVMContextImpl::ArrayConstantsTy::LookupKey Lookup;
+ Lookup.first = cast<ArrayType>(getType());
+ Values.reserve(getNumOperands()); // Build replacement array.
+
+ // Fill values with the modified operands of the constant array. Also,
+ // compute whether this turns into an all-zeros array.
+ unsigned NumUpdated = 0;
+
+ // Keep track of whether all the values in the array are "ToC".
+ bool AllSame = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ if (Val == From) {
+ Val = ToC;
+ ++NumUpdated;
+ }
+ Values.push_back(Val);
+ AllSame &= Val == ToC;
+ }
+
+ Constant *Replacement = 0;
+ if (AllSame && ToC->isNullValue()) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else if (AllSame && isa<UndefValue>(ToC)) {
+ Replacement = UndefValue::get(getType());
+ } else {
+ // Check to see if we have this array type already.
+ Lookup.second = makeArrayRef(Values);
+ LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I =
+ pImpl->ArrayConstants.find(Lookup);
+
+ if (I != pImpl->ArrayConstants.map_end()) {
+ Replacement = I->first;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant array, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ pImpl->ArrayConstants.remove(this);
+
+ // Update to the new value. Optimize for the case when we have a single
+ // operand that we're changing, but handle bulk updates efficiently.
+ if (NumUpdated == 1) {
+ unsigned OperandToUpdate = U - OperandList;
+ assert(getOperand(OperandToUpdate) == From &&
+ "ReplaceAllUsesWith broken!");
+ setOperand(OperandToUpdate, ToC);
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) == From)
+ setOperand(i, ToC);
+ }
+ pImpl->ArrayConstants.insert(this);
+ return;
+ }
+ }
+
+ // Otherwise, I do need to replace this with an existing value.
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ unsigned OperandToUpdate = U-OperandList;
+ assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
+
+ SmallVector<Constant*, 8> Values;
+ LLVMContextImpl::StructConstantsTy::LookupKey Lookup;
+ Lookup.first = cast<StructType>(getType());
+ Values.reserve(getNumOperands()); // Build replacement struct.
+
+ // Fill values with the modified operands of the constant struct. Also,
+ // compute whether this turns into an all-zeros struct.
+ bool isAllZeros = false;
+ bool isAllUndef = false;
+ if (ToC->isNullValue()) {
+ isAllZeros = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ Values.push_back(Val);
+ if (isAllZeros) isAllZeros = Val->isNullValue();
+ }
+ } else if (isa<UndefValue>(ToC)) {
+ isAllUndef = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ Values.push_back(Val);
+ if (isAllUndef) isAllUndef = isa<UndefValue>(Val);
+ }
+ } else {
+ for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O)
+ Values.push_back(cast<Constant>(O->get()));
+ }
+ Values[OperandToUpdate] = ToC;
+
+ LLVMContextImpl *pImpl = getContext().pImpl;
+
+ Constant *Replacement = 0;
+ if (isAllZeros) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else if (isAllUndef) {
+ Replacement = UndefValue::get(getType());
+ } else {
+ // Check to see if we have this struct type already.
+ Lookup.second = makeArrayRef(Values);
+ LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
+ pImpl->StructConstants.find(Lookup);
+
+ if (I != pImpl->StructConstants.map_end()) {
+ Replacement = I->first;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant struct, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ pImpl->StructConstants.remove(this);
+
+ // Update to the new value.
+ setOperand(OperandToUpdate, ToC);
+ pImpl->StructConstants.insert(this);
+ return;
+ }
+ }
+
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+
+ SmallVector<Constant*, 8> Values;
+ Values.reserve(getNumOperands()); // Build replacement array...
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Constant *Val = getOperand(i);
+ if (Val == From) Val = cast<Constant>(To);
+ Values.push_back(Val);
+ }
+
+ Constant *Replacement = get(Values);
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
+ Use *U) {
+ assert(isa<Constant>(ToV) && "Cannot make Constant refer to non-constant!");
+ Constant *To = cast<Constant>(ToV);
+
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Constant *Op = getOperand(i);
+ NewOps.push_back(Op == From ? To : Op);
+ }
+
+ Constant *Replacement = getWithOperands(NewOps);
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+Instruction *ConstantExpr::getAsInstruction() {
+ SmallVector<Value*,4> ValueOperands;
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ ValueOperands.push_back(cast<Value>(I));
+
+ ArrayRef<Value*> Ops(ValueOperands);
+
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return CastInst::Create((Instruction::CastOps)getOpcode(),
+ Ops[0], getType());
+ case Instruction::Select:
+ return SelectInst::Create(Ops[0], Ops[1], Ops[2]);
+ case Instruction::InsertElement:
+ return InsertElementInst::Create(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ExtractElementInst::Create(Ops[0], Ops[1]);
+ case Instruction::InsertValue:
+ return InsertValueInst::Create(Ops[0], Ops[1], getIndices());
+ case Instruction::ExtractValue:
+ return ExtractValueInst::Create(Ops[0], getIndices());
+ case Instruction::ShuffleVector:
+ return new ShuffleVectorInst(Ops[0], Ops[1], Ops[2]);
+
+ case Instruction::GetElementPtr:
+ if (cast<GEPOperator>(this)->isInBounds())
+ return GetElementPtrInst::CreateInBounds(Ops[0], Ops.slice(1));
+ else
+ return GetElementPtrInst::Create(Ops[0], Ops.slice(1));
+
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return CmpInst::Create((Instruction::OtherOps)getOpcode(),
+ getPredicate(), Ops[0], Ops[1]);
+
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ BinaryOperator *BO =
+ BinaryOperator::Create((Instruction::BinaryOps)getOpcode(),
+ Ops[0], Ops[1]);
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(SubclassOptionalData &
+ OverflowingBinaryOperator::NoUnsignedWrap);
+ BO->setHasNoSignedWrap(SubclassOptionalData &
+ OverflowingBinaryOperator::NoSignedWrap);
+ }
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(SubclassOptionalData & PossiblyExactOperator::IsExact);
+ return BO;
+ }
+}
diff --git a/contrib/llvm/lib/IR/ConstantsContext.h b/contrib/llvm/lib/IR/ConstantsContext.h
new file mode 100644
index 000000000000..e9958589f53c
--- /dev/null
+++ b/contrib/llvm/lib/IR/ConstantsContext.h
@@ -0,0 +1,774 @@
+//===-- ConstantsContext.h - Constants-related Context Interals -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for creating and managing constants.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CONSTANTSCONTEXT_H
+#define LLVM_CONSTANTSCONTEXT_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+namespace llvm {
+template<class ValType>
+struct ConstantTraits;
+
+/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement unary constant exprs.
+class UnaryConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ UnaryConstantExpr(unsigned Opcode, Constant *C, Type *Ty)
+ : ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
+ Op<0>() = C;
+ }
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement binary constant exprs.
+class BinaryConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags)
+ : ConstantExpr(C1->getType(), Opcode, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ SubclassOptionalData = Flags;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// SelectConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement select constant exprs.
+class SelectConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractelement constant exprs.
+class ExtractElementConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ ExtractElementConstantExpr(Constant *C1, Constant *C2)
+ : ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
+ Instruction::ExtractElement, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertelement constant exprs.
+class InsertElementConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C1->getType(), Instruction::InsertElement,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ShuffleVectorConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// shufflevector constant exprs.
+class ShuffleVectorConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(VectorType::get(
+ cast<VectorType>(C1->getType())->getElementType(),
+ cast<VectorType>(C3->getType())->getNumElements()),
+ Instruction::ShuffleVector,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractvalue constant exprs.
+class ExtractValueConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ ExtractValueConstantExpr(Constant *Agg,
+ const SmallVector<unsigned, 4> &IdxList,
+ Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ }
+
+ /// Indices - These identify which value to extract.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertvalue constant exprs.
+class InsertValueConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ InsertValueConstantExpr(Constant *Agg, Constant *Val,
+ const SmallVector<unsigned, 4> &IdxList,
+ Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ Op<1>() = Val;
+ }
+
+ /// Indices - These identify the position for the insertion.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+
+/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
+/// used behind the scenes to implement getelementpr constant exprs.
+class GetElementPtrConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
+ Type *DestTy);
+public:
+ static GetElementPtrConstantExpr *Create(Constant *C,
+ ArrayRef<Constant*> IdxList,
+ Type *DestTy,
+ unsigned Flags) {
+ GetElementPtrConstantExpr *Result =
+ new(IdxList.size() + 1) GetElementPtrConstantExpr(C, IdxList, DestTy);
+ Result->SubclassOptionalData = Flags;
+ return Result;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+// CompareConstantExpr - This class is private to Constants.cpp, and is used
+// behind the scenes to implement ICmp and FCmp constant expressions. This is
+// needed in order to store the predicate value for these instructions.
+class CompareConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ unsigned short predicate;
+ CompareConstantExpr(Type *ty, Instruction::OtherOps opc,
+ unsigned short pred, Constant* LHS, Constant* RHS)
+ : ConstantExpr(ty, opc, &Op<0>(), 2), predicate(pred) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+template <>
+struct OperandTraits<UnaryConstantExpr> :
+ public FixedNumOperandTraits<UnaryConstantExpr, 1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<BinaryConstantExpr> :
+ public FixedNumOperandTraits<BinaryConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<SelectConstantExpr> :
+ public FixedNumOperandTraits<SelectConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractElementConstantExpr> :
+ public FixedNumOperandTraits<ExtractElementConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertElementConstantExpr> :
+ public FixedNumOperandTraits<InsertElementConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<ShuffleVectorConstantExpr> :
+ public FixedNumOperandTraits<ShuffleVectorConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractValueConstantExpr> :
+ public FixedNumOperandTraits<ExtractValueConstantExpr, 1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertValueConstantExpr> :
+ public FixedNumOperandTraits<InsertValueConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<GetElementPtrConstantExpr> :
+ public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
+
+
+template <>
+struct OperandTraits<CompareConstantExpr> :
+ public FixedNumOperandTraits<CompareConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
+
+struct ExprMapKeyType {
+ ExprMapKeyType(unsigned opc,
+ ArrayRef<Constant*> ops,
+ unsigned short flags = 0,
+ unsigned short optionalflags = 0,
+ ArrayRef<unsigned> inds = ArrayRef<unsigned>())
+ : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags),
+ operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {}
+ uint8_t opcode;
+ uint8_t subclassoptionaldata;
+ uint16_t subclassdata;
+ std::vector<Constant*> operands;
+ SmallVector<unsigned, 4> indices;
+ bool operator==(const ExprMapKeyType& that) const {
+ return this->opcode == that.opcode &&
+ this->subclassdata == that.subclassdata &&
+ this->subclassoptionaldata == that.subclassoptionaldata &&
+ this->operands == that.operands &&
+ this->indices == that.indices;
+ }
+ bool operator<(const ExprMapKeyType & that) const {
+ if (this->opcode != that.opcode) return this->opcode < that.opcode;
+ if (this->operands != that.operands) return this->operands < that.operands;
+ if (this->subclassdata != that.subclassdata)
+ return this->subclassdata < that.subclassdata;
+ if (this->subclassoptionaldata != that.subclassoptionaldata)
+ return this->subclassoptionaldata < that.subclassoptionaldata;
+ if (this->indices != that.indices) return this->indices < that.indices;
+ return false;
+ }
+
+ bool operator!=(const ExprMapKeyType& that) const {
+ return !(*this == that);
+ }
+};
+
+struct InlineAsmKeyType {
+ InlineAsmKeyType(StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack, InlineAsm::AsmDialect asmDialect)
+ : asm_string(AsmString), constraints(Constraints),
+ has_side_effects(hasSideEffects), is_align_stack(isAlignStack),
+ asm_dialect(asmDialect) {}
+ std::string asm_string;
+ std::string constraints;
+ bool has_side_effects;
+ bool is_align_stack;
+ InlineAsm::AsmDialect asm_dialect;
+ bool operator==(const InlineAsmKeyType& that) const {
+ return this->asm_string == that.asm_string &&
+ this->constraints == that.constraints &&
+ this->has_side_effects == that.has_side_effects &&
+ this->is_align_stack == that.is_align_stack &&
+ this->asm_dialect == that.asm_dialect;
+ }
+ bool operator<(const InlineAsmKeyType& that) const {
+ if (this->asm_string != that.asm_string)
+ return this->asm_string < that.asm_string;
+ if (this->constraints != that.constraints)
+ return this->constraints < that.constraints;
+ if (this->has_side_effects != that.has_side_effects)
+ return this->has_side_effects < that.has_side_effects;
+ if (this->is_align_stack != that.is_align_stack)
+ return this->is_align_stack < that.is_align_stack;
+ if (this->asm_dialect != that.asm_dialect)
+ return this->asm_dialect < that.asm_dialect;
+ return false;
+ }
+
+ bool operator!=(const InlineAsmKeyType& that) const {
+ return !(*this == that);
+ }
+};
+
+// The number of operands for each ConstantCreator::create method is
+// determined by the ConstantTraits template.
+// ConstantCreator - A class that is used to create constants by
+// ConstantUniqueMap*. This class should be partially specialized if there is
+// something strange that needs to be done to interface to the ctor for the
+// constant.
+//
+template<typename T, typename Alloc>
+struct ConstantTraits< std::vector<T, Alloc> > {
+ static unsigned uses(const std::vector<T, Alloc>& v) {
+ return v.size();
+ }
+};
+
+template<>
+struct ConstantTraits<Constant *> {
+ static unsigned uses(Constant * const & v) {
+ return 1;
+ }
+};
+
+template<class ConstantClass, class TypeClass, class ValType>
+struct ConstantCreator {
+ static ConstantClass *create(TypeClass *Ty, const ValType &V) {
+ return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V);
+ }
+};
+
+template<class ConstantClass, class TypeClass>
+struct ConstantArrayCreator {
+ static ConstantClass *create(TypeClass *Ty, ArrayRef<Constant*> V) {
+ return new(V.size()) ConstantClass(Ty, V);
+ }
+};
+
+template<class ConstantClass>
+struct ConstantKeyData {
+ typedef void ValType;
+ static ValType getValType(ConstantClass *C) {
+ llvm_unreachable("Unknown Constant type!");
+ }
+};
+
+template<>
+struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
+ static ConstantExpr *create(Type *Ty, const ExprMapKeyType &V,
+ unsigned short pred = 0) {
+ if (Instruction::isCast(V.opcode))
+ return new UnaryConstantExpr(V.opcode, V.operands[0], Ty);
+ if ((V.opcode >= Instruction::BinaryOpsBegin &&
+ V.opcode < Instruction::BinaryOpsEnd))
+ return new BinaryConstantExpr(V.opcode, V.operands[0], V.operands[1],
+ V.subclassoptionaldata);
+ if (V.opcode == Instruction::Select)
+ return new SelectConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ExtractElement)
+ return new ExtractElementConstantExpr(V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::InsertElement)
+ return new InsertElementConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ShuffleVector)
+ return new ShuffleVectorConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::InsertValue)
+ return new InsertValueConstantExpr(V.operands[0], V.operands[1],
+ V.indices, Ty);
+ if (V.opcode == Instruction::ExtractValue)
+ return new ExtractValueConstantExpr(V.operands[0], V.indices, Ty);
+ if (V.opcode == Instruction::GetElementPtr) {
+ std::vector<Constant*> IdxList(V.operands.begin()+1, V.operands.end());
+ return GetElementPtrConstantExpr::Create(V.operands[0], IdxList, Ty,
+ V.subclassoptionaldata);
+ }
+
+ // The compare instructions are weird. We have to encode the predicate
+ // value and it is combined with the instruction opcode by multiplying
+ // the opcode by one hundred. We must decode this to get the predicate.
+ if (V.opcode == Instruction::ICmp)
+ return new CompareConstantExpr(Ty, Instruction::ICmp, V.subclassdata,
+ V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::FCmp)
+ return new CompareConstantExpr(Ty, Instruction::FCmp, V.subclassdata,
+ V.operands[0], V.operands[1]);
+ llvm_unreachable("Invalid ConstantExpr!");
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantExpr> {
+ typedef ExprMapKeyType ValType;
+ static ValType getValType(ConstantExpr *CE) {
+ std::vector<Constant*> Operands;
+ Operands.reserve(CE->getNumOperands());
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ Operands.push_back(cast<Constant>(CE->getOperand(i)));
+ return ExprMapKeyType(CE->getOpcode(), Operands,
+ CE->isCompare() ? CE->getPredicate() : 0,
+ CE->getRawSubclassOptionalData(),
+ CE->hasIndices() ?
+ CE->getIndices() : ArrayRef<unsigned>());
+ }
+};
+
+template<>
+struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> {
+ static InlineAsm *create(PointerType *Ty, const InlineAsmKeyType &Key) {
+ return new InlineAsm(Ty, Key.asm_string, Key.constraints,
+ Key.has_side_effects, Key.is_align_stack,
+ Key.asm_dialect);
+ }
+};
+
+template<>
+struct ConstantKeyData<InlineAsm> {
+ typedef InlineAsmKeyType ValType;
+ static ValType getValType(InlineAsm *Asm) {
+ return InlineAsmKeyType(Asm->getAsmString(), Asm->getConstraintString(),
+ Asm->hasSideEffects(), Asm->isAlignStack(),
+ Asm->getDialect());
+ }
+};
+
+template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
+ bool HasLargeKey = false /*true for arrays and structs*/ >
+class ConstantUniqueMap {
+public:
+ typedef std::pair<TypeClass*, ValType> MapKey;
+ typedef std::map<MapKey, ConstantClass *> MapTy;
+ typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy;
+private:
+ /// Map - This is the main map from the element descriptor to the Constants.
+ /// This is the primary way we avoid creating two of the same shape
+ /// constant.
+ MapTy Map;
+
+ /// InverseMap - If "HasLargeKey" is true, this contains an inverse mapping
+ /// from the constants to their element in Map. This is important for
+ /// removal of constants from the array, which would otherwise have to scan
+ /// through the map with very large keys.
+ InverseMapTy InverseMap;
+
+public:
+ typename MapTy::iterator map_begin() { return Map.begin(); }
+ typename MapTy::iterator map_end() { return Map.end(); }
+
+ void freeConstants() {
+ for (typename MapTy::iterator I=Map.begin(), E=Map.end();
+ I != E; ++I) {
+ // Asserts that use_empty().
+ delete I->second;
+ }
+ }
+
+ /// InsertOrGetItem - Return an iterator for the specified element.
+ /// If the element exists in the map, the returned iterator points to the
+ /// entry and Exists=true. If not, the iterator points to the newly
+ /// inserted entry and returns Exists=false. Newly inserted entries have
+ /// I->second == 0, and should be filled in.
+ typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, ConstantClass *>
+ &InsertVal,
+ bool &Exists) {
+ std::pair<typename MapTy::iterator, bool> IP = Map.insert(InsertVal);
+ Exists = !IP.second;
+ return IP.first;
+ }
+
+private:
+ typename MapTy::iterator FindExistingElement(ConstantClass *CP) {
+ if (HasLargeKey) {
+ typename InverseMapTy::iterator IMI = InverseMap.find(CP);
+ assert(IMI != InverseMap.end() && IMI->second != Map.end() &&
+ IMI->second->second == CP &&
+ "InverseMap corrupt!");
+ return IMI->second;
+ }
+
+ typename MapTy::iterator I =
+ Map.find(MapKey(static_cast<TypeClass*>(CP->getType()),
+ ConstantKeyData<ConstantClass>::getValType(CP)));
+ if (I == Map.end() || I->second != CP) {
+ // FIXME: This should not use a linear scan. If this gets to be a
+ // performance problem, someone should look at this.
+ for (I = Map.begin(); I != Map.end() && I->second != CP; ++I)
+ /* empty */;
+ }
+ return I;
+ }
+
+ ConstantClass *Create(TypeClass *Ty, ValRefType V,
+ typename MapTy::iterator I) {
+ ConstantClass* Result =
+ ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
+
+ assert(Result->getType() == Ty && "Type specified is not correct!");
+ I = Map.insert(I, std::make_pair(MapKey(Ty, V), Result));
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.insert(std::make_pair(Result, I));
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
+ ConstantClass *getOrCreate(TypeClass *Ty, ValRefType V) {
+ MapKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = I->second;
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
+ return Result;
+ }
+
+ void remove(ConstantClass *CP) {
+ typename MapTy::iterator I = FindExistingElement(CP);
+ assert(I != Map.end() && "Constant not found in constant table!");
+ assert(I->second == CP && "Didn't find correct element?");
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.erase(CP);
+
+ Map.erase(I);
+ }
+
+ /// MoveConstantToNewSlot - If we are about to change C to be the element
+ /// specified by I, update our internal data structures to reflect this
+ /// fact.
+ void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
+ // First, remove the old location of the specified constant in the map.
+ typename MapTy::iterator OldI = FindExistingElement(C);
+ assert(OldI != Map.end() && "Constant not found in constant table!");
+ assert(OldI->second == C && "Didn't find correct element?");
+
+ // Remove the old entry from the map.
+ Map.erase(OldI);
+
+ // Update the inverse map so that we know that this constant is now
+ // located at descriptor I.
+ if (HasLargeKey) {
+ assert(I->second == C && "Bad inversemap entry!");
+ InverseMap[C] = I;
+ }
+ }
+
+ void dump() const {
+ DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n");
+ }
+};
+
+// Unique map for aggregate constants
+template<class TypeClass, class ConstantClass>
+class ConstantAggrUniqueMap {
+public:
+ typedef ArrayRef<Constant*> Operands;
+ typedef std::pair<TypeClass*, Operands> LookupKey;
+private:
+ struct MapInfo {
+ typedef DenseMapInfo<ConstantClass*> ConstantClassInfo;
+ typedef DenseMapInfo<Constant*> ConstantInfo;
+ typedef DenseMapInfo<TypeClass*> TypeClassInfo;
+ static inline ConstantClass* getEmptyKey() {
+ return ConstantClassInfo::getEmptyKey();
+ }
+ static inline ConstantClass* getTombstoneKey() {
+ return ConstantClassInfo::getTombstoneKey();
+ }
+ static unsigned getHashValue(const ConstantClass *CP) {
+ SmallVector<Constant*, 8> CPOperands;
+ CPOperands.reserve(CP->getNumOperands());
+ for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I)
+ CPOperands.push_back(CP->getOperand(I));
+ return getHashValue(LookupKey(CP->getType(), CPOperands));
+ }
+ static bool isEqual(const ConstantClass *LHS, const ConstantClass *RHS) {
+ return LHS == RHS;
+ }
+ static unsigned getHashValue(const LookupKey &Val) {
+ return hash_combine(Val.first, hash_combine_range(Val.second.begin(),
+ Val.second.end()));
+ }
+ static bool isEqual(const LookupKey &LHS, const ConstantClass *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ if (LHS.first != RHS->getType()
+ || LHS.second.size() != RHS->getNumOperands())
+ return false;
+ for (unsigned I = 0, E = RHS->getNumOperands(); I < E; ++I) {
+ if (LHS.second[I] != RHS->getOperand(I))
+ return false;
+ }
+ return true;
+ }
+ };
+public:
+ typedef DenseMap<ConstantClass *, char, MapInfo> MapTy;
+
+private:
+ /// Map - This is the main map from the element descriptor to the Constants.
+ /// This is the primary way we avoid creating two of the same shape
+ /// constant.
+ MapTy Map;
+
+public:
+ typename MapTy::iterator map_begin() { return Map.begin(); }
+ typename MapTy::iterator map_end() { return Map.end(); }
+
+ void freeConstants() {
+ for (typename MapTy::iterator I=Map.begin(), E=Map.end();
+ I != E; ++I) {
+ // Asserts that use_empty().
+ delete I->first;
+ }
+ }
+
+private:
+ typename MapTy::iterator findExistingElement(ConstantClass *CP) {
+ return Map.find(CP);
+ }
+
+ ConstantClass *Create(TypeClass *Ty, Operands V, typename MapTy::iterator I) {
+ ConstantClass* Result =
+ ConstantArrayCreator<ConstantClass,TypeClass>::create(Ty, V);
+
+ assert(Result->getType() == Ty && "Type specified is not correct!");
+ Map[Result] = '\0';
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
+ ConstantClass *getOrCreate(TypeClass *Ty, Operands V) {
+ LookupKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find_as(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = I->first;
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
+ return Result;
+ }
+
+ /// Find the constant by lookup key.
+ typename MapTy::iterator find(LookupKey Lookup) {
+ return Map.find_as(Lookup);
+ }
+
+ /// Insert the constant into its proper slot.
+ void insert(ConstantClass *CP) {
+ Map[CP] = '\0';
+ }
+
+ /// Remove this constant from the map
+ void remove(ConstantClass *CP) {
+ typename MapTy::iterator I = findExistingElement(CP);
+ assert(I != Map.end() && "Constant not found in constant table!");
+ assert(I->first == CP && "Didn't find correct element?");
+ Map.erase(I);
+ }
+
+ void dump() const {
+ DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n");
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp
new file mode 100644
index 000000000000..983b49c628b4
--- /dev/null
+++ b/contrib/llvm/lib/IR/Core.cpp
@@ -0,0 +1,2458 @@
+//===-- Core.cpp ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including the C bindings)
+// for libLLVMCore.a, which implements the LLVM intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Core.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Threading.h"
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+using namespace llvm;
+
+void llvm::initializeCore(PassRegistry &Registry) {
+ initializeDominatorTreePass(Registry);
+ initializePrintModulePassPass(Registry);
+ initializePrintFunctionPassPass(Registry);
+ initializePrintBasicBlockPassPass(Registry);
+ initializeVerifierPass(Registry);
+ initializePreVerifierPass(Registry);
+}
+
+void LLVMInitializeCore(LLVMPassRegistryRef R) {
+ initializeCore(*unwrap(R));
+}
+
+void LLVMShutdown() {
+ llvm_shutdown();
+}
+
+/*===-- Error handling ----------------------------------------------------===*/
+
+void LLVMDisposeMessage(char *Message) {
+ free(Message);
+}
+
+
+/*===-- Operations on contexts --------------------------------------------===*/
+
+LLVMContextRef LLVMContextCreate() {
+ return wrap(new LLVMContext());
+}
+
+LLVMContextRef LLVMGetGlobalContext() {
+ return wrap(&getGlobalContext());
+}
+
+void LLVMContextDispose(LLVMContextRef C) {
+ delete unwrap(C);
+}
+
+unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name,
+ unsigned SLen) {
+ return unwrap(C)->getMDKindID(StringRef(Name, SLen));
+}
+
+unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) {
+ return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen);
+}
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) {
+ return wrap(new Module(ModuleID, getGlobalContext()));
+}
+
+LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID,
+ LLVMContextRef C) {
+ return wrap(new Module(ModuleID, *unwrap(C)));
+}
+
+void LLVMDisposeModule(LLVMModuleRef M) {
+ delete unwrap(M);
+}
+
+/*--.. Data layout .........................................................--*/
+const char * LLVMGetDataLayout(LLVMModuleRef M) {
+ return unwrap(M)->getDataLayout().c_str();
+}
+
+void LLVMSetDataLayout(LLVMModuleRef M, const char *Triple) {
+ unwrap(M)->setDataLayout(Triple);
+}
+
+/*--.. Target triple .......................................................--*/
+const char * LLVMGetTarget(LLVMModuleRef M) {
+ return unwrap(M)->getTargetTriple().c_str();
+}
+
+void LLVMSetTarget(LLVMModuleRef M, const char *Triple) {
+ unwrap(M)->setTargetTriple(Triple);
+}
+
+void LLVMDumpModule(LLVMModuleRef M) {
+ unwrap(M)->dump();
+}
+
+LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename,
+ char **ErrorMessage) {
+ std::string error;
+ raw_fd_ostream dest(Filename, error);
+ if (!error.empty()) {
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+
+ unwrap(M)->print(dest, NULL);
+
+ if (!error.empty()) {
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+ dest.flush();
+ return false;
+}
+
+/*--.. Operations on inline assembler ......................................--*/
+void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) {
+ unwrap(M)->setModuleInlineAsm(StringRef(Asm));
+}
+
+
+/*--.. Operations on module contexts ......................................--*/
+LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) {
+ return wrap(&unwrap(M)->getContext());
+}
+
+
+/*===-- Operations on types -----------------------------------------------===*/
+
+/*--.. Operations on all types (mostly) ....................................--*/
+
+LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
+ switch (unwrap(Ty)->getTypeID()) {
+ default: llvm_unreachable("Unhandled TypeID.");
+ case Type::VoidTyID:
+ return LLVMVoidTypeKind;
+ case Type::HalfTyID:
+ return LLVMHalfTypeKind;
+ case Type::FloatTyID:
+ return LLVMFloatTypeKind;
+ case Type::DoubleTyID:
+ return LLVMDoubleTypeKind;
+ case Type::X86_FP80TyID:
+ return LLVMX86_FP80TypeKind;
+ case Type::FP128TyID:
+ return LLVMFP128TypeKind;
+ case Type::PPC_FP128TyID:
+ return LLVMPPC_FP128TypeKind;
+ case Type::LabelTyID:
+ return LLVMLabelTypeKind;
+ case Type::MetadataTyID:
+ return LLVMMetadataTypeKind;
+ case Type::IntegerTyID:
+ return LLVMIntegerTypeKind;
+ case Type::FunctionTyID:
+ return LLVMFunctionTypeKind;
+ case Type::StructTyID:
+ return LLVMStructTypeKind;
+ case Type::ArrayTyID:
+ return LLVMArrayTypeKind;
+ case Type::PointerTyID:
+ return LLVMPointerTypeKind;
+ case Type::VectorTyID:
+ return LLVMVectorTypeKind;
+ case Type::X86_MMXTyID:
+ return LLVMX86_MMXTypeKind;
+ }
+}
+
+LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty)
+{
+ return unwrap(Ty)->isSized();
+}
+
+LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
+ return wrap(&unwrap(Ty)->getContext());
+}
+
+/*--.. Operations on integer types .........................................--*/
+
+LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt1Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt8Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt16Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt32Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt64Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits) {
+ return wrap(IntegerType::get(*unwrap(C), NumBits));
+}
+
+LLVMTypeRef LLVMInt1Type(void) {
+ return LLVMInt1TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt8Type(void) {
+ return LLVMInt8TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt16Type(void) {
+ return LLVMInt16TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt32Type(void) {
+ return LLVMInt32TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt64Type(void) {
+ return LLVMInt64TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMIntType(unsigned NumBits) {
+ return LLVMIntTypeInContext(LLVMGetGlobalContext(), NumBits);
+}
+
+unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) {
+ return unwrap<IntegerType>(IntegerTy)->getBitWidth();
+}
+
+/*--.. Operations on real types ............................................--*/
+
+LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getHalfTy(*unwrap(C));
+}
+LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getFloatTy(*unwrap(C));
+}
+LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getDoubleTy(*unwrap(C));
+}
+LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_FP80Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getFP128Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getPPC_FP128Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
+}
+
+LLVMTypeRef LLVMHalfType(void) {
+ return LLVMHalfTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMFloatType(void) {
+ return LLVMFloatTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMDoubleType(void) {
+ return LLVMDoubleTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMX86FP80Type(void) {
+ return LLVMX86FP80TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMFP128Type(void) {
+ return LLVMFP128TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMPPCFP128Type(void) {
+ return LLVMPPCFP128TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMX86MMXType(void) {
+ return LLVMX86MMXTypeInContext(LLVMGetGlobalContext());
+}
+
+/*--.. Operations on function types ........................................--*/
+
+LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
+ LLVMTypeRef *ParamTypes, unsigned ParamCount,
+ LLVMBool IsVarArg) {
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
+ return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0));
+}
+
+LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) {
+ return unwrap<FunctionType>(FunctionTy)->isVarArg();
+}
+
+LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy) {
+ return wrap(unwrap<FunctionType>(FunctionTy)->getReturnType());
+}
+
+unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy) {
+ return unwrap<FunctionType>(FunctionTy)->getNumParams();
+}
+
+void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) {
+ FunctionType *Ty = unwrap<FunctionType>(FunctionTy);
+ for (FunctionType::param_iterator I = Ty->param_begin(),
+ E = Ty->param_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
+
+/*--.. Operations on struct types ..........................................--*/
+
+LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
+ return wrap(StructType::get(*unwrap(C), Tys, Packed != 0));
+}
+
+LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ return LLVMStructTypeInContext(LLVMGetGlobalContext(), ElementTypes,
+ ElementCount, Packed);
+}
+
+LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name)
+{
+ return wrap(StructType::create(*unwrap(C), Name));
+}
+
+const char *LLVMGetStructName(LLVMTypeRef Ty)
+{
+ StructType *Type = unwrap<StructType>(Ty);
+ if (!Type->hasName())
+ return 0;
+ return Type->getName().data();
+}
+
+void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
+ unwrap<StructType>(StructTy)->setBody(Tys, Packed != 0);
+}
+
+unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->getNumElements();
+}
+
+void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) {
+ StructType *Ty = unwrap<StructType>(StructTy);
+ for (StructType::element_iterator I = Ty->element_begin(),
+ E = Ty->element_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
+
+LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isPacked();
+}
+
+LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isOpaque();
+}
+
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getTypeByName(Name));
+}
+
+/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
+
+LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
+ return wrap(ArrayType::get(unwrap(ElementType), ElementCount));
+}
+
+LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) {
+ return wrap(PointerType::get(unwrap(ElementType), AddressSpace));
+}
+
+LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) {
+ return wrap(VectorType::get(unwrap(ElementType), ElementCount));
+}
+
+LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty) {
+ return wrap(unwrap<SequentialType>(Ty)->getElementType());
+}
+
+unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) {
+ return unwrap<ArrayType>(ArrayTy)->getNumElements();
+}
+
+unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) {
+ return unwrap<PointerType>(PointerTy)->getAddressSpace();
+}
+
+unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) {
+ return unwrap<VectorType>(VectorTy)->getNumElements();
+}
+
+/*--.. Operations on other types ...........................................--*/
+
+LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) {
+ return wrap(Type::getVoidTy(*unwrap(C)));
+}
+LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C) {
+ return wrap(Type::getLabelTy(*unwrap(C)));
+}
+
+LLVMTypeRef LLVMVoidType(void) {
+ return LLVMVoidTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMLabelType(void) {
+ return LLVMLabelTypeInContext(LLVMGetGlobalContext());
+}
+
+/*===-- Operations on values ----------------------------------------------===*/
+
+/*--.. Operations on all values ............................................--*/
+
+LLVMTypeRef LLVMTypeOf(LLVMValueRef Val) {
+ return wrap(unwrap(Val)->getType());
+}
+
+const char *LLVMGetValueName(LLVMValueRef Val) {
+ return unwrap(Val)->getName().data();
+}
+
+void LLVMSetValueName(LLVMValueRef Val, const char *Name) {
+ unwrap(Val)->setName(Name);
+}
+
+void LLVMDumpValue(LLVMValueRef Val) {
+ unwrap(Val)->dump();
+}
+
+void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
+ unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal));
+}
+
+int LLVMHasMetadata(LLVMValueRef Inst) {
+ return unwrap<Instruction>(Inst)->hasMetadata();
+}
+
+LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
+ return wrap(unwrap<Instruction>(Inst)->getMetadata(KindID));
+}
+
+void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
+ unwrap<Instruction>(Inst)->setMetadata(KindID, MD? unwrap<MDNode>(MD) : NULL);
+}
+
+/*--.. Conversion functions ................................................--*/
+
+#define LLVM_DEFINE_VALUE_CAST(name) \
+ LLVMValueRef LLVMIsA##name(LLVMValueRef Val) { \
+ return wrap(static_cast<Value*>(dyn_cast_or_null<name>(unwrap(Val)))); \
+ }
+
+LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST)
+
+/*--.. Operations on Uses ..................................................--*/
+LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) {
+ Value *V = unwrap(Val);
+ Value::use_iterator I = V->use_begin();
+ if (I == V->use_end())
+ return 0;
+ return wrap(&(I.getUse()));
+}
+
+LLVMUseRef LLVMGetNextUse(LLVMUseRef U) {
+ Use *Next = unwrap(U)->getNext();
+ if (Next)
+ return wrap(Next);
+ return 0;
+}
+
+LLVMValueRef LLVMGetUser(LLVMUseRef U) {
+ return wrap(unwrap(U)->getUser());
+}
+
+LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) {
+ return wrap(unwrap(U)->get());
+}
+
+/*--.. Operations on Users .................................................--*/
+LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) {
+ Value *V = unwrap(Val);
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ return wrap(MD->getOperand(Index));
+ return wrap(cast<User>(V)->getOperand(Index));
+}
+
+void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) {
+ unwrap<User>(Val)->setOperand(Index, unwrap(Op));
+}
+
+int LLVMGetNumOperands(LLVMValueRef Val) {
+ Value *V = unwrap(Val);
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ return MD->getNumOperands();
+ return cast<User>(V)->getNumOperands();
+}
+
+/*--.. Operations on constants of any type .................................--*/
+
+LLVMValueRef LLVMConstNull(LLVMTypeRef Ty) {
+ return wrap(Constant::getNullValue(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty) {
+ return wrap(Constant::getAllOnesValue(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) {
+ return wrap(UndefValue::get(unwrap(Ty)));
+}
+
+LLVMBool LLVMIsConstant(LLVMValueRef Ty) {
+ return isa<Constant>(unwrap(Ty));
+}
+
+LLVMBool LLVMIsNull(LLVMValueRef Val) {
+ if (Constant *C = dyn_cast<Constant>(unwrap(Val)))
+ return C->isNullValue();
+ return false;
+}
+
+LLVMBool LLVMIsUndef(LLVMValueRef Val) {
+ return isa<UndefValue>(unwrap(Val));
+}
+
+LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
+ return
+ wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty)));
+}
+
+/*--.. Operations on metadata nodes ........................................--*/
+
+LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
+ unsigned SLen) {
+ return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen)));
+}
+
+LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) {
+ return LLVMMDStringInContext(LLVMGetGlobalContext(), Str, SLen);
+}
+
+LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
+ unsigned Count) {
+ return wrap(MDNode::get(*unwrap(C),
+ makeArrayRef(unwrap<Value>(Vals, Count), Count)));
+}
+
+LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) {
+ return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count);
+}
+
+const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len) {
+ if (const MDString *S = dyn_cast<MDString>(unwrap(V))) {
+ *Len = S->getString().size();
+ return S->getString().data();
+ }
+ *Len = 0;
+ return 0;
+}
+
+unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V)
+{
+ return cast<MDNode>(unwrap(V))->getNumOperands();
+}
+
+void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest)
+{
+ const MDNode *N = cast<MDNode>(unwrap(V));
+ const unsigned numOperands = N->getNumOperands();
+ for (unsigned i = 0; i < numOperands; i++)
+ Dest[i] = wrap(N->getOperand(i));
+}
+
+unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name)
+{
+ if (NamedMDNode *N = unwrap(M)->getNamedMetadata(name)) {
+ return N->getNumOperands();
+ }
+ return 0;
+}
+
+void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRef *Dest)
+{
+ NamedMDNode *N = unwrap(M)->getNamedMetadata(name);
+ if (!N)
+ return;
+ for (unsigned i=0;i<N->getNumOperands();i++)
+ Dest[i] = wrap(N->getOperand(i));
+}
+
+void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
+ LLVMValueRef Val)
+{
+ NamedMDNode *N = unwrap(M)->getOrInsertNamedMetadata(name);
+ if (!N)
+ return;
+ MDNode *Op = Val ? unwrap<MDNode>(Val) : NULL;
+ if (Op)
+ N->addOperand(Op);
+}
+
+/*--.. Operations on scalar constants ......................................--*/
+
+LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
+ LLVMBool SignExtend) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0));
+}
+
+LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
+ unsigned NumWords,
+ const uint64_t Words[]) {
+ IntegerType *Ty = unwrap<IntegerType>(IntTy);
+ return wrap(ConstantInt::get(Ty->getContext(),
+ APInt(Ty->getBitWidth(),
+ makeArrayRef(Words, NumWords))));
+}
+
+LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[],
+ uint8_t Radix) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str),
+ Radix));
+}
+
+LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char Str[],
+ unsigned SLen, uint8_t Radix) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str, SLen),
+ Radix));
+}
+
+LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N) {
+ return wrap(ConstantFP::get(unwrap(RealTy), N));
+}
+
+LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text) {
+ return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Text)));
+}
+
+LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char Str[],
+ unsigned SLen) {
+ return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Str, SLen)));
+}
+
+unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal) {
+ return unwrap<ConstantInt>(ConstantVal)->getZExtValue();
+}
+
+long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) {
+ return unwrap<ConstantInt>(ConstantVal)->getSExtValue();
+}
+
+/*--.. Operations on composite constants ...................................--*/
+
+LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
+ unsigned Length,
+ LLVMBool DontNullTerminate) {
+ /* Inverted the sense of AddNull because ', 0)' is a
+ better mnemonic for null termination than ', 1)'. */
+ return wrap(ConstantDataArray::getString(*unwrap(C), StringRef(Str, Length),
+ DontNullTerminate == 0));
+}
+LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
+ LLVMValueRef *ConstantVals,
+ unsigned Count, LLVMBool Packed) {
+ Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+ return wrap(ConstantStruct::getAnon(*unwrap(C), makeArrayRef(Elements, Count),
+ Packed != 0));
+}
+
+LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
+ LLVMBool DontNullTerminate) {
+ return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length,
+ DontNullTerminate);
+}
+LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
+ LLVMValueRef *ConstantVals, unsigned Length) {
+ ArrayRef<Constant*> V(unwrap<Constant>(ConstantVals, Length), Length);
+ return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
+}
+LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
+ LLVMBool Packed) {
+ return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
+ Packed);
+}
+
+LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
+ LLVMValueRef *ConstantVals,
+ unsigned Count) {
+ Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+ StructType *Ty = cast<StructType>(unwrap(StructTy));
+
+ return wrap(ConstantStruct::get(Ty, makeArrayRef(Elements, Count)));
+}
+
+LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
+ return wrap(ConstantVector::get(makeArrayRef(
+ unwrap<Constant>(ScalarConstantVals, Size), Size)));
+}
+
+/*-- Opcode mapping */
+
+static LLVMOpcode map_to_llvmopcode(int opcode)
+{
+ switch (opcode) {
+ default: llvm_unreachable("Unhandled Opcode.");
+#define HANDLE_INST(num, opc, clas) case num: return LLVM##opc;
+#include "llvm/IR/Instruction.def"
+#undef HANDLE_INST
+ }
+}
+
+static int map_from_llvmopcode(LLVMOpcode code)
+{
+ switch (code) {
+#define HANDLE_INST(num, opc, clas) case LLVM##opc: return num;
+#include "llvm/IR/Instruction.def"
+#undef HANDLE_INST
+ }
+ llvm_unreachable("Unhandled Opcode.");
+}
+
+/*--.. Constant expressions ................................................--*/
+
+LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) {
+ return map_to_llvmopcode(unwrap<ConstantExpr>(ConstantVal)->getOpcode());
+}
+
+LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty) {
+ return wrap(ConstantExpr::getAlignOf(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty) {
+ return wrap(ConstantExpr::getSizeOf(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNSWNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNUWNeg(unwrap<Constant>(ConstantVal)));
+}
+
+
+LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getFNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNot(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getExactSDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getOr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getXor(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstICmp(LLVMIntPredicate Predicate,
+ LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getICmp(Predicate,
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate,
+ LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFCmp(Predicate,
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getShl(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getLShr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAShr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices, unsigned NumIndices) {
+ ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
+ NumIndices);
+ return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
+ IdxList));
+}
+
+LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices,
+ unsigned NumIndices) {
+ Constant* Val = unwrap<Constant>(ConstantVal);
+ ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
+ NumIndices);
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, IdxList));
+}
+
+LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getTrunc(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSExt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getZExt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPTrunc(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPExtend(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getUIToFP(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSIToFP(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPToUI(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPToSI(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getPtrToInt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getIntToPtr(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getZExtOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSExtOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getTruncOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getPointerCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
+ LLVMBool isSigned) {
+ return wrap(ConstantExpr::getIntegerCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType), isSigned));
+}
+
+LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
+ LLVMValueRef ConstantIfTrue,
+ LLVMValueRef ConstantIfFalse) {
+ return wrap(ConstantExpr::getSelect(unwrap<Constant>(ConstantCondition),
+ unwrap<Constant>(ConstantIfTrue),
+ unwrap<Constant>(ConstantIfFalse)));
+}
+
+LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
+ LLVMValueRef IndexConstant) {
+ return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
+ unwrap<Constant>(IndexConstant)));
+}
+
+LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
+ LLVMValueRef ElementValueConstant,
+ LLVMValueRef IndexConstant) {
+ return wrap(ConstantExpr::getInsertElement(unwrap<Constant>(VectorConstant),
+ unwrap<Constant>(ElementValueConstant),
+ unwrap<Constant>(IndexConstant)));
+}
+
+LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
+ LLVMValueRef VectorBConstant,
+ LLVMValueRef MaskConstant) {
+ return wrap(ConstantExpr::getShuffleVector(unwrap<Constant>(VectorAConstant),
+ unwrap<Constant>(VectorBConstant),
+ unwrap<Constant>(MaskConstant)));
+}
+
+LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
+ unsigned NumIdx) {
+ return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
+ makeArrayRef(IdxList, NumIdx)));
+}
+
+LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
+ LLVMValueRef ElementValueConstant,
+ unsigned *IdxList, unsigned NumIdx) {
+ return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
+ unwrap<Constant>(ElementValueConstant),
+ makeArrayRef(IdxList, NumIdx)));
+}
+
+LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
+ const char *Constraints,
+ LLVMBool HasSideEffects,
+ LLVMBool IsAlignStack) {
+ return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString,
+ Constraints, HasSideEffects, IsAlignStack));
+}
+
+LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) {
+ return wrap(BlockAddress::get(unwrap<Function>(F), unwrap(BB)));
+}
+
+/*--.. Operations on global variables, functions, and aliases (globals) ....--*/
+
+LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) {
+ return wrap(unwrap<GlobalValue>(Global)->getParent());
+}
+
+LLVMBool LLVMIsDeclaration(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->isDeclaration();
+}
+
+LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
+ switch (unwrap<GlobalValue>(Global)->getLinkage()) {
+ case GlobalValue::ExternalLinkage:
+ return LLVMExternalLinkage;
+ case GlobalValue::AvailableExternallyLinkage:
+ return LLVMAvailableExternallyLinkage;
+ case GlobalValue::LinkOnceAnyLinkage:
+ return LLVMLinkOnceAnyLinkage;
+ case GlobalValue::LinkOnceODRLinkage:
+ return LLVMLinkOnceODRLinkage;
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ return LLVMLinkOnceODRAutoHideLinkage;
+ case GlobalValue::WeakAnyLinkage:
+ return LLVMWeakAnyLinkage;
+ case GlobalValue::WeakODRLinkage:
+ return LLVMWeakODRLinkage;
+ case GlobalValue::AppendingLinkage:
+ return LLVMAppendingLinkage;
+ case GlobalValue::InternalLinkage:
+ return LLVMInternalLinkage;
+ case GlobalValue::PrivateLinkage:
+ return LLVMPrivateLinkage;
+ case GlobalValue::LinkerPrivateLinkage:
+ return LLVMLinkerPrivateLinkage;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ return LLVMLinkerPrivateWeakLinkage;
+ case GlobalValue::DLLImportLinkage:
+ return LLVMDLLImportLinkage;
+ case GlobalValue::DLLExportLinkage:
+ return LLVMDLLExportLinkage;
+ case GlobalValue::ExternalWeakLinkage:
+ return LLVMExternalWeakLinkage;
+ case GlobalValue::CommonLinkage:
+ return LLVMCommonLinkage;
+ }
+
+ llvm_unreachable("Invalid GlobalValue linkage!");
+}
+
+void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
+ GlobalValue *GV = unwrap<GlobalValue>(Global);
+
+ switch (Linkage) {
+ case LLVMExternalLinkage:
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ break;
+ case LLVMAvailableExternallyLinkage:
+ GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ break;
+ case LLVMLinkOnceAnyLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ break;
+ case LLVMLinkOnceODRLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ break;
+ case LLVMLinkOnceODRAutoHideLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceODRAutoHideLinkage);
+ break;
+ case LLVMWeakAnyLinkage:
+ GV->setLinkage(GlobalValue::WeakAnyLinkage);
+ break;
+ case LLVMWeakODRLinkage:
+ GV->setLinkage(GlobalValue::WeakODRLinkage);
+ break;
+ case LLVMAppendingLinkage:
+ GV->setLinkage(GlobalValue::AppendingLinkage);
+ break;
+ case LLVMInternalLinkage:
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ break;
+ case LLVMPrivateLinkage:
+ GV->setLinkage(GlobalValue::PrivateLinkage);
+ break;
+ case LLVMLinkerPrivateLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
+ break;
+ case LLVMLinkerPrivateWeakLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
+ break;
+ case LLVMDLLImportLinkage:
+ GV->setLinkage(GlobalValue::DLLImportLinkage);
+ break;
+ case LLVMDLLExportLinkage:
+ GV->setLinkage(GlobalValue::DLLExportLinkage);
+ break;
+ case LLVMExternalWeakLinkage:
+ GV->setLinkage(GlobalValue::ExternalWeakLinkage);
+ break;
+ case LLVMGhostLinkage:
+ DEBUG(errs()
+ << "LLVMSetLinkage(): LLVMGhostLinkage is no longer supported.");
+ break;
+ case LLVMCommonLinkage:
+ GV->setLinkage(GlobalValue::CommonLinkage);
+ break;
+ }
+}
+
+const char *LLVMGetSection(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->getSection().c_str();
+}
+
+void LLVMSetSection(LLVMValueRef Global, const char *Section) {
+ unwrap<GlobalValue>(Global)->setSection(Section);
+}
+
+LLVMVisibility LLVMGetVisibility(LLVMValueRef Global) {
+ return static_cast<LLVMVisibility>(
+ unwrap<GlobalValue>(Global)->getVisibility());
+}
+
+void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz) {
+ unwrap<GlobalValue>(Global)
+ ->setVisibility(static_cast<GlobalValue::VisibilityTypes>(Viz));
+}
+
+unsigned LLVMGetAlignment(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->getAlignment();
+}
+
+void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes) {
+ unwrap<GlobalValue>(Global)->setAlignment(Bytes);
+}
+
+/*--.. Operations on global variables ......................................--*/
+
+LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name));
+}
+
+LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
+ const char *Name,
+ unsigned AddressSpace) {
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name, 0,
+ GlobalVariable::NotThreadLocal, AddressSpace));
+}
+
+LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getNamedGlobal(Name));
+}
+
+LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::global_iterator I = Mod->global_begin();
+ if (I == Mod->global_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::global_iterator I = Mod->global_end();
+ if (I == Mod->global_begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+ Module::global_iterator I = GV;
+ if (++I == GV->getParent()->global_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+ Module::global_iterator I = GV;
+ if (I == GV->getParent()->global_begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMDeleteGlobal(LLVMValueRef GlobalVar) {
+ unwrap<GlobalVariable>(GlobalVar)->eraseFromParent();
+}
+
+LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar) {
+ GlobalVariable* GV = unwrap<GlobalVariable>(GlobalVar);
+ if ( !GV->hasInitializer() )
+ return 0;
+ return wrap(GV->getInitializer());
+}
+
+void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) {
+ unwrap<GlobalVariable>(GlobalVar)
+ ->setInitializer(unwrap<Constant>(ConstantVal));
+}
+
+LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isThreadLocal();
+}
+
+void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal) {
+ unwrap<GlobalVariable>(GlobalVar)->setThreadLocal(IsThreadLocal != 0);
+}
+
+LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isConstant();
+}
+
+void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) {
+ unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0);
+}
+
+/*--.. Operations on aliases ......................................--*/
+
+LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
+ const char *Name) {
+ return wrap(new GlobalAlias(unwrap(Ty), GlobalValue::ExternalLinkage, Name,
+ unwrap<Constant>(Aliasee), unwrap (M)));
+}
+
+/*--.. Operations on functions .............................................--*/
+
+LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
+ LLVMTypeRef FunctionTy) {
+ return wrap(Function::Create(unwrap<FunctionType>(FunctionTy),
+ GlobalValue::ExternalLinkage, Name, unwrap(M)));
+}
+
+LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getFunction(Name));
+}
+
+LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::iterator I = Mod->begin();
+ if (I == Mod->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::iterator I = Mod->end();
+ if (I == Mod->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Module::iterator I = Func;
+ if (++I == Func->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Module::iterator I = Func;
+ if (I == Func->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMDeleteFunction(LLVMValueRef Fn) {
+ unwrap<Function>(Fn)->eraseFromParent();
+}
+
+unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) {
+ if (Function *F = dyn_cast<Function>(unwrap(Fn)))
+ return F->getIntrinsicID();
+ return 0;
+}
+
+unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn) {
+ return unwrap<Function>(Fn)->getCallingConv();
+}
+
+void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) {
+ return unwrap<Function>(Fn)->setCallingConv(
+ static_cast<CallingConv::ID>(CC));
+}
+
+const char *LLVMGetGC(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ return F->hasGC()? F->getGC() : 0;
+}
+
+void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
+ Function *F = unwrap<Function>(Fn);
+ if (GC)
+ F->setGC(GC);
+ else
+ F->clearGC();
+}
+
+void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttributeSet PAL = Func->getAttributes();
+ AttrBuilder B(PA);
+ const AttributeSet PALnew =
+ PAL.addAttributes(Func->getContext(), AttributeSet::FunctionIndex,
+ AttributeSet::get(Func->getContext(),
+ AttributeSet::FunctionIndex, B));
+ Func->setAttributes(PALnew);
+}
+
+void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttributeSet PAL = Func->getAttributes();
+ AttrBuilder B(PA);
+ const AttributeSet PALnew =
+ PAL.removeAttributes(Func->getContext(), AttributeSet::FunctionIndex,
+ AttributeSet::get(Func->getContext(),
+ AttributeSet::FunctionIndex, B));
+ Func->setAttributes(PALnew);
+}
+
+LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttributeSet PAL = Func->getAttributes();
+ return (LLVMAttribute)PAL.Raw(AttributeSet::FunctionIndex);
+}
+
+/*--.. Operations on parameters ............................................--*/
+
+unsigned LLVMCountParams(LLVMValueRef FnRef) {
+ // This function is strictly redundant to
+ // LLVMCountParamTypes(LLVMGetElementType(LLVMTypeOf(FnRef)))
+ return unwrap<Function>(FnRef)->arg_size();
+}
+
+void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) {
+ Function *Fn = unwrap<Function>(FnRef);
+ for (Function::arg_iterator I = Fn->arg_begin(),
+ E = Fn->arg_end(); I != E; I++)
+ *ParamRefs++ = wrap(I);
+}
+
+LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) {
+ Function::arg_iterator AI = unwrap<Function>(FnRef)->arg_begin();
+ while (index --> 0)
+ AI++;
+ return wrap(AI);
+}
+
+LLVMValueRef LLVMGetParamParent(LLVMValueRef V) {
+ return wrap(unwrap<Argument>(V)->getParent());
+}
+
+LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::arg_iterator I = Func->arg_begin();
+ if (I == Func->arg_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::arg_iterator I = Func->arg_end();
+ if (I == Func->arg_begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Function::arg_iterator I = A;
+ if (++I == A->getParent()->arg_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Function::arg_iterator I = A;
+ if (I == A->getParent()->arg_begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
+ Argument *A = unwrap<Argument>(Arg);
+ AttrBuilder B(PA);
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+}
+
+void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
+ Argument *A = unwrap<Argument>(Arg);
+ AttrBuilder B(PA);
+ A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+}
+
+LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ return (LLVMAttribute)A->getParent()->getAttributes().
+ Raw(A->getArgNo()+1);
+}
+
+
+void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
+ Argument *A = unwrap<Argument>(Arg);
+ AttrBuilder B;
+ B.addAlignmentAttr(align);
+ A->addAttr(AttributeSet::get(A->getContext(),A->getArgNo() + 1, B));
+}
+
+/*--.. Operations on basic blocks ..........................................--*/
+
+LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) {
+ return wrap(static_cast<Value*>(unwrap(BB)));
+}
+
+LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val) {
+ return isa<BasicBlock>(unwrap(Val));
+}
+
+LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val) {
+ return wrap(unwrap<BasicBlock>(Val));
+}
+
+LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB) {
+ return wrap(unwrap(BB)->getParent());
+}
+
+LLVMValueRef LLVMGetBasicBlockTerminator(LLVMBasicBlockRef BB) {
+ return wrap(unwrap(BB)->getTerminator());
+}
+
+unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) {
+ return unwrap<Function>(FnRef)->size();
+}
+
+void LLVMGetBasicBlocks(LLVMValueRef FnRef, LLVMBasicBlockRef *BasicBlocksRefs){
+ Function *Fn = unwrap<Function>(FnRef);
+ for (Function::iterator I = Fn->begin(), E = Fn->end(); I != E; I++)
+ *BasicBlocksRefs++ = wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn) {
+ return wrap(&unwrap<Function>(Fn)->getEntryBlock());
+}
+
+LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::iterator I = Func->begin();
+ if (I == Func->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::iterator I = Func->end();
+ if (I == Func->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ Function::iterator I = Block;
+ if (++I == Block->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ Function::iterator I = Block;
+ if (I == Block->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
+ LLVMValueRef FnRef,
+ const char *Name) {
+ return wrap(BasicBlock::Create(*unwrap(C), Name, unwrap<Function>(FnRef)));
+}
+
+LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef FnRef, const char *Name) {
+ return LLVMAppendBasicBlockInContext(LLVMGetGlobalContext(), FnRef, Name);
+}
+
+LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C,
+ LLVMBasicBlockRef BBRef,
+ const char *Name) {
+ BasicBlock *BB = unwrap(BBRef);
+ return wrap(BasicBlock::Create(*unwrap(C), Name, BB->getParent(), BB));
+}
+
+LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef BBRef,
+ const char *Name) {
+ return LLVMInsertBasicBlockInContext(LLVMGetGlobalContext(), BBRef, Name);
+}
+
+void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) {
+ unwrap(BBRef)->eraseFromParent();
+}
+
+void LLVMRemoveBasicBlockFromParent(LLVMBasicBlockRef BBRef) {
+ unwrap(BBRef)->removeFromParent();
+}
+
+void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveBefore(unwrap(MovePos));
+}
+
+void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveAfter(unwrap(MovePos));
+}
+
+/*--.. Operations on instructions ..........................................--*/
+
+LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst) {
+ return wrap(unwrap<Instruction>(Inst)->getParent());
+}
+
+LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ BasicBlock::iterator I = Block->begin();
+ if (I == Block->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ BasicBlock::iterator I = Block->end();
+ if (I == Block->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) {
+ Instruction *Instr = unwrap<Instruction>(Inst);
+ BasicBlock::iterator I = Instr;
+ if (++I == Instr->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) {
+ Instruction *Instr = unwrap<Instruction>(Inst);
+ BasicBlock::iterator I = Instr;
+ if (I == Instr->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMInstructionEraseFromParent(LLVMValueRef Inst) {
+ unwrap<Instruction>(Inst)->eraseFromParent();
+}
+
+LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst) {
+ if (ICmpInst *I = dyn_cast<ICmpInst>(unwrap(Inst)))
+ return (LLVMIntPredicate)I->getPredicate();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(unwrap(Inst)))
+ if (CE->getOpcode() == Instruction::ICmp)
+ return (LLVMIntPredicate)CE->getPredicate();
+ return (LLVMIntPredicate)0;
+}
+
+LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst) {
+ if (Instruction *C = dyn_cast<Instruction>(unwrap(Inst)))
+ return map_to_llvmopcode(C->getOpcode());
+ return (LLVMOpcode)0;
+}
+
+/*--.. Call and invoke instructions ........................................--*/
+
+unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
+ Value *V = unwrap(Instr);
+ if (CallInst *CI = dyn_cast<CallInst>(V))
+ return CI->getCallingConv();
+ if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ return II->getCallingConv();
+ llvm_unreachable("LLVMGetInstructionCallConv applies only to call and invoke!");
+}
+
+void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
+ Value *V = unwrap(Instr);
+ if (CallInst *CI = dyn_cast<CallInst>(V))
+ return CI->setCallingConv(static_cast<CallingConv::ID>(CC));
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ return II->setCallingConv(static_cast<CallingConv::ID>(CC));
+ llvm_unreachable("LLVMSetInstructionCallConv applies only to call and invoke!");
+}
+
+void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
+ LLVMAttribute PA) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ AttrBuilder B(PA);
+ Call.setAttributes(
+ Call.getAttributes().addAttributes(Call->getContext(), index,
+ AttributeSet::get(Call->getContext(),
+ index, B)));
+}
+
+void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
+ LLVMAttribute PA) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ AttrBuilder B(PA);
+ Call.setAttributes(Call.getAttributes()
+ .removeAttributes(Call->getContext(), index,
+ AttributeSet::get(Call->getContext(),
+ index, B)));
+}
+
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+ unsigned align) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ AttrBuilder B;
+ B.addAlignmentAttr(align);
+ Call.setAttributes(Call.getAttributes()
+ .addAttributes(Call->getContext(), index,
+ AttributeSet::get(Call->getContext(),
+ index, B)));
+}
+
+/*--.. Operations on call instructions (only) ..............................--*/
+
+LLVMBool LLVMIsTailCall(LLVMValueRef Call) {
+ return unwrap<CallInst>(Call)->isTailCall();
+}
+
+void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) {
+ unwrap<CallInst>(Call)->setTailCall(isTailCall);
+}
+
+/*--.. Operations on switch instructions (only) ............................--*/
+
+LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef Switch) {
+ return wrap(unwrap<SwitchInst>(Switch)->getDefaultDest());
+}
+
+/*--.. Operations on phi nodes .............................................--*/
+
+void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
+ LLVMBasicBlockRef *IncomingBlocks, unsigned Count) {
+ PHINode *PhiVal = unwrap<PHINode>(PhiNode);
+ for (unsigned I = 0; I != Count; ++I)
+ PhiVal->addIncoming(unwrap(IncomingValues[I]), unwrap(IncomingBlocks[I]));
+}
+
+unsigned LLVMCountIncoming(LLVMValueRef PhiNode) {
+ return unwrap<PHINode>(PhiNode)->getNumIncomingValues();
+}
+
+LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index) {
+ return wrap(unwrap<PHINode>(PhiNode)->getIncomingValue(Index));
+}
+
+LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index) {
+ return wrap(unwrap<PHINode>(PhiNode)->getIncomingBlock(Index));
+}
+
+
+/*===-- Instruction builders ----------------------------------------------===*/
+
+LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C) {
+ return wrap(new IRBuilder<>(*unwrap(C)));
+}
+
+LLVMBuilderRef LLVMCreateBuilder(void) {
+ return LLVMCreateBuilderInContext(LLVMGetGlobalContext());
+}
+
+void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block,
+ LLVMValueRef Instr) {
+ BasicBlock *BB = unwrap(Block);
+ Instruction *I = Instr? unwrap<Instruction>(Instr) : (Instruction*) BB->end();
+ unwrap(Builder)->SetInsertPoint(BB, I);
+}
+
+void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) {
+ Instruction *I = unwrap<Instruction>(Instr);
+ unwrap(Builder)->SetInsertPoint(I->getParent(), I);
+}
+
+void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block) {
+ BasicBlock *BB = unwrap(Block);
+ unwrap(Builder)->SetInsertPoint(BB);
+}
+
+LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->GetInsertBlock());
+}
+
+void LLVMClearInsertionPosition(LLVMBuilderRef Builder) {
+ unwrap(Builder)->ClearInsertionPoint();
+}
+
+void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr) {
+ unwrap(Builder)->Insert(unwrap<Instruction>(Instr));
+}
+
+void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr,
+ const char *Name) {
+ unwrap(Builder)->Insert(unwrap<Instruction>(Instr), Name);
+}
+
+void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
+ delete unwrap(Builder);
+}
+
+/*--.. Metadata builders ...................................................--*/
+
+void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
+ MDNode *Loc = L ? unwrap<MDNode>(L) : NULL;
+ unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc));
+}
+
+LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->getCurrentDebugLocation()
+ .getAsMDNode(unwrap(Builder)->getContext()));
+}
+
+void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
+ unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
+}
+
+
+/*--.. Instruction builders ................................................--*/
+
+LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateRetVoid());
+}
+
+LLVMValueRef LLVMBuildRet(LLVMBuilderRef B, LLVMValueRef V) {
+ return wrap(unwrap(B)->CreateRet(unwrap(V)));
+}
+
+LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef B, LLVMValueRef *RetVals,
+ unsigned N) {
+ return wrap(unwrap(B)->CreateAggregateRet(unwrap(RetVals), N));
+}
+
+LLVMValueRef LLVMBuildBr(LLVMBuilderRef B, LLVMBasicBlockRef Dest) {
+ return wrap(unwrap(B)->CreateBr(unwrap(Dest)));
+}
+
+LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef B, LLVMValueRef If,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Else) {
+ return wrap(unwrap(B)->CreateCondBr(unwrap(If), unwrap(Then), unwrap(Else)));
+}
+
+LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V,
+ LLVMBasicBlockRef Else, unsigned NumCases) {
+ return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases));
+}
+
+LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
+ unsigned NumDests) {
+ return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests));
+}
+
+LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
+ makeArrayRef(unwrap(Args), NumArgs),
+ Name));
+}
+
+LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef PersFn, unsigned NumClauses,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty),
+ cast<Function>(unwrap(PersFn)),
+ NumClauses, Name));
+}
+
+LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn) {
+ return wrap(unwrap(B)->CreateResume(unwrap(Exn)));
+}
+
+LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateUnreachable());
+}
+
+void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal,
+ LLVMBasicBlockRef Dest) {
+ unwrap<SwitchInst>(Switch)->addCase(unwrap<ConstantInt>(OnVal), unwrap(Dest));
+}
+
+void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) {
+ unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest));
+}
+
+void LLVMAddClause(LLVMValueRef LandingPad, LLVMValueRef ClauseVal) {
+ unwrap<LandingPadInst>(LandingPad)->
+ addClause(cast<Constant>(unwrap(ClauseVal)));
+}
+
+void LLVMSetCleanup(LLVMValueRef LandingPad, LLVMBool Val) {
+ unwrap<LandingPadInst>(LandingPad)->setCleanup(Val);
+}
+
+/*--.. Arithmetic ..........................................................--*/
+
+LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateUDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreateExactSDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildURem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateURem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSRem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFRem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildShl(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateShl(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildLShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLShr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildAShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAShr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildAnd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAnd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildOr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateOr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(map_from_llvmopcode(Op)), unwrap(LHS),
+ unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateNot(unwrap(V), Name));
+}
+
+/*--.. Memory ..............................................................--*/
+
+LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
+ const char *Name) {
+ Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+ Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
+ AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
+ Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
+ ITy, unwrap(Ty), AllocSize,
+ 0, 0, "");
+ return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+}
+
+LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Val, const char *Name) {
+ Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+ Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
+ AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
+ Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
+ ITy, unwrap(Ty), AllocSize,
+ unwrap(Val), 0, "");
+ return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+}
+
+LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), 0, Name));
+}
+
+LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Val, const char *Name) {
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
+ return wrap(unwrap(B)->Insert(
+ CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock())));
+}
+
+
+LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name));
+}
+
+LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMValueRef PointerVal) {
+ return wrap(unwrap(B)->CreateStore(unwrap(Val), unwrap(PointerVal)));
+}
+
+LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name) {
+ ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
+ return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), IdxList, Name));
+}
+
+LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name) {
+ ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
+ return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), IdxList, Name));
+}
+
+LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ unsigned Idx, const char *Name) {
+ return wrap(unwrap(B)->CreateStructGEP(unwrap(Pointer), Idx, Name));
+}
+
+LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGlobalString(Str, Name));
+}
+
+LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGlobalStringPtr(Str, Name));
+}
+
+LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) {
+ Value *P = unwrap<Value>(MemAccessInst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->isVolatile();
+ return cast<StoreInst>(P)->isVolatile();
+}
+
+void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) {
+ Value *P = unwrap<Value>(MemAccessInst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->setVolatile(isVolatile);
+ return cast<StoreInst>(P)->setVolatile(isVolatile);
+}
+
+/*--.. Casts ...............................................................--*/
+
+LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateTrunc(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildZExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateZExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildSExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPToUI(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPToUI(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPToSI(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPToSI(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildUIToFP(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateUIToFP(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildSIToFP(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSIToFP(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPTrunc(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPTrunc(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildPtrToInt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreatePtrToInt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildIntToPtr(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntToPtr(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateBitCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateZExtOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSExtOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateTruncOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateCast(Instruction::CastOps(map_from_llvmopcode(Op)), unwrap(Val),
+ unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy),
+ /*isSigned*/true, Name));
+}
+
+LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+/*--.. Comparisons .........................................................--*/
+
+LLVMValueRef LLVMBuildICmp(LLVMBuilderRef B, LLVMIntPredicate Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateICmp(static_cast<ICmpInst::Predicate>(Op),
+ unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFCmp(static_cast<FCmpInst::Predicate>(Op),
+ unwrap(LHS), unwrap(RHS), Name));
+}
+
+/*--.. Miscellaneous instructions ..........................................--*/
+
+LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
+ return wrap(unwrap(B)->CreatePHI(unwrap(Ty), 0, Name));
+}
+
+LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateCall(unwrap(Fn),
+ makeArrayRef(unwrap(Args), NumArgs),
+ Name));
+}
+
+LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
+ LLVMValueRef Then, LLVMValueRef Else,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSelect(unwrap(If), unwrap(Then), unwrap(Else),
+ Name));
+}
+
+LLVMValueRef LLVMBuildVAArg(LLVMBuilderRef B, LLVMValueRef List,
+ LLVMTypeRef Ty, const char *Name) {
+ return wrap(unwrap(B)->CreateVAArg(unwrap(List), unwrap(Ty), Name));
+}
+
+LLVMValueRef LLVMBuildExtractElement(LLVMBuilderRef B, LLVMValueRef VecVal,
+ LLVMValueRef Index, const char *Name) {
+ return wrap(unwrap(B)->CreateExtractElement(unwrap(VecVal), unwrap(Index),
+ Name));
+}
+
+LLVMValueRef LLVMBuildInsertElement(LLVMBuilderRef B, LLVMValueRef VecVal,
+ LLVMValueRef EltVal, LLVMValueRef Index,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInsertElement(unwrap(VecVal), unwrap(EltVal),
+ unwrap(Index), Name));
+}
+
+LLVMValueRef LLVMBuildShuffleVector(LLVMBuilderRef B, LLVMValueRef V1,
+ LLVMValueRef V2, LLVMValueRef Mask,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateShuffleVector(unwrap(V1), unwrap(V2),
+ unwrap(Mask), Name));
+}
+
+LLVMValueRef LLVMBuildExtractValue(LLVMBuilderRef B, LLVMValueRef AggVal,
+ unsigned Index, const char *Name) {
+ return wrap(unwrap(B)->CreateExtractValue(unwrap(AggVal), Index, Name));
+}
+
+LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef B, LLVMValueRef AggVal,
+ LLVMValueRef EltVal, unsigned Index,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInsertValue(unwrap(AggVal), unwrap(EltVal),
+ Index, Name));
+}
+
+LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIsNull(unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIsNotNull(unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
+}
+
+
+/*===-- Module providers --------------------------------------------------===*/
+
+LLVMModuleProviderRef
+LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M) {
+ return reinterpret_cast<LLVMModuleProviderRef>(M);
+}
+
+void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) {
+ delete unwrap(MP);
+}
+
+
+/*===-- Memory buffers ----------------------------------------------------===*/
+
+LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
+ const char *Path,
+ LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+
+ OwningPtr<MemoryBuffer> MB;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getFile(Path, MB))) {
+ *OutMemBuf = wrap(MB.take());
+ return 0;
+ }
+
+ *OutMessage = strdup(ec.message().c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+ OwningPtr<MemoryBuffer> MB;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getSTDIN(MB))) {
+ *OutMemBuf = wrap(MB.take());
+ return 0;
+ }
+
+ *OutMessage = strdup(ec.message().c_str());
+ return 1;
+}
+
+LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange(
+ const char *InputData,
+ size_t InputDataLength,
+ const char *BufferName,
+ LLVMBool RequiresNullTerminator) {
+
+ return wrap(MemoryBuffer::getMemBuffer(
+ StringRef(InputData, InputDataLength),
+ StringRef(BufferName),
+ RequiresNullTerminator));
+}
+
+LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(
+ const char *InputData,
+ size_t InputDataLength,
+ const char *BufferName) {
+
+ return wrap(MemoryBuffer::getMemBufferCopy(
+ StringRef(InputData, InputDataLength),
+ StringRef(BufferName)));
+}
+
+
+void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
+ delete unwrap(MemBuf);
+}
+
+/*===-- Pass Registry -----------------------------------------------------===*/
+
+LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void) {
+ return wrap(PassRegistry::getPassRegistry());
+}
+
+/*===-- Pass Manager ------------------------------------------------------===*/
+
+LLVMPassManagerRef LLVMCreatePassManager() {
+ return wrap(new PassManager());
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
+ return wrap(new FunctionPassManager(unwrap(M)));
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
+ return LLVMCreateFunctionPassManagerForModule(
+ reinterpret_cast<LLVMModuleRef>(P));
+}
+
+LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
+ return unwrap<PassManager>(PM)->run(*unwrap(M));
+}
+
+LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doInitialization();
+}
+
+LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
+ return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
+}
+
+LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doFinalization();
+}
+
+void LLVMDisposePassManager(LLVMPassManagerRef PM) {
+ delete unwrap(PM);
+}
+
+/*===-- Threading ------------------------------------------------------===*/
+
+LLVMBool LLVMStartMultithreaded() {
+ return llvm_start_multithreaded();
+}
+
+void LLVMStopMultithreaded() {
+ llvm_stop_multithreaded();
+}
+
+LLVMBool LLVMIsMultithreaded() {
+ return llvm_is_multithreaded();
+}
diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp
new file mode 100644
index 000000000000..9d6e84072912
--- /dev/null
+++ b/contrib/llvm/lib/IR/DIBuilder.cpp
@@ -0,0 +1,1101 @@
+//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DIBuilder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DIBuilder.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
+ assert((Tag & LLVMDebugVersionMask) == 0 &&
+ "Tag too large for debug encoding!");
+ return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
+}
+
+DIBuilder::DIBuilder(Module &m)
+ : M(m), VMContext(M.getContext()), TheCU(0), TempEnumTypes(0),
+ TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0),
+ ValueFn(0)
+{}
+
+/// finalize - Construct any deferred debug info descriptors.
+void DIBuilder::finalize() {
+ DIArray Enums = getOrCreateArray(AllEnumTypes);
+ DIType(TempEnumTypes).replaceAllUsesWith(Enums);
+
+ DIArray RetainTypes = getOrCreateArray(AllRetainTypes);
+ DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes);
+
+ DIArray SPs = getOrCreateArray(AllSubprograms);
+ DIType(TempSubprograms).replaceAllUsesWith(SPs);
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ SmallVector<Value *, 4> Variables;
+ if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP)) {
+ for (unsigned ii = 0, ee = NMD->getNumOperands(); ii != ee; ++ii)
+ Variables.push_back(NMD->getOperand(ii));
+ NMD->eraseFromParent();
+ }
+ if (MDNode *Temp = SP.getVariablesNodes()) {
+ DIArray AV = getOrCreateArray(Variables);
+ DIType(Temp).replaceAllUsesWith(AV);
+ }
+ }
+
+ DIArray GVs = getOrCreateArray(AllGVs);
+ DIType(TempGVs).replaceAllUsesWith(GVs);
+}
+
+/// getNonCompileUnitScope - If N is compile unit return NULL otherwise return
+/// N.
+static MDNode *getNonCompileUnitScope(MDNode *N) {
+ if (DIDescriptor(N).isCompileUnit())
+ return NULL;
+ return N;
+}
+
+static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename,
+ StringRef Directory) {
+ assert(!Filename.empty() && "Unable to create file without name");
+ Value *Pair[] = {
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ };
+ return MDNode::get(VMContext, Pair);
+}
+
+/// createCompileUnit - A CompileUnit provides an anchor for all debugging
+/// information generated during this instance of compilation.
+void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
+ StringRef Directory, StringRef Producer,
+ bool isOptimized, StringRef Flags,
+ unsigned RunTimeVer, StringRef SplitName) {
+ assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) ||
+ (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
+ "Invalid Language tag");
+ assert(!Filename.empty() &&
+ "Unable to create compile unit without filename");
+ Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ TempEnumTypes = MDNode::getTemporary(VMContext, TElts);
+
+ TempRetainTypes = MDNode::getTemporary(VMContext, TElts);
+
+ TempSubprograms = MDNode::getTemporary(VMContext, TElts);
+
+ TempGVs = MDNode::getTemporary(VMContext, TElts);
+
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
+ createFilePathPair(VMContext, Filename, Directory),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
+ MDString::get(VMContext, Producer),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ MDString::get(VMContext, Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer),
+ TempEnumTypes,
+ TempRetainTypes,
+ TempSubprograms,
+ TempGVs,
+ MDString::get(VMContext, SplitName)
+ };
+ TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
+
+ // Create a named metadata so that it is easier to find cu in a module.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
+ NMD->addOperand(TheCU);
+}
+
+/// createFile - Create a file descriptor to hold debugging information
+/// for a file.
+DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
+ createFilePathPair(VMContext, Filename, Directory)
+ };
+ return DIFile(MDNode::get(VMContext, Elts));
+}
+
+/// createEnumerator - Create a single enumerator value.
+DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
+ assert(!Name.empty() && "Unable to create enumerator without name");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val)
+ };
+ return DIEnumerator(MDNode::get(VMContext, Elts));
+}
+
+/// createNullPtrType - Create C++0x nullptr type.
+DIType DIBuilder::createNullPtrType(StringRef Name) {
+ assert(!Name.empty() && "Unable to create type without name");
+ // nullptr is encoded in DIBasicType format. Line number, filename,
+ // ,size, alignment, offset and flags are always empty here.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type),
+ NULL, // Filename
+ NULL, //TheCU,
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0) // Encoding
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createBasicType - Create debugging information entry for a basic
+/// type, e.g 'char'.
+DIBasicType
+DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Encoding) {
+ assert(!Name.empty() && "Unable to create type without name");
+ // Basic types are encoded in DIBasicType format. Line number, filename,
+ // offset and flags are always empty here.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+ NULL, // File/directory name
+ NULL, //TheCU,
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
+ ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+ };
+ return DIBasicType(MDNode::get(VMContext, Elts));
+}
+
+/// createQualifiedType - Create debugging information entry for a qualified
+/// type, e.g. 'const int'.
+DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
+ // Qualified types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ NULL, // Filename
+ NULL, //TheCU,
+ MDString::get(VMContext, StringRef()), // Empty name.
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FromTy
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createPointerType - Create debugging information entry for a pointer.
+DIDerivedType
+DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
+ uint64_t AlignInBits, StringRef Name) {
+ // Pointer types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+ NULL, // Filename
+ NULL, //TheCU,
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ PointeeTy
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) {
+ // Pointer types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type),
+ NULL, // Filename
+ NULL, //TheCU,
+ NULL,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ PointeeTy,
+ Base
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createReferenceType - Create debugging information entry for a reference
+/// type.
+DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
+ assert(RTy.Verify() && "Unable to create reference type");
+ // References are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ NULL, // Filename
+ NULL, // TheCU,
+ NULL, // Name
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ RTy
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createTypedef - Create debugging information entry for a typedef.
+DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
+ unsigned LineNo, DIDescriptor Context) {
+ // typedefs are encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid typedef type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+ File.getFileNode(),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ Ty
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createFriend - Create debugging information entry for a 'friend'.
+DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
+ // typedefs are encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid type!");
+ assert(FriendTy.Verify() && "Invalid friend type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+ NULL,
+ Ty,
+ NULL, // Name
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FriendTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createInheritance - Create debugging information entry to establish
+/// inheritance relationship between two types.
+DIDerivedType DIBuilder::createInheritance(
+ DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags) {
+ assert(Ty.Verify() && "Unable to create inheritance");
+ // TAG_inheritance is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+ NULL,
+ Ty,
+ NULL, // Name
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ BaseTy
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createMemberType - Create debugging information entry for a member.
+DIDerivedType DIBuilder::createMemberType(
+ DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
+ unsigned Flags, DIType Ty) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createStaticMemberType - Create debugging information entry for a
+/// C++ static data member.
+DIType DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ DIType Ty, unsigned Flags,
+ llvm::Value *Val) {
+ // TAG_member is encoded in DIDerivedType format.
+ Flags |= DIDescriptor::FlagStaticMember;
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0/*SizeInBits*/),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0/*AlignInBits*/),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0/*OffsetInBits*/),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ Val
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCIVar - Create debugging information entry for Objective-C
+/// instance variable.
+DIType DIBuilder::createObjCIVar(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty, StringRef PropertyName,
+ StringRef GetterName, StringRef SetterName,
+ unsigned PropertyAttributes) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
+ getNonCompileUnitScope(File),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ MDString::get(VMContext, PropertyName),
+ MDString::get(VMContext, GetterName),
+ MDString::get(VMContext, SetterName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes)
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCIVar - Create debugging information entry for Objective-C
+/// instance variable.
+DIType DIBuilder::createObjCIVar(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty, MDNode *PropertyNode) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
+ getNonCompileUnitScope(File),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ PropertyNode
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCProperty - Create debugging information entry for Objective-C
+/// property.
+DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ StringRef GetterName,
+ StringRef SetterName,
+ unsigned PropertyAttributes,
+ DIType Ty) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property),
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ MDString::get(VMContext, GetterName),
+ MDString::get(VMContext, SetterName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes),
+ Ty
+ };
+ return DIObjCProperty(MDNode::get(VMContext, Elts));
+}
+
+/// createTemplateTypeParameter - Create debugging information for template
+/// type parameter.
+DITemplateTypeParameter
+DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ Ty,
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
+}
+
+/// createTemplateValueParameter - Create debugging information for template
+/// value parameter.
+DITemplateValueParameter
+DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, uint64_t Val,
+ MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ Ty,
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateValueParameter(MDNode::get(VMContext, Elts));
+}
+
+/// createClassType - Create debugging information entry for a class.
+DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits,
+ unsigned Flags, DIType DerivedFrom,
+ DIArray Elements,
+ MDNode *VTableHolder,
+ MDNode *TemplateParams) {
+ assert((!Context || Context.Verify()) &&
+ "createClassType should be called with a valid Context");
+ // TAG_class_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+ File.getFileNode(),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ VTableHolder,
+ TemplateParams
+ };
+ DICompositeType R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() && "createClassType should return a verifiable DIType");
+ return R;
+}
+
+/// createStructType - Create debugging information entry for a struct.
+DICompositeType DIBuilder::createStructType(DIDescriptor Context,
+ StringRef Name, DIFile File,
+ unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ unsigned Flags, DIType DerivedFrom,
+ DIArray Elements,
+ unsigned RunTimeLang,
+ MDNode *VTableHolder) {
+ // TAG_structure_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+ File.getFileNode(),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ VTableHolder,
+ NULL,
+ };
+ DICompositeType R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() && "createStructType should return a verifiable DIType");
+ return R;
+}
+
+/// createUnionType - Create debugging information entry for an union.
+DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Flags,
+ DIArray Elements,
+ unsigned RunTimeLang) {
+ // TAG_union_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ NULL,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL
+ };
+ return DICompositeType(MDNode::get(VMContext, Elts));
+}
+
+/// createSubroutineType - Create subroutine type.
+DICompositeType
+DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
+ // TAG_subroutine_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ MDString::get(VMContext, ""),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ NULL,
+ ParameterTypes,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Constant::getNullValue(Type::getInt32Ty(VMContext))
+ };
+ return DICompositeType(MDNode::get(VMContext, Elts));
+}
+
+/// createEnumerationType - Create debugging information entry for an
+/// enumeration.
+DICompositeType DIBuilder::createEnumerationType(
+ DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
+ DIType ClassType) {
+ // TAG_enumeration_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ClassType,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Constant::getNullValue(Type::getInt32Ty(VMContext))
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ AllEnumTypes.push_back(Node);
+ return DICompositeType(Node);
+}
+
+/// createArrayType - Create debugging information entry for an array.
+DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+ // TAG_array_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ NULL, // Filename/Directory,
+ NULL, //TheCU,
+ MDString::get(VMContext, ""),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Constant::getNullValue(Type::getInt32Ty(VMContext))
+ };
+ return DICompositeType(MDNode::get(VMContext, Elts));
+}
+
+/// createVectorType - Create debugging information entry for a vector.
+DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+
+ // A vector is an array type with the FlagVector flag applied.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ NULL, // Filename/Directory,
+ NULL, //TheCU,
+ MDString::get(VMContext, ""),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), DIType::FlagVector),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Constant::getNullValue(Type::getInt32Ty(VMContext))
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createArtificialType - Create a new DIType with "artificial" flag set.
+DIType DIBuilder::createArtificialType(DIType Ty) {
+ if (Ty.isArtificial())
+ return Ty;
+
+ SmallVector<Value *, 9> Elts;
+ MDNode *N = Ty;
+ assert (N && "Unexpected input DIType!");
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i))
+ Elts.push_back(V);
+ else
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ }
+
+ unsigned CurFlags = Ty.getFlags();
+ CurFlags = CurFlags | DIType::FlagArtificial;
+
+ // Flags are stored at this slot.
+ Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjectPointerType - Create a new type with both the object pointer
+/// and artificial flags set.
+DIType DIBuilder::createObjectPointerType(DIType Ty) {
+ if (Ty.isObjectPointer())
+ return Ty;
+
+ SmallVector<Value *, 9> Elts;
+ MDNode *N = Ty;
+ assert (N && "Unexpected input DIType!");
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i))
+ Elts.push_back(V);
+ else
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ }
+
+ unsigned CurFlags = Ty.getFlags();
+ CurFlags = CurFlags | (DIType::FlagObjectPointer | DIType::FlagArtificial);
+
+ // Flags are stored at this slot.
+ Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// retainType - Retain DIType in a module even if it is not referenced
+/// through debug info anchors.
+void DIBuilder::retainType(DIType T) {
+ AllRetainTypes.push_back(T);
+}
+
+/// createUnspecifiedParameter - Create unspeicified type descriptor
+/// for the subroutine type.
+DIDescriptor DIBuilder::createUnspecifiedParameter() {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
+ };
+ return DIDescriptor(MDNode::get(VMContext, Elts));
+}
+
+/// createForwardDecl - Create a temporary forward-declared type that
+/// can be RAUW'd if the full type is seen.
+DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
+ DIDescriptor Scope, DIFile F,
+ unsigned Line, unsigned RuntimeLang,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits) {
+ // Create a temporary MDNode.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ F.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext),
+ DIDescriptor::FlagFwdDecl),
+ NULL,
+ DIArray(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ assert(DIType(Node).Verify() &&
+ "createForwardDecl result should be verifiable");
+ return DIType(Node);
+}
+
+/// getOrCreateArray - Get a DIArray, create one if required.
+DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
+ if (Elements.empty()) {
+ Value *Null = Constant::getNullValue(Type::getInt32Ty(VMContext));
+ return DIArray(MDNode::get(VMContext, Null));
+ }
+ return DIArray(MDNode::get(VMContext, Elements));
+}
+
+/// getOrCreateSubrange - Create a descriptor for a value range. This
+/// implicitly uniques the values returned.
+DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Count)
+ };
+
+ return DISubrange(MDNode::get(VMContext, Elts));
+}
+
+/// \brief Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F,
+ unsigned LineNumber, DIType Ty, bool isLocalToUnit,
+ Value *Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL, // TheCU,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+ Val,
+ DIDescriptor()
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ AllGVs.push_back(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// \brief Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, Value *Val) {
+ return createGlobalVariable(Name, Name, F, LineNumber, Ty, isLocalToUnit,
+ Val);
+}
+
+/// createStaticVariable - Create a new descriptor for the specified static
+/// variable.
+DIGlobalVariable DIBuilder::
+createStaticVariable(DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, Value *Val, MDNode *Decl) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+ Val,
+ DIDescriptor(Decl)
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ AllGVs.push_back(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// createVariable - Create a new descriptor for the specified variable.
+DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile File,
+ unsigned LineNo, DIType Ty,
+ bool AlwaysPreserve, unsigned Flags,
+ unsigned ArgNo) {
+ DIDescriptor Context(getNonCompileUnitScope(Scope));
+ assert((!Context || Context.Verify()) &&
+ "createLocalVariable should be called with a valid Context");
+ assert(Ty.Verify() &&
+ "createLocalVariable should be called with a valid type");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Constant::getNullValue(Type::getInt32Ty(VMContext))
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ if (AlwaysPreserve) {
+ // The optimizer may remove local variable. If there is an interest
+ // to preserve variable info in such situation then stash it in a
+ // named mdnode.
+ DISubprogram Fn(getDISubprogram(Scope));
+ NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn);
+ FnLocals->addOperand(Node);
+ }
+ assert(DIVariable(Node).Verify() &&
+ "createLocalVariable should return a verifiable DIVariable");
+ return DIVariable(Node);
+}
+
+/// createComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile F,
+ unsigned LineNo,
+ DIType Ty, ArrayRef<Value *> Addr,
+ unsigned ArgNo) {
+ SmallVector<Value *, 15> Elts;
+ Elts.push_back(GetTagConstant(VMContext, Tag));
+ Elts.push_back(getNonCompileUnitScope(Scope)),
+ Elts.push_back(MDString::get(VMContext, Name));
+ Elts.push_back(F);
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext),
+ (LineNo | (ArgNo << 24))));
+ Elts.push_back(Ty);
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ Elts.append(Addr.begin(), Addr.end());
+
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// createFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::createFunction(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile File, unsigned LineNo,
+ DIType Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine,
+ unsigned Flags, bool isOptimized,
+ Function *Fn,
+ MDNode *TParams,
+ MDNode *Decl) {
+ Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ File.getFileNode(),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ NULL,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn,
+ TParams,
+ Decl,
+ MDNode::getTemporary(VMContext, TElts),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine)
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ if (isDefinition)
+ AllSubprograms.push_back(Node);
+ DISubprogram S(Node);
+ assert(S.Verify() && "createFunction should return a valid DISubprogram");
+ return S;
+}
+
+/// createMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::createMethod(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile F,
+ unsigned LineNo, DIType Ty,
+ bool isLocalToUnit,
+ bool isDefinition,
+ unsigned VK, unsigned VIndex,
+ MDNode *VTableHolder,
+ unsigned Flags,
+ bool isOptimized,
+ Function *Fn,
+ MDNode *TParam) {
+ Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ F.getFileNode(),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+ ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+ VTableHolder,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn,
+ TParam,
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ MDNode::getTemporary(VMContext, TElts),
+ // FIXME: Do we want to use different scope/lines?
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ if (isDefinition)
+ AllSubprograms.push_back(Node);
+ DISubprogram S(Node);
+ assert(S.Verify() && "createMethod should return a valid DISubprogram");
+ return S;
+}
+
+/// createNameSpace - This creates new descriptor for a namespace
+/// with the specified parent scope.
+DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+ };
+ DINameSpace R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() &&
+ "createNameSpace should return a verifiable DINameSpace");
+ return R;
+}
+
+/// createLexicalBlockFile - This creates a new MDNode that encapsulates
+/// an existing scope with a new filename.
+DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
+ DIFile File) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ File.getFileNode(),
+ Scope
+ };
+ DILexicalBlockFile R(MDNode::get(VMContext, Elts));
+ assert(
+ R.Verify() &&
+ "createLexicalBlockFile should return a verifiable DILexicalBlockFile");
+ return R;
+}
+
+DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
+ unsigned Line, unsigned Col) {
+ // Defeat MDNode uniqing for lexical blocks by using unique id.
+ static unsigned int unique_id = 0;
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+ ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
+ };
+ DILexicalBlock R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() &&
+ "createLexicalBlock should return a verifiable DILexicalBlock");
+ return R;
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+ return CallInst::Create(DeclareFn, Args, "", InsertBefore);
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+
+ // If this block already has a terminator then insert this intrinsic
+ // before the terminator.
+ if (TerminatorInst *T = InsertAtEnd->getTerminator())
+ return CallInst::Create(DeclareFn, Args, "", T);
+ else
+ return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, "", InsertBefore);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
+}
diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp
new file mode 100644
index 000000000000..ecd5216f20ac
--- /dev/null
+++ b/contrib/llvm/lib/IR/DataLayout.cpp
@@ -0,0 +1,697 @@
+//===-- DataLayout.cpp - Data size & alignment routines --------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines layout properties related to datatype size/offset/alignment
+// information.
+//
+// This structure should be created once, filled in if the defaults are not
+// correct and then passed around by const&. None of the members functions
+// require modification to the object.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdlib>
+using namespace llvm;
+
+// Handle the Pass registration stuff necessary to use DataLayout's.
+
+// Register the default SparcV9 implementation...
+INITIALIZE_PASS(DataLayout, "datalayout", "Data Layout", false, true)
+char DataLayout::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Support for StructLayout
+//===----------------------------------------------------------------------===//
+
+StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
+ assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
+ StructAlignment = 0;
+ StructSize = 0;
+ NumElements = ST->getNumElements();
+
+ // Loop over each of the elements, placing them in memory.
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ Type *Ty = ST->getElementType(i);
+ unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
+
+ // Add padding if necessary to align the data element properly.
+ if ((StructSize & (TyAlign-1)) != 0)
+ StructSize = DataLayout::RoundUpAlignment(StructSize, TyAlign);
+
+ // Keep track of maximum alignment constraint.
+ StructAlignment = std::max(TyAlign, StructAlignment);
+
+ MemberOffsets[i] = StructSize;
+ StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+ }
+
+ // Empty structures have alignment of 1 byte.
+ if (StructAlignment == 0) StructAlignment = 1;
+
+ // Add padding to the end of the struct so that it could be put in an array
+ // and all array elements would be aligned correctly.
+ if ((StructSize & (StructAlignment-1)) != 0)
+ StructSize = DataLayout::RoundUpAlignment(StructSize, StructAlignment);
+}
+
+
+/// getElementContainingOffset - Given a valid offset into the structure,
+/// return the structure index that contains it.
+unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
+ const uint64_t *SI =
+ std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset);
+ assert(SI != &MemberOffsets[0] && "Offset not in structure type!");
+ --SI;
+ assert(*SI <= Offset && "upper_bound didn't work");
+ assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
+ (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
+ "Upper bound didn't work!");
+
+ // Multiple fields can have the same offset if any of them are zero sized.
+ // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
+ // at the i32 element, because it is the last element at that offset. This is
+ // the right one to return, because anything after it will have a higher
+ // offset, implying that this element is non-empty.
+ return SI-&MemberOffsets[0];
+}
+
+//===----------------------------------------------------------------------===//
+// LayoutAlignElem, LayoutAlign support
+//===----------------------------------------------------------------------===//
+
+LayoutAlignElem
+LayoutAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ LayoutAlignElem retval;
+ retval.AlignType = align_type;
+ retval.ABIAlign = abi_align;
+ retval.PrefAlign = pref_align;
+ retval.TypeBitWidth = bit_width;
+ return retval;
+}
+
+bool
+LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const {
+ return (AlignType == rhs.AlignType
+ && ABIAlign == rhs.ABIAlign
+ && PrefAlign == rhs.PrefAlign
+ && TypeBitWidth == rhs.TypeBitWidth);
+}
+
+const LayoutAlignElem
+DataLayout::InvalidAlignmentElem = LayoutAlignElem::get(INVALID_ALIGN, 0, 0, 0);
+
+//===----------------------------------------------------------------------===//
+// PointerAlignElem, PointerAlign support
+//===----------------------------------------------------------------------===//
+
+PointerAlignElem
+PointerAlignElem::get(uint32_t addr_space, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ PointerAlignElem retval;
+ retval.AddressSpace = addr_space;
+ retval.ABIAlign = abi_align;
+ retval.PrefAlign = pref_align;
+ retval.TypeBitWidth = bit_width;
+ return retval;
+}
+
+bool
+PointerAlignElem::operator==(const PointerAlignElem &rhs) const {
+ return (ABIAlign == rhs.ABIAlign
+ && AddressSpace == rhs.AddressSpace
+ && PrefAlign == rhs.PrefAlign
+ && TypeBitWidth == rhs.TypeBitWidth);
+}
+
+const PointerAlignElem
+DataLayout::InvalidPointerElem = PointerAlignElem::get(~0U, 0U, 0U, 0U);
+
+//===----------------------------------------------------------------------===//
+// DataLayout Class Implementation
+//===----------------------------------------------------------------------===//
+
+void DataLayout::init(StringRef Desc) {
+ initializeDataLayoutPass(*PassRegistry::getPassRegistry());
+
+ LayoutMap = 0;
+ LittleEndian = false;
+ StackNaturalAlign = 0;
+
+ // Default alignments
+ setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
+ setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8
+ setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
+ setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
+ setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
+ setAlignment(FLOAT_ALIGN, 2, 2, 16); // half
+ setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
+ setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
+ setAlignment(FLOAT_ALIGN, 16, 16, 128); // ppcf128, quad, ...
+ setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ...
+ setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
+ setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
+ setPointerAlignment(0, 8, 8, 8);
+
+ parseSpecifier(Desc);
+}
+
+/// Checked version of split, to ensure mandatory subparts.
+static std::pair<StringRef, StringRef> split(StringRef Str, char Separator) {
+ assert(!Str.empty() && "parse error, string can't be empty here");
+ std::pair<StringRef, StringRef> Split = Str.split(Separator);
+ assert((!Split.second.empty() || Split.first == Str) &&
+ "a trailing separator is not allowed");
+ return Split;
+}
+
+/// Get an unsinged integer, including error checks.
+static unsigned getInt(StringRef R) {
+ unsigned Result;
+ bool error = R.getAsInteger(10, Result); (void)error;
+ assert(!error && "not a number, or does not fit in an unsigned int");
+ return Result;
+}
+
+/// Convert bits into bytes. Assert if not a byte width multiple.
+static unsigned inBytes(unsigned Bits) {
+ assert(Bits % 8 == 0 && "number of bits must be a byte width multiple");
+ return Bits / 8;
+}
+
+void DataLayout::parseSpecifier(StringRef Desc) {
+
+ while (!Desc.empty()) {
+
+ // Split at '-'.
+ std::pair<StringRef, StringRef> Split = split(Desc, '-');
+ Desc = Split.second;
+
+ // Split at ':'.
+ Split = split(Split.first, ':');
+
+ // Aliases used below.
+ StringRef &Tok = Split.first; // Current token.
+ StringRef &Rest = Split.second; // The rest of the string.
+
+ char Specifier = Tok.front();
+ Tok = Tok.substr(1);
+
+ switch (Specifier) {
+ case 'E':
+ LittleEndian = false;
+ break;
+ case 'e':
+ LittleEndian = true;
+ break;
+ case 'p': {
+ // Address space.
+ unsigned AddrSpace = Tok.empty() ? 0 : getInt(Tok);
+ assert(AddrSpace < 1 << 24 &&
+ "Invalid address space, must be a 24bit integer");
+
+ // Size.
+ Split = split(Rest, ':');
+ unsigned PointerMemSize = inBytes(getInt(Tok));
+
+ // ABI alignment.
+ Split = split(Rest, ':');
+ unsigned PointerABIAlign = inBytes(getInt(Tok));
+
+ // Preferred alignment.
+ unsigned PointerPrefAlign = PointerABIAlign;
+ if (!Rest.empty()) {
+ Split = split(Rest, ':');
+ PointerPrefAlign = inBytes(getInt(Tok));
+ }
+
+ setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign,
+ PointerMemSize);
+ break;
+ }
+ case 'i':
+ case 'v':
+ case 'f':
+ case 'a':
+ case 's': {
+ AlignTypeEnum AlignType;
+ switch (Specifier) {
+ default:
+ case 'i': AlignType = INTEGER_ALIGN; break;
+ case 'v': AlignType = VECTOR_ALIGN; break;
+ case 'f': AlignType = FLOAT_ALIGN; break;
+ case 'a': AlignType = AGGREGATE_ALIGN; break;
+ case 's': AlignType = STACK_ALIGN; break;
+ }
+
+ // Bit size.
+ unsigned Size = Tok.empty() ? 0 : getInt(Tok);
+
+ // ABI alignment.
+ Split = split(Rest, ':');
+ unsigned ABIAlign = inBytes(getInt(Tok));
+
+ // Preferred alignment.
+ unsigned PrefAlign = ABIAlign;
+ if (!Rest.empty()) {
+ Split = split(Rest, ':');
+ PrefAlign = inBytes(getInt(Tok));
+ }
+
+ setAlignment(AlignType, ABIAlign, PrefAlign, Size);
+
+ break;
+ }
+ case 'n': // Native integer types.
+ for (;;) {
+ unsigned Width = getInt(Tok);
+ assert(Width != 0 && "width must be non-zero");
+ LegalIntWidths.push_back(Width);
+ if (Rest.empty())
+ break;
+ Split = split(Rest, ':');
+ }
+ break;
+ case 'S': { // Stack natural alignment.
+ StackNaturalAlign = inBytes(getInt(Tok));
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown specifier in datalayout string");
+ break;
+ }
+ }
+}
+
+/// Default ctor.
+///
+/// @note This has to exist, because this is a pass, but it should never be
+/// used.
+DataLayout::DataLayout() : ImmutablePass(ID) {
+ report_fatal_error("Bad DataLayout ctor used. "
+ "Tool did not specify a DataLayout to use?");
+}
+
+DataLayout::DataLayout(const Module *M)
+ : ImmutablePass(ID) {
+ init(M->getDataLayout());
+}
+
+void
+DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ assert(pref_align < (1 << 16) && "Alignment doesn't fit in bitfield");
+ assert(bit_width < (1 << 24) && "Bit width doesn't fit in bitfield");
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == (unsigned)align_type &&
+ Alignments[i].TypeBitWidth == bit_width) {
+ // Update the abi, preferred alignments.
+ Alignments[i].ABIAlign = abi_align;
+ Alignments[i].PrefAlign = pref_align;
+ return;
+ }
+ }
+
+ Alignments.push_back(LayoutAlignElem::get(align_type, abi_align,
+ pref_align, bit_width));
+}
+
+void
+DataLayout::setPointerAlignment(uint32_t addr_space, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ DenseMap<unsigned,PointerAlignElem>::iterator val = Pointers.find(addr_space);
+ if (val == Pointers.end()) {
+ Pointers[addr_space] = PointerAlignElem::get(addr_space,
+ abi_align, pref_align, bit_width);
+ } else {
+ val->second.ABIAlign = abi_align;
+ val->second.PrefAlign = pref_align;
+ val->second.TypeBitWidth = bit_width;
+ }
+}
+
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
+/// preferred if ABIInfo = false) the layout wants for the specified datatype.
+unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
+ uint32_t BitWidth, bool ABIInfo,
+ Type *Ty) const {
+ // Check to see if we have an exact match and remember the best match we see.
+ int BestMatchIdx = -1;
+ int LargestInt = -1;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == (unsigned)AlignType &&
+ Alignments[i].TypeBitWidth == BitWidth)
+ return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
+
+ // The best match so far depends on what we're looking for.
+ if (AlignType == INTEGER_ALIGN &&
+ Alignments[i].AlignType == INTEGER_ALIGN) {
+ // The "best match" for integers is the smallest size that is larger than
+ // the BitWidth requested.
+ if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
+ Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
+ BestMatchIdx = i;
+ // However, if there isn't one that's larger, then we must use the
+ // largest one we have (see below)
+ if (LargestInt == -1 ||
+ Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
+ LargestInt = i;
+ }
+ }
+
+ // Okay, we didn't find an exact solution. Fall back here depending on what
+ // is being looked for.
+ if (BestMatchIdx == -1) {
+ // If we didn't find an integer alignment, fall back on most conservative.
+ if (AlignType == INTEGER_ALIGN) {
+ BestMatchIdx = LargestInt;
+ } else {
+ assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
+
+ // By default, use natural alignment for vector types. This is consistent
+ // with what clang and llvm-gcc do.
+ unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ Align *= cast<VectorType>(Ty)->getNumElements();
+ // If the alignment is not a power of 2, round up to the next power of 2.
+ // This happens for non-power-of-2 length vectors.
+ if (Align & (Align-1))
+ Align = NextPowerOf2(Align);
+ return Align;
+ }
+ }
+
+ // Since we got a "best match" index, just return it.
+ return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
+ : Alignments[BestMatchIdx].PrefAlign;
+}
+
+namespace {
+
+class StructLayoutMap {
+ typedef DenseMap<StructType*, StructLayout*> LayoutInfoTy;
+ LayoutInfoTy LayoutInfo;
+
+public:
+ virtual ~StructLayoutMap() {
+ // Remove any layouts.
+ for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end();
+ I != E; ++I) {
+ StructLayout *Value = I->second;
+ Value->~StructLayout();
+ free(Value);
+ }
+ }
+
+ StructLayout *&operator[](StructType *STy) {
+ return LayoutInfo[STy];
+ }
+
+ // for debugging...
+ virtual void dump() const {}
+};
+
+} // end anonymous namespace
+
+DataLayout::~DataLayout() {
+ delete static_cast<StructLayoutMap*>(LayoutMap);
+}
+
+bool DataLayout::doFinalization(Module &M) {
+ delete static_cast<StructLayoutMap*>(LayoutMap);
+ LayoutMap = 0;
+ return false;
+}
+
+const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
+ if (!LayoutMap)
+ LayoutMap = new StructLayoutMap();
+
+ StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+ StructLayout *&SL = (*STM)[Ty];
+ if (SL) return SL;
+
+ // Otherwise, create the struct layout. Because it is variable length, we
+ // malloc it, then use placement new.
+ int NumElts = Ty->getNumElements();
+ StructLayout *L =
+ (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
+
+ // Set SL before calling StructLayout's ctor. The ctor could cause other
+ // entries to be added to TheMap, invalidating our reference.
+ SL = L;
+
+ new (L) StructLayout(Ty, *this);
+
+ return L;
+}
+
+std::string DataLayout::getStringRepresentation() const {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << (LittleEndian ? "e" : "E");
+ SmallVector<unsigned, 8> addrSpaces;
+ // Lets get all of the known address spaces and sort them
+ // into increasing order so that we can emit the string
+ // in a cleaner format.
+ for (DenseMap<unsigned, PointerAlignElem>::const_iterator
+ pib = Pointers.begin(), pie = Pointers.end();
+ pib != pie; ++pib) {
+ addrSpaces.push_back(pib->first);
+ }
+ std::sort(addrSpaces.begin(), addrSpaces.end());
+ for (SmallVector<unsigned, 8>::iterator asb = addrSpaces.begin(),
+ ase = addrSpaces.end(); asb != ase; ++asb) {
+ const PointerAlignElem &PI = Pointers.find(*asb)->second;
+ OS << "-p";
+ if (PI.AddressSpace) {
+ OS << PI.AddressSpace;
+ }
+ OS << ":" << PI.TypeBitWidth*8 << ':' << PI.ABIAlign*8
+ << ':' << PI.PrefAlign*8;
+ }
+ OS << "-S" << StackNaturalAlign*8;
+
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ const LayoutAlignElem &AI = Alignments[i];
+ OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
+ << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
+ }
+
+ if (!LegalIntWidths.empty()) {
+ OS << "-n" << (unsigned)LegalIntWidths[0];
+
+ for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
+ OS << ':' << (unsigned)LegalIntWidths[i];
+ }
+ return OS.str();
+}
+
+
+/*!
+ \param abi_or_pref Flag that determines which alignment is returned. true
+ returns the ABI alignment, false returns the preferred alignment.
+ \param Ty The underlying type for which alignment is determined.
+
+ Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
+ == false) for the requested type \a Ty.
+ */
+unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
+ int AlignType = -1;
+
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ // Early escape for the non-numeric types.
+ case Type::LabelTyID:
+ return (abi_or_pref
+ ? getPointerABIAlignment(0)
+ : getPointerPrefAlignment(0));
+ case Type::PointerTyID: {
+ unsigned AS = dyn_cast<PointerType>(Ty)->getAddressSpace();
+ return (abi_or_pref
+ ? getPointerABIAlignment(AS)
+ : getPointerPrefAlignment(AS));
+ }
+ case Type::ArrayTyID:
+ return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
+
+ case Type::StructTyID: {
+ // Packed structure types always have an ABI alignment of one.
+ if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
+ return 1;
+
+ // Get the layout annotation... which is lazily created on demand.
+ const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
+ unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
+ return std::max(Align, Layout->getAlignment());
+ }
+ case Type::IntegerTyID:
+ AlignType = INTEGER_ALIGN;
+ break;
+ case Type::HalfTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ // PPC_FP128TyID and FP128TyID have different data contents, but the
+ // same size and alignment, so they look the same here.
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ case Type::X86_FP80TyID:
+ AlignType = FLOAT_ALIGN;
+ break;
+ case Type::X86_MMXTyID:
+ case Type::VectorTyID:
+ AlignType = VECTOR_ALIGN;
+ break;
+ default:
+ llvm_unreachable("Bad type for getAlignment!!!");
+ }
+
+ return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
+ abi_or_pref, Ty);
+}
+
+unsigned DataLayout::getABITypeAlignment(Type *Ty) const {
+ return getAlignment(Ty, true);
+}
+
+/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
+/// an integer type of the specified bitwidth.
+unsigned DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const {
+ return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
+}
+
+
+unsigned DataLayout::getCallFrameTypeAlignment(Type *Ty) const {
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
+ if (Alignments[i].AlignType == STACK_ALIGN)
+ return Alignments[i].ABIAlign;
+
+ return getABITypeAlignment(Ty);
+}
+
+unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const {
+ return getAlignment(Ty, false);
+}
+
+unsigned DataLayout::getPreferredTypeAlignmentShift(Type *Ty) const {
+ unsigned Align = getPrefTypeAlignment(Ty);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+ return Log2_32(Align);
+}
+
+/// getIntPtrType - Return an integer type with size at least as big as that
+/// of a pointer in the given address space.
+IntegerType *DataLayout::getIntPtrType(LLVMContext &C,
+ unsigned AddressSpace) const {
+ return IntegerType::get(C, getPointerSizeInBits(AddressSpace));
+}
+
+/// getIntPtrType - Return an integer (vector of integer) type with size at
+/// least as big as that of a pointer of the given pointer (vector of pointer)
+/// type.
+Type *DataLayout::getIntPtrType(Type *Ty) const {
+ assert(Ty->isPtrOrPtrVectorTy() &&
+ "Expected a pointer or pointer vector type.");
+ unsigned NumBits = getTypeSizeInBits(Ty->getScalarType());
+ IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits);
+ if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
+ return VectorType::get(IntTy, VecTy->getNumElements());
+ return IntTy;
+}
+
+Type *DataLayout::getSmallestLegalIntType(LLVMContext &C, unsigned Width) const {
+ for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
+ if (Width <= LegalIntWidths[i])
+ return Type::getIntNTy(C, LegalIntWidths[i]);
+ return 0;
+}
+
+uint64_t DataLayout::getIndexedOffset(Type *ptrTy,
+ ArrayRef<Value *> Indices) const {
+ Type *Ty = ptrTy;
+ assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
+ uint64_t Result = 0;
+
+ generic_gep_type_iterator<Value* const*>
+ TI = gep_type_begin(ptrTy, Indices);
+ for (unsigned CurIDX = 0, EndIDX = Indices.size(); CurIDX != EndIDX;
+ ++CurIDX, ++TI) {
+ if (StructType *STy = dyn_cast<StructType>(*TI)) {
+ assert(Indices[CurIDX]->getType() ==
+ Type::getInt32Ty(ptrTy->getContext()) &&
+ "Illegal struct idx");
+ unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
+
+ // Get structure layout information...
+ const StructLayout *Layout = getStructLayout(STy);
+
+ // Add in the offset, as calculated by the structure layout info...
+ Result += Layout->getElementOffset(FieldNo);
+
+ // Update Ty to refer to current element
+ Ty = STy->getElementType(FieldNo);
+ } else {
+ // Update Ty to refer to current element
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // Get the array index and the size of each array element.
+ if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
+ Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty);
+ }
+ }
+
+ return Result;
+}
+
+/// getPreferredAlignment - Return the preferred alignment of the specified
+/// global. This includes an explicitly requested alignment (if the global
+/// has one).
+unsigned DataLayout::getPreferredAlignment(const GlobalVariable *GV) const {
+ Type *ElemType = GV->getType()->getElementType();
+ unsigned Alignment = getPrefTypeAlignment(ElemType);
+ unsigned GVAlignment = GV->getAlignment();
+ if (GVAlignment >= Alignment) {
+ Alignment = GVAlignment;
+ } else if (GVAlignment != 0) {
+ Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
+ }
+
+ if (GV->hasInitializer() && GVAlignment == 0) {
+ if (Alignment < 16) {
+ // If the global is not external, see if it is large. If so, give it a
+ // larger alignment.
+ if (getTypeSizeInBits(ElemType) > 128)
+ Alignment = 16; // 16-byte alignment.
+ }
+ }
+ return Alignment;
+}
+
+/// getPreferredAlignmentLog - Return the preferred alignment of the
+/// specified global, returned in log form. This includes an explicitly
+/// requested alignment (if the global has one).
+unsigned DataLayout::getPreferredAlignmentLog(const GlobalVariable *GV) const {
+ return Log2_32(getPreferredAlignment(GV));
+}
diff --git a/contrib/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm/lib/IR/DebugInfo.cpp
new file mode 100644
index 000000000000..0ffe99d70454
--- /dev/null
+++ b/contrib/llvm/lib/IR/DebugInfo.cpp
@@ -0,0 +1,1209 @@
+//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the helper classes used to build and interpret debug
+// information in LLVM IR form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor
+//===----------------------------------------------------------------------===//
+
+DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlockFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
+}
+
+bool DIDescriptor::Verify() const {
+ return DbgNode &&
+ (DIDerivedType(DbgNode).Verify() ||
+ DICompositeType(DbgNode).Verify() || DIBasicType(DbgNode).Verify() ||
+ DIVariable(DbgNode).Verify() || DISubprogram(DbgNode).Verify() ||
+ DIGlobalVariable(DbgNode).Verify() || DIFile(DbgNode).Verify() ||
+ DICompileUnit(DbgNode).Verify() || DINameSpace(DbgNode).Verify() ||
+ DILexicalBlock(DbgNode).Verify() ||
+ DILexicalBlockFile(DbgNode).Verify() ||
+ DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() ||
+ DIObjCProperty(DbgNode).Verify() ||
+ DITemplateTypeParameter(DbgNode).Verify() ||
+ DITemplateValueParameter(DbgNode).Verify());
+}
+
+static Value *getField(const MDNode *DbgNode, unsigned Elt) {
+ if (DbgNode == 0 || Elt >= DbgNode->getNumOperands())
+ return 0;
+ return DbgNode->getOperand(Elt);
+}
+
+static const MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
+ if (const MDNode *R = dyn_cast_or_null<MDNode>(getField(DbgNode, Elt)))
+ return R;
+ return 0;
+}
+
+static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) {
+ if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt)))
+ return MDS->getString();
+ return StringRef();
+}
+
+StringRef DIDescriptor::getStringField(unsigned Elt) const {
+ return ::getStringField(DbgNode, Elt);
+}
+
+uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ if (ConstantInt *CI
+ = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
+ return CI->getZExtValue();
+
+ return 0;
+}
+
+int64_t DIDescriptor::getInt64Field(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ if (ConstantInt *CI
+ = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
+ return CI->getSExtValue();
+
+ return 0;
+}
+
+DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return DIDescriptor();
+
+ if (Elt < DbgNode->getNumOperands())
+ return
+ DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
+ return DIDescriptor();
+}
+
+GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+Constant *DIDescriptor::getConstantField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+Function *DIDescriptor::getFunctionField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) {
+ if (DbgNode == 0)
+ return;
+
+ if (Elt < DbgNode->getNumOperands()) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ Node->replaceOperandWith(Elt, F);
+ }
+}
+
+unsigned DIVariable::getNumAddrElements() const {
+ return DbgNode->getNumOperands()-8;
+}
+
+/// getInlinedAt - If this variable is inlined then return inline location.
+MDNode *DIVariable::getInlinedAt() const {
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(7));
+}
+
+//===----------------------------------------------------------------------===//
+// Predicates
+//===----------------------------------------------------------------------===//
+
+/// isBasicType - Return true if the specified tag is legal for
+/// DIBasicType.
+bool DIDescriptor::isBasicType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_base_type:
+ case dwarf::DW_TAG_unspecified_type:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
+bool DIDescriptor::isDerivedType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_ptr_to_member_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_rvalue_reference_type:
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ case dwarf::DW_TAG_restrict_type:
+ case dwarf::DW_TAG_member:
+ case dwarf::DW_TAG_inheritance:
+ case dwarf::DW_TAG_friend:
+ return true;
+ default:
+ // CompositeTypes are currently modelled as DerivedTypes.
+ return isCompositeType();
+ }
+}
+
+/// isCompositeType - Return true if the specified tag is legal for
+/// DICompositeType.
+bool DIDescriptor::isCompositeType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_array_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_subroutine_type:
+ case dwarf::DW_TAG_class_type:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isVariable - Return true if the specified tag is legal for DIVariable.
+bool DIDescriptor::isVariable() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_auto_variable:
+ case dwarf::DW_TAG_arg_variable:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isType - Return true if the specified tag is legal for DIType.
+bool DIDescriptor::isType() const {
+ return isBasicType() || isCompositeType() || isDerivedType();
+}
+
+/// isSubprogram - Return true if the specified tag is legal for
+/// DISubprogram.
+bool DIDescriptor::isSubprogram() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
+}
+
+/// isGlobalVariable - Return true if the specified tag is legal for
+/// DIGlobalVariable.
+bool DIDescriptor::isGlobalVariable() const {
+ return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
+ getTag() == dwarf::DW_TAG_constant);
+}
+
+/// isGlobal - Return true if the specified tag is legal for DIGlobal.
+bool DIDescriptor::isGlobal() const {
+ return isGlobalVariable();
+}
+
+/// isUnspecifiedParmeter - Return true if the specified tag is
+/// DW_TAG_unspecified_parameters.
+bool DIDescriptor::isUnspecifiedParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
+}
+
+/// isScope - Return true if the specified tag is one of the scope
+/// related tag.
+bool DIDescriptor::isScope() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_lexical_block:
+ case dwarf::DW_TAG_subprogram:
+ case dwarf::DW_TAG_namespace:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// isTemplateTypeParameter - Return true if the specified tag is
+/// DW_TAG_template_type_parameter.
+bool DIDescriptor::isTemplateTypeParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
+}
+
+/// isTemplateValueParameter - Return true if the specified tag is
+/// DW_TAG_template_value_parameter.
+bool DIDescriptor::isTemplateValueParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
+}
+
+/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
+bool DIDescriptor::isCompileUnit() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
+}
+
+/// isFile - Return true if the specified tag is DW_TAG_file_type.
+bool DIDescriptor::isFile() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_file_type;
+}
+
+/// isNameSpace - Return true if the specified tag is DW_TAG_namespace.
+bool DIDescriptor::isNameSpace() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_namespace;
+}
+
+/// isLexicalBlockFile - Return true if the specified descriptor is a
+/// lexical block with an extra file.
+bool DIDescriptor::isLexicalBlockFile() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
+ (DbgNode->getNumOperands() == 3);
+}
+
+/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
+bool DIDescriptor::isLexicalBlock() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
+ (DbgNode->getNumOperands() > 3);
+}
+
+/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
+bool DIDescriptor::isSubrange() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
+}
+
+/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
+bool DIDescriptor::isEnumerator() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
+}
+
+/// isObjCProperty - Return true if the specified tag is DW_TAG_APPLE_property.
+bool DIDescriptor::isObjCProperty() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
+}
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+DIType::DIType(const MDNode *N) : DIScope(N) {
+ if (!N) return;
+ if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
+ DbgNode = 0;
+ }
+}
+
+unsigned DIArray::getNumElements() const {
+ if (!DbgNode)
+ return 0;
+ return DbgNode->getNumOperands();
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(DIDescriptor &D) {
+ if (!DbgNode)
+ return;
+
+ // Since we use a TrackingVH for the node, its easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(MDNode *D) {
+ if (!DbgNode)
+ return;
+
+ // Since we use a TrackingVH for the node, its easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
+
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+bool DIType::isUnsignedDIType() {
+ DIDerivedType DTy(DbgNode);
+ if (DTy.Verify())
+ return DTy.getTypeDerivedFrom().isUnsignedDIType();
+
+ DIBasicType BTy(DbgNode);
+ if (BTy.Verify()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_boolean)
+ return true;
+ }
+ return false;
+}
+
+/// Verify - Verify that a compile unit is well formed.
+bool DICompileUnit::Verify() const {
+ if (!isCompileUnit())
+ return false;
+ StringRef N = getFilename();
+ if (N.empty())
+ return false;
+ // It is possible that directory and produce string is empty.
+ return DbgNode->getNumOperands() == 12;
+}
+
+/// Verify - Verify that an ObjC property is well formed.
+bool DIObjCProperty::Verify() const {
+ if (!isObjCProperty())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify()) return false;
+
+ // Don't worry about the rest of the strings for now.
+ return DbgNode->getNumOperands() == 8;
+}
+
+/// Verify - Verify that a type descriptor is well formed.
+bool DIType::Verify() const {
+ if (!isType())
+ return false;
+ if (getContext() && !getContext().Verify())
+ return false;
+ unsigned Tag = getTag();
+ if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
+ Tag != dwarf::DW_TAG_ptr_to_member_type &&
+ Tag != dwarf::DW_TAG_reference_type &&
+ Tag != dwarf::DW_TAG_rvalue_reference_type &&
+ Tag != dwarf::DW_TAG_restrict_type &&
+ Tag != dwarf::DW_TAG_array_type &&
+ Tag != dwarf::DW_TAG_enumeration_type &&
+ Tag != dwarf::DW_TAG_subroutine_type &&
+ getFilename().empty())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a basic type descriptor is well formed.
+bool DIBasicType::Verify() const {
+ return isBasicType() && DbgNode->getNumOperands() == 10;
+}
+
+/// Verify - Verify that a derived type descriptor is well formed.
+bool DIDerivedType::Verify() const {
+ return isDerivedType() && DbgNode->getNumOperands() >= 10 &&
+ DbgNode->getNumOperands() <= 14;
+}
+
+/// Verify - Verify that a composite type descriptor is well formed.
+bool DICompositeType::Verify() const {
+ if (!isCompositeType())
+ return false;
+ if (getContext() && !getContext().Verify())
+ return false;
+
+ return DbgNode->getNumOperands() >= 10 && DbgNode->getNumOperands() <= 14;
+}
+
+/// Verify - Verify that a subprogram descriptor is well formed.
+bool DISubprogram::Verify() const {
+ if (!isSubprogram())
+ return false;
+
+ if (getContext() && !getContext().Verify())
+ return false;
+
+ DICompositeType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+ return DbgNode->getNumOperands() == 20;
+}
+
+/// Verify - Verify that a global variable descriptor is well formed.
+bool DIGlobalVariable::Verify() const {
+ if (!isGlobalVariable())
+ return false;
+
+ if (getDisplayName().empty())
+ return false;
+
+ if (getContext() && !getContext().Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ if (!getGlobal() && !getConstant())
+ return false;
+
+ return DbgNode->getNumOperands() == 13;
+}
+
+/// Verify - Verify that a variable descriptor is well formed.
+bool DIVariable::Verify() const {
+ if (!isVariable())
+ return false;
+
+ if (getContext() && !getContext().Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ return DbgNode->getNumOperands() >= 8;
+}
+
+/// Verify - Verify that a location descriptor is well formed.
+bool DILocation::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ return DbgNode->getNumOperands() == 4;
+}
+
+/// Verify - Verify that a namespace descriptor is well formed.
+bool DINameSpace::Verify() const {
+ if (!isNameSpace())
+ return false;
+ return DbgNode->getNumOperands() == 5;
+}
+
+/// \brief Retrieve the MDNode for the directory/file pair.
+MDNode *DIFile::getFileNode() const {
+ return const_cast<MDNode*>(getNodeField(DbgNode, 1));
+}
+
+/// \brief Verify that the file descriptor is well formed.
+bool DIFile::Verify() const {
+ return isFile() && DbgNode->getNumOperands() == 2;
+}
+
+/// \brief Verify that the enumerator descriptor is well formed.
+bool DIEnumerator::Verify() const {
+ return isEnumerator() && DbgNode->getNumOperands() == 3;
+}
+
+/// \brief Verify that the subrange descriptor is well formed.
+bool DISubrange::Verify() const {
+ return isSubrange() && DbgNode->getNumOperands() == 3;
+}
+
+/// \brief Verify that the lexical block descriptor is well formed.
+bool DILexicalBlock::Verify() const {
+ return isLexicalBlock() && DbgNode->getNumOperands() == 6;
+}
+
+/// \brief Verify that the file-scoped lexical block descriptor is well formed.
+bool DILexicalBlockFile::Verify() const {
+ return isLexicalBlockFile() && DbgNode->getNumOperands() == 3;
+}
+
+/// \brief Verify that the template type parameter descriptor is well formed.
+bool DITemplateTypeParameter::Verify() const {
+ return isTemplateTypeParameter() && DbgNode->getNumOperands() == 7;
+}
+
+/// \brief Verify that the template value parameter descriptor is well formed.
+bool DITemplateValueParameter::Verify() const {
+ return isTemplateValueParameter() && DbgNode->getNumOperands() == 8;
+}
+
+/// getOriginalTypeSize - If this type is derived from a base type then
+/// return base type size.
+uint64_t DIDerivedType::getOriginalTypeSize() const {
+ unsigned Tag = getTag();
+
+ if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
+ Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
+ Tag != dwarf::DW_TAG_restrict_type)
+ return getSizeInBits();
+
+ DIType BaseType = getTypeDerivedFrom();
+
+ // If this type is not derived from any type then take conservative approach.
+ if (!BaseType.isValid())
+ return getSizeInBits();
+
+ // If this is a derived type, go ahead and get the base type, unless it's a
+ // reference then it's just the size of the field. Pointer types have no need
+ // of this since they're a different type of qualification on the type.
+ if (BaseType.getTag() == dwarf::DW_TAG_reference_type ||
+ BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type)
+ return getSizeInBits();
+
+ if (BaseType.isDerivedType())
+ return DIDerivedType(BaseType).getOriginalTypeSize();
+
+ return BaseType.getSizeInBits();
+}
+
+/// getObjCProperty - Return property node, if this ivar is associated with one.
+MDNode *DIDerivedType::getObjCProperty() const {
+ if (DbgNode->getNumOperands() <= 10)
+ return NULL;
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10));
+}
+
+/// \brief Set the array of member DITypes.
+void DICompositeType::setTypeArray(DIArray Elements, DIArray TParams) {
+ assert((!TParams || DbgNode->getNumOperands() == 14) &&
+ "If you're setting the template parameters this should include a slot "
+ "for that!");
+ TrackingVH<MDNode> N(*this);
+ N->replaceOperandWith(10, Elements);
+ if (TParams)
+ N->replaceOperandWith(13, TParams);
+ DbgNode = N;
+}
+
+/// \brief Set the containing type.
+void DICompositeType::setContainingType(DICompositeType ContainingType) {
+ TrackingVH<MDNode> N(*this);
+ N->replaceOperandWith(12, ContainingType);
+ DbgNode = N;
+}
+
+/// isInlinedFnArgument - Return true if this variable provides debugging
+/// information for an inlined function arguments.
+bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
+ assert(CurFn && "Invalid function");
+ if (!getContext().isSubprogram())
+ return false;
+ // This variable is not inlined function argument if its scope
+ // does not describe current function.
+ return !DISubprogram(getContext()).describes(CurFn);
+}
+
+/// describes - Return true if this subprogram provides debugging
+/// information for the function F.
+bool DISubprogram::describes(const Function *F) {
+ assert(F && "Invalid function");
+ if (F == getFunction())
+ return true;
+ StringRef Name = getLinkageName();
+ if (Name.empty())
+ Name = getName();
+ if (F->getName() == Name)
+ return true;
+ return false;
+}
+
+unsigned DISubprogram::isOptimized() const {
+ assert (DbgNode && "Invalid subprogram descriptor!");
+ if (DbgNode->getNumOperands() == 15)
+ return getUnsignedField(14);
+ return 0;
+}
+
+MDNode *DISubprogram::getVariablesNodes() const {
+ if (!DbgNode || DbgNode->getNumOperands() <= 18)
+ return NULL;
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(18));
+}
+
+DIArray DISubprogram::getVariables() const {
+ if (!DbgNode || DbgNode->getNumOperands() <= 18)
+ return DIArray();
+ if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(18)))
+ return DIArray(T);
+ return DIArray();
+}
+
+StringRef DIScope::getFilename() const {
+ if (!DbgNode)
+ return StringRef();
+ return ::getStringField(getNodeField(DbgNode, 1), 0);
+}
+
+StringRef DIScope::getDirectory() const {
+ if (!DbgNode)
+ return StringRef();
+ return ::getStringField(getNodeField(DbgNode, 1), 1);
+}
+
+DIArray DICompileUnit::getEnumTypes() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)))
+ return DIArray(N);
+ return DIArray();
+}
+
+DIArray DICompileUnit::getRetainedTypes() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8)))
+ return DIArray(N);
+ return DIArray();
+}
+
+DIArray DICompileUnit::getSubprograms() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9)))
+ return DIArray(N);
+ return DIArray();
+}
+
+
+DIArray DICompileUnit::getGlobalVariables() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
+ return DIArray(N);
+ return DIArray();
+}
+
+/// fixupObjcLikeName - Replace contains special characters used
+/// in a typical Objective-C names with '.' in a given string.
+static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
+ bool isObjCLike = false;
+ for (size_t i = 0, e = Str.size(); i < e; ++i) {
+ char C = Str[i];
+ if (C == '[')
+ isObjCLike = true;
+
+ if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
+ C == '+' || C == '(' || C == ')'))
+ Out.push_back('.');
+ else
+ Out.push_back(C);
+ }
+}
+
+/// getFnSpecificMDNode - Return a NameMDNode, if available, that is
+/// suitable to hold function specific information.
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) {
+ SmallString<32> Name = StringRef("llvm.dbg.lv.");
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ else
+ FName = Fn.getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ fixupObjcLikeName(FName, Name);
+ return M.getNamedMetadata(Name.str());
+}
+
+/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
+/// to hold function specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
+ SmallString<32> Name = StringRef("llvm.dbg.lv.");
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ else
+ FName = Fn.getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ fixupObjcLikeName(FName, Name);
+
+ return M.getOrInsertNamedMetadata(Name.str());
+}
+
+/// createInlinedVariable - Create a new inlined variable based on current
+/// variable.
+/// @param DV Current Variable.
+/// @param InlinedScope Location at current variable is inlined.
+DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
+ LLVMContext &VMContext) {
+ SmallVector<Value *, 16> Elts;
+ // Insert inlined scope as 7th element.
+ for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
+ i == 7 ? Elts.push_back(InlinedScope) :
+ Elts.push_back(DV->getOperand(i));
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// cleanseInlinedVariable - Remove inlined scope from the variable.
+DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
+ SmallVector<Value *, 16> Elts;
+ // Insert inlined scope as 7th element.
+ for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
+ i == 7 ?
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))):
+ Elts.push_back(DV->getOperand(i));
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// getDISubprogram - Find subprogram that is enclosing this scope.
+DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
+ DIDescriptor D(Scope);
+ if (D.isSubprogram())
+ return DISubprogram(Scope);
+
+ if (D.isLexicalBlockFile())
+ return getDISubprogram(DILexicalBlockFile(Scope).getContext());
+
+ if (D.isLexicalBlock())
+ return getDISubprogram(DILexicalBlock(Scope).getContext());
+
+ return DISubprogram();
+}
+
+/// getDICompositeType - Find underlying composite type.
+DICompositeType llvm::getDICompositeType(DIType T) {
+ if (T.isCompositeType())
+ return DICompositeType(T);
+
+ if (T.isDerivedType())
+ return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
+
+ return DICompositeType();
+}
+
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+bool llvm::isSubprogramContext(const MDNode *Context) {
+ if (!Context)
+ return false;
+ DIDescriptor D(Context);
+ if (D.isSubprogram())
+ return true;
+ if (D.isType())
+ return isSubprogramContext(DIType(Context).getContext());
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoFinder implementations.
+//===----------------------------------------------------------------------===//
+
+/// processModule - Process entire module and collect debug info.
+void DebugInfoFinder::processModule(const Module &M) {
+ if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ addCompileUnit(CU);
+ DIArray GVs = CU.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(GVs.getElement(i));
+ if (addGlobalVariable(DIG))
+ processType(DIG.getType());
+ }
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ processSubprogram(DISubprogram(SPs.getElement(i)));
+ DIArray EnumTypes = CU.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ processType(DIType(EnumTypes.getElement(i)));
+ DIArray RetainedTypes = CU.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ processType(DIType(RetainedTypes.getElement(i)));
+ // FIXME: We really shouldn't be bailing out after visiting just one CU
+ return;
+ }
+ }
+}
+
+/// processLocation - Process DILocation.
+void DebugInfoFinder::processLocation(DILocation Loc) {
+ if (!Loc.Verify()) return;
+ DIDescriptor S(Loc.getScope());
+ if (S.isCompileUnit())
+ addCompileUnit(DICompileUnit(S));
+ else if (S.isSubprogram())
+ processSubprogram(DISubprogram(S));
+ else if (S.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(S));
+ else if (S.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF = DILexicalBlockFile(S);
+ processLexicalBlock(DILexicalBlock(DBF.getScope()));
+ }
+ processLocation(Loc.getOrigLocation());
+}
+
+/// processType - Process DIType.
+void DebugInfoFinder::processType(DIType DT) {
+ if (!addType(DT))
+ return;
+ if (DT.isCompositeType()) {
+ DICompositeType DCT(DT);
+ processType(DCT.getTypeDerivedFrom());
+ DIArray DA = DCT.getTypeArray();
+ for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
+ DIDescriptor D = DA.getElement(i);
+ if (D.isType())
+ processType(DIType(D));
+ else if (D.isSubprogram())
+ processSubprogram(DISubprogram(D));
+ }
+ } else if (DT.isDerivedType()) {
+ DIDerivedType DDT(DT);
+ processType(DDT.getTypeDerivedFrom());
+ }
+}
+
+/// processLexicalBlock
+void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
+ DIScope Context = LB.getContext();
+ if (Context.isLexicalBlock())
+ return processLexicalBlock(DILexicalBlock(Context));
+ else if (Context.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF = DILexicalBlockFile(Context);
+ return processLexicalBlock(DILexicalBlock(DBF.getScope()));
+ }
+ else
+ return processSubprogram(DISubprogram(Context));
+}
+
+/// processSubprogram - Process DISubprogram.
+void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+ if (!addSubprogram(SP))
+ return;
+ processType(SP.getType());
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(const DbgDeclareInst *DDI) {
+ MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
+ if (!N) return;
+
+ DIDescriptor DV(N);
+ if (!DV.isVariable())
+ return;
+
+ if (!NodesSeen.insert(DV))
+ return;
+ processType(DIVariable(N).getType());
+}
+
+/// addType - Add type into Tys.
+bool DebugInfoFinder::addType(DIType DT) {
+ if (!DT.isValid())
+ return false;
+
+ if (!NodesSeen.insert(DT))
+ return false;
+
+ TYs.push_back(DT);
+ return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+ if (!CU.Verify())
+ return false;
+
+ if (!NodesSeen.insert(CU))
+ return false;
+
+ CUs.push_back(CU);
+ return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+ if (!DIDescriptor(DIG).isGlobalVariable())
+ return false;
+
+ if (!NodesSeen.insert(DIG))
+ return false;
+
+ GVs.push_back(DIG);
+ return true;
+}
+
+// addSubprogram - Add subprgoram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+ if (!DIDescriptor(SP).isSubprogram())
+ return false;
+
+ if (!NodesSeen.insert(SP))
+ return false;
+
+ SPs.push_back(SP);
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor: dump routines for all descriptors.
+//===----------------------------------------------------------------------===//
+
+/// dump - Print descriptor to dbgs() with a newline.
+void DIDescriptor::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// print - Print descriptor.
+void DIDescriptor::print(raw_ostream &OS) const {
+ if (!DbgNode) return;
+
+ if (const char *Tag = dwarf::TagString(getTag()))
+ OS << "[ " << Tag << " ]";
+
+ if (this->isSubrange()) {
+ DISubrange(DbgNode).printInternal(OS);
+ } else if (this->isCompileUnit()) {
+ DICompileUnit(DbgNode).printInternal(OS);
+ } else if (this->isFile()) {
+ DIFile(DbgNode).printInternal(OS);
+ } else if (this->isEnumerator()) {
+ DIEnumerator(DbgNode).printInternal(OS);
+ } else if (this->isBasicType()) {
+ DIType(DbgNode).printInternal(OS);
+ } else if (this->isDerivedType()) {
+ DIDerivedType(DbgNode).printInternal(OS);
+ } else if (this->isCompositeType()) {
+ DICompositeType(DbgNode).printInternal(OS);
+ } else if (this->isSubprogram()) {
+ DISubprogram(DbgNode).printInternal(OS);
+ } else if (this->isGlobalVariable()) {
+ DIGlobalVariable(DbgNode).printInternal(OS);
+ } else if (this->isVariable()) {
+ DIVariable(DbgNode).printInternal(OS);
+ } else if (this->isObjCProperty()) {
+ DIObjCProperty(DbgNode).printInternal(OS);
+ } else if (this->isNameSpace()) {
+ DINameSpace(DbgNode).printInternal(OS);
+ } else if (this->isScope()) {
+ DIScope(DbgNode).printInternal(OS);
+ }
+}
+
+void DISubrange::printInternal(raw_ostream &OS) const {
+ int64_t Count = getCount();
+ if (Count != -1)
+ OS << " [" << getLo() << ", " << Count - 1 << ']';
+ else
+ OS << " [unbounded]";
+}
+
+void DIScope::printInternal(raw_ostream &OS) const {
+ OS << " [" << getDirectory() << "/" << getFilename() << ']';
+}
+
+void DICompileUnit::printInternal(raw_ostream &OS) const {
+ DIScope::printInternal(OS);
+ if (const char *Lang = dwarf::LanguageString(getLanguage()))
+ OS << " [" << Lang << ']';
+}
+
+void DIEnumerator::printInternal(raw_ostream &OS) const {
+ OS << " [" << getName() << " :: " << getEnumValue() << ']';
+}
+
+void DIType::printInternal(raw_ostream &OS) const {
+ if (!DbgNode) return;
+
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "]";
+
+ // TODO: Print context?
+
+ OS << " [line " << getLineNumber()
+ << ", size " << getSizeInBits()
+ << ", align " << getAlignInBits()
+ << ", offset " << getOffsetInBits();
+ if (isBasicType())
+ if (const char *Enc =
+ dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding()))
+ OS << ", enc " << Enc;
+ OS << "]";
+
+ if (isPrivate())
+ OS << " [private]";
+ else if (isProtected())
+ OS << " [protected]";
+
+ if (isArtificial())
+ OS << " [artificial]";
+
+ if (isForwardDecl())
+ OS << " [fwd]";
+ if (isVector())
+ OS << " [vector]";
+ if (isStaticMember())
+ OS << " [static]";
+}
+
+void DIDerivedType::printInternal(raw_ostream &OS) const {
+ DIType::printInternal(OS);
+ OS << " [from " << getTypeDerivedFrom().getName() << ']';
+}
+
+void DICompositeType::printInternal(raw_ostream &OS) const {
+ DIType::printInternal(OS);
+ DIArray A = getTypeArray();
+ OS << " [" << A.getNumElements() << " elements]";
+}
+
+void DINameSpace::printInternal(raw_ostream &OS) const {
+ StringRef Name = getName();
+ if (!Name.empty())
+ OS << " [" << Name << ']';
+
+ OS << " [line " << getLineNumber() << ']';
+}
+
+void DISubprogram::printInternal(raw_ostream &OS) const {
+ // TODO : Print context
+ OS << " [line " << getLineNumber() << ']';
+
+ if (isLocalToUnit())
+ OS << " [local]";
+
+ if (isDefinition())
+ OS << " [def]";
+
+ if (getScopeLineNumber() != getLineNumber())
+ OS << " [scope " << getScopeLineNumber() << "]";
+
+ if (isPrivate())
+ OS << " [private]";
+ else if (isProtected())
+ OS << " [protected]";
+
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << ']';
+}
+
+void DIGlobalVariable::printInternal(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << ']';
+
+ OS << " [line " << getLineNumber() << ']';
+
+ // TODO : Print context
+
+ if (isLocalToUnit())
+ OS << " [local]";
+
+ if (isDefinition())
+ OS << " [def]";
+}
+
+void DIVariable::printInternal(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << ']';
+
+ OS << " [line " << getLineNumber() << ']';
+}
+
+void DIObjCProperty::printInternal(raw_ostream &OS) const {
+ StringRef Name = getObjCPropertyName();
+ if (!Name.empty())
+ OS << " [" << Name << ']';
+
+ OS << " [line " << getLineNumber()
+ << ", properties " << getUnsignedField(6) << ']';
+}
+
+static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
+ const LLVMContext &Ctx) {
+ if (!DL.isUnknown()) { // Print source line info.
+ DIScope Scope(DL.getScope(Ctx));
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (Scope.Verify())
+ CommentOS << Scope.getFilename();
+ else
+ CommentOS << "<unknown>";
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, CommentOS, Ctx);
+ CommentOS << " ]";
+ }
+ }
+}
+
+void DIVariable::printExtendedName(raw_ostream &OS) const {
+ const LLVMContext &Ctx = DbgNode->getContext();
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << Res << "," << getLineNumber();
+ if (MDNode *InlinedAt = getInlinedAt()) {
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
+ if (!InlinedAtDL.isUnknown()) {
+ OS << " @[";
+ printDebugLoc(InlinedAtDL, OS, Ctx);
+ OS << "]";
+ }
+ }
+}
diff --git a/contrib/llvm/lib/IR/DebugLoc.cpp b/contrib/llvm/lib/IR/DebugLoc.cpp
new file mode 100644
index 000000000000..c57b5a305303
--- /dev/null
+++ b/contrib/llvm/lib/IR/DebugLoc.cpp
@@ -0,0 +1,315 @@
+//===-- DebugLoc.cpp - Implement DebugLoc class ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DebugLoc.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/DebugInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DebugLoc Implementation
+//===----------------------------------------------------------------------===//
+
+MDNode *DebugLoc::getScope(const LLVMContext &Ctx) const {
+ if (ScopeIdx == 0) return 0;
+
+ if (ScopeIdx > 0) {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified.
+ assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
+ "Invalid ScopeIdx!");
+ return Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
+ }
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
+}
+
+MDNode *DebugLoc::getInlinedAt(const LLVMContext &Ctx) const {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified. Zero is invalid.
+ if (ScopeIdx >= 0) return 0;
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
+}
+
+/// Return both the Scope and the InlinedAt values.
+void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
+ const LLVMContext &Ctx) const {
+ if (ScopeIdx == 0) {
+ Scope = IA = 0;
+ return;
+ }
+
+ if (ScopeIdx > 0) {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified.
+ assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
+ "Invalid ScopeIdx!");
+ Scope = Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
+ IA = 0;
+ return;
+ }
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ Scope = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
+ IA = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
+}
+
+
+DebugLoc DebugLoc::get(unsigned Line, unsigned Col,
+ MDNode *Scope, MDNode *InlinedAt) {
+ DebugLoc Result;
+
+ // If no scope is available, this is an unknown location.
+ if (Scope == 0) return Result;
+
+ // Saturate line and col to "unknown".
+ if (Col > 255) Col = 0;
+ if (Line >= (1 << 24)) Line = 0;
+ Result.LineCol = Line | (Col << 24);
+
+ LLVMContext &Ctx = Scope->getContext();
+
+ // If there is no inlined-at location, use the ScopeRecords array.
+ if (InlinedAt == 0)
+ Result.ScopeIdx = Ctx.pImpl->getOrAddScopeRecordIdxEntry(Scope, 0);
+ else
+ Result.ScopeIdx = Ctx.pImpl->getOrAddScopeInlinedAtIdxEntry(Scope,
+ InlinedAt, 0);
+
+ return Result;
+}
+
+/// getAsMDNode - This method converts the compressed DebugLoc node into a
+/// DILocation compatible MDNode.
+MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const {
+ if (isUnknown()) return 0;
+
+ MDNode *Scope, *IA;
+ getScopeAndInlinedAt(Scope, IA, Ctx);
+ assert(Scope && "If scope is null, this should be isUnknown()");
+
+ LLVMContext &Ctx2 = Scope->getContext();
+ Type *Int32 = Type::getInt32Ty(Ctx2);
+ Value *Elts[] = {
+ ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()),
+ Scope, IA
+ };
+ return MDNode::get(Ctx2, Elts);
+}
+
+/// getFromDILocation - Translate the DILocation quad into a DebugLoc.
+DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
+ DILocation Loc(N);
+ MDNode *Scope = Loc.getScope();
+ if (Scope == 0) return DebugLoc();
+ return get(Loc.getLineNumber(), Loc.getColumnNumber(), Scope,
+ Loc.getOrigLocation());
+}
+
+/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
+DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) {
+ DILexicalBlock LexBlock(N);
+ MDNode *Scope = LexBlock.getContext();
+ if (Scope == 0) return DebugLoc();
+ return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope, NULL);
+}
+
+void DebugLoc::dump(const LLVMContext &Ctx) const {
+#ifndef NDEBUG
+ if (!isUnknown()) {
+ dbgs() << getLine();
+ if (getCol() != 0)
+ dbgs() << ',' << getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ dbgs() << " @ ";
+ InlinedAtDL.dump(Ctx);
+ } else
+ dbgs() << "\n";
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// DenseMap specialization
+//===----------------------------------------------------------------------===//
+
+unsigned DenseMapInfo<DebugLoc>::getHashValue(const DebugLoc &Key) {
+ return static_cast<unsigned>(hash_combine(Key.LineCol, Key.ScopeIdx));
+}
+
+//===----------------------------------------------------------------------===//
+// LLVMContextImpl Implementation
+//===----------------------------------------------------------------------===//
+
+int LLVMContextImpl::getOrAddScopeRecordIdxEntry(MDNode *Scope,
+ int ExistingIdx) {
+ // If we already have an entry for this scope, return it.
+ int &Idx = ScopeRecordIdx[Scope];
+ if (Idx) return Idx;
+
+ // If we don't have an entry, but ExistingIdx is specified, use it.
+ if (ExistingIdx)
+ return Idx = ExistingIdx;
+
+ // Otherwise add a new entry.
+
+ // Start out ScopeRecords with a minimal reasonable size to avoid
+ // excessive reallocation starting out.
+ if (ScopeRecords.empty())
+ ScopeRecords.reserve(128);
+
+ // Index is biased by 1 for index.
+ Idx = ScopeRecords.size()+1;
+ ScopeRecords.push_back(DebugRecVH(Scope, this, Idx));
+ return Idx;
+}
+
+int LLVMContextImpl::getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,
+ int ExistingIdx) {
+ // If we already have an entry, return it.
+ int &Idx = ScopeInlinedAtIdx[std::make_pair(Scope, IA)];
+ if (Idx) return Idx;
+
+ // If we don't have an entry, but ExistingIdx is specified, use it.
+ if (ExistingIdx)
+ return Idx = ExistingIdx;
+
+ // Start out ScopeInlinedAtRecords with a minimal reasonable size to avoid
+ // excessive reallocation starting out.
+ if (ScopeInlinedAtRecords.empty())
+ ScopeInlinedAtRecords.reserve(128);
+
+ // Index is biased by 1 and negated.
+ Idx = -ScopeInlinedAtRecords.size()-1;
+ ScopeInlinedAtRecords.push_back(std::make_pair(DebugRecVH(Scope, this, Idx),
+ DebugRecVH(IA, this, Idx)));
+ return Idx;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DebugRecVH Implementation
+//===----------------------------------------------------------------------===//
+
+/// deleted - The MDNode this is pointing to got deleted, so this pointer needs
+/// to drop to null and we need remove our entry from the DenseMap.
+void DebugRecVH::deleted() {
+ // If this is a non-canonical reference, just drop the value to null, we know
+ // it doesn't have a map entry.
+ if (Idx == 0) {
+ setValPtr(0);
+ return;
+ }
+
+ MDNode *Cur = get();
+
+ // If the index is positive, it is an entry in ScopeRecords.
+ if (Idx > 0) {
+ assert(Ctx->ScopeRecordIdx[Cur] == Idx && "Mapping out of date!");
+ Ctx->ScopeRecordIdx.erase(Cur);
+ // Reset this VH to null and we're done.
+ setValPtr(0);
+ Idx = 0;
+ return;
+ }
+
+ // Otherwise, it is an entry in ScopeInlinedAtRecords, we don't know if it
+ // is the scope or the inlined-at record entry.
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
+ std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
+ assert((this == &Entry.first || this == &Entry.second) &&
+ "Mapping out of date!");
+
+ MDNode *OldScope = Entry.first.get();
+ MDNode *OldInlinedAt = Entry.second.get();
+ assert(OldScope != 0 && OldInlinedAt != 0 &&
+ "Entry should be non-canonical if either val dropped to null");
+
+ // Otherwise, we do have an entry in it, nuke it and we're done.
+ assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&&
+ "Mapping out of date");
+ Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
+
+ // Reset this VH to null. Drop both 'Idx' values to null to indicate that
+ // we're in non-canonical form now.
+ setValPtr(0);
+ Entry.first.Idx = Entry.second.Idx = 0;
+}
+
+void DebugRecVH::allUsesReplacedWith(Value *NewVa) {
+ // If being replaced with a non-mdnode value (e.g. undef) handle this as if
+ // the mdnode got deleted.
+ MDNode *NewVal = dyn_cast<MDNode>(NewVa);
+ if (NewVal == 0) return deleted();
+
+ // If this is a non-canonical reference, just change it, we know it already
+ // doesn't have a map entry.
+ if (Idx == 0) {
+ setValPtr(NewVa);
+ return;
+ }
+
+ MDNode *OldVal = get();
+ assert(OldVal != NewVa && "Node replaced with self?");
+
+ // If the index is positive, it is an entry in ScopeRecords.
+ if (Idx > 0) {
+ assert(Ctx->ScopeRecordIdx[OldVal] == Idx && "Mapping out of date!");
+ Ctx->ScopeRecordIdx.erase(OldVal);
+ setValPtr(NewVal);
+
+ int NewEntry = Ctx->getOrAddScopeRecordIdxEntry(NewVal, Idx);
+
+ // If NewVal already has an entry, this becomes a non-canonical reference,
+ // just drop Idx to 0 to signify this.
+ if (NewEntry != Idx)
+ Idx = 0;
+ return;
+ }
+
+ // Otherwise, it is an entry in ScopeInlinedAtRecords, we don't know if it
+ // is the scope or the inlined-at record entry.
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
+ std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
+ assert((this == &Entry.first || this == &Entry.second) &&
+ "Mapping out of date!");
+
+ MDNode *OldScope = Entry.first.get();
+ MDNode *OldInlinedAt = Entry.second.get();
+ assert(OldScope != 0 && OldInlinedAt != 0 &&
+ "Entry should be non-canonical if either val dropped to null");
+
+ // Otherwise, we do have an entry in it, nuke it and we're done.
+ assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&&
+ "Mapping out of date");
+ Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
+
+ // Reset this VH to the new value.
+ setValPtr(NewVal);
+
+ int NewIdx = Ctx->getOrAddScopeInlinedAtIdxEntry(Entry.first.get(),
+ Entry.second.get(), Idx);
+ // If NewVal already has an entry, this becomes a non-canonical reference,
+ // just drop Idx to 0 to signify this.
+ if (NewIdx != Idx) {
+ std::pair<DebugRecVH, DebugRecVH> &Entry=Ctx->ScopeInlinedAtRecords[-Idx-1];
+ Entry.first.Idx = Entry.second.Idx = 0;
+ }
+}
diff --git a/contrib/llvm/lib/IR/Dominators.cpp b/contrib/llvm/lib/IR/Dominators.cpp
new file mode 100644
index 000000000000..a1160cdc83b1
--- /dev/null
+++ b/contrib/llvm/lib/IR/Dominators.cpp
@@ -0,0 +1,302 @@
+//===- Dominators.cpp - Dominator Calculation -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators. Postdominators are available in libanalysis, but are not
+// included in libvmcore, because it's not needed. Forward dominators are
+// needed to support the Verifier pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+// Always verify dominfo if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyDomInfo = true;
+#else
+static bool VerifyDomInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
+ cl::desc("Verify dominator info (time consuming)"));
+
+bool BasicBlockEdge::isSingleEdge() const {
+ const TerminatorInst *TI = Start->getTerminator();
+ unsigned NumEdgesToEnd = 0;
+ for (unsigned int i = 0, n = TI->getNumSuccessors(); i < n; ++i) {
+ if (TI->getSuccessor(i) == End)
+ ++NumEdgesToEnd;
+ if (NumEdgesToEnd >= 2)
+ return false;
+ }
+ assert(NumEdgesToEnd == 1);
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// DominatorTree Implementation
+//===----------------------------------------------------------------------===//
+//
+// Provide public access to DominatorTree information. Implementation details
+// can be found in DominatorInternals.h.
+//
+//===----------------------------------------------------------------------===//
+
+TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>);
+TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>);
+
+char DominatorTree::ID = 0;
+INITIALIZE_PASS(DominatorTree, "domtree",
+ "Dominator Tree Construction", true, true)
+
+bool DominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+void DominatorTree::verifyAnalysis() const {
+ if (!VerifyDomInfo) return;
+
+ Function &F = *getRoot()->getParent();
+
+ DominatorTree OtherDT;
+ OtherDT.getBase().recalculate(F);
+ if (compare(OtherDT)) {
+ errs() << "DominatorTree is not up to date!\nComputed:\n";
+ print(errs());
+ errs() << "\nActual:\n";
+ OtherDT.print(errs());
+ abort();
+ }
+}
+
+void DominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+// dominates - Return true if Def dominates a use in User. This performs
+// the special checks necessary if Def and User are in the same basic block.
+// Note that Def doesn't dominate a use in Def itself!
+bool DominatorTree::dominates(const Instruction *Def,
+ const Instruction *User) const {
+ const BasicBlock *UseBB = User->getParent();
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Any unreachable use is dominated, even if Def == User.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ // An instruction doesn't dominate a use in itself.
+ if (Def == User)
+ return false;
+
+ // The value defined by an invoke dominates an instruction only if
+ // it dominates every instruction in UseBB.
+ // A PHI is dominated only if the instruction dominates every possible use
+ // in the UseBB.
+ if (isa<InvokeInst>(Def) || isa<PHINode>(User))
+ return dominates(Def, UseBB);
+
+ if (DefBB != UseBB)
+ return dominates(DefBB, UseBB);
+
+ // Loop through the basic block until we find Def or User.
+ BasicBlock::const_iterator I = DefBB->begin();
+ for (; &*I != Def && &*I != User; ++I)
+ /*empty*/;
+
+ return &*I == Def;
+}
+
+// true if Def would dominate a use in any instruction in UseBB.
+// note that dominates(Def, Def->getParent()) is false.
+bool DominatorTree::dominates(const Instruction *Def,
+ const BasicBlock *UseBB) const {
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Any unreachable use is dominated, even if DefBB == UseBB.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ if (DefBB == UseBB)
+ return false;
+
+ const InvokeInst *II = dyn_cast<InvokeInst>(Def);
+ if (!II)
+ return dominates(DefBB, UseBB);
+
+ // Invoke results are only usable in the normal destination, not in the
+ // exceptional destination.
+ BasicBlock *NormalDest = II->getNormalDest();
+ BasicBlockEdge E(DefBB, NormalDest);
+ return dominates(E, UseBB);
+}
+
+bool DominatorTree::dominates(const BasicBlockEdge &BBE,
+ const BasicBlock *UseBB) const {
+ // Assert that we have a single edge. We could handle them by simply
+ // returning false, but since isSingleEdge is linear on the number of
+ // edges, the callers can normally handle them more efficiently.
+ assert(BBE.isSingleEdge());
+
+ // If the BB the edge ends in doesn't dominate the use BB, then the
+ // edge also doesn't.
+ const BasicBlock *Start = BBE.getStart();
+ const BasicBlock *End = BBE.getEnd();
+ if (!dominates(End, UseBB))
+ return false;
+
+ // Simple case: if the end BB has a single predecessor, the fact that it
+ // dominates the use block implies that the edge also does.
+ if (End->getSinglePredecessor())
+ return true;
+
+ // The normal edge from the invoke is critical. Conceptually, what we would
+ // like to do is split it and check if the new block dominates the use.
+ // With X being the new block, the graph would look like:
+ //
+ // DefBB
+ // /\ . .
+ // / \ . .
+ // / \ . .
+ // / \ | |
+ // A X B C
+ // | \ | /
+ // . \|/
+ // . NormalDest
+ // .
+ //
+ // Given the definition of dominance, NormalDest is dominated by X iff X
+ // dominates all of NormalDest's predecessors (X, B, C in the example). X
+ // trivially dominates itself, so we only have to find if it dominates the
+ // other predecessors. Since the only way out of X is via NormalDest, X can
+ // only properly dominate a node if NormalDest dominates that node too.
+ for (const_pred_iterator PI = pred_begin(End), E = pred_end(End);
+ PI != E; ++PI) {
+ const BasicBlock *BB = *PI;
+ if (BB == Start)
+ continue;
+
+ if (!dominates(End, BB))
+ return false;
+ }
+ return true;
+}
+
+bool DominatorTree::dominates(const BasicBlockEdge &BBE,
+ const Use &U) const {
+ // Assert that we have a single edge. We could handle them by simply
+ // returning false, but since isSingleEdge is linear on the number of
+ // edges, the callers can normally handle them more efficiently.
+ assert(BBE.isSingleEdge());
+
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ // A PHI in the end of the edge is dominated by it.
+ PHINode *PN = dyn_cast<PHINode>(UserInst);
+ if (PN && PN->getParent() == BBE.getEnd() &&
+ PN->getIncomingBlock(U) == BBE.getStart())
+ return true;
+
+ // Otherwise use the edge-dominates-block query, which
+ // handles the crazy critical edge cases properly.
+ const BasicBlock *UseBB;
+ if (PN)
+ UseBB = PN->getIncomingBlock(U);
+ else
+ UseBB = UserInst->getParent();
+ return dominates(BBE, UseBB);
+}
+
+bool DominatorTree::dominates(const Instruction *Def,
+ const Use &U) const {
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Determine the block in which the use happens. PHI nodes use
+ // their operands on edges; simulate this by thinking of the use
+ // happening at the end of the predecessor block.
+ const BasicBlock *UseBB;
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UseBB = PN->getIncomingBlock(U);
+ else
+ UseBB = UserInst->getParent();
+
+ // Any unreachable use is dominated, even if Def == User.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ // Invoke instructions define their return values on the edges
+ // to their normal successors, so we have to handle them specially.
+ // Among other things, this means they don't dominate anything in
+ // their own block, except possibly a phi, so we don't need to
+ // walk the block in any case.
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(Def)) {
+ BasicBlock *NormalDest = II->getNormalDest();
+ BasicBlockEdge E(DefBB, NormalDest);
+ return dominates(E, U);
+ }
+
+ // If the def and use are in different blocks, do a simple CFG dominator
+ // tree query.
+ if (DefBB != UseBB)
+ return dominates(DefBB, UseBB);
+
+ // Ok, def and use are in the same block. If the def is an invoke, it
+ // doesn't dominate anything in the block. If it's a PHI, it dominates
+ // everything in the block.
+ if (isa<PHINode>(UserInst))
+ return true;
+
+ // Otherwise, just loop through the basic block until we find Def or User.
+ BasicBlock::const_iterator I = DefBB->begin();
+ for (; &*I != Def && &*I != UserInst; ++I)
+ /*empty*/;
+
+ return &*I != UserInst;
+}
+
+bool DominatorTree::isReachableFromEntry(const Use &U) const {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+
+ // ConstantExprs aren't really reachable from the entry block, but they
+ // don't need to be treated like unreachable code either.
+ if (!I) return true;
+
+ // PHI nodes use their operands on their incoming edges.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ return isReachableFromEntry(PN->getIncomingBlock(U));
+
+ // Everything else uses their operands in their own block.
+ return isReachableFromEntry(I->getParent());
+}
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
new file mode 100644
index 000000000000..1e72b90a13ce
--- /dev/null
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -0,0 +1,707 @@
+//===-- Function.cpp - Implement the Global object classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Function class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Function.h"
+#include "LLVMContextImpl.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/RWMutex.h"
+#include "llvm/Support/StringPool.h"
+#include "llvm/Support/Threading.h"
+using namespace llvm;
+
+// Explicit instantiations of SymbolTableListTraits since some of the methods
+// are not in the public header file...
+template class llvm::SymbolTableListTraits<Argument, Function>;
+template class llvm::SymbolTableListTraits<BasicBlock, Function>;
+
+//===----------------------------------------------------------------------===//
+// Argument Implementation
+//===----------------------------------------------------------------------===//
+
+void Argument::anchor() { }
+
+Argument::Argument(Type *Ty, const Twine &Name, Function *Par)
+ : Value(Ty, Value::ArgumentVal) {
+ Parent = 0;
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (Par)
+ Par->getArgumentList().push_back(this);
+ setName(Name);
+}
+
+void Argument::setParent(Function *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getArgNo - Return the index of this formal argument in its containing
+/// function. For example in "void foo(int a, float b)" a is 0 and b is 1.
+unsigned Argument::getArgNo() const {
+ const Function *F = getParent();
+ assert(F && "Argument is not in a function");
+
+ Function::const_arg_iterator AI = F->arg_begin();
+ unsigned ArgIdx = 0;
+ for (; &*AI != this; ++AI)
+ ++ArgIdx;
+
+ return ArgIdx;
+}
+
+/// hasByValAttr - Return true if this argument has the byval attribute on it
+/// in its containing function.
+bool Argument::hasByValAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::ByVal);
+}
+
+unsigned Argument::getParamAlignment() const {
+ assert(getType()->isPointerTy() && "Only pointers have alignments");
+ return getParent()->getParamAlignment(getArgNo()+1);
+
+}
+
+/// hasNestAttr - Return true if this argument has the nest attribute on
+/// it in its containing function.
+bool Argument::hasNestAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::Nest);
+}
+
+/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
+/// it in its containing function.
+bool Argument::hasNoAliasAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::NoAlias);
+}
+
+/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
+/// on it in its containing function.
+bool Argument::hasNoCaptureAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::NoCapture);
+}
+
+/// hasSRetAttr - Return true if this argument has the sret attribute on
+/// it in its containing function.
+bool Argument::hasStructRetAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ if (this != getParent()->arg_begin())
+ return false; // StructRet param must be first param
+ return getParent()->getAttributes().
+ hasAttribute(1, Attribute::StructRet);
+}
+
+/// addAttr - Add attributes to an argument.
+void Argument::addAttr(AttributeSet AS) {
+ assert(AS.getNumSlots() <= 1 &&
+ "Trying to add more than one attribute set to an argument!");
+ AttrBuilder B(AS, AS.getSlotIndex(0));
+ getParent()->addAttributes(getArgNo() + 1,
+ AttributeSet::get(Parent->getContext(),
+ getArgNo() + 1, B));
+}
+
+/// removeAttr - Remove attributes from an argument.
+void Argument::removeAttr(AttributeSet AS) {
+ assert(AS.getNumSlots() <= 1 &&
+ "Trying to remove more than one attribute set from an argument!");
+ AttrBuilder B(AS, AS.getSlotIndex(0));
+ getParent()->removeAttributes(getArgNo() + 1,
+ AttributeSet::get(Parent->getContext(),
+ getArgNo() + 1, B));
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Methods in Function
+//===----------------------------------------------------------------------===//
+
+LLVMContext &Function::getContext() const {
+ return getType()->getContext();
+}
+
+FunctionType *Function::getFunctionType() const {
+ return cast<FunctionType>(getType()->getElementType());
+}
+
+bool Function::isVarArg() const {
+ return getFunctionType()->isVarArg();
+}
+
+Type *Function::getReturnType() const {
+ return getFunctionType()->getReturnType();
+}
+
+void Function::removeFromParent() {
+ getParent()->getFunctionList().remove(this);
+}
+
+void Function::eraseFromParent() {
+ getParent()->getFunctionList().erase(this);
+}
+
+//===----------------------------------------------------------------------===//
+// Function Implementation
+//===----------------------------------------------------------------------===//
+
+Function::Function(FunctionType *Ty, LinkageTypes Linkage,
+ const Twine &name, Module *ParentModule)
+ : GlobalValue(PointerType::getUnqual(Ty),
+ Value::FunctionVal, 0, 0, Linkage, name) {
+ assert(FunctionType::isValidReturnType(getReturnType()) &&
+ "invalid return type");
+ SymTab = new ValueSymbolTable();
+
+ // If the function has arguments, mark them as lazily built.
+ if (Ty->getNumParams())
+ setValueSubclassData(1); // Set the "has lazy arguments" bit.
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (ParentModule)
+ ParentModule->getFunctionList().push_back(this);
+
+ // Ensure intrinsics have the right parameter attributes.
+ if (unsigned IID = getIntrinsicID())
+ setAttributes(Intrinsic::getAttributes(getContext(), Intrinsic::ID(IID)));
+
+}
+
+Function::~Function() {
+ dropAllReferences(); // After this it is safe to delete instructions.
+
+ // Delete all of the method arguments and unlink from symbol table...
+ ArgumentList.clear();
+ delete SymTab;
+
+ // Remove the function from the on-the-side GC table.
+ clearGC();
+
+ // Remove the intrinsicID from the Cache.
+ if (getValueName() && isIntrinsic())
+ getContext().pImpl->IntrinsicIDCache.erase(this);
+}
+
+void Function::BuildLazyArguments() const {
+ // Create the arguments vector, all arguments start out unnamed.
+ FunctionType *FT = getFunctionType();
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ assert(!FT->getParamType(i)->isVoidTy() &&
+ "Cannot have void typed arguments!");
+ ArgumentList.push_back(new Argument(FT->getParamType(i)));
+ }
+
+ // Clear the lazy arguments bit.
+ unsigned SDC = getSubclassDataFromValue();
+ const_cast<Function*>(this)->setValueSubclassData(SDC &= ~1);
+}
+
+size_t Function::arg_size() const {
+ return getFunctionType()->getNumParams();
+}
+bool Function::arg_empty() const {
+ return getFunctionType()->getNumParams() == 0;
+}
+
+void Function::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+// dropAllReferences() - This function causes all the subinstructions to "let
+// go" of all references that they are maintaining. This allows one to
+// 'delete' a whole class at a time, even though there may be circular
+// references... first all references are dropped, and all use counts go to
+// zero. Then everything is deleted for real. Note that no operations are
+// valid on an object that has "dropped all references", except operator
+// delete.
+//
+void Function::dropAllReferences() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+
+ // Delete all basic blocks. They are now unused, except possibly by
+ // blockaddresses, but BasicBlock's destructor takes care of those.
+ while (!BasicBlocks.empty())
+ BasicBlocks.begin()->eraseFromParent();
+}
+
+void Function::addAttribute(unsigned i, Attribute::AttrKind attr) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addAttribute(getContext(), i, attr);
+ setAttributes(PAL);
+}
+
+void Function::addAttributes(unsigned i, AttributeSet attrs) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addAttributes(getContext(), i, attrs);
+ setAttributes(PAL);
+}
+
+void Function::removeAttributes(unsigned i, AttributeSet attrs) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.removeAttributes(getContext(), i, attrs);
+ setAttributes(PAL);
+}
+
+// Maintain the GC name for each function in an on-the-side table. This saves
+// allocating an additional word in Function for programs which do not use GC
+// (i.e., most programs) at the cost of increased overhead for clients which do
+// use GC.
+static DenseMap<const Function*,PooledStringPtr> *GCNames;
+static StringPool *GCNamePool;
+static ManagedStatic<sys::SmartRWMutex<true> > GCLock;
+
+bool Function::hasGC() const {
+ sys::SmartScopedReader<true> Reader(*GCLock);
+ return GCNames && GCNames->count(this);
+}
+
+const char *Function::getGC() const {
+ assert(hasGC() && "Function has no collector");
+ sys::SmartScopedReader<true> Reader(*GCLock);
+ return *(*GCNames)[this];
+}
+
+void Function::setGC(const char *Str) {
+ sys::SmartScopedWriter<true> Writer(*GCLock);
+ if (!GCNamePool)
+ GCNamePool = new StringPool();
+ if (!GCNames)
+ GCNames = new DenseMap<const Function*,PooledStringPtr>();
+ (*GCNames)[this] = GCNamePool->intern(Str);
+}
+
+void Function::clearGC() {
+ sys::SmartScopedWriter<true> Writer(*GCLock);
+ if (GCNames) {
+ GCNames->erase(this);
+ if (GCNames->empty()) {
+ delete GCNames;
+ GCNames = 0;
+ if (GCNamePool->empty()) {
+ delete GCNamePool;
+ GCNamePool = 0;
+ }
+ }
+ }
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a Function) from the Function Src to this one.
+void Function::copyAttributesFrom(const GlobalValue *Src) {
+ assert(isa<Function>(Src) && "Expected a Function!");
+ GlobalValue::copyAttributesFrom(Src);
+ const Function *SrcF = cast<Function>(Src);
+ setCallingConv(SrcF->getCallingConv());
+ setAttributes(SrcF->getAttributes());
+ if (SrcF->hasGC())
+ setGC(SrcF->getGC());
+ else
+ clearGC();
+}
+
+/// getIntrinsicID - This method returns the ID number of the specified
+/// function, or Intrinsic::not_intrinsic if the function is not an
+/// intrinsic, or if the pointer is null. This value is always defined to be
+/// zero to allow easy checking for whether a function is intrinsic or not. The
+/// particular intrinsic functions which correspond to this value are defined in
+/// llvm/Intrinsics.h. Results are cached in the LLVM context, subsequent
+/// requests for the same ID return results much faster from the cache.
+///
+unsigned Function::getIntrinsicID() const {
+ const ValueName *ValName = this->getValueName();
+ if (!ValName || !isIntrinsic())
+ return 0;
+
+ LLVMContextImpl::IntrinsicIDCacheTy &IntrinsicIDCache =
+ getContext().pImpl->IntrinsicIDCache;
+ if (!IntrinsicIDCache.count(this)) {
+ unsigned Id = lookupIntrinsicID();
+ IntrinsicIDCache[this]=Id;
+ return Id;
+ }
+ return IntrinsicIDCache[this];
+}
+
+/// This private method does the actual lookup of an intrinsic ID when the query
+/// could not be answered from the cache.
+unsigned Function::lookupIntrinsicID() const {
+ const ValueName *ValName = this->getValueName();
+ unsigned Len = ValName->getKeyLength();
+ const char *Name = ValName->getKeyData();
+
+#define GET_FUNCTION_RECOGNIZER
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_FUNCTION_RECOGNIZER
+
+ return 0;
+}
+
+std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
+ assert(id < num_intrinsics && "Invalid intrinsic ID!");
+ static const char * const Table[] = {
+ "not_intrinsic",
+#define GET_INTRINSIC_NAME_TABLE
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+ if (Tys.empty())
+ return Table[id];
+ std::string Result(Table[id]);
+ for (unsigned i = 0; i < Tys.size(); ++i) {
+ if (PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
+ Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) +
+ EVT::getEVT(PTyp->getElementType()).getEVTString();
+ }
+ else if (Tys[i])
+ Result += "." + EVT::getEVT(Tys[i]).getEVTString();
+ }
+ return Result;
+}
+
+
+/// IIT_Info - These are enumerators that describe the entries returned by the
+/// getIntrinsicInfoTableEntries function.
+///
+/// NOTE: This must be kept in synch with the copy in TblGen/IntrinsicEmitter!
+enum IIT_Info {
+ // Common values should be encoded with 0-15.
+ IIT_Done = 0,
+ IIT_I1 = 1,
+ IIT_I8 = 2,
+ IIT_I16 = 3,
+ IIT_I32 = 4,
+ IIT_I64 = 5,
+ IIT_F16 = 6,
+ IIT_F32 = 7,
+ IIT_F64 = 8,
+ IIT_V2 = 9,
+ IIT_V4 = 10,
+ IIT_V8 = 11,
+ IIT_V16 = 12,
+ IIT_V32 = 13,
+ IIT_PTR = 14,
+ IIT_ARG = 15,
+
+ // Values from 16+ are only encodable with the inefficient encoding.
+ IIT_MMX = 16,
+ IIT_METADATA = 17,
+ IIT_EMPTYSTRUCT = 18,
+ IIT_STRUCT2 = 19,
+ IIT_STRUCT3 = 20,
+ IIT_STRUCT4 = 21,
+ IIT_STRUCT5 = 22,
+ IIT_EXTEND_VEC_ARG = 23,
+ IIT_TRUNC_VEC_ARG = 24,
+ IIT_ANYPTR = 25
+};
+
+
+static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
+ SmallVectorImpl<Intrinsic::IITDescriptor> &OutputTable) {
+ IIT_Info Info = IIT_Info(Infos[NextElt++]);
+ unsigned StructElts = 2;
+ using namespace Intrinsic;
+
+ switch (Info) {
+ case IIT_Done:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Void, 0));
+ return;
+ case IIT_MMX:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0));
+ return;
+ case IIT_METADATA:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Metadata, 0));
+ return;
+ case IIT_F16:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Half, 0));
+ return;
+ case IIT_F32:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Float, 0));
+ return;
+ case IIT_F64:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Double, 0));
+ return;
+ case IIT_I1:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1));
+ return;
+ case IIT_I8:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8));
+ return;
+ case IIT_I16:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer,16));
+ return;
+ case IIT_I32:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 32));
+ return;
+ case IIT_I64:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64));
+ return;
+ case IIT_V2:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_V4:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 4));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_V8:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 8));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_V16:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 16));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_V32:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 32));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_PTR:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_ANYPTR: { // [ANYPTR addrspace, subtype]
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer,
+ Infos[NextElt++]));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ }
+ case IIT_ARG: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Argument, ArgInfo));
+ return;
+ }
+ case IIT_EXTEND_VEC_ARG: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::ExtendVecArgument,
+ ArgInfo));
+ return;
+ }
+ case IIT_TRUNC_VEC_ARG: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::TruncVecArgument,
+ ArgInfo));
+ return;
+ }
+ case IIT_EMPTYSTRUCT:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
+ return;
+ case IIT_STRUCT5: ++StructElts; // FALL THROUGH.
+ case IIT_STRUCT4: ++StructElts; // FALL THROUGH.
+ case IIT_STRUCT3: ++StructElts; // FALL THROUGH.
+ case IIT_STRUCT2: {
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct,StructElts));
+
+ for (unsigned i = 0; i != StructElts; ++i)
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ }
+ }
+ llvm_unreachable("unhandled");
+}
+
+
+#define GET_INTRINSIC_GENERATOR_GLOBAL
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_INTRINSIC_GENERATOR_GLOBAL
+
+void Intrinsic::getIntrinsicInfoTableEntries(ID id,
+ SmallVectorImpl<IITDescriptor> &T){
+ // Check to see if the intrinsic's type was expressible by the table.
+ unsigned TableVal = IIT_Table[id-1];
+
+ // Decode the TableVal into an array of IITValues.
+ SmallVector<unsigned char, 8> IITValues;
+ ArrayRef<unsigned char> IITEntries;
+ unsigned NextElt = 0;
+ if ((TableVal >> 31) != 0) {
+ // This is an offset into the IIT_LongEncodingTable.
+ IITEntries = IIT_LongEncodingTable;
+
+ // Strip sentinel bit.
+ NextElt = (TableVal << 1) >> 1;
+ } else {
+ // Decode the TableVal into an array of IITValues. If the entry was encoded
+ // into a single word in the table itself, decode it now.
+ do {
+ IITValues.push_back(TableVal & 0xF);
+ TableVal >>= 4;
+ } while (TableVal);
+
+ IITEntries = IITValues;
+ NextElt = 0;
+ }
+
+ // Okay, decode the table into the output vector of IITDescriptors.
+ DecodeIITType(NextElt, IITEntries, T);
+ while (NextElt != IITEntries.size() && IITEntries[NextElt] != 0)
+ DecodeIITType(NextElt, IITEntries, T);
+}
+
+
+static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
+ ArrayRef<Type*> Tys, LLVMContext &Context) {
+ using namespace Intrinsic;
+ IITDescriptor D = Infos.front();
+ Infos = Infos.slice(1);
+
+ switch (D.Kind) {
+ case IITDescriptor::Void: return Type::getVoidTy(Context);
+ case IITDescriptor::MMX: return Type::getX86_MMXTy(Context);
+ case IITDescriptor::Metadata: return Type::getMetadataTy(Context);
+ case IITDescriptor::Half: return Type::getHalfTy(Context);
+ case IITDescriptor::Float: return Type::getFloatTy(Context);
+ case IITDescriptor::Double: return Type::getDoubleTy(Context);
+
+ case IITDescriptor::Integer:
+ return IntegerType::get(Context, D.Integer_Width);
+ case IITDescriptor::Vector:
+ return VectorType::get(DecodeFixedType(Infos, Tys, Context),D.Vector_Width);
+ case IITDescriptor::Pointer:
+ return PointerType::get(DecodeFixedType(Infos, Tys, Context),
+ D.Pointer_AddressSpace);
+ case IITDescriptor::Struct: {
+ Type *Elts[5];
+ assert(D.Struct_NumElements <= 5 && "Can't handle this yet");
+ for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
+ Elts[i] = DecodeFixedType(Infos, Tys, Context);
+ return StructType::get(Context, ArrayRef<Type*>(Elts,D.Struct_NumElements));
+ }
+
+ case IITDescriptor::Argument:
+ return Tys[D.getArgumentNumber()];
+ case IITDescriptor::ExtendVecArgument:
+ return VectorType::getExtendedElementVectorType(cast<VectorType>(
+ Tys[D.getArgumentNumber()]));
+
+ case IITDescriptor::TruncVecArgument:
+ return VectorType::getTruncatedElementVectorType(cast<VectorType>(
+ Tys[D.getArgumentNumber()]));
+ }
+ llvm_unreachable("unhandled");
+}
+
+
+
+FunctionType *Intrinsic::getType(LLVMContext &Context,
+ ID id, ArrayRef<Type*> Tys) {
+ SmallVector<IITDescriptor, 8> Table;
+ getIntrinsicInfoTableEntries(id, Table);
+
+ ArrayRef<IITDescriptor> TableRef = Table;
+ Type *ResultTy = DecodeFixedType(TableRef, Tys, Context);
+
+ SmallVector<Type*, 8> ArgTys;
+ while (!TableRef.empty())
+ ArgTys.push_back(DecodeFixedType(TableRef, Tys, Context));
+
+ return FunctionType::get(ResultTy, ArgTys, false);
+}
+
+bool Intrinsic::isOverloaded(ID id) {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+/// This defines the "Intrinsic::getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
+ // There can never be multiple globals with the same name of different types,
+ // because intrinsics must be a specific type.
+ return
+ cast<Function>(M->getOrInsertFunction(getName(id, Tys),
+ getType(M->getContext(), id, Tys)));
+}
+
+// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+/// hasAddressTaken - returns true if there are any uses of this function
+/// other than direct calls or invokes to it.
+bool Function::hasAddressTaken(const User* *PutOffender) const {
+ for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ const User *U = *I;
+ if (isa<BlockAddress>(U))
+ continue;
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ return PutOffender ? (*PutOffender = U, true) : true;
+ ImmutableCallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(I))
+ return PutOffender ? (*PutOffender = U, true) : true;
+ }
+ return false;
+}
+
+bool Function::isDefTriviallyDead() const {
+ // Check the linkage
+ if (!hasLinkOnceLinkage() && !hasLocalLinkage() &&
+ !hasAvailableExternallyLinkage())
+ return false;
+
+ // Check if the function is used by anything other than a blockaddress.
+ for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I)
+ if (!isa<BlockAddress>(*I))
+ return false;
+
+ return true;
+}
+
+/// callsFunctionThatReturnsTwice - Return true if the function has a call to
+/// setjmp or other function that gcc recognizes as "returning twice".
+bool Function::callsFunctionThatReturnsTwice() const {
+ for (const_inst_iterator
+ I = inst_begin(this), E = inst_end(this); I != E; ++I) {
+ const CallInst* callInst = dyn_cast<CallInst>(&*I);
+ if (!callInst)
+ continue;
+ if (callInst->canReturnTwice())
+ return true;
+ }
+
+ return false;
+}
+
diff --git a/contrib/llvm/lib/IR/GCOV.cpp b/contrib/llvm/lib/IR/GCOV.cpp
new file mode 100644
index 000000000000..ea2f0a6d556f
--- /dev/null
+++ b/contrib/llvm/lib/IR/GCOV.cpp
@@ -0,0 +1,283 @@
+//===- GCOVr.cpp - LLVM coverage tool -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// GCOV implements the interface to read and write coverage files that use
+// 'gcov' format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/GCOV.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// GCOVFile implementation.
+
+/// ~GCOVFile - Delete GCOVFile and its content.
+GCOVFile::~GCOVFile() {
+ DeleteContainerPointers(Functions);
+}
+
+/// isGCDAFile - Return true if Format identifies a .gcda file.
+static bool isGCDAFile(GCOV::GCOVFormat Format) {
+ return Format == GCOV::GCDA_402 || Format == GCOV::GCDA_404;
+}
+
+/// isGCNOFile - Return true if Format identifies a .gcno file.
+static bool isGCNOFile(GCOV::GCOVFormat Format) {
+ return Format == GCOV::GCNO_402 || Format == GCOV::GCNO_404;
+}
+
+/// read - Read GCOV buffer.
+bool GCOVFile::read(GCOVBuffer &Buffer) {
+ GCOV::GCOVFormat Format = Buffer.readGCOVFormat();
+ if (Format == GCOV::InvalidGCOV)
+ return false;
+
+ unsigned i = 0;
+ while (1) {
+ GCOVFunction *GFun = NULL;
+ if (isGCDAFile(Format)) {
+ // Use existing function while reading .gcda file.
+ assert(i < Functions.size() && ".gcda data does not match .gcno data");
+ GFun = Functions[i];
+ } else if (isGCNOFile(Format)){
+ GFun = new GCOVFunction();
+ Functions.push_back(GFun);
+ }
+ if (!GFun || !GFun->read(Buffer, Format))
+ break;
+ ++i;
+ }
+ return true;
+}
+
+/// dump - Dump GCOVFile content on standard out for debugging purposes.
+void GCOVFile::dump() {
+ for (SmallVector<GCOVFunction *, 16>::iterator I = Functions.begin(),
+ E = Functions.end(); I != E; ++I)
+ (*I)->dump();
+}
+
+/// collectLineCounts - Collect line counts. This must be used after
+/// reading .gcno and .gcda files.
+void GCOVFile::collectLineCounts(FileInfo &FI) {
+ for (SmallVector<GCOVFunction *, 16>::iterator I = Functions.begin(),
+ E = Functions.end(); I != E; ++I)
+ (*I)->collectLineCounts(FI);
+ FI.print();
+}
+
+//===----------------------------------------------------------------------===//
+// GCOVFunction implementation.
+
+/// ~GCOVFunction - Delete GCOVFunction and its content.
+GCOVFunction::~GCOVFunction() {
+ DeleteContainerPointers(Blocks);
+}
+
+/// read - Read a aunction from the buffer. Return false if buffer cursor
+/// does not point to a function tag.
+bool GCOVFunction::read(GCOVBuffer &Buff, GCOV::GCOVFormat Format) {
+ if (!Buff.readFunctionTag())
+ return false;
+
+ Buff.readInt(); // Function header length
+ Ident = Buff.readInt();
+ Buff.readInt(); // Checksum #1
+ if (Format != GCOV::GCNO_402)
+ Buff.readInt(); // Checksum #2
+
+ Name = Buff.readString();
+ if (Format == GCOV::GCNO_402 || Format == GCOV::GCNO_404)
+ Filename = Buff.readString();
+
+ if (Format == GCOV::GCDA_402 || Format == GCOV::GCDA_404) {
+ Buff.readArcTag();
+ uint32_t Count = Buff.readInt() / 2;
+ for (unsigned i = 0, e = Count; i != e; ++i) {
+ Blocks[i]->addCount(Buff.readInt64());
+ }
+ return true;
+ }
+
+ LineNumber = Buff.readInt();
+
+ // read blocks.
+ bool BlockTagFound = Buff.readBlockTag();
+ (void)BlockTagFound;
+ assert(BlockTagFound && "Block Tag not found!");
+ uint32_t BlockCount = Buff.readInt();
+ for (int i = 0, e = BlockCount; i != e; ++i) {
+ Buff.readInt(); // Block flags;
+ Blocks.push_back(new GCOVBlock(i));
+ }
+
+ // read edges.
+ while (Buff.readEdgeTag()) {
+ uint32_t EdgeCount = (Buff.readInt() - 1) / 2;
+ uint32_t BlockNo = Buff.readInt();
+ assert(BlockNo < BlockCount && "Unexpected Block number!");
+ for (int i = 0, e = EdgeCount; i != e; ++i) {
+ Blocks[BlockNo]->addEdge(Buff.readInt());
+ Buff.readInt(); // Edge flag
+ }
+ }
+
+ // read line table.
+ while (Buff.readLineTag()) {
+ uint32_t LineTableLength = Buff.readInt();
+ uint32_t Size = Buff.getCursor() + LineTableLength*4;
+ uint32_t BlockNo = Buff.readInt();
+ assert(BlockNo < BlockCount && "Unexpected Block number!");
+ GCOVBlock *Block = Blocks[BlockNo];
+ Buff.readInt(); // flag
+ while (Buff.getCursor() != (Size - 4)) {
+ StringRef Filename = Buff.readString();
+ if (Buff.getCursor() == (Size - 4)) break;
+ while (uint32_t L = Buff.readInt())
+ Block->addLine(Filename, L);
+ }
+ Buff.readInt(); // flag
+ }
+ return true;
+}
+
+/// dump - Dump GCOVFunction content on standard out for debugging purposes.
+void GCOVFunction::dump() {
+ outs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n";
+ for (SmallVector<GCOVBlock *, 16>::iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I)
+ (*I)->dump();
+}
+
+/// collectLineCounts - Collect line counts. This must be used after
+/// reading .gcno and .gcda files.
+void GCOVFunction::collectLineCounts(FileInfo &FI) {
+ for (SmallVector<GCOVBlock *, 16>::iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I)
+ (*I)->collectLineCounts(FI);
+}
+
+//===----------------------------------------------------------------------===//
+// GCOVBlock implementation.
+
+/// ~GCOVBlock - Delete GCOVBlock and its content.
+GCOVBlock::~GCOVBlock() {
+ Edges.clear();
+ DeleteContainerSeconds(Lines);
+}
+
+void GCOVBlock::addLine(StringRef Filename, uint32_t LineNo) {
+ GCOVLines *&LinesForFile = Lines[Filename];
+ if (!LinesForFile)
+ LinesForFile = new GCOVLines();
+ LinesForFile->add(LineNo);
+}
+
+/// collectLineCounts - Collect line counts. This must be used after
+/// reading .gcno and .gcda files.
+void GCOVBlock::collectLineCounts(FileInfo &FI) {
+ for (StringMap<GCOVLines *>::iterator I = Lines.begin(),
+ E = Lines.end(); I != E; ++I)
+ I->second->collectLineCounts(FI, I->first(), Counter);
+}
+
+/// dump - Dump GCOVBlock content on standard out for debugging purposes.
+void GCOVBlock::dump() {
+ outs() << "Block : " << Number << " Counter : " << Counter << "\n";
+ if (!Edges.empty()) {
+ outs() << "\tEdges : ";
+ for (SmallVector<uint32_t, 16>::iterator I = Edges.begin(), E = Edges.end();
+ I != E; ++I)
+ outs() << (*I) << ",";
+ outs() << "\n";
+ }
+ if (!Lines.empty()) {
+ outs() << "\tLines : ";
+ for (StringMap<GCOVLines *>::iterator LI = Lines.begin(),
+ LE = Lines.end(); LI != LE; ++LI) {
+ outs() << LI->first() << " -> ";
+ LI->second->dump();
+ outs() << "\n";
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// GCOVLines implementation.
+
+/// collectLineCounts - Collect line counts. This must be used after
+/// reading .gcno and .gcda files.
+void GCOVLines::collectLineCounts(FileInfo &FI, StringRef Filename,
+ uint32_t Count) {
+ for (SmallVector<uint32_t, 16>::iterator I = Lines.begin(),
+ E = Lines.end(); I != E; ++I)
+ FI.addLineCount(Filename, *I, Count);
+}
+
+/// dump - Dump GCOVLines content on standard out for debugging purposes.
+void GCOVLines::dump() {
+ for (SmallVector<uint32_t, 16>::iterator I = Lines.begin(),
+ E = Lines.end(); I != E; ++I)
+ outs() << (*I) << ",";
+}
+
+//===----------------------------------------------------------------------===//
+// FileInfo implementation.
+
+/// addLineCount - Add line count for the given line number in a file.
+void FileInfo::addLineCount(StringRef Filename, uint32_t Line, uint32_t Count) {
+ if (LineInfo.find(Filename) == LineInfo.end()) {
+ OwningPtr<MemoryBuffer> Buff;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
+ errs() << Filename << ": " << ec.message() << "\n";
+ return;
+ }
+ StringRef AllLines = Buff.take()->getBuffer();
+ LineCounts L(AllLines.count('\n')+2);
+ L[Line-1] = Count;
+ LineInfo[Filename] = L;
+ return;
+ }
+ LineCounts &L = LineInfo[Filename];
+ L[Line-1] = Count;
+}
+
+/// print - Print source files with collected line count information.
+void FileInfo::print() {
+ for (StringMap<LineCounts>::iterator I = LineInfo.begin(), E = LineInfo.end();
+ I != E; ++I) {
+ StringRef Filename = I->first();
+ outs() << Filename << "\n";
+ LineCounts &L = LineInfo[Filename];
+ OwningPtr<MemoryBuffer> Buff;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
+ errs() << Filename << ": " << ec.message() << "\n";
+ return;
+ }
+ StringRef AllLines = Buff.take()->getBuffer();
+ for (unsigned i = 0, e = L.size(); i != e; ++i) {
+ if (L[i])
+ outs() << L[i] << ":\t";
+ else
+ outs() << " :\t";
+ std::pair<StringRef, StringRef> P = AllLines.split('\n');
+ if (AllLines != P.first)
+ outs() << P.first;
+ outs() << "\n";
+ AllLines = P.second;
+ }
+ }
+}
+
+
diff --git a/contrib/llvm/lib/IR/GVMaterializer.cpp b/contrib/llvm/lib/IR/GVMaterializer.cpp
new file mode 100644
index 000000000000..f77a9c908d54
--- /dev/null
+++ b/contrib/llvm/lib/IR/GVMaterializer.cpp
@@ -0,0 +1,18 @@
+//===-- GVMaterializer.cpp - Base implementation for GV materializers -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Minimal implementation of the abstract interface for materializing
+// GlobalValues.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/GVMaterializer.h"
+using namespace llvm;
+
+GVMaterializer::~GVMaterializer() {}
diff --git a/contrib/llvm/lib/IR/Globals.cpp b/contrib/llvm/lib/IR/Globals.cpp
new file mode 100644
index 000000000000..6d547f3edf3f
--- /dev/null
+++ b/contrib/llvm/lib/IR/Globals.cpp
@@ -0,0 +1,269 @@
+//===-- Globals.cpp - Implement the GlobalValue & GlobalVariable class ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GlobalValue & GlobalVariable classes for the IR
+// library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LeakDetector.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// GlobalValue Class
+//===----------------------------------------------------------------------===//
+
+bool GlobalValue::isMaterializable() const {
+ return getParent() && getParent()->isMaterializable(this);
+}
+bool GlobalValue::isDematerializable() const {
+ return getParent() && getParent()->isDematerializable(this);
+}
+bool GlobalValue::Materialize(std::string *ErrInfo) {
+ return getParent()->Materialize(this, ErrInfo);
+}
+void GlobalValue::Dematerialize() {
+ getParent()->Dematerialize(this);
+}
+
+/// Override destroyConstant to make sure it doesn't get called on
+/// GlobalValue's because they shouldn't be treated like other constants.
+void GlobalValue::destroyConstant() {
+ llvm_unreachable("You can't GV->destroyConstant()!");
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a GlobalValue) from the GlobalValue Src to this one.
+void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
+ setAlignment(Src->getAlignment());
+ setSection(Src->getSection());
+ setVisibility(Src->getVisibility());
+ setUnnamedAddr(Src->hasUnnamedAddr());
+}
+
+void GlobalValue::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ Alignment = Log2_32(Align) + 1;
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+bool GlobalValue::isDeclaration() const {
+ // Globals are definitions if they have an initializer.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+ return GV->getNumOperands() == 0;
+
+ // Functions are definitions if they have a body.
+ if (const Function *F = dyn_cast<Function>(this))
+ return F->empty();
+
+ // Aliases are always definitions.
+ assert(isa<GlobalAlias>(this));
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// GlobalVariable Implementation
+//===----------------------------------------------------------------------===//
+
+GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
+ Constant *InitVal,
+ const Twine &Name, ThreadLocalMode TLMode,
+ unsigned AddressSpace,
+ bool isExternallyInitialized)
+ : GlobalValue(PointerType::get(Ty, AddressSpace),
+ Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != 0, Link, Name),
+ isConstantGlobal(constant), threadLocalMode(TLMode),
+ isExternallyInitializedConstant(isExternallyInitialized) {
+ if (InitVal) {
+ assert(InitVal->getType() == Ty &&
+ "Initializer should be the same type as the GlobalVariable!");
+ Op<0>() = InitVal;
+ }
+
+ LeakDetector::addGarbageObject(this);
+}
+
+GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
+ LinkageTypes Link, Constant *InitVal,
+ const Twine &Name,
+ GlobalVariable *Before, ThreadLocalMode TLMode,
+ unsigned AddressSpace,
+ bool isExternallyInitialized)
+ : GlobalValue(PointerType::get(Ty, AddressSpace),
+ Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != 0, Link, Name),
+ isConstantGlobal(constant), threadLocalMode(TLMode),
+ isExternallyInitializedConstant(isExternallyInitialized) {
+ if (InitVal) {
+ assert(InitVal->getType() == Ty &&
+ "Initializer should be the same type as the GlobalVariable!");
+ Op<0>() = InitVal;
+ }
+
+ LeakDetector::addGarbageObject(this);
+
+ if (Before)
+ Before->getParent()->getGlobalList().insert(Before, this);
+ else
+ M.getGlobalList().push_back(this);
+}
+
+void GlobalVariable::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void GlobalVariable::removeFromParent() {
+ getParent()->getGlobalList().remove(this);
+}
+
+void GlobalVariable::eraseFromParent() {
+ getParent()->getGlobalList().erase(this);
+}
+
+void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ // If you call this, then you better know this GVar has a constant
+ // initializer worth replacing. Enforce that here.
+ assert(getNumOperands() == 1 &&
+ "Attempt to replace uses of Constants on a GVar with no initializer");
+
+ // And, since you know it has an initializer, the From value better be
+ // the initializer :)
+ assert(getOperand(0) == From &&
+ "Attempt to replace wrong constant initializer in GVar");
+
+ // And, you better have a constant for the replacement value
+ assert(isa<Constant>(To) &&
+ "Attempt to replace GVar initializer with non-constant");
+
+ // Okay, preconditions out of the way, replace the constant initializer.
+ this->setOperand(0, cast<Constant>(To));
+}
+
+void GlobalVariable::setInitializer(Constant *InitVal) {
+ if (InitVal == 0) {
+ if (hasInitializer()) {
+ Op<0>().set(0);
+ NumOperands = 0;
+ }
+ } else {
+ assert(InitVal->getType() == getType()->getElementType() &&
+ "Initializer type must match GlobalVariable type");
+ if (!hasInitializer())
+ NumOperands = 1;
+ Op<0>().set(InitVal);
+ }
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a GlobalVariable) from the GlobalVariable Src to this one.
+void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
+ assert(isa<GlobalVariable>(Src) && "Expected a GlobalVariable!");
+ GlobalValue::copyAttributesFrom(Src);
+ const GlobalVariable *SrcVar = cast<GlobalVariable>(Src);
+ setThreadLocal(SrcVar->isThreadLocal());
+}
+
+
+//===----------------------------------------------------------------------===//
+// GlobalAlias Implementation
+//===----------------------------------------------------------------------===//
+
+GlobalAlias::GlobalAlias(Type *Ty, LinkageTypes Link,
+ const Twine &Name, Constant* aliasee,
+ Module *ParentModule)
+ : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
+ LeakDetector::addGarbageObject(this);
+
+ if (aliasee)
+ assert(aliasee->getType() == Ty && "Alias and aliasee types should match!");
+ Op<0>() = aliasee;
+
+ if (ParentModule)
+ ParentModule->getAliasList().push_back(this);
+}
+
+void GlobalAlias::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void GlobalAlias::removeFromParent() {
+ getParent()->getAliasList().remove(this);
+}
+
+void GlobalAlias::eraseFromParent() {
+ getParent()->getAliasList().erase(this);
+}
+
+void GlobalAlias::setAliasee(Constant *Aliasee) {
+ assert((!Aliasee || Aliasee->getType() == getType()) &&
+ "Alias and aliasee types should match!");
+
+ setOperand(0, Aliasee);
+}
+
+const GlobalValue *GlobalAlias::getAliasedGlobal() const {
+ const Constant *C = getAliasee();
+ if (C == 0) return 0;
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GV;
+
+ const ConstantExpr *CE = cast<ConstantExpr>(C);
+ assert((CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ "Unsupported aliasee");
+
+ return cast<GlobalValue>(CE->getOperand(0));
+}
+
+const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
+ SmallPtrSet<const GlobalValue*, 3> Visited;
+
+ // Check if we need to stop early.
+ if (stopOnWeak && mayBeOverridden())
+ return this;
+
+ const GlobalValue *GV = getAliasedGlobal();
+ Visited.insert(GV);
+
+ // Iterate over aliasing chain, stopping on weak alias if necessary.
+ while (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) {
+ if (stopOnWeak && GA->mayBeOverridden())
+ break;
+
+ GV = GA->getAliasedGlobal();
+
+ if (!Visited.insert(GV))
+ return 0;
+ }
+
+ return GV;
+}
diff --git a/contrib/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm/lib/IR/IRBuilder.cpp
new file mode 100644
index 000000000000..435e54f0ea2a
--- /dev/null
+++ b/contrib/llvm/lib/IR/IRBuilder.cpp
@@ -0,0 +1,153 @@
+//===---- IRBuilder.cpp - Builder for LLVM Instrs -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IRBuilder class, which is used as a convenient way
+// to create LLVM instructions with a consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+using namespace llvm;
+
+/// CreateGlobalString - Make a new global variable with an initializer that
+/// has array of i8 type filled in with the nul terminated string value
+/// specified. If Name is specified, it is the name of the global variable
+/// created.
+Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
+ Constant *StrConstant = ConstantDataArray::getString(Context, Str);
+ Module &M = *BB->getParent()->getParent();
+ GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
+ true, GlobalValue::PrivateLinkage,
+ StrConstant);
+ GV->setName(Name);
+ GV->setUnnamedAddr(true);
+ return GV;
+}
+
+Type *IRBuilderBase::getCurrentFunctionReturnType() const {
+ assert(BB && BB->getParent() && "No current function!");
+ return BB->getParent()->getReturnType();
+}
+
+Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
+ PointerType *PT = cast<PointerType>(Ptr->getType());
+ if (PT->getElementType()->isIntegerTy(8))
+ return Ptr;
+
+ // Otherwise, we need to insert a bitcast.
+ PT = getInt8PtrTy(PT->getAddressSpace());
+ BitCastInst *BCI = new BitCastInst(Ptr, PT, "");
+ BB->getInstList().insert(InsertPt, BCI);
+ SetInstDebugLocation(BCI);
+ return BCI;
+}
+
+static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
+ IRBuilderBase *Builder) {
+ CallInst *CI = CallInst::Create(Callee, Ops, "");
+ Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),CI);
+ Builder->SetInstDebugLocation(CI);
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Ptr = getCastedInt8PtrValue(Ptr);
+ Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Ptr->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Src = getCastedInt8PtrValue(Src);
+
+ Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ // Set the TBAA Struct info if present.
+ if (TBAAStructTag)
+ CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Src = getCastedInt8PtrValue(Src);
+
+ Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
+ assert(isa<PointerType>(Ptr->getType()) &&
+ "lifetime.start only applies to pointers.");
+ Ptr = getCastedInt8PtrValue(Ptr);
+ if (!Size)
+ Size = getInt64(-1);
+ else
+ assert(Size->getType() == getInt64Ty() &&
+ "lifetime.start requires the size to be an i64");
+ Value *Ops[] = { Size, Ptr };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start);
+ return createCallHelper(TheFn, Ops, this);
+}
+
+CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
+ assert(isa<PointerType>(Ptr->getType()) &&
+ "lifetime.end only applies to pointers.");
+ Ptr = getCastedInt8PtrValue(Ptr);
+ if (!Size)
+ Size = getInt64(-1);
+ else
+ assert(Size->getType() == getInt64Ty() &&
+ "lifetime.end requires the size to be an i64");
+ Value *Ops[] = { Size, Ptr };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end);
+ return createCallHelper(TheFn, Ops, this);
+}
diff --git a/contrib/llvm/lib/IR/InlineAsm.cpp b/contrib/llvm/lib/IR/InlineAsm.cpp
new file mode 100644
index 000000000000..9f2a9fea4b93
--- /dev/null
+++ b/contrib/llvm/lib/IR/InlineAsm.cpp
@@ -0,0 +1,295 @@
+//===-- InlineAsm.cpp - Implement the InlineAsm class ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the InlineAsm class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/InlineAsm.h"
+#include "ConstantsContext.h"
+#include "LLVMContextImpl.h"
+#include "llvm/IR/DerivedTypes.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+// Implement the first virtual method in this class in this file so the
+// InlineAsm vtable is emitted here.
+InlineAsm::~InlineAsm() {
+}
+
+
+InlineAsm *InlineAsm::get(FunctionType *Ty, StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack, AsmDialect asmDialect) {
+ InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack,
+ asmDialect);
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+ return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key);
+}
+
+InlineAsm::InlineAsm(PointerType *Ty, const std::string &asmString,
+ const std::string &constraints, bool hasSideEffects,
+ bool isAlignStack, AsmDialect asmDialect)
+ : Value(Ty, Value::InlineAsmVal),
+ AsmString(asmString), Constraints(constraints),
+ HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack),
+ Dialect(asmDialect) {
+
+ // Do various checks on the constraint string and type.
+ assert(Verify(getFunctionType(), constraints) &&
+ "Function type not legal for constraints!");
+}
+
+void InlineAsm::destroyConstant() {
+ getType()->getContext().pImpl->InlineAsms.remove(this);
+ delete this;
+}
+
+FunctionType *InlineAsm::getFunctionType() const {
+ return cast<FunctionType>(getType()->getElementType());
+}
+
+///Default constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo() :
+ Type(isInput), isEarlyClobber(false),
+ MatchingInput(-1), isCommutative(false),
+ isIndirect(false), isMultipleAlternative(false),
+ currentAlternativeIndex(0) {
+}
+
+/// Copy constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo(const ConstraintInfo &other) :
+ Type(other.Type), isEarlyClobber(other.isEarlyClobber),
+ MatchingInput(other.MatchingInput), isCommutative(other.isCommutative),
+ isIndirect(other.isIndirect), Codes(other.Codes),
+ isMultipleAlternative(other.isMultipleAlternative),
+ multipleAlternatives(other.multipleAlternatives),
+ currentAlternativeIndex(other.currentAlternativeIndex) {
+}
+
+/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
+/// fields in this structure. If the constraint string is not understood,
+/// return true, otherwise return false.
+bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
+ InlineAsm::ConstraintInfoVector &ConstraintsSoFar) {
+ StringRef::iterator I = Str.begin(), E = Str.end();
+ unsigned multipleAlternativeCount = Str.count('|') + 1;
+ unsigned multipleAlternativeIndex = 0;
+ ConstraintCodeVector *pCodes = &Codes;
+
+ // Initialize
+ isMultipleAlternative = (multipleAlternativeCount > 1 ? true : false);
+ if (isMultipleAlternative) {
+ multipleAlternatives.resize(multipleAlternativeCount);
+ pCodes = &multipleAlternatives[0].Codes;
+ }
+ Type = isInput;
+ isEarlyClobber = false;
+ MatchingInput = -1;
+ isCommutative = false;
+ isIndirect = false;
+ currentAlternativeIndex = 0;
+
+ // Parse prefixes.
+ if (*I == '~') {
+ Type = isClobber;
+ ++I;
+ } else if (*I == '=') {
+ ++I;
+ Type = isOutput;
+ }
+
+ if (*I == '*') {
+ isIndirect = true;
+ ++I;
+ }
+
+ if (I == E) return true; // Just a prefix, like "==" or "~".
+
+ // Parse the modifiers.
+ bool DoneWithModifiers = false;
+ while (!DoneWithModifiers) {
+ switch (*I) {
+ default:
+ DoneWithModifiers = true;
+ break;
+ case '&': // Early clobber.
+ if (Type != isOutput || // Cannot early clobber anything but output.
+ isEarlyClobber) // Reject &&&&&&
+ return true;
+ isEarlyClobber = true;
+ break;
+ case '%': // Commutative.
+ if (Type == isClobber || // Cannot commute clobbers.
+ isCommutative) // Reject %%%%%
+ return true;
+ isCommutative = true;
+ break;
+ case '#': // Comment.
+ case '*': // Register preferencing.
+ return true; // Not supported.
+ }
+
+ if (!DoneWithModifiers) {
+ ++I;
+ if (I == E) return true; // Just prefixes and modifiers!
+ }
+ }
+
+ // Parse the various constraints.
+ while (I != E) {
+ if (*I == '{') { // Physical register reference.
+ // Find the end of the register name.
+ StringRef::iterator ConstraintEnd = std::find(I+1, E, '}');
+ if (ConstraintEnd == E) return true; // "{foo"
+ pCodes->push_back(std::string(I, ConstraintEnd+1));
+ I = ConstraintEnd+1;
+ } else if (isdigit(static_cast<unsigned char>(*I))) { // Matching Constraint
+ // Maximal munch numbers.
+ StringRef::iterator NumStart = I;
+ while (I != E && isdigit(static_cast<unsigned char>(*I)))
+ ++I;
+ pCodes->push_back(std::string(NumStart, I));
+ unsigned N = atoi(pCodes->back().c_str());
+ // Check that this is a valid matching constraint!
+ if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput||
+ Type != isInput)
+ return true; // Invalid constraint number.
+
+ // If Operand N already has a matching input, reject this. An output
+ // can't be constrained to the same value as multiple inputs.
+ if (isMultipleAlternative) {
+ InlineAsm::SubConstraintInfo &scInfo =
+ ConstraintsSoFar[N].multipleAlternatives[multipleAlternativeIndex];
+ if (scInfo.MatchingInput != -1)
+ return true;
+ // Note that operand #n has a matching input.
+ scInfo.MatchingInput = ConstraintsSoFar.size();
+ } else {
+ if (ConstraintsSoFar[N].hasMatchingInput())
+ return true;
+ // Note that operand #n has a matching input.
+ ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
+ }
+ } else if (*I == '|') {
+ multipleAlternativeIndex++;
+ pCodes = &multipleAlternatives[multipleAlternativeIndex].Codes;
+ ++I;
+ } else if (*I == '^') {
+ // Multi-letter constraint
+ // FIXME: For now assuming these are 2-character constraints.
+ pCodes->push_back(std::string(I+1, I+3));
+ I += 3;
+ } else {
+ // Single letter constraint.
+ pCodes->push_back(std::string(I, I+1));
+ ++I;
+ }
+ }
+
+ return false;
+}
+
+/// selectAlternative - Point this constraint to the alternative constraint
+/// indicated by the index.
+void InlineAsm::ConstraintInfo::selectAlternative(unsigned index) {
+ if (index < multipleAlternatives.size()) {
+ currentAlternativeIndex = index;
+ InlineAsm::SubConstraintInfo &scInfo =
+ multipleAlternatives[currentAlternativeIndex];
+ MatchingInput = scInfo.MatchingInput;
+ Codes = scInfo.Codes;
+ }
+}
+
+InlineAsm::ConstraintInfoVector
+InlineAsm::ParseConstraints(StringRef Constraints) {
+ ConstraintInfoVector Result;
+
+ // Scan the constraints string.
+ for (StringRef::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ) {
+ ConstraintInfo Info;
+
+ // Find the end of this constraint.
+ StringRef::iterator ConstraintEnd = std::find(I, E, ',');
+
+ if (ConstraintEnd == I || // Empty constraint like ",,"
+ Info.Parse(StringRef(I, ConstraintEnd-I), Result)) {
+ Result.clear(); // Erroneous constraint?
+ break;
+ }
+
+ Result.push_back(Info);
+
+ // ConstraintEnd may be either the next comma or the end of the string. In
+ // the former case, we skip the comma.
+ I = ConstraintEnd;
+ if (I != E) {
+ ++I;
+ if (I == E) { Result.clear(); break; } // don't allow "xyz,"
+ }
+ }
+
+ return Result;
+}
+
+/// Verify - Verify that the specified constraint string is reasonable for the
+/// specified function type, and otherwise validate the constraint string.
+bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
+ if (Ty->isVarArg()) return false;
+
+ ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
+
+ // Error parsing constraints.
+ if (Constraints.empty() && !ConstStr.empty()) return false;
+
+ unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
+ unsigned NumIndirect = 0;
+
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ switch (Constraints[i].Type) {
+ case InlineAsm::isOutput:
+ if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0)
+ return false; // outputs before inputs and clobbers.
+ if (!Constraints[i].isIndirect) {
+ ++NumOutputs;
+ break;
+ }
+ ++NumIndirect;
+ // FALLTHROUGH for Indirect Outputs.
+ case InlineAsm::isInput:
+ if (NumClobbers) return false; // inputs before clobbers.
+ ++NumInputs;
+ break;
+ case InlineAsm::isClobber:
+ ++NumClobbers;
+ break;
+ }
+ }
+
+ switch (NumOutputs) {
+ case 0:
+ if (!Ty->getReturnType()->isVoidTy()) return false;
+ break;
+ case 1:
+ if (Ty->getReturnType()->isStructTy()) return false;
+ break;
+ default:
+ StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
+ if (STy == 0 || STy->getNumElements() != NumOutputs)
+ return false;
+ break;
+ }
+
+ if (Ty->getNumParams() != NumInputs) return false;
+ return true;
+}
+
diff --git a/contrib/llvm/lib/IR/Instruction.cpp b/contrib/llvm/lib/IR/Instruction.cpp
new file mode 100644
index 000000000000..2b5a0b39c316
--- /dev/null
+++ b/contrib/llvm/lib/IR/Instruction.cpp
@@ -0,0 +1,555 @@
+//===-- Instruction.cpp - Implement the Instruction class -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Instruction class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/LeakDetector.h"
+using namespace llvm;
+
+Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+ Instruction *InsertBefore)
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+ // Make sure that we get added to a basicblock
+ LeakDetector::addGarbageObject(this);
+
+ // If requested, insert this instruction into a basic block...
+ if (InsertBefore) {
+ assert(InsertBefore->getParent() &&
+ "Instruction to insert before is not in a basic block!");
+ InsertBefore->getParent()->getInstList().insert(InsertBefore, this);
+ }
+}
+
+Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+ BasicBlock *InsertAtEnd)
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+ // Make sure that we get added to a basicblock
+ LeakDetector::addGarbageObject(this);
+
+ // append this instruction into the basic block
+ assert(InsertAtEnd && "Basic block to append to may not be NULL!");
+ InsertAtEnd->getInstList().push_back(this);
+}
+
+
+// Out of line virtual method, so the vtable, etc has a home.
+Instruction::~Instruction() {
+ assert(Parent == 0 && "Instruction still linked in the program!");
+ if (hasMetadataHashEntry())
+ clearMetadataHashEntries();
+}
+
+
+void Instruction::setParent(BasicBlock *P) {
+ if (getParent()) {
+ if (!P) LeakDetector::addGarbageObject(this);
+ } else {
+ if (P) LeakDetector::removeGarbageObject(this);
+ }
+
+ Parent = P;
+}
+
+void Instruction::removeFromParent() {
+ getParent()->getInstList().remove(this);
+}
+
+void Instruction::eraseFromParent() {
+ getParent()->getInstList().erase(this);
+}
+
+/// insertBefore - Insert an unlinked instructions into a basic block
+/// immediately before the specified instruction.
+void Instruction::insertBefore(Instruction *InsertPos) {
+ InsertPos->getParent()->getInstList().insert(InsertPos, this);
+}
+
+/// insertAfter - Insert an unlinked instructions into a basic block
+/// immediately after the specified instruction.
+void Instruction::insertAfter(Instruction *InsertPos) {
+ InsertPos->getParent()->getInstList().insertAfter(InsertPos, this);
+}
+
+/// moveBefore - Unlink this instruction from its current basic block and
+/// insert it into the basic block that MovePos lives in, right before
+/// MovePos.
+void Instruction::moveBefore(Instruction *MovePos) {
+ MovePos->getParent()->getInstList().splice(MovePos,getParent()->getInstList(),
+ this);
+}
+
+/// Set or clear the unsafe-algebra flag on this instruction, which must be an
+/// operator which supports this flag. See LangRef.html for the meaning of this
+/// flag.
+void Instruction::setHasUnsafeAlgebra(bool B) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setHasUnsafeAlgebra(B);
+}
+
+/// Set or clear the NoNaNs flag on this instruction, which must be an operator
+/// which supports this flag. See LangRef.html for the meaning of this flag.
+void Instruction::setHasNoNaNs(bool B) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setHasNoNaNs(B);
+}
+
+/// Set or clear the no-infs flag on this instruction, which must be an operator
+/// which supports this flag. See LangRef.html for the meaning of this flag.
+void Instruction::setHasNoInfs(bool B) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setHasNoInfs(B);
+}
+
+/// Set or clear the no-signed-zeros flag on this instruction, which must be an
+/// operator which supports this flag. See LangRef.html for the meaning of this
+/// flag.
+void Instruction::setHasNoSignedZeros(bool B) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setHasNoSignedZeros(B);
+}
+
+/// Set or clear the allow-reciprocal flag on this instruction, which must be an
+/// operator which supports this flag. See LangRef.html for the meaning of this
+/// flag.
+void Instruction::setHasAllowReciprocal(bool B) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setHasAllowReciprocal(B);
+}
+
+/// Convenience function for setting all the fast-math flags on this
+/// instruction, which must be an operator which supports these flags. See
+/// LangRef.html for the meaning of these flats.
+void Instruction::setFastMathFlags(FastMathFlags FMF) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setFastMathFlags(FMF);
+}
+
+/// Determine whether the unsafe-algebra flag is set.
+bool Instruction::hasUnsafeAlgebra() const {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasUnsafeAlgebra();
+}
+
+/// Determine whether the no-NaNs flag is set.
+bool Instruction::hasNoNaNs() const {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasNoNaNs();
+}
+
+/// Determine whether the no-infs flag is set.
+bool Instruction::hasNoInfs() const {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasNoInfs();
+}
+
+/// Determine whether the no-signed-zeros flag is set.
+bool Instruction::hasNoSignedZeros() const {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasNoSignedZeros();
+}
+
+/// Determine whether the allow-reciprocal flag is set.
+bool Instruction::hasAllowReciprocal() const {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasAllowReciprocal();
+}
+
+/// Convenience function for getting all the fast-math flags, which must be an
+/// operator which supports these flags. See LangRef.html for the meaning of
+/// these flats.
+FastMathFlags Instruction::getFastMathFlags() const {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->getFastMathFlags();
+}
+
+/// Copy I's fast-math flags
+void Instruction::copyFastMathFlags(const Instruction *I) {
+ setFastMathFlags(I->getFastMathFlags());
+}
+
+
+const char *Instruction::getOpcodeName(unsigned OpCode) {
+ switch (OpCode) {
+ // Terminators
+ case Ret: return "ret";
+ case Br: return "br";
+ case Switch: return "switch";
+ case IndirectBr: return "indirectbr";
+ case Invoke: return "invoke";
+ case Resume: return "resume";
+ case Unreachable: return "unreachable";
+
+ // Standard binary operators...
+ case Add: return "add";
+ case FAdd: return "fadd";
+ case Sub: return "sub";
+ case FSub: return "fsub";
+ case Mul: return "mul";
+ case FMul: return "fmul";
+ case UDiv: return "udiv";
+ case SDiv: return "sdiv";
+ case FDiv: return "fdiv";
+ case URem: return "urem";
+ case SRem: return "srem";
+ case FRem: return "frem";
+
+ // Logical operators...
+ case And: return "and";
+ case Or : return "or";
+ case Xor: return "xor";
+
+ // Memory instructions...
+ case Alloca: return "alloca";
+ case Load: return "load";
+ case Store: return "store";
+ case AtomicCmpXchg: return "cmpxchg";
+ case AtomicRMW: return "atomicrmw";
+ case Fence: return "fence";
+ case GetElementPtr: return "getelementptr";
+
+ // Convert instructions...
+ case Trunc: return "trunc";
+ case ZExt: return "zext";
+ case SExt: return "sext";
+ case FPTrunc: return "fptrunc";
+ case FPExt: return "fpext";
+ case FPToUI: return "fptoui";
+ case FPToSI: return "fptosi";
+ case UIToFP: return "uitofp";
+ case SIToFP: return "sitofp";
+ case IntToPtr: return "inttoptr";
+ case PtrToInt: return "ptrtoint";
+ case BitCast: return "bitcast";
+
+ // Other instructions...
+ case ICmp: return "icmp";
+ case FCmp: return "fcmp";
+ case PHI: return "phi";
+ case Select: return "select";
+ case Call: return "call";
+ case Shl: return "shl";
+ case LShr: return "lshr";
+ case AShr: return "ashr";
+ case VAArg: return "va_arg";
+ case ExtractElement: return "extractelement";
+ case InsertElement: return "insertelement";
+ case ShuffleVector: return "shufflevector";
+ case ExtractValue: return "extractvalue";
+ case InsertValue: return "insertvalue";
+ case LandingPad: return "landingpad";
+
+ default: return "<Invalid operator> ";
+ }
+}
+
+/// isIdenticalTo - Return true if the specified instruction is exactly
+/// identical to the current one. This means that all operands match and any
+/// extra information (e.g. load is volatile) agree.
+bool Instruction::isIdenticalTo(const Instruction *I) const {
+ return isIdenticalToWhenDefined(I) &&
+ SubclassOptionalData == I->SubclassOptionalData;
+}
+
+/// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
+/// ignores the SubclassOptionalData flags, which specify conditions
+/// under which the instruction's result is undefined.
+bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
+ if (getOpcode() != I->getOpcode() ||
+ getNumOperands() != I->getNumOperands() ||
+ getType() != I->getType())
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) != I->getOperand(i))
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I)->getAlignment() &&
+ LI->getOrdering() == cast<LoadInst>(I)->getOrdering() &&
+ LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(this))
+ return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I)->getAlignment() &&
+ SI->getOrdering() == cast<StoreInst>(I)->getOrdering() &&
+ SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(this))
+ return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
+ return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<InvokeInst>(I)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+ return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+ return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
+ if (const FenceInst *FI = dyn_cast<FenceInst>(this))
+ return FI->getOrdering() == cast<FenceInst>(FI)->getOrdering() &&
+ FI->getSynchScope() == cast<FenceInst>(FI)->getSynchScope();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
+ return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
+ CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() &&
+ CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
+ return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
+ RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() &&
+ RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() &&
+ RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope();
+ if (const PHINode *thisPHI = dyn_cast<PHINode>(this)) {
+ const PHINode *otherPHI = cast<PHINode>(I);
+ for (unsigned i = 0, e = thisPHI->getNumOperands(); i != e; ++i) {
+ if (thisPHI->getIncomingBlock(i) != otherPHI->getIncomingBlock(i))
+ return false;
+ }
+ return true;
+ }
+ return true;
+}
+
+// isSameOperationAs
+// This should be kept in sync with isEquivalentOperation in
+// lib/Transforms/IPO/MergeFunctions.cpp.
+bool Instruction::isSameOperationAs(const Instruction *I,
+ unsigned flags) const {
+ bool IgnoreAlignment = flags & CompareIgnoringAlignment;
+ bool UseScalarTypes = flags & CompareUsingScalarTypes;
+
+ if (getOpcode() != I->getOpcode() ||
+ getNumOperands() != I->getNumOperands() ||
+ (UseScalarTypes ?
+ getType()->getScalarType() != I->getType()->getScalarType() :
+ getType() != I->getType()))
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (UseScalarTypes ?
+ getOperand(i)->getType()->getScalarType() !=
+ I->getOperand(i)->getType()->getScalarType() :
+ getOperand(i)->getType() != I->getOperand(i)->getType())
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
+ (LI->getAlignment() == cast<LoadInst>(I)->getAlignment() ||
+ IgnoreAlignment) &&
+ LI->getOrdering() == cast<LoadInst>(I)->getOrdering() &&
+ LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(this))
+ return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
+ (SI->getAlignment() == cast<StoreInst>(I)->getAlignment() ||
+ IgnoreAlignment) &&
+ SI->getOrdering() == cast<StoreInst>(I)->getOrdering() &&
+ SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(this))
+ return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
+ return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
+ CI->getAttributes() ==
+ cast<InvokeInst>(I)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+ return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+ return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
+ if (const FenceInst *FI = dyn_cast<FenceInst>(this))
+ return FI->getOrdering() == cast<FenceInst>(I)->getOrdering() &&
+ FI->getSynchScope() == cast<FenceInst>(I)->getSynchScope();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
+ return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
+ CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() &&
+ CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
+ return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
+ RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() &&
+ RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() &&
+ RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope();
+
+ return true;
+}
+
+/// isUsedOutsideOfBlock - Return true if there are any uses of I outside of the
+/// specified block. Note that PHI nodes are considered to evaluate their
+/// operands in the corresponding predecessor block.
+bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
+ for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ // PHI nodes uses values in the corresponding predecessor block. For other
+ // instructions, just check to see whether the parent of the use matches up.
+ const User *U = *UI;
+ const PHINode *PN = dyn_cast<PHINode>(U);
+ if (PN == 0) {
+ if (cast<Instruction>(U)->getParent() != BB)
+ return true;
+ continue;
+ }
+
+ if (PN->getIncomingBlock(UI) != BB)
+ return true;
+ }
+ return false;
+}
+
+/// mayReadFromMemory - Return true if this instruction may read memory.
+///
+bool Instruction::mayReadFromMemory() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::VAArg:
+ case Instruction::Load:
+ case Instruction::Fence: // FIXME: refine definition of mayReadFromMemory
+ case Instruction::AtomicCmpXchg:
+ case Instruction::AtomicRMW:
+ return true;
+ case Instruction::Call:
+ return !cast<CallInst>(this)->doesNotAccessMemory();
+ case Instruction::Invoke:
+ return !cast<InvokeInst>(this)->doesNotAccessMemory();
+ case Instruction::Store:
+ return !cast<StoreInst>(this)->isUnordered();
+ }
+}
+
+/// mayWriteToMemory - Return true if this instruction may modify memory.
+///
+bool Instruction::mayWriteToMemory() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::Fence: // FIXME: refine definition of mayWriteToMemory
+ case Instruction::Store:
+ case Instruction::VAArg:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::AtomicRMW:
+ return true;
+ case Instruction::Call:
+ return !cast<CallInst>(this)->onlyReadsMemory();
+ case Instruction::Invoke:
+ return !cast<InvokeInst>(this)->onlyReadsMemory();
+ case Instruction::Load:
+ return !cast<LoadInst>(this)->isUnordered();
+ }
+}
+
+bool Instruction::mayThrow() const {
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return !CI->doesNotThrow();
+ return isa<ResumeInst>(this);
+}
+
+bool Instruction::mayReturn() const {
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return !CI->doesNotReturn();
+ return true;
+}
+
+/// isAssociative - Return true if the instruction is associative:
+///
+/// Associative operators satisfy: x op (y op z) === (x op y) op z
+///
+/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
+///
+bool Instruction::isAssociative(unsigned Opcode) {
+ return Opcode == And || Opcode == Or || Opcode == Xor ||
+ Opcode == Add || Opcode == Mul;
+}
+
+bool Instruction::isAssociative() const {
+ unsigned Opcode = getOpcode();
+ if (isAssociative(Opcode))
+ return true;
+
+ switch (Opcode) {
+ case FMul:
+ case FAdd:
+ return cast<FPMathOperator>(this)->hasUnsafeAlgebra();
+ default:
+ return false;
+ }
+}
+
+/// isCommutative - Return true if the instruction is commutative:
+///
+/// Commutative operators satisfy: (x op y) === (y op x)
+///
+/// In LLVM, these are the associative operators, plus SetEQ and SetNE, when
+/// applied to any type.
+///
+bool Instruction::isCommutative(unsigned op) {
+ switch (op) {
+ case Add:
+ case FAdd:
+ case Mul:
+ case FMul:
+ case And:
+ case Or:
+ case Xor:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isIdempotent - Return true if the instruction is idempotent:
+///
+/// Idempotent operators satisfy: x op x === x
+///
+/// In LLVM, the And and Or operators are idempotent.
+///
+bool Instruction::isIdempotent(unsigned Opcode) {
+ return Opcode == And || Opcode == Or;
+}
+
+/// isNilpotent - Return true if the instruction is nilpotent:
+///
+/// Nilpotent operators satisfy: x op x === Id,
+///
+/// where Id is the identity for the operator, i.e. a constant such that
+/// x op Id === x and Id op x === x for all x.
+///
+/// In LLVM, the Xor operator is nilpotent.
+///
+bool Instruction::isNilpotent(unsigned Opcode) {
+ return Opcode == Xor;
+}
+
+Instruction *Instruction::clone() const {
+ Instruction *New = clone_impl();
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (!hasMetadata())
+ return New;
+
+ // Otherwise, enumerate and copy over metadata from the old instruction to the
+ // new one.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> TheMDs;
+ getAllMetadataOtherThanDebugLoc(TheMDs);
+ for (unsigned i = 0, e = TheMDs.size(); i != e; ++i)
+ New->setMetadata(TheMDs[i].first, TheMDs[i].second);
+
+ New->setDebugLoc(getDebugLoc());
+ return New;
+}
diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp
new file mode 100644
index 000000000000..d58877ef773a
--- /dev/null
+++ b/contrib/llvm/lib/IR/Instructions.cpp
@@ -0,0 +1,3553 @@
+//===-- Instructions.cpp - Implement the LLVM instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements all of the non-inline methods for the LLVM instruction
+// classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Instructions.h"
+#include "LLVMContextImpl.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// CallSite Class
+//===----------------------------------------------------------------------===//
+
+User::op_iterator CallSite::getCallee() const {
+ Instruction *II(getInstruction());
+ return isCall()
+ ? cast<CallInst>(II)->op_end() - 1 // Skip Callee
+ : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Callee
+}
+
+//===----------------------------------------------------------------------===//
+// TerminatorInst Class
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method, so the vtable, etc has a home.
+TerminatorInst::~TerminatorInst() {
+}
+
+//===----------------------------------------------------------------------===//
+// UnaryInstruction Class
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method, so the vtable, etc has a home.
+UnaryInstruction::~UnaryInstruction() {
+}
+
+//===----------------------------------------------------------------------===//
+// SelectInst Class
+//===----------------------------------------------------------------------===//
+
+/// areInvalidOperands - Return a string if the specified operands are invalid
+/// for a select operation, otherwise return null.
+const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
+ if (Op1->getType() != Op2->getType())
+ return "both values to select must have same type";
+
+ if (VectorType *VT = dyn_cast<VectorType>(Op0->getType())) {
+ // Vector select.
+ if (VT->getElementType() != Type::getInt1Ty(Op0->getContext()))
+ return "vector select condition element type must be i1";
+ VectorType *ET = dyn_cast<VectorType>(Op1->getType());
+ if (ET == 0)
+ return "selected values for vector select must be vectors";
+ if (ET->getNumElements() != VT->getNumElements())
+ return "vector select requires selected vectors to have "
+ "the same vector length as select condition";
+ } else if (Op0->getType() != Type::getInt1Ty(Op0->getContext())) {
+ return "select condition must be i1 or <n x i1>";
+ }
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PHINode Class
+//===----------------------------------------------------------------------===//
+
+PHINode::PHINode(const PHINode &PN)
+ : Instruction(PN.getType(), Instruction::PHI,
+ allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()),
+ ReservedSpace(PN.getNumOperands()) {
+ std::copy(PN.op_begin(), PN.op_end(), op_begin());
+ std::copy(PN.block_begin(), PN.block_end(), block_begin());
+ SubclassOptionalData = PN.SubclassOptionalData;
+}
+
+PHINode::~PHINode() {
+ dropHungoffUses();
+}
+
+Use *PHINode::allocHungoffUses(unsigned N) const {
+ // Allocate the array of Uses of the incoming values, followed by a pointer
+ // (with bottom bit set) to the User, followed by the array of pointers to
+ // the incoming basic blocks.
+ size_t size = N * sizeof(Use) + sizeof(Use::UserRef)
+ + N * sizeof(BasicBlock*);
+ Use *Begin = static_cast<Use*>(::operator new(size));
+ Use *End = Begin + N;
+ (void) new(End) Use::UserRef(const_cast<PHINode*>(this), 1);
+ return Use::initTags(Begin, End);
+}
+
+// removeIncomingValue - Remove an incoming value. This is useful if a
+// predecessor basic block is deleted.
+Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
+ Value *Removed = getIncomingValue(Idx);
+
+ // Move everything after this operand down.
+ //
+ // FIXME: we could just swap with the end of the list, then erase. However,
+ // clients might not expect this to happen. The code as it is thrashes the
+ // use/def lists, which is kinda lame.
+ std::copy(op_begin() + Idx + 1, op_end(), op_begin() + Idx);
+ std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx);
+
+ // Nuke the last value.
+ Op<-1>().set(0);
+ --NumOperands;
+
+ // If the PHI node is dead, because it has zero entries, nuke it now.
+ if (getNumOperands() == 0 && DeletePHIIfEmpty) {
+ // If anyone is using this PHI, make them use a dummy value instead...
+ replaceAllUsesWith(UndefValue::get(getType()));
+ eraseFromParent();
+ }
+ return Removed;
+}
+
+/// growOperands - grow operands - This grows the operand list in response
+/// to a push_back style of operation. This grows the number of ops by 1.5
+/// times.
+///
+void PHINode::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e + e / 2;
+ if (NumOps < 2) NumOps = 2; // 2 op PHI nodes are VERY common.
+
+ Use *OldOps = op_begin();
+ BasicBlock **OldBlocks = block_begin();
+
+ ReservedSpace = NumOps;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ std::copy(OldOps, OldOps + e, op_begin());
+ std::copy(OldBlocks, OldBlocks + e, block_begin());
+
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+/// hasConstantValue - If the specified PHI node always merges together the same
+/// value, return the value, otherwise return null.
+Value *PHINode::hasConstantValue() const {
+ // Exploit the fact that phi nodes always have at least one entry.
+ Value *ConstantValue = getIncomingValue(0);
+ for (unsigned i = 1, e = getNumIncomingValues(); i != e; ++i)
+ if (getIncomingValue(i) != ConstantValue && getIncomingValue(i) != this) {
+ if (ConstantValue != this)
+ return 0; // Incoming values not all the same.
+ // The case where the first value is this PHI.
+ ConstantValue = getIncomingValue(i);
+ }
+ if (ConstantValue == this)
+ return UndefValue::get(getType());
+ return ConstantValue;
+}
+
+//===----------------------------------------------------------------------===//
+// LandingPadInst Implementation
+//===----------------------------------------------------------------------===//
+
+LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedValues, const Twine &NameStr,
+ Instruction *InsertBefore)
+ : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertBefore) {
+ init(PersonalityFn, 1 + NumReservedValues, NameStr);
+}
+
+LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedValues, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertAtEnd) {
+ init(PersonalityFn, 1 + NumReservedValues, NameStr);
+}
+
+LandingPadInst::LandingPadInst(const LandingPadInst &LP)
+ : Instruction(LP.getType(), Instruction::LandingPad,
+ allocHungoffUses(LP.getNumOperands()), LP.getNumOperands()),
+ ReservedSpace(LP.getNumOperands()) {
+ Use *OL = OperandList, *InOL = LP.OperandList;
+ for (unsigned I = 0, E = ReservedSpace; I != E; ++I)
+ OL[I] = InOL[I];
+
+ setCleanup(LP.isCleanup());
+}
+
+LandingPadInst::~LandingPadInst() {
+ dropHungoffUses();
+}
+
+LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedClauses,
+ const Twine &NameStr,
+ Instruction *InsertBefore) {
+ return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr,
+ InsertBefore);
+}
+
+LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedClauses,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr,
+ InsertAtEnd);
+}
+
+void LandingPadInst::init(Value *PersFn, unsigned NumReservedValues,
+ const Twine &NameStr) {
+ ReservedSpace = NumReservedValues;
+ NumOperands = 1;
+ OperandList = allocHungoffUses(ReservedSpace);
+ OperandList[0] = PersFn;
+ setName(NameStr);
+ setCleanup(false);
+}
+
+/// growOperands - grow operands - This grows the operand list in response to a
+/// push_back style of operation. This grows the number of ops by 2 times.
+void LandingPadInst::growOperands(unsigned Size) {
+ unsigned e = getNumOperands();
+ if (ReservedSpace >= e + Size) return;
+ ReservedSpace = (e + Size / 2) * 2;
+
+ Use *NewOps = allocHungoffUses(ReservedSpace);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i)
+ NewOps[i] = OldOps[i];
+
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+void LandingPadInst::addClause(Value *Val) {
+ unsigned OpNo = getNumOperands();
+ growOperands(1);
+ assert(OpNo < ReservedSpace && "Growing didn't work!");
+ ++NumOperands;
+ OperandList[OpNo] = Val;
+}
+
+//===----------------------------------------------------------------------===//
+// CallInst Implementation
+//===----------------------------------------------------------------------===//
+
+CallInst::~CallInst() {
+}
+
+void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
+ assert(NumOperands == Args.size() + 1 && "NumOperands not set up?");
+ Op<-1>() = Func;
+
+#ifndef NDEBUG
+ FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+
+ assert((Args.size() == FTy->getNumParams() ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
+ "Calling a function with bad signature!");
+
+ for (unsigned i = 0; i != Args.size(); ++i)
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Args[i]->getType()) &&
+ "Calling a function with a bad signature!");
+#endif
+
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setName(NameStr);
+}
+
+void CallInst::init(Value *Func, const Twine &NameStr) {
+ assert(NumOperands == 1 && "NumOperands not set up?");
+ Op<-1>() = Func;
+
+#ifndef NDEBUG
+ FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+
+ assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
+#endif
+
+ setName(NameStr);
+}
+
+CallInst::CallInst(Value *Func, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 1,
+ 1, InsertBefore) {
+ init(Func, Name);
+}
+
+CallInst::CallInst(Value *Func, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 1,
+ 1, InsertAtEnd) {
+ init(Func, Name);
+}
+
+CallInst::CallInst(const CallInst &CI)
+ : Instruction(CI.getType(), Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - CI.getNumOperands(),
+ CI.getNumOperands()) {
+ setAttributes(CI.getAttributes());
+ setTailCall(CI.isTailCall());
+ setCallingConv(CI.getCallingConv());
+
+ std::copy(CI.op_begin(), CI.op_end(), op_begin());
+ SubclassOptionalData = CI.SubclassOptionalData;
+}
+
+void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addAttribute(getContext(), i, attr);
+ setAttributes(PAL);
+}
+
+void CallInst::removeAttribute(unsigned i, Attribute attr) {
+ AttributeSet PAL = getAttributes();
+ AttrBuilder B(attr);
+ LLVMContext &Context = getContext();
+ PAL = PAL.removeAttributes(Context, i,
+ AttributeSet::get(Context, i, B));
+ setAttributes(PAL);
+}
+
+bool CallInst::hasFnAttr(Attribute::AttrKind A) const {
+ if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
+ return false;
+}
+
+bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
+ if (AttributeList.hasAttribute(i, A))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(i, A);
+ return false;
+}
+
+/// IsConstantOne - Return true only if val is constant int 1
+static bool IsConstantOne(Value *val) {
+ assert(val && "IsConstantOne does not work with NULL val");
+ return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
+}
+
+static Instruction *createMalloc(Instruction *InsertBefore,
+ BasicBlock *InsertAtEnd, Type *IntPtrTy,
+ Type *AllocTy, Value *AllocSize,
+ Value *ArraySize, Function *MallocF,
+ const Twine &Name) {
+ assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
+ "createMalloc needs either InsertBefore or InsertAtEnd");
+
+ // malloc(type) becomes:
+ // bitcast (i8* malloc(typeSize)) to type*
+ // malloc(type, arraySize) becomes:
+ // bitcast (i8 *malloc(typeSize*arraySize)) to type*
+ if (!ArraySize)
+ ArraySize = ConstantInt::get(IntPtrTy, 1);
+ else if (ArraySize->getType() != IntPtrTy) {
+ if (InsertBefore)
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertBefore);
+ else
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertAtEnd);
+ }
+
+ if (!IsConstantOne(ArraySize)) {
+ if (IsConstantOne(AllocSize)) {
+ AllocSize = ArraySize; // Operand * 1 = Operand
+ } else if (Constant *CO = dyn_cast<Constant>(ArraySize)) {
+ Constant *Scale = ConstantExpr::getIntegerCast(CO, IntPtrTy,
+ false /*ZExt*/);
+ // Malloc arg is constant product of type size and array size
+ AllocSize = ConstantExpr::getMul(Scale, cast<Constant>(AllocSize));
+ } else {
+ // Multiply type size by the array size...
+ if (InsertBefore)
+ AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
+ "mallocsize", InsertBefore);
+ else
+ AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
+ "mallocsize", InsertAtEnd);
+ }
+ }
+
+ assert(AllocSize->getType() == IntPtrTy && "malloc arg is wrong size");
+ // Create the call to Malloc.
+ BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
+ Module* M = BB->getParent()->getParent();
+ Type *BPTy = Type::getInt8PtrTy(BB->getContext());
+ Value *MallocFunc = MallocF;
+ if (!MallocFunc)
+ // prototype malloc as "void *malloc(size_t)"
+ MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
+ PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
+ CallInst *MCall = NULL;
+ Instruction *Result = NULL;
+ if (InsertBefore) {
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall", InsertBefore);
+ Result = MCall;
+ if (Result->getType() != AllocPtrType)
+ // Create a cast instruction to convert to the right type...
+ Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
+ } else {
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall");
+ Result = MCall;
+ if (Result->getType() != AllocPtrType) {
+ InsertAtEnd->getInstList().push_back(MCall);
+ // Create a cast instruction to convert to the right type...
+ Result = new BitCastInst(MCall, AllocPtrType, Name);
+ }
+ }
+ MCall->setTailCall();
+ if (Function *F = dyn_cast<Function>(MallocFunc)) {
+ MCall->setCallingConv(F->getCallingConv());
+ if (!F->doesNotAlias(0)) F->setDoesNotAlias(0);
+ }
+ assert(!MCall->getType()->isVoidTy() && "Malloc has void return type");
+
+ return Result;
+}
+
+/// CreateMalloc - Generate the IR for a call to malloc:
+/// 1. Compute the malloc call's argument as the specified type's size,
+/// possibly multiplied by the array size if the array size is not
+/// constant 1.
+/// 2. Call malloc with that argument.
+/// 3. Bitcast the result of the malloc call to the specified type.
+Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
+ Type *IntPtrTy, Type *AllocTy,
+ Value *AllocSize, Value *ArraySize,
+ Function * MallocF,
+ const Twine &Name) {
+ return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
+ ArraySize, MallocF, Name);
+}
+
+/// CreateMalloc - Generate the IR for a call to malloc:
+/// 1. Compute the malloc call's argument as the specified type's size,
+/// possibly multiplied by the array size if the array size is not
+/// constant 1.
+/// 2. Call malloc with that argument.
+/// 3. Bitcast the result of the malloc call to the specified type.
+/// Note: This function does not add the bitcast to the basic block, that is the
+/// responsibility of the caller.
+Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
+ Type *IntPtrTy, Type *AllocTy,
+ Value *AllocSize, Value *ArraySize,
+ Function *MallocF, const Twine &Name) {
+ return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
+ ArraySize, MallocF, Name);
+}
+
+static Instruction* createFree(Value* Source, Instruction *InsertBefore,
+ BasicBlock *InsertAtEnd) {
+ assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
+ "createFree needs either InsertBefore or InsertAtEnd");
+ assert(Source->getType()->isPointerTy() &&
+ "Can not free something of nonpointer type!");
+
+ BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
+ Module* M = BB->getParent()->getParent();
+
+ Type *VoidTy = Type::getVoidTy(M->getContext());
+ Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
+ // prototype free as "void free(void*)"
+ Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
+ CallInst* Result = NULL;
+ Value *PtrCast = Source;
+ if (InsertBefore) {
+ if (Source->getType() != IntPtrTy)
+ PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertBefore);
+ Result = CallInst::Create(FreeFunc, PtrCast, "", InsertBefore);
+ } else {
+ if (Source->getType() != IntPtrTy)
+ PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertAtEnd);
+ Result = CallInst::Create(FreeFunc, PtrCast, "");
+ }
+ Result->setTailCall();
+ if (Function *F = dyn_cast<Function>(FreeFunc))
+ Result->setCallingConv(F->getCallingConv());
+
+ return Result;
+}
+
+/// CreateFree - Generate the IR for a call to the builtin free function.
+Instruction * CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
+ return createFree(Source, InsertBefore, NULL);
+}
+
+/// CreateFree - Generate the IR for a call to the builtin free function.
+/// Note: This function does not add the call to the basic block, that is the
+/// responsibility of the caller.
+Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
+ Instruction* FreeCall = createFree(Source, NULL, InsertAtEnd);
+ assert(FreeCall && "CreateFree did not create a CallInst");
+ return FreeCall;
+}
+
+//===----------------------------------------------------------------------===//
+// InvokeInst Implementation
+//===----------------------------------------------------------------------===//
+
+void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, const Twine &NameStr) {
+ assert(NumOperands == 3 + Args.size() && "NumOperands not set up?");
+ Op<-3>() = Fn;
+ Op<-2>() = IfNormal;
+ Op<-1>() = IfException;
+
+#ifndef NDEBUG
+ FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
+
+ assert(((Args.size() == FTy->getNumParams()) ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
+ "Invoking a function with bad signature");
+
+ for (unsigned i = 0, e = Args.size(); i != e; i++)
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Args[i]->getType()) &&
+ "Invoking a function with a bad signature!");
+#endif
+
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setName(NameStr);
+}
+
+InvokeInst::InvokeInst(const InvokeInst &II)
+ : TerminatorInst(II.getType(), Instruction::Invoke,
+ OperandTraits<InvokeInst>::op_end(this)
+ - II.getNumOperands(),
+ II.getNumOperands()) {
+ setAttributes(II.getAttributes());
+ setCallingConv(II.getCallingConv());
+ std::copy(II.op_begin(), II.op_end(), op_begin());
+ SubclassOptionalData = II.SubclassOptionalData;
+}
+
+BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned InvokeInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ return setSuccessor(idx, B);
+}
+
+bool InvokeInst::hasFnAttr(Attribute::AttrKind A) const {
+ if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
+ return false;
+}
+
+bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
+ if (AttributeList.hasAttribute(i, A))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(i, A);
+ return false;
+}
+
+void InvokeInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addAttribute(getContext(), i, attr);
+ setAttributes(PAL);
+}
+
+void InvokeInst::removeAttribute(unsigned i, Attribute attr) {
+ AttributeSet PAL = getAttributes();
+ AttrBuilder B(attr);
+ PAL = PAL.removeAttributes(getContext(), i,
+ AttributeSet::get(getContext(), i, B));
+ setAttributes(PAL);
+}
+
+LandingPadInst *InvokeInst::getLandingPadInst() const {
+ return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI());
+}
+
+//===----------------------------------------------------------------------===//
+// ReturnInst Implementation
+//===----------------------------------------------------------------------===//
+
+ReturnInst::ReturnInst(const ReturnInst &RI)
+ : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) -
+ RI.getNumOperands(),
+ RI.getNumOperands()) {
+ if (RI.getNumOperands())
+ Op<0>() = RI.Op<0>();
+ SubclassOptionalData = RI.SubclassOptionalData;
+}
+
+ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertBefore) {
+ if (retVal)
+ Op<0>() = retVal;
+}
+ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertAtEnd) {
+ if (retVal)
+ Op<0>() = retVal;
+}
+ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {
+}
+
+unsigned ReturnInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to
+/// emit the vtable for the class in this translation unit.
+void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("ReturnInst has no successors!");
+}
+
+BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("ReturnInst has no successors!");
+}
+
+ReturnInst::~ReturnInst() {
+}
+
+//===----------------------------------------------------------------------===//
+// ResumeInst Implementation
+//===----------------------------------------------------------------------===//
+
+ResumeInst::ResumeInst(const ResumeInst &RI)
+ : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1) {
+ Op<0>() = RI.Op<0>();
+}
+
+ResumeInst::ResumeInst(Value *Exn, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1, InsertBefore) {
+ Op<0>() = Exn;
+}
+
+ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1, InsertAtEnd) {
+ Op<0>() = Exn;
+}
+
+unsigned ResumeInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("ResumeInst has no successors!");
+}
+
+BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("ResumeInst has no successors!");
+}
+
+//===----------------------------------------------------------------------===//
+// UnreachableInst Implementation
+//===----------------------------------------------------------------------===//
+
+UnreachableInst::UnreachableInst(LLVMContext &Context,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
+ 0, 0, InsertBefore) {
+}
+UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
+ 0, 0, InsertAtEnd) {
+}
+
+unsigned UnreachableInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("UnreachableInst has no successors!");
+}
+
+BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("UnreachableInst has no successors!");
+}
+
+//===----------------------------------------------------------------------===//
+// BranchInst Implementation
+//===----------------------------------------------------------------------===//
+
+void BranchInst::AssertOK() {
+ if (isConditional())
+ assert(getCondition()->getType()->isIntegerTy(1) &&
+ "May only branch on boolean predicates!");
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1,
+ 1, InsertBefore) {
+ assert(IfTrue != 0 && "Branch destination may not be null!");
+ Op<-1>() = IfTrue;
+}
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3,
+ 3, InsertBefore) {
+ Op<-1>() = IfTrue;
+ Op<-2>() = IfFalse;
+ Op<-3>() = Cond;
+#ifndef NDEBUG
+ AssertOK();
+#endif
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1,
+ 1, InsertAtEnd) {
+ assert(IfTrue != 0 && "Branch destination may not be null!");
+ Op<-1>() = IfTrue;
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3,
+ 3, InsertAtEnd) {
+ Op<-1>() = IfTrue;
+ Op<-2>() = IfFalse;
+ Op<-3>() = Cond;
+#ifndef NDEBUG
+ AssertOK();
+#endif
+}
+
+
+BranchInst::BranchInst(const BranchInst &BI) :
+ TerminatorInst(Type::getVoidTy(BI.getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
+ BI.getNumOperands()) {
+ Op<-1>() = BI.Op<-1>();
+ if (BI.getNumOperands() != 1) {
+ assert(BI.getNumOperands() == 3 && "BR can have 1 or 3 operands!");
+ Op<-3>() = BI.Op<-3>();
+ Op<-2>() = BI.Op<-2>();
+ }
+ SubclassOptionalData = BI.SubclassOptionalData;
+}
+
+void BranchInst::swapSuccessors() {
+ assert(isConditional() &&
+ "Cannot swap successors of an unconditional branch");
+ Op<-1>().swap(Op<-2>());
+
+ // Update profile metadata if present and it matches our structural
+ // expectations.
+ MDNode *ProfileData = getMetadata(LLVMContext::MD_prof);
+ if (!ProfileData || ProfileData->getNumOperands() != 3)
+ return;
+
+ // The first operand is the name. Fetch them backwards and build a new one.
+ Value *Ops[] = {
+ ProfileData->getOperand(0),
+ ProfileData->getOperand(2),
+ ProfileData->getOperand(1)
+ };
+ setMetadata(LLVMContext::MD_prof,
+ MDNode::get(ProfileData->getContext(), Ops));
+}
+
+BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned BranchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+
+//===----------------------------------------------------------------------===//
+// AllocaInst Implementation
+//===----------------------------------------------------------------------===//
+
+static Value *getAISize(LLVMContext &Context, Value *Amt) {
+ if (!Amt)
+ Amt = ConstantInt::get(Type::getInt32Ty(Context), 1);
+ else {
+ assert(!isa<BasicBlock>(Amt) &&
+ "Passed basic block into allocation size parameter! Use other ctor");
+ assert(Amt->getType()->isIntegerTy() &&
+ "Allocation array size is not an integer!");
+ }
+ return Amt;
+}
+
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), 0), InsertBefore) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), 0), InsertAtEnd) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore) {
+ setAlignment(Align);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
+ setAlignment(Align);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+// Out of line virtual method, so the vtable, etc has a home.
+AllocaInst::~AllocaInst() {
+}
+
+void AllocaInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData(Log2_32(Align) + 1);
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+bool AllocaInst::isArrayAllocation() const {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(0)))
+ return !CI->isOne();
+ return true;
+}
+
+Type *AllocaInst::getAllocatedType() const {
+ return getType()->getElementType();
+}
+
+/// isStaticAlloca - Return true if this alloca is in the entry block of the
+/// function and is a constant size. If so, the code generator will fold it
+/// into the prolog/epilog code, so it is basically free.
+bool AllocaInst::isStaticAlloca() const {
+ // Must be constant size.
+ if (!isa<ConstantInt>(getArraySize())) return false;
+
+ // Must be in the entry block.
+ const BasicBlock *Parent = getParent();
+ return Parent == &Parent->getParent()->front();
+}
+
+//===----------------------------------------------------------------------===//
+// LoadInst Implementation
+//===----------------------------------------------------------------------===//
+
+void LoadInst::AssertOK() {
+ assert(getOperand(0)->getType()->isPointerTy() &&
+ "Ptr must have pointer type.");
+ assert(!(isAtomic() && getAlignment() == 0) &&
+ "Alignment required for atomic load");
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+ Instruction *InsertBef)
+: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+void LoadInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
+ ((Log2_32(Align)+1)<<1));
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+//===----------------------------------------------------------------------===//
+// StoreInst Implementation
+//===----------------------------------------------------------------------===//
+
+void StoreInst::AssertOK() {
+ assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!");
+ assert(getOperand(1)->getType()->isPointerTy() &&
+ "Ptr must have pointer type!");
+ assert(getOperand(0)->getType() ==
+ cast<PointerType>(getOperand(1)->getType())->getElementType()
+ && "Ptr must be a pointer to Val type!");
+ assert(!(isAtomic() && getAlignment() == 0) &&
+ "Alignment required for atomic load");
+}
+
+
+StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
+ AssertOK();
+}
+
+void StoreInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
+ ((Log2_32(Align)+1) << 1));
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+//===----------------------------------------------------------------------===//
+// AtomicCmpXchgInst Implementation
+//===----------------------------------------------------------------------===//
+
+void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ Op<0>() = Ptr;
+ Op<1>() = Cmp;
+ Op<2>() = NewVal;
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+
+ assert(getOperand(0) && getOperand(1) && getOperand(2) &&
+ "All operands must be non-null!");
+ assert(getOperand(0)->getType()->isPointerTy() &&
+ "Ptr must have pointer type!");
+ assert(getOperand(1)->getType() ==
+ cast<PointerType>(getOperand(0)->getType())->getElementType()
+ && "Ptr must be a pointer to Cmp type!");
+ assert(getOperand(2)->getType() ==
+ cast<PointerType>(getOperand(0)->getType())->getElementType()
+ && "Ptr must be a pointer to NewVal type!");
+ assert(Ordering != NotAtomic &&
+ "AtomicCmpXchg instructions must be atomic!");
+}
+
+AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Cmp->getType(), AtomicCmpXchg,
+ OperandTraits<AtomicCmpXchgInst>::op_begin(this),
+ OperandTraits<AtomicCmpXchgInst>::operands(this),
+ InsertBefore) {
+ Init(Ptr, Cmp, NewVal, Ordering, SynchScope);
+}
+
+AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Cmp->getType(), AtomicCmpXchg,
+ OperandTraits<AtomicCmpXchgInst>::op_begin(this),
+ OperandTraits<AtomicCmpXchgInst>::operands(this),
+ InsertAtEnd) {
+ Init(Ptr, Cmp, NewVal, Ordering, SynchScope);
+}
+
+//===----------------------------------------------------------------------===//
+// AtomicRMWInst Implementation
+//===----------------------------------------------------------------------===//
+
+void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ Op<0>() = Ptr;
+ Op<1>() = Val;
+ setOperation(Operation);
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+
+ assert(getOperand(0) && getOperand(1) &&
+ "All operands must be non-null!");
+ assert(getOperand(0)->getType()->isPointerTy() &&
+ "Ptr must have pointer type!");
+ assert(getOperand(1)->getType() ==
+ cast<PointerType>(getOperand(0)->getType())->getElementType()
+ && "Ptr must be a pointer to Val type!");
+ assert(Ordering != NotAtomic &&
+ "AtomicRMW instructions must be atomic!");
+}
+
+AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Val->getType(), AtomicRMW,
+ OperandTraits<AtomicRMWInst>::op_begin(this),
+ OperandTraits<AtomicRMWInst>::operands(this),
+ InsertBefore) {
+ Init(Operation, Ptr, Val, Ordering, SynchScope);
+}
+
+AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Val->getType(), AtomicRMW,
+ OperandTraits<AtomicRMWInst>::op_begin(this),
+ OperandTraits<AtomicRMWInst>::operands(this),
+ InsertAtEnd) {
+ Init(Operation, Ptr, Val, Ordering, SynchScope);
+}
+
+//===----------------------------------------------------------------------===//
+// FenceInst Implementation
+//===----------------------------------------------------------------------===//
+
+FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertBefore) {
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+}
+
+FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertAtEnd) {
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+}
+
+//===----------------------------------------------------------------------===//
+// GetElementPtrInst Implementation
+//===----------------------------------------------------------------------===//
+
+void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList,
+ const Twine &Name) {
+ assert(NumOperands == 1 + IdxList.size() && "NumOperands not initialized?");
+ OperandList[0] = Ptr;
+ std::copy(IdxList.begin(), IdxList.end(), op_begin() + 1);
+ setName(Name);
+}
+
+GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
+ : Instruction(GEPI.getType(), GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this)
+ - GEPI.getNumOperands(),
+ GEPI.getNumOperands()) {
+ std::copy(GEPI.op_begin(), GEPI.op_end(), op_begin());
+ SubclassOptionalData = GEPI.SubclassOptionalData;
+}
+
+/// getIndexedType - Returns the type of the element that would be accessed with
+/// a gep instruction with the specified parameters.
+///
+/// The Idxs pointer should point to a continuous piece of memory containing the
+/// indices, either as Value* or uint64_t.
+///
+/// A null type is returned if the indices are invalid for the specified
+/// pointer type.
+///
+template <typename IndexTy>
+static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
+ PointerType *PTy = dyn_cast<PointerType>(Ptr->getScalarType());
+ if (!PTy) return 0; // Type isn't a pointer type!
+ Type *Agg = PTy->getElementType();
+
+ // Handle the special case of the empty set index set, which is always valid.
+ if (IdxList.empty())
+ return Agg;
+
+ // If there is at least one index, the top level type must be sized, otherwise
+ // it cannot be 'stepped over'.
+ if (!Agg->isSized())
+ return 0;
+
+ unsigned CurIdx = 1;
+ for (; CurIdx != IdxList.size(); ++CurIdx) {
+ CompositeType *CT = dyn_cast<CompositeType>(Agg);
+ if (!CT || CT->isPointerTy()) return 0;
+ IndexTy Index = IdxList[CurIdx];
+ if (!CT->indexValid(Index)) return 0;
+ Agg = CT->getTypeAtIndex(Index);
+ }
+ return CurIdx == IdxList.size() ? Agg : 0;
+}
+
+Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList) {
+ return getIndexedTypeInternal(Ptr, IdxList);
+}
+
+Type *GetElementPtrInst::getIndexedType(Type *Ptr,
+ ArrayRef<Constant *> IdxList) {
+ return getIndexedTypeInternal(Ptr, IdxList);
+}
+
+Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) {
+ return getIndexedTypeInternal(Ptr, IdxList);
+}
+
+/// hasAllZeroIndices - Return true if all of the indices of this GEP are
+/// zeros. If so, the result pointer and the first operand have the same
+/// value, just potentially different types.
+bool GetElementPtrInst::hasAllZeroIndices() const {
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(i))) {
+ if (!CI->isZero()) return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+/// hasAllConstantIndices - Return true if all of the indices of this GEP are
+/// constant integers. If so, the result pointer and the first operand have
+/// a constant offset between them.
+bool GetElementPtrInst::hasAllConstantIndices() const {
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ }
+ return true;
+}
+
+void GetElementPtrInst::setIsInBounds(bool B) {
+ cast<GEPOperator>(this)->setIsInBounds(B);
+}
+
+bool GetElementPtrInst::isInBounds() const {
+ return cast<GEPOperator>(this)->isInBounds();
+}
+
+bool GetElementPtrInst::accumulateConstantOffset(const DataLayout &DL,
+ APInt &Offset) const {
+ // Delegate to the generic GEPOperator implementation.
+ return cast<GEPOperator>(this)->accumulateConstantOffset(DL, Offset);
+}
+
+//===----------------------------------------------------------------------===//
+// ExtractElementInst Implementation
+//===----------------------------------------------------------------------===//
+
+ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
+ const Twine &Name,
+ Instruction *InsertBef)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertBef) {
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
+ const Twine &Name,
+ BasicBlock *InsertAE)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertAE) {
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+
+bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
+ if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32))
+ return false;
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// InsertElementInst Implementation
+//===----------------------------------------------------------------------===//
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
+ const Twine &Name,
+ Instruction *InsertBef)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertBef) {
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
+ const Twine &Name,
+ BasicBlock *InsertAE)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertAE) {
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
+ const Value *Index) {
+ if (!Vec->getType()->isVectorTy())
+ return false; // First operand of insertelement must be vector type.
+
+ if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
+ return false;// Second operand of insertelement must be vector element type.
+
+ if (!Index->getType()->isIntegerTy(32))
+ return false; // Third operand of insertelement must be i32.
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// ShuffleVectorInst Implementation
+//===----------------------------------------------------------------------===//
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const Twine &Name,
+ Instruction *InsertBefore)
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+ cast<VectorType>(Mask->getType())->getNumElements()),
+ ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertBefore) {
+ assert(isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector instruction operands!");
+ Op<0>() = V1;
+ Op<1>() = V2;
+ Op<2>() = Mask;
+ setName(Name);
+}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd)
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+ cast<VectorType>(Mask->getType())->getNumElements()),
+ ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertAtEnd) {
+ assert(isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector instruction operands!");
+
+ Op<0>() = V1;
+ Op<1>() = V2;
+ Op<2>() = Mask;
+ setName(Name);
+}
+
+bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
+ const Value *Mask) {
+ // V1 and V2 must be vectors of the same type.
+ if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
+ return false;
+
+ // Mask must be vector of i32.
+ VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
+ if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
+ return false;
+
+ // Check to see if Mask is valid.
+ if (isa<UndefValue>(Mask) || isa<ConstantAggregateZero>(Mask))
+ return true;
+
+ if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
+ unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
+ for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
+ if (CI->uge(V1Size*2))
+ return false;
+ } else if (!isa<UndefValue>(MV->getOperand(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(Mask)) {
+ unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
+ for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i)
+ if (CDS->getElementAsInteger(i) >= V1Size*2)
+ return false;
+ return true;
+ }
+
+ // The bitcode reader can create a place holder for a forward reference
+ // used as the shuffle mask. When this occurs, the shuffle mask will
+ // fall into this case and fail. To avoid this error, do this bit of
+ // ugliness to allow such a mask pass.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Mask))
+ if (CE->getOpcode() == Instruction::UserOp1)
+ return true;
+
+ return false;
+}
+
+/// getMaskValue - Return the index from the shuffle mask for the specified
+/// output result. This is either -1 if the element is undef or a number less
+/// than 2*numelements.
+int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) {
+ assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
+ if (ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(Mask))
+ return CDS->getElementAsInteger(i);
+ Constant *C = Mask->getAggregateElement(i);
+ if (isa<UndefValue>(C))
+ return -1;
+ return cast<ConstantInt>(C)->getZExtValue();
+}
+
+/// getShuffleMask - Return the full mask for this instruction, where each
+/// element is the element number and undef's are returned as -1.
+void ShuffleVectorInst::getShuffleMask(Constant *Mask,
+ SmallVectorImpl<int> &Result) {
+ unsigned NumElts = Mask->getType()->getVectorNumElements();
+
+ if (ConstantDataSequential *CDS=dyn_cast<ConstantDataSequential>(Mask)) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Result.push_back(CDS->getElementAsInteger(i));
+ return;
+ }
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C = Mask->getAggregateElement(i);
+ Result.push_back(isa<UndefValue>(C) ? -1 :
+ cast<ConstantInt>(C)->getZExtValue());
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// InsertValueInst Class
+//===----------------------------------------------------------------------===//
+
+void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
+ const Twine &Name) {
+ assert(NumOperands == 2 && "NumOperands not initialized?");
+
+ // There's no fundamental reason why we require at least one index
+ // (other than weirdness with &*IdxBegin being invalid; see
+ // getelementptr's init routine for example). But there's no
+ // present need to support it.
+ assert(Idxs.size() > 0 && "InsertValueInst must have at least one index");
+
+ assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) ==
+ Val->getType() && "Inserted value must match indexed type!");
+ Op<0>() = Agg;
+ Op<1>() = Val;
+
+ Indices.append(Idxs.begin(), Idxs.end());
+ setName(Name);
+}
+
+InsertValueInst::InsertValueInst(const InsertValueInst &IVI)
+ : Instruction(IVI.getType(), InsertValue,
+ OperandTraits<InsertValueInst>::op_begin(this), 2),
+ Indices(IVI.Indices) {
+ Op<0>() = IVI.getOperand(0);
+ Op<1>() = IVI.getOperand(1);
+ SubclassOptionalData = IVI.SubclassOptionalData;
+}
+
+//===----------------------------------------------------------------------===//
+// ExtractValueInst Class
+//===----------------------------------------------------------------------===//
+
+void ExtractValueInst::init(ArrayRef<unsigned> Idxs, const Twine &Name) {
+ assert(NumOperands == 1 && "NumOperands not initialized?");
+
+ // There's no fundamental reason why we require at least one index.
+ // But there's no present need to support it.
+ assert(Idxs.size() > 0 && "ExtractValueInst must have at least one index");
+
+ Indices.append(Idxs.begin(), Idxs.end());
+ setName(Name);
+}
+
+ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
+ : UnaryInstruction(EVI.getType(), ExtractValue, EVI.getOperand(0)),
+ Indices(EVI.Indices) {
+ SubclassOptionalData = EVI.SubclassOptionalData;
+}
+
+// getIndexedType - Returns the type of the element that would be extracted
+// with an extractvalue instruction with the specified parameters.
+//
+// A null type is returned if the indices are invalid for the specified
+// pointer type.
+//
+Type *ExtractValueInst::getIndexedType(Type *Agg,
+ ArrayRef<unsigned> Idxs) {
+ for (unsigned CurIdx = 0; CurIdx != Idxs.size(); ++CurIdx) {
+ unsigned Index = Idxs[CurIdx];
+ // We can't use CompositeType::indexValid(Index) here.
+ // indexValid() always returns true for arrays because getelementptr allows
+ // out-of-bounds indices. Since we don't allow those for extractvalue and
+ // insertvalue we need to check array indexing manually.
+ // Since the only other types we can index into are struct types it's just
+ // as easy to check those manually as well.
+ if (ArrayType *AT = dyn_cast<ArrayType>(Agg)) {
+ if (Index >= AT->getNumElements())
+ return 0;
+ } else if (StructType *ST = dyn_cast<StructType>(Agg)) {
+ if (Index >= ST->getNumElements())
+ return 0;
+ } else {
+ // Not a valid type to index into.
+ return 0;
+ }
+
+ Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index);
+ }
+ return const_cast<Type*>(Agg);
+}
+
+//===----------------------------------------------------------------------===//
+// BinaryOperator Class
+//===----------------------------------------------------------------------===//
+
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+ Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(Ty, iType,
+ OperandTraits<BinaryOperator>::op_begin(this),
+ OperandTraits<BinaryOperator>::operands(this),
+ InsertBefore) {
+ Op<0>() = S1;
+ Op<1>() = S2;
+ init(iType);
+ setName(Name);
+}
+
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+ Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Ty, iType,
+ OperandTraits<BinaryOperator>::op_begin(this),
+ OperandTraits<BinaryOperator>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = S1;
+ Op<1>() = S2;
+ init(iType);
+ setName(Name);
+}
+
+
+void BinaryOperator::init(BinaryOps iType) {
+ Value *LHS = getOperand(0), *RHS = getOperand(1);
+ (void)LHS; (void)RHS; // Silence warnings.
+ assert(LHS->getType() == RHS->getType() &&
+ "Binary operator operand types must match!");
+#ifndef NDEBUG
+ switch (iType) {
+ case Add: case Sub:
+ case Mul:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isIntOrIntVectorTy() &&
+ "Tried to create an integer operation on a non-integer type!");
+ break;
+ case FAdd: case FSub:
+ case FMul:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ case UDiv:
+ case SDiv:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Incorrect operand type (not integer) for S/UDIV");
+ break;
+ case FDiv:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Incorrect operand type (not floating point) for FDIV");
+ break;
+ case URem:
+ case SRem:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Incorrect operand type (not integer) for S/UREM");
+ break;
+ case FRem:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Incorrect operand type (not floating point) for FREM");
+ break;
+ case Shl:
+ case LShr:
+ case AShr:
+ assert(getType() == LHS->getType() &&
+ "Shift operation should return same type as operands!");
+ assert((getType()->isIntegerTy() ||
+ (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Tried to create a shift operation on a non-integral type!");
+ break;
+ case And: case Or:
+ case Xor:
+ assert(getType() == LHS->getType() &&
+ "Logical operation should return same type as operands!");
+ assert((getType()->isIntegerTy() ||
+ (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Tried to create a logical operation on a non-integral type!");
+ break;
+ default:
+ break;
+ }
+#endif
+}
+
+BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(S1->getType() == S2->getType() &&
+ "Cannot create binary operator with two operands of differing type!");
+ return new BinaryOperator(Op, S1, S2, S1->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ BinaryOperator *Res = Create(Op, S1, S2, Name);
+ InsertAtEnd->getInstList().push_back(Res);
+ return Res;
+}
+
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::Sub,
+ zero, Op,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::Sub,
+ zero, Op,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::FSub, zero, Op,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::FSub, zero, Op,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Constant *C = Constant::getAllOnesValue(Op->getType());
+ return new BinaryOperator(Instruction::Xor, Op, C,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Constant *AllOnes = Constant::getAllOnesValue(Op->getType());
+ return new BinaryOperator(Instruction::Xor, Op, AllOnes,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+
+// isConstantAllOnes - Helper function for several functions below
+static inline bool isConstantAllOnes(const Value *V) {
+ if (const Constant *C = dyn_cast<Constant>(V))
+ return C->isAllOnesValue();
+ return false;
+}
+
+bool BinaryOperator::isNeg(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ if (Bop->getOpcode() == Instruction::Sub)
+ if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
+ return C->isNegativeZeroValue();
+ return false;
+}
+
+bool BinaryOperator::isFNeg(const Value *V, bool IgnoreZeroSign) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ if (Bop->getOpcode() == Instruction::FSub)
+ if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0))) {
+ if (!IgnoreZeroSign)
+ IgnoreZeroSign = cast<Instruction>(V)->hasNoSignedZeros();
+ return !IgnoreZeroSign ? C->isNegativeZeroValue() : C->isZeroValue();
+ }
+ return false;
+}
+
+bool BinaryOperator::isNot(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ return (Bop->getOpcode() == Instruction::Xor &&
+ (isConstantAllOnes(Bop->getOperand(1)) ||
+ isConstantAllOnes(Bop->getOperand(0))));
+ return false;
+}
+
+Value *BinaryOperator::getNegArgument(Value *BinOp) {
+ return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
+ return getNegArgument(const_cast<Value*>(BinOp));
+}
+
+Value *BinaryOperator::getFNegArgument(Value *BinOp) {
+ return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getFNegArgument(const Value *BinOp) {
+ return getFNegArgument(const_cast<Value*>(BinOp));
+}
+
+Value *BinaryOperator::getNotArgument(Value *BinOp) {
+ assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!");
+ BinaryOperator *BO = cast<BinaryOperator>(BinOp);
+ Value *Op0 = BO->getOperand(0);
+ Value *Op1 = BO->getOperand(1);
+ if (isConstantAllOnes(Op0)) return Op1;
+
+ assert(isConstantAllOnes(Op1));
+ return Op0;
+}
+
+const Value *BinaryOperator::getNotArgument(const Value *BinOp) {
+ return getNotArgument(const_cast<Value*>(BinOp));
+}
+
+
+// swapOperands - Exchange the two operands to this instruction. This
+// instruction is safe to use on any binary instruction and does not
+// modify the semantics of the instruction. If the instruction is
+// order dependent (SetLT f.e.) the opcode is changed.
+//
+bool BinaryOperator::swapOperands() {
+ if (!isCommutative())
+ return true; // Can't commute operands
+ Op<0>().swap(Op<1>());
+ return false;
+}
+
+void BinaryOperator::setHasNoUnsignedWrap(bool b) {
+ cast<OverflowingBinaryOperator>(this)->setHasNoUnsignedWrap(b);
+}
+
+void BinaryOperator::setHasNoSignedWrap(bool b) {
+ cast<OverflowingBinaryOperator>(this)->setHasNoSignedWrap(b);
+}
+
+void BinaryOperator::setIsExact(bool b) {
+ cast<PossiblyExactOperator>(this)->setIsExact(b);
+}
+
+bool BinaryOperator::hasNoUnsignedWrap() const {
+ return cast<OverflowingBinaryOperator>(this)->hasNoUnsignedWrap();
+}
+
+bool BinaryOperator::hasNoSignedWrap() const {
+ return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
+}
+
+bool BinaryOperator::isExact() const {
+ return cast<PossiblyExactOperator>(this)->isExact();
+}
+
+//===----------------------------------------------------------------------===//
+// FPMathOperator Class
+//===----------------------------------------------------------------------===//
+
+/// getFPAccuracy - Get the maximum error permitted by this operation in ULPs.
+/// An accuracy of 0.0 means that the operation should be performed with the
+/// default precision.
+float FPMathOperator::getFPAccuracy() const {
+ const MDNode *MD =
+ cast<Instruction>(this)->getMetadata(LLVMContext::MD_fpmath);
+ if (!MD)
+ return 0.0;
+ ConstantFP *Accuracy = cast<ConstantFP>(MD->getOperand(0));
+ return Accuracy->getValueAPF().convertToFloat();
+}
+
+
+//===----------------------------------------------------------------------===//
+// CastInst Class
+//===----------------------------------------------------------------------===//
+
+void CastInst::anchor() {}
+
+// Just determine if this cast only deals with integral->integral conversion.
+bool CastInst::isIntegerCast() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::Trunc:
+ return true;
+ case Instruction::BitCast:
+ return getOperand(0)->getType()->isIntegerTy() &&
+ getType()->isIntegerTy();
+ }
+}
+
+bool CastInst::isLosslessCast() const {
+ // Only BitCast can be lossless, exit fast if we're not BitCast
+ if (getOpcode() != Instruction::BitCast)
+ return false;
+
+ // Identity cast is always lossless
+ Type* SrcTy = getOperand(0)->getType();
+ Type* DstTy = getType();
+ if (SrcTy == DstTy)
+ return true;
+
+ // Pointer to pointer is always lossless.
+ if (SrcTy->isPointerTy())
+ return DstTy->isPointerTy();
+ return false; // Other types have no identity values
+}
+
+/// This function determines if the CastInst does not require any bits to be
+/// changed in order to effect the cast. Essentially, it identifies cases where
+/// no code gen is necessary for the cast, hence the name no-op cast. For
+/// example, the following are all no-op casts:
+/// # bitcast i32* %x to i8*
+/// # bitcast <2 x i32> %x to <4 x i16>
+/// # ptrtoint i32* %x to i32 ; on 32-bit plaforms only
+/// @brief Determine if the described cast is a no-op.
+bool CastInst::isNoopCast(Instruction::CastOps Opcode,
+ Type *SrcTy,
+ Type *DestTy,
+ Type *IntPtrTy) {
+ switch (Opcode) {
+ default: llvm_unreachable("Invalid CastOp");
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return false; // These always modify bits
+ case Instruction::BitCast:
+ return true; // BitCast never modifies bits.
+ case Instruction::PtrToInt:
+ return IntPtrTy->getScalarSizeInBits() ==
+ DestTy->getScalarSizeInBits();
+ case Instruction::IntToPtr:
+ return IntPtrTy->getScalarSizeInBits() ==
+ SrcTy->getScalarSizeInBits();
+ }
+}
+
+/// @brief Determine if a cast is a no-op.
+bool CastInst::isNoopCast(Type *IntPtrTy) const {
+ return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
+}
+
+/// This function determines if a pair of casts can be eliminated and what
+/// opcode should be used in the elimination. This assumes that there are two
+/// instructions like this:
+/// * %F = firstOpcode SrcTy %x to MidTy
+/// * %S = secondOpcode MidTy %F to DstTy
+/// The function returns a resultOpcode so these two casts can be replaced with:
+/// * %Replacement = resultOpcode %SrcTy %x to DstTy
+/// If no such cast is permited, the function returns 0.
+unsigned CastInst::isEliminableCastPair(
+ Instruction::CastOps firstOp, Instruction::CastOps secondOp,
+ Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy,
+ Type *DstIntPtrTy) {
+ // Define the 144 possibilities for these two cast instructions. The values
+ // in this matrix determine what to do in a given situation and select the
+ // case in the switch below. The rows correspond to firstOp, the columns
+ // correspond to secondOp. In looking at the table below, keep in mind
+ // the following cast properties:
+ //
+ // Size Compare Source Destination
+ // Operator Src ? Size Type Sign Type Sign
+ // -------- ------------ ------------------- ---------------------
+ // TRUNC > Integer Any Integral Any
+ // ZEXT < Integral Unsigned Integer Any
+ // SEXT < Integral Signed Integer Any
+ // FPTOUI n/a FloatPt n/a Integral Unsigned
+ // FPTOSI n/a FloatPt n/a Integral Signed
+ // UITOFP n/a Integral Unsigned FloatPt n/a
+ // SITOFP n/a Integral Signed FloatPt n/a
+ // FPTRUNC > FloatPt n/a FloatPt n/a
+ // FPEXT < FloatPt n/a FloatPt n/a
+ // PTRTOINT n/a Pointer n/a Integral Unsigned
+ // INTTOPTR n/a Integral Unsigned Pointer n/a
+ // BITCAST = FirstClass n/a FirstClass n/a
+ //
+ // NOTE: some transforms are safe, but we consider them to be non-profitable.
+ // For example, we could merge "fptoui double to i32" + "zext i32 to i64",
+ // into "fptoui double to i64", but this loses information about the range
+ // of the produced value (we no longer know the top-part is all zeros).
+ // Further this conversion is often much more expensive for typical hardware,
+ // and causes issues when building libgcc. We disallow fptosi+sext for the
+ // same reason.
+ const unsigned numCastOps =
+ Instruction::CastOpsEnd - Instruction::CastOpsBegin;
+ static const uint8_t CastResults[numCastOps][numCastOps] = {
+ // T F F U S F F P I B -+
+ // R Z S P P I I T P 2 N T |
+ // U E E 2 2 2 2 R E I T C +- secondOp
+ // N X X U S F F N X N 2 V |
+ // C T T I I P P C T T P T -+
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // Trunc -+
+ { 8, 1, 9,99,99, 2, 0,99,99,99, 2, 3 }, // ZExt |
+ { 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3 }, // SExt |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToUI |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToSI |
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // UIToFP +- firstOp
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // SIToFP |
+ { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4 }, // FPTrunc |
+ { 99,99,99, 2, 2,99,99,10, 2,99,99, 4 }, // FPExt |
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3 }, // PtrToInt |
+ { 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr |
+ { 5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast -+
+ };
+
+ // If either of the casts are a bitcast from scalar to vector, disallow the
+ // merging. However, bitcast of A->B->A are allowed.
+ bool isFirstBitcast = (firstOp == Instruction::BitCast);
+ bool isSecondBitcast = (secondOp == Instruction::BitCast);
+ bool chainedBitcast = (SrcTy == DstTy && isFirstBitcast && isSecondBitcast);
+
+ // Check if any of the bitcasts convert scalars<->vectors.
+ if ((isFirstBitcast && isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
+ (isSecondBitcast && isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
+ // Unless we are bitcasing to the original type, disallow optimizations.
+ if (!chainedBitcast) return 0;
+
+ int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
+ [secondOp-Instruction::CastOpsBegin];
+ switch (ElimCase) {
+ case 0:
+ // categorically disallowed
+ return 0;
+ case 1:
+ // allowed, use first cast's opcode
+ return firstOp;
+ case 2:
+ // allowed, use second cast's opcode
+ return secondOp;
+ case 3:
+ // no-op cast in second op implies firstOp as long as the DestTy
+ // is integer and we are not converting between a vector and a
+ // non vector type.
+ if (!SrcTy->isVectorTy() && DstTy->isIntegerTy())
+ return firstOp;
+ return 0;
+ case 4:
+ // no-op cast in second op implies firstOp as long as the DestTy
+ // is floating point.
+ if (DstTy->isFloatingPointTy())
+ return firstOp;
+ return 0;
+ case 5:
+ // no-op cast in first op implies secondOp as long as the SrcTy
+ // is an integer.
+ if (SrcTy->isIntegerTy())
+ return secondOp;
+ return 0;
+ case 6:
+ // no-op cast in first op implies secondOp as long as the SrcTy
+ // is a floating point.
+ if (SrcTy->isFloatingPointTy())
+ return secondOp;
+ return 0;
+ case 7: {
+ // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size
+ if (!SrcIntPtrTy || DstIntPtrTy != SrcIntPtrTy)
+ return 0;
+ unsigned PtrSize = SrcIntPtrTy->getScalarSizeInBits();
+ unsigned MidSize = MidTy->getScalarSizeInBits();
+ if (MidSize >= PtrSize)
+ return Instruction::BitCast;
+ return 0;
+ }
+ case 8: {
+ // ext, trunc -> bitcast, if the SrcTy and DstTy are same size
+ // ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy)
+ // ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy)
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
+ if (SrcSize == DstSize)
+ return Instruction::BitCast;
+ else if (SrcSize < DstSize)
+ return firstOp;
+ return secondOp;
+ }
+ case 9: // zext, sext -> zext, because sext can't sign extend after zext
+ return Instruction::ZExt;
+ case 10:
+ // fpext followed by ftrunc is allowed if the bit size returned to is
+ // the same as the original, in which case its just a bitcast
+ if (SrcTy == DstTy)
+ return Instruction::BitCast;
+ return 0; // If the types are not the same we can't eliminate it.
+ case 11:
+ // bitcast followed by ptrtoint is allowed as long as the bitcast
+ // is a pointer to pointer cast.
+ if (SrcTy->isPointerTy() && MidTy->isPointerTy())
+ return secondOp;
+ return 0;
+ case 12:
+ // inttoptr, bitcast -> intptr if bitcast is a ptr to ptr cast
+ if (MidTy->isPointerTy() && DstTy->isPointerTy())
+ return firstOp;
+ return 0;
+ case 13: {
+ // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
+ if (!MidIntPtrTy)
+ return 0;
+ unsigned PtrSize = MidIntPtrTy->getScalarSizeInBits();
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
+ if (SrcSize <= PtrSize && SrcSize == DstSize)
+ return Instruction::BitCast;
+ return 0;
+ }
+ case 99:
+ // cast combination can't happen (error in input). This is for all cases
+ // where the MidTy is not the same for the two cast instructions.
+ llvm_unreachable("Invalid Cast Combination");
+ default:
+ llvm_unreachable("Error in CastResults table!!!");
+ }
+}
+
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
+ const Twine &Name, Instruction *InsertBefore) {
+ assert(castIsValid(op, S, Ty) && "Invalid cast!");
+ // Construct and return the appropriate CastInst subclass
+ switch (op) {
+ case Trunc: return new TruncInst (S, Ty, Name, InsertBefore);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertBefore);
+ case SExt: return new SExtInst (S, Ty, Name, InsertBefore);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertBefore);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertBefore);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertBefore);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
+ default: llvm_unreachable("Invalid opcode provided");
+ }
+}
+
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
+ const Twine &Name, BasicBlock *InsertAtEnd) {
+ assert(castIsValid(op, S, Ty) && "Invalid cast!");
+ // Construct and return the appropriate CastInst subclass
+ switch (op) {
+ case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertAtEnd);
+ case SExt: return new SExtInst (S, Ty, Name, InsertAtEnd);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertAtEnd);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertAtEnd);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertAtEnd);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertAtEnd);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertAtEnd);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertAtEnd);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
+ default: llvm_unreachable("Invalid opcode provided");
+ }
+}
+
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::Trunc, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Invalid cast");
+
+ if (Ty->isIntegerTy())
+ return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+}
+
+/// @brief Create a BitCast or a PtrToInt cast instruction
+CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast");
+ assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) &&
+ "Invalid cast");
+
+ if (Ty->isIntOrIntVectorTy())
+ return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore);
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
+ bool isSigned, const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "Invalid integer cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return Create(opcode, C, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
+ bool isSigned, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return Create(opcode, C, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
+ return Create(opcode, C, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
+ return Create(opcode, C, Ty, Name, InsertAtEnd);
+}
+
+// Check whether it is valid to call getCastOpcode for these types.
+// This routine must be kept in sync with getCastOpcode.
+bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
+ if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType())
+ return false;
+
+ if (SrcTy == DestTy)
+ return true;
+
+ if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
+ if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
+ if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
+ // An element by element cast. Valid if casting the elements is valid.
+ SrcTy = SrcVecTy->getElementType();
+ DestTy = DestVecTy->getElementType();
+ }
+
+ // Get the bit sizes, we'll need these
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
+
+ // Run through the possibilities ...
+ if (DestTy->isIntegerTy()) { // Casting to integral
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ return true;
+ } else if (SrcTy->isVectorTy()) { // Casting from vector
+ return DestBits == SrcBits;
+ } else { // Casting from something else
+ return SrcTy->isPointerTy();
+ }
+ } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ return true;
+ } else if (SrcTy->isVectorTy()) { // Casting from vector
+ return DestBits == SrcBits;
+ } else { // Casting from something else
+ return false;
+ }
+ } else if (DestTy->isVectorTy()) { // Casting to vector
+ return DestBits == SrcBits;
+ } else if (DestTy->isPointerTy()) { // Casting to pointer
+ if (SrcTy->isPointerTy()) { // Casting from pointer
+ return true;
+ } else if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else { // Casting from something else
+ return false;
+ }
+ } else if (DestTy->isX86_MMXTy()) {
+ if (SrcTy->isVectorTy()) {
+ return DestBits == SrcBits; // 64-bit vector to MMX
+ } else {
+ return false;
+ }
+ } else { // Casting to something else
+ return false;
+ }
+}
+
+// Provide a way to get a "cast" where the cast opcode is inferred from the
+// types and size of the operand. This, basically, is a parallel of the
+// logic in the castIsValid function below. This axiom should hold:
+// castIsValid( getCastOpcode(Val, Ty), Val, Ty)
+// should not assert in castIsValid. In other words, this produces a "correct"
+// casting opcode for the arguments passed to it.
+// This routine must be kept in sync with isCastable.
+Instruction::CastOps
+CastInst::getCastOpcode(
+ const Value *Src, bool SrcIsSigned, Type *DestTy, bool DestIsSigned) {
+ Type *SrcTy = Src->getType();
+
+ assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() &&
+ "Only first class types are castable!");
+
+ if (SrcTy == DestTy)
+ return BitCast;
+
+ if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
+ if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
+ if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
+ // An element by element cast. Find the appropriate opcode based on the
+ // element types.
+ SrcTy = SrcVecTy->getElementType();
+ DestTy = DestVecTy->getElementType();
+ }
+
+ // Get the bit sizes, we'll need these
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
+
+ // Run through the possibilities ...
+ if (DestTy->isIntegerTy()) { // Casting to integral
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (DestBits < SrcBits)
+ return Trunc; // int -> smaller int
+ else if (DestBits > SrcBits) { // its an extension
+ if (SrcIsSigned)
+ return SExt; // signed -> SEXT
+ else
+ return ZExt; // unsigned -> ZEXT
+ } else {
+ return BitCast; // Same size, No-op cast
+ }
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (DestIsSigned)
+ return FPToSI; // FP -> sint
+ else
+ return FPToUI; // FP -> uint
+ } else if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Casting vector to integer of different width");
+ return BitCast; // Same size, no-op cast
+ } else {
+ assert(SrcTy->isPointerTy() &&
+ "Casting from a value that is not first-class type");
+ return PtrToInt; // ptr -> int
+ }
+ } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (SrcIsSigned)
+ return SIToFP; // sint -> FP
+ else
+ return UIToFP; // uint -> FP
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (DestBits < SrcBits) {
+ return FPTrunc; // FP -> smaller FP
+ } else if (DestBits > SrcBits) {
+ return FPExt; // FP -> larger FP
+ } else {
+ return BitCast; // same size, no-op cast
+ }
+ } else if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Casting vector to floating point of different width");
+ return BitCast; // same size, no-op cast
+ }
+ llvm_unreachable("Casting pointer or non-first class to float");
+ } else if (DestTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Illegal cast to vector (wrong type or size)");
+ return BitCast;
+ } else if (DestTy->isPointerTy()) {
+ if (SrcTy->isPointerTy()) {
+ return BitCast; // ptr -> ptr
+ } else if (SrcTy->isIntegerTy()) {
+ return IntToPtr; // int -> ptr
+ }
+ llvm_unreachable("Casting pointer to other than pointer or int");
+ } else if (DestTy->isX86_MMXTy()) {
+ if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits && "Casting vector of wrong width to X86_MMX");
+ return BitCast; // 64-bit vector to MMX
+ }
+ llvm_unreachable("Illegal cast to X86_MMX");
+ }
+ llvm_unreachable("Casting to type that is not first-class");
+}
+
+//===----------------------------------------------------------------------===//
+// CastInst SubClass Constructors
+//===----------------------------------------------------------------------===//
+
+/// Check that the construction parameters for a CastInst are correct. This
+/// could be broken out into the separate constructors but it is useful to have
+/// it in one place and to eliminate the redundant code for getting the sizes
+/// of the types involved.
+bool
+CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
+
+ // Check for type sanity on the arguments
+ Type *SrcTy = S->getType();
+
+ // If this is a cast to the same type then it's trivially true.
+ if (SrcTy == DstTy)
+ return true;
+
+ if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() ||
+ SrcTy->isAggregateType() || DstTy->isAggregateType())
+ return false;
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DstBitSize = DstTy->getScalarSizeInBits();
+
+ // If these are vector types, get the lengths of the vectors (using zero for
+ // scalar types means that checking that vector lengths match also checks that
+ // scalars are not being converted to vectors or vectors to scalars).
+ unsigned SrcLength = SrcTy->isVectorTy() ?
+ cast<VectorType>(SrcTy)->getNumElements() : 0;
+ unsigned DstLength = DstTy->isVectorTy() ?
+ cast<VectorType>(DstTy)->getNumElements() : 0;
+
+ // Switch on the opcode provided
+ switch (op) {
+ default: return false; // This is an input error
+ case Instruction::Trunc:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize > DstBitSize;
+ case Instruction::ZExt:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::SExt:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::FPTrunc:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength && SrcBitSize > DstBitSize;
+ case Instruction::FPExt:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength;
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength;
+ case Instruction::PtrToInt:
+ if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
+ return false;
+ if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
+ if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isPointerTy() &&
+ DstTy->getScalarType()->isIntegerTy();
+ case Instruction::IntToPtr:
+ if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
+ return false;
+ if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
+ if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isIntegerTy() &&
+ DstTy->getScalarType()->isPointerTy();
+ case Instruction::BitCast:
+ // BitCast implies a no-op cast of type only. No bits change.
+ // However, you can't cast pointers to anything but pointers.
+ if (SrcTy->isPointerTy() != DstTy->isPointerTy())
+ return false;
+
+ // Now we know we're not dealing with a pointer/non-pointer mismatch. In all
+ // these cases, the cast is okay if the source and destination bit widths
+ // are identical.
+ return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits();
+ }
+}
+
+TruncInst::TruncInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, Trunc, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
+}
+
+TruncInst::TruncInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
+}
+
+ZExtInst::ZExtInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
+}
+
+ZExtInst::ZExtInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
+}
+SExtInst::SExtInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, SExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
+}
+
+SExtInst::SExtInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
+}
+
+FPTruncInst::FPTruncInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
+}
+
+FPTruncInst::FPTruncInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
+}
+
+FPExtInst::FPExtInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
+}
+
+FPExtInst::FPExtInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
+}
+
+UIToFPInst::UIToFPInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
+}
+
+UIToFPInst::UIToFPInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
+}
+
+SIToFPInst::SIToFPInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
+}
+
+SIToFPInst::SIToFPInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
+}
+
+FPToUIInst::FPToUIInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
+}
+
+FPToUIInst::FPToUIInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
+}
+
+FPToSIInst::FPToSIInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
+}
+
+FPToSIInst::FPToSIInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
+}
+
+PtrToIntInst::PtrToIntInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
+}
+
+PtrToIntInst::PtrToIntInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
+}
+
+IntToPtrInst::IntToPtrInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
+}
+
+IntToPtrInst::IntToPtrInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
+}
+
+BitCastInst::BitCastInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
+}
+
+BitCastInst::BitCastInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
+}
+
+//===----------------------------------------------------------------------===//
+// CmpInst Classes
+//===----------------------------------------------------------------------===//
+
+void CmpInst::anchor() {}
+
+CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
+ Value *LHS, Value *RHS, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(ty, op,
+ OperandTraits<CmpInst>::op_begin(this),
+ OperandTraits<CmpInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ setPredicate((Predicate)predicate);
+ setName(Name);
+}
+
+CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
+ Value *LHS, Value *RHS, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(ty, op,
+ OperandTraits<CmpInst>::op_begin(this),
+ OperandTraits<CmpInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ setPredicate((Predicate)predicate);
+ setName(Name);
+}
+
+CmpInst *
+CmpInst::Create(OtherOps Op, unsigned short predicate,
+ Value *S1, Value *S2,
+ const Twine &Name, Instruction *InsertBefore) {
+ if (Op == Instruction::ICmp) {
+ if (InsertBefore)
+ return new ICmpInst(InsertBefore, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ else
+ return new ICmpInst(CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ }
+
+ if (InsertBefore)
+ return new FCmpInst(InsertBefore, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ else
+ return new FCmpInst(CmpInst::Predicate(predicate),
+ S1, S2, Name);
+}
+
+CmpInst *
+CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2,
+ const Twine &Name, BasicBlock *InsertAtEnd) {
+ if (Op == Instruction::ICmp) {
+ return new ICmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ }
+ return new FCmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+}
+
+void CmpInst::swapOperands() {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ IC->swapOperands();
+ else
+ cast<FCmpInst>(this)->swapOperands();
+}
+
+bool CmpInst::isCommutative() const {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ return IC->isCommutative();
+ return cast<FCmpInst>(this)->isCommutative();
+}
+
+bool CmpInst::isEquality() const {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ return IC->isEquality();
+ return cast<FCmpInst>(this)->isEquality();
+}
+
+
+CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
+ switch (pred) {
+ default: llvm_unreachable("Unknown cmp predicate!");
+ case ICMP_EQ: return ICMP_NE;
+ case ICMP_NE: return ICMP_EQ;
+ case ICMP_UGT: return ICMP_ULE;
+ case ICMP_ULT: return ICMP_UGE;
+ case ICMP_UGE: return ICMP_ULT;
+ case ICMP_ULE: return ICMP_UGT;
+ case ICMP_SGT: return ICMP_SLE;
+ case ICMP_SLT: return ICMP_SGE;
+ case ICMP_SGE: return ICMP_SLT;
+ case ICMP_SLE: return ICMP_SGT;
+
+ case FCMP_OEQ: return FCMP_UNE;
+ case FCMP_ONE: return FCMP_UEQ;
+ case FCMP_OGT: return FCMP_ULE;
+ case FCMP_OLT: return FCMP_UGE;
+ case FCMP_OGE: return FCMP_ULT;
+ case FCMP_OLE: return FCMP_UGT;
+ case FCMP_UEQ: return FCMP_ONE;
+ case FCMP_UNE: return FCMP_OEQ;
+ case FCMP_UGT: return FCMP_OLE;
+ case FCMP_ULT: return FCMP_OGE;
+ case FCMP_UGE: return FCMP_OLT;
+ case FCMP_ULE: return FCMP_OGT;
+ case FCMP_ORD: return FCMP_UNO;
+ case FCMP_UNO: return FCMP_ORD;
+ case FCMP_TRUE: return FCMP_FALSE;
+ case FCMP_FALSE: return FCMP_TRUE;
+ }
+}
+
+ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: llvm_unreachable("Unknown icmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
+ return pred;
+ case ICMP_UGT: return ICMP_SGT;
+ case ICMP_ULT: return ICMP_SLT;
+ case ICMP_UGE: return ICMP_SGE;
+ case ICMP_ULE: return ICMP_SLE;
+ }
+}
+
+ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: llvm_unreachable("Unknown icmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
+ return pred;
+ case ICMP_SGT: return ICMP_UGT;
+ case ICMP_SLT: return ICMP_ULT;
+ case ICMP_SGE: return ICMP_UGE;
+ case ICMP_SLE: return ICMP_ULE;
+ }
+}
+
+/// Initialize a set of values that all satisfy the condition with C.
+///
+ConstantRange
+ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
+ APInt Lower(C);
+ APInt Upper(C);
+ uint32_t BitWidth = C.getBitWidth();
+ switch (pred) {
+ default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!");
+ case ICmpInst::ICMP_EQ: ++Upper; break;
+ case ICmpInst::ICMP_NE: ++Lower; break;
+ case ICmpInst::ICMP_ULT:
+ Lower = APInt::getMinValue(BitWidth);
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_SLT:
+ Lower = APInt::getSignedMinValue(BitWidth);
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_UGT:
+ ++Lower; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_SGT:
+ ++Lower; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_ULE:
+ Lower = APInt::getMinValue(BitWidth); ++Upper;
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_SLE:
+ Lower = APInt::getSignedMinValue(BitWidth); ++Upper;
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_UGE:
+ Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_SGE:
+ Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ }
+ return ConstantRange(Lower, Upper);
+}
+
+CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
+ switch (pred) {
+ default: llvm_unreachable("Unknown cmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ return pred;
+ case ICMP_SGT: return ICMP_SLT;
+ case ICMP_SLT: return ICMP_SGT;
+ case ICMP_SGE: return ICMP_SLE;
+ case ICMP_SLE: return ICMP_SGE;
+ case ICMP_UGT: return ICMP_ULT;
+ case ICMP_ULT: return ICMP_UGT;
+ case ICMP_UGE: return ICMP_ULE;
+ case ICMP_ULE: return ICMP_UGE;
+
+ case FCMP_FALSE: case FCMP_TRUE:
+ case FCMP_OEQ: case FCMP_ONE:
+ case FCMP_UEQ: case FCMP_UNE:
+ case FCMP_ORD: case FCMP_UNO:
+ return pred;
+ case FCMP_OGT: return FCMP_OLT;
+ case FCMP_OLT: return FCMP_OGT;
+ case FCMP_OGE: return FCMP_OLE;
+ case FCMP_OLE: return FCMP_OGE;
+ case FCMP_UGT: return FCMP_ULT;
+ case FCMP_ULT: return FCMP_UGT;
+ case FCMP_UGE: return FCMP_ULE;
+ case FCMP_ULE: return FCMP_UGE;
+ }
+}
+
+bool CmpInst::isUnsigned(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return true;
+ }
+}
+
+bool CmpInst::isSigned(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return true;
+ }
+}
+
+bool CmpInst::isOrdered(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE:
+ case FCmpInst::FCMP_ORD: return true;
+ }
+}
+
+bool CmpInst::isUnordered(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UNO: return true;
+ }
+}
+
+bool CmpInst::isTrueWhenEqual(unsigned short predicate) {
+ switch(predicate) {
+ default: return false;
+ case ICMP_EQ: case ICMP_UGE: case ICMP_ULE: case ICMP_SGE: case ICMP_SLE:
+ case FCMP_TRUE: case FCMP_UEQ: case FCMP_UGE: case FCMP_ULE: return true;
+ }
+}
+
+bool CmpInst::isFalseWhenEqual(unsigned short predicate) {
+ switch(predicate) {
+ case ICMP_NE: case ICMP_UGT: case ICMP_ULT: case ICMP_SGT: case ICMP_SLT:
+ case FCMP_FALSE: case FCMP_ONE: case FCMP_OGT: case FCMP_OLT: return true;
+ default: return false;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// SwitchInst Implementation
+//===----------------------------------------------------------------------===//
+
+void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
+ assert(Value && Default && NumReserved);
+ ReservedSpace = NumReserved;
+ NumOperands = 2;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ OperandList[0] = Value;
+ OperandList[1] = Default;
+}
+
+/// SwitchInst ctor - Create a new switch instruction, specifying a value to
+/// switch on and a default destination. The number of additional cases can
+/// be specified here to make memory allocation more efficient. This
+/// constructor can also autoinsert before another instruction.
+SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ 0, 0, InsertBefore) {
+ init(Value, Default, 2+NumCases*2);
+}
+
+/// SwitchInst ctor - Create a new switch instruction, specifying a value to
+/// switch on and a default destination. The number of additional cases can
+/// be specified here to make memory allocation more efficient. This
+/// constructor also autoinserts at the end of the specified BasicBlock.
+SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ 0, 0, InsertAtEnd) {
+ init(Value, Default, 2+NumCases*2);
+}
+
+SwitchInst::SwitchInst(const SwitchInst &SI)
+ : TerminatorInst(SI.getType(), Instruction::Switch, 0, 0) {
+ init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
+ NumOperands = SI.getNumOperands();
+ Use *OL = OperandList, *InOL = SI.OperandList;
+ for (unsigned i = 2, E = SI.getNumOperands(); i != E; i += 2) {
+ OL[i] = InOL[i];
+ OL[i+1] = InOL[i+1];
+ }
+ TheSubsets = SI.TheSubsets;
+ SubclassOptionalData = SI.SubclassOptionalData;
+}
+
+SwitchInst::~SwitchInst() {
+ dropHungoffUses();
+}
+
+
+/// addCase - Add an entry to the switch instruction...
+///
+void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
+ IntegersSubsetToBB Mapping;
+
+ // FIXME: Currently we work with ConstantInt based cases.
+ // So inititalize IntItem container directly from ConstantInt.
+ Mapping.add(IntItem::fromConstantInt(OnVal));
+ IntegersSubset CaseRanges = Mapping.getCase();
+ addCase(CaseRanges, Dest);
+}
+
+void SwitchInst::addCase(IntegersSubset& OnVal, BasicBlock *Dest) {
+ unsigned NewCaseIdx = getNumCases();
+ unsigned OpNo = NumOperands;
+ if (OpNo+2 > ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
+ NumOperands = OpNo+2;
+
+ SubsetsIt TheSubsetsIt = TheSubsets.insert(TheSubsets.end(), OnVal);
+
+ CaseIt Case(this, NewCaseIdx, TheSubsetsIt);
+ Case.updateCaseValueOperand(OnVal);
+ Case.setSuccessor(Dest);
+}
+
+/// removeCase - This method removes the specified case and its successor
+/// from the switch instruction.
+void SwitchInst::removeCase(CaseIt& i) {
+ unsigned idx = i.getCaseIndex();
+
+ assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
+
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+
+ // Overwrite this case with the end of the list.
+ if (2 + (idx + 1) * 2 != NumOps) {
+ OL[2 + idx * 2] = OL[NumOps - 2];
+ OL[2 + idx * 2 + 1] = OL[NumOps - 1];
+ }
+
+ // Nuke the last value.
+ OL[NumOps-2].set(0);
+ OL[NumOps-2+1].set(0);
+
+ // Do the same with TheCases collection:
+ if (i.SubsetIt != --TheSubsets.end()) {
+ *i.SubsetIt = TheSubsets.back();
+ TheSubsets.pop_back();
+ } else {
+ TheSubsets.pop_back();
+ i.SubsetIt = TheSubsets.end();
+ }
+
+ NumOperands = NumOps-2;
+}
+
+/// growOperands - grow operands - This grows the operand list in response
+/// to a push_back style of operation. This grows the number of ops by 3 times.
+///
+void SwitchInst::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e*3;
+
+ ReservedSpace = NumOps;
+ Use *NewOps = allocHungoffUses(NumOps);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i) {
+ NewOps[i] = OldOps[i];
+ }
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+
+BasicBlock *SwitchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned SwitchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+//===----------------------------------------------------------------------===//
+// IndirectBrInst Implementation
+//===----------------------------------------------------------------------===//
+
+void IndirectBrInst::init(Value *Address, unsigned NumDests) {
+ assert(Address && Address->getType()->isPointerTy() &&
+ "Address of indirectbr must be a pointer");
+ ReservedSpace = 1+NumDests;
+ NumOperands = 1;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ OperandList[0] = Address;
+}
+
+
+/// growOperands - grow operands - This grows the operand list in response
+/// to a push_back style of operation. This grows the number of ops by 2 times.
+///
+void IndirectBrInst::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e*2;
+
+ ReservedSpace = NumOps;
+ Use *NewOps = allocHungoffUses(NumOps);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i)
+ NewOps[i] = OldOps[i];
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
+ Instruction *InsertBefore)
+: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
+ 0, 0, InsertBefore) {
+ init(Address, NumCases);
+}
+
+IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
+ BasicBlock *InsertAtEnd)
+: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
+ 0, 0, InsertAtEnd) {
+ init(Address, NumCases);
+}
+
+IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
+ : TerminatorInst(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
+ allocHungoffUses(IBI.getNumOperands()),
+ IBI.getNumOperands()) {
+ Use *OL = OperandList, *InOL = IBI.OperandList;
+ for (unsigned i = 0, E = IBI.getNumOperands(); i != E; ++i)
+ OL[i] = InOL[i];
+ SubclassOptionalData = IBI.SubclassOptionalData;
+}
+
+IndirectBrInst::~IndirectBrInst() {
+ dropHungoffUses();
+}
+
+/// addDestination - Add a destination.
+///
+void IndirectBrInst::addDestination(BasicBlock *DestBB) {
+ unsigned OpNo = NumOperands;
+ if (OpNo+1 > ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ assert(OpNo < ReservedSpace && "Growing didn't work!");
+ NumOperands = OpNo+1;
+ OperandList[OpNo] = DestBB;
+}
+
+/// removeDestination - This method removes the specified successor from the
+/// indirectbr instruction.
+void IndirectBrInst::removeDestination(unsigned idx) {
+ assert(idx < getNumOperands()-1 && "Successor index out of range!");
+
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+
+ // Replace this value with the last one.
+ OL[idx+1] = OL[NumOps-1];
+
+ // Nuke the last value.
+ OL[NumOps-1].set(0);
+ NumOperands = NumOps-1;
+}
+
+BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned IndirectBrInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+//===----------------------------------------------------------------------===//
+// clone_impl() implementations
+//===----------------------------------------------------------------------===//
+
+// Define these methods here so vtables don't get emitted into every translation
+// unit that uses these classes.
+
+GetElementPtrInst *GetElementPtrInst::clone_impl() const {
+ return new (getNumOperands()) GetElementPtrInst(*this);
+}
+
+BinaryOperator *BinaryOperator::clone_impl() const {
+ return Create(getOpcode(), Op<0>(), Op<1>());
+}
+
+FCmpInst* FCmpInst::clone_impl() const {
+ return new FCmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+
+ICmpInst* ICmpInst::clone_impl() const {
+ return new ICmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+
+ExtractValueInst *ExtractValueInst::clone_impl() const {
+ return new ExtractValueInst(*this);
+}
+
+InsertValueInst *InsertValueInst::clone_impl() const {
+ return new InsertValueInst(*this);
+}
+
+AllocaInst *AllocaInst::clone_impl() const {
+ return new AllocaInst(getAllocatedType(),
+ (Value*)getOperand(0),
+ getAlignment());
+}
+
+LoadInst *LoadInst::clone_impl() const {
+ return new LoadInst(getOperand(0), Twine(), isVolatile(),
+ getAlignment(), getOrdering(), getSynchScope());
+}
+
+StoreInst *StoreInst::clone_impl() const {
+ return new StoreInst(getOperand(0), getOperand(1), isVolatile(),
+ getAlignment(), getOrdering(), getSynchScope());
+
+}
+
+AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const {
+ AtomicCmpXchgInst *Result =
+ new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2),
+ getOrdering(), getSynchScope());
+ Result->setVolatile(isVolatile());
+ return Result;
+}
+
+AtomicRMWInst *AtomicRMWInst::clone_impl() const {
+ AtomicRMWInst *Result =
+ new AtomicRMWInst(getOperation(),getOperand(0), getOperand(1),
+ getOrdering(), getSynchScope());
+ Result->setVolatile(isVolatile());
+ return Result;
+}
+
+FenceInst *FenceInst::clone_impl() const {
+ return new FenceInst(getContext(), getOrdering(), getSynchScope());
+}
+
+TruncInst *TruncInst::clone_impl() const {
+ return new TruncInst(getOperand(0), getType());
+}
+
+ZExtInst *ZExtInst::clone_impl() const {
+ return new ZExtInst(getOperand(0), getType());
+}
+
+SExtInst *SExtInst::clone_impl() const {
+ return new SExtInst(getOperand(0), getType());
+}
+
+FPTruncInst *FPTruncInst::clone_impl() const {
+ return new FPTruncInst(getOperand(0), getType());
+}
+
+FPExtInst *FPExtInst::clone_impl() const {
+ return new FPExtInst(getOperand(0), getType());
+}
+
+UIToFPInst *UIToFPInst::clone_impl() const {
+ return new UIToFPInst(getOperand(0), getType());
+}
+
+SIToFPInst *SIToFPInst::clone_impl() const {
+ return new SIToFPInst(getOperand(0), getType());
+}
+
+FPToUIInst *FPToUIInst::clone_impl() const {
+ return new FPToUIInst(getOperand(0), getType());
+}
+
+FPToSIInst *FPToSIInst::clone_impl() const {
+ return new FPToSIInst(getOperand(0), getType());
+}
+
+PtrToIntInst *PtrToIntInst::clone_impl() const {
+ return new PtrToIntInst(getOperand(0), getType());
+}
+
+IntToPtrInst *IntToPtrInst::clone_impl() const {
+ return new IntToPtrInst(getOperand(0), getType());
+}
+
+BitCastInst *BitCastInst::clone_impl() const {
+ return new BitCastInst(getOperand(0), getType());
+}
+
+CallInst *CallInst::clone_impl() const {
+ return new(getNumOperands()) CallInst(*this);
+}
+
+SelectInst *SelectInst::clone_impl() const {
+ return SelectInst::Create(getOperand(0), getOperand(1), getOperand(2));
+}
+
+VAArgInst *VAArgInst::clone_impl() const {
+ return new VAArgInst(getOperand(0), getType());
+}
+
+ExtractElementInst *ExtractElementInst::clone_impl() const {
+ return ExtractElementInst::Create(getOperand(0), getOperand(1));
+}
+
+InsertElementInst *InsertElementInst::clone_impl() const {
+ return InsertElementInst::Create(getOperand(0), getOperand(1), getOperand(2));
+}
+
+ShuffleVectorInst *ShuffleVectorInst::clone_impl() const {
+ return new ShuffleVectorInst(getOperand(0), getOperand(1), getOperand(2));
+}
+
+PHINode *PHINode::clone_impl() const {
+ return new PHINode(*this);
+}
+
+LandingPadInst *LandingPadInst::clone_impl() const {
+ return new LandingPadInst(*this);
+}
+
+ReturnInst *ReturnInst::clone_impl() const {
+ return new(getNumOperands()) ReturnInst(*this);
+}
+
+BranchInst *BranchInst::clone_impl() const {
+ return new(getNumOperands()) BranchInst(*this);
+}
+
+SwitchInst *SwitchInst::clone_impl() const {
+ return new SwitchInst(*this);
+}
+
+IndirectBrInst *IndirectBrInst::clone_impl() const {
+ return new IndirectBrInst(*this);
+}
+
+
+InvokeInst *InvokeInst::clone_impl() const {
+ return new(getNumOperands()) InvokeInst(*this);
+}
+
+ResumeInst *ResumeInst::clone_impl() const {
+ return new(1) ResumeInst(*this);
+}
+
+UnreachableInst *UnreachableInst::clone_impl() const {
+ LLVMContext &Context = getContext();
+ return new UnreachableInst(Context);
+}
diff --git a/contrib/llvm/lib/IR/IntrinsicInst.cpp b/contrib/llvm/lib/IR/IntrinsicInst.cpp
new file mode 100644
index 000000000000..51f88d2e6fbd
--- /dev/null
+++ b/contrib/llvm/lib/IR/IntrinsicInst.cpp
@@ -0,0 +1,73 @@
+//===-- InstrinsicInst.cpp - Intrinsic Instruction Wrappers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements methods that make it really easy to deal with intrinsic
+// functions.
+//
+// All intrinsic function calls are instances of the call instruction, so these
+// are all subclasses of the CallInst class. Note that none of these classes
+// has state or virtual methods, which is an important part of this gross/neat
+// hack working.
+//
+// In some cases, arguments to intrinsics need to be generic and are defined as
+// type pointer to empty struct { }*. To access the real item of interest the
+// cast instruction needs to be stripped away.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Metadata.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
+///
+
+static Value *CastOperand(Value *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (CE->isCast())
+ return CE->getOperand(0);
+ return NULL;
+}
+
+Value *DbgInfoIntrinsic::StripCast(Value *C) {
+ if (Value *CO = CastOperand(C)) {
+ C = StripCast(CO);
+ } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasInitializer())
+ if (Value *CO = CastOperand(GV->getInitializer()))
+ C = StripCast(CO);
+ }
+ return dyn_cast<GlobalVariable>(C);
+}
+
+//===----------------------------------------------------------------------===//
+/// DbgDeclareInst - This represents the llvm.dbg.declare instruction.
+///
+
+Value *DbgDeclareInst::getAddress() const {
+ if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
+ return MD->getOperand(0);
+ else
+ return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+/// DbgValueInst - This represents the llvm.dbg.value instruction.
+///
+
+const Value *DbgValueInst::getValue() const {
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
+}
+
+Value *DbgValueInst::getValue() {
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
+}
diff --git a/contrib/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm/lib/IR/LLVMContext.cpp
new file mode 100644
index 000000000000..883bb9878fa5
--- /dev/null
+++ b/contrib/llvm/lib/IR/LLVMContext.cpp
@@ -0,0 +1,168 @@
+//===-- LLVMContext.cpp - Implement LLVMContext ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LLVMContext, as a wrapper around the opaque
+// class LLVMContextImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/LLVMContext.h"
+#include "LLVMContextImpl.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/SourceMgr.h"
+#include <cctype>
+using namespace llvm;
+
+static ManagedStatic<LLVMContext> GlobalContext;
+
+LLVMContext& llvm::getGlobalContext() {
+ return *GlobalContext;
+}
+
+LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
+ // Create the fixed metadata kinds. This is done in the same order as the
+ // MD_* enum values so that they correspond.
+
+ // Create the 'dbg' metadata kind.
+ unsigned DbgID = getMDKindID("dbg");
+ assert(DbgID == MD_dbg && "dbg kind id drifted"); (void)DbgID;
+
+ // Create the 'tbaa' metadata kind.
+ unsigned TBAAID = getMDKindID("tbaa");
+ assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID;
+
+ // Create the 'prof' metadata kind.
+ unsigned ProfID = getMDKindID("prof");
+ assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID;
+
+ // Create the 'fpmath' metadata kind.
+ unsigned FPAccuracyID = getMDKindID("fpmath");
+ assert(FPAccuracyID == MD_fpmath && "fpmath kind id drifted");
+ (void)FPAccuracyID;
+
+ // Create the 'range' metadata kind.
+ unsigned RangeID = getMDKindID("range");
+ assert(RangeID == MD_range && "range kind id drifted");
+ (void)RangeID;
+
+ // Create the 'tbaa.struct' metadata kind.
+ unsigned TBAAStructID = getMDKindID("tbaa.struct");
+ assert(TBAAStructID == MD_tbaa_struct && "tbaa.struct kind id drifted");
+ (void)TBAAStructID;
+
+ // Create the 'invariant.load' metadata kind.
+ unsigned InvariantLdId = getMDKindID("invariant.load");
+ assert(InvariantLdId == MD_invariant_load && "invariant.load kind id drifted");
+ (void)InvariantLdId;
+}
+LLVMContext::~LLVMContext() { delete pImpl; }
+
+void LLVMContext::addModule(Module *M) {
+ pImpl->OwnedModules.insert(M);
+}
+
+void LLVMContext::removeModule(Module *M) {
+ pImpl->OwnedModules.erase(M);
+}
+
+//===----------------------------------------------------------------------===//
+// Recoverable Backend Errors
+//===----------------------------------------------------------------------===//
+
+void LLVMContext::
+setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler,
+ void *DiagContext) {
+ pImpl->InlineAsmDiagHandler = DiagHandler;
+ pImpl->InlineAsmDiagContext = DiagContext;
+}
+
+/// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by
+/// setInlineAsmDiagnosticHandler.
+LLVMContext::InlineAsmDiagHandlerTy
+LLVMContext::getInlineAsmDiagnosticHandler() const {
+ return pImpl->InlineAsmDiagHandler;
+}
+
+/// getInlineAsmDiagnosticContext - Return the diagnostic context set by
+/// setInlineAsmDiagnosticHandler.
+void *LLVMContext::getInlineAsmDiagnosticContext() const {
+ return pImpl->InlineAsmDiagContext;
+}
+
+void LLVMContext::emitError(const Twine &ErrorStr) {
+ emitError(0U, ErrorStr);
+}
+
+void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) {
+ unsigned LocCookie = 0;
+ if (const MDNode *SrcLoc = I->getMetadata("srcloc")) {
+ if (SrcLoc->getNumOperands() != 0)
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(SrcLoc->getOperand(0)))
+ LocCookie = CI->getZExtValue();
+ }
+ return emitError(LocCookie, ErrorStr);
+}
+
+void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) {
+ // If there is no error handler installed, just print the error and exit.
+ if (pImpl->InlineAsmDiagHandler == 0) {
+ errs() << "error: " << ErrorStr << "\n";
+ exit(1);
+ }
+
+ // If we do have an error handler, we can report the error and keep going.
+ SMDiagnostic Diag("", SourceMgr::DK_Error, ErrorStr.str());
+
+ pImpl->InlineAsmDiagHandler(Diag, pImpl->InlineAsmDiagContext, LocCookie);
+}
+
+//===----------------------------------------------------------------------===//
+// Metadata Kind Uniquing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+/// isValidName - Return true if Name is a valid custom metadata handler name.
+static bool isValidName(StringRef MDName) {
+ if (MDName.empty())
+ return false;
+
+ if (!std::isalpha(static_cast<unsigned char>(MDName[0])))
+ return false;
+
+ for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E;
+ ++I) {
+ if (!std::isalnum(static_cast<unsigned char>(*I)) && *I != '_' &&
+ *I != '-' && *I != '.')
+ return false;
+ }
+ return true;
+}
+#endif
+
+/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
+unsigned LLVMContext::getMDKindID(StringRef Name) const {
+ assert(isValidName(Name) && "Invalid MDNode name");
+
+ // If this is new, assign it its ID.
+ return
+ pImpl->CustomMDKindNames.GetOrCreateValue(
+ Name, pImpl->CustomMDKindNames.size()).second;
+}
+
+/// getHandlerNames - Populate client supplied smallvector using custome
+/// metadata name and ID.
+void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
+ Names.resize(pImpl->CustomMDKindNames.size());
+ for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(),
+ E = pImpl->CustomMDKindNames.end(); I != E; ++I)
+ Names[I->second] = I->first();
+}
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.cpp b/contrib/llvm/lib/IR/LLVMContextImpl.cpp
new file mode 100644
index 000000000000..6a6a4d6801f0
--- /dev/null
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.cpp
@@ -0,0 +1,156 @@
+//===-- LLVMContextImpl.cpp - Implement LLVMContextImpl -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the opaque LLVMContextImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Module.h"
+#include <algorithm>
+using namespace llvm;
+
+LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
+ : TheTrueVal(0), TheFalseVal(0),
+ VoidTy(C, Type::VoidTyID),
+ LabelTy(C, Type::LabelTyID),
+ HalfTy(C, Type::HalfTyID),
+ FloatTy(C, Type::FloatTyID),
+ DoubleTy(C, Type::DoubleTyID),
+ MetadataTy(C, Type::MetadataTyID),
+ X86_FP80Ty(C, Type::X86_FP80TyID),
+ FP128Ty(C, Type::FP128TyID),
+ PPC_FP128Ty(C, Type::PPC_FP128TyID),
+ X86_MMXTy(C, Type::X86_MMXTyID),
+ Int1Ty(C, 1),
+ Int8Ty(C, 8),
+ Int16Ty(C, 16),
+ Int32Ty(C, 32),
+ Int64Ty(C, 64) {
+ InlineAsmDiagHandler = 0;
+ InlineAsmDiagContext = 0;
+ NamedStructTypesUniqueID = 0;
+}
+
+namespace {
+struct DropReferences {
+ // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second'
+ // is a Constant*.
+ template<typename PairT>
+ void operator()(const PairT &P) {
+ P.second->dropAllReferences();
+ }
+};
+
+// Temporary - drops pair.first instead of second.
+struct DropFirst {
+ // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second'
+ // is a Constant*.
+ template<typename PairT>
+ void operator()(const PairT &P) {
+ P.first->dropAllReferences();
+ }
+};
+}
+
+LLVMContextImpl::~LLVMContextImpl() {
+ // NOTE: We need to delete the contents of OwnedModules, but we have to
+ // duplicate it into a temporary vector, because the destructor of Module
+ // will try to remove itself from OwnedModules set. This would cause
+ // iterator invalidation if we iterated on the set directly.
+ std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
+ DeleteContainerPointers(Modules);
+
+ // Free the constants. This is important to do here to ensure that they are
+ // freed before the LeakDetector is torn down.
+ std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
+ DropReferences());
+ std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
+ DropFirst());
+ std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
+ DropFirst());
+ std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(),
+ DropFirst());
+ ExprConstants.freeConstants();
+ ArrayConstants.freeConstants();
+ StructConstants.freeConstants();
+ VectorConstants.freeConstants();
+ DeleteContainerSeconds(CAZConstants);
+ DeleteContainerSeconds(CPNConstants);
+ DeleteContainerSeconds(UVConstants);
+ InlineAsms.freeConstants();
+ DeleteContainerSeconds(IntConstants);
+ DeleteContainerSeconds(FPConstants);
+
+ for (StringMap<ConstantDataSequential*>::iterator I = CDSConstants.begin(),
+ E = CDSConstants.end(); I != E; ++I)
+ delete I->second;
+ CDSConstants.clear();
+
+ // Destroy attributes.
+ for (FoldingSetIterator<AttributeImpl> I = AttrsSet.begin(),
+ E = AttrsSet.end(); I != E; ) {
+ FoldingSetIterator<AttributeImpl> Elem = I++;
+ delete &*Elem;
+ }
+
+ // Destroy attribute lists.
+ for (FoldingSetIterator<AttributeSetImpl> I = AttrsLists.begin(),
+ E = AttrsLists.end(); I != E; ) {
+ FoldingSetIterator<AttributeSetImpl> Elem = I++;
+ delete &*Elem;
+ }
+
+ // Destroy attribute node lists.
+ for (FoldingSetIterator<AttributeSetNode> I = AttrsSetNodes.begin(),
+ E = AttrsSetNodes.end(); I != E; ) {
+ FoldingSetIterator<AttributeSetNode> Elem = I++;
+ delete &*Elem;
+ }
+
+ // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
+ // and the NonUniquedMDNodes sets, so copy the values out first.
+ SmallVector<MDNode*, 8> MDNodes;
+ MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
+ for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
+ I != E; ++I)
+ MDNodes.push_back(&*I);
+ MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
+ for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(),
+ E = MDNodes.end(); I != E; ++I)
+ (*I)->destroy();
+ assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
+ "Destroying all MDNodes didn't empty the Context's sets.");
+
+ // Destroy MDStrings.
+ DeleteContainerSeconds(MDStringCache);
+}
+
+// ConstantsContext anchors
+void UnaryConstantExpr::anchor() { }
+
+void BinaryConstantExpr::anchor() { }
+
+void SelectConstantExpr::anchor() { }
+
+void ExtractElementConstantExpr::anchor() { }
+
+void InsertElementConstantExpr::anchor() { }
+
+void ShuffleVectorConstantExpr::anchor() { }
+
+void ExtractValueConstantExpr::anchor() { }
+
+void InsertValueConstantExpr::anchor() { }
+
+void GetElementPtrConstantExpr::anchor() { }
+
+void CompareConstantExpr::anchor() { }
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h
new file mode 100644
index 000000000000..0c659b81b706
--- /dev/null
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.h
@@ -0,0 +1,367 @@
+//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares LLVMContextImpl, the opaque implementation
+// of LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LLVMCONTEXT_IMPL_H
+#define LLVM_LLVMCONTEXT_IMPL_H
+
+#include "AttributeImpl.h"
+#include "ConstantsContext.h"
+#include "LeaksContext.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/ValueHandle.h"
+#include <vector>
+
+namespace llvm {
+
+class ConstantInt;
+class ConstantFP;
+class LLVMContext;
+class Type;
+class Value;
+
+struct DenseMapAPIntKeyInfo {
+ struct KeyTy {
+ APInt val;
+ Type* type;
+ KeyTy(const APInt& V, Type* Ty) : val(V), type(Ty) {}
+ bool operator==(const KeyTy& that) const {
+ return type == that.type && this->val == that.val;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ friend hash_code hash_value(const KeyTy &Key) {
+ return hash_combine(Key.type, Key.val);
+ }
+ };
+ static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
+ static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return static_cast<unsigned>(hash_value(Key));
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+};
+
+struct DenseMapAPFloatKeyInfo {
+ struct KeyTy {
+ APFloat val;
+ KeyTy(const APFloat& V) : val(V){}
+ bool operator==(const KeyTy& that) const {
+ return this->val.bitwiseIsEqual(that.val);
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ friend hash_code hash_value(const KeyTy &Key) {
+ return hash_combine(Key.val);
+ }
+ };
+ static inline KeyTy getEmptyKey() {
+ return KeyTy(APFloat(APFloat::Bogus,1));
+ }
+ static inline KeyTy getTombstoneKey() {
+ return KeyTy(APFloat(APFloat::Bogus,2));
+ }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return static_cast<unsigned>(hash_value(Key));
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+};
+
+struct AnonStructTypeKeyInfo {
+ struct KeyTy {
+ ArrayRef<Type*> ETypes;
+ bool isPacked;
+ KeyTy(const ArrayRef<Type*>& E, bool P) :
+ ETypes(E), isPacked(P) {}
+ KeyTy(const StructType* ST) :
+ ETypes(ArrayRef<Type*>(ST->element_begin(), ST->element_end())),
+ isPacked(ST->isPacked()) {}
+ bool operator==(const KeyTy& that) const {
+ if (isPacked != that.isPacked)
+ return false;
+ if (ETypes != that.ETypes)
+ return false;
+ return true;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline StructType* getEmptyKey() {
+ return DenseMapInfo<StructType*>::getEmptyKey();
+ }
+ static inline StructType* getTombstoneKey() {
+ return DenseMapInfo<StructType*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const KeyTy& Key) {
+ return hash_combine(hash_combine_range(Key.ETypes.begin(),
+ Key.ETypes.end()),
+ Key.isPacked);
+ }
+ static unsigned getHashValue(const StructType *ST) {
+ return getHashValue(KeyTy(ST));
+ }
+ static bool isEqual(const KeyTy& LHS, const StructType *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS == KeyTy(RHS);
+ }
+ static bool isEqual(const StructType *LHS, const StructType *RHS) {
+ return LHS == RHS;
+ }
+};
+
+struct FunctionTypeKeyInfo {
+ struct KeyTy {
+ const Type *ReturnType;
+ ArrayRef<Type*> Params;
+ bool isVarArg;
+ KeyTy(const Type* R, const ArrayRef<Type*>& P, bool V) :
+ ReturnType(R), Params(P), isVarArg(V) {}
+ KeyTy(const FunctionType* FT) :
+ ReturnType(FT->getReturnType()),
+ Params(ArrayRef<Type*>(FT->param_begin(), FT->param_end())),
+ isVarArg(FT->isVarArg()) {}
+ bool operator==(const KeyTy& that) const {
+ if (ReturnType != that.ReturnType)
+ return false;
+ if (isVarArg != that.isVarArg)
+ return false;
+ if (Params != that.Params)
+ return false;
+ return true;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline FunctionType* getEmptyKey() {
+ return DenseMapInfo<FunctionType*>::getEmptyKey();
+ }
+ static inline FunctionType* getTombstoneKey() {
+ return DenseMapInfo<FunctionType*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const KeyTy& Key) {
+ return hash_combine(Key.ReturnType,
+ hash_combine_range(Key.Params.begin(),
+ Key.Params.end()),
+ Key.isVarArg);
+ }
+ static unsigned getHashValue(const FunctionType *FT) {
+ return getHashValue(KeyTy(FT));
+ }
+ static bool isEqual(const KeyTy& LHS, const FunctionType *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS == KeyTy(RHS);
+ }
+ static bool isEqual(const FunctionType *LHS, const FunctionType *RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide a FoldingSetTrait::Equals specialization for MDNode that can use a
+// shortcut to avoid comparing all operands.
+template<> struct FoldingSetTrait<MDNode> : DefaultFoldingSetTrait<MDNode> {
+ static bool Equals(const MDNode &X, const FoldingSetNodeID &ID,
+ unsigned IDHash, FoldingSetNodeID &TempID) {
+ assert(!X.isNotUniqued() && "Non-uniqued MDNode in FoldingSet?");
+ // First, check if the cached hashes match. If they don't we can skip the
+ // expensive operand walk.
+ if (X.Hash != IDHash)
+ return false;
+
+ // If they match we have to compare the operands.
+ X.Profile(TempID);
+ return TempID == ID;
+ }
+ static unsigned ComputeHash(const MDNode &X, FoldingSetNodeID &) {
+ return X.Hash; // Return cached hash.
+ }
+};
+
+/// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps
+/// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp.
+class DebugRecVH : public CallbackVH {
+ /// Ctx - This is the LLVM Context being referenced.
+ LLVMContextImpl *Ctx;
+
+ /// Idx - The index into either ScopeRecordIdx or ScopeInlinedAtRecords that
+ /// this reference lives in. If this is zero, then it represents a
+ /// non-canonical entry that has no DenseMap value. This can happen due to
+ /// RAUW.
+ int Idx;
+public:
+ DebugRecVH(MDNode *n, LLVMContextImpl *ctx, int idx)
+ : CallbackVH(n), Ctx(ctx), Idx(idx) {}
+
+ MDNode *get() const {
+ return cast_or_null<MDNode>(getValPtr());
+ }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *VNew);
+};
+
+class LLVMContextImpl {
+public:
+ /// OwnedModules - The set of modules instantiated in this context, and which
+ /// will be automatically deleted if this context is deleted.
+ SmallPtrSet<Module*, 4> OwnedModules;
+
+ LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler;
+ void *InlineAsmDiagContext;
+
+ typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
+ DenseMapAPIntKeyInfo> IntMapTy;
+ IntMapTy IntConstants;
+
+ typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
+ DenseMapAPFloatKeyInfo> FPMapTy;
+ FPMapTy FPConstants;
+
+ FoldingSet<AttributeImpl> AttrsSet;
+ FoldingSet<AttributeSetImpl> AttrsLists;
+ FoldingSet<AttributeSetNode> AttrsSetNodes;
+
+ StringMap<Value*> MDStringCache;
+
+ FoldingSet<MDNode> MDNodeSet;
+
+ // MDNodes may be uniqued or not uniqued. When they're not uniqued, they
+ // aren't in the MDNodeSet, but they're still shared between objects, so no
+ // one object can destroy them. This set allows us to at least destroy them
+ // on Context destruction.
+ SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
+
+ DenseMap<Type*, ConstantAggregateZero*> CAZConstants;
+
+ typedef ConstantAggrUniqueMap<ArrayType, ConstantArray> ArrayConstantsTy;
+ ArrayConstantsTy ArrayConstants;
+
+ typedef ConstantAggrUniqueMap<StructType, ConstantStruct> StructConstantsTy;
+ StructConstantsTy StructConstants;
+
+ typedef ConstantAggrUniqueMap<VectorType, ConstantVector> VectorConstantsTy;
+ VectorConstantsTy VectorConstants;
+
+ DenseMap<PointerType*, ConstantPointerNull*> CPNConstants;
+
+ DenseMap<Type*, UndefValue*> UVConstants;
+
+ StringMap<ConstantDataSequential*> CDSConstants;
+
+
+ DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
+ ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
+ ExprConstants;
+
+ ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType,
+ InlineAsm> InlineAsms;
+
+ ConstantInt *TheTrueVal;
+ ConstantInt *TheFalseVal;
+
+ LeakDetectorImpl<Value> LLVMObjects;
+
+ // Basic type instances.
+ Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy;
+ Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
+ IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty;
+
+
+ /// TypeAllocator - All dynamically allocated types are allocated from this.
+ /// They live forever until the context is torn down.
+ BumpPtrAllocator TypeAllocator;
+
+ DenseMap<unsigned, IntegerType*> IntegerTypes;
+
+ typedef DenseMap<FunctionType*, bool, FunctionTypeKeyInfo> FunctionTypeMap;
+ FunctionTypeMap FunctionTypes;
+ typedef DenseMap<StructType*, bool, AnonStructTypeKeyInfo> StructTypeMap;
+ StructTypeMap AnonStructTypes;
+ StringMap<StructType*> NamedStructTypes;
+ unsigned NamedStructTypesUniqueID;
+
+ DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
+ DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
+ DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
+ DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
+
+
+ /// ValueHandles - This map keeps track of all of the value handles that are
+ /// watching a Value*. The Value::HasValueHandle bit is used to know
+ /// whether or not a value has an entry in this map.
+ typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
+ ValueHandlesTy ValueHandles;
+
+ /// CustomMDKindNames - Map to hold the metadata string to ID mapping.
+ StringMap<unsigned> CustomMDKindNames;
+
+ typedef std::pair<unsigned, TrackingVH<MDNode> > MDPairTy;
+ typedef SmallVector<MDPairTy, 2> MDMapTy;
+
+ /// MetadataStore - Collection of per-instruction metadata used in this
+ /// context.
+ DenseMap<const Instruction *, MDMapTy> MetadataStore;
+
+ /// ScopeRecordIdx - This is the index in ScopeRecords for an MDNode scope
+ /// entry with no "inlined at" element.
+ DenseMap<MDNode*, int> ScopeRecordIdx;
+
+ /// ScopeRecords - These are the actual mdnodes (in a value handle) for an
+ /// index. The ValueHandle ensures that ScopeRecordIdx stays up to date if
+ /// the MDNode is RAUW'd.
+ std::vector<DebugRecVH> ScopeRecords;
+
+ /// ScopeInlinedAtIdx - This is the index in ScopeInlinedAtRecords for an
+ /// scope/inlined-at pair.
+ DenseMap<std::pair<MDNode*, MDNode*>, int> ScopeInlinedAtIdx;
+
+ /// ScopeInlinedAtRecords - These are the actual mdnodes (in value handles)
+ /// for an index. The ValueHandle ensures that ScopeINlinedAtIdx stays up
+ /// to date.
+ std::vector<std::pair<DebugRecVH, DebugRecVH> > ScopeInlinedAtRecords;
+
+ /// IntrinsicIDCache - Cache of intrinsic name (string) to numeric ID mappings
+ /// requested in this context
+ typedef DenseMap<const Function*, unsigned> IntrinsicIDCacheTy;
+ IntrinsicIDCacheTy IntrinsicIDCache;
+
+ int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx);
+ int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx);
+
+ LLVMContextImpl(LLVMContext &C);
+ ~LLVMContextImpl();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/IR/LeakDetector.cpp b/contrib/llvm/lib/IR/LeakDetector.cpp
new file mode 100644
index 000000000000..835e5e61cdf9
--- /dev/null
+++ b/contrib/llvm/lib/IR/LeakDetector.cpp
@@ -0,0 +1,69 @@
+//===-- LeakDetector.cpp - Implement LeakDetector interface ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LeakDetector class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/LeakDetector.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Threading.h"
+using namespace llvm;
+
+static ManagedStatic<sys::SmartMutex<true> > ObjectsLock;
+static ManagedStatic<LeakDetectorImpl<void> > Objects;
+
+static void clearGarbage(LLVMContext &Context) {
+ Objects->clear();
+ Context.pImpl->LLVMObjects.clear();
+}
+
+void LeakDetector::addGarbageObjectImpl(void *Object) {
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+ Objects->addGarbage(Object);
+}
+
+void LeakDetector::addGarbageObjectImpl(const Value *Object) {
+ LLVMContextImpl *pImpl = Object->getContext().pImpl;
+ pImpl->LLVMObjects.addGarbage(Object);
+}
+
+void LeakDetector::removeGarbageObjectImpl(void *Object) {
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+ Objects->removeGarbage(Object);
+}
+
+void LeakDetector::removeGarbageObjectImpl(const Value *Object) {
+ LLVMContextImpl *pImpl = Object->getContext().pImpl;
+ pImpl->LLVMObjects.removeGarbage(Object);
+}
+
+void LeakDetector::checkForGarbageImpl(LLVMContext &Context,
+ const std::string &Message) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+
+ Objects->setName("GENERIC");
+ pImpl->LLVMObjects.setName("LLVM");
+
+ // use non-short-circuit version so that both checks are performed
+ if (Objects->hasGarbage(Message) |
+ pImpl->LLVMObjects.hasGarbage(Message))
+ errs() << "\nThis is probably because you removed an object, but didn't "
+ << "delete it. Please check your code for memory leaks.\n";
+
+ // Clear out results so we don't get duplicate warnings on
+ // next call...
+ clearGarbage(Context);
+}
diff --git a/contrib/llvm/lib/IR/LeaksContext.h b/contrib/llvm/lib/IR/LeaksContext.h
new file mode 100644
index 000000000000..5038dc9d6d6d
--- /dev/null
+++ b/contrib/llvm/lib/IR/LeaksContext.h
@@ -0,0 +1,92 @@
+//===- LeaksContext.h - LeadDetector Implementation ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for leaks detectors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Value.h"
+
+namespace llvm {
+
+template <class T>
+struct PrinterTrait {
+ static void print(const T* P) { errs() << P; }
+};
+
+template<>
+struct PrinterTrait<Value> {
+ static void print(const Value* P) { errs() << *P; }
+};
+
+template <typename T>
+struct LeakDetectorImpl {
+ explicit LeakDetectorImpl(const char* const name = "") :
+ Cache(0), Name(name) { }
+
+ void clear() {
+ Cache = 0;
+ Ts.clear();
+ }
+
+ void setName(const char* n) {
+ Name = n;
+ }
+
+ // Because the most common usage pattern, by far, is to add a
+ // garbage object, then remove it immediately, we optimize this
+ // case. When an object is added, it is not added to the set
+ // immediately, it is added to the CachedValue Value. If it is
+ // immediately removed, no set search need be performed.
+ void addGarbage(const T* o) {
+ assert(Ts.count(o) == 0 && "Object already in set!");
+ if (Cache) {
+ assert(Cache != o && "Object already in set!");
+ Ts.insert(Cache);
+ }
+ Cache = o;
+ }
+
+ void removeGarbage(const T* o) {
+ if (o == Cache)
+ Cache = 0; // Cache hit
+ else
+ Ts.erase(o);
+ }
+
+ bool hasGarbage(const std::string& Message) {
+ addGarbage(0); // Flush the Cache
+
+ assert(Cache == 0 && "No value should be cached anymore!");
+
+ if (!Ts.empty()) {
+ errs() << "Leaked " << Name << " objects found: " << Message << ":\n";
+ for (typename SmallPtrSet<const T*, 8>::iterator I = Ts.begin(),
+ E = Ts.end(); I != E; ++I) {
+ errs() << '\t';
+ PrinterTrait<T>::print(*I);
+ errs() << '\n';
+ }
+ errs() << '\n';
+
+ return true;
+ }
+
+ return false;
+ }
+
+private:
+ SmallPtrSet<const T*, 8> Ts;
+ const T* Cache;
+ const char* Name;
+};
+
+}
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
new file mode 100644
index 000000000000..0228aeb31f5d
--- /dev/null
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -0,0 +1,745 @@
+//===-- Metadata.cpp - Implement Metadata classes -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Metadata classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Metadata.h"
+#include "LLVMContextImpl.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ValueHandle.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MDString implementation.
+//
+
+void MDString::anchor() { }
+
+MDString::MDString(LLVMContext &C)
+ : Value(Type::getMetadataTy(C), Value::MDStringVal) {}
+
+MDString *MDString::get(LLVMContext &Context, StringRef Str) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ StringMapEntry<Value*> &Entry =
+ pImpl->MDStringCache.GetOrCreateValue(Str);
+ Value *&S = Entry.getValue();
+ if (!S) S = new MDString(Context);
+ S->setValueName(&Entry);
+ return cast<MDString>(S);
+}
+
+//===----------------------------------------------------------------------===//
+// MDNodeOperand implementation.
+//
+
+// Use CallbackVH to hold MDNode operands.
+namespace llvm {
+class MDNodeOperand : public CallbackVH {
+ MDNode *getParent() {
+ MDNodeOperand *Cur = this;
+
+ while (Cur->getValPtrInt() != 1)
+ --Cur;
+
+ assert(Cur->getValPtrInt() == 1 &&
+ "Couldn't find the beginning of the operand list!");
+ return reinterpret_cast<MDNode*>(Cur) - 1;
+ }
+
+public:
+ MDNodeOperand(Value *V) : CallbackVH(V) {}
+ ~MDNodeOperand() {}
+
+ void set(Value *V) {
+ unsigned IsFirst = this->getValPtrInt();
+ this->setValPtr(V);
+ this->setAsFirstOperand(IsFirst);
+ }
+
+ /// setAsFirstOperand - Accessor method to mark the operand as the first in
+ /// the list.
+ void setAsFirstOperand(unsigned V) { this->setValPtrInt(V); }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *NV);
+};
+} // end namespace llvm.
+
+
+void MDNodeOperand::deleted() {
+ getParent()->replaceOperand(this, 0);
+}
+
+void MDNodeOperand::allUsesReplacedWith(Value *NV) {
+ getParent()->replaceOperand(this, NV);
+}
+
+//===----------------------------------------------------------------------===//
+// MDNode implementation.
+//
+
+/// getOperandPtr - Helper function to get the MDNodeOperand's coallocated on
+/// the end of the MDNode.
+static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
+ // Use <= instead of < to permit a one-past-the-end address.
+ assert(Op <= N->getNumOperands() && "Invalid operand number");
+ return reinterpret_cast<MDNodeOperand*>(N + 1) + Op;
+}
+
+void MDNode::replaceOperandWith(unsigned i, Value *Val) {
+ MDNodeOperand *Op = getOperandPtr(this, i);
+ replaceOperand(Op, Val);
+}
+
+MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
+: Value(Type::getMetadataTy(C), Value::MDNodeVal) {
+ NumOperands = Vals.size();
+
+ if (isFunctionLocal)
+ setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit);
+
+ // Initialize the operand list, which is co-allocated on the end of the node.
+ unsigned i = 0;
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+ Op != E; ++Op, ++i) {
+ new (Op) MDNodeOperand(Vals[i]);
+
+ // Mark the first MDNodeOperand as being the first in the list of operands.
+ if (i == 0)
+ Op->setAsFirstOperand(1);
+ }
+}
+
+/// ~MDNode - Destroy MDNode.
+MDNode::~MDNode() {
+ assert((getSubclassDataFromValue() & DestroyFlag) != 0 &&
+ "Not being destroyed through destroy()?");
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ if (isNotUniqued()) {
+ pImpl->NonUniquedMDNodes.erase(this);
+ } else {
+ pImpl->MDNodeSet.RemoveNode(this);
+ }
+
+ // Destroy the operands.
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+ Op != E; ++Op)
+ Op->~MDNodeOperand();
+}
+
+static const Function *getFunctionForValue(Value *V) {
+ if (!V) return NULL;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ BasicBlock *BB = I->getParent();
+ return BB ? BB->getParent() : 0;
+ }
+ if (Argument *A = dyn_cast<Argument>(V))
+ return A->getParent();
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return BB->getParent();
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ return MD->getFunction();
+ return NULL;
+}
+
+#ifndef NDEBUG
+static const Function *assertLocalFunction(const MDNode *N) {
+ if (!N->isFunctionLocal()) return 0;
+
+ // FIXME: This does not handle cyclic function local metadata.
+ const Function *F = 0, *NewF = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i)) {
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ NewF = assertLocalFunction(MD);
+ else
+ NewF = getFunctionForValue(V);
+ }
+ if (F == 0)
+ F = NewF;
+ else
+ assert((NewF == 0 || F == NewF) &&"inconsistent function-local metadata");
+ }
+ return F;
+}
+#endif
+
+// getFunction - If this metadata is function-local and recursively has a
+// function-local operand, return the first such operand's parent function.
+// Otherwise, return null. getFunction() should not be used for performance-
+// critical code because it recursively visits all the MDNode's operands.
+const Function *MDNode::getFunction() const {
+#ifndef NDEBUG
+ return assertLocalFunction(this);
+#else
+ if (!isFunctionLocal()) return NULL;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (const Function *F = getFunctionForValue(getOperand(i)))
+ return F;
+ return NULL;
+#endif
+}
+
+// destroy - Delete this node. Only when there are no uses.
+void MDNode::destroy() {
+ setValueSubclassData(getSubclassDataFromValue() | DestroyFlag);
+ // Placement delete, then free the memory.
+ this->~MDNode();
+ free(this);
+}
+
+/// isFunctionLocalValue - Return true if this is a value that would require a
+/// function-local MDNode.
+static bool isFunctionLocalValue(Value *V) {
+ return isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) ||
+ (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal());
+}
+
+MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
+ FunctionLocalness FL, bool Insert) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i != Vals.size(); ++i)
+ ID.AddPointer(Vals[i]);
+
+ void *InsertPoint;
+ MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (N || !Insert)
+ return N;
+
+ bool isFunctionLocal = false;
+ switch (FL) {
+ case FL_Unknown:
+ for (unsigned i = 0; i != Vals.size(); ++i) {
+ Value *V = Vals[i];
+ if (!V) continue;
+ if (isFunctionLocalValue(V)) {
+ isFunctionLocal = true;
+ break;
+ }
+ }
+ break;
+ case FL_No:
+ isFunctionLocal = false;
+ break;
+ case FL_Yes:
+ isFunctionLocal = true;
+ break;
+ }
+
+ // Coallocate space for the node and Operands together, then placement new.
+ void *Ptr = malloc(sizeof(MDNode) + Vals.size() * sizeof(MDNodeOperand));
+ N = new (Ptr) MDNode(Context, Vals, isFunctionLocal);
+
+ // Cache the operand hash.
+ N->Hash = ID.ComputeHash();
+
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ pImpl->MDNodeSet.InsertNode(N, InsertPoint);
+
+ return N;
+}
+
+MDNode *MDNode::get(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ return getMDNode(Context, Vals, FL_Unknown);
+}
+
+MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context,
+ ArrayRef<Value*> Vals,
+ bool isFunctionLocal) {
+ return getMDNode(Context, Vals, isFunctionLocal ? FL_Yes : FL_No);
+}
+
+MDNode *MDNode::getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ return getMDNode(Context, Vals, FL_Unknown, false);
+}
+
+MDNode *MDNode::getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ MDNode *N =
+ (MDNode *)malloc(sizeof(MDNode) + Vals.size() * sizeof(MDNodeOperand));
+ N = new (N) MDNode(Context, Vals, FL_No);
+ N->setValueSubclassData(N->getSubclassDataFromValue() |
+ NotUniquedBit);
+ LeakDetector::addGarbageObject(N);
+ return N;
+}
+
+void MDNode::deleteTemporary(MDNode *N) {
+ assert(N->use_empty() && "Temporary MDNode has uses!");
+ assert(!N->getContext().pImpl->MDNodeSet.RemoveNode(N) &&
+ "Deleting a non-temporary uniqued node!");
+ assert(!N->getContext().pImpl->NonUniquedMDNodes.erase(N) &&
+ "Deleting a non-temporary non-uniqued node!");
+ assert((N->getSubclassDataFromValue() & NotUniquedBit) &&
+ "Temporary MDNode does not have NotUniquedBit set!");
+ assert((N->getSubclassDataFromValue() & DestroyFlag) == 0 &&
+ "Temporary MDNode has DestroyFlag set!");
+ LeakDetector::removeGarbageObject(N);
+ N->destroy();
+}
+
+/// getOperand - Return specified operand.
+Value *MDNode::getOperand(unsigned i) const {
+ assert(i < getNumOperands() && "Invalid operand number");
+ return *getOperandPtr(const_cast<MDNode*>(this), i);
+}
+
+void MDNode::Profile(FoldingSetNodeID &ID) const {
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ ID.AddPointer(getOperand(i));
+}
+
+void MDNode::setIsNotUniqued() {
+ setValueSubclassData(getSubclassDataFromValue() | NotUniquedBit);
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ pImpl->NonUniquedMDNodes.insert(this);
+}
+
+// Replace value from this node's operand list.
+void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
+ Value *From = *Op;
+
+ // If is possible that someone did GV->RAUW(inst), replacing a global variable
+ // with an instruction or some other function-local object. If this is a
+ // non-function-local MDNode, it can't point to a function-local object.
+ // Handle this case by implicitly dropping the MDNode reference to null.
+ // Likewise if the MDNode is function-local but for a different function.
+ if (To && isFunctionLocalValue(To)) {
+ if (!isFunctionLocal())
+ To = 0;
+ else {
+ const Function *F = getFunction();
+ const Function *FV = getFunctionForValue(To);
+ // Metadata can be function-local without having an associated function.
+ // So only consider functions to have changed if non-null.
+ if (F && FV && F != FV)
+ To = 0;
+ }
+ }
+
+ if (From == To)
+ return;
+
+ // Update the operand.
+ Op->set(To);
+
+ // If this node is already not being uniqued (because one of the operands
+ // already went to null), then there is nothing else to do here.
+ if (isNotUniqued()) return;
+
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+
+ // Remove "this" from the context map. FoldingSet doesn't have to reprofile
+ // this node to remove it, so we don't care what state the operands are in.
+ pImpl->MDNodeSet.RemoveNode(this);
+
+ // If we are dropping an argument to null, we choose to not unique the MDNode
+ // anymore. This commonly occurs during destruction, and uniquing these
+ // brings little reuse. Also, this means we don't need to include
+ // isFunctionLocal bits in FoldingSetNodeIDs for MDNodes.
+ if (To == 0) {
+ setIsNotUniqued();
+ return;
+ }
+
+ // Now that the node is out of the folding set, get ready to reinsert it.
+ // First, check to see if another node with the same operands already exists
+ // in the set. If so, then this node is redundant.
+ FoldingSetNodeID ID;
+ Profile(ID);
+ void *InsertPoint;
+ if (MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)) {
+ replaceAllUsesWith(N);
+ destroy();
+ return;
+ }
+
+ // Cache the operand hash.
+ Hash = ID.ComputeHash();
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ pImpl->MDNodeSet.InsertNode(this, InsertPoint);
+
+ // If this MDValue was previously function-local but no longer is, clear
+ // its function-local flag.
+ if (isFunctionLocal() && !isFunctionLocalValue(To)) {
+ bool isStillFunctionLocal = false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Value *V = getOperand(i);
+ if (!V) continue;
+ if (isFunctionLocalValue(V)) {
+ isStillFunctionLocal = true;
+ break;
+ }
+ }
+ if (!isStillFunctionLocal)
+ setValueSubclassData(getSubclassDataFromValue() & ~FunctionLocalBit);
+ }
+}
+
+MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
+ if (!A || !B)
+ return NULL;
+
+ if (A == B)
+ return A;
+
+ SmallVector<MDNode *, 4> PathA;
+ MDNode *T = A;
+ while (T) {
+ PathA.push_back(T);
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+ }
+
+ SmallVector<MDNode *, 4> PathB;
+ T = B;
+ while (T) {
+ PathB.push_back(T);
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+ }
+
+ int IA = PathA.size() - 1;
+ int IB = PathB.size() - 1;
+
+ MDNode *Ret = 0;
+ while (IA >= 0 && IB >=0) {
+ if (PathA[IA] == PathB[IB])
+ Ret = PathA[IA];
+ else
+ break;
+ --IA;
+ --IB;
+ }
+ return Ret;
+}
+
+MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
+ if (!A || !B)
+ return NULL;
+
+ APFloat AVal = cast<ConstantFP>(A->getOperand(0))->getValueAPF();
+ APFloat BVal = cast<ConstantFP>(B->getOperand(0))->getValueAPF();
+ if (AVal.compare(BVal) == APFloat::cmpLessThan)
+ return A;
+ return B;
+}
+
+static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
+ return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
+}
+
+static bool canBeMerged(const ConstantRange &A, const ConstantRange &B) {
+ return !A.intersectWith(B).isEmptySet() || isContiguous(A, B);
+}
+
+static bool tryMergeRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low,
+ ConstantInt *High) {
+ ConstantRange NewRange(Low->getValue(), High->getValue());
+ unsigned Size = EndPoints.size();
+ APInt LB = cast<ConstantInt>(EndPoints[Size - 2])->getValue();
+ APInt LE = cast<ConstantInt>(EndPoints[Size - 1])->getValue();
+ ConstantRange LastRange(LB, LE);
+ if (canBeMerged(NewRange, LastRange)) {
+ ConstantRange Union = LastRange.unionWith(NewRange);
+ Type *Ty = High->getType();
+ EndPoints[Size - 2] = ConstantInt::get(Ty, Union.getLower());
+ EndPoints[Size - 1] = ConstantInt::get(Ty, Union.getUpper());
+ return true;
+ }
+ return false;
+}
+
+static void addRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low,
+ ConstantInt *High) {
+ if (!EndPoints.empty())
+ if (tryMergeRange(EndPoints, Low, High))
+ return;
+
+ EndPoints.push_back(Low);
+ EndPoints.push_back(High);
+}
+
+MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
+ // Given two ranges, we want to compute the union of the ranges. This
+ // is slightly complitade by having to combine the intervals and merge
+ // the ones that overlap.
+
+ if (!A || !B)
+ return NULL;
+
+ if (A == B)
+ return A;
+
+ // First, walk both lists in older of the lower boundary of each interval.
+ // At each step, try to merge the new interval to the last one we adedd.
+ SmallVector<Value*, 4> EndPoints;
+ int AI = 0;
+ int BI = 0;
+ int AN = A->getNumOperands() / 2;
+ int BN = B->getNumOperands() / 2;
+ while (AI < AN && BI < BN) {
+ ConstantInt *ALow = cast<ConstantInt>(A->getOperand(2 * AI));
+ ConstantInt *BLow = cast<ConstantInt>(B->getOperand(2 * BI));
+
+ if (ALow->getValue().slt(BLow->getValue())) {
+ addRange(EndPoints, ALow, cast<ConstantInt>(A->getOperand(2 * AI + 1)));
+ ++AI;
+ } else {
+ addRange(EndPoints, BLow, cast<ConstantInt>(B->getOperand(2 * BI + 1)));
+ ++BI;
+ }
+ }
+ while (AI < AN) {
+ addRange(EndPoints, cast<ConstantInt>(A->getOperand(2 * AI)),
+ cast<ConstantInt>(A->getOperand(2 * AI + 1)));
+ ++AI;
+ }
+ while (BI < BN) {
+ addRange(EndPoints, cast<ConstantInt>(B->getOperand(2 * BI)),
+ cast<ConstantInt>(B->getOperand(2 * BI + 1)));
+ ++BI;
+ }
+
+ // If we have more than 2 ranges (4 endpoints) we have to try to merge
+ // the last and first ones.
+ unsigned Size = EndPoints.size();
+ if (Size > 4) {
+ ConstantInt *FB = cast<ConstantInt>(EndPoints[0]);
+ ConstantInt *FE = cast<ConstantInt>(EndPoints[1]);
+ if (tryMergeRange(EndPoints, FB, FE)) {
+ for (unsigned i = 0; i < Size - 2; ++i) {
+ EndPoints[i] = EndPoints[i + 2];
+ }
+ EndPoints.resize(Size - 2);
+ }
+ }
+
+ // If in the end we have a single range, it is possible that it is now the
+ // full range. Just drop the metadata in that case.
+ if (EndPoints.size() == 2) {
+ ConstantRange Range(cast<ConstantInt>(EndPoints[0])->getValue(),
+ cast<ConstantInt>(EndPoints[1])->getValue());
+ if (Range.isFullSet())
+ return NULL;
+ }
+
+ return MDNode::get(A->getContext(), EndPoints);
+}
+
+//===----------------------------------------------------------------------===//
+// NamedMDNode implementation.
+//
+
+static SmallVector<TrackingVH<MDNode>, 4> &getNMDOps(void *Operands) {
+ return *(SmallVector<TrackingVH<MDNode>, 4>*)Operands;
+}
+
+NamedMDNode::NamedMDNode(const Twine &N)
+ : Name(N.str()), Parent(0),
+ Operands(new SmallVector<TrackingVH<MDNode>, 4>()) {
+}
+
+NamedMDNode::~NamedMDNode() {
+ dropAllReferences();
+ delete &getNMDOps(Operands);
+}
+
+/// getNumOperands - Return number of NamedMDNode operands.
+unsigned NamedMDNode::getNumOperands() const {
+ return (unsigned)getNMDOps(Operands).size();
+}
+
+/// getOperand - Return specified operand.
+MDNode *NamedMDNode::getOperand(unsigned i) const {
+ assert(i < getNumOperands() && "Invalid Operand number!");
+ return dyn_cast<MDNode>(&*getNMDOps(Operands)[i]);
+}
+
+/// addOperand - Add metadata Operand.
+void NamedMDNode::addOperand(MDNode *M) {
+ assert(!M->isFunctionLocal() &&
+ "NamedMDNode operands must not be function-local!");
+ getNMDOps(Operands).push_back(TrackingVH<MDNode>(M));
+}
+
+/// eraseFromParent - Drop all references and remove the node from parent
+/// module.
+void NamedMDNode::eraseFromParent() {
+ getParent()->eraseNamedMetadata(this);
+}
+
+/// dropAllReferences - Remove all uses and clear node vector.
+void NamedMDNode::dropAllReferences() {
+ getNMDOps(Operands).clear();
+}
+
+/// getName - Return a constant reference to this named metadata's name.
+StringRef NamedMDNode::getName() const {
+ return StringRef(Name);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Metadata method implementations.
+//
+
+void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
+ if (Node == 0 && !hasMetadata()) return;
+ setMetadata(getContext().getMDKindID(Kind), Node);
+}
+
+MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
+ return getMetadataImpl(getContext().getMDKindID(Kind));
+}
+
+/// setMetadata - Set the metadata of of the specified kind to the specified
+/// node. This updates/replaces metadata if already present, or removes it if
+/// Node is null.
+void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
+ if (Node == 0 && !hasMetadata()) return;
+
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (KindID == LLVMContext::MD_dbg) {
+ DbgLoc = DebugLoc::getFromDILocation(Node);
+ return;
+ }
+
+ // Handle the case when we're adding/updating metadata on an instruction.
+ if (Node) {
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ assert(!Info.empty() == hasMetadataHashEntry() &&
+ "HasMetadata bit is wonked");
+ if (Info.empty()) {
+ setHasMetadataHashEntry(true);
+ } else {
+ // Handle replacement of an existing value.
+ for (unsigned i = 0, e = Info.size(); i != e; ++i)
+ if (Info[i].first == KindID) {
+ Info[i].second = Node;
+ return;
+ }
+ }
+
+ // No replacement, just add it to the list.
+ Info.push_back(std::make_pair(KindID, Node));
+ return;
+ }
+
+ // Otherwise, we're removing metadata from an instruction.
+ assert((hasMetadataHashEntry() ==
+ getContext().pImpl->MetadataStore.count(this)) &&
+ "HasMetadata bit out of date!");
+ if (!hasMetadataHashEntry())
+ return; // Nothing to remove!
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+
+ // Common case is removing the only entry.
+ if (Info.size() == 1 && Info[0].first == KindID) {
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
+ return;
+ }
+
+ // Handle removal of an existing value.
+ for (unsigned i = 0, e = Info.size(); i != e; ++i)
+ if (Info[i].first == KindID) {
+ Info[i] = Info.back();
+ Info.pop_back();
+ assert(!Info.empty() && "Removing last entry should be handled above");
+ return;
+ }
+ // Otherwise, removing an entry that doesn't exist on the instruction.
+}
+
+MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (KindID == LLVMContext::MD_dbg)
+ return DbgLoc.getAsMDNode(getContext());
+
+ if (!hasMetadataHashEntry()) return 0;
+
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ assert(!Info.empty() && "bit out of sync with hash table");
+
+ for (LLVMContextImpl::MDMapTy::iterator I = Info.begin(), E = Info.end();
+ I != E; ++I)
+ if (I->first == KindID)
+ return I->second;
+ return 0;
+}
+
+void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &Result) const {
+ Result.clear();
+
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (!DbgLoc.isUnknown()) {
+ Result.push_back(std::make_pair((unsigned)LLVMContext::MD_dbg,
+ DbgLoc.getAsMDNode(getContext())));
+ if (!hasMetadataHashEntry()) return;
+ }
+
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "Shouldn't have called this");
+ const LLVMContextImpl::MDMapTy &Info =
+ getContext().pImpl->MetadataStore.find(this)->second;
+ assert(!Info.empty() && "Shouldn't have called this");
+
+ Result.append(Info.begin(), Info.end());
+
+ // Sort the resulting array so it is stable.
+ if (Result.size() > 1)
+ array_pod_sort(Result.begin(), Result.end());
+}
+
+void Instruction::
+getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &Result) const {
+ Result.clear();
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "Shouldn't have called this");
+ const LLVMContextImpl::MDMapTy &Info =
+ getContext().pImpl->MetadataStore.find(this)->second;
+ assert(!Info.empty() && "Shouldn't have called this");
+ Result.append(Info.begin(), Info.end());
+
+ // Sort the resulting array so it is stable.
+ if (Result.size() > 1)
+ array_pod_sort(Result.begin(), Result.end());
+}
+
+/// clearMetadataHashEntries - Clear all hashtable-based metadata from
+/// this instruction.
+void Instruction::clearMetadataHashEntries() {
+ assert(hasMetadataHashEntry() && "Caller should check");
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
+}
+
diff --git a/contrib/llvm/lib/IR/Module.cpp b/contrib/llvm/lib/IR/Module.cpp
new file mode 100644
index 000000000000..8affcc946960
--- /dev/null
+++ b/contrib/llvm/lib/IR/Module.cpp
@@ -0,0 +1,451 @@
+//===-- Module.cpp - Implement the Module class ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Module class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Module.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/GVMaterializer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/LeakDetector.h"
+#include <algorithm>
+#include <cstdarg>
+#include <cstdlib>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Methods to implement the globals and functions lists.
+//
+
+// Explicit instantiations of SymbolTableListTraits since some of the methods
+// are not in the public header file.
+template class llvm::SymbolTableListTraits<Function, Module>;
+template class llvm::SymbolTableListTraits<GlobalVariable, Module>;
+template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
+
+//===----------------------------------------------------------------------===//
+// Primitive Module methods.
+//
+
+Module::Module(StringRef MID, LLVMContext& C)
+ : Context(C), Materializer(NULL), ModuleID(MID) {
+ ValSymTab = new ValueSymbolTable();
+ NamedMDSymTab = new StringMap<NamedMDNode *>();
+ Context.addModule(this);
+}
+
+Module::~Module() {
+ Context.removeModule(this);
+ dropAllReferences();
+ GlobalList.clear();
+ FunctionList.clear();
+ AliasList.clear();
+ NamedMDList.clear();
+ delete ValSymTab;
+ delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
+}
+
+/// Target endian information.
+Module::Endianness Module::getEndianness() const {
+ StringRef temp = DataLayout;
+ Module::Endianness ret = AnyEndianness;
+
+ while (!temp.empty()) {
+ std::pair<StringRef, StringRef> P = getToken(temp, "-");
+
+ StringRef token = P.first;
+ temp = P.second;
+
+ if (token[0] == 'e') {
+ ret = LittleEndian;
+ } else if (token[0] == 'E') {
+ ret = BigEndian;
+ }
+ }
+
+ return ret;
+}
+
+/// Target Pointer Size information.
+Module::PointerSize Module::getPointerSize() const {
+ StringRef temp = DataLayout;
+ Module::PointerSize ret = AnyPointerSize;
+
+ while (!temp.empty()) {
+ std::pair<StringRef, StringRef> TmpP = getToken(temp, "-");
+ temp = TmpP.second;
+ TmpP = getToken(TmpP.first, ":");
+ StringRef token = TmpP.second, signalToken = TmpP.first;
+
+ if (signalToken[0] == 'p') {
+ int size = 0;
+ getToken(token, ":").first.getAsInteger(10, size);
+ if (size == 32)
+ ret = Pointer32;
+ else if (size == 64)
+ ret = Pointer64;
+ }
+ }
+
+ return ret;
+}
+
+/// getNamedValue - Return the first global value in the module with
+/// the specified name, of arbitrary type. This method returns null
+/// if a global with the specified name is not found.
+GlobalValue *Module::getNamedValue(StringRef Name) const {
+ return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
+}
+
+/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
+/// This ID is uniqued across modules in the current LLVMContext.
+unsigned Module::getMDKindID(StringRef Name) const {
+ return Context.getMDKindID(Name);
+}
+
+/// getMDKindNames - Populate client supplied SmallVector with the name for
+/// custom metadata IDs registered in this LLVMContext. ID #0 is not used,
+/// so it is filled in as an empty string.
+void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const {
+ return Context.getMDKindNames(Result);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the functions in the module.
+//
+
+// getOrInsertFunction - Look up the specified function in the module symbol
+// table. If it does not exist, add a prototype for the function and return
+// it. This is nice because it allows most passes to get away with not handling
+// the symbol table directly for this common task.
+//
+Constant *Module::getOrInsertFunction(StringRef Name,
+ FunctionType *Ty,
+ AttributeSet AttributeList) {
+ // See if we have a definition for the specified function already.
+ GlobalValue *F = getNamedValue(Name);
+ if (F == 0) {
+ // Nope, add it
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ if (!New->isIntrinsic()) // Intrinsics get attrs set on construction
+ New->setAttributes(AttributeList);
+ FunctionList.push_back(New);
+ return New; // Return the new prototype.
+ }
+
+ // Okay, the function exists. Does it have externally visible linkage?
+ if (F->hasLocalLinkage()) {
+ // Clear the function's name.
+ F->setName("");
+ // Retry, now there won't be a conflict.
+ Constant *NewF = getOrInsertFunction(Name, Ty);
+ F->setName(Name);
+ return NewF;
+ }
+
+ // If the function exists but has the wrong type, return a bitcast to the
+ // right type.
+ if (F->getType() != PointerType::getUnqual(Ty))
+ return ConstantExpr::getBitCast(F, PointerType::getUnqual(Ty));
+
+ // Otherwise, we just found the existing function or a prototype.
+ return F;
+}
+
+Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
+ FunctionType *Ty,
+ AttributeSet AttributeList) {
+ // See if we have a definition for the specified function already.
+ GlobalValue *F = getNamedValue(Name);
+ if (F == 0) {
+ // Nope, add it
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ New->setAttributes(AttributeList);
+ FunctionList.push_back(New);
+ return New; // Return the new prototype.
+ }
+
+ // Otherwise, we just found the existing function or a prototype.
+ return F;
+}
+
+Constant *Module::getOrInsertFunction(StringRef Name,
+ FunctionType *Ty) {
+ return getOrInsertFunction(Name, Ty, AttributeSet());
+}
+
+// getOrInsertFunction - Look up the specified function in the module symbol
+// table. If it does not exist, add a prototype for the function and return it.
+// This version of the method takes a null terminated list of function
+// arguments, which makes it easier for clients to use.
+//
+Constant *Module::getOrInsertFunction(StringRef Name,
+ AttributeSet AttributeList,
+ Type *RetTy, ...) {
+ va_list Args;
+ va_start(Args, RetTy);
+
+ // Build the list of argument types...
+ std::vector<Type*> ArgTys;
+ while (Type *ArgTy = va_arg(Args, Type*))
+ ArgTys.push_back(ArgTy);
+
+ va_end(Args);
+
+ // Build the function type and chain to the other getOrInsertFunction...
+ return getOrInsertFunction(Name,
+ FunctionType::get(RetTy, ArgTys, false),
+ AttributeList);
+}
+
+Constant *Module::getOrInsertFunction(StringRef Name,
+ Type *RetTy, ...) {
+ va_list Args;
+ va_start(Args, RetTy);
+
+ // Build the list of argument types...
+ std::vector<Type*> ArgTys;
+ while (Type *ArgTy = va_arg(Args, Type*))
+ ArgTys.push_back(ArgTy);
+
+ va_end(Args);
+
+ // Build the function type and chain to the other getOrInsertFunction...
+ return getOrInsertFunction(Name,
+ FunctionType::get(RetTy, ArgTys, false),
+ AttributeSet());
+}
+
+// getFunction - Look up the specified function in the module symbol table.
+// If it does not exist, return null.
+//
+Function *Module::getFunction(StringRef Name) const {
+ return dyn_cast_or_null<Function>(getNamedValue(Name));
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the global variables in the module.
+//
+
+/// getGlobalVariable - Look up the specified global variable in the module
+/// symbol table. If it does not exist, return null. The type argument
+/// should be the underlying type of the global, i.e., it should not have
+/// the top-level PointerType, which represents the address of the global.
+/// If AllowLocal is set to true, this function will return types that
+/// have an local. By default, these types are not returned.
+///
+GlobalVariable *Module::getGlobalVariable(StringRef Name,
+ bool AllowLocal) const {
+ if (GlobalVariable *Result =
+ dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
+ if (AllowLocal || !Result->hasLocalLinkage())
+ return Result;
+ return 0;
+}
+
+/// getOrInsertGlobal - Look up the specified global in the module symbol table.
+/// 1. If it does not exist, add a declaration of the global and return it.
+/// 2. Else, the global exists but has the wrong type: return the function
+/// with a constantexpr cast to the right type.
+/// 3. Finally, if the existing global is the correct delclaration, return the
+/// existing global.
+Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
+ // See if we have a definition for the specified global already.
+ GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
+ if (GV == 0) {
+ // Nope, add it
+ GlobalVariable *New =
+ new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
+ 0, Name);
+ return New; // Return the new declaration.
+ }
+
+ // If the variable exists but has the wrong type, return a bitcast to the
+ // right type.
+ if (GV->getType() != PointerType::getUnqual(Ty))
+ return ConstantExpr::getBitCast(GV, PointerType::getUnqual(Ty));
+
+ // Otherwise, we just found the existing function or a prototype.
+ return GV;
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the global variables in the module.
+//
+
+// getNamedAlias - Look up the specified global in the module symbol table.
+// If it does not exist, return null.
+//
+GlobalAlias *Module::getNamedAlias(StringRef Name) const {
+ return dyn_cast_or_null<GlobalAlias>(getNamedValue(Name));
+}
+
+/// getNamedMetadata - Return the first NamedMDNode in the module with the
+/// specified name. This method returns null if a NamedMDNode with the
+/// specified name is not found.
+NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
+ SmallString<256> NameData;
+ StringRef NameRef = Name.toStringRef(NameData);
+ return static_cast<StringMap<NamedMDNode*> *>(NamedMDSymTab)->lookup(NameRef);
+}
+
+/// getOrInsertNamedMetadata - Return the first named MDNode in the module
+/// with the specified name. This method returns a new NamedMDNode if a
+/// NamedMDNode with the specified name is not found.
+NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
+ NamedMDNode *&NMD =
+ (*static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab))[Name];
+ if (!NMD) {
+ NMD = new NamedMDNode(Name);
+ NMD->setParent(this);
+ NamedMDList.push_back(NMD);
+ }
+ return NMD;
+}
+
+/// eraseNamedMetadata - Remove the given NamedMDNode from this module and
+/// delete it.
+void Module::eraseNamedMetadata(NamedMDNode *NMD) {
+ static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
+ NamedMDList.erase(NMD);
+}
+
+/// getModuleFlagsMetadata - Returns the module flags in the provided vector.
+void Module::
+getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const {
+ const NamedMDNode *ModFlags = getModuleFlagsMetadata();
+ if (!ModFlags) return;
+
+ for (unsigned i = 0, e = ModFlags->getNumOperands(); i != e; ++i) {
+ MDNode *Flag = ModFlags->getOperand(i);
+ ConstantInt *Behavior = cast<ConstantInt>(Flag->getOperand(0));
+ MDString *Key = cast<MDString>(Flag->getOperand(1));
+ Value *Val = Flag->getOperand(2);
+ Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()),
+ Key, Val));
+ }
+}
+
+/// getModuleFlagsMetadata - Returns the NamedMDNode in the module that
+/// represents module-level flags. This method returns null if there are no
+/// module-level flags.
+NamedMDNode *Module::getModuleFlagsMetadata() const {
+ return getNamedMetadata("llvm.module.flags");
+}
+
+/// getOrInsertModuleFlagsMetadata - Returns the NamedMDNode in the module that
+/// represents module-level flags. If module-level flags aren't found, it
+/// creates the named metadata that contains them.
+NamedMDNode *Module::getOrInsertModuleFlagsMetadata() {
+ return getOrInsertNamedMetadata("llvm.module.flags");
+}
+
+/// addModuleFlag - Add a module-level flag to the module-level flags
+/// metadata. It will create the module-level flags named metadata if it doesn't
+/// already exist.
+void Module::addModuleFlag(ModFlagBehavior Behavior, StringRef Key,
+ Value *Val) {
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Value *Ops[3] = {
+ ConstantInt::get(Int32Ty, Behavior), MDString::get(Context, Key), Val
+ };
+ getOrInsertModuleFlagsMetadata()->addOperand(MDNode::get(Context, Ops));
+}
+void Module::addModuleFlag(ModFlagBehavior Behavior, StringRef Key,
+ uint32_t Val) {
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ addModuleFlag(Behavior, Key, ConstantInt::get(Int32Ty, Val));
+}
+void Module::addModuleFlag(MDNode *Node) {
+ assert(Node->getNumOperands() == 3 &&
+ "Invalid number of operands for module flag!");
+ assert(isa<ConstantInt>(Node->getOperand(0)) &&
+ isa<MDString>(Node->getOperand(1)) &&
+ "Invalid operand types for module flag!");
+ getOrInsertModuleFlagsMetadata()->addOperand(Node);
+}
+
+//===----------------------------------------------------------------------===//
+// Methods to control the materialization of GlobalValues in the Module.
+//
+void Module::setMaterializer(GVMaterializer *GVM) {
+ assert(!Materializer &&
+ "Module already has a GVMaterializer. Call MaterializeAllPermanently"
+ " to clear it out before setting another one.");
+ Materializer.reset(GVM);
+}
+
+bool Module::isMaterializable(const GlobalValue *GV) const {
+ if (Materializer)
+ return Materializer->isMaterializable(GV);
+ return false;
+}
+
+bool Module::isDematerializable(const GlobalValue *GV) const {
+ if (Materializer)
+ return Materializer->isDematerializable(GV);
+ return false;
+}
+
+bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) {
+ if (Materializer)
+ return Materializer->Materialize(GV, ErrInfo);
+ return false;
+}
+
+void Module::Dematerialize(GlobalValue *GV) {
+ if (Materializer)
+ return Materializer->Dematerialize(GV);
+}
+
+bool Module::MaterializeAll(std::string *ErrInfo) {
+ if (!Materializer)
+ return false;
+ return Materializer->MaterializeModule(this, ErrInfo);
+}
+
+bool Module::MaterializeAllPermanently(std::string *ErrInfo) {
+ if (MaterializeAll(ErrInfo))
+ return true;
+ Materializer.reset();
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Other module related stuff.
+//
+
+
+// dropAllReferences() - This function causes all the subelements to "let go"
+// of all references that they are maintaining. This allows one to 'delete' a
+// whole module at a time, even though there may be circular references... first
+// all references are dropped, and all use counts go to zero. Then everything
+// is deleted for real. Note that no operations are valid on an object that
+// has "dropped all references", except operator delete.
+//
+void Module::dropAllReferences() {
+ for(Module::iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+
+ for(Module::global_iterator I = global_begin(), E = global_end(); I != E; ++I)
+ I->dropAllReferences();
+
+ for(Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I)
+ I->dropAllReferences();
+}
diff --git a/contrib/llvm/lib/IR/Pass.cpp b/contrib/llvm/lib/IR/Pass.cpp
new file mode 100644
index 000000000000..7fc48282380b
--- /dev/null
+++ b/contrib/llvm/lib/IR/Pass.cpp
@@ -0,0 +1,276 @@
+//===- Pass.cpp - LLVM Pass Infrastructure Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass infrastructure. It is primarily
+// responsible with ensuring that passes are executed and batched together
+// optimally.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Pass Implementation
+//
+
+// Force out-of-line virtual method.
+Pass::~Pass() {
+ delete Resolver;
+}
+
+// Force out-of-line virtual method.
+ModulePass::~ModulePass() { }
+
+Pass *ModulePass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+}
+
+PassManagerType ModulePass::getPotentialPassManagerType() const {
+ return PMT_ModulePassManager;
+}
+
+bool Pass::mustPreserveAnalysisID(char &AID) const {
+ return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
+}
+
+// dumpPassStructure - Implement the -debug-pass=Structure option
+void Pass::dumpPassStructure(unsigned Offset) {
+ dbgs().indent(Offset*2) << getPassName() << "\n";
+}
+
+/// getPassName - Return a nice clean name for a pass. This usually
+/// implemented in terms of the name that is registered by one of the
+/// Registration templates, but can be overloaded directly.
+///
+const char *Pass::getPassName() const {
+ AnalysisID AID = getPassID();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
+ if (PI)
+ return PI->getPassName();
+ return "Unnamed pass: implement Pass::getPassName()";
+}
+
+void Pass::preparePassManager(PMStack &) {
+ // By default, don't do anything.
+}
+
+PassManagerType Pass::getPotentialPassManagerType() const {
+ // Default implementation.
+ return PMT_Unknown;
+}
+
+void Pass::getAnalysisUsage(AnalysisUsage &) const {
+ // By default, no analysis results are used, all are invalidated.
+}
+
+void Pass::releaseMemory() {
+ // By default, don't do anything.
+}
+
+void Pass::verifyAnalysis() const {
+ // By default, don't do anything.
+}
+
+void *Pass::getAdjustedAnalysisPointer(AnalysisID AID) {
+ return this;
+}
+
+ImmutablePass *Pass::getAsImmutablePass() {
+ return 0;
+}
+
+PMDataManager *Pass::getAsPMDataManager() {
+ return 0;
+}
+
+void Pass::setResolver(AnalysisResolver *AR) {
+ assert(!Resolver && "Resolver is already set");
+ Resolver = AR;
+}
+
+// print - Print out the internal state of the pass. This is called by Analyze
+// to print out the contents of an analysis. Otherwise it is not necessary to
+// implement this method.
+//
+void Pass::print(raw_ostream &O,const Module*) const {
+ O << "Pass::print not implemented for pass: '" << getPassName() << "'!\n";
+}
+
+// dump - call print(cerr);
+void Pass::dump() const {
+ print(dbgs(), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// ImmutablePass Implementation
+//
+// Force out-of-line virtual method.
+ImmutablePass::~ImmutablePass() { }
+
+void ImmutablePass::initializePass() {
+ // By default, don't do anything.
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionPass Implementation
+//
+
+Pass *FunctionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createPrintFunctionPass(Banner, &O);
+}
+
+PassManagerType FunctionPass::getPotentialPassManagerType() const {
+ return PMT_FunctionPassManager;
+}
+
+//===----------------------------------------------------------------------===//
+// BasicBlockPass Implementation
+//
+
+Pass *BasicBlockPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createPrintBasicBlockPass(&O, false, Banner);
+}
+
+bool BasicBlockPass::doInitialization(Function &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doFinalization(Function &) {
+ // By default, don't do anything.
+ return false;
+}
+
+PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+}
+
+const PassInfo *Pass::lookupPassInfo(const void *TI) {
+ return PassRegistry::getPassRegistry()->getPassInfo(TI);
+}
+
+const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
+ return PassRegistry::getPassRegistry()->getPassInfo(Arg);
+}
+
+Pass *Pass::createPass(AnalysisID ID) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(ID);
+ if (!PI)
+ return NULL;
+ return PI->createPass();
+}
+
+Pass *PassInfo::createPass() const {
+ assert((!isAnalysisGroup() || NormalCtor) &&
+ "No default implementation found for analysis group!");
+ assert(NormalCtor &&
+ "Cannot call createPass on PassInfo without default ctor!");
+ return NormalCtor();
+}
+
+//===----------------------------------------------------------------------===//
+// Analysis Group Implementation Code
+//===----------------------------------------------------------------------===//
+
+// RegisterAGBase implementation
+//
+RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
+ const void *PassID, bool isDefault)
+ : PassInfo(Name, InterfaceID) {
+ PassRegistry::getPassRegistry()->registerAnalysisGroup(InterfaceID, PassID,
+ *this, isDefault);
+}
+
+//===----------------------------------------------------------------------===//
+// PassRegistrationListener implementation
+//
+
+// PassRegistrationListener ctor - Add the current object to the list of
+// PassRegistrationListeners...
+PassRegistrationListener::PassRegistrationListener() {
+ PassRegistry::getPassRegistry()->addRegistrationListener(this);
+}
+
+// dtor - Remove object from list of listeners...
+PassRegistrationListener::~PassRegistrationListener() {
+ PassRegistry::getPassRegistry()->removeRegistrationListener(this);
+}
+
+// enumeratePasses - Iterate over the registered passes, calling the
+// passEnumerate callback on each PassInfo object.
+//
+void PassRegistrationListener::enumeratePasses() {
+ PassRegistry::getPassRegistry()->enumerateWith(this);
+}
+
+PassNameParser::~PassNameParser() {}
+
+//===----------------------------------------------------------------------===//
+// AnalysisUsage Class Implementation
+//
+
+namespace {
+ struct GetCFGOnlyPasses : public PassRegistrationListener {
+ typedef AnalysisUsage::VectorType VectorType;
+ VectorType &CFGOnlyList;
+ GetCFGOnlyPasses(VectorType &L) : CFGOnlyList(L) {}
+
+ void passEnumerate(const PassInfo *P) {
+ if (P->isCFGOnlyPass())
+ CFGOnlyList.push_back(P->getTypeInfo());
+ }
+ };
+}
+
+// setPreservesCFG - This function should be called to by the pass, iff they do
+// not:
+//
+// 1. Add or remove basic blocks from the function
+// 2. Modify terminator instructions in any way.
+//
+// This function annotates the AnalysisUsage info object to say that analyses
+// that only depend on the CFG are preserved by this pass.
+//
+void AnalysisUsage::setPreservesCFG() {
+ // Since this transformation doesn't modify the CFG, it preserves all analyses
+ // that only depend on the CFG (like dominators, loop info, etc...)
+ GetCFGOnlyPasses(Preserved).enumeratePasses();
+}
+
+AnalysisUsage &AnalysisUsage::addPreserved(StringRef Arg) {
+ const PassInfo *PI = Pass::lookupPassInfo(Arg);
+ // If the pass exists, preserve it. Otherwise silently do nothing.
+ if (PI) Preserved.push_back(PI->getTypeInfo());
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredID(const void *ID) {
+ Required.push_back(ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredID(char &ID) {
+ Required.push_back(&ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(char &ID) {
+ Required.push_back(&ID);
+ RequiredTransitive.push_back(&ID);
+ return *this;
+}
diff --git a/contrib/llvm/lib/IR/PassManager.cpp b/contrib/llvm/lib/IR/PassManager.cpp
new file mode 100644
index 000000000000..3c968aac164f
--- /dev/null
+++ b/contrib/llvm/lib/IR/PassManager.cpp
@@ -0,0 +1,1912 @@
+//===- PassManager.cpp - LLVM Pass Infrastructure Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass Manager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/PassManagers.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+// See PassManagers.h for Pass Manager infrastructure overview.
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Pass debugging information. Often it is useful to find out what pass is
+// running when a crash occurs in a utility. When this library is compiled with
+// debugging on, a command line option (--debug-pass) is enabled that causes the
+// pass name to be printed before it executes.
+//
+
+// Different debug levels that can be enabled...
+enum PassDebugLevel {
+ None, Arguments, Structure, Executions, Details
+};
+
+static cl::opt<enum PassDebugLevel>
+PassDebugging("debug-pass", cl::Hidden,
+ cl::desc("Print PassManager debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
+ clEnumVal(Structure , "print pass structure before run()"),
+ clEnumVal(Executions, "print pass name before it is executed"),
+ clEnumVal(Details , "print pass details when it is executed"),
+ clEnumValEnd));
+
+typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser>
+PassOptionList;
+
+// Print IR out before/after specified passes.
+static PassOptionList
+PrintBefore("print-before",
+ llvm::cl::desc("Print IR before specified passes"),
+ cl::Hidden);
+
+static PassOptionList
+PrintAfter("print-after",
+ llvm::cl::desc("Print IR after specified passes"),
+ cl::Hidden);
+
+static cl::opt<bool>
+PrintBeforeAll("print-before-all",
+ llvm::cl::desc("Print IR before each pass"),
+ cl::init(false));
+static cl::opt<bool>
+PrintAfterAll("print-after-all",
+ llvm::cl::desc("Print IR after each pass"),
+ cl::init(false));
+
+/// This is a helper to determine whether to print IR before or
+/// after a pass.
+
+static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI,
+ PassOptionList &PassesToPrint) {
+ for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
+ const llvm::PassInfo *PassInf = PassesToPrint[i];
+ if (PassInf)
+ if (PassInf->getPassArgument() == PI->getPassArgument()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// This is a utility to check whether a pass should have IR dumped
+/// before it.
+static bool ShouldPrintBeforePass(const PassInfo *PI) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore);
+}
+
+/// This is a utility to check whether a pass should have IR dumped
+/// after it.
+static bool ShouldPrintAfterPass(const PassInfo *PI) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
+}
+
+} // End of llvm namespace
+
+/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
+/// or higher is specified.
+bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
+ return PassDebugging >= Executions;
+}
+
+
+
+
+void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
+ if (V == 0 && M == 0)
+ OS << "Releasing pass '";
+ else
+ OS << "Running pass '";
+
+ OS << P->getPassName() << "'";
+
+ if (M) {
+ OS << " on module '" << M->getModuleIdentifier() << "'.\n";
+ return;
+ }
+ if (V == 0) {
+ OS << '\n';
+ return;
+ }
+
+ OS << " on ";
+ if (isa<Function>(V))
+ OS << "function";
+ else if (isa<BasicBlock>(V))
+ OS << "basic block";
+ else
+ OS << "value";
+
+ OS << " '";
+ WriteAsOperand(OS, V, /*PrintTy=*/false, M);
+ OS << "'\n";
+}
+
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BBPassManager
+//
+/// BBPassManager manages BasicBlockPass. It batches all the
+/// pass together and sequence them to process one basic block before
+/// processing next basic block.
+class BBPassManager : public PMDataManager, public FunctionPass {
+
+public:
+ static char ID;
+ explicit BBPassManager()
+ : PMDataManager(), FunctionPass(ID) {}
+
+ /// Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the function, and if so, return true.
+ bool runOnFunction(Function &F);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M);
+ bool doInitialization(Function &F);
+ bool doFinalization(Module &M);
+ bool doFinalization(Function &F);
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ virtual const char *getPassName() const {
+ return "BasicBlock Pass Manager";
+ }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::dbgs().indent(Offset*2) << "BasicBlockPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ BP->dumpPassStructure(Offset + 1);
+ dumpLastUses(BP, Offset+1);
+ }
+ }
+
+ BasicBlockPass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ BasicBlockPass *BP = static_cast<BasicBlockPass *>(PassVector[N]);
+ return BP;
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+ }
+};
+
+char BBPassManager::ID = 0;
+}
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl
+//
+/// FunctionPassManagerImpl manages FPPassManagers
+class FunctionPassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+ virtual void anchor();
+private:
+ bool wasRun;
+public:
+ static char ID;
+ explicit FunctionPassManagerImpl() :
+ Pass(PT_PassManager, ID), PMDataManager(),
+ PMTopLevelManager(new FPPassManager()), wasRun(false) {}
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// createPrinterPass - Get a function printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintFunctionPass(Banner, &O);
+ }
+
+ // Prepare for running an on the fly pass, freeing memory if needed
+ // from a previous run.
+ void releaseMemoryOnTheFly();
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Function &F);
+
+ /// doInitialization - Run all of the initializers for the function passes.
+ ///
+ bool doInitialization(Module &M);
+
+ /// doFinalization - Run all of the finalizers for the function passes.
+ ///
+ bool doFinalization(Module &M);
+
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+ virtual PassManagerType getTopLevelPassManagerType() {
+ return PMT_FunctionPassManager;
+ }
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ FPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
+ return FP;
+ }
+};
+
+void FunctionPassManagerImpl::anchor() {}
+
+char FunctionPassManagerImpl::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// MPPassManager
+//
+/// MPPassManager manages ModulePasses and function pass managers.
+/// It batches all Module passes and function pass managers together and
+/// sequences them to process one module.
+class MPPassManager : public Pass, public PMDataManager {
+public:
+ static char ID;
+ explicit MPPassManager() :
+ Pass(PT_PassManager, ID), PMDataManager() { }
+
+ // Delete on the fly managers.
+ virtual ~MPPassManager() {
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ delete FPP;
+ }
+ }
+
+ /// createPrinterPass - Get a module printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+
+ /// doInitialization - Run all of the initializers for the module passes.
+ ///
+ bool doInitialization();
+
+ /// doFinalization - Run all of the finalizers for the module passes.
+ ///
+ bool doFinalization();
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ /// Add RequiredPass into list of lower level passes required by pass P.
+ /// RequiredPass is run on the fly by Pass Manager when P requests it
+ /// through getAnalysis interface.
+ virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
+
+ /// Return function pass corresponding to PassInfo PI, that is
+ /// required by module pass MP. Instantiate analysis pass, by using
+ /// its runOnFunction() for function F.
+ virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F);
+
+ virtual const char *getPassName() const {
+ return "Module Pass Manager";
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::dbgs().indent(Offset*2) << "ModulePass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ MP->dumpPassStructure(Offset + 1);
+ std::map<Pass *, FunctionPassManagerImpl *>::const_iterator I =
+ OnTheFlyManagers.find(MP);
+ if (I != OnTheFlyManagers.end())
+ I->second->dumpPassStructure(Offset + 2);
+ dumpLastUses(MP, Offset+1);
+ }
+ }
+
+ ModulePass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<ModulePass *>(PassVector[N]);
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_ModulePassManager;
+ }
+
+ private:
+ /// Collection of on the fly FPPassManagers. These managers manage
+ /// function passes that are required by module passes.
+ std::map<Pass *, FunctionPassManagerImpl *> OnTheFlyManagers;
+};
+
+char MPPassManager::ID = 0;
+//===----------------------------------------------------------------------===//
+// PassManagerImpl
+//
+
+/// PassManagerImpl manages MPPassManagers
+class PassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+ virtual void anchor();
+
+public:
+ static char ID;
+ explicit PassManagerImpl() :
+ Pass(PT_PassManager, ID), PMDataManager(),
+ PMTopLevelManager(new MPPassManager()) {}
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// createPrinterPass - Get a module printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Module &M);
+
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+
+ /// doInitialization - Run all of the initializers for the module passes.
+ ///
+ bool doInitialization();
+
+ /// doFinalization - Run all of the finalizers for the module passes.
+ ///
+ bool doFinalization();
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+ virtual PassManagerType getTopLevelPassManagerType() {
+ return PMT_ModulePassManager;
+ }
+
+ MPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ MPPassManager *MP = static_cast<MPPassManager *>(PassManagers[N]);
+ return MP;
+ }
+};
+
+void PassManagerImpl::anchor() {}
+
+char PassManagerImpl::ID = 0;
+} // End of llvm namespace
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+/// TimingInfo Class - This class is used to calculate information about the
+/// amount of time each pass takes to execute. This only happens when
+/// -time-passes is enabled on the command line.
+///
+
+static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
+
+class TimingInfo {
+ DenseMap<Pass*, Timer*> TimingData;
+ TimerGroup TG;
+public:
+ // Use 'create' member to get this.
+ TimingInfo() : TG("... Pass execution timing report ...") {}
+
+ // TimingDtor - Print out information about timing information
+ ~TimingInfo() {
+ // Delete all of the timers, which accumulate their info into the
+ // TimerGroup.
+ for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(),
+ E = TimingData.end(); I != E; ++I)
+ delete I->second;
+ // TimerGroup is deleted next, printing the report.
+ }
+
+ // createTheTimeInfo - This method either initializes the TheTimeInfo pointer
+ // to a non null value (if the -time-passes option is enabled) or it leaves it
+ // null. It may be called multiple times.
+ static void createTheTimeInfo();
+
+ /// getPassTimer - Return the timer for the specified pass if it exists.
+ Timer *getPassTimer(Pass *P) {
+ if (P->getAsPMDataManager())
+ return 0;
+
+ sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
+ Timer *&T = TimingData[P];
+ if (T == 0)
+ T = new Timer(P->getPassName(), TG);
+ return T;
+ }
+};
+
+} // End of anon namespace
+
+static TimingInfo *TheTimeInfo;
+
+//===----------------------------------------------------------------------===//
+// PMTopLevelManager implementation
+
+/// Initialize top level manager. Create first pass manager.
+PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
+ PMDM->setTopLevelManager(this);
+ addPassManager(PMDM);
+ activeStack.push(PMDM);
+}
+
+/// Set pass P as the last user of the given analysis passes.
+void
+PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) {
+ unsigned PDepth = 0;
+ if (P->getResolver())
+ PDepth = P->getResolver()->getPMDataManager().getDepth();
+
+ for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(),
+ E = AnalysisPasses.end(); I != E; ++I) {
+ Pass *AP = *I;
+ LastUser[AP] = P;
+
+ if (P == AP)
+ continue;
+
+ // Update the last users of passes that are required transitive by AP.
+ AnalysisUsage *AnUsage = findAnalysisUsage(AP);
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 12> LastPMUses;
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ assert(AnalysisPass && "Expected analysis pass to exist.");
+ AnalysisResolver *AR = AnalysisPass->getResolver();
+ assert(AR && "Expected analysis resolver to exist.");
+ unsigned APDepth = AR->getPMDataManager().getDepth();
+
+ if (PDepth == APDepth)
+ LastUses.push_back(AnalysisPass);
+ else if (PDepth > APDepth)
+ LastPMUses.push_back(AnalysisPass);
+ }
+
+ setLastUser(LastUses, P);
+
+ // If this pass has a corresponding pass manager, push higher level
+ // analysis to this pass manager.
+ if (P->getResolver())
+ setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass());
+
+
+ // If AP is the last user of other passes then make P last user of
+ // such passes.
+ for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
+ LUE = LastUser.end(); LUI != LUE; ++LUI) {
+ if (LUI->second == AP)
+ // DenseMap iterator is not invalidated here because
+ // this is just updating existing entries.
+ LastUser[LUI->first] = P;
+ }
+ }
+}
+
+/// Collect passes whose last user is P
+void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
+ Pass *P) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
+ InversedLastUser.find(P);
+ if (DMI == InversedLastUser.end())
+ return;
+
+ SmallPtrSet<Pass *, 8> &LU = DMI->second;
+ for (SmallPtrSet<Pass *, 8>::iterator I = LU.begin(),
+ E = LU.end(); I != E; ++I) {
+ LastUses.push_back(*I);
+ }
+
+}
+
+AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
+ AnalysisUsage *AnUsage = NULL;
+ DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
+ if (DMI != AnUsageMap.end())
+ AnUsage = DMI->second;
+ else {
+ AnUsage = new AnalysisUsage();
+ P->getAnalysisUsage(*AnUsage);
+ AnUsageMap[P] = AnUsage;
+ }
+ return AnUsage;
+}
+
+/// Schedule pass P for execution. Make sure that passes required by
+/// P are run before P is run. Update analysis info maintained by
+/// the manager. Remove dead passes. This is a recursive function.
+void PMTopLevelManager::schedulePass(Pass *P) {
+
+ // TODO : Allocate function manager for this pass, other wise required set
+ // may be inserted into previous function manager
+
+ // Give pass a chance to prepare the stage.
+ P->preparePassManager(activeStack);
+
+ // If P is an analysis pass and it is available then do not
+ // generate the analysis again. Stale analysis info should not be
+ // available at this point.
+ const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
+ if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
+ delete P;
+ return;
+ }
+
+ AnalysisUsage *AnUsage = findAnalysisUsage(P);
+
+ bool checkAnalysis = true;
+ while (checkAnalysis) {
+ checkAnalysis = false;
+
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
+ E = RequiredSet.end(); I != E; ++I) {
+
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ if (!AnalysisPass) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+
+ if (PI == NULL) {
+ // Pass P is not in the global PassRegistry
+ dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n";
+ dbgs() << "Verify if there is a pass dependency cycle." << "\n";
+ dbgs() << "Required Passes:" << "\n";
+ for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(),
+ E = RequiredSet.end(); I2 != E && I2 != I; ++I2) {
+ Pass *AnalysisPass2 = findAnalysisPass(*I2);
+ if (AnalysisPass2) {
+ dbgs() << "\t" << AnalysisPass2->getPassName() << "\n";
+ } else {
+ dbgs() << "\t" << "Error: Required pass not found! Possible causes:" << "\n";
+ dbgs() << "\t\t" << "- Pass misconfiguration (e.g.: missing macros)" << "\n";
+ dbgs() << "\t\t" << "- Corruption of the global PassRegistry" << "\n";
+ }
+ }
+ }
+
+ assert(PI && "Expected required passes to be initialized");
+ AnalysisPass = PI->createPass();
+ if (P->getPotentialPassManagerType () ==
+ AnalysisPass->getPotentialPassManagerType())
+ // Schedule analysis pass that is managed by the same pass manager.
+ schedulePass(AnalysisPass);
+ else if (P->getPotentialPassManagerType () >
+ AnalysisPass->getPotentialPassManagerType()) {
+ // Schedule analysis pass that is managed by a new manager.
+ schedulePass(AnalysisPass);
+ // Recheck analysis passes to ensure that required analyses that
+ // are already checked are still available.
+ checkAnalysis = true;
+ } else
+ // Do not schedule this analysis. Lower level analsyis
+ // passes are run on the fly.
+ delete AnalysisPass;
+ }
+ }
+ }
+
+ // Now all required passes are available.
+ if (ImmutablePass *IP = P->getAsImmutablePass()) {
+ // P is a immutable pass and it will be managed by this
+ // top level manager. Set up analysis resolver to connect them.
+ PMDataManager *DM = getAsPMDataManager();
+ AnalysisResolver *AR = new AnalysisResolver(*DM);
+ P->setResolver(AR);
+ DM->initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ DM->recordAvailableAnalysis(IP);
+ return;
+ }
+
+ if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) {
+ Pass *PP = P->createPrinterPass(
+ dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***");
+ PP->assignPassManager(activeStack, getTopLevelPassManagerType());
+ }
+
+ // Add the requested pass to the best available pass manager.
+ P->assignPassManager(activeStack, getTopLevelPassManagerType());
+
+ if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) {
+ Pass *PP = P->createPrinterPass(
+ dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***");
+ PP->assignPassManager(activeStack, getTopLevelPassManagerType());
+ }
+}
+
+/// Find the pass that implements Analysis AID. Search immutable
+/// passes and all pass managers. If desired pass is not found
+/// then return NULL.
+Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
+
+ // Check pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+ // Check other pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator
+ I = IndirectPassManagers.begin(),
+ E = IndirectPassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+ // Check the immutable passes. Iterate in reverse order so that we find
+ // the most recently registered passes first.
+ for (SmallVector<ImmutablePass *, 8>::reverse_iterator I =
+ ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) {
+ AnalysisID PI = (*I)->getPassID();
+ if (PI == AID)
+ return *I;
+
+ // If Pass not found then check the interfaces implemented by Immutable Pass
+ const PassInfo *PassInf =
+ PassRegistry::getPassRegistry()->getPassInfo(PI);
+ assert(PassInf && "Expected all immutable passes to be initialized");
+ const std::vector<const PassInfo*> &ImmPI =
+ PassInf->getInterfacesImplemented();
+ for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
+ EE = ImmPI.end(); II != EE; ++II) {
+ if ((*II)->getTypeInfo() == AID)
+ return *I;
+ }
+ }
+
+ return 0;
+}
+
+// Print passes managed by this top level manager.
+void PMTopLevelManager::dumpPasses() const {
+
+ if (PassDebugging < Structure)
+ return;
+
+ // Print out the immutable passes
+ for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
+ ImmutablePasses[i]->dumpPassStructure(0);
+ }
+
+ // Every class that derives from PMDataManager also derives from Pass
+ // (sometimes indirectly), but there's no inheritance relationship
+ // between PMDataManager and Pass, so we have to getAsPass to get
+ // from a PMDataManager* to a Pass*.
+ for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->getAsPass()->dumpPassStructure(1);
+}
+
+void PMTopLevelManager::dumpArguments() const {
+
+ if (PassDebugging < Arguments)
+ return;
+
+ dbgs() << "Pass Arguments: ";
+ for (SmallVector<ImmutablePass *, 8>::const_iterator I =
+ ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) {
+ assert(PI && "Expected all immutable passes to be initialized");
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
+ }
+ for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->dumpPassArguments();
+ dbgs() << "\n";
+}
+
+void PMTopLevelManager::initializeAllAnalysisInfo() {
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+ // Initailize other pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator
+ I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
+ I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+ for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
+ DME = LastUser.end(); DMI != DME; ++DMI) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
+ InversedLastUser.find(DMI->second);
+ if (InvDMI != InversedLastUser.end()) {
+ SmallPtrSet<Pass *, 8> &L = InvDMI->second;
+ L.insert(DMI->first);
+ } else {
+ SmallPtrSet<Pass *, 8> L; L.insert(DMI->first);
+ InversedLastUser[DMI->second] = L;
+ }
+ }
+}
+
+/// Destructor
+PMTopLevelManager::~PMTopLevelManager() {
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ delete *I;
+
+ for (SmallVectorImpl<ImmutablePass *>::iterator
+ I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ delete *I;
+
+ for (DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.begin(),
+ DME = AnUsageMap.end(); DMI != DME; ++DMI)
+ delete DMI->second;
+}
+
+//===----------------------------------------------------------------------===//
+// PMDataManager implementation
+
+/// Augement AvailableAnalysis by adding analysis made available by pass P.
+void PMDataManager::recordAvailableAnalysis(Pass *P) {
+ AnalysisID PI = P->getPassID();
+
+ AvailableAnalysis[PI] = P;
+
+ assert(!AvailableAnalysis.empty());
+
+ // This pass is the current implementation of all of the interfaces it
+ // implements as well.
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
+ if (PInf == 0) return;
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i)
+ AvailableAnalysis[II[i]->getTypeInfo()] = P;
+}
+
+// Return true if P preserves high level analysis used by other
+// passes managed by this manager
+bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return true;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
+ E = HigherLevelAnalysis.end(); I != E; ++I) {
+ Pass *P1 = *I;
+ if (P1->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(),
+ P1->getPassID()) ==
+ PreservedSet.end())
+ return false;
+ }
+
+ return true;
+}
+
+/// verifyPreservedAnalysis -- Verify analysis preserved by pass P.
+void PMDataManager::verifyPreservedAnalysis(Pass *P) {
+ // Don't do this unless assertions are enabled.
+#ifdef NDEBUG
+ return;
+#endif
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+
+ // Verify preserved analysis
+ for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(),
+ E = PreservedSet.end(); I != E; ++I) {
+ AnalysisID AID = *I;
+ if (Pass *AP = findAnalysisPass(AID, true)) {
+ TimeRegion PassTimer(getPassTimer(AP));
+ AP->verifyAnalysis();
+ }
+ }
+}
+
+/// Remove Analysis not preserved by Pass P
+void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (DenseMap<AnalysisID, Pass*>::iterator I = AvailableAnalysis.begin(),
+ E = AvailableAnalysis.end(); I != E; ) {
+ DenseMap<AnalysisID, Pass*>::iterator Info = I++;
+ if (Info->second->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
+ }
+ AvailableAnalysis.erase(Info);
+ }
+ }
+
+ // Check inherited analysis also. If P is not preserving analysis
+ // provided by parent manager then remove it here.
+ for (unsigned Index = 0; Index < PMT_Last; ++Index) {
+
+ if (!InheritedAnalysis[Index])
+ continue;
+
+ for (DenseMap<AnalysisID, Pass*>::iterator
+ I = InheritedAnalysis[Index]->begin(),
+ E = InheritedAnalysis[Index]->end(); I != E; ) {
+ DenseMap<AnalysisID, Pass *>::iterator Info = I++;
+ if (Info->second->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
+ }
+ InheritedAnalysis[Index]->erase(Info);
+ }
+ }
+ }
+}
+
+/// Remove analysis passes that are not used any longer
+void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
+ enum PassDebuggingString DBG_STR) {
+
+ SmallVector<Pass *, 12> DeadPasses;
+
+ // If this is a on the fly manager then it does not have TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(DeadPasses, P);
+
+ if (PassDebugging >= Details && !DeadPasses.empty()) {
+ dbgs() << " -*- '" << P->getPassName();
+ dbgs() << "' is the last user of following pass instances.";
+ dbgs() << " Free these instances\n";
+ }
+
+ for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(),
+ E = DeadPasses.end(); I != E; ++I)
+ freePass(*I, Msg, DBG_STR);
+}
+
+void PMDataManager::freePass(Pass *P, StringRef Msg,
+ enum PassDebuggingString DBG_STR) {
+ dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg);
+
+ {
+ // If the pass crashes releasing memory, remember this.
+ PassManagerPrettyStackEntry X(P);
+ TimeRegion PassTimer(getPassTimer(P));
+
+ P->releaseMemory();
+ }
+
+ AnalysisID PI = P->getPassID();
+ if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
+ // Remove the pass itself (if it is not already removed).
+ AvailableAnalysis.erase(PI);
+
+ // Remove all interfaces this pass implements, for which it is also
+ // listed as the available implementation.
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i) {
+ DenseMap<AnalysisID, Pass*>::iterator Pos =
+ AvailableAnalysis.find(II[i]->getTypeInfo());
+ if (Pos != AvailableAnalysis.end() && Pos->second == P)
+ AvailableAnalysis.erase(Pos);
+ }
+ }
+}
+
+/// Add pass P into the PassVector. Update
+/// AvailableAnalysis appropriately if ProcessAnalysis is true.
+void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
+ // This manager is going to manage pass P. Set up analysis resolver
+ // to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+
+ // If a FunctionPass F is the last user of ModulePass info M
+ // then the F's manager, not F, records itself as a last user of M.
+ SmallVector<Pass *, 12> TransferLastUses;
+
+ if (!ProcessAnalysis) {
+ // Add pass
+ PassVector.push_back(P);
+ return;
+ }
+
+ // At the moment, this pass is the last user of all required passes.
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 8> RequiredPasses;
+ SmallVector<AnalysisID, 8> ReqAnalysisNotAvailable;
+
+ unsigned PDepth = this->getDepth();
+
+ collectRequiredAnalysis(RequiredPasses,
+ ReqAnalysisNotAvailable, P);
+ for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(),
+ E = RequiredPasses.end(); I != E; ++I) {
+ Pass *PRequired = *I;
+ unsigned RDepth = 0;
+
+ assert(PRequired->getResolver() && "Analysis Resolver is not set");
+ PMDataManager &DM = PRequired->getResolver()->getPMDataManager();
+ RDepth = DM.getDepth();
+
+ if (PDepth == RDepth)
+ LastUses.push_back(PRequired);
+ else if (PDepth > RDepth) {
+ // Let the parent claim responsibility of last use
+ TransferLastUses.push_back(PRequired);
+ // Keep track of higher level analysis used by this manager.
+ HigherLevelAnalysis.push_back(PRequired);
+ } else
+ llvm_unreachable("Unable to accommodate Required Pass");
+ }
+
+ // Set P as P's last user until someone starts using P.
+ // However, if P is a Pass Manager then it does not need
+ // to record its last user.
+ if (P->getAsPMDataManager() == 0)
+ LastUses.push_back(P);
+ TPM->setLastUser(LastUses, P);
+
+ if (!TransferLastUses.empty()) {
+ Pass *My_PM = getAsPass();
+ TPM->setLastUser(TransferLastUses, My_PM);
+ TransferLastUses.clear();
+ }
+
+ // Now, take care of required analyses that are not available.
+ for (SmallVectorImpl<AnalysisID>::iterator
+ I = ReqAnalysisNotAvailable.begin(),
+ E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ Pass *AnalysisPass = PI->createPass();
+ this->addLowerLevelRequiredPass(P, AnalysisPass);
+ }
+
+ // Take a note of analysis required and made available by this pass.
+ // Remove the analysis not preserved by this pass
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+
+ // Add pass
+ PassVector.push_back(P);
+}
+
+
+/// Populate RP with analysis pass that are required by
+/// pass P and are available. Populate RP_NotAvail with analysis
+/// pass that are required by pass P but are not available.
+void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP,
+ SmallVectorImpl<AnalysisID> &RP_NotAvail,
+ Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator
+ I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+}
+
+// All Required analyses should be available to the pass as it runs! Here
+// we fill in the AnalysisImpls member of the pass so that it can
+// successfully use the getAnalysis() method to retrieve the
+// implementations it needs.
+//
+void PMDataManager::initializeAnalysisImpl(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+
+ for (AnalysisUsage::VectorType::const_iterator
+ I = AnUsage->getRequiredSet().begin(),
+ E = AnUsage->getRequiredSet().end(); I != E; ++I) {
+ Pass *Impl = findAnalysisPass(*I, true);
+ if (Impl == 0)
+ // This may be analysis pass that is initialized on the fly.
+ // If that is not the case then it will raise an assert when it is used.
+ continue;
+ AnalysisResolver *AR = P->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->addAnalysisImplsPair(*I, Impl);
+ }
+}
+
+/// Find the pass that implements Analysis AID. If desired pass is not found
+/// then return NULL.
+Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
+
+ // Check if AvailableAnalysis map has one entry.
+ DenseMap<AnalysisID, Pass*>::const_iterator I = AvailableAnalysis.find(AID);
+
+ if (I != AvailableAnalysis.end())
+ return I->second;
+
+ // Search Parents through TopLevelManager
+ if (SearchParent)
+ return TPM->findAnalysisPass(AID);
+
+ return NULL;
+}
+
+// Print list of passes that are last used by P.
+void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
+
+ SmallVector<Pass *, 12> LUses;
+
+ // If this is a on the fly manager then it does not have TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(LUses, P);
+
+ for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(),
+ E = LUses.end(); I != E; ++I) {
+ llvm::dbgs() << "--" << std::string(Offset*2, ' ');
+ (*I)->dumpPassStructure(0);
+ }
+}
+
+void PMDataManager::dumpPassArguments() const {
+ for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I) {
+ if (PMDataManager *PMD = (*I)->getAsPMDataManager())
+ PMD->dumpPassArguments();
+ else
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
+ }
+}
+
+void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
+ enum PassDebuggingString S2,
+ StringRef Msg) {
+ if (PassDebugging < Executions)
+ return;
+ dbgs() << (void*)this << std::string(getDepth()*2+1, ' ');
+ switch (S1) {
+ case EXECUTION_MSG:
+ dbgs() << "Executing Pass '" << P->getPassName();
+ break;
+ case MODIFICATION_MSG:
+ dbgs() << "Made Modification '" << P->getPassName();
+ break;
+ case FREEING_MSG:
+ dbgs() << " Freeing Pass '" << P->getPassName();
+ break;
+ default:
+ break;
+ }
+ switch (S2) {
+ case ON_BASICBLOCK_MSG:
+ dbgs() << "' on BasicBlock '" << Msg << "'...\n";
+ break;
+ case ON_FUNCTION_MSG:
+ dbgs() << "' on Function '" << Msg << "'...\n";
+ break;
+ case ON_MODULE_MSG:
+ dbgs() << "' on Module '" << Msg << "'...\n";
+ break;
+ case ON_REGION_MSG:
+ dbgs() << "' on Region '" << Msg << "'...\n";
+ break;
+ case ON_LOOP_MSG:
+ dbgs() << "' on Loop '" << Msg << "'...\n";
+ break;
+ case ON_CG_MSG:
+ dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n";
+ break;
+ default:
+ break;
+ }
+}
+
+void PMDataManager::dumpRequiredSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
+}
+
+void PMDataManager::dumpPreservedSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
+}
+
+void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
+ const AnalysisUsage::VectorType &Set) const {
+ assert(PassDebugging >= Details);
+ if (Set.empty())
+ return;
+ dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
+ for (unsigned i = 0; i != Set.size(); ++i) {
+ if (i) dbgs() << ',';
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
+ if (!PInf) {
+ // Some preserved passes, such as AliasAnalysis, may not be initialized by
+ // all drivers.
+ dbgs() << " Uninitialized Pass";
+ continue;
+ }
+ dbgs() << ' ' << PInf->getPassName();
+ }
+ dbgs() << '\n';
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+/// This should be handled by specific pass manager.
+void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ if (TPM) {
+ TPM->dumpArguments();
+ TPM->dumpPasses();
+ }
+
+ // Module Level pass may required Function Level analysis info
+ // (e.g. dominator info). Pass manager uses on the fly function pass manager
+ // to provide this on demand. In that case, in Pass manager terminology,
+ // module level pass is requiring lower level analysis info managed by
+ // lower level pass manager.
+
+ // When Pass manager is not able to order required analysis info, Pass manager
+ // checks whether any lower level manager will be able to provide this
+ // analysis info on demand or not.
+#ifndef NDEBUG
+ dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
+ dbgs() << "' required by '" << P->getPassName() << "'\n";
+#endif
+ llvm_unreachable("Unable to schedule pass");
+}
+
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
+ llvm_unreachable("Unable to find on the fly pass");
+}
+
+// Destructor
+PMDataManager::~PMDataManager() {
+ for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I)
+ delete *I;
+}
+
+//===----------------------------------------------------------------------===//
+// NOTE: Is this the right place to define this method ?
+// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist.
+Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
+ return PM.findAnalysisPass(ID, dir);
+}
+
+Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
+ Function &F) {
+ return PM.getOnTheFlyPass(P, AnalysisPI, F);
+}
+
+//===----------------------------------------------------------------------===//
+// BBPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnBasicBlock method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool BBPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = doInitialization(F);
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
+ dumpRequiredSet(BP);
+
+ initializeAnalysisImpl(BP);
+
+ {
+ // If the pass crashes, remember this.
+ PassManagerPrettyStackEntry X(BP, *I);
+ TimeRegion PassTimer(getPassTimer(BP));
+
+ LocalChanged |= BP->runOnBasicBlock(*I);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
+ I->getName());
+ dumpPreservedSet(BP);
+
+ verifyPreservedAnalysis(BP);
+ removeNotPreservedAnalysis(BP);
+ recordAvailableAnalysis(BP);
+ removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG);
+ }
+
+ return doFinalization(F) || Changed;
+}
+
+// Implement doInitialization and doFinalization
+bool BBPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doInitialization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doInitialization(F);
+ }
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doFinalization(F);
+ }
+
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManager implementation
+
+/// Create new Function pass manager
+FunctionPassManager::FunctionPassManager(Module *m) : M(m) {
+ FPM = new FunctionPassManagerImpl();
+ // FPM is the top level manager.
+ FPM->setTopLevelManager(FPM);
+
+ AnalysisResolver *AR = new AnalysisResolver(*FPM);
+ FPM->setResolver(AR);
+}
+
+FunctionPassManager::~FunctionPassManager() {
+ delete FPM;
+}
+
+/// add - Add a pass to the queue of passes to run. This passes
+/// ownership of the Pass to the PassManager. When the
+/// PassManager_X is destroyed, the pass will be destroyed as well, so
+/// there is no need to delete the pass. (TODO delete passes.)
+/// This implies that all passes MUST be allocated with 'new'.
+void FunctionPassManager::add(Pass *P) {
+ FPM->add(P);
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep
+/// track of whether any of the passes modifies the function, and if
+/// so, return true.
+///
+bool FunctionPassManager::run(Function &F) {
+ if (F.isMaterializable()) {
+ std::string errstr;
+ if (F.Materialize(&errstr))
+ report_fatal_error("Error reading bitcode file: " + Twine(errstr));
+ }
+ return FPM->run(F);
+}
+
+
+/// doInitialization - Run all of the initializers for the function passes.
+///
+bool FunctionPassManager::doInitialization() {
+ return FPM->doInitialization(*M);
+}
+
+/// doFinalization - Run all of the finalizers for the function passes.
+///
+bool FunctionPassManager::doFinalization() {
+ return FPM->doFinalization(*M);
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl implementation
+//
+bool FunctionPassManagerImpl::doInitialization(Module &M) {
+ bool Changed = false;
+
+ dumpArguments();
+ dumpPasses();
+
+ SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doInitialization(M);
+ }
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FunctionPassManagerImpl::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (int Index = getNumContainedManagers() - 1; Index >= 0; --Index)
+ Changed |= getContainedManager(Index)->doFinalization(M);
+
+ SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doFinalization(M);
+ }
+
+ return Changed;
+}
+
+/// cleanup - After running all passes, clean up pass manager cache.
+void FPPassManager::cleanup() {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ AnalysisResolver *AR = FP->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->clearAnalysisImpls();
+ }
+}
+
+void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
+ if (!wasRun)
+ return;
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
+ FPPassManager *FPPM = getContainedManager(Index);
+ for (unsigned Index = 0; Index < FPPM->getNumContainedPasses(); ++Index) {
+ FPPM->getContainedPass(Index)->releaseMemory();
+ }
+ }
+ wasRun = false;
+}
+
+// Execute all the passes managed by this top level manager.
+// Return true if any function is modified by a pass.
+bool FunctionPassManagerImpl::run(Function &F) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnFunction(F);
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ getContainedManager(Index)->cleanup();
+
+ wasRun = true;
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// FPPassManager implementation
+
+char FPPassManager::ID = 0;
+/// Print passes managed by this manager
+void FPPassManager::dumpPassStructure(unsigned Offset) {
+ dbgs().indent(Offset*2) << "FunctionPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ FP->dumpPassStructure(Offset + 1);
+ dumpLastUses(FP, Offset+1);
+ }
+}
+
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnFunction method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool FPPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpRequiredSet(FP);
+
+ initializeAnalysisImpl(FP);
+
+ {
+ PassManagerPrettyStackEntry X(FP, F);
+ TimeRegion PassTimer(getPassTimer(FP));
+
+ LocalChanged |= FP->runOnFunction(F);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpPreservedSet(FP);
+
+ verifyPreservedAnalysis(FP);
+ removeNotPreservedAnalysis(FP);
+ recordAvailableAnalysis(FP);
+ removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
+ }
+ return Changed;
+}
+
+bool FPPassManager::runOnModule(Module &M) {
+ bool Changed = false;
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ Changed |= runOnFunction(*I);
+
+ return Changed;
+}
+
+bool FPPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FPPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MPPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnModule method. Keep track of whether any of the passes modifies
+/// the module, and if so, return true.
+bool
+MPPassManager::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Initialize on-the-fly passes
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ Changed |= FPP->doInitialization(M);
+ }
+
+ // Initialize module passes
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
+ dumpRequiredSet(MP);
+
+ initializeAnalysisImpl(MP);
+
+ {
+ PassManagerPrettyStackEntry X(MP, M);
+ TimeRegion PassTimer(getPassTimer(MP));
+
+ LocalChanged |= MP->runOnModule(M);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
+ M.getModuleIdentifier());
+ dumpPreservedSet(MP);
+
+ verifyPreservedAnalysis(MP);
+ removeNotPreservedAnalysis(MP);
+ recordAvailableAnalysis(MP);
+ removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
+ }
+
+ // Finalize module passes
+ for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ // Finalize on-the-fly passes
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ // We don't know when is the last time an on-the-fly pass is run,
+ // so we need to releaseMemory / finalize here
+ FPP->releaseMemoryOnTheFly();
+ Changed |= FPP->doFinalization(M);
+ }
+
+ return Changed;
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+ assert((P->getPotentialPassManagerType() <
+ RequiredPass->getPotentialPassManagerType()) &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
+ if (!FPP) {
+ FPP = new FunctionPassManagerImpl();
+ // FPP is the top level manager.
+ FPP->setTopLevelManager(FPP);
+
+ OnTheFlyManagers[P] = FPP;
+ }
+ FPP->add(RequiredPass);
+
+ // Register P as the last user of RequiredPass.
+ if (RequiredPass) {
+ SmallVector<Pass *, 1> LU;
+ LU.push_back(RequiredPass);
+ FPP->setLastUser(LU, P);
+ }
+}
+
+/// Return function pass corresponding to PassInfo PI, that is
+/// required by module pass MP. Instantiate analysis pass, by using
+/// its runOnFunction() for function F.
+Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
+ assert(FPP && "Unable to find on the fly pass");
+
+ FPP->releaseMemoryOnTheFly();
+ FPP->run(F);
+ return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// PassManagerImpl implementation
+
+//
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManagerImpl::run(Module &M) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+
+ dumpArguments();
+ dumpPasses();
+
+ SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doInitialization(M);
+ }
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnModule(M);
+
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doFinalization(M);
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// PassManager implementation
+
+/// Create new pass manager
+PassManager::PassManager() {
+ PM = new PassManagerImpl();
+ // PM is the top level manager
+ PM->setTopLevelManager(PM);
+}
+
+PassManager::~PassManager() {
+ delete PM;
+}
+
+/// add - Add a pass to the queue of passes to run. This passes ownership of
+/// the Pass to the PassManager. When the PassManager is destroyed, the pass
+/// will be destroyed as well, so there is no need to delete the pass. This
+/// implies that all passes MUST be allocated with 'new'.
+void PassManager::add(Pass *P) {
+ PM->add(P);
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManager::run(Module &M) {
+ return PM->run(M);
+}
+
+//===----------------------------------------------------------------------===//
+// TimingInfo implementation
+
+bool llvm::TimePassesIsEnabled = false;
+static cl::opt<bool,true>
+EnableTiming("time-passes", cl::location(TimePassesIsEnabled),
+ cl::desc("Time each pass, printing elapsed time for each on exit"));
+
+// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to
+// a non null value (if the -time-passes option is enabled) or it leaves it
+// null. It may be called multiple times.
+void TimingInfo::createTheTimeInfo() {
+ if (!TimePassesIsEnabled || TheTimeInfo) return;
+
+ // Constructed the first time this is called, iff -time-passes is enabled.
+ // This guarantees that the object will be constructed before static globals,
+ // thus it will be destroyed before them.
+ static ManagedStatic<TimingInfo> TTI;
+ TheTimeInfo = &*TTI;
+}
+
+/// If TimingInfo is enabled then start pass timer.
+Timer *llvm::getPassTimer(Pass *P) {
+ if (TheTimeInfo)
+ return TheTimeInfo->getPassTimer(P);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// PMStack implementation
+//
+
+// Pop Pass Manager from the stack and clear its analysis info.
+void PMStack::pop() {
+
+ PMDataManager *Top = this->top();
+ Top->initializeAnalysisInfo();
+
+ S.pop_back();
+}
+
+// Push PM on the stack and set its top level manager.
+void PMStack::push(PMDataManager *PM) {
+ assert(PM && "Unable to push. Pass Manager expected");
+ assert(PM->getDepth()==0 && "Pass Manager depth set too early");
+
+ if (!this->empty()) {
+ assert(PM->getPassManagerType() > this->top()->getPassManagerType()
+ && "pushing bad pass manager to PMStack");
+ PMTopLevelManager *TPM = this->top()->getTopLevelManager();
+
+ assert(TPM && "Unable to find top level manager");
+ TPM->addIndirectPassManager(PM);
+ PM->setTopLevelManager(TPM);
+ PM->setDepth(this->top()->getDepth()+1);
+ } else {
+ assert((PM->getPassManagerType() == PMT_ModulePassManager
+ || PM->getPassManagerType() == PMT_FunctionPassManager)
+ && "pushing bad pass manager to PMStack");
+ PM->setDepth(1);
+ }
+
+ S.push_back(PM);
+}
+
+// Dump content of the pass manager stack.
+void PMStack::dump() const {
+ for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
+ E = S.end(); I != E; ++I)
+ dbgs() << (*I)->getAsPass()->getPassName() << ' ';
+
+ if (!S.empty())
+ dbgs() << '\n';
+}
+
+/// Find appropriate Module Pass Manager in the PM Stack and
+/// add self into that manager.
+void ModulePass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find Module Pass Manager
+ while (!PMS.empty()) {
+ PassManagerType TopPMType = PMS.top()->getPassManagerType();
+ if (TopPMType == PreferredType)
+ break; // We found desired pass manager
+ else if (TopPMType > PMT_ModulePassManager)
+ PMS.pop(); // Pop children pass managers
+ else
+ break;
+ }
+ assert(!PMS.empty() && "Unable to find appropriate Pass Manager");
+ PMS.top()->add(this);
+}
+
+/// Find appropriate Function Pass Manager or Call Graph Pass Manager
+/// in the PM Stack and add self into that manager.
+void FunctionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+
+ // Find Function Pass Manager
+ while (!PMS.empty()) {
+ if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
+ PMS.pop();
+ else
+ break;
+ }
+
+ // Create new Function Pass Manager if needed.
+ FPPassManager *FPP;
+ if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) {
+ FPP = (FPPassManager *)PMS.top();
+ } else {
+ assert(!PMS.empty() && "Unable to create Function Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Function Pass Manager
+ FPP = new FPPassManager();
+ FPP->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(FPP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ FPP->assignPassManager(PMS, PMD->getPassManagerType());
+
+ // [4] Push new manager into PMS
+ PMS.push(FPP);
+ }
+
+ // Assign FPP as the manager of this pass.
+ FPP->add(this);
+}
+
+/// Find appropriate Basic Pass Manager or Call Graph Pass Manager
+/// in the PM Stack and add self into that manager.
+void BasicBlockPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ BBPassManager *BBP;
+
+ // Basic Pass Manager is a leaf pass manager. It does not handle
+ // any other pass manager.
+ if (!PMS.empty() &&
+ PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
+ BBP = (BBPassManager *)PMS.top();
+ } else {
+ // If leaf manager is not Basic Block Pass manager then create new
+ // basic Block Pass manager.
+ assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Basic Block Manager
+ BBP = new BBPassManager();
+
+ // [2] Set up new manager's top level manager
+ // Basic Block Pass Manager does not live by itself
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(BBP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ BBP->assignPassManager(PMS, PreferredType);
+
+ // [4] Push new manager into PMS
+ PMS.push(BBP);
+ }
+
+ // Assign BBP as the manager of this pass.
+ BBP->add(this);
+}
+
+PassManagerBase::~PassManagerBase() {}
diff --git a/contrib/llvm/lib/IR/PassRegistry.cpp b/contrib/llvm/lib/IR/PassRegistry.cpp
new file mode 100644
index 000000000000..a0b64ed78f5f
--- /dev/null
+++ b/contrib/llvm/lib/IR/PassRegistry.cpp
@@ -0,0 +1,209 @@
+//===- PassRegistry.cpp - Pass Registration Implementation ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PassRegistry, with which passes are registered on
+// initialization, and supports the PassManager in dependency resolution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/PassRegistry.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include <vector>
+
+using namespace llvm;
+
+// FIXME: We use ManagedStatic to erase the pass registrar on shutdown.
+// Unfortunately, passes are registered with static ctors, and having
+// llvm_shutdown clear this map prevents successful resurrection after
+// llvm_shutdown is run. Ideally we should find a solution so that we don't
+// leak the map, AND can still resurrect after shutdown.
+static ManagedStatic<PassRegistry> PassRegistryObj;
+PassRegistry *PassRegistry::getPassRegistry() {
+ return &*PassRegistryObj;
+}
+
+static ManagedStatic<sys::SmartMutex<true> > Lock;
+
+//===----------------------------------------------------------------------===//
+// PassRegistryImpl
+//
+
+namespace {
+struct PassRegistryImpl {
+ /// PassInfoMap - Keep track of the PassInfo object for each registered pass.
+ typedef DenseMap<const void*, const PassInfo*> MapType;
+ MapType PassInfoMap;
+
+ typedef StringMap<const PassInfo*> StringMapType;
+ StringMapType PassInfoStringMap;
+
+ /// AnalysisGroupInfo - Keep track of information for each analysis group.
+ struct AnalysisGroupInfo {
+ SmallPtrSet<const PassInfo *, 8> Implementations;
+ };
+ DenseMap<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
+
+ std::vector<const PassInfo*> ToFree;
+ std::vector<PassRegistrationListener*> Listeners;
+};
+} // end anonymous namespace
+
+void *PassRegistry::getImpl() const {
+ if (!pImpl)
+ pImpl = new PassRegistryImpl();
+ return pImpl;
+}
+
+//===----------------------------------------------------------------------===//
+// Accessors
+//
+
+PassRegistry::~PassRegistry() {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(pImpl);
+
+ for (std::vector<const PassInfo*>::iterator I = Impl->ToFree.begin(),
+ E = Impl->ToFree.end(); I != E; ++I)
+ delete *I;
+
+ delete Impl;
+ pImpl = 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI);
+ return I != Impl->PassInfoMap.end() ? I->second : 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::StringMapType::const_iterator
+ I = Impl->PassInfoStringMap.find(Arg);
+ return I != Impl->PassInfoStringMap.end() ? I->second : 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Registration mechanism
+//
+
+void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ bool Inserted =
+ Impl->PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+ assert(Inserted && "Pass registered multiple times!");
+ (void)Inserted;
+ Impl->PassInfoStringMap[PI.getPassArgument()] = &PI;
+
+ // Notify any listeners.
+ for (std::vector<PassRegistrationListener*>::iterator
+ I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I)
+ (*I)->passRegistered(&PI);
+
+ if (ShouldFree) Impl->ToFree.push_back(&PI);
+}
+
+void PassRegistry::unregisterPass(const PassInfo &PI) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::MapType::iterator I =
+ Impl->PassInfoMap.find(PI.getTypeInfo());
+ assert(I != Impl->PassInfoMap.end() && "Pass registered but not in map!");
+
+ // Remove pass from the map.
+ Impl->PassInfoMap.erase(I);
+ Impl->PassInfoStringMap.erase(PI.getPassArgument());
+}
+
+void PassRegistry::enumerateWith(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ for (PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.begin(),
+ E = Impl->PassInfoMap.end(); I != E; ++I)
+ L->passEnumerate(I->second);
+}
+
+
+/// Analysis Group Mechanisms.
+void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
+ const void *PassID,
+ PassInfo& Registeree,
+ bool isDefault,
+ bool ShouldFree) {
+ PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
+ if (InterfaceInfo == 0) {
+ // First reference to Interface, register it now.
+ registerPass(Registeree);
+ InterfaceInfo = &Registeree;
+ }
+ assert(Registeree.isAnalysisGroup() &&
+ "Trying to join an analysis group that is a normal pass!");
+
+ if (PassID) {
+ PassInfo *ImplementationInfo = const_cast<PassInfo*>(getPassInfo(PassID));
+ assert(ImplementationInfo &&
+ "Must register pass before adding to AnalysisGroup!");
+
+ sys::SmartScopedLock<true> Guard(*Lock);
+
+ // Make sure we keep track of the fact that the implementation implements
+ // the interface.
+ ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::AnalysisGroupInfo &AGI =
+ Impl->AnalysisGroupInfoMap[InterfaceInfo];
+ assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
+ "Cannot add a pass to the same analysis group more than once!");
+ AGI.Implementations.insert(ImplementationInfo);
+ if (isDefault) {
+ assert(InterfaceInfo->getNormalCtor() == 0 &&
+ "Default implementation for analysis group already specified!");
+ assert(ImplementationInfo->getNormalCtor() &&
+ "Cannot specify pass as default if it does not have a default ctor");
+ InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
+ }
+ }
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ if (ShouldFree) Impl->ToFree.push_back(&Registeree);
+}
+
+void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ Impl->Listeners.push_back(L);
+}
+
+void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+
+ // NOTE: This is necessary, because removeRegistrationListener() can be called
+ // as part of the llvm_shutdown sequence. Since we have no control over the
+ // order of that sequence, we need to gracefully handle the case where the
+ // PassRegistry is destructed before the object that triggers this call.
+ if (!pImpl) return;
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ std::vector<PassRegistrationListener*>::iterator I =
+ std::find(Impl->Listeners.begin(), Impl->Listeners.end(), L);
+ assert(I != Impl->Listeners.end() &&
+ "PassRegistrationListener not registered!");
+ Impl->Listeners.erase(I);
+}
diff --git a/contrib/llvm/lib/IR/PrintModulePass.cpp b/contrib/llvm/lib/IR/PrintModulePass.cpp
new file mode 100644
index 000000000000..5026bc2d9840
--- /dev/null
+++ b/contrib/llvm/lib/IR/PrintModulePass.cpp
@@ -0,0 +1,136 @@
+//===--- IR/PrintModulePass.cpp - Module/Function Printer -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PrintModulePass and PrintFunctionPass implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class PrintModulePass : public ModulePass {
+ std::string Banner;
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintModulePass() : ModulePass(ID), Out(&dbgs()),
+ DeleteStream(false) {}
+ PrintModulePass(const std::string &B, raw_ostream *o, bool DS)
+ : ModulePass(ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ ~PrintModulePass() {
+ if (DeleteStream) delete Out;
+ }
+
+ bool runOnModule(Module &M) {
+ (*Out) << Banner << M;
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class PrintFunctionPass : public FunctionPass {
+ std::string Banner; // String to print before each function
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintFunctionPass() : FunctionPass(ID), Banner(""), Out(&dbgs()),
+ DeleteStream(false) {}
+ PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS)
+ : FunctionPass(ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ ~PrintFunctionPass() {
+ if (DeleteStream) delete Out;
+ }
+
+ // runOnFunction - This pass just prints a banner followed by the
+ // function as it's processed.
+ //
+ bool runOnFunction(Function &F) {
+ (*Out) << Banner << static_cast<Value&>(F);
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class PrintBasicBlockPass : public BasicBlockPass {
+ std::string Banner;
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintBasicBlockPass() : BasicBlockPass(ID), Out(&dbgs()),
+ DeleteStream(false) {}
+ PrintBasicBlockPass(const std::string &B, raw_ostream *o, bool DS)
+ : BasicBlockPass(ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ ~PrintBasicBlockPass() {
+ if (DeleteStream) delete Out;
+ }
+
+ bool runOnBasicBlock(BasicBlock &BB) {
+ (*Out) << Banner << BB;
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char PrintModulePass::ID = 0;
+INITIALIZE_PASS(PrintModulePass, "print-module",
+ "Print module to stderr", false, false)
+char PrintFunctionPass::ID = 0;
+INITIALIZE_PASS(PrintFunctionPass, "print-function",
+ "Print function to stderr", false, false)
+char PrintBasicBlockPass::ID = 0;
+INITIALIZE_PASS(PrintBasicBlockPass, "print-bb",
+ "Print BB to stderr", false, false)
+
+/// createPrintModulePass - Create and return a pass that writes the
+/// module to the specified raw_ostream.
+ModulePass *llvm::createPrintModulePass(llvm::raw_ostream *OS,
+ bool DeleteStream,
+ const std::string &Banner) {
+ return new PrintModulePass(Banner, OS, DeleteStream);
+}
+
+/// createPrintFunctionPass - Create and return a pass that prints
+/// functions to the specified raw_ostream as they are processed.
+FunctionPass *llvm::createPrintFunctionPass(const std::string &Banner,
+ llvm::raw_ostream *OS,
+ bool DeleteStream) {
+ return new PrintFunctionPass(Banner, OS, DeleteStream);
+}
+
+/// createPrintBasicBlockPass - Create and return a pass that writes the
+/// BB to the specified raw_ostream.
+BasicBlockPass *llvm::createPrintBasicBlockPass(llvm::raw_ostream *OS,
+ bool DeleteStream,
+ const std::string &Banner) {
+ return new PrintBasicBlockPass(Banner, OS, DeleteStream);
+}
+
diff --git a/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h b/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
new file mode 100644
index 000000000000..5a383eee56c5
--- /dev/null
+++ b/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
@@ -0,0 +1,118 @@
+//===-- llvm/SymbolTableListTraitsImpl.h - Implementation ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stickier parts of the SymbolTableListTraits class,
+// and is explicitly instantiated where needed to avoid defining all this code
+// in a widely used header.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+#define LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+
+#include "llvm/IR/SymbolTableListTraits.h"
+#include "llvm/IR/ValueSymbolTable.h"
+
+namespace llvm {
+
+/// setSymTabObject - This is called when (f.e.) the parent of a basic block
+/// changes. This requires us to remove all the instruction symtab entries from
+/// the current function and reinsert them into the new function.
+template<typename ValueSubClass, typename ItemParentClass>
+template<typename TPtr>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::setSymTabObject(TPtr *Dest, TPtr Src) {
+ // Get the old symtab and value list before doing the assignment.
+ ValueSymbolTable *OldST = TraitsClass::getSymTab(getListOwner());
+
+ // Do it.
+ *Dest = Src;
+
+ // Get the new SymTab object.
+ ValueSymbolTable *NewST = TraitsClass::getSymTab(getListOwner());
+
+ // If there is nothing to do, quick exit.
+ if (OldST == NewST) return;
+
+ // Move all the elements from the old symtab to the new one.
+ iplist<ValueSubClass> &ItemList = TraitsClass::getList(getListOwner());
+ if (ItemList.empty()) return;
+
+ if (OldST) {
+ // Remove all entries from the previous symtab.
+ for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
+ I != ItemList.end(); ++I)
+ if (I->hasName())
+ OldST->removeValueName(I->getValueName());
+ }
+
+ if (NewST) {
+ // Add all of the items to the new symtab.
+ for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
+ I != ItemList.end(); ++I)
+ if (I->hasName())
+ NewST->reinsertValue(I);
+ }
+
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::addNodeToList(ValueSubClass *V) {
+ assert(V->getParent() == 0 && "Value already in a container!!");
+ ItemParentClass *Owner = getListOwner();
+ V->setParent(Owner);
+ if (V->hasName())
+ if (ValueSymbolTable *ST = TraitsClass::getSymTab(Owner))
+ ST->reinsertValue(V);
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::removeNodeFromList(ValueSubClass *V) {
+ V->setParent(0);
+ if (V->hasName())
+ if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner()))
+ ST->removeValueName(V->getValueName());
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::transferNodesFromList(ilist_traits<ValueSubClass> &L2,
+ ilist_iterator<ValueSubClass> first,
+ ilist_iterator<ValueSubClass> last) {
+ // We only have to do work here if transferring instructions between BBs
+ ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
+ if (NewIP == OldIP) return; // No work to do at all...
+
+ // We only have to update symbol table entries if we are transferring the
+ // instructions to a different symtab object...
+ ValueSymbolTable *NewST = TraitsClass::getSymTab(NewIP);
+ ValueSymbolTable *OldST = TraitsClass::getSymTab(OldIP);
+ if (NewST != OldST) {
+ for (; first != last; ++first) {
+ ValueSubClass &V = *first;
+ bool HasName = V.hasName();
+ if (OldST && HasName)
+ OldST->removeValueName(V.getValueName());
+ V.setParent(NewIP);
+ if (NewST && HasName)
+ NewST->reinsertValue(&V);
+ }
+ } else {
+ // Just transferring between blocks in the same function, simply update the
+ // parent fields in the instructions...
+ for (; first != last; ++first)
+ first->setParent(NewIP);
+ }
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/IR/Type.cpp b/contrib/llvm/lib/IR/Type.cpp
new file mode 100644
index 000000000000..1e6a51ab108c
--- /dev/null
+++ b/contrib/llvm/lib/IR/Type.cpp
@@ -0,0 +1,767 @@
+//===-- Type.cpp - Implement the Type class -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Type class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Type.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Module.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Type Class Implementation
+//===----------------------------------------------------------------------===//
+
+Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
+ switch (IDNumber) {
+ case VoidTyID : return getVoidTy(C);
+ case HalfTyID : return getHalfTy(C);
+ case FloatTyID : return getFloatTy(C);
+ case DoubleTyID : return getDoubleTy(C);
+ case X86_FP80TyID : return getX86_FP80Ty(C);
+ case FP128TyID : return getFP128Ty(C);
+ case PPC_FP128TyID : return getPPC_FP128Ty(C);
+ case LabelTyID : return getLabelTy(C);
+ case MetadataTyID : return getMetadataTy(C);
+ case X86_MMXTyID : return getX86_MMXTy(C);
+ default:
+ return 0;
+ }
+}
+
+/// getScalarType - If this is a vector type, return the element type,
+/// otherwise return this.
+Type *Type::getScalarType() {
+ if (VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType();
+ return this;
+}
+
+const Type *Type::getScalarType() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType();
+ return this;
+}
+
+/// isIntegerTy - Return true if this is an IntegerType of the specified width.
+bool Type::isIntegerTy(unsigned Bitwidth) const {
+ return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
+}
+
+// canLosslesslyBitCastTo - Return true if this type can be converted to
+// 'Ty' without any reinterpretation of bits. For example, i8* to i32*.
+//
+bool Type::canLosslesslyBitCastTo(Type *Ty) const {
+ // Identity cast means no change so return true
+ if (this == Ty)
+ return true;
+
+ // They are not convertible unless they are at least first class types
+ if (!this->isFirstClassType() || !Ty->isFirstClassType())
+ return false;
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not. Also, 64-bit vector types can be
+ // converted to x86mmx.
+ if (const VectorType *thisPTy = dyn_cast<VectorType>(this)) {
+ if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ return thisPTy->getBitWidth() == thatPTy->getBitWidth();
+ if (Ty->getTypeID() == Type::X86_MMXTyID &&
+ thisPTy->getBitWidth() == 64)
+ return true;
+ }
+
+ if (this->getTypeID() == Type::X86_MMXTyID)
+ if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ if (thatPTy->getBitWidth() == 64)
+ return true;
+
+ // At this point we have only various mismatches of the first class types
+ // remaining and ptr->ptr. Just select the lossless conversions. Everything
+ // else is not lossless.
+ if (this->isPointerTy())
+ return Ty->isPointerTy();
+ return false; // Other types have no identity values
+}
+
+bool Type::isEmptyTy() const {
+ const ArrayType *ATy = dyn_cast<ArrayType>(this);
+ if (ATy) {
+ unsigned NumElements = ATy->getNumElements();
+ return NumElements == 0 || ATy->getElementType()->isEmptyTy();
+ }
+
+ const StructType *STy = dyn_cast<StructType>(this);
+ if (STy) {
+ unsigned NumElements = STy->getNumElements();
+ for (unsigned i = 0; i < NumElements; ++i)
+ if (!STy->getElementType(i)->isEmptyTy())
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+unsigned Type::getPrimitiveSizeInBits() const {
+ switch (getTypeID()) {
+ case Type::HalfTyID: return 16;
+ case Type::FloatTyID: return 32;
+ case Type::DoubleTyID: return 64;
+ case Type::X86_FP80TyID: return 80;
+ case Type::FP128TyID: return 128;
+ case Type::PPC_FP128TyID: return 128;
+ case Type::X86_MMXTyID: return 64;
+ case Type::IntegerTyID: return cast<IntegerType>(this)->getBitWidth();
+ case Type::VectorTyID: return cast<VectorType>(this)->getBitWidth();
+ default: return 0;
+ }
+}
+
+/// getScalarSizeInBits - If this is a vector type, return the
+/// getPrimitiveSizeInBits value for the element type. Otherwise return the
+/// getPrimitiveSizeInBits value for this type.
+unsigned Type::getScalarSizeInBits() {
+ return getScalarType()->getPrimitiveSizeInBits();
+}
+
+/// getFPMantissaWidth - Return the width of the mantissa of this type. This
+/// is only valid on floating point types. If the FP type does not
+/// have a stable mantissa (e.g. ppc long double), this method returns -1.
+int Type::getFPMantissaWidth() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->getFPMantissaWidth();
+ assert(isFloatingPointTy() && "Not a floating point type!");
+ if (getTypeID() == HalfTyID) return 11;
+ if (getTypeID() == FloatTyID) return 24;
+ if (getTypeID() == DoubleTyID) return 53;
+ if (getTypeID() == X86_FP80TyID) return 64;
+ if (getTypeID() == FP128TyID) return 113;
+ assert(getTypeID() == PPC_FP128TyID && "unknown fp type");
+ return -1;
+}
+
+/// isSizedDerivedType - Derived types like structures and arrays are sized
+/// iff all of the members of the type are sized as well. Since asking for
+/// their size is relatively uncommon, move this operation out of line.
+bool Type::isSizedDerivedType() const {
+ if (this->isIntegerTy())
+ return true;
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
+ return ATy->getElementType()->isSized();
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->isSized();
+
+ if (!this->isStructTy())
+ return false;
+
+ return cast<StructType>(this)->isSized();
+}
+
+//===----------------------------------------------------------------------===//
+// Subclass Helper Methods
+//===----------------------------------------------------------------------===//
+
+unsigned Type::getIntegerBitWidth() const {
+ return cast<IntegerType>(this)->getBitWidth();
+}
+
+bool Type::isFunctionVarArg() const {
+ return cast<FunctionType>(this)->isVarArg();
+}
+
+Type *Type::getFunctionParamType(unsigned i) const {
+ return cast<FunctionType>(this)->getParamType(i);
+}
+
+unsigned Type::getFunctionNumParams() const {
+ return cast<FunctionType>(this)->getNumParams();
+}
+
+StringRef Type::getStructName() const {
+ return cast<StructType>(this)->getName();
+}
+
+unsigned Type::getStructNumElements() const {
+ return cast<StructType>(this)->getNumElements();
+}
+
+Type *Type::getStructElementType(unsigned N) const {
+ return cast<StructType>(this)->getElementType(N);
+}
+
+Type *Type::getSequentialElementType() const {
+ return cast<SequentialType>(this)->getElementType();
+}
+
+uint64_t Type::getArrayNumElements() const {
+ return cast<ArrayType>(this)->getNumElements();
+}
+
+unsigned Type::getVectorNumElements() const {
+ return cast<VectorType>(this)->getNumElements();
+}
+
+unsigned Type::getPointerAddressSpace() const {
+ return cast<PointerType>(getScalarType())->getAddressSpace();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Primitive 'Type' data
+//===----------------------------------------------------------------------===//
+
+Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; }
+Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; }
+Type *Type::getHalfTy(LLVMContext &C) { return &C.pImpl->HalfTy; }
+Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; }
+Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; }
+Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; }
+Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; }
+Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; }
+Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; }
+Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; }
+
+IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; }
+IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; }
+IntegerType *Type::getInt16Ty(LLVMContext &C) { return &C.pImpl->Int16Ty; }
+IntegerType *Type::getInt32Ty(LLVMContext &C) { return &C.pImpl->Int32Ty; }
+IntegerType *Type::getInt64Ty(LLVMContext &C) { return &C.pImpl->Int64Ty; }
+
+IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
+ return IntegerType::get(C, N);
+}
+
+PointerType *Type::getHalfPtrTy(LLVMContext &C, unsigned AS) {
+ return getHalfTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
+ return getFloatTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
+ return getDoubleTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_FP80Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
+ return getFP128Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
+ return getPPC_FP128Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_MMXTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
+ return getIntNTy(C, N)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt1Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt8Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt16Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt32Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt64Ty(C)->getPointerTo(AS);
+}
+
+
+//===----------------------------------------------------------------------===//
+// IntegerType Implementation
+//===----------------------------------------------------------------------===//
+
+IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
+ assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
+ assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
+
+ // Check for the built-in integer types
+ switch (NumBits) {
+ case 1: return cast<IntegerType>(Type::getInt1Ty(C));
+ case 8: return cast<IntegerType>(Type::getInt8Ty(C));
+ case 16: return cast<IntegerType>(Type::getInt16Ty(C));
+ case 32: return cast<IntegerType>(Type::getInt32Ty(C));
+ case 64: return cast<IntegerType>(Type::getInt64Ty(C));
+ default:
+ break;
+ }
+
+ IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
+
+ if (Entry == 0)
+ Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
+
+ return Entry;
+}
+
+bool IntegerType::isPowerOf2ByteWidth() const {
+ unsigned BitWidth = getBitWidth();
+ return (BitWidth > 7) && isPowerOf2_32(BitWidth);
+}
+
+APInt IntegerType::getMask() const {
+ return APInt::getAllOnesValue(getBitWidth());
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionType Implementation
+//===----------------------------------------------------------------------===//
+
+FunctionType::FunctionType(Type *Result, ArrayRef<Type*> Params,
+ bool IsVarArgs)
+ : Type(Result->getContext(), FunctionTyID) {
+ Type **SubTys = reinterpret_cast<Type**>(this+1);
+ assert(isValidReturnType(Result) && "invalid return type for function");
+ setSubclassData(IsVarArgs);
+
+ SubTys[0] = const_cast<Type*>(Result);
+
+ for (unsigned i = 0, e = Params.size(); i != e; ++i) {
+ assert(isValidArgumentType(Params[i]) &&
+ "Not a valid type for function argument!");
+ SubTys[i+1] = Params[i];
+ }
+
+ ContainedTys = SubTys;
+ NumContainedTys = Params.size() + 1; // + 1 for result type
+}
+
+// FunctionType::get - The factory function for the FunctionType class.
+FunctionType *FunctionType::get(Type *ReturnType,
+ ArrayRef<Type*> Params, bool isVarArg) {
+ LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
+ FunctionTypeKeyInfo::KeyTy Key(ReturnType, Params, isVarArg);
+ LLVMContextImpl::FunctionTypeMap::iterator I =
+ pImpl->FunctionTypes.find_as(Key);
+ FunctionType *FT;
+
+ if (I == pImpl->FunctionTypes.end()) {
+ FT = (FunctionType*) pImpl->TypeAllocator.
+ Allocate(sizeof(FunctionType) + sizeof(Type*) * (Params.size() + 1),
+ AlignOf<FunctionType>::Alignment);
+ new (FT) FunctionType(ReturnType, Params, isVarArg);
+ pImpl->FunctionTypes[FT] = true;
+ } else {
+ FT = I->first;
+ }
+
+ return FT;
+}
+
+FunctionType *FunctionType::get(Type *Result, bool isVarArg) {
+ return get(Result, ArrayRef<Type *>(), isVarArg);
+}
+
+/// isValidReturnType - Return true if the specified type is valid as a return
+/// type.
+bool FunctionType::isValidReturnType(Type *RetTy) {
+ return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
+ !RetTy->isMetadataTy();
+}
+
+/// isValidArgumentType - Return true if the specified type is valid as an
+/// argument type.
+bool FunctionType::isValidArgumentType(Type *ArgTy) {
+ return ArgTy->isFirstClassType();
+}
+
+//===----------------------------------------------------------------------===//
+// StructType Implementation
+//===----------------------------------------------------------------------===//
+
+// Primitive Constructors.
+
+StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
+ bool isPacked) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ AnonStructTypeKeyInfo::KeyTy Key(ETypes, isPacked);
+ LLVMContextImpl::StructTypeMap::iterator I =
+ pImpl->AnonStructTypes.find_as(Key);
+ StructType *ST;
+
+ if (I == pImpl->AnonStructTypes.end()) {
+ // Value not found. Create a new type!
+ ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
+ ST->setBody(ETypes, isPacked);
+ Context.pImpl->AnonStructTypes[ST] = true;
+ } else {
+ ST = I->first;
+ }
+
+ return ST;
+}
+
+void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
+ assert(isOpaque() && "Struct body already set!");
+
+ setSubclassData(getSubclassData() | SCDB_HasBody);
+ if (isPacked)
+ setSubclassData(getSubclassData() | SCDB_Packed);
+
+ unsigned NumElements = Elements.size();
+ Type **Elts = getContext().pImpl->TypeAllocator.Allocate<Type*>(NumElements);
+ memcpy(Elts, Elements.data(), sizeof(Elements[0]) * NumElements);
+
+ ContainedTys = Elts;
+ NumContainedTys = NumElements;
+}
+
+void StructType::setName(StringRef Name) {
+ if (Name == getName()) return;
+
+ StringMap<StructType *> &SymbolTable = getContext().pImpl->NamedStructTypes;
+ typedef StringMap<StructType *>::MapEntryTy EntryTy;
+
+ // If this struct already had a name, remove its symbol table entry. Don't
+ // delete the data yet because it may be part of the new name.
+ if (SymbolTableEntry)
+ SymbolTable.remove((EntryTy *)SymbolTableEntry);
+
+ // If this is just removing the name, we're done.
+ if (Name.empty()) {
+ if (SymbolTableEntry) {
+ // Delete the old string data.
+ ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator());
+ SymbolTableEntry = 0;
+ }
+ return;
+ }
+
+ // Look up the entry for the name.
+ EntryTy *Entry = &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name);
+
+ // While we have a name collision, try a random rename.
+ if (Entry->getValue()) {
+ SmallString<64> TempStr(Name);
+ TempStr.push_back('.');
+ raw_svector_ostream TmpStream(TempStr);
+ unsigned NameSize = Name.size();
+
+ do {
+ TempStr.resize(NameSize + 1);
+ TmpStream.resync();
+ TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
+
+ Entry = &getContext().pImpl->
+ NamedStructTypes.GetOrCreateValue(TmpStream.str());
+ } while (Entry->getValue());
+ }
+
+ // Okay, we found an entry that isn't used. It's us!
+ Entry->setValue(this);
+
+ // Delete the old string data.
+ if (SymbolTableEntry)
+ ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator());
+ SymbolTableEntry = Entry;
+}
+
+//===----------------------------------------------------------------------===//
+// StructType Helper functions.
+
+StructType *StructType::create(LLVMContext &Context, StringRef Name) {
+ StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ if (!Name.empty())
+ ST->setName(Name);
+ return ST;
+}
+
+StructType *StructType::get(LLVMContext &Context, bool isPacked) {
+ return get(Context, llvm::ArrayRef<Type*>(), isPacked);
+}
+
+StructType *StructType::get(Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ LLVMContext &Ctx = type->getContext();
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ return llvm::StructType::get(Ctx, StructFields);
+}
+
+StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements,
+ StringRef Name, bool isPacked) {
+ StructType *ST = create(Context, Name);
+ ST->setBody(Elements, isPacked);
+ return ST;
+}
+
+StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements) {
+ return create(Context, Elements, StringRef());
+}
+
+StructType *StructType::create(LLVMContext &Context) {
+ return create(Context, StringRef());
+}
+
+StructType *StructType::create(ArrayRef<Type*> Elements, StringRef Name,
+ bool isPacked) {
+ assert(!Elements.empty() &&
+ "This method may not be invoked with an empty list");
+ return create(Elements[0]->getContext(), Elements, Name, isPacked);
+}
+
+StructType *StructType::create(ArrayRef<Type*> Elements) {
+ assert(!Elements.empty() &&
+ "This method may not be invoked with an empty list");
+ return create(Elements[0]->getContext(), Elements, StringRef());
+}
+
+StructType *StructType::create(StringRef Name, Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ LLVMContext &Ctx = type->getContext();
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ return llvm::StructType::create(Ctx, StructFields, Name);
+}
+
+bool StructType::isSized() const {
+ if ((getSubclassData() & SCDB_IsSized) != 0)
+ return true;
+ if (isOpaque())
+ return false;
+
+ // Okay, our struct is sized if all of the elements are, but if one of the
+ // elements is opaque, the struct isn't sized *yet*, but may become sized in
+ // the future, so just bail out without caching.
+ for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+ if (!(*I)->isSized())
+ return false;
+
+ // Here we cheat a bit and cast away const-ness. The goal is to memoize when
+ // we find a sized type, as types can only move from opaque to sized, not the
+ // other way.
+ const_cast<StructType*>(this)->setSubclassData(
+ getSubclassData() | SCDB_IsSized);
+ return true;
+}
+
+StringRef StructType::getName() const {
+ assert(!isLiteral() && "Literal structs never have names");
+ if (SymbolTableEntry == 0) return StringRef();
+
+ return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey();
+}
+
+void StructType::setBody(Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ setBody(StructFields);
+}
+
+bool StructType::isValidElementType(Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+}
+
+/// isLayoutIdentical - Return true if this is layout identical to the
+/// specified struct.
+bool StructType::isLayoutIdentical(StructType *Other) const {
+ if (this == Other) return true;
+
+ if (isPacked() != Other->isPacked() ||
+ getNumElements() != Other->getNumElements())
+ return false;
+
+ return std::equal(element_begin(), element_end(), Other->element_begin());
+}
+
+/// getTypeByName - Return the type with the specified name, or null if there
+/// is none by that name.
+StructType *Module::getTypeByName(StringRef Name) const {
+ StringMap<StructType*>::iterator I =
+ getContext().pImpl->NamedStructTypes.find(Name);
+ if (I != getContext().pImpl->NamedStructTypes.end())
+ return I->second;
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// CompositeType Implementation
+//===----------------------------------------------------------------------===//
+
+Type *CompositeType::getTypeAtIndex(const Value *V) {
+ if (StructType *STy = dyn_cast<StructType>(this)) {
+ unsigned Idx =
+ (unsigned)cast<Constant>(V)->getUniqueInteger().getZExtValue();
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return STy->getElementType(Idx);
+ }
+
+ return cast<SequentialType>(this)->getElementType();
+}
+Type *CompositeType::getTypeAtIndex(unsigned Idx) {
+ if (StructType *STy = dyn_cast<StructType>(this)) {
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return STy->getElementType(Idx);
+ }
+
+ return cast<SequentialType>(this)->getElementType();
+}
+bool CompositeType::indexValid(const Value *V) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ // Structure indexes require (vectors of) 32-bit integer constants. In the
+ // vector case all of the indices must be equal.
+ if (!V->getType()->getScalarType()->isIntegerTy(32))
+ return false;
+ const Constant *C = dyn_cast<Constant>(V);
+ if (C && V->getType()->isVectorTy())
+ C = C->getSplatValue();
+ const ConstantInt *CU = dyn_cast_or_null<ConstantInt>(C);
+ return CU && CU->getZExtValue() < STy->getNumElements();
+ }
+
+ // Sequential types can be indexed by any integer.
+ return V->getType()->isIntOrIntVectorTy();
+}
+
+bool CompositeType::indexValid(unsigned Idx) const {
+ if (const StructType *STy = dyn_cast<StructType>(this))
+ return Idx < STy->getNumElements();
+ // Sequential types can be indexed by any integer.
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// ArrayType Implementation
+//===----------------------------------------------------------------------===//
+
+ArrayType::ArrayType(Type *ElType, uint64_t NumEl)
+ : SequentialType(ArrayTyID, ElType) {
+ NumElements = NumEl;
+}
+
+ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) {
+ Type *ElementType = const_cast<Type*>(elementType);
+ assert(isValidElementType(ElementType) && "Invalid type for array element!");
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+ ArrayType *&Entry =
+ pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
+
+ if (Entry == 0)
+ Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
+ return Entry;
+}
+
+bool ArrayType::isValidElementType(Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+}
+
+//===----------------------------------------------------------------------===//
+// VectorType Implementation
+//===----------------------------------------------------------------------===//
+
+VectorType::VectorType(Type *ElType, unsigned NumEl)
+ : SequentialType(VectorTyID, ElType) {
+ NumElements = NumEl;
+}
+
+VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
+ Type *ElementType = const_cast<Type*>(elementType);
+ assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
+ assert(isValidElementType(ElementType) &&
+ "Elements of a VectorType must be a primitive type");
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+ VectorType *&Entry = ElementType->getContext().pImpl
+ ->VectorTypes[std::make_pair(ElementType, NumElements)];
+
+ if (Entry == 0)
+ Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
+ return Entry;
+}
+
+bool VectorType::isValidElementType(Type *ElemTy) {
+ return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() ||
+ ElemTy->isPointerTy();
+}
+
+//===----------------------------------------------------------------------===//
+// PointerType Implementation
+//===----------------------------------------------------------------------===//
+
+PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
+ assert(EltTy && "Can't get a pointer to <null> type!");
+ assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
+
+ LLVMContextImpl *CImpl = EltTy->getContext().pImpl;
+
+ // Since AddressSpace #0 is the common case, we special case it.
+ PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy]
+ : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
+
+ if (Entry == 0)
+ Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
+ return Entry;
+}
+
+
+PointerType::PointerType(Type *E, unsigned AddrSpace)
+ : SequentialType(PointerTyID, E) {
+#ifndef NDEBUG
+ const unsigned oldNCT = NumContainedTys;
+#endif
+ setSubclassData(AddrSpace);
+ // Check for miscompile. PR11652.
+ assert(oldNCT == NumContainedTys && "bitfield written out of bounds?");
+}
+
+PointerType *Type::getPointerTo(unsigned addrs) {
+ return PointerType::get(this, addrs);
+}
+
+bool PointerType::isValidElementType(Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy();
+}
diff --git a/contrib/llvm/lib/IR/TypeFinder.cpp b/contrib/llvm/lib/IR/TypeFinder.cpp
new file mode 100644
index 000000000000..d5e620350705
--- /dev/null
+++ b/contrib/llvm/lib/IR/TypeFinder.cpp
@@ -0,0 +1,148 @@
+//===-- TypeFinder.cpp - Implement the TypeFinder class -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TypeFinder class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+using namespace llvm;
+
+void TypeFinder::run(const Module &M, bool onlyNamed) {
+ OnlyNamed = onlyNamed;
+
+ // Get types from global variables.
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (I->hasInitializer())
+ incorporateValue(I->getInitializer());
+ }
+
+ // Get types from aliases.
+ for (Module::const_alias_iterator I = M.alias_begin(),
+ E = M.alias_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (const Value *Aliasee = I->getAliasee())
+ incorporateValue(Aliasee);
+ }
+
+ // Get types from functions.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+ for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
+ incorporateType(FI->getType());
+
+ // First incorporate the arguments.
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI)
+ incorporateValue(AI);
+
+ for (Function::const_iterator BB = FI->begin(), E = FI->end();
+ BB != E;++BB)
+ for (BasicBlock::const_iterator II = BB->begin(),
+ E = BB->end(); II != E; ++II) {
+ const Instruction &I = *II;
+
+ // Incorporate the type of the instruction.
+ incorporateType(I.getType());
+
+ // Incorporate non-instruction operand types. (We are incorporating all
+ // instructions with this loop.)
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ if (!isa<Instruction>(OI))
+ incorporateValue(*OI);
+
+ // Incorporate types hiding in metadata.
+ I.getAllMetadataOtherThanDebugLoc(MDForInst);
+ for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+ incorporateMDNode(MDForInst[i].second);
+
+ MDForInst.clear();
+ }
+ }
+
+ for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ incorporateMDNode(NMD->getOperand(i));
+ }
+}
+
+void TypeFinder::clear() {
+ VisitedConstants.clear();
+ VisitedTypes.clear();
+ StructTypes.clear();
+}
+
+/// incorporateType - This method adds the type to the list of used structures
+/// if it's not in there already.
+void TypeFinder::incorporateType(Type *Ty) {
+ // Check to see if we're already visited this type.
+ if (!VisitedTypes.insert(Ty).second)
+ return;
+
+ // If this is a structure or opaque type, add a name for the type.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!OnlyNamed || STy->hasName())
+ StructTypes.push_back(STy);
+
+ // Recursively walk all contained types.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ incorporateType(*I);
+}
+
+/// incorporateValue - This method is used to walk operand lists finding types
+/// hiding in constant expressions and other operands that won't be walked in
+/// other ways. GlobalValues, basic blocks, instructions, and inst operands are
+/// all explicitly enumerated.
+void TypeFinder::incorporateValue(const Value *V) {
+ if (const MDNode *M = dyn_cast<MDNode>(V))
+ return incorporateMDNode(M);
+
+ if (!isa<Constant>(V) || isa<GlobalValue>(V)) return;
+
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Check this type.
+ incorporateType(V->getType());
+
+ // If this is an instruction, we incorporate it separately.
+ if (isa<Instruction>(V))
+ return;
+
+ // Look in operands for types.
+ const User *U = cast<User>(V);
+ for (Constant::const_op_iterator I = U->op_begin(),
+ E = U->op_end(); I != E;++I)
+ incorporateValue(*I);
+}
+
+/// incorporateMDNode - This method is used to walk the operands of an MDNode to
+/// find types hiding within.
+void TypeFinder::incorporateMDNode(const MDNode *V) {
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Look in operands for types.
+ for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
+ if (Value *Op = V->getOperand(i))
+ incorporateValue(Op);
+}
diff --git a/contrib/llvm/lib/IR/Use.cpp b/contrib/llvm/lib/IR/Use.cpp
new file mode 100644
index 000000000000..1d343e803094
--- /dev/null
+++ b/contrib/llvm/lib/IR/Use.cpp
@@ -0,0 +1,145 @@
+//===-- Use.cpp - Implement the Use class ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the algorithm for finding the User of a Use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Value.h"
+#include <new>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Use swap Implementation
+//===----------------------------------------------------------------------===//
+
+void Use::swap(Use &RHS) {
+ Value *V1(Val);
+ Value *V2(RHS.Val);
+ if (V1 != V2) {
+ if (V1) {
+ removeFromList();
+ }
+
+ if (V2) {
+ RHS.removeFromList();
+ Val = V2;
+ V2->addUse(*this);
+ } else {
+ Val = 0;
+ }
+
+ if (V1) {
+ RHS.Val = V1;
+ V1->addUse(RHS);
+ } else {
+ RHS.Val = 0;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Use getImpliedUser Implementation
+//===----------------------------------------------------------------------===//
+
+const Use *Use::getImpliedUser() const {
+ const Use *Current = this;
+
+ while (true) {
+ unsigned Tag = (Current++)->Prev.getInt();
+ switch (Tag) {
+ case zeroDigitTag:
+ case oneDigitTag:
+ continue;
+
+ case stopTag: {
+ ++Current;
+ ptrdiff_t Offset = 1;
+ while (true) {
+ unsigned Tag = Current->Prev.getInt();
+ switch (Tag) {
+ case zeroDigitTag:
+ case oneDigitTag:
+ ++Current;
+ Offset = (Offset << 1) + Tag;
+ continue;
+ default:
+ return Current + Offset;
+ }
+ }
+ }
+
+ case fullStopTag:
+ return Current;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Use initTags Implementation
+//===----------------------------------------------------------------------===//
+
+Use *Use::initTags(Use * const Start, Use *Stop) {
+ ptrdiff_t Done = 0;
+ while (Done < 20) {
+ if (Start == Stop--)
+ return Start;
+ static const PrevPtrTag tags[20] = { fullStopTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, stopTag,
+ zeroDigitTag, oneDigitTag, oneDigitTag,
+ stopTag, zeroDigitTag, oneDigitTag,
+ zeroDigitTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, oneDigitTag,
+ oneDigitTag, stopTag
+ };
+ new(Stop) Use(tags[Done++]);
+ }
+
+ ptrdiff_t Count = Done;
+ while (Start != Stop) {
+ --Stop;
+ if (!Count) {
+ new(Stop) Use(stopTag);
+ ++Done;
+ Count = Done;
+ } else {
+ new(Stop) Use(PrevPtrTag(Count & 1));
+ Count >>= 1;
+ ++Done;
+ }
+ }
+
+ return Start;
+}
+
+//===----------------------------------------------------------------------===//
+// Use zap Implementation
+//===----------------------------------------------------------------------===//
+
+void Use::zap(Use *Start, const Use *Stop, bool del) {
+ while (Start != Stop)
+ (--Stop)->~Use();
+ if (del)
+ ::operator delete(Start);
+}
+
+//===----------------------------------------------------------------------===//
+// Use getUser Implementation
+//===----------------------------------------------------------------------===//
+
+User *Use::getUser() const {
+ const Use *End = getImpliedUser();
+ const UserRef *ref = reinterpret_cast<const UserRef*>(End);
+ return ref->getInt()
+ ? ref->getPointer()
+ : reinterpret_cast<User*>(const_cast<Use*>(End));
+}
+
+} // End llvm namespace
diff --git a/contrib/llvm/lib/IR/User.cpp b/contrib/llvm/lib/IR/User.cpp
new file mode 100644
index 000000000000..940682826acc
--- /dev/null
+++ b/contrib/llvm/lib/IR/User.cpp
@@ -0,0 +1,90 @@
+//===-- User.cpp - Implement the User class -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/User.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Operator.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// User Class
+//===----------------------------------------------------------------------===//
+
+void User::anchor() {}
+
+// replaceUsesOfWith - Replaces all references to the "From" definition with
+// references to the "To" definition.
+//
+void User::replaceUsesOfWith(Value *From, Value *To) {
+ if (From == To) return; // Duh what?
+
+ assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
+ "Cannot call User::replaceUsesOfWith on a constant!");
+
+ for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
+ if (getOperand(i) == From) { // Is This operand is pointing to oldval?
+ // The side effects of this setOperand call include linking to
+ // "To", adding "this" to the uses list of To, and
+ // most importantly, removing "this" from the use list of "From".
+ setOperand(i, To); // Fix it now...
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// User allocHungoffUses Implementation
+//===----------------------------------------------------------------------===//
+
+Use *User::allocHungoffUses(unsigned N) const {
+ // Allocate the array of Uses, followed by a pointer (with bottom bit set) to
+ // the User.
+ size_t size = N * sizeof(Use) + sizeof(Use::UserRef);
+ Use *Begin = static_cast<Use*>(::operator new(size));
+ Use *End = Begin + N;
+ (void) new(End) Use::UserRef(const_cast<User*>(this), 1);
+ return Use::initTags(Begin, End);
+}
+
+//===----------------------------------------------------------------------===//
+// User operator new Implementations
+//===----------------------------------------------------------------------===//
+
+void *User::operator new(size_t s, unsigned Us) {
+ void *Storage = ::operator new(s + sizeof(Use) * Us);
+ Use *Start = static_cast<Use*>(Storage);
+ Use *End = Start + Us;
+ User *Obj = reinterpret_cast<User*>(End);
+ Obj->OperandList = Start;
+ Obj->NumOperands = Us;
+ Use::initTags(Start, End);
+ return Obj;
+}
+
+//===----------------------------------------------------------------------===//
+// User operator delete Implementation
+//===----------------------------------------------------------------------===//
+
+void User::operator delete(void *Usr) {
+ User *Start = static_cast<User*>(Usr);
+ Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
+ // If there were hung-off uses, they will have been freed already and
+ // NumOperands reset to 0, so here we just free the User itself.
+ ::operator delete(Storage);
+}
+
+//===----------------------------------------------------------------------===//
+// Operator Class
+//===----------------------------------------------------------------------===//
+
+Operator::~Operator() {
+ llvm_unreachable("should never destroy an Operator");
+}
+
+} // End llvm namespace
diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp
new file mode 100644
index 000000000000..adc702e05e68
--- /dev/null
+++ b/contrib/llvm/lib/IR/Value.cpp
@@ -0,0 +1,701 @@
+//===-- Value.cpp - Implement the Value class -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Value, ValueHandle, and User classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Value.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/ValueHandle.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Value Class
+//===----------------------------------------------------------------------===//
+
+static inline Type *checkType(Type *Ty) {
+ assert(Ty && "Value defined with a null type: Error!");
+ return const_cast<Type*>(Ty);
+}
+
+Value::Value(Type *ty, unsigned scid)
+ : SubclassID(scid), HasValueHandle(0),
+ SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)),
+ UseList(0), Name(0) {
+ // FIXME: Why isn't this in the subclass gunk??
+ // Note, we cannot call isa<CallInst> before the CallInst has been
+ // constructed.
+ if (SubclassID == Instruction::Call || SubclassID == Instruction::Invoke)
+ assert((VTy->isFirstClassType() || VTy->isVoidTy() || VTy->isStructTy()) &&
+ "invalid CallInst type!");
+ else if (SubclassID != BasicBlockVal &&
+ (SubclassID < ConstantFirstVal || SubclassID > ConstantLastVal))
+ assert((VTy->isFirstClassType() || VTy->isVoidTy()) &&
+ "Cannot create non-first-class values except for constants!");
+}
+
+Value::~Value() {
+ // Notify all ValueHandles (if present) that this value is going away.
+ if (HasValueHandle)
+ ValueHandleBase::ValueIsDeleted(this);
+
+#ifndef NDEBUG // Only in -g mode...
+ // Check to make sure that there are no uses of this value that are still
+ // around when the value is destroyed. If there are, then we have a dangling
+ // reference and something is wrong. This code is here to print out what is
+ // still being referenced. The value in question should be printed as
+ // a <badref>
+ //
+ if (!use_empty()) {
+ dbgs() << "While deleting: " << *VTy << " %" << getName() << "\n";
+ for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
+ dbgs() << "Use still stuck around after Def is destroyed:"
+ << **I << "\n";
+ }
+#endif
+ assert(use_empty() && "Uses remain when a value is destroyed!");
+
+ // If this value is named, destroy the name. This should not be in a symtab
+ // at this point.
+ if (Name && SubclassID != MDStringVal)
+ Name->Destroy();
+
+ // There should be no uses of this object anymore, remove it.
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// hasNUses - Return true if this Value has exactly N users.
+///
+bool Value::hasNUses(unsigned N) const {
+ const_use_iterator UI = use_begin(), E = use_end();
+
+ for (; N; --N, ++UI)
+ if (UI == E) return false; // Too few.
+ return UI == E;
+}
+
+/// hasNUsesOrMore - Return true if this value has N users or more. This is
+/// logically equivalent to getNumUses() >= N.
+///
+bool Value::hasNUsesOrMore(unsigned N) const {
+ const_use_iterator UI = use_begin(), E = use_end();
+
+ for (; N; --N, ++UI)
+ if (UI == E) return false; // Too few.
+
+ return true;
+}
+
+/// isUsedInBasicBlock - Return true if this value is used in the specified
+/// basic block.
+bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
+ // Start by scanning over the instructions looking for a use before we start
+ // the expensive use iteration.
+ unsigned MaxBlockSize = 3;
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (std::find(I->op_begin(), I->op_end(), this) != I->op_end())
+ return true;
+ if (MaxBlockSize-- == 0) // If the block is larger fall back to use_iterator
+ break;
+ }
+
+ if (MaxBlockSize != 0) // We scanned the entire block and found no use.
+ return false;
+
+ for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ const Instruction *User = dyn_cast<Instruction>(*I);
+ if (User && User->getParent() == BB)
+ return true;
+ }
+ return false;
+}
+
+
+/// getNumUses - This method computes the number of uses of this Value. This
+/// is a linear time operation. Use hasOneUse or hasNUses to check for specific
+/// values.
+unsigned Value::getNumUses() const {
+ return (unsigned)std::distance(use_begin(), use_end());
+}
+
+static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
+ ST = 0;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (BasicBlock *P = I->getParent())
+ if (Function *PP = P->getParent())
+ ST = &PP->getValueSymbolTable();
+ } else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ if (Function *P = BB->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (Module *P = GV->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (Argument *A = dyn_cast<Argument>(V)) {
+ if (Function *P = A->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (isa<MDString>(V))
+ return true;
+ else {
+ assert(isa<Constant>(V) && "Unknown value type!");
+ return true; // no name is setable for this.
+ }
+ return false;
+}
+
+StringRef Value::getName() const {
+ // Make sure the empty string is still a C string. For historical reasons,
+ // some clients want to call .data() on the result and expect it to be null
+ // terminated.
+ if (!Name) return StringRef("", 0);
+ return Name->getKey();
+}
+
+void Value::setName(const Twine &NewName) {
+ assert(SubclassID != MDStringVal &&
+ "Cannot set the name of MDString with this method!");
+
+ // Fast path for common IRBuilder case of setName("") when there is no name.
+ if (NewName.isTriviallyEmpty() && !hasName())
+ return;
+
+ SmallString<256> NameData;
+ StringRef NameRef = NewName.toStringRef(NameData);
+
+ // Name isn't changing?
+ if (getName() == NameRef)
+ return;
+
+ assert(!getType()->isVoidTy() && "Cannot assign a name to void values!");
+
+ // Get the symbol table to update for this object.
+ ValueSymbolTable *ST;
+ if (getSymTab(this, ST))
+ return; // Cannot set a name on this value (e.g. constant).
+
+ if (Function *F = dyn_cast<Function>(this))
+ getContext().pImpl->IntrinsicIDCache.erase(F);
+
+ if (!ST) { // No symbol table to update? Just do the change.
+ if (NameRef.empty()) {
+ // Free the name for this value.
+ Name->Destroy();
+ Name = 0;
+ return;
+ }
+
+ if (Name)
+ Name->Destroy();
+
+ // NOTE: Could optimize for the case the name is shrinking to not deallocate
+ // then reallocated.
+
+ // Create the new name.
+ Name = ValueName::Create(NameRef.begin(), NameRef.end());
+ Name->setValue(this);
+ return;
+ }
+
+ // NOTE: Could optimize for the case the name is shrinking to not deallocate
+ // then reallocated.
+ if (hasName()) {
+ // Remove old name.
+ ST->removeValueName(Name);
+ Name->Destroy();
+ Name = 0;
+
+ if (NameRef.empty())
+ return;
+ }
+
+ // Name is changing to something new.
+ Name = ST->createValueName(NameRef, this);
+}
+
+
+/// takeName - transfer the name from V to this value, setting V's name to
+/// empty. It is an error to call V->takeName(V).
+void Value::takeName(Value *V) {
+ assert(SubclassID != MDStringVal && "Cannot take the name of an MDString!");
+
+ ValueSymbolTable *ST = 0;
+ // If this value has a name, drop it.
+ if (hasName()) {
+ // Get the symtab this is in.
+ if (getSymTab(this, ST)) {
+ // We can't set a name on this value, but we need to clear V's name if
+ // it has one.
+ if (V->hasName()) V->setName("");
+ return; // Cannot set a name on this value (e.g. constant).
+ }
+
+ // Remove old name.
+ if (ST)
+ ST->removeValueName(Name);
+ Name->Destroy();
+ Name = 0;
+ }
+
+ // Now we know that this has no name.
+
+ // If V has no name either, we're done.
+ if (!V->hasName()) return;
+
+ // Get this's symtab if we didn't before.
+ if (!ST) {
+ if (getSymTab(this, ST)) {
+ // Clear V's name.
+ V->setName("");
+ return; // Cannot set a name on this value (e.g. constant).
+ }
+ }
+
+ // Get V's ST, this should always succed, because V has a name.
+ ValueSymbolTable *VST;
+ bool Failure = getSymTab(V, VST);
+ assert(!Failure && "V has a name, so it should have a ST!"); (void)Failure;
+
+ // If these values are both in the same symtab, we can do this very fast.
+ // This works even if both values have no symtab yet.
+ if (ST == VST) {
+ // Take the name!
+ Name = V->Name;
+ V->Name = 0;
+ Name->setValue(this);
+ return;
+ }
+
+ // Otherwise, things are slightly more complex. Remove V's name from VST and
+ // then reinsert it into ST.
+
+ if (VST)
+ VST->removeValueName(V->Name);
+ Name = V->Name;
+ V->Name = 0;
+ Name->setValue(this);
+
+ if (ST)
+ ST->reinsertValue(this);
+}
+
+
+void Value::replaceAllUsesWith(Value *New) {
+ assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
+ assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
+ assert(New->getType() == getType() &&
+ "replaceAllUses of value with new value of different type!");
+
+ // Notify all ValueHandles (if present) that this value is going away.
+ if (HasValueHandle)
+ ValueHandleBase::ValueIsRAUWd(this, New);
+
+ while (!use_empty()) {
+ Use &U = *UseList;
+ // Must handle Constants specially, we cannot call replaceUsesOfWith on a
+ // constant because they are uniqued.
+ if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ if (!isa<GlobalValue>(C)) {
+ C->replaceUsesOfWithOnConstant(this, New, &U);
+ continue;
+ }
+ }
+
+ U.set(New);
+ }
+
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(this))
+ BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
+}
+
+namespace {
+// Various metrics for how much to strip off of pointers.
+enum PointerStripKind {
+ PSK_ZeroIndices,
+ PSK_InBoundsConstantIndices,
+ PSK_InBounds
+};
+
+template <PointerStripKind StripKind>
+static Value *stripPointerCastsAndOffsets(Value *V) {
+ if (!V->getType()->isPointerTy())
+ return V;
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ switch (StripKind) {
+ case PSK_ZeroIndices:
+ if (!GEP->hasAllZeroIndices())
+ return V;
+ break;
+ case PSK_InBoundsConstantIndices:
+ if (!GEP->hasAllConstantIndices())
+ return V;
+ // fallthrough
+ case PSK_InBounds:
+ if (!GEP->isInBounds())
+ return V;
+ break;
+ }
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ return V;
+}
+} // namespace
+
+Value *Value::stripPointerCasts() {
+ return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
+}
+
+Value *Value::stripInBoundsConstantOffsets() {
+ return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
+}
+
+Value *Value::stripInBoundsOffsets() {
+ return stripPointerCastsAndOffsets<PSK_InBounds>(this);
+}
+
+/// isDereferenceablePointer - Test if this value is always a pointer to
+/// allocated and suitably aligned memory for a simple load or store.
+static bool isDereferenceablePointer(const Value *V,
+ SmallPtrSet<const Value *, 32> &Visited) {
+ // Note that it is not safe to speculate into a malloc'd region because
+ // malloc may return null.
+ // It's also not always safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. Some cases could
+ // be handled using DataLayout to check sizes and alignments though.
+
+ // These are obviously ok.
+ if (isa<AllocaInst>(V)) return true;
+
+ // Global variables which can't collapse to null are ok.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return !GV->hasExternalWeakLinkage();
+
+ // byval arguments are ok.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+
+ // For GEPs, determine if the indexing lands within the allocated object.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ // Conservatively require that the base pointer be fully dereferenceable.
+ if (!Visited.insert(GEP->getOperand(0)))
+ return false;
+ if (!isDereferenceablePointer(GEP->getOperand(0), Visited))
+ return false;
+ // Check the indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::const_op_iterator I = GEP->op_begin()+1,
+ E = GEP->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ Type *Ty = *GTI++;
+ // Struct indices can't be out of bounds.
+ if (isa<StructType>(Ty))
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Index);
+ if (!CI)
+ return false;
+ // Zero is always ok.
+ if (CI->isZero())
+ continue;
+ // Check to see that it's within the bounds of an array.
+ ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ if (!ATy)
+ return false;
+ if (CI->getValue().getActiveBits() > 64)
+ return false;
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+ // Indices check out; this is dereferenceable.
+ return true;
+ }
+
+ // If we don't know, assume the worst.
+ return false;
+}
+
+/// isDereferenceablePointer - Test if this value is always a pointer to
+/// allocated and suitably aligned memory for a simple load or store.
+bool Value::isDereferenceablePointer() const {
+ SmallPtrSet<const Value *, 32> Visited;
+ return ::isDereferenceablePointer(this, Visited);
+}
+
+/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
+/// return the value in the PHI node corresponding to PredBB. If not, return
+/// ourself. This is useful if you want to know the value something has in a
+/// predecessor block.
+Value *Value::DoPHITranslation(const BasicBlock *CurBB,
+ const BasicBlock *PredBB) {
+ PHINode *PN = dyn_cast<PHINode>(this);
+ if (PN && PN->getParent() == CurBB)
+ return PN->getIncomingValueForBlock(PredBB);
+ return this;
+}
+
+LLVMContext &Value::getContext() const { return VTy->getContext(); }
+
+//===----------------------------------------------------------------------===//
+// ValueHandleBase Class
+//===----------------------------------------------------------------------===//
+
+/// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
+/// List is known to point into the existing use list.
+void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
+ assert(List && "Handle list is null?");
+
+ // Splice ourselves into the list.
+ Next = *List;
+ *List = this;
+ setPrevPtr(List);
+ if (Next) {
+ Next->setPrevPtr(&Next);
+ assert(VP.getPointer() == Next->VP.getPointer() && "Added to wrong list?");
+ }
+}
+
+void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) {
+ assert(List && "Must insert after existing node");
+
+ Next = List->Next;
+ setPrevPtr(&List->Next);
+ List->Next = this;
+ if (Next)
+ Next->setPrevPtr(&Next);
+}
+
+/// AddToUseList - Add this ValueHandle to the use list for VP.
+void ValueHandleBase::AddToUseList() {
+ assert(VP.getPointer() && "Null pointer doesn't have a use list!");
+
+ LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl;
+
+ if (VP.getPointer()->HasValueHandle) {
+ // If this value already has a ValueHandle, then it must be in the
+ // ValueHandles map already.
+ ValueHandleBase *&Entry = pImpl->ValueHandles[VP.getPointer()];
+ assert(Entry != 0 && "Value doesn't have any handles?");
+ AddToExistingUseList(&Entry);
+ return;
+ }
+
+ // Ok, it doesn't have any handles yet, so we must insert it into the
+ // DenseMap. However, doing this insertion could cause the DenseMap to
+ // reallocate itself, which would invalidate all of the PrevP pointers that
+ // point into the old table. Handle this by checking for reallocation and
+ // updating the stale pointers only if needed.
+ DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
+ const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
+
+ ValueHandleBase *&Entry = Handles[VP.getPointer()];
+ assert(Entry == 0 && "Value really did already have handles?");
+ AddToExistingUseList(&Entry);
+ VP.getPointer()->HasValueHandle = true;
+
+ // If reallocation didn't happen or if this was the first insertion, don't
+ // walk the table.
+ if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
+ Handles.size() == 1) {
+ return;
+ }
+
+ // Okay, reallocation did happen. Fix the Prev Pointers.
+ for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(),
+ E = Handles.end(); I != E; ++I) {
+ assert(I->second && I->first == I->second->VP.getPointer() &&
+ "List invariant broken!");
+ I->second->setPrevPtr(&I->second);
+ }
+}
+
+/// RemoveFromUseList - Remove this ValueHandle from its current use list.
+void ValueHandleBase::RemoveFromUseList() {
+ assert(VP.getPointer() && VP.getPointer()->HasValueHandle &&
+ "Pointer doesn't have a use list!");
+
+ // Unlink this from its use list.
+ ValueHandleBase **PrevPtr = getPrevPtr();
+ assert(*PrevPtr == this && "List invariant broken");
+
+ *PrevPtr = Next;
+ if (Next) {
+ assert(Next->getPrevPtr() == &Next && "List invariant broken");
+ Next->setPrevPtr(PrevPtr);
+ return;
+ }
+
+ // If the Next pointer was null, then it is possible that this was the last
+ // ValueHandle watching VP. If so, delete its entry from the ValueHandles
+ // map.
+ LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl;
+ DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
+ if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
+ Handles.erase(VP.getPointer());
+ VP.getPointer()->HasValueHandle = false;
+ }
+}
+
+
+void ValueHandleBase::ValueIsDeleted(Value *V) {
+ assert(V->HasValueHandle && "Should only be called if ValueHandles present");
+
+ // Get the linked list base, which is guaranteed to exist since the
+ // HasValueHandle flag is set.
+ LLVMContextImpl *pImpl = V->getContext().pImpl;
+ ValueHandleBase *Entry = pImpl->ValueHandles[V];
+ assert(Entry && "Value bit set but no entries exist");
+
+ // We use a local ValueHandleBase as an iterator so that ValueHandles can add
+ // and remove themselves from the list without breaking our iteration. This
+ // is not really an AssertingVH; we just have to give ValueHandleBase a kind.
+ // Note that we deliberately do not the support the case when dropping a value
+ // handle results in a new value handle being permanently added to the list
+ // (as might occur in theory for CallbackVH's): the new value handle will not
+ // be processed and the checking code will mete out righteous punishment if
+ // the handle is still present once we have finished processing all the other
+ // value handles (it is fine to momentarily add then remove a value handle).
+ for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
+ Iterator.RemoveFromUseList();
+ Iterator.AddToExistingUseListAfter(Entry);
+ assert(Entry->Next == &Iterator && "Loop invariant broken.");
+
+ switch (Entry->getKind()) {
+ case Assert:
+ break;
+ case Tracking:
+ // Mark that this value has been deleted by setting it to an invalid Value
+ // pointer.
+ Entry->operator=(DenseMapInfo<Value *>::getTombstoneKey());
+ break;
+ case Weak:
+ // Weak just goes to null, which will unlink it from the list.
+ Entry->operator=(0);
+ break;
+ case Callback:
+ // Forward to the subclass's implementation.
+ static_cast<CallbackVH*>(Entry)->deleted();
+ break;
+ }
+ }
+
+ // All callbacks, weak references, and assertingVHs should be dropped by now.
+ if (V->HasValueHandle) {
+#ifndef NDEBUG // Only in +Asserts mode...
+ dbgs() << "While deleting: " << *V->getType() << " %" << V->getName()
+ << "\n";
+ if (pImpl->ValueHandles[V]->getKind() == Assert)
+ llvm_unreachable("An asserting value handle still pointed to this"
+ " value!");
+
+#endif
+ llvm_unreachable("All references to V were not removed?");
+ }
+}
+
+
+void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
+ assert(Old->HasValueHandle &&"Should only be called if ValueHandles present");
+ assert(Old != New && "Changing value into itself!");
+
+ // Get the linked list base, which is guaranteed to exist since the
+ // HasValueHandle flag is set.
+ LLVMContextImpl *pImpl = Old->getContext().pImpl;
+ ValueHandleBase *Entry = pImpl->ValueHandles[Old];
+
+ assert(Entry && "Value bit set but no entries exist");
+
+ // We use a local ValueHandleBase as an iterator so that
+ // ValueHandles can add and remove themselves from the list without
+ // breaking our iteration. This is not really an AssertingVH; we
+ // just have to give ValueHandleBase some kind.
+ for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
+ Iterator.RemoveFromUseList();
+ Iterator.AddToExistingUseListAfter(Entry);
+ assert(Entry->Next == &Iterator && "Loop invariant broken.");
+
+ switch (Entry->getKind()) {
+ case Assert:
+ // Asserting handle does not follow RAUW implicitly.
+ break;
+ case Tracking:
+ // Tracking goes to new value like a WeakVH. Note that this may make it
+ // something incompatible with its templated type. We don't want to have a
+ // virtual (or inline) interface to handle this though, so instead we make
+ // the TrackingVH accessors guarantee that a client never sees this value.
+
+ // FALLTHROUGH
+ case Weak:
+ // Weak goes to the new value, which will unlink it from Old's list.
+ Entry->operator=(New);
+ break;
+ case Callback:
+ // Forward to the subclass's implementation.
+ static_cast<CallbackVH*>(Entry)->allUsesReplacedWith(New);
+ break;
+ }
+ }
+
+#ifndef NDEBUG
+ // If any new tracking or weak value handles were added while processing the
+ // list, then complain about it now.
+ if (Old->HasValueHandle)
+ for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next)
+ switch (Entry->getKind()) {
+ case Tracking:
+ case Weak:
+ dbgs() << "After RAUW from " << *Old->getType() << " %"
+ << Old->getName() << " to " << *New->getType() << " %"
+ << New->getName() << "\n";
+ llvm_unreachable("A tracking or weak value handle still pointed to the"
+ " old value!\n");
+ default:
+ break;
+ }
+#endif
+}
+
+// Default implementation for CallbackVH.
+void CallbackVH::allUsesReplacedWith(Value *) {}
+
+void CallbackVH::deleted() {
+ setValPtr(NULL);
+}
diff --git a/contrib/llvm/lib/IR/ValueSymbolTable.cpp b/contrib/llvm/lib/IR/ValueSymbolTable.cpp
new file mode 100644
index 000000000000..fffacb377770
--- /dev/null
+++ b/contrib/llvm/lib/IR/ValueSymbolTable.cpp
@@ -0,0 +1,117 @@
+//===-- ValueSymbolTable.cpp - Implement the ValueSymbolTable class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueSymbolTable class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "valuesymtab"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Class destructor
+ValueSymbolTable::~ValueSymbolTable() {
+#ifndef NDEBUG // Only do this in -g mode...
+ for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
+ dbgs() << "Value still in symbol table! Type = '"
+ << *VI->getValue()->getType() << "' Name = '"
+ << VI->getKeyData() << "'\n";
+ assert(vmap.empty() && "Values remain in symbol table!");
+#endif
+}
+
+// Insert a value into the symbol table with the specified name...
+//
+void ValueSymbolTable::reinsertValue(Value* V) {
+ assert(V->hasName() && "Can't insert nameless Value into symbol table");
+
+ // Try inserting the name, assuming it won't conflict.
+ if (vmap.insert(V->Name)) {
+ //DEBUG(dbgs() << " Inserted value: " << V->Name << ": " << *V << "\n");
+ return;
+ }
+
+ // Otherwise, there is a naming conflict. Rename this value.
+ SmallString<256> UniqueName(V->getName().begin(), V->getName().end());
+
+ // The name is too already used, just free it so we can allocate a new name.
+ V->Name->Destroy();
+
+ unsigned BaseSize = UniqueName.size();
+ while (1) {
+ // Trim any suffix off and append the next number.
+ UniqueName.resize(BaseSize);
+ raw_svector_ostream(UniqueName) << ++LastUnique;
+
+ // Try insert the vmap entry with this suffix.
+ ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
+ if (NewName.getValue() == 0) {
+ // Newly inserted name. Success!
+ NewName.setValue(V);
+ V->Name = &NewName;
+ //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ return;
+ }
+ }
+}
+
+void ValueSymbolTable::removeValueName(ValueName *V) {
+ //DEBUG(dbgs() << " Removing Value: " << V->getKeyData() << "\n");
+ // Remove the value from the symbol table.
+ vmap.remove(V);
+}
+
+/// createValueName - This method attempts to create a value name and insert
+/// it into the symbol table with the specified name. If it conflicts, it
+/// auto-renames the name and returns that instead.
+ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
+ // In the common case, the name is not already in the symbol table.
+ ValueName &Entry = vmap.GetOrCreateValue(Name);
+ if (Entry.getValue() == 0) {
+ Entry.setValue(V);
+ //DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": "
+ // << *V << "\n");
+ return &Entry;
+ }
+
+ // Otherwise, there is a naming conflict. Rename this value.
+ SmallString<256> UniqueName(Name.begin(), Name.end());
+
+ while (1) {
+ // Trim any suffix off and append the next number.
+ UniqueName.resize(Name.size());
+ raw_svector_ostream(UniqueName) << ++LastUnique;
+
+ // Try insert the vmap entry with this suffix.
+ ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
+ if (NewName.getValue() == 0) {
+ // Newly inserted name. Success!
+ NewName.setValue(V);
+ //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ return &NewName;
+ }
+ }
+}
+
+
+// dump - print out the symbol table
+//
+void ValueSymbolTable::dump() const {
+ //DEBUG(dbgs() << "ValueSymbolTable:\n");
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ //DEBUG(dbgs() << " '" << I->getKeyData() << "' = ");
+ I->getValue()->dump();
+ //DEBUG(dbgs() << "\n");
+ }
+}
diff --git a/contrib/llvm/lib/IR/ValueTypes.cpp b/contrib/llvm/lib/IR/ValueTypes.cpp
new file mode 100644
index 000000000000..ba04d60c24a1
--- /dev/null
+++ b/contrib/llvm/lib/IR/ValueTypes.cpp
@@ -0,0 +1,277 @@
+//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements methods in the CodeGen/ValueTypes.h header.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+EVT EVT::changeExtendedVectorElementTypeToInteger() const {
+ LLVMContext &Context = LLVMTy->getContext();
+ EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits());
+ return getVectorVT(Context, IntTy, getVectorNumElements());
+}
+
+EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
+ EVT VT;
+ VT.LLVMTy = IntegerType::get(Context, BitWidth);
+ assert(VT.isExtended() && "Type is not extended!");
+ return VT;
+}
+
+EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT,
+ unsigned NumElements) {
+ EVT ResultVT;
+ ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements);
+ assert(ResultVT.isExtended() && "Type is not extended!");
+ return ResultVT;
+}
+
+bool EVT::isExtendedFloatingPoint() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isFPOrFPVectorTy();
+}
+
+bool EVT::isExtendedInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isIntOrIntVectorTy();
+}
+
+bool EVT::isExtendedVector() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isVectorTy();
+}
+
+bool EVT::isExtended16BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 16;
+}
+
+bool EVT::isExtended32BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 32;
+}
+
+bool EVT::isExtended64BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 64;
+}
+
+bool EVT::isExtended128BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 128;
+}
+
+bool EVT::isExtended256BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 256;
+}
+
+bool EVT::isExtended512BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 512;
+}
+
+bool EVT::isExtended1024BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 1024;
+}
+
+EVT EVT::getExtendedVectorElementType() const {
+ assert(isExtended() && "Type is not extended!");
+ return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
+}
+
+unsigned EVT::getExtendedVectorNumElements() const {
+ assert(isExtended() && "Type is not extended!");
+ return cast<VectorType>(LLVMTy)->getNumElements();
+}
+
+unsigned EVT::getExtendedSizeInBits() const {
+ assert(isExtended() && "Type is not extended!");
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
+ return ITy->getBitWidth();
+ if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
+ return VTy->getBitWidth();
+ llvm_unreachable("Unrecognized extended type!");
+}
+
+/// getEVTString - This function returns value type as a string, e.g. "i32".
+std::string EVT::getEVTString() const {
+ switch (V.SimpleTy) {
+ default:
+ if (isVector())
+ return "v" + utostr(getVectorNumElements()) +
+ getVectorElementType().getEVTString();
+ if (isInteger())
+ return "i" + utostr(getSizeInBits());
+ llvm_unreachable("Invalid EVT!");
+ case MVT::i1: return "i1";
+ case MVT::i8: return "i8";
+ case MVT::i16: return "i16";
+ case MVT::i32: return "i32";
+ case MVT::i64: return "i64";
+ case MVT::i128: return "i128";
+ case MVT::f16: return "f16";
+ case MVT::f32: return "f32";
+ case MVT::f64: return "f64";
+ case MVT::f80: return "f80";
+ case MVT::f128: return "f128";
+ case MVT::ppcf128: return "ppcf128";
+ case MVT::isVoid: return "isVoid";
+ case MVT::Other: return "ch";
+ case MVT::Glue: return "glue";
+ case MVT::x86mmx: return "x86mmx";
+ case MVT::v2i1: return "v2i1";
+ case MVT::v4i1: return "v4i1";
+ case MVT::v8i1: return "v8i1";
+ case MVT::v16i1: return "v16i1";
+ case MVT::v32i1: return "v32i1";
+ case MVT::v64i1: return "v64i1";
+ case MVT::v2i8: return "v2i8";
+ case MVT::v4i8: return "v4i8";
+ case MVT::v8i8: return "v8i8";
+ case MVT::v16i8: return "v16i8";
+ case MVT::v32i8: return "v32i8";
+ case MVT::v64i8: return "v64i8";
+ case MVT::v1i16: return "v1i16";
+ case MVT::v2i16: return "v2i16";
+ case MVT::v4i16: return "v4i16";
+ case MVT::v8i16: return "v8i16";
+ case MVT::v16i16: return "v16i16";
+ case MVT::v32i16: return "v32i16";
+ case MVT::v1i32: return "v1i32";
+ case MVT::v2i32: return "v2i32";
+ case MVT::v4i32: return "v4i32";
+ case MVT::v8i32: return "v8i32";
+ case MVT::v16i32: return "v16i32";
+ case MVT::v1i64: return "v1i64";
+ case MVT::v2i64: return "v2i64";
+ case MVT::v4i64: return "v4i64";
+ case MVT::v8i64: return "v8i64";
+ case MVT::v16i64: return "v16i64";
+ case MVT::v2f32: return "v2f32";
+ case MVT::v2f16: return "v2f16";
+ case MVT::v4f32: return "v4f32";
+ case MVT::v8f32: return "v8f32";
+ case MVT::v16f32: return "v16f32";
+ case MVT::v2f64: return "v2f64";
+ case MVT::v4f64: return "v4f64";
+ case MVT::v8f64: return "v8f64";
+ case MVT::Metadata:return "Metadata";
+ case MVT::Untyped: return "Untyped";
+ }
+}
+
+/// getTypeForEVT - This method returns an LLVM type corresponding to the
+/// specified EVT. For integer types, this returns an unsigned type. Note
+/// that this will abort for types that cannot be represented.
+Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+ switch (V.SimpleTy) {
+ default:
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy;
+ case MVT::isVoid: return Type::getVoidTy(Context);
+ case MVT::i1: return Type::getInt1Ty(Context);
+ case MVT::i8: return Type::getInt8Ty(Context);
+ case MVT::i16: return Type::getInt16Ty(Context);
+ case MVT::i32: return Type::getInt32Ty(Context);
+ case MVT::i64: return Type::getInt64Ty(Context);
+ case MVT::i128: return IntegerType::get(Context, 128);
+ case MVT::f16: return Type::getHalfTy(Context);
+ case MVT::f32: return Type::getFloatTy(Context);
+ case MVT::f64: return Type::getDoubleTy(Context);
+ case MVT::f80: return Type::getX86_FP80Ty(Context);
+ case MVT::f128: return Type::getFP128Ty(Context);
+ case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
+ case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::v2i1: return VectorType::get(Type::getInt1Ty(Context), 2);
+ case MVT::v4i1: return VectorType::get(Type::getInt1Ty(Context), 4);
+ case MVT::v8i1: return VectorType::get(Type::getInt1Ty(Context), 8);
+ case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16);
+ case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
+ case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
+ case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
+ case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
+ case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
+ case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16);
+ case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32);
+ case MVT::v64i8: return VectorType::get(Type::getInt8Ty(Context), 64);
+ case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1);
+ case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
+ case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
+ case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
+ case MVT::v32i16: return VectorType::get(Type::getInt16Ty(Context), 32);
+ case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1);
+ case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16);
+ case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
+ case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
+ case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
+ case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
+ case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
+ case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8);
+ case MVT::Metadata: return Type::getMetadataTy(Context);
+ }
+}
+
+/// Return the value type corresponding to the specified type. This returns all
+/// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned
+/// as Other, otherwise they are invalid.
+MVT MVT::getVT(Type *Ty, bool HandleUnknown){
+ switch (Ty->getTypeID()) {
+ default:
+ if (HandleUnknown) return MVT(MVT::Other);
+ llvm_unreachable("Unknown type!");
+ case Type::VoidTyID:
+ return MVT::isVoid;
+ case Type::IntegerTyID:
+ return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth());
+ case Type::HalfTyID: return MVT(MVT::f16);
+ case Type::FloatTyID: return MVT(MVT::f32);
+ case Type::DoubleTyID: return MVT(MVT::f64);
+ case Type::X86_FP80TyID: return MVT(MVT::f80);
+ case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
+ case Type::FP128TyID: return MVT(MVT::f128);
+ case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
+ case Type::PointerTyID: return MVT(MVT::iPTR);
+ case Type::VectorTyID: {
+ VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(
+ getVT(VTy->getElementType(), false), VTy->getNumElements());
+ }
+ }
+}
+
+/// getEVT - Return the value type corresponding to the specified type. This
+/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
+/// are returned as Other, otherwise they are invalid.
+EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
+ switch (Ty->getTypeID()) {
+ default:
+ return MVT::getVT(Ty, HandleUnknown);
+ case Type::IntegerTyID:
+ return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
+ case Type::VectorTyID: {
+ VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
+ VTy->getNumElements());
+ }
+ }
+}
diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp
new file mode 100644
index 000000000000..8bfbb322cf4c
--- /dev/null
+++ b/contrib/llvm/lib/IR/Verifier.cpp
@@ -0,0 +1,2144 @@
+//===-- Verifier.cpp - Implement the Module Verifier -----------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function verifier interface, that can be used for some
+// sanity checking of input to the system.
+//
+// Note that this does not provide full `Java style' security and verifications,
+// instead it just tries to ensure that code is well-formed.
+//
+// * Both of a binary operator's parameters are of the same type
+// * Verify that the indices of mem access instructions match other operands
+// * Verify that arithmetic and other things are only performed on first-class
+// types. Verify that shifts & logicals only happen on integrals f.e.
+// * All of the constants in a switch statement are of the correct type
+// * The code is in valid SSA form
+// * It should be illegal to put a label into any other type (like a structure)
+// or to return one. [except constant arrays!]
+// * Only phi nodes can be self referential: 'add i32 %0, %0 ; <int>:0' is bad
+// * PHI nodes must have an entry for each predecessor, with no extras.
+// * PHI nodes must be the first thing in a basic block, all grouped together
+// * PHI nodes must have at least one entry
+// * All basic blocks should only end with terminator insts, not contain them
+// * The entry node to a function must not have predecessors
+// * All Instructions must be embedded into a basic block
+// * Functions cannot take a void-typed parameter
+// * Verify that a function's argument list agrees with it's declared type.
+// * It is illegal to specify a name for a void value.
+// * It is illegal to have a internal global value with no initializer
+// * It is illegal to have a ret instruction that returns a value that does not
+// agree with the function return value type.
+// * Function call argument types match the function prototype
+// * A landing pad is defined by a landingpad instruction, and can be jumped to
+// only by the unwind edge of an invoke instruction.
+// * A landingpad instruction must be the first non-PHI instruction in the
+// block.
+// * All landingpad instructions must use the same personality function with
+// the same function.
+// * All other things that are tested by asserts spread about the code...
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+namespace { // Anonymous namespace for class
+ struct PreVerifier : public FunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+
+ PreVerifier() : FunctionPass(ID) {
+ initializePreVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // Check that the prerequisites for successful DominatorTree construction
+ // are satisfied.
+ bool runOnFunction(Function &F) {
+ bool Broken = false;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ if (I->empty() || !I->back().isTerminator()) {
+ dbgs() << "Basic Block in function '" << F.getName()
+ << "' does not have terminator!\n";
+ WriteAsOperand(dbgs(), I, true);
+ dbgs() << "\n";
+ Broken = true;
+ }
+ }
+
+ if (Broken)
+ report_fatal_error("Broken module, no Basic Block terminator!");
+
+ return false;
+ }
+ };
+}
+
+char PreVerifier::ID = 0;
+INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
+ false, false)
+static char &PreVerifyID = PreVerifier::ID;
+
+namespace {
+ struct Verifier : public FunctionPass, public InstVisitor<Verifier> {
+ static char ID; // Pass ID, replacement for typeid
+ bool Broken; // Is this module found to be broken?
+ VerifierFailureAction action;
+ // What to do if verification fails.
+ Module *Mod; // Module we are verifying right now
+ LLVMContext *Context; // Context within which we are verifying
+ DominatorTree *DT; // Dominator Tree, caution can be null!
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ /// InstInThisBlock - when verifying a basic block, keep track of all of the
+ /// instructions we have seen so far. This allows us to do efficient
+ /// dominance checks for the case when an instruction has an operand that is
+ /// an instruction in the same block.
+ SmallPtrSet<Instruction*, 16> InstsInThisBlock;
+
+ /// MDNodes - keep track of the metadata nodes that have been checked
+ /// already.
+ SmallPtrSet<MDNode *, 32> MDNodes;
+
+ /// PersonalityFn - The personality function referenced by the
+ /// LandingPadInsts. All LandingPadInsts within the same function must use
+ /// the same personality function.
+ const Value *PersonalityFn;
+
+ Verifier()
+ : FunctionPass(ID), Broken(false),
+ action(AbortProcessAction), Mod(0), Context(0), DT(0),
+ MessagesStr(Messages), PersonalityFn(0) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
+ explicit Verifier(VerifierFailureAction ctn)
+ : FunctionPass(ID), Broken(false), action(ctn), Mod(0),
+ Context(0), DT(0), MessagesStr(Messages), PersonalityFn(0) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) {
+ Mod = &M;
+ Context = &M.getContext();
+
+ // We must abort before returning back to the pass manager, or else the
+ // pass manager may try to run other passes on the broken module.
+ return abortIfBroken();
+ }
+
+ bool runOnFunction(Function &F) {
+ // Get dominator information if we are being run by PassManager
+ DT = &getAnalysis<DominatorTree>();
+
+ Mod = F.getParent();
+ if (!Context) Context = &F.getContext();
+
+ visit(F);
+ InstsInThisBlock.clear();
+ PersonalityFn = 0;
+
+ // We must abort before returning back to the pass manager, or else the
+ // pass manager may try to run other passes on the broken module.
+ return abortIfBroken();
+ }
+
+ bool doFinalization(Module &M) {
+ // Scan through, checking all of the external function's linkage now...
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ visitGlobalValue(*I);
+
+ // Check to make sure function prototypes are okay.
+ if (I->isDeclaration()) visitFunction(*I);
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ visitGlobalVariable(*I);
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I)
+ visitGlobalAlias(*I);
+
+ for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I)
+ visitNamedMDNode(*I);
+
+ visitModuleFlags(M);
+
+ // If the module is broken, abort at this time.
+ return abortIfBroken();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredID(PreVerifyID);
+ AU.addRequired<DominatorTree>();
+ }
+
+ /// abortIfBroken - If the module is broken and we are supposed to abort on
+ /// this condition, do so.
+ ///
+ bool abortIfBroken() {
+ if (!Broken) return false;
+ MessagesStr << "Broken module found, ";
+ switch (action) {
+ case AbortProcessAction:
+ MessagesStr << "compilation aborted!\n";
+ dbgs() << MessagesStr.str();
+ // Client should choose different reaction if abort is not desired
+ abort();
+ case PrintMessageAction:
+ MessagesStr << "verification continues.\n";
+ dbgs() << MessagesStr.str();
+ return false;
+ case ReturnStatusAction:
+ MessagesStr << "compilation terminated.\n";
+ return true;
+ }
+ llvm_unreachable("Invalid action");
+ }
+
+
+ // Verification methods...
+ void visitGlobalValue(GlobalValue &GV);
+ void visitGlobalVariable(GlobalVariable &GV);
+ void visitGlobalAlias(GlobalAlias &GA);
+ void visitNamedMDNode(NamedMDNode &NMD);
+ void visitMDNode(MDNode &MD, Function *F);
+ void visitModuleFlags(Module &M);
+ void visitModuleFlag(MDNode *Op, DenseMap<MDString*, MDNode*> &SeenIDs,
+ SmallVectorImpl<MDNode*> &Requirements);
+ void visitFunction(Function &F);
+ void visitBasicBlock(BasicBlock &BB);
+ using InstVisitor<Verifier>::visit;
+
+ void visit(Instruction &I);
+
+ void visitTruncInst(TruncInst &I);
+ void visitZExtInst(ZExtInst &I);
+ void visitSExtInst(SExtInst &I);
+ void visitFPTruncInst(FPTruncInst &I);
+ void visitFPExtInst(FPExtInst &I);
+ void visitFPToUIInst(FPToUIInst &I);
+ void visitFPToSIInst(FPToSIInst &I);
+ void visitUIToFPInst(UIToFPInst &I);
+ void visitSIToFPInst(SIToFPInst &I);
+ void visitIntToPtrInst(IntToPtrInst &I);
+ void visitPtrToIntInst(PtrToIntInst &I);
+ void visitBitCastInst(BitCastInst &I);
+ void visitPHINode(PHINode &PN);
+ void visitBinaryOperator(BinaryOperator &B);
+ void visitICmpInst(ICmpInst &IC);
+ void visitFCmpInst(FCmpInst &FC);
+ void visitExtractElementInst(ExtractElementInst &EI);
+ void visitInsertElementInst(InsertElementInst &EI);
+ void visitShuffleVectorInst(ShuffleVectorInst &EI);
+ void visitVAArgInst(VAArgInst &VAA) { visitInstruction(VAA); }
+ void visitCallInst(CallInst &CI);
+ void visitInvokeInst(InvokeInst &II);
+ void visitGetElementPtrInst(GetElementPtrInst &GEP);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void verifyDominatesUse(Instruction &I, unsigned i);
+ void visitInstruction(Instruction &I);
+ void visitTerminatorInst(TerminatorInst &I);
+ void visitBranchInst(BranchInst &BI);
+ void visitReturnInst(ReturnInst &RI);
+ void visitSwitchInst(SwitchInst &SI);
+ void visitIndirectBrInst(IndirectBrInst &BI);
+ void visitSelectInst(SelectInst &SI);
+ void visitUserOp1(Instruction &I);
+ void visitUserOp2(Instruction &I) { visitUserOp1(I); }
+ void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
+ void visitAtomicRMWInst(AtomicRMWInst &RMWI);
+ void visitFenceInst(FenceInst &FI);
+ void visitAllocaInst(AllocaInst &AI);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+ void visitLandingPadInst(LandingPadInst &LPI);
+
+ void VerifyCallSite(CallSite CS);
+ bool PerformTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty,
+ int VT, unsigned ArgNo, std::string &Suffix);
+ bool VerifyIntrinsicType(Type *Ty,
+ ArrayRef<Intrinsic::IITDescriptor> &Infos,
+ SmallVectorImpl<Type*> &ArgTys);
+ void VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
+ bool isReturnValue, const Value *V);
+ void VerifyFunctionAttrs(FunctionType *FT, const AttributeSet &Attrs,
+ const Value *V);
+
+ void WriteValue(const Value *V) {
+ if (!V) return;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ WriteAsOperand(MessagesStr, V, true, Mod);
+ MessagesStr << '\n';
+ }
+ }
+
+ void WriteType(Type *T) {
+ if (!T) return;
+ MessagesStr << ' ' << *T;
+ }
+
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const Twine &Message,
+ const Value *V1 = 0, const Value *V2 = 0,
+ const Value *V3 = 0, const Value *V4 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ Broken = true;
+ }
+
+ void CheckFailed(const Twine &Message, const Value *V1,
+ Type *T2, const Value *V3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteType(T2);
+ WriteValue(V3);
+ Broken = true;
+ }
+
+ void CheckFailed(const Twine &Message, Type *T1,
+ Type *T2 = 0, Type *T3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteType(T1);
+ WriteType(T2);
+ WriteType(T3);
+ Broken = true;
+ }
+ };
+} // End anonymous namespace
+
+char Verifier::ID = 0;
+INITIALIZE_PASS_BEGIN(Verifier, "verify", "Module Verifier", false, false)
+INITIALIZE_PASS_DEPENDENCY(PreVerifier)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(Verifier, "verify", "Module Verifier", false, false)
+
+// Assert - We know that cond should be true, if not print an error message.
+#define Assert(C, M) \
+ do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+ do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+ do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+void Verifier::visit(Instruction &I) {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Assert1(I.getOperand(i) != 0, "Operand is null", &I);
+ InstVisitor<Verifier>::visit(I);
+}
+
+
+void Verifier::visitGlobalValue(GlobalValue &GV) {
+ Assert1(!GV.isDeclaration() ||
+ GV.isMaterializable() ||
+ GV.hasExternalLinkage() ||
+ GV.hasDLLImportLinkage() ||
+ GV.hasExternalWeakLinkage() ||
+ (isa<GlobalAlias>(GV) &&
+ (GV.hasLocalLinkage() || GV.hasWeakLinkage())),
+ "Global is external, but doesn't have external or dllimport or weak linkage!",
+ &GV);
+
+ Assert1(!GV.hasDLLImportLinkage() || GV.isDeclaration(),
+ "Global is marked as dllimport, but not external", &GV);
+
+ Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
+ "Only global variables can have appending linkage!", &GV);
+
+ if (GV.hasAppendingLinkage()) {
+ GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
+ Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
+ "Only global arrays can have appending linkage!", GVar);
+ }
+
+ Assert1(!GV.hasLinkOnceODRAutoHideLinkage() || GV.hasDefaultVisibility(),
+ "linkonce_odr_auto_hide can only have default visibility!",
+ &GV);
+}
+
+void Verifier::visitGlobalVariable(GlobalVariable &GV) {
+ if (GV.hasInitializer()) {
+ Assert1(GV.getInitializer()->getType() == GV.getType()->getElementType(),
+ "Global variable initializer type does not match global "
+ "variable type!", &GV);
+
+ // If the global has common linkage, it must have a zero initializer and
+ // cannot be constant.
+ if (GV.hasCommonLinkage()) {
+ Assert1(GV.getInitializer()->isNullValue(),
+ "'common' global must have a zero initializer!", &GV);
+ Assert1(!GV.isConstant(), "'common' global may not be marked constant!",
+ &GV);
+ }
+ } else {
+ Assert1(GV.hasExternalLinkage() || GV.hasDLLImportLinkage() ||
+ GV.hasExternalWeakLinkage(),
+ "invalid linkage type for global declaration", &GV);
+ }
+
+ if (GV.hasName() && (GV.getName() == "llvm.global_ctors" ||
+ GV.getName() == "llvm.global_dtors")) {
+ Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
+ // Don't worry about emitting an error for it not being an array,
+ // visitGlobalValue will complain on appending non-array.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getType())) {
+ StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+ PointerType *FuncPtrTy =
+ FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
+ Assert1(STy && STy->getNumElements() == 2 &&
+ STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
+ STy->getTypeAtIndex(1) == FuncPtrTy,
+ "wrong type for intrinsic global variable", &GV);
+ }
+ }
+
+ visitGlobalValue(GV);
+}
+
+void Verifier::visitGlobalAlias(GlobalAlias &GA) {
+ Assert1(!GA.getName().empty(),
+ "Alias name cannot be empty!", &GA);
+ Assert1(GA.hasExternalLinkage() || GA.hasLocalLinkage() ||
+ GA.hasWeakLinkage(),
+ "Alias should have external or external weak linkage!", &GA);
+ Assert1(GA.getAliasee(),
+ "Aliasee cannot be NULL!", &GA);
+ Assert1(GA.getType() == GA.getAliasee()->getType(),
+ "Alias and aliasee types should match!", &GA);
+ Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA);
+
+ if (!isa<GlobalValue>(GA.getAliasee())) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(GA.getAliasee());
+ Assert1(CE &&
+ (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ isa<GlobalValue>(CE->getOperand(0)),
+ "Aliasee should be either GlobalValue or bitcast of GlobalValue",
+ &GA);
+ }
+
+ const GlobalValue* Aliasee = GA.resolveAliasedGlobal(/*stopOnWeak*/ false);
+ Assert1(Aliasee,
+ "Aliasing chain should end with function or global variable", &GA);
+
+ visitGlobalValue(GA);
+}
+
+void Verifier::visitNamedMDNode(NamedMDNode &NMD) {
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) {
+ MDNode *MD = NMD.getOperand(i);
+ if (!MD)
+ continue;
+
+ Assert1(!MD->isFunctionLocal(),
+ "Named metadata operand cannot be function local!", MD);
+ visitMDNode(*MD, 0);
+ }
+}
+
+void Verifier::visitMDNode(MDNode &MD, Function *F) {
+ // Only visit each node once. Metadata can be mutually recursive, so this
+ // avoids infinite recursion here, as well as being an optimization.
+ if (!MDNodes.insert(&MD))
+ return;
+
+ for (unsigned i = 0, e = MD.getNumOperands(); i != e; ++i) {
+ Value *Op = MD.getOperand(i);
+ if (!Op)
+ continue;
+ if (isa<Constant>(Op) || isa<MDString>(Op))
+ continue;
+ if (MDNode *N = dyn_cast<MDNode>(Op)) {
+ Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(),
+ "Global metadata operand cannot be function local!", &MD, N);
+ visitMDNode(*N, F);
+ continue;
+ }
+ Assert2(MD.isFunctionLocal(), "Invalid operand for global metadata!", &MD, Op);
+
+ // If this was an instruction, bb, or argument, verify that it is in the
+ // function that we expect.
+ Function *ActualF = 0;
+ if (Instruction *I = dyn_cast<Instruction>(Op))
+ ActualF = I->getParent()->getParent();
+ else if (BasicBlock *BB = dyn_cast<BasicBlock>(Op))
+ ActualF = BB->getParent();
+ else if (Argument *A = dyn_cast<Argument>(Op))
+ ActualF = A->getParent();
+ assert(ActualF && "Unimplemented function local metadata case!");
+
+ Assert2(ActualF == F, "function-local metadata used in wrong function",
+ &MD, Op);
+ }
+}
+
+void Verifier::visitModuleFlags(Module &M) {
+ const NamedMDNode *Flags = M.getModuleFlagsMetadata();
+ if (!Flags) return;
+
+ // Scan each flag, and track the flags and requirements.
+ DenseMap<MDString*, MDNode*> SeenIDs;
+ SmallVector<MDNode*, 16> Requirements;
+ for (unsigned I = 0, E = Flags->getNumOperands(); I != E; ++I) {
+ visitModuleFlag(Flags->getOperand(I), SeenIDs, Requirements);
+ }
+
+ // Validate that the requirements in the module are valid.
+ for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
+ MDNode *Requirement = Requirements[I];
+ MDString *Flag = cast<MDString>(Requirement->getOperand(0));
+ Value *ReqValue = Requirement->getOperand(1);
+
+ MDNode *Op = SeenIDs.lookup(Flag);
+ if (!Op) {
+ CheckFailed("invalid requirement on flag, flag is not present in module",
+ Flag);
+ continue;
+ }
+
+ if (Op->getOperand(2) != ReqValue) {
+ CheckFailed(("invalid requirement on flag, "
+ "flag does not have the required value"),
+ Flag);
+ continue;
+ }
+ }
+}
+
+void Verifier::visitModuleFlag(MDNode *Op, DenseMap<MDString*, MDNode*>&SeenIDs,
+ SmallVectorImpl<MDNode*> &Requirements) {
+ // Each module flag should have three arguments, the merge behavior (a
+ // constant int), the flag ID (an MDString), and the value.
+ Assert1(Op->getNumOperands() == 3,
+ "incorrect number of operands in module flag", Op);
+ ConstantInt *Behavior = dyn_cast<ConstantInt>(Op->getOperand(0));
+ MDString *ID = dyn_cast<MDString>(Op->getOperand(1));
+ Assert1(Behavior,
+ "invalid behavior operand in module flag (expected constant integer)",
+ Op->getOperand(0));
+ unsigned BehaviorValue = Behavior->getZExtValue();
+ Assert1(ID,
+ "invalid ID operand in module flag (expected metadata string)",
+ Op->getOperand(1));
+
+ // Sanity check the values for behaviors with additional requirements.
+ switch (BehaviorValue) {
+ default:
+ Assert1(false,
+ "invalid behavior operand in module flag (unexpected constant)",
+ Op->getOperand(0));
+ break;
+
+ case Module::Error:
+ case Module::Warning:
+ case Module::Override:
+ // These behavior types accept any value.
+ break;
+
+ case Module::Require: {
+ // The value should itself be an MDNode with two operands, a flag ID (an
+ // MDString), and a value.
+ MDNode *Value = dyn_cast<MDNode>(Op->getOperand(2));
+ Assert1(Value && Value->getNumOperands() == 2,
+ "invalid value for 'require' module flag (expected metadata pair)",
+ Op->getOperand(2));
+ Assert1(isa<MDString>(Value->getOperand(0)),
+ ("invalid value for 'require' module flag "
+ "(first value operand should be a string)"),
+ Value->getOperand(0));
+
+ // Append it to the list of requirements, to check once all module flags are
+ // scanned.
+ Requirements.push_back(Value);
+ break;
+ }
+
+ case Module::Append:
+ case Module::AppendUnique: {
+ // These behavior types require the operand be an MDNode.
+ Assert1(isa<MDNode>(Op->getOperand(2)),
+ "invalid value for 'append'-type module flag "
+ "(expected a metadata node)", Op->getOperand(2));
+ break;
+ }
+ }
+
+ // Unless this is a "requires" flag, check the ID is unique.
+ if (BehaviorValue != Module::Require) {
+ bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second;
+ Assert1(Inserted,
+ "module flag identifiers must be unique (or of 'require' type)",
+ ID);
+ }
+}
+
+// VerifyParameterAttrs - Check the given attributes for an argument or return
+// value of the specified type. The value V is printed in error messages.
+void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
+ bool isReturnValue, const Value *V) {
+ if (!Attrs.hasAttributes(Idx))
+ return;
+
+ Assert1(!Attrs.hasAttribute(Idx, Attribute::NoReturn) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoUnwind) &&
+ !Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
+ !Attrs.hasAttribute(Idx, Attribute::ReadOnly) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoInline) &&
+ !Attrs.hasAttribute(Idx, Attribute::AlwaysInline) &&
+ !Attrs.hasAttribute(Idx, Attribute::OptimizeForSize) &&
+ !Attrs.hasAttribute(Idx, Attribute::StackProtect) &&
+ !Attrs.hasAttribute(Idx, Attribute::StackProtectReq) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoRedZone) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoImplicitFloat) &&
+ !Attrs.hasAttribute(Idx, Attribute::Naked) &&
+ !Attrs.hasAttribute(Idx, Attribute::InlineHint) &&
+ !Attrs.hasAttribute(Idx, Attribute::StackAlignment) &&
+ !Attrs.hasAttribute(Idx, Attribute::UWTable) &&
+ !Attrs.hasAttribute(Idx, Attribute::NonLazyBind) &&
+ !Attrs.hasAttribute(Idx, Attribute::ReturnsTwice) &&
+ !Attrs.hasAttribute(Idx, Attribute::SanitizeAddress) &&
+ !Attrs.hasAttribute(Idx, Attribute::SanitizeThread) &&
+ !Attrs.hasAttribute(Idx, Attribute::SanitizeMemory) &&
+ !Attrs.hasAttribute(Idx, Attribute::MinSize) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoBuiltin),
+ "Some attributes in '" + Attrs.getAsString(Idx) +
+ "' only apply to functions!", V);
+
+ if (isReturnValue)
+ Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ !Attrs.hasAttribute(Idx, Attribute::Nest) &&
+ !Attrs.hasAttribute(Idx, Attribute::StructRet) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoCapture),
+ "Attribute 'byval', 'nest', 'sret', and 'nocapture' "
+ "do not apply to return values!", V);
+
+ // Check for mutually incompatible attributes.
+ Assert1(!((Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ Attrs.hasAttribute(Idx, Attribute::Nest)) ||
+ (Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ Attrs.hasAttribute(Idx, Attribute::StructRet)) ||
+ (Attrs.hasAttribute(Idx, Attribute::Nest) &&
+ Attrs.hasAttribute(Idx, Attribute::StructRet))), "Attributes "
+ "'byval, nest, and sret' are incompatible!", V);
+
+ Assert1(!((Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ Attrs.hasAttribute(Idx, Attribute::Nest)) ||
+ (Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ Attrs.hasAttribute(Idx, Attribute::InReg)) ||
+ (Attrs.hasAttribute(Idx, Attribute::Nest) &&
+ Attrs.hasAttribute(Idx, Attribute::InReg))), "Attributes "
+ "'byval, nest, and inreg' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(Idx, Attribute::ZExt) &&
+ Attrs.hasAttribute(Idx, Attribute::SExt)), "Attributes "
+ "'zeroext and signext' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
+ Attrs.hasAttribute(Idx, Attribute::ReadOnly)), "Attributes "
+ "'readnone and readonly' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(Idx, Attribute::NoInline) &&
+ Attrs.hasAttribute(Idx, Attribute::AlwaysInline)), "Attributes "
+ "'noinline and alwaysinline' are incompatible!", V);
+
+ Assert1(!AttrBuilder(Attrs, Idx).
+ hasAttributes(AttributeFuncs::typeIncompatible(Ty, Idx), Idx),
+ "Wrong types for attribute: " +
+ AttributeFuncs::typeIncompatible(Ty, Idx).getAsString(Idx), V);
+
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) ||
+ PTy->getElementType()->isSized(),
+ "Attribute 'byval' does not support unsized types!", V);
+ else
+ Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal),
+ "Attribute 'byval' only applies to parameters with pointer type!",
+ V);
+}
+
+// VerifyFunctionAttrs - Check parameter attributes against a function type.
+// The value V is printed in error messages.
+void Verifier::VerifyFunctionAttrs(FunctionType *FT,
+ const AttributeSet &Attrs,
+ const Value *V) {
+ if (Attrs.isEmpty())
+ return;
+
+ bool SawNest = false;
+
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ unsigned Index = Attrs.getSlotIndex(i);
+
+ Type *Ty;
+ if (Index == 0)
+ Ty = FT->getReturnType();
+ else if (Index-1 < FT->getNumParams())
+ Ty = FT->getParamType(Index-1);
+ else
+ break; // VarArgs attributes, verified elsewhere.
+
+ VerifyParameterAttrs(Attrs, Index, Ty, Index == 0, V);
+
+ if (Attrs.hasAttribute(i, Attribute::Nest)) {
+ Assert1(!SawNest, "More than one parameter has attribute nest!", V);
+ SawNest = true;
+ }
+
+ if (Attrs.hasAttribute(Index, Attribute::StructRet))
+ Assert1(Index == 1, "Attribute sret is not on first parameter!", V);
+ }
+
+ if (!Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ return;
+
+ AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex);
+ NotFn.removeFunctionOnlyAttrs();
+ Assert1(NotFn.empty(), "Attributes '" +
+ AttributeSet::get(V->getContext(),
+ AttributeSet::FunctionIndex,
+ NotFn).getAsString(AttributeSet::FunctionIndex) +
+ "' do not apply to the function!", V);
+
+ // Check for mutually incompatible attributes.
+ Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ByVal) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Nest)) ||
+ (Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ByVal) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StructRet)) ||
+ (Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Nest) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StructRet))),
+ "Attributes 'byval, nest, and sret' are incompatible!", V);
+
+ Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ByVal) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Nest)) ||
+ (Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ByVal) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InReg)) ||
+ (Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Nest) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InReg))),
+ "Attributes 'byval, nest, and inreg' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ZExt) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::SExt)),
+ "Attributes 'zeroext and signext' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ReadNone) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ReadOnly)),
+ "Attributes 'readnone and readonly' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoInline) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline)),
+ "Attributes 'noinline and alwaysinline' are incompatible!", V);
+}
+
+static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) {
+ if (Attrs.getNumSlots() == 0)
+ return true;
+
+ unsigned LastSlot = Attrs.getNumSlots() - 1;
+ unsigned LastIndex = Attrs.getSlotIndex(LastSlot);
+ if (LastIndex <= Params
+ || (LastIndex == AttributeSet::FunctionIndex
+ && (LastSlot == 0 || Attrs.getSlotIndex(LastSlot - 1) <= Params)))
+ return true;
+
+ return false;
+}
+
+// visitFunction - Verify that a function is ok.
+//
+void Verifier::visitFunction(Function &F) {
+ // Check function arguments.
+ FunctionType *FT = F.getFunctionType();
+ unsigned NumArgs = F.arg_size();
+
+ Assert1(Context == &F.getContext(),
+ "Function context does not match Module context!", &F);
+
+ Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
+ Assert2(FT->getNumParams() == NumArgs,
+ "# formal arguments must match # of arguments for function type!",
+ &F, FT);
+ Assert1(F.getReturnType()->isFirstClassType() ||
+ F.getReturnType()->isVoidTy() ||
+ F.getReturnType()->isStructTy(),
+ "Functions cannot return aggregate values!", &F);
+
+ Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
+ "Invalid struct return type!", &F);
+
+ const AttributeSet &Attrs = F.getAttributes();
+
+ Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
+ "Attribute after last parameter!", &F);
+
+ // Check function attributes.
+ VerifyFunctionAttrs(FT, Attrs, &F);
+
+ // Check that this function meets the restrictions on this calling convention.
+ switch (F.getCallingConv()) {
+ default:
+ break;
+ case CallingConv::C:
+ break;
+ case CallingConv::Fast:
+ case CallingConv::Cold:
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::Intel_OCL_BI:
+ case CallingConv::PTX_Kernel:
+ case CallingConv::PTX_Device:
+ Assert1(!F.isVarArg(),
+ "Varargs functions must have C calling conventions!", &F);
+ break;
+ }
+
+ bool isLLVMdotName = F.getName().size() >= 5 &&
+ F.getName().substr(0, 5) == "llvm.";
+
+ // Check that the argument values match the function type for this function...
+ unsigned i = 0;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++i) {
+ Assert2(I->getType() == FT->getParamType(i),
+ "Argument value does not match function argument type!",
+ I, FT->getParamType(i));
+ Assert1(I->getType()->isFirstClassType(),
+ "Function arguments must have first-class types!", I);
+ if (!isLLVMdotName)
+ Assert2(!I->getType()->isMetadataTy(),
+ "Function takes metadata but isn't an intrinsic", I, &F);
+ }
+
+ if (F.isMaterializable()) {
+ // Function has a body somewhere we can't see.
+ } else if (F.isDeclaration()) {
+ Assert1(F.hasExternalLinkage() || F.hasDLLImportLinkage() ||
+ F.hasExternalWeakLinkage(),
+ "invalid linkage type for function declaration", &F);
+ } else {
+ // Verify that this function (which has a body) is not named "llvm.*". It
+ // is not legal to define intrinsics.
+ Assert1(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F);
+
+ // Check the entry node
+ BasicBlock *Entry = &F.getEntryBlock();
+ Assert1(pred_begin(Entry) == pred_end(Entry),
+ "Entry block to function must not have predecessors!", Entry);
+
+ // The address of the entry block cannot be taken, unless it is dead.
+ if (Entry->hasAddressTaken()) {
+ Assert1(!BlockAddress::get(Entry)->isConstantUsed(),
+ "blockaddress may not be used with the entry block!", Entry);
+ }
+ }
+
+ // If this function is actually an intrinsic, verify that it is only used in
+ // direct call/invokes, never having its "address taken".
+ if (F.getIntrinsicID()) {
+ const User *U;
+ if (F.hasAddressTaken(&U))
+ Assert1(0, "Invalid user of intrinsic instruction!", U);
+ }
+}
+
+// verifyBasicBlock - Verify that a basic block is well formed...
+//
+void Verifier::visitBasicBlock(BasicBlock &BB) {
+ InstsInThisBlock.clear();
+
+ // Ensure that basic blocks have terminators!
+ Assert1(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
+
+ // Check constraints that this basic block imposes on all of the PHI nodes in
+ // it.
+ if (isa<PHINode>(BB.front())) {
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
+ std::sort(Preds.begin(), Preds.end());
+ PHINode *PN;
+ for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
+ // Ensure that PHI nodes have at least one entry!
+ Assert1(PN->getNumIncomingValues() != 0,
+ "PHI nodes must have at least one entry. If the block is dead, "
+ "the PHI should be removed!", PN);
+ Assert1(PN->getNumIncomingValues() == Preds.size(),
+ "PHINode should have one entry for each predecessor of its "
+ "parent basic block!", PN);
+
+ // Get and sort all incoming values in the PHI node...
+ Values.clear();
+ Values.reserve(PN->getNumIncomingValues());
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Values.push_back(std::make_pair(PN->getIncomingBlock(i),
+ PN->getIncomingValue(i)));
+ std::sort(Values.begin(), Values.end());
+
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ // Check to make sure that if there is more than one entry for a
+ // particular basic block in this PHI node, that the incoming values are
+ // all identical.
+ //
+ Assert4(i == 0 || Values[i].first != Values[i-1].first ||
+ Values[i].second == Values[i-1].second,
+ "PHI node has multiple entries for the same basic block with "
+ "different incoming values!", PN, Values[i].first,
+ Values[i].second, Values[i-1].second);
+
+ // Check to make sure that the predecessors and PHI node entries are
+ // matched up.
+ Assert3(Values[i].first == Preds[i],
+ "PHI node entries do not match predecessors!", PN,
+ Values[i].first, Preds[i]);
+ }
+ }
+ }
+}
+
+void Verifier::visitTerminatorInst(TerminatorInst &I) {
+ // Ensure that terminators only exist at the end of the basic block.
+ Assert1(&I == I.getParent()->getTerminator(),
+ "Terminator found in the middle of a basic block!", I.getParent());
+ visitInstruction(I);
+}
+
+void Verifier::visitBranchInst(BranchInst &BI) {
+ if (BI.isConditional()) {
+ Assert2(BI.getCondition()->getType()->isIntegerTy(1),
+ "Branch condition is not 'i1' type!", &BI, BI.getCondition());
+ }
+ visitTerminatorInst(BI);
+}
+
+void Verifier::visitReturnInst(ReturnInst &RI) {
+ Function *F = RI.getParent()->getParent();
+ unsigned N = RI.getNumOperands();
+ if (F->getReturnType()->isVoidTy())
+ Assert2(N == 0,
+ "Found return instr that returns non-void in Function of void "
+ "return type!", &RI, F->getReturnType());
+ else
+ Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
+ "Function return type does not match operand "
+ "type of return inst!", &RI, F->getReturnType());
+
+ // Check to make sure that the return value has necessary properties for
+ // terminators...
+ visitTerminatorInst(RI);
+}
+
+void Verifier::visitSwitchInst(SwitchInst &SI) {
+ // Check to make sure that all of the constants in the switch instruction
+ // have the same type as the switched-on value.
+ Type *SwitchTy = SI.getCondition()->getType();
+ IntegerType *IntTy = cast<IntegerType>(SwitchTy);
+ IntegersSubsetToBB Mapping;
+ std::map<IntegersSubset::Range, unsigned> RangeSetMap;
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
+ IntegersSubset CaseRanges = i.getCaseValueEx();
+ for (unsigned ri = 0, rie = CaseRanges.getNumItems(); ri < rie; ++ri) {
+ IntegersSubset::Range r = CaseRanges.getItem(ri);
+ Assert1(((const APInt&)r.getLow()).getBitWidth() == IntTy->getBitWidth(),
+ "Switch constants must all be same type as switch value!", &SI);
+ Assert1(((const APInt&)r.getHigh()).getBitWidth() == IntTy->getBitWidth(),
+ "Switch constants must all be same type as switch value!", &SI);
+ Mapping.add(r);
+ RangeSetMap[r] = i.getCaseIndex();
+ }
+ }
+
+ IntegersSubsetToBB::RangeIterator errItem;
+ if (!Mapping.verify(errItem)) {
+ unsigned CaseIndex = RangeSetMap[errItem->first];
+ SwitchInst::CaseIt i(&SI, CaseIndex);
+ Assert2(false, "Duplicate integer as switch case", &SI, i.getCaseValueEx());
+ }
+
+ visitTerminatorInst(SI);
+}
+
+void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
+ Assert1(BI.getAddress()->getType()->isPointerTy(),
+ "Indirectbr operand must have pointer type!", &BI);
+ for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
+ Assert1(BI.getDestination(i)->getType()->isLabelTy(),
+ "Indirectbr destinations must all have pointer type!", &BI);
+
+ visitTerminatorInst(BI);
+}
+
+void Verifier::visitSelectInst(SelectInst &SI) {
+ Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
+ SI.getOperand(2)),
+ "Invalid operands for select instruction!", &SI);
+
+ Assert1(SI.getTrueValue()->getType() == SI.getType(),
+ "Select values must have same type as select instruction!", &SI);
+ visitInstruction(SI);
+}
+
+/// visitUserOp1 - User defined operators shouldn't live beyond the lifetime of
+/// a pass, if any exist, it's an error.
+///
+void Verifier::visitUserOp1(Instruction &I) {
+ Assert1(0, "User-defined operators should not live outside of a pass!", &I);
+}
+
+void Verifier::visitTruncInst(TruncInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "trunc source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitZExtInst(ZExtInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "zext source and destination must both be a vector or neither", &I);
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitSExtInst(SExtInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "sext source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPTruncInst(FPTruncInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fptrunc source and destination must both be a vector or neither",&I);
+ Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPExtInst(FPExtInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fpext source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitUIToFPInst(UIToFPInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "UIToFP source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isIntOrIntVectorTy(),
+ "UIToFP source must be integer or integer vector", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),
+ "UIToFP result must be FP or FP vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "UIToFP source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitSIToFPInst(SIToFPInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "SIToFP source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isIntOrIntVectorTy(),
+ "SIToFP source must be integer or integer vector", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),
+ "SIToFP result must be FP or FP vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "SIToFP source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPToUIInst(FPToUIInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "FPToUI source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
+ &I);
+ Assert1(DestTy->isIntOrIntVectorTy(),
+ "FPToUI result must be integer or integer vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToUI source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPToSIInst(FPToSIInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "FPToSI source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isFPOrFPVectorTy(),
+ "FPToSI source must be FP or FP vector", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(),
+ "FPToSI result must be integer or integer vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToSI source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ Assert1(SrcTy->getScalarType()->isPointerTy(),
+ "PtrToInt source must be pointer", &I);
+ Assert1(DestTy->getScalarType()->isIntegerTy(),
+ "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "PtrToInt type mismatch", &I);
+
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "PtrToInt Vector width mismatch", &I);
+ }
+
+ visitInstruction(I);
+}
+
+void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ Assert1(SrcTy->getScalarType()->isIntegerTy(),
+ "IntToPtr source must be an integral", &I);
+ Assert1(DestTy->getScalarType()->isPointerTy(),
+ "IntToPtr result must be a pointer",&I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "IntToPtr type mismatch", &I);
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "IntToPtr Vector width mismatch", &I);
+ }
+ visitInstruction(I);
+}
+
+void Verifier::visitBitCastInst(BitCastInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ // BitCast implies a no-op cast of type only. No bits change.
+ // However, you can't cast pointers to anything but pointers.
+ Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(),
+ "Bitcast requires both operands to be pointer or neither", &I);
+ Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);
+
+ // Disallow aggregates.
+ Assert1(!SrcTy->isAggregateType(),
+ "Bitcast operand must not be aggregate", &I);
+ Assert1(!DestTy->isAggregateType(),
+ "Bitcast type must not be aggregate", &I);
+
+ visitInstruction(I);
+}
+
+/// visitPHINode - Ensure that a PHI node is well formed.
+///
+void Verifier::visitPHINode(PHINode &PN) {
+ // Ensure that the PHI nodes are all grouped together at the top of the block.
+ // This can be tested by checking whether the instruction before this is
+ // either nonexistent (because this is begin()) or is a PHI node. If not,
+ // then there is some other instruction before a PHI.
+ Assert2(&PN == &PN.getParent()->front() ||
+ isa<PHINode>(--BasicBlock::iterator(&PN)),
+ "PHI nodes not grouped at top of basic block!",
+ &PN, PN.getParent());
+
+ // Check that all of the values of the PHI node have the same type as the
+ // result, and that the incoming blocks are really basic blocks.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
+ "PHI node operands are not the same type as the result!", &PN);
+ }
+
+ // All other PHI node constraints are checked in the visitBasicBlock method.
+
+ visitInstruction(PN);
+}
+
+void Verifier::VerifyCallSite(CallSite CS) {
+ Instruction *I = CS.getInstruction();
+
+ Assert1(CS.getCalledValue()->getType()->isPointerTy(),
+ "Called function must be a pointer!", I);
+ PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
+
+ Assert1(FPTy->getElementType()->isFunctionTy(),
+ "Called function is not pointer to function type!", I);
+ FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
+
+ // Verify that the correct number of arguments are being passed
+ if (FTy->isVarArg())
+ Assert1(CS.arg_size() >= FTy->getNumParams(),
+ "Called function requires more parameters than were provided!",I);
+ else
+ Assert1(CS.arg_size() == FTy->getNumParams(),
+ "Incorrect number of arguments passed to called function!", I);
+
+ // Verify that all arguments to the call match the function type.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
+ "Call parameter type does not match function signature!",
+ CS.getArgument(i), FTy->getParamType(i), I);
+
+ const AttributeSet &Attrs = CS.getAttributes();
+
+ Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
+ "Attribute after last parameter!", I);
+
+ // Verify call attributes.
+ VerifyFunctionAttrs(FTy, Attrs, I);
+
+ if (FTy->isVarArg())
+ // Check attributes on the varargs part.
+ for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
+ VerifyParameterAttrs(Attrs, Idx, CS.getArgument(Idx-1)->getType(),
+ false, I);
+
+ Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet),
+ "Attribute 'sret' cannot be used for vararg call arguments!", I);
+ }
+
+ // Verify that there's no metadata unless it's a direct call to an intrinsic.
+ if (CS.getCalledFunction() == 0 ||
+ !CS.getCalledFunction()->getName().startswith("llvm.")) {
+ for (FunctionType::param_iterator PI = FTy->param_begin(),
+ PE = FTy->param_end(); PI != PE; ++PI)
+ Assert1(!(*PI)->isMetadataTy(),
+ "Function has metadata parameter but isn't an intrinsic", I);
+ }
+
+ visitInstruction(*I);
+}
+
+void Verifier::visitCallInst(CallInst &CI) {
+ VerifyCallSite(&CI);
+
+ if (Function *F = CI.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ visitIntrinsicFunctionCall(ID, CI);
+}
+
+void Verifier::visitInvokeInst(InvokeInst &II) {
+ VerifyCallSite(&II);
+
+ // Verify that there is a landingpad instruction as the first non-PHI
+ // instruction of the 'unwind' destination.
+ Assert1(II.getUnwindDest()->isLandingPad(),
+ "The unwind destination does not have a landingpad instruction!",&II);
+
+ visitTerminatorInst(II);
+}
+
+/// visitBinaryOperator - Check that both arguments to the binary operator are
+/// of the same type!
+///
+void Verifier::visitBinaryOperator(BinaryOperator &B) {
+ Assert1(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
+ "Both operands to a binary operator are not of the same type!", &B);
+
+ switch (B.getOpcode()) {
+ // Check that integer arithmetic operators are only used with
+ // integral operands.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Integer arithmetic operators only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Integer arithmetic operators must have same type "
+ "for operands and result!", &B);
+ break;
+ // Check that floating-point arithmetic operators are only used with
+ // floating-point operands.
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ Assert1(B.getType()->isFPOrFPVectorTy(),
+ "Floating-point arithmetic operators only work with "
+ "floating-point types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Floating-point arithmetic operators must have same type "
+ "for operands and result!", &B);
+ break;
+ // Check that logical operators are only used with integral operands.
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Logical operators only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Logical operators must have same type for operands and result!",
+ &B);
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Shifts only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Shift return type must be same as operands!", &B);
+ break;
+ default:
+ llvm_unreachable("Unknown BinaryOperator opcode!");
+ }
+
+ visitInstruction(B);
+}
+
+void Verifier::visitICmpInst(ICmpInst &IC) {
+ // Check that the operands are the same type
+ Type *Op0Ty = IC.getOperand(0)->getType();
+ Type *Op1Ty = IC.getOperand(1)->getType();
+ Assert1(Op0Ty == Op1Ty,
+ "Both operands to ICmp instruction are not of the same type!", &IC);
+ // Check that the operands are the right type
+ Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
+ "Invalid operand types for ICmp instruction", &IC);
+ // Check that the predicate is valid.
+ Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
+ IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+ "Invalid predicate in ICmp instruction!", &IC);
+
+ visitInstruction(IC);
+}
+
+void Verifier::visitFCmpInst(FCmpInst &FC) {
+ // Check that the operands are the same type
+ Type *Op0Ty = FC.getOperand(0)->getType();
+ Type *Op1Ty = FC.getOperand(1)->getType();
+ Assert1(Op0Ty == Op1Ty,
+ "Both operands to FCmp instruction are not of the same type!", &FC);
+ // Check that the operands are the right type
+ Assert1(Op0Ty->isFPOrFPVectorTy(),
+ "Invalid operand types for FCmp instruction", &FC);
+ // Check that the predicate is valid.
+ Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
+ FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+ "Invalid predicate in FCmp instruction!", &FC);
+
+ visitInstruction(FC);
+}
+
+void Verifier::visitExtractElementInst(ExtractElementInst &EI) {
+ Assert1(ExtractElementInst::isValidOperands(EI.getOperand(0),
+ EI.getOperand(1)),
+ "Invalid extractelement operands!", &EI);
+ visitInstruction(EI);
+}
+
+void Verifier::visitInsertElementInst(InsertElementInst &IE) {
+ Assert1(InsertElementInst::isValidOperands(IE.getOperand(0),
+ IE.getOperand(1),
+ IE.getOperand(2)),
+ "Invalid insertelement operands!", &IE);
+ visitInstruction(IE);
+}
+
+void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
+ Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
+ SV.getOperand(2)),
+ "Invalid shufflevector operands!", &SV);
+ visitInstruction(SV);
+}
+
+void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ Type *TargetTy = GEP.getPointerOperandType()->getScalarType();
+
+ Assert1(isa<PointerType>(TargetTy),
+ "GEP base pointer is not a vector or a vector of pointers", &GEP);
+ Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
+ "GEP into unsized type!", &GEP);
+ Assert1(GEP.getPointerOperandType()->isVectorTy() ==
+ GEP.getType()->isVectorTy(), "Vector GEP must return a vector value",
+ &GEP);
+
+ SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
+ Type *ElTy =
+ GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
+ Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
+
+ Assert2(GEP.getType()->getScalarType()->isPointerTy() &&
+ cast<PointerType>(GEP.getType()->getScalarType())->getElementType()
+ == ElTy, "GEP is not of right type for indices!", &GEP, ElTy);
+
+ if (GEP.getPointerOperandType()->isVectorTy()) {
+ // Additional checks for vector GEPs.
+ unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements();
+ Assert1(GepWidth == GEP.getType()->getVectorNumElements(),
+ "Vector GEP result width doesn't match operand's", &GEP);
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
+ Type *IndexTy = Idxs[i]->getType();
+ Assert1(IndexTy->isVectorTy(),
+ "Vector GEP must have vector indices!", &GEP);
+ unsigned IndexWidth = IndexTy->getVectorNumElements();
+ Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+ }
+ }
+ visitInstruction(GEP);
+}
+
+static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
+ return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
+}
+
+void Verifier::visitLoadInst(LoadInst &LI) {
+ PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
+ Assert1(PTy, "Load operand must be a pointer.", &LI);
+ Type *ElTy = PTy->getElementType();
+ Assert2(ElTy == LI.getType(),
+ "Load result type does not match pointer operand type!", &LI, ElTy);
+ if (LI.isAtomic()) {
+ Assert1(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease,
+ "Load cannot have Release ordering", &LI);
+ Assert1(LI.getAlignment() != 0,
+ "Atomic load must specify explicit alignment", &LI);
+ if (!ElTy->isPointerTy()) {
+ Assert2(ElTy->isIntegerTy(),
+ "atomic store operand must have integer type!",
+ &LI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "atomic store operand must be power-of-two byte-sized integer",
+ &LI, ElTy);
+ }
+ } else {
+ Assert1(LI.getSynchScope() == CrossThread,
+ "Non-atomic load cannot have SynchronizationScope specified", &LI);
+ }
+
+ if (MDNode *Range = LI.getMetadata(LLVMContext::MD_range)) {
+ unsigned NumOperands = Range->getNumOperands();
+ Assert1(NumOperands % 2 == 0, "Unfinished range!", Range);
+ unsigned NumRanges = NumOperands / 2;
+ Assert1(NumRanges >= 1, "It should have at least one range!", Range);
+
+ ConstantRange LastRange(1); // Dummy initial value
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Low = dyn_cast<ConstantInt>(Range->getOperand(2*i));
+ Assert1(Low, "The lower limit must be an integer!", Low);
+ ConstantInt *High = dyn_cast<ConstantInt>(Range->getOperand(2*i + 1));
+ Assert1(High, "The upper limit must be an integer!", High);
+ Assert1(High->getType() == Low->getType() &&
+ High->getType() == ElTy, "Range types must match load type!",
+ &LI);
+
+ APInt HighV = High->getValue();
+ APInt LowV = Low->getValue();
+ ConstantRange CurRange(LowV, HighV);
+ Assert1(!CurRange.isEmptySet() && !CurRange.isFullSet(),
+ "Range must not be empty!", Range);
+ if (i != 0) {
+ Assert1(CurRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Assert1(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
+ Range);
+ Assert1(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
+ Range);
+ }
+ LastRange = ConstantRange(LowV, HighV);
+ }
+ if (NumRanges > 2) {
+ APInt FirstLow =
+ dyn_cast<ConstantInt>(Range->getOperand(0))->getValue();
+ APInt FirstHigh =
+ dyn_cast<ConstantInt>(Range->getOperand(1))->getValue();
+ ConstantRange FirstRange(FirstLow, FirstHigh);
+ Assert1(FirstRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Assert1(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
+ Range);
+ }
+
+
+ }
+
+ visitInstruction(LI);
+}
+
+void Verifier::visitStoreInst(StoreInst &SI) {
+ PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
+ Assert1(PTy, "Store operand must be a pointer.", &SI);
+ Type *ElTy = PTy->getElementType();
+ Assert2(ElTy == SI.getOperand(0)->getType(),
+ "Stored value type does not match pointer operand type!",
+ &SI, ElTy);
+ if (SI.isAtomic()) {
+ Assert1(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease,
+ "Store cannot have Acquire ordering", &SI);
+ Assert1(SI.getAlignment() != 0,
+ "Atomic store must specify explicit alignment", &SI);
+ if (!ElTy->isPointerTy()) {
+ Assert2(ElTy->isIntegerTy(),
+ "atomic store operand must have integer type!",
+ &SI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "atomic store operand must be power-of-two byte-sized integer",
+ &SI, ElTy);
+ }
+ } else {
+ Assert1(SI.getSynchScope() == CrossThread,
+ "Non-atomic store cannot have SynchronizationScope specified", &SI);
+ }
+ visitInstruction(SI);
+}
+
+void Verifier::visitAllocaInst(AllocaInst &AI) {
+ PointerType *PTy = AI.getType();
+ Assert1(PTy->getAddressSpace() == 0,
+ "Allocation instruction pointer not in the generic address space!",
+ &AI);
+ Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type",
+ &AI);
+ Assert1(AI.getArraySize()->getType()->isIntegerTy(),
+ "Alloca array size must have integer type", &AI);
+ visitInstruction(AI);
+}
+
+void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
+ Assert1(CXI.getOrdering() != NotAtomic,
+ "cmpxchg instructions must be atomic.", &CXI);
+ Assert1(CXI.getOrdering() != Unordered,
+ "cmpxchg instructions cannot be unordered.", &CXI);
+ PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType());
+ Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI);
+ Type *ElTy = PTy->getElementType();
+ Assert2(ElTy->isIntegerTy(),
+ "cmpxchg operand must have integer type!",
+ &CXI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "cmpxchg operand must be power-of-two byte-sized integer",
+ &CXI, ElTy);
+ Assert2(ElTy == CXI.getOperand(1)->getType(),
+ "Expected value type does not match pointer operand type!",
+ &CXI, ElTy);
+ Assert2(ElTy == CXI.getOperand(2)->getType(),
+ "Stored value type does not match pointer operand type!",
+ &CXI, ElTy);
+ visitInstruction(CXI);
+}
+
+void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+ Assert1(RMWI.getOrdering() != NotAtomic,
+ "atomicrmw instructions must be atomic.", &RMWI);
+ Assert1(RMWI.getOrdering() != Unordered,
+ "atomicrmw instructions cannot be unordered.", &RMWI);
+ PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
+ Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
+ Type *ElTy = PTy->getElementType();
+ Assert2(ElTy->isIntegerTy(),
+ "atomicrmw operand must have integer type!",
+ &RMWI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "atomicrmw operand must be power-of-two byte-sized integer",
+ &RMWI, ElTy);
+ Assert2(ElTy == RMWI.getOperand(1)->getType(),
+ "Argument value type does not match pointer operand type!",
+ &RMWI, ElTy);
+ Assert1(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
+ RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
+ "Invalid binary operation!", &RMWI);
+ visitInstruction(RMWI);
+}
+
+void Verifier::visitFenceInst(FenceInst &FI) {
+ const AtomicOrdering Ordering = FI.getOrdering();
+ Assert1(Ordering == Acquire || Ordering == Release ||
+ Ordering == AcquireRelease || Ordering == SequentiallyConsistent,
+ "fence instructions may only have "
+ "acquire, release, acq_rel, or seq_cst ordering.", &FI);
+ visitInstruction(FI);
+}
+
+void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
+ Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
+ EVI.getIndices()) ==
+ EVI.getType(),
+ "Invalid ExtractValueInst operands!", &EVI);
+
+ visitInstruction(EVI);
+}
+
+void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
+ Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
+ IVI.getIndices()) ==
+ IVI.getOperand(1)->getType(),
+ "Invalid InsertValueInst operands!", &IVI);
+
+ visitInstruction(IVI);
+}
+
+void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
+ BasicBlock *BB = LPI.getParent();
+
+ // The landingpad instruction is ill-formed if it doesn't have any clauses and
+ // isn't a cleanup.
+ Assert1(LPI.getNumClauses() > 0 || LPI.isCleanup(),
+ "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
+
+ // The landingpad instruction defines its parent as a landing pad block. The
+ // landing pad block may be branched to only by the unwind edge of an invoke.
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator());
+ Assert1(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
+ "Block containing LandingPadInst must be jumped to "
+ "only by the unwind edge of an invoke.", &LPI);
+ }
+
+ // The landingpad instruction must be the first non-PHI instruction in the
+ // block.
+ Assert1(LPI.getParent()->getLandingPadInst() == &LPI,
+ "LandingPadInst not the first non-PHI instruction in the block.",
+ &LPI);
+
+ // The personality functions for all landingpad instructions within the same
+ // function should match.
+ if (PersonalityFn)
+ Assert1(LPI.getPersonalityFn() == PersonalityFn,
+ "Personality function doesn't match others in function", &LPI);
+ PersonalityFn = LPI.getPersonalityFn();
+
+ // All operands must be constants.
+ Assert1(isa<Constant>(PersonalityFn), "Personality function is not constant!",
+ &LPI);
+ for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) {
+ Value *Clause = LPI.getClause(i);
+ Assert1(isa<Constant>(Clause), "Clause is not constant!", &LPI);
+ if (LPI.isCatch(i)) {
+ Assert1(isa<PointerType>(Clause->getType()),
+ "Catch operand does not have pointer type!", &LPI);
+ } else {
+ Assert1(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
+ Assert1(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
+ "Filter operand is not an array of constants!", &LPI);
+ }
+ }
+
+ visitInstruction(LPI);
+}
+
+void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
+ Instruction *Op = cast<Instruction>(I.getOperand(i));
+ // If the we have an invalid invoke, don't try to compute the dominance.
+ // We already reject it in the invoke specific checks and the dominance
+ // computation doesn't handle multiple edges.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
+ if (II->getNormalDest() == II->getUnwindDest())
+ return;
+ }
+
+ const Use &U = I.getOperandUse(i);
+ Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, U),
+ "Instruction does not dominate all uses!", Op, &I);
+}
+
+/// verifyInstruction - Verify that an instruction is well formed.
+///
+void Verifier::visitInstruction(Instruction &I) {
+ BasicBlock *BB = I.getParent();
+ Assert1(BB, "Instruction not embedded in basic block!", &I);
+
+ if (!isa<PHINode>(I)) { // Check that non-phi nodes are not self referential
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ Assert1(*UI != (User*)&I || !DT->isReachableFromEntry(BB),
+ "Only PHI nodes may reference their own value!", &I);
+ }
+
+ // Check that void typed values don't have names
+ Assert1(!I.getType()->isVoidTy() || !I.hasName(),
+ "Instruction has a name, but provides a void value!", &I);
+
+ // Check that the return value of the instruction is either void or a legal
+ // value type.
+ Assert1(I.getType()->isVoidTy() ||
+ I.getType()->isFirstClassType(),
+ "Instruction returns a non-scalar type!", &I);
+
+ // Check that the instruction doesn't produce metadata. Calls are already
+ // checked against the callee type.
+ Assert1(!I.getType()->isMetadataTy() ||
+ isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Invalid use of metadata!", &I);
+
+ // Check that all uses of the instruction, if they are instructions
+ // themselves, actually have parent basic blocks. If the use is not an
+ // instruction, it is an error!
+ for (User::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI) {
+ if (Instruction *Used = dyn_cast<Instruction>(*UI))
+ Assert2(Used->getParent() != 0, "Instruction referencing instruction not"
+ " embedded in a basic block!", &I, Used);
+ else {
+ CheckFailed("Use of instruction is not an instruction!", *UI);
+ return;
+ }
+ }
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ Assert1(I.getOperand(i) != 0, "Instruction has null operand!", &I);
+
+ // Check to make sure that only first-class-values are operands to
+ // instructions.
+ if (!I.getOperand(i)->getType()->isFirstClassType()) {
+ Assert1(0, "Instruction operands must be first-class values!", &I);
+ }
+
+ if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
+ // Check to make sure that the "address of" an intrinsic function is never
+ // taken.
+ Assert1(!F->isIntrinsic() || i == (isa<CallInst>(I) ? e-1 : 0),
+ "Cannot take the address of an intrinsic!", &I);
+ Assert1(!F->isIntrinsic() || isa<CallInst>(I) ||
+ F->getIntrinsicID() == Intrinsic::donothing,
+ "Cannot invoke an intrinsinc other than donothing", &I);
+ Assert1(F->getParent() == Mod, "Referencing function in another module!",
+ &I);
+ } else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
+ Assert1(OpBB->getParent() == BB->getParent(),
+ "Referring to a basic block in another function!", &I);
+ } else if (Argument *OpArg = dyn_cast<Argument>(I.getOperand(i))) {
+ Assert1(OpArg->getParent() == BB->getParent(),
+ "Referring to an argument in another function!", &I);
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
+ Assert1(GV->getParent() == Mod, "Referencing global in another module!",
+ &I);
+ } else if (isa<Instruction>(I.getOperand(i))) {
+ verifyDominatesUse(I, i);
+ } else if (isa<InlineAsm>(I.getOperand(i))) {
+ Assert1((i + 1 == e && isa<CallInst>(I)) ||
+ (i + 3 == e && isa<InvokeInst>(I)),
+ "Cannot take the address of an inline asm!", &I);
+ }
+ }
+
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpmath)) {
+ Assert1(I.getType()->isFPOrFPVectorTy(),
+ "fpmath requires a floating point result!", &I);
+ Assert1(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
+ Value *Op0 = MD->getOperand(0);
+ if (ConstantFP *CFP0 = dyn_cast_or_null<ConstantFP>(Op0)) {
+ APFloat Accuracy = CFP0->getValueAPF();
+ Assert1(Accuracy.isNormal() && !Accuracy.isNegative(),
+ "fpmath accuracy not a positive number!", &I);
+ } else {
+ Assert1(false, "invalid fpmath accuracy!", &I);
+ }
+ }
+
+ MDNode *MD = I.getMetadata(LLVMContext::MD_range);
+ Assert1(!MD || isa<LoadInst>(I), "Ranges are only for loads!", &I);
+
+ InstsInThisBlock.insert(&I);
+}
+
+/// VerifyIntrinsicType - Verify that the specified type (which comes from an
+/// intrinsic argument or return value) matches the type constraints specified
+/// by the .td file (e.g. an "any integer" argument really is an integer).
+///
+/// This return true on error but does not print a message.
+bool Verifier::VerifyIntrinsicType(Type *Ty,
+ ArrayRef<Intrinsic::IITDescriptor> &Infos,
+ SmallVectorImpl<Type*> &ArgTys) {
+ using namespace Intrinsic;
+
+ // If we ran out of descriptors, there are too many arguments.
+ if (Infos.empty()) return true;
+ IITDescriptor D = Infos.front();
+ Infos = Infos.slice(1);
+
+ switch (D.Kind) {
+ case IITDescriptor::Void: return !Ty->isVoidTy();
+ case IITDescriptor::MMX: return !Ty->isX86_MMXTy();
+ case IITDescriptor::Metadata: return !Ty->isMetadataTy();
+ case IITDescriptor::Half: return !Ty->isHalfTy();
+ case IITDescriptor::Float: return !Ty->isFloatTy();
+ case IITDescriptor::Double: return !Ty->isDoubleTy();
+ case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
+ case IITDescriptor::Vector: {
+ VectorType *VT = dyn_cast<VectorType>(Ty);
+ return VT == 0 || VT->getNumElements() != D.Vector_Width ||
+ VerifyIntrinsicType(VT->getElementType(), Infos, ArgTys);
+ }
+ case IITDescriptor::Pointer: {
+ PointerType *PT = dyn_cast<PointerType>(Ty);
+ return PT == 0 || PT->getAddressSpace() != D.Pointer_AddressSpace ||
+ VerifyIntrinsicType(PT->getElementType(), Infos, ArgTys);
+ }
+
+ case IITDescriptor::Struct: {
+ StructType *ST = dyn_cast<StructType>(Ty);
+ if (ST == 0 || ST->getNumElements() != D.Struct_NumElements)
+ return true;
+
+ for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
+ if (VerifyIntrinsicType(ST->getElementType(i), Infos, ArgTys))
+ return true;
+ return false;
+ }
+
+ case IITDescriptor::Argument:
+ // Two cases here - If this is the second occurrence of an argument, verify
+ // that the later instance matches the previous instance.
+ if (D.getArgumentNumber() < ArgTys.size())
+ return Ty != ArgTys[D.getArgumentNumber()];
+
+ // Otherwise, if this is the first instance of an argument, record it and
+ // verify the "Any" kind.
+ assert(D.getArgumentNumber() == ArgTys.size() && "Table consistency error");
+ ArgTys.push_back(Ty);
+
+ switch (D.getArgumentKind()) {
+ case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy();
+ case IITDescriptor::AK_AnyFloat: return !Ty->isFPOrFPVectorTy();
+ case IITDescriptor::AK_AnyVector: return !isa<VectorType>(Ty);
+ case IITDescriptor::AK_AnyPointer: return !isa<PointerType>(Ty);
+ }
+ llvm_unreachable("all argument kinds not covered");
+
+ case IITDescriptor::ExtendVecArgument:
+ // This may only be used when referring to a previous vector argument.
+ return D.getArgumentNumber() >= ArgTys.size() ||
+ !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
+ VectorType::getExtendedElementVectorType(
+ cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
+
+ case IITDescriptor::TruncVecArgument:
+ // This may only be used when referring to a previous vector argument.
+ return D.getArgumentNumber() >= ArgTys.size() ||
+ !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
+ VectorType::getTruncatedElementVectorType(
+ cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
+ }
+ llvm_unreachable("unhandled");
+}
+
+/// visitIntrinsicFunction - Allow intrinsics to be verified in different ways.
+///
+void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
+ Function *IF = CI.getCalledFunction();
+ Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!",
+ IF);
+
+ // Verify that the intrinsic prototype lines up with what the .td files
+ // describe.
+ FunctionType *IFTy = IF->getFunctionType();
+ Assert1(!IFTy->isVarArg(), "Intrinsic prototypes are not varargs", IF);
+
+ SmallVector<Intrinsic::IITDescriptor, 8> Table;
+ getIntrinsicInfoTableEntries(ID, Table);
+ ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+
+ SmallVector<Type *, 4> ArgTys;
+ Assert1(!VerifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys),
+ "Intrinsic has incorrect return type!", IF);
+ for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i)
+ Assert1(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys),
+ "Intrinsic has incorrect argument type!", IF);
+ Assert1(TableRef.empty(), "Intrinsic has too few arguments!", IF);
+
+ // Now that we have the intrinsic ID and the actual argument types (and we
+ // know they are legal for the intrinsic!) get the intrinsic name through the
+ // usual means. This allows us to verify the mangling of argument types into
+ // the name.
+ Assert1(Intrinsic::getName(ID, ArgTys) == IF->getName(),
+ "Intrinsic name not mangled correctly for type arguments!", IF);
+
+ // If the intrinsic takes MDNode arguments, verify that they are either global
+ // or are local to *this* function.
+ for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i)
+ if (MDNode *MD = dyn_cast<MDNode>(CI.getArgOperand(i)))
+ visitMDNode(*MD, CI.getParent()->getParent());
+
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::ctlz: // llvm.ctlz
+ case Intrinsic::cttz: // llvm.cttz
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+ "is_zero_undef argument of bit counting intrinsics must be a "
+ "constant int", &CI);
+ break;
+ case Intrinsic::dbg_declare: { // llvm.dbg.declare
+ Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
+ "invalid llvm.dbg.declare intrinsic call 1", &CI);
+ MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
+ Assert1(MD->getNumOperands() == 1,
+ "invalid llvm.dbg.declare intrinsic call 2", &CI);
+ } break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
+ "alignment argument of memory intrinsics must be a constant int",
+ &CI);
+ Assert1(isa<ConstantInt>(CI.getArgOperand(4)),
+ "isvolatile argument of memory intrinsics must be a constant int",
+ &CI);
+ break;
+ case Intrinsic::gcroot:
+ case Intrinsic::gcwrite:
+ case Intrinsic::gcread:
+ if (ID == Intrinsic::gcroot) {
+ AllocaInst *AI =
+ dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
+ Assert1(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
+ Assert1(isa<Constant>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #2 must be a constant.", &CI);
+ if (!AI->getType()->getElementType()->isPointerTy()) {
+ Assert1(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #1 must either be a pointer alloca, "
+ "or argument #2 must be a non-null constant.", &CI);
+ }
+ }
+
+ Assert1(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
+ break;
+ case Intrinsic::init_trampoline:
+ Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.init_trampoline parameter #2 must resolve to a function.",
+ &CI);
+ break;
+ case Intrinsic::prefetch:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
+ isa<ConstantInt>(CI.getArgOperand(2)) &&
+ cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
+ "invalid arguments to llvm.prefetch",
+ &CI);
+ break;
+ case Intrinsic::stackprotector:
+ Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.stackprotector parameter #2 must resolve to an alloca.",
+ &CI);
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
+ "size argument of memory use markers must be a constant integer",
+ &CI);
+ break;
+ case Intrinsic::invariant_end:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+ "llvm.invariant.end parameter #2 must be a constant integer", &CI);
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createVerifierPass(VerifierFailureAction action) {
+ return new Verifier(action);
+}
+
+
+/// verifyFunction - Check a function for errors, printing messages on stderr.
+/// Return true if the function is corrupt.
+///
+bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot verify external functions");
+
+ FunctionPassManager FPM(F.getParent());
+ Verifier *V = new Verifier(action);
+ FPM.add(V);
+ FPM.run(F);
+ return V->Broken;
+}
+
+/// verifyModule - Check a module for errors, printing messages on stderr.
+/// Return true if the module is corrupt.
+///
+bool llvm::verifyModule(const Module &M, VerifierFailureAction action,
+ std::string *ErrorInfo) {
+ PassManager PM;
+ Verifier *V = new Verifier(action);
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+
+ if (ErrorInfo && V->Broken)
+ *ErrorInfo = V->MessagesStr.str();
+ return V->Broken;
+}
diff --git a/contrib/llvm/lib/IRReader/IRReader.cpp b/contrib/llvm/lib/IRReader/IRReader.cpp
new file mode 100644
index 000000000000..eeec14e834c1
--- /dev/null
+++ b/contrib/llvm/lib/IRReader/IRReader.cpp
@@ -0,0 +1,89 @@
+//===---- IRReader.cpp - Reader for LLVM IR files -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+namespace llvm {
+ extern bool TimePassesIsEnabled;
+}
+
+static const char *TimeIRParsingGroupName = "LLVM IR Parsing";
+static const char *TimeIRParsingName = "Parse IR";
+
+
+Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+ (const unsigned char *)Buffer->getBufferEnd())) {
+ std::string ErrMsg;
+ Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg);
+ if (M == 0) {
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+ ErrMsg);
+ // ParseBitcodeFile does not take ownership of the Buffer in the
+ // case of an error.
+ delete Buffer;
+ }
+ return M;
+ }
+
+ return ParseAssembly(Buffer, 0, Err, Context);
+}
+
+Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
+ "Could not open input file: " + ec.message());
+ return 0;
+ }
+
+ return getLazyIRModule(File.take(), Err, Context);
+}
+
+Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ NamedRegionTimer T(TimeIRParsingName, TimeIRParsingGroupName,
+ TimePassesIsEnabled);
+ if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+ (const unsigned char *)Buffer->getBufferEnd())) {
+ std::string ErrMsg;
+ Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg);
+ if (M == 0)
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+ ErrMsg);
+ // ParseBitcodeFile does not take ownership of the Buffer.
+ delete Buffer;
+ return M;
+ }
+
+ return ParseAssembly(Buffer, 0, Err, Context);
+}
+
+Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
+ "Could not open input file: " + ec.message());
+ return 0;
+ }
+
+ return ParseIR(File.take(), Err, Context);
+}
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
new file mode 100644
index 000000000000..74cbdadd61eb
--- /dev/null
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -0,0 +1,1322 @@
+//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM module linker.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm-c/Linker.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <cctype>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TypeMap implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TypeMapTy : public ValueMapTypeRemapper {
+ /// MappedTypes - This is a mapping from a source type to a destination type
+ /// to use.
+ DenseMap<Type*, Type*> MappedTypes;
+
+ /// SpeculativeTypes - When checking to see if two subgraphs are isomorphic,
+ /// we speculatively add types to MappedTypes, but keep track of them here in
+ /// case we need to roll back.
+ SmallVector<Type*, 16> SpeculativeTypes;
+
+ /// SrcDefinitionsToResolve - This is a list of non-opaque structs in the
+ /// source module that are mapped to an opaque struct in the destination
+ /// module.
+ SmallVector<StructType*, 16> SrcDefinitionsToResolve;
+
+ /// DstResolvedOpaqueTypes - This is the set of opaque types in the
+ /// destination modules who are getting a body from the source module.
+ SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes;
+
+public:
+ /// addTypeMapping - Indicate that the specified type in the destination
+ /// module is conceptually equivalent to the specified type in the source
+ /// module.
+ void addTypeMapping(Type *DstTy, Type *SrcTy);
+
+ /// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
+ /// module from a type definition in the source module.
+ void linkDefinedTypeBodies();
+
+ /// get - Return the mapped type to use for the specified input type from the
+ /// source module.
+ Type *get(Type *SrcTy);
+
+ FunctionType *get(FunctionType *T) {return cast<FunctionType>(get((Type*)T));}
+
+ /// dump - Dump out the type map for debugging purposes.
+ void dump() const {
+ for (DenseMap<Type*, Type*>::const_iterator
+ I = MappedTypes.begin(), E = MappedTypes.end(); I != E; ++I) {
+ dbgs() << "TypeMap: ";
+ I->first->dump();
+ dbgs() << " => ";
+ I->second->dump();
+ dbgs() << '\n';
+ }
+ }
+
+private:
+ Type *getImpl(Type *T);
+ /// remapType - Implement the ValueMapTypeRemapper interface.
+ Type *remapType(Type *SrcTy) {
+ return get(SrcTy);
+ }
+
+ bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
+};
+}
+
+void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
+ Type *&Entry = MappedTypes[SrcTy];
+ if (Entry) return;
+
+ if (DstTy == SrcTy) {
+ Entry = DstTy;
+ return;
+ }
+
+ // Check to see if these types are recursively isomorphic and establish a
+ // mapping between them if so.
+ if (!areTypesIsomorphic(DstTy, SrcTy)) {
+ // Oops, they aren't isomorphic. Just discard this request by rolling out
+ // any speculative mappings we've established.
+ for (unsigned i = 0, e = SpeculativeTypes.size(); i != e; ++i)
+ MappedTypes.erase(SpeculativeTypes[i]);
+ }
+ SpeculativeTypes.clear();
+}
+
+/// areTypesIsomorphic - Recursively walk this pair of types, returning true
+/// if they are isomorphic, false if they are not.
+bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
+ // Two types with differing kinds are clearly not isomorphic.
+ if (DstTy->getTypeID() != SrcTy->getTypeID()) return false;
+
+ // If we have an entry in the MappedTypes table, then we have our answer.
+ Type *&Entry = MappedTypes[SrcTy];
+ if (Entry)
+ return Entry == DstTy;
+
+ // Two identical types are clearly isomorphic. Remember this
+ // non-speculatively.
+ if (DstTy == SrcTy) {
+ Entry = DstTy;
+ return true;
+ }
+
+ // Okay, we have two types with identical kinds that we haven't seen before.
+
+ // If this is an opaque struct type, special case it.
+ if (StructType *SSTy = dyn_cast<StructType>(SrcTy)) {
+ // Mapping an opaque type to any struct, just keep the dest struct.
+ if (SSTy->isOpaque()) {
+ Entry = DstTy;
+ SpeculativeTypes.push_back(SrcTy);
+ return true;
+ }
+
+ // Mapping a non-opaque source type to an opaque dest. If this is the first
+ // type that we're mapping onto this destination type then we succeed. Keep
+ // the dest, but fill it in later. This doesn't need to be speculative. If
+ // this is the second (different) type that we're trying to map onto the
+ // same opaque type then we fail.
+ if (cast<StructType>(DstTy)->isOpaque()) {
+ // We can only map one source type onto the opaque destination type.
+ if (!DstResolvedOpaqueTypes.insert(cast<StructType>(DstTy)))
+ return false;
+ SrcDefinitionsToResolve.push_back(SSTy);
+ Entry = DstTy;
+ return true;
+ }
+ }
+
+ // If the number of subtypes disagree between the two types, then we fail.
+ if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes())
+ return false;
+
+ // Fail if any of the extra properties (e.g. array size) of the type disagree.
+ if (isa<IntegerType>(DstTy))
+ return false; // bitwidth disagrees.
+ if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
+ if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
+ return false;
+
+ } else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
+ if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
+ return false;
+ } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
+ StructType *SSTy = cast<StructType>(SrcTy);
+ if (DSTy->isLiteral() != SSTy->isLiteral() ||
+ DSTy->isPacked() != SSTy->isPacked())
+ return false;
+ } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
+ if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
+ return false;
+ } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
+ if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
+ return false;
+ }
+
+ // Otherwise, we speculate that these two types will line up and recursively
+ // check the subelements.
+ Entry = DstTy;
+ SpeculativeTypes.push_back(SrcTy);
+
+ for (unsigned i = 0, e = SrcTy->getNumContainedTypes(); i != e; ++i)
+ if (!areTypesIsomorphic(DstTy->getContainedType(i),
+ SrcTy->getContainedType(i)))
+ return false;
+
+ // If everything seems to have lined up, then everything is great.
+ return true;
+}
+
+/// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
+/// module from a type definition in the source module.
+void TypeMapTy::linkDefinedTypeBodies() {
+ SmallVector<Type*, 16> Elements;
+ SmallString<16> TmpName;
+
+ // Note that processing entries in this loop (calling 'get') can add new
+ // entries to the SrcDefinitionsToResolve vector.
+ while (!SrcDefinitionsToResolve.empty()) {
+ StructType *SrcSTy = SrcDefinitionsToResolve.pop_back_val();
+ StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
+
+ // TypeMap is a many-to-one mapping, if there were multiple types that
+ // provide a body for DstSTy then previous iterations of this loop may have
+ // already handled it. Just ignore this case.
+ if (!DstSTy->isOpaque()) continue;
+ assert(!SrcSTy->isOpaque() && "Not resolving a definition?");
+
+ // Map the body of the source type over to a new body for the dest type.
+ Elements.resize(SrcSTy->getNumElements());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i)
+ Elements[i] = getImpl(SrcSTy->getElementType(i));
+
+ DstSTy->setBody(Elements, SrcSTy->isPacked());
+
+ // If DstSTy has no name or has a longer name than STy, then viciously steal
+ // STy's name.
+ if (!SrcSTy->hasName()) continue;
+ StringRef SrcName = SrcSTy->getName();
+
+ if (!DstSTy->hasName() || DstSTy->getName().size() > SrcName.size()) {
+ TmpName.insert(TmpName.end(), SrcName.begin(), SrcName.end());
+ SrcSTy->setName("");
+ DstSTy->setName(TmpName.str());
+ TmpName.clear();
+ }
+ }
+
+ DstResolvedOpaqueTypes.clear();
+}
+
+/// get - Return the mapped type to use for the specified input type from the
+/// source module.
+Type *TypeMapTy::get(Type *Ty) {
+ Type *Result = getImpl(Ty);
+
+ // If this caused a reference to any struct type, resolve it before returning.
+ if (!SrcDefinitionsToResolve.empty())
+ linkDefinedTypeBodies();
+ return Result;
+}
+
+/// getImpl - This is the recursive version of get().
+Type *TypeMapTy::getImpl(Type *Ty) {
+ // If we already have an entry for this type, return it.
+ Type **Entry = &MappedTypes[Ty];
+ if (*Entry) return *Entry;
+
+ // If this is not a named struct type, then just map all of the elements and
+ // then rebuild the type from inside out.
+ if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral()) {
+ // If there are no element types to map, then the type is itself. This is
+ // true for the anonymous {} struct, things like 'float', integers, etc.
+ if (Ty->getNumContainedTypes() == 0)
+ return *Entry = Ty;
+
+ // Remap all of the elements, keeping track of whether any of them change.
+ bool AnyChange = false;
+ SmallVector<Type*, 4> ElementTypes;
+ ElementTypes.resize(Ty->getNumContainedTypes());
+ for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i) {
+ ElementTypes[i] = getImpl(Ty->getContainedType(i));
+ AnyChange |= ElementTypes[i] != Ty->getContainedType(i);
+ }
+
+ // If we found our type while recursively processing stuff, just use it.
+ Entry = &MappedTypes[Ty];
+ if (*Entry) return *Entry;
+
+ // If all of the element types mapped directly over, then the type is usable
+ // as-is.
+ if (!AnyChange)
+ return *Entry = Ty;
+
+ // Otherwise, rebuild a modified type.
+ switch (Ty->getTypeID()) {
+ default: llvm_unreachable("unknown derived type to remap");
+ case Type::ArrayTyID:
+ return *Entry = ArrayType::get(ElementTypes[0],
+ cast<ArrayType>(Ty)->getNumElements());
+ case Type::VectorTyID:
+ return *Entry = VectorType::get(ElementTypes[0],
+ cast<VectorType>(Ty)->getNumElements());
+ case Type::PointerTyID:
+ return *Entry = PointerType::get(ElementTypes[0],
+ cast<PointerType>(Ty)->getAddressSpace());
+ case Type::FunctionTyID:
+ return *Entry = FunctionType::get(ElementTypes[0],
+ makeArrayRef(ElementTypes).slice(1),
+ cast<FunctionType>(Ty)->isVarArg());
+ case Type::StructTyID:
+ // Note that this is only reached for anonymous structs.
+ return *Entry = StructType::get(Ty->getContext(), ElementTypes,
+ cast<StructType>(Ty)->isPacked());
+ }
+ }
+
+ // Otherwise, this is an unmapped named struct. If the struct can be directly
+ // mapped over, just use it as-is. This happens in a case when the linked-in
+ // module has something like:
+ // %T = type {%T*, i32}
+ // @GV = global %T* null
+ // where T does not exist at all in the destination module.
+ //
+ // The other case we watch for is when the type is not in the destination
+ // module, but that it has to be rebuilt because it refers to something that
+ // is already mapped. For example, if the destination module has:
+ // %A = type { i32 }
+ // and the source module has something like
+ // %A' = type { i32 }
+ // %B = type { %A'* }
+ // @GV = global %B* null
+ // then we want to create a new type: "%B = type { %A*}" and have it take the
+ // pristine "%B" name from the source module.
+ //
+ // To determine which case this is, we have to recursively walk the type graph
+ // speculating that we'll be able to reuse it unmodified. Only if this is
+ // safe would we map the entire thing over. Because this is an optimization,
+ // and is not required for the prettiness of the linked module, we just skip
+ // it and always rebuild a type here.
+ StructType *STy = cast<StructType>(Ty);
+
+ // If the type is opaque, we can just use it directly.
+ if (STy->isOpaque())
+ return *Entry = STy;
+
+ // Otherwise we create a new type and resolve its body later. This will be
+ // resolved by the top level of get().
+ SrcDefinitionsToResolve.push_back(STy);
+ StructType *DTy = StructType::create(STy->getContext());
+ DstResolvedOpaqueTypes.insert(DTy);
+ return *Entry = DTy;
+}
+
+//===----------------------------------------------------------------------===//
+// ModuleLinker implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// ModuleLinker - This is an implementation class for the LinkModules
+ /// function, which is the entrypoint for this file.
+ class ModuleLinker {
+ Module *DstM, *SrcM;
+
+ TypeMapTy TypeMap;
+
+ /// ValueMap - Mapping of values from what they used to be in Src, to what
+ /// they are now in DstM. ValueToValueMapTy is a ValueMap, which involves
+ /// some overhead due to the use of Value handles which the Linker doesn't
+ /// actually need, but this allows us to reuse the ValueMapper code.
+ ValueToValueMapTy ValueMap;
+
+ struct AppendingVarInfo {
+ GlobalVariable *NewGV; // New aggregate global in dest module.
+ Constant *DstInit; // Old initializer from dest module.
+ Constant *SrcInit; // Old initializer from src module.
+ };
+
+ std::vector<AppendingVarInfo> AppendingVars;
+
+ unsigned Mode; // Mode to treat source module.
+
+ // Set of items not to link in from source.
+ SmallPtrSet<const Value*, 16> DoNotLinkFromSource;
+
+ // Vector of functions to lazily link in.
+ std::vector<Function*> LazilyLinkFunctions;
+
+ public:
+ std::string ErrorMsg;
+
+ ModuleLinker(Module *dstM, Module *srcM, unsigned mode)
+ : DstM(dstM), SrcM(srcM), Mode(mode) { }
+
+ bool run();
+
+ private:
+ /// emitError - Helper method for setting a message and returning an error
+ /// code.
+ bool emitError(const Twine &Message) {
+ ErrorMsg = Message.str();
+ return true;
+ }
+
+ /// getLinkageResult - This analyzes the two global values and determines
+ /// what the result will look like in the destination module.
+ bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+ GlobalValue::LinkageTypes &LT,
+ GlobalValue::VisibilityTypes &Vis,
+ bool &LinkFromSrc);
+
+ /// getLinkedToGlobal - Given a global in the source module, return the
+ /// global in the destination module that is being linked to, if any.
+ GlobalValue *getLinkedToGlobal(GlobalValue *SrcGV) {
+ // If the source has no name it can't link. If it has local linkage,
+ // there is no name match-up going on.
+ if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
+ return 0;
+
+ // Otherwise see if we have a match in the destination module's symtab.
+ GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName());
+ if (DGV == 0) return 0;
+
+ // If we found a global with the same name in the dest module, but it has
+ // internal linkage, we are really not doing any linkage here.
+ if (DGV->hasLocalLinkage())
+ return 0;
+
+ // Otherwise, we do in fact link to the destination global.
+ return DGV;
+ }
+
+ void computeTypeMapping();
+
+ bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
+ bool linkGlobalProto(GlobalVariable *SrcGV);
+ bool linkFunctionProto(Function *SrcF);
+ bool linkAliasProto(GlobalAlias *SrcA);
+ bool linkModuleFlagsMetadata();
+
+ void linkAppendingVarInit(const AppendingVarInfo &AVI);
+ void linkGlobalInits();
+ void linkFunctionBody(Function *Dst, Function *Src);
+ void linkAliasBodies();
+ void linkNamedMDNodes();
+ };
+}
+
+/// forceRenaming - The LLVM SymbolTable class autorenames globals that conflict
+/// in the symbol table. This is good for all clients except for us. Go
+/// through the trouble to force this back.
+static void forceRenaming(GlobalValue *GV, StringRef Name) {
+ // If the global doesn't force its name or if it already has the right name,
+ // there is nothing for us to do.
+ if (GV->hasLocalLinkage() || GV->getName() == Name)
+ return;
+
+ Module *M = GV->getParent();
+
+ // If there is a conflict, rename the conflict.
+ if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
+ GV->takeName(ConflictGV);
+ ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
+ assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
+ } else {
+ GV->setName(Name); // Force the name back
+ }
+}
+
+/// copyGVAttributes - copy additional attributes (those not needed to construct
+/// a GlobalValue) from the SrcGV to the DestGV.
+static void copyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
+ // Use the maximum alignment, rather than just copying the alignment of SrcGV.
+ unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment());
+ DestGV->copyAttributesFrom(SrcGV);
+ DestGV->setAlignment(Alignment);
+
+ forceRenaming(DestGV, SrcGV->getName());
+}
+
+static bool isLessConstraining(GlobalValue::VisibilityTypes a,
+ GlobalValue::VisibilityTypes b) {
+ if (a == GlobalValue::HiddenVisibility)
+ return false;
+ if (b == GlobalValue::HiddenVisibility)
+ return true;
+ if (a == GlobalValue::ProtectedVisibility)
+ return false;
+ if (b == GlobalValue::ProtectedVisibility)
+ return true;
+ return false;
+}
+
+/// getLinkageResult - This analyzes the two global values and determines what
+/// the result will look like in the destination module. In particular, it
+/// computes the resultant linkage type and visibility, computes whether the
+/// global in the source should be copied over to the destination (replacing
+/// the existing one), and computes whether this linkage is an error or not.
+bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+ GlobalValue::LinkageTypes &LT,
+ GlobalValue::VisibilityTypes &Vis,
+ bool &LinkFromSrc) {
+ assert(Dest && "Must have two globals being queried");
+ assert(!Src->hasLocalLinkage() &&
+ "If Src has internal linkage, Dest shouldn't be set!");
+
+ bool SrcIsDeclaration = Src->isDeclaration() && !Src->isMaterializable();
+ bool DestIsDeclaration = Dest->isDeclaration();
+
+ if (SrcIsDeclaration) {
+ // If Src is external or if both Src & Dest are external.. Just link the
+ // external globals, we aren't adding anything.
+ if (Src->hasDLLImportLinkage()) {
+ // If one of GVs has DLLImport linkage, result should be dllimport'ed.
+ if (DestIsDeclaration) {
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ }
+ } else if (Dest->hasExternalWeakLinkage()) {
+ // If the Dest is weak, use the source linkage.
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ }
+ } else if (DestIsDeclaration && !Dest->hasDLLImportLinkage()) {
+ // If Dest is external but Src is not:
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else if (Src->isWeakForLinker()) {
+ // At this point we know that Dest has LinkOnce, External*, Weak, Common,
+ // or DLL* linkage.
+ if (Dest->hasExternalWeakLinkage() ||
+ Dest->hasAvailableExternallyLinkage() ||
+ (Dest->hasLinkOnceLinkage() &&
+ (Src->hasWeakLinkage() || Src->hasCommonLinkage()))) {
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ }
+ } else if (Dest->isWeakForLinker()) {
+ // At this point we know that Src has External* or DLL* linkage.
+ if (Src->hasExternalWeakLinkage()) {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ } else {
+ LinkFromSrc = true;
+ LT = GlobalValue::ExternalLinkage;
+ }
+ } else {
+ assert((Dest->hasExternalLinkage() || Dest->hasDLLImportLinkage() ||
+ Dest->hasDLLExportLinkage() || Dest->hasExternalWeakLinkage()) &&
+ (Src->hasExternalLinkage() || Src->hasDLLImportLinkage() ||
+ Src->hasDLLExportLinkage() || Src->hasExternalWeakLinkage()) &&
+ "Unexpected linkage type!");
+ return emitError("Linking globals named '" + Src->getName() +
+ "': symbol multiply defined!");
+ }
+
+ // Compute the visibility. We follow the rules in the System V Application
+ // Binary Interface.
+ Vis = isLessConstraining(Src->getVisibility(), Dest->getVisibility()) ?
+ Dest->getVisibility() : Src->getVisibility();
+ return false;
+}
+
+/// computeTypeMapping - Loop over all of the linked values to compute type
+/// mappings. For example, if we link "extern Foo *x" and "Foo *x = NULL", then
+/// we have two struct types 'Foo' but one got renamed when the module was
+/// loaded into the same LLVMContext.
+void ModuleLinker::computeTypeMapping() {
+ // Incorporate globals.
+ for (Module::global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I) {
+ GlobalValue *DGV = getLinkedToGlobal(I);
+ if (DGV == 0) continue;
+
+ if (!DGV->hasAppendingLinkage() || !I->hasAppendingLinkage()) {
+ TypeMap.addTypeMapping(DGV->getType(), I->getType());
+ continue;
+ }
+
+ // Unify the element type of appending arrays.
+ ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
+ ArrayType *SAT = cast<ArrayType>(I->getType()->getElementType());
+ TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
+ }
+
+ // Incorporate functions.
+ for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I) {
+ if (GlobalValue *DGV = getLinkedToGlobal(I))
+ TypeMap.addTypeMapping(DGV->getType(), I->getType());
+ }
+
+ // Incorporate types by name, scanning all the types in the source module.
+ // At this point, the destination module may have a type "%foo = { i32 }" for
+ // example. When the source module got loaded into the same LLVMContext, if
+ // it had the same type, it would have been renamed to "%foo.42 = { i32 }".
+ TypeFinder SrcStructTypes;
+ SrcStructTypes.run(*SrcM, true);
+ SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(),
+ SrcStructTypes.end());
+
+ TypeFinder DstStructTypes;
+ DstStructTypes.run(*DstM, true);
+ SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(),
+ DstStructTypes.end());
+
+ for (unsigned i = 0, e = SrcStructTypes.size(); i != e; ++i) {
+ StructType *ST = SrcStructTypes[i];
+ if (!ST->hasName()) continue;
+
+ // Check to see if there is a dot in the name followed by a digit.
+ size_t DotPos = ST->getName().rfind('.');
+ if (DotPos == 0 || DotPos == StringRef::npos ||
+ ST->getName().back() == '.' ||
+ !isdigit(static_cast<unsigned char>(ST->getName()[DotPos+1])))
+ continue;
+
+ // Check to see if the destination module has a struct with the prefix name.
+ if (StructType *DST = DstM->getTypeByName(ST->getName().substr(0, DotPos)))
+ // Don't use it if this actually came from the source module. They're in
+ // the same LLVMContext after all. Also don't use it unless the type is
+ // actually used in the destination module. This can happen in situations
+ // like this:
+ //
+ // Module A Module B
+ // -------- --------
+ // %Z = type { %A } %B = type { %C.1 }
+ // %A = type { %B.1, [7 x i8] } %C.1 = type { i8* }
+ // %B.1 = type { %C } %A.2 = type { %B.3, [5 x i8] }
+ // %C = type { i8* } %B.3 = type { %C.1 }
+ //
+ // When we link Module B with Module A, the '%B' in Module B is
+ // used. However, that would then use '%C.1'. But when we process '%C.1',
+ // we prefer to take the '%C' version. So we are then left with both
+ // '%C.1' and '%C' being used for the same types. This leads to some
+ // variables using one type and some using the other.
+ if (!SrcStructTypesSet.count(DST) && DstStructTypesSet.count(DST))
+ TypeMap.addTypeMapping(DST, ST);
+ }
+
+ // Don't bother incorporating aliases, they aren't generally typed well.
+
+ // Now that we have discovered all of the type equivalences, get a body for
+ // any 'opaque' types in the dest module that are now resolved.
+ TypeMap.linkDefinedTypeBodies();
+}
+
+/// linkAppendingVarProto - If there were any appending global variables, link
+/// them together now. Return true on error.
+bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
+ GlobalVariable *SrcGV) {
+
+ if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
+ return emitError("Linking globals named '" + SrcGV->getName() +
+ "': can only link appending global with another appending global!");
+
+ ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
+ ArrayType *SrcTy =
+ cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()));
+ Type *EltTy = DstTy->getElementType();
+
+ // Check to see that they two arrays agree on type.
+ if (EltTy != SrcTy->getElementType())
+ return emitError("Appending variables with different element types!");
+ if (DstGV->isConstant() != SrcGV->isConstant())
+ return emitError("Appending variables linked with different const'ness!");
+
+ if (DstGV->getAlignment() != SrcGV->getAlignment())
+ return emitError(
+ "Appending variables with different alignment need to be linked!");
+
+ if (DstGV->getVisibility() != SrcGV->getVisibility())
+ return emitError(
+ "Appending variables with different visibility need to be linked!");
+
+ if (DstGV->getSection() != SrcGV->getSection())
+ return emitError(
+ "Appending variables with different section name need to be linked!");
+
+ uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements();
+ ArrayType *NewType = ArrayType::get(EltTy, NewSize);
+
+ // Create the new global variable.
+ GlobalVariable *NG =
+ new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(),
+ DstGV->getLinkage(), /*init*/0, /*name*/"", DstGV,
+ DstGV->getThreadLocalMode(),
+ DstGV->getType()->getAddressSpace());
+
+ // Propagate alignment, visibility and section info.
+ copyGVAttributes(NG, DstGV);
+
+ AppendingVarInfo AVI;
+ AVI.NewGV = NG;
+ AVI.DstInit = DstGV->getInitializer();
+ AVI.SrcInit = SrcGV->getInitializer();
+ AppendingVars.push_back(AVI);
+
+ // Replace any uses of the two global variables with uses of the new
+ // global.
+ ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));
+
+ DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
+ DstGV->eraseFromParent();
+
+ // Track the source variable so we don't try to link it.
+ DoNotLinkFromSource.insert(SrcGV);
+
+ return false;
+}
+
+/// linkGlobalProto - Loop through the global variables in the src module and
+/// merge them into the dest module.
+bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
+ GlobalValue *DGV = getLinkedToGlobal(SGV);
+ llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+
+ if (DGV) {
+ // Concatenation of appending linkage variables is magic and handled later.
+ if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage())
+ return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV);
+
+ // Determine whether linkage of these two globals follows the source
+ // module's definition or the destination module's definition.
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ GlobalValue::VisibilityTypes NV;
+ bool LinkFromSrc = false;
+ if (getLinkageResult(DGV, SGV, NewLinkage, NV, LinkFromSrc))
+ return true;
+ NewVisibility = NV;
+
+ // If we're not linking from the source, then keep the definition that we
+ // have.
+ if (!LinkFromSrc) {
+ // Special case for const propagation.
+ if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
+ if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
+ DGVar->setConstant(true);
+
+ // Set calculated linkage and visibility.
+ DGV->setLinkage(NewLinkage);
+ DGV->setVisibility(*NewVisibility);
+
+ // Make sure to remember this mapping.
+ ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
+
+ // Track the source global so that we don't attempt to copy it over when
+ // processing global initializers.
+ DoNotLinkFromSource.insert(SGV);
+
+ return false;
+ }
+ }
+
+ // No linking to be performed or linking from the source: simply create an
+ // identical version of the symbol over in the dest module... the
+ // initializer will be filled in later by LinkGlobalInits.
+ GlobalVariable *NewDGV =
+ new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()),
+ SGV->isConstant(), SGV->getLinkage(), /*init*/0,
+ SGV->getName(), /*insertbefore*/0,
+ SGV->getThreadLocalMode(),
+ SGV->getType()->getAddressSpace());
+ // Propagate alignment, visibility and section info.
+ copyGVAttributes(NewDGV, SGV);
+ if (NewVisibility)
+ NewDGV->setVisibility(*NewVisibility);
+
+ if (DGV) {
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ // Make sure to remember this mapping.
+ ValueMap[SGV] = NewDGV;
+ return false;
+}
+
+/// linkFunctionProto - Link the function in the source module into the
+/// destination module if needed, setting up mapping information.
+bool ModuleLinker::linkFunctionProto(Function *SF) {
+ GlobalValue *DGV = getLinkedToGlobal(SF);
+ llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+
+ if (DGV) {
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ bool LinkFromSrc = false;
+ GlobalValue::VisibilityTypes NV;
+ if (getLinkageResult(DGV, SF, NewLinkage, NV, LinkFromSrc))
+ return true;
+ NewVisibility = NV;
+
+ if (!LinkFromSrc) {
+ // Set calculated linkage
+ DGV->setLinkage(NewLinkage);
+ DGV->setVisibility(*NewVisibility);
+
+ // Make sure to remember this mapping.
+ ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
+
+ // Track the function from the source module so we don't attempt to remap
+ // it.
+ DoNotLinkFromSource.insert(SF);
+
+ return false;
+ }
+ }
+
+ // If there is no linkage to be performed or we are linking from the source,
+ // bring SF over.
+ Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
+ SF->getLinkage(), SF->getName(), DstM);
+ copyGVAttributes(NewDF, SF);
+ if (NewVisibility)
+ NewDF->setVisibility(*NewVisibility);
+
+ if (DGV) {
+ // Any uses of DF need to change to NewDF, with cast.
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
+ DGV->eraseFromParent();
+ } else {
+ // Internal, LO_ODR, or LO linkage - stick in set to ignore and lazily link.
+ if (SF->hasLocalLinkage() || SF->hasLinkOnceLinkage() ||
+ SF->hasAvailableExternallyLinkage()) {
+ DoNotLinkFromSource.insert(SF);
+ LazilyLinkFunctions.push_back(SF);
+ }
+ }
+
+ ValueMap[SF] = NewDF;
+ return false;
+}
+
+/// LinkAliasProto - Set up prototypes for any aliases that come over from the
+/// source module.
+bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
+ GlobalValue *DGV = getLinkedToGlobal(SGA);
+ llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+
+ if (DGV) {
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ GlobalValue::VisibilityTypes NV;
+ bool LinkFromSrc = false;
+ if (getLinkageResult(DGV, SGA, NewLinkage, NV, LinkFromSrc))
+ return true;
+ NewVisibility = NV;
+
+ if (!LinkFromSrc) {
+ // Set calculated linkage.
+ DGV->setLinkage(NewLinkage);
+ DGV->setVisibility(*NewVisibility);
+
+ // Make sure to remember this mapping.
+ ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
+
+ // Track the alias from the source module so we don't attempt to remap it.
+ DoNotLinkFromSource.insert(SGA);
+
+ return false;
+ }
+ }
+
+ // If there is no linkage to be performed or we're linking from the source,
+ // bring over SGA.
+ GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()),
+ SGA->getLinkage(), SGA->getName(),
+ /*aliasee*/0, DstM);
+ copyGVAttributes(NewDA, SGA);
+ if (NewVisibility)
+ NewDA->setVisibility(*NewVisibility);
+
+ if (DGV) {
+ // Any uses of DGV need to change to NewDA, with cast.
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ ValueMap[SGA] = NewDA;
+ return false;
+}
+
+static void getArrayElements(Constant *C, SmallVectorImpl<Constant*> &Dest) {
+ unsigned NumElements = cast<ArrayType>(C->getType())->getNumElements();
+
+ for (unsigned i = 0; i != NumElements; ++i)
+ Dest.push_back(C->getAggregateElement(i));
+}
+
+void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
+ // Merge the initializer.
+ SmallVector<Constant*, 16> Elements;
+ getArrayElements(AVI.DstInit, Elements);
+
+ Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap);
+ getArrayElements(SrcInit, Elements);
+
+ ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
+ AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements));
+}
+
+/// linkGlobalInits - Update the initializers in the Dest module now that all
+/// globals that may be referenced are in Dest.
+void ModuleLinker::linkGlobalInits() {
+ // Loop over all of the globals in the src module, mapping them over as we go
+ for (Module::const_global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I) {
+
+ // Only process initialized GV's or ones not already in dest.
+ if (!I->hasInitializer() || DoNotLinkFromSource.count(I)) continue;
+
+ // Grab destination global variable.
+ GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[I]);
+ // Figure out what the initializer looks like in the dest module.
+ DGV->setInitializer(MapValue(I->getInitializer(), ValueMap,
+ RF_None, &TypeMap));
+ }
+}
+
+/// linkFunctionBody - Copy the source function over into the dest function and
+/// fix up references to values. At this point we know that Dest is an external
+/// function, and that Src is not.
+void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
+ assert(Src && Dst && Dst->isDeclaration() && !Src->isDeclaration());
+
+ // Go through and convert function arguments over, remembering the mapping.
+ Function::arg_iterator DI = Dst->arg_begin();
+ for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
+ I != E; ++I, ++DI) {
+ DI->setName(I->getName()); // Copy the name over.
+
+ // Add a mapping to our mapping.
+ ValueMap[I] = DI;
+ }
+
+ if (Mode == Linker::DestroySource) {
+ // Splice the body of the source function into the dest function.
+ Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
+
+ // At this point, all of the instructions and values of the function are now
+ // copied over. The only problem is that they are still referencing values in
+ // the Source function as operands. Loop through all of the operands of the
+ // functions and patch them up to point to the local versions.
+ for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap);
+
+ } else {
+ // Clone the body of the function into the dest function.
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns.
+ CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", NULL, &TypeMap);
+ }
+
+ // There is no need to map the arguments anymore.
+ for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
+ I != E; ++I)
+ ValueMap.erase(I);
+
+}
+
+/// linkAliasBodies - Insert all of the aliases in Src into the Dest module.
+void ModuleLinker::linkAliasBodies() {
+ for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end();
+ I != E; ++I) {
+ if (DoNotLinkFromSource.count(I))
+ continue;
+ if (Constant *Aliasee = I->getAliasee()) {
+ GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]);
+ DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None, &TypeMap));
+ }
+ }
+}
+
+/// linkNamedMDNodes - Insert all of the named MDNodes in Src into the Dest
+/// module.
+void ModuleLinker::linkNamedMDNodes() {
+ const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
+ for (Module::const_named_metadata_iterator I = SrcM->named_metadata_begin(),
+ E = SrcM->named_metadata_end(); I != E; ++I) {
+ // Don't link module flags here. Do them separately.
+ if (&*I == SrcModFlags) continue;
+ NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(I->getName());
+ // Add Src elements into Dest node.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ DestNMD->addOperand(MapValue(I->getOperand(i), ValueMap,
+ RF_None, &TypeMap));
+ }
+}
+
+/// linkModuleFlagsMetadata - Merge the linker flags in Src into the Dest
+/// module.
+bool ModuleLinker::linkModuleFlagsMetadata() {
+ // If the source module has no module flags, we are done.
+ const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
+ if (!SrcModFlags) return false;
+
+ // If the destination module doesn't have module flags yet, then just copy
+ // over the source module's flags.
+ NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata();
+ if (DstModFlags->getNumOperands() == 0) {
+ for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
+ DstModFlags->addOperand(SrcModFlags->getOperand(I));
+
+ return false;
+ }
+
+ // First build a map of the existing module flags and requirements.
+ DenseMap<MDString*, MDNode*> Flags;
+ SmallSetVector<MDNode*, 16> Requirements;
+ for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) {
+ MDNode *Op = DstModFlags->getOperand(I);
+ ConstantInt *Behavior = cast<ConstantInt>(Op->getOperand(0));
+ MDString *ID = cast<MDString>(Op->getOperand(1));
+
+ if (Behavior->getZExtValue() == Module::Require) {
+ Requirements.insert(cast<MDNode>(Op->getOperand(2)));
+ } else {
+ Flags[ID] = Op;
+ }
+ }
+
+ // Merge in the flags from the source module, and also collect its set of
+ // requirements.
+ bool HasErr = false;
+ for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) {
+ MDNode *SrcOp = SrcModFlags->getOperand(I);
+ ConstantInt *SrcBehavior = cast<ConstantInt>(SrcOp->getOperand(0));
+ MDString *ID = cast<MDString>(SrcOp->getOperand(1));
+ MDNode *DstOp = Flags.lookup(ID);
+ unsigned SrcBehaviorValue = SrcBehavior->getZExtValue();
+
+ // If this is a requirement, add it and continue.
+ if (SrcBehaviorValue == Module::Require) {
+ // If the destination module does not already have this requirement, add
+ // it.
+ if (Requirements.insert(cast<MDNode>(SrcOp->getOperand(2)))) {
+ DstModFlags->addOperand(SrcOp);
+ }
+ continue;
+ }
+
+ // If there is no existing flag with this ID, just add it.
+ if (!DstOp) {
+ Flags[ID] = SrcOp;
+ DstModFlags->addOperand(SrcOp);
+ continue;
+ }
+
+ // Otherwise, perform a merge.
+ ConstantInt *DstBehavior = cast<ConstantInt>(DstOp->getOperand(0));
+ unsigned DstBehaviorValue = DstBehavior->getZExtValue();
+
+ // If either flag has override behavior, handle it first.
+ if (DstBehaviorValue == Module::Override) {
+ // Diagnose inconsistent flags which both have override behavior.
+ if (SrcBehaviorValue == Module::Override &&
+ SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+ HasErr |= emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting override values");
+ }
+ continue;
+ } else if (SrcBehaviorValue == Module::Override) {
+ // Update the destination flag to that of the source.
+ DstOp->replaceOperandWith(0, SrcBehavior);
+ DstOp->replaceOperandWith(2, SrcOp->getOperand(2));
+ continue;
+ }
+
+ // Diagnose inconsistent merge behavior types.
+ if (SrcBehaviorValue != DstBehaviorValue) {
+ HasErr |= emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting behaviors");
+ continue;
+ }
+
+ // Perform the merge for standard behavior types.
+ switch (SrcBehaviorValue) {
+ case Module::Require:
+ case Module::Override: assert(0 && "not possible"); break;
+ case Module::Error: {
+ // Emit an error if the values differ.
+ if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+ HasErr |= emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting values");
+ }
+ continue;
+ }
+ case Module::Warning: {
+ // Emit a warning if the values differ.
+ if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+ errs() << "WARNING: linking module flags '" << ID->getString()
+ << "': IDs have conflicting values";
+ }
+ continue;
+ }
+ case Module::Append: {
+ MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+ MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
+ unsigned NumOps = DstValue->getNumOperands() + SrcValue->getNumOperands();
+ Value **VP, **Values = VP = new Value*[NumOps];
+ for (unsigned i = 0, e = DstValue->getNumOperands(); i != e; ++i, ++VP)
+ *VP = DstValue->getOperand(i);
+ for (unsigned i = 0, e = SrcValue->getNumOperands(); i != e; ++i, ++VP)
+ *VP = SrcValue->getOperand(i);
+ DstOp->replaceOperandWith(2, MDNode::get(DstM->getContext(),
+ ArrayRef<Value*>(Values,
+ NumOps)));
+ delete[] Values;
+ break;
+ }
+ case Module::AppendUnique: {
+ SmallSetVector<Value*, 16> Elts;
+ MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+ MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
+ for (unsigned i = 0, e = DstValue->getNumOperands(); i != e; ++i)
+ Elts.insert(DstValue->getOperand(i));
+ for (unsigned i = 0, e = SrcValue->getNumOperands(); i != e; ++i)
+ Elts.insert(SrcValue->getOperand(i));
+ DstOp->replaceOperandWith(2, MDNode::get(DstM->getContext(),
+ ArrayRef<Value*>(Elts.begin(),
+ Elts.end())));
+ break;
+ }
+ }
+ }
+
+ // Check all of the requirements.
+ for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
+ MDNode *Requirement = Requirements[I];
+ MDString *Flag = cast<MDString>(Requirement->getOperand(0));
+ Value *ReqValue = Requirement->getOperand(1);
+
+ MDNode *Op = Flags[Flag];
+ if (!Op || Op->getOperand(2) != ReqValue) {
+ HasErr |= emitError("linking module flags '" + Flag->getString() +
+ "': does not have the required value");
+ continue;
+ }
+ }
+
+ return HasErr;
+}
+
+bool ModuleLinker::run() {
+ assert(DstM && "Null destination module");
+ assert(SrcM && "Null source module");
+
+ // Inherit the target data from the source module if the destination module
+ // doesn't have one already.
+ if (DstM->getDataLayout().empty() && !SrcM->getDataLayout().empty())
+ DstM->setDataLayout(SrcM->getDataLayout());
+
+ // Copy the target triple from the source to dest if the dest's is empty.
+ if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
+ DstM->setTargetTriple(SrcM->getTargetTriple());
+
+ if (!SrcM->getDataLayout().empty() && !DstM->getDataLayout().empty() &&
+ SrcM->getDataLayout() != DstM->getDataLayout())
+ errs() << "WARNING: Linking two modules of different data layouts!\n";
+ if (!SrcM->getTargetTriple().empty() &&
+ DstM->getTargetTriple() != SrcM->getTargetTriple()) {
+ errs() << "WARNING: Linking two modules of different target triples: ";
+ if (!SrcM->getModuleIdentifier().empty())
+ errs() << SrcM->getModuleIdentifier() << ": ";
+ errs() << "'" << SrcM->getTargetTriple() << "' and '"
+ << DstM->getTargetTriple() << "'\n";
+ }
+
+ // Append the module inline asm string.
+ if (!SrcM->getModuleInlineAsm().empty()) {
+ if (DstM->getModuleInlineAsm().empty())
+ DstM->setModuleInlineAsm(SrcM->getModuleInlineAsm());
+ else
+ DstM->setModuleInlineAsm(DstM->getModuleInlineAsm()+"\n"+
+ SrcM->getModuleInlineAsm());
+ }
+
+ // Loop over all of the linked values to compute type mappings.
+ computeTypeMapping();
+
+ // Insert all of the globals in src into the DstM module... without linking
+ // initializers (which could refer to functions not yet mapped over).
+ for (Module::global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I)
+ if (linkGlobalProto(I))
+ return true;
+
+ // Link the functions together between the two modules, without doing function
+ // bodies... this just adds external function prototypes to the DstM
+ // function... We do this so that when we begin processing function bodies,
+ // all of the global values that may be referenced are available in our
+ // ValueMap.
+ for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I)
+ if (linkFunctionProto(I))
+ return true;
+
+ // If there were any aliases, link them now.
+ for (Module::alias_iterator I = SrcM->alias_begin(),
+ E = SrcM->alias_end(); I != E; ++I)
+ if (linkAliasProto(I))
+ return true;
+
+ for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i)
+ linkAppendingVarInit(AppendingVars[i]);
+
+ // Update the initializers in the DstM module now that all globals that may
+ // be referenced are in DstM.
+ linkGlobalInits();
+
+ // Link in the function bodies that are defined in the source module into
+ // DstM.
+ for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) {
+ // Skip if not linking from source.
+ if (DoNotLinkFromSource.count(SF)) continue;
+
+ // Skip if no body (function is external) or materialize.
+ if (SF->isDeclaration()) {
+ if (!SF->isMaterializable())
+ continue;
+ if (SF->Materialize(&ErrorMsg))
+ return true;
+ }
+
+ linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
+ SF->Dematerialize();
+ }
+
+ // Resolve all uses of aliases with aliasees.
+ linkAliasBodies();
+
+ // Remap all of the named MDNodes in Src into the DstM module. We do this
+ // after linking GlobalValues so that MDNodes that reference GlobalValues
+ // are properly remapped.
+ linkNamedMDNodes();
+
+ // Merge the module flags into the DstM module.
+ if (linkModuleFlagsMetadata())
+ return true;
+
+ // Process vector of lazily linked in functions.
+ bool LinkedInAnyFunctions;
+ do {
+ LinkedInAnyFunctions = false;
+
+ for(std::vector<Function*>::iterator I = LazilyLinkFunctions.begin(),
+ E = LazilyLinkFunctions.end(); I != E; ++I) {
+ if (!*I)
+ continue;
+
+ Function *SF = *I;
+ Function *DF = cast<Function>(ValueMap[SF]);
+
+ if (!DF->use_empty()) {
+
+ // Materialize if necessary.
+ if (SF->isDeclaration()) {
+ if (!SF->isMaterializable())
+ continue;
+ if (SF->Materialize(&ErrorMsg))
+ return true;
+ }
+
+ // Link in function body.
+ linkFunctionBody(DF, SF);
+ SF->Dematerialize();
+
+ // "Remove" from vector by setting the element to 0.
+ *I = 0;
+
+ // Set flag to indicate we may have more functions to lazily link in
+ // since we linked in a function.
+ LinkedInAnyFunctions = true;
+ }
+ }
+ } while (LinkedInAnyFunctions);
+
+ // Remove any prototypes of functions that were not actually linked in.
+ for(std::vector<Function*>::iterator I = LazilyLinkFunctions.begin(),
+ E = LazilyLinkFunctions.end(); I != E; ++I) {
+ if (!*I)
+ continue;
+
+ Function *SF = *I;
+ Function *DF = cast<Function>(ValueMap[SF]);
+ if (DF->use_empty())
+ DF->eraseFromParent();
+ }
+
+ // Now that all of the types from the source are used, resolve any structs
+ // copied over to the dest that didn't exist there.
+ TypeMap.linkDefinedTypeBodies();
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// LinkModules entrypoint.
+//===----------------------------------------------------------------------===//
+
+/// LinkModules - This function links two modules together, with the resulting
+/// Dest module modified to be the composite of the two input modules. If an
+/// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
+/// the problem. Upon failure, the Dest module could be in a modified state,
+/// and shouldn't be relied on to be consistent.
+bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
+ std::string *ErrorMsg) {
+ ModuleLinker TheLinker(Dest, Src, Mode);
+ if (TheLinker.run()) {
+ if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg;
+ return true;
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// C API.
+//===----------------------------------------------------------------------===//
+
+LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src,
+ LLVMLinkerMode Mode, char **OutMessages) {
+ std::string Messages;
+ LLVMBool Result = Linker::LinkModules(unwrap(Dest), unwrap(Src),
+ Mode, OutMessages? &Messages : 0);
+ if (OutMessages)
+ *OutMessages = strdup(Messages.c_str());
+ return Result;
+}
diff --git a/contrib/llvm/lib/Linker/Linker.cpp b/contrib/llvm/lib/Linker/Linker.cpp
new file mode 100644
index 000000000000..74d24f278b77
--- /dev/null
+++ b/contrib/llvm/lib/Linker/Linker.cpp
@@ -0,0 +1,70 @@
+//===- lib/Linker/Linker.cpp - Basic Linker functionality ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains basic Linker functionality that all usages will need.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+Linker::Linker(StringRef progname, StringRef modname,
+ LLVMContext& C, unsigned flags):
+ Context(C),
+ Composite(new Module(modname, C)),
+ Flags(flags),
+ Error(),
+ ProgramName(progname) { }
+
+Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
+ Context(aModule->getContext()),
+ Composite(aModule),
+ Flags(flags),
+ Error(),
+ ProgramName(progname) { }
+
+Linker::~Linker() {
+ delete Composite;
+}
+
+bool
+Linker::error(StringRef message) {
+ Error = message;
+ if (!(Flags&QuietErrors))
+ errs() << ProgramName << ": error: " << message << "\n";
+ return true;
+}
+
+bool
+Linker::warning(StringRef message) {
+ Error = message;
+ if (!(Flags&QuietWarnings))
+ errs() << ProgramName << ": warning: " << message << "\n";
+ return false;
+}
+
+void
+Linker::verbose(StringRef message) {
+ if (Flags&Verbose)
+ errs() << " " << message << "\n";
+}
+
+Module*
+Linker::releaseModule() {
+ Module* result = Composite;
+ Error.clear();
+ Composite = 0;
+ Flags = 0;
+ return result;
+}
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
new file mode 100644
index 000000000000..3d995484e7c7
--- /dev/null
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -0,0 +1,1594 @@
+//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+using namespace llvm;
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "reloc-info"
+
+namespace {
+class ELFObjectWriter : public MCObjectWriter {
+ protected:
+
+ static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
+ static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
+ static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout);
+ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
+ bool Used, bool Renamed);
+ static bool isLocal(const MCSymbolData &Data, bool isSignature,
+ bool isUsedInReloc);
+ static bool IsELFMetaDataSection(const MCSectionData &SD);
+ static uint64_t DataSectionSize(const MCSectionData &SD);
+ static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD);
+ static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD);
+
+ void WriteDataSectionData(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionELF &Section);
+
+ /*static bool isFixupKindX86RIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+ }*/
+
+ /// ELFSymbolData - Helper struct for containing some precomputed
+ /// information on symbols.
+ struct ELFSymbolData {
+ MCSymbolData *SymbolData;
+ uint64_t StringIndex;
+ uint32_t SectionIndex;
+
+ // Support lexicographic sorting.
+ bool operator<(const ELFSymbolData &RHS) const {
+ if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
+ return true;
+ if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
+ return false;
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+ }
+ };
+
+ /// The target specific ELF writer instance.
+ llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
+
+ SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
+ SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
+ DenseMap<const MCSymbol *, const MCSymbol *> Renames;
+
+ llvm::DenseMap<const MCSectionData*,
+ std::vector<ELFRelocationEntry> > Relocations;
+ DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
+
+ /// @}
+ /// @name Symbol Table Data
+ /// @{
+
+ SmallString<256> StringTable;
+ std::vector<ELFSymbolData> LocalSymbolData;
+ std::vector<ELFSymbolData> ExternalSymbolData;
+ std::vector<ELFSymbolData> UndefinedSymbolData;
+
+ /// @}
+
+ bool NeedsGOT;
+
+ bool NeedsSymtabShndx;
+
+ // This holds the symbol table index of the last local symbol.
+ unsigned LastLocalSymbolIndex;
+ // This holds the .strtab section index.
+ unsigned StringTableIndex;
+ // This holds the .symtab section index.
+ unsigned SymbolTableIndex;
+
+ unsigned ShstrtabIndex;
+
+
+ const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+ // TargetObjectWriter wrappers.
+ const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return TargetObjectWriter->ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+ }
+ const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return TargetObjectWriter->undefinedExplicitRelSym(Target, Fixup,
+ IsPCRel);
+ }
+
+ bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ bool hasRelocationAddend() const {
+ return TargetObjectWriter->hasRelocationAddend();
+ }
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ return TargetObjectWriter->GetRelocType(Target, Fixup, IsPCRel,
+ IsRelocWithSymbol, Addend);
+ }
+
+ public:
+ ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS, bool IsLittleEndian)
+ : MCObjectWriter(_OS, IsLittleEndian),
+ TargetObjectWriter(MOTW),
+ NeedsGOT(false), NeedsSymtabShndx(false) {
+ }
+
+ virtual ~ELFObjectWriter();
+
+ void WriteWord(uint64_t W) {
+ if (is64Bit())
+ Write64(W);
+ else
+ Write32(W);
+ }
+
+ void StringLE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 0);
+ buf[1] = char(Value >> 8);
+ }
+
+ void StringLE32(char *buf, uint32_t Value) {
+ StringLE16(buf, uint16_t(Value >> 0));
+ StringLE16(buf + 2, uint16_t(Value >> 16));
+ }
+
+ void StringLE64(char *buf, uint64_t Value) {
+ StringLE32(buf, uint32_t(Value >> 0));
+ StringLE32(buf + 4, uint32_t(Value >> 32));
+ }
+
+ void StringBE16(char *buf ,uint16_t Value) {
+ buf[0] = char(Value >> 8);
+ buf[1] = char(Value >> 0);
+ }
+
+ void StringBE32(char *buf, uint32_t Value) {
+ StringBE16(buf, uint16_t(Value >> 16));
+ StringBE16(buf + 2, uint16_t(Value >> 0));
+ }
+
+ void StringBE64(char *buf, uint64_t Value) {
+ StringBE32(buf, uint32_t(Value >> 32));
+ StringBE32(buf + 4, uint32_t(Value >> 0));
+ }
+
+ void String8(MCDataFragment &F, uint8_t Value) {
+ char buf[1];
+ buf[0] = Value;
+ F.getContents().append(&buf[0], &buf[1]);
+ }
+
+ void String16(MCDataFragment &F, uint16_t Value) {
+ char buf[2];
+ if (isLittleEndian())
+ StringLE16(buf, Value);
+ else
+ StringBE16(buf, Value);
+ F.getContents().append(&buf[0], &buf[2]);
+ }
+
+ void String32(MCDataFragment &F, uint32_t Value) {
+ char buf[4];
+ if (isLittleEndian())
+ StringLE32(buf, Value);
+ else
+ StringBE32(buf, Value);
+ F.getContents().append(&buf[0], &buf[4]);
+ }
+
+ void String64(MCDataFragment &F, uint64_t Value) {
+ char buf[8];
+ if (isLittleEndian())
+ StringLE64(buf, Value);
+ else
+ StringBE64(buf, Value);
+ F.getContents().append(&buf[0], &buf[8]);
+ }
+
+ void WriteHeader(const MCAssembler &Asm,
+ uint64_t SectionDataSize,
+ unsigned NumberOfSections);
+
+ void WriteSymbolEntry(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ uint64_t name, uint8_t info,
+ uint64_t value, uint64_t size,
+ uint8_t other, uint32_t shndx,
+ bool Reserved);
+
+ void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ ELFSymbolData &MSD,
+ const MCAsmLayout &Layout);
+
+ typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
+ void WriteSymbolTable(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap);
+
+ virtual void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+ uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S);
+
+ // Map from a group section to the signature symbol
+ typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
+ // Map from a signature symbol to the group section
+ typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
+ // Map from a section to the section with the relocations
+ typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy;
+ // Map from a section to its offset
+ typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy;
+
+ /// ComputeSymbolTable - Compute the symbol table data
+ ///
+ /// \param Asm - The assembler.
+ /// \param SectionIndexMap - Maps a section to its index.
+ /// \param RevGroupMap - Maps a signature symbol to the group section.
+ /// \param NumRegularSections - Number of non-relocation sections.
+ void ComputeSymbolTable(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ RevGroupMapTy RevGroupMap,
+ unsigned NumRegularSections);
+
+ void ComputeIndexMap(MCAssembler &Asm,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ RelMapTy &RelMap);
+
+ void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
+ const RelMapTy &RelMap);
+
+ void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ // Create the sections that show up in the symbol table. Currently
+ // those are the .note.GNU-stack section and the group sections.
+ void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout);
+
+ void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const SectionOffsetMapTy &SectionOffsetMap);
+
+ void ComputeSectionOrder(MCAssembler &Asm,
+ std::vector<const MCSectionELF*> &Sections);
+
+ void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Size, uint32_t Link, uint32_t Info,
+ uint64_t Alignment, uint64_t EntrySize);
+
+ void WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD);
+
+ virtual bool
+ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const;
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+ void WriteSection(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex,
+ uint64_t Offset, uint64_t Size, uint64_t Alignment,
+ const MCSectionELF &Section);
+ };
+}
+
+bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+ const MCFixupKindInfo &FKI =
+ Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind);
+
+ return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+}
+
+bool ELFObjectWriter::RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) {
+ switch (Variant) {
+ default:
+ return false;
+ case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_PLT:
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_GOTOFF:
+ case MCSymbolRefExpr::VK_TPOFF:
+ case MCSymbolRefExpr::VK_TLSGD:
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ case MCSymbolRefExpr::VK_NTPOFF:
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ case MCSymbolRefExpr::VK_TLSLDM:
+ case MCSymbolRefExpr::VK_DTPOFF:
+ case MCSymbolRefExpr::VK_TLSLD:
+ return true;
+ }
+}
+
+ELFObjectWriter::~ELFObjectWriter()
+{}
+
+// Emit the ELF header.
+void ELFObjectWriter::WriteHeader(const MCAssembler &Asm,
+ uint64_t SectionDataSize,
+ unsigned NumberOfSections) {
+ // ELF Header
+ // ----------
+ //
+ // Note
+ // ----
+ // emitWord method behaves differently for ELF32 and ELF64, writing
+ // 4 bytes in the former and 8 in the latter.
+
+ Write8(0x7f); // e_ident[EI_MAG0]
+ Write8('E'); // e_ident[EI_MAG1]
+ Write8('L'); // e_ident[EI_MAG2]
+ Write8('F'); // e_ident[EI_MAG3]
+
+ Write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
+
+ // e_ident[EI_DATA]
+ Write8(isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
+
+ Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
+ // e_ident[EI_OSABI]
+ Write8(TargetObjectWriter->getOSABI());
+ Write8(0); // e_ident[EI_ABIVERSION]
+
+ WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD);
+
+ Write16(ELF::ET_REL); // e_type
+
+ Write16(TargetObjectWriter->getEMachine()); // e_machine = target
+
+ Write32(ELF::EV_CURRENT); // e_version
+ WriteWord(0); // e_entry, no entry point in .o file
+ WriteWord(0); // e_phoff, no program header for .o
+ WriteWord(SectionDataSize + (is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
+ sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
+
+ // e_flags = whatever the target wants
+ Write32(Asm.getELFHeaderEFlags());
+
+ // e_ehsize = ELF header size
+ Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
+
+ Write16(0); // e_phentsize = prog header entry size
+ Write16(0); // e_phnum = # prog header entries = 0
+
+ // e_shentsize = Section header entry size
+ Write16(is64Bit() ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
+
+ // e_shnum = # of section header ents
+ if (NumberOfSections >= ELF::SHN_LORESERVE)
+ Write16(ELF::SHN_UNDEF);
+ else
+ Write16(NumberOfSections);
+
+ // e_shstrndx = Section # of '.shstrtab'
+ if (ShstrtabIndex >= ELF::SHN_LORESERVE)
+ Write16(ELF::SHN_XINDEX);
+ else
+ Write16(ShstrtabIndex);
+}
+
+void ELFObjectWriter::WriteSymbolEntry(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ uint64_t name,
+ uint8_t info, uint64_t value,
+ uint64_t size, uint8_t other,
+ uint32_t shndx,
+ bool Reserved) {
+ if (ShndxF) {
+ if (shndx >= ELF::SHN_LORESERVE && !Reserved)
+ String32(*ShndxF, shndx);
+ else
+ String32(*ShndxF, 0);
+ }
+
+ uint16_t Index = (shndx >= ELF::SHN_LORESERVE && !Reserved) ?
+ uint16_t(ELF::SHN_XINDEX) : shndx;
+
+ if (is64Bit()) {
+ String32(*SymtabF, name); // st_name
+ String8(*SymtabF, info); // st_info
+ String8(*SymtabF, other); // st_other
+ String16(*SymtabF, Index); // st_shndx
+ String64(*SymtabF, value); // st_value
+ String64(*SymtabF, size); // st_size
+ } else {
+ String32(*SymtabF, name); // st_name
+ String32(*SymtabF, value); // st_value
+ String32(*SymtabF, size); // st_size
+ String8(*SymtabF, info); // st_info
+ String8(*SymtabF, other); // st_other
+ String16(*SymtabF, Index); // st_shndx
+ }
+}
+
+uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data,
+ const MCAsmLayout &Layout) {
+ if (Data.isCommon() && Data.isExternal())
+ return Data.getCommonAlignment();
+
+ const MCSymbol &Symbol = Data.getSymbol();
+
+ if (Symbol.isAbsolute() && Symbol.isVariable()) {
+ if (const MCExpr *Value = Symbol.getVariableValue()) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, Layout))
+ return (uint64_t)IntValue;
+ }
+ }
+
+ if (!Symbol.isInSection())
+ return 0;
+
+
+ if (Data.getFragment()) {
+ if (Data.getFlags() & ELF_Other_ThumbFunc)
+ return Layout.getSymbolOffset(&Data)+1;
+ else
+ return Layout.getSymbolOffset(&Data);
+ }
+
+ return 0;
+}
+
+void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // The presence of symbol versions causes undefined symbols and
+ // versions declared with @@@ to be renamed.
+
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Alias = it->getSymbol();
+ const MCSymbol &Symbol = Alias.AliasedSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+ // Not an alias.
+ if (&Symbol == &Alias)
+ continue;
+
+ StringRef AliasName = Alias.getName();
+ size_t Pos = AliasName.find('@');
+ if (Pos == StringRef::npos)
+ continue;
+
+ // Aliases defined with .symvar copy the binding from the symbol they alias.
+ // This is the first place we are able to copy this information.
+ it->setExternal(SD.isExternal());
+ MCELF::SetBinding(*it, MCELF::GetBinding(SD));
+
+ StringRef Rest = AliasName.substr(Pos);
+ if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
+ continue;
+
+ // FIXME: produce a better error message.
+ if (Symbol.isUndefined() && Rest.startswith("@@") &&
+ !Rest.startswith("@@@"))
+ report_fatal_error("A @@ version cannot be undefined");
+
+ Renames.insert(std::make_pair(&Symbol, &Alias));
+ }
+}
+
+void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ ELFSymbolData &MSD,
+ const MCAsmLayout &Layout) {
+ MCSymbolData &OrigData = *MSD.SymbolData;
+ MCSymbolData &Data =
+ Layout.getAssembler().getSymbolData(OrigData.getSymbol().AliasedSymbol());
+
+ bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() ||
+ Data.getSymbol().isVariable();
+
+ // Binding and Type share the same byte as upper and lower nibbles
+ uint8_t Binding = MCELF::GetBinding(OrigData);
+ uint8_t Type = MCELF::GetType(Data);
+ uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
+
+ // Other and Visibility share the same byte with Visability using the lower
+ // 2 bits
+ uint8_t Visibility = MCELF::GetVisibility(OrigData);
+ uint8_t Other = MCELF::getOther(OrigData) <<
+ (ELF_Other_Shift - ELF_STV_Shift);
+ Other |= Visibility;
+
+ uint64_t Value = SymbolValue(Data, Layout);
+ uint64_t Size = 0;
+
+ assert(!(Data.isCommon() && !Data.isExternal()));
+
+ const MCExpr *ESize = Data.getSize();
+ if (ESize) {
+ int64_t Res;
+ if (!ESize->EvaluateAsAbsolute(Res, Layout))
+ report_fatal_error("Size expression must be absolute.");
+ Size = Res;
+ }
+
+ // Write out the symbol table entry
+ WriteSymbolEntry(SymtabF, ShndxF, MSD.StringIndex, Info, Value,
+ Size, Other, MSD.SectionIndex, IsReserved);
+}
+
+void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap) {
+ // The string table must be emitted first because we need the index
+ // into the string table for all the symbol names.
+ assert(StringTable.size() && "Missing string table");
+
+ // FIXME: Make sure the start of the symbol table is aligned.
+
+ // The first entry is the undefined symbol entry.
+ WriteSymbolEntry(SymtabF, ShndxF, 0, 0, 0, 0, 0, 0, false);
+
+ // Write the symbol table entries.
+ LastLocalSymbolIndex = LocalSymbolData.size() + 1;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = LocalSymbolData[i];
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ }
+
+ // Write out a symbol table entry for each regular section.
+ for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e;
+ ++i) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(i->getSection());
+ if (Section.getType() == ELF::SHT_RELA ||
+ Section.getType() == ELF::SHT_REL ||
+ Section.getType() == ELF::SHT_STRTAB ||
+ Section.getType() == ELF::SHT_SYMTAB ||
+ Section.getType() == ELF::SHT_SYMTAB_SHNDX)
+ continue;
+ WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0,
+ ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section),
+ false);
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = ExternalSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ assert(((Data.getFlags() & ELF_STB_Global) ||
+ (Data.getFlags() & ELF_STB_Weak)) &&
+ "External symbol requires STB_GLOBAL or STB_WEAK flag");
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = UndefinedSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
+ LastLocalSymbolIndex++;
+ }
+}
+
+const MCSymbol *ELFObjectWriter::SymbolToReloc(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+ const MCSymbol *Renamed = Renames.lookup(&Symbol);
+ const MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+ if (ASymbol.isUndefined()) {
+ if (Renamed)
+ return Renamed;
+ return undefinedExplicitRelSym(Target, Fixup, IsPCRel);
+ }
+
+ if (SD.isExternal()) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(ASymbol.getSection());
+ const SectionKind secKind = Section.getKind();
+
+ if (secKind.isBSS())
+ return ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+
+ if (secKind.isThreadLocal()) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+ const MCSectionELF &Sec2 =
+ static_cast<const MCSectionELF&>(F.getParent()->getSection());
+
+ if (&Sec2 != &Section &&
+ (Kind == MCSymbolRefExpr::VK_PLT ||
+ Kind == MCSymbolRefExpr::VK_GOTPCREL ||
+ Kind == MCSymbolRefExpr::VK_GOTOFF)) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ if (Section.getFlags() & ELF::SHF_MERGE) {
+ if (Target.getConstant() == 0)
+ return ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ return ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+
+}
+
+
+void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ int64_t Addend = 0;
+ int Index = 0;
+ int64_t Value = Target.getConstant();
+ const MCSymbol *RelocSymbol = NULL;
+
+ bool IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+ if (!Target.isAbsolute()) {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+ RelocSymbol = SymbolToReloc(Asm, Target, *Fragment, Fixup, IsPCRel);
+
+ if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
+ const MCSymbol &SymbolB = RefB->getSymbol();
+ MCSymbolData &SDB = Asm.getSymbolData(SymbolB);
+ IsPCRel = true;
+
+ // Offset of the symbol in the section
+ int64_t a = Layout.getSymbolOffset(&SDB);
+
+ // Offset of the relocation in the section
+ int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ Value += b - a;
+ }
+
+ if (!RelocSymbol) {
+ MCSymbolData &SD = Asm.getSymbolData(ASymbol);
+ MCFragment *F = SD.getFragment();
+
+ if (F) {
+ Index = F->getParent()->getOrdinal() + 1;
+ // Offset of the symbol in the section
+ Value += Layout.getSymbolOffset(&SD);
+ } else {
+ Index = 0;
+ }
+ } else {
+ if (Asm.getSymbolData(Symbol).getFlags() & ELF_Other_Weakref)
+ WeakrefUsedInReloc.insert(RelocSymbol);
+ else
+ UsedInReloc.insert(RelocSymbol);
+ Index = -1;
+ }
+ Addend = Value;
+ if (hasRelocationAddend())
+ Value = 0;
+ }
+
+ FixedValue = Value;
+ unsigned Type = GetRelocType(Target, Fixup, IsPCRel,
+ (RelocSymbol != 0), Addend);
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ if (RelocNeedsGOT(Modifier))
+ NeedsGOT = true;
+
+ uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) +
+ Fixup.getOffset();
+
+ // FIXME: no tests cover this. Is adjustFixupOffset dead code?
+ TargetObjectWriter->adjustFixupOffset(Fixup, RelocOffset);
+
+ if (!hasRelocationAddend())
+ Addend = 0;
+
+ if (is64Bit())
+ assert(isInt<64>(Addend));
+ else
+ assert(isInt<32>(Addend));
+
+ ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend, Fixup);
+ Relocations[Fragment->getParent()].push_back(ERE);
+}
+
+
+uint64_t
+ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S) {
+ MCSymbolData &SD = Asm.getSymbolData(*S);
+ return SD.getIndex();
+}
+
+bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm,
+ const MCSymbolData &Data,
+ bool Used, bool Renamed) {
+ if (Data.getFlags() & ELF_Other_Weakref)
+ return false;
+
+ if (Used)
+ return true;
+
+ if (Renamed)
+ return false;
+
+ const MCSymbol &Symbol = Data.getSymbol();
+
+ if (Symbol.getName() == "_GLOBAL_OFFSET_TABLE_")
+ return true;
+
+ const MCSymbol &A = Symbol.AliasedSymbol();
+ if (Symbol.isVariable() && !A.isVariable() && A.isUndefined())
+ return false;
+
+ bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL;
+ if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal)
+ return false;
+
+ if (!Asm.isSymbolLinkerVisible(Symbol) && !Symbol.isUndefined())
+ return false;
+
+ if (Symbol.isTemporary())
+ return false;
+
+ return true;
+}
+
+bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isSignature,
+ bool isUsedInReloc) {
+ if (Data.isExternal())
+ return false;
+
+ const MCSymbol &Symbol = Data.getSymbol();
+ const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
+
+ if (RefSymbol.isUndefined() && !RefSymbol.isVariable()) {
+ if (isSignature && !isUsedInReloc)
+ return true;
+
+ return false;
+ }
+
+ return true;
+}
+
+void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap) {
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() != ELF::SHT_GROUP)
+ continue;
+ SectionIndexMap[&Section] = Index++;
+ }
+
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() == ELF::SHT_GROUP ||
+ Section.getType() == ELF::SHT_REL ||
+ Section.getType() == ELF::SHT_RELA)
+ continue;
+ SectionIndexMap[&Section] = Index++;
+ const MCSectionELF *RelSection = RelMap.lookup(&Section);
+ if (RelSection)
+ SectionIndexMap[RelSection] = Index++;
+ }
+}
+
+void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ RevGroupMapTy RevGroupMap,
+ unsigned NumRegularSections) {
+ // FIXME: Is this the correct place to do this?
+ // FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed?
+ if (NeedsGOT) {
+ StringRef Name = "_GLOBAL_OFFSET_TABLE_";
+ MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name);
+ MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym);
+ Data.setExternal(true);
+ MCELF::SetBinding(Data, ELF::STB_GLOBAL);
+ }
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
+
+ // FIXME: We could optimize suffixes in strtab in the same way we
+ // optimize them in shstrtab.
+
+ // Add the data for the symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ bool Used = UsedInReloc.count(&Symbol);
+ bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol);
+ bool isSignature = RevGroupMap.count(&Symbol);
+
+ if (!isInSymtab(Asm, *it,
+ Used || WeakrefUsed || isSignature,
+ Renames.count(&Symbol)))
+ continue;
+
+ ELFSymbolData MSD;
+ MSD.SymbolData = it;
+ const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
+
+ // Undefined symbols are global, but this is the first place we
+ // are able to set it.
+ bool Local = isLocal(*it, isSignature, Used);
+ if (!Local && MCELF::GetBinding(*it) == ELF::STB_LOCAL) {
+ MCSymbolData &SD = Asm.getSymbolData(RefSymbol);
+ MCELF::SetBinding(*it, ELF::STB_GLOBAL);
+ MCELF::SetBinding(SD, ELF::STB_GLOBAL);
+ }
+
+ if (RefSymbol.isUndefined() && !Used && WeakrefUsed)
+ MCELF::SetBinding(*it, ELF::STB_WEAK);
+
+ if (it->isCommon()) {
+ assert(!Local);
+ MSD.SectionIndex = ELF::SHN_COMMON;
+ } else if (Symbol.isAbsolute() || RefSymbol.isVariable()) {
+ MSD.SectionIndex = ELF::SHN_ABS;
+ } else if (RefSymbol.isUndefined()) {
+ if (isSignature && !Used)
+ MSD.SectionIndex = SectionIndexMap.lookup(RevGroupMap[&Symbol]);
+ else
+ MSD.SectionIndex = ELF::SHN_UNDEF;
+ } else {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(RefSymbol.getSection());
+ MSD.SectionIndex = SectionIndexMap.lookup(&Section);
+ if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
+ NeedsSymtabShndx = true;
+ assert(MSD.SectionIndex && "Invalid section index!");
+ }
+
+ // The @@@ in symbol version is replaced with @ in undefined symbols and
+ // @@ in defined ones.
+ StringRef Name = Symbol.getName();
+ SmallString<32> Buf;
+
+ size_t Pos = Name.find("@@@");
+ if (Pos != StringRef::npos) {
+ Buf += Name.substr(0, Pos);
+ unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
+ Buf += Name.substr(Pos + Skip);
+ Name = Buf;
+ }
+
+ uint64_t &Entry = StringIndexMap[Name];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Name;
+ StringTable += '\x00';
+ }
+ MSD.StringIndex = Entry;
+ if (MSD.SectionIndex == ELF::SHN_UNDEF)
+ UndefinedSymbolData.push_back(MSD);
+ else if (Local)
+ LocalSymbolData.push_back(MSD);
+ else
+ ExternalSymbolData.push_back(MSD);
+ }
+
+ // Symbols are required to be in lexicographic order.
+ array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end());
+ array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ array_pod_sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices. Local symbols must come before all other
+ // symbols with non-local bindings.
+ unsigned Index = 1;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+
+ Index += NumRegularSections;
+
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+ if (Index >= ELF::SHN_LORESERVE)
+ NeedsSymtabShndx = true;
+}
+
+void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ RelMapTy &RelMap) {
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ if (Relocations[&SD].empty())
+ continue;
+
+ MCContext &Ctx = Asm.getContext();
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ const StringRef SectionName = Section.getSectionName();
+ std::string RelaSectionName = hasRelocationAddend() ? ".rela" : ".rel";
+ RelaSectionName += SectionName;
+
+ unsigned EntrySize;
+ if (hasRelocationAddend())
+ EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
+ else
+ EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
+
+ const MCSectionELF *RelaSection =
+ Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ?
+ ELF::SHT_RELA : ELF::SHT_REL, 0,
+ SectionKind::getReadOnly(),
+ EntrySize, "");
+ RelMap[&Section] = RelaSection;
+ Asm.getOrCreateSectionData(*RelaSection);
+ }
+}
+
+void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
+ const RelMapTy &RelMap) {
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ const MCSectionELF *RelaSection = RelMap.lookup(&Section);
+ if (!RelaSection)
+ continue;
+ MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection);
+ RelaSD.setAlignment(is64Bit() ? 8 : 4);
+
+ MCDataFragment *F = new MCDataFragment(&RelaSD);
+ WriteRelocationsFragment(Asm, F, &*it);
+ }
+}
+
+void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
+ uint64_t Flags, uint64_t Address,
+ uint64_t Offset, uint64_t Size,
+ uint32_t Link, uint32_t Info,
+ uint64_t Alignment,
+ uint64_t EntrySize) {
+ Write32(Name); // sh_name: index into string table
+ Write32(Type); // sh_type
+ WriteWord(Flags); // sh_flags
+ WriteWord(Address); // sh_addr
+ WriteWord(Offset); // sh_offset
+ WriteWord(Size); // sh_size
+ Write32(Link); // sh_link
+ Write32(Info); // sh_info
+ WriteWord(Alignment); // sh_addralign
+ WriteWord(EntrySize); // sh_entsize
+}
+
+void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD) {
+ std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
+
+ // Sort the relocation entries. Most targets just sort by r_offset, but some
+ // (e.g., MIPS) have additional constraints.
+ TargetObjectWriter->sortRelocs(Asm, Relocs);
+
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ ELFRelocationEntry entry = Relocs[e - i - 1];
+
+ if (!entry.Index)
+ ;
+ else if (entry.Index < 0)
+ entry.Index = getSymbolIndexInSymbolTable(Asm, entry.Symbol);
+ else
+ entry.Index += LocalSymbolData.size();
+ if (is64Bit()) {
+ String64(*F, entry.r_offset);
+ if (TargetObjectWriter->isN64()) {
+ String32(*F, entry.Index);
+
+ String8(*F, TargetObjectWriter->getRSsym(entry.Type));
+ String8(*F, TargetObjectWriter->getRType3(entry.Type));
+ String8(*F, TargetObjectWriter->getRType2(entry.Type));
+ String8(*F, TargetObjectWriter->getRType(entry.Type));
+ }
+ else {
+ struct ELF::Elf64_Rela ERE64;
+ ERE64.setSymbolAndType(entry.Index, entry.Type);
+ String64(*F, ERE64.r_info);
+ }
+ if (hasRelocationAddend())
+ String64(*F, entry.r_addend);
+ } else {
+ String32(*F, entry.r_offset);
+
+ struct ELF::Elf32_Rela ERE32;
+ ERE32.setSymbolAndType(entry.Index, entry.Type);
+ String32(*F, ERE32.r_info);
+
+ if (hasRelocationAddend())
+ String32(*F, entry.r_addend);
+ }
+ }
+}
+
+static int compareBySuffix(const void *a, const void *b) {
+ const MCSectionELF *secA = *static_cast<const MCSectionELF* const *>(a);
+ const MCSectionELF *secB = *static_cast<const MCSectionELF* const *>(b);
+ const StringRef &NameA = secA->getSectionName();
+ const StringRef &NameB = secB->getSectionName();
+ const unsigned sizeA = NameA.size();
+ const unsigned sizeB = NameB.size();
+ const unsigned len = std::min(sizeA, sizeB);
+ for (unsigned int i = 0; i < len; ++i) {
+ char ca = NameA[sizeA - i - 1];
+ char cb = NameB[sizeB - i - 1];
+ if (ca != cb)
+ return cb - ca;
+ }
+
+ return sizeB - sizeA;
+}
+
+void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap) {
+ MCContext &Ctx = Asm.getContext();
+ MCDataFragment *F;
+
+ unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+
+ // We construct .shstrtab, .symtab and .strtab in this order to match gnu as.
+ const MCSectionELF *ShstrtabSection =
+ Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly());
+ MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
+ ShstrtabSD.setAlignment(1);
+
+ const MCSectionELF *SymtabSection =
+ Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
+ SectionKind::getReadOnly(),
+ EntrySize, "");
+ MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection);
+ SymtabSD.setAlignment(is64Bit() ? 8 : 4);
+
+ MCSectionData *SymtabShndxSD = NULL;
+
+ if (NeedsSymtabShndx) {
+ const MCSectionELF *SymtabShndxSection =
+ Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0,
+ SectionKind::getReadOnly(), 4, "");
+ SymtabShndxSD = &Asm.getOrCreateSectionData(*SymtabShndxSection);
+ SymtabShndxSD->setAlignment(4);
+ }
+
+ const MCSectionELF *StrtabSection;
+ StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly());
+ MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection);
+ StrtabSD.setAlignment(1);
+
+ ComputeIndexMap(Asm, SectionIndexMap, RelMap);
+
+ ShstrtabIndex = SectionIndexMap.lookup(ShstrtabSection);
+ SymbolTableIndex = SectionIndexMap.lookup(SymtabSection);
+ StringTableIndex = SectionIndexMap.lookup(StrtabSection);
+
+ // Symbol table
+ F = new MCDataFragment(&SymtabSD);
+ MCDataFragment *ShndxF = NULL;
+ if (NeedsSymtabShndx) {
+ ShndxF = new MCDataFragment(SymtabShndxSD);
+ }
+ WriteSymbolTable(F, ShndxF, Asm, Layout, SectionIndexMap);
+
+ F = new MCDataFragment(&StrtabSD);
+ F->getContents().append(StringTable.begin(), StringTable.end());
+
+ F = new MCDataFragment(&ShstrtabSD);
+
+ std::vector<const MCSectionELF*> Sections;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ Sections.push_back(&Section);
+ }
+ array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix);
+
+ // Section header string table.
+ //
+ // The first entry of a string table holds a null character so skip
+ // section 0.
+ uint64_t Index = 1;
+ F->getContents().push_back('\x00');
+
+ for (unsigned int I = 0, E = Sections.size(); I != E; ++I) {
+ const MCSectionELF &Section = *Sections[I];
+
+ StringRef Name = Section.getSectionName();
+ if (I != 0) {
+ StringRef PreviousName = Sections[I - 1]->getSectionName();
+ if (PreviousName.endswith(Name)) {
+ SectionStringTableIndex[&Section] = Index - Name.size() - 1;
+ continue;
+ }
+ }
+ // Remember the index into the string table so we can write it
+ // into the sh_name field of the section header table.
+ SectionStringTableIndex[&Section] = Index;
+
+ Index += Name.size() + 1;
+ F->getContents().append(Name.begin(), Name.end());
+ F->getContents().push_back('\x00');
+ }
+}
+
+void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap) {
+ // Create the .note.GNU-stack section if needed.
+ MCContext &Ctx = Asm.getContext();
+ if (Asm.getNoExecStack()) {
+ const MCSectionELF *GnuStackSection =
+ Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly());
+ Asm.getOrCreateSectionData(*GnuStackSection);
+ }
+
+ // Build the groups
+ for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+ it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ if (!(Section.getFlags() & ELF::SHF_GROUP))
+ continue;
+
+ const MCSymbol *SignatureSymbol = Section.getGroup();
+ Asm.getOrCreateSymbolData(*SignatureSymbol);
+ const MCSectionELF *&Group = RevGroupMap[SignatureSymbol];
+ if (!Group) {
+ Group = Ctx.CreateELFGroupSection();
+ MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
+ Data.setAlignment(4);
+ MCDataFragment *F = new MCDataFragment(&Data);
+ String32(*F, ELF::GRP_COMDAT);
+ }
+ GroupMap[Group] = SignatureSymbol;
+ }
+
+ ComputeIndexMap(Asm, SectionIndexMap, RelMap);
+
+ // Add sections to the groups
+ for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+ it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ if (!(Section.getFlags() & ELF::SHF_GROUP))
+ continue;
+ const MCSectionELF *Group = RevGroupMap[Section.getGroup()];
+ MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
+ // FIXME: we could use the previous fragment
+ MCDataFragment *F = new MCDataFragment(&Data);
+ unsigned Index = SectionIndexMap.lookup(&Section);
+ String32(*F, Index);
+ }
+}
+
+void ELFObjectWriter::WriteSection(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex,
+ uint64_t Offset, uint64_t Size,
+ uint64_t Alignment,
+ const MCSectionELF &Section) {
+ uint64_t sh_link = 0;
+ uint64_t sh_info = 0;
+
+ switch(Section.getType()) {
+ case ELF::SHT_DYNAMIC:
+ sh_link = SectionStringTableIndex[&Section];
+ sh_info = 0;
+ break;
+
+ case ELF::SHT_REL:
+ case ELF::SHT_RELA: {
+ const MCSectionELF *SymtabSection;
+ const MCSectionELF *InfoSection;
+ SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB,
+ 0,
+ SectionKind::getReadOnly());
+ sh_link = SectionIndexMap.lookup(SymtabSection);
+ assert(sh_link && ".symtab not found");
+
+ // Remove ".rel" and ".rela" prefixes.
+ unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
+ StringRef SectionName = Section.getSectionName().substr(SecNameLen);
+
+ InfoSection = Asm.getContext().getELFSection(SectionName,
+ ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly());
+ sh_info = SectionIndexMap.lookup(InfoSection);
+ break;
+ }
+
+ case ELF::SHT_SYMTAB:
+ case ELF::SHT_DYNSYM:
+ sh_link = StringTableIndex;
+ sh_info = LastLocalSymbolIndex;
+ break;
+
+ case ELF::SHT_SYMTAB_SHNDX:
+ sh_link = SymbolTableIndex;
+ break;
+
+ case ELF::SHT_PROGBITS:
+ case ELF::SHT_STRTAB:
+ case ELF::SHT_NOBITS:
+ case ELF::SHT_NOTE:
+ case ELF::SHT_NULL:
+ case ELF::SHT_ARM_ATTRIBUTES:
+ case ELF::SHT_INIT_ARRAY:
+ case ELF::SHT_FINI_ARRAY:
+ case ELF::SHT_PREINIT_ARRAY:
+ case ELF::SHT_X86_64_UNWIND:
+ case ELF::SHT_MIPS_REGINFO:
+ case ELF::SHT_MIPS_OPTIONS:
+ // Nothing to do.
+ break;
+
+ case ELF::SHT_GROUP:
+ sh_link = SymbolTableIndex;
+ sh_info = GroupSymbolIndex;
+ break;
+
+ default:
+ assert(0 && "FIXME: sh_type value not supported!");
+ break;
+ }
+
+ if (TargetObjectWriter->getEMachine() == ELF::EM_ARM &&
+ Section.getType() == ELF::SHT_ARM_EXIDX) {
+ StringRef SecName(Section.getSectionName());
+ if (SecName == ".ARM.exidx") {
+ sh_link = SectionIndexMap.lookup(
+ Asm.getContext().getELFSection(".text",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
+ SectionKind::getText()));
+ } else if (SecName.startswith(".ARM.exidx")) {
+ sh_link = SectionIndexMap.lookup(
+ Asm.getContext().getELFSection(SecName.substr(sizeof(".ARM.exidx") - 1),
+ ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
+ SectionKind::getText()));
+ }
+ }
+
+ WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(),
+ Section.getFlags(), 0, Offset, Size, sh_link, sh_info,
+ Alignment, Section.getEntrySize());
+}
+
+bool ELFObjectWriter::IsELFMetaDataSection(const MCSectionData &SD) {
+ return SD.getOrdinal() == ~UINT32_C(0) &&
+ !SD.getSection().isVirtualSection();
+}
+
+uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) {
+ uint64_t Ret = 0;
+ for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+ ++i) {
+ const MCFragment &F = *i;
+ assert(F.getKind() == MCFragment::FT_Data);
+ Ret += cast<MCDataFragment>(F).getContents().size();
+ }
+ return Ret;
+}
+
+uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (IsELFMetaDataSection(SD))
+ return DataSectionSize(SD);
+ return Layout.getSectionFileSize(&SD);
+}
+
+uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (IsELFMetaDataSection(SD))
+ return DataSectionSize(SD);
+ return Layout.getSectionAddressSize(&SD);
+}
+
+void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionELF &Section) {
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+
+ uint64_t Padding = OffsetToAlignment(OS.tell(), SD.getAlignment());
+ WriteZeros(Padding);
+
+ if (IsELFMetaDataSection(SD)) {
+ for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+ ++i) {
+ const MCFragment &F = *i;
+ assert(F.getKind() == MCFragment::FT_Data);
+ WriteBytes(cast<MCDataFragment>(F).getContents());
+ }
+ } else {
+ Asm.writeSectionData(&SD, Layout);
+ }
+}
+
+void ELFObjectWriter::WriteSectionHeader(MCAssembler &Asm,
+ const GroupMapTy &GroupMap,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const SectionOffsetMapTy &SectionOffsetMap) {
+ const unsigned NumSections = Asm.size() + 1;
+
+ std::vector<const MCSectionELF*> Sections;
+ Sections.resize(NumSections - 1);
+
+ for (SectionIndexMapTy::const_iterator i=
+ SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) {
+ const std::pair<const MCSectionELF*, uint32_t> &p = *i;
+ Sections[p.second - 1] = p.first;
+ }
+
+ // Null section first.
+ uint64_t FirstSectionSize =
+ NumSections >= ELF::SHN_LORESERVE ? NumSections : 0;
+ uint32_t FirstSectionLink =
+ ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0;
+ WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0);
+
+ for (unsigned i = 0; i < NumSections - 1; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+ uint32_t GroupSymbolIndex;
+ if (Section.getType() != ELF::SHT_GROUP)
+ GroupSymbolIndex = 0;
+ else
+ GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm,
+ GroupMap.lookup(&Section));
+
+ uint64_t Size = GetSectionAddressSize(Layout, SD);
+
+ WriteSection(Asm, SectionIndexMap, GroupSymbolIndex,
+ SectionOffsetMap.lookup(&Section), Size,
+ SD.getAlignment(), Section);
+ }
+}
+
+void ELFObjectWriter::ComputeSectionOrder(MCAssembler &Asm,
+ std::vector<const MCSectionELF*> &Sections) {
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() == ELF::SHT_GROUP)
+ Sections.push_back(&Section);
+ }
+
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() != ELF::SHT_GROUP &&
+ Section.getType() != ELF::SHT_REL &&
+ Section.getType() != ELF::SHT_RELA)
+ Sections.push_back(&Section);
+ }
+
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() == ELF::SHT_REL ||
+ Section.getType() == ELF::SHT_RELA)
+ Sections.push_back(&Section);
+ }
+}
+
+void ELFObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ GroupMapTy GroupMap;
+ RevGroupMapTy RevGroupMap;
+ SectionIndexMapTy SectionIndexMap;
+
+ unsigned NumUserSections = Asm.size();
+
+ DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap;
+ CreateRelocationSections(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
+
+ const unsigned NumUserAndRelocSections = Asm.size();
+ CreateIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap,
+ RevGroupMap, SectionIndexMap, RelMap);
+ const unsigned AllSections = Asm.size();
+ const unsigned NumIndexedSections = AllSections - NumUserAndRelocSections;
+
+ unsigned NumRegularSections = NumUserSections + NumIndexedSections;
+
+ // Compute symbol table information.
+ ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap, NumRegularSections);
+
+
+ WriteRelocations(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
+
+ CreateMetadataSections(const_cast<MCAssembler&>(Asm),
+ const_cast<MCAsmLayout&>(Layout),
+ SectionIndexMap,
+ RelMap);
+
+ uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
+ uint64_t HeaderSize = is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
+ sizeof(ELF::Elf32_Ehdr);
+ uint64_t FileOff = HeaderSize;
+
+ std::vector<const MCSectionELF*> Sections;
+ ComputeSectionOrder(Asm, Sections);
+ unsigned NumSections = Sections.size();
+ SectionOffsetMapTy SectionOffsetMap;
+ for (unsigned i = 0; i < NumRegularSections + 1; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+
+ FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
+
+ // Remember the offset into the file for this section.
+ SectionOffsetMap[&Section] = FileOff;
+
+ // Get the size of the section in the output file (including padding).
+ FileOff += GetSectionFileSize(Layout, SD);
+ }
+
+ FileOff = RoundUpToAlignment(FileOff, NaturalAlignment);
+
+ const unsigned SectionHeaderOffset = FileOff - HeaderSize;
+
+ uint64_t SectionHeaderEntrySize = is64Bit() ?
+ sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr);
+ FileOff += (NumSections + 1) * SectionHeaderEntrySize;
+
+ for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+
+ FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
+
+ // Remember the offset into the file for this section.
+ SectionOffsetMap[&Section] = FileOff;
+
+ // Get the size of the section in the output file (including padding).
+ FileOff += GetSectionFileSize(Layout, SD);
+ }
+
+ // Write out the ELF header ...
+ WriteHeader(Asm, SectionHeaderOffset, NumSections + 1);
+
+ // ... then the regular sections ...
+ // + because of .shstrtab
+ for (unsigned i = 0; i < NumRegularSections + 1; ++i)
+ WriteDataSectionData(Asm, Layout, *Sections[i]);
+
+ uint64_t Padding = OffsetToAlignment(OS.tell(), NaturalAlignment);
+ WriteZeros(Padding);
+
+ // ... then the section header table ...
+ WriteSectionHeader(Asm, GroupMap, Layout, SectionIndexMap,
+ SectionOffsetMap);
+
+ // ... and then the remaining sections ...
+ for (unsigned i = NumRegularSections + 1; i < NumSections; ++i)
+ WriteDataSectionData(Asm, Layout, *Sections[i]);
+}
+
+bool
+ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ if (DataA.getFlags() & ELF_STB_Weak)
+ return false;
+ return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+ Asm, DataA, FB,InSet, IsPCRel);
+}
+
+MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ return new ELFObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/contrib/llvm/lib/MC/MCAsmBackend.cpp b/contrib/llvm/lib/MC/MCAsmBackend.cpp
new file mode 100644
index 000000000000..53960e7980ae
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmBackend.cpp
@@ -0,0 +1,43 @@
+//===-- MCAsmBackend.cpp - Target MC Assembly Backend ----------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+using namespace llvm;
+
+MCAsmBackend::MCAsmBackend()
+ : HasReliableSymbolDifference(false), HasDataInCodeSupport(false) {}
+
+MCAsmBackend::~MCAsmBackend() {}
+
+const MCFixupKindInfo &
+MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ static const MCFixupKindInfo Builtins[] = {
+ { "FK_Data_1", 0, 8, 0 },
+ { "FK_Data_2", 0, 16, 0 },
+ { "FK_Data_4", 0, 32, 0 },
+ { "FK_Data_8", 0, 64, 0 },
+ { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_GPRel_1", 0, 8, 0 },
+ { "FK_GPRel_2", 0, 16, 0 },
+ { "FK_GPRel_4", 0, 32, 0 },
+ { "FK_GPRel_8", 0, 64, 0 },
+ { "FK_SecRel_1", 0, 8, 0 },
+ { "FK_SecRel_2", 0, 16, 0 },
+ { "FK_SecRel_4", 0, 32, 0 },
+ { "FK_SecRel_8", 0, 64, 0 }
+ };
+
+ assert((size_t)Kind <= sizeof(Builtins) / sizeof(Builtins[0]) &&
+ "Unknown fixup kind");
+ return Builtins[Kind];
+}
diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp
new file mode 100644
index 000000000000..51bb4357102e
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp
@@ -0,0 +1,143 @@
+//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related what form asm statements
+// should take.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Dwarf.h"
+#include <cctype>
+#include <cstring>
+using namespace llvm;
+
+MCAsmInfo::MCAsmInfo() {
+ PointerSize = 4;
+ CalleeSaveStackSlotSize = 4;
+
+ IsLittleEndian = true;
+ StackGrowsUp = false;
+ HasSubsectionsViaSymbols = false;
+ HasMachoZeroFillDirective = false;
+ HasMachoTBSSDirective = false;
+ HasStaticCtorDtorReferenceInStaticMode = false;
+ LinkerRequiresNonEmptyDwarfLines = false;
+ MaxInstLength = 4;
+ PCSymbol = "$";
+ SeparatorString = ";";
+ CommentColumn = 40;
+ CommentString = "#";
+ LabelSuffix = ":";
+ DebugLabelSuffix = ":";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".";
+ LinkerPrivateGlobalPrefix = "";
+ InlineAsmStart = "APP";
+ InlineAsmEnd = "NO_APP";
+ Code16Directive = ".code16";
+ Code32Directive = ".code32";
+ Code64Directive = ".code64";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+ AllowPeriodsInName = true;
+ AllowUTF8 = true;
+ UseDataRegionDirectives = false;
+ ZeroDirective = "\t.zero\t";
+ AsciiDirective = "\t.ascii\t";
+ AscizDirective = "\t.asciz\t";
+ Data8bitsDirective = "\t.byte\t";
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = "\t.quad\t";
+ SunStyleELFSectionSwitchSyntax = false;
+ UsesELFSectionDirectiveForBSS = false;
+ AlignDirective = "\t.align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+ GPRel64Directive = 0;
+ GPRel32Directive = 0;
+ GlobalDirective = "\t.globl\t";
+ HasSetDirective = true;
+ HasAggressiveSymbolFolding = true;
+ COMMDirectiveAlignmentIsInBytes = true;
+ LCOMMDirectiveAlignmentType = LCOMM::NoAlignment;
+ HasDotTypeDotSizeDirective = true;
+ HasSingleParameterDotFile = true;
+ HasNoDeadStrip = false;
+ HasSymbolResolver = false;
+ WeakRefDirective = 0;
+ WeakDefDirective = 0;
+ LinkOnceDirective = 0;
+ HiddenVisibilityAttr = MCSA_Hidden;
+ HiddenDeclarationVisibilityAttr = MCSA_Hidden;
+ ProtectedVisibilityAttr = MCSA_Protected;
+ HasLEB128 = false;
+ SupportsDebugInformation = false;
+ ExceptionsType = ExceptionHandling::None;
+ DwarfUsesInlineInfoSection = false;
+ DwarfSectionOffsetDirective = 0;
+ DwarfUsesRelocationsAcrossSections = true;
+ DwarfRegNumForCFI = false;
+ HasMicrosoftFastStdCallMangling = false;
+}
+
+MCAsmInfo::~MCAsmInfo() {
+}
+
+
+unsigned MCAsmInfo::getULEB128Size(unsigned Value) {
+ unsigned Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t);
+ } while (Value);
+ return Size;
+}
+
+unsigned MCAsmInfo::getSLEB128Size(int Value) {
+ unsigned Size = 0;
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned Byte = Value & 0x7f;
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ Size += sizeof(int8_t);
+ } while (IsMore);
+ return Size;
+}
+
+const MCExpr *
+MCAsmInfo::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ return getExprForFDESymbol(Sym, Encoding, Streamer);
+}
+
+const MCExpr *
+MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ if (!(Encoding & dwarf::DW_EH_PE_pcrel))
+ return MCSymbolRefExpr::Create(Sym, Streamer.getContext());
+
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res = MCSymbolRefExpr::Create(Sym, Context);
+ MCSymbol *PCSym = Context.CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
+ return MCBinaryExpr::CreateSub(Res, PC, Context);
+}
diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
new file mode 100644
index 000000000000..fd79193073df
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -0,0 +1,53 @@
+//===-- MCAsmInfoCOFF.cpp - COFF asm properties -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related what form asm statements
+// should take in general on COFF-based targets
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoCOFF.h"
+using namespace llvm;
+
+void MCAsmInfoCOFF::anchor() { }
+
+MCAsmInfoCOFF::MCAsmInfoCOFF() {
+ GlobalPrefix = "_";
+ // MingW 4.5 and later support .comm with log2 alignment, but .lcomm uses byte
+ // alignment.
+ COMMDirectiveAlignmentIsInBytes = false;
+ LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+ PrivateGlobalPrefix = "L"; // Prefix for private global symbols
+ WeakRefDirective = "\t.weak\t";
+ LinkOnceDirective = "\t.linkonce discard\n";
+
+ // Doesn't support visibility:
+ HiddenVisibilityAttr = HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+ ProtectedVisibilityAttr = MCSA_Invalid;
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+ SupportsDebugInformation = true;
+ DwarfSectionOffsetDirective = "\t.secrel32\t";
+ HasMicrosoftFastStdCallMangling = true;
+}
+
+void MCAsmInfoMicrosoft::anchor() { }
+
+MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() {
+ AllowQuotesInName = true;
+}
+
+void MCAsmInfoGNUCOFF::anchor() { }
+
+MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() {
+
+}
diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
new file mode 100644
index 000000000000..a0e3ebad5e2b
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
@@ -0,0 +1,64 @@
+//===-- MCAsmInfoDarwin.cpp - Darwin asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related what form asm statements
+// should take in general on Darwin-based targets
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+using namespace llvm;
+
+void MCAsmInfoDarwin::anchor() { }
+
+MCAsmInfoDarwin::MCAsmInfoDarwin() {
+ // Common settings for all Darwin targets.
+ // Syntax:
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ LinkerPrivateGlobalPrefix = "l";
+ AllowQuotesInName = true;
+ HasSingleParameterDotFile = false;
+ HasSubsectionsViaSymbols = true;
+
+ AlignmentIsInBytes = false;
+ COMMDirectiveAlignmentIsInBytes = false;
+ LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment;
+ InlineAsmStart = " InlineAsm Start";
+ InlineAsmEnd = " InlineAsm End";
+
+ // Directives:
+ WeakDefDirective = "\t.weak_definition ";
+ WeakRefDirective = "\t.weak_reference ";
+ ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
+ HasMachoZeroFillDirective = true; // Uses .zerofill
+ HasMachoTBSSDirective = true; // Uses .tbss
+ HasStaticCtorDtorReferenceInStaticMode = true;
+
+ // FIXME: Darwin 10 and newer don't need this.
+ LinkerRequiresNonEmptyDwarfLines = true;
+
+ // FIXME: Change this once MC is the system assembler.
+ HasAggressiveSymbolFolding = false;
+
+ HiddenVisibilityAttr = MCSA_PrivateExtern;
+ HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+
+ // Doesn't support protected visibility.
+ ProtectedVisibilityAttr = MCSA_Invalid;
+
+ HasDotTypeDotSizeDirective = false;
+ HasNoDeadStrip = true;
+ HasSymbolResolver = true;
+
+ DwarfUsesRelocationsAcrossSections = false;
+}
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
new file mode 100644
index 000000000000..35613b411c24
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -0,0 +1,1440 @@
+//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PathV2.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+class MCAsmStreamer : public MCStreamer {
+protected:
+ formatted_raw_ostream &OS;
+ const MCAsmInfo &MAI;
+private:
+ OwningPtr<MCInstPrinter> InstPrinter;
+ OwningPtr<MCCodeEmitter> Emitter;
+ OwningPtr<MCAsmBackend> AsmBackend;
+
+ SmallString<128> CommentToEmit;
+ raw_svector_ostream CommentStream;
+
+ unsigned IsVerboseAsm : 1;
+ unsigned ShowInst : 1;
+ unsigned UseLoc : 1;
+ unsigned UseCFI : 1;
+ unsigned UseDwarfDirectory : 1;
+
+ enum EHSymbolFlags { EHGlobal = 1,
+ EHWeakDefinition = 1 << 1,
+ EHPrivateExtern = 1 << 2 };
+ DenseMap<const MCSymbol*, unsigned> FlagMap;
+
+ bool needsSet(const MCExpr *Value);
+
+ void EmitRegisterName(int64_t Register);
+ virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
+ virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame);
+
+public:
+ MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
+ MCInstPrinter *printer, MCCodeEmitter *emitter,
+ MCAsmBackend *asmbackend,
+ bool showInst)
+ : MCStreamer(SK_AsmStreamer, Context), OS(os), MAI(Context.getAsmInfo()),
+ InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
+ CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
+ ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI),
+ UseDwarfDirectory(useDwarfDirectory) {
+ if (InstPrinter && IsVerboseAsm)
+ InstPrinter->setCommentStream(CommentStream);
+ }
+ ~MCAsmStreamer() {}
+
+ inline void EmitEOL() {
+ // If we don't have any comments, just emit a \n.
+ if (!IsVerboseAsm) {
+ OS << '\n';
+ return;
+ }
+ EmitCommentsAndEOL();
+ }
+ void EmitCommentsAndEOL();
+
+ /// isVerboseAsm - Return true if this streamer supports verbose assembly at
+ /// all.
+ virtual bool isVerboseAsm() const { return IsVerboseAsm; }
+
+ /// hasRawTextSupport - We support EmitRawText.
+ virtual bool hasRawTextSupport() const { return true; }
+
+ /// AddComment - Add a comment that can be emitted to the generated .s
+ /// file if applicable as a QoI issue to make the output of the compiler
+ /// more readable. This only affects the MCAsmStreamer, and only when
+ /// verbose assembly output is enabled.
+ virtual void AddComment(const Twine &T);
+
+ /// AddEncodingComment - Add a comment showing the encoding of an instruction.
+ virtual void AddEncodingComment(const MCInst &Inst);
+
+ /// GetCommentOS - Return a raw_ostream that comments can be written to.
+ /// Unlike AddComment, you are required to terminate comments with \n if you
+ /// use this method.
+ virtual raw_ostream &GetCommentOS() {
+ if (!IsVerboseAsm)
+ return nulls(); // Discard comments unless in verbose asm mode.
+ return CommentStream;
+ }
+
+ /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
+ virtual void AddBlankLine() {
+ EmitEOL();
+ }
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void ChangeSection(const MCSection *Section);
+
+ virtual void InitSections() {
+ InitToTextSection();
+ }
+
+ virtual void InitToTextSection() {
+ // FIXME, this is MachO specific, but the testsuite
+ // expects this.
+ SwitchSection(getContext().getMachOSection(
+ "__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+ }
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
+
+ virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitLinkerOptions(ArrayRef<std::string> Options);
+ virtual void EmitDataRegion(MCDataRegionType Kind);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize);
+ virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+ const MCSymbol *Label);
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol);
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+ ///
+ /// @param Symbol - The common symbol to emit.
+ /// @param Size - The size of the common symbol.
+ /// @param ByteAlignment - The alignment of the common symbol in bytes.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ uint64_t Size = 0, unsigned ByteAlignment = 0);
+
+ virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+ virtual void EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace = 0);
+
+ virtual void EmitULEB128Value(const MCExpr *Value);
+
+ virtual void EmitSLEB128Value(const MCExpr *Value);
+
+ virtual void EmitGPRel64Value(const MCExpr *Value);
+
+ virtual void EmitGPRel32Value(const MCExpr *Value);
+
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace);
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename, unsigned CUID = 0);
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator,
+ StringRef FileName);
+
+ virtual void EmitCFISections(bool EH, bool Debug);
+ virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
+ virtual void EmitCFIDefCfaOffset(int64_t Offset);
+ virtual void EmitCFIDefCfaRegister(int64_t Register);
+ virtual void EmitCFIOffset(int64_t Register, int64_t Offset);
+ virtual void EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding);
+ virtual void EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
+ virtual void EmitCFIRememberState();
+ virtual void EmitCFIRestoreState();
+ virtual void EmitCFISameValue(int64_t Register);
+ virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
+ virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
+ virtual void EmitCFISignalFrame();
+ virtual void EmitCFIUndefined(int64_t Register);
+ virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
+
+ virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
+ virtual void EmitWin64EHEndProc();
+ virtual void EmitWin64EHStartChained();
+ virtual void EmitWin64EHEndChained();
+ virtual void EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except);
+ virtual void EmitWin64EHHandlerData();
+ virtual void EmitWin64EHPushReg(unsigned Register);
+ virtual void EmitWin64EHSetFrame(unsigned Register, unsigned Offset);
+ virtual void EmitWin64EHAllocStack(unsigned Size);
+ virtual void EmitWin64EHSaveReg(unsigned Register, unsigned Offset);
+ virtual void EmitWin64EHSaveXMM(unsigned Register, unsigned Offset);
+ virtual void EmitWin64EHPushFrame(bool Code);
+ virtual void EmitWin64EHEndProlog();
+
+ virtual void EmitFnStart();
+ virtual void EmitFnEnd();
+ virtual void EmitCantUnwind();
+ virtual void EmitPersonality(const MCSymbol *Personality);
+ virtual void EmitHandlerData();
+ virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+ virtual void EmitPad(int64_t Offset);
+ virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool);
+
+ virtual void EmitTCEntry(const MCSymbol &S);
+
+ virtual void EmitInstruction(const MCInst &Inst);
+
+ virtual void EmitBundleAlignMode(unsigned AlignPow2);
+ virtual void EmitBundleLock(bool AlignToEnd);
+ virtual void EmitBundleUnlock();
+
+ /// EmitRawText - If this file is backed by an assembly streamer, this dumps
+ /// the specified string in the output .s file. This capability is
+ /// indicated by the hasRawTextSupport() predicate.
+ virtual void EmitRawText(StringRef String);
+
+ virtual void FinishImpl();
+
+ /// @}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_AsmStreamer;
+ }
+};
+
+} // end anonymous namespace.
+
+/// AddComment - Add a comment that can be emitted to the generated .s
+/// file if applicable as a QoI issue to make the output of the compiler
+/// more readable. This only affects the MCAsmStreamer, and only when
+/// verbose assembly output is enabled.
+void MCAsmStreamer::AddComment(const Twine &T) {
+ if (!IsVerboseAsm) return;
+
+ // Make sure that CommentStream is flushed.
+ CommentStream.flush();
+
+ T.toVector(CommentToEmit);
+ // Each comment goes on its own line.
+ CommentToEmit.push_back('\n');
+
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+void MCAsmStreamer::EmitCommentsAndEOL() {
+ if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
+ OS << '\n';
+ return;
+ }
+
+ CommentStream.flush();
+ StringRef Comments = CommentToEmit.str();
+
+ assert(Comments.back() == '\n' &&
+ "Comment array not newline terminated");
+ do {
+ // Emit a line of comments.
+ OS.PadToColumn(MAI.getCommentColumn());
+ size_t Position = Comments.find('\n');
+ OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+
+ Comments = Comments.substr(Position+1);
+ } while (!Comments.empty());
+
+ CommentToEmit.clear();
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+ assert(Bytes && "Invalid size!");
+ return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
+
+void MCAsmStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+ Section->PrintSwitchToSection(MAI, OS);
+}
+
+void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol) {
+ if (UseCFI)
+ return;
+
+ unsigned Flags = FlagMap.lookup(Symbol);
+
+ if (Flags & EHGlobal)
+ EmitSymbolAttribute(EHSymbol, MCSA_Global);
+ if (Flags & EHWeakDefinition)
+ EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
+ if (Flags & EHPrivateExtern)
+ EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
+}
+
+void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ MCStreamer::EmitLabel(Symbol);
+
+ OS << *Symbol << MAI.getLabelSuffix();
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ MCStreamer::EmitDebugLabel(Symbol);
+
+ OS << *Symbol << MAI.getDebugLabelSuffix();
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break;
+ case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break;
+ case MCAF_Code16: OS << '\t'<< MAI.getCode16Directive(); break;
+ case MCAF_Code32: OS << '\t'<< MAI.getCode32Directive(); break;
+ case MCAF_Code64: OS << '\t'<< MAI.getCode64Directive(); break;
+ }
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitLinkerOptions(ArrayRef<std::string> Options) {
+ assert(!Options.empty() && "At least one option is required!");
+ OS << "\t.linker_option \"" << Options[0] << '"';
+ for (ArrayRef<std::string>::iterator it = Options.begin() + 1,
+ ie = Options.end(); it != ie; ++it) {
+ OS << ", " << '"' << *it << '"';
+ }
+ OS << "\n";
+}
+
+void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) {
+ MCContext &Ctx = getContext();
+ const MCAsmInfo &MAI = Ctx.getAsmInfo();
+ if (!MAI.doesSupportDataRegionDirectives())
+ return;
+ switch (Kind) {
+ case MCDR_DataRegion: OS << "\t.data_region"; break;
+ case MCDR_DataRegionJT8: OS << "\t.data_region jt8"; break;
+ case MCDR_DataRegionJT16: OS << "\t.data_region jt16"; break;
+ case MCDR_DataRegionJT32: OS << "\t.data_region jt32"; break;
+ case MCDR_DataRegionEnd: OS << "\t.end_data_region"; break;
+ }
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {
+ // This needs to emit to a temporary string to get properly quoted
+ // MCSymbols when they have spaces in them.
+ OS << "\t.thumb_func";
+ // Only Mach-O hasSubsectionsViaSymbols()
+ if (MAI.hasSubsectionsViaSymbols())
+ OS << '\t' << *Func;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ OS << *Symbol << " = " << *Value;
+ EmitEOL();
+
+ // FIXME: Lift context changes into super class.
+ Symbol->setVariableValue(Value);
+}
+
+void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ OS << ".weakref " << *Alias << ", " << *Symbol;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
+}
+
+void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ EmitIntValue(dwarf::DW_CFA_advance_loc4, 1);
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ AddrDelta = ForceExpAbs(AddrDelta);
+ EmitValue(AddrDelta, 4);
+}
+
+
+void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ switch (Attribute) {
+ case MCSA_Invalid: llvm_unreachable("Invalid symbol attribute");
+ case MCSA_ELF_TypeFunction: /// .type _foo, STT_FUNC # aka @function
+ case MCSA_ELF_TypeIndFunction: /// .type _foo, STT_GNU_IFUNC
+ case MCSA_ELF_TypeObject: /// .type _foo, STT_OBJECT # aka @object
+ case MCSA_ELF_TypeTLS: /// .type _foo, STT_TLS # aka @tls_object
+ case MCSA_ELF_TypeCommon: /// .type _foo, STT_COMMON # aka @common
+ case MCSA_ELF_TypeNoType: /// .type _foo, STT_NOTYPE # aka @notype
+ case MCSA_ELF_TypeGnuUniqueObject: /// .type _foo, @gnu_unique_object
+ assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported");
+ OS << "\t.type\t" << *Symbol << ','
+ << ((MAI.getCommentString()[0] != '@') ? '@' : '%');
+ switch (Attribute) {
+ default: llvm_unreachable("Unknown ELF .type");
+ case MCSA_ELF_TypeFunction: OS << "function"; break;
+ case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break;
+ case MCSA_ELF_TypeObject: OS << "object"; break;
+ case MCSA_ELF_TypeTLS: OS << "tls_object"; break;
+ case MCSA_ELF_TypeCommon: OS << "common"; break;
+ case MCSA_ELF_TypeNoType: OS << "no_type"; break;
+ case MCSA_ELF_TypeGnuUniqueObject: OS << "gnu_unique_object"; break;
+ }
+ EmitEOL();
+ return;
+ case MCSA_Global: // .globl/.global
+ OS << MAI.getGlobalDirective();
+ FlagMap[Symbol] |= EHGlobal;
+ break;
+ case MCSA_Hidden: OS << "\t.hidden\t"; break;
+ case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break;
+ case MCSA_Internal: OS << "\t.internal\t"; break;
+ case MCSA_LazyReference: OS << "\t.lazy_reference\t"; break;
+ case MCSA_Local: OS << "\t.local\t"; break;
+ case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break;
+ case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break;
+ case MCSA_PrivateExtern:
+ OS << "\t.private_extern\t";
+ FlagMap[Symbol] |= EHPrivateExtern;
+ break;
+ case MCSA_Protected: OS << "\t.protected\t"; break;
+ case MCSA_Reference: OS << "\t.reference\t"; break;
+ case MCSA_Weak: OS << "\t.weak\t"; break;
+ case MCSA_WeakDefinition:
+ OS << "\t.weak_definition\t";
+ FlagMap[Symbol] |= EHWeakDefinition;
+ break;
+ // .weak_reference
+ case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break;
+ case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break;
+ }
+
+ OS << *Symbol;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ OS << ".desc" << ' ' << *Symbol << ',' << DescValue;
+ EmitEOL();
+}
+
+void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ OS << "\t.def\t " << *Symbol << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {
+ OS << "\t.scl\t" << StorageClass << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSymbolType (int Type) {
+ OS << "\t.type\t" << Type << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EndCOFFSymbolDef() {
+ OS << "\t.endef";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
+ OS << "\t.secrel32\t" << *Symbol << '\n';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ assert(MAI.hasDotTypeDotSizeDirective());
+ OS << "\t.size\t" << *Symbol << ", " << *Value << '\n';
+}
+
+void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ OS << "\t.comm\t" << *Symbol << ',' << Size;
+ if (ByteAlignment != 0) {
+ if (MAI.getCOMMDirectiveAlignmentIsInBytes())
+ OS << ',' << ByteAlignment;
+ else
+ OS << ',' << Log2_32(ByteAlignment);
+ }
+ EmitEOL();
+}
+
+/// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+///
+/// @param Symbol - The common symbol to emit.
+/// @param Size - The size of the common symbol.
+void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlign) {
+ OS << "\t.lcomm\t" << *Symbol << ',' << Size;
+ if (ByteAlign > 1) {
+ switch (MAI.getLCOMMDirectiveAlignmentType()) {
+ case LCOMM::NoAlignment:
+ llvm_unreachable("alignment not supported on .lcomm!");
+ case LCOMM::ByteAlignment:
+ OS << ',' << ByteAlign;
+ break;
+ case LCOMM::Log2Alignment:
+ assert(isPowerOf2_32(ByteAlign) && "alignment must be a power of 2");
+ OS << ',' << Log2_32(ByteAlign);
+ break;
+ }
+ }
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ // Note: a .zerofill directive does not switch sections.
+ OS << ".zerofill ";
+
+ // This is a mach-o specific directive.
+ const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section);
+ OS << MOSection->getSegmentName() << "," << MOSection->getSectionName();
+
+ if (Symbol != NULL) {
+ OS << ',' << *Symbol << ',' << Size;
+ if (ByteAlignment != 0)
+ OS << ',' << Log2_32(ByteAlignment);
+ }
+ EmitEOL();
+}
+
+// .tbss sym, size, align
+// This depends that the symbol has already been mangled from the original,
+// e.g. _a.
+void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ assert(Symbol != NULL && "Symbol shouldn't be NULL!");
+ // Instead of using the Section we'll just use the shortcut.
+ // This is a mach-o specific directive and section.
+ OS << ".tbss " << *Symbol << ", " << Size;
+
+ // Output align if we have it. We default to 1 so don't bother printing
+ // that.
+ if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment);
+
+ EmitEOL();
+}
+
+static inline char toOctal(int X) { return (X&7)+'0'; }
+
+static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
+ OS << '"';
+
+ for (unsigned i = 0, e = Data.size(); i != e; ++i) {
+ unsigned char C = Data[i];
+ if (C == '"' || C == '\\') {
+ OS << '\\' << (char)C;
+ continue;
+ }
+
+ if (isprint((unsigned char)C)) {
+ OS << (char)C;
+ continue;
+ }
+
+ switch (C) {
+ case '\b': OS << "\\b"; break;
+ case '\f': OS << "\\f"; break;
+ case '\n': OS << "\\n"; break;
+ case '\r': OS << "\\r"; break;
+ case '\t': OS << "\\t"; break;
+ default:
+ OS << '\\';
+ OS << toOctal(C >> 6);
+ OS << toOctal(C >> 3);
+ OS << toOctal(C >> 0);
+ break;
+ }
+ }
+
+ OS << '"';
+}
+
+
+void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ if (Data.empty()) return;
+
+ if (Data.size() == 1) {
+ OS << MAI.getData8bitsDirective(AddrSpace);
+ OS << (unsigned)(unsigned char)Data[0];
+ EmitEOL();
+ return;
+ }
+
+ // If the data ends with 0 and the target supports .asciz, use it, otherwise
+ // use .ascii
+ if (MAI.getAscizDirective() && Data.back() == 0) {
+ OS << MAI.getAscizDirective();
+ Data = Data.substr(0, Data.size()-1);
+ } else {
+ OS << MAI.getAsciiDirective();
+ }
+
+ OS << ' ';
+ PrintQuotedString(Data, OS);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace);
+}
+
+void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ const char *Directive = 0;
+ switch (Size) {
+ default: break;
+ case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
+ case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
+ case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
+ case 8:
+ Directive = MAI.getData64bitsDirective(AddrSpace);
+ // If the target doesn't support 64-bit data, emit as two 32-bit halves.
+ if (Directive) break;
+ int64_t IntValue;
+ if (!Value->EvaluateAsAbsolute(IntValue))
+ report_fatal_error("Don't know how to emit this value.");
+ if (getContext().getAsmInfo().isLittleEndian()) {
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ } else {
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ }
+ return;
+ }
+
+ assert(Directive && "Invalid size for machine code value!");
+ OS << Directive << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue)) {
+ EmitULEB128IntValue(IntValue);
+ return;
+ }
+ assert(MAI.hasLEB128() && "Cannot print a .uleb");
+ OS << ".uleb128 " << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue)) {
+ EmitSLEB128IntValue(IntValue);
+ return;
+ }
+ assert(MAI.hasLEB128() && "Cannot print a .sleb");
+ OS << ".sleb128 " << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitGPRel64Value(const MCExpr *Value) {
+ assert(MAI.getGPRel64Directive() != 0);
+ OS << MAI.getGPRel64Directive() << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ assert(MAI.getGPRel32Directive() != 0);
+ OS << MAI.getGPRel32Directive() << *Value;
+ EmitEOL();
+}
+
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
+void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ if (NumBytes == 0) return;
+
+ if (AddrSpace == 0)
+ if (const char *ZeroDirective = MAI.getZeroDirective()) {
+ OS << ZeroDirective << NumBytes;
+ if (FillValue != 0)
+ OS << ',' << (int)FillValue;
+ EmitEOL();
+ return;
+ }
+
+ // Emit a byte at a time.
+ MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
+}
+
+void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // Some assemblers don't support non-power of two alignments, so we always
+ // emit alignments as a power of two if possible.
+ if (isPowerOf2_32(ByteAlignment)) {
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << MAI.getAlignDirective(); break;
+ // FIXME: use MAI for this!
+ case 2: OS << ".p2alignw "; break;
+ case 4: OS << ".p2alignl "; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ if (MAI.getAlignmentIsInBytes())
+ OS << ByteAlignment;
+ else
+ OS << Log2_32(ByteAlignment);
+
+ if (Value || MaxBytesToEmit) {
+ OS << ", 0x";
+ OS.write_hex(truncateToSize(Value, ValueSize));
+
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ }
+ EmitEOL();
+ return;
+ }
+
+ // Non-power of two alignment. This is not widely supported by assemblers.
+ // FIXME: Parameterize this based on MAI.
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << ".balign"; break;
+ case 2: OS << ".balignw"; break;
+ case 4: OS << ".balignl"; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ OS << ' ' << ByteAlignment;
+ OS << ", " << truncateToSize(Value, ValueSize);
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // Emit with a text fill value.
+ EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(),
+ 1, MaxBytesToEmit);
+}
+
+bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ // FIXME: Verify that Offset is associated with the current section.
+ OS << ".org " << *Offset << ", " << (unsigned) Value;
+ EmitEOL();
+ return false;
+}
+
+
+void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
+ assert(MAI.hasSingleParameterDotFile());
+ OS << "\t.file\t";
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+}
+
+bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename, unsigned CUID) {
+ if (!UseDwarfDirectory && !Directory.empty()) {
+ if (sys::path::is_absolute(Filename))
+ return EmitDwarfFileDirective(FileNo, "", Filename, CUID);
+
+ SmallString<128> FullPathName = Directory;
+ sys::path::append(FullPathName, Filename);
+ return EmitDwarfFileDirective(FileNo, "", FullPathName, CUID);
+ }
+
+ if (UseLoc) {
+ OS << "\t.file\t" << FileNo << ' ';
+ if (!Directory.empty()) {
+ PrintQuotedString(Directory, OS);
+ OS << ' ';
+ }
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+ // All .file will belong to a single CUID.
+ CUID = 0;
+ }
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename,
+ CUID);
+}
+
+void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa,
+ unsigned Discriminator,
+ StringRef FileName) {
+ this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags,
+ Isa, Discriminator, FileName);
+ if (!UseLoc)
+ return;
+
+ OS << "\t.loc\t" << FileNo << " " << Line << " " << Column;
+ if (Flags & DWARF2_FLAG_BASIC_BLOCK)
+ OS << " basic_block";
+ if (Flags & DWARF2_FLAG_PROLOGUE_END)
+ OS << " prologue_end";
+ if (Flags & DWARF2_FLAG_EPILOGUE_BEGIN)
+ OS << " epilogue_begin";
+
+ unsigned OldFlags = getContext().getCurrentDwarfLoc().getFlags();
+ if ((Flags & DWARF2_FLAG_IS_STMT) != (OldFlags & DWARF2_FLAG_IS_STMT)) {
+ OS << " is_stmt ";
+
+ if (Flags & DWARF2_FLAG_IS_STMT)
+ OS << "1";
+ else
+ OS << "0";
+ }
+
+ if (Isa)
+ OS << "isa " << Isa;
+ if (Discriminator)
+ OS << "discriminator " << Discriminator;
+
+ if (IsVerboseAsm) {
+ OS.PadToColumn(MAI.getCommentColumn());
+ OS << MAI.getCommentString() << ' ' << FileName << ':'
+ << Line << ':' << Column;
+ }
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) {
+ MCStreamer::EmitCFISections(EH, Debug);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_sections ";
+ if (EH) {
+ OS << ".eh_frame";
+ if (Debug)
+ OS << ", .debug_frame";
+ } else if (Debug) {
+ OS << ".debug_frame";
+ }
+
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+ if (!UseCFI) {
+ RecordProcStart(Frame);
+ return;
+ }
+
+ OS << "\t.cfi_startproc";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+ if (!UseCFI) {
+ RecordProcEnd(Frame);
+ return;
+ }
+
+ // Put a dummy non-null value in Frame.End to mark that this frame has been
+ // closed.
+ Frame.End = (MCSymbol *) 1;
+
+ OS << "\t.cfi_endproc";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitRegisterName(int64_t Register) {
+ if (InstPrinter && !MAI.useDwarfRegNumForCFI()) {
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+ unsigned LLVMRegister = MRI.getLLVMRegNum(Register, true);
+ InstPrinter->printRegName(OS, LLVMRegister);
+ } else {
+ OS << Register;
+ }
+}
+
+void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+ MCStreamer::EmitCFIDefCfa(Register, Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_def_cfa ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+ MCStreamer::EmitCFIDefCfaOffset(Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_def_cfa_offset " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+ MCStreamer::EmitCFIDefCfaRegister(Register);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_def_cfa_register ";
+ EmitRegisterName(Register);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+ this->MCStreamer::EmitCFIOffset(Register, Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_offset ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+ unsigned Encoding) {
+ MCStreamer::EmitCFIPersonality(Sym, Encoding);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_personality " << Encoding << ", " << *Sym;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+ MCStreamer::EmitCFILsda(Sym, Encoding);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_lsda " << Encoding << ", " << *Sym;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRememberState() {
+ MCStreamer::EmitCFIRememberState();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_remember_state";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRestoreState() {
+ MCStreamer::EmitCFIRestoreState();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_restore_state";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFISameValue(int64_t Register) {
+ MCStreamer::EmitCFISameValue(Register);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_same_value ";
+ EmitRegisterName(Register);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
+ MCStreamer::EmitCFIRelOffset(Register, Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_rel_offset ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
+ MCStreamer::EmitCFIAdjustCfaOffset(Adjustment);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_adjust_cfa_offset " << Adjustment;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFISignalFrame() {
+ MCStreamer::EmitCFISignalFrame();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_signal_frame";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIUndefined(int64_t Register) {
+ MCStreamer::EmitCFIUndefined(Register);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_undefined " << Register;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
+ MCStreamer::EmitCFIRegister(Register1, Register2);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_register " << Register1 << ", " << Register2;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
+ MCStreamer::EmitWin64EHStartProc(Symbol);
+
+ OS << ".seh_proc " << *Symbol;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHEndProc() {
+ MCStreamer::EmitWin64EHEndProc();
+
+ OS << "\t.seh_endproc";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHStartChained() {
+ MCStreamer::EmitWin64EHStartChained();
+
+ OS << "\t.seh_startchained";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHEndChained() {
+ MCStreamer::EmitWin64EHEndChained();
+
+ OS << "\t.seh_endchained";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except) {
+ MCStreamer::EmitWin64EHHandler(Sym, Unwind, Except);
+
+ OS << "\t.seh_handler " << *Sym;
+ if (Unwind)
+ OS << ", @unwind";
+ if (Except)
+ OS << ", @except";
+ EmitEOL();
+}
+
+static const MCSection *getWin64EHTableSection(StringRef suffix,
+ MCContext &context) {
+ // FIXME: This doesn't belong in MCObjectFileInfo. However,
+ /// this duplicate code in MCWin64EH.cpp.
+ if (suffix == "")
+ return context.getObjectFileInfo()->getXDataSection();
+ return context.getCOFFSection((".xdata"+suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+void MCAsmStreamer::EmitWin64EHHandlerData() {
+ MCStreamer::EmitWin64EHHandlerData();
+
+ // Switch sections. Don't call SwitchSection directly, because that will
+ // cause the section switch to be visible in the emitted assembly.
+ // We only do this so the section switch that terminates the handler
+ // data block is visible.
+ MCWin64EHUnwindInfo *CurFrame = getCurrentW64UnwindInfo();
+ StringRef suffix=MCWin64EHUnwindEmitter::GetSectionSuffix(CurFrame->Function);
+ const MCSection *xdataSect = getWin64EHTableSection(suffix, getContext());
+ if (xdataSect)
+ SwitchSectionNoChange(xdataSect);
+
+ OS << "\t.seh_handlerdata";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHPushReg(unsigned Register) {
+ MCStreamer::EmitWin64EHPushReg(Register);
+
+ OS << "\t.seh_pushreg " << Register;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWin64EHSetFrame(Register, Offset);
+
+ OS << "\t.seh_setframe " << Register << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHAllocStack(unsigned Size) {
+ MCStreamer::EmitWin64EHAllocStack(Size);
+
+ OS << "\t.seh_stackalloc " << Size;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWin64EHSaveReg(Register, Offset);
+
+ OS << "\t.seh_savereg " << Register << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWin64EHSaveXMM(Register, Offset);
+
+ OS << "\t.seh_savexmm " << Register << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHPushFrame(bool Code) {
+ MCStreamer::EmitWin64EHPushFrame(Code);
+
+ OS << "\t.seh_pushframe";
+ if (Code)
+ OS << " @code";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHEndProlog(void) {
+ MCStreamer::EmitWin64EHEndProlog();
+
+ OS << "\t.seh_endprologue";
+ EmitEOL();
+}
+
+void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
+ raw_ostream &OS = GetCommentOS();
+ SmallString<256> Code;
+ SmallVector<MCFixup, 4> Fixups;
+ raw_svector_ostream VecOS(Code);
+ Emitter->EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // If we are showing fixups, create symbolic markers in the encoded
+ // representation. We do this by making a per-bit map to the fixup item index,
+ // then trying to display it as nicely as possible.
+ SmallVector<uint8_t, 64> FixupMap;
+ FixupMap.resize(Code.size() * 8);
+ for (unsigned i = 0, e = Code.size() * 8; i != e; ++i)
+ FixupMap[i] = 0;
+
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ MCFixup &F = Fixups[i];
+ const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind());
+ for (unsigned j = 0; j != Info.TargetSize; ++j) {
+ unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j;
+ assert(Index < Code.size() * 8 && "Invalid offset in fixup!");
+ FixupMap[Index] = 1 + i;
+ }
+ }
+
+ // FIXME: Note the fixup comments for Thumb2 are completely bogus since the
+ // high order halfword of a 32-bit Thumb2 instruction is emitted first.
+ OS << "encoding: [";
+ for (unsigned i = 0, e = Code.size(); i != e; ++i) {
+ if (i)
+ OS << ',';
+
+ // See if all bits are the same map entry.
+ uint8_t MapEntry = FixupMap[i * 8 + 0];
+ for (unsigned j = 1; j != 8; ++j) {
+ if (FixupMap[i * 8 + j] == MapEntry)
+ continue;
+
+ MapEntry = uint8_t(~0U);
+ break;
+ }
+
+ if (MapEntry != uint8_t(~0U)) {
+ if (MapEntry == 0) {
+ OS << format("0x%02x", uint8_t(Code[i]));
+ } else {
+ if (Code[i]) {
+ // FIXME: Some of the 8 bits require fix up.
+ OS << format("0x%02x", uint8_t(Code[i])) << '\''
+ << char('A' + MapEntry - 1) << '\'';
+ } else
+ OS << char('A' + MapEntry - 1);
+ }
+ } else {
+ // Otherwise, write out in binary.
+ OS << "0b";
+ for (unsigned j = 8; j--;) {
+ unsigned Bit = (Code[i] >> j) & 1;
+
+ unsigned FixupBit;
+ if (getContext().getAsmInfo().isLittleEndian())
+ FixupBit = i * 8 + j;
+ else
+ FixupBit = i * 8 + (7-j);
+
+ if (uint8_t MapEntry = FixupMap[FixupBit]) {
+ assert(Bit == 0 && "Encoder wrote into fixed up bit!");
+ OS << char('A' + MapEntry - 1);
+ } else
+ OS << Bit;
+ }
+ }
+ }
+ OS << "]\n";
+
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ MCFixup &F = Fixups[i];
+ const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind());
+ OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset()
+ << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n";
+ }
+}
+
+void MCAsmStreamer::EmitFnStart() {
+ OS << "\t.fnstart";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitFnEnd() {
+ OS << "\t.fnend";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCantUnwind() {
+ OS << "\t.cantunwind";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitHandlerData() {
+ OS << "\t.handlerdata";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitPersonality(const MCSymbol *Personality) {
+ OS << "\t.personality " << Personality->getName();
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
+ OS << "\t.setfp\t";
+ InstPrinter->printRegName(OS, FpReg);
+ OS << ", ";
+ InstPrinter->printRegName(OS, SpReg);
+ if (Offset)
+ OS << ", #" << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitPad(int64_t Offset) {
+ OS << "\t.pad\t#" << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ assert(RegList.size() && "RegList should not be empty");
+ if (isVector)
+ OS << "\t.vsave\t{";
+ else
+ OS << "\t.save\t{";
+
+ InstPrinter->printRegName(OS, RegList[0]);
+
+ for (unsigned i = 1, e = RegList.size(); i != e; ++i) {
+ OS << ", ";
+ InstPrinter->printRegName(OS, RegList[i]);
+ }
+
+ OS << "}";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) {
+ OS << "\t.tc ";
+ OS << S.getName();
+ OS << "[TC],";
+ OS << S.getName();
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+ // Show the encoding in a comment if we have a code emitter.
+ if (Emitter)
+ AddEncodingComment(Inst);
+
+ // Show the MCInst if enabled.
+ if (ShowInst) {
+ Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
+ GetCommentOS() << "\n";
+ }
+
+ // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
+ if (InstPrinter)
+ InstPrinter->printInst(&Inst, OS, "");
+ else
+ Inst.print(OS, &MAI);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
+ OS << "\t.bundle_align_mode " << AlignPow2;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitBundleLock(bool AlignToEnd) {
+ OS << "\t.bundle_lock";
+ if (AlignToEnd)
+ OS << " align_to_end";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitBundleUnlock() {
+ OS << "\t.bundle_unlock";
+ EmitEOL();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void MCAsmStreamer::EmitRawText(StringRef String) {
+ if (!String.empty() && String.back() == '\n')
+ String = String.substr(0, String.size()-1);
+ OS << String;
+ EmitEOL();
+}
+
+void MCAsmStreamer::FinishImpl() {
+ // FIXME: This header is duplicated with MCObjectStreamer
+ // Dump out the dwarf file & directory tables and line tables.
+ const MCSymbol *LineSectionSymbol = NULL;
+ if (getContext().hasDwarfFiles() && !UseLoc)
+ LineSectionSymbol = MCDwarfFileTable::Emit(this);
+
+ // If we are generating dwarf for assembly source files dump out the sections.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfInfo::Emit(this, LineSectionSymbol);
+
+ if (!UseCFI)
+ EmitFrames(false);
+}
+MCStreamer *llvm::createAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc,
+ bool useCFI, bool useDwarfDirectory,
+ MCInstPrinter *IP, MCCodeEmitter *CE,
+ MCAsmBackend *MAB, bool ShowInst) {
+ return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
+ useDwarfDirectory, IP, CE, MAB, ShowInst);
+}
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
new file mode 100644
index 000000000000..1829266f96cb
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -0,0 +1,1154 @@
+//===- lib/MC/MCAssembler.cpp - Assembler Backend Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "assembler"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+namespace stats {
+STATISTIC(EmittedFragments, "Number of emitted assembler fragments - total");
+STATISTIC(EmittedRelaxableFragments,
+ "Number of emitted assembler fragments - relaxable");
+STATISTIC(EmittedDataFragments,
+ "Number of emitted assembler fragments - data");
+STATISTIC(EmittedCompactEncodedInstFragments,
+ "Number of emitted assembler fragments - compact encoded inst");
+STATISTIC(EmittedAlignFragments,
+ "Number of emitted assembler fragments - align");
+STATISTIC(EmittedFillFragments,
+ "Number of emitted assembler fragments - fill");
+STATISTIC(EmittedOrgFragments,
+ "Number of emitted assembler fragments - org");
+STATISTIC(evaluateFixup, "Number of evaluated fixups");
+STATISTIC(FragmentLayouts, "Number of fragment layouts");
+STATISTIC(ObjectBytes, "Number of emitted object file bytes");
+STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
+STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
+}
+}
+
+// FIXME FIXME FIXME: There are number of places in this file where we convert
+// what is a 64-bit assembler value used for computation into a value in the
+// object file, which may truncate it. We should detect that truncation where
+// invalid and report errors back.
+
+/* *** */
+
+MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
+ : Assembler(Asm), LastValidFragment()
+ {
+ // Compute the section layout order. Virtual sections must go last.
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+ if (!it->getSection().isVirtualSection())
+ SectionOrder.push_back(&*it);
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+ if (it->getSection().isVirtualSection())
+ SectionOrder.push_back(&*it);
+}
+
+bool MCAsmLayout::isFragmentValid(const MCFragment *F) const {
+ const MCSectionData &SD = *F->getParent();
+ const MCFragment *LastValid = LastValidFragment.lookup(&SD);
+ if (!LastValid)
+ return false;
+ assert(LastValid->getParent() == F->getParent());
+ return F->getLayoutOrder() <= LastValid->getLayoutOrder();
+}
+
+void MCAsmLayout::invalidateFragmentsFrom(MCFragment *F) {
+ // If this fragment wasn't already valid, we don't need to do anything.
+ if (!isFragmentValid(F))
+ return;
+
+ // Otherwise, reset the last valid fragment to the previous fragment
+ // (if this is the first fragment, it will be NULL).
+ const MCSectionData &SD = *F->getParent();
+ LastValidFragment[&SD] = F->getPrevNode();
+}
+
+void MCAsmLayout::ensureValid(const MCFragment *F) const {
+ MCSectionData &SD = *F->getParent();
+
+ MCFragment *Cur = LastValidFragment[&SD];
+ if (!Cur)
+ Cur = &*SD.begin();
+ else
+ Cur = Cur->getNextNode();
+
+ // Advance the layout position until the fragment is valid.
+ while (!isFragmentValid(F)) {
+ assert(Cur && "Layout bookkeeping error");
+ const_cast<MCAsmLayout*>(this)->layoutFragment(Cur);
+ Cur = Cur->getNextNode();
+ }
+}
+
+uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
+ ensureValid(F);
+ assert(F->Offset != ~UINT64_C(0) && "Address not set!");
+ return F->Offset;
+}
+
+uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
+ const MCSymbol &S = SD->getSymbol();
+
+ // If this is a variable, then recursively evaluate now.
+ if (S.isVariable()) {
+ MCValue Target;
+ if (!S.getVariableValue()->EvaluateAsRelocatable(Target, *this))
+ report_fatal_error("unable to evaluate offset for variable '" +
+ S.getName() + "'");
+
+ // Verify that any used symbols are defined.
+ if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymA()->getSymbol().getName() + "'");
+ if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymB()->getSymbol().getName() + "'");
+
+ uint64_t Offset = Target.getConstant();
+ if (Target.getSymA())
+ Offset += getSymbolOffset(&Assembler.getSymbolData(
+ Target.getSymA()->getSymbol()));
+ if (Target.getSymB())
+ Offset -= getSymbolOffset(&Assembler.getSymbolData(
+ Target.getSymB()->getSymbol()));
+ return Offset;
+ }
+
+ assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!");
+ return getFragmentOffset(SD->getFragment()) + SD->getOffset();
+}
+
+uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
+ // The size is the last fragment's end offset.
+ const MCFragment &F = SD->getFragmentList().back();
+ return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
+}
+
+uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
+ // Virtual sections have no file size.
+ if (SD->getSection().isVirtualSection())
+ return 0;
+
+ // Otherwise, the file size is the same as the address space size.
+ return getSectionAddressSize(SD);
+}
+
+uint64_t MCAsmLayout::computeBundlePadding(const MCFragment *F,
+ uint64_t FOffset, uint64_t FSize) {
+ uint64_t BundleSize = Assembler.getBundleAlignSize();
+ assert(BundleSize > 0 &&
+ "computeBundlePadding should only be called if bundling is enabled");
+ uint64_t BundleMask = BundleSize - 1;
+ uint64_t OffsetInBundle = FOffset & BundleMask;
+ uint64_t EndOfFragment = OffsetInBundle + FSize;
+
+ // There are two kinds of bundling restrictions:
+ //
+ // 1) For alignToBundleEnd(), add padding to ensure that the fragment will
+ // *end* on a bundle boundary.
+ // 2) Otherwise, check if the fragment would cross a bundle boundary. If it
+ // would, add padding until the end of the bundle so that the fragment
+ // will start in a new one.
+ if (F->alignToBundleEnd()) {
+ // Three possibilities here:
+ //
+ // A) The fragment just happens to end at a bundle boundary, so we're good.
+ // B) The fragment ends before the current bundle boundary: pad it just
+ // enough to reach the boundary.
+ // C) The fragment ends after the current bundle boundary: pad it until it
+ // reaches the end of the next bundle boundary.
+ //
+ // Note: this code could be made shorter with some modulo trickery, but it's
+ // intentionally kept in its more explicit form for simplicity.
+ if (EndOfFragment == BundleSize)
+ return 0;
+ else if (EndOfFragment < BundleSize)
+ return BundleSize - EndOfFragment;
+ else { // EndOfFragment > BundleSize
+ return 2 * BundleSize - EndOfFragment;
+ }
+ } else if (EndOfFragment > BundleSize)
+ return BundleSize - OffsetInBundle;
+ else
+ return 0;
+}
+
+/* *** */
+
+MCFragment::MCFragment() : Kind(FragmentType(~0)) {
+}
+
+MCFragment::~MCFragment() {
+}
+
+MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
+ : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0))
+{
+ if (Parent)
+ Parent->getFragmentList().push_back(this);
+}
+
+/* *** */
+
+MCEncodedFragment::~MCEncodedFragment() {
+}
+
+/* *** */
+
+MCEncodedFragmentWithFixups::~MCEncodedFragmentWithFixups() {
+}
+
+/* *** */
+
+MCSectionData::MCSectionData() : Section(0) {}
+
+MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
+ : Section(&_Section),
+ Ordinal(~UINT32_C(0)),
+ Alignment(1),
+ BundleLockState(NotBundleLocked), BundleGroupBeforeFirstInst(false),
+ HasInstructions(false)
+{
+ if (A)
+ A->getSectionList().push_back(this);
+}
+
+/* *** */
+
+MCSymbolData::MCSymbolData() : Symbol(0) {}
+
+MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
+ uint64_t _Offset, MCAssembler *A)
+ : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
+ IsExternal(false), IsPrivateExtern(false),
+ CommonSize(0), SymbolSize(0), CommonAlign(0),
+ Flags(0), Index(0)
+{
+ if (A)
+ A->getSymbolList().push_back(this);
+}
+
+/* *** */
+
+MCAssembler::MCAssembler(MCContext &Context_, MCAsmBackend &Backend_,
+ MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
+ raw_ostream &OS_)
+ : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
+ OS(OS_), BundleAlignSize(0), RelaxAll(false), NoExecStack(false),
+ SubsectionsViaSymbols(false), ELFHeaderEFlags(0) {
+}
+
+MCAssembler::~MCAssembler() {
+}
+
+void MCAssembler::reset() {
+ Sections.clear();
+ Symbols.clear();
+ SectionMap.clear();
+ SymbolMap.clear();
+ IndirectSymbols.clear();
+ DataRegions.clear();
+ ThumbFuncs.clear();
+ RelaxAll = false;
+ NoExecStack = false;
+ SubsectionsViaSymbols = false;
+ ELFHeaderEFlags = 0;
+
+ // reset objects owned by us
+ getBackend().reset();
+ getEmitter().reset();
+ getWriter().reset();
+}
+
+bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
+ // Non-temporary labels should always be visible to the linker.
+ if (!Symbol.isTemporary())
+ return true;
+
+ // Absolute temporary labels are never visible.
+ if (!Symbol.isInSection())
+ return false;
+
+ // Otherwise, check if the section requires symbols even for temporary labels.
+ return getBackend().doesSectionRequireSymbols(Symbol.getSection());
+}
+
+const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
+ // Linker visible symbols define atoms.
+ if (isSymbolLinkerVisible(SD->getSymbol()))
+ return SD;
+
+ // Absolute and undefined symbols have no defining atom.
+ if (!SD->getFragment())
+ return 0;
+
+ // Non-linker visible symbols in sections which can't be atomized have no
+ // defining atom.
+ if (!getBackend().isSectionAtomizable(
+ SD->getFragment()->getParent()->getSection()))
+ return 0;
+
+ // Otherwise, return the atom for the containing fragment.
+ return SD->getFragment()->getAtom();
+}
+
+bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value) const {
+ ++stats::evaluateFixup;
+
+ if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout))
+ getContext().FatalError(Fixup.getLoc(), "expected relocatable expression");
+
+ bool IsPCRel = Backend.getFixupKindInfo(
+ Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
+
+ bool IsResolved;
+ if (IsPCRel) {
+ if (Target.getSymB()) {
+ IsResolved = false;
+ } else if (!Target.getSymA()) {
+ IsResolved = false;
+ } else {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ const MCSymbol &SA = A->getSymbol();
+ if (A->getKind() != MCSymbolRefExpr::VK_None ||
+ SA.AliasedSymbol().isUndefined()) {
+ IsResolved = false;
+ } else {
+ const MCSymbolData &DataA = getSymbolData(SA);
+ IsResolved =
+ getWriter().IsSymbolRefDifferenceFullyResolvedImpl(*this, DataA,
+ *DF, false, true);
+ }
+ }
+ } else {
+ IsResolved = Target.isAbsolute();
+ }
+
+ Value = Target.getConstant();
+
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Sym.isDefined())
+ Value += Layout.getSymbolOffset(&getSymbolData(Sym));
+ }
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ const MCSymbol &Sym = B->getSymbol().AliasedSymbol();
+ if (Sym.isDefined())
+ Value -= Layout.getSymbolOffset(&getSymbolData(Sym));
+ }
+
+
+ bool ShouldAlignPC = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits;
+ assert((ShouldAlignPC ? IsPCRel : true) &&
+ "FKF_IsAlignedDownTo32Bits is only allowed on PC-relative fixups!");
+
+ if (IsPCRel) {
+ uint32_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset();
+
+ // A number of ARM fixups in Thumb mode require that the effective PC
+ // address be determined as the 32-bit aligned version of the actual offset.
+ if (ShouldAlignPC) Offset &= ~0x3;
+ Value -= Offset;
+ }
+
+ // Let the backend adjust the fixup value if necessary, including whether
+ // we need a relocation.
+ Backend.processFixupValue(*this, Layout, Fixup, DF, Target, Value,
+ IsResolved);
+
+ return IsResolved;
+}
+
+uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
+ const MCFragment &F) const {
+ switch (F.getKind()) {
+ case MCFragment::FT_Data:
+ case MCFragment::FT_Relaxable:
+ case MCFragment::FT_CompactEncodedInst:
+ return cast<MCEncodedFragment>(F).getContents().size();
+ case MCFragment::FT_Fill:
+ return cast<MCFillFragment>(F).getSize();
+
+ case MCFragment::FT_LEB:
+ return cast<MCLEBFragment>(F).getContents().size();
+
+ case MCFragment::FT_Align: {
+ const MCAlignFragment &AF = cast<MCAlignFragment>(F);
+ unsigned Offset = Layout.getFragmentOffset(&AF);
+ unsigned Size = OffsetToAlignment(Offset, AF.getAlignment());
+ // If we are padding with nops, force the padding to be larger than the
+ // minimum nop size.
+ if (Size > 0 && AF.hasEmitNops()) {
+ while (Size % getBackend().getMinimumNopSize())
+ Size += AF.getAlignment();
+ }
+ if (Size > AF.getMaxBytesToEmit())
+ return 0;
+ return Size;
+ }
+
+ case MCFragment::FT_Org: {
+ const MCOrgFragment &OF = cast<MCOrgFragment>(F);
+ int64_t TargetLocation;
+ if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, Layout))
+ report_fatal_error("expected assembly-time absolute expression");
+
+ // FIXME: We need a way to communicate this error.
+ uint64_t FragmentOffset = Layout.getFragmentOffset(&OF);
+ int64_t Size = TargetLocation - FragmentOffset;
+ if (Size < 0 || Size >= 0x40000000)
+ report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
+ "' (at offset '" + Twine(FragmentOffset) + "')");
+ return Size;
+ }
+
+ case MCFragment::FT_Dwarf:
+ return cast<MCDwarfLineAddrFragment>(F).getContents().size();
+ case MCFragment::FT_DwarfFrame:
+ return cast<MCDwarfCallFrameFragment>(F).getContents().size();
+ }
+
+ llvm_unreachable("invalid fragment kind");
+}
+
+void MCAsmLayout::layoutFragment(MCFragment *F) {
+ MCFragment *Prev = F->getPrevNode();
+
+ // We should never try to recompute something which is valid.
+ assert(!isFragmentValid(F) && "Attempt to recompute a valid fragment!");
+ // We should never try to compute the fragment layout if its predecessor
+ // isn't valid.
+ assert((!Prev || isFragmentValid(Prev)) &&
+ "Attempt to compute fragment before its predecessor!");
+
+ ++stats::FragmentLayouts;
+
+ // Compute fragment offset and size.
+ if (Prev)
+ F->Offset = Prev->Offset + getAssembler().computeFragmentSize(*this, *Prev);
+ else
+ F->Offset = 0;
+ LastValidFragment[F->getParent()] = F;
+
+ // If bundling is enabled and this fragment has instructions in it, it has to
+ // obey the bundling restrictions. With padding, we'll have:
+ //
+ //
+ // BundlePadding
+ // |||
+ // -------------------------------------
+ // Prev |##########| F |
+ // -------------------------------------
+ // ^
+ // |
+ // F->Offset
+ //
+ // The fragment's offset will point to after the padding, and its computed
+ // size won't include the padding.
+ //
+ if (Assembler.isBundlingEnabled() && F->hasInstructions()) {
+ assert(isa<MCEncodedFragment>(F) &&
+ "Only MCEncodedFragment implementations have instructions");
+ uint64_t FSize = Assembler.computeFragmentSize(*this, *F);
+
+ if (FSize > Assembler.getBundleAlignSize())
+ report_fatal_error("Fragment can't be larger than a bundle size");
+
+ uint64_t RequiredBundlePadding = computeBundlePadding(F, F->Offset, FSize);
+ if (RequiredBundlePadding > UINT8_MAX)
+ report_fatal_error("Padding cannot exceed 255 bytes");
+ F->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
+ F->Offset += RequiredBundlePadding;
+ }
+}
+
+/// \brief Write the contents of a fragment to the given object writer. Expects
+/// a MCEncodedFragment.
+static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) {
+ const MCEncodedFragment &EF = cast<MCEncodedFragment>(F);
+ OW->WriteBytes(EF.getContents());
+}
+
+/// \brief Write the fragment \p F to the output file.
+static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment &F) {
+ MCObjectWriter *OW = &Asm.getWriter();
+
+ // FIXME: Embed in fragments instead?
+ uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
+
+ // Should NOP padding be written out before this fragment?
+ unsigned BundlePadding = F.getBundlePadding();
+ if (BundlePadding > 0) {
+ assert(Asm.isBundlingEnabled() &&
+ "Writing bundle padding with disabled bundling");
+ assert(F.hasInstructions() &&
+ "Writing bundle padding for a fragment without instructions");
+
+ unsigned TotalLength = BundlePadding + static_cast<unsigned>(FragmentSize);
+ if (F.alignToBundleEnd() && TotalLength > Asm.getBundleAlignSize()) {
+ // If the padding itself crosses a bundle boundary, it must be emitted
+ // in 2 pieces, since even nop instructions must not cross boundaries.
+ // v--------------v <- BundleAlignSize
+ // v---------v <- BundlePadding
+ // ----------------------------
+ // | Prev |####|####| F |
+ // ----------------------------
+ // ^-------------------^ <- TotalLength
+ unsigned DistanceToBoundary = TotalLength - Asm.getBundleAlignSize();
+ if (!Asm.getBackend().writeNopData(DistanceToBoundary, OW))
+ report_fatal_error("unable to write NOP sequence of " +
+ Twine(DistanceToBoundary) + " bytes");
+ BundlePadding -= DistanceToBoundary;
+ }
+ if (!Asm.getBackend().writeNopData(BundlePadding, OW))
+ report_fatal_error("unable to write NOP sequence of " +
+ Twine(BundlePadding) + " bytes");
+ }
+
+ // This variable (and its dummy usage) is to participate in the assert at
+ // the end of the function.
+ uint64_t Start = OW->getStream().tell();
+ (void) Start;
+
+ ++stats::EmittedFragments;
+
+ switch (F.getKind()) {
+ case MCFragment::FT_Align: {
+ ++stats::EmittedAlignFragments;
+ const MCAlignFragment &AF = cast<MCAlignFragment>(F);
+ uint64_t Count = FragmentSize / AF.getValueSize();
+
+ assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
+ // FIXME: This error shouldn't actually occur (the front end should emit
+ // multiple .align directives to enforce the semantics it wants), but is
+ // severe enough that we want to report it. How to handle this?
+ if (Count * AF.getValueSize() != FragmentSize)
+ report_fatal_error("undefined .align directive, value size '" +
+ Twine(AF.getValueSize()) +
+ "' is not a divisor of padding size '" +
+ Twine(FragmentSize) + "'");
+
+ // See if we are aligning with nops, and if so do that first to try to fill
+ // the Count bytes. Then if that did not fill any bytes or there are any
+ // bytes left to fill use the Value and ValueSize to fill the rest.
+ // If we are aligning with nops, ask that target to emit the right data.
+ if (AF.hasEmitNops()) {
+ if (!Asm.getBackend().writeNopData(Count, OW))
+ report_fatal_error("unable to write nop sequence of " +
+ Twine(Count) + " bytes");
+ break;
+ }
+
+ // Otherwise, write out in multiples of the value size.
+ for (uint64_t i = 0; i != Count; ++i) {
+ switch (AF.getValueSize()) {
+ default: llvm_unreachable("Invalid size!");
+ case 1: OW->Write8 (uint8_t (AF.getValue())); break;
+ case 2: OW->Write16(uint16_t(AF.getValue())); break;
+ case 4: OW->Write32(uint32_t(AF.getValue())); break;
+ case 8: OW->Write64(uint64_t(AF.getValue())); break;
+ }
+ }
+ break;
+ }
+
+ case MCFragment::FT_Data:
+ ++stats::EmittedDataFragments;
+ writeFragmentContents(F, OW);
+ break;
+
+ case MCFragment::FT_Relaxable:
+ ++stats::EmittedRelaxableFragments;
+ writeFragmentContents(F, OW);
+ break;
+
+ case MCFragment::FT_CompactEncodedInst:
+ ++stats::EmittedCompactEncodedInstFragments;
+ writeFragmentContents(F, OW);
+ break;
+
+ case MCFragment::FT_Fill: {
+ ++stats::EmittedFillFragments;
+ const MCFillFragment &FF = cast<MCFillFragment>(F);
+
+ assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
+ for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) {
+ switch (FF.getValueSize()) {
+ default: llvm_unreachable("Invalid size!");
+ case 1: OW->Write8 (uint8_t (FF.getValue())); break;
+ case 2: OW->Write16(uint16_t(FF.getValue())); break;
+ case 4: OW->Write32(uint32_t(FF.getValue())); break;
+ case 8: OW->Write64(uint64_t(FF.getValue())); break;
+ }
+ }
+ break;
+ }
+
+ case MCFragment::FT_LEB: {
+ const MCLEBFragment &LF = cast<MCLEBFragment>(F);
+ OW->WriteBytes(LF.getContents().str());
+ break;
+ }
+
+ case MCFragment::FT_Org: {
+ ++stats::EmittedOrgFragments;
+ const MCOrgFragment &OF = cast<MCOrgFragment>(F);
+
+ for (uint64_t i = 0, e = FragmentSize; i != e; ++i)
+ OW->Write8(uint8_t(OF.getValue()));
+
+ break;
+ }
+
+ case MCFragment::FT_Dwarf: {
+ const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
+ OW->WriteBytes(OF.getContents().str());
+ break;
+ }
+ case MCFragment::FT_DwarfFrame: {
+ const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
+ OW->WriteBytes(CF.getContents().str());
+ break;
+ }
+ }
+
+ assert(OW->getStream().tell() - Start == FragmentSize &&
+ "The stream should advance by fragment size");
+}
+
+void MCAssembler::writeSectionData(const MCSectionData *SD,
+ const MCAsmLayout &Layout) const {
+ // Ignore virtual sections.
+ if (SD->getSection().isVirtualSection()) {
+ assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!");
+
+ // Check that contents are only things legal inside a virtual section.
+ for (MCSectionData::const_iterator it = SD->begin(),
+ ie = SD->end(); it != ie; ++it) {
+ switch (it->getKind()) {
+ default: llvm_unreachable("Invalid fragment in virtual section!");
+ case MCFragment::FT_Data: {
+ // Check that we aren't trying to write a non-zero contents (or fixups)
+ // into a virtual section. This is to support clients which use standard
+ // directives to fill the contents of virtual sections.
+ const MCDataFragment &DF = cast<MCDataFragment>(*it);
+ assert(DF.fixup_begin() == DF.fixup_end() &&
+ "Cannot have fixups in virtual section!");
+ for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
+ assert(DF.getContents()[i] == 0 &&
+ "Invalid data value for virtual section!");
+ break;
+ }
+ case MCFragment::FT_Align:
+ // Check that we aren't trying to write a non-zero value into a virtual
+ // section.
+ assert((!cast<MCAlignFragment>(it)->getValueSize() ||
+ !cast<MCAlignFragment>(it)->getValue()) &&
+ "Invalid align in virtual section!");
+ break;
+ case MCFragment::FT_Fill:
+ assert(!cast<MCFillFragment>(it)->getValueSize() &&
+ "Invalid fill in virtual section!");
+ break;
+ }
+ }
+
+ return;
+ }
+
+ uint64_t Start = getWriter().getStream().tell();
+ (void)Start;
+
+ for (MCSectionData::const_iterator it = SD->begin(), ie = SD->end();
+ it != ie; ++it)
+ writeFragment(*this, Layout, *it);
+
+ assert(getWriter().getStream().tell() - Start ==
+ Layout.getSectionAddressSize(SD));
+}
+
+
+uint64_t MCAssembler::handleFixup(const MCAsmLayout &Layout,
+ MCFragment &F,
+ const MCFixup &Fixup) {
+ // Evaluate the fixup.
+ MCValue Target;
+ uint64_t FixedValue;
+ if (!evaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+ // The fixup was unresolved, we need a relocation. Inform the object
+ // writer of the relocation, and give it an opportunity to adjust the
+ // fixup value if need be.
+ getWriter().RecordRelocation(*this, Layout, &F, Fixup, Target, FixedValue);
+ }
+ return FixedValue;
+ }
+
+void MCAssembler::Finish() {
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - pre-layout\n--\n";
+ dump(); });
+
+ // Create the layout object.
+ MCAsmLayout Layout(*this);
+
+ // Create dummy fragments and assign section ordinals.
+ unsigned SectionIndex = 0;
+ for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+ // Create dummy fragments to eliminate any empty sections, this simplifies
+ // layout.
+ if (it->getFragmentList().empty())
+ new MCDataFragment(it);
+
+ it->setOrdinal(SectionIndex++);
+ }
+
+ // Assign layout order indices to sections and fragments.
+ for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) {
+ MCSectionData *SD = Layout.getSectionOrder()[i];
+ SD->setLayoutOrder(i);
+
+ unsigned FragmentIndex = 0;
+ for (MCSectionData::iterator iFrag = SD->begin(), iFragEnd = SD->end();
+ iFrag != iFragEnd; ++iFrag)
+ iFrag->setLayoutOrder(FragmentIndex++);
+ }
+
+ // Layout until everything fits.
+ while (layoutOnce(Layout))
+ continue;
+
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - post-relaxation\n--\n";
+ dump(); });
+
+ // Finalize the layout, including fragment lowering.
+ finishLayout(Layout);
+
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - final-layout\n--\n";
+ dump(); });
+
+ uint64_t StartOffset = OS.tell();
+
+ // Allow the object writer a chance to perform post-layout binding (for
+ // example, to set the index fields in the symbol data).
+ getWriter().ExecutePostLayoutBinding(*this, Layout);
+
+ // Evaluate and apply the fixups, generating relocation entries as necessary.
+ for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+ for (MCSectionData::iterator it2 = it->begin(),
+ ie2 = it->end(); it2 != ie2; ++it2) {
+ MCEncodedFragmentWithFixups *F =
+ dyn_cast<MCEncodedFragmentWithFixups>(it2);
+ if (F) {
+ for (MCEncodedFragmentWithFixups::fixup_iterator it3 = F->fixup_begin(),
+ ie3 = F->fixup_end(); it3 != ie3; ++it3) {
+ MCFixup &Fixup = *it3;
+ uint64_t FixedValue = handleFixup(Layout, *F, Fixup);
+ getBackend().applyFixup(Fixup, F->getContents().data(),
+ F->getContents().size(), FixedValue);
+ }
+ }
+ }
+ }
+
+ // Write the object file.
+ getWriter().WriteObject(*this, Layout);
+
+ stats::ObjectBytes += OS.tell() - StartOffset;
+}
+
+bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // If we cannot resolve the fixup value, it requires relaxation.
+ MCValue Target;
+ uint64_t Value;
+ if (!evaluateFixup(Layout, Fixup, DF, Target, Value))
+ return true;
+
+ return getBackend().fixupNeedsRelaxation(Fixup, Value, DF, Layout);
+}
+
+bool MCAssembler::fragmentNeedsRelaxation(const MCRelaxableFragment *F,
+ const MCAsmLayout &Layout) const {
+ // If this inst doesn't ever need relaxation, ignore it. This occurs when we
+ // are intentionally pushing out inst fragments, or because we relaxed a
+ // previous instruction to one that doesn't need relaxation.
+ if (!getBackend().mayNeedRelaxation(F->getInst()))
+ return false;
+
+ for (MCRelaxableFragment::const_fixup_iterator it = F->fixup_begin(),
+ ie = F->fixup_end(); it != ie; ++it)
+ if (fixupNeedsRelaxation(*it, F, Layout))
+ return true;
+
+ return false;
+}
+
+bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
+ MCRelaxableFragment &F) {
+ if (!fragmentNeedsRelaxation(&F, Layout))
+ return false;
+
+ ++stats::RelaxedInstructions;
+
+ // FIXME-PERF: We could immediately lower out instructions if we can tell
+ // they are fully resolved, to avoid retesting on later passes.
+
+ // Relax the fragment.
+
+ MCInst Relaxed;
+ getBackend().relaxInstruction(F.getInst(), Relaxed);
+
+ // Encode the new instruction.
+ //
+ // FIXME-PERF: If it matters, we could let the target do this. It can
+ // probably do so more efficiently in many cases.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups);
+ VecOS.flush();
+
+ // Update the fragment.
+ F.setInst(Relaxed);
+ F.getContents() = Code;
+ F.getFixups() = Fixups;
+
+ return true;
+}
+
+bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
+ int64_t Value = 0;
+ uint64_t OldSize = LF.getContents().size();
+ bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ SmallString<8> &Data = LF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ if (LF.isSigned())
+ encodeSLEB128(Value, OSE);
+ else
+ encodeULEB128(Value, OSE);
+ OSE.flush();
+ return OldSize != LF.getContents().size();
+}
+
+bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
+ MCDwarfLineAddrFragment &DF) {
+ int64_t AddrDelta = 0;
+ uint64_t OldSize = DF.getContents().size();
+ bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ int64_t LineDelta;
+ LineDelta = DF.getLineDelta();
+ SmallString<8> &Data = DF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OSE);
+ OSE.flush();
+ return OldSize != Data.size();
+}
+
+bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+ MCDwarfCallFrameFragment &DF) {
+ int64_t AddrDelta = 0;
+ uint64_t OldSize = DF.getContents().size();
+ bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ SmallString<8> &Data = DF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE);
+ OSE.flush();
+ return OldSize != Data.size();
+}
+
+bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD) {
+ // Holds the first fragment which needed relaxing during this layout. It will
+ // remain NULL if none were relaxed.
+ // When a fragment is relaxed, all the fragments following it should get
+ // invalidated because their offset is going to change.
+ MCFragment *FirstRelaxedFragment = NULL;
+
+ // Attempt to relax all the fragments in the section.
+ for (MCSectionData::iterator I = SD.begin(), IE = SD.end(); I != IE; ++I) {
+ // Check if this is a fragment that needs relaxation.
+ bool RelaxedFrag = false;
+ switch(I->getKind()) {
+ default:
+ break;
+ case MCFragment::FT_Relaxable:
+ assert(!getRelaxAll() &&
+ "Did not expect a MCRelaxableFragment in RelaxAll mode");
+ RelaxedFrag = relaxInstruction(Layout, *cast<MCRelaxableFragment>(I));
+ break;
+ case MCFragment::FT_Dwarf:
+ RelaxedFrag = relaxDwarfLineAddr(Layout,
+ *cast<MCDwarfLineAddrFragment>(I));
+ break;
+ case MCFragment::FT_DwarfFrame:
+ RelaxedFrag =
+ relaxDwarfCallFrameFragment(Layout,
+ *cast<MCDwarfCallFrameFragment>(I));
+ break;
+ case MCFragment::FT_LEB:
+ RelaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(I));
+ break;
+ }
+ if (RelaxedFrag && !FirstRelaxedFragment)
+ FirstRelaxedFragment = I;
+ }
+ if (FirstRelaxedFragment) {
+ Layout.invalidateFragmentsFrom(FirstRelaxedFragment);
+ return true;
+ }
+ return false;
+}
+
+bool MCAssembler::layoutOnce(MCAsmLayout &Layout) {
+ ++stats::RelaxationSteps;
+
+ bool WasRelaxed = false;
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ MCSectionData &SD = *it;
+ while (layoutSectionOnce(Layout, SD))
+ WasRelaxed = true;
+ }
+
+ return WasRelaxed;
+}
+
+void MCAssembler::finishLayout(MCAsmLayout &Layout) {
+ // The layout is done. Mark every fragment as valid.
+ for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
+ Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
+ }
+}
+
+// Debugging methods
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
+ OS << "<MCFixup" << " Offset:" << AF.getOffset()
+ << " Value:" << *AF.getValue()
+ << " Kind:" << AF.getKind() << ">";
+ return OS;
+}
+
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MCFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<";
+ switch (getKind()) {
+ case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
+ case MCFragment::FT_Data: OS << "MCDataFragment"; break;
+ case MCFragment::FT_CompactEncodedInst:
+ OS << "MCCompactEncodedInstFragment"; break;
+ case MCFragment::FT_Fill: OS << "MCFillFragment"; break;
+ case MCFragment::FT_Relaxable: OS << "MCRelaxableFragment"; break;
+ case MCFragment::FT_Org: OS << "MCOrgFragment"; break;
+ case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
+ case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
+ case MCFragment::FT_LEB: OS << "MCLEBFragment"; break;
+ }
+
+ OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
+ << " Offset:" << Offset
+ << " HasInstructions:" << hasInstructions()
+ << " BundlePadding:" << static_cast<unsigned>(getBundlePadding()) << ">";
+
+ switch (getKind()) {
+ case MCFragment::FT_Align: {
+ const MCAlignFragment *AF = cast<MCAlignFragment>(this);
+ if (AF->hasEmitNops())
+ OS << " (emit nops)";
+ OS << "\n ";
+ OS << " Alignment:" << AF->getAlignment()
+ << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
+ << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
+ break;
+ }
+ case MCFragment::FT_Data: {
+ const MCDataFragment *DF = cast<MCDataFragment>(this);
+ OS << "\n ";
+ OS << " Contents:[";
+ const SmallVectorImpl<char> &Contents = DF->getContents();
+ for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
+ }
+ OS << "] (" << Contents.size() << " bytes)";
+
+ if (DF->fixup_begin() != DF->fixup_end()) {
+ OS << ",\n ";
+ OS << " Fixups:[";
+ for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(),
+ ie = DF->fixup_end(); it != ie; ++it) {
+ if (it != DF->fixup_begin()) OS << ",\n ";
+ OS << *it;
+ }
+ OS << "]";
+ }
+ break;
+ }
+ case MCFragment::FT_CompactEncodedInst: {
+ const MCCompactEncodedInstFragment *CEIF =
+ cast<MCCompactEncodedInstFragment>(this);
+ OS << "\n ";
+ OS << " Contents:[";
+ const SmallVectorImpl<char> &Contents = CEIF->getContents();
+ for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
+ }
+ OS << "] (" << Contents.size() << " bytes)";
+ break;
+ }
+ case MCFragment::FT_Fill: {
+ const MCFillFragment *FF = cast<MCFillFragment>(this);
+ OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize()
+ << " Size:" << FF->getSize();
+ break;
+ }
+ case MCFragment::FT_Relaxable: {
+ const MCRelaxableFragment *F = cast<MCRelaxableFragment>(this);
+ OS << "\n ";
+ OS << " Inst:";
+ F->getInst().dump_pretty(OS);
+ break;
+ }
+ case MCFragment::FT_Org: {
+ const MCOrgFragment *OF = cast<MCOrgFragment>(this);
+ OS << "\n ";
+ OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
+ break;
+ }
+ case MCFragment::FT_Dwarf: {
+ const MCDwarfLineAddrFragment *OF = cast<MCDwarfLineAddrFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << OF->getAddrDelta()
+ << " LineDelta:" << OF->getLineDelta();
+ break;
+ }
+ case MCFragment::FT_DwarfFrame: {
+ const MCDwarfCallFrameFragment *CF = cast<MCDwarfCallFrameFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << CF->getAddrDelta();
+ break;
+ }
+ case MCFragment::FT_LEB: {
+ const MCLEBFragment *LF = cast<MCLEBFragment>(this);
+ OS << "\n ";
+ OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
+ break;
+ }
+ }
+ OS << ">";
+}
+
+void MCSectionData::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCSectionData";
+ OS << " Alignment:" << getAlignment()
+ << " Fragments:[\n ";
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (it != begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "]>";
+}
+
+void MCSymbolData::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCSymbolData Symbol:" << getSymbol()
+ << " Fragment:" << getFragment() << " Offset:" << getOffset()
+ << " Flags:" << getFlags() << " Index:" << getIndex();
+ if (isCommon())
+ OS << " (common, size:" << getCommonSize()
+ << " align: " << getCommonAlignment() << ")";
+ if (isExternal())
+ OS << " (external)";
+ if (isPrivateExtern())
+ OS << " (private extern)";
+ OS << ">";
+}
+
+void MCAssembler::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCAssembler\n";
+ OS << " Sections:[\n ";
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (it != begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "],\n";
+ OS << " Symbols:[";
+
+ for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) {
+ if (it != symbol_begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "]>\n";
+}
+#endif
+
+// anchors for MC*Fragment vtables
+void MCEncodedFragment::anchor() { }
+void MCEncodedFragmentWithFixups::anchor() { }
+void MCDataFragment::anchor() { }
+void MCCompactEncodedInstFragment::anchor() { }
+void MCRelaxableFragment::anchor() { }
+void MCAlignFragment::anchor() { }
+void MCFillFragment::anchor() { }
+void MCOrgFragment::anchor() { }
+void MCLEBFragment::anchor() { }
+void MCDwarfLineAddrFragment::anchor() { }
+void MCDwarfCallFrameFragment::anchor() { }
diff --git a/contrib/llvm/lib/MC/MCAtom.cpp b/contrib/llvm/lib/MC/MCAtom.cpp
new file mode 100644
index 000000000000..d71444324f3d
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAtom.cpp
@@ -0,0 +1,97 @@
+//===- lib/MC/MCAtom.cpp - MCAtom implementation --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCModule.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+void MCAtom::addInst(const MCInst &I, uint64_t Address, unsigned Size) {
+ assert(Type == TextAtom && "Trying to add MCInst to a non-text atom!");
+
+ assert(Address < End+Size &&
+ "Instruction not contiguous with end of atom!");
+ if (Address > End)
+ Parent->remap(this, Begin, End+Size);
+
+ Text.push_back(std::make_pair(Address, I));
+}
+
+void MCAtom::addData(const MCData &D) {
+ assert(Type == DataAtom && "Trying to add MCData to a non-data atom!");
+ Parent->remap(this, Begin, End+1);
+
+ Data.push_back(D);
+}
+
+MCAtom *MCAtom::split(uint64_t SplitPt) {
+ assert((SplitPt > Begin && SplitPt <= End) &&
+ "Splitting at point not contained in atom!");
+
+ // Compute the new begin/end points.
+ uint64_t LeftBegin = Begin;
+ uint64_t LeftEnd = SplitPt - 1;
+ uint64_t RightBegin = SplitPt;
+ uint64_t RightEnd = End;
+
+ // Remap this atom to become the lower of the two new ones.
+ Parent->remap(this, LeftBegin, LeftEnd);
+
+ // Create a new atom for the higher atom.
+ MCAtom *RightAtom = Parent->createAtom(Type, RightBegin, RightEnd);
+
+ // Split the contents of the original atom between it and the new one. The
+ // precise method depends on whether this is a data or a text atom.
+ if (isDataAtom()) {
+ std::vector<MCData>::iterator I = Data.begin() + (RightBegin - LeftBegin);
+
+ assert(I != Data.end() && "Split point not found in range!");
+
+ std::copy(I, Data.end(), RightAtom->Data.end());
+ Data.erase(I, Data.end());
+ } else if (isTextAtom()) {
+ std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+
+ while (I != Text.end() && I->first < SplitPt) ++I;
+
+ assert(I != Text.end() && "Split point not found in disassembly!");
+ assert(I->first == SplitPt &&
+ "Split point does not fall on instruction boundary!");
+
+ std::copy(I, Text.end(), RightAtom->Text.end());
+ Text.erase(I, Text.end());
+ } else
+ llvm_unreachable("Unknown atom type!");
+
+ return RightAtom;
+}
+
+void MCAtom::truncate(uint64_t TruncPt) {
+ assert((TruncPt >= Begin && TruncPt < End) &&
+ "Truncation point not contained in atom!");
+
+ Parent->remap(this, Begin, TruncPt);
+
+ if (isDataAtom()) {
+ Data.resize(TruncPt - Begin + 1);
+ } else if (isTextAtom()) {
+ std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+
+ while (I != Text.end() && I->first <= TruncPt) ++I;
+
+ assert(I != Text.end() && "Truncation point not found in disassembly!");
+ assert(I->first == TruncPt+1 &&
+ "Truncation point does not fall on instruction boundary");
+
+ Text.erase(I, Text.end());
+ } else
+ llvm_unreachable("Unknown atom type!");
+}
+
diff --git a/contrib/llvm/lib/MC/MCCodeEmitter.cpp b/contrib/llvm/lib/MC/MCCodeEmitter.cpp
new file mode 100644
index 000000000000..c122763b2fe5
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCCodeEmitter.cpp
@@ -0,0 +1,18 @@
+//===-- MCCodeEmitter.cpp - Instruction Encoding --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCCodeEmitter.h"
+
+using namespace llvm;
+
+MCCodeEmitter::MCCodeEmitter() {
+}
+
+MCCodeEmitter::~MCCodeEmitter() {
+}
diff --git a/contrib/llvm/lib/MC/MCCodeGenInfo.cpp b/contrib/llvm/lib/MC/MCCodeGenInfo.cpp
new file mode 100644
index 000000000000..d9dcfd0614bc
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCCodeGenInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MCCodeGenInfo.cpp - Target CodeGen Info -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tracks information about the target which can affect codegen,
+// asm parsing, and asm printing. For example, relocation model.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCCodeGenInfo.h"
+using namespace llvm;
+
+void MCCodeGenInfo::InitMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ RelocationModel = RM;
+ CMModel = CM;
+ OptLevel = OL;
+}
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
new file mode 100644
index 000000000000..9adcc02b71a4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -0,0 +1,390 @@
+//===- lib/MC/MCContext.cpp - Machine Code Context ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCLabel.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
+using namespace llvm;
+
+typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
+typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
+typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
+
+
+MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
+ const MCObjectFileInfo *mofi, const SourceMgr *mgr,
+ bool DoAutoReset) :
+ SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi),
+ Allocator(), Symbols(Allocator), UsedNames(Allocator),
+ NextUniqueID(0),
+ CompilationDir(llvm::sys::Path::GetCurrentDirectory().str()),
+ CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
+ DwarfLocSeen(false), GenDwarfForAssembly(false), GenDwarfFileNumber(0),
+ AllowTemporaryLabels(true), DwarfCompileUnitID(0), AutoReset(DoAutoReset) {
+
+ MachOUniquingMap = 0;
+ ELFUniquingMap = 0;
+ COFFUniquingMap = 0;
+
+ SecureLogFile = getenv("AS_SECURE_LOG_FILE");
+ SecureLog = 0;
+ SecureLogUsed = false;
+
+ if (SrcMgr && SrcMgr->getNumBuffers() > 0)
+ MainFileName = SrcMgr->getMemoryBuffer(0)->getBufferIdentifier();
+ else
+ MainFileName = "";
+}
+
+MCContext::~MCContext() {
+
+ if (AutoReset)
+ reset();
+
+ // NOTE: The symbols are all allocated out of a bump pointer allocator,
+ // we don't need to free them here.
+
+ // If the stream for the .secure_log_unique directive was created free it.
+ delete (raw_ostream*)SecureLog;
+}
+
+//===----------------------------------------------------------------------===//
+// Module Lifetime Management
+//===----------------------------------------------------------------------===//
+
+void MCContext::reset() {
+ UsedNames.clear();
+ Symbols.clear();
+ Allocator.Reset();
+ Instances.clear();
+ MCDwarfFilesCUMap.clear();
+ MCDwarfDirsCUMap.clear();
+ MCGenDwarfLabelEntries.clear();
+ DwarfDebugFlags = StringRef();
+ MCLineSections.clear();
+ MCLineSectionOrder.clear();
+ DwarfCompileUnitID = 0;
+ MCLineTableSymbols.clear();
+ CurrentDwarfLoc = MCDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0);
+
+ // If we have the MachO uniquing map, free it.
+ delete (MachOUniqueMapTy*)MachOUniquingMap;
+ delete (ELFUniqueMapTy*)ELFUniquingMap;
+ delete (COFFUniqueMapTy*)COFFUniquingMap;
+ MachOUniquingMap = 0;
+ ELFUniquingMap = 0;
+ COFFUniquingMap = 0;
+
+ NextUniqueID = 0;
+ AllowTemporaryLabels = true;
+ DwarfLocSeen = false;
+ GenDwarfForAssembly = false;
+ GenDwarfFileNumber = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Manipulation
+//===----------------------------------------------------------------------===//
+
+MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
+ assert(!Name.empty() && "Normal symbols cannot be unnamed!");
+
+ // Do the lookup and get the entire StringMapEntry. We want access to the
+ // key if we are creating the entry.
+ StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name);
+ MCSymbol *Sym = Entry.getValue();
+
+ if (Sym)
+ return Sym;
+
+ Sym = CreateSymbol(Name);
+ Entry.setValue(Sym);
+ return Sym;
+}
+
+MCSymbol *MCContext::CreateSymbol(StringRef Name) {
+ // Determine whether this is an assembler temporary or normal label, if used.
+ bool isTemporary = false;
+ if (AllowTemporaryLabels)
+ isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
+
+ StringMapEntry<bool> *NameEntry = &UsedNames.GetOrCreateValue(Name);
+ if (NameEntry->getValue()) {
+ assert(isTemporary && "Cannot rename non temporary symbols");
+ SmallString<128> NewName = Name;
+ do {
+ NewName.resize(Name.size());
+ raw_svector_ostream(NewName) << NextUniqueID++;
+ NameEntry = &UsedNames.GetOrCreateValue(NewName);
+ } while (NameEntry->getValue());
+ }
+ NameEntry->setValue(true);
+
+ // Ok, the entry doesn't already exist. Have the MCSymbol object itself refer
+ // to the copy of the string that is embedded in the UsedNames entry.
+ MCSymbol *Result = new (*this) MCSymbol(NameEntry->getKey(), isTemporary);
+
+ return Result;
+}
+
+MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
+ SmallString<128> NameSV;
+ Name.toVector(NameSV);
+ return GetOrCreateSymbol(NameSV.str());
+}
+
+MCSymbol *MCContext::CreateTempSymbol() {
+ SmallString<128> NameSV;
+ raw_svector_ostream(NameSV)
+ << MAI.getPrivateGlobalPrefix() << "tmp" << NextUniqueID++;
+ return CreateSymbol(NameSV);
+}
+
+unsigned MCContext::NextInstance(int64_t LocalLabelVal) {
+ MCLabel *&Label = Instances[LocalLabelVal];
+ if (!Label)
+ Label = new (*this) MCLabel(0);
+ return Label->incInstance();
+}
+
+unsigned MCContext::GetInstance(int64_t LocalLabelVal) {
+ MCLabel *&Label = Instances[LocalLabelVal];
+ if (!Label)
+ Label = new (*this) MCLabel(0);
+ return Label->getInstance();
+}
+
+MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) {
+ return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+ Twine(LocalLabelVal) +
+ "\2" +
+ Twine(NextInstance(LocalLabelVal)));
+}
+MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal,
+ int bORf) {
+ return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+ Twine(LocalLabelVal) +
+ "\2" +
+ Twine(GetInstance(LocalLabelVal) + bORf));
+}
+
+MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
+ return Symbols.lookup(Name);
+}
+
+MCSymbol *MCContext::LookupSymbol(const Twine &Name) const {
+ SmallString<128> NameSV;
+ Name.toVector(NameSV);
+ return LookupSymbol(NameSV.str());
+}
+
+//===----------------------------------------------------------------------===//
+// Section Management
+//===----------------------------------------------------------------------===//
+
+const MCSectionMachO *MCContext::
+getMachOSection(StringRef Segment, StringRef Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2, SectionKind Kind) {
+
+ // We unique sections by their segment/section pair. The returned section
+ // may not have the same flags as the requested section, if so this should be
+ // diagnosed by the client as an error.
+
+ // Create the map if it doesn't already exist.
+ if (MachOUniquingMap == 0)
+ MachOUniquingMap = new MachOUniqueMapTy();
+ MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap;
+
+ // Form the name to look up.
+ SmallString<64> Name;
+ Name += Segment;
+ Name.push_back(',');
+ Name += Section;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionMachO *&Entry = Map[Name.str()];
+ if (Entry) return Entry;
+
+ // Otherwise, return a new section.
+ return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes,
+ Reserved2, Kind);
+}
+
+const MCSectionELF *MCContext::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind) {
+ return getELFSection(Section, Type, Flags, Kind, 0, "");
+}
+
+const MCSectionELF *MCContext::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind, unsigned EntrySize, StringRef Group) {
+ if (ELFUniquingMap == 0)
+ ELFUniquingMap = new ELFUniqueMapTy();
+ ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section);
+ if (Entry.getValue()) return Entry.getValue();
+
+ // Possibly refine the entry size first.
+ if (!EntrySize) {
+ EntrySize = MCSectionELF::DetermineEntrySize(Kind);
+ }
+
+ MCSymbol *GroupSym = NULL;
+ if (!Group.empty())
+ GroupSym = GetOrCreateSymbol(Group);
+
+ MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags,
+ Kind, EntrySize, GroupSym);
+ Entry.setValue(Result);
+ return Result;
+}
+
+const MCSectionELF *MCContext::CreateELFGroupSection() {
+ MCSectionELF *Result =
+ new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0,
+ SectionKind::getReadOnly(), 4, NULL);
+ return Result;
+}
+
+const MCSection *MCContext::getCOFFSection(StringRef Section,
+ unsigned Characteristics,
+ int Selection,
+ SectionKind Kind) {
+ if (COFFUniquingMap == 0)
+ COFFUniquingMap = new COFFUniqueMapTy();
+ COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section);
+ if (Entry.getValue()) return Entry.getValue();
+
+ MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(),
+ Characteristics,
+ Selection, Kind);
+
+ Entry.setValue(Result);
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Management
+//===----------------------------------------------------------------------===//
+
+/// GetDwarfFile - takes a file name an number to place in the dwarf file and
+/// directory tables. If the file number has already been allocated it is an
+/// error and zero is returned and the client reports the error, else the
+/// allocated file number is returned. The file numbers may be in any order.
+unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
+ unsigned FileNumber, unsigned CUID) {
+ // TODO: a FileNumber of zero says to use the next available file number.
+ // Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
+ // to not be less than one. This needs to be change to be not less than zero.
+
+ SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
+ SmallVectorImpl<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID];
+ // Make space for this FileNumber in the MCDwarfFiles vector if needed.
+ if (FileNumber >= MCDwarfFiles.size()) {
+ MCDwarfFiles.resize(FileNumber + 1);
+ } else {
+ MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber];
+ if (ExistingFile)
+ // It is an error to use see the same number more than once.
+ return 0;
+ }
+
+ // Get the new MCDwarfFile slot for this FileNumber.
+ MCDwarfFile *&File = MCDwarfFiles[FileNumber];
+
+ if (Directory.empty()) {
+ // Separate the directory part from the basename of the FileName.
+ StringRef tFileName = sys::path::filename(FileName);
+ if (!tFileName.empty()) {
+ Directory = sys::path::parent_path(FileName);
+ if (!Directory.empty())
+ FileName = tFileName;
+ }
+ }
+
+ // Find or make a entry in the MCDwarfDirs vector for this Directory.
+ // Capture directory name.
+ unsigned DirIndex;
+ if (Directory.empty()) {
+ // For FileNames with no directories a DirIndex of 0 is used.
+ DirIndex = 0;
+ } else {
+ DirIndex = 0;
+ for (unsigned End = MCDwarfDirs.size(); DirIndex < End; DirIndex++) {
+ if (Directory == MCDwarfDirs[DirIndex])
+ break;
+ }
+ if (DirIndex >= MCDwarfDirs.size()) {
+ char *Buf = static_cast<char *>(Allocate(Directory.size()));
+ memcpy(Buf, Directory.data(), Directory.size());
+ MCDwarfDirs.push_back(StringRef(Buf, Directory.size()));
+ }
+ // The DirIndex is one based, as DirIndex of 0 is used for FileNames with
+ // no directories. MCDwarfDirs[] is unlike MCDwarfFiles[] in that the
+ // directory names are stored at MCDwarfDirs[DirIndex-1] where FileNames
+ // are stored at MCDwarfFiles[FileNumber].Name .
+ DirIndex++;
+ }
+
+ // Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles
+ // vector.
+ char *Buf = static_cast<char *>(Allocate(FileName.size()));
+ memcpy(Buf, FileName.data(), FileName.size());
+ File = new (*this) MCDwarfFile(StringRef(Buf, FileName.size()), DirIndex);
+
+ // return the allocated FileNumber.
+ return FileNumber;
+}
+
+/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
+/// currently is assigned and false otherwise.
+bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
+ SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
+ if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
+ return false;
+
+ return MCDwarfFiles[FileNumber] != 0;
+}
+
+void MCContext::FatalError(SMLoc Loc, const Twine &Msg) {
+ // If we have a source manager and a location, use it. Otherwise just
+ // use the generic report_fatal_error().
+ if (!SrcMgr || Loc == SMLoc())
+ report_fatal_error(Msg);
+
+ // Use the source manager to print the message.
+ SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg);
+
+ // If we reached here, we are failing ungracefully. Run the interrupt handlers
+ // to make sure any special cleanups get done, in particular that we remove
+ // files registered with RemoveFileOnSignal.
+ sys::RunInterruptHandlers();
+ exit(1);
+}
diff --git a/contrib/llvm/lib/MC/MCDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler.cpp
new file mode 100644
index 000000000000..08096906462f
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler.cpp
@@ -0,0 +1,14 @@
+//===-- lib/MC/MCDisassembler.cpp - Disassembler interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDisassembler.h"
+using namespace llvm;
+
+MCDisassembler::~MCDisassembler() {
+}
diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
new file mode 100644
index 000000000000..4766b3747635
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -0,0 +1,223 @@
+//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Disassembler.h"
+#include "llvm-c/Disassembler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+
+namespace llvm {
+class Target;
+} // namespace llvm
+using namespace llvm;
+
+// LLVMCreateDisasm() creates a disassembler for the TripleName. Symbolic
+// disassembly is supported by passing a block of information in the DisInfo
+// parameter and specifying the TagType and callback functions as described in
+// the header llvm-c/Disassembler.h . The pointer to the block and the
+// functions can all be passed as NULL. If successful, this returns a
+// disassembler context. If not, it returns NULL.
+//
+LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
+ void *DisInfo, int TagType,
+ LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp){
+ // Get the target.
+ std::string Error;
+ const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
+ assert(TheTarget && "Unable to create target!");
+
+ // Get the assembler info needed to setup the MCContext.
+ const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple);
+ if (!MAI)
+ return 0;
+
+ const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
+ if (!MII)
+ return 0;
+
+ const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple);
+ if (!MRI)
+ return 0;
+
+ // Package up features to be passed to target/subtarget
+ std::string FeaturesStr;
+
+ const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU,
+ FeaturesStr);
+ if (!STI)
+ return 0;
+
+ // Set up the MCContext for creating symbols and MCExpr's.
+ MCContext *Ctx = new MCContext(*MAI, *MRI, 0);
+ if (!Ctx)
+ return 0;
+
+ // Set up disassembler.
+ MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI);
+ if (!DisAsm)
+ return 0;
+ DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx);
+
+ // Set up the instruction printer.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+ MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
+ *MAI, *MII, *MRI, *STI);
+ if (!IP)
+ return 0;
+
+ LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType,
+ GetOpInfo, SymbolLookUp,
+ TheTarget, MAI, MRI,
+ STI, MII, Ctx, DisAsm, IP);
+ if (!DC)
+ return 0;
+
+ return DC;
+}
+
+LLVMDisasmContextRef LLVMCreateDisasm(const char *Triple, void *DisInfo,
+ int TagType, LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp) {
+ return LLVMCreateDisasmCPU(Triple, "", DisInfo, TagType, GetOpInfo,
+ SymbolLookUp);
+}
+
+//
+// LLVMDisasmDispose() disposes of the disassembler specified by the context.
+//
+void LLVMDisasmDispose(LLVMDisasmContextRef DCR){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ delete DC;
+}
+
+namespace {
+//
+// The memory object created by LLVMDisasmInstruction().
+//
+class DisasmMemoryObject : public MemoryObject {
+ uint8_t *Bytes;
+ uint64_t Size;
+ uint64_t BasePC;
+public:
+ DisasmMemoryObject(uint8_t *bytes, uint64_t size, uint64_t basePC) :
+ Bytes(bytes), Size(size), BasePC(basePC) {}
+
+ uint64_t getBase() const { return BasePC; }
+ uint64_t getExtent() const { return Size; }
+
+ int readByte(uint64_t Addr, uint8_t *Byte) const {
+ if (Addr - BasePC >= Size)
+ return -1;
+ *Byte = Bytes[Addr - BasePC];
+ return 0;
+ }
+};
+} // end anonymous namespace
+
+//
+// LLVMDisasmInstruction() disassembles a single instruction using the
+// disassembler context specified in the parameter DC. The bytes of the
+// instruction are specified in the parameter Bytes, and contains at least
+// BytesSize number of bytes. The instruction is at the address specified by
+// the PC parameter. If a valid instruction can be disassembled its string is
+// returned indirectly in OutString which whos size is specified in the
+// parameter OutStringSize. This function returns the number of bytes in the
+// instruction or zero if there was no valid instruction. If this function
+// returns zero the caller will have to pick how many bytes they want to step
+// over by printing a .byte, .long etc. to continue.
+//
+size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
+ uint64_t BytesSize, uint64_t PC, char *OutString,
+ size_t OutStringSize){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ // Wrap the pointer to the Bytes, BytesSize and PC in a MemoryObject.
+ DisasmMemoryObject MemoryObject(Bytes, BytesSize, PC);
+
+ uint64_t Size;
+ MCInst Inst;
+ const MCDisassembler *DisAsm = DC->getDisAsm();
+ MCInstPrinter *IP = DC->getIP();
+ MCDisassembler::DecodeStatus S;
+ S = DisAsm->getInstruction(Inst, Size, MemoryObject, PC,
+ /*REMOVE*/ nulls(), DC->CommentStream);
+ switch (S) {
+ case MCDisassembler::Fail:
+ case MCDisassembler::SoftFail:
+ // FIXME: Do something different for soft failure modes?
+ return 0;
+
+ case MCDisassembler::Success: {
+ DC->CommentStream.flush();
+ StringRef Comments = DC->CommentsToEmit.str();
+
+ SmallVector<char, 64> InsnStr;
+ raw_svector_ostream OS(InsnStr);
+ IP->printInst(&Inst, OS, Comments);
+ OS.flush();
+
+ // Tell the comment stream that the vector changed underneath it.
+ DC->CommentsToEmit.clear();
+ DC->CommentStream.resync();
+
+ assert(OutStringSize != 0 && "Output buffer cannot be zero size");
+ size_t OutputSize = std::min(OutStringSize-1, InsnStr.size());
+ std::memcpy(OutString, InsnStr.data(), OutputSize);
+ OutString[OutputSize] = '\0'; // Terminate string.
+
+ return Size;
+ }
+ }
+ llvm_unreachable("Invalid DecodeStatus!");
+}
+
+//
+// LLVMSetDisasmOptions() sets the disassembler's options. It returns 1 if it
+// can set all the Options and 0 otherwise.
+//
+int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
+ if (Options & LLVMDisassembler_Option_UseMarkup){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ MCInstPrinter *IP = DC->getIP();
+ IP->setUseMarkup(1);
+ Options &= ~LLVMDisassembler_Option_UseMarkup;
+ }
+ if (Options & LLVMDisassembler_Option_PrintImmHex){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ MCInstPrinter *IP = DC->getIP();
+ IP->setPrintImmHex(1);
+ Options &= ~LLVMDisassembler_Option_PrintImmHex;
+ }
+ if (Options & LLVMDisassembler_Option_AsmPrinterVariant){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ // Try to set up the new instruction printer.
+ const MCAsmInfo *MAI = DC->getAsmInfo();
+ const MCInstrInfo *MII = DC->getInstrInfo();
+ const MCRegisterInfo *MRI = DC->getRegisterInfo();
+ const MCSubtargetInfo *STI = DC->getSubtargetInfo();
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+ AsmPrinterVariant = AsmPrinterVariant == 0 ? 1 : 0;
+ MCInstPrinter *IP = DC->getTarget()->createMCInstPrinter(
+ AsmPrinterVariant, *MAI, *MII, *MRI, *STI);
+ if (IP) {
+ DC->setIP(IP);
+ Options &= ~LLVMDisassembler_Option_AsmPrinterVariant;
+ }
+ }
+ return (Options == 0);
+}
diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h
new file mode 100644
index 000000000000..6eb59d0c57be
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h
@@ -0,0 +1,121 @@
+//===------------- Disassembler.h - LLVM Disassembler -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Disassembly library's disassembler
+// context. The disassembler is responsible for producing strings for
+// individual instructions according to a given architecture and disassembly
+// syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_DISASSEMBLER_H
+#define LLVM_MC_DISASSEMBLER_H
+
+#include "llvm-c/Disassembler.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+namespace llvm {
+class MCContext;
+class MCAsmInfo;
+class MCDisassembler;
+class MCInstPrinter;
+class MCInstrInfo;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+
+//
+// This is the disassembler context returned by LLVMCreateDisasm().
+//
+class LLVMDisasmContext {
+private:
+ //
+ // The passed parameters when the disassembler context is created.
+ //
+ // The TripleName for this disassembler.
+ std::string TripleName;
+ // The pointer to the caller's block of symbolic information.
+ void *DisInfo;
+ // The Triple specific symbolic information type returned by GetOpInfo.
+ int TagType;
+ // The function to get the symbolic information for operands.
+ LLVMOpInfoCallback GetOpInfo;
+ // The function to look up a symbol name.
+ LLVMSymbolLookupCallback SymbolLookUp;
+ //
+ // The objects created and saved by LLVMCreateDisasm() then used by
+ // LLVMDisasmInstruction().
+ //
+ // The LLVM target corresponding to the disassembler.
+ // FIXME: using llvm::OwningPtr<const llvm::Target> causes a malloc error
+ // when this LLVMDisasmContext is deleted.
+ const Target *TheTarget;
+ // The assembly information for the target architecture.
+ llvm::OwningPtr<const llvm::MCAsmInfo> MAI;
+ // The register information for the target architecture.
+ llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
+ // The subtarget information for the target architecture.
+ llvm::OwningPtr<const llvm::MCSubtargetInfo> MSI;
+ // The instruction information for the target architecture.
+ llvm::OwningPtr<const llvm::MCInstrInfo> MII;
+ // The assembly context for creating symbols and MCExprs.
+ llvm::OwningPtr<const llvm::MCContext> Ctx;
+ // The disassembler for the target architecture.
+ llvm::OwningPtr<const llvm::MCDisassembler> DisAsm;
+ // The instruction printer for the target architecture.
+ llvm::OwningPtr<llvm::MCInstPrinter> IP;
+
+public:
+ // Comment stream and backing vector.
+ SmallString<128> CommentsToEmit;
+ raw_svector_ostream CommentStream;
+
+ LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType,
+ LLVMOpInfoCallback getOpInfo,
+ LLVMSymbolLookupCallback symbolLookUp,
+ const Target *theTarget, const MCAsmInfo *mAI,
+ const MCRegisterInfo *mRI,
+ const MCSubtargetInfo *mSI,
+ const MCInstrInfo *mII,
+ llvm::MCContext *ctx, const MCDisassembler *disAsm,
+ MCInstPrinter *iP) : TripleName(tripleName),
+ DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo),
+ SymbolLookUp(symbolLookUp), TheTarget(theTarget),
+ CommentStream(CommentsToEmit) {
+ MAI.reset(mAI);
+ MRI.reset(mRI);
+ MSI.reset(mSI);
+ MII.reset(mII);
+ Ctx.reset(ctx);
+ DisAsm.reset(disAsm);
+ IP.reset(iP);
+ }
+ const std::string &getTripleName() const { return TripleName; }
+ void *getDisInfo() const { return DisInfo; }
+ int getTagType() const { return TagType; }
+ LLVMOpInfoCallback getGetOpInfo() const { return GetOpInfo; }
+ LLVMSymbolLookupCallback getSymbolLookupCallback() const {
+ return SymbolLookUp;
+ }
+ const Target *getTarget() const { return TheTarget; }
+ const MCDisassembler *getDisAsm() const { return DisAsm.get(); }
+ const MCAsmInfo *getAsmInfo() const { return MAI.get(); }
+ const MCInstrInfo *getInstrInfo() const { return MII.get(); }
+ const MCRegisterInfo *getRegisterInfo() const { return MRI.get(); }
+ const MCSubtargetInfo *getSubtargetInfo() const { return MSI.get(); }
+ MCInstPrinter *getIP() { return IP.get(); }
+ void setIP(MCInstPrinter *NewIP) { IP.reset(NewIP); }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
new file mode 100644
index 000000000000..0f8f0741bd7c
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -0,0 +1,1558 @@
+//===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Given a special op, return the address skip amount (in units of
+// DWARF2_LINE_MIN_INSN_LENGTH.
+#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE)
+
+// The maximum address skip amount that can be encoded with a special op.
+#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255)
+
+// First special line opcode - leave room for the standard opcodes.
+// Note: If you want to change this, you'll have to update the
+// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit().
+#define DWARF2_LINE_OPCODE_BASE 13
+
+// Minimum line offset in a special line info. opcode. This value
+// was chosen to give a reasonable range of values.
+#define DWARF2_LINE_BASE -5
+
+// Range of line offsets in a special line info. opcode.
+#define DWARF2_LINE_RANGE 14
+
+// Define the architecture-dependent minimum instruction length (in bytes).
+// This value should be rather too small than too big.
+#define DWARF2_LINE_MIN_INSN_LENGTH 1
+
+// Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1 which is the current setting,
+// this routine is a nop and will be optimized away.
+static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta) {
+ if (DWARF2_LINE_MIN_INSN_LENGTH == 1)
+ return AddrDelta;
+ if (AddrDelta % DWARF2_LINE_MIN_INSN_LENGTH != 0) {
+ // TODO: report this error, but really only once.
+ ;
+ }
+ return AddrDelta / DWARF2_LINE_MIN_INSN_LENGTH;
+}
+
+//
+// This is called when an instruction is assembled into the specified section
+// and if there is information from the last .loc directive that has yet to have
+// a line entry made for it is made.
+//
+void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) {
+ if (!MCOS->getContext().getDwarfLocSeen())
+ return;
+
+ // Create a symbol at in the current section for use in the line entry.
+ MCSymbol *LineSym = MCOS->getContext().CreateTempSymbol();
+ // Set the value of the symbol to use for the MCLineEntry.
+ MCOS->EmitLabel(LineSym);
+
+ // Get the current .loc info saved in the context.
+ const MCDwarfLoc &DwarfLoc = MCOS->getContext().getCurrentDwarfLoc();
+
+ // Create a (local) line entry with the symbol and the current .loc info.
+ MCLineEntry LineEntry(LineSym, DwarfLoc);
+
+ // clear DwarfLocSeen saying the current .loc info is now used.
+ MCOS->getContext().ClearDwarfLocSeen();
+
+ // Get the MCLineSection for this section, if one does not exist for this
+ // section create it.
+ const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ MCOS->getContext().getMCLineSections();
+ MCLineSection *LineSection = MCLineSections.lookup(Section);
+ if (!LineSection) {
+ // Create a new MCLineSection. This will be deleted after the dwarf line
+ // table is created using it by iterating through the MCLineSections
+ // DenseMap.
+ LineSection = new MCLineSection;
+ // Save a pointer to the new LineSection into the MCLineSections DenseMap.
+ MCOS->getContext().addMCLineSection(Section, LineSection);
+ }
+
+ // Add the line entry to this section's entries.
+ LineSection->addLineEntry(LineEntry,
+ MCOS->getContext().getDwarfCompileUnitID());
+}
+
+//
+// This helper routine returns an expression of End - Start + IntVal .
+//
+static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS,
+ const MCSymbol &Start,
+ const MCSymbol &End,
+ int IntVal) {
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(&End, Variant, MCOS.getContext());
+ const MCExpr *RHS =
+ MCSymbolRefExpr::Create(&Start, Variant, MCOS.getContext());
+ const MCExpr *Res1 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext());
+ const MCExpr *Res2 =
+ MCConstantExpr::Create(IntVal, MCOS.getContext());
+ const MCExpr *Res3 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, MCOS.getContext());
+ return Res3;
+}
+
+//
+// This emits the Dwarf line table for the specified section from the entries
+// in the LineSection.
+//
+static inline void EmitDwarfLineTable(MCStreamer *MCOS,
+ const MCSection *Section,
+ const MCLineSection *LineSection,
+ unsigned CUID) {
+ // This LineSection does not contain any LineEntry for the given Compile Unit.
+ if (!LineSection->containEntriesForID(CUID))
+ return;
+
+ unsigned FileNum = 1;
+ unsigned LastLine = 1;
+ unsigned Column = 0;
+ unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
+ unsigned Isa = 0;
+ MCSymbol *LastLabel = NULL;
+
+ // Loop through each MCLineEntry and encode the dwarf line number table.
+ for (MCLineSection::const_iterator
+ it = LineSection->getMCLineEntries(CUID).begin(),
+ ie = LineSection->getMCLineEntries(CUID).end(); it != ie; ++it) {
+
+ if (FileNum != it->getFileNum()) {
+ FileNum = it->getFileNum();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_file, 1);
+ MCOS->EmitULEB128IntValue(FileNum);
+ }
+ if (Column != it->getColumn()) {
+ Column = it->getColumn();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_column, 1);
+ MCOS->EmitULEB128IntValue(Column);
+ }
+ if (Isa != it->getIsa()) {
+ Isa = it->getIsa();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_isa, 1);
+ MCOS->EmitULEB128IntValue(Isa);
+ }
+ if ((it->getFlags() ^ Flags) & DWARF2_FLAG_IS_STMT) {
+ Flags = it->getFlags();
+ MCOS->EmitIntValue(dwarf::DW_LNS_negate_stmt, 1);
+ }
+ if (it->getFlags() & DWARF2_FLAG_BASIC_BLOCK)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_basic_block, 1);
+ if (it->getFlags() & DWARF2_FLAG_PROLOGUE_END)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_prologue_end, 1);
+ if (it->getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_epilogue_begin, 1);
+
+ int64_t LineDelta = static_cast<int64_t>(it->getLine()) - LastLine;
+ MCSymbol *Label = it->getLabel();
+
+ // At this point we want to emit/create the sequence to encode the delta in
+ // line numbers and the increment of the address from the previous Label
+ // and the current Label.
+ const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
+ MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
+ asmInfo.getPointerSize());
+
+ LastLine = it->getLine();
+ LastLabel = Label;
+ }
+
+ // Emit a DW_LNE_end_sequence for the end of the section.
+ // Using the pointer Section create a temporary label at the end of the
+ // section and use that and the LastLabel to compute the address delta
+ // and use INT64_MAX as the line delta which is the signal that this is
+ // actually a DW_LNE_end_sequence.
+
+ // Switch to the section to be able to create a symbol at its end.
+ MCOS->SwitchSection(Section);
+
+ MCContext &context = MCOS->getContext();
+ // Create a symbol at the end of the section.
+ MCSymbol *SectionEnd = context.CreateTempSymbol();
+ // Set the value of the symbol, as we are at the end of the section.
+ MCOS->EmitLabel(SectionEnd);
+
+ // Switch back the dwarf line section.
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
+
+ const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
+ MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
+ asmInfo.getPointerSize());
+}
+
+//
+// This emits the Dwarf file and the line tables.
+//
+const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+ // Switch to the section where the table will be emitted into.
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
+
+ const DenseMap<unsigned, MCSymbol *> &MCLineTableSymbols =
+ MCOS->getContext().getMCLineTableSymbols();
+ // CUID and MCLineTableSymbols are set in DwarfDebug, when DwarfDebug does
+ // not exist, CUID will be 0 and MCLineTableSymbols will be empty.
+ // Handle Compile Unit 0, the line table start symbol is the section symbol.
+ const MCSymbol *LineStartSym = EmitCU(MCOS, 0);
+ // Handle the rest of the Compile Units.
+ for (unsigned Is = 1, Ie = MCLineTableSymbols.size(); Is < Ie; Is++)
+ EmitCU(MCOS, Is);
+
+ // Now delete the MCLineSections that were created in MCLineEntry::Make()
+ // and used to emit the line table.
+ const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ MCOS->getContext().getMCLineSections();
+ for (DenseMap<const MCSection *, MCLineSection *>::const_iterator it =
+ MCLineSections.begin(), ie = MCLineSections.end(); it != ie;
+ ++it)
+ delete it->second;
+
+ return LineStartSym;
+}
+
+const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) {
+ MCContext &context = MCOS->getContext();
+
+ // Create a symbol at the beginning of the line table.
+ MCSymbol *LineStartSym = MCOS->getContext().getMCLineTableSymbol(CUID);
+ if (!LineStartSym)
+ LineStartSym = context.CreateTempSymbol();
+ // Set the value of the symbol, as we are at the start of the line table.
+ MCOS->EmitLabel(LineStartSym);
+
+ // Create a symbol for the end of the section (to be set when we get there).
+ MCSymbol *LineEndSym = context.CreateTempSymbol();
+
+ // The first 4 bytes is the total length of the information for this
+ // compilation unit (not including these 4 bytes for the length).
+ MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym,4),
+ 4);
+
+ // Next 2 bytes is the Version, which is Dwarf 2.
+ MCOS->EmitIntValue(2, 2);
+
+ // Create a symbol for the end of the prologue (to be set when we get there).
+ MCSymbol *ProEndSym = context.CreateTempSymbol(); // Lprologue_end
+
+ // Length of the prologue, is the next 4 bytes. Which is the start of the
+ // section to the end of the prologue. Not including the 4 bytes for the
+ // total length, the 2 bytes for the version, and these 4 bytes for the
+ // length of the prologue.
+ MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym,
+ (4 + 2 + 4)), 4, 0);
+
+ // Parameters of the state machine, are next.
+ MCOS->EmitIntValue(DWARF2_LINE_MIN_INSN_LENGTH, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_BASE, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_OPCODE_BASE, 1);
+
+ // Standard opcode lengths
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_copy
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_pc
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_line
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_file
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_column
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
+ MCOS->EmitIntValue(1, 1); // DW_LNS_set_isa
+
+ // Put out the directory and file tables.
+
+ // First the directory table.
+ const SmallVectorImpl<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs(CUID);
+ for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
+ MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName
+ MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string
+ }
+ MCOS->EmitIntValue(0, 1); // Terminate the directory list
+
+ // Second the file table.
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles(CUID);
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+ MCOS->EmitBytes(MCDwarfFiles[i]->getName()); // FileName
+ MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string
+ // the Directory num
+ MCOS->EmitULEB128IntValue(MCDwarfFiles[i]->getDirIndex());
+ MCOS->EmitIntValue(0, 1); // last modification timestamp (always 0)
+ MCOS->EmitIntValue(0, 1); // filesize (always 0)
+ }
+ MCOS->EmitIntValue(0, 1); // Terminate the file list
+
+ // This is the end of the prologue, so set the value of the symbol at the
+ // end of the prologue (that was used in a previous expression).
+ MCOS->EmitLabel(ProEndSym);
+
+ // Put out the line tables.
+ const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ MCOS->getContext().getMCLineSections();
+ const std::vector<const MCSection *> &MCLineSectionOrder =
+ MCOS->getContext().getMCLineSectionOrder();
+ for (std::vector<const MCSection*>::const_iterator it =
+ MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
+ ++it) {
+ const MCSection *Sec = *it;
+ const MCLineSection *Line = MCLineSections.lookup(Sec);
+ EmitDwarfLineTable(MCOS, Sec, Line, CUID);
+ }
+
+ if (MCOS->getContext().getAsmInfo().getLinkerRequiresNonEmptyDwarfLines()
+ && MCLineSectionOrder.begin() == MCLineSectionOrder.end()) {
+ // The darwin9 linker has a bug (see PR8715). For for 32-bit architectures
+ // it requires:
+ // total_length >= prologue_length + 10
+ // We are 4 bytes short, since we have total_length = 51 and
+ // prologue_length = 45
+
+ // The regular end_sequence should be sufficient.
+ MCDwarfLineAddr::Emit(MCOS, INT64_MAX, 0);
+ }
+
+ // This is the end of the section, so set the value of the symbol at the end
+ // of this section (that was used in a previous expression).
+ MCOS->EmitLabel(LineEndSym);
+
+ return LineStartSym;
+}
+
+/// Utility function to write the encoding to an object writer.
+void MCDwarfLineAddr::Write(MCObjectWriter *OW, int64_t LineDelta,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
+ OW->WriteBytes(OS.str());
+}
+
+/// Utility function to emit the encoding to a streamer.
+void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
+ MCOS->EmitBytes(OS.str());
+}
+
+/// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas.
+void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
+ raw_ostream &OS) {
+ uint64_t Temp, Opcode;
+ bool NeedCopy = false;
+
+ // Scale the address delta by the minimum instruction length.
+ AddrDelta = ScaleAddrDelta(AddrDelta);
+
+ // A LineDelta of INT64_MAX is a signal that this is actually a
+ // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the
+ // end_sequence to emit the matrix entry.
+ if (LineDelta == INT64_MAX) {
+ if (AddrDelta == MAX_SPECIAL_ADDR_DELTA)
+ OS << char(dwarf::DW_LNS_const_add_pc);
+ else {
+ OS << char(dwarf::DW_LNS_advance_pc);
+ encodeULEB128(AddrDelta, OS);
+ }
+ OS << char(dwarf::DW_LNS_extended_op);
+ OS << char(1);
+ OS << char(dwarf::DW_LNE_end_sequence);
+ return;
+ }
+
+ // Bias the line delta by the base.
+ Temp = LineDelta - DWARF2_LINE_BASE;
+
+ // If the line increment is out of range of a special opcode, we must encode
+ // it with DW_LNS_advance_line.
+ if (Temp >= DWARF2_LINE_RANGE) {
+ OS << char(dwarf::DW_LNS_advance_line);
+ encodeSLEB128(LineDelta, OS);
+
+ LineDelta = 0;
+ Temp = 0 - DWARF2_LINE_BASE;
+ NeedCopy = true;
+ }
+
+ // Use DW_LNS_copy instead of a "line +0, addr +0" special opcode.
+ if (LineDelta == 0 && AddrDelta == 0) {
+ OS << char(dwarf::DW_LNS_copy);
+ return;
+ }
+
+ // Bias the opcode by the special opcode base.
+ Temp += DWARF2_LINE_OPCODE_BASE;
+
+ // Avoid overflow when addr_delta is large.
+ if (AddrDelta < 256 + MAX_SPECIAL_ADDR_DELTA) {
+ // Try using a special opcode.
+ Opcode = Temp + AddrDelta * DWARF2_LINE_RANGE;
+ if (Opcode <= 255) {
+ OS << char(Opcode);
+ return;
+ }
+
+ // Try using DW_LNS_const_add_pc followed by special op.
+ Opcode = Temp + (AddrDelta - MAX_SPECIAL_ADDR_DELTA) * DWARF2_LINE_RANGE;
+ if (Opcode <= 255) {
+ OS << char(dwarf::DW_LNS_const_add_pc);
+ OS << char(Opcode);
+ return;
+ }
+ }
+
+ // Otherwise use DW_LNS_advance_pc.
+ OS << char(dwarf::DW_LNS_advance_pc);
+ encodeULEB128(AddrDelta, OS);
+
+ if (NeedCopy)
+ OS << char(dwarf::DW_LNS_copy);
+ else
+ OS << char(Temp);
+}
+
+void MCDwarfFile::print(raw_ostream &OS) const {
+ OS << '"' << getName() << '"';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MCDwarfFile::dump() const {
+ print(dbgs());
+}
+#endif
+
+// Utility function to write a tuple for .debug_abbrev.
+static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) {
+ MCOS->EmitULEB128IntValue(Name);
+ MCOS->EmitULEB128IntValue(Form);
+}
+
+// When generating dwarf for assembly source files this emits
+// the data for .debug_abbrev section which contains three DIEs.
+static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+
+ // DW_TAG_compile_unit DIE abbrev (1).
+ MCOS->EmitULEB128IntValue(1);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
+ EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string);
+ StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
+ if (!DwarfDebugFlags.empty())
+ EmitAbbrev(MCOS, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_producer, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_language, dwarf::DW_FORM_data2);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // DW_TAG_label DIE abbrev (2).
+ MCOS->EmitULEB128IntValue(2);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_label);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
+ EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // DW_TAG_unspecified_parameters DIE abbrev (3).
+ MCOS->EmitULEB128IntValue(3);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_unspecified_parameters);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_no, 1);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // Terminate the abbreviations for this compilation unit.
+ MCOS->EmitIntValue(0, 1);
+}
+
+// When generating dwarf for assembly source files this emits the data for
+// .debug_aranges section. Which contains a header and a table of pairs of
+// PointerSize'ed values for the address and size of section(s) with line table
+// entries (just the default .text in our case) and a terminating pair of zeros.
+static void EmitGenDwarfAranges(MCStreamer *MCOS,
+ const MCSymbol *InfoSectionSymbol) {
+ MCContext &context = MCOS->getContext();
+
+ // Create a symbol at the end of the section that we are creating the dwarf
+ // debugging info to use later in here as part of the expression to calculate
+ // the size of the section for the table.
+ MCOS->SwitchSection(context.getGenDwarfSection());
+ MCSymbol *SectionEndSym = context.CreateTempSymbol();
+ MCOS->EmitLabel(SectionEndSym);
+ context.setGenDwarfSectionEndSym(SectionEndSym);
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+
+ // This will be the length of the .debug_aranges section, first account for
+ // the size of each item in the header (see below where we emit these items).
+ int Length = 4 + 2 + 4 + 1 + 1;
+
+ // Figure the padding after the header before the table of address and size
+ // pairs who's values are PointerSize'ed.
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int AddrSize = asmInfo.getPointerSize();
+ int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1));
+ if (Pad == 2 * AddrSize)
+ Pad = 0;
+ Length += Pad;
+
+ // Add the size of the pair of PointerSize'ed values for the address and size
+ // of the one default .text section we have in the table.
+ Length += 2 * AddrSize;
+ // And the pair of terminating zeros.
+ Length += 2 * AddrSize;
+
+
+ // Emit the header for this section.
+ // The 4 byte length not including the 4 byte value for the length.
+ MCOS->EmitIntValue(Length - 4, 4);
+ // The 2 byte version, which is 2.
+ MCOS->EmitIntValue(2, 2);
+ // The 4 byte offset to the compile unit in the .debug_info from the start
+ // of the .debug_info.
+ if (InfoSectionSymbol)
+ MCOS->EmitSymbolValue(InfoSectionSymbol, 4);
+ else
+ MCOS->EmitIntValue(0, 4);
+ // The 1 byte size of an address.
+ MCOS->EmitIntValue(AddrSize, 1);
+ // The 1 byte size of a segment descriptor, we use a value of zero.
+ MCOS->EmitIntValue(0, 1);
+ // Align the header with the padding if needed, before we put out the table.
+ for(int i = 0; i < Pad; i++)
+ MCOS->EmitIntValue(0, 1);
+
+ // Now emit the table of pairs of PointerSize'ed values for the section(s)
+ // address and size, in our case just the one default .text section.
+ const MCExpr *Addr = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
+ const MCExpr *Size = MakeStartMinusEndExpr(*MCOS,
+ *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0);
+ MCOS->EmitAbsValue(Addr, AddrSize);
+ MCOS->EmitAbsValue(Size, AddrSize);
+
+ // And finally the pair of terminating zeros.
+ MCOS->EmitIntValue(0, AddrSize);
+ MCOS->EmitIntValue(0, AddrSize);
+}
+
+// When generating dwarf for assembly source files this emits the data for
+// .debug_info section which contains three parts. The header, the compile_unit
+// DIE and a list of label DIEs.
+static void EmitGenDwarfInfo(MCStreamer *MCOS,
+ const MCSymbol *AbbrevSectionSymbol,
+ const MCSymbol *LineSectionSymbol) {
+ MCContext &context = MCOS->getContext();
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+
+ // Create a symbol at the start and end of this section used in here for the
+ // expression to calculate the length in the header.
+ MCSymbol *InfoStart = context.CreateTempSymbol();
+ MCOS->EmitLabel(InfoStart);
+ MCSymbol *InfoEnd = context.CreateTempSymbol();
+
+ // First part: the header.
+
+ // The 4 byte total length of the information for this compilation unit, not
+ // including these 4 bytes.
+ const MCExpr *Length = MakeStartMinusEndExpr(*MCOS, *InfoStart, *InfoEnd, 4);
+ MCOS->EmitAbsValue(Length, 4);
+
+ // The 2 byte DWARF version, which is 2.
+ MCOS->EmitIntValue(2, 2);
+
+ // The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev,
+ // it is at the start of that section so this is zero.
+ if (AbbrevSectionSymbol) {
+ MCOS->EmitSymbolValue(AbbrevSectionSymbol, 4);
+ } else {
+ MCOS->EmitIntValue(0, 4);
+ }
+
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int AddrSize = asmInfo.getPointerSize();
+ // The 1 byte size of an address.
+ MCOS->EmitIntValue(AddrSize, 1);
+
+ // Second part: the compile_unit DIE.
+
+ // The DW_TAG_compile_unit DIE abbrev (1).
+ MCOS->EmitULEB128IntValue(1);
+
+ // DW_AT_stmt_list, a 4 byte offset from the start of the .debug_line section,
+ // which is at the start of that section so this is zero.
+ if (LineSectionSymbol) {
+ MCOS->EmitSymbolValue(LineSectionSymbol, 4);
+ } else {
+ MCOS->EmitIntValue(0, 4);
+ }
+
+ // AT_low_pc, the first address of the default .text section.
+ const MCExpr *Start = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(Start, AddrSize);
+
+ // AT_high_pc, the last address of the default .text section.
+ const MCExpr *End = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(End, AddrSize);
+
+ // AT_name, the name of the source file. Reconstruct from the first directory
+ // and file table entries.
+ const SmallVectorImpl<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs();
+ if (MCDwarfDirs.size() > 0) {
+ MCOS->EmitBytes(MCDwarfDirs[0]);
+ MCOS->EmitBytes("/");
+ }
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles();
+ MCOS->EmitBytes(MCDwarfFiles[1]->getName());
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_comp_dir, the working directory the assembly was done in.
+ MCOS->EmitBytes(context.getCompilationDir());
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_APPLE_flags, the command line arguments of the assembler tool.
+ StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
+ if (!DwarfDebugFlags.empty()){
+ MCOS->EmitBytes(DwarfDebugFlags);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+ }
+
+ // AT_producer, the version of the assembler tool.
+ StringRef DwarfDebugProducer = context.getDwarfDebugProducer();
+ if (!DwarfDebugProducer.empty()){
+ MCOS->EmitBytes(DwarfDebugProducer);
+ }
+ else {
+ MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "));
+ MCOS->EmitBytes(StringRef(PACKAGE_VERSION));
+ MCOS->EmitBytes(StringRef(")"));
+ }
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_language, a 4 byte value. We use DW_LANG_Mips_Assembler as the dwarf2
+ // draft has no standard code for assembler.
+ MCOS->EmitIntValue(dwarf::DW_LANG_Mips_Assembler, 2);
+
+ // Third part: the list of label DIEs.
+
+ // Loop on saved info for dwarf labels and create the DIEs for them.
+ const std::vector<const MCGenDwarfLabelEntry *> &Entries =
+ MCOS->getContext().getMCGenDwarfLabelEntries();
+ for (std::vector<const MCGenDwarfLabelEntry *>::const_iterator it =
+ Entries.begin(), ie = Entries.end(); it != ie;
+ ++it) {
+ const MCGenDwarfLabelEntry *Entry = *it;
+
+ // The DW_TAG_label DIE abbrev (2).
+ MCOS->EmitULEB128IntValue(2);
+
+ // AT_name, of the label without any leading underbar.
+ MCOS->EmitBytes(Entry->getName());
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_decl_file, index into the file table.
+ MCOS->EmitIntValue(Entry->getFileNumber(), 4);
+
+ // AT_decl_line, source line number.
+ MCOS->EmitIntValue(Entry->getLineNumber(), 4);
+
+ // AT_low_pc, start address of the label.
+ const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry->getLabel(),
+ MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(AT_low_pc, AddrSize);
+
+ // DW_AT_prototyped, a one byte flag value of 0 saying we have no prototype.
+ MCOS->EmitIntValue(0, 1);
+
+ // The DW_TAG_unspecified_parameters DIE abbrev (3).
+ MCOS->EmitULEB128IntValue(3);
+
+ // Add the NULL DIE terminating the DW_TAG_unspecified_parameters DIE's.
+ MCOS->EmitIntValue(0, 1);
+ }
+ // Deallocate the MCGenDwarfLabelEntry classes that saved away the info
+ // for the dwarf labels.
+ for (std::vector<const MCGenDwarfLabelEntry *>::const_iterator it =
+ Entries.begin(), ie = Entries.end(); it != ie;
+ ++it) {
+ const MCGenDwarfLabelEntry *Entry = *it;
+ delete Entry;
+ }
+
+ // Add the NULL DIE terminating the Compile Unit DIE's.
+ MCOS->EmitIntValue(0, 1);
+
+ // Now set the value of the symbol at the end of the info section.
+ MCOS->EmitLabel(InfoEnd);
+}
+
+//
+// When generating dwarf for assembly source files this emits the Dwarf
+// sections.
+//
+void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) {
+ // Create the dwarf sections in this order (.debug_line already created).
+ MCContext &context = MCOS->getContext();
+ const MCAsmInfo &AsmInfo = context.getAsmInfo();
+ bool CreateDwarfSectionSymbols =
+ AsmInfo.doesDwarfUseRelocationsAcrossSections();
+ if (!CreateDwarfSectionSymbols)
+ LineSectionSymbol = NULL;
+ MCSymbol *AbbrevSectionSymbol = NULL;
+ MCSymbol *InfoSectionSymbol = NULL;
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+ if (CreateDwarfSectionSymbols) {
+ InfoSectionSymbol = context.CreateTempSymbol();
+ MCOS->EmitLabel(InfoSectionSymbol);
+ }
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+ if (CreateDwarfSectionSymbols) {
+ AbbrevSectionSymbol = context.CreateTempSymbol();
+ MCOS->EmitLabel(AbbrevSectionSymbol);
+ }
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+
+ // If there are no line table entries then do not emit any section contents.
+ if (context.getMCLineSections().empty())
+ return;
+
+ // Output the data for .debug_aranges section.
+ EmitGenDwarfAranges(MCOS, InfoSectionSymbol);
+
+ // Output the data for .debug_abbrev section.
+ EmitGenDwarfAbbrev(MCOS);
+
+ // Output the data for .debug_info section.
+ EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol);
+}
+
+//
+// When generating dwarf for assembly source files this is called when symbol
+// for a label is created. If this symbol is not a temporary and is in the
+// section that dwarf is being generated for, save the needed info to create
+// a dwarf label.
+//
+void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
+ SourceMgr &SrcMgr, SMLoc &Loc) {
+ // We won't create dwarf labels for temporary symbols or symbols not in
+ // the default text.
+ if (Symbol->isTemporary())
+ return;
+ MCContext &context = MCOS->getContext();
+ if (context.getGenDwarfSection() != MCOS->getCurrentSection())
+ return;
+
+ // The dwarf label's name does not have the symbol name's leading
+ // underbar if any.
+ StringRef Name = Symbol->getName();
+ if (Name.startswith("_"))
+ Name = Name.substr(1, Name.size()-1);
+
+ // Get the dwarf file number to be used for the dwarf label.
+ unsigned FileNumber = context.getGenDwarfFileNumber();
+
+ // Finding the line number is the expensive part which is why we just don't
+ // pass it in as for some symbols we won't create a dwarf label.
+ int CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ unsigned LineNumber = SrcMgr.FindLineNumber(Loc, CurBuffer);
+
+ // We create a temporary symbol for use for the AT_high_pc and AT_low_pc
+ // values so that they don't have things like an ARM thumb bit from the
+ // original symbol. So when used they won't get a low bit set after
+ // relocation.
+ MCSymbol *Label = context.CreateTempSymbol();
+ MCOS->EmitLabel(Label);
+
+ // Create and entry for the info and add it to the other entries.
+ MCGenDwarfLabelEntry *Entry =
+ new MCGenDwarfLabelEntry(Name, FileNumber, LineNumber, Label);
+ MCOS->getContext().addMCGenDwarfLabelEntry(Entry);
+}
+
+static int getDataAlignmentFactor(MCStreamer &streamer) {
+ MCContext &context = streamer.getContext();
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int size = asmInfo.getCalleeSaveStackSlotSize();
+ if (asmInfo.isStackGrowthDirectionUp())
+ return size;
+ else
+ return -size;
+}
+
+static unsigned getSizeForEncoding(MCStreamer &streamer,
+ unsigned symbolEncoding) {
+ MCContext &context = streamer.getContext();
+ unsigned format = symbolEncoding & 0x0f;
+ switch (format) {
+ default: llvm_unreachable("Unknown Encoding");
+ case dwarf::DW_EH_PE_absptr:
+ case dwarf::DW_EH_PE_signed:
+ return context.getAsmInfo().getPointerSize();
+ case dwarf::DW_EH_PE_udata2:
+ case dwarf::DW_EH_PE_sdata2:
+ return 2;
+ case dwarf::DW_EH_PE_udata4:
+ case dwarf::DW_EH_PE_sdata4:
+ return 4;
+ case dwarf::DW_EH_PE_udata8:
+ case dwarf::DW_EH_PE_sdata8:
+ return 8;
+ }
+}
+
+static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
+ unsigned symbolEncoding, const char *comment = 0) {
+ MCContext &context = streamer.getContext();
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol,
+ symbolEncoding,
+ streamer);
+ unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+ if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
+ streamer.EmitAbsValue(v, size);
+}
+
+static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
+ unsigned symbolEncoding) {
+ MCContext &context = streamer.getContext();
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ const MCExpr *v = asmInfo.getExprForPersonalitySymbol(&symbol,
+ symbolEncoding,
+ streamer);
+ unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+ streamer.EmitValue(v, size);
+}
+
+static const MachineLocation TranslateMachineLocation(
+ const MCRegisterInfo &MRI,
+ const MachineLocation &Loc) {
+ unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ?
+ MachineLocation::VirtualFP :
+ unsigned(MRI.getDwarfRegNum(Loc.getReg(), true));
+ const MachineLocation &NewLoc = Loc.isReg() ?
+ MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
+ return NewLoc;
+}
+
+namespace {
+ class FrameEmitterImpl {
+ int CFAOffset;
+ int CIENum;
+ bool UsingCFI;
+ bool IsEH;
+ const MCSymbol *SectionStart;
+ public:
+ FrameEmitterImpl(bool usingCFI, bool isEH)
+ : CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH),
+ SectionStart(0) {}
+
+ void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
+
+ /// EmitCompactUnwind - Emit the unwind information in a compact way. If
+ /// we're successful, return 'true'. Otherwise, return 'false' and it will
+ /// emit the normal CIE and FDE.
+ bool EmitCompactUnwind(MCStreamer &streamer,
+ const MCDwarfFrameInfo &frame);
+
+ const MCSymbol &EmitCIE(MCStreamer &streamer,
+ const MCSymbol *personality,
+ unsigned personalityEncoding,
+ const MCSymbol *lsda,
+ bool IsSignalFrame,
+ unsigned lsdaEncoding);
+ MCSymbol *EmitFDE(MCStreamer &streamer,
+ const MCSymbol &cieStart,
+ const MCDwarfFrameInfo &frame);
+ void EmitCFIInstructions(MCStreamer &streamer,
+ const std::vector<MCCFIInstruction> &Instrs,
+ MCSymbol *BaseLabel);
+ void EmitCFIInstruction(MCStreamer &Streamer,
+ const MCCFIInstruction &Instr);
+ };
+
+} // end anonymous namespace
+
+static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
+ StringRef Prefix) {
+ if (Streamer.isVerboseAsm()) {
+ const char *EncStr;
+ switch (Encoding) {
+ default: EncStr = "<unknown encoding>"; break;
+ case dwarf::DW_EH_PE_absptr: EncStr = "absptr"; break;
+ case dwarf::DW_EH_PE_omit: EncStr = "omit"; break;
+ case dwarf::DW_EH_PE_pcrel: EncStr = "pcrel"; break;
+ case dwarf::DW_EH_PE_udata4: EncStr = "udata4"; break;
+ case dwarf::DW_EH_PE_udata8: EncStr = "udata8"; break;
+ case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4"; break;
+ case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8"; break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
+ EncStr = "pcrel udata4";
+ break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
+ EncStr = "pcrel sdata4";
+ break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
+ EncStr = "pcrel udata8";
+ break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
+ EncStr = "screl sdata8";
+ break;
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
+ EncStr = "indirect pcrel udata4";
+ break;
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
+ EncStr = "indirect pcrel sdata4";
+ break;
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
+ EncStr = "indirect pcrel udata8";
+ break;
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
+ EncStr = "indirect pcrel sdata8";
+ break;
+ }
+
+ Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
+ }
+
+ Streamer.EmitIntValue(Encoding, 1);
+}
+
+void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
+ const MCCFIInstruction &Instr) {
+ int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
+ bool VerboseAsm = Streamer.isVerboseAsm();
+
+ switch (Instr.getOperation()) {
+ case MCCFIInstruction::OpRegister: {
+ unsigned Reg1 = Instr.getRegister();
+ unsigned Reg2 = Instr.getRegister2();
+ if (VerboseAsm) {
+ Streamer.AddComment("DW_CFA_register");
+ Streamer.AddComment(Twine("Reg1 ") + Twine(Reg1));
+ Streamer.AddComment(Twine("Reg2 ") + Twine(Reg2));
+ }
+ Streamer.EmitIntValue(dwarf::DW_CFA_register, 1);
+ Streamer.EmitULEB128IntValue(Reg1);
+ Streamer.EmitULEB128IntValue(Reg2);
+ return;
+ }
+ case MCCFIInstruction::OpUndefined: {
+ unsigned Reg = Instr.getRegister();
+ if (VerboseAsm) {
+ Streamer.AddComment("DW_CFA_undefined");
+ Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ }
+ Streamer.EmitIntValue(dwarf::DW_CFA_undefined, 1);
+ Streamer.EmitULEB128IntValue(Reg);
+ return;
+ }
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ case MCCFIInstruction::OpDefCfaOffset: {
+ const bool IsRelative =
+ Instr.getOperation() == MCCFIInstruction::OpAdjustCfaOffset;
+
+ if (VerboseAsm)
+ Streamer.AddComment("DW_CFA_def_cfa_offset");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
+
+ if (IsRelative)
+ CFAOffset += Instr.getOffset();
+ else
+ CFAOffset = -Instr.getOffset();
+
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
+ Streamer.EmitULEB128IntValue(CFAOffset);
+
+ return;
+ }
+ case MCCFIInstruction::OpDefCfa: {
+ if (VerboseAsm)
+ Streamer.AddComment("DW_CFA_def_cfa");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
+
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Reg ") + Twine(Instr.getRegister()));
+ Streamer.EmitULEB128IntValue(Instr.getRegister());
+
+ CFAOffset = -Instr.getOffset();
+
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
+ Streamer.EmitULEB128IntValue(CFAOffset);
+
+ return;
+ }
+
+ case MCCFIInstruction::OpDefCfaRegister: {
+ if (VerboseAsm)
+ Streamer.AddComment("DW_CFA_def_cfa_register");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
+
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Reg ") + Twine(Instr.getRegister()));
+ Streamer.EmitULEB128IntValue(Instr.getRegister());
+
+ return;
+ }
+
+ case MCCFIInstruction::OpOffset:
+ case MCCFIInstruction::OpRelOffset: {
+ const bool IsRelative =
+ Instr.getOperation() == MCCFIInstruction::OpRelOffset;
+
+ unsigned Reg = Instr.getRegister();
+ int Offset = Instr.getOffset();
+ if (IsRelative)
+ Offset -= CFAOffset;
+ Offset = Offset / dataAlignmentFactor;
+
+ if (Offset < 0) {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended_sf");
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ Streamer.EmitULEB128IntValue(Reg);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
+ Streamer.EmitSLEB128IntValue(Offset);
+ } else if (Reg < 64) {
+ if (VerboseAsm) Streamer.AddComment(Twine("DW_CFA_offset + Reg(") +
+ Twine(Reg) + ")");
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
+ Streamer.EmitULEB128IntValue(Offset);
+ } else {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended");
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ Streamer.EmitULEB128IntValue(Reg);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
+ Streamer.EmitULEB128IntValue(Offset);
+ }
+ return;
+ }
+ case MCCFIInstruction::OpRememberState:
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
+ Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
+ return;
+ case MCCFIInstruction::OpRestoreState:
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
+ return;
+ case MCCFIInstruction::OpSameValue: {
+ unsigned Reg = Instr.getRegister();
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value");
+ Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ Streamer.EmitULEB128IntValue(Reg);
+ return;
+ }
+ case MCCFIInstruction::OpRestore: {
+ unsigned Reg = Instr.getRegister();
+ if (VerboseAsm) {
+ Streamer.AddComment("DW_CFA_restore");
+ Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ }
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
+ return;
+ }
+ case MCCFIInstruction::OpEscape:
+ if (VerboseAsm) Streamer.AddComment("Escape bytes");
+ Streamer.EmitBytes(Instr.getValues());
+ return;
+ }
+ llvm_unreachable("Unhandled case in switch");
+}
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
+ const std::vector<MCCFIInstruction> &Instrs,
+ MCSymbol *BaseLabel) {
+ for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
+ const MCCFIInstruction &Instr = Instrs[i];
+ MCSymbol *Label = Instr.getLabel();
+ // Throw out move if the label is invalid.
+ if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+
+ // Advance row if new location.
+ if (BaseLabel && Label) {
+ MCSymbol *ThisSym = Label;
+ if (ThisSym != BaseLabel) {
+ if (streamer.isVerboseAsm()) streamer.AddComment("DW_CFA_advance_loc4");
+ streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
+ BaseLabel = ThisSym;
+ }
+ }
+
+ EmitCFIInstruction(streamer, Instr);
+ }
+}
+
+/// EmitCompactUnwind - Emit the unwind information in a compact way. If we're
+/// successful, return 'true'. Otherwise, return 'false' and it will emit the
+/// normal CIE and FDE.
+bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
+ const MCDwarfFrameInfo &Frame) {
+ MCContext &Context = Streamer.getContext();
+ const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
+ bool VerboseAsm = Streamer.isVerboseAsm();
+
+ // range-start range-length compact-unwind-enc personality-func lsda
+ // _foo LfooEnd-_foo 0x00000023 0 0
+ // _bar LbarEnd-_bar 0x00000025 __gxx_personality except_tab1
+ //
+ // .section __LD,__compact_unwind,regular,debug
+ //
+ // # compact unwind for _foo
+ // .quad _foo
+ // .set L1,LfooEnd-_foo
+ // .long L1
+ // .long 0x01010001
+ // .quad 0
+ // .quad 0
+ //
+ // # compact unwind for _bar
+ // .quad _bar
+ // .set L2,LbarEnd-_bar
+ // .long L2
+ // .long 0x01020011
+ // .quad __gxx_personality
+ // .quad except_tab1
+
+ uint32_t Encoding = Frame.CompactUnwindEncoding;
+ if (!Encoding) return false;
+
+ // The encoding needs to know we have an LSDA.
+ if (Frame.Lsda)
+ Encoding |= 0x40000000;
+
+ Streamer.SwitchSection(MOFI->getCompactUnwindSection());
+
+ // Range Start
+ unsigned FDEEncoding = MOFI->getFDEEncoding(UsingCFI);
+ unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
+ if (VerboseAsm) Streamer.AddComment("Range Start");
+ Streamer.EmitSymbolValue(Frame.Function, Size);
+
+ // Range Length
+ const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin,
+ *Frame.End, 0);
+ if (VerboseAsm) Streamer.AddComment("Range Length");
+ Streamer.EmitAbsValue(Range, 4);
+
+ // Compact Encoding
+ Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4);
+ if (VerboseAsm) Streamer.AddComment("Compact Unwind Encoding: 0x" +
+ Twine::utohexstr(Encoding));
+ Streamer.EmitIntValue(Encoding, Size);
+
+
+ // Personality Function
+ Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
+ if (VerboseAsm) Streamer.AddComment("Personality Function");
+ if (Frame.Personality)
+ Streamer.EmitSymbolValue(Frame.Personality, Size);
+ else
+ Streamer.EmitIntValue(0, Size); // No personality fn
+
+ // LSDA
+ Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding);
+ if (VerboseAsm) Streamer.AddComment("LSDA");
+ if (Frame.Lsda)
+ Streamer.EmitSymbolValue(Frame.Lsda, Size);
+ else
+ Streamer.EmitIntValue(0, Size); // No LSDA
+
+ return true;
+}
+
+const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
+ const MCSymbol *personality,
+ unsigned personalityEncoding,
+ const MCSymbol *lsda,
+ bool IsSignalFrame,
+ unsigned lsdaEncoding) {
+ MCContext &context = streamer.getContext();
+ const MCRegisterInfo &MRI = context.getRegisterInfo();
+ const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
+ bool verboseAsm = streamer.isVerboseAsm();
+
+ MCSymbol *sectionStart;
+ if (MOFI->isFunctionEHFrameSymbolPrivate() || !IsEH)
+ sectionStart = context.CreateTempSymbol();
+ else
+ sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum));
+
+ streamer.EmitLabel(sectionStart);
+ CIENum++;
+
+ MCSymbol *sectionEnd = context.CreateTempSymbol();
+
+ // Length
+ const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
+ *sectionEnd, 4);
+ if (verboseAsm) streamer.AddComment("CIE Length");
+ streamer.EmitAbsValue(Length, 4);
+
+ // CIE ID
+ unsigned CIE_ID = IsEH ? 0 : -1;
+ if (verboseAsm) streamer.AddComment("CIE ID Tag");
+ streamer.EmitIntValue(CIE_ID, 4);
+
+ // Version
+ if (verboseAsm) streamer.AddComment("DW_CIE_VERSION");
+ streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1);
+
+ // Augmentation String
+ SmallString<8> Augmentation;
+ if (IsEH) {
+ if (verboseAsm) streamer.AddComment("CIE Augmentation");
+ Augmentation += "z";
+ if (personality)
+ Augmentation += "P";
+ if (lsda)
+ Augmentation += "L";
+ Augmentation += "R";
+ if (IsSignalFrame)
+ Augmentation += "S";
+ streamer.EmitBytes(Augmentation.str());
+ }
+ streamer.EmitIntValue(0, 1);
+
+ // Code Alignment Factor
+ if (verboseAsm) streamer.AddComment("CIE Code Alignment Factor");
+ streamer.EmitULEB128IntValue(1);
+
+ // Data Alignment Factor
+ if (verboseAsm) streamer.AddComment("CIE Data Alignment Factor");
+ streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
+
+ // Return Address Register
+ if (verboseAsm) streamer.AddComment("CIE Return Address Column");
+ streamer.EmitULEB128IntValue(MRI.getDwarfRegNum(MRI.getRARegister(), true));
+
+ // Augmentation Data Length (optional)
+
+ unsigned augmentationLength = 0;
+ if (IsEH) {
+ if (personality) {
+ // Personality Encoding
+ augmentationLength += 1;
+ // Personality
+ augmentationLength += getSizeForEncoding(streamer, personalityEncoding);
+ }
+ if (lsda)
+ augmentationLength += 1;
+ // Encoding of the FDE pointers
+ augmentationLength += 1;
+
+ if (verboseAsm) streamer.AddComment("Augmentation Size");
+ streamer.EmitULEB128IntValue(augmentationLength);
+
+ // Augmentation Data (optional)
+ if (personality) {
+ // Personality Encoding
+ EmitEncodingByte(streamer, personalityEncoding,
+ "Personality Encoding");
+ // Personality
+ if (verboseAsm) streamer.AddComment("Personality");
+ EmitPersonality(streamer, *personality, personalityEncoding);
+ }
+
+ if (lsda)
+ EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding");
+
+ // Encoding of the FDE pointers
+ EmitEncodingByte(streamer, MOFI->getFDEEncoding(UsingCFI),
+ "FDE Encoding");
+ }
+
+ // Initial Instructions
+
+ const MCAsmInfo &MAI = context.getAsmInfo();
+ const std::vector<MachineMove> &Moves = MAI.getInitialFrameState();
+ std::vector<MCCFIInstruction> Instructions;
+
+ for (int i = 0, n = Moves.size(); i != n; ++i) {
+ MCSymbol *Label = Moves[i].getLabel();
+ const MachineLocation &Dst =
+ TranslateMachineLocation(MRI, Moves[i].getDestination());
+ const MachineLocation &Src =
+ TranslateMachineLocation(MRI, Moves[i].getSource());
+
+ if (Dst.isReg()) {
+ assert(Dst.getReg() == MachineLocation::VirtualFP);
+ assert(!Src.isReg());
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(Label, Src.getReg(), -Src.getOffset());
+ Instructions.push_back(Inst);
+ } else {
+ assert(Src.isReg());
+ unsigned Reg = Src.getReg();
+ int Offset = Dst.getOffset();
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createOffset(Label, Reg, Offset);
+ Instructions.push_back(Inst);
+ }
+ }
+
+ EmitCFIInstructions(streamer, Instructions, NULL);
+
+ // Padding
+ streamer.EmitValueToAlignment(IsEH
+ ? 4 : context.getAsmInfo().getPointerSize());
+
+ streamer.EmitLabel(sectionEnd);
+ return *sectionStart;
+}
+
+MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
+ const MCSymbol &cieStart,
+ const MCDwarfFrameInfo &frame) {
+ MCContext &context = streamer.getContext();
+ MCSymbol *fdeStart = context.CreateTempSymbol();
+ MCSymbol *fdeEnd = context.CreateTempSymbol();
+ const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
+ bool verboseAsm = streamer.isVerboseAsm();
+
+ if (IsEH && frame.Function && !MOFI->isFunctionEHFrameSymbolPrivate()) {
+ MCSymbol *EHSym =
+ context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh"));
+ streamer.EmitEHSymAttributes(frame.Function, EHSym);
+ streamer.EmitLabel(EHSym);
+ }
+
+ // Length
+ const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
+ if (verboseAsm) streamer.AddComment("FDE Length");
+ streamer.EmitAbsValue(Length, 4);
+
+ streamer.EmitLabel(fdeStart);
+
+ // CIE Pointer
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ if (IsEH) {
+ const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
+ 0);
+ if (verboseAsm) streamer.AddComment("FDE CIE Offset");
+ streamer.EmitAbsValue(offset, 4);
+ } else if (!asmInfo.doesDwarfUseRelocationsAcrossSections()) {
+ const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart,
+ cieStart, 0);
+ streamer.EmitAbsValue(offset, 4);
+ } else {
+ streamer.EmitSymbolValue(&cieStart, 4);
+ }
+
+ // PC Begin
+ unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI)
+ : (unsigned)dwarf::DW_EH_PE_absptr;
+ unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
+ EmitSymbol(streamer, *frame.Begin, PCEncoding, "FDE initial location");
+
+ // PC Range
+ const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
+ *frame.End, 0);
+ if (verboseAsm) streamer.AddComment("FDE address range");
+ streamer.EmitAbsValue(Range, PCSize);
+
+ if (IsEH) {
+ // Augmentation Data Length
+ unsigned augmentationLength = 0;
+
+ if (frame.Lsda)
+ augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
+
+ if (verboseAsm) streamer.AddComment("Augmentation size");
+ streamer.EmitULEB128IntValue(augmentationLength);
+
+ // Augmentation Data
+ if (frame.Lsda)
+ EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding,
+ "Language Specific Data Area");
+ }
+
+ // Call Frame Instructions
+
+ EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
+
+ // Padding
+ streamer.EmitValueToAlignment(PCSize);
+
+ return fdeEnd;
+}
+
+namespace {
+ struct CIEKey {
+ static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1, false); }
+ static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0, false); }
+
+ CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_,
+ unsigned LsdaEncoding_, bool IsSignalFrame_) :
+ Personality(Personality_), PersonalityEncoding(PersonalityEncoding_),
+ LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_) {
+ }
+ const MCSymbol* Personality;
+ unsigned PersonalityEncoding;
+ unsigned LsdaEncoding;
+ bool IsSignalFrame;
+ };
+}
+
+namespace llvm {
+ template <>
+ struct DenseMapInfo<CIEKey> {
+ static CIEKey getEmptyKey() {
+ return CIEKey::getEmptyKey();
+ }
+ static CIEKey getTombstoneKey() {
+ return CIEKey::getTombstoneKey();
+ }
+ static unsigned getHashValue(const CIEKey &Key) {
+ return static_cast<unsigned>(hash_combine(Key.Personality,
+ Key.PersonalityEncoding,
+ Key.LsdaEncoding,
+ Key.IsSignalFrame));
+ }
+ static bool isEqual(const CIEKey &LHS,
+ const CIEKey &RHS) {
+ return LHS.Personality == RHS.Personality &&
+ LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
+ LHS.LsdaEncoding == RHS.LsdaEncoding &&
+ LHS.IsSignalFrame == RHS.IsSignalFrame;
+ }
+ };
+}
+
+void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
+ bool UsingCFI,
+ bool IsEH) {
+ MCContext &Context = Streamer.getContext();
+ MCObjectFileInfo *MOFI =
+ const_cast<MCObjectFileInfo*>(Context.getObjectFileInfo());
+ FrameEmitterImpl Emitter(UsingCFI, IsEH);
+ ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
+
+ // Emit the compact unwind info if available.
+ if (IsEH && MOFI->getCompactUnwindSection())
+ for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
+ const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
+ if (Frame.CompactUnwindEncoding)
+ Emitter.EmitCompactUnwind(Streamer, Frame);
+ }
+
+ const MCSection &Section = IsEH ? *MOFI->getEHFrameSection() :
+ *MOFI->getDwarfFrameSection();
+ Streamer.SwitchSection(&Section);
+ MCSymbol *SectionStart = Context.CreateTempSymbol();
+ Streamer.EmitLabel(SectionStart);
+ Emitter.setSectionStart(SectionStart);
+
+ MCSymbol *FDEEnd = NULL;
+ DenseMap<CIEKey, const MCSymbol*> CIEStarts;
+
+ const MCSymbol *DummyDebugKey = NULL;
+ for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) {
+ const MCDwarfFrameInfo &Frame = FrameArray[i];
+ CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
+ Frame.LsdaEncoding, Frame.IsSignalFrame);
+ const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
+ if (!CIEStart)
+ CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
+ Frame.PersonalityEncoding, Frame.Lsda,
+ Frame.IsSignalFrame,
+ Frame.LsdaEncoding);
+
+ FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
+
+ if (i != n - 1)
+ Streamer.EmitLabel(FDEEnd);
+ }
+
+ Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize());
+ if (FDEEnd)
+ Streamer.EmitLabel(FDEEnd);
+}
+
+void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS);
+ Streamer.EmitBytes(OS.str());
+}
+
+void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta,
+ raw_ostream &OS) {
+ // FIXME: Assumes the code alignment factor is 1.
+ if (AddrDelta == 0) {
+ } else if (isUIntN(6, AddrDelta)) {
+ uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta;
+ OS << Opcode;
+ } else if (isUInt<8>(AddrDelta)) {
+ OS << uint8_t(dwarf::DW_CFA_advance_loc1);
+ OS << uint8_t(AddrDelta);
+ } else if (isUInt<16>(AddrDelta)) {
+ // FIXME: check what is the correct behavior on a big endian machine.
+ OS << uint8_t(dwarf::DW_CFA_advance_loc2);
+ OS << uint8_t( AddrDelta & 0xff);
+ OS << uint8_t((AddrDelta >> 8) & 0xff);
+ } else {
+ // FIXME: check what is the correct behavior on a big endian machine.
+ assert(isUInt<32>(AddrDelta));
+ OS << uint8_t(dwarf::DW_CFA_advance_loc4);
+ OS << uint8_t( AddrDelta & 0xff);
+ OS << uint8_t((AddrDelta >> 8) & 0xff);
+ OS << uint8_t((AddrDelta >> 16) & 0xff);
+ OS << uint8_t((AddrDelta >> 24) & 0xff);
+
+ }
+}
diff --git a/contrib/llvm/lib/MC/MCELF.cpp b/contrib/llvm/lib/MC/MCELF.cpp
new file mode 100644
index 000000000000..560cdbc6abae
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELF.cpp
@@ -0,0 +1,86 @@
+//===- lib/MC/MCELF.cpp - MC ELF ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/Support/ELF.h"
+
+namespace llvm {
+
+void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) {
+ assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+ Binding == ELF::STB_WEAK);
+ uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
+ SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
+}
+
+unsigned MCELF::GetBinding(const MCSymbolData &SD) {
+ uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
+ assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+ Binding == ELF::STB_WEAK);
+ return Binding;
+}
+
+void MCELF::SetType(MCSymbolData &SD, unsigned Type) {
+ assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+ Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+ Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+ Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
+
+ uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
+ SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
+}
+
+unsigned MCELF::GetType(const MCSymbolData &SD) {
+ uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift;
+ assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+ Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+ Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+ Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
+ return Type;
+}
+
+// Visibility is stored in the first two bits of st_other
+// st_other values are stored in the second byte of get/setFlags
+void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) {
+ assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+ Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+
+ uint32_t OtherFlags = SD.getFlags() & ~(0x3 << ELF_STV_Shift);
+ SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
+}
+
+unsigned MCELF::GetVisibility(MCSymbolData &SD) {
+ unsigned Visibility =
+ (SD.getFlags() & (0x3 << ELF_STV_Shift)) >> ELF_STV_Shift;
+ assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+ Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+ return Visibility;
+}
+
+// Other is stored in the last six bits of st_other
+// st_other values are stored in the second byte of get/setFlags
+void MCELF::setOther(MCSymbolData &SD, unsigned Other) {
+ uint32_t OtherFlags = SD.getFlags() & ~(0x3f << ELF_Other_Shift);
+ SD.setFlags(OtherFlags | (Other << ELF_Other_Shift));
+}
+
+unsigned MCELF::getOther(MCSymbolData &SD) {
+ unsigned Other =
+ (SD.getFlags() & (0x3f << ELF_Other_Shift)) >> ELF_Other_Shift;
+ return Other;
+}
+
+}
diff --git a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
new file mode 100644
index 000000000000..4cac84d66609
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
@@ -0,0 +1,51 @@
+//===-- MCELFObjectTargetWriter.cpp - ELF Target Writer Subclass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+
+using namespace llvm;
+
+MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_,
+ uint8_t OSABI_,
+ uint16_t EMachine_,
+ bool HasRelocationAddend_,
+ bool IsN64_)
+ : OSABI(OSABI_), EMachine(EMachine_),
+ HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_),
+ IsN64(IsN64_){
+}
+
+const MCSymbol *MCELFObjectTargetWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return NULL;
+}
+
+const MCSymbol *MCELFObjectTargetWriter::undefinedExplicitRelSym(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ return &Symbol.AliasedSymbol();
+}
+
+void MCELFObjectTargetWriter::adjustFixupOffset(const MCFixup &Fixup,
+ uint64_t &RelocOffset) {
+}
+
+void
+MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ // Sort by the r_offset, just like gnu as does.
+ array_pod_sort(Relocs.begin(), Relocs.end());
+}
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp
new file mode 100644
index 000000000000..7f5f1b63e5fe
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp
@@ -0,0 +1,552 @@
+//===- lib/MC/MCELFStreamer.cpp - ELF Object Output -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ELF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+
+inline void MCELFStreamer::SetSection(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind) {
+ SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+}
+
+inline void MCELFStreamer::SetSectionData() {
+ SetSection(".data",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+}
+
+inline void MCELFStreamer::SetSectionText() {
+ SetSection(".text",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
+ SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+}
+
+inline void MCELFStreamer::SetSectionBss() {
+ SetSection(".bss",
+ ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+}
+
+MCELFStreamer::~MCELFStreamer() {
+}
+
+void MCELFStreamer::InitToTextSection() {
+ SetSectionText();
+}
+
+void MCELFStreamer::InitSections() {
+ // This emulates the same behavior of GNU as. This makes it easier
+ // to compare the output as the major sections are in the same order.
+ SetSectionText();
+ SetSectionData();
+ SetSectionBss();
+ SetSectionText();
+}
+
+void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCObjectStreamer::EmitLabel(Symbol);
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(Symbol->getSection());
+ MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
+ if (Section.getFlags() & ELF::SHF_TLS)
+ MCELF::SetType(SD, ELF::STT_TLS);
+}
+
+void MCELFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+}
+
+void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ case MCAF_SyntaxUnified: return; // no-op here.
+ case MCAF_Code16: return; // Change parsing mode; no-op here.
+ case MCAF_Code32: return; // Change parsing mode; no-op here.
+ case MCAF_Code64: return; // Change parsing mode; no-op here.
+ case MCAF_SubsectionsViaSymbols:
+ getAssembler().setSubsectionsViaSymbols(true);
+ return;
+ }
+
+ llvm_unreachable("invalid assembler flag!");
+}
+
+void MCELFStreamer::ChangeSection(const MCSection *Section) {
+ MCSectionData *CurSection = getCurrentSectionData();
+ if (CurSection && CurSection->isBundleLocked())
+ report_fatal_error("Unterminated .bundle_lock when changing a section");
+ const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup();
+ if (Grp)
+ getAssembler().getOrCreateSymbolData(*Grp);
+ this->MCObjectStreamer::ChangeSection(Section);
+}
+
+void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ MCSymbolData &AliasSD = getAssembler().getOrCreateSymbolData(*Alias);
+ AliasSD.setFlags(AliasSD.getFlags() | ELF_Other_Weakref);
+ const MCExpr *Value = MCSymbolRefExpr::Create(Symbol, getContext());
+ Alias->setVariableValue(Value);
+}
+
+void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ // Indirect symbols are handled differently, to match how 'as' handles
+ // them. This makes writing matching .o files easier.
+ if (Attribute == MCSA_IndirectSymbol) {
+ // Note that we intentionally cannot use the symbol data here; this is
+ // important for matching the string table that 'as' generates.
+ IndirectSymbolData ISD;
+ ISD.Symbol = Symbol;
+ ISD.SectionData = getCurrentSectionData();
+ getAssembler().getIndirectSymbols().push_back(ISD);
+ return;
+ }
+
+ // Adding a symbol attribute always introduces the symbol, note that an
+ // important side effect of calling getOrCreateSymbolData here is to register
+ // the symbol with the assembler.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // The implementation of symbol attributes is designed to match 'as', but it
+ // leaves much to desired. It doesn't really make sense to arbitrarily add and
+ // remove flags, but 'as' allows this (in particular, see .desc).
+ //
+ // In the future it might be worth trying to make these operations more well
+ // defined.
+ switch (Attribute) {
+ case MCSA_LazyReference:
+ case MCSA_Reference:
+ case MCSA_SymbolResolver:
+ case MCSA_PrivateExtern:
+ case MCSA_WeakDefinition:
+ case MCSA_WeakDefAutoPrivate:
+ case MCSA_Invalid:
+ case MCSA_IndirectSymbol:
+ llvm_unreachable("Invalid symbol attribute for ELF!");
+
+ case MCSA_NoDeadStrip:
+ case MCSA_ELF_TypeGnuUniqueObject:
+ // Ignore for now.
+ break;
+
+ case MCSA_Global:
+ MCELF::SetBinding(SD, ELF::STB_GLOBAL);
+ SD.setExternal(true);
+ BindingExplicitlySet.insert(Symbol);
+ break;
+
+ case MCSA_WeakReference:
+ case MCSA_Weak:
+ MCELF::SetBinding(SD, ELF::STB_WEAK);
+ SD.setExternal(true);
+ BindingExplicitlySet.insert(Symbol);
+ break;
+
+ case MCSA_Local:
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ BindingExplicitlySet.insert(Symbol);
+ break;
+
+ case MCSA_ELF_TypeFunction:
+ MCELF::SetType(SD, ELF::STT_FUNC);
+ break;
+
+ case MCSA_ELF_TypeIndFunction:
+ MCELF::SetType(SD, ELF::STT_GNU_IFUNC);
+ break;
+
+ case MCSA_ELF_TypeObject:
+ MCELF::SetType(SD, ELF::STT_OBJECT);
+ break;
+
+ case MCSA_ELF_TypeTLS:
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+
+ case MCSA_ELF_TypeCommon:
+ MCELF::SetType(SD, ELF::STT_COMMON);
+ break;
+
+ case MCSA_ELF_TypeNoType:
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ break;
+
+ case MCSA_Protected:
+ MCELF::SetVisibility(SD, ELF::STV_PROTECTED);
+ break;
+
+ case MCSA_Hidden:
+ MCELF::SetVisibility(SD, ELF::STV_HIDDEN);
+ break;
+
+ case MCSA_Internal:
+ MCELF::SetVisibility(SD, ELF::STV_INTERNAL);
+ break;
+ }
+}
+
+void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ if (!BindingExplicitlySet.count(Symbol)) {
+ MCELF::SetBinding(SD, ELF::STB_GLOBAL);
+ SD.setExternal(true);
+ }
+
+ MCELF::SetType(SD, ELF::STT_OBJECT);
+
+ if (MCELF::GetBinding(SD) == ELF_STB_Local) {
+ const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
+ ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+ Symbol->setSection(*Section);
+
+ struct LocalCommon L = {&SD, Size, ByteAlignment};
+ LocalCommons.push_back(L);
+ } else {
+ SD.setCommon(Size, ByteAlignment);
+ }
+
+ SD.setSize(MCConstantExpr::Create(Size, getContext()));
+}
+
+void MCELFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setSize(Value);
+}
+
+void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ // FIXME: Should this be caught and done earlier?
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ BindingExplicitlySet.insert(Symbol);
+ EmitCommonSymbol(Symbol, Size, ByteAlignment);
+}
+
+void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ if (getCurrentSectionData()->isBundleLocked())
+ report_fatal_error("Emitting values inside a locked bundle is forbidden");
+ fixSymbolsInTLSFixups(Value);
+ MCObjectStreamer::EmitValueImpl(Value, Size, AddrSpace);
+}
+
+void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ if (getCurrentSectionData()->isBundleLocked())
+ report_fatal_error("Emitting values inside a locked bundle is forbidden");
+ MCObjectStreamer::EmitValueToAlignment(ByteAlignment, Value,
+ ValueSize, MaxBytesToEmit);
+}
+
+
+// Add a symbol for the file name of this module. This is the second
+// entry in the module's symbol table (the first being the null symbol).
+void MCELFStreamer::EmitFileDirective(StringRef Filename) {
+ MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
+ Symbol->setSection(*getCurrentSection());
+ Symbol->setAbsolute();
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default);
+}
+
+void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
+ switch (expr->getKind()) {
+ case MCExpr::Target:
+ cast<MCTargetExpr>(expr)->fixELFSymbolsInTLSFixups(getAssembler());
+ break;
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *be = cast<MCBinaryExpr>(expr);
+ fixSymbolsInTLSFixups(be->getLHS());
+ fixSymbolsInTLSFixups(be->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &symRef = *cast<MCSymbolRefExpr>(expr);
+ switch (symRef.getKind()) {
+ default:
+ return;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ case MCSymbolRefExpr::VK_NTPOFF:
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ case MCSymbolRefExpr::VK_TLSGD:
+ case MCSymbolRefExpr::VK_TLSLD:
+ case MCSymbolRefExpr::VK_TLSLDM:
+ case MCSymbolRefExpr::VK_TPOFF:
+ case MCSymbolRefExpr::VK_DTPOFF:
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ case MCSymbolRefExpr::VK_ARM_TPOFF:
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ case MCSymbolRefExpr::VK_Mips_TLSGD:
+ case MCSymbolRefExpr::VK_Mips_GOTTPREL:
+ case MCSymbolRefExpr::VK_Mips_TPREL_HI:
+ case MCSymbolRefExpr::VK_Mips_TPREL_LO:
+ case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
+ case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA:
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO:
+ case MCSymbolRefExpr::VK_PPC_TLS:
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA:
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO:
+ case MCSymbolRefExpr::VK_PPC_TLSGD:
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA:
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+ case MCSymbolRefExpr::VK_PPC_TLSLD:
+ break;
+ }
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol());
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixSymbolsInTLSFixups(cast<MCUnaryExpr>(expr)->getSubExpr());
+ break;
+ }
+}
+
+void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
+ this->MCObjectStreamer::EmitInstToFragment(Inst);
+ MCRelaxableFragment &F = *cast<MCRelaxableFragment>(getCurrentFragment());
+
+ for (unsigned i = 0, e = F.getFixups().size(); i != e; ++i)
+ fixSymbolsInTLSFixups(F.getFixups()[i].getValue());
+}
+
+void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
+ MCAssembler &Assembler = getAssembler();
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+ fixSymbolsInTLSFixups(Fixups[i].getValue());
+
+ // There are several possibilities here:
+ //
+ // If bundling is disabled, append the encoded instruction to the current data
+ // fragment (or create a new such fragment if the current fragment is not a
+ // data fragment).
+ //
+ // If bundling is enabled:
+ // - If we're not in a bundle-locked group, emit the instruction into a
+ // fragment of its own. If there are no fixups registered for the
+ // instruction, emit a MCCompactEncodedInstFragment. Otherwise, emit a
+ // MCDataFragment.
+ // - If we're in a bundle-locked group, append the instruction to the current
+ // data fragment because we want all the instructions in a group to get into
+ // the same fragment. Be careful not to do that for the first instruction in
+ // the group, though.
+ MCDataFragment *DF;
+
+ if (Assembler.isBundlingEnabled()) {
+ MCSectionData *SD = getCurrentSectionData();
+ if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst())
+ // If we are bundle-locked, we re-use the current fragment.
+ // The bundle-locking directive ensures this is a new data fragment.
+ DF = cast<MCDataFragment>(getCurrentFragment());
+ else if (!SD->isBundleLocked() && Fixups.size() == 0) {
+ // Optimize memory usage by emitting the instruction to a
+ // MCCompactEncodedInstFragment when not in a bundle-locked group and
+ // there are no fixups registered.
+ MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(SD);
+ CEIF->getContents().append(Code.begin(), Code.end());
+ return;
+ } else {
+ DF = new MCDataFragment(SD);
+ if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) {
+ // If this is a new fragment created for a bundle-locked group, and the
+ // group was marked as "align_to_end", set a flag in the fragment.
+ DF->setAlignToBundleEnd(true);
+ }
+ }
+
+ // We're now emitting an instruction in a bundle group, so this flag has
+ // to be turned off.
+ SD->setBundleGroupBeforeFirstInst(false);
+ } else {
+ DF = getOrCreateDataFragment();
+ }
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->getFixups().push_back(Fixups[i]);
+ }
+ DF->setHasInstructions(true);
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
+ assert(AlignPow2 <= 30 && "Invalid bundle alignment");
+ MCAssembler &Assembler = getAssembler();
+ if (Assembler.getBundleAlignSize() == 0 && AlignPow2 > 0)
+ Assembler.setBundleAlignSize(1 << AlignPow2);
+ else
+ report_fatal_error(".bundle_align_mode should be only set once per file");
+}
+
+void MCELFStreamer::EmitBundleLock(bool AlignToEnd) {
+ MCSectionData *SD = getCurrentSectionData();
+
+ // Sanity checks
+ //
+ if (!getAssembler().isBundlingEnabled())
+ report_fatal_error(".bundle_lock forbidden when bundling is disabled");
+ else if (SD->isBundleLocked())
+ report_fatal_error("Nesting of .bundle_lock is forbidden");
+
+ SD->setBundleLockState(AlignToEnd ? MCSectionData::BundleLockedAlignToEnd :
+ MCSectionData::BundleLocked);
+ SD->setBundleGroupBeforeFirstInst(true);
+}
+
+void MCELFStreamer::EmitBundleUnlock() {
+ MCSectionData *SD = getCurrentSectionData();
+
+ // Sanity checks
+ if (!getAssembler().isBundlingEnabled())
+ report_fatal_error(".bundle_unlock forbidden when bundling is disabled");
+ else if (!SD->isBundleLocked())
+ report_fatal_error(".bundle_unlock without matching lock");
+ else if (SD->isBundleGroupBeforeFirstInst())
+ report_fatal_error("Empty bundle-locked group is forbidden");
+
+ SD->setBundleLockState(MCSectionData::NotBundleLocked);
+}
+
+void MCELFStreamer::FinishImpl() {
+ EmitFrames(true);
+
+ for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
+ e = LocalCommons.end();
+ i != e; ++i) {
+ MCSymbolData *SD = i->SD;
+ uint64_t Size = i->Size;
+ unsigned ByteAlignment = i->ByteAlignment;
+ const MCSymbol &Symbol = SD->getSymbol();
+ const MCSection &Section = Symbol.getSection();
+
+ MCSectionData &SectData = getAssembler().getOrCreateSectionData(Section);
+ new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &SectData);
+
+ MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+ SD->setFragment(F);
+
+ // Update the maximum alignment of the section if necessary.
+ if (ByteAlignment > SectData.getAlignment())
+ SectData.setAlignment(ByteAlignment);
+ }
+
+ this->MCObjectStreamer::FinishImpl();
+}
+void MCELFStreamer::EmitTCEntry(const MCSymbol &S) {
+ // Creates a R_PPC64_TOC relocation
+ MCObjectStreamer::EmitSymbolValue(&S, 8);
+}
+
+MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll, bool NoExecStack) {
+ MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+}
+
+void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
+ llvm_unreachable("Generic ELF doesn't support this directive");
+}
+
+MCSymbolData &MCELFStreamer::getOrCreateSymbolData(MCSymbol *Symbol) {
+ return getAssembler().getOrCreateSymbolData(*Symbol);
+}
+
+void MCELFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitCOFFSymbolType(int Type) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EndCOFFSymbolDef() {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
new file mode 100644
index 000000000000..cd4d144575b1
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -0,0 +1,658 @@
+//===- MCExpr.cpp - Assembly Level Expression Implementation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mcexpr"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+namespace stats {
+STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations");
+}
+}
+
+void MCExpr::print(raw_ostream &OS) const {
+ switch (getKind()) {
+ case MCExpr::Target:
+ return cast<MCTargetExpr>(this)->PrintImpl(OS);
+ case MCExpr::Constant:
+ OS << cast<MCConstantExpr>(*this).getValue();
+ return;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
+ const MCSymbol &Sym = SRE.getSymbol();
+ // Parenthesize names that start with $ so that they don't look like
+ // absolute names.
+ bool UseParens = Sym.getName()[0] == '$';
+
+ if (SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_HA16 ||
+ SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_LO16) {
+ OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+ UseParens = true;
+ }
+
+ if (UseParens)
+ OS << '(' << Sym << ')';
+ else
+ OS << Sym;
+
+ if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_NONE ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_PLT ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TLSGD ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1 ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET2 ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_PREL31)
+ OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+ else if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
+ SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 &&
+ SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_LO16)
+ OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+
+ return;
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr &UE = cast<MCUnaryExpr>(*this);
+ switch (UE.getOpcode()) {
+ case MCUnaryExpr::LNot: OS << '!'; break;
+ case MCUnaryExpr::Minus: OS << '-'; break;
+ case MCUnaryExpr::Not: OS << '~'; break;
+ case MCUnaryExpr::Plus: OS << '+'; break;
+ }
+ OS << *UE.getSubExpr();
+ return;
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr &BE = cast<MCBinaryExpr>(*this);
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) {
+ OS << *BE.getLHS();
+ } else {
+ OS << '(' << *BE.getLHS() << ')';
+ }
+
+ switch (BE.getOpcode()) {
+ case MCBinaryExpr::Add:
+ // Print "X-42" instead of "X+-42".
+ if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
+ if (RHSC->getValue() < 0) {
+ OS << RHSC->getValue();
+ return;
+ }
+ }
+
+ OS << '+';
+ break;
+ case MCBinaryExpr::And: OS << '&'; break;
+ case MCBinaryExpr::Div: OS << '/'; break;
+ case MCBinaryExpr::EQ: OS << "=="; break;
+ case MCBinaryExpr::GT: OS << '>'; break;
+ case MCBinaryExpr::GTE: OS << ">="; break;
+ case MCBinaryExpr::LAnd: OS << "&&"; break;
+ case MCBinaryExpr::LOr: OS << "||"; break;
+ case MCBinaryExpr::LT: OS << '<'; break;
+ case MCBinaryExpr::LTE: OS << "<="; break;
+ case MCBinaryExpr::Mod: OS << '%'; break;
+ case MCBinaryExpr::Mul: OS << '*'; break;
+ case MCBinaryExpr::NE: OS << "!="; break;
+ case MCBinaryExpr::Or: OS << '|'; break;
+ case MCBinaryExpr::Shl: OS << "<<"; break;
+ case MCBinaryExpr::Shr: OS << ">>"; break;
+ case MCBinaryExpr::Sub: OS << '-'; break;
+ case MCBinaryExpr::Xor: OS << '^'; break;
+ }
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
+ OS << *BE.getRHS();
+ } else {
+ OS << '(' << *BE.getRHS() << ')';
+ }
+ return;
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MCExpr::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
+
+/* *** */
+
+const MCBinaryExpr *MCBinaryExpr::Create(Opcode Opc, const MCExpr *LHS,
+ const MCExpr *RHS, MCContext &Ctx) {
+ return new (Ctx) MCBinaryExpr(Opc, LHS, RHS);
+}
+
+const MCUnaryExpr *MCUnaryExpr::Create(Opcode Opc, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) MCUnaryExpr(Opc, Expr);
+}
+
+const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) {
+ return new (Ctx) MCConstantExpr(Value);
+}
+
+/* *** */
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
+ VariantKind Kind,
+ MCContext &Ctx) {
+ return new (Ctx) MCSymbolRefExpr(Sym, Kind);
+}
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
+ MCContext &Ctx) {
+ return Create(Ctx.GetOrCreateSymbol(Name), Kind, Ctx);
+}
+
+StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
+ switch (Kind) {
+ case VK_Invalid: return "<<invalid>>";
+ case VK_None: return "<<none>>";
+
+ case VK_GOT: return "GOT";
+ case VK_GOTOFF: return "GOTOFF";
+ case VK_GOTPCREL: return "GOTPCREL";
+ case VK_GOTTPOFF: return "GOTTPOFF";
+ case VK_INDNTPOFF: return "INDNTPOFF";
+ case VK_NTPOFF: return "NTPOFF";
+ case VK_GOTNTPOFF: return "GOTNTPOFF";
+ case VK_PLT: return "PLT";
+ case VK_TLSGD: return "TLSGD";
+ case VK_TLSLD: return "TLSLD";
+ case VK_TLSLDM: return "TLSLDM";
+ case VK_TPOFF: return "TPOFF";
+ case VK_DTPOFF: return "DTPOFF";
+ case VK_TLVP: return "TLVP";
+ case VK_SECREL: return "SECREL32";
+ case VK_ARM_NONE: return "(NONE)";
+ case VK_ARM_PLT: return "(PLT)";
+ case VK_ARM_GOT: return "(GOT)";
+ case VK_ARM_GOTOFF: return "(GOTOFF)";
+ case VK_ARM_TPOFF: return "(tpoff)";
+ case VK_ARM_GOTTPOFF: return "(gottpoff)";
+ case VK_ARM_TLSGD: return "(tlsgd)";
+ case VK_ARM_TARGET1: return "(target1)";
+ case VK_ARM_TARGET2: return "(target2)";
+ case VK_ARM_PREL31: return "(prel31)";
+ case VK_PPC_TOC: return "tocbase";
+ case VK_PPC_TOC_ENTRY: return "toc";
+ case VK_PPC_DARWIN_HA16: return "ha16";
+ case VK_PPC_DARWIN_LO16: return "lo16";
+ case VK_PPC_GAS_HA16: return "ha";
+ case VK_PPC_GAS_LO16: return "l";
+ case VK_PPC_TPREL16_HA: return "tprel@ha";
+ case VK_PPC_TPREL16_LO: return "tprel@l";
+ case VK_PPC_DTPREL16_HA: return "dtprel@ha";
+ case VK_PPC_DTPREL16_LO: return "dtprel@l";
+ case VK_PPC_TOC16_HA: return "toc@ha";
+ case VK_PPC_TOC16_LO: return "toc@l";
+ case VK_PPC_GOT_TPREL16_HA: return "got@tprel@ha";
+ case VK_PPC_GOT_TPREL16_LO: return "got@tprel@l";
+ case VK_PPC_TLS: return "tls";
+ case VK_PPC_GOT_TLSGD16_HA: return "got@tlsgd@ha";
+ case VK_PPC_GOT_TLSGD16_LO: return "got@tlsgd@l";
+ case VK_PPC_GOT_TLSLD16_HA: return "got@tlsld@ha";
+ case VK_PPC_GOT_TLSLD16_LO: return "got@tlsld@l";
+ case VK_PPC_TLSGD: return "tlsgd";
+ case VK_PPC_TLSLD: return "tlsld";
+ case VK_Mips_GPREL: return "GPREL";
+ case VK_Mips_GOT_CALL: return "GOT_CALL";
+ case VK_Mips_GOT16: return "GOT16";
+ case VK_Mips_GOT: return "GOT";
+ case VK_Mips_ABS_HI: return "ABS_HI";
+ case VK_Mips_ABS_LO: return "ABS_LO";
+ case VK_Mips_TLSGD: return "TLSGD";
+ case VK_Mips_TLSLDM: return "TLSLDM";
+ case VK_Mips_DTPREL_HI: return "DTPREL_HI";
+ case VK_Mips_DTPREL_LO: return "DTPREL_LO";
+ case VK_Mips_GOTTPREL: return "GOTTPREL";
+ case VK_Mips_TPREL_HI: return "TPREL_HI";
+ case VK_Mips_TPREL_LO: return "TPREL_LO";
+ case VK_Mips_GPOFF_HI: return "GPOFF_HI";
+ case VK_Mips_GPOFF_LO: return "GPOFF_LO";
+ case VK_Mips_GOT_DISP: return "GOT_DISP";
+ case VK_Mips_GOT_PAGE: return "GOT_PAGE";
+ case VK_Mips_GOT_OFST: return "GOT_OFST";
+ case VK_Mips_HIGHER: return "HIGHER";
+ case VK_Mips_HIGHEST: return "HIGHEST";
+ case VK_Mips_GOT_HI16: return "GOT_HI16";
+ case VK_Mips_GOT_LO16: return "GOT_LO16";
+ case VK_Mips_CALL_HI16: return "CALL_HI16";
+ case VK_Mips_CALL_LO16: return "CALL_LO16";
+ }
+ llvm_unreachable("Invalid variant kind");
+}
+
+MCSymbolRefExpr::VariantKind
+MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
+ return StringSwitch<VariantKind>(Name)
+ .Case("GOT", VK_GOT)
+ .Case("got", VK_GOT)
+ .Case("GOTOFF", VK_GOTOFF)
+ .Case("gotoff", VK_GOTOFF)
+ .Case("GOTPCREL", VK_GOTPCREL)
+ .Case("gotpcrel", VK_GOTPCREL)
+ .Case("GOTTPOFF", VK_GOTTPOFF)
+ .Case("gottpoff", VK_GOTTPOFF)
+ .Case("INDNTPOFF", VK_INDNTPOFF)
+ .Case("indntpoff", VK_INDNTPOFF)
+ .Case("NTPOFF", VK_NTPOFF)
+ .Case("ntpoff", VK_NTPOFF)
+ .Case("GOTNTPOFF", VK_GOTNTPOFF)
+ .Case("gotntpoff", VK_GOTNTPOFF)
+ .Case("PLT", VK_PLT)
+ .Case("plt", VK_PLT)
+ .Case("TLSGD", VK_TLSGD)
+ .Case("tlsgd", VK_TLSGD)
+ .Case("TLSLD", VK_TLSLD)
+ .Case("tlsld", VK_TLSLD)
+ .Case("TLSLDM", VK_TLSLDM)
+ .Case("tlsldm", VK_TLSLDM)
+ .Case("TPOFF", VK_TPOFF)
+ .Case("tpoff", VK_TPOFF)
+ .Case("DTPOFF", VK_DTPOFF)
+ .Case("dtpoff", VK_DTPOFF)
+ .Case("TLVP", VK_TLVP)
+ .Case("tlvp", VK_TLVP)
+ .Default(VK_Invalid);
+}
+
+/* *** */
+
+void MCTargetExpr::anchor() {}
+
+/* *** */
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const {
+ return EvaluateAsAbsolute(Res, 0, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+ const MCAsmLayout &Layout) const {
+ return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+ const MCAsmLayout &Layout,
+ const SectionAddrMap &Addrs) const {
+ return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
+ return EvaluateAsAbsolute(Res, &Asm, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs) const {
+ MCValue Value;
+
+ // Fast path constants.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(this)) {
+ Res = CE->getValue();
+ return true;
+ }
+
+ // FIXME: The use if InSet = Addrs is a hack. Setting InSet causes us
+ // absolutize differences across sections and that is what the MachO writer
+ // uses Addrs for.
+ bool IsRelocatable =
+ EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs);
+
+ // Record the current value.
+ Res = Value.getConstant();
+
+ return IsRelocatable && Value.isAbsolute();
+}
+
+/// \brief Helper method for \see EvaluateSymbolAdd().
+static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet,
+ const MCSymbolRefExpr *&A,
+ const MCSymbolRefExpr *&B,
+ int64_t &Addend) {
+ if (!A || !B)
+ return;
+
+ const MCSymbol &SA = A->getSymbol();
+ const MCSymbol &SB = B->getSymbol();
+
+ if (SA.isUndefined() || SB.isUndefined())
+ return;
+
+ if (!Asm->getWriter().IsSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
+ return;
+
+ MCSymbolData &AD = Asm->getSymbolData(SA);
+ MCSymbolData &BD = Asm->getSymbolData(SB);
+
+ if (AD.getFragment() == BD.getFragment()) {
+ Addend += (AD.getOffset() - BD.getOffset());
+
+ // Pointers to Thumb symbols need to have their low-bit set to allow
+ // for interworking.
+ if (Asm->isThumbFunc(&SA))
+ Addend |= 1;
+
+ // Clear the symbol expr pointers to indicate we have folded these
+ // operands.
+ A = B = 0;
+ return;
+ }
+
+ if (!Layout)
+ return;
+
+ const MCSectionData &SecA = *AD.getFragment()->getParent();
+ const MCSectionData &SecB = *BD.getFragment()->getParent();
+
+ if ((&SecA != &SecB) && !Addrs)
+ return;
+
+ // Eagerly evaluate.
+ Addend += (Layout->getSymbolOffset(&Asm->getSymbolData(A->getSymbol())) -
+ Layout->getSymbolOffset(&Asm->getSymbolData(B->getSymbol())));
+ if (Addrs && (&SecA != &SecB))
+ Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
+
+ // Pointers to Thumb symbols need to have their low-bit set to allow
+ // for interworking.
+ if (Asm->isThumbFunc(&SA))
+ Addend |= 1;
+
+ // Clear the symbol expr pointers to indicate we have folded these
+ // operands.
+ A = B = 0;
+}
+
+/// \brief Evaluate the result of an add between (conceptually) two MCValues.
+///
+/// This routine conceptually attempts to construct an MCValue:
+/// Result = (Result_A - Result_B + Result_Cst)
+/// from two MCValue's LHS and RHS where
+/// Result = LHS + RHS
+/// and
+/// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+///
+/// This routine attempts to aggresively fold the operands such that the result
+/// is representable in an MCValue, but may not always succeed.
+///
+/// \returns True on success, false if the result is not representable in an
+/// MCValue.
+
+/// NOTE: It is really important to have both the Asm and Layout arguments.
+/// They might look redundant, but this function can be used before layout
+/// is done (see the object streamer for example) and having the Asm argument
+/// lets us avoid relaxations early.
+static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet,
+ const MCValue &LHS,const MCSymbolRefExpr *RHS_A,
+ const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst,
+ MCValue &Res) {
+ // FIXME: This routine (and other evaluation parts) are *incredibly* sloppy
+ // about dealing with modifiers. This will ultimately bite us, one day.
+ const MCSymbolRefExpr *LHS_A = LHS.getSymA();
+ const MCSymbolRefExpr *LHS_B = LHS.getSymB();
+ int64_t LHS_Cst = LHS.getConstant();
+
+ // Fold the result constant immediately.
+ int64_t Result_Cst = LHS_Cst + RHS_Cst;
+
+ assert((!Layout || Asm) &&
+ "Must have an assembler object if layout is given!");
+
+ // If we have a layout, we can fold resolved differences.
+ if (Asm) {
+ // First, fold out any differences which are fully resolved. By
+ // reassociating terms in
+ // Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+ // we have the four possible differences:
+ // (LHS_A - LHS_B),
+ // (LHS_A - RHS_B),
+ // (RHS_A - LHS_B),
+ // (RHS_A - RHS_B).
+ // Since we are attempting to be as aggressive as possible about folding, we
+ // attempt to evaluate each possible alternative.
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, RHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, LHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, RHS_B,
+ Result_Cst);
+ }
+
+ // We can't represent the addition or subtraction of two symbols.
+ if ((LHS_A && RHS_A) || (LHS_B && RHS_B))
+ return false;
+
+ // At this point, we have at most one additive symbol and one subtractive
+ // symbol -- find them.
+ const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
+ const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
+
+ // If we have a negated symbol, then we must have also have a non-negated
+ // symbol in order to encode the expression.
+ if (B && !A)
+ return false;
+
+ Res = MCValue::get(A, B, Result_Cst);
+ return true;
+}
+
+bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
+ const MCAsmLayout &Layout) const {
+ return EvaluateAsRelocatableImpl(Res, &Layout.getAssembler(), &Layout,
+ 0, false);
+}
+
+bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet) const {
+ ++stats::MCExprEvaluate;
+
+ switch (getKind()) {
+ case Target:
+ return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout);
+
+ case Constant:
+ Res = MCValue::get(cast<MCConstantExpr>(this)->getValue());
+ return true;
+
+ case SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
+ const MCSymbol &Sym = SRE->getSymbol();
+
+ // Evaluate recursively if this is a variable.
+ if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) {
+ bool Ret = Sym.getVariableValue()->EvaluateAsRelocatableImpl(Res, Asm,
+ Layout,
+ Addrs,
+ true);
+ // If we failed to simplify this to a constant, let the target
+ // handle it.
+ if (Ret && !Res.getSymA() && !Res.getSymB())
+ return true;
+ }
+
+ Res = MCValue::get(SRE, 0, 0);
+ return true;
+ }
+
+ case Unary: {
+ const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
+ MCValue Value;
+
+ if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout,
+ Addrs, InSet))
+ return false;
+
+ switch (AUE->getOpcode()) {
+ case MCUnaryExpr::LNot:
+ if (!Value.isAbsolute())
+ return false;
+ Res = MCValue::get(!Value.getConstant());
+ break;
+ case MCUnaryExpr::Minus:
+ /// -(a - b + const) ==> (b - a - const)
+ if (Value.getSymA() && !Value.getSymB())
+ return false;
+ Res = MCValue::get(Value.getSymB(), Value.getSymA(),
+ -Value.getConstant());
+ break;
+ case MCUnaryExpr::Not:
+ if (!Value.isAbsolute())
+ return false;
+ Res = MCValue::get(~Value.getConstant());
+ break;
+ case MCUnaryExpr::Plus:
+ Res = Value;
+ break;
+ }
+
+ return true;
+ }
+
+ case Binary: {
+ const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
+ MCValue LHSValue, RHSValue;
+
+ if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout,
+ Addrs, InSet) ||
+ !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout,
+ Addrs, InSet))
+ return false;
+
+ // We only support a few operations on non-constant expressions, handle
+ // those first.
+ if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) {
+ switch (ABE->getOpcode()) {
+ default:
+ return false;
+ case MCBinaryExpr::Sub:
+ // Negate RHS and add.
+ return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
+ RHSValue.getSymB(), RHSValue.getSymA(),
+ -RHSValue.getConstant(),
+ Res);
+
+ case MCBinaryExpr::Add:
+ return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
+ RHSValue.getSymA(), RHSValue.getSymB(),
+ RHSValue.getConstant(),
+ Res);
+ }
+ }
+
+ // FIXME: We need target hooks for the evaluation. It may be limited in
+ // width, and gas defines the result of comparisons and right shifts
+ // differently from Apple as.
+ int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
+ int64_t Result = 0;
+ switch (ABE->getOpcode()) {
+ case MCBinaryExpr::Add: Result = LHS + RHS; break;
+ case MCBinaryExpr::And: Result = LHS & RHS; break;
+ case MCBinaryExpr::Div: Result = LHS / RHS; break;
+ case MCBinaryExpr::EQ: Result = LHS == RHS; break;
+ case MCBinaryExpr::GT: Result = LHS > RHS; break;
+ case MCBinaryExpr::GTE: Result = LHS >= RHS; break;
+ case MCBinaryExpr::LAnd: Result = LHS && RHS; break;
+ case MCBinaryExpr::LOr: Result = LHS || RHS; break;
+ case MCBinaryExpr::LT: Result = LHS < RHS; break;
+ case MCBinaryExpr::LTE: Result = LHS <= RHS; break;
+ case MCBinaryExpr::Mod: Result = LHS % RHS; break;
+ case MCBinaryExpr::Mul: Result = LHS * RHS; break;
+ case MCBinaryExpr::NE: Result = LHS != RHS; break;
+ case MCBinaryExpr::Or: Result = LHS | RHS; break;
+ case MCBinaryExpr::Shl: Result = LHS << RHS; break;
+ case MCBinaryExpr::Shr: Result = LHS >> RHS; break;
+ case MCBinaryExpr::Sub: Result = LHS - RHS; break;
+ case MCBinaryExpr::Xor: Result = LHS ^ RHS; break;
+ }
+
+ Res = MCValue::get(Result);
+ return true;
+ }
+ }
+
+ llvm_unreachable("Invalid assembly expression kind!");
+}
+
+const MCSection *MCExpr::FindAssociatedSection() const {
+ switch (getKind()) {
+ case Target:
+ // We never look through target specific expressions.
+ return cast<MCTargetExpr>(this)->FindAssociatedSection();
+
+ case Constant:
+ return MCSymbol::AbsolutePseudoSection;
+
+ case SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
+ const MCSymbol &Sym = SRE->getSymbol();
+
+ if (Sym.isDefined())
+ return &Sym.getSection();
+
+ return 0;
+ }
+
+ case Unary:
+ return cast<MCUnaryExpr>(this)->getSubExpr()->FindAssociatedSection();
+
+ case Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(this);
+ const MCSection *LHS_S = BE->getLHS()->FindAssociatedSection();
+ const MCSection *RHS_S = BE->getRHS()->FindAssociatedSection();
+
+ // If either section is absolute, return the other.
+ if (LHS_S == MCSymbol::AbsolutePseudoSection)
+ return RHS_S;
+ if (RHS_S == MCSymbol::AbsolutePseudoSection)
+ return LHS_S;
+
+ // Otherwise, return the first non-null section.
+ return LHS_S ? LHS_S : RHS_S;
+ }
+ }
+
+ llvm_unreachable("Invalid assembly expression kind!");
+}
diff --git a/contrib/llvm/lib/MC/MCInst.cpp b/contrib/llvm/lib/MC/MCInst.cpp
new file mode 100644
index 000000000000..124cc149beb6
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCInst.cpp
@@ -0,0 +1,72 @@
+//===- lib/MC/MCInst.cpp - MCInst implementation --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ OS << "<MCOperand ";
+ if (!isValid())
+ OS << "INVALID";
+ else if (isReg())
+ OS << "Reg:" << getReg();
+ else if (isImm())
+ OS << "Imm:" << getImm();
+ else if (isExpr()) {
+ OS << "Expr:(" << *getExpr() << ")";
+ } else if (isInst()) {
+ OS << "Inst:(" << *getInst() << ")";
+ } else
+ OS << "UNDEFINED";
+ OS << ">";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MCOperand::dump() const {
+ print(dbgs(), 0);
+ dbgs() << "\n";
+}
+#endif
+
+void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ OS << "<MCInst " << getOpcode();
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ OS << " ";
+ getOperand(i).print(OS, MAI);
+ }
+ OS << ">";
+}
+
+void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI,
+ const MCInstPrinter *Printer,
+ StringRef Separator) const {
+ OS << "<MCInst #" << getOpcode();
+
+ // Show the instruction opcode name if we have access to a printer.
+ if (Printer)
+ OS << ' ' << Printer->getOpcodeName(getOpcode());
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ OS << Separator;
+ getOperand(i).print(OS, MAI);
+ }
+ OS << ">";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MCInst::dump() const {
+ print(dbgs(), 0);
+ dbgs() << "\n";
+}
+#endif
diff --git a/contrib/llvm/lib/MC/MCInstPrinter.cpp b/contrib/llvm/lib/MC/MCInstPrinter.cpp
new file mode 100644
index 000000000000..73f30ffb52a0
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCInstPrinter.cpp
@@ -0,0 +1,61 @@
+//===-- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCInstPrinter::~MCInstPrinter() {
+}
+
+/// getOpcodeName - Return the name of the specified opcode enum (e.g.
+/// "MOV32ri") or empty if we can't resolve it.
+StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return MII.getName(Opcode);
+}
+
+void MCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ llvm_unreachable("Target should implement this");
+}
+
+void MCInstPrinter::printAnnotation(raw_ostream &OS, StringRef Annot) {
+ if (!Annot.empty()) {
+ if (CommentStream)
+ (*CommentStream) << Annot;
+ else
+ OS << " " << MAI.getCommentString() << " " << Annot;
+ }
+}
+
+/// Utility functions to make adding mark ups simpler.
+StringRef MCInstPrinter::markup(StringRef s) const {
+ if (getUseMarkup())
+ return s;
+ else
+ return "";
+}
+StringRef MCInstPrinter::markup(StringRef a, StringRef b) const {
+ if (getUseMarkup())
+ return a;
+ else
+ return b;
+}
+
+/// Utility function to print immediates in decimal or hex.
+format_object1<int64_t> MCInstPrinter::formatImm(const int64_t Value) const {
+ if (getPrintImmHex())
+ return format("0x%" PRIx64, Value);
+ else
+ return format("%" PRId64, Value);
+}
diff --git a/contrib/llvm/lib/MC/MCInstrAnalysis.cpp b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp
new file mode 100644
index 000000000000..7736702f35a4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -0,0 +1,21 @@
+//===-- MCInstrAnalysis.cpp - InstrDesc target hooks ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstrAnalysis.h"
+using namespace llvm;
+
+uint64_t MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ if (Inst.getNumOperands() == 0 ||
+ Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ return Addr+Size+Imm;
+}
diff --git a/contrib/llvm/lib/MC/MCLabel.cpp b/contrib/llvm/lib/MC/MCLabel.cpp
new file mode 100644
index 000000000000..1d3022a93e86
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCLabel.cpp
@@ -0,0 +1,23 @@
+//===- lib/MC/MCLabel.cpp - MCLabel implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCLabel.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+void MCLabel::print(raw_ostream &OS) const {
+ OS << '"' << getInstance() << '"';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MCLabel::dump() const {
+ print(dbgs());
+}
+#endif
diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
new file mode 100644
index 000000000000..7d08d0ecd5e0
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
@@ -0,0 +1,436 @@
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class MCMachOStreamer : public MCObjectStreamer {
+private:
+ virtual void EmitInstToData(const MCInst &Inst);
+
+ void EmitDataRegion(DataRegionData::KindTy Kind);
+ void EmitDataRegionEnd();
+public:
+ MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter)
+ : MCObjectStreamer(SK_MachOStreamer, Context, MAB, OS, Emitter) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void InitToTextSection();
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
+ virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitLinkerOptions(ArrayRef<std::string> Options);
+ virtual void EmitDataRegion(MCDataRegionType Kind);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ llvm_unreachable("macho doesn't support this directive");
+ }
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ llvm_unreachable("macho doesn't support this directive");
+ }
+ virtual void EmitCOFFSymbolType(int Type) {
+ llvm_unreachable("macho doesn't support this directive");
+ }
+ virtual void EndCOFFSymbolDef() {
+ llvm_unreachable("macho doesn't support this directive");
+ }
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ llvm_unreachable("macho doesn't support this directive");
+ }
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ uint64_t Size = 0, unsigned ByteAlignment = 0);
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+
+ virtual void EmitFileDirective(StringRef Filename) {
+ // FIXME: Just ignore the .file; it isn't important enough to fail the
+ // entire assembly.
+
+ //report_fatal_error("unsupported directive: '.file'");
+ }
+
+ virtual void FinishImpl();
+
+ /// @}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_MachOStreamer;
+ }
+};
+
+} // end anonymous namespace.
+
+void MCMachOStreamer::InitSections() {
+ InitToTextSection();
+}
+
+void MCMachOStreamer::InitToTextSection() {
+ SwitchSection(getContext().getMachOSection(
+ "__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0,
+ SectionKind::getText()));
+}
+
+void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol) {
+ MCSymbolData &SD =
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ if (SD.isExternal())
+ EmitSymbolAttribute(EHSymbol, MCSA_Global);
+ if (SD.getFlags() & SF_WeakDefinition)
+ EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
+ if (SD.isPrivateExtern())
+ EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
+}
+
+void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ // isSymbolLinkerVisible uses the section.
+ Symbol->setSection(*getCurrentSection());
+ // We have to create a new fragment if this is an atom defining symbol,
+ // fragments cannot span atoms.
+ if (getAssembler().isSymbolLinkerVisible(*Symbol))
+ new MCDataFragment(getCurrentSectionData());
+
+ MCObjectStreamer::EmitLabel(Symbol);
+
+ MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
+ // This causes the reference type flag to be cleared. Darwin 'as' was "trying"
+ // to clear the weak reference and weak definition bits too, but the
+ // implementation was buggy. For now we just try to match 'as', for
+ // diffability.
+ //
+ // FIXME: Cleanup this code, these bits should be emitted based on semantic
+ // properties, not on the order of definition, etc.
+ SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
+}
+
+void MCMachOStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+}
+void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) {
+ if (!getAssembler().getBackend().hasDataInCodeSupport())
+ return;
+ // Create a temporary label to mark the start of the data region.
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+ // Record the region for the object writer to use.
+ DataRegionData Data = { Kind, Start, NULL };
+ std::vector<DataRegionData> &Regions = getAssembler().getDataRegions();
+ Regions.push_back(Data);
+}
+
+void MCMachOStreamer::EmitDataRegionEnd() {
+ if (!getAssembler().getBackend().hasDataInCodeSupport())
+ return;
+ std::vector<DataRegionData> &Regions = getAssembler().getDataRegions();
+ assert(Regions.size() && "Mismatched .end_data_region!");
+ DataRegionData &Data = Regions.back();
+ assert(Data.End == NULL && "Mismatched .end_data_region!");
+ // Create a temporary label to mark the end of the data region.
+ Data.End = getContext().CreateTempSymbol();
+ EmitLabel(Data.End);
+}
+
+void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ // Let the target do whatever target specific stuff it needs to do.
+ getAssembler().getBackend().handleAssemblerFlag(Flag);
+ // Do any generic stuff we need to do.
+ switch (Flag) {
+ case MCAF_SyntaxUnified: return; // no-op here.
+ case MCAF_Code16: return; // Change parsing mode; no-op here.
+ case MCAF_Code32: return; // Change parsing mode; no-op here.
+ case MCAF_Code64: return; // Change parsing mode; no-op here.
+ case MCAF_SubsectionsViaSymbols:
+ getAssembler().setSubsectionsViaSymbols(true);
+ return;
+ }
+}
+
+void MCMachOStreamer::EmitLinkerOptions(ArrayRef<std::string> Options) {
+ getAssembler().getLinkerOptions().push_back(Options);
+}
+
+void MCMachOStreamer::EmitDataRegion(MCDataRegionType Kind) {
+ switch (Kind) {
+ case MCDR_DataRegion:
+ EmitDataRegion(DataRegionData::Data);
+ return;
+ case MCDR_DataRegionJT8:
+ EmitDataRegion(DataRegionData::JumpTable8);
+ return;
+ case MCDR_DataRegionJT16:
+ EmitDataRegion(DataRegionData::JumpTable16);
+ return;
+ case MCDR_DataRegionJT32:
+ EmitDataRegion(DataRegionData::JumpTable32);
+ return;
+ case MCDR_DataRegionEnd:
+ EmitDataRegionEnd();
+ return;
+ }
+}
+
+void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
+ // Remember that the function is a thumb function. Fixup and relocation
+ // values will need adjusted.
+ getAssembler().setIsThumbFunc(Symbol);
+
+ // Mark the thumb bit on the symbol.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setFlags(SD.getFlags() | SF_ThumbFunc);
+}
+
+void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ // Indirect symbols are handled differently, to match how 'as' handles
+ // them. This makes writing matching .o files easier.
+ if (Attribute == MCSA_IndirectSymbol) {
+ // Note that we intentionally cannot use the symbol data here; this is
+ // important for matching the string table that 'as' generates.
+ IndirectSymbolData ISD;
+ ISD.Symbol = Symbol;
+ ISD.SectionData = getCurrentSectionData();
+ getAssembler().getIndirectSymbols().push_back(ISD);
+ return;
+ }
+
+ // Adding a symbol attribute always introduces the symbol, note that an
+ // important side effect of calling getOrCreateSymbolData here is to register
+ // the symbol with the assembler.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // The implementation of symbol attributes is designed to match 'as', but it
+ // leaves much to desired. It doesn't really make sense to arbitrarily add and
+ // remove flags, but 'as' allows this (in particular, see .desc).
+ //
+ // In the future it might be worth trying to make these operations more well
+ // defined.
+ switch (Attribute) {
+ case MCSA_Invalid:
+ case MCSA_ELF_TypeFunction:
+ case MCSA_ELF_TypeIndFunction:
+ case MCSA_ELF_TypeObject:
+ case MCSA_ELF_TypeTLS:
+ case MCSA_ELF_TypeCommon:
+ case MCSA_ELF_TypeNoType:
+ case MCSA_ELF_TypeGnuUniqueObject:
+ case MCSA_Hidden:
+ case MCSA_IndirectSymbol:
+ case MCSA_Internal:
+ case MCSA_Protected:
+ case MCSA_Weak:
+ case MCSA_Local:
+ llvm_unreachable("Invalid symbol attribute for Mach-O!");
+
+ case MCSA_Global:
+ SD.setExternal(true);
+ // This effectively clears the undefined lazy bit, in Darwin 'as', although
+ // it isn't very consistent because it implements this as part of symbol
+ // lookup.
+ //
+ // FIXME: Cleanup this code, these bits should be emitted based on semantic
+ // properties, not on the order of definition, etc.
+ SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy);
+ break;
+
+ case MCSA_LazyReference:
+ // FIXME: This requires -dynamic.
+ SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+ if (Symbol->isUndefined())
+ SD.setFlags(SD.getFlags() | SF_ReferenceTypeUndefinedLazy);
+ break;
+
+ // Since .reference sets the no dead strip bit, it is equivalent to
+ // .no_dead_strip in practice.
+ case MCSA_Reference:
+ case MCSA_NoDeadStrip:
+ SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+ break;
+
+ case MCSA_SymbolResolver:
+ SD.setFlags(SD.getFlags() | SF_SymbolResolver);
+ break;
+
+ case MCSA_PrivateExtern:
+ SD.setExternal(true);
+ SD.setPrivateExtern(true);
+ break;
+
+ case MCSA_WeakReference:
+ // FIXME: This requires -dynamic.
+ if (Symbol->isUndefined())
+ SD.setFlags(SD.getFlags() | SF_WeakReference);
+ break;
+
+ case MCSA_WeakDefinition:
+ // FIXME: 'as' enforces that this is defined and global. The manual claims
+ // it has to be in a coalesced section, but this isn't enforced.
+ SD.setFlags(SD.getFlags() | SF_WeakDefinition);
+ break;
+
+ case MCSA_WeakDefAutoPrivate:
+ SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference);
+ break;
+ }
+}
+
+void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ // Encode the 'desc' value into the lowest implementation defined bits.
+ assert(DescValue == (DescValue & SF_DescFlagsMask) &&
+ "Invalid .desc value!");
+ getAssembler().getOrCreateSymbolData(*Symbol).setFlags(
+ DescValue & SF_DescFlagsMask);
+}
+
+void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setExternal(true);
+ SD.setCommon(Size, ByteAlignment);
+}
+
+void MCMachOStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ // '.lcomm' is equivalent to '.zerofill'.
+ return EmitZerofill(getContext().getMachOSection("__DATA", "__bss",
+ MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()),
+ Symbol, Size, ByteAlignment);
+}
+
+void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section);
+
+ // The symbol may not be present, which only creates the section.
+ if (!Symbol)
+ return;
+
+ // FIXME: Assert that this section has the zerofill type.
+
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // Emit an align fragment if necessary.
+ if (ByteAlignment != 1)
+ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData);
+
+ MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+ SD.setFragment(F);
+
+ Symbol->setSection(*Section);
+
+ // Update the maximum alignment on the zero fill section if necessary.
+ if (ByteAlignment > SectData.getAlignment())
+ SectData.setAlignment(ByteAlignment);
+}
+
+// This should always be called with the thread local bss section. Like the
+// .zerofill directive this doesn't actually switch sections on us.
+void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ EmitZerofill(Section, Symbol, Size, ByteAlignment);
+ return;
+}
+
+void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->getFixups().push_back(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCMachOStreamer::FinishImpl() {
+ EmitFrames(true);
+
+ // We have to set the fragment atom associations so we can relax properly for
+ // Mach-O.
+
+ // First, scan the symbol table to build a lookup table from fragments to
+ // defining symbols.
+ DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap;
+ for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(),
+ ie = getAssembler().symbol_end(); it != ie; ++it) {
+ if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) &&
+ it->getFragment()) {
+ // An atom defining symbol should never be internal to a fragment.
+ assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!");
+ DefiningSymbolMap[it->getFragment()] = it;
+ }
+ }
+
+ // Set the fragment atom associations by tracking the last seen atom defining
+ // symbol.
+ for (MCAssembler::iterator it = getAssembler().begin(),
+ ie = getAssembler().end(); it != ie; ++it) {
+ MCSymbolData *CurrentAtom = 0;
+ for (MCSectionData::iterator it2 = it->begin(),
+ ie2 = it->end(); it2 != ie2; ++it2) {
+ if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2))
+ CurrentAtom = SD;
+ it2->setAtom(CurrentAtom);
+ }
+ }
+
+ this->MCObjectStreamer::FinishImpl();
+}
+
+MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
+ MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
diff --git a/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp
new file mode 100644
index 000000000000..146cebf01a3a
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp
@@ -0,0 +1,22 @@
+//===-- MCMachObjectTargetWriter.cpp - Mach-O Target Writer Subclass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCMachObjectWriter.h"
+
+using namespace llvm;
+
+MCMachObjectTargetWriter::MCMachObjectTargetWriter(
+ bool Is64Bit_, uint32_t CPUType_, uint32_t CPUSubtype_,
+ bool UseAggressiveSymbolFolding_)
+ : Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_),
+ UseAggressiveSymbolFolding(UseAggressiveSymbolFolding_) {
+}
+
+MCMachObjectTargetWriter::~MCMachObjectTargetWriter() {
+}
diff --git a/contrib/llvm/lib/MC/MCModule.cpp b/contrib/llvm/lib/MC/MCModule.cpp
new file mode 100644
index 000000000000..f5631608330c
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCModule.cpp
@@ -0,0 +1,45 @@
+//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCModule.h"
+
+using namespace llvm;
+
+MCAtom *MCModule::createAtom(MCAtom::AtomType Type,
+ uint64_t Begin, uint64_t End) {
+ assert(Begin < End && "Creating MCAtom with endpoints reversed?");
+
+ // Check for atoms already covering this range.
+ IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Begin);
+ assert((!I.valid() || I.start() < End) && "Offset range already occupied!");
+
+ // Create the new atom and add it to our maps.
+ MCAtom *NewAtom = new MCAtom(Type, this, Begin, End);
+ AtomAllocationTracker.insert(NewAtom);
+ OffsetMap.insert(Begin, End, NewAtom);
+ return NewAtom;
+}
+
+// remap - Update the interval mapping for an atom.
+void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
+ // Find and erase the old mapping.
+ IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Atom->Begin);
+ assert(I.valid() && "Atom offset not found in module!");
+ assert(*I == Atom && "Previous atom mapping was invalid!");
+ I.erase();
+
+ // Insert the new mapping.
+ OffsetMap.insert(NewBegin, NewEnd, Atom);
+
+ // Update the atom internal bounds.
+ Atom->Begin = NewBegin;
+ Atom->End = NewEnd;
+}
+
diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp
new file mode 100644
index 000000000000..c872b2203f87
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp
@@ -0,0 +1,123 @@
+//===- lib/MC/MCNullStreamer.cpp - Dummy Streamer Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+
+using namespace llvm;
+
+namespace {
+
+ class MCNullStreamer : public MCStreamer {
+ public:
+ MCNullStreamer(MCContext &Context) : MCStreamer(SK_NullStreamer, Context) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitToTextSection() {
+ }
+
+ virtual void InitSections() {
+ }
+
+ virtual void ChangeSection(const MCSection *Section) {
+ }
+
+ virtual void EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection());
+ }
+ virtual void EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+ }
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+ virtual void EmitThumbFunc(MCSymbol *Func) {}
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol){}
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {}
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
+ virtual void EmitCOFFSymbolType(int Type) {}
+ virtual void EndCOFFSymbolDef() {}
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol) {}
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ uint64_t Size = 0, unsigned ByteAlignment = 0) {}
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {}
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {}
+ virtual void EmitULEB128Value(const MCExpr *Value) {}
+ virtual void EmitSLEB128Value(const MCExpr *Value) {}
+ virtual void EmitGPRel32Value(const MCExpr *Value) {}
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0) {}
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0) {}
+
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0) { return false; }
+
+ virtual void EmitFileDirective(StringRef Filename) {}
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename, unsigned CUID = 0) {
+ return false;
+ }
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator,
+ StringRef FileName) {}
+ virtual void EmitInstruction(const MCInst &Inst) {}
+
+ virtual void EmitBundleAlignMode(unsigned AlignPow2) {}
+ virtual void EmitBundleLock(bool AlignToEnd) {}
+ virtual void EmitBundleUnlock() {}
+
+ virtual void FinishImpl() {}
+
+ virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+ RecordProcEnd(Frame);
+ }
+
+ /// @}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_NullStreamer;
+ }
+
+ };
+
+}
+
+MCStreamer *llvm::createNullStreamer(MCContext &Context) {
+ return new MCNullStreamer(Context);
+}
diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
new file mode 100644
index 000000000000..d19e79ac64f9
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -0,0 +1,683 @@
+//===-- MObjectFileInfo.cpp - Object File Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+using namespace llvm;
+
+void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
+ // MachO
+ IsFunctionEHFrameSymbolPrivate = false;
+ SupportsWeakOmittedEHFrame = false;
+
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
+ | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = FDEEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+
+ // .comm doesn't support alignment before Leopard.
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
+ CommDirectiveSupportsAlignment = false;
+
+ TextSection // .text
+ = Ctx->getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ DataSection // .data
+ = Ctx->getMachOSection("__DATA", "__data", 0,
+ SectionKind::getDataRel());
+
+ TLSDataSection // .tdata
+ = Ctx->getMachOSection("__DATA", "__thread_data",
+ MCSectionMachO::S_THREAD_LOCAL_REGULAR,
+ SectionKind::getDataRel());
+ TLSBSSSection // .tbss
+ = Ctx->getMachOSection("__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ SectionKind::getThreadBSS());
+
+ // TODO: Verify datarel below.
+ TLSTLVSection // .tlv
+ = Ctx->getMachOSection("__DATA", "__thread_vars",
+ MCSectionMachO::S_THREAD_LOCAL_VARIABLES,
+ SectionKind::getDataRel());
+
+ TLSThreadInitSection
+ = Ctx->getMachOSection("__DATA", "__thread_init",
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+ SectionKind::getDataRel());
+
+ CStringSection // .cstring
+ = Ctx->getMachOSection("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS,
+ SectionKind::getMergeable1ByteCString());
+ UStringSection
+ = Ctx->getMachOSection("__TEXT","__ustring", 0,
+ SectionKind::getMergeable2ByteCString());
+ FourByteConstantSection // .literal4
+ = Ctx->getMachOSection("__TEXT", "__literal4",
+ MCSectionMachO::S_4BYTE_LITERALS,
+ SectionKind::getMergeableConst4());
+ EightByteConstantSection // .literal8
+ = Ctx->getMachOSection("__TEXT", "__literal8",
+ MCSectionMachO::S_8BYTE_LITERALS,
+ SectionKind::getMergeableConst8());
+
+ // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back
+ // to using it in -static mode.
+ SixteenByteConstantSection = 0;
+ if (RelocM != Reloc::Static &&
+ T.getArch() != Triple::x86_64 && T.getArch() != Triple::ppc64)
+ SixteenByteConstantSection = // .literal16
+ Ctx->getMachOSection("__TEXT", "__literal16",
+ MCSectionMachO::S_16BYTE_LITERALS,
+ SectionKind::getMergeableConst16());
+
+ ReadOnlySection // .const
+ = Ctx->getMachOSection("__TEXT", "__const", 0,
+ SectionKind::getReadOnly());
+
+ TextCoalSection
+ = Ctx->getMachOSection("__TEXT", "__textcoal_nt",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ ConstTextCoalSection
+ = Ctx->getMachOSection("__TEXT", "__const_coal",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getReadOnly());
+ ConstDataSection // .const_data
+ = Ctx->getMachOSection("__DATA", "__const", 0,
+ SectionKind::getReadOnlyWithRel());
+ DataCoalSection
+ = Ctx->getMachOSection("__DATA","__datacoal_nt",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getDataRel());
+ DataCommonSection
+ = Ctx->getMachOSection("__DATA","__common",
+ MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
+ DataBSSSection
+ = Ctx->getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
+
+
+ LazySymbolPointerSection
+ = Ctx->getMachOSection("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ NonLazySymbolPointerSection
+ = Ctx->getMachOSection("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+
+ if (RelocM == Reloc::Static) {
+ StaticCtorSection
+ = Ctx->getMachOSection("__TEXT", "__constructor", 0,
+ SectionKind::getDataRel());
+ StaticDtorSection
+ = Ctx->getMachOSection("__TEXT", "__destructor", 0,
+ SectionKind::getDataRel());
+ } else {
+ StaticCtorSection
+ = Ctx->getMachOSection("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ StaticDtorSection
+ = Ctx->getMachOSection("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ }
+
+ // Exception Handling.
+ LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0,
+ SectionKind::getReadOnlyWithRel());
+
+ if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+ CompactUnwindSection =
+ Ctx->getMachOSection("__LD", "__compact_unwind",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getReadOnly());
+
+ // Debug Information.
+ DwarfAccelNamesSection =
+ Ctx->getMachOSection("__DWARF", "__apple_names",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfAccelObjCSection =
+ Ctx->getMachOSection("__DWARF", "__apple_objc",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ // 16 character section limit...
+ DwarfAccelNamespaceSection =
+ Ctx->getMachOSection("__DWARF", "__apple_namespac",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfAccelTypesSection =
+ Ctx->getMachOSection("__DWARF", "__apple_types",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+
+ DwarfAbbrevSection =
+ Ctx->getMachOSection("__DWARF", "__debug_abbrev",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ Ctx->getMachOSection("__DWARF", "__debug_info",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ Ctx->getMachOSection("__DWARF", "__debug_line",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ Ctx->getMachOSection("__DWARF", "__debug_frame",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_pubnames",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_pubtypes",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ Ctx->getMachOSection("__DWARF", "__debug_str",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ Ctx->getMachOSection("__DWARF", "__debug_loc",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_aranges",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_ranges",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ Ctx->getMachOSection("__DWARF", "__debug_macinfo",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfDebugInlineSection =
+ Ctx->getMachOSection("__DWARF", "__debug_inlined",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+
+ TLSExtraDataSection = TLSTLVSection;
+}
+
+void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
+ if (T.getArch() == Triple::mips ||
+ T.getArch() == Triple::mipsel)
+ FDECFIEncoding = dwarf::DW_EH_PE_sdata4;
+ else if (T.getArch() == Triple::mips64 ||
+ T.getArch() == Triple::mips64el)
+ FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
+ else
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
+ if (T.getArch() == Triple::x86) {
+ PersonalityEncoding = (RelocM == Reloc::PIC_)
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = (RelocM == Reloc::PIC_)
+ ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ FDEEncoding = (RelocM == Reloc::PIC_)
+ ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = (RelocM == Reloc::PIC_)
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ } else if (T.getArch() == Triple::x86_64) {
+ if (RelocM == Reloc::PIC_) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel |
+ (CMModel == CodeModel::Small
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ } else {
+ PersonalityEncoding =
+ (CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = (CMModel == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ FDEEncoding = dwarf::DW_EH_PE_udata4;
+ TTypeEncoding = (CMModel == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ }
+ } else if (T.getArch() == Triple::aarch64) {
+ // The small model guarantees static code/data size < 4GB, but not where it
+ // will be in memory. Most of these could end up >2GB away so even a signed
+ // pc-relative 32-bit address is insufficient, theoretically.
+ if (RelocM == Reloc::PIC_) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
+ FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ FDEEncoding = dwarf::DW_EH_PE_udata4;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ } else if (T.getArch() == Triple::ppc64) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
+ FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ }
+
+ // Solaris requires different flags for .eh_frame to seemingly every other
+ // platform.
+ EHSectionType = ELF::SHT_PROGBITS;
+ EHSectionFlags = ELF::SHF_ALLOC;
+ if (T.getOS() == Triple::Solaris) {
+ if (T.getArch() == Triple::x86_64)
+ EHSectionType = ELF::SHT_X86_64_UNWIND;
+ else
+ EHSectionFlags |= ELF::SHF_WRITE;
+ }
+
+
+ // ELF
+ BSSSection =
+ Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+ TextSection =
+ Ctx->getELFSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC,
+ SectionKind::getText());
+
+ DataSection =
+ Ctx->getELFSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ ReadOnlySection =
+ Ctx->getELFSection(".rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+
+ TLSDataSection =
+ Ctx->getELFSection(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
+ SectionKind::getThreadData());
+
+ TLSBSSSection =
+ Ctx->getELFSection(".tbss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
+ SectionKind::getThreadBSS());
+
+ DataRelSection =
+ Ctx->getELFSection(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ DataRelLocalSection =
+ Ctx->getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRelLocal());
+
+ DataRelROSection =
+ Ctx->getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+
+ DataRelROLocalSection =
+ Ctx->getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+
+ MergeableConst4Section =
+ Ctx->getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst4());
+
+ MergeableConst8Section =
+ Ctx->getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst8());
+
+ MergeableConst16Section =
+ Ctx->getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst16());
+
+ StaticCtorSection =
+ Ctx->getELFSection(".ctors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ StaticDtorSection =
+ Ctx->getELFSection(".dtors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ // Exception Handling Sections.
+
+ // FIXME: We're emitting LSDA info into a readonly section on ELF, even though
+ // it contains relocatable pointers. In PIC mode, this is probably a big
+ // runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ Ctx->getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+
+ // Debug Info Sections.
+ DwarfAbbrevSection =
+ Ctx->getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ Ctx->getELFSection(".debug_info", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ Ctx->getELFSection(".debug_line", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS,
+ SectionKind::getMergeable1ByteCString());
+ DwarfLocSection =
+ Ctx->getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+
+ // DWARF5 Experimental Debug Info
+
+ // Accelerator Tables
+ DwarfAccelNamesSection =
+ Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfAccelObjCSection =
+ Ctx->getELFSection(".apple_objc", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfAccelNamespaceSection =
+ Ctx->getELFSection(".apple_namespaces", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfAccelTypesSection =
+ Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+
+ // Fission Sections
+ DwarfInfoDWOSection =
+ Ctx->getELFSection(".debug_info.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfAbbrevDWOSection =
+ Ctx->getELFSection(".debug_abbrev.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfStrDWOSection =
+ Ctx->getELFSection(".debug_str.dwo", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS,
+ SectionKind::getMergeable1ByteCString());
+ DwarfLineDWOSection =
+ Ctx->getELFSection(".debug_line.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLocDWOSection =
+ Ctx->getELFSection(".debug_loc.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfStrOffDWOSection =
+ Ctx->getELFSection(".debug_str_offsets.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfAddrSection =
+ Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+}
+
+
+void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
+ // COFF
+ TextSection =
+ Ctx->getCOFFSection(".text",
+ COFF::IMAGE_SCN_CNT_CODE |
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ DataSection =
+ Ctx->getCOFFSection(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ ReadOnlySection =
+ Ctx->getCOFFSection(".rdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ if (T.getOS() == Triple::Win32) {
+ StaticCtorSection =
+ Ctx->getCOFFSection(".CRT$XCU",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ } else {
+ StaticCtorSection =
+ Ctx->getCOFFSection(".ctors",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ }
+
+
+ if (T.getOS() == Triple::Win32) {
+ StaticDtorSection =
+ Ctx->getCOFFSection(".CRT$XTX",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ } else {
+ StaticDtorSection =
+ Ctx->getCOFFSection(".dtors",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ }
+
+ // FIXME: We're emitting LSDA info into a readonly section on COFF, even
+ // though it contains relocatable pointers. In PIC mode, this is probably a
+ // big runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ Ctx->getCOFFSection(".gcc_except_table",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+
+ // Debug info.
+ DwarfAbbrevSection =
+ Ctx->getCOFFSection(".debug_abbrev",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ Ctx->getCOFFSection(".debug_info",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ Ctx->getCOFFSection(".debug_line",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ Ctx->getCOFFSection(".debug_frame",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ Ctx->getCOFFSection(".debug_pubnames",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ Ctx->getCOFFSection(".debug_pubtypes",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ Ctx->getCOFFSection(".debug_str",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ Ctx->getCOFFSection(".debug_loc",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ Ctx->getCOFFSection(".debug_aranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ Ctx->getCOFFSection(".debug_ranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ Ctx->getCOFFSection(".debug_macinfo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+
+ DrectveSection =
+ Ctx->getCOFFSection(".drectve",
+ COFF::IMAGE_SCN_LNK_INFO,
+ SectionKind::getMetadata());
+
+ PDataSection =
+ Ctx->getCOFFSection(".pdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getDataRel());
+
+ XDataSection =
+ Ctx->getCOFFSection(".xdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getDataRel());
+ TLSDataSection =
+ Ctx->getCOFFSection(".tls$",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
+ CodeModel::Model cm,
+ MCContext &ctx) {
+ RelocM = relocm;
+ CMModel = cm;
+ Ctx = &ctx;
+
+ // Common.
+ CommDirectiveSupportsAlignment = true;
+ SupportsWeakOmittedEHFrame = true;
+ IsFunctionEHFrameSymbolPrivate = true;
+
+ PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding =
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+
+ EHFrameSection = 0; // Created on demand.
+ CompactUnwindSection = 0; // Used only by selected targets.
+ DwarfAccelNamesSection = 0; // Used only by selected targets.
+ DwarfAccelObjCSection = 0; // Used only by selected targets.
+ DwarfAccelNamespaceSection = 0; // Used only by selected targets.
+ DwarfAccelTypesSection = 0; // Used only by selected targets.
+
+ Triple T(TT);
+ Triple::ArchType Arch = T.getArch();
+ // FIXME: Checking for Arch here to filter out bogus triples such as
+ // cellspu-apple-darwin. Perhaps we should fix in Triple?
+ if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
+ Arch == Triple::arm || Arch == Triple::thumb ||
+ Arch == Triple::ppc || Arch == Triple::ppc64 ||
+ Arch == Triple::UnknownArch) &&
+ (T.isOSDarwin() || T.getEnvironment() == Triple::MachO)) {
+ Env = IsMachO;
+ InitMachOMCObjectFileInfo(T);
+ } else if ((Arch == Triple::x86 || Arch == Triple::x86_64) &&
+ (T.getEnvironment() != Triple::ELF) &&
+ (T.getOS() == Triple::MinGW32 || T.getOS() == Triple::Cygwin ||
+ T.getOS() == Triple::Win32)) {
+ Env = IsCOFF;
+ InitCOFFMCObjectFileInfo(T);
+ } else {
+ Env = IsELF;
+ InitELFMCObjectFileInfo(T);
+ }
+}
+
+void MCObjectFileInfo::InitEHFrameSection() {
+ if (Env == IsMachO)
+ EHFrameSection =
+ Ctx->getMachOSection("__TEXT", "__eh_frame",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_NO_TOC |
+ MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+ MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
+ else if (Env == IsELF)
+ EHFrameSection =
+ Ctx->getELFSection(".eh_frame", EHSectionType,
+ EHSectionFlags,
+ SectionKind::getDataRel());
+ else
+ EHFrameSection =
+ Ctx->getCOFFSection(".eh_frame",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
new file mode 100644
index 000000000000..0d2ce83a8a10
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -0,0 +1,360 @@
+//===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+MCObjectStreamer::MCObjectStreamer(StreamerKind Kind, MCContext &Context,
+ MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter_)
+ : MCStreamer(Kind, Context),
+ Assembler(new MCAssembler(Context, TAB, *Emitter_,
+ *TAB.createObjectWriter(OS), OS)),
+ CurSectionData(0) {}
+
+MCObjectStreamer::MCObjectStreamer(StreamerKind Kind, MCContext &Context,
+ MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter_,
+ MCAssembler *_Assembler)
+ : MCStreamer(Kind, Context), Assembler(_Assembler), CurSectionData(0) {}
+
+MCObjectStreamer::~MCObjectStreamer() {
+ delete &Assembler->getBackend();
+ delete &Assembler->getEmitter();
+ delete &Assembler->getWriter();
+ delete Assembler;
+}
+
+void MCObjectStreamer::reset() {
+ if (Assembler)
+ Assembler->reset();
+ CurSectionData = 0;
+ MCStreamer::reset();
+}
+
+MCFragment *MCObjectStreamer::getCurrentFragment() const {
+ assert(getCurrentSectionData() && "No current section!");
+
+ if (!getCurrentSectionData()->empty())
+ return &getCurrentSectionData()->getFragmentList().back();
+
+ return 0;
+}
+
+MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
+ MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ // When bundling is enabled, we don't want to add data to a fragment that
+ // already has instructions (see MCELFStreamer::EmitInstToData for details)
+ if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions()))
+ F = new MCDataFragment(getCurrentSectionData());
+ return F;
+}
+
+const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ cast<MCTargetExpr>(Value)->AddValueSymbols(Assembler);
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols(BE->getLHS());
+ AddValueSymbols(BE->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Assembler->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
+ break;
+ }
+
+ return Value;
+}
+
+void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(AddrSpace == 0 && "Address space must be 0!");
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ // Avoid fixups when possible.
+ int64_t AbsValue;
+ if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue, getAssembler())) {
+ EmitIntValue(AbsValue, Size, AddrSpace);
+ return;
+ }
+ DF->getFixups().push_back(
+ MCFixup::Create(DF->getContents().size(), Value,
+ MCFixup::getKindForSize(Size, false)));
+ DF->getContents().resize(DF->getContents().size() + Size, 0);
+}
+
+void MCObjectStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+ RecordProcStart(Frame);
+}
+
+void MCObjectStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+ RecordProcEnd(Frame);
+}
+
+void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
+ MCStreamer::EmitLabel(Symbol);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // FIXME: This is wasteful, we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists, otherwise we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+}
+
+void MCObjectStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+}
+
+void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+ EmitULEB128IntValue(IntValue);
+ return;
+ }
+ Value = ForceExpAbs(Value);
+ new MCLEBFragment(*Value, false, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+ EmitSLEB128IntValue(IntValue);
+ return;
+ }
+ Value = ForceExpAbs(Value);
+ new MCLEBFragment(*Value, true, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
+ const MCSymbol *Symbol) {
+ report_fatal_error("This file format doesn't support weak aliases.");
+}
+
+void MCObjectStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+
+ CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
+}
+
+void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = Inst.getNumOperands(); i--; )
+ if (Inst.getOperand(i).isExpr())
+ AddValueSymbols(Inst.getOperand(i).getExpr());
+
+ MCSectionData *SD = getCurrentSectionData();
+ SD->setHasInstructions(true);
+
+ // Now that a machine instruction has been assembled into this section, make
+ // a line entry for any .loc directive that has been seen.
+ MCLineEntry::Make(this, getCurrentSection());
+
+ // If this instruction doesn't need relaxation, just emit it as data.
+ MCAssembler &Assembler = getAssembler();
+ if (!Assembler.getBackend().mayNeedRelaxation(Inst)) {
+ EmitInstToData(Inst);
+ return;
+ }
+
+ // Otherwise, relax and emit it as data if either:
+ // - The RelaxAll flag was passed
+ // - Bundling is enabled and this instruction is inside a bundle-locked
+ // group. We want to emit all such instructions into the same data
+ // fragment.
+ if (Assembler.getRelaxAll() ||
+ (Assembler.isBundlingEnabled() && SD->isBundleLocked())) {
+ MCInst Relaxed;
+ getAssembler().getBackend().relaxInstruction(Inst, Relaxed);
+ while (getAssembler().getBackend().mayNeedRelaxation(Relaxed))
+ getAssembler().getBackend().relaxInstruction(Relaxed, Relaxed);
+ EmitInstToData(Relaxed);
+ return;
+ }
+
+ // Otherwise emit to a separate fragment.
+ EmitInstToFragment(Inst);
+}
+
+void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
+ // Always create a new, separate fragment here, because its size can change
+ // during relaxation.
+ MCRelaxableFragment *IF =
+ new MCRelaxableFragment(Inst, getCurrentSectionData());
+
+ SmallString<128> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups());
+ VecOS.flush();
+ IF->getContents().append(Code.begin(), Code.end());
+}
+
+#ifndef NDEBUG
+static const char *BundlingNotImplementedMsg =
+ "Aligned bundling is not implemented for this object format";
+#endif
+
+void MCObjectStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
+ llvm_unreachable(BundlingNotImplementedMsg);
+}
+
+void MCObjectStreamer::EmitBundleLock(bool AlignToEnd) {
+ llvm_unreachable(BundlingNotImplementedMsg);
+}
+
+void MCObjectStreamer::EmitBundleUnlock() {
+ llvm_unreachable(BundlingNotImplementedMsg);
+}
+
+void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ if (!LastLabel) {
+ EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
+ return;
+ }
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ int64_t Res;
+ if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+ MCDwarfLineAddr::Emit(this, LineDelta, Res);
+ return;
+ }
+ AddrDelta = ForceExpAbs(AddrDelta);
+ new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ int64_t Res;
+ if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+ MCDwarfFrameEmitter::EmitAdvanceLoc(*this, Res);
+ return;
+ }
+ AddrDelta = ForceExpAbs(AddrDelta);
+ new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ assert(AddrSpace == 0 && "Address space must be 0!");
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ EmitValueToAlignment(ByteAlignment, 0, 1, MaxBytesToEmit);
+ cast<MCAlignFragment>(getCurrentFragment())->setEmitNops(true);
+}
+
+bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ int64_t Res;
+ if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+ return false;
+ }
+
+ MCSymbol *CurrentPos = getContext().CreateTempSymbol();
+ EmitLabel(CurrentPos);
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *Ref =
+ MCSymbolRefExpr::Create(CurrentPos, Variant, getContext());
+ const MCExpr *Delta =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext());
+
+ if (!Delta->EvaluateAsAbsolute(Res, getAssembler()))
+ return true;
+ EmitFill(Res, Value);
+ return false;
+}
+
+// Associate GPRel32 fixup with data and resize data area
+void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
+ Value, FK_GPRel_4));
+ DF->getContents().resize(DF->getContents().size() + 4, 0);
+}
+
+// Associate GPRel32 fixup with data and resize data area
+void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
+ Value, FK_GPRel_4));
+ DF->getContents().resize(DF->getContents().size() + 8, 0);
+}
+
+void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ assert(AddrSpace == 0 && "Address space must be 0!");
+ // FIXME: A MCFillFragment would be more memory efficient but MCExpr has
+ // problems evaluating expressions across multiple fragments.
+ getOrCreateDataFragment()->getContents().append(NumBytes, FillValue);
+}
+
+void MCObjectStreamer::FinishImpl() {
+ // Dump out the dwarf file & directory tables and line tables.
+ const MCSymbol *LineSectionSymbol = NULL;
+ if (getContext().hasDwarfFiles())
+ LineSectionSymbol = MCDwarfFileTable::Emit(this);
+
+ // If we are generating dwarf for assembly source files dump out the sections.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfInfo::Emit(this, LineSectionSymbol);
+
+ getAssembler().Finish();
+}
diff --git a/contrib/llvm/lib/MC/MCObjectWriter.cpp b/contrib/llvm/lib/MC/MCObjectWriter.cpp
new file mode 100644
index 000000000000..94d7cd6fd4f3
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCObjectWriter.cpp
@@ -0,0 +1,56 @@
+//===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
+
+using namespace llvm;
+
+MCObjectWriter::~MCObjectWriter() {
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
+ const MCSymbolRefExpr *A,
+ const MCSymbolRefExpr *B,
+ bool InSet) const {
+ // Modified symbol references cannot be resolved.
+ if (A->getKind() != MCSymbolRefExpr::VK_None ||
+ B->getKind() != MCSymbolRefExpr::VK_None)
+ return false;
+
+ const MCSymbol &SA = A->getSymbol();
+ const MCSymbol &SB = B->getSymbol();
+ if (SA.AliasedSymbol().isUndefined() || SB.AliasedSymbol().isUndefined())
+ return false;
+
+ const MCSymbolData &DataA = Asm.getSymbolData(SA);
+ const MCSymbolData &DataB = Asm.getSymbolData(SB);
+ if(!DataA.getFragment() || !DataB.getFragment())
+ return false;
+
+ return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA,
+ *DataB.getFragment(),
+ InSet,
+ false);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ const MCSection &SecA = DataA.getSymbol().AliasedSymbol().getSection();
+ const MCSection &SecB = FB.getParent()->getSection();
+ // On ELF and COFF A - B is absolute if A and B are in the same section.
+ return &SecA == &SecB;
+}
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
new file mode 100644
index 000000000000..c1c594a74697
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -0,0 +1,516 @@
+//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
+#include <cctype>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
+ CurBuf = NULL;
+ CurPtr = NULL;
+ isAtStartOfLine = true;
+}
+
+AsmLexer::~AsmLexer() {
+}
+
+void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
+ CurBuf = buf;
+
+ if (ptr)
+ CurPtr = ptr;
+ else
+ CurPtr = CurBuf->getBufferStart();
+
+ TokStart = 0;
+}
+
+/// ReturnError - Set the error to the specified string at the specified
+/// location. This is defined to always return AsmToken::Error.
+AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+ SetError(SMLoc::getFromPointer(Loc), Msg);
+
+ return AsmToken(AsmToken::Error, StringRef(Loc, 0));
+}
+
+int AsmLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default:
+ return (unsigned char)CurChar;
+ case 0:
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+}
+
+/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
+///
+/// The leading integral digit sequence and dot should have already been
+/// consumed, some or all of the fractional digit sequence *can* have been
+/// consumed.
+AsmToken AsmLexer::LexFloatLiteral() {
+ // Skip the fractional digit sequence.
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+
+ // Check for exponent; we intentionally accept a slighlty wider set of
+ // literals here and rely on the upstream client to reject invalid ones (e.g.,
+ // "1e+").
+ if (*CurPtr == 'e' || *CurPtr == 'E') {
+ ++CurPtr;
+ if (*CurPtr == '-' || *CurPtr == '+')
+ ++CurPtr;
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ }
+
+ return AsmToken(AsmToken::Real,
+ StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+static bool IsIdentifierChar(char c) {
+ return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+}
+AsmToken AsmLexer::LexIdentifier() {
+ // Check for floating point literals.
+ if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
+ // Disambiguate a .1243foo identifier from a floating literal.
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
+ return LexFloatLiteral();
+ }
+
+ while (IsIdentifierChar(*CurPtr))
+ ++CurPtr;
+
+ // Handle . as a special case.
+ if (CurPtr == TokStart+1 && TokStart[0] == '.')
+ return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
+
+ return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexSlash: Slash: /
+/// C-Style Comment: /* ... */
+AsmToken AsmLexer::LexSlash() {
+ switch (*CurPtr) {
+ case '*': break; // C style comment.
+ case '/': return ++CurPtr, LexLineComment();
+ default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
+ }
+
+ // C Style comment.
+ ++CurPtr; // skip the star.
+ while (1) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ return ReturnError(TokStart, "unterminated comment");
+ case '*':
+ // End of the comment?
+ if (CurPtr[0] != '/') break;
+
+ ++CurPtr; // End the */.
+ return LexToken();
+ }
+ }
+}
+
+/// LexLineComment: Comment: #[^\n]*
+/// : //[^\n]*
+AsmToken AsmLexer::LexLineComment() {
+ // FIXME: This is broken if we happen to a comment at the end of a file, which
+ // was .included, and which doesn't end with a newline.
+ int CurChar = getNextChar();
+ while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
+ CurChar = getNextChar();
+
+ if (CurChar == EOF)
+ return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
+ return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
+}
+
+static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
+ // Skip ULL, UL, U, L and LL suffices.
+ if (CurPtr[0] == 'U')
+ ++CurPtr;
+ if (CurPtr[0] == 'L')
+ ++CurPtr;
+ if (CurPtr[0] == 'L')
+ ++CurPtr;
+}
+
+// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
+// integer as a hexadecimal, possibly with leading zeroes.
+static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
+ const char *FirstHex = 0;
+ const char *LookAhead = CurPtr;
+ while (1) {
+ if (isdigit(*LookAhead)) {
+ ++LookAhead;
+ } else if (isxdigit(*LookAhead)) {
+ if (!FirstHex)
+ FirstHex = LookAhead;
+ ++LookAhead;
+ } else {
+ break;
+ }
+ }
+ bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
+ CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
+ if (isHex)
+ return 16;
+ return DefaultRadix;
+}
+
+/// LexDigit: First character is [0-9].
+/// Local Label: [0-9][:]
+/// Forward/Backward Label: [0-9][fb]
+/// Binary integer: 0b[01]+
+/// Octal integer: 0[0-7]+
+/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
+/// Decimal integer: [1-9][0-9]*
+AsmToken AsmLexer::LexDigit() {
+ // Decimal integer: [1-9][0-9]*
+ if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
+ unsigned Radix = doLookAhead(CurPtr, 10);
+ bool isHex = Radix == 16;
+ // Check for floating point literals.
+ if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
+ ++CurPtr;
+ return LexFloatLiteral();
+ }
+
+ StringRef Result(TokStart, CurPtr - TokStart);
+
+ long long Value;
+ if (Result.getAsInteger(Radix, Value)) {
+ // Allow positive values that are too large to fit into a signed 64-bit
+ // integer, but that do fit in an unsigned one, we just convert them over.
+ unsigned long long UValue;
+ if (Result.getAsInteger(Radix, UValue))
+ return ReturnError(TokStart, !isHex ? "invalid decimal number" :
+ "invalid hexdecimal number");
+ Value = (long long)UValue;
+ }
+
+ // Consume the [bB][hH].
+ if (Radix == 2 || Radix == 16)
+ ++CurPtr;
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores type
+ // suffices on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, Result, Value);
+ }
+
+ if (*CurPtr == 'b') {
+ ++CurPtr;
+ // See if we actually have "0b" as part of something like "jmp 0b\n"
+ if (!isdigit(CurPtr[0])) {
+ --CurPtr;
+ StringRef Result(TokStart, CurPtr - TokStart);
+ return AsmToken(AsmToken::Integer, Result, 0);
+ }
+ const char *NumStart = CurPtr;
+ while (CurPtr[0] == '0' || CurPtr[0] == '1')
+ ++CurPtr;
+
+ // Requires at least one binary digit.
+ if (CurPtr == NumStart)
+ return ReturnError(TokStart, "invalid binary number");
+
+ StringRef Result(TokStart, CurPtr - TokStart);
+
+ long long Value;
+ if (Result.substr(2).getAsInteger(2, Value))
+ return ReturnError(TokStart, "invalid binary number");
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, Result, Value);
+ }
+
+ if (*CurPtr == 'x') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ // Requires at least one hex digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "invalid hexadecimal number");
+
+ unsigned long long Result;
+ if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
+ return ReturnError(TokStart, "invalid hexadecimal number");
+
+ // Consume the optional [hH].
+ if (*CurPtr == 'h' || *CurPtr == 'H')
+ ++CurPtr;
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ (int64_t)Result);
+ }
+
+ // Either octal or hexadecimal.
+ long long Value;
+ unsigned Radix = doLookAhead(CurPtr, 8);
+ bool isHex = Radix == 16;
+ StringRef Result(TokStart, CurPtr - TokStart);
+ if (Result.getAsInteger(Radix, Value))
+ return ReturnError(TokStart, !isHex ? "invalid octal number" :
+ "invalid hexdecimal number");
+
+ // Consume the [hH].
+ if (Radix == 16)
+ ++CurPtr;
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, Result, Value);
+}
+
+/// LexSingleQuote: Integer: 'b'
+AsmToken AsmLexer::LexSingleQuote() {
+ int CurChar = getNextChar();
+
+ if (CurChar == '\\')
+ CurChar = getNextChar();
+
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated single quote");
+
+ CurChar = getNextChar();
+
+ if (CurChar != '\'')
+ return ReturnError(TokStart, "single quote way too long");
+
+ // The idea here being that 'c' is basically just an integral
+ // constant.
+ StringRef Res = StringRef(TokStart,CurPtr - TokStart);
+ long long Value;
+
+ if (Res.startswith("\'\\")) {
+ char theChar = Res[2];
+ switch (theChar) {
+ default: Value = theChar; break;
+ case '\'': Value = '\''; break;
+ case 't': Value = '\t'; break;
+ case 'n': Value = '\n'; break;
+ case 'b': Value = '\b'; break;
+ }
+ } else
+ Value = TokStart[1];
+
+ return AsmToken(AsmToken::Integer, Res, Value);
+}
+
+
+/// LexQuote: String: "..."
+AsmToken AsmLexer::LexQuote() {
+ int CurChar = getNextChar();
+ // TODO: does gas allow multiline string constants?
+ while (CurChar != '"') {
+ if (CurChar == '\\') {
+ // Allow \", etc.
+ CurChar = getNextChar();
+ }
+
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated string constant");
+
+ CurChar = getNextChar();
+ }
+
+ return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
+}
+
+StringRef AsmLexer::LexUntilEndOfStatement() {
+ TokStart = CurPtr;
+
+ while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
+ !isAtStatementSeparator(CurPtr) && // End of statement marker.
+ *CurPtr != '\n' &&
+ *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
+ ++CurPtr;
+ }
+ return StringRef(TokStart, CurPtr-TokStart);
+}
+
+StringRef AsmLexer::LexUntilEndOfLine() {
+ TokStart = CurPtr;
+
+ while (*CurPtr != '\n' &&
+ *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
+ ++CurPtr;
+ }
+ return StringRef(TokStart, CurPtr-TokStart);
+}
+
+bool AsmLexer::isAtStartOfComment(char Char) {
+ // FIXME: This won't work for multi-character comment indicators like "//".
+ return Char == *MAI.getCommentString();
+}
+
+bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
+ return strncmp(Ptr, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0;
+}
+
+AsmToken AsmLexer::LexToken() {
+ TokStart = CurPtr;
+ // This always consumes at least one character.
+ int CurChar = getNextChar();
+
+ if (isAtStartOfComment(CurChar)) {
+ // If this comment starts with a '#', then return the Hash token and let
+ // the assembler parser see if it can be parsed as a cpp line filename
+ // comment. We do this only if we are at the start of a line.
+ if (CurChar == '#' && isAtStartOfLine)
+ return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ isAtStartOfLine = true;
+ return LexLineComment();
+ }
+ if (isAtStatementSeparator(TokStart)) {
+ CurPtr += strlen(MAI.getSeparatorString()) - 1;
+ return AsmToken(AsmToken::EndOfStatement,
+ StringRef(TokStart, strlen(MAI.getSeparatorString())));
+ }
+
+ // If we're missing a newline at EOF, make sure we still get an
+ // EndOfStatement token before the Eof token.
+ if (CurChar == EOF && !isAtStartOfLine) {
+ isAtStartOfLine = true;
+ return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
+ }
+
+ isAtStartOfLine = false;
+ switch (CurChar) {
+ default:
+ // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
+ return LexIdentifier();
+
+ // Unknown character, emit an error.
+ return ReturnError(TokStart, "invalid character in input");
+ case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
+ case 0:
+ case ' ':
+ case '\t':
+ if (SkipSpace) {
+ // Ignore whitespace.
+ return LexToken();
+ } else {
+ int len = 1;
+ while (*CurPtr==' ' || *CurPtr=='\t') {
+ CurPtr++;
+ len++;
+ }
+ return AsmToken(AsmToken::Space, StringRef(TokStart, len));
+ }
+ case '\n': // FALL THROUGH.
+ case '\r':
+ isAtStartOfLine = true;
+ return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
+ case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
+ case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
+ case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
+ case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
+ case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
+ case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
+ case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
+ case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
+ case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
+ case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
+ case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
+ case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
+ case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
+ case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
+ case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
+ case '=':
+ if (*CurPtr == '=')
+ return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
+ case '|':
+ if (*CurPtr == '|')
+ return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
+ case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
+ case '&':
+ if (*CurPtr == '&')
+ return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
+ case '!':
+ if (*CurPtr == '=')
+ return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
+ case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
+ case '/': return LexSlash();
+ case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ case '\'': return LexSingleQuote();
+ case '"': return LexQuote();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return LexDigit();
+ case '<':
+ switch (*CurPtr) {
+ case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
+ StringRef(TokStart, 2));
+ case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
+ StringRef(TokStart, 2));
+ case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
+ StringRef(TokStart, 2));
+ default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
+ }
+ case '>':
+ switch (*CurPtr) {
+ case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
+ StringRef(TokStart, 2));
+ case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
+ StringRef(TokStart, 2));
+ default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
+ }
+
+ // TODO: Quoted identifiers (objc methods etc)
+ // local labels: [0-9][:]
+ // Forward/backward labels: [0-9][fb]
+ // Integers, fp constants, character constants.
+ }
+}
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
new file mode 100644
index 000000000000..804734cea939
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -0,0 +1,4272 @@
+//===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the parser for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/AsmCond.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+#include <set>
+#include <string>
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+FatalAssemblerWarnings("fatal-assembler-warnings",
+ cl::desc("Consider warnings as error"));
+
+MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {}
+
+namespace {
+
+/// \brief Helper types for tracking macro definitions.
+typedef std::vector<AsmToken> MCAsmMacroArgument;
+typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
+typedef std::pair<StringRef, MCAsmMacroArgument> MCAsmMacroParameter;
+typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters;
+
+struct MCAsmMacro {
+ StringRef Name;
+ StringRef Body;
+ MCAsmMacroParameters Parameters;
+
+public:
+ MCAsmMacro(StringRef N, StringRef B, const MCAsmMacroParameters &P) :
+ Name(N), Body(B), Parameters(P) {}
+
+ MCAsmMacro(const MCAsmMacro& Other)
+ : Name(Other.Name), Body(Other.Body), Parameters(Other.Parameters) {}
+};
+
+/// \brief Helper class for storing information about an active macro
+/// instantiation.
+struct MacroInstantiation {
+ /// The macro being instantiated.
+ const MCAsmMacro *TheMacro;
+
+ /// The macro instantiation with substitutions.
+ MemoryBuffer *Instantiation;
+
+ /// The location of the instantiation.
+ SMLoc InstantiationLoc;
+
+ /// The buffer where parsing should resume upon instantiation completion.
+ int ExitBuffer;
+
+ /// The location where parsing should resume upon instantiation completion.
+ SMLoc ExitLoc;
+
+public:
+ MacroInstantiation(const MCAsmMacro *M, SMLoc IL, int EB, SMLoc EL,
+ MemoryBuffer *I);
+};
+
+struct ParseStatementInfo {
+ /// ParsedOperands - The parsed operands from the last parsed statement.
+ SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
+
+ /// Opcode - The opcode from the last parsed instruction.
+ unsigned Opcode;
+
+ /// Error - Was there an error parsing the inline assembly?
+ bool ParseError;
+
+ SmallVectorImpl<AsmRewrite> *AsmRewrites;
+
+ ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(0) {}
+ ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
+ : Opcode(~0), ParseError(false), AsmRewrites(rewrites) {}
+
+ ~ParseStatementInfo() {
+ // Free any parsed operands.
+ for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+ delete ParsedOperands[i];
+ ParsedOperands.clear();
+ }
+};
+
+/// \brief The concrete assembly parser instance.
+class AsmParser : public MCAsmParser {
+ AsmParser(const AsmParser &) LLVM_DELETED_FUNCTION;
+ void operator=(const AsmParser &) LLVM_DELETED_FUNCTION;
+private:
+ AsmLexer Lexer;
+ MCContext &Ctx;
+ MCStreamer &Out;
+ const MCAsmInfo &MAI;
+ SourceMgr &SrcMgr;
+ SourceMgr::DiagHandlerTy SavedDiagHandler;
+ void *SavedDiagContext;
+ MCAsmParserExtension *PlatformParser;
+
+ /// This is the current buffer index we're lexing from as managed by the
+ /// SourceMgr object.
+ int CurBuffer;
+
+ AsmCond TheCondState;
+ std::vector<AsmCond> TheCondStack;
+
+ /// ExtensionDirectiveMap - maps directive names to handler methods in parser
+ /// extensions. Extensions register themselves in this map by calling
+ /// addDirectiveHandler.
+ StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
+
+ /// MacroMap - Map of currently defined macros.
+ StringMap<MCAsmMacro*> MacroMap;
+
+ /// ActiveMacros - Stack of active macro instantiations.
+ std::vector<MacroInstantiation*> ActiveMacros;
+
+ /// Boolean tracking whether macro substitution is enabled.
+ unsigned MacrosEnabledFlag : 1;
+
+ /// Flag tracking whether any errors have been encountered.
+ unsigned HadError : 1;
+
+ /// The values from the last parsed cpp hash file line comment if any.
+ StringRef CppHashFilename;
+ int64_t CppHashLineNumber;
+ SMLoc CppHashLoc;
+ int CppHashBuf;
+
+ /// AssemblerDialect. ~OU means unset value and use value provided by MAI.
+ unsigned AssemblerDialect;
+
+ /// IsDarwin - is Darwin compatibility enabled?
+ bool IsDarwin;
+
+ /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
+ bool ParsingInlineAsm;
+
+public:
+ AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
+ const MCAsmInfo &MAI);
+ virtual ~AsmParser();
+
+ virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false);
+
+ virtual void addDirectiveHandler(StringRef Directive,
+ ExtensionDirectiveHandler Handler) {
+ ExtensionDirectiveMap[Directive] = Handler;
+ }
+
+public:
+ /// @name MCAsmParser Interface
+ /// {
+
+ virtual SourceMgr &getSourceManager() { return SrcMgr; }
+ virtual MCAsmLexer &getLexer() { return Lexer; }
+ virtual MCContext &getContext() { return Ctx; }
+ virtual MCStreamer &getStreamer() { return Out; }
+ virtual unsigned getAssemblerDialect() {
+ if (AssemblerDialect == ~0U)
+ return MAI.getAssemblerDialect();
+ else
+ return AssemblerDialect;
+ }
+ virtual void setAssemblerDialect(unsigned i) {
+ AssemblerDialect = i;
+ }
+
+ virtual bool Warning(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
+ virtual bool Error(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
+
+ virtual const AsmToken &Lex();
+
+ void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; }
+ bool isParsingInlineAsm() { return ParsingInlineAsm; }
+
+ bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ unsigned &NumOutputs, unsigned &NumInputs,
+ SmallVectorImpl<std::pair<void *,bool> > &OpDecls,
+ SmallVectorImpl<std::string> &Constraints,
+ SmallVectorImpl<std::string> &Clobbers,
+ const MCInstrInfo *MII,
+ const MCInstPrinter *IP,
+ MCAsmParserSemaCallback &SI);
+
+ bool parseExpression(const MCExpr *&Res);
+ virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool parseAbsoluteExpression(int64_t &Res);
+
+ /// parseIdentifier - Parse an identifier or string (as a quoted identifier)
+ /// and set \p Res to the identifier contents.
+ virtual bool parseIdentifier(StringRef &Res);
+ virtual void eatToEndOfStatement();
+
+ virtual void checkForValidSection();
+ /// }
+
+private:
+
+ bool ParseStatement(ParseStatementInfo &Info);
+ void EatToEndOfLine();
+ bool ParseCppHashLineFilenameComment(const SMLoc &L);
+
+ void CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body,
+ MCAsmMacroParameters Parameters);
+ bool expandMacro(raw_svector_ostream &OS, StringRef Body,
+ const MCAsmMacroParameters &Parameters,
+ const MCAsmMacroArguments &A,
+ const SMLoc &L);
+
+ /// \brief Are macros enabled in the parser?
+ bool MacrosEnabled() {return MacrosEnabledFlag;}
+
+ /// \brief Control a flag in the parser that enables or disables macros.
+ void SetMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;}
+
+ /// \brief Lookup a previously defined macro.
+ /// \param Name Macro name.
+ /// \returns Pointer to macro. NULL if no such macro was defined.
+ const MCAsmMacro* LookupMacro(StringRef Name);
+
+ /// \brief Define a new macro with the given name and information.
+ void DefineMacro(StringRef Name, const MCAsmMacro& Macro);
+
+ /// \brief Undefine a macro. If no such macro was defined, it's a no-op.
+ void UndefineMacro(StringRef Name);
+
+ /// \brief Are we inside a macro instantiation?
+ bool InsideMacroInstantiation() {return !ActiveMacros.empty();}
+
+ /// \brief Handle entry to macro instantiation.
+ ///
+ /// \param M The macro.
+ /// \param NameLoc Instantiation location.
+ bool HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc);
+
+ /// \brief Handle exit from macro instantiation.
+ void HandleMacroExit();
+
+ /// \brief Extract AsmTokens for a macro argument. If the argument delimiter
+ /// is initially unknown, set it to AsmToken::Eof. It will be set to the
+ /// correct delimiter by the method.
+ bool ParseMacroArgument(MCAsmMacroArgument &MA,
+ AsmToken::TokenKind &ArgumentDelimiter);
+
+ /// \brief Parse all macro arguments for a given macro.
+ bool ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A);
+
+ void PrintMacroInstantiations();
+ void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const {
+ SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
+ }
+ static void DiagHandler(const SMDiagnostic &Diag, void *Context);
+
+ /// EnterIncludeFile - Enter the specified file. This returns true on failure.
+ bool EnterIncludeFile(const std::string &Filename);
+ /// ProcessIncbinFile - Process the specified file for the .incbin directive.
+ /// This returns true on failure.
+ bool ProcessIncbinFile(const std::string &Filename);
+
+ /// \brief Reset the current lexer position to that given by \p Loc. The
+ /// current token is not set; clients should ensure Lex() is called
+ /// subsequently.
+ ///
+ /// \param InBuffer If not -1, should be the known buffer id that contains the
+ /// location.
+ void JumpToLoc(SMLoc Loc, int InBuffer=-1);
+
+ /// \brief Parse up to the end of statement and a return the contents from the
+ /// current token until the end of the statement; the current token on exit
+ /// will be either the EndOfStatement or EOF.
+ virtual StringRef parseStringToEndOfStatement();
+
+ /// \brief Parse until the end of a statement or a comma is encountered,
+ /// return the contents from the current token up to the end or comma.
+ StringRef ParseStringToComma();
+
+ bool ParseAssignment(StringRef Name, bool allow_redef,
+ bool NoDeadStrip = false);
+
+ bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
+
+ bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
+
+ // Generic (target and platform independent) directive parsing.
+ enum DirectiveKind {
+ DK_NO_DIRECTIVE, // Placeholder
+ DK_SET, DK_EQU, DK_EQUIV, DK_ASCII, DK_ASCIZ, DK_STRING, DK_BYTE, DK_SHORT,
+ DK_VALUE, DK_2BYTE, DK_LONG, DK_INT, DK_4BYTE, DK_QUAD, DK_8BYTE, DK_SINGLE,
+ DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW,
+ DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, DK_ENDR,
+ DK_BUNDLE_ALIGN_MODE, DK_BUNDLE_LOCK, DK_BUNDLE_UNLOCK,
+ DK_ZERO, DK_EXTERN, DK_GLOBL, DK_GLOBAL, DK_INDIRECT_SYMBOL,
+ DK_LAZY_REFERENCE, DK_NO_DEAD_STRIP, DK_SYMBOL_RESOLVER, DK_PRIVATE_EXTERN,
+ DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE,
+ DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT,
+ DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC,
+ DK_IF, DK_IFB, DK_IFNB, DK_IFC, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF,
+ DK_ELSEIF, DK_ELSE, DK_ENDIF,
+ DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS,
+ DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA,
+ DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER,
+ DK_CFI_OFFSET, DK_CFI_REL_OFFSET, DK_CFI_PERSONALITY, DK_CFI_LSDA,
+ DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE,
+ DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED,
+ DK_CFI_REGISTER,
+ DK_MACROS_ON, DK_MACROS_OFF, DK_MACRO, DK_ENDM, DK_ENDMACRO, DK_PURGEM,
+ DK_SLEB128, DK_ULEB128
+ };
+
+ /// DirectiveKindMap - Maps directive name --> DirectiveKind enum, for
+ /// directives parsed by this class.
+ StringMap<DirectiveKind> DirectiveKindMap;
+
+ // ".ascii", ".asciz", ".string"
+ bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
+ bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+ bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ...
+ bool ParseDirectiveFill(); // ".fill"
+ bool ParseDirectiveZero(); // ".zero"
+ // ".set", ".equ", ".equiv"
+ bool ParseDirectiveSet(StringRef IDVal, bool allow_redef);
+ bool ParseDirectiveOrg(); // ".org"
+ // ".align{,32}", ".p2align{,w,l}"
+ bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
+
+ // ".file", ".line", ".loc", ".stabs"
+ bool ParseDirectiveFile(SMLoc DirectiveLoc);
+ bool ParseDirectiveLine();
+ bool ParseDirectiveLoc();
+ bool ParseDirectiveStabs();
+
+ // .cfi directives
+ bool ParseDirectiveCFIRegister(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFISections();
+ bool ParseDirectiveCFIStartProc();
+ bool ParseDirectiveCFIEndProc();
+ bool ParseDirectiveCFIDefCfaOffset();
+ bool ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIAdjustCfaOffset();
+ bool ParseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIOffset(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
+ bool ParseDirectiveCFIRememberState();
+ bool ParseDirectiveCFIRestoreState();
+ bool ParseDirectiveCFISameValue(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRestore(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIEscape();
+ bool ParseDirectiveCFISignalFrame();
+ bool ParseDirectiveCFIUndefined(SMLoc DirectiveLoc);
+
+ // macro directives
+ bool ParseDirectivePurgeMacro(SMLoc DirectiveLoc);
+ bool ParseDirectiveEndMacro(StringRef Directive);
+ bool ParseDirectiveMacro(SMLoc DirectiveLoc);
+ bool ParseDirectiveMacrosOnOff(StringRef Directive);
+
+ // ".bundle_align_mode"
+ bool ParseDirectiveBundleAlignMode();
+ // ".bundle_lock"
+ bool ParseDirectiveBundleLock();
+ // ".bundle_unlock"
+ bool ParseDirectiveBundleUnlock();
+
+ // ".space", ".skip"
+ bool ParseDirectiveSpace(StringRef IDVal);
+
+ // .sleb128 (Signed=true) and .uleb128 (Signed=false)
+ bool ParseDirectiveLEB128(bool Signed);
+
+ /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
+ /// accepts a single symbol (which should be a label or an external).
+ bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
+
+ bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
+
+ bool ParseDirectiveAbort(); // ".abort"
+ bool ParseDirectiveInclude(); // ".include"
+ bool ParseDirectiveIncbin(); // ".incbin"
+
+ bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+ // ".ifb" or ".ifnb", depending on ExpectBlank.
+ bool ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
+ // ".ifc" or ".ifnc", depending on ExpectEqual.
+ bool ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual);
+ // ".ifdef" or ".ifndef", depending on expect_defined
+ bool ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
+ bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
+ bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
+ bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
+ virtual bool parseEscapedString(std::string &Data);
+
+ const MCExpr *ApplyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant);
+
+ // Macro-like directives
+ MCAsmMacro *ParseMacroLikeBody(SMLoc DirectiveLoc);
+ void InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
+ raw_svector_ostream &OS);
+ bool ParseDirectiveRept(SMLoc DirectiveLoc); // ".rept"
+ bool ParseDirectiveIrp(SMLoc DirectiveLoc); // ".irp"
+ bool ParseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc"
+ bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr"
+
+ // "_emit" or "__emit"
+ bool ParseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
+ size_t Len);
+
+ // "align"
+ bool ParseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
+
+ void initializeDirectiveKindMap();
+};
+}
+
+namespace llvm {
+
+extern MCAsmParserExtension *createDarwinAsmParser();
+extern MCAsmParserExtension *createELFAsmParser();
+extern MCAsmParserExtension *createCOFFAsmParser();
+
+}
+
+enum { DEFAULT_ADDRSPACE = 0 };
+
+AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
+ MCStreamer &_Out, const MCAsmInfo &_MAI)
+ : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
+ PlatformParser(0),
+ CurBuffer(0), MacrosEnabledFlag(true), CppHashLineNumber(0),
+ AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) {
+ // Save the old handler.
+ SavedDiagHandler = SrcMgr.getDiagHandler();
+ SavedDiagContext = SrcMgr.getDiagContext();
+ // Set our own handler which calls the saved handler.
+ SrcMgr.setDiagHandler(DiagHandler, this);
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+
+ // Initialize the platform / file format parser.
+ //
+ // FIXME: This is a hack, we need to (majorly) cleanup how these objects are
+ // created.
+ if (_MAI.hasMicrosoftFastStdCallMangling()) {
+ PlatformParser = createCOFFAsmParser();
+ PlatformParser->Initialize(*this);
+ } else if (_MAI.hasSubsectionsViaSymbols()) {
+ PlatformParser = createDarwinAsmParser();
+ PlatformParser->Initialize(*this);
+ IsDarwin = true;
+ } else {
+ PlatformParser = createELFAsmParser();
+ PlatformParser->Initialize(*this);
+ }
+
+ initializeDirectiveKindMap();
+}
+
+AsmParser::~AsmParser() {
+ assert(ActiveMacros.empty() && "Unexpected active macro instantiation!");
+
+ // Destroy any macros.
+ for (StringMap<MCAsmMacro*>::iterator it = MacroMap.begin(),
+ ie = MacroMap.end(); it != ie; ++it)
+ delete it->getValue();
+
+ delete PlatformParser;
+}
+
+void AsmParser::PrintMacroInstantiations() {
+ // Print the active macro instantiation stack.
+ for (std::vector<MacroInstantiation*>::const_reverse_iterator
+ it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it)
+ PrintMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
+ "while in macro instantiation");
+}
+
+bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
+ if (FatalAssemblerWarnings)
+ return Error(L, Msg, Ranges);
+ PrintMessage(L, SourceMgr::DK_Warning, Msg, Ranges);
+ PrintMacroInstantiations();
+ return false;
+}
+
+bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
+ HadError = true;
+ PrintMessage(L, SourceMgr::DK_Error, Msg, Ranges);
+ PrintMacroInstantiations();
+ return true;
+}
+
+bool AsmParser::EnterIncludeFile(const std::string &Filename) {
+ std::string IncludedFile;
+ int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
+ if (NewBuf == -1)
+ return true;
+
+ CurBuffer = NewBuf;
+
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+
+ return false;
+}
+
+/// Process the specified .incbin file by seaching for it in the include paths
+/// then just emitting the byte contents of the file to the streamer. This
+/// returns true on failure.
+bool AsmParser::ProcessIncbinFile(const std::string &Filename) {
+ std::string IncludedFile;
+ int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
+ if (NewBuf == -1)
+ return true;
+
+ // Pick up the bytes from the file and emit them.
+ getStreamer().EmitBytes(SrcMgr.getMemoryBuffer(NewBuf)->getBuffer(),
+ DEFAULT_ADDRSPACE);
+ return false;
+}
+
+void AsmParser::JumpToLoc(SMLoc Loc, int InBuffer) {
+ if (InBuffer != -1) {
+ CurBuffer = InBuffer;
+ } else {
+ CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ }
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
+}
+
+const AsmToken &AsmParser::Lex() {
+ const AsmToken *tok = &Lexer.Lex();
+
+ if (tok->is(AsmToken::Eof)) {
+ // If this is the end of an included file, pop the parent file off the
+ // include stack.
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc != SMLoc()) {
+ JumpToLoc(ParentIncludeLoc);
+ tok = &Lexer.Lex();
+ }
+ }
+
+ if (tok->is(AsmToken::Error))
+ Error(Lexer.getErrLoc(), Lexer.getErr());
+
+ return *tok;
+}
+
+bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
+ // Create the initial section, if requested.
+ if (!NoInitialTextSection)
+ Out.InitSections();
+
+ // Prime the lexer.
+ Lex();
+
+ HadError = false;
+ AsmCond StartingCondState = TheCondState;
+
+ // If we are generating dwarf for assembly source files save the initial text
+ // section and generate a .file directive.
+ if (getContext().getGenDwarfForAssembly()) {
+ getContext().setGenDwarfSection(getStreamer().getCurrentSection());
+ MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
+ getStreamer().EmitLabel(SectionStartSym);
+ getContext().setGenDwarfSectionStartSym(SectionStartSym);
+ getStreamer().EmitDwarfFileDirective(getContext().nextGenDwarfFileNumber(),
+ StringRef(),
+ getContext().getMainFileName());
+ }
+
+ // While we have input, parse each statement.
+ while (Lexer.isNot(AsmToken::Eof)) {
+ ParseStatementInfo Info;
+ if (!ParseStatement(Info)) continue;
+
+ // We had an error, validate that one was emitted and recover by skipping to
+ // the next line.
+ assert(HadError && "Parse statement returned an error, but none emitted!");
+ eatToEndOfStatement();
+ }
+
+ if (TheCondState.TheCond != StartingCondState.TheCond ||
+ TheCondState.Ignore != StartingCondState.Ignore)
+ return TokError("unmatched .ifs or .elses");
+
+ // Check to see there are no empty DwarfFile slots.
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
+ getContext().getMCDwarfFiles();
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+ if (!MCDwarfFiles[i])
+ TokError("unassigned file number: " + Twine(i) + " for .file directives");
+ }
+
+ // Check to see that all assembler local symbols were actually defined.
+ // Targets that don't do subsections via symbols may not want this, though,
+ // so conservatively exclude them. Only do this if we're finalizing, though,
+ // as otherwise we won't necessarilly have seen everything yet.
+ if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
+ const MCContext::SymbolTable &Symbols = getContext().getSymbols();
+ for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
+ e = Symbols.end();
+ i != e; ++i) {
+ MCSymbol *Sym = i->getValue();
+ // Variable symbols may not be marked as defined, so check those
+ // explicitly. If we know it's a variable, we have a definition for
+ // the purposes of this check.
+ if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
+ // FIXME: We would really like to refer back to where the symbol was
+ // first referenced for a source location. We need to add something
+ // to track that. Currently, we just point to the end of the file.
+ PrintMessage(getLexer().getLoc(), SourceMgr::DK_Error,
+ "assembler local symbol '" + Sym->getName() +
+ "' not defined");
+ }
+ }
+
+
+ // Finalize the output stream if there are no errors and if the client wants
+ // us to.
+ if (!HadError && !NoFinalize)
+ Out.Finish();
+
+ return HadError;
+}
+
+void AsmParser::checkForValidSection() {
+ if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) {
+ TokError("expected section directive before assembly directive");
+ Out.InitToTextSection();
+ }
+}
+
+/// eatToEndOfStatement - Throw away the rest of the line for testing purposes.
+void AsmParser::eatToEndOfStatement() {
+ while (Lexer.isNot(AsmToken::EndOfStatement) &&
+ Lexer.isNot(AsmToken::Eof))
+ Lex();
+
+ // Eat EOL.
+ if (Lexer.is(AsmToken::EndOfStatement))
+ Lex();
+}
+
+StringRef AsmParser::parseStringToEndOfStatement() {
+ const char *Start = getTok().getLoc().getPointer();
+
+ while (Lexer.isNot(AsmToken::EndOfStatement) &&
+ Lexer.isNot(AsmToken::Eof))
+ Lex();
+
+ const char *End = getTok().getLoc().getPointer();
+ return StringRef(Start, End - Start);
+}
+
+StringRef AsmParser::ParseStringToComma() {
+ const char *Start = getTok().getLoc().getPointer();
+
+ while (Lexer.isNot(AsmToken::EndOfStatement) &&
+ Lexer.isNot(AsmToken::Comma) &&
+ Lexer.isNot(AsmToken::Eof))
+ Lex();
+
+ const char *End = getTok().getLoc().getPointer();
+ return StringRef(Start, End - Start);
+}
+
+/// ParseParenExpr - Parse a paren expression and return it.
+/// NOTE: This assumes the leading '(' has already been consumed.
+///
+/// parenexpr ::= expr)
+///
+bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ if (parseExpression(Res)) return true;
+ if (Lexer.isNot(AsmToken::RParen))
+ return TokError("expected ')' in parentheses expression");
+ EndLoc = Lexer.getTok().getEndLoc();
+ Lex();
+ return false;
+}
+
+/// ParseBracketExpr - Parse a bracket expression and return it.
+/// NOTE: This assumes the leading '[' has already been consumed.
+///
+/// bracketexpr ::= expr]
+///
+bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ if (parseExpression(Res)) return true;
+ if (Lexer.isNot(AsmToken::RBrac))
+ return TokError("expected ']' in brackets expression");
+ EndLoc = Lexer.getTok().getEndLoc();
+ Lex();
+ return false;
+}
+
+/// ParsePrimaryExpr - Parse a primary expression and return it.
+/// primaryexpr ::= (parenexpr
+/// primaryexpr ::= symbol
+/// primaryexpr ::= number
+/// primaryexpr ::= '.'
+/// primaryexpr ::= ~,+,- primaryexpr
+bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ SMLoc FirstTokenLoc = getLexer().getLoc();
+ AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
+ switch (FirstTokenKind) {
+ default:
+ return TokError("unknown token in expression");
+ // If we have an error assume that we've already handled it.
+ case AsmToken::Error:
+ return true;
+ case AsmToken::Exclaim:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreateLNot(Res, getContext());
+ return false;
+ case AsmToken::Dollar:
+ case AsmToken::String:
+ case AsmToken::Identifier: {
+ StringRef Identifier;
+ if (parseIdentifier(Identifier)) {
+ if (FirstTokenKind == AsmToken::Dollar)
+ return Error(FirstTokenLoc, "invalid token in expression");
+ return true;
+ }
+
+ EndLoc = SMLoc::getFromPointer(Identifier.end());
+
+ // This is a symbol reference.
+ std::pair<StringRef, StringRef> Split = Identifier.split('@');
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
+
+ // Lookup the symbol variant if used.
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ if (Split.first.size() != Identifier.size()) {
+ Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
+ if (Variant == MCSymbolRefExpr::VK_Invalid) {
+ Variant = MCSymbolRefExpr::VK_None;
+ return TokError("invalid variant '" + Split.second + "'");
+ }
+ }
+
+ // If this is an absolute variable reference, substitute it now to preserve
+ // semantics in the face of reassignment.
+ if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
+ if (Variant)
+ return Error(EndLoc, "unexpected modifier on variable reference");
+
+ Res = Sym->getVariableValue();
+ return false;
+ }
+
+ // Otherwise create a symbol ref.
+ Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
+ return false;
+ }
+ case AsmToken::Integer: {
+ SMLoc Loc = getTok().getLoc();
+ int64_t IntVal = getTok().getIntVal();
+ Res = MCConstantExpr::Create(IntVal, getContext());
+ EndLoc = Lexer.getTok().getEndLoc();
+ Lex(); // Eat token.
+ // Look for 'b' or 'f' following an Integer as a directional label
+ if (Lexer.getKind() == AsmToken::Identifier) {
+ StringRef IDVal = getTok().getString();
+ if (IDVal == "f" || IDVal == "b"){
+ MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal,
+ IDVal == "f" ? 1 : 0);
+ Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ getContext());
+ if (IDVal == "b" && Sym->isUndefined())
+ return Error(Loc, "invalid reference to undefined symbol");
+ EndLoc = Lexer.getTok().getEndLoc();
+ Lex(); // Eat identifier.
+ }
+ }
+ return false;
+ }
+ case AsmToken::Real: {
+ APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ Res = MCConstantExpr::Create(IntVal, getContext());
+ EndLoc = Lexer.getTok().getEndLoc();
+ Lex(); // Eat token.
+ return false;
+ }
+ case AsmToken::Dot: {
+ // This is a '.' reference, which references the current PC. Emit a
+ // temporary label to the streamer and refer to it.
+ MCSymbol *Sym = Ctx.CreateTempSymbol();
+ Out.EmitLabel(Sym);
+ Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ EndLoc = Lexer.getTok().getEndLoc();
+ Lex(); // Eat identifier.
+ return false;
+ }
+ case AsmToken::LParen:
+ Lex(); // Eat the '('.
+ return ParseParenExpr(Res, EndLoc);
+ case AsmToken::LBrac:
+ if (!PlatformParser->HasBracketExpressions())
+ return TokError("brackets expression not supported on this target");
+ Lex(); // Eat the '['.
+ return ParseBracketExpr(Res, EndLoc);
+ case AsmToken::Minus:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreateMinus(Res, getContext());
+ return false;
+ case AsmToken::Plus:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreatePlus(Res, getContext());
+ return false;
+ case AsmToken::Tilde:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreateNot(Res, getContext());
+ return false;
+ }
+}
+
+bool AsmParser::parseExpression(const MCExpr *&Res) {
+ SMLoc EndLoc;
+ return parseExpression(Res, EndLoc);
+}
+
+const MCExpr *
+AsmParser::ApplyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant) {
+ // Recurse over the given expression, rebuilding it to apply the given variant
+ // if there is exactly one symbol.
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return 0;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ if (SRE->getKind() != MCSymbolRefExpr::VK_None) {
+ TokError("invalid variant on expression '" +
+ getTok().getIdentifier() + "' (already modified)");
+ return E;
+ }
+
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = ApplyModifierToExpr(UE->getSubExpr(), Variant);
+ if (!Sub)
+ return 0;
+ return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = ApplyModifierToExpr(BE->getLHS(), Variant);
+ const MCExpr *RHS = ApplyModifierToExpr(BE->getRHS(), Variant);
+
+ if (!LHS && !RHS)
+ return 0;
+
+ if (!LHS) LHS = BE->getLHS();
+ if (!RHS) RHS = BE->getRHS();
+
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+/// parseExpression - Parse an expression and return it.
+///
+/// expr ::= expr &&,|| expr -> lowest.
+/// expr ::= expr |,^,&,! expr
+/// expr ::= expr ==,!=,<>,<,<=,>,>= expr
+/// expr ::= expr <<,>> expr
+/// expr ::= expr +,- expr
+/// expr ::= expr *,/,% expr -> highest.
+/// expr ::= primaryexpr
+///
+bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+ // Parse the expression.
+ Res = 0;
+ if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
+ return true;
+
+ // As a special case, we support 'a op b @ modifier' by rewriting the
+ // expression to include the modifier. This is inefficient, but in general we
+ // expect users to use 'a@modifier op b'.
+ if (Lexer.getKind() == AsmToken::At) {
+ Lex();
+
+ if (Lexer.isNot(AsmToken::Identifier))
+ return TokError("unexpected symbol modifier following '@'");
+
+ MCSymbolRefExpr::VariantKind Variant =
+ MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier());
+ if (Variant == MCSymbolRefExpr::VK_Invalid)
+ return TokError("invalid variant '" + getTok().getIdentifier() + "'");
+
+ const MCExpr *ModifiedRes = ApplyModifierToExpr(Res, Variant);
+ if (!ModifiedRes) {
+ return TokError("invalid modifier '" + getTok().getIdentifier() +
+ "' (no symbols present)");
+ }
+
+ Res = ModifiedRes;
+ Lex();
+ }
+
+ // Try to constant fold it up front, if possible.
+ int64_t Value;
+ if (Res->EvaluateAsAbsolute(Value))
+ Res = MCConstantExpr::Create(Value, getContext());
+
+ return false;
+}
+
+bool AsmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+ Res = 0;
+ return ParseParenExpr(Res, EndLoc) ||
+ ParseBinOpRHS(1, Res, EndLoc);
+}
+
+bool AsmParser::parseAbsoluteExpression(int64_t &Res) {
+ const MCExpr *Expr;
+
+ SMLoc StartLoc = Lexer.getLoc();
+ if (parseExpression(Expr))
+ return true;
+
+ if (!Expr->EvaluateAsAbsolute(Res))
+ return Error(StartLoc, "expected absolute expression");
+
+ return false;
+}
+
+static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
+ MCBinaryExpr::Opcode &Kind) {
+ switch (K) {
+ default:
+ return 0; // not a binop.
+
+ // Lowest Precedence: &&, ||
+ case AsmToken::AmpAmp:
+ Kind = MCBinaryExpr::LAnd;
+ return 1;
+ case AsmToken::PipePipe:
+ Kind = MCBinaryExpr::LOr;
+ return 1;
+
+
+ // Low Precedence: |, &, ^
+ //
+ // FIXME: gas seems to support '!' as an infix operator?
+ case AsmToken::Pipe:
+ Kind = MCBinaryExpr::Or;
+ return 2;
+ case AsmToken::Caret:
+ Kind = MCBinaryExpr::Xor;
+ return 2;
+ case AsmToken::Amp:
+ Kind = MCBinaryExpr::And;
+ return 2;
+
+ // Low Intermediate Precedence: ==, !=, <>, <, <=, >, >=
+ case AsmToken::EqualEqual:
+ Kind = MCBinaryExpr::EQ;
+ return 3;
+ case AsmToken::ExclaimEqual:
+ case AsmToken::LessGreater:
+ Kind = MCBinaryExpr::NE;
+ return 3;
+ case AsmToken::Less:
+ Kind = MCBinaryExpr::LT;
+ return 3;
+ case AsmToken::LessEqual:
+ Kind = MCBinaryExpr::LTE;
+ return 3;
+ case AsmToken::Greater:
+ Kind = MCBinaryExpr::GT;
+ return 3;
+ case AsmToken::GreaterEqual:
+ Kind = MCBinaryExpr::GTE;
+ return 3;
+
+ // Intermediate Precedence: <<, >>
+ case AsmToken::LessLess:
+ Kind = MCBinaryExpr::Shl;
+ return 4;
+ case AsmToken::GreaterGreater:
+ Kind = MCBinaryExpr::Shr;
+ return 4;
+
+ // High Intermediate Precedence: +, -
+ case AsmToken::Plus:
+ Kind = MCBinaryExpr::Add;
+ return 5;
+ case AsmToken::Minus:
+ Kind = MCBinaryExpr::Sub;
+ return 5;
+
+ // Highest Precedence: *, /, %
+ case AsmToken::Star:
+ Kind = MCBinaryExpr::Mul;
+ return 6;
+ case AsmToken::Slash:
+ Kind = MCBinaryExpr::Div;
+ return 6;
+ case AsmToken::Percent:
+ Kind = MCBinaryExpr::Mod;
+ return 6;
+ }
+}
+
+
+/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
+/// Res contains the LHS of the expression on input.
+bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
+ SMLoc &EndLoc) {
+ while (1) {
+ MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
+ unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
+
+ // If the next token is lower precedence than we are allowed to eat, return
+ // successfully with what we ate already.
+ if (TokPrec < Precedence)
+ return false;
+
+ Lex();
+
+ // Eat the next primary expression.
+ const MCExpr *RHS;
+ if (ParsePrimaryExpr(RHS, EndLoc)) return true;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ MCBinaryExpr::Opcode Dummy;
+ unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
+ if (TokPrec < NextTokPrec) {
+ if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true;
+ }
+
+ // Merge LHS and RHS according to operator.
+ Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
+ }
+}
+
+/// ParseStatement:
+/// ::= EndOfStatement
+/// ::= Label* Directive ...Operands... EndOfStatement
+/// ::= Label* Identifier OperandList* EndOfStatement
+bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
+ if (Lexer.is(AsmToken::EndOfStatement)) {
+ Out.AddBlankLine();
+ Lex();
+ return false;
+ }
+
+ // Statements always start with an identifier or are a full line comment.
+ AsmToken ID = getTok();
+ SMLoc IDLoc = ID.getLoc();
+ StringRef IDVal;
+ int64_t LocalLabelVal = -1;
+ // A full line comment is a '#' as the first token.
+ if (Lexer.is(AsmToken::Hash))
+ return ParseCppHashLineFilenameComment(IDLoc);
+
+ // Allow an integer followed by a ':' as a directional local label.
+ if (Lexer.is(AsmToken::Integer)) {
+ LocalLabelVal = getTok().getIntVal();
+ if (LocalLabelVal < 0) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ IDVal = "";
+ } else {
+ IDVal = getTok().getString();
+ Lex(); // Consume the integer token to be used as an identifier token.
+ if (Lexer.getKind() != AsmToken::Colon) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ }
+ }
+ } else if (Lexer.is(AsmToken::Dot)) {
+ // Treat '.' as a valid identifier in this context.
+ Lex();
+ IDVal = ".";
+ } else if (parseIdentifier(IDVal)) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ IDVal = "";
+ }
+
+ // Handle conditional assembly here before checking for skipping. We
+ // have to do this so that .endif isn't skipped in a ".if 0" block for
+ // example.
+ StringMap<DirectiveKind>::const_iterator DirKindIt =
+ DirectiveKindMap.find(IDVal);
+ DirectiveKind DirKind =
+ (DirKindIt == DirectiveKindMap.end()) ? DK_NO_DIRECTIVE :
+ DirKindIt->getValue();
+ switch (DirKind) {
+ default:
+ break;
+ case DK_IF:
+ return ParseDirectiveIf(IDLoc);
+ case DK_IFB:
+ return ParseDirectiveIfb(IDLoc, true);
+ case DK_IFNB:
+ return ParseDirectiveIfb(IDLoc, false);
+ case DK_IFC:
+ return ParseDirectiveIfc(IDLoc, true);
+ case DK_IFNC:
+ return ParseDirectiveIfc(IDLoc, false);
+ case DK_IFDEF:
+ return ParseDirectiveIfdef(IDLoc, true);
+ case DK_IFNDEF:
+ case DK_IFNOTDEF:
+ return ParseDirectiveIfdef(IDLoc, false);
+ case DK_ELSEIF:
+ return ParseDirectiveElseIf(IDLoc);
+ case DK_ELSE:
+ return ParseDirectiveElse(IDLoc);
+ case DK_ENDIF:
+ return ParseDirectiveEndIf(IDLoc);
+ }
+
+ // Ignore the statement if in the middle of inactive conditional
+ // (e.g. ".if 0").
+ if (TheCondState.Ignore) {
+ eatToEndOfStatement();
+ return false;
+ }
+
+ // FIXME: Recurse on local labels?
+
+ // See what kind of statement we have.
+ switch (Lexer.getKind()) {
+ case AsmToken::Colon: {
+ checkForValidSection();
+
+ // identifier ':' -> Label.
+ Lex();
+
+ // Diagnose attempt to use '.' as a label.
+ if (IDVal == ".")
+ return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
+
+ // Diagnose attempt to use a variable as a label.
+ //
+ // FIXME: Diagnostics. Note the location of the definition as a label.
+ // FIXME: This doesn't diagnose assignment to a symbol which has been
+ // implicitly marked as external.
+ MCSymbol *Sym;
+ if (LocalLabelVal == -1)
+ Sym = getContext().GetOrCreateSymbol(IDVal);
+ else
+ Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
+ if (!Sym->isUndefined() || Sym->isVariable())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // Emit the label.
+ if (!ParsingInlineAsm)
+ Out.EmitLabel(Sym);
+
+ // If we are generating dwarf for assembly source files then gather the
+ // info to make a dwarf label entry for this label if needed.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
+ IDLoc);
+
+ // Consume any end of statement token, if present, to avoid spurious
+ // AddBlankLine calls().
+ if (Lexer.is(AsmToken::EndOfStatement)) {
+ Lex();
+ if (Lexer.is(AsmToken::Eof))
+ return false;
+ }
+
+ return false;
+ }
+
+ case AsmToken::Equal:
+ // identifier '=' ... -> assignment statement
+ Lex();
+
+ return ParseAssignment(IDVal, true);
+
+ default: // Normal instruction or directive.
+ break;
+ }
+
+ // If macros are enabled, check to see if this is a macro instantiation.
+ if (MacrosEnabled())
+ if (const MCAsmMacro *M = LookupMacro(IDVal)) {
+ return HandleMacroEntry(M, IDLoc);
+ }
+
+ // Otherwise, we have a normal instruction or directive.
+
+ // Directives start with "."
+ if (IDVal[0] == '.' && IDVal != ".") {
+ // There are several entities interested in parsing directives:
+ //
+ // 1. The target-specific assembly parser. Some directives are target
+ // specific or may potentially behave differently on certain targets.
+ // 2. Asm parser extensions. For example, platform-specific parsers
+ // (like the ELF parser) register themselves as extensions.
+ // 3. The generic directive parser implemented by this class. These are
+ // all the directives that behave in a target and platform independent
+ // manner, or at least have a default behavior that's shared between
+ // all targets and platforms.
+
+ // First query the target-specific parser. It will return 'true' if it
+ // isn't interested in this directive.
+ if (!getTargetParser().ParseDirective(ID))
+ return false;
+
+ // Next, check the extention directive map to see if any extension has
+ // registered itself to parse this directive.
+ std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
+ ExtensionDirectiveMap.lookup(IDVal);
+ if (Handler.first)
+ return (*Handler.second)(Handler.first, IDVal, IDLoc);
+
+ // Finally, if no one else is interested in this directive, it must be
+ // generic and familiar to this class.
+ switch (DirKind) {
+ default:
+ break;
+ case DK_SET:
+ case DK_EQU:
+ return ParseDirectiveSet(IDVal, true);
+ case DK_EQUIV:
+ return ParseDirectiveSet(IDVal, false);
+ case DK_ASCII:
+ return ParseDirectiveAscii(IDVal, false);
+ case DK_ASCIZ:
+ case DK_STRING:
+ return ParseDirectiveAscii(IDVal, true);
+ case DK_BYTE:
+ return ParseDirectiveValue(1);
+ case DK_SHORT:
+ case DK_VALUE:
+ case DK_2BYTE:
+ return ParseDirectiveValue(2);
+ case DK_LONG:
+ case DK_INT:
+ case DK_4BYTE:
+ return ParseDirectiveValue(4);
+ case DK_QUAD:
+ case DK_8BYTE:
+ return ParseDirectiveValue(8);
+ case DK_SINGLE:
+ case DK_FLOAT:
+ return ParseDirectiveRealValue(APFloat::IEEEsingle);
+ case DK_DOUBLE:
+ return ParseDirectiveRealValue(APFloat::IEEEdouble);
+ case DK_ALIGN: {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1);
+ }
+ case DK_ALIGN32: {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4);
+ }
+ case DK_BALIGN:
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
+ case DK_BALIGNW:
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
+ case DK_BALIGNL:
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
+ case DK_P2ALIGN:
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
+ case DK_P2ALIGNW:
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
+ case DK_P2ALIGNL:
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
+ case DK_ORG:
+ return ParseDirectiveOrg();
+ case DK_FILL:
+ return ParseDirectiveFill();
+ case DK_ZERO:
+ return ParseDirectiveZero();
+ case DK_EXTERN:
+ eatToEndOfStatement(); // .extern is the default, ignore it.
+ return false;
+ case DK_GLOBL:
+ case DK_GLOBAL:
+ return ParseDirectiveSymbolAttribute(MCSA_Global);
+ case DK_INDIRECT_SYMBOL:
+ return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol);
+ case DK_LAZY_REFERENCE:
+ return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
+ case DK_NO_DEAD_STRIP:
+ return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
+ case DK_SYMBOL_RESOLVER:
+ return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver);
+ case DK_PRIVATE_EXTERN:
+ return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
+ case DK_REFERENCE:
+ return ParseDirectiveSymbolAttribute(MCSA_Reference);
+ case DK_WEAK_DEFINITION:
+ return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition);
+ case DK_WEAK_REFERENCE:
+ return ParseDirectiveSymbolAttribute(MCSA_WeakReference);
+ case DK_WEAK_DEF_CAN_BE_HIDDEN:
+ return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
+ case DK_COMM:
+ case DK_COMMON:
+ return ParseDirectiveComm(/*IsLocal=*/false);
+ case DK_LCOMM:
+ return ParseDirectiveComm(/*IsLocal=*/true);
+ case DK_ABORT:
+ return ParseDirectiveAbort();
+ case DK_INCLUDE:
+ return ParseDirectiveInclude();
+ case DK_INCBIN:
+ return ParseDirectiveIncbin();
+ case DK_CODE16:
+ case DK_CODE16GCC:
+ return TokError(Twine(IDVal) + " not supported yet");
+ case DK_REPT:
+ return ParseDirectiveRept(IDLoc);
+ case DK_IRP:
+ return ParseDirectiveIrp(IDLoc);
+ case DK_IRPC:
+ return ParseDirectiveIrpc(IDLoc);
+ case DK_ENDR:
+ return ParseDirectiveEndr(IDLoc);
+ case DK_BUNDLE_ALIGN_MODE:
+ return ParseDirectiveBundleAlignMode();
+ case DK_BUNDLE_LOCK:
+ return ParseDirectiveBundleLock();
+ case DK_BUNDLE_UNLOCK:
+ return ParseDirectiveBundleUnlock();
+ case DK_SLEB128:
+ return ParseDirectiveLEB128(true);
+ case DK_ULEB128:
+ return ParseDirectiveLEB128(false);
+ case DK_SPACE:
+ case DK_SKIP:
+ return ParseDirectiveSpace(IDVal);
+ case DK_FILE:
+ return ParseDirectiveFile(IDLoc);
+ case DK_LINE:
+ return ParseDirectiveLine();
+ case DK_LOC:
+ return ParseDirectiveLoc();
+ case DK_STABS:
+ return ParseDirectiveStabs();
+ case DK_CFI_SECTIONS:
+ return ParseDirectiveCFISections();
+ case DK_CFI_STARTPROC:
+ return ParseDirectiveCFIStartProc();
+ case DK_CFI_ENDPROC:
+ return ParseDirectiveCFIEndProc();
+ case DK_CFI_DEF_CFA:
+ return ParseDirectiveCFIDefCfa(IDLoc);
+ case DK_CFI_DEF_CFA_OFFSET:
+ return ParseDirectiveCFIDefCfaOffset();
+ case DK_CFI_ADJUST_CFA_OFFSET:
+ return ParseDirectiveCFIAdjustCfaOffset();
+ case DK_CFI_DEF_CFA_REGISTER:
+ return ParseDirectiveCFIDefCfaRegister(IDLoc);
+ case DK_CFI_OFFSET:
+ return ParseDirectiveCFIOffset(IDLoc);
+ case DK_CFI_REL_OFFSET:
+ return ParseDirectiveCFIRelOffset(IDLoc);
+ case DK_CFI_PERSONALITY:
+ return ParseDirectiveCFIPersonalityOrLsda(true);
+ case DK_CFI_LSDA:
+ return ParseDirectiveCFIPersonalityOrLsda(false);
+ case DK_CFI_REMEMBER_STATE:
+ return ParseDirectiveCFIRememberState();
+ case DK_CFI_RESTORE_STATE:
+ return ParseDirectiveCFIRestoreState();
+ case DK_CFI_SAME_VALUE:
+ return ParseDirectiveCFISameValue(IDLoc);
+ case DK_CFI_RESTORE:
+ return ParseDirectiveCFIRestore(IDLoc);
+ case DK_CFI_ESCAPE:
+ return ParseDirectiveCFIEscape();
+ case DK_CFI_SIGNAL_FRAME:
+ return ParseDirectiveCFISignalFrame();
+ case DK_CFI_UNDEFINED:
+ return ParseDirectiveCFIUndefined(IDLoc);
+ case DK_CFI_REGISTER:
+ return ParseDirectiveCFIRegister(IDLoc);
+ case DK_MACROS_ON:
+ case DK_MACROS_OFF:
+ return ParseDirectiveMacrosOnOff(IDVal);
+ case DK_MACRO:
+ return ParseDirectiveMacro(IDLoc);
+ case DK_ENDM:
+ case DK_ENDMACRO:
+ return ParseDirectiveEndMacro(IDVal);
+ case DK_PURGEM:
+ return ParseDirectivePurgeMacro(IDLoc);
+ }
+
+ return Error(IDLoc, "unknown directive");
+ }
+
+ // __asm _emit or __asm __emit
+ if (ParsingInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
+ IDVal == "_EMIT" || IDVal == "__EMIT"))
+ return ParseDirectiveMSEmit(IDLoc, Info, IDVal.size());
+
+ // __asm align
+ if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
+ return ParseDirectiveMSAlign(IDLoc, Info);
+
+ checkForValidSection();
+
+ // Canonicalize the opcode to lower case.
+ std::string OpcodeStr = IDVal.lower();
+ ParseInstructionInfo IInfo(Info.AsmRewrites);
+ bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr,
+ IDLoc, Info.ParsedOperands);
+ Info.ParseError = HadError;
+
+ // Dump the parsed representation, if requested.
+ if (getShowParsedOperands()) {
+ SmallString<256> Str;
+ raw_svector_ostream OS(Str);
+ OS << "parsed instruction: [";
+ for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
+ if (i != 0)
+ OS << ", ";
+ Info.ParsedOperands[i]->print(OS);
+ }
+ OS << "]";
+
+ PrintMessage(IDLoc, SourceMgr::DK_Note, OS.str());
+ }
+
+ // If we are generating dwarf for assembly source files and the current
+ // section is the initial text section then generate a .loc directive for
+ // the instruction.
+ if (!HadError && getContext().getGenDwarfForAssembly() &&
+ getContext().getGenDwarfSection() == getStreamer().getCurrentSection()) {
+
+ unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
+
+ // If we previously parsed a cpp hash file line comment then make sure the
+ // current Dwarf File is for the CppHashFilename if not then emit the
+ // Dwarf File table for it and adjust the line number for the .loc.
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
+ getContext().getMCDwarfFiles();
+ if (CppHashFilename.size() != 0) {
+ if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() !=
+ CppHashFilename)
+ getStreamer().EmitDwarfFileDirective(
+ getContext().nextGenDwarfFileNumber(), StringRef(), CppHashFilename);
+
+ unsigned CppHashLocLineNo = SrcMgr.FindLineNumber(CppHashLoc,CppHashBuf);
+ Line = CppHashLineNumber - 1 + (Line - CppHashLocLineNo);
+ }
+
+ getStreamer().EmitDwarfLocDirective(getContext().getGenDwarfFileNumber(),
+ Line, 0, DWARF2_LINE_DEFAULT_IS_STMT ?
+ DWARF2_FLAG_IS_STMT : 0, 0, 0,
+ StringRef());
+ }
+
+ // If parsing succeeded, match the instruction.
+ if (!HadError) {
+ unsigned ErrorInfo;
+ HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode,
+ Info.ParsedOperands,
+ Out, ErrorInfo,
+ ParsingInlineAsm);
+ }
+
+ // Don't skip the rest of the line, the instruction parser is responsible for
+ // that.
+ return false;
+}
+
+/// EatToEndOfLine uses the Lexer to eat the characters to the end of the line
+/// since they may not be able to be tokenized to get to the end of line token.
+void AsmParser::EatToEndOfLine() {
+ if (!Lexer.is(AsmToken::EndOfStatement))
+ Lexer.LexUntilEndOfLine();
+ // Eat EOL.
+ Lex();
+}
+
+/// ParseCppHashLineFilenameComment as this:
+/// ::= # number "filename"
+/// or just as a full line comment if it doesn't have a number and a string.
+bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) {
+ Lex(); // Eat the hash token.
+
+ if (getLexer().isNot(AsmToken::Integer)) {
+ // Consume the line since in cases it is not a well-formed line directive,
+ // as if were simply a full line comment.
+ EatToEndOfLine();
+ return false;
+ }
+
+ int64_t LineNumber = getTok().getIntVal();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String)) {
+ EatToEndOfLine();
+ return false;
+ }
+
+ StringRef Filename = getTok().getString();
+ // Get rid of the enclosing quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Save the SMLoc, Filename and LineNumber for later use by diagnostics.
+ CppHashLoc = L;
+ CppHashFilename = Filename;
+ CppHashLineNumber = LineNumber;
+ CppHashBuf = CurBuffer;
+
+ // Ignore any trailing characters, they're just comment.
+ EatToEndOfLine();
+ return false;
+}
+
+/// DiagHandler - will use the last parsed cpp hash line filename comment
+/// for the Filename and LineNo if any in the diagnostic.
+void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
+ const AsmParser *Parser = static_cast<const AsmParser*>(Context);
+ raw_ostream &OS = errs();
+
+ const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
+ const SMLoc &DiagLoc = Diag.getLoc();
+ int DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
+ int CppHashBuf = Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc);
+
+ // Like SourceMgr::PrintMessage() we need to print the include stack if any
+ // before printing the message.
+ int DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
+ if (!Parser->SavedDiagHandler && DiagCurBuffer > 0) {
+ SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
+ DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
+ }
+
+ // If we have not parsed a cpp hash line filename comment or the source
+ // manager changed or buffer changed (like in a nested include) then just
+ // print the normal diagnostic using its Filename and LineNo.
+ if (!Parser->CppHashLineNumber ||
+ &DiagSrcMgr != &Parser->SrcMgr ||
+ DiagBuf != CppHashBuf) {
+ if (Parser->SavedDiagHandler)
+ Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
+ else
+ Diag.print(0, OS);
+ return;
+ }
+
+ // Use the CppHashFilename and calculate a line number based on the
+ // CppHashLoc and CppHashLineNumber relative to this Diag's SMLoc for
+ // the diagnostic.
+ const std::string Filename = Parser->CppHashFilename;
+
+ int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
+ int CppHashLocLineNo =
+ Parser->SrcMgr.FindLineNumber(Parser->CppHashLoc, CppHashBuf);
+ int LineNo = Parser->CppHashLineNumber - 1 +
+ (DiagLocLineNo - CppHashLocLineNo);
+
+ SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(),
+ Filename, LineNo, Diag.getColumnNo(),
+ Diag.getKind(), Diag.getMessage(),
+ Diag.getLineContents(), Diag.getRanges());
+
+ if (Parser->SavedDiagHandler)
+ Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
+ else
+ NewDiag.print(0, OS);
+}
+
+// FIXME: This is mostly duplicated from the function in AsmLexer.cpp. The
+// difference being that that function accepts '@' as part of identifiers and
+// we can't do that. AsmLexer.cpp should probably be changed to handle
+// '@' as a special case when needed.
+static bool isIdentifierChar(char c) {
+ return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
+ c == '.';
+}
+
+bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
+ const MCAsmMacroParameters &Parameters,
+ const MCAsmMacroArguments &A,
+ const SMLoc &L) {
+ unsigned NParameters = Parameters.size();
+ if (NParameters != 0 && NParameters != A.size())
+ return Error(L, "Wrong number of arguments");
+
+ // A macro without parameters is handled differently on Darwin:
+ // gas accepts no arguments and does no substitutions
+ while (!Body.empty()) {
+ // Scan for the next substitution.
+ std::size_t End = Body.size(), Pos = 0;
+ for (; Pos != End; ++Pos) {
+ // Check for a substitution or escape.
+ if (!NParameters) {
+ // This macro has no parameters, look for $0, $1, etc.
+ if (Body[Pos] != '$' || Pos + 1 == End)
+ continue;
+
+ char Next = Body[Pos + 1];
+ if (Next == '$' || Next == 'n' ||
+ isdigit(static_cast<unsigned char>(Next)))
+ break;
+ } else {
+ // This macro has parameters, look for \foo, \bar, etc.
+ if (Body[Pos] == '\\' && Pos + 1 != End)
+ break;
+ }
+ }
+
+ // Add the prefix.
+ OS << Body.slice(0, Pos);
+
+ // Check if we reached the end.
+ if (Pos == End)
+ break;
+
+ if (!NParameters) {
+ switch (Body[Pos+1]) {
+ // $$ => $
+ case '$':
+ OS << '$';
+ break;
+
+ // $n => number of arguments
+ case 'n':
+ OS << A.size();
+ break;
+
+ // $[0-9] => argument
+ default: {
+ // Missing arguments are ignored.
+ unsigned Index = Body[Pos+1] - '0';
+ if (Index >= A.size())
+ break;
+
+ // Otherwise substitute with the token values, with spaces eliminated.
+ for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
+ ie = A[Index].end(); it != ie; ++it)
+ OS << it->getString();
+ break;
+ }
+ }
+ Pos += 2;
+ } else {
+ unsigned I = Pos + 1;
+ while (isIdentifierChar(Body[I]) && I + 1 != End)
+ ++I;
+
+ const char *Begin = Body.data() + Pos +1;
+ StringRef Argument(Begin, I - (Pos +1));
+ unsigned Index = 0;
+ for (; Index < NParameters; ++Index)
+ if (Parameters[Index].first == Argument)
+ break;
+
+ if (Index == NParameters) {
+ if (Body[Pos+1] == '(' && Body[Pos+2] == ')')
+ Pos += 3;
+ else {
+ OS << '\\' << Argument;
+ Pos = I;
+ }
+ } else {
+ for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
+ ie = A[Index].end(); it != ie; ++it)
+ if (it->getKind() == AsmToken::String)
+ OS << it->getStringContents();
+ else
+ OS << it->getString();
+
+ Pos += 1 + Argument.size();
+ }
+ }
+ // Update the scan point.
+ Body = Body.substr(Pos);
+ }
+
+ return false;
+}
+
+MacroInstantiation::MacroInstantiation(const MCAsmMacro *M, SMLoc IL,
+ int EB, SMLoc EL,
+ MemoryBuffer *I)
+ : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitBuffer(EB),
+ ExitLoc(EL)
+{
+}
+
+static bool IsOperator(AsmToken::TokenKind kind)
+{
+ switch (kind)
+ {
+ default:
+ return false;
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Tilde:
+ case AsmToken::Slash:
+ case AsmToken::Star:
+ case AsmToken::Dot:
+ case AsmToken::Equal:
+ case AsmToken::EqualEqual:
+ case AsmToken::Pipe:
+ case AsmToken::PipePipe:
+ case AsmToken::Caret:
+ case AsmToken::Amp:
+ case AsmToken::AmpAmp:
+ case AsmToken::Exclaim:
+ case AsmToken::ExclaimEqual:
+ case AsmToken::Percent:
+ case AsmToken::Less:
+ case AsmToken::LessEqual:
+ case AsmToken::LessLess:
+ case AsmToken::LessGreater:
+ case AsmToken::Greater:
+ case AsmToken::GreaterEqual:
+ case AsmToken::GreaterGreater:
+ return true;
+ }
+}
+
+bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA,
+ AsmToken::TokenKind &ArgumentDelimiter) {
+ unsigned ParenLevel = 0;
+ unsigned AddTokens = 0;
+
+ // gas accepts arguments separated by whitespace, except on Darwin
+ if (!IsDarwin)
+ Lexer.setSkipSpace(false);
+
+ for (;;) {
+ if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) {
+ Lexer.setSkipSpace(true);
+ return TokError("unexpected token in macro instantiation");
+ }
+
+ if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
+ // Spaces and commas cannot be mixed to delimit parameters
+ if (ArgumentDelimiter == AsmToken::Eof)
+ ArgumentDelimiter = AsmToken::Comma;
+ else if (ArgumentDelimiter != AsmToken::Comma) {
+ Lexer.setSkipSpace(true);
+ return TokError("expected ' ' for macro argument separator");
+ }
+ break;
+ }
+
+ if (Lexer.is(AsmToken::Space)) {
+ Lex(); // Eat spaces
+
+ // Spaces can delimit parameters, but could also be part an expression.
+ // If the token after a space is an operator, add the token and the next
+ // one into this argument
+ if (ArgumentDelimiter == AsmToken::Space ||
+ ArgumentDelimiter == AsmToken::Eof) {
+ if (IsOperator(Lexer.getKind())) {
+ // Check to see whether the token is used as an operator,
+ // or part of an identifier
+ const char *NextChar = getTok().getEndLoc().getPointer();
+ if (*NextChar == ' ')
+ AddTokens = 2;
+ }
+
+ if (!AddTokens && ParenLevel == 0) {
+ if (ArgumentDelimiter == AsmToken::Eof &&
+ !IsOperator(Lexer.getKind()))
+ ArgumentDelimiter = AsmToken::Space;
+ break;
+ }
+ }
+ }
+
+ // HandleMacroEntry relies on not advancing the lexer here
+ // to be able to fill in the remaining default parameter values
+ if (Lexer.is(AsmToken::EndOfStatement))
+ break;
+
+ // Adjust the current parentheses level.
+ if (Lexer.is(AsmToken::LParen))
+ ++ParenLevel;
+ else if (Lexer.is(AsmToken::RParen) && ParenLevel)
+ --ParenLevel;
+
+ // Append the token to the current argument list.
+ MA.push_back(getTok());
+ if (AddTokens)
+ AddTokens--;
+ Lex();
+ }
+
+ Lexer.setSkipSpace(true);
+ if (ParenLevel != 0)
+ return TokError("unbalanced parentheses in macro argument");
+ return false;
+}
+
+// Parse the macro instantiation arguments.
+bool AsmParser::ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A) {
+ const unsigned NParameters = M ? M->Parameters.size() : 0;
+ // Argument delimiter is initially unknown. It will be set by
+ // ParseMacroArgument()
+ AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
+
+ // Parse two kinds of macro invocations:
+ // - macros defined without any parameters accept an arbitrary number of them
+ // - macros defined with parameters accept at most that many of them
+ for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
+ ++Parameter) {
+ MCAsmMacroArgument MA;
+
+ if (ParseMacroArgument(MA, ArgumentDelimiter))
+ return true;
+
+ if (!MA.empty() || !NParameters)
+ A.push_back(MA);
+ else if (NParameters) {
+ if (!M->Parameters[Parameter].second.empty())
+ A.push_back(M->Parameters[Parameter].second);
+ }
+
+ // At the end of the statement, fill in remaining arguments that have
+ // default values. If there aren't any, then the next argument is
+ // required but missing
+ if (Lexer.is(AsmToken::EndOfStatement)) {
+ if (NParameters && Parameter < NParameters - 1) {
+ if (M->Parameters[Parameter + 1].second.empty())
+ return TokError("macro argument '" +
+ Twine(M->Parameters[Parameter + 1].first) +
+ "' is missing");
+ else
+ continue;
+ }
+ return false;
+ }
+
+ if (Lexer.is(AsmToken::Comma))
+ Lex();
+ }
+ return TokError("Too many arguments");
+}
+
+const MCAsmMacro* AsmParser::LookupMacro(StringRef Name) {
+ StringMap<MCAsmMacro*>::iterator I = MacroMap.find(Name);
+ return (I == MacroMap.end()) ? NULL : I->getValue();
+}
+
+void AsmParser::DefineMacro(StringRef Name, const MCAsmMacro& Macro) {
+ MacroMap[Name] = new MCAsmMacro(Macro);
+}
+
+void AsmParser::UndefineMacro(StringRef Name) {
+ StringMap<MCAsmMacro*>::iterator I = MacroMap.find(Name);
+ if (I != MacroMap.end()) {
+ delete I->getValue();
+ MacroMap.erase(I);
+ }
+}
+
+bool AsmParser::HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
+ // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
+ // this, although we should protect against infinite loops.
+ if (ActiveMacros.size() == 20)
+ return TokError("macros cannot be nested more than 20 levels deep");
+
+ MCAsmMacroArguments A;
+ if (ParseMacroArguments(M, A))
+ return true;
+
+ // Remove any trailing empty arguments. Do this after-the-fact as we have
+ // to keep empty arguments in the middle of the list or positionality
+ // gets off. e.g., "foo 1, , 2" vs. "foo 1, 2,"
+ while (!A.empty() && A.back().empty())
+ A.pop_back();
+
+ // Macro instantiation is lexical, unfortunately. We construct a new buffer
+ // to hold the macro body with substitutions.
+ SmallString<256> Buf;
+ StringRef Body = M->Body;
+ raw_svector_ostream OS(Buf);
+
+ if (expandMacro(OS, Body, M->Parameters, A, getTok().getLoc()))
+ return true;
+
+ // We include the .endmacro in the buffer as our cue to exit the macro
+ // instantiation.
+ OS << ".endmacro\n";
+
+ MemoryBuffer *Instantiation =
+ MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
+
+ // Create the macro instantiation object and add to the current macro
+ // instantiation stack.
+ MacroInstantiation *MI = new MacroInstantiation(M, NameLoc,
+ CurBuffer,
+ getTok().getLoc(),
+ Instantiation);
+ ActiveMacros.push_back(MI);
+
+ // Jump to the macro instantiation and prime the lexer.
+ CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+ Lex();
+
+ return false;
+}
+
+void AsmParser::HandleMacroExit() {
+ // Jump to the EndOfStatement we should return to, and consume it.
+ JumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer);
+ Lex();
+
+ // Pop the instantiation entry.
+ delete ActiveMacros.back();
+ ActiveMacros.pop_back();
+}
+
+static bool IsUsedIn(const MCSymbol *Sym, const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Value);
+ return IsUsedIn(Sym, BE->getLHS()) || IsUsedIn(Sym, BE->getRHS());
+ break;
+ }
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return false;
+ case MCExpr::SymbolRef: {
+ const MCSymbol &S = static_cast<const MCSymbolRefExpr*>(Value)->getSymbol();
+ if (S.isVariable())
+ return IsUsedIn(Sym, S.getVariableValue());
+ return &S == Sym;
+ }
+ case MCExpr::Unary:
+ return IsUsedIn(Sym, static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
+ }
+
+ llvm_unreachable("Unknown expr kind!");
+}
+
+bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef,
+ bool NoDeadStrip) {
+ // FIXME: Use better location, we should use proper tokens.
+ SMLoc EqualLoc = Lexer.getLoc();
+
+ const MCExpr *Value;
+ if (parseExpression(Value))
+ return true;
+
+ // Note: we don't count b as used in "a = b". This is to allow
+ // a = b
+ // b = c
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in assignment");
+
+ // Error on assignment to '.'.
+ if (Name == ".") {
+ return Error(EqualLoc, ("assignment to pseudo-symbol '.' is unsupported "
+ "(use '.space' or '.org').)"));
+ }
+
+ // Eat the end of statement marker.
+ Lex();
+
+ // Validate that the LHS is allowed to be a variable (either it has not been
+ // used as a symbol, or it is an absolute symbol).
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+ if (Sym) {
+ // Diagnose assignment to a label.
+ //
+ // FIXME: Diagnostics. Note the location of the definition as a label.
+ // FIXME: Diagnose assignment to protected identifier (e.g., register name).
+ if (IsUsedIn(Sym, Value))
+ return Error(EqualLoc, "Recursive use of '" + Name + "'");
+ else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
+ ; // Allow redefinitions of undefined symbols only used in directives.
+ else if (Sym->isVariable() && !Sym->isUsed() && allow_redef)
+ ; // Allow redefinitions of variables that haven't yet been used.
+ else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
+ return Error(EqualLoc, "redefinition of '" + Name + "'");
+ else if (!Sym->isVariable())
+ return Error(EqualLoc, "invalid assignment to '" + Name + "'");
+ else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
+ return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
+ Name + "'");
+
+ // Don't count these checks as uses.
+ Sym->setUsed(false);
+ } else
+ Sym = getContext().GetOrCreateSymbol(Name);
+
+ // FIXME: Handle '.'.
+
+ // Do the assignment.
+ Out.EmitAssignment(Sym, Value);
+ if (NoDeadStrip)
+ Out.EmitSymbolAttribute(Sym, MCSA_NoDeadStrip);
+
+
+ return false;
+}
+
+/// parseIdentifier:
+/// ::= identifier
+/// ::= string
+bool AsmParser::parseIdentifier(StringRef &Res) {
+ // The assembler has relaxed rules for accepting identifiers, in particular we
+ // allow things like '.globl $foo', which would normally be separate
+ // tokens. At this level, we have already lexed so we cannot (currently)
+ // handle this as a context dependent token, instead we detect adjacent tokens
+ // and return the combined identifier.
+ if (Lexer.is(AsmToken::Dollar)) {
+ SMLoc DollarLoc = getLexer().getLoc();
+
+ // Consume the dollar sign, and check for a following identifier.
+ Lex();
+ if (Lexer.isNot(AsmToken::Identifier))
+ return true;
+
+ // We have a '$' followed by an identifier, make sure they are adjacent.
+ if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+ return true;
+
+ // Construct the joined identifier and consume the token.
+ Res = StringRef(DollarLoc.getPointer(),
+ getTok().getIdentifier().size() + 1);
+ Lex();
+ return false;
+ }
+
+ if (Lexer.isNot(AsmToken::Identifier) &&
+ Lexer.isNot(AsmToken::String))
+ return true;
+
+ Res = getTok().getIdentifier();
+
+ Lex(); // Consume the identifier token.
+
+ return false;
+}
+
+/// ParseDirectiveSet:
+/// ::= .equ identifier ',' expression
+/// ::= .equiv identifier ',' expression
+/// ::= .set identifier ',' expression
+bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) {
+ StringRef Name;
+
+ if (parseIdentifier(Name))
+ return TokError("expected identifier after '" + Twine(IDVal) + "'");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "'");
+ Lex();
+
+ return ParseAssignment(Name, allow_redef, true);
+}
+
+bool AsmParser::parseEscapedString(std::string &Data) {
+ assert(getLexer().is(AsmToken::String) && "Unexpected current token!");
+
+ Data = "";
+ StringRef Str = getTok().getStringContents();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] != '\\') {
+ Data += Str[i];
+ continue;
+ }
+
+ // Recognize escaped characters. Note that this escape semantics currently
+ // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes.
+ ++i;
+ if (i == e)
+ return TokError("unexpected backslash at end of string");
+
+ // Recognize octal sequences.
+ if ((unsigned) (Str[i] - '0') <= 7) {
+ // Consume up to three octal characters.
+ unsigned Value = Str[i] - '0';
+
+ if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ ++i;
+ Value = Value * 8 + (Str[i] - '0');
+
+ if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ ++i;
+ Value = Value * 8 + (Str[i] - '0');
+ }
+ }
+
+ if (Value > 255)
+ return TokError("invalid octal escape sequence (out of range)");
+
+ Data += (unsigned char) Value;
+ continue;
+ }
+
+ // Otherwise recognize individual escapes.
+ switch (Str[i]) {
+ default:
+ // Just reject invalid escape sequences for now.
+ return TokError("invalid escape sequence (unrecognized character)");
+
+ case 'b': Data += '\b'; break;
+ case 'f': Data += '\f'; break;
+ case 'n': Data += '\n'; break;
+ case 'r': Data += '\r'; break;
+ case 't': Data += '\t'; break;
+ case '"': Data += '"'; break;
+ case '\\': Data += '\\'; break;
+ }
+ }
+
+ return false;
+}
+
+/// ParseDirectiveAscii:
+/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
+bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ checkForValidSection();
+
+ for (;;) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '" + Twine(IDVal) + "' directive");
+
+ std::string Data;
+ if (parseEscapedString(Data))
+ return true;
+
+ getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE);
+ if (ZeroTerminated)
+ getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE);
+
+ Lex();
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveValue
+/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
+bool AsmParser::ParseDirectiveValue(unsigned Size) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ checkForValidSection();
+
+ for (;;) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (parseExpression(Value))
+ return true;
+
+ // Special case constant expressions to match code generator.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size, DEFAULT_ADDRSPACE);
+ } else
+ getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveRealValue
+/// ::= (.single | .double) [ expression (, expression)* ]
+bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ checkForValidSection();
+
+ for (;;) {
+ // We don't truly support arithmetic on floating point expressions, so we
+ // have to manually parse unary prefixes.
+ bool IsNeg = false;
+ if (getLexer().is(AsmToken::Minus)) {
+ Lex();
+ IsNeg = true;
+ } else if (getLexer().is(AsmToken::Plus))
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer) &&
+ getLexer().isNot(AsmToken::Real) &&
+ getLexer().isNot(AsmToken::Identifier))
+ return TokError("unexpected token in directive");
+
+ // Convert to an APFloat.
+ APFloat Value(Semantics);
+ StringRef IDVal = getTok().getString();
+ if (getLexer().is(AsmToken::Identifier)) {
+ if (!IDVal.compare_lower("infinity") || !IDVal.compare_lower("inf"))
+ Value = APFloat::getInf(Semantics);
+ else if (!IDVal.compare_lower("nan"))
+ Value = APFloat::getNaN(Semantics, false, ~0);
+ else
+ return TokError("invalid floating point literal");
+ } else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) ==
+ APFloat::opInvalidOp)
+ return TokError("invalid floating point literal");
+ if (IsNeg)
+ Value.changeSign();
+
+ // Consume the numeric token.
+ Lex();
+
+ // Emit the value as an integer.
+ APInt AsInt = Value.bitcastToAPInt();
+ getStreamer().EmitIntValue(AsInt.getLimitedValue(),
+ AsInt.getBitWidth() / 8, DEFAULT_ADDRSPACE);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveZero
+/// ::= .zero expression
+bool AsmParser::ParseDirectiveZero() {
+ checkForValidSection();
+
+ int64_t NumBytes;
+ if (parseAbsoluteExpression(NumBytes))
+ return true;
+
+ int64_t Val = 0;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ if (parseAbsoluteExpression(Val))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.zero' directive");
+
+ Lex();
+
+ getStreamer().EmitFill(NumBytes, Val, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
+/// ParseDirectiveFill
+/// ::= .fill expression , expression , expression
+bool AsmParser::ParseDirectiveFill() {
+ checkForValidSection();
+
+ int64_t NumValues;
+ if (parseAbsoluteExpression(NumValues))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.fill' directive");
+ Lex();
+
+ int64_t FillSize;
+ if (parseAbsoluteExpression(FillSize))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.fill' directive");
+ Lex();
+
+ int64_t FillExpr;
+ if (parseAbsoluteExpression(FillExpr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.fill' directive");
+
+ Lex();
+
+ if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8)
+ return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
+
+ for (uint64_t i = 0, e = NumValues; i != e; ++i)
+ getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
+/// ParseDirectiveOrg
+/// ::= .org expression [ , expression ]
+bool AsmParser::ParseDirectiveOrg() {
+ checkForValidSection();
+
+ const MCExpr *Offset;
+ SMLoc Loc = getTok().getLoc();
+ if (parseExpression(Offset))
+ return true;
+
+ // Parse optional fill expression.
+ int64_t FillExpr = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.org' directive");
+ Lex();
+
+ if (parseAbsoluteExpression(FillExpr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.org' directive");
+ }
+
+ Lex();
+
+ // Only limited forms of relocatable expressions are accepted here, it
+ // has to be relative to the current section. The streamer will return
+ // 'true' if the expression wasn't evaluatable.
+ if (getStreamer().EmitValueToOffset(Offset, FillExpr))
+ return Error(Loc, "expected assembly-time absolute expression");
+
+ return false;
+}
+
+/// ParseDirectiveAlign
+/// ::= {.align, ...} expression [ , expression [ , expression ]]
+bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
+ checkForValidSection();
+
+ SMLoc AlignmentLoc = getLexer().getLoc();
+ int64_t Alignment;
+ if (parseAbsoluteExpression(Alignment))
+ return true;
+
+ SMLoc MaxBytesLoc;
+ bool HasFillExpr = false;
+ int64_t FillExpr = 0;
+ int64_t MaxBytesToFill = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ // The fill expression can be omitted while specifying a maximum number of
+ // alignment bytes, e.g:
+ // .align 3,,4
+ if (getLexer().isNot(AsmToken::Comma)) {
+ HasFillExpr = true;
+ if (parseAbsoluteExpression(FillExpr))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ MaxBytesLoc = getLexer().getLoc();
+ if (parseAbsoluteExpression(MaxBytesToFill))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+ }
+ }
+
+ Lex();
+
+ if (!HasFillExpr)
+ FillExpr = 0;
+
+ // Compute alignment in bytes.
+ if (IsPow2) {
+ // FIXME: Diagnose overflow.
+ if (Alignment >= 32) {
+ Error(AlignmentLoc, "invalid alignment value");
+ Alignment = 31;
+ }
+
+ Alignment = 1ULL << Alignment;
+ } else {
+ // Reject alignments that aren't a power of two, for gas compatibility.
+ if (!isPowerOf2_64(Alignment))
+ Error(AlignmentLoc, "alignment must be a power of 2");
+ }
+
+ // Diagnose non-sensical max bytes to align.
+ if (MaxBytesLoc.isValid()) {
+ if (MaxBytesToFill < 1) {
+ Error(MaxBytesLoc, "alignment directive can never be satisfied in this "
+ "many bytes, ignoring maximum bytes expression");
+ MaxBytesToFill = 0;
+ }
+
+ if (MaxBytesToFill >= Alignment) {
+ Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and "
+ "has no effect");
+ MaxBytesToFill = 0;
+ }
+ }
+
+ // Check whether we should use optimal code alignment for this .align
+ // directive.
+ bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign();
+ if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
+ ValueSize == 1 && UseCodeAlign) {
+ getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
+ } else {
+ // FIXME: Target specific behavior about how the "extra" bytes are filled.
+ getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize,
+ MaxBytesToFill);
+ }
+
+ return false;
+}
+
+/// ParseDirectiveFile
+/// ::= .file [number] filename
+/// ::= .file number directory filename
+bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) {
+ // FIXME: I'm not sure what this is.
+ int64_t FileNumber = -1;
+ SMLoc FileNumberLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::Integer)) {
+ FileNumber = getTok().getIntVal();
+ Lex();
+
+ if (FileNumber < 1)
+ return TokError("file number less than one");
+ }
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("unexpected token in '.file' directive");
+
+ // Usually the directory and filename together, otherwise just the directory.
+ StringRef Path = getTok().getString();
+ Path = Path.substr(1, Path.size()-2);
+ Lex();
+
+ StringRef Directory;
+ StringRef Filename;
+ if (getLexer().is(AsmToken::String)) {
+ if (FileNumber == -1)
+ return TokError("explicit path specified, but no file number");
+ Filename = getTok().getString();
+ Filename = Filename.substr(1, Filename.size()-2);
+ Directory = Path;
+ Lex();
+ } else {
+ Filename = Path;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.file' directive");
+
+ if (FileNumber == -1)
+ getStreamer().EmitFileDirective(Filename);
+ else {
+ if (getContext().getGenDwarfForAssembly() == true)
+ Error(DirectiveLoc, "input can't have .file dwarf directives when -g is "
+ "used to generate dwarf debug info for assembly code");
+
+ if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename))
+ Error(FileNumberLoc, "file number already allocated");
+ }
+
+ return false;
+}
+
+/// ParseDirectiveLine
+/// ::= .line [number]
+bool AsmParser::ParseDirectiveLine() {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("unexpected token in '.line' directive");
+
+ int64_t LineNumber = getTok().getIntVal();
+ (void) LineNumber;
+ Lex();
+
+ // FIXME: Do something with the .line.
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.line' directive");
+
+ return false;
+}
+
+/// ParseDirectiveLoc
+/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
+/// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
+/// The first number is a file number, must have been previously assigned with
+/// a .file directive, the second number is the line number and optionally the
+/// third number is a column position (zero if not specified). The remaining
+/// optional items are .loc sub-directives.
+bool AsmParser::ParseDirectiveLoc() {
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("unexpected token in '.loc' directive");
+ int64_t FileNumber = getTok().getIntVal();
+ if (FileNumber < 1)
+ return TokError("file number less than one in '.loc' directive");
+ if (!getContext().isValidDwarfFileNumber(FileNumber))
+ return TokError("unassigned file number in '.loc' directive");
+ Lex();
+
+ int64_t LineNumber = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ LineNumber = getTok().getIntVal();
+ if (LineNumber < 1)
+ return TokError("line number less than one in '.loc' directive");
+ Lex();
+ }
+
+ int64_t ColumnPos = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ ColumnPos = getTok().getIntVal();
+ if (ColumnPos < 0)
+ return TokError("column position less than zero in '.loc' directive");
+ Lex();
+ }
+
+ unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
+ unsigned Isa = 0;
+ int64_t Discriminator = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ StringRef Name;
+ SMLoc Loc = getTok().getLoc();
+ if (parseIdentifier(Name))
+ return TokError("unexpected token in '.loc' directive");
+
+ if (Name == "basic_block")
+ Flags |= DWARF2_FLAG_BASIC_BLOCK;
+ else if (Name == "prologue_end")
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ else if (Name == "epilogue_begin")
+ Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
+ else if (Name == "is_stmt") {
+ Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (parseExpression(Value))
+ return true;
+ // The expression must be the constant 0 or 1.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value == 0)
+ Flags &= ~DWARF2_FLAG_IS_STMT;
+ else if (Value == 1)
+ Flags |= DWARF2_FLAG_IS_STMT;
+ else
+ return Error(Loc, "is_stmt value not 0 or 1");
+ }
+ else {
+ return Error(Loc, "is_stmt value not the constant value of 0 or 1");
+ }
+ }
+ else if (Name == "isa") {
+ Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (parseExpression(Value))
+ return true;
+ // The expression must be a constant greater or equal to 0.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value < 0)
+ return Error(Loc, "isa number less than zero");
+ Isa = Value;
+ }
+ else {
+ return Error(Loc, "isa number not a constant value");
+ }
+ }
+ else if (Name == "discriminator") {
+ if (parseAbsoluteExpression(Discriminator))
+ return true;
+ }
+ else {
+ return Error(Loc, "unknown sub-directive in '.loc' directive");
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+ }
+ }
+
+ getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
+ Isa, Discriminator, StringRef());
+
+ return false;
+}
+
+/// ParseDirectiveStabs
+/// ::= .stabs string, number, number, number
+bool AsmParser::ParseDirectiveStabs() {
+ return TokError("unsupported directive '.stabs'");
+}
+
+/// ParseDirectiveCFISections
+/// ::= .cfi_sections section [, section]
+bool AsmParser::ParseDirectiveCFISections() {
+ StringRef Name;
+ bool EH = false;
+ bool Debug = false;
+
+ if (parseIdentifier(Name))
+ return TokError("Expected an identifier");
+
+ if (Name == ".eh_frame")
+ EH = true;
+ else if (Name == ".debug_frame")
+ Debug = true;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (parseIdentifier(Name))
+ return TokError("Expected an identifier");
+
+ if (Name == ".eh_frame")
+ EH = true;
+ else if (Name == ".debug_frame")
+ Debug = true;
+ }
+
+ getStreamer().EmitCFISections(EH, Debug);
+ return false;
+}
+
+/// ParseDirectiveCFIStartProc
+/// ::= .cfi_startproc
+bool AsmParser::ParseDirectiveCFIStartProc() {
+ getStreamer().EmitCFIStartProc();
+ return false;
+}
+
+/// ParseDirectiveCFIEndProc
+/// ::= .cfi_endproc
+bool AsmParser::ParseDirectiveCFIEndProc() {
+ getStreamer().EmitCFIEndProc();
+ return false;
+}
+
+/// ParseRegisterOrRegisterNumber - parse register name or number.
+bool AsmParser::ParseRegisterOrRegisterNumber(int64_t &Register,
+ SMLoc DirectiveLoc) {
+ unsigned RegNo;
+
+ if (getLexer().isNot(AsmToken::Integer)) {
+ if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc))
+ return true;
+ Register = getContext().getRegisterInfo().getDwarfRegNum(RegNo, true);
+ } else
+ return parseAbsoluteExpression(Register);
+
+ return false;
+}
+
+/// ParseDirectiveCFIDefCfa
+/// ::= .cfi_def_cfa register, offset
+bool AsmParser::ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Offset = 0;
+ if (parseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIDefCfa(Register, Offset);
+ return false;
+}
+
+/// ParseDirectiveCFIDefCfaOffset
+/// ::= .cfi_def_cfa_offset offset
+bool AsmParser::ParseDirectiveCFIDefCfaOffset() {
+ int64_t Offset = 0;
+ if (parseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIDefCfaOffset(Offset);
+ return false;
+}
+
+/// ParseDirectiveCFIRegister
+/// ::= .cfi_register register, register
+bool AsmParser::ParseDirectiveCFIRegister(SMLoc DirectiveLoc) {
+ int64_t Register1 = 0;
+ if (ParseRegisterOrRegisterNumber(Register1, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Register2 = 0;
+ if (ParseRegisterOrRegisterNumber(Register2, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIRegister(Register1, Register2);
+ return false;
+}
+
+/// ParseDirectiveCFIAdjustCfaOffset
+/// ::= .cfi_adjust_cfa_offset adjustment
+bool AsmParser::ParseDirectiveCFIAdjustCfaOffset() {
+ int64_t Adjustment = 0;
+ if (parseAbsoluteExpression(Adjustment))
+ return true;
+
+ getStreamer().EmitCFIAdjustCfaOffset(Adjustment);
+ return false;
+}
+
+/// ParseDirectiveCFIDefCfaRegister
+/// ::= .cfi_def_cfa_register register
+bool AsmParser::ParseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIDefCfaRegister(Register);
+ return false;
+}
+
+/// ParseDirectiveCFIOffset
+/// ::= .cfi_offset register, offset
+bool AsmParser::ParseDirectiveCFIOffset(SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ int64_t Offset = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ if (parseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIOffset(Register, Offset);
+ return false;
+}
+
+/// ParseDirectiveCFIRelOffset
+/// ::= .cfi_rel_offset register, offset
+bool AsmParser::ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Offset = 0;
+ if (parseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIRelOffset(Register, Offset);
+ return false;
+}
+
+static bool isValidEncoding(int64_t Encoding) {
+ if (Encoding & ~0xff)
+ return false;
+
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return true;
+
+ const unsigned Format = Encoding & 0xf;
+ if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
+ Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
+ Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
+ Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
+ return false;
+
+ const unsigned Application = Encoding & 0x70;
+ if (Application != dwarf::DW_EH_PE_absptr &&
+ Application != dwarf::DW_EH_PE_pcrel)
+ return false;
+
+ return true;
+}
+
+/// ParseDirectiveCFIPersonalityOrLsda
+/// IsPersonality true for cfi_personality, false for cfi_lsda
+/// ::= .cfi_personality encoding, [symbol_name]
+/// ::= .cfi_lsda encoding, [symbol_name]
+bool AsmParser::ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
+ int64_t Encoding = 0;
+ if (parseAbsoluteExpression(Encoding))
+ return true;
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return false;
+
+ if (!isValidEncoding(Encoding))
+ return TokError("unsupported encoding.");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ StringRef Name;
+ if (parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (IsPersonality)
+ getStreamer().EmitCFIPersonality(Sym, Encoding);
+ else
+ getStreamer().EmitCFILsda(Sym, Encoding);
+ return false;
+}
+
+/// ParseDirectiveCFIRememberState
+/// ::= .cfi_remember_state
+bool AsmParser::ParseDirectiveCFIRememberState() {
+ getStreamer().EmitCFIRememberState();
+ return false;
+}
+
+/// ParseDirectiveCFIRestoreState
+/// ::= .cfi_remember_state
+bool AsmParser::ParseDirectiveCFIRestoreState() {
+ getStreamer().EmitCFIRestoreState();
+ return false;
+}
+
+/// ParseDirectiveCFISameValue
+/// ::= .cfi_same_value register
+bool AsmParser::ParseDirectiveCFISameValue(SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFISameValue(Register);
+ return false;
+}
+
+/// ParseDirectiveCFIRestore
+/// ::= .cfi_restore register
+bool AsmParser::ParseDirectiveCFIRestore(SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIRestore(Register);
+ return false;
+}
+
+/// ParseDirectiveCFIEscape
+/// ::= .cfi_escape expression[,...]
+bool AsmParser::ParseDirectiveCFIEscape() {
+ std::string Values;
+ int64_t CurrValue;
+ if (parseAbsoluteExpression(CurrValue))
+ return true;
+
+ Values.push_back((uint8_t)CurrValue);
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (parseAbsoluteExpression(CurrValue))
+ return true;
+
+ Values.push_back((uint8_t)CurrValue);
+ }
+
+ getStreamer().EmitCFIEscape(Values);
+ return false;
+}
+
+/// ParseDirectiveCFISignalFrame
+/// ::= .cfi_signal_frame
+bool AsmParser::ParseDirectiveCFISignalFrame() {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(),
+ "unexpected token in '.cfi_signal_frame'");
+
+ getStreamer().EmitCFISignalFrame();
+ return false;
+}
+
+/// ParseDirectiveCFIUndefined
+/// ::= .cfi_undefined register
+bool AsmParser::ParseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIUndefined(Register);
+ return false;
+}
+
+/// ParseDirectiveMacrosOnOff
+/// ::= .macros_on
+/// ::= .macros_off
+bool AsmParser::ParseDirectiveMacrosOnOff(StringRef Directive) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(),
+ "unexpected token in '" + Directive + "' directive");
+
+ SetMacrosEnabled(Directive == ".macros_on");
+ return false;
+}
+
+/// ParseDirectiveMacro
+/// ::= .macro name [parameters]
+bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) {
+ StringRef Name;
+ if (parseIdentifier(Name))
+ return TokError("expected identifier in '.macro' directive");
+
+ MCAsmMacroParameters Parameters;
+ // Argument delimiter is initially unknown. It will be set by
+ // ParseMacroArgument()
+ AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ MCAsmMacroParameter Parameter;
+ if (parseIdentifier(Parameter.first))
+ return TokError("expected identifier in '.macro' directive");
+
+ if (getLexer().is(AsmToken::Equal)) {
+ Lex();
+ if (ParseMacroArgument(Parameter.second, ArgumentDelimiter))
+ return true;
+ }
+
+ Parameters.push_back(Parameter);
+
+ if (getLexer().is(AsmToken::Comma))
+ Lex();
+ else if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+ }
+ }
+
+ // Eat the end of statement.
+ Lex();
+
+ AsmToken EndToken, StartToken = getTok();
+
+ // Lex the macro definition.
+ for (;;) {
+ // Check whether we have reached the end of the file.
+ if (getLexer().is(AsmToken::Eof))
+ return Error(DirectiveLoc, "no matching '.endmacro' in definition");
+
+ // Otherwise, check whether we have reach the .endmacro.
+ if (getLexer().is(AsmToken::Identifier) &&
+ (getTok().getIdentifier() == ".endm" ||
+ getTok().getIdentifier() == ".endmacro")) {
+ EndToken = getTok();
+ Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + EndToken.getIdentifier() +
+ "' directive");
+ break;
+ }
+
+ // Otherwise, scan til the end of the statement.
+ eatToEndOfStatement();
+ }
+
+ if (LookupMacro(Name)) {
+ return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
+ }
+
+ const char *BodyStart = StartToken.getLoc().getPointer();
+ const char *BodyEnd = EndToken.getLoc().getPointer();
+ StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
+ CheckForBadMacro(DirectiveLoc, Name, Body, Parameters);
+ DefineMacro(Name, MCAsmMacro(Name, Body, Parameters));
+ return false;
+}
+
+/// CheckForBadMacro
+///
+/// With the support added for named parameters there may be code out there that
+/// is transitioning from positional parameters. In versions of gas that did
+/// not support named parameters they would be ignored on the macro defintion.
+/// But to support both styles of parameters this is not possible so if a macro
+/// defintion has named parameters but does not use them and has what appears
+/// to be positional parameters, strings like $1, $2, ... and $n, then issue a
+/// warning that the positional parameter found in body which have no effect.
+/// Hoping the developer will either remove the named parameters from the macro
+/// definiton so the positional parameters get used if that was what was
+/// intended or change the macro to use the named parameters. It is possible
+/// this warning will trigger when the none of the named parameters are used
+/// and the strings like $1 are infact to simply to be passed trough unchanged.
+void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name,
+ StringRef Body,
+ MCAsmMacroParameters Parameters) {
+ // If this macro is not defined with named parameters the warning we are
+ // checking for here doesn't apply.
+ unsigned NParameters = Parameters.size();
+ if (NParameters == 0)
+ return;
+
+ bool NamedParametersFound = false;
+ bool PositionalParametersFound = false;
+
+ // Look at the body of the macro for use of both the named parameters and what
+ // are likely to be positional parameters. This is what expandMacro() is
+ // doing when it finds the parameters in the body.
+ while (!Body.empty()) {
+ // Scan for the next possible parameter.
+ std::size_t End = Body.size(), Pos = 0;
+ for (; Pos != End; ++Pos) {
+ // Check for a substitution or escape.
+ // This macro is defined with parameters, look for \foo, \bar, etc.
+ if (Body[Pos] == '\\' && Pos + 1 != End)
+ break;
+
+ // This macro should have parameters, but look for $0, $1, ..., $n too.
+ if (Body[Pos] != '$' || Pos + 1 == End)
+ continue;
+ char Next = Body[Pos + 1];
+ if (Next == '$' || Next == 'n' ||
+ isdigit(static_cast<unsigned char>(Next)))
+ break;
+ }
+
+ // Check if we reached the end.
+ if (Pos == End)
+ break;
+
+ if (Body[Pos] == '$') {
+ switch (Body[Pos+1]) {
+ // $$ => $
+ case '$':
+ break;
+
+ // $n => number of arguments
+ case 'n':
+ PositionalParametersFound = true;
+ break;
+
+ // $[0-9] => argument
+ default: {
+ PositionalParametersFound = true;
+ break;
+ }
+ }
+ Pos += 2;
+ } else {
+ unsigned I = Pos + 1;
+ while (isIdentifierChar(Body[I]) && I + 1 != End)
+ ++I;
+
+ const char *Begin = Body.data() + Pos +1;
+ StringRef Argument(Begin, I - (Pos +1));
+ unsigned Index = 0;
+ for (; Index < NParameters; ++Index)
+ if (Parameters[Index].first == Argument)
+ break;
+
+ if (Index == NParameters) {
+ if (Body[Pos+1] == '(' && Body[Pos+2] == ')')
+ Pos += 3;
+ else {
+ Pos = I;
+ }
+ } else {
+ NamedParametersFound = true;
+ Pos += 1 + Argument.size();
+ }
+ }
+ // Update the scan point.
+ Body = Body.substr(Pos);
+ }
+
+ if (!NamedParametersFound && PositionalParametersFound)
+ Warning(DirectiveLoc, "macro defined with named parameters which are not "
+ "used in macro body, possible positional parameter "
+ "found in body which will have no effect");
+}
+
+/// ParseDirectiveEndMacro
+/// ::= .endm
+/// ::= .endmacro
+bool AsmParser::ParseDirectiveEndMacro(StringRef Directive) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + Directive + "' directive");
+
+ // If we are inside a macro instantiation, terminate the current
+ // instantiation.
+ if (InsideMacroInstantiation()) {
+ HandleMacroExit();
+ return false;
+ }
+
+ // Otherwise, this .endmacro is a stray entry in the file; well formed
+ // .endmacro directives are handled during the macro definition parsing.
+ return TokError("unexpected '" + Directive + "' in file, "
+ "no current macro definition");
+}
+
+/// ParseDirectivePurgeMacro
+/// ::= .purgem
+bool AsmParser::ParseDirectivePurgeMacro(SMLoc DirectiveLoc) {
+ StringRef Name;
+ if (parseIdentifier(Name))
+ return TokError("expected identifier in '.purgem' directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.purgem' directive");
+
+ if (!LookupMacro(Name))
+ return Error(DirectiveLoc, "macro '" + Name + "' is not defined");
+
+ UndefineMacro(Name);
+ return false;
+}
+
+/// ParseDirectiveBundleAlignMode
+/// ::= {.bundle_align_mode} expression
+bool AsmParser::ParseDirectiveBundleAlignMode() {
+ checkForValidSection();
+
+ // Expect a single argument: an expression that evaluates to a constant
+ // in the inclusive range 0-30.
+ SMLoc ExprLoc = getLexer().getLoc();
+ int64_t AlignSizePow2;
+ if (parseAbsoluteExpression(AlignSizePow2))
+ return true;
+ else if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token after expression in"
+ " '.bundle_align_mode' directive");
+ else if (AlignSizePow2 < 0 || AlignSizePow2 > 30)
+ return Error(ExprLoc,
+ "invalid bundle alignment size (expected between 0 and 30)");
+
+ Lex();
+
+ // Because of AlignSizePow2's verified range we can safely truncate it to
+ // unsigned.
+ getStreamer().EmitBundleAlignMode(static_cast<unsigned>(AlignSizePow2));
+ return false;
+}
+
+/// ParseDirectiveBundleLock
+/// ::= {.bundle_lock} [align_to_end]
+bool AsmParser::ParseDirectiveBundleLock() {
+ checkForValidSection();
+ bool AlignToEnd = false;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ StringRef Option;
+ SMLoc Loc = getTok().getLoc();
+ const char *kInvalidOptionError =
+ "invalid option for '.bundle_lock' directive";
+
+ if (parseIdentifier(Option))
+ return Error(Loc, kInvalidOptionError);
+
+ if (Option != "align_to_end")
+ return Error(Loc, kInvalidOptionError);
+ else if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Loc,
+ "unexpected token after '.bundle_lock' directive option");
+ AlignToEnd = true;
+ }
+
+ Lex();
+
+ getStreamer().EmitBundleLock(AlignToEnd);
+ return false;
+}
+
+/// ParseDirectiveBundleLock
+/// ::= {.bundle_lock}
+bool AsmParser::ParseDirectiveBundleUnlock() {
+ checkForValidSection();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.bundle_unlock' directive");
+ Lex();
+
+ getStreamer().EmitBundleUnlock();
+ return false;
+}
+
+/// ParseDirectiveSpace
+/// ::= (.skip | .space) expression [ , expression ]
+bool AsmParser::ParseDirectiveSpace(StringRef IDVal) {
+ checkForValidSection();
+
+ int64_t NumBytes;
+ if (parseAbsoluteExpression(NumBytes))
+ return true;
+
+ int64_t FillExpr = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ Lex();
+
+ if (parseAbsoluteExpression(FillExpr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ }
+
+ Lex();
+
+ if (NumBytes <= 0)
+ return TokError("invalid number of bytes in '" +
+ Twine(IDVal) + "' directive");
+
+ // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
+ getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
+/// ParseDirectiveLEB128
+/// ::= (.sleb128 | .uleb128) expression
+bool AsmParser::ParseDirectiveLEB128(bool Signed) {
+ checkForValidSection();
+ const MCExpr *Value;
+
+ if (parseExpression(Value))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ if (Signed)
+ getStreamer().EmitSLEB128Value(Value);
+ else
+ getStreamer().EmitULEB128Value(Value);
+
+ return false;
+}
+
+/// ParseDirectiveSymbolAttribute
+/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
+bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ StringRef Name;
+ SMLoc Loc = getTok().getLoc();
+
+ if (parseIdentifier(Name))
+ return Error(Loc, "expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ // Assembler local symbols don't make any sense here. Complain loudly.
+ if (Sym->isTemporary())
+ return Error(Loc, "non-local symbol required in directive");
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveComm
+/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
+bool AsmParser::ParseDirectiveComm(bool IsLocal) {
+ checkForValidSection();
+
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef Name;
+ if (parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (parseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (parseAbsoluteExpression(Pow2Alignment))
+ return true;
+
+ LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
+ if (IsLocal && LCOMM == LCOMM::NoAlignment)
+ return Error(Pow2AlignmentLoc, "alignment not supported on this target");
+
+ // If this target takes alignments in bytes (not log) validate and convert.
+ if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
+ (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
+ if (!isPowerOf2_64(Pow2Alignment))
+ return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
+ Pow2Alignment = Log2_64(Pow2Alignment);
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.comm' or '.lcomm' directive");
+
+ Lex();
+
+ // NOTE: a size of zero for a .comm should create a undefined symbol
+ // but a size of .lcomm creates a bss symbol of size zero.
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
+ "be less than zero");
+
+ // NOTE: The alignment in the directive is a power of 2 value, the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
+ "alignment, can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // Create the Symbol as a common or local common with Size and Pow2Alignment
+ if (IsLocal) {
+ getStreamer().EmitLocalCommonSymbol(Sym, Size, 1 << Pow2Alignment);
+ return false;
+ }
+
+ getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
+ return false;
+}
+
+/// ParseDirectiveAbort
+/// ::= .abort [... message ...]
+bool AsmParser::ParseDirectiveAbort() {
+ // FIXME: Use loc from directive.
+ SMLoc Loc = getLexer().getLoc();
+
+ StringRef Str = parseStringToEndOfStatement();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.abort' directive");
+
+ Lex();
+
+ if (Str.empty())
+ Error(Loc, ".abort detected. Assembly stopping.");
+ else
+ Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
+ // FIXME: Actually abort assembly here.
+
+ return false;
+}
+
+/// ParseDirectiveInclude
+/// ::= .include "filename"
+bool AsmParser::ParseDirectiveInclude() {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.include' directive");
+
+ std::string Filename = getTok().getString();
+ SMLoc IncludeLoc = getLexer().getLoc();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.include' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to switch the lexer to the included file before consuming the end
+ // of statement to avoid losing it when we switch.
+ if (EnterIncludeFile(Filename)) {
+ Error(IncludeLoc, "Could not find include file '" + Filename + "'");
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIncbin
+/// ::= .incbin "filename"
+bool AsmParser::ParseDirectiveIncbin() {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.incbin' directive");
+
+ std::string Filename = getTok().getString();
+ SMLoc IncbinLoc = getLexer().getLoc();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.incbin' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to process the included file.
+ if (ProcessIncbinFile(Filename)) {
+ Error(IncbinLoc, "Could not find incbin file '" + Filename + "'");
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIf
+/// ::= .if expression
+bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+ if (TheCondState.Ignore) {
+ eatToEndOfStatement();
+ } else {
+ int64_t ExprValue;
+ if (parseAbsoluteExpression(ExprValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.if' directive");
+
+ Lex();
+
+ TheCondState.CondMet = ExprValue;
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIfb
+/// ::= .ifb string
+bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+
+ if (TheCondState.Ignore) {
+ eatToEndOfStatement();
+ } else {
+ StringRef Str = parseStringToEndOfStatement();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.ifb' directive");
+
+ Lex();
+
+ TheCondState.CondMet = ExpectBlank == Str.empty();
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIfc
+/// ::= .ifc string1, string2
+bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+
+ if (TheCondState.Ignore) {
+ eatToEndOfStatement();
+ } else {
+ StringRef Str1 = ParseStringToComma();
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.ifc' directive");
+
+ Lex();
+
+ StringRef Str2 = parseStringToEndOfStatement();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.ifc' directive");
+
+ Lex();
+
+ TheCondState.CondMet = ExpectEqual == (Str1 == Str2);
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIfdef
+/// ::= .ifdef symbol
+bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
+ StringRef Name;
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+
+ if (TheCondState.Ignore) {
+ eatToEndOfStatement();
+ } else {
+ if (parseIdentifier(Name))
+ return TokError("expected identifier after '.ifdef'");
+
+ Lex();
+
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+
+ if (expect_defined)
+ TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined());
+ else
+ TheCondState.CondMet = (Sym == NULL || Sym->isUndefined());
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveElseIf
+/// ::= .elseif expression
+bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
+ if (TheCondState.TheCond != AsmCond::IfCond &&
+ TheCondState.TheCond != AsmCond::ElseIfCond)
+ Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
+ " an .elseif");
+ TheCondState.TheCond = AsmCond::ElseIfCond;
+
+ bool LastIgnoreState = false;
+ if (!TheCondStack.empty())
+ LastIgnoreState = TheCondStack.back().Ignore;
+ if (LastIgnoreState || TheCondState.CondMet) {
+ TheCondState.Ignore = true;
+ eatToEndOfStatement();
+ }
+ else {
+ int64_t ExprValue;
+ if (parseAbsoluteExpression(ExprValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.elseif' directive");
+
+ Lex();
+ TheCondState.CondMet = ExprValue;
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveElse
+/// ::= .else
+bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.else' directive");
+
+ Lex();
+
+ if (TheCondState.TheCond != AsmCond::IfCond &&
+ TheCondState.TheCond != AsmCond::ElseIfCond)
+ Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
+ ".elseif");
+ TheCondState.TheCond = AsmCond::ElseCond;
+ bool LastIgnoreState = false;
+ if (!TheCondStack.empty())
+ LastIgnoreState = TheCondStack.back().Ignore;
+ if (LastIgnoreState || TheCondState.CondMet)
+ TheCondState.Ignore = true;
+ else
+ TheCondState.Ignore = false;
+
+ return false;
+}
+
+/// ParseDirectiveEndIf
+/// ::= .endif
+bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.endif' directive");
+
+ Lex();
+
+ if ((TheCondState.TheCond == AsmCond::NoCond) ||
+ TheCondStack.empty())
+ Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
+ ".else");
+ if (!TheCondStack.empty()) {
+ TheCondState = TheCondStack.back();
+ TheCondStack.pop_back();
+ }
+
+ return false;
+}
+
+void AsmParser::initializeDirectiveKindMap() {
+ DirectiveKindMap[".set"] = DK_SET;
+ DirectiveKindMap[".equ"] = DK_EQU;
+ DirectiveKindMap[".equiv"] = DK_EQUIV;
+ DirectiveKindMap[".ascii"] = DK_ASCII;
+ DirectiveKindMap[".asciz"] = DK_ASCIZ;
+ DirectiveKindMap[".string"] = DK_STRING;
+ DirectiveKindMap[".byte"] = DK_BYTE;
+ DirectiveKindMap[".short"] = DK_SHORT;
+ DirectiveKindMap[".value"] = DK_VALUE;
+ DirectiveKindMap[".2byte"] = DK_2BYTE;
+ DirectiveKindMap[".long"] = DK_LONG;
+ DirectiveKindMap[".int"] = DK_INT;
+ DirectiveKindMap[".4byte"] = DK_4BYTE;
+ DirectiveKindMap[".quad"] = DK_QUAD;
+ DirectiveKindMap[".8byte"] = DK_8BYTE;
+ DirectiveKindMap[".single"] = DK_SINGLE;
+ DirectiveKindMap[".float"] = DK_FLOAT;
+ DirectiveKindMap[".double"] = DK_DOUBLE;
+ DirectiveKindMap[".align"] = DK_ALIGN;
+ DirectiveKindMap[".align32"] = DK_ALIGN32;
+ DirectiveKindMap[".balign"] = DK_BALIGN;
+ DirectiveKindMap[".balignw"] = DK_BALIGNW;
+ DirectiveKindMap[".balignl"] = DK_BALIGNL;
+ DirectiveKindMap[".p2align"] = DK_P2ALIGN;
+ DirectiveKindMap[".p2alignw"] = DK_P2ALIGNW;
+ DirectiveKindMap[".p2alignl"] = DK_P2ALIGNL;
+ DirectiveKindMap[".org"] = DK_ORG;
+ DirectiveKindMap[".fill"] = DK_FILL;
+ DirectiveKindMap[".zero"] = DK_ZERO;
+ DirectiveKindMap[".extern"] = DK_EXTERN;
+ DirectiveKindMap[".globl"] = DK_GLOBL;
+ DirectiveKindMap[".global"] = DK_GLOBAL;
+ DirectiveKindMap[".indirect_symbol"] = DK_INDIRECT_SYMBOL;
+ DirectiveKindMap[".lazy_reference"] = DK_LAZY_REFERENCE;
+ DirectiveKindMap[".no_dead_strip"] = DK_NO_DEAD_STRIP;
+ DirectiveKindMap[".symbol_resolver"] = DK_SYMBOL_RESOLVER;
+ DirectiveKindMap[".private_extern"] = DK_PRIVATE_EXTERN;
+ DirectiveKindMap[".reference"] = DK_REFERENCE;
+ DirectiveKindMap[".weak_definition"] = DK_WEAK_DEFINITION;
+ DirectiveKindMap[".weak_reference"] = DK_WEAK_REFERENCE;
+ DirectiveKindMap[".weak_def_can_be_hidden"] = DK_WEAK_DEF_CAN_BE_HIDDEN;
+ DirectiveKindMap[".comm"] = DK_COMM;
+ DirectiveKindMap[".common"] = DK_COMMON;
+ DirectiveKindMap[".lcomm"] = DK_LCOMM;
+ DirectiveKindMap[".abort"] = DK_ABORT;
+ DirectiveKindMap[".include"] = DK_INCLUDE;
+ DirectiveKindMap[".incbin"] = DK_INCBIN;
+ DirectiveKindMap[".code16"] = DK_CODE16;
+ DirectiveKindMap[".code16gcc"] = DK_CODE16GCC;
+ DirectiveKindMap[".rept"] = DK_REPT;
+ DirectiveKindMap[".irp"] = DK_IRP;
+ DirectiveKindMap[".irpc"] = DK_IRPC;
+ DirectiveKindMap[".endr"] = DK_ENDR;
+ DirectiveKindMap[".bundle_align_mode"] = DK_BUNDLE_ALIGN_MODE;
+ DirectiveKindMap[".bundle_lock"] = DK_BUNDLE_LOCK;
+ DirectiveKindMap[".bundle_unlock"] = DK_BUNDLE_UNLOCK;
+ DirectiveKindMap[".if"] = DK_IF;
+ DirectiveKindMap[".ifb"] = DK_IFB;
+ DirectiveKindMap[".ifnb"] = DK_IFNB;
+ DirectiveKindMap[".ifc"] = DK_IFC;
+ DirectiveKindMap[".ifnc"] = DK_IFNC;
+ DirectiveKindMap[".ifdef"] = DK_IFDEF;
+ DirectiveKindMap[".ifndef"] = DK_IFNDEF;
+ DirectiveKindMap[".ifnotdef"] = DK_IFNOTDEF;
+ DirectiveKindMap[".elseif"] = DK_ELSEIF;
+ DirectiveKindMap[".else"] = DK_ELSE;
+ DirectiveKindMap[".endif"] = DK_ENDIF;
+ DirectiveKindMap[".skip"] = DK_SKIP;
+ DirectiveKindMap[".space"] = DK_SPACE;
+ DirectiveKindMap[".file"] = DK_FILE;
+ DirectiveKindMap[".line"] = DK_LINE;
+ DirectiveKindMap[".loc"] = DK_LOC;
+ DirectiveKindMap[".stabs"] = DK_STABS;
+ DirectiveKindMap[".sleb128"] = DK_SLEB128;
+ DirectiveKindMap[".uleb128"] = DK_ULEB128;
+ DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
+ DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
+ DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
+ DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
+ DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
+ DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
+ DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
+ DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
+ DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
+ DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
+ DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
+ DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
+ DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
+ DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
+ DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
+ DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
+ DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
+ DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
+ DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
+ DirectiveKindMap[".macros_on"] = DK_MACROS_ON;
+ DirectiveKindMap[".macros_off"] = DK_MACROS_OFF;
+ DirectiveKindMap[".macro"] = DK_MACRO;
+ DirectiveKindMap[".endm"] = DK_ENDM;
+ DirectiveKindMap[".endmacro"] = DK_ENDMACRO;
+ DirectiveKindMap[".purgem"] = DK_PURGEM;
+}
+
+
+MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) {
+ AsmToken EndToken, StartToken = getTok();
+
+ unsigned NestLevel = 0;
+ for (;;) {
+ // Check whether we have reached the end of the file.
+ if (getLexer().is(AsmToken::Eof)) {
+ Error(DirectiveLoc, "no matching '.endr' in definition");
+ return 0;
+ }
+
+ if (Lexer.is(AsmToken::Identifier) &&
+ (getTok().getIdentifier() == ".rept")) {
+ ++NestLevel;
+ }
+
+ // Otherwise, check whether we have reached the .endr.
+ if (Lexer.is(AsmToken::Identifier) &&
+ getTok().getIdentifier() == ".endr") {
+ if (NestLevel == 0) {
+ EndToken = getTok();
+ Lex();
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ TokError("unexpected token in '.endr' directive");
+ return 0;
+ }
+ break;
+ }
+ --NestLevel;
+ }
+
+ // Otherwise, scan till the end of the statement.
+ eatToEndOfStatement();
+ }
+
+ const char *BodyStart = StartToken.getLoc().getPointer();
+ const char *BodyEnd = EndToken.getLoc().getPointer();
+ StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
+
+ // We Are Anonymous.
+ StringRef Name;
+ MCAsmMacroParameters Parameters;
+ return new MCAsmMacro(Name, Body, Parameters);
+}
+
+void AsmParser::InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
+ raw_svector_ostream &OS) {
+ OS << ".endr\n";
+
+ MemoryBuffer *Instantiation =
+ MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
+
+ // Create the macro instantiation object and add to the current macro
+ // instantiation stack.
+ MacroInstantiation *MI = new MacroInstantiation(M, DirectiveLoc,
+ CurBuffer,
+ getTok().getLoc(),
+ Instantiation);
+ ActiveMacros.push_back(MI);
+
+ // Jump to the macro instantiation and prime the lexer.
+ CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+ Lex();
+}
+
+bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) {
+ int64_t Count;
+ if (parseAbsoluteExpression(Count))
+ return TokError("unexpected token in '.rept' directive");
+
+ if (Count < 0)
+ return TokError("Count is negative");
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.rept' directive");
+
+ // Eat the end of statement.
+ Lex();
+
+ // Lex the rept definition.
+ MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc);
+ if (!M)
+ return true;
+
+ // Macro instantiation is lexical, unfortunately. We construct a new buffer
+ // to hold the macro body with substitutions.
+ SmallString<256> Buf;
+ MCAsmMacroParameters Parameters;
+ MCAsmMacroArguments A;
+ raw_svector_ostream OS(Buf);
+ while (Count--) {
+ if (expandMacro(OS, M->Body, Parameters, A, getTok().getLoc()))
+ return true;
+ }
+ InstantiateMacroLikeBody(M, DirectiveLoc, OS);
+
+ return false;
+}
+
+/// ParseDirectiveIrp
+/// ::= .irp symbol,values
+bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
+ MCAsmMacroParameters Parameters;
+ MCAsmMacroParameter Parameter;
+
+ if (parseIdentifier(Parameter.first))
+ return TokError("expected identifier in '.irp' directive");
+
+ Parameters.push_back(Parameter);
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("expected comma in '.irp' directive");
+
+ Lex();
+
+ MCAsmMacroArguments A;
+ if (ParseMacroArguments(0, A))
+ return true;
+
+ // Eat the end of statement.
+ Lex();
+
+ // Lex the irp definition.
+ MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc);
+ if (!M)
+ return true;
+
+ // Macro instantiation is lexical, unfortunately. We construct a new buffer
+ // to hold the macro body with substitutions.
+ SmallString<256> Buf;
+ raw_svector_ostream OS(Buf);
+
+ for (MCAsmMacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) {
+ MCAsmMacroArguments Args;
+ Args.push_back(*i);
+
+ if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc()))
+ return true;
+ }
+
+ InstantiateMacroLikeBody(M, DirectiveLoc, OS);
+
+ return false;
+}
+
+/// ParseDirectiveIrpc
+/// ::= .irpc symbol,values
+bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
+ MCAsmMacroParameters Parameters;
+ MCAsmMacroParameter Parameter;
+
+ if (parseIdentifier(Parameter.first))
+ return TokError("expected identifier in '.irpc' directive");
+
+ Parameters.push_back(Parameter);
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("expected comma in '.irpc' directive");
+
+ Lex();
+
+ MCAsmMacroArguments A;
+ if (ParseMacroArguments(0, A))
+ return true;
+
+ if (A.size() != 1 || A.front().size() != 1)
+ return TokError("unexpected token in '.irpc' directive");
+
+ // Eat the end of statement.
+ Lex();
+
+ // Lex the irpc definition.
+ MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc);
+ if (!M)
+ return true;
+
+ // Macro instantiation is lexical, unfortunately. We construct a new buffer
+ // to hold the macro body with substitutions.
+ SmallString<256> Buf;
+ raw_svector_ostream OS(Buf);
+
+ StringRef Values = A.front().front().getString();
+ std::size_t I, End = Values.size();
+ for (I = 0; I < End; ++I) {
+ MCAsmMacroArgument Arg;
+ Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1)));
+
+ MCAsmMacroArguments Args;
+ Args.push_back(Arg);
+
+ if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc()))
+ return true;
+ }
+
+ InstantiateMacroLikeBody(M, DirectiveLoc, OS);
+
+ return false;
+}
+
+bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) {
+ if (ActiveMacros.empty())
+ return TokError("unmatched '.endr' directive");
+
+ // The only .repl that should get here are the ones created by
+ // InstantiateMacroLikeBody.
+ assert(getLexer().is(AsmToken::EndOfStatement));
+
+ HandleMacroExit();
+ return false;
+}
+
+bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
+ size_t Len) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (parseExpression(Value))
+ return true;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+ if (!MCE)
+ return Error(ExprLoc, "unexpected expression in _emit");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8, IntValue) && !isIntN(8, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+
+ Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, Len));
+ return false;
+}
+
+bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (parseExpression(Value))
+ return true;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+ if (!MCE)
+ return Error(ExprLoc, "unexpected expression in align");
+ uint64_t IntValue = MCE->getValue();
+ if (!isPowerOf2_64(IntValue))
+ return Error(ExprLoc, "literal value not a power of two greater then zero");
+
+ Info.AsmRewrites->push_back(AsmRewrite(AOK_Align, IDLoc, 5,
+ Log2_64(IntValue)));
+ return false;
+}
+
+// We are comparing pointers, but the pointers are relative to a single string.
+// Thus, this should always be deterministic.
+static int RewritesSort(const void *A, const void *B) {
+ const AsmRewrite *AsmRewriteA = static_cast<const AsmRewrite *>(A);
+ const AsmRewrite *AsmRewriteB = static_cast<const AsmRewrite *>(B);
+ if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
+ return -1;
+ if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
+ return 1;
+
+ // It's possible to have a SizeDirective rewrite and an Input/Output rewrite
+ // to the same location. Make sure the SizeDirective rewrite is performed
+ // first. This also ensure the sort algorithm is stable.
+ if (AsmRewriteA->Kind == AOK_SizeDirective) {
+ assert ((AsmRewriteB->Kind == AOK_Input || AsmRewriteB->Kind == AOK_Output) &&
+ "Expected an Input/Output rewrite!");
+ return -1;
+ }
+ if (AsmRewriteB->Kind == AOK_SizeDirective) {
+ assert ((AsmRewriteA->Kind == AOK_Input || AsmRewriteA->Kind == AOK_Output) &&
+ "Expected an Input/Output rewrite!");
+ return 1;
+ }
+ llvm_unreachable ("Unstable rewrite sort.");
+}
+
+bool
+AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ unsigned &NumOutputs, unsigned &NumInputs,
+ SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
+ SmallVectorImpl<std::string> &Constraints,
+ SmallVectorImpl<std::string> &Clobbers,
+ const MCInstrInfo *MII,
+ const MCInstPrinter *IP,
+ MCAsmParserSemaCallback &SI) {
+ SmallVector<void *, 4> InputDecls;
+ SmallVector<void *, 4> OutputDecls;
+ SmallVector<bool, 4> InputDeclsAddressOf;
+ SmallVector<bool, 4> OutputDeclsAddressOf;
+ SmallVector<std::string, 4> InputConstraints;
+ SmallVector<std::string, 4> OutputConstraints;
+ SmallVector<unsigned, 4> ClobberRegs;
+
+ SmallVector<AsmRewrite, 4> AsmStrRewrites;
+
+ // Prime the lexer.
+ Lex();
+
+ // While we have input, parse each statement.
+ unsigned InputIdx = 0;
+ unsigned OutputIdx = 0;
+ while (getLexer().isNot(AsmToken::Eof)) {
+ ParseStatementInfo Info(&AsmStrRewrites);
+ if (ParseStatement(Info))
+ return true;
+
+ if (Info.ParseError)
+ return true;
+
+ if (Info.Opcode == ~0U)
+ continue;
+
+ const MCInstrDesc &Desc = MII->get(Info.Opcode);
+
+ // Build the list of clobbers, outputs and inputs.
+ for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
+ MCParsedAsmOperand *Operand = Info.ParsedOperands[i];
+
+ // Immediate.
+ if (Operand->isImm())
+ continue;
+
+ // Register operand.
+ if (Operand->isReg() && !Operand->needAddressOf()) {
+ unsigned NumDefs = Desc.getNumDefs();
+ // Clobber.
+ if (NumDefs && Operand->getMCOperandNum() < NumDefs)
+ ClobberRegs.push_back(Operand->getReg());
+ continue;
+ }
+
+ // Expr/Input or Output.
+ bool IsVarDecl;
+ unsigned Length, Size, Type;
+ void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc,
+ Length, Size, Type,
+ IsVarDecl);
+ if (!OpDecl)
+ continue;
+
+ bool isOutput = (i == 1) && Desc.mayStore();
+ if (isOutput) {
+ ++InputIdx;
+ OutputDecls.push_back(OpDecl);
+ OutputDeclsAddressOf.push_back(Operand->needAddressOf());
+ OutputConstraints.push_back('=' + Operand->getConstraint().str());
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Operand->getStartLoc(),
+ Operand->getNameLen()));
+ } else {
+ InputDecls.push_back(OpDecl);
+ InputDeclsAddressOf.push_back(Operand->needAddressOf());
+ InputConstraints.push_back(Operand->getConstraint().str());
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(),
+ Operand->getNameLen()));
+ }
+ }
+ }
+
+ // Set the number of Outputs and Inputs.
+ NumOutputs = OutputDecls.size();
+ NumInputs = InputDecls.size();
+
+ // Set the unique clobbers.
+ array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
+ ClobberRegs.erase(std::unique(ClobberRegs.begin(), ClobberRegs.end()),
+ ClobberRegs.end());
+ Clobbers.assign(ClobberRegs.size(), std::string());
+ for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
+ raw_string_ostream OS(Clobbers[I]);
+ IP->printRegName(OS, ClobberRegs[I]);
+ }
+
+ // Merge the various outputs and inputs. Output are expected first.
+ if (NumOutputs || NumInputs) {
+ unsigned NumExprs = NumOutputs + NumInputs;
+ OpDecls.resize(NumExprs);
+ Constraints.resize(NumExprs);
+ for (unsigned i = 0; i < NumOutputs; ++i) {
+ OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
+ Constraints[i] = OutputConstraints[i];
+ }
+ for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
+ OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
+ Constraints[j] = InputConstraints[i];
+ }
+ }
+
+ // Build the IR assembly string.
+ std::string AsmStringIR;
+ raw_string_ostream OS(AsmStringIR);
+ const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+ const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
+ array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort);
+ for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(),
+ E = AsmStrRewrites.end();
+ I != E; ++I) {
+ const char *Loc = (*I).Loc.getPointer();
+ assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
+
+ unsigned AdditionalSkip = 0;
+ AsmRewriteKind Kind = (*I).Kind;
+
+ // Emit everything up to the immediate/expression.
+ unsigned Len = Loc - AsmStart;
+ if (Len) {
+ // For Input/Output operands we need to remove the brackets, if present.
+ if ((Kind == AOK_Input || Kind == AOK_Output) && Loc[-1] == '[')
+ --Len;
+ OS << StringRef(AsmStart, Len);
+ }
+
+ // Skip the original expression.
+ if (Kind == AOK_Skip) {
+ AsmStart = Loc + (*I).Len;
+ continue;
+ }
+
+ // Rewrite expressions in $N notation.
+ switch (Kind) {
+ default: break;
+ case AOK_Imm:
+ OS << "$$" << (*I).Val;
+ break;
+ case AOK_ImmPrefix:
+ OS << "$$";
+ break;
+ case AOK_Input:
+ OS << '$' << InputIdx++;
+ break;
+ case AOK_Output:
+ OS << '$' << OutputIdx++;
+ break;
+ case AOK_SizeDirective:
+ switch ((*I).Val) {
+ default: break;
+ case 8: OS << "byte ptr "; break;
+ case 16: OS << "word ptr "; break;
+ case 32: OS << "dword ptr "; break;
+ case 64: OS << "qword ptr "; break;
+ case 80: OS << "xword ptr "; break;
+ case 128: OS << "xmmword ptr "; break;
+ case 256: OS << "ymmword ptr "; break;
+ }
+ break;
+ case AOK_Emit:
+ OS << ".byte";
+ break;
+ case AOK_Align: {
+ unsigned Val = (*I).Val;
+ OS << ".align " << Val;
+
+ // Skip the original immediate.
+ assert(Val < 10 && "Expected alignment less then 2^10.");
+ AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
+ break;
+ }
+ case AOK_DotOperator:
+ OS << (*I).Val;
+ break;
+ }
+
+ // Skip the original expression.
+ AsmStart = Loc + (*I).Len + AdditionalSkip;
+
+ // For Input/Output operands we need to remove the brackets, if present.
+ if ((Kind == AOK_Input || Kind == AOK_Output) && AsmStart != AsmEnd &&
+ *AsmStart == ']')
+ ++AsmStart;
+ }
+
+ // Emit the remainder of the asm string.
+ if (AsmStart != AsmEnd)
+ OS << StringRef(AsmStart, AsmEnd - AsmStart);
+
+ AsmString = OS.str();
+ return false;
+}
+
+/// \brief Create an MCAsmParser instance.
+MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM,
+ MCContext &C, MCStreamer &Out,
+ const MCAsmInfo &MAI) {
+ return new AsmParser(SM, C, Out, MAI);
+}
diff --git a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
new file mode 100644
index 000000000000..a50eab217d21
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -0,0 +1,499 @@
+//===- COFFAsmParser.cpp - COFF Assembly Parser ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/COFF.h"
+using namespace llvm;
+
+namespace {
+
+class COFFAsmParser : public MCAsmParserExtension {
+ template<bool (COFFAsmParser::*HandlerMethod)(StringRef, SMLoc)>
+ void addDirectiveHandler(StringRef Directive) {
+ MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
+ this, HandleDirective<COFFAsmParser, HandlerMethod>);
+ getParser().addDirectiveHandler(Directive, Handler);
+ }
+
+ bool ParseSectionSwitch(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind);
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ MCAsmParserExtension::Initialize(Parser);
+
+ addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text");
+ addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data");
+ addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32");
+
+ // Win64 EH directives.
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>(
+ ".seh_proc");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProc>(
+ ".seh_endproc");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartChained>(
+ ".seh_startchained");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndChained>(
+ ".seh_endchained");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandler>(
+ ".seh_handler");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandlerData>(
+ ".seh_handlerdata");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushReg>(
+ ".seh_pushreg");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSetFrame>(
+ ".seh_setframe");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveAllocStack>(
+ ".seh_stackalloc");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveReg>(
+ ".seh_savereg");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveXMM>(
+ ".seh_savexmm");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushFrame>(
+ ".seh_pushframe");
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>(
+ ".seh_endprologue");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
+ }
+
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch(".text",
+ COFF::IMAGE_SCN_CNT_CODE
+ | COFF::IMAGE_SCN_MEM_EXECUTE
+ | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ }
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".bss",
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
+ }
+
+ bool ParseDirectiveDef(StringRef, SMLoc);
+ bool ParseDirectiveScl(StringRef, SMLoc);
+ bool ParseDirectiveType(StringRef, SMLoc);
+ bool ParseDirectiveEndef(StringRef, SMLoc);
+ bool ParseDirectiveSecRel32(StringRef, SMLoc);
+
+ // Win64 EH directives.
+ bool ParseSEHDirectiveStartProc(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndProc(StringRef, SMLoc);
+ bool ParseSEHDirectiveStartChained(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndChained(StringRef, SMLoc);
+ bool ParseSEHDirectiveHandler(StringRef, SMLoc);
+ bool ParseSEHDirectiveHandlerData(StringRef, SMLoc);
+ bool ParseSEHDirectivePushReg(StringRef, SMLoc);
+ bool ParseSEHDirectiveSetFrame(StringRef, SMLoc);
+ bool ParseSEHDirectiveAllocStack(StringRef, SMLoc);
+ bool ParseSEHDirectiveSaveReg(StringRef, SMLoc);
+ bool ParseSEHDirectiveSaveXMM(StringRef, SMLoc);
+ bool ParseSEHDirectivePushFrame(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndProlog(StringRef, SMLoc);
+
+ bool ParseAtUnwindOrAtExcept(bool &unwind, bool &except);
+ bool ParseSEHRegisterNumber(unsigned &RegNo);
+ bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc);
+public:
+ COFFAsmParser() {}
+};
+
+} // end annonomous namespace.
+
+/// ParseDirectiveSymbolAttribute
+/// ::= { ".weak", ... } [ identifier ( , identifier )* ]
+bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
+ MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive)
+ .Case(".weak", MCSA_Weak)
+ .Default(MCSA_Invalid);
+ assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ StringRef Name;
+
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ getStreamer().SwitchSection(getContext().getCOFFSection(
+ Section, Characteristics, Kind));
+
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
+ StringRef SymbolName;
+
+ if (getParser().parseIdentifier(SymbolName))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
+ getStreamer().BeginCOFFSymbolDef(Sym);
+
+ Lex();
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) {
+ int64_t SymbolStorageClass;
+ if (getParser().parseAbsoluteExpression(SymbolStorageClass))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitCOFFSymbolStorageClass(SymbolStorageClass);
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+ int64_t Type;
+ if (getParser().parseAbsoluteExpression(Type))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitCOFFSymbolType(Type);
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EndCOFFSymbolDef();
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
+ StringRef SymbolID;
+ if (getParser().parseIdentifier(SymbolID))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID);
+
+ Lex();
+ getStreamer().EmitCOFFSecRel32(Symbol);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) {
+ StringRef SymbolID;
+ if (getParser().parseIdentifier(SymbolID))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID);
+
+ Lex();
+ getStreamer().EmitWin64EHStartProc(Symbol);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHEndProc();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHStartChained();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHEndChained();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc) {
+ StringRef SymbolID;
+ if (getParser().parseIdentifier(SymbolID))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify one or both of @unwind or @except");
+ Lex();
+ bool unwind = false, except = false;
+ if (ParseAtUnwindOrAtExcept(unwind, except))
+ return true;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ if (ParseAtUnwindOrAtExcept(unwind, except))
+ return true;
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ MCSymbol *handler = getContext().GetOrCreateSymbol(SymbolID);
+
+ Lex();
+ getStreamer().EmitWin64EHHandler(handler, unwind, except);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHHandlerData();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc L) {
+ unsigned Reg;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHPushReg(Reg);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) {
+ unsigned Reg;
+ int64_t Off;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify a stack pointer offset");
+
+ Lex();
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Off))
+ return true;
+
+ if (Off & 0x0F)
+ return Error(startLoc, "offset is not a multiple of 16");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHSetFrame(Reg, Off);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc) {
+ int64_t Size;
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Size))
+ return true;
+
+ if (Size & 7)
+ return Error(startLoc, "size is not a multiple of 8");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHAllocStack(Size);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc L) {
+ unsigned Reg;
+ int64_t Off;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify an offset on the stack");
+
+ Lex();
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Off))
+ return true;
+
+ if (Off & 7)
+ return Error(startLoc, "size is not a multiple of 8");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ // FIXME: Err on %xmm* registers
+ getStreamer().EmitWin64EHSaveReg(Reg, Off);
+ return false;
+}
+
+// FIXME: This method is inherently x86-specific. It should really be in the
+// x86 backend.
+bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc L) {
+ unsigned Reg;
+ int64_t Off;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify an offset on the stack");
+
+ Lex();
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Off))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ if (Off & 0x0F)
+ return Error(startLoc, "offset is not a multiple of 16");
+
+ Lex();
+ // FIXME: Err on non-%xmm* registers
+ getStreamer().EmitWin64EHSaveXMM(Reg, Off);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc) {
+ bool Code = false;
+ StringRef CodeID;
+ if (getLexer().is(AsmToken::At)) {
+ SMLoc startLoc = getLexer().getLoc();
+ Lex();
+ if (!getParser().parseIdentifier(CodeID)) {
+ if (CodeID != "code")
+ return Error(startLoc, "expected @code");
+ Code = true;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHPushFrame(Code);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHEndProlog();
+ return false;
+}
+
+bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) {
+ StringRef identifier;
+ if (getLexer().isNot(AsmToken::At))
+ return TokError("a handler attribute must begin with '@'");
+ SMLoc startLoc = getLexer().getLoc();
+ Lex();
+ if (getParser().parseIdentifier(identifier))
+ return Error(startLoc, "expected @unwind or @except");
+ if (identifier == "unwind")
+ unwind = true;
+ else if (identifier == "except")
+ except = true;
+ else
+ return Error(startLoc, "expected @unwind or @except");
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
+ SMLoc startLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::Percent)) {
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+ SMLoc endLoc;
+ unsigned LLVMRegNo;
+ if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc))
+ return true;
+
+#if 0
+ // FIXME: TargetAsmInfo::getCalleeSavedRegs() commits a serious layering
+ // violation so this validation code is disabled.
+
+ // Check that this is a non-volatile register.
+ const unsigned *NVRegs = TAI.getCalleeSavedRegs();
+ unsigned i;
+ for (i = 0; NVRegs[i] != 0; ++i)
+ if (NVRegs[i] == LLVMRegNo)
+ break;
+ if (NVRegs[i] == 0)
+ return Error(startLoc, "expected non-volatile register");
+#endif
+
+ int SEHRegNo = MRI.getSEHRegNum(LLVMRegNo);
+ if (SEHRegNo < 0)
+ return Error(startLoc,"register can't be represented in SEH unwind info");
+ RegNo = SEHRegNo;
+ }
+ else {
+ int64_t n;
+ if (getParser().parseAbsoluteExpression(n))
+ return true;
+ if (n > 15)
+ return Error(startLoc, "register number is too high");
+ RegNo = n;
+ }
+
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createCOFFAsmParser() {
+ return new COFFAsmParser;
+}
+
+}
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
new file mode 100644
index 000000000000..6d6409fb69e2
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -0,0 +1,826 @@
+//===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+using namespace llvm;
+
+namespace {
+
+/// \brief Implementation of directive handling which is shared across all
+/// Darwin targets.
+class DarwinAsmParser : public MCAsmParserExtension {
+ template<bool (DarwinAsmParser::*HandlerMethod)(StringRef, SMLoc)>
+ void addDirectiveHandler(StringRef Directive) {
+ MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
+ this, HandleDirective<DarwinAsmParser, HandlerMethod>);
+ getParser().addDirectiveHandler(Directive, Handler);
+ }
+
+ bool ParseSectionSwitch(const char *Segment, const char *Section,
+ unsigned TAA = 0, unsigned ImplicitAlign = 0,
+ unsigned StubSize = 0);
+
+public:
+ DarwinAsmParser() {}
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>(
+ ".subsections_via_symbols");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>(
+ ".pushsection");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>(
+ ".popsection");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectivePrevious>(".previous");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>(
+ ".secure_log_unique");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>(
+ ".secure_log_reset");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill");
+
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegion>(
+ ".data_region");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegionEnd>(
+ ".end_data_region");
+
+ // Special section directives.
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(
+ ".const_data");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(
+ ".constructor");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(
+ ".cstring");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(
+ ".destructor");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(
+ ".fvmlib_init0");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(
+ ".fvmlib_init1");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(
+ ".lazy_symbol_pointer");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveLinkerOption>(
+ ".linker_option");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(
+ ".literal16");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(
+ ".literal4");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(
+ ".literal8");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(
+ ".mod_init_func");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(
+ ".mod_term_func");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(
+ ".non_lazy_symbol_pointer");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(
+ ".objc_cat_cls_meth");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(
+ ".objc_cat_inst_meth");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(
+ ".objc_category");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(
+ ".objc_class");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(
+ ".objc_class_names");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(
+ ".objc_class_vars");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(
+ ".objc_cls_meth");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(
+ ".objc_cls_refs");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(
+ ".objc_inst_meth");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(
+ ".objc_instance_vars");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(
+ ".objc_message_refs");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(
+ ".objc_meta_class");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(
+ ".objc_meth_var_names");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(
+ ".objc_meth_var_types");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(
+ ".objc_module_info");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(
+ ".objc_protocol");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(
+ ".objc_selector_strs");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(
+ ".objc_string_object");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(
+ ".objc_symbols");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(
+ ".picsymbol_stub");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(
+ ".static_const");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(
+ ".static_data");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(
+ ".symbol_stub");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(
+ ".thread_init_func");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv");
+
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident");
+ }
+
+ bool ParseDirectiveDesc(StringRef, SMLoc);
+ bool ParseDirectiveDumpOrLoad(StringRef, SMLoc);
+ bool ParseDirectiveLsym(StringRef, SMLoc);
+ bool ParseDirectiveLinkerOption(StringRef, SMLoc);
+ bool ParseDirectiveSection(StringRef, SMLoc);
+ bool ParseDirectivePushSection(StringRef, SMLoc);
+ bool ParseDirectivePopSection(StringRef, SMLoc);
+ bool ParseDirectivePrevious(StringRef, SMLoc);
+ bool ParseDirectiveSecureLogReset(StringRef, SMLoc);
+ bool ParseDirectiveSecureLogUnique(StringRef, SMLoc);
+ bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc);
+ bool ParseDirectiveTBSS(StringRef, SMLoc);
+ bool ParseDirectiveZerofill(StringRef, SMLoc);
+ bool ParseDirectiveDataRegion(StringRef, SMLoc);
+ bool ParseDirectiveDataRegionEnd(StringRef, SMLoc);
+
+ // Named Section Directive
+ bool ParseSectionDirectiveConst(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__const");
+ }
+ bool ParseSectionDirectiveStaticConst(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__static_const");
+ }
+ bool ParseSectionDirectiveCString(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveLiteral4(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__literal4",
+ MCSectionMachO::S_4BYTE_LITERALS, 4);
+ }
+ bool ParseSectionDirectiveLiteral8(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__literal8",
+ MCSectionMachO::S_8BYTE_LITERALS, 8);
+ }
+ bool ParseSectionDirectiveLiteral16(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__literal16",
+ MCSectionMachO::S_16BYTE_LITERALS, 16);
+ }
+ bool ParseSectionDirectiveConstructor(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__constructor");
+ }
+ bool ParseSectionDirectiveDestructor(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__destructor");
+ }
+ bool ParseSectionDirectiveFVMLibInit0(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__fvmlib_init0");
+ }
+ bool ParseSectionDirectiveFVMLibInit1(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__fvmlib_init1");
+ }
+ bool ParseSectionDirectiveSymbolStub(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__symbol_stub",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ // FIXME: Different on PPC and ARM.
+ 0, 16);
+ }
+ bool ParseSectionDirectivePICSymbolStub(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__picsymbol_stub",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, 26);
+ }
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__data");
+ }
+ bool ParseSectionDirectiveStaticData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__static_data");
+ }
+ bool ParseSectionDirectiveNonLazySymbolPointers(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveLazySymbolPointers(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveDyld(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__dyld");
+ }
+ bool ParseSectionDirectiveModInitFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveModTermFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveConstData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__const");
+ }
+ bool ParseSectionDirectiveObjCClass(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__class",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCMetaClass(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__meta_class",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCatClsMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cat_cls_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCatInstMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cat_inst_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCProtocol(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__protocol",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCStringObject(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__string_object",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClsMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cls_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCInstMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__inst_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClsRefs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cls_refs",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
+ MCSectionMachO::S_LITERAL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveObjCMessageRefs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__message_refs",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
+ MCSectionMachO::S_LITERAL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveObjCSymbols(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__symbols",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCategory(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__category",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClassVars(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__class_vars",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCInstanceVars(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__instance_vars",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCModuleInfo(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__module_info",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClassNames(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCMethVarTypes(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCMethVarNames(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCSelectorStrs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__selector_strs",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveTData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_data",
+ MCSectionMachO::S_THREAD_LOCAL_REGULAR);
+ }
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+ }
+ bool ParseSectionDirectiveTLV(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_vars",
+ MCSectionMachO::S_THREAD_LOCAL_VARIABLES);
+ }
+ bool ParseSectionDirectiveIdent(StringRef, SMLoc) {
+ // Darwin silently ignores the .ident directive.
+ getParser().eatToEndOfStatement();
+ return false;
+ }
+ bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_init",
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS);
+ }
+
+};
+
+} // end anonymous namespace
+
+bool DarwinAsmParser::ParseSectionSwitch(const char *Segment,
+ const char *Section,
+ unsigned TAA, unsigned Align,
+ unsigned StubSize) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ // FIXME: Arch specific.
+ bool isText = TAA & MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS;
+ getStreamer().SwitchSection(getContext().getMachOSection(
+ Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
+
+ // Set the implicit alignment, if any.
+ //
+ // FIXME: This isn't really what 'as' does; I think it just uses the implicit
+ // alignment on the section (e.g., if one manually inserts bytes into the
+ // section, then just issuing the section switch directive will not realign
+ // the section. However, this is arguably more reasonable behavior, and there
+ // is no good reason for someone to intentionally emit incorrectly sized
+ // values into the implicitly aligned sections.
+ if (Align)
+ getStreamer().EmitValueToAlignment(Align, 0, 1, 0);
+
+ return false;
+}
+
+/// ParseDirectiveDesc
+/// ::= .desc identifier , expression
+bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.desc' directive");
+ Lex();
+
+ int64_t DescValue;
+ if (getParser().parseAbsoluteExpression(DescValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.desc' directive");
+
+ Lex();
+
+ // Set the n_desc field of this Symbol to this DescValue
+ getStreamer().EmitSymbolDesc(Sym, DescValue);
+
+ return false;
+}
+
+/// ParseDirectiveDumpOrLoad
+/// ::= ( .dump | .load ) "filename"
+bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive,
+ SMLoc IDLoc) {
+ bool IsDump = Directive == ".dump";
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.dump' or '.load' directive");
+
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.dump' or '.load' directive");
+
+ Lex();
+
+ // FIXME: If/when .dump and .load are implemented they will be done in the
+ // the assembly parser and not have any need for an MCStreamer API.
+ if (IsDump)
+ return Warning(IDLoc, "ignoring directive .dump for now");
+ else
+ return Warning(IDLoc, "ignoring directive .load for now");
+}
+
+/// ParseDirectiveLinkerOption
+/// ::= .linker_option "string" ( , "string" )*
+bool DarwinAsmParser::ParseDirectiveLinkerOption(StringRef IDVal, SMLoc) {
+ SmallVector<std::string, 4> Args;
+ for (;;) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '" + Twine(IDVal) + "' directive");
+
+ std::string Data;
+ if (getParser().parseEscapedString(Data))
+ return true;
+
+ Args.push_back(Data);
+
+ Lex();
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ Lex();
+ }
+
+ getStreamer().EmitLinkerOptions(Args);
+ return false;
+}
+
+/// ParseDirectiveLsym
+/// ::= .lsym identifier , expression
+bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.lsym' directive");
+ Lex();
+
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.lsym' directive");
+
+ Lex();
+
+ // We don't currently support this directive.
+ //
+ // FIXME: Diagnostic location!
+ (void) Sym;
+ return TokError("directive '.lsym' is unsupported");
+}
+
+/// ParseDirectiveSection:
+/// ::= .section identifier (',' identifier)*
+bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+ SMLoc Loc = getLexer().getLoc();
+
+ StringRef SectionName;
+ if (getParser().parseIdentifier(SectionName))
+ return Error(Loc, "expected identifier after '.section' directive");
+
+ // Verify there is a following comma.
+ if (!getLexer().is(AsmToken::Comma))
+ return TokError("unexpected token in '.section' directive");
+
+ std::string SectionSpec = SectionName;
+ SectionSpec += ",";
+
+ // Add all the tokens until the end of the line, ParseSectionSpecifier will
+ // handle this.
+ StringRef EOL = getLexer().LexUntilEndOfStatement();
+ SectionSpec.append(EOL.begin(), EOL.end());
+
+ Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.section' directive");
+ Lex();
+
+
+ StringRef Segment, Section;
+ unsigned StubSize;
+ unsigned TAA;
+ bool TAAParsed;
+ std::string ErrorStr =
+ MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section,
+ TAA, TAAParsed, StubSize);
+
+ if (!ErrorStr.empty())
+ return Error(Loc, ErrorStr.c_str());
+
+ // FIXME: Arch specific.
+ bool isText = Segment == "__TEXT"; // FIXME: Hack.
+ getStreamer().SwitchSection(getContext().getMachOSection(
+ Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
+ return false;
+}
+
+/// ParseDirectivePushSection:
+/// ::= .pushsection identifier (',' identifier)*
+bool DarwinAsmParser::ParseDirectivePushSection(StringRef S, SMLoc Loc) {
+ getStreamer().PushSection();
+
+ if (ParseDirectiveSection(S, Loc)) {
+ getStreamer().PopSection();
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseDirectivePopSection:
+/// ::= .popsection
+bool DarwinAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
+ if (!getStreamer().PopSection())
+ return TokError(".popsection without corresponding .pushsection");
+ return false;
+}
+
+/// ParseDirectivePrevious:
+/// ::= .previous
+bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
+ const MCSection *PreviousSection = getStreamer().getPreviousSection();
+ if (PreviousSection == NULL)
+ return TokError(".previous without corresponding .section");
+ getStreamer().SwitchSection(PreviousSection);
+ return false;
+}
+
+/// ParseDirectiveSecureLogUnique
+/// ::= .secure_log_unique ... message ...
+bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
+ StringRef LogMessage = getParser().parseStringToEndOfStatement();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.secure_log_unique' directive");
+
+ if (getContext().getSecureLogUsed() != false)
+ return Error(IDLoc, ".secure_log_unique specified multiple times");
+
+ // Get the secure log path.
+ const char *SecureLogFile = getContext().getSecureLogFile();
+ if (SecureLogFile == NULL)
+ return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE "
+ "environment variable unset.");
+
+ // Open the secure log file if we haven't already.
+ raw_ostream *OS = getContext().getSecureLog();
+ if (OS == NULL) {
+ std::string Err;
+ OS = new raw_fd_ostream(SecureLogFile, Err, raw_fd_ostream::F_Append);
+ if (!Err.empty()) {
+ delete OS;
+ return Error(IDLoc, Twine("can't open secure log file: ") +
+ SecureLogFile + " (" + Err + ")");
+ }
+ getContext().setSecureLog(OS);
+ }
+
+ // Write the message.
+ int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc);
+ *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+ << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":"
+ << LogMessage + "\n";
+
+ getContext().setSecureLogUsed(true);
+
+ return false;
+}
+
+/// ParseDirectiveSecureLogReset
+/// ::= .secure_log_reset
+bool DarwinAsmParser::ParseDirectiveSecureLogReset(StringRef, SMLoc IDLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.secure_log_reset' directive");
+
+ Lex();
+
+ getContext().setSecureLogUsed(false);
+
+ return false;
+}
+
+/// ParseDirectiveSubsectionsViaSymbols
+/// ::= .subsections_via_symbols
+bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.subsections_via_symbols' directive");
+
+ Lex();
+
+ getStreamer().EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+ return false;
+}
+
+/// ParseDirectiveTBSS
+/// ::= .tbss identifier, size, align
+bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) {
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.tbss' directive");
+
+ Lex();
+
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than"
+ "zero");
+
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less"
+ "than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ getStreamer().EmitTBSSSymbol(getContext().getMachOSection(
+ "__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ 0, SectionKind::getThreadBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
+
+/// ParseDirectiveZerofill
+/// ::= .zerofill segname , sectname [, identifier , size_expression [
+/// , align_expression ]]
+bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) {
+ StringRef Segment;
+ if (getParser().parseIdentifier(Segment))
+ return TokError("expected segment name after '.zerofill' directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ StringRef Section;
+ if (getParser().parseIdentifier(Section))
+ return TokError("expected section name after comma in '.zerofill' "
+ "directive");
+
+ // If this is the end of the line all that was wanted was to create the
+ // the section but with no symbol.
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ // Create the zerofill section but no symbol
+ getStreamer().EmitZerofill(getContext().getMachOSection(
+ Segment, Section, MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()));
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef IDStr;
+ if (getParser().parseIdentifier(IDStr))
+ return TokError("expected identifier in directive");
+
+ // handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(IDStr);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.zerofill' directive");
+
+ Lex();
+
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less "
+ "than zero");
+
+ // NOTE: The alignment in the directive is a power of 2 value, the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, "
+ "can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // Create the zerofill Symbol with Size and Pow2Alignment
+ //
+ // FIXME: Arch specific.
+ getStreamer().EmitZerofill(getContext().getMachOSection(
+ Segment, Section, MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
+
+/// ParseDirectiveDataRegion
+/// ::= .data_region [ ( jt8 | jt16 | jt32 ) ]
+bool DarwinAsmParser::ParseDirectiveDataRegion(StringRef, SMLoc) {
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ Lex();
+ getStreamer().EmitDataRegion(MCDR_DataRegion);
+ return false;
+ }
+ StringRef RegionType;
+ SMLoc Loc = getParser().getTok().getLoc();
+ if (getParser().parseIdentifier(RegionType))
+ return TokError("expected region type after '.data_region' directive");
+ int Kind = StringSwitch<int>(RegionType)
+ .Case("jt8", MCDR_DataRegionJT8)
+ .Case("jt16", MCDR_DataRegionJT16)
+ .Case("jt32", MCDR_DataRegionJT32)
+ .Default(-1);
+ if (Kind == -1)
+ return Error(Loc, "unknown region type in '.data_region' directive");
+ Lex();
+
+ getStreamer().EmitDataRegion((MCDataRegionType)Kind);
+ return false;
+}
+
+/// ParseDirectiveDataRegionEnd
+/// ::= .end_data_region
+bool DarwinAsmParser::ParseDirectiveDataRegionEnd(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.end_data_region' directive");
+
+ Lex();
+ getStreamer().EmitDataRegion(MCDR_DataRegionEnd);
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createDarwinAsmParser() {
+ return new DarwinAsmParser;
+}
+
+} // end llvm namespace
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
new file mode 100644
index 000000000000..4c45e087445d
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -0,0 +1,622 @@
+//===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+namespace {
+
+class ELFAsmParser : public MCAsmParserExtension {
+ template<bool (ELFAsmParser::*HandlerMethod)(StringRef, SMLoc)>
+ void addDirectiveHandler(StringRef Directive) {
+ MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
+ this, HandleDirective<ELFAsmParser, HandlerMethod>);
+
+ getParser().addDirectiveHandler(Directive, Handler);
+ }
+
+ bool ParseSectionSwitch(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind);
+ bool SeenIdent;
+
+public:
+ ELFAsmParser() : SeenIdent(false) {
+ BracketExpressionsSupported = true;
+ }
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data");
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text");
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata");
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata");
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss");
+ addDirectiveHandler<
+ &ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel");
+ addDirectiveHandler<
+ &ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro");
+ addDirectiveHandler<
+ &ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
+ addDirectiveHandler<
+ &ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
+ addDirectiveHandler<
+ &ELFAsmParser::ParseDirectivePushSection>(".pushsection");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveVersion>(".version");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".local");
+ addDirectiveHandler<
+ &ELFAsmParser::ParseDirectiveSymbolAttribute>(".protected");
+ addDirectiveHandler<
+ &ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal");
+ addDirectiveHandler<
+ &ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
+ }
+
+ // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is
+ // the best way for us to get access to it?
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC, SectionKind::getText());
+ }
+ bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC, SectionKind::getBSS());
+ }
+ bool ParseSectionDirectiveRoData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ }
+ bool ParseSectionDirectiveTData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_TLS | ELF::SHF_WRITE,
+ SectionKind::getThreadData());
+ }
+ bool ParseSectionDirectiveTBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".tbss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_TLS | ELF::SHF_WRITE,
+ SectionKind::getThreadBSS());
+ }
+ bool ParseSectionDirectiveDataRel(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+ }
+ bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel.ro.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+ }
+ bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) {
+ return ParseSectionSwitch(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseDirectivePushSection(StringRef, SMLoc);
+ bool ParseDirectivePopSection(StringRef, SMLoc);
+ bool ParseDirectiveSection(StringRef, SMLoc);
+ bool ParseDirectiveSize(StringRef, SMLoc);
+ bool ParseDirectivePrevious(StringRef, SMLoc);
+ bool ParseDirectiveType(StringRef, SMLoc);
+ bool ParseDirectiveIdent(StringRef, SMLoc);
+ bool ParseDirectiveSymver(StringRef, SMLoc);
+ bool ParseDirectiveVersion(StringRef, SMLoc);
+ bool ParseDirectiveWeakref(StringRef, SMLoc);
+ bool ParseDirectiveSymbolAttribute(StringRef, SMLoc);
+
+private:
+ bool ParseSectionName(StringRef &SectionName);
+};
+
+}
+
+/// ParseDirectiveSymbolAttribute
+/// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
+bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
+ MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive)
+ .Case(".weak", MCSA_Weak)
+ .Case(".local", MCSA_Local)
+ .Case(".hidden", MCSA_Hidden)
+ .Case(".internal", MCSA_Internal)
+ .Case(".protected", MCSA_Protected)
+ .Default(MCSA_Invalid);
+ assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ StringRef Name;
+
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ getStreamer().SwitchSection(getContext().getELFSection(
+ Section, Type, Flags, Kind));
+
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getStreamer().EmitELFSize(Sym, Expr);
+ return false;
+}
+
+bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
+ // A section name can contain -, so we cannot just use
+ // parseIdentifier.
+ SMLoc FirstLoc = getLexer().getLoc();
+ unsigned Size = 0;
+
+ if (getLexer().is(AsmToken::String)) {
+ SectionName = getTok().getIdentifier();
+ Lex();
+ return false;
+ }
+
+ for (;;) {
+ StringRef Tmp;
+ unsigned CurSize;
+
+ SMLoc PrevLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::Minus)) {
+ CurSize = 1;
+ Lex(); // Consume the "-".
+ } else if (getLexer().is(AsmToken::String)) {
+ CurSize = getTok().getIdentifier().size() + 2;
+ Lex();
+ } else if (getLexer().is(AsmToken::Identifier)) {
+ CurSize = getTok().getIdentifier().size();
+ Lex();
+ } else {
+ break;
+ }
+
+ Size += CurSize;
+ SectionName = StringRef(FirstLoc.getPointer(), Size);
+
+ // Make sure the following token is adjacent.
+ if (PrevLoc.getPointer() + CurSize != getTok().getLoc().getPointer())
+ break;
+ }
+ if (Size == 0)
+ return true;
+
+ return false;
+}
+
+static SectionKind computeSectionKind(unsigned Flags) {
+ if (Flags & ELF::SHF_EXECINSTR)
+ return SectionKind::getText();
+ if (Flags & ELF::SHF_TLS)
+ return SectionKind::getThreadData();
+ return SectionKind::getDataRel();
+}
+
+static int parseSectionFlags(StringRef flagsStr) {
+ int flags = 0;
+
+ for (unsigned i = 0; i < flagsStr.size(); i++) {
+ switch (flagsStr[i]) {
+ case 'a':
+ flags |= ELF::SHF_ALLOC;
+ break;
+ case 'x':
+ flags |= ELF::SHF_EXECINSTR;
+ break;
+ case 'w':
+ flags |= ELF::SHF_WRITE;
+ break;
+ case 'M':
+ flags |= ELF::SHF_MERGE;
+ break;
+ case 'S':
+ flags |= ELF::SHF_STRINGS;
+ break;
+ case 'T':
+ flags |= ELF::SHF_TLS;
+ break;
+ case 'c':
+ flags |= ELF::XCORE_SHF_CP_SECTION;
+ break;
+ case 'd':
+ flags |= ELF::XCORE_SHF_DP_SECTION;
+ break;
+ case 'G':
+ flags |= ELF::SHF_GROUP;
+ break;
+ default:
+ return -1;
+ }
+ }
+
+ return flags;
+}
+
+bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
+ getStreamer().PushSection();
+
+ if (ParseDirectiveSection(s, loc)) {
+ getStreamer().PopSection();
+ return true;
+ }
+
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
+ if (!getStreamer().PopSection())
+ return TokError(".popsection without corresponding .pushsection");
+ return false;
+}
+
+// FIXME: This is a work in progress.
+bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+ StringRef SectionName;
+
+ if (ParseSectionName(SectionName))
+ return TokError("expected identifier in directive");
+
+ StringRef TypeName;
+ int64_t Size = 0;
+ StringRef GroupName;
+ unsigned Flags = 0;
+
+ // Set the defaults first.
+ if (SectionName == ".fini" || SectionName == ".init" ||
+ SectionName == ".rodata")
+ Flags |= ELF::SHF_ALLOC;
+ if (SectionName == ".fini" || SectionName == ".init")
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in directive");
+
+ StringRef FlagsStr = getTok().getStringContents();
+ Lex();
+
+ int extraFlags = parseSectionFlags(FlagsStr);
+ if (extraFlags < 0)
+ return TokError("unknown flag");
+ Flags |= extraFlags;
+
+ bool Mergeable = Flags & ELF::SHF_MERGE;
+ bool Group = Flags & ELF::SHF_GROUP;
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ if (Mergeable)
+ return TokError("Mergeable section must specify the type");
+ if (Group)
+ return TokError("Group section must specify the type");
+ } else {
+ Lex();
+ if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+ return TokError("expected '@' or '%' before type");
+
+ Lex();
+ if (getParser().parseIdentifier(TypeName))
+ return TokError("expected identifier in directive");
+
+ if (Mergeable) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected the entry size");
+ Lex();
+ if (getParser().parseAbsoluteExpression(Size))
+ return true;
+ if (Size <= 0)
+ return TokError("entry size must be positive");
+ }
+
+ if (Group) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected group name");
+ Lex();
+ if (getParser().parseIdentifier(GroupName))
+ return true;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ StringRef Linkage;
+ if (getParser().parseIdentifier(Linkage))
+ return true;
+ if (Linkage != "comdat")
+ return TokError("Linkage must be 'comdat'");
+ }
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ unsigned Type = ELF::SHT_PROGBITS;
+
+ if (TypeName.empty()) {
+ if (SectionName.startswith(".note"))
+ Type = ELF::SHT_NOTE;
+ else if (SectionName == ".init_array")
+ Type = ELF::SHT_INIT_ARRAY;
+ else if (SectionName == ".fini_array")
+ Type = ELF::SHT_FINI_ARRAY;
+ else if (SectionName == ".preinit_array")
+ Type = ELF::SHT_PREINIT_ARRAY;
+ } else {
+ if (TypeName == "init_array")
+ Type = ELF::SHT_INIT_ARRAY;
+ else if (TypeName == "fini_array")
+ Type = ELF::SHT_FINI_ARRAY;
+ else if (TypeName == "preinit_array")
+ Type = ELF::SHT_PREINIT_ARRAY;
+ else if (TypeName == "nobits")
+ Type = ELF::SHT_NOBITS;
+ else if (TypeName == "progbits")
+ Type = ELF::SHT_PROGBITS;
+ else if (TypeName == "note")
+ Type = ELF::SHT_NOTE;
+ else if (TypeName == "unwind")
+ Type = ELF::SHT_X86_64_UNWIND;
+ else
+ return TokError("unknown section type");
+ }
+
+ SectionKind Kind = computeSectionKind(Flags);
+ getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
+ Flags, Kind, Size,
+ GroupName));
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
+ const MCSection *PreviousSection = getStreamer().getPreviousSection();
+ if (PreviousSection == NULL)
+ return TokError(".previous without corresponding .section");
+ getStreamer().SwitchSection(PreviousSection);
+
+ return false;
+}
+
+/// ParseDirectiveELFType
+/// ::= .type identifier , @attribute
+bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.type' directive");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+ return TokError("expected '@' or '%' before type");
+ Lex();
+
+ StringRef Type;
+ SMLoc TypeLoc;
+
+ TypeLoc = getLexer().getLoc();
+ if (getParser().parseIdentifier(Type))
+ return TokError("expected symbol type in directive");
+
+ MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
+ .Case("function", MCSA_ELF_TypeFunction)
+ .Case("object", MCSA_ELF_TypeObject)
+ .Case("tls_object", MCSA_ELF_TypeTLS)
+ .Case("common", MCSA_ELF_TypeCommon)
+ .Case("notype", MCSA_ELF_TypeNoType)
+ .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction)
+ .Default(MCSA_Invalid);
+
+ if (Attr == MCSA_Invalid)
+ return Error(TypeLoc, "unsupported attribute in '.type' directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.type' directive");
+
+ Lex();
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ return false;
+}
+
+/// ParseDirectiveIdent
+/// ::= .ident string
+bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("unexpected token in '.ident' directive");
+
+ StringRef Data = getTok().getIdentifier();
+
+ Lex();
+
+ const MCSection *Comment =
+ getContext().getELFSection(".comment", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
+ SectionKind::getReadOnly(),
+ 1, "");
+
+ getStreamer().PushSection();
+ getStreamer().SwitchSection(Comment);
+ if (!SeenIdent) {
+ getStreamer().EmitIntValue(0, 1);
+ SeenIdent = true;
+ }
+ getStreamer().EmitBytes(Data);
+ getStreamer().EmitIntValue(0, 1);
+ getStreamer().PopSection();
+ return false;
+}
+
+/// ParseDirectiveSymver
+/// ::= .symver foo, bar2@zed
+bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected a comma");
+
+ Lex();
+
+ StringRef AliasName;
+ if (getParser().parseIdentifier(AliasName))
+ return TokError("expected identifier in directive");
+
+ if (AliasName.find('@') == StringRef::npos)
+ return TokError("expected a '@' in the name");
+
+ MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCExpr *Value = MCSymbolRefExpr::Create(Sym, getContext());
+
+ getStreamer().EmitAssignment(Alias, Value);
+ return false;
+}
+
+/// ParseDirectiveVersion
+/// ::= .version string
+bool ELFAsmParser::ParseDirectiveVersion(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("unexpected token in '.version' directive");
+
+ StringRef Data = getTok().getIdentifier();
+
+ Lex();
+
+ const MCSection *Note =
+ getContext().getELFSection(".note", ELF::SHT_NOTE, 0,
+ SectionKind::getReadOnly());
+
+ getStreamer().PushSection();
+ getStreamer().SwitchSection(Note);
+ getStreamer().EmitIntValue(Data.size()+1, 4); // namesz.
+ getStreamer().EmitIntValue(0, 4); // descsz = 0 (no description).
+ getStreamer().EmitIntValue(1, 4); // type = NT_VERSION.
+ getStreamer().EmitBytes(Data); // name.
+ getStreamer().EmitIntValue(0, 1); // terminate the string.
+ getStreamer().EmitValueToAlignment(4); // ensure 4 byte alignment.
+ getStreamer().PopSection();
+ return false;
+}
+
+/// ParseDirectiveWeakref
+/// ::= .weakref foo, bar
+bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
+ // FIXME: Share code with the other alias building directives.
+
+ StringRef AliasName;
+ if (getParser().parseIdentifier(AliasName))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected a comma");
+
+ Lex();
+
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ getStreamer().EmitWeakReference(Alias, Sym);
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createELFAsmParser() {
+ return new ELFAsmParser;
+}
+
+}
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
new file mode 100644
index 000000000000..3867691107fb
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -0,0 +1,32 @@
+//===-- MCAsmLexer.cpp - Abstract Asm Lexer Interface ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+
+MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()),
+ TokStart(0), SkipSpace(true) {
+}
+
+MCAsmLexer::~MCAsmLexer() {
+}
+
+SMLoc MCAsmLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
+}
+
+SMLoc AsmToken::getLoc() const {
+ return SMLoc::getFromPointer(Str.data());
+}
+
+SMLoc AsmToken::getEndLoc() const {
+ return SMLoc::getFromPointer(Str.data() + Str.size());
+}
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
new file mode 100644
index 000000000000..6e1ebad36c0d
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -0,0 +1,50 @@
+//===-- MCAsmParser.cpp - Abstract Asm Parser Interface -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) {
+}
+
+MCAsmParser::~MCAsmParser() {
+}
+
+void MCAsmParser::setTargetParser(MCTargetAsmParser &P) {
+ assert(!TargetParser && "Target parser is already initialized!");
+ TargetParser = &P;
+ TargetParser->Initialize(*this);
+}
+
+const AsmToken &MCAsmParser::getTok() {
+ return getLexer().getTok();
+}
+
+bool MCAsmParser::TokError(const Twine &Msg, ArrayRef<SMRange> Ranges) {
+ Error(getLexer().getLoc(), Msg, Ranges);
+ return true;
+}
+
+bool MCAsmParser::parseExpression(const MCExpr *&Res) {
+ SMLoc L;
+ return parseExpression(Res, L);
+}
+
+void MCParsedAsmOperand::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << " " << *this;
+#endif
+}
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
new file mode 100644
index 000000000000..3f25a14926b6
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -0,0 +1,22 @@
+//===-- MCAsmParserExtension.cpp - Asm Parser Hooks -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+using namespace llvm;
+
+MCAsmParserExtension::MCAsmParserExtension() :
+ BracketExpressionsSupported(false) {
+}
+
+MCAsmParserExtension::~MCAsmParserExtension() {
+}
+
+void MCAsmParserExtension::Initialize(MCAsmParser &Parser) {
+ this->Parser = &Parser;
+}
diff --git a/contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
new file mode 100644
index 000000000000..60a3a3b59a3d
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -0,0 +1,19 @@
+//===-- MCTargetAsmParser.cpp - Target Assembly Parser ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCTargetAsmParser.h"
+using namespace llvm;
+
+MCTargetAsmParser::MCTargetAsmParser()
+ : AvailableFeatures(0), ParsingInlineAsm(false)
+{
+}
+
+MCTargetAsmParser::~MCTargetAsmParser() {
+}
diff --git a/contrib/llvm/lib/MC/MCPureStreamer.cpp b/contrib/llvm/lib/MC/MCPureStreamer.cpp
new file mode 100644
index 000000000000..0e04c5537acb
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCPureStreamer.cpp
@@ -0,0 +1,242 @@
+//===- lib/MC/MCPureStreamer.cpp - MC "Pure" Object Output ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectStreamer.h"
+// FIXME: Remove this.
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+
+class MCPureStreamer : public MCObjectStreamer {
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst);
+ virtual void EmitInstToData(const MCInst &Inst);
+
+public:
+ MCPureStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter)
+ : MCObjectStreamer(SK_PureStreamer, Context, TAB, OS, Emitter) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void InitToTextSection();
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ uint64_t Size = 0, unsigned ByteAlignment = 0);
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+ virtual void FinishImpl();
+
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitThumbFunc(MCSymbol *Func) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCOFFSymbolType(int Type) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EndCOFFSymbolDef() {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitFileDirective(StringRef Filename) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename, unsigned CUID = 0) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+
+ /// @}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_PureStreamer;
+ }
+};
+
+} // end anonymous namespace.
+
+void MCPureStreamer::InitSections() {
+ InitToTextSection();
+}
+
+void MCPureStreamer::InitToTextSection() {
+ // FIMXE: To what!?
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+}
+
+void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+
+ Symbol->setSection(*getCurrentSection());
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // We have to create a new fragment if this is an atom defining symbol,
+ // fragments cannot span atoms.
+ if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
+ new MCDataFragment(getCurrentSectionData());
+
+ // FIXME: This is wasteful, we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists, otherwise we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+}
+
+
+void MCPureStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+}
+
+void MCPureStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ report_fatal_error("not yet implemented in pure streamer");
+}
+
+void MCPureStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+bool MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+ return false;
+}
+
+void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
+ MCRelaxableFragment *IF =
+ new MCRelaxableFragment(Inst, getCurrentSectionData());
+
+ // Add the fixups and data.
+ //
+ // FIXME: Revisit this design decision when relaxation is done, we may be
+ // able to get away with not storing any extra data in the MCInst.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ IF->getContents() = Code;
+ IF->getFixups() = Fixups;
+}
+
+void MCPureStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->getFixups().push_back(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCPureStreamer::FinishImpl() {
+ // FIXME: Handle DWARF tables?
+
+ this->MCObjectStreamer::FinishImpl();
+}
+
+MCStreamer *llvm::createPureStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_ostream &OS, MCCodeEmitter *CE) {
+ return new MCPureStreamer(Context, MAB, OS, CE);
+}
diff --git a/contrib/llvm/lib/MC/MCRegisterInfo.cpp b/contrib/llvm/lib/MC/MCRegisterInfo.cpp
new file mode 100644
index 000000000000..5c71106c9017
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCRegisterInfo.cpp
@@ -0,0 +1,74 @@
+//=== MC/MCRegisterInfo.cpp - Target Register Description -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements MCRegisterInfo functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCRegisterInfo.h"
+
+using namespace llvm;
+
+unsigned MCRegisterInfo::getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
+ const MCRegisterClass *RC) const {
+ for (MCSuperRegIterator Supers(Reg, this); Supers.isValid(); ++Supers)
+ if (RC->contains(*Supers) && Reg == getSubReg(*Supers, SubIdx))
+ return *Supers;
+ return 0;
+}
+
+unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const {
+ assert(Idx && Idx < getNumSubRegIndices() &&
+ "This is not a subregister index");
+ // Get a pointer to the corresponding SubRegIndices list. This list has the
+ // name of each sub-register in the same order as MCSubRegIterator.
+ const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices;
+ for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI)
+ if (*SRI == Idx)
+ return *Subs;
+ return 0;
+}
+
+unsigned MCRegisterInfo::getSubRegIndex(unsigned Reg, unsigned SubReg) const {
+ assert(SubReg && SubReg < getNumRegs() && "This is not a register");
+ // Get a pointer to the corresponding SubRegIndices list. This list has the
+ // name of each sub-register in the same order as MCSubRegIterator.
+ const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices;
+ for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI)
+ if (*Subs == SubReg)
+ return *SRI;
+ return 0;
+}
+
+int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs;
+ unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize;
+
+ DwarfLLVMRegPair Key = { RegNum, 0 };
+ const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
+ if (I == M+Size || I->FromReg != RegNum)
+ return -1;
+ return I->ToReg;
+}
+
+int MCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
+ const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs;
+ unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize;
+
+ DwarfLLVMRegPair Key = { RegNum, 0 };
+ const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
+ assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum");
+ return I->ToReg;
+}
+
+int MCRegisterInfo::getSEHRegNum(unsigned RegNum) const {
+ const DenseMap<unsigned, int>::const_iterator I = L2SEHRegs.find(RegNum);
+ if (I == L2SEHRegs.end()) return (int)RegNum;
+ return I->second;
+}
diff --git a/contrib/llvm/lib/MC/MCSection.cpp b/contrib/llvm/lib/MC/MCSection.cpp
new file mode 100644
index 000000000000..ccf4a7dddf73
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSection.cpp
@@ -0,0 +1,22 @@
+//===- lib/MC/MCSection.cpp - Machine Code Section Representation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MCSection
+//===----------------------------------------------------------------------===//
+
+MCSection::~MCSection() {
+}
+
diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
new file mode 100644
index 000000000000..aac93775aebe
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
@@ -0,0 +1,84 @@
+//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCSectionCOFF::~MCSectionCOFF() {} // anchor.
+
+// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// should be printed before the section name
+bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
+ const MCAsmInfo &MAI) const {
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" || Name == ".bss")
+ return true;
+
+ return false;
+}
+
+void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+
+ // standard sections don't require the '.section'
+ if (ShouldOmitSectionDirective(SectionName, MAI)) {
+ OS << '\t' << getSectionName() << '\n';
+ return;
+ }
+
+ OS << "\t.section\t" << getSectionName() << ",\"";
+ if (getKind().isText())
+ OS << 'x';
+ if (getKind().isWriteable())
+ OS << 'w';
+ else
+ OS << 'r';
+ if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE)
+ OS << 'n';
+ OS << "\"\n";
+
+ if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
+ switch (Selection) {
+ case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES:
+ OS << "\t.linkonce one_only\n";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_ANY:
+ OS << "\t.linkonce discard\n";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE:
+ OS << "\t.linkonce same_size\n";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ OS << "\t.linkonce same_contents\n";
+ break;
+ //NOTE: as of binutils 2.20, there is no way to specifiy select largest
+ // with the .linkonce directive. For now, we treat it as an invalid
+ // comdat selection value.
+ case COFF::IMAGE_COMDAT_SELECT_LARGEST:
+ // OS << "\t.linkonce largest\n";
+ // break;
+ default:
+ assert (0 && "unsupported COFF selection type");
+ break;
+ }
+ }
+}
+
+bool MCSectionCOFF::UseCodeAlign() const {
+ return getKind().isText();
+}
+
+bool MCSectionCOFF::isVirtualSection() const {
+ return getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+}
diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp
new file mode 100644
index 000000000000..0775cfa776d7
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSectionELF.cpp
@@ -0,0 +1,150 @@
+//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+MCSectionELF::~MCSectionELF() {} // anchor.
+
+// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// should be printed before the section name
+bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
+ const MCAsmInfo &MAI) const {
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" ||
+ (Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS()))
+ return true;
+
+ return false;
+}
+
+void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+
+ if (ShouldOmitSectionDirective(SectionName, MAI)) {
+ OS << '\t' << getSectionName() << '\n';
+ return;
+ }
+
+ StringRef name = getSectionName();
+ if (name.find_first_not_of("0123456789_."
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == name.npos) {
+ OS << "\t.section\t" << name;
+ } else {
+ OS << "\t.section\t\"";
+ for (const char *b = name.begin(), *e = name.end(); b < e; ++b) {
+ if (*b == '"') // Unquoted "
+ OS << "\\\"";
+ else if (*b != '\\') // Neither " or backslash
+ OS << *b;
+ else if (b + 1 == e) // Trailing backslash
+ OS << "\\\\";
+ else {
+ OS << b[0] << b[1]; // Quoted character
+ ++b;
+ }
+ }
+ OS << '"';
+ }
+
+ // Handle the weird solaris syntax if desired.
+ if (MAI.usesSunStyleELFSectionSwitchSyntax() &&
+ !(Flags & ELF::SHF_MERGE)) {
+ if (Flags & ELF::SHF_ALLOC)
+ OS << ",#alloc";
+ if (Flags & ELF::SHF_EXECINSTR)
+ OS << ",#execinstr";
+ if (Flags & ELF::SHF_WRITE)
+ OS << ",#write";
+ if (Flags & ELF::SHF_TLS)
+ OS << ",#tls";
+ OS << '\n';
+ return;
+ }
+
+ OS << ",\"";
+ if (Flags & ELF::SHF_ALLOC)
+ OS << 'a';
+ if (Flags & ELF::SHF_EXECINSTR)
+ OS << 'x';
+ if (Flags & ELF::SHF_GROUP)
+ OS << 'G';
+ if (Flags & ELF::SHF_WRITE)
+ OS << 'w';
+ if (Flags & ELF::SHF_MERGE)
+ OS << 'M';
+ if (Flags & ELF::SHF_STRINGS)
+ OS << 'S';
+ if (Flags & ELF::SHF_TLS)
+ OS << 'T';
+
+ // If there are target-specific flags, print them.
+ if (Flags & ELF::XCORE_SHF_CP_SECTION)
+ OS << 'c';
+ if (Flags & ELF::XCORE_SHF_DP_SECTION)
+ OS << 'd';
+
+ OS << '"';
+
+ OS << ',';
+
+ // If comment string is '@', e.g. as on ARM - use '%' instead
+ if (MAI.getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ if (Type == ELF::SHT_INIT_ARRAY)
+ OS << "init_array";
+ else if (Type == ELF::SHT_FINI_ARRAY)
+ OS << "fini_array";
+ else if (Type == ELF::SHT_PREINIT_ARRAY)
+ OS << "preinit_array";
+ else if (Type == ELF::SHT_NOBITS)
+ OS << "nobits";
+ else if (Type == ELF::SHT_NOTE)
+ OS << "note";
+ else if (Type == ELF::SHT_PROGBITS)
+ OS << "progbits";
+
+ if (EntrySize) {
+ assert(Flags & ELF::SHF_MERGE);
+ OS << "," << EntrySize;
+ }
+
+ if (Flags & ELF::SHF_GROUP)
+ OS << "," << Group->getName() << ",comdat";
+ OS << '\n';
+}
+
+bool MCSectionELF::UseCodeAlign() const {
+ return getFlags() & ELF::SHF_EXECINSTR;
+}
+
+bool MCSectionELF::isVirtualSection() const {
+ return getType() == ELF::SHT_NOBITS;
+}
+
+unsigned MCSectionELF::DetermineEntrySize(SectionKind Kind) {
+ if (Kind.isMergeable1ByteCString()) return 1;
+ if (Kind.isMergeable2ByteCString()) return 2;
+ if (Kind.isMergeable4ByteCString()) return 4;
+ if (Kind.isMergeableConst4()) return 4;
+ if (Kind.isMergeableConst8()) return 8;
+ if (Kind.isMergeableConst16()) return 16;
+ return 0;
+}
diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp
new file mode 100644
index 000000000000..fc323155befa
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp
@@ -0,0 +1,303 @@
+//===- lib/MC/MCSectionMachO.cpp - MachO Code Section Representation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+using namespace llvm;
+
+/// SectionTypeDescriptors - These are strings that describe the various section
+/// types. This *must* be kept in order with and stay synchronized with the
+/// section type list.
+static const struct {
+ const char *AssemblerName, *EnumName;
+} SectionTypeDescriptors[MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1] = {
+ { "regular", "S_REGULAR" }, // 0x00
+ { 0, "S_ZEROFILL" }, // 0x01
+ { "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02
+ { "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03
+ { "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04
+ { "literal_pointers", "S_LITERAL_POINTERS" }, // 0x05
+ { "non_lazy_symbol_pointers", "S_NON_LAZY_SYMBOL_POINTERS" }, // 0x06
+ { "lazy_symbol_pointers", "S_LAZY_SYMBOL_POINTERS" }, // 0x07
+ { "symbol_stubs", "S_SYMBOL_STUBS" }, // 0x08
+ { "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09
+ { "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A
+ { "coalesced", "S_COALESCED" }, // 0x0B
+ { 0, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C
+ { "interposing", "S_INTERPOSING" }, // 0x0D
+ { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E
+ { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
+ { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
+ { "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11
+ { "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12
+ { "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13
+ { "thread_local_variable_pointers",
+ "S_THREAD_LOCAL_VARIABLE_POINTERS" }, // 0x14
+ { "thread_local_init_function_pointers",
+ "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"}, // 0x15
+};
+
+
+/// SectionAttrDescriptors - This is an array of descriptors for section
+/// attributes. Unlike the SectionTypeDescriptors, this is not directly indexed
+/// by attribute, instead it is searched. The last entry has an AttrFlagEnd
+/// AttrFlag value.
+static const struct {
+ unsigned AttrFlag;
+ const char *AssemblerName, *EnumName;
+} SectionAttrDescriptors[] = {
+#define ENTRY(ASMNAME, ENUM) \
+ { MCSectionMachO::ENUM, ASMNAME, #ENUM },
+ENTRY("pure_instructions", S_ATTR_PURE_INSTRUCTIONS)
+ENTRY("no_toc", S_ATTR_NO_TOC)
+ENTRY("strip_static_syms", S_ATTR_STRIP_STATIC_SYMS)
+ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP)
+ENTRY("live_support", S_ATTR_LIVE_SUPPORT)
+ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE)
+ENTRY("debug", S_ATTR_DEBUG)
+ENTRY(0 /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS)
+ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC)
+ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC)
+#undef ENTRY
+ { 0, "none", 0 }, // used if section has no attributes but has a stub size
+#define AttrFlagEnd 0xffffffff // non legal value, multiple attribute bits set
+ { AttrFlagEnd, 0, 0 }
+};
+
+MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
+ unsigned TAA, unsigned reserved2, SectionKind K)
+ : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+ assert(Segment.size() <= 16 && Section.size() <= 16 &&
+ "Segment or section string too long");
+ for (unsigned i = 0; i != 16; ++i) {
+ if (i < Segment.size())
+ SegmentName[i] = Segment[i];
+ else
+ SegmentName[i] = 0;
+
+ if (i < Section.size())
+ SectionName[i] = Section[i];
+ else
+ SectionName[i] = 0;
+ }
+}
+
+void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+ OS << "\t.section\t" << getSegmentName() << ',' << getSectionName();
+
+ // Get the section type and attributes.
+ unsigned TAA = getTypeAndAttributes();
+ if (TAA == 0) {
+ OS << '\n';
+ return;
+ }
+
+ unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE;
+ assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE &&
+ "Invalid SectionType specified!");
+
+ if (SectionTypeDescriptors[SectionType].AssemblerName) {
+ OS << ',';
+ OS << SectionTypeDescriptors[SectionType].AssemblerName;
+ } else {
+ // If we have no name for the attribute, stop here.
+ OS << '\n';
+ return;
+ }
+
+ // If we don't have any attributes, we're done.
+ unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES;
+ if (SectionAttrs == 0) {
+ // If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as
+ // the attribute specifier.
+ if (Reserved2 != 0)
+ OS << ",none," << Reserved2;
+ OS << '\n';
+ return;
+ }
+
+ // Check each attribute to see if we have it.
+ char Separator = ',';
+ for (unsigned i = 0;
+ SectionAttrs != 0 && SectionAttrDescriptors[i].AttrFlag;
+ ++i) {
+ // Check to see if we have this attribute.
+ if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0)
+ continue;
+
+ // Yep, clear it and print it.
+ SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag;
+
+ OS << Separator;
+ if (SectionAttrDescriptors[i].AssemblerName)
+ OS << SectionAttrDescriptors[i].AssemblerName;
+ else
+ OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>";
+ Separator = '+';
+ }
+
+ assert(SectionAttrs == 0 && "Unknown section attributes!");
+
+ // If we have a S_SYMBOL_STUBS size specified, print it.
+ if (Reserved2 != 0)
+ OS << ',' << Reserved2;
+ OS << '\n';
+}
+
+bool MCSectionMachO::UseCodeAlign() const {
+ return hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+}
+
+bool MCSectionMachO::isVirtualSection() const {
+ return (getType() == MCSectionMachO::S_ZEROFILL ||
+ getType() == MCSectionMachO::S_GB_ZEROFILL ||
+ getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
+}
+
+/// StripSpaces - This removes leading and trailing spaces from the StringRef.
+static void StripSpaces(StringRef &Str) {
+ while (!Str.empty() && isspace(static_cast<unsigned char>(Str[0])))
+ Str = Str.substr(1);
+ while (!Str.empty() && isspace(static_cast<unsigned char>(Str.back())))
+ Str = Str.substr(0, Str.size()-1);
+}
+
+/// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
+/// This is a string that can appear after a .section directive in a mach-o
+/// flavored .s file. If successful, this fills in the specified Out
+/// parameters and returns an empty string. When an invalid section
+/// specifier is present, this returns a string indicating the problem.
+std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
+ StringRef &Segment, // Out.
+ StringRef &Section, // Out.
+ unsigned &TAA, // Out.
+ bool &TAAParsed, // Out.
+ unsigned &StubSize) { // Out.
+ TAAParsed = false;
+ // Find the first comma.
+ std::pair<StringRef, StringRef> Comma = Spec.split(',');
+
+ // If there is no comma, we fail.
+ if (Comma.second.empty())
+ return "mach-o section specifier requires a segment and section "
+ "separated by a comma";
+
+ // Capture segment, remove leading and trailing whitespace.
+ Segment = Comma.first;
+ StripSpaces(Segment);
+
+ // Verify that the segment is present and not too long.
+ if (Segment.empty() || Segment.size() > 16)
+ return "mach-o section specifier requires a segment whose length is "
+ "between 1 and 16 characters";
+
+ // Split the section name off from any attributes if present.
+ Comma = Comma.second.split(',');
+
+ // Capture section, remove leading and trailing whitespace.
+ Section = Comma.first;
+ StripSpaces(Section);
+
+ // Verify that the section is present and not too long.
+ if (Section.empty() || Section.size() > 16)
+ return "mach-o section specifier requires a section whose length is "
+ "between 1 and 16 characters";
+
+ // If there is no comma after the section, we're done.
+ TAA = 0;
+ StubSize = 0;
+ if (Comma.second.empty())
+ return "";
+
+ // Otherwise, we need to parse the section type and attributes.
+ Comma = Comma.second.split(',');
+
+ // Get the section type.
+ StringRef SectionType = Comma.first;
+ StripSpaces(SectionType);
+
+ // Figure out which section type it is.
+ unsigned TypeID;
+ for (TypeID = 0; TypeID !=MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1; ++TypeID)
+ if (SectionTypeDescriptors[TypeID].AssemblerName &&
+ SectionType == SectionTypeDescriptors[TypeID].AssemblerName)
+ break;
+
+ // If we didn't find the section type, reject it.
+ if (TypeID > MCSectionMachO::LAST_KNOWN_SECTION_TYPE)
+ return "mach-o section specifier uses an unknown section type";
+
+ // Remember the TypeID.
+ TAA = TypeID;
+ TAAParsed = true;
+
+ // If we have no comma after the section type, there are no attributes.
+ if (Comma.second.empty()) {
+ // S_SYMBOL_STUBS always require a symbol stub size specifier.
+ if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier of type 'symbol_stubs' requires a size "
+ "specifier";
+ return "";
+ }
+
+ // Otherwise, we do have some attributes. Split off the size specifier if
+ // present.
+ Comma = Comma.second.split(',');
+ StringRef Attrs = Comma.first;
+
+ // The attribute list is a '+' separated list of attributes.
+ std::pair<StringRef, StringRef> Plus = Attrs.split('+');
+
+ while (1) {
+ StringRef Attr = Plus.first;
+ StripSpaces(Attr);
+
+ // Look up the attribute.
+ for (unsigned i = 0; ; ++i) {
+ if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd)
+ return "mach-o section specifier has invalid attribute";
+
+ if (SectionAttrDescriptors[i].AssemblerName &&
+ Attr == SectionAttrDescriptors[i].AssemblerName) {
+ TAA |= SectionAttrDescriptors[i].AttrFlag;
+ break;
+ }
+ }
+
+ if (Plus.second.empty()) break;
+ Plus = Plus.second.split('+');
+ };
+
+ // Okay, we've parsed the section attributes, see if we have a stub size spec.
+ if (Comma.second.empty()) {
+ // S_SYMBOL_STUBS always require a symbol stub size specifier.
+ if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier of type 'symbol_stubs' requires a size "
+ "specifier";
+ return "";
+ }
+
+ // If we have a stub size spec, we must have a sectiontype of S_SYMBOL_STUBS.
+ if ((TAA & MCSectionMachO::SECTION_TYPE) != MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier cannot have a stub size specified because "
+ "it does not have type 'symbol_stubs'";
+
+ // Okay, if we do, it must be a number.
+ StringRef StubSizeStr = Comma.second;
+ StripSpaces(StubSizeStr);
+
+ // Convert the stub size from a string to an integer.
+ if (StubSizeStr.getAsInteger(0, StubSize))
+ return "mach-o section specifier has a malformed stub size";
+
+ return "";
+}
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
new file mode 100644
index 000000000000..d02e5535bde5
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -0,0 +1,627 @@
+//===- lib/MC/MCStreamer.cpp - Streaming Machine Code Output --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+using namespace llvm;
+
+MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx)
+ : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
+ CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) {
+ const MCSection *section = 0;
+ SectionStack.push_back(std::make_pair(section, section));
+}
+
+MCStreamer::~MCStreamer() {
+ for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i)
+ delete W64UnwindInfos[i];
+}
+
+void MCStreamer::reset() {
+ for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i)
+ delete W64UnwindInfos[i];
+ EmitEHFrame = true;
+ EmitDebugFrame = false;
+ CurrentW64UnwindInfo = 0;
+ LastSymbol = 0;
+ const MCSection *section = 0;
+ SectionStack.clear();
+ SectionStack.push_back(std::make_pair(section, section));
+}
+
+const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context,
+ const MCSymbol *A,
+ const MCSymbol *B) {
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *ARef =
+ MCSymbolRefExpr::Create(A, Variant, Context);
+ const MCExpr *BRef =
+ MCSymbolRefExpr::Create(B, Variant, Context);
+ const MCExpr *AddrDelta =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
+ return AddrDelta;
+}
+
+const MCExpr *MCStreamer::ForceExpAbs(const MCExpr* Expr) {
+ if (Context.getAsmInfo().hasAggressiveSymbolFolding() ||
+ isa<MCSymbolRefExpr>(Expr))
+ return Expr;
+
+ MCSymbol *ABS = Context.CreateTempSymbol();
+ EmitAssignment(ABS, Expr);
+ return MCSymbolRefExpr::Create(ABS, Context);
+}
+
+raw_ostream &MCStreamer::GetCommentOS() {
+ // By default, discard comments.
+ return nulls();
+}
+
+void MCStreamer::EmitDwarfSetLineAddr(int64_t LineDelta,
+ const MCSymbol *Label, int PointerSize) {
+ // emit the sequence to set the address
+ EmitIntValue(dwarf::DW_LNS_extended_op, 1);
+ EmitULEB128IntValue(PointerSize + 1);
+ EmitIntValue(dwarf::DW_LNE_set_address, 1);
+ EmitSymbolValue(Label, PointerSize);
+
+ // emit the sequence for the LineDelta (from 1) and a zero address delta.
+ MCDwarfLineAddr::Emit(this, LineDelta, 0);
+}
+
+/// EmitIntValue - Special case of EmitValue that avoids the client having to
+/// pass in a MCExpr for constant integers.
+void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(Size <= 8 && "Invalid size");
+ assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) &&
+ "Invalid size");
+ char buf[8];
+ const bool isLittleEndian = Context.getAsmInfo().isLittleEndian();
+ for (unsigned i = 0; i != Size; ++i) {
+ unsigned index = isLittleEndian ? i : (Size - i - 1);
+ buf[i] = uint8_t(Value >> (index * 8));
+ }
+ EmitBytes(StringRef(buf, Size), AddrSpace);
+}
+
+/// EmitULEB128Value - Special case of EmitULEB128Value that avoids the
+/// client having to pass in a MCExpr for constant integers.
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned Padding,
+ unsigned AddrSpace) {
+ SmallString<128> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ encodeULEB128(Value, OSE, Padding);
+ EmitBytes(OSE.str(), AddrSpace);
+}
+
+/// EmitSLEB128Value - Special case of EmitSLEB128Value that avoids the
+/// client having to pass in a MCExpr for constant integers.
+void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) {
+ SmallString<128> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ encodeSLEB128(Value, OSE);
+ EmitBytes(OSE.str(), AddrSpace);
+}
+
+void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ const MCExpr *ABS = ForceExpAbs(Value);
+ EmitValue(ABS, Size, AddrSpace);
+}
+
+
+void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValueImpl(Value, Size, AddrSpace);
+}
+
+void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size,
+ AddrSpace);
+}
+
+void MCStreamer::EmitGPRel64Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
+}
+
+void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
+}
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
+void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ const MCExpr *E = MCConstantExpr::Create(FillValue, getContext());
+ for (uint64_t i = 0, e = NumBytes; i != e; ++i)
+ EmitValue(E, 1, AddrSpace);
+}
+
+bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Directory,
+ StringRef Filename, unsigned CUID) {
+ return getContext().GetDwarfFile(Directory, Filename, FileNo, CUID) == 0;
+}
+
+void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa,
+ unsigned Discriminator,
+ StringRef FileName) {
+ getContext().setCurrentDwarfLoc(FileNo, Line, Column, Flags, Isa,
+ Discriminator);
+}
+
+MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
+ if (FrameInfos.empty())
+ return 0;
+ return &FrameInfos.back();
+}
+
+void MCStreamer::EnsureValidFrame() {
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ if (!CurFrame || CurFrame->End)
+ report_fatal_error("No open frame");
+}
+
+void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol) {
+}
+
+void MCStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection());
+ LastSymbol = Symbol;
+}
+
+void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection());
+ LastSymbol = Symbol;
+}
+
+void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->CompactUnwindEncoding = CompactUnwindEncoding;
+}
+
+void MCStreamer::EmitCFISections(bool EH, bool Debug) {
+ assert(EH || Debug);
+ EmitEHFrame = EH;
+ EmitDebugFrame = Debug;
+}
+
+void MCStreamer::EmitCFIStartProc() {
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ if (CurFrame && !CurFrame->End)
+ report_fatal_error("Starting a frame before finishing the previous one!");
+
+ MCDwarfFrameInfo Frame;
+ EmitCFIStartProcImpl(Frame);
+
+ FrameInfos.push_back(Frame);
+}
+
+void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+}
+
+void MCStreamer::RecordProcStart(MCDwarfFrameInfo &Frame) {
+ Frame.Function = LastSymbol;
+ // If the function is externally visible, we need to create a local
+ // symbol to avoid relocations.
+ StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix();
+ if (LastSymbol && LastSymbol->getName().startswith(Prefix)) {
+ Frame.Begin = LastSymbol;
+ } else {
+ Frame.Begin = getContext().CreateTempSymbol();
+ EmitLabel(Frame.Begin);
+ }
+}
+
+void MCStreamer::EmitCFIEndProc() {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ EmitCFIEndProcImpl(*CurFrame);
+}
+
+void MCStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+}
+
+void MCStreamer::RecordProcEnd(MCDwarfFrameInfo &Frame) {
+ Frame.End = getContext().CreateTempSymbol();
+ EmitLabel(Frame.End);
+}
+
+MCSymbol *MCStreamer::EmitCFICommon() {
+ EnsureValidFrame();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ return Label;
+}
+
+void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createDefCfa(Label, Register, Offset);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createDefCfaOffset(Label, Offset);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createAdjustCfaOffset(Label, Adjustment);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createDefCfaRegister(Label, Register);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createOffset(Label, Register, Offset);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRelOffset(Label, Register, Offset);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+ unsigned Encoding) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Personality = Sym;
+ CurFrame->PersonalityEncoding = Encoding;
+}
+
+void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Lsda = Sym;
+ CurFrame->LsdaEncoding = Encoding;
+}
+
+void MCStreamer::EmitCFIRememberState() {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction = MCCFIInstruction::createRememberState(Label);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIRestoreState() {
+ // FIXME: Error if there is no matching cfi_remember_state.
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction = MCCFIInstruction::createRestoreState(Label);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFISameValue(int64_t Register) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createSameValue(Label, Register);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIRestore(int64_t Register) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRestore(Label, Register);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIEscape(StringRef Values) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction = MCCFIInstruction::createEscape(Label, Values);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFISignalFrame() {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->IsSignalFrame = true;
+}
+
+void MCStreamer::EmitCFIUndefined(int64_t Register) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createUndefined(Label, Register);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRegister(Label, Register1, Register2);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) {
+ W64UnwindInfos.push_back(Frame);
+ CurrentW64UnwindInfo = W64UnwindInfos.back();
+}
+
+void MCStreamer::EnsureValidW64UnwindInfo() {
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (!CurFrame || CurFrame->End)
+ report_fatal_error("No open Win64 EH frame function!");
+}
+
+void MCStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame && !CurFrame->End)
+ report_fatal_error("Starting a function before ending the previous one!");
+ MCWin64EHUnwindInfo *Frame = new MCWin64EHUnwindInfo;
+ Frame->Begin = getContext().CreateTempSymbol();
+ Frame->Function = Symbol;
+ EmitLabel(Frame->Begin);
+ setCurrentW64UnwindInfo(Frame);
+}
+
+void MCStreamer::EmitWin64EHEndProc() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->ChainedParent)
+ report_fatal_error("Not all chained regions terminated!");
+ CurFrame->End = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->End);
+}
+
+void MCStreamer::EmitWin64EHStartChained() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *Frame = new MCWin64EHUnwindInfo;
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ Frame->Begin = getContext().CreateTempSymbol();
+ Frame->Function = CurFrame->Function;
+ Frame->ChainedParent = CurFrame;
+ EmitLabel(Frame->Begin);
+ setCurrentW64UnwindInfo(Frame);
+}
+
+void MCStreamer::EmitWin64EHEndChained() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (!CurFrame->ChainedParent)
+ report_fatal_error("End of a chained region outside a chained region!");
+ CurFrame->End = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->End);
+ CurrentW64UnwindInfo = CurFrame->ChainedParent;
+}
+
+void MCStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->ChainedParent)
+ report_fatal_error("Chained unwind areas can't have handlers!");
+ CurFrame->ExceptionHandler = Sym;
+ if (!Except && !Unwind)
+ report_fatal_error("Don't know what kind of handler this is!");
+ if (Unwind)
+ CurFrame->HandlesUnwind = true;
+ if (Except)
+ CurFrame->HandlesExceptions = true;
+}
+
+void MCStreamer::EmitWin64EHHandlerData() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->ChainedParent)
+ report_fatal_error("Chained unwind areas can't have handlers!");
+}
+
+void MCStreamer::EmitWin64EHPushReg(unsigned Register) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(Win64EH::UOP_PushNonVol, Label, Register);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->LastFrameInst >= 0)
+ report_fatal_error("Frame register and offset already specified!");
+ if (Offset & 0x0F)
+ report_fatal_error("Misaligned frame pointer offset!");
+ MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset);
+ CurFrame->LastFrameInst = CurFrame->Instructions.size();
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHAllocStack(unsigned Size) {
+ EnsureValidW64UnwindInfo();
+ if (Size & 7)
+ report_fatal_error("Misaligned stack allocation!");
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(Label, Size);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
+ EnsureValidW64UnwindInfo();
+ if (Offset & 7)
+ report_fatal_error("Misaligned saved register offset!");
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(
+ Offset > 512*1024-8 ? Win64EH::UOP_SaveNonVolBig : Win64EH::UOP_SaveNonVol,
+ Label, Register, Offset);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
+ EnsureValidW64UnwindInfo();
+ if (Offset & 0x0F)
+ report_fatal_error("Misaligned saved vector register offset!");
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(
+ Offset > 512*1024-16 ? Win64EH::UOP_SaveXMM128Big : Win64EH::UOP_SaveXMM128,
+ Label, Register, Offset);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHPushFrame(bool Code) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->Instructions.size() > 0)
+ report_fatal_error("If present, PushMachFrame must be the first UOP");
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(Win64EH::UOP_PushMachFrame, Label, Code);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHEndProlog() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ CurFrame->PrologEnd = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->PrologEnd);
+}
+
+void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
+ llvm_unreachable("This file format doesn't support this directive");
+}
+
+void MCStreamer::EmitFnStart() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitFnEnd() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitCantUnwind() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitHandlerData() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitPersonality(const MCSymbol *Personality) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitPad(int64_t Offset) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitTCEntry(const MCSymbol &S) {
+ llvm_unreachable("Unsupported method");
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void MCStreamer::EmitRawText(StringRef String) {
+ errs() << "EmitRawText called on an MCStreamer that doesn't support it, "
+ " something must not be fully mc'ized\n";
+ abort();
+}
+
+void MCStreamer::EmitRawText(const Twine &T) {
+ SmallString<128> Str;
+ T.toVector(Str);
+ EmitRawText(Str.str());
+}
+
+void MCStreamer::EmitFrames(bool usingCFI) {
+ if (!getNumFrameInfos())
+ return;
+
+ if (EmitEHFrame)
+ MCDwarfFrameEmitter::Emit(*this, usingCFI, true);
+
+ if (EmitDebugFrame)
+ MCDwarfFrameEmitter::Emit(*this, usingCFI, false);
+}
+
+void MCStreamer::EmitW64Tables() {
+ if (!getNumW64UnwindInfos())
+ return;
+
+ MCWin64EHUnwindEmitter::Emit(*this);
+}
+
+void MCStreamer::Finish() {
+ if (!FrameInfos.empty() && !FrameInfos.back().End)
+ report_fatal_error("Unfinished frame!");
+
+ FinishImpl();
+}
+
+MCSymbolData &MCStreamer::getOrCreateSymbolData(MCSymbol *Symbol) {
+ report_fatal_error("Not supported!");
+ return *(static_cast<MCSymbolData*>(0));
+}
diff --git a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
new file mode 100644
index 000000000000..f18828dd41ef
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -0,0 +1,118 @@
+//===-- MCSubtargetInfo.cpp - Subtarget Information -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+MCSchedModel MCSchedModel::DefaultSchedModel; // For unknown processors.
+
+/// InitMCProcessorInfo - Set or change the CPU (optionally supplemented
+/// with feature string). Recompute feature bits and scheduling model.
+void
+MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) {
+ SubtargetFeatures Features(FS);
+ FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
+ ProcFeatures, NumFeatures);
+
+ if (!CPU.empty())
+ CPUSchedModel = getSchedModelForCPU(CPU);
+ else
+ CPUSchedModel = &MCSchedModel::DefaultSchedModel;
+}
+
+void
+MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
+ const SubtargetFeatureKV *PF,
+ const SubtargetFeatureKV *PD,
+ const SubtargetInfoKV *ProcSched,
+ const MCWriteProcResEntry *WPR,
+ const MCWriteLatencyEntry *WL,
+ const MCReadAdvanceEntry *RA,
+ const InstrStage *IS,
+ const unsigned *OC,
+ const unsigned *FP,
+ unsigned NF, unsigned NP) {
+ TargetTriple = TT;
+ ProcFeatures = PF;
+ ProcDesc = PD;
+ ProcSchedModels = ProcSched;
+ WriteProcResTable = WPR;
+ WriteLatencyTable = WL;
+ ReadAdvanceTable = RA;
+
+ Stages = IS;
+ OperandCycles = OC;
+ ForwardingPaths = FP;
+ NumFeatures = NF;
+ NumProcs = NP;
+
+ InitMCProcessorInfo(CPU, FS);
+}
+
+/// ToggleFeature - Toggle a feature and returns the re-computed feature
+/// bits. This version does not change the implied bits.
+uint64_t MCSubtargetInfo::ToggleFeature(uint64_t FB) {
+ FeatureBits ^= FB;
+ return FeatureBits;
+}
+
+/// ToggleFeature - Toggle a feature and returns the re-computed feature
+/// bits. This version will also change all implied bits.
+uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) {
+ SubtargetFeatures Features;
+ FeatureBits = Features.ToggleFeature(FeatureBits, FS,
+ ProcFeatures, NumFeatures);
+ return FeatureBits;
+}
+
+
+const MCSchedModel *
+MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
+ assert(ProcSchedModels && "Processor machine model not available!");
+
+#ifndef NDEBUG
+ for (size_t i = 1; i < NumProcs; i++) {
+ assert(strcmp(ProcSchedModels[i - 1].Key, ProcSchedModels[i].Key) < 0 &&
+ "Processor machine model table is not sorted");
+ }
+#endif
+
+ // Find entry
+ SubtargetInfoKV KV;
+ KV.Key = CPU.data();
+ const SubtargetInfoKV *Found =
+ std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, KV);
+ if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) {
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
+ return &MCSchedModel::DefaultSchedModel;
+ }
+ assert(Found->Value && "Missing processor SchedModel value");
+ return (const MCSchedModel *)Found->Value;
+}
+
+InstrItineraryData
+MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const {
+ const MCSchedModel *SchedModel = getSchedModelForCPU(CPU);
+ return InstrItineraryData(SchedModel, Stages, OperandCycles, ForwardingPaths);
+}
+
+/// Initialize an InstrItineraryData instance.
+void MCSubtargetInfo::initInstrItins(InstrItineraryData &InstrItins) const {
+ InstrItins =
+ InstrItineraryData(CPUSchedModel, Stages, OperandCycles, ForwardingPaths);
+}
diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp
new file mode 100644
index 000000000000..b973c57f7b81
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSymbol.cpp
@@ -0,0 +1,83 @@
+//===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Sentinel value for the absolute pseudo section.
+const MCSection *MCSymbol::AbsolutePseudoSection =
+ reinterpret_cast<const MCSection *>(1);
+
+static bool isAcceptableChar(char C) {
+ if ((C < 'a' || C > 'z') &&
+ (C < 'A' || C > 'Z') &&
+ (C < '0' || C > '9') &&
+ C != '_' && C != '$' && C != '.' && C != '@')
+ return false;
+ return true;
+}
+
+/// NameNeedsQuoting - Return true if the identifier \p Str needs quotes to be
+/// syntactically correct.
+static bool NameNeedsQuoting(StringRef Str) {
+ assert(!Str.empty() && "Cannot create an empty MCSymbol");
+
+ // If any of the characters in the string is an unacceptable character, force
+ // quotes.
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ if (!isAcceptableChar(Str[i]))
+ return true;
+ return false;
+}
+
+const MCSymbol &MCSymbol::AliasedSymbol() const {
+ const MCSymbol *S = this;
+ while (S->isVariable()) {
+ const MCExpr *Value = S->getVariableValue();
+ if (Value->getKind() != MCExpr::SymbolRef)
+ return *S;
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Value);
+ S = &Ref->getSymbol();
+ }
+ return *S;
+}
+
+void MCSymbol::setVariableValue(const MCExpr *Value) {
+ assert(!IsUsed && "Cannot set a variable that has already been used.");
+ assert(Value && "Invalid variable value!");
+ this->Value = Value;
+
+ // Variables should always be marked as in the same "section" as the value.
+ const MCSection *Section = Value->FindAssociatedSection();
+ if (Section)
+ setSection(*Section);
+ else
+ setUndefined();
+}
+
+void MCSymbol::print(raw_ostream &OS) const {
+ // The name for this MCSymbol is required to be a valid target name. However,
+ // some targets support quoting names with funny characters. If the name
+ // contains a funny character, then print it quoted.
+ if (!NameNeedsQuoting(getName())) {
+ OS << getName();
+ return;
+ }
+
+ OS << '"' << getName() << '"';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MCSymbol::dump() const {
+ print(dbgs());
+}
+#endif
diff --git a/contrib/llvm/lib/MC/MCValue.cpp b/contrib/llvm/lib/MC/MCValue.cpp
new file mode 100644
index 000000000000..4393777211e8
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCValue.cpp
@@ -0,0 +1,38 @@
+//===- lib/MC/MCValue.cpp - MCValue implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ if (isAbsolute()) {
+ OS << getConstant();
+ return;
+ }
+
+ getSymA()->print(OS);
+
+ if (getSymB()) {
+ OS << " - ";
+ getSymB()->print(OS);
+ }
+
+ if (getConstant())
+ OS << " + " << getConstant();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MCValue::dump() const {
+ print(dbgs(), 0);
+}
+#endif
diff --git a/contrib/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm/lib/MC/MCWin64EH.cpp
new file mode 100644
index 000000000000..c5b637c92443
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCWin64EH.cpp
@@ -0,0 +1,278 @@
+//===- lib/MC/MCWin64EH.cpp - MCWin64EH implementation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCWin64EH.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+
+namespace llvm {
+
+// NOTE: All relocations generated here are 4-byte image-relative.
+
+static uint8_t CountOfUnwindCodes(std::vector<MCWin64EHInstruction> &instArray){
+ uint8_t count = 0;
+ for (std::vector<MCWin64EHInstruction>::const_iterator I = instArray.begin(),
+ E = instArray.end(); I != E; ++I) {
+ switch (I->getOperation()) {
+ case Win64EH::UOP_PushNonVol:
+ case Win64EH::UOP_AllocSmall:
+ case Win64EH::UOP_SetFPReg:
+ case Win64EH::UOP_PushMachFrame:
+ count += 1;
+ break;
+ case Win64EH::UOP_SaveNonVol:
+ case Win64EH::UOP_SaveXMM128:
+ count += 2;
+ break;
+ case Win64EH::UOP_SaveNonVolBig:
+ case Win64EH::UOP_SaveXMM128Big:
+ count += 3;
+ break;
+ case Win64EH::UOP_AllocLarge:
+ if (I->getSize() > 512*1024-8)
+ count += 3;
+ else
+ count += 2;
+ break;
+ }
+ }
+ return count;
+}
+
+static void EmitAbsDifference(MCStreamer &streamer, MCSymbol *lhs,
+ MCSymbol *rhs) {
+ MCContext &context = streamer.getContext();
+ const MCExpr *diff = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(
+ lhs, context),
+ MCSymbolRefExpr::Create(
+ rhs, context),
+ context);
+ streamer.EmitAbsValue(diff, 1);
+
+}
+
+static void EmitUnwindCode(MCStreamer &streamer, MCSymbol *begin,
+ MCWin64EHInstruction &inst) {
+ uint8_t b1, b2;
+ uint16_t w;
+ b2 = (inst.getOperation() & 0x0F);
+ switch (inst.getOperation()) {
+ case Win64EH::UOP_PushNonVol:
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ b2 |= (inst.getRegister() & 0x0F) << 4;
+ streamer.EmitIntValue(b2, 1);
+ break;
+ case Win64EH::UOP_AllocLarge:
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ if (inst.getSize() > 512*1024-8) {
+ b2 |= 0x10;
+ streamer.EmitIntValue(b2, 1);
+ w = inst.getSize() & 0xFFF8;
+ streamer.EmitIntValue(w, 2);
+ w = inst.getSize() >> 16;
+ } else {
+ streamer.EmitIntValue(b2, 1);
+ w = inst.getSize() >> 3;
+ }
+ streamer.EmitIntValue(w, 2);
+ break;
+ case Win64EH::UOP_AllocSmall:
+ b2 |= (((inst.getSize()-8) >> 3) & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ break;
+ case Win64EH::UOP_SetFPReg:
+ b1 = inst.getOffset() & 0xF0;
+ streamer.EmitIntValue(b1, 1);
+ streamer.EmitIntValue(b2, 1);
+ break;
+ case Win64EH::UOP_SaveNonVol:
+ case Win64EH::UOP_SaveXMM128:
+ b2 |= (inst.getRegister() & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ w = inst.getOffset() >> 3;
+ if (inst.getOperation() == Win64EH::UOP_SaveXMM128)
+ w >>= 1;
+ streamer.EmitIntValue(w, 2);
+ break;
+ case Win64EH::UOP_SaveNonVolBig:
+ case Win64EH::UOP_SaveXMM128Big:
+ b2 |= (inst.getRegister() & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ if (inst.getOperation() == Win64EH::UOP_SaveXMM128Big)
+ w = inst.getOffset() & 0xFFF0;
+ else
+ w = inst.getOffset() & 0xFFF8;
+ streamer.EmitIntValue(w, 2);
+ w = inst.getOffset() >> 16;
+ streamer.EmitIntValue(w, 2);
+ break;
+ case Win64EH::UOP_PushMachFrame:
+ if (inst.isPushCodeFrame())
+ b2 |= 0x10;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ break;
+ }
+}
+
+static void EmitRuntimeFunction(MCStreamer &streamer,
+ const MCWin64EHUnwindInfo *info) {
+ MCContext &context = streamer.getContext();
+
+ streamer.EmitValueToAlignment(4);
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->Begin, context), 4);
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->End, context), 4);
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->Symbol, context), 4);
+}
+
+static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) {
+ // If this UNWIND_INFO already has a symbol, it's already been emitted.
+ if (info->Symbol) return;
+
+ MCContext &context = streamer.getContext();
+ streamer.EmitValueToAlignment(4);
+ // Upper 3 bits are the version number (currently 1).
+ uint8_t flags = 0x01;
+ info->Symbol = context.CreateTempSymbol();
+ streamer.EmitLabel(info->Symbol);
+
+ if (info->ChainedParent)
+ flags |= Win64EH::UNW_ChainInfo << 3;
+ else {
+ if (info->HandlesUnwind)
+ flags |= Win64EH::UNW_TerminateHandler << 3;
+ if (info->HandlesExceptions)
+ flags |= Win64EH::UNW_ExceptionHandler << 3;
+ }
+ streamer.EmitIntValue(flags, 1);
+
+ if (info->PrologEnd)
+ EmitAbsDifference(streamer, info->PrologEnd, info->Begin);
+ else
+ streamer.EmitIntValue(0, 1);
+
+ uint8_t numCodes = CountOfUnwindCodes(info->Instructions);
+ streamer.EmitIntValue(numCodes, 1);
+
+ uint8_t frame = 0;
+ if (info->LastFrameInst >= 0) {
+ MCWin64EHInstruction &frameInst = info->Instructions[info->LastFrameInst];
+ assert(frameInst.getOperation() == Win64EH::UOP_SetFPReg);
+ frame = (frameInst.getRegister() & 0x0F) |
+ (frameInst.getOffset() & 0xF0);
+ }
+ streamer.EmitIntValue(frame, 1);
+
+ // Emit unwind instructions (in reverse order).
+ uint8_t numInst = info->Instructions.size();
+ for (uint8_t c = 0; c < numInst; ++c) {
+ MCWin64EHInstruction inst = info->Instructions.back();
+ info->Instructions.pop_back();
+ EmitUnwindCode(streamer, info->Begin, inst);
+ }
+
+ if (flags & (Win64EH::UNW_ChainInfo << 3))
+ EmitRuntimeFunction(streamer, info->ChainedParent);
+ else if (flags &
+ ((Win64EH::UNW_TerminateHandler|Win64EH::UNW_ExceptionHandler) << 3))
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->ExceptionHandler, context),
+ 4);
+ else if (numCodes < 2) {
+ // The minimum size of an UNWIND_INFO struct is 8 bytes. If we're not
+ // a chained unwind info, if there is no handler, and if there are fewer
+ // than 2 slots used in the unwind code array, we have to pad to 8 bytes.
+ if (numCodes == 1)
+ streamer.EmitIntValue(0, 2);
+ else
+ streamer.EmitIntValue(0, 4);
+ }
+}
+
+StringRef MCWin64EHUnwindEmitter::GetSectionSuffix(const MCSymbol *func) {
+ if (!func || !func->isInSection()) return "";
+ const MCSection *section = &func->getSection();
+ const MCSectionCOFF *COFFSection;
+ if ((COFFSection = dyn_cast<MCSectionCOFF>(section))) {
+ StringRef name = COFFSection->getSectionName();
+ size_t dollar = name.find('$');
+ size_t dot = name.find('.', 1);
+ if (dollar == StringRef::npos && dot == StringRef::npos)
+ return "";
+ if (dot == StringRef::npos)
+ return name.substr(dollar);
+ if (dollar == StringRef::npos || dot < dollar)
+ return name.substr(dot);
+ return name.substr(dollar);
+ }
+ return "";
+}
+
+static const MCSection *getWin64EHTableSection(StringRef suffix,
+ MCContext &context) {
+ if (suffix == "")
+ return context.getObjectFileInfo()->getXDataSection();
+
+ return context.getCOFFSection((".xdata"+suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getDataRel());
+}
+
+static const MCSection *getWin64EHFuncTableSection(StringRef suffix,
+ MCContext &context) {
+ if (suffix == "")
+ return context.getObjectFileInfo()->getPDataSection();
+ return context.getCOFFSection((".pdata"+suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getDataRel());
+}
+
+void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
+ MCWin64EHUnwindInfo *info) {
+ // Switch sections (the static function above is meant to be called from
+ // here and from Emit().
+ MCContext &context = streamer.getContext();
+ const MCSection *xdataSect =
+ getWin64EHTableSection(GetSectionSuffix(info->Function), context);
+ streamer.SwitchSection(xdataSect);
+
+ llvm::EmitUnwindInfo(streamer, info);
+}
+
+void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) {
+ MCContext &context = streamer.getContext();
+ // Emit the unwind info structs first.
+ for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
+ MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
+ const MCSection *xdataSect =
+ getWin64EHTableSection(GetSectionSuffix(info.Function), context);
+ streamer.SwitchSection(xdataSect);
+ llvm::EmitUnwindInfo(streamer, &info);
+ }
+ // Now emit RUNTIME_FUNCTION entries.
+ for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
+ MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
+ const MCSection *pdataSect =
+ getWin64EHFuncTableSection(GetSectionSuffix(info.Function), context);
+ streamer.SwitchSection(pdataSect);
+ EmitRuntimeFunction(streamer, &info);
+ }
+}
+
+} // End of namespace llvm
+
diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp
new file mode 100644
index 000000000000..a5ba3c36532a
--- /dev/null
+++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp
@@ -0,0 +1,939 @@
+//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+using namespace llvm;
+using namespace llvm::object;
+
+void MachObjectWriter::reset() {
+ Relocations.clear();
+ IndirectSymBase.clear();
+ StringTable.clear();
+ LocalSymbolData.clear();
+ ExternalSymbolData.clear();
+ UndefinedSymbolData.clear();
+ MCObjectWriter::reset();
+}
+
+bool MachObjectWriter::
+doesSymbolRequireExternRelocation(const MCSymbolData *SD) {
+ // Undefined symbols are always extern.
+ if (SD->Symbol->isUndefined())
+ return true;
+
+ // References to weak definitions require external relocation entries; the
+ // definition may not always be the one in the same object file.
+ if (SD->getFlags() & SF_WeakDefinition)
+ return true;
+
+ // Otherwise, we can use an internal relocation.
+ return false;
+}
+
+bool MachObjectWriter::
+MachSymbolData::operator<(const MachSymbolData &RHS) const {
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+}
+
+bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+ const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
+ (MCFixupKind) Kind);
+
+ return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+}
+
+uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
+ const MCAsmLayout &Layout) const {
+ return getSectionAddress(Fragment->getParent()) +
+ Layout.getFragmentOffset(Fragment);
+}
+
+uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD,
+ const MCAsmLayout &Layout) const {
+ const MCSymbol &S = SD->getSymbol();
+
+ // If this is a variable, then recursively evaluate now.
+ if (S.isVariable()) {
+ if (const MCConstantExpr *C =
+ dyn_cast<const MCConstantExpr>(S.getVariableValue()))
+ return C->getValue();
+
+
+ MCValue Target;
+ if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
+ report_fatal_error("unable to evaluate offset for variable '" +
+ S.getName() + "'");
+
+ // Verify that any used symbols are defined.
+ if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymA()->getSymbol().getName() + "'");
+ if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymB()->getSymbol().getName() + "'");
+
+ uint64_t Address = Target.getConstant();
+ if (Target.getSymA())
+ Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+ Target.getSymA()->getSymbol()), Layout);
+ if (Target.getSymB())
+ Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+ Target.getSymB()->getSymbol()), Layout);
+ return Address;
+ }
+
+ return getSectionAddress(SD->getFragment()->getParent()) +
+ Layout.getSymbolOffset(SD);
+}
+
+uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD,
+ const MCAsmLayout &Layout) const {
+ uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
+ unsigned Next = SD->getLayoutOrder() + 1;
+ if (Next >= Layout.getSectionOrder().size())
+ return 0;
+
+ const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
+ if (NextSD.getSection().isVirtualSection())
+ return 0;
+ return OffsetToAlignment(EndAddr, NextSD.getAlignment());
+}
+
+void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
+ unsigned LoadCommandsSize,
+ bool SubsectionsViaSymbols) {
+ uint32_t Flags = 0;
+
+ if (SubsectionsViaSymbols)
+ Flags |= macho::HF_SubsectionsViaSymbols;
+
+ // struct mach_header (28 bytes) or
+ // struct mach_header_64 (32 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
+
+ Write32(TargetObjectWriter->getCPUType());
+ Write32(TargetObjectWriter->getCPUSubtype());
+
+ Write32(macho::HFT_Object);
+ Write32(NumLoadCommands);
+ Write32(LoadCommandsSize);
+ Write32(Flags);
+ if (is64Bit())
+ Write32(0); // reserved
+
+ assert(OS.tell() - Start ==
+ (is64Bit() ? macho::Header64Size : macho::Header32Size));
+}
+
+/// WriteSegmentLoadCommand - Write a segment load command.
+///
+/// \param NumSections The number of sections in this segment.
+/// \param SectionDataSize The total size of the sections.
+void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
+ uint64_t VMSize,
+ uint64_t SectionDataStartOffset,
+ uint64_t SectionDataSize) {
+ // struct segment_command (56 bytes) or
+ // struct segment_command_64 (72 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ unsigned SegmentLoadCommandSize =
+ is64Bit() ? macho::SegmentLoadCommand64Size:
+ macho::SegmentLoadCommand32Size;
+ Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
+ Write32(SegmentLoadCommandSize +
+ NumSections * (is64Bit() ? macho::Section64Size :
+ macho::Section32Size));
+
+ WriteBytes("", 16);
+ if (is64Bit()) {
+ Write64(0); // vmaddr
+ Write64(VMSize); // vmsize
+ Write64(SectionDataStartOffset); // file offset
+ Write64(SectionDataSize); // file size
+ } else {
+ Write32(0); // vmaddr
+ Write32(VMSize); // vmsize
+ Write32(SectionDataStartOffset); // file offset
+ Write32(SectionDataSize); // file size
+ }
+ Write32(0x7); // maxprot
+ Write32(0x7); // initprot
+ Write32(NumSections);
+ Write32(0); // flags
+
+ assert(OS.tell() - Start == SegmentLoadCommandSize);
+}
+
+void MachObjectWriter::WriteSection(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionData &SD,
+ uint64_t FileOffset,
+ uint64_t RelocationsStart,
+ unsigned NumRelocations) {
+ uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
+
+ // The offset is unused for virtual sections.
+ if (SD.getSection().isVirtualSection()) {
+ assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
+ FileOffset = 0;
+ }
+
+ // struct section (68 bytes) or
+ // struct section_64 (80 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
+ WriteBytes(Section.getSectionName(), 16);
+ WriteBytes(Section.getSegmentName(), 16);
+ if (is64Bit()) {
+ Write64(getSectionAddress(&SD)); // address
+ Write64(SectionSize); // size
+ } else {
+ Write32(getSectionAddress(&SD)); // address
+ Write32(SectionSize); // size
+ }
+ Write32(FileOffset);
+
+ unsigned Flags = Section.getTypeAndAttributes();
+ if (SD.hasInstructions())
+ Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
+
+ assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
+ Write32(Log2_32(SD.getAlignment()));
+ Write32(NumRelocations ? RelocationsStart : 0);
+ Write32(NumRelocations);
+ Write32(Flags);
+ Write32(IndirectSymBase.lookup(&SD)); // reserved1
+ Write32(Section.getStubSize()); // reserved2
+ if (is64Bit())
+ Write32(0); // reserved3
+
+ assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
+ macho::Section32Size));
+}
+
+void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset,
+ uint32_t NumSymbols,
+ uint32_t StringTableOffset,
+ uint32_t StringTableSize) {
+ // struct symtab_command (24 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(macho::LCT_Symtab);
+ Write32(macho::SymtabLoadCommandSize);
+ Write32(SymbolOffset);
+ Write32(NumSymbols);
+ Write32(StringTableOffset);
+ Write32(StringTableSize);
+
+ assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
+}
+
+void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+ uint32_t NumLocalSymbols,
+ uint32_t FirstExternalSymbol,
+ uint32_t NumExternalSymbols,
+ uint32_t FirstUndefinedSymbol,
+ uint32_t NumUndefinedSymbols,
+ uint32_t IndirectSymbolOffset,
+ uint32_t NumIndirectSymbols) {
+ // struct dysymtab_command (80 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(macho::LCT_Dysymtab);
+ Write32(macho::DysymtabLoadCommandSize);
+ Write32(FirstLocalSymbol);
+ Write32(NumLocalSymbols);
+ Write32(FirstExternalSymbol);
+ Write32(NumExternalSymbols);
+ Write32(FirstUndefinedSymbol);
+ Write32(NumUndefinedSymbols);
+ Write32(0); // tocoff
+ Write32(0); // ntoc
+ Write32(0); // modtaboff
+ Write32(0); // nmodtab
+ Write32(0); // extrefsymoff
+ Write32(0); // nextrefsyms
+ Write32(IndirectSymbolOffset);
+ Write32(NumIndirectSymbols);
+ Write32(0); // extreloff
+ Write32(0); // nextrel
+ Write32(0); // locreloff
+ Write32(0); // nlocrel
+
+ assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
+}
+
+void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
+ const MCAsmLayout &Layout) {
+ MCSymbolData &Data = *MSD.SymbolData;
+ const MCSymbol &Symbol = Data.getSymbol();
+ uint8_t Type = 0;
+ uint16_t Flags = Data.getFlags();
+ uint64_t Address = 0;
+
+ // Set the N_TYPE bits. See <mach-o/nlist.h>.
+ //
+ // FIXME: Are the prebound or indirect fields possible here?
+ if (Symbol.isUndefined())
+ Type = macho::STT_Undefined;
+ else if (Symbol.isAbsolute())
+ Type = macho::STT_Absolute;
+ else
+ Type = macho::STT_Section;
+
+ // FIXME: Set STAB bits.
+
+ if (Data.isPrivateExtern())
+ Type |= macho::STF_PrivateExtern;
+
+ // Set external bit.
+ if (Data.isExternal() || Symbol.isUndefined())
+ Type |= macho::STF_External;
+
+ // Compute the symbol address.
+ if (Symbol.isDefined()) {
+ Address = getSymbolAddress(&Data, Layout);
+ } else if (Data.isCommon()) {
+ // Common symbols are encoded with the size in the address
+ // field, and their alignment in the flags.
+ Address = Data.getCommonSize();
+
+ // Common alignment is packed into the 'desc' bits.
+ if (unsigned Align = Data.getCommonAlignment()) {
+ unsigned Log2Size = Log2_32(Align);
+ assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
+ if (Log2Size > 15)
+ report_fatal_error("invalid 'common' alignment '" +
+ Twine(Align) + "'");
+ // FIXME: Keep this mask with the SymbolFlags enumeration.
+ Flags = (Flags & 0xF0FF) | (Log2Size << 8);
+ }
+ }
+
+ // struct nlist (12 bytes)
+
+ Write32(MSD.StringIndex);
+ Write8(Type);
+ Write8(MSD.SectionIndex);
+
+ // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
+ // value.
+ Write16(Flags);
+ if (is64Bit())
+ Write64(Address);
+ else
+ Write32(Address);
+}
+
+void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type,
+ uint32_t DataOffset,
+ uint32_t DataSize) {
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(Type);
+ Write32(macho::LinkeditLoadCommandSize);
+ Write32(DataOffset);
+ Write32(DataSize);
+
+ assert(OS.tell() - Start == macho::LinkeditLoadCommandSize);
+}
+
+static unsigned ComputeLinkerOptionsLoadCommandSize(
+ const std::vector<std::string> &Options, bool is64Bit)
+{
+ unsigned Size = sizeof(macho::LinkerOptionsLoadCommand);
+ for (unsigned i = 0, e = Options.size(); i != e; ++i)
+ Size += Options[i].size() + 1;
+ return RoundUpToAlignment(Size, is64Bit ? 8 : 4);
+}
+
+void MachObjectWriter::WriteLinkerOptionsLoadCommand(
+ const std::vector<std::string> &Options)
+{
+ unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(macho::LCT_LinkerOptions);
+ Write32(Size);
+ Write32(Options.size());
+ uint64_t BytesWritten = sizeof(macho::LinkerOptionsLoadCommand);
+ for (unsigned i = 0, e = Options.size(); i != e; ++i) {
+ // Write each string, including the null byte.
+ const std::string &Option = Options[i];
+ WriteBytes(Option.c_str(), Option.size() + 1);
+ BytesWritten += Option.size() + 1;
+ }
+
+ // Pad to a multiple of the pointer size.
+ WriteBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4));
+
+ assert(OS.tell() - Start == Size);
+}
+
+
+void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+}
+
+void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
+ // This is the point where 'as' creates actual symbols for indirect symbols
+ // (in the following two passes). It would be easier for us to do this sooner
+ // when we see the attribute, but that makes getting the order in the symbol
+ // table much more complicated than it is worth.
+ //
+ // FIXME: Revisit this when the dust settles.
+
+ // Bind non lazy symbol pointers first.
+ unsigned IndirectIndex = 0;
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+ const MCSectionMachO &Section =
+ cast<MCSectionMachO>(it->SectionData->getSection());
+
+ if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
+ continue;
+
+ // Initialize the section indirect symbol base, if necessary.
+ IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex));
+
+ Asm.getOrCreateSymbolData(*it->Symbol);
+ }
+
+ // Then lazy symbol pointers and symbol stubs.
+ IndirectIndex = 0;
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+ const MCSectionMachO &Section =
+ cast<MCSectionMachO>(it->SectionData->getSection());
+
+ if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+ Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
+ continue;
+
+ // Initialize the section indirect symbol base, if necessary.
+ IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex));
+
+ // Set the symbol type to undefined lazy, but only on construction.
+ //
+ // FIXME: Do not hardcode.
+ bool Created;
+ MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
+ if (Created)
+ Entry.setFlags(Entry.getFlags() | 0x0001);
+ }
+}
+
+/// ComputeSymbolTable - Compute the symbol table data
+///
+/// \param StringTable [out] - The string table data.
+/// \param StringIndexMap [out] - Map from symbol names to offsets in the
+/// string table.
+void MachObjectWriter::
+ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+ std::vector<MachSymbolData> &LocalSymbolData,
+ std::vector<MachSymbolData> &ExternalSymbolData,
+ std::vector<MachSymbolData> &UndefinedSymbolData) {
+ // Build section lookup table.
+ DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index)
+ SectionIndexMap[&it->getSection()] = Index;
+ assert(Index <= 256 && "Too many sections!");
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
+
+ // Build the symbol arrays and the string table, but only for non-local
+ // symbols.
+ //
+ // The particular order that we collect the symbols and create the string
+ // table, then sort the symbols is chosen to match 'as'. Even though it
+ // doesn't matter for correctness, this is important for letting us diff .o
+ // files.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+ continue;
+
+ if (!it->isExternal() && !Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ MachSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isUndefined()) {
+ MSD.SectionIndex = 0;
+ UndefinedSymbolData.push_back(MSD);
+ } else if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = 0;
+ ExternalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ ExternalSymbolData.push_back(MSD);
+ }
+ }
+
+ // Now add the data for local symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+ continue;
+
+ if (it->isExternal() || Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ MachSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = 0;
+ LocalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ LocalSymbolData.push_back(MSD);
+ }
+ }
+
+ // External and undefined symbols are required to be in lexicographic order.
+ std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices.
+ Index = 0;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+ // The string table is padded to a multiple of 4.
+ while (StringTable.size() % 4)
+ StringTable += '\x00';
+}
+
+void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ uint64_t StartAddress = 0;
+ const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+ for (int i = 0, n = Order.size(); i != n ; ++i) {
+ const MCSectionData *SD = Order[i];
+ StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+ SectionAddress[SD] = StartAddress;
+ StartAddress += Layout.getSectionAddressSize(SD);
+
+ // Explicitly pad the section to match the alignment requirements of the
+ // following one. This is for 'gas' compatibility, it shouldn't
+ /// strictly be necessary.
+ StartAddress += getPaddingSize(SD, Layout);
+ }
+}
+
+void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ for (MCAssembler::symbol_iterator i = Asm.symbol_begin(),
+ e = Asm.symbol_end();
+ i != e; ++i) {
+ MCSymbolData &SD = *i;
+ if (!SD.getSymbol().isVariable())
+ continue;
+
+ // Is the variable is a symbol difference (SA - SB + C) expression,
+ // and neither symbol is external, mark the variable as absolute.
+ const MCExpr *Expr = SD.getSymbol().getVariableValue();
+ MCValue Value;
+ if (Expr->EvaluateAsRelocatable(Value, Layout)) {
+ if (Value.getSymA() && Value.getSymB())
+ const_cast<MCSymbol*>(&SD.getSymbol())->setAbsolute();
+ }
+ }
+}
+
+void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ computeSectionAddresses(Asm, Layout);
+
+ // Create symbol data for any indirect symbols.
+ BindIndirectSymbols(Asm);
+
+ // Mark symbol difference expressions in variables (from .set or = directives)
+ // as absolute.
+ markAbsoluteVariableSymbols(Asm, Layout);
+
+ // Compute symbol table information and bind symbol indices.
+ ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+ UndefinedSymbolData);
+}
+
+bool MachObjectWriter::
+IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ if (InSet)
+ return true;
+
+ // The effective address is
+ // addr(atom(A)) + offset(A)
+ // - addr(atom(B)) - offset(B)
+ // and the offsets are not relocatable, so the fixup is fully resolved when
+ // addr(atom(A)) - addr(atom(B)) == 0.
+ const MCSymbolData *A_Base = 0, *B_Base = 0;
+
+ const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
+ const MCSection &SecA = SA.getSection();
+ const MCSection &SecB = FB.getParent()->getSection();
+
+ if (IsPCRel) {
+ // The simple (Darwin, except on x86_64) way of dealing with this was to
+ // assume that any reference to a temporary symbol *must* be a temporary
+ // symbol in the same atom, unless the sections differ. Therefore, any PCrel
+ // relocation to a temporary symbol (in the same section) is fully
+ // resolved. This also works in conjunction with absolutized .set, which
+ // requires the compiler to use .set to absolutize the differences between
+ // symbols which the compiler knows to be assembly time constants, so we
+ // don't need to worry about considering symbol differences fully resolved.
+ //
+ // If the file isn't using sub-sections-via-symbols, we can make the
+ // same assumptions about any symbol that we normally make about
+ // assembler locals.
+
+ if (!Asm.getBackend().hasReliableSymbolDifference()) {
+ if (!SA.isInSection() || &SecA != &SecB ||
+ (!SA.isTemporary() &&
+ FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom() &&
+ Asm.getSubsectionsViaSymbols()))
+ return false;
+ return true;
+ }
+ // For Darwin x86_64, there is one special case when the reference IsPCRel.
+ // If the fragment with the reference does not have a base symbol but meets
+ // the simple way of dealing with this, in that it is a temporary symbol in
+ // the same atom then it is assumed to be fully resolved. This is needed so
+ // a relocation entry is not created and so the static linker does not
+ // mess up the reference later.
+ else if(!FB.getAtom() &&
+ SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
+ return true;
+ }
+ } else {
+ if (!TargetObjectWriter->useAggressiveSymbolFolding())
+ return false;
+ }
+
+ const MCFragment *FA = Asm.getSymbolData(SA).getFragment();
+
+ // Bail if the symbol has no fragment.
+ if (!FA)
+ return false;
+
+ A_Base = FA->getAtom();
+ if (!A_Base)
+ return false;
+
+ B_Base = FB.getAtom();
+ if (!B_Base)
+ return false;
+
+ // If the atoms are the same, they are guaranteed to have the same address.
+ if (A_Base == B_Base)
+ return true;
+
+ // Otherwise, we can't prove this is fully resolved.
+ return false;
+}
+
+void MachObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ unsigned NumSections = Asm.size();
+
+ // The section data starts after the header, the segment load command (and
+ // section headers) and the symbol table.
+ unsigned NumLoadCommands = 1;
+ uint64_t LoadCommandsSize = is64Bit() ?
+ macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
+ macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
+
+ // Add the data-in-code load command size, if used.
+ unsigned NumDataRegions = Asm.getDataRegions().size();
+ if (NumDataRegions) {
+ ++NumLoadCommands;
+ LoadCommandsSize += macho::LinkeditLoadCommandSize;
+ }
+
+ // Add the symbol table load command sizes, if used.
+ unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
+ UndefinedSymbolData.size();
+ if (NumSymbols) {
+ NumLoadCommands += 2;
+ LoadCommandsSize += (macho::SymtabLoadCommandSize +
+ macho::DysymtabLoadCommandSize);
+ }
+
+ // Add the linker option load commands sizes.
+ const std::vector<std::vector<std::string> > &LinkerOptions =
+ Asm.getLinkerOptions();
+ for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) {
+ ++NumLoadCommands;
+ LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i],
+ is64Bit());
+ }
+
+ // Compute the total size of the section data, as well as its file size and vm
+ // size.
+ uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
+ macho::Header32Size) + LoadCommandsSize;
+ uint64_t SectionDataSize = 0;
+ uint64_t SectionDataFileSize = 0;
+ uint64_t VMSize = 0;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ uint64_t Address = getSectionAddress(&SD);
+ uint64_t Size = Layout.getSectionAddressSize(&SD);
+ uint64_t FileSize = Layout.getSectionFileSize(&SD);
+ FileSize += getPaddingSize(&SD, Layout);
+
+ VMSize = std::max(VMSize, Address + Size);
+
+ if (SD.getSection().isVirtualSection())
+ continue;
+
+ SectionDataSize = std::max(SectionDataSize, Address + Size);
+ SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
+ }
+
+ // The section data is padded to 4 bytes.
+ //
+ // FIXME: Is this machine dependent?
+ unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+ SectionDataFileSize += SectionDataPadding;
+
+ // Write the prolog, starting with the header and load command...
+ WriteHeader(NumLoadCommands, LoadCommandsSize,
+ Asm.getSubsectionsViaSymbols());
+ WriteSegmentLoadCommand(NumSections, VMSize,
+ SectionDataStart, SectionDataSize);
+
+ // ... and then the section headers.
+ uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+ unsigned NumRelocs = Relocs.size();
+ uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
+ WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
+ RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+ }
+
+ // Write the data-in-code load command, if used.
+ uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
+ if (NumDataRegions) {
+ uint64_t DataRegionsOffset = RelocTableEnd;
+ uint64_t DataRegionsSize = NumDataRegions * 8;
+ WriteLinkeditLoadCommand(macho::LCT_DataInCode, DataRegionsOffset,
+ DataRegionsSize);
+ }
+
+ // Write the symbol table load command, if used.
+ if (NumSymbols) {
+ unsigned FirstLocalSymbol = 0;
+ unsigned NumLocalSymbols = LocalSymbolData.size();
+ unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+ unsigned NumExternalSymbols = ExternalSymbolData.size();
+ unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+ unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+ unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
+ unsigned NumSymTabSymbols =
+ NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
+ uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
+ uint64_t IndirectSymbolOffset = 0;
+
+ // If used, the indirect symbols are written after the section data.
+ if (NumIndirectSymbols)
+ IndirectSymbolOffset = DataInCodeTableEnd;
+
+ // The symbol table is written after the indirect symbol data.
+ uint64_t SymbolTableOffset = DataInCodeTableEnd + IndirectSymbolSize;
+
+ // The string table is written after symbol table.
+ uint64_t StringTableOffset =
+ SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
+ macho::Nlist32Size);
+ WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
+ StringTableOffset, StringTable.size());
+
+ WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+ FirstExternalSymbol, NumExternalSymbols,
+ FirstUndefinedSymbol, NumUndefinedSymbols,
+ IndirectSymbolOffset, NumIndirectSymbols);
+ }
+
+ // Write the linker options load commands.
+ for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) {
+ WriteLinkerOptionsLoadCommand(LinkerOptions[i]);
+ }
+
+ // Write the actual section data.
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ Asm.writeSectionData(it, Layout);
+
+ uint64_t Pad = getPaddingSize(it, Layout);
+ for (unsigned int i = 0; i < Pad; ++i)
+ Write8(0);
+ }
+
+ // Write the extra padding.
+ WriteZeros(SectionDataPadding);
+
+ // Write the relocation entries.
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ // Write the section relocation entries, in reverse order to match 'as'
+ // (approximately, the exact algorithm is more complicated than this).
+ std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ Write32(Relocs[e - i - 1].Word0);
+ Write32(Relocs[e - i - 1].Word1);
+ }
+ }
+
+ // Write out the data-in-code region payload, if there is one.
+ for (MCAssembler::const_data_region_iterator
+ it = Asm.data_region_begin(), ie = Asm.data_region_end();
+ it != ie; ++it) {
+ const DataRegionData *Data = &(*it);
+ uint64_t Start =
+ getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->Start),
+ Layout);
+ uint64_t End =
+ getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->End),
+ Layout);
+ DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
+ << " start: " << Start << "(" << Data->Start->getName() << ")"
+ << " end: " << End << "(" << Data->End->getName() << ")"
+ << " size: " << End - Start
+ << "\n");
+ Write32(Start);
+ Write16(End - Start);
+ Write16(Data->Kind);
+ }
+
+ // Write the symbol table data, if used.
+ if (NumSymbols) {
+ // Write the indirect symbol entries.
+ for (MCAssembler::const_indirect_symbol_iterator
+ it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+ // Indirect symbols in the non lazy symbol pointer section have some
+ // special handling.
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+ if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
+ // If this symbol is defined and internal, mark it as such.
+ if (it->Symbol->isDefined() &&
+ !Asm.getSymbolData(*it->Symbol).isExternal()) {
+ uint32_t Flags = macho::ISF_Local;
+ if (it->Symbol->isAbsolute())
+ Flags |= macho::ISF_Absolute;
+ Write32(Flags);
+ continue;
+ }
+ }
+
+ Write32(Asm.getSymbolData(*it->Symbol).getIndex());
+ }
+
+ // FIXME: Check that offsets match computed ones.
+
+ // Write the symbol table entries.
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ WriteNlist(LocalSymbolData[i], Layout);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ WriteNlist(ExternalSymbolData[i], Layout);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ WriteNlist(UndefinedSymbolData[i], Layout);
+
+ // Write the string table.
+ OS << StringTable.str();
+ }
+}
+
+MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ return new MachObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/contrib/llvm/lib/MC/SubtargetFeature.cpp b/contrib/llvm/lib/MC/SubtargetFeature.cpp
new file mode 100644
index 000000000000..7625abd465fa
--- /dev/null
+++ b/contrib/llvm/lib/MC/SubtargetFeature.cpp
@@ -0,0 +1,374 @@
+//===- SubtargetFeature.cpp - CPU characteristics Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SubtargetFeature interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdlib>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Static Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// hasFlag - Determine if a feature has a flag; '+' or '-'
+///
+static inline bool hasFlag(const StringRef Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' or '-' flag
+ return Ch == '+' || Ch =='-';
+}
+
+/// StripFlag - Return string stripped of flag.
+///
+static inline std::string StripFlag(const StringRef Feature) {
+ return hasFlag(Feature) ? Feature.substr(1) : Feature;
+}
+
+/// isEnabled - Return true if enable flag; '+'.
+///
+static inline bool isEnabled(const StringRef Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' for enabled
+ return Ch == '+';
+}
+
+/// PrependFlag - Return a string with a prepended flag; '+' or '-'.
+///
+static inline std::string PrependFlag(const StringRef Feature,
+ bool IsEnabled) {
+ assert(!Feature.empty() && "Empty string");
+ if (hasFlag(Feature))
+ return Feature;
+ std::string Prefix = IsEnabled ? "+" : "-";
+ Prefix += Feature;
+ return Prefix;
+}
+
+/// Split - Splits a string of comma separated items in to a vector of strings.
+///
+static void Split(std::vector<std::string> &V, const StringRef S) {
+ if (S.empty())
+ return;
+
+ // Start at beginning of string.
+ size_t Pos = 0;
+ while (true) {
+ // Find the next comma
+ size_t Comma = S.find(',', Pos);
+ // If no comma found then the rest of the string is used
+ if (Comma == std::string::npos) {
+ // Add string to vector
+ V.push_back(S.substr(Pos));
+ break;
+ }
+ // Otherwise add substring to vector
+ V.push_back(S.substr(Pos, Comma - Pos));
+ // Advance to next item
+ Pos = Comma + 1;
+ }
+}
+
+/// Join a vector of strings to a string with a comma separating each element.
+///
+static std::string Join(const std::vector<std::string> &V) {
+ // Start with empty string.
+ std::string Result;
+ // If the vector is not empty
+ if (!V.empty()) {
+ // Start with the first feature
+ Result = V[0];
+ // For each successive feature
+ for (size_t i = 1; i < V.size(); i++) {
+ // Add a comma
+ Result += ",";
+ // Add the feature
+ Result += V[i];
+ }
+ }
+ // Return the features string
+ return Result;
+}
+
+/// Adding features.
+void SubtargetFeatures::AddFeature(const StringRef String,
+ bool IsEnabled) {
+ // Don't add empty features
+ if (!String.empty()) {
+ // Convert to lowercase, prepend flag and add to vector
+ Features.push_back(PrependFlag(String.lower(), IsEnabled));
+ }
+}
+
+/// Find KV in array using binary search.
+static const SubtargetFeatureKV *Find(StringRef S, const SubtargetFeatureKV *A,
+ size_t L) {
+ // Make the lower bound element we're looking for
+ SubtargetFeatureKV KV;
+ KV.Key = S.data();
+ // Determine the end of the array
+ const SubtargetFeatureKV *Hi = A + L;
+ // Binary search the array
+ const SubtargetFeatureKV *F = std::lower_bound(A, Hi, KV);
+ // If not found then return NULL
+ if (F == Hi || StringRef(F->Key) != S) return NULL;
+ // Return the found array item
+ return F;
+}
+
+/// getLongestEntryLength - Return the length of the longest entry in the table.
+///
+static size_t getLongestEntryLength(const SubtargetFeatureKV *Table,
+ size_t Size) {
+ size_t MaxLen = 0;
+ for (size_t i = 0; i < Size; i++)
+ MaxLen = std::max(MaxLen, std::strlen(Table[i].Key));
+ return MaxLen;
+}
+
+/// Display help for feature choices.
+///
+static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatTable, size_t FeatTableSize) {
+ // Determine the length of the longest CPU and Feature entries.
+ unsigned MaxCPULen = getLongestEntryLength(CPUTable, CPUTableSize);
+ unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize);
+
+ // Print the CPU table.
+ errs() << "Available CPUs for this target:\n\n";
+ for (size_t i = 0; i != CPUTableSize; i++)
+ errs() << format(" %-*s - %s.\n",
+ MaxCPULen, CPUTable[i].Key, CPUTable[i].Desc);
+ errs() << '\n';
+
+ // Print the Feature table.
+ errs() << "Available features for this target:\n\n";
+ for (size_t i = 0; i != FeatTableSize; i++)
+ errs() << format(" %-*s - %s.\n",
+ MaxFeatLen, FeatTable[i].Key, FeatTable[i].Desc);
+ errs() << '\n';
+
+ errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
+ "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
+ std::exit(1);
+}
+
+//===----------------------------------------------------------------------===//
+// SubtargetFeatures Implementation
+//===----------------------------------------------------------------------===//
+
+SubtargetFeatures::SubtargetFeatures(const StringRef Initial) {
+ // Break up string into separate features
+ Split(Features, Initial);
+}
+
+
+std::string SubtargetFeatures::getString() const {
+ return Join(Features);
+}
+
+/// SetImpliedBits - For each feature that is (transitively) implied by this
+/// feature, set it.
+///
+static
+void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FeatureEntry->Implies & FE.Value) {
+ Bits |= FE.Value;
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// ClearImpliedBits - For each feature that (transitively) implies this
+/// feature, clear it.
+///
+static
+void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FE.Implies & FeatureEntry->Value) {
+ Bits &= ~FE.Value;
+ ClearImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// ToggleFeature - Toggle a feature and returns the newly updated feature
+/// bits.
+uint64_t
+SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ // If there is a match
+ if (FeatureEntry) {
+ if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
+ Bits &= ~FeatureEntry->Value;
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ } else {
+ Bits |= FeatureEntry->Value;
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
+
+ return Bits;
+}
+
+
+/// getFeatureBits - Get feature bits a CPU.
+///
+uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
+ const SubtargetFeatureKV *CPUTable,
+ size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ if (!FeatureTableSize || !CPUTableSize)
+ return 0;
+
+#ifndef NDEBUG
+ for (size_t i = 1; i < CPUTableSize; i++) {
+ assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
+ "CPU table is not sorted");
+ }
+ for (size_t i = 1; i < FeatureTableSize; i++) {
+ assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
+ "CPU features table is not sorted");
+ }
+#endif
+ uint64_t Bits = 0; // Resulting bits
+
+ // Check if help is needed
+ if (CPU == "help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find CPU entry if CPU name is specified.
+ if (!CPU.empty()) {
+ const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable, CPUTableSize);
+ // If there is a match
+ if (CPUEntry) {
+ // Set base feature bits
+ Bits = CPUEntry->Value;
+
+ // Set the feature implied by this CPU feature, if any.
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+ if (CPUEntry->Value & FE.Value)
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
+ }
+ }
+
+ // Iterate through each feature
+ for (size_t i = 0, E = Features.size(); i < E; i++) {
+ const StringRef Feature = Features[i];
+
+ // Check for help
+ if (Feature == "+help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ // If there is a match
+ if (FeatureEntry) {
+ // Enable/disable feature in bits
+ if (isEnabled(Feature)) {
+ Bits |= FeatureEntry->Value;
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ } else {
+ Bits &= ~FeatureEntry->Value;
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
+ }
+
+ return Bits;
+}
+
+/// print - Print feature string.
+///
+void SubtargetFeatures::print(raw_ostream &OS) const {
+ for (size_t i = 0, e = Features.size(); i != e; ++i)
+ OS << Features[i] << " ";
+ OS << "\n";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// dump - Dump feature info.
+///
+void SubtargetFeatures::dump() const {
+ print(dbgs());
+}
+#endif
+
+/// getDefaultSubtargetFeatures - Return a string listing the features
+/// associated with the target triple.
+///
+/// FIXME: This is an inelegant way of specifying the features of a
+/// subtarget. It would be better if we could encode this information
+/// into the IR. See <rdar://5972456>.
+///
+void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) {
+ if (Triple.getVendor() == Triple::Apple) {
+ if (Triple.getArch() == Triple::ppc) {
+ // powerpc-apple-*
+ AddFeature("altivec");
+ } else if (Triple.getArch() == Triple::ppc64) {
+ // powerpc64-apple-*
+ AddFeature("64bit");
+ AddFeature("altivec");
+ }
+ }
+}
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..6dffed73dfb3
--- /dev/null
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -0,0 +1,903 @@
+//===-- llvm/MC/WinCOFFObjectWriter.cpp -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of a Win32 COFF object file writer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "WinCOFFObjectWriter"
+
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TimeValue.h"
+#include <cstdio>
+
+using namespace llvm;
+
+namespace {
+typedef SmallString<COFF::NameSize> name;
+
+enum AuxiliaryType {
+ ATFunctionDefinition,
+ ATbfAndefSymbol,
+ ATWeakExternal,
+ ATFile,
+ ATSectionDefinition
+};
+
+struct AuxSymbol {
+ AuxiliaryType AuxType;
+ COFF::Auxiliary Aux;
+};
+
+class COFFSymbol;
+class COFFSection;
+
+class COFFSymbol {
+public:
+ COFF::symbol Data;
+
+ typedef SmallVector<AuxSymbol, 1> AuxiliarySymbols;
+
+ name Name;
+ int Index;
+ AuxiliarySymbols Aux;
+ COFFSymbol *Other;
+ COFFSection *Section;
+ int Relocations;
+
+ MCSymbolData const *MCData;
+
+ COFFSymbol(StringRef name);
+ size_t size() const;
+ void set_name_offset(uint32_t Offset);
+
+ bool should_keep() const;
+};
+
+// This class contains staging data for a COFF relocation entry.
+struct COFFRelocation {
+ COFF::relocation Data;
+ COFFSymbol *Symb;
+
+ COFFRelocation() : Symb(NULL) {}
+ static size_t size() { return COFF::RelocationSize; }
+};
+
+typedef std::vector<COFFRelocation> relocations;
+
+class COFFSection {
+public:
+ COFF::section Header;
+
+ std::string Name;
+ int Number;
+ MCSectionData const *MCData;
+ COFFSymbol *Symbol;
+ relocations Relocations;
+
+ COFFSection(StringRef name);
+ static size_t size();
+};
+
+// This class holds the COFF string table.
+class StringTable {
+ typedef StringMap<size_t> map;
+ map Map;
+
+ void update_length();
+public:
+ std::vector<char> Data;
+
+ StringTable();
+ size_t size() const;
+ size_t insert(StringRef String);
+};
+
+class WinCOFFObjectWriter : public MCObjectWriter {
+public:
+
+ typedef std::vector<COFFSymbol*> symbols;
+ typedef std::vector<COFFSection*> sections;
+
+ typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
+ typedef DenseMap<MCSection const *, COFFSection *> section_map;
+
+ llvm::OwningPtr<MCWinCOFFObjectTargetWriter> TargetObjectWriter;
+
+ // Root level file contents.
+ COFF::header Header;
+ sections Sections;
+ symbols Symbols;
+ StringTable Strings;
+
+ // Maps used during object file creation.
+ section_map SectionMap;
+ symbol_map SymbolMap;
+
+ WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS);
+ ~WinCOFFObjectWriter();
+
+ COFFSymbol *createSymbol(StringRef Name);
+ COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol);
+ COFFSection *createSection(StringRef Name);
+
+ template <typename object_t, typename list_t>
+ object_t *createCOFFEntity(StringRef Name, list_t &List);
+
+ void DefineSection(MCSectionData const &SectionData);
+ void DefineSymbol(MCSymbol const &Symbol,
+ MCSymbolData const &SymbolData,
+ MCAssembler &Assembler);
+
+ void MakeSymbolReal(COFFSymbol &S, size_t Index);
+ void MakeSectionReal(COFFSection &S, size_t Number);
+
+ bool ExportSection(COFFSection const *S);
+ bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm);
+
+ bool IsPhysicalSection(COFFSection *S);
+
+ // Entity writing methods.
+
+ void WriteFileHeader(const COFF::header &Header);
+ void WriteSymbol(const COFFSymbol *S);
+ void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
+ void WriteSectionHeader(const COFF::section &S);
+ void WriteRelocation(const COFF::relocation &R);
+
+ // MCObjectWriter interface implementation.
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout);
+
+ void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+};
+}
+
+static inline void write_uint32_le(void *Data, uint32_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0x000000FF) >> 0;
+ Ptr[1] = (Value & 0x0000FF00) >> 8;
+ Ptr[2] = (Value & 0x00FF0000) >> 16;
+ Ptr[3] = (Value & 0xFF000000) >> 24;
+}
+
+static inline void write_uint16_le(void *Data, uint16_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0x00FF) >> 0;
+ Ptr[1] = (Value & 0xFF00) >> 8;
+}
+
+static inline void write_uint8_le(void *Data, uint8_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0xFF) >> 0;
+}
+
+//------------------------------------------------------------------------------
+// Symbol class implementation
+
+COFFSymbol::COFFSymbol(StringRef name)
+ : Name(name.begin(), name.end())
+ , Other(NULL)
+ , Section(NULL)
+ , Relocations(0)
+ , MCData(NULL) {
+ memset(&Data, 0, sizeof(Data));
+}
+
+size_t COFFSymbol::size() const {
+ return COFF::SymbolSize + (Data.NumberOfAuxSymbols * COFF::SymbolSize);
+}
+
+// In the case that the name does not fit within 8 bytes, the offset
+// into the string table is stored in the last 4 bytes instead, leaving
+// the first 4 bytes as 0.
+void COFFSymbol::set_name_offset(uint32_t Offset) {
+ write_uint32_le(Data.Name + 0, 0);
+ write_uint32_le(Data.Name + 4, Offset);
+}
+
+/// logic to decide if the symbol should be reported in the symbol table
+bool COFFSymbol::should_keep() const {
+ // no section means its external, keep it
+ if (Section == NULL)
+ return true;
+
+ // if it has relocations pointing at it, keep it
+ if (Relocations > 0) {
+ assert(Section->Number != -1 && "Sections with relocations must be real!");
+ return true;
+ }
+
+ // if the section its in is being droped, drop it
+ if (Section->Number == -1)
+ return false;
+
+ // if it is the section symbol, keep it
+ if (Section->Symbol == this)
+ return true;
+
+ // if its temporary, drop it
+ if (MCData && MCData->getSymbol().isTemporary())
+ return false;
+
+ // otherwise, keep it
+ return true;
+}
+
+//------------------------------------------------------------------------------
+// Section class implementation
+
+COFFSection::COFFSection(StringRef name)
+ : Name(name)
+ , MCData(NULL)
+ , Symbol(NULL) {
+ memset(&Header, 0, sizeof(Header));
+}
+
+size_t COFFSection::size() {
+ return COFF::SectionSize;
+}
+
+//------------------------------------------------------------------------------
+// StringTable class implementation
+
+/// Write the length of the string table into Data.
+/// The length of the string table includes uint32 length header.
+void StringTable::update_length() {
+ write_uint32_le(&Data.front(), Data.size());
+}
+
+StringTable::StringTable() {
+ // The string table data begins with the length of the entire string table
+ // including the length header. Allocate space for this header.
+ Data.resize(4);
+ update_length();
+}
+
+size_t StringTable::size() const {
+ return Data.size();
+}
+
+/// Add String to the table iff it is not already there.
+/// @returns the index into the string table where the string is now located.
+size_t StringTable::insert(StringRef String) {
+ map::iterator i = Map.find(String);
+
+ if (i != Map.end())
+ return i->second;
+
+ size_t Offset = Data.size();
+
+ // Insert string data into string table.
+ Data.insert(Data.end(), String.begin(), String.end());
+ Data.push_back('\0');
+
+ // Put a reference to it in the map.
+ Map[String] = Offset;
+
+ // Update the internal length field.
+ update_length();
+
+ return Offset;
+}
+
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter class implementation
+
+WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
+ raw_ostream &OS)
+ : MCObjectWriter(OS, true)
+ , TargetObjectWriter(MOTW) {
+ memset(&Header, 0, sizeof(Header));
+
+ Header.Machine = TargetObjectWriter->getMachine();
+}
+
+WinCOFFObjectWriter::~WinCOFFObjectWriter() {
+ for (symbols::iterator I = Symbols.begin(), E = Symbols.end(); I != E; ++I)
+ delete *I;
+ for (sections::iterator I = Sections.begin(), E = Sections.end(); I != E; ++I)
+ delete *I;
+}
+
+COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
+ return createCOFFEntity<COFFSymbol>(Name, Symbols);
+}
+
+COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol * Symbol){
+ symbol_map::iterator i = SymbolMap.find(Symbol);
+ if (i != SymbolMap.end())
+ return i->second;
+ COFFSymbol *RetSymbol
+ = createCOFFEntity<COFFSymbol>(Symbol->getName(), Symbols);
+ SymbolMap[Symbol] = RetSymbol;
+ return RetSymbol;
+}
+
+COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) {
+ return createCOFFEntity<COFFSection>(Name, Sections);
+}
+
+/// A template used to lookup or create a symbol/section, and initialize it if
+/// needed.
+template <typename object_t, typename list_t>
+object_t *WinCOFFObjectWriter::createCOFFEntity(StringRef Name,
+ list_t &List) {
+ object_t *Object = new object_t(Name);
+
+ List.push_back(Object);
+
+ return Object;
+}
+
+/// This function takes a section data object from the assembler
+/// and creates the associated COFF section staging object.
+void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
+ assert(SectionData.getSection().getVariant() == MCSection::SV_COFF
+ && "Got non COFF section in the COFF backend!");
+ // FIXME: Not sure how to verify this (at least in a debug build).
+ MCSectionCOFF const &Sec =
+ static_cast<MCSectionCOFF const &>(SectionData.getSection());
+
+ COFFSection *coff_section = createSection(Sec.getSectionName());
+ COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
+
+ coff_section->Symbol = coff_symbol;
+ coff_symbol->Section = coff_section;
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
+
+ // In this case the auxiliary symbol is a Section Definition.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATSectionDefinition;
+ coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection();
+
+ coff_section->Header.Characteristics = Sec.getCharacteristics();
+
+ uint32_t &Characteristics = coff_section->Header.Characteristics;
+ switch (SectionData.getAlignment()) {
+ case 1: Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES; break;
+ case 2: Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES; break;
+ case 4: Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES; break;
+ case 8: Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES; break;
+ case 16: Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES; break;
+ case 32: Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES; break;
+ case 64: Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES; break;
+ case 128: Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES; break;
+ case 256: Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES; break;
+ case 512: Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES; break;
+ case 1024: Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES; break;
+ case 2048: Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES; break;
+ case 4096: Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES; break;
+ case 8192: Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES; break;
+ default:
+ llvm_unreachable("unsupported section alignment");
+ }
+
+ // Bind internal COFF section to MC section.
+ coff_section->MCData = &SectionData;
+ SectionMap[&SectionData.getSection()] = coff_section;
+}
+
+/// This function takes a section data object from the assembler
+/// and creates the associated COFF symbol staging object.
+void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol,
+ MCSymbolData const &SymbolData,
+ MCAssembler &Assembler) {
+ COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol);
+
+ coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0;
+ coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
+
+ if (SymbolData.getFlags() & COFF::SF_WeakExternal) {
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+
+ if (Symbol.isVariable()) {
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+
+ // FIXME: This assert message isn't very good.
+ assert(Symbol.getVariableValue()->getKind() == MCExpr::SymbolRef &&
+ "Value must be a SymbolRef!");
+
+ coff_symbol->Other = GetOrCreateCOFFSymbol(&Symbol);
+ } else {
+ std::string WeakName = std::string(".weak.")
+ + Symbol.getName().str()
+ + ".default";
+ COFFSymbol *WeakDefault = createSymbol(WeakName);
+ WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
+ WeakDefault->Data.StorageClass = COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ WeakDefault->Data.Type = 0;
+ WeakDefault->Data.Value = 0;
+ coff_symbol->Other = WeakDefault;
+ }
+
+ // Setup the Weak External auxiliary symbol.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATWeakExternal;
+ coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
+ coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
+ COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+ }
+
+ // If no storage class was specified in the streamer, define it here.
+ if (coff_symbol->Data.StorageClass == 0) {
+ bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
+
+ coff_symbol->Data.StorageClass =
+ external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
+ }
+
+ if (SymbolData.Fragment != NULL)
+ coff_symbol->Section =
+ SectionMap[&SymbolData.Fragment->getParent()->getSection()];
+
+ // Bind internal COFF symbol to MC symbol.
+ coff_symbol->MCData = &SymbolData;
+ SymbolMap[&Symbol] = coff_symbol;
+}
+
+/// making a section real involves assigned it a number and putting
+/// name into the string table if needed
+void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+ // FIXME: Why is this number 999999? This number is never mentioned in the
+ // spec. I'm assuming this is due to the printed value needing to fit into
+ // the S.Header.Name field. In which case why not 9999999 (7 9's instead of
+ // 6)? The spec does not state if this entry should be null terminated in
+ // this case, and thus this seems to be the best way to do it. I think I
+ // just solved my own FIXME...
+ if (StringTableEntry > 999999)
+ report_fatal_error("COFF string table is greater than 999999 bytes.");
+
+ std::sprintf(S.Header.Name, "/%d", unsigned(StringTableEntry));
+ } else
+ std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
+
+ S.Number = Number;
+ S.Symbol->Data.SectionNumber = S.Number;
+ S.Symbol->Aux[0].Aux.SectionDefinition.Number = S.Number;
+}
+
+void WinCOFFObjectWriter::MakeSymbolReal(COFFSymbol &S, size_t Index) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+ S.set_name_offset(StringTableEntry);
+ } else
+ std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
+ S.Index = Index;
+}
+
+bool WinCOFFObjectWriter::ExportSection(COFFSection const *S) {
+ return !S->MCData->getFragmentList().empty();
+}
+
+bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
+ MCAssembler &Asm) {
+ // This doesn't seem to be right. Strings referred to from the .data section
+ // need symbols so they can be linked to code in the .text section right?
+
+ // return Asm.isSymbolLinkerVisible (&SymbolData);
+
+ // For now, all non-variable symbols are exported,
+ // the linker will sort the rest out for us.
+ return SymbolData.isExternal() || !SymbolData.getSymbol().isVariable();
+}
+
+bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
+ return (S->Header.Characteristics
+ & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
+}
+
+//------------------------------------------------------------------------------
+// entity writing methods
+
+void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
+ WriteLE16(Header.Machine);
+ WriteLE16(Header.NumberOfSections);
+ WriteLE32(Header.TimeDateStamp);
+ WriteLE32(Header.PointerToSymbolTable);
+ WriteLE32(Header.NumberOfSymbols);
+ WriteLE16(Header.SizeOfOptionalHeader);
+ WriteLE16(Header.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol *S) {
+ WriteBytes(StringRef(S->Data.Name, COFF::NameSize));
+ WriteLE32(S->Data.Value);
+ WriteLE16(S->Data.SectionNumber);
+ WriteLE16(S->Data.Type);
+ Write8(S->Data.StorageClass);
+ Write8(S->Data.NumberOfAuxSymbols);
+ WriteAuxiliarySymbols(S->Aux);
+}
+
+void WinCOFFObjectWriter::WriteAuxiliarySymbols(
+ const COFFSymbol::AuxiliarySymbols &S) {
+ for(COFFSymbol::AuxiliarySymbols::const_iterator i = S.begin(), e = S.end();
+ i != e; ++i) {
+ switch(i->AuxType) {
+ case ATFunctionDefinition:
+ WriteLE32(i->Aux.FunctionDefinition.TagIndex);
+ WriteLE32(i->Aux.FunctionDefinition.TotalSize);
+ WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber);
+ WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction);
+ WriteZeros(sizeof(i->Aux.FunctionDefinition.unused));
+ break;
+ case ATbfAndefSymbol:
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused1));
+ WriteLE16(i->Aux.bfAndefSymbol.Linenumber);
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused2));
+ WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction);
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused3));
+ break;
+ case ATWeakExternal:
+ WriteLE32(i->Aux.WeakExternal.TagIndex);
+ WriteLE32(i->Aux.WeakExternal.Characteristics);
+ WriteZeros(sizeof(i->Aux.WeakExternal.unused));
+ break;
+ case ATFile:
+ WriteBytes(StringRef(reinterpret_cast<const char *>(i->Aux.File.FileName),
+ sizeof(i->Aux.File.FileName)));
+ break;
+ case ATSectionDefinition:
+ WriteLE32(i->Aux.SectionDefinition.Length);
+ WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations);
+ WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers);
+ WriteLE32(i->Aux.SectionDefinition.CheckSum);
+ WriteLE16(i->Aux.SectionDefinition.Number);
+ Write8(i->Aux.SectionDefinition.Selection);
+ WriteZeros(sizeof(i->Aux.SectionDefinition.unused));
+ break;
+ }
+ }
+}
+
+void WinCOFFObjectWriter::WriteSectionHeader(const COFF::section &S) {
+ WriteBytes(StringRef(S.Name, COFF::NameSize));
+
+ WriteLE32(S.VirtualSize);
+ WriteLE32(S.VirtualAddress);
+ WriteLE32(S.SizeOfRawData);
+ WriteLE32(S.PointerToRawData);
+ WriteLE32(S.PointerToRelocations);
+ WriteLE32(S.PointerToLineNumbers);
+ WriteLE16(S.NumberOfRelocations);
+ WriteLE16(S.NumberOfLineNumbers);
+ WriteLE32(S.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
+ WriteLE32(R.VirtualAddress);
+ WriteLE32(R.SymbolTableIndex);
+ WriteLE16(R.Type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MCObjectWriter interface implementations
+
+void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // "Define" each section & symbol. This creates section & symbol
+ // entries in the staging area.
+
+ for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++)
+ DefineSection(*i);
+
+ for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
+ e = Asm.symbol_end(); i != e; i++) {
+ if (ExportSymbol(*i, Asm)) {
+ const MCSymbol &Alias = i->getSymbol();
+ const MCSymbol &Symbol = Alias.AliasedSymbol();
+ DefineSymbol(Alias, Asm.getSymbolData(Symbol), Asm);
+ }
+ }
+}
+
+void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ assert(Target.getSymA() != NULL && "Relocation must reference a symbol!");
+
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData &A_SD = Asm.getSymbolData(*A);
+
+ MCSectionData const *SectionData = Fragment->getParent();
+
+ // Mark this symbol as requiring an entry in the symbol table.
+ assert(SectionMap.find(&SectionData->getSection()) != SectionMap.end() &&
+ "Section must already have been defined in ExecutePostLayoutBinding!");
+ assert(SymbolMap.find(&A_SD.getSymbol()) != SymbolMap.end() &&
+ "Symbol must already have been defined in ExecutePostLayoutBinding!");
+
+ COFFSection *coff_section = SectionMap[&SectionData->getSection()];
+ COFFSymbol *coff_symbol = SymbolMap[&A_SD.getSymbol()];
+ const MCSymbolRefExpr *SymA = Target.getSymA();
+ const MCSymbolRefExpr *SymB = Target.getSymB();
+ const bool CrossSection = SymB &&
+ &SymA->getSymbol().getSection() != &SymB->getSymbol().getSection();
+
+ if (Target.getSymB()) {
+ const MCSymbol *B = &Target.getSymB()->getSymbol();
+ MCSymbolData &B_SD = Asm.getSymbolData(*B);
+
+ // Offset of the symbol in the section
+ int64_t a = Layout.getSymbolOffset(&B_SD);
+
+ // Ofeset of the relocation in the section
+ int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+
+ FixedValue = b - a;
+ // In the case where we have SymbA and SymB, we just need to store the delta
+ // between the two symbols. Update FixedValue to account for the delta, and
+ // skip recording the relocation.
+ if (!CrossSection)
+ return;
+ } else {
+ FixedValue = Target.getConstant();
+ }
+
+ COFFRelocation Reloc;
+
+ Reloc.Data.SymbolTableIndex = 0;
+ Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
+
+ // Turn relocations for temporary symbols into section relocations.
+ if (coff_symbol->MCData->getSymbol().isTemporary() || CrossSection) {
+ Reloc.Symb = coff_symbol->Section->Symbol;
+ FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->Fragment)
+ + coff_symbol->MCData->getOffset();
+ } else
+ Reloc.Symb = coff_symbol;
+
+ ++Reloc.Symb->Relocations;
+
+ Reloc.Data.VirtualAddress += Fixup.getOffset();
+
+ unsigned FixupKind = Fixup.getKind();
+
+ if (CrossSection)
+ FixupKind = FK_PCRel_4;
+
+ Reloc.Data.Type = TargetObjectWriter->getRelocType(FixupKind);
+
+ // FIXME: Can anyone explain what this does other than adjust for the size
+ // of the offset?
+ if (Reloc.Data.Type == COFF::IMAGE_REL_AMD64_REL32 ||
+ Reloc.Data.Type == COFF::IMAGE_REL_I386_REL32)
+ FixedValue += 4;
+
+ coff_section->Relocations.push_back(Reloc);
+}
+
+void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // Assign symbol and section indexes and offsets.
+ Header.NumberOfSections = 0;
+
+ for (sections::iterator i = Sections.begin(),
+ e = Sections.end(); i != e; i++) {
+ if (Layout.getSectionAddressSize((*i)->MCData) > 0) {
+ MakeSectionReal(**i, ++Header.NumberOfSections);
+ } else {
+ (*i)->Number = -1;
+ }
+ }
+
+ Header.NumberOfSymbols = 0;
+
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+ COFFSymbol *coff_symbol = *i;
+ MCSymbolData const *SymbolData = coff_symbol->MCData;
+
+ // Update section number & offset for symbols that have them.
+ if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) {
+ assert(coff_symbol->Section != NULL);
+
+ coff_symbol->Data.SectionNumber = coff_symbol->Section->Number;
+ coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment)
+ + SymbolData->Offset;
+ }
+
+ if (coff_symbol->should_keep()) {
+ MakeSymbolReal(*coff_symbol, Header.NumberOfSymbols++);
+
+ // Update auxiliary symbol info.
+ coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
+ Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+ } else
+ coff_symbol->Index = -1;
+ }
+
+ // Fixup weak external references.
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+ COFFSymbol *coff_symbol = *i;
+ if (coff_symbol->Other != NULL) {
+ assert(coff_symbol->Index != -1);
+ assert(coff_symbol->Aux.size() == 1 &&
+ "Symbol must contain one aux symbol!");
+ assert(coff_symbol->Aux[0].AuxType == ATWeakExternal &&
+ "Symbol's aux symbol must be a Weak External!");
+ coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = coff_symbol->Other->Index;
+ }
+ }
+
+ // Assign file offsets to COFF object file structures.
+
+ unsigned offset = 0;
+
+ offset += COFF::HeaderSize;
+ offset += COFF::SectionSize * Header.NumberOfSections;
+
+ for (MCAssembler::const_iterator i = Asm.begin(),
+ e = Asm.end();
+ i != e; i++) {
+ COFFSection *Sec = SectionMap[&i->getSection()];
+
+ if (Sec->Number == -1)
+ continue;
+
+ Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(i);
+
+ if (IsPhysicalSection(Sec)) {
+ Sec->Header.PointerToRawData = offset;
+
+ offset += Sec->Header.SizeOfRawData;
+ }
+
+ if (Sec->Relocations.size() > 0) {
+ bool RelocationsOverflow = Sec->Relocations.size() >= 0xffff;
+
+ if (RelocationsOverflow) {
+ // Signal overflow by setting NumberOfSections to max value. Actual
+ // size is found in reloc #0. Microsoft tools understand this.
+ Sec->Header.NumberOfRelocations = 0xffff;
+ } else {
+ Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ }
+ Sec->Header.PointerToRelocations = offset;
+
+ if (RelocationsOverflow) {
+ // Reloc #0 will contain actual count, so make room for it.
+ offset += COFF::RelocationSize;
+ }
+
+ offset += COFF::RelocationSize * Sec->Relocations.size();
+
+ for (relocations::iterator cr = Sec->Relocations.begin(),
+ er = Sec->Relocations.end();
+ cr != er; ++cr) {
+ assert((*cr).Symb->Index != -1);
+ (*cr).Data.SymbolTableIndex = (*cr).Symb->Index;
+ }
+ }
+
+ assert(Sec->Symbol->Aux.size() == 1
+ && "Section's symbol must have one aux!");
+ AuxSymbol &Aux = Sec->Symbol->Aux[0];
+ assert(Aux.AuxType == ATSectionDefinition &&
+ "Section's symbol's aux symbol must be a Section Definition!");
+ Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
+ Aux.Aux.SectionDefinition.NumberOfRelocations =
+ Sec->Header.NumberOfRelocations;
+ Aux.Aux.SectionDefinition.NumberOfLinenumbers =
+ Sec->Header.NumberOfLineNumbers;
+ }
+
+ Header.PointerToSymbolTable = offset;
+
+ Header.TimeDateStamp = sys::TimeValue::now().toEpochTime();
+
+ // Write it all to disk...
+ WriteFileHeader(Header);
+
+ {
+ sections::iterator i, ie;
+ MCAssembler::const_iterator j, je;
+
+ for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
+ if ((*i)->Number != -1) {
+ if ((*i)->Relocations.size() >= 0xffff) {
+ (*i)->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
+ }
+ WriteSectionHeader((*i)->Header);
+ }
+
+ for (i = Sections.begin(), ie = Sections.end(),
+ j = Asm.begin(), je = Asm.end();
+ (i != ie) && (j != je); ++i, ++j) {
+
+ if ((*i)->Number == -1)
+ continue;
+
+ if ((*i)->Header.PointerToRawData != 0) {
+ assert(OS.tell() == (*i)->Header.PointerToRawData &&
+ "Section::PointerToRawData is insane!");
+
+ Asm.writeSectionData(j, Layout);
+ }
+
+ if ((*i)->Relocations.size() > 0) {
+ assert(OS.tell() == (*i)->Header.PointerToRelocations &&
+ "Section::PointerToRelocations is insane!");
+
+ if ((*i)->Relocations.size() >= 0xffff) {
+ // In case of overflow, write actual relocation count as first
+ // relocation. Including the synthetic reloc itself (+ 1).
+ COFF::relocation r;
+ r.VirtualAddress = (*i)->Relocations.size() + 1;
+ r.SymbolTableIndex = 0;
+ r.Type = 0;
+ WriteRelocation(r);
+ }
+
+ for (relocations::const_iterator k = (*i)->Relocations.begin(),
+ ke = (*i)->Relocations.end();
+ k != ke; k++) {
+ WriteRelocation(k->Data);
+ }
+ } else
+ assert((*i)->Header.PointerToRelocations == 0 &&
+ "Section::PointerToRelocations is insane!");
+ }
+ }
+
+ assert(OS.tell() == Header.PointerToSymbolTable &&
+ "Header::PointerToSymbolTable is insane!");
+
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++)
+ if ((*i)->Index != -1)
+ WriteSymbol(*i);
+
+ OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
+}
+
+MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) :
+ Machine(Machine_) {
+}
+
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter factory function
+
+namespace llvm {
+ MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
+ raw_ostream &OS) {
+ return new WinCOFFObjectWriter(MOTW, OS);
+ }
+}
diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
new file mode 100644
index 000000000000..75f343c421bb
--- /dev/null
+++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
@@ -0,0 +1,336 @@
+//===-- llvm/MC/WinCOFFStreamer.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of a Win32 COFF object file streamer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "WinCOFFStreamer"
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCWin64EH.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class WinCOFFStreamer : public MCObjectStreamer {
+public:
+ MCSymbol const *CurSymbol;
+
+ WinCOFFStreamer(MCContext &Context,
+ MCAsmBackend &MAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS);
+
+ void AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, bool External);
+
+ // MCStreamer interface
+
+ virtual void InitSections();
+ virtual void InitToTextSection();
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol);
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size,unsigned ByteAlignment);
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment);
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual void EmitWin64EHHandlerData();
+ virtual void FinishImpl();
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_WinCOFFStreamer;
+ }
+
+private:
+ virtual void EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->getFixups().push_back(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+ }
+
+ void SetSection(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind) {
+ SwitchSection(getContext().getCOFFSection(Section, Characteristics, Kind));
+ }
+
+ void SetSectionText() {
+ SetSection(".text",
+ COFF::IMAGE_SCN_CNT_CODE
+ | COFF::IMAGE_SCN_MEM_EXECUTE
+ | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+ }
+
+ void SetSectionData() {
+ SetSection(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+ }
+
+ void SetSectionBSS() {
+ SetSection(".bss",
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+ }
+};
+} // end anonymous namespace.
+
+WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ MCCodeEmitter &CE, raw_ostream &OS)
+ : MCObjectStreamer(SK_WinCOFFStreamer, Context, MAB, OS, &CE),
+ CurSymbol(NULL) {}
+
+void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, bool External) {
+ assert(!Symbol->isInSection() && "Symbol must not already have a section!");
+
+ std::string SectionName(".bss$linkonce");
+ SectionName.append(Symbol->getName().begin(), Symbol->getName().end());
+
+ MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ unsigned Characteristics =
+ COFF::IMAGE_SCN_LNK_COMDAT |
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST;
+
+ const MCSection *Section = MCStreamer::getContext().getCOFFSection(
+ SectionName, Characteristics, Selection, SectionKind::getBSS());
+
+ MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
+
+ if (SectionData.getAlignment() < ByteAlignment)
+ SectionData.setAlignment(ByteAlignment);
+
+ SymbolData.setExternal(External);
+
+ Symbol->setSection(*Section);
+
+ if (ByteAlignment != 1)
+ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData);
+
+ SymbolData.setFragment(new MCFillFragment(0, 0, Size, &SectionData));
+}
+
+// MCStreamer interface
+
+void WinCOFFStreamer::InitToTextSection() {
+ SetSectionText();
+}
+
+void WinCOFFStreamer::InitSections() {
+ SetSectionText();
+ SetSectionData();
+ SetSectionBSS();
+ SetSectionText();
+}
+
+void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ MCObjectStreamer::EmitLabel(Symbol);
+}
+
+void WinCOFFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+}
+void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ assert(Symbol && "Symbol must be non-null!");
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ switch (Attribute) {
+ case MCSA_WeakReference:
+ case MCSA_Weak: {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal);
+ SD.setExternal(true);
+ }
+ break;
+
+ case MCSA_Global:
+ getAssembler().getOrCreateSymbolData(*Symbol).setExternal(true);
+ break;
+
+ default:
+ llvm_unreachable("unsupported attribute");
+ }
+}
+
+void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls "
+ "to BeginCOFFSymbolDef!");
+ CurSymbol = Symbol;
+}
+
+void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ assert((StorageClass & ~0xFF) == 0 && "StorageClass must only have data in "
+ "the first byte!");
+
+ getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+ StorageClass << COFF::SF_ClassShift,
+ COFF::SF_ClassMask);
+}
+
+void WinCOFFStreamer::EmitCOFFSymbolType(int Type) {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ assert((Type & ~0xFFFF) == 0 && "Type must only have data in the first 2 "
+ "bytes");
+
+ getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+ Type << COFF::SF_TypeShift,
+ COFF::SF_TypeMask);
+}
+
+void WinCOFFStreamer::EndCOFFSymbolDef() {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ CurSymbol = NULL;
+}
+
+void WinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol)
+{
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ DF->getFixups().push_back(
+ MCFixup::Create(DF->getContents().size(),
+ MCSymbolRefExpr::Create (Symbol, getContext ()),
+ FK_SecRel_4));
+ DF->getContents().resize(DF->getContents().size() + 4, 0);
+}
+
+void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ AddCommonSymbol(Symbol, Size, ByteAlignment, true);
+}
+
+void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ AddCommonSymbol(Symbol, Size, ByteAlignment, false);
+}
+
+void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size,unsigned ByteAlignment) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
+ // Ignore for now, linkers don't care, and proper debug
+ // info will be a much large effort.
+}
+
+void WinCOFFStreamer::EmitWin64EHHandlerData() {
+ MCStreamer::EmitWin64EHHandlerData();
+
+ // We have to emit the unwind info now, because this directive
+ // actually switches to the .xdata section!
+ MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentW64UnwindInfo());
+}
+
+void WinCOFFStreamer::FinishImpl() {
+ EmitW64Tables();
+ MCObjectStreamer::FinishImpl();
+}
+
+namespace llvm
+{
+ MCStreamer *createWinCOFFStreamer(MCContext &Context,
+ MCAsmBackend &MAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS,
+ bool RelaxAll) {
+ WinCOFFStreamer *S = new WinCOFFStreamer(Context, MAB, CE, OS);
+ S->getAssembler().setRelaxAll(RelaxAll);
+ return S;
+ }
+}
diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp
new file mode 100644
index 000000000000..0e13d0540fa6
--- /dev/null
+++ b/contrib/llvm/lib/Object/Archive.cpp
@@ -0,0 +1,301 @@
+//===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ArchiveObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Archive.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace object;
+
+static const char *Magic = "!<arch>\n";
+
+static bool isInternalMember(const ArchiveMemberHeader &amh) {
+ static const char *const internals[] = {
+ "/",
+ "//",
+ "#_LLVM_SYM_TAB_#"
+ };
+
+ StringRef name = amh.getName();
+ for (std::size_t i = 0; i < sizeof(internals) / sizeof(*internals); ++i) {
+ if (name == internals[i])
+ return true;
+ }
+ return false;
+}
+
+void Archive::anchor() { }
+
+error_code Archive::Child::getName(StringRef &Result) const {
+ StringRef name = ToHeader(Data.data())->getName();
+ // Check if it's a special name.
+ if (name[0] == '/') {
+ if (name.size() == 1) { // Linker member.
+ Result = name;
+ return object_error::success;
+ }
+ if (name.size() == 2 && name[1] == '/') { // String table.
+ Result = name;
+ return object_error::success;
+ }
+ // It's a long name.
+ // Get the offset.
+ std::size_t offset;
+ if (name.substr(1).rtrim(" ").getAsInteger(10, offset))
+ llvm_unreachable("Long name offset is not an integer");
+ const char *addr = Parent->StringTable->Data.begin()
+ + sizeof(ArchiveMemberHeader)
+ + offset;
+ // Verify it.
+ if (Parent->StringTable == Parent->end_children()
+ || addr < (Parent->StringTable->Data.begin()
+ + sizeof(ArchiveMemberHeader))
+ || addr > (Parent->StringTable->Data.begin()
+ + sizeof(ArchiveMemberHeader)
+ + Parent->StringTable->getSize()))
+ return object_error::parse_failed;
+
+ // GNU long file names end with a /.
+ if (Parent->kind() == K_GNU) {
+ StringRef::size_type End = StringRef(addr).find('/');
+ Result = StringRef(addr, End);
+ } else {
+ Result = addr;
+ }
+ return object_error::success;
+ } else if (name.startswith("#1/")) {
+ uint64_t name_size;
+ if (name.substr(3).rtrim(" ").getAsInteger(10, name_size))
+ llvm_unreachable("Long name length is not an ingeter");
+ Result = Data.substr(sizeof(ArchiveMemberHeader), name_size);
+ return object_error::success;
+ }
+ // It's a simple name.
+ if (name[name.size() - 1] == '/')
+ Result = name.substr(0, name.size() - 1);
+ else
+ Result = name;
+ return object_error::success;
+}
+
+error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result) const {
+ OwningPtr<Binary> ret;
+ OwningPtr<MemoryBuffer> Buff;
+ if (error_code ec = getMemoryBuffer(Buff))
+ return ec;
+ if (error_code ec = createBinary(Buff.take(), ret))
+ return ec;
+ Result.swap(ret);
+ return object_error::success;
+}
+
+Archive::Archive(MemoryBuffer *source, error_code &ec)
+ : Binary(Binary::ID_Archive, source) {
+ // Check for sufficient magic.
+ if (!source || source->getBufferSize()
+ < (8 + sizeof(ArchiveMemberHeader) + 2) // Smallest archive.
+ || StringRef(source->getBufferStart(), 8) != Magic) {
+ ec = object_error::invalid_file_type;
+ return;
+ }
+
+ // Get the special members.
+ child_iterator i = begin_children(false);
+ child_iterator e = end_children();
+
+ StringRef name;
+ if ((ec = i->getName(name)))
+ return;
+
+ // Below is the pattern that is used to figure out the archive format
+ // GNU archive format
+ // First member : / (points to the symbol table )
+ // Second member : // (may exist, if it exists, points to the string table)
+ // Note : The string table is used if the filename exceeds 15 characters
+ // BSD archive format
+ // First member : __.SYMDEF (points to the symbol table)
+ // There is no string table, if the filename exceeds 15 characters or has a
+ // embedded space, the filename has #1/<size>, The size represents the size
+ // of the filename that needs to be read after the archive header
+ // COFF archive format
+ // First member : /
+ // Second member : / (provides a directory of symbols)
+ // Third member : // contains the string table, this is present even if the
+ // string table is empty
+ if (name == "/") {
+ SymbolTable = i;
+ StringTable = e;
+ if (i != e) ++i;
+ if (i == e) {
+ ec = object_error::parse_failed;
+ return;
+ }
+ if ((ec = i->getName(name)))
+ return;
+ if (name[0] != '/') {
+ Format = K_GNU;
+ } else if ((name.size() > 1) && (name == "//")) {
+ Format = K_GNU;
+ StringTable = i;
+ ++i;
+ } else {
+ Format = K_COFF;
+ if (i != e) {
+ SymbolTable = i;
+ ++i;
+ }
+ if (i != e) {
+ StringTable = i;
+ }
+ }
+ } else if (name == "__.SYMDEF") {
+ Format = K_BSD;
+ SymbolTable = i;
+ StringTable = e;
+ }
+ ec = object_error::success;
+}
+
+Archive::child_iterator Archive::begin_children(bool skip_internal) const {
+ const char *Loc = Data->getBufferStart() + strlen(Magic);
+ size_t Size = sizeof(ArchiveMemberHeader) +
+ ToHeader(Loc)->getSize();
+ Child c(this, StringRef(Loc, Size));
+ // Skip internals at the beginning of an archive.
+ if (skip_internal && isInternalMember(*ToHeader(Loc)))
+ return c.getNext();
+ return c;
+}
+
+Archive::child_iterator Archive::end_children() const {
+ return Child(this, StringRef(0, 0));
+}
+
+error_code Archive::Symbol::getName(StringRef &Result) const {
+ Result = StringRef(Parent->SymbolTable->getBuffer().begin() + StringIndex);
+ return object_error::success;
+}
+
+error_code Archive::Symbol::getMember(child_iterator &Result) const {
+ const char *Buf = Parent->SymbolTable->getBuffer().begin();
+ const char *Offsets = Buf + 4;
+ uint32_t Offset = 0;
+ if (Parent->kind() == K_GNU) {
+ Offset = *(reinterpret_cast<const support::ubig32_t*>(Offsets)
+ + SymbolIndex);
+ } else if (Parent->kind() == K_BSD) {
+ llvm_unreachable("BSD format is not supported");
+ } else {
+ uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
+
+ // Skip offsets.
+ Buf += sizeof(support::ulittle32_t)
+ + (MemberCount * sizeof(support::ulittle32_t));
+
+ uint32_t SymbolCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
+
+ if (SymbolIndex >= SymbolCount)
+ return object_error::parse_failed;
+
+ // Skip SymbolCount to get to the indices table.
+ const char *Indices = Buf + sizeof(support::ulittle32_t);
+
+ // Get the index of the offset in the file member offset table for this
+ // symbol.
+ uint16_t OffsetIndex =
+ *(reinterpret_cast<const support::ulittle16_t*>(Indices)
+ + SymbolIndex);
+ // Subtract 1 since OffsetIndex is 1 based.
+ --OffsetIndex;
+
+ if (OffsetIndex >= MemberCount)
+ return object_error::parse_failed;
+
+ Offset = *(reinterpret_cast<const support::ulittle32_t*>(Offsets)
+ + OffsetIndex);
+ }
+
+ const char *Loc = Parent->getData().begin() + Offset;
+ size_t Size = sizeof(ArchiveMemberHeader) +
+ ToHeader(Loc)->getSize();
+ Result = Child(Parent, StringRef(Loc, Size));
+
+ return object_error::success;
+}
+
+Archive::Symbol Archive::Symbol::getNext() const {
+ Symbol t(*this);
+ // Go to one past next null.
+ t.StringIndex =
+ Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1;
+ ++t.SymbolIndex;
+ return t;
+}
+
+Archive::symbol_iterator Archive::begin_symbols() const {
+ const char *buf = SymbolTable->getBuffer().begin();
+ if (kind() == K_GNU) {
+ uint32_t symbol_count = 0;
+ symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
+ buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
+ } else if (kind() == K_BSD) {
+ llvm_unreachable("BSD archive format is not supported");
+ } else {
+ uint32_t member_count = 0;
+ uint32_t symbol_count = 0;
+ member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (symbol_count * 2); // Skip indices.
+ }
+ uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin();
+ return symbol_iterator(Symbol(this, 0, string_start_offset));
+}
+
+Archive::symbol_iterator Archive::end_symbols() const {
+ const char *buf = SymbolTable->getBuffer().begin();
+ uint32_t symbol_count = 0;
+ if (kind() == K_GNU) {
+ symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
+ buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
+ } else if (kind() == K_BSD) {
+ llvm_unreachable("BSD archive format is not supported");
+ } else {
+ uint32_t member_count = 0;
+ member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ }
+ return symbol_iterator(
+ Symbol(this, symbol_count, 0));
+}
+
+Archive::child_iterator Archive::findSym(StringRef name) const {
+ Archive::symbol_iterator bs = begin_symbols();
+ Archive::symbol_iterator es = end_symbols();
+ Archive::child_iterator result;
+
+ StringRef symname;
+ for (; bs != es; ++bs) {
+ if (bs->getName(symname))
+ return end_children();
+ if (symname == name) {
+ if (bs->getMember(result))
+ return end_children();
+ return result;
+ }
+ }
+ return end_children();
+}
diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp
new file mode 100644
index 000000000000..4e528d8ea565
--- /dev/null
+++ b/contrib/llvm/lib/Object/Binary.cpp
@@ -0,0 +1,103 @@
+//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Binary class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Binary.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+// Include headers for createBinary.
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ObjectFile.h"
+
+using namespace llvm;
+using namespace object;
+
+Binary::~Binary() {
+ delete Data;
+}
+
+Binary::Binary(unsigned int Type, MemoryBuffer *Source)
+ : TypeID(Type)
+ , Data(Source) {}
+
+StringRef Binary::getData() const {
+ return Data->getBuffer();
+}
+
+StringRef Binary::getFileName() const {
+ return Data->getBufferIdentifier();
+}
+
+error_code object::createBinary(MemoryBuffer *Source,
+ OwningPtr<Binary> &Result) {
+ OwningPtr<MemoryBuffer> scopedSource(Source);
+ if (!Source)
+ return make_error_code(errc::invalid_argument);
+ if (Source->getBufferSize() < 64)
+ return object_error::invalid_file_type;
+ sys::LLVMFileType type = sys::IdentifyFileType(Source->getBufferStart(),
+ static_cast<unsigned>(Source->getBufferSize()));
+ error_code ec;
+ switch (type) {
+ case sys::Archive_FileType: {
+ OwningPtr<Binary> ret(new Archive(scopedSource.take(), ec));
+ if (ec) return ec;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType: {
+ OwningPtr<Binary> ret(
+ ObjectFile::createELFObjectFile(scopedSource.take()));
+ if (!ret)
+ return object_error::invalid_file_type;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: {
+ OwningPtr<Binary> ret(
+ ObjectFile::createMachOObjectFile(scopedSource.take()));
+ if (!ret)
+ return object_error::invalid_file_type;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ case sys::COFF_FileType: {
+ OwningPtr<Binary> ret(new COFFObjectFile(scopedSource.take(), ec));
+ if (ec) return ec;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ default: // Unrecognized object file format.
+ return object_error::invalid_file_type;
+ }
+}
+
+error_code object::createBinary(StringRef Path, OwningPtr<Binary> &Result) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Path, File))
+ return ec;
+ return createBinary(File.take(), Result);
+}
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
new file mode 100644
index 000000000000..ca90e0e3c3fc
--- /dev/null
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -0,0 +1,837 @@
+//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the COFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/COFF.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+using support::ulittle8_t;
+using support::ulittle16_t;
+using support::ulittle32_t;
+using support::little16_t;
+}
+
+namespace {
+// Returns false if size is greater than the buffer size. And sets ec.
+bool checkSize(const MemoryBuffer *m, error_code &ec, uint64_t size) {
+ if (m->getBufferSize() < size) {
+ ec = object_error::unexpected_eof;
+ return false;
+ }
+ return true;
+}
+
+// Returns false if any bytes in [addr, addr + size) fall outsize of m.
+bool checkAddr(const MemoryBuffer *m,
+ error_code &ec,
+ uintptr_t addr,
+ uint64_t size) {
+ if (addr + size < addr ||
+ addr + size < size ||
+ addr + size > uintptr_t(m->getBufferEnd())) {
+ ec = object_error::unexpected_eof;
+ return false;
+ }
+ return true;
+}
+}
+
+const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Symb) const {
+ const coff_symbol *addr = reinterpret_cast<const coff_symbol*>(Symb.p);
+
+# ifndef NDEBUG
+ // Verify that the symbol points to a valid entry in the symbol table.
+ uintptr_t offset = uintptr_t(addr) - uintptr_t(base());
+ if (offset < Header->PointerToSymbolTable
+ || offset >= Header->PointerToSymbolTable
+ + (Header->NumberOfSymbols * sizeof(coff_symbol)))
+ report_fatal_error("Symbol was outside of symbol table.");
+
+ assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol)
+ == 0 && "Symbol did not point to the beginning of a symbol");
+# endif
+
+ return addr;
+}
+
+const coff_section *COFFObjectFile::toSec(DataRefImpl Sec) const {
+ const coff_section *addr = reinterpret_cast<const coff_section*>(Sec.p);
+
+# ifndef NDEBUG
+ // Verify that the section points to a valid entry in the section table.
+ if (addr < SectionTable
+ || addr >= (SectionTable + Header->NumberOfSections))
+ report_fatal_error("Section was outside of section table.");
+
+ uintptr_t offset = uintptr_t(addr) - uintptr_t(SectionTable);
+ assert(offset % sizeof(coff_section) == 0 &&
+ "Section did not point to the beginning of a section");
+# endif
+
+ return addr;
+}
+
+error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb,
+ SymbolRef &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ symb += 1 + symb->NumberOfAuxSymbols;
+ Symb.p = reinterpret_cast<uintptr_t>(symb);
+ Result = SymbolRef(Symb, this);
+ return object_error::success;
+}
+
+ error_code COFFObjectFile::getSymbolName(DataRefImpl Symb,
+ StringRef &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ return getSymbolName(symb, Result);
+}
+
+error_code COFFObjectFile::getSymbolFileOffset(DataRefImpl Symb,
+ uint64_t &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
+ if (Type == 'U' || Type == 'w')
+ Result = UnknownAddressOrSize;
+ else if (Section)
+ Result = Section->PointerToRawData + symb->Value;
+ else
+ Result = symb->Value;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
+ if (Type == 'U' || Type == 'w')
+ Result = UnknownAddressOrSize;
+ else if (Section)
+ Result = Section->VirtualAddress + symb->Value;
+ else
+ Result = symb->Value;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolType(DataRefImpl Symb,
+ SymbolRef::Type &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ Result = SymbolRef::ST_Other;
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
+ symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) {
+ Result = SymbolRef::ST_Unknown;
+ } else {
+ if (symb->getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
+ Result = SymbolRef::ST_Function;
+ } else {
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
+ if (Type == 'r' || Type == 'R') {
+ Result = SymbolRef::ST_Data;
+ }
+ }
+ }
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolFlags(DataRefImpl Symb,
+ uint32_t &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ Result = SymbolRef::SF_None;
+
+ // TODO: Correctly set SF_FormatSpecific, SF_ThreadLocal, SF_Common
+
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
+ symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED)
+ Result |= SymbolRef::SF_Undefined;
+
+ // TODO: This are certainly too restrictive.
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
+ Result |= SymbolRef::SF_Global;
+
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
+ Result |= SymbolRef::SF_Weak;
+
+ if (symb->SectionNumber == COFF::IMAGE_SYM_ABSOLUTE)
+ Result |= SymbolRef::SF_Absolute;
+
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb,
+ uint64_t &Result) const {
+ // FIXME: Return the correct size. This requires looking at all the symbols
+ // in the same section as this symbol, and looking for either the next
+ // symbol, or the end of the section.
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
+ if (Type == 'U' || Type == 'w')
+ Result = UnknownAddressOrSize;
+ else if (Section)
+ Result = Section->SizeOfRawData - symb->Value;
+ else
+ Result = 0;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ StringRef name;
+ if (error_code ec = getSymbolName(Symb, name))
+ return ec;
+ char ret = StringSwitch<char>(name)
+ .StartsWith(".debug", 'N')
+ .StartsWith(".sxdata", 'N')
+ .Default('?');
+
+ if (ret != '?') {
+ Result = ret;
+ return object_error::success;
+ }
+
+ uint32_t Characteristics = 0;
+ if (symb->SectionNumber > 0) {
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ Characteristics = Section->Characteristics;
+ }
+
+ switch (symb->SectionNumber) {
+ case COFF::IMAGE_SYM_UNDEFINED:
+ // Check storage classes.
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
+ Result = 'w';
+ return object_error::success; // Don't do ::toupper.
+ } else if (symb->Value != 0) // Check for common symbols.
+ ret = 'c';
+ else
+ ret = 'u';
+ break;
+ case COFF::IMAGE_SYM_ABSOLUTE:
+ ret = 'a';
+ break;
+ case COFF::IMAGE_SYM_DEBUG:
+ ret = 'n';
+ break;
+ default:
+ // Check section type.
+ if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
+ ret = 't';
+ else if ( Characteristics & COFF::IMAGE_SCN_MEM_READ
+ && ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only.
+ ret = 'r';
+ else if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+ ret = 'd';
+ else if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ ret = 'b';
+ else if (Characteristics & COFF::IMAGE_SCN_LNK_INFO)
+ ret = 'i';
+
+ // Check for section symbol.
+ else if ( symb->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC
+ && symb->Value == 0)
+ ret = 's';
+ }
+
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
+ ret = ::toupper(static_cast<unsigned char>(ret));
+
+ Result = ret;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb,
+ section_iterator &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ if (symb->SectionNumber <= COFF::IMAGE_SYM_UNDEFINED)
+ Result = end_sections();
+ else {
+ const coff_section *sec = 0;
+ if (error_code ec = getSection(symb->SectionNumber, sec)) return ec;
+ DataRefImpl Sec;
+ Sec.p = reinterpret_cast<uintptr_t>(sec);
+ Result = section_iterator(SectionRef(Sec, this));
+ }
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolValue(DataRefImpl Symb,
+ uint64_t &Val) const {
+ report_fatal_error("getSymbolValue unimplemented in COFFObjectFile");
+}
+
+error_code COFFObjectFile::getSectionNext(DataRefImpl Sec,
+ SectionRef &Result) const {
+ const coff_section *sec = toSec(Sec);
+ sec += 1;
+ Sec.p = reinterpret_cast<uintptr_t>(sec);
+ Result = SectionRef(Sec, this);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
+ const coff_section *sec = toSec(Sec);
+ return getSectionName(sec, Result);
+}
+
+error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->VirtualAddress;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionSize(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->SizeOfRawData;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionContents(DataRefImpl Sec,
+ StringRef &Result) const {
+ const coff_section *sec = toSec(Sec);
+ ArrayRef<uint8_t> Res;
+ error_code EC = getSectionContents(sec, Res);
+ Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size());
+ return EC;
+}
+
+error_code COFFObjectFile::getSectionAlignment(DataRefImpl Sec,
+ uint64_t &Res) const {
+ const coff_section *sec = toSec(Sec);
+ if (!sec)
+ return object_error::parse_failed;
+ Res = uint64_t(1) << (((sec->Characteristics & 0x00F00000) >> 20) - 1);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionData(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionBSS(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented
+ Result = true;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionVirtual(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionZeroInit(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionReadOnlyData(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *symb_sec = 0;
+ if (error_code ec = getSection(symb->SectionNumber, symb_sec)) return ec;
+ if (symb_sec == sec)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
+ const coff_section *sec = toSec(Sec);
+ DataRefImpl ret;
+ if (sec->NumberOfRelocations == 0)
+ ret.p = 0;
+ else
+ ret.p = reinterpret_cast<uintptr_t>(base() + sec->PointerToRelocations);
+
+ return relocation_iterator(RelocationRef(ret, this));
+}
+
+relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
+ const coff_section *sec = toSec(Sec);
+ DataRefImpl ret;
+ if (sec->NumberOfRelocations == 0)
+ ret.p = 0;
+ else
+ ret.p = reinterpret_cast<uintptr_t>(
+ reinterpret_cast<const coff_relocation*>(
+ base() + sec->PointerToRelocations)
+ + sec->NumberOfRelocations);
+
+ return relocation_iterator(RelocationRef(ret, this));
+}
+
+COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
+ : ObjectFile(Binary::ID_COFF, Object, ec)
+ , Header(0)
+ , SectionTable(0)
+ , SymbolTable(0)
+ , StringTable(0)
+ , StringTableSize(0) {
+ // Check that we at least have enough room for a header.
+ if (!checkSize(Data, ec, sizeof(coff_file_header))) return;
+
+ // The actual starting location of the COFF header in the file. This can be
+ // non-zero in PE/COFF files.
+ uint64_t HeaderStart = 0;
+
+ // Check if this is a PE/COFF file.
+ if (base()[0] == 0x4d && base()[1] == 0x5a) {
+ // PE/COFF, seek through MS-DOS compatibility stub and 4-byte
+ // PE signature to find 'normal' COFF header.
+ if (!checkSize(Data, ec, 0x3c + 8)) return;
+ HeaderStart = *reinterpret_cast<const ulittle16_t *>(base() + 0x3c);
+ // Check the PE header. ("PE\0\0")
+ if (std::memcmp(base() + HeaderStart, "PE\0\0", 4) != 0) {
+ ec = object_error::parse_failed;
+ return;
+ }
+ HeaderStart += 4; // Skip the PE Header.
+ }
+
+ Header = reinterpret_cast<const coff_file_header *>(base() + HeaderStart);
+ if (!checkAddr(Data, ec, uintptr_t(Header), sizeof(coff_file_header)))
+ return;
+
+ SectionTable =
+ reinterpret_cast<const coff_section *>( base()
+ + HeaderStart
+ + sizeof(coff_file_header)
+ + Header->SizeOfOptionalHeader);
+ if (!checkAddr(Data, ec, uintptr_t(SectionTable),
+ Header->NumberOfSections * sizeof(coff_section)))
+ return;
+
+ if (Header->PointerToSymbolTable != 0) {
+ SymbolTable =
+ reinterpret_cast<const coff_symbol *>(base()
+ + Header->PointerToSymbolTable);
+ if (!checkAddr(Data, ec, uintptr_t(SymbolTable),
+ Header->NumberOfSymbols * sizeof(coff_symbol)))
+ return;
+
+ // Find string table.
+ StringTable = reinterpret_cast<const char *>(base())
+ + Header->PointerToSymbolTable
+ + Header->NumberOfSymbols * sizeof(coff_symbol);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t)))
+ return;
+
+ StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize))
+ return;
+ // Check that the string table is null terminated if has any in it.
+ if (StringTableSize < 4
+ || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) {
+ ec = object_error::parse_failed;
+ return;
+ }
+ }
+
+ ec = object_error::success;
+}
+
+symbol_iterator COFFObjectFile::begin_symbols() const {
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(SymbolTable);
+ return symbol_iterator(SymbolRef(ret, this));
+}
+
+symbol_iterator COFFObjectFile::end_symbols() const {
+ // The symbol table ends where the string table begins.
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(StringTable);
+ return symbol_iterator(SymbolRef(ret, this));
+}
+
+symbol_iterator COFFObjectFile::begin_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in COFFObjectFile");
+}
+
+symbol_iterator COFFObjectFile::end_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in COFFObjectFile");
+}
+
+library_iterator COFFObjectFile::begin_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Libraries needed unimplemented in COFFObjectFile");
+}
+
+library_iterator COFFObjectFile::end_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Libraries needed unimplemented in COFFObjectFile");
+}
+
+StringRef COFFObjectFile::getLoadName() const {
+ // COFF does not have this field.
+ return "";
+}
+
+
+section_iterator COFFObjectFile::begin_sections() const {
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(SectionTable);
+ return section_iterator(SectionRef(ret, this));
+}
+
+section_iterator COFFObjectFile::end_sections() const {
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
+ return section_iterator(SectionRef(ret, this));
+}
+
+uint8_t COFFObjectFile::getBytesInAddress() const {
+ return getArch() == Triple::x86_64 ? 8 : 4;
+}
+
+StringRef COFFObjectFile::getFileFormatName() const {
+ switch(Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return "COFF-i386";
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return "COFF-x86-64";
+ default:
+ return "COFF-<unknown arch>";
+ }
+}
+
+unsigned COFFObjectFile::getArch() const {
+ switch(Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return Triple::x86;
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return Triple::x86_64;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+error_code COFFObjectFile::getHeader(const coff_file_header *&Res) const {
+ Res = Header;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSection(int32_t index,
+ const coff_section *&Result) const {
+ // Check for special index values.
+ if (index == COFF::IMAGE_SYM_UNDEFINED ||
+ index == COFF::IMAGE_SYM_ABSOLUTE ||
+ index == COFF::IMAGE_SYM_DEBUG)
+ Result = NULL;
+ else if (index > 0 && index <= Header->NumberOfSections)
+ // We already verified the section table data, so no need to check again.
+ Result = SectionTable + (index - 1);
+ else
+ return object_error::parse_failed;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getString(uint32_t offset,
+ StringRef &Result) const {
+ if (StringTableSize <= 4)
+ // Tried to get a string from an empty string table.
+ return object_error::parse_failed;
+ if (offset >= StringTableSize)
+ return object_error::unexpected_eof;
+ Result = StringRef(StringTable + offset);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbol(uint32_t index,
+ const coff_symbol *&Result) const {
+ if (index < Header->NumberOfSymbols)
+ Result = SymbolTable + index;
+ else
+ return object_error::parse_failed;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolName(const coff_symbol *symbol,
+ StringRef &Res) const {
+ // Check for string table entry. First 4 bytes are 0.
+ if (symbol->Name.Offset.Zeroes == 0) {
+ uint32_t Offset = symbol->Name.Offset.Offset;
+ if (error_code ec = getString(Offset, Res))
+ return ec;
+ return object_error::success;
+ }
+
+ if (symbol->Name.ShortName[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ Res = StringRef(symbol->Name.ShortName);
+ else
+ // Not null terminated, use all 8 bytes.
+ Res = StringRef(symbol->Name.ShortName, 8);
+ return object_error::success;
+}
+
+ArrayRef<uint8_t> COFFObjectFile::getSymbolAuxData(
+ const coff_symbol *symbol) const {
+ const uint8_t *aux = NULL;
+
+ if ( symbol->NumberOfAuxSymbols > 0 ) {
+ // AUX data comes immediately after the symbol in COFF
+ aux = reinterpret_cast<const uint8_t *>(symbol + 1);
+# ifndef NDEBUG
+ // Verify that the aux symbol points to a valid entry in the symbol table.
+ uintptr_t offset = uintptr_t(aux) - uintptr_t(base());
+ if (offset < Header->PointerToSymbolTable
+ || offset >= Header->PointerToSymbolTable
+ + (Header->NumberOfSymbols * sizeof(coff_symbol)))
+ report_fatal_error("Aux Symbol data was outside of symbol table.");
+
+ assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol)
+ == 0 && "Aux Symbol data did not point to the beginning of a symbol");
+# endif
+ }
+ return ArrayRef<uint8_t>(aux, symbol->NumberOfAuxSymbols * sizeof(coff_symbol));
+}
+
+error_code COFFObjectFile::getSectionName(const coff_section *Sec,
+ StringRef &Res) const {
+ StringRef Name;
+ if (Sec->Name[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ Name = Sec->Name;
+ else
+ // Not null terminated, use all 8 bytes.
+ Name = StringRef(Sec->Name, 8);
+
+ // Check for string table entry. First byte is '/'.
+ if (Name[0] == '/') {
+ uint32_t Offset;
+ if (Name.substr(1).getAsInteger(10, Offset))
+ return object_error::parse_failed;
+ if (error_code ec = getString(Offset, Name))
+ return ec;
+ }
+
+ Res = Name;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionContents(const coff_section *Sec,
+ ArrayRef<uint8_t> &Res) const {
+ // The only thing that we need to verify is that the contents is contained
+ // within the file bounds. We don't need to make sure it doesn't cover other
+ // data, as there's nothing that says that is not allowed.
+ uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData;
+ uintptr_t ConEnd = ConStart + Sec->SizeOfRawData;
+ if (ConEnd > uintptr_t(Data->getBufferEnd()))
+ return object_error::parse_failed;
+ Res = ArrayRef<uint8_t>(reinterpret_cast<const unsigned char*>(ConStart),
+ Sec->SizeOfRawData);
+ return object_error::success;
+}
+
+const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const {
+ return reinterpret_cast<const coff_relocation*>(Rel.p);
+}
+error_code COFFObjectFile::getRelocationNext(DataRefImpl Rel,
+ RelocationRef &Res) const {
+ Rel.p = reinterpret_cast<uintptr_t>(
+ reinterpret_cast<const coff_relocation*>(Rel.p) + 1);
+ Res = RelocationRef(Rel, this);
+ return object_error::success;
+}
+error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Res) const {
+ Res = toRel(Rel)->VirtualAddress;
+ return object_error::success;
+}
+error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
+ Res = toRel(Rel)->VirtualAddress;
+ return object_error::success;
+}
+error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel,
+ SymbolRef &Res) const {
+ const coff_relocation* R = toRel(Rel);
+ DataRefImpl Symb;
+ Symb.p = reinterpret_cast<uintptr_t>(SymbolTable + R->SymbolTableIndex);
+ Res = SymbolRef(Symb, this);
+ return object_error::success;
+}
+error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
+ uint64_t &Res) const {
+ const coff_relocation* R = toRel(Rel);
+ Res = R->Type;
+ return object_error::success;
+}
+
+const coff_section *COFFObjectFile::getCOFFSection(section_iterator &It) const {
+ return toSec(It->getRawDataRefImpl());
+}
+
+const coff_symbol *COFFObjectFile::getCOFFSymbol(symbol_iterator &It) const {
+ return toSymb(It->getRawDataRefImpl());
+}
+
+const coff_relocation *COFFObjectFile::getCOFFRelocation(
+ relocation_iterator &It) const {
+ return toRel(It->getRawDataRefImpl());
+}
+
+
+#define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(enum) \
+ case COFF::enum: res = #enum; break;
+
+error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ const coff_relocation *reloc = toRel(Rel);
+ StringRef res;
+ switch (Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ switch (reloc->Type) {
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR64);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32NB);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_1);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_2);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_3);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_4);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_5);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECTION);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL7);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_TOKEN);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SREL32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_PAIR);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SSPAN32);
+ default:
+ res = "Unknown";
+ }
+ break;
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ switch (reloc->Type) {
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR16);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL16);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32NB);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SEG12);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECTION);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_TOKEN);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL7);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL32);
+ default:
+ res = "Unknown";
+ }
+ break;
+ default:
+ res = "Unknown";
+ }
+ Result.append(res.begin(), res.end());
+ return object_error::success;
+}
+
+#undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME
+
+error_code COFFObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
+ int64_t &Res) const {
+ Res = 0;
+ return object_error::success;
+}
+error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ const coff_relocation *reloc = toRel(Rel);
+ const coff_symbol *symb = 0;
+ if (error_code ec = getSymbol(reloc->SymbolTableIndex, symb)) return ec;
+ DataRefImpl sym;
+ sym.p = reinterpret_cast<uintptr_t>(symb);
+ StringRef symname;
+ if (error_code ec = getSymbolName(sym, symname)) return ec;
+ Result.append(symname.begin(), symname.end());
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getLibraryNext(DataRefImpl LibData,
+ LibraryRef &Result) const {
+ report_fatal_error("getLibraryNext not implemented in COFFObjectFile");
+}
+
+error_code COFFObjectFile::getLibraryPath(DataRefImpl LibData,
+ StringRef &Result) const {
+ report_fatal_error("getLibraryPath not implemented in COFFObjectFile");
+}
+
+namespace llvm {
+
+ ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
+ error_code ec;
+ return new COFFObjectFile(Object, ec);
+ }
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp
new file mode 100644
index 000000000000..cfe0eb467e53
--- /dev/null
+++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp
@@ -0,0 +1,74 @@
+//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the ELFObjectFile class implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/MathExtras.h"
+
+namespace llvm {
+
+using namespace object;
+
+// Creates an in-memory object-file by default: createELFObjectFile(Buffer)
+ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
+ std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
+ error_code ec;
+
+ std::size_t MaxAlignment =
+ 1ULL << CountTrailingZeros_64(uintptr_t(Object->getBufferStart()));
+
+ if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
+#if !LLVM_IS_UNALIGNED_ACCESS_FAST
+ if (MaxAlignment >= 4)
+ return new ELFObjectFile<ELFType<support::little, 4, false> >(Object, ec);
+ else
+#endif
+ if (MaxAlignment >= 2)
+ return new ELFObjectFile<ELFType<support::little, 2, false> >(Object, ec);
+ else
+ llvm_unreachable("Invalid alignment for ELF file!");
+ else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
+#if !LLVM_IS_UNALIGNED_ACCESS_FAST
+ if (MaxAlignment >= 4)
+ return new ELFObjectFile<ELFType<support::big, 4, false> >(Object, ec);
+ else
+#endif
+ if (MaxAlignment >= 2)
+ return new ELFObjectFile<ELFType<support::big, 2, false> >(Object, ec);
+ else
+ llvm_unreachable("Invalid alignment for ELF file!");
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
+#if !LLVM_IS_UNALIGNED_ACCESS_FAST
+ if (MaxAlignment >= 8)
+ return new ELFObjectFile<ELFType<support::big, 8, true> >(Object, ec);
+ else
+#endif
+ if (MaxAlignment >= 2)
+ return new ELFObjectFile<ELFType<support::big, 2, true> >(Object, ec);
+ else
+ llvm_unreachable("Invalid alignment for ELF file!");
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) {
+#if !LLVM_IS_UNALIGNED_ACCESS_FAST
+ if (MaxAlignment >= 8)
+ return new ELFObjectFile<ELFType<support::little, 8, true> >(Object, ec);
+ else
+#endif
+ if (MaxAlignment >= 2)
+ return new ELFObjectFile<ELFType<support::little, 2, true> >(Object, ec);
+ else
+ llvm_unreachable("Invalid alignment for ELF file!");
+ }
+
+ report_fatal_error("Buffer is not an ELF object file!");
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp
new file mode 100644
index 000000000000..25946257ab5a
--- /dev/null
+++ b/contrib/llvm/lib/Object/Error.cpp
@@ -0,0 +1,57 @@
+//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines a new error_category for the Object library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+class _object_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+ virtual error_condition default_error_condition(int ev) const;
+};
+}
+
+const char *_object_error_category::name() const {
+ return "llvm.object";
+}
+
+std::string _object_error_category::message(int ev) const {
+ switch (ev) {
+ case object_error::success: return "Success";
+ case object_error::invalid_file_type:
+ return "The file was not recognized as a valid object file";
+ case object_error::parse_failed:
+ return "Invalid data was encountered while parsing the file";
+ case object_error::unexpected_eof:
+ return "The end of the file was unexpectedly encountered";
+ default:
+ llvm_unreachable("An enumerator of object_error does not have a message "
+ "defined.");
+ }
+}
+
+error_condition _object_error_category::default_error_condition(int ev) const {
+ if (ev == object_error::success)
+ return errc::success;
+ return errc::invalid_argument;
+}
+
+const error_category &object::object_category() {
+ static _object_error_category o;
+ return o;
+}
diff --git a/contrib/llvm/lib/Object/MachOObject.cpp b/contrib/llvm/lib/Object/MachOObject.cpp
new file mode 100644
index 000000000000..c9c341a207c7
--- /dev/null
+++ b/contrib/llvm/lib/Object/MachOObject.cpp
@@ -0,0 +1,422 @@
+//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/MachOObject.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+/* Translation Utilities */
+
+template<typename T>
+static void SwapValue(T &Value) {
+ Value = sys::SwapByteOrder(Value);
+}
+
+template<typename T>
+static void SwapStruct(T &Value);
+
+template<typename T>
+static void ReadInMemoryStruct(const MachOObject &MOO,
+ StringRef Buffer, uint64_t Base,
+ InMemoryStruct<T> &Res) {
+ typedef T struct_type;
+ uint64_t Size = sizeof(struct_type);
+
+ // Check that the buffer contains the expected data.
+ if (Base + Size > Buffer.size()) {
+ Res = 0;
+ return;
+ }
+
+ // Check whether we can return a direct pointer.
+ struct_type *Ptr = reinterpret_cast<struct_type *>(
+ const_cast<char *>(Buffer.data() + Base));
+ if (!MOO.isSwappedEndian()) {
+ Res = Ptr;
+ return;
+ }
+
+ // Otherwise, copy the struct and translate the values.
+ Res = *Ptr;
+ SwapStruct(*Res);
+}
+
+/* *** */
+
+MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_,
+ bool Is64Bit_)
+ : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_),
+ IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()),
+ HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) {
+ // Load the common header.
+ memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header));
+ if (IsSwappedEndian) {
+ SwapValue(Header.Magic);
+ SwapValue(Header.CPUType);
+ SwapValue(Header.CPUSubtype);
+ SwapValue(Header.FileType);
+ SwapValue(Header.NumLoadCommands);
+ SwapValue(Header.SizeOfLoadCommands);
+ SwapValue(Header.Flags);
+ }
+
+ if (is64Bit()) {
+ memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header),
+ sizeof(Header64Ext));
+ if (IsSwappedEndian) {
+ SwapValue(Header64Ext.Reserved);
+ }
+ }
+
+ // Create the load command array if sane.
+ if (getHeader().NumLoadCommands < (1 << 20))
+ LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands];
+}
+
+MachOObject::~MachOObject() {
+ delete [] LoadCommands;
+}
+
+MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer,
+ std::string *ErrorStr) {
+ // First, check the magic value and initialize the basic object info.
+ bool IsLittleEndian = false, Is64Bit = false;
+ StringRef Magic = Buffer->getBuffer().slice(0, 4);
+ if (Magic == "\xFE\xED\xFA\xCE") {
+ } else if (Magic == "\xCE\xFA\xED\xFE") {
+ IsLittleEndian = true;
+ } else if (Magic == "\xFE\xED\xFA\xCF") {
+ Is64Bit = true;
+ } else if (Magic == "\xCF\xFA\xED\xFE") {
+ IsLittleEndian = true;
+ Is64Bit = true;
+ } else {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)";
+ return 0;
+ }
+
+ // Ensure that the at least the full header is present.
+ unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size;
+ if (Buffer->getBufferSize() < HeaderSize) {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)";
+ return 0;
+ }
+
+ OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian,
+ Is64Bit));
+
+ // Check for bogus number of load commands.
+ if (Object->getHeader().NumLoadCommands >= (1 << 20)) {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)";
+ return 0;
+ }
+
+ if (ErrorStr) *ErrorStr = "";
+ return Object.take();
+}
+
+StringRef MachOObject::getData(size_t Offset, size_t Size) const {
+ return Buffer->getBuffer().substr(Offset,Size);
+}
+
+void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) {
+ HasStringTable = true;
+ StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset,
+ SLC.StringTableSize);
+}
+
+const MachOObject::LoadCommandInfo &
+MachOObject::getLoadCommandInfo(unsigned Index) const {
+ assert(Index < getHeader().NumLoadCommands && "Invalid index!");
+
+ // Load the command, if necessary.
+ if (Index >= NumLoadedCommands) {
+ uint64_t Offset;
+ if (Index == 0) {
+ Offset = getHeaderSize();
+ } else {
+ const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1);
+ Offset = Prev.Offset + Prev.Command.Size;
+ }
+
+ LoadCommandInfo &Info = LoadCommands[Index];
+ memcpy(&Info.Command, Buffer->getBuffer().data() + Offset,
+ sizeof(macho::LoadCommand));
+ if (IsSwappedEndian) {
+ SwapValue(Info.Command.Type);
+ SwapValue(Info.Command.Size);
+ }
+ Info.Offset = Offset;
+ NumLoadedCommands = Index + 1;
+ }
+
+ return LoadCommands[Index];
+}
+
+template<>
+void SwapStruct(macho::SegmentLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.VMAddress);
+ SwapValue(Value.VMSize);
+ SwapValue(Value.FileOffset);
+ SwapValue(Value.FileSize);
+ SwapValue(Value.MaxVMProtection);
+ SwapValue(Value.InitialVMProtection);
+ SwapValue(Value.NumSections);
+ SwapValue(Value.Flags);
+}
+void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::SegmentLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Segment64LoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.VMAddress);
+ SwapValue(Value.VMSize);
+ SwapValue(Value.FileOffset);
+ SwapValue(Value.FileSize);
+ SwapValue(Value.MaxVMProtection);
+ SwapValue(Value.InitialVMProtection);
+ SwapValue(Value.NumSections);
+ SwapValue(Value.Flags);
+}
+void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::Segment64LoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::SymtabLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.SymbolTableOffset);
+ SwapValue(Value.NumSymbolTableEntries);
+ SwapValue(Value.StringTableOffset);
+ SwapValue(Value.StringTableSize);
+}
+void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::SymtabLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::DysymtabLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.LocalSymbolsIndex);
+ SwapValue(Value.NumLocalSymbols);
+ SwapValue(Value.ExternalSymbolsIndex);
+ SwapValue(Value.NumExternalSymbols);
+ SwapValue(Value.UndefinedSymbolsIndex);
+ SwapValue(Value.NumUndefinedSymbols);
+ SwapValue(Value.TOCOffset);
+ SwapValue(Value.NumTOCEntries);
+ SwapValue(Value.ModuleTableOffset);
+ SwapValue(Value.NumModuleTableEntries);
+ SwapValue(Value.ReferenceSymbolTableOffset);
+ SwapValue(Value.NumReferencedSymbolTableEntries);
+ SwapValue(Value.IndirectSymbolTableOffset);
+ SwapValue(Value.NumIndirectSymbolTableEntries);
+ SwapValue(Value.ExternalRelocationTableOffset);
+ SwapValue(Value.NumExternalRelocationTableEntries);
+ SwapValue(Value.LocalRelocationTableOffset);
+ SwapValue(Value.NumLocalRelocationTableEntries);
+}
+void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::DysymtabLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::LinkeditDataLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.DataOffset);
+ SwapValue(Value.DataSize);
+}
+void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::LinkerOptionsLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.Count);
+}
+void MachOObject::ReadLinkerOptionsLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::LinkerOptionsLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
+ SwapValue(Value.Index);
+}
+void
+MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC,
+ unsigned Index,
+ InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const {
+ uint64_t Offset = (DLC.IndirectSymbolTableOffset +
+ Index * sizeof(macho::IndirectSymbolTableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+
+template<>
+void SwapStruct(macho::Section &Value) {
+ SwapValue(Value.Address);
+ SwapValue(Value.Size);
+ SwapValue(Value.Offset);
+ SwapValue(Value.Align);
+ SwapValue(Value.RelocationTableOffset);
+ SwapValue(Value.NumRelocationTableEntries);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Reserved1);
+ SwapValue(Value.Reserved2);
+}
+void MachOObject::ReadSection(const LoadCommandInfo &LCI,
+ unsigned Index,
+ InMemoryStruct<macho::Section> &Res) const {
+ assert(LCI.Command.Type == macho::LCT_Segment &&
+ "Unexpected load command info!");
+ uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) +
+ Index * sizeof(macho::Section));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Section64 &Value) {
+ SwapValue(Value.Address);
+ SwapValue(Value.Size);
+ SwapValue(Value.Offset);
+ SwapValue(Value.Align);
+ SwapValue(Value.RelocationTableOffset);
+ SwapValue(Value.NumRelocationTableEntries);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Reserved1);
+ SwapValue(Value.Reserved2);
+ SwapValue(Value.Reserved3);
+}
+void MachOObject::ReadSection64(const LoadCommandInfo &LCI,
+ unsigned Index,
+ InMemoryStruct<macho::Section64> &Res) const {
+ assert(LCI.Command.Type == macho::LCT_Segment64 &&
+ "Unexpected load command info!");
+ uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) +
+ Index * sizeof(macho::Section64));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::RelocationEntry &Value) {
+ SwapValue(Value.Word0);
+ SwapValue(Value.Word1);
+}
+void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::RelocationEntry> &Res) const {
+ uint64_t Offset = (RelocationTableOffset +
+ Index * sizeof(macho::RelocationEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::SymbolTableEntry &Value) {
+ SwapValue(Value.StringIndex);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Value);
+}
+void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) const {
+ uint64_t Offset = (SymbolTableOffset +
+ Index * sizeof(macho::SymbolTableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Symbol64TableEntry &Value) {
+ SwapValue(Value.StringIndex);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Value);
+}
+void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
+ uint64_t Offset = (SymbolTableOffset +
+ Index * sizeof(macho::Symbol64TableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::DataInCodeTableEntry &Value) {
+ SwapValue(Value.Offset);
+ SwapValue(Value.Length);
+ SwapValue(Value.Kind);
+}
+void MachOObject::ReadDataInCodeTableEntry(uint64_t TableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::DataInCodeTableEntry> &Res) const {
+ uint64_t Offset = (TableOffset +
+ Index * sizeof(macho::DataInCodeTableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+void MachOObject::ReadULEB128s(uint64_t Index,
+ SmallVectorImpl<uint64_t> &Out) const {
+ DataExtractor extractor(Buffer->getBuffer(), true, 0);
+
+ uint32_t offset = Index;
+ uint64_t data = 0;
+ while (uint64_t delta = extractor.getULEB128(&offset)) {
+ data += delta;
+ Out.push_back(data);
+ }
+}
+
+/* ** */
+// Object Dumping Facilities
+void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; }
+void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; }
+
+void MachOObject::printHeader(raw_ostream &O) const {
+ O << "('cputype', " << Header.CPUType << ")\n";
+ O << "('cpusubtype', " << Header.CPUSubtype << ")\n";
+ O << "('filetype', " << Header.FileType << ")\n";
+ O << "('num_load_commands', " << Header.NumLoadCommands << ")\n";
+ O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n";
+ O << "('flag', " << Header.Flags << ")\n";
+
+ // Print extended header if 64-bit.
+ if (is64Bit())
+ O << "('reserved', " << Header64Ext.Reserved << ")\n";
+}
+
+void MachOObject::print(raw_ostream &O) const {
+ O << "Header:\n";
+ printHeader(O);
+ O << "Load Commands:\n";
+
+ O << "Buffer:\n";
+}
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp
new file mode 100644
index 000000000000..6501df9fb986
--- /dev/null
+++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp
@@ -0,0 +1,1340 @@
+//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachOObjectFile class, which binds the MachOObject
+// class to the generic ObjectFile wrapper.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/MachO.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cctype>
+#include <cstring>
+#include <limits>
+
+using namespace llvm;
+using namespace object;
+
+namespace llvm {
+namespace object {
+
+MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO,
+ error_code &ec)
+ : ObjectFile(Binary::ID_MachO, Object, ec),
+ MachOObj(MOO),
+ RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {
+ DataRefImpl DRI;
+ moveToNextSection(DRI);
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ while (DRI.d.a < LoadCommandCount) {
+ Sections.push_back(DRI);
+ DRI.d.b++;
+ moveToNextSection(DRI);
+ }
+}
+
+
+ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+ error_code ec;
+ std::string Err;
+ MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
+ if (!MachOObj)
+ return NULL;
+ // MachOObject takes ownership of the Buffer we passed to it, and
+ // MachOObjectFile does, too, so we need to make sure they don't get the
+ // same object. A MemoryBuffer is cheap (it's just a reference to memory,
+ // not a copy of the memory itself), so just make a new copy here for
+ // the MachOObjectFile.
+ MemoryBuffer *NewBuffer =
+ MemoryBuffer::getMemBuffer(Buffer->getBuffer(),
+ Buffer->getBufferIdentifier(), false);
+ return new MachOObjectFile(NewBuffer, MachOObj, ec);
+}
+
+/*===-- Symbols -----------------------------------------------------------===*/
+
+void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const {
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ while (DRI.d.a < LoadCommandCount) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Symtab) {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+ if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries)
+ return;
+ }
+
+ DRI.d.a++;
+ DRI.d.b = 0;
+ }
+}
+
+void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) const {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+
+ if (RegisteredStringTable != DRI.d.a) {
+ MachOObj->RegisterStringTable(*SymtabLoadCmd);
+ RegisteredStringTable = DRI.d.a;
+ }
+
+ MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
+ Res);
+}
+
+void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+
+ if (RegisteredStringTable != DRI.d.a) {
+ MachOObj->RegisterStringTable(*SymtabLoadCmd);
+ RegisteredStringTable = DRI.d.a;
+ }
+
+ MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
+ Res);
+}
+
+
+error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI,
+ SymbolRef &Result) const {
+ DRI.d.b++;
+ moveToNextSymbol(DRI);
+ Result = SymbolRef(DRI, this);
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
+ StringRef &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = Entry->Value;
+ if (Entry->SectionIndex) {
+ InMemoryStruct<macho::Section64> Section;
+ getSection64(Sections[Entry->SectionIndex-1], Section);
+ Result += Section->Offset - Section->Address;
+ }
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = Entry->Value;
+ if (Entry->SectionIndex) {
+ InMemoryStruct<macho::Section> Section;
+ getSection(Sections[Entry->SectionIndex-1], Section);
+ Result += Section->Offset - Section->Address;
+ }
+ }
+
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = Entry->Value;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = Entry->Value;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
+ uint64_t &Result) const {
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ uint64_t BeginOffset;
+ uint64_t EndOffset = 0;
+ uint8_t SectionIndex;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ BeginOffset = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ if (!SectionIndex) {
+ uint32_t flags = SymbolRef::SF_None;
+ getSymbolFlags(DRI, flags);
+ if (flags & SymbolRef::SF_Common)
+ Result = Entry->Value;
+ else
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+ // Unfortunately symbols are unsorted so we need to touch all
+ // symbols from load command
+ DRI.d.b = 0;
+ uint32_t Command = DRI.d.a;
+ while (Command == DRI.d.a) {
+ moveToNextSymbol(DRI);
+ if (DRI.d.a < LoadCommandCount) {
+ getSymbol64TableEntry(DRI, Entry);
+ if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
+ if (!EndOffset || Entry->Value < EndOffset)
+ EndOffset = Entry->Value;
+ }
+ DRI.d.b++;
+ }
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ BeginOffset = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ if (!SectionIndex) {
+ uint32_t flags = SymbolRef::SF_None;
+ getSymbolFlags(DRI, flags);
+ if (flags & SymbolRef::SF_Common)
+ Result = Entry->Value;
+ else
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+ // Unfortunately symbols are unsorted so we need to touch all
+ // symbols from load command
+ DRI.d.b = 0;
+ uint32_t Command = DRI.d.a;
+ while (Command == DRI.d.a) {
+ moveToNextSymbol(DRI);
+ if (DRI.d.a < LoadCommandCount) {
+ getSymbolTableEntry(DRI, Entry);
+ if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
+ if (!EndOffset || Entry->Value < EndOffset)
+ EndOffset = Entry->Value;
+ }
+ DRI.d.b++;
+ }
+ }
+ if (!EndOffset) {
+ uint64_t Size;
+ getSectionSize(Sections[SectionIndex-1], Size);
+ getSectionAddress(Sections[SectionIndex-1], EndOffset);
+ EndOffset += Size;
+ }
+ Result = EndOffset - BeginOffset;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
+ char &Result) const {
+ uint8_t Type, Flags;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Type = Entry->Type;
+ Flags = Entry->Flags;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Type = Entry->Type;
+ Flags = Entry->Flags;
+ }
+
+ char Char;
+ switch (Type & macho::STF_TypeMask) {
+ case macho::STT_Undefined:
+ Char = 'u';
+ break;
+ case macho::STT_Absolute:
+ case macho::STT_Section:
+ Char = 's';
+ break;
+ default:
+ Char = '?';
+ break;
+ }
+
+ if (Flags & (macho::STF_External | macho::STF_PrivateExtern))
+ Char = toupper(static_cast<unsigned char>(Char));
+ Result = Char;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
+ uint32_t &Result) const {
+ uint16_t MachOFlags;
+ uint8_t MachOType;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ MachOFlags = Entry->Flags;
+ MachOType = Entry->Type;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ MachOFlags = Entry->Flags;
+ MachOType = Entry->Type;
+ }
+
+ // TODO: Correctly set SF_ThreadLocal
+ Result = SymbolRef::SF_None;
+
+ if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
+ Result |= SymbolRef::SF_Undefined;
+
+ if (MachOFlags & macho::STF_StabsEntryMask)
+ Result |= SymbolRef::SF_FormatSpecific;
+
+ if (MachOType & MachO::NlistMaskExternal) {
+ Result |= SymbolRef::SF_Global;
+ if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
+ Result |= SymbolRef::SF_Common;
+ }
+
+ if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef))
+ Result |= SymbolRef::SF_Weak;
+
+ if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeAbsolute)
+ Result |= SymbolRef::SF_Absolute;
+
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const {
+ uint8_t index;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(Symb, Entry);
+ index = Entry->SectionIndex;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(Symb, Entry);
+ index = Entry->SectionIndex;
+ }
+
+ if (index == 0)
+ Res = end_sections();
+ else
+ Res = section_iterator(SectionRef(Sections[index-1], this));
+
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
+ SymbolRef::Type &Res) const {
+ uint8_t n_type;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(Symb, Entry);
+ n_type = Entry->Type;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(Symb, Entry);
+ n_type = Entry->Type;
+ }
+ Res = SymbolRef::ST_Other;
+
+ // If this is a STAB debugging symbol, we can do nothing more.
+ if (n_type & MachO::NlistMaskStab) {
+ Res = SymbolRef::ST_Debug;
+ return object_error::success;
+ }
+
+ switch (n_type & MachO::NlistMaskType) {
+ case MachO::NListTypeUndefined :
+ Res = SymbolRef::ST_Unknown;
+ break;
+ case MachO::NListTypeSection :
+ Res = SymbolRef::ST_Function;
+ break;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolValue(DataRefImpl Symb,
+ uint64_t &Val) const {
+ report_fatal_error("getSymbolValue unimplemented in MachOObjectFile");
+}
+
+symbol_iterator MachOObjectFile::begin_symbols() const {
+ // DRI.d.a = segment number; DRI.d.b = symbol index.
+ DataRefImpl DRI;
+ moveToNextSymbol(DRI);
+ return symbol_iterator(SymbolRef(DRI, this));
+}
+
+symbol_iterator MachOObjectFile::end_symbols() const {
+ DataRefImpl DRI;
+ DRI.d.a = MachOObj->getHeader().NumLoadCommands;
+ return symbol_iterator(SymbolRef(DRI, this));
+}
+
+symbol_iterator MachOObjectFile::begin_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+symbol_iterator MachOObjectFile::end_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+library_iterator MachOObjectFile::begin_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+library_iterator MachOObjectFile::end_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+StringRef MachOObjectFile::getLoadName() const {
+ // TODO: Implement
+ report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
+}
+
+/*===-- Sections ----------------------------------------------------------===*/
+
+void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const {
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ while (DRI.d.a < LoadCommandCount) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Segment) {
+ InMemoryStruct<macho::SegmentLoadCommand> SegmentLoadCmd;
+ MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd);
+ if (DRI.d.b < SegmentLoadCmd->NumSections)
+ return;
+ } else if (LCI.Command.Type == macho::LCT_Segment64) {
+ InMemoryStruct<macho::Segment64LoadCommand> Segment64LoadCmd;
+ MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd);
+ if (DRI.d.b < Segment64LoadCmd->NumSections)
+ return;
+ }
+
+ DRI.d.a++;
+ DRI.d.b = 0;
+ }
+}
+
+error_code MachOObjectFile::getSectionNext(DataRefImpl DRI,
+ SectionRef &Result) const {
+ DRI.d.b++;
+ moveToNextSection(DRI);
+ Result = SectionRef(DRI, this);
+ return object_error::success;
+}
+
+void
+MachOObjectFile::getSection(DataRefImpl DRI,
+ InMemoryStruct<macho::Section> &Res) const {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSection(LCI, DRI.d.b, Res);
+}
+
+std::size_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const {
+ SectionList::const_iterator loc =
+ std::find(Sections.begin(), Sections.end(), Sec);
+ assert(loc != Sections.end() && "Sec is not a valid section!");
+ return std::distance(Sections.begin(), loc);
+}
+
+void
+MachOObjectFile::getSection64(DataRefImpl DRI,
+ InMemoryStruct<macho::Section64> &Res) const {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSection64(LCI, DRI.d.b, Res);
+}
+
+static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Segment64)
+ return true;
+ assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type.");
+ return false;
+}
+
+static StringRef parseSegmentOrSectionName(const char *P) {
+ if (P[15] == 0)
+ // Null terminated.
+ return P;
+ // Not null terminated, so this is a 16 char string.
+ return StringRef(P, 16);
+}
+
+error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
+ StringRef &Result) const {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) +
+ DRI.d.b * sizeof(macho::Section64);
+ StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64));
+ const macho::Section64 *sec =
+ reinterpret_cast<const macho::Section64*>(Data.data());
+ Result = parseSegmentOrSectionName(sec->Name);
+ } else {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) +
+ DRI.d.b * sizeof(macho::Section);
+ StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section));
+ const macho::Section *sec =
+ reinterpret_cast<const macho::Section*>(Data.data());
+ Result = parseSegmentOrSectionName(sec->Name);
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec,
+ StringRef &Res) const {
+ if (is64BitLoadCommand(MachOObj.get(), Sec)) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a);
+ unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) +
+ Sec.d.b * sizeof(macho::Section64);
+ StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64));
+ const macho::Section64 *sec =
+ reinterpret_cast<const macho::Section64*>(Data.data());
+ Res = parseSegmentOrSectionName(sec->SegmentName);
+ } else {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a);
+ unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) +
+ Sec.d.b * sizeof(macho::Section);
+ StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section));
+ const macho::Section *sec =
+ reinterpret_cast<const macho::Section*>(Data.data());
+ Res = parseSegmentOrSectionName(sec->SegmentName);
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = Sect->Address;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = Sect->Address;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = Sect->Size;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = Sect->Size;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
+ StringRef &Result) const {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = MachOObj->getData(Sect->Offset, Sect->Size);
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = MachOObj->getData(Sect->Offset, Sect->Size);
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = uint64_t(1) << Sect->Align;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = uint64_t(1) << Sect->Align;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
+ bool &Result) const {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = Sect->Flags & macho::SF_PureInstructions;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = Sect->Flags & macho::SF_PureInstructions;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionData(DataRefImpl DRI,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = true;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI,
+ bool &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType;
+ Result = (SectionType == MachO::SectionTypeZeroFill ||
+ SectionType == MachO::SectionTypeZeroFillLarge);
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType;
+ Result = (SectionType == MachO::SectionTypeZeroFill ||
+ SectionType == MachO::SectionTypeZeroFillLarge);
+ }
+
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec,
+ bool &Result) const {
+ // Consider using the code from isSectionText to look for __const sections.
+ // Alternately, emit S_ATTR_PURE_INSTRUCTIONS and/or S_ATTR_SOME_INSTRUCTIONS
+ // to use section attributes to distinguish code from data.
+
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ SymbolRef::Type ST;
+ getSymbolType(Symb, ST);
+ if (ST == SymbolRef::ST_Unknown) {
+ Result = false;
+ return object_error::success;
+ }
+
+ uint64_t SectBegin, SectEnd;
+ getSectionAddress(Sec, SectBegin);
+ getSectionSize(Sec, SectEnd);
+ SectEnd += SectBegin;
+
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(Symb, Entry);
+ uint64_t SymAddr= Entry->Value;
+ Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(Symb, Entry);
+ uint64_t SymAddr= Entry->Value;
+ Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
+ }
+
+ return object_error::success;
+}
+
+relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
+ DataRefImpl ret;
+ ret.d.b = getSectionIndex(Sec);
+ return relocation_iterator(RelocationRef(ret, this));
+}
+relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
+ uint32_t last_reloc;
+ if (is64BitLoadCommand(MachOObj.get(), Sec)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(Sec, Sect);
+ last_reloc = Sect->NumRelocationTableEntries;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(Sec, Sect);
+ last_reloc = Sect->NumRelocationTableEntries;
+ }
+ DataRefImpl ret;
+ ret.d.a = last_reloc;
+ ret.d.b = getSectionIndex(Sec);
+ return relocation_iterator(RelocationRef(ret, this));
+}
+
+section_iterator MachOObjectFile::begin_sections() const {
+ DataRefImpl DRI;
+ moveToNextSection(DRI);
+ return section_iterator(SectionRef(DRI, this));
+}
+
+section_iterator MachOObjectFile::end_sections() const {
+ DataRefImpl DRI;
+ DRI.d.a = MachOObj->getHeader().NumLoadCommands;
+ return section_iterator(SectionRef(DRI, this));
+}
+
+/*===-- Relocations -------------------------------------------------------===*/
+
+void MachOObjectFile::
+getRelocation(DataRefImpl Rel,
+ InMemoryStruct<macho::RelocationEntry> &Res) const {
+ uint32_t relOffset;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(Sections[Rel.d.b], Sect);
+ relOffset = Sect->RelocationTableOffset;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(Sections[Rel.d.b], Sect);
+ relOffset = Sect->RelocationTableOffset;
+ }
+ MachOObj->ReadRelocationEntry(relOffset, Rel.d.a, Res);
+}
+error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel,
+ RelocationRef &Res) const {
+ ++Rel.d.a;
+ Res = RelocationRef(Rel, this);
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Res) const {
+ const uint8_t* sectAddress = 0;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(Sections[Rel.d.b], Sect);
+ sectAddress += Sect->Address;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(Sections[Rel.d.b], Sect);
+ sectAddress += Sect->Address;
+ }
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ uint64_t RelAddr = 0;
+ if (isScattered)
+ RelAddr = RE->Word0 & 0xFFFFFF;
+ else
+ RelAddr = RE->Word0;
+
+ Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr);
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ if (isScattered)
+ Res = RE->Word0 & 0xFFFFFF;
+ else
+ Res = RE->Word0;
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
+ SymbolRef &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+ uint32_t SymbolIdx = RE->Word1 & 0xffffff;
+ bool isExtern = (RE->Word1 >> 27) & 1;
+
+ DataRefImpl Sym;
+ moveToNextSymbol(Sym);
+ if (isExtern) {
+ for (unsigned i = 0; i < SymbolIdx; i++) {
+ Sym.d.b++;
+ moveToNextSymbol(Sym);
+ assert(Sym.d.a < MachOObj->getHeader().NumLoadCommands &&
+ "Relocation symbol index out of range!");
+ }
+ }
+ Res = SymbolRef(Sym, this);
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
+ uint64_t &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+ Res = RE->Word0;
+ Res <<= 32;
+ Res |= RE->Word1;
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ // TODO: Support scattered relocations.
+ StringRef res;
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+
+ unsigned r_type;
+ if (isScattered)
+ r_type = (RE->Word0 >> 24) & 0xF;
+ else
+ r_type = (RE->Word1 >> 28) & 0xF;
+
+ switch (Arch) {
+ case Triple::x86: {
+ static const char *const Table[] = {
+ "GENERIC_RELOC_VANILLA",
+ "GENERIC_RELOC_PAIR",
+ "GENERIC_RELOC_SECTDIFF",
+ "GENERIC_RELOC_PB_LA_PTR",
+ "GENERIC_RELOC_LOCAL_SECTDIFF",
+ "GENERIC_RELOC_TLV" };
+
+ if (r_type > 6)
+ res = "Unknown";
+ else
+ res = Table[r_type];
+ break;
+ }
+ case Triple::x86_64: {
+ static const char *const Table[] = {
+ "X86_64_RELOC_UNSIGNED",
+ "X86_64_RELOC_SIGNED",
+ "X86_64_RELOC_BRANCH",
+ "X86_64_RELOC_GOT_LOAD",
+ "X86_64_RELOC_GOT",
+ "X86_64_RELOC_SUBTRACTOR",
+ "X86_64_RELOC_SIGNED_1",
+ "X86_64_RELOC_SIGNED_2",
+ "X86_64_RELOC_SIGNED_4",
+ "X86_64_RELOC_TLV" };
+
+ if (r_type > 9)
+ res = "Unknown";
+ else
+ res = Table[r_type];
+ break;
+ }
+ case Triple::arm: {
+ static const char *const Table[] = {
+ "ARM_RELOC_VANILLA",
+ "ARM_RELOC_PAIR",
+ "ARM_RELOC_SECTDIFF",
+ "ARM_RELOC_LOCAL_SECTDIFF",
+ "ARM_RELOC_PB_LA_PTR",
+ "ARM_RELOC_BR24",
+ "ARM_THUMB_RELOC_BR22",
+ "ARM_THUMB_32BIT_BRANCH",
+ "ARM_RELOC_HALF",
+ "ARM_RELOC_HALF_SECTDIFF" };
+
+ if (r_type > 9)
+ res = "Unknown";
+ else
+ res = Table[r_type];
+ break;
+ }
+ case Triple::ppc: {
+ static const char *const Table[] = {
+ "PPC_RELOC_VANILLA",
+ "PPC_RELOC_PAIR",
+ "PPC_RELOC_BR14",
+ "PPC_RELOC_BR24",
+ "PPC_RELOC_HI16",
+ "PPC_RELOC_LO16",
+ "PPC_RELOC_HA16",
+ "PPC_RELOC_LO14",
+ "PPC_RELOC_SECTDIFF",
+ "PPC_RELOC_PB_LA_PTR",
+ "PPC_RELOC_HI16_SECTDIFF",
+ "PPC_RELOC_LO16_SECTDIFF",
+ "PPC_RELOC_HA16_SECTDIFF",
+ "PPC_RELOC_JBSR",
+ "PPC_RELOC_LO14_SECTDIFF",
+ "PPC_RELOC_LOCAL_SECTDIFF" };
+
+ res = Table[r_type];
+ break;
+ }
+ case Triple::UnknownArch:
+ res = "Unknown";
+ break;
+ }
+ Result.append(res.begin(), res.end());
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
+ int64_t &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ Res = 0;
+ if (!isExtern) {
+ const uint8_t* sectAddress = base();
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(Sections[Rel.d.b], Sect);
+ sectAddress += Sect->Offset;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(Sections[Rel.d.b], Sect);
+ sectAddress += Sect->Offset;
+ }
+ Res = reinterpret_cast<uintptr_t>(sectAddress);
+ }
+ return object_error::success;
+}
+
+// Helper to advance a section or symbol iterator multiple increments at a time.
+template<class T>
+error_code advance(T &it, size_t Val) {
+ error_code ec;
+ while (Val--) {
+ it.increment(ec);
+ }
+ return ec;
+}
+
+template<class T>
+void advanceTo(T &it, size_t Val) {
+ if (error_code ec = advance(it, Val))
+ report_fatal_error(ec.message());
+}
+
+void MachOObjectFile::printRelocationTargetName(
+ InMemoryStruct<macho::RelocationEntry>& RE,
+ raw_string_ostream &fmt) const {
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+
+ // Target of a scattered relocation is an address. In the interest of
+ // generating pretty output, scan through the symbol table looking for a
+ // symbol that aligns with that address. If we find one, print it.
+ // Otherwise, we just print the hex address of the target.
+ if (isScattered) {
+ uint32_t Val = RE->Word1;
+
+ error_code ec;
+ for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE;
+ SI.increment(ec)) {
+ if (ec) report_fatal_error(ec.message());
+
+ uint64_t Addr;
+ StringRef Name;
+
+ if ((ec = SI->getAddress(Addr)))
+ report_fatal_error(ec.message());
+ if (Addr != Val) continue;
+ if ((ec = SI->getName(Name)))
+ report_fatal_error(ec.message());
+ fmt << Name;
+ return;
+ }
+
+ // If we couldn't find a symbol that this relocation refers to, try
+ // to find a section beginning instead.
+ for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE;
+ SI.increment(ec)) {
+ if (ec) report_fatal_error(ec.message());
+
+ uint64_t Addr;
+ StringRef Name;
+
+ if ((ec = SI->getAddress(Addr)))
+ report_fatal_error(ec.message());
+ if (Addr != Val) continue;
+ if ((ec = SI->getName(Name)))
+ report_fatal_error(ec.message());
+ fmt << Name;
+ return;
+ }
+
+ fmt << format("0x%x", Val);
+ return;
+ }
+
+ StringRef S;
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ uint32_t Val = RE->Word1 & 0xFFFFFF;
+
+ if (isExtern) {
+ symbol_iterator SI = begin_symbols();
+ advanceTo(SI, Val);
+ SI->getName(S);
+ } else {
+ section_iterator SI = begin_sections();
+ advanceTo(SI, Val);
+ SI->getName(S);
+ }
+
+ fmt << S;
+}
+
+error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+
+ unsigned Type;
+ if (isScattered)
+ Type = (RE->Word0 >> 24) & 0xF;
+ else
+ Type = (RE->Word1 >> 28) & 0xF;
+
+ bool isPCRel;
+ if (isScattered)
+ isPCRel = ((RE->Word0 >> 30) & 1);
+ else
+ isPCRel = ((RE->Word1 >> 24) & 1);
+
+ // Determine any addends that should be displayed with the relocation.
+ // These require decoding the relocation type, which is triple-specific.
+
+ // X86_64 has entirely custom relocation types.
+ if (Arch == Triple::x86_64) {
+ bool isPCRel = ((RE->Word1 >> 24) & 1);
+
+ switch (Type) {
+ case macho::RIT_X86_64_GOTLoad: // X86_64_RELOC_GOT_LOAD
+ case macho::RIT_X86_64_GOT: { // X86_64_RELOC_GOT
+ printRelocationTargetName(RE, fmt);
+ fmt << "@GOT";
+ if (isPCRel) fmt << "PCREL";
+ break;
+ }
+ case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+ // X86_64_SUBTRACTOR must be followed by a relocation of type
+ // X86_64_RELOC_UNSIGNED.
+ // NOTE: Scattered relocations don't exist on x86_64.
+ unsigned RType = (RENext->Word1 >> 28) & 0xF;
+ if (RType != 0)
+ report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
+ "X86_64_RELOC_SUBTRACTOR.");
+
+ // The X86_64_RELOC_UNSIGNED contains the minuend symbol,
+ // X86_64_SUBTRACTOR contains to the subtrahend.
+ printRelocationTargetName(RENext, fmt);
+ fmt << "-";
+ printRelocationTargetName(RE, fmt);
+ break;
+ }
+ case macho::RIT_X86_64_TLV:
+ printRelocationTargetName(RE, fmt);
+ fmt << "@TLV";
+ if (isPCRel) fmt << "P";
+ break;
+ case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1
+ printRelocationTargetName(RE, fmt);
+ fmt << "-1";
+ break;
+ case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2
+ printRelocationTargetName(RE, fmt);
+ fmt << "-2";
+ break;
+ case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4
+ printRelocationTargetName(RE, fmt);
+ fmt << "-4";
+ break;
+ default:
+ printRelocationTargetName(RE, fmt);
+ break;
+ }
+ // X86 and ARM share some relocation types in common.
+ } else if (Arch == Triple::x86 || Arch == Triple::arm) {
+ // Generic relocation types...
+ switch (Type) {
+ case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info
+ return object_error::success;
+ case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+ // X86 sect diff's must be followed by a relocation of type
+ // GENERIC_RELOC_PAIR.
+ bool isNextScattered = (Arch != Triple::x86_64) &&
+ (RENext->Word0 & macho::RF_Scattered);
+ unsigned RType;
+ if (isNextScattered)
+ RType = (RENext->Word0 >> 24) & 0xF;
+ else
+ RType = (RENext->Word1 >> 28) & 0xF;
+ if (RType != 1)
+ report_fatal_error("Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_SECTDIFF.");
+
+ printRelocationTargetName(RE, fmt);
+ fmt << "-";
+ printRelocationTargetName(RENext, fmt);
+ break;
+ }
+ }
+
+ if (Arch == Triple::x86) {
+ // All X86 relocations that need special printing were already
+ // handled in the generic code.
+ switch (Type) {
+ case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+ // X86 sect diff's must be followed by a relocation of type
+ // GENERIC_RELOC_PAIR.
+ bool isNextScattered = (Arch != Triple::x86_64) &&
+ (RENext->Word0 & macho::RF_Scattered);
+ unsigned RType;
+ if (isNextScattered)
+ RType = (RENext->Word0 >> 24) & 0xF;
+ else
+ RType = (RENext->Word1 >> 28) & 0xF;
+ if (RType != 1)
+ report_fatal_error("Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_LOCAL_SECTDIFF.");
+
+ printRelocationTargetName(RE, fmt);
+ fmt << "-";
+ printRelocationTargetName(RENext, fmt);
+ break;
+ }
+ case macho::RIT_Generic_TLV: {
+ printRelocationTargetName(RE, fmt);
+ fmt << "@TLV";
+ if (isPCRel) fmt << "P";
+ break;
+ }
+ default:
+ printRelocationTargetName(RE, fmt);
+ }
+ } else { // ARM-specific relocations
+ switch (Type) {
+ case macho::RIT_ARM_Half: // ARM_RELOC_HALF
+ case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF
+ // Half relocations steal a bit from the length field to encode
+ // whether this is an upper16 or a lower16 relocation.
+ bool isUpper;
+ if (isScattered)
+ isUpper = (RE->Word0 >> 28) & 1;
+ else
+ isUpper = (RE->Word1 >> 25) & 1;
+
+ if (isUpper)
+ fmt << ":upper16:(";
+ else
+ fmt << ":lower16:(";
+ printRelocationTargetName(RE, fmt);
+
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+ // ARM half relocs must be followed by a relocation of type
+ // ARM_RELOC_PAIR.
+ bool isNextScattered = (Arch != Triple::x86_64) &&
+ (RENext->Word0 & macho::RF_Scattered);
+ unsigned RType;
+ if (isNextScattered)
+ RType = (RENext->Word0 >> 24) & 0xF;
+ else
+ RType = (RENext->Word1 >> 28) & 0xF;
+
+ if (RType != 1)
+ report_fatal_error("Expected ARM_RELOC_PAIR after "
+ "GENERIC_RELOC_HALF");
+
+ // NOTE: The half of the target virtual address is stashed in the
+ // address field of the secondary relocation, but we can't reverse
+ // engineer the constant offset from it without decoding the movw/movt
+ // instruction to find the other half in its immediate field.
+
+ // ARM_RELOC_HALF_SECTDIFF encodes the second section in the
+ // symbol/section pointer of the follow-on relocation.
+ if (Type == macho::RIT_ARM_HalfDifference) {
+ fmt << "-";
+ printRelocationTargetName(RENext, fmt);
+ }
+
+ fmt << ")";
+ break;
+ }
+ default: {
+ printRelocationTargetName(RE, fmt);
+ }
+ }
+ }
+ } else
+ printRelocationTargetName(RE, fmt);
+
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
+ bool &Result) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ unsigned Type;
+ if (isScattered)
+ Type = (RE->Word0 >> 24) & 0xF;
+ else
+ Type = (RE->Word1 >> 28) & 0xF;
+
+ Result = false;
+
+ // On arches that use the generic relocations, GENERIC_RELOC_PAIR
+ // is always hidden.
+ if (Arch == Triple::x86 || Arch == Triple::arm) {
+ if (Type == macho::RIT_Pair) Result = true;
+ } else if (Arch == Triple::x86_64) {
+ // On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
+ // an X864_64_RELOC_SUBTRACTOR.
+ if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) {
+ DataRefImpl RelPrev = Rel;
+ RelPrev.d.a--;
+ InMemoryStruct<macho::RelocationEntry> REPrev;
+ getRelocation(RelPrev, REPrev);
+
+ unsigned PrevType = (REPrev->Word1 >> 28) & 0xF;
+
+ if (PrevType == macho::RIT_X86_64_Subtractor) Result = true;
+ }
+ }
+
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getLibraryNext(DataRefImpl LibData,
+ LibraryRef &Res) const {
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
+ StringRef &Res) const {
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+
+/*===-- Miscellaneous -----------------------------------------------------===*/
+
+uint8_t MachOObjectFile::getBytesInAddress() const {
+ return MachOObj->is64Bit() ? 8 : 4;
+}
+
+StringRef MachOObjectFile::getFileFormatName() const {
+ if (!MachOObj->is64Bit()) {
+ switch (MachOObj->getHeader().CPUType) {
+ case llvm::MachO::CPUTypeI386:
+ return "Mach-O 32-bit i386";
+ case llvm::MachO::CPUTypeARM:
+ return "Mach-O arm";
+ case llvm::MachO::CPUTypePowerPC:
+ return "Mach-O 32-bit ppc";
+ default:
+ assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 &&
+ "64-bit object file when we're not 64-bit?");
+ return "Mach-O 32-bit unknown";
+ }
+ }
+
+ // Make sure the cpu type has the correct mask.
+ assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64)
+ == llvm::MachO::CPUArchABI64 &&
+ "32-bit object file when we're 64-bit?");
+
+ switch (MachOObj->getHeader().CPUType) {
+ case llvm::MachO::CPUTypeX86_64:
+ return "Mach-O 64-bit x86-64";
+ case llvm::MachO::CPUTypePowerPC64:
+ return "Mach-O 64-bit ppc64";
+ default:
+ return "Mach-O 64-bit unknown";
+ }
+}
+
+unsigned MachOObjectFile::getArch() const {
+ switch (MachOObj->getHeader().CPUType) {
+ case llvm::MachO::CPUTypeI386:
+ return Triple::x86;
+ case llvm::MachO::CPUTypeX86_64:
+ return Triple::x86_64;
+ case llvm::MachO::CPUTypeARM:
+ return Triple::arm;
+ case llvm::MachO::CPUTypePowerPC:
+ return Triple::ppc;
+ case llvm::MachO::CPUTypePowerPC64:
+ return Triple::ppc64;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+} // end namespace object
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp
new file mode 100644
index 000000000000..f061ea7cebed
--- /dev/null
+++ b/contrib/llvm/lib/Object/Object.cpp
@@ -0,0 +1,218 @@
+//===- Object.cpp - C bindings to the object file library--------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the C bindings to the file-format-independent object
+// library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm-c/Object.h"
+
+using namespace llvm;
+using namespace object;
+
+// ObjectFile creation
+LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
+ return wrap(ObjectFile::createObjectFile(unwrap(MemBuf)));
+}
+
+void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) {
+ delete unwrap(ObjectFile);
+}
+
+// ObjectFile Section iterators
+LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) {
+ section_iterator SI = unwrap(ObjectFile)->begin_sections();
+ return wrap(new section_iterator(SI));
+}
+
+void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) {
+ delete unwrap(SI);
+}
+
+LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+ LLVMSectionIteratorRef SI) {
+ return (*unwrap(SI) == unwrap(ObjectFile)->end_sections()) ? 1 : 0;
+}
+
+void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
+ error_code ec;
+ unwrap(SI)->increment(ec);
+ if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message());
+}
+
+void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
+ LLVMSymbolIteratorRef Sym) {
+ if (error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect)))
+ report_fatal_error(ec.message());
+}
+
+// ObjectFile Symbol iterators
+LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile) {
+ symbol_iterator SI = unwrap(ObjectFile)->begin_symbols();
+ return wrap(new symbol_iterator(SI));
+}
+
+void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI) {
+ delete unwrap(SI);
+}
+
+LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+ LLVMSymbolIteratorRef SI) {
+ return (*unwrap(SI) == unwrap(ObjectFile)->end_symbols()) ? 1 : 0;
+}
+
+void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) {
+ error_code ec;
+ unwrap(SI)->increment(ec);
+ if (ec) report_fatal_error("LLVMMoveToNextSymbol failed: " + ec.message());
+}
+
+// SectionRef accessors
+const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getName(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
+}
+
+uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getSize(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getContents(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
+}
+
+uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getAddress(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI,
+ LLVMSymbolIteratorRef Sym) {
+ bool ret;
+ if (error_code ec = (*unwrap(SI))->containsSymbol(**unwrap(Sym), ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+// Section Relocation iterators
+LLVMRelocationIteratorRef LLVMGetRelocations(LLVMSectionIteratorRef Section) {
+ relocation_iterator SI = (*unwrap(Section))->begin_relocations();
+ return wrap(new relocation_iterator(SI));
+}
+
+void LLVMDisposeRelocationIterator(LLVMRelocationIteratorRef SI) {
+ delete unwrap(SI);
+}
+
+LLVMBool LLVMIsRelocationIteratorAtEnd(LLVMSectionIteratorRef Section,
+ LLVMRelocationIteratorRef SI) {
+ return (*unwrap(SI) == (*unwrap(Section))->end_relocations()) ? 1 : 0;
+}
+
+void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef SI) {
+ error_code ec;
+ unwrap(SI)->increment(ec);
+ if (ec) report_fatal_error("LLVMMoveToNextRelocation failed: " +
+ ec.message());
+}
+
+
+// SymbolRef accessors
+const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) {
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getName(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
+}
+
+uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getAddress(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getFileOffset(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getSize(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+// RelocationRef accessors
+uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getAddress(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getOffset(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) {
+ SymbolRef ret;
+ if (error_code ec = (*unwrap(RI))->getSymbol(ret))
+ report_fatal_error(ec.message());
+
+ return wrap(new symbol_iterator(ret));
+}
+
+uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getType(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+// NOTE: Caller takes ownership of returned string.
+const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) {
+ SmallVector<char, 0> ret;
+ if (error_code ec = (*unwrap(RI))->getTypeName(ret))
+ report_fatal_error(ec.message());
+
+ char *str = static_cast<char*>(malloc(ret.size()));
+ std::copy(ret.begin(), ret.end(), str);
+ return str;
+}
+
+// NOTE: Caller takes ownership of returned string.
+const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) {
+ SmallVector<char, 0> ret;
+ if (error_code ec = (*unwrap(RI))->getValueString(ret))
+ report_fatal_error(ec.message());
+
+ char *str = static_cast<char*>(malloc(ret.size()));
+ std::copy(ret.begin(), ret.end(), str);
+ return str;
+}
+
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
new file mode 100644
index 000000000000..860c87be9846
--- /dev/null
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -0,0 +1,66 @@
+//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a file format independent ObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/system_error.h"
+
+using namespace llvm;
+using namespace object;
+
+void ObjectFile::anchor() { }
+
+ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec)
+ : Binary(Type, source) {
+}
+
+ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
+ if (!Object || Object->getBufferSize() < 64)
+ return 0;
+ sys::LLVMFileType type = sys::IdentifyFileType(Object->getBufferStart(),
+ static_cast<unsigned>(Object->getBufferSize()));
+ switch (type) {
+ case sys::Unknown_FileType:
+ return 0;
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType:
+ return createELFObjectFile(Object);
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::Mach_O_DSYMCompanion_FileType:
+ return createMachOObjectFile(Object);
+ case sys::COFF_FileType:
+ return createCOFFObjectFile(Object);
+ default:
+ llvm_unreachable("Unexpected Object File Type");
+ }
+}
+
+ObjectFile *ObjectFile::createObjectFile(StringRef ObjectPath) {
+ OwningPtr<MemoryBuffer> File;
+ if (MemoryBuffer::getFile(ObjectPath, File))
+ return NULL;
+ return createObjectFile(File.take());
+}
diff --git a/contrib/llvm/lib/Option/Arg.cpp b/contrib/llvm/lib/Option/Arg.cpp
new file mode 100644
index 000000000000..4c8da58f5368
--- /dev/null
+++ b/contrib/llvm/lib/Option/Arg.cpp
@@ -0,0 +1,122 @@
+//===--- Arg.cpp - Argument Implementations -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/Arg.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::opt;
+
+Arg::Arg(const Option _Opt, StringRef S, unsigned _Index, const Arg *_BaseArg)
+ : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
+ Claimed(false), OwnsValues(false) {
+}
+
+Arg::Arg(const Option _Opt, StringRef S, unsigned _Index,
+ const char *Value0, const Arg *_BaseArg)
+ : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
+ Claimed(false), OwnsValues(false) {
+ Values.push_back(Value0);
+}
+
+Arg::Arg(const Option _Opt, StringRef S, unsigned _Index,
+ const char *Value0, const char *Value1, const Arg *_BaseArg)
+ : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
+ Claimed(false), OwnsValues(false) {
+ Values.push_back(Value0);
+ Values.push_back(Value1);
+}
+
+Arg::~Arg() {
+ if (OwnsValues) {
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ delete[] Values[i];
+ }
+}
+
+void Arg::dump() const {
+ llvm::errs() << "<";
+
+ llvm::errs() << " Opt:";
+ Opt.dump();
+
+ llvm::errs() << " Index:" << Index;
+
+ llvm::errs() << " Values: [";
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ if (i) llvm::errs() << ", ";
+ llvm::errs() << "'" << Values[i] << "'";
+ }
+
+ llvm::errs() << "]>\n";
+}
+
+std::string Arg::getAsString(const ArgList &Args) const {
+ SmallString<256> Res;
+ llvm::raw_svector_ostream OS(Res);
+
+ ArgStringList ASL;
+ render(Args, ASL);
+ for (ArgStringList::iterator
+ it = ASL.begin(), ie = ASL.end(); it != ie; ++it) {
+ if (it != ASL.begin())
+ OS << ' ';
+ OS << *it;
+ }
+
+ return OS.str();
+}
+
+void Arg::renderAsInput(const ArgList &Args, ArgStringList &Output) const {
+ if (!getOption().hasNoOptAsInput()) {
+ render(Args, Output);
+ return;
+ }
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+ Output.push_back(getValue(i));
+}
+
+void Arg::render(const ArgList &Args, ArgStringList &Output) const {
+ switch (getOption().getRenderStyle()) {
+ case Option::RenderValuesStyle:
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+ Output.push_back(getValue(i));
+ break;
+
+ case Option::RenderCommaJoinedStyle: {
+ SmallString<256> Res;
+ llvm::raw_svector_ostream OS(Res);
+ OS << getSpelling();
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ',';
+ OS << getValue(i);
+ }
+ Output.push_back(Args.MakeArgString(OS.str()));
+ break;
+ }
+
+ case Option::RenderJoinedStyle:
+ Output.push_back(Args.GetOrMakeJoinedArgString(
+ getIndex(), getSpelling(), getValue(0)));
+ for (unsigned i = 1, e = getNumValues(); i != e; ++i)
+ Output.push_back(getValue(i));
+ break;
+
+ case Option::RenderSeparateStyle:
+ Output.push_back(Args.MakeArgString(getSpelling()));
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+ Output.push_back(getValue(i));
+ break;
+ }
+}
diff --git a/contrib/llvm/lib/Option/ArgList.cpp b/contrib/llvm/lib/Option/ArgList.cpp
new file mode 100644
index 000000000000..39b22d776ed9
--- /dev/null
+++ b/contrib/llvm/lib/Option/ArgList.cpp
@@ -0,0 +1,385 @@
+//===--- ArgList.cpp - Argument List Management ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/ArgList.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::opt;
+
+void arg_iterator::SkipToNextArg() {
+ for (; Current != Args.end(); ++Current) {
+ // Done if there are no filters.
+ if (!Id0.isValid())
+ break;
+
+ // Otherwise require a match.
+ const Option &O = (*Current)->getOption();
+ if (O.matches(Id0) ||
+ (Id1.isValid() && O.matches(Id1)) ||
+ (Id2.isValid() && O.matches(Id2)))
+ break;
+ }
+}
+
+//
+
+ArgList::ArgList() {
+}
+
+ArgList::~ArgList() {
+}
+
+void ArgList::append(Arg *A) {
+ Args.push_back(A);
+}
+
+void ArgList::eraseArg(OptSpecifier Id) {
+ for (iterator it = begin(), ie = end(); it != ie; ) {
+ if ((*it)->getOption().matches(Id)) {
+ it = Args.erase(it);
+ ie = end();
+ } else {
+ ++it;
+ }
+ }
+}
+
+Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const {
+ // FIXME: Make search efficient?
+ for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
+ if ((*it)->getOption().matches(Id))
+ return *it;
+ return 0;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1)) {
+ Res = *it;
+ Res->claim();
+
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) ||
+ (*it)->getOption().matches(Id3)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3,
+ OptSpecifier Id4) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) ||
+ (*it)->getOption().matches(Id3) ||
+ (*it)->getOption().matches(Id4)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3,
+ OptSpecifier Id4, OptSpecifier Id5) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) ||
+ (*it)->getOption().matches(Id3) ||
+ (*it)->getOption().matches(Id4) ||
+ (*it)->getOption().matches(Id5)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3,
+ OptSpecifier Id4, OptSpecifier Id5,
+ OptSpecifier Id6) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) ||
+ (*it)->getOption().matches(Id3) ||
+ (*it)->getOption().matches(Id4) ||
+ (*it)->getOption().matches(Id5) ||
+ (*it)->getOption().matches(Id6)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3,
+ OptSpecifier Id4, OptSpecifier Id5,
+ OptSpecifier Id6, OptSpecifier Id7) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) ||
+ (*it)->getOption().matches(Id3) ||
+ (*it)->getOption().matches(Id4) ||
+ (*it)->getOption().matches(Id5) ||
+ (*it)->getOption().matches(Id6) ||
+ (*it)->getOption().matches(Id7)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const {
+ if (Arg *A = getLastArg(Pos, Neg))
+ return A->getOption().matches(Pos);
+ return Default;
+}
+
+StringRef ArgList::getLastArgValue(OptSpecifier Id,
+ StringRef Default) const {
+ if (Arg *A = getLastArg(Id))
+ return A->getValue();
+ return Default;
+}
+
+std::vector<std::string> ArgList::getAllArgValues(OptSpecifier Id) const {
+ SmallVector<const char *, 16> Values;
+ AddAllArgValues(Values, Id);
+ return std::vector<std::string>(Values.begin(), Values.end());
+}
+
+void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id) const {
+ if (Arg *A = getLastArg(Id)) {
+ A->claim();
+ A->render(*this, Output);
+ }
+}
+
+void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0,
+ OptSpecifier Id1, OptSpecifier Id2) const {
+ for (arg_iterator it = filtered_begin(Id0, Id1, Id2),
+ ie = filtered_end(); it != ie; ++it) {
+ (*it)->claim();
+ (*it)->render(*this, Output);
+ }
+}
+
+void ArgList::AddAllArgValues(ArgStringList &Output, OptSpecifier Id0,
+ OptSpecifier Id1, OptSpecifier Id2) const {
+ for (arg_iterator it = filtered_begin(Id0, Id1, Id2),
+ ie = filtered_end(); it != ie; ++it) {
+ (*it)->claim();
+ for (unsigned i = 0, e = (*it)->getNumValues(); i != e; ++i)
+ Output.push_back((*it)->getValue(i));
+ }
+}
+
+void ArgList::AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0,
+ const char *Translation,
+ bool Joined) const {
+ for (arg_iterator it = filtered_begin(Id0),
+ ie = filtered_end(); it != ie; ++it) {
+ (*it)->claim();
+
+ if (Joined) {
+ Output.push_back(MakeArgString(StringRef(Translation) +
+ (*it)->getValue(0)));
+ } else {
+ Output.push_back(Translation);
+ Output.push_back((*it)->getValue(0));
+ }
+ }
+}
+
+void ArgList::ClaimAllArgs(OptSpecifier Id0) const {
+ for (arg_iterator it = filtered_begin(Id0),
+ ie = filtered_end(); it != ie; ++it)
+ (*it)->claim();
+}
+
+void ArgList::ClaimAllArgs() const {
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it)
+ if (!(*it)->isClaimed())
+ (*it)->claim();
+}
+
+const char *ArgList::MakeArgString(const Twine &T) const {
+ SmallString<256> Str;
+ T.toVector(Str);
+ return MakeArgString(Str.str());
+}
+
+const char *ArgList::GetOrMakeJoinedArgString(unsigned Index,
+ StringRef LHS,
+ StringRef RHS) const {
+ StringRef Cur = getArgString(Index);
+ if (Cur.size() == LHS.size() + RHS.size() &&
+ Cur.startswith(LHS) && Cur.endswith(RHS))
+ return Cur.data();
+
+ return MakeArgString(LHS + RHS);
+}
+
+//
+
+InputArgList::InputArgList(const char* const *ArgBegin,
+ const char* const *ArgEnd)
+ : NumInputArgStrings(ArgEnd - ArgBegin) {
+ ArgStrings.append(ArgBegin, ArgEnd);
+}
+
+InputArgList::~InputArgList() {
+ // An InputArgList always owns its arguments.
+ for (iterator it = begin(), ie = end(); it != ie; ++it)
+ delete *it;
+}
+
+unsigned InputArgList::MakeIndex(StringRef String0) const {
+ unsigned Index = ArgStrings.size();
+
+ // Tuck away so we have a reliable const char *.
+ SynthesizedStrings.push_back(String0);
+ ArgStrings.push_back(SynthesizedStrings.back().c_str());
+
+ return Index;
+}
+
+unsigned InputArgList::MakeIndex(StringRef String0,
+ StringRef String1) const {
+ unsigned Index0 = MakeIndex(String0);
+ unsigned Index1 = MakeIndex(String1);
+ assert(Index0 + 1 == Index1 && "Unexpected non-consecutive indices!");
+ (void) Index1;
+ return Index0;
+}
+
+const char *InputArgList::MakeArgString(StringRef Str) const {
+ return getArgString(MakeIndex(Str));
+}
+
+//
+
+DerivedArgList::DerivedArgList(const InputArgList &_BaseArgs)
+ : BaseArgs(_BaseArgs) {
+}
+
+DerivedArgList::~DerivedArgList() {
+ // We only own the arguments we explicitly synthesized.
+ for (iterator it = SynthesizedArgs.begin(), ie = SynthesizedArgs.end();
+ it != ie; ++it)
+ delete *it;
+}
+
+const char *DerivedArgList::MakeArgString(StringRef Str) const {
+ return BaseArgs.MakeArgString(Str);
+}
+
+Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const {
+ Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+ Twine(Opt.getName())),
+ BaseArgs.MakeIndex(Opt.getName()), BaseArg);
+ SynthesizedArgs.push_back(A);
+ return A;
+}
+
+Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) const {
+ unsigned Index = BaseArgs.MakeIndex(Value);
+ Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+ Twine(Opt.getName())),
+ Index, BaseArgs.getArgString(Index), BaseArg);
+ SynthesizedArgs.push_back(A);
+ return A;
+}
+
+Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) const {
+ unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
+ Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+ Twine(Opt.getName())),
+ Index, BaseArgs.getArgString(Index + 1), BaseArg);
+ SynthesizedArgs.push_back(A);
+ return A;
+}
+
+Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) const {
+ unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str());
+ Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+ Twine(Opt.getName())), Index,
+ BaseArgs.getArgString(Index) + Opt.getName().size(),
+ BaseArg);
+ SynthesizedArgs.push_back(A);
+ return A;
+}
diff --git a/contrib/llvm/lib/Option/OptTable.cpp b/contrib/llvm/lib/Option/OptTable.cpp
new file mode 100644
index 000000000000..5c8a0eacd1f4
--- /dev/null
+++ b/contrib/llvm/lib/Option/OptTable.cpp
@@ -0,0 +1,387 @@
+//===--- OptTable.cpp - Option Table Implementation -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/OptTable.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+using namespace llvm::opt;
+
+// Ordering on Info. The ordering is *almost* lexicographic, with two
+// exceptions. First, '\0' comes at the end of the alphabet instead of
+// the beginning (thus options precede any other options which prefix
+// them). Second, for options with the same name, the less permissive
+// version should come first; a Flag option should precede a Joined
+// option, for example.
+
+static int StrCmpOptionName(const char *A, const char *B) {
+ char a = *A, b = *B;
+ while (a == b) {
+ if (a == '\0')
+ return 0;
+
+ a = *++A;
+ b = *++B;
+ }
+
+ if (a == '\0') // A is a prefix of B.
+ return 1;
+ if (b == '\0') // B is a prefix of A.
+ return -1;
+
+ // Otherwise lexicographic.
+ return (a < b) ? -1 : 1;
+}
+
+namespace llvm {
+namespace opt {
+
+static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
+ if (&A == &B)
+ return false;
+
+ if (int N = StrCmpOptionName(A.Name, B.Name))
+ return N == -1;
+
+ for (const char * const *APre = A.Prefixes,
+ * const *BPre = B.Prefixes;
+ *APre != 0 && *BPre != 0; ++APre, ++BPre) {
+ if (int N = StrCmpOptionName(*APre, *BPre))
+ return N == -1;
+ }
+
+ // Names are the same, check that classes are in order; exactly one
+ // should be joined, and it should succeed the other.
+ assert(((A.Kind == Option::JoinedClass) ^ (B.Kind == Option::JoinedClass)) &&
+ "Unexpected classes for options with same name.");
+ return B.Kind == Option::JoinedClass;
+}
+
+// Support lower_bound between info and an option name.
+static inline bool operator<(const OptTable::Info &I, const char *Name) {
+ return StrCmpOptionName(I.Name, Name) == -1;
+}
+static inline bool operator<(const char *Name, const OptTable::Info &I) {
+ return StrCmpOptionName(Name, I.Name) == -1;
+}
+}
+}
+
+OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
+
+OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos)
+ : OptionInfos(_OptionInfos),
+ NumOptionInfos(_NumOptionInfos),
+ TheInputOptionID(0),
+ TheUnknownOptionID(0),
+ FirstSearchableIndex(0)
+{
+ // Explicitly zero initialize the error to work around a bug in array
+ // value-initialization on MinGW with gcc 4.3.5.
+
+ // Find start of normal options.
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ unsigned Kind = getInfo(i + 1).Kind;
+ if (Kind == Option::InputClass) {
+ assert(!TheInputOptionID && "Cannot have multiple input options!");
+ TheInputOptionID = getInfo(i + 1).ID;
+ } else if (Kind == Option::UnknownClass) {
+ assert(!TheUnknownOptionID && "Cannot have multiple unknown options!");
+ TheUnknownOptionID = getInfo(i + 1).ID;
+ } else if (Kind != Option::GroupClass) {
+ FirstSearchableIndex = i;
+ break;
+ }
+ }
+ assert(FirstSearchableIndex != 0 && "No searchable options?");
+
+#ifndef NDEBUG
+ // Check that everything after the first searchable option is a
+ // regular option class.
+ for (unsigned i = FirstSearchableIndex, e = getNumOptions(); i != e; ++i) {
+ Option::OptionClass Kind = (Option::OptionClass) getInfo(i + 1).Kind;
+ assert((Kind != Option::InputClass && Kind != Option::UnknownClass &&
+ Kind != Option::GroupClass) &&
+ "Special options should be defined first!");
+ }
+
+ // Check that options are in order.
+ for (unsigned i = FirstSearchableIndex + 1, e = getNumOptions(); i != e; ++i){
+ if (!(getInfo(i) < getInfo(i + 1))) {
+ getOption(i).dump();
+ getOption(i + 1).dump();
+ llvm_unreachable("Options are not in order!");
+ }
+ }
+#endif
+
+ // Build prefixes.
+ for (unsigned i = FirstSearchableIndex + 1, e = getNumOptions() + 1;
+ i != e; ++i) {
+ if (const char *const *P = getInfo(i).Prefixes) {
+ for (; *P != 0; ++P) {
+ PrefixesUnion.insert(*P);
+ }
+ }
+ }
+
+ // Build prefix chars.
+ for (llvm::StringSet<>::const_iterator I = PrefixesUnion.begin(),
+ E = PrefixesUnion.end(); I != E; ++I) {
+ StringRef Prefix = I->getKey();
+ for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end();
+ C != CE; ++C)
+ if (std::find(PrefixChars.begin(), PrefixChars.end(), *C)
+ == PrefixChars.end())
+ PrefixChars.push_back(*C);
+ }
+}
+
+OptTable::~OptTable() {
+}
+
+const Option OptTable::getOption(OptSpecifier Opt) const {
+ unsigned id = Opt.getID();
+ if (id == 0)
+ return Option(0, 0);
+ assert((unsigned) (id - 1) < getNumOptions() && "Invalid ID.");
+ return Option(&getInfo(id), this);
+}
+
+bool OptTable::isOptionHelpHidden(OptSpecifier id) const {
+ return getInfo(id).Flags & HelpHidden;
+}
+
+static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) {
+ if (Arg == "-")
+ return true;
+ for (llvm::StringSet<>::const_iterator I = Prefixes.begin(),
+ E = Prefixes.end(); I != E; ++I)
+ if (Arg.startswith(I->getKey()))
+ return false;
+ return true;
+}
+
+/// \returns Matched size. 0 means no match.
+static unsigned matchOption(const OptTable::Info *I, StringRef Str) {
+ for (const char * const *Pre = I->Prefixes; *Pre != 0; ++Pre) {
+ StringRef Prefix(*Pre);
+ if (Str.startswith(Prefix) && Str.substr(Prefix.size()).startswith(I->Name))
+ return Prefix.size() + StringRef(I->Name).size();
+ }
+ return 0;
+}
+
+Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index) const {
+ unsigned Prev = Index;
+ const char *Str = Args.getArgString(Index);
+
+ // Anything that doesn't start with PrefixesUnion is an input, as is '-'
+ // itself.
+ if (isInput(PrefixesUnion, Str))
+ return new Arg(getOption(TheInputOptionID), Str, Index++, Str);
+
+ const Info *Start = OptionInfos + FirstSearchableIndex;
+ const Info *End = OptionInfos + getNumOptions();
+ StringRef Name = StringRef(Str).ltrim(PrefixChars);
+
+ // Search for the first next option which could be a prefix.
+ Start = std::lower_bound(Start, End, Name.data());
+
+ // Options are stored in sorted order, with '\0' at the end of the
+ // alphabet. Since the only options which can accept a string must
+ // prefix it, we iteratively search for the next option which could
+ // be a prefix.
+ //
+ // FIXME: This is searching much more than necessary, but I am
+ // blanking on the simplest way to make it fast. We can solve this
+ // problem when we move to TableGen.
+ for (; Start != End; ++Start) {
+ unsigned ArgSize = 0;
+ // Scan for first option which is a proper prefix.
+ for (; Start != End; ++Start)
+ if ((ArgSize = matchOption(Start, Str)))
+ break;
+ if (Start == End)
+ break;
+
+ // See if this option matches.
+ if (Arg *A = Option(Start, this).accept(Args, Index, ArgSize))
+ return A;
+
+ // Otherwise, see if this argument was missing values.
+ if (Prev != Index)
+ return 0;
+ }
+
+ return new Arg(getOption(TheUnknownOptionID), Str, Index++, Str);
+}
+
+InputArgList *OptTable::ParseArgs(const char* const *ArgBegin,
+ const char* const *ArgEnd,
+ unsigned &MissingArgIndex,
+ unsigned &MissingArgCount) const {
+ InputArgList *Args = new InputArgList(ArgBegin, ArgEnd);
+
+ // FIXME: Handle '@' args (or at least error on them).
+
+ MissingArgIndex = MissingArgCount = 0;
+ unsigned Index = 0, End = ArgEnd - ArgBegin;
+ while (Index < End) {
+ // Ignore empty arguments (other things may still take them as arguments).
+ if (Args->getArgString(Index)[0] == '\0') {
+ ++Index;
+ continue;
+ }
+
+ unsigned Prev = Index;
+ Arg *A = ParseOneArg(*Args, Index);
+ assert(Index > Prev && "Parser failed to consume argument.");
+
+ // Check for missing argument error.
+ if (!A) {
+ assert(Index >= End && "Unexpected parser error.");
+ assert(Index - Prev - 1 && "No missing arguments!");
+ MissingArgIndex = Prev;
+ MissingArgCount = Index - Prev - 1;
+ break;
+ }
+
+ Args->append(A);
+ }
+
+ return Args;
+}
+
+static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
+ const Option O = Opts.getOption(Id);
+ std::string Name = O.getPrefixedName();
+
+ // Add metavar, if used.
+ switch (O.getKind()) {
+ case Option::GroupClass: case Option::InputClass: case Option::UnknownClass:
+ llvm_unreachable("Invalid option with help text.");
+
+ case Option::MultiArgClass:
+ llvm_unreachable("Cannot print metavar for this kind of option.");
+
+ case Option::FlagClass:
+ break;
+
+ case Option::SeparateClass: case Option::JoinedOrSeparateClass:
+ Name += ' ';
+ // FALLTHROUGH
+ case Option::JoinedClass: case Option::CommaJoinedClass:
+ case Option::JoinedAndSeparateClass:
+ if (const char *MetaVarName = Opts.getOptionMetaVar(Id))
+ Name += MetaVarName;
+ else
+ Name += "<value>";
+ break;
+ }
+
+ return Name;
+}
+
+static void PrintHelpOptionList(raw_ostream &OS, StringRef Title,
+ std::vector<std::pair<std::string,
+ const char*> > &OptionHelp) {
+ OS << Title << ":\n";
+
+ // Find the maximum option length.
+ unsigned OptionFieldWidth = 0;
+ for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) {
+ // Skip titles.
+ if (!OptionHelp[i].second)
+ continue;
+
+ // Limit the amount of padding we are willing to give up for alignment.
+ unsigned Length = OptionHelp[i].first.size();
+ if (Length <= 23)
+ OptionFieldWidth = std::max(OptionFieldWidth, Length);
+ }
+
+ const unsigned InitialPad = 2;
+ for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) {
+ const std::string &Option = OptionHelp[i].first;
+ int Pad = OptionFieldWidth - int(Option.size());
+ OS.indent(InitialPad) << Option;
+
+ // Break on long option names.
+ if (Pad < 0) {
+ OS << "\n";
+ Pad = OptionFieldWidth + InitialPad;
+ }
+ OS.indent(Pad + 1) << OptionHelp[i].second << '\n';
+ }
+}
+
+static const char *getOptionHelpGroup(const OptTable &Opts, OptSpecifier Id) {
+ unsigned GroupID = Opts.getOptionGroupID(Id);
+
+ // If not in a group, return the default help group.
+ if (!GroupID)
+ return "OPTIONS";
+
+ // Abuse the help text of the option groups to store the "help group"
+ // name.
+ //
+ // FIXME: Split out option groups.
+ if (const char *GroupHelp = Opts.getOptionHelpText(GroupID))
+ return GroupHelp;
+
+ // Otherwise keep looking.
+ return getOptionHelpGroup(Opts, GroupID);
+}
+
+void OptTable::PrintHelp(raw_ostream &OS, const char *Name,
+ const char *Title, bool ShowHidden) const {
+ OS << "OVERVIEW: " << Title << "\n";
+ OS << '\n';
+ OS << "USAGE: " << Name << " [options] <inputs>\n";
+ OS << '\n';
+
+ // Render help text into a map of group-name to a list of (option, help)
+ // pairs.
+ typedef std::map<std::string,
+ std::vector<std::pair<std::string, const char*> > > helpmap_ty;
+ helpmap_ty GroupedOptionHelp;
+
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ unsigned Id = i + 1;
+
+ // FIXME: Split out option groups.
+ if (getOptionKind(Id) == Option::GroupClass)
+ continue;
+
+ if (!ShowHidden && isOptionHelpHidden(Id))
+ continue;
+
+ if (const char *Text = getOptionHelpText(Id)) {
+ const char *HelpGroup = getOptionHelpGroup(*this, Id);
+ const std::string &OptName = getOptionHelpName(*this, Id);
+ GroupedOptionHelp[HelpGroup].push_back(std::make_pair(OptName, Text));
+ }
+ }
+
+ for (helpmap_ty::iterator it = GroupedOptionHelp .begin(),
+ ie = GroupedOptionHelp.end(); it != ie; ++it) {
+ if (it != GroupedOptionHelp .begin())
+ OS << "\n";
+ PrintHelpOptionList(OS, it->first, it->second);
+ }
+
+ OS.flush();
+}
diff --git a/contrib/llvm/lib/Option/Option.cpp b/contrib/llvm/lib/Option/Option.cpp
new file mode 100644
index 000000000000..0e2263475e0c
--- /dev/null
+++ b/contrib/llvm/lib/Option/Option.cpp
@@ -0,0 +1,202 @@
+//===--- Option.cpp - Abstract Driver Options -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/Option.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::opt;
+
+Option::Option(const OptTable::Info *info, const OptTable *owner)
+ : Info(info), Owner(owner) {
+
+ // Multi-level aliases are not supported, and alias options cannot
+ // have groups. This just simplifies option tracking, it is not an
+ // inherent limitation.
+ assert((!Info || !getAlias().isValid() || (!getAlias().getAlias().isValid() &&
+ !getGroup().isValid())) &&
+ "Multi-level aliases and aliases with groups are unsupported.");
+}
+
+Option::~Option() {
+}
+
+void Option::dump() const {
+ llvm::errs() << "<";
+ switch (getKind()) {
+#define P(N) case N: llvm::errs() << #N; break
+ P(GroupClass);
+ P(InputClass);
+ P(UnknownClass);
+ P(FlagClass);
+ P(JoinedClass);
+ P(SeparateClass);
+ P(CommaJoinedClass);
+ P(MultiArgClass);
+ P(JoinedOrSeparateClass);
+ P(JoinedAndSeparateClass);
+#undef P
+ }
+
+ llvm::errs() << " Prefixes:[";
+ for (const char * const *Pre = Info->Prefixes; *Pre != 0; ++Pre) {
+ llvm::errs() << '"' << *Pre << (*(Pre + 1) == 0 ? "\"" : "\", ");
+ }
+ llvm::errs() << ']';
+
+ llvm::errs() << " Name:\"" << getName() << '"';
+
+ const Option Group = getGroup();
+ if (Group.isValid()) {
+ llvm::errs() << " Group:";
+ Group.dump();
+ }
+
+ const Option Alias = getAlias();
+ if (Alias.isValid()) {
+ llvm::errs() << " Alias:";
+ Alias.dump();
+ }
+
+ if (getKind() == MultiArgClass)
+ llvm::errs() << " NumArgs:" << getNumArgs();
+
+ llvm::errs() << ">\n";
+}
+
+bool Option::matches(OptSpecifier Opt) const {
+ // Aliases are never considered in matching, look through them.
+ const Option Alias = getAlias();
+ if (Alias.isValid())
+ return Alias.matches(Opt);
+
+ // Check exact match.
+ if (getID() == Opt.getID())
+ return true;
+
+ const Option Group = getGroup();
+ if (Group.isValid())
+ return Group.matches(Opt);
+ return false;
+}
+
+Arg *Option::accept(const ArgList &Args,
+ unsigned &Index,
+ unsigned ArgSize) const {
+ const Option &UnaliasedOption = getUnaliasedOption();
+ StringRef Spelling;
+ // If the option was an alias, get the spelling from the unaliased one.
+ if (getID() == UnaliasedOption.getID()) {
+ Spelling = StringRef(Args.getArgString(Index), ArgSize);
+ } else {
+ Spelling = Args.MakeArgString(Twine(UnaliasedOption.getPrefix()) +
+ Twine(UnaliasedOption.getName()));
+ }
+
+ switch (getKind()) {
+ case FlagClass:
+ if (ArgSize != strlen(Args.getArgString(Index)))
+ return 0;
+
+ return new Arg(UnaliasedOption, Spelling, Index++);
+ case JoinedClass: {
+ const char *Value = Args.getArgString(Index) + ArgSize;
+ return new Arg(UnaliasedOption, Spelling, Index++, Value);
+ }
+ case CommaJoinedClass: {
+ // Always matches.
+ const char *Str = Args.getArgString(Index) + ArgSize;
+ Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
+
+ // Parse out the comma separated values.
+ const char *Prev = Str;
+ for (;; ++Str) {
+ char c = *Str;
+
+ if (!c || c == ',') {
+ if (Prev != Str) {
+ char *Value = new char[Str - Prev + 1];
+ memcpy(Value, Prev, Str - Prev);
+ Value[Str - Prev] = '\0';
+ A->getValues().push_back(Value);
+ }
+
+ if (!c)
+ break;
+
+ Prev = Str + 1;
+ }
+ }
+ A->setOwnsValues(true);
+
+ return A;
+ }
+ case SeparateClass:
+ // Matches iff this is an exact match.
+ // FIXME: Avoid strlen.
+ if (ArgSize != strlen(Args.getArgString(Index)))
+ return 0;
+
+ Index += 2;
+ if (Index > Args.getNumInputArgStrings())
+ return 0;
+
+ return new Arg(UnaliasedOption, Spelling,
+ Index - 2, Args.getArgString(Index - 1));
+ case MultiArgClass: {
+ // Matches iff this is an exact match.
+ // FIXME: Avoid strlen.
+ if (ArgSize != strlen(Args.getArgString(Index)))
+ return 0;
+
+ Index += 1 + getNumArgs();
+ if (Index > Args.getNumInputArgStrings())
+ return 0;
+
+ Arg *A = new Arg(UnaliasedOption, Spelling, Index - 1 - getNumArgs(),
+ Args.getArgString(Index - getNumArgs()));
+ for (unsigned i = 1; i != getNumArgs(); ++i)
+ A->getValues().push_back(Args.getArgString(Index - getNumArgs() + i));
+ return A;
+ }
+ case JoinedOrSeparateClass: {
+ // If this is not an exact match, it is a joined arg.
+ // FIXME: Avoid strlen.
+ if (ArgSize != strlen(Args.getArgString(Index))) {
+ const char *Value = Args.getArgString(Index) + ArgSize;
+ return new Arg(*this, Spelling, Index++, Value);
+ }
+
+ // Otherwise it must be separate.
+ Index += 2;
+ if (Index > Args.getNumInputArgStrings())
+ return 0;
+
+ return new Arg(UnaliasedOption, Spelling,
+ Index - 2, Args.getArgString(Index - 1));
+ }
+ case JoinedAndSeparateClass:
+ // Always matches.
+ Index += 2;
+ if (Index > Args.getNumInputArgStrings())
+ return 0;
+
+ return new Arg(UnaliasedOption, Spelling, Index - 2,
+ Args.getArgString(Index - 2) + ArgSize,
+ Args.getArgString(Index - 1));
+ default:
+ llvm_unreachable("Invalid option kind!");
+ }
+}
diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp
new file mode 100644
index 000000000000..6182e3415005
--- /dev/null
+++ b/contrib/llvm/lib/Support/APFloat.cpp
@@ -0,0 +1,3592 @@
+//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision floating
+// point values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstring>
+#include <limits.h>
+
+using namespace llvm;
+
+#define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
+
+/* Assumed in hexadecimal significand parsing, and conversion to
+ hexadecimal strings. */
+#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
+COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
+
+namespace llvm {
+
+ /* Represents floating point arithmetic semantics. */
+ struct fltSemantics {
+ /* The largest E such that 2^E is representable; this matches the
+ definition of IEEE 754. */
+ exponent_t maxExponent;
+
+ /* The smallest E such that 2^E is a normalized number; this
+ matches the definition of IEEE 754. */
+ exponent_t minExponent;
+
+ /* Number of bits in the significand. This includes the integer
+ bit. */
+ unsigned int precision;
+ };
+
+ const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
+ const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
+ const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
+ const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
+ const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
+ const fltSemantics APFloat::Bogus = { 0, 0, 0 };
+
+ /* The PowerPC format consists of two doubles. It does not map cleanly
+ onto the usual format above. It is approximated using twice the
+ mantissa bits. Note that for exponents near the double minimum,
+ we no longer can represent the full 106 mantissa bits, so those
+ will be treated as denormal numbers.
+
+ FIXME: While this approximation is equivalent to what GCC uses for
+ compile-time arithmetic on PPC double-double numbers, it is not able
+ to represent all possible values held by a PPC double-double number,
+ for example: (long double) 1.0 + (long double) 0x1p-106
+ Should this be replaced by a full emulation of PPC double-double? */
+ const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
+
+ /* A tight upper bound on number of parts required to hold the value
+ pow(5, power) is
+
+ power * 815 / (351 * integerPartWidth) + 1
+
+ However, whilst the result may require only this many parts,
+ because we are multiplying two values to get it, the
+ multiplication may require an extra part with the excess part
+ being zero (consider the trivial case of 1 * 1, tcFullMultiply
+ requires two parts to hold the single-part result). So we add an
+ extra one to guarantee enough space whilst multiplying. */
+ const unsigned int maxExponent = 16383;
+ const unsigned int maxPrecision = 113;
+ const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
+ const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
+ / (351 * integerPartWidth));
+}
+
+/* A bunch of private, handy routines. */
+
+static inline unsigned int
+partCountForBits(unsigned int bits)
+{
+ return ((bits) + integerPartWidth - 1) / integerPartWidth;
+}
+
+/* Returns 0U-9U. Return values >= 10U are not digits. */
+static inline unsigned int
+decDigitValue(unsigned int c)
+{
+ return c - '0';
+}
+
+/* Return the value of a decimal exponent of the form
+ [+-]ddddddd.
+
+ If the exponent overflows, returns a large exponent with the
+ appropriate sign. */
+static int
+readExponent(StringRef::iterator begin, StringRef::iterator end)
+{
+ bool isNegative;
+ unsigned int absExponent;
+ const unsigned int overlargeExponent = 24000; /* FIXME. */
+ StringRef::iterator p = begin;
+
+ assert(p != end && "Exponent has no digits");
+
+ isNegative = (*p == '-');
+ if (*p == '-' || *p == '+') {
+ p++;
+ assert(p != end && "Exponent has no digits");
+ }
+
+ absExponent = decDigitValue(*p++);
+ assert(absExponent < 10U && "Invalid character in exponent");
+
+ for (; p != end; ++p) {
+ unsigned int value;
+
+ value = decDigitValue(*p);
+ assert(value < 10U && "Invalid character in exponent");
+
+ value += absExponent * 10;
+ if (absExponent >= overlargeExponent) {
+ absExponent = overlargeExponent;
+ p = end; /* outwit assert below */
+ break;
+ }
+ absExponent = value;
+ }
+
+ assert(p == end && "Invalid exponent in exponent");
+
+ if (isNegative)
+ return -(int) absExponent;
+ else
+ return (int) absExponent;
+}
+
+/* This is ugly and needs cleaning up, but I don't immediately see
+ how whilst remaining safe. */
+static int
+totalExponent(StringRef::iterator p, StringRef::iterator end,
+ int exponentAdjustment)
+{
+ int unsignedExponent;
+ bool negative, overflow;
+ int exponent = 0;
+
+ assert(p != end && "Exponent has no digits");
+
+ negative = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ assert(p != end && "Exponent has no digits");
+ }
+
+ unsignedExponent = 0;
+ overflow = false;
+ for (; p != end; ++p) {
+ unsigned int value;
+
+ value = decDigitValue(*p);
+ assert(value < 10U && "Invalid character in exponent");
+
+ unsignedExponent = unsignedExponent * 10 + value;
+ if (unsignedExponent > 32767) {
+ overflow = true;
+ break;
+ }
+ }
+
+ if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
+ overflow = true;
+
+ if (!overflow) {
+ exponent = unsignedExponent;
+ if (negative)
+ exponent = -exponent;
+ exponent += exponentAdjustment;
+ if (exponent > 32767 || exponent < -32768)
+ overflow = true;
+ }
+
+ if (overflow)
+ exponent = negative ? -32768: 32767;
+
+ return exponent;
+}
+
+static StringRef::iterator
+skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
+ StringRef::iterator *dot)
+{
+ StringRef::iterator p = begin;
+ *dot = end;
+ while (*p == '0' && p != end)
+ p++;
+
+ if (*p == '.') {
+ *dot = p++;
+
+ assert(end - begin != 1 && "Significand has no digits");
+
+ while (*p == '0' && p != end)
+ p++;
+ }
+
+ return p;
+}
+
+/* Given a normal decimal floating point number of the form
+
+ dddd.dddd[eE][+-]ddd
+
+ where the decimal point and exponent are optional, fill out the
+ structure D. Exponent is appropriate if the significand is
+ treated as an integer, and normalizedExponent if the significand
+ is taken to have the decimal point after a single leading
+ non-zero digit.
+
+ If the value is zero, V->firstSigDigit points to a non-digit, and
+ the return exponent is zero.
+*/
+struct decimalInfo {
+ const char *firstSigDigit;
+ const char *lastSigDigit;
+ int exponent;
+ int normalizedExponent;
+};
+
+static void
+interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
+ decimalInfo *D)
+{
+ StringRef::iterator dot = end;
+ StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
+
+ D->firstSigDigit = p;
+ D->exponent = 0;
+ D->normalizedExponent = 0;
+
+ for (; p != end; ++p) {
+ if (*p == '.') {
+ assert(dot == end && "String contains multiple dots");
+ dot = p++;
+ if (p == end)
+ break;
+ }
+ if (decDigitValue(*p) >= 10U)
+ break;
+ }
+
+ if (p != end) {
+ assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
+ assert(p != begin && "Significand has no digits");
+ assert((dot == end || p - begin != 1) && "Significand has no digits");
+
+ /* p points to the first non-digit in the string */
+ D->exponent = readExponent(p + 1, end);
+
+ /* Implied decimal point? */
+ if (dot == end)
+ dot = p;
+ }
+
+ /* If number is all zeroes accept any exponent. */
+ if (p != D->firstSigDigit) {
+ /* Drop insignificant trailing zeroes. */
+ if (p != begin) {
+ do
+ do
+ p--;
+ while (p != begin && *p == '0');
+ while (p != begin && *p == '.');
+ }
+
+ /* Adjust the exponents for any decimal point. */
+ D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
+ D->normalizedExponent = (D->exponent +
+ static_cast<exponent_t>((p - D->firstSigDigit)
+ - (dot > D->firstSigDigit && dot < p)));
+ }
+
+ D->lastSigDigit = p;
+}
+
+/* Return the trailing fraction of a hexadecimal number.
+ DIGITVALUE is the first hex digit of the fraction, P points to
+ the next digit. */
+static lostFraction
+trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
+ unsigned int digitValue)
+{
+ unsigned int hexDigit;
+
+ /* If the first trailing digit isn't 0 or 8 we can work out the
+ fraction immediately. */
+ if (digitValue > 8)
+ return lfMoreThanHalf;
+ else if (digitValue < 8 && digitValue > 0)
+ return lfLessThanHalf;
+
+ /* Otherwise we need to find the first non-zero digit. */
+ while (*p == '0')
+ p++;
+
+ assert(p != end && "Invalid trailing hexadecimal fraction!");
+
+ hexDigit = hexDigitValue(*p);
+
+ /* If we ran off the end it is exactly zero or one-half, otherwise
+ a little more. */
+ if (hexDigit == -1U)
+ return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
+ else
+ return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
+}
+
+/* Return the fraction lost were a bignum truncated losing the least
+ significant BITS bits. */
+static lostFraction
+lostFractionThroughTruncation(const integerPart *parts,
+ unsigned int partCount,
+ unsigned int bits)
+{
+ unsigned int lsb;
+
+ lsb = APInt::tcLSB(parts, partCount);
+
+ /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
+ if (bits <= lsb)
+ return lfExactlyZero;
+ if (bits == lsb + 1)
+ return lfExactlyHalf;
+ if (bits <= partCount * integerPartWidth &&
+ APInt::tcExtractBit(parts, bits - 1))
+ return lfMoreThanHalf;
+
+ return lfLessThanHalf;
+}
+
+/* Shift DST right BITS bits noting lost fraction. */
+static lostFraction
+shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
+{
+ lostFraction lost_fraction;
+
+ lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
+
+ APInt::tcShiftRight(dst, parts, bits);
+
+ return lost_fraction;
+}
+
+/* Combine the effect of two lost fractions. */
+static lostFraction
+combineLostFractions(lostFraction moreSignificant,
+ lostFraction lessSignificant)
+{
+ if (lessSignificant != lfExactlyZero) {
+ if (moreSignificant == lfExactlyZero)
+ moreSignificant = lfLessThanHalf;
+ else if (moreSignificant == lfExactlyHalf)
+ moreSignificant = lfMoreThanHalf;
+ }
+
+ return moreSignificant;
+}
+
+/* The error from the true value, in half-ulps, on multiplying two
+ floating point numbers, which differ from the value they
+ approximate by at most HUE1 and HUE2 half-ulps, is strictly less
+ than the returned value.
+
+ See "How to Read Floating Point Numbers Accurately" by William D
+ Clinger. */
+static unsigned int
+HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
+{
+ assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
+
+ if (HUerr1 + HUerr2 == 0)
+ return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
+ else
+ return inexactMultiply + 2 * (HUerr1 + HUerr2);
+}
+
+/* The number of ulps from the boundary (zero, or half if ISNEAREST)
+ when the least significant BITS are truncated. BITS cannot be
+ zero. */
+static integerPart
+ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
+{
+ unsigned int count, partBits;
+ integerPart part, boundary;
+
+ assert(bits != 0);
+
+ bits--;
+ count = bits / integerPartWidth;
+ partBits = bits % integerPartWidth + 1;
+
+ part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
+
+ if (isNearest)
+ boundary = (integerPart) 1 << (partBits - 1);
+ else
+ boundary = 0;
+
+ if (count == 0) {
+ if (part - boundary <= boundary - part)
+ return part - boundary;
+ else
+ return boundary - part;
+ }
+
+ if (part == boundary) {
+ while (--count)
+ if (parts[count])
+ return ~(integerPart) 0; /* A lot. */
+
+ return parts[0];
+ } else if (part == boundary - 1) {
+ while (--count)
+ if (~parts[count])
+ return ~(integerPart) 0; /* A lot. */
+
+ return -parts[0];
+ }
+
+ return ~(integerPart) 0; /* A lot. */
+}
+
+/* Place pow(5, power) in DST, and return the number of parts used.
+ DST must be at least one part larger than size of the answer. */
+static unsigned int
+powerOf5(integerPart *dst, unsigned int power)
+{
+ static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
+ 15625, 78125 };
+ integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
+ pow5s[0] = 78125 * 5;
+
+ unsigned int partsCount[16] = { 1 };
+ integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
+ unsigned int result;
+ assert(power <= maxExponent);
+
+ p1 = dst;
+ p2 = scratch;
+
+ *p1 = firstEightPowers[power & 7];
+ power >>= 3;
+
+ result = 1;
+ pow5 = pow5s;
+
+ for (unsigned int n = 0; power; power >>= 1, n++) {
+ unsigned int pc;
+
+ pc = partsCount[n];
+
+ /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
+ if (pc == 0) {
+ pc = partsCount[n - 1];
+ APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
+ pc *= 2;
+ if (pow5[pc - 1] == 0)
+ pc--;
+ partsCount[n] = pc;
+ }
+
+ if (power & 1) {
+ integerPart *tmp;
+
+ APInt::tcFullMultiply(p2, p1, pow5, result, pc);
+ result += pc;
+ if (p2[result - 1] == 0)
+ result--;
+
+ /* Now result is in p1 with partsCount parts and p2 is scratch
+ space. */
+ tmp = p1, p1 = p2, p2 = tmp;
+ }
+
+ pow5 += pc;
+ }
+
+ if (p1 != dst)
+ APInt::tcAssign(dst, p1, result);
+
+ return result;
+}
+
+/* Zero at the end to avoid modular arithmetic when adding one; used
+ when rounding up during hexadecimal output. */
+static const char hexDigitsLower[] = "0123456789abcdef0";
+static const char hexDigitsUpper[] = "0123456789ABCDEF0";
+static const char infinityL[] = "infinity";
+static const char infinityU[] = "INFINITY";
+static const char NaNL[] = "nan";
+static const char NaNU[] = "NAN";
+
+/* Write out an integerPart in hexadecimal, starting with the most
+ significant nibble. Write out exactly COUNT hexdigits, return
+ COUNT. */
+static unsigned int
+partAsHex (char *dst, integerPart part, unsigned int count,
+ const char *hexDigitChars)
+{
+ unsigned int result = count;
+
+ assert(count != 0 && count <= integerPartWidth / 4);
+
+ part >>= (integerPartWidth - 4 * count);
+ while (count--) {
+ dst[count] = hexDigitChars[part & 0xf];
+ part >>= 4;
+ }
+
+ return result;
+}
+
+/* Write out an unsigned decimal integer. */
+static char *
+writeUnsignedDecimal (char *dst, unsigned int n)
+{
+ char buff[40], *p;
+
+ p = buff;
+ do
+ *p++ = '0' + n % 10;
+ while (n /= 10);
+
+ do
+ *dst++ = *--p;
+ while (p != buff);
+
+ return dst;
+}
+
+/* Write out a signed decimal integer. */
+static char *
+writeSignedDecimal (char *dst, int value)
+{
+ if (value < 0) {
+ *dst++ = '-';
+ dst = writeUnsignedDecimal(dst, -(unsigned) value);
+ } else
+ dst = writeUnsignedDecimal(dst, value);
+
+ return dst;
+}
+
+/* Constructors. */
+void
+APFloat::initialize(const fltSemantics *ourSemantics)
+{
+ unsigned int count;
+
+ semantics = ourSemantics;
+ count = partCount();
+ if (count > 1)
+ significand.parts = new integerPart[count];
+}
+
+void
+APFloat::freeSignificand()
+{
+ if (partCount() > 1)
+ delete [] significand.parts;
+}
+
+void
+APFloat::assign(const APFloat &rhs)
+{
+ assert(semantics == rhs.semantics);
+
+ sign = rhs.sign;
+ category = rhs.category;
+ exponent = rhs.exponent;
+ if (category == fcNormal || category == fcNaN)
+ copySignificand(rhs);
+}
+
+void
+APFloat::copySignificand(const APFloat &rhs)
+{
+ assert(category == fcNormal || category == fcNaN);
+ assert(rhs.partCount() >= partCount());
+
+ APInt::tcAssign(significandParts(), rhs.significandParts(),
+ partCount());
+}
+
+/* Make this number a NaN, with an arbitrary but deterministic value
+ for the significand. If double or longer, this is a signalling NaN,
+ which may not be ideal. If float, this is QNaN(0). */
+void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
+{
+ category = fcNaN;
+ sign = Negative;
+
+ integerPart *significand = significandParts();
+ unsigned numParts = partCount();
+
+ // Set the significand bits to the fill.
+ if (!fill || fill->getNumWords() < numParts)
+ APInt::tcSet(significand, 0, numParts);
+ if (fill) {
+ APInt::tcAssign(significand, fill->getRawData(),
+ std::min(fill->getNumWords(), numParts));
+
+ // Zero out the excess bits of the significand.
+ unsigned bitsToPreserve = semantics->precision - 1;
+ unsigned part = bitsToPreserve / 64;
+ bitsToPreserve %= 64;
+ significand[part] &= ((1ULL << bitsToPreserve) - 1);
+ for (part++; part != numParts; ++part)
+ significand[part] = 0;
+ }
+
+ unsigned QNaNBit = semantics->precision - 2;
+
+ if (SNaN) {
+ // We always have to clear the QNaN bit to make it an SNaN.
+ APInt::tcClearBit(significand, QNaNBit);
+
+ // If there are no bits set in the payload, we have to set
+ // *something* to make it a NaN instead of an infinity;
+ // conventionally, this is the next bit down from the QNaN bit.
+ if (APInt::tcIsZero(significand, numParts))
+ APInt::tcSetBit(significand, QNaNBit - 1);
+ } else {
+ // We always have to set the QNaN bit to make it a QNaN.
+ APInt::tcSetBit(significand, QNaNBit);
+ }
+
+ // For x87 extended precision, we want to make a NaN, not a
+ // pseudo-NaN. Maybe we should expose the ability to make
+ // pseudo-NaNs?
+ if (semantics == &APFloat::x87DoubleExtended)
+ APInt::tcSetBit(significand, QNaNBit + 1);
+}
+
+APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
+ const APInt *fill) {
+ APFloat value(Sem, uninitialized);
+ value.makeNaN(SNaN, Negative, fill);
+ return value;
+}
+
+APFloat &
+APFloat::operator=(const APFloat &rhs)
+{
+ if (this != &rhs) {
+ if (semantics != rhs.semantics) {
+ freeSignificand();
+ initialize(rhs.semantics);
+ }
+ assign(rhs);
+ }
+
+ return *this;
+}
+
+bool
+APFloat::isDenormal() const {
+ return isNormal() && (exponent == semantics->minExponent) &&
+ (APInt::tcExtractBit(significandParts(),
+ semantics->precision - 1) == 0);
+}
+
+bool
+APFloat::bitwiseIsEqual(const APFloat &rhs) const {
+ if (this == &rhs)
+ return true;
+ if (semantics != rhs.semantics ||
+ category != rhs.category ||
+ sign != rhs.sign)
+ return false;
+ if (category==fcZero || category==fcInfinity)
+ return true;
+ else if (category==fcNormal && exponent!=rhs.exponent)
+ return false;
+ else {
+ int i= partCount();
+ const integerPart* p=significandParts();
+ const integerPart* q=rhs.significandParts();
+ for (; i>0; i--, p++, q++) {
+ if (*p != *q)
+ return false;
+ }
+ return true;
+ }
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
+ initialize(&ourSemantics);
+ sign = 0;
+ zeroSignificand();
+ exponent = ourSemantics.precision - 1;
+ significandParts()[0] = value;
+ normalize(rmNearestTiesToEven, lfExactlyZero);
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics) {
+ initialize(&ourSemantics);
+ category = fcZero;
+ sign = false;
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
+ // Allocates storage if necessary but does not initialize it.
+ initialize(&ourSemantics);
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics,
+ fltCategory ourCategory, bool negative) {
+ initialize(&ourSemantics);
+ category = ourCategory;
+ sign = negative;
+ if (category == fcNormal)
+ category = fcZero;
+ else if (ourCategory == fcNaN)
+ makeNaN();
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
+ initialize(&ourSemantics);
+ convertFromString(text, rmNearestTiesToEven);
+}
+
+APFloat::APFloat(const APFloat &rhs) {
+ initialize(rhs.semantics);
+ assign(rhs);
+}
+
+APFloat::~APFloat()
+{
+ freeSignificand();
+}
+
+// Profile - This method 'profiles' an APFloat for use with FoldingSet.
+void APFloat::Profile(FoldingSetNodeID& ID) const {
+ ID.Add(bitcastToAPInt());
+}
+
+unsigned int
+APFloat::partCount() const
+{
+ return partCountForBits(semantics->precision + 1);
+}
+
+unsigned int
+APFloat::semanticsPrecision(const fltSemantics &semantics)
+{
+ return semantics.precision;
+}
+
+const integerPart *
+APFloat::significandParts() const
+{
+ return const_cast<APFloat *>(this)->significandParts();
+}
+
+integerPart *
+APFloat::significandParts()
+{
+ assert(category == fcNormal || category == fcNaN);
+
+ if (partCount() > 1)
+ return significand.parts;
+ else
+ return &significand.part;
+}
+
+void
+APFloat::zeroSignificand()
+{
+ category = fcNormal;
+ APInt::tcSet(significandParts(), 0, partCount());
+}
+
+/* Increment an fcNormal floating point number's significand. */
+void
+APFloat::incrementSignificand()
+{
+ integerPart carry;
+
+ carry = APInt::tcIncrement(significandParts(), partCount());
+
+ /* Our callers should never cause us to overflow. */
+ assert(carry == 0);
+ (void)carry;
+}
+
+/* Add the significand of the RHS. Returns the carry flag. */
+integerPart
+APFloat::addSignificand(const APFloat &rhs)
+{
+ integerPart *parts;
+
+ parts = significandParts();
+
+ assert(semantics == rhs.semantics);
+ assert(exponent == rhs.exponent);
+
+ return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
+}
+
+/* Subtract the significand of the RHS with a borrow flag. Returns
+ the borrow flag. */
+integerPart
+APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
+{
+ integerPart *parts;
+
+ parts = significandParts();
+
+ assert(semantics == rhs.semantics);
+ assert(exponent == rhs.exponent);
+
+ return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
+ partCount());
+}
+
+/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
+ on to the full-precision result of the multiplication. Returns the
+ lost fraction. */
+lostFraction
+APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
+{
+ unsigned int omsb; // One, not zero, based MSB.
+ unsigned int partsCount, newPartsCount, precision;
+ integerPart *lhsSignificand;
+ integerPart scratch[4];
+ integerPart *fullSignificand;
+ lostFraction lost_fraction;
+ bool ignored;
+
+ assert(semantics == rhs.semantics);
+
+ precision = semantics->precision;
+ newPartsCount = partCountForBits(precision * 2);
+
+ if (newPartsCount > 4)
+ fullSignificand = new integerPart[newPartsCount];
+ else
+ fullSignificand = scratch;
+
+ lhsSignificand = significandParts();
+ partsCount = partCount();
+
+ APInt::tcFullMultiply(fullSignificand, lhsSignificand,
+ rhs.significandParts(), partsCount, partsCount);
+
+ lost_fraction = lfExactlyZero;
+ omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+ exponent += rhs.exponent;
+
+ if (addend) {
+ Significand savedSignificand = significand;
+ const fltSemantics *savedSemantics = semantics;
+ fltSemantics extendedSemantics;
+ opStatus status;
+ unsigned int extendedPrecision;
+
+ /* Normalize our MSB. */
+ extendedPrecision = precision + precision - 1;
+ if (omsb != extendedPrecision) {
+ APInt::tcShiftLeft(fullSignificand, newPartsCount,
+ extendedPrecision - omsb);
+ exponent -= extendedPrecision - omsb;
+ }
+
+ /* Create new semantics. */
+ extendedSemantics = *semantics;
+ extendedSemantics.precision = extendedPrecision;
+
+ if (newPartsCount == 1)
+ significand.part = fullSignificand[0];
+ else
+ significand.parts = fullSignificand;
+ semantics = &extendedSemantics;
+
+ APFloat extendedAddend(*addend);
+ status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
+ assert(status == opOK);
+ (void)status;
+ lost_fraction = addOrSubtractSignificand(extendedAddend, false);
+
+ /* Restore our state. */
+ if (newPartsCount == 1)
+ fullSignificand[0] = significand.part;
+ significand = savedSignificand;
+ semantics = savedSemantics;
+
+ omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+ }
+
+ exponent -= (precision - 1);
+
+ if (omsb > precision) {
+ unsigned int bits, significantParts;
+ lostFraction lf;
+
+ bits = omsb - precision;
+ significantParts = partCountForBits(omsb);
+ lf = shiftRight(fullSignificand, significantParts, bits);
+ lost_fraction = combineLostFractions(lf, lost_fraction);
+ exponent += bits;
+ }
+
+ APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
+
+ if (newPartsCount > 4)
+ delete [] fullSignificand;
+
+ return lost_fraction;
+}
+
+/* Multiply the significands of LHS and RHS to DST. */
+lostFraction
+APFloat::divideSignificand(const APFloat &rhs)
+{
+ unsigned int bit, i, partsCount;
+ const integerPart *rhsSignificand;
+ integerPart *lhsSignificand, *dividend, *divisor;
+ integerPart scratch[4];
+ lostFraction lost_fraction;
+
+ assert(semantics == rhs.semantics);
+
+ lhsSignificand = significandParts();
+ rhsSignificand = rhs.significandParts();
+ partsCount = partCount();
+
+ if (partsCount > 2)
+ dividend = new integerPart[partsCount * 2];
+ else
+ dividend = scratch;
+
+ divisor = dividend + partsCount;
+
+ /* Copy the dividend and divisor as they will be modified in-place. */
+ for (i = 0; i < partsCount; i++) {
+ dividend[i] = lhsSignificand[i];
+ divisor[i] = rhsSignificand[i];
+ lhsSignificand[i] = 0;
+ }
+
+ exponent -= rhs.exponent;
+
+ unsigned int precision = semantics->precision;
+
+ /* Normalize the divisor. */
+ bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
+ if (bit) {
+ exponent += bit;
+ APInt::tcShiftLeft(divisor, partsCount, bit);
+ }
+
+ /* Normalize the dividend. */
+ bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
+ if (bit) {
+ exponent -= bit;
+ APInt::tcShiftLeft(dividend, partsCount, bit);
+ }
+
+ /* Ensure the dividend >= divisor initially for the loop below.
+ Incidentally, this means that the division loop below is
+ guaranteed to set the integer bit to one. */
+ if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
+ exponent--;
+ APInt::tcShiftLeft(dividend, partsCount, 1);
+ assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
+ }
+
+ /* Long division. */
+ for (bit = precision; bit; bit -= 1) {
+ if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
+ APInt::tcSubtract(dividend, divisor, 0, partsCount);
+ APInt::tcSetBit(lhsSignificand, bit - 1);
+ }
+
+ APInt::tcShiftLeft(dividend, partsCount, 1);
+ }
+
+ /* Figure out the lost fraction. */
+ int cmp = APInt::tcCompare(dividend, divisor, partsCount);
+
+ if (cmp > 0)
+ lost_fraction = lfMoreThanHalf;
+ else if (cmp == 0)
+ lost_fraction = lfExactlyHalf;
+ else if (APInt::tcIsZero(dividend, partsCount))
+ lost_fraction = lfExactlyZero;
+ else
+ lost_fraction = lfLessThanHalf;
+
+ if (partsCount > 2)
+ delete [] dividend;
+
+ return lost_fraction;
+}
+
+unsigned int
+APFloat::significandMSB() const
+{
+ return APInt::tcMSB(significandParts(), partCount());
+}
+
+unsigned int
+APFloat::significandLSB() const
+{
+ return APInt::tcLSB(significandParts(), partCount());
+}
+
+/* Note that a zero result is NOT normalized to fcZero. */
+lostFraction
+APFloat::shiftSignificandRight(unsigned int bits)
+{
+ /* Our exponent should not overflow. */
+ assert((exponent_t) (exponent + bits) >= exponent);
+
+ exponent += bits;
+
+ return shiftRight(significandParts(), partCount(), bits);
+}
+
+/* Shift the significand left BITS bits, subtract BITS from its exponent. */
+void
+APFloat::shiftSignificandLeft(unsigned int bits)
+{
+ assert(bits < semantics->precision);
+
+ if (bits) {
+ unsigned int partsCount = partCount();
+
+ APInt::tcShiftLeft(significandParts(), partsCount, bits);
+ exponent -= bits;
+
+ assert(!APInt::tcIsZero(significandParts(), partsCount));
+ }
+}
+
+APFloat::cmpResult
+APFloat::compareAbsoluteValue(const APFloat &rhs) const
+{
+ int compare;
+
+ assert(semantics == rhs.semantics);
+ assert(category == fcNormal);
+ assert(rhs.category == fcNormal);
+
+ compare = exponent - rhs.exponent;
+
+ /* If exponents are equal, do an unsigned bignum comparison of the
+ significands. */
+ if (compare == 0)
+ compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
+ partCount());
+
+ if (compare > 0)
+ return cmpGreaterThan;
+ else if (compare < 0)
+ return cmpLessThan;
+ else
+ return cmpEqual;
+}
+
+/* Handle overflow. Sign is preserved. We either become infinity or
+ the largest finite number. */
+APFloat::opStatus
+APFloat::handleOverflow(roundingMode rounding_mode)
+{
+ /* Infinity? */
+ if (rounding_mode == rmNearestTiesToEven ||
+ rounding_mode == rmNearestTiesToAway ||
+ (rounding_mode == rmTowardPositive && !sign) ||
+ (rounding_mode == rmTowardNegative && sign)) {
+ category = fcInfinity;
+ return (opStatus) (opOverflow | opInexact);
+ }
+
+ /* Otherwise we become the largest finite number. */
+ category = fcNormal;
+ exponent = semantics->maxExponent;
+ APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
+ semantics->precision);
+
+ return opInexact;
+}
+
+/* Returns TRUE if, when truncating the current number, with BIT the
+ new LSB, with the given lost fraction and rounding mode, the result
+ would need to be rounded away from zero (i.e., by increasing the
+ signficand). This routine must work for fcZero of both signs, and
+ fcNormal numbers. */
+bool
+APFloat::roundAwayFromZero(roundingMode rounding_mode,
+ lostFraction lost_fraction,
+ unsigned int bit) const
+{
+ /* NaNs and infinities should not have lost fractions. */
+ assert(category == fcNormal || category == fcZero);
+
+ /* Current callers never pass this so we don't handle it. */
+ assert(lost_fraction != lfExactlyZero);
+
+ switch (rounding_mode) {
+ case rmNearestTiesToAway:
+ return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
+
+ case rmNearestTiesToEven:
+ if (lost_fraction == lfMoreThanHalf)
+ return true;
+
+ /* Our zeroes don't have a significand to test. */
+ if (lost_fraction == lfExactlyHalf && category != fcZero)
+ return APInt::tcExtractBit(significandParts(), bit);
+
+ return false;
+
+ case rmTowardZero:
+ return false;
+
+ case rmTowardPositive:
+ return sign == false;
+
+ case rmTowardNegative:
+ return sign == true;
+ }
+ llvm_unreachable("Invalid rounding mode found");
+}
+
+APFloat::opStatus
+APFloat::normalize(roundingMode rounding_mode,
+ lostFraction lost_fraction)
+{
+ unsigned int omsb; /* One, not zero, based MSB. */
+ int exponentChange;
+
+ if (category != fcNormal)
+ return opOK;
+
+ /* Before rounding normalize the exponent of fcNormal numbers. */
+ omsb = significandMSB() + 1;
+
+ if (omsb) {
+ /* OMSB is numbered from 1. We want to place it in the integer
+ bit numbered PRECISION if possible, with a compensating change in
+ the exponent. */
+ exponentChange = omsb - semantics->precision;
+
+ /* If the resulting exponent is too high, overflow according to
+ the rounding mode. */
+ if (exponent + exponentChange > semantics->maxExponent)
+ return handleOverflow(rounding_mode);
+
+ /* Subnormal numbers have exponent minExponent, and their MSB
+ is forced based on that. */
+ if (exponent + exponentChange < semantics->minExponent)
+ exponentChange = semantics->minExponent - exponent;
+
+ /* Shifting left is easy as we don't lose precision. */
+ if (exponentChange < 0) {
+ assert(lost_fraction == lfExactlyZero);
+
+ shiftSignificandLeft(-exponentChange);
+
+ return opOK;
+ }
+
+ if (exponentChange > 0) {
+ lostFraction lf;
+
+ /* Shift right and capture any new lost fraction. */
+ lf = shiftSignificandRight(exponentChange);
+
+ lost_fraction = combineLostFractions(lf, lost_fraction);
+
+ /* Keep OMSB up-to-date. */
+ if (omsb > (unsigned) exponentChange)
+ omsb -= exponentChange;
+ else
+ omsb = 0;
+ }
+ }
+
+ /* Now round the number according to rounding_mode given the lost
+ fraction. */
+
+ /* As specified in IEEE 754, since we do not trap we do not report
+ underflow for exact results. */
+ if (lost_fraction == lfExactlyZero) {
+ /* Canonicalize zeroes. */
+ if (omsb == 0)
+ category = fcZero;
+
+ return opOK;
+ }
+
+ /* Increment the significand if we're rounding away from zero. */
+ if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
+ if (omsb == 0)
+ exponent = semantics->minExponent;
+
+ incrementSignificand();
+ omsb = significandMSB() + 1;
+
+ /* Did the significand increment overflow? */
+ if (omsb == (unsigned) semantics->precision + 1) {
+ /* Renormalize by incrementing the exponent and shifting our
+ significand right one. However if we already have the
+ maximum exponent we overflow to infinity. */
+ if (exponent == semantics->maxExponent) {
+ category = fcInfinity;
+
+ return (opStatus) (opOverflow | opInexact);
+ }
+
+ shiftSignificandRight(1);
+
+ return opInexact;
+ }
+ }
+
+ /* The normal case - we were and are not denormal, and any
+ significand increment above didn't overflow. */
+ if (omsb == semantics->precision)
+ return opInexact;
+
+ /* We have a non-zero denormal. */
+ assert(omsb < semantics->precision);
+
+ /* Canonicalize zeroes. */
+ if (omsb == 0)
+ category = fcZero;
+
+ /* The fcZero case is a denormal that underflowed to zero. */
+ return (opStatus) (opUnderflow | opInexact);
+}
+
+APFloat::opStatus
+APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcNormal, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcZero):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcZero, fcInfinity):
+ category = fcInfinity;
+ sign = rhs.sign ^ subtract;
+ return opOK;
+
+ case convolve(fcZero, fcNormal):
+ assign(rhs);
+ sign = rhs.sign ^ subtract;
+ return opOK;
+
+ case convolve(fcZero, fcZero):
+ /* Sign depends on rounding mode; handled by caller. */
+ return opOK;
+
+ case convolve(fcInfinity, fcInfinity):
+ /* Differently signed infinities can only be validly
+ subtracted. */
+ if (((sign ^ rhs.sign)!=0) != subtract) {
+ makeNaN();
+ return opInvalidOp;
+ }
+
+ return opOK;
+
+ case convolve(fcNormal, fcNormal):
+ return opDivByZero;
+ }
+}
+
+/* Add or subtract two normal numbers. */
+lostFraction
+APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
+{
+ integerPart carry;
+ lostFraction lost_fraction;
+ int bits;
+
+ /* Determine if the operation on the absolute values is effectively
+ an addition or subtraction. */
+ subtract ^= (sign ^ rhs.sign) ? true : false;
+
+ /* Are we bigger exponent-wise than the RHS? */
+ bits = exponent - rhs.exponent;
+
+ /* Subtraction is more subtle than one might naively expect. */
+ if (subtract) {
+ APFloat temp_rhs(rhs);
+ bool reverse;
+
+ if (bits == 0) {
+ reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
+ lost_fraction = lfExactlyZero;
+ } else if (bits > 0) {
+ lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
+ shiftSignificandLeft(1);
+ reverse = false;
+ } else {
+ lost_fraction = shiftSignificandRight(-bits - 1);
+ temp_rhs.shiftSignificandLeft(1);
+ reverse = true;
+ }
+
+ if (reverse) {
+ carry = temp_rhs.subtractSignificand
+ (*this, lost_fraction != lfExactlyZero);
+ copySignificand(temp_rhs);
+ sign = !sign;
+ } else {
+ carry = subtractSignificand
+ (temp_rhs, lost_fraction != lfExactlyZero);
+ }
+
+ /* Invert the lost fraction - it was on the RHS and
+ subtracted. */
+ if (lost_fraction == lfLessThanHalf)
+ lost_fraction = lfMoreThanHalf;
+ else if (lost_fraction == lfMoreThanHalf)
+ lost_fraction = lfLessThanHalf;
+
+ /* The code above is intended to ensure that no borrow is
+ necessary. */
+ assert(!carry);
+ (void)carry;
+ } else {
+ if (bits > 0) {
+ APFloat temp_rhs(rhs);
+
+ lost_fraction = temp_rhs.shiftSignificandRight(bits);
+ carry = addSignificand(temp_rhs);
+ } else {
+ lost_fraction = shiftSignificandRight(-bits);
+ carry = addSignificand(rhs);
+ }
+
+ /* We have a guard bit; generating a carry cannot happen. */
+ assert(!carry);
+ (void)carry;
+ }
+
+ return lost_fraction;
+}
+
+APFloat::opStatus
+APFloat::multiplySpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcInfinity):
+ category = fcInfinity;
+ return opOK;
+
+ case convolve(fcZero, fcNormal):
+ case convolve(fcNormal, fcZero):
+ case convolve(fcZero, fcZero):
+ category = fcZero;
+ return opOK;
+
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcInfinity, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+APFloat::opStatus
+APFloat::divideSpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ category = fcZero;
+ return opOK;
+
+ case convolve(fcNormal, fcZero):
+ category = fcInfinity;
+ return opDivByZero;
+
+ case convolve(fcInfinity, fcInfinity):
+ case convolve(fcZero, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+APFloat::opStatus
+APFloat::modSpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ case convolve(fcNormal, fcInfinity):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcZero):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcInfinity):
+ case convolve(fcZero, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+/* Change sign. */
+void
+APFloat::changeSign()
+{
+ /* Look mummy, this one's easy. */
+ sign = !sign;
+}
+
+void
+APFloat::clearSign()
+{
+ /* So is this one. */
+ sign = 0;
+}
+
+void
+APFloat::copySign(const APFloat &rhs)
+{
+ /* And this one. */
+ sign = rhs.sign;
+}
+
+/* Normalized addition or subtraction. */
+APFloat::opStatus
+APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
+ bool subtract)
+{
+ opStatus fs;
+
+ fs = addOrSubtractSpecials(rhs, subtract);
+
+ /* This return code means it was not a simple case. */
+ if (fs == opDivByZero) {
+ lostFraction lost_fraction;
+
+ lost_fraction = addOrSubtractSignificand(rhs, subtract);
+ fs = normalize(rounding_mode, lost_fraction);
+
+ /* Can only be zero if we lost no fraction. */
+ assert(category != fcZero || lost_fraction == lfExactlyZero);
+ }
+
+ /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+ positive zero unless rounding to minus infinity, except that
+ adding two like-signed zeroes gives that zero. */
+ if (category == fcZero) {
+ if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
+ sign = (rounding_mode == rmTowardNegative);
+ }
+
+ return fs;
+}
+
+/* Normalized addition. */
+APFloat::opStatus
+APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
+{
+ return addOrSubtract(rhs, rounding_mode, false);
+}
+
+/* Normalized subtraction. */
+APFloat::opStatus
+APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
+{
+ return addOrSubtract(rhs, rounding_mode, true);
+}
+
+/* Normalized multiply. */
+APFloat::opStatus
+APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ sign ^= rhs.sign;
+ fs = multiplySpecials(rhs);
+
+ if (category == fcNormal) {
+ lostFraction lost_fraction = multiplySignificand(rhs, 0);
+ fs = normalize(rounding_mode, lost_fraction);
+ if (lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+ }
+
+ return fs;
+}
+
+/* Normalized divide. */
+APFloat::opStatus
+APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ sign ^= rhs.sign;
+ fs = divideSpecials(rhs);
+
+ if (category == fcNormal) {
+ lostFraction lost_fraction = divideSignificand(rhs);
+ fs = normalize(rounding_mode, lost_fraction);
+ if (lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+ }
+
+ return fs;
+}
+
+/* Normalized remainder. This is not currently correct in all cases. */
+APFloat::opStatus
+APFloat::remainder(const APFloat &rhs)
+{
+ opStatus fs;
+ APFloat V = *this;
+ unsigned int origSign = sign;
+
+ fs = V.divide(rhs, rmNearestTiesToEven);
+ if (fs == opDivByZero)
+ return fs;
+
+ int parts = partCount();
+ integerPart *x = new integerPart[parts];
+ bool ignored;
+ fs = V.convertToInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven, &ignored);
+ if (fs==opInvalidOp)
+ return fs;
+
+ fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven);
+ assert(fs==opOK); // should always work
+
+ fs = V.multiply(rhs, rmNearestTiesToEven);
+ assert(fs==opOK || fs==opInexact); // should not overflow or underflow
+
+ fs = subtract(V, rmNearestTiesToEven);
+ assert(fs==opOK || fs==opInexact); // likewise
+
+ if (isZero())
+ sign = origSign; // IEEE754 requires this
+ delete[] x;
+ return fs;
+}
+
+/* Normalized llvm frem (C fmod).
+ This is not currently correct in all cases. */
+APFloat::opStatus
+APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+ fs = modSpecials(rhs);
+
+ if (category == fcNormal && rhs.category == fcNormal) {
+ APFloat V = *this;
+ unsigned int origSign = sign;
+
+ fs = V.divide(rhs, rmNearestTiesToEven);
+ if (fs == opDivByZero)
+ return fs;
+
+ int parts = partCount();
+ integerPart *x = new integerPart[parts];
+ bool ignored;
+ fs = V.convertToInteger(x, parts * integerPartWidth, true,
+ rmTowardZero, &ignored);
+ if (fs==opInvalidOp)
+ return fs;
+
+ fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven);
+ assert(fs==opOK); // should always work
+
+ fs = V.multiply(rhs, rounding_mode);
+ assert(fs==opOK || fs==opInexact); // should not overflow or underflow
+
+ fs = subtract(V, rounding_mode);
+ assert(fs==opOK || fs==opInexact); // likewise
+
+ if (isZero())
+ sign = origSign; // IEEE754 requires this
+ delete[] x;
+ }
+ return fs;
+}
+
+/* Normalized fused-multiply-add. */
+APFloat::opStatus
+APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
+ const APFloat &addend,
+ roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ /* Post-multiplication sign, before addition. */
+ sign ^= multiplicand.sign;
+
+ /* If and only if all arguments are normal do we need to do an
+ extended-precision calculation. */
+ if (category == fcNormal &&
+ multiplicand.category == fcNormal &&
+ addend.category == fcNormal) {
+ lostFraction lost_fraction;
+
+ lost_fraction = multiplySignificand(multiplicand, &addend);
+ fs = normalize(rounding_mode, lost_fraction);
+ if (lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+
+ /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+ positive zero unless rounding to minus infinity, except that
+ adding two like-signed zeroes gives that zero. */
+ if (category == fcZero && sign != addend.sign)
+ sign = (rounding_mode == rmTowardNegative);
+ } else {
+ fs = multiplySpecials(multiplicand);
+
+ /* FS can only be opOK or opInvalidOp. There is no more work
+ to do in the latter case. The IEEE-754R standard says it is
+ implementation-defined in this case whether, if ADDEND is a
+ quiet NaN, we raise invalid op; this implementation does so.
+
+ If we need to do the addition we can do so with normal
+ precision. */
+ if (fs == opOK)
+ fs = addOrSubtract(addend, rounding_mode, false);
+ }
+
+ return fs;
+}
+
+/* Rounding-mode corrrect round to integral value. */
+APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
+ opStatus fs;
+
+ // If the exponent is large enough, we know that this value is already
+ // integral, and the arithmetic below would potentially cause it to saturate
+ // to +/-Inf. Bail out early instead.
+ if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
+ return opOK;
+
+ // The algorithm here is quite simple: we add 2^(p-1), where p is the
+ // precision of our format, and then subtract it back off again. The choice
+ // of rounding modes for the addition/subtraction determines the rounding mode
+ // for our integral rounding as well.
+ // NOTE: When the input value is negative, we do subtraction followed by
+ // addition instead.
+ APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
+ IntegerConstant <<= semanticsPrecision(*semantics)-1;
+ APFloat MagicConstant(*semantics);
+ fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
+ rmNearestTiesToEven);
+ MagicConstant.copySign(*this);
+
+ if (fs != opOK)
+ return fs;
+
+ // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
+ bool inputSign = isNegative();
+
+ fs = add(MagicConstant, rounding_mode);
+ if (fs != opOK && fs != opInexact)
+ return fs;
+
+ fs = subtract(MagicConstant, rounding_mode);
+
+ // Restore the input sign.
+ if (inputSign != isNegative())
+ changeSign();
+
+ return fs;
+}
+
+
+/* Comparison requires normalized numbers. */
+APFloat::cmpResult
+APFloat::compare(const APFloat &rhs) const
+{
+ cmpResult result;
+
+ assert(semantics == rhs.semantics);
+
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ return cmpUnordered;
+
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcNormal, fcZero):
+ if (sign)
+ return cmpLessThan;
+ else
+ return cmpGreaterThan;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ if (rhs.sign)
+ return cmpGreaterThan;
+ else
+ return cmpLessThan;
+
+ case convolve(fcInfinity, fcInfinity):
+ if (sign == rhs.sign)
+ return cmpEqual;
+ else if (sign)
+ return cmpLessThan;
+ else
+ return cmpGreaterThan;
+
+ case convolve(fcZero, fcZero):
+ return cmpEqual;
+
+ case convolve(fcNormal, fcNormal):
+ break;
+ }
+
+ /* Two normal numbers. Do they have the same sign? */
+ if (sign != rhs.sign) {
+ if (sign)
+ result = cmpLessThan;
+ else
+ result = cmpGreaterThan;
+ } else {
+ /* Compare absolute values; invert result if negative. */
+ result = compareAbsoluteValue(rhs);
+
+ if (sign) {
+ if (result == cmpLessThan)
+ result = cmpGreaterThan;
+ else if (result == cmpGreaterThan)
+ result = cmpLessThan;
+ }
+ }
+
+ return result;
+}
+
+/// APFloat::convert - convert a value of one floating point type to another.
+/// The return value corresponds to the IEEE754 exceptions. *losesInfo
+/// records whether the transformation lost information, i.e. whether
+/// converting the result back to the original type will produce the
+/// original value (this is almost the same as return value==fsOK, but there
+/// are edge cases where this is not so).
+
+APFloat::opStatus
+APFloat::convert(const fltSemantics &toSemantics,
+ roundingMode rounding_mode, bool *losesInfo)
+{
+ lostFraction lostFraction;
+ unsigned int newPartCount, oldPartCount;
+ opStatus fs;
+ int shift;
+ const fltSemantics &fromSemantics = *semantics;
+
+ lostFraction = lfExactlyZero;
+ newPartCount = partCountForBits(toSemantics.precision + 1);
+ oldPartCount = partCount();
+ shift = toSemantics.precision - fromSemantics.precision;
+
+ bool X86SpecialNan = false;
+ if (&fromSemantics == &APFloat::x87DoubleExtended &&
+ &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
+ (!(*significandParts() & 0x8000000000000000ULL) ||
+ !(*significandParts() & 0x4000000000000000ULL))) {
+ // x86 has some unusual NaNs which cannot be represented in any other
+ // format; note them here.
+ X86SpecialNan = true;
+ }
+
+ // If this is a truncation, perform the shift before we narrow the storage.
+ if (shift < 0 && (category==fcNormal || category==fcNaN))
+ lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
+
+ // Fix the storage so it can hold to new value.
+ if (newPartCount > oldPartCount) {
+ // The new type requires more storage; make it available.
+ integerPart *newParts;
+ newParts = new integerPart[newPartCount];
+ APInt::tcSet(newParts, 0, newPartCount);
+ if (category==fcNormal || category==fcNaN)
+ APInt::tcAssign(newParts, significandParts(), oldPartCount);
+ freeSignificand();
+ significand.parts = newParts;
+ } else if (newPartCount == 1 && oldPartCount != 1) {
+ // Switch to built-in storage for a single part.
+ integerPart newPart = 0;
+ if (category==fcNormal || category==fcNaN)
+ newPart = significandParts()[0];
+ freeSignificand();
+ significand.part = newPart;
+ }
+
+ // Now that we have the right storage, switch the semantics.
+ semantics = &toSemantics;
+
+ // If this is an extension, perform the shift now that the storage is
+ // available.
+ if (shift > 0 && (category==fcNormal || category==fcNaN))
+ APInt::tcShiftLeft(significandParts(), newPartCount, shift);
+
+ if (category == fcNormal) {
+ fs = normalize(rounding_mode, lostFraction);
+ *losesInfo = (fs != opOK);
+ } else if (category == fcNaN) {
+ *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
+
+ // For x87 extended precision, we want to make a NaN, not a special NaN if
+ // the input wasn't special either.
+ if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
+ APInt::tcSetBit(significandParts(), semantics->precision - 1);
+
+ // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
+ // does not give you back the same bits. This is dubious, and we
+ // don't currently do it. You're really supposed to get
+ // an invalid operation signal at runtime, but nobody does that.
+ fs = opOK;
+ } else {
+ *losesInfo = false;
+ fs = opOK;
+ }
+
+ return fs;
+}
+
+/* Convert a floating point number to an integer according to the
+ rounding mode. If the rounded integer value is out of range this
+ returns an invalid operation exception and the contents of the
+ destination parts are unspecified. If the rounded value is in
+ range but the floating point number is not the exact integer, the C
+ standard doesn't require an inexact exception to be raised. IEEE
+ 854 does require it so we do that.
+
+ Note that for conversions to integer type the C standard requires
+ round-to-zero to always be used. */
+APFloat::opStatus
+APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
+ bool isSigned,
+ roundingMode rounding_mode,
+ bool *isExact) const
+{
+ lostFraction lost_fraction;
+ const integerPart *src;
+ unsigned int dstPartsCount, truncatedBits;
+
+ *isExact = false;
+
+ /* Handle the three special cases first. */
+ if (category == fcInfinity || category == fcNaN)
+ return opInvalidOp;
+
+ dstPartsCount = partCountForBits(width);
+
+ if (category == fcZero) {
+ APInt::tcSet(parts, 0, dstPartsCount);
+ // Negative zero can't be represented as an int.
+ *isExact = !sign;
+ return opOK;
+ }
+
+ src = significandParts();
+
+ /* Step 1: place our absolute value, with any fraction truncated, in
+ the destination. */
+ if (exponent < 0) {
+ /* Our absolute value is less than one; truncate everything. */
+ APInt::tcSet(parts, 0, dstPartsCount);
+ /* For exponent -1 the integer bit represents .5, look at that.
+ For smaller exponents leftmost truncated bit is 0. */
+ truncatedBits = semantics->precision -1U - exponent;
+ } else {
+ /* We want the most significant (exponent + 1) bits; the rest are
+ truncated. */
+ unsigned int bits = exponent + 1U;
+
+ /* Hopelessly large in magnitude? */
+ if (bits > width)
+ return opInvalidOp;
+
+ if (bits < semantics->precision) {
+ /* We truncate (semantics->precision - bits) bits. */
+ truncatedBits = semantics->precision - bits;
+ APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
+ } else {
+ /* We want at least as many bits as are available. */
+ APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
+ APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
+ truncatedBits = 0;
+ }
+ }
+
+ /* Step 2: work out any lost fraction, and increment the absolute
+ value if we would round away from zero. */
+ if (truncatedBits) {
+ lost_fraction = lostFractionThroughTruncation(src, partCount(),
+ truncatedBits);
+ if (lost_fraction != lfExactlyZero &&
+ roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
+ if (APInt::tcIncrement(parts, dstPartsCount))
+ return opInvalidOp; /* Overflow. */
+ }
+ } else {
+ lost_fraction = lfExactlyZero;
+ }
+
+ /* Step 3: check if we fit in the destination. */
+ unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
+
+ if (sign) {
+ if (!isSigned) {
+ /* Negative numbers cannot be represented as unsigned. */
+ if (omsb != 0)
+ return opInvalidOp;
+ } else {
+ /* It takes omsb bits to represent the unsigned integer value.
+ We lose a bit for the sign, but care is needed as the
+ maximally negative integer is a special case. */
+ if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
+ return opInvalidOp;
+
+ /* This case can happen because of rounding. */
+ if (omsb > width)
+ return opInvalidOp;
+ }
+
+ APInt::tcNegate (parts, dstPartsCount);
+ } else {
+ if (omsb >= width + !isSigned)
+ return opInvalidOp;
+ }
+
+ if (lost_fraction == lfExactlyZero) {
+ *isExact = true;
+ return opOK;
+ } else
+ return opInexact;
+}
+
+/* Same as convertToSignExtendedInteger, except we provide
+ deterministic values in case of an invalid operation exception,
+ namely zero for NaNs and the minimal or maximal value respectively
+ for underflow or overflow.
+ The *isExact output tells whether the result is exact, in the sense
+ that converting it back to the original floating point type produces
+ the original value. This is almost equivalent to result==opOK,
+ except for negative zeroes.
+*/
+APFloat::opStatus
+APFloat::convertToInteger(integerPart *parts, unsigned int width,
+ bool isSigned,
+ roundingMode rounding_mode, bool *isExact) const
+{
+ opStatus fs;
+
+ fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
+ isExact);
+
+ if (fs == opInvalidOp) {
+ unsigned int bits, dstPartsCount;
+
+ dstPartsCount = partCountForBits(width);
+
+ if (category == fcNaN)
+ bits = 0;
+ else if (sign)
+ bits = isSigned;
+ else
+ bits = width - isSigned;
+
+ APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
+ if (sign && isSigned)
+ APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
+ }
+
+ return fs;
+}
+
+/* Same as convertToInteger(integerPart*, ...), except the result is returned in
+ an APSInt, whose initial bit-width and signed-ness are used to determine the
+ precision of the conversion.
+ */
+APFloat::opStatus
+APFloat::convertToInteger(APSInt &result,
+ roundingMode rounding_mode, bool *isExact) const
+{
+ unsigned bitWidth = result.getBitWidth();
+ SmallVector<uint64_t, 4> parts(result.getNumWords());
+ opStatus status = convertToInteger(
+ parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
+ // Keeps the original signed-ness.
+ result = APInt(bitWidth, parts);
+ return status;
+}
+
+/* Convert an unsigned integer SRC to a floating point number,
+ rounding according to ROUNDING_MODE. The sign of the floating
+ point number is not modified. */
+APFloat::opStatus
+APFloat::convertFromUnsignedParts(const integerPart *src,
+ unsigned int srcCount,
+ roundingMode rounding_mode)
+{
+ unsigned int omsb, precision, dstCount;
+ integerPart *dst;
+ lostFraction lost_fraction;
+
+ category = fcNormal;
+ omsb = APInt::tcMSB(src, srcCount) + 1;
+ dst = significandParts();
+ dstCount = partCount();
+ precision = semantics->precision;
+
+ /* We want the most significant PRECISION bits of SRC. There may not
+ be that many; extract what we can. */
+ if (precision <= omsb) {
+ exponent = omsb - 1;
+ lost_fraction = lostFractionThroughTruncation(src, srcCount,
+ omsb - precision);
+ APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
+ } else {
+ exponent = precision - 1;
+ lost_fraction = lfExactlyZero;
+ APInt::tcExtract(dst, dstCount, src, omsb, 0);
+ }
+
+ return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::convertFromAPInt(const APInt &Val,
+ bool isSigned,
+ roundingMode rounding_mode)
+{
+ unsigned int partCount = Val.getNumWords();
+ APInt api = Val;
+
+ sign = false;
+ if (isSigned && api.isNegative()) {
+ sign = true;
+ api = -api;
+ }
+
+ return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+/* Convert a two's complement integer SRC to a floating point number,
+ rounding according to ROUNDING_MODE. ISSIGNED is true if the
+ integer is signed, in which case it must be sign-extended. */
+APFloat::opStatus
+APFloat::convertFromSignExtendedInteger(const integerPart *src,
+ unsigned int srcCount,
+ bool isSigned,
+ roundingMode rounding_mode)
+{
+ opStatus status;
+
+ if (isSigned &&
+ APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
+ integerPart *copy;
+
+ /* If we're signed and negative negate a copy. */
+ sign = true;
+ copy = new integerPart[srcCount];
+ APInt::tcAssign(copy, src, srcCount);
+ APInt::tcNegate(copy, srcCount);
+ status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
+ delete [] copy;
+ } else {
+ sign = false;
+ status = convertFromUnsignedParts(src, srcCount, rounding_mode);
+ }
+
+ return status;
+}
+
+/* FIXME: should this just take a const APInt reference? */
+APFloat::opStatus
+APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
+ unsigned int width, bool isSigned,
+ roundingMode rounding_mode)
+{
+ unsigned int partCount = partCountForBits(width);
+ APInt api = APInt(width, makeArrayRef(parts, partCount));
+
+ sign = false;
+ if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
+ sign = true;
+ api = -api;
+ }
+
+ return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+APFloat::opStatus
+APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
+{
+ lostFraction lost_fraction = lfExactlyZero;
+ integerPart *significand;
+ unsigned int bitPos, partsCount;
+ StringRef::iterator dot, firstSignificantDigit;
+
+ zeroSignificand();
+ exponent = 0;
+ category = fcNormal;
+
+ significand = significandParts();
+ partsCount = partCount();
+ bitPos = partsCount * integerPartWidth;
+
+ /* Skip leading zeroes and any (hexa)decimal point. */
+ StringRef::iterator begin = s.begin();
+ StringRef::iterator end = s.end();
+ StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
+ firstSignificantDigit = p;
+
+ for (; p != end;) {
+ integerPart hex_value;
+
+ if (*p == '.') {
+ assert(dot == end && "String contains multiple dots");
+ dot = p++;
+ if (p == end) {
+ break;
+ }
+ }
+
+ hex_value = hexDigitValue(*p);
+ if (hex_value == -1U) {
+ break;
+ }
+
+ p++;
+
+ if (p == end) {
+ break;
+ } else {
+ /* Store the number whilst 4-bit nibbles remain. */
+ if (bitPos) {
+ bitPos -= 4;
+ hex_value <<= bitPos % integerPartWidth;
+ significand[bitPos / integerPartWidth] |= hex_value;
+ } else {
+ lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
+ while (p != end && hexDigitValue(*p) != -1U)
+ p++;
+ break;
+ }
+ }
+ }
+
+ /* Hex floats require an exponent but not a hexadecimal point. */
+ assert(p != end && "Hex strings require an exponent");
+ assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
+ assert(p != begin && "Significand has no digits");
+ assert((dot == end || p - begin != 1) && "Significand has no digits");
+
+ /* Ignore the exponent if we are zero. */
+ if (p != firstSignificantDigit) {
+ int expAdjustment;
+
+ /* Implicit hexadecimal point? */
+ if (dot == end)
+ dot = p;
+
+ /* Calculate the exponent adjustment implicit in the number of
+ significant digits. */
+ expAdjustment = static_cast<int>(dot - firstSignificantDigit);
+ if (expAdjustment < 0)
+ expAdjustment++;
+ expAdjustment = expAdjustment * 4 - 1;
+
+ /* Adjust for writing the significand starting at the most
+ significant nibble. */
+ expAdjustment += semantics->precision;
+ expAdjustment -= partsCount * integerPartWidth;
+
+ /* Adjust for the given exponent. */
+ exponent = totalExponent(p + 1, end, expAdjustment);
+ }
+
+ return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
+ unsigned sigPartCount, int exp,
+ roundingMode rounding_mode)
+{
+ unsigned int parts, pow5PartCount;
+ fltSemantics calcSemantics = { 32767, -32767, 0 };
+ integerPart pow5Parts[maxPowerOfFiveParts];
+ bool isNearest;
+
+ isNearest = (rounding_mode == rmNearestTiesToEven ||
+ rounding_mode == rmNearestTiesToAway);
+
+ parts = partCountForBits(semantics->precision + 11);
+
+ /* Calculate pow(5, abs(exp)). */
+ pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
+
+ for (;; parts *= 2) {
+ opStatus sigStatus, powStatus;
+ unsigned int excessPrecision, truncatedBits;
+
+ calcSemantics.precision = parts * integerPartWidth - 1;
+ excessPrecision = calcSemantics.precision - semantics->precision;
+ truncatedBits = excessPrecision;
+
+ APFloat decSig(calcSemantics, fcZero, sign);
+ APFloat pow5(calcSemantics, fcZero, false);
+
+ sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
+ rmNearestTiesToEven);
+ powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
+ rmNearestTiesToEven);
+ /* Add exp, as 10^n = 5^n * 2^n. */
+ decSig.exponent += exp;
+
+ lostFraction calcLostFraction;
+ integerPart HUerr, HUdistance;
+ unsigned int powHUerr;
+
+ if (exp >= 0) {
+ /* multiplySignificand leaves the precision-th bit set to 1. */
+ calcLostFraction = decSig.multiplySignificand(pow5, NULL);
+ powHUerr = powStatus != opOK;
+ } else {
+ calcLostFraction = decSig.divideSignificand(pow5);
+ /* Denormal numbers have less precision. */
+ if (decSig.exponent < semantics->minExponent) {
+ excessPrecision += (semantics->minExponent - decSig.exponent);
+ truncatedBits = excessPrecision;
+ if (excessPrecision > calcSemantics.precision)
+ excessPrecision = calcSemantics.precision;
+ }
+ /* Extra half-ulp lost in reciprocal of exponent. */
+ powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
+ }
+
+ /* Both multiplySignificand and divideSignificand return the
+ result with the integer bit set. */
+ assert(APInt::tcExtractBit
+ (decSig.significandParts(), calcSemantics.precision - 1) == 1);
+
+ HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
+ powHUerr);
+ HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
+ excessPrecision, isNearest);
+
+ /* Are we guaranteed to round correctly if we truncate? */
+ if (HUdistance >= HUerr) {
+ APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
+ calcSemantics.precision - excessPrecision,
+ excessPrecision);
+ /* Take the exponent of decSig. If we tcExtract-ed less bits
+ above we must adjust our exponent to compensate for the
+ implicit right shift. */
+ exponent = (decSig.exponent + semantics->precision
+ - (calcSemantics.precision - excessPrecision));
+ calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
+ decSig.partCount(),
+ truncatedBits);
+ return normalize(rounding_mode, calcLostFraction);
+ }
+ }
+}
+
+APFloat::opStatus
+APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
+{
+ decimalInfo D;
+ opStatus fs;
+
+ /* Scan the text. */
+ StringRef::iterator p = str.begin();
+ interpretDecimal(p, str.end(), &D);
+
+ /* Handle the quick cases. First the case of no significant digits,
+ i.e. zero, and then exponents that are obviously too large or too
+ small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
+ definitely overflows if
+
+ (exp - 1) * L >= maxExponent
+
+ and definitely underflows to zero where
+
+ (exp + 1) * L <= minExponent - precision
+
+ With integer arithmetic the tightest bounds for L are
+
+ 93/28 < L < 196/59 [ numerator <= 256 ]
+ 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
+ */
+
+ if (decDigitValue(*D.firstSigDigit) >= 10U) {
+ category = fcZero;
+ fs = opOK;
+
+ /* Check whether the normalized exponent is high enough to overflow
+ max during the log-rebasing in the max-exponent check below. */
+ } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
+ fs = handleOverflow(rounding_mode);
+
+ /* If it wasn't, then it also wasn't high enough to overflow max
+ during the log-rebasing in the min-exponent check. Check that it
+ won't overflow min in either check, then perform the min-exponent
+ check. */
+ } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
+ (D.normalizedExponent + 1) * 28738 <=
+ 8651 * (semantics->minExponent - (int) semantics->precision)) {
+ /* Underflow to zero and round. */
+ zeroSignificand();
+ fs = normalize(rounding_mode, lfLessThanHalf);
+
+ /* We can finally safely perform the max-exponent check. */
+ } else if ((D.normalizedExponent - 1) * 42039
+ >= 12655 * semantics->maxExponent) {
+ /* Overflow and round. */
+ fs = handleOverflow(rounding_mode);
+ } else {
+ integerPart *decSignificand;
+ unsigned int partCount;
+
+ /* A tight upper bound on number of bits required to hold an
+ N-digit decimal integer is N * 196 / 59. Allocate enough space
+ to hold the full significand, and an extra part required by
+ tcMultiplyPart. */
+ partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
+ partCount = partCountForBits(1 + 196 * partCount / 59);
+ decSignificand = new integerPart[partCount + 1];
+ partCount = 0;
+
+ /* Convert to binary efficiently - we do almost all multiplication
+ in an integerPart. When this would overflow do we do a single
+ bignum multiplication, and then revert again to multiplication
+ in an integerPart. */
+ do {
+ integerPart decValue, val, multiplier;
+
+ val = 0;
+ multiplier = 1;
+
+ do {
+ if (*p == '.') {
+ p++;
+ if (p == str.end()) {
+ break;
+ }
+ }
+ decValue = decDigitValue(*p++);
+ assert(decValue < 10U && "Invalid character in significand");
+ multiplier *= 10;
+ val = val * 10 + decValue;
+ /* The maximum number that can be multiplied by ten with any
+ digit added without overflowing an integerPart. */
+ } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
+
+ /* Multiply out the current part. */
+ APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
+ partCount, partCount + 1, false);
+
+ /* If we used another part (likely but not guaranteed), increase
+ the count. */
+ if (decSignificand[partCount])
+ partCount++;
+ } while (p <= D.lastSigDigit);
+
+ category = fcNormal;
+ fs = roundSignificandWithExponent(decSignificand, partCount,
+ D.exponent, rounding_mode);
+
+ delete [] decSignificand;
+ }
+
+ return fs;
+}
+
+APFloat::opStatus
+APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
+{
+ assert(!str.empty() && "Invalid string length");
+
+ /* Handle a leading minus sign. */
+ StringRef::iterator p = str.begin();
+ size_t slen = str.size();
+ sign = *p == '-' ? 1 : 0;
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String has no digits");
+ }
+
+ if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ assert(slen - 2 && "Invalid string");
+ return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
+ rounding_mode);
+ }
+
+ return convertFromDecimalString(StringRef(p, slen), rounding_mode);
+}
+
+/* Write out a hexadecimal representation of the floating point value
+ to DST, which must be of sufficient size, in the C99 form
+ [-]0xh.hhhhp[+-]d. Return the number of characters written,
+ excluding the terminating NUL.
+
+ If UPPERCASE, the output is in upper case, otherwise in lower case.
+
+ HEXDIGITS digits appear altogether, rounding the value if
+ necessary. If HEXDIGITS is 0, the minimal precision to display the
+ number precisely is used instead. If nothing would appear after
+ the decimal point it is suppressed.
+
+ The decimal exponent is always printed and has at least one digit.
+ Zero values display an exponent of zero. Infinities and NaNs
+ appear as "infinity" or "nan" respectively.
+
+ The above rules are as specified by C99. There is ambiguity about
+ what the leading hexadecimal digit should be. This implementation
+ uses whatever is necessary so that the exponent is displayed as
+ stored. This implies the exponent will fall within the IEEE format
+ range, and the leading hexadecimal digit will be 0 (for denormals),
+ 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
+ any other digits zero).
+*/
+unsigned int
+APFloat::convertToHexString(char *dst, unsigned int hexDigits,
+ bool upperCase, roundingMode rounding_mode) const
+{
+ char *p;
+
+ p = dst;
+ if (sign)
+ *dst++ = '-';
+
+ switch (category) {
+ case fcInfinity:
+ memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
+ dst += sizeof infinityL - 1;
+ break;
+
+ case fcNaN:
+ memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
+ dst += sizeof NaNU - 1;
+ break;
+
+ case fcZero:
+ *dst++ = '0';
+ *dst++ = upperCase ? 'X': 'x';
+ *dst++ = '0';
+ if (hexDigits > 1) {
+ *dst++ = '.';
+ memset (dst, '0', hexDigits - 1);
+ dst += hexDigits - 1;
+ }
+ *dst++ = upperCase ? 'P': 'p';
+ *dst++ = '0';
+ break;
+
+ case fcNormal:
+ dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
+ break;
+ }
+
+ *dst = 0;
+
+ return static_cast<unsigned int>(dst - p);
+}
+
+/* Does the hard work of outputting the correctly rounded hexadecimal
+ form of a normal floating point number with the specified number of
+ hexadecimal digits. If HEXDIGITS is zero the minimum number of
+ digits necessary to print the value precisely is output. */
+char *
+APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
+ bool upperCase,
+ roundingMode rounding_mode) const
+{
+ unsigned int count, valueBits, shift, partsCount, outputDigits;
+ const char *hexDigitChars;
+ const integerPart *significand;
+ char *p;
+ bool roundUp;
+
+ *dst++ = '0';
+ *dst++ = upperCase ? 'X': 'x';
+
+ roundUp = false;
+ hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
+
+ significand = significandParts();
+ partsCount = partCount();
+
+ /* +3 because the first digit only uses the single integer bit, so
+ we have 3 virtual zero most-significant-bits. */
+ valueBits = semantics->precision + 3;
+ shift = integerPartWidth - valueBits % integerPartWidth;
+
+ /* The natural number of digits required ignoring trailing
+ insignificant zeroes. */
+ outputDigits = (valueBits - significandLSB () + 3) / 4;
+
+ /* hexDigits of zero means use the required number for the
+ precision. Otherwise, see if we are truncating. If we are,
+ find out if we need to round away from zero. */
+ if (hexDigits) {
+ if (hexDigits < outputDigits) {
+ /* We are dropping non-zero bits, so need to check how to round.
+ "bits" is the number of dropped bits. */
+ unsigned int bits;
+ lostFraction fraction;
+
+ bits = valueBits - hexDigits * 4;
+ fraction = lostFractionThroughTruncation (significand, partsCount, bits);
+ roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
+ }
+ outputDigits = hexDigits;
+ }
+
+ /* Write the digits consecutively, and start writing in the location
+ of the hexadecimal point. We move the most significant digit
+ left and add the hexadecimal point later. */
+ p = ++dst;
+
+ count = (valueBits + integerPartWidth - 1) / integerPartWidth;
+
+ while (outputDigits && count) {
+ integerPart part;
+
+ /* Put the most significant integerPartWidth bits in "part". */
+ if (--count == partsCount)
+ part = 0; /* An imaginary higher zero part. */
+ else
+ part = significand[count] << shift;
+
+ if (count && shift)
+ part |= significand[count - 1] >> (integerPartWidth - shift);
+
+ /* Convert as much of "part" to hexdigits as we can. */
+ unsigned int curDigits = integerPartWidth / 4;
+
+ if (curDigits > outputDigits)
+ curDigits = outputDigits;
+ dst += partAsHex (dst, part, curDigits, hexDigitChars);
+ outputDigits -= curDigits;
+ }
+
+ if (roundUp) {
+ char *q = dst;
+
+ /* Note that hexDigitChars has a trailing '0'. */
+ do {
+ q--;
+ *q = hexDigitChars[hexDigitValue (*q) + 1];
+ } while (*q == '0');
+ assert(q >= p);
+ } else {
+ /* Add trailing zeroes. */
+ memset (dst, '0', outputDigits);
+ dst += outputDigits;
+ }
+
+ /* Move the most significant digit to before the point, and if there
+ is something after the decimal point add it. This must come
+ after rounding above. */
+ p[-1] = p[0];
+ if (dst -1 == p)
+ dst--;
+ else
+ p[0] = '.';
+
+ /* Finally output the exponent. */
+ *dst++ = upperCase ? 'P': 'p';
+
+ return writeSignedDecimal (dst, exponent);
+}
+
+hash_code llvm::hash_value(const APFloat &Arg) {
+ if (Arg.category != APFloat::fcNormal)
+ return hash_combine((uint8_t)Arg.category,
+ // NaN has no sign, fix it at zero.
+ Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
+ Arg.semantics->precision);
+
+ // Normal floats need their exponent and significand hashed.
+ return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
+ Arg.semantics->precision, Arg.exponent,
+ hash_combine_range(
+ Arg.significandParts(),
+ Arg.significandParts() + Arg.partCount()));
+}
+
+// Conversion from APFloat to/from host float/double. It may eventually be
+// possible to eliminate these and have everybody deal with APFloats, but that
+// will take a while. This approach will not easily extend to long double.
+// Current implementation requires integerPartWidth==64, which is correct at
+// the moment but could be made more general.
+
+// Denormals have exponent minExponent in APFloat, but minExponent-1 in
+// the actual IEEE respresentations. We compensate for that here.
+
+APInt
+APFloat::convertF80LongDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
+ assert(partCount()==2);
+
+ uint64_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+16383; //bias
+ mysignificand = significandParts()[0];
+ if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7fff;
+ mysignificand = 0x8000000000000000ULL;
+ } else {
+ assert(category == fcNaN && "Unknown category");
+ myexponent = 0x7fff;
+ mysignificand = significandParts()[0];
+ }
+
+ uint64_t words[2];
+ words[0] = mysignificand;
+ words[1] = ((uint64_t)(sign & 1) << 15) |
+ (myexponent & 0x7fffLL);
+ return APInt(80, words);
+}
+
+APInt
+APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
+ assert(partCount()==2);
+
+ uint64_t words[2];
+ opStatus fs;
+ bool losesInfo;
+
+ // Convert number to double. To avoid spurious underflows, we re-
+ // normalize against the "double" minExponent first, and only *then*
+ // truncate the mantissa. The result of that second conversion
+ // may be inexact, but should never underflow.
+ // Declare fltSemantics before APFloat that uses it (and
+ // saves pointer to it) to ensure correct destruction order.
+ fltSemantics extendedSemantics = *semantics;
+ extendedSemantics.minExponent = IEEEdouble.minExponent;
+ APFloat extended(*this);
+ fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
+ assert(fs == opOK && !losesInfo);
+ (void)fs;
+
+ APFloat u(extended);
+ fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
+ assert(fs == opOK || fs == opInexact);
+ (void)fs;
+ words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
+
+ // If conversion was exact or resulted in a special case, we're done;
+ // just set the second double to zero. Otherwise, re-convert back to
+ // the extended format and compute the difference. This now should
+ // convert exactly to double.
+ if (u.category == fcNormal && losesInfo) {
+ fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
+ assert(fs == opOK && !losesInfo);
+ (void)fs;
+
+ APFloat v(extended);
+ v.subtract(u, rmNearestTiesToEven);
+ fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
+ assert(fs == opOK && !losesInfo);
+ (void)fs;
+ words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
+ } else {
+ words[1] = 0;
+ }
+
+ return APInt(128, words);
+}
+
+APInt
+APFloat::convertQuadrupleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
+ assert(partCount()==2);
+
+ uint64_t myexponent, mysignificand, mysignificand2;
+
+ if (category==fcNormal) {
+ myexponent = exponent+16383; //bias
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = mysignificand2 = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7fff;
+ mysignificand = mysignificand2 = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x7fff;
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ }
+
+ uint64_t words[2];
+ words[0] = mysignificand;
+ words[1] = ((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7fff) << 48) |
+ (mysignificand2 & 0xffffffffffffLL);
+
+ return APInt(128, words);
+}
+
+APInt
+APFloat::convertDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
+ assert(partCount()==1);
+
+ uint64_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+1023; //bias
+ mysignificand = *significandParts();
+ if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7ff;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x7ff;
+ mysignificand = *significandParts();
+ }
+
+ return APInt(64, ((((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7ff) << 52) |
+ (mysignificand & 0xfffffffffffffLL))));
+}
+
+APInt
+APFloat::convertFloatAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
+ assert(partCount()==1);
+
+ uint32_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+127; //bias
+ mysignificand = (uint32_t)*significandParts();
+ if (myexponent == 1 && !(mysignificand & 0x800000))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0xff;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0xff;
+ mysignificand = (uint32_t)*significandParts();
+ }
+
+ return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
+ (mysignificand & 0x7fffff)));
+}
+
+APInt
+APFloat::convertHalfAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
+ assert(partCount()==1);
+
+ uint32_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+15; //bias
+ mysignificand = (uint32_t)*significandParts();
+ if (myexponent == 1 && !(mysignificand & 0x400))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x1f;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x1f;
+ mysignificand = (uint32_t)*significandParts();
+ }
+
+ return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
+ (mysignificand & 0x3ff)));
+}
+
+// This function creates an APInt that is just a bit map of the floating
+// point constant as it would appear in memory. It is not a conversion,
+// and treating the result as a normal integer is unlikely to be useful.
+
+APInt
+APFloat::bitcastToAPInt() const
+{
+ if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
+ return convertHalfAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
+ return convertFloatAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
+ return convertDoubleAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEquad)
+ return convertQuadrupleAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
+ return convertPPCDoubleDoubleAPFloatToAPInt();
+
+ assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
+ "unknown format!");
+ return convertF80LongDoubleAPFloatToAPInt();
+}
+
+float
+APFloat::convertToFloat() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
+ "Float semantics are not IEEEsingle");
+ APInt api = bitcastToAPInt();
+ return api.bitsToFloat();
+}
+
+double
+APFloat::convertToDouble() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
+ "Float semantics are not IEEEdouble");
+ APInt api = bitcastToAPInt();
+ return api.bitsToDouble();
+}
+
+/// Integer bit is explicit in this format. Intel hardware (387 and later)
+/// does not support these bit patterns:
+/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
+/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
+/// exponent = 0, integer bit 1 ("pseudodenormal")
+/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
+/// At the moment, the first two are treated as NaNs, the second two as Normal.
+void
+APFloat::initFromF80LongDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==80);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i2 & 0x7fff);
+ uint64_t mysignificand = i1;
+
+ initialize(&APFloat::x87DoubleExtended);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i2>>15);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
+ // exponent meaningless
+ category = fcNaN;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = 0;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16383;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = 0;
+ if (myexponent==0) // denormal
+ exponent = -16382;
+ }
+}
+
+void
+APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==128);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ opStatus fs;
+ bool losesInfo;
+
+ // Get the first double and convert to our format.
+ initFromDoubleAPInt(APInt(64, i1));
+ fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
+ assert(fs == opOK && !losesInfo);
+ (void)fs;
+
+ // Unless we have a special case, add in second double.
+ if (category == fcNormal) {
+ APFloat v(IEEEdouble, APInt(64, i2));
+ fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
+ assert(fs == opOK && !losesInfo);
+ (void)fs;
+
+ add(v, rmNearestTiesToEven);
+ }
+}
+
+void
+APFloat::initFromQuadrupleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==128);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i2 >> 48) & 0x7fff;
+ uint64_t mysignificand = i1;
+ uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
+
+ initialize(&APFloat::IEEEquad);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i2>>63);
+ if (myexponent==0 &&
+ (mysignificand==0 && mysignificand2==0)) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7fff &&
+ (mysignificand==0 && mysignificand2==0)) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7fff &&
+ (mysignificand!=0 || mysignificand2 !=0)) {
+ // exponent meaningless
+ category = fcNaN;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16383;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ if (myexponent==0) // denormal
+ exponent = -16382;
+ else
+ significandParts()[1] |= 0x1000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==64);
+ uint64_t i = *api.getRawData();
+ uint64_t myexponent = (i >> 52) & 0x7ff;
+ uint64_t mysignificand = i & 0xfffffffffffffLL;
+
+ initialize(&APFloat::IEEEdouble);
+ assert(partCount()==1);
+
+ sign = static_cast<unsigned int>(i>>63);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7ff && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7ff && mysignificand!=0) {
+ // exponent meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 1023;
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -1022;
+ else
+ *significandParts() |= 0x10000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromFloatAPInt(const APInt & api)
+{
+ assert(api.getBitWidth()==32);
+ uint32_t i = (uint32_t)*api.getRawData();
+ uint32_t myexponent = (i >> 23) & 0xff;
+ uint32_t mysignificand = i & 0x7fffff;
+
+ initialize(&APFloat::IEEEsingle);
+ assert(partCount()==1);
+
+ sign = i >> 31;
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0xff && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0xff && mysignificand!=0) {
+ // sign, exponent, significand meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 127; //bias
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -126;
+ else
+ *significandParts() |= 0x800000; // integer bit
+ }
+}
+
+void
+APFloat::initFromHalfAPInt(const APInt & api)
+{
+ assert(api.getBitWidth()==16);
+ uint32_t i = (uint32_t)*api.getRawData();
+ uint32_t myexponent = (i >> 10) & 0x1f;
+ uint32_t mysignificand = i & 0x3ff;
+
+ initialize(&APFloat::IEEEhalf);
+ assert(partCount()==1);
+
+ sign = i >> 15;
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x1f && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x1f && mysignificand!=0) {
+ // sign, exponent, significand meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 15; //bias
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -14;
+ else
+ *significandParts() |= 0x400; // integer bit
+ }
+}
+
+/// Treat api as containing the bits of a floating point number. Currently
+/// we infer the floating point type from the size of the APInt. The
+/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
+/// when the size is anything else).
+void
+APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
+{
+ if (Sem == &IEEEhalf)
+ return initFromHalfAPInt(api);
+ if (Sem == &IEEEsingle)
+ return initFromFloatAPInt(api);
+ if (Sem == &IEEEdouble)
+ return initFromDoubleAPInt(api);
+ if (Sem == &x87DoubleExtended)
+ return initFromF80LongDoubleAPInt(api);
+ if (Sem == &IEEEquad)
+ return initFromQuadrupleAPInt(api);
+ if (Sem == &PPCDoubleDouble)
+ return initFromPPCDoubleDoubleAPInt(api);
+
+ llvm_unreachable(0);
+}
+
+APFloat
+APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
+{
+ switch (BitWidth) {
+ case 16:
+ return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
+ case 32:
+ return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
+ case 64:
+ return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
+ case 80:
+ return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
+ case 128:
+ if (isIEEE)
+ return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
+ return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
+ default:
+ llvm_unreachable("Unknown floating bit width");
+ }
+}
+
+APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 1..10
+ // significand = 1..1
+
+ Val.exponent = Sem.maxExponent; // unbiased
+
+ // 1-initialize all bits....
+ Val.zeroSignificand();
+ integerPart *significand = Val.significandParts();
+ unsigned N = partCountForBits(Sem.precision);
+ for (unsigned i = 0; i != N; ++i)
+ significand[i] = ~((integerPart) 0);
+
+ // ...and then clear the top bits for internal consistency.
+ if (Sem.precision % integerPartWidth != 0)
+ significand[N-1] &=
+ (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
+
+ return Val;
+}
+
+APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 0..0
+ // significand = 0..01
+
+ Val.exponent = Sem.minExponent; // unbiased
+ Val.zeroSignificand();
+ Val.significandParts()[0] = 1;
+ return Val;
+}
+
+APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 0..0
+ // significand = 10..0
+
+ Val.exponent = Sem.minExponent;
+ Val.zeroSignificand();
+ Val.significandParts()[partCountForBits(Sem.precision)-1] |=
+ (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
+
+ return Val;
+}
+
+APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
+ initFromAPInt(&Sem, API);
+}
+
+APFloat::APFloat(float f) {
+ initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
+}
+
+APFloat::APFloat(double d) {
+ initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
+}
+
+namespace {
+ void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
+ Buffer.append(Str.begin(), Str.end());
+ }
+
+ /// Removes data from the given significand until it is no more
+ /// precise than is required for the desired precision.
+ void AdjustToPrecision(APInt &significand,
+ int &exp, unsigned FormatPrecision) {
+ unsigned bits = significand.getActiveBits();
+
+ // 196/59 is a very slight overestimate of lg_2(10).
+ unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
+
+ if (bits <= bitsRequired) return;
+
+ unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
+ if (!tensRemovable) return;
+
+ exp += tensRemovable;
+
+ APInt divisor(significand.getBitWidth(), 1);
+ APInt powten(significand.getBitWidth(), 10);
+ while (true) {
+ if (tensRemovable & 1)
+ divisor *= powten;
+ tensRemovable >>= 1;
+ if (!tensRemovable) break;
+ powten *= powten;
+ }
+
+ significand = significand.udiv(divisor);
+
+ // Truncate the significand down to its active bit count.
+ significand = significand.trunc(significand.getActiveBits());
+ }
+
+
+ void AdjustToPrecision(SmallVectorImpl<char> &buffer,
+ int &exp, unsigned FormatPrecision) {
+ unsigned N = buffer.size();
+ if (N <= FormatPrecision) return;
+
+ // The most significant figures are the last ones in the buffer.
+ unsigned FirstSignificant = N - FormatPrecision;
+
+ // Round.
+ // FIXME: this probably shouldn't use 'round half up'.
+
+ // Rounding down is just a truncation, except we also want to drop
+ // trailing zeros from the new result.
+ if (buffer[FirstSignificant - 1] < '5') {
+ while (FirstSignificant < N && buffer[FirstSignificant] == '0')
+ FirstSignificant++;
+
+ exp += FirstSignificant;
+ buffer.erase(&buffer[0], &buffer[FirstSignificant]);
+ return;
+ }
+
+ // Rounding up requires a decimal add-with-carry. If we continue
+ // the carry, the newly-introduced zeros will just be truncated.
+ for (unsigned I = FirstSignificant; I != N; ++I) {
+ if (buffer[I] == '9') {
+ FirstSignificant++;
+ } else {
+ buffer[I]++;
+ break;
+ }
+ }
+
+ // If we carried through, we have exactly one digit of precision.
+ if (FirstSignificant == N) {
+ exp += FirstSignificant;
+ buffer.clear();
+ buffer.push_back('1');
+ return;
+ }
+
+ exp += FirstSignificant;
+ buffer.erase(&buffer[0], &buffer[FirstSignificant]);
+ }
+}
+
+void APFloat::toString(SmallVectorImpl<char> &Str,
+ unsigned FormatPrecision,
+ unsigned FormatMaxPadding) const {
+ switch (category) {
+ case fcInfinity:
+ if (isNegative())
+ return append(Str, "-Inf");
+ else
+ return append(Str, "+Inf");
+
+ case fcNaN: return append(Str, "NaN");
+
+ case fcZero:
+ if (isNegative())
+ Str.push_back('-');
+
+ if (!FormatMaxPadding)
+ append(Str, "0.0E+0");
+ else
+ Str.push_back('0');
+ return;
+
+ case fcNormal:
+ break;
+ }
+
+ if (isNegative())
+ Str.push_back('-');
+
+ // Decompose the number into an APInt and an exponent.
+ int exp = exponent - ((int) semantics->precision - 1);
+ APInt significand(semantics->precision,
+ makeArrayRef(significandParts(),
+ partCountForBits(semantics->precision)));
+
+ // Set FormatPrecision if zero. We want to do this before we
+ // truncate trailing zeros, as those are part of the precision.
+ if (!FormatPrecision) {
+ // It's an interesting question whether to use the nominal
+ // precision or the active precision here for denormals.
+
+ // FormatPrecision = ceil(significandBits / lg_2(10))
+ FormatPrecision = (semantics->precision * 59 + 195) / 196;
+ }
+
+ // Ignore trailing binary zeros.
+ int trailingZeros = significand.countTrailingZeros();
+ exp += trailingZeros;
+ significand = significand.lshr(trailingZeros);
+
+ // Change the exponent from 2^e to 10^e.
+ if (exp == 0) {
+ // Nothing to do.
+ } else if (exp > 0) {
+ // Just shift left.
+ significand = significand.zext(semantics->precision + exp);
+ significand <<= exp;
+ exp = 0;
+ } else { /* exp < 0 */
+ int texp = -exp;
+
+ // We transform this using the identity:
+ // (N)(2^-e) == (N)(5^e)(10^-e)
+ // This means we have to multiply N (the significand) by 5^e.
+ // To avoid overflow, we have to operate on numbers large
+ // enough to store N * 5^e:
+ // log2(N * 5^e) == log2(N) + e * log2(5)
+ // <= semantics->precision + e * 137 / 59
+ // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
+
+ unsigned precision = semantics->precision + (137 * texp + 136) / 59;
+
+ // Multiply significand by 5^e.
+ // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
+ significand = significand.zext(precision);
+ APInt five_to_the_i(precision, 5);
+ while (true) {
+ if (texp & 1) significand *= five_to_the_i;
+
+ texp >>= 1;
+ if (!texp) break;
+ five_to_the_i *= five_to_the_i;
+ }
+ }
+
+ AdjustToPrecision(significand, exp, FormatPrecision);
+
+ SmallVector<char, 256> buffer;
+
+ // Fill the buffer.
+ unsigned precision = significand.getBitWidth();
+ APInt ten(precision, 10);
+ APInt digit(precision, 0);
+
+ bool inTrail = true;
+ while (significand != 0) {
+ // digit <- significand % 10
+ // significand <- significand / 10
+ APInt::udivrem(significand, ten, significand, digit);
+
+ unsigned d = digit.getZExtValue();
+
+ // Drop trailing zeros.
+ if (inTrail && !d) exp++;
+ else {
+ buffer.push_back((char) ('0' + d));
+ inTrail = false;
+ }
+ }
+
+ assert(!buffer.empty() && "no characters in buffer!");
+
+ // Drop down to FormatPrecision.
+ // TODO: don't do more precise calculations above than are required.
+ AdjustToPrecision(buffer, exp, FormatPrecision);
+
+ unsigned NDigits = buffer.size();
+
+ // Check whether we should use scientific notation.
+ bool FormatScientific;
+ if (!FormatMaxPadding)
+ FormatScientific = true;
+ else {
+ if (exp >= 0) {
+ // 765e3 --> 765000
+ // ^^^
+ // But we shouldn't make the number look more precise than it is.
+ FormatScientific = ((unsigned) exp > FormatMaxPadding ||
+ NDigits + (unsigned) exp > FormatPrecision);
+ } else {
+ // Power of the most significant digit.
+ int MSD = exp + (int) (NDigits - 1);
+ if (MSD >= 0) {
+ // 765e-2 == 7.65
+ FormatScientific = false;
+ } else {
+ // 765e-5 == 0.00765
+ // ^ ^^
+ FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
+ }
+ }
+ }
+
+ // Scientific formatting is pretty straightforward.
+ if (FormatScientific) {
+ exp += (NDigits - 1);
+
+ Str.push_back(buffer[NDigits-1]);
+ Str.push_back('.');
+ if (NDigits == 1)
+ Str.push_back('0');
+ else
+ for (unsigned I = 1; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-1-I]);
+ Str.push_back('E');
+
+ Str.push_back(exp >= 0 ? '+' : '-');
+ if (exp < 0) exp = -exp;
+ SmallVector<char, 6> expbuf;
+ do {
+ expbuf.push_back((char) ('0' + (exp % 10)));
+ exp /= 10;
+ } while (exp);
+ for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
+ Str.push_back(expbuf[E-1-I]);
+ return;
+ }
+
+ // Non-scientific, positive exponents.
+ if (exp >= 0) {
+ for (unsigned I = 0; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-1-I]);
+ for (unsigned I = 0; I != (unsigned) exp; ++I)
+ Str.push_back('0');
+ return;
+ }
+
+ // Non-scientific, negative exponents.
+
+ // The number of digits to the left of the decimal point.
+ int NWholeDigits = exp + (int) NDigits;
+
+ unsigned I = 0;
+ if (NWholeDigits > 0) {
+ for (; I != (unsigned) NWholeDigits; ++I)
+ Str.push_back(buffer[NDigits-I-1]);
+ Str.push_back('.');
+ } else {
+ unsigned NZeros = 1 + (unsigned) -NWholeDigits;
+
+ Str.push_back('0');
+ Str.push_back('.');
+ for (unsigned Z = 1; Z != NZeros; ++Z)
+ Str.push_back('0');
+ }
+
+ for (; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-I-1]);
+}
+
+bool APFloat::getExactInverse(APFloat *inv) const {
+ // Special floats and denormals have no exact inverse.
+ if (category != fcNormal)
+ return false;
+
+ // Check that the number is a power of two by making sure that only the
+ // integer bit is set in the significand.
+ if (significandLSB() != semantics->precision - 1)
+ return false;
+
+ // Get the inverse.
+ APFloat reciprocal(*semantics, 1ULL);
+ if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
+ return false;
+
+ // Avoid multiplication with a denormal, it is not safe on all platforms and
+ // may be slower than a normal division.
+ if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
+ return false;
+
+ assert(reciprocal.category == fcNormal &&
+ reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
+
+ if (inv)
+ *inv = reciprocal;
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp
new file mode 100644
index 000000000000..e8534753b46e
--- /dev/null
+++ b/contrib/llvm/lib/Support/APInt.cpp
@@ -0,0 +1,2910 @@
+//===-- APInt.cpp - Implement APInt class ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision integer
+// constant values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "apint"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+using namespace llvm;
+
+/// A utility function for allocating memory, checking for allocation failures,
+/// and ensuring the contents are zeroed.
+inline static uint64_t* getClearedMemory(unsigned numWords) {
+ uint64_t * result = new uint64_t[numWords];
+ assert(result && "APInt memory allocation fails!");
+ memset(result, 0, numWords * sizeof(uint64_t));
+ return result;
+}
+
+/// A utility function for allocating memory and checking for allocation
+/// failure. The content is not zeroed.
+inline static uint64_t* getMemory(unsigned numWords) {
+ uint64_t * result = new uint64_t[numWords];
+ assert(result && "APInt memory allocation fails!");
+ return result;
+}
+
+/// A utility function that converts a character to a digit.
+inline static unsigned getDigit(char cdigit, uint8_t radix) {
+ unsigned r;
+
+ if (radix == 16 || radix == 36) {
+ r = cdigit - '0';
+ if (r <= 9)
+ return r;
+
+ r = cdigit - 'A';
+ if (r <= radix - 11U)
+ return r + 10;
+
+ r = cdigit - 'a';
+ if (r <= radix - 11U)
+ return r + 10;
+
+ radix = 10;
+ }
+
+ r = cdigit - '0';
+ if (r < radix)
+ return r;
+
+ return -1U;
+}
+
+
+void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
+ pVal = getClearedMemory(getNumWords());
+ pVal[0] = val;
+ if (isSigned && int64_t(val) < 0)
+ for (unsigned i = 1; i < getNumWords(); ++i)
+ pVal[i] = -1ULL;
+}
+
+void APInt::initSlowCase(const APInt& that) {
+ pVal = getMemory(getNumWords());
+ memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
+}
+
+void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
+ assert(BitWidth && "Bitwidth too small");
+ assert(bigVal.data() && "Null pointer detected!");
+ if (isSingleWord())
+ VAL = bigVal[0];
+ else {
+ // Get memory, cleared to 0
+ pVal = getClearedMemory(getNumWords());
+ // Calculate the number of words to copy
+ unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
+ // Copy the words from bigVal to pVal
+ memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE);
+ }
+ // Make sure unused high bits are cleared
+ clearUnusedBits();
+}
+
+APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
+ : BitWidth(numBits), VAL(0) {
+ initFromArray(bigVal);
+}
+
+APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
+ : BitWidth(numBits), VAL(0) {
+ initFromArray(makeArrayRef(bigVal, numWords));
+}
+
+APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
+ : BitWidth(numbits), VAL(0) {
+ assert(BitWidth && "Bitwidth too small");
+ fromString(numbits, Str, radix);
+}
+
+APInt& APInt::AssignSlowCase(const APInt& RHS) {
+ // Don't do anything for X = X
+ if (this == &RHS)
+ return *this;
+
+ if (BitWidth == RHS.getBitWidth()) {
+ // assume same bit-width single-word case is already handled
+ assert(!isSingleWord());
+ memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
+ return *this;
+ }
+
+ if (isSingleWord()) {
+ // assume case where both are single words is already handled
+ assert(!RHS.isSingleWord());
+ VAL = 0;
+ pVal = getMemory(RHS.getNumWords());
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ } else if (getNumWords() == RHS.getNumWords())
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ else if (RHS.isSingleWord()) {
+ delete [] pVal;
+ VAL = RHS.VAL;
+ } else {
+ delete [] pVal;
+ pVal = getMemory(RHS.getNumWords());
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ }
+ BitWidth = RHS.BitWidth;
+ return clearUnusedBits();
+}
+
+APInt& APInt::operator=(uint64_t RHS) {
+ if (isSingleWord())
+ VAL = RHS;
+ else {
+ pVal[0] = RHS;
+ memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
+ }
+ return clearUnusedBits();
+}
+
+/// Profile - This method 'profiles' an APInt for use with FoldingSet.
+void APInt::Profile(FoldingSetNodeID& ID) const {
+ ID.AddInteger(BitWidth);
+
+ if (isSingleWord()) {
+ ID.AddInteger(VAL);
+ return;
+ }
+
+ unsigned NumWords = getNumWords();
+ for (unsigned i = 0; i < NumWords; ++i)
+ ID.AddInteger(pVal[i]);
+}
+
+/// add_1 - This function adds a single "digit" integer, y, to the multiple
+/// "digit" integer array, x[]. x[] is modified to reflect the addition and
+/// 1 is returned if there is a carry out, otherwise 0 is returned.
+/// @returns the carry of the addition.
+static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+ for (unsigned i = 0; i < len; ++i) {
+ dest[i] = y + x[i];
+ if (dest[i] < y)
+ y = 1; // Carry one to next digit.
+ else {
+ y = 0; // No need to carry so exit early
+ break;
+ }
+ }
+ return y;
+}
+
+/// @brief Prefix increment operator. Increments the APInt by one.
+APInt& APInt::operator++() {
+ if (isSingleWord())
+ ++VAL;
+ else
+ add_1(pVal, pVal, getNumWords(), 1);
+ return clearUnusedBits();
+}
+
+/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
+/// the multi-digit integer array, x[], propagating the borrowed 1 value until
+/// no further borrowing is neeeded or it runs out of "digits" in x. The result
+/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
+/// In other words, if y > x then this function returns 1, otherwise 0.
+/// @returns the borrow out of the subtraction
+static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t X = x[i];
+ x[i] -= y;
+ if (y > X)
+ y = 1; // We have to "borrow 1" from next "digit"
+ else {
+ y = 0; // No need to borrow
+ break; // Remaining digits are unchanged so exit early
+ }
+ }
+ return bool(y);
+}
+
+/// @brief Prefix decrement operator. Decrements the APInt by one.
+APInt& APInt::operator--() {
+ if (isSingleWord())
+ --VAL;
+ else
+ sub_1(pVal, getNumWords(), 1);
+ return clearUnusedBits();
+}
+
+/// add - This function adds the integer array x to the integer array Y and
+/// places the result in dest.
+/// @returns the carry out from the addition
+/// @brief General addition of 64-bit integer arrays
+static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+ unsigned len) {
+ bool carry = false;
+ for (unsigned i = 0; i< len; ++i) {
+ uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
+ dest[i] = x[i] + y[i] + carry;
+ carry = dest[i] < limit || (carry && dest[i] == limit);
+ }
+ return carry;
+}
+
+/// Adds the RHS APint to this APInt.
+/// @returns this, after addition of RHS.
+/// @brief Addition assignment operator.
+APInt& APInt::operator+=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ VAL += RHS.VAL;
+ else {
+ add(pVal, pVal, RHS.pVal, getNumWords());
+ }
+ return clearUnusedBits();
+}
+
+/// Subtracts the integer array y from the integer array x
+/// @returns returns the borrow out.
+/// @brief Generalized subtraction of 64-bit integer arrays.
+static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+ unsigned len) {
+ bool borrow = false;
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
+ borrow = y[i] > x_tmp || (borrow && x[i] == 0);
+ dest[i] = x_tmp - y[i];
+ }
+ return borrow;
+}
+
+/// Subtracts the RHS APInt from this APInt
+/// @returns this, after subtraction
+/// @brief Subtraction assignment operator.
+APInt& APInt::operator-=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ VAL -= RHS.VAL;
+ else
+ sub(pVal, pVal, RHS.pVal, getNumWords());
+ return clearUnusedBits();
+}
+
+/// Multiplies an integer array, x, by a uint64_t integer and places the result
+/// into dest.
+/// @returns the carry out of the multiplication.
+/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
+static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+ // Split y into high 32-bit part (hy) and low 32-bit part (ly)
+ uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
+ uint64_t carry = 0;
+
+ // For each digit of x.
+ for (unsigned i = 0; i < len; ++i) {
+ // Split x into high and low words
+ uint64_t lx = x[i] & 0xffffffffULL;
+ uint64_t hx = x[i] >> 32;
+ // hasCarry - A flag to indicate if there is a carry to the next digit.
+ // hasCarry == 0, no carry
+ // hasCarry == 1, has carry
+ // hasCarry == 2, no carry and the calculation result == 0.
+ uint8_t hasCarry = 0;
+ dest[i] = carry + lx * ly;
+ // Determine if the add above introduces carry.
+ hasCarry = (dest[i] < carry) ? 1 : 0;
+ carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
+ // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
+ // (2^32 - 1) + 2^32 = 2^64.
+ hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+ carry += (lx * hy) & 0xffffffffULL;
+ dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
+ (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
+ }
+ return carry;
+}
+
+/// Multiplies integer array x by integer array y and stores the result into
+/// the integer array dest. Note that dest's size must be >= xlen + ylen.
+/// @brief Generalized multiplicate of integer arrays.
+static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
+ unsigned ylen) {
+ dest[xlen] = mul_1(dest, x, xlen, y[0]);
+ for (unsigned i = 1; i < ylen; ++i) {
+ uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
+ uint64_t carry = 0, lx = 0, hx = 0;
+ for (unsigned j = 0; j < xlen; ++j) {
+ lx = x[j] & 0xffffffffULL;
+ hx = x[j] >> 32;
+ // hasCarry - A flag to indicate if has carry.
+ // hasCarry == 0, no carry
+ // hasCarry == 1, has carry
+ // hasCarry == 2, no carry and the calculation result == 0.
+ uint8_t hasCarry = 0;
+ uint64_t resul = carry + lx * ly;
+ hasCarry = (resul < carry) ? 1 : 0;
+ carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
+ hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+ carry += (lx * hy) & 0xffffffffULL;
+ resul = (carry << 32) | (resul & 0xffffffffULL);
+ dest[i+j] += resul;
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
+ (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
+ ((lx * hy) >> 32) + hx * hy;
+ }
+ dest[i+xlen] = carry;
+ }
+}
+
+APInt& APInt::operator*=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL *= RHS.VAL;
+ clearUnusedBits();
+ return *this;
+ }
+
+ // Get some bit facts about LHS and check for zero
+ unsigned lhsBits = getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
+ if (!lhsWords)
+ // 0 * X ===> 0
+ return *this;
+
+ // Get some bit facts about RHS and check for zero
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
+ if (!rhsWords) {
+ // X * 0 ===> 0
+ clearAllBits();
+ return *this;
+ }
+
+ // Allocate space for the result
+ unsigned destWords = rhsWords + lhsWords;
+ uint64_t *dest = getMemory(destWords);
+
+ // Perform the long multiply
+ mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
+
+ // Copy result back into *this
+ clearAllBits();
+ unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
+ memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
+ clearUnusedBits();
+
+ // delete dest array and return
+ delete[] dest;
+ return *this;
+}
+
+APInt& APInt::operator&=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL &= RHS.VAL;
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] &= RHS.pVal[i];
+ return *this;
+}
+
+APInt& APInt::operator|=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL |= RHS.VAL;
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] |= RHS.pVal[i];
+ return *this;
+}
+
+APInt& APInt::operator^=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL ^= RHS.VAL;
+ this->clearUnusedBits();
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] ^= RHS.pVal[i];
+ return clearUnusedBits();
+}
+
+APInt APInt::AndSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t* val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] & RHS.pVal[i];
+ return APInt(val, getBitWidth());
+}
+
+APInt APInt::OrSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t *val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] | RHS.pVal[i];
+ return APInt(val, getBitWidth());
+}
+
+APInt APInt::XorSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t *val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] ^ RHS.pVal[i];
+
+ // 0^0==1 so clear the high bits in case they got set.
+ return APInt(val, getBitWidth()).clearUnusedBits();
+}
+
+APInt APInt::operator*(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL * RHS.VAL);
+ APInt Result(*this);
+ Result *= RHS;
+ return Result;
+}
+
+APInt APInt::operator+(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL + RHS.VAL);
+ APInt Result(BitWidth, 0);
+ add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
+ return Result.clearUnusedBits();
+}
+
+APInt APInt::operator-(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL - RHS.VAL);
+ APInt Result(BitWidth, 0);
+ sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
+ return Result.clearUnusedBits();
+}
+
+bool APInt::EqualSlowCase(const APInt& RHS) const {
+ // Get some facts about the number of bits used in the two operands.
+ unsigned n1 = getActiveBits();
+ unsigned n2 = RHS.getActiveBits();
+
+ // If the number of bits isn't the same, they aren't equal
+ if (n1 != n2)
+ return false;
+
+ // If the number of bits fits in a word, we only need to compare the low word.
+ if (n1 <= APINT_BITS_PER_WORD)
+ return pVal[0] == RHS.pVal[0];
+
+ // Otherwise, compare everything
+ for (int i = whichWord(n1 - 1); i >= 0; --i)
+ if (pVal[i] != RHS.pVal[i])
+ return false;
+ return true;
+}
+
+bool APInt::EqualSlowCase(uint64_t Val) const {
+ unsigned n = getActiveBits();
+ if (n <= APINT_BITS_PER_WORD)
+ return pVal[0] == Val;
+ else
+ return false;
+}
+
+bool APInt::ult(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord())
+ return VAL < RHS.VAL;
+
+ // Get active bit length of both operands
+ unsigned n1 = getActiveBits();
+ unsigned n2 = RHS.getActiveBits();
+
+ // If magnitude of LHS is less than RHS, return true.
+ if (n1 < n2)
+ return true;
+
+ // If magnitude of RHS is greather than LHS, return false.
+ if (n2 < n1)
+ return false;
+
+ // If they bot fit in a word, just compare the low order word
+ if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
+ return pVal[0] < RHS.pVal[0];
+
+ // Otherwise, compare all words
+ unsigned topWord = whichWord(std::max(n1,n2)-1);
+ for (int i = topWord; i >= 0; --i) {
+ if (pVal[i] > RHS.pVal[i])
+ return false;
+ if (pVal[i] < RHS.pVal[i])
+ return true;
+ }
+ return false;
+}
+
+bool APInt::slt(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord()) {
+ int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
+ int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
+ return lhsSext < rhsSext;
+ }
+
+ APInt lhs(*this);
+ APInt rhs(RHS);
+ bool lhsNeg = isNegative();
+ bool rhsNeg = rhs.isNegative();
+ if (lhsNeg) {
+ // Sign bit is set so perform two's complement to make it positive
+ lhs.flipAllBits();
+ ++lhs;
+ }
+ if (rhsNeg) {
+ // Sign bit is set so perform two's complement to make it positive
+ rhs.flipAllBits();
+ ++rhs;
+ }
+
+ // Now we have unsigned values to compare so do the comparison if necessary
+ // based on the negativeness of the values.
+ if (lhsNeg)
+ if (rhsNeg)
+ return lhs.ugt(rhs);
+ else
+ return true;
+ else if (rhsNeg)
+ return false;
+ else
+ return lhs.ult(rhs);
+}
+
+void APInt::setBit(unsigned bitPosition) {
+ if (isSingleWord())
+ VAL |= maskBit(bitPosition);
+ else
+ pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
+}
+
+/// Set the given bit to 0 whose position is given as "bitPosition".
+/// @brief Set a given bit to 0.
+void APInt::clearBit(unsigned bitPosition) {
+ if (isSingleWord())
+ VAL &= ~maskBit(bitPosition);
+ else
+ pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
+}
+
+/// @brief Toggle every bit to its opposite value.
+
+/// Toggle a given bit to its opposite value whose position is given
+/// as "bitPosition".
+/// @brief Toggles a given bit to its opposite value.
+void APInt::flipBit(unsigned bitPosition) {
+ assert(bitPosition < BitWidth && "Out of the bit-width range!");
+ if ((*this)[bitPosition]) clearBit(bitPosition);
+ else setBit(bitPosition);
+}
+
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
+ assert(!str.empty() && "Invalid string length");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
+ radix == 36) &&
+ "Radix should be 2, 8, 10, 16, or 36!");
+
+ size_t slen = str.size();
+
+ // Each computation below needs to know if it's negative.
+ StringRef::iterator p = str.begin();
+ unsigned isNegative = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String is only a sign, needs a value.");
+ }
+
+ // For radixes of power-of-two values, the bits required is accurately and
+ // easily computed
+ if (radix == 2)
+ return slen + isNegative;
+ if (radix == 8)
+ return slen * 3 + isNegative;
+ if (radix == 16)
+ return slen * 4 + isNegative;
+
+ // FIXME: base 36
+
+ // This is grossly inefficient but accurate. We could probably do something
+ // with a computation of roughly slen*64/20 and then adjust by the value of
+ // the first few digits. But, I'm not sure how accurate that could be.
+
+ // Compute a sufficient number of bits that is always large enough but might
+ // be too large. This avoids the assertion in the constructor. This
+ // calculation doesn't work appropriately for the numbers 0-9, so just use 4
+ // bits in that case.
+ unsigned sufficient
+ = radix == 10? (slen == 1 ? 4 : slen * 64/18)
+ : (slen == 1 ? 7 : slen * 16/3);
+
+ // Convert to the actual binary value.
+ APInt tmp(sufficient, StringRef(p, slen), radix);
+
+ // Compute how many bits are required. If the log is infinite, assume we need
+ // just bit.
+ unsigned log = tmp.logBase2();
+ if (log == (unsigned)-1) {
+ return isNegative + 1;
+ } else {
+ return isNegative + log + 1;
+ }
+}
+
+hash_code llvm::hash_value(const APInt &Arg) {
+ if (Arg.isSingleWord())
+ return hash_combine(Arg.VAL);
+
+ return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
+}
+
+/// HiBits - This function returns the high "numBits" bits of this APInt.
+APInt APInt::getHiBits(unsigned numBits) const {
+ return APIntOps::lshr(*this, BitWidth - numBits);
+}
+
+/// LoBits - This function returns the low "numBits" bits of this APInt.
+APInt APInt::getLoBits(unsigned numBits) const {
+ return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
+ BitWidth - numBits);
+}
+
+unsigned APInt::countLeadingZerosSlowCase() const {
+ // Treat the most significand word differently because it might have
+ // meaningless bits set beyond the precision.
+ unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
+ integerPart MSWMask;
+ if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
+ else {
+ MSWMask = ~integerPart(0);
+ BitsInMSW = APINT_BITS_PER_WORD;
+ }
+
+ unsigned i = getNumWords();
+ integerPart MSW = pVal[i-1] & MSWMask;
+ if (MSW)
+ return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
+
+ unsigned Count = BitsInMSW;
+ for (--i; i > 0u; --i) {
+ if (pVal[i-1] == 0)
+ Count += APINT_BITS_PER_WORD;
+ else {
+ Count += CountLeadingZeros_64(pVal[i-1]);
+ break;
+ }
+ }
+ return Count;
+}
+
+unsigned APInt::countLeadingOnes() const {
+ if (isSingleWord())
+ return CountLeadingOnes_64(VAL << (APINT_BITS_PER_WORD - BitWidth));
+
+ unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
+ unsigned shift;
+ if (!highWordBits) {
+ highWordBits = APINT_BITS_PER_WORD;
+ shift = 0;
+ } else {
+ shift = APINT_BITS_PER_WORD - highWordBits;
+ }
+ int i = getNumWords() - 1;
+ unsigned Count = CountLeadingOnes_64(pVal[i] << shift);
+ if (Count == highWordBits) {
+ for (i--; i >= 0; --i) {
+ if (pVal[i] == -1ULL)
+ Count += APINT_BITS_PER_WORD;
+ else {
+ Count += CountLeadingOnes_64(pVal[i]);
+ break;
+ }
+ }
+ }
+ return Count;
+}
+
+unsigned APInt::countTrailingZeros() const {
+ if (isSingleWord())
+ return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
+ unsigned Count = 0;
+ unsigned i = 0;
+ for (; i < getNumWords() && pVal[i] == 0; ++i)
+ Count += APINT_BITS_PER_WORD;
+ if (i < getNumWords())
+ Count += CountTrailingZeros_64(pVal[i]);
+ return std::min(Count, BitWidth);
+}
+
+unsigned APInt::countTrailingOnesSlowCase() const {
+ unsigned Count = 0;
+ unsigned i = 0;
+ for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
+ Count += APINT_BITS_PER_WORD;
+ if (i < getNumWords())
+ Count += CountTrailingOnes_64(pVal[i]);
+ return std::min(Count, BitWidth);
+}
+
+unsigned APInt::countPopulationSlowCase() const {
+ unsigned Count = 0;
+ for (unsigned i = 0; i < getNumWords(); ++i)
+ Count += CountPopulation_64(pVal[i]);
+ return Count;
+}
+
+/// Perform a logical right-shift from Src to Dst, which must be equal or
+/// non-overlapping, of Words words, by Shift, which must be less than 64.
+static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words,
+ unsigned Shift) {
+ uint64_t Carry = 0;
+ for (int I = Words - 1; I >= 0; --I) {
+ uint64_t Tmp = Src[I];
+ Dst[I] = (Tmp >> Shift) | Carry;
+ Carry = Tmp << (64 - Shift);
+ }
+}
+
+APInt APInt::byteSwap() const {
+ assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
+ if (BitWidth == 16)
+ return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
+ if (BitWidth == 32)
+ return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
+ if (BitWidth == 48) {
+ unsigned Tmp1 = unsigned(VAL >> 16);
+ Tmp1 = ByteSwap_32(Tmp1);
+ uint16_t Tmp2 = uint16_t(VAL);
+ Tmp2 = ByteSwap_16(Tmp2);
+ return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
+ }
+ if (BitWidth == 64)
+ return APInt(BitWidth, ByteSwap_64(VAL));
+
+ APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
+ for (unsigned I = 0, N = getNumWords(); I != N; ++I)
+ Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
+ if (Result.BitWidth != BitWidth) {
+ lshrNear(Result.pVal, Result.pVal, getNumWords(),
+ Result.BitWidth - BitWidth);
+ Result.BitWidth = BitWidth;
+ }
+ return Result;
+}
+
+APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
+ const APInt& API2) {
+ APInt A = API1, B = API2;
+ while (!!B) {
+ APInt T = B;
+ B = APIntOps::urem(A, B);
+ A = T;
+ }
+ return A;
+}
+
+APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
+ union {
+ double D;
+ uint64_t I;
+ } T;
+ T.D = Double;
+
+ // Get the sign bit from the highest order bit
+ bool isNeg = T.I >> 63;
+
+ // Get the 11-bit exponent and adjust for the 1023 bit bias
+ int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
+
+ // If the exponent is negative, the value is < 0 so just return 0.
+ if (exp < 0)
+ return APInt(width, 0u);
+
+ // Extract the mantissa by clearing the top 12 bits (sign + exponent).
+ uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
+
+ // If the exponent doesn't shift all bits out of the mantissa
+ if (exp < 52)
+ return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
+ APInt(width, mantissa >> (52 - exp));
+
+ // If the client didn't provide enough bits for us to shift the mantissa into
+ // then the result is undefined, just return 0
+ if (width <= exp - 52)
+ return APInt(width, 0);
+
+ // Otherwise, we have to shift the mantissa bits up to the right location
+ APInt Tmp(width, mantissa);
+ Tmp = Tmp.shl((unsigned)exp - 52);
+ return isNeg ? -Tmp : Tmp;
+}
+
+/// RoundToDouble - This function converts this APInt to a double.
+/// The layout for double is as following (IEEE Standard 754):
+/// --------------------------------------
+/// | Sign Exponent Fraction Bias |
+/// |-------------------------------------- |
+/// | 1[63] 11[62-52] 52[51-00] 1023 |
+/// --------------------------------------
+double APInt::roundToDouble(bool isSigned) const {
+
+ // Handle the simple case where the value is contained in one uint64_t.
+ // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
+ if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
+ if (isSigned) {
+ int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
+ return double(sext);
+ } else
+ return double(getWord(0));
+ }
+
+ // Determine if the value is negative.
+ bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
+
+ // Construct the absolute value if we're negative.
+ APInt Tmp(isNeg ? -(*this) : (*this));
+
+ // Figure out how many bits we're using.
+ unsigned n = Tmp.getActiveBits();
+
+ // The exponent (without bias normalization) is just the number of bits
+ // we are using. Note that the sign bit is gone since we constructed the
+ // absolute value.
+ uint64_t exp = n;
+
+ // Return infinity for exponent overflow
+ if (exp > 1023) {
+ if (!isSigned || !isNeg)
+ return std::numeric_limits<double>::infinity();
+ else
+ return -std::numeric_limits<double>::infinity();
+ }
+ exp += 1023; // Increment for 1023 bias
+
+ // Number of bits in mantissa is 52. To obtain the mantissa value, we must
+ // extract the high 52 bits from the correct words in pVal.
+ uint64_t mantissa;
+ unsigned hiWord = whichWord(n-1);
+ if (hiWord == 0) {
+ mantissa = Tmp.pVal[0];
+ if (n > 52)
+ mantissa >>= n - 52; // shift down, we want the top 52 bits.
+ } else {
+ assert(hiWord > 0 && "huh?");
+ uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
+ uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
+ mantissa = hibits | lobits;
+ }
+
+ // The leading bit of mantissa is implicit, so get rid of it.
+ uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
+ union {
+ double D;
+ uint64_t I;
+ } T;
+ T.I = sign | (exp << 52) | mantissa;
+ return T.D;
+}
+
+// Truncate to new width.
+APInt APInt::trunc(unsigned width) const {
+ assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width && "Can't truncate to 0 bits");
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, getRawData()[0]);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy full words.
+ unsigned i;
+ for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
+ Result.pVal[i] = pVal[i];
+
+ // Truncate and copy any partial word.
+ unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ Result.pVal[i] = pVal[i] << bits >> bits;
+
+ return Result;
+}
+
+// Sign extend to a new width.
+APInt APInt::sext(unsigned width) const {
+ assert(width > BitWidth && "Invalid APInt SignExtend request");
+
+ if (width <= APINT_BITS_PER_WORD) {
+ uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
+ val = (int64_t)val >> (width - BitWidth);
+ return APInt(width, val >> (APINT_BITS_PER_WORD - width));
+ }
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy full words.
+ unsigned i;
+ uint64_t word = 0;
+ for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
+ word = getRawData()[i];
+ Result.pVal[i] = word;
+ }
+
+ // Read and sign-extend any partial word.
+ unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ word = (int64_t)getRawData()[i] << bits >> bits;
+ else
+ word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
+
+ // Write remaining full words.
+ for (; i != width / APINT_BITS_PER_WORD; i++) {
+ Result.pVal[i] = word;
+ word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
+ }
+
+ // Write any partial word.
+ bits = (0 - width) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ Result.pVal[i] = word << bits >> bits;
+
+ return Result;
+}
+
+// Zero extend to a new width.
+APInt APInt::zext(unsigned width) const {
+ assert(width > BitWidth && "Invalid APInt ZeroExtend request");
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, VAL);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy words.
+ unsigned i;
+ for (i = 0; i != getNumWords(); i++)
+ Result.pVal[i] = getRawData()[i];
+
+ // Zero remaining words.
+ memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
+
+ return Result;
+}
+
+APInt APInt::zextOrTrunc(unsigned width) const {
+ if (BitWidth < width)
+ return zext(width);
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
+
+APInt APInt::sextOrTrunc(unsigned width) const {
+ if (BitWidth < width)
+ return sext(width);
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
+
+APInt APInt::zextOrSelf(unsigned width) const {
+ if (BitWidth < width)
+ return zext(width);
+ return *this;
+}
+
+APInt APInt::sextOrSelf(unsigned width) const {
+ if (BitWidth < width)
+ return sext(width);
+ return *this;
+}
+
+/// Arithmetic right-shift this APInt by shiftAmt.
+/// @brief Arithmetic right-shift function.
+APInt APInt::ashr(const APInt &shiftAmt) const {
+ return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+/// Arithmetic right-shift this APInt by shiftAmt.
+/// @brief Arithmetic right-shift function.
+APInt APInt::ashr(unsigned shiftAmt) const {
+ assert(shiftAmt <= BitWidth && "Invalid shift amount");
+ // Handle a degenerate case
+ if (shiftAmt == 0)
+ return *this;
+
+ // Handle single word shifts with built-in ashr
+ if (isSingleWord()) {
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0); // undefined
+ else {
+ unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
+ return APInt(BitWidth,
+ (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
+ }
+ }
+
+ // If all the bits were shifted out, the result is, technically, undefined.
+ // We return -1 if it was negative, 0 otherwise. We check this early to avoid
+ // issues in the algorithm below.
+ if (shiftAmt == BitWidth) {
+ if (isNegative())
+ return APInt(BitWidth, -1ULL, true);
+ else
+ return APInt(BitWidth, 0);
+ }
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // Compute some values needed by the following shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
+ unsigned breakWord = getNumWords() - 1 - offset; // last word affected
+ unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
+ if (bitsInWord == 0)
+ bitsInWord = APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ // Move the words containing significant bits
+ for (unsigned i = 0; i <= breakWord; ++i)
+ val[i] = pVal[i+offset]; // move whole word
+
+ // Adjust the top significant word for sign bit fill, if negative
+ if (isNegative())
+ if (bitsInWord < APINT_BITS_PER_WORD)
+ val[breakWord] |= ~0ULL << bitsInWord; // set high bits
+ } else {
+ // Shift the low order words
+ for (unsigned i = 0; i < breakWord; ++i) {
+ // This combines the shifted corresponding word with the low bits from
+ // the next word (shifted into this word's high bits).
+ val[i] = (pVal[i+offset] >> wordShift) |
+ (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
+ }
+
+ // Shift the break word. In this case there are no bits from the next word
+ // to include in this word.
+ val[breakWord] = pVal[breakWord+offset] >> wordShift;
+
+ // Deal with sign extenstion in the break word, and possibly the word before
+ // it.
+ if (isNegative()) {
+ if (wordShift > bitsInWord) {
+ if (breakWord > 0)
+ val[breakWord-1] |=
+ ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
+ val[breakWord] |= ~0ULL;
+ } else
+ val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
+ }
+ }
+
+ // Remaining words are 0 or -1, just assign them.
+ uint64_t fillValue = (isNegative() ? -1ULL : 0);
+ for (unsigned i = breakWord+1; i < getNumWords(); ++i)
+ val[i] = fillValue;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
+
+/// Logical right-shift this APInt by shiftAmt.
+/// @brief Logical right-shift function.
+APInt APInt::lshr(const APInt &shiftAmt) const {
+ return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+/// Logical right-shift this APInt by shiftAmt.
+/// @brief Logical right-shift function.
+APInt APInt::lshr(unsigned shiftAmt) const {
+ if (isSingleWord()) {
+ if (shiftAmt >= BitWidth)
+ return APInt(BitWidth, 0);
+ else
+ return APInt(BitWidth, this->VAL >> shiftAmt);
+ }
+
+ // If all the bits were shifted out, the result is 0. This avoids issues
+ // with shifting by the size of the integer type, which produces undefined
+ // results. We define these "undefined results" to always be 0.
+ if (shiftAmt >= BitWidth)
+ return APInt(BitWidth, 0);
+
+ // If none of the bits are shifted out, the result is *this. This avoids
+ // issues with shifting by the size of the integer type, which produces
+ // undefined results in the code below. This is also an optimization.
+ if (shiftAmt == 0)
+ return *this;
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // If we are shifting less than a word, compute the shift with a simple carry
+ if (shiftAmt < APINT_BITS_PER_WORD) {
+ lshrNear(val, pVal, getNumWords(), shiftAmt);
+ return APInt(val, BitWidth).clearUnusedBits();
+ }
+
+ // Compute some values needed by the remaining shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ for (unsigned i = 0; i < getNumWords() - offset; ++i)
+ val[i] = pVal[i+offset];
+ for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
+ val[i] = 0;
+ return APInt(val,BitWidth).clearUnusedBits();
+ }
+
+ // Shift the low order words
+ unsigned breakWord = getNumWords() - offset -1;
+ for (unsigned i = 0; i < breakWord; ++i)
+ val[i] = (pVal[i+offset] >> wordShift) |
+ (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
+ // Shift the break word.
+ val[breakWord] = pVal[breakWord+offset] >> wordShift;
+
+ // Remaining words are 0
+ for (unsigned i = breakWord+1; i < getNumWords(); ++i)
+ val[i] = 0;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
+
+/// Left-shift this APInt by shiftAmt.
+/// @brief Left-shift function.
+APInt APInt::shl(const APInt &shiftAmt) const {
+ // It's undefined behavior in C to shift by BitWidth or greater.
+ return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::shlSlowCase(unsigned shiftAmt) const {
+ // If all the bits were shifted out, the result is 0. This avoids issues
+ // with shifting by the size of the integer type, which produces undefined
+ // results. We define these "undefined results" to always be 0.
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0);
+
+ // If none of the bits are shifted out, the result is *this. This avoids a
+ // lshr by the words size in the loop below which can produce incorrect
+ // results. It also avoids the expensive computation below for a common case.
+ if (shiftAmt == 0)
+ return *this;
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // If we are shifting less than a word, do it the easy way
+ if (shiftAmt < APINT_BITS_PER_WORD) {
+ uint64_t carry = 0;
+ for (unsigned i = 0; i < getNumWords(); i++) {
+ val[i] = pVal[i] << shiftAmt | carry;
+ carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
+ }
+ return APInt(val, BitWidth).clearUnusedBits();
+ }
+
+ // Compute some values needed by the remaining shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ for (unsigned i = 0; i < offset; i++)
+ val[i] = 0;
+ for (unsigned i = offset; i < getNumWords(); i++)
+ val[i] = pVal[i-offset];
+ return APInt(val,BitWidth).clearUnusedBits();
+ }
+
+ // Copy whole words from this to Result.
+ unsigned i = getNumWords() - 1;
+ for (; i > offset; --i)
+ val[i] = pVal[i-offset] << wordShift |
+ pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
+ val[offset] = pVal[0] << wordShift;
+ for (i = 0; i < offset; ++i)
+ val[i] = 0;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
+
+APInt APInt::rotl(const APInt &rotateAmt) const {
+ return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::rotl(unsigned rotateAmt) const {
+ rotateAmt %= BitWidth;
+ if (rotateAmt == 0)
+ return *this;
+ return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
+}
+
+APInt APInt::rotr(const APInt &rotateAmt) const {
+ return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::rotr(unsigned rotateAmt) const {
+ rotateAmt %= BitWidth;
+ if (rotateAmt == 0)
+ return *this;
+ return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
+}
+
+// Square Root - this method computes and returns the square root of "this".
+// Three mechanisms are used for computation. For small values (<= 5 bits),
+// a table lookup is done. This gets some performance for common cases. For
+// values using less than 52 bits, the value is converted to double and then
+// the libc sqrt function is called. The result is rounded and then converted
+// back to a uint64_t which is then used to construct the result. Finally,
+// the Babylonian method for computing square roots is used.
+APInt APInt::sqrt() const {
+
+ // Determine the magnitude of the value.
+ unsigned magnitude = getActiveBits();
+
+ // Use a fast table for some small values. This also gets rid of some
+ // rounding errors in libc sqrt for small values.
+ if (magnitude <= 5) {
+ static const uint8_t results[32] = {
+ /* 0 */ 0,
+ /* 1- 2 */ 1, 1,
+ /* 3- 6 */ 2, 2, 2, 2,
+ /* 7-12 */ 3, 3, 3, 3, 3, 3,
+ /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
+ /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ /* 31 */ 6
+ };
+ return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
+ }
+
+ // If the magnitude of the value fits in less than 52 bits (the precision of
+ // an IEEE double precision floating point value), then we can use the
+ // libc sqrt function which will probably use a hardware sqrt computation.
+ // This should be faster than the algorithm below.
+ if (magnitude < 52) {
+#if HAVE_ROUND
+ return APInt(BitWidth,
+ uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
+#else
+ return APInt(BitWidth,
+ uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5));
+#endif
+ }
+
+ // Okay, all the short cuts are exhausted. We must compute it. The following
+ // is a classical Babylonian method for computing the square root. This code
+ // was adapted to APINt from a wikipedia article on such computations.
+ // See http://www.wikipedia.org/ and go to the page named
+ // Calculate_an_integer_square_root.
+ unsigned nbits = BitWidth, i = 4;
+ APInt testy(BitWidth, 16);
+ APInt x_old(BitWidth, 1);
+ APInt x_new(BitWidth, 0);
+ APInt two(BitWidth, 2);
+
+ // Select a good starting value using binary logarithms.
+ for (;; i += 2, testy = testy.shl(2))
+ if (i >= nbits || this->ule(testy)) {
+ x_old = x_old.shl(i / 2);
+ break;
+ }
+
+ // Use the Babylonian method to arrive at the integer square root:
+ for (;;) {
+ x_new = (this->udiv(x_old) + x_old).udiv(two);
+ if (x_old.ule(x_new))
+ break;
+ x_old = x_new;
+ }
+
+ // Make sure we return the closest approximation
+ // NOTE: The rounding calculation below is correct. It will produce an
+ // off-by-one discrepancy with results from pari/gp. That discrepancy has been
+ // determined to be a rounding issue with pari/gp as it begins to use a
+ // floating point representation after 192 bits. There are no discrepancies
+ // between this algorithm and pari/gp for bit widths < 192 bits.
+ APInt square(x_old * x_old);
+ APInt nextSquare((x_old + 1) * (x_old +1));
+ if (this->ult(square))
+ return x_old;
+ assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
+ APInt midpoint((nextSquare - square).udiv(two));
+ APInt offset(*this - square);
+ if (offset.ult(midpoint))
+ return x_old;
+ return x_old + 1;
+}
+
+/// Computes the multiplicative inverse of this APInt for a given modulo. The
+/// iterative extended Euclidean algorithm is used to solve for this value,
+/// however we simplify it to speed up calculating only the inverse, and take
+/// advantage of div+rem calculations. We also use some tricks to avoid copying
+/// (potentially large) APInts around.
+APInt APInt::multiplicativeInverse(const APInt& modulo) const {
+ assert(ult(modulo) && "This APInt must be smaller than the modulo");
+
+ // Using the properties listed at the following web page (accessed 06/21/08):
+ // http://www.numbertheory.org/php/euclid.html
+ // (especially the properties numbered 3, 4 and 9) it can be proved that
+ // BitWidth bits suffice for all the computations in the algorithm implemented
+ // below. More precisely, this number of bits suffice if the multiplicative
+ // inverse exists, but may not suffice for the general extended Euclidean
+ // algorithm.
+
+ APInt r[2] = { modulo, *this };
+ APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
+ APInt q(BitWidth, 0);
+
+ unsigned i;
+ for (i = 0; r[i^1] != 0; i ^= 1) {
+ // An overview of the math without the confusing bit-flipping:
+ // q = r[i-2] / r[i-1]
+ // r[i] = r[i-2] % r[i-1]
+ // t[i] = t[i-2] - t[i-1] * q
+ udivrem(r[i], r[i^1], q, r[i]);
+ t[i] -= t[i^1] * q;
+ }
+
+ // If this APInt and the modulo are not coprime, there is no multiplicative
+ // inverse, so return 0. We check this by looking at the next-to-last
+ // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
+ // algorithm.
+ if (r[i] != 1)
+ return APInt(BitWidth, 0);
+
+ // The next-to-last t is the multiplicative inverse. However, we are
+ // interested in a positive inverse. Calcuate a positive one from a negative
+ // one if necessary. A simple addition of the modulo suffices because
+ // abs(t[i]) is known to be less than *this/2 (see the link above).
+ return t[i].isNegative() ? t[i] + modulo : t[i];
+}
+
+/// Calculate the magic numbers required to implement a signed integer division
+/// by a constant as a sequence of multiplies, adds and shifts. Requires that
+/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
+/// Warren, Jr., chapter 10.
+APInt::ms APInt::magic() const {
+ const APInt& d = *this;
+ unsigned p;
+ APInt ad, anc, delta, q1, r1, q2, r2, t;
+ APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+ struct ms mag;
+
+ ad = d.abs();
+ t = signedMin + (d.lshr(d.getBitWidth() - 1));
+ anc = t - 1 - t.urem(ad); // absolute value of nc
+ p = d.getBitWidth() - 1; // initialize p
+ q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
+ r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
+ q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
+ r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
+ do {
+ p = p + 1;
+ q1 = q1<<1; // update q1 = 2p/abs(nc)
+ r1 = r1<<1; // update r1 = rem(2p/abs(nc))
+ if (r1.uge(anc)) { // must be unsigned comparison
+ q1 = q1 + 1;
+ r1 = r1 - anc;
+ }
+ q2 = q2<<1; // update q2 = 2p/abs(d)
+ r2 = r2<<1; // update r2 = rem(2p/abs(d))
+ if (r2.uge(ad)) { // must be unsigned comparison
+ q2 = q2 + 1;
+ r2 = r2 - ad;
+ }
+ delta = ad - r2;
+ } while (q1.ult(delta) || (q1 == delta && r1 == 0));
+
+ mag.m = q2 + 1;
+ if (d.isNegative()) mag.m = -mag.m; // resulting magic number
+ mag.s = p - d.getBitWidth(); // resulting shift
+ return mag;
+}
+
+/// Calculate the magic numbers required to implement an unsigned integer
+/// division by a constant as a sequence of multiplies, adds and shifts.
+/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
+/// S. Warren, Jr., chapter 10.
+/// LeadingZeros can be used to simplify the calculation if the upper bits
+/// of the divided value are known zero.
+APInt::mu APInt::magicu(unsigned LeadingZeros) const {
+ const APInt& d = *this;
+ unsigned p;
+ APInt nc, delta, q1, r1, q2, r2;
+ struct mu magu;
+ magu.a = 0; // initialize "add" indicator
+ APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
+ APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+ APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
+
+ nc = allOnes - (allOnes - d).urem(d);
+ p = d.getBitWidth() - 1; // initialize p
+ q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
+ r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
+ q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
+ r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
+ do {
+ p = p + 1;
+ if (r1.uge(nc - r1)) {
+ q1 = q1 + q1 + 1; // update q1
+ r1 = r1 + r1 - nc; // update r1
+ }
+ else {
+ q1 = q1+q1; // update q1
+ r1 = r1+r1; // update r1
+ }
+ if ((r2 + 1).uge(d - r2)) {
+ if (q2.uge(signedMax)) magu.a = 1;
+ q2 = q2+q2 + 1; // update q2
+ r2 = r2+r2 + 1 - d; // update r2
+ }
+ else {
+ if (q2.uge(signedMin)) magu.a = 1;
+ q2 = q2+q2; // update q2
+ r2 = r2+r2 + 1; // update r2
+ }
+ delta = d - 1 - r2;
+ } while (p < d.getBitWidth()*2 &&
+ (q1.ult(delta) || (q1 == delta && r1 == 0)));
+ magu.m = q2 + 1; // resulting magic number
+ magu.s = p - d.getBitWidth(); // resulting shift
+ return magu;
+}
+
+/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
+/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
+/// variables here have the same names as in the algorithm. Comments explain
+/// the algorithm and any deviation from it.
+static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
+ unsigned m, unsigned n) {
+ assert(u && "Must provide dividend");
+ assert(v && "Must provide divisor");
+ assert(q && "Must provide quotient");
+ assert(u != v && u != q && v != q && "Must us different memory");
+ assert(n>1 && "n must be > 1");
+
+ // Knuth uses the value b as the base of the number system. In our case b
+ // is 2^31 so we just set it to -1u.
+ uint64_t b = uint64_t(1) << 32;
+
+#if 0
+ DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
+ DEBUG(dbgs() << "KnuthDiv: original:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << " by");
+ DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
+ DEBUG(dbgs() << '\n');
+#endif
+ // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
+ // u and v by d. Note that we have taken Knuth's advice here to use a power
+ // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
+ // 2 allows us to shift instead of multiply and it is easy to determine the
+ // shift amount from the leading zeros. We are basically normalizing the u
+ // and v so that its high bits are shifted to the top of v's range without
+ // overflow. Note that this can require an extra word in u so that u must
+ // be of length m+n+1.
+ unsigned shift = CountLeadingZeros_32(v[n-1]);
+ unsigned v_carry = 0;
+ unsigned u_carry = 0;
+ if (shift) {
+ for (unsigned i = 0; i < m+n; ++i) {
+ unsigned u_tmp = u[i] >> (32 - shift);
+ u[i] = (u[i] << shift) | u_carry;
+ u_carry = u_tmp;
+ }
+ for (unsigned i = 0; i < n; ++i) {
+ unsigned v_tmp = v[i] >> (32 - shift);
+ v[i] = (v[i] << shift) | v_carry;
+ v_carry = v_tmp;
+ }
+ }
+ u[m+n] = u_carry;
+#if 0
+ DEBUG(dbgs() << "KnuthDiv: normal:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << " by");
+ DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
+ int j = m;
+ do {
+ DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
+ // D3. [Calculate q'.].
+ // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
+ // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
+ // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
+ // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
+ // on v[n-2] determines at high speed most of the cases in which the trial
+ // value qp is one too large, and it eliminates all cases where qp is two
+ // too large.
+ uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
+ DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
+ uint64_t qp = dividend / v[n-1];
+ uint64_t rp = dividend % v[n-1];
+ if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
+ qp--;
+ rp += v[n-1];
+ if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
+ qp--;
+ }
+ DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
+
+ // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
+ // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
+ // consists of a simple multiplication by a one-place number, combined with
+ // a subtraction.
+ bool isNeg = false;
+ for (unsigned i = 0; i < n; ++i) {
+ uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
+ uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
+ bool borrow = subtrahend > u_tmp;
+ DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
+ << ", subtrahend == " << subtrahend
+ << ", borrow = " << borrow << '\n');
+
+ uint64_t result = u_tmp - subtrahend;
+ unsigned k = j + i;
+ u[k++] = (unsigned)(result & (b-1)); // subtract low word
+ u[k++] = (unsigned)(result >> 32); // subtract high word
+ while (borrow && k <= m+n) { // deal with borrow to the left
+ borrow = u[k] == 0;
+ u[k]--;
+ k++;
+ }
+ isNeg |= borrow;
+ DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " <<
+ u[j+i+1] << '\n');
+ }
+ DEBUG(dbgs() << "KnuthDiv: after subtraction:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << '\n');
+ // The digits (u[j+n]...u[j]) should be kept positive; if the result of
+ // this step is actually negative, (u[j+n]...u[j]) should be left as the
+ // true value plus b**(n+1), namely as the b's complement of
+ // the true value, and a "borrow" to the left should be remembered.
+ //
+ if (isNeg) {
+ bool carry = true; // true because b's complement is "complement + 1"
+ for (unsigned i = 0; i <= m+n; ++i) {
+ u[i] = ~u[i] + carry; // b's complement
+ carry = carry && u[i] == 0;
+ }
+ }
+ DEBUG(dbgs() << "KnuthDiv: after complement:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << '\n');
+
+ // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
+ // negative, go to step D6; otherwise go on to step D7.
+ q[j] = (unsigned)qp;
+ if (isNeg) {
+ // D6. [Add back]. The probability that this step is necessary is very
+ // small, on the order of only 2/b. Make sure that test data accounts for
+ // this possibility. Decrease q[j] by 1
+ q[j]--;
+ // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
+ // A carry will occur to the left of u[j+n], and it should be ignored
+ // since it cancels with the borrow that occurred in D4.
+ bool carry = false;
+ for (unsigned i = 0; i < n; i++) {
+ unsigned limit = std::min(u[j+i],v[i]);
+ u[j+i] += v[i] + carry;
+ carry = u[j+i] < limit || (carry && u[j+i] == limit);
+ }
+ u[j+n] += carry;
+ }
+ DEBUG(dbgs() << "KnuthDiv: after correction:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
+ DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
+
+ // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
+ } while (--j >= 0);
+
+ DEBUG(dbgs() << "KnuthDiv: quotient:");
+ DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
+ DEBUG(dbgs() << '\n');
+
+ // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
+ // remainder may be obtained by dividing u[...] by d. If r is non-null we
+ // compute the remainder (urem uses this).
+ if (r) {
+ // The value d is expressed by the "shift" value above since we avoided
+ // multiplication by d by using a shift left. So, all we have to do is
+ // shift right here. In order to mak
+ if (shift) {
+ unsigned carry = 0;
+ DEBUG(dbgs() << "KnuthDiv: remainder:");
+ for (int i = n-1; i >= 0; i--) {
+ r[i] = (u[i] >> shift) | carry;
+ carry = u[i] << (32 - shift);
+ DEBUG(dbgs() << " " << r[i]);
+ }
+ } else {
+ for (int i = n-1; i >= 0; i--) {
+ r[i] = u[i];
+ DEBUG(dbgs() << " " << r[i]);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+#if 0
+ DEBUG(dbgs() << '\n');
+#endif
+}
+
+void APInt::divide(const APInt LHS, unsigned lhsWords,
+ const APInt &RHS, unsigned rhsWords,
+ APInt *Quotient, APInt *Remainder)
+{
+ assert(lhsWords >= rhsWords && "Fractional result");
+
+ // First, compose the values into an array of 32-bit words instead of
+ // 64-bit words. This is a necessity of both the "short division" algorithm
+ // and the Knuth "classical algorithm" which requires there to be native
+ // operations for +, -, and * on an m bit value with an m*2 bit result. We
+ // can't use 64-bit operands here because we don't have native results of
+ // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
+ // work on large-endian machines.
+ uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
+ unsigned n = rhsWords * 2;
+ unsigned m = (lhsWords * 2) - n;
+
+ // Allocate space for the temporary values we need either on the stack, if
+ // it will fit, or on the heap if it won't.
+ unsigned SPACE[128];
+ unsigned *U = 0;
+ unsigned *V = 0;
+ unsigned *Q = 0;
+ unsigned *R = 0;
+ if ((Remainder?4:3)*n+2*m+1 <= 128) {
+ U = &SPACE[0];
+ V = &SPACE[m+n+1];
+ Q = &SPACE[(m+n+1) + n];
+ if (Remainder)
+ R = &SPACE[(m+n+1) + n + (m+n)];
+ } else {
+ U = new unsigned[m + n + 1];
+ V = new unsigned[n];
+ Q = new unsigned[m+n];
+ if (Remainder)
+ R = new unsigned[n];
+ }
+
+ // Initialize the dividend
+ memset(U, 0, (m+n+1)*sizeof(unsigned));
+ for (unsigned i = 0; i < lhsWords; ++i) {
+ uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
+ U[i * 2] = (unsigned)(tmp & mask);
+ U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
+ }
+ U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
+
+ // Initialize the divisor
+ memset(V, 0, (n)*sizeof(unsigned));
+ for (unsigned i = 0; i < rhsWords; ++i) {
+ uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
+ V[i * 2] = (unsigned)(tmp & mask);
+ V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
+ }
+
+ // initialize the quotient and remainder
+ memset(Q, 0, (m+n) * sizeof(unsigned));
+ if (Remainder)
+ memset(R, 0, n * sizeof(unsigned));
+
+ // Now, adjust m and n for the Knuth division. n is the number of words in
+ // the divisor. m is the number of words by which the dividend exceeds the
+ // divisor (i.e. m+n is the length of the dividend). These sizes must not
+ // contain any zero words or the Knuth algorithm fails.
+ for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
+ n--;
+ m++;
+ }
+ for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
+ m--;
+
+ // If we're left with only a single word for the divisor, Knuth doesn't work
+ // so we implement the short division algorithm here. This is much simpler
+ // and faster because we are certain that we can divide a 64-bit quantity
+ // by a 32-bit quantity at hardware speed and short division is simply a
+ // series of such operations. This is just like doing short division but we
+ // are using base 2^32 instead of base 10.
+ assert(n != 0 && "Divide by zero?");
+ if (n == 1) {
+ unsigned divisor = V[0];
+ unsigned remainder = 0;
+ for (int i = m+n-1; i >= 0; i--) {
+ uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
+ if (partial_dividend == 0) {
+ Q[i] = 0;
+ remainder = 0;
+ } else if (partial_dividend < divisor) {
+ Q[i] = 0;
+ remainder = (unsigned)partial_dividend;
+ } else if (partial_dividend == divisor) {
+ Q[i] = 1;
+ remainder = 0;
+ } else {
+ Q[i] = (unsigned)(partial_dividend / divisor);
+ remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
+ }
+ }
+ if (R)
+ R[0] = remainder;
+ } else {
+ // Now we're ready to invoke the Knuth classical divide algorithm. In this
+ // case n > 1.
+ KnuthDiv(U, V, Q, R, m, n);
+ }
+
+ // If the caller wants the quotient
+ if (Quotient) {
+ // Set up the Quotient value's memory.
+ if (Quotient->BitWidth != LHS.BitWidth) {
+ if (Quotient->isSingleWord())
+ Quotient->VAL = 0;
+ else
+ delete [] Quotient->pVal;
+ Quotient->BitWidth = LHS.BitWidth;
+ if (!Quotient->isSingleWord())
+ Quotient->pVal = getClearedMemory(Quotient->getNumWords());
+ } else
+ Quotient->clearAllBits();
+
+ // The quotient is in Q. Reconstitute the quotient into Quotient's low
+ // order words.
+ if (lhsWords == 1) {
+ uint64_t tmp =
+ uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
+ if (Quotient->isSingleWord())
+ Quotient->VAL = tmp;
+ else
+ Quotient->pVal[0] = tmp;
+ } else {
+ assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
+ for (unsigned i = 0; i < lhsWords; ++i)
+ Quotient->pVal[i] =
+ uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
+ }
+ }
+
+ // If the caller wants the remainder
+ if (Remainder) {
+ // Set up the Remainder value's memory.
+ if (Remainder->BitWidth != RHS.BitWidth) {
+ if (Remainder->isSingleWord())
+ Remainder->VAL = 0;
+ else
+ delete [] Remainder->pVal;
+ Remainder->BitWidth = RHS.BitWidth;
+ if (!Remainder->isSingleWord())
+ Remainder->pVal = getClearedMemory(Remainder->getNumWords());
+ } else
+ Remainder->clearAllBits();
+
+ // The remainder is in R. Reconstitute the remainder into Remainder's low
+ // order words.
+ if (rhsWords == 1) {
+ uint64_t tmp =
+ uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
+ if (Remainder->isSingleWord())
+ Remainder->VAL = tmp;
+ else
+ Remainder->pVal[0] = tmp;
+ } else {
+ assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
+ for (unsigned i = 0; i < rhsWords; ++i)
+ Remainder->pVal[i] =
+ uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
+ }
+ }
+
+ // Clean up the memory we allocated.
+ if (U != &SPACE[0]) {
+ delete [] U;
+ delete [] V;
+ delete [] Q;
+ delete [] R;
+ }
+}
+
+APInt APInt::udiv(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+
+ // First, deal with the easy case
+ if (isSingleWord()) {
+ assert(RHS.VAL != 0 && "Divide by zero?");
+ return APInt(BitWidth, VAL / RHS.VAL);
+ }
+
+ // Get some facts about the LHS and RHS number of bits and words
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+ assert(rhsWords && "Divided by zero???");
+ unsigned lhsBits = this->getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
+
+ // Deal with some degenerate cases
+ if (!lhsWords)
+ // 0 / X ===> 0
+ return APInt(BitWidth, 0);
+ else if (lhsWords < rhsWords || this->ult(RHS)) {
+ // X / Y ===> 0, iff X < Y
+ return APInt(BitWidth, 0);
+ } else if (*this == RHS) {
+ // X / X ===> 1
+ return APInt(BitWidth, 1);
+ } else if (lhsWords == 1 && rhsWords == 1) {
+ // All high words are zero, just use native divide
+ return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
+ }
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Quotient(1,0); // to hold result.
+ divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
+ return Quotient;
+}
+
+APInt APInt::sdiv(const APInt &RHS) const {
+ if (isNegative()) {
+ if (RHS.isNegative())
+ return (-(*this)).udiv(-RHS);
+ return -((-(*this)).udiv(RHS));
+ }
+ if (RHS.isNegative())
+ return -(this->udiv(-RHS));
+ return this->udiv(RHS);
+}
+
+APInt APInt::urem(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ assert(RHS.VAL != 0 && "Remainder by zero?");
+ return APInt(BitWidth, VAL % RHS.VAL);
+ }
+
+ // Get some facts about the LHS
+ unsigned lhsBits = getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
+
+ // Get some facts about the RHS
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+ assert(rhsWords && "Performing remainder operation by zero ???");
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ // 0 % Y ===> 0
+ return APInt(BitWidth, 0);
+ } else if (lhsWords < rhsWords || this->ult(RHS)) {
+ // X % Y ===> X, iff X < Y
+ return *this;
+ } else if (*this == RHS) {
+ // X % X == 0;
+ return APInt(BitWidth, 0);
+ } else if (lhsWords == 1) {
+ // All high words are zero, just use native remainder
+ return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
+ }
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Remainder(1,0);
+ divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
+ return Remainder;
+}
+
+APInt APInt::srem(const APInt &RHS) const {
+ if (isNegative()) {
+ if (RHS.isNegative())
+ return -((-(*this)).urem(-RHS));
+ return -((-(*this)).urem(RHS));
+ }
+ if (RHS.isNegative())
+ return this->urem(-RHS);
+ return this->urem(RHS);
+}
+
+void APInt::udivrem(const APInt &LHS, const APInt &RHS,
+ APInt &Quotient, APInt &Remainder) {
+ // Get some size facts about the dividend and divisor
+ unsigned lhsBits = LHS.getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ Quotient = 0; // 0 / Y ===> 0
+ Remainder = 0; // 0 % Y ===> 0
+ return;
+ }
+
+ if (lhsWords < rhsWords || LHS.ult(RHS)) {
+ Remainder = LHS; // X % Y ===> X, iff X < Y
+ Quotient = 0; // X / Y ===> 0, iff X < Y
+ return;
+ }
+
+ if (LHS == RHS) {
+ Quotient = 1; // X / X ===> 1
+ Remainder = 0; // X % X ===> 0;
+ return;
+ }
+
+ if (lhsWords == 1 && rhsWords == 1) {
+ // There is only one word to consider so use the native versions.
+ uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
+ uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
+ Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
+ Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
+ return;
+ }
+
+ // Okay, lets do it the long way
+ divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
+}
+
+void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
+ APInt &Quotient, APInt &Remainder) {
+ if (LHS.isNegative()) {
+ if (RHS.isNegative())
+ APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
+ else {
+ APInt::udivrem(-LHS, RHS, Quotient, Remainder);
+ Quotient = -Quotient;
+ }
+ Remainder = -Remainder;
+ } else if (RHS.isNegative()) {
+ APInt::udivrem(LHS, -RHS, Quotient, Remainder);
+ Quotient = -Quotient;
+ } else {
+ APInt::udivrem(LHS, RHS, Quotient, Remainder);
+ }
+}
+
+APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this+RHS;
+ Overflow = isNonNegative() == RHS.isNonNegative() &&
+ Res.isNonNegative() != isNonNegative();
+ return Res;
+}
+
+APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this+RHS;
+ Overflow = Res.ult(RHS);
+ return Res;
+}
+
+APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this - RHS;
+ Overflow = isNonNegative() != RHS.isNonNegative() &&
+ Res.isNonNegative() != isNonNegative();
+ return Res;
+}
+
+APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this-RHS;
+ Overflow = Res.ugt(*this);
+ return Res;
+}
+
+APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
+ // MININT/-1 --> overflow.
+ Overflow = isMinSignedValue() && RHS.isAllOnesValue();
+ return sdiv(RHS);
+}
+
+APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this * RHS;
+
+ if (*this != 0 && RHS != 0)
+ Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
+ else
+ Overflow = false;
+ return Res;
+}
+
+APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this * RHS;
+
+ if (*this != 0 && RHS != 0)
+ Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
+ else
+ Overflow = false;
+ return Res;
+}
+
+APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
+ Overflow = ShAmt >= getBitWidth();
+ if (Overflow)
+ ShAmt = getBitWidth()-1;
+
+ if (isNonNegative()) // Don't allow sign change.
+ Overflow = ShAmt >= countLeadingZeros();
+ else
+ Overflow = ShAmt >= countLeadingOnes();
+
+ return *this << ShAmt;
+}
+
+
+
+
+void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
+ // Check our assumptions here
+ assert(!str.empty() && "Invalid string length");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
+ radix == 36) &&
+ "Radix should be 2, 8, 10, 16, or 36!");
+
+ StringRef::iterator p = str.begin();
+ size_t slen = str.size();
+ bool isNeg = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String is only a sign, needs a value.");
+ }
+ assert((slen <= numbits || radix != 2) && "Insufficient bit width");
+ assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
+ assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
+ assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
+ "Insufficient bit width");
+
+ // Allocate memory
+ if (!isSingleWord())
+ pVal = getClearedMemory(getNumWords());
+
+ // Figure out if we can shift instead of multiply
+ unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
+
+ // Set up an APInt for the digit to add outside the loop so we don't
+ // constantly construct/destruct it.
+ APInt apdigit(getBitWidth(), 0);
+ APInt apradix(getBitWidth(), radix);
+
+ // Enter digit traversal loop
+ for (StringRef::iterator e = str.end(); p != e; ++p) {
+ unsigned digit = getDigit(*p, radix);
+ assert(digit < radix && "Invalid character in digit string");
+
+ // Shift or multiply the value by the radix
+ if (slen > 1) {
+ if (shift)
+ *this <<= shift;
+ else
+ *this *= apradix;
+ }
+
+ // Add in the digit we just interpreted
+ if (apdigit.isSingleWord())
+ apdigit.VAL = digit;
+ else
+ apdigit.pVal[0] = digit;
+ *this += apdigit;
+ }
+ // If its negative, put it in two's complement form
+ if (isNeg) {
+ --(*this);
+ this->flipAllBits();
+ }
+}
+
+void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
+ bool Signed, bool formatAsCLiteral) const {
+ assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
+ Radix == 36) &&
+ "Radix should be 2, 8, 10, 16, or 36!");
+
+ const char *Prefix = "";
+ if (formatAsCLiteral) {
+ switch (Radix) {
+ case 2:
+ // Binary literals are a non-standard extension added in gcc 4.3:
+ // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
+ Prefix = "0b";
+ break;
+ case 8:
+ Prefix = "0";
+ break;
+ case 10:
+ break; // No prefix
+ case 16:
+ Prefix = "0x";
+ break;
+ default:
+ llvm_unreachable("Invalid radix!");
+ }
+ }
+
+ // First, check for a zero value and just short circuit the logic below.
+ if (*this == 0) {
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ };
+ Str.push_back('0');
+ return;
+ }
+
+ static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+ if (isSingleWord()) {
+ char Buffer[65];
+ char *BufPtr = Buffer+65;
+
+ uint64_t N;
+ if (!Signed) {
+ N = getZExtValue();
+ } else {
+ int64_t I = getSExtValue();
+ if (I >= 0) {
+ N = I;
+ } else {
+ Str.push_back('-');
+ N = -(uint64_t)I;
+ }
+ }
+
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ };
+
+ while (N) {
+ *--BufPtr = Digits[N % Radix];
+ N /= Radix;
+ }
+ Str.append(BufPtr, Buffer+65);
+ return;
+ }
+
+ APInt Tmp(*this);
+
+ if (Signed && isNegative()) {
+ // They want to print the signed version and it is a negative value
+ // Flip the bits and add one to turn it into the equivalent positive
+ // value and put a '-' in the result.
+ Tmp.flipAllBits();
+ ++Tmp;
+ Str.push_back('-');
+ }
+
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ };
+
+ // We insert the digits backward, then reverse them to get the right order.
+ unsigned StartDig = Str.size();
+
+ // For the 2, 8 and 16 bit cases, we can just shift instead of divide
+ // because the number of bits per digit (1, 3 and 4 respectively) divides
+ // equaly. We just shift until the value is zero.
+ if (Radix == 2 || Radix == 8 || Radix == 16) {
+ // Just shift tmp right for each digit width until it becomes zero
+ unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
+ unsigned MaskAmt = Radix - 1;
+
+ while (Tmp != 0) {
+ unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
+ Str.push_back(Digits[Digit]);
+ Tmp = Tmp.lshr(ShiftAmt);
+ }
+ } else {
+ APInt divisor(Radix == 10? 4 : 8, Radix);
+ while (Tmp != 0) {
+ APInt APdigit(1, 0);
+ APInt tmp2(Tmp.getBitWidth(), 0);
+ divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
+ &APdigit);
+ unsigned Digit = (unsigned)APdigit.getZExtValue();
+ assert(Digit < Radix && "divide failed");
+ Str.push_back(Digits[Digit]);
+ Tmp = tmp2;
+ }
+ }
+
+ // Reverse the digits before returning.
+ std::reverse(Str.begin()+StartDig, Str.end());
+}
+
+/// toString - This returns the APInt as a std::string. Note that this is an
+/// inefficient method. It is better to pass in a SmallVector/SmallString
+/// to the methods above.
+std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
+ SmallString<40> S;
+ toString(S, Radix, Signed, /* formatAsCLiteral = */false);
+ return S.str();
+}
+
+
+void APInt::dump() const {
+ SmallString<40> S, U;
+ this->toStringUnsigned(U);
+ this->toStringSigned(S);
+ dbgs() << "APInt(" << BitWidth << "b, "
+ << U.str() << "u " << S.str() << "s)";
+}
+
+void APInt::print(raw_ostream &OS, bool isSigned) const {
+ SmallString<40> S;
+ this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
+ OS << S.str();
+}
+
+// This implements a variety of operations on a representation of
+// arbitrary precision, two's-complement, bignum integer values.
+
+// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
+// and unrestricting assumption.
+#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
+COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
+
+/* Some handy functions local to this file. */
+namespace {
+
+ /* Returns the integer part with the least significant BITS set.
+ BITS cannot be zero. */
+ static inline integerPart
+ lowBitMask(unsigned int bits)
+ {
+ assert(bits != 0 && bits <= integerPartWidth);
+
+ return ~(integerPart) 0 >> (integerPartWidth - bits);
+ }
+
+ /* Returns the value of the lower half of PART. */
+ static inline integerPart
+ lowHalf(integerPart part)
+ {
+ return part & lowBitMask(integerPartWidth / 2);
+ }
+
+ /* Returns the value of the upper half of PART. */
+ static inline integerPart
+ highHalf(integerPart part)
+ {
+ return part >> (integerPartWidth / 2);
+ }
+
+ /* Returns the bit number of the most significant set bit of a part.
+ If the input number has no bits set -1U is returned. */
+ static unsigned int
+ partMSB(integerPart value)
+ {
+ unsigned int n, msb;
+
+ if (value == 0)
+ return -1U;
+
+ n = integerPartWidth / 2;
+
+ msb = 0;
+ do {
+ if (value >> n) {
+ value >>= n;
+ msb += n;
+ }
+
+ n >>= 1;
+ } while (n);
+
+ return msb;
+ }
+
+ /* Returns the bit number of the least significant set bit of a
+ part. If the input number has no bits set -1U is returned. */
+ static unsigned int
+ partLSB(integerPart value)
+ {
+ unsigned int n, lsb;
+
+ if (value == 0)
+ return -1U;
+
+ lsb = integerPartWidth - 1;
+ n = integerPartWidth / 2;
+
+ do {
+ if (value << n) {
+ value <<= n;
+ lsb -= n;
+ }
+
+ n >>= 1;
+ } while (n);
+
+ return lsb;
+ }
+}
+
+/* Sets the least significant part of a bignum to the input value, and
+ zeroes out higher parts. */
+void
+APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(parts > 0);
+
+ dst[0] = part;
+ for (i = 1; i < parts; i++)
+ dst[i] = 0;
+}
+
+/* Assign one bignum to another. */
+void
+APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] = src[i];
+}
+
+/* Returns true if a bignum is zero, false otherwise. */
+bool
+APInt::tcIsZero(const integerPart *src, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ if (src[i])
+ return false;
+
+ return true;
+}
+
+/* Extract the given bit of a bignum; returns 0 or 1. */
+int
+APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
+{
+ return (parts[bit / integerPartWidth] &
+ ((integerPart) 1 << bit % integerPartWidth)) != 0;
+}
+
+/* Set the given bit of a bignum. */
+void
+APInt::tcSetBit(integerPart *parts, unsigned int bit)
+{
+ parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
+}
+
+/* Clears the given bit of a bignum. */
+void
+APInt::tcClearBit(integerPart *parts, unsigned int bit)
+{
+ parts[bit / integerPartWidth] &=
+ ~((integerPart) 1 << (bit % integerPartWidth));
+}
+
+/* Returns the bit number of the least significant set bit of a
+ number. If the input number has no bits set -1U is returned. */
+unsigned int
+APInt::tcLSB(const integerPart *parts, unsigned int n)
+{
+ unsigned int i, lsb;
+
+ for (i = 0; i < n; i++) {
+ if (parts[i] != 0) {
+ lsb = partLSB(parts[i]);
+
+ return lsb + i * integerPartWidth;
+ }
+ }
+
+ return -1U;
+}
+
+/* Returns the bit number of the most significant set bit of a number.
+ If the input number has no bits set -1U is returned. */
+unsigned int
+APInt::tcMSB(const integerPart *parts, unsigned int n)
+{
+ unsigned int msb;
+
+ do {
+ --n;
+
+ if (parts[n] != 0) {
+ msb = partMSB(parts[n]);
+
+ return msb + n * integerPartWidth;
+ }
+ } while (n);
+
+ return -1U;
+}
+
+/* Copy the bit vector of width srcBITS from SRC, starting at bit
+ srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
+ the least significant bit of DST. All high bits above srcBITS in
+ DST are zero-filled. */
+void
+APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
+ unsigned int srcBits, unsigned int srcLSB)
+{
+ unsigned int firstSrcPart, dstParts, shift, n;
+
+ dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
+ assert(dstParts <= dstCount);
+
+ firstSrcPart = srcLSB / integerPartWidth;
+ tcAssign (dst, src + firstSrcPart, dstParts);
+
+ shift = srcLSB % integerPartWidth;
+ tcShiftRight (dst, dstParts, shift);
+
+ /* We now have (dstParts * integerPartWidth - shift) bits from SRC
+ in DST. If this is less that srcBits, append the rest, else
+ clear the high bits. */
+ n = dstParts * integerPartWidth - shift;
+ if (n < srcBits) {
+ integerPart mask = lowBitMask (srcBits - n);
+ dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
+ << n % integerPartWidth);
+ } else if (n > srcBits) {
+ if (srcBits % integerPartWidth)
+ dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
+ }
+
+ /* Clear high parts. */
+ while (dstParts < dstCount)
+ dst[dstParts++] = 0;
+}
+
+/* DST += RHS + C where C is zero or one. Returns the carry flag. */
+integerPart
+APInt::tcAdd(integerPart *dst, const integerPart *rhs,
+ integerPart c, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(c <= 1);
+
+ for (i = 0; i < parts; i++) {
+ integerPart l;
+
+ l = dst[i];
+ if (c) {
+ dst[i] += rhs[i] + 1;
+ c = (dst[i] <= l);
+ } else {
+ dst[i] += rhs[i];
+ c = (dst[i] < l);
+ }
+ }
+
+ return c;
+}
+
+/* DST -= RHS + C where C is zero or one. Returns the carry flag. */
+integerPart
+APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
+ integerPart c, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(c <= 1);
+
+ for (i = 0; i < parts; i++) {
+ integerPart l;
+
+ l = dst[i];
+ if (c) {
+ dst[i] -= rhs[i] + 1;
+ c = (dst[i] >= l);
+ } else {
+ dst[i] -= rhs[i];
+ c = (dst[i] > l);
+ }
+ }
+
+ return c;
+}
+
+/* Negate a bignum in-place. */
+void
+APInt::tcNegate(integerPart *dst, unsigned int parts)
+{
+ tcComplement(dst, parts);
+ tcIncrement(dst, parts);
+}
+
+/* DST += SRC * MULTIPLIER + CARRY if add is true
+ DST = SRC * MULTIPLIER + CARRY if add is false
+
+ Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
+ they must start at the same point, i.e. DST == SRC.
+
+ If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
+ returned. Otherwise DST is filled with the least significant
+ DSTPARTS parts of the result, and if all of the omitted higher
+ parts were zero return zero, otherwise overflow occurred and
+ return one. */
+int
+APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
+ integerPart multiplier, integerPart carry,
+ unsigned int srcParts, unsigned int dstParts,
+ bool add)
+{
+ unsigned int i, n;
+
+ /* Otherwise our writes of DST kill our later reads of SRC. */
+ assert(dst <= src || dst >= src + srcParts);
+ assert(dstParts <= srcParts + 1);
+
+ /* N loops; minimum of dstParts and srcParts. */
+ n = dstParts < srcParts ? dstParts: srcParts;
+
+ for (i = 0; i < n; i++) {
+ integerPart low, mid, high, srcPart;
+
+ /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
+
+ This cannot overflow, because
+
+ (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
+
+ which is less than n^2. */
+
+ srcPart = src[i];
+
+ if (multiplier == 0 || srcPart == 0) {
+ low = carry;
+ high = 0;
+ } else {
+ low = lowHalf(srcPart) * lowHalf(multiplier);
+ high = highHalf(srcPart) * highHalf(multiplier);
+
+ mid = lowHalf(srcPart) * highHalf(multiplier);
+ high += highHalf(mid);
+ mid <<= integerPartWidth / 2;
+ if (low + mid < low)
+ high++;
+ low += mid;
+
+ mid = highHalf(srcPart) * lowHalf(multiplier);
+ high += highHalf(mid);
+ mid <<= integerPartWidth / 2;
+ if (low + mid < low)
+ high++;
+ low += mid;
+
+ /* Now add carry. */
+ if (low + carry < low)
+ high++;
+ low += carry;
+ }
+
+ if (add) {
+ /* And now DST[i], and store the new low part there. */
+ if (low + dst[i] < low)
+ high++;
+ dst[i] += low;
+ } else
+ dst[i] = low;
+
+ carry = high;
+ }
+
+ if (i < dstParts) {
+ /* Full multiplication, there is no overflow. */
+ assert(i + 1 == dstParts);
+ dst[i] = carry;
+ return 0;
+ } else {
+ /* We overflowed if there is carry. */
+ if (carry)
+ return 1;
+
+ /* We would overflow if any significant unwritten parts would be
+ non-zero. This is true if any remaining src parts are non-zero
+ and the multiplier is non-zero. */
+ if (multiplier)
+ for (; i < srcParts; i++)
+ if (src[i])
+ return 1;
+
+ /* We fitted in the narrow destination. */
+ return 0;
+ }
+}
+
+/* DST = LHS * RHS, where DST has the same width as the operands and
+ is filled with the least significant parts of the result. Returns
+ one if overflow occurred, otherwise zero. DST must be disjoint
+ from both operands. */
+int
+APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
+ const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+ int overflow;
+
+ assert(dst != lhs && dst != rhs);
+
+ overflow = 0;
+ tcSet(dst, 0, parts);
+
+ for (i = 0; i < parts; i++)
+ overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
+ parts - i, true);
+
+ return overflow;
+}
+
+/* DST = LHS * RHS, where DST has width the sum of the widths of the
+ operands. No overflow occurs. DST must be disjoint from both
+ operands. Returns the number of parts required to hold the
+ result. */
+unsigned int
+APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
+ const integerPart *rhs, unsigned int lhsParts,
+ unsigned int rhsParts)
+{
+ /* Put the narrower number on the LHS for less loops below. */
+ if (lhsParts > rhsParts) {
+ return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
+ } else {
+ unsigned int n;
+
+ assert(dst != lhs && dst != rhs);
+
+ tcSet(dst, 0, rhsParts);
+
+ for (n = 0; n < lhsParts; n++)
+ tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
+
+ n = lhsParts + rhsParts;
+
+ return n - (dst[n - 1] == 0);
+ }
+}
+
+/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
+ Otherwise set LHS to LHS / RHS with the fractional part discarded,
+ set REMAINDER to the remainder, return zero. i.e.
+
+ OLD_LHS = RHS * LHS + REMAINDER
+
+ SCRATCH is a bignum of the same size as the operands and result for
+ use by the routine; its contents need not be initialized and are
+ destroyed. LHS, REMAINDER and SCRATCH must be distinct.
+*/
+int
+APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
+ integerPart *remainder, integerPart *srhs,
+ unsigned int parts)
+{
+ unsigned int n, shiftCount;
+ integerPart mask;
+
+ assert(lhs != remainder && lhs != srhs && remainder != srhs);
+
+ shiftCount = tcMSB(rhs, parts) + 1;
+ if (shiftCount == 0)
+ return true;
+
+ shiftCount = parts * integerPartWidth - shiftCount;
+ n = shiftCount / integerPartWidth;
+ mask = (integerPart) 1 << (shiftCount % integerPartWidth);
+
+ tcAssign(srhs, rhs, parts);
+ tcShiftLeft(srhs, parts, shiftCount);
+ tcAssign(remainder, lhs, parts);
+ tcSet(lhs, 0, parts);
+
+ /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
+ the total. */
+ for (;;) {
+ int compare;
+
+ compare = tcCompare(remainder, srhs, parts);
+ if (compare >= 0) {
+ tcSubtract(remainder, srhs, 0, parts);
+ lhs[n] |= mask;
+ }
+
+ if (shiftCount == 0)
+ break;
+ shiftCount--;
+ tcShiftRight(srhs, parts, 1);
+ if ((mask >>= 1) == 0)
+ mask = (integerPart) 1 << (integerPartWidth - 1), n--;
+ }
+
+ return false;
+}
+
+/* Shift a bignum left COUNT bits in-place. Shifted in bits are zero.
+ There are no restrictions on COUNT. */
+void
+APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
+{
+ if (count) {
+ unsigned int jump, shift;
+
+ /* Jump is the inter-part jump; shift is is intra-part shift. */
+ jump = count / integerPartWidth;
+ shift = count % integerPartWidth;
+
+ while (parts > jump) {
+ integerPart part;
+
+ parts--;
+
+ /* dst[i] comes from the two parts src[i - jump] and, if we have
+ an intra-part shift, src[i - jump - 1]. */
+ part = dst[parts - jump];
+ if (shift) {
+ part <<= shift;
+ if (parts >= jump + 1)
+ part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
+ }
+
+ dst[parts] = part;
+ }
+
+ while (parts > 0)
+ dst[--parts] = 0;
+ }
+}
+
+/* Shift a bignum right COUNT bits in-place. Shifted in bits are
+ zero. There are no restrictions on COUNT. */
+void
+APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
+{
+ if (count) {
+ unsigned int i, jump, shift;
+
+ /* Jump is the inter-part jump; shift is is intra-part shift. */
+ jump = count / integerPartWidth;
+ shift = count % integerPartWidth;
+
+ /* Perform the shift. This leaves the most significant COUNT bits
+ of the result at zero. */
+ for (i = 0; i < parts; i++) {
+ integerPart part;
+
+ if (i + jump >= parts) {
+ part = 0;
+ } else {
+ part = dst[i + jump];
+ if (shift) {
+ part >>= shift;
+ if (i + jump + 1 < parts)
+ part |= dst[i + jump + 1] << (integerPartWidth - shift);
+ }
+ }
+
+ dst[i] = part;
+ }
+ }
+}
+
+/* Bitwise and of two bignums. */
+void
+APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] &= rhs[i];
+}
+
+/* Bitwise inclusive or of two bignums. */
+void
+APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] |= rhs[i];
+}
+
+/* Bitwise exclusive or of two bignums. */
+void
+APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] ^= rhs[i];
+}
+
+/* Complement a bignum in-place. */
+void
+APInt::tcComplement(integerPart *dst, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] = ~dst[i];
+}
+
+/* Comparison (unsigned) of two bignums. */
+int
+APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
+ unsigned int parts)
+{
+ while (parts) {
+ parts--;
+ if (lhs[parts] == rhs[parts])
+ continue;
+
+ if (lhs[parts] > rhs[parts])
+ return 1;
+ else
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Increment a bignum in-place, return the carry flag. */
+integerPart
+APInt::tcIncrement(integerPart *dst, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ if (++dst[i] != 0)
+ break;
+
+ return i == parts;
+}
+
+/* Set the least significant BITS bits of a bignum, clear the
+ rest. */
+void
+APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
+ unsigned int bits)
+{
+ unsigned int i;
+
+ i = 0;
+ while (bits > integerPartWidth) {
+ dst[i++] = ~(integerPart) 0;
+ bits -= integerPartWidth;
+ }
+
+ if (bits)
+ dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
+
+ while (i < parts)
+ dst[i++] = 0;
+}
diff --git a/contrib/llvm/lib/Support/APSInt.cpp b/contrib/llvm/lib/Support/APSInt.cpp
new file mode 100644
index 000000000000..73acafa690c7
--- /dev/null
+++ b/contrib/llvm/lib/Support/APSInt.cpp
@@ -0,0 +1,23 @@
+//===-- llvm/ADT/APSInt.cpp - Arbitrary Precision Signed Int ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the APSInt class, which is a simple class that
+// represents an arbitrary sized integer that knows its signedness.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/FoldingSet.h"
+
+using namespace llvm;
+
+void APSInt::Profile(FoldingSetNodeID& ID) const {
+ ID.AddInteger((unsigned) (IsUnsigned ? 1 : 0));
+ APInt::Profile(ID);
+}
diff --git a/contrib/llvm/lib/Support/Allocator.cpp b/contrib/llvm/lib/Support/Allocator.cpp
new file mode 100644
index 000000000000..3c4191b805a3
--- /dev/null
+++ b/contrib/llvm/lib/Support/Allocator.cpp
@@ -0,0 +1,196 @@
+//===--- Allocator.cpp - Simple memory allocation abstraction -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BumpPtrAllocator interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Recycler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+namespace llvm {
+
+BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold,
+ SlabAllocator &allocator)
+ : SlabSize(size), SizeThreshold(std::min(size, threshold)),
+ Allocator(allocator), CurSlab(0), BytesAllocated(0) { }
+
+BumpPtrAllocator::~BumpPtrAllocator() {
+ DeallocateSlabs(CurSlab);
+}
+
+/// AlignPtr - Align Ptr to Alignment bytes, rounding up. Alignment should
+/// be a power of two. This method rounds up, so AlignPtr(7, 4) == 8 and
+/// AlignPtr(8, 4) == 8.
+char *BumpPtrAllocator::AlignPtr(char *Ptr, size_t Alignment) {
+ assert(Alignment && (Alignment & (Alignment - 1)) == 0 &&
+ "Alignment is not a power of two!");
+
+ // Do the alignment.
+ return (char*)(((uintptr_t)Ptr + Alignment - 1) &
+ ~(uintptr_t)(Alignment - 1));
+}
+
+/// StartNewSlab - Allocate a new slab and move the bump pointers over into
+/// the new slab. Modifies CurPtr and End.
+void BumpPtrAllocator::StartNewSlab() {
+ // If we allocated a big number of slabs already it's likely that we're going
+ // to allocate more. Increase slab size to reduce mallocs and possibly memory
+ // overhead. The factors are chosen conservatively to avoid overallocation.
+ if (BytesAllocated >= SlabSize * 128)
+ SlabSize *= 2;
+
+ MemSlab *NewSlab = Allocator.Allocate(SlabSize);
+ NewSlab->NextPtr = CurSlab;
+ CurSlab = NewSlab;
+ CurPtr = (char*)(CurSlab + 1);
+ End = ((char*)CurSlab) + CurSlab->Size;
+}
+
+/// DeallocateSlabs - Deallocate all memory slabs after and including this
+/// one.
+void BumpPtrAllocator::DeallocateSlabs(MemSlab *Slab) {
+ while (Slab) {
+ MemSlab *NextSlab = Slab->NextPtr;
+#ifndef NDEBUG
+ // Poison the memory so stale pointers crash sooner. Note we must
+ // preserve the Size and NextPtr fields at the beginning.
+ sys::Memory::setRangeWritable(Slab + 1, Slab->Size - sizeof(MemSlab));
+ memset(Slab + 1, 0xCD, Slab->Size - sizeof(MemSlab));
+#endif
+ Allocator.Deallocate(Slab);
+ Slab = NextSlab;
+ }
+}
+
+/// Reset - Deallocate all but the current slab and reset the current pointer
+/// to the beginning of it, freeing all memory allocated so far.
+void BumpPtrAllocator::Reset() {
+ if (!CurSlab)
+ return;
+ DeallocateSlabs(CurSlab->NextPtr);
+ CurSlab->NextPtr = 0;
+ CurPtr = (char*)(CurSlab + 1);
+ End = ((char*)CurSlab) + CurSlab->Size;
+ BytesAllocated = 0;
+}
+
+/// Allocate - Allocate space at the specified alignment.
+///
+void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) {
+ if (!CurSlab) // Start a new slab if we haven't allocated one already.
+ StartNewSlab();
+
+ // Keep track of how many bytes we've allocated.
+ BytesAllocated += Size;
+
+ // 0-byte alignment means 1-byte alignment.
+ if (Alignment == 0) Alignment = 1;
+
+ // Allocate the aligned space, going forwards from CurPtr.
+ char *Ptr = AlignPtr(CurPtr, Alignment);
+
+ // Check if we can hold it.
+ if (Ptr + Size <= End) {
+ CurPtr = Ptr + Size;
+ // Update the allocation point of this memory block in MemorySanitizer.
+ // Without this, MemorySanitizer messages for values originated from here
+ // will point to the allocation of the entire slab.
+ __msan_allocated_memory(Ptr, Size);
+ return Ptr;
+ }
+
+ // If Size is really big, allocate a separate slab for it.
+ size_t PaddedSize = Size + sizeof(MemSlab) + Alignment - 1;
+ if (PaddedSize > SizeThreshold) {
+ MemSlab *NewSlab = Allocator.Allocate(PaddedSize);
+
+ // Put the new slab after the current slab, since we are not allocating
+ // into it.
+ NewSlab->NextPtr = CurSlab->NextPtr;
+ CurSlab->NextPtr = NewSlab;
+
+ Ptr = AlignPtr((char*)(NewSlab + 1), Alignment);
+ assert((uintptr_t)Ptr + Size <= (uintptr_t)NewSlab + NewSlab->Size);
+ __msan_allocated_memory(Ptr, Size);
+ return Ptr;
+ }
+
+ // Otherwise, start a new slab and try again.
+ StartNewSlab();
+ Ptr = AlignPtr(CurPtr, Alignment);
+ CurPtr = Ptr + Size;
+ assert(CurPtr <= End && "Unable to allocate memory!");
+ __msan_allocated_memory(Ptr, Size);
+ return Ptr;
+}
+
+unsigned BumpPtrAllocator::GetNumSlabs() const {
+ unsigned NumSlabs = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ ++NumSlabs;
+ }
+ return NumSlabs;
+}
+
+size_t BumpPtrAllocator::getTotalMemory() const {
+ size_t TotalMemory = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ TotalMemory += Slab->Size;
+ }
+ return TotalMemory;
+}
+
+void BumpPtrAllocator::PrintStats() const {
+ unsigned NumSlabs = 0;
+ size_t TotalMemory = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ TotalMemory += Slab->Size;
+ ++NumSlabs;
+ }
+
+ errs() << "\nNumber of memory regions: " << NumSlabs << '\n'
+ << "Bytes used: " << BytesAllocated << '\n'
+ << "Bytes allocated: " << TotalMemory << '\n'
+ << "Bytes wasted: " << (TotalMemory - BytesAllocated)
+ << " (includes alignment, etc)\n";
+}
+
+MallocSlabAllocator BumpPtrAllocator::DefaultSlabAllocator =
+ MallocSlabAllocator();
+
+SlabAllocator::~SlabAllocator() { }
+
+MallocSlabAllocator::~MallocSlabAllocator() { }
+
+MemSlab *MallocSlabAllocator::Allocate(size_t Size) {
+ MemSlab *Slab = (MemSlab*)Allocator.Allocate(Size, 0);
+ Slab->Size = Size;
+ Slab->NextPtr = 0;
+ return Slab;
+}
+
+void MallocSlabAllocator::Deallocate(MemSlab *Slab) {
+ Allocator.Deallocate(Slab);
+}
+
+void PrintRecyclerStats(size_t Size,
+ size_t Align,
+ size_t FreeListSize) {
+ errs() << "Recycler element size: " << Size << '\n'
+ << "Recycler element alignment: " << Align << '\n'
+ << "Number of elements free for recycling: " << FreeListSize << '\n';
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Atomic.cpp b/contrib/llvm/lib/Support/Atomic.cpp
new file mode 100644
index 000000000000..9559ad729570
--- /dev/null
+++ b/contrib/llvm/lib/Support/Atomic.cpp
@@ -0,0 +1,116 @@
+//===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file implements atomic operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Atomic.h"
+#include "llvm/Config/llvm-config.h"
+
+using namespace llvm;
+
+#if defined(_MSC_VER)
+#include <windows.h>
+#undef MemoryFence
+#endif
+
+#if defined(__GNUC__) || (defined(__IBMCPP__) && __IBMCPP__ >= 1210)
+#define GNU_ATOMICS
+#endif
+
+void sys::MemoryFence() {
+#if LLVM_HAS_ATOMICS == 0
+ return;
+#else
+# if defined(GNU_ATOMICS)
+ __sync_synchronize();
+# elif defined(_MSC_VER)
+ MemoryBarrier();
+# else
+# error No memory fence implementation for your platform!
+# endif
+#endif
+}
+
+sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
+ sys::cas_flag new_value,
+ sys::cas_flag old_value) {
+#if LLVM_HAS_ATOMICS == 0
+ sys::cas_flag result = *ptr;
+ if (result == old_value)
+ *ptr = new_value;
+ return result;
+#elif defined(GNU_ATOMICS)
+ return __sync_val_compare_and_swap(ptr, old_value, new_value);
+#elif defined(_MSC_VER)
+ return InterlockedCompareExchange(ptr, new_value, old_value);
+#else
+# error No compare-and-swap implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
+#if LLVM_HAS_ATOMICS == 0
+ ++(*ptr);
+ return *ptr;
+#elif defined(GNU_ATOMICS)
+ return __sync_add_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+ return InterlockedIncrement(ptr);
+#else
+# error No atomic increment implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
+#if LLVM_HAS_ATOMICS == 0
+ --(*ptr);
+ return *ptr;
+#elif defined(GNU_ATOMICS)
+ return __sync_sub_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+ return InterlockedDecrement(ptr);
+#else
+# error No atomic decrement implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+#if LLVM_HAS_ATOMICS == 0
+ *ptr += val;
+ return *ptr;
+#elif defined(GNU_ATOMICS)
+ return __sync_add_and_fetch(ptr, val);
+#elif defined(_MSC_VER)
+ return InterlockedExchangeAdd(ptr, val) + val;
+#else
+# error No atomic add implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicMul(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+ sys::cas_flag original, result;
+ do {
+ original = *ptr;
+ result = original * val;
+ } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+ return result;
+}
+
+sys::cas_flag sys::AtomicDiv(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+ sys::cas_flag original, result;
+ do {
+ original = *ptr;
+ result = original / val;
+ } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+ return result;
+}
diff --git a/contrib/llvm/lib/Support/BlockFrequency.cpp b/contrib/llvm/lib/Support/BlockFrequency.cpp
new file mode 100644
index 000000000000..84a993e3e5b6
--- /dev/null
+++ b/contrib/llvm/lib/Support/BlockFrequency.cpp
@@ -0,0 +1,130 @@
+//====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Block Frequency class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+namespace {
+
+/// mult96bit - Multiply FREQ by N and store result in W array.
+void mult96bit(uint64_t freq, uint32_t N, uint64_t W[2]) {
+ uint64_t u0 = freq & UINT32_MAX;
+ uint64_t u1 = freq >> 32;
+
+ // Represent 96-bit value as w[2]:w[1]:w[0];
+ uint32_t w[3] = { 0, 0, 0 };
+
+ uint64_t t = u0 * N;
+ uint64_t k = t >> 32;
+ w[0] = t;
+ t = u1 * N + k;
+ w[1] = t;
+ w[2] = t >> 32;
+
+ // W[1] - higher bits.
+ // W[0] - lower bits.
+ W[0] = w[0] + ((uint64_t) w[1] << 32);
+ W[1] = w[2];
+}
+
+
+/// div96bit - Divide 96-bit value stored in W array by D. Return 64-bit frequency.
+uint64_t div96bit(uint64_t W[2], uint32_t D) {
+ uint64_t y = W[0];
+ uint64_t x = W[1];
+ int i;
+
+ for (i = 1; i <= 64 && x; ++i) {
+ uint32_t t = (int)x >> 31;
+ x = (x << 1) | (y >> 63);
+ y = y << 1;
+ if ((x | t) >= D) {
+ x -= D;
+ ++y;
+ }
+ }
+
+ return y << (64 - i + 1);
+}
+
+}
+
+
+BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
+ uint32_t n = Prob.getNumerator();
+ uint32_t d = Prob.getDenominator();
+
+ assert(n <= d && "Probability must be less or equal to 1.");
+
+ // Calculate Frequency * n.
+ uint64_t mulLo = (Frequency & UINT32_MAX) * n;
+ uint64_t mulHi = (Frequency >> 32) * n;
+ uint64_t mulRes = (mulHi << 32) + mulLo;
+
+ // If there was overflow use 96-bit operations.
+ if (mulHi > UINT32_MAX || mulRes < mulLo) {
+ // 96-bit value represented as W[1]:W[0].
+ uint64_t W[2];
+
+ // Probability is less or equal to 1 which means that results must fit
+ // 64-bit.
+ mult96bit(Frequency, n, W);
+ Frequency = div96bit(W, d);
+ return *this;
+ }
+
+ Frequency = mulRes / d;
+ return *this;
+}
+
+const BlockFrequency
+BlockFrequency::operator*(const BranchProbability &Prob) const {
+ BlockFrequency Freq(Frequency);
+ Freq *= Prob;
+ return Freq;
+}
+
+BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) {
+ uint64_t Before = Freq.Frequency;
+ Frequency += Freq.Frequency;
+
+ // If overflow, set frequency to the maximum value.
+ if (Frequency < Before)
+ Frequency = UINT64_MAX;
+
+ return *this;
+}
+
+const BlockFrequency
+BlockFrequency::operator+(const BlockFrequency &Prob) const {
+ BlockFrequency Freq(Frequency);
+ Freq += Prob;
+ return Freq;
+}
+
+void BlockFrequency::print(raw_ostream &OS) const {
+ OS << Frequency;
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const BlockFrequency &Freq) {
+ Freq.print(OS);
+ return OS;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/BranchProbability.cpp b/contrib/llvm/lib/Support/BranchProbability.cpp
new file mode 100644
index 000000000000..e8b83e59802d
--- /dev/null
+++ b/contrib/llvm/lib/Support/BranchProbability.cpp
@@ -0,0 +1,36 @@
+//===-------------- lib/Support/BranchProbability.cpp -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Branch Probability class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void BranchProbability::print(raw_ostream &OS) const {
+ OS << N << " / " << D << " = " << format("%g%%", ((double)N / D) * 100.0);
+}
+
+void BranchProbability::dump() const {
+ dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob) {
+ Prob.print(OS);
+ return OS;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/COPYRIGHT.regex b/contrib/llvm/lib/Support/COPYRIGHT.regex
new file mode 100644
index 000000000000..a6392fd37c3d
--- /dev/null
+++ b/contrib/llvm/lib/Support/COPYRIGHT.regex
@@ -0,0 +1,54 @@
+$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $
+
+Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved.
+This software is not subject to any license of the American Telephone
+and Telegraph Company or of the Regents of the University of California.
+
+Permission is granted to anyone to use this software for any purpose on
+any computer system, and to alter it and redistribute it, subject
+to the following restrictions:
+
+1. The author is not responsible for the consequences of use of this
+ software, no matter how awful, even if they arise from flaws in it.
+
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission. Since few users ever read sources,
+ credits must appear in the documentation.
+
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software. Since few users
+ ever read sources, credits must appear in the documentation.
+
+4. This notice may not be removed or altered.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94
+ */
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
new file mode 100644
index 000000000000..560d7eb289c6
--- /dev/null
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -0,0 +1,1420 @@
+//===-- CommandLine.cpp - Command line parser implementation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements a command line argument processor that is useful when
+// creating a tool. It provides a simple, minimalistic interface that is easily
+// extensible and supports nonlocal (library) command line options.
+//
+// Note that rather than trying to figure out what this code does, you could try
+// reading the library documentation located in docs/CommandLine.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cerrno>
+#include <cstdlib>
+using namespace llvm;
+using namespace cl;
+
+//===----------------------------------------------------------------------===//
+// Template instantiations and anchors.
+//
+namespace llvm { namespace cl {
+TEMPLATE_INSTANTIATION(class basic_parser<bool>);
+TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>);
+TEMPLATE_INSTANTIATION(class basic_parser<int>);
+TEMPLATE_INSTANTIATION(class basic_parser<unsigned>);
+TEMPLATE_INSTANTIATION(class basic_parser<unsigned long long>);
+TEMPLATE_INSTANTIATION(class basic_parser<double>);
+TEMPLATE_INSTANTIATION(class basic_parser<float>);
+TEMPLATE_INSTANTIATION(class basic_parser<std::string>);
+TEMPLATE_INSTANTIATION(class basic_parser<char>);
+
+TEMPLATE_INSTANTIATION(class opt<unsigned>);
+TEMPLATE_INSTANTIATION(class opt<int>);
+TEMPLATE_INSTANTIATION(class opt<std::string>);
+TEMPLATE_INSTANTIATION(class opt<char>);
+TEMPLATE_INSTANTIATION(class opt<bool>);
+} } // end namespace llvm::cl
+
+void GenericOptionValue::anchor() {}
+void OptionValue<boolOrDefault>::anchor() {}
+void OptionValue<std::string>::anchor() {}
+void Option::anchor() {}
+void basic_parser_impl::anchor() {}
+void parser<bool>::anchor() {}
+void parser<boolOrDefault>::anchor() {}
+void parser<int>::anchor() {}
+void parser<unsigned>::anchor() {}
+void parser<unsigned long long>::anchor() {}
+void parser<double>::anchor() {}
+void parser<float>::anchor() {}
+void parser<std::string>::anchor() {}
+void parser<char>::anchor() {}
+
+//===----------------------------------------------------------------------===//
+
+// Globals for name and overview of program. Program name is not a string to
+// avoid static ctor/dtor issues.
+static char ProgramName[80] = "<premain>";
+static const char *ProgramOverview = 0;
+
+// This collects additional help to be printed.
+static ManagedStatic<std::vector<const char*> > MoreHelp;
+
+extrahelp::extrahelp(const char *Help)
+ : morehelp(Help) {
+ MoreHelp->push_back(Help);
+}
+
+static bool OptionListChanged = false;
+
+// MarkOptionsChanged - Internal helper function.
+void cl::MarkOptionsChanged() {
+ OptionListChanged = true;
+}
+
+/// RegisteredOptionList - This is the list of the command line options that
+/// have statically constructed themselves.
+static Option *RegisteredOptionList = 0;
+
+void Option::addArgument() {
+ assert(NextRegistered == 0 && "argument multiply registered!");
+
+ NextRegistered = RegisteredOptionList;
+ RegisteredOptionList = this;
+ MarkOptionsChanged();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Basic, shared command line option processing machinery.
+//
+
+/// GetOptionInfo - Scan the list of registered options, turning them into data
+/// structures that are easier to handle.
+static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
+ SmallVectorImpl<Option*> &SinkOpts,
+ StringMap<Option*> &OptionsMap) {
+ SmallVector<const char*, 16> OptionNames;
+ Option *CAOpt = 0; // The ConsumeAfter option if it exists.
+ for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) {
+ // If this option wants to handle multiple option names, get the full set.
+ // This handles enum options like "-O1 -O2" etc.
+ O->getExtraOptionNames(OptionNames);
+ if (O->ArgStr[0])
+ OptionNames.push_back(O->ArgStr);
+
+ // Handle named options.
+ for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+ // Add argument to the argument map!
+ if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) {
+ errs() << ProgramName << ": CommandLine Error: Argument '"
+ << OptionNames[i] << "' defined more than once!\n";
+ }
+ }
+
+ OptionNames.clear();
+
+ // Remember information about positional options.
+ if (O->getFormattingFlag() == cl::Positional)
+ PositionalOpts.push_back(O);
+ else if (O->getMiscFlags() & cl::Sink) // Remember sink options
+ SinkOpts.push_back(O);
+ else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) {
+ if (CAOpt)
+ O->error("Cannot specify more than one option with cl::ConsumeAfter!");
+ CAOpt = O;
+ }
+ }
+
+ if (CAOpt)
+ PositionalOpts.push_back(CAOpt);
+
+ // Make sure that they are in order of registration not backwards.
+ std::reverse(PositionalOpts.begin(), PositionalOpts.end());
+}
+
+
+/// LookupOption - Lookup the option specified by the specified option on the
+/// command line. If there is a value specified (after an equal sign) return
+/// that as well. This assumes that leading dashes have already been stripped.
+static Option *LookupOption(StringRef &Arg, StringRef &Value,
+ const StringMap<Option*> &OptionsMap) {
+ // Reject all dashes.
+ if (Arg.empty()) return 0;
+
+ size_t EqualPos = Arg.find('=');
+
+ // If we have an equals sign, remember the value.
+ if (EqualPos == StringRef::npos) {
+ // Look up the option.
+ StringMap<Option*>::const_iterator I = OptionsMap.find(Arg);
+ return I != OptionsMap.end() ? I->second : 0;
+ }
+
+ // If the argument before the = is a valid option name, we match. If not,
+ // return Arg unmolested.
+ StringMap<Option*>::const_iterator I =
+ OptionsMap.find(Arg.substr(0, EqualPos));
+ if (I == OptionsMap.end()) return 0;
+
+ Value = Arg.substr(EqualPos+1);
+ Arg = Arg.substr(0, EqualPos);
+ return I->second;
+}
+
+/// LookupNearestOption - Lookup the closest match to the option specified by
+/// the specified option on the command line. If there is a value specified
+/// (after an equal sign) return that as well. This assumes that leading dashes
+/// have already been stripped.
+static Option *LookupNearestOption(StringRef Arg,
+ const StringMap<Option*> &OptionsMap,
+ std::string &NearestString) {
+ // Reject all dashes.
+ if (Arg.empty()) return 0;
+
+ // Split on any equal sign.
+ std::pair<StringRef, StringRef> SplitArg = Arg.split('=');
+ StringRef &LHS = SplitArg.first; // LHS == Arg when no '=' is present.
+ StringRef &RHS = SplitArg.second;
+
+ // Find the closest match.
+ Option *Best = 0;
+ unsigned BestDistance = 0;
+ for (StringMap<Option*>::const_iterator it = OptionsMap.begin(),
+ ie = OptionsMap.end(); it != ie; ++it) {
+ Option *O = it->second;
+ SmallVector<const char*, 16> OptionNames;
+ O->getExtraOptionNames(OptionNames);
+ if (O->ArgStr[0])
+ OptionNames.push_back(O->ArgStr);
+
+ bool PermitValue = O->getValueExpectedFlag() != cl::ValueDisallowed;
+ StringRef Flag = PermitValue ? LHS : Arg;
+ for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+ StringRef Name = OptionNames[i];
+ unsigned Distance = StringRef(Name).edit_distance(
+ Flag, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance);
+ if (!Best || Distance < BestDistance) {
+ Best = O;
+ BestDistance = Distance;
+ if (RHS.empty() || !PermitValue)
+ NearestString = OptionNames[i];
+ else
+ NearestString = std::string(OptionNames[i]) + "=" + RHS.str();
+ }
+ }
+ }
+
+ return Best;
+}
+
+/// CommaSeparateAndAddOccurence - A wrapper around Handler->addOccurence() that
+/// does special handling of cl::CommaSeparated options.
+static bool CommaSeparateAndAddOccurence(Option *Handler, unsigned pos,
+ StringRef ArgName,
+ StringRef Value, bool MultiArg = false)
+{
+ // Check to see if this option accepts a comma separated list of values. If
+ // it does, we have to split up the value into multiple values.
+ if (Handler->getMiscFlags() & CommaSeparated) {
+ StringRef Val(Value);
+ StringRef::size_type Pos = Val.find(',');
+
+ while (Pos != StringRef::npos) {
+ // Process the portion before the comma.
+ if (Handler->addOccurrence(pos, ArgName, Val.substr(0, Pos), MultiArg))
+ return true;
+ // Erase the portion before the comma, AND the comma.
+ Val = Val.substr(Pos+1);
+ Value.substr(Pos+1); // Increment the original value pointer as well.
+ // Check for another comma.
+ Pos = Val.find(',');
+ }
+
+ Value = Val;
+ }
+
+ if (Handler->addOccurrence(pos, ArgName, Value, MultiArg))
+ return true;
+
+ return false;
+}
+
+/// ProvideOption - For Value, this differentiates between an empty value ("")
+/// and a null value (StringRef()). The later is accepted for arguments that
+/// don't allow a value (-foo) the former is rejected (-foo=).
+static inline bool ProvideOption(Option *Handler, StringRef ArgName,
+ StringRef Value, int argc,
+ const char *const *argv, int &i) {
+ // Is this a multi-argument option?
+ unsigned NumAdditionalVals = Handler->getNumAdditionalVals();
+
+ // Enforce value requirements
+ switch (Handler->getValueExpectedFlag()) {
+ case ValueRequired:
+ if (Value.data() == 0) { // No value specified?
+ if (i+1 >= argc)
+ return Handler->error("requires a value!");
+ // Steal the next argument, like for '-o filename'
+ Value = argv[++i];
+ }
+ break;
+ case ValueDisallowed:
+ if (NumAdditionalVals > 0)
+ return Handler->error("multi-valued option specified"
+ " with ValueDisallowed modifier!");
+
+ if (Value.data())
+ return Handler->error("does not allow a value! '" +
+ Twine(Value) + "' specified.");
+ break;
+ case ValueOptional:
+ break;
+ }
+
+ // If this isn't a multi-arg option, just run the handler.
+ if (NumAdditionalVals == 0)
+ return CommaSeparateAndAddOccurence(Handler, i, ArgName, Value);
+
+ // If it is, run the handle several times.
+ bool MultiArg = false;
+
+ if (Value.data()) {
+ if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg))
+ return true;
+ --NumAdditionalVals;
+ MultiArg = true;
+ }
+
+ while (NumAdditionalVals > 0) {
+ if (i+1 >= argc)
+ return Handler->error("not enough values!");
+ Value = argv[++i];
+
+ if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg))
+ return true;
+ MultiArg = true;
+ --NumAdditionalVals;
+ }
+ return false;
+}
+
+static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) {
+ int Dummy = i;
+ return ProvideOption(Handler, Handler->ArgStr, Arg, 0, 0, Dummy);
+}
+
+
+// Option predicates...
+static inline bool isGrouping(const Option *O) {
+ return O->getFormattingFlag() == cl::Grouping;
+}
+static inline bool isPrefixedOrGrouping(const Option *O) {
+ return isGrouping(O) || O->getFormattingFlag() == cl::Prefix;
+}
+
+// getOptionPred - Check to see if there are any options that satisfy the
+// specified predicate with names that are the prefixes in Name. This is
+// checked by progressively stripping characters off of the name, checking to
+// see if there options that satisfy the predicate. If we find one, return it,
+// otherwise return null.
+//
+static Option *getOptionPred(StringRef Name, size_t &Length,
+ bool (*Pred)(const Option*),
+ const StringMap<Option*> &OptionsMap) {
+
+ StringMap<Option*>::const_iterator OMI = OptionsMap.find(Name);
+
+ // Loop while we haven't found an option and Name still has at least two
+ // characters in it (so that the next iteration will not be the empty
+ // string.
+ while (OMI == OptionsMap.end() && Name.size() > 1) {
+ Name = Name.substr(0, Name.size()-1); // Chop off the last character.
+ OMI = OptionsMap.find(Name);
+ }
+
+ if (OMI != OptionsMap.end() && Pred(OMI->second)) {
+ Length = Name.size();
+ return OMI->second; // Found one!
+ }
+ return 0; // No option found!
+}
+
+/// HandlePrefixedOrGroupedOption - The specified argument string (which started
+/// with at least one '-') does not fully match an available option. Check to
+/// see if this is a prefix or grouped option. If so, split arg into output an
+/// Arg/Value pair and return the Option to parse it with.
+static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
+ bool &ErrorParsing,
+ const StringMap<Option*> &OptionsMap) {
+ if (Arg.size() == 1) return 0;
+
+ // Do the lookup!
+ size_t Length = 0;
+ Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap);
+ if (PGOpt == 0) return 0;
+
+ // If the option is a prefixed option, then the value is simply the
+ // rest of the name... so fall through to later processing, by
+ // setting up the argument name flags and value fields.
+ if (PGOpt->getFormattingFlag() == cl::Prefix) {
+ Value = Arg.substr(Length);
+ Arg = Arg.substr(0, Length);
+ assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt);
+ return PGOpt;
+ }
+
+ // This must be a grouped option... handle them now. Grouping options can't
+ // have values.
+ assert(isGrouping(PGOpt) && "Broken getOptionPred!");
+
+ do {
+ // Move current arg name out of Arg into OneArgName.
+ StringRef OneArgName = Arg.substr(0, Length);
+ Arg = Arg.substr(Length);
+
+ // Because ValueRequired is an invalid flag for grouped arguments,
+ // we don't need to pass argc/argv in.
+ assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired &&
+ "Option can not be cl::Grouping AND cl::ValueRequired!");
+ int Dummy = 0;
+ ErrorParsing |= ProvideOption(PGOpt, OneArgName,
+ StringRef(), 0, 0, Dummy);
+
+ // Get the next grouping option.
+ PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap);
+ } while (PGOpt && Length != Arg.size());
+
+ // Return the last option with Arg cut down to just the last one.
+ return PGOpt;
+}
+
+
+
+static bool RequiresValue(const Option *O) {
+ return O->getNumOccurrencesFlag() == cl::Required ||
+ O->getNumOccurrencesFlag() == cl::OneOrMore;
+}
+
+static bool EatsUnboundedNumberOfValues(const Option *O) {
+ return O->getNumOccurrencesFlag() == cl::ZeroOrMore ||
+ O->getNumOccurrencesFlag() == cl::OneOrMore;
+}
+
+/// ParseCStringVector - Break INPUT up wherever one or more
+/// whitespace characters are found, and store the resulting tokens in
+/// OUTPUT. The tokens stored in OUTPUT are dynamically allocated
+/// using strdup(), so it is the caller's responsibility to free()
+/// them later.
+///
+static void ParseCStringVector(std::vector<char *> &OutputVector,
+ const char *Input) {
+ // Characters which will be treated as token separators:
+ StringRef Delims = " \v\f\t\r\n";
+
+ StringRef WorkStr(Input);
+ while (!WorkStr.empty()) {
+ // If the first character is a delimiter, strip them off.
+ if (Delims.find(WorkStr[0]) != StringRef::npos) {
+ size_t Pos = WorkStr.find_first_not_of(Delims);
+ if (Pos == StringRef::npos) Pos = WorkStr.size();
+ WorkStr = WorkStr.substr(Pos);
+ continue;
+ }
+
+ // Find position of first delimiter.
+ size_t Pos = WorkStr.find_first_of(Delims);
+ if (Pos == StringRef::npos) Pos = WorkStr.size();
+
+ // Everything from 0 to Pos is the next word to copy.
+ char *NewStr = (char*)malloc(Pos+1);
+ memcpy(NewStr, WorkStr.data(), Pos);
+ NewStr[Pos] = 0;
+ OutputVector.push_back(NewStr);
+
+ WorkStr = WorkStr.substr(Pos);
+ }
+}
+
+/// ParseEnvironmentOptions - An alternative entry point to the
+/// CommandLine library, which allows you to read the program's name
+/// from the caller (as PROGNAME) and its command-line arguments from
+/// an environment variable (whose name is given in ENVVAR).
+///
+void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
+ const char *Overview) {
+ // Check args.
+ assert(progName && "Program name not specified");
+ assert(envVar && "Environment variable name missing");
+
+ // Get the environment variable they want us to parse options out of.
+ const char *envValue = getenv(envVar);
+ if (!envValue)
+ return;
+
+ // Get program's "name", which we wouldn't know without the caller
+ // telling us.
+ std::vector<char*> newArgv;
+ newArgv.push_back(strdup(progName));
+
+ // Parse the value of the environment variable into a "command line"
+ // and hand it off to ParseCommandLineOptions().
+ ParseCStringVector(newArgv, envValue);
+ int newArgc = static_cast<int>(newArgv.size());
+ ParseCommandLineOptions(newArgc, &newArgv[0], Overview);
+
+ // Free all the strdup()ed strings.
+ for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
+ i != e; ++i)
+ free(*i);
+}
+
+
+/// ExpandResponseFiles - Copy the contents of argv into newArgv,
+/// substituting the contents of the response files for the arguments
+/// of type @file.
+static void ExpandResponseFiles(unsigned argc, const char*const* argv,
+ std::vector<char*>& newArgv) {
+ for (unsigned i = 1; i != argc; ++i) {
+ const char *arg = argv[i];
+
+ if (arg[0] == '@') {
+ sys::PathWithStatus respFile(++arg);
+
+ // Check that the response file is not empty (mmap'ing empty
+ // files can be problematic).
+ const sys::FileStatus *FileStat = respFile.getFileStatus();
+ if (FileStat && FileStat->getSize() != 0) {
+
+ // If we could open the file, parse its contents, otherwise
+ // pass the @file option verbatim.
+
+ // TODO: we should also support recursive loading of response files,
+ // since this is how gcc behaves. (From their man page: "The file may
+ // itself contain additional @file options; any such options will be
+ // processed recursively.")
+
+ // Mmap the response file into memory.
+ OwningPtr<MemoryBuffer> respFilePtr;
+ if (!MemoryBuffer::getFile(respFile.c_str(), respFilePtr)) {
+ ParseCStringVector(newArgv, respFilePtr->getBufferStart());
+ continue;
+ }
+ }
+ }
+ newArgv.push_back(strdup(arg));
+ }
+}
+
+void cl::ParseCommandLineOptions(int argc, const char * const *argv,
+ const char *Overview) {
+ // Process all registered options.
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> Opts;
+ GetOptionInfo(PositionalOpts, SinkOpts, Opts);
+
+ assert((!Opts.empty() || !PositionalOpts.empty()) &&
+ "No options specified!");
+
+ // Expand response files.
+ std::vector<char*> newArgv;
+ newArgv.push_back(strdup(argv[0]));
+ ExpandResponseFiles(argc, argv, newArgv);
+ argv = &newArgv[0];
+ argc = static_cast<int>(newArgv.size());
+
+ // Copy the program name into ProgName, making sure not to overflow it.
+ std::string ProgName = sys::path::filename(argv[0]);
+ size_t Len = std::min(ProgName.size(), size_t(79));
+ memcpy(ProgramName, ProgName.data(), Len);
+ ProgramName[Len] = '\0';
+
+ ProgramOverview = Overview;
+ bool ErrorParsing = false;
+
+ // Check out the positional arguments to collect information about them.
+ unsigned NumPositionalRequired = 0;
+
+ // Determine whether or not there are an unlimited number of positionals
+ bool HasUnlimitedPositionals = false;
+
+ Option *ConsumeAfterOpt = 0;
+ if (!PositionalOpts.empty()) {
+ if (PositionalOpts[0]->getNumOccurrencesFlag() == cl::ConsumeAfter) {
+ assert(PositionalOpts.size() > 1 &&
+ "Cannot specify cl::ConsumeAfter without a positional argument!");
+ ConsumeAfterOpt = PositionalOpts[0];
+ }
+
+ // Calculate how many positional values are _required_.
+ bool UnboundedFound = false;
+ for (size_t i = ConsumeAfterOpt != 0, e = PositionalOpts.size();
+ i != e; ++i) {
+ Option *Opt = PositionalOpts[i];
+ if (RequiresValue(Opt))
+ ++NumPositionalRequired;
+ else if (ConsumeAfterOpt) {
+ // ConsumeAfter cannot be combined with "optional" positional options
+ // unless there is only one positional argument...
+ if (PositionalOpts.size() > 2)
+ ErrorParsing |=
+ Opt->error("error - this positional option will never be matched, "
+ "because it does not Require a value, and a "
+ "cl::ConsumeAfter option is active!");
+ } else if (UnboundedFound && !Opt->ArgStr[0]) {
+ // This option does not "require" a value... Make sure this option is
+ // not specified after an option that eats all extra arguments, or this
+ // one will never get any!
+ //
+ ErrorParsing |= Opt->error("error - option can never match, because "
+ "another positional argument will match an "
+ "unbounded number of values, and this option"
+ " does not require a value!");
+ }
+ UnboundedFound |= EatsUnboundedNumberOfValues(Opt);
+ }
+ HasUnlimitedPositionals = UnboundedFound || ConsumeAfterOpt;
+ }
+
+ // PositionalVals - A vector of "positional" arguments we accumulate into
+ // the process at the end.
+ //
+ SmallVector<std::pair<StringRef,unsigned>, 4> PositionalVals;
+
+ // If the program has named positional arguments, and the name has been run
+ // across, keep track of which positional argument was named. Otherwise put
+ // the positional args into the PositionalVals list...
+ Option *ActivePositionalArg = 0;
+
+ // Loop over all of the arguments... processing them.
+ bool DashDashFound = false; // Have we read '--'?
+ for (int i = 1; i < argc; ++i) {
+ Option *Handler = 0;
+ Option *NearestHandler = 0;
+ std::string NearestHandlerString;
+ StringRef Value;
+ StringRef ArgName = "";
+
+ // If the option list changed, this means that some command line
+ // option has just been registered or deregistered. This can occur in
+ // response to things like -load, etc. If this happens, rescan the options.
+ if (OptionListChanged) {
+ PositionalOpts.clear();
+ SinkOpts.clear();
+ Opts.clear();
+ GetOptionInfo(PositionalOpts, SinkOpts, Opts);
+ OptionListChanged = false;
+ }
+
+ // Check to see if this is a positional argument. This argument is
+ // considered to be positional if it doesn't start with '-', if it is "-"
+ // itself, or if we have seen "--" already.
+ //
+ if (argv[i][0] != '-' || argv[i][1] == 0 || DashDashFound) {
+ // Positional argument!
+ if (ActivePositionalArg) {
+ ProvidePositionalOption(ActivePositionalArg, argv[i], i);
+ continue; // We are done!
+ }
+
+ if (!PositionalOpts.empty()) {
+ PositionalVals.push_back(std::make_pair(argv[i],i));
+
+ // All of the positional arguments have been fulfulled, give the rest to
+ // the consume after option... if it's specified...
+ //
+ if (PositionalVals.size() >= NumPositionalRequired &&
+ ConsumeAfterOpt != 0) {
+ for (++i; i < argc; ++i)
+ PositionalVals.push_back(std::make_pair(argv[i],i));
+ break; // Handle outside of the argument processing loop...
+ }
+
+ // Delay processing positional arguments until the end...
+ continue;
+ }
+ } else if (argv[i][0] == '-' && argv[i][1] == '-' && argv[i][2] == 0 &&
+ !DashDashFound) {
+ DashDashFound = true; // This is the mythical "--"?
+ continue; // Don't try to process it as an argument itself.
+ } else if (ActivePositionalArg &&
+ (ActivePositionalArg->getMiscFlags() & PositionalEatsArgs)) {
+ // If there is a positional argument eating options, check to see if this
+ // option is another positional argument. If so, treat it as an argument,
+ // otherwise feed it to the eating positional.
+ ArgName = argv[i]+1;
+ // Eat leading dashes.
+ while (!ArgName.empty() && ArgName[0] == '-')
+ ArgName = ArgName.substr(1);
+
+ Handler = LookupOption(ArgName, Value, Opts);
+ if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
+ ProvidePositionalOption(ActivePositionalArg, argv[i], i);
+ continue; // We are done!
+ }
+
+ } else { // We start with a '-', must be an argument.
+ ArgName = argv[i]+1;
+ // Eat leading dashes.
+ while (!ArgName.empty() && ArgName[0] == '-')
+ ArgName = ArgName.substr(1);
+
+ Handler = LookupOption(ArgName, Value, Opts);
+
+ // Check to see if this "option" is really a prefixed or grouped argument.
+ if (Handler == 0)
+ Handler = HandlePrefixedOrGroupedOption(ArgName, Value,
+ ErrorParsing, Opts);
+
+ // Otherwise, look for the closest available option to report to the user
+ // in the upcoming error.
+ if (Handler == 0 && SinkOpts.empty())
+ NearestHandler = LookupNearestOption(ArgName, Opts,
+ NearestHandlerString);
+ }
+
+ if (Handler == 0) {
+ if (SinkOpts.empty()) {
+ errs() << ProgramName << ": Unknown command line argument '"
+ << argv[i] << "'. Try: '" << argv[0] << " -help'\n";
+
+ if (NearestHandler) {
+ // If we know a near match, report it as well.
+ errs() << ProgramName << ": Did you mean '-"
+ << NearestHandlerString << "'?\n";
+ }
+
+ ErrorParsing = true;
+ } else {
+ for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(),
+ E = SinkOpts.end(); I != E ; ++I)
+ (*I)->addOccurrence(i, "", argv[i]);
+ }
+ continue;
+ }
+
+ // If this is a named positional argument, just remember that it is the
+ // active one...
+ if (Handler->getFormattingFlag() == cl::Positional)
+ ActivePositionalArg = Handler;
+ else
+ ErrorParsing |= ProvideOption(Handler, ArgName, Value, argc, argv, i);
+ }
+
+ // Check and handle positional arguments now...
+ if (NumPositionalRequired > PositionalVals.size()) {
+ errs() << ProgramName
+ << ": Not enough positional command line arguments specified!\n"
+ << "Must specify at least " << NumPositionalRequired
+ << " positional arguments: See: " << argv[0] << " -help\n";
+
+ ErrorParsing = true;
+ } else if (!HasUnlimitedPositionals &&
+ PositionalVals.size() > PositionalOpts.size()) {
+ errs() << ProgramName
+ << ": Too many positional arguments specified!\n"
+ << "Can specify at most " << PositionalOpts.size()
+ << " positional arguments: See: " << argv[0] << " -help\n";
+ ErrorParsing = true;
+
+ } else if (ConsumeAfterOpt == 0) {
+ // Positional args have already been handled if ConsumeAfter is specified.
+ unsigned ValNo = 0, NumVals = static_cast<unsigned>(PositionalVals.size());
+ for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) {
+ if (RequiresValue(PositionalOpts[i])) {
+ ProvidePositionalOption(PositionalOpts[i], PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ --NumPositionalRequired; // We fulfilled our duty...
+ }
+
+ // If we _can_ give this option more arguments, do so now, as long as we
+ // do not give it values that others need. 'Done' controls whether the
+ // option even _WANTS_ any more.
+ //
+ bool Done = PositionalOpts[i]->getNumOccurrencesFlag() == cl::Required;
+ while (NumVals-ValNo > NumPositionalRequired && !Done) {
+ switch (PositionalOpts[i]->getNumOccurrencesFlag()) {
+ case cl::Optional:
+ Done = true; // Optional arguments want _at most_ one value
+ // FALL THROUGH
+ case cl::ZeroOrMore: // Zero or more will take all they can get...
+ case cl::OneOrMore: // One or more will take all they can get...
+ ProvidePositionalOption(PositionalOpts[i],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ break;
+ default:
+ llvm_unreachable("Internal error, unexpected NumOccurrences flag in "
+ "positional argument processing!");
+ }
+ }
+ }
+ } else {
+ assert(ConsumeAfterOpt && NumPositionalRequired <= PositionalVals.size());
+ unsigned ValNo = 0;
+ for (size_t j = 1, e = PositionalOpts.size(); j != e; ++j)
+ if (RequiresValue(PositionalOpts[j])) {
+ ErrorParsing |= ProvidePositionalOption(PositionalOpts[j],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ }
+
+ // Handle the case where there is just one positional option, and it's
+ // optional. In this case, we want to give JUST THE FIRST option to the
+ // positional option and keep the rest for the consume after. The above
+ // loop would have assigned no values to positional options in this case.
+ //
+ if (PositionalOpts.size() == 2 && ValNo == 0 && !PositionalVals.empty()) {
+ ErrorParsing |= ProvidePositionalOption(PositionalOpts[1],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ }
+
+ // Handle over all of the rest of the arguments to the
+ // cl::ConsumeAfter command line option...
+ for (; ValNo != PositionalVals.size(); ++ValNo)
+ ErrorParsing |= ProvidePositionalOption(ConsumeAfterOpt,
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ }
+
+ // Loop over args and make sure all required args are specified!
+ for (StringMap<Option*>::iterator I = Opts.begin(),
+ E = Opts.end(); I != E; ++I) {
+ switch (I->second->getNumOccurrencesFlag()) {
+ case Required:
+ case OneOrMore:
+ if (I->second->getNumOccurrences() == 0) {
+ I->second->error("must be specified at least once!");
+ ErrorParsing = true;
+ }
+ // Fall through
+ default:
+ break;
+ }
+ }
+
+ // Now that we know if -debug is specified, we can use it.
+ // Note that if ReadResponseFiles == true, this must be done before the
+ // memory allocated for the expanded command line is free()d below.
+ DEBUG(dbgs() << "Args: ";
+ for (int i = 0; i < argc; ++i)
+ dbgs() << argv[i] << ' ';
+ dbgs() << '\n';
+ );
+
+ // Free all of the memory allocated to the map. Command line options may only
+ // be processed once!
+ Opts.clear();
+ PositionalOpts.clear();
+ MoreHelp->clear();
+
+ // Free the memory allocated by ExpandResponseFiles.
+ // Free all the strdup()ed strings.
+ for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
+ i != e; ++i)
+ free(*i);
+
+ // If we had an error processing our arguments, don't let the program execute
+ if (ErrorParsing) exit(1);
+}
+
+//===----------------------------------------------------------------------===//
+// Option Base class implementation
+//
+
+bool Option::error(const Twine &Message, StringRef ArgName) {
+ if (ArgName.data() == 0) ArgName = ArgStr;
+ if (ArgName.empty())
+ errs() << HelpStr; // Be nice for positional arguments
+ else
+ errs() << ProgramName << ": for the -" << ArgName;
+
+ errs() << " option: " << Message << "\n";
+ return true;
+}
+
+bool Option::addOccurrence(unsigned pos, StringRef ArgName,
+ StringRef Value, bool MultiArg) {
+ if (!MultiArg)
+ NumOccurrences++; // Increment the number of times we have been seen
+
+ switch (getNumOccurrencesFlag()) {
+ case Optional:
+ if (NumOccurrences > 1)
+ return error("may only occur zero or one times!", ArgName);
+ break;
+ case Required:
+ if (NumOccurrences > 1)
+ return error("must occur exactly one time!", ArgName);
+ // Fall through
+ case OneOrMore:
+ case ZeroOrMore:
+ case ConsumeAfter: break;
+ }
+
+ return handleOccurrence(pos, ArgName, Value);
+}
+
+
+// getValueStr - Get the value description string, using "DefaultMsg" if nothing
+// has been specified yet.
+//
+static const char *getValueStr(const Option &O, const char *DefaultMsg) {
+ if (O.ValueStr[0] == 0) return DefaultMsg;
+ return O.ValueStr;
+}
+
+//===----------------------------------------------------------------------===//
+// cl::alias class implementation
+//
+
+// Return the width of the option tag for printing...
+size_t alias::getOptionWidth() const {
+ return std::strlen(ArgStr)+6;
+}
+
+// Print out the option for the alias.
+void alias::printOptionInfo(size_t GlobalWidth) const {
+ size_t L = std::strlen(ArgStr);
+ outs() << " -" << ArgStr;
+ outs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Parser Implementation code...
+//
+
+// basic_parser implementation
+//
+
+// Return the width of the option tag for printing...
+size_t basic_parser_impl::getOptionWidth(const Option &O) const {
+ size_t Len = std::strlen(O.ArgStr);
+ if (const char *ValName = getValueName())
+ Len += std::strlen(getValueStr(O, ValName))+3;
+
+ return Len + 6;
+}
+
+// printOptionInfo - Print out information about this option. The
+// to-be-maintained width is specified.
+//
+void basic_parser_impl::printOptionInfo(const Option &O,
+ size_t GlobalWidth) const {
+ outs() << " -" << O.ArgStr;
+
+ if (const char *ValName = getValueName())
+ outs() << "=<" << getValueStr(O, ValName) << '>';
+
+ outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n';
+}
+
+void basic_parser_impl::printOptionName(const Option &O,
+ size_t GlobalWidth) const {
+ outs() << " -" << O.ArgStr;
+ outs().indent(GlobalWidth-std::strlen(O.ArgStr));
+}
+
+
+// parser<bool> implementation
+//
+bool parser<bool>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, bool &Value) {
+ if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" ||
+ Arg == "1") {
+ Value = true;
+ return false;
+ }
+
+ if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
+ Value = false;
+ return false;
+ }
+ return O.error("'" + Arg +
+ "' is invalid value for boolean argument! Try 0 or 1");
+}
+
+// parser<boolOrDefault> implementation
+//
+bool parser<boolOrDefault>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, boolOrDefault &Value) {
+ if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" ||
+ Arg == "1") {
+ Value = BOU_TRUE;
+ return false;
+ }
+ if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
+ Value = BOU_FALSE;
+ return false;
+ }
+
+ return O.error("'" + Arg +
+ "' is invalid value for boolean argument! Try 0 or 1");
+}
+
+// parser<int> implementation
+//
+bool parser<int>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, int &Value) {
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for integer argument!");
+ return false;
+}
+
+// parser<unsigned> implementation
+//
+bool parser<unsigned>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, unsigned &Value) {
+
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for uint argument!");
+ return false;
+}
+
+// parser<unsigned long long> implementation
+//
+bool parser<unsigned long long>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, unsigned long long &Value){
+
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for uint argument!");
+ return false;
+}
+
+// parser<double>/parser<float> implementation
+//
+static bool parseDouble(Option &O, StringRef Arg, double &Value) {
+ SmallString<32> TmpStr(Arg.begin(), Arg.end());
+ const char *ArgStart = TmpStr.c_str();
+ char *End;
+ Value = strtod(ArgStart, &End);
+ if (*End != 0)
+ return O.error("'" + Arg + "' value invalid for floating point argument!");
+ return false;
+}
+
+bool parser<double>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, double &Val) {
+ return parseDouble(O, Arg, Val);
+}
+
+bool parser<float>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, float &Val) {
+ double dVal;
+ if (parseDouble(O, Arg, dVal))
+ return true;
+ Val = (float)dVal;
+ return false;
+}
+
+
+
+// generic_parser_base implementation
+//
+
+// findOption - Return the option number corresponding to the specified
+// argument string. If the option is not found, getNumOptions() is returned.
+//
+unsigned generic_parser_base::findOption(const char *Name) {
+ unsigned e = getNumOptions();
+
+ for (unsigned i = 0; i != e; ++i) {
+ if (strcmp(getOption(i), Name) == 0)
+ return i;
+ }
+ return e;
+}
+
+
+// Return the width of the option tag for printing...
+size_t generic_parser_base::getOptionWidth(const Option &O) const {
+ if (O.hasArgStr()) {
+ size_t Size = std::strlen(O.ArgStr)+6;
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
+ Size = std::max(Size, std::strlen(getOption(i))+8);
+ return Size;
+ } else {
+ size_t BaseSize = 0;
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
+ BaseSize = std::max(BaseSize, std::strlen(getOption(i))+8);
+ return BaseSize;
+ }
+}
+
+// printOptionInfo - Print out information about this option. The
+// to-be-maintained width is specified.
+//
+void generic_parser_base::printOptionInfo(const Option &O,
+ size_t GlobalWidth) const {
+ if (O.hasArgStr()) {
+ size_t L = std::strlen(O.ArgStr);
+ outs() << " -" << O.ArgStr;
+ outs().indent(GlobalWidth-L-6) << " - " << O.HelpStr << '\n';
+
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8;
+ outs() << " =" << getOption(i);
+ outs().indent(NumSpaces) << " - " << getDescription(i) << '\n';
+ }
+ } else {
+ if (O.HelpStr[0])
+ outs() << " " << O.HelpStr << '\n';
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ size_t L = std::strlen(getOption(i));
+ outs() << " -" << getOption(i);
+ outs().indent(GlobalWidth-L-8) << " - " << getDescription(i) << '\n';
+ }
+ }
+}
+
+static const size_t MaxOptWidth = 8; // arbitrary spacing for printOptionDiff
+
+// printGenericOptionDiff - Print the value of this option and it's default.
+//
+// "Generic" options have each value mapped to a name.
+void generic_parser_base::
+printGenericOptionDiff(const Option &O, const GenericOptionValue &Value,
+ const GenericOptionValue &Default,
+ size_t GlobalWidth) const {
+ outs() << " -" << O.ArgStr;
+ outs().indent(GlobalWidth-std::strlen(O.ArgStr));
+
+ unsigned NumOpts = getNumOptions();
+ for (unsigned i = 0; i != NumOpts; ++i) {
+ if (Value.compare(getOptionValue(i)))
+ continue;
+
+ outs() << "= " << getOption(i);
+ size_t L = std::strlen(getOption(i));
+ size_t NumSpaces = MaxOptWidth > L ? MaxOptWidth - L : 0;
+ outs().indent(NumSpaces) << " (default: ";
+ for (unsigned j = 0; j != NumOpts; ++j) {
+ if (Default.compare(getOptionValue(j)))
+ continue;
+ outs() << getOption(j);
+ break;
+ }
+ outs() << ")\n";
+ return;
+ }
+ outs() << "= *unknown option value*\n";
+}
+
+// printOptionDiff - Specializations for printing basic value types.
+//
+#define PRINT_OPT_DIFF(T) \
+ void parser<T>:: \
+ printOptionDiff(const Option &O, T V, OptionValue<T> D, \
+ size_t GlobalWidth) const { \
+ printOptionName(O, GlobalWidth); \
+ std::string Str; \
+ { \
+ raw_string_ostream SS(Str); \
+ SS << V; \
+ } \
+ outs() << "= " << Str; \
+ size_t NumSpaces = MaxOptWidth > Str.size() ? MaxOptWidth - Str.size() : 0;\
+ outs().indent(NumSpaces) << " (default: "; \
+ if (D.hasValue()) \
+ outs() << D.getValue(); \
+ else \
+ outs() << "*no default*"; \
+ outs() << ")\n"; \
+ } \
+
+PRINT_OPT_DIFF(bool)
+PRINT_OPT_DIFF(boolOrDefault)
+PRINT_OPT_DIFF(int)
+PRINT_OPT_DIFF(unsigned)
+PRINT_OPT_DIFF(unsigned long long)
+PRINT_OPT_DIFF(double)
+PRINT_OPT_DIFF(float)
+PRINT_OPT_DIFF(char)
+
+void parser<std::string>::
+printOptionDiff(const Option &O, StringRef V, OptionValue<std::string> D,
+ size_t GlobalWidth) const {
+ printOptionName(O, GlobalWidth);
+ outs() << "= " << V;
+ size_t NumSpaces = MaxOptWidth > V.size() ? MaxOptWidth - V.size() : 0;
+ outs().indent(NumSpaces) << " (default: ";
+ if (D.hasValue())
+ outs() << D.getValue();
+ else
+ outs() << "*no default*";
+ outs() << ")\n";
+}
+
+// Print a placeholder for options that don't yet support printOptionDiff().
+void basic_parser_impl::
+printOptionNoValue(const Option &O, size_t GlobalWidth) const {
+ printOptionName(O, GlobalWidth);
+ outs() << "= *cannot print option value*\n";
+}
+
+//===----------------------------------------------------------------------===//
+// -help and -help-hidden option implementation
+//
+
+static int OptNameCompare(const void *LHS, const void *RHS) {
+ typedef std::pair<const char *, Option*> pair_ty;
+
+ return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first);
+}
+
+// Copy Options into a vector so we can sort them as we like.
+static void
+sortOpts(StringMap<Option*> &OptMap,
+ SmallVectorImpl< std::pair<const char *, Option*> > &Opts,
+ bool ShowHidden) {
+ SmallPtrSet<Option*, 128> OptionSet; // Duplicate option detection.
+
+ for (StringMap<Option*>::iterator I = OptMap.begin(), E = OptMap.end();
+ I != E; ++I) {
+ // Ignore really-hidden options.
+ if (I->second->getOptionHiddenFlag() == ReallyHidden)
+ continue;
+
+ // Unless showhidden is set, ignore hidden flags.
+ if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden)
+ continue;
+
+ // If we've already seen this option, don't add it to the list again.
+ if (!OptionSet.insert(I->second))
+ continue;
+
+ Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(),
+ I->second));
+ }
+
+ // Sort the options list alphabetically.
+ qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare);
+}
+
+namespace {
+
+class HelpPrinter {
+ const bool ShowHidden;
+
+public:
+ explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {}
+
+ void operator=(bool Value) {
+ if (Value == false) return;
+
+ // Get all the options.
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> OptMap;
+ GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
+
+ SmallVector<std::pair<const char *, Option*>, 128> Opts;
+ sortOpts(OptMap, Opts, ShowHidden);
+
+ if (ProgramOverview)
+ outs() << "OVERVIEW: " << ProgramOverview << "\n";
+
+ outs() << "USAGE: " << ProgramName << " [options]";
+
+ // Print out the positional options.
+ Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists...
+ if (!PositionalOpts.empty() &&
+ PositionalOpts[0]->getNumOccurrencesFlag() == ConsumeAfter)
+ CAOpt = PositionalOpts[0];
+
+ for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) {
+ if (PositionalOpts[i]->ArgStr[0])
+ outs() << " --" << PositionalOpts[i]->ArgStr;
+ outs() << " " << PositionalOpts[i]->HelpStr;
+ }
+
+ // Print the consume after option info if it exists...
+ if (CAOpt) outs() << " " << CAOpt->HelpStr;
+
+ outs() << "\n\n";
+
+ // Compute the maximum argument length...
+ size_t MaxArgLen = 0;
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
+
+ outs() << "OPTIONS:\n";
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ Opts[i].second->printOptionInfo(MaxArgLen);
+
+ // Print any extra help the user has declared.
+ for (std::vector<const char *>::iterator I = MoreHelp->begin(),
+ E = MoreHelp->end(); I != E; ++I)
+ outs() << *I;
+ MoreHelp->clear();
+
+ // Halt the program since help information was printed
+ exit(1);
+ }
+};
+} // End anonymous namespace
+
+// Define the two HelpPrinter instances that are used to print out help, or
+// help-hidden...
+//
+static HelpPrinter NormalPrinter(false);
+static HelpPrinter HiddenPrinter(true);
+
+static cl::opt<HelpPrinter, true, parser<bool> >
+HOp("help", cl::desc("Display available options (-help-hidden for more)"),
+ cl::location(NormalPrinter), cl::ValueDisallowed);
+
+static cl::opt<HelpPrinter, true, parser<bool> >
+HHOp("help-hidden", cl::desc("Display all available options"),
+ cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+
+static cl::opt<bool>
+PrintOptions("print-options",
+ cl::desc("Print non-default options after command line parsing"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+PrintAllOptions("print-all-options",
+ cl::desc("Print all option values after command line parsing"),
+ cl::Hidden, cl::init(false));
+
+// Print the value of each option.
+void cl::PrintOptionValues() {
+ if (!PrintOptions && !PrintAllOptions) return;
+
+ // Get all the options.
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> OptMap;
+ GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
+
+ SmallVector<std::pair<const char *, Option*>, 128> Opts;
+ sortOpts(OptMap, Opts, /*ShowHidden*/true);
+
+ // Compute the maximum argument length...
+ size_t MaxArgLen = 0;
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
+
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions);
+}
+
+static void (*OverrideVersionPrinter)() = 0;
+
+static std::vector<void (*)()>* ExtraVersionPrinters = 0;
+
+namespace {
+class VersionPrinter {
+public:
+ void print() {
+ raw_ostream &OS = outs();
+ OS << "LLVM (http://llvm.org/):\n"
+ << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION;
+#ifdef LLVM_VERSION_INFO
+ OS << LLVM_VERSION_INFO;
+#endif
+ OS << "\n ";
+#ifndef __OPTIMIZE__
+ OS << "DEBUG build";
+#else
+ OS << "Optimized build";
+#endif
+#ifndef NDEBUG
+ OS << " with assertions";
+#endif
+ std::string CPU = sys::getHostCPUName();
+ if (CPU == "generic") CPU = "(unknown)";
+ OS << ".\n"
+#if (ENABLE_TIMESTAMPS == 1)
+ << " Built " << __DATE__ << " (" << __TIME__ << ").\n"
+#endif
+ << " Default target: " << sys::getDefaultTargetTriple() << '\n'
+ << " Host CPU: " << CPU << '\n';
+ }
+ void operator=(bool OptionWasSpecified) {
+ if (!OptionWasSpecified) return;
+
+ if (OverrideVersionPrinter != 0) {
+ (*OverrideVersionPrinter)();
+ exit(1);
+ }
+ print();
+
+ // Iterate over any registered extra printers and call them to add further
+ // information.
+ if (ExtraVersionPrinters != 0) {
+ outs() << '\n';
+ for (std::vector<void (*)()>::iterator I = ExtraVersionPrinters->begin(),
+ E = ExtraVersionPrinters->end();
+ I != E; ++I)
+ (*I)();
+ }
+
+ exit(1);
+ }
+};
+} // End anonymous namespace
+
+
+// Define the --version option that prints out the LLVM version for the tool
+static VersionPrinter VersionPrinterInstance;
+
+static cl::opt<VersionPrinter, true, parser<bool> >
+VersOp("version", cl::desc("Display the version of this program"),
+ cl::location(VersionPrinterInstance), cl::ValueDisallowed);
+
+// Utility function for printing the help message.
+void cl::PrintHelpMessage() {
+ // This looks weird, but it actually prints the help message. The
+ // NormalPrinter variable is a HelpPrinter and the help gets printed when
+ // its operator= is invoked. That's because the "normal" usages of the
+ // help printer is to be assigned true/false depending on whether the
+ // -help option was given or not. Since we're circumventing that we have
+ // to make it look like -help was given, so we assign true.
+ NormalPrinter = true;
+}
+
+/// Utility function for printing version number.
+void cl::PrintVersionMessage() {
+ VersionPrinterInstance.print();
+}
+
+void cl::SetVersionPrinter(void (*func)()) {
+ OverrideVersionPrinter = func;
+}
+
+void cl::AddExtraVersionPrinter(void (*func)()) {
+ if (ExtraVersionPrinters == 0)
+ ExtraVersionPrinters = new std::vector<void (*)()>;
+
+ ExtraVersionPrinters->push_back(func);
+}
diff --git a/contrib/llvm/lib/Support/ConstantRange.cpp b/contrib/llvm/lib/Support/ConstantRange.cpp
new file mode 100644
index 000000000000..5c5895026b67
--- /dev/null
+++ b/contrib/llvm/lib/Support/ConstantRange.cpp
@@ -0,0 +1,731 @@
+//===-- ConstantRange.cpp - ConstantRange implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Represent a range of possible values that may occur when the program is run
+// for an integral value. This keeps track of a lower and upper bound for the
+// constant, which MAY wrap around the end of the numeric range. To do this, it
+// keeps track of a [lower, upper) bound, which specifies an interval just like
+// STL iterators. When used with boolean values, the following are important
+// ranges (other integral ranges use min/max values for special range values):
+//
+// [F, F) = {} = Empty set
+// [T, F) = {T}
+// [F, T) = {F}
+// [T, T) = {F, T} = Full set
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// Initialize a full (the default) or empty set for the specified type.
+///
+ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) {
+ if (Full)
+ Lower = Upper = APInt::getMaxValue(BitWidth);
+ else
+ Lower = Upper = APInt::getMinValue(BitWidth);
+}
+
+/// Initialize a range to hold the single specified value.
+///
+ConstantRange::ConstantRange(const APInt &V) : Lower(V), Upper(V + 1) {}
+
+ConstantRange::ConstantRange(const APInt &L, const APInt &U) :
+ Lower(L), Upper(U) {
+ assert(L.getBitWidth() == U.getBitWidth() &&
+ "ConstantRange with unequal bit widths");
+ assert((L != U || (L.isMaxValue() || L.isMinValue())) &&
+ "Lower == Upper, but they aren't min or max value!");
+}
+
+ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
+ const ConstantRange &CR) {
+ if (CR.isEmptySet())
+ return CR;
+
+ uint32_t W = CR.getBitWidth();
+ switch (Pred) {
+ default: llvm_unreachable("Invalid ICmp predicate to makeICmpRegion()");
+ case CmpInst::ICMP_EQ:
+ return CR;
+ case CmpInst::ICMP_NE:
+ if (CR.isSingleElement())
+ return ConstantRange(CR.getUpper(), CR.getLower());
+ return ConstantRange(W);
+ case CmpInst::ICMP_ULT: {
+ APInt UMax(CR.getUnsignedMax());
+ if (UMax.isMinValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(APInt::getMinValue(W), UMax);
+ }
+ case CmpInst::ICMP_SLT: {
+ APInt SMax(CR.getSignedMax());
+ if (SMax.isMinSignedValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(APInt::getSignedMinValue(W), SMax);
+ }
+ case CmpInst::ICMP_ULE: {
+ APInt UMax(CR.getUnsignedMax());
+ if (UMax.isMaxValue())
+ return ConstantRange(W);
+ return ConstantRange(APInt::getMinValue(W), UMax + 1);
+ }
+ case CmpInst::ICMP_SLE: {
+ APInt SMax(CR.getSignedMax());
+ if (SMax.isMaxSignedValue())
+ return ConstantRange(W);
+ return ConstantRange(APInt::getSignedMinValue(W), SMax + 1);
+ }
+ case CmpInst::ICMP_UGT: {
+ APInt UMin(CR.getUnsignedMin());
+ if (UMin.isMaxValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(UMin + 1, APInt::getNullValue(W));
+ }
+ case CmpInst::ICMP_SGT: {
+ APInt SMin(CR.getSignedMin());
+ if (SMin.isMaxSignedValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(SMin + 1, APInt::getSignedMinValue(W));
+ }
+ case CmpInst::ICMP_UGE: {
+ APInt UMin(CR.getUnsignedMin());
+ if (UMin.isMinValue())
+ return ConstantRange(W);
+ return ConstantRange(UMin, APInt::getNullValue(W));
+ }
+ case CmpInst::ICMP_SGE: {
+ APInt SMin(CR.getSignedMin());
+ if (SMin.isMinSignedValue())
+ return ConstantRange(W);
+ return ConstantRange(SMin, APInt::getSignedMinValue(W));
+ }
+ }
+}
+
+/// isFullSet - Return true if this set contains all of the elements possible
+/// for this data-type
+bool ConstantRange::isFullSet() const {
+ return Lower == Upper && Lower.isMaxValue();
+}
+
+/// isEmptySet - Return true if this set contains no members.
+///
+bool ConstantRange::isEmptySet() const {
+ return Lower == Upper && Lower.isMinValue();
+}
+
+/// isWrappedSet - Return true if this set wraps around the top of the range,
+/// for example: [100, 8)
+///
+bool ConstantRange::isWrappedSet() const {
+ return Lower.ugt(Upper);
+}
+
+/// isSignWrappedSet - Return true if this set wraps around the INT_MIN of
+/// its bitwidth, for example: i8 [120, 140).
+///
+bool ConstantRange::isSignWrappedSet() const {
+ return contains(APInt::getSignedMaxValue(getBitWidth())) &&
+ contains(APInt::getSignedMinValue(getBitWidth()));
+}
+
+/// getSetSize - Return the number of elements in this set.
+///
+APInt ConstantRange::getSetSize() const {
+ if (isEmptySet())
+ return APInt(getBitWidth()+1, 0);
+
+ if (isFullSet()) {
+ APInt Size(getBitWidth()+1, 0);
+ Size.setBit(getBitWidth());
+ return Size;
+ }
+
+ // This is also correct for wrapped sets.
+ return (Upper - Lower).zext(getBitWidth()+1);
+}
+
+/// getUnsignedMax - Return the largest unsigned value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getUnsignedMax() const {
+ if (isFullSet() || isWrappedSet())
+ return APInt::getMaxValue(getBitWidth());
+ return getUpper() - 1;
+}
+
+/// getUnsignedMin - Return the smallest unsigned value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getUnsignedMin() const {
+ if (isFullSet() || (isWrappedSet() && getUpper() != 0))
+ return APInt::getMinValue(getBitWidth());
+ return getLower();
+}
+
+/// getSignedMax - Return the largest signed value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getSignedMax() const {
+ APInt SignedMax(APInt::getSignedMaxValue(getBitWidth()));
+ if (!isWrappedSet()) {
+ if (getLower().sle(getUpper() - 1))
+ return getUpper() - 1;
+ return SignedMax;
+ }
+ if (getLower().isNegative() == getUpper().isNegative())
+ return SignedMax;
+ return getUpper() - 1;
+}
+
+/// getSignedMin - Return the smallest signed value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getSignedMin() const {
+ APInt SignedMin(APInt::getSignedMinValue(getBitWidth()));
+ if (!isWrappedSet()) {
+ if (getLower().sle(getUpper() - 1))
+ return getLower();
+ return SignedMin;
+ }
+ if ((getUpper() - 1).slt(getLower())) {
+ if (getUpper() != SignedMin)
+ return SignedMin;
+ }
+ return getLower();
+}
+
+/// contains - Return true if the specified value is in the set.
+///
+bool ConstantRange::contains(const APInt &V) const {
+ if (Lower == Upper)
+ return isFullSet();
+
+ if (!isWrappedSet())
+ return Lower.ule(V) && V.ult(Upper);
+ return Lower.ule(V) || V.ult(Upper);
+}
+
+/// contains - Return true if the argument is a subset of this range.
+/// Two equal sets contain each other. The empty set contained by all other
+/// sets.
+///
+bool ConstantRange::contains(const ConstantRange &Other) const {
+ if (isFullSet() || Other.isEmptySet()) return true;
+ if (isEmptySet() || Other.isFullSet()) return false;
+
+ if (!isWrappedSet()) {
+ if (Other.isWrappedSet())
+ return false;
+
+ return Lower.ule(Other.getLower()) && Other.getUpper().ule(Upper);
+ }
+
+ if (!Other.isWrappedSet())
+ return Other.getUpper().ule(Upper) ||
+ Lower.ule(Other.getLower());
+
+ return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower());
+}
+
+/// subtract - Subtract the specified constant from the endpoints of this
+/// constant range.
+ConstantRange ConstantRange::subtract(const APInt &Val) const {
+ assert(Val.getBitWidth() == getBitWidth() && "Wrong bit width");
+ // If the set is empty or full, don't modify the endpoints.
+ if (Lower == Upper)
+ return *this;
+ return ConstantRange(Lower - Val, Upper - Val);
+}
+
+/// \brief Subtract the specified range from this range (aka relative complement
+/// of the sets).
+ConstantRange ConstantRange::difference(const ConstantRange &CR) const {
+ return intersectWith(CR.inverse());
+}
+
+/// intersectWith - Return the range that results from the intersection of this
+/// range with another range. The resultant range is guaranteed to include all
+/// elements contained in both input ranges, and to have the smallest possible
+/// set size that does so. Because there may be two intersections with the
+/// same set size, A.intersectWith(B) might not be equal to B.intersectWith(A).
+ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
+ assert(getBitWidth() == CR.getBitWidth() &&
+ "ConstantRange types don't agree!");
+
+ // Handle common cases.
+ if ( isEmptySet() || CR.isFullSet()) return *this;
+ if (CR.isEmptySet() || isFullSet()) return CR;
+
+ if (!isWrappedSet() && CR.isWrappedSet())
+ return CR.intersectWith(*this);
+
+ if (!isWrappedSet() && !CR.isWrappedSet()) {
+ if (Lower.ult(CR.Lower)) {
+ if (Upper.ule(CR.Lower))
+ return ConstantRange(getBitWidth(), false);
+
+ if (Upper.ult(CR.Upper))
+ return ConstantRange(CR.Lower, Upper);
+
+ return CR;
+ }
+ if (Upper.ult(CR.Upper))
+ return *this;
+
+ if (Lower.ult(CR.Upper))
+ return ConstantRange(Lower, CR.Upper);
+
+ return ConstantRange(getBitWidth(), false);
+ }
+
+ if (isWrappedSet() && !CR.isWrappedSet()) {
+ if (CR.Lower.ult(Upper)) {
+ if (CR.Upper.ult(Upper))
+ return CR;
+
+ if (CR.Upper.ule(Lower))
+ return ConstantRange(CR.Lower, Upper);
+
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ return CR;
+ }
+ if (CR.Lower.ult(Lower)) {
+ if (CR.Upper.ule(Lower))
+ return ConstantRange(getBitWidth(), false);
+
+ return ConstantRange(Lower, CR.Upper);
+ }
+ return CR;
+ }
+
+ if (CR.Upper.ult(Upper)) {
+ if (CR.Lower.ult(Upper)) {
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ return CR;
+ }
+
+ if (CR.Lower.ult(Lower))
+ return ConstantRange(Lower, CR.Upper);
+
+ return CR;
+ }
+ if (CR.Upper.ule(Lower)) {
+ if (CR.Lower.ult(Lower))
+ return *this;
+
+ return ConstantRange(CR.Lower, Upper);
+ }
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ return CR;
+}
+
+
+/// unionWith - Return the range that results from the union of this range with
+/// another range. The resultant range is guaranteed to include the elements of
+/// both sets, but may contain more. For example, [3, 9) union [12,15) is
+/// [3, 15), which includes 9, 10, and 11, which were not included in either
+/// set before.
+///
+ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
+ assert(getBitWidth() == CR.getBitWidth() &&
+ "ConstantRange types don't agree!");
+
+ if ( isFullSet() || CR.isEmptySet()) return *this;
+ if (CR.isFullSet() || isEmptySet()) return CR;
+
+ if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this);
+
+ if (!isWrappedSet() && !CR.isWrappedSet()) {
+ if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower)) {
+ // If the two ranges are disjoint, find the smaller gap and bridge it.
+ APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
+ if (d1.ult(d2))
+ return ConstantRange(Lower, CR.Upper);
+ return ConstantRange(CR.Lower, Upper);
+ }
+
+ APInt L = Lower, U = Upper;
+ if (CR.Lower.ult(L))
+ L = CR.Lower;
+ if ((CR.Upper - 1).ugt(U - 1))
+ U = CR.Upper;
+
+ if (L == 0 && U == 0)
+ return ConstantRange(getBitWidth());
+
+ return ConstantRange(L, U);
+ }
+
+ if (!CR.isWrappedSet()) {
+ // ------U L----- and ------U L----- : this
+ // L--U L--U : CR
+ if (CR.Upper.ule(Upper) || CR.Lower.uge(Lower))
+ return *this;
+
+ // ------U L----- : this
+ // L---------U : CR
+ if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper))
+ return ConstantRange(getBitWidth());
+
+ // ----U L---- : this
+ // L---U : CR
+ // <d1> <d2>
+ if (Upper.ule(CR.Lower) && CR.Upper.ule(Lower)) {
+ APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
+ if (d1.ult(d2))
+ return ConstantRange(Lower, CR.Upper);
+ return ConstantRange(CR.Lower, Upper);
+ }
+
+ // ----U L----- : this
+ // L----U : CR
+ if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper))
+ return ConstantRange(CR.Lower, Upper);
+
+ // ------U L---- : this
+ // L-----U : CR
+ assert(CR.Lower.ult(Upper) && CR.Upper.ult(Lower) &&
+ "ConstantRange::unionWith missed a case with one range wrapped");
+ return ConstantRange(Lower, CR.Upper);
+ }
+
+ // ------U L---- and ------U L---- : this
+ // -U L----------- and ------------U L : CR
+ if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper))
+ return ConstantRange(getBitWidth());
+
+ APInt L = Lower, U = Upper;
+ if (CR.Upper.ugt(U))
+ U = CR.Upper;
+ if (CR.Lower.ult(L))
+ L = CR.Lower;
+
+ return ConstantRange(L, U);
+}
+
+/// zeroExtend - Return a new range in the specified integer type, which must
+/// be strictly larger than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// zero extended.
+ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
+ if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize < DstTySize && "Not a value extension");
+ if (isFullSet() || isWrappedSet()) {
+ // Change into [0, 1 << src bit width)
+ APInt LowerExt(DstTySize, 0);
+ if (!Upper) // special case: [X, 0) -- not really wrapping around
+ LowerExt = Lower.zext(DstTySize);
+ return ConstantRange(LowerExt, APInt(DstTySize, 1).shl(SrcTySize));
+ }
+
+ return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize));
+}
+
+/// signExtend - Return a new range in the specified integer type, which must
+/// be strictly larger than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// sign extended.
+ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
+ if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize < DstTySize && "Not a value extension");
+ if (isFullSet() || isSignWrappedSet()) {
+ return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1),
+ APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1);
+ }
+
+ return ConstantRange(Lower.sext(DstTySize), Upper.sext(DstTySize));
+}
+
+/// truncate - Return a new range in the specified integer type, which must be
+/// strictly smaller than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// truncated to the specified type.
+ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
+ assert(getBitWidth() > DstTySize && "Not a value truncation");
+ if (isEmptySet())
+ return ConstantRange(DstTySize, /*isFullSet=*/false);
+ if (isFullSet())
+ return ConstantRange(DstTySize, /*isFullSet=*/true);
+
+ APInt MaxValue = APInt::getMaxValue(DstTySize).zext(getBitWidth());
+ APInt MaxBitValue(getBitWidth(), 0);
+ MaxBitValue.setBit(DstTySize);
+
+ APInt LowerDiv(Lower), UpperDiv(Upper);
+ ConstantRange Union(DstTySize, /*isFullSet=*/false);
+
+ // Analyze wrapped sets in their two parts: [0, Upper) \/ [Lower, MaxValue]
+ // We use the non-wrapped set code to analyze the [Lower, MaxValue) part, and
+ // then we do the union with [MaxValue, Upper)
+ if (isWrappedSet()) {
+ // if Upper is greater than Max Value, it covers the whole truncated range.
+ if (Upper.uge(MaxValue))
+ return ConstantRange(DstTySize, /*isFullSet=*/true);
+
+ Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize));
+ UpperDiv = APInt::getMaxValue(getBitWidth());
+
+ // Union covers the MaxValue case, so return if the remaining range is just
+ // MaxValue.
+ if (LowerDiv == UpperDiv)
+ return Union;
+ }
+
+ // Chop off the most significant bits that are past the destination bitwidth.
+ if (LowerDiv.uge(MaxValue)) {
+ APInt Div(getBitWidth(), 0);
+ APInt::udivrem(LowerDiv, MaxBitValue, Div, LowerDiv);
+ UpperDiv = UpperDiv - MaxBitValue * Div;
+ }
+
+ if (UpperDiv.ule(MaxValue))
+ return ConstantRange(LowerDiv.trunc(DstTySize),
+ UpperDiv.trunc(DstTySize)).unionWith(Union);
+
+ // The truncated value wrapps around. Check if we can do better than fullset.
+ APInt UpperModulo = UpperDiv - MaxBitValue;
+ if (UpperModulo.ult(LowerDiv))
+ return ConstantRange(LowerDiv.trunc(DstTySize),
+ UpperModulo.trunc(DstTySize)).unionWith(Union);
+
+ return ConstantRange(DstTySize, /*isFullSet=*/true);
+}
+
+/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The
+/// value is zero extended, truncated, or left alone to make it that width.
+ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ if (SrcTySize > DstTySize)
+ return truncate(DstTySize);
+ if (SrcTySize < DstTySize)
+ return zeroExtend(DstTySize);
+ return *this;
+}
+
+/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The
+/// value is sign extended, truncated, or left alone to make it that width.
+ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ if (SrcTySize > DstTySize)
+ return truncate(DstTySize);
+ if (SrcTySize < DstTySize)
+ return signExtend(DstTySize);
+ return *this;
+}
+
+ConstantRange
+ConstantRange::add(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
+ APInt NewLower = getLower() + Other.getLower();
+ APInt NewUpper = getUpper() + Other.getUpper() - 1;
+ if (NewLower == NewUpper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ ConstantRange X = ConstantRange(NewLower, NewUpper);
+ if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ // We've wrapped, therefore, full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return X;
+}
+
+ConstantRange
+ConstantRange::sub(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
+ APInt NewLower = getLower() - Other.getUpper() + 1;
+ APInt NewUpper = getUpper() - Other.getLower();
+ if (NewLower == NewUpper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ ConstantRange X = ConstantRange(NewLower, NewUpper);
+ if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ // We've wrapped, therefore, full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return X;
+}
+
+ConstantRange
+ConstantRange::multiply(const ConstantRange &Other) const {
+ // TODO: If either operand is a single element and the multiply is known to
+ // be non-wrapping, round the result min and max value to the appropriate
+ // multiple of that element. If wrapping is possible, at least adjust the
+ // range according to the greatest power-of-two factor of the single element.
+
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ APInt this_min = getUnsignedMin().zext(getBitWidth() * 2);
+ APInt this_max = getUnsignedMax().zext(getBitWidth() * 2);
+ APInt Other_min = Other.getUnsignedMin().zext(getBitWidth() * 2);
+ APInt Other_max = Other.getUnsignedMax().zext(getBitWidth() * 2);
+
+ ConstantRange Result_zext = ConstantRange(this_min * Other_min,
+ this_max * Other_max + 1);
+ return Result_zext.truncate(getBitWidth());
+}
+
+ConstantRange
+ConstantRange::smax(const ConstantRange &Other) const {
+ // X smax Y is: range(smax(X_smin, Y_smin),
+ // smax(X_smax, Y_smax))
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ APInt NewL = APIntOps::smax(getSignedMin(), Other.getSignedMin());
+ APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1;
+ if (NewU == NewL)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(NewL, NewU);
+}
+
+ConstantRange
+ConstantRange::umax(const ConstantRange &Other) const {
+ // X umax Y is: range(umax(X_umin, Y_umin),
+ // umax(X_umax, Y_umax))
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ APInt NewL = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
+ APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1;
+ if (NewU == NewL)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(NewL, NewU);
+}
+
+ConstantRange
+ConstantRange::udiv(const ConstantRange &RHS) const {
+ if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax() == 0)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (RHS.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax());
+
+ APInt RHS_umin = RHS.getUnsignedMin();
+ if (RHS_umin == 0) {
+ // We want the lowest value in RHS excluding zero. Usually that would be 1
+ // except for a range in the form of [X, 1) in which case it would be X.
+ if (RHS.getUpper() == 1)
+ RHS_umin = RHS.getLower();
+ else
+ RHS_umin = APInt(getBitWidth(), 1);
+ }
+
+ APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1;
+
+ // If the LHS is Full and the RHS is a wrapped interval containing 1 then
+ // this could occur.
+ if (Lower == Upper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return ConstantRange(Lower, Upper);
+}
+
+ConstantRange
+ConstantRange::binaryAnd(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ // TODO: replace this with something less conservative
+
+ APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
+ if (umin.isAllOnesValue())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(APInt::getNullValue(getBitWidth()), umin + 1);
+}
+
+ConstantRange
+ConstantRange::binaryOr(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ // TODO: replace this with something less conservative
+
+ APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
+ if (umax.isMinValue())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(umax, APInt::getNullValue(getBitWidth()));
+}
+
+ConstantRange
+ConstantRange::shl(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ APInt min = getUnsignedMin().shl(Other.getUnsignedMin());
+ APInt max = getUnsignedMax().shl(Other.getUnsignedMax());
+
+ // there's no overflow!
+ APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros());
+ if (Zeros.ugt(Other.getUnsignedMax()))
+ return ConstantRange(min, max + 1);
+
+ // FIXME: implement the other tricky cases
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+}
+
+ConstantRange
+ConstantRange::lshr(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ APInt max = getUnsignedMax().lshr(Other.getUnsignedMin());
+ APInt min = getUnsignedMin().lshr(Other.getUnsignedMax());
+ if (min == max + 1)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return ConstantRange(min, max + 1);
+}
+
+ConstantRange ConstantRange::inverse() const {
+ if (isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(Upper, Lower);
+}
+
+/// print - Print out the bounds to a stream...
+///
+void ConstantRange::print(raw_ostream &OS) const {
+ if (isFullSet())
+ OS << "full-set";
+ else if (isEmptySet())
+ OS << "empty-set";
+ else
+ OS << "[" << Lower << "," << Upper << ")";
+}
+
+/// dump - Allow printing from a debugger easily...
+///
+void ConstantRange::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/Support/ConvertUTF.c b/contrib/llvm/lib/Support/ConvertUTF.c
new file mode 100644
index 000000000000..23f17ca25aea
--- /dev/null
+++ b/contrib/llvm/lib/Support/ConvertUTF.c
@@ -0,0 +1,571 @@
+/*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------=*/
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/* ---------------------------------------------------------------------
+
+ Conversions between UTF32, UTF-16, and UTF-8. Source code file.
+ Author: Mark E. Davis, 1994.
+ Rev History: Rick McGowan, fixes & updates May 2001.
+ Sept 2001: fixed const & error conditions per
+ mods suggested by S. Parent & A. Lillich.
+ June 2002: Tim Dodd added detection and handling of incomplete
+ source sequences, enhanced error detection, added casts
+ to eliminate compiler warnings.
+ July 2003: slight mods to back out aggressive FFFE detection.
+ Jan 2004: updated switches in from-UTF8 conversions.
+ Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
+
+ See the header file "ConvertUTF.h" for complete documentation.
+
+------------------------------------------------------------------------ */
+
+
+#include "llvm/Support/ConvertUTF.h"
+#ifdef CVTUTF_DEBUG
+#include <stdio.h>
+#endif
+
+static const int halfShift = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL;
+static const UTF32 halfMask = 0x3FFUL;
+
+#define UNI_SUR_HIGH_START (UTF32)0xD800
+#define UNI_SUR_HIGH_END (UTF32)0xDBFF
+#define UNI_SUR_LOW_START (UTF32)0xDC00
+#define UNI_SUR_LOW_END (UTF32)0xDFFF
+#define false 0
+#define true 1
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
+ * left as-is for anyone who may want to do such conversion, which was
+ * allowed in earlier algorithms.
+ */
+static const char trailingBytesForUTF8[256] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow. There are
+ * as many entries in this table as there are UTF-8 sequence types.
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequencs
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
+ */
+static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/* --------------------------------------------------------------------- */
+
+/* The interface converts a whole buffer to avoid function-call overhead.
+ * Constants have been gathered. Loops & conditionals have been removed as
+ * much as possible for efficiency, in favor of drop-through switches.
+ * (See "Note A" at the bottom of the file for equivalent code.)
+ * If your compiler supports it, the "isLegalUTF8" call can be turned
+ * into an inline function.
+ */
+
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF32toUTF16 (
+ const UTF32** sourceStart, const UTF32* sourceEnd,
+ UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF32* source = *sourceStart;
+ UTF16* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch;
+ if (target >= targetEnd) {
+ result = targetExhausted; break;
+ }
+ ch = *source++;
+ if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+ /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+ if (flags == strictConversion) {
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ *target++ = (UTF16)ch; /* normal case */
+ }
+ } else if (ch > UNI_MAX_LEGAL_UTF32) {
+ if (flags == strictConversion) {
+ result = sourceIllegal;
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ /* target is a character in range 0xFFFF - 0x10FFFF. */
+ if (target + 1 >= targetEnd) {
+ --source; /* Back up source pointer! */
+ result = targetExhausted; break;
+ }
+ ch -= halfBase;
+ *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
+ *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
+ }
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF16toUTF32 (
+ const UTF16** sourceStart, const UTF16* sourceEnd,
+ UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF16* source = *sourceStart;
+ UTF32* target = *targetStart;
+ UTF32 ch, ch2;
+ while (source < sourceEnd) {
+ const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
+ ch = *source++;
+ /* If we have a surrogate pair, convert to UTF32 first. */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+ /* If the 16 bits following the high surrogate are in the source buffer... */
+ if (source < sourceEnd) {
+ ch2 = *source;
+ /* If it's a low surrogate, convert to UTF32. */
+ if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+ ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ + (ch2 - UNI_SUR_LOW_START) + halfBase;
+ ++source;
+ } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ }
+ } else { /* We don't have the 16 bits following the high surrogate. */
+ --source; /* return to the high surrogate */
+ result = sourceExhausted;
+ break;
+ }
+ } else if (flags == strictConversion) {
+ /* UTF-16 surrogate values are illegal in UTF-32 */
+ if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ }
+ }
+ if (target >= targetEnd) {
+ source = oldSource; /* Back up source pointer! */
+ result = targetExhausted; break;
+ }
+ *target++ = ch;
+ }
+ *sourceStart = source;
+ *targetStart = target;
+#ifdef CVTUTF_DEBUG
+if (result == sourceIllegal) {
+ fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
+ fflush(stderr);
+}
+#endif
+ return result;
+}
+ConversionResult ConvertUTF16toUTF8 (
+ const UTF16** sourceStart, const UTF16* sourceEnd,
+ UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF16* source = *sourceStart;
+ UTF8* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch;
+ unsigned short bytesToWrite = 0;
+ const UTF32 byteMask = 0xBF;
+ const UTF32 byteMark = 0x80;
+ const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
+ ch = *source++;
+ /* If we have a surrogate pair, convert to UTF32 first. */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+ /* If the 16 bits following the high surrogate are in the source buffer... */
+ if (source < sourceEnd) {
+ UTF32 ch2 = *source;
+ /* If it's a low surrogate, convert to UTF32. */
+ if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+ ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ + (ch2 - UNI_SUR_LOW_START) + halfBase;
+ ++source;
+ } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ }
+ } else { /* We don't have the 16 bits following the high surrogate. */
+ --source; /* return to the high surrogate */
+ result = sourceExhausted;
+ break;
+ }
+ } else if (flags == strictConversion) {
+ /* UTF-16 surrogate values are illegal in UTF-32 */
+ if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ }
+ }
+ /* Figure out how many bytes the result will require */
+ if (ch < (UTF32)0x80) { bytesToWrite = 1;
+ } else if (ch < (UTF32)0x800) { bytesToWrite = 2;
+ } else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
+ } else if (ch < (UTF32)0x110000) { bytesToWrite = 4;
+ } else { bytesToWrite = 3;
+ ch = UNI_REPLACEMENT_CHAR;
+ }
+
+ target += bytesToWrite;
+ if (target > targetEnd) {
+ source = oldSource; /* Back up source pointer! */
+ target -= bytesToWrite; result = targetExhausted; break;
+ }
+ switch (bytesToWrite) { /* note: everything falls through. */
+ case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
+ }
+ target += bytesToWrite;
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF32toUTF8 (
+ const UTF32** sourceStart, const UTF32* sourceEnd,
+ UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF32* source = *sourceStart;
+ UTF8* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch;
+ unsigned short bytesToWrite = 0;
+ const UTF32 byteMask = 0xBF;
+ const UTF32 byteMark = 0x80;
+ ch = *source++;
+ if (flags == strictConversion ) {
+ /* UTF-16 surrogate values are illegal in UTF-32 */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ }
+ }
+ /*
+ * Figure out how many bytes the result will require. Turn any
+ * illegally large UTF32 things (> Plane 17) into replacement chars.
+ */
+ if (ch < (UTF32)0x80) { bytesToWrite = 1;
+ } else if (ch < (UTF32)0x800) { bytesToWrite = 2;
+ } else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
+ } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4;
+ } else { bytesToWrite = 3;
+ ch = UNI_REPLACEMENT_CHAR;
+ result = sourceIllegal;
+ }
+
+ target += bytesToWrite;
+ if (target > targetEnd) {
+ --source; /* Back up source pointer! */
+ target -= bytesToWrite; result = targetExhausted; break;
+ }
+ switch (bytesToWrite) { /* note: everything falls through. */
+ case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
+ }
+ target += bytesToWrite;
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ * length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length > 4, this returns false. The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ */
+
+static Boolean isLegalUTF8(const UTF8 *source, int length) {
+ UTF8 a;
+ const UTF8 *srcptr = source+length;
+ switch (length) {
+ default: return false;
+ /* Everything else falls through when "true"... */
+ case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+ case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+ case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+
+ switch (*source) {
+ /* no fall-through in this inner switch */
+ case 0xE0: if (a < 0xA0) return false; break;
+ case 0xED: if (a > 0x9F) return false; break;
+ case 0xF0: if (a < 0x90) return false; break;
+ case 0xF4: if (a > 0x8F) return false; break;
+ default: if (a < 0x80) return false;
+ }
+
+ case 1: if (*source >= 0x80 && *source < 0xC2) return false;
+ }
+ if (*source > 0xF4) return false;
+ return true;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported function to return whether a UTF-8 sequence is legal or not.
+ * This is not used here; it's just exported.
+ */
+Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
+ int length = trailingBytesForUTF8[*source]+1;
+ if (length > sourceEnd - source) {
+ return false;
+ }
+ return isLegalUTF8(source, length);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported function to return the total number of bytes in a codepoint
+ * represented in UTF-8, given the value of the first byte.
+ */
+unsigned getNumBytesForUTF8(UTF8 first) {
+ return trailingBytesForUTF8[first] + 1;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported function to return whether a UTF-8 string is legal or not.
+ * This is not used here; it's just exported.
+ */
+Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
+ while (*source != sourceEnd) {
+ int length = trailingBytesForUTF8[**source] + 1;
+ if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
+ return false;
+ *source += length;
+ }
+ return true;
+}
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF8toUTF16 (
+ const UTF8** sourceStart, const UTF8* sourceEnd,
+ UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF8* source = *sourceStart;
+ UTF16* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch = 0;
+ unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+ if (extraBytesToRead >= sourceEnd - source) {
+ result = sourceExhausted; break;
+ }
+ /* Do this check whether lenient or strict */
+ if (!isLegalUTF8(source, extraBytesToRead+1)) {
+ result = sourceIllegal;
+ break;
+ }
+ /*
+ * The cases all fall through. See "Note A" below.
+ */
+ switch (extraBytesToRead) {
+ case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+ case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+ case 3: ch += *source++; ch <<= 6;
+ case 2: ch += *source++; ch <<= 6;
+ case 1: ch += *source++; ch <<= 6;
+ case 0: ch += *source++;
+ }
+ ch -= offsetsFromUTF8[extraBytesToRead];
+
+ if (target >= targetEnd) {
+ source -= (extraBytesToRead+1); /* Back up source pointer! */
+ result = targetExhausted; break;
+ }
+ if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+ /* UTF-16 surrogate values are illegal in UTF-32 */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+ if (flags == strictConversion) {
+ source -= (extraBytesToRead+1); /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ *target++ = (UTF16)ch; /* normal case */
+ }
+ } else if (ch > UNI_MAX_UTF16) {
+ if (flags == strictConversion) {
+ result = sourceIllegal;
+ source -= (extraBytesToRead+1); /* return to the start */
+ break; /* Bail out; shouldn't continue */
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ /* target is a character in range 0xFFFF - 0x10FFFF. */
+ if (target + 1 >= targetEnd) {
+ source -= (extraBytesToRead+1); /* Back up source pointer! */
+ result = targetExhausted; break;
+ }
+ ch -= halfBase;
+ *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
+ *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
+ }
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF8toUTF32 (
+ const UTF8** sourceStart, const UTF8* sourceEnd,
+ UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF8* source = *sourceStart;
+ UTF32* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch = 0;
+ unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+ if (extraBytesToRead >= sourceEnd - source) {
+ result = sourceExhausted; break;
+ }
+ /* Do this check whether lenient or strict */
+ if (!isLegalUTF8(source, extraBytesToRead+1)) {
+ result = sourceIllegal;
+ break;
+ }
+ /*
+ * The cases all fall through. See "Note A" below.
+ */
+ switch (extraBytesToRead) {
+ case 5: ch += *source++; ch <<= 6;
+ case 4: ch += *source++; ch <<= 6;
+ case 3: ch += *source++; ch <<= 6;
+ case 2: ch += *source++; ch <<= 6;
+ case 1: ch += *source++; ch <<= 6;
+ case 0: ch += *source++;
+ }
+ ch -= offsetsFromUTF8[extraBytesToRead];
+
+ if (target >= targetEnd) {
+ source -= (extraBytesToRead+1); /* Back up the source pointer! */
+ result = targetExhausted; break;
+ }
+ if (ch <= UNI_MAX_LEGAL_UTF32) {
+ /*
+ * UTF-16 surrogate values are illegal in UTF-32, and anything
+ * over Plane 17 (> 0x10FFFF) is illegal.
+ */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+ if (flags == strictConversion) {
+ source -= (extraBytesToRead+1); /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ *target++ = ch;
+ }
+ } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
+ result = sourceIllegal;
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
+/* ---------------------------------------------------------------------
+
+ Note A.
+ The fall-through switches in UTF-8 reading code save a
+ temp variable, some decrements & conditionals. The switches
+ are equivalent to the following loop:
+ {
+ int tmpBytesToRead = extraBytesToRead+1;
+ do {
+ ch += *source++;
+ --tmpBytesToRead;
+ if (tmpBytesToRead) ch <<= 6;
+ } while (tmpBytesToRead > 0);
+ }
+ In UTF-8 writing code, the switches on "bytesToWrite" are
+ similarly unrolled loops.
+
+ --------------------------------------------------------------------- */
diff --git a/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp b/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp
new file mode 100644
index 000000000000..458fbb0b496a
--- /dev/null
+++ b/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp
@@ -0,0 +1,76 @@
+//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ConvertUTF.h"
+
+namespace llvm {
+
+bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
+ char *&ResultPtr, const UTF8 *&ErrorPtr) {
+ assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
+ ConversionResult result = conversionOK;
+ // Copy the character span over.
+ if (WideCharWidth == 1) {
+ const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.begin());
+ if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.end()))) {
+ result = sourceIllegal;
+ ErrorPtr = Pos;
+ } else {
+ memcpy(ResultPtr, Source.data(), Source.size());
+ ResultPtr += Source.size();
+ }
+ } else if (WideCharWidth == 2) {
+ const UTF8 *sourceStart = (const UTF8*)Source.data();
+ // FIXME: Make the type of the result buffer correct instead of
+ // using reinterpret_cast.
+ UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
+ ConversionFlags flags = strictConversion;
+ result = ConvertUTF8toUTF16(
+ &sourceStart, sourceStart + Source.size(),
+ &targetStart, targetStart + 2*Source.size(), flags);
+ if (result == conversionOK)
+ ResultPtr = reinterpret_cast<char*>(targetStart);
+ else
+ ErrorPtr = sourceStart;
+ } else if (WideCharWidth == 4) {
+ const UTF8 *sourceStart = (const UTF8*)Source.data();
+ // FIXME: Make the type of the result buffer correct instead of
+ // using reinterpret_cast.
+ UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
+ ConversionFlags flags = strictConversion;
+ result = ConvertUTF8toUTF32(
+ &sourceStart, sourceStart + Source.size(),
+ &targetStart, targetStart + 4*Source.size(), flags);
+ if (result == conversionOK)
+ ResultPtr = reinterpret_cast<char*>(targetStart);
+ else
+ ErrorPtr = sourceStart;
+ }
+ assert((result != targetExhausted)
+ && "ConvertUTF8toUTFXX exhausted target buffer");
+ return result == conversionOK;
+}
+
+bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
+ const UTF32 *SourceStart = &Source;
+ const UTF32 *SourceEnd = SourceStart + 1;
+ UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
+ UTF8 *TargetEnd = TargetStart + 4;
+ ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
+ &TargetStart, TargetEnd,
+ strictConversion);
+ if (CR != conversionOK)
+ return false;
+
+ ResultPtr = reinterpret_cast<char*>(TargetStart);
+ return true;
+}
+
+} // end namespace llvm
+
diff --git a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
new file mode 100644
index 000000000000..182c362cc755
--- /dev/null
+++ b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -0,0 +1,346 @@
+//===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/ThreadLocal.h"
+#include <cstdio>
+#include <setjmp.h>
+using namespace llvm;
+
+namespace {
+
+struct CrashRecoveryContextImpl;
+
+static sys::ThreadLocal<const CrashRecoveryContextImpl> CurrentContext;
+
+struct CrashRecoveryContextImpl {
+ CrashRecoveryContext *CRC;
+ std::string Backtrace;
+ ::jmp_buf JumpBuffer;
+ volatile unsigned Failed : 1;
+
+public:
+ CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
+ Failed(false) {
+ CurrentContext.set(this);
+ }
+ ~CrashRecoveryContextImpl() {
+ CurrentContext.erase();
+ }
+
+ void HandleCrash() {
+ // Eliminate the current context entry, to avoid re-entering in case the
+ // cleanup code crashes.
+ CurrentContext.erase();
+
+ assert(!Failed && "Crash recovery context already failed!");
+ Failed = true;
+
+ // FIXME: Stash the backtrace.
+
+ // Jump back to the RunSafely we were called under.
+ longjmp(JumpBuffer, 1);
+ }
+};
+
+}
+
+static sys::Mutex gCrashRecoveryContexMutex;
+static bool gCrashRecoveryEnabled = false;
+
+static sys::ThreadLocal<const CrashRecoveryContextCleanup>
+ tlIsRecoveringFromCrash;
+
+CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
+
+CrashRecoveryContext::~CrashRecoveryContext() {
+ // Reclaim registered resources.
+ CrashRecoveryContextCleanup *i = head;
+ tlIsRecoveringFromCrash.set(head);
+ while (i) {
+ CrashRecoveryContextCleanup *tmp = i;
+ i = tmp->next;
+ tmp->cleanupFired = true;
+ tmp->recoverResources();
+ delete tmp;
+ }
+ tlIsRecoveringFromCrash.erase();
+
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+ delete CRCI;
+}
+
+bool CrashRecoveryContext::isRecoveringFromCrash() {
+ return tlIsRecoveringFromCrash.get() != 0;
+}
+
+CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
+ if (!gCrashRecoveryEnabled)
+ return 0;
+
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+ if (!CRCI)
+ return 0;
+
+ return CRCI->CRC;
+}
+
+void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup)
+{
+ if (!cleanup)
+ return;
+ if (head)
+ head->prev = cleanup;
+ cleanup->next = head;
+ head = cleanup;
+}
+
+void
+CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
+ if (!cleanup)
+ return;
+ if (cleanup == head) {
+ head = cleanup->next;
+ if (head)
+ head->prev = 0;
+ }
+ else {
+ cleanup->prev->next = cleanup->next;
+ if (cleanup->next)
+ cleanup->next->prev = cleanup->prev;
+ }
+ delete cleanup;
+}
+
+#ifdef LLVM_ON_WIN32
+
+#include "Windows/Windows.h"
+
+// On Windows, we can make use of vectored exception handling to
+// catch most crashing situations. Note that this does mean
+// we will be alerted of exceptions *before* structured exception
+// handling has the opportunity to catch it. But that isn't likely
+// to cause problems because nowhere in the project is SEH being
+// used.
+//
+// Vectored exception handling is built on top of SEH, and so it
+// works on a per-thread basis.
+//
+// The vectored exception handler functionality was added in Windows
+// XP, so if support for older versions of Windows is required,
+// it will have to be added.
+//
+// If we want to support as far back as Win2k, we could use the
+// SetUnhandledExceptionFilter API, but there's a risk of that
+// being entirely overwritten (it's not a chain).
+
+static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
+{
+ // Lookup the current thread local recovery object.
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+
+ if (!CRCI) {
+ // Something has gone horribly wrong, so let's just tell everyone
+ // to keep searching
+ CrashRecoveryContext::Disable();
+ return EXCEPTION_CONTINUE_SEARCH;
+ }
+
+ // TODO: We can capture the stack backtrace here and store it on the
+ // implementation if we so choose.
+
+ // Handle the crash
+ const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+
+ // Note that we don't actually get here because HandleCrash calls
+ // longjmp, which means the HandleCrash function never returns.
+ llvm_unreachable("Handled the crash, should have longjmp'ed out of here");
+}
+
+// Because the Enable and Disable calls are static, it means that
+// there may not actually be an Impl available, or even a current
+// CrashRecoveryContext at all. So we make use of a thread-local
+// exception table. The handles contained in here will either be
+// non-NULL, valid VEH handles, or NULL.
+static sys::ThreadLocal<const void> sCurrentExceptionHandle;
+
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = true;
+
+ // We can set up vectored exception handling now. We will install our
+ // handler as the front of the list, though there's no assurances that
+ // it will remain at the front (another call could install itself before
+ // our handler). This 1) isn't likely, and 2) shouldn't cause problems.
+ PVOID handle = ::AddVectoredExceptionHandler(1, ExceptionHandler);
+ sCurrentExceptionHandle.set(handle);
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (!gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = false;
+
+ PVOID currentHandle = const_cast<PVOID>(sCurrentExceptionHandle.get());
+ if (currentHandle) {
+ // Now we can remove the vectored exception handler from the chain
+ ::RemoveVectoredExceptionHandler(currentHandle);
+
+ // Reset the handle in our thread-local set.
+ sCurrentExceptionHandle.set(NULL);
+ }
+}
+
+#else
+
+// Generic POSIX implementation.
+//
+// This implementation relies on synchronous signals being delivered to the
+// current thread. We use a thread local object to keep track of the active
+// crash recovery context, and install signal handlers to invoke HandleCrash on
+// the active object.
+//
+// This implementation does not to attempt to chain signal handlers in any
+// reliable fashion -- if we get a signal outside of a crash recovery context we
+// simply disable crash recovery and raise the signal again.
+
+#include <signal.h>
+
+static const int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
+static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]);
+static struct sigaction PrevActions[NumSignals];
+
+static void CrashRecoverySignalHandler(int Signal) {
+ // Lookup the current thread local recovery object.
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+
+ if (!CRCI) {
+ // We didn't find a crash recovery context -- this means either we got a
+ // signal on a thread we didn't expect it on, the application got a signal
+ // outside of a crash recovery context, or something else went horribly
+ // wrong.
+ //
+ // Disable crash recovery and raise the signal again. The assumption here is
+ // that the enclosing application will terminate soon, and we won't want to
+ // attempt crash recovery again.
+ //
+ // This call of Disable isn't thread safe, but it doesn't actually matter.
+ CrashRecoveryContext::Disable();
+ raise(Signal);
+
+ // The signal will be thrown once the signal mask is restored.
+ return;
+ }
+
+ // Unblock the signal we received.
+ sigset_t SigMask;
+ sigemptyset(&SigMask);
+ sigaddset(&SigMask, Signal);
+ sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+ if (CRCI)
+ const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+}
+
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = true;
+
+ // Setup the signal handler.
+ struct sigaction Handler;
+ Handler.sa_handler = CrashRecoverySignalHandler;
+ Handler.sa_flags = 0;
+ sigemptyset(&Handler.sa_mask);
+
+ for (unsigned i = 0; i != NumSignals; ++i) {
+ sigaction(Signals[i], &Handler, &PrevActions[i]);
+ }
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (!gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = false;
+
+ // Restore the previous signal handlers.
+ for (unsigned i = 0; i != NumSignals; ++i)
+ sigaction(Signals[i], &PrevActions[i], 0);
+}
+
+#endif
+
+bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) {
+ // If crash recovery is disabled, do nothing.
+ if (gCrashRecoveryEnabled) {
+ assert(!Impl && "Crash recovery context already initialized!");
+ CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
+ Impl = CRCI;
+
+ if (setjmp(CRCI->JumpBuffer) != 0) {
+ return false;
+ }
+ }
+
+ Fn(UserData);
+ return true;
+}
+
+void CrashRecoveryContext::HandleCrash() {
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+ assert(CRCI && "Crash recovery context never initialized!");
+ CRCI->HandleCrash();
+}
+
+const std::string &CrashRecoveryContext::getBacktrace() const {
+ CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *) Impl;
+ assert(CRC && "Crash recovery context never initialized!");
+ assert(CRC->Failed && "No crash was detected!");
+ return CRC->Backtrace;
+}
+
+//
+
+namespace {
+struct RunSafelyOnThreadInfo {
+ void (*UserFn)(void*);
+ void *UserData;
+ CrashRecoveryContext *CRC;
+ bool Result;
+};
+}
+
+static void RunSafelyOnThread_Dispatch(void *UserData) {
+ RunSafelyOnThreadInfo *Info =
+ reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);
+ Info->Result = Info->CRC->RunSafely(Info->UserFn, Info->UserData);
+}
+bool CrashRecoveryContext::RunSafelyOnThread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ RunSafelyOnThreadInfo Info = { Fn, UserData, this, false };
+ llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize);
+ return Info.Result;
+}
diff --git a/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
new file mode 100644
index 000000000000..34e82cf44169
--- /dev/null
+++ b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
@@ -0,0 +1,360 @@
+//===--- DAGDeltaAlgorithm.cpp - A DAG Minimization Algorithm --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// The algorithm we use attempts to exploit the dependency information by
+// minimizing top-down. We start by constructing an initial root set R, and
+// then iteratively:
+//
+// 1. Minimize the set R using the test predicate:
+// P'(S) = P(S union pred*(S))
+//
+// 2. Extend R to R' = R union pred(R).
+//
+// until a fixed point is reached.
+//
+// The idea is that we want to quickly prune entire portions of the graph, so we
+// try to find high-level nodes that can be eliminated with all of their
+// dependents.
+//
+// FIXME: The current algorithm doesn't actually provide a strong guarantee
+// about the minimality of the result. The problem is that after adding nodes to
+// the required set, we no longer consider them for elimination. For strictly
+// well formed predicates, this doesn't happen, but it commonly occurs in
+// practice when there are unmodelled dependencies. I believe we can resolve
+// this by allowing the required set to be minimized as well, but need more test
+// cases first.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DAGDeltaAlgorithm.h"
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <map>
+using namespace llvm;
+
+namespace {
+
+class DAGDeltaAlgorithmImpl {
+ friend class DeltaActiveSetHelper;
+
+public:
+ typedef DAGDeltaAlgorithm::change_ty change_ty;
+ typedef DAGDeltaAlgorithm::changeset_ty changeset_ty;
+ typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty;
+ typedef DAGDeltaAlgorithm::edge_ty edge_ty;
+
+private:
+ typedef std::vector<change_ty>::iterator pred_iterator_ty;
+ typedef std::vector<change_ty>::iterator succ_iterator_ty;
+ typedef std::set<change_ty>::iterator pred_closure_iterator_ty;
+ typedef std::set<change_ty>::iterator succ_closure_iterator_ty;
+
+ DAGDeltaAlgorithm &DDA;
+
+ const changeset_ty &Changes;
+ const std::vector<edge_ty> &Dependencies;
+
+ std::vector<change_ty> Roots;
+
+ /// Cache of failed test results. Successful test results are never cached
+ /// since we always reduce following a success. We maintain an independent
+ /// cache from that used by the individual delta passes because we may get
+ /// hits across multiple individual delta invocations.
+ mutable std::set<changeset_ty> FailedTestsCache;
+
+ // FIXME: Gross.
+ std::map<change_ty, std::vector<change_ty> > Predecessors;
+ std::map<change_ty, std::vector<change_ty> > Successors;
+
+ std::map<change_ty, std::set<change_ty> > PredClosure;
+ std::map<change_ty, std::set<change_ty> > SuccClosure;
+
+private:
+ pred_iterator_ty pred_begin(change_ty Node) {
+ assert(Predecessors.count(Node) && "Invalid node!");
+ return Predecessors[Node].begin();
+ }
+ pred_iterator_ty pred_end(change_ty Node) {
+ assert(Predecessors.count(Node) && "Invalid node!");
+ return Predecessors[Node].end();
+ }
+
+ pred_closure_iterator_ty pred_closure_begin(change_ty Node) {
+ assert(PredClosure.count(Node) && "Invalid node!");
+ return PredClosure[Node].begin();
+ }
+ pred_closure_iterator_ty pred_closure_end(change_ty Node) {
+ assert(PredClosure.count(Node) && "Invalid node!");
+ return PredClosure[Node].end();
+ }
+
+ succ_iterator_ty succ_begin(change_ty Node) {
+ assert(Successors.count(Node) && "Invalid node!");
+ return Successors[Node].begin();
+ }
+ succ_iterator_ty succ_end(change_ty Node) {
+ assert(Successors.count(Node) && "Invalid node!");
+ return Successors[Node].end();
+ }
+
+ succ_closure_iterator_ty succ_closure_begin(change_ty Node) {
+ assert(SuccClosure.count(Node) && "Invalid node!");
+ return SuccClosure[Node].begin();
+ }
+ succ_closure_iterator_ty succ_closure_end(change_ty Node) {
+ assert(SuccClosure.count(Node) && "Invalid node!");
+ return SuccClosure[Node].end();
+ }
+
+ void UpdatedSearchState(const changeset_ty &Changes,
+ const changesetlist_ty &Sets,
+ const changeset_ty &Required) {
+ DDA.UpdatedSearchState(Changes, Sets, Required);
+ }
+
+ /// ExecuteOneTest - Execute a single test predicate on the change set \p S.
+ bool ExecuteOneTest(const changeset_ty &S) {
+ // Check dependencies invariant.
+ DEBUG({
+ for (changeset_ty::const_iterator it = S.begin(),
+ ie = S.end(); it != ie; ++it)
+ for (succ_iterator_ty it2 = succ_begin(*it),
+ ie2 = succ_end(*it); it2 != ie2; ++it2)
+ assert(S.count(*it2) && "Attempt to run invalid changeset!");
+ });
+
+ return DDA.ExecuteOneTest(S);
+ }
+
+public:
+ DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
+ const changeset_ty &_Changes,
+ const std::vector<edge_ty> &_Dependencies);
+
+ changeset_ty Run();
+
+ /// GetTestResult - Get the test result for the active set \p Changes with
+ /// \p Required changes from the cache, executing the test if necessary.
+ ///
+ /// \param Changes - The set of active changes being minimized, which should
+ /// have their pred closure included in the test.
+ /// \param Required - The set of changes which have previously been
+ /// established to be required.
+ /// \return - The test result.
+ bool GetTestResult(const changeset_ty &Changes, const changeset_ty &Required);
+};
+
+/// Helper object for minimizing an active set of changes.
+class DeltaActiveSetHelper : public DeltaAlgorithm {
+ DAGDeltaAlgorithmImpl &DDAI;
+
+ const changeset_ty &Required;
+
+protected:
+ /// UpdatedSearchState - Callback used when the search state changes.
+ virtual void UpdatedSearchState(const changeset_ty &Changes,
+ const changesetlist_ty &Sets) LLVM_OVERRIDE {
+ DDAI.UpdatedSearchState(Changes, Sets, Required);
+ }
+
+ virtual bool ExecuteOneTest(const changeset_ty &S) LLVM_OVERRIDE {
+ return DDAI.GetTestResult(S, Required);
+ }
+
+public:
+ DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &_DDAI,
+ const changeset_ty &_Required)
+ : DDAI(_DDAI), Required(_Required) {}
+};
+
+}
+
+DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
+ const changeset_ty &_Changes,
+ const std::vector<edge_ty>
+ &_Dependencies)
+ : DDA(_DDA),
+ Changes(_Changes),
+ Dependencies(_Dependencies)
+{
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ Predecessors.insert(std::make_pair(*it, std::vector<change_ty>()));
+ Successors.insert(std::make_pair(*it, std::vector<change_ty>()));
+ }
+ for (std::vector<edge_ty>::const_iterator it = Dependencies.begin(),
+ ie = Dependencies.end(); it != ie; ++it) {
+ Predecessors[it->second].push_back(it->first);
+ Successors[it->first].push_back(it->second);
+ }
+
+ // Compute the roots.
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ if (succ_begin(*it) == succ_end(*it))
+ Roots.push_back(*it);
+
+ // Pre-compute the closure of the successor relation.
+ std::vector<change_ty> Worklist(Roots.begin(), Roots.end());
+ while (!Worklist.empty()) {
+ change_ty Change = Worklist.back();
+ Worklist.pop_back();
+
+ std::set<change_ty> &ChangeSuccs = SuccClosure[Change];
+ for (pred_iterator_ty it = pred_begin(Change),
+ ie = pred_end(Change); it != ie; ++it) {
+ SuccClosure[*it].insert(Change);
+ SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end());
+ Worklist.push_back(*it);
+ }
+ }
+
+ // Invert to form the predecessor closure map.
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ PredClosure.insert(std::make_pair(*it, std::set<change_ty>()));
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
+ ie2 = succ_closure_end(*it); it2 != ie2; ++it2)
+ PredClosure[*it2].insert(*it);
+
+ // Dump useful debug info.
+ DEBUG({
+ llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n";
+ llvm::errs() << "Changes: [";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ if (it != Changes.begin()) llvm::errs() << ", ";
+ llvm::errs() << *it;
+
+ if (succ_begin(*it) != succ_end(*it)) {
+ llvm::errs() << "(";
+ for (succ_iterator_ty it2 = succ_begin(*it),
+ ie2 = succ_end(*it); it2 != ie2; ++it2) {
+ if (it2 != succ_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << "->" << *it2;
+ }
+ llvm::errs() << ")";
+ }
+ }
+ llvm::errs() << "]\n";
+
+ llvm::errs() << "Roots: [";
+ for (std::vector<change_ty>::const_iterator it = Roots.begin(),
+ ie = Roots.end(); it != ie; ++it) {
+ if (it != Roots.begin()) llvm::errs() << ", ";
+ llvm::errs() << *it;
+ }
+ llvm::errs() << "]\n";
+
+ llvm::errs() << "Predecessor Closure:\n";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ llvm::errs() << format(" %-4d: [", *it);
+ for (pred_closure_iterator_ty it2 = pred_closure_begin(*it),
+ ie2 = pred_closure_end(*it); it2 != ie2; ++it2) {
+ if (it2 != pred_closure_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << *it2;
+ }
+ llvm::errs() << "]\n";
+ }
+
+ llvm::errs() << "Successor Closure:\n";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ llvm::errs() << format(" %-4d: [", *it);
+ for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
+ ie2 = succ_closure_end(*it); it2 != ie2; ++it2) {
+ if (it2 != succ_closure_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << *it2;
+ }
+ llvm::errs() << "]\n";
+ }
+
+ llvm::errs() << "\n\n";
+ });
+}
+
+bool DAGDeltaAlgorithmImpl::GetTestResult(const changeset_ty &Changes,
+ const changeset_ty &Required) {
+ changeset_ty Extended(Required);
+ Extended.insert(Changes.begin(), Changes.end());
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ Extended.insert(pred_closure_begin(*it), pred_closure_end(*it));
+
+ if (FailedTestsCache.count(Extended))
+ return false;
+
+ bool Result = ExecuteOneTest(Extended);
+ if (!Result)
+ FailedTestsCache.insert(Extended);
+
+ return Result;
+}
+
+DAGDeltaAlgorithm::changeset_ty
+DAGDeltaAlgorithmImpl::Run() {
+ // The current set of changes we are minimizing, starting at the roots.
+ changeset_ty CurrentSet(Roots.begin(), Roots.end());
+
+ // The set of required changes.
+ changeset_ty Required;
+
+ // Iterate until the active set of changes is empty. Convergence is guaranteed
+ // assuming input was a DAG.
+ //
+ // Invariant: CurrentSet intersect Required == {}
+ // Invariant: Required == (Required union succ*(Required))
+ while (!CurrentSet.empty()) {
+ DEBUG({
+ llvm::errs() << "DAG_DD - " << CurrentSet.size() << " active changes, "
+ << Required.size() << " required changes\n";
+ });
+
+ // Minimize the current set of changes.
+ DeltaActiveSetHelper Helper(*this, Required);
+ changeset_ty CurrentMinSet = Helper.Run(CurrentSet);
+
+ // Update the set of required changes. Since
+ // CurrentMinSet subset CurrentSet
+ // and after the last iteration,
+ // succ(CurrentSet) subset Required
+ // then
+ // succ(CurrentMinSet) subset Required
+ // and our invariant on Required is maintained.
+ Required.insert(CurrentMinSet.begin(), CurrentMinSet.end());
+
+ // Replace the current set with the predecssors of the minimized set of
+ // active changes.
+ CurrentSet.clear();
+ for (changeset_ty::const_iterator it = CurrentMinSet.begin(),
+ ie = CurrentMinSet.end(); it != ie; ++it)
+ CurrentSet.insert(pred_begin(*it), pred_end(*it));
+
+ // FIXME: We could enforce CurrentSet intersect Required == {} here if we
+ // wanted to protect against cyclic graphs.
+ }
+
+ return Required;
+}
+
+void DAGDeltaAlgorithm::anchor() {
+}
+
+DAGDeltaAlgorithm::changeset_ty
+DAGDeltaAlgorithm::Run(const changeset_ty &Changes,
+ const std::vector<edge_ty> &Dependencies) {
+ return DAGDeltaAlgorithmImpl(*this, Changes, Dependencies).Run();
+}
diff --git a/contrib/llvm/lib/Support/DataExtractor.cpp b/contrib/llvm/lib/Support/DataExtractor.cpp
new file mode 100644
index 000000000000..3d5cce05358c
--- /dev/null
+++ b/contrib/llvm/lib/Support/DataExtractor.cpp
@@ -0,0 +1,175 @@
+//===-- DataExtractor.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
+using namespace llvm;
+
+template <typename T>
+static T getU(uint32_t *offset_ptr, const DataExtractor *de,
+ bool isLittleEndian, const char *Data) {
+ T val = 0;
+ uint32_t offset = *offset_ptr;
+ if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) {
+ std::memcpy(&val, &Data[offset], sizeof(val));
+ if (sys::isLittleEndianHost() != isLittleEndian)
+ val = sys::SwapByteOrder(val);
+
+ // Advance the offset
+ *offset_ptr += sizeof(val);
+ }
+ return val;
+}
+
+template <typename T>
+static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count,
+ const DataExtractor *de, bool isLittleEndian, const char *Data){
+ uint32_t offset = *offset_ptr;
+
+ if (count > 0 && de->isValidOffsetForDataOfSize(offset, sizeof(*dst)*count)) {
+ for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
+ ++value_ptr, offset += sizeof(*dst))
+ *value_ptr = getU<T>(offset_ptr, de, isLittleEndian, Data);
+ // Advance the offset
+ *offset_ptr = offset;
+ // Return a non-NULL pointer to the converted data as an indicator of
+ // success
+ return dst;
+ }
+ return NULL;
+}
+
+uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const {
+ return getU<uint8_t>(offset_ptr, this, IsLittleEndian, Data.data());
+}
+
+uint8_t *
+DataExtractor::getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const {
+ return getUs<uint8_t>(offset_ptr, dst, count, this, IsLittleEndian,
+ Data.data());
+}
+
+
+uint16_t DataExtractor::getU16(uint32_t *offset_ptr) const {
+ return getU<uint16_t>(offset_ptr, this, IsLittleEndian, Data.data());
+}
+
+uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst,
+ uint32_t count) const {
+ return getUs<uint16_t>(offset_ptr, dst, count, this, IsLittleEndian,
+ Data.data());
+}
+
+uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const {
+ return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data());
+}
+
+uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst,
+ uint32_t count) const {
+ return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian,
+ Data.data());
+}
+
+uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const {
+ return getU<uint64_t>(offset_ptr, this, IsLittleEndian, Data.data());
+}
+
+uint64_t *DataExtractor::getU64(uint32_t *offset_ptr, uint64_t *dst,
+ uint32_t count) const {
+ return getUs<uint64_t>(offset_ptr, dst, count, this, IsLittleEndian,
+ Data.data());
+}
+
+uint64_t
+DataExtractor::getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const {
+ switch (byte_size) {
+ case 1:
+ return getU8(offset_ptr);
+ case 2:
+ return getU16(offset_ptr);
+ case 4:
+ return getU32(offset_ptr);
+ case 8:
+ return getU64(offset_ptr);
+ }
+ llvm_unreachable("getUnsigned unhandled case!");
+}
+
+int64_t
+DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) const {
+ switch (byte_size) {
+ case 1:
+ return (int8_t)getU8(offset_ptr);
+ case 2:
+ return (int16_t)getU16(offset_ptr);
+ case 4:
+ return (int32_t)getU32(offset_ptr);
+ case 8:
+ return (int64_t)getU64(offset_ptr);
+ }
+ llvm_unreachable("getSigned unhandled case!");
+}
+
+const char *DataExtractor::getCStr(uint32_t *offset_ptr) const {
+ uint32_t offset = *offset_ptr;
+ StringRef::size_type pos = Data.find('\0', offset);
+ if (pos != StringRef::npos) {
+ *offset_ptr = pos + 1;
+ return Data.data() + offset;
+ }
+ return NULL;
+}
+
+uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const {
+ uint64_t result = 0;
+ if (Data.empty())
+ return 0;
+
+ unsigned shift = 0;
+ uint32_t offset = *offset_ptr;
+ uint8_t byte = 0;
+
+ while (isValidOffset(offset)) {
+ byte = Data[offset++];
+ result |= uint64_t(byte & 0x7f) << shift;
+ shift += 7;
+ if ((byte & 0x80) == 0)
+ break;
+ }
+
+ *offset_ptr = offset;
+ return result;
+}
+
+int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const {
+ int64_t result = 0;
+ if (Data.empty())
+ return 0;
+
+ unsigned shift = 0;
+ uint32_t offset = *offset_ptr;
+ uint8_t byte = 0;
+
+ while (isValidOffset(offset)) {
+ byte = Data[offset++];
+ result |= uint64_t(byte & 0x7f) << shift;
+ shift += 7;
+ if ((byte & 0x80) == 0)
+ break;
+ }
+
+ // Sign bit of byte is 2nd high order bit (0x40)
+ if (shift < 64 && (byte & 0x40))
+ result |= -(1ULL << shift);
+
+ *offset_ptr = offset;
+ return result;
+}
diff --git a/contrib/llvm/lib/Support/DataStream.cpp b/contrib/llvm/lib/Support/DataStream.cpp
new file mode 100644
index 000000000000..0a02281c2549
--- /dev/null
+++ b/contrib/llvm/lib/Support/DataStream.cpp
@@ -0,0 +1,98 @@
+//===--- llvm/Support/DataStream.cpp - Lazy streamed data -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements DataStreamer, which fetches bytes of Data from
+// a stream source. It provides support for streaming (lazy reading) of
+// bitcode. An example implementation of streaming from a file or stdin
+// is included.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "Data-stream"
+#include "llvm/Support/DataStream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/system_error.h"
+#include <cerrno>
+#include <cstdio>
+#include <string>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+#include <fcntl.h>
+using namespace llvm;
+
+// Interface goals:
+// * StreamableMemoryObject doesn't care about complexities like using
+// threads/async callbacks to actually overlap download+compile
+// * Don't want to duplicate Data in memory
+// * Don't need to know total Data len in advance
+// Non-goals:
+// StreamableMemoryObject already has random access so this interface only does
+// in-order streaming (no arbitrary seeking, else we'd have to buffer all the
+// Data here in addition to MemoryObject). This also means that if we want
+// to be able to to free Data, BitstreamBytes/BitcodeReader will implement it
+
+STATISTIC(NumStreamFetches, "Number of calls to Data stream fetch");
+
+namespace llvm {
+DataStreamer::~DataStreamer() {}
+}
+
+namespace {
+
+// Very simple stream backed by a file. Mostly useful for stdin and debugging;
+// actual file access is probably still best done with mmap.
+class DataFileStreamer : public DataStreamer {
+ int Fd;
+public:
+ DataFileStreamer() : Fd(0) {}
+ virtual ~DataFileStreamer() {
+ close(Fd);
+ }
+ virtual size_t GetBytes(unsigned char *buf, size_t len) LLVM_OVERRIDE {
+ NumStreamFetches++;
+ return read(Fd, buf, len);
+ }
+
+ error_code OpenFile(const std::string &Filename) {
+ if (Filename == "-") {
+ Fd = 0;
+ sys::Program::ChangeStdinToBinary();
+ return error_code::success();
+ }
+
+ int OpenFlags = O_RDONLY;
+#ifdef O_BINARY
+ OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
+#endif
+ Fd = ::open(Filename.c_str(), OpenFlags);
+ if (Fd == -1)
+ return error_code(errno, posix_category());
+ return error_code::success();
+ }
+};
+
+}
+
+namespace llvm {
+DataStreamer *getDataFileStreamer(const std::string &Filename,
+ std::string *StrError) {
+ DataFileStreamer *s = new DataFileStreamer();
+ if (error_code e = s->OpenFile(Filename)) {
+ *StrError = std::string("Could not open ") + Filename + ": " +
+ e.message() + "\n";
+ return NULL;
+ }
+ return s;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Debug.cpp b/contrib/llvm/lib/Support/Debug.cpp
new file mode 100644
index 000000000000..d9cb8a9da815
--- /dev/null
+++ b/contrib/llvm/lib/Support/Debug.cpp
@@ -0,0 +1,134 @@
+//===-- Debug.cpp - An easy way to add debug output to your code ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a handy way of adding debugging information to your
+// code, without it being enabled all of the time, and without having to add
+// command line options to enable it.
+//
+// In particular, just wrap your code with the DEBUG() macro, and it will be
+// enabled automatically if you specify '-debug' on the command-line.
+// Alternatively, you can also use the SET_DEBUG_TYPE("foo") macro to specify
+// that your debug code belongs to class "foo". Then, on the command line, you
+// can specify '-debug-only=foo' to enable JUST the debug information for the
+// foo class.
+//
+// When compiling without assertions, the -debug-* options and all code in
+// DEBUG() statements disappears, so it does not affect the runtime of the code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/circular_raw_ostream.h"
+
+using namespace llvm;
+
+// All Debug.h functionality is a no-op in NDEBUG mode.
+#ifndef NDEBUG
+bool llvm::DebugFlag; // DebugFlag - Exported boolean set by the -debug option
+
+// -debug - Command line option to enable the DEBUG statements in the passes.
+// This flag may only be enabled in debug builds.
+static cl::opt<bool, true>
+Debug("debug", cl::desc("Enable debug output"), cl::Hidden,
+ cl::location(DebugFlag));
+
+// -debug-buffer-size - Buffer the last N characters of debug output
+//until program termination.
+static cl::opt<unsigned>
+DebugBufferSize("debug-buffer-size",
+ cl::desc("Buffer the last N characters of debug output "
+ "until program termination. "
+ "[default 0 -- immediate print-out]"),
+ cl::Hidden,
+ cl::init(0));
+
+static std::string CurrentDebugType;
+
+namespace {
+
+struct DebugOnlyOpt {
+ void operator=(const std::string &Val) const {
+ DebugFlag |= !Val.empty();
+ CurrentDebugType = Val;
+ }
+};
+
+}
+
+static DebugOnlyOpt DebugOnlyOptLoc;
+
+static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> >
+DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
+ cl::Hidden, cl::value_desc("debug string"),
+ cl::location(DebugOnlyOptLoc), cl::ValueRequired);
+
+// Signal handlers - dump debug output on termination.
+static void debug_user_sig_handler(void *Cookie) {
+ // This is a bit sneaky. Since this is under #ifndef NDEBUG, we
+ // know that debug mode is enabled and dbgs() really is a
+ // circular_raw_ostream. If NDEBUG is defined, then dbgs() ==
+ // errs() but this will never be invoked.
+ llvm::circular_raw_ostream *dbgout =
+ static_cast<llvm::circular_raw_ostream *>(&llvm::dbgs());
+ dbgout->flushBufferWithBanner();
+}
+
+// isCurrentDebugType - Return true if the specified string is the debug type
+// specified on the command line, or if none was specified on the command line
+// with the -debug-only=X option.
+//
+bool llvm::isCurrentDebugType(const char *DebugType) {
+ return CurrentDebugType.empty() || DebugType == CurrentDebugType;
+}
+
+/// setCurrentDebugType - Set the current debug type, as if the -debug-only=X
+/// option were specified. Note that DebugFlag also needs to be set to true for
+/// debug output to be produced.
+///
+void llvm::setCurrentDebugType(const char *Type) {
+ CurrentDebugType = Type;
+}
+
+/// dbgs - Return a circular-buffered debug stream.
+raw_ostream &llvm::dbgs() {
+ // Do one-time initialization in a thread-safe way.
+ static struct dbgstream {
+ circular_raw_ostream strm;
+
+ dbgstream() :
+ strm(errs(), "*** Debug Log Output ***\n",
+ (!EnableDebugBuffering || !DebugFlag) ? 0 : DebugBufferSize) {
+ if (EnableDebugBuffering && DebugFlag && DebugBufferSize != 0)
+ // TODO: Add a handler for SIGUSER1-type signals so the user can
+ // force a debug dump.
+ sys::AddSignalHandler(&debug_user_sig_handler, 0);
+ // Otherwise we've already set the debug stream buffer size to
+ // zero, disabling buffering so it will output directly to errs().
+ }
+ } thestrm;
+
+ return thestrm.strm;
+}
+
+#else
+// Avoid "has no symbols" warning.
+namespace llvm {
+ /// dbgs - Return errs().
+ raw_ostream &dbgs() {
+ return errs();
+ }
+}
+
+#endif
+
+/// EnableDebugBuffering - Turn on signal handler installation.
+///
+bool llvm::EnableDebugBuffering = false;
diff --git a/contrib/llvm/lib/Support/DeltaAlgorithm.cpp b/contrib/llvm/lib/Support/DeltaAlgorithm.cpp
new file mode 100644
index 000000000000..9e52874de832
--- /dev/null
+++ b/contrib/llvm/lib/Support/DeltaAlgorithm.cpp
@@ -0,0 +1,114 @@
+//===--- DeltaAlgorithm.cpp - A Set Minimization Algorithm -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include <algorithm>
+#include <iterator>
+using namespace llvm;
+
+DeltaAlgorithm::~DeltaAlgorithm() {
+}
+
+bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) {
+ if (FailedTestsCache.count(Changes))
+ return false;
+
+ bool Result = ExecuteOneTest(Changes);
+ if (!Result)
+ FailedTestsCache.insert(Changes);
+
+ return Result;
+}
+
+void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) {
+ // FIXME: Allow clients to provide heuristics for improved splitting.
+
+ // FIXME: This is really slow.
+ changeset_ty LHS, RHS;
+ unsigned idx = 0, N = S.size() / 2;
+ for (changeset_ty::const_iterator it = S.begin(),
+ ie = S.end(); it != ie; ++it, ++idx)
+ ((idx < N) ? LHS : RHS).insert(*it);
+ if (!LHS.empty())
+ Res.push_back(LHS);
+ if (!RHS.empty())
+ Res.push_back(RHS);
+}
+
+DeltaAlgorithm::changeset_ty
+DeltaAlgorithm::Delta(const changeset_ty &Changes,
+ const changesetlist_ty &Sets) {
+ // Invariant: union(Res) == Changes
+ UpdatedSearchState(Changes, Sets);
+
+ // If there is nothing left we can remove, we are done.
+ if (Sets.size() <= 1)
+ return Changes;
+
+ // Look for a passing subset.
+ changeset_ty Res;
+ if (Search(Changes, Sets, Res))
+ return Res;
+
+ // Otherwise, partition the sets if possible; if not we are done.
+ changesetlist_ty SplitSets;
+ for (changesetlist_ty::const_iterator it = Sets.begin(),
+ ie = Sets.end(); it != ie; ++it)
+ Split(*it, SplitSets);
+ if (SplitSets.size() == Sets.size())
+ return Changes;
+
+ return Delta(Changes, SplitSets);
+}
+
+bool DeltaAlgorithm::Search(const changeset_ty &Changes,
+ const changesetlist_ty &Sets,
+ changeset_ty &Res) {
+ // FIXME: Parallelize.
+ for (changesetlist_ty::const_iterator it = Sets.begin(),
+ ie = Sets.end(); it != ie; ++it) {
+ // If the test passes on this subset alone, recurse.
+ if (GetTestResult(*it)) {
+ changesetlist_ty Sets;
+ Split(*it, Sets);
+ Res = Delta(*it, Sets);
+ return true;
+ }
+
+ // Otherwise, if we have more than two sets, see if test passes on the
+ // complement.
+ if (Sets.size() > 2) {
+ // FIXME: This is really slow.
+ changeset_ty Complement;
+ std::set_difference(
+ Changes.begin(), Changes.end(), it->begin(), it->end(),
+ std::insert_iterator<changeset_ty>(Complement, Complement.begin()));
+ if (GetTestResult(Complement)) {
+ changesetlist_ty ComplementSets;
+ ComplementSets.insert(ComplementSets.end(), Sets.begin(), it);
+ ComplementSets.insert(ComplementSets.end(), it + 1, Sets.end());
+ Res = Delta(Complement, ComplementSets);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+DeltaAlgorithm::changeset_ty DeltaAlgorithm::Run(const changeset_ty &Changes) {
+ // Check empty set first to quickly find poor test functions.
+ if (GetTestResult(changeset_ty()))
+ return changeset_ty();
+
+ // Otherwise run the real delta algorithm.
+ changesetlist_ty Sets;
+ Split(Changes, Sets);
+
+ return Delta(Changes, Sets);
+}
diff --git a/contrib/llvm/lib/Support/Disassembler.cpp b/contrib/llvm/lib/Support/Disassembler.cpp
new file mode 100644
index 000000000000..b3244fab7df7
--- /dev/null
+++ b/contrib/llvm/lib/Support/Disassembler.cpp
@@ -0,0 +1,74 @@
+//===- lib/Support/Disassembler.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the necessary glue to call external disassembler
+// libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Config/config.h"
+#include <cassert>
+#include <iomanip>
+#include <sstream>
+#include <string>
+
+#if USE_UDIS86
+#include <udis86.h>
+#endif
+
+using namespace llvm;
+
+bool llvm::sys::hasDisassembler()
+{
+#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+ // We have option to enable udis86 library.
+# if USE_UDIS86
+ return true;
+#else
+ return false;
+#endif
+#else
+ return false;
+#endif
+}
+
+std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length,
+ uint64_t pc) {
+ std::stringstream res;
+
+#if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \
+ && USE_UDIS86
+ unsigned bits;
+# if defined(__i386__)
+ bits = 32;
+# else
+ bits = 64;
+# endif
+
+ ud_t ud_obj;
+
+ ud_init(&ud_obj);
+ ud_set_input_buffer(&ud_obj, start, length);
+ ud_set_mode(&ud_obj, bits);
+ ud_set_pc(&ud_obj, pc);
+ ud_set_syntax(&ud_obj, UD_SYN_ATT);
+
+ res << std::setbase(16)
+ << std::setw(bits/4);
+
+ while (ud_disassemble(&ud_obj)) {
+ res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n";
+ }
+#else
+ res << "No disassembler available. See configure help for options.\n";
+#endif
+
+ return res.str();
+}
diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp
new file mode 100644
index 000000000000..0f91c11ac260
--- /dev/null
+++ b/contrib/llvm/lib/Support/Dwarf.cpp
@@ -0,0 +1,724 @@
+//===-- llvm/Support/Dwarf.cpp - Dwarf Framework ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for generic dwarf information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+using namespace dwarf;
+
+/// TagString - Return the string for the specified tag.
+///
+const char *llvm::dwarf::TagString(unsigned Tag) {
+ switch (Tag) {
+ case DW_TAG_array_type: return "DW_TAG_array_type";
+ case DW_TAG_class_type: return "DW_TAG_class_type";
+ case DW_TAG_entry_point: return "DW_TAG_entry_point";
+ case DW_TAG_enumeration_type: return "DW_TAG_enumeration_type";
+ case DW_TAG_formal_parameter: return "DW_TAG_formal_parameter";
+ case DW_TAG_imported_declaration: return "DW_TAG_imported_declaration";
+ case DW_TAG_label: return "DW_TAG_label";
+ case DW_TAG_lexical_block: return "DW_TAG_lexical_block";
+ case DW_TAG_member: return "DW_TAG_member";
+ case DW_TAG_pointer_type: return "DW_TAG_pointer_type";
+ case DW_TAG_reference_type: return "DW_TAG_reference_type";
+ case DW_TAG_compile_unit: return "DW_TAG_compile_unit";
+ case DW_TAG_string_type: return "DW_TAG_string_type";
+ case DW_TAG_structure_type: return "DW_TAG_structure_type";
+ case DW_TAG_subroutine_type: return "DW_TAG_subroutine_type";
+ case DW_TAG_typedef: return "DW_TAG_typedef";
+ case DW_TAG_union_type: return "DW_TAG_union_type";
+ case DW_TAG_unspecified_parameters: return "DW_TAG_unspecified_parameters";
+ case DW_TAG_variant: return "DW_TAG_variant";
+ case DW_TAG_common_block: return "DW_TAG_common_block";
+ case DW_TAG_common_inclusion: return "DW_TAG_common_inclusion";
+ case DW_TAG_inheritance: return "DW_TAG_inheritance";
+ case DW_TAG_inlined_subroutine: return "DW_TAG_inlined_subroutine";
+ case DW_TAG_module: return "DW_TAG_module";
+ case DW_TAG_ptr_to_member_type: return "DW_TAG_ptr_to_member_type";
+ case DW_TAG_set_type: return "DW_TAG_set_type";
+ case DW_TAG_subrange_type: return "DW_TAG_subrange_type";
+ case DW_TAG_with_stmt: return "DW_TAG_with_stmt";
+ case DW_TAG_access_declaration: return "DW_TAG_access_declaration";
+ case DW_TAG_base_type: return "DW_TAG_base_type";
+ case DW_TAG_catch_block: return "DW_TAG_catch_block";
+ case DW_TAG_const_type: return "DW_TAG_const_type";
+ case DW_TAG_constant: return "DW_TAG_constant";
+ case DW_TAG_enumerator: return "DW_TAG_enumerator";
+ case DW_TAG_file_type: return "DW_TAG_file_type";
+ case DW_TAG_friend: return "DW_TAG_friend";
+ case DW_TAG_namelist: return "DW_TAG_namelist";
+ case DW_TAG_namelist_item: return "DW_TAG_namelist_item";
+ case DW_TAG_packed_type: return "DW_TAG_packed_type";
+ case DW_TAG_subprogram: return "DW_TAG_subprogram";
+ case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter";
+ case DW_TAG_template_value_parameter:return "DW_TAG_template_value_parameter";
+ case DW_TAG_thrown_type: return "DW_TAG_thrown_type";
+ case DW_TAG_try_block: return "DW_TAG_try_block";
+ case DW_TAG_variant_part: return "DW_TAG_variant_part";
+ case DW_TAG_variable: return "DW_TAG_variable";
+ case DW_TAG_volatile_type: return "DW_TAG_volatile_type";
+ case DW_TAG_dwarf_procedure: return "DW_TAG_dwarf_procedure";
+ case DW_TAG_restrict_type: return "DW_TAG_restrict_type";
+ case DW_TAG_interface_type: return "DW_TAG_interface_type";
+ case DW_TAG_namespace: return "DW_TAG_namespace";
+ case DW_TAG_imported_module: return "DW_TAG_imported_module";
+ case DW_TAG_unspecified_type: return "DW_TAG_unspecified_type";
+ case DW_TAG_partial_unit: return "DW_TAG_partial_unit";
+ case DW_TAG_imported_unit: return "DW_TAG_imported_unit";
+ case DW_TAG_condition: return "DW_TAG_condition";
+ case DW_TAG_shared_type: return "DW_TAG_shared_type";
+ case DW_TAG_lo_user: return "DW_TAG_lo_user";
+ case DW_TAG_hi_user: return "DW_TAG_hi_user";
+ case DW_TAG_auto_variable: return "DW_TAG_auto_variable";
+ case DW_TAG_arg_variable: return "DW_TAG_arg_variable";
+ case DW_TAG_rvalue_reference_type: return "DW_TAG_rvalue_reference_type";
+ case DW_TAG_template_alias: return "DW_TAG_template_alias";
+ case DW_TAG_MIPS_loop: return "DW_TAG_MIPS_loop";
+ case DW_TAG_type_unit: return "DW_TAG_type_unit";
+ case DW_TAG_format_label: return "DW_TAG_format_label";
+ case DW_TAG_function_template: return "DW_TAG_function_template";
+ case DW_TAG_class_template: return "DW_TAG_class_template";
+ case DW_TAG_GNU_template_template_param:
+ return "DW_TAG_GNU_template_template_param";
+ case DW_TAG_GNU_template_parameter_pack:
+ return "DW_TAG_GNU_template_parameter_pack";
+ case DW_TAG_GNU_formal_parameter_pack:
+ return "DW_TAG_GNU_formal_parameter_pack";
+ case DW_TAG_APPLE_property: return "DW_TAG_APPLE_property";
+ }
+ return 0;
+}
+
+/// ChildrenString - Return the string for the specified children flag.
+///
+const char *llvm::dwarf::ChildrenString(unsigned Children) {
+ switch (Children) {
+ case DW_CHILDREN_no: return "DW_CHILDREN_no";
+ case DW_CHILDREN_yes: return "DW_CHILDREN_yes";
+ }
+ return 0;
+}
+
+/// AttributeString - Return the string for the specified attribute.
+///
+const char *llvm::dwarf::AttributeString(unsigned Attribute) {
+ switch (Attribute) {
+ case DW_AT_sibling: return "DW_AT_sibling";
+ case DW_AT_location: return "DW_AT_location";
+ case DW_AT_name: return "DW_AT_name";
+ case DW_AT_ordering: return "DW_AT_ordering";
+ case DW_AT_byte_size: return "DW_AT_byte_size";
+ case DW_AT_bit_offset: return "DW_AT_bit_offset";
+ case DW_AT_bit_size: return "DW_AT_bit_size";
+ case DW_AT_stmt_list: return "DW_AT_stmt_list";
+ case DW_AT_low_pc: return "DW_AT_low_pc";
+ case DW_AT_high_pc: return "DW_AT_high_pc";
+ case DW_AT_language: return "DW_AT_language";
+ case DW_AT_discr: return "DW_AT_discr";
+ case DW_AT_discr_value: return "DW_AT_discr_value";
+ case DW_AT_visibility: return "DW_AT_visibility";
+ case DW_AT_import: return "DW_AT_import";
+ case DW_AT_string_length: return "DW_AT_string_length";
+ case DW_AT_common_reference: return "DW_AT_common_reference";
+ case DW_AT_comp_dir: return "DW_AT_comp_dir";
+ case DW_AT_const_value: return "DW_AT_const_value";
+ case DW_AT_containing_type: return "DW_AT_containing_type";
+ case DW_AT_default_value: return "DW_AT_default_value";
+ case DW_AT_inline: return "DW_AT_inline";
+ case DW_AT_is_optional: return "DW_AT_is_optional";
+ case DW_AT_lower_bound: return "DW_AT_lower_bound";
+ case DW_AT_producer: return "DW_AT_producer";
+ case DW_AT_prototyped: return "DW_AT_prototyped";
+ case DW_AT_return_addr: return "DW_AT_return_addr";
+ case DW_AT_start_scope: return "DW_AT_start_scope";
+ case DW_AT_bit_stride: return "DW_AT_bit_stride";
+ case DW_AT_upper_bound: return "DW_AT_upper_bound";
+ case DW_AT_abstract_origin: return "DW_AT_abstract_origin";
+ case DW_AT_accessibility: return "DW_AT_accessibility";
+ case DW_AT_address_class: return "DW_AT_address_class";
+ case DW_AT_artificial: return "DW_AT_artificial";
+ case DW_AT_base_types: return "DW_AT_base_types";
+ case DW_AT_calling_convention: return "DW_AT_calling_convention";
+ case DW_AT_count: return "DW_AT_count";
+ case DW_AT_data_member_location: return "DW_AT_data_member_location";
+ case DW_AT_decl_column: return "DW_AT_decl_column";
+ case DW_AT_decl_file: return "DW_AT_decl_file";
+ case DW_AT_decl_line: return "DW_AT_decl_line";
+ case DW_AT_declaration: return "DW_AT_declaration";
+ case DW_AT_discr_list: return "DW_AT_discr_list";
+ case DW_AT_encoding: return "DW_AT_encoding";
+ case DW_AT_external: return "DW_AT_external";
+ case DW_AT_frame_base: return "DW_AT_frame_base";
+ case DW_AT_friend: return "DW_AT_friend";
+ case DW_AT_identifier_case: return "DW_AT_identifier_case";
+ case DW_AT_macro_info: return "DW_AT_macro_info";
+ case DW_AT_namelist_item: return "DW_AT_namelist_item";
+ case DW_AT_priority: return "DW_AT_priority";
+ case DW_AT_segment: return "DW_AT_segment";
+ case DW_AT_specification: return "DW_AT_specification";
+ case DW_AT_static_link: return "DW_AT_static_link";
+ case DW_AT_type: return "DW_AT_type";
+ case DW_AT_use_location: return "DW_AT_use_location";
+ case DW_AT_variable_parameter: return "DW_AT_variable_parameter";
+ case DW_AT_virtuality: return "DW_AT_virtuality";
+ case DW_AT_vtable_elem_location: return "DW_AT_vtable_elem_location";
+ case DW_AT_allocated: return "DW_AT_allocated";
+ case DW_AT_associated: return "DW_AT_associated";
+ case DW_AT_data_location: return "DW_AT_data_location";
+ case DW_AT_byte_stride: return "DW_AT_byte_stride";
+ case DW_AT_entry_pc: return "DW_AT_entry_pc";
+ case DW_AT_use_UTF8: return "DW_AT_use_UTF8";
+ case DW_AT_extension: return "DW_AT_extension";
+ case DW_AT_ranges: return "DW_AT_ranges";
+ case DW_AT_trampoline: return "DW_AT_trampoline";
+ case DW_AT_call_column: return "DW_AT_call_column";
+ case DW_AT_call_file: return "DW_AT_call_file";
+ case DW_AT_call_line: return "DW_AT_call_line";
+ case DW_AT_description: return "DW_AT_description";
+ case DW_AT_binary_scale: return "DW_AT_binary_scale";
+ case DW_AT_decimal_scale: return "DW_AT_decimal_scale";
+ case DW_AT_small: return "DW_AT_small";
+ case DW_AT_decimal_sign: return "DW_AT_decimal_sign";
+ case DW_AT_digit_count: return "DW_AT_digit_count";
+ case DW_AT_picture_string: return "DW_AT_picture_string";
+ case DW_AT_mutable: return "DW_AT_mutable";
+ case DW_AT_threads_scaled: return "DW_AT_threads_scaled";
+ case DW_AT_explicit: return "DW_AT_explicit";
+ case DW_AT_object_pointer: return "DW_AT_object_pointer";
+ case DW_AT_endianity: return "DW_AT_endianity";
+ case DW_AT_elemental: return "DW_AT_elemental";
+ case DW_AT_pure: return "DW_AT_pure";
+ case DW_AT_recursive: return "DW_AT_recursive";
+ case DW_AT_signature: return "DW_AT_signature";
+ case DW_AT_main_subprogram: return "DW_AT_main_subprogram";
+ case DW_AT_data_bit_offset: return "DW_AT_data_bit_offset";
+ case DW_AT_const_expr: return "DW_AT_const_expr";
+ case DW_AT_enum_class: return "DW_AT_enum_class";
+ case DW_AT_linkage_name: return "DW_AT_linkage_name";
+ case DW_AT_MIPS_loop_begin: return "DW_AT_MIPS_loop_begin";
+ case DW_AT_MIPS_tail_loop_begin: return "DW_AT_MIPS_tail_loop_begin";
+ case DW_AT_MIPS_epilog_begin: return "DW_AT_MIPS_epilog_begin";
+ case DW_AT_MIPS_loop_unroll_factor: return "DW_AT_MIPS_loop_unroll_factor";
+ case DW_AT_MIPS_software_pipeline_depth:
+ return "DW_AT_MIPS_software_pipeline_depth";
+ case DW_AT_MIPS_linkage_name: return "DW_AT_MIPS_linkage_name";
+ case DW_AT_MIPS_stride: return "DW_AT_MIPS_stride";
+ case DW_AT_MIPS_abstract_name: return "DW_AT_MIPS_abstract_name";
+ case DW_AT_MIPS_clone_origin: return "DW_AT_MIPS_clone_origin";
+ case DW_AT_MIPS_has_inlines: return "DW_AT_MIPS_has_inlines";
+ case DW_AT_MIPS_stride_byte: return "DW_AT_MIPS_stride_byte";
+ case DW_AT_MIPS_stride_elem: return "DW_AT_MIPS_stride_elem";
+ case DW_AT_MIPS_ptr_dopetype: return "DW_AT_MIPS_ptr_dopetype";
+ case DW_AT_MIPS_allocatable_dopetype:
+ return "DW_AT_MIPS_allocatable_dopetype";
+ case DW_AT_MIPS_assumed_shape_dopetype:
+ return "DW_AT_MIPS_assumed_shape_dopetype";
+ case DW_AT_sf_names: return "DW_AT_sf_names";
+ case DW_AT_src_info: return "DW_AT_src_info";
+ case DW_AT_mac_info: return "DW_AT_mac_info";
+ case DW_AT_src_coords: return "DW_AT_src_coords";
+ case DW_AT_body_begin: return "DW_AT_body_begin";
+ case DW_AT_body_end: return "DW_AT_body_end";
+ case DW_AT_GNU_vector: return "DW_AT_GNU_vector";
+ case DW_AT_GNU_template_name: return "DW_AT_GNU_template_name";
+ case DW_AT_MIPS_assumed_size: return "DW_AT_MIPS_assumed_size";
+ case DW_AT_lo_user: return "DW_AT_lo_user";
+ case DW_AT_hi_user: return "DW_AT_hi_user";
+ case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized";
+ case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags";
+ case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa";
+ case DW_AT_APPLE_block: return "DW_AT_APPLE_block";
+ case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers";
+ case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class";
+ case DW_AT_APPLE_omit_frame_ptr: return "DW_AT_APPLE_omit_frame_ptr";
+ case DW_AT_APPLE_property_name: return "DW_AT_APPLE_property_name";
+ case DW_AT_APPLE_property_getter: return "DW_AT_APPLE_property_getter";
+ case DW_AT_APPLE_property_setter: return "DW_AT_APPLE_property_setter";
+ case DW_AT_APPLE_property_attribute: return "DW_AT_APPLE_property_attribute";
+ case DW_AT_APPLE_property: return "DW_AT_APPLE_property";
+ case DW_AT_APPLE_objc_complete_type: return "DW_AT_APPLE_objc_complete_type";
+
+ // DWARF5 Fission Extension Attribute
+ case DW_AT_GNU_dwo_name: return "DW_AT_GNU_dwo_name";
+ case DW_AT_GNU_dwo_id: return "DW_AT_GNU_dwo_id";
+ case DW_AT_GNU_ranges_base: return "DW_AT_GNU_ranges_base";
+ case DW_AT_GNU_addr_base: return "DW_AT_GNU_addr_base";
+ case DW_AT_GNU_pubnames: return "DW_AT_GNU_pubnames";
+ case DW_AT_GNU_pubtypes: return "DW_AT_GNU_pubtypes";
+ }
+ return 0;
+}
+
+/// FormEncodingString - Return the string for the specified form encoding.
+///
+const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_FORM_addr: return "DW_FORM_addr";
+ case DW_FORM_block2: return "DW_FORM_block2";
+ case DW_FORM_block4: return "DW_FORM_block4";
+ case DW_FORM_data2: return "DW_FORM_data2";
+ case DW_FORM_data4: return "DW_FORM_data4";
+ case DW_FORM_data8: return "DW_FORM_data8";
+ case DW_FORM_string: return "DW_FORM_string";
+ case DW_FORM_block: return "DW_FORM_block";
+ case DW_FORM_block1: return "DW_FORM_block1";
+ case DW_FORM_data1: return "DW_FORM_data1";
+ case DW_FORM_flag: return "DW_FORM_flag";
+ case DW_FORM_sdata: return "DW_FORM_sdata";
+ case DW_FORM_strp: return "DW_FORM_strp";
+ case DW_FORM_udata: return "DW_FORM_udata";
+ case DW_FORM_ref_addr: return "DW_FORM_ref_addr";
+ case DW_FORM_ref1: return "DW_FORM_ref1";
+ case DW_FORM_ref2: return "DW_FORM_ref2";
+ case DW_FORM_ref4: return "DW_FORM_ref4";
+ case DW_FORM_ref8: return "DW_FORM_ref8";
+ case DW_FORM_ref_udata: return "DW_FORM_ref_udata";
+ case DW_FORM_indirect: return "DW_FORM_indirect";
+ case DW_FORM_sec_offset: return "DW_FORM_sec_offset";
+ case DW_FORM_exprloc: return "DW_FORM_exprloc";
+ case DW_FORM_flag_present: return "DW_FORM_flag_present";
+ case DW_FORM_ref_sig8: return "DW_FORM_ref_sig8";
+
+ // DWARF5 Fission Extension Forms
+ case DW_FORM_GNU_addr_index: return "DW_FORM_GNU_addr_index";
+ case DW_FORM_GNU_str_index: return "DW_FORM_GNU_str_index";
+ }
+ return 0;
+}
+
+/// OperationEncodingString - Return the string for the specified operation
+/// encoding.
+const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_OP_addr: return "DW_OP_addr";
+ case DW_OP_deref: return "DW_OP_deref";
+ case DW_OP_const1u: return "DW_OP_const1u";
+ case DW_OP_const1s: return "DW_OP_const1s";
+ case DW_OP_const2u: return "DW_OP_const2u";
+ case DW_OP_const2s: return "DW_OP_const2s";
+ case DW_OP_const4u: return "DW_OP_const4u";
+ case DW_OP_const4s: return "DW_OP_const4s";
+ case DW_OP_const8u: return "DW_OP_const8u";
+ case DW_OP_const8s: return "DW_OP_const8s";
+ case DW_OP_constu: return "DW_OP_constu";
+ case DW_OP_consts: return "DW_OP_consts";
+ case DW_OP_dup: return "DW_OP_dup";
+ case DW_OP_drop: return "DW_OP_drop";
+ case DW_OP_over: return "DW_OP_over";
+ case DW_OP_pick: return "DW_OP_pick";
+ case DW_OP_swap: return "DW_OP_swap";
+ case DW_OP_rot: return "DW_OP_rot";
+ case DW_OP_xderef: return "DW_OP_xderef";
+ case DW_OP_abs: return "DW_OP_abs";
+ case DW_OP_and: return "DW_OP_and";
+ case DW_OP_div: return "DW_OP_div";
+ case DW_OP_minus: return "DW_OP_minus";
+ case DW_OP_mod: return "DW_OP_mod";
+ case DW_OP_mul: return "DW_OP_mul";
+ case DW_OP_neg: return "DW_OP_neg";
+ case DW_OP_not: return "DW_OP_not";
+ case DW_OP_or: return "DW_OP_or";
+ case DW_OP_plus: return "DW_OP_plus";
+ case DW_OP_plus_uconst: return "DW_OP_plus_uconst";
+ case DW_OP_shl: return "DW_OP_shl";
+ case DW_OP_shr: return "DW_OP_shr";
+ case DW_OP_shra: return "DW_OP_shra";
+ case DW_OP_xor: return "DW_OP_xor";
+ case DW_OP_skip: return "DW_OP_skip";
+ case DW_OP_bra: return "DW_OP_bra";
+ case DW_OP_eq: return "DW_OP_eq";
+ case DW_OP_ge: return "DW_OP_ge";
+ case DW_OP_gt: return "DW_OP_gt";
+ case DW_OP_le: return "DW_OP_le";
+ case DW_OP_lt: return "DW_OP_lt";
+ case DW_OP_ne: return "DW_OP_ne";
+ case DW_OP_lit0: return "DW_OP_lit0";
+ case DW_OP_lit1: return "DW_OP_lit1";
+ case DW_OP_lit2: return "DW_OP_lit2";
+ case DW_OP_lit3: return "DW_OP_lit3";
+ case DW_OP_lit4: return "DW_OP_lit4";
+ case DW_OP_lit5: return "DW_OP_lit5";
+ case DW_OP_lit6: return "DW_OP_lit6";
+ case DW_OP_lit7: return "DW_OP_lit7";
+ case DW_OP_lit8: return "DW_OP_lit8";
+ case DW_OP_lit9: return "DW_OP_lit9";
+ case DW_OP_lit10: return "DW_OP_lit10";
+ case DW_OP_lit11: return "DW_OP_lit11";
+ case DW_OP_lit12: return "DW_OP_lit12";
+ case DW_OP_lit13: return "DW_OP_lit13";
+ case DW_OP_lit14: return "DW_OP_lit14";
+ case DW_OP_lit15: return "DW_OP_lit15";
+ case DW_OP_lit16: return "DW_OP_lit16";
+ case DW_OP_lit17: return "DW_OP_lit17";
+ case DW_OP_lit18: return "DW_OP_lit18";
+ case DW_OP_lit19: return "DW_OP_lit19";
+ case DW_OP_lit20: return "DW_OP_lit20";
+ case DW_OP_lit21: return "DW_OP_lit21";
+ case DW_OP_lit22: return "DW_OP_lit22";
+ case DW_OP_lit23: return "DW_OP_lit23";
+ case DW_OP_lit24: return "DW_OP_lit24";
+ case DW_OP_lit25: return "DW_OP_lit25";
+ case DW_OP_lit26: return "DW_OP_lit26";
+ case DW_OP_lit27: return "DW_OP_lit27";
+ case DW_OP_lit28: return "DW_OP_lit28";
+ case DW_OP_lit29: return "DW_OP_lit29";
+ case DW_OP_lit30: return "DW_OP_lit30";
+ case DW_OP_lit31: return "DW_OP_lit31";
+ case DW_OP_reg0: return "DW_OP_reg0";
+ case DW_OP_reg1: return "DW_OP_reg1";
+ case DW_OP_reg2: return "DW_OP_reg2";
+ case DW_OP_reg3: return "DW_OP_reg3";
+ case DW_OP_reg4: return "DW_OP_reg4";
+ case DW_OP_reg5: return "DW_OP_reg5";
+ case DW_OP_reg6: return "DW_OP_reg6";
+ case DW_OP_reg7: return "DW_OP_reg7";
+ case DW_OP_reg8: return "DW_OP_reg8";
+ case DW_OP_reg9: return "DW_OP_reg9";
+ case DW_OP_reg10: return "DW_OP_reg10";
+ case DW_OP_reg11: return "DW_OP_reg11";
+ case DW_OP_reg12: return "DW_OP_reg12";
+ case DW_OP_reg13: return "DW_OP_reg13";
+ case DW_OP_reg14: return "DW_OP_reg14";
+ case DW_OP_reg15: return "DW_OP_reg15";
+ case DW_OP_reg16: return "DW_OP_reg16";
+ case DW_OP_reg17: return "DW_OP_reg17";
+ case DW_OP_reg18: return "DW_OP_reg18";
+ case DW_OP_reg19: return "DW_OP_reg19";
+ case DW_OP_reg20: return "DW_OP_reg20";
+ case DW_OP_reg21: return "DW_OP_reg21";
+ case DW_OP_reg22: return "DW_OP_reg22";
+ case DW_OP_reg23: return "DW_OP_reg23";
+ case DW_OP_reg24: return "DW_OP_reg24";
+ case DW_OP_reg25: return "DW_OP_reg25";
+ case DW_OP_reg26: return "DW_OP_reg26";
+ case DW_OP_reg27: return "DW_OP_reg27";
+ case DW_OP_reg28: return "DW_OP_reg28";
+ case DW_OP_reg29: return "DW_OP_reg29";
+ case DW_OP_reg30: return "DW_OP_reg30";
+ case DW_OP_reg31: return "DW_OP_reg31";
+ case DW_OP_breg0: return "DW_OP_breg0";
+ case DW_OP_breg1: return "DW_OP_breg1";
+ case DW_OP_breg2: return "DW_OP_breg2";
+ case DW_OP_breg3: return "DW_OP_breg3";
+ case DW_OP_breg4: return "DW_OP_breg4";
+ case DW_OP_breg5: return "DW_OP_breg5";
+ case DW_OP_breg6: return "DW_OP_breg6";
+ case DW_OP_breg7: return "DW_OP_breg7";
+ case DW_OP_breg8: return "DW_OP_breg8";
+ case DW_OP_breg9: return "DW_OP_breg9";
+ case DW_OP_breg10: return "DW_OP_breg10";
+ case DW_OP_breg11: return "DW_OP_breg11";
+ case DW_OP_breg12: return "DW_OP_breg12";
+ case DW_OP_breg13: return "DW_OP_breg13";
+ case DW_OP_breg14: return "DW_OP_breg14";
+ case DW_OP_breg15: return "DW_OP_breg15";
+ case DW_OP_breg16: return "DW_OP_breg16";
+ case DW_OP_breg17: return "DW_OP_breg17";
+ case DW_OP_breg18: return "DW_OP_breg18";
+ case DW_OP_breg19: return "DW_OP_breg19";
+ case DW_OP_breg20: return "DW_OP_breg20";
+ case DW_OP_breg21: return "DW_OP_breg21";
+ case DW_OP_breg22: return "DW_OP_breg22";
+ case DW_OP_breg23: return "DW_OP_breg23";
+ case DW_OP_breg24: return "DW_OP_breg24";
+ case DW_OP_breg25: return "DW_OP_breg25";
+ case DW_OP_breg26: return "DW_OP_breg26";
+ case DW_OP_breg27: return "DW_OP_breg27";
+ case DW_OP_breg28: return "DW_OP_breg28";
+ case DW_OP_breg29: return "DW_OP_breg29";
+ case DW_OP_breg30: return "DW_OP_breg30";
+ case DW_OP_breg31: return "DW_OP_breg31";
+ case DW_OP_regx: return "DW_OP_regx";
+ case DW_OP_fbreg: return "DW_OP_fbreg";
+ case DW_OP_bregx: return "DW_OP_bregx";
+ case DW_OP_piece: return "DW_OP_piece";
+ case DW_OP_deref_size: return "DW_OP_deref_size";
+ case DW_OP_xderef_size: return "DW_OP_xderef_size";
+ case DW_OP_nop: return "DW_OP_nop";
+ case DW_OP_push_object_address: return "DW_OP_push_object_address";
+ case DW_OP_call2: return "DW_OP_call2";
+ case DW_OP_call4: return "DW_OP_call4";
+ case DW_OP_call_ref: return "DW_OP_call_ref";
+ case DW_OP_form_tls_address: return "DW_OP_form_tls_address";
+ case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa";
+ case DW_OP_bit_piece: return "DW_OP_bit_piece";
+ case DW_OP_implicit_value: return "DW_OP_implicit_value";
+ case DW_OP_stack_value: return "DW_OP_stack_value";
+ case DW_OP_lo_user: return "DW_OP_lo_user";
+ case DW_OP_hi_user: return "DW_OP_hi_user";
+
+ // DWARF5 Fission Proposal Op Extensions
+ case DW_OP_GNU_addr_index: return "DW_OP_GNU_addr_index";
+ case DW_OP_GNU_const_index: return "DW_OP_GNU_const_index";
+ }
+ return 0;
+}
+
+/// AttributeEncodingString - Return the string for the specified attribute
+/// encoding.
+const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_ATE_address: return "DW_ATE_address";
+ case DW_ATE_boolean: return "DW_ATE_boolean";
+ case DW_ATE_complex_float: return "DW_ATE_complex_float";
+ case DW_ATE_float: return "DW_ATE_float";
+ case DW_ATE_signed: return "DW_ATE_signed";
+ case DW_ATE_signed_char: return "DW_ATE_signed_char";
+ case DW_ATE_unsigned: return "DW_ATE_unsigned";
+ case DW_ATE_unsigned_char: return "DW_ATE_unsigned_char";
+ case DW_ATE_imaginary_float: return "DW_ATE_imaginary_float";
+ case DW_ATE_UTF: return "DW_ATE_UTF";
+ case DW_ATE_packed_decimal: return "DW_ATE_packed_decimal";
+ case DW_ATE_numeric_string: return "DW_ATE_numeric_string";
+ case DW_ATE_edited: return "DW_ATE_edited";
+ case DW_ATE_signed_fixed: return "DW_ATE_signed_fixed";
+ case DW_ATE_unsigned_fixed: return "DW_ATE_unsigned_fixed";
+ case DW_ATE_decimal_float: return "DW_ATE_decimal_float";
+ case DW_ATE_lo_user: return "DW_ATE_lo_user";
+ case DW_ATE_hi_user: return "DW_ATE_hi_user";
+ }
+ return 0;
+}
+
+/// DecimalSignString - Return the string for the specified decimal sign
+/// attribute.
+const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
+ switch (Sign) {
+ case DW_DS_unsigned: return "DW_DS_unsigned";
+ case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch";
+ case DW_DS_trailing_overpunch: return "DW_DS_trailing_overpunch";
+ case DW_DS_leading_separate: return "DW_DS_leading_separate";
+ case DW_DS_trailing_separate: return "DW_DS_trailing_separate";
+ }
+ return 0;
+}
+
+/// EndianityString - Return the string for the specified endianity.
+///
+const char *llvm::dwarf::EndianityString(unsigned Endian) {
+ switch (Endian) {
+ case DW_END_default: return "DW_END_default";
+ case DW_END_big: return "DW_END_big";
+ case DW_END_little: return "DW_END_little";
+ case DW_END_lo_user: return "DW_END_lo_user";
+ case DW_END_hi_user: return "DW_END_hi_user";
+ }
+ return 0;
+}
+
+/// AccessibilityString - Return the string for the specified accessibility.
+///
+const char *llvm::dwarf::AccessibilityString(unsigned Access) {
+ switch (Access) {
+ // Accessibility codes
+ case DW_ACCESS_public: return "DW_ACCESS_public";
+ case DW_ACCESS_protected: return "DW_ACCESS_protected";
+ case DW_ACCESS_private: return "DW_ACCESS_private";
+ }
+ return 0;
+}
+
+/// VisibilityString - Return the string for the specified visibility.
+///
+const char *llvm::dwarf::VisibilityString(unsigned Visibility) {
+ switch (Visibility) {
+ case DW_VIS_local: return "DW_VIS_local";
+ case DW_VIS_exported: return "DW_VIS_exported";
+ case DW_VIS_qualified: return "DW_VIS_qualified";
+ }
+ return 0;
+}
+
+/// VirtualityString - Return the string for the specified virtuality.
+///
+const char *llvm::dwarf::VirtualityString(unsigned Virtuality) {
+ switch (Virtuality) {
+ case DW_VIRTUALITY_none: return "DW_VIRTUALITY_none";
+ case DW_VIRTUALITY_virtual: return "DW_VIRTUALITY_virtual";
+ case DW_VIRTUALITY_pure_virtual: return "DW_VIRTUALITY_pure_virtual";
+ }
+ return 0;
+}
+
+/// LanguageString - Return the string for the specified language.
+///
+const char *llvm::dwarf::LanguageString(unsigned Language) {
+ switch (Language) {
+ case DW_LANG_C89: return "DW_LANG_C89";
+ case DW_LANG_C: return "DW_LANG_C";
+ case DW_LANG_Ada83: return "DW_LANG_Ada83";
+ case DW_LANG_C_plus_plus: return "DW_LANG_C_plus_plus";
+ case DW_LANG_Cobol74: return "DW_LANG_Cobol74";
+ case DW_LANG_Cobol85: return "DW_LANG_Cobol85";
+ case DW_LANG_Fortran77: return "DW_LANG_Fortran77";
+ case DW_LANG_Fortran90: return "DW_LANG_Fortran90";
+ case DW_LANG_Pascal83: return "DW_LANG_Pascal83";
+ case DW_LANG_Modula2: return "DW_LANG_Modula2";
+ case DW_LANG_Java: return "DW_LANG_Java";
+ case DW_LANG_C99: return "DW_LANG_C99";
+ case DW_LANG_Ada95: return "DW_LANG_Ada95";
+ case DW_LANG_Fortran95: return "DW_LANG_Fortran95";
+ case DW_LANG_PLI: return "DW_LANG_PLI";
+ case DW_LANG_ObjC: return "DW_LANG_ObjC";
+ case DW_LANG_ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus";
+ case DW_LANG_UPC: return "DW_LANG_UPC";
+ case DW_LANG_D: return "DW_LANG_D";
+ case DW_LANG_lo_user: return "DW_LANG_lo_user";
+ case DW_LANG_hi_user: return "DW_LANG_hi_user";
+ }
+ return 0;
+}
+
+/// CaseString - Return the string for the specified identifier case.
+///
+const char *llvm::dwarf::CaseString(unsigned Case) {
+ switch (Case) {
+ case DW_ID_case_sensitive: return "DW_ID_case_sensitive";
+ case DW_ID_up_case: return "DW_ID_up_case";
+ case DW_ID_down_case: return "DW_ID_down_case";
+ case DW_ID_case_insensitive: return "DW_ID_case_insensitive";
+ }
+ return 0;
+}
+
+/// ConventionString - Return the string for the specified calling convention.
+///
+const char *llvm::dwarf::ConventionString(unsigned Convention) {
+ switch (Convention) {
+ case DW_CC_normal: return "DW_CC_normal";
+ case DW_CC_program: return "DW_CC_program";
+ case DW_CC_nocall: return "DW_CC_nocall";
+ case DW_CC_lo_user: return "DW_CC_lo_user";
+ case DW_CC_hi_user: return "DW_CC_hi_user";
+ }
+ return 0;
+}
+
+/// InlineCodeString - Return the string for the specified inline code.
+///
+const char *llvm::dwarf::InlineCodeString(unsigned Code) {
+ switch (Code) {
+ case DW_INL_not_inlined: return "DW_INL_not_inlined";
+ case DW_INL_inlined: return "DW_INL_inlined";
+ case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined";
+ case DW_INL_declared_inlined: return "DW_INL_declared_inlined";
+ }
+ return 0;
+}
+
+/// ArrayOrderString - Return the string for the specified array order.
+///
+const char *llvm::dwarf::ArrayOrderString(unsigned Order) {
+ switch (Order) {
+ case DW_ORD_row_major: return "DW_ORD_row_major";
+ case DW_ORD_col_major: return "DW_ORD_col_major";
+ }
+ return 0;
+}
+
+/// DiscriminantString - Return the string for the specified discriminant
+/// descriptor.
+const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) {
+ switch (Discriminant) {
+ case DW_DSC_label: return "DW_DSC_label";
+ case DW_DSC_range: return "DW_DSC_range";
+ }
+ return 0;
+}
+
+/// LNStandardString - Return the string for the specified line number standard.
+///
+const char *llvm::dwarf::LNStandardString(unsigned Standard) {
+ switch (Standard) {
+ case DW_LNS_copy: return "DW_LNS_copy";
+ case DW_LNS_advance_pc: return "DW_LNS_advance_pc";
+ case DW_LNS_advance_line: return "DW_LNS_advance_line";
+ case DW_LNS_set_file: return "DW_LNS_set_file";
+ case DW_LNS_set_column: return "DW_LNS_set_column";
+ case DW_LNS_negate_stmt: return "DW_LNS_negate_stmt";
+ case DW_LNS_set_basic_block: return "DW_LNS_set_basic_block";
+ case DW_LNS_const_add_pc: return "DW_LNS_const_add_pc";
+ case DW_LNS_fixed_advance_pc: return "DW_LNS_fixed_advance_pc";
+ case DW_LNS_set_prologue_end: return "DW_LNS_set_prologue_end";
+ case DW_LNS_set_epilogue_begin: return "DW_LNS_set_epilogue_begin";
+ case DW_LNS_set_isa: return "DW_LNS_set_isa";
+ }
+ return 0;
+}
+
+/// LNExtendedString - Return the string for the specified line number extended
+/// opcode encodings.
+const char *llvm::dwarf::LNExtendedString(unsigned Encoding) {
+ switch (Encoding) {
+ // Line Number Extended Opcode Encodings
+ case DW_LNE_end_sequence: return "DW_LNE_end_sequence";
+ case DW_LNE_set_address: return "DW_LNE_set_address";
+ case DW_LNE_define_file: return "DW_LNE_define_file";
+ case DW_LNE_set_discriminator: return "DW_LNE_set_discriminator";
+ case DW_LNE_lo_user: return "DW_LNE_lo_user";
+ case DW_LNE_hi_user: return "DW_LNE_hi_user";
+ }
+ return 0;
+}
+
+/// MacinfoString - Return the string for the specified macinfo type encodings.
+///
+const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
+ switch (Encoding) {
+ // Macinfo Type Encodings
+ case DW_MACINFO_define: return "DW_MACINFO_define";
+ case DW_MACINFO_undef: return "DW_MACINFO_undef";
+ case DW_MACINFO_start_file: return "DW_MACINFO_start_file";
+ case DW_MACINFO_end_file: return "DW_MACINFO_end_file";
+ case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext";
+ }
+ return 0;
+}
+
+/// CallFrameString - Return the string for the specified call frame instruction
+/// encodings.
+const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_CFA_nop: return "DW_CFA_nop";
+ case DW_CFA_advance_loc: return "DW_CFA_advance_loc";
+ case DW_CFA_offset: return "DW_CFA_offset";
+ case DW_CFA_restore: return "DW_CFA_restore";
+ case DW_CFA_set_loc: return "DW_CFA_set_loc";
+ case DW_CFA_advance_loc1: return "DW_CFA_advance_loc1";
+ case DW_CFA_advance_loc2: return "DW_CFA_advance_loc2";
+ case DW_CFA_advance_loc4: return "DW_CFA_advance_loc4";
+ case DW_CFA_offset_extended: return "DW_CFA_offset_extended";
+ case DW_CFA_restore_extended: return "DW_CFA_restore_extended";
+ case DW_CFA_undefined: return "DW_CFA_undefined";
+ case DW_CFA_same_value: return "DW_CFA_same_value";
+ case DW_CFA_register: return "DW_CFA_register";
+ case DW_CFA_remember_state: return "DW_CFA_remember_state";
+ case DW_CFA_restore_state: return "DW_CFA_restore_state";
+ case DW_CFA_def_cfa: return "DW_CFA_def_cfa";
+ case DW_CFA_def_cfa_register: return "DW_CFA_def_cfa_register";
+ case DW_CFA_def_cfa_offset: return "DW_CFA_def_cfa_offset";
+ case DW_CFA_def_cfa_expression: return "DW_CFA_def_cfa_expression";
+ case DW_CFA_expression: return "DW_CFA_expression";
+ case DW_CFA_offset_extended_sf: return "DW_CFA_offset_extended_sf";
+ case DW_CFA_def_cfa_sf: return "DW_CFA_def_cfa_sf";
+ case DW_CFA_def_cfa_offset_sf: return "DW_CFA_def_cfa_offset_sf";
+ case DW_CFA_val_offset: return "DW_CFA_val_offset";
+ case DW_CFA_val_offset_sf: return "DW_CFA_val_offset_sf";
+ case DW_CFA_val_expression: return "DW_CFA_val_expression";
+ case DW_CFA_MIPS_advance_loc8: return "DW_CFA_MIPS_advance_loc8";
+ case DW_CFA_GNU_window_save: return "DW_CFA_GNU_window_save";
+ case DW_CFA_GNU_args_size: return "DW_CFA_GNU_args_size";
+ case DW_CFA_lo_user: return "DW_CFA_lo_user";
+ case DW_CFA_hi_user: return "DW_CFA_hi_user";
+ }
+ return 0;
+}
diff --git a/contrib/llvm/lib/Support/DynamicLibrary.cpp b/contrib/llvm/lib/Support/DynamicLibrary.cpp
new file mode 100644
index 000000000000..f14cb45d9dc0
--- /dev/null
+++ b/contrib/llvm/lib/Support/DynamicLibrary.cpp
@@ -0,0 +1,189 @@
+//===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file implements the operating system DynamicLibrary concept.
+//
+// FIXME: This file leaks ExplicitSymbols and OpenedHandles!
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Mutex.h"
+#include <cstdio>
+#include <cstring>
+
+// Collection of symbol name/value pairs to be searched prior to any libraries.
+static llvm::StringMap<void *> *ExplicitSymbols = 0;
+
+namespace {
+
+struct ExplicitSymbolsDeleter {
+ ~ExplicitSymbolsDeleter() {
+ delete ExplicitSymbols;
+ }
+};
+
+}
+
+static ExplicitSymbolsDeleter Dummy;
+
+
+static llvm::sys::SmartMutex<true>& getMutex() {
+ static llvm::sys::SmartMutex<true> HandlesMutex;
+ return HandlesMutex;
+}
+
+void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName,
+ void *symbolValue) {
+ SmartScopedLock<true> lock(getMutex());
+ if (ExplicitSymbols == 0)
+ ExplicitSymbols = new StringMap<void*>();
+ (*ExplicitSymbols)[symbolName] = symbolValue;
+}
+
+char llvm::sys::DynamicLibrary::Invalid = 0;
+
+#ifdef LLVM_ON_WIN32
+
+#include "Windows/DynamicLibrary.inc"
+
+#else
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+using namespace llvm;
+using namespace llvm::sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+static DenseSet<void *> *OpenedHandles = 0;
+
+DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
+ std::string *errMsg) {
+ SmartScopedLock<true> lock(getMutex());
+
+ void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL);
+ if (handle == 0) {
+ if (errMsg) *errMsg = dlerror();
+ return DynamicLibrary();
+ }
+
+#ifdef __CYGWIN__
+ // Cygwin searches symbols only in the main
+ // with the handle of dlopen(NULL, RTLD_GLOBAL).
+ if (filename == NULL)
+ handle = RTLD_DEFAULT;
+#endif
+
+ if (OpenedHandles == 0)
+ OpenedHandles = new DenseSet<void *>();
+
+ // If we've already loaded this library, dlclose() the handle in order to
+ // keep the internal refcount at +1.
+ if (!OpenedHandles->insert(handle).second)
+ dlclose(handle);
+
+ return DynamicLibrary(handle);
+}
+
+void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
+ if (!isValid())
+ return NULL;
+ return dlsym(Data, symbolName);
+}
+
+#else
+
+using namespace llvm;
+using namespace llvm::sys;
+
+DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
+ std::string *errMsg) {
+ if (errMsg) *errMsg = "dlopen() not supported on this platform";
+ return DynamicLibrary();
+}
+
+void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
+ return NULL;
+}
+
+#endif
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName);
+}
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
+ SmartScopedLock<true> Lock(getMutex());
+
+ // First check symbols added via AddSymbol().
+ if (ExplicitSymbols) {
+ StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName);
+
+ if (i != ExplicitSymbols->end())
+ return i->second;
+ }
+
+#if HAVE_DLFCN_H
+ // Now search the libraries.
+ if (OpenedHandles) {
+ for (DenseSet<void *>::iterator I = OpenedHandles->begin(),
+ E = OpenedHandles->end(); I != E; ++I) {
+ //lt_ptr ptr = lt_dlsym(*I, symbolName);
+ void *ptr = dlsym(*I, symbolName);
+ if (ptr) {
+ return ptr;
+ }
+ }
+ }
+#endif
+
+ if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName))
+ return Result;
+
+// This macro returns the address of a well-known, explicit symbol
+#define EXPLICIT_SYMBOL(SYM) \
+ if (!strcmp(symbolName, #SYM)) return &SYM
+
+// On linux we have a weird situation. The stderr/out/in symbols are both
+// macros and global variables because of standards requirements. So, we
+// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
+#if defined(__linux__) and !defined(__ANDROID__)
+ {
+ EXPLICIT_SYMBOL(stderr);
+ EXPLICIT_SYMBOL(stdout);
+ EXPLICIT_SYMBOL(stdin);
+ }
+#else
+ // For everything else, we want to check to make sure the symbol isn't defined
+ // as a macro before using EXPLICIT_SYMBOL.
+ {
+#ifndef stdin
+ EXPLICIT_SYMBOL(stdin);
+#endif
+#ifndef stdout
+ EXPLICIT_SYMBOL(stdout);
+#endif
+#ifndef stderr
+ EXPLICIT_SYMBOL(stderr);
+#endif
+ }
+#endif
+#undef EXPLICIT_SYMBOL
+
+ return 0;
+}
+
+#endif // LLVM_ON_WIN32
diff --git a/contrib/llvm/lib/Support/Errno.cpp b/contrib/llvm/lib/Support/Errno.cpp
new file mode 100644
index 000000000000..730220f47d92
--- /dev/null
+++ b/contrib/llvm/lib/Support/Errno.cpp
@@ -0,0 +1,79 @@
+//===- Errno.cpp - errno support --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the errno wrappers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Errno.h"
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/raw_ostream.h"
+
+#if HAVE_STRING_H
+#include <string.h>
+
+#if HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+namespace sys {
+
+#if HAVE_ERRNO_H
+std::string StrError() {
+ return StrError(errno);
+}
+#endif // HAVE_ERRNO_H
+
+std::string StrError(int errnum) {
+ const int MaxErrStrLen = 2000;
+ char buffer[MaxErrStrLen];
+ buffer[0] = '\0';
+ std::string str;
+#ifdef HAVE_STRERROR_R
+ // strerror_r is thread-safe.
+ if (errnum)
+# if defined(__GLIBC__) && defined(_GNU_SOURCE)
+ // glibc defines its own incompatible version of strerror_r
+ // which may not use the buffer supplied.
+ str = strerror_r(errnum,buffer,MaxErrStrLen-1);
+# else
+ strerror_r(errnum,buffer,MaxErrStrLen-1);
+ str = buffer;
+# endif
+#elif HAVE_DECL_STRERROR_S // "Windows Secure API"
+ if (errnum) {
+ strerror_s(buffer, MaxErrStrLen - 1, errnum);
+ str = buffer;
+ }
+#elif defined(HAVE_STRERROR)
+ // Copy the thread un-safe result of strerror into
+ // the buffer as fast as possible to minimize impact
+ // of collision of strerror in multiple threads.
+ if (errnum)
+ str = strerror(errnum);
+#else
+ // Strange that this system doesn't even have strerror
+ // but, oh well, just use a generic message
+ raw_string_ostream stream(str);
+ stream << "Error #" << errnum;
+ stream.flush();
+#endif
+ return str;
+}
+
+} // namespace sys
+} // namespace llvm
+
+#endif // HAVE_STRING_H
diff --git a/contrib/llvm/lib/Support/ErrorHandling.cpp b/contrib/llvm/lib/Support/ErrorHandling.cpp
new file mode 100644
index 000000000000..f4b591e777eb
--- /dev/null
+++ b/contrib/llvm/lib/Support/ErrorHandling.cpp
@@ -0,0 +1,99 @@
+//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an API used to indicate fatal error conditions. Non-fatal
+// errors (most of them) should be handled through LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdlib>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(_MSC_VER)
+# include <io.h>
+# include <fcntl.h>
+#endif
+
+using namespace llvm;
+
+static fatal_error_handler_t ErrorHandler = 0;
+static void *ErrorHandlerUserData = 0;
+
+void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
+ void *user_data) {
+ assert(!llvm_is_multithreaded() &&
+ "Cannot register error handlers after starting multithreaded mode!\n");
+ assert(!ErrorHandler && "Error handler already registered!\n");
+ ErrorHandler = handler;
+ ErrorHandlerUserData = user_data;
+}
+
+void llvm::remove_fatal_error_handler() {
+ ErrorHandler = 0;
+}
+
+void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) {
+ report_fatal_error(Twine(Reason), GenCrashDiag);
+}
+
+void llvm::report_fatal_error(const std::string &Reason, bool GenCrashDiag) {
+ report_fatal_error(Twine(Reason), GenCrashDiag);
+}
+
+void llvm::report_fatal_error(StringRef Reason, bool GenCrashDiag) {
+ report_fatal_error(Twine(Reason), GenCrashDiag);
+}
+
+void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
+ if (ErrorHandler) {
+ ErrorHandler(ErrorHandlerUserData, Reason.str(), GenCrashDiag);
+ } else {
+ // Blast the result out to stderr. We don't try hard to make sure this
+ // succeeds (e.g. handling EINTR) and we can't use errs() here because
+ // raw ostreams can call report_fatal_error.
+ SmallVector<char, 64> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << "LLVM ERROR: " << Reason << "\n";
+ StringRef MessageStr = OS.str();
+ ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
+ (void)written; // If something went wrong, we deliberately just give up.
+ }
+
+ // If we reached here, we are failing ungracefully. Run the interrupt handlers
+ // to make sure any special cleanups get done, in particular that we remove
+ // files registered with RemoveFileOnSignal.
+ sys::RunInterruptHandlers();
+
+ exit(1);
+}
+
+void llvm::llvm_unreachable_internal(const char *msg, const char *file,
+ unsigned line) {
+ // This code intentionally doesn't call the ErrorHandler callback, because
+ // llvm_unreachable is intended to be used to indicate "impossible"
+ // situations, and not legitimate runtime errors.
+ if (msg)
+ dbgs() << msg << "\n";
+ dbgs() << "UNREACHABLE executed";
+ if (file)
+ dbgs() << " at " << file << ":" << line;
+ dbgs() << "!\n";
+ abort();
+}
diff --git a/contrib/llvm/lib/Support/FileOutputBuffer.cpp b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
new file mode 100644
index 000000000000..1ee69b60234f
--- /dev/null
+++ b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
@@ -0,0 +1,119 @@
+//===- FileOutputBuffer.cpp - File Output Buffer ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility for creating a in-memory buffer that will be written to a file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+using llvm::sys::fs::mapped_file_region;
+
+namespace llvm {
+FileOutputBuffer::FileOutputBuffer(mapped_file_region * R,
+ StringRef Path, StringRef TmpPath)
+ : Region(R)
+ , FinalPath(Path)
+ , TempPath(TmpPath) {
+}
+
+FileOutputBuffer::~FileOutputBuffer() {
+ bool Existed;
+ sys::fs::remove(Twine(TempPath), Existed);
+}
+
+error_code FileOutputBuffer::create(StringRef FilePath,
+ size_t Size,
+ OwningPtr<FileOutputBuffer> &Result,
+ unsigned Flags) {
+ // If file already exists, it must be a regular file (to be mappable).
+ sys::fs::file_status Stat;
+ error_code EC = sys::fs::status(FilePath, Stat);
+ switch (Stat.type()) {
+ case sys::fs::file_type::file_not_found:
+ // If file does not exist, we'll create one.
+ break;
+ case sys::fs::file_type::regular_file: {
+ // If file is not currently writable, error out.
+ // FIXME: There is no sys::fs:: api for checking this.
+ // FIXME: In posix, you use the access() call to check this.
+ }
+ break;
+ default:
+ if (EC)
+ return EC;
+ else
+ return make_error_code(errc::operation_not_permitted);
+ }
+
+ // Delete target file.
+ bool Existed;
+ EC = sys::fs::remove(FilePath, Existed);
+ if (EC)
+ return EC;
+
+ // Create new file in same directory but with random name.
+ SmallString<128> TempFilePath;
+ int FD;
+ EC = sys::fs::unique_file(Twine(FilePath) + ".tmp%%%%%%%",
+ FD, TempFilePath, false, 0644);
+ if (EC)
+ return EC;
+
+ OwningPtr<mapped_file_region> MappedFile(new mapped_file_region(
+ FD, true, mapped_file_region::readwrite, Size, 0, EC));
+ if (EC)
+ return EC;
+
+ // If requested, make the output file executable.
+ if ( Flags & F_executable ) {
+ sys::fs::file_status Stat2;
+ EC = sys::fs::status(Twine(TempFilePath), Stat2);
+ if (EC)
+ return EC;
+
+ sys::fs::perms new_perms = Stat2.permissions();
+ if ( new_perms & sys::fs::owner_read )
+ new_perms |= sys::fs::owner_exe;
+ if ( new_perms & sys::fs::group_read )
+ new_perms |= sys::fs::group_exe;
+ if ( new_perms & sys::fs::others_read )
+ new_perms |= sys::fs::others_exe;
+ new_perms |= sys::fs::add_perms;
+ EC = sys::fs::permissions(Twine(TempFilePath), new_perms);
+ if (EC)
+ return EC;
+ }
+
+ Result.reset(new FileOutputBuffer(MappedFile.get(), FilePath, TempFilePath));
+ if (Result)
+ MappedFile.take();
+
+ return error_code::success();
+}
+
+error_code FileOutputBuffer::commit(int64_t NewSmallerSize) {
+ // Unmap buffer, letting OS flush dirty pages to file on disk.
+ Region.reset(0);
+
+ // If requested, resize file as part of commit.
+ if ( NewSmallerSize != -1 ) {
+ error_code EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize);
+ if (EC)
+ return EC;
+ }
+
+ // Rename file to final name.
+ return sys::fs::rename(Twine(TempPath), Twine(FinalPath));
+}
+} // namespace
diff --git a/contrib/llvm/lib/Support/FileUtilities.cpp b/contrib/llvm/lib/Support/FileUtilities.cpp
new file mode 100644
index 000000000000..4d7b2391f01e
--- /dev/null
+++ b/contrib/llvm/lib/Support/FileUtilities.cpp
@@ -0,0 +1,280 @@
+//===- Support/FileUtilities.cpp - File System Utilities ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a family of utility functions which are useful for doing
+// various things with files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+static bool isSignedChar(char C) {
+ return (C == '+' || C == '-');
+}
+
+static bool isExponentChar(char C) {
+ switch (C) {
+ case 'D': // Strange exponential notation.
+ case 'd': // Strange exponential notation.
+ case 'e':
+ case 'E': return true;
+ default: return false;
+ }
+}
+
+static bool isNumberChar(char C) {
+ switch (C) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '.': return true;
+ default: return isSignedChar(C) || isExponentChar(C);
+ }
+}
+
+static const char *BackupNumber(const char *Pos, const char *FirstChar) {
+ // If we didn't stop in the middle of a number, don't backup.
+ if (!isNumberChar(*Pos)) return Pos;
+
+ // Otherwise, return to the start of the number.
+ bool HasPeriod = false;
+ while (Pos > FirstChar && isNumberChar(Pos[-1])) {
+ // Backup over at most one period.
+ if (Pos[-1] == '.') {
+ if (HasPeriod)
+ break;
+ HasPeriod = true;
+ }
+
+ --Pos;
+ if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1]))
+ break;
+ }
+ return Pos;
+}
+
+/// EndOfNumber - Return the first character that is not part of the specified
+/// number. This assumes that the buffer is null terminated, so it won't fall
+/// off the end.
+static const char *EndOfNumber(const char *Pos) {
+ while (isNumberChar(*Pos))
+ ++Pos;
+ return Pos;
+}
+
+/// CompareNumbers - compare two numbers, returning true if they are different.
+static bool CompareNumbers(const char *&F1P, const char *&F2P,
+ const char *F1End, const char *F2End,
+ double AbsTolerance, double RelTolerance,
+ std::string *ErrorMsg) {
+ const char *F1NumEnd, *F2NumEnd;
+ double V1 = 0.0, V2 = 0.0;
+
+ // If one of the positions is at a space and the other isn't, chomp up 'til
+ // the end of the space.
+ while (isspace(static_cast<unsigned char>(*F1P)) && F1P != F1End)
+ ++F1P;
+ while (isspace(static_cast<unsigned char>(*F2P)) && F2P != F2End)
+ ++F2P;
+
+ // If we stop on numbers, compare their difference.
+ if (!isNumberChar(*F1P) || !isNumberChar(*F2P)) {
+ // The diff failed.
+ F1NumEnd = F1P;
+ F2NumEnd = F2P;
+ } else {
+ // Note that some ugliness is built into this to permit support for numbers
+ // that use "D" or "d" as their exponential marker, e.g. "1.234D45". This
+ // occurs in 200.sixtrack in spec2k.
+ V1 = strtod(F1P, const_cast<char**>(&F1NumEnd));
+ V2 = strtod(F2P, const_cast<char**>(&F2NumEnd));
+
+ if (*F1NumEnd == 'D' || *F1NumEnd == 'd') {
+ // Copy string into tmp buffer to replace the 'D' with an 'e'.
+ SmallString<200> StrTmp(F1P, EndOfNumber(F1NumEnd)+1);
+ // Strange exponential notation!
+ StrTmp[static_cast<unsigned>(F1NumEnd-F1P)] = 'e';
+
+ V1 = strtod(&StrTmp[0], const_cast<char**>(&F1NumEnd));
+ F1NumEnd = F1P + (F1NumEnd-&StrTmp[0]);
+ }
+
+ if (*F2NumEnd == 'D' || *F2NumEnd == 'd') {
+ // Copy string into tmp buffer to replace the 'D' with an 'e'.
+ SmallString<200> StrTmp(F2P, EndOfNumber(F2NumEnd)+1);
+ // Strange exponential notation!
+ StrTmp[static_cast<unsigned>(F2NumEnd-F2P)] = 'e';
+
+ V2 = strtod(&StrTmp[0], const_cast<char**>(&F2NumEnd));
+ F2NumEnd = F2P + (F2NumEnd-&StrTmp[0]);
+ }
+ }
+
+ if (F1NumEnd == F1P || F2NumEnd == F2P) {
+ if (ErrorMsg) {
+ *ErrorMsg = "FP Comparison failed, not a numeric difference between '";
+ *ErrorMsg += F1P[0];
+ *ErrorMsg += "' and '";
+ *ErrorMsg += F2P[0];
+ *ErrorMsg += "'";
+ }
+ return true;
+ }
+
+ // Check to see if these are inside the absolute tolerance
+ if (AbsTolerance < std::abs(V1-V2)) {
+ // Nope, check the relative tolerance...
+ double Diff;
+ if (V2)
+ Diff = std::abs(V1/V2 - 1.0);
+ else if (V1)
+ Diff = std::abs(V2/V1 - 1.0);
+ else
+ Diff = 0; // Both zero.
+ if (Diff > RelTolerance) {
+ if (ErrorMsg) {
+ raw_string_ostream(*ErrorMsg)
+ << "Compared: " << V1 << " and " << V2 << '\n'
+ << "abs. diff = " << std::abs(V1-V2) << " rel.diff = " << Diff << '\n'
+ << "Out of tolerance: rel/abs: " << RelTolerance << '/'
+ << AbsTolerance;
+ }
+ return true;
+ }
+ }
+
+ // Otherwise, advance our read pointers to the end of the numbers.
+ F1P = F1NumEnd; F2P = F2NumEnd;
+ return false;
+}
+
+/// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the
+/// files match, 1 if they are different, and 2 if there is a file error. This
+/// function differs from DiffFiles in that you can specify an absolete and
+/// relative FP error that is allowed to exist. If you specify a string to fill
+/// in for the error option, it will set the string to an error message if an
+/// error occurs, allowing the caller to distinguish between a failed diff and a
+/// file system error.
+///
+int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
+ const sys::PathWithStatus &FileB,
+ double AbsTol, double RelTol,
+ std::string *Error) {
+ const sys::FileStatus *FileAStat = FileA.getFileStatus(false, Error);
+ if (!FileAStat)
+ return 2;
+ const sys::FileStatus *FileBStat = FileB.getFileStatus(false, Error);
+ if (!FileBStat)
+ return 2;
+
+ // Check for zero length files because some systems croak when you try to
+ // mmap an empty file.
+ size_t A_size = FileAStat->getSize();
+ size_t B_size = FileBStat->getSize();
+
+ // If they are both zero sized then they're the same
+ if (A_size == 0 && B_size == 0)
+ return 0;
+
+ // If only one of them is zero sized then they can't be the same
+ if ((A_size == 0 || B_size == 0)) {
+ if (Error)
+ *Error = "Files differ: one is zero-sized, the other isn't";
+ return 1;
+ }
+
+ // Now its safe to mmap the files into memory because both files
+ // have a non-zero size.
+ OwningPtr<MemoryBuffer> F1;
+ if (error_code ec = MemoryBuffer::getFile(FileA.c_str(), F1)) {
+ if (Error)
+ *Error = ec.message();
+ return 2;
+ }
+ OwningPtr<MemoryBuffer> F2;
+ if (error_code ec = MemoryBuffer::getFile(FileB.c_str(), F2)) {
+ if (Error)
+ *Error = ec.message();
+ return 2;
+ }
+
+ // Okay, now that we opened the files, scan them for the first difference.
+ const char *File1Start = F1->getBufferStart();
+ const char *File2Start = F2->getBufferStart();
+ const char *File1End = F1->getBufferEnd();
+ const char *File2End = F2->getBufferEnd();
+ const char *F1P = File1Start;
+ const char *F2P = File2Start;
+
+ // Are the buffers identical? Common case: Handle this efficiently.
+ if (A_size == B_size &&
+ std::memcmp(File1Start, File2Start, A_size) == 0)
+ return 0;
+
+ // Otherwise, we are done a tolerances are set.
+ if (AbsTol == 0 && RelTol == 0) {
+ if (Error)
+ *Error = "Files differ without tolerance allowance";
+ return 1; // Files different!
+ }
+
+ bool CompareFailed = false;
+ while (1) {
+ // Scan for the end of file or next difference.
+ while (F1P < File1End && F2P < File2End && *F1P == *F2P)
+ ++F1P, ++F2P;
+
+ if (F1P >= File1End || F2P >= File2End) break;
+
+ // Okay, we must have found a difference. Backup to the start of the
+ // current number each stream is at so that we can compare from the
+ // beginning.
+ F1P = BackupNumber(F1P, File1Start);
+ F2P = BackupNumber(F2P, File2Start);
+
+ // Now that we are at the start of the numbers, compare them, exiting if
+ // they don't match.
+ if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error)) {
+ CompareFailed = true;
+ break;
+ }
+ }
+
+ // Okay, we reached the end of file. If both files are at the end, we
+ // succeeded.
+ bool F1AtEnd = F1P >= File1End;
+ bool F2AtEnd = F2P >= File2End;
+ if (!CompareFailed && (!F1AtEnd || !F2AtEnd)) {
+ // Else, we might have run off the end due to a number: backup and retry.
+ if (F1AtEnd && isNumberChar(F1P[-1])) --F1P;
+ if (F2AtEnd && isNumberChar(F2P[-1])) --F2P;
+ F1P = BackupNumber(F1P, File1Start);
+ F2P = BackupNumber(F2P, File2Start);
+
+ // Now that we are at the start of the numbers, compare them, exiting if
+ // they don't match.
+ if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error))
+ CompareFailed = true;
+
+ // If we found the end, we succeeded.
+ if (F1P < File1End || F2P < File2End)
+ CompareFailed = true;
+ }
+
+ return CompareFailed;
+}
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp
new file mode 100644
index 000000000000..36e33b5aafa3
--- /dev/null
+++ b/contrib/llvm/lib/Support/FoldingSet.cpp
@@ -0,0 +1,424 @@
+//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a hash set that can be used to remove duplication of
+// nodes in a graph.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstring>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// FoldingSetNodeIDRef Implementation
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
+/// used to lookup the node in the FoldingSetImpl.
+unsigned FoldingSetNodeIDRef::ComputeHash() const {
+ return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
+}
+
+bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
+ if (Size != RHS.Size) return false;
+ return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
+}
+
+/// Used to compare the "ordering" of two nodes as defined by the
+/// profiled bits and their ordering defined by memcmp().
+bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const {
+ if (Size != RHS.Size)
+ return Size < RHS.Size;
+ return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetNodeID Implementation
+
+/// Add* - Add various data types to Bit data.
+///
+void FoldingSetNodeID::AddPointer(const void *Ptr) {
+ // Note: this adds pointers to the hash using sizes and endianness that
+ // depend on the host. It doesn't matter however, because hashing on
+ // pointer values in inherently unstable. Nothing should depend on the
+ // ordering of nodes in the folding set.
+ Bits.append(reinterpret_cast<unsigned *>(&Ptr),
+ reinterpret_cast<unsigned *>(&Ptr+1));
+}
+void FoldingSetNodeID::AddInteger(signed I) {
+ Bits.push_back(I);
+}
+void FoldingSetNodeID::AddInteger(unsigned I) {
+ Bits.push_back(I);
+}
+void FoldingSetNodeID::AddInteger(long I) {
+ AddInteger((unsigned long)I);
+}
+void FoldingSetNodeID::AddInteger(unsigned long I) {
+ if (sizeof(long) == sizeof(int))
+ AddInteger(unsigned(I));
+ else if (sizeof(long) == sizeof(long long)) {
+ AddInteger((unsigned long long)I);
+ } else {
+ llvm_unreachable("unexpected sizeof(long)");
+ }
+}
+void FoldingSetNodeID::AddInteger(long long I) {
+ AddInteger((unsigned long long)I);
+}
+void FoldingSetNodeID::AddInteger(unsigned long long I) {
+ AddInteger(unsigned(I));
+ if ((uint64_t)(unsigned)I != I)
+ Bits.push_back(unsigned(I >> 32));
+}
+
+void FoldingSetNodeID::AddString(StringRef String) {
+ unsigned Size = String.size();
+ Bits.push_back(Size);
+ if (!Size) return;
+
+ unsigned Units = Size / 4;
+ unsigned Pos = 0;
+ const unsigned *Base = (const unsigned*) String.data();
+
+ // If the string is aligned do a bulk transfer.
+ if (!((intptr_t)Base & 3)) {
+ Bits.append(Base, Base + Units);
+ Pos = (Units + 1) * 4;
+ } else {
+ // Otherwise do it the hard way.
+ // To be compatible with above bulk transfer, we need to take endianness
+ // into account.
+ if (sys::isBigEndianHost()) {
+ for (Pos += 4; Pos <= Size; Pos += 4) {
+ unsigned V = ((unsigned char)String[Pos - 4] << 24) |
+ ((unsigned char)String[Pos - 3] << 16) |
+ ((unsigned char)String[Pos - 2] << 8) |
+ (unsigned char)String[Pos - 1];
+ Bits.push_back(V);
+ }
+ } else {
+ assert(sys::isLittleEndianHost() && "Unexpected host endianness");
+ for (Pos += 4; Pos <= Size; Pos += 4) {
+ unsigned V = ((unsigned char)String[Pos - 1] << 24) |
+ ((unsigned char)String[Pos - 2] << 16) |
+ ((unsigned char)String[Pos - 3] << 8) |
+ (unsigned char)String[Pos - 4];
+ Bits.push_back(V);
+ }
+ }
+ }
+
+ // With the leftover bits.
+ unsigned V = 0;
+ // Pos will have overshot size by 4 - #bytes left over.
+ // No need to take endianness into account here - this is always executed.
+ switch (Pos - Size) {
+ case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
+ case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
+ case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
+ default: return; // Nothing left.
+ }
+
+ Bits.push_back(V);
+}
+
+// AddNodeID - Adds the Bit data of another ID to *this.
+void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
+ Bits.append(ID.Bits.begin(), ID.Bits.end());
+}
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
+/// lookup the node in the FoldingSetImpl.
+unsigned FoldingSetNodeID::ComputeHash() const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const {
+ return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
+}
+
+/// Used to compare the "ordering" of two nodes as defined by the
+/// profiled bits and their ordering defined by memcmp().
+bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const {
+ return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
+}
+
+bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS;
+}
+
+/// Intern - Copy this node's data to a memory region allocated from the
+/// given allocator and return a FoldingSetNodeIDRef describing the
+/// interned data.
+FoldingSetNodeIDRef
+FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
+ unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
+ std::uninitialized_copy(Bits.begin(), Bits.end(), New);
+ return FoldingSetNodeIDRef(New, Bits.size());
+}
+
+//===----------------------------------------------------------------------===//
+/// Helper functions for FoldingSetImpl.
+
+/// GetNextPtr - In order to save space, each bucket is a
+/// singly-linked-list. In order to make deletion more efficient, we make
+/// the list circular, so we can delete a node without computing its hash.
+/// The problem with this is that the start of the hash buckets are not
+/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
+/// use GetBucketPtr when this happens.
+static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
+ // The low bit is set if this is the pointer back to the bucket.
+ if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
+ return 0;
+
+ return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
+}
+
+
+/// testing.
+static void **GetBucketPtr(void *NextInBucketPtr) {
+ intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
+ assert((Ptr & 1) && "Not a bucket pointer");
+ return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
+}
+
+/// GetBucketFor - Hash the specified node ID and return the hash bucket for
+/// the specified ID.
+static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
+ // NumBuckets is always a power of 2.
+ unsigned BucketNum = Hash & (NumBuckets-1);
+ return Buckets + BucketNum;
+}
+
+/// AllocateBuckets - Allocated initialized bucket memory.
+static void **AllocateBuckets(unsigned NumBuckets) {
+ void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*)));
+ // Set the very last bucket to be a non-null "pointer".
+ Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
+ return Buckets;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetImpl Implementation
+
+FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
+ assert(5 < Log2InitSize && Log2InitSize < 32 &&
+ "Initial hash table size out of range");
+ NumBuckets = 1 << Log2InitSize;
+ Buckets = AllocateBuckets(NumBuckets);
+ NumNodes = 0;
+}
+FoldingSetImpl::~FoldingSetImpl() {
+ free(Buckets);
+}
+void FoldingSetImpl::clear() {
+ // Set all but the last bucket to null pointers.
+ memset(Buckets, 0, NumBuckets*sizeof(void*));
+
+ // Set the very last bucket to be a non-null "pointer".
+ Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
+
+ // Reset the node count to zero.
+ NumNodes = 0;
+}
+
+/// GrowHashTable - Double the size of the hash table and rehash everything.
+///
+void FoldingSetImpl::GrowHashTable() {
+ void **OldBuckets = Buckets;
+ unsigned OldNumBuckets = NumBuckets;
+ NumBuckets <<= 1;
+
+ // Clear out new buckets.
+ Buckets = AllocateBuckets(NumBuckets);
+ NumNodes = 0;
+
+ // Walk the old buckets, rehashing nodes into their new place.
+ FoldingSetNodeID TempID;
+ for (unsigned i = 0; i != OldNumBuckets; ++i) {
+ void *Probe = OldBuckets[i];
+ if (!Probe) continue;
+ while (Node *NodeInBucket = GetNextPtr(Probe)) {
+ // Figure out the next link, remove NodeInBucket from the old link.
+ Probe = NodeInBucket->getNextInBucket();
+ NodeInBucket->SetNextInBucket(0);
+
+ // Insert the node into the new bucket, after recomputing the hash.
+ InsertNode(NodeInBucket,
+ GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
+ Buckets, NumBuckets));
+ TempID.clear();
+ }
+ }
+
+ free(OldBuckets);
+}
+
+/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
+/// return it. If not, return the insertion token that will make insertion
+/// faster.
+FoldingSetImpl::Node
+*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ void *&InsertPos) {
+ unsigned IDHash = ID.ComputeHash();
+ void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
+ void *Probe = *Bucket;
+
+ InsertPos = 0;
+
+ FoldingSetNodeID TempID;
+ while (Node *NodeInBucket = GetNextPtr(Probe)) {
+ if (NodeEquals(NodeInBucket, ID, IDHash, TempID))
+ return NodeInBucket;
+ TempID.clear();
+
+ Probe = NodeInBucket->getNextInBucket();
+ }
+
+ // Didn't find the node, return null with the bucket as the InsertPos.
+ InsertPos = Bucket;
+ return 0;
+}
+
+/// InsertNode - Insert the specified node into the folding set, knowing that it
+/// is not already in the map. InsertPos must be obtained from
+/// FindNodeOrInsertPos.
+void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
+ assert(N->getNextInBucket() == 0);
+ // Do we need to grow the hashtable?
+ if (NumNodes+1 > NumBuckets*2) {
+ GrowHashTable();
+ FoldingSetNodeID TempID;
+ InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
+ }
+
+ ++NumNodes;
+
+ /// The insert position is actually a bucket pointer.
+ void **Bucket = static_cast<void**>(InsertPos);
+
+ void *Next = *Bucket;
+
+ // If this is the first insertion into this bucket, its next pointer will be
+ // null. Pretend as if it pointed to itself, setting the low bit to indicate
+ // that it is a pointer to the bucket.
+ if (Next == 0)
+ Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
+
+ // Set the node's next pointer, and make the bucket point to the node.
+ N->SetNextInBucket(Next);
+ *Bucket = N;
+}
+
+/// RemoveNode - Remove a node from the folding set, returning true if one was
+/// removed or false if the node was not in the folding set.
+bool FoldingSetImpl::RemoveNode(Node *N) {
+ // Because each bucket is a circular list, we don't need to compute N's hash
+ // to remove it.
+ void *Ptr = N->getNextInBucket();
+ if (Ptr == 0) return false; // Not in folding set.
+
+ --NumNodes;
+ N->SetNextInBucket(0);
+
+ // Remember what N originally pointed to, either a bucket or another node.
+ void *NodeNextPtr = Ptr;
+
+ // Chase around the list until we find the node (or bucket) which points to N.
+ while (true) {
+ if (Node *NodeInBucket = GetNextPtr(Ptr)) {
+ // Advance pointer.
+ Ptr = NodeInBucket->getNextInBucket();
+
+ // We found a node that points to N, change it to point to N's next node,
+ // removing N from the list.
+ if (Ptr == N) {
+ NodeInBucket->SetNextInBucket(NodeNextPtr);
+ return true;
+ }
+ } else {
+ void **Bucket = GetBucketPtr(Ptr);
+ Ptr = *Bucket;
+
+ // If we found that the bucket points to N, update the bucket to point to
+ // whatever is next.
+ if (Ptr == N) {
+ *Bucket = NodeNextPtr;
+ return true;
+ }
+ }
+ }
+}
+
+/// GetOrInsertNode - If there is an existing simple Node exactly
+/// equal to the specified node, return it. Otherwise, insert 'N' and it
+/// instead.
+FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
+ FoldingSetNodeID ID;
+ GetNodeProfile(N, ID);
+ void *IP;
+ if (Node *E = FindNodeOrInsertPos(ID, IP))
+ return E;
+ InsertNode(N, IP);
+ return N;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetIteratorImpl Implementation
+
+FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
+ // Skip to the first non-null non-self-cycle bucket.
+ while (*Bucket != reinterpret_cast<void*>(-1) &&
+ (*Bucket == 0 || GetNextPtr(*Bucket) == 0))
+ ++Bucket;
+
+ NodePtr = static_cast<FoldingSetNode*>(*Bucket);
+}
+
+void FoldingSetIteratorImpl::advance() {
+ // If there is another link within this bucket, go to it.
+ void *Probe = NodePtr->getNextInBucket();
+
+ if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
+ NodePtr = NextNodeInBucket;
+ else {
+ // Otherwise, this is the last link in this bucket.
+ void **Bucket = GetBucketPtr(Probe);
+
+ // Skip to the next non-null non-self-cycle bucket.
+ do {
+ ++Bucket;
+ } while (*Bucket != reinterpret_cast<void*>(-1) &&
+ (*Bucket == 0 || GetNextPtr(*Bucket) == 0));
+
+ NodePtr = static_cast<FoldingSetNode*>(*Bucket);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetBucketIteratorImpl Implementation
+
+FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
+ Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket;
+}
diff --git a/contrib/llvm/lib/Support/FormattedStream.cpp b/contrib/llvm/lib/Support/FormattedStream.cpp
new file mode 100644
index 000000000000..231ae48759e2
--- /dev/null
+++ b/contrib/llvm/lib/Support/FormattedStream.cpp
@@ -0,0 +1,101 @@
+//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of formatted_raw_ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+/// CountColumns - Examine the given char sequence and figure out which
+/// column we end up in after output.
+///
+static unsigned CountColumns(unsigned Column, const char *Ptr, size_t Size) {
+ // Keep track of the current column by scanning the string for
+ // special characters
+
+ for (const char *End = Ptr + Size; Ptr != End; ++Ptr) {
+ ++Column;
+ if (*Ptr == '\n' || *Ptr == '\r')
+ Column = 0;
+ else if (*Ptr == '\t')
+ // Assumes tab stop = 8 characters.
+ Column += (8 - (Column & 0x7)) & 0x7;
+ }
+
+ return Column;
+}
+
+/// ComputeColumn - Examine the current output and figure out which
+/// column we end up in after output.
+void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) {
+ // If our previous scan pointer is inside the buffer, assume we already
+ // scanned those bytes. This depends on raw_ostream to not change our buffer
+ // in unexpected ways.
+ if (Ptr <= Scanned && Scanned <= Ptr + Size) {
+ // Scan all characters added since our last scan to determine the new
+ // column.
+ ColumnScanned = CountColumns(ColumnScanned, Scanned,
+ Size - (Scanned - Ptr));
+ } else
+ ColumnScanned = CountColumns(ColumnScanned, Ptr, Size);
+
+ // Update the scanning pointer.
+ Scanned = Ptr + Size;
+}
+
+/// PadToColumn - Align the output to some column number.
+///
+/// \param NewCol - The column to move to.
+///
+formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
+ // Figure out what's in the buffer and add it to the column count.
+ ComputeColumn(getBufferStart(), GetNumBytesInBuffer());
+
+ // Output spaces until we reach the desired column.
+ indent(std::max(int(NewCol - ColumnScanned), 1));
+ return *this;
+}
+
+void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
+ // Figure out what's in the buffer and add it to the column count.
+ ComputeColumn(Ptr, Size);
+
+ // Write the data to the underlying stream (which is unbuffered, so
+ // the data will be immediately written out).
+ TheStream->write(Ptr, Size);
+
+ // Reset the scanning pointer.
+ Scanned = 0;
+}
+
+/// fouts() - This returns a reference to a formatted_raw_ostream for
+/// standard output. Use it like: fouts() << "foo" << "bar";
+formatted_raw_ostream &llvm::fouts() {
+ static formatted_raw_ostream S(outs());
+ return S;
+}
+
+/// ferrs() - This returns a reference to a formatted_raw_ostream for
+/// standard error. Use it like: ferrs() << "foo" << "bar";
+formatted_raw_ostream &llvm::ferrs() {
+ static formatted_raw_ostream S(errs());
+ return S;
+}
+
+/// fdbgs() - This returns a reference to a formatted_raw_ostream for
+/// the debug stream. Use it like: fdbgs() << "foo" << "bar";
+formatted_raw_ostream &llvm::fdbgs() {
+ static formatted_raw_ostream S(dbgs());
+ return S;
+}
diff --git a/contrib/llvm/lib/Support/GraphWriter.cpp b/contrib/llvm/lib/Support/GraphWriter.cpp
new file mode 100644
index 000000000000..bff182f30e35
--- /dev/null
+++ b/contrib/llvm/lib/Support/GraphWriter.cpp
@@ -0,0 +1,209 @@
+//===-- GraphWriter.cpp - Implements GraphWriter support routines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements misc. GraphWriter support routines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+using namespace llvm;
+
+static cl::opt<bool> ViewBackground("view-background", cl::Hidden,
+ cl::desc("Execute graph viewer in the background. Creates tmp file litter."));
+
+std::string llvm::DOT::EscapeString(const std::string &Label) {
+ std::string Str(Label);
+ for (unsigned i = 0; i != Str.length(); ++i)
+ switch (Str[i]) {
+ case '\n':
+ Str.insert(Str.begin()+i, '\\'); // Escape character...
+ ++i;
+ Str[i] = 'n';
+ break;
+ case '\t':
+ Str.insert(Str.begin()+i, ' '); // Convert to two spaces
+ ++i;
+ Str[i] = ' ';
+ break;
+ case '\\':
+ if (i+1 != Str.length())
+ switch (Str[i+1]) {
+ case 'l': continue; // don't disturb \l
+ case '|': case '{': case '}':
+ Str.erase(Str.begin()+i); continue;
+ default: break;
+ }
+ case '{': case '}':
+ case '<': case '>':
+ case '|': case '"':
+ Str.insert(Str.begin()+i, '\\'); // Escape character...
+ ++i; // don't infinite loop
+ break;
+ }
+ return Str;
+}
+
+/// \brief Get a color string for this node number. Simply round-robin selects
+/// from a reasonable number of colors.
+StringRef llvm::DOT::getColorString(unsigned ColorNumber) {
+ static const int NumColors = 20;
+ static const char* Colors[NumColors] = {
+ "aaaaaa", "aa0000", "00aa00", "aa5500", "0055ff", "aa00aa", "00aaaa",
+ "555555", "ff5555", "55ff55", "ffff55", "5555ff", "ff55ff", "55ffff",
+ "ffaaaa", "aaffaa", "ffffaa", "aaaaff", "ffaaff", "aaffff"};
+ return Colors[ColorNumber % NumColors];
+}
+
+// Execute the graph viewer. Return true if successful.
+static bool LLVM_ATTRIBUTE_UNUSED
+ExecGraphViewer(const sys::Path &ExecPath, std::vector<const char*> &args,
+ const sys::Path &Filename, bool wait, std::string &ErrMsg) {
+ if (wait) {
+ if (sys::Program::ExecuteAndWait(ExecPath, &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return false;
+ }
+ Filename.eraseFromDisk();
+ errs() << " done. \n";
+ }
+ else {
+ sys::Program::ExecuteNoWait(ExecPath, &args[0],0,0,0,&ErrMsg);
+ errs() << "Remember to erase graph file: " << Filename.str() << "\n";
+ }
+ return true;
+}
+
+void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
+ GraphProgram::Name program) {
+ wait &= !ViewBackground;
+ std::string ErrMsg;
+#if HAVE_GRAPHVIZ
+ sys::Path Graphviz(LLVM_PATH_GRAPHVIZ);
+
+ std::vector<const char*> args;
+ args.push_back(Graphviz.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(0);
+
+ errs() << "Running 'Graphviz' program... ";
+ if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg))
+ return;
+
+#elif HAVE_XDOT_PY
+ std::vector<const char*> args;
+ args.push_back(LLVM_PATH_XDOT_PY);
+ args.push_back(Filename.c_str());
+
+ switch (program) {
+ case GraphProgram::DOT: args.push_back("-f"); args.push_back("dot"); break;
+ case GraphProgram::FDP: args.push_back("-f"); args.push_back("fdp"); break;
+ case GraphProgram::NEATO: args.push_back("-f"); args.push_back("neato");break;
+ case GraphProgram::TWOPI: args.push_back("-f"); args.push_back("twopi");break;
+ case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
+ }
+
+ args.push_back(0);
+
+ errs() << "Running 'xdot.py' program... ";
+ if (!ExecGraphViewer(sys::Path(LLVM_PATH_XDOT_PY), args, Filename, wait, ErrMsg))
+ return;
+
+#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
+ HAVE_TWOPI || HAVE_CIRCO))
+ sys::Path PSFilename = Filename;
+ PSFilename.appendSuffix("ps");
+
+ sys::Path prog;
+
+ // Set default grapher
+#if HAVE_CIRCO
+ prog = sys::Path(LLVM_PATH_CIRCO);
+#endif
+#if HAVE_TWOPI
+ prog = sys::Path(LLVM_PATH_TWOPI);
+#endif
+#if HAVE_NEATO
+ prog = sys::Path(LLVM_PATH_NEATO);
+#endif
+#if HAVE_FDP
+ prog = sys::Path(LLVM_PATH_FDP);
+#endif
+#if HAVE_DOT
+ prog = sys::Path(LLVM_PATH_DOT);
+#endif
+
+ // Find which program the user wants
+#if HAVE_DOT
+ if (program == GraphProgram::DOT)
+ prog = sys::Path(LLVM_PATH_DOT);
+#endif
+#if (HAVE_FDP)
+ if (program == GraphProgram::FDP)
+ prog = sys::Path(LLVM_PATH_FDP);
+#endif
+#if (HAVE_NEATO)
+ if (program == GraphProgram::NEATO)
+ prog = sys::Path(LLVM_PATH_NEATO);
+#endif
+#if (HAVE_TWOPI)
+ if (program == GraphProgram::TWOPI)
+ prog = sys::Path(LLVM_PATH_TWOPI);
+#endif
+#if (HAVE_CIRCO)
+ if (program == GraphProgram::CIRCO)
+ prog = sys::Path(LLVM_PATH_CIRCO);
+#endif
+
+ std::vector<const char*> args;
+ args.push_back(prog.c_str());
+ args.push_back("-Tps");
+ args.push_back("-Nfontname=Courier");
+ args.push_back("-Gsize=7.5,10");
+ args.push_back(Filename.c_str());
+ args.push_back("-o");
+ args.push_back(PSFilename.c_str());
+ args.push_back(0);
+
+ errs() << "Running '" << prog.str() << "' program... ";
+
+ if (!ExecGraphViewer(prog, args, Filename, wait, ErrMsg))
+ return;
+
+ sys::Path gv(LLVM_PATH_GV);
+ args.clear();
+ args.push_back(gv.c_str());
+ args.push_back(PSFilename.c_str());
+ args.push_back("--spartan");
+ args.push_back(0);
+
+ ErrMsg.clear();
+ if (!ExecGraphViewer(gv, args, PSFilename, wait, ErrMsg))
+ return;
+
+#elif HAVE_DOTTY
+ sys::Path dotty(LLVM_PATH_DOTTY);
+
+ std::vector<const char*> args;
+ args.push_back(dotty.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(0);
+
+// Dotty spawns another app and doesn't wait until it returns
+#if defined (__MINGW32__) || defined (_WINDOWS)
+ wait = false;
+#endif
+ errs() << "Running 'dotty' program... ";
+ if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg))
+ return;
+#endif
+}
diff --git a/contrib/llvm/lib/Support/Hashing.cpp b/contrib/llvm/lib/Support/Hashing.cpp
new file mode 100644
index 000000000000..c69efb7c3cc9
--- /dev/null
+++ b/contrib/llvm/lib/Support/Hashing.cpp
@@ -0,0 +1,29 @@
+//===-------------- lib/Support/Hashing.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides implementation bits for the LLVM common hashing
+// infrastructure. Documentation and most of the other information is in the
+// header file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Hashing.h"
+
+using namespace llvm;
+
+// Provide a definition and static initializer for the fixed seed. This
+// initializer should always be zero to ensure its value can never appear to be
+// non-zero, even during dynamic initialization.
+size_t llvm::hashing::detail::fixed_seed_override = 0;
+
+// Implement the function for forced setting of the fixed seed.
+// FIXME: Use atomic operations here so that there is no data race.
+void llvm::set_fixed_execution_hash_seed(size_t fixed_value) {
+ hashing::detail::fixed_seed_override = fixed_value;
+}
diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp
new file mode 100644
index 000000000000..73d98d148746
--- /dev/null
+++ b/contrib/llvm/lib/Support/Host.cpp
@@ -0,0 +1,619 @@
+//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file implements the operating system Host concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Host.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/DataStream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string.h>
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Host.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Host.inc"
+#endif
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+#if defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
+#include <mach/mach.h>
+#include <mach/mach_host.h>
+#include <mach/host_info.h>
+#include <mach/machine.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//
+// Implementations of the CPU detection routines
+//
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+
+/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #else
+ return true;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+// pedantic #else returns to appease -Wunreachable-code (so we don't generate
+// postprocessed code that looks like "return true; return false;")
+ #else
+ return true;
+ #endif
+#else
+ return true;
+#endif
+}
+
+static bool OSHasAVXSupport() {
+#if defined(__GNUC__)
+ // Check xgetbv; this uses a .byte sequence instead of the instruction
+ // directly because older assemblers do not include support for xgetbv and
+ // there is no easy way to conditionally compile based on the assembler used.
+ int rEAX, rEDX;
+ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219
+ unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+#else
+ int rEAX = 0; // Ensures we return false
+#endif
+ return (rEAX & 6) == 6;
+}
+
+static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
+ unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
+std::string sys::getHostCPUName() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ return "generic";
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectX86FamilyModel(EAX, Family, Model);
+
+ bool HasSSE3 = (ECX & 0x1);
+ // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
+ // indicates that the AVX registers will be saved and restored on context
+ // switch, then we have full AVX support.
+ const unsigned AVXBits = (1 << 27) | (1 << 28);
+ bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
+ GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ bool Em64T = (EDX >> 29) & 0x1;
+
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ switch (Family) {
+ case 3:
+ return "i386";
+ case 4:
+ switch (Model) {
+ case 0: // Intel486 DX processors
+ case 1: // Intel486 DX processors
+ case 2: // Intel486 SX processors
+ case 3: // Intel487 processors, IntelDX2 OverDrive processors,
+ // IntelDX2 processors
+ case 4: // Intel486 SL processor
+ case 5: // IntelSX2 processors
+ case 7: // Write-Back Enhanced IntelDX2 processors
+ case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
+ default: return "i486";
+ }
+ case 5:
+ switch (Model) {
+ case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
+ // Pentium processors (60, 66)
+ case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
+ // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
+ // 150, 166, 200)
+ case 3: // Pentium OverDrive processors for Intel486 processor-based
+ // systems
+ return "pentium";
+
+ case 4: // Pentium OverDrive processor with MMX technology for Pentium
+ // processor (75, 90, 100, 120, 133), Pentium processor with
+ // MMX technology (166, 200)
+ return "pentium-mmx";
+
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 1: // Pentium Pro processor
+ return "pentiumpro";
+
+ case 3: // Intel Pentium II OverDrive processor, Pentium II processor,
+ // model 03
+ case 5: // Pentium II processor, model 05, Pentium II Xeon processor,
+ // model 05, and Intel Celeron processor, model 05
+ case 6: // Celeron processor, model 06
+ return "pentium2";
+
+ case 7: // Pentium III processor, model 07, and Pentium III Xeon
+ // processor, model 07
+ case 8: // Pentium III processor, model 08, Pentium III Xeon processor,
+ // model 08, and Celeron processor, model 08
+ case 10: // Pentium III Xeon processor, model 0Ah
+ case 11: // Pentium III processor, model 0Bh
+ return "pentium3";
+
+ case 9: // Intel Pentium M processor, Intel Celeron M processor model 09.
+ case 13: // Intel Pentium M processor, Intel Celeron M processor, model
+ // 0Dh. All processors are manufactured using the 90 nm process.
+ return "pentium-m";
+
+ case 14: // Intel Core Duo processor, Intel Core Solo processor, model
+ // 0Eh. All processors are manufactured using the 65 nm process.
+ return "yonah";
+
+ case 15: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
+ // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
+ // mobile processor, Intel Core 2 Extreme processor, Intel
+ // Pentium Dual-Core processor, Intel Xeon processor, model
+ // 0Fh. All processors are manufactured using the 65 nm process.
+ case 22: // Intel Celeron processor model 16h. All processors are
+ // manufactured using the 65 nm process
+ return "core2";
+
+ case 21: // Intel EP80579 Integrated Processor and Intel EP80579
+ // Integrated Processor with Intel QuickAssist Technology
+ return "i686"; // FIXME: ???
+
+ case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model
+ // 17h. All processors are manufactured using the 45 nm process.
+ //
+ // 45nm: Penryn , Wolfdale, Yorkfield (XE)
+ return "penryn";
+
+ case 26: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 45 nm process.
+ case 29: // Intel Xeon processor MP. All processors are manufactured using
+ // the 45 nm process.
+ case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
+ // As found in a Summer 2010 model iMac.
+ case 37: // Intel Core i7, laptop version.
+ case 44: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 32 nm process.
+ case 46: // Nehalem EX
+ case 47: // Westmere EX
+ return "corei7";
+
+ // SandyBridge:
+ case 42: // Intel Core i7 processor. All processors are manufactured
+ // using the 32 nm process.
+ case 45:
+ // Not all Sandy Bridge processors support AVX (such as the Pentium
+ // versions instead of the i7 versions).
+ return HasAVX ? "corei7-avx" : "corei7";
+
+ // Ivy Bridge:
+ case 58:
+ // Not all Ivy Bridge processors support AVX (such as the Pentium
+ // versions instead of the i7 versions).
+ return HasAVX ? "core-avx-i" : "corei7";
+
+ case 28: // Most 45 nm Intel Atom processors
+ case 38: // 45 nm Atom Lincroft
+ case 39: // 32 nm Atom Medfield
+ case 53: // 32 nm Atom Midview
+ case 54: // 32 nm Atom Midview
+ return "atom";
+
+ default: return (Em64T) ? "x86-64" : "i686";
+ }
+ case 15: {
+ switch (Model) {
+ case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
+ // model 00h and manufactured using the 0.18 micron process.
+ case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
+ // processor MP, and Intel Celeron processor. All processors are
+ // model 01h and manufactured using the 0.18 micron process.
+ case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
+ // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
+ // processor, and Mobile Intel Celeron processor. All processors
+ // are model 02h and manufactured using the 0.13 micron process.
+ return (Em64T) ? "x86-64" : "pentium4";
+
+ case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
+ // processor. All processors are model 03h and manufactured using
+ // the 90 nm process.
+ case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
+ // Pentium D processor, Intel Xeon processor, Intel Xeon
+ // processor MP, Intel Celeron D processor. All processors are
+ // model 04h and manufactured using the 90 nm process.
+ case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
+ // Extreme Edition, Intel Xeon processor, Intel Xeon processor
+ // MP, Intel Celeron D processor. All processors are model 06h
+ // and manufactured using the 65 nm process.
+ return (Em64T) ? "nocona" : "prescott";
+
+ default:
+ return (Em64T) ? "x86-64" : "pentium4";
+ }
+ }
+
+ default:
+ return "generic";
+ }
+ } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
+ // appears to be no way to generate the wide variety of AMD-specific targets
+ // from the information returned from CPUID.
+ switch (Family) {
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 6:
+ case 7: return "k6";
+ case 8: return "k6-2";
+ case 9:
+ case 13: return "k6-3";
+ case 10: return "geode";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 4: return "athlon-tbird";
+ case 6:
+ case 7:
+ case 8: return "athlon-mp";
+ case 10: return "athlon-xp";
+ default: return "athlon";
+ }
+ case 15:
+ if (HasSSE3)
+ return "k8-sse3";
+ switch (Model) {
+ case 1: return "opteron";
+ case 5: return "athlon-fx"; // also opteron
+ default: return "athlon64";
+ }
+ case 16:
+ return "amdfam10";
+ case 20:
+ return "btver1";
+ case 21:
+ if (Model <= 15)
+ return "bdver1";
+ else if (Model <= 31)
+ return "bdver2";
+ default:
+ return "generic";
+ }
+ }
+ return "generic";
+}
+#elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
+std::string sys::getHostCPUName() {
+ host_basic_info_data_t hostInfo;
+ mach_msg_type_number_t infoCount;
+
+ infoCount = HOST_BASIC_INFO_COUNT;
+ host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
+ &infoCount);
+
+ if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic";
+
+ switch(hostInfo.cpu_subtype) {
+ case CPU_SUBTYPE_POWERPC_601: return "601";
+ case CPU_SUBTYPE_POWERPC_602: return "602";
+ case CPU_SUBTYPE_POWERPC_603: return "603";
+ case CPU_SUBTYPE_POWERPC_603e: return "603e";
+ case CPU_SUBTYPE_POWERPC_603ev: return "603ev";
+ case CPU_SUBTYPE_POWERPC_604: return "604";
+ case CPU_SUBTYPE_POWERPC_604e: return "604e";
+ case CPU_SUBTYPE_POWERPC_620: return "620";
+ case CPU_SUBTYPE_POWERPC_750: return "750";
+ case CPU_SUBTYPE_POWERPC_7400: return "7400";
+ case CPU_SUBTYPE_POWERPC_7450: return "7450";
+ case CPU_SUBTYPE_POWERPC_970: return "970";
+ default: ;
+ }
+
+ return "generic";
+}
+#elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
+std::string sys::getHostCPUName() {
+ // Access to the Processor Version Register (PVR) on PowerPC is privileged,
+ // and so we must use an operating-system interface to determine the current
+ // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
+ const char *generic = "generic";
+
+ // Note: We cannot mmap /proc/cpuinfo here and then process the resulting
+ // memory buffer because the 'file' has 0 size (it can be read from only
+ // as a stream).
+
+ std::string Err;
+ DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err);
+ if (!DS) {
+ DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << Err << "\n");
+ return generic;
+ }
+
+ // The cpu line is second (after the 'processor: 0' line), so if this
+ // buffer is too small then something has changed (or is wrong).
+ char buffer[1024];
+ size_t CPUInfoSize = DS->GetBytes((unsigned char*) buffer, sizeof(buffer));
+ delete DS;
+
+ const char *CPUInfoStart = buffer;
+ const char *CPUInfoEnd = buffer + CPUInfoSize;
+
+ const char *CIP = CPUInfoStart;
+
+ const char *CPUStart = 0;
+ size_t CPULen = 0;
+
+ // We need to find the first line which starts with cpu, spaces, and a colon.
+ // After the colon, there may be some additional spaces and then the cpu type.
+ while (CIP < CPUInfoEnd && CPUStart == 0) {
+ if (CIP < CPUInfoEnd && *CIP == '\n')
+ ++CIP;
+
+ if (CIP < CPUInfoEnd && *CIP == 'c') {
+ ++CIP;
+ if (CIP < CPUInfoEnd && *CIP == 'p') {
+ ++CIP;
+ if (CIP < CPUInfoEnd && *CIP == 'u') {
+ ++CIP;
+ while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
+ ++CIP;
+
+ if (CIP < CPUInfoEnd && *CIP == ':') {
+ ++CIP;
+ while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
+ ++CIP;
+
+ if (CIP < CPUInfoEnd) {
+ CPUStart = CIP;
+ while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
+ *CIP != ',' && *CIP != '\n'))
+ ++CIP;
+ CPULen = CIP - CPUStart;
+ }
+ }
+ }
+ }
+ }
+
+ if (CPUStart == 0)
+ while (CIP < CPUInfoEnd && *CIP != '\n')
+ ++CIP;
+ }
+
+ if (CPUStart == 0)
+ return generic;
+
+ return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
+ .Case("604e", "604e")
+ .Case("604", "604")
+ .Case("7400", "7400")
+ .Case("7410", "7400")
+ .Case("7447", "7400")
+ .Case("7455", "7450")
+ .Case("G4", "g4")
+ .Case("POWER4", "970")
+ .Case("PPC970FX", "970")
+ .Case("PPC970MP", "970")
+ .Case("G5", "g5")
+ .Case("POWER5", "g5")
+ .Case("A2", "a2")
+ .Case("POWER6", "pwr6")
+ .Case("POWER7", "pwr7")
+ .Default(generic);
+}
+#elif defined(__linux__) && defined(__arm__)
+std::string sys::getHostCPUName() {
+ // The cpuid register on arm is not accessible from user space. On Linux,
+ // it is exposed through the /proc/cpuinfo file.
+ // Note: We cannot mmap /proc/cpuinfo here and then process the resulting
+ // memory buffer because the 'file' has 0 size (it can be read from only
+ // as a stream).
+
+ std::string Err;
+ DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err);
+ if (!DS) {
+ DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << Err << "\n");
+ return "generic";
+ }
+
+ // Read 1024 bytes from /proc/cpuinfo, which should contain the CPU part line
+ // in all cases.
+ char buffer[1024];
+ size_t CPUInfoSize = DS->GetBytes((unsigned char*) buffer, sizeof(buffer));
+ delete DS;
+
+ StringRef Str(buffer, CPUInfoSize);
+
+ SmallVector<StringRef, 32> Lines;
+ Str.split(Lines, "\n");
+
+ // Look for the CPU implementer line.
+ StringRef Implementer;
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("CPU implementer"))
+ Implementer = Lines[I].substr(15).ltrim("\t :");
+
+ if (Implementer == "0x41") // ARM Ltd.
+ // Look for the CPU part line.
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("CPU part"))
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
+ .Case("0x926", "arm926ej-s")
+ .Case("0xb02", "mpcore")
+ .Case("0xb36", "arm1136j-s")
+ .Case("0xb56", "arm1156t2-s")
+ .Case("0xb76", "arm1176jz-s")
+ .Case("0xc08", "cortex-a8")
+ .Case("0xc09", "cortex-a9")
+ .Case("0xc0f", "cortex-a15")
+ .Case("0xc20", "cortex-m0")
+ .Case("0xc23", "cortex-m3")
+ .Case("0xc24", "cortex-m4")
+ .Default("generic");
+
+ return "generic";
+}
+#else
+std::string sys::getHostCPUName() {
+ return "generic";
+}
+#endif
+
+#if defined(__linux__) && defined(__arm__)
+bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
+ std::string Err;
+ DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err);
+ if (!DS) {
+ DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << Err << "\n");
+ return false;
+ }
+
+ // Read 1024 bytes from /proc/cpuinfo, which should contain the Features line
+ // in all cases.
+ char buffer[1024];
+ size_t CPUInfoSize = DS->GetBytes((unsigned char*) buffer, sizeof(buffer));
+ delete DS;
+
+ StringRef Str(buffer, CPUInfoSize);
+
+ SmallVector<StringRef, 32> Lines;
+ Str.split(Lines, "\n");
+
+ // Look for the CPU implementer line.
+ StringRef Implementer;
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("CPU implementer"))
+ Implementer = Lines[I].substr(15).ltrim("\t :");
+
+ if (Implementer == "0x41") { // ARM Ltd.
+ SmallVector<StringRef, 32> CPUFeatures;
+
+ // Look for the CPU features.
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("Features")) {
+ Lines[I].split(CPUFeatures, " ");
+ break;
+ }
+
+ for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
+ StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
+ .Case("half", "fp16")
+ .Case("neon", "neon")
+ .Case("vfpv3", "vfp3")
+ .Case("vfpv3d16", "d16")
+ .Case("vfpv4", "vfp4")
+ .Case("idiva", "hwdiv-arm")
+ .Case("idivt", "hwdiv")
+ .Default("");
+
+ if (LLVMFeatureStr != "")
+ Features.GetOrCreateValue(LLVMFeatureStr).setValue(true);
+ }
+
+ return true;
+ }
+
+ return false;
+}
+#else
+bool sys::getHostCPUFeatures(StringMap<bool> &Features){
+ return false;
+}
+#endif
+
+std::string sys::getProcessTriple() {
+ Triple PT(LLVM_HOSTTRIPLE);
+
+ if (sizeof(void *) == 8 && PT.isArch32Bit())
+ PT = PT.get64BitArchVariant();
+ if (sizeof(void *) == 4 && PT.isArch64Bit())
+ PT = PT.get32BitArchVariant();
+
+ return PT.str();
+}
diff --git a/contrib/llvm/lib/Support/IncludeFile.cpp b/contrib/llvm/lib/Support/IncludeFile.cpp
new file mode 100644
index 000000000000..e67acb3d1d67
--- /dev/null
+++ b/contrib/llvm/lib/Support/IncludeFile.cpp
@@ -0,0 +1,20 @@
+//===- lib/Support/IncludeFile.cpp - Ensure Linking Of Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IncludeFile constructor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/IncludeFile.h"
+
+using namespace llvm;
+
+// This constructor is used to ensure linking of other modules. See the
+// llvm/Support/IncludeFile.h header for details.
+IncludeFile::IncludeFile(const void*) {}
diff --git a/contrib/llvm/lib/Support/IntEqClasses.cpp b/contrib/llvm/lib/Support/IntEqClasses.cpp
new file mode 100644
index 000000000000..11344956e4c9
--- /dev/null
+++ b/contrib/llvm/lib/Support/IntEqClasses.cpp
@@ -0,0 +1,70 @@
+//===-- llvm/ADT/IntEqClasses.cpp - Equivalence Classes of Integers -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Equivalence classes for small integers. This is a mapping of the integers
+// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
+//
+// Initially each integer has its own equivalence class. Classes are joined by
+// passing a representative member of each class to join().
+//
+// Once the classes are built, compress() will number them 0 .. M-1 and prevent
+// further changes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntEqClasses.h"
+
+using namespace llvm;
+
+void IntEqClasses::grow(unsigned N) {
+ assert(NumClasses == 0 && "grow() called after compress().");
+ EC.reserve(N);
+ while (EC.size() < N)
+ EC.push_back(EC.size());
+}
+
+void IntEqClasses::join(unsigned a, unsigned b) {
+ assert(NumClasses == 0 && "join() called after compress().");
+ unsigned eca = EC[a];
+ unsigned ecb = EC[b];
+ // Update pointers while searching for the leaders, compressing the paths
+ // incrementally. The larger leader will eventually be updated, joining the
+ // classes.
+ while (eca != ecb)
+ if (eca < ecb)
+ EC[b] = eca, b = ecb, ecb = EC[b];
+ else
+ EC[a] = ecb, a = eca, eca = EC[a];
+}
+
+unsigned IntEqClasses::findLeader(unsigned a) const {
+ assert(NumClasses == 0 && "findLeader() called after compress().");
+ while (a != EC[a])
+ a = EC[a];
+ return a;
+}
+
+void IntEqClasses::compress() {
+ if (NumClasses)
+ return;
+ for (unsigned i = 0, e = EC.size(); i != e; ++i)
+ EC[i] = (EC[i] == i) ? NumClasses++ : EC[EC[i]];
+}
+
+void IntEqClasses::uncompress() {
+ if (!NumClasses)
+ return;
+ SmallVector<unsigned, 8> Leader;
+ for (unsigned i = 0, e = EC.size(); i != e; ++i)
+ if (EC[i] < Leader.size())
+ EC[i] = Leader[EC[i]];
+ else
+ Leader.push_back(EC[i] = i);
+ NumClasses = 0;
+}
diff --git a/contrib/llvm/lib/Support/IntervalMap.cpp b/contrib/llvm/lib/Support/IntervalMap.cpp
new file mode 100644
index 000000000000..4dfcc404ca42
--- /dev/null
+++ b/contrib/llvm/lib/Support/IntervalMap.cpp
@@ -0,0 +1,161 @@
+//===- lib/Support/IntervalMap.cpp - A sorted interval map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the few non-templated functions in IntervalMap.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntervalMap.h"
+
+namespace llvm {
+namespace IntervalMapImpl {
+
+void Path::replaceRoot(void *Root, unsigned Size, IdxPair Offsets) {
+ assert(!path.empty() && "Can't replace missing root");
+ path.front() = Entry(Root, Size, Offsets.first);
+ path.insert(path.begin() + 1, Entry(subtree(0), Offsets.second));
+}
+
+NodeRef Path::getLeftSibling(unsigned Level) const {
+ // The root has no siblings.
+ if (Level == 0)
+ return NodeRef();
+
+ // Go up the tree until we can go left.
+ unsigned l = Level - 1;
+ while (l && path[l].offset == 0)
+ --l;
+
+ // We can't go left.
+ if (path[l].offset == 0)
+ return NodeRef();
+
+ // NR is the subtree containing our left sibling.
+ NodeRef NR = path[l].subtree(path[l].offset - 1);
+
+ // Keep right all the way down.
+ for (++l; l != Level; ++l)
+ NR = NR.subtree(NR.size() - 1);
+ return NR;
+}
+
+void Path::moveLeft(unsigned Level) {
+ assert(Level != 0 && "Cannot move the root node");
+
+ // Go up the tree until we can go left.
+ unsigned l = 0;
+ if (valid()) {
+ l = Level - 1;
+ while (path[l].offset == 0) {
+ assert(l != 0 && "Cannot move beyond begin()");
+ --l;
+ }
+ } else if (height() < Level)
+ // end() may have created a height=0 path.
+ path.resize(Level + 1, Entry(0, 0, 0));
+
+ // NR is the subtree containing our left sibling.
+ --path[l].offset;
+ NodeRef NR = subtree(l);
+
+ // Get the rightmost node in the subtree.
+ for (++l; l != Level; ++l) {
+ path[l] = Entry(NR, NR.size() - 1);
+ NR = NR.subtree(NR.size() - 1);
+ }
+ path[l] = Entry(NR, NR.size() - 1);
+}
+
+NodeRef Path::getRightSibling(unsigned Level) const {
+ // The root has no siblings.
+ if (Level == 0)
+ return NodeRef();
+
+ // Go up the tree until we can go right.
+ unsigned l = Level - 1;
+ while (l && atLastEntry(l))
+ --l;
+
+ // We can't go right.
+ if (atLastEntry(l))
+ return NodeRef();
+
+ // NR is the subtree containing our right sibling.
+ NodeRef NR = path[l].subtree(path[l].offset + 1);
+
+ // Keep left all the way down.
+ for (++l; l != Level; ++l)
+ NR = NR.subtree(0);
+ return NR;
+}
+
+void Path::moveRight(unsigned Level) {
+ assert(Level != 0 && "Cannot move the root node");
+
+ // Go up the tree until we can go right.
+ unsigned l = Level - 1;
+ while (l && atLastEntry(l))
+ --l;
+
+ // NR is the subtree containing our right sibling. If we hit end(), we have
+ // offset(0) == node(0).size().
+ if (++path[l].offset == path[l].size)
+ return;
+ NodeRef NR = subtree(l);
+
+ for (++l; l != Level; ++l) {
+ path[l] = Entry(NR, 0);
+ NR = NR.subtree(0);
+ }
+ path[l] = Entry(NR, 0);
+}
+
+
+IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity,
+ const unsigned *CurSize, unsigned NewSize[],
+ unsigned Position, bool Grow) {
+ assert(Elements + Grow <= Nodes * Capacity && "Not enough room for elements");
+ assert(Position <= Elements && "Invalid position");
+ if (!Nodes)
+ return IdxPair();
+
+ // Trivial algorithm: left-leaning even distribution.
+ const unsigned PerNode = (Elements + Grow) / Nodes;
+ const unsigned Extra = (Elements + Grow) % Nodes;
+ IdxPair PosPair = IdxPair(Nodes, 0);
+ unsigned Sum = 0;
+ for (unsigned n = 0; n != Nodes; ++n) {
+ Sum += NewSize[n] = PerNode + (n < Extra);
+ if (PosPair.first == Nodes && Sum > Position)
+ PosPair = IdxPair(n, Position - (Sum - NewSize[n]));
+ }
+ assert(Sum == Elements + Grow && "Bad distribution sum");
+
+ // Subtract the Grow element that was added.
+ if (Grow) {
+ assert(PosPair.first < Nodes && "Bad algebra");
+ assert(NewSize[PosPair.first] && "Too few elements to need Grow");
+ --NewSize[PosPair.first];
+ }
+
+#ifndef NDEBUG
+ Sum = 0;
+ for (unsigned n = 0; n != Nodes; ++n) {
+ assert(NewSize[n] <= Capacity && "Overallocated node");
+ Sum += NewSize[n];
+ }
+ assert(Sum == Elements && "Bad distribution sum");
+#endif
+
+ return PosPair;
+}
+
+} // namespace IntervalMapImpl
+} // namespace llvm
+
diff --git a/contrib/llvm/lib/Support/IntrusiveRefCntPtr.cpp b/contrib/llvm/lib/Support/IntrusiveRefCntPtr.cpp
new file mode 100644
index 000000000000..a8b45593ae70
--- /dev/null
+++ b/contrib/llvm/lib/Support/IntrusiveRefCntPtr.cpp
@@ -0,0 +1,14 @@
+//== IntrusiveRefCntPtr.cpp - Smart Refcounting Pointer ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+
+using namespace llvm;
+
+void RefCountedBaseVPTR::anchor() { }
diff --git a/contrib/llvm/lib/Support/IsInf.cpp b/contrib/llvm/lib/Support/IsInf.cpp
new file mode 100644
index 000000000000..d6da0c99e8d8
--- /dev/null
+++ b/contrib/llvm/lib/Support/IsInf.cpp
@@ -0,0 +1,49 @@
+//===-- IsInf.cpp - Platform-independent wrapper around C99 isinf() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Platform-independent wrapper around C99 isinf()
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+
+#if HAVE_ISINF_IN_MATH_H
+# include <math.h>
+#elif HAVE_ISINF_IN_CMATH
+# include <cmath>
+#elif HAVE_STD_ISINF_IN_CMATH
+# include <cmath>
+using std::isinf;
+#elif HAVE_FINITE_IN_IEEEFP_H
+// A handy workaround I found at http://www.unixguide.net/sun/faq ...
+// apparently this has been a problem with Solaris for years.
+# include <ieeefp.h>
+static int isinf(double x) { return !finite(x) && x==x; }
+#elif defined(_MSC_VER)
+#include <float.h>
+#define isinf(X) (!_finite(X))
+#elif defined(_AIX) && defined(__GNUC__)
+// GCC's fixincludes seems to be removing the isinf() declaration from the
+// system header /usr/include/math.h
+# include <math.h>
+static int isinf(double x) { return !finite(x) && x==x; }
+#elif defined(__hpux)
+// HP-UX is "special"
+#include <math.h>
+static int isinf(double x) { return ((x) == INFINITY) || ((x) == -INFINITY); }
+#else
+# error "Don't know how to get isinf()"
+#endif
+
+namespace llvm {
+
+int IsInf(float f) { return isinf(f); }
+int IsInf(double d) { return isinf(d); }
+
+} // end namespace llvm;
diff --git a/contrib/llvm/lib/Support/IsNAN.cpp b/contrib/llvm/lib/Support/IsNAN.cpp
new file mode 100644
index 000000000000..bdfdfbf3155d
--- /dev/null
+++ b/contrib/llvm/lib/Support/IsNAN.cpp
@@ -0,0 +1,33 @@
+//===-- IsNAN.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Platform-independent wrapper around C99 isnan().
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+
+#if HAVE_ISNAN_IN_MATH_H
+# include <math.h>
+#elif HAVE_ISNAN_IN_CMATH
+# include <cmath>
+#elif HAVE_STD_ISNAN_IN_CMATH
+# include <cmath>
+using std::isnan;
+#elif defined(_MSC_VER)
+#include <float.h>
+#define isnan _isnan
+#else
+# error "Don't know how to get isnan()"
+#endif
+
+namespace llvm {
+ int IsNAN(float f) { return isnan(f); }
+ int IsNAN(double d) { return isnan(d); }
+} // end namespace llvm;
diff --git a/contrib/llvm/lib/Support/Locale.cpp b/contrib/llvm/lib/Support/Locale.cpp
new file mode 100644
index 000000000000..17b9b6c47d60
--- /dev/null
+++ b/contrib/llvm/lib/Support/Locale.cpp
@@ -0,0 +1,10 @@
+#include "llvm/Support/Locale.h"
+#include "llvm/Config/config.h"
+
+#ifdef __APPLE__
+#include "LocaleXlocale.inc"
+#elif LLVM_ON_WIN32
+#include "LocaleWindows.inc"
+#else
+#include "LocaleGeneric.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/LocaleGeneric.inc b/contrib/llvm/lib/Support/LocaleGeneric.inc
new file mode 100644
index 000000000000..278deee3e4dd
--- /dev/null
+++ b/contrib/llvm/lib/Support/LocaleGeneric.inc
@@ -0,0 +1,17 @@
+#include <cwctype>
+
+namespace llvm {
+namespace sys {
+namespace locale {
+
+int columnWidth(StringRef s) {
+ return s.size();
+}
+
+bool isPrint(int c) {
+ return iswprint(c);
+}
+
+}
+}
+}
diff --git a/contrib/llvm/lib/Support/LocaleWindows.inc b/contrib/llvm/lib/Support/LocaleWindows.inc
new file mode 100644
index 000000000000..28e429c0cb7d
--- /dev/null
+++ b/contrib/llvm/lib/Support/LocaleWindows.inc
@@ -0,0 +1,15 @@
+namespace llvm {
+namespace sys {
+namespace locale {
+
+int columnWidth(StringRef s) {
+ return s.size();
+}
+
+bool isPrint(int c) {
+ return ' ' <= c && c <= '~';
+}
+
+}
+}
+}
diff --git a/contrib/llvm/lib/Support/LocaleXlocale.inc b/contrib/llvm/lib/Support/LocaleXlocale.inc
new file mode 100644
index 000000000000..389fe3d1d4fd
--- /dev/null
+++ b/contrib/llvm/lib/Support/LocaleXlocale.inc
@@ -0,0 +1,61 @@
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ManagedStatic.h"
+#include <cassert>
+#include <xlocale.h>
+
+
+namespace {
+ struct locale_holder {
+ locale_holder()
+ : l(newlocale(LC_CTYPE_MASK,"en_US.UTF-8",LC_GLOBAL_LOCALE))
+ {
+ assert(NULL!=l);
+ }
+ ~locale_holder() {
+ freelocale(l);
+ }
+
+ int mbswidth(llvm::SmallString<16> s) const {
+ // this implementation assumes no '\0' in s
+ assert(s.size()==strlen(s.c_str()));
+
+ size_t size = mbstowcs_l(NULL,s.c_str(),0,l);
+ assert(size!=(size_t)-1);
+ if (size==0)
+ return 0;
+ llvm::SmallVector<wchar_t,200> ws(size);
+ size = mbstowcs_l(&ws[0],s.c_str(),ws.size(),l);
+ assert(ws.size()==size);
+ return wcswidth_l(&ws[0],ws.size(),l);
+ }
+
+ int isprint(int c) const {
+ return iswprint_l(c,l);
+ }
+
+ private:
+
+ locale_t l;
+ };
+
+ llvm::ManagedStatic<locale_holder> l;
+}
+
+namespace llvm {
+namespace sys {
+namespace locale {
+
+int columnWidth(StringRef s) {
+ int width = l->mbswidth(s);
+ assert(width>=0);
+ return width;
+}
+
+bool isPrint(int c) {
+ return l->isprint(c);
+}
+
+}
+}
+}
diff --git a/contrib/llvm/lib/Support/LockFileManager.cpp b/contrib/llvm/lib/Support/LockFileManager.cpp
new file mode 100644
index 000000000000..92d8b83cf94e
--- /dev/null
+++ b/contrib/llvm/lib/Support/LockFileManager.cpp
@@ -0,0 +1,226 @@
+//===--- LockFileManager.cpp - File-level Locking Utility------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Support/LockFileManager.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include <fstream>
+#include <sys/stat.h>
+#include <sys/types.h>
+#if LLVM_ON_WIN32
+#include <windows.h>
+#endif
+#if LLVM_ON_UNIX
+#include <unistd.h>
+#endif
+using namespace llvm;
+
+/// \brief Attempt to read the lock file with the given name, if it exists.
+///
+/// \param LockFileName The name of the lock file to read.
+///
+/// \returns The process ID of the process that owns this lock file
+Optional<std::pair<std::string, int> >
+LockFileManager::readLockFile(StringRef LockFileName) {
+ // Check whether the lock file exists. If not, clearly there's nothing
+ // to read, so we just return.
+ bool Exists = false;
+ if (sys::fs::exists(LockFileName, Exists) || !Exists)
+ return None;
+
+ // Read the owning host and PID out of the lock file. If it appears that the
+ // owning process is dead, the lock file is invalid.
+ int PID = 0;
+ std::string Hostname;
+ std::ifstream Input(LockFileName.str().c_str());
+ if (Input >> Hostname >> PID && PID > 0 &&
+ processStillExecuting(Hostname, PID))
+ return std::make_pair(Hostname, PID);
+
+ // Delete the lock file. It's invalid anyway.
+ bool Existed;
+ sys::fs::remove(LockFileName, Existed);
+ return None;
+}
+
+bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) {
+#if LLVM_ON_UNIX && !defined(__ANDROID__)
+ char MyHostname[256];
+ MyHostname[255] = 0;
+ MyHostname[0] = 0;
+ gethostname(MyHostname, 255);
+ // Check whether the process is dead. If so, we're done.
+ if (MyHostname == Hostname && getsid(PID) == -1 && errno == ESRCH)
+ return false;
+#endif
+
+ return true;
+}
+
+LockFileManager::LockFileManager(StringRef FileName)
+{
+ this->FileName = FileName;
+ LockFileName = FileName;
+ LockFileName += ".lock";
+
+ // If the lock file already exists, don't bother to try to create our own
+ // lock file; it won't work anyway. Just figure out who owns this lock file.
+ if ((Owner = readLockFile(LockFileName)))
+ return;
+
+ // Create a lock file that is unique to this instance.
+ UniqueLockFileName = LockFileName;
+ UniqueLockFileName += "-%%%%%%%%";
+ int UniqueLockFileID;
+ if (error_code EC
+ = sys::fs::unique_file(UniqueLockFileName.str(),
+ UniqueLockFileID,
+ UniqueLockFileName,
+ /*makeAbsolute=*/false)) {
+ Error = EC;
+ return;
+ }
+
+ // Write our process ID to our unique lock file.
+ {
+ raw_fd_ostream Out(UniqueLockFileID, /*shouldClose=*/true);
+
+#if LLVM_ON_UNIX
+ // FIXME: move getpid() call into LLVM
+ char hostname[256];
+ hostname[255] = 0;
+ hostname[0] = 0;
+ gethostname(hostname, 255);
+ Out << hostname << ' ' << getpid();
+#else
+ Out << "localhost 1";
+#endif
+ Out.close();
+
+ if (Out.has_error()) {
+ // We failed to write out PID, so make up an excuse, remove the
+ // unique lock file, and fail.
+ Error = make_error_code(errc::no_space_on_device);
+ bool Existed;
+ sys::fs::remove(UniqueLockFileName.c_str(), Existed);
+ return;
+ }
+ }
+
+ // Create a hard link from the lock file name. If this succeeds, we're done.
+ error_code EC
+ = sys::fs::create_hard_link(UniqueLockFileName.str(),
+ LockFileName.str());
+ if (EC == errc::success)
+ return;
+
+ // Creating the hard link failed.
+
+#ifdef LLVM_ON_UNIX
+ // The creation of the hard link may appear to fail, but if stat'ing the
+ // unique file returns a link count of 2, then we can still declare success.
+ struct stat StatBuf;
+ if (stat(UniqueLockFileName.c_str(), &StatBuf) == 0 &&
+ StatBuf.st_nlink == 2)
+ return;
+#endif
+
+ // Someone else managed to create the lock file first. Wipe out our unique
+ // lock file (it's useless now) and read the process ID from the lock file.
+ bool Existed;
+ sys::fs::remove(UniqueLockFileName.str(), Existed);
+ if ((Owner = readLockFile(LockFileName)))
+ return;
+
+ // There is a lock file that nobody owns; try to clean it up and report
+ // an error.
+ sys::fs::remove(LockFileName.str(), Existed);
+ Error = EC;
+}
+
+LockFileManager::LockFileState LockFileManager::getState() const {
+ if (Owner)
+ return LFS_Shared;
+
+ if (Error)
+ return LFS_Error;
+
+ return LFS_Owned;
+}
+
+LockFileManager::~LockFileManager() {
+ if (getState() != LFS_Owned)
+ return;
+
+ // Since we own the lock, remove the lock file and our own unique lock file.
+ bool Existed;
+ sys::fs::remove(LockFileName.str(), Existed);
+ sys::fs::remove(UniqueLockFileName.str(), Existed);
+}
+
+void LockFileManager::waitForUnlock() {
+ if (getState() != LFS_Shared)
+ return;
+
+#if LLVM_ON_WIN32
+ unsigned long Interval = 1;
+#else
+ struct timespec Interval;
+ Interval.tv_sec = 0;
+ Interval.tv_nsec = 1000000;
+#endif
+ // Don't wait more than an hour for the file to appear.
+ const unsigned MaxSeconds = 3600;
+ bool LockFileGone = false;
+ do {
+ // Sleep for the designated interval, to allow the owning process time to
+ // finish up and remove the lock file.
+ // FIXME: Should we hook in to system APIs to get a notification when the
+ // lock file is deleted?
+#if LLVM_ON_WIN32
+ Sleep(Interval);
+#else
+ nanosleep(&Interval, NULL);
+#endif
+ // If the lock file no longer exists, wait for the actual file.
+ bool Exists = false;
+ if (!LockFileGone) {
+ if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) {
+ LockFileGone = true;
+ Exists = false;
+ }
+ }
+ if (LockFileGone) {
+ if (!sys::fs::exists(FileName.str(), Exists) && Exists)
+ return;
+ }
+
+ if (!processStillExecuting((*Owner).first, (*Owner).second))
+ return;
+
+ // Exponentially increase the time we wait for the lock to be removed.
+#if LLVM_ON_WIN32
+ Interval *= 2;
+#else
+ Interval.tv_sec *= 2;
+ Interval.tv_nsec *= 2;
+ if (Interval.tv_nsec >= 1000000000) {
+ ++Interval.tv_sec;
+ Interval.tv_nsec -= 1000000000;
+ }
+#endif
+ } while (
+#if LLVM_ON_WIN32
+ Interval < MaxSeconds * 1000
+#else
+ Interval.tv_sec < (time_t)MaxSeconds
+#endif
+ );
+
+ // Give up.
+}
diff --git a/contrib/llvm/lib/Support/ManagedStatic.cpp b/contrib/llvm/lib/Support/ManagedStatic.cpp
new file mode 100644
index 000000000000..098cccb68df5
--- /dev/null
+++ b/contrib/llvm/lib/Support/ManagedStatic.cpp
@@ -0,0 +1,81 @@
+//===-- ManagedStatic.cpp - Static Global wrapper -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ManagedStatic class and llvm_shutdown().
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Atomic.h"
+#include <cassert>
+using namespace llvm;
+
+static const ManagedStaticBase *StaticList = 0;
+
+void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
+ void (*Deleter)(void*)) const {
+ if (llvm_is_multithreaded()) {
+ llvm_acquire_global_lock();
+
+ if (Ptr == 0) {
+ void* tmp = Creator ? Creator() : 0;
+
+ TsanHappensBefore(this);
+ sys::MemoryFence();
+
+ // This write is racy against the first read in the ManagedStatic
+ // accessors. The race is benign because it does a second read after a
+ // memory fence, at which point it isn't possible to get a partial value.
+ TsanIgnoreWritesBegin();
+ Ptr = tmp;
+ TsanIgnoreWritesEnd();
+ DeleterFn = Deleter;
+
+ // Add to list of managed statics.
+ Next = StaticList;
+ StaticList = this;
+ }
+
+ llvm_release_global_lock();
+ } else {
+ assert(Ptr == 0 && DeleterFn == 0 && Next == 0 &&
+ "Partially initialized ManagedStatic!?");
+ Ptr = Creator ? Creator() : 0;
+ DeleterFn = Deleter;
+
+ // Add to list of managed statics.
+ Next = StaticList;
+ StaticList = this;
+ }
+}
+
+void ManagedStaticBase::destroy() const {
+ assert(DeleterFn && "ManagedStatic not initialized correctly!");
+ assert(StaticList == this &&
+ "Not destroyed in reverse order of construction?");
+ // Unlink from list.
+ StaticList = Next;
+ Next = 0;
+
+ // Destroy memory.
+ DeleterFn(Ptr);
+
+ // Cleanup.
+ Ptr = 0;
+ DeleterFn = 0;
+}
+
+/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
+void llvm::llvm_shutdown() {
+ while (StaticList)
+ StaticList->destroy();
+
+ if (llvm_is_multithreaded()) llvm_stop_multithreaded();
+}
diff --git a/contrib/llvm/lib/Support/Memory.cpp b/contrib/llvm/lib/Support/Memory.cpp
new file mode 100644
index 000000000000..f9a4903ad015
--- /dev/null
+++ b/contrib/llvm/lib/Support/Memory.cpp
@@ -0,0 +1,25 @@
+//===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for allocating memory and dealing
+// with memory mapped files
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Memory.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Valgrind.h"
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Memory.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Memory.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp
new file mode 100644
index 000000000000..7c5ab96a764a
--- /dev/null
+++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp
@@ -0,0 +1,426 @@
+//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MemoryBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/system_error.h"
+#include <cassert>
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+#include <new>
+#include <sys/stat.h>
+#include <sys/types.h>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+// Simplistic definitinos of these macros to allow files to be read with
+// MapInFilePages.
+#ifndef S_ISREG
+#define S_ISREG(x) (1)
+#endif
+#ifndef S_ISBLK
+#define S_ISBLK(x) (0)
+#endif
+#endif
+#include <fcntl.h>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer implementation itself.
+//===----------------------------------------------------------------------===//
+
+MemoryBuffer::~MemoryBuffer() { }
+
+/// init - Initialize this MemoryBuffer as a reference to externally allocated
+/// memory, memory that we know is already null terminated.
+void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
+ bool RequiresNullTerminator) {
+ assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
+ "Buffer is not null terminated!");
+ BufferStart = BufStart;
+ BufferEnd = BufEnd;
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBufferMem implementation.
+//===----------------------------------------------------------------------===//
+
+/// CopyStringRef - Copies contents of a StringRef into a block of memory and
+/// null-terminates it.
+static void CopyStringRef(char *Memory, StringRef Data) {
+ memcpy(Memory, Data.data(), Data.size());
+ Memory[Data.size()] = 0; // Null terminate string.
+}
+
+namespace {
+struct NamedBufferAlloc {
+ StringRef Name;
+ NamedBufferAlloc(StringRef Name) : Name(Name) {}
+};
+}
+
+void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
+ char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1));
+ CopyStringRef(Mem + N, Alloc.Name);
+ return Mem;
+}
+
+namespace {
+/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
+class MemoryBufferMem : public MemoryBuffer {
+public:
+ MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
+ init(InputData.begin(), InputData.end(), RequiresNullTerminator);
+ }
+
+ virtual const char *getBufferIdentifier() const LLVM_OVERRIDE {
+ // The name is stored after the class itself.
+ return reinterpret_cast<const char*>(this + 1);
+ }
+
+ virtual BufferKind getBufferKind() const LLVM_OVERRIDE {
+ return MemoryBuffer_Malloc;
+ }
+};
+}
+
+/// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note
+/// that InputData must be a null terminated if RequiresNullTerminator is true!
+MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
+ StringRef BufferName,
+ bool RequiresNullTerminator) {
+ return new (NamedBufferAlloc(BufferName))
+ MemoryBufferMem(InputData, RequiresNullTerminator);
+}
+
+/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
+/// copying the contents and taking ownership of it. This has no requirements
+/// on EndPtr[0].
+MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData,
+ StringRef BufferName) {
+ MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName);
+ if (!Buf) return 0;
+ memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(),
+ InputData.size());
+ return Buf;
+}
+
+/// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size
+/// that is not initialized. Note that the caller should initialize the
+/// memory allocated by this method. The memory is owned by the MemoryBuffer
+/// object.
+MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
+ StringRef BufferName) {
+ // Allocate space for the MemoryBuffer, the data and the name. It is important
+ // that MemoryBuffer and data are aligned so PointerIntPair works with them.
+ size_t AlignedStringLen =
+ RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1,
+ sizeof(void*)); // TODO: Is sizeof(void*) enough?
+ size_t RealLen = AlignedStringLen + Size + 1;
+ char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
+ if (!Mem) return 0;
+
+ // The name is stored after the class itself.
+ CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName);
+
+ // The buffer begins after the name and must be aligned.
+ char *Buf = Mem + AlignedStringLen;
+ Buf[Size] = 0; // Null terminate buffer.
+
+ return new (Mem) MemoryBufferMem(StringRef(Buf, Size), true);
+}
+
+/// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
+/// is completely initialized to zeros. Note that the caller should
+/// initialize the memory allocated by this method. The memory is owned by
+/// the MemoryBuffer object.
+MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
+ MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName);
+ if (!SB) return 0;
+ memset(const_cast<char*>(SB->getBufferStart()), 0, Size);
+ return SB;
+}
+
+
+/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
+/// if the Filename is "-". If an error occurs, this returns null and fills
+/// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN)
+/// returns an empty buffer.
+error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
+ if (Filename == "-")
+ return getSTDIN(result);
+ return getFile(Filename, result, FileSize);
+}
+
+error_code MemoryBuffer::getFileOrSTDIN(const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
+ if (strcmp(Filename, "-") == 0)
+ return getSTDIN(result);
+ return getFile(Filename, result, FileSize);
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer::getFile implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Memorry maps a file descriptor using sys::fs::mapped_file_region.
+///
+/// This handles converting the offset into a legal offset on the platform.
+class MemoryBufferMMapFile : public MemoryBuffer {
+ sys::fs::mapped_file_region MFR;
+
+ static uint64_t getLegalMapOffset(uint64_t Offset) {
+ return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
+ }
+
+ static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
+ return Len + (Offset - getLegalMapOffset(Offset));
+ }
+
+ const char *getStart(uint64_t Len, uint64_t Offset) {
+ return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
+ }
+
+public:
+ MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
+ uint64_t Offset, error_code EC)
+ : MFR(FD, false, sys::fs::mapped_file_region::readonly,
+ getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) {
+ if (!EC) {
+ const char *Start = getStart(Len, Offset);
+ init(Start, Start + Len, RequiresNullTerminator);
+ }
+ }
+
+ virtual const char *getBufferIdentifier() const LLVM_OVERRIDE {
+ // The name is stored after the class itself.
+ return reinterpret_cast<const char *>(this + 1);
+ }
+
+ virtual BufferKind getBufferKind() const LLVM_OVERRIDE {
+ return MemoryBuffer_MMap;
+ }
+};
+}
+
+static error_code getMemoryBufferForStream(int FD,
+ StringRef BufferName,
+ OwningPtr<MemoryBuffer> &result) {
+ const ssize_t ChunkSize = 4096*4;
+ SmallString<ChunkSize> Buffer;
+ ssize_t ReadBytes;
+ // Read into Buffer until we hit EOF.
+ do {
+ Buffer.reserve(Buffer.size() + ChunkSize);
+ ReadBytes = read(FD, Buffer.end(), ChunkSize);
+ if (ReadBytes == -1) {
+ if (errno == EINTR) continue;
+ return error_code(errno, posix_category());
+ }
+ Buffer.set_size(Buffer.size() + ReadBytes);
+ } while (ReadBytes != 0);
+
+ result.reset(MemoryBuffer::getMemBufferCopy(Buffer, BufferName));
+ return error_code::success();
+}
+
+error_code MemoryBuffer::getFile(StringRef Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize,
+ bool RequiresNullTerminator) {
+ // Ensure the path is null terminated.
+ SmallString<256> PathBuf(Filename.begin(), Filename.end());
+ return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize,
+ RequiresNullTerminator);
+}
+
+error_code MemoryBuffer::getFile(const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize,
+ bool RequiresNullTerminator) {
+ // FIXME: Review if this check is unnecessary on windows as well.
+#ifdef LLVM_ON_WIN32
+ // First check that the "file" is not a directory
+ bool is_dir = false;
+ error_code err = sys::fs::is_directory(Filename, is_dir);
+ if (err)
+ return err;
+ if (is_dir)
+ return make_error_code(errc::is_a_directory);
+#endif
+
+ int OpenFlags = O_RDONLY;
+#ifdef O_BINARY
+ OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
+#endif
+ int FD = ::open(Filename, OpenFlags);
+ if (FD == -1)
+ return error_code(errno, posix_category());
+
+ error_code ret = getOpenFile(FD, Filename, result, FileSize, FileSize,
+ 0, RequiresNullTerminator);
+ close(FD);
+ return ret;
+}
+
+static bool shouldUseMmap(int FD,
+ size_t FileSize,
+ size_t MapSize,
+ off_t Offset,
+ bool RequiresNullTerminator,
+ int PageSize) {
+ // We don't use mmap for small files because this can severely fragment our
+ // address space.
+ if (MapSize < 4096*4)
+ return false;
+
+ if (!RequiresNullTerminator)
+ return true;
+
+
+ // If we don't know the file size, use fstat to find out. fstat on an open
+ // file descriptor is cheaper than stat on a random path.
+ // FIXME: this chunk of code is duplicated, but it avoids a fstat when
+ // RequiresNullTerminator = false and MapSize != -1.
+ if (FileSize == size_t(-1)) {
+ struct stat FileInfo;
+ // TODO: This should use fstat64 when available.
+ if (fstat(FD, &FileInfo) == -1) {
+ return error_code(errno, posix_category());
+ }
+ FileSize = FileInfo.st_size;
+ }
+
+ // If we need a null terminator and the end of the map is inside the file,
+ // we cannot use mmap.
+ size_t End = Offset + MapSize;
+ assert(End <= FileSize);
+ if (End != FileSize)
+ return false;
+
+ // Don't try to map files that are exactly a multiple of the system page size
+ // if we need a null terminator.
+ if ((FileSize & (PageSize -1)) == 0)
+ return false;
+
+ return true;
+}
+
+error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ uint64_t FileSize, uint64_t MapSize,
+ int64_t Offset,
+ bool RequiresNullTerminator) {
+ static int PageSize = sys::process::get_self()->page_size();
+
+ // Default is to map the full file.
+ if (MapSize == uint64_t(-1)) {
+ // If we don't know the file size, use fstat to find out. fstat on an open
+ // file descriptor is cheaper than stat on a random path.
+ if (FileSize == uint64_t(-1)) {
+ struct stat FileInfo;
+ // TODO: This should use fstat64 when available.
+ if (fstat(FD, &FileInfo) == -1) {
+ return error_code(errno, posix_category());
+ }
+
+ // If this not a file or a block device (e.g. it's a named pipe
+ // or character device), we can't trust the size. Create the memory
+ // buffer by copying off the stream.
+ if (!S_ISREG(FileInfo.st_mode) && !S_ISBLK(FileInfo.st_mode)) {
+ return getMemoryBufferForStream(FD, Filename, result);
+ }
+
+ FileSize = FileInfo.st_size;
+ }
+ MapSize = FileSize;
+ }
+
+ if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
+ PageSize)) {
+ error_code EC;
+ result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile(
+ RequiresNullTerminator, FD, MapSize, Offset, EC));
+ if (!EC)
+ return error_code::success();
+ }
+
+ MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
+ if (!Buf) {
+ // Failed to create a buffer. The only way it can fail is if
+ // new(std::nothrow) returns 0.
+ return make_error_code(errc::not_enough_memory);
+ }
+
+ OwningPtr<MemoryBuffer> SB(Buf);
+ char *BufPtr = const_cast<char*>(SB->getBufferStart());
+
+ size_t BytesLeft = MapSize;
+#ifndef HAVE_PREAD
+ if (lseek(FD, Offset, SEEK_SET) == -1)
+ return error_code(errno, posix_category());
+#endif
+
+ while (BytesLeft) {
+#ifdef HAVE_PREAD
+ ssize_t NumRead = ::pread(FD, BufPtr, BytesLeft, MapSize-BytesLeft+Offset);
+#else
+ ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
+#endif
+ if (NumRead == -1) {
+ if (errno == EINTR)
+ continue;
+ // Error while reading.
+ return error_code(errno, posix_category());
+ }
+ if (NumRead == 0) {
+ assert(0 && "We got inaccurate FileSize value or fstat reported an "
+ "invalid file size.");
+ *BufPtr = '\0'; // null-terminate at the actual size.
+ break;
+ }
+ BytesLeft -= NumRead;
+ BufPtr += NumRead;
+ }
+
+ result.swap(SB);
+ return error_code::success();
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer::getSTDIN implementation.
+//===----------------------------------------------------------------------===//
+
+error_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &result) {
+ // Read in all of the data from stdin, we cannot mmap stdin.
+ //
+ // FIXME: That isn't necessarily true, we should try to mmap stdin and
+ // fallback if it fails.
+ sys::Program::ChangeStdinToBinary();
+
+ return getMemoryBufferForStream(0, "<stdin>", result);
+}
diff --git a/contrib/llvm/lib/Support/MemoryObject.cpp b/contrib/llvm/lib/Support/MemoryObject.cpp
new file mode 100644
index 000000000000..b20ab8923813
--- /dev/null
+++ b/contrib/llvm/lib/Support/MemoryObject.cpp
@@ -0,0 +1,37 @@
+//===- MemoryObject.cpp - Abstract memory interface -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MemoryObject.h"
+using namespace llvm;
+
+MemoryObject::~MemoryObject() {
+}
+
+int MemoryObject::readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const {
+ uint64_t current = address;
+ uint64_t limit = getBase() + getExtent();
+
+ if (current + size > limit)
+ return -1;
+
+ while (current - address < size) {
+ if (readByte(current, &buf[(current - address)]))
+ return -1;
+
+ current++;
+ }
+
+ if (copied)
+ *copied = current - address;
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Support/Mutex.cpp b/contrib/llvm/lib/Support/Mutex.cpp
new file mode 100644
index 000000000000..4e4a026b2f0d
--- /dev/null
+++ b/contrib/llvm/lib/Support/Mutex.cpp
@@ -0,0 +1,129 @@
+//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Mutex.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+MutexImpl::MutexImpl( bool recursive) { }
+MutexImpl::~MutexImpl() { }
+bool MutexImpl::acquire() { return true; }
+bool MutexImpl::release() { return true; }
+bool MutexImpl::tryacquire() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+// Construct a Mutex using pthread calls
+MutexImpl::MutexImpl( bool recursive)
+ : data_(0)
+{
+ // Declare the pthread_mutex data structures
+ pthread_mutex_t* mutex =
+ static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+ pthread_mutexattr_t attr;
+
+ // Initialize the mutex attributes
+ int errorcode = pthread_mutexattr_init(&attr);
+ assert(errorcode == 0); (void)errorcode;
+
+ // Initialize the mutex as a recursive mutex, if requested, or normal
+ // otherwise.
+ int kind = ( recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
+ errorcode = pthread_mutexattr_settype(&attr, kind);
+ assert(errorcode == 0);
+
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && \
+ !defined(__DragonFly__) && !defined(__Bitrig__)
+ // Make it a process local mutex
+ errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+ assert(errorcode == 0);
+#endif
+
+ // Initialize the mutex
+ errorcode = pthread_mutex_init(mutex, &attr);
+ assert(errorcode == 0);
+
+ // Destroy the attributes
+ errorcode = pthread_mutexattr_destroy(&attr);
+ assert(errorcode == 0);
+
+ // Assign the data member
+ data_ = mutex;
+}
+
+// Destruct a Mutex
+MutexImpl::~MutexImpl()
+{
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+ pthread_mutex_destroy(mutex);
+ free(mutex);
+}
+
+bool
+MutexImpl::acquire()
+{
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_lock(mutex);
+ return errorcode == 0;
+}
+
+bool
+MutexImpl::release()
+{
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_unlock(mutex);
+ return errorcode == 0;
+}
+
+bool
+MutexImpl::tryacquire()
+{
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_trylock(mutex);
+ return errorcode == 0;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/Mutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/Mutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in Support/Mutex.cpp
+#endif
+#endif
diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp
new file mode 100644
index 000000000000..d0703754e04f
--- /dev/null
+++ b/contrib/llvm/lib/Support/Path.cpp
@@ -0,0 +1,295 @@
+//===-- Path.cpp - Implement OS Path Concept --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file implements the operating system Path concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Path.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/FileSystem.h"
+#include <cassert>
+#include <cstring>
+#include <ostream>
+using namespace llvm;
+using namespace sys;
+namespace {
+using support::ulittle32_t;
+}
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+bool Path::operator==(const Path &that) const {
+ return path == that.path;
+}
+
+bool Path::operator<(const Path& that) const {
+ return path < that.path;
+}
+
+LLVMFileType
+sys::IdentifyFileType(const char *magic, unsigned length) {
+ assert(magic && "Invalid magic number string");
+ assert(length >=4 && "Invalid magic number length");
+ switch ((unsigned char)magic[0]) {
+ case 0xDE: // 0x0B17C0DE = BC wraper
+ if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+ magic[3] == (char)0x0B)
+ return Bitcode_FileType;
+ break;
+ case 'B':
+ if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+ return Bitcode_FileType;
+ break;
+ case '!':
+ if (length >= 8)
+ if (memcmp(magic,"!<arch>\n",8) == 0)
+ return Archive_FileType;
+ break;
+
+ case '\177':
+ if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+ bool Data2MSB = magic[5] == 2;
+ unsigned high = Data2MSB ? 16 : 17;
+ unsigned low = Data2MSB ? 17 : 16;
+ if (length >= 18 && magic[high] == 0)
+ switch (magic[low]) {
+ default: break;
+ case 1: return ELF_Relocatable_FileType;
+ case 2: return ELF_Executable_FileType;
+ case 3: return ELF_SharedObject_FileType;
+ case 4: return ELF_Core_FileType;
+ }
+ }
+ break;
+
+ case 0xCA:
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ magic[3] == char(0xBE)) {
+ // This is complicated by an overlap with Java class files.
+ // See the Mach-O section in /usr/share/file/magic for details.
+ if (length >= 8 && magic[7] < 43)
+ // FIXME: Universal Binary of any type.
+ return Mach_O_DynamicallyLinkedSharedLib_FileType;
+ }
+ break;
+
+ // The two magic numbers for mach-o are:
+ // 0xfeedface - 32-bit mach-o
+ // 0xfeedfacf - 64-bit mach-o
+ case 0xFE:
+ case 0xCE:
+ case 0xCF: {
+ uint16_t type = 0;
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ magic[2] == char(0xFA) &&
+ (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
+ /* Native endian */
+ if (length >= 16) type = magic[14] << 8 | magic[15];
+ } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
+ magic[1] == char(0xFA) && magic[2] == char(0xED) &&
+ magic[3] == char(0xFE)) {
+ /* Reverse endian */
+ if (length >= 14) type = magic[13] << 8 | magic[12];
+ }
+ switch (type) {
+ default: break;
+ case 1: return Mach_O_Object_FileType;
+ case 2: return Mach_O_Executable_FileType;
+ case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType;
+ case 4: return Mach_O_Core_FileType;
+ case 5: return Mach_O_PreloadExecutable_FileType;
+ case 6: return Mach_O_DynamicallyLinkedSharedLib_FileType;
+ case 7: return Mach_O_DynamicLinker_FileType;
+ case 8: return Mach_O_Bundle_FileType;
+ case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType;
+ case 10: return Mach_O_DSYMCompanion_FileType;
+ }
+ break;
+ }
+ case 0xF0: // PowerPC Windows
+ case 0x83: // Alpha 32-bit
+ case 0x84: // Alpha 64-bit
+ case 0x66: // MPS R4000 Windows
+ case 0x50: // mc68K
+ case 0x4c: // 80386 Windows
+ if (magic[1] == 0x01)
+ return COFF_FileType;
+
+ case 0x90: // PA-RISC Windows
+ case 0x68: // mc68K Windows
+ if (magic[1] == 0x02)
+ return COFF_FileType;
+ break;
+
+ case 0x4d: // Possible MS-DOS stub on Windows PE file
+ if (magic[1] == 0x5a) {
+ uint32_t off = *reinterpret_cast<const ulittle32_t *>(magic + 0x3c);
+ // PE/COFF file, either EXE or DLL.
+ if (off < length && memcmp(magic + off, "PE\0\0",4) == 0)
+ return COFF_FileType;
+ }
+ break;
+
+ case 0x64: // x86-64 Windows.
+ if (magic[1] == char(0x86))
+ return COFF_FileType;
+ break;
+
+ default:
+ break;
+ }
+ return Unknown_FileType;
+}
+
+bool
+Path::isArchive() const {
+ fs::file_magic type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ return type == fs::file_magic::archive;
+}
+
+bool
+Path::isDynamicLibrary() const {
+ fs::file_magic type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ switch (type) {
+ default: return false;
+ case fs::file_magic::macho_fixed_virtual_memory_shared_lib:
+ case fs::file_magic::macho_dynamically_linked_shared_lib:
+ case fs::file_magic::macho_dynamically_linked_shared_lib_stub:
+ case fs::file_magic::elf_shared_object:
+ case fs::file_magic::pecoff_executable: return true;
+ }
+}
+
+bool
+Path::isObjectFile() const {
+ fs::file_magic type;
+ if (fs::identify_magic(str(), type) || type == fs::file_magic::unknown)
+ return false;
+ return true;
+}
+
+Path
+Path::FindLibrary(std::string& name) {
+ std::vector<sys::Path> LibPaths;
+ GetSystemLibraryPaths(LibPaths);
+ for (unsigned i = 0; i < LibPaths.size(); ++i) {
+ sys::Path FullPath(LibPaths[i]);
+ FullPath.appendComponent("lib" + name + LTDL_SHLIB_EXT);
+ if (FullPath.isDynamicLibrary())
+ return FullPath;
+ FullPath.eraseSuffix();
+ FullPath.appendSuffix("a");
+ if (FullPath.isArchive())
+ return FullPath;
+ }
+ return sys::Path();
+}
+
+StringRef Path::GetDLLSuffix() {
+ return &(LTDL_SHLIB_EXT[1]);
+}
+
+void
+Path::appendSuffix(StringRef suffix) {
+ if (!suffix.empty()) {
+ path.append(".");
+ path.append(suffix);
+ }
+}
+
+bool
+Path::isBitcodeFile() const {
+ fs::file_magic type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ return type == fs::file_magic::bitcode;
+}
+
+bool Path::hasMagicNumber(StringRef Magic) const {
+ std::string actualMagic;
+ if (getMagicNumber(actualMagic, static_cast<unsigned>(Magic.size())))
+ return Magic == actualMagic;
+ return false;
+}
+
+static void getPathList(const char*path, std::vector<Path>& Paths) {
+ const char* at = path;
+ const char* delim = strchr(at, PathSeparator);
+ Path tmpPath;
+ while (delim != 0) {
+ std::string tmp(at, size_t(delim-at));
+ if (tmpPath.set(tmp))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ at = delim + 1;
+ delim = strchr(at, PathSeparator);
+ }
+
+ if (*at != 0)
+ if (tmpPath.set(std::string(at)))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+}
+
+static StringRef getDirnameCharSep(StringRef path, const char *Sep) {
+ assert(Sep[0] != '\0' && Sep[1] == '\0' &&
+ "Sep must be a 1-character string literal.");
+ if (path.empty())
+ return ".";
+
+ // If the path is all slashes, return a single slash.
+ // Otherwise, remove all trailing slashes.
+
+ signed pos = static_cast<signed>(path.size()) - 1;
+
+ while (pos >= 0 && path[pos] == Sep[0])
+ --pos;
+
+ if (pos < 0)
+ return path[0] == Sep[0] ? Sep : ".";
+
+ // Any slashes left?
+ signed i = 0;
+
+ while (i < pos && path[i] != Sep[0])
+ ++i;
+
+ if (i == pos) // No slashes? Return "."
+ return ".";
+
+ // There is at least one slash left. Remove all trailing non-slashes.
+ while (pos >= 0 && path[pos] != Sep[0])
+ --pos;
+
+ // Remove any trailing slashes.
+ while (pos >= 0 && path[pos] == Sep[0])
+ --pos;
+
+ if (pos < 0)
+ return path[0] == Sep[0] ? Sep : ".";
+
+ return path.substr(0, pos+1);
+}
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/Path.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/Path.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/PathV2.cpp b/contrib/llvm/lib/Support/PathV2.cpp
new file mode 100644
index 000000000000..58a6ea720e73
--- /dev/null
+++ b/contrib/llvm/lib/Support/PathV2.cpp
@@ -0,0 +1,946 @@
+//===-- PathV2.cpp - Implement OS Path Concept ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PathV2.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include <cctype>
+#include <cstdio>
+#include <cstring>
+#ifdef __APPLE__
+#include <unistd.h>
+#endif
+
+namespace {
+ using llvm::StringRef;
+ using llvm::sys::path::is_separator;
+
+#ifdef LLVM_ON_WIN32
+ const char *separators = "\\/";
+ const char prefered_separator = '\\';
+#else
+ const char separators = '/';
+ const char prefered_separator = '/';
+#endif
+
+ StringRef find_first_component(StringRef path) {
+ // Look for this first component in the following order.
+ // * empty (in this case we return an empty string)
+ // * either C: or {//,\\}net.
+ // * {/,\}
+ // * {.,..}
+ // * {file,directory}name
+
+ if (path.empty())
+ return path;
+
+#ifdef LLVM_ON_WIN32
+ // C:
+ if (path.size() >= 2 && std::isalpha(static_cast<unsigned char>(path[0])) &&
+ path[1] == ':')
+ return path.substr(0, 2);
+#endif
+
+ // //net
+ if ((path.size() > 2) &&
+ is_separator(path[0]) &&
+ path[0] == path[1] &&
+ !is_separator(path[2])) {
+ // Find the next directory separator.
+ size_t end = path.find_first_of(separators, 2);
+ return path.substr(0, end);
+ }
+
+ // {/,\}
+ if (is_separator(path[0]))
+ return path.substr(0, 1);
+
+ if (path.startswith(".."))
+ return path.substr(0, 2);
+
+ if (path[0] == '.')
+ return path.substr(0, 1);
+
+ // * {file,directory}name
+ size_t end = path.find_first_of(separators, 2);
+ return path.substr(0, end);
+ }
+
+ size_t filename_pos(StringRef str) {
+ if (str.size() == 2 &&
+ is_separator(str[0]) &&
+ str[0] == str[1])
+ return 0;
+
+ if (str.size() > 0 && is_separator(str[str.size() - 1]))
+ return str.size() - 1;
+
+ size_t pos = str.find_last_of(separators, str.size() - 1);
+
+#ifdef LLVM_ON_WIN32
+ if (pos == StringRef::npos)
+ pos = str.find_last_of(':', str.size() - 2);
+#endif
+
+ if (pos == StringRef::npos ||
+ (pos == 1 && is_separator(str[0])))
+ return 0;
+
+ return pos + 1;
+ }
+
+ size_t root_dir_start(StringRef str) {
+ // case "c:/"
+#ifdef LLVM_ON_WIN32
+ if (str.size() > 2 &&
+ str[1] == ':' &&
+ is_separator(str[2]))
+ return 2;
+#endif
+
+ // case "//"
+ if (str.size() == 2 &&
+ is_separator(str[0]) &&
+ str[0] == str[1])
+ return StringRef::npos;
+
+ // case "//net"
+ if (str.size() > 3 &&
+ is_separator(str[0]) &&
+ str[0] == str[1] &&
+ !is_separator(str[2])) {
+ return str.find_first_of(separators, 2);
+ }
+
+ // case "/"
+ if (str.size() > 0 && is_separator(str[0]))
+ return 0;
+
+ return StringRef::npos;
+ }
+
+ size_t parent_path_end(StringRef path) {
+ size_t end_pos = filename_pos(path);
+
+ bool filename_was_sep = path.size() > 0 && is_separator(path[end_pos]);
+
+ // Skip separators except for root dir.
+ size_t root_dir_pos = root_dir_start(path.substr(0, end_pos));
+
+ while(end_pos > 0 &&
+ (end_pos - 1) != root_dir_pos &&
+ is_separator(path[end_pos - 1]))
+ --end_pos;
+
+ if (end_pos == 1 && root_dir_pos == 0 && filename_was_sep)
+ return StringRef::npos;
+
+ return end_pos;
+ }
+} // end unnamed namespace
+
+namespace llvm {
+namespace sys {
+namespace path {
+
+const_iterator begin(StringRef path) {
+ const_iterator i;
+ i.Path = path;
+ i.Component = find_first_component(path);
+ i.Position = 0;
+ return i;
+}
+
+const_iterator end(StringRef path) {
+ const_iterator i;
+ i.Path = path;
+ i.Position = path.size();
+ return i;
+}
+
+const_iterator &const_iterator::operator++() {
+ assert(Position < Path.size() && "Tried to increment past end!");
+
+ // Increment Position to past the current component
+ Position += Component.size();
+
+ // Check for end.
+ if (Position == Path.size()) {
+ Component = StringRef();
+ return *this;
+ }
+
+ // Both POSIX and Windows treat paths that begin with exactly two separators
+ // specially.
+ bool was_net = Component.size() > 2 &&
+ is_separator(Component[0]) &&
+ Component[1] == Component[0] &&
+ !is_separator(Component[2]);
+
+ // Handle separators.
+ if (is_separator(Path[Position])) {
+ // Root dir.
+ if (was_net
+#ifdef LLVM_ON_WIN32
+ // c:/
+ || Component.endswith(":")
+#endif
+ ) {
+ Component = Path.substr(Position, 1);
+ return *this;
+ }
+
+ // Skip extra separators.
+ while (Position != Path.size() &&
+ is_separator(Path[Position])) {
+ ++Position;
+ }
+
+ // Treat trailing '/' as a '.'.
+ if (Position == Path.size()) {
+ --Position;
+ Component = ".";
+ return *this;
+ }
+ }
+
+ // Find next component.
+ size_t end_pos = Path.find_first_of(separators, Position);
+ Component = Path.slice(Position, end_pos);
+
+ return *this;
+}
+
+const_iterator &const_iterator::operator--() {
+ // If we're at the end and the previous char was a '/', return '.'.
+ if (Position == Path.size() &&
+ Path.size() > 1 &&
+ is_separator(Path[Position - 1])
+#ifdef LLVM_ON_WIN32
+ && Path[Position - 2] != ':'
+#endif
+ ) {
+ --Position;
+ Component = ".";
+ return *this;
+ }
+
+ // Skip separators unless it's the root directory.
+ size_t root_dir_pos = root_dir_start(Path);
+ size_t end_pos = Position;
+
+ while(end_pos > 0 &&
+ (end_pos - 1) != root_dir_pos &&
+ is_separator(Path[end_pos - 1]))
+ --end_pos;
+
+ // Find next separator.
+ size_t start_pos = filename_pos(Path.substr(0, end_pos));
+ Component = Path.slice(start_pos, end_pos);
+ Position = start_pos;
+ return *this;
+}
+
+bool const_iterator::operator==(const const_iterator &RHS) const {
+ return Path.begin() == RHS.Path.begin() &&
+ Position == RHS.Position;
+}
+
+bool const_iterator::operator!=(const const_iterator &RHS) const {
+ return !(*this == RHS);
+}
+
+ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const {
+ return Position - RHS.Position;
+}
+
+const StringRef root_path(StringRef path) {
+ const_iterator b = begin(path),
+ pos = b,
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if (has_net || has_drive) {
+ if ((++pos != e) && is_separator((*pos)[0])) {
+ // {C:/,//net/}, so get the first two components.
+ return path.substr(0, b->size() + pos->size());
+ } else {
+ // just {C:,//net}, return the first component.
+ return *b;
+ }
+ }
+
+ // POSIX style root directory.
+ if (is_separator((*b)[0])) {
+ return *b;
+ }
+ }
+
+ return StringRef();
+}
+
+const StringRef root_name(StringRef path) {
+ const_iterator b = begin(path),
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if (has_net || has_drive) {
+ // just {C:,//net}, return the first component.
+ return *b;
+ }
+ }
+
+ // No path or no name.
+ return StringRef();
+}
+
+const StringRef root_directory(StringRef path) {
+ const_iterator b = begin(path),
+ pos = b,
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if ((has_net || has_drive) &&
+ // {C:,//net}, skip to the next component.
+ (++pos != e) && is_separator((*pos)[0])) {
+ return *pos;
+ }
+
+ // POSIX style root directory.
+ if (!has_net && is_separator((*b)[0])) {
+ return *b;
+ }
+ }
+
+ // No path or no root.
+ return StringRef();
+}
+
+const StringRef relative_path(StringRef path) {
+ StringRef root = root_path(path);
+ return path.substr(root.size());
+}
+
+void append(SmallVectorImpl<char> &path, const Twine &a,
+ const Twine &b,
+ const Twine &c,
+ const Twine &d) {
+ SmallString<32> a_storage;
+ SmallString<32> b_storage;
+ SmallString<32> c_storage;
+ SmallString<32> d_storage;
+
+ SmallVector<StringRef, 4> components;
+ if (!a.isTriviallyEmpty()) components.push_back(a.toStringRef(a_storage));
+ if (!b.isTriviallyEmpty()) components.push_back(b.toStringRef(b_storage));
+ if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage));
+ if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage));
+
+ for (SmallVectorImpl<StringRef>::const_iterator i = components.begin(),
+ e = components.end();
+ i != e; ++i) {
+ bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]);
+ bool component_has_sep = !i->empty() && is_separator((*i)[0]);
+ bool is_root_name = has_root_name(*i);
+
+ if (path_has_sep) {
+ // Strip separators from beginning of component.
+ size_t loc = i->find_first_not_of(separators);
+ StringRef c = i->substr(loc);
+
+ // Append it.
+ path.append(c.begin(), c.end());
+ continue;
+ }
+
+ if (!component_has_sep && !(path.empty() || is_root_name)) {
+ // Add a separator.
+ path.push_back(prefered_separator);
+ }
+
+ path.append(i->begin(), i->end());
+ }
+}
+
+void append(SmallVectorImpl<char> &path,
+ const_iterator begin, const_iterator end) {
+ for (; begin != end; ++begin)
+ path::append(path, *begin);
+}
+
+const StringRef parent_path(StringRef path) {
+ size_t end_pos = parent_path_end(path);
+ if (end_pos == StringRef::npos)
+ return StringRef();
+ else
+ return path.substr(0, end_pos);
+}
+
+void remove_filename(SmallVectorImpl<char> &path) {
+ size_t end_pos = parent_path_end(StringRef(path.begin(), path.size()));
+ if (end_pos != StringRef::npos)
+ path.set_size(end_pos);
+}
+
+void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) {
+ StringRef p(path.begin(), path.size());
+ SmallString<32> ext_storage;
+ StringRef ext = extension.toStringRef(ext_storage);
+
+ // Erase existing extension.
+ size_t pos = p.find_last_of('.');
+ if (pos != StringRef::npos && pos >= filename_pos(p))
+ path.set_size(pos);
+
+ // Append '.' if needed.
+ if (ext.size() > 0 && ext[0] != '.')
+ path.push_back('.');
+
+ // Append extension.
+ path.append(ext.begin(), ext.end());
+}
+
+void native(const Twine &path, SmallVectorImpl<char> &result) {
+ // Clear result.
+ result.clear();
+#ifdef LLVM_ON_WIN32
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+ result.reserve(p.size());
+ for (StringRef::const_iterator i = p.begin(),
+ e = p.end();
+ i != e;
+ ++i) {
+ if (*i == '/')
+ result.push_back('\\');
+ else
+ result.push_back(*i);
+ }
+#else
+ path.toVector(result);
+#endif
+}
+
+const StringRef filename(StringRef path) {
+ return *(--end(path));
+}
+
+const StringRef stem(StringRef path) {
+ StringRef fname = filename(path);
+ size_t pos = fname.find_last_of('.');
+ if (pos == StringRef::npos)
+ return fname;
+ else
+ if ((fname.size() == 1 && fname == ".") ||
+ (fname.size() == 2 && fname == ".."))
+ return fname;
+ else
+ return fname.substr(0, pos);
+}
+
+const StringRef extension(StringRef path) {
+ StringRef fname = filename(path);
+ size_t pos = fname.find_last_of('.');
+ if (pos == StringRef::npos)
+ return StringRef();
+ else
+ if ((fname.size() == 1 && fname == ".") ||
+ (fname.size() == 2 && fname == ".."))
+ return StringRef();
+ else
+ return fname.substr(pos);
+}
+
+bool is_separator(char value) {
+ switch(value) {
+#ifdef LLVM_ON_WIN32
+ case '\\': // fall through
+#endif
+ case '/': return true;
+ default: return false;
+ }
+}
+
+void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
+ result.clear();
+
+#ifdef __APPLE__
+ // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
+ int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR
+ : _CS_DARWIN_USER_CACHE_DIR;
+ size_t ConfLen = confstr(ConfName, 0, 0);
+ if (ConfLen > 0) {
+ do {
+ result.resize(ConfLen);
+ ConfLen = confstr(ConfName, result.data(), result.size());
+ } while (ConfLen > 0 && ConfLen != result.size());
+
+ if (ConfLen > 0) {
+ assert(result.back() == 0);
+ result.pop_back();
+ return;
+ }
+
+ result.clear();
+ }
+#endif
+
+ // Check whether the temporary directory is specified by an environment
+ // variable.
+ const char *EnvironmentVariable;
+#ifdef LLVM_ON_WIN32
+ EnvironmentVariable = "TEMP";
+#else
+ EnvironmentVariable = "TMPDIR";
+#endif
+ if (char *RequestedDir = getenv(EnvironmentVariable)) {
+ result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
+ return;
+ }
+
+ // Fall back to a system default.
+ const char *DefaultResult;
+#ifdef LLVM_ON_WIN32
+ (void)erasedOnReboot;
+ DefaultResult = "C:\\TEMP";
+#else
+ if (erasedOnReboot)
+ DefaultResult = "/tmp";
+ else
+ DefaultResult = "/var/tmp";
+#endif
+ result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
+}
+
+bool has_root_name(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_name(p).empty();
+}
+
+bool has_root_directory(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_directory(p).empty();
+}
+
+bool has_root_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_path(p).empty();
+}
+
+bool has_relative_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !relative_path(p).empty();
+}
+
+bool has_filename(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !filename(p).empty();
+}
+
+bool has_parent_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !parent_path(p).empty();
+}
+
+bool has_stem(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !stem(p).empty();
+}
+
+bool has_extension(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !extension(p).empty();
+}
+
+bool is_absolute(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ bool rootDir = has_root_directory(p),
+#ifdef LLVM_ON_WIN32
+ rootName = has_root_name(p);
+#else
+ rootName = true;
+#endif
+
+ return rootDir && rootName;
+}
+
+bool is_relative(const Twine &path) {
+ return !is_absolute(path);
+}
+
+} // end namespace path
+
+namespace fs {
+
+error_code make_absolute(SmallVectorImpl<char> &path) {
+ StringRef p(path.data(), path.size());
+
+ bool rootDirectory = path::has_root_directory(p),
+#ifdef LLVM_ON_WIN32
+ rootName = path::has_root_name(p);
+#else
+ rootName = true;
+#endif
+
+ // Already absolute.
+ if (rootName && rootDirectory)
+ return error_code::success();
+
+ // All of the following conditions will need the current directory.
+ SmallString<128> current_dir;
+ if (error_code ec = current_path(current_dir)) return ec;
+
+ // Relative path. Prepend the current directory.
+ if (!rootName && !rootDirectory) {
+ // Append path to the current directory.
+ path::append(current_dir, p);
+ // Set path to the result.
+ path.swap(current_dir);
+ return error_code::success();
+ }
+
+ if (!rootName && rootDirectory) {
+ StringRef cdrn = path::root_name(current_dir);
+ SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
+ path::append(curDirRootName, p);
+ // Set path to the result.
+ path.swap(curDirRootName);
+ return error_code::success();
+ }
+
+ if (rootName && !rootDirectory) {
+ StringRef pRootName = path::root_name(p);
+ StringRef bRootDirectory = path::root_directory(current_dir);
+ StringRef bRelativePath = path::relative_path(current_dir);
+ StringRef pRelativePath = path::relative_path(p);
+
+ SmallString<128> res;
+ path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
+ path.swap(res);
+ return error_code::success();
+ }
+
+ llvm_unreachable("All rootName and rootDirectory combinations should have "
+ "occurred above!");
+}
+
+error_code create_directories(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ StringRef parent = path::parent_path(p);
+ if (!parent.empty()) {
+ bool parent_exists;
+ if (error_code ec = fs::exists(parent, parent_exists)) return ec;
+
+ if (!parent_exists)
+ if (error_code ec = create_directories(parent, existed)) return ec;
+ }
+
+ return create_directory(p, existed);
+}
+
+bool exists(file_status status) {
+ return status_known(status) && status.type() != file_type::file_not_found;
+}
+
+bool status_known(file_status s) {
+ return s.type() != file_type::status_error;
+}
+
+bool is_directory(file_status status) {
+ return status.type() == file_type::directory_file;
+}
+
+error_code is_directory(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_directory(st);
+ return error_code::success();
+}
+
+bool is_regular_file(file_status status) {
+ return status.type() == file_type::regular_file;
+}
+
+error_code is_regular_file(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_regular_file(st);
+ return error_code::success();
+}
+
+bool is_symlink(file_status status) {
+ return status.type() == file_type::symlink_file;
+}
+
+error_code is_symlink(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_symlink(st);
+ return error_code::success();
+}
+
+bool is_other(file_status status) {
+ return exists(status) &&
+ !is_regular_file(status) &&
+ !is_directory(status) &&
+ !is_symlink(status);
+}
+
+void directory_entry::replace_filename(const Twine &filename, file_status st) {
+ SmallString<128> path(Path.begin(), Path.end());
+ path::remove_filename(path);
+ path::append(path, filename);
+ Path = path.str();
+ Status = st;
+}
+
+error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
+ SmallString<32> MagicStorage;
+ StringRef Magic = magic.toStringRef(MagicStorage);
+ SmallString<32> Buffer;
+
+ if (error_code ec = get_magic(path, Magic.size(), Buffer)) {
+ if (ec == errc::value_too_large) {
+ // Magic.size() > file_size(Path).
+ result = false;
+ return error_code::success();
+ }
+ return ec;
+ }
+
+ result = Magic == Buffer;
+ return error_code::success();
+}
+
+/// @brief Identify the magic in magic.
+file_magic identify_magic(StringRef magic) {
+ if (magic.size() < 4)
+ return file_magic::unknown;
+ switch ((unsigned char)magic[0]) {
+ case 0xDE: // 0x0B17C0DE = BC wraper
+ if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+ magic[3] == (char)0x0B)
+ return file_magic::bitcode;
+ break;
+ case 'B':
+ if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+ return file_magic::bitcode;
+ break;
+ case '!':
+ if (magic.size() >= 8)
+ if (memcmp(magic.data(),"!<arch>\n",8) == 0)
+ return file_magic::archive;
+ break;
+
+ case '\177':
+ if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+ if (magic.size() >= 18 && magic[17] == 0)
+ switch (magic[16]) {
+ default: break;
+ case 1: return file_magic::elf_relocatable;
+ case 2: return file_magic::elf_executable;
+ case 3: return file_magic::elf_shared_object;
+ case 4: return file_magic::elf_core;
+ }
+ }
+ break;
+
+ case 0xCA:
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ magic[3] == char(0xBE)) {
+ // This is complicated by an overlap with Java class files.
+ // See the Mach-O section in /usr/share/file/magic for details.
+ if (magic.size() >= 8 && magic[7] < 43)
+ // FIXME: Universal Binary of any type.
+ return file_magic::macho_dynamically_linked_shared_lib;
+ }
+ break;
+
+ // The two magic numbers for mach-o are:
+ // 0xfeedface - 32-bit mach-o
+ // 0xfeedfacf - 64-bit mach-o
+ case 0xFE:
+ case 0xCE:
+ case 0xCF: {
+ uint16_t type = 0;
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ magic[2] == char(0xFA) &&
+ (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
+ /* Native endian */
+ if (magic.size() >= 16) type = magic[14] << 8 | magic[15];
+ } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
+ magic[1] == char(0xFA) && magic[2] == char(0xED) &&
+ magic[3] == char(0xFE)) {
+ /* Reverse endian */
+ if (magic.size() >= 14) type = magic[13] << 8 | magic[12];
+ }
+ switch (type) {
+ default: break;
+ case 1: return file_magic::macho_object;
+ case 2: return file_magic::macho_executable;
+ case 3: return file_magic::macho_fixed_virtual_memory_shared_lib;
+ case 4: return file_magic::macho_core;
+ case 5: return file_magic::macho_preload_executabl;
+ case 6: return file_magic::macho_dynamically_linked_shared_lib;
+ case 7: return file_magic::macho_dynamic_linker;
+ case 8: return file_magic::macho_bundle;
+ case 9: return file_magic::macho_dynamic_linker;
+ case 10: return file_magic::macho_dsym_companion;
+ }
+ break;
+ }
+ case 0xF0: // PowerPC Windows
+ case 0x83: // Alpha 32-bit
+ case 0x84: // Alpha 64-bit
+ case 0x66: // MPS R4000 Windows
+ case 0x50: // mc68K
+ case 0x4c: // 80386 Windows
+ if (magic[1] == 0x01)
+ return file_magic::coff_object;
+
+ case 0x90: // PA-RISC Windows
+ case 0x68: // mc68K Windows
+ if (magic[1] == 0x02)
+ return file_magic::coff_object;
+ break;
+
+ case 0x4d: // Possible MS-DOS stub on Windows PE file
+ if (magic[1] == 0x5a) {
+ uint32_t off =
+ *reinterpret_cast<const support::ulittle32_t*>(magic.data() + 0x3c);
+ // PE/COFF file, either EXE or DLL.
+ if (off < magic.size() && memcmp(magic.data() + off, "PE\0\0",4) == 0)
+ return file_magic::pecoff_executable;
+ }
+ break;
+
+ case 0x64: // x86-64 Windows.
+ if (magic[1] == char(0x86))
+ return file_magic::coff_object;
+ break;
+
+ default:
+ break;
+ }
+ return file_magic::unknown;
+}
+
+error_code identify_magic(const Twine &path, file_magic &result) {
+ SmallString<32> Magic;
+ error_code ec = get_magic(path, Magic.capacity(), Magic);
+ if (ec && ec != errc::value_too_large)
+ return ec;
+
+ result = identify_magic(Magic);
+ return error_code::success();
+}
+
+namespace {
+error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
+ if (ft == file_type::directory_file) {
+ // This code would be a lot better with exceptions ;/.
+ error_code ec;
+ directory_iterator i(path, ec);
+ if (ec) return ec;
+ for (directory_iterator e; i != e; i.increment(ec)) {
+ if (ec) return ec;
+ file_status st;
+ if (error_code ec = i->status(st)) return ec;
+ if (error_code ec = remove_all_r(i->path(), st.type(), count)) return ec;
+ }
+ bool obviously_this_exists;
+ if (error_code ec = remove(path, obviously_this_exists)) return ec;
+ assert(obviously_this_exists);
+ ++count; // Include the directory itself in the items removed.
+ } else {
+ bool obviously_this_exists;
+ if (error_code ec = remove(path, obviously_this_exists)) return ec;
+ assert(obviously_this_exists);
+ ++count;
+ }
+
+ return error_code::success();
+}
+} // end unnamed namespace
+
+error_code remove_all(const Twine &path, uint32_t &num_removed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ file_status fs;
+ if (error_code ec = status(path, fs))
+ return ec;
+ num_removed = 0;
+ return remove_all_r(p, fs.type(), num_removed);
+}
+
+error_code directory_entry::status(file_status &result) const {
+ return fs::status(Path, result);
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
+
+// Include the truly platform-specific parts.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/PathV2.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/PathV2.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/PluginLoader.cpp b/contrib/llvm/lib/Support/PluginLoader.cpp
new file mode 100644
index 000000000000..358137f08f5f
--- /dev/null
+++ b/contrib/llvm/lib/Support/PluginLoader.cpp
@@ -0,0 +1,47 @@
+//===-- PluginLoader.cpp - Implement -load command line option ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the -load <plugin> command line option handler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DONT_GET_PLUGIN_LOADER_OPTION
+#include "llvm/Support/PluginLoader.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+using namespace llvm;
+
+static ManagedStatic<std::vector<std::string> > Plugins;
+static ManagedStatic<sys::SmartMutex<true> > PluginsLock;
+
+void PluginLoader::operator=(const std::string &Filename) {
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
+ std::string Error;
+ if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) {
+ errs() << "Error opening '" << Filename << "': " << Error
+ << "\n -load request ignored.\n";
+ } else {
+ Plugins->push_back(Filename);
+ }
+}
+
+unsigned PluginLoader::getNumPlugins() {
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
+ return Plugins.isConstructed() ? Plugins->size() : 0;
+}
+
+std::string &PluginLoader::getPlugin(unsigned num) {
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
+ assert(Plugins.isConstructed() && num < Plugins->size() &&
+ "Asking for an out of bounds plugin");
+ return (*Plugins)[num];
+}
diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
new file mode 100644
index 000000000000..23ee5ab105ae
--- /dev/null
+++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
@@ -0,0 +1,137 @@
+//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/ThreadLocal.h"
+#include "llvm/Support/Watchdog.h"
+#include "llvm/Support/raw_ostream.h"
+
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+#include <CrashReporterClient.h>
+#endif
+
+using namespace llvm;
+
+namespace llvm {
+ bool DisablePrettyStackTrace = false;
+}
+
+// FIXME: This should be thread local when llvm supports threads.
+static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead;
+
+static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
+ unsigned NextID = 0;
+ if (Entry->getNextEntry())
+ NextID = PrintStack(Entry->getNextEntry(), OS);
+ OS << NextID << ".\t";
+ {
+ sys::Watchdog W(5);
+ Entry->print(OS);
+ }
+
+ return NextID+1;
+}
+
+/// PrintCurStackTrace - Print the current stack trace to the specified stream.
+static void PrintCurStackTrace(raw_ostream &OS) {
+ // Don't print an empty trace.
+ if (PrettyStackTraceHead.get() == 0) return;
+
+ // If there are pretty stack frames registered, walk and emit them.
+ OS << "Stack dump:\n";
+
+ PrintStack(PrettyStackTraceHead.get(), OS);
+ OS.flush();
+}
+
+// Integrate with crash reporter libraries.
+#if defined (__APPLE__) && HAVE_CRASHREPORTERCLIENT_H
+// If any clients of llvm try to link to libCrashReporterClient.a themselves,
+// only one crash info struct will be used.
+extern "C" {
+CRASH_REPORTER_CLIENT_HIDDEN
+struct crashreporter_annotations_t gCRAnnotations
+ __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
+ = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0 };
+}
+#elif defined (__APPLE__) && HAVE_CRASHREPORTER_INFO
+static const char *__crashreporter_info__ = 0;
+asm(".desc ___crashreporter_info__, 0x10");
+#endif
+
+
+/// CrashHandler - This callback is run if a fatal signal is delivered to the
+/// process, it prints the pretty stack trace.
+static void CrashHandler(void *) {
+#ifndef __APPLE__
+ // On non-apple systems, just emit the crash stack trace to stderr.
+ PrintCurStackTrace(errs());
+#else
+ // Otherwise, emit to a smallvector of chars, send *that* to stderr, but also
+ // put it into __crashreporter_info__.
+ SmallString<2048> TmpStr;
+ {
+ raw_svector_ostream Stream(TmpStr);
+ PrintCurStackTrace(Stream);
+ }
+
+ if (!TmpStr.empty()) {
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+ // Cast to void to avoid warning.
+ (void)CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
+#elif HAVE_CRASHREPORTER_INFO
+ __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
+#endif
+ errs() << TmpStr.str();
+ }
+
+#endif
+}
+
+static bool RegisterCrashPrinter() {
+ if (!DisablePrettyStackTrace)
+ sys::AddSignalHandler(CrashHandler, 0);
+ return false;
+}
+
+PrettyStackTraceEntry::PrettyStackTraceEntry() {
+ // The first time this is called, we register the crash printer.
+ static bool HandlerRegistered = RegisterCrashPrinter();
+ (void)HandlerRegistered;
+
+ // Link ourselves.
+ NextEntry = PrettyStackTraceHead.get();
+ PrettyStackTraceHead.set(this);
+}
+
+PrettyStackTraceEntry::~PrettyStackTraceEntry() {
+ assert(PrettyStackTraceHead.get() == this &&
+ "Pretty stack trace entry destruction is out of order");
+ PrettyStackTraceHead.set(getNextEntry());
+}
+
+void PrettyStackTraceString::print(raw_ostream &OS) const {
+ OS << Str << "\n";
+}
+
+void PrettyStackTraceProgram::print(raw_ostream &OS) const {
+ OS << "Program arguments: ";
+ // Print the argument list.
+ for (unsigned i = 0, e = ArgC; i != e; ++i)
+ OS << ArgV[i] << ' ';
+ OS << '\n';
+}
diff --git a/contrib/llvm/lib/Support/Process.cpp b/contrib/llvm/lib/Support/Process.cpp
new file mode 100644
index 000000000000..2c0d37bb3299
--- /dev/null
+++ b/contrib/llvm/lib/Support/Process.cpp
@@ -0,0 +1,89 @@
+//===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file implements the operating system Process concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Process.h"
+
+using namespace llvm;
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+// Empty virtual destructor to anchor the vtable for the process class.
+process::~process() {}
+
+self_process *process::get_self() {
+ // Use a function local static for thread safe initialization and allocate it
+ // as a raw pointer to ensure it is never destroyed.
+ static self_process *SP = new self_process();
+
+ return SP;
+}
+
+#if defined(_MSC_VER)
+// Visual Studio complains that the self_process destructor never exits. This
+// doesn't make much sense, as that's the whole point of calling abort... Just
+// silence this warning.
+#pragma warning(push)
+#pragma warning(disable:4722)
+#endif
+
+// The destructor for the self_process subclass must never actually be
+// executed. There should be at most one instance of this class, and that
+// instance should live until the process terminates to avoid the potential for
+// racy accesses during shutdown.
+self_process::~self_process() {
+ llvm_unreachable("This destructor must never be executed!");
+}
+
+/// \brief A helper function to compute the elapsed wall-time since the program
+/// started.
+///
+/// Note that this routine actually computes the elapsed wall time since the
+/// first time it was called. However, we arrange to have it called during the
+/// startup of the process to get approximately correct results.
+static TimeValue getElapsedWallTime() {
+ static TimeValue &StartTime = *new TimeValue(TimeValue::now());
+ return TimeValue::now() - StartTime;
+}
+
+/// \brief A special global variable to ensure we call \c getElapsedWallTime
+/// during global initialization of the program.
+///
+/// Note that this variable is never referenced elsewhere. Doing so could
+/// create race conditions during program startup or shutdown.
+static volatile TimeValue DummyTimeValue = getElapsedWallTime();
+
+// Implement this routine by using the static helpers above. They're already
+// portable.
+TimeValue self_process::get_wall_time() const {
+ return getElapsedWallTime();
+}
+
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Process.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Process.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/Program.cpp b/contrib/llvm/lib/Support/Program.cpp
new file mode 100644
index 000000000000..201d5c0d3056
--- /dev/null
+++ b/contrib/llvm/lib/Support/Program.cpp
@@ -0,0 +1,60 @@
+//===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file implements the operating system Program concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Program.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+int
+Program::ExecuteAndWait(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned secondsToWait,
+ unsigned memoryLimit,
+ std::string* ErrMsg,
+ bool *ExecutionFailed) {
+ Program prg;
+ if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg)) {
+ if (ExecutionFailed) *ExecutionFailed = false;
+ return prg.Wait(path, secondsToWait, ErrMsg);
+ }
+ if (ExecutionFailed) *ExecutionFailed = true;
+ return -1;
+}
+
+void
+Program::ExecuteNoWait(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ Program prg;
+ prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg);
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Program.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Program.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/RWMutex.cpp b/contrib/llvm/lib/Support/RWMutex.cpp
new file mode 100644
index 000000000000..6a34f2d08524
--- /dev/null
+++ b/contrib/llvm/lib/Support/RWMutex.cpp
@@ -0,0 +1,125 @@
+//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/RWMutex.h"
+#include <cstring>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+RWMutexImpl::RWMutexImpl() { }
+RWMutexImpl::~RWMutexImpl() { }
+bool RWMutexImpl::reader_acquire() { return true; }
+bool RWMutexImpl::reader_release() { return true; }
+bool RWMutexImpl::writer_acquire() { return true; }
+bool RWMutexImpl::writer_release() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+// Construct a RWMutex using pthread calls
+RWMutexImpl::RWMutexImpl()
+ : data_(0)
+{
+ // Declare the pthread_rwlock data structures
+ pthread_rwlock_t* rwlock =
+ static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
+
+#ifdef __APPLE__
+ // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
+ bzero(rwlock, sizeof(pthread_rwlock_t));
+#endif
+
+ // Initialize the rwlock
+ int errorcode = pthread_rwlock_init(rwlock, NULL);
+ (void)errorcode;
+ assert(errorcode == 0);
+
+ // Assign the data member
+ data_ = rwlock;
+}
+
+// Destruct a RWMutex
+RWMutexImpl::~RWMutexImpl()
+{
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+ pthread_rwlock_destroy(rwlock);
+ free(rwlock);
+}
+
+bool
+RWMutexImpl::reader_acquire()
+{
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_rdlock(rwlock);
+ return errorcode == 0;
+}
+
+bool
+RWMutexImpl::reader_release()
+{
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
+}
+
+bool
+RWMutexImpl::writer_acquire()
+{
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_wrlock(rwlock);
+ return errorcode == 0;
+}
+
+bool
+RWMutexImpl::writer_release()
+{
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/RWMutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/RWMutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in Support/Mutex.cpp
+#endif
+#endif
diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp
new file mode 100644
index 000000000000..efc8b90a0090
--- /dev/null
+++ b/contrib/llvm/lib/Support/Regex.cpp
@@ -0,0 +1,170 @@
+//===-- Regex.cpp - Regular Expression matcher implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a POSIX regular expression matcher.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Regex.h"
+#include "regex_impl.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+using namespace llvm;
+
+Regex::Regex(StringRef regex, unsigned Flags) {
+ unsigned flags = 0;
+ preg = new llvm_regex();
+ preg->re_endp = regex.end();
+ if (Flags & IgnoreCase)
+ flags |= REG_ICASE;
+ if (Flags & Newline)
+ flags |= REG_NEWLINE;
+ if (!(Flags & BasicRegex))
+ flags |= REG_EXTENDED;
+ error = llvm_regcomp(preg, regex.data(), flags|REG_PEND);
+}
+
+Regex::~Regex() {
+ llvm_regfree(preg);
+ delete preg;
+}
+
+bool Regex::isValid(std::string &Error) {
+ if (!error)
+ return true;
+
+ size_t len = llvm_regerror(error, preg, NULL, 0);
+
+ Error.resize(len);
+ llvm_regerror(error, preg, &Error[0], len);
+ return false;
+}
+
+/// getNumMatches - In a valid regex, return the number of parenthesized
+/// matches it contains.
+unsigned Regex::getNumMatches() const {
+ return preg->re_nsub;
+}
+
+bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
+ unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
+
+ // pmatch needs to have at least one element.
+ SmallVector<llvm_regmatch_t, 8> pm;
+ pm.resize(nmatch > 0 ? nmatch : 1);
+ pm[0].rm_so = 0;
+ pm[0].rm_eo = String.size();
+
+ int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
+
+ if (rc == REG_NOMATCH)
+ return false;
+ if (rc != 0) {
+ // regexec can fail due to invalid pattern or running out of memory.
+ error = rc;
+ return false;
+ }
+
+ // There was a match.
+
+ if (Matches) { // match position requested
+ Matches->clear();
+
+ for (unsigned i = 0; i != nmatch; ++i) {
+ if (pm[i].rm_so == -1) {
+ // this group didn't match
+ Matches->push_back(StringRef());
+ continue;
+ }
+ assert(pm[i].rm_eo >= pm[i].rm_so);
+ Matches->push_back(StringRef(String.data()+pm[i].rm_so,
+ pm[i].rm_eo-pm[i].rm_so));
+ }
+ }
+
+ return true;
+}
+
+std::string Regex::sub(StringRef Repl, StringRef String,
+ std::string *Error) {
+ SmallVector<StringRef, 8> Matches;
+
+ // Reset error, if given.
+ if (Error && !Error->empty()) *Error = "";
+
+ // Return the input if there was no match.
+ if (!match(String, &Matches))
+ return String;
+
+ // Otherwise splice in the replacement string, starting with the prefix before
+ // the match.
+ std::string Res(String.begin(), Matches[0].begin());
+
+ // Then the replacement string, honoring possible substitutions.
+ while (!Repl.empty()) {
+ // Skip to the next escape.
+ std::pair<StringRef, StringRef> Split = Repl.split('\\');
+
+ // Add the skipped substring.
+ Res += Split.first;
+
+ // Check for terminimation and trailing backslash.
+ if (Split.second.empty()) {
+ if (Repl.size() != Split.first.size() &&
+ Error && Error->empty())
+ *Error = "replacement string contained trailing backslash";
+ break;
+ }
+
+ // Otherwise update the replacement string and interpret escapes.
+ Repl = Split.second;
+
+ // FIXME: We should have a StringExtras function for mapping C99 escapes.
+ switch (Repl[0]) {
+ // Treat all unrecognized characters as self-quoting.
+ default:
+ Res += Repl[0];
+ Repl = Repl.substr(1);
+ break;
+
+ // Single character escapes.
+ case 't':
+ Res += '\t';
+ Repl = Repl.substr(1);
+ break;
+ case 'n':
+ Res += '\n';
+ Repl = Repl.substr(1);
+ break;
+
+ // Decimal escapes are backreferences.
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9': {
+ // Extract the backreference number.
+ StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
+ Repl = Repl.substr(Ref.size());
+
+ unsigned RefValue;
+ if (!Ref.getAsInteger(10, RefValue) &&
+ RefValue < Matches.size())
+ Res += Matches[RefValue];
+ else if (Error && Error->empty())
+ *Error = "invalid backreference string '" + Ref.str() + "'";
+ break;
+ }
+ }
+ }
+
+ // And finally the suffix.
+ Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
+
+ return Res;
+}
diff --git a/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp
new file mode 100644
index 000000000000..2d23902bb85c
--- /dev/null
+++ b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp
@@ -0,0 +1,58 @@
+//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file pulls the addresses of certain symbols out of the linker. It must
+// include as few header files as possible because it declares the symbols as
+// void*, which would conflict with the actual symbol type if any header
+// declared it.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string.h>
+
+// Must declare the symbols in the global namespace.
+static void *DoSearch(const char* symbolName) {
+#define EXPLICIT_SYMBOL(SYM) \
+ extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
+
+ // If this is darwin, it has some funky issues, try to solve them here. Some
+ // important symbols are marked 'private external' which doesn't allow
+ // SearchForAddressOfSymbol to find them. As such, we special case them here,
+ // there is only a small handful of them.
+
+#ifdef __APPLE__
+ {
+ // __eprintf is sometimes used for assert() handling on x86.
+ //
+ // FIXME: Currently disabled when using Clang, as we don't always have our
+ // runtime support libraries available.
+#ifndef __clang__
+#ifdef __i386__
+ EXPLICIT_SYMBOL(__eprintf);
+#endif
+#endif
+ }
+#endif
+
+#ifdef __CYGWIN__
+ {
+ EXPLICIT_SYMBOL(_alloca);
+ EXPLICIT_SYMBOL(__main);
+ }
+#endif
+
+#undef EXPLICIT_SYMBOL
+ return 0;
+}
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName) {
+ return DoSearch(symbolName);
+}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/Signals.cpp b/contrib/llvm/lib/Support/Signals.cpp
new file mode 100644
index 000000000000..a11789372d93
--- /dev/null
+++ b/contrib/llvm/lib/Support/Signals.cpp
@@ -0,0 +1,34 @@
+//===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Signals.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Signals.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Signals.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/SmallPtrSet.cpp b/contrib/llvm/lib/Support/SmallPtrSet.cpp
new file mode 100644
index 000000000000..f0fed7792ce6
--- /dev/null
+++ b/contrib/llvm/lib/Support/SmallPtrSet.cpp
@@ -0,0 +1,273 @@
+//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SmallPtrSet class. See SmallPtrSet.h for an
+// overview of the algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cstdlib>
+
+using namespace llvm;
+
+void SmallPtrSetImpl::shrink_and_clear() {
+ assert(!isSmall() && "Can't shrink a small set!");
+ free(CurArray);
+
+ // Reduce the number of buckets.
+ CurArraySize = NumElements > 16 ? 1 << (Log2_32_Ceil(NumElements) + 1) : 32;
+ NumElements = NumTombstones = 0;
+
+ // Install the new array. Clear all the buckets to empty.
+ CurArray = (const void**)malloc(sizeof(void*) * CurArraySize);
+ assert(CurArray && "Failed to allocate memory?");
+ memset(CurArray, -1, CurArraySize*sizeof(void*));
+}
+
+bool SmallPtrSetImpl::insert_imp(const void * Ptr) {
+ if (isSmall()) {
+ // Check to see if it is already in the set.
+ for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
+ APtr != E; ++APtr)
+ if (*APtr == Ptr)
+ return false;
+
+ // Nope, there isn't. If we stay small, just 'pushback' now.
+ if (NumElements < CurArraySize-1) {
+ SmallArray[NumElements++] = Ptr;
+ return true;
+ }
+ // Otherwise, hit the big set case, which will call grow.
+ }
+
+ if (NumElements*4 >= CurArraySize*3) {
+ // If more than 3/4 of the array is full, grow.
+ Grow(CurArraySize < 64 ? 128 : CurArraySize*2);
+ } else if (CurArraySize-(NumElements+NumTombstones) < CurArraySize/8) {
+ // If fewer of 1/8 of the array is empty (meaning that many are filled with
+ // tombstones), rehash.
+ Grow(CurArraySize);
+ }
+
+ // Okay, we know we have space. Find a hash bucket.
+ const void **Bucket = const_cast<const void**>(FindBucketFor(Ptr));
+ if (*Bucket == Ptr) return false; // Already inserted, good.
+
+ // Otherwise, insert it!
+ if (*Bucket == getTombstoneMarker())
+ --NumTombstones;
+ *Bucket = Ptr;
+ ++NumElements; // Track density.
+ return true;
+}
+
+bool SmallPtrSetImpl::erase_imp(const void * Ptr) {
+ if (isSmall()) {
+ // Check to see if it is in the set.
+ for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
+ APtr != E; ++APtr)
+ if (*APtr == Ptr) {
+ // If it is in the set, replace this element.
+ *APtr = E[-1];
+ E[-1] = getEmptyMarker();
+ --NumElements;
+ return true;
+ }
+
+ return false;
+ }
+
+ // Okay, we know we have space. Find a hash bucket.
+ void **Bucket = const_cast<void**>(FindBucketFor(Ptr));
+ if (*Bucket != Ptr) return false; // Not in the set?
+
+ // Set this as a tombstone.
+ *Bucket = getTombstoneMarker();
+ --NumElements;
+ ++NumTombstones;
+ return true;
+}
+
+const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const {
+ unsigned Bucket = DenseMapInfo<void *>::getHashValue(Ptr) & (CurArraySize-1);
+ unsigned ArraySize = CurArraySize;
+ unsigned ProbeAmt = 1;
+ const void *const *Array = CurArray;
+ const void *const *Tombstone = 0;
+ while (1) {
+ // Found Ptr's bucket?
+ if (Array[Bucket] == Ptr)
+ return Array+Bucket;
+
+ // If we found an empty bucket, the pointer doesn't exist in the set.
+ // Return a tombstone if we've seen one so far, or the empty bucket if
+ // not.
+ if (Array[Bucket] == getEmptyMarker())
+ return Tombstone ? Tombstone : Array+Bucket;
+
+ // If this is a tombstone, remember it. If Ptr ends up not in the set, we
+ // prefer to return it than something that would require more probing.
+ if (Array[Bucket] == getTombstoneMarker() && !Tombstone)
+ Tombstone = Array+Bucket; // Remember the first tombstone found.
+
+ // It's a hash collision or a tombstone. Reprobe.
+ Bucket = (Bucket + ProbeAmt++) & (ArraySize-1);
+ }
+}
+
+/// Grow - Allocate a larger backing store for the buckets and move it over.
+///
+void SmallPtrSetImpl::Grow(unsigned NewSize) {
+ // Allocate at twice as many buckets, but at least 128.
+ unsigned OldSize = CurArraySize;
+
+ const void **OldBuckets = CurArray;
+ bool WasSmall = isSmall();
+
+ // Install the new array. Clear all the buckets to empty.
+ CurArray = (const void**)malloc(sizeof(void*) * NewSize);
+ assert(CurArray && "Failed to allocate memory?");
+ CurArraySize = NewSize;
+ memset(CurArray, -1, NewSize*sizeof(void*));
+
+ // Copy over all the elements.
+ if (WasSmall) {
+ // Small sets store their elements in order.
+ for (const void **BucketPtr = OldBuckets, **E = OldBuckets+NumElements;
+ BucketPtr != E; ++BucketPtr) {
+ const void *Elt = *BucketPtr;
+ *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+ }
+ } else {
+ // Copy over all valid entries.
+ for (const void **BucketPtr = OldBuckets, **E = OldBuckets+OldSize;
+ BucketPtr != E; ++BucketPtr) {
+ // Copy over the element if it is valid.
+ const void *Elt = *BucketPtr;
+ if (Elt != getTombstoneMarker() && Elt != getEmptyMarker())
+ *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+ }
+
+ free(OldBuckets);
+ NumTombstones = 0;
+ }
+}
+
+SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
+ const SmallPtrSetImpl& that) {
+ SmallArray = SmallStorage;
+
+ // If we're becoming small, prepare to insert into our stack space
+ if (that.isSmall()) {
+ CurArray = SmallArray;
+ // Otherwise, allocate new heap space (unless we were the same size)
+ } else {
+ CurArray = (const void**)malloc(sizeof(void*) * that.CurArraySize);
+ assert(CurArray && "Failed to allocate memory?");
+ }
+
+ // Copy over the new array size
+ CurArraySize = that.CurArraySize;
+
+ // Copy over the contents from the other set
+ memcpy(CurArray, that.CurArray, sizeof(void*)*CurArraySize);
+
+ NumElements = that.NumElements;
+ NumTombstones = that.NumTombstones;
+}
+
+/// CopyFrom - implement operator= from a smallptrset that has the same pointer
+/// type, but may have a different small size.
+void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
+ if (isSmall() && RHS.isSmall())
+ assert(CurArraySize == RHS.CurArraySize &&
+ "Cannot assign sets with different small sizes");
+
+ // If we're becoming small, prepare to insert into our stack space
+ if (RHS.isSmall()) {
+ if (!isSmall())
+ free(CurArray);
+ CurArray = SmallArray;
+ // Otherwise, allocate new heap space (unless we were the same size)
+ } else if (CurArraySize != RHS.CurArraySize) {
+ if (isSmall())
+ CurArray = (const void**)malloc(sizeof(void*) * RHS.CurArraySize);
+ else
+ CurArray = (const void**)realloc(CurArray, sizeof(void*)*RHS.CurArraySize);
+ assert(CurArray && "Failed to allocate memory?");
+ }
+
+ // Copy over the new array size
+ CurArraySize = RHS.CurArraySize;
+
+ // Copy over the contents from the other set
+ memcpy(CurArray, RHS.CurArray, sizeof(void*)*CurArraySize);
+
+ NumElements = RHS.NumElements;
+ NumTombstones = RHS.NumTombstones;
+}
+
+void SmallPtrSetImpl::swap(SmallPtrSetImpl &RHS) {
+ if (this == &RHS) return;
+
+ // We can only avoid copying elements if neither set is small.
+ if (!this->isSmall() && !RHS.isSmall()) {
+ std::swap(this->CurArray, RHS.CurArray);
+ std::swap(this->CurArraySize, RHS.CurArraySize);
+ std::swap(this->NumElements, RHS.NumElements);
+ std::swap(this->NumTombstones, RHS.NumTombstones);
+ return;
+ }
+
+ // FIXME: From here on we assume that both sets have the same small size.
+
+ // If only RHS is small, copy the small elements into LHS and move the pointer
+ // from LHS to RHS.
+ if (!this->isSmall() && RHS.isSmall()) {
+ std::copy(RHS.SmallArray, RHS.SmallArray+RHS.CurArraySize,
+ this->SmallArray);
+ std::swap(this->NumElements, RHS.NumElements);
+ std::swap(this->CurArraySize, RHS.CurArraySize);
+ RHS.CurArray = this->CurArray;
+ RHS.NumTombstones = this->NumTombstones;
+ this->CurArray = this->SmallArray;
+ this->NumTombstones = 0;
+ return;
+ }
+
+ // If only LHS is small, copy the small elements into RHS and move the pointer
+ // from RHS to LHS.
+ if (this->isSmall() && !RHS.isSmall()) {
+ std::copy(this->SmallArray, this->SmallArray+this->CurArraySize,
+ RHS.SmallArray);
+ std::swap(RHS.NumElements, this->NumElements);
+ std::swap(RHS.CurArraySize, this->CurArraySize);
+ this->CurArray = RHS.CurArray;
+ this->NumTombstones = RHS.NumTombstones;
+ RHS.CurArray = RHS.SmallArray;
+ RHS.NumTombstones = 0;
+ return;
+ }
+
+ // Both a small, just swap the small elements.
+ assert(this->isSmall() && RHS.isSmall());
+ assert(this->CurArraySize == RHS.CurArraySize);
+ std::swap_ranges(this->SmallArray, this->SmallArray+this->CurArraySize,
+ RHS.SmallArray);
+ std::swap(this->NumElements, RHS.NumElements);
+}
+
+SmallPtrSetImpl::~SmallPtrSetImpl() {
+ if (!isSmall())
+ free(CurArray);
+}
diff --git a/contrib/llvm/lib/Support/SmallVector.cpp b/contrib/llvm/lib/Support/SmallVector.cpp
new file mode 100644
index 000000000000..f9c0e78270c9
--- /dev/null
+++ b/contrib/llvm/lib/Support/SmallVector.cpp
@@ -0,0 +1,40 @@
+//===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SmallVector class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+/// grow_pod - This is an implementation of the grow() method which only works
+/// on POD-like datatypes and is out of line to reduce code duplication.
+void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSizeInBytes,
+ size_t TSize) {
+ size_t CurSizeBytes = size_in_bytes();
+ size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow.
+ if (NewCapacityInBytes < MinSizeInBytes)
+ NewCapacityInBytes = MinSizeInBytes;
+
+ void *NewElts;
+ if (BeginX == FirstEl) {
+ NewElts = malloc(NewCapacityInBytes);
+
+ // Copy the elements over. No need to run dtors on PODs.
+ memcpy(NewElts, this->BeginX, CurSizeBytes);
+ } else {
+ // If this wasn't grown from the inline copy, grow the allocated space.
+ NewElts = realloc(this->BeginX, NewCapacityInBytes);
+ }
+
+ this->EndX = (char*)NewElts+CurSizeBytes;
+ this->BeginX = NewElts;
+ this->CapacityX = (char*)this->BeginX + NewCapacityInBytes;
+}
diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp
new file mode 100644
index 000000000000..fac3cad5cc25
--- /dev/null
+++ b/contrib/llvm/lib/Support/SourceMgr.cpp
@@ -0,0 +1,489 @@
+//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SourceMgr class. This class is used as a simple
+// substrate for diagnostics, #include handling, and other low level things for
+// simple parsers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Locale.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+static const size_t TabStop = 8;
+
+namespace {
+ struct LineNoCacheTy {
+ int LastQueryBufferID;
+ const char *LastQuery;
+ unsigned LineNoOfQuery;
+ };
+}
+
+static LineNoCacheTy *getCache(void *Ptr) {
+ return (LineNoCacheTy*)Ptr;
+}
+
+
+SourceMgr::~SourceMgr() {
+ // Delete the line # cache if allocated.
+ if (LineNoCacheTy *Cache = getCache(LineNoCache))
+ delete Cache;
+
+ while (!Buffers.empty()) {
+ delete Buffers.back().Buffer;
+ Buffers.pop_back();
+ }
+}
+
+/// AddIncludeFile - Search for a file with the specified name in the current
+/// directory or in one of the IncludeDirs. If no file is found, this returns
+/// ~0, otherwise it returns the buffer ID of the stacked file.
+unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
+ SMLoc IncludeLoc,
+ std::string &IncludedFile) {
+ OwningPtr<MemoryBuffer> NewBuf;
+ IncludedFile = Filename;
+ MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf);
+
+ // If the file didn't exist directly, see if it's in an include path.
+ for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
+ IncludedFile = IncludeDirectories[i] + "/" + Filename;
+ MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf);
+ }
+
+ if (NewBuf == 0) return ~0U;
+
+ return AddNewSourceBuffer(NewBuf.take(), IncludeLoc);
+}
+
+
+/// FindBufferContainingLoc - Return the ID of the buffer containing the
+/// specified location, returning -1 if not found.
+int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
+ for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
+ if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
+ // Use <= here so that a pointer to the null at the end of the buffer
+ // is included as part of the buffer.
+ Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
+ return i;
+ return -1;
+}
+
+/// getLineAndColumn - Find the line and column number for the specified
+/// location in the specified file. This is not a fast method.
+std::pair<unsigned, unsigned>
+SourceMgr::getLineAndColumn(SMLoc Loc, int BufferID) const {
+ if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc);
+ assert(BufferID != -1 && "Invalid Location!");
+
+ MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer;
+
+ // Count the number of \n's between the start of the file and the specified
+ // location.
+ unsigned LineNo = 1;
+
+ const char *BufStart = Buff->getBufferStart();
+ const char *Ptr = BufStart;
+
+ // If we have a line number cache, and if the query is to a later point in the
+ // same file, start searching from the last query location. This optimizes
+ // for the case when multiple diagnostics come out of one file in order.
+ if (LineNoCacheTy *Cache = getCache(LineNoCache))
+ if (Cache->LastQueryBufferID == BufferID &&
+ Cache->LastQuery <= Loc.getPointer()) {
+ Ptr = Cache->LastQuery;
+ LineNo = Cache->LineNoOfQuery;
+ }
+
+ // Scan for the location being queried, keeping track of the number of lines
+ // we see.
+ for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr)
+ if (*Ptr == '\n') ++LineNo;
+
+ // Allocate the line number cache if it doesn't exist.
+ if (LineNoCache == 0)
+ LineNoCache = new LineNoCacheTy();
+
+ // Update the line # cache.
+ LineNoCacheTy &Cache = *getCache(LineNoCache);
+ Cache.LastQueryBufferID = BufferID;
+ Cache.LastQuery = Ptr;
+ Cache.LineNoOfQuery = LineNo;
+
+ size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r");
+ if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0;
+ return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs);
+}
+
+void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
+ if (IncludeLoc == SMLoc()) return; // Top of stack.
+
+ int CurBuf = FindBufferContainingLoc(IncludeLoc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+
+ PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
+
+ OS << "Included from "
+ << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+ << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
+}
+
+
+/// GetMessage - Return an SMDiagnostic at the specified location with the
+/// specified string.
+///
+/// @param Type - If non-null, the kind of message (e.g., "error") which is
+/// prefixed to the message.
+SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
+ const Twine &Msg,
+ ArrayRef<SMRange> Ranges,
+ ArrayRef<SMFixIt> FixIts) const {
+
+ // First thing to do: find the current buffer containing the specified
+ // location to pull out the source line.
+ SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
+ std::pair<unsigned, unsigned> LineAndCol;
+ const char *BufferID = "<unknown>";
+ std::string LineStr;
+
+ if (Loc.isValid()) {
+ int CurBuf = FindBufferContainingLoc(Loc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+
+ MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer;
+ BufferID = CurMB->getBufferIdentifier();
+
+ // Scan backward to find the start of the line.
+ const char *LineStart = Loc.getPointer();
+ const char *BufStart = CurMB->getBufferStart();
+ while (LineStart != BufStart && LineStart[-1] != '\n' &&
+ LineStart[-1] != '\r')
+ --LineStart;
+
+ // Get the end of the line.
+ const char *LineEnd = Loc.getPointer();
+ const char *BufEnd = CurMB->getBufferEnd();
+ while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r')
+ ++LineEnd;
+ LineStr = std::string(LineStart, LineEnd);
+
+ // Convert any ranges to column ranges that only intersect the line of the
+ // location.
+ for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
+ SMRange R = Ranges[i];
+ if (!R.isValid()) continue;
+
+ // If the line doesn't contain any part of the range, then ignore it.
+ if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
+ continue;
+
+ // Ignore pieces of the range that go onto other lines.
+ if (R.Start.getPointer() < LineStart)
+ R.Start = SMLoc::getFromPointer(LineStart);
+ if (R.End.getPointer() > LineEnd)
+ R.End = SMLoc::getFromPointer(LineEnd);
+
+ // Translate from SMLoc ranges to column ranges.
+ // FIXME: Handle multibyte characters.
+ ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
+ R.End.getPointer()-LineStart));
+ }
+
+ LineAndCol = getLineAndColumn(Loc, CurBuf);
+ }
+
+ return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
+ LineAndCol.second-1, Kind, Msg.str(),
+ LineStr, ColRanges, FixIts);
+}
+
+void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
+ const Twine &Msg, ArrayRef<SMRange> Ranges,
+ ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
+ SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges, FixIts);
+
+ // Report the message with the diagnostic handler if present.
+ if (DiagHandler) {
+ DiagHandler(Diagnostic, DiagContext);
+ return;
+ }
+
+ raw_ostream &OS = errs();
+
+ if (Loc != SMLoc()) {
+ int CurBuf = FindBufferContainingLoc(Loc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+ PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
+ }
+
+ Diagnostic.print(0, OS, ShowColors);
+}
+
+//===----------------------------------------------------------------------===//
+// SMDiagnostic Implementation
+//===----------------------------------------------------------------------===//
+
+SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
+ int Line, int Col, SourceMgr::DiagKind Kind,
+ StringRef Msg, StringRef LineStr,
+ ArrayRef<std::pair<unsigned,unsigned> > Ranges,
+ ArrayRef<SMFixIt> Hints)
+ : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
+ Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
+ FixIts(Hints.begin(), Hints.end()) {
+ std::sort(FixIts.begin(), FixIts.end());
+}
+
+static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
+ ArrayRef<SMFixIt> FixIts, ArrayRef<char> SourceLine){
+ if (FixIts.empty())
+ return;
+
+ const char *LineStart = SourceLine.begin();
+ const char *LineEnd = SourceLine.end();
+
+ size_t PrevHintEndCol = 0;
+
+ for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end();
+ I != E; ++I) {
+ // If the fixit contains a newline or tab, ignore it.
+ if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
+ continue;
+
+ SMRange R = I->getRange();
+
+ // If the line doesn't contain any part of the range, then ignore it.
+ if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
+ continue;
+
+ // Translate from SMLoc to column.
+ // Ignore pieces of the range that go onto other lines.
+ // FIXME: Handle multibyte characters in the source line.
+ unsigned FirstCol;
+ if (R.Start.getPointer() < LineStart)
+ FirstCol = 0;
+ else
+ FirstCol = R.Start.getPointer() - LineStart;
+
+ // If we inserted a long previous hint, push this one forwards, and add
+ // an extra space to show that this is not part of the previous
+ // completion. This is sort of the best we can do when two hints appear
+ // to overlap.
+ //
+ // Note that if this hint is located immediately after the previous
+ // hint, no space will be added, since the location is more important.
+ unsigned HintCol = FirstCol;
+ if (HintCol < PrevHintEndCol)
+ HintCol = PrevHintEndCol + 1;
+
+ // FIXME: This assertion is intended to catch unintended use of multibyte
+ // characters in fixits. If we decide to do this, we'll have to track
+ // separate byte widths for the source and fixit lines.
+ assert((size_t)llvm::sys::locale::columnWidth(I->getText()) ==
+ I->getText().size());
+
+ // This relies on one byte per column in our fixit hints.
+ unsigned LastColumnModified = HintCol + I->getText().size();
+ if (LastColumnModified > FixItLine.size())
+ FixItLine.resize(LastColumnModified, ' ');
+
+ std::copy(I->getText().begin(), I->getText().end(),
+ FixItLine.begin() + HintCol);
+
+ PrevHintEndCol = LastColumnModified;
+
+ // For replacements, mark the removal range with '~'.
+ // FIXME: Handle multibyte characters in the source line.
+ unsigned LastCol;
+ if (R.End.getPointer() >= LineEnd)
+ LastCol = LineEnd - LineStart;
+ else
+ LastCol = R.End.getPointer() - LineStart;
+
+ std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~');
+ }
+}
+
+static void printSourceLine(raw_ostream &S, StringRef LineContents) {
+ // Print out the source line one character at a time, so we can expand tabs.
+ for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
+ if (LineContents[i] != '\t') {
+ S << LineContents[i];
+ ++OutCol;
+ continue;
+ }
+
+ // If we have a tab, emit at least one space, then round up to 8 columns.
+ do {
+ S << ' ';
+ ++OutCol;
+ } while ((OutCol % TabStop) != 0);
+ }
+ S << '\n';
+}
+
+static bool isNonASCII(char c) {
+ return c & 0x80;
+}
+
+void SMDiagnostic::print(const char *ProgName, raw_ostream &S,
+ bool ShowColors) const {
+ // Display colors only if OS supports colors.
+ ShowColors &= S.has_colors();
+
+ if (ShowColors)
+ S.changeColor(raw_ostream::SAVEDCOLOR, true);
+
+ if (ProgName && ProgName[0])
+ S << ProgName << ": ";
+
+ if (!Filename.empty()) {
+ if (Filename == "-")
+ S << "<stdin>";
+ else
+ S << Filename;
+
+ if (LineNo != -1) {
+ S << ':' << LineNo;
+ if (ColumnNo != -1)
+ S << ':' << (ColumnNo+1);
+ }
+ S << ": ";
+ }
+
+ switch (Kind) {
+ case SourceMgr::DK_Error:
+ if (ShowColors)
+ S.changeColor(raw_ostream::RED, true);
+ S << "error: ";
+ break;
+ case SourceMgr::DK_Warning:
+ if (ShowColors)
+ S.changeColor(raw_ostream::MAGENTA, true);
+ S << "warning: ";
+ break;
+ case SourceMgr::DK_Note:
+ if (ShowColors)
+ S.changeColor(raw_ostream::BLACK, true);
+ S << "note: ";
+ break;
+ }
+
+ if (ShowColors) {
+ S.resetColor();
+ S.changeColor(raw_ostream::SAVEDCOLOR, true);
+ }
+
+ S << Message << '\n';
+
+ if (ShowColors)
+ S.resetColor();
+
+ if (LineNo == -1 || ColumnNo == -1)
+ return;
+
+ // FIXME: If there are multibyte or multi-column characters in the source, all
+ // our ranges will be wrong. To do this properly, we'll need a byte-to-column
+ // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
+ // expanding them later, and bail out rather than show incorrect ranges and
+ // misaligned fixits for any other odd characters.
+ if (std::find_if(LineContents.begin(), LineContents.end(), isNonASCII) !=
+ LineContents.end()) {
+ printSourceLine(S, LineContents);
+ return;
+ }
+ size_t NumColumns = LineContents.size();
+
+ // Build the line with the caret and ranges.
+ std::string CaretLine(NumColumns+1, ' ');
+
+ // Expand any ranges.
+ for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
+ std::pair<unsigned, unsigned> R = Ranges[r];
+ std::fill(&CaretLine[R.first],
+ &CaretLine[std::min((size_t)R.second, CaretLine.size())],
+ '~');
+ }
+
+ // Add any fix-its.
+ // FIXME: Find the beginning of the line properly for multibyte characters.
+ std::string FixItInsertionLine;
+ buildFixItLine(CaretLine, FixItInsertionLine, FixIts,
+ makeArrayRef(Loc.getPointer() - ColumnNo,
+ LineContents.size()));
+
+ // Finally, plop on the caret.
+ if (unsigned(ColumnNo) <= NumColumns)
+ CaretLine[ColumnNo] = '^';
+ else
+ CaretLine[NumColumns] = '^';
+
+ // ... and remove trailing whitespace so the output doesn't wrap for it. We
+ // know that the line isn't completely empty because it has the caret in it at
+ // least.
+ CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
+
+ printSourceLine(S, LineContents);
+
+ if (ShowColors)
+ S.changeColor(raw_ostream::GREEN, true);
+
+ // Print out the caret line, matching tabs in the source line.
+ for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
+ if (i >= LineContents.size() || LineContents[i] != '\t') {
+ S << CaretLine[i];
+ ++OutCol;
+ continue;
+ }
+
+ // Okay, we have a tab. Insert the appropriate number of characters.
+ do {
+ S << CaretLine[i];
+ ++OutCol;
+ } while ((OutCol % TabStop) != 0);
+ }
+ S << '\n';
+
+ if (ShowColors)
+ S.resetColor();
+
+ // Print out the replacement line, matching tabs in the source line.
+ if (FixItInsertionLine.empty())
+ return;
+
+ for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i != e; ++i) {
+ if (i >= LineContents.size() || LineContents[i] != '\t') {
+ S << FixItInsertionLine[i];
+ ++OutCol;
+ continue;
+ }
+
+ // Okay, we have a tab. Insert the appropriate number of characters.
+ do {
+ S << FixItInsertionLine[i];
+ // FIXME: This is trying not to break up replacements, but then to re-sync
+ // with the tabs between replacements. This will fail, though, if two
+ // fix-it replacements are exactly adjacent, or if a fix-it contains a
+ // space. Really we should be precomputing column widths, which we'll
+ // need anyway for multibyte chars.
+ if (FixItInsertionLine[i] != ' ')
+ ++i;
+ ++OutCol;
+ } while (((OutCol % TabStop) != 0) && i != e);
+ }
+ S << '\n';
+}
diff --git a/contrib/llvm/lib/Support/Statistic.cpp b/contrib/llvm/lib/Support/Statistic.cpp
new file mode 100644
index 000000000000..9c28176b730e
--- /dev/null
+++ b/contrib/llvm/lib/Support/Statistic.cpp
@@ -0,0 +1,170 @@
+//===-- Statistic.cpp - Easy way to expose stats information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the 'Statistic' class, which is designed to be an easy
+// way to expose various success metrics from passes. These statistics are
+// printed at the end of a run, when the -stats command line option is enabled
+// on the command line.
+//
+// This is useful for reporting information like the number of instructions
+// simplified, optimized or removed by various transformations, like this:
+//
+// static Statistic NumInstEliminated("GCSE", "Number of instructions killed");
+//
+// Later, in the code: ++NumInstEliminated;
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstring>
+using namespace llvm;
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
+
+/// -stats - Command line option to cause transformations to emit stats about
+/// what they did.
+///
+static cl::opt<bool>
+Enabled(
+ "stats",
+ cl::desc("Enable statistics output from program (available with Asserts)"));
+
+
+namespace {
+/// StatisticInfo - This class is used in a ManagedStatic so that it is created
+/// on demand (when the first statistic is bumped) and destroyed only when
+/// llvm_shutdown is called. We print statistics from the destructor.
+class StatisticInfo {
+ std::vector<const Statistic*> Stats;
+ friend void llvm::PrintStatistics();
+ friend void llvm::PrintStatistics(raw_ostream &OS);
+public:
+ ~StatisticInfo();
+
+ void addStatistic(const Statistic *S) {
+ Stats.push_back(S);
+ }
+};
+}
+
+static ManagedStatic<StatisticInfo> StatInfo;
+static ManagedStatic<sys::SmartMutex<true> > StatLock;
+
+/// RegisterStatistic - The first time a statistic is bumped, this method is
+/// called.
+void Statistic::RegisterStatistic() {
+ // If stats are enabled, inform StatInfo that this statistic should be
+ // printed.
+ sys::SmartScopedLock<true> Writer(*StatLock);
+ if (!Initialized) {
+ if (Enabled)
+ StatInfo->addStatistic(this);
+
+ TsanHappensBefore(this);
+ sys::MemoryFence();
+ // Remember we have been registered.
+ TsanIgnoreWritesBegin();
+ Initialized = true;
+ TsanIgnoreWritesEnd();
+ }
+}
+
+namespace {
+
+struct NameCompare {
+ bool operator()(const Statistic *LHS, const Statistic *RHS) const {
+ int Cmp = std::strcmp(LHS->getName(), RHS->getName());
+ if (Cmp != 0) return Cmp < 0;
+
+ // Secondary key is the description.
+ return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
+ }
+};
+
+}
+
+// Print information when destroyed, iff command line option is specified.
+StatisticInfo::~StatisticInfo() {
+ llvm::PrintStatistics();
+}
+
+void llvm::EnableStatistics() {
+ Enabled.setValue(true);
+}
+
+bool llvm::AreStatisticsEnabled() {
+ return Enabled;
+}
+
+void llvm::PrintStatistics(raw_ostream &OS) {
+ StatisticInfo &Stats = *StatInfo;
+
+ // Figure out how long the biggest Value and Name fields are.
+ unsigned MaxNameLen = 0, MaxValLen = 0;
+ for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
+ MaxValLen = std::max(MaxValLen,
+ (unsigned)utostr(Stats.Stats[i]->getValue()).size());
+ MaxNameLen = std::max(MaxNameLen,
+ (unsigned)std::strlen(Stats.Stats[i]->getName()));
+ }
+
+ // Sort the fields by name.
+ std::stable_sort(Stats.Stats.begin(), Stats.Stats.end(), NameCompare());
+
+ // Print out the statistics header...
+ OS << "===" << std::string(73, '-') << "===\n"
+ << " ... Statistics Collected ...\n"
+ << "===" << std::string(73, '-') << "===\n\n";
+
+ // Print all of the statistics.
+ for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i)
+ OS << format("%*u %-*s - %s\n",
+ MaxValLen, Stats.Stats[i]->getValue(),
+ MaxNameLen, Stats.Stats[i]->getName(),
+ Stats.Stats[i]->getDesc());
+
+ OS << '\n'; // Flush the output stream.
+ OS.flush();
+
+}
+
+void llvm::PrintStatistics() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+ StatisticInfo &Stats = *StatInfo;
+
+ // Statistics not enabled?
+ if (Stats.Stats.empty()) return;
+
+ // Get the stream to write to.
+ raw_ostream &OutStream = *CreateInfoOutputFile();
+ PrintStatistics(OutStream);
+ delete &OutStream; // Close the file.
+#else
+ // Check if the -stats option is set instead of checking
+ // !Stats.Stats.empty(). In release builds, Statistics operators
+ // do nothing, so stats are never Registered.
+ if (Enabled) {
+ // Get the stream to write to.
+ raw_ostream &OutStream = *CreateInfoOutputFile();
+ OutStream << "Statistics are disabled. "
+ << "Build with asserts or with -DLLVM_ENABLE_STATS\n";
+ OutStream.flush();
+ delete &OutStream; // Close the file.
+ }
+#endif
+}
diff --git a/contrib/llvm/lib/Support/StreamableMemoryObject.cpp b/contrib/llvm/lib/Support/StreamableMemoryObject.cpp
new file mode 100644
index 000000000000..59e27a263e06
--- /dev/null
+++ b/contrib/llvm/lib/Support/StreamableMemoryObject.cpp
@@ -0,0 +1,146 @@
+//===- StreamableMemoryObject.cpp - Streamable data interface -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StreamableMemoryObject.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <cstring>
+
+
+using namespace llvm;
+
+namespace {
+
+class RawMemoryObject : public StreamableMemoryObject {
+public:
+ RawMemoryObject(const unsigned char *Start, const unsigned char *End) :
+ FirstChar(Start), LastChar(End) {
+ assert(LastChar >= FirstChar && "Invalid start/end range");
+ }
+
+ virtual uint64_t getBase() const LLVM_OVERRIDE { return 0; }
+ virtual uint64_t getExtent() const LLVM_OVERRIDE {
+ return LastChar - FirstChar;
+ }
+ virtual int readByte(uint64_t address, uint8_t* ptr) const LLVM_OVERRIDE;
+ virtual int readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const LLVM_OVERRIDE;
+ virtual const uint8_t *getPointer(uint64_t address,
+ uint64_t size) const LLVM_OVERRIDE;
+ virtual bool isValidAddress(uint64_t address) const LLVM_OVERRIDE {
+ return validAddress(address);
+ }
+ virtual bool isObjectEnd(uint64_t address) const LLVM_OVERRIDE {
+ return objectEnd(address);
+ }
+
+private:
+ const uint8_t* const FirstChar;
+ const uint8_t* const LastChar;
+
+ // These are implemented as inline functions here to avoid multiple virtual
+ // calls per public function
+ bool validAddress(uint64_t address) const {
+ return static_cast<ptrdiff_t>(address) < LastChar - FirstChar;
+ }
+ bool objectEnd(uint64_t address) const {
+ return static_cast<ptrdiff_t>(address) == LastChar - FirstChar;
+ }
+
+ RawMemoryObject(const RawMemoryObject&) LLVM_DELETED_FUNCTION;
+ void operator=(const RawMemoryObject&) LLVM_DELETED_FUNCTION;
+};
+
+int RawMemoryObject::readByte(uint64_t address, uint8_t* ptr) const {
+ if (!validAddress(address)) return -1;
+ *ptr = *((uint8_t *)(uintptr_t)(address + FirstChar));
+ return 0;
+}
+
+int RawMemoryObject::readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const {
+ if (!validAddress(address) || !validAddress(address + size - 1)) return -1;
+ memcpy(buf, (uint8_t *)(uintptr_t)(address + FirstChar), size);
+ if (copied) *copied = size;
+ return size;
+}
+
+const uint8_t *RawMemoryObject::getPointer(uint64_t address,
+ uint64_t size) const {
+ return FirstChar + address;
+}
+} // anonymous namespace
+
+namespace llvm {
+// If the bitcode has a header, then its size is known, and we don't have to
+// block until we actually want to read it.
+bool StreamingMemoryObject::isValidAddress(uint64_t address) const {
+ if (ObjectSize && address < ObjectSize) return true;
+ return fetchToPos(address);
+}
+
+bool StreamingMemoryObject::isObjectEnd(uint64_t address) const {
+ if (ObjectSize) return address == ObjectSize;
+ fetchToPos(address);
+ return address == ObjectSize && address != 0;
+}
+
+uint64_t StreamingMemoryObject::getExtent() const {
+ if (ObjectSize) return ObjectSize;
+ size_t pos = BytesRead + kChunkSize;
+ // keep fetching until we run out of bytes
+ while (fetchToPos(pos)) pos += kChunkSize;
+ return ObjectSize;
+}
+
+int StreamingMemoryObject::readByte(uint64_t address, uint8_t* ptr) const {
+ if (!fetchToPos(address)) return -1;
+ *ptr = Bytes[address + BytesSkipped];
+ return 0;
+}
+
+int StreamingMemoryObject::readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const {
+ if (!fetchToPos(address + size - 1)) return -1;
+ memcpy(buf, &Bytes[address + BytesSkipped], size);
+ if (copied) *copied = size;
+ return 0;
+}
+
+bool StreamingMemoryObject::dropLeadingBytes(size_t s) {
+ if (BytesRead < s) return true;
+ BytesSkipped = s;
+ BytesRead -= s;
+ return false;
+}
+
+void StreamingMemoryObject::setKnownObjectSize(size_t size) {
+ ObjectSize = size;
+ Bytes.reserve(size);
+}
+
+StreamableMemoryObject *getNonStreamedMemoryObject(
+ const unsigned char *Start, const unsigned char *End) {
+ return new RawMemoryObject(Start, End);
+}
+
+StreamableMemoryObject::~StreamableMemoryObject() { }
+
+StreamingMemoryObject::StreamingMemoryObject(DataStreamer *streamer) :
+ Bytes(kChunkSize), Streamer(streamer), BytesRead(0), BytesSkipped(0),
+ ObjectSize(0), EOFReached(false) {
+ BytesRead = streamer->GetBytes(&Bytes[0], kChunkSize);
+}
+}
diff --git a/contrib/llvm/lib/Support/StringExtras.cpp b/contrib/llvm/lib/Support/StringExtras.cpp
new file mode 100644
index 000000000000..d77ad7f55a18
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringExtras.cpp
@@ -0,0 +1,59 @@
+//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringExtras.h header
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+/// StrInStrNoCase - Portable version of strcasestr. Locates the first
+/// occurrence of string 's1' in string 's2', ignoring case. Returns
+/// the offset of s2 in s1 or npos if s2 cannot be found.
+StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) {
+ size_t N = s2.size(), M = s1.size();
+ if (N > M)
+ return StringRef::npos;
+ for (size_t i = 0, e = M - N + 1; i != e; ++i)
+ if (s1.substr(i, N).equals_lower(s2))
+ return i;
+ return StringRef::npos;
+}
+
+/// getToken - This function extracts one token from source, ignoring any
+/// leading characters that appear in the Delimiters string, and ending the
+/// token at any of the characters that appear in the Delimiters string. If
+/// there are no tokens in the source string, an empty string is returned.
+/// The function returns a pair containing the extracted token and the
+/// remaining tail string.
+std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
+ StringRef Delimiters) {
+ // Figure out where the token starts.
+ StringRef::size_type Start = Source.find_first_not_of(Delimiters);
+
+ // Find the next occurrence of the delimiter.
+ StringRef::size_type End = Source.find_first_of(Delimiters, Start);
+
+ return std::make_pair(Source.slice(Start, End), Source.substr(End));
+}
+
+/// SplitString - Split up the specified string according to the specified
+/// delimiters, appending the result fragments to the output list.
+void llvm::SplitString(StringRef Source,
+ SmallVectorImpl<StringRef> &OutFragments,
+ StringRef Delimiters) {
+ std::pair<StringRef, StringRef> S = getToken(Source, Delimiters);
+ while (!S.first.empty()) {
+ OutFragments.push_back(S.first);
+ S = getToken(S.second, Delimiters);
+ }
+}
diff --git a/contrib/llvm/lib/Support/StringMap.cpp b/contrib/llvm/lib/Support/StringMap.cpp
new file mode 100644
index 000000000000..9ac1f867fdd4
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringMap.cpp
@@ -0,0 +1,238 @@
+//===--- StringMap.cpp - String Hash table map implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringMap class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+using namespace llvm;
+
+StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
+ ItemSize = itemSize;
+
+ // If a size is specified, initialize the table with that many buckets.
+ if (InitSize) {
+ init(InitSize);
+ return;
+ }
+
+ // Otherwise, initialize it with zero buckets to avoid the allocation.
+ TheTable = 0;
+ NumBuckets = 0;
+ NumItems = 0;
+ NumTombstones = 0;
+}
+
+void StringMapImpl::init(unsigned InitSize) {
+ assert((InitSize & (InitSize-1)) == 0 &&
+ "Init Size must be a power of 2 or zero!");
+ NumBuckets = InitSize ? InitSize : 16;
+ NumItems = 0;
+ NumTombstones = 0;
+
+ TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
+ sizeof(StringMapEntryBase **) +
+ sizeof(unsigned));
+
+ // Allocate one extra bucket, set it to look filled so the iterators stop at
+ // end.
+ TheTable[NumBuckets] = (StringMapEntryBase*)2;
+}
+
+
+/// LookupBucketFor - Look up the bucket that the specified string should end
+/// up in. If it already exists as a key in the map, the Item pointer for the
+/// specified bucket will be non-null. Otherwise, it will be null. In either
+/// case, the FullHashValue field of the bucket will be set to the hash value
+/// of the string.
+unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
+ unsigned HTSize = NumBuckets;
+ if (HTSize == 0) { // Hash table unallocated so far?
+ init(16);
+ HTSize = NumBuckets;
+ }
+ unsigned FullHashValue = HashString(Name);
+ unsigned BucketNo = FullHashValue & (HTSize-1);
+ unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+
+ unsigned ProbeAmt = 1;
+ int FirstTombstone = -1;
+ while (1) {
+ StringMapEntryBase *BucketItem = TheTable[BucketNo];
+ // If we found an empty bucket, this key isn't in the table yet, return it.
+ if (LLVM_LIKELY(BucketItem == 0)) {
+ // If we found a tombstone, we want to reuse the tombstone instead of an
+ // empty bucket. This reduces probing.
+ if (FirstTombstone != -1) {
+ HashTable[FirstTombstone] = FullHashValue;
+ return FirstTombstone;
+ }
+
+ HashTable[BucketNo] = FullHashValue;
+ return BucketNo;
+ }
+
+ if (BucketItem == getTombstoneVal()) {
+ // Skip over tombstones. However, remember the first one we see.
+ if (FirstTombstone == -1) FirstTombstone = BucketNo;
+ } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
+ // If the full hash value matches, check deeply for a match. The common
+ // case here is that we are only looking at the buckets (for item info
+ // being non-null and for the full hash value) not at the items. This
+ // is important for cache locality.
+
+ // Do the comparison like this because Name isn't necessarily
+ // null-terminated!
+ char *ItemStr = (char*)BucketItem+ItemSize;
+ if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
+ // We found a match!
+ return BucketNo;
+ }
+ }
+
+ // Okay, we didn't find the item. Probe to the next bucket.
+ BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
+
+ // Use quadratic probing, it has fewer clumping artifacts than linear
+ // probing and has good cache behavior in the common case.
+ ++ProbeAmt;
+ }
+}
+
+
+/// FindKey - Look up the bucket that contains the specified key. If it exists
+/// in the map, return the bucket number of the key. Otherwise return -1.
+/// This does not modify the map.
+int StringMapImpl::FindKey(StringRef Key) const {
+ unsigned HTSize = NumBuckets;
+ if (HTSize == 0) return -1; // Really empty table?
+ unsigned FullHashValue = HashString(Key);
+ unsigned BucketNo = FullHashValue & (HTSize-1);
+ unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+
+ unsigned ProbeAmt = 1;
+ while (1) {
+ StringMapEntryBase *BucketItem = TheTable[BucketNo];
+ // If we found an empty bucket, this key isn't in the table yet, return.
+ if (LLVM_LIKELY(BucketItem == 0))
+ return -1;
+
+ if (BucketItem == getTombstoneVal()) {
+ // Ignore tombstones.
+ } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
+ // If the full hash value matches, check deeply for a match. The common
+ // case here is that we are only looking at the buckets (for item info
+ // being non-null and for the full hash value) not at the items. This
+ // is important for cache locality.
+
+ // Do the comparison like this because NameStart isn't necessarily
+ // null-terminated!
+ char *ItemStr = (char*)BucketItem+ItemSize;
+ if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
+ // We found a match!
+ return BucketNo;
+ }
+ }
+
+ // Okay, we didn't find the item. Probe to the next bucket.
+ BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
+
+ // Use quadratic probing, it has fewer clumping artifacts than linear
+ // probing and has good cache behavior in the common case.
+ ++ProbeAmt;
+ }
+}
+
+/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
+/// delete it. This aborts if the value isn't in the table.
+void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
+ const char *VStr = (char*)V + ItemSize;
+ StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
+ (void)V2;
+ assert(V == V2 && "Didn't find key?");
+}
+
+/// RemoveKey - Remove the StringMapEntry for the specified key from the
+/// table, returning it. If the key is not in the table, this returns null.
+StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
+ int Bucket = FindKey(Key);
+ if (Bucket == -1) return 0;
+
+ StringMapEntryBase *Result = TheTable[Bucket];
+ TheTable[Bucket] = getTombstoneVal();
+ --NumItems;
+ ++NumTombstones;
+ assert(NumItems + NumTombstones <= NumBuckets);
+
+ return Result;
+}
+
+
+
+/// RehashTable - Grow the table, redistributing values into the buckets with
+/// the appropriate mod-of-hashtable-size.
+void StringMapImpl::RehashTable() {
+ unsigned NewSize;
+ unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+
+ // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
+ // the buckets are empty (meaning that many are filled with tombstones),
+ // grow/rehash the table.
+ if (NumItems*4 > NumBuckets*3) {
+ NewSize = NumBuckets*2;
+ } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) {
+ NewSize = NumBuckets;
+ } else {
+ return;
+ }
+
+ // Allocate one extra bucket which will always be non-empty. This allows the
+ // iterators to stop at end.
+ StringMapEntryBase **NewTableArray =
+ (StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
+ sizeof(unsigned));
+ unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
+ NewTableArray[NewSize] = (StringMapEntryBase*)2;
+
+ // Rehash all the items into their new buckets. Luckily :) we already have
+ // the hash values available, so we don't have to rehash any strings.
+ for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
+ StringMapEntryBase *Bucket = TheTable[I];
+ if (Bucket && Bucket != getTombstoneVal()) {
+ // Fast case, bucket available.
+ unsigned FullHash = HashTable[I];
+ unsigned NewBucket = FullHash & (NewSize-1);
+ if (NewTableArray[NewBucket] == 0) {
+ NewTableArray[FullHash & (NewSize-1)] = Bucket;
+ NewHashArray[FullHash & (NewSize-1)] = FullHash;
+ continue;
+ }
+
+ // Otherwise probe for a spot.
+ unsigned ProbeSize = 1;
+ do {
+ NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
+ } while (NewTableArray[NewBucket]);
+
+ // Finally found a slot. Fill it in.
+ NewTableArray[NewBucket] = Bucket;
+ NewHashArray[NewBucket] = FullHash;
+ }
+ }
+
+ free(TheTable);
+
+ TheTable = NewTableArray;
+ NumBuckets = NewSize;
+ NumTombstones = 0;
+}
diff --git a/contrib/llvm/lib/Support/StringPool.cpp b/contrib/llvm/lib/Support/StringPool.cpp
new file mode 100644
index 000000000000..ff607cf8c4ad
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringPool.cpp
@@ -0,0 +1,35 @@
+//===-- StringPool.cpp - Interned string pool -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringPool class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StringPool.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+
+StringPool::StringPool() {}
+
+StringPool::~StringPool() {
+ assert(InternTable.empty() && "PooledStringPtr leaked!");
+}
+
+PooledStringPtr StringPool::intern(StringRef Key) {
+ table_t::iterator I = InternTable.find(Key);
+ if (I != InternTable.end())
+ return PooledStringPtr(&*I);
+
+ entry_t *S = entry_t::Create(Key.begin(), Key.end());
+ S->getValue().Pool = this;
+ InternTable.insert(S);
+
+ return PooledStringPtr(S);
+}
diff --git a/contrib/llvm/lib/Support/StringRef.cpp b/contrib/llvm/lib/Support/StringRef.cpp
new file mode 100644
index 000000000000..d7a0bfa41005
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringRef.cpp
@@ -0,0 +1,467 @@
+//===-- StringRef.cpp - Lightweight String References ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/edit_distance.h"
+#include <bitset>
+
+using namespace llvm;
+
+// MSVC emits references to this into the translation units which reference it.
+#ifndef _MSC_VER
+const size_t StringRef::npos;
+#endif
+
+static char ascii_tolower(char x) {
+ if (x >= 'A' && x <= 'Z')
+ return x - 'A' + 'a';
+ return x;
+}
+
+static char ascii_toupper(char x) {
+ if (x >= 'a' && x <= 'z')
+ return x - 'a' + 'A';
+ return x;
+}
+
+static bool ascii_isdigit(char x) {
+ return x >= '0' && x <= '9';
+}
+
+/// compare_lower - Compare strings, ignoring case.
+int StringRef::compare_lower(StringRef RHS) const {
+ for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+ unsigned char LHC = ascii_tolower(Data[I]);
+ unsigned char RHC = ascii_tolower(RHS.Data[I]);
+ if (LHC != RHC)
+ return LHC < RHC ? -1 : 1;
+ }
+
+ if (Length == RHS.Length)
+ return 0;
+ return Length < RHS.Length ? -1 : 1;
+}
+
+/// compare_numeric - Compare strings, handle embedded numbers.
+int StringRef::compare_numeric(StringRef RHS) const {
+ for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+ // Check for sequences of digits.
+ if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
+ // The longer sequence of numbers is considered larger.
+ // This doesn't really handle prefixed zeros well.
+ size_t J;
+ for (J = I + 1; J != E + 1; ++J) {
+ bool ld = J < Length && ascii_isdigit(Data[J]);
+ bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
+ if (ld != rd)
+ return rd ? -1 : 1;
+ if (!rd)
+ break;
+ }
+ // The two number sequences have the same length (J-I), just memcmp them.
+ if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
+ return Res < 0 ? -1 : 1;
+ // Identical number sequences, continue search after the numbers.
+ I = J - 1;
+ continue;
+ }
+ if (Data[I] != RHS.Data[I])
+ return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
+ }
+ if (Length == RHS.Length)
+ return 0;
+ return Length < RHS.Length ? -1 : 1;
+}
+
+// Compute the edit distance between the two given strings.
+unsigned StringRef::edit_distance(llvm::StringRef Other,
+ bool AllowReplacements,
+ unsigned MaxEditDistance) {
+ return llvm::ComputeEditDistance(
+ llvm::ArrayRef<char>(data(), size()),
+ llvm::ArrayRef<char>(Other.data(), Other.size()),
+ AllowReplacements, MaxEditDistance);
+}
+
+//===----------------------------------------------------------------------===//
+// String Operations
+//===----------------------------------------------------------------------===//
+
+std::string StringRef::lower() const {
+ std::string Result(size(), char());
+ for (size_type i = 0, e = size(); i != e; ++i) {
+ Result[i] = ascii_tolower(Data[i]);
+ }
+ return Result;
+}
+
+std::string StringRef::upper() const {
+ std::string Result(size(), char());
+ for (size_type i = 0, e = size(); i != e; ++i) {
+ Result[i] = ascii_toupper(Data[i]);
+ }
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// String Searching
+//===----------------------------------------------------------------------===//
+
+
+/// find - Search for the first string \arg Str in the string.
+///
+/// \return - The index of the first occurrence of \arg Str, or npos if not
+/// found.
+size_t StringRef::find(StringRef Str, size_t From) const {
+ size_t N = Str.size();
+ if (N > Length)
+ return npos;
+
+ // For short haystacks or unsupported needles fall back to the naive algorithm
+ if (Length < 16 || N > 255 || N == 0) {
+ for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
+ if (substr(i, N).equals(Str))
+ return i;
+ return npos;
+ }
+
+ if (From >= Length)
+ return npos;
+
+ // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
+ uint8_t BadCharSkip[256];
+ std::memset(BadCharSkip, N, 256);
+ for (unsigned i = 0; i != N-1; ++i)
+ BadCharSkip[(uint8_t)Str[i]] = N-1-i;
+
+ unsigned Len = Length-From, Pos = From;
+ while (Len >= N) {
+ if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
+ return Pos;
+
+ // Otherwise skip the appropriate number of bytes.
+ uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
+ Len -= Skip;
+ Pos += Skip;
+ }
+
+ return npos;
+}
+
+/// rfind - Search for the last string \arg Str in the string.
+///
+/// \return - The index of the last occurrence of \arg Str, or npos if not
+/// found.
+size_t StringRef::rfind(StringRef Str) const {
+ size_t N = Str.size();
+ if (N > Length)
+ return npos;
+ for (size_t i = Length - N + 1, e = 0; i != e;) {
+ --i;
+ if (substr(i, N).equals(Str))
+ return i;
+ }
+ return npos;
+}
+
+/// find_first_of - Find the first character in the string that is in \arg
+/// Chars, or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_first_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ if (CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
+
+/// find_first_not_of - Find the first character in the string that is not
+/// \arg C or npos if not found.
+StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ if (Data[i] != C)
+ return i;
+ return npos;
+}
+
+/// find_first_not_of - Find the first character in the string that is not
+/// in the string \arg Chars, or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ if (!CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
+
+/// find_last_of - Find the last character in the string that is in \arg C,
+/// or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_last_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length) - 1, e = -1; i != e; --i)
+ if (CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
+
+/// find_last_not_of - Find the last character in the string that is not
+/// \arg C, or npos if not found.
+StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
+ for (size_type i = min(From, Length) - 1, e = -1; i != e; --i)
+ if (Data[i] != C)
+ return i;
+ return npos;
+}
+
+/// find_last_not_of - Find the last character in the string that is not in
+/// \arg Chars, or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0, e = Chars.size(); i != e; ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length) - 1, e = -1; i != e; --i)
+ if (!CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
+
+void StringRef::split(SmallVectorImpl<StringRef> &A,
+ StringRef Separators, int MaxSplit,
+ bool KeepEmpty) const {
+ StringRef rest = *this;
+
+ // rest.data() is used to distinguish cases like "a," that splits into
+ // "a" + "" and "a" that splits into "a" + 0.
+ for (int splits = 0;
+ rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
+ ++splits) {
+ std::pair<StringRef, StringRef> p = rest.split(Separators);
+
+ if (KeepEmpty || p.first.size() != 0)
+ A.push_back(p.first);
+ rest = p.second;
+ }
+ // If we have a tail left, add it.
+ if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
+ A.push_back(rest);
+}
+
+//===----------------------------------------------------------------------===//
+// Helpful Algorithms
+//===----------------------------------------------------------------------===//
+
+/// count - Return the number of non-overlapped occurrences of \arg Str in
+/// the string.
+size_t StringRef::count(StringRef Str) const {
+ size_t Count = 0;
+ size_t N = Str.size();
+ if (N > Length)
+ return 0;
+ for (size_t i = 0, e = Length - N + 1; i != e; ++i)
+ if (substr(i, N).equals(Str))
+ ++Count;
+ return Count;
+}
+
+static unsigned GetAutoSenseRadix(StringRef &Str) {
+ if (Str.startswith("0x")) {
+ Str = Str.substr(2);
+ return 16;
+ }
+
+ if (Str.startswith("0b")) {
+ Str = Str.substr(2);
+ return 2;
+ }
+
+ if (Str.startswith("0o")) {
+ Str = Str.substr(2);
+ return 8;
+ }
+
+ if (Str.startswith("0"))
+ return 8;
+
+ return 10;
+}
+
+
+/// GetAsUnsignedInteger - Workhorse method that converts a integer character
+/// sequence of radix up to 36 to an unsigned long long value.
+bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
+ unsigned long long &Result) {
+ // Autosense radix if not specified.
+ if (Radix == 0)
+ Radix = GetAutoSenseRadix(Str);
+
+ // Empty strings (after the radix autosense) are invalid.
+ if (Str.empty()) return true;
+
+ // Parse all the bytes of the string given this radix. Watch for overflow.
+ Result = 0;
+ while (!Str.empty()) {
+ unsigned CharVal;
+ if (Str[0] >= '0' && Str[0] <= '9')
+ CharVal = Str[0]-'0';
+ else if (Str[0] >= 'a' && Str[0] <= 'z')
+ CharVal = Str[0]-'a'+10;
+ else if (Str[0] >= 'A' && Str[0] <= 'Z')
+ CharVal = Str[0]-'A'+10;
+ else
+ return true;
+
+ // If the parsed value is larger than the integer radix, the string is
+ // invalid.
+ if (CharVal >= Radix)
+ return true;
+
+ // Add in this character.
+ unsigned long long PrevResult = Result;
+ Result = Result*Radix+CharVal;
+
+ // Check for overflow by shifting back and seeing if bits were lost.
+ if (Result/Radix < PrevResult)
+ return true;
+
+ Str = Str.substr(1);
+ }
+
+ return false;
+}
+
+bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
+ long long &Result) {
+ unsigned long long ULLVal;
+
+ // Handle positive strings first.
+ if (Str.empty() || Str.front() != '-') {
+ if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
+ // Check for value so large it overflows a signed value.
+ (long long)ULLVal < 0)
+ return true;
+ Result = ULLVal;
+ return false;
+ }
+
+ // Get the positive part of the value.
+ if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
+ // Reject values so large they'd overflow as negative signed, but allow
+ // "-0". This negates the unsigned so that the negative isn't undefined
+ // on signed overflow.
+ (long long)-ULLVal > 0)
+ return true;
+
+ Result = -ULLVal;
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
+ StringRef Str = *this;
+
+ // Autosense radix if not specified.
+ if (Radix == 0)
+ Radix = GetAutoSenseRadix(Str);
+
+ assert(Radix > 1 && Radix <= 36);
+
+ // Empty strings (after the radix autosense) are invalid.
+ if (Str.empty()) return true;
+
+ // Skip leading zeroes. This can be a significant improvement if
+ // it means we don't need > 64 bits.
+ while (!Str.empty() && Str.front() == '0')
+ Str = Str.substr(1);
+
+ // If it was nothing but zeroes....
+ if (Str.empty()) {
+ Result = APInt(64, 0);
+ return false;
+ }
+
+ // (Over-)estimate the required number of bits.
+ unsigned Log2Radix = 0;
+ while ((1U << Log2Radix) < Radix) Log2Radix++;
+ bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
+
+ unsigned BitWidth = Log2Radix * Str.size();
+ if (BitWidth < Result.getBitWidth())
+ BitWidth = Result.getBitWidth(); // don't shrink the result
+ else if (BitWidth > Result.getBitWidth())
+ Result = Result.zext(BitWidth);
+
+ APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
+ if (!IsPowerOf2Radix) {
+ // These must have the same bit-width as Result.
+ RadixAP = APInt(BitWidth, Radix);
+ CharAP = APInt(BitWidth, 0);
+ }
+
+ // Parse all the bytes of the string given this radix.
+ Result = 0;
+ while (!Str.empty()) {
+ unsigned CharVal;
+ if (Str[0] >= '0' && Str[0] <= '9')
+ CharVal = Str[0]-'0';
+ else if (Str[0] >= 'a' && Str[0] <= 'z')
+ CharVal = Str[0]-'a'+10;
+ else if (Str[0] >= 'A' && Str[0] <= 'Z')
+ CharVal = Str[0]-'A'+10;
+ else
+ return true;
+
+ // If the parsed value is larger than the integer radix, the string is
+ // invalid.
+ if (CharVal >= Radix)
+ return true;
+
+ // Add in this character.
+ if (IsPowerOf2Radix) {
+ Result <<= Log2Radix;
+ Result |= CharVal;
+ } else {
+ Result *= RadixAP;
+ CharAP = CharVal;
+ Result += CharAP;
+ }
+
+ Str = Str.substr(1);
+ }
+
+ return false;
+}
+
+
+// Implementation of StringRef hashing.
+hash_code llvm::hash_value(StringRef S) {
+ return hash_combine_range(S.begin(), S.end());
+}
diff --git a/contrib/llvm/lib/Support/SystemUtils.cpp b/contrib/llvm/lib/Support/SystemUtils.cpp
new file mode 100644
index 000000000000..54b5e97bfe18
--- /dev/null
+++ b/contrib/llvm/lib/Support/SystemUtils.cpp
@@ -0,0 +1,55 @@
+//===- SystemUtils.cpp - Utilities for low-level system tasks -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used to do a variety of low-level, often
+// system-specific, tasks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check,
+ bool print_warning) {
+ if (stream_to_check.is_displayed()) {
+ if (print_warning) {
+ errs() << "WARNING: You're attempting to print out a bitcode file.\n"
+ "This is inadvisable as it may cause display problems. If\n"
+ "you REALLY want to taste LLVM bitcode first-hand, you\n"
+ "can force output with the `-f' option.\n\n";
+ }
+ return true;
+ }
+ return false;
+}
+
+/// PrependMainExecutablePath - Prepend the path to the program being executed
+/// to \p ExeName, given the value of argv[0] and the address of main()
+/// itself. This allows us to find another LLVM tool if it is built in the same
+/// directory. An empty string is returned on error; note that this function
+/// just mainpulates the path and doesn't check for executability.
+/// @brief Find a named executable.
+sys::Path llvm::PrependMainExecutablePath(const std::string &ExeName,
+ const char *Argv0, void *MainAddr) {
+ // Check the directory that the calling program is in. We can do
+ // this if ProgramPath contains at least one / character, indicating that it
+ // is a relative path to the executable itself.
+ sys::Path Result = sys::Path::GetMainExecutable(Argv0, MainAddr);
+ Result.eraseComponent();
+
+ if (!Result.isEmpty()) {
+ Result.appendComponent(ExeName);
+ Result.appendSuffix(sys::Path::GetEXESuffix());
+ }
+
+ return Result;
+}
diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp
new file mode 100644
index 000000000000..9c81327615c6
--- /dev/null
+++ b/contrib/llvm/lib/Support/TargetRegistry.cpp
@@ -0,0 +1,163 @@
+//===--- TargetRegistry.cpp - Target registration -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <vector>
+using namespace llvm;
+
+// Clients are responsible for avoid race conditions in registration.
+static Target *FirstTarget = 0;
+
+TargetRegistry::iterator TargetRegistry::begin() {
+ return iterator(FirstTarget);
+}
+
+const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
+ Triple &TheTriple,
+ std::string &Error) {
+ // Allocate target machine. First, check whether the user has explicitly
+ // specified an architecture to compile for. If so we have to look it up by
+ // name, because it might be a backend that has no mapping to a target triple.
+ const Target *TheTarget = 0;
+ if (!ArchName.empty()) {
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ ie = TargetRegistry::end(); it != ie; ++it) {
+ if (ArchName == it->getName()) {
+ TheTarget = &*it;
+ break;
+ }
+ }
+
+ if (!TheTarget) {
+ Error = "error: invalid target '" + ArchName + "'.\n";
+ return 0;
+ }
+
+ // Adjust the triple to match (if known), otherwise stick with the
+ // given triple.
+ Triple::ArchType Type = Triple::getArchTypeForLLVMName(ArchName);
+ if (Type != Triple::UnknownArch)
+ TheTriple.setArch(Type);
+ } else {
+ // Get the target specific parser.
+ std::string TempError;
+ TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), TempError);
+ if (TheTarget == 0) {
+ Error = ": error: unable to get target for '"
+ + TheTriple.getTriple()
+ + "', see --version and --triple.\n";
+ return 0;
+ }
+ }
+
+ return TheTarget;
+}
+
+const Target *TargetRegistry::lookupTarget(const std::string &TT,
+ std::string &Error) {
+ // Provide special warning when no targets are initialized.
+ if (begin() == end()) {
+ Error = "Unable to find target for this triple (no targets are registered)";
+ return 0;
+ }
+ const Target *Best = 0, *EquallyBest = 0;
+ unsigned BestQuality = 0;
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (unsigned Qual = it->TripleMatchQualityFn(TT)) {
+ if (!Best || Qual > BestQuality) {
+ Best = &*it;
+ EquallyBest = 0;
+ BestQuality = Qual;
+ } else if (Qual == BestQuality)
+ EquallyBest = &*it;
+ }
+ }
+
+ if (!Best) {
+ Error = "No available targets are compatible with this triple, "
+ "see -version for the available targets.";
+ return 0;
+ }
+
+ // Otherwise, take the best target, but make sure we don't have two equally
+ // good best targets.
+ if (EquallyBest) {
+ Error = std::string("Cannot choose between targets \"") +
+ Best->Name + "\" and \"" + EquallyBest->Name + "\"";
+ return 0;
+ }
+
+ return Best;
+}
+
+void TargetRegistry::RegisterTarget(Target &T,
+ const char *Name,
+ const char *ShortDesc,
+ Target::TripleMatchQualityFnTy TQualityFn,
+ bool HasJIT) {
+ assert(Name && ShortDesc && TQualityFn &&
+ "Missing required target information!");
+
+ // Check if this target has already been initialized, we allow this as a
+ // convenience to some clients.
+ if (T.Name)
+ return;
+
+ // Add to the list of targets.
+ T.Next = FirstTarget;
+ FirstTarget = &T;
+
+ T.Name = Name;
+ T.ShortDesc = ShortDesc;
+ T.TripleMatchQualityFn = TQualityFn;
+ T.HasJIT = HasJIT;
+}
+
+const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
+ const Target *TheTarget = lookupTarget(sys::getDefaultTargetTriple(), Error);
+
+ if (TheTarget && !TheTarget->hasJIT()) {
+ Error = "No JIT compatible target available for this host";
+ return 0;
+ }
+
+ return TheTarget;
+}
+
+static int TargetArraySortFn(const void *LHS, const void *RHS) {
+ typedef std::pair<StringRef, const Target*> pair_ty;
+ return ((const pair_ty*)LHS)->first.compare(((const pair_ty*)RHS)->first);
+}
+
+void TargetRegistry::printRegisteredTargetsForVersion() {
+ std::vector<std::pair<StringRef, const Target*> > Targets;
+ size_t Width = 0;
+ for (TargetRegistry::iterator I = TargetRegistry::begin(),
+ E = TargetRegistry::end();
+ I != E; ++I) {
+ Targets.push_back(std::make_pair(I->getName(), &*I));
+ Width = std::max(Width, Targets.back().first.size());
+ }
+ array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn);
+
+ raw_ostream &OS = outs();
+ OS << " Registered Targets:\n";
+ for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
+ OS << " " << Targets[i].first;
+ OS.indent(Width - Targets[i].first.size()) << " - "
+ << Targets[i].second->getShortDescription() << '\n';
+ }
+ if (Targets.empty())
+ OS << " (none)\n";
+}
diff --git a/contrib/llvm/lib/Support/ThreadLocal.cpp b/contrib/llvm/lib/Support/ThreadLocal.cpp
new file mode 100644
index 000000000000..0587aaec7e68
--- /dev/null
+++ b/contrib/llvm/lib/Support/ThreadLocal.cpp
@@ -0,0 +1,92 @@
+//===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/ThreadLocal.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) {
+ typedef int SIZE_TOO_BIG[sizeof(d) <= sizeof(data) ? 1 : -1];
+ void **pd = reinterpret_cast<void**>(&data);
+ *pd = const_cast<void*>(d);
+}
+const void* ThreadLocalImpl::getInstance() {
+ void **pd = reinterpret_cast<void**>(&data);
+ return *pd;
+}
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() : data() {
+ typedef int SIZE_TOO_BIG[sizeof(pthread_key_t) <= sizeof(data) ? 1 : -1];
+ pthread_key_t* key = reinterpret_cast<pthread_key_t*>(&data);
+ int errorcode = pthread_key_create(key, NULL);
+ assert(errorcode == 0);
+ (void) errorcode;
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+ pthread_key_t* key = reinterpret_cast<pthread_key_t*>(&data);
+ int errorcode = pthread_key_delete(*key);
+ assert(errorcode == 0);
+ (void) errorcode;
+}
+
+void ThreadLocalImpl::setInstance(const void* d) {
+ pthread_key_t* key = reinterpret_cast<pthread_key_t*>(&data);
+ int errorcode = pthread_setspecific(*key, d);
+ assert(errorcode == 0);
+ (void) errorcode;
+}
+
+const void* ThreadLocalImpl::getInstance() {
+ pthread_key_t* key = reinterpret_cast<pthread_key_t*>(&data);
+ return pthread_getspecific(*key);
+}
+
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/ThreadLocal.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/ThreadLocal.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 set in Support/ThreadLocal.cpp
+#endif
+#endif
diff --git a/contrib/llvm/lib/Support/Threading.cpp b/contrib/llvm/lib/Support/Threading.cpp
new file mode 100644
index 000000000000..13fba2ea2584
--- /dev/null
+++ b/contrib/llvm/lib/Support/Threading.cpp
@@ -0,0 +1,147 @@
+//===-- llvm/Support/Threading.cpp- Control multithreading mode --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements llvm_start_multithreaded() and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Threading.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Mutex.h"
+#include <cassert>
+
+using namespace llvm;
+
+static bool multithreaded_mode = false;
+
+static sys::Mutex* global_lock = 0;
+
+bool llvm::llvm_start_multithreaded() {
+#if LLVM_ENABLE_THREADS != 0
+ assert(!multithreaded_mode && "Already multithreaded!");
+ multithreaded_mode = true;
+ global_lock = new sys::Mutex(true);
+
+ // We fence here to ensure that all initialization is complete BEFORE we
+ // return from llvm_start_multithreaded().
+ sys::MemoryFence();
+ return true;
+#else
+ return false;
+#endif
+}
+
+void llvm::llvm_stop_multithreaded() {
+#if LLVM_ENABLE_THREADS != 0
+ assert(multithreaded_mode && "Not currently multithreaded!");
+
+ // We fence here to insure that all threaded operations are complete BEFORE we
+ // return from llvm_stop_multithreaded().
+ sys::MemoryFence();
+
+ multithreaded_mode = false;
+ delete global_lock;
+#endif
+}
+
+bool llvm::llvm_is_multithreaded() {
+ return multithreaded_mode;
+}
+
+void llvm::llvm_acquire_global_lock() {
+ if (multithreaded_mode) global_lock->acquire();
+}
+
+void llvm::llvm_release_global_lock() {
+ if (multithreaded_mode) global_lock->release();
+}
+
+#if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
+#include <pthread.h>
+
+struct ThreadInfo {
+ void (*UserFn)(void *);
+ void *UserData;
+};
+static void *ExecuteOnThread_Dispatch(void *Arg) {
+ ThreadInfo *TI = reinterpret_cast<ThreadInfo*>(Arg);
+ TI->UserFn(TI->UserData);
+ return 0;
+}
+
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ ThreadInfo Info = { Fn, UserData };
+ pthread_attr_t Attr;
+ pthread_t Thread;
+
+ // Construct the attributes object.
+ if (::pthread_attr_init(&Attr) != 0)
+ return;
+
+ // Set the requested stack size, if given.
+ if (RequestedStackSize != 0) {
+ if (::pthread_attr_setstacksize(&Attr, RequestedStackSize) != 0)
+ goto error;
+ }
+
+ // Construct and execute the thread.
+ if (::pthread_create(&Thread, &Attr, ExecuteOnThread_Dispatch, &Info) != 0)
+ goto error;
+
+ // Wait for the thread and clean up.
+ ::pthread_join(Thread, 0);
+
+ error:
+ ::pthread_attr_destroy(&Attr);
+}
+#elif LLVM_ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
+#include "Windows/Windows.h"
+#include <process.h>
+
+struct ThreadInfo {
+ void (*func)(void*);
+ void *param;
+};
+
+static unsigned __stdcall ThreadCallback(void *param) {
+ struct ThreadInfo *info = reinterpret_cast<struct ThreadInfo *>(param);
+ info->func(info->param);
+
+ return 0;
+}
+
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ struct ThreadInfo param = { Fn, UserData };
+
+ HANDLE hThread = (HANDLE)::_beginthreadex(NULL,
+ RequestedStackSize, ThreadCallback,
+ &param, 0, NULL);
+
+ if (hThread) {
+ // We actually don't care whether the wait succeeds or fails, in
+ // the same way we don't care whether the pthread_join call succeeds
+ // or fails. There's not much we could do if this were to fail. But
+ // on success, this call will wait until the thread finishes executing
+ // before returning.
+ (void)::WaitForSingleObject(hThread, INFINITE);
+ ::CloseHandle(hThread);
+ }
+}
+#else
+// Support for non-Win32, non-pthread implementation.
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ (void) RequestedStackSize;
+ Fn(UserData);
+}
+
+#endif
diff --git a/contrib/llvm/lib/Support/TimeValue.cpp b/contrib/llvm/lib/Support/TimeValue.cpp
new file mode 100644
index 000000000000..bd8af174bcd0
--- /dev/null
+++ b/contrib/llvm/lib/Support/TimeValue.cpp
@@ -0,0 +1,62 @@
+//===-- TimeValue.cpp - Implement OS TimeValue Concept ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system TimeValue concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TimeValue.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+const TimeValue::SecondsType
+ TimeValue::PosixZeroTimeSeconds = -946684800;
+const TimeValue::SecondsType
+ TimeValue::Win32ZeroTimeSeconds = -12591158400ULL;
+
+const TimeValue TimeValue::MinTime = TimeValue ( INT64_MIN,0 );
+const TimeValue TimeValue::MaxTime = TimeValue ( INT64_MAX,0 );
+const TimeValue TimeValue::ZeroTime = TimeValue ( 0,0 );
+const TimeValue TimeValue::PosixZeroTime = TimeValue ( PosixZeroTimeSeconds,0 );
+const TimeValue TimeValue::Win32ZeroTime = TimeValue ( Win32ZeroTimeSeconds,0 );
+
+void
+TimeValue::normalize( void ) {
+ if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
+ do {
+ seconds_++;
+ nanos_ -= NANOSECONDS_PER_SECOND;
+ } while ( nanos_ >= NANOSECONDS_PER_SECOND );
+ } else if (nanos_ <= -NANOSECONDS_PER_SECOND ) {
+ do {
+ seconds_--;
+ nanos_ += NANOSECONDS_PER_SECOND;
+ } while (nanos_ <= -NANOSECONDS_PER_SECOND);
+ }
+
+ if (seconds_ >= 1 && nanos_ < 0) {
+ seconds_--;
+ nanos_ += NANOSECONDS_PER_SECOND;
+ } else if (seconds_ < 0 && nanos_ > 0) {
+ seconds_++;
+ nanos_ -= NANOSECONDS_PER_SECOND;
+ }
+}
+
+}
+
+/// Include the platform specific portion of TimeValue class
+#ifdef LLVM_ON_UNIX
+#include "Unix/TimeValue.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/TimeValue.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp
new file mode 100644
index 000000000000..896d869aa1e7
--- /dev/null
+++ b/contrib/llvm/lib/Support/Timer.cpp
@@ -0,0 +1,389 @@
+//===-- Timer.cpp - Interval Timing Support -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interval Timing implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Timer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
+
+// getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
+// of constructor/destructor ordering being unspecified by C++. Basically the
+// problem is that a Statistic object gets destroyed, which ends up calling
+// 'GetLibSupportInfoOutputFile()' (below), which calls this function.
+// LibSupportInfoOutputFilename used to be a global variable, but sometimes it
+// would get destroyed before the Statistic, causing havoc to ensue. We "fix"
+// this by creating the string the first time it is needed and never destroying
+// it.
+static ManagedStatic<std::string> LibSupportInfoOutputFilename;
+static std::string &getLibSupportInfoOutputFilename() {
+ return *LibSupportInfoOutputFilename;
+}
+
+static ManagedStatic<sys::SmartMutex<true> > TimerLock;
+
+namespace {
+ static cl::opt<bool>
+ TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
+ "tracking (this may be slow)"),
+ cl::Hidden);
+
+ static cl::opt<std::string, true>
+ InfoOutputFilename("info-output-file", cl::value_desc("filename"),
+ cl::desc("File to append -stats and -timer output to"),
+ cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
+}
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+raw_ostream *llvm::CreateInfoOutputFile() {
+ const std::string &OutputFilename = getLibSupportInfoOutputFilename();
+ if (OutputFilename.empty())
+ return new raw_fd_ostream(2, false); // stderr.
+ if (OutputFilename == "-")
+ return new raw_fd_ostream(1, false); // stdout.
+
+ // Append mode is used because the info output file is opened and closed
+ // each time -stats or -time-passes wants to print output to it. To
+ // compensate for this, the test-suite Makefiles have code to delete the
+ // info output file before running commands which write to it.
+ std::string Error;
+ raw_ostream *Result = new raw_fd_ostream(OutputFilename.c_str(),
+ Error, raw_fd_ostream::F_Append);
+ if (Error.empty())
+ return Result;
+
+ errs() << "Error opening info-output-file '"
+ << OutputFilename << " for appending!\n";
+ delete Result;
+ return new raw_fd_ostream(2, false); // stderr.
+}
+
+
+static TimerGroup *DefaultTimerGroup = 0;
+static TimerGroup *getDefaultTimerGroup() {
+ TimerGroup *tmp = DefaultTimerGroup;
+ sys::MemoryFence();
+ if (tmp) return tmp;
+
+ llvm_acquire_global_lock();
+ tmp = DefaultTimerGroup;
+ if (!tmp) {
+ tmp = new TimerGroup("Miscellaneous Ungrouped Timers");
+ sys::MemoryFence();
+ DefaultTimerGroup = tmp;
+ }
+ llvm_release_global_lock();
+
+ return tmp;
+}
+
+//===----------------------------------------------------------------------===//
+// Timer Implementation
+//===----------------------------------------------------------------------===//
+
+void Timer::init(StringRef N) {
+ assert(TG == 0 && "Timer already initialized");
+ Name.assign(N.begin(), N.end());
+ Started = false;
+ TG = getDefaultTimerGroup();
+ TG->addTimer(*this);
+}
+
+void Timer::init(StringRef N, TimerGroup &tg) {
+ assert(TG == 0 && "Timer already initialized");
+ Name.assign(N.begin(), N.end());
+ Started = false;
+ TG = &tg;
+ TG->addTimer(*this);
+}
+
+Timer::~Timer() {
+ if (!TG) return; // Never initialized, or already cleared.
+ TG->removeTimer(*this);
+}
+
+static inline size_t getMemUsage() {
+ if (!TrackSpace) return 0;
+ return sys::Process::GetMallocUsage();
+}
+
+TimeRecord TimeRecord::getCurrentTime(bool Start) {
+ TimeRecord Result;
+ sys::TimeValue now(0,0), user(0,0), sys(0,0);
+
+ if (Start) {
+ Result.MemUsed = getMemUsage();
+ sys::Process::GetTimeUsage(now, user, sys);
+ } else {
+ sys::Process::GetTimeUsage(now, user, sys);
+ Result.MemUsed = getMemUsage();
+ }
+
+ Result.WallTime = now.seconds() + now.microseconds() / 1000000.0;
+ Result.UserTime = user.seconds() + user.microseconds() / 1000000.0;
+ Result.SystemTime = sys.seconds() + sys.microseconds() / 1000000.0;
+ return Result;
+}
+
+static ManagedStatic<std::vector<Timer*> > ActiveTimers;
+
+void Timer::startTimer() {
+ Started = true;
+ ActiveTimers->push_back(this);
+ Time -= TimeRecord::getCurrentTime(true);
+}
+
+void Timer::stopTimer() {
+ Time += TimeRecord::getCurrentTime(false);
+
+ if (ActiveTimers->back() == this) {
+ ActiveTimers->pop_back();
+ } else {
+ std::vector<Timer*>::iterator I =
+ std::find(ActiveTimers->begin(), ActiveTimers->end(), this);
+ assert(I != ActiveTimers->end() && "stop but no startTimer?");
+ ActiveTimers->erase(I);
+ }
+}
+
+static void printVal(double Val, double Total, raw_ostream &OS) {
+ if (Total < 1e-7) // Avoid dividing by zero.
+ OS << " ----- ";
+ else
+ OS << format(" %7.4f (%5.1f%%)", Val, Val*100/Total);
+}
+
+void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
+ if (Total.getUserTime())
+ printVal(getUserTime(), Total.getUserTime(), OS);
+ if (Total.getSystemTime())
+ printVal(getSystemTime(), Total.getSystemTime(), OS);
+ if (Total.getProcessTime())
+ printVal(getProcessTime(), Total.getProcessTime(), OS);
+ printVal(getWallTime(), Total.getWallTime(), OS);
+
+ OS << " ";
+
+ if (Total.getMemUsed())
+ OS << format("%9" PRId64 " ", (int64_t)getMemUsed());
+}
+
+
+//===----------------------------------------------------------------------===//
+// NamedRegionTimer Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+typedef StringMap<Timer> Name2TimerMap;
+
+class Name2PairMap {
+ StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map;
+public:
+ ~Name2PairMap() {
+ for (StringMap<std::pair<TimerGroup*, Name2TimerMap> >::iterator
+ I = Map.begin(), E = Map.end(); I != E; ++I)
+ delete I->second.first;
+ }
+
+ Timer &get(StringRef Name, StringRef GroupName) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ std::pair<TimerGroup*, Name2TimerMap> &GroupEntry = Map[GroupName];
+
+ if (!GroupEntry.first)
+ GroupEntry.first = new TimerGroup(GroupName);
+
+ Timer &T = GroupEntry.second[Name];
+ if (!T.isInitialized())
+ T.init(Name, *GroupEntry.first);
+ return T;
+ }
+};
+
+}
+
+static ManagedStatic<Name2TimerMap> NamedTimers;
+static ManagedStatic<Name2PairMap> NamedGroupedTimers;
+
+static Timer &getNamedRegionTimer(StringRef Name) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ Timer &T = (*NamedTimers)[Name];
+ if (!T.isInitialized())
+ T.init(Name);
+ return T;
+}
+
+NamedRegionTimer::NamedRegionTimer(StringRef Name,
+ bool Enabled)
+ : TimeRegion(!Enabled ? 0 : &getNamedRegionTimer(Name)) {}
+
+NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName,
+ bool Enabled)
+ : TimeRegion(!Enabled ? 0 : &NamedGroupedTimers->get(Name, GroupName)) {}
+
+//===----------------------------------------------------------------------===//
+// TimerGroup Implementation
+//===----------------------------------------------------------------------===//
+
+/// TimerGroupList - This is the global list of TimerGroups, maintained by the
+/// TimerGroup ctor/dtor and is protected by the TimerLock lock.
+static TimerGroup *TimerGroupList = 0;
+
+TimerGroup::TimerGroup(StringRef name)
+ : Name(name.begin(), name.end()), FirstTimer(0) {
+
+ // Add the group to TimerGroupList.
+ sys::SmartScopedLock<true> L(*TimerLock);
+ if (TimerGroupList)
+ TimerGroupList->Prev = &Next;
+ Next = TimerGroupList;
+ Prev = &TimerGroupList;
+ TimerGroupList = this;
+}
+
+TimerGroup::~TimerGroup() {
+ // If the timer group is destroyed before the timers it owns, accumulate and
+ // print the timing data.
+ while (FirstTimer != 0)
+ removeTimer(*FirstTimer);
+
+ // Remove the group from the TimerGroupList.
+ sys::SmartScopedLock<true> L(*TimerLock);
+ *Prev = Next;
+ if (Next)
+ Next->Prev = Prev;
+}
+
+
+void TimerGroup::removeTimer(Timer &T) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ // If the timer was started, move its data to TimersToPrint.
+ if (T.Started)
+ TimersToPrint.push_back(std::make_pair(T.Time, T.Name));
+
+ T.TG = 0;
+
+ // Unlink the timer from our list.
+ *T.Prev = T.Next;
+ if (T.Next)
+ T.Next->Prev = T.Prev;
+
+ // Print the report when all timers in this group are destroyed if some of
+ // them were started.
+ if (FirstTimer != 0 || TimersToPrint.empty())
+ return;
+
+ raw_ostream *OutStream = CreateInfoOutputFile();
+ PrintQueuedTimers(*OutStream);
+ delete OutStream; // Close the file.
+}
+
+void TimerGroup::addTimer(Timer &T) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ // Add the timer to our list.
+ if (FirstTimer)
+ FirstTimer->Prev = &T.Next;
+ T.Next = FirstTimer;
+ T.Prev = &FirstTimer;
+ FirstTimer = &T;
+}
+
+void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
+ // Sort the timers in descending order by amount of time taken.
+ std::sort(TimersToPrint.begin(), TimersToPrint.end());
+
+ TimeRecord Total;
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
+ Total += TimersToPrint[i].first;
+
+ // Print out timing header.
+ OS << "===" << std::string(73, '-') << "===\n";
+ // Figure out how many spaces to indent TimerGroup name.
+ unsigned Padding = (80-Name.length())/2;
+ if (Padding > 80) Padding = 0; // Don't allow "negative" numbers
+ OS.indent(Padding) << Name << '\n';
+ OS << "===" << std::string(73, '-') << "===\n";
+
+ // If this is not an collection of ungrouped times, print the total time.
+ // Ungrouped timers don't really make sense to add up. We still print the
+ // TOTAL line to make the percentages make sense.
+ if (this != DefaultTimerGroup)
+ OS << format(" Total Execution Time: %5.4f seconds (%5.4f wall clock)\n",
+ Total.getProcessTime(), Total.getWallTime());
+ OS << '\n';
+
+ if (Total.getUserTime())
+ OS << " ---User Time---";
+ if (Total.getSystemTime())
+ OS << " --System Time--";
+ if (Total.getProcessTime())
+ OS << " --User+System--";
+ OS << " ---Wall Time---";
+ if (Total.getMemUsed())
+ OS << " ---Mem---";
+ OS << " --- Name ---\n";
+
+ // Loop through all of the timing data, printing it out.
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) {
+ const std::pair<TimeRecord, std::string> &Entry = TimersToPrint[e-i-1];
+ Entry.first.print(Total, OS);
+ OS << Entry.second << '\n';
+ }
+
+ Total.print(Total, OS);
+ OS << "Total\n\n";
+ OS.flush();
+
+ TimersToPrint.clear();
+}
+
+/// print - Print any started timers in this group and zero them.
+void TimerGroup::print(raw_ostream &OS) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ // See if any of our timers were started, if so add them to TimersToPrint and
+ // reset them.
+ for (Timer *T = FirstTimer; T; T = T->Next) {
+ if (!T->Started) continue;
+ TimersToPrint.push_back(std::make_pair(T->Time, T->Name));
+
+ // Clear out the time.
+ T->Started = 0;
+ T->Time = TimeRecord();
+ }
+
+ // If any timers were started, print the group.
+ if (!TimersToPrint.empty())
+ PrintQueuedTimers(OS);
+}
+
+/// printAll - This static method prints all timers and clears them all out.
+void TimerGroup::printAll(raw_ostream &OS) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
+ TG->print(OS);
+}
diff --git a/contrib/llvm/lib/Support/ToolOutputFile.cpp b/contrib/llvm/lib/Support/ToolOutputFile.cpp
new file mode 100644
index 000000000000..e7ca927ea537
--- /dev/null
+++ b/contrib/llvm/lib/Support/ToolOutputFile.cpp
@@ -0,0 +1,43 @@
+//===--- ToolOutputFile.cpp - Implement the tool_output_file class --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the tool_output_file class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Signals.h"
+using namespace llvm;
+
+tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
+ : Filename(filename), Keep(false) {
+ // Arrange for the file to be deleted if the process is killed.
+ if (Filename != "-")
+ sys::RemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::CleanupInstaller::~CleanupInstaller() {
+ // Delete the file if the client hasn't told us not to.
+ if (!Keep && Filename != "-")
+ sys::Path(Filename).eraseFromDisk();
+
+ // Ok, the file is successfully written and closed, or deleted. There's no
+ // further need to clean it up on signals.
+ if (Filename != "-")
+ sys::DontRemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
+ unsigned Flags)
+ : Installer(filename),
+ OS(filename, ErrorInfo, Flags) {
+ // If open fails, no cleanup is needed.
+ if (!ErrorInfo.empty())
+ Installer.Keep = true;
+}
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
new file mode 100644
index 000000000000..d2508ac1ef3a
--- /dev/null
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -0,0 +1,785 @@
+//===--- Triple.cpp - Target triple helper class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstring>
+using namespace llvm;
+
+const char *Triple::getArchTypeName(ArchType Kind) {
+ switch (Kind) {
+ case UnknownArch: return "unknown";
+
+ case aarch64: return "aarch64";
+ case arm: return "arm";
+ case hexagon: return "hexagon";
+ case mips: return "mips";
+ case mipsel: return "mipsel";
+ case mips64: return "mips64";
+ case mips64el:return "mips64el";
+ case msp430: return "msp430";
+ case ppc64: return "powerpc64";
+ case ppc: return "powerpc";
+ case r600: return "r600";
+ case sparc: return "sparc";
+ case sparcv9: return "sparcv9";
+ case tce: return "tce";
+ case thumb: return "thumb";
+ case x86: return "i386";
+ case x86_64: return "x86_64";
+ case xcore: return "xcore";
+ case mblaze: return "mblaze";
+ case nvptx: return "nvptx";
+ case nvptx64: return "nvptx64";
+ case le32: return "le32";
+ case amdil: return "amdil";
+ case spir: return "spir";
+ case spir64: return "spir64";
+ }
+
+ llvm_unreachable("Invalid ArchType!");
+}
+
+const char *Triple::getArchTypePrefix(ArchType Kind) {
+ switch (Kind) {
+ default:
+ return 0;
+
+ case aarch64: return "aarch64";
+
+ case arm:
+ case thumb: return "arm";
+
+ case ppc64:
+ case ppc: return "ppc";
+
+ case mblaze: return "mblaze";
+
+ case mips:
+ case mipsel:
+ case mips64:
+ case mips64el:return "mips";
+
+ case hexagon: return "hexagon";
+
+ case r600: return "r600";
+
+ case sparcv9:
+ case sparc: return "sparc";
+
+ case x86:
+ case x86_64: return "x86";
+
+ case xcore: return "xcore";
+
+ case nvptx: return "nvptx";
+ case nvptx64: return "nvptx";
+ case le32: return "le32";
+ case amdil: return "amdil";
+ case spir: return "spir";
+ case spir64: return "spir";
+ }
+}
+
+const char *Triple::getVendorTypeName(VendorType Kind) {
+ switch (Kind) {
+ case UnknownVendor: return "unknown";
+
+ case Apple: return "apple";
+ case PC: return "pc";
+ case SCEI: return "scei";
+ case BGP: return "bgp";
+ case BGQ: return "bgq";
+ case Freescale: return "fsl";
+ case IBM: return "ibm";
+ }
+
+ llvm_unreachable("Invalid VendorType!");
+}
+
+const char *Triple::getOSTypeName(OSType Kind) {
+ switch (Kind) {
+ case UnknownOS: return "unknown";
+
+ case AuroraUX: return "auroraux";
+ case Cygwin: return "cygwin";
+ case Darwin: return "darwin";
+ case DragonFly: return "dragonfly";
+ case FreeBSD: return "freebsd";
+ case IOS: return "ios";
+ case KFreeBSD: return "kfreebsd";
+ case Linux: return "linux";
+ case Lv2: return "lv2";
+ case MacOSX: return "macosx";
+ case MinGW32: return "mingw32";
+ case NetBSD: return "netbsd";
+ case OpenBSD: return "openbsd";
+ case Solaris: return "solaris";
+ case Win32: return "win32";
+ case Haiku: return "haiku";
+ case Minix: return "minix";
+ case RTEMS: return "rtems";
+ case NaCl: return "nacl";
+ case CNK: return "cnk";
+ case Bitrig: return "bitrig";
+ case AIX: return "aix";
+ }
+
+ llvm_unreachable("Invalid OSType");
+}
+
+const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
+ switch (Kind) {
+ case UnknownEnvironment: return "unknown";
+ case GNU: return "gnu";
+ case GNUEABIHF: return "gnueabihf";
+ case GNUEABI: return "gnueabi";
+ case GNUX32: return "gnux32";
+ case EABI: return "eabi";
+ case MachO: return "macho";
+ case Android: return "android";
+ case ELF: return "elf";
+ }
+
+ llvm_unreachable("Invalid EnvironmentType!");
+}
+
+Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
+ return StringSwitch<Triple::ArchType>(Name)
+ .Case("aarch64", aarch64)
+ .Case("arm", arm)
+ .Case("mips", mips)
+ .Case("mipsel", mipsel)
+ .Case("mips64", mips64)
+ .Case("mips64el", mips64el)
+ .Case("msp430", msp430)
+ .Case("ppc64", ppc64)
+ .Case("ppc32", ppc)
+ .Case("ppc", ppc)
+ .Case("mblaze", mblaze)
+ .Case("r600", r600)
+ .Case("hexagon", hexagon)
+ .Case("sparc", sparc)
+ .Case("sparcv9", sparcv9)
+ .Case("tce", tce)
+ .Case("thumb", thumb)
+ .Case("x86", x86)
+ .Case("x86-64", x86_64)
+ .Case("xcore", xcore)
+ .Case("nvptx", nvptx)
+ .Case("nvptx64", nvptx64)
+ .Case("le32", le32)
+ .Case("amdil", amdil)
+ .Case("spir", spir)
+ .Case("spir64", spir64)
+ .Default(UnknownArch);
+}
+
+// Returns architecture name that is understood by the target assembler.
+const char *Triple::getArchNameForAssembler() {
+ if (!isOSDarwin() && getVendor() != Triple::Apple)
+ return NULL;
+
+ return StringSwitch<const char*>(getArchName())
+ .Case("i386", "i386")
+ .Case("x86_64", "x86_64")
+ .Case("powerpc", "ppc")
+ .Case("powerpc64", "ppc64")
+ .Cases("mblaze", "microblaze", "mblaze")
+ .Case("arm", "arm")
+ .Cases("armv4t", "thumbv4t", "armv4t")
+ .Cases("armv5", "armv5e", "thumbv5", "thumbv5e", "armv5")
+ .Cases("armv6", "thumbv6", "armv6")
+ .Cases("armv7", "thumbv7", "armv7")
+ .Case("r600", "r600")
+ .Case("nvptx", "nvptx")
+ .Case("nvptx64", "nvptx64")
+ .Case("le32", "le32")
+ .Case("amdil", "amdil")
+ .Case("spir", "spir")
+ .Case("spir64", "spir64")
+ .Default(NULL);
+}
+
+static Triple::ArchType parseArch(StringRef ArchName) {
+ return StringSwitch<Triple::ArchType>(ArchName)
+ .Cases("i386", "i486", "i586", "i686", Triple::x86)
+ // FIXME: Do we need to support these?
+ .Cases("i786", "i886", "i986", Triple::x86)
+ .Cases("amd64", "x86_64", Triple::x86_64)
+ .Case("powerpc", Triple::ppc)
+ .Cases("powerpc64", "ppu", Triple::ppc64)
+ .Case("mblaze", Triple::mblaze)
+ .Case("aarch64", Triple::aarch64)
+ .Cases("arm", "xscale", Triple::arm)
+ // FIXME: It would be good to replace these with explicit names for all the
+ // various suffixes supported.
+ .StartsWith("armv", Triple::arm)
+ .Case("thumb", Triple::thumb)
+ .StartsWith("thumbv", Triple::thumb)
+ .Case("msp430", Triple::msp430)
+ .Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
+ .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
+ .Cases("mips64", "mips64eb", Triple::mips64)
+ .Case("mips64el", Triple::mips64el)
+ .Case("r600", Triple::r600)
+ .Case("hexagon", Triple::hexagon)
+ .Case("sparc", Triple::sparc)
+ .Case("sparcv9", Triple::sparcv9)
+ .Case("tce", Triple::tce)
+ .Case("xcore", Triple::xcore)
+ .Case("nvptx", Triple::nvptx)
+ .Case("nvptx64", Triple::nvptx64)
+ .Case("le32", Triple::le32)
+ .Case("amdil", Triple::amdil)
+ .Case("spir", Triple::spir)
+ .Case("spir64", Triple::spir64)
+ .Default(Triple::UnknownArch);
+}
+
+static Triple::VendorType parseVendor(StringRef VendorName) {
+ return StringSwitch<Triple::VendorType>(VendorName)
+ .Case("apple", Triple::Apple)
+ .Case("pc", Triple::PC)
+ .Case("scei", Triple::SCEI)
+ .Case("bgp", Triple::BGP)
+ .Case("bgq", Triple::BGQ)
+ .Case("fsl", Triple::Freescale)
+ .Case("ibm", Triple::IBM)
+ .Default(Triple::UnknownVendor);
+}
+
+static Triple::OSType parseOS(StringRef OSName) {
+ return StringSwitch<Triple::OSType>(OSName)
+ .StartsWith("auroraux", Triple::AuroraUX)
+ .StartsWith("cygwin", Triple::Cygwin)
+ .StartsWith("darwin", Triple::Darwin)
+ .StartsWith("dragonfly", Triple::DragonFly)
+ .StartsWith("freebsd", Triple::FreeBSD)
+ .StartsWith("ios", Triple::IOS)
+ .StartsWith("kfreebsd", Triple::KFreeBSD)
+ .StartsWith("linux", Triple::Linux)
+ .StartsWith("lv2", Triple::Lv2)
+ .StartsWith("macosx", Triple::MacOSX)
+ .StartsWith("mingw32", Triple::MinGW32)
+ .StartsWith("netbsd", Triple::NetBSD)
+ .StartsWith("openbsd", Triple::OpenBSD)
+ .StartsWith("solaris", Triple::Solaris)
+ .StartsWith("win32", Triple::Win32)
+ .StartsWith("haiku", Triple::Haiku)
+ .StartsWith("minix", Triple::Minix)
+ .StartsWith("rtems", Triple::RTEMS)
+ .StartsWith("nacl", Triple::NaCl)
+ .StartsWith("cnk", Triple::CNK)
+ .StartsWith("bitrig", Triple::Bitrig)
+ .StartsWith("aix", Triple::AIX)
+ .Default(Triple::UnknownOS);
+}
+
+static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
+ return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
+ .StartsWith("eabi", Triple::EABI)
+ .StartsWith("gnueabihf", Triple::GNUEABIHF)
+ .StartsWith("gnueabi", Triple::GNUEABI)
+ .StartsWith("gnux32", Triple::GNUX32)
+ .StartsWith("gnu", Triple::GNU)
+ .StartsWith("macho", Triple::MachO)
+ .StartsWith("android", Triple::Android)
+ .StartsWith("elf", Triple::ELF)
+ .Default(Triple::UnknownEnvironment);
+}
+
+/// \brief Construct a triple from the string representation provided.
+///
+/// This stores the string representation and parses the various pieces into
+/// enum members.
+Triple::Triple(const Twine &Str)
+ : Data(Str.str()),
+ Arch(parseArch(getArchName())),
+ Vendor(parseVendor(getVendorName())),
+ OS(parseOS(getOSName())),
+ Environment(parseEnvironment(getEnvironmentName())) {
+}
+
+/// \brief Construct a triple from string representations of the architecture,
+/// vendor, and OS.
+///
+/// This joins each argument into a canonical string representation and parses
+/// them into enum members. It leaves the environment unknown and omits it from
+/// the string representation.
+Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr)
+ : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr).str()),
+ Arch(parseArch(ArchStr.str())),
+ Vendor(parseVendor(VendorStr.str())),
+ OS(parseOS(OSStr.str())),
+ Environment() {
+}
+
+/// \brief Construct a triple from string representations of the architecture,
+/// vendor, OS, and environment.
+///
+/// This joins each argument into a canonical string representation and parses
+/// them into enum members.
+Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr,
+ const Twine &EnvironmentStr)
+ : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr + Twine('-') +
+ EnvironmentStr).str()),
+ Arch(parseArch(ArchStr.str())),
+ Vendor(parseVendor(VendorStr.str())),
+ OS(parseOS(OSStr.str())),
+ Environment(parseEnvironment(EnvironmentStr.str())) {
+}
+
+std::string Triple::normalize(StringRef Str) {
+ // Parse into components.
+ SmallVector<StringRef, 4> Components;
+ Str.split(Components, "-");
+
+ // If the first component corresponds to a known architecture, preferentially
+ // use it for the architecture. If the second component corresponds to a
+ // known vendor, preferentially use it for the vendor, etc. This avoids silly
+ // component movement when a component parses as (eg) both a valid arch and a
+ // valid os.
+ ArchType Arch = UnknownArch;
+ if (Components.size() > 0)
+ Arch = parseArch(Components[0]);
+ VendorType Vendor = UnknownVendor;
+ if (Components.size() > 1)
+ Vendor = parseVendor(Components[1]);
+ OSType OS = UnknownOS;
+ if (Components.size() > 2)
+ OS = parseOS(Components[2]);
+ EnvironmentType Environment = UnknownEnvironment;
+ if (Components.size() > 3)
+ Environment = parseEnvironment(Components[3]);
+
+ // Note which components are already in their final position. These will not
+ // be moved.
+ bool Found[4];
+ Found[0] = Arch != UnknownArch;
+ Found[1] = Vendor != UnknownVendor;
+ Found[2] = OS != UnknownOS;
+ Found[3] = Environment != UnknownEnvironment;
+
+ // If they are not there already, permute the components into their canonical
+ // positions by seeing if they parse as a valid architecture, and if so moving
+ // the component to the architecture position etc.
+ for (unsigned Pos = 0; Pos != array_lengthof(Found); ++Pos) {
+ if (Found[Pos])
+ continue; // Already in the canonical position.
+
+ for (unsigned Idx = 0; Idx != Components.size(); ++Idx) {
+ // Do not reparse any components that already matched.
+ if (Idx < array_lengthof(Found) && Found[Idx])
+ continue;
+
+ // Does this component parse as valid for the target position?
+ bool Valid = false;
+ StringRef Comp = Components[Idx];
+ switch (Pos) {
+ default: llvm_unreachable("unexpected component type!");
+ case 0:
+ Arch = parseArch(Comp);
+ Valid = Arch != UnknownArch;
+ break;
+ case 1:
+ Vendor = parseVendor(Comp);
+ Valid = Vendor != UnknownVendor;
+ break;
+ case 2:
+ OS = parseOS(Comp);
+ Valid = OS != UnknownOS;
+ break;
+ case 3:
+ Environment = parseEnvironment(Comp);
+ Valid = Environment != UnknownEnvironment;
+ break;
+ }
+ if (!Valid)
+ continue; // Nope, try the next component.
+
+ // Move the component to the target position, pushing any non-fixed
+ // components that are in the way to the right. This tends to give
+ // good results in the common cases of a forgotten vendor component
+ // or a wrongly positioned environment.
+ if (Pos < Idx) {
+ // Insert left, pushing the existing components to the right. For
+ // example, a-b-i386 -> i386-a-b when moving i386 to the front.
+ StringRef CurrentComponent(""); // The empty component.
+ // Replace the component we are moving with an empty component.
+ std::swap(CurrentComponent, Components[Idx]);
+ // Insert the component being moved at Pos, displacing any existing
+ // components to the right.
+ for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
+ // Skip over any fixed components.
+ while (i < array_lengthof(Found) && Found[i])
+ ++i;
+ // Place the component at the new position, getting the component
+ // that was at this position - it will be moved right.
+ std::swap(CurrentComponent, Components[i]);
+ }
+ } else if (Pos > Idx) {
+ // Push right by inserting empty components until the component at Idx
+ // reaches the target position Pos. For example, pc-a -> -pc-a when
+ // moving pc to the second position.
+ do {
+ // Insert one empty component at Idx.
+ StringRef CurrentComponent(""); // The empty component.
+ for (unsigned i = Idx; i < Components.size();) {
+ // Place the component at the new position, getting the component
+ // that was at this position - it will be moved right.
+ std::swap(CurrentComponent, Components[i]);
+ // If it was placed on top of an empty component then we are done.
+ if (CurrentComponent.empty())
+ break;
+ // Advance to the next component, skipping any fixed components.
+ while (++i < array_lengthof(Found) && Found[i])
+ ;
+ }
+ // The last component was pushed off the end - append it.
+ if (!CurrentComponent.empty())
+ Components.push_back(CurrentComponent);
+
+ // Advance Idx to the component's new position.
+ while (++Idx < array_lengthof(Found) && Found[Idx])
+ ;
+ } while (Idx < Pos); // Add more until the final position is reached.
+ }
+ assert(Pos < Components.size() && Components[Pos] == Comp &&
+ "Component moved wrong!");
+ Found[Pos] = true;
+ break;
+ }
+ }
+
+ // Special case logic goes here. At this point Arch, Vendor and OS have the
+ // correct values for the computed components.
+
+ // Stick the corrected components back together to form the normalized string.
+ std::string Normalized;
+ for (unsigned i = 0, e = Components.size(); i != e; ++i) {
+ if (i) Normalized += '-';
+ Normalized += Components[i];
+ }
+ return Normalized;
+}
+
+StringRef Triple::getArchName() const {
+ return StringRef(Data).split('-').first; // Isolate first component
+}
+
+StringRef Triple::getVendorName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ return Tmp.split('-').first; // Isolate second component
+}
+
+StringRef Triple::getOSName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ Tmp = Tmp.split('-').second; // Strip second component
+ return Tmp.split('-').first; // Isolate third component
+}
+
+StringRef Triple::getEnvironmentName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ Tmp = Tmp.split('-').second; // Strip second component
+ return Tmp.split('-').second; // Strip third component
+}
+
+StringRef Triple::getOSAndEnvironmentName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ return Tmp.split('-').second; // Strip second component
+}
+
+static unsigned EatNumber(StringRef &Str) {
+ assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number");
+ unsigned Result = 0;
+
+ do {
+ // Consume the leading digit.
+ Result = Result*10 + (Str[0] - '0');
+
+ // Eat the digit.
+ Str = Str.substr(1);
+ } while (!Str.empty() && Str[0] >= '0' && Str[0] <= '9');
+
+ return Result;
+}
+
+void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const {
+ StringRef OSName = getOSName();
+
+ // Assume that the OS portion of the triple starts with the canonical name.
+ StringRef OSTypeName = getOSTypeName(getOS());
+ if (OSName.startswith(OSTypeName))
+ OSName = OSName.substr(OSTypeName.size());
+
+ // Any unset version defaults to 0.
+ Major = Minor = Micro = 0;
+
+ // Parse up to three components.
+ unsigned *Components[3] = { &Major, &Minor, &Micro };
+ for (unsigned i = 0; i != 3; ++i) {
+ if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
+ break;
+
+ // Consume the leading number.
+ *Components[i] = EatNumber(OSName);
+
+ // Consume the separator, if present.
+ if (OSName.startswith("."))
+ OSName = OSName.substr(1);
+ }
+}
+
+bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const {
+ getOSVersion(Major, Minor, Micro);
+
+ switch (getOS()) {
+ default: llvm_unreachable("unexpected OS for Darwin triple");
+ case Darwin:
+ // Default to darwin8, i.e., MacOSX 10.4.
+ if (Major == 0)
+ Major = 8;
+ // Darwin version numbers are skewed from OS X versions.
+ if (Major < 4)
+ return false;
+ Micro = 0;
+ Minor = Major - 4;
+ Major = 10;
+ break;
+ case MacOSX:
+ // Default to 10.4.
+ if (Major == 0) {
+ Major = 10;
+ Minor = 4;
+ }
+ if (Major != 10)
+ return false;
+ break;
+ case IOS:
+ // Ignore the version from the triple. This is only handled because the
+ // the clang driver combines OS X and IOS support into a common Darwin
+ // toolchain that wants to know the OS X version number even when targeting
+ // IOS.
+ Major = 10;
+ Minor = 4;
+ Micro = 0;
+ break;
+ }
+ return true;
+}
+
+void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const {
+ switch (getOS()) {
+ default: llvm_unreachable("unexpected OS for Darwin triple");
+ case Darwin:
+ case MacOSX:
+ // Ignore the version from the triple. This is only handled because the
+ // the clang driver combines OS X and IOS support into a common Darwin
+ // toolchain that wants to know the iOS version number even when targeting
+ // OS X.
+ Major = 3;
+ Minor = 0;
+ Micro = 0;
+ break;
+ case IOS:
+ getOSVersion(Major, Minor, Micro);
+ // Default to 3.0.
+ if (Major == 0)
+ Major = 3;
+ break;
+ }
+}
+
+void Triple::setTriple(const Twine &Str) {
+ *this = Triple(Str);
+}
+
+void Triple::setArch(ArchType Kind) {
+ setArchName(getArchTypeName(Kind));
+}
+
+void Triple::setVendor(VendorType Kind) {
+ setVendorName(getVendorTypeName(Kind));
+}
+
+void Triple::setOS(OSType Kind) {
+ setOSName(getOSTypeName(Kind));
+}
+
+void Triple::setEnvironment(EnvironmentType Kind) {
+ setEnvironmentName(getEnvironmentTypeName(Kind));
+}
+
+void Triple::setArchName(StringRef Str) {
+ // Work around a miscompilation bug for Twines in gcc 4.0.3.
+ SmallString<64> Triple;
+ Triple += Str;
+ Triple += "-";
+ Triple += getVendorName();
+ Triple += "-";
+ Triple += getOSAndEnvironmentName();
+ setTriple(Triple.str());
+}
+
+void Triple::setVendorName(StringRef Str) {
+ setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName());
+}
+
+void Triple::setOSName(StringRef Str) {
+ if (hasEnvironment())
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str +
+ "-" + getEnvironmentName());
+ else
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
+
+void Triple::setEnvironmentName(StringRef Str) {
+ setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
+ "-" + Str);
+}
+
+void Triple::setOSAndEnvironmentName(StringRef Str) {
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
+
+static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
+ switch (Arch) {
+ case llvm::Triple::UnknownArch:
+ return 0;
+
+ case llvm::Triple::msp430:
+ return 16;
+
+ case llvm::Triple::amdil:
+ case llvm::Triple::arm:
+ case llvm::Triple::hexagon:
+ case llvm::Triple::le32:
+ case llvm::Triple::mblaze:
+ case llvm::Triple::mips:
+ case llvm::Triple::mipsel:
+ case llvm::Triple::nvptx:
+ case llvm::Triple::ppc:
+ case llvm::Triple::r600:
+ case llvm::Triple::sparc:
+ case llvm::Triple::tce:
+ case llvm::Triple::thumb:
+ case llvm::Triple::x86:
+ case llvm::Triple::xcore:
+ case llvm::Triple::spir:
+ return 32;
+
+ case llvm::Triple::aarch64:
+ case llvm::Triple::mips64:
+ case llvm::Triple::mips64el:
+ case llvm::Triple::nvptx64:
+ case llvm::Triple::ppc64:
+ case llvm::Triple::sparcv9:
+ case llvm::Triple::x86_64:
+ case llvm::Triple::spir64:
+ return 64;
+ }
+ llvm_unreachable("Invalid architecture value");
+}
+
+bool Triple::isArch64Bit() const {
+ return getArchPointerBitWidth(getArch()) == 64;
+}
+
+bool Triple::isArch32Bit() const {
+ return getArchPointerBitWidth(getArch()) == 32;
+}
+
+bool Triple::isArch16Bit() const {
+ return getArchPointerBitWidth(getArch()) == 16;
+}
+
+Triple Triple::get32BitArchVariant() const {
+ Triple T(*this);
+ switch (getArch()) {
+ case Triple::UnknownArch:
+ case Triple::aarch64:
+ case Triple::msp430:
+ T.setArch(UnknownArch);
+ break;
+
+ case Triple::amdil:
+ case Triple::spir:
+ case Triple::arm:
+ case Triple::hexagon:
+ case Triple::le32:
+ case Triple::mblaze:
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::nvptx:
+ case Triple::ppc:
+ case Triple::r600:
+ case Triple::sparc:
+ case Triple::tce:
+ case Triple::thumb:
+ case Triple::x86:
+ case Triple::xcore:
+ // Already 32-bit.
+ break;
+
+ case Triple::mips64: T.setArch(Triple::mips); break;
+ case Triple::mips64el: T.setArch(Triple::mipsel); break;
+ case Triple::nvptx64: T.setArch(Triple::nvptx); break;
+ case Triple::ppc64: T.setArch(Triple::ppc); break;
+ case Triple::sparcv9: T.setArch(Triple::sparc); break;
+ case Triple::x86_64: T.setArch(Triple::x86); break;
+ case Triple::spir64: T.setArch(Triple::spir); break;
+ }
+ return T;
+}
+
+Triple Triple::get64BitArchVariant() const {
+ Triple T(*this);
+ switch (getArch()) {
+ case Triple::UnknownArch:
+ case Triple::amdil:
+ case Triple::arm:
+ case Triple::hexagon:
+ case Triple::le32:
+ case Triple::mblaze:
+ case Triple::msp430:
+ case Triple::r600:
+ case Triple::tce:
+ case Triple::thumb:
+ case Triple::xcore:
+ T.setArch(UnknownArch);
+ break;
+
+ case Triple::aarch64:
+ case Triple::spir64:
+ case Triple::mips64:
+ case Triple::mips64el:
+ case Triple::nvptx64:
+ case Triple::ppc64:
+ case Triple::sparcv9:
+ case Triple::x86_64:
+ // Already 64-bit.
+ break;
+
+ case Triple::mips: T.setArch(Triple::mips64); break;
+ case Triple::mipsel: T.setArch(Triple::mips64el); break;
+ case Triple::nvptx: T.setArch(Triple::nvptx64); break;
+ case Triple::ppc: T.setArch(Triple::ppc64); break;
+ case Triple::sparc: T.setArch(Triple::sparcv9); break;
+ case Triple::x86: T.setArch(Triple::x86_64); break;
+ case Triple::spir: T.setArch(Triple::spir64); break;
+ }
+ return T;
+}
diff --git a/contrib/llvm/lib/Support/Twine.cpp b/contrib/llvm/lib/Support/Twine.cpp
new file mode 100644
index 000000000000..3d04bc34f0eb
--- /dev/null
+++ b/contrib/llvm/lib/Support/Twine.cpp
@@ -0,0 +1,171 @@
+//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+std::string Twine::str() const {
+ // If we're storing only a std::string, just return it.
+ if (LHSKind == StdStringKind && RHSKind == EmptyKind)
+ return *LHS.stdString;
+
+ // Otherwise, flatten and copy the contents first.
+ SmallString<256> Vec;
+ return toStringRef(Vec).str();
+}
+
+void Twine::toVector(SmallVectorImpl<char> &Out) const {
+ raw_svector_ostream OS(Out);
+ print(OS);
+}
+
+StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const {
+ if (isSingleStringRef())
+ return getSingleStringRef();
+ toVector(Out);
+ return StringRef(Out.data(), Out.size());
+}
+
+StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
+ if (isUnary()) {
+ switch (getLHSKind()) {
+ case CStringKind:
+ // Already null terminated, yay!
+ return StringRef(LHS.cString);
+ case StdStringKind: {
+ const std::string *str = LHS.stdString;
+ return StringRef(str->c_str(), str->size());
+ }
+ default:
+ break;
+ }
+ }
+ toVector(Out);
+ Out.push_back(0);
+ Out.pop_back();
+ return StringRef(Out.data(), Out.size());
+}
+
+void Twine::printOneChild(raw_ostream &OS, Child Ptr,
+ NodeKind Kind) const {
+ switch (Kind) {
+ case Twine::NullKind: break;
+ case Twine::EmptyKind: break;
+ case Twine::TwineKind:
+ Ptr.twine->print(OS);
+ break;
+ case Twine::CStringKind:
+ OS << Ptr.cString;
+ break;
+ case Twine::StdStringKind:
+ OS << *Ptr.stdString;
+ break;
+ case Twine::StringRefKind:
+ OS << *Ptr.stringRef;
+ break;
+ case Twine::CharKind:
+ OS << Ptr.character;
+ break;
+ case Twine::DecUIKind:
+ OS << Ptr.decUI;
+ break;
+ case Twine::DecIKind:
+ OS << Ptr.decI;
+ break;
+ case Twine::DecULKind:
+ OS << *Ptr.decUL;
+ break;
+ case Twine::DecLKind:
+ OS << *Ptr.decL;
+ break;
+ case Twine::DecULLKind:
+ OS << *Ptr.decULL;
+ break;
+ case Twine::DecLLKind:
+ OS << *Ptr.decLL;
+ break;
+ case Twine::UHexKind:
+ OS.write_hex(*Ptr.uHex);
+ break;
+ }
+}
+
+void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr,
+ NodeKind Kind) const {
+ switch (Kind) {
+ case Twine::NullKind:
+ OS << "null"; break;
+ case Twine::EmptyKind:
+ OS << "empty"; break;
+ case Twine::TwineKind:
+ OS << "rope:";
+ Ptr.twine->printRepr(OS);
+ break;
+ case Twine::CStringKind:
+ OS << "cstring:\""
+ << Ptr.cString << "\"";
+ break;
+ case Twine::StdStringKind:
+ OS << "std::string:\""
+ << Ptr.stdString << "\"";
+ break;
+ case Twine::StringRefKind:
+ OS << "stringref:\""
+ << Ptr.stringRef << "\"";
+ break;
+ case Twine::CharKind:
+ OS << "char:\"" << Ptr.character << "\"";
+ break;
+ case Twine::DecUIKind:
+ OS << "decUI:\"" << Ptr.decUI << "\"";
+ break;
+ case Twine::DecIKind:
+ OS << "decI:\"" << Ptr.decI << "\"";
+ break;
+ case Twine::DecULKind:
+ OS << "decUL:\"" << *Ptr.decUL << "\"";
+ break;
+ case Twine::DecLKind:
+ OS << "decL:\"" << *Ptr.decL << "\"";
+ break;
+ case Twine::DecULLKind:
+ OS << "decULL:\"" << *Ptr.decULL << "\"";
+ break;
+ case Twine::DecLLKind:
+ OS << "decLL:\"" << *Ptr.decLL << "\"";
+ break;
+ case Twine::UHexKind:
+ OS << "uhex:\"" << Ptr.uHex << "\"";
+ break;
+ }
+}
+
+void Twine::print(raw_ostream &OS) const {
+ printOneChild(OS, LHS, getLHSKind());
+ printOneChild(OS, RHS, getRHSKind());
+}
+
+void Twine::printRepr(raw_ostream &OS) const {
+ OS << "(Twine ";
+ printOneChildRepr(OS, LHS, getLHSKind());
+ OS << " ";
+ printOneChildRepr(OS, RHS, getRHSKind());
+ OS << ")";
+}
+
+void Twine::dump() const {
+ print(llvm::dbgs());
+}
+
+void Twine::dumpRepr() const {
+ printRepr(llvm::dbgs());
+}
diff --git a/contrib/llvm/lib/Support/Unix/Host.inc b/contrib/llvm/lib/Support/Unix/Host.inc
new file mode 100644
index 000000000000..7f79db074135
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Host.inc
@@ -0,0 +1,69 @@
+ //===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX Host support.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ADT/StringRef.h"
+#include "Unix.h"
+#include <sys/utsname.h>
+#include <cctype>
+#include <string>
+#include <cstdlib> // ::getenv
+
+using namespace llvm;
+
+#ifdef __FreeBSD__
+std::string sys::getDefaultTargetTriple() {
+ return LLVM_DEFAULT_TARGET_TRIPLE;
+}
+#else // __FreeBSD__
+static std::string getOSVersion() {
+ struct utsname info;
+
+ if (uname(&info))
+ return "";
+
+ return info.release;
+}
+
+std::string sys::getDefaultTargetTriple() {
+ StringRef TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE);
+ std::pair<StringRef, StringRef> ArchSplit = TargetTripleString.split('-');
+
+ // Normalize the arch, since the target triple may not actually match the target.
+ std::string Arch = ArchSplit.first;
+
+ std::string Triple(Arch);
+ Triple += '-';
+ Triple += ArchSplit.second;
+
+ // Force i<N>86 to i386.
+ if (Triple[0] == 'i' && isdigit(Triple[1]) &&
+ Triple[2] == '8' && Triple[3] == '6')
+ Triple[1] = '3';
+
+ // On darwin, we want to update the version to match that of the
+ // target.
+ std::string::size_type DarwinDashIdx = Triple.find("-darwin");
+ if (DarwinDashIdx != std::string::npos) {
+ Triple.resize(DarwinDashIdx + strlen("-darwin"));
+ Triple += getOSVersion();
+ }
+
+ return Triple;
+}
+#endif // __FreeBSD__
diff --git a/contrib/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm/lib/Support/Unix/Memory.inc
new file mode 100644
index 000000000000..f397408c7743
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Memory.inc
@@ -0,0 +1,353 @@
+//===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some functions for various memory management utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Process.h"
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach/mach.h>
+#endif
+
+#if defined(__mips__)
+# if defined(__OpenBSD__)
+# include <mips64/sysarch.h>
+# else
+# include <sys/cachectl.h>
+# endif
+#endif
+
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+
+namespace {
+
+int getPosixProtectionFlags(unsigned Flags) {
+ switch (Flags) {
+ case llvm::sys::Memory::MF_READ:
+ return PROT_READ;
+ case llvm::sys::Memory::MF_WRITE:
+ return PROT_WRITE;
+ case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_WRITE:
+ return PROT_READ | PROT_WRITE;
+ case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_EXEC:
+ return PROT_READ | PROT_EXEC;
+ case llvm::sys::Memory::MF_READ |
+ llvm::sys::Memory::MF_WRITE |
+ llvm::sys::Memory::MF_EXEC:
+ return PROT_READ | PROT_WRITE | PROT_EXEC;
+ case llvm::sys::Memory::MF_EXEC:
+#if defined(__FreeBSD__)
+ // On PowerPC, having an executable page that has no read permission
+ // can have unintended consequences. The function InvalidateInstruction-
+ // Cache uses instructions dcbf and icbi, both of which are treated by
+ // the processor as loads. If the page has no read permissions,
+ // executing these instructions will result in a segmentation fault.
+ // Somehow, this problem is not present on Linux, but it does happen
+ // on FreeBSD.
+ return PROT_READ | PROT_EXEC;
+#else
+ return PROT_EXEC;
+#endif
+ default:
+ llvm_unreachable("Illegal memory protection flag specified!");
+ }
+ // Provide a default return value as required by some compilers.
+ return PROT_NONE;
+}
+
+} // namespace
+
+namespace llvm {
+namespace sys {
+
+MemoryBlock
+Memory::allocateMappedMemory(size_t NumBytes,
+ const MemoryBlock *const NearBlock,
+ unsigned PFlags,
+ error_code &EC) {
+ EC = error_code::success();
+ if (NumBytes == 0)
+ return MemoryBlock();
+
+ static const size_t PageSize = process::get_self()->page_size();
+ const size_t NumPages = (NumBytes+PageSize-1)/PageSize;
+
+ int fd = -1;
+#ifdef NEED_DEV_ZERO_FOR_MMAP
+ static int zero_fd = open("/dev/zero", O_RDWR);
+ if (zero_fd == -1) {
+ EC = error_code(errno, system_category());
+ return MemoryBlock();
+ }
+ fd = zero_fd;
+#endif
+
+ int MMFlags = MAP_PRIVATE |
+#ifdef HAVE_MMAP_ANONYMOUS
+ MAP_ANONYMOUS
+#else
+ MAP_ANON
+#endif
+ ; // Ends statement above
+
+ int Protect = getPosixProtectionFlags(PFlags);
+
+ // Use any near hint and the page size to set a page-aligned starting address
+ uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) +
+ NearBlock->size() : 0;
+ if (Start && Start % PageSize)
+ Start += PageSize - Start % PageSize;
+
+ void *Addr = ::mmap(reinterpret_cast<void*>(Start), PageSize*NumPages,
+ Protect, MMFlags, fd, 0);
+ if (Addr == MAP_FAILED) {
+ if (NearBlock) //Try again without a near hint
+ return allocateMappedMemory(NumBytes, 0, PFlags, EC);
+
+ EC = error_code(errno, system_category());
+ return MemoryBlock();
+ }
+
+ MemoryBlock Result;
+ Result.Address = Addr;
+ Result.Size = NumPages*PageSize;
+
+ if (PFlags & MF_EXEC)
+ Memory::InvalidateInstructionCache(Result.Address, Result.Size);
+
+ return Result;
+}
+
+error_code
+Memory::releaseMappedMemory(MemoryBlock &M) {
+ if (M.Address == 0 || M.Size == 0)
+ return error_code::success();
+
+ if (0 != ::munmap(M.Address, M.Size))
+ return error_code(errno, system_category());
+
+ M.Address = 0;
+ M.Size = 0;
+
+ return error_code::success();
+}
+
+error_code
+Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
+ if (M.Address == 0 || M.Size == 0)
+ return error_code::success();
+
+ if (!Flags)
+ return error_code(EINVAL, generic_category());
+
+ int Protect = getPosixProtectionFlags(Flags);
+
+ int Result = ::mprotect(M.Address, M.Size, Protect);
+ if (Result != 0)
+ return error_code(errno, system_category());
+
+ if (Flags & MF_EXEC)
+ Memory::InvalidateInstructionCache(M.Address, M.Size);
+
+ return error_code::success();
+}
+
+/// AllocateRWX - Allocate a slab of memory with read/write/execute
+/// permissions. This is typically used for JIT applications where we want
+/// to emit code to the memory then jump to it. Getting this type of memory
+/// is very OS specific.
+///
+MemoryBlock
+Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
+ std::string *ErrMsg) {
+ if (NumBytes == 0) return MemoryBlock();
+
+ size_t PageSize = process::get_self()->page_size();
+ size_t NumPages = (NumBytes+PageSize-1)/PageSize;
+
+ int fd = -1;
+#ifdef NEED_DEV_ZERO_FOR_MMAP
+ static int zero_fd = open("/dev/zero", O_RDWR);
+ if (zero_fd == -1) {
+ MakeErrMsg(ErrMsg, "Can't open /dev/zero device");
+ return MemoryBlock();
+ }
+ fd = zero_fd;
+#endif
+
+ int flags = MAP_PRIVATE |
+#ifdef HAVE_MMAP_ANONYMOUS
+ MAP_ANONYMOUS
+#else
+ MAP_ANON
+#endif
+ ;
+
+ void* start = NearBlock ? (unsigned char*)NearBlock->base() +
+ NearBlock->size() : 0;
+
+#if defined(__APPLE__) && defined(__arm__)
+ void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_EXEC,
+ flags, fd, 0);
+#else
+ void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC,
+ flags, fd, 0);
+#endif
+ if (pa == MAP_FAILED) {
+ if (NearBlock) //Try again without a near hint
+ return AllocateRWX(NumBytes, 0);
+
+ MakeErrMsg(ErrMsg, "Can't allocate RWX Memory");
+ return MemoryBlock();
+ }
+
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa,
+ (vm_size_t)(PageSize*NumPages), 0,
+ VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ if (KERN_SUCCESS != kr) {
+ MakeErrMsg(ErrMsg, "vm_protect max RX failed");
+ return MemoryBlock();
+ }
+
+ kr = vm_protect(mach_task_self(), (vm_address_t)pa,
+ (vm_size_t)(PageSize*NumPages), 0,
+ VM_PROT_READ | VM_PROT_WRITE);
+ if (KERN_SUCCESS != kr) {
+ MakeErrMsg(ErrMsg, "vm_protect RW failed");
+ return MemoryBlock();
+ }
+#endif
+
+ MemoryBlock result;
+ result.Address = pa;
+ result.Size = NumPages*PageSize;
+
+ return result;
+}
+
+bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+ if (M.Address == 0 || M.Size == 0) return false;
+ if (0 != ::munmap(M.Address, M.Size))
+ return MakeErrMsg(ErrMsg, "Can't release RWX Memory");
+ return false;
+}
+
+bool Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
+#if defined(__APPLE__) && defined(__arm__)
+ if (M.Address == 0 || M.Size == 0) return false;
+ Memory::InvalidateInstructionCache(M.Address, M.Size);
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
+ (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_WRITE);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
+#if defined(__APPLE__) && defined(__arm__)
+ if (M.Address == 0 || M.Size == 0) return false;
+ Memory::InvalidateInstructionCache(M.Address, M.Size);
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
+ (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool Memory::setRangeWritable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+ (vm_size_t)Size, 0,
+ VM_PROT_READ | VM_PROT_WRITE);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+ (vm_size_t)Size, 0,
+ VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+/// InvalidateInstructionCache - Before the JIT can run a block of code
+/// that has been emitted it must invalidate the instruction cache on some
+/// platforms.
+void Memory::InvalidateInstructionCache(const void *Addr,
+ size_t Len) {
+
+// icache invalidation for PPC and ARM.
+#if defined(__APPLE__)
+
+# if (defined(__POWERPC__) || defined (__ppc__) || \
+ defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
+ sys_icache_invalidate(const_cast<void *>(Addr), Len);
+# endif
+
+#else
+
+# if (defined(__POWERPC__) || defined (__ppc__) || \
+ defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
+ const size_t LineSize = 32;
+
+ const intptr_t Mask = ~(LineSize - 1);
+ const intptr_t StartLine = ((intptr_t) Addr) & Mask;
+ const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask;
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("dcbf 0, %0" : : "r"(Line));
+ asm volatile("sync");
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("icbi 0, %0" : : "r"(Line));
+ asm volatile("isync");
+# elif defined(__arm__) && defined(__GNUC__) && !defined(__FreeBSD__)
+ // FIXME: Can we safely always call this for __GNUC__ everywhere?
+ const char *Start = static_cast<const char *>(Addr);
+ const char *End = Start + Len;
+ __clear_cache(const_cast<char *>(Start), const_cast<char *>(End));
+# elif defined(__mips__)
+ const char *Start = static_cast<const char *>(Addr);
+# if defined(ANDROID)
+ // The declaration of "cacheflush" in Android bionic:
+ // extern int cacheflush(long start, long end, long flags);
+ const char *End = Start + Len;
+ long LStart = reinterpret_cast<long>(const_cast<char *>(Start));
+ long LEnd = reinterpret_cast<long>(const_cast<char *>(End));
+ cacheflush(LStart, LEnd, BCACHE);
+# else
+ cacheflush(const_cast<char *>(Start), Len, BCACHE);
+# endif
+# endif
+
+#endif // end apple
+
+ ValgrindDiscardTranslations(Addr, Len);
+}
+
+} // namespace sys
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/Unix/Mutex.inc b/contrib/llvm/lib/Support/Unix/Mutex.inc
new file mode 100644
index 000000000000..fe6b17041457
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Mutex.inc
@@ -0,0 +1,43 @@
+//===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm
+{
+using namespace sys;
+
+MutexImpl::MutexImpl( bool recursive)
+{
+}
+
+MutexImpl::~MutexImpl()
+{
+}
+
+bool
+MutexImpl::release()
+{
+ return true;
+}
+
+bool
+MutexImpl::tryacquire( void )
+{
+ return true;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc
new file mode 100644
index 000000000000..6a5ebb8cd9c7
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Path.inc
@@ -0,0 +1,900 @@
+//===- llvm/Support/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_UTIME_H
+#include <utime.h>
+#endif
+#if HAVE_TIME_H
+#include <time.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#endif
+
+// For GNU Hurd
+#if defined(__GNU__) && !defined(MAXPATHLEN)
+# define MAXPATHLEN 4096
+#endif
+
+// Put in a hack for Cygwin which falsely reports that the mkdtemp function
+// is available when it is not.
+#ifdef __CYGWIN__
+# undef HAVE_MKDTEMP
+#endif
+
+namespace {
+inline bool lastIsSlash(const std::string& path) {
+ return !path.empty() && path[path.length() - 1] == '/';
+}
+
+}
+
+namespace llvm {
+using namespace sys;
+
+const char sys::PathSeparator = ':';
+
+StringRef Path::GetEXESuffix() {
+ return StringRef();
+}
+
+Path::Path(StringRef p)
+ : path(p) {}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+ : path(StrStart, StrLen) {}
+
+Path&
+Path::operator=(StringRef that) {
+ path.assign(that.data(), that.size());
+ return *this;
+}
+
+bool
+Path::isValid() const {
+ // Empty paths are considered invalid here.
+ // This code doesn't check MAXPATHLEN because there's no need. Nothing in
+ // LLVM manipulates Paths with fixed-sizes arrays, and if the OS can't
+ // handle names longer than some limit, it'll report this on demand using
+ // ENAMETOLONG.
+ return !path.empty();
+}
+
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+ assert(NameStart);
+ if (NameLen == 0)
+ return false;
+ return NameStart[0] == '/';
+}
+
+bool
+Path::isAbsolute() const {
+ if (path.empty())
+ return false;
+ return path[0] == '/';
+}
+
+Path
+Path::GetRootDirectory() {
+ Path result;
+ result.set("/");
+ return result;
+}
+
+Path
+Path::GetTemporaryDirectory(std::string *ErrMsg) {
+#if defined(HAVE_MKDTEMP)
+ // The best way is with mkdtemp but that's not available on many systems,
+ // Linux and FreeBSD have it. Others probably won't.
+ char pathname[] = "/tmp/llvm_XXXXXX";
+ if (0 == mkdtemp(pathname)) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(pathname);
+#elif defined(HAVE_MKSTEMP)
+ // If no mkdtemp is available, mkstemp can be used to create a temporary file
+ // which is then removed and created as a directory. We prefer this over
+ // mktemp because of mktemp's inherent security and threading risks. We still
+ // have a slight race condition from the time the temporary file is created to
+ // the time it is re-created as a directoy.
+ char pathname[] = "/tmp/llvm_XXXXXX";
+ int fd = 0;
+ if (-1 == (fd = mkstemp(pathname))) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ ::close(fd);
+ ::unlink(pathname); // start race condition, ignore errors
+ if (-1 == ::mkdir(pathname, S_IRWXU)) { // end race condition
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(pathname);
+#elif defined(HAVE_MKTEMP)
+ // If a system doesn't have mkdtemp(3) or mkstemp(3) but it does have
+ // mktemp(3) then we'll assume that system (e.g. AIX) has a reasonable
+ // implementation of mktemp(3) and doesn't follow BSD 4.3's lead of replacing
+ // the XXXXXX with the pid of the process and a letter. That leads to only
+ // twenty six temporary files that can be generated.
+ char pathname[] = "/tmp/llvm_XXXXXX";
+ char *TmpName = ::mktemp(pathname);
+ if (TmpName == 0) {
+ MakeErrMsg(ErrMsg,
+ std::string(TmpName) + ": can't create unique directory name");
+ return Path();
+ }
+ if (-1 == ::mkdir(TmpName, S_IRWXU)) {
+ MakeErrMsg(ErrMsg,
+ std::string(TmpName) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(TmpName);
+#else
+ // This is the worst case implementation. tempnam(3) leaks memory unless its
+ // on an SVID2 (or later) system. On BSD 4.3 it leaks. tmpnam(3) has thread
+ // issues. The mktemp(3) function doesn't have enough variability in the
+ // temporary name generated. So, we provide our own implementation that
+ // increments an integer from a random number seeded by the current time. This
+ // should be sufficiently unique that we don't have many collisions between
+ // processes. Generally LLVM processes don't run very long and don't use very
+ // many temporary files so this shouldn't be a big issue for LLVM.
+ static time_t num = ::time(0);
+ char pathname[MAXPATHLEN];
+ do {
+ num++;
+ sprintf(pathname, "/tmp/llvm_%010u", unsigned(num));
+ } while ( 0 == access(pathname, F_OK ) );
+ if (-1 == ::mkdir(pathname, S_IRWXU)) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(pathname);
+#endif
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+#ifdef LTDL_SHLIBPATH_VAR
+ char* env_var = getenv(LTDL_SHLIBPATH_VAR);
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#endif
+ // FIXME: Should this look at LD_LIBRARY_PATH too?
+ Paths.push_back(sys::Path("/usr/local/lib/"));
+ Paths.push_back(sys::Path("/usr/X11R6/lib/"));
+ Paths.push_back(sys::Path("/usr/lib/"));
+ Paths.push_back(sys::Path("/lib/"));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+ char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#ifdef LLVM_LIBDIR
+ {
+ Path tmpPath;
+ if (tmpPath.set(LLVM_LIBDIR))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ }
+#endif
+ GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetUserHomeDirectory() {
+ const char* home = getenv("HOME");
+ Path result;
+ if (home && result.set(home))
+ return result;
+ result.set("/");
+ return result;
+}
+
+Path
+Path::GetCurrentDirectory() {
+ char pathname[MAXPATHLEN];
+ if (!getcwd(pathname, MAXPATHLEN)) {
+ assert(false && "Could not query current working directory.");
+ return Path();
+ }
+
+ return Path(pathname);
+}
+
+#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
+ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \
+ defined(__linux__) || defined(__CYGWIN__)
+static int
+test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
+ const char *dir, const char *bin)
+{
+ struct stat sb;
+
+ snprintf(buf, PATH_MAX, "%s/%s", dir, bin);
+ if (realpath(buf, ret) == NULL)
+ return (1);
+ if (stat(buf, &sb) != 0)
+ return (1);
+
+ return (0);
+}
+
+static char *
+getprogpath(char ret[PATH_MAX], const char *bin)
+{
+ char *pv, *s, *t, buf[PATH_MAX];
+
+ /* First approach: absolute path. */
+ if (bin[0] == '/') {
+ if (test_dir(buf, ret, "/", bin) == 0)
+ return (ret);
+ return (NULL);
+ }
+
+ /* Second approach: relative path. */
+ if (strchr(bin, '/') != NULL) {
+ if (getcwd(buf, PATH_MAX) == NULL)
+ return (NULL);
+ if (test_dir(buf, ret, buf, bin) == 0)
+ return (ret);
+ return (NULL);
+ }
+
+ /* Third approach: $PATH */
+ if ((pv = getenv("PATH")) == NULL)
+ return (NULL);
+ s = pv = strdup(pv);
+ if (pv == NULL)
+ return (NULL);
+ while ((t = strsep(&s, ":")) != NULL) {
+ if (test_dir(buf, ret, t, bin) == 0) {
+ free(pv);
+ return (ret);
+ }
+ }
+ free(pv);
+ return (NULL);
+}
+#endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+#if defined(__APPLE__)
+ // On OS X the executable path is saved to the stack by dyld. Reading it
+ // from there is much faster than calling dladdr, especially for large
+ // binaries with symbols.
+ char exe_path[MAXPATHLEN];
+ uint32_t size = sizeof(exe_path);
+ if (_NSGetExecutablePath(exe_path, &size) == 0) {
+ char link_path[MAXPATHLEN];
+ if (realpath(exe_path, link_path))
+ return Path(link_path);
+ }
+#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
+ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__)
+ char exe_path[PATH_MAX];
+
+ if (getprogpath(exe_path, argv0) != NULL)
+ return Path(exe_path);
+#elif defined(__linux__) || defined(__CYGWIN__)
+ char exe_path[MAXPATHLEN];
+ StringRef aPath("/proc/self/exe");
+ if (sys::fs::exists(aPath)) {
+ // /proc is not always mounted under Linux (chroot for example).
+ ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path));
+ if (len >= 0)
+ return Path(StringRef(exe_path, len));
+ } else {
+ // Fall back to the classical detection.
+ if (getprogpath(exe_path, argv0) != NULL)
+ return Path(exe_path);
+ }
+#elif defined(HAVE_DLFCN_H)
+ // Use dladdr to get executable path if available.
+ Dl_info DLInfo;
+ int err = dladdr(MainAddr, &DLInfo);
+ if (err == 0)
+ return Path();
+
+ // If the filename is a symlink, we need to resolve and return the location of
+ // the actual executable.
+ char link_path[MAXPATHLEN];
+ if (realpath(DLInfo.dli_fname, link_path))
+ return Path(link_path);
+#else
+#error GetMainExecutable is not implemented on this host yet.
+#endif
+ return Path();
+}
+
+
+StringRef Path::getDirname() const {
+ return getDirnameCharSep(path, "/");
+}
+
+StringRef
+Path::getBasename() const {
+ // Find the last slash
+ std::string::size_type slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ std::string::size_type dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef(path).substr(slash);
+ else
+ return StringRef(path).substr(slash, dot - slash);
+}
+
+StringRef
+Path::getSuffix() const {
+ // Find the last slash
+ std::string::size_type slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ std::string::size_type dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef();
+ else
+ return StringRef(path).substr(dot + 1);
+}
+
+bool Path::getMagicNumber(std::string &Magic, unsigned len) const {
+ assert(len < 1024 && "Request for magic string too long");
+ char Buf[1025];
+ int fd = ::open(path.c_str(), O_RDONLY);
+ if (fd < 0)
+ return false;
+ ssize_t bytes_read = ::read(fd, Buf, len);
+ ::close(fd);
+ if (ssize_t(len) != bytes_read)
+ return false;
+ Magic.assign(Buf, len);
+ return true;
+}
+
+bool
+Path::exists() const {
+ return 0 == access(path.c_str(), F_OK );
+}
+
+bool
+Path::isDirectory() const {
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+ return ((buf.st_mode & S_IFMT) == S_IFDIR) ? true : false;
+}
+
+bool
+Path::isSymLink() const {
+ struct stat buf;
+ if (0 != lstat(path.c_str(), &buf))
+ return false;
+ return S_ISLNK(buf.st_mode);
+}
+
+
+bool
+Path::canRead() const {
+ return 0 == access(path.c_str(), R_OK);
+}
+
+bool
+Path::canWrite() const {
+ return 0 == access(path.c_str(), W_OK);
+}
+
+bool
+Path::isRegularFile() const {
+ // Get the status so we can determine if it's a file or directory
+ struct stat buf;
+
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+
+ if (S_ISREG(buf.st_mode))
+ return true;
+
+ return false;
+}
+
+bool
+Path::canExecute() const {
+ if (0 != access(path.c_str(), R_OK | X_OK ))
+ return false;
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+ if (!S_ISREG(buf.st_mode))
+ return false;
+ return true;
+}
+
+StringRef
+Path::getLast() const {
+ // Find the last slash
+ size_t pos = path.rfind('/');
+
+ // Handle the corner cases
+ if (pos == std::string::npos)
+ return path;
+
+ // If the last character is a slash
+ if (pos == path.length()-1) {
+ // Find the second to last slash
+ size_t pos2 = path.rfind('/', pos-1);
+ if (pos2 == std::string::npos)
+ return StringRef(path).substr(0,pos);
+ else
+ return StringRef(path).substr(pos2+1,pos-pos2-1);
+ }
+ // Return everything after the last slash
+ return StringRef(path).substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+ if (!fsIsValid || update) {
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf)) {
+ MakeErrMsg(ErrStr, path + ": can't get status of file");
+ return 0;
+ }
+ status.fileSize = buf.st_size;
+ status.modTime.fromEpochTime(buf.st_mtime);
+ status.mode = buf.st_mode;
+ status.user = buf.st_uid;
+ status.group = buf.st_gid;
+ status.uniqueID = uint64_t(buf.st_ino);
+ status.isDir = S_ISDIR(buf.st_mode);
+ status.isFile = S_ISREG(buf.st_mode);
+ fsIsValid = true;
+ }
+ return &status;
+}
+
+static bool AddPermissionBits(const Path &File, int bits) {
+ // Get the umask value from the operating system. We want to use it
+ // when changing the file's permissions. Since calling umask() sets
+ // the umask and returns its old value, we must call it a second
+ // time to reset it to the user's preference.
+ int mask = umask(0777); // The arg. to umask is arbitrary.
+ umask(mask); // Restore the umask.
+
+ // Get the file's current mode.
+ struct stat buf;
+ if (0 != stat(File.c_str(), &buf))
+ return false;
+ // Change the file to have whichever permissions bits from 'bits'
+ // that the umask would not disable.
+ if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1)
+ return false;
+ return true;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0444))
+ return MakeErrMsg(ErrMsg, path + ": can't make file readable");
+ return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0222))
+ return MakeErrMsg(ErrMsg, path + ": can't make file writable");
+ return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0111))
+ return MakeErrMsg(ErrMsg, path + ": can't make file executable");
+ return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+ DIR* direntries = ::opendir(path.c_str());
+ if (direntries == 0)
+ return MakeErrMsg(ErrMsg, path + ": can't open directory");
+
+ std::string dirPath = path;
+ if (!lastIsSlash(dirPath))
+ dirPath += '/';
+
+ result.clear();
+ struct dirent* de = ::readdir(direntries);
+ for ( ; de != 0; de = ::readdir(direntries)) {
+ if (de->d_name[0] != '.') {
+ Path aPath(dirPath + (const char*)de->d_name);
+ struct stat st;
+ if (0 != lstat(aPath.path.c_str(), &st)) {
+ if (S_ISLNK(st.st_mode))
+ continue; // dangling symlink -- ignore
+ return MakeErrMsg(ErrMsg,
+ aPath.path + ": can't determine file object type");
+ }
+ result.insert(aPath);
+ }
+ }
+
+ closedir(direntries);
+ return false;
+}
+
+bool
+Path::set(StringRef a_path) {
+ if (a_path.empty())
+ return false;
+ path = a_path;
+ return true;
+}
+
+bool
+Path::appendComponent(StringRef name) {
+ if (name.empty())
+ return false;
+ if (!lastIsSlash(path))
+ path += '/';
+ path += name;
+ return true;
+}
+
+bool
+Path::eraseComponent() {
+ size_t slashpos = path.rfind('/',path.size());
+ if (slashpos == 0 || slashpos == std::string::npos) {
+ path.erase();
+ return true;
+ }
+ if (slashpos == path.size() - 1)
+ slashpos = path.rfind('/',slashpos-1);
+ if (slashpos == std::string::npos) {
+ path.erase();
+ return true;
+ }
+ path.erase(slashpos);
+ return true;
+}
+
+bool
+Path::eraseSuffix() {
+ size_t dotpos = path.rfind('.',path.size());
+ size_t slashpos = path.rfind('/',path.size());
+ if (dotpos != std::string::npos) {
+ if (slashpos == std::string::npos || dotpos > slashpos+1) {
+ path.erase(dotpos, path.size()-dotpos);
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool createDirectoryHelper(char* beg, char* end, bool create_parents) {
+
+ if (access(beg, R_OK | W_OK) == 0)
+ return false;
+
+ if (create_parents) {
+
+ char* c = end;
+
+ for (; c != beg; --c)
+ if (*c == '/') {
+
+ // Recurse to handling the parent directory.
+ *c = '\0';
+ bool x = createDirectoryHelper(beg, c, create_parents);
+ *c = '/';
+
+ // Return if we encountered an error.
+ if (x)
+ return true;
+
+ break;
+ }
+ }
+
+ return mkdir(beg, S_IRWXU | S_IRWXG) != 0;
+}
+
+bool
+Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) {
+ // Get a writeable copy of the path name
+ std::string pathname(path);
+
+ // Null-terminate the last component
+ size_t lastchar = path.length() - 1 ;
+
+ if (pathname[lastchar] != '/')
+ ++lastchar;
+
+ pathname[lastchar] = '\0';
+
+ if (createDirectoryHelper(&pathname[0], &pathname[lastchar], create_parents))
+ return MakeErrMsg(ErrMsg, pathname + ": can't create directory");
+
+ return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+ // Create the file
+ int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ return MakeErrMsg(ErrMsg, path + ": can't create file");
+ ::close(fd);
+ return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+ // Make this into a unique file name
+ if (makeUnique( reuse_current, ErrMsg ))
+ return true;
+
+ // create the file
+ int fd = ::open(path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
+ if (fd < 0)
+ return MakeErrMsg(ErrMsg, path + ": can't create temporary file");
+ ::close(fd);
+ return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+ // Get the status so we can determine if it's a file or directory.
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf)) {
+ MakeErrMsg(ErrStr, path + ": can't get status of file");
+ return true;
+ }
+
+ // Note: this check catches strange situations. In all cases, LLVM should
+ // only be involved in the creation and deletion of regular files. This
+ // check ensures that what we're trying to erase is a regular file. It
+ // effectively prevents LLVM from erasing things like /dev/null, any block
+ // special file, or other things that aren't "regular" files.
+ if (S_ISREG(buf.st_mode)) {
+ if (unlink(path.c_str()) != 0)
+ return MakeErrMsg(ErrStr, path + ": can't destroy file");
+ return false;
+ }
+
+ if (!S_ISDIR(buf.st_mode)) {
+ if (ErrStr) *ErrStr = "not a file or directory";
+ return true;
+ }
+
+ if (remove_contents) {
+ // Recursively descend the directory to remove its contents.
+ std::string cmd = "/bin/rm -rf " + path;
+ if (system(cmd.c_str()) != 0) {
+ MakeErrMsg(ErrStr, path + ": failed to recursively remove directory.");
+ return true;
+ }
+ return false;
+ }
+
+ // Otherwise, try to just remove the one directory.
+ std::string pathname(path);
+ size_t lastchar = path.length() - 1;
+ if (pathname[lastchar] == '/')
+ pathname[lastchar] = '\0';
+ else
+ pathname[lastchar+1] = '\0';
+
+ if (rmdir(pathname.c_str()) != 0)
+ return MakeErrMsg(ErrStr, pathname + ": can't erase directory");
+ return false;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+ if (0 != ::rename(path.c_str(), newName.c_str()))
+ return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" +
+ newName.str() + "'");
+ return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const {
+ struct utimbuf utb;
+ utb.actime = si.modTime.toPosixTime();
+ utb.modtime = utb.actime;
+ if (0 != ::utime(path.c_str(),&utb))
+ return MakeErrMsg(ErrStr, path + ": can't set file modification time");
+ if (0 != ::chmod(path.c_str(),si.mode))
+ return MakeErrMsg(ErrStr, path + ": can't set mode");
+ return false;
+}
+
+bool
+sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
+ int inFile = -1;
+ int outFile = -1;
+ inFile = ::open(Src.c_str(), O_RDONLY);
+ if (inFile == -1)
+ return MakeErrMsg(ErrMsg, Src.str() +
+ ": can't open source file to copy");
+
+ outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666);
+ if (outFile == -1) {
+ ::close(inFile);
+ return MakeErrMsg(ErrMsg, Dest.str() +
+ ": can't create destination file for copy");
+ }
+
+ char Buffer[16*1024];
+ while (ssize_t Amt = ::read(inFile, Buffer, 16*1024)) {
+ if (Amt == -1) {
+ if (errno != EINTR && errno != EAGAIN) {
+ ::close(inFile);
+ ::close(outFile);
+ return MakeErrMsg(ErrMsg, Src.str()+": can't read source file");
+ }
+ } else {
+ char *BufPtr = Buffer;
+ while (Amt) {
+ ssize_t AmtWritten = ::write(outFile, BufPtr, Amt);
+ if (AmtWritten == -1) {
+ if (errno != EINTR && errno != EAGAIN) {
+ ::close(inFile);
+ ::close(outFile);
+ return MakeErrMsg(ErrMsg, Dest.str() +
+ ": can't write destination file");
+ }
+ } else {
+ Amt -= AmtWritten;
+ BufPtr += AmtWritten;
+ }
+ }
+ }
+ }
+ ::close(inFile);
+ ::close(outFile);
+ return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+ bool Exists;
+ if (reuse_current && (fs::exists(path, Exists) || !Exists))
+ return false; // File doesn't exist already, just use it!
+
+ // Append an XXXXXX pattern to the end of the file for use with mkstemp,
+ // mktemp or our own implementation.
+ // This uses std::vector instead of SmallVector to avoid a dependence on
+ // libSupport. And performance isn't critical here.
+ std::vector<char> Buf;
+ Buf.resize(path.size()+8);
+ char *FNBuffer = &Buf[0];
+ path.copy(FNBuffer,path.size());
+ bool isdir;
+ if (!fs::is_directory(path, isdir) && isdir)
+ strcpy(FNBuffer+path.size(), "/XXXXXX");
+ else
+ strcpy(FNBuffer+path.size(), "-XXXXXX");
+
+#if defined(HAVE_MKSTEMP)
+ int TempFD;
+ if ((TempFD = mkstemp(FNBuffer)) == -1)
+ return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+ // We don't need to hold the temp file descriptor... we will trust that no one
+ // will overwrite/delete the file before we can open it again.
+ close(TempFD);
+
+ // Save the name
+ path = FNBuffer;
+
+ // By default mkstemp sets the mode to 0600, so update mode bits now.
+ AddPermissionBits (*this, 0666);
+#elif defined(HAVE_MKTEMP)
+ // If we don't have mkstemp, use the old and obsolete mktemp function.
+ if (mktemp(FNBuffer) == 0)
+ return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+ // Save the name
+ path = FNBuffer;
+#else
+ // Okay, looks like we have to do it all by our lonesome.
+ static unsigned FCounter = 0;
+ // Try to initialize with unique value.
+ if (FCounter == 0) FCounter = ((unsigned)getpid() & 0xFFFF) << 8;
+ char* pos = strstr(FNBuffer, "XXXXXX");
+ do {
+ if (++FCounter > 0xFFFFFF) {
+ return MakeErrMsg(ErrMsg,
+ path + ": can't make unique filename: too many files");
+ }
+ sprintf(pos, "%06X", FCounter);
+ path = FNBuffer;
+ } while (exists());
+ // POSSIBLE SECURITY BUG: An attacker can easily guess the name and exploit
+ // LLVM.
+#endif
+ return false;
+}
+
+const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) {
+ int Flags = MAP_PRIVATE;
+#ifdef MAP_FILE
+ Flags |= MAP_FILE;
+#endif
+ void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, Offset);
+ if (BasePtr == MAP_FAILED)
+ return 0;
+ return (const char*)BasePtr;
+}
+
+void Path::UnMapFilePages(const char *BasePtr, size_t FileSize) {
+ const void *Addr = static_cast<const void *>(BasePtr);
+ ::munmap(const_cast<void *>(Addr), FileSize);
+}
+
+} // end llvm namespace
diff --git a/contrib/llvm/lib/Support/Unix/PathV2.inc b/contrib/llvm/lib/Support/Unix/PathV2.inc
new file mode 100644
index 000000000000..a3dfd4b0a32d
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/PathV2.inc
@@ -0,0 +1,695 @@
+//===- llvm/Support/Unix/PathV2.cpp - Unix Path Implementation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/Support/Process.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+#if HAVE_STDIO_H
+#include <stdio.h>
+#endif
+#if HAVE_LIMITS_H
+#include <limits.h>
+#endif
+
+// Both stdio.h and cstdio are included via different pathes and
+// stdcxx's cstdio doesn't include stdio.h, so it doesn't #undef the macros
+// either.
+#undef ferror
+#undef feof
+
+// For GNU Hurd
+#if defined(__GNU__) && !defined(PATH_MAX)
+# define PATH_MAX 4096
+#endif
+
+using namespace llvm;
+
+namespace {
+ /// This class automatically closes the given file descriptor when it goes out
+ /// of scope. You can take back explicit ownership of the file descriptor by
+ /// calling take(). The destructor does not verify that close was successful.
+ /// Therefore, never allow this class to call close on a file descriptor that
+ /// has been read from or written to.
+ struct AutoFD {
+ int FileDescriptor;
+
+ AutoFD(int fd) : FileDescriptor(fd) {}
+ ~AutoFD() {
+ if (FileDescriptor >= 0)
+ ::close(FileDescriptor);
+ }
+
+ int take() {
+ int ret = FileDescriptor;
+ FileDescriptor = -1;
+ return ret;
+ }
+
+ operator int() const {return FileDescriptor;}
+ };
+
+ error_code TempDir(SmallVectorImpl<char> &result) {
+ // FIXME: Don't use TMPDIR if program is SUID or SGID enabled.
+ const char *dir = 0;
+ (dir = std::getenv("TMPDIR" )) ||
+ (dir = std::getenv("TMP" )) ||
+ (dir = std::getenv("TEMP" )) ||
+ (dir = std::getenv("TEMPDIR")) ||
+#ifdef P_tmpdir
+ (dir = P_tmpdir) ||
+#endif
+ (dir = "/tmp");
+
+ result.clear();
+ StringRef d(dir);
+ result.append(d.begin(), d.end());
+ return error_code::success();
+ }
+}
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+#ifdef MAXPATHLEN
+ result.reserve(MAXPATHLEN);
+#else
+// For GNU Hurd
+ result.reserve(1024);
+#endif
+
+ while (true) {
+ if (::getcwd(result.data(), result.capacity()) == 0) {
+ // See if there was a real error.
+ if (errno != errc::not_enough_memory)
+ return error_code(errno, system_category());
+ // Otherwise there just wasn't enough space.
+ result.reserve(result.capacity() * 2);
+ } else
+ break;
+ }
+
+ result.set_size(strlen(result.data()));
+ return error_code::success();
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ const size_t buf_sz = 32768;
+ char buffer[buf_sz];
+ int from_file = -1, to_file = -1;
+
+ // Open from.
+ if ((from_file = ::open(f.begin(), O_RDONLY)) < 0)
+ return error_code(errno, system_category());
+ AutoFD from_fd(from_file);
+
+ // Stat from.
+ struct stat from_stat;
+ if (::stat(f.begin(), &from_stat) != 0)
+ return error_code(errno, system_category());
+
+ // Setup to flags.
+ int to_flags = O_CREAT | O_WRONLY;
+ if (copt == copy_option::fail_if_exists)
+ to_flags |= O_EXCL;
+
+ // Open to.
+ if ((to_file = ::open(t.begin(), to_flags, from_stat.st_mode)) < 0)
+ return error_code(errno, system_category());
+ AutoFD to_fd(to_file);
+
+ // Copy!
+ ssize_t sz, sz_read = 1, sz_write;
+ while (sz_read > 0 &&
+ (sz_read = ::read(from_fd, buffer, buf_sz)) > 0) {
+ // Allow for partial writes - see Advanced Unix Programming (2nd Ed.),
+ // Marc Rochkind, Addison-Wesley, 2004, page 94
+ sz_write = 0;
+ do {
+ if ((sz = ::write(to_fd, buffer + sz_write, sz_read - sz_write)) < 0) {
+ sz_read = sz; // cause read loop termination.
+ break; // error.
+ }
+ sz_write += sz;
+ } while (sz_write < sz_read);
+ }
+
+ // After all the file operations above the return value of close actually
+ // matters.
+ if (::close(from_fd.take()) < 0) sz_read = -1;
+ if (::close(to_fd.take()) < 0) sz_read = -1;
+
+ // Check for errors.
+ if (sz_read < 0)
+ return error_code(errno, system_category());
+
+ return error_code::success();
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) {
+ if (errno != errc::file_exists)
+ return error_code(errno, system_category());
+ existed = true;
+ } else
+ existed = false;
+
+ return error_code::success();
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::link(t.begin(), f.begin()) == -1)
+ return error_code(errno, system_category());
+
+ return error_code::success();
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::symlink(t.begin(), f.begin()) == -1)
+ return error_code(errno, system_category());
+
+ return error_code::success();
+}
+
+error_code remove(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::remove(p.begin()) == -1) {
+ if (errno != errc::no_such_file_or_directory)
+ return error_code(errno, system_category());
+ existed = false;
+ } else
+ existed = true;
+
+ return error_code::success();
+}
+
+error_code rename(const Twine &from, const Twine &to) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::rename(f.begin(), t.begin()) == -1) {
+ // If it's a cross device link, copy then delete, otherwise return the error
+ if (errno == EXDEV) {
+ if (error_code ec = copy_file(from, to, copy_option::overwrite_if_exists))
+ return ec;
+ bool Existed;
+ if (error_code ec = remove(from, Existed))
+ return ec;
+ } else
+ return error_code(errno, system_category());
+ }
+
+ return error_code::success();
+}
+
+error_code resize_file(const Twine &path, uint64_t size) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::truncate(p.begin(), size) == -1)
+ return error_code(errno, system_category());
+
+ return error_code::success();
+}
+
+error_code exists(const Twine &path, bool &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::access(p.begin(), F_OK) == -1) {
+ if (errno != errc::no_such_file_or_directory)
+ return error_code(errno, system_category());
+ result = false;
+ } else
+ result = true;
+
+ return error_code::success();
+}
+
+bool equivalent(file_status A, file_status B) {
+ assert(status_known(A) && status_known(B));
+ return A.fs_st_dev == B.fs_st_dev &&
+ A.fs_st_ino == B.fs_st_ino;
+}
+
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ file_status fsA, fsB;
+ if (error_code ec = status(A, fsA)) return ec;
+ if (error_code ec = status(B, fsB)) return ec;
+ result = equivalent(fsA, fsB);
+ return error_code::success();
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) == -1)
+ return error_code(errno, system_category());
+ if (!S_ISREG(status.st_mode))
+ return make_error_code(errc::operation_not_permitted);
+
+ result = status.st_size;
+ return error_code::success();
+}
+
+error_code status(const Twine &path, file_status &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) != 0) {
+ error_code ec(errno, system_category());
+ if (ec == errc::no_such_file_or_directory)
+ result = file_status(file_type::file_not_found);
+ else
+ result = file_status(file_type::status_error);
+ return ec;
+ }
+
+ perms prms = static_cast<perms>(status.st_mode & perms_mask);
+
+ if (S_ISDIR(status.st_mode))
+ result = file_status(file_type::directory_file, prms);
+ else if (S_ISREG(status.st_mode))
+ result = file_status(file_type::regular_file, prms);
+ else if (S_ISBLK(status.st_mode))
+ result = file_status(file_type::block_file, prms);
+ else if (S_ISCHR(status.st_mode))
+ result = file_status(file_type::character_file, prms);
+ else if (S_ISFIFO(status.st_mode))
+ result = file_status(file_type::fifo_file, prms);
+ else if (S_ISSOCK(status.st_mode))
+ result = file_status(file_type::socket_file, prms);
+ else
+ result = file_status(file_type::type_unknown, prms);
+
+ result.fs_st_dev = status.st_dev;
+ result.fs_st_ino = status.st_ino;
+
+ return error_code::success();
+}
+
+// Modifies permissions on a file.
+error_code permissions(const Twine &path, perms prms) {
+ if ((prms & add_perms) && (prms & remove_perms))
+ llvm_unreachable("add_perms and remove_perms are mutually exclusive");
+
+ // Get current permissions
+ file_status info;
+ if (error_code ec = status(path, info)) {
+ return ec;
+ }
+
+ // Set updated permissions.
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+ perms permsToSet;
+ if (prms & add_perms) {
+ permsToSet = (info.permissions() | prms) & perms_mask;
+ } else if (prms & remove_perms) {
+ permsToSet = (info.permissions() & ~prms) & perms_mask;
+ } else {
+ permsToSet = prms & perms_mask;
+ }
+ if (::chmod(p.begin(), static_cast<mode_t>(permsToSet))) {
+ return error_code(errno, system_category());
+ }
+
+ return error_code::success();
+}
+
+// Since this is most often used for temporary files, mode defaults to 0600.
+error_code unique_file(const Twine &model, int &result_fd,
+ SmallVectorImpl<char> &result_path,
+ bool makeAbsolute, unsigned mode) {
+ SmallString<128> Model;
+ model.toVector(Model);
+ // Null terminate.
+ Model.c_str();
+
+ if (makeAbsolute) {
+ // Make model absolute by prepending a temp directory if it's not already.
+ bool absolute = path::is_absolute(Twine(Model));
+ if (!absolute) {
+ SmallString<128> TDir;
+ if (error_code ec = TempDir(TDir)) return ec;
+ path::append(TDir, Twine(Model));
+ Model.swap(TDir);
+ }
+ }
+
+ // From here on, DO NOT modify model. It may be needed if the randomly chosen
+ // path already exists.
+ SmallString<128> RandomPath = Model;
+
+retry_random_path:
+ // Replace '%' with random chars.
+ for (unsigned i = 0, e = Model.size(); i != e; ++i) {
+ if (Model[i] == '%')
+ RandomPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
+ }
+
+ // Make sure we don't fall into an infinite loop by constantly trying
+ // to create the parent path.
+ bool TriedToCreateParent = false;
+
+ // Try to open + create the file.
+rety_open_create:
+ int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, mode);
+ if (RandomFD == -1) {
+ int SavedErrno = errno;
+ // If the file existed, try again, otherwise, error.
+ if (SavedErrno == errc::file_exists)
+ goto retry_random_path;
+ // If path prefix doesn't exist, try to create it.
+ if (SavedErrno == errc::no_such_file_or_directory &&
+ !exists(path::parent_path(RandomPath)) &&
+ !TriedToCreateParent) {
+ TriedToCreateParent = true;
+ StringRef p(RandomPath);
+ SmallString<64> dir_to_create;
+ for (path::const_iterator i = path::begin(p),
+ e = --path::end(p); i != e; ++i) {
+ path::append(dir_to_create, *i);
+ bool Exists;
+ if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+ if (!Exists) {
+ // Don't try to create network paths.
+ if (i->size() > 2 && (*i)[0] == '/' &&
+ (*i)[1] == '/' &&
+ (*i)[2] != '/')
+ return make_error_code(errc::no_such_file_or_directory);
+ if (::mkdir(dir_to_create.c_str(), 0700) == -1 &&
+ errno != errc::file_exists)
+ return error_code(errno, system_category());
+ }
+ }
+ goto rety_open_create;
+ }
+
+ return error_code(SavedErrno, system_category());
+ }
+
+ // Make the path absolute.
+ char real_path_buff[PATH_MAX + 1];
+ if (realpath(RandomPath.c_str(), real_path_buff) == NULL) {
+ int error = errno;
+ ::close(RandomFD);
+ ::unlink(RandomPath.c_str());
+ return error_code(error, system_category());
+ }
+
+ result_path.clear();
+ StringRef d(real_path_buff);
+ result_path.append(d.begin(), d.end());
+
+ result_fd = RandomFD;
+ return error_code::success();
+}
+
+error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
+ AutoFD ScopedFD(FD);
+ if (!CloseFD)
+ ScopedFD.take();
+
+ // Figure out how large the file is.
+ struct stat FileInfo;
+ if (fstat(FD, &FileInfo) == -1)
+ return error_code(errno, system_category());
+ uint64_t FileSize = FileInfo.st_size;
+
+ if (Size == 0)
+ Size = FileSize;
+ else if (FileSize < Size) {
+ // We need to grow the file.
+ if (ftruncate(FD, Size) == -1)
+ return error_code(errno, system_category());
+ }
+
+ int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE;
+ int prot = (Mode == readonly) ? PROT_READ : (PROT_READ | PROT_WRITE);
+#ifdef MAP_FILE
+ flags |= MAP_FILE;
+#endif
+ Mapping = ::mmap(0, Size, prot, flags, FD, Offset);
+ if (Mapping == MAP_FAILED)
+ return error_code(errno, system_category());
+ return error_code::success();
+}
+
+mapped_file_region::mapped_file_region(const Twine &path,
+ mapmode mode,
+ uint64_t length,
+ uint64_t offset,
+ error_code &ec)
+ : Mode(mode)
+ , Size(length)
+ , Mapping() {
+ // Make sure that the requested size fits within SIZE_T.
+ if (length > std::numeric_limits<size_t>::max()) {
+ ec = make_error_code(errc::invalid_argument);
+ return;
+ }
+
+ SmallString<128> path_storage;
+ StringRef name = path.toNullTerminatedStringRef(path_storage);
+ int oflags = (mode == readonly) ? O_RDONLY : O_RDWR;
+ int ofd = ::open(name.begin(), oflags);
+ if (ofd == -1) {
+ ec = error_code(errno, system_category());
+ return;
+ }
+
+ ec = init(ofd, true, offset);
+ if (ec)
+ Mapping = 0;
+}
+
+mapped_file_region::mapped_file_region(int fd,
+ bool closefd,
+ mapmode mode,
+ uint64_t length,
+ uint64_t offset,
+ error_code &ec)
+ : Mode(mode)
+ , Size(length)
+ , Mapping() {
+ // Make sure that the requested size fits within SIZE_T.
+ if (length > std::numeric_limits<size_t>::max()) {
+ ec = make_error_code(errc::invalid_argument);
+ return;
+ }
+
+ ec = init(fd, closefd, offset);
+ if (ec)
+ Mapping = 0;
+}
+
+mapped_file_region::~mapped_file_region() {
+ if (Mapping)
+ ::munmap(Mapping, Size);
+}
+
+#if LLVM_HAS_RVALUE_REFERENCES
+mapped_file_region::mapped_file_region(mapped_file_region &&other)
+ : Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) {
+ other.Mapping = 0;
+}
+#endif
+
+mapped_file_region::mapmode mapped_file_region::flags() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return Mode;
+}
+
+uint64_t mapped_file_region::size() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return Size;
+}
+
+char *mapped_file_region::data() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ assert(Mode != readonly && "Cannot get non const data for readonly mapping!");
+ return reinterpret_cast<char*>(Mapping);
+}
+
+const char *mapped_file_region::const_data() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return reinterpret_cast<const char*>(Mapping);
+}
+
+int mapped_file_region::alignment() {
+ return process::get_self()->page_size();
+}
+
+error_code detail::directory_iterator_construct(detail::DirIterState &it,
+ StringRef path){
+ SmallString<128> path_null(path);
+ DIR *directory = ::opendir(path_null.c_str());
+ if (directory == 0)
+ return error_code(errno, system_category());
+
+ it.IterationHandle = reinterpret_cast<intptr_t>(directory);
+ // Add something for replace_filename to replace.
+ path::append(path_null, ".");
+ it.CurrentEntry = directory_entry(path_null.str());
+ return directory_iterator_increment(it);
+}
+
+error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
+ if (it.IterationHandle)
+ ::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
+ it.IterationHandle = 0;
+ it.CurrentEntry = directory_entry();
+ return error_code::success();
+}
+
+error_code detail::directory_iterator_increment(detail::DirIterState &it) {
+ errno = 0;
+ dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
+ if (cur_dir == 0 && errno != 0) {
+ return error_code(errno, system_category());
+ } else if (cur_dir != 0) {
+ StringRef name(cur_dir->d_name, NAMLEN(cur_dir));
+ if ((name.size() == 1 && name[0] == '.') ||
+ (name.size() == 2 && name[0] == '.' && name[1] == '.'))
+ return directory_iterator_increment(it);
+ it.CurrentEntry.replace_filename(name);
+ } else
+ return directory_iterator_destruct(it);
+
+ return error_code::success();
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+ SmallVectorImpl<char> &result) {
+ SmallString<128> PathStorage;
+ StringRef Path = path.toNullTerminatedStringRef(PathStorage);
+ result.set_size(0);
+
+ // Open path.
+ std::FILE *file = std::fopen(Path.data(), "rb");
+ if (file == 0)
+ return error_code(errno, system_category());
+
+ // Reserve storage.
+ result.reserve(len);
+
+ // Read magic!
+ size_t size = std::fread(result.data(), 1, len, file);
+ if (std::ferror(file) != 0) {
+ std::fclose(file);
+ return error_code(errno, system_category());
+ } else if (size != result.size()) {
+ if (std::feof(file) != 0) {
+ std::fclose(file);
+ result.set_size(size);
+ return make_error_code(errc::value_too_large);
+ }
+ }
+ std::fclose(file);
+ result.set_size(len);
+ return error_code::success();
+}
+
+error_code map_file_pages(const Twine &path, off_t file_offset, size_t size,
+ bool map_writable, void *&result) {
+ SmallString<128> path_storage;
+ StringRef name = path.toNullTerminatedStringRef(path_storage);
+ int oflags = map_writable ? O_RDWR : O_RDONLY;
+ int ofd = ::open(name.begin(), oflags);
+ if ( ofd == -1 )
+ return error_code(errno, system_category());
+ AutoFD fd(ofd);
+ int flags = map_writable ? MAP_SHARED : MAP_PRIVATE;
+ int prot = map_writable ? (PROT_READ|PROT_WRITE) : PROT_READ;
+#ifdef MAP_FILE
+ flags |= MAP_FILE;
+#endif
+ result = ::mmap(0, size, prot, flags, fd, file_offset);
+ if (result == MAP_FAILED) {
+ return error_code(errno, system_category());
+ }
+
+ return error_code::success();
+}
+
+error_code unmap_file_pages(void *base, size_t size) {
+ if ( ::munmap(base, size) == -1 )
+ return error_code(errno, system_category());
+
+ return error_code::success();
+}
+
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Support/Unix/Process.inc b/contrib/llvm/lib/Support/Unix/Process.inc
new file mode 100644
index 000000000000..9a4454f1c650
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Process.inc
@@ -0,0 +1,343 @@
+//===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Support/TimeValue.h"
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+// DragonFlyBSD, OpenBSD, and Bitrig have deprecated <malloc.h> for
+// <stdlib.h> instead. Unix.h includes this for us already.
+#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) && \
+ !defined(__OpenBSD__) && !defined(__Bitrig__)
+#include <malloc.h>
+#endif
+#ifdef HAVE_MALLOC_MALLOC_H
+#include <malloc/malloc.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+# include <sys/ioctl.h>
+#endif
+#ifdef HAVE_TERMIOS_H
+# include <termios.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+using namespace sys;
+
+
+process::id_type self_process::get_id() {
+ return getpid();
+}
+
+static std::pair<TimeValue, TimeValue> getRUsageTimes() {
+#if defined(HAVE_GETRUSAGE)
+ struct rusage RU;
+ ::getrusage(RUSAGE_SELF, &RU);
+ return std::make_pair(
+ TimeValue(
+ static_cast<TimeValue::SecondsType>(RU.ru_utime.tv_sec),
+ static_cast<TimeValue::NanoSecondsType>(
+ RU.ru_utime.tv_usec * TimeValue::NANOSECONDS_PER_MICROSECOND)),
+ TimeValue(
+ static_cast<TimeValue::SecondsType>(RU.ru_stime.tv_sec),
+ static_cast<TimeValue::NanoSecondsType>(
+ RU.ru_stime.tv_usec * TimeValue::NANOSECONDS_PER_MICROSECOND)));
+#else
+#warning Cannot get usage times on this platform
+ return std::make_pair(TimeValue(), TimeValue());
+#endif
+}
+
+TimeValue self_process::get_user_time() const {
+#if _POSIX_TIMERS > 0 && _POSIX_CPUTIME > 0
+ // Try to get a high resolution CPU timer.
+ struct timespec TS;
+ if (::clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &TS) == 0)
+ return TimeValue(static_cast<TimeValue::SecondsType>(TS.tv_sec),
+ static_cast<TimeValue::NanoSecondsType>(TS.tv_nsec));
+#endif
+
+ // Otherwise fall back to rusage based timing.
+ return getRUsageTimes().first;
+}
+
+TimeValue self_process::get_system_time() const {
+ // We can only collect system time by inspecting the results of getrusage.
+ return getRUsageTimes().second;
+}
+
+static unsigned getPageSize() {
+#if defined(__CYGWIN__)
+ // On Cygwin, getpagesize() returns 64k but the page size for the purposes of
+ // memory protection and mmap() is 4k.
+ // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492
+ const int page_size = 0x1000;
+#elif defined(HAVE_GETPAGESIZE)
+ const int page_size = ::getpagesize();
+#elif defined(HAVE_SYSCONF)
+ long page_size = ::sysconf(_SC_PAGE_SIZE);
+#else
+#warning Cannot get the page size on this machine
+#endif
+ return static_cast<unsigned>(page_size);
+}
+
+// This constructor guaranteed to be run exactly once on a single thread, and
+// sets up various process invariants that can be queried cheaply from then on.
+self_process::self_process() : PageSize(getPageSize()) {
+}
+
+
+size_t Process::GetMallocUsage() {
+#if defined(HAVE_MALLINFO)
+ struct mallinfo mi;
+ mi = ::mallinfo();
+ return mi.uordblks;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+ malloc_statistics_t Stats;
+ malloc_zone_statistics(malloc_default_zone(), &Stats);
+ return Stats.size_in_use; // darwin
+#elif defined(HAVE_SBRK)
+ // Note this is only an approximation and more closely resembles
+ // the value returned by mallinfo in the arena field.
+ static char *StartOfMemory = reinterpret_cast<char*>(::sbrk(0));
+ char *EndOfMemory = (char*)sbrk(0);
+ if (EndOfMemory != ((char*)-1) && StartOfMemory != ((char*)-1))
+ return EndOfMemory - StartOfMemory;
+ else
+ return 0;
+#else
+#warning Cannot get malloc info on this platform
+ return 0;
+#endif
+}
+
+void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time,
+ TimeValue &sys_time) {
+ elapsed = TimeValue::now();
+ llvm::tie(user_time, sys_time) = getRUsageTimes();
+}
+
+int Process::GetCurrentUserId() {
+ return getuid();
+}
+
+int Process::GetCurrentGroupId() {
+ return getgid();
+}
+
+#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__)
+#include <mach/mach.h>
+#endif
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this function
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+#if HAVE_SETRLIMIT
+ struct rlimit rlim;
+ rlim.rlim_cur = rlim.rlim_max = 0;
+ setrlimit(RLIMIT_CORE, &rlim);
+#endif
+
+#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__)
+ // Disable crash reporting on Mac OS X 10.0-10.4
+
+ // get information about the original set of exception ports for the task
+ mach_msg_type_number_t Count = 0;
+ exception_mask_t OriginalMasks[EXC_TYPES_COUNT];
+ exception_port_t OriginalPorts[EXC_TYPES_COUNT];
+ exception_behavior_t OriginalBehaviors[EXC_TYPES_COUNT];
+ thread_state_flavor_t OriginalFlavors[EXC_TYPES_COUNT];
+ kern_return_t err =
+ task_get_exception_ports(mach_task_self(), EXC_MASK_ALL, OriginalMasks,
+ &Count, OriginalPorts, OriginalBehaviors,
+ OriginalFlavors);
+ if (err == KERN_SUCCESS) {
+ // replace each with MACH_PORT_NULL.
+ for (unsigned i = 0; i != Count; ++i)
+ task_set_exception_ports(mach_task_self(), OriginalMasks[i],
+ MACH_PORT_NULL, OriginalBehaviors[i],
+ OriginalFlavors[i]);
+ }
+
+ // Disable crash reporting on Mac OS X 10.5
+ signal(SIGABRT, _exit);
+ signal(SIGILL, _exit);
+ signal(SIGFPE, _exit);
+ signal(SIGSEGV, _exit);
+ signal(SIGBUS, _exit);
+#endif
+}
+
+bool Process::StandardInIsUserInput() {
+ return FileDescriptorIsDisplayed(STDIN_FILENO);
+}
+
+bool Process::StandardOutIsDisplayed() {
+ return FileDescriptorIsDisplayed(STDOUT_FILENO);
+}
+
+bool Process::StandardErrIsDisplayed() {
+ return FileDescriptorIsDisplayed(STDERR_FILENO);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
+#if HAVE_ISATTY
+ return isatty(fd);
+#else
+ // If we don't have isatty, just return false.
+ return false;
+#endif
+}
+
+static unsigned getColumns(int FileID) {
+ // If COLUMNS is defined in the environment, wrap to that many columns.
+ if (const char *ColumnsStr = std::getenv("COLUMNS")) {
+ int Columns = std::atoi(ColumnsStr);
+ if (Columns > 0)
+ return Columns;
+ }
+
+ unsigned Columns = 0;
+
+#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H)
+ // Try to determine the width of the terminal.
+ struct winsize ws;
+ // Zero-fill ws to avoid a false positive from MemorySanitizer.
+ memset(&ws, 0, sizeof(ws));
+ if (ioctl(FileID, TIOCGWINSZ, &ws) == 0)
+ Columns = ws.ws_col;
+#endif
+
+ return Columns;
+}
+
+unsigned Process::StandardOutColumns() {
+ if (!StandardOutIsDisplayed())
+ return 0;
+
+ return getColumns(1);
+}
+
+unsigned Process::StandardErrColumns() {
+ if (!StandardErrIsDisplayed())
+ return 0;
+
+ return getColumns(2);
+}
+
+static bool terminalHasColors() {
+ if (const char *term = std::getenv("TERM")) {
+ // Most modern terminals support ANSI escape sequences for colors.
+ // We could check terminfo, or have a list of known terms that support
+ // colors, but that would be overkill.
+ // The user can always ask for no colors by setting TERM to dumb, or
+ // using a commandline flag.
+ return strcmp(term, "dumb") != 0;
+ }
+ return false;
+}
+
+bool Process::FileDescriptorHasColors(int fd) {
+ // A file descriptor has colors if it is displayed and the terminal has
+ // colors.
+ return FileDescriptorIsDisplayed(fd) && terminalHasColors();
+}
+
+bool Process::StandardOutHasColors() {
+ return FileDescriptorHasColors(STDOUT_FILENO);
+}
+
+bool Process::StandardErrHasColors() {
+ return FileDescriptorHasColors(STDERR_FILENO);
+}
+
+bool Process::ColorNeedsFlush() {
+ // No, we use ANSI escape sequences.
+ return false;
+}
+
+#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
+
+#define ALLCOLORS(FGBG,BOLD) {\
+ COLOR(FGBG, "0", BOLD),\
+ COLOR(FGBG, "1", BOLD),\
+ COLOR(FGBG, "2", BOLD),\
+ COLOR(FGBG, "3", BOLD),\
+ COLOR(FGBG, "4", BOLD),\
+ COLOR(FGBG, "5", BOLD),\
+ COLOR(FGBG, "6", BOLD),\
+ COLOR(FGBG, "7", BOLD)\
+ }
+
+static const char colorcodes[2][2][8][10] = {
+ { ALLCOLORS("3",""), ALLCOLORS("3","1;") },
+ { ALLCOLORS("4",""), ALLCOLORS("4","1;") }
+};
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+ return colorcodes[bg?1:0][bold?1:0][code&7];
+}
+
+const char *Process::OutputBold(bool bg) {
+ return "\033[1m";
+}
+
+const char *Process::OutputReverse() {
+ return "\033[7m";
+}
+
+const char *Process::ResetColor() {
+ return "\033[0m";
+}
+
+#if !defined(HAVE_ARC4RANDOM)
+static unsigned GetRandomNumberSeed() {
+ // Attempt to get the initial seed from /dev/urandom, if possible.
+ if (FILE *RandomSource = ::fopen("/dev/urandom", "r")) {
+ unsigned seed;
+ int count = ::fread((void *)&seed, sizeof(seed), 1, RandomSource);
+ ::fclose(RandomSource);
+
+ // Return the seed if the read was successful.
+ if (count == 1)
+ return seed;
+ }
+
+ // Otherwise, swizzle the current time and the process ID to form a reasonable
+ // seed.
+ TimeValue Now = TimeValue::now();
+ return hash_combine(Now.seconds(), Now.nanoseconds(), ::getpid());
+}
+#endif
+
+unsigned llvm::sys::Process::GetRandomNumber() {
+#if defined(HAVE_ARC4RANDOM)
+ return arc4random();
+#else
+ static int x = (::srand(GetRandomNumberSeed()), 0);
+ (void)x;
+ return ::rand();
+#endif
+}
diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc
new file mode 100644
index 000000000000..117151c91d8b
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Program.inc
@@ -0,0 +1,412 @@
+//===- llvm/Support/Unix/Program.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/FileSystem.h"
+#include <llvm/Config/config.h>
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_POSIX_SPAWN
+#include <spawn.h>
+#if !defined(__APPLE__)
+ extern char **environ;
+#else
+#include <crt_externs.h> // _NSGetEnviron
+#endif
+#endif
+
+namespace llvm {
+using namespace sys;
+
+Program::Program() : Data_(0) {}
+
+Program::~Program() {}
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+ // Check some degenerate cases
+ if (progName.length() == 0) // no program
+ return Path();
+ Path temp;
+ if (!temp.set(progName)) // invalid name
+ return Path();
+ // Use the given path verbatim if it contains any slashes; this matches
+ // the behavior of sh(1) and friends.
+ if (progName.find('/') != std::string::npos)
+ return temp;
+
+ // At this point, the file name is valid and does not contain slashes. Search
+ // for it through the directories specified in the PATH environment variable.
+
+ // Get the path. If its empty, we can't do anything to find it.
+ const char *PathStr = getenv("PATH");
+ if (PathStr == 0)
+ return Path();
+
+ // Now we have a colon separated list of directories to search; try them.
+ size_t PathLen = strlen(PathStr);
+ while (PathLen) {
+ // Find the first colon...
+ const char *Colon = std::find(PathStr, PathStr+PathLen, ':');
+
+ // Check to see if this first directory contains the executable...
+ Path FilePath;
+ if (FilePath.set(std::string(PathStr,Colon))) {
+ FilePath.appendComponent(progName);
+ if (FilePath.canExecute())
+ return FilePath; // Found the executable!
+ }
+
+ // Nope it wasn't in this directory, check the next path in the list!
+ PathLen -= Colon-PathStr;
+ PathStr = Colon;
+
+ // Advance past duplicate colons
+ while (*PathStr == ':') {
+ PathStr++;
+ PathLen--;
+ }
+ }
+ return Path();
+}
+
+static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
+ if (Path == 0) // Noop
+ return false;
+ const char *File;
+ if (Path->isEmpty())
+ // Redirect empty paths to /dev/null
+ File = "/dev/null";
+ else
+ File = Path->c_str();
+
+ // Open the file
+ int InFD = open(File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666);
+ if (InFD == -1) {
+ MakeErrMsg(ErrMsg, "Cannot open file '" + std::string(File) + "' for "
+ + (FD == 0 ? "input" : "output"));
+ return true;
+ }
+
+ // Install it as the requested FD
+ if (dup2(InFD, FD) == -1) {
+ MakeErrMsg(ErrMsg, "Cannot dup2");
+ close(InFD);
+ return true;
+ }
+ close(InFD); // Close the original FD
+ return false;
+}
+
+#ifdef HAVE_POSIX_SPAWN
+static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg,
+ posix_spawn_file_actions_t *FileActions) {
+ if (Path == 0) // Noop
+ return false;
+ const char *File;
+ if (Path->isEmpty())
+ // Redirect empty paths to /dev/null
+ File = "/dev/null";
+ else
+ File = Path->c_str();
+
+ if (int Err = posix_spawn_file_actions_addopen(FileActions, FD,
+ File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666))
+ return MakeErrMsg(ErrMsg, "Cannot dup2", Err);
+ return false;
+}
+#endif
+
+static void TimeOutHandler(int Sig) {
+}
+
+static void SetMemoryLimits (unsigned size)
+{
+#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT
+ struct rlimit r;
+ __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576;
+
+ // Heap size
+ getrlimit (RLIMIT_DATA, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_DATA, &r);
+#ifdef RLIMIT_RSS
+ // Resident set size.
+ getrlimit (RLIMIT_RSS, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_RSS, &r);
+#endif
+#ifdef RLIMIT_AS // e.g. NetBSD doesn't have it.
+ // Don't set virtual memory limit if built with any Sanitizer. They need 80Tb
+ // of virtual memory for shadow memory mapping.
+#if !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_ADDRESS_SANITIZER_BUILD
+ // Virtual memory.
+ getrlimit (RLIMIT_AS, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_AS, &r);
+#endif
+#endif
+#endif
+}
+
+bool
+Program::Execute(const Path &path, const char **args, const char **envp,
+ const Path **redirects, unsigned memoryLimit,
+ std::string *ErrMsg) {
+ // If this OS has posix_spawn and there is no memory limit being implied, use
+ // posix_spawn. It is more efficient than fork/exec.
+#ifdef HAVE_POSIX_SPAWN
+ if (memoryLimit == 0) {
+ posix_spawn_file_actions_t FileActionsStore;
+ posix_spawn_file_actions_t *FileActions = 0;
+
+ if (redirects) {
+ FileActions = &FileActionsStore;
+ posix_spawn_file_actions_init(FileActions);
+
+ // Redirect stdin/stdout.
+ if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) ||
+ RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions))
+ return false;
+ if (redirects[1] == 0 || redirects[2] == 0 ||
+ *redirects[1] != *redirects[2]) {
+ // Just redirect stderr
+ if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false;
+ } else {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the FD already open for stdout.
+ if (int Err = posix_spawn_file_actions_adddup2(FileActions, 1, 2))
+ return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err);
+ }
+ }
+
+ if (!envp)
+#if !defined(__APPLE__)
+ envp = const_cast<const char **>(environ);
+#else
+ // environ is missing in dylibs.
+ envp = const_cast<const char **>(*_NSGetEnviron());
+#endif
+
+ // Explicitly initialized to prevent what appears to be a valgrind false
+ // positive.
+ pid_t PID = 0;
+ int Err = posix_spawn(&PID, path.c_str(), FileActions, /*attrp*/0,
+ const_cast<char **>(args), const_cast<char **>(envp));
+
+ if (FileActions)
+ posix_spawn_file_actions_destroy(FileActions);
+
+ if (Err)
+ return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err);
+
+ Data_ = reinterpret_cast<void*>(PID);
+ return true;
+ }
+#endif
+
+ // Create a child process.
+ int child = fork();
+ switch (child) {
+ // An error occurred: Return to the caller.
+ case -1:
+ MakeErrMsg(ErrMsg, "Couldn't fork");
+ return false;
+
+ // Child process: Execute the program.
+ case 0: {
+ // Redirect file descriptors...
+ if (redirects) {
+ // Redirect stdin
+ if (RedirectIO(redirects[0], 0, ErrMsg)) { return false; }
+ // Redirect stdout
+ if (RedirectIO(redirects[1], 1, ErrMsg)) { return false; }
+ if (redirects[1] && redirects[2] &&
+ *(redirects[1]) == *(redirects[2])) {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the FD already open for stdout.
+ if (-1 == dup2(1,2)) {
+ MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout");
+ return false;
+ }
+ } else {
+ // Just redirect stderr
+ if (RedirectIO(redirects[2], 2, ErrMsg)) { return false; }
+ }
+ }
+
+ // Set memory limits
+ if (memoryLimit!=0) {
+ SetMemoryLimits(memoryLimit);
+ }
+
+ // Execute!
+ if (envp != 0)
+ execve(path.c_str(),
+ const_cast<char **>(args),
+ const_cast<char **>(envp));
+ else
+ execv(path.c_str(),
+ const_cast<char **>(args));
+ // If the execve() failed, we should exit. Follow Unix protocol and
+ // return 127 if the executable was not found, and 126 otherwise.
+ // Use _exit rather than exit so that atexit functions and static
+ // object destructors cloned from the parent process aren't
+ // redundantly run, and so that any data buffered in stdio buffers
+ // cloned from the parent aren't redundantly written out.
+ _exit(errno == ENOENT ? 127 : 126);
+ }
+
+ // Parent process: Break out of the switch to do our processing.
+ default:
+ break;
+ }
+
+ Data_ = reinterpret_cast<void*>(child);
+
+ return true;
+}
+
+int
+Program::Wait(const sys::Path &path,
+ unsigned secondsToWait,
+ std::string* ErrMsg)
+{
+#ifdef HAVE_SYS_WAIT_H
+ struct sigaction Act, Old;
+
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return -1;
+ }
+
+ // Install a timeout handler. The handler itself does nothing, but the simple
+ // fact of having a handler at all causes the wait below to return with EINTR,
+ // unlike if we used SIG_IGN.
+ if (secondsToWait) {
+ memset(&Act, 0, sizeof(Act));
+ Act.sa_handler = TimeOutHandler;
+ sigemptyset(&Act.sa_mask);
+ sigaction(SIGALRM, &Act, &Old);
+ alarm(secondsToWait);
+ }
+
+ // Parent process: Wait for the child process to terminate.
+ int status;
+ uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+ pid_t child = static_cast<pid_t>(pid);
+ while (waitpid(pid, &status, 0) != child)
+ if (secondsToWait && errno == EINTR) {
+ // Kill the child.
+ kill(child, SIGKILL);
+
+ // Turn off the alarm and restore the signal handler
+ alarm(0);
+ sigaction(SIGALRM, &Old, 0);
+
+ // Wait for child to die
+ if (wait(&status) != child)
+ MakeErrMsg(ErrMsg, "Child timed out but wouldn't die");
+ else
+ MakeErrMsg(ErrMsg, "Child timed out", 0);
+
+ return -2; // Timeout detected
+ } else if (errno != EINTR) {
+ MakeErrMsg(ErrMsg, "Error waiting for child process");
+ return -1;
+ }
+
+ // We exited normally without timeout, so turn off the timer.
+ if (secondsToWait) {
+ alarm(0);
+ sigaction(SIGALRM, &Old, 0);
+ }
+
+ // Return the proper exit status. Detect error conditions
+ // so we can return -1 for them and set ErrMsg informatively.
+ int result = 0;
+ if (WIFEXITED(status)) {
+ result = WEXITSTATUS(status);
+#ifdef HAVE_POSIX_SPAWN
+ // The posix_spawn child process returns 127 on any kind of error.
+ // Following the POSIX convention for command-line tools (which posix_spawn
+ // itself apparently does not), check to see if the failure was due to some
+ // reason other than the file not existing, and return 126 in this case.
+ bool Exists;
+ if (result == 127 && !llvm::sys::fs::exists(path.str(), Exists) && Exists)
+ result = 126;
+#endif
+ if (result == 127) {
+ if (ErrMsg)
+ *ErrMsg = llvm::sys::StrError(ENOENT);
+ return -1;
+ }
+ if (result == 126) {
+ if (ErrMsg)
+ *ErrMsg = "Program could not be executed";
+ return -1;
+ }
+ } else if (WIFSIGNALED(status)) {
+ if (ErrMsg) {
+ *ErrMsg = strsignal(WTERMSIG(status));
+#ifdef WCOREDUMP
+ if (WCOREDUMP(status))
+ *ErrMsg += " (core dumped)";
+#endif
+ }
+ // Return a special value to indicate that the process received an unhandled
+ // signal during execution as opposed to failing to execute.
+ return -2;
+ }
+ return result;
+#else
+ if (ErrMsg)
+ *ErrMsg = "Program::Wait is not implemented on this platform yet!";
+ return -1;
+#endif
+}
+
+error_code Program::ChangeStdinToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return make_error_code(errc::success);
+}
+
+error_code Program::ChangeStdoutToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return make_error_code(errc::success);
+}
+
+error_code Program::ChangeStderrToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return make_error_code(errc::success);
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Unix/README.txt b/contrib/llvm/lib/Support/Unix/README.txt
new file mode 100644
index 000000000000..3d547c2990d5
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/README.txt
@@ -0,0 +1,16 @@
+llvm/lib/Support/Unix README
+===========================
+
+This directory provides implementations of the lib/System classes that
+are common to two or more variants of UNIX. For example, the directory
+structure underneath this directory could look like this:
+
+Unix - only code that is truly generic to all UNIX platforms
+ Posix - code that is specific to Posix variants of UNIX
+ SUS - code that is specific to the Single Unix Specification
+ SysV - code that is specific to System V variants of UNIX
+
+As a rule, only those directories actually needing to be created should be
+created. Also, further subdirectories could be created to reflect versions of
+the various standards. For example, under SUS there could be v1, v2, and v3
+subdirectories to reflect the three major versions of SUS.
diff --git a/contrib/llvm/lib/Support/Unix/RWMutex.inc b/contrib/llvm/lib/Support/Unix/RWMutex.inc
new file mode 100644
index 000000000000..40e87ff13111
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/RWMutex.inc
@@ -0,0 +1,43 @@
+//= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() { }
+
+RWMutexImpl::~RWMutexImpl() { }
+
+bool RWMutexImpl::reader_acquire() {
+ return true;
+}
+
+bool RWMutexImpl::reader_release() {
+ return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+ return true;
+}
+
+bool RWMutexImpl::writer_release() {
+ return true;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc
new file mode 100644
index 000000000000..66338f17d88f
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Signals.inc
@@ -0,0 +1,377 @@
+//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Mutex.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+#if HAVE_EXECINFO_H
+# include <execinfo.h> // For backtrace().
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_DLFCN_H && __GNUG__
+#include <dlfcn.h>
+#include <cxxabi.h>
+#endif
+#if HAVE_MACH_MACH_H
+#include <mach/mach.h>
+#endif
+
+using namespace llvm;
+
+static RETSIGTYPE SignalHandler(int Sig); // defined below.
+
+static SmartMutex<true> SignalsMutex;
+
+/// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<std::string> FilesToRemove;
+static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
+
+// IntSigs - Signals that represent requested termination. There's no bug
+// or failure, or if there is, it's not our direct responsibility. For whatever
+// reason, our continued execution is no longer desirable.
+static const int IntSigs[] = {
+ SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+};
+static const int *const IntSigsEnd =
+ IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]);
+
+// KillSigs - Signals that represent that we have a bug, and our prompt
+// termination has been ordered.
+static const int KillSigs[] = {
+ SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGQUIT
+#ifdef SIGSYS
+ , SIGSYS
+#endif
+#ifdef SIGXCPU
+ , SIGXCPU
+#endif
+#ifdef SIGXFSZ
+ , SIGXFSZ
+#endif
+#ifdef SIGEMT
+ , SIGEMT
+#endif
+};
+static const int *const KillSigsEnd =
+ KillSigs + sizeof(KillSigs) / sizeof(KillSigs[0]);
+
+static unsigned NumRegisteredSignals = 0;
+static struct {
+ struct sigaction SA;
+ int SigNo;
+} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
+
+
+static void RegisterHandler(int Signal) {
+ assert(NumRegisteredSignals <
+ sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
+ "Out of space for signal handlers!");
+
+ struct sigaction NewHandler;
+
+ NewHandler.sa_handler = SignalHandler;
+ NewHandler.sa_flags = SA_NODEFER|SA_RESETHAND;
+ sigemptyset(&NewHandler.sa_mask);
+
+ // Install the new handler, save the old one in RegisteredSignalInfo.
+ sigaction(Signal, &NewHandler,
+ &RegisteredSignalInfo[NumRegisteredSignals].SA);
+ RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal;
+ ++NumRegisteredSignals;
+}
+
+static void RegisterHandlers() {
+ // If the handlers are already registered, we're done.
+ if (NumRegisteredSignals != 0) return;
+
+ std::for_each(IntSigs, IntSigsEnd, RegisterHandler);
+ std::for_each(KillSigs, KillSigsEnd, RegisterHandler);
+}
+
+static void UnregisterHandlers() {
+ // Restore all of the signal handlers to how they were before we showed up.
+ for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i)
+ sigaction(RegisteredSignalInfo[i].SigNo,
+ &RegisteredSignalInfo[i].SA, 0);
+ NumRegisteredSignals = 0;
+}
+
+
+/// RemoveFilesToRemove - Process the FilesToRemove list. This function
+/// should be called with the SignalsMutex lock held.
+/// NB: This must be an async signal safe function. It cannot allocate or free
+/// memory, even in debug builds.
+static void RemoveFilesToRemove() {
+ // We avoid iterators in case of debug iterators that allocate or release
+ // memory.
+ for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i) {
+ // We rely on a std::string implementation for which repeated calls to
+ // 'c_str()' don't allocate memory. We pre-call 'c_str()' on all of these
+ // strings to try to ensure this is safe.
+ const char *path = FilesToRemove[i].c_str();
+
+ // Get the status so we can determine if it's a file or directory. If we
+ // can't stat the file, ignore it.
+ struct stat buf;
+ if (stat(path, &buf) != 0)
+ continue;
+
+ // If this is not a regular file, ignore it. We want to prevent removal of
+ // special files like /dev/null, even if the compiler is being run with the
+ // super-user permissions.
+ if (!S_ISREG(buf.st_mode))
+ continue;
+
+ // Otherwise, remove the file. We ignore any errors here as there is nothing
+ // else we can do.
+ unlink(path);
+ }
+}
+
+// SignalHandler - The signal handler that runs.
+static RETSIGTYPE SignalHandler(int Sig) {
+ // Restore the signal behavior to default, so that the program actually
+ // crashes when we return and the signal reissues. This also ensures that if
+ // we crash in our signal handler that the program will terminate immediately
+ // instead of recursing in the signal handler.
+ UnregisterHandlers();
+
+ // Unmask all potentially blocked kill signals.
+ sigset_t SigMask;
+ sigfillset(&SigMask);
+ sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+ SignalsMutex.acquire();
+ RemoveFilesToRemove();
+
+ if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
+ if (InterruptFunction) {
+ void (*IF)() = InterruptFunction;
+ SignalsMutex.release();
+ InterruptFunction = 0;
+ IF(); // run the interrupt function.
+ return;
+ }
+
+ SignalsMutex.release();
+ raise(Sig); // Execute the default handler.
+ return;
+ }
+
+ SignalsMutex.release();
+
+ // Otherwise if it is a fault (like SEGV) run any handler.
+ for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i)
+ CallBacksToRun[i].first(CallBacksToRun[i].second);
+}
+
+void llvm::sys::RunInterruptHandlers() {
+ SignalsMutex.acquire();
+ RemoveFilesToRemove();
+ SignalsMutex.release();
+}
+
+void llvm::sys::SetInterruptFunction(void (*IF)()) {
+ SignalsMutex.acquire();
+ InterruptFunction = IF;
+ SignalsMutex.release();
+ RegisterHandlers();
+}
+
+// RemoveFileOnSignal - The public API
+bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
+ std::string* ErrMsg) {
+ SignalsMutex.acquire();
+ std::string *OldPtr = FilesToRemove.empty() ? 0 : &FilesToRemove[0];
+ FilesToRemove.push_back(Filename.str());
+
+ // We want to call 'c_str()' on every std::string in this vector so that if
+ // the underlying implementation requires a re-allocation, it happens here
+ // rather than inside of the signal handler. If we see the vector grow, we
+ // have to call it on every entry. If it remains in place, we only need to
+ // call it on the latest one.
+ if (OldPtr == &FilesToRemove[0])
+ FilesToRemove.back().c_str();
+ else
+ for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i)
+ FilesToRemove[i].c_str();
+
+ SignalsMutex.release();
+
+ RegisterHandlers();
+ return false;
+}
+
+// DontRemoveFileOnSignal - The public API
+void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+ SignalsMutex.acquire();
+ std::vector<std::string>::reverse_iterator RI =
+ std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename.str());
+ std::vector<std::string>::iterator I = FilesToRemove.end();
+ if (RI != FilesToRemove.rend())
+ I = FilesToRemove.erase(RI.base()-1);
+
+ // We need to call c_str() on every element which would have been moved by
+ // the erase. These elements, in a C++98 implementation where c_str()
+ // requires a reallocation on the first call may have had the call to c_str()
+ // made on insertion become invalid by being copied down an element.
+ for (std::vector<std::string>::iterator E = FilesToRemove.end(); I != E; ++I)
+ I->c_str();
+
+ SignalsMutex.release();
+}
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process. The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+ CallBacksToRun.push_back(std::make_pair(FnPtr, Cookie));
+ RegisterHandlers();
+}
+
+
+// PrintStackTrace - In the case of a program crash or fault, print out a stack
+// trace so that the user has an indication of why and where we died.
+//
+// On glibc systems we have the 'backtrace' function, which works nicely, but
+// doesn't demangle symbols.
+void llvm::sys::PrintStackTrace(FILE *FD) {
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+ static void* StackTrace[256];
+ // Use backtrace() to output a backtrace on Linux systems with glibc.
+ int depth = backtrace(StackTrace,
+ static_cast<int>(array_lengthof(StackTrace)));
+#if HAVE_DLFCN_H && __GNUG__
+ int width = 0;
+ for (int i = 0; i < depth; ++i) {
+ Dl_info dlinfo;
+ dladdr(StackTrace[i], &dlinfo);
+ const char* name = strrchr(dlinfo.dli_fname, '/');
+
+ int nwidth;
+ if (name == NULL) nwidth = strlen(dlinfo.dli_fname);
+ else nwidth = strlen(name) - 1;
+
+ if (nwidth > width) width = nwidth;
+ }
+
+ for (int i = 0; i < depth; ++i) {
+ Dl_info dlinfo;
+ dladdr(StackTrace[i], &dlinfo);
+
+ fprintf(FD, "%-2d", i);
+
+ const char* name = strrchr(dlinfo.dli_fname, '/');
+ if (name == NULL) fprintf(FD, " %-*s", width, dlinfo.dli_fname);
+ else fprintf(FD, " %-*s", width, name+1);
+
+ fprintf(FD, " %#0*lx",
+ (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
+
+ if (dlinfo.dli_sname != NULL) {
+ int res;
+ fputc(' ', FD);
+ char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
+ if (d == NULL) fputs(dlinfo.dli_sname, FD);
+ else fputs(d, FD);
+ free(d);
+
+ // FIXME: When we move to C++11, use %t length modifier. It's not in
+ // C++03 and causes gcc to issue warnings. Losing the upper 32 bits of
+ // the stack offset for a stack dump isn't likely to cause any problems.
+ fprintf(FD, " + %u",(unsigned)((char*)StackTrace[i]-
+ (char*)dlinfo.dli_saddr));
+ }
+ fputc('\n', FD);
+ }
+#else
+ backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
+#endif
+#endif
+}
+
+static void PrintStackTraceSignalHandler(void *) {
+ PrintStackTrace(stderr);
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void llvm::sys::PrintStackTraceOnErrorSignal() {
+ AddSignalHandler(PrintStackTraceSignalHandler, 0);
+
+#if defined(__APPLE__)
+ // Environment variable to disable any kind of crash dialog.
+ if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
+ mach_port_t self = mach_task_self();
+
+ exception_mask_t mask = EXC_MASK_CRASH;
+
+ kern_return_t ret = task_set_exception_ports(self,
+ mask,
+ MACH_PORT_NULL,
+ EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES,
+ THREAD_STATE_NONE);
+ (void)ret;
+ }
+#endif
+}
+
+
+/***/
+
+// On Darwin, raise sends a signal to the main thread instead of the current
+// thread. This has the unfortunate effect that assert() and abort() will end up
+// bypassing our crash recovery attempts. We work around this for anything in
+// the same linkage unit by just defining our own versions of the assert handler
+// and abort.
+
+#ifdef __APPLE__
+
+#include <signal.h>
+#include <pthread.h>
+
+int raise(int sig) {
+ return pthread_kill(pthread_self(), sig);
+}
+
+void __assert_rtn(const char *func,
+ const char *file,
+ int line,
+ const char *expr) {
+ if (func)
+ fprintf(stderr, "Assertion failed: (%s), function %s, file %s, line %d.\n",
+ expr, func, file, line);
+ else
+ fprintf(stderr, "Assertion failed: (%s), file %s, line %d.\n",
+ expr, file, line);
+ abort();
+}
+
+void abort() {
+ raise(SIGABRT);
+ usleep(1000);
+ __builtin_trap();
+}
+
+#endif
diff --git a/contrib/llvm/lib/Support/Unix/ThreadLocal.inc b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
new file mode 100644
index 000000000000..2b4c9017cd91
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
@@ -0,0 +1,26 @@
+//=== llvm/Support/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { setInstance(0); }
+}
diff --git a/contrib/llvm/lib/Support/Unix/TimeValue.inc b/contrib/llvm/lib/Support/Unix/TimeValue.inc
new file mode 100644
index 000000000000..df8558bf8bed
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/TimeValue.inc
@@ -0,0 +1,57 @@
+//===- Unix/TimeValue.cpp - Unix TimeValue Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+
+namespace llvm {
+ using namespace sys;
+
+std::string TimeValue::str() const {
+ char buffer[32];
+
+ time_t ourTime = time_t(this->toEpochTime());
+#ifdef __hpux
+// note that the following line needs -D_REENTRANT on HP-UX to be picked up
+ asctime_r(localtime(&ourTime), buffer);
+#else
+ ::asctime_r(::localtime(&ourTime), buffer);
+#endif
+
+ std::string result(buffer);
+ return result.substr(0,24);
+}
+
+TimeValue TimeValue::now() {
+ struct timeval the_time;
+ timerclear(&the_time);
+ if (0 != ::gettimeofday(&the_time,0)) {
+ // This is *really* unlikely to occur because the only gettimeofday
+ // errors concern the timezone parameter which we're passing in as 0.
+ // In the unlikely case it does happen, just return MinTime, no error
+ // message needed.
+ return MinTime;
+ }
+
+ return TimeValue(
+ static_cast<TimeValue::SecondsType>( the_time.tv_sec +
+ PosixZeroTimeSeconds ),
+ static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec *
+ NANOSECONDS_PER_MICROSECOND ) );
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Unix/Unix.h b/contrib/llvm/lib/Support/Unix/Unix.h
new file mode 100644
index 000000000000..051f56f96922
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Unix.h
@@ -0,0 +1,81 @@
+//===- llvm/Support/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Unix implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_UNIX_UNIX_H
+#define LLVM_SYSTEM_UNIX_UNIX_H
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on all UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/Errno.h"
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#ifdef HAVE_ASSERT_H
+#include <assert.h>
+#endif
+
+#ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+#endif
+#include <time.h>
+
+#ifdef HAVE_SYS_WAIT_H
+# include <sys/wait.h>
+#endif
+
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
+#endif
+
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+/// This function builds an error message into \p ErrMsg using the \p prefix
+/// string and the Unix error number given by \p errnum. If errnum is -1, the
+/// default then the value of errno is used.
+/// @brief Make an error message
+///
+/// If the error number can be converted to a string, it will be
+/// separated from prefix by ": ".
+static inline bool MakeErrMsg(
+ std::string* ErrMsg, const std::string& prefix, int errnum = -1) {
+ if (!ErrMsg)
+ return true;
+ if (errnum == -1)
+ errnum = errno;
+ *ErrMsg = prefix + ": " + llvm::sys::StrError(errnum);
+ return true;
+}
+
+#endif
diff --git a/contrib/llvm/lib/Support/Unix/Watchdog.inc b/contrib/llvm/lib/Support/Unix/Watchdog.inc
new file mode 100644
index 000000000000..5d89c0e51b11
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Watchdog.inc
@@ -0,0 +1,32 @@
+//===--- Unix/Watchdog.inc - Unix Watchdog Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+namespace llvm {
+ namespace sys {
+ Watchdog::Watchdog(unsigned int seconds) {
+#ifdef HAVE_UNISTD_H
+ alarm(seconds);
+#endif
+ }
+
+ Watchdog::~Watchdog() {
+#ifdef HAVE_UNISTD_H
+ alarm(0);
+#endif
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Support/Unix/system_error.inc b/contrib/llvm/lib/Support/Unix/system_error.inc
new file mode 100644
index 000000000000..681e919edb4e
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/system_error.inc
@@ -0,0 +1,34 @@
+//===- llvm/Support/Unix/system_error.inc - Unix error_code ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Unix specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+ return _do_message::message(ev);
+}
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+#ifdef ELAST
+ if (ev > ELAST)
+ return error_condition(ev, system_category());
+#endif // ELAST
+ return error_condition(ev, generic_category());
+}
diff --git a/contrib/llvm/lib/Support/Valgrind.cpp b/contrib/llvm/lib/Support/Valgrind.cpp
new file mode 100644
index 000000000000..2b250a357758
--- /dev/null
+++ b/contrib/llvm/lib/Support/Valgrind.cpp
@@ -0,0 +1,67 @@
+//===-- Valgrind.cpp - Implement Valgrind communication ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines Valgrind communication methods, if HAVE_VALGRIND_VALGRIND_H is
+// defined. If we have valgrind.h but valgrind isn't running, its macros are
+// no-ops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Valgrind.h"
+#include "llvm/Config/config.h"
+
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+
+static bool InitNotUnderValgrind() {
+ return !RUNNING_ON_VALGRIND;
+}
+
+// This bool is negated from what we'd expect because code may run before it
+// gets initialized. If that happens, it will appear to be 0 (false), and we
+// want that to cause the rest of the code in this file to run the
+// Valgrind-provided macros.
+static const bool NotUnderValgrind = InitNotUnderValgrind();
+
+bool llvm::sys::RunningOnValgrind() {
+ if (NotUnderValgrind)
+ return false;
+ return RUNNING_ON_VALGRIND;
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+ if (NotUnderValgrind)
+ return;
+
+ VALGRIND_DISCARD_TRANSLATIONS(Addr, Len);
+}
+
+#else // !HAVE_VALGRIND_VALGRIND_H
+
+bool llvm::sys::RunningOnValgrind() {
+ return false;
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+}
+
+#endif // !HAVE_VALGRIND_VALGRIND_H
+
+#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
+// These functions require no implementation, tsan just looks at the arguments
+// they're called with.
+extern "C" {
+void AnnotateHappensBefore(const char *file, int line,
+ const volatile void *cv) {}
+void AnnotateHappensAfter(const char *file, int line,
+ const volatile void *cv) {}
+void AnnotateIgnoreWritesBegin(const char *file, int line) {}
+void AnnotateIgnoreWritesEnd(const char *file, int line) {}
+}
+#endif
diff --git a/contrib/llvm/lib/Support/Watchdog.cpp b/contrib/llvm/lib/Support/Watchdog.cpp
new file mode 100644
index 000000000000..724aa001f16e
--- /dev/null
+++ b/contrib/llvm/lib/Support/Watchdog.cpp
@@ -0,0 +1,23 @@
+//===---- Watchdog.cpp - Implement Watchdog ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Watchdog.h"
+#include "llvm/Config/config.h"
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Watchdog.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Watchdog.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
new file mode 100644
index 000000000000..83da82a949ca
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
@@ -0,0 +1,163 @@
+//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of DynamicLibrary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+
+#ifdef _MSC_VER
+ #include <ntverp.h>
+#endif
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBIMAGEHLP != 1)
+ #error "libimagehlp.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code.
+//===----------------------------------------------------------------------===//
+
+static DenseSet<HMODULE> *OpenedHandles;
+
+extern "C" {
+
+ static BOOL CALLBACK ELM_Callback(WIN32_ELMCB_PCSTR ModuleName,
+ ULONG_PTR ModuleBase,
+ ULONG ModuleSize,
+ PVOID UserContext)
+ {
+ // Ignore VC++ runtimes prior to 7.1. Somehow some of them get loaded
+ // into the process.
+ if (stricmp(ModuleName, "msvci70") != 0 &&
+ stricmp(ModuleName, "msvcirt") != 0 &&
+ stricmp(ModuleName, "msvcp50") != 0 &&
+ stricmp(ModuleName, "msvcp60") != 0 &&
+ stricmp(ModuleName, "msvcp70") != 0 &&
+ stricmp(ModuleName, "msvcr70") != 0 &&
+#ifndef __MINGW32__
+ // Mingw32 uses msvcrt.dll by default. Don't ignore it.
+ // Otherwise, user should be aware, what he's doing :)
+ stricmp(ModuleName, "msvcrt") != 0 &&
+#endif
+ stricmp(ModuleName, "msvcrt20") != 0 &&
+ stricmp(ModuleName, "msvcrt40") != 0) {
+ OpenedHandles->insert((HMODULE)ModuleBase);
+ }
+ return TRUE;
+ }
+}
+
+DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
+ std::string *errMsg) {
+ SmartScopedLock<true> lock(getMutex());
+
+ if (!filename) {
+ // When no file is specified, enumerate all DLLs and EXEs in the process.
+ if (OpenedHandles == 0)
+ OpenedHandles = new DenseSet<HMODULE>();
+
+ EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
+ // Dummy library that represents "search all handles".
+ // This is mostly to ensure that the return value still shows up as "valid".
+ return DynamicLibrary(&OpenedHandles);
+ }
+
+ HMODULE a_handle = LoadLibrary(filename);
+
+ if (a_handle == 0) {
+ MakeErrMsg(errMsg, std::string(filename) + ": Can't open : ");
+ return DynamicLibrary();
+ }
+
+ if (OpenedHandles == 0)
+ OpenedHandles = new DenseSet<HMODULE>();
+
+ // If we've already loaded this library, FreeLibrary() the handle in order to
+ // keep the internal refcount at +1.
+ if (!OpenedHandles->insert(a_handle).second)
+ FreeLibrary(a_handle);
+
+ return DynamicLibrary(a_handle);
+}
+
+// Stack probing routines are in the support library (e.g. libgcc), but we don't
+// have dynamic linking on windows. Provide a hook.
+#define EXPLICIT_SYMBOL(SYM) \
+ extern "C" { extern void *SYM; }
+#define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) EXPLICIT_SYMBOL(SYMTO)
+
+#include "explicit_symbols.inc"
+
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+ SmartScopedLock<true> Lock(getMutex());
+
+ // First check symbols added via AddSymbol().
+ if (ExplicitSymbols) {
+ StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName);
+
+ if (i != ExplicitSymbols->end())
+ return i->second;
+ }
+
+ // Now search the libraries.
+ if (OpenedHandles) {
+ for (DenseSet<HMODULE>::iterator I = OpenedHandles->begin(),
+ E = OpenedHandles->end(); I != E; ++I) {
+ FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
+ if (ptr) {
+ return (void *)(intptr_t)ptr;
+ }
+ }
+ }
+
+ #define EXPLICIT_SYMBOL(SYM) \
+ if (!strcmp(symbolName, #SYM)) return (void*)&SYM;
+ #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \
+ if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO;
+
+ {
+ #include "explicit_symbols.inc"
+ }
+
+ #undef EXPLICIT_SYMBOL
+ #undef EXPLICIT_SYMBOL2
+
+ return 0;
+}
+
+
+void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
+ if (!isValid())
+ return NULL;
+ if (Data == &OpenedHandles)
+ return SearchForAddressOfSymbol(symbolName);
+ return (void *)(intptr_t)GetProcAddress((HMODULE)Data, symbolName);
+}
+
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/Host.inc b/contrib/llvm/lib/Support/Windows/Host.inc
new file mode 100644
index 000000000000..2e6d6f190370
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Host.inc
@@ -0,0 +1,22 @@
+//===- llvm/Support/Win32/Host.inc -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 Host support.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <cstdio>
+#include <string>
+
+using namespace llvm;
+
+std::string sys::getDefaultTargetTriple() {
+ return LLVM_DEFAULT_TARGET_TRIPLE;
+}
diff --git a/contrib/llvm/lib/Support/Windows/Memory.inc b/contrib/llvm/lib/Support/Windows/Memory.inc
new file mode 100644
index 000000000000..4c5aebd5e71a
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Memory.inc
@@ -0,0 +1,235 @@
+//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of various Memory
+// management utilities
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Process.h"
+
+// The Windows.h header must be the last one included.
+#include "Windows.h"
+
+namespace {
+
+DWORD getWindowsProtectionFlags(unsigned Flags) {
+ switch (Flags) {
+ // Contrary to what you might expect, the Windows page protection flags
+ // are not a bitwise combination of RWX values
+ case llvm::sys::Memory::MF_READ:
+ return PAGE_READONLY;
+ case llvm::sys::Memory::MF_WRITE:
+ // Note: PAGE_WRITE is not supported by VirtualProtect
+ return PAGE_READWRITE;
+ case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_WRITE:
+ return PAGE_READWRITE;
+ case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_EXEC:
+ return PAGE_EXECUTE_READ;
+ case llvm::sys::Memory::MF_READ |
+ llvm::sys::Memory::MF_WRITE |
+ llvm::sys::Memory::MF_EXEC:
+ return PAGE_EXECUTE_READWRITE;
+ case llvm::sys::Memory::MF_EXEC:
+ return PAGE_EXECUTE;
+ default:
+ llvm_unreachable("Illegal memory protection flag specified!");
+ }
+ // Provide a default return value as required by some compilers.
+ return PAGE_NOACCESS;
+}
+
+size_t getAllocationGranularity() {
+ SYSTEM_INFO Info;
+ ::GetSystemInfo(&Info);
+ if (Info.dwPageSize > Info.dwAllocationGranularity)
+ return Info.dwPageSize;
+ else
+ return Info.dwAllocationGranularity;
+}
+
+} // namespace
+
+namespace llvm {
+namespace sys {
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
+ const MemoryBlock *const NearBlock,
+ unsigned Flags,
+ error_code &EC) {
+ EC = error_code::success();
+ if (NumBytes == 0)
+ return MemoryBlock();
+
+ // While we'd be happy to allocate single pages, the Windows allocation
+ // granularity may be larger than a single page (in practice, it is 64K)
+ // so mapping less than that will create an unreachable fragment of memory.
+ static const size_t Granularity = getAllocationGranularity();
+ const size_t NumBlocks = (NumBytes+Granularity-1)/Granularity;
+
+ uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) +
+ NearBlock->size()
+ : NULL;
+
+ // If the requested address is not aligned to the allocation granularity,
+ // round up to get beyond NearBlock. VirtualAlloc would have rounded down.
+ if (Start && Start % Granularity != 0)
+ Start += Granularity - Start % Granularity;
+
+ DWORD Protect = getWindowsProtectionFlags(Flags);
+
+ void *PA = ::VirtualAlloc(reinterpret_cast<void*>(Start),
+ NumBlocks*Granularity,
+ MEM_RESERVE | MEM_COMMIT, Protect);
+ if (PA == NULL) {
+ if (NearBlock) {
+ // Try again without the NearBlock hint
+ return allocateMappedMemory(NumBytes, NULL, Flags, EC);
+ }
+ EC = error_code(::GetLastError(), system_category());
+ return MemoryBlock();
+ }
+
+ MemoryBlock Result;
+ Result.Address = PA;
+ Result.Size = NumBlocks*Granularity;
+ ;
+ if (Flags & MF_EXEC)
+ Memory::InvalidateInstructionCache(Result.Address, Result.Size);
+
+ return Result;
+}
+
+error_code Memory::releaseMappedMemory(MemoryBlock &M) {
+ if (M.Address == 0 || M.Size == 0)
+ return error_code::success();
+
+ if (!VirtualFree(M.Address, 0, MEM_RELEASE))
+ return error_code(::GetLastError(), system_category());
+
+ M.Address = 0;
+ M.Size = 0;
+
+ return error_code::success();
+}
+
+error_code Memory::protectMappedMemory(const MemoryBlock &M,
+ unsigned Flags) {
+ if (M.Address == 0 || M.Size == 0)
+ return error_code::success();
+
+ DWORD Protect = getWindowsProtectionFlags(Flags);
+
+ DWORD OldFlags;
+ if (!VirtualProtect(M.Address, M.Size, Protect, &OldFlags))
+ return error_code(::GetLastError(), system_category());
+
+ if (Flags & MF_EXEC)
+ Memory::InvalidateInstructionCache(M.Address, M.Size);
+
+ return error_code::success();
+}
+
+/// InvalidateInstructionCache - Before the JIT can run a block of code
+/// that has been emitted it must invalidate the instruction cache on some
+/// platforms.
+void Memory::InvalidateInstructionCache(
+ const void *Addr, size_t Len) {
+ FlushInstructionCache(GetCurrentProcess(), Addr, Len);
+}
+
+
+MemoryBlock Memory::AllocateRWX(size_t NumBytes,
+ const MemoryBlock *NearBlock,
+ std::string *ErrMsg) {
+ MemoryBlock MB;
+ error_code EC;
+ MB = allocateMappedMemory(NumBytes, NearBlock,
+ MF_READ|MF_WRITE|MF_EXEC, EC);
+ if (EC != error_code::success() && ErrMsg) {
+ MakeErrMsg(ErrMsg, EC.message());
+ }
+ return MB;
+}
+
+bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+ error_code EC = releaseMappedMemory(M);
+ if (EC == error_code::success())
+ return false;
+ MakeErrMsg(ErrMsg, EC.message());
+ return true;
+}
+
+static DWORD getProtection(const void *addr) {
+ MEMORY_BASIC_INFORMATION info;
+ if (sizeof(info) == ::VirtualQuery(addr, &info, sizeof(info))) {
+ return info.Protect;
+ }
+ return 0;
+}
+
+bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) {
+ if (!setRangeWritable(M.Address, M.Size)) {
+ return MakeErrMsg(ErrMsg, "Cannot set memory to writeable: ");
+ }
+ return true;
+}
+
+bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) {
+ if (!setRangeExecutable(M.Address, M.Size)) {
+ return MakeErrMsg(ErrMsg, "Cannot set memory to executable: ");
+ }
+ return true;
+}
+
+bool Memory::setRangeWritable(const void *Addr, size_t Size) {
+ DWORD prot = getProtection(Addr);
+ if (!prot)
+ return false;
+
+ if (prot == PAGE_EXECUTE || prot == PAGE_EXECUTE_READ) {
+ prot = PAGE_EXECUTE_READWRITE;
+ } else if (prot == PAGE_NOACCESS || prot == PAGE_READONLY) {
+ prot = PAGE_READWRITE;
+ }
+
+ DWORD oldProt;
+ Memory::InvalidateInstructionCache(Addr, Size);
+ return ::VirtualProtect(const_cast<LPVOID>(Addr), Size, prot, &oldProt)
+ == TRUE;
+}
+
+bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
+ DWORD prot = getProtection(Addr);
+ if (!prot)
+ return false;
+
+ if (prot == PAGE_NOACCESS) {
+ prot = PAGE_EXECUTE;
+ } else if (prot == PAGE_READONLY) {
+ prot = PAGE_EXECUTE_READ;
+ } else if (prot == PAGE_READWRITE) {
+ prot = PAGE_EXECUTE_READWRITE;
+ }
+
+ DWORD oldProt;
+ Memory::InvalidateInstructionCache(Addr, Size);
+ return ::VirtualProtect(const_cast<LPVOID>(Addr), Size, prot, &oldProt)
+ == TRUE;
+}
+
+} // namespace sys
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/Windows/Mutex.inc b/contrib/llvm/lib/Support/Windows/Mutex.inc
new file mode 100644
index 000000000000..583dc6359a16
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Mutex.inc
@@ -0,0 +1,58 @@
+//===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/Mutex.h"
+
+namespace llvm {
+using namespace sys;
+
+MutexImpl::MutexImpl(bool /*recursive*/)
+{
+ data_ = new CRITICAL_SECTION;
+ InitializeCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+MutexImpl::~MutexImpl()
+{
+ DeleteCriticalSection((LPCRITICAL_SECTION)data_);
+ delete (LPCRITICAL_SECTION)data_;
+ data_ = 0;
+}
+
+bool
+MutexImpl::acquire()
+{
+ EnterCriticalSection((LPCRITICAL_SECTION)data_);
+ return true;
+}
+
+bool
+MutexImpl::release()
+{
+ LeaveCriticalSection((LPCRITICAL_SECTION)data_);
+ return true;
+}
+
+bool
+MutexImpl::tryacquire()
+{
+ return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc
new file mode 100644
index 000000000000..f4898e619abf
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Path.inc
@@ -0,0 +1,926 @@
+//===- llvm/Support/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <cstdio>
+#include <malloc.h>
+
+// We need to undo a macro defined in Windows.h, otherwise we won't compile:
+#undef CopyFile
+#undef GetCurrentDirectory
+
+// Windows happily accepts either forward or backward slashes, though any path
+// returned by a Win32 API will have backward slashes. As LLVM code basically
+// assumes forward slashes are used, backward slashs are converted where they
+// can be introduced into a path.
+//
+// Another invariant is that a path ends with a slash if and only if the path
+// is a root directory. Any other use of a trailing slash is stripped. Unlike
+// in Unix, Windows has a rather complicated notion of a root path and this
+// invariant helps simply the code.
+
+static void FlipBackSlashes(std::string& s) {
+ for (size_t i = 0; i < s.size(); i++)
+ if (s[i] == '\\')
+ s[i] = '/';
+}
+
+namespace llvm {
+namespace sys {
+
+const char PathSeparator = ';';
+
+StringRef Path::GetEXESuffix() {
+ return "exe";
+}
+
+Path::Path(llvm::StringRef p)
+ : path(p) {
+ FlipBackSlashes(path);
+}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+ : path(StrStart, StrLen) {
+ FlipBackSlashes(path);
+}
+
+Path&
+Path::operator=(StringRef that) {
+ path.assign(that.data(), that.size());
+ FlipBackSlashes(path);
+ return *this;
+}
+
+bool
+Path::isValid() const {
+ if (path.empty())
+ return false;
+
+ size_t len = path.size();
+ // If there is a null character, it and all its successors are ignored.
+ size_t pos = path.find_first_of('\0');
+ if (pos != std::string::npos)
+ len = pos;
+
+ // If there is a colon, it must be the second character, preceded by a letter
+ // and followed by something.
+ pos = path.rfind(':',len);
+ size_t rootslash = 0;
+ if (pos != std::string::npos) {
+ if (pos != 1 || !isalpha(static_cast<unsigned char>(path[0])) || len < 3)
+ return false;
+ rootslash = 2;
+ }
+
+ // Look for a UNC path, and if found adjust our notion of the root slash.
+ if (len > 3 && path[0] == '/' && path[1] == '/') {
+ rootslash = path.find('/', 2);
+ if (rootslash == std::string::npos)
+ rootslash = 0;
+ }
+
+ // Check for illegal characters.
+ if (path.find_first_of("\\<>\"|\001\002\003\004\005\006\007\010\011\012"
+ "\013\014\015\016\017\020\021\022\023\024\025\026"
+ "\027\030\031\032\033\034\035\036\037")
+ != std::string::npos)
+ return false;
+
+ // Remove trailing slash, unless it's a root slash.
+ if (len > rootslash+1 && path[len-1] == '/')
+ path.erase(--len);
+
+ // Check each component for legality.
+ for (pos = 0; pos < len; ++pos) {
+ // A component may not end in a space.
+ if (path[pos] == ' ') {
+ if (pos+1 == len || path[pos+1] == '/' || path[pos+1] == '\0')
+ return false;
+ }
+
+ // A component may not end in a period.
+ if (path[pos] == '.') {
+ if (pos+1 == len || path[pos+1] == '/') {
+ // Unless it is the pseudo-directory "."...
+ if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':')
+ return true;
+ // or "..".
+ if (pos > 0 && path[pos-1] == '.') {
+ if (pos == 1 || path[pos-2] == '/' || path[pos-2] == ':')
+ return true;
+ }
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+void Path::makeAbsolute() {
+ TCHAR FullPath[MAX_PATH + 1] = {0};
+ LPTSTR FilePart = NULL;
+
+ DWORD RetLength = ::GetFullPathNameA(path.c_str(),
+ sizeof(FullPath)/sizeof(FullPath[0]),
+ FullPath, &FilePart);
+
+ if (0 == RetLength) {
+ // FIXME: Report the error GetLastError()
+ assert(0 && "Unable to make absolute path!");
+ } else if (RetLength > MAX_PATH) {
+ // FIXME: Report too small buffer (needed RetLength bytes).
+ assert(0 && "Unable to make absolute path!");
+ } else {
+ path = FullPath;
+ }
+}
+
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+ assert(NameStart);
+ // FIXME: This does not handle correctly an absolute path starting from
+ // a drive letter or in UNC format.
+ switch (NameLen) {
+ case 0:
+ return false;
+ case 1:
+ case 2:
+ return NameStart[0] == '/';
+ default:
+ return
+ (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) ||
+ (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\'));
+ }
+}
+
+bool
+Path::isAbsolute() const {
+ // FIXME: This does not handle correctly an absolute path starting from
+ // a drive letter or in UNC format.
+ switch (path.length()) {
+ case 0:
+ return false;
+ case 1:
+ case 2:
+ return path[0] == '/';
+ default:
+ return path[0] == '/' || (path[1] == ':' && path[2] == '/');
+ }
+}
+
+static Path *TempDirectory;
+
+Path
+Path::GetTemporaryDirectory(std::string* ErrMsg) {
+ if (TempDirectory) {
+#if defined(_MSC_VER)
+ // Visual Studio gets confused and emits a diagnostic about calling exists,
+ // even though this is the implementation for PathV1. Temporarily
+ // disable the deprecated warning message
+ #pragma warning(push)
+ #pragma warning(disable:4996)
+#endif
+ assert(TempDirectory->exists() && "Who has removed TempDirectory?");
+#if defined(_MSC_VER)
+ #pragma warning(pop)
+#endif
+ return *TempDirectory;
+ }
+
+ char pathname[MAX_PATH];
+ if (!GetTempPath(MAX_PATH, pathname)) {
+ if (ErrMsg)
+ *ErrMsg = "Can't determine temporary directory";
+ return Path();
+ }
+
+ Path result;
+ result.set(pathname);
+
+ // Append a subdirectory based on our process id so multiple LLVMs don't
+ // step on each other's toes.
+#ifdef __MINGW32__
+ // Mingw's Win32 header files are broken.
+ sprintf(pathname, "LLVM_%u", unsigned(GetCurrentProcessId()));
+#else
+ sprintf(pathname, "LLVM_%u", GetCurrentProcessId());
+#endif
+ result.appendComponent(pathname);
+
+ // If there's a directory left over from a previous LLVM execution that
+ // happened to have the same process id, get rid of it.
+ result.eraseFromDisk(true);
+
+ // And finally (re-)create the empty directory.
+ result.createDirectoryOnDisk(false);
+ TempDirectory = new Path(result);
+ return *TempDirectory;
+}
+
+// FIXME: the following set of functions don't map to Windows very well.
+Path
+Path::GetRootDirectory() {
+ // This is the only notion that that Windows has of a root directory. Nothing
+ // is here except for drives.
+ return Path("file:///");
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+ char buff[MAX_PATH];
+ // Generic form of C:\Windows\System32
+ HRESULT res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_SYSTEM,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK) {
+ assert(0 && "Failed to get system directory");
+ return;
+ }
+ Paths.push_back(sys::Path(buff));
+
+ // Reset buff.
+ buff[0] = 0;
+ // Generic form of C:\Windows
+ res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_WINDOWS,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK) {
+ assert(0 && "Failed to get windows directory");
+ return;
+ }
+ Paths.push_back(sys::Path(buff));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+ char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#ifdef LLVM_LIBDIR
+ {
+ Path tmpPath;
+ if (tmpPath.set(LLVM_LIBDIR))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ }
+#endif
+ GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetUserHomeDirectory() {
+ char buff[MAX_PATH];
+ HRESULT res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_APPDATA,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK)
+ assert(0 && "Failed to get user home directory");
+ return Path(buff);
+}
+
+Path
+Path::GetCurrentDirectory() {
+ char pathname[MAX_PATH];
+ ::GetCurrentDirectoryA(MAX_PATH,pathname);
+ return Path(pathname);
+}
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+ char pathname[MAX_PATH];
+ DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH);
+ return ret != MAX_PATH ? Path(pathname) : Path();
+}
+
+
+// FIXME: the above set of functions don't map to Windows very well.
+
+
+StringRef Path::getDirname() const {
+ return getDirnameCharSep(path, "/");
+}
+
+StringRef
+Path::getBasename() const {
+ // Find the last slash
+ size_t slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ size_t dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef(path).substr(slash);
+ else
+ return StringRef(path).substr(slash, dot - slash);
+}
+
+StringRef
+Path::getSuffix() const {
+ // Find the last slash
+ size_t slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ size_t dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef("");
+ else
+ return StringRef(path).substr(dot + 1);
+}
+
+bool
+Path::exists() const {
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isDirectory() const {
+ DWORD attr = GetFileAttributes(path.c_str());
+ return (attr != INVALID_FILE_ATTRIBUTES) &&
+ (attr & FILE_ATTRIBUTE_DIRECTORY);
+}
+
+bool
+Path::isSymLink() const {
+ DWORD attributes = GetFileAttributes(path.c_str());
+
+ if (attributes == INVALID_FILE_ATTRIBUTES)
+ // There's no sane way to report this :(.
+ assert(0 && "GetFileAttributes returned INVALID_FILE_ATTRIBUTES");
+
+ // This isn't exactly what defines a NTFS symlink, but it is only true for
+ // paths that act like a symlink.
+ return attributes & FILE_ATTRIBUTE_REPARSE_POINT;
+}
+
+bool
+Path::canRead() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::canWrite() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return (attr != INVALID_FILE_ATTRIBUTES) && !(attr & FILE_ATTRIBUTE_READONLY);
+}
+
+bool
+Path::canExecute() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isRegularFile() const {
+ bool res;
+ if (fs::is_regular_file(path, res))
+ return false;
+ return res;
+}
+
+StringRef
+Path::getLast() const {
+ // Find the last slash
+ size_t pos = path.rfind('/');
+
+ // Handle the corner cases
+ if (pos == std::string::npos)
+ return path;
+
+ // If the last character is a slash, we have a root directory
+ if (pos == path.length()-1)
+ return path;
+
+ // Return everything after the last slash
+ return StringRef(path).substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+ if (!fsIsValid || update) {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+ MakeErrMsg(ErrStr, "getStatusInfo():" + std::string(path) +
+ ": Can't get status: ");
+ return 0;
+ }
+
+ status.fileSize = fi.nFileSizeHigh;
+ status.fileSize <<= sizeof(fi.nFileSizeHigh)*8;
+ status.fileSize += fi.nFileSizeLow;
+
+ status.mode = fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY ? 0555 : 0777;
+ status.user = 9999; // Not applicable to Windows, so...
+ status.group = 9999; // Not applicable to Windows, so...
+
+ // FIXME: this is only unique if the file is accessed by the same file path.
+ // How do we do this for C:\dir\file and ..\dir\file ? Unix has inode
+ // numbers, but the concept doesn't exist in Windows.
+ status.uniqueID = 0;
+ for (unsigned i = 0; i < path.length(); ++i)
+ status.uniqueID += path[i];
+
+ ULARGE_INTEGER ui;
+ ui.LowPart = fi.ftLastWriteTime.dwLowDateTime;
+ ui.HighPart = fi.ftLastWriteTime.dwHighDateTime;
+ status.modTime.fromWin32Time(ui.QuadPart);
+
+ status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
+ fsIsValid = true;
+ }
+ return &status;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+ // All files are readable on Windows (ignoring security attributes).
+ return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+ DWORD attr = GetFileAttributes(path.c_str());
+
+ // If it doesn't exist, we're done.
+ if (attr == INVALID_FILE_ATTRIBUTES)
+ return false;
+
+ if (attr & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(), attr & ~FILE_ATTRIBUTE_READONLY)) {
+ MakeErrMsg(ErrMsg, std::string(path) + ": Can't make file writable: ");
+ return true;
+ }
+ }
+ return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+ // All files are executable on Windows (ignoring security attributes).
+ return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+ MakeErrMsg(ErrMsg, path + ": can't get status of file");
+ return true;
+ }
+
+ if (!(fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+ if (ErrMsg)
+ *ErrMsg = path + ": not a directory";
+ return true;
+ }
+
+ result.clear();
+ WIN32_FIND_DATA fd;
+ std::string searchpath = path;
+ if (path.size() == 0 || searchpath[path.size()-1] == '/')
+ searchpath += "*";
+ else
+ searchpath += "/*";
+
+ HANDLE h = FindFirstFile(searchpath.c_str(), &fd);
+ if (h == INVALID_HANDLE_VALUE) {
+ if (GetLastError() == ERROR_FILE_NOT_FOUND)
+ return true; // not really an error, now is it?
+ MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+ return true;
+ }
+
+ do {
+ if (fd.cFileName[0] == '.')
+ continue;
+ Path aPath(path);
+ aPath.appendComponent(&fd.cFileName[0]);
+ result.insert(aPath);
+ } while (FindNextFile(h, &fd));
+
+ DWORD err = GetLastError();
+ FindClose(h);
+ if (err != ERROR_NO_MORE_FILES) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+ return true;
+ }
+ return false;
+}
+
+bool
+Path::set(StringRef a_path) {
+ if (a_path.empty())
+ return false;
+ std::string save(path);
+ path = a_path;
+ FlipBackSlashes(path);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::appendComponent(StringRef name) {
+ if (name.empty())
+ return false;
+ std::string save(path);
+ if (!path.empty()) {
+ size_t last = path.size() - 1;
+ if (path[last] != '/')
+ path += '/';
+ }
+ path += name;
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::eraseComponent() {
+ size_t slashpos = path.rfind('/',path.size());
+ if (slashpos == path.size() - 1 || slashpos == std::string::npos)
+ return false;
+ std::string save(path);
+ path.erase(slashpos);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::eraseSuffix() {
+ size_t dotpos = path.rfind('.',path.size());
+ size_t slashpos = path.rfind('/',path.size());
+ if (dotpos != std::string::npos) {
+ if (slashpos == std::string::npos || dotpos > slashpos+1) {
+ std::string save(path);
+ path.erase(dotpos, path.size()-dotpos);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+inline bool PathMsg(std::string* ErrMsg, const char* pathname, const char*msg) {
+ if (ErrMsg)
+ *ErrMsg = std::string(pathname) + ": " + std::string(msg);
+ return true;
+}
+
+bool
+Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
+ // Get a writeable copy of the path name
+ size_t len = path.length();
+ char *pathname = reinterpret_cast<char *>(_alloca(len+2));
+ path.copy(pathname, len);
+ pathname[len] = 0;
+
+ // Make sure it ends with a slash.
+ if (len == 0 || pathname[len - 1] != '/') {
+ pathname[len] = '/';
+ pathname[++len] = 0;
+ }
+
+ // Determine starting point for initial / search.
+ char *next = pathname;
+ if (pathname[0] == '/' && pathname[1] == '/') {
+ // Skip host name.
+ next = strchr(pathname+2, '/');
+ if (next == NULL)
+ return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+ // Skip share name.
+ next = strchr(next+1, '/');
+ if (next == NULL)
+ return PathMsg(ErrMsg, pathname,"badly formed remote directory");
+
+ next++;
+ if (*next == 0)
+ return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+ } else {
+ if (pathname[1] == ':')
+ next += 2; // skip drive letter
+ if (*next == '/')
+ next++; // skip root directory
+ }
+
+ // If we're supposed to create intermediate directories
+ if (create_parents) {
+ // Loop through the directory components until we're done
+ while (*next) {
+ next = strchr(next, '/');
+ *next = 0;
+ if (!CreateDirectory(pathname, NULL) &&
+ GetLastError() != ERROR_ALREADY_EXISTS)
+ return MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": Can't create directory: ");
+ *next++ = '/';
+ }
+ } else {
+ // Drop trailing slash.
+ pathname[len-1] = 0;
+ if (!CreateDirectory(pathname, NULL) &&
+ GetLastError() != ERROR_ALREADY_EXISTS) {
+ return MakeErrMsg(ErrMsg, std::string(pathname) +
+ ": Can't create directory: ");
+ }
+ }
+ return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+ // Create the file
+ HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return MakeErrMsg(ErrMsg, path + ": Can't create file: ");
+
+ CloseHandle(h);
+ return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi))
+ return true;
+
+ if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+ // If it doesn't exist, we're done.
+ bool Exists;
+ if (fs::exists(path, Exists) || !Exists)
+ return false;
+
+ char *pathname = reinterpret_cast<char *>(_alloca(path.length()+3));
+ int lastchar = path.length() - 1 ;
+ path.copy(pathname, lastchar+1);
+
+ // Make path end with '/*'.
+ if (pathname[lastchar] != '/')
+ pathname[++lastchar] = '/';
+ pathname[lastchar+1] = '*';
+ pathname[lastchar+2] = 0;
+
+ if (remove_contents) {
+ WIN32_FIND_DATA fd;
+ HANDLE h = FindFirstFile(pathname, &fd);
+
+ // It's a bad idea to alter the contents of a directory while enumerating
+ // its contents. So build a list of its contents first, then destroy them.
+
+ if (h != INVALID_HANDLE_VALUE) {
+ std::vector<Path> list;
+
+ do {
+ if (strcmp(fd.cFileName, ".") == 0)
+ continue;
+ if (strcmp(fd.cFileName, "..") == 0)
+ continue;
+
+ Path aPath(path);
+ aPath.appendComponent(&fd.cFileName[0]);
+ list.push_back(aPath);
+ } while (FindNextFile(h, &fd));
+
+ DWORD err = GetLastError();
+ FindClose(h);
+ if (err != ERROR_NO_MORE_FILES) {
+ SetLastError(err);
+ return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+ }
+
+ for (std::vector<Path>::iterator I = list.begin(); I != list.end();
+ ++I) {
+ Path &aPath = *I;
+ aPath.eraseFromDisk(true);
+ }
+ } else {
+ if (GetLastError() != ERROR_FILE_NOT_FOUND)
+ return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+ }
+ }
+
+ pathname[lastchar] = 0;
+ if (!RemoveDirectory(pathname))
+ return MakeErrMsg(ErrStr,
+ std::string(pathname) + ": Can't destroy directory: ");
+ return false;
+ } else {
+ // Read-only files cannot be deleted on Windows. Must remove the read-only
+ // attribute first.
+ if (fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(),
+ fi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
+ }
+
+ if (!DeleteFile(path.c_str()))
+ return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
+ return false;
+ }
+}
+
+bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
+ assert(len < 1024 && "Request for magic string too long");
+ char* buf = reinterpret_cast<char*>(alloca(len));
+
+ HANDLE h = CreateFile(path.c_str(),
+ GENERIC_READ,
+ FILE_SHARE_READ,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL,
+ NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return false;
+
+ DWORD nRead = 0;
+ BOOL ret = ReadFile(h, buf, len, &nRead, NULL);
+ CloseHandle(h);
+
+ if (!ret || nRead != len)
+ return false;
+
+ Magic = std::string(buf, len);
+ return true;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+ if (!MoveFileEx(path.c_str(), newName.c_str(), MOVEFILE_REPLACE_EXISTING))
+ return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path
+ + "': ");
+ return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const {
+ // FIXME: should work on directories also.
+ if (!si.isFile) {
+ return true;
+ }
+
+ HANDLE h = CreateFile(path.c_str(),
+ FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES,
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL,
+ NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return true;
+
+ BY_HANDLE_FILE_INFORMATION bhfi;
+ if (!GetFileInformationByHandle(h, &bhfi)) {
+ DWORD err = GetLastError();
+ CloseHandle(h);
+ SetLastError(err);
+ return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: ");
+ }
+
+ ULARGE_INTEGER ui;
+ ui.QuadPart = si.modTime.toWin32Time();
+ FILETIME ft;
+ ft.dwLowDateTime = ui.LowPart;
+ ft.dwHighDateTime = ui.HighPart;
+ BOOL ret = SetFileTime(h, NULL, &ft, &ft);
+ DWORD err = GetLastError();
+ CloseHandle(h);
+ if (!ret) {
+ SetLastError(err);
+ return MakeErrMsg(ErrMsg, path + ": SetFileTime: ");
+ }
+
+ // Best we can do with Unix permission bits is to interpret the owner
+ // writable bit.
+ if (si.mode & 0200) {
+ if (bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(),
+ bhfi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
+ }
+ } else {
+ if (!(bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) {
+ if (!SetFileAttributes(path.c_str(),
+ bhfi.dwFileAttributes | FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
+ }
+ }
+
+ return false;
+}
+
+bool
+CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) {
+ // Can't use CopyFile macro defined in Windows.h because it would mess up the
+ // above line. We use the expansion it would have in a non-UNICODE build.
+ if (!::CopyFileA(Src.c_str(), Dest.c_str(), false))
+ return MakeErrMsg(ErrMsg, "Can't copy '" + Src.str() +
+ "' to '" + Dest.str() + "': ");
+ return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+ bool Exists;
+ if (reuse_current && (fs::exists(path, Exists) || !Exists))
+ return false; // File doesn't exist already, just use it!
+
+ // Reserve space for -XXXXXX at the end.
+ char *FNBuffer = (char*) alloca(path.size()+8);
+ unsigned offset = path.size();
+ path.copy(FNBuffer, offset);
+
+ // Find a numeric suffix that isn't used by an existing file. Assume there
+ // won't be more than 1 million files with the same prefix. Probably a safe
+ // bet.
+ static int FCounter = -1;
+ if (FCounter < 0) {
+ // Give arbitrary initial seed.
+ // FIXME: We should use sys::fs::unique_file() in future.
+ LARGE_INTEGER cnt64;
+ DWORD x = GetCurrentProcessId();
+ x = (x << 16) | (x >> 16);
+ if (QueryPerformanceCounter(&cnt64)) // RDTSC
+ x ^= cnt64.HighPart ^ cnt64.LowPart;
+ FCounter = x % 1000000;
+ }
+ do {
+ sprintf(FNBuffer+offset, "-%06u", FCounter);
+ if (++FCounter > 999999)
+ FCounter = 0;
+ path = FNBuffer;
+ } while (!fs::exists(path, Exists) && Exists);
+ return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+ // Make this into a unique file name
+ makeUnique(reuse_current, ErrMsg);
+
+ // Now go and create it
+ HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return MakeErrMsg(ErrMsg, path + ": can't create file");
+
+ CloseHandle(h);
+ return false;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) {
+ return 0;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+void Path::UnMapFilePages(const char *Base, size_t FileSize) {
+ assert(0 && "NOT IMPLEMENTED");
+}
+
+}
+}
diff --git a/contrib/llvm/lib/Support/Windows/PathV2.inc b/contrib/llvm/lib/Support/Windows/PathV2.inc
new file mode 100644
index 000000000000..23f3d14f91f0
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/PathV2.inc
@@ -0,0 +1,1022 @@
+//===- llvm/Support/Windows/PathV2.inc - Windows Path Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//=== is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <fcntl.h>
+#include <io.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#undef max
+
+// MinGW doesn't define this.
+#ifndef _ERRNO_T_DEFINED
+#define _ERRNO_T_DEFINED
+typedef int errno_t;
+#endif
+
+using namespace llvm;
+
+namespace {
+ typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)(
+ /*__in*/ LPCWSTR lpSymlinkFileName,
+ /*__in*/ LPCWSTR lpTargetFileName,
+ /*__in*/ DWORD dwFlags);
+
+ PtrCreateSymbolicLinkW create_symbolic_link_api = PtrCreateSymbolicLinkW(
+ ::GetProcAddress(::GetModuleHandleA("kernel32.dll"),
+ "CreateSymbolicLinkW"));
+
+ error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16) {
+ int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+ utf8.begin(), utf8.size(),
+ utf16.begin(), 0);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ utf16.reserve(len + 1);
+ utf16.set_size(len);
+
+ len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+ utf8.begin(), utf8.size(),
+ utf16.begin(), utf16.size());
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // Make utf16 null terminated.
+ utf16.push_back(0);
+ utf16.pop_back();
+
+ return error_code::success();
+ }
+
+ error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+ SmallVectorImpl<char> &utf8) {
+ // Get length.
+ int len = ::WideCharToMultiByte(CP_UTF8, 0,
+ utf16, utf16_len,
+ utf8.begin(), 0,
+ NULL, NULL);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ utf8.reserve(len);
+ utf8.set_size(len);
+
+ // Now do the actual conversion.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ utf16, utf16_len,
+ utf8.data(), utf8.size(),
+ NULL, NULL);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // Make utf8 null terminated.
+ utf8.push_back(0);
+ utf8.pop_back();
+
+ return error_code::success();
+ }
+
+ error_code TempDir(SmallVectorImpl<wchar_t> &result) {
+ retry_temp_dir:
+ DWORD len = ::GetTempPathW(result.capacity(), result.begin());
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ if (len > result.capacity()) {
+ result.reserve(len);
+ goto retry_temp_dir;
+ }
+
+ result.set_size(len);
+ return error_code::success();
+ }
+
+ bool is_separator(const wchar_t value) {
+ switch (value) {
+ case L'\\':
+ case L'/':
+ return true;
+ default:
+ return false;
+ }
+ }
+}
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+ SmallVector<wchar_t, 128> cur_path;
+ cur_path.reserve(128);
+retry_cur_dir:
+ DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
+
+ // A zero return value indicates a failure other than insufficient space.
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // If there's insufficient space, the len returned is larger than the len
+ // given.
+ if (len > cur_path.capacity()) {
+ cur_path.reserve(len);
+ goto retry_cur_dir;
+ }
+
+ cur_path.set_size(len);
+ // cur_path now holds the current directory in utf-16. Convert to utf-8.
+
+ // Find out how much space we need. Sadly, this function doesn't return the
+ // size needed unless you tell it the result size is 0, which means you
+ // _always_ have to call it twice.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ cur_path.data(), cur_path.size(),
+ result.data(), 0,
+ NULL, NULL);
+
+ if (len == 0)
+ return make_error_code(windows_error(::GetLastError()));
+
+ result.reserve(len);
+ result.set_size(len);
+ // Now do the actual conversion.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ cur_path.data(), cur_path.size(),
+ result.data(), result.size(),
+ NULL, NULL);
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ return error_code::success();
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ // Copy the file.
+ BOOL res = ::CopyFileW(wide_from.begin(), wide_to.begin(),
+ copt != copy_option::overwrite_if_exists);
+
+ if (res == 0)
+ return windows_error(::GetLastError());
+
+ return error_code::success();
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::already_exists)
+ existed = true;
+ else
+ return ec;
+ } else
+ existed = false;
+
+ return error_code::success();
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL))
+ return windows_error(::GetLastError());
+
+ return error_code::success();
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+ // Only do it if the function is available at runtime.
+ if (!create_symbolic_link_api)
+ return make_error_code(errc::function_not_supported);
+
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!create_symbolic_link_api(wide_from.begin(), wide_to.begin(), 0))
+ return windows_error(::GetLastError());
+
+ return error_code::success();
+}
+
+error_code remove(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ if (st.type() == file_type::directory_file) {
+ if (!::RemoveDirectoryW(c_str(path_utf16))) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec != windows_error::file_not_found)
+ return ec;
+ existed = false;
+ } else
+ existed = true;
+ } else {
+ if (!::DeleteFileW(c_str(path_utf16))) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec != windows_error::file_not_found)
+ return ec;
+ existed = false;
+ } else
+ existed = true;
+ }
+
+ return error_code::success();
+}
+
+error_code rename(const Twine &from, const Twine &to) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ error_code ec = error_code::success();
+ for (int i = 0; i < 2000; i++) {
+ if (::MoveFileExW(wide_from.begin(), wide_to.begin(),
+ MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
+ return error_code::success();
+ ec = windows_error(::GetLastError());
+ if (ec != windows_error::access_denied)
+ break;
+ // Retry MoveFile() at ACCESS_DENIED.
+ // System scanners (eg. indexer) might open the source file when
+ // It is written and closed.
+ ::Sleep(1);
+ }
+
+ return ec;
+}
+
+error_code resize_file(const Twine &path, uint64_t size) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ int fd = ::_wopen(path_utf16.begin(), O_BINARY | _O_RDWR, S_IWRITE);
+ if (fd == -1)
+ return error_code(errno, generic_category());
+#ifdef HAVE__CHSIZE_S
+ errno_t error = ::_chsize_s(fd, size);
+#else
+ errno_t error = ::_chsize(fd, size);
+#endif
+ ::close(fd);
+ return error_code(error, generic_category());
+}
+
+error_code exists(const Twine &path, bool &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ DWORD attributes = ::GetFileAttributesW(path_utf16.begin());
+
+ if (attributes == INVALID_FILE_ATTRIBUTES) {
+ // See if the file didn't actually exist.
+ error_code ec = make_error_code(windows_error(::GetLastError()));
+ if (ec != windows_error::file_not_found &&
+ ec != windows_error::path_not_found)
+ return ec;
+ result = false;
+ } else
+ result = true;
+ return error_code::success();
+}
+
+bool equivalent(file_status A, file_status B) {
+ assert(status_known(A) && status_known(B));
+ return A.FileIndexHigh == B.FileIndexHigh &&
+ A.FileIndexLow == B.FileIndexLow &&
+ A.FileSizeHigh == B.FileSizeHigh &&
+ A.FileSizeLow == B.FileSizeLow &&
+ A.LastWriteTimeHigh == B.LastWriteTimeHigh &&
+ A.LastWriteTimeLow == B.LastWriteTimeLow &&
+ A.VolumeSerialNumber == B.VolumeSerialNumber;
+}
+
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ file_status fsA, fsB;
+ if (error_code ec = status(A, fsA)) return ec;
+ if (error_code ec = status(B, fsB)) return ec;
+ result = equivalent(fsA, fsB);
+ return error_code::success();
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ WIN32_FILE_ATTRIBUTE_DATA FileData;
+ if (!::GetFileAttributesExW(path_utf16.begin(),
+ ::GetFileExInfoStandard,
+ &FileData))
+ return windows_error(::GetLastError());
+
+ result =
+ (uint64_t(FileData.nFileSizeHigh) << (sizeof(FileData.nFileSizeLow) * 8))
+ + FileData.nFileSizeLow;
+
+ return error_code::success();
+}
+
+static bool isReservedName(StringRef path) {
+ // This list of reserved names comes from MSDN, at:
+ // http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx
+ static const char *sReservedNames[] = { "nul", "con", "prn", "aux",
+ "com1", "com2", "com3", "com4", "com5", "com6",
+ "com7", "com8", "com9", "lpt1", "lpt2", "lpt3",
+ "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9" };
+
+ // First, check to see if this is a device namespace, which always
+ // starts with \\.\, since device namespaces are not legal file paths.
+ if (path.startswith("\\\\.\\"))
+ return true;
+
+ // Then compare against the list of ancient reserved names
+ for (size_t i = 0; i < sizeof(sReservedNames) / sizeof(const char *); ++i) {
+ if (path.equals_lower(sReservedNames[i]))
+ return true;
+ }
+
+ // The path isn't what we consider reserved.
+ return false;
+}
+
+error_code status(const Twine &path, file_status &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ StringRef path8 = path.toStringRef(path_storage);
+ if (isReservedName(path8)) {
+ result = file_status(file_type::character_file);
+ return error_code::success();
+ }
+
+ if (error_code ec = UTF8ToUTF16(path8, path_utf16))
+ return ec;
+
+ DWORD attr = ::GetFileAttributesW(path_utf16.begin());
+ if (attr == INVALID_FILE_ATTRIBUTES)
+ goto handle_status_error;
+
+ // Handle reparse points.
+ if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
+ ScopedFileHandle h(
+ ::CreateFileW(path_utf16.begin(),
+ 0, // Attributes only.
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+ if (!h)
+ goto handle_status_error;
+ }
+
+ if (attr & FILE_ATTRIBUTE_DIRECTORY)
+ result = file_status(file_type::directory_file);
+ else {
+ result = file_status(file_type::regular_file);
+ ScopedFileHandle h(
+ ::CreateFileW(path_utf16.begin(),
+ 0, // Attributes only.
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+ if (!h)
+ goto handle_status_error;
+ BY_HANDLE_FILE_INFORMATION Info;
+ if (!::GetFileInformationByHandle(h, &Info))
+ goto handle_status_error;
+ result.FileIndexHigh = Info.nFileIndexHigh;
+ result.FileIndexLow = Info.nFileIndexLow;
+ result.FileSizeHigh = Info.nFileSizeHigh;
+ result.FileSizeLow = Info.nFileSizeLow;
+ result.LastWriteTimeHigh = Info.ftLastWriteTime.dwHighDateTime;
+ result.LastWriteTimeLow = Info.ftLastWriteTime.dwLowDateTime;
+ result.VolumeSerialNumber = Info.dwVolumeSerialNumber;
+ }
+
+ return error_code::success();
+
+handle_status_error:
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::file_not_found ||
+ ec == windows_error::path_not_found)
+ result = file_status(file_type::file_not_found);
+ else if (ec == windows_error::sharing_violation)
+ result = file_status(file_type::type_unknown);
+ else {
+ result = file_status(file_type::status_error);
+ return ec;
+ }
+
+ return error_code::success();
+}
+
+
+// Modifies permissions on a file.
+error_code permissions(const Twine &path, perms prms) {
+#if 0 // verify code below before enabling:
+ // If the permissions bits are not trying to modify
+ // "write" permissions, there is nothing to do.
+ if (!(prms & (owner_write|group_write|others_write)))
+ return error_code::success();
+
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ DWORD attributes = ::GetFileAttributesW(path_utf16.begin());
+
+ if (prms & add_perms) {
+ attributes &= ~FILE_ATTRIBUTE_READONLY;
+ }
+ else if (prms & remove_perms) {
+ attributes |= FILE_ATTRIBUTE_READONLY;
+ }
+ else {
+ assert(0 && "neither add_perms or remove_perms is set");
+ }
+
+ if ( ! ::SetFileAttributesW(path_utf16.begin(), attributes))
+ return windows_error(::GetLastError());
+#endif
+ return error_code::success();
+}
+
+
+// FIXME: mode should be used here and default to user r/w only,
+// it currently comes in as a UNIX mode.
+error_code unique_file(const Twine &model, int &result_fd,
+ SmallVectorImpl<char> &result_path,
+ bool makeAbsolute, unsigned mode) {
+ // Use result_path as temp storage.
+ result_path.set_size(0);
+ StringRef m = model.toStringRef(result_path);
+
+ SmallVector<wchar_t, 128> model_utf16;
+ if (error_code ec = UTF8ToUTF16(m, model_utf16)) return ec;
+
+ if (makeAbsolute) {
+ // Make model absolute by prepending a temp directory if it's not already.
+ bool absolute = path::is_absolute(m);
+
+ if (!absolute) {
+ SmallVector<wchar_t, 64> temp_dir;
+ if (error_code ec = TempDir(temp_dir)) return ec;
+ // Handle c: by removing it.
+ if (model_utf16.size() > 2 && model_utf16[1] == L':') {
+ model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2);
+ }
+ model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end());
+ }
+ }
+
+ // Replace '%' with random chars. From here on, DO NOT modify model. It may be
+ // needed if the randomly chosen path already exists.
+ SmallVector<wchar_t, 128> random_path_utf16;
+
+ // Get a Crypto Provider for CryptGenRandom.
+ HCRYPTPROV HCPC;
+ if (!::CryptAcquireContextW(&HCPC,
+ NULL,
+ NULL,
+ PROV_RSA_FULL,
+ CRYPT_VERIFYCONTEXT))
+ return windows_error(::GetLastError());
+ ScopedCryptContext CryptoProvider(HCPC);
+
+retry_random_path:
+ random_path_utf16.set_size(0);
+ for (SmallVectorImpl<wchar_t>::const_iterator i = model_utf16.begin(),
+ e = model_utf16.end();
+ i != e; ++i) {
+ if (*i == L'%') {
+ BYTE val = 0;
+ if (!::CryptGenRandom(CryptoProvider, 1, &val))
+ return windows_error(::GetLastError());
+ random_path_utf16.push_back("0123456789abcdef"[val & 15]);
+ }
+ else
+ random_path_utf16.push_back(*i);
+ }
+ // Make random_path_utf16 null terminated.
+ random_path_utf16.push_back(0);
+ random_path_utf16.pop_back();
+
+ // Make sure we don't fall into an infinite loop by constantly trying
+ // to create the parent path.
+ bool TriedToCreateParent = false;
+
+ // Try to create + open the path.
+retry_create_file:
+ HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(),
+ GENERIC_READ | GENERIC_WRITE,
+ FILE_SHARE_READ,
+ NULL,
+ // Return ERROR_FILE_EXISTS if the file
+ // already exists.
+ CREATE_NEW,
+ FILE_ATTRIBUTE_TEMPORARY,
+ NULL);
+ if (TempFileHandle == INVALID_HANDLE_VALUE) {
+ // If the file existed, try again, otherwise, error.
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::file_exists)
+ goto retry_random_path;
+ // Check for non-existing parent directories.
+ if (ec == windows_error::path_not_found && !TriedToCreateParent) {
+ TriedToCreateParent = true;
+
+ // Create the directories using result_path as temp storage.
+ if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
+ random_path_utf16.size(), result_path))
+ return ec;
+ StringRef p(result_path.begin(), result_path.size());
+ SmallString<64> dir_to_create;
+ for (path::const_iterator i = path::begin(p),
+ e = --path::end(p); i != e; ++i) {
+ path::append(dir_to_create, *i);
+ bool Exists;
+ if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+ if (!Exists) {
+ // If c: doesn't exist, bail.
+ if (i->endswith(":"))
+ return ec;
+
+ SmallVector<wchar_t, 64> dir_to_create_utf16;
+ if (error_code ec = UTF8ToUTF16(dir_to_create, dir_to_create_utf16))
+ return ec;
+
+ // Create the directory.
+ if (!::CreateDirectoryW(dir_to_create_utf16.begin(), NULL))
+ return windows_error(::GetLastError());
+ }
+ }
+ goto retry_create_file;
+ }
+ return ec;
+ }
+
+ // Set result_path to the utf-8 representation of the path.
+ if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
+ random_path_utf16.size(), result_path)) {
+ ::CloseHandle(TempFileHandle);
+ ::DeleteFileW(random_path_utf16.begin());
+ return ec;
+ }
+
+ // Convert the Windows API file handle into a C-runtime handle.
+ int fd = ::_open_osfhandle(intptr_t(TempFileHandle), 0);
+ if (fd == -1) {
+ ::CloseHandle(TempFileHandle);
+ ::DeleteFileW(random_path_utf16.begin());
+ // MSDN doesn't say anything about _open_osfhandle setting errno or
+ // GetLastError(), so just return invalid_handle.
+ return windows_error::invalid_handle;
+ }
+
+ result_fd = fd;
+ return error_code::success();
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+ SmallVectorImpl<char> &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+ result.set_size(0);
+
+ // Convert path to UTF-16.
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ // Open file.
+ HANDLE file = ::CreateFileW(c_str(path_utf16),
+ GENERIC_READ,
+ FILE_SHARE_READ,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_READONLY,
+ NULL);
+ if (file == INVALID_HANDLE_VALUE)
+ return windows_error(::GetLastError());
+
+ // Allocate buffer.
+ result.reserve(len);
+
+ // Get magic!
+ DWORD bytes_read = 0;
+ BOOL read_success = ::ReadFile(file, result.data(), len, &bytes_read, NULL);
+ error_code ec = windows_error(::GetLastError());
+ ::CloseHandle(file);
+ if (!read_success || (bytes_read != len)) {
+ // Set result size to the number of bytes read if it's valid.
+ if (bytes_read <= len)
+ result.set_size(bytes_read);
+ // ERROR_HANDLE_EOF is mapped to errc::value_too_large.
+ return ec;
+ }
+
+ result.set_size(len);
+ return error_code::success();
+}
+
+error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
+ FileDescriptor = FD;
+ // Make sure that the requested size fits within SIZE_T.
+ if (Size > std::numeric_limits<SIZE_T>::max()) {
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
+ ::CloseHandle(FileHandle);
+ return make_error_code(errc::invalid_argument);
+ }
+
+ DWORD flprotect;
+ switch (Mode) {
+ case readonly: flprotect = PAGE_READONLY; break;
+ case readwrite: flprotect = PAGE_READWRITE; break;
+ case priv: flprotect = PAGE_WRITECOPY; break;
+ default: llvm_unreachable("invalid mapping mode");
+ }
+
+ FileMappingHandle = ::CreateFileMapping(FileHandle,
+ 0,
+ flprotect,
+ Size >> 32,
+ Size & 0xffffffff,
+ 0);
+ if (FileMappingHandle == NULL) {
+ error_code ec = windows_error(GetLastError());
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
+ ::CloseHandle(FileHandle);
+ return ec;
+ }
+
+ DWORD dwDesiredAccess;
+ switch (Mode) {
+ case readonly: dwDesiredAccess = FILE_MAP_READ; break;
+ case readwrite: dwDesiredAccess = FILE_MAP_WRITE; break;
+ case priv: dwDesiredAccess = FILE_MAP_COPY; break;
+ default: llvm_unreachable("invalid mapping mode");
+ }
+ Mapping = ::MapViewOfFile(FileMappingHandle,
+ dwDesiredAccess,
+ Offset >> 32,
+ Offset & 0xffffffff,
+ Size);
+ if (Mapping == NULL) {
+ error_code ec = windows_error(GetLastError());
+ ::CloseHandle(FileMappingHandle);
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
+ ::CloseHandle(FileHandle);
+ return ec;
+ }
+
+ if (Size == 0) {
+ MEMORY_BASIC_INFORMATION mbi;
+ SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi));
+ if (Result == 0) {
+ error_code ec = windows_error(GetLastError());
+ ::UnmapViewOfFile(Mapping);
+ ::CloseHandle(FileMappingHandle);
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
+ ::CloseHandle(FileHandle);
+ return ec;
+ }
+ Size = mbi.RegionSize;
+ }
+
+ // Close all the handles except for the view. It will keep the other handles
+ // alive.
+ ::CloseHandle(FileMappingHandle);
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor); // Also closes FileHandle.
+ } else
+ ::CloseHandle(FileHandle);
+ return error_code::success();
+}
+
+mapped_file_region::mapped_file_region(const Twine &path,
+ mapmode mode,
+ uint64_t length,
+ uint64_t offset,
+ error_code &ec)
+ : Mode(mode)
+ , Size(length)
+ , Mapping()
+ , FileDescriptor()
+ , FileHandle(INVALID_HANDLE_VALUE)
+ , FileMappingHandle() {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ // Convert path to UTF-16.
+ if ((ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)))
+ return;
+
+ // Get file handle for creating a file mapping.
+ FileHandle = ::CreateFileW(c_str(path_utf16),
+ Mode == readonly ? GENERIC_READ
+ : GENERIC_READ | GENERIC_WRITE,
+ Mode == readonly ? FILE_SHARE_READ
+ : 0,
+ 0,
+ Mode == readonly ? OPEN_EXISTING
+ : OPEN_ALWAYS,
+ Mode == readonly ? FILE_ATTRIBUTE_READONLY
+ : FILE_ATTRIBUTE_NORMAL,
+ 0);
+ if (FileHandle == INVALID_HANDLE_VALUE) {
+ ec = windows_error(::GetLastError());
+ return;
+ }
+
+ FileDescriptor = 0;
+ ec = init(FileDescriptor, true, offset);
+ if (ec) {
+ Mapping = FileMappingHandle = 0;
+ FileHandle = INVALID_HANDLE_VALUE;
+ FileDescriptor = 0;
+ }
+}
+
+mapped_file_region::mapped_file_region(int fd,
+ bool closefd,
+ mapmode mode,
+ uint64_t length,
+ uint64_t offset,
+ error_code &ec)
+ : Mode(mode)
+ , Size(length)
+ , Mapping()
+ , FileDescriptor(fd)
+ , FileHandle(INVALID_HANDLE_VALUE)
+ , FileMappingHandle() {
+ FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+ if (FileHandle == INVALID_HANDLE_VALUE) {
+ if (closefd)
+ _close(FileDescriptor);
+ FileDescriptor = 0;
+ ec = make_error_code(errc::bad_file_descriptor);
+ return;
+ }
+
+ ec = init(FileDescriptor, closefd, offset);
+ if (ec) {
+ Mapping = FileMappingHandle = 0;
+ FileHandle = INVALID_HANDLE_VALUE;
+ FileDescriptor = 0;
+ }
+}
+
+mapped_file_region::~mapped_file_region() {
+ if (Mapping)
+ ::UnmapViewOfFile(Mapping);
+}
+
+#if LLVM_HAS_RVALUE_REFERENCES
+mapped_file_region::mapped_file_region(mapped_file_region &&other)
+ : Mode(other.Mode)
+ , Size(other.Size)
+ , Mapping(other.Mapping)
+ , FileDescriptor(other.FileDescriptor)
+ , FileHandle(other.FileHandle)
+ , FileMappingHandle(other.FileMappingHandle) {
+ other.Mapping = other.FileMappingHandle = 0;
+ other.FileHandle = INVALID_HANDLE_VALUE;
+ other.FileDescriptor = 0;
+}
+#endif
+
+mapped_file_region::mapmode mapped_file_region::flags() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return Mode;
+}
+
+uint64_t mapped_file_region::size() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return Size;
+}
+
+char *mapped_file_region::data() const {
+ assert(Mode != readonly && "Cannot get non const data for readonly mapping!");
+ assert(Mapping && "Mapping failed but used anyway!");
+ return reinterpret_cast<char*>(Mapping);
+}
+
+const char *mapped_file_region::const_data() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return reinterpret_cast<const char*>(Mapping);
+}
+
+int mapped_file_region::alignment() {
+ SYSTEM_INFO SysInfo;
+ ::GetSystemInfo(&SysInfo);
+ return SysInfo.dwAllocationGranularity;
+}
+
+error_code detail::directory_iterator_construct(detail::DirIterState &it,
+ StringRef path){
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path,
+ path_utf16))
+ return ec;
+
+ // Convert path to the format that Windows is happy with.
+ if (path_utf16.size() > 0 &&
+ !is_separator(path_utf16[path.size() - 1]) &&
+ path_utf16[path.size() - 1] != L':') {
+ path_utf16.push_back(L'\\');
+ path_utf16.push_back(L'*');
+ } else {
+ path_utf16.push_back(L'*');
+ }
+
+ // Get the first directory entry.
+ WIN32_FIND_DATAW FirstFind;
+ ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind));
+ if (!FindHandle)
+ return windows_error(::GetLastError());
+
+ size_t FilenameLen = ::wcslen(FirstFind.cFileName);
+ while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') ||
+ (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' &&
+ FirstFind.cFileName[1] == L'.'))
+ if (!::FindNextFileW(FindHandle, &FirstFind)) {
+ error_code ec = windows_error(::GetLastError());
+ // Check for end.
+ if (ec == windows_error::no_more_files)
+ return detail::directory_iterator_destruct(it);
+ return ec;
+ } else
+ FilenameLen = ::wcslen(FirstFind.cFileName);
+
+ // Construct the current directory entry.
+ SmallString<128> directory_entry_name_utf8;
+ if (error_code ec = UTF16ToUTF8(FirstFind.cFileName,
+ ::wcslen(FirstFind.cFileName),
+ directory_entry_name_utf8))
+ return ec;
+
+ it.IterationHandle = intptr_t(FindHandle.take());
+ SmallString<128> directory_entry_path(path);
+ path::append(directory_entry_path, directory_entry_name_utf8.str());
+ it.CurrentEntry = directory_entry(directory_entry_path.str());
+
+ return error_code::success();
+}
+
+error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
+ if (it.IterationHandle != 0)
+ // Closes the handle if it's valid.
+ ScopedFindHandle close(HANDLE(it.IterationHandle));
+ it.IterationHandle = 0;
+ it.CurrentEntry = directory_entry();
+ return error_code::success();
+}
+
+error_code detail::directory_iterator_increment(detail::DirIterState &it) {
+ WIN32_FIND_DATAW FindData;
+ if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
+ error_code ec = windows_error(::GetLastError());
+ // Check for end.
+ if (ec == windows_error::no_more_files)
+ return detail::directory_iterator_destruct(it);
+ return ec;
+ }
+
+ size_t FilenameLen = ::wcslen(FindData.cFileName);
+ if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') ||
+ (FilenameLen == 2 && FindData.cFileName[0] == L'.' &&
+ FindData.cFileName[1] == L'.'))
+ return directory_iterator_increment(it);
+
+ SmallString<128> directory_entry_path_utf8;
+ if (error_code ec = UTF16ToUTF8(FindData.cFileName,
+ ::wcslen(FindData.cFileName),
+ directory_entry_path_utf8))
+ return ec;
+
+ it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8));
+ return error_code::success();
+}
+
+error_code map_file_pages(const Twine &path, off_t file_offset, size_t size,
+ bool map_writable, void *&result) {
+ assert(0 && "NOT IMPLEMENTED");
+ return windows_error::invalid_function;
+}
+
+error_code unmap_file_pages(void *base, size_t size) {
+ assert(0 && "NOT IMPLEMENTED");
+ return windows_error::invalid_function;
+}
+
+
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Support/Windows/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc
new file mode 100644
index 000000000000..ad9412852f10
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Process.inc
@@ -0,0 +1,278 @@
+//===- Win32/Process.cpp - Win32 Process Implementation ------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <direct.h>
+#include <io.h>
+#include <malloc.h>
+#include <psapi.h>
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBPSAPI != 1)
+ #error "libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef __MINGW32__
+// This ban should be lifted when MinGW 1.0+ has defined this value.
+# define _HEAPOK (-2)
+#endif
+
+using namespace llvm;
+using namespace sys;
+
+
+process::id_type self_process::get_id() {
+ return GetCurrentProcess();
+}
+
+static TimeValue getTimeValueFromFILETIME(FILETIME Time) {
+ ULARGE_INTEGER TimeInteger;
+ TimeInteger.LowPart = Time.dwLowDateTime;
+ TimeInteger.HighPart = Time.dwHighDateTime;
+
+ // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond)
+ return TimeValue(
+ static_cast<TimeValue::SecondsType>(TimeInteger.QuadPart / 10000000),
+ static_cast<TimeValue::NanoSecondsType>(
+ (TimeInteger.QuadPart % 10000000) * 100));
+}
+
+TimeValue self_process::get_user_time() const {
+ FILETIME ProcCreate, ProcExit, KernelTime, UserTime;
+ if (GetProcessTimes(GetCurrentProcess(), &ProcCreate, &ProcExit, &KernelTime,
+ &UserTime) == 0)
+ return TimeValue();
+
+ return getTimeValueFromFILETIME(UserTime);
+}
+
+TimeValue self_process::get_system_time() const {
+ FILETIME ProcCreate, ProcExit, KernelTime, UserTime;
+ if (GetProcessTimes(GetCurrentProcess(), &ProcCreate, &ProcExit, &KernelTime,
+ &UserTime) == 0)
+ return TimeValue();
+
+ return getTimeValueFromFILETIME(KernelTime);
+}
+
+// This function retrieves the page size using GetSystemInfo and is present
+// solely so it can be called once to initialize the self_process member below.
+static unsigned getPageSize() {
+ // NOTE: A 32-bit application running under WOW64 is supposed to use
+ // GetNativeSystemInfo. However, this interface is not present prior
+ // to Windows XP so to use it requires dynamic linking. It is not clear
+ // how this affects the reported page size, if at all. One could argue
+ // that LLVM ought to run as 64-bits on a 64-bit system, anyway.
+ SYSTEM_INFO info;
+ GetSystemInfo(&info);
+ return static_cast<unsigned>(info.dwPageSize);
+}
+
+// This constructor guaranteed to be run exactly once on a single thread, and
+// sets up various process invariants that can be queried cheaply from then on.
+self_process::self_process() : PageSize(getPageSize()) {
+}
+
+
+size_t
+Process::GetMallocUsage()
+{
+ _HEAPINFO hinfo;
+ hinfo._pentry = NULL;
+
+ size_t size = 0;
+
+ while (_heapwalk(&hinfo) == _HEAPOK)
+ size += hinfo._size;
+
+ return size;
+}
+
+void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time,
+ TimeValue &sys_time) {
+ elapsed = TimeValue::now();
+
+ FILETIME ProcCreate, ProcExit, KernelTime, UserTime;
+ if (GetProcessTimes(GetCurrentProcess(), &ProcCreate, &ProcExit, &KernelTime,
+ &UserTime) == 0)
+ return;
+
+ user_time = getTimeValueFromFILETIME(UserTime);
+ sys_time = getTimeValueFromFILETIME(KernelTime);
+}
+
+int Process::GetCurrentUserId()
+{
+ return 65536;
+}
+
+int Process::GetCurrentGroupId()
+{
+ return 65536;
+}
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this configuration item
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+ // Windows doesn't do core files, but it does do modal pop-up message
+ // boxes. As this method is used by bugpoint, preventing these pop-ups
+ // is the moral equivalent of suppressing core files.
+ SetErrorMode(SEM_FAILCRITICALERRORS |
+ SEM_NOGPFAULTERRORBOX |
+ SEM_NOOPENFILEERRORBOX);
+}
+
+bool Process::StandardInIsUserInput() {
+ return FileDescriptorIsDisplayed(0);
+}
+
+bool Process::StandardOutIsDisplayed() {
+ return FileDescriptorIsDisplayed(1);
+}
+
+bool Process::StandardErrIsDisplayed() {
+ return FileDescriptorIsDisplayed(2);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
+ DWORD Mode; // Unused
+ return (GetConsoleMode((HANDLE)_get_osfhandle(fd), &Mode) != 0);
+}
+
+unsigned Process::StandardOutColumns() {
+ unsigned Columns = 0;
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+ Columns = csbi.dwSize.X;
+ return Columns;
+}
+
+unsigned Process::StandardErrColumns() {
+ unsigned Columns = 0;
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_ERROR_HANDLE), &csbi))
+ Columns = csbi.dwSize.X;
+ return Columns;
+}
+
+// The terminal always has colors.
+bool Process::FileDescriptorHasColors(int fd) {
+ return FileDescriptorIsDisplayed(fd);
+}
+
+bool Process::StandardOutHasColors() {
+ return FileDescriptorHasColors(1);
+}
+
+bool Process::StandardErrHasColors() {
+ return FileDescriptorHasColors(2);
+}
+
+namespace {
+class DefaultColors
+{
+ private:
+ WORD defaultColor;
+ public:
+ DefaultColors()
+ :defaultColor(GetCurrentColor()) {}
+ static unsigned GetCurrentColor() {
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+ return csbi.wAttributes;
+ return 0;
+ }
+ WORD operator()() const { return defaultColor; }
+};
+
+DefaultColors defaultColors;
+}
+
+bool Process::ColorNeedsFlush() {
+ return true;
+}
+
+const char *Process::OutputBold(bool bg) {
+ WORD colors = DefaultColors::GetCurrentColor();
+ if (bg)
+ colors |= BACKGROUND_INTENSITY;
+ else
+ colors |= FOREGROUND_INTENSITY;
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+ return 0;
+}
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+ WORD colors;
+ if (bg) {
+ colors = ((code&1) ? BACKGROUND_RED : 0) |
+ ((code&2) ? BACKGROUND_GREEN : 0 ) |
+ ((code&4) ? BACKGROUND_BLUE : 0);
+ if (bold)
+ colors |= BACKGROUND_INTENSITY;
+ } else {
+ colors = ((code&1) ? FOREGROUND_RED : 0) |
+ ((code&2) ? FOREGROUND_GREEN : 0 ) |
+ ((code&4) ? FOREGROUND_BLUE : 0);
+ if (bold)
+ colors |= FOREGROUND_INTENSITY;
+ }
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+ return 0;
+}
+
+static WORD GetConsoleTextAttribute(HANDLE hConsoleOutput) {
+ CONSOLE_SCREEN_BUFFER_INFO info;
+ GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &info);
+ return info.wAttributes;
+}
+
+const char *Process::OutputReverse() {
+ const WORD attributes
+ = GetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE));
+
+ const WORD foreground_mask = FOREGROUND_BLUE | FOREGROUND_GREEN |
+ FOREGROUND_RED | FOREGROUND_INTENSITY;
+ const WORD background_mask = BACKGROUND_BLUE | BACKGROUND_GREEN |
+ BACKGROUND_RED | BACKGROUND_INTENSITY;
+ const WORD color_mask = foreground_mask | background_mask;
+
+ WORD new_attributes =
+ ((attributes & FOREGROUND_BLUE )?BACKGROUND_BLUE :0) |
+ ((attributes & FOREGROUND_GREEN )?BACKGROUND_GREEN :0) |
+ ((attributes & FOREGROUND_RED )?BACKGROUND_RED :0) |
+ ((attributes & FOREGROUND_INTENSITY)?BACKGROUND_INTENSITY:0) |
+ ((attributes & BACKGROUND_BLUE )?FOREGROUND_BLUE :0) |
+ ((attributes & BACKGROUND_GREEN )?FOREGROUND_GREEN :0) |
+ ((attributes & BACKGROUND_RED )?FOREGROUND_RED :0) |
+ ((attributes & BACKGROUND_INTENSITY)?FOREGROUND_INTENSITY:0) |
+ 0;
+ new_attributes = (attributes & ~color_mask) | (new_attributes & color_mask);
+
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), new_attributes);
+ return 0;
+}
+
+const char *Process::ResetColor() {
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors());
+ return 0;
+}
diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
new file mode 100644
index 000000000000..691d6d455501
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -0,0 +1,399 @@
+//===- Win32/Program.cpp - Win32 Program Implementation ------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <cstdio>
+#include <fcntl.h>
+#include <io.h>
+#include <malloc.h>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+namespace {
+ struct Win32ProcessInfo {
+ HANDLE hProcess;
+ DWORD dwProcessId;
+ };
+}
+
+namespace llvm {
+using namespace sys;
+
+Program::Program() : Data_(0) {}
+
+Program::~Program() {
+ if (Data_) {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ CloseHandle(wpi->hProcess);
+ delete wpi;
+ Data_ = 0;
+ }
+}
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+ // Check some degenerate cases
+ if (progName.length() == 0) // no program
+ return Path();
+ Path temp;
+ if (!temp.set(progName)) // invalid name
+ return Path();
+ // Return paths with slashes verbatim.
+ if (progName.find('\\') != std::string::npos ||
+ progName.find('/') != std::string::npos)
+ return temp;
+
+ // At this point, the file name is valid and does not contain slashes.
+ // Let Windows search for it.
+ char buffer[MAX_PATH];
+ char *dummy = NULL;
+ DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH,
+ buffer, &dummy);
+
+ // See if it wasn't found.
+ if (len == 0)
+ return Path();
+
+ // See if we got the entire path.
+ if (len < MAX_PATH)
+ return Path(buffer);
+
+ // Buffer was too small; grow and retry.
+ while (true) {
+ char *b = reinterpret_cast<char *>(_alloca(len+1));
+ DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, b, &dummy);
+
+ // It is unlikely the search failed, but it's always possible some file
+ // was added or removed since the last search, so be paranoid...
+ if (len2 == 0)
+ return Path();
+ else if (len2 <= len)
+ return Path(b);
+
+ len = len2;
+ }
+}
+
+static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) {
+ HANDLE h;
+ if (path == 0) {
+ DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
+ GetCurrentProcess(), &h,
+ 0, TRUE, DUPLICATE_SAME_ACCESS);
+ return h;
+ }
+
+ const char *fname;
+ if (path->isEmpty())
+ fname = "NUL";
+ else
+ fname = path->c_str();
+
+ SECURITY_ATTRIBUTES sa;
+ sa.nLength = sizeof(sa);
+ sa.lpSecurityDescriptor = 0;
+ sa.bInheritHandle = TRUE;
+
+ h = CreateFile(fname, fd ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ,
+ &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE) {
+ MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " +
+ (fd ? "input: " : "output: "));
+ }
+
+ return h;
+}
+
+/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling
+/// CreateProcess.
+static bool ArgNeedsQuotes(const char *Str) {
+ return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0;
+}
+
+
+/// ArgLenWithQuotes - Check whether argument needs to be quoted when calling
+/// CreateProcess and returns length of quoted arg with escaped quotes
+static unsigned int ArgLenWithQuotes(const char *Str) {
+ unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0;
+
+ while (*Str != '\0') {
+ if (*Str == '\"')
+ ++len;
+
+ ++len;
+ ++Str;
+ }
+
+ return len;
+}
+
+
+bool
+Program::Execute(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ if (Data_) {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ CloseHandle(wpi->hProcess);
+ delete wpi;
+ Data_ = 0;
+ }
+
+ if (!path.canExecute()) {
+ if (ErrMsg)
+ *ErrMsg = "program not executable";
+ return false;
+ }
+
+ // Windows wants a command line, not an array of args, to pass to the new
+ // process. We have to concatenate them all, while quoting the args that
+ // have embedded spaces (or are empty).
+
+ // First, determine the length of the command line.
+ unsigned len = 0;
+ for (unsigned i = 0; args[i]; i++) {
+ len += ArgLenWithQuotes(args[i]) + 1;
+ }
+
+ // Now build the command line.
+ char *command = reinterpret_cast<char *>(_alloca(len+1));
+ char *p = command;
+
+ for (unsigned i = 0; args[i]; i++) {
+ const char *arg = args[i];
+
+ bool needsQuoting = ArgNeedsQuotes(arg);
+ if (needsQuoting)
+ *p++ = '"';
+
+ while (*arg != '\0') {
+ if (*arg == '\"')
+ *p++ = '\\';
+
+ *p++ = *arg++;
+ }
+
+ if (needsQuoting)
+ *p++ = '"';
+ *p++ = ' ';
+ }
+
+ *p = 0;
+
+ // The pointer to the environment block for the new process.
+ char *envblock = 0;
+
+ if (envp) {
+ // An environment block consists of a null-terminated block of
+ // null-terminated strings. Convert the array of environment variables to
+ // an environment block by concatenating them.
+
+ // First, determine the length of the environment block.
+ len = 0;
+ for (unsigned i = 0; envp[i]; i++)
+ len += strlen(envp[i]) + 1;
+
+ // Now build the environment block.
+ envblock = reinterpret_cast<char *>(_alloca(len+1));
+ p = envblock;
+
+ for (unsigned i = 0; envp[i]; i++) {
+ const char *ev = envp[i];
+ size_t len = strlen(ev) + 1;
+ memcpy(p, ev, len);
+ p += len;
+ }
+
+ *p = 0;
+ }
+
+ // Create a child process.
+ STARTUPINFO si;
+ memset(&si, 0, sizeof(si));
+ si.cb = sizeof(si);
+ si.hStdInput = INVALID_HANDLE_VALUE;
+ si.hStdOutput = INVALID_HANDLE_VALUE;
+ si.hStdError = INVALID_HANDLE_VALUE;
+
+ if (redirects) {
+ si.dwFlags = STARTF_USESTDHANDLES;
+
+ si.hStdInput = RedirectIO(redirects[0], 0, ErrMsg);
+ if (si.hStdInput == INVALID_HANDLE_VALUE) {
+ MakeErrMsg(ErrMsg, "can't redirect stdin");
+ return false;
+ }
+ si.hStdOutput = RedirectIO(redirects[1], 1, ErrMsg);
+ if (si.hStdOutput == INVALID_HANDLE_VALUE) {
+ CloseHandle(si.hStdInput);
+ MakeErrMsg(ErrMsg, "can't redirect stdout");
+ return false;
+ }
+ if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the handle already open for stdout.
+ DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
+ GetCurrentProcess(), &si.hStdError,
+ 0, TRUE, DUPLICATE_SAME_ACCESS);
+ } else {
+ // Just redirect stderr
+ si.hStdError = RedirectIO(redirects[2], 2, ErrMsg);
+ if (si.hStdError == INVALID_HANDLE_VALUE) {
+ CloseHandle(si.hStdInput);
+ CloseHandle(si.hStdOutput);
+ MakeErrMsg(ErrMsg, "can't redirect stderr");
+ return false;
+ }
+ }
+ }
+
+ PROCESS_INFORMATION pi;
+ memset(&pi, 0, sizeof(pi));
+
+ fflush(stdout);
+ fflush(stderr);
+ BOOL rc = CreateProcess(path.c_str(), command, NULL, NULL, TRUE, 0,
+ envblock, NULL, &si, &pi);
+ DWORD err = GetLastError();
+
+ // Regardless of whether the process got created or not, we are done with
+ // the handles we created for it to inherit.
+ CloseHandle(si.hStdInput);
+ CloseHandle(si.hStdOutput);
+ CloseHandle(si.hStdError);
+
+ // Now return an error if the process didn't get created.
+ if (!rc) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
+ path.str() + "'");
+ return false;
+ }
+ Win32ProcessInfo* wpi = new Win32ProcessInfo;
+ wpi->hProcess = pi.hProcess;
+ wpi->dwProcessId = pi.dwProcessId;
+ Data_ = wpi;
+
+ // Make sure these get closed no matter what.
+ ScopedCommonHandle hThread(pi.hThread);
+
+ // Assign the process to a job if a memory limit is defined.
+ ScopedJobHandle hJob;
+ if (memoryLimit != 0) {
+ hJob = CreateJobObject(0, 0);
+ bool success = false;
+ if (hJob) {
+ JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
+ memset(&jeli, 0, sizeof(jeli));
+ jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
+ jeli.ProcessMemoryLimit = uintptr_t(memoryLimit) * 1048576;
+ if (SetInformationJobObject(hJob, JobObjectExtendedLimitInformation,
+ &jeli, sizeof(jeli))) {
+ if (AssignProcessToJobObject(hJob, pi.hProcess))
+ success = true;
+ }
+ }
+ if (!success) {
+ SetLastError(GetLastError());
+ MakeErrMsg(ErrMsg, std::string("Unable to set memory limit"));
+ TerminateProcess(pi.hProcess, 1);
+ WaitForSingleObject(pi.hProcess, INFINITE);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+int
+Program::Wait(const Path &path,
+ unsigned secondsToWait,
+ std::string* ErrMsg) {
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return -1;
+ }
+
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ HANDLE hProcess = wpi->hProcess;
+
+ // Wait for the process to terminate.
+ DWORD millisecondsToWait = INFINITE;
+ if (secondsToWait > 0)
+ millisecondsToWait = secondsToWait * 1000;
+
+ if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) {
+ if (!TerminateProcess(hProcess, 1)) {
+ MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
+ // -2 indicates a crash or timeout as opposed to failure to execute.
+ return -2;
+ }
+ WaitForSingleObject(hProcess, INFINITE);
+ }
+
+ // Get its exit status.
+ DWORD status;
+ BOOL rc = GetExitCodeProcess(hProcess, &status);
+ DWORD err = GetLastError();
+
+ if (!rc) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, "Failed getting status for program.");
+ // -2 indicates a crash or timeout as opposed to failure to execute.
+ return -2;
+ }
+
+ if (!status)
+ return 0;
+
+ // Pass 10(Warning) and 11(Error) to the callee as negative value.
+ if ((status & 0xBFFF0000U) == 0x80000000U)
+ return (int)status;
+
+ if (status & 0xFF)
+ return status & 0x7FFFFFFF;
+
+ return 1;
+}
+
+error_code Program::ChangeStdinToBinary(){
+ int result = _setmode( _fileno(stdin), _O_BINARY );
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
+}
+
+error_code Program::ChangeStdoutToBinary(){
+ int result = _setmode( _fileno(stdout), _O_BINARY );
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
+}
+
+error_code Program::ChangeStderrToBinary(){
+ int result = _setmode( _fileno(stderr), _O_BINARY );
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/RWMutex.inc b/contrib/llvm/lib/Support/Windows/RWMutex.inc
new file mode 100644
index 000000000000..95939237ba61
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/RWMutex.inc
@@ -0,0 +1,134 @@
+//= llvm/Support/Win32/Mutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+
+namespace llvm {
+using namespace sys;
+
+// Windows has slim read-writer lock support on Vista and higher, so we
+// will attempt to load the APIs. If they exist, we will use them, and
+// if not, we will fall back on critical sections. When we drop support
+// for XP, we can stop lazy-loading these APIs and just use them directly.
+#if defined(__MINGW32__)
+ // Taken from WinNT.h
+ typedef struct _RTL_SRWLOCK {
+ PVOID Ptr;
+ } RTL_SRWLOCK, *PRTL_SRWLOCK;
+
+ // Taken from WinBase.h
+ typedef RTL_SRWLOCK SRWLOCK, *PSRWLOCK;
+#endif
+
+static VOID (WINAPI *fpInitializeSRWLock)(PSRWLOCK lock) = NULL;
+static VOID (WINAPI *fpAcquireSRWLockExclusive)(PSRWLOCK lock) = NULL;
+static VOID (WINAPI *fpAcquireSRWLockShared)(PSRWLOCK lock) = NULL;
+static VOID (WINAPI *fpReleaseSRWLockExclusive)(PSRWLOCK lock) = NULL;
+static VOID (WINAPI *fpReleaseSRWLockShared)(PSRWLOCK lock) = NULL;
+
+static bool sHasSRW = false;
+
+static bool loadSRW() {
+ static bool sChecked = false;
+ if (!sChecked) {
+ sChecked = true;
+
+ HMODULE hLib = ::LoadLibrary(TEXT("Kernel32"));
+ if (hLib) {
+ fpInitializeSRWLock =
+ (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
+ "InitializeSRWLock");
+ fpAcquireSRWLockExclusive =
+ (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
+ "AcquireSRWLockExclusive");
+ fpAcquireSRWLockShared =
+ (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
+ "AcquireSRWLockShared");
+ fpReleaseSRWLockExclusive =
+ (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
+ "ReleaseSRWLockExclusive");
+ fpReleaseSRWLockShared =
+ (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
+ "ReleaseSRWLockShared");
+ ::FreeLibrary(hLib);
+
+ if (fpInitializeSRWLock != NULL) {
+ sHasSRW = true;
+ }
+ }
+ }
+ return sHasSRW;
+}
+
+RWMutexImpl::RWMutexImpl() {
+ if (loadSRW()) {
+ data_ = calloc(1, sizeof(SRWLOCK));
+ fpInitializeSRWLock(static_cast<PSRWLOCK>(data_));
+ } else {
+ data_ = calloc(1, sizeof(CRITICAL_SECTION));
+ InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ }
+}
+
+RWMutexImpl::~RWMutexImpl() {
+ if (sHasSRW) {
+ // Nothing to do in the case of slim reader/writers
+ } else {
+ DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ free(data_);
+ }
+}
+
+bool RWMutexImpl::reader_acquire() {
+ if (sHasSRW) {
+ fpAcquireSRWLockShared(static_cast<PSRWLOCK>(data_));
+ } else {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ }
+ return true;
+}
+
+bool RWMutexImpl::reader_release() {
+ if (sHasSRW) {
+ fpReleaseSRWLockShared(static_cast<PSRWLOCK>(data_));
+ } else {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ }
+ return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+ if (sHasSRW) {
+ fpAcquireSRWLockExclusive(static_cast<PSRWLOCK>(data_));
+ } else {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ }
+ return true;
+}
+
+bool RWMutexImpl::writer_release() {
+ if (sHasSRW) {
+ fpReleaseSRWLockExclusive(static_cast<PSRWLOCK>(data_));
+ } else {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ }
+ return true;
+}
+
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc
new file mode 100644
index 000000000000..3dd6660b031d
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Signals.inc
@@ -0,0 +1,492 @@
+//===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Signals class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <algorithm>
+#include <stdio.h>
+#include <vector>
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+#include <psapi.h>
+
+#ifdef _MSC_VER
+ #pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "dbghelp.lib")
+#elif __MINGW32__
+ #if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1))
+ #error "libimagehlp.a & libpsapi.a should be present"
+ #endif
+ // The version of g++ that comes with MinGW does *not* properly understand
+ // the ll format specifier for printf. However, MinGW passes the format
+ // specifiers on to the MSVCRT entirely, and the CRT understands the ll
+ // specifier. So these warnings are spurious in this case. Since we compile
+ // with -Wall, this will generate these warnings which should be ignored. So
+ // we will turn off the warnings for this just file. However, MinGW also does
+ // not support push and pop for diagnostics, so we have to manually turn it
+ // back on at the end of the file.
+ #pragma GCC diagnostic ignored "-Wformat"
+ #pragma GCC diagnostic ignored "-Wformat-extra-args"
+
+ #if !defined(__MINGW64_VERSION_MAJOR)
+ // MinGW.org does not have updated support for the 64-bit versions of the
+ // DebugHlp APIs. So we will have to load them manually. The structures and
+ // method signatures were pulled from DbgHelp.h in the Windows Platform SDK,
+ // and adjusted for brevity.
+ typedef struct _IMAGEHLP_LINE64 {
+ DWORD SizeOfStruct;
+ PVOID Key;
+ DWORD LineNumber;
+ PCHAR FileName;
+ DWORD64 Address;
+ } IMAGEHLP_LINE64, *PIMAGEHLP_LINE64;
+
+ typedef struct _IMAGEHLP_SYMBOL64 {
+ DWORD SizeOfStruct;
+ DWORD64 Address;
+ DWORD Size;
+ DWORD Flags;
+ DWORD MaxNameLength;
+ CHAR Name[1];
+ } IMAGEHLP_SYMBOL64, *PIMAGEHLP_SYMBOL64;
+
+ typedef struct _tagADDRESS64 {
+ DWORD64 Offset;
+ WORD Segment;
+ ADDRESS_MODE Mode;
+ } ADDRESS64, *LPADDRESS64;
+
+ typedef struct _KDHELP64 {
+ DWORD64 Thread;
+ DWORD ThCallbackStack;
+ DWORD ThCallbackBStore;
+ DWORD NextCallback;
+ DWORD FramePointer;
+ DWORD64 KiCallUserMode;
+ DWORD64 KeUserCallbackDispatcher;
+ DWORD64 SystemRangeStart;
+ DWORD64 KiUserExceptionDispatcher;
+ DWORD64 StackBase;
+ DWORD64 StackLimit;
+ DWORD64 Reserved[5];
+ } KDHELP64, *PKDHELP64;
+
+ typedef struct _tagSTACKFRAME64 {
+ ADDRESS64 AddrPC;
+ ADDRESS64 AddrReturn;
+ ADDRESS64 AddrFrame;
+ ADDRESS64 AddrStack;
+ ADDRESS64 AddrBStore;
+ PVOID FuncTableEntry;
+ DWORD64 Params[4];
+ BOOL Far;
+ BOOL Virtual;
+ DWORD64 Reserved[3];
+ KDHELP64 KdHelp;
+ } STACKFRAME64, *LPSTACKFRAME64;
+
+typedef BOOL (__stdcall *PREAD_PROCESS_MEMORY_ROUTINE64)(HANDLE hProcess,
+ DWORD64 qwBaseAddress, PVOID lpBuffer, DWORD nSize,
+ LPDWORD lpNumberOfBytesRead);
+
+typedef PVOID (__stdcall *PFUNCTION_TABLE_ACCESS_ROUTINE64)( HANDLE ahProcess,
+ DWORD64 AddrBase);
+
+typedef DWORD64 (__stdcall *PGET_MODULE_BASE_ROUTINE64)(HANDLE hProcess,
+ DWORD64 Address);
+
+typedef DWORD64 (__stdcall *PTRANSLATE_ADDRESS_ROUTINE64)(HANDLE hProcess,
+ HANDLE hThread, LPADDRESS64 lpaddr);
+
+typedef BOOL (WINAPI *fpStackWalk64)(DWORD, HANDLE, HANDLE, LPSTACKFRAME64,
+ PVOID, PREAD_PROCESS_MEMORY_ROUTINE64,
+ PFUNCTION_TABLE_ACCESS_ROUTINE64,
+ PGET_MODULE_BASE_ROUTINE64,
+ PTRANSLATE_ADDRESS_ROUTINE64);
+static fpStackWalk64 StackWalk64;
+
+typedef DWORD64 (WINAPI *fpSymGetModuleBase64)(HANDLE, DWORD64);
+static fpSymGetModuleBase64 SymGetModuleBase64;
+
+typedef BOOL (WINAPI *fpSymGetSymFromAddr64)(HANDLE, DWORD64,
+ PDWORD64, PIMAGEHLP_SYMBOL64);
+static fpSymGetSymFromAddr64 SymGetSymFromAddr64;
+
+typedef BOOL (WINAPI *fpSymGetLineFromAddr64)(HANDLE, DWORD64,
+ PDWORD, PIMAGEHLP_LINE64);
+static fpSymGetLineFromAddr64 SymGetLineFromAddr64;
+
+typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64);
+static fpSymFunctionTableAccess64 SymFunctionTableAccess64;
+
+static bool load64BitDebugHelp(void) {
+ HMODULE hLib = ::LoadLibrary("Dbghelp.dll");
+ if (hLib) {
+ StackWalk64 = (fpStackWalk64)
+ ::GetProcAddress(hLib, "StackWalk64");
+ SymGetModuleBase64 = (fpSymGetModuleBase64)
+ ::GetProcAddress(hLib, "SymGetModuleBase64");
+ SymGetSymFromAddr64 = (fpSymGetSymFromAddr64)
+ ::GetProcAddress(hLib, "SymGetSymFromAddr64");
+ SymGetLineFromAddr64 = (fpSymGetLineFromAddr64)
+ ::GetProcAddress(hLib, "SymGetLineFromAddr64");
+ SymFunctionTableAccess64 = (fpSymFunctionTableAccess64)
+ ::GetProcAddress(hLib, "SymFunctionTableAccess64");
+ }
+ return StackWalk64 != NULL;
+}
+ #endif // !defined(__MINGW64_VERSION_MAJOR)
+#endif // __MINGW32__
+
+// Forward declare.
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType);
+
+// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<llvm::sys::Path> *FilesToRemove = NULL;
+static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
+static bool RegisteredUnhandledExceptionFilter = false;
+static bool CleanupExecuted = false;
+static bool ExitOnUnhandledExceptions = false;
+static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
+
+// Windows creates a new thread to execute the console handler when an event
+// (such as CTRL/C) occurs. This causes concurrency issues with the above
+// globals which this critical section addresses.
+static CRITICAL_SECTION CriticalSection;
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef _MSC_VER
+/// CRTReportHook - Function called on a CRT debugging event.
+static int CRTReportHook(int ReportType, char *Message, int *Return) {
+ // Don't cause a DebugBreak() on return.
+ if (Return)
+ *Return = 0;
+
+ switch (ReportType) {
+ default:
+ case _CRT_ASSERT:
+ fprintf(stderr, "CRT assert: %s\n", Message);
+ // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+ // exception code? Perhaps SetErrorMode() handles this.
+ _exit(3);
+ break;
+ case _CRT_ERROR:
+ fprintf(stderr, "CRT error: %s\n", Message);
+ // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+ // exception code? Perhaps SetErrorMode() handles this.
+ _exit(3);
+ break;
+ case _CRT_WARN:
+ fprintf(stderr, "CRT warn: %s\n", Message);
+ break;
+ }
+
+ // Don't call _CrtDbgReport.
+ return TRUE;
+}
+#endif
+
+static void RegisterHandler() {
+#if __MINGW32__ && !defined(__MINGW64_VERSION_MAJOR)
+ // On MinGW.org, we need to load up the symbols explicitly, because the
+ // Win32 framework they include does not have support for the 64-bit
+ // versions of the APIs we need. If we cannot load up the APIs (which
+ // would be unexpected as they should exist on every version of Windows
+ // we support), we will bail out since there would be nothing to report.
+ if (!load64BitDebugHelp()) {
+ assert(false && "These APIs should always be available");
+ return;
+ }
+#endif
+
+ if (RegisteredUnhandledExceptionFilter) {
+ EnterCriticalSection(&CriticalSection);
+ return;
+ }
+
+ // Now's the time to create the critical section. This is the first time
+ // through here, and there's only one thread.
+ InitializeCriticalSection(&CriticalSection);
+
+ // Enter it immediately. Now if someone hits CTRL/C, the console handler
+ // can't proceed until the globals are updated.
+ EnterCriticalSection(&CriticalSection);
+
+ RegisteredUnhandledExceptionFilter = true;
+ OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
+ SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
+
+ // Environment variable to disable any kind of crash dialog.
+ if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
+#ifdef _MSC_VER
+ _CrtSetReportHook(CRTReportHook);
+#endif
+ SetErrorMode(SEM_FAILCRITICALERRORS |
+ SEM_NOGPFAULTERRORBOX |
+ SEM_NOOPENFILEERRORBOX);
+ ExitOnUnhandledExceptions = true;
+ }
+
+ // IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
+ // else multi-threading problems will ensue.
+}
+
+// RemoveFileOnSignal - The public API
+bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) {
+ RegisterHandler();
+
+ if (CleanupExecuted) {
+ if (ErrMsg)
+ *ErrMsg = "Process terminating -- cannot register for removal";
+ return true;
+ }
+
+ if (FilesToRemove == NULL)
+ FilesToRemove = new std::vector<sys::Path>;
+
+ FilesToRemove->push_back(Filename);
+
+ LeaveCriticalSection(&CriticalSection);
+ return false;
+}
+
+// DontRemoveFileOnSignal - The public API
+void sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+ if (FilesToRemove == NULL)
+ return;
+
+ RegisterHandler();
+
+ FilesToRemove->push_back(Filename);
+ std::vector<sys::Path>::reverse_iterator I =
+ std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
+ if (I != FilesToRemove->rend())
+ FilesToRemove->erase(I.base()-1);
+
+ LeaveCriticalSection(&CriticalSection);
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void sys::PrintStackTraceOnErrorSignal() {
+ RegisterHandler();
+ LeaveCriticalSection(&CriticalSection);
+}
+
+void llvm::sys::PrintStackTrace(FILE *) {
+ // FIXME: Implement.
+}
+
+
+void sys::SetInterruptFunction(void (*IF)()) {
+ RegisterHandler();
+ InterruptFunction = IF;
+ LeaveCriticalSection(&CriticalSection);
+}
+
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process. The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+ if (CallBacksToRun == 0)
+ CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
+ CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
+ RegisterHandler();
+ LeaveCriticalSection(&CriticalSection);
+}
+}
+
+static void Cleanup() {
+ EnterCriticalSection(&CriticalSection);
+
+ // Prevent other thread from registering new files and directories for
+ // removal, should we be executing because of the console handler callback.
+ CleanupExecuted = true;
+
+ // FIXME: open files cannot be deleted.
+
+ if (FilesToRemove != NULL)
+ while (!FilesToRemove->empty()) {
+ FilesToRemove->back().eraseFromDisk();
+ FilesToRemove->pop_back();
+ }
+
+ if (CallBacksToRun)
+ for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
+ (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
+
+ LeaveCriticalSection(&CriticalSection);
+}
+
+void llvm::sys::RunInterruptHandlers() {
+ Cleanup();
+}
+
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
+ Cleanup();
+
+ // Initialize the STACKFRAME structure.
+ STACKFRAME64 StackFrame;
+ memset(&StackFrame, 0, sizeof(StackFrame));
+
+ DWORD machineType;
+#if defined(_M_X64)
+ machineType = IMAGE_FILE_MACHINE_AMD64;
+ StackFrame.AddrPC.Offset = ep->ContextRecord->Rip;
+ StackFrame.AddrPC.Mode = AddrModeFlat;
+ StackFrame.AddrStack.Offset = ep->ContextRecord->Rsp;
+ StackFrame.AddrStack.Mode = AddrModeFlat;
+ StackFrame.AddrFrame.Offset = ep->ContextRecord->Rbp;
+ StackFrame.AddrFrame.Mode = AddrModeFlat;
+#elif defined(_M_IX86)
+ machineType = IMAGE_FILE_MACHINE_I386;
+ StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
+ StackFrame.AddrPC.Mode = AddrModeFlat;
+ StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
+ StackFrame.AddrStack.Mode = AddrModeFlat;
+ StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
+ StackFrame.AddrFrame.Mode = AddrModeFlat;
+#endif
+
+ HANDLE hProcess = GetCurrentProcess();
+ HANDLE hThread = GetCurrentThread();
+
+ // Initialize the symbol handler.
+ SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
+ SymInitialize(hProcess, NULL, TRUE);
+
+ while (true) {
+ if (!StackWalk64(machineType, hProcess, hThread, &StackFrame,
+ ep->ContextRecord, NULL, SymFunctionTableAccess64,
+ SymGetModuleBase64, NULL)) {
+ break;
+ }
+
+ if (StackFrame.AddrFrame.Offset == 0)
+ break;
+
+ // Print the PC in hexadecimal.
+ DWORD64 PC = StackFrame.AddrPC.Offset;
+#if defined(_M_X64)
+ fprintf(stderr, "0x%016llX", PC);
+#elif defined(_M_IX86)
+ fprintf(stderr, "0x%08lX", static_cast<DWORD>(PC));
+#endif
+
+ // Print the parameters. Assume there are four.
+#if defined(_M_X64)
+ fprintf(stderr, " (0x%016llX 0x%016llX 0x%016llX 0x%016llX)",
+ StackFrame.Params[0],
+ StackFrame.Params[1],
+ StackFrame.Params[2],
+ StackFrame.Params[3]);
+#elif defined(_M_IX86)
+ fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
+ static_cast<DWORD>(StackFrame.Params[0]),
+ static_cast<DWORD>(StackFrame.Params[1]),
+ static_cast<DWORD>(StackFrame.Params[2]),
+ static_cast<DWORD>(StackFrame.Params[3]));
+#endif
+ // Verify the PC belongs to a module in this process.
+ if (!SymGetModuleBase64(hProcess, PC)) {
+ fputs(" <unknown module>\n", stderr);
+ continue;
+ }
+
+ // Print the symbol name.
+ char buffer[512];
+ IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer);
+ memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL64));
+ symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
+ symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64);
+
+ DWORD64 dwDisp;
+ if (!SymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) {
+ fputc('\n', stderr);
+ continue;
+ }
+
+ buffer[511] = 0;
+ if (dwDisp > 0)
+ fprintf(stderr, ", %s() + 0x%llX bytes(s)", symbol->Name, dwDisp);
+ else
+ fprintf(stderr, ", %s", symbol->Name);
+
+ // Print the source file and line number information.
+ IMAGEHLP_LINE64 line;
+ DWORD dwLineDisp;
+ memset(&line, 0, sizeof(line));
+ line.SizeOfStruct = sizeof(line);
+ if (SymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) {
+ fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
+ if (dwLineDisp > 0)
+ fprintf(stderr, " + 0x%lX byte(s)", dwLineDisp);
+ }
+
+ fputc('\n', stderr);
+ }
+
+ if (ExitOnUnhandledExceptions)
+ _exit(ep->ExceptionRecord->ExceptionCode);
+
+ // Allow dialog box to pop up allowing choice to start debugger.
+ if (OldFilter)
+ return (*OldFilter)(ep);
+ else
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
+ // We are running in our very own thread, courtesy of Windows.
+ EnterCriticalSection(&CriticalSection);
+ Cleanup();
+
+ // If an interrupt function has been set, go and run one it; otherwise,
+ // the process dies.
+ void (*IF)() = InterruptFunction;
+ InterruptFunction = 0; // Don't run it on another CTRL-C.
+
+ if (IF) {
+ // Note: if the interrupt function throws an exception, there is nothing
+ // to catch it in this thread so it will kill the process.
+ IF(); // Run it now.
+ LeaveCriticalSection(&CriticalSection);
+ return TRUE; // Don't kill the process.
+ }
+
+ // Allow normal processing to take place; i.e., the process dies.
+ LeaveCriticalSection(&CriticalSection);
+ return FALSE;
+}
+
+#if __MINGW32__
+ // We turned these warnings off for this file so that MinGW-g++ doesn't
+ // complain about the ll format specifiers used. Now we are turning the
+ // warnings back on. If MinGW starts to support diagnostic stacks, we can
+ // replace this with a pop.
+ #pragma GCC diagnostic warning "-Wformat"
+ #pragma GCC diagnostic warning "-Wformat-extra-args"
+#endif
diff --git a/contrib/llvm/lib/Support/Windows/ThreadLocal.inc b/contrib/llvm/lib/Support/Windows/ThreadLocal.inc
new file mode 100644
index 000000000000..057deb325d6e
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/ThreadLocal.inc
@@ -0,0 +1,53 @@
+//= llvm/Support/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/ThreadLocal.h"
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() : data() {
+ typedef int SIZE_TOO_BIG[sizeof(DWORD) <= sizeof(data) ? 1 : -1];
+ DWORD* tls = reinterpret_cast<DWORD*>(&data);
+ *tls = TlsAlloc();
+ assert(*tls != TLS_OUT_OF_INDEXES);
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+ DWORD* tls = reinterpret_cast<DWORD*>(&data);
+ TlsFree(*tls);
+}
+
+const void* ThreadLocalImpl::getInstance() {
+ DWORD* tls = reinterpret_cast<DWORD*>(&data);
+ return TlsGetValue(*tls);
+}
+
+void ThreadLocalImpl::setInstance(const void* d){
+ DWORD* tls = reinterpret_cast<DWORD*>(&data);
+ int errorcode = TlsSetValue(*tls, const_cast<void*>(d));
+ assert(errorcode != 0);
+ (void)errorcode;
+}
+
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/TimeValue.inc b/contrib/llvm/lib/Support/Windows/TimeValue.inc
new file mode 100644
index 000000000000..12275526f1c8
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/TimeValue.inc
@@ -0,0 +1,51 @@
+//===- Win32/TimeValue.cpp - Win32 TimeValue Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 implementation of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <time.h>
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code.
+//===----------------------------------------------------------------------===//
+
+TimeValue TimeValue::now() {
+ uint64_t ft;
+ GetSystemTimeAsFileTime(reinterpret_cast<FILETIME *>(&ft));
+
+ TimeValue t(0, 0);
+ t.fromWin32Time(ft);
+ return t;
+}
+
+std::string TimeValue::str() const {
+#ifdef __MINGW32__
+ // This ban may be lifted by either:
+ // (i) a future MinGW version other than 1.0 inherents the __time64_t type, or
+ // (ii) configure tests for either the time_t or __time64_t type.
+ time_t ourTime = time_t(this->toEpochTime());
+ struct tm *lt = ::localtime(&ourTime);
+#else
+ __time64_t ourTime = this->toEpochTime();
+ struct tm *lt = ::_localtime64(&ourTime);
+#endif
+
+ char buffer[25];
+ strftime(buffer, 25, "%a %b %d %H:%M:%S %Y", lt);
+ return std::string(buffer);
+}
+
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/Watchdog.inc b/contrib/llvm/lib/Support/Windows/Watchdog.inc
new file mode 100644
index 000000000000..fab2bdf2a941
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Watchdog.inc
@@ -0,0 +1,24 @@
+//===--- Windows/Watchdog.inc - Windows Watchdog Implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Windows implementation of the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: implement.
+// Currently this is only used by PrettyStackTrace which is also unimplemented
+// on Windows. Roughly, a Windows implementation would use CreateWaitableTimer
+// and a second thread to run the TimerAPCProc.
+
+namespace llvm {
+ namespace sys {
+ Watchdog::Watchdog(unsigned int seconds) {}
+ Watchdog::~Watchdog() {}
+ }
+}
diff --git a/contrib/llvm/lib/Support/Windows/Windows.h b/contrib/llvm/lib/Support/Windows/Windows.h
new file mode 100644
index 000000000000..5c1da0d617aa
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Windows.h
@@ -0,0 +1,150 @@
+//===- Win32/Win32.h - Common Win32 Include File ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Win32 implementations.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+// mingw-w64 tends to define it as 0x0502 in its headers.
+#undef _WIN32_WINNT
+
+// Require at least Windows XP(5.1) API.
+#define _WIN32_WINNT 0x0501
+#define _WIN32_IE 0x0600 // MinGW at it again.
+#define WIN32_LEAN_AND_MEAN
+
+#include "llvm/Config/config.h" // Get build system configuration settings
+#include <windows.h>
+#include <wincrypt.h>
+#include <shlobj.h>
+#include <cassert>
+#include <string>
+
+inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
+ if (!ErrMsg)
+ return true;
+ char *buffer = NULL;
+ FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
+ *ErrMsg = prefix + buffer;
+ LocalFree(buffer);
+ return true;
+}
+
+template <typename HandleTraits>
+class ScopedHandle {
+ typedef typename HandleTraits::handle_type handle_type;
+ handle_type Handle;
+
+ ScopedHandle(const ScopedHandle &other); // = delete;
+ void operator=(const ScopedHandle &other); // = delete;
+public:
+ ScopedHandle()
+ : Handle(HandleTraits::GetInvalid()) {}
+
+ explicit ScopedHandle(handle_type h)
+ : Handle(h) {}
+
+ ~ScopedHandle() {
+ if (HandleTraits::IsValid(Handle))
+ HandleTraits::Close(Handle);
+ }
+
+ handle_type take() {
+ handle_type t = Handle;
+ Handle = HandleTraits::GetInvalid();
+ return t;
+ }
+
+ ScopedHandle &operator=(handle_type h) {
+ if (HandleTraits::IsValid(Handle))
+ HandleTraits::Close(Handle);
+ Handle = h;
+ return *this;
+ }
+
+ // True if Handle is valid.
+ operator bool() const {
+ return HandleTraits::IsValid(Handle) ? true : false;
+ }
+
+ operator handle_type() const {
+ return Handle;
+ }
+};
+
+struct CommonHandleTraits {
+ typedef HANDLE handle_type;
+
+ static handle_type GetInvalid() {
+ return INVALID_HANDLE_VALUE;
+ }
+
+ static void Close(handle_type h) {
+ ::CloseHandle(h);
+ }
+
+ static bool IsValid(handle_type h) {
+ return h != GetInvalid();
+ }
+};
+
+struct JobHandleTraits : CommonHandleTraits {
+ static handle_type GetInvalid() {
+ return NULL;
+ }
+};
+
+struct CryptContextTraits : CommonHandleTraits {
+ typedef HCRYPTPROV handle_type;
+
+ static handle_type GetInvalid() {
+ return 0;
+ }
+
+ static void Close(handle_type h) {
+ ::CryptReleaseContext(h, 0);
+ }
+
+ static bool IsValid(handle_type h) {
+ return h != GetInvalid();
+ }
+};
+
+struct FindHandleTraits : CommonHandleTraits {
+ static void Close(handle_type h) {
+ ::FindClose(h);
+ }
+};
+
+struct FileHandleTraits : CommonHandleTraits {};
+
+typedef ScopedHandle<CommonHandleTraits> ScopedCommonHandle;
+typedef ScopedHandle<FileHandleTraits> ScopedFileHandle;
+typedef ScopedHandle<CryptContextTraits> ScopedCryptContext;
+typedef ScopedHandle<FindHandleTraits> ScopedFindHandle;
+typedef ScopedHandle<JobHandleTraits> ScopedJobHandle;
+
+namespace llvm {
+template <class T>
+class SmallVectorImpl;
+
+template <class T>
+typename SmallVectorImpl<T>::const_pointer
+c_str(SmallVectorImpl<T> &str) {
+ str.push_back(0);
+ str.pop_back();
+ return str.data();
+}
+} // end namespace llvm.
diff --git a/contrib/llvm/lib/Support/Windows/explicit_symbols.inc b/contrib/llvm/lib/Support/Windows/explicit_symbols.inc
new file mode 100644
index 000000000000..379645d2ff60
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/explicit_symbols.inc
@@ -0,0 +1,66 @@
+/* in libgcc.a */
+
+#ifdef HAVE__ALLOCA
+ EXPLICIT_SYMBOL(_alloca)
+ EXPLICIT_SYMBOL2(alloca, _alloca)
+#endif
+#ifdef HAVE___ALLOCA
+ EXPLICIT_SYMBOL(__alloca)
+#endif
+#ifdef HAVE___CHKSTK
+ EXPLICIT_SYMBOL(__chkstk)
+#endif
+#ifdef HAVE____CHKSTK
+ EXPLICIT_SYMBOL(___chkstk)
+#endif
+#ifdef HAVE___MAIN
+ EXPLICIT_SYMBOL(__main) // FIXME: Don't call it.
+#endif
+
+#ifdef HAVE___ASHLDI3
+ EXPLICIT_SYMBOL(__ashldi3)
+#endif
+#ifdef HAVE___ASHRDI3
+ EXPLICIT_SYMBOL(__ashrdi3)
+#endif
+#ifdef HAVE___CMPDI2 // FIXME: unused
+ EXPLICIT_SYMBOL(__cmpdi2)
+#endif
+#ifdef HAVE___DIVDI3
+ EXPLICIT_SYMBOL(__divdi3)
+#endif
+#ifdef HAVE___FIXDFDI
+ EXPLICIT_SYMBOL(__fixdfdi)
+#endif
+#ifdef HAVE___FIXSFDI
+ EXPLICIT_SYMBOL(__fixsfdi)
+#endif
+#ifdef HAVE___FIXUNSDFDI
+ EXPLICIT_SYMBOL(__fixunsdfdi)
+#endif
+#ifdef HAVE___FIXUNSSFDI
+ EXPLICIT_SYMBOL(__fixunssfdi)
+#endif
+#ifdef HAVE___FLOATDIDF
+ EXPLICIT_SYMBOL(__floatdidf)
+#endif
+#ifdef HAVE___FLOATDISF
+ EXPLICIT_SYMBOL(__floatdisf)
+#endif
+#ifdef HAVE___LSHRDI3
+ EXPLICIT_SYMBOL(__lshrdi3)
+#endif
+#ifdef HAVE___MODDI3
+ EXPLICIT_SYMBOL(__moddi3)
+#endif
+#ifdef HAVE___UDIVDI3
+ EXPLICIT_SYMBOL(__udivdi3)
+#endif
+#ifdef HAVE___UMODDI3
+ EXPLICIT_SYMBOL(__umoddi3)
+#endif
+
+/* msvcrt */
+#if defined(_MSC_VER)
+ EXPLICIT_SYMBOL2(alloca, _alloca_probe)
+#endif
diff --git a/contrib/llvm/lib/Support/Windows/system_error.inc b/contrib/llvm/lib/Support/Windows/system_error.inc
new file mode 100644
index 000000000000..37ec81dd363c
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/system_error.inc
@@ -0,0 +1,142 @@
+//===- llvm/Support/Win32/system_error.inc - Windows error_code --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Windows specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//=== is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include <windows.h>
+#include <winerror.h>
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+ LPVOID lpMsgBuf = 0;
+ DWORD retval = ::FormatMessageA(
+ FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS,
+ NULL,
+ ev,
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
+ (LPSTR) &lpMsgBuf,
+ 0,
+ NULL);
+ if (retval == 0) {
+ ::LocalFree(lpMsgBuf);
+ return std::string("Unknown error");
+ }
+
+ std::string str( static_cast<LPCSTR>(lpMsgBuf) );
+ ::LocalFree(lpMsgBuf);
+
+ while (str.size()
+ && (str[str.size()-1] == '\n' || str[str.size()-1] == '\r'))
+ str.erase( str.size()-1 );
+ if (str.size() && str[str.size()-1] == '.')
+ str.erase( str.size()-1 );
+ return str;
+}
+
+// I'd rather not double the line count of the following.
+#define MAP_ERR_TO_COND(x, y) case x: return make_error_condition(errc::y)
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+ switch (ev) {
+ MAP_ERR_TO_COND(0, success);
+ // Windows system -> posix_errno decode table ---------------------------//
+ // see WinError.h comments for descriptions of errors
+ MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
+ MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
+ MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
+ MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied);
+ MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error);
+ MAP_ERR_TO_COND(ERROR_CANTREAD, io_error);
+ MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error);
+ MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied);
+ MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device);
+ MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty);
+ MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device);
+ MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device);
+ MAP_ERR_TO_COND(ERROR_HANDLE_EOF, value_too_large);
+ MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied);
+ MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device);
+ MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
+ MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
+ MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
+ MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
+ MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
+ MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_NOT_SAME_DEVICE, cross_device_link);
+ MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
+ MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_OPERATION_ABORTED, operation_canceled);
+ MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
+ MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_SEEK, io_error);
+ MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
+ MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open);
+ MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied);
+ MAP_ERR_TO_COND(ERROR_SEM_TIMEOUT, timed_out);
+ MAP_ERR_TO_COND(WSAEACCES, permission_denied);
+ MAP_ERR_TO_COND(WSAEADDRINUSE, address_in_use);
+ MAP_ERR_TO_COND(WSAEADDRNOTAVAIL, address_not_available);
+ MAP_ERR_TO_COND(WSAEAFNOSUPPORT, address_family_not_supported);
+ MAP_ERR_TO_COND(WSAEALREADY, connection_already_in_progress);
+ MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor);
+ MAP_ERR_TO_COND(WSAECONNABORTED, connection_aborted);
+ MAP_ERR_TO_COND(WSAECONNREFUSED, connection_refused);
+ MAP_ERR_TO_COND(WSAECONNRESET, connection_reset);
+ MAP_ERR_TO_COND(WSAEDESTADDRREQ, destination_address_required);
+ MAP_ERR_TO_COND(WSAEFAULT, bad_address);
+ MAP_ERR_TO_COND(WSAEHOSTUNREACH, host_unreachable);
+ MAP_ERR_TO_COND(WSAEINPROGRESS, operation_in_progress);
+ MAP_ERR_TO_COND(WSAEINTR, interrupted);
+ MAP_ERR_TO_COND(WSAEINVAL, invalid_argument);
+ MAP_ERR_TO_COND(WSAEISCONN, already_connected);
+ MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open);
+ MAP_ERR_TO_COND(WSAEMSGSIZE, message_size);
+ MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long);
+ MAP_ERR_TO_COND(WSAENETDOWN, network_down);
+ MAP_ERR_TO_COND(WSAENETRESET, network_reset);
+ MAP_ERR_TO_COND(WSAENETUNREACH, network_unreachable);
+ MAP_ERR_TO_COND(WSAENOBUFS, no_buffer_space);
+ MAP_ERR_TO_COND(WSAENOPROTOOPT, no_protocol_option);
+ MAP_ERR_TO_COND(WSAENOTCONN, not_connected);
+ MAP_ERR_TO_COND(WSAENOTSOCK, not_a_socket);
+ MAP_ERR_TO_COND(WSAEOPNOTSUPP, operation_not_supported);
+ MAP_ERR_TO_COND(WSAEPROTONOSUPPORT, protocol_not_supported);
+ MAP_ERR_TO_COND(WSAEPROTOTYPE, wrong_protocol_type);
+ MAP_ERR_TO_COND(WSAETIMEDOUT, timed_out);
+ MAP_ERR_TO_COND(WSAEWOULDBLOCK, operation_would_block);
+ default: return error_condition(ev, system_category());
+ }
+}
diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp
new file mode 100644
index 000000000000..2cead20c0b21
--- /dev/null
+++ b/contrib/llvm/lib/Support/YAMLParser.cpp
@@ -0,0 +1,2147 @@
+//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a YAML parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace yaml;
+
+enum UnicodeEncodingForm {
+ UEF_UTF32_LE, ///< UTF-32 Little Endian
+ UEF_UTF32_BE, ///< UTF-32 Big Endian
+ UEF_UTF16_LE, ///< UTF-16 Little Endian
+ UEF_UTF16_BE, ///< UTF-16 Big Endian
+ UEF_UTF8, ///< UTF-8 or ascii.
+ UEF_Unknown ///< Not a valid Unicode encoding.
+};
+
+/// EncodingInfo - Holds the encoding type and length of the byte order mark if
+/// it exists. Length is in {0, 2, 3, 4}.
+typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo;
+
+/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
+/// encoding form of \a Input.
+///
+/// @param Input A string of length 0 or more.
+/// @returns An EncodingInfo indicating the Unicode encoding form of the input
+/// and how long the byte order mark is if one exists.
+static EncodingInfo getUnicodeEncoding(StringRef Input) {
+ if (Input.size() == 0)
+ return std::make_pair(UEF_Unknown, 0);
+
+ switch (uint8_t(Input[0])) {
+ case 0x00:
+ if (Input.size() >= 4) {
+ if ( Input[1] == 0
+ && uint8_t(Input[2]) == 0xFE
+ && uint8_t(Input[3]) == 0xFF)
+ return std::make_pair(UEF_UTF32_BE, 4);
+ if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
+ return std::make_pair(UEF_UTF32_BE, 0);
+ }
+
+ if (Input.size() >= 2 && Input[1] != 0)
+ return std::make_pair(UEF_UTF16_BE, 0);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xFF:
+ if ( Input.size() >= 4
+ && uint8_t(Input[1]) == 0xFE
+ && Input[2] == 0
+ && Input[3] == 0)
+ return std::make_pair(UEF_UTF32_LE, 4);
+
+ if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
+ return std::make_pair(UEF_UTF16_LE, 2);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xFE:
+ if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
+ return std::make_pair(UEF_UTF16_BE, 2);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xEF:
+ if ( Input.size() >= 3
+ && uint8_t(Input[1]) == 0xBB
+ && uint8_t(Input[2]) == 0xBF)
+ return std::make_pair(UEF_UTF8, 3);
+ return std::make_pair(UEF_Unknown, 0);
+ }
+
+ // It could still be utf-32 or utf-16.
+ if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
+ return std::make_pair(UEF_UTF32_LE, 0);
+
+ if (Input.size() >= 2 && Input[1] == 0)
+ return std::make_pair(UEF_UTF16_LE, 0);
+
+ return std::make_pair(UEF_UTF8, 0);
+}
+
+namespace llvm {
+namespace yaml {
+/// Token - A single YAML token.
+struct Token : ilist_node<Token> {
+ enum TokenKind {
+ TK_Error, // Uninitialized token.
+ TK_StreamStart,
+ TK_StreamEnd,
+ TK_VersionDirective,
+ TK_TagDirective,
+ TK_DocumentStart,
+ TK_DocumentEnd,
+ TK_BlockEntry,
+ TK_BlockEnd,
+ TK_BlockSequenceStart,
+ TK_BlockMappingStart,
+ TK_FlowEntry,
+ TK_FlowSequenceStart,
+ TK_FlowSequenceEnd,
+ TK_FlowMappingStart,
+ TK_FlowMappingEnd,
+ TK_Key,
+ TK_Value,
+ TK_Scalar,
+ TK_Alias,
+ TK_Anchor,
+ TK_Tag
+ } Kind;
+
+ /// A string of length 0 or more whose begin() points to the logical location
+ /// of the token in the input.
+ StringRef Range;
+
+ Token() : Kind(TK_Error) {}
+};
+}
+}
+
+namespace llvm {
+template<>
+struct ilist_sentinel_traits<Token> {
+ Token *createSentinel() const {
+ return &Sentinel;
+ }
+ static void destroySentinel(Token*) {}
+
+ Token *provideInitialHead() const { return createSentinel(); }
+ Token *ensureHead(Token*) const { return createSentinel(); }
+ static void noteHead(Token*, Token*) {}
+
+private:
+ mutable Token Sentinel;
+};
+
+template<>
+struct ilist_node_traits<Token> {
+ Token *createNode(const Token &V) {
+ return new (Alloc.Allocate<Token>()) Token(V);
+ }
+ static void deleteNode(Token *V) {}
+
+ void addNodeToList(Token *) {}
+ void removeNodeFromList(Token *) {}
+ void transferNodesFromList(ilist_node_traits & /*SrcTraits*/,
+ ilist_iterator<Token> /*first*/,
+ ilist_iterator<Token> /*last*/) {}
+
+ BumpPtrAllocator Alloc;
+};
+}
+
+typedef ilist<Token> TokenQueueT;
+
+namespace {
+/// @brief This struct is used to track simple keys.
+///
+/// Simple keys are handled by creating an entry in SimpleKeys for each Token
+/// which could legally be the start of a simple key. When peekNext is called,
+/// if the Token To be returned is referenced by a SimpleKey, we continue
+/// tokenizing until that potential simple key has either been found to not be
+/// a simple key (we moved on to the next line or went further than 1024 chars).
+/// Or when we run into a Value, and then insert a Key token (and possibly
+/// others) before the SimpleKey's Tok.
+struct SimpleKey {
+ TokenQueueT::iterator Tok;
+ unsigned Column;
+ unsigned Line;
+ unsigned FlowLevel;
+ bool IsRequired;
+
+ bool operator ==(const SimpleKey &Other) {
+ return Tok == Other.Tok;
+ }
+};
+}
+
+/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
+/// subsequence and the subsequence's length in code units (uint8_t).
+/// A length of 0 represents an error.
+typedef std::pair<uint32_t, unsigned> UTF8Decoded;
+
+static UTF8Decoded decodeUTF8(StringRef Range) {
+ StringRef::iterator Position= Range.begin();
+ StringRef::iterator End = Range.end();
+ // 1 byte: [0x00, 0x7f]
+ // Bit pattern: 0xxxxxxx
+ if ((*Position & 0x80) == 0) {
+ return std::make_pair(*Position, 1);
+ }
+ // 2 bytes: [0x80, 0x7ff]
+ // Bit pattern: 110xxxxx 10xxxxxx
+ if (Position + 1 != End &&
+ ((*Position & 0xE0) == 0xC0) &&
+ ((*(Position + 1) & 0xC0) == 0x80)) {
+ uint32_t codepoint = ((*Position & 0x1F) << 6) |
+ (*(Position + 1) & 0x3F);
+ if (codepoint >= 0x80)
+ return std::make_pair(codepoint, 2);
+ }
+ // 3 bytes: [0x8000, 0xffff]
+ // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
+ if (Position + 2 != End &&
+ ((*Position & 0xF0) == 0xE0) &&
+ ((*(Position + 1) & 0xC0) == 0x80) &&
+ ((*(Position + 2) & 0xC0) == 0x80)) {
+ uint32_t codepoint = ((*Position & 0x0F) << 12) |
+ ((*(Position + 1) & 0x3F) << 6) |
+ (*(Position + 2) & 0x3F);
+ // Codepoints between 0xD800 and 0xDFFF are invalid, as
+ // they are high / low surrogate halves used by UTF-16.
+ if (codepoint >= 0x800 &&
+ (codepoint < 0xD800 || codepoint > 0xDFFF))
+ return std::make_pair(codepoint, 3);
+ }
+ // 4 bytes: [0x10000, 0x10FFFF]
+ // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ if (Position + 3 != End &&
+ ((*Position & 0xF8) == 0xF0) &&
+ ((*(Position + 1) & 0xC0) == 0x80) &&
+ ((*(Position + 2) & 0xC0) == 0x80) &&
+ ((*(Position + 3) & 0xC0) == 0x80)) {
+ uint32_t codepoint = ((*Position & 0x07) << 18) |
+ ((*(Position + 1) & 0x3F) << 12) |
+ ((*(Position + 2) & 0x3F) << 6) |
+ (*(Position + 3) & 0x3F);
+ if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
+ return std::make_pair(codepoint, 4);
+ }
+ return std::make_pair(0, 0);
+}
+
+namespace llvm {
+namespace yaml {
+/// @brief Scans YAML tokens from a MemoryBuffer.
+class Scanner {
+public:
+ Scanner(const StringRef Input, SourceMgr &SM);
+ Scanner(MemoryBuffer *Buffer, SourceMgr &SM_);
+
+ /// @brief Parse the next token and return it without popping it.
+ Token &peekNext();
+
+ /// @brief Parse the next token and pop it from the queue.
+ Token getNext();
+
+ void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ SM.PrintMessage(Loc, Kind, Message, Ranges);
+ }
+
+ void setError(const Twine &Message, StringRef::iterator Position) {
+ if (Current >= End)
+ Current = End - 1;
+
+ // Don't print out more errors after the first one we encounter. The rest
+ // are just the result of the first, and have no meaning.
+ if (!Failed)
+ printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
+ Failed = true;
+ }
+
+ void setError(const Twine &Message) {
+ setError(Message, Current);
+ }
+
+ /// @brief Returns true if an error occurred while parsing.
+ bool failed() {
+ return Failed;
+ }
+
+private:
+ StringRef currentInput() {
+ return StringRef(Current, End - Current);
+ }
+
+ /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
+ /// at \a Position.
+ ///
+ /// If the UTF-8 code units starting at Position do not form a well-formed
+ /// code unit subsequence, then the Unicode scalar value is 0, and the length
+ /// is 0.
+ UTF8Decoded decodeUTF8(StringRef::iterator Position) {
+ return ::decodeUTF8(StringRef(Position, End - Position));
+ }
+
+ // The following functions are based on the gramar rules in the YAML spec. The
+ // style of the function names it meant to closely match how they are written
+ // in the spec. The number within the [] is the number of the grammar rule in
+ // the spec.
+ //
+ // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
+ //
+ // c-
+ // A production starting and ending with a special character.
+ // b-
+ // A production matching a single line break.
+ // nb-
+ // A production starting and ending with a non-break character.
+ // s-
+ // A production starting and ending with a white space character.
+ // ns-
+ // A production starting and ending with a non-space character.
+ // l-
+ // A production matching complete line(s).
+
+ /// @brief Skip a single nb-char[27] starting at Position.
+ ///
+ /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
+ /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
+ ///
+ /// @returns The code unit after the nb-char, or Position if it's not an
+ /// nb-char.
+ StringRef::iterator skip_nb_char(StringRef::iterator Position);
+
+ /// @brief Skip a single b-break[28] starting at Position.
+ ///
+ /// A b-break is 0xD 0xA | 0xD | 0xA
+ ///
+ /// @returns The code unit after the b-break, or Position if it's not a
+ /// b-break.
+ StringRef::iterator skip_b_break(StringRef::iterator Position);
+
+ /// @brief Skip a single s-white[33] starting at Position.
+ ///
+ /// A s-white is 0x20 | 0x9
+ ///
+ /// @returns The code unit after the s-white, or Position if it's not a
+ /// s-white.
+ StringRef::iterator skip_s_white(StringRef::iterator Position);
+
+ /// @brief Skip a single ns-char[34] starting at Position.
+ ///
+ /// A ns-char is nb-char - s-white
+ ///
+ /// @returns The code unit after the ns-char, or Position if it's not a
+ /// ns-char.
+ StringRef::iterator skip_ns_char(StringRef::iterator Position);
+
+ typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
+ /// @brief Skip minimal well-formed code unit subsequences until Func
+ /// returns its input.
+ ///
+ /// @returns The code unit after the last minimal well-formed code unit
+ /// subsequence that Func accepted.
+ StringRef::iterator skip_while( SkipWhileFunc Func
+ , StringRef::iterator Position);
+
+ /// @brief Scan ns-uri-char[39]s starting at Cur.
+ ///
+ /// This updates Cur and Column while scanning.
+ ///
+ /// @returns A StringRef starting at Cur which covers the longest contiguous
+ /// sequence of ns-uri-char.
+ StringRef scan_ns_uri_char();
+
+ /// @brief Scan ns-plain-one-line[133] starting at \a Cur.
+ StringRef scan_ns_plain_one_line();
+
+ /// @brief Consume a minimal well-formed code unit subsequence starting at
+ /// \a Cur. Return false if it is not the same Unicode scalar value as
+ /// \a Expected. This updates \a Column.
+ bool consume(uint32_t Expected);
+
+ /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
+ void skip(uint32_t Distance);
+
+ /// @brief Return true if the minimal well-formed code unit subsequence at
+ /// Pos is whitespace or a new line
+ bool isBlankOrBreak(StringRef::iterator Position);
+
+ /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
+ void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
+ , unsigned AtColumn
+ , bool IsRequired);
+
+ /// @brief Remove simple keys that can no longer be valid simple keys.
+ ///
+ /// Invalid simple keys are not on the current line or are further than 1024
+ /// columns back.
+ void removeStaleSimpleKeyCandidates();
+
+ /// @brief Remove all simple keys on FlowLevel \a Level.
+ void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
+
+ /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
+ /// tokens if needed.
+ bool unrollIndent(int ToColumn);
+
+ /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
+ /// if needed.
+ bool rollIndent( int ToColumn
+ , Token::TokenKind Kind
+ , TokenQueueT::iterator InsertPoint);
+
+ /// @brief Skip whitespace and comments until the start of the next token.
+ void scanToNextToken();
+
+ /// @brief Must be the first token generated.
+ bool scanStreamStart();
+
+ /// @brief Generate tokens needed to close out the stream.
+ bool scanStreamEnd();
+
+ /// @brief Scan a %BLAH directive.
+ bool scanDirective();
+
+ /// @brief Scan a ... or ---.
+ bool scanDocumentIndicator(bool IsStart);
+
+ /// @brief Scan a [ or { and generate the proper flow collection start token.
+ bool scanFlowCollectionStart(bool IsSequence);
+
+ /// @brief Scan a ] or } and generate the proper flow collection end token.
+ bool scanFlowCollectionEnd(bool IsSequence);
+
+ /// @brief Scan the , that separates entries in a flow collection.
+ bool scanFlowEntry();
+
+ /// @brief Scan the - that starts block sequence entries.
+ bool scanBlockEntry();
+
+ /// @brief Scan an explicit ? indicating a key.
+ bool scanKey();
+
+ /// @brief Scan an explicit : indicating a value.
+ bool scanValue();
+
+ /// @brief Scan a quoted scalar.
+ bool scanFlowScalar(bool IsDoubleQuoted);
+
+ /// @brief Scan an unquoted scalar.
+ bool scanPlainScalar();
+
+ /// @brief Scan an Alias or Anchor starting with * or &.
+ bool scanAliasOrAnchor(bool IsAlias);
+
+ /// @brief Scan a block scalar starting with | or >.
+ bool scanBlockScalar(bool IsLiteral);
+
+ /// @brief Scan a tag of the form !stuff.
+ bool scanTag();
+
+ /// @brief Dispatch to the next scanning function based on \a *Cur.
+ bool fetchMoreTokens();
+
+ /// @brief The SourceMgr used for diagnostics and buffer management.
+ SourceMgr &SM;
+
+ /// @brief The original input.
+ MemoryBuffer *InputBuffer;
+
+ /// @brief The current position of the scanner.
+ StringRef::iterator Current;
+
+ /// @brief The end of the input (one past the last character).
+ StringRef::iterator End;
+
+ /// @brief Current YAML indentation level in spaces.
+ int Indent;
+
+ /// @brief Current column number in Unicode code points.
+ unsigned Column;
+
+ /// @brief Current line number.
+ unsigned Line;
+
+ /// @brief How deep we are in flow style containers. 0 Means at block level.
+ unsigned FlowLevel;
+
+ /// @brief Are we at the start of the stream?
+ bool IsStartOfStream;
+
+ /// @brief Can the next token be the start of a simple key?
+ bool IsSimpleKeyAllowed;
+
+ /// @brief True if an error has occurred.
+ bool Failed;
+
+ /// @brief Queue of tokens. This is required to queue up tokens while looking
+ /// for the end of a simple key. And for cases where a single character
+ /// can produce multiple tokens (e.g. BlockEnd).
+ TokenQueueT TokenQueue;
+
+ /// @brief Indentation levels.
+ SmallVector<int, 4> Indents;
+
+ /// @brief Potential simple keys.
+ SmallVector<SimpleKey, 4> SimpleKeys;
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
+static void encodeUTF8( uint32_t UnicodeScalarValue
+ , SmallVectorImpl<char> &Result) {
+ if (UnicodeScalarValue <= 0x7F) {
+ Result.push_back(UnicodeScalarValue & 0x7F);
+ } else if (UnicodeScalarValue <= 0x7FF) {
+ uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
+ uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ } else if (UnicodeScalarValue <= 0xFFFF) {
+ uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
+ uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
+ uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ Result.push_back(ThirdByte);
+ } else if (UnicodeScalarValue <= 0x10FFFF) {
+ uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
+ uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
+ uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
+ uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ Result.push_back(ThirdByte);
+ Result.push_back(FourthByte);
+ }
+}
+
+bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
+ SourceMgr SM;
+ Scanner scanner(Input, SM);
+ while (true) {
+ Token T = scanner.getNext();
+ switch (T.Kind) {
+ case Token::TK_StreamStart:
+ OS << "Stream-Start: ";
+ break;
+ case Token::TK_StreamEnd:
+ OS << "Stream-End: ";
+ break;
+ case Token::TK_VersionDirective:
+ OS << "Version-Directive: ";
+ break;
+ case Token::TK_TagDirective:
+ OS << "Tag-Directive: ";
+ break;
+ case Token::TK_DocumentStart:
+ OS << "Document-Start: ";
+ break;
+ case Token::TK_DocumentEnd:
+ OS << "Document-End: ";
+ break;
+ case Token::TK_BlockEntry:
+ OS << "Block-Entry: ";
+ break;
+ case Token::TK_BlockEnd:
+ OS << "Block-End: ";
+ break;
+ case Token::TK_BlockSequenceStart:
+ OS << "Block-Sequence-Start: ";
+ break;
+ case Token::TK_BlockMappingStart:
+ OS << "Block-Mapping-Start: ";
+ break;
+ case Token::TK_FlowEntry:
+ OS << "Flow-Entry: ";
+ break;
+ case Token::TK_FlowSequenceStart:
+ OS << "Flow-Sequence-Start: ";
+ break;
+ case Token::TK_FlowSequenceEnd:
+ OS << "Flow-Sequence-End: ";
+ break;
+ case Token::TK_FlowMappingStart:
+ OS << "Flow-Mapping-Start: ";
+ break;
+ case Token::TK_FlowMappingEnd:
+ OS << "Flow-Mapping-End: ";
+ break;
+ case Token::TK_Key:
+ OS << "Key: ";
+ break;
+ case Token::TK_Value:
+ OS << "Value: ";
+ break;
+ case Token::TK_Scalar:
+ OS << "Scalar: ";
+ break;
+ case Token::TK_Alias:
+ OS << "Alias: ";
+ break;
+ case Token::TK_Anchor:
+ OS << "Anchor: ";
+ break;
+ case Token::TK_Tag:
+ OS << "Tag: ";
+ break;
+ case Token::TK_Error:
+ break;
+ }
+ OS << T.Range << "\n";
+ if (T.Kind == Token::TK_StreamEnd)
+ break;
+ else if (T.Kind == Token::TK_Error)
+ return false;
+ }
+ return true;
+}
+
+bool yaml::scanTokens(StringRef Input) {
+ llvm::SourceMgr SM;
+ llvm::yaml::Scanner scanner(Input, SM);
+ for (;;) {
+ llvm::yaml::Token T = scanner.getNext();
+ if (T.Kind == Token::TK_StreamEnd)
+ break;
+ else if (T.Kind == Token::TK_Error)
+ return false;
+ }
+ return true;
+}
+
+std::string yaml::escape(StringRef Input) {
+ std::string EscapedInput;
+ for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
+ if (*i == '\\')
+ EscapedInput += "\\\\";
+ else if (*i == '"')
+ EscapedInput += "\\\"";
+ else if (*i == 0)
+ EscapedInput += "\\0";
+ else if (*i == 0x07)
+ EscapedInput += "\\a";
+ else if (*i == 0x08)
+ EscapedInput += "\\b";
+ else if (*i == 0x09)
+ EscapedInput += "\\t";
+ else if (*i == 0x0A)
+ EscapedInput += "\\n";
+ else if (*i == 0x0B)
+ EscapedInput += "\\v";
+ else if (*i == 0x0C)
+ EscapedInput += "\\f";
+ else if (*i == 0x0D)
+ EscapedInput += "\\r";
+ else if (*i == 0x1B)
+ EscapedInput += "\\e";
+ else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
+ std::string HexStr = utohexstr(*i);
+ EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
+ } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
+ UTF8Decoded UnicodeScalarValue
+ = decodeUTF8(StringRef(i, Input.end() - i));
+ if (UnicodeScalarValue.second == 0) {
+ // Found invalid char.
+ SmallString<4> Val;
+ encodeUTF8(0xFFFD, Val);
+ EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
+ // FIXME: Error reporting.
+ return EscapedInput;
+ }
+ if (UnicodeScalarValue.first == 0x85)
+ EscapedInput += "\\N";
+ else if (UnicodeScalarValue.first == 0xA0)
+ EscapedInput += "\\_";
+ else if (UnicodeScalarValue.first == 0x2028)
+ EscapedInput += "\\L";
+ else if (UnicodeScalarValue.first == 0x2029)
+ EscapedInput += "\\P";
+ else {
+ std::string HexStr = utohexstr(UnicodeScalarValue.first);
+ if (HexStr.size() <= 2)
+ EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
+ else if (HexStr.size() <= 4)
+ EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
+ else if (HexStr.size() <= 8)
+ EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
+ }
+ i += UnicodeScalarValue.second - 1;
+ } else
+ EscapedInput.push_back(*i);
+ }
+ return EscapedInput;
+}
+
+Scanner::Scanner(StringRef Input, SourceMgr &sm)
+ : SM(sm)
+ , Indent(-1)
+ , Column(0)
+ , Line(0)
+ , FlowLevel(0)
+ , IsStartOfStream(true)
+ , IsSimpleKeyAllowed(true)
+ , Failed(false) {
+ InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
+ SM.AddNewSourceBuffer(InputBuffer, SMLoc());
+ Current = InputBuffer->getBufferStart();
+ End = InputBuffer->getBufferEnd();
+}
+
+Scanner::Scanner(MemoryBuffer *Buffer, SourceMgr &SM_)
+ : SM(SM_)
+ , InputBuffer(Buffer)
+ , Current(InputBuffer->getBufferStart())
+ , End(InputBuffer->getBufferEnd())
+ , Indent(-1)
+ , Column(0)
+ , Line(0)
+ , FlowLevel(0)
+ , IsStartOfStream(true)
+ , IsSimpleKeyAllowed(true)
+ , Failed(false) {
+ SM.AddNewSourceBuffer(InputBuffer, SMLoc());
+}
+
+Token &Scanner::peekNext() {
+ // If the current token is a possible simple key, keep parsing until we
+ // can confirm.
+ bool NeedMore = false;
+ while (true) {
+ if (TokenQueue.empty() || NeedMore) {
+ if (!fetchMoreTokens()) {
+ TokenQueue.clear();
+ TokenQueue.push_back(Token());
+ return TokenQueue.front();
+ }
+ }
+ assert(!TokenQueue.empty() &&
+ "fetchMoreTokens lied about getting tokens!");
+
+ removeStaleSimpleKeyCandidates();
+ SimpleKey SK;
+ SK.Tok = TokenQueue.front();
+ if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK)
+ == SimpleKeys.end())
+ break;
+ else
+ NeedMore = true;
+ }
+ return TokenQueue.front();
+}
+
+Token Scanner::getNext() {
+ Token Ret = peekNext();
+ // TokenQueue can be empty if there was an error getting the next token.
+ if (!TokenQueue.empty())
+ TokenQueue.pop_front();
+
+ // There cannot be any referenced Token's if the TokenQueue is empty. So do a
+ // quick deallocation of them all.
+ if (TokenQueue.empty()) {
+ TokenQueue.Alloc.Reset();
+ }
+
+ return Ret;
+}
+
+StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ // Check 7 bit c-printable - b-char.
+ if ( *Position == 0x09
+ || (*Position >= 0x20 && *Position <= 0x7E))
+ return Position + 1;
+
+ // Check for valid UTF-8.
+ if (uint8_t(*Position) & 0x80) {
+ UTF8Decoded u8d = decodeUTF8(Position);
+ if ( u8d.second != 0
+ && u8d.first != 0xFEFF
+ && ( u8d.first == 0x85
+ || ( u8d.first >= 0xA0
+ && u8d.first <= 0xD7FF)
+ || ( u8d.first >= 0xE000
+ && u8d.first <= 0xFFFD)
+ || ( u8d.first >= 0x10000
+ && u8d.first <= 0x10FFFF)))
+ return Position + u8d.second;
+ }
+ return Position;
+}
+
+StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ if (*Position == 0x0D) {
+ if (Position + 1 != End && *(Position + 1) == 0x0A)
+ return Position + 2;
+ return Position + 1;
+ }
+
+ if (*Position == 0x0A)
+ return Position + 1;
+ return Position;
+}
+
+
+StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ if (*Position == ' ' || *Position == '\t')
+ return Position + 1;
+ return Position;
+}
+
+StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ if (*Position == ' ' || *Position == '\t')
+ return Position;
+ return skip_nb_char(Position);
+}
+
+StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
+ , StringRef::iterator Position) {
+ while (true) {
+ StringRef::iterator i = (this->*Func)(Position);
+ if (i == Position)
+ break;
+ Position = i;
+ }
+ return Position;
+}
+
+static bool is_ns_hex_digit(const char C) {
+ return (C >= '0' && C <= '9')
+ || (C >= 'a' && C <= 'z')
+ || (C >= 'A' && C <= 'Z');
+}
+
+static bool is_ns_word_char(const char C) {
+ return C == '-'
+ || (C >= 'a' && C <= 'z')
+ || (C >= 'A' && C <= 'Z');
+}
+
+StringRef Scanner::scan_ns_uri_char() {
+ StringRef::iterator Start = Current;
+ while (true) {
+ if (Current == End)
+ break;
+ if (( *Current == '%'
+ && Current + 2 < End
+ && is_ns_hex_digit(*(Current + 1))
+ && is_ns_hex_digit(*(Current + 2)))
+ || is_ns_word_char(*Current)
+ || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
+ != StringRef::npos) {
+ ++Current;
+ ++Column;
+ } else
+ break;
+ }
+ return StringRef(Start, Current - Start);
+}
+
+StringRef Scanner::scan_ns_plain_one_line() {
+ StringRef::iterator start = Current;
+ // The first character must already be verified.
+ ++Current;
+ while (true) {
+ if (Current == End) {
+ break;
+ } else if (*Current == ':') {
+ // Check if the next character is a ns-char.
+ if (Current + 1 == End)
+ break;
+ StringRef::iterator i = skip_ns_char(Current + 1);
+ if (Current + 1 != i) {
+ Current = i;
+ Column += 2; // Consume both the ':' and ns-char.
+ } else
+ break;
+ } else if (*Current == '#') {
+ // Check if the previous character was a ns-char.
+ // The & 0x80 check is to check for the trailing byte of a utf-8
+ if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) {
+ ++Current;
+ ++Column;
+ } else
+ break;
+ } else {
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+ return StringRef(start, Current - start);
+}
+
+bool Scanner::consume(uint32_t Expected) {
+ if (Expected >= 0x80)
+ report_fatal_error("Not dealing with this yet");
+ if (Current == End)
+ return false;
+ if (uint8_t(*Current) >= 0x80)
+ report_fatal_error("Not dealing with this yet");
+ if (uint8_t(*Current) == Expected) {
+ ++Current;
+ ++Column;
+ return true;
+ }
+ return false;
+}
+
+void Scanner::skip(uint32_t Distance) {
+ Current += Distance;
+ Column += Distance;
+ assert(Current <= End && "Skipped past the end");
+}
+
+bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
+ if (Position == End)
+ return false;
+ if ( *Position == ' ' || *Position == '\t'
+ || *Position == '\r' || *Position == '\n')
+ return true;
+ return false;
+}
+
+void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
+ , unsigned AtColumn
+ , bool IsRequired) {
+ if (IsSimpleKeyAllowed) {
+ SimpleKey SK;
+ SK.Tok = Tok;
+ SK.Line = Line;
+ SK.Column = AtColumn;
+ SK.IsRequired = IsRequired;
+ SK.FlowLevel = FlowLevel;
+ SimpleKeys.push_back(SK);
+ }
+}
+
+void Scanner::removeStaleSimpleKeyCandidates() {
+ for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
+ i != SimpleKeys.end();) {
+ if (i->Line != Line || i->Column + 1024 < Column) {
+ if (i->IsRequired)
+ setError( "Could not find expected : for simple key"
+ , i->Tok->Range.begin());
+ i = SimpleKeys.erase(i);
+ } else
+ ++i;
+ }
+}
+
+void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
+ if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
+ SimpleKeys.pop_back();
+}
+
+bool Scanner::unrollIndent(int ToColumn) {
+ Token T;
+ // Indentation is ignored in flow.
+ if (FlowLevel != 0)
+ return true;
+
+ while (Indent > ToColumn) {
+ T.Kind = Token::TK_BlockEnd;
+ T.Range = StringRef(Current, 1);
+ TokenQueue.push_back(T);
+ Indent = Indents.pop_back_val();
+ }
+
+ return true;
+}
+
+bool Scanner::rollIndent( int ToColumn
+ , Token::TokenKind Kind
+ , TokenQueueT::iterator InsertPoint) {
+ if (FlowLevel)
+ return true;
+ if (Indent < ToColumn) {
+ Indents.push_back(Indent);
+ Indent = ToColumn;
+
+ Token T;
+ T.Kind = Kind;
+ T.Range = StringRef(Current, 0);
+ TokenQueue.insert(InsertPoint, T);
+ }
+ return true;
+}
+
+void Scanner::scanToNextToken() {
+ while (true) {
+ while (*Current == ' ' || *Current == '\t') {
+ skip(1);
+ }
+
+ // Skip comment.
+ if (*Current == '#') {
+ while (true) {
+ // This may skip more than one byte, thus Column is only incremented
+ // for code points.
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+
+ // Skip EOL.
+ StringRef::iterator i = skip_b_break(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Line;
+ Column = 0;
+ // New lines may start a simple key.
+ if (!FlowLevel)
+ IsSimpleKeyAllowed = true;
+ }
+}
+
+bool Scanner::scanStreamStart() {
+ IsStartOfStream = false;
+
+ EncodingInfo EI = getUnicodeEncoding(currentInput());
+
+ Token T;
+ T.Kind = Token::TK_StreamStart;
+ T.Range = StringRef(Current, EI.second);
+ TokenQueue.push_back(T);
+ Current += EI.second;
+ return true;
+}
+
+bool Scanner::scanStreamEnd() {
+ // Force an ending new line if one isn't present.
+ if (Column != 0) {
+ Column = 0;
+ ++Line;
+ }
+
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ Token T;
+ T.Kind = Token::TK_StreamEnd;
+ T.Range = StringRef(Current, 0);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanDirective() {
+ // Reset the indentation level.
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ StringRef::iterator Start = Current;
+ consume('%');
+ StringRef::iterator NameStart = Current;
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ StringRef Name(NameStart, Current - NameStart);
+ Current = skip_while(&Scanner::skip_s_white, Current);
+
+ if (Name == "YAML") {
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ Token T;
+ T.Kind = Token::TK_VersionDirective;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
+ }
+ return false;
+}
+
+bool Scanner::scanDocumentIndicator(bool IsStart) {
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ Token T;
+ T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
+ T.Range = StringRef(Current, 3);
+ skip(3);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanFlowCollectionStart(bool IsSequence) {
+ Token T;
+ T.Kind = IsSequence ? Token::TK_FlowSequenceStart
+ : Token::TK_FlowMappingStart;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+
+ // [ and { may begin a simple key.
+ saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false);
+
+ // And may also be followed by a simple key.
+ IsSimpleKeyAllowed = true;
+ ++FlowLevel;
+ return true;
+}
+
+bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = false;
+ Token T;
+ T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
+ : Token::TK_FlowMappingEnd;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ if (FlowLevel)
+ --FlowLevel;
+ return true;
+}
+
+bool Scanner::scanFlowEntry() {
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = true;
+ Token T;
+ T.Kind = Token::TK_FlowEntry;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanBlockEntry() {
+ rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = true;
+ Token T;
+ T.Kind = Token::TK_BlockEntry;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanKey() {
+ if (!FlowLevel)
+ rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
+
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = !FlowLevel;
+
+ Token T;
+ T.Kind = Token::TK_Key;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanValue() {
+ // If the previous token could have been a simple key, insert the key token
+ // into the token queue.
+ if (!SimpleKeys.empty()) {
+ SimpleKey SK = SimpleKeys.pop_back_val();
+ Token T;
+ T.Kind = Token::TK_Key;
+ T.Range = SK.Tok->Range;
+ TokenQueueT::iterator i, e;
+ for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
+ if (i == SK.Tok)
+ break;
+ }
+ assert(i != e && "SimpleKey not in token queue!");
+ i = TokenQueue.insert(i, T);
+
+ // We may also need to add a Block-Mapping-Start token.
+ rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
+
+ IsSimpleKeyAllowed = false;
+ } else {
+ if (!FlowLevel)
+ rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
+ IsSimpleKeyAllowed = !FlowLevel;
+ }
+
+ Token T;
+ T.Kind = Token::TK_Value;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+// Forbidding inlining improves performance by roughly 20%.
+// FIXME: Remove once llvm optimizes this to the faster version without hints.
+LLVM_ATTRIBUTE_NOINLINE static bool
+wasEscaped(StringRef::iterator First, StringRef::iterator Position);
+
+// Returns whether a character at 'Position' was escaped with a leading '\'.
+// 'First' specifies the position of the first character in the string.
+static bool wasEscaped(StringRef::iterator First,
+ StringRef::iterator Position) {
+ assert(Position - 1 >= First);
+ StringRef::iterator I = Position - 1;
+ // We calculate the number of consecutive '\'s before the current position
+ // by iterating backwards through our string.
+ while (I >= First && *I == '\\') --I;
+ // (Position - 1 - I) now contains the number of '\'s before the current
+ // position. If it is odd, the character at 'Position' was escaped.
+ return (Position - 1 - I) % 2 == 1;
+}
+
+bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ if (IsDoubleQuoted) {
+ do {
+ ++Current;
+ while (Current != End && *Current != '"')
+ ++Current;
+ // Repeat until the previous character was not a '\' or was an escaped
+ // backslash.
+ } while ( Current != End
+ && *(Current - 1) == '\\'
+ && wasEscaped(Start + 1, Current));
+ } else {
+ skip(1);
+ while (true) {
+ // Skip a ' followed by another '.
+ if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
+ skip(2);
+ continue;
+ } else if (*Current == '\'')
+ break;
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current) {
+ i = skip_b_break(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ Column = 0;
+ ++Line;
+ } else {
+ if (i == End)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+ }
+
+ if (Current == End) {
+ setError("Expected quote at end of scalar", Current);
+ return false;
+ }
+
+ skip(1); // Skip ending quote.
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanPlainScalar() {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ unsigned LeadingBlanks = 0;
+ assert(Indent >= -1 && "Indent must be >= -1 !");
+ unsigned indent = static_cast<unsigned>(Indent + 1);
+ while (true) {
+ if (*Current == '#')
+ break;
+
+ while (!isBlankOrBreak(Current)) {
+ if ( FlowLevel && *Current == ':'
+ && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
+ setError("Found unexpected ':' while scanning a plain scalar", Current);
+ return false;
+ }
+
+ // Check for the end of the plain scalar.
+ if ( (*Current == ':' && isBlankOrBreak(Current + 1))
+ || ( FlowLevel
+ && (StringRef(Current, 1).find_first_of(",:?[]{}")
+ != StringRef::npos)))
+ break;
+
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+
+ // Are we at the end?
+ if (!isBlankOrBreak(Current))
+ break;
+
+ // Eat blanks.
+ StringRef::iterator Tmp = Current;
+ while (isBlankOrBreak(Tmp)) {
+ StringRef::iterator i = skip_s_white(Tmp);
+ if (i != Tmp) {
+ if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
+ setError("Found invalid tab character in indentation", Tmp);
+ return false;
+ }
+ Tmp = i;
+ ++Column;
+ } else {
+ i = skip_b_break(Tmp);
+ if (!LeadingBlanks)
+ LeadingBlanks = 1;
+ Tmp = i;
+ Column = 0;
+ ++Line;
+ }
+ }
+
+ if (!FlowLevel && Column < indent)
+ break;
+
+ Current = Tmp;
+ }
+ if (Start == Current) {
+ setError("Got empty plain scalar", Start);
+ return false;
+ }
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Plain scalars can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanAliasOrAnchor(bool IsAlias) {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ skip(1);
+ while(true) {
+ if ( *Current == '[' || *Current == ']'
+ || *Current == '{' || *Current == '}'
+ || *Current == ','
+ || *Current == ':')
+ break;
+ StringRef::iterator i = skip_ns_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+
+ if (Start == Current) {
+ setError("Got empty alias or anchor", Start);
+ return false;
+ }
+
+ Token T;
+ T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Alias and anchors can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanBlockScalar(bool IsLiteral) {
+ StringRef::iterator Start = Current;
+ skip(1); // Eat | or >
+ while(true) {
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current) {
+ if (Column == 0)
+ break;
+ i = skip_b_break(Current);
+ if (i != Current) {
+ // We got a line break.
+ Column = 0;
+ ++Line;
+ Current = i;
+ continue;
+ } else {
+ // There was an error, which should already have been printed out.
+ return false;
+ }
+ }
+ Current = i;
+ ++Column;
+ }
+
+ if (Start == Current) {
+ setError("Got empty block scalar", Start);
+ return false;
+ }
+
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanTag() {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ skip(1); // Eat !.
+ if (Current == End || isBlankOrBreak(Current)); // An empty tag.
+ else if (*Current == '<') {
+ skip(1);
+ scan_ns_uri_char();
+ if (!consume('>'))
+ return false;
+ } else {
+ // FIXME: Actually parse the c-ns-shorthand-tag rule.
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ }
+
+ Token T;
+ T.Kind = Token::TK_Tag;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Tags can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::fetchMoreTokens() {
+ if (IsStartOfStream)
+ return scanStreamStart();
+
+ scanToNextToken();
+
+ if (Current == End)
+ return scanStreamEnd();
+
+ removeStaleSimpleKeyCandidates();
+
+ unrollIndent(Column);
+
+ if (Column == 0 && *Current == '%')
+ return scanDirective();
+
+ if (Column == 0 && Current + 4 <= End
+ && *Current == '-'
+ && *(Current + 1) == '-'
+ && *(Current + 2) == '-'
+ && (Current + 3 == End || isBlankOrBreak(Current + 3)))
+ return scanDocumentIndicator(true);
+
+ if (Column == 0 && Current + 4 <= End
+ && *Current == '.'
+ && *(Current + 1) == '.'
+ && *(Current + 2) == '.'
+ && (Current + 3 == End || isBlankOrBreak(Current + 3)))
+ return scanDocumentIndicator(false);
+
+ if (*Current == '[')
+ return scanFlowCollectionStart(true);
+
+ if (*Current == '{')
+ return scanFlowCollectionStart(false);
+
+ if (*Current == ']')
+ return scanFlowCollectionEnd(true);
+
+ if (*Current == '}')
+ return scanFlowCollectionEnd(false);
+
+ if (*Current == ',')
+ return scanFlowEntry();
+
+ if (*Current == '-' && isBlankOrBreak(Current + 1))
+ return scanBlockEntry();
+
+ if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
+ return scanKey();
+
+ if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
+ return scanValue();
+
+ if (*Current == '*')
+ return scanAliasOrAnchor(true);
+
+ if (*Current == '&')
+ return scanAliasOrAnchor(false);
+
+ if (*Current == '!')
+ return scanTag();
+
+ if (*Current == '|' && !FlowLevel)
+ return scanBlockScalar(true);
+
+ if (*Current == '>' && !FlowLevel)
+ return scanBlockScalar(false);
+
+ if (*Current == '\'')
+ return scanFlowScalar(false);
+
+ if (*Current == '"')
+ return scanFlowScalar(true);
+
+ // Get a plain scalar.
+ StringRef FirstChar(Current, 1);
+ if (!(isBlankOrBreak(Current)
+ || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
+ || (*Current == '-' && !isBlankOrBreak(Current + 1))
+ || (!FlowLevel && (*Current == '?' || *Current == ':')
+ && isBlankOrBreak(Current + 1))
+ || (!FlowLevel && *Current == ':'
+ && Current + 2 < End
+ && *(Current + 1) == ':'
+ && !isBlankOrBreak(Current + 2)))
+ return scanPlainScalar();
+
+ setError("Unrecognized character while tokenizing.");
+ return false;
+}
+
+Stream::Stream(StringRef Input, SourceMgr &SM)
+ : scanner(new Scanner(Input, SM))
+ , CurrentDoc(0) {}
+
+Stream::Stream(MemoryBuffer *InputBuffer, SourceMgr &SM)
+ : scanner(new Scanner(InputBuffer, SM))
+ , CurrentDoc(0) {}
+
+Stream::~Stream() {}
+
+bool Stream::failed() { return scanner->failed(); }
+
+void Stream::printError(Node *N, const Twine &Msg) {
+ SmallVector<SMRange, 1> Ranges;
+ Ranges.push_back(N->getSourceRange());
+ scanner->printError( N->getSourceRange().Start
+ , SourceMgr::DK_Error
+ , Msg
+ , Ranges);
+}
+
+void Stream::handleYAMLDirective(const Token &t) {
+ // TODO: Ensure version is 1.x.
+}
+
+document_iterator Stream::begin() {
+ if (CurrentDoc)
+ report_fatal_error("Can only iterate over the stream once");
+
+ // Skip Stream-Start.
+ scanner->getNext();
+
+ CurrentDoc.reset(new Document(*this));
+ return document_iterator(CurrentDoc);
+}
+
+document_iterator Stream::end() {
+ return document_iterator();
+}
+
+void Stream::skip() {
+ for (document_iterator i = begin(), e = end(); i != e; ++i)
+ i->skip();
+}
+
+Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
+ : Doc(D)
+ , TypeID(Type)
+ , Anchor(A) {
+ SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
+ SourceRange = SMRange(Start, Start);
+}
+
+Token &Node::peekNext() {
+ return Doc->peekNext();
+}
+
+Token Node::getNext() {
+ return Doc->getNext();
+}
+
+Node *Node::parseBlockNode() {
+ return Doc->parseBlockNode();
+}
+
+BumpPtrAllocator &Node::getAllocator() {
+ return Doc->NodeAllocator;
+}
+
+void Node::setError(const Twine &Msg, Token &Tok) const {
+ Doc->setError(Msg, Tok);
+}
+
+bool Node::failed() const {
+ return Doc->failed();
+}
+
+
+
+StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
+ // TODO: Handle newlines properly. We need to remove leading whitespace.
+ if (Value[0] == '"') { // Double quoted.
+ // Pull off the leading and trailing "s.
+ StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
+ // Search for characters that would require unescaping the value.
+ StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
+ if (i != StringRef::npos)
+ return unescapeDoubleQuoted(UnquotedValue, i, Storage);
+ return UnquotedValue;
+ } else if (Value[0] == '\'') { // Single quoted.
+ // Pull off the leading and trailing 's.
+ StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
+ StringRef::size_type i = UnquotedValue.find('\'');
+ if (i != StringRef::npos) {
+ // We're going to need Storage.
+ Storage.clear();
+ Storage.reserve(UnquotedValue.size());
+ for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
+ StringRef Valid(UnquotedValue.begin(), i);
+ Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+ Storage.push_back('\'');
+ UnquotedValue = UnquotedValue.substr(i + 2);
+ }
+ Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+ return StringRef(Storage.begin(), Storage.size());
+ }
+ return UnquotedValue;
+ }
+ // Plain or block.
+ return Value.rtrim(" ");
+}
+
+StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
+ , StringRef::size_type i
+ , SmallVectorImpl<char> &Storage)
+ const {
+ // Use Storage to build proper value.
+ Storage.clear();
+ Storage.reserve(UnquotedValue.size());
+ for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
+ // Insert all previous chars into Storage.
+ StringRef Valid(UnquotedValue.begin(), i);
+ Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+ // Chop off inserted chars.
+ UnquotedValue = UnquotedValue.substr(i);
+
+ assert(!UnquotedValue.empty() && "Can't be empty!");
+
+ // Parse escape or line break.
+ switch (UnquotedValue[0]) {
+ case '\r':
+ case '\n':
+ Storage.push_back('\n');
+ if ( UnquotedValue.size() > 1
+ && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
+ UnquotedValue = UnquotedValue.substr(1);
+ UnquotedValue = UnquotedValue.substr(1);
+ break;
+ default:
+ if (UnquotedValue.size() == 1)
+ // TODO: Report error.
+ break;
+ UnquotedValue = UnquotedValue.substr(1);
+ switch (UnquotedValue[0]) {
+ default: {
+ Token T;
+ T.Range = StringRef(UnquotedValue.begin(), 1);
+ setError("Unrecognized escape code!", T);
+ return "";
+ }
+ case '\r':
+ case '\n':
+ // Remove the new line.
+ if ( UnquotedValue.size() > 1
+ && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
+ UnquotedValue = UnquotedValue.substr(1);
+ // If this was just a single byte newline, it will get skipped
+ // below.
+ break;
+ case '0':
+ Storage.push_back(0x00);
+ break;
+ case 'a':
+ Storage.push_back(0x07);
+ break;
+ case 'b':
+ Storage.push_back(0x08);
+ break;
+ case 't':
+ case 0x09:
+ Storage.push_back(0x09);
+ break;
+ case 'n':
+ Storage.push_back(0x0A);
+ break;
+ case 'v':
+ Storage.push_back(0x0B);
+ break;
+ case 'f':
+ Storage.push_back(0x0C);
+ break;
+ case 'r':
+ Storage.push_back(0x0D);
+ break;
+ case 'e':
+ Storage.push_back(0x1B);
+ break;
+ case ' ':
+ Storage.push_back(0x20);
+ break;
+ case '"':
+ Storage.push_back(0x22);
+ break;
+ case '/':
+ Storage.push_back(0x2F);
+ break;
+ case '\\':
+ Storage.push_back(0x5C);
+ break;
+ case 'N':
+ encodeUTF8(0x85, Storage);
+ break;
+ case '_':
+ encodeUTF8(0xA0, Storage);
+ break;
+ case 'L':
+ encodeUTF8(0x2028, Storage);
+ break;
+ case 'P':
+ encodeUTF8(0x2029, Storage);
+ break;
+ case 'x': {
+ if (UnquotedValue.size() < 3)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
+ encodeUTF8(UnicodeScalarValue, Storage);
+ UnquotedValue = UnquotedValue.substr(2);
+ break;
+ }
+ case 'u': {
+ if (UnquotedValue.size() < 5)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
+ encodeUTF8(UnicodeScalarValue, Storage);
+ UnquotedValue = UnquotedValue.substr(4);
+ break;
+ }
+ case 'U': {
+ if (UnquotedValue.size() < 9)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
+ encodeUTF8(UnicodeScalarValue, Storage);
+ UnquotedValue = UnquotedValue.substr(8);
+ break;
+ }
+ }
+ UnquotedValue = UnquotedValue.substr(1);
+ }
+ }
+ Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+ return StringRef(Storage.begin(), Storage.size());
+}
+
+Node *KeyValueNode::getKey() {
+ if (Key)
+ return Key;
+ // Handle implicit null keys.
+ {
+ Token &t = peekNext();
+ if ( t.Kind == Token::TK_BlockEnd
+ || t.Kind == Token::TK_Value
+ || t.Kind == Token::TK_Error) {
+ return Key = new (getAllocator()) NullNode(Doc);
+ }
+ if (t.Kind == Token::TK_Key)
+ getNext(); // skip TK_Key.
+ }
+
+ // Handle explicit null keys.
+ Token &t = peekNext();
+ if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
+ return Key = new (getAllocator()) NullNode(Doc);
+ }
+
+ // We've got a normal key.
+ return Key = parseBlockNode();
+}
+
+Node *KeyValueNode::getValue() {
+ if (Value)
+ return Value;
+ getKey()->skip();
+ if (failed())
+ return Value = new (getAllocator()) NullNode(Doc);
+
+ // Handle implicit null values.
+ {
+ Token &t = peekNext();
+ if ( t.Kind == Token::TK_BlockEnd
+ || t.Kind == Token::TK_FlowMappingEnd
+ || t.Kind == Token::TK_Key
+ || t.Kind == Token::TK_FlowEntry
+ || t.Kind == Token::TK_Error) {
+ return Value = new (getAllocator()) NullNode(Doc);
+ }
+
+ if (t.Kind != Token::TK_Value) {
+ setError("Unexpected token in Key Value.", t);
+ return Value = new (getAllocator()) NullNode(Doc);
+ }
+ getNext(); // skip TK_Value.
+ }
+
+ // Handle explicit null values.
+ Token &t = peekNext();
+ if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
+ return Value = new (getAllocator()) NullNode(Doc);
+ }
+
+ // We got a normal value.
+ return Value = parseBlockNode();
+}
+
+void MappingNode::increment() {
+ if (failed()) {
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ return;
+ }
+ if (CurrentEntry) {
+ CurrentEntry->skip();
+ if (Type == MT_Inline) {
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ return;
+ }
+ }
+ Token T = peekNext();
+ if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
+ // KeyValueNode eats the TK_Key. That way it can detect null keys.
+ CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
+ } else if (Type == MT_Block) {
+ switch (T.Kind) {
+ case Token::TK_BlockEnd:
+ getNext();
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ setError("Unexpected token. Expected Key or Block End", T);
+ case Token::TK_Error:
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ } else {
+ switch (T.Kind) {
+ case Token::TK_FlowEntry:
+ // Eat the flow entry and recurse.
+ getNext();
+ return increment();
+ case Token::TK_FlowMappingEnd:
+ getNext();
+ case Token::TK_Error:
+ // Set this to end iterator.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
+ "Mapping End."
+ , T);
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ }
+}
+
+void SequenceNode::increment() {
+ if (failed()) {
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ return;
+ }
+ if (CurrentEntry)
+ CurrentEntry->skip();
+ Token T = peekNext();
+ if (SeqType == ST_Block) {
+ switch (T.Kind) {
+ case Token::TK_BlockEntry:
+ getNext();
+ CurrentEntry = parseBlockNode();
+ if (CurrentEntry == 0) { // An error occurred.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ break;
+ case Token::TK_BlockEnd:
+ getNext();
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ setError( "Unexpected token. Expected Block Entry or Block End."
+ , T);
+ case Token::TK_Error:
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ } else if (SeqType == ST_Indentless) {
+ switch (T.Kind) {
+ case Token::TK_BlockEntry:
+ getNext();
+ CurrentEntry = parseBlockNode();
+ if (CurrentEntry == 0) { // An error occurred.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ break;
+ default:
+ case Token::TK_Error:
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ } else if (SeqType == ST_Flow) {
+ switch (T.Kind) {
+ case Token::TK_FlowEntry:
+ // Eat the flow entry and recurse.
+ getNext();
+ WasPreviousTokenFlowEntry = true;
+ return increment();
+ case Token::TK_FlowSequenceEnd:
+ getNext();
+ case Token::TK_Error:
+ // Set this to end iterator.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ case Token::TK_StreamEnd:
+ case Token::TK_DocumentEnd:
+ case Token::TK_DocumentStart:
+ setError("Could not find closing ]!", T);
+ // Set this to end iterator.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ if (!WasPreviousTokenFlowEntry) {
+ setError("Expected , between entries!", T);
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ }
+ // Otherwise it must be a flow entry.
+ CurrentEntry = parseBlockNode();
+ if (!CurrentEntry) {
+ IsAtEnd = true;
+ }
+ WasPreviousTokenFlowEntry = false;
+ break;
+ }
+ }
+}
+
+Document::Document(Stream &S) : stream(S), Root(0) {
+ if (parseDirectives())
+ expectToken(Token::TK_DocumentStart);
+ Token &T = peekNext();
+ if (T.Kind == Token::TK_DocumentStart)
+ getNext();
+}
+
+bool Document::skip() {
+ if (stream.scanner->failed())
+ return false;
+ if (!Root)
+ getRoot();
+ Root->skip();
+ Token &T = peekNext();
+ if (T.Kind == Token::TK_StreamEnd)
+ return false;
+ if (T.Kind == Token::TK_DocumentEnd) {
+ getNext();
+ return skip();
+ }
+ return true;
+}
+
+Token &Document::peekNext() {
+ return stream.scanner->peekNext();
+}
+
+Token Document::getNext() {
+ return stream.scanner->getNext();
+}
+
+void Document::setError(const Twine &Message, Token &Location) const {
+ stream.scanner->setError(Message, Location.Range.begin());
+}
+
+bool Document::failed() const {
+ return stream.scanner->failed();
+}
+
+Node *Document::parseBlockNode() {
+ Token T = peekNext();
+ // Handle properties.
+ Token AnchorInfo;
+parse_property:
+ switch (T.Kind) {
+ case Token::TK_Alias:
+ getNext();
+ return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
+ case Token::TK_Anchor:
+ if (AnchorInfo.Kind == Token::TK_Anchor) {
+ setError("Already encountered an anchor for this node!", T);
+ return 0;
+ }
+ AnchorInfo = getNext(); // Consume TK_Anchor.
+ T = peekNext();
+ goto parse_property;
+ case Token::TK_Tag:
+ getNext(); // Skip TK_Tag.
+ T = peekNext();
+ goto parse_property;
+ default:
+ break;
+ }
+
+ switch (T.Kind) {
+ case Token::TK_BlockEntry:
+ // We got an unindented BlockEntry sequence. This is not terminated with
+ // a BlockEnd.
+ // Don't eat the TK_BlockEntry, SequenceNode needs it.
+ return new (NodeAllocator) SequenceNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , SequenceNode::ST_Indentless);
+ case Token::TK_BlockSequenceStart:
+ getNext();
+ return new (NodeAllocator)
+ SequenceNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , SequenceNode::ST_Block);
+ case Token::TK_BlockMappingStart:
+ getNext();
+ return new (NodeAllocator)
+ MappingNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , MappingNode::MT_Block);
+ case Token::TK_FlowSequenceStart:
+ getNext();
+ return new (NodeAllocator)
+ SequenceNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , SequenceNode::ST_Flow);
+ case Token::TK_FlowMappingStart:
+ getNext();
+ return new (NodeAllocator)
+ MappingNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , MappingNode::MT_Flow);
+ case Token::TK_Scalar:
+ getNext();
+ return new (NodeAllocator)
+ ScalarNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , T.Range);
+ case Token::TK_Key:
+ // Don't eat the TK_Key, KeyValueNode expects it.
+ return new (NodeAllocator)
+ MappingNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , MappingNode::MT_Inline);
+ case Token::TK_DocumentStart:
+ case Token::TK_DocumentEnd:
+ case Token::TK_StreamEnd:
+ default:
+ // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
+ // !!null null.
+ return new (NodeAllocator) NullNode(stream.CurrentDoc);
+ case Token::TK_Error:
+ return 0;
+ }
+ llvm_unreachable("Control flow shouldn't reach here.");
+ return 0;
+}
+
+bool Document::parseDirectives() {
+ bool isDirective = false;
+ while (true) {
+ Token T = peekNext();
+ if (T.Kind == Token::TK_TagDirective) {
+ handleTagDirective(getNext());
+ isDirective = true;
+ } else if (T.Kind == Token::TK_VersionDirective) {
+ stream.handleYAMLDirective(getNext());
+ isDirective = true;
+ } else
+ break;
+ }
+ return isDirective;
+}
+
+bool Document::expectToken(int TK) {
+ Token T = getNext();
+ if (T.Kind != TK) {
+ setError("Unexpected token", T);
+ return false;
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm/lib/Support/YAMLTraits.cpp
new file mode 100644
index 000000000000..9da2aa7c841d
--- /dev/null
+++ b/contrib/llvm/lib/Support/YAMLTraits.cpp
@@ -0,0 +1,827 @@
+//===- lib/Support/YAMLTraits.cpp -----------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+using namespace llvm;
+using namespace yaml;
+
+//===----------------------------------------------------------------------===//
+// IO
+//===----------------------------------------------------------------------===//
+
+IO::IO(void *Context) : Ctxt(Context) {
+}
+
+IO::~IO() {
+}
+
+void *IO::getContext() {
+ return Ctxt;
+}
+
+void IO::setContext(void *Context) {
+ Ctxt = Context;
+}
+
+//===----------------------------------------------------------------------===//
+// Input
+//===----------------------------------------------------------------------===//
+
+Input::Input(StringRef InputContent, void *Ctxt)
+ : IO(Ctxt),
+ Strm(new Stream(InputContent, SrcMgr)),
+ CurrentNode(NULL) {
+ DocIterator = Strm->begin();
+}
+
+Input::~Input() {
+
+}
+
+error_code Input::error() {
+ return EC;
+}
+
+void Input::setDiagHandler(SourceMgr::DiagHandlerTy Handler, void *Ctxt) {
+ SrcMgr.setDiagHandler(Handler, Ctxt);
+}
+
+bool Input::outputting() {
+ return false;
+}
+
+bool Input::setCurrentDocument() {
+ if (DocIterator != Strm->end()) {
+ Node *N = DocIterator->getRoot();
+ if (isa<NullNode>(N)) {
+ // Empty files are allowed and ignored
+ ++DocIterator;
+ return setCurrentDocument();
+ }
+ TopNode.reset(this->createHNodes(N));
+ CurrentNode = TopNode.get();
+ return true;
+ }
+ return false;
+}
+
+void Input::nextDocument() {
+ ++DocIterator;
+}
+
+void Input::beginMapping() {
+ if (EC)
+ return;
+ MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
+ if (MN) {
+ MN->ValidKeys.clear();
+ }
+}
+
+bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
+ void *&SaveInfo) {
+ UseDefault = false;
+ if (EC)
+ return false;
+ MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
+ if (!MN) {
+ setError(CurrentNode, "not a mapping");
+ return false;
+ }
+ MN->ValidKeys.push_back(Key);
+ HNode *Value = MN->Mapping[Key];
+ if (!Value) {
+ if (Required)
+ setError(CurrentNode, Twine("missing required key '") + Key + "'");
+ else
+ UseDefault = true;
+ return false;
+ }
+ SaveInfo = CurrentNode;
+ CurrentNode = Value;
+ return true;
+}
+
+void Input::postflightKey(void *saveInfo) {
+ CurrentNode = reinterpret_cast<HNode *>(saveInfo);
+}
+
+void Input::endMapping() {
+ if (EC)
+ return;
+ MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
+ if (!MN)
+ return;
+ for (MapHNode::NameToNode::iterator i = MN->Mapping.begin(),
+ End = MN->Mapping.end(); i != End; ++i) {
+ if (!MN->isValidKey(i->first)) {
+ setError(i->second, Twine("unknown key '") + i->first + "'");
+ break;
+ }
+ }
+}
+
+unsigned Input::beginSequence() {
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ return SQ->Entries.size();
+ }
+ return 0;
+}
+
+void Input::endSequence() {
+}
+
+bool Input::preflightElement(unsigned Index, void *&SaveInfo) {
+ if (EC)
+ return false;
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ SaveInfo = CurrentNode;
+ CurrentNode = SQ->Entries[Index];
+ return true;
+ }
+ return false;
+}
+
+void Input::postflightElement(void *SaveInfo) {
+ CurrentNode = reinterpret_cast<HNode *>(SaveInfo);
+}
+
+unsigned Input::beginFlowSequence() {
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ return SQ->Entries.size();
+ }
+ return 0;
+}
+
+bool Input::preflightFlowElement(unsigned index, void *&SaveInfo) {
+ if (EC)
+ return false;
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ SaveInfo = CurrentNode;
+ CurrentNode = SQ->Entries[index];
+ return true;
+ }
+ return false;
+}
+
+void Input::postflightFlowElement(void *SaveInfo) {
+ CurrentNode = reinterpret_cast<HNode *>(SaveInfo);
+}
+
+void Input::endFlowSequence() {
+}
+
+void Input::beginEnumScalar() {
+ ScalarMatchFound = false;
+}
+
+bool Input::matchEnumScalar(const char *Str, bool) {
+ if (ScalarMatchFound)
+ return false;
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
+ if (SN->value().equals(Str)) {
+ ScalarMatchFound = true;
+ return true;
+ }
+ }
+ return false;
+}
+
+void Input::endEnumScalar() {
+ if (!ScalarMatchFound) {
+ setError(CurrentNode, "unknown enumerated scalar");
+ }
+}
+
+bool Input::beginBitSetScalar(bool &DoClear) {
+ BitValuesUsed.clear();
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ BitValuesUsed.insert(BitValuesUsed.begin(), SQ->Entries.size(), false);
+ } else {
+ setError(CurrentNode, "expected sequence of bit values");
+ }
+ DoClear = true;
+ return true;
+}
+
+bool Input::bitSetMatch(const char *Str, bool) {
+ if (EC)
+ return false;
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ unsigned Index = 0;
+ for (std::vector<HNode *>::iterator i = SQ->Entries.begin(),
+ End = SQ->Entries.end(); i != End; ++i) {
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(*i)) {
+ if (SN->value().equals(Str)) {
+ BitValuesUsed[Index] = true;
+ return true;
+ }
+ } else {
+ setError(CurrentNode, "unexpected scalar in sequence of bit values");
+ }
+ ++Index;
+ }
+ } else {
+ setError(CurrentNode, "expected sequence of bit values");
+ }
+ return false;
+}
+
+void Input::endBitSetScalar() {
+ if (EC)
+ return;
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ assert(BitValuesUsed.size() == SQ->Entries.size());
+ for (unsigned i = 0; i < SQ->Entries.size(); ++i) {
+ if (!BitValuesUsed[i]) {
+ setError(SQ->Entries[i], "unknown bit value");
+ return;
+ }
+ }
+ }
+}
+
+void Input::scalarString(StringRef &S) {
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
+ S = SN->value();
+ } else {
+ setError(CurrentNode, "unexpected scalar");
+ }
+}
+
+void Input::setError(HNode *hnode, const Twine &message) {
+ this->setError(hnode->_node, message);
+}
+
+void Input::setError(Node *node, const Twine &message) {
+ Strm->printError(node, message);
+ EC = make_error_code(errc::invalid_argument);
+}
+
+Input::HNode *Input::createHNodes(Node *N) {
+ SmallString<128> StringStorage;
+ if (ScalarNode *SN = dyn_cast<ScalarNode>(N)) {
+ StringRef KeyStr = SN->getValue(StringStorage);
+ if (!StringStorage.empty()) {
+ // Copy string to permanent storage
+ unsigned Len = StringStorage.size();
+ char *Buf = StringAllocator.Allocate<char>(Len);
+ memcpy(Buf, &StringStorage[0], Len);
+ KeyStr = StringRef(Buf, Len);
+ }
+ return new ScalarHNode(N, KeyStr);
+ } else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) {
+ SequenceHNode *SQHNode = new SequenceHNode(N);
+ for (SequenceNode::iterator i = SQ->begin(), End = SQ->end(); i != End;
+ ++i) {
+ HNode *Entry = this->createHNodes(i);
+ if (EC)
+ break;
+ SQHNode->Entries.push_back(Entry);
+ }
+ return SQHNode;
+ } else if (MappingNode *Map = dyn_cast<MappingNode>(N)) {
+ MapHNode *mapHNode = new MapHNode(N);
+ for (MappingNode::iterator i = Map->begin(), End = Map->end(); i != End;
+ ++i) {
+ ScalarNode *KeyScalar = dyn_cast<ScalarNode>(i->getKey());
+ StringStorage.clear();
+ StringRef KeyStr = KeyScalar->getValue(StringStorage);
+ if (!StringStorage.empty()) {
+ // Copy string to permanent storage
+ unsigned Len = StringStorage.size();
+ char *Buf = StringAllocator.Allocate<char>(Len);
+ memcpy(Buf, &StringStorage[0], Len);
+ KeyStr = StringRef(Buf, Len);
+ }
+ HNode *ValueHNode = this->createHNodes(i->getValue());
+ if (EC)
+ break;
+ mapHNode->Mapping[KeyStr] = ValueHNode;
+ }
+ return mapHNode;
+ } else if (isa<NullNode>(N)) {
+ return new EmptyHNode(N);
+ } else {
+ setError(N, "unknown node kind");
+ return NULL;
+ }
+}
+
+bool Input::MapHNode::isValidKey(StringRef Key) {
+ for (SmallVector<const char *, 6>::iterator i = ValidKeys.begin(),
+ End = ValidKeys.end(); i != End; ++i) {
+ if (Key.equals(*i))
+ return true;
+ }
+ return false;
+}
+
+void Input::setError(const Twine &Message) {
+ this->setError(CurrentNode, Message);
+}
+
+Input::MapHNode::~MapHNode() {
+ for (MapHNode::NameToNode::iterator i = Mapping.begin(), End = Mapping.end();
+ i != End; ++i) {
+ delete i->second;
+ }
+}
+
+Input::SequenceHNode::~SequenceHNode() {
+ for (std::vector<HNode*>::iterator i = Entries.begin(), End = Entries.end();
+ i != End; ++i) {
+ delete *i;
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Output
+//===----------------------------------------------------------------------===//
+
+Output::Output(raw_ostream &yout, void *context)
+ : IO(context),
+ Out(yout),
+ Column(0),
+ ColumnAtFlowStart(0),
+ NeedBitValueComma(false),
+ NeedFlowSequenceComma(false),
+ EnumerationMatchFound(false),
+ NeedsNewLine(false) {
+}
+
+Output::~Output() {
+}
+
+bool Output::outputting() {
+ return true;
+}
+
+void Output::beginMapping() {
+ StateStack.push_back(inMapFirstKey);
+ NeedsNewLine = true;
+}
+
+void Output::endMapping() {
+ StateStack.pop_back();
+}
+
+bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
+ bool &UseDefault, void *&) {
+ UseDefault = false;
+ if (Required || !SameAsDefault) {
+ this->newLineCheck();
+ this->paddedKey(Key);
+ return true;
+ }
+ return false;
+}
+
+void Output::postflightKey(void *) {
+ if (StateStack.back() == inMapFirstKey) {
+ StateStack.pop_back();
+ StateStack.push_back(inMapOtherKey);
+ }
+}
+
+void Output::beginDocuments() {
+ this->outputUpToEndOfLine("---");
+}
+
+bool Output::preflightDocument(unsigned index) {
+ if (index > 0)
+ this->outputUpToEndOfLine("\n---");
+ return true;
+}
+
+void Output::postflightDocument() {
+}
+
+void Output::endDocuments() {
+ output("\n...\n");
+}
+
+unsigned Output::beginSequence() {
+ StateStack.push_back(inSeq);
+ NeedsNewLine = true;
+ return 0;
+}
+
+void Output::endSequence() {
+ StateStack.pop_back();
+}
+
+bool Output::preflightElement(unsigned, void *&) {
+ return true;
+}
+
+void Output::postflightElement(void *) {
+}
+
+unsigned Output::beginFlowSequence() {
+ StateStack.push_back(inFlowSeq);
+ this->newLineCheck();
+ ColumnAtFlowStart = Column;
+ output("[ ");
+ NeedFlowSequenceComma = false;
+ return 0;
+}
+
+void Output::endFlowSequence() {
+ StateStack.pop_back();
+ this->outputUpToEndOfLine(" ]");
+}
+
+bool Output::preflightFlowElement(unsigned, void *&) {
+ if (NeedFlowSequenceComma)
+ output(", ");
+ if (Column > 70) {
+ output("\n");
+ for (int i = 0; i < ColumnAtFlowStart; ++i)
+ output(" ");
+ Column = ColumnAtFlowStart;
+ output(" ");
+ }
+ return true;
+}
+
+void Output::postflightFlowElement(void *) {
+ NeedFlowSequenceComma = true;
+}
+
+void Output::beginEnumScalar() {
+ EnumerationMatchFound = false;
+}
+
+bool Output::matchEnumScalar(const char *Str, bool Match) {
+ if (Match && !EnumerationMatchFound) {
+ this->newLineCheck();
+ this->outputUpToEndOfLine(Str);
+ EnumerationMatchFound = true;
+ }
+ return false;
+}
+
+void Output::endEnumScalar() {
+ if (!EnumerationMatchFound)
+ llvm_unreachable("bad runtime enum value");
+}
+
+bool Output::beginBitSetScalar(bool &DoClear) {
+ this->newLineCheck();
+ output("[ ");
+ NeedBitValueComma = false;
+ DoClear = false;
+ return true;
+}
+
+bool Output::bitSetMatch(const char *Str, bool Matches) {
+ if (Matches) {
+ if (NeedBitValueComma)
+ output(", ");
+ this->output(Str);
+ NeedBitValueComma = true;
+ }
+ return false;
+}
+
+void Output::endBitSetScalar() {
+ this->outputUpToEndOfLine(" ]");
+}
+
+void Output::scalarString(StringRef &S) {
+ this->newLineCheck();
+ if (S.find('\n') == StringRef::npos) {
+ // No embedded new-line chars, just print string.
+ this->outputUpToEndOfLine(S);
+ return;
+ }
+ unsigned i = 0;
+ unsigned j = 0;
+ unsigned End = S.size();
+ output("'"); // Starting single quote.
+ const char *Base = S.data();
+ while (j < End) {
+ // Escape a single quote by doubling it.
+ if (S[j] == '\'') {
+ output(StringRef(&Base[i], j - i + 1));
+ output("'");
+ i = j + 1;
+ }
+ ++j;
+ }
+ output(StringRef(&Base[i], j - i));
+ this->outputUpToEndOfLine("'"); // Ending single quote.
+}
+
+void Output::setError(const Twine &message) {
+}
+
+void Output::output(StringRef s) {
+ Column += s.size();
+ Out << s;
+}
+
+void Output::outputUpToEndOfLine(StringRef s) {
+ this->output(s);
+ if (StateStack.empty() || StateStack.back() != inFlowSeq)
+ NeedsNewLine = true;
+}
+
+void Output::outputNewLine() {
+ Out << "\n";
+ Column = 0;
+}
+
+// if seq at top, indent as if map, then add "- "
+// if seq in middle, use "- " if firstKey, else use " "
+//
+
+void Output::newLineCheck() {
+ if (!NeedsNewLine)
+ return;
+ NeedsNewLine = false;
+
+ this->outputNewLine();
+
+ assert(StateStack.size() > 0);
+ unsigned Indent = StateStack.size() - 1;
+ bool OutputDash = false;
+
+ if (StateStack.back() == inSeq) {
+ OutputDash = true;
+ } else if ((StateStack.size() > 1) && (StateStack.back() == inMapFirstKey) &&
+ (StateStack[StateStack.size() - 2] == inSeq)) {
+ --Indent;
+ OutputDash = true;
+ }
+
+ for (unsigned i = 0; i < Indent; ++i) {
+ output(" ");
+ }
+ if (OutputDash) {
+ output("- ");
+ }
+
+}
+
+void Output::paddedKey(StringRef key) {
+ output(key);
+ output(":");
+ const char *spaces = " ";
+ if (key.size() < strlen(spaces))
+ output(&spaces[key.size()]);
+ else
+ output(" ");
+}
+
+//===----------------------------------------------------------------------===//
+// traits for built-in types
+//===----------------------------------------------------------------------===//
+
+void ScalarTraits<bool>::output(const bool &Val, void *, raw_ostream &Out) {
+ Out << (Val ? "true" : "false");
+}
+
+StringRef ScalarTraits<bool>::input(StringRef Scalar, void *, bool &Val) {
+ if (Scalar.equals("true")) {
+ Val = true;
+ return StringRef();
+ } else if (Scalar.equals("false")) {
+ Val = false;
+ return StringRef();
+ }
+ return "invalid boolean";
+}
+
+void ScalarTraits<StringRef>::output(const StringRef &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<StringRef>::input(StringRef Scalar, void *,
+ StringRef &Val) {
+ Val = Scalar;
+ return StringRef();
+}
+
+void ScalarTraits<uint8_t>::output(const uint8_t &Val, void *,
+ raw_ostream &Out) {
+ // use temp uin32_t because ostream thinks uint8_t is a character
+ uint32_t Num = Val;
+ Out << Num;
+}
+
+StringRef ScalarTraits<uint8_t>::input(StringRef Scalar, void *, uint8_t &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid number";
+ if (n > 0xFF)
+ return "out of range number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<uint16_t>::output(const uint16_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<uint16_t>::input(StringRef Scalar, void *,
+ uint16_t &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid number";
+ if (n > 0xFFFF)
+ return "out of range number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<uint32_t>::output(const uint32_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<uint32_t>::input(StringRef Scalar, void *,
+ uint32_t &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid number";
+ if (n > 0xFFFFFFFFUL)
+ return "out of range number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<uint64_t>::output(const uint64_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<uint64_t>::input(StringRef Scalar, void *,
+ uint64_t &Val) {
+ unsigned long long N;
+ if (getAsUnsignedInteger(Scalar, 0, N))
+ return "invalid number";
+ Val = N;
+ return StringRef();
+}
+
+void ScalarTraits<int8_t>::output(const int8_t &Val, void *, raw_ostream &Out) {
+ // use temp in32_t because ostream thinks int8_t is a character
+ int32_t Num = Val;
+ Out << Num;
+}
+
+StringRef ScalarTraits<int8_t>::input(StringRef Scalar, void *, int8_t &Val) {
+ long long N;
+ if (getAsSignedInteger(Scalar, 0, N))
+ return "invalid number";
+ if ((N > 127) || (N < -128))
+ return "out of range number";
+ Val = N;
+ return StringRef();
+}
+
+void ScalarTraits<int16_t>::output(const int16_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<int16_t>::input(StringRef Scalar, void *, int16_t &Val) {
+ long long N;
+ if (getAsSignedInteger(Scalar, 0, N))
+ return "invalid number";
+ if ((N > INT16_MAX) || (N < INT16_MIN))
+ return "out of range number";
+ Val = N;
+ return StringRef();
+}
+
+void ScalarTraits<int32_t>::output(const int32_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<int32_t>::input(StringRef Scalar, void *, int32_t &Val) {
+ long long N;
+ if (getAsSignedInteger(Scalar, 0, N))
+ return "invalid number";
+ if ((N > INT32_MAX) || (N < INT32_MIN))
+ return "out of range number";
+ Val = N;
+ return StringRef();
+}
+
+void ScalarTraits<int64_t>::output(const int64_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<int64_t>::input(StringRef Scalar, void *, int64_t &Val) {
+ long long N;
+ if (getAsSignedInteger(Scalar, 0, N))
+ return "invalid number";
+ Val = N;
+ return StringRef();
+}
+
+void ScalarTraits<double>::output(const double &Val, void *, raw_ostream &Out) {
+ Out << format("%g", Val);
+}
+
+StringRef ScalarTraits<double>::input(StringRef Scalar, void *, double &Val) {
+ SmallString<32> buff(Scalar.begin(), Scalar.end());
+ char *end;
+ Val = strtod(buff.c_str(), &end);
+ if (*end != '\0')
+ return "invalid floating point number";
+ return StringRef();
+}
+
+void ScalarTraits<float>::output(const float &Val, void *, raw_ostream &Out) {
+ Out << format("%g", Val);
+}
+
+StringRef ScalarTraits<float>::input(StringRef Scalar, void *, float &Val) {
+ SmallString<32> buff(Scalar.begin(), Scalar.end());
+ char *end;
+ Val = strtod(buff.c_str(), &end);
+ if (*end != '\0')
+ return "invalid floating point number";
+ return StringRef();
+}
+
+void ScalarTraits<Hex8>::output(const Hex8 &Val, void *, raw_ostream &Out) {
+ uint8_t Num = Val;
+ Out << format("0x%02X", Num);
+}
+
+StringRef ScalarTraits<Hex8>::input(StringRef Scalar, void *, Hex8 &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid hex8 number";
+ if (n > 0xFF)
+ return "out of range hex8 number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<Hex16>::output(const Hex16 &Val, void *, raw_ostream &Out) {
+ uint16_t Num = Val;
+ Out << format("0x%04X", Num);
+}
+
+StringRef ScalarTraits<Hex16>::input(StringRef Scalar, void *, Hex16 &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid hex16 number";
+ if (n > 0xFFFF)
+ return "out of range hex16 number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<Hex32>::output(const Hex32 &Val, void *, raw_ostream &Out) {
+ uint32_t Num = Val;
+ Out << format("0x%08X", Num);
+}
+
+StringRef ScalarTraits<Hex32>::input(StringRef Scalar, void *, Hex32 &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid hex32 number";
+ if (n > 0xFFFFFFFFUL)
+ return "out of range hex32 number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<Hex64>::output(const Hex64 &Val, void *, raw_ostream &Out) {
+ uint64_t Num = Val;
+ Out << format("0x%016llX", Num);
+}
+
+StringRef ScalarTraits<Hex64>::input(StringRef Scalar, void *, Hex64 &Val) {
+ unsigned long long Num;
+ if (getAsUnsignedInteger(Scalar, 0, Num))
+ return "invalid hex64 number";
+ Val = Num;
+ return StringRef();
+}
diff --git a/contrib/llvm/lib/Support/circular_raw_ostream.cpp b/contrib/llvm/lib/Support/circular_raw_ostream.cpp
new file mode 100644
index 000000000000..ca0d30db388c
--- /dev/null
+++ b/contrib/llvm/lib/Support/circular_raw_ostream.cpp
@@ -0,0 +1,45 @@
+//===- circular_raw_ostream.cpp - Implement circular_raw_ostream ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support for circular buffered streams.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/circular_raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) {
+ if (BufferSize == 0) {
+ TheStream->write(Ptr, Size);
+ return;
+ }
+
+ // Write into the buffer, wrapping if necessary.
+ while (Size != 0) {
+ unsigned Bytes =
+ std::min(unsigned(Size), unsigned(BufferSize - (Cur - BufferArray)));
+ memcpy(Cur, Ptr, Bytes);
+ Size -= Bytes;
+ Cur += Bytes;
+ if (Cur == BufferArray + BufferSize) {
+ // Reset the output pointer to the start of the buffer.
+ Cur = BufferArray;
+ Filled = true;
+ }
+ }
+}
+
+void circular_raw_ostream::flushBufferWithBanner() {
+ if (BufferSize != 0) {
+ // Write out the buffer
+ TheStream->write(Banner, std::strlen(Banner));
+ flushBuffer();
+ }
+}
diff --git a/contrib/llvm/lib/Support/raw_os_ostream.cpp b/contrib/llvm/lib/Support/raw_os_ostream.cpp
new file mode 100644
index 000000000000..44f2325d7f8a
--- /dev/null
+++ b/contrib/llvm/lib/Support/raw_os_ostream.cpp
@@ -0,0 +1,30 @@
+//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support adapting raw_ostream to std::ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_os_ostream.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// raw_os_ostream
+//===----------------------------------------------------------------------===//
+
+raw_os_ostream::~raw_os_ostream() {
+ flush();
+}
+
+void raw_os_ostream::write_impl(const char *Ptr, size_t Size) {
+ OS.write(Ptr, Size);
+}
+
+uint64_t raw_os_ostream::current_pos() const { return OS.tellp(); }
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
new file mode 100644
index 000000000000..a433088b1930
--- /dev/null
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -0,0 +1,789 @@
+//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support for bulk buffered stream output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/system_error.h"
+#include <cctype>
+#include <cerrno>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(HAVE_FCNTL_H)
+# include <fcntl.h>
+#endif
+#if defined(HAVE_SYS_UIO_H) && defined(HAVE_WRITEV)
+# include <sys/uio.h>
+#endif
+
+#if defined(__CYGWIN__)
+#include <io.h>
+#endif
+
+#if defined(_MSC_VER)
+#include <io.h>
+#include <fcntl.h>
+#ifndef STDIN_FILENO
+# define STDIN_FILENO 0
+#endif
+#ifndef STDOUT_FILENO
+# define STDOUT_FILENO 1
+#endif
+#ifndef STDERR_FILENO
+# define STDERR_FILENO 2
+#endif
+#endif
+
+using namespace llvm;
+
+raw_ostream::~raw_ostream() {
+ // raw_ostream's subclasses should take care to flush the buffer
+ // in their destructors.
+ assert(OutBufCur == OutBufStart &&
+ "raw_ostream destructor called with non-empty buffer!");
+
+ if (BufferMode == InternalBuffer)
+ delete [] OutBufStart;
+}
+
+// An out of line virtual method to provide a home for the class vtable.
+void raw_ostream::handle() {}
+
+size_t raw_ostream::preferred_buffer_size() const {
+ // BUFSIZ is intended to be a reasonable default.
+ return BUFSIZ;
+}
+
+void raw_ostream::SetBuffered() {
+ // Ask the subclass to determine an appropriate buffer size.
+ if (size_t Size = preferred_buffer_size())
+ SetBufferSize(Size);
+ else
+ // It may return 0, meaning this stream should be unbuffered.
+ SetUnbuffered();
+}
+
+void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
+ BufferKind Mode) {
+ assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) ||
+ (Mode != Unbuffered && BufferStart && Size)) &&
+ "stream must be unbuffered or have at least one byte");
+ // Make sure the current buffer is free of content (we can't flush here; the
+ // child buffer management logic will be in write_impl).
+ assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!");
+
+ if (BufferMode == InternalBuffer)
+ delete [] OutBufStart;
+ OutBufStart = BufferStart;
+ OutBufEnd = OutBufStart+Size;
+ OutBufCur = OutBufStart;
+ BufferMode = Mode;
+
+ assert(OutBufStart <= OutBufEnd && "Invalid size!");
+}
+
+raw_ostream &raw_ostream::operator<<(unsigned long N) {
+ // Zero is a special case.
+ if (N == 0)
+ return *this << '0';
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ *--CurPtr = '0' + char(N % 10);
+ N /= 10;
+ }
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::operator<<(long N) {
+ if (N < 0) {
+ *this << '-';
+ // Avoid undefined behavior on LONG_MIN with a cast.
+ N = -(unsigned long)N;
+ }
+
+ return this->operator<<(static_cast<unsigned long>(N));
+}
+
+raw_ostream &raw_ostream::operator<<(unsigned long long N) {
+ // Output using 32-bit div/mod when possible.
+ if (N == static_cast<unsigned long>(N))
+ return this->operator<<(static_cast<unsigned long>(N));
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ *--CurPtr = '0' + char(N % 10);
+ N /= 10;
+ }
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::operator<<(long long N) {
+ if (N < 0) {
+ *this << '-';
+ // Avoid undefined behavior on INT64_MIN with a cast.
+ N = -(unsigned long long)N;
+ }
+
+ return this->operator<<(static_cast<unsigned long long>(N));
+}
+
+raw_ostream &raw_ostream::write_hex(unsigned long long N) {
+ // Zero is a special case.
+ if (N == 0)
+ return *this << '0';
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ uintptr_t x = N % 16;
+ *--CurPtr = (x < 10 ? '0' + x : 'a' + x - 10);
+ N /= 16;
+ }
+
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::write_escaped(StringRef Str,
+ bool UseHexEscapes) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char c = Str[i];
+
+ switch (c) {
+ case '\\':
+ *this << '\\' << '\\';
+ break;
+ case '\t':
+ *this << '\\' << 't';
+ break;
+ case '\n':
+ *this << '\\' << 'n';
+ break;
+ case '"':
+ *this << '\\' << '"';
+ break;
+ default:
+ if (std::isprint(c)) {
+ *this << c;
+ break;
+ }
+
+ // Write out the escaped representation.
+ if (UseHexEscapes) {
+ *this << '\\' << 'x';
+ *this << hexdigit((c >> 4 & 0xF));
+ *this << hexdigit((c >> 0) & 0xF);
+ } else {
+ // Always use a full 3-character octal escape.
+ *this << '\\';
+ *this << char('0' + ((c >> 6) & 7));
+ *this << char('0' + ((c >> 3) & 7));
+ *this << char('0' + ((c >> 0) & 7));
+ }
+ }
+ }
+
+ return *this;
+}
+
+raw_ostream &raw_ostream::operator<<(const void *P) {
+ *this << '0' << 'x';
+
+ return write_hex((uintptr_t) P);
+}
+
+raw_ostream &raw_ostream::operator<<(double N) {
+#ifdef _WIN32
+ // On MSVCRT and compatible, output of %e is incompatible to Posix
+ // by default. Number of exponent digits should be at least 2. "%+03d"
+ // FIXME: Implement our formatter to here or Support/Format.h!
+ int fpcl = _fpclass(N);
+
+ // negative zero
+ if (fpcl == _FPCLASS_NZ)
+ return *this << "-0.000000e+00";
+
+ char buf[16];
+ unsigned len;
+ len = snprintf(buf, sizeof(buf), "%e", N);
+ if (len <= sizeof(buf) - 2) {
+ if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') {
+ int cs = buf[len - 4];
+ if (cs == '+' || cs == '-') {
+ int c1 = buf[len - 2];
+ int c0 = buf[len - 1];
+ if (isdigit(static_cast<unsigned char>(c1)) &&
+ isdigit(static_cast<unsigned char>(c0))) {
+ // Trim leading '0': "...e+012" -> "...e+12\0"
+ buf[len - 3] = c1;
+ buf[len - 2] = c0;
+ buf[--len] = 0;
+ }
+ }
+ }
+ return this->operator<<(buf);
+ }
+#endif
+ return this->operator<<(format("%e", N));
+}
+
+
+
+void raw_ostream::flush_nonempty() {
+ assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
+ size_t Length = OutBufCur - OutBufStart;
+ OutBufCur = OutBufStart;
+ write_impl(OutBufStart, Length);
+}
+
+raw_ostream &raw_ostream::write(unsigned char C) {
+ // Group exceptional cases into a single branch.
+ if (LLVM_UNLIKELY(OutBufCur >= OutBufEnd)) {
+ if (LLVM_UNLIKELY(!OutBufStart)) {
+ if (BufferMode == Unbuffered) {
+ write_impl(reinterpret_cast<char*>(&C), 1);
+ return *this;
+ }
+ // Set up a buffer and start over.
+ SetBuffered();
+ return write(C);
+ }
+
+ flush_nonempty();
+ }
+
+ *OutBufCur++ = C;
+ return *this;
+}
+
+raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
+ // Group exceptional cases into a single branch.
+ if (LLVM_UNLIKELY(size_t(OutBufEnd - OutBufCur) < Size)) {
+ if (LLVM_UNLIKELY(!OutBufStart)) {
+ if (BufferMode == Unbuffered) {
+ write_impl(Ptr, Size);
+ return *this;
+ }
+ // Set up a buffer and start over.
+ SetBuffered();
+ return write(Ptr, Size);
+ }
+
+ size_t NumBytes = OutBufEnd - OutBufCur;
+
+ // If the buffer is empty at this point we have a string that is larger
+ // than the buffer. Directly write the chunk that is a multiple of the
+ // preferred buffer size and put the remainder in the buffer.
+ if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) {
+ size_t BytesToWrite = Size - (Size % NumBytes);
+ write_impl(Ptr, BytesToWrite);
+ size_t BytesRemaining = Size - BytesToWrite;
+ if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) {
+ // Too much left over to copy into our buffer.
+ return write(Ptr + BytesToWrite, BytesRemaining);
+ }
+ copy_to_buffer(Ptr + BytesToWrite, BytesRemaining);
+ return *this;
+ }
+
+ // We don't have enough space in the buffer to fit the string in. Insert as
+ // much as possible, flush and start over with the remainder.
+ copy_to_buffer(Ptr, NumBytes);
+ flush_nonempty();
+ return write(Ptr + NumBytes, Size - NumBytes);
+ }
+
+ copy_to_buffer(Ptr, Size);
+
+ return *this;
+}
+
+void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) {
+ assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!");
+
+ // Handle short strings specially, memcpy isn't very good at very short
+ // strings.
+ switch (Size) {
+ case 4: OutBufCur[3] = Ptr[3]; // FALL THROUGH
+ case 3: OutBufCur[2] = Ptr[2]; // FALL THROUGH
+ case 2: OutBufCur[1] = Ptr[1]; // FALL THROUGH
+ case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH
+ case 0: break;
+ default:
+ memcpy(OutBufCur, Ptr, Size);
+ break;
+ }
+
+ OutBufCur += Size;
+}
+
+// Formatted output.
+raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
+ // If we have more than a few bytes left in our output buffer, try
+ // formatting directly onto its end.
+ size_t NextBufferSize = 127;
+ size_t BufferBytesLeft = OutBufEnd - OutBufCur;
+ if (BufferBytesLeft > 3) {
+ size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft);
+
+ // Common case is that we have plenty of space.
+ if (BytesUsed <= BufferBytesLeft) {
+ OutBufCur += BytesUsed;
+ return *this;
+ }
+
+ // Otherwise, we overflowed and the return value tells us the size to try
+ // again with.
+ NextBufferSize = BytesUsed;
+ }
+
+ // If we got here, we didn't have enough space in the output buffer for the
+ // string. Try printing into a SmallVector that is resized to have enough
+ // space. Iterate until we win.
+ SmallVector<char, 128> V;
+
+ while (1) {
+ V.resize(NextBufferSize);
+
+ // Try formatting into the SmallVector.
+ size_t BytesUsed = Fmt.print(V.data(), NextBufferSize);
+
+ // If BytesUsed fit into the vector, we win.
+ if (BytesUsed <= NextBufferSize)
+ return write(V.data(), BytesUsed);
+
+ // Otherwise, try again with a new size.
+ assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?");
+ NextBufferSize = BytesUsed;
+ }
+}
+
+/// indent - Insert 'NumSpaces' spaces.
+raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
+ static const char Spaces[] = " "
+ " "
+ " ";
+
+ // Usually the indentation is small, handle it with a fastpath.
+ if (NumSpaces < array_lengthof(Spaces))
+ return write(Spaces, NumSpaces);
+
+ while (NumSpaces) {
+ unsigned NumToWrite = std::min(NumSpaces,
+ (unsigned)array_lengthof(Spaces)-1);
+ write(Spaces, NumToWrite);
+ NumSpaces -= NumToWrite;
+ }
+ return *this;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Formatted Output
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void format_object_base::home() {
+}
+
+//===----------------------------------------------------------------------===//
+// raw_fd_ostream
+//===----------------------------------------------------------------------===//
+
+/// raw_fd_ostream - Open the specified file for writing. If an error
+/// occurs, information about the error is put into ErrorInfo, and the
+/// stream should be immediately destroyed; the string will be empty
+/// if no error occurred.
+raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
+ unsigned Flags)
+ : Error(false), UseAtomicWrites(false), pos(0)
+{
+ assert(Filename != 0 && "Filename is null");
+ // Verify that we don't have both "append" and "excl".
+ assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
+ "Cannot specify both 'excl' and 'append' file creation flags!");
+
+ ErrorInfo.clear();
+
+ // Handle "-" as stdout. Note that when we do this, we consider ourself
+ // the owner of stdout. This means that we can do things like close the
+ // file descriptor when we're done and set the "binary" flag globally.
+ if (Filename[0] == '-' && Filename[1] == 0) {
+ FD = STDOUT_FILENO;
+ // If user requested binary then put stdout into binary mode if
+ // possible.
+ if (Flags & F_Binary)
+ sys::Program::ChangeStdoutToBinary();
+ // Close stdout when we're done, to detect any output errors.
+ ShouldClose = true;
+ return;
+ }
+
+ int OpenFlags = O_WRONLY|O_CREAT;
+#ifdef O_BINARY
+ if (Flags & F_Binary)
+ OpenFlags |= O_BINARY;
+#endif
+
+ if (Flags & F_Append)
+ OpenFlags |= O_APPEND;
+ else
+ OpenFlags |= O_TRUNC;
+ if (Flags & F_Excl)
+ OpenFlags |= O_EXCL;
+
+ while ((FD = open(Filename, OpenFlags, 0664)) < 0) {
+ if (errno != EINTR) {
+ ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
+ ShouldClose = false;
+ return;
+ }
+ }
+
+ // Ok, we successfully opened the file, so it'll need to be closed.
+ ShouldClose = true;
+}
+
+/// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If
+/// ShouldClose is true, this closes the file when the stream is destroyed.
+raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
+ : raw_ostream(unbuffered), FD(fd),
+ ShouldClose(shouldClose), Error(false), UseAtomicWrites(false) {
+#ifdef O_BINARY
+ // Setting STDOUT and STDERR to binary mode is necessary in Win32
+ // to avoid undesirable linefeed conversion.
+ if (fd == STDOUT_FILENO || fd == STDERR_FILENO)
+ setmode(fd, O_BINARY);
+#endif
+
+ // Get the starting position.
+ off_t loc = ::lseek(FD, 0, SEEK_CUR);
+ if (loc == (off_t)-1)
+ pos = 0;
+ else
+ pos = static_cast<uint64_t>(loc);
+}
+
+raw_fd_ostream::~raw_fd_ostream() {
+ if (FD >= 0) {
+ flush();
+ if (ShouldClose)
+ while (::close(FD) != 0)
+ if (errno != EINTR) {
+ error_detected();
+ break;
+ }
+ }
+
+#ifdef __MINGW32__
+ // On mingw, global dtors should not call exit().
+ // report_fatal_error() invokes exit(). We know report_fatal_error()
+ // might not write messages to stderr when any errors were detected
+ // on FD == 2.
+ if (FD == 2) return;
+#endif
+
+ // If there are any pending errors, report them now. Clients wishing
+ // to avoid report_fatal_error calls should check for errors with
+ // has_error() and clear the error flag with clear_error() before
+ // destructing raw_ostream objects which may have errors.
+ if (has_error())
+ report_fatal_error("IO failure on output stream.", /*GenCrashDiag=*/false);
+}
+
+
+void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
+ assert(FD >= 0 && "File already closed.");
+ pos += Size;
+
+ do {
+ ssize_t ret;
+
+ // Check whether we should attempt to use atomic writes.
+ if (LLVM_LIKELY(!UseAtomicWrites)) {
+ ret = ::write(FD, Ptr, Size);
+ } else {
+ // Use ::writev() where available.
+#if defined(HAVE_WRITEV)
+ const void *Addr = static_cast<const void *>(Ptr);
+ struct iovec IOV = {const_cast<void *>(Addr), Size };
+ ret = ::writev(FD, &IOV, 1);
+#else
+ ret = ::write(FD, Ptr, Size);
+#endif
+ }
+
+ if (ret < 0) {
+ // If it's a recoverable error, swallow it and retry the write.
+ //
+ // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
+ // raw_ostream isn't designed to do non-blocking I/O. However, some
+ // programs, such as old versions of bjam, have mistakenly used
+ // O_NONBLOCK. For compatibility, emulate blocking semantics by
+ // spinning until the write succeeds. If you don't want spinning,
+ // don't use O_NONBLOCK file descriptors with raw_ostream.
+ if (errno == EINTR || errno == EAGAIN
+#ifdef EWOULDBLOCK
+ || errno == EWOULDBLOCK
+#endif
+ )
+ continue;
+
+ // Otherwise it's a non-recoverable error. Note it and quit.
+ error_detected();
+ break;
+ }
+
+ // The write may have written some or all of the data. Update the
+ // size and buffer pointer to reflect the remainder that needs
+ // to be written. If there are no bytes left, we're done.
+ Ptr += ret;
+ Size -= ret;
+ } while (Size > 0);
+}
+
+void raw_fd_ostream::close() {
+ assert(ShouldClose);
+ ShouldClose = false;
+ flush();
+ while (::close(FD) != 0)
+ if (errno != EINTR) {
+ error_detected();
+ break;
+ }
+ FD = -1;
+}
+
+uint64_t raw_fd_ostream::seek(uint64_t off) {
+ flush();
+ pos = ::lseek(FD, off, SEEK_SET);
+ if (pos != off)
+ error_detected();
+ return pos;
+}
+
+size_t raw_fd_ostream::preferred_buffer_size() const {
+#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
+ // Windows and Minix have no st_blksize.
+ assert(FD >= 0 && "File not yet open!");
+ struct stat statbuf;
+ if (fstat(FD, &statbuf) != 0)
+ return 0;
+
+ // If this is a terminal, don't use buffering. Line buffering
+ // would be a more traditional thing to do, but it's not worth
+ // the complexity.
+ if (S_ISCHR(statbuf.st_mode) && isatty(FD))
+ return 0;
+ // Return the preferred block size.
+ return statbuf.st_blksize;
+#else
+ return raw_ostream::preferred_buffer_size();
+#endif
+}
+
+raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
+ bool bg) {
+ if (sys::Process::ColorNeedsFlush())
+ flush();
+ const char *colorcode =
+ (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg)
+ : sys::Process::OutputColor(colors, bold, bg);
+ if (colorcode) {
+ size_t len = strlen(colorcode);
+ write(colorcode, len);
+ // don't account colors towards output characters
+ pos -= len;
+ }
+ return *this;
+}
+
+raw_ostream &raw_fd_ostream::resetColor() {
+ if (sys::Process::ColorNeedsFlush())
+ flush();
+ const char *colorcode = sys::Process::ResetColor();
+ if (colorcode) {
+ size_t len = strlen(colorcode);
+ write(colorcode, len);
+ // don't account colors towards output characters
+ pos -= len;
+ }
+ return *this;
+}
+
+raw_ostream &raw_fd_ostream::reverseColor() {
+ if (sys::Process::ColorNeedsFlush())
+ flush();
+ const char *colorcode = sys::Process::OutputReverse();
+ if (colorcode) {
+ size_t len = strlen(colorcode);
+ write(colorcode, len);
+ // don't account colors towards output characters
+ pos -= len;
+ }
+ return *this;
+}
+
+bool raw_fd_ostream::is_displayed() const {
+ return sys::Process::FileDescriptorIsDisplayed(FD);
+}
+
+bool raw_fd_ostream::has_colors() const {
+ return sys::Process::FileDescriptorHasColors(FD);
+}
+
+//===----------------------------------------------------------------------===//
+// outs(), errs(), nulls()
+//===----------------------------------------------------------------------===//
+
+/// outs() - This returns a reference to a raw_ostream for standard output.
+/// Use it like: outs() << "foo" << "bar";
+raw_ostream &llvm::outs() {
+ // Set buffer settings to model stdout behavior.
+ // Delete the file descriptor when the program exists, forcing error
+ // detection. If you don't want this behavior, don't use outs().
+ static raw_fd_ostream S(STDOUT_FILENO, true);
+ return S;
+}
+
+/// errs() - This returns a reference to a raw_ostream for standard error.
+/// Use it like: errs() << "foo" << "bar";
+raw_ostream &llvm::errs() {
+ // Set standard error to be unbuffered by default.
+ static raw_fd_ostream S(STDERR_FILENO, false, true);
+ return S;
+}
+
+/// nulls() - This returns a reference to a raw_ostream which discards output.
+raw_ostream &llvm::nulls() {
+ static raw_null_ostream S;
+ return S;
+}
+
+
+//===----------------------------------------------------------------------===//
+// raw_string_ostream
+//===----------------------------------------------------------------------===//
+
+raw_string_ostream::~raw_string_ostream() {
+ flush();
+}
+
+void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
+ OS.append(Ptr, Size);
+}
+
+//===----------------------------------------------------------------------===//
+// raw_svector_ostream
+//===----------------------------------------------------------------------===//
+
+// The raw_svector_ostream implementation uses the SmallVector itself as the
+// buffer for the raw_ostream. We guarantee that the raw_ostream buffer is
+// always pointing past the end of the vector, but within the vector
+// capacity. This allows raw_ostream to write directly into the correct place,
+// and we only need to set the vector size when the data is flushed.
+
+raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
+ // Set up the initial external buffer. We make sure that the buffer has at
+ // least 128 bytes free; raw_ostream itself only requires 64, but we want to
+ // make sure that we don't grow the buffer unnecessarily on destruction (when
+ // the data is flushed). See the FIXME below.
+ OS.reserve(OS.size() + 128);
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
+}
+
+raw_svector_ostream::~raw_svector_ostream() {
+ // FIXME: Prevent resizing during this flush().
+ flush();
+}
+
+/// resync - This is called when the SmallVector we're appending to is changed
+/// outside of the raw_svector_ostream's control. It is only safe to do this
+/// if the raw_svector_ostream has previously been flushed.
+void raw_svector_ostream::resync() {
+ assert(GetNumBytesInBuffer() == 0 && "Didn't flush before mutating vector");
+
+ if (OS.capacity() - OS.size() < 64)
+ OS.reserve(OS.capacity() * 2);
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
+}
+
+void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
+ // If we're writing bytes from the end of the buffer into the smallvector, we
+ // don't need to copy the bytes, just commit the bytes because they are
+ // already in the right place.
+ if (Ptr == OS.end()) {
+ assert(OS.size() + Size <= OS.capacity() && "Invalid write_impl() call!");
+ OS.set_size(OS.size() + Size);
+ } else {
+ assert(GetNumBytesInBuffer() == 0 &&
+ "Should be writing from buffer if some bytes in it");
+ // Otherwise, do copy the bytes.
+ OS.append(Ptr, Ptr+Size);
+ }
+
+ // Grow the vector if necessary.
+ if (OS.capacity() - OS.size() < 64)
+ OS.reserve(OS.capacity() * 2);
+
+ // Update the buffer position.
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
+}
+
+uint64_t raw_svector_ostream::current_pos() const {
+ return OS.size();
+}
+
+StringRef raw_svector_ostream::str() {
+ flush();
+ return StringRef(OS.begin(), OS.size());
+}
+
+//===----------------------------------------------------------------------===//
+// raw_null_ostream
+//===----------------------------------------------------------------------===//
+
+raw_null_ostream::~raw_null_ostream() {
+#ifndef NDEBUG
+ // ~raw_ostream asserts that the buffer is empty. This isn't necessary
+ // with raw_null_ostream, but it's better to have raw_null_ostream follow
+ // the rules than to change the rules just for raw_null_ostream.
+ flush();
+#endif
+}
+
+void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
+}
+
+uint64_t raw_null_ostream::current_pos() const {
+ return 0;
+}
diff --git a/contrib/llvm/lib/Support/regcclass.h b/contrib/llvm/lib/Support/regcclass.h
new file mode 100644
index 000000000000..2cea3e4e5406
--- /dev/null
+++ b/contrib/llvm/lib/Support/regcclass.h
@@ -0,0 +1,70 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cclass.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* character-class table */
+static struct cclass {
+ const char *name;
+ const char *chars;
+ const char *multis;
+} cclasses[] = {
+ { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789", ""} ,
+ { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+ ""} ,
+ { "blank", " \t", ""} ,
+ { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
+\25\26\27\30\31\32\33\34\35\36\37\177", ""} ,
+ { "digit", "0123456789", ""} ,
+ { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ ""} ,
+ { "lower", "abcdefghijklmnopqrstuvwxyz",
+ ""} ,
+ { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
+ ""} ,
+ { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ ""} ,
+ { "space", "\t\n\v\f\r ", ""} ,
+ { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+ ""} ,
+ { "xdigit", "0123456789ABCDEFabcdef",
+ ""} ,
+ { NULL, 0, "" }
+};
diff --git a/contrib/llvm/lib/Support/regcname.h b/contrib/llvm/lib/Support/regcname.h
new file mode 100644
index 000000000000..3c0bb248ffa7
--- /dev/null
+++ b/contrib/llvm/lib/Support/regcname.h
@@ -0,0 +1,139 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cname.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* character-name table */
+static struct cname {
+ const char *name;
+ char code;
+} cnames[] = {
+ { "NUL", '\0' },
+ { "SOH", '\001' },
+ { "STX", '\002' },
+ { "ETX", '\003' },
+ { "EOT", '\004' },
+ { "ENQ", '\005' },
+ { "ACK", '\006' },
+ { "BEL", '\007' },
+ { "alert", '\007' },
+ { "BS", '\010' },
+ { "backspace", '\b' },
+ { "HT", '\011' },
+ { "tab", '\t' },
+ { "LF", '\012' },
+ { "newline", '\n' },
+ { "VT", '\013' },
+ { "vertical-tab", '\v' },
+ { "FF", '\014' },
+ { "form-feed", '\f' },
+ { "CR", '\015' },
+ { "carriage-return", '\r' },
+ { "SO", '\016' },
+ { "SI", '\017' },
+ { "DLE", '\020' },
+ { "DC1", '\021' },
+ { "DC2", '\022' },
+ { "DC3", '\023' },
+ { "DC4", '\024' },
+ { "NAK", '\025' },
+ { "SYN", '\026' },
+ { "ETB", '\027' },
+ { "CAN", '\030' },
+ { "EM", '\031' },
+ { "SUB", '\032' },
+ { "ESC", '\033' },
+ { "IS4", '\034' },
+ { "FS", '\034' },
+ { "IS3", '\035' },
+ { "GS", '\035' },
+ { "IS2", '\036' },
+ { "RS", '\036' },
+ { "IS1", '\037' },
+ { "US", '\037' },
+ { "space", ' ' },
+ { "exclamation-mark", '!' },
+ { "quotation-mark", '"' },
+ { "number-sign", '#' },
+ { "dollar-sign", '$' },
+ { "percent-sign", '%' },
+ { "ampersand", '&' },
+ { "apostrophe", '\'' },
+ { "left-parenthesis", '(' },
+ { "right-parenthesis", ')' },
+ { "asterisk", '*' },
+ { "plus-sign", '+' },
+ { "comma", ',' },
+ { "hyphen", '-' },
+ { "hyphen-minus", '-' },
+ { "period", '.' },
+ { "full-stop", '.' },
+ { "slash", '/' },
+ { "solidus", '/' },
+ { "zero", '0' },
+ { "one", '1' },
+ { "two", '2' },
+ { "three", '3' },
+ { "four", '4' },
+ { "five", '5' },
+ { "six", '6' },
+ { "seven", '7' },
+ { "eight", '8' },
+ { "nine", '9' },
+ { "colon", ':' },
+ { "semicolon", ';' },
+ { "less-than-sign", '<' },
+ { "equals-sign", '=' },
+ { "greater-than-sign", '>' },
+ { "question-mark", '?' },
+ { "commercial-at", '@' },
+ { "left-square-bracket", '[' },
+ { "backslash", '\\' },
+ { "reverse-solidus", '\\' },
+ { "right-square-bracket", ']' },
+ { "circumflex", '^' },
+ { "circumflex-accent", '^' },
+ { "underscore", '_' },
+ { "low-line", '_' },
+ { "grave-accent", '`' },
+ { "left-brace", '{' },
+ { "left-curly-bracket", '{' },
+ { "vertical-line", '|' },
+ { "right-brace", '}' },
+ { "right-curly-bracket", '}' },
+ { "tilde", '~' },
+ { "DEL", '\177' },
+ { NULL, 0 }
+};
diff --git a/contrib/llvm/lib/Support/regcomp.c b/contrib/llvm/lib/Support/regcomp.c
new file mode 100644
index 000000000000..74d9186aaaa2
--- /dev/null
+++ b/contrib/llvm/lib/Support/regcomp.c
@@ -0,0 +1,1553 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+#include "regcclass.h"
+#include "regcname.h"
+
+/*
+ * parse structure, passed up and down to avoid global variables and
+ * other clumsinesses
+ */
+struct parse {
+ char *next; /* next character in RE */
+ char *end; /* end of string (-> NUL normally) */
+ int error; /* has an error been seen? */
+ sop *strip; /* malloced strip */
+ sopno ssize; /* malloced strip size (allocated) */
+ sopno slen; /* malloced strip length (used) */
+ int ncsalloc; /* number of csets allocated */
+ struct re_guts *g;
+# define NPAREN 10 /* we need to remember () 1-9 for back refs */
+ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
+ sopno pend[NPAREN]; /* -> ) ([0] unused) */
+};
+
+static void p_ere(struct parse *, int);
+static void p_ere_exp(struct parse *);
+static void p_str(struct parse *);
+static void p_bre(struct parse *, int, int);
+static int p_simp_re(struct parse *, int);
+static int p_count(struct parse *);
+static void p_bracket(struct parse *);
+static void p_b_term(struct parse *, cset *);
+static void p_b_cclass(struct parse *, cset *);
+static void p_b_eclass(struct parse *, cset *);
+static char p_b_symbol(struct parse *);
+static char p_b_coll_elem(struct parse *, int);
+static char othercase(int);
+static void bothcases(struct parse *, int);
+static void ordinary(struct parse *, int);
+static void nonnewline(struct parse *);
+static void repeat(struct parse *, sopno, int, int);
+static int seterr(struct parse *, int);
+static cset *allocset(struct parse *);
+static void freeset(struct parse *, cset *);
+static int freezeset(struct parse *, cset *);
+static int firstch(struct parse *, cset *);
+static int nch(struct parse *, cset *);
+static void mcadd(struct parse *, cset *, const char *);
+static void mcinvert(struct parse *, cset *);
+static void mccase(struct parse *, cset *);
+static int isinsets(struct re_guts *, int);
+static int samesets(struct re_guts *, int, int);
+static void categorize(struct parse *, struct re_guts *);
+static sopno dupl(struct parse *, sopno, sopno);
+static void doemit(struct parse *, sop, size_t);
+static void doinsert(struct parse *, sop, size_t, sopno);
+static void dofwd(struct parse *, sopno, sop);
+static void enlarge(struct parse *, sopno);
+static void stripsnug(struct parse *, struct re_guts *);
+static void findmust(struct parse *, struct re_guts *);
+static sopno pluscount(struct parse *, struct re_guts *);
+
+static char nuls[10]; /* place to point scanner in event of error */
+
+/*
+ * macros for use with parse structure
+ * BEWARE: these know that the parse structure is named `p' !!!
+ */
+#define PEEK() (*p->next)
+#define PEEK2() (*(p->next+1))
+#define MORE() (p->next < p->end)
+#define MORE2() (p->next+1 < p->end)
+#define SEE(c) (MORE() && PEEK() == (c))
+#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
+#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
+#define NEXT() (p->next++)
+#define NEXT2() (p->next += 2)
+#define NEXTn(n) (p->next += (n))
+#define GETNEXT() (*p->next++)
+#define SETERROR(e) seterr(p, (e))
+#define REQUIRE(co, e) (void)((co) || SETERROR(e))
+#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
+#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
+#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
+#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
+#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
+#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
+#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
+#define HERE() (p->slen)
+#define THERE() (p->slen - 1)
+#define THERETHERE() (p->slen - 2)
+#define DROP(n) (p->slen -= (n))
+
+#ifdef _POSIX2_RE_DUP_MAX
+#define DUPMAX _POSIX2_RE_DUP_MAX
+#else
+#define DUPMAX 255
+#endif
+#define INFINITY (DUPMAX + 1)
+
+#ifndef NDEBUG
+static int never = 0; /* for use in asserts; shuts lint up */
+#else
+#define never 0 /* some <assert.h>s have bugs too */
+#endif
+
+/*
+ - llvm_regcomp - interface for parser and compilation
+ */
+int /* 0 success, otherwise REG_something */
+llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags)
+{
+ struct parse pa;
+ struct re_guts *g;
+ struct parse *p = &pa;
+ int i;
+ size_t len;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&~REG_DUMP)
+#endif
+
+ cflags = GOODFLAGS(cflags);
+ if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
+ return(REG_INVARG);
+
+ if (cflags&REG_PEND) {
+ if (preg->re_endp < pattern)
+ return(REG_INVARG);
+ len = preg->re_endp - pattern;
+ } else
+ len = strlen((const char *)pattern);
+
+ /* do the mallocs early so failure handling is easy */
+ g = (struct re_guts *)malloc(sizeof(struct re_guts) +
+ (NC-1)*sizeof(cat_t));
+ if (g == NULL)
+ return(REG_ESPACE);
+ p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
+ p->strip = (sop *)calloc(p->ssize, sizeof(sop));
+ p->slen = 0;
+ if (p->strip == NULL) {
+ free((char *)g);
+ return(REG_ESPACE);
+ }
+
+ /* set things up */
+ p->g = g;
+ p->next = (char *)pattern; /* convenience; we do not modify it */
+ p->end = p->next + len;
+ p->error = 0;
+ p->ncsalloc = 0;
+ for (i = 0; i < NPAREN; i++) {
+ p->pbegin[i] = 0;
+ p->pend[i] = 0;
+ }
+ g->csetsize = NC;
+ g->sets = NULL;
+ g->setbits = NULL;
+ g->ncsets = 0;
+ g->cflags = cflags;
+ g->iflags = 0;
+ g->nbol = 0;
+ g->neol = 0;
+ g->must = NULL;
+ g->mlen = 0;
+ g->nsub = 0;
+ g->ncategories = 1; /* category 0 is "everything else" */
+ g->categories = &g->catspace[-(CHAR_MIN)];
+ (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
+ g->backrefs = 0;
+
+ /* do it */
+ EMIT(OEND, 0);
+ g->firststate = THERE();
+ if (cflags&REG_EXTENDED)
+ p_ere(p, OUT);
+ else if (cflags&REG_NOSPEC)
+ p_str(p);
+ else
+ p_bre(p, OUT, OUT);
+ EMIT(OEND, 0);
+ g->laststate = THERE();
+
+ /* tidy up loose ends and fill things in */
+ categorize(p, g);
+ stripsnug(p, g);
+ findmust(p, g);
+ g->nplus = pluscount(p, g);
+ g->magic = MAGIC2;
+ preg->re_nsub = g->nsub;
+ preg->re_g = g;
+ preg->re_magic = MAGIC1;
+#ifndef REDEBUG
+ /* not debugging, so can't rely on the assert() in llvm_regexec() */
+ if (g->iflags&REGEX_BAD)
+ SETERROR(REG_ASSERT);
+#endif
+
+ /* win or lose, we're done */
+ if (p->error != 0) /* lose */
+ llvm_regfree(preg);
+ return(p->error);
+}
+
+/*
+ - p_ere - ERE parser top level, concatenation and alternation
+ */
+static void
+p_ere(struct parse *p, int stop) /* character this ERE should end at */
+{
+ char c;
+ sopno prevback = 0;
+ sopno prevfwd = 0;
+ sopno conc;
+ int first = 1; /* is this the first alternative? */
+
+ for (;;) {
+ /* do a bunch of concatenated expressions */
+ conc = HERE();
+ while (MORE() && (c = PEEK()) != '|' && c != stop)
+ p_ere_exp(p);
+ REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
+
+ if (!EAT('|'))
+ break; /* NOTE BREAK OUT */
+
+ if (first) {
+ INSERT(OCH_, conc); /* offset is wrong */
+ prevfwd = conc;
+ prevback = conc;
+ first = 0;
+ }
+ ASTERN(OOR1, prevback);
+ prevback = THERE();
+ AHEAD(prevfwd); /* fix previous offset */
+ prevfwd = HERE();
+ EMIT(OOR2, 0); /* offset is very wrong */
+ }
+
+ if (!first) { /* tail-end fixups */
+ AHEAD(prevfwd);
+ ASTERN(O_CH, prevback);
+ }
+
+ assert(!MORE() || SEE(stop));
+}
+
+/*
+ - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
+ */
+static void
+p_ere_exp(struct parse *p)
+{
+ char c;
+ sopno pos;
+ int count;
+ int count2;
+ int backrefnum;
+ sopno subno;
+ int wascaret = 0;
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+
+ pos = HERE();
+ switch (c) {
+ case '(':
+ REQUIRE(MORE(), REG_EPAREN);
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ if (!SEE(')'))
+ p_ere(p, ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ MUSTEAT(')', REG_EPAREN);
+ break;
+#ifndef POSIX_MISTAKE
+ case ')': /* happens only if no current unmatched ( */
+ /*
+ * You may ask, why the ifndef? Because I didn't notice
+ * this until slightly too late for 1003.2, and none of the
+ * other 1003.2 regular-expression reviewers noticed it at
+ * all. So an unmatched ) is legal POSIX, at least until
+ * we can get it fixed.
+ */
+ SETERROR(REG_EPAREN);
+ break;
+#endif
+ case '^':
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ wascaret = 1;
+ break;
+ case '$':
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ break;
+ case '|':
+ SETERROR(REG_EMPTY);
+ break;
+ case '*':
+ case '+':
+ case '?':
+ SETERROR(REG_BADRPT);
+ break;
+ case '.':
+ if (p->g->cflags&REG_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case '\\':
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = GETNEXT();
+ if (c >= '1' && c <= '9') {
+ /* \[0-9] is taken to be a back-reference to a previously specified
+ * matching group. backrefnum will hold the number. The matching
+ * group must exist (i.e. if \4 is found there must have been at
+ * least 4 matching groups specified in the pattern previously).
+ */
+ backrefnum = c - '0';
+ if (p->pend[backrefnum] == 0) {
+ SETERROR(REG_ESUBREG);
+ break;
+ }
+
+ /* Make sure everything checks out and emit the sequence
+ * that marks a back-reference to the parse structure.
+ */
+ assert(backrefnum <= p->g->nsub);
+ EMIT(OBACK_, backrefnum);
+ assert(p->pbegin[backrefnum] != 0);
+ assert(OP(p->strip[p->pbegin[backrefnum]]) != OLPAREN);
+ assert(OP(p->strip[p->pend[backrefnum]]) != ORPAREN);
+ (void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]);
+ EMIT(O_BACK, backrefnum);
+ p->g->backrefs = 1;
+ } else {
+ /* Other chars are simply themselves when escaped with a backslash.
+ */
+ ordinary(p, c);
+ }
+ break;
+ case '{': /* okay as ordinary except if digit follows */
+ REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ ordinary(p, c);
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ /* we call { a repetition if followed by a digit */
+ if (!( c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
+ return; /* no repetition, we're done */
+ NEXT();
+
+ REQUIRE(!wascaret, REG_BADRPT);
+ switch (c) {
+ case '*': /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ break;
+ case '+':
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ break;
+ case '?':
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, pos); /* offset slightly wrong */
+ ASTERN(OOR1, pos); /* this one's right */
+ AHEAD(pos); /* fix the OCH_ */
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case '{':
+ count = p_count(p);
+ if (EAT(',')) {
+ if (isdigit((uch)PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EAT('}')) { /* error heuristics */
+ while (MORE() && PEEK() != '}')
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ if (!( c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
+ return;
+ SETERROR(REG_BADRPT);
+}
+
+/*
+ - p_str - string (no metacharacters) "parser"
+ */
+static void
+p_str(struct parse *p)
+{
+ REQUIRE(MORE(), REG_EMPTY);
+ while (MORE())
+ ordinary(p, GETNEXT());
+}
+
+/*
+ - p_bre - BRE parser top level, anchoring and concatenation
+ * Giving end1 as OUT essentially eliminates the end1/end2 check.
+ *
+ * This implementation is a bit of a kludge, in that a trailing $ is first
+ * taken as an ordinary character and then revised to be an anchor. The
+ * only undesirable side effect is that '$' gets included as a character
+ * category in such cases. This is fairly harmless; not worth fixing.
+ * The amount of lookahead needed to avoid this kludge is excessive.
+ */
+static void
+p_bre(struct parse *p,
+ int end1, /* first terminating character */
+ int end2) /* second terminating character */
+{
+ sopno start = HERE();
+ int first = 1; /* first subexpression? */
+ int wasdollar = 0;
+
+ if (EAT('^')) {
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ }
+ while (MORE() && !SEETWO(end1, end2)) {
+ wasdollar = p_simp_re(p, first);
+ first = 0;
+ }
+ if (wasdollar) { /* oops, that was a trailing anchor */
+ DROP(1);
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ }
+
+ REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
+}
+
+/*
+ - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
+ */
+static int /* was the simple RE an unbackslashed $? */
+p_simp_re(struct parse *p,
+ int starordinary) /* is a leading * an ordinary character? */
+{
+ int c;
+ int count;
+ int count2;
+ sopno pos;
+ int i;
+ sopno subno;
+# define BACKSL (1<<CHAR_BIT)
+
+ pos = HERE(); /* repetion op, if any, covers from here */
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+ if (c == '\\') {
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = BACKSL | GETNEXT();
+ }
+ switch (c) {
+ case '.':
+ if (p->g->cflags&REG_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case BACKSL|'{':
+ SETERROR(REG_BADRPT);
+ break;
+ case BACKSL|'(':
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ /* the MORE here is an error heuristic */
+ if (MORE() && !SEETWO('\\', ')'))
+ p_bre(p, '\\', ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
+ break;
+ case BACKSL|')': /* should not get here -- must be user */
+ case BACKSL|'}':
+ SETERROR(REG_EPAREN);
+ break;
+ case BACKSL|'1':
+ case BACKSL|'2':
+ case BACKSL|'3':
+ case BACKSL|'4':
+ case BACKSL|'5':
+ case BACKSL|'6':
+ case BACKSL|'7':
+ case BACKSL|'8':
+ case BACKSL|'9':
+ i = (c&~BACKSL) - '0';
+ assert(i < NPAREN);
+ if (p->pend[i] != 0) {
+ assert(i <= p->g->nsub);
+ EMIT(OBACK_, i);
+ assert(p->pbegin[i] != 0);
+ assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+ assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+ (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+ EMIT(O_BACK, i);
+ } else
+ SETERROR(REG_ESUBREG);
+ p->g->backrefs = 1;
+ break;
+ case '*':
+ REQUIRE(starordinary, REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ ordinary(p, (char)c);
+ break;
+ }
+
+ if (EAT('*')) { /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ } else if (EATTWO('\\', '{')) {
+ count = p_count(p);
+ if (EAT(',')) {
+ if (MORE() && isdigit((uch)PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EATTWO('\\', '}')) { /* error heuristics */
+ while (MORE() && !SEETWO('\\', '}'))
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ } else if (c == '$') /* $ (but not \$) ends it */
+ return(1);
+
+ return(0);
+}
+
+/*
+ - p_count - parse a repetition count
+ */
+static int /* the value */
+p_count(struct parse *p)
+{
+ int count = 0;
+ int ndigits = 0;
+
+ while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
+ count = count*10 + (GETNEXT() - '0');
+ ndigits++;
+ }
+
+ REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
+ return(count);
+}
+
+/*
+ - p_bracket - parse a bracketed character list
+ *
+ * Note a significant property of this code: if the allocset() did SETERROR,
+ * no set operations are done.
+ */
+static void
+p_bracket(struct parse *p)
+{
+ cset *cs;
+ int invert = 0;
+
+ /* Dept of Truly Sickening Special-Case Kludges */
+ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
+ EMIT(OBOW, 0);
+ NEXTn(6);
+ return;
+ }
+ if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
+ EMIT(OEOW, 0);
+ NEXTn(6);
+ return;
+ }
+
+ if ((cs = allocset(p)) == NULL) {
+ /* allocset did set error status in p */
+ return;
+ }
+
+ if (EAT('^'))
+ invert++; /* make note to invert set at end */
+ if (EAT(']'))
+ CHadd(cs, ']');
+ else if (EAT('-'))
+ CHadd(cs, '-');
+ while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
+ p_b_term(p, cs);
+ if (EAT('-'))
+ CHadd(cs, '-');
+ MUSTEAT(']', REG_EBRACK);
+
+ if (p->error != 0) { /* don't mess things up further */
+ freeset(p, cs);
+ return;
+ }
+
+ if (p->g->cflags&REG_ICASE) {
+ int i;
+ int ci;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i) && isalpha(i)) {
+ ci = othercase(i);
+ if (ci != i)
+ CHadd(cs, ci);
+ }
+ if (cs->multis != NULL)
+ mccase(p, cs);
+ }
+ if (invert) {
+ int i;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i))
+ CHsub(cs, i);
+ else
+ CHadd(cs, i);
+ if (p->g->cflags&REG_NEWLINE)
+ CHsub(cs, '\n');
+ if (cs->multis != NULL)
+ mcinvert(p, cs);
+ }
+
+ assert(cs->multis == NULL); /* xxx */
+
+ if (nch(p, cs) == 1) { /* optimize singleton sets */
+ ordinary(p, firstch(p, cs));
+ freeset(p, cs);
+ } else
+ EMIT(OANYOF, freezeset(p, cs));
+}
+
+/*
+ - p_b_term - parse one term of a bracketed character list
+ */
+static void
+p_b_term(struct parse *p, cset *cs)
+{
+ char c;
+ char start, finish;
+ int i;
+
+ /* classify what we've got */
+ switch ((MORE()) ? PEEK() : '\0') {
+ case '[':
+ c = (MORE2()) ? PEEK2() : '\0';
+ break;
+ case '-':
+ SETERROR(REG_ERANGE);
+ return; /* NOTE RETURN */
+ break;
+ default:
+ c = '\0';
+ break;
+ }
+
+ switch (c) {
+ case ':': /* character class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECTYPE);
+ p_b_cclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
+ break;
+ case '=': /* equivalence class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
+ p_b_eclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
+ break;
+ default: /* symbol, ordinary character, or range */
+/* xxx revision needed for multichar stuff */
+ start = p_b_symbol(p);
+ if (SEE('-') && MORE2() && PEEK2() != ']') {
+ /* range */
+ NEXT();
+ if (EAT('-'))
+ finish = '-';
+ else
+ finish = p_b_symbol(p);
+ } else
+ finish = start;
+/* xxx what about signed chars here... */
+ REQUIRE(start <= finish, REG_ERANGE);
+ for (i = start; i <= finish; i++)
+ CHadd(cs, i);
+ break;
+ }
+}
+
+/*
+ - p_b_cclass - parse a character-class name and deal with it
+ */
+static void
+p_b_cclass(struct parse *p, cset *cs)
+{
+ char *sp = p->next;
+ struct cclass *cp;
+ size_t len;
+ const char *u;
+ char c;
+
+ while (MORE() && isalpha((uch)PEEK()))
+ NEXT();
+ len = p->next - sp;
+ for (cp = cclasses; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ break;
+ if (cp->name == NULL) {
+ /* oops, didn't find it */
+ SETERROR(REG_ECTYPE);
+ return;
+ }
+
+ u = cp->chars;
+ while ((c = *u++) != '\0')
+ CHadd(cs, c);
+ for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
+ MCadd(p, cs, u);
+}
+
+/*
+ - p_b_eclass - parse an equivalence-class name and deal with it
+ *
+ * This implementation is incomplete. xxx
+ */
+static void
+p_b_eclass(struct parse *p, cset *cs)
+{
+ char c;
+
+ c = p_b_coll_elem(p, '=');
+ CHadd(cs, c);
+}
+
+/*
+ - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
+ */
+static char /* value of symbol */
+p_b_symbol(struct parse *p)
+{
+ char value;
+
+ REQUIRE(MORE(), REG_EBRACK);
+ if (!EATTWO('[', '.'))
+ return(GETNEXT());
+
+ /* collating symbol */
+ value = p_b_coll_elem(p, '.');
+ REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
+ return(value);
+}
+
+/*
+ - p_b_coll_elem - parse a collating-element name and look it up
+ */
+static char /* value of collating element */
+p_b_coll_elem(struct parse *p,
+ int endc) /* name ended by endc,']' */
+{
+ char *sp = p->next;
+ struct cname *cp;
+ int len;
+
+ while (MORE() && !SEETWO(endc, ']'))
+ NEXT();
+ if (!MORE()) {
+ SETERROR(REG_EBRACK);
+ return(0);
+ }
+ len = p->next - sp;
+ for (cp = cnames; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ return(cp->code); /* known name */
+ if (len == 1)
+ return(*sp); /* single character */
+ SETERROR(REG_ECOLLATE); /* neither */
+ return(0);
+}
+
+/*
+ - othercase - return the case counterpart of an alphabetic
+ */
+static char /* if no counterpart, return ch */
+othercase(int ch)
+{
+ ch = (uch)ch;
+ assert(isalpha(ch));
+ if (isupper(ch))
+ return ((uch)tolower(ch));
+ else if (islower(ch))
+ return ((uch)toupper(ch));
+ else /* peculiar, but could happen */
+ return(ch);
+}
+
+/*
+ - bothcases - emit a dualcase version of a two-case character
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+bothcases(struct parse *p, int ch)
+{
+ char *oldnext = p->next;
+ char *oldend = p->end;
+ char bracket[3];
+
+ ch = (uch)ch;
+ assert(othercase(ch) != ch); /* p_bracket() would recurse */
+ p->next = bracket;
+ p->end = bracket+2;
+ bracket[0] = ch;
+ bracket[1] = ']';
+ bracket[2] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket+2);
+ p->next = oldnext;
+ p->end = oldend;
+}
+
+/*
+ - ordinary - emit an ordinary character
+ */
+static void
+ordinary(struct parse *p, int ch)
+{
+ cat_t *cap = p->g->categories;
+
+ if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
+ bothcases(p, ch);
+ else {
+ EMIT(OCHAR, (uch)ch);
+ if (cap[ch] == 0)
+ cap[ch] = p->g->ncategories++;
+ }
+}
+
+/*
+ - nonnewline - emit REG_NEWLINE version of OANY
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+nonnewline(struct parse *p)
+{
+ char *oldnext = p->next;
+ char *oldend = p->end;
+ char bracket[4];
+
+ p->next = bracket;
+ p->end = bracket+3;
+ bracket[0] = '^';
+ bracket[1] = '\n';
+ bracket[2] = ']';
+ bracket[3] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket+3);
+ p->next = oldnext;
+ p->end = oldend;
+}
+
+/*
+ - repeat - generate code for a bounded repetition, recursively if needed
+ */
+static void
+repeat(struct parse *p,
+ sopno start, /* operand from here to end of strip */
+ int from, /* repeated from this number */
+ int to) /* to this number of times (maybe INFINITY) */
+{
+ sopno finish = HERE();
+# define N 2
+# define INF 3
+# define REP(f, t) ((f)*8 + (t))
+# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
+ sopno copy;
+
+ if (p->error != 0) /* head off possible runaway recursion */
+ return;
+
+ assert(from <= to);
+
+ switch (REP(MAP(from), MAP(to))) {
+ case REP(0, 0): /* must be user doing this */
+ DROP(finish-start); /* drop the operand */
+ break;
+ case REP(0, 1): /* as x{1,1}? */
+ case REP(0, N): /* as x{1,n}? */
+ case REP(0, INF): /* as x{1,}? */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start); /* offset is wrong... */
+ repeat(p, start+1, 1, to);
+ ASTERN(OOR1, start);
+ AHEAD(start); /* ... fix it */
+ EMIT(OOR2, 0);
+ AHEAD(THERE());
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case REP(1, 1): /* trivial case */
+ /* done */
+ break;
+ case REP(1, N): /* as x?x{1,n-1} */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start);
+ ASTERN(OOR1, start);
+ AHEAD(start);
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ copy = dupl(p, start+1, finish+1);
+ assert(copy == finish+4);
+ repeat(p, copy, 1, to-1);
+ break;
+ case REP(1, INF): /* as x+ */
+ INSERT(OPLUS_, start);
+ ASTERN(O_PLUS, start);
+ break;
+ case REP(N, N): /* as xx{m-1,n-1} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from-1, to-1);
+ break;
+ case REP(N, INF): /* as xx{n-1,INF} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from-1, to);
+ break;
+ default: /* "can't happen" */
+ SETERROR(REG_ASSERT); /* just in case */
+ break;
+ }
+}
+
+/*
+ - seterr - set an error condition
+ */
+static int /* useless but makes type checking happy */
+seterr(struct parse *p, int e)
+{
+ if (p->error == 0) /* keep earliest error condition */
+ p->error = e;
+ p->next = nuls; /* try to bring things to a halt */
+ p->end = nuls;
+ return(0); /* make the return value well-defined */
+}
+
+/*
+ - allocset - allocate a set of characters for []
+ */
+static cset *
+allocset(struct parse *p)
+{
+ int no = p->g->ncsets++;
+ size_t nc;
+ size_t nbytes;
+ cset *cs;
+ size_t css = (size_t)p->g->csetsize;
+ int i;
+
+ if (no >= p->ncsalloc) { /* need another column of space */
+ void *ptr;
+
+ p->ncsalloc += CHAR_BIT;
+ nc = p->ncsalloc;
+ assert(nc % CHAR_BIT == 0);
+ nbytes = nc / CHAR_BIT * css;
+
+ ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset));
+ if (ptr == NULL)
+ goto nomem;
+ p->g->sets = ptr;
+
+ ptr = (uch *)realloc((char *)p->g->setbits, nbytes);
+ if (ptr == NULL)
+ goto nomem;
+ p->g->setbits = ptr;
+
+ for (i = 0; i < no; i++)
+ p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
+
+ (void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
+ }
+ /* XXX should not happen */
+ if (p->g->sets == NULL || p->g->setbits == NULL)
+ goto nomem;
+
+ cs = &p->g->sets[no];
+ cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
+ cs->mask = 1 << ((no) % CHAR_BIT);
+ cs->hash = 0;
+ cs->smultis = 0;
+ cs->multis = NULL;
+
+ return(cs);
+nomem:
+ free(p->g->sets);
+ p->g->sets = NULL;
+ free(p->g->setbits);
+ p->g->setbits = NULL;
+
+ SETERROR(REG_ESPACE);
+ /* caller's responsibility not to do set ops */
+ return(NULL);
+}
+
+/*
+ - freeset - free a now-unused set
+ */
+static void
+freeset(struct parse *p, cset *cs)
+{
+ size_t i;
+ cset *top = &p->g->sets[p->g->ncsets];
+ size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ CHsub(cs, i);
+ if (cs == top-1) /* recover only the easy case */
+ p->g->ncsets--;
+}
+
+/*
+ - freezeset - final processing on a set of characters
+ *
+ * The main task here is merging identical sets. This is usually a waste
+ * of time (although the hash code minimizes the overhead), but can win
+ * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
+ * is done using addition rather than xor -- all ASCII [aA] sets xor to
+ * the same value!
+ */
+static int /* set number */
+freezeset(struct parse *p, cset *cs)
+{
+ uch h = cs->hash;
+ size_t i;
+ cset *top = &p->g->sets[p->g->ncsets];
+ cset *cs2;
+ size_t css = (size_t)p->g->csetsize;
+
+ /* look for an earlier one which is the same */
+ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
+ if (cs2->hash == h && cs2 != cs) {
+ /* maybe */
+ for (i = 0; i < css; i++)
+ if (!!CHIN(cs2, i) != !!CHIN(cs, i))
+ break; /* no */
+ if (i == css)
+ break; /* yes */
+ }
+
+ if (cs2 < top) { /* found one */
+ freeset(p, cs);
+ cs = cs2;
+ }
+
+ return((int)(cs - p->g->sets));
+}
+
+/*
+ - firstch - return first character in a set (which must have at least one)
+ */
+static int /* character; there is no "none" value */
+firstch(struct parse *p, cset *cs)
+{
+ size_t i;
+ size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ return((char)i);
+ assert(never);
+ return(0); /* arbitrary */
+}
+
+/*
+ - nch - number of characters in a set
+ */
+static int
+nch(struct parse *p, cset *cs)
+{
+ size_t i;
+ size_t css = (size_t)p->g->csetsize;
+ int n = 0;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ n++;
+ return(n);
+}
+
+/*
+ - mcadd - add a collating element to a cset
+ */
+static void
+mcadd( struct parse *p, cset *cs, const char *cp)
+{
+ size_t oldend = cs->smultis;
+ void *np;
+
+ cs->smultis += strlen(cp) + 1;
+ np = realloc(cs->multis, cs->smultis);
+ if (np == NULL) {
+ if (cs->multis)
+ free(cs->multis);
+ cs->multis = NULL;
+ SETERROR(REG_ESPACE);
+ return;
+ }
+ cs->multis = np;
+
+ llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
+}
+
+/*
+ - mcinvert - invert the list of collating elements in a cset
+ *
+ * This would have to know the set of possibilities. Implementation
+ * is deferred.
+ */
+/* ARGSUSED */
+static void
+mcinvert(struct parse *p, cset *cs)
+{
+ assert(cs->multis == NULL); /* xxx */
+}
+
+/*
+ - mccase - add case counterparts of the list of collating elements in a cset
+ *
+ * This would have to know the set of possibilities. Implementation
+ * is deferred.
+ */
+/* ARGSUSED */
+static void
+mccase(struct parse *p, cset *cs)
+{
+ assert(cs->multis == NULL); /* xxx */
+}
+
+/*
+ - isinsets - is this character in any sets?
+ */
+static int /* predicate */
+isinsets(struct re_guts *g, int c)
+{
+ uch *col;
+ int i;
+ int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
+ unsigned uc = (uch)c;
+
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
+ if (col[uc] != 0)
+ return(1);
+ return(0);
+}
+
+/*
+ - samesets - are these two characters in exactly the same sets?
+ */
+static int /* predicate */
+samesets(struct re_guts *g, int c1, int c2)
+{
+ uch *col;
+ int i;
+ int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
+ unsigned uc1 = (uch)c1;
+ unsigned uc2 = (uch)c2;
+
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
+ if (col[uc1] != col[uc2])
+ return(0);
+ return(1);
+}
+
+/*
+ - categorize - sort out character categories
+ */
+static void
+categorize(struct parse *p, struct re_guts *g)
+{
+ cat_t *cats = g->categories;
+ int c;
+ int c2;
+ cat_t cat;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+ if (cats[c] == 0 && isinsets(g, c)) {
+ cat = g->ncategories++;
+ cats[c] = cat;
+ for (c2 = c+1; c2 <= CHAR_MAX; c2++)
+ if (cats[c2] == 0 && samesets(g, c, c2))
+ cats[c2] = cat;
+ }
+}
+
+/*
+ - dupl - emit a duplicate of a bunch of sops
+ */
+static sopno /* start of duplicate */
+dupl(struct parse *p,
+ sopno start, /* from here */
+ sopno finish) /* to this less one */
+{
+ sopno ret = HERE();
+ sopno len = finish - start;
+
+ assert(finish >= start);
+ if (len == 0)
+ return(ret);
+ enlarge(p, p->ssize + len); /* this many unexpected additions */
+ assert(p->ssize >= p->slen + len);
+ (void) memmove((char *)(p->strip + p->slen),
+ (char *)(p->strip + start), (size_t)len*sizeof(sop));
+ p->slen += len;
+ return(ret);
+}
+
+/*
+ - doemit - emit a strip operator
+ *
+ * It might seem better to implement this as a macro with a function as
+ * hard-case backup, but it's just too big and messy unless there are
+ * some changes to the data structures. Maybe later.
+ */
+static void
+doemit(struct parse *p, sop op, size_t opnd)
+{
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ /* deal with oversize operands ("can't happen", more or less) */
+ assert(opnd < 1<<OPSHIFT);
+
+ /* deal with undersized strip */
+ if (p->slen >= p->ssize)
+ enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
+ assert(p->slen < p->ssize);
+
+ /* finally, it's all reduced to the easy case */
+ p->strip[p->slen++] = SOP(op, opnd);
+}
+
+/*
+ - doinsert - insert a sop into the strip
+ */
+static void
+doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
+{
+ sopno sn;
+ sop s;
+ int i;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ sn = HERE();
+ EMIT(op, opnd); /* do checks, ensure space */
+ assert(HERE() == sn+1);
+ s = p->strip[sn];
+
+ /* adjust paren pointers */
+ assert(pos > 0);
+ for (i = 1; i < NPAREN; i++) {
+ if (p->pbegin[i] >= pos) {
+ p->pbegin[i]++;
+ }
+ if (p->pend[i] >= pos) {
+ p->pend[i]++;
+ }
+ }
+
+ memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
+ (HERE()-pos-1)*sizeof(sop));
+ p->strip[pos] = s;
+}
+
+/*
+ - dofwd - complete a forward reference
+ */
+static void
+dofwd(struct parse *p, sopno pos, sop value)
+{
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ assert(value < 1<<OPSHIFT);
+ p->strip[pos] = OP(p->strip[pos]) | value;
+}
+
+/*
+ - enlarge - enlarge the strip
+ */
+static void
+enlarge(struct parse *p, sopno size)
+{
+ sop *sp;
+
+ if (p->ssize >= size)
+ return;
+
+ sp = (sop *)realloc(p->strip, size*sizeof(sop));
+ if (sp == NULL) {
+ SETERROR(REG_ESPACE);
+ return;
+ }
+ p->strip = sp;
+ p->ssize = size;
+}
+
+/*
+ - stripsnug - compact the strip
+ */
+static void
+stripsnug(struct parse *p, struct re_guts *g)
+{
+ g->nstates = p->slen;
+ g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
+ if (g->strip == NULL) {
+ SETERROR(REG_ESPACE);
+ g->strip = p->strip;
+ }
+}
+
+/*
+ - findmust - fill in must and mlen with longest mandatory literal string
+ *
+ * This algorithm could do fancy things like analyzing the operands of |
+ * for common subsequences. Someday. This code is simple and finds most
+ * of the interesting cases.
+ *
+ * Note that must and mlen got initialized during setup.
+ */
+static void
+findmust(struct parse *p, struct re_guts *g)
+{
+ sop *scan;
+ sop *start = 0; /* start initialized in the default case, after that */
+ sop *newstart = 0; /* newstart was initialized in the OCHAR case */
+ sopno newlen;
+ sop s;
+ char *cp;
+ sopno i;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ /* find the longest OCHAR sequence in strip */
+ newlen = 0;
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OCHAR: /* sequence member */
+ if (newlen == 0) /* new sequence */
+ newstart = scan - 1;
+ newlen++;
+ break;
+ case OPLUS_: /* things that don't break one */
+ case OLPAREN:
+ case ORPAREN:
+ break;
+ case OQUEST_: /* things that must be skipped */
+ case OCH_:
+ scan--;
+ do {
+ scan += OPND(s);
+ s = *scan;
+ /* assert() interferes w debug printouts */
+ if (OP(s) != O_QUEST && OP(s) != O_CH &&
+ OP(s) != OOR2) {
+ g->iflags |= REGEX_BAD;
+ return;
+ }
+ } while (OP(s) != O_QUEST && OP(s) != O_CH);
+ /* fallthrough */
+ default: /* things that break a sequence */
+ if (newlen > g->mlen) { /* ends one */
+ start = newstart;
+ g->mlen = newlen;
+ }
+ newlen = 0;
+ break;
+ }
+ } while (OP(s) != OEND);
+
+ if (g->mlen == 0) /* there isn't one */
+ return;
+
+ /* turn it into a character string */
+ g->must = malloc((size_t)g->mlen + 1);
+ if (g->must == NULL) { /* argh; just forget it */
+ g->mlen = 0;
+ return;
+ }
+ cp = g->must;
+ scan = start;
+ for (i = g->mlen; i > 0; i--) {
+ while (OP(s = *scan++) != OCHAR)
+ continue;
+ assert(cp < g->must + g->mlen);
+ *cp++ = (char)OPND(s);
+ }
+ assert(cp == g->must + g->mlen);
+ *cp++ = '\0'; /* just on general principles */
+}
+
+/*
+ - pluscount - count + nesting
+ */
+static sopno /* nesting depth */
+pluscount(struct parse *p, struct re_guts *g)
+{
+ sop *scan;
+ sop s;
+ sopno plusnest = 0;
+ sopno maxnest = 0;
+
+ if (p->error != 0)
+ return(0); /* there may not be an OEND */
+
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OPLUS_:
+ plusnest++;
+ break;
+ case O_PLUS:
+ if (plusnest > maxnest)
+ maxnest = plusnest;
+ plusnest--;
+ break;
+ }
+ } while (OP(s) != OEND);
+ if (plusnest != 0)
+ g->iflags |= REGEX_BAD;
+ return(maxnest);
+}
diff --git a/contrib/llvm/lib/Support/regengine.inc b/contrib/llvm/lib/Support/regengine.inc
new file mode 100644
index 000000000000..7e41f96f359d
--- /dev/null
+++ b/contrib/llvm/lib/Support/regengine.inc
@@ -0,0 +1,1034 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)engine.c 8.5 (Berkeley) 3/20/94
+ */
+
+/*
+ * The matching engine and friends. This file is #included by regexec.c
+ * after suitable #defines of a variety of macros used herein, so that
+ * different state representations can be used without duplicating masses
+ * of code.
+ */
+
+#ifdef SNAMES
+#define matcher smatcher
+#define fast sfast
+#define slow sslow
+#define dissect sdissect
+#define backref sbackref
+#define step sstep
+#define print sprint
+#define at sat
+#define match smat
+#define nope snope
+#endif
+#ifdef LNAMES
+#define matcher lmatcher
+#define fast lfast
+#define slow lslow
+#define dissect ldissect
+#define backref lbackref
+#define step lstep
+#define print lprint
+#define at lat
+#define match lmat
+#define nope lnope
+#endif
+
+/* another structure passed up and down to avoid zillions of parameters */
+struct match {
+ struct re_guts *g;
+ int eflags;
+ llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
+ const char *offp; /* offsets work from here */
+ const char *beginp; /* start of string -- virtual NUL precedes */
+ const char *endp; /* end of string -- virtual NUL here */
+ const char *coldp; /* can be no match starting before here */
+ const char **lastpos; /* [nplus+1] */
+ STATEVARS;
+ states st; /* current states */
+ states fresh; /* states for a fresh start */
+ states tmp; /* temporary */
+ states empty; /* empty set of states */
+};
+
+static int matcher(struct re_guts *, const char *, size_t,
+ llvm_regmatch_t[], int);
+static const char *dissect(struct match *, const char *, const char *, sopno,
+ sopno);
+static const char *backref(struct match *, const char *, const char *, sopno,
+ sopno, sopno, int);
+static const char *fast(struct match *, const char *, const char *, sopno, sopno);
+static const char *slow(struct match *, const char *, const char *, sopno, sopno);
+static states step(struct re_guts *, sopno, sopno, states, int, states);
+#define MAX_RECURSION 100
+#define BOL (OUT+1)
+#define EOL (BOL+1)
+#define BOLEOL (BOL+2)
+#define NOTHING (BOL+3)
+#define BOW (BOL+4)
+#define EOW (BOL+5)
+#define CODEMAX (BOL+5) /* highest code used */
+#define NONCHAR(c) ((c) > CHAR_MAX)
+#define NNONCHAR (CODEMAX-CHAR_MAX)
+#ifdef REDEBUG
+static void print(struct match *, char *, states, int, FILE *);
+#endif
+#ifdef REDEBUG
+static void at(struct match *, char *, char *, char *, sopno, sopno);
+#endif
+#ifdef REDEBUG
+static char *pchar(int);
+#endif
+
+#ifdef REDEBUG
+#define SP(t, s, c) print(m, t, s, c, stdout)
+#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
+#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
+static int nope = 0;
+#else
+#define SP(t, s, c) /* nothing */
+#define AT(t, p1, p2, s1, s2) /* nothing */
+#define NOTE(s) /* nothing */
+#endif
+
+/*
+ - matcher - the actual matching engine
+ */
+static int /* 0 success, REG_NOMATCH failure */
+matcher(struct re_guts *g, const char *string, size_t nmatch,
+ llvm_regmatch_t pmatch[],
+ int eflags)
+{
+ const char *endp;
+ size_t i;
+ struct match mv;
+ struct match *m = &mv;
+ const char *dp;
+ const sopno gf = g->firststate+1; /* +1 for OEND */
+ const sopno gl = g->laststate;
+ const char *start;
+ const char *stop;
+
+ /* simplify the situation where possible */
+ if (g->cflags&REG_NOSUB)
+ nmatch = 0;
+ if (eflags&REG_STARTEND) {
+ start = string + pmatch[0].rm_so;
+ stop = string + pmatch[0].rm_eo;
+ } else {
+ start = string;
+ stop = start + strlen(start);
+ }
+ if (stop < start)
+ return(REG_INVARG);
+
+ /* prescreening; this does wonders for this rather slow code */
+ if (g->must != NULL) {
+ for (dp = start; dp < stop; dp++)
+ if (*dp == g->must[0] && stop - dp >= g->mlen &&
+ memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ break;
+ if (dp == stop) /* we didn't find g->must */
+ return(REG_NOMATCH);
+ }
+
+ /* match struct setup */
+ m->g = g;
+ m->eflags = eflags;
+ m->pmatch = NULL;
+ m->lastpos = NULL;
+ m->offp = string;
+ m->beginp = start;
+ m->endp = stop;
+ STATESETUP(m, 4);
+ SETUP(m->st);
+ SETUP(m->fresh);
+ SETUP(m->tmp);
+ SETUP(m->empty);
+ CLEAR(m->empty);
+
+ /* this loop does only one repetition except for backrefs */
+ for (;;) {
+ endp = fast(m, start, stop, gf, gl);
+ if (endp == NULL) { /* a miss */
+ free(m->pmatch);
+ free((void*)m->lastpos);
+ STATETEARDOWN(m);
+ return(REG_NOMATCH);
+ }
+ if (nmatch == 0 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* where? */
+ assert(m->coldp != NULL);
+ for (;;) {
+ NOTE("finding start");
+ endp = slow(m, m->coldp, stop, gf, gl);
+ if (endp != NULL)
+ break;
+ assert(m->coldp < m->endp);
+ m->coldp++;
+ }
+ if (nmatch == 1 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* oh my, he wants the subexpressions... */
+ if (m->pmatch == NULL)
+ m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
+ sizeof(llvm_regmatch_t));
+ if (m->pmatch == NULL) {
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ for (i = 1; i <= m->g->nsub; i++)
+ m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
+ if (!g->backrefs && !(m->eflags&REG_BACKR)) {
+ NOTE("dissecting");
+ dp = dissect(m, m->coldp, endp, gf, gl);
+ } else {
+ if (g->nplus > 0 && m->lastpos == NULL)
+ m->lastpos = (const char **)malloc((g->nplus+1) *
+ sizeof(char *));
+ if (g->nplus > 0 && m->lastpos == NULL) {
+ free(m->pmatch);
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ NOTE("backref dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ }
+ if (dp != NULL)
+ break;
+
+ /* uh-oh... we couldn't find a subexpression-level match */
+ assert(g->backrefs); /* must be back references doing it */
+ assert(g->nplus == 0 || m->lastpos != NULL);
+ for (;;) {
+ if (dp != NULL || endp <= m->coldp)
+ break; /* defeat */
+ NOTE("backoff");
+ endp = slow(m, m->coldp, endp-1, gf, gl);
+ if (endp == NULL)
+ break; /* defeat */
+ /* try it on a shorter possibility */
+#ifndef NDEBUG
+ for (i = 1; i <= m->g->nsub; i++) {
+ assert(m->pmatch[i].rm_so == -1);
+ assert(m->pmatch[i].rm_eo == -1);
+ }
+#endif
+ NOTE("backoff dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ }
+ assert(dp == NULL || dp == endp);
+ if (dp != NULL) /* found a shorter one */
+ break;
+
+ /* despite initial appearances, there is no match here */
+ NOTE("false alarm");
+ if (m->coldp == stop)
+ break;
+ start = m->coldp + 1; /* recycle starting later */
+ }
+
+ /* fill in the details if requested */
+ if (nmatch > 0) {
+ pmatch[0].rm_so = m->coldp - m->offp;
+ pmatch[0].rm_eo = endp - m->offp;
+ }
+ if (nmatch > 1) {
+ assert(m->pmatch != NULL);
+ for (i = 1; i < nmatch; i++)
+ if (i <= m->g->nsub)
+ pmatch[i] = m->pmatch[i];
+ else {
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ }
+ }
+
+ if (m->pmatch != NULL)
+ free((char *)m->pmatch);
+ if (m->lastpos != NULL)
+ free((char *)m->lastpos);
+ STATETEARDOWN(m);
+ return(0);
+}
+
+/*
+ - dissect - figure out what matched what, no back references
+ */
+static const char * /* == stop (success) always */
+dissect(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst)
+{
+ int i;
+ sopno ss; /* start sop of current subRE */
+ sopno es; /* end sop of current subRE */
+ const char *sp; /* start of string matched by it */
+ const char *stp; /* string matched by it cannot pass here */
+ const char *rest; /* start of rest of string */
+ const char *tail; /* string unmatched by rest of RE */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *sep; /* end of string matched by subsubRE */
+ const char *oldssp; /* previous ssp */
+
+ AT("diss", start, stop, startst, stopst);
+ sp = start;
+ for (ss = startst; ss < stopst; ss = es) {
+ /* identify end of subRE */
+ es = ss;
+ switch (OP(m->g->strip[es])) {
+ case OPLUS_:
+ case OQUEST_:
+ es += OPND(m->g->strip[es]);
+ break;
+ case OCH_:
+ while (OP(m->g->strip[es]) != O_CH)
+ es += OPND(m->g->strip[es]);
+ break;
+ }
+ es++;
+
+ /* figure out what it matched */
+ switch (OP(m->g->strip[ss])) {
+ case OEND:
+ assert(nope);
+ break;
+ case OCHAR:
+ sp++;
+ break;
+ case OBOL:
+ case OEOL:
+ case OBOW:
+ case OEOW:
+ break;
+ case OANY:
+ case OANYOF:
+ sp++;
+ break;
+ case OBACK_:
+ case O_BACK:
+ assert(nope);
+ break;
+ /* cases where length of match is hard to find */
+ case OQUEST_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = es - 1;
+ /* did innards match? */
+ if (slow(m, sp, rest, ssub, esub) != NULL) {
+ const char *dp = dissect(m, sp, rest, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == rest);
+ } else /* no */
+ assert(sp == rest);
+ sp = rest;
+ break;
+ case OPLUS_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = es - 1;
+ ssp = sp;
+ oldssp = ssp;
+ for (;;) { /* find last match of innards */
+ sep = slow(m, ssp, rest, ssub, esub);
+ if (sep == NULL || sep == ssp)
+ break; /* failed or matched null */
+ oldssp = ssp; /* on to next try */
+ ssp = sep;
+ }
+ if (sep == NULL) {
+ /* last successful match */
+ sep = ssp;
+ ssp = oldssp;
+ }
+ assert(sep == rest); /* must exhaust substring */
+ assert(slow(m, ssp, sep, ssub, esub) == rest);
+ {
+ const char *dp = dissect(m, ssp, sep, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == sep);
+ }
+ sp = rest;
+ break;
+ case OCH_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = ss + OPND(m->g->strip[ss]) - 1;
+ assert(OP(m->g->strip[esub]) == OOR1);
+ for (;;) { /* find first matching branch */
+ if (slow(m, sp, rest, ssub, esub) == rest)
+ break; /* it matched all of it */
+ /* that one missed, try next one */
+ assert(OP(m->g->strip[esub]) == OOR1);
+ esub++;
+ assert(OP(m->g->strip[esub]) == OOR2);
+ ssub = esub + 1;
+ esub += OPND(m->g->strip[esub]);
+ if (OP(m->g->strip[esub]) == OOR2)
+ esub--;
+ else
+ assert(OP(m->g->strip[esub]) == O_CH);
+ }
+ {
+ const char *dp = dissect(m, sp, rest, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == rest);
+ }
+ sp = rest;
+ break;
+ case O_PLUS:
+ case O_QUEST:
+ case OOR1:
+ case OOR2:
+ case O_CH:
+ assert(nope);
+ break;
+ case OLPAREN:
+ i = OPND(m->g->strip[ss]);
+ assert(0 < i && i <= m->g->nsub);
+ m->pmatch[i].rm_so = sp - m->offp;
+ break;
+ case ORPAREN:
+ i = OPND(m->g->strip[ss]);
+ assert(0 < i && i <= m->g->nsub);
+ m->pmatch[i].rm_eo = sp - m->offp;
+ break;
+ default: /* uh oh */
+ assert(nope);
+ break;
+ }
+ }
+
+ assert(sp == stop);
+ return(sp);
+}
+
+/*
+ - backref - figure out what matched what, figuring in back references
+ */
+static const char * /* == stop (success) or NULL (failure) */
+backref(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst, sopno lev, int rec) /* PLUS nesting level */
+{
+ int i;
+ sopno ss; /* start sop of current subRE */
+ const char *sp; /* start of string matched by it */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *dp;
+ size_t len;
+ int hard;
+ sop s;
+ llvm_regoff_t offsave;
+ cset *cs;
+
+ AT("back", start, stop, startst, stopst);
+ sp = start;
+
+ /* get as far as we can with easy stuff */
+ hard = 0;
+ for (ss = startst; !hard && ss < stopst; ss++)
+ switch (OP(s = m->g->strip[ss])) {
+ case OCHAR:
+ if (sp == stop || *sp++ != (char)OPND(s))
+ return(NULL);
+ break;
+ case OANY:
+ if (sp == stop)
+ return(NULL);
+ sp++;
+ break;
+ case OANYOF:
+ cs = &m->g->sets[OPND(s)];
+ if (sp == stop || !CHIN(cs, *sp++))
+ return(NULL);
+ break;
+ case OBOL:
+ if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+ (sp < m->endp && *(sp-1) == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOL:
+ if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+ (sp < m->endp && *sp == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OBOW:
+ if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+ (sp < m->endp && *(sp-1) == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) ||
+ (sp > m->beginp &&
+ !ISWORD(*(sp-1))) ) &&
+ (sp < m->endp && ISWORD(*sp)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOW:
+ if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+ (sp < m->endp && *sp == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) ||
+ (sp < m->endp && !ISWORD(*sp)) ) &&
+ (sp > m->beginp && ISWORD(*(sp-1))) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case O_QUEST:
+ break;
+ case OOR1: /* matches null but needs to skip */
+ ss++;
+ s = m->g->strip[ss];
+ do {
+ assert(OP(s) == OOR2);
+ ss += OPND(s);
+ } while (OP(s = m->g->strip[ss]) != O_CH);
+ /* note that the ss++ gets us past the O_CH */
+ break;
+ default: /* have to make a choice */
+ hard = 1;
+ break;
+ }
+ if (!hard) { /* that was it! */
+ if (sp != stop)
+ return(NULL);
+ return(sp);
+ }
+ ss--; /* adjust for the for's final increment */
+
+ /* the hard stuff */
+ AT("hard", sp, stop, ss, stopst);
+ s = m->g->strip[ss];
+ switch (OP(s)) {
+ case OBACK_: /* the vilest depths */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ if (m->pmatch[i].rm_eo == -1)
+ return(NULL);
+ assert(m->pmatch[i].rm_so != -1);
+ len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+ if (len == 0 && rec++ > MAX_RECURSION)
+ return(NULL);
+ assert(stop - m->beginp >= len);
+ if (sp > stop - len)
+ return(NULL); /* not enough left to match */
+ ssp = m->offp + m->pmatch[i].rm_so;
+ if (memcmp(sp, ssp, len) != 0)
+ return(NULL);
+ while (m->g->strip[ss] != SOP(O_BACK, i))
+ ss++;
+ return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
+ break;
+ case OQUEST_: /* to null or not */
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp); /* not */
+ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
+ break;
+ case OPLUS_:
+ assert(m->lastpos != NULL);
+ assert(lev+1 <= m->g->nplus);
+ m->lastpos[lev+1] = sp;
+ return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
+ break;
+ case O_PLUS:
+ if (sp == m->lastpos[lev]) /* last pass matched null */
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ /* try another pass */
+ m->lastpos[lev] = sp;
+ dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
+ if (dp == NULL)
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ else
+ return(dp);
+ break;
+ case OCH_: /* find the right one, if any */
+ ssub = ss + 1;
+ esub = ss + OPND(s) - 1;
+ assert(OP(m->g->strip[esub]) == OOR1);
+ for (;;) { /* find first matching branch */
+ dp = backref(m, sp, stop, ssub, esub, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ /* that one missed, try next one */
+ if (OP(m->g->strip[esub]) == O_CH)
+ return(NULL); /* there is none */
+ esub++;
+ assert(OP(m->g->strip[esub]) == OOR2);
+ ssub = esub + 1;
+ esub += OPND(m->g->strip[esub]);
+ if (OP(m->g->strip[esub]) == OOR2)
+ esub--;
+ else
+ assert(OP(m->g->strip[esub]) == O_CH);
+ }
+ break;
+ case OLPAREN: /* must undo assignment if rest fails */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ offsave = m->pmatch[i].rm_so;
+ m->pmatch[i].rm_so = sp - m->offp;
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ m->pmatch[i].rm_so = offsave;
+ return(NULL);
+ break;
+ case ORPAREN: /* must undo assignment if rest fails */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ offsave = m->pmatch[i].rm_eo;
+ m->pmatch[i].rm_eo = sp - m->offp;
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ m->pmatch[i].rm_eo = offsave;
+ return(NULL);
+ break;
+ default: /* uh oh */
+ assert(nope);
+ break;
+ }
+
+ /* "can't happen" */
+ assert(nope);
+ /* NOTREACHED */
+ return NULL;
+}
+
+/*
+ - fast - step through the string at top speed
+ */
+static const char * /* where tentative match ended, or NULL */
+fast(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst)
+{
+ states st = m->st;
+ states fresh = m->fresh;
+ states tmp = m->tmp;
+ const char *p = start;
+ int c = (start == m->beginp) ? OUT : *(start-1);
+ int lastc; /* previous c */
+ int flagch;
+ int i;
+ const char *coldp; /* last p after which no match was underway */
+
+ CLEAR(st);
+ SET1(st, startst);
+ st = step(m->g, startst, stopst, st, NOTHING, st);
+ ASSIGN(fresh, st);
+ SP("start", st, *p);
+ coldp = NULL;
+ for (;;) {
+ /* next character */
+ lastc = c;
+ c = (p == m->endp) ? OUT : *p;
+ if (EQ(st, fresh))
+ coldp = p;
+
+ /* is there an EOL and/or BOL between lastc and c? */
+ flagch = '\0';
+ i = 0;
+ if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+ flagch = BOL;
+ i = m->g->nbol;
+ }
+ if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+ flagch = (flagch == BOL) ? BOLEOL : EOL;
+ i += m->g->neol;
+ }
+ if (i != 0) {
+ for (; i > 0; i--)
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("boleol", st, c);
+ }
+
+ /* how about a word boundary? */
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
+ flagch = BOW;
+ }
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ flagch = EOW;
+ }
+ if (flagch == BOW || flagch == EOW) {
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("boweow", st, c);
+ }
+
+ /* are we done? */
+ if (ISSET(st, stopst) || p == stop)
+ break; /* NOTE BREAK OUT */
+
+ /* no, we must deal with this character */
+ ASSIGN(tmp, st);
+ ASSIGN(st, fresh);
+ assert(c != OUT);
+ st = step(m->g, startst, stopst, tmp, c, st);
+ SP("aft", st, c);
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ p++;
+ }
+
+ assert(coldp != NULL);
+ m->coldp = coldp;
+ if (ISSET(st, stopst))
+ return(p+1);
+ else
+ return(NULL);
+}
+
+/*
+ - slow - step through the string more deliberately
+ */
+static const char * /* where it ended */
+slow(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst)
+{
+ states st = m->st;
+ states empty = m->empty;
+ states tmp = m->tmp;
+ const char *p = start;
+ int c = (start == m->beginp) ? OUT : *(start-1);
+ int lastc; /* previous c */
+ int flagch;
+ int i;
+ const char *matchp; /* last p at which a match ended */
+
+ AT("slow", start, stop, startst, stopst);
+ CLEAR(st);
+ SET1(st, startst);
+ SP("sstart", st, *p);
+ st = step(m->g, startst, stopst, st, NOTHING, st);
+ matchp = NULL;
+ for (;;) {
+ /* next character */
+ lastc = c;
+ c = (p == m->endp) ? OUT : *p;
+
+ /* is there an EOL and/or BOL between lastc and c? */
+ flagch = '\0';
+ i = 0;
+ if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+ flagch = BOL;
+ i = m->g->nbol;
+ }
+ if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+ flagch = (flagch == BOL) ? BOLEOL : EOL;
+ i += m->g->neol;
+ }
+ if (i != 0) {
+ for (; i > 0; i--)
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("sboleol", st, c);
+ }
+
+ /* how about a word boundary? */
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
+ flagch = BOW;
+ }
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ flagch = EOW;
+ }
+ if (flagch == BOW || flagch == EOW) {
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("sboweow", st, c);
+ }
+
+ /* are we done? */
+ if (ISSET(st, stopst))
+ matchp = p;
+ if (EQ(st, empty) || p == stop)
+ break; /* NOTE BREAK OUT */
+
+ /* no, we must deal with this character */
+ ASSIGN(tmp, st);
+ ASSIGN(st, empty);
+ assert(c != OUT);
+ st = step(m->g, startst, stopst, tmp, c, st);
+ SP("saft", st, c);
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ p++;
+ }
+
+ return(matchp);
+}
+
+
+/*
+ - step - map set of states reachable before char to set reachable after
+ */
+static states
+step(struct re_guts *g,
+ sopno start, /* start state within strip */
+ sopno stop, /* state after stop state within strip */
+ states bef, /* states reachable before */
+ int ch, /* character or NONCHAR code */
+ states aft) /* states already known reachable after */
+{
+ cset *cs;
+ sop s;
+ sopno pc;
+ onestate here; /* note, macros know this name */
+ sopno look;
+ int i;
+
+ for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
+ s = g->strip[pc];
+ switch (OP(s)) {
+ case OEND:
+ assert(pc == stop-1);
+ break;
+ case OCHAR:
+ /* only characters can match */
+ assert(!NONCHAR(ch) || ch != (char)OPND(s));
+ if (ch == (char)OPND(s))
+ FWD(aft, bef, 1);
+ break;
+ case OBOL:
+ if (ch == BOL || ch == BOLEOL)
+ FWD(aft, bef, 1);
+ break;
+ case OEOL:
+ if (ch == EOL || ch == BOLEOL)
+ FWD(aft, bef, 1);
+ break;
+ case OBOW:
+ if (ch == BOW)
+ FWD(aft, bef, 1);
+ break;
+ case OEOW:
+ if (ch == EOW)
+ FWD(aft, bef, 1);
+ break;
+ case OANY:
+ if (!NONCHAR(ch))
+ FWD(aft, bef, 1);
+ break;
+ case OANYOF:
+ cs = &g->sets[OPND(s)];
+ if (!NONCHAR(ch) && CHIN(cs, ch))
+ FWD(aft, bef, 1);
+ break;
+ case OBACK_: /* ignored here */
+ case O_BACK:
+ FWD(aft, aft, 1);
+ break;
+ case OPLUS_: /* forward, this is just an empty */
+ FWD(aft, aft, 1);
+ break;
+ case O_PLUS: /* both forward and back */
+ FWD(aft, aft, 1);
+ i = ISSETBACK(aft, OPND(s));
+ BACK(aft, aft, OPND(s));
+ if (!i && ISSETBACK(aft, OPND(s))) {
+ /* oho, must reconsider loop body */
+ pc -= OPND(s) + 1;
+ INIT(here, pc);
+ }
+ break;
+ case OQUEST_: /* two branches, both forward */
+ FWD(aft, aft, 1);
+ FWD(aft, aft, OPND(s));
+ break;
+ case O_QUEST: /* just an empty */
+ FWD(aft, aft, 1);
+ break;
+ case OLPAREN: /* not significant here */
+ case ORPAREN:
+ FWD(aft, aft, 1);
+ break;
+ case OCH_: /* mark the first two branches */
+ FWD(aft, aft, 1);
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ FWD(aft, aft, OPND(s));
+ break;
+ case OOR1: /* done a branch, find the O_CH */
+ if (ISSTATEIN(aft, here)) {
+ for (look = 1;
+ OP(s = g->strip[pc+look]) != O_CH;
+ look += OPND(s))
+ assert(OP(s) == OOR2);
+ FWD(aft, aft, look);
+ }
+ break;
+ case OOR2: /* propagate OCH_'s marking */
+ FWD(aft, aft, 1);
+ if (OP(g->strip[pc+OPND(s)]) != O_CH) {
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ FWD(aft, aft, OPND(s));
+ }
+ break;
+ case O_CH: /* just empty */
+ FWD(aft, aft, 1);
+ break;
+ default: /* ooooops... */
+ assert(nope);
+ break;
+ }
+ }
+
+ return(aft);
+}
+
+#ifdef REDEBUG
+/*
+ - print - print a set of states
+ */
+static void
+print(struct match *m, char *caption, states st, int ch, FILE *d)
+{
+ struct re_guts *g = m->g;
+ int i;
+ int first = 1;
+
+ if (!(m->eflags&REG_TRACE))
+ return;
+
+ (void)fprintf(d, "%s", caption);
+ if (ch != '\0')
+ (void)fprintf(d, " %s", pchar(ch));
+ for (i = 0; i < g->nstates; i++)
+ if (ISSET(st, i)) {
+ (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
+ first = 0;
+ }
+ (void)fprintf(d, "\n");
+}
+
+/*
+ - at - print current situation
+ */
+static void
+at(struct match *m, char *title, char *start, char *stop, sopno startst,
+ sopno stopst)
+{
+ if (!(m->eflags&REG_TRACE))
+ return;
+
+ (void)printf("%s %s-", title, pchar(*start));
+ (void)printf("%s ", pchar(*stop));
+ (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
+}
+
+#ifndef PCHARDONE
+#define PCHARDONE /* never again */
+/*
+ - pchar - make a character printable
+ *
+ * Is this identical to regchar() over in debug.c? Well, yes. But a
+ * duplicate here avoids having a debugging-capable regexec.o tied to
+ * a matching debug.o, and this is convenient. It all disappears in
+ * the non-debug compilation anyway, so it doesn't matter much.
+ */
+static char * /* -> representation */
+pchar(int ch)
+{
+ static char pbuf[10];
+
+ if (isprint(ch) || ch == ' ')
+ (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
+ else
+ (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
+ return(pbuf);
+}
+#endif
+#endif
+
+#undef matcher
+#undef fast
+#undef slow
+#undef dissect
+#undef backref
+#undef step
+#undef print
+#undef at
+#undef match
+#undef nope
diff --git a/contrib/llvm/lib/Support/regerror.c b/contrib/llvm/lib/Support/regerror.c
new file mode 100644
index 000000000000..1d67c9a2b03b
--- /dev/null
+++ b/contrib/llvm/lib/Support/regerror.c
@@ -0,0 +1,135 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regerror.c 8.4 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+
+#ifdef _MSC_VER
+#define snprintf _snprintf
+#endif
+
+static const char *regatoi(const llvm_regex_t *, char *, int);
+
+static struct rerr {
+ int code;
+ const char *name;
+ const char *explain;
+} rerrs[] = {
+ { REG_NOMATCH, "REG_NOMATCH", "llvm_regexec() failed to match" },
+ { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
+ { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+ { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
+ { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
+ { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
+ { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
+ { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
+ { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
+ { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
+ { REG_ERANGE, "REG_ERANGE", "invalid character range" },
+ { REG_ESPACE, "REG_ESPACE", "out of memory" },
+ { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
+ { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
+ { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
+ { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
+ { 0, "", "*** unknown regexp error code ***" }
+};
+
+/*
+ - llvm_regerror - the interface to error numbers
+ = extern size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+ */
+/* ARGSUSED */
+size_t
+llvm_regerror(int errcode, const llvm_regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+ struct rerr *r;
+ size_t len;
+ int target = errcode &~ REG_ITOA;
+ const char *s;
+ char convbuf[50];
+
+ if (errcode == REG_ATOI)
+ s = regatoi(preg, convbuf, sizeof convbuf);
+ else {
+ for (r = rerrs; r->code != 0; r++)
+ if (r->code == target)
+ break;
+
+ if (errcode&REG_ITOA) {
+ if (r->code != 0) {
+ assert(strlen(r->name) < sizeof(convbuf));
+ (void) llvm_strlcpy(convbuf, r->name, sizeof convbuf);
+ } else
+ (void)snprintf(convbuf, sizeof convbuf,
+ "REG_0x%x", target);
+ s = convbuf;
+ } else
+ s = r->explain;
+ }
+
+ len = strlen(s) + 1;
+ if (errbuf_size > 0) {
+ llvm_strlcpy(errbuf, s, errbuf_size);
+ }
+
+ return(len);
+}
+
+/*
+ - regatoi - internal routine to implement REG_ATOI
+ */
+static const char *
+regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize)
+{
+ struct rerr *r;
+
+ for (r = rerrs; r->code != 0; r++)
+ if (strcmp(r->name, preg->re_endp) == 0)
+ break;
+ if (r->code == 0)
+ return("0");
+
+ (void)snprintf(localbuf, localbufsize, "%d", r->code);
+ return(localbuf);
+}
diff --git a/contrib/llvm/lib/Support/regex2.h b/contrib/llvm/lib/Support/regex2.h
new file mode 100644
index 000000000000..21659c34449a
--- /dev/null
+++ b/contrib/llvm/lib/Support/regex2.h
@@ -0,0 +1,157 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex2.h 8.4 (Berkeley) 3/20/94
+ */
+
+/*
+ * internals of regex_t
+ */
+#define MAGIC1 ((('r'^0200)<<8) | 'e')
+
+/*
+ * The internal representation is a *strip*, a sequence of
+ * operators ending with an endmarker. (Some terminology etc. is a
+ * historical relic of earlier versions which used multiple strips.)
+ * Certain oddities in the representation are there to permit running
+ * the machinery backwards; in particular, any deviation from sequential
+ * flow must be marked at both its source and its destination. Some
+ * fine points:
+ *
+ * - OPLUS_ and O_PLUS are *inside* the loop they create.
+ * - OQUEST_ and O_QUEST are *outside* the bypass they create.
+ * - OCH_ and O_CH are *outside* the multi-way branch they create, while
+ * OOR1 and OOR2 are respectively the end and the beginning of one of
+ * the branches. Note that there is an implicit OOR2 following OCH_
+ * and an implicit OOR1 preceding O_CH.
+ *
+ * In state representations, an operator's bit is on to signify a state
+ * immediately *preceding* "execution" of that operator.
+ */
+typedef unsigned long sop; /* strip operator */
+typedef long sopno;
+#define OPRMASK 0xf8000000LU
+#define OPDMASK 0x07ffffffLU
+#define OPSHIFT ((unsigned)27)
+#define OP(n) ((n)&OPRMASK)
+#define OPND(n) ((n)&OPDMASK)
+#define SOP(op, opnd) ((op)|(opnd))
+/* operators meaning operand */
+/* (back, fwd are offsets) */
+#define OEND (1LU<<OPSHIFT) /* endmarker - */
+#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */
+#define OBOL (3LU<<OPSHIFT) /* left anchor - */
+#define OEOL (4LU<<OPSHIFT) /* right anchor - */
+#define OANY (5LU<<OPSHIFT) /* . - */
+#define OANYOF (6LU<<OPSHIFT) /* [...] set number */
+#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */
+#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */
+#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */
+#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */
+#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */
+#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */
+#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */
+#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */
+#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */
+#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
+#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
+#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */
+#define OBOW (19LU<<OPSHIFT) /* begin word - */
+#define OEOW (20LU<<OPSHIFT) /* end word - */
+
+/*
+ * Structure for [] character-set representation. Character sets are
+ * done as bit vectors, grouped 8 to a byte vector for compactness.
+ * The individual set therefore has both a pointer to the byte vector
+ * and a mask to pick out the relevant bit of each byte. A hash code
+ * simplifies testing whether two sets could be identical.
+ *
+ * This will get trickier for multicharacter collating elements. As
+ * preliminary hooks for dealing with such things, we also carry along
+ * a string of multi-character elements, and decide the size of the
+ * vectors at run time.
+ */
+typedef struct {
+ uch *ptr; /* -> uch [csetsize] */
+ uch mask; /* bit within array */
+ uch hash; /* hash code */
+ size_t smultis;
+ char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
+} cset;
+/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
+#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
+#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
+#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
+#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* llvm_regcomp() internal fns */
+#define MCsub(p, cs, cp) mcsub(p, cs, cp)
+#define MCin(p, cs, cp) mcin(p, cs, cp)
+
+/* stuff for character categories */
+typedef unsigned char cat_t;
+
+/*
+ * main compiled-expression structure
+ */
+struct re_guts {
+ int magic;
+# define MAGIC2 ((('R'^0200)<<8)|'E')
+ sop *strip; /* malloced area for strip */
+ int csetsize; /* number of bits in a cset vector */
+ int ncsets; /* number of csets in use */
+ cset *sets; /* -> cset [ncsets] */
+ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
+ int cflags; /* copy of llvm_regcomp() cflags argument */
+ sopno nstates; /* = number of sops */
+ sopno firststate; /* the initial OEND (normally 0) */
+ sopno laststate; /* the final OEND */
+ int iflags; /* internal flags */
+# define USEBOL 01 /* used ^ */
+# define USEEOL 02 /* used $ */
+# define REGEX_BAD 04 /* something wrong */
+ int nbol; /* number of ^ used */
+ int neol; /* number of $ used */
+ int ncategories; /* how many character categories */
+ cat_t *categories; /* ->catspace[-CHAR_MIN] */
+ char *must; /* match must contain this string */
+ int mlen; /* length of must */
+ size_t nsub; /* copy of re_nsub */
+ int backrefs; /* does it use back references? */
+ sopno nplus; /* how deep does it nest +s? */
+ /* catspace must be last */
+ cat_t catspace[1]; /* actually [NC] */
+};
+
+/* misc utilities */
+#define OUT (CHAR_MAX+1) /* a non-character value */
+#define ISWORD(c) (isalnum(c&0xff) || (c) == '_')
diff --git a/contrib/llvm/lib/Support/regex_impl.h b/contrib/llvm/lib/Support/regex_impl.h
new file mode 100644
index 000000000000..f8296c9ff75e
--- /dev/null
+++ b/contrib/llvm/lib/Support/regex_impl.h
@@ -0,0 +1,108 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992 Henry Spencer.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer of the University of Toronto.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex.h 8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _REGEX_H_
+#define _REGEX_H_
+
+#include <sys/types.h>
+typedef off_t llvm_regoff_t;
+typedef struct {
+ llvm_regoff_t rm_so; /* start of match */
+ llvm_regoff_t rm_eo; /* end of match */
+} llvm_regmatch_t;
+
+typedef struct llvm_regex {
+ int re_magic;
+ size_t re_nsub; /* number of parenthesized subexpressions */
+ const char *re_endp; /* end pointer for REG_PEND */
+ struct re_guts *re_g; /* none of your business :-) */
+} llvm_regex_t;
+
+/* llvm_regcomp() flags */
+#define REG_BASIC 0000
+#define REG_EXTENDED 0001
+#define REG_ICASE 0002
+#define REG_NOSUB 0004
+#define REG_NEWLINE 0010
+#define REG_NOSPEC 0020
+#define REG_PEND 0040
+#define REG_DUMP 0200
+
+/* llvm_regerror() flags */
+#define REG_NOMATCH 1
+#define REG_BADPAT 2
+#define REG_ECOLLATE 3
+#define REG_ECTYPE 4
+#define REG_EESCAPE 5
+#define REG_ESUBREG 6
+#define REG_EBRACK 7
+#define REG_EPAREN 8
+#define REG_EBRACE 9
+#define REG_BADBR 10
+#define REG_ERANGE 11
+#define REG_ESPACE 12
+#define REG_BADRPT 13
+#define REG_EMPTY 14
+#define REG_ASSERT 15
+#define REG_INVARG 16
+#define REG_ATOI 255 /* convert name to number (!) */
+#define REG_ITOA 0400 /* convert number to name (!) */
+
+/* llvm_regexec() flags */
+#define REG_NOTBOL 00001
+#define REG_NOTEOL 00002
+#define REG_STARTEND 00004
+#define REG_TRACE 00400 /* tracing of execution */
+#define REG_LARGE 01000 /* force large representation */
+#define REG_BACKR 02000 /* force use of backref code */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int llvm_regcomp(llvm_regex_t *, const char *, int);
+size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+int llvm_regexec(const llvm_regex_t *, const char *, size_t,
+ llvm_regmatch_t [], int);
+void llvm_regfree(llvm_regex_t *);
+size_t llvm_strlcpy(char *dst, const char *src, size_t siz);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_REGEX_H_ */
diff --git a/contrib/llvm/lib/Support/regexec.c b/contrib/llvm/lib/Support/regexec.c
new file mode 100644
index 000000000000..bd5e72d4c522
--- /dev/null
+++ b/contrib/llvm/lib/Support/regexec.c
@@ -0,0 +1,162 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regexec.c 8.3 (Berkeley) 3/20/94
+ */
+
+/*
+ * the outer shell of llvm_regexec()
+ *
+ * This file includes engine.inc *twice*, after muchos fiddling with the
+ * macros that code uses. This lets the same code operate on two different
+ * representations for state sets.
+ */
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ctype.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/* macros for manipulating states, small version */
+/* FIXME: 'states' is assumed as 'long' on small version. */
+#define states1 long /* for later use in llvm_regexec() decision */
+#define states states1
+#define CLEAR(v) ((v) = 0)
+#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
+#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
+#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
+#define ASSIGN(d, s) ((d) = (s))
+#define EQ(a, b) ((a) == (b))
+#define STATEVARS long dummy /* dummy version */
+#define STATESETUP(m, n) /* nothing */
+#define STATETEARDOWN(m) /* nothing */
+#define SETUP(v) ((v) = 0)
+#define onestate long
+#define INIT(o, n) ((o) = (unsigned long)1 << (n))
+#define INC(o) ((o) = (unsigned long)(o) << 1)
+#define ISSTATEIN(v, o) (((v) & (o)) != 0)
+/* some abbreviations; note that some of these know variable names! */
+/* do "if I'm here, I can also be there" etc without branches */
+#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
+#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
+#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
+/* function names */
+#define SNAMES /* engine.inc looks after details */
+
+#include "regengine.inc"
+
+/* now undo things */
+#undef states
+#undef CLEAR
+#undef SET0
+#undef SET1
+#undef ISSET
+#undef ASSIGN
+#undef EQ
+#undef STATEVARS
+#undef STATESETUP
+#undef STATETEARDOWN
+#undef SETUP
+#undef onestate
+#undef INIT
+#undef INC
+#undef ISSTATEIN
+#undef FWD
+#undef BACK
+#undef ISSETBACK
+#undef SNAMES
+
+/* macros for manipulating states, large version */
+#define states char *
+#define CLEAR(v) memset(v, 0, m->g->nstates)
+#define SET0(v, n) ((v)[n] = 0)
+#define SET1(v, n) ((v)[n] = 1)
+#define ISSET(v, n) ((v)[n])
+#define ASSIGN(d, s) memmove(d, s, m->g->nstates)
+#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
+#define STATEVARS long vn; char *space
+#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
+ if ((m)->space == NULL) return(REG_ESPACE); \
+ (m)->vn = 0; }
+#define STATETEARDOWN(m) { free((m)->space); }
+#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
+#define onestate long
+#define INIT(o, n) ((o) = (n))
+#define INC(o) ((o)++)
+#define ISSTATEIN(v, o) ((v)[o])
+/* some abbreviations; note that some of these know variable names! */
+/* do "if I'm here, I can also be there" etc without branches */
+#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
+#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
+#define ISSETBACK(v, n) ((v)[here - (n)])
+/* function names */
+#define LNAMES /* flag */
+
+#include "regengine.inc"
+
+/*
+ - llvm_regexec - interface for matching
+ *
+ * We put this here so we can exploit knowledge of the state representation
+ * when choosing which matcher to call. Also, by this point the matchers
+ * have been prototyped.
+ */
+int /* 0 success, REG_NOMATCH failure */
+llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch,
+ llvm_regmatch_t pmatch[], int eflags)
+{
+ struct re_guts *g = preg->re_g;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
+#endif
+
+ if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
+ return(REG_BADPAT);
+ assert(!(g->iflags&REGEX_BAD));
+ if (g->iflags&REGEX_BAD) /* backstop for no-debug case */
+ return(REG_BADPAT);
+ eflags = GOODFLAGS(eflags);
+
+ if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
+ return(smatcher(g, string, nmatch, pmatch, eflags));
+ else
+ return(lmatcher(g, string, nmatch, pmatch, eflags));
+}
diff --git a/contrib/llvm/lib/Support/regfree.c b/contrib/llvm/lib/Support/regfree.c
new file mode 100644
index 000000000000..dc2b4af90fa7
--- /dev/null
+++ b/contrib/llvm/lib/Support/regfree.c
@@ -0,0 +1,72 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regfree.c 8.3 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/*
+ - llvm_regfree - free everything
+ */
+void
+llvm_regfree(llvm_regex_t *preg)
+{
+ struct re_guts *g;
+
+ if (preg->re_magic != MAGIC1) /* oops */
+ return; /* nice to complain, but hard */
+
+ g = preg->re_g;
+ if (g == NULL || g->magic != MAGIC2) /* oops again */
+ return;
+ preg->re_magic = 0; /* mark it invalid */
+ g->magic = 0; /* mark it invalid */
+
+ if (g->strip != NULL)
+ free((char *)g->strip);
+ if (g->sets != NULL)
+ free((char *)g->sets);
+ if (g->setbits != NULL)
+ free((char *)g->setbits);
+ if (g->must != NULL)
+ free(g->must);
+ free((char *)g);
+}
diff --git a/contrib/llvm/lib/Support/regstrlcpy.c b/contrib/llvm/lib/Support/regstrlcpy.c
new file mode 100644
index 000000000000..8b68afdf75f1
--- /dev/null
+++ b/contrib/llvm/lib/Support/regstrlcpy.c
@@ -0,0 +1,52 @@
+/*
+ * This code is derived from OpenBSD's libc, original license follows:
+ *
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+#include "regex_impl.h"
+/*
+ * Copy src to string dst of size siz. At most siz-1 characters
+ * will be copied. Always NUL terminates (unless siz == 0).
+ * Returns strlen(src); if retval >= siz, truncation occurred.
+ */
+size_t
+llvm_strlcpy(char *dst, const char *src, size_t siz)
+{
+ char *d = dst;
+ const char *s = src;
+ size_t n = siz;
+
+ /* Copy as many bytes as will fit */
+ if (n != 0) {
+ while (--n != 0) {
+ if ((*d++ = *s++) == '\0')
+ break;
+ }
+ }
+
+ /* Not enough room in dst, add NUL and traverse rest of src */
+ if (n == 0) {
+ if (siz != 0)
+ *d = '\0'; /* NUL-terminate dst */
+ while (*s++)
+ ;
+ }
+
+ return(s - src - 1); /* count does not include NUL */
+}
diff --git a/contrib/llvm/lib/Support/regutils.h b/contrib/llvm/lib/Support/regutils.h
new file mode 100644
index 000000000000..d0ee100a382b
--- /dev/null
+++ b/contrib/llvm/lib/Support/regutils.h
@@ -0,0 +1,53 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)utils.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* utility definitions */
+#define NC (CHAR_MAX - CHAR_MIN + 1)
+typedef unsigned char uch;
+
+/* switch off assertions (if not already off) if no REDEBUG */
+#ifndef REDEBUG
+#ifndef NDEBUG
+#define NDEBUG /* no assertions please */
+#endif
+#endif
+#include <assert.h>
+
+/* for old systems with bcopy() but no memmove() */
+#ifdef USEBCOPY
+#define memmove(d, s, c) bcopy(s, d, c)
+#endif
diff --git a/contrib/llvm/lib/Support/system_error.cpp b/contrib/llvm/lib/Support/system_error.cpp
new file mode 100644
index 000000000000..b22745afc330
--- /dev/null
+++ b/contrib/llvm/lib/Support/system_error.cpp
@@ -0,0 +1,130 @@
+//===---------------------- system_error.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This was lifted from libc++ and modified for C++03.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Errno.h"
+#include <cstring>
+#include <string>
+
+namespace llvm {
+
+// class error_category
+
+error_category::error_category() {
+}
+
+error_category::~error_category() {
+}
+
+error_condition
+error_category::default_error_condition(int ev) const {
+ return error_condition(ev, *this);
+}
+
+bool
+error_category::equivalent(int code, const error_condition& condition) const {
+ return default_error_condition(code) == condition;
+}
+
+bool
+error_category::equivalent(const error_code& code, int condition) const {
+ return *this == code.category() && code.value() == condition;
+}
+
+std::string
+_do_message::message(int ev) const {
+ return std::string(sys::StrError(ev));
+}
+
+class _generic_error_category : public _do_message {
+public:
+ virtual const char* name() const LLVM_OVERRIDE;
+ virtual std::string message(int ev) const LLVM_OVERRIDE;
+};
+
+const char*
+_generic_error_category::name() const {
+ return "generic";
+}
+
+std::string
+_generic_error_category::message(int ev) const {
+#ifdef ELAST
+ if (ev > ELAST)
+ return std::string("unspecified generic_category error");
+#endif // ELAST
+ return _do_message::message(ev);
+}
+
+const error_category&
+generic_category() {
+ static _generic_error_category s;
+ return s;
+}
+
+class _system_error_category : public _do_message {
+public:
+ virtual const char* name() const LLVM_OVERRIDE;
+ virtual std::string message(int ev) const LLVM_OVERRIDE;
+ virtual error_condition default_error_condition(int ev) const LLVM_OVERRIDE;
+};
+
+const char*
+_system_error_category::name() const {
+ return "system";
+}
+
+// std::string _system_error_category::message(int ev) const {
+// Is in Platform/system_error.inc
+
+// error_condition _system_error_category::default_error_condition(int ev) const
+// Is in Platform/system_error.inc
+
+const error_category&
+system_category() {
+ static _system_error_category s;
+ return s;
+}
+
+const error_category&
+posix_category() {
+#ifdef LLVM_ON_WIN32
+ return generic_category();
+#else
+ return system_category();
+#endif
+}
+
+// error_condition
+
+std::string
+error_condition::message() const {
+ return _cat_->message(_val_);
+}
+
+// error_code
+
+std::string
+error_code::message() const {
+ return _cat_->message(_val_);
+}
+
+} // end namespace llvm
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/system_error.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/system_error.inc"
+#endif
diff --git a/contrib/llvm/lib/TableGen/Error.cpp b/contrib/llvm/lib/TableGen/Error.cpp
new file mode 100644
index 000000000000..928b1203cd8f
--- /dev/null
+++ b/contrib/llvm/lib/TableGen/Error.cpp
@@ -0,0 +1,75 @@
+//===- Error.cpp - tblgen error handling helper routines --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains error handling helper routines to pretty-print diagnostic
+// messages from tblgen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/Error.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+
+namespace llvm {
+
+SourceMgr SrcMgr;
+unsigned ErrorsPrinted = 0;
+
+static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind,
+ const Twine &Msg) {
+ // Count the total number of errors printed.
+ // This is used to exit with an error code if there were any errors.
+ if (Kind == SourceMgr::DK_Error)
+ ++ErrorsPrinted;
+
+ SMLoc NullLoc;
+ if (Loc.empty())
+ Loc = NullLoc;
+ SrcMgr.PrintMessage(Loc.front(), Kind, Msg);
+ for (unsigned i = 1; i < Loc.size(); ++i)
+ SrcMgr.PrintMessage(Loc[i], SourceMgr::DK_Note,
+ "instantiated from multiclass");
+}
+
+void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) {
+ PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
+}
+
+void PrintWarning(const char *Loc, const Twine &Msg) {
+ SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Warning, Msg);
+}
+
+void PrintWarning(const Twine &Msg) {
+ errs() << "warning:" << Msg << "\n";
+}
+
+void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
+ PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
+}
+
+void PrintError(const char *Loc, const Twine &Msg) {
+ SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
+}
+
+void PrintError(const Twine &Msg) {
+ errs() << "error:" << Msg << "\n";
+}
+
+void PrintFatalError(const std::string &Msg) {
+ PrintError(Twine(Msg));
+ std::exit(1);
+}
+
+void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const std::string &Msg) {
+ PrintError(ErrorLoc, Msg);
+ std::exit(1);
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/TableGen/Main.cpp b/contrib/llvm/lib/TableGen/Main.cpp
new file mode 100644
index 000000000000..dc4167b305ca
--- /dev/null
+++ b/contrib/llvm/lib/TableGen/Main.cpp
@@ -0,0 +1,130 @@
+//===- Main.cpp - Top-Level TableGen implementation -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TableGen is a tool which can be used to build up a description of something,
+// then invoke one or more "tablegen backends" to emit information about the
+// description in some predefined format. In practice, this is used by the LLVM
+// code generators to automate generation of a code generator through a
+// high-level description of the target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TGParser.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Main.h"
+#include "llvm/TableGen/Record.h"
+#include <algorithm>
+#include <cstdio>
+using namespace llvm;
+
+namespace {
+ cl::opt<std::string>
+ OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"),
+ cl::init("-"));
+
+ cl::opt<std::string>
+ DependFilename("d",
+ cl::desc("Dependency filename"),
+ cl::value_desc("filename"),
+ cl::init(""));
+
+ cl::opt<std::string>
+ InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+ cl::list<std::string>
+ IncludeDirs("I", cl::desc("Directory of include files"),
+ cl::value_desc("directory"), cl::Prefix);
+}
+
+/// \brief Create a dependency file for `-d` option.
+///
+/// This functionality is really only for the benefit of the build system.
+/// It is similar to GCC's `-M*` family of options.
+static int createDependencyFile(const TGParser &Parser, const char *argv0) {
+ if (OutputFilename == "-") {
+ errs() << argv0 << ": the option -d must be used together with -o\n";
+ return 1;
+ }
+ std::string Error;
+ tool_output_file DepOut(DependFilename.c_str(), Error);
+ if (!Error.empty()) {
+ errs() << argv0 << ": error opening " << DependFilename
+ << ":" << Error << "\n";
+ return 1;
+ }
+ DepOut.os() << OutputFilename << ":";
+ const TGLexer::DependenciesMapTy &Dependencies = Parser.getDependencies();
+ for (TGLexer::DependenciesMapTy::const_iterator I = Dependencies.begin(),
+ E = Dependencies.end();
+ I != E; ++I) {
+ DepOut.os() << " " << I->first;
+ }
+ DepOut.os() << "\n";
+ DepOut.keep();
+ return 0;
+}
+
+namespace llvm {
+
+int TableGenMain(char *argv0, TableGenMainFn *MainFn) {
+ RecordKeeper Records;
+
+ // Parse the input file.
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec =
+ MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) {
+ errs() << "Could not open input file '" << InputFilename << "': "
+ << ec.message() <<"\n";
+ return 1;
+ }
+ MemoryBuffer *F = File.take();
+
+ // Tell SrcMgr about this buffer, which is what TGParser will pick up.
+ SrcMgr.AddNewSourceBuffer(F, SMLoc());
+
+ // Record the location of the include directory so that the lexer can find
+ // it later.
+ SrcMgr.setIncludeDirs(IncludeDirs);
+
+ TGParser Parser(SrcMgr, Records);
+
+ if (Parser.ParseFile())
+ return 1;
+
+ std::string Error;
+ tool_output_file Out(OutputFilename.c_str(), Error);
+ if (!Error.empty()) {
+ errs() << argv0 << ": error opening " << OutputFilename
+ << ":" << Error << "\n";
+ return 1;
+ }
+ if (!DependFilename.empty()) {
+ if (int Ret = createDependencyFile(Parser, argv0))
+ return Ret;
+ }
+
+ if (MainFn(Out.os(), Records))
+ return 1;
+
+ if (ErrorsPrinted > 0) {
+ errs() << argv0 << ": " << ErrorsPrinted << " errors.\n";
+ return 1;
+ }
+
+ // Declare success.
+ Out.keep();
+ return 0;
+}
+
+}
diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp
new file mode 100644
index 000000000000..9ad20532d7eb
--- /dev/null
+++ b/contrib/llvm/lib/TableGen/Record.cpp
@@ -0,0 +1,2057 @@
+//===- Record.cpp - Record implementation ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the tablegen record classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/Record.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/TableGen/Error.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// std::string wrapper for DenseMap purposes
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// TableGenStringKey - This is a wrapper for std::string suitable for
+/// using as a key to a DenseMap. Because there isn't a particularly
+/// good way to indicate tombstone or empty keys for strings, we want
+/// to wrap std::string to indicate that this is a "special" string
+/// not expected to take on certain values (those of the tombstone and
+/// empty keys). This makes things a little safer as it clarifies
+/// that DenseMap is really not appropriate for general strings.
+
+class TableGenStringKey {
+public:
+ TableGenStringKey(const std::string &str) : data(str) {}
+ TableGenStringKey(const char *str) : data(str) {}
+
+ const std::string &str() const { return data; }
+
+ friend hash_code hash_value(const TableGenStringKey &Value) {
+ using llvm::hash_value;
+ return hash_value(Value.str());
+ }
+private:
+ std::string data;
+};
+
+/// Specialize DenseMapInfo for TableGenStringKey.
+template<> struct DenseMapInfo<TableGenStringKey> {
+ static inline TableGenStringKey getEmptyKey() {
+ TableGenStringKey Empty("<<<EMPTY KEY>>>");
+ return Empty;
+ }
+ static inline TableGenStringKey getTombstoneKey() {
+ TableGenStringKey Tombstone("<<<TOMBSTONE KEY>>>");
+ return Tombstone;
+ }
+ static unsigned getHashValue(const TableGenStringKey& Val) {
+ using llvm::hash_value;
+ return hash_value(Val);
+ }
+ static bool isEqual(const TableGenStringKey& LHS,
+ const TableGenStringKey& RHS) {
+ return LHS.str() == RHS.str();
+ }
+};
+
+} // namespace llvm
+
+//===----------------------------------------------------------------------===//
+// Type implementations
+//===----------------------------------------------------------------------===//
+
+BitRecTy BitRecTy::Shared;
+IntRecTy IntRecTy::Shared;
+StringRecTy StringRecTy::Shared;
+DagRecTy DagRecTy::Shared;
+
+void RecTy::anchor() { }
+void RecTy::dump() const { print(errs()); }
+
+ListRecTy *RecTy::getListTy() {
+ if (!ListTy)
+ ListTy = new ListRecTy(this);
+ return ListTy;
+}
+
+bool RecTy::baseClassOf(const RecTy *RHS) const{
+ assert (RHS && "NULL pointer");
+ return Kind == RHS->getRecTyKind();
+}
+
+Init *BitRecTy::convertValue(BitsInit *BI) {
+ if (BI->getNumBits() != 1) return 0; // Only accept if just one bit!
+ return BI->getBit(0);
+}
+
+Init *BitRecTy::convertValue(IntInit *II) {
+ int64_t Val = II->getValue();
+ if (Val != 0 && Val != 1) return 0; // Only accept 0 or 1 for a bit!
+
+ return BitInit::get(Val != 0);
+}
+
+Init *BitRecTy::convertValue(TypedInit *VI) {
+ RecTy *Ty = VI->getType();
+ if (isa<BitRecTy>(Ty) || isa<BitsRecTy>(Ty) || isa<IntRecTy>(Ty))
+ return VI; // Accept variable if it is already of bit type!
+ return 0;
+}
+
+bool BitRecTy::baseClassOf(const RecTy *RHS) const{
+ if(RecTy::baseClassOf(RHS) || getRecTyKind() == IntRecTyKind)
+ return true;
+ if(const BitsRecTy *BitsTy = dyn_cast<BitsRecTy>(RHS))
+ return BitsTy->getNumBits() == 1;
+ return false;
+}
+
+BitsRecTy *BitsRecTy::get(unsigned Sz) {
+ static std::vector<BitsRecTy*> Shared;
+ if (Sz >= Shared.size())
+ Shared.resize(Sz + 1);
+ BitsRecTy *&Ty = Shared[Sz];
+ if (!Ty)
+ Ty = new BitsRecTy(Sz);
+ return Ty;
+}
+
+std::string BitsRecTy::getAsString() const {
+ return "bits<" + utostr(Size) + ">";
+}
+
+Init *BitsRecTy::convertValue(UnsetInit *UI) {
+ SmallVector<Init *, 16> NewBits(Size);
+
+ for (unsigned i = 0; i != Size; ++i)
+ NewBits[i] = UnsetInit::get();
+
+ return BitsInit::get(NewBits);
+}
+
+Init *BitsRecTy::convertValue(BitInit *UI) {
+ if (Size != 1) return 0; // Can only convert single bit.
+ return BitsInit::get(UI);
+}
+
+/// canFitInBitfield - Return true if the number of bits is large enough to hold
+/// the integer value.
+static bool canFitInBitfield(int64_t Value, unsigned NumBits) {
+ // For example, with NumBits == 4, we permit Values from [-7 .. 15].
+ return (NumBits >= sizeof(Value) * 8) ||
+ (Value >> NumBits == 0) || (Value >> (NumBits-1) == -1);
+}
+
+/// convertValue from Int initializer to bits type: Split the integer up into the
+/// appropriate bits.
+///
+Init *BitsRecTy::convertValue(IntInit *II) {
+ int64_t Value = II->getValue();
+ // Make sure this bitfield is large enough to hold the integer value.
+ if (!canFitInBitfield(Value, Size))
+ return 0;
+
+ SmallVector<Init *, 16> NewBits(Size);
+
+ for (unsigned i = 0; i != Size; ++i)
+ NewBits[i] = BitInit::get(Value & (1LL << i));
+
+ return BitsInit::get(NewBits);
+}
+
+Init *BitsRecTy::convertValue(BitsInit *BI) {
+ // If the number of bits is right, return it. Otherwise we need to expand or
+ // truncate.
+ if (BI->getNumBits() == Size) return BI;
+ return 0;
+}
+
+Init *BitsRecTy::convertValue(TypedInit *VI) {
+ if (Size == 1 && isa<BitRecTy>(VI->getType()))
+ return BitsInit::get(VI);
+
+ if (VI->getType()->typeIsConvertibleTo(this)) {
+ SmallVector<Init *, 16> NewBits(Size);
+
+ for (unsigned i = 0; i != Size; ++i)
+ NewBits[i] = VarBitInit::get(VI, i);
+ return BitsInit::get(NewBits);
+ }
+
+ return 0;
+}
+
+bool BitsRecTy::baseClassOf(const RecTy *RHS) const{
+ if (RecTy::baseClassOf(RHS)) //argument and the receiver are the same type
+ return cast<BitsRecTy>(RHS)->Size == Size;
+ RecTyKind kind = RHS->getRecTyKind();
+ return (kind == BitRecTyKind && Size == 1) || (kind == IntRecTyKind);
+}
+
+Init *IntRecTy::convertValue(BitInit *BI) {
+ return IntInit::get(BI->getValue());
+}
+
+Init *IntRecTy::convertValue(BitsInit *BI) {
+ int64_t Result = 0;
+ for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i)
+ if (BitInit *Bit = dyn_cast<BitInit>(BI->getBit(i))) {
+ Result |= Bit->getValue() << i;
+ } else {
+ return 0;
+ }
+ return IntInit::get(Result);
+}
+
+Init *IntRecTy::convertValue(TypedInit *TI) {
+ if (TI->getType()->typeIsConvertibleTo(this))
+ return TI; // Accept variable if already of the right type!
+ return 0;
+}
+
+bool IntRecTy::baseClassOf(const RecTy *RHS) const{
+ RecTyKind kind = RHS->getRecTyKind();
+ return kind==BitRecTyKind || kind==BitsRecTyKind || kind==IntRecTyKind;
+}
+
+Init *StringRecTy::convertValue(UnOpInit *BO) {
+ if (BO->getOpcode() == UnOpInit::CAST) {
+ Init *L = BO->getOperand()->convertInitializerTo(this);
+ if (L == 0) return 0;
+ if (L != BO->getOperand())
+ return UnOpInit::get(UnOpInit::CAST, L, new StringRecTy);
+ return BO;
+ }
+
+ return convertValue((TypedInit*)BO);
+}
+
+Init *StringRecTy::convertValue(BinOpInit *BO) {
+ if (BO->getOpcode() == BinOpInit::STRCONCAT) {
+ Init *L = BO->getLHS()->convertInitializerTo(this);
+ Init *R = BO->getRHS()->convertInitializerTo(this);
+ if (L == 0 || R == 0) return 0;
+ if (L != BO->getLHS() || R != BO->getRHS())
+ return BinOpInit::get(BinOpInit::STRCONCAT, L, R, new StringRecTy);
+ return BO;
+ }
+
+ return convertValue((TypedInit*)BO);
+}
+
+
+Init *StringRecTy::convertValue(TypedInit *TI) {
+ if (isa<StringRecTy>(TI->getType()))
+ return TI; // Accept variable if already of the right type!
+ return 0;
+}
+
+std::string ListRecTy::getAsString() const {
+ return "list<" + Ty->getAsString() + ">";
+}
+
+Init *ListRecTy::convertValue(ListInit *LI) {
+ std::vector<Init*> Elements;
+
+ // Verify that all of the elements of the list are subclasses of the
+ // appropriate class!
+ for (unsigned i = 0, e = LI->getSize(); i != e; ++i)
+ if (Init *CI = LI->getElement(i)->convertInitializerTo(Ty))
+ Elements.push_back(CI);
+ else
+ return 0;
+
+ if (!isa<ListRecTy>(LI->getType()))
+ return 0;
+
+ return ListInit::get(Elements, this);
+}
+
+Init *ListRecTy::convertValue(TypedInit *TI) {
+ // Ensure that TI is compatible with our class.
+ if (ListRecTy *LRT = dyn_cast<ListRecTy>(TI->getType()))
+ if (LRT->getElementType()->typeIsConvertibleTo(getElementType()))
+ return TI;
+ return 0;
+}
+
+bool ListRecTy::baseClassOf(const RecTy *RHS) const{
+ if(const ListRecTy* ListTy = dyn_cast<ListRecTy>(RHS))
+ return ListTy->getElementType()->typeIsConvertibleTo(Ty);
+ return false;
+}
+
+Init *DagRecTy::convertValue(TypedInit *TI) {
+ if (TI->getType()->typeIsConvertibleTo(this))
+ return TI;
+ return 0;
+}
+
+Init *DagRecTy::convertValue(UnOpInit *BO) {
+ if (BO->getOpcode() == UnOpInit::CAST) {
+ Init *L = BO->getOperand()->convertInitializerTo(this);
+ if (L == 0) return 0;
+ if (L != BO->getOperand())
+ return UnOpInit::get(UnOpInit::CAST, L, new DagRecTy);
+ return BO;
+ }
+ return 0;
+}
+
+Init *DagRecTy::convertValue(BinOpInit *BO) {
+ if (BO->getOpcode() == BinOpInit::CONCAT) {
+ Init *L = BO->getLHS()->convertInitializerTo(this);
+ Init *R = BO->getRHS()->convertInitializerTo(this);
+ if (L == 0 || R == 0) return 0;
+ if (L != BO->getLHS() || R != BO->getRHS())
+ return BinOpInit::get(BinOpInit::CONCAT, L, R, new DagRecTy);
+ return BO;
+ }
+ return 0;
+}
+
+RecordRecTy *RecordRecTy::get(Record *R) {
+ return dyn_cast<RecordRecTy>(R->getDefInit()->getType());
+}
+
+std::string RecordRecTy::getAsString() const {
+ return Rec->getName();
+}
+
+Init *RecordRecTy::convertValue(DefInit *DI) {
+ // Ensure that DI is a subclass of Rec.
+ if (!DI->getDef()->isSubClassOf(Rec))
+ return 0;
+ return DI;
+}
+
+Init *RecordRecTy::convertValue(TypedInit *TI) {
+ // Ensure that TI is compatible with Rec.
+ if (RecordRecTy *RRT = dyn_cast<RecordRecTy>(TI->getType()))
+ if (RRT->getRecord()->isSubClassOf(getRecord()) ||
+ RRT->getRecord() == getRecord())
+ return TI;
+ return 0;
+}
+
+bool RecordRecTy::baseClassOf(const RecTy *RHS) const{
+ const RecordRecTy *RTy = dyn_cast<RecordRecTy>(RHS);
+ if (!RTy)
+ return false;
+
+ if (Rec == RTy->getRecord() || RTy->getRecord()->isSubClassOf(Rec))
+ return true;
+
+ const std::vector<Record*> &SC = Rec->getSuperClasses();
+ for (unsigned i = 0, e = SC.size(); i != e; ++i)
+ if (RTy->getRecord()->isSubClassOf(SC[i]))
+ return true;
+
+ return false;
+}
+
+/// resolveTypes - Find a common type that T1 and T2 convert to.
+/// Return 0 if no such type exists.
+///
+RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
+ if (T1->typeIsConvertibleTo(T2))
+ return T2;
+ if (T2->typeIsConvertibleTo(T1))
+ return T1;
+
+ // If one is a Record type, check superclasses
+ if (RecordRecTy *RecTy1 = dyn_cast<RecordRecTy>(T1)) {
+ // See if T2 inherits from a type T1 also inherits from
+ const std::vector<Record *> &T1SuperClasses =
+ RecTy1->getRecord()->getSuperClasses();
+ for(std::vector<Record *>::const_iterator i = T1SuperClasses.begin(),
+ iend = T1SuperClasses.end();
+ i != iend;
+ ++i) {
+ RecordRecTy *SuperRecTy1 = RecordRecTy::get(*i);
+ RecTy *NewType1 = resolveTypes(SuperRecTy1, T2);
+ if (NewType1 != 0) {
+ if (NewType1 != SuperRecTy1) {
+ delete SuperRecTy1;
+ }
+ return NewType1;
+ }
+ }
+ }
+ if (RecordRecTy *RecTy2 = dyn_cast<RecordRecTy>(T2)) {
+ // See if T1 inherits from a type T2 also inherits from
+ const std::vector<Record *> &T2SuperClasses =
+ RecTy2->getRecord()->getSuperClasses();
+ for (std::vector<Record *>::const_iterator i = T2SuperClasses.begin(),
+ iend = T2SuperClasses.end();
+ i != iend;
+ ++i) {
+ RecordRecTy *SuperRecTy2 = RecordRecTy::get(*i);
+ RecTy *NewType2 = resolveTypes(T1, SuperRecTy2);
+ if (NewType2 != 0) {
+ if (NewType2 != SuperRecTy2) {
+ delete SuperRecTy2;
+ }
+ return NewType2;
+ }
+ }
+ }
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Initializer implementations
+//===----------------------------------------------------------------------===//
+
+void Init::anchor() { }
+void Init::dump() const { return print(errs()); }
+
+void UnsetInit::anchor() { }
+
+UnsetInit *UnsetInit::get() {
+ static UnsetInit TheInit;
+ return &TheInit;
+}
+
+void BitInit::anchor() { }
+
+BitInit *BitInit::get(bool V) {
+ static BitInit True(true);
+ static BitInit False(false);
+
+ return V ? &True : &False;
+}
+
+static void
+ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) {
+ ID.AddInteger(Range.size());
+
+ for (ArrayRef<Init *>::iterator i = Range.begin(),
+ iend = Range.end();
+ i != iend;
+ ++i)
+ ID.AddPointer(*i);
+}
+
+BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
+ typedef FoldingSet<BitsInit> Pool;
+ static Pool ThePool;
+
+ FoldingSetNodeID ID;
+ ProfileBitsInit(ID, Range);
+
+ void *IP = 0;
+ if (BitsInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ return I;
+
+ BitsInit *I = new BitsInit(Range);
+ ThePool.InsertNode(I, IP);
+
+ return I;
+}
+
+void BitsInit::Profile(FoldingSetNodeID &ID) const {
+ ProfileBitsInit(ID, Bits);
+}
+
+Init *
+BitsInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
+ SmallVector<Init *, 16> NewBits(Bits.size());
+
+ for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
+ if (Bits[i] >= getNumBits())
+ return 0;
+ NewBits[i] = getBit(Bits[i]);
+ }
+ return BitsInit::get(NewBits);
+}
+
+std::string BitsInit::getAsString() const {
+ std::string Result = "{ ";
+ for (unsigned i = 0, e = getNumBits(); i != e; ++i) {
+ if (i) Result += ", ";
+ if (Init *Bit = getBit(e-i-1))
+ Result += Bit->getAsString();
+ else
+ Result += "*";
+ }
+ return Result + " }";
+}
+
+// Fix bit initializer to preserve the behavior that bit reference from a unset
+// bits initializer will resolve into VarBitInit to keep the field name and bit
+// number used in targets with fixed insn length.
+static Init *fixBitInit(const RecordVal *RV, Init *Before, Init *After) {
+ if (RV || After != UnsetInit::get())
+ return After;
+ return Before;
+}
+
+// resolveReferences - If there are any field references that refer to fields
+// that have been filled in, we can propagate the values now.
+//
+Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ bool Changed = false;
+ SmallVector<Init *, 16> NewBits(getNumBits());
+
+ Init *CachedInit = 0;
+ Init *CachedBitVar = 0;
+ bool CachedBitVarChanged = false;
+
+ for (unsigned i = 0, e = getNumBits(); i != e; ++i) {
+ Init *CurBit = Bits[i];
+ Init *CurBitVar = CurBit->getBitVar();
+
+ NewBits[i] = CurBit;
+
+ if (CurBitVar == CachedBitVar) {
+ if (CachedBitVarChanged) {
+ Init *Bit = CachedInit->getBit(CurBit->getBitNum());
+ NewBits[i] = fixBitInit(RV, CurBit, Bit);
+ }
+ continue;
+ }
+ CachedBitVar = CurBitVar;
+ CachedBitVarChanged = false;
+
+ Init *B;
+ do {
+ B = CurBitVar;
+ CurBitVar = CurBitVar->resolveReferences(R, RV);
+ CachedBitVarChanged |= B != CurBitVar;
+ Changed |= B != CurBitVar;
+ } while (B != CurBitVar);
+ CachedInit = CurBitVar;
+
+ if (CachedBitVarChanged) {
+ Init *Bit = CurBitVar->getBit(CurBit->getBitNum());
+ NewBits[i] = fixBitInit(RV, CurBit, Bit);
+ }
+ }
+
+ if (Changed)
+ return BitsInit::get(NewBits);
+
+ return const_cast<BitsInit *>(this);
+}
+
+IntInit *IntInit::get(int64_t V) {
+ typedef DenseMap<int64_t, IntInit *> Pool;
+ static Pool ThePool;
+
+ IntInit *&I = ThePool[V];
+ if (!I) I = new IntInit(V);
+ return I;
+}
+
+std::string IntInit::getAsString() const {
+ return itostr(Value);
+}
+
+Init *
+IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
+ SmallVector<Init *, 16> NewBits(Bits.size());
+
+ for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
+ if (Bits[i] >= 64)
+ return 0;
+
+ NewBits[i] = BitInit::get(Value & (INT64_C(1) << Bits[i]));
+ }
+ return BitsInit::get(NewBits);
+}
+
+void StringInit::anchor() { }
+
+StringInit *StringInit::get(StringRef V) {
+ typedef StringMap<StringInit *> Pool;
+ static Pool ThePool;
+
+ StringInit *&I = ThePool[V];
+ if (!I) I = new StringInit(V);
+ return I;
+}
+
+static void ProfileListInit(FoldingSetNodeID &ID,
+ ArrayRef<Init *> Range,
+ RecTy *EltTy) {
+ ID.AddInteger(Range.size());
+ ID.AddPointer(EltTy);
+
+ for (ArrayRef<Init *>::iterator i = Range.begin(),
+ iend = Range.end();
+ i != iend;
+ ++i)
+ ID.AddPointer(*i);
+}
+
+ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
+ typedef FoldingSet<ListInit> Pool;
+ static Pool ThePool;
+
+ // Just use the FoldingSetNodeID to compute a hash. Use a DenseMap
+ // for actual storage.
+ FoldingSetNodeID ID;
+ ProfileListInit(ID, Range, EltTy);
+
+ void *IP = 0;
+ if (ListInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ return I;
+
+ ListInit *I = new ListInit(Range, EltTy);
+ ThePool.InsertNode(I, IP);
+ return I;
+}
+
+void ListInit::Profile(FoldingSetNodeID &ID) const {
+ ListRecTy *ListType = dyn_cast<ListRecTy>(getType());
+ assert(ListType && "Bad type for ListInit!");
+ RecTy *EltTy = ListType->getElementType();
+
+ ProfileListInit(ID, Values, EltTy);
+}
+
+Init *
+ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
+ std::vector<Init*> Vals;
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] >= getSize())
+ return 0;
+ Vals.push_back(getElement(Elements[i]));
+ }
+ return ListInit::get(Vals, getType());
+}
+
+Record *ListInit::getElementAsRecord(unsigned i) const {
+ assert(i < Values.size() && "List element index out of range!");
+ DefInit *DI = dyn_cast<DefInit>(Values[i]);
+ if (DI == 0)
+ PrintFatalError("Expected record in list!");
+ return DI->getDef();
+}
+
+Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ std::vector<Init*> Resolved;
+ Resolved.reserve(getSize());
+ bool Changed = false;
+
+ for (unsigned i = 0, e = getSize(); i != e; ++i) {
+ Init *E;
+ Init *CurElt = getElement(i);
+
+ do {
+ E = CurElt;
+ CurElt = CurElt->resolveReferences(R, RV);
+ Changed |= E != CurElt;
+ } while (E != CurElt);
+ Resolved.push_back(E);
+ }
+
+ if (Changed)
+ return ListInit::get(Resolved, getType());
+ return const_cast<ListInit *>(this);
+}
+
+Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV,
+ unsigned Elt) const {
+ if (Elt >= getSize())
+ return 0; // Out of range reference.
+ Init *E = getElement(Elt);
+ // If the element is set to some value, or if we are resolving a reference
+ // to a specific variable and that variable is explicitly unset, then
+ // replace the VarListElementInit with it.
+ if (IRV || !isa<UnsetInit>(E))
+ return E;
+ return 0;
+}
+
+std::string ListInit::getAsString() const {
+ std::string Result = "[";
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ if (i) Result += ", ";
+ Result += Values[i]->getAsString();
+ }
+ return Result + "]";
+}
+
+Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV,
+ unsigned Elt) const {
+ Init *Resolved = resolveReferences(R, IRV);
+ OpInit *OResolved = dyn_cast<OpInit>(Resolved);
+ if (OResolved) {
+ Resolved = OResolved->Fold(&R, 0);
+ }
+
+ if (Resolved != this) {
+ TypedInit *Typed = dyn_cast<TypedInit>(Resolved);
+ assert(Typed && "Expected typed init for list reference");
+ if (Typed) {
+ Init *New = Typed->resolveListElementReference(R, IRV, Elt);
+ if (New)
+ return New;
+ return VarListElementInit::get(Typed, Elt);
+ }
+ }
+
+ return 0;
+}
+
+Init *OpInit::getBit(unsigned Bit) const {
+ if (getType() == BitRecTy::get())
+ return const_cast<OpInit*>(this);
+ return VarBitInit::get(const_cast<OpInit*>(this), Bit);
+}
+
+UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) {
+ typedef std::pair<std::pair<unsigned, Init *>, RecTy *> Key;
+
+ typedef DenseMap<Key, UnOpInit *> Pool;
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(std::make_pair(opc, lhs), Type));
+
+ UnOpInit *&I = ThePool[TheKey];
+ if (!I) I = new UnOpInit(opc, lhs, Type);
+ return I;
+}
+
+Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
+ switch (getOpcode()) {
+ case CAST: {
+ if (getType()->getAsString() == "string") {
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
+ return LHSs;
+
+ if (DefInit *LHSd = dyn_cast<DefInit>(LHS))
+ return StringInit::get(LHSd->getDef()->getName());
+
+ if (IntInit *LHSi = dyn_cast<IntInit>(LHS))
+ return StringInit::get(LHSi->getAsString());
+ } else {
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) {
+ std::string Name = LHSs->getValue();
+
+ // From TGParser::ParseIDValue
+ if (CurRec) {
+ if (const RecordVal *RV = CurRec->getValue(Name)) {
+ if (RV->getType() != getType())
+ PrintFatalError("type mismatch in cast");
+ return VarInit::get(Name, RV->getType());
+ }
+
+ Init *TemplateArgName = QualifyName(*CurRec, CurMultiClass, Name,
+ ":");
+
+ if (CurRec->isTemplateArg(TemplateArgName)) {
+ const RecordVal *RV = CurRec->getValue(TemplateArgName);
+ assert(RV && "Template arg doesn't exist??");
+
+ if (RV->getType() != getType())
+ PrintFatalError("type mismatch in cast");
+
+ return VarInit::get(TemplateArgName, RV->getType());
+ }
+ }
+
+ if (CurMultiClass) {
+ Init *MCName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name, "::");
+
+ if (CurMultiClass->Rec.isTemplateArg(MCName)) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
+ assert(RV && "Template arg doesn't exist??");
+
+ if (RV->getType() != getType())
+ PrintFatalError("type mismatch in cast");
+
+ return VarInit::get(MCName, RV->getType());
+ }
+ }
+
+ if (Record *D = (CurRec->getRecords()).getDef(Name))
+ return DefInit::get(D);
+
+ PrintFatalError(CurRec->getLoc(),
+ "Undefined reference:'" + Name + "'\n");
+ }
+ }
+ break;
+ }
+ case HEAD: {
+ if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
+ if (LHSl->getSize() == 0) {
+ assert(0 && "Empty list in car");
+ return 0;
+ }
+ return LHSl->getElement(0);
+ }
+ break;
+ }
+ case TAIL: {
+ if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
+ if (LHSl->getSize() == 0) {
+ assert(0 && "Empty list in cdr");
+ return 0;
+ }
+ // Note the +1. We can't just pass the result of getValues()
+ // directly.
+ ArrayRef<Init *>::iterator begin = LHSl->getValues().begin()+1;
+ ArrayRef<Init *>::iterator end = LHSl->getValues().end();
+ ListInit *Result =
+ ListInit::get(ArrayRef<Init *>(begin, end - begin),
+ LHSl->getType());
+ return Result;
+ }
+ break;
+ }
+ case EMPTY: {
+ if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
+ if (LHSl->getSize() == 0) {
+ return IntInit::get(1);
+ } else {
+ return IntInit::get(0);
+ }
+ }
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) {
+ if (LHSs->getValue().empty()) {
+ return IntInit::get(1);
+ } else {
+ return IntInit::get(0);
+ }
+ }
+
+ break;
+ }
+ }
+ return const_cast<UnOpInit *>(this);
+}
+
+Init *UnOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ Init *lhs = LHS->resolveReferences(R, RV);
+
+ if (LHS != lhs)
+ return (UnOpInit::get(getOpcode(), lhs, getType()))->Fold(&R, 0);
+ return Fold(&R, 0);
+}
+
+std::string UnOpInit::getAsString() const {
+ std::string Result;
+ switch (Opc) {
+ case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break;
+ case HEAD: Result = "!head"; break;
+ case TAIL: Result = "!tail"; break;
+ case EMPTY: Result = "!empty"; break;
+ }
+ return Result + "(" + LHS->getAsString() + ")";
+}
+
+BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs,
+ Init *rhs, RecTy *Type) {
+ typedef std::pair<
+ std::pair<std::pair<unsigned, Init *>, Init *>,
+ RecTy *
+ > Key;
+
+ typedef DenseMap<Key, BinOpInit *> Pool;
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(std::make_pair(std::make_pair(opc, lhs), rhs),
+ Type));
+
+ BinOpInit *&I = ThePool[TheKey];
+ if (!I) I = new BinOpInit(opc, lhs, rhs, Type);
+ return I;
+}
+
+Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
+ switch (getOpcode()) {
+ case CONCAT: {
+ DagInit *LHSs = dyn_cast<DagInit>(LHS);
+ DagInit *RHSs = dyn_cast<DagInit>(RHS);
+ if (LHSs && RHSs) {
+ DefInit *LOp = dyn_cast<DefInit>(LHSs->getOperator());
+ DefInit *ROp = dyn_cast<DefInit>(RHSs->getOperator());
+ if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef())
+ PrintFatalError("Concated Dag operators do not match!");
+ std::vector<Init*> Args;
+ std::vector<std::string> ArgNames;
+ for (unsigned i = 0, e = LHSs->getNumArgs(); i != e; ++i) {
+ Args.push_back(LHSs->getArg(i));
+ ArgNames.push_back(LHSs->getArgName(i));
+ }
+ for (unsigned i = 0, e = RHSs->getNumArgs(); i != e; ++i) {
+ Args.push_back(RHSs->getArg(i));
+ ArgNames.push_back(RHSs->getArgName(i));
+ }
+ return DagInit::get(LHSs->getOperator(), "", Args, ArgNames);
+ }
+ break;
+ }
+ case STRCONCAT: {
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
+ if (LHSs && RHSs)
+ return StringInit::get(LHSs->getValue() + RHSs->getValue());
+ break;
+ }
+ case EQ: {
+ // try to fold eq comparison for 'bit' and 'int', otherwise fallback
+ // to string objects.
+ IntInit *L =
+ dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get()));
+ IntInit *R =
+ dyn_cast_or_null<IntInit>(RHS->convertInitializerTo(IntRecTy::get()));
+
+ if (L && R)
+ return IntInit::get(L->getValue() == R->getValue());
+
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
+
+ // Make sure we've resolved
+ if (LHSs && RHSs)
+ return IntInit::get(LHSs->getValue() == RHSs->getValue());
+
+ break;
+ }
+ case ADD:
+ case SHL:
+ case SRA:
+ case SRL: {
+ IntInit *LHSi = dyn_cast<IntInit>(LHS);
+ IntInit *RHSi = dyn_cast<IntInit>(RHS);
+ if (LHSi && RHSi) {
+ int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue();
+ int64_t Result;
+ switch (getOpcode()) {
+ default: llvm_unreachable("Bad opcode!");
+ case ADD: Result = LHSv + RHSv; break;
+ case SHL: Result = LHSv << RHSv; break;
+ case SRA: Result = LHSv >> RHSv; break;
+ case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break;
+ }
+ return IntInit::get(Result);
+ }
+ break;
+ }
+ }
+ return const_cast<BinOpInit *>(this);
+}
+
+Init *BinOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ Init *lhs = LHS->resolveReferences(R, RV);
+ Init *rhs = RHS->resolveReferences(R, RV);
+
+ if (LHS != lhs || RHS != rhs)
+ return (BinOpInit::get(getOpcode(), lhs, rhs, getType()))->Fold(&R, 0);
+ return Fold(&R, 0);
+}
+
+std::string BinOpInit::getAsString() const {
+ std::string Result;
+ switch (Opc) {
+ case CONCAT: Result = "!con"; break;
+ case ADD: Result = "!add"; break;
+ case SHL: Result = "!shl"; break;
+ case SRA: Result = "!sra"; break;
+ case SRL: Result = "!srl"; break;
+ case EQ: Result = "!eq"; break;
+ case STRCONCAT: Result = "!strconcat"; break;
+ }
+ return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")";
+}
+
+TernOpInit *TernOpInit::get(TernaryOp opc, Init *lhs,
+ Init *mhs, Init *rhs,
+ RecTy *Type) {
+ typedef std::pair<
+ std::pair<
+ std::pair<std::pair<unsigned, RecTy *>, Init *>,
+ Init *
+ >,
+ Init *
+ > Key;
+
+ typedef DenseMap<Key, TernOpInit *> Pool;
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(std::make_pair(std::make_pair(std::make_pair(opc,
+ Type),
+ lhs),
+ mhs),
+ rhs));
+
+ TernOpInit *&I = ThePool[TheKey];
+ if (!I) I = new TernOpInit(opc, lhs, mhs, rhs, Type);
+ return I;
+}
+
+static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
+ Record *CurRec, MultiClass *CurMultiClass);
+
+static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
+ RecTy *Type, Record *CurRec,
+ MultiClass *CurMultiClass) {
+ std::vector<Init *> NewOperands;
+
+ TypedInit *TArg = dyn_cast<TypedInit>(Arg);
+
+ // If this is a dag, recurse
+ if (TArg && TArg->getType()->getAsString() == "dag") {
+ Init *Result = ForeachHelper(LHS, Arg, RHSo, Type,
+ CurRec, CurMultiClass);
+ if (Result != 0) {
+ return Result;
+ } else {
+ return 0;
+ }
+ }
+
+ for (int i = 0; i < RHSo->getNumOperands(); ++i) {
+ OpInit *RHSoo = dyn_cast<OpInit>(RHSo->getOperand(i));
+
+ if (RHSoo) {
+ Init *Result = EvaluateOperation(RHSoo, LHS, Arg,
+ Type, CurRec, CurMultiClass);
+ if (Result != 0) {
+ NewOperands.push_back(Result);
+ } else {
+ NewOperands.push_back(Arg);
+ }
+ } else if (LHS->getAsString() == RHSo->getOperand(i)->getAsString()) {
+ NewOperands.push_back(Arg);
+ } else {
+ NewOperands.push_back(RHSo->getOperand(i));
+ }
+ }
+
+ // Now run the operator and use its result as the new leaf
+ const OpInit *NewOp = RHSo->clone(NewOperands);
+ Init *NewVal = NewOp->Fold(CurRec, CurMultiClass);
+ if (NewVal != NewOp)
+ return NewVal;
+
+ return 0;
+}
+
+static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
+ Record *CurRec, MultiClass *CurMultiClass) {
+ DagInit *MHSd = dyn_cast<DagInit>(MHS);
+ ListInit *MHSl = dyn_cast<ListInit>(MHS);
+
+ OpInit *RHSo = dyn_cast<OpInit>(RHS);
+
+ if (!RHSo) {
+ PrintFatalError(CurRec->getLoc(), "!foreach requires an operator\n");
+ }
+
+ TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
+
+ if (!LHSt)
+ PrintFatalError(CurRec->getLoc(), "!foreach requires typed variable\n");
+
+ if ((MHSd && isa<DagRecTy>(Type)) || (MHSl && isa<ListRecTy>(Type))) {
+ if (MHSd) {
+ Init *Val = MHSd->getOperator();
+ Init *Result = EvaluateOperation(RHSo, LHS, Val,
+ Type, CurRec, CurMultiClass);
+ if (Result != 0) {
+ Val = Result;
+ }
+
+ std::vector<std::pair<Init *, std::string> > args;
+ for (unsigned int i = 0; i < MHSd->getNumArgs(); ++i) {
+ Init *Arg;
+ std::string ArgName;
+ Arg = MHSd->getArg(i);
+ ArgName = MHSd->getArgName(i);
+
+ // Process args
+ Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type,
+ CurRec, CurMultiClass);
+ if (Result != 0) {
+ Arg = Result;
+ }
+
+ // TODO: Process arg names
+ args.push_back(std::make_pair(Arg, ArgName));
+ }
+
+ return DagInit::get(Val, "", args);
+ }
+ if (MHSl) {
+ std::vector<Init *> NewOperands;
+ std::vector<Init *> NewList(MHSl->begin(), MHSl->end());
+
+ for (std::vector<Init *>::iterator li = NewList.begin(),
+ liend = NewList.end();
+ li != liend;
+ ++li) {
+ Init *Item = *li;
+ NewOperands.clear();
+ for(int i = 0; i < RHSo->getNumOperands(); ++i) {
+ // First, replace the foreach variable with the list item
+ if (LHS->getAsString() == RHSo->getOperand(i)->getAsString()) {
+ NewOperands.push_back(Item);
+ } else {
+ NewOperands.push_back(RHSo->getOperand(i));
+ }
+ }
+
+ // Now run the operator and use its result as the new list item
+ const OpInit *NewOp = RHSo->clone(NewOperands);
+ Init *NewItem = NewOp->Fold(CurRec, CurMultiClass);
+ if (NewItem != NewOp)
+ *li = NewItem;
+ }
+ return ListInit::get(NewList, MHSl->getType());
+ }
+ }
+ return 0;
+}
+
+Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
+ switch (getOpcode()) {
+ case SUBST: {
+ DefInit *LHSd = dyn_cast<DefInit>(LHS);
+ VarInit *LHSv = dyn_cast<VarInit>(LHS);
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+
+ DefInit *MHSd = dyn_cast<DefInit>(MHS);
+ VarInit *MHSv = dyn_cast<VarInit>(MHS);
+ StringInit *MHSs = dyn_cast<StringInit>(MHS);
+
+ DefInit *RHSd = dyn_cast<DefInit>(RHS);
+ VarInit *RHSv = dyn_cast<VarInit>(RHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
+
+ if ((LHSd && MHSd && RHSd)
+ || (LHSv && MHSv && RHSv)
+ || (LHSs && MHSs && RHSs)) {
+ if (RHSd) {
+ Record *Val = RHSd->getDef();
+ if (LHSd->getAsString() == RHSd->getAsString()) {
+ Val = MHSd->getDef();
+ }
+ return DefInit::get(Val);
+ }
+ if (RHSv) {
+ std::string Val = RHSv->getName();
+ if (LHSv->getAsString() == RHSv->getAsString()) {
+ Val = MHSv->getName();
+ }
+ return VarInit::get(Val, getType());
+ }
+ if (RHSs) {
+ std::string Val = RHSs->getValue();
+
+ std::string::size_type found;
+ std::string::size_type idx = 0;
+ do {
+ found = Val.find(LHSs->getValue(), idx);
+ if (found != std::string::npos) {
+ Val.replace(found, LHSs->getValue().size(), MHSs->getValue());
+ }
+ idx = found + MHSs->getValue().size();
+ } while (found != std::string::npos);
+
+ return StringInit::get(Val);
+ }
+ }
+ break;
+ }
+
+ case FOREACH: {
+ Init *Result = ForeachHelper(LHS, MHS, RHS, getType(),
+ CurRec, CurMultiClass);
+ if (Result != 0) {
+ return Result;
+ }
+ break;
+ }
+
+ case IF: {
+ IntInit *LHSi = dyn_cast<IntInit>(LHS);
+ if (Init *I = LHS->convertInitializerTo(IntRecTy::get()))
+ LHSi = dyn_cast<IntInit>(I);
+ if (LHSi) {
+ if (LHSi->getValue()) {
+ return MHS;
+ } else {
+ return RHS;
+ }
+ }
+ break;
+ }
+ }
+
+ return const_cast<TernOpInit *>(this);
+}
+
+Init *TernOpInit::resolveReferences(Record &R,
+ const RecordVal *RV) const {
+ Init *lhs = LHS->resolveReferences(R, RV);
+
+ if (Opc == IF && lhs != LHS) {
+ IntInit *Value = dyn_cast<IntInit>(lhs);
+ if (Init *I = lhs->convertInitializerTo(IntRecTy::get()))
+ Value = dyn_cast<IntInit>(I);
+ if (Value != 0) {
+ // Short-circuit
+ if (Value->getValue()) {
+ Init *mhs = MHS->resolveReferences(R, RV);
+ return (TernOpInit::get(getOpcode(), lhs, mhs,
+ RHS, getType()))->Fold(&R, 0);
+ } else {
+ Init *rhs = RHS->resolveReferences(R, RV);
+ return (TernOpInit::get(getOpcode(), lhs, MHS,
+ rhs, getType()))->Fold(&R, 0);
+ }
+ }
+ }
+
+ Init *mhs = MHS->resolveReferences(R, RV);
+ Init *rhs = RHS->resolveReferences(R, RV);
+
+ if (LHS != lhs || MHS != mhs || RHS != rhs)
+ return (TernOpInit::get(getOpcode(), lhs, mhs, rhs,
+ getType()))->Fold(&R, 0);
+ return Fold(&R, 0);
+}
+
+std::string TernOpInit::getAsString() const {
+ std::string Result;
+ switch (Opc) {
+ case SUBST: Result = "!subst"; break;
+ case FOREACH: Result = "!foreach"; break;
+ case IF: Result = "!if"; break;
+ }
+ return Result + "(" + LHS->getAsString() + ", " + MHS->getAsString() + ", "
+ + RHS->getAsString() + ")";
+}
+
+RecTy *TypedInit::getFieldType(const std::string &FieldName) const {
+ if (RecordRecTy *RecordType = dyn_cast<RecordRecTy>(getType()))
+ if (RecordVal *Field = RecordType->getRecord()->getValue(FieldName))
+ return Field->getType();
+ return 0;
+}
+
+Init *
+TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
+ BitsRecTy *T = dyn_cast<BitsRecTy>(getType());
+ if (T == 0) return 0; // Cannot subscript a non-bits variable.
+ unsigned NumBits = T->getNumBits();
+
+ SmallVector<Init *, 16> NewBits(Bits.size());
+ for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
+ if (Bits[i] >= NumBits)
+ return 0;
+
+ NewBits[i] = VarBitInit::get(const_cast<TypedInit *>(this), Bits[i]);
+ }
+ return BitsInit::get(NewBits);
+}
+
+Init *
+TypedInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
+ ListRecTy *T = dyn_cast<ListRecTy>(getType());
+ if (T == 0) return 0; // Cannot subscript a non-list variable.
+
+ if (Elements.size() == 1)
+ return VarListElementInit::get(const_cast<TypedInit *>(this), Elements[0]);
+
+ std::vector<Init*> ListInits;
+ ListInits.reserve(Elements.size());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i)
+ ListInits.push_back(VarListElementInit::get(const_cast<TypedInit *>(this),
+ Elements[i]));
+ return ListInit::get(ListInits, T);
+}
+
+
+VarInit *VarInit::get(const std::string &VN, RecTy *T) {
+ Init *Value = StringInit::get(VN);
+ return VarInit::get(Value, T);
+}
+
+VarInit *VarInit::get(Init *VN, RecTy *T) {
+ typedef std::pair<RecTy *, Init *> Key;
+ typedef DenseMap<Key, VarInit *> Pool;
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(T, VN));
+
+ VarInit *&I = ThePool[TheKey];
+ if (!I) I = new VarInit(VN, T);
+ return I;
+}
+
+const std::string &VarInit::getName() const {
+ StringInit *NameString = dyn_cast<StringInit>(getNameInit());
+ assert(NameString && "VarInit name is not a string!");
+ return NameString->getValue();
+}
+
+Init *VarInit::getBit(unsigned Bit) const {
+ if (getType() == BitRecTy::get())
+ return const_cast<VarInit*>(this);
+ return VarBitInit::get(const_cast<VarInit*>(this), Bit);
+}
+
+Init *VarInit::resolveListElementReference(Record &R,
+ const RecordVal *IRV,
+ unsigned Elt) const {
+ if (R.isTemplateArg(getNameInit())) return 0;
+ if (IRV && IRV->getNameInit() != getNameInit()) return 0;
+
+ RecordVal *RV = R.getValue(getNameInit());
+ assert(RV && "Reference to a non-existent variable?");
+ ListInit *LI = dyn_cast<ListInit>(RV->getValue());
+ if (!LI) {
+ TypedInit *VI = dyn_cast<TypedInit>(RV->getValue());
+ assert(VI && "Invalid list element!");
+ return VarListElementInit::get(VI, Elt);
+ }
+
+ if (Elt >= LI->getSize())
+ return 0; // Out of range reference.
+ Init *E = LI->getElement(Elt);
+ // If the element is set to some value, or if we are resolving a reference
+ // to a specific variable and that variable is explicitly unset, then
+ // replace the VarListElementInit with it.
+ if (IRV || !isa<UnsetInit>(E))
+ return E;
+ return 0;
+}
+
+
+RecTy *VarInit::getFieldType(const std::string &FieldName) const {
+ if (RecordRecTy *RTy = dyn_cast<RecordRecTy>(getType()))
+ if (const RecordVal *RV = RTy->getRecord()->getValue(FieldName))
+ return RV->getType();
+ return 0;
+}
+
+Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
+ const std::string &FieldName) const {
+ if (isa<RecordRecTy>(getType()))
+ if (const RecordVal *Val = R.getValue(VarName)) {
+ if (RV != Val && (RV || isa<UnsetInit>(Val->getValue())))
+ return 0;
+ Init *TheInit = Val->getValue();
+ assert(TheInit != this && "Infinite loop detected!");
+ if (Init *I = TheInit->getFieldInit(R, RV, FieldName))
+ return I;
+ else
+ return 0;
+ }
+ return 0;
+}
+
+/// resolveReferences - This method is used by classes that refer to other
+/// variables which may not be defined at the time the expression is formed.
+/// If a value is set for the variable later, this method will be called on
+/// users of the value to allow the value to propagate out.
+///
+Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ if (RecordVal *Val = R.getValue(VarName))
+ if (RV == Val || (RV == 0 && !isa<UnsetInit>(Val->getValue())))
+ return Val->getValue();
+ return const_cast<VarInit *>(this);
+}
+
+VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) {
+ typedef std::pair<TypedInit *, unsigned> Key;
+ typedef DenseMap<Key, VarBitInit *> Pool;
+
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(T, B));
+
+ VarBitInit *&I = ThePool[TheKey];
+ if (!I) I = new VarBitInit(T, B);
+ return I;
+}
+
+std::string VarBitInit::getAsString() const {
+ return TI->getAsString() + "{" + utostr(Bit) + "}";
+}
+
+Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ Init *I = TI->resolveReferences(R, RV);
+ if (TI != I)
+ return I->getBit(getBitNum());
+
+ return const_cast<VarBitInit*>(this);
+}
+
+VarListElementInit *VarListElementInit::get(TypedInit *T,
+ unsigned E) {
+ typedef std::pair<TypedInit *, unsigned> Key;
+ typedef DenseMap<Key, VarListElementInit *> Pool;
+
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(T, E));
+
+ VarListElementInit *&I = ThePool[TheKey];
+ if (!I) I = new VarListElementInit(T, E);
+ return I;
+}
+
+std::string VarListElementInit::getAsString() const {
+ return TI->getAsString() + "[" + utostr(Element) + "]";
+}
+
+Init *
+VarListElementInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ if (Init *I = getVariable()->resolveListElementReference(R, RV,
+ getElementNum()))
+ return I;
+ return const_cast<VarListElementInit *>(this);
+}
+
+Init *VarListElementInit::getBit(unsigned Bit) const {
+ if (getType() == BitRecTy::get())
+ return const_cast<VarListElementInit*>(this);
+ return VarBitInit::get(const_cast<VarListElementInit*>(this), Bit);
+}
+
+Init *VarListElementInit:: resolveListElementReference(Record &R,
+ const RecordVal *RV,
+ unsigned Elt) const {
+ Init *Result = TI->resolveListElementReference(R, RV, Element);
+
+ if (Result) {
+ if (TypedInit *TInit = dyn_cast<TypedInit>(Result)) {
+ Init *Result2 = TInit->resolveListElementReference(R, RV, Elt);
+ if (Result2) return Result2;
+ return new VarListElementInit(TInit, Elt);
+ }
+ return Result;
+ }
+
+ return 0;
+}
+
+DefInit *DefInit::get(Record *R) {
+ return R->getDefInit();
+}
+
+RecTy *DefInit::getFieldType(const std::string &FieldName) const {
+ if (const RecordVal *RV = Def->getValue(FieldName))
+ return RV->getType();
+ return 0;
+}
+
+Init *DefInit::getFieldInit(Record &R, const RecordVal *RV,
+ const std::string &FieldName) const {
+ return Def->getValue(FieldName)->getValue();
+}
+
+
+std::string DefInit::getAsString() const {
+ return Def->getName();
+}
+
+FieldInit *FieldInit::get(Init *R, const std::string &FN) {
+ typedef std::pair<Init *, TableGenStringKey> Key;
+ typedef DenseMap<Key, FieldInit *> Pool;
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(R, FN));
+
+ FieldInit *&I = ThePool[TheKey];
+ if (!I) I = new FieldInit(R, FN);
+ return I;
+}
+
+Init *FieldInit::getBit(unsigned Bit) const {
+ if (getType() == BitRecTy::get())
+ return const_cast<FieldInit*>(this);
+ return VarBitInit::get(const_cast<FieldInit*>(this), Bit);
+}
+
+Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV,
+ unsigned Elt) const {
+ if (Init *ListVal = Rec->getFieldInit(R, RV, FieldName))
+ if (ListInit *LI = dyn_cast<ListInit>(ListVal)) {
+ if (Elt >= LI->getSize()) return 0;
+ Init *E = LI->getElement(Elt);
+
+ // If the element is set to some value, or if we are resolving a
+ // reference to a specific variable and that variable is explicitly
+ // unset, then replace the VarListElementInit with it.
+ if (RV || !isa<UnsetInit>(E))
+ return E;
+ }
+ return 0;
+}
+
+Init *FieldInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ Init *NewRec = RV ? Rec->resolveReferences(R, RV) : Rec;
+
+ Init *BitsVal = NewRec->getFieldInit(R, RV, FieldName);
+ if (BitsVal) {
+ Init *BVR = BitsVal->resolveReferences(R, RV);
+ return BVR->isComplete() ? BVR : const_cast<FieldInit *>(this);
+ }
+
+ if (NewRec != Rec) {
+ return FieldInit::get(NewRec, FieldName);
+ }
+ return const_cast<FieldInit *>(this);
+}
+
+static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, const std::string &VN,
+ ArrayRef<Init *> ArgRange,
+ ArrayRef<std::string> NameRange) {
+ ID.AddPointer(V);
+ ID.AddString(VN);
+
+ ArrayRef<Init *>::iterator Arg = ArgRange.begin();
+ ArrayRef<std::string>::iterator Name = NameRange.begin();
+ while (Arg != ArgRange.end()) {
+ assert(Name != NameRange.end() && "Arg name underflow!");
+ ID.AddPointer(*Arg++);
+ ID.AddString(*Name++);
+ }
+ assert(Name == NameRange.end() && "Arg name overflow!");
+}
+
+DagInit *
+DagInit::get(Init *V, const std::string &VN,
+ ArrayRef<Init *> ArgRange,
+ ArrayRef<std::string> NameRange) {
+ typedef FoldingSet<DagInit> Pool;
+ static Pool ThePool;
+
+ FoldingSetNodeID ID;
+ ProfileDagInit(ID, V, VN, ArgRange, NameRange);
+
+ void *IP = 0;
+ if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ return I;
+
+ DagInit *I = new DagInit(V, VN, ArgRange, NameRange);
+ ThePool.InsertNode(I, IP);
+
+ return I;
+}
+
+DagInit *
+DagInit::get(Init *V, const std::string &VN,
+ const std::vector<std::pair<Init*, std::string> > &args) {
+ typedef std::pair<Init*, std::string> PairType;
+
+ std::vector<Init *> Args;
+ std::vector<std::string> Names;
+
+ for (std::vector<PairType>::const_iterator i = args.begin(),
+ iend = args.end();
+ i != iend;
+ ++i) {
+ Args.push_back(i->first);
+ Names.push_back(i->second);
+ }
+
+ return DagInit::get(V, VN, Args, Names);
+}
+
+void DagInit::Profile(FoldingSetNodeID &ID) const {
+ ProfileDagInit(ID, Val, ValName, Args, ArgNames);
+}
+
+Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ std::vector<Init*> NewArgs;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
+ NewArgs.push_back(Args[i]->resolveReferences(R, RV));
+
+ Init *Op = Val->resolveReferences(R, RV);
+
+ if (Args != NewArgs || Op != Val)
+ return DagInit::get(Op, ValName, NewArgs, ArgNames);
+
+ return const_cast<DagInit *>(this);
+}
+
+
+std::string DagInit::getAsString() const {
+ std::string Result = "(" + Val->getAsString();
+ if (!ValName.empty())
+ Result += ":" + ValName;
+ if (Args.size()) {
+ Result += " " + Args[0]->getAsString();
+ if (!ArgNames[0].empty()) Result += ":$" + ArgNames[0];
+ for (unsigned i = 1, e = Args.size(); i != e; ++i) {
+ Result += ", " + Args[i]->getAsString();
+ if (!ArgNames[i].empty()) Result += ":$" + ArgNames[i];
+ }
+ }
+ return Result + ")";
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other implementations
+//===----------------------------------------------------------------------===//
+
+RecordVal::RecordVal(Init *N, RecTy *T, unsigned P)
+ : Name(N), Ty(T), Prefix(P) {
+ Value = Ty->convertValue(UnsetInit::get());
+ assert(Value && "Cannot create unset value for current type!");
+}
+
+RecordVal::RecordVal(const std::string &N, RecTy *T, unsigned P)
+ : Name(StringInit::get(N)), Ty(T), Prefix(P) {
+ Value = Ty->convertValue(UnsetInit::get());
+ assert(Value && "Cannot create unset value for current type!");
+}
+
+const std::string &RecordVal::getName() const {
+ StringInit *NameString = dyn_cast<StringInit>(Name);
+ assert(NameString && "RecordVal name is not a string!");
+ return NameString->getValue();
+}
+
+void RecordVal::dump() const { errs() << *this; }
+
+void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
+ if (getPrefix()) OS << "field ";
+ OS << *getType() << " " << getNameInitAsString();
+
+ if (getValue())
+ OS << " = " << *getValue();
+
+ if (PrintSem) OS << ";\n";
+}
+
+unsigned Record::LastID = 0;
+
+void Record::init() {
+ checkName();
+
+ // Every record potentially has a def at the top. This value is
+ // replaced with the top-level def name at instantiation time.
+ RecordVal DN("NAME", StringRecTy::get(), 0);
+ addValue(DN);
+}
+
+void Record::checkName() {
+ // Ensure the record name has string type.
+ const TypedInit *TypedName = dyn_cast<const TypedInit>(Name);
+ assert(TypedName && "Record name is not typed!");
+ RecTy *Type = TypedName->getType();
+ if (!isa<StringRecTy>(Type))
+ PrintFatalError(getLoc(), "Record name is not a string!");
+}
+
+DefInit *Record::getDefInit() {
+ if (!TheInit)
+ TheInit = new DefInit(this, new RecordRecTy(this));
+ return TheInit;
+}
+
+const std::string &Record::getName() const {
+ const StringInit *NameString = dyn_cast<StringInit>(Name);
+ assert(NameString && "Record name is not a string!");
+ return NameString->getValue();
+}
+
+void Record::setName(Init *NewName) {
+ if (TrackedRecords.getDef(Name->getAsUnquotedString()) == this) {
+ TrackedRecords.removeDef(Name->getAsUnquotedString());
+ TrackedRecords.addDef(this);
+ } else if (TrackedRecords.getClass(Name->getAsUnquotedString()) == this) {
+ TrackedRecords.removeClass(Name->getAsUnquotedString());
+ TrackedRecords.addClass(this);
+ } // Otherwise this isn't yet registered.
+ Name = NewName;
+ checkName();
+ // DO NOT resolve record values to the name at this point because
+ // there might be default values for arguments of this def. Those
+ // arguments might not have been resolved yet so we don't want to
+ // prematurely assume values for those arguments were not passed to
+ // this def.
+ //
+ // Nonetheless, it may be that some of this Record's values
+ // reference the record name. Indeed, the reason for having the
+ // record name be an Init is to provide this flexibility. The extra
+ // resolve steps after completely instantiating defs takes care of
+ // this. See TGParser::ParseDef and TGParser::ParseDefm.
+}
+
+void Record::setName(const std::string &Name) {
+ setName(StringInit::get(Name));
+}
+
+/// resolveReferencesTo - If anything in this record refers to RV, replace the
+/// reference to RV with the RHS of RV. If RV is null, we resolve all possible
+/// references.
+void Record::resolveReferencesTo(const RecordVal *RV) {
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ if (RV == &Values[i]) // Skip resolve the same field as the given one
+ continue;
+ if (Init *V = Values[i].getValue())
+ if (Values[i].setValue(V->resolveReferences(*this, RV)))
+ PrintFatalError(getLoc(), "Invalid value is found when setting '"
+ + Values[i].getNameInitAsString()
+ + "' after resolving references"
+ + (RV ? " against '" + RV->getNameInitAsString()
+ + "' of ("
+ + RV->getValue()->getAsUnquotedString() + ")"
+ : "")
+ + "\n");
+ }
+ Init *OldName = getNameInit();
+ Init *NewName = Name->resolveReferences(*this, RV);
+ if (NewName != OldName) {
+ // Re-register with RecordKeeper.
+ setName(NewName);
+ }
+}
+
+void Record::dump() const { errs() << *this; }
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
+ OS << R.getNameInitAsString();
+
+ const std::vector<Init *> &TArgs = R.getTemplateArgs();
+ if (!TArgs.empty()) {
+ OS << "<";
+ for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ const RecordVal *RV = R.getValue(TArgs[i]);
+ assert(RV && "Template argument record not found??");
+ RV->print(OS, false);
+ }
+ OS << ">";
+ }
+
+ OS << " {";
+ const std::vector<Record*> &SC = R.getSuperClasses();
+ if (!SC.empty()) {
+ OS << "\t//";
+ for (unsigned i = 0, e = SC.size(); i != e; ++i)
+ OS << " " << SC[i]->getNameInitAsString();
+ }
+ OS << "\n";
+
+ const std::vector<RecordVal> &Vals = R.getValues();
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ if (Vals[i].getPrefix() && !R.isTemplateArg(Vals[i].getName()))
+ OS << Vals[i];
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ if (!Vals[i].getPrefix() && !R.isTemplateArg(Vals[i].getName()))
+ OS << Vals[i];
+
+ return OS << "}\n";
+}
+
+/// getValueInit - Return the initializer for a value with the specified name,
+/// or abort if the field does not exist.
+///
+Init *Record::getValueInit(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName.str() + "'!\n");
+ return R->getValue();
+}
+
+
+/// getValueAsString - This method looks up the specified field and returns its
+/// value as a string, aborts if the field does not exist or if
+/// the value is not a string.
+///
+std::string Record::getValueAsString(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName.str() + "'!\n");
+
+ if (StringInit *SI = dyn_cast<StringInit>(R->getValue()))
+ return SI->getValue();
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' does not have a string initializer!");
+}
+
+/// getValueAsBitsInit - This method looks up the specified field and returns
+/// its value as a BitsInit, aborts if the field does not exist or if
+/// the value is not the right type.
+///
+BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName.str() + "'!\n");
+
+ if (BitsInit *BI = dyn_cast<BitsInit>(R->getValue()))
+ return BI;
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' does not have a BitsInit initializer!");
+}
+
+/// getValueAsListInit - This method looks up the specified field and returns
+/// its value as a ListInit, aborting if the field does not exist or if
+/// the value is not the right type.
+///
+ListInit *Record::getValueAsListInit(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName.str() + "'!\n");
+
+ if (ListInit *LI = dyn_cast<ListInit>(R->getValue()))
+ return LI;
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' does not have a list initializer!");
+}
+
+/// getValueAsListOfDefs - This method looks up the specified field and returns
+/// its value as a vector of records, aborting if the field does not exist
+/// or if the value is not the right type.
+///
+std::vector<Record*>
+Record::getValueAsListOfDefs(StringRef FieldName) const {
+ ListInit *List = getValueAsListInit(FieldName);
+ std::vector<Record*> Defs;
+ for (unsigned i = 0; i < List->getSize(); i++) {
+ if (DefInit *DI = dyn_cast<DefInit>(List->getElement(i))) {
+ Defs.push_back(DI->getDef());
+ } else {
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' list is not entirely DefInit!");
+ }
+ }
+ return Defs;
+}
+
+/// getValueAsInt - This method looks up the specified field and returns its
+/// value as an int64_t, aborting if the field does not exist or if the value
+/// is not the right type.
+///
+int64_t Record::getValueAsInt(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName.str() + "'!\n");
+
+ if (IntInit *II = dyn_cast<IntInit>(R->getValue()))
+ return II->getValue();
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' does not have an int initializer!");
+}
+
+/// getValueAsListOfInts - This method looks up the specified field and returns
+/// its value as a vector of integers, aborting if the field does not exist or
+/// if the value is not the right type.
+///
+std::vector<int64_t>
+Record::getValueAsListOfInts(StringRef FieldName) const {
+ ListInit *List = getValueAsListInit(FieldName);
+ std::vector<int64_t> Ints;
+ for (unsigned i = 0; i < List->getSize(); i++) {
+ if (IntInit *II = dyn_cast<IntInit>(List->getElement(i))) {
+ Ints.push_back(II->getValue());
+ } else {
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' does not have a list of ints initializer!");
+ }
+ }
+ return Ints;
+}
+
+/// getValueAsListOfStrings - This method looks up the specified field and
+/// returns its value as a vector of strings, aborting if the field does not
+/// exist or if the value is not the right type.
+///
+std::vector<std::string>
+Record::getValueAsListOfStrings(StringRef FieldName) const {
+ ListInit *List = getValueAsListInit(FieldName);
+ std::vector<std::string> Strings;
+ for (unsigned i = 0; i < List->getSize(); i++) {
+ if (StringInit *II = dyn_cast<StringInit>(List->getElement(i))) {
+ Strings.push_back(II->getValue());
+ } else {
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' does not have a list of strings initializer!");
+ }
+ }
+ return Strings;
+}
+
+/// getValueAsDef - This method looks up the specified field and returns its
+/// value as a Record, aborting if the field does not exist or if the value
+/// is not the right type.
+///
+Record *Record::getValueAsDef(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName.str() + "'!\n");
+
+ if (DefInit *DI = dyn_cast<DefInit>(R->getValue()))
+ return DI->getDef();
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' does not have a def initializer!");
+}
+
+/// getValueAsBit - This method looks up the specified field and returns its
+/// value as a bit, aborting if the field does not exist or if the value is
+/// not the right type.
+///
+bool Record::getValueAsBit(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName.str() + "'!\n");
+
+ if (BitInit *BI = dyn_cast<BitInit>(R->getValue()))
+ return BI->getValue();
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' does not have a bit initializer!");
+}
+
+bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName.str() + "'!\n");
+
+ if (R->getValue() == UnsetInit::get()) {
+ Unset = true;
+ return false;
+ }
+ Unset = false;
+ if (BitInit *BI = dyn_cast<BitInit>(R->getValue()))
+ return BI->getValue();
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' does not have a bit initializer!");
+}
+
+/// getValueAsDag - This method looks up the specified field and returns its
+/// value as an Dag, aborting if the field does not exist or if the value is
+/// not the right type.
+///
+DagInit *Record::getValueAsDag(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName.str() + "'!\n");
+
+ if (DagInit *DI = dyn_cast<DagInit>(R->getValue()))
+ return DI;
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName.str() + "' does not have a dag initializer!");
+}
+
+
+void MultiClass::dump() const {
+ errs() << "Record:\n";
+ Rec.dump();
+
+ errs() << "Defs:\n";
+ for (RecordVector::const_iterator r = DefPrototypes.begin(),
+ rend = DefPrototypes.end();
+ r != rend;
+ ++r) {
+ (*r)->dump();
+ }
+}
+
+
+void RecordKeeper::dump() const { errs() << *this; }
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) {
+ OS << "------------- Classes -----------------\n";
+ const std::map<std::string, Record*> &Classes = RK.getClasses();
+ for (std::map<std::string, Record*>::const_iterator I = Classes.begin(),
+ E = Classes.end(); I != E; ++I)
+ OS << "class " << *I->second;
+
+ OS << "------------- Defs -----------------\n";
+ const std::map<std::string, Record*> &Defs = RK.getDefs();
+ for (std::map<std::string, Record*>::const_iterator I = Defs.begin(),
+ E = Defs.end(); I != E; ++I)
+ OS << "def " << *I->second;
+ return OS;
+}
+
+
+/// getAllDerivedDefinitions - This method returns all concrete definitions
+/// that derive from the specified class name. If a class with the specified
+/// name does not exist, an error is printed and true is returned.
+std::vector<Record*>
+RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
+ Record *Class = getClass(ClassName);
+ if (!Class)
+ PrintFatalError("ERROR: Couldn't find the `" + ClassName + "' class!\n");
+
+ std::vector<Record*> Defs;
+ for (std::map<std::string, Record*>::const_iterator I = getDefs().begin(),
+ E = getDefs().end(); I != E; ++I)
+ if (I->second->isSubClassOf(Class))
+ Defs.push_back(I->second);
+
+ return Defs;
+}
+
+/// QualifyName - Return an Init with a qualifier prefix referring
+/// to CurRec's name.
+Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
+ Init *Name, const std::string &Scoper) {
+ RecTy *Type = dyn_cast<TypedInit>(Name)->getType();
+
+ BinOpInit *NewName =
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ CurRec.getNameInit(),
+ StringInit::get(Scoper),
+ Type)->Fold(&CurRec, CurMultiClass),
+ Name,
+ Type);
+
+ if (CurMultiClass && Scoper != "::") {
+ NewName =
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ CurMultiClass->Rec.getNameInit(),
+ StringInit::get("::"),
+ Type)->Fold(&CurRec, CurMultiClass),
+ NewName->Fold(&CurRec, CurMultiClass),
+ Type);
+ }
+
+ return NewName->Fold(&CurRec, CurMultiClass);
+}
+
+/// QualifyName - Return an Init with a qualifier prefix referring
+/// to CurRec's name.
+Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
+ const std::string &Name,
+ const std::string &Scoper) {
+ return QualifyName(CurRec, CurMultiClass, StringInit::get(Name), Scoper);
+}
diff --git a/contrib/llvm/lib/TableGen/StringMatcher.cpp b/contrib/llvm/lib/TableGen/StringMatcher.cpp
new file mode 100644
index 000000000000..16681702d1d6
--- /dev/null
+++ b/contrib/llvm/lib/TableGen/StringMatcher.cpp
@@ -0,0 +1,149 @@
+//===- StringMatcher.cpp - Generate a matcher for input strings -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringMatcher class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/StringMatcher.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+using namespace llvm;
+
+/// FindFirstNonCommonLetter - Find the first character in the keys of the
+/// string pairs that is not shared across the whole set of strings. All
+/// strings are assumed to have the same length.
+static unsigned
+FindFirstNonCommonLetter(const std::vector<const
+ StringMatcher::StringPair*> &Matches) {
+ assert(!Matches.empty());
+ for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) {
+ // Check to see if letter i is the same across the set.
+ char Letter = Matches[0]->first[i];
+
+ for (unsigned str = 0, e = Matches.size(); str != e; ++str)
+ if (Matches[str]->first[i] != Letter)
+ return i;
+ }
+
+ return Matches[0]->first.size();
+}
+
+/// EmitStringMatcherForChar - Given a set of strings that are known to be the
+/// same length and whose characters leading up to CharNo are the same, emit
+/// code to verify that CharNo and later are the same.
+///
+/// \return - True if control can leave the emitted code fragment.
+bool StringMatcher::
+EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
+ unsigned CharNo, unsigned IndentCount) const {
+ assert(!Matches.empty() && "Must have at least one string to match!");
+ std::string Indent(IndentCount*2+4, ' ');
+
+ // If we have verified that the entire string matches, we're done: output the
+ // matching code.
+ if (CharNo == Matches[0]->first.size()) {
+ assert(Matches.size() == 1 && "Had duplicate keys to match on");
+
+ // If the to-execute code has \n's in it, indent each subsequent line.
+ StringRef Code = Matches[0]->second;
+
+ std::pair<StringRef, StringRef> Split = Code.split('\n');
+ OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n";
+
+ Code = Split.second;
+ while (!Code.empty()) {
+ Split = Code.split('\n');
+ OS << Indent << Split.first << "\n";
+ Code = Split.second;
+ }
+ return false;
+ }
+
+ // Bucket the matches by the character we are comparing.
+ std::map<char, std::vector<const StringPair*> > MatchesByLetter;
+
+ for (unsigned i = 0, e = Matches.size(); i != e; ++i)
+ MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
+
+
+ // If we have exactly one bucket to match, see how many characters are common
+ // across the whole set and match all of them at once.
+ if (MatchesByLetter.size() == 1) {
+ unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches);
+ unsigned NumChars = FirstNonCommonLetter-CharNo;
+
+ // Emit code to break out if the prefix doesn't match.
+ if (NumChars == 1) {
+ // Do the comparison with if (Str[1] != 'f')
+ // FIXME: Need to escape general characters.
+ OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '"
+ << Matches[0]->first[CharNo] << "')\n";
+ OS << Indent << " break;\n";
+ } else {
+ // Do the comparison with if memcmp(Str.data()+1, "foo", 3).
+ // FIXME: Need to escape general strings.
+ OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo
+ << ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", "
+ << NumChars << "))\n";
+ OS << Indent << " break;\n";
+ }
+
+ return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount);
+ }
+
+ // Otherwise, we have multiple possible things, emit a switch on the
+ // character.
+ OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
+ OS << Indent << "default: break;\n";
+
+ for (std::map<char, std::vector<const StringPair*> >::iterator LI =
+ MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
+ // TODO: escape hard stuff (like \n) if we ever care about it.
+ OS << Indent << "case '" << LI->first << "':\t // "
+ << LI->second.size() << " string";
+ if (LI->second.size() != 1) OS << 's';
+ OS << " to match.\n";
+ if (EmitStringMatcherForChar(LI->second, CharNo+1, IndentCount+1))
+ OS << Indent << " break;\n";
+ }
+
+ OS << Indent << "}\n";
+ return true;
+}
+
+
+/// Emit - Top level entry point.
+///
+void StringMatcher::Emit(unsigned Indent) const {
+ // If nothing to match, just fall through.
+ if (Matches.empty()) return;
+
+ // First level categorization: group strings by length.
+ std::map<unsigned, std::vector<const StringPair*> > MatchesByLength;
+
+ for (unsigned i = 0, e = Matches.size(); i != e; ++i)
+ MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
+
+ // Output a switch statement on length and categorize the elements within each
+ // bin.
+ OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n";
+ OS.indent(Indent*2+2) << "default: break;\n";
+
+ for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI =
+ MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
+ OS.indent(Indent*2+2) << "case " << LI->first << ":\t // "
+ << LI->second.size()
+ << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n";
+ if (EmitStringMatcherForChar(LI->second, 0, Indent))
+ OS.indent(Indent*2+4) << "break;\n";
+ }
+
+ OS.indent(Indent*2+2) << "}\n";
+}
diff --git a/contrib/llvm/lib/TableGen/TGLexer.cpp b/contrib/llvm/lib/TableGen/TGLexer.cpp
new file mode 100644
index 000000000000..c6be4f8a1189
--- /dev/null
+++ b/contrib/llvm/lib/TableGen/TGLexer.cpp
@@ -0,0 +1,486 @@
+//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Lexer for TableGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TGLexer.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h" // for strtoull()/strtoll() define
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/TableGen/Error.h"
+#include <cctype>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+using namespace llvm;
+
+TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
+ CurBuffer = 0;
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = CurBuf->getBufferStart();
+ TokStart = 0;
+}
+
+SMLoc TGLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
+}
+
+/// ReturnError - Set the error to the specified string at the specified
+/// location. This is defined to always return tgtok::Error.
+tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
+ PrintError(Loc, Msg);
+ return tgtok::Error;
+}
+
+int TGLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default:
+ return (unsigned char)CurChar;
+ case 0: {
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // If this is the end of an included file, pop the parent file off the
+ // include stack.
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc != SMLoc()) {
+ CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = ParentIncludeLoc.getPointer();
+ return getNextChar();
+ }
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+ case '\n':
+ case '\r':
+ // Handle the newline character by ignoring it and incrementing the line
+ // count. However, be careful about 'dos style' files with \n\r in them.
+ // Only treat a \n\r or \r\n as a single line.
+ if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
+ *CurPtr != CurChar)
+ ++CurPtr; // Eat the two char newline sequence.
+ return '\n';
+ }
+}
+
+int TGLexer::peekNextChar(int Index) {
+ return *(CurPtr + Index);
+}
+
+tgtok::TokKind TGLexer::LexToken() {
+ TokStart = CurPtr;
+ // This always consumes at least one character.
+ int CurChar = getNextChar();
+
+ switch (CurChar) {
+ default:
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(CurChar) || CurChar == '_')
+ return LexIdentifier();
+
+ // Unknown character, emit an error.
+ return ReturnError(TokStart, "Unexpected character");
+ case EOF: return tgtok::Eof;
+ case ':': return tgtok::colon;
+ case ';': return tgtok::semi;
+ case '.': return tgtok::period;
+ case ',': return tgtok::comma;
+ case '<': return tgtok::less;
+ case '>': return tgtok::greater;
+ case ']': return tgtok::r_square;
+ case '{': return tgtok::l_brace;
+ case '}': return tgtok::r_brace;
+ case '(': return tgtok::l_paren;
+ case ')': return tgtok::r_paren;
+ case '=': return tgtok::equal;
+ case '?': return tgtok::question;
+ case '#': return tgtok::paste;
+
+ case 0:
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ // Ignore whitespace.
+ return LexToken();
+ case '/':
+ // If this is the start of a // comment, skip until the end of the line or
+ // the end of the buffer.
+ if (*CurPtr == '/')
+ SkipBCPLComment();
+ else if (*CurPtr == '*') {
+ if (SkipCComment())
+ return tgtok::Error;
+ } else // Otherwise, this is an error.
+ return ReturnError(TokStart, "Unexpected character");
+ return LexToken();
+ case '-': case '+':
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+ case '7': case '8': case '9': {
+ int NextChar = 0;
+ if (isdigit(CurChar)) {
+ // Allow identifiers to start with a number if it is followed by
+ // an identifier. This can happen with paste operations like
+ // foo#8i.
+ int i = 0;
+ do {
+ NextChar = peekNextChar(i++);
+ } while (isdigit(NextChar));
+
+ if (NextChar == 'x' || NextChar == 'b') {
+ // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
+ // likely a number.
+ int NextNextChar = peekNextChar(i);
+ switch (NextNextChar) {
+ default:
+ break;
+ case '0': case '1':
+ if (NextChar == 'b')
+ return LexNumber();
+ // Fallthrough
+ case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ if (NextChar == 'x')
+ return LexNumber();
+ break;
+ }
+ }
+ }
+
+ if (isalpha(NextChar) || NextChar == '_')
+ return LexIdentifier();
+
+ return LexNumber();
+ }
+ case '"': return LexString();
+ case '$': return LexVarName();
+ case '[': return LexBracket();
+ case '!': return LexExclaim();
+ }
+}
+
+/// LexString - Lex "[^"]*"
+tgtok::TokKind TGLexer::LexString() {
+ const char *StrStart = CurPtr;
+
+ CurStrVal = "";
+
+ while (*CurPtr != '"') {
+ // If we hit the end of the buffer, report an error.
+ if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd())
+ return ReturnError(StrStart, "End of file in string literal");
+
+ if (*CurPtr == '\n' || *CurPtr == '\r')
+ return ReturnError(StrStart, "End of line in string literal");
+
+ if (*CurPtr != '\\') {
+ CurStrVal += *CurPtr++;
+ continue;
+ }
+
+ ++CurPtr;
+
+ switch (*CurPtr) {
+ case '\\': case '\'': case '"':
+ // These turn into their literal character.
+ CurStrVal += *CurPtr++;
+ break;
+ case 't':
+ CurStrVal += '\t';
+ ++CurPtr;
+ break;
+ case 'n':
+ CurStrVal += '\n';
+ ++CurPtr;
+ break;
+
+ case '\n':
+ case '\r':
+ return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
+
+ // If we hit the end of the buffer, report an error.
+ case '\0':
+ if (CurPtr == CurBuf->getBufferEnd())
+ return ReturnError(StrStart, "End of file in string literal");
+ // FALL THROUGH
+ default:
+ return ReturnError(CurPtr, "invalid escape in string literal");
+ }
+ }
+
+ ++CurPtr;
+ return tgtok::StrVal;
+}
+
+tgtok::TokKind TGLexer::LexVarName() {
+ if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
+ return ReturnError(TokStart, "Invalid variable name");
+
+ // Otherwise, we're ok, consume the rest of the characters.
+ const char *VarNameStart = CurPtr++;
+
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
+ ++CurPtr;
+
+ CurStrVal.assign(VarNameStart, CurPtr);
+ return tgtok::VarName;
+}
+
+
+tgtok::TokKind TGLexer::LexIdentifier() {
+ // The first letter is [a-zA-Z_#].
+ const char *IdentStart = TokStart;
+
+ // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
+ ++CurPtr;
+
+ // Check to see if this identifier is a keyword.
+ StringRef Str(IdentStart, CurPtr-IdentStart);
+
+ if (Str == "include") {
+ if (LexInclude()) return tgtok::Error;
+ return Lex();
+ }
+
+ tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
+ .Case("int", tgtok::Int)
+ .Case("bit", tgtok::Bit)
+ .Case("bits", tgtok::Bits)
+ .Case("string", tgtok::String)
+ .Case("list", tgtok::List)
+ .Case("code", tgtok::Code)
+ .Case("dag", tgtok::Dag)
+ .Case("class", tgtok::Class)
+ .Case("def", tgtok::Def)
+ .Case("foreach", tgtok::Foreach)
+ .Case("defm", tgtok::Defm)
+ .Case("multiclass", tgtok::MultiClass)
+ .Case("field", tgtok::Field)
+ .Case("let", tgtok::Let)
+ .Case("in", tgtok::In)
+ .Default(tgtok::Id);
+
+ if (Kind == tgtok::Id)
+ CurStrVal.assign(Str.begin(), Str.end());
+ return Kind;
+}
+
+/// LexInclude - We just read the "include" token. Get the string token that
+/// comes next and enter the include.
+bool TGLexer::LexInclude() {
+ // The token after the include must be a string.
+ tgtok::TokKind Tok = LexToken();
+ if (Tok == tgtok::Error) return true;
+ if (Tok != tgtok::StrVal) {
+ PrintError(getLoc(), "Expected filename after include");
+ return true;
+ }
+
+ // Get the string.
+ std::string Filename = CurStrVal;
+ std::string IncludedFile;
+
+
+ CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
+ IncludedFile);
+ if (CurBuffer == -1) {
+ PrintError(getLoc(), "Could not find include file '" + Filename + "'");
+ return true;
+ }
+
+ DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile);
+ if (Found != Dependencies.end()) {
+ PrintError(getLoc(),
+ "File '" + IncludedFile + "' has already been included.");
+ SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note,
+ "previously included here");
+ return true;
+ }
+ Dependencies.insert(std::make_pair(IncludedFile, getLoc()));
+ // Save the line number and lex buffer of the includer.
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = CurBuf->getBufferStart();
+ return false;
+}
+
+void TGLexer::SkipBCPLComment() {
+ ++CurPtr; // skip the second slash.
+ while (1) {
+ switch (*CurPtr) {
+ case '\n':
+ case '\r':
+ return; // Newline is end of comment.
+ case 0:
+ // If this is the end of the buffer, end the comment.
+ if (CurPtr == CurBuf->getBufferEnd())
+ return;
+ break;
+ }
+ // Otherwise, skip the character.
+ ++CurPtr;
+ }
+}
+
+/// SkipCComment - This skips C-style /**/ comments. The only difference from C
+/// is that we allow nesting.
+bool TGLexer::SkipCComment() {
+ ++CurPtr; // skip the star.
+ unsigned CommentDepth = 1;
+
+ while (1) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ PrintError(TokStart, "Unterminated comment!");
+ return true;
+ case '*':
+ // End of the comment?
+ if (CurPtr[0] != '/') break;
+
+ ++CurPtr; // End the */.
+ if (--CommentDepth == 0)
+ return false;
+ break;
+ case '/':
+ // Start of a nested comment?
+ if (CurPtr[0] != '*') break;
+ ++CurPtr;
+ ++CommentDepth;
+ break;
+ }
+ }
+}
+
+/// LexNumber - Lex:
+/// [-+]?[0-9]+
+/// 0x[0-9a-fA-F]+
+/// 0b[01]+
+tgtok::TokKind TGLexer::LexNumber() {
+ if (CurPtr[-1] == '0') {
+ if (CurPtr[0] == 'x') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ // Requires at least one hex digit.
+ if (CurPtr == NumStart)
+ return ReturnError(TokStart, "Invalid hexadecimal number");
+
+ errno = 0;
+ CurIntVal = strtoll(NumStart, 0, 16);
+ if (errno == EINVAL)
+ return ReturnError(TokStart, "Invalid hexadecimal number");
+ if (errno == ERANGE) {
+ errno = 0;
+ CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+ if (errno == EINVAL)
+ return ReturnError(TokStart, "Invalid hexadecimal number");
+ if (errno == ERANGE)
+ return ReturnError(TokStart, "Hexadecimal number out of range");
+ }
+ return tgtok::IntVal;
+ } else if (CurPtr[0] == 'b') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (CurPtr[0] == '0' || CurPtr[0] == '1')
+ ++CurPtr;
+
+ // Requires at least one binary digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "Invalid binary number");
+ CurIntVal = strtoll(NumStart, 0, 2);
+ return tgtok::IntVal;
+ }
+ }
+
+ // Check for a sign without a digit.
+ if (!isdigit(CurPtr[0])) {
+ if (CurPtr[-1] == '-')
+ return tgtok::minus;
+ else if (CurPtr[-1] == '+')
+ return tgtok::plus;
+ }
+
+ while (isdigit(CurPtr[0]))
+ ++CurPtr;
+ CurIntVal = strtoll(TokStart, 0, 10);
+ return tgtok::IntVal;
+}
+
+/// LexBracket - We just read '['. If this is a code block, return it,
+/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
+tgtok::TokKind TGLexer::LexBracket() {
+ if (CurPtr[0] != '{')
+ return tgtok::l_square;
+ ++CurPtr;
+ const char *CodeStart = CurPtr;
+ while (1) {
+ int Char = getNextChar();
+ if (Char == EOF) break;
+
+ if (Char != '}') continue;
+
+ Char = getNextChar();
+ if (Char == EOF) break;
+ if (Char == ']') {
+ CurStrVal.assign(CodeStart, CurPtr-2);
+ return tgtok::CodeFragment;
+ }
+ }
+
+ return ReturnError(CodeStart-2, "Unterminated Code Block");
+}
+
+/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
+tgtok::TokKind TGLexer::LexExclaim() {
+ if (!isalpha(*CurPtr))
+ return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
+
+ const char *Start = CurPtr++;
+ while (isalpha(*CurPtr))
+ ++CurPtr;
+
+ // Check to see which operator this is.
+ tgtok::TokKind Kind =
+ StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
+ .Case("eq", tgtok::XEq)
+ .Case("if", tgtok::XIf)
+ .Case("head", tgtok::XHead)
+ .Case("tail", tgtok::XTail)
+ .Case("con", tgtok::XConcat)
+ .Case("add", tgtok::XADD)
+ .Case("shl", tgtok::XSHL)
+ .Case("sra", tgtok::XSRA)
+ .Case("srl", tgtok::XSRL)
+ .Case("cast", tgtok::XCast)
+ .Case("empty", tgtok::XEmpty)
+ .Case("subst", tgtok::XSubst)
+ .Case("foreach", tgtok::XForEach)
+ .Case("strconcat", tgtok::XStrConcat)
+ .Default(tgtok::Error);
+
+ return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
+}
+
diff --git a/contrib/llvm/lib/TableGen/TGLexer.h b/contrib/llvm/lib/TableGen/TGLexer.h
new file mode 100644
index 000000000000..d1bd70d2eca4
--- /dev/null
+++ b/contrib/llvm/lib/TableGen/TGLexer.h
@@ -0,0 +1,132 @@
+//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents the Lexer for tablegen files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TGLEXER_H
+#define TGLEXER_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/SMLoc.h"
+#include <cassert>
+#include <map>
+#include <string>
+
+namespace llvm {
+class MemoryBuffer;
+class SourceMgr;
+class SMLoc;
+class Twine;
+
+namespace tgtok {
+ enum TokKind {
+ // Markers
+ Eof, Error,
+
+ // Tokens with no info.
+ minus, plus, // - +
+ l_square, r_square, // [ ]
+ l_brace, r_brace, // { }
+ l_paren, r_paren, // ( )
+ less, greater, // < >
+ colon, semi, // : ;
+ comma, period, // , .
+ equal, question, // = ?
+ paste, // #
+
+ // Keywords.
+ Bit, Bits, Class, Code, Dag, Def, Foreach, Defm, Field, In, Int, Let, List,
+ MultiClass, String,
+
+ // !keywords.
+ XConcat, XADD, XSRA, XSRL, XSHL, XStrConcat, XCast, XSubst,
+ XForEach, XHead, XTail, XEmpty, XIf, XEq,
+
+ // Integer value.
+ IntVal,
+
+ // String valued tokens.
+ Id, StrVal, VarName, CodeFragment
+ };
+}
+
+/// TGLexer - TableGen Lexer class.
+class TGLexer {
+ SourceMgr &SrcMgr;
+
+ const char *CurPtr;
+ const MemoryBuffer *CurBuf;
+
+ // Information about the current token.
+ const char *TokStart;
+ tgtok::TokKind CurCode;
+ std::string CurStrVal; // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
+ int64_t CurIntVal; // This is valid for INTVAL.
+
+ /// CurBuffer - This is the current buffer index we're lexing from as managed
+ /// by the SourceMgr object.
+ int CurBuffer;
+
+public:
+ typedef std::map<std::string, SMLoc> DependenciesMapTy;
+private:
+ /// Dependencies - This is the list of all included files.
+ DependenciesMapTy Dependencies;
+
+public:
+ TGLexer(SourceMgr &SrcMgr);
+ ~TGLexer() {}
+
+ tgtok::TokKind Lex() {
+ return CurCode = LexToken();
+ }
+
+ const DependenciesMapTy &getDependencies() const {
+ return Dependencies;
+ }
+
+ tgtok::TokKind getCode() const { return CurCode; }
+
+ const std::string &getCurStrVal() const {
+ assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
+ CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
+ "This token doesn't have a string value");
+ return CurStrVal;
+ }
+ int64_t getCurIntVal() const {
+ assert(CurCode == tgtok::IntVal && "This token isn't an integer");
+ return CurIntVal;
+ }
+
+ SMLoc getLoc() const;
+
+private:
+ /// LexToken - Read the next token and return its code.
+ tgtok::TokKind LexToken();
+
+ tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
+
+ int getNextChar();
+ int peekNextChar(int Index);
+ void SkipBCPLComment();
+ bool SkipCComment();
+ tgtok::TokKind LexIdentifier();
+ bool LexInclude();
+ tgtok::TokKind LexString();
+ tgtok::TokKind LexVarName();
+ tgtok::TokKind LexNumber();
+ tgtok::TokKind LexBracket();
+ tgtok::TokKind LexExclaim();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm/lib/TableGen/TGParser.cpp
new file mode 100644
index 000000000000..86ad2a6e3c09
--- /dev/null
+++ b/contrib/llvm/lib/TableGen/TGParser.cpp
@@ -0,0 +1,2591 @@
+//===- TGParser.cpp - Parser for TableGen Files ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Parser for TableGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TGParser.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TableGen/Record.h"
+#include <algorithm>
+#include <sstream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Support Code for the Semantic Actions.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+struct SubClassReference {
+ SMRange RefRange;
+ Record *Rec;
+ std::vector<Init*> TemplateArgs;
+ SubClassReference() : Rec(0) {}
+
+ bool isInvalid() const { return Rec == 0; }
+};
+
+struct SubMultiClassReference {
+ SMRange RefRange;
+ MultiClass *MC;
+ std::vector<Init*> TemplateArgs;
+ SubMultiClassReference() : MC(0) {}
+
+ bool isInvalid() const { return MC == 0; }
+ void dump() const;
+};
+
+void SubMultiClassReference::dump() const {
+ errs() << "Multiclass:\n";
+
+ MC->dump();
+
+ errs() << "Template args:\n";
+ for (std::vector<Init *>::const_iterator i = TemplateArgs.begin(),
+ iend = TemplateArgs.end();
+ i != iend;
+ ++i) {
+ (*i)->dump();
+ }
+}
+
+} // end namespace llvm
+
+bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
+ if (CurRec == 0)
+ CurRec = &CurMultiClass->Rec;
+
+ if (RecordVal *ERV = CurRec->getValue(RV.getNameInit())) {
+ // The value already exists in the class, treat this as a set.
+ if (ERV->setValue(RV.getValue()))
+ return Error(Loc, "New definition of '" + RV.getName() + "' of type '" +
+ RV.getType()->getAsString() + "' is incompatible with " +
+ "previous definition of type '" +
+ ERV->getType()->getAsString() + "'");
+ } else {
+ CurRec->addValue(RV);
+ }
+ return false;
+}
+
+/// SetValue -
+/// Return true on error, false on success.
+bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
+ const std::vector<unsigned> &BitList, Init *V) {
+ if (!V) return false;
+
+ if (CurRec == 0) CurRec = &CurMultiClass->Rec;
+
+ RecordVal *RV = CurRec->getValue(ValName);
+ if (RV == 0)
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString()
+ + "' unknown!");
+
+ // Do not allow assignments like 'X = X'. This will just cause infinite loops
+ // in the resolution machinery.
+ if (BitList.empty())
+ if (VarInit *VI = dyn_cast<VarInit>(V))
+ if (VI->getNameInit() == ValName)
+ return false;
+
+ // If we are assigning to a subset of the bits in the value... then we must be
+ // assigning to a field of BitsRecTy, which must have a BitsInit
+ // initializer.
+ //
+ if (!BitList.empty()) {
+ BitsInit *CurVal = dyn_cast<BitsInit>(RV->getValue());
+ if (CurVal == 0)
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString()
+ + "' is not a bits type");
+
+ // Convert the incoming value to a bits type of the appropriate size...
+ Init *BI = V->convertInitializerTo(BitsRecTy::get(BitList.size()));
+ if (BI == 0) {
+ return Error(Loc, "Initializer is not compatible with bit range");
+ }
+
+ // We should have a BitsInit type now.
+ BitsInit *BInit = dyn_cast<BitsInit>(BI);
+ assert(BInit != 0);
+
+ SmallVector<Init *, 16> NewBits(CurVal->getNumBits());
+
+ // Loop over bits, assigning values as appropriate.
+ for (unsigned i = 0, e = BitList.size(); i != e; ++i) {
+ unsigned Bit = BitList[i];
+ if (NewBits[Bit])
+ return Error(Loc, "Cannot set bit #" + utostr(Bit) + " of value '" +
+ ValName->getAsUnquotedString() + "' more than once");
+ NewBits[Bit] = BInit->getBit(i);
+ }
+
+ for (unsigned i = 0, e = CurVal->getNumBits(); i != e; ++i)
+ if (NewBits[i] == 0)
+ NewBits[i] = CurVal->getBit(i);
+
+ V = BitsInit::get(NewBits);
+ }
+
+ if (RV->setValue(V))
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString() + "' of type '"
+ + RV->getType()->getAsString() +
+ "' is incompatible with initializer '" + V->getAsString()
+ + "'");
+ return false;
+}
+
+/// AddSubClass - Add SubClass as a subclass to CurRec, resolving its template
+/// args as SubClass's template arguments.
+bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
+ Record *SC = SubClass.Rec;
+ // Add all of the values in the subclass into the current class.
+ const std::vector<RecordVal> &Vals = SC->getValues();
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ if (AddValue(CurRec, SubClass.RefRange.Start, Vals[i]))
+ return true;
+
+ const std::vector<Init *> &TArgs = SC->getTemplateArgs();
+
+ // Ensure that an appropriate number of template arguments are specified.
+ if (TArgs.size() < SubClass.TemplateArgs.size())
+ return Error(SubClass.RefRange.Start,
+ "More template args specified than expected");
+
+ // Loop over all of the template arguments, setting them to the specified
+ // value or leaving them as the default if necessary.
+ for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
+ if (i < SubClass.TemplateArgs.size()) {
+ // If a value is specified for this template arg, set it now.
+ if (SetValue(CurRec, SubClass.RefRange.Start, TArgs[i],
+ std::vector<unsigned>(), SubClass.TemplateArgs[i]))
+ return true;
+
+ // Resolve it next.
+ CurRec->resolveReferencesTo(CurRec->getValue(TArgs[i]));
+
+ // Now remove it.
+ CurRec->removeValue(TArgs[i]);
+
+ } else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
+ return Error(SubClass.RefRange.Start,
+ "Value not specified for template argument #"
+ + utostr(i) + " (" + TArgs[i]->getAsUnquotedString()
+ + ") of subclass '" + SC->getNameInitAsString() + "'!");
+ }
+ }
+
+ // Since everything went well, we can now set the "superclass" list for the
+ // current record.
+ const std::vector<Record*> &SCs = SC->getSuperClasses();
+ ArrayRef<SMRange> SCRanges = SC->getSuperClassRanges();
+ for (unsigned i = 0, e = SCs.size(); i != e; ++i) {
+ if (CurRec->isSubClassOf(SCs[i]))
+ return Error(SubClass.RefRange.Start,
+ "Already subclass of '" + SCs[i]->getName() + "'!\n");
+ CurRec->addSuperClass(SCs[i], SCRanges[i]);
+ }
+
+ if (CurRec->isSubClassOf(SC))
+ return Error(SubClass.RefRange.Start,
+ "Already subclass of '" + SC->getName() + "'!\n");
+ CurRec->addSuperClass(SC, SubClass.RefRange);
+ return false;
+}
+
+/// AddSubMultiClass - Add SubMultiClass as a subclass to
+/// CurMC, resolving its template args as SubMultiClass's
+/// template arguments.
+bool TGParser::AddSubMultiClass(MultiClass *CurMC,
+ SubMultiClassReference &SubMultiClass) {
+ MultiClass *SMC = SubMultiClass.MC;
+ Record *CurRec = &CurMC->Rec;
+
+ const std::vector<RecordVal> &MCVals = CurRec->getValues();
+
+ // Add all of the values in the subclass into the current class.
+ const std::vector<RecordVal> &SMCVals = SMC->Rec.getValues();
+ for (unsigned i = 0, e = SMCVals.size(); i != e; ++i)
+ if (AddValue(CurRec, SubMultiClass.RefRange.Start, SMCVals[i]))
+ return true;
+
+ int newDefStart = CurMC->DefPrototypes.size();
+
+ // Add all of the defs in the subclass into the current multiclass.
+ for (MultiClass::RecordVector::const_iterator i = SMC->DefPrototypes.begin(),
+ iend = SMC->DefPrototypes.end();
+ i != iend;
+ ++i) {
+ // Clone the def and add it to the current multiclass
+ Record *NewDef = new Record(**i);
+
+ // Add all of the values in the superclass into the current def.
+ for (unsigned i = 0, e = MCVals.size(); i != e; ++i)
+ if (AddValue(NewDef, SubMultiClass.RefRange.Start, MCVals[i]))
+ return true;
+
+ CurMC->DefPrototypes.push_back(NewDef);
+ }
+
+ const std::vector<Init *> &SMCTArgs = SMC->Rec.getTemplateArgs();
+
+ // Ensure that an appropriate number of template arguments are
+ // specified.
+ if (SMCTArgs.size() < SubMultiClass.TemplateArgs.size())
+ return Error(SubMultiClass.RefRange.Start,
+ "More template args specified than expected");
+
+ // Loop over all of the template arguments, setting them to the specified
+ // value or leaving them as the default if necessary.
+ for (unsigned i = 0, e = SMCTArgs.size(); i != e; ++i) {
+ if (i < SubMultiClass.TemplateArgs.size()) {
+ // If a value is specified for this template arg, set it in the
+ // superclass now.
+ if (SetValue(CurRec, SubMultiClass.RefRange.Start, SMCTArgs[i],
+ std::vector<unsigned>(),
+ SubMultiClass.TemplateArgs[i]))
+ return true;
+
+ // Resolve it next.
+ CurRec->resolveReferencesTo(CurRec->getValue(SMCTArgs[i]));
+
+ // Now remove it.
+ CurRec->removeValue(SMCTArgs[i]);
+
+ // If a value is specified for this template arg, set it in the
+ // new defs now.
+ for (MultiClass::RecordVector::iterator j =
+ CurMC->DefPrototypes.begin() + newDefStart,
+ jend = CurMC->DefPrototypes.end();
+ j != jend;
+ ++j) {
+ Record *Def = *j;
+
+ if (SetValue(Def, SubMultiClass.RefRange.Start, SMCTArgs[i],
+ std::vector<unsigned>(),
+ SubMultiClass.TemplateArgs[i]))
+ return true;
+
+ // Resolve it next.
+ Def->resolveReferencesTo(Def->getValue(SMCTArgs[i]));
+
+ // Now remove it
+ Def->removeValue(SMCTArgs[i]);
+ }
+ } else if (!CurRec->getValue(SMCTArgs[i])->getValue()->isComplete()) {
+ return Error(SubMultiClass.RefRange.Start,
+ "Value not specified for template argument #"
+ + utostr(i) + " (" + SMCTArgs[i]->getAsUnquotedString()
+ + ") of subclass '" + SMC->Rec.getNameInitAsString() + "'!");
+ }
+ }
+
+ return false;
+}
+
+/// ProcessForeachDefs - Given a record, apply all of the variable
+/// values in all surrounding foreach loops, creating new records for
+/// each combination of values.
+bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc) {
+ if (Loops.empty())
+ return false;
+
+ // We want to instantiate a new copy of CurRec for each combination
+ // of nested loop iterator values. We don't want top instantiate
+ // any copies until we have values for each loop iterator.
+ IterSet IterVals;
+ return ProcessForeachDefs(CurRec, Loc, IterVals);
+}
+
+/// ProcessForeachDefs - Given a record, a loop and a loop iterator,
+/// apply each of the variable values in this loop and then process
+/// subloops.
+bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
+ // Recursively build a tuple of iterator values.
+ if (IterVals.size() != Loops.size()) {
+ assert(IterVals.size() < Loops.size());
+ ForeachLoop &CurLoop = Loops[IterVals.size()];
+ ListInit *List = dyn_cast<ListInit>(CurLoop.ListValue);
+ if (List == 0) {
+ Error(Loc, "Loop list is not a list");
+ return true;
+ }
+
+ // Process each value.
+ for (int64_t i = 0; i < List->getSize(); ++i) {
+ Init *ItemVal = List->resolveListElementReference(*CurRec, 0, i);
+ IterVals.push_back(IterRecord(CurLoop.IterVar, ItemVal));
+ if (ProcessForeachDefs(CurRec, Loc, IterVals))
+ return true;
+ IterVals.pop_back();
+ }
+ return false;
+ }
+
+ // This is the bottom of the recursion. We have all of the iterator values
+ // for this point in the iteration space. Instantiate a new record to
+ // reflect this combination of values.
+ Record *IterRec = new Record(*CurRec);
+
+ // Set the iterator values now.
+ for (unsigned i = 0, e = IterVals.size(); i != e; ++i) {
+ VarInit *IterVar = IterVals[i].IterVar;
+ TypedInit *IVal = dyn_cast<TypedInit>(IterVals[i].IterValue);
+ if (IVal == 0) {
+ Error(Loc, "foreach iterator value is untyped");
+ return true;
+ }
+
+ IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false));
+
+ if (SetValue(IterRec, Loc, IterVar->getName(),
+ std::vector<unsigned>(), IVal)) {
+ Error(Loc, "when instantiating this def");
+ return true;
+ }
+
+ // Resolve it next.
+ IterRec->resolveReferencesTo(IterRec->getValue(IterVar->getName()));
+
+ // Remove it.
+ IterRec->removeValue(IterVar->getName());
+ }
+
+ if (Records.getDef(IterRec->getNameInitAsString())) {
+ Error(Loc, "def already exists: " + IterRec->getNameInitAsString());
+ return true;
+ }
+
+ Records.addDef(IterRec);
+ IterRec->resolveReferences();
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Parser Code
+//===----------------------------------------------------------------------===//
+
+/// isObjectStart - Return true if this is a valid first token for an Object.
+static bool isObjectStart(tgtok::TokKind K) {
+ return K == tgtok::Class || K == tgtok::Def ||
+ K == tgtok::Defm || K == tgtok::Let ||
+ K == tgtok::MultiClass || K == tgtok::Foreach;
+}
+
+static std::string GetNewAnonymousName() {
+ static unsigned AnonCounter = 0;
+ unsigned Tmp = AnonCounter++; // MSVC2012 ICEs without this.
+ return "anonymous." + utostr(Tmp);
+}
+
+/// ParseObjectName - If an object name is specified, return it. Otherwise,
+/// return 0.
+/// ObjectName ::= Value [ '#' Value ]*
+/// ObjectName ::= /*empty*/
+///
+Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
+ switch (Lex.getCode()) {
+ case tgtok::colon:
+ case tgtok::semi:
+ case tgtok::l_brace:
+ // These are all of the tokens that can begin an object body.
+ // Some of these can also begin values but we disallow those cases
+ // because they are unlikely to be useful.
+ return 0;
+ default:
+ break;
+ }
+
+ Record *CurRec = 0;
+ if (CurMultiClass)
+ CurRec = &CurMultiClass->Rec;
+
+ RecTy *Type = 0;
+ if (CurRec) {
+ const TypedInit *CurRecName = dyn_cast<TypedInit>(CurRec->getNameInit());
+ if (!CurRecName) {
+ TokError("Record name is not typed!");
+ return 0;
+ }
+ Type = CurRecName->getType();
+ }
+
+ return ParseValue(CurRec, Type, ParseNameMode);
+}
+
+/// ParseClassID - Parse and resolve a reference to a class name. This returns
+/// null on error.
+///
+/// ClassID ::= ID
+///
+Record *TGParser::ParseClassID() {
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("expected name for ClassID");
+ return 0;
+ }
+
+ Record *Result = Records.getClass(Lex.getCurStrVal());
+ if (Result == 0)
+ TokError("Couldn't find class '" + Lex.getCurStrVal() + "'");
+
+ Lex.Lex();
+ return Result;
+}
+
+/// ParseMultiClassID - Parse and resolve a reference to a multiclass name.
+/// This returns null on error.
+///
+/// MultiClassID ::= ID
+///
+MultiClass *TGParser::ParseMultiClassID() {
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("expected name for MultiClassID");
+ return 0;
+ }
+
+ MultiClass *Result = MultiClasses[Lex.getCurStrVal()];
+ if (Result == 0)
+ TokError("Couldn't find multiclass '" + Lex.getCurStrVal() + "'");
+
+ Lex.Lex();
+ return Result;
+}
+
+/// ParseSubClassReference - Parse a reference to a subclass or to a templated
+/// subclass. This returns a SubClassRefTy with a null Record* on error.
+///
+/// SubClassRef ::= ClassID
+/// SubClassRef ::= ClassID '<' ValueList '>'
+///
+SubClassReference TGParser::
+ParseSubClassReference(Record *CurRec, bool isDefm) {
+ SubClassReference Result;
+ Result.RefRange.Start = Lex.getLoc();
+
+ if (isDefm) {
+ if (MultiClass *MC = ParseMultiClassID())
+ Result.Rec = &MC->Rec;
+ } else {
+ Result.Rec = ParseClassID();
+ }
+ if (Result.Rec == 0) return Result;
+
+ // If there is no template arg list, we're done.
+ if (Lex.getCode() != tgtok::less) {
+ Result.RefRange.End = Lex.getLoc();
+ return Result;
+ }
+ Lex.Lex(); // Eat the '<'
+
+ if (Lex.getCode() == tgtok::greater) {
+ TokError("subclass reference requires a non-empty list of template values");
+ Result.Rec = 0;
+ return Result;
+ }
+
+ Result.TemplateArgs = ParseValueList(CurRec, Result.Rec);
+ if (Result.TemplateArgs.empty()) {
+ Result.Rec = 0; // Error parsing value list.
+ return Result;
+ }
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' in template value list");
+ Result.Rec = 0;
+ return Result;
+ }
+ Lex.Lex();
+ Result.RefRange.End = Lex.getLoc();
+
+ return Result;
+}
+
+/// ParseSubMultiClassReference - Parse a reference to a subclass or to a
+/// templated submulticlass. This returns a SubMultiClassRefTy with a null
+/// Record* on error.
+///
+/// SubMultiClassRef ::= MultiClassID
+/// SubMultiClassRef ::= MultiClassID '<' ValueList '>'
+///
+SubMultiClassReference TGParser::
+ParseSubMultiClassReference(MultiClass *CurMC) {
+ SubMultiClassReference Result;
+ Result.RefRange.Start = Lex.getLoc();
+
+ Result.MC = ParseMultiClassID();
+ if (Result.MC == 0) return Result;
+
+ // If there is no template arg list, we're done.
+ if (Lex.getCode() != tgtok::less) {
+ Result.RefRange.End = Lex.getLoc();
+ return Result;
+ }
+ Lex.Lex(); // Eat the '<'
+
+ if (Lex.getCode() == tgtok::greater) {
+ TokError("subclass reference requires a non-empty list of template values");
+ Result.MC = 0;
+ return Result;
+ }
+
+ Result.TemplateArgs = ParseValueList(&CurMC->Rec, &Result.MC->Rec);
+ if (Result.TemplateArgs.empty()) {
+ Result.MC = 0; // Error parsing value list.
+ return Result;
+ }
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' in template value list");
+ Result.MC = 0;
+ return Result;
+ }
+ Lex.Lex();
+ Result.RefRange.End = Lex.getLoc();
+
+ return Result;
+}
+
+/// ParseRangePiece - Parse a bit/value range.
+/// RangePiece ::= INTVAL
+/// RangePiece ::= INTVAL '-' INTVAL
+/// RangePiece ::= INTVAL INTVAL
+bool TGParser::ParseRangePiece(std::vector<unsigned> &Ranges) {
+ if (Lex.getCode() != tgtok::IntVal) {
+ TokError("expected integer or bitrange");
+ return true;
+ }
+ int64_t Start = Lex.getCurIntVal();
+ int64_t End;
+
+ if (Start < 0)
+ return TokError("invalid range, cannot be negative");
+
+ switch (Lex.Lex()) { // eat first character.
+ default:
+ Ranges.push_back(Start);
+ return false;
+ case tgtok::minus:
+ if (Lex.Lex() != tgtok::IntVal) {
+ TokError("expected integer value as end of range");
+ return true;
+ }
+ End = Lex.getCurIntVal();
+ break;
+ case tgtok::IntVal:
+ End = -Lex.getCurIntVal();
+ break;
+ }
+ if (End < 0)
+ return TokError("invalid range, cannot be negative");
+ Lex.Lex();
+
+ // Add to the range.
+ if (Start < End) {
+ for (; Start <= End; ++Start)
+ Ranges.push_back(Start);
+ } else {
+ for (; Start >= End; --Start)
+ Ranges.push_back(Start);
+ }
+ return false;
+}
+
+/// ParseRangeList - Parse a list of scalars and ranges into scalar values.
+///
+/// RangeList ::= RangePiece (',' RangePiece)*
+///
+std::vector<unsigned> TGParser::ParseRangeList() {
+ std::vector<unsigned> Result;
+
+ // Parse the first piece.
+ if (ParseRangePiece(Result))
+ return std::vector<unsigned>();
+ while (Lex.getCode() == tgtok::comma) {
+ Lex.Lex(); // Eat the comma.
+
+ // Parse the next range piece.
+ if (ParseRangePiece(Result))
+ return std::vector<unsigned>();
+ }
+ return Result;
+}
+
+/// ParseOptionalRangeList - Parse either a range list in <>'s or nothing.
+/// OptionalRangeList ::= '<' RangeList '>'
+/// OptionalRangeList ::= /*empty*/
+bool TGParser::ParseOptionalRangeList(std::vector<unsigned> &Ranges) {
+ if (Lex.getCode() != tgtok::less)
+ return false;
+
+ SMLoc StartLoc = Lex.getLoc();
+ Lex.Lex(); // eat the '<'
+
+ // Parse the range list.
+ Ranges = ParseRangeList();
+ if (Ranges.empty()) return true;
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' at end of range list");
+ return Error(StartLoc, "to match this '<'");
+ }
+ Lex.Lex(); // eat the '>'.
+ return false;
+}
+
+/// ParseOptionalBitList - Parse either a bit list in {}'s or nothing.
+/// OptionalBitList ::= '{' RangeList '}'
+/// OptionalBitList ::= /*empty*/
+bool TGParser::ParseOptionalBitList(std::vector<unsigned> &Ranges) {
+ if (Lex.getCode() != tgtok::l_brace)
+ return false;
+
+ SMLoc StartLoc = Lex.getLoc();
+ Lex.Lex(); // eat the '{'
+
+ // Parse the range list.
+ Ranges = ParseRangeList();
+ if (Ranges.empty()) return true;
+
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of bit list");
+ return Error(StartLoc, "to match this '{'");
+ }
+ Lex.Lex(); // eat the '}'.
+ return false;
+}
+
+
+/// ParseType - Parse and return a tblgen type. This returns null on error.
+///
+/// Type ::= STRING // string type
+/// Type ::= CODE // code type
+/// Type ::= BIT // bit type
+/// Type ::= BITS '<' INTVAL '>' // bits<x> type
+/// Type ::= INT // int type
+/// Type ::= LIST '<' Type '>' // list<x> type
+/// Type ::= DAG // dag type
+/// Type ::= ClassID // Record Type
+///
+RecTy *TGParser::ParseType() {
+ switch (Lex.getCode()) {
+ default: TokError("Unknown token when expecting a type"); return 0;
+ case tgtok::String: Lex.Lex(); return StringRecTy::get();
+ case tgtok::Code: Lex.Lex(); return StringRecTy::get();
+ case tgtok::Bit: Lex.Lex(); return BitRecTy::get();
+ case tgtok::Int: Lex.Lex(); return IntRecTy::get();
+ case tgtok::Dag: Lex.Lex(); return DagRecTy::get();
+ case tgtok::Id:
+ if (Record *R = ParseClassID()) return RecordRecTy::get(R);
+ return 0;
+ case tgtok::Bits: {
+ if (Lex.Lex() != tgtok::less) { // Eat 'bits'
+ TokError("expected '<' after bits type");
+ return 0;
+ }
+ if (Lex.Lex() != tgtok::IntVal) { // Eat '<'
+ TokError("expected integer in bits<n> type");
+ return 0;
+ }
+ uint64_t Val = Lex.getCurIntVal();
+ if (Lex.Lex() != tgtok::greater) { // Eat count.
+ TokError("expected '>' at end of bits<n> type");
+ return 0;
+ }
+ Lex.Lex(); // Eat '>'
+ return BitsRecTy::get(Val);
+ }
+ case tgtok::List: {
+ if (Lex.Lex() != tgtok::less) { // Eat 'bits'
+ TokError("expected '<' after list type");
+ return 0;
+ }
+ Lex.Lex(); // Eat '<'
+ RecTy *SubType = ParseType();
+ if (SubType == 0) return 0;
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' at end of list<ty> type");
+ return 0;
+ }
+ Lex.Lex(); // Eat '>'
+ return ListRecTy::get(SubType);
+ }
+ }
+}
+
+/// ParseIDValue - Parse an ID as a value and decode what it means.
+///
+/// IDValue ::= ID [def local value]
+/// IDValue ::= ID [def template arg]
+/// IDValue ::= ID [multiclass local value]
+/// IDValue ::= ID [multiclass template argument]
+/// IDValue ::= ID [def name]
+///
+Init *TGParser::ParseIDValue(Record *CurRec, IDParseMode Mode) {
+ assert(Lex.getCode() == tgtok::Id && "Expected ID in ParseIDValue");
+ std::string Name = Lex.getCurStrVal();
+ SMLoc Loc = Lex.getLoc();
+ Lex.Lex();
+ return ParseIDValue(CurRec, Name, Loc);
+}
+
+/// ParseIDValue - This is just like ParseIDValue above, but it assumes the ID
+/// has already been read.
+Init *TGParser::ParseIDValue(Record *CurRec,
+ const std::string &Name, SMLoc NameLoc,
+ IDParseMode Mode) {
+ if (CurRec) {
+ if (const RecordVal *RV = CurRec->getValue(Name))
+ return VarInit::get(Name, RV->getType());
+
+ Init *TemplateArgName = QualifyName(*CurRec, CurMultiClass, Name, ":");
+
+ if (CurMultiClass)
+ TemplateArgName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name,
+ "::");
+
+ if (CurRec->isTemplateArg(TemplateArgName)) {
+ const RecordVal *RV = CurRec->getValue(TemplateArgName);
+ assert(RV && "Template arg doesn't exist??");
+ return VarInit::get(TemplateArgName, RV->getType());
+ }
+ }
+
+ if (CurMultiClass) {
+ Init *MCName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name,
+ "::");
+
+ if (CurMultiClass->Rec.isTemplateArg(MCName)) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
+ assert(RV && "Template arg doesn't exist??");
+ return VarInit::get(MCName, RV->getType());
+ }
+ }
+
+ // If this is in a foreach loop, make sure it's not a loop iterator
+ for (LoopVector::iterator i = Loops.begin(), iend = Loops.end();
+ i != iend;
+ ++i) {
+ VarInit *IterVar = dyn_cast<VarInit>(i->IterVar);
+ if (IterVar && IterVar->getName() == Name)
+ return IterVar;
+ }
+
+ if (Mode == ParseNameMode)
+ return StringInit::get(Name);
+
+ if (Record *D = Records.getDef(Name))
+ return DefInit::get(D);
+
+ if (Mode == ParseValueMode) {
+ Error(NameLoc, "Variable not defined: '" + Name + "'");
+ return 0;
+ }
+
+ return StringInit::get(Name);
+}
+
+/// ParseOperation - Parse an operator. This returns null on error.
+///
+/// Operation ::= XOperator ['<' Type '>'] '(' Args ')'
+///
+Init *TGParser::ParseOperation(Record *CurRec) {
+ switch (Lex.getCode()) {
+ default:
+ TokError("unknown operation");
+ return 0;
+ case tgtok::XHead:
+ case tgtok::XTail:
+ case tgtok::XEmpty:
+ case tgtok::XCast: { // Value ::= !unop '(' Value ')'
+ UnOpInit::UnaryOp Code;
+ RecTy *Type = 0;
+
+ switch (Lex.getCode()) {
+ default: llvm_unreachable("Unhandled code!");
+ case tgtok::XCast:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::CAST;
+
+ Type = ParseOperatorType();
+
+ if (Type == 0) {
+ TokError("did not get type for unary operator");
+ return 0;
+ }
+
+ break;
+ case tgtok::XHead:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::HEAD;
+ break;
+ case tgtok::XTail:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::TAIL;
+ break;
+ case tgtok::XEmpty:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::EMPTY;
+ Type = IntRecTy::get();
+ break;
+ }
+ if (Lex.getCode() != tgtok::l_paren) {
+ TokError("expected '(' after unary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the '('
+
+ Init *LHS = ParseValue(CurRec);
+ if (LHS == 0) return 0;
+
+ if (Code == UnOpInit::HEAD
+ || Code == UnOpInit::TAIL
+ || Code == UnOpInit::EMPTY) {
+ ListInit *LHSl = dyn_cast<ListInit>(LHS);
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
+ if (LHSl == 0 && LHSs == 0 && LHSt == 0) {
+ TokError("expected list or string type argument in unary operator");
+ return 0;
+ }
+ if (LHSt) {
+ ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
+ StringRecTy *SType = dyn_cast<StringRecTy>(LHSt->getType());
+ if (LType == 0 && SType == 0) {
+ TokError("expected list or string type argumnet in unary operator");
+ return 0;
+ }
+ }
+
+ if (Code == UnOpInit::HEAD
+ || Code == UnOpInit::TAIL) {
+ if (LHSl == 0 && LHSt == 0) {
+ TokError("expected list type argumnet in unary operator");
+ return 0;
+ }
+
+ if (LHSl && LHSl->getSize() == 0) {
+ TokError("empty list argument in unary operator");
+ return 0;
+ }
+ if (LHSl) {
+ Init *Item = LHSl->getElement(0);
+ TypedInit *Itemt = dyn_cast<TypedInit>(Item);
+ if (Itemt == 0) {
+ TokError("untyped list element in unary operator");
+ return 0;
+ }
+ if (Code == UnOpInit::HEAD) {
+ Type = Itemt->getType();
+ } else {
+ Type = ListRecTy::get(Itemt->getType());
+ }
+ } else {
+ assert(LHSt && "expected list type argument in unary operator");
+ ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
+ if (LType == 0) {
+ TokError("expected list type argumnet in unary operator");
+ return 0;
+ }
+ if (Code == UnOpInit::HEAD) {
+ Type = LType->getElementType();
+ } else {
+ Type = LType;
+ }
+ }
+ }
+ }
+
+ if (Lex.getCode() != tgtok::r_paren) {
+ TokError("expected ')' in unary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the ')'
+ return (UnOpInit::get(Code, LHS, Type))->Fold(CurRec, CurMultiClass);
+ }
+
+ case tgtok::XConcat:
+ case tgtok::XADD:
+ case tgtok::XSRA:
+ case tgtok::XSRL:
+ case tgtok::XSHL:
+ case tgtok::XEq:
+ case tgtok::XStrConcat: { // Value ::= !binop '(' Value ',' Value ')'
+ tgtok::TokKind OpTok = Lex.getCode();
+ SMLoc OpLoc = Lex.getLoc();
+ Lex.Lex(); // eat the operation
+
+ BinOpInit::BinaryOp Code;
+ RecTy *Type = 0;
+
+ switch (OpTok) {
+ default: llvm_unreachable("Unhandled code!");
+ case tgtok::XConcat: Code = BinOpInit::CONCAT;Type = DagRecTy::get(); break;
+ case tgtok::XADD: Code = BinOpInit::ADD; Type = IntRecTy::get(); break;
+ case tgtok::XSRA: Code = BinOpInit::SRA; Type = IntRecTy::get(); break;
+ case tgtok::XSRL: Code = BinOpInit::SRL; Type = IntRecTy::get(); break;
+ case tgtok::XSHL: Code = BinOpInit::SHL; Type = IntRecTy::get(); break;
+ case tgtok::XEq: Code = BinOpInit::EQ; Type = BitRecTy::get(); break;
+ case tgtok::XStrConcat:
+ Code = BinOpInit::STRCONCAT;
+ Type = StringRecTy::get();
+ break;
+ }
+
+ if (Lex.getCode() != tgtok::l_paren) {
+ TokError("expected '(' after binary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the '('
+
+ SmallVector<Init*, 2> InitList;
+
+ InitList.push_back(ParseValue(CurRec));
+ if (InitList.back() == 0) return 0;
+
+ while (Lex.getCode() == tgtok::comma) {
+ Lex.Lex(); // eat the ','
+
+ InitList.push_back(ParseValue(CurRec));
+ if (InitList.back() == 0) return 0;
+ }
+
+ if (Lex.getCode() != tgtok::r_paren) {
+ TokError("expected ')' in operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the ')'
+
+ // We allow multiple operands to associative operators like !strconcat as
+ // shorthand for nesting them.
+ if (Code == BinOpInit::STRCONCAT) {
+ while (InitList.size() > 2) {
+ Init *RHS = InitList.pop_back_val();
+ RHS = (BinOpInit::get(Code, InitList.back(), RHS, Type))
+ ->Fold(CurRec, CurMultiClass);
+ InitList.back() = RHS;
+ }
+ }
+
+ if (InitList.size() == 2)
+ return (BinOpInit::get(Code, InitList[0], InitList[1], Type))
+ ->Fold(CurRec, CurMultiClass);
+
+ Error(OpLoc, "expected two operands to operator");
+ return 0;
+ }
+
+ case tgtok::XIf:
+ case tgtok::XForEach:
+ case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
+ TernOpInit::TernaryOp Code;
+ RecTy *Type = 0;
+
+ tgtok::TokKind LexCode = Lex.getCode();
+ Lex.Lex(); // eat the operation
+ switch (LexCode) {
+ default: llvm_unreachable("Unhandled code!");
+ case tgtok::XIf:
+ Code = TernOpInit::IF;
+ break;
+ case tgtok::XForEach:
+ Code = TernOpInit::FOREACH;
+ break;
+ case tgtok::XSubst:
+ Code = TernOpInit::SUBST;
+ break;
+ }
+ if (Lex.getCode() != tgtok::l_paren) {
+ TokError("expected '(' after ternary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the '('
+
+ Init *LHS = ParseValue(CurRec);
+ if (LHS == 0) return 0;
+
+ if (Lex.getCode() != tgtok::comma) {
+ TokError("expected ',' in ternary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the ','
+
+ Init *MHS = ParseValue(CurRec);
+ if (MHS == 0) return 0;
+
+ if (Lex.getCode() != tgtok::comma) {
+ TokError("expected ',' in ternary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the ','
+
+ Init *RHS = ParseValue(CurRec);
+ if (RHS == 0) return 0;
+
+ if (Lex.getCode() != tgtok::r_paren) {
+ TokError("expected ')' in binary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the ')'
+
+ switch (LexCode) {
+ default: llvm_unreachable("Unhandled code!");
+ case tgtok::XIf: {
+ RecTy *MHSTy = 0;
+ RecTy *RHSTy = 0;
+
+ if (TypedInit *MHSt = dyn_cast<TypedInit>(MHS))
+ MHSTy = MHSt->getType();
+ if (BitsInit *MHSbits = dyn_cast<BitsInit>(MHS))
+ MHSTy = BitsRecTy::get(MHSbits->getNumBits());
+ if (isa<BitInit>(MHS))
+ MHSTy = BitRecTy::get();
+
+ if (TypedInit *RHSt = dyn_cast<TypedInit>(RHS))
+ RHSTy = RHSt->getType();
+ if (BitsInit *RHSbits = dyn_cast<BitsInit>(RHS))
+ RHSTy = BitsRecTy::get(RHSbits->getNumBits());
+ if (isa<BitInit>(RHS))
+ RHSTy = BitRecTy::get();
+
+ // For UnsetInit, it's typed from the other hand.
+ if (isa<UnsetInit>(MHS))
+ MHSTy = RHSTy;
+ if (isa<UnsetInit>(RHS))
+ RHSTy = MHSTy;
+
+ if (!MHSTy || !RHSTy) {
+ TokError("could not get type for !if");
+ return 0;
+ }
+
+ if (MHSTy->typeIsConvertibleTo(RHSTy)) {
+ Type = RHSTy;
+ } else if (RHSTy->typeIsConvertibleTo(MHSTy)) {
+ Type = MHSTy;
+ } else {
+ TokError("inconsistent types for !if");
+ return 0;
+ }
+ break;
+ }
+ case tgtok::XForEach: {
+ TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
+ if (MHSt == 0) {
+ TokError("could not get type for !foreach");
+ return 0;
+ }
+ Type = MHSt->getType();
+ break;
+ }
+ case tgtok::XSubst: {
+ TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
+ if (RHSt == 0) {
+ TokError("could not get type for !subst");
+ return 0;
+ }
+ Type = RHSt->getType();
+ break;
+ }
+ }
+ return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec,
+ CurMultiClass);
+ }
+ }
+}
+
+/// ParseOperatorType - Parse a type for an operator. This returns
+/// null on error.
+///
+/// OperatorType ::= '<' Type '>'
+///
+RecTy *TGParser::ParseOperatorType() {
+ RecTy *Type = 0;
+
+ if (Lex.getCode() != tgtok::less) {
+ TokError("expected type name for operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the <
+
+ Type = ParseType();
+
+ if (Type == 0) {
+ TokError("expected type name for operator");
+ return 0;
+ }
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected type name for operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the >
+
+ return Type;
+}
+
+
+/// ParseSimpleValue - Parse a tblgen value. This returns null on error.
+///
+/// SimpleValue ::= IDValue
+/// SimpleValue ::= INTVAL
+/// SimpleValue ::= STRVAL+
+/// SimpleValue ::= CODEFRAGMENT
+/// SimpleValue ::= '?'
+/// SimpleValue ::= '{' ValueList '}'
+/// SimpleValue ::= ID '<' ValueListNE '>'
+/// SimpleValue ::= '[' ValueList ']'
+/// SimpleValue ::= '(' IDValue DagArgList ')'
+/// SimpleValue ::= CONCATTOK '(' Value ',' Value ')'
+/// SimpleValue ::= ADDTOK '(' Value ',' Value ')'
+/// SimpleValue ::= SHLTOK '(' Value ',' Value ')'
+/// SimpleValue ::= SRATOK '(' Value ',' Value ')'
+/// SimpleValue ::= SRLTOK '(' Value ',' Value ')'
+/// SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
+///
+Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
+ IDParseMode Mode) {
+ Init *R = 0;
+ switch (Lex.getCode()) {
+ default: TokError("Unknown token when parsing a value"); break;
+ case tgtok::paste:
+ // This is a leading paste operation. This is deprecated but
+ // still exists in some .td files. Ignore it.
+ Lex.Lex(); // Skip '#'.
+ return ParseSimpleValue(CurRec, ItemType, Mode);
+ case tgtok::IntVal: R = IntInit::get(Lex.getCurIntVal()); Lex.Lex(); break;
+ case tgtok::StrVal: {
+ std::string Val = Lex.getCurStrVal();
+ Lex.Lex();
+
+ // Handle multiple consecutive concatenated strings.
+ while (Lex.getCode() == tgtok::StrVal) {
+ Val += Lex.getCurStrVal();
+ Lex.Lex();
+ }
+
+ R = StringInit::get(Val);
+ break;
+ }
+ case tgtok::CodeFragment:
+ R = StringInit::get(Lex.getCurStrVal());
+ Lex.Lex();
+ break;
+ case tgtok::question:
+ R = UnsetInit::get();
+ Lex.Lex();
+ break;
+ case tgtok::Id: {
+ SMLoc NameLoc = Lex.getLoc();
+ std::string Name = Lex.getCurStrVal();
+ if (Lex.Lex() != tgtok::less) // consume the Id.
+ return ParseIDValue(CurRec, Name, NameLoc, Mode); // Value ::= IDValue
+
+ // Value ::= ID '<' ValueListNE '>'
+ if (Lex.Lex() == tgtok::greater) {
+ TokError("expected non-empty value list");
+ return 0;
+ }
+
+ // This is a CLASS<initvalslist> expression. This is supposed to synthesize
+ // a new anonymous definition, deriving from CLASS<initvalslist> with no
+ // body.
+ Record *Class = Records.getClass(Name);
+ if (!Class) {
+ Error(NameLoc, "Expected a class name, got '" + Name + "'");
+ return 0;
+ }
+
+ std::vector<Init*> ValueList = ParseValueList(CurRec, Class);
+ if (ValueList.empty()) return 0;
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' at end of value list");
+ return 0;
+ }
+ Lex.Lex(); // eat the '>'
+ SMLoc EndLoc = Lex.getLoc();
+
+ // Create the new record, set it as CurRec temporarily.
+ static unsigned AnonCounter = 0;
+ Record *NewRec = new Record("anonymous.val."+utostr(AnonCounter++),
+ NameLoc,
+ Records,
+ /*IsAnonymous=*/true);
+ SubClassReference SCRef;
+ SCRef.RefRange = SMRange(NameLoc, EndLoc);
+ SCRef.Rec = Class;
+ SCRef.TemplateArgs = ValueList;
+ // Add info about the subclass to NewRec.
+ if (AddSubClass(NewRec, SCRef))
+ return 0;
+ NewRec->resolveReferences();
+ Records.addDef(NewRec);
+
+ // The result of the expression is a reference to the new record.
+ return DefInit::get(NewRec);
+ }
+ case tgtok::l_brace: { // Value ::= '{' ValueList '}'
+ SMLoc BraceLoc = Lex.getLoc();
+ Lex.Lex(); // eat the '{'
+ std::vector<Init*> Vals;
+
+ if (Lex.getCode() != tgtok::r_brace) {
+ Vals = ParseValueList(CurRec);
+ if (Vals.empty()) return 0;
+ }
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of bit list value");
+ return 0;
+ }
+ Lex.Lex(); // eat the '}'
+
+ SmallVector<Init *, 16> NewBits(Vals.size());
+
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+ Init *Bit = Vals[i]->convertInitializerTo(BitRecTy::get());
+ if (Bit == 0) {
+ Error(BraceLoc, "Element #" + utostr(i) + " (" + Vals[i]->getAsString()+
+ ") is not convertable to a bit");
+ return 0;
+ }
+ NewBits[Vals.size()-i-1] = Bit;
+ }
+ return BitsInit::get(NewBits);
+ }
+ case tgtok::l_square: { // Value ::= '[' ValueList ']'
+ Lex.Lex(); // eat the '['
+ std::vector<Init*> Vals;
+
+ RecTy *DeducedEltTy = 0;
+ ListRecTy *GivenListTy = 0;
+
+ if (ItemType != 0) {
+ ListRecTy *ListType = dyn_cast<ListRecTy>(ItemType);
+ if (ListType == 0) {
+ std::stringstream s;
+ s << "Type mismatch for list, expected list type, got "
+ << ItemType->getAsString();
+ TokError(s.str());
+ return 0;
+ }
+ GivenListTy = ListType;
+ }
+
+ if (Lex.getCode() != tgtok::r_square) {
+ Vals = ParseValueList(CurRec, 0,
+ GivenListTy ? GivenListTy->getElementType() : 0);
+ if (Vals.empty()) return 0;
+ }
+ if (Lex.getCode() != tgtok::r_square) {
+ TokError("expected ']' at end of list value");
+ return 0;
+ }
+ Lex.Lex(); // eat the ']'
+
+ RecTy *GivenEltTy = 0;
+ if (Lex.getCode() == tgtok::less) {
+ // Optional list element type
+ Lex.Lex(); // eat the '<'
+
+ GivenEltTy = ParseType();
+ if (GivenEltTy == 0) {
+ // Couldn't parse element type
+ return 0;
+ }
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' at end of list element type");
+ return 0;
+ }
+ Lex.Lex(); // eat the '>'
+ }
+
+ // Check elements
+ RecTy *EltTy = 0;
+ for (std::vector<Init *>::iterator i = Vals.begin(), ie = Vals.end();
+ i != ie;
+ ++i) {
+ TypedInit *TArg = dyn_cast<TypedInit>(*i);
+ if (TArg == 0) {
+ TokError("Untyped list element");
+ return 0;
+ }
+ if (EltTy != 0) {
+ EltTy = resolveTypes(EltTy, TArg->getType());
+ if (EltTy == 0) {
+ TokError("Incompatible types in list elements");
+ return 0;
+ }
+ } else {
+ EltTy = TArg->getType();
+ }
+ }
+
+ if (GivenEltTy != 0) {
+ if (EltTy != 0) {
+ // Verify consistency
+ if (!EltTy->typeIsConvertibleTo(GivenEltTy)) {
+ TokError("Incompatible types in list elements");
+ return 0;
+ }
+ }
+ EltTy = GivenEltTy;
+ }
+
+ if (EltTy == 0) {
+ if (ItemType == 0) {
+ TokError("No type for list");
+ return 0;
+ }
+ DeducedEltTy = GivenListTy->getElementType();
+ } else {
+ // Make sure the deduced type is compatible with the given type
+ if (GivenListTy) {
+ if (!EltTy->typeIsConvertibleTo(GivenListTy->getElementType())) {
+ TokError("Element type mismatch for list");
+ return 0;
+ }
+ }
+ DeducedEltTy = EltTy;
+ }
+
+ return ListInit::get(Vals, DeducedEltTy);
+ }
+ case tgtok::l_paren: { // Value ::= '(' IDValue DagArgList ')'
+ Lex.Lex(); // eat the '('
+ if (Lex.getCode() != tgtok::Id && Lex.getCode() != tgtok::XCast) {
+ TokError("expected identifier in dag init");
+ return 0;
+ }
+
+ Init *Operator = ParseValue(CurRec);
+ if (Operator == 0) return 0;
+
+ // If the operator name is present, parse it.
+ std::string OperatorName;
+ if (Lex.getCode() == tgtok::colon) {
+ if (Lex.Lex() != tgtok::VarName) { // eat the ':'
+ TokError("expected variable name in dag operator");
+ return 0;
+ }
+ OperatorName = Lex.getCurStrVal();
+ Lex.Lex(); // eat the VarName.
+ }
+
+ std::vector<std::pair<llvm::Init*, std::string> > DagArgs;
+ if (Lex.getCode() != tgtok::r_paren) {
+ DagArgs = ParseDagArgList(CurRec);
+ if (DagArgs.empty()) return 0;
+ }
+
+ if (Lex.getCode() != tgtok::r_paren) {
+ TokError("expected ')' in dag init");
+ return 0;
+ }
+ Lex.Lex(); // eat the ')'
+
+ return DagInit::get(Operator, OperatorName, DagArgs);
+ }
+
+ case tgtok::XHead:
+ case tgtok::XTail:
+ case tgtok::XEmpty:
+ case tgtok::XCast: // Value ::= !unop '(' Value ')'
+ case tgtok::XConcat:
+ case tgtok::XADD:
+ case tgtok::XSRA:
+ case tgtok::XSRL:
+ case tgtok::XSHL:
+ case tgtok::XEq:
+ case tgtok::XStrConcat: // Value ::= !binop '(' Value ',' Value ')'
+ case tgtok::XIf:
+ case tgtok::XForEach:
+ case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
+ return ParseOperation(CurRec);
+ }
+ }
+
+ return R;
+}
+
+/// ParseValue - Parse a tblgen value. This returns null on error.
+///
+/// Value ::= SimpleValue ValueSuffix*
+/// ValueSuffix ::= '{' BitList '}'
+/// ValueSuffix ::= '[' BitList ']'
+/// ValueSuffix ::= '.' ID
+///
+Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
+ Init *Result = ParseSimpleValue(CurRec, ItemType, Mode);
+ if (Result == 0) return 0;
+
+ // Parse the suffixes now if present.
+ while (1) {
+ switch (Lex.getCode()) {
+ default: return Result;
+ case tgtok::l_brace: {
+ if (Mode == ParseNameMode || Mode == ParseForeachMode)
+ // This is the beginning of the object body.
+ return Result;
+
+ SMLoc CurlyLoc = Lex.getLoc();
+ Lex.Lex(); // eat the '{'
+ std::vector<unsigned> Ranges = ParseRangeList();
+ if (Ranges.empty()) return 0;
+
+ // Reverse the bitlist.
+ std::reverse(Ranges.begin(), Ranges.end());
+ Result = Result->convertInitializerBitRange(Ranges);
+ if (Result == 0) {
+ Error(CurlyLoc, "Invalid bit range for value");
+ return 0;
+ }
+
+ // Eat the '}'.
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of bit range list");
+ return 0;
+ }
+ Lex.Lex();
+ break;
+ }
+ case tgtok::l_square: {
+ SMLoc SquareLoc = Lex.getLoc();
+ Lex.Lex(); // eat the '['
+ std::vector<unsigned> Ranges = ParseRangeList();
+ if (Ranges.empty()) return 0;
+
+ Result = Result->convertInitListSlice(Ranges);
+ if (Result == 0) {
+ Error(SquareLoc, "Invalid range for list slice");
+ return 0;
+ }
+
+ // Eat the ']'.
+ if (Lex.getCode() != tgtok::r_square) {
+ TokError("expected ']' at end of list slice");
+ return 0;
+ }
+ Lex.Lex();
+ break;
+ }
+ case tgtok::period:
+ if (Lex.Lex() != tgtok::Id) { // eat the .
+ TokError("expected field identifier after '.'");
+ return 0;
+ }
+ if (!Result->getFieldType(Lex.getCurStrVal())) {
+ TokError("Cannot access field '" + Lex.getCurStrVal() + "' of value '" +
+ Result->getAsString() + "'");
+ return 0;
+ }
+ Result = FieldInit::get(Result, Lex.getCurStrVal());
+ Lex.Lex(); // eat field name
+ break;
+
+ case tgtok::paste:
+ SMLoc PasteLoc = Lex.getLoc();
+
+ // Create a !strconcat() operation, first casting each operand to
+ // a string if necessary.
+
+ TypedInit *LHS = dyn_cast<TypedInit>(Result);
+ if (!LHS) {
+ Error(PasteLoc, "LHS of paste is not typed!");
+ return 0;
+ }
+
+ if (LHS->getType() != StringRecTy::get()) {
+ LHS = UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get());
+ }
+
+ TypedInit *RHS = 0;
+
+ Lex.Lex(); // Eat the '#'.
+ switch (Lex.getCode()) {
+ case tgtok::colon:
+ case tgtok::semi:
+ case tgtok::l_brace:
+ // These are all of the tokens that can begin an object body.
+ // Some of these can also begin values but we disallow those cases
+ // because they are unlikely to be useful.
+
+ // Trailing paste, concat with an empty string.
+ RHS = StringInit::get("");
+ break;
+
+ default:
+ Init *RHSResult = ParseValue(CurRec, ItemType, ParseNameMode);
+ RHS = dyn_cast<TypedInit>(RHSResult);
+ if (!RHS) {
+ Error(PasteLoc, "RHS of paste is not typed!");
+ return 0;
+ }
+
+ if (RHS->getType() != StringRecTy::get()) {
+ RHS = UnOpInit::get(UnOpInit::CAST, RHS, StringRecTy::get());
+ }
+
+ break;
+ }
+
+ Result = BinOpInit::get(BinOpInit::STRCONCAT, LHS, RHS,
+ StringRecTy::get())->Fold(CurRec, CurMultiClass);
+ break;
+ }
+ }
+}
+
+/// ParseDagArgList - Parse the argument list for a dag literal expression.
+///
+/// DagArg ::= Value (':' VARNAME)?
+/// DagArg ::= VARNAME
+/// DagArgList ::= DagArg
+/// DagArgList ::= DagArgList ',' DagArg
+std::vector<std::pair<llvm::Init*, std::string> >
+TGParser::ParseDagArgList(Record *CurRec) {
+ std::vector<std::pair<llvm::Init*, std::string> > Result;
+
+ while (1) {
+ // DagArg ::= VARNAME
+ if (Lex.getCode() == tgtok::VarName) {
+ // A missing value is treated like '?'.
+ Result.push_back(std::make_pair(UnsetInit::get(), Lex.getCurStrVal()));
+ Lex.Lex();
+ } else {
+ // DagArg ::= Value (':' VARNAME)?
+ Init *Val = ParseValue(CurRec);
+ if (Val == 0)
+ return std::vector<std::pair<llvm::Init*, std::string> >();
+
+ // If the variable name is present, add it.
+ std::string VarName;
+ if (Lex.getCode() == tgtok::colon) {
+ if (Lex.Lex() != tgtok::VarName) { // eat the ':'
+ TokError("expected variable name in dag literal");
+ return std::vector<std::pair<llvm::Init*, std::string> >();
+ }
+ VarName = Lex.getCurStrVal();
+ Lex.Lex(); // eat the VarName.
+ }
+
+ Result.push_back(std::make_pair(Val, VarName));
+ }
+ if (Lex.getCode() != tgtok::comma) break;
+ Lex.Lex(); // eat the ','
+ }
+
+ return Result;
+}
+
+
+/// ParseValueList - Parse a comma separated list of values, returning them as a
+/// vector. Note that this always expects to be able to parse at least one
+/// value. It returns an empty list if this is not possible.
+///
+/// ValueList ::= Value (',' Value)
+///
+std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
+ RecTy *EltTy) {
+ std::vector<Init*> Result;
+ RecTy *ItemType = EltTy;
+ unsigned int ArgN = 0;
+ if (ArgsRec != 0 && EltTy == 0) {
+ const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
+ if (!TArgs.size()) {
+ TokError("template argument provided to non-template class");
+ return std::vector<Init*>();
+ }
+ const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]);
+ if (!RV) {
+ errs() << "Cannot find template arg " << ArgN << " (" << TArgs[ArgN]
+ << ")\n";
+ }
+ assert(RV && "Template argument record not found??");
+ ItemType = RV->getType();
+ ++ArgN;
+ }
+ Result.push_back(ParseValue(CurRec, ItemType));
+ if (Result.back() == 0) return std::vector<Init*>();
+
+ while (Lex.getCode() == tgtok::comma) {
+ Lex.Lex(); // Eat the comma
+
+ if (ArgsRec != 0 && EltTy == 0) {
+ const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
+ if (ArgN >= TArgs.size()) {
+ TokError("too many template arguments");
+ return std::vector<Init*>();
+ }
+ const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]);
+ assert(RV && "Template argument record not found??");
+ ItemType = RV->getType();
+ ++ArgN;
+ }
+ Result.push_back(ParseValue(CurRec, ItemType));
+ if (Result.back() == 0) return std::vector<Init*>();
+ }
+
+ return Result;
+}
+
+
+/// ParseDeclaration - Read a declaration, returning the name of field ID, or an
+/// empty string on error. This can happen in a number of different context's,
+/// including within a def or in the template args for a def (which which case
+/// CurRec will be non-null) and within the template args for a multiclass (in
+/// which case CurRec will be null, but CurMultiClass will be set). This can
+/// also happen within a def that is within a multiclass, which will set both
+/// CurRec and CurMultiClass.
+///
+/// Declaration ::= FIELD? Type ID ('=' Value)?
+///
+Init *TGParser::ParseDeclaration(Record *CurRec,
+ bool ParsingTemplateArgs) {
+ // Read the field prefix if present.
+ bool HasField = Lex.getCode() == tgtok::Field;
+ if (HasField) Lex.Lex();
+
+ RecTy *Type = ParseType();
+ if (Type == 0) return 0;
+
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("Expected identifier in declaration");
+ return 0;
+ }
+
+ SMLoc IdLoc = Lex.getLoc();
+ Init *DeclName = StringInit::get(Lex.getCurStrVal());
+ Lex.Lex();
+
+ if (ParsingTemplateArgs) {
+ if (CurRec) {
+ DeclName = QualifyName(*CurRec, CurMultiClass, DeclName, ":");
+ } else {
+ assert(CurMultiClass);
+ }
+ if (CurMultiClass)
+ DeclName = QualifyName(CurMultiClass->Rec, CurMultiClass, DeclName,
+ "::");
+ }
+
+ // Add the value.
+ if (AddValue(CurRec, IdLoc, RecordVal(DeclName, Type, HasField)))
+ return 0;
+
+ // If a value is present, parse it.
+ if (Lex.getCode() == tgtok::equal) {
+ Lex.Lex();
+ SMLoc ValLoc = Lex.getLoc();
+ Init *Val = ParseValue(CurRec, Type);
+ if (Val == 0 ||
+ SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
+ return 0;
+ }
+
+ return DeclName;
+}
+
+/// ParseForeachDeclaration - Read a foreach declaration, returning
+/// the name of the declared object or a NULL Init on error. Return
+/// the name of the parsed initializer list through ForeachListName.
+///
+/// ForeachDeclaration ::= ID '=' '[' ValueList ']'
+/// ForeachDeclaration ::= ID '=' '{' RangeList '}'
+/// ForeachDeclaration ::= ID '=' RangePiece
+///
+VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("Expected identifier in foreach declaration");
+ return 0;
+ }
+
+ Init *DeclName = StringInit::get(Lex.getCurStrVal());
+ Lex.Lex();
+
+ // If a value is present, parse it.
+ if (Lex.getCode() != tgtok::equal) {
+ TokError("Expected '=' in foreach declaration");
+ return 0;
+ }
+ Lex.Lex(); // Eat the '='
+
+ RecTy *IterType = 0;
+ std::vector<unsigned> Ranges;
+
+ switch (Lex.getCode()) {
+ default: TokError("Unknown token when expecting a range list"); return 0;
+ case tgtok::l_square: { // '[' ValueList ']'
+ Init *List = ParseSimpleValue(0, 0, ParseForeachMode);
+ ForeachListValue = dyn_cast<ListInit>(List);
+ if (ForeachListValue == 0) {
+ TokError("Expected a Value list");
+ return 0;
+ }
+ RecTy *ValueType = ForeachListValue->getType();
+ ListRecTy *ListType = dyn_cast<ListRecTy>(ValueType);
+ if (ListType == 0) {
+ TokError("Value list is not of list type");
+ return 0;
+ }
+ IterType = ListType->getElementType();
+ break;
+ }
+
+ case tgtok::IntVal: { // RangePiece.
+ if (ParseRangePiece(Ranges))
+ return 0;
+ break;
+ }
+
+ case tgtok::l_brace: { // '{' RangeList '}'
+ Lex.Lex(); // eat the '{'
+ Ranges = ParseRangeList();
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of bit range list");
+ return 0;
+ }
+ Lex.Lex();
+ break;
+ }
+ }
+
+ if (!Ranges.empty()) {
+ assert(!IterType && "Type already initialized?");
+ IterType = IntRecTy::get();
+ std::vector<Init*> Values;
+ for (unsigned i = 0, e = Ranges.size(); i != e; ++i)
+ Values.push_back(IntInit::get(Ranges[i]));
+ ForeachListValue = ListInit::get(Values, IterType);
+ }
+
+ if (!IterType)
+ return 0;
+
+ return VarInit::get(DeclName, IterType);
+}
+
+/// ParseTemplateArgList - Read a template argument list, which is a non-empty
+/// sequence of template-declarations in <>'s. If CurRec is non-null, these are
+/// template args for a def, which may or may not be in a multiclass. If null,
+/// these are the template args for a multiclass.
+///
+/// TemplateArgList ::= '<' Declaration (',' Declaration)* '>'
+///
+bool TGParser::ParseTemplateArgList(Record *CurRec) {
+ assert(Lex.getCode() == tgtok::less && "Not a template arg list!");
+ Lex.Lex(); // eat the '<'
+
+ Record *TheRecToAddTo = CurRec ? CurRec : &CurMultiClass->Rec;
+
+ // Read the first declaration.
+ Init *TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
+ if (TemplArg == 0)
+ return true;
+
+ TheRecToAddTo->addTemplateArg(TemplArg);
+
+ while (Lex.getCode() == tgtok::comma) {
+ Lex.Lex(); // eat the ','
+
+ // Read the following declarations.
+ TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
+ if (TemplArg == 0)
+ return true;
+ TheRecToAddTo->addTemplateArg(TemplArg);
+ }
+
+ if (Lex.getCode() != tgtok::greater)
+ return TokError("expected '>' at end of template argument list");
+ Lex.Lex(); // eat the '>'.
+ return false;
+}
+
+
+/// ParseBodyItem - Parse a single item at within the body of a def or class.
+///
+/// BodyItem ::= Declaration ';'
+/// BodyItem ::= LET ID OptionalBitList '=' Value ';'
+bool TGParser::ParseBodyItem(Record *CurRec) {
+ if (Lex.getCode() != tgtok::Let) {
+ if (ParseDeclaration(CurRec, false) == 0)
+ return true;
+
+ if (Lex.getCode() != tgtok::semi)
+ return TokError("expected ';' after declaration");
+ Lex.Lex();
+ return false;
+ }
+
+ // LET ID OptionalRangeList '=' Value ';'
+ if (Lex.Lex() != tgtok::Id)
+ return TokError("expected field identifier after let");
+
+ SMLoc IdLoc = Lex.getLoc();
+ std::string FieldName = Lex.getCurStrVal();
+ Lex.Lex(); // eat the field name.
+
+ std::vector<unsigned> BitList;
+ if (ParseOptionalBitList(BitList))
+ return true;
+ std::reverse(BitList.begin(), BitList.end());
+
+ if (Lex.getCode() != tgtok::equal)
+ return TokError("expected '=' in let expression");
+ Lex.Lex(); // eat the '='.
+
+ RecordVal *Field = CurRec->getValue(FieldName);
+ if (Field == 0)
+ return TokError("Value '" + FieldName + "' unknown!");
+
+ RecTy *Type = Field->getType();
+
+ Init *Val = ParseValue(CurRec, Type);
+ if (Val == 0) return true;
+
+ if (Lex.getCode() != tgtok::semi)
+ return TokError("expected ';' after let expression");
+ Lex.Lex();
+
+ return SetValue(CurRec, IdLoc, FieldName, BitList, Val);
+}
+
+/// ParseBody - Read the body of a class or def. Return true on error, false on
+/// success.
+///
+/// Body ::= ';'
+/// Body ::= '{' BodyList '}'
+/// BodyList BodyItem*
+///
+bool TGParser::ParseBody(Record *CurRec) {
+ // If this is a null definition, just eat the semi and return.
+ if (Lex.getCode() == tgtok::semi) {
+ Lex.Lex();
+ return false;
+ }
+
+ if (Lex.getCode() != tgtok::l_brace)
+ return TokError("Expected ';' or '{' to start body");
+ // Eat the '{'.
+ Lex.Lex();
+
+ while (Lex.getCode() != tgtok::r_brace)
+ if (ParseBodyItem(CurRec))
+ return true;
+
+ // Eat the '}'.
+ Lex.Lex();
+ return false;
+}
+
+/// \brief Apply the current let bindings to \a CurRec.
+/// \returns true on error, false otherwise.
+bool TGParser::ApplyLetStack(Record *CurRec) {
+ for (unsigned i = 0, e = LetStack.size(); i != e; ++i)
+ for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j)
+ if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
+ LetStack[i][j].Bits, LetStack[i][j].Value))
+ return true;
+ return false;
+}
+
+/// ParseObjectBody - Parse the body of a def or class. This consists of an
+/// optional ClassList followed by a Body. CurRec is the current def or class
+/// that is being parsed.
+///
+/// ObjectBody ::= BaseClassList Body
+/// BaseClassList ::= /*empty*/
+/// BaseClassList ::= ':' BaseClassListNE
+/// BaseClassListNE ::= SubClassRef (',' SubClassRef)*
+///
+bool TGParser::ParseObjectBody(Record *CurRec) {
+ // If there is a baseclass list, read it.
+ if (Lex.getCode() == tgtok::colon) {
+ Lex.Lex();
+
+ // Read all of the subclasses.
+ SubClassReference SubClass = ParseSubClassReference(CurRec, false);
+ while (1) {
+ // Check for error.
+ if (SubClass.Rec == 0) return true;
+
+ // Add it.
+ if (AddSubClass(CurRec, SubClass))
+ return true;
+
+ if (Lex.getCode() != tgtok::comma) break;
+ Lex.Lex(); // eat ','.
+ SubClass = ParseSubClassReference(CurRec, false);
+ }
+ }
+
+ if (ApplyLetStack(CurRec))
+ return true;
+
+ return ParseBody(CurRec);
+}
+
+/// ParseDef - Parse and return a top level or multiclass def, return the record
+/// corresponding to it. This returns null on error.
+///
+/// DefInst ::= DEF ObjectName ObjectBody
+///
+bool TGParser::ParseDef(MultiClass *CurMultiClass) {
+ SMLoc DefLoc = Lex.getLoc();
+ assert(Lex.getCode() == tgtok::Def && "Unknown tok");
+ Lex.Lex(); // Eat the 'def' token.
+
+ // Parse ObjectName and make a record for it.
+ Record *CurRec;
+ Init *Name = ParseObjectName(CurMultiClass);
+ if (Name)
+ CurRec = new Record(Name, DefLoc, Records);
+ else
+ CurRec = new Record(GetNewAnonymousName(), DefLoc, Records,
+ /*IsAnonymous=*/true);
+
+ if (!CurMultiClass && Loops.empty()) {
+ // Top-level def definition.
+
+ // Ensure redefinition doesn't happen.
+ if (Records.getDef(CurRec->getNameInitAsString())) {
+ Error(DefLoc, "def '" + CurRec->getNameInitAsString()
+ + "' already defined");
+ return true;
+ }
+ Records.addDef(CurRec);
+ } else if (CurMultiClass) {
+ // Otherwise, a def inside a multiclass, add it to the multiclass.
+ for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); i != e; ++i)
+ if (CurMultiClass->DefPrototypes[i]->getNameInit()
+ == CurRec->getNameInit()) {
+ Error(DefLoc, "def '" + CurRec->getNameInitAsString() +
+ "' already defined in this multiclass!");
+ return true;
+ }
+ CurMultiClass->DefPrototypes.push_back(CurRec);
+ }
+
+ if (ParseObjectBody(CurRec))
+ return true;
+
+ if (CurMultiClass == 0) // Def's in multiclasses aren't really defs.
+ // See Record::setName(). This resolve step will see any new name
+ // for the def that might have been created when resolving
+ // inheritance, values and arguments above.
+ CurRec->resolveReferences();
+
+ // If ObjectBody has template arguments, it's an error.
+ assert(CurRec->getTemplateArgs().empty() && "How'd this get template args?");
+
+ if (CurMultiClass) {
+ // Copy the template arguments for the multiclass into the def.
+ const std::vector<Init *> &TArgs =
+ CurMultiClass->Rec.getTemplateArgs();
+
+ for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(TArgs[i]);
+ assert(RV && "Template arg doesn't exist?");
+ CurRec->addValue(*RV);
+ }
+ }
+
+ if (ProcessForeachDefs(CurRec, DefLoc)) {
+ Error(DefLoc,
+ "Could not process loops for def" + CurRec->getNameInitAsString());
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseForeach - Parse a for statement. Return the record corresponding
+/// to it. This returns true on error.
+///
+/// Foreach ::= FOREACH Declaration IN '{ ObjectList '}'
+/// Foreach ::= FOREACH Declaration IN Object
+///
+bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
+ assert(Lex.getCode() == tgtok::Foreach && "Unknown tok");
+ Lex.Lex(); // Eat the 'for' token.
+
+ // Make a temporary object to record items associated with the for
+ // loop.
+ ListInit *ListValue = 0;
+ VarInit *IterName = ParseForeachDeclaration(ListValue);
+ if (IterName == 0)
+ return TokError("expected declaration in for");
+
+ if (Lex.getCode() != tgtok::In)
+ return TokError("Unknown tok");
+ Lex.Lex(); // Eat the in
+
+ // Create a loop object and remember it.
+ Loops.push_back(ForeachLoop(IterName, ListValue));
+
+ if (Lex.getCode() != tgtok::l_brace) {
+ // FOREACH Declaration IN Object
+ if (ParseObject(CurMultiClass))
+ return true;
+ }
+ else {
+ SMLoc BraceLoc = Lex.getLoc();
+ // Otherwise, this is a group foreach.
+ Lex.Lex(); // eat the '{'.
+
+ // Parse the object list.
+ if (ParseObjectList(CurMultiClass))
+ return true;
+
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of foreach command");
+ return Error(BraceLoc, "to match this '{'");
+ }
+ Lex.Lex(); // Eat the }
+ }
+
+ // We've processed everything in this loop.
+ Loops.pop_back();
+
+ return false;
+}
+
+/// ParseClass - Parse a tblgen class definition.
+///
+/// ClassInst ::= CLASS ID TemplateArgList? ObjectBody
+///
+bool TGParser::ParseClass() {
+ assert(Lex.getCode() == tgtok::Class && "Unexpected token!");
+ Lex.Lex();
+
+ if (Lex.getCode() != tgtok::Id)
+ return TokError("expected class name after 'class' keyword");
+
+ Record *CurRec = Records.getClass(Lex.getCurStrVal());
+ if (CurRec) {
+ // If the body was previously defined, this is an error.
+ if (CurRec->getValues().size() > 1 || // Account for NAME.
+ !CurRec->getSuperClasses().empty() ||
+ !CurRec->getTemplateArgs().empty())
+ return TokError("Class '" + CurRec->getNameInitAsString()
+ + "' already defined");
+ } else {
+ // If this is the first reference to this class, create and add it.
+ CurRec = new Record(Lex.getCurStrVal(), Lex.getLoc(), Records);
+ Records.addClass(CurRec);
+ }
+ Lex.Lex(); // eat the name.
+
+ // If there are template args, parse them.
+ if (Lex.getCode() == tgtok::less)
+ if (ParseTemplateArgList(CurRec))
+ return true;
+
+ // Finally, parse the object body.
+ return ParseObjectBody(CurRec);
+}
+
+/// ParseLetList - Parse a non-empty list of assignment expressions into a list
+/// of LetRecords.
+///
+/// LetList ::= LetItem (',' LetItem)*
+/// LetItem ::= ID OptionalRangeList '=' Value
+///
+std::vector<LetRecord> TGParser::ParseLetList() {
+ std::vector<LetRecord> Result;
+
+ while (1) {
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("expected identifier in let definition");
+ return std::vector<LetRecord>();
+ }
+ std::string Name = Lex.getCurStrVal();
+ SMLoc NameLoc = Lex.getLoc();
+ Lex.Lex(); // Eat the identifier.
+
+ // Check for an optional RangeList.
+ std::vector<unsigned> Bits;
+ if (ParseOptionalRangeList(Bits))
+ return std::vector<LetRecord>();
+ std::reverse(Bits.begin(), Bits.end());
+
+ if (Lex.getCode() != tgtok::equal) {
+ TokError("expected '=' in let expression");
+ return std::vector<LetRecord>();
+ }
+ Lex.Lex(); // eat the '='.
+
+ Init *Val = ParseValue(0);
+ if (Val == 0) return std::vector<LetRecord>();
+
+ // Now that we have everything, add the record.
+ Result.push_back(LetRecord(Name, Bits, Val, NameLoc));
+
+ if (Lex.getCode() != tgtok::comma)
+ return Result;
+ Lex.Lex(); // eat the comma.
+ }
+}
+
+/// ParseTopLevelLet - Parse a 'let' at top level. This can be a couple of
+/// different related productions. This works inside multiclasses too.
+///
+/// Object ::= LET LetList IN '{' ObjectList '}'
+/// Object ::= LET LetList IN Object
+///
+bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) {
+ assert(Lex.getCode() == tgtok::Let && "Unexpected token");
+ Lex.Lex();
+
+ // Add this entry to the let stack.
+ std::vector<LetRecord> LetInfo = ParseLetList();
+ if (LetInfo.empty()) return true;
+ LetStack.push_back(LetInfo);
+
+ if (Lex.getCode() != tgtok::In)
+ return TokError("expected 'in' at end of top-level 'let'");
+ Lex.Lex();
+
+ // If this is a scalar let, just handle it now
+ if (Lex.getCode() != tgtok::l_brace) {
+ // LET LetList IN Object
+ if (ParseObject(CurMultiClass))
+ return true;
+ } else { // Object ::= LETCommand '{' ObjectList '}'
+ SMLoc BraceLoc = Lex.getLoc();
+ // Otherwise, this is a group let.
+ Lex.Lex(); // eat the '{'.
+
+ // Parse the object list.
+ if (ParseObjectList(CurMultiClass))
+ return true;
+
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of top level let command");
+ return Error(BraceLoc, "to match this '{'");
+ }
+ Lex.Lex();
+ }
+
+ // Outside this let scope, this let block is not active.
+ LetStack.pop_back();
+ return false;
+}
+
+/// ParseMultiClass - Parse a multiclass definition.
+///
+/// MultiClassInst ::= MULTICLASS ID TemplateArgList?
+/// ':' BaseMultiClassList '{' MultiClassObject+ '}'
+/// MultiClassObject ::= DefInst
+/// MultiClassObject ::= MultiClassInst
+/// MultiClassObject ::= DefMInst
+/// MultiClassObject ::= LETCommand '{' ObjectList '}'
+/// MultiClassObject ::= LETCommand Object
+///
+bool TGParser::ParseMultiClass() {
+ assert(Lex.getCode() == tgtok::MultiClass && "Unexpected token");
+ Lex.Lex(); // Eat the multiclass token.
+
+ if (Lex.getCode() != tgtok::Id)
+ return TokError("expected identifier after multiclass for name");
+ std::string Name = Lex.getCurStrVal();
+
+ if (MultiClasses.count(Name))
+ return TokError("multiclass '" + Name + "' already defined");
+
+ CurMultiClass = MultiClasses[Name] = new MultiClass(Name,
+ Lex.getLoc(), Records);
+ Lex.Lex(); // Eat the identifier.
+
+ // If there are template args, parse them.
+ if (Lex.getCode() == tgtok::less)
+ if (ParseTemplateArgList(0))
+ return true;
+
+ bool inherits = false;
+
+ // If there are submulticlasses, parse them.
+ if (Lex.getCode() == tgtok::colon) {
+ inherits = true;
+
+ Lex.Lex();
+
+ // Read all of the submulticlasses.
+ SubMultiClassReference SubMultiClass =
+ ParseSubMultiClassReference(CurMultiClass);
+ while (1) {
+ // Check for error.
+ if (SubMultiClass.MC == 0) return true;
+
+ // Add it.
+ if (AddSubMultiClass(CurMultiClass, SubMultiClass))
+ return true;
+
+ if (Lex.getCode() != tgtok::comma) break;
+ Lex.Lex(); // eat ','.
+ SubMultiClass = ParseSubMultiClassReference(CurMultiClass);
+ }
+ }
+
+ if (Lex.getCode() != tgtok::l_brace) {
+ if (!inherits)
+ return TokError("expected '{' in multiclass definition");
+ else if (Lex.getCode() != tgtok::semi)
+ return TokError("expected ';' in multiclass definition");
+ else
+ Lex.Lex(); // eat the ';'.
+ } else {
+ if (Lex.Lex() == tgtok::r_brace) // eat the '{'.
+ return TokError("multiclass must contain at least one def");
+
+ while (Lex.getCode() != tgtok::r_brace) {
+ switch (Lex.getCode()) {
+ default:
+ return TokError("expected 'let', 'def' or 'defm' in multiclass body");
+ case tgtok::Let:
+ case tgtok::Def:
+ case tgtok::Defm:
+ case tgtok::Foreach:
+ if (ParseObject(CurMultiClass))
+ return true;
+ break;
+ }
+ }
+ Lex.Lex(); // eat the '}'.
+ }
+
+ CurMultiClass = 0;
+ return false;
+}
+
+Record *TGParser::
+InstantiateMulticlassDef(MultiClass &MC,
+ Record *DefProto,
+ Init *DefmPrefix,
+ SMRange DefmPrefixRange) {
+ // We need to preserve DefProto so it can be reused for later
+ // instantiations, so create a new Record to inherit from it.
+
+ // Add in the defm name. If the defm prefix is empty, give each
+ // instantiated def a unique name. Otherwise, if "#NAME#" exists in the
+ // name, substitute the prefix for #NAME#. Otherwise, use the defm name
+ // as a prefix.
+
+ bool IsAnonymous = false;
+ if (DefmPrefix == 0) {
+ DefmPrefix = StringInit::get(GetNewAnonymousName());
+ IsAnonymous = true;
+ }
+
+ Init *DefName = DefProto->getNameInit();
+
+ StringInit *DefNameString = dyn_cast<StringInit>(DefName);
+
+ if (DefNameString != 0) {
+ // We have a fully expanded string so there are no operators to
+ // resolve. We should concatenate the given prefix and name.
+ DefName =
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ UnOpInit::get(UnOpInit::CAST, DefmPrefix,
+ StringRecTy::get())->Fold(DefProto, &MC),
+ DefName, StringRecTy::get())->Fold(DefProto, &MC);
+ }
+
+ // Make a trail of SMLocs from the multiclass instantiations.
+ SmallVector<SMLoc, 4> Locs(1, DefmPrefixRange.Start);
+ Locs.append(DefProto->getLoc().begin(), DefProto->getLoc().end());
+ Record *CurRec = new Record(DefName, Locs, Records, IsAnonymous);
+
+ SubClassReference Ref;
+ Ref.RefRange = DefmPrefixRange;
+ Ref.Rec = DefProto;
+ AddSubClass(CurRec, Ref);
+
+ // Set the value for NAME. We don't resolve references to it 'til later,
+ // though, so that uses in nested multiclass names don't get
+ // confused.
+ if (SetValue(CurRec, Ref.RefRange.Start, "NAME", std::vector<unsigned>(),
+ DefmPrefix)) {
+ Error(DefmPrefixRange.Start, "Could not resolve "
+ + CurRec->getNameInitAsString() + ":NAME to '"
+ + DefmPrefix->getAsUnquotedString() + "'");
+ return 0;
+ }
+
+ // If the DefNameString didn't resolve, we probably have a reference to
+ // NAME and need to replace it. We need to do at least this much greedily,
+ // otherwise nested multiclasses will end up with incorrect NAME expansions.
+ if (DefNameString == 0) {
+ RecordVal *DefNameRV = CurRec->getValue("NAME");
+ CurRec->resolveReferencesTo(DefNameRV);
+ }
+
+ if (!CurMultiClass) {
+ // Now that we're at the top level, resolve all NAME references
+ // in the resultant defs that weren't in the def names themselves.
+ RecordVal *DefNameRV = CurRec->getValue("NAME");
+ CurRec->resolveReferencesTo(DefNameRV);
+
+ // Now that NAME references are resolved and we're at the top level of
+ // any multiclass expansions, add the record to the RecordKeeper. If we are
+ // currently in a multiclass, it means this defm appears inside a
+ // multiclass and its name won't be fully resolvable until we see
+ // the top-level defm. Therefore, we don't add this to the
+ // RecordKeeper at this point. If we did we could get duplicate
+ // defs as more than one probably refers to NAME or some other
+ // common internal placeholder.
+
+ // Ensure redefinition doesn't happen.
+ if (Records.getDef(CurRec->getNameInitAsString())) {
+ Error(DefmPrefixRange.Start, "def '" + CurRec->getNameInitAsString() +
+ "' already defined, instantiating defm with subdef '" +
+ DefProto->getNameInitAsString() + "'");
+ return 0;
+ }
+
+ Records.addDef(CurRec);
+ }
+
+ return CurRec;
+}
+
+bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC,
+ Record *CurRec,
+ SMLoc DefmPrefixLoc,
+ SMLoc SubClassLoc,
+ const std::vector<Init *> &TArgs,
+ std::vector<Init *> &TemplateVals,
+ bool DeleteArgs) {
+ // Loop over all of the template arguments, setting them to the specified
+ // value or leaving them as the default if necessary.
+ for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
+ // Check if a value is specified for this temp-arg.
+ if (i < TemplateVals.size()) {
+ // Set it now.
+ if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], std::vector<unsigned>(),
+ TemplateVals[i]))
+ return true;
+
+ // Resolve it next.
+ CurRec->resolveReferencesTo(CurRec->getValue(TArgs[i]));
+
+ if (DeleteArgs)
+ // Now remove it.
+ CurRec->removeValue(TArgs[i]);
+
+ } else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
+ return Error(SubClassLoc, "value not specified for template argument #"+
+ utostr(i) + " (" + TArgs[i]->getAsUnquotedString()
+ + ") of multiclassclass '" + MC.Rec.getNameInitAsString()
+ + "'");
+ }
+ }
+ return false;
+}
+
+bool TGParser::ResolveMulticlassDef(MultiClass &MC,
+ Record *CurRec,
+ Record *DefProto,
+ SMLoc DefmPrefixLoc) {
+ // If the mdef is inside a 'let' expression, add to each def.
+ if (ApplyLetStack(CurRec))
+ return Error(DefmPrefixLoc, "when instantiating this defm");
+
+ // Don't create a top level definition for defm inside multiclasses,
+ // instead, only update the prototypes and bind the template args
+ // with the new created definition.
+ if (!CurMultiClass)
+ return false;
+ for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size();
+ i != e; ++i)
+ if (CurMultiClass->DefPrototypes[i]->getNameInit()
+ == CurRec->getNameInit())
+ return Error(DefmPrefixLoc, "defm '" + CurRec->getNameInitAsString() +
+ "' already defined in this multiclass!");
+ CurMultiClass->DefPrototypes.push_back(CurRec);
+
+ // Copy the template arguments for the multiclass into the new def.
+ const std::vector<Init *> &TA =
+ CurMultiClass->Rec.getTemplateArgs();
+
+ for (unsigned i = 0, e = TA.size(); i != e; ++i) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(TA[i]);
+ assert(RV && "Template arg doesn't exist?");
+ CurRec->addValue(*RV);
+ }
+
+ return false;
+}
+
+/// ParseDefm - Parse the instantiation of a multiclass.
+///
+/// DefMInst ::= DEFM ID ':' DefmSubClassRef ';'
+///
+bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
+ assert(Lex.getCode() == tgtok::Defm && "Unexpected token!");
+ SMLoc DefmLoc = Lex.getLoc();
+ Init *DefmPrefix = 0;
+
+ if (Lex.Lex() == tgtok::Id) { // eat the defm.
+ DefmPrefix = ParseObjectName(CurMultiClass);
+ }
+
+ SMLoc DefmPrefixEndLoc = Lex.getLoc();
+ if (Lex.getCode() != tgtok::colon)
+ return TokError("expected ':' after defm identifier");
+
+ // Keep track of the new generated record definitions.
+ std::vector<Record*> NewRecDefs;
+
+ // This record also inherits from a regular class (non-multiclass)?
+ bool InheritFromClass = false;
+
+ // eat the colon.
+ Lex.Lex();
+
+ SMLoc SubClassLoc = Lex.getLoc();
+ SubClassReference Ref = ParseSubClassReference(0, true);
+
+ while (1) {
+ if (Ref.Rec == 0) return true;
+
+ // To instantiate a multiclass, we need to first get the multiclass, then
+ // instantiate each def contained in the multiclass with the SubClassRef
+ // template parameters.
+ MultiClass *MC = MultiClasses[Ref.Rec->getName()];
+ assert(MC && "Didn't lookup multiclass correctly?");
+ std::vector<Init*> &TemplateVals = Ref.TemplateArgs;
+
+ // Verify that the correct number of template arguments were specified.
+ const std::vector<Init *> &TArgs = MC->Rec.getTemplateArgs();
+ if (TArgs.size() < TemplateVals.size())
+ return Error(SubClassLoc,
+ "more template args specified than multiclass expects");
+
+ // Loop over all the def's in the multiclass, instantiating each one.
+ for (unsigned i = 0, e = MC->DefPrototypes.size(); i != e; ++i) {
+ Record *DefProto = MC->DefPrototypes[i];
+
+ Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix,
+ SMRange(DefmLoc,
+ DefmPrefixEndLoc));
+ if (!CurRec)
+ return true;
+
+ if (ResolveMulticlassDefArgs(*MC, CurRec, DefmLoc, SubClassLoc,
+ TArgs, TemplateVals, true/*Delete args*/))
+ return Error(SubClassLoc, "could not instantiate def");
+
+ if (ResolveMulticlassDef(*MC, CurRec, DefProto, DefmLoc))
+ return Error(SubClassLoc, "could not instantiate def");
+
+ NewRecDefs.push_back(CurRec);
+ }
+
+
+ if (Lex.getCode() != tgtok::comma) break;
+ Lex.Lex(); // eat ','.
+
+ SubClassLoc = Lex.getLoc();
+
+ // A defm can inherit from regular classes (non-multiclass) as
+ // long as they come in the end of the inheritance list.
+ InheritFromClass = (Records.getClass(Lex.getCurStrVal()) != 0);
+
+ if (InheritFromClass)
+ break;
+
+ Ref = ParseSubClassReference(0, true);
+ }
+
+ if (InheritFromClass) {
+ // Process all the classes to inherit as if they were part of a
+ // regular 'def' and inherit all record values.
+ SubClassReference SubClass = ParseSubClassReference(0, false);
+ while (1) {
+ // Check for error.
+ if (SubClass.Rec == 0) return true;
+
+ // Get the expanded definition prototypes and teach them about
+ // the record values the current class to inherit has
+ for (unsigned i = 0, e = NewRecDefs.size(); i != e; ++i) {
+ Record *CurRec = NewRecDefs[i];
+
+ // Add it.
+ if (AddSubClass(CurRec, SubClass))
+ return true;
+
+ if (ApplyLetStack(CurRec))
+ return true;
+ }
+
+ if (Lex.getCode() != tgtok::comma) break;
+ Lex.Lex(); // eat ','.
+ SubClass = ParseSubClassReference(0, false);
+ }
+ }
+
+ if (!CurMultiClass)
+ for (unsigned i = 0, e = NewRecDefs.size(); i != e; ++i)
+ // See Record::setName(). This resolve step will see any new
+ // name for the def that might have been created when resolving
+ // inheritance, values and arguments above.
+ NewRecDefs[i]->resolveReferences();
+
+ if (Lex.getCode() != tgtok::semi)
+ return TokError("expected ';' at end of defm");
+ Lex.Lex();
+
+ return false;
+}
+
+/// ParseObject
+/// Object ::= ClassInst
+/// Object ::= DefInst
+/// Object ::= MultiClassInst
+/// Object ::= DefMInst
+/// Object ::= LETCommand '{' ObjectList '}'
+/// Object ::= LETCommand Object
+bool TGParser::ParseObject(MultiClass *MC) {
+ switch (Lex.getCode()) {
+ default:
+ return TokError("Expected class, def, defm, multiclass or let definition");
+ case tgtok::Let: return ParseTopLevelLet(MC);
+ case tgtok::Def: return ParseDef(MC);
+ case tgtok::Foreach: return ParseForeach(MC);
+ case tgtok::Defm: return ParseDefm(MC);
+ case tgtok::Class: return ParseClass();
+ case tgtok::MultiClass: return ParseMultiClass();
+ }
+}
+
+/// ParseObjectList
+/// ObjectList :== Object*
+bool TGParser::ParseObjectList(MultiClass *MC) {
+ while (isObjectStart(Lex.getCode())) {
+ if (ParseObject(MC))
+ return true;
+ }
+ return false;
+}
+
+bool TGParser::ParseFile() {
+ Lex.Lex(); // Prime the lexer.
+ if (ParseObjectList()) return true;
+
+ // If we have unread input at the end of the file, report it.
+ if (Lex.getCode() == tgtok::Eof)
+ return false;
+
+ return TokError("Unexpected input at top level");
+}
+
diff --git a/contrib/llvm/lib/TableGen/TGParser.h b/contrib/llvm/lib/TableGen/TGParser.h
new file mode 100644
index 000000000000..044e3a02ba4b
--- /dev/null
+++ b/contrib/llvm/lib/TableGen/TGParser.h
@@ -0,0 +1,191 @@
+//===- TGParser.h - Parser for TableGen Files -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents the Parser for tablegen files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TGPARSER_H
+#define TGPARSER_H
+
+#include "TGLexer.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include <map>
+
+namespace llvm {
+ class Record;
+ class RecordVal;
+ class RecordKeeper;
+ class RecTy;
+ class Init;
+ struct MultiClass;
+ struct SubClassReference;
+ struct SubMultiClassReference;
+
+ struct LetRecord {
+ std::string Name;
+ std::vector<unsigned> Bits;
+ Init *Value;
+ SMLoc Loc;
+ LetRecord(const std::string &N, const std::vector<unsigned> &B, Init *V,
+ SMLoc L)
+ : Name(N), Bits(B), Value(V), Loc(L) {
+ }
+ };
+
+ /// ForeachLoop - Record the iteration state associated with a for loop.
+ /// This is used to instantiate items in the loop body.
+ struct ForeachLoop {
+ VarInit *IterVar;
+ ListInit *ListValue;
+
+ ForeachLoop(VarInit *IVar, ListInit *LValue)
+ : IterVar(IVar), ListValue(LValue) {}
+ };
+
+class TGParser {
+ TGLexer Lex;
+ std::vector<std::vector<LetRecord> > LetStack;
+ std::map<std::string, MultiClass*> MultiClasses;
+
+ /// Loops - Keep track of any foreach loops we are within.
+ ///
+ typedef std::vector<ForeachLoop> LoopVector;
+ LoopVector Loops;
+
+ /// CurMultiClass - If we are parsing a 'multiclass' definition, this is the
+ /// current value.
+ MultiClass *CurMultiClass;
+
+ // Record tracker
+ RecordKeeper &Records;
+
+ // A "named boolean" indicating how to parse identifiers. Usually
+ // identifiers map to some existing object but in special cases
+ // (e.g. parsing def names) no such object exists yet because we are
+ // in the middle of creating in. For those situations, allow the
+ // parser to ignore missing object errors.
+ enum IDParseMode {
+ ParseValueMode, // We are parsing a value we expect to look up.
+ ParseNameMode, // We are parsing a name of an object that does not yet
+ // exist.
+ ParseForeachMode // We are parsing a foreach init.
+ };
+
+public:
+ TGParser(SourceMgr &SrcMgr, RecordKeeper &records) :
+ Lex(SrcMgr), CurMultiClass(0), Records(records) {}
+
+ /// ParseFile - Main entrypoint for parsing a tblgen file. These parser
+ /// routines return true on error, or false on success.
+ bool ParseFile();
+
+ bool Error(SMLoc L, const Twine &Msg) const {
+ PrintError(L, Msg);
+ return true;
+ }
+ bool TokError(const Twine &Msg) const {
+ return Error(Lex.getLoc(), Msg);
+ }
+ const TGLexer::DependenciesMapTy &getDependencies() const {
+ return Lex.getDependencies();
+ }
+
+private: // Semantic analysis methods.
+ bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV);
+ bool SetValue(Record *TheRec, SMLoc Loc, Init *ValName,
+ const std::vector<unsigned> &BitList, Init *V);
+ bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
+ const std::vector<unsigned> &BitList, Init *V) {
+ return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V);
+ }
+ bool AddSubClass(Record *Rec, SubClassReference &SubClass);
+ bool AddSubMultiClass(MultiClass *CurMC,
+ SubMultiClassReference &SubMultiClass);
+
+ // IterRecord: Map an iterator name to a value.
+ struct IterRecord {
+ VarInit *IterVar;
+ Init *IterValue;
+ IterRecord(VarInit *Var, Init *Val) : IterVar(Var), IterValue(Val) {}
+ };
+
+ // IterSet: The set of all iterator values at some point in the
+ // iteration space.
+ typedef std::vector<IterRecord> IterSet;
+
+ bool ProcessForeachDefs(Record *CurRec, SMLoc Loc);
+ bool ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals);
+
+private: // Parser methods.
+ bool ParseObjectList(MultiClass *MC = 0);
+ bool ParseObject(MultiClass *MC);
+ bool ParseClass();
+ bool ParseMultiClass();
+ Record *InstantiateMulticlassDef(MultiClass &MC,
+ Record *DefProto,
+ Init *DefmPrefix,
+ SMRange DefmPrefixRange);
+ bool ResolveMulticlassDefArgs(MultiClass &MC,
+ Record *DefProto,
+ SMLoc DefmPrefixLoc,
+ SMLoc SubClassLoc,
+ const std::vector<Init *> &TArgs,
+ std::vector<Init *> &TemplateVals,
+ bool DeleteArgs);
+ bool ResolveMulticlassDef(MultiClass &MC,
+ Record *CurRec,
+ Record *DefProto,
+ SMLoc DefmPrefixLoc);
+ bool ParseDefm(MultiClass *CurMultiClass);
+ bool ParseDef(MultiClass *CurMultiClass);
+ bool ParseForeach(MultiClass *CurMultiClass);
+ bool ParseTopLevelLet(MultiClass *CurMultiClass);
+ std::vector<LetRecord> ParseLetList();
+
+ bool ParseObjectBody(Record *CurRec);
+ bool ParseBody(Record *CurRec);
+ bool ParseBodyItem(Record *CurRec);
+
+ bool ParseTemplateArgList(Record *CurRec);
+ Init *ParseDeclaration(Record *CurRec, bool ParsingTemplateArgs);
+ VarInit *ParseForeachDeclaration(ListInit *&ForeachListValue);
+
+ SubClassReference ParseSubClassReference(Record *CurRec, bool isDefm);
+ SubMultiClassReference ParseSubMultiClassReference(MultiClass *CurMC);
+
+ Init *ParseIDValue(Record *CurRec, IDParseMode Mode = ParseValueMode);
+ Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc,
+ IDParseMode Mode = ParseValueMode);
+ Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0,
+ IDParseMode Mode = ParseValueMode);
+ Init *ParseValue(Record *CurRec, RecTy *ItemType = 0,
+ IDParseMode Mode = ParseValueMode);
+ std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = 0,
+ RecTy *EltTy = 0);
+ std::vector<std::pair<llvm::Init*, std::string> > ParseDagArgList(Record *);
+ bool ParseOptionalRangeList(std::vector<unsigned> &Ranges);
+ bool ParseOptionalBitList(std::vector<unsigned> &Ranges);
+ std::vector<unsigned> ParseRangeList();
+ bool ParseRangePiece(std::vector<unsigned> &Ranges);
+ RecTy *ParseType();
+ Init *ParseOperation(Record *CurRec);
+ RecTy *ParseOperatorType();
+ Init *ParseObjectName(MultiClass *CurMultiClass);
+ Record *ParseClassID();
+ MultiClass *ParseMultiClassID();
+ bool ApplyLetStack(Record *CurRec);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/TableGen/TableGenBackend.cpp b/contrib/llvm/lib/TableGen/TableGenBackend.cpp
new file mode 100644
index 000000000000..79d567753a6c
--- /dev/null
+++ b/contrib/llvm/lib/TableGen/TableGenBackend.cpp
@@ -0,0 +1,56 @@
+//===- TableGenBackend.cpp - Utilities for TableGen Backends ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides useful services for TableGen backends...
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <algorithm>
+
+using namespace llvm;
+
+const size_t MAX_LINE_LEN = 80U;
+
+static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill,
+ StringRef Suffix) {
+ size_t Pos = (size_t)OS.tell();
+ assert((MAX_LINE_LEN - Prefix.str().size() - Suffix.size() > 0) &&
+ "header line exceeds max limit");
+ OS << Prefix;
+ const size_t e = MAX_LINE_LEN - Suffix.size();
+ for (size_t i = (size_t)OS.tell() - Pos; i < e; ++i)
+ OS << Fill;
+ OS << Suffix << '\n';
+}
+
+void llvm::emitSourceFileHeader(StringRef Desc, raw_ostream &OS) {
+ printLine(OS, "/*===- TableGen'erated file ", '-', "*- C++ -*-===*\\");
+ printLine(OS, "|*", ' ', "*|");
+ size_t Pos = 0U;
+ size_t PosE;
+ StringRef Prefix("|*");
+ StringRef Suffix(" *|");
+ do{
+ size_t PSLen = Suffix.size() + Prefix.size();
+ PosE = Pos + ((MAX_LINE_LEN > (Desc.size() - PSLen)) ?
+ Desc.size() :
+ MAX_LINE_LEN - PSLen);
+ printLine(OS, Prefix + Desc.slice(Pos, PosE), ' ', Suffix);
+ Pos = PosE;
+ } while(Pos < Desc.size());
+ printLine(OS, Prefix, ' ', Suffix);
+ printLine(OS, Prefix + " Automatically generated file, do not edit!", ' ',
+ Suffix);
+ printLine(OS, Prefix, ' ', Suffix);
+ printLine(OS, "\\*===", '-', "===*/");
+ OS << '\n';
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm/lib/Target/AArch64/AArch64.h
new file mode 100644
index 000000000000..4de4faa58182
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.h
@@ -0,0 +1,42 @@
+//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AArch64 back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_H
+#define LLVM_TARGET_AARCH64_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64AsmPrinter;
+class FunctionPass;
+class AArch64TargetMachine;
+class MachineInstr;
+class MCInst;
+
+FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
+
+FunctionPass *createAArch64BranchFixupPass();
+
+void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AArch64AsmPrinter &AP);
+
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
new file mode 100644
index 000000000000..e17052b4a565
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -0,0 +1,70 @@
+//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AArch64 Subtarget features.
+//
+
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable Advanced SIMD instructions">;
+
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable cryptographic instructions">;
+
+//===----------------------------------------------------------------------===//
+// AArch64 Processors
+//
+
+include "AArch64Schedule.td"
+
+def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AArch64RegisterInfo.td"
+
+include "AArch64CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AArch64InstrInfo.td"
+
+def AArch64InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+
+def A64InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def AArch64 : Target {
+ let InstructionSet = AArch64InstrInfo;
+ let AssemblyWriters = [A64InstPrinter];
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
new file mode 100644
index 000000000000..47ebb826e0d0
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -0,0 +1,347 @@
+//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format AArch64 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "AArch64AsmPrinter.h"
+#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MachineLocation
+AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // See emitFrameIndexDebugValue in InstrInfo for where this instruction is
+ // expected to be created.
+ assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg()
+ && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+/// Try to print a floating-point register as if it belonged to a specified
+/// register-class. For example the inline asm operand modifier "b" requires its
+/// argument to be printed as "bN".
+static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
+ const TargetRegisterInfo *TRI,
+ const TargetRegisterClass &RegClass,
+ raw_ostream &O) {
+ if (!MO.isReg())
+ return true;
+
+ for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
+ if (RegClass.contains(*AR)) {
+ O << AArch64InstPrinter::getRegisterName(*AR);
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
+/// with the obvious type and an immediate 0 as either wzr or xzr.
+static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
+ const TargetRegisterInfo *TRI,
+ const TargetRegisterClass &RegClass,
+ raw_ostream &O) {
+ char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
+
+ if (MO.isImm() && MO.getImm() == 0) {
+ O << Prefix << "zr";
+ return false;
+ } else if (MO.isReg()) {
+ if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
+ O << (Prefix == 'x' ? "sp" : "wsp");
+ return false;
+ }
+
+ for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
+ if (RegClass.contains(*AR)) {
+ O << AArch64InstPrinter::getRegisterName(*AR);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
+ bool PrintImmediatePrefix,
+ StringRef Suffix, raw_ostream &O) {
+ StringRef Name;
+ StringRef Modifier;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for symbolic address constraint");
+ case MachineOperand::MO_GlobalAddress:
+ Name = Mang->getSymbol(MO.getGlobal())->getName();
+
+ // Global variables may be accessed either via a GOT or in various fun and
+ // interesting TLS-model specific ways. Set the prefix modifier as
+ // appropriate here.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (GV->isThreadLocal()) {
+ switch (TM.getTLSModel(GV)) {
+ case TLSModel::GeneralDynamic:
+ Modifier = "tlsdesc";
+ break;
+ case TLSModel::LocalDynamic:
+ Modifier = "dtprel";
+ break;
+ case TLSModel::InitialExec:
+ Modifier = "gottprel";
+ break;
+ case TLSModel::LocalExec:
+ Modifier = "tprel";
+ break;
+ }
+ } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ Modifier = "got";
+ }
+ }
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ Name = MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ Name = GetCPISymbol(MO.getIndex())->getName();
+ break;
+ }
+
+ // Some instructions (notably ADRP) don't take the # prefix for
+ // immediates. Only print it if asked to.
+ if (PrintImmediatePrefix)
+ O << '#';
+
+ // Only need the joining "_" if both the prefix and the suffix are
+ // non-null. This little block simply takes care of the four possibly
+ // combinations involved there.
+ if (Modifier == "" && Suffix == "")
+ O << Name;
+ else if (Modifier == "" && Suffix != "")
+ O << ":" << Suffix << ':' << Name;
+ else if (Modifier != "" && Suffix == "")
+ O << ":" << Modifier << ':' << Name;
+ else
+ O << ":" << Modifier << '_' << Suffix << ':' << Name;
+
+ return false;
+}
+
+bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!ExtraCode || !ExtraCode[0]) {
+ // There's actually no operand modifier, which leads to a slightly eclectic
+ // set of behaviour which we have to handle here.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for inline assembly");
+ case MachineOperand::MO_Register:
+ // GCC prints the unmodified operand of a 'w' constraint as the vector
+ // register. Technically, we could allocate the argument as a VPR128, but
+ // that leads to extremely dodgy copies being generated to get the data
+ // there.
+ if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O))
+ O << AArch64InstPrinter::getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << '#' << MO.getImm();
+ break;
+ case MachineOperand::MO_FPImmediate:
+ assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
+ O << "#0.0";
+ break;
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ return printSymbolicAddress(MO, false, "", O);
+ }
+ return false;
+ }
+
+ // We have a real modifier to handle.
+ switch(ExtraCode[0]) {
+ default:
+ // See if this is a generic operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'w':
+ // Output 32-bit general register operand, constant zero as wzr, or stack
+ // pointer as wsp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR32RegClass, O);
+ case 'x':
+ // Output 64-bit general register operand, constant zero as xzr, or stack
+ // pointer as sp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR64RegClass, O);
+ case 'H':
+ // Output higher numbered of a 64-bit general register pair
+ case 'Q':
+ // Output least significant register of a 64-bit general register pair
+ case 'R':
+ // Output most significant register of a 64-bit general register pair
+
+ // FIXME note: these three operand modifiers will require, to some extent,
+ // adding a paired GPR64 register class. Initial investigation suggests that
+ // assertions are hit unless it has a type and is made legal for that type
+ // in ISelLowering. After that step is made, the number of modifications
+ // needed explodes (operation legality, calling conventions, stores, reg
+ // copies ...).
+ llvm_unreachable("FIXME: Unimplemented register pairs");
+ case 'b':
+ // Output 8-bit FP/SIMD scalar register operand, prefixed with b.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR8RegClass, O);
+ case 'h':
+ // Output 16-bit FP/SIMD scalar register operand, prefixed with h.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR16RegClass, O);
+ case 's':
+ // Output 32-bit FP/SIMD scalar register operand, prefixed with s.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR32RegClass, O);
+ case 'd':
+ // Output 64-bit FP/SIMD scalar register operand, prefixed with d.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR64RegClass, O);
+ case 'q':
+ // Output 128-bit FP/SIMD scalar register operand, prefixed with q.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR128RegClass, O);
+ case 'A':
+ // Output symbolic address with appropriate relocation modifier (also
+ // suitable for ADRP).
+ return printSymbolicAddress(MI->getOperand(OpNum), false, "", O);
+ case 'L':
+ // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
+ // modifier.
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O);
+ case 'G':
+ // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
+ // modifier (currently only for TLS local exec).
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O);
+ }
+
+
+}
+
+bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ // Currently both the memory constraints (m and Q) behave the same and amount
+ // to the address as a single register. In future, we may allow "m" to provide
+ // both a base and an offset.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline assembly memory operand");
+ O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
+ return false;
+}
+
+void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg());
+ OS << '+' << MI->getOperand(1).getImm();
+ OS << ']';
+ OS << "+" << MI->getOperand(NOps - 2).getImm();
+}
+
+
+#include "AArch64GenMCPseudoLowering.inc"
+
+void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
+ switch (MI->getOpcode()) {
+ case AArch64::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const DataLayout *TD = TM.getDataLayout();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize(0), 0);
+ }
+ Stubs.clear();
+ }
+ }
+}
+
+bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmPrinter() {
+ RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64Target);
+}
+
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h
new file mode 100644
index 000000000000..af0c9fed066f
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h
@@ -0,0 +1,80 @@
+// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64 assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64ASMPRINTER_H
+#define LLVM_AARCH64ASMPRINTER_H
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const AArch64Subtarget *Subtarget;
+
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ public:
+ explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ }
+
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+ MCOperand lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Sym) const;
+
+ void EmitInstruction(const MachineInstr *MI);
+ void EmitEndOfAsmFile(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ /// printSymbolicAddress - Given some kind of reasonably bare symbolic
+ /// reference, print out the appropriate asm string to represent it. If
+ /// appropriate, a relocation-specifier will be produced, composed of a
+ /// general class derived from the MO parameter and an instruction-specific
+ /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
+ /// given.
+ bool printSymbolicAddress(const MachineOperand &MO,
+ bool PrintImmediatePrefix,
+ StringRef Suffix, raw_ostream &O);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ virtual const char *getPassName() const {
+ return "AArch64 Assembly Printer";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp
new file mode 100644
index 000000000000..71233ba5c3dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp
@@ -0,0 +1,600 @@
+//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that fixes AArch64 branches which have ended up out
+// of range for their immediate operands.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-branch-fixup"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+
+/// Return the worst case padding that could result from unknown offset bits.
+/// This does not include alignment padding caused by known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+namespace {
+ /// Due to limited PC-relative displacements, conditional branches to distant
+ /// blocks may need converting into an unconditional equivalent. For example:
+ /// tbz w1, #0, far_away
+ /// becomes
+ /// tbnz w1, #0, skip
+ /// b far_away
+ /// skip:
+ class AArch64BranchFixup : public MachineFunctionPass {
+ /// Information about the offset and size of a single basic block.
+ struct BasicBlockInfo {
+ /// Distance from the beginning of the function to the beginning of this
+ /// basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size of the basic block in bytes. If the block contains inline
+ /// assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// The number of low bits in Offset that are known to be exact. The
+ /// remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// When non-zero, the block contains instructions (inline asm) of unknown
+ /// size. The real size may be smaller than Size bytes by a multiple of 1
+ /// << Unalign.
+ uint8_t Unalign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = CountTrailingZeros_32(Size);
+ return Bits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ if (!LogAlign)
+ return PO;
+ // Add alignment padding from the terminator.
+ return PO + UnknownPadding(LogAlign, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of know bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(LogAlign, internalKnownBits());
+ }
+ };
+
+ std::vector<BasicBlockInfo> BBInfo;
+
+ /// One per immediate branch, keeping the machine instruction pointer,
+ /// conditional or unconditional, the max displacement, and (if IsCond is
+ /// true) the corresponding inverted branch opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned OffsetBits : 31;
+ bool IsCond : 1;
+ ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
+ : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
+ };
+
+ /// Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ MachineFunction *MF;
+ const AArch64InstrInfo *TII;
+ public:
+ static char ID;
+ AArch64BranchFixup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "AArch64 branch fixup pass";
+ }
+
+ private:
+ void initializeFunctionInfo();
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned OffsetBits);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify();
+ };
+ char AArch64BranchFixup::ID = 0;
+}
+
+/// check BBOffsets
+void AArch64BranchFixup::verify() {
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ unsigned MBBId = MBB->getNumber();
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void AArch64BranchFixup::dumpBBs() {
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
+}
+
+/// Returns an instance of the branch fixup pass.
+FunctionPass *llvm::createAArch64BranchFixupPass() {
+ return new AArch64BranchFixup();
+}
+
+bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ DEBUG(dbgs() << "***** AArch64BranchFixup ******");
+ TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block and location of each immediate branch.
+ initializeFunctionInfo();
+
+ // Iteratively fix up branches until there is no change.
+ unsigned NoBRIters = 0;
+ bool MadeChange = false;
+ while (true) {
+ DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ ImmBranches.clear();
+
+ return MadeChange;
+}
+
+/// Return true if the specified basic block can fallthrough into the block
+/// immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// Do the initial scan of the function, building up information about the sizes
+/// of each block, and each immediate branch.
+void AArch64BranchFixup::initializeFunctionInfo() {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ adjustBBOffsetsAfter(MF->begin());
+
+ // Now go back through the instructions and build up our data structures.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ int Opc = I->getOpcode();
+ if (I->isBranch()) {
+ bool IsCond = false;
+
+ // The offsets encoded in instructions here scale by the instruction
+ // size (4 bytes), effectively increasing their range by 2 bits.
+ unsigned Bits = 0;
+ switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
+ case AArch64::TBZxii:
+ case AArch64::TBZwii:
+ case AArch64::TBNZxii:
+ case AArch64::TBNZwii:
+ IsCond = true;
+ Bits = 14 + 2;
+ break;
+ case AArch64::Bcc:
+ case AArch64::CBZx:
+ case AArch64::CBZw:
+ case AArch64::CBNZx:
+ case AArch64::CBNZw:
+ IsCond = true;
+ Bits = 19 + 2;
+ break;
+ case AArch64::Bimm:
+ Bits = 26 + 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
+ }
+ }
+ }
+}
+
+/// Compute the size and some alignment information for MBB. This function
+/// updates BBInfo directly.
+void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->getInstSizeInBytes(*I);
+ // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = 2;
+ }
+}
+
+/// Return the current offset of the specified machine instruction from the
+/// start of the function. This offset changes as stuff is moved around inside
+/// the function.
+unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+ return Offset;
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *
+AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF->insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ NewBB->transferSuccessors(OrigBB);
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ computeBlockSize(OrigBB);
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ computeBlockSize(NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBBOffsetsAfter(OrigBB);
+
+ return NewBB;
+}
+
+void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. This is the maximum number of blocks
+ // changed before calling this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
+ }
+}
+
+/// Returns true if the distance between specific MI and specific BB can fit in
+/// MI's displacement field.
+bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
+ MachineBasicBlock *DestBB,
+ unsigned OffsetBits) {
+ int64_t BrOffset = getOffsetOf(MI);
+ int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " bits available=" << OffsetBits
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ return isIntN(OffsetBits, DestOffset - BrOffset);
+}
+
+/// Fix up an immediate branch whose destination is too far away to fit in its
+/// displacement field.
+bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isMBB()) {
+ DestBB = MI->getOperand(i).getMBB();
+ break;
+ }
+ }
+ assert(DestBB && "Branch with no destination BB?");
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.OffsetBits))
+ return false;
+
+ assert(Br.IsCond && "Only conditional branches should need fixup");
+ return fixupConditionalBr(Br);
+}
+
+/// Fix up a conditional branch whose destination is too far away to fit in its
+/// displacement field. It is converted to an inverse conditional branch + an
+/// unconditional branch to the destination.
+bool
+AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned CondBrMBBOperand = 0;
+
+ // The general idea is to add an unconditional branch to the destination and
+ // invert the conditional branch to jump over it. Complications occur around
+ // fallthrough and unreachable ends to the block.
+ // b.lt L1
+ // =>
+ // b.ge L2
+ // b L1
+ // L2:
+
+ // First we invert the conditional branch, by creating a replacement if
+ // necessary. This if statement contains all the special handling of different
+ // branch types.
+ if (MI->getOpcode() == AArch64::Bcc) {
+ // The basic block is operand number 1 for Bcc
+ CondBrMBBOperand = 1;
+
+ A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
+ CC = A64InvertCondCode(CC);
+ MI->getOperand(0).setImm(CC);
+ } else {
+ MachineInstrBuilder InvertedMI;
+ int InvertedOpcode;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown branch type");
+ case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
+ case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
+ case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
+ case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
+ case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
+ case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
+ case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
+ case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
+ }
+
+ InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
+ for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
+ InvertedMI.addOperand(MI->getOperand(i));
+ if (MI->getOperand(i).isMBB())
+ CondBrMBBOperand = i;
+ }
+
+ MI->eraseFromParent();
+ MI = Br.MI = InvertedMI;
+ }
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through
+ // block. Otherwise, split the MBB before the next instruction.
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == AArch64::Bimm) {
+ // Last MI in the BB is an unconditional branch. We can swap destinations:
+ // b.eq L1 (temporarily b.ne L1 after first change)
+ // b L2
+ // =>
+ // b.ne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ MachineBasicBlock::iterator MBBI = MI; ++MBBI;
+ splitBlockBeforeInstr(MBBI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->getInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size -= delta;
+ MBB->back().eraseFromParent();
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
+ }
+
+ // After splitting and removing the unconditional branch from the original BB,
+ // the structure is now:
+ // oldbb:
+ // [things]
+ // b.invertedCC L1
+ // splitbb/fallthroughbb:
+ // [old b L2/real continuation]
+ //
+ // We now have to change the conditional branch to point to splitbb and add an
+ // unconditional branch after it to L1, giving the final structure:
+ // oldbb:
+ // [things]
+ // b.invertedCC splitbb
+ // b L1
+ // splitbb/fallthroughbb:
+ // [old b L2/real continuation]
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(dbgs() << " Insert B to BB#"
+ << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new unconditional branch and fixup the destination of the
+ // conditional one. Also update the ImmBranch as well as adding a new entry
+ // for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
+ .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
+ MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
+
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+
+ // 26 bits written down in Bimm, specifying a multiple of 4.
+ unsigned OffsetBits = 26 + 2;
+ ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
+
+ adjustBBOffsetsAfter(MBB);
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td
new file mode 100644
index 000000000000..b880d8373deb
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td
@@ -0,0 +1,196 @@
+//==-- AArch64CallingConv.td - Calling Conventions for ARM ----*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for AArch64 architecture.
+//===----------------------------------------------------------------------===//
+
+
+// The AArch64 Procedure Call Standard is unfortunately specified at a slightly
+// higher level of abstraction than LLVM's target interface presents. In
+// particular, it refers (like other ABIs, in fact) directly to
+// structs. However, generic LLVM code takes the liberty of lowering structure
+// arguments to the component fields before we see them.
+//
+// As a result, the obvious direct map from LLVM IR to PCS concepts can't be
+// implemented, so the goals of this calling convention are, in decreasing
+// priority order:
+// 1. Expose *some* way to express the concepts required to implement the
+// generic PCS from a front-end.
+// 2. Provide a sane ABI for pure LLVM.
+// 3. Follow the generic PCS as closely as is naturally possible.
+//
+// The suggested front-end implementation of PCS features is:
+// * Integer, float and vector arguments of all sizes which end up in
+// registers are passed and returned via the natural LLVM type.
+// * Structure arguments with size <= 16 bytes are passed and returned in
+// registers as similar integer or composite types. For example:
+// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
+// * HFAs in registers follow rules similar to small structs: appropriate
+// composite types.
+// * Structure arguments with size > 16 bytes are passed via a pointer,
+// handled completely by the front-end.
+// * Structure return values > 16 bytes via an sret pointer argument.
+// * Other stack-based arguments (not large structs) are passed using byval
+// pointers. Padding arguments are added beforehand to guarantee a large
+// struct doesn't later use integer registers.
+//
+// N.b. this means that it is the front-end's responsibility (if it cares about
+// PCS compliance) to check whether enough registers are available for an
+// argument when deciding how to pass it.
+
+class CCIfAlign<int Align, CCAction A>:
+ CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
+
+def CC_A64_APCS : CallingConv<[
+ // SRet is an LLVM-specific concept, so it takes precedence over general ABI
+ // concerns. However, this rule will be used by C/C++ frontends to implement
+ // structure return.
+ CCIfSRet<CCAssignToReg<[X8]>>,
+
+ // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+ // slot is 64-bit.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Canonicalise the various types that live in different floating-point
+ // registers. This makes sense because the PCS does not distinguish Short
+ // Vectors and Floating-point types.
+ CCIfType<[v2i8], CCBitConvertToType<f16>>,
+ CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCBitConvertToType<f128>>,
+
+ // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
+ // Floating-point or Short Vector Type and the NSRN is less than 8, then the
+ // argument is allocated to the least significant bits of register
+ // v[NSRN]. The NSRN is incremented by one. The argument has now been
+ // allocated."
+ CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+ // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
+ // SIMD and Floating-point registers (NSRN - number of elements < 8), then the
+ // argument is allocated to SIMD and Floating-point registers (with one
+ // register per element of the HFA). The NSRN is incremented by the number of
+ // registers used. The argument has now been allocated."
+ //
+ // N.b. As above, this rule is the responsibility of the front-end.
+
+ // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
+ // the argument is rounded up to the nearest multiple of 8 bytes."
+ //
+ // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
+ // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
+ // Alignment of the Argument's type."
+ //
+ // It is expected that these will be satisfied by adding dummy arguments to
+ // the prototype.
+
+ // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
+ // type then the size of the argument is set to 8 bytes. The effect is as if
+ // the argument had been copied to the least significant bits of a 64-bit
+ // register and the remaining bits filled with unspecified values."
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
+
+ // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
+ // precision Floating-point or Short Vector Type, then the argument is copied
+ // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
+ // argument. The argument has now been allocated."
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[f128], CCAssignToStack<16, 16>>,
+
+ // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
+ // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
+ // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
+ // one. The argument has now been allocated."
+
+ // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
+ // represented as two i64s, the first one being split. If we delayed this
+ // operation C.8 would never be reached.
+ CCIfType<[i64],
+ CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
+
+ // Note: the promotion also implements C.14.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // And now the real implementation of C.7
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+
+ // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
+ // up to the next even number."
+ //
+ // "C.9: If the argument is an Integral Type, the size of the argument is
+ // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
+ // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
+ // memory representation of the argument. The NGRN is incremented by two. The
+ // argument has now been allocated."
+ //
+ // Subtlety here: what if alignment is 16 but it is not an integral type? All
+ // floating-point types have been allocated already, which leaves composite
+ // types: this is why a front-end may need to produce i128 for a struct <= 16
+ // bytes.
+
+ // PCS: "C.10 If the argument is a Composite Type and the size in double-words
+ // of the argument is not more than 8 minus NGRN, then the argument is copied
+ // into consecutive general-purpose registers, starting at x[NGRN]. The
+ // argument is passed as though it had been loaded into the registers from a
+ // double-word aligned address with an appropriate sequence of LDR
+ // instructions loading consecutive registers from memory (the contents of any
+ // unused parts of the registers are unspecified by this standard). The NGRN
+ // is incremented by the number of registers used. The argument has now been
+ // allocated."
+ //
+ // Another one that's the responsibility of the front-end (sigh).
+
+ // PCS: "C.11: The NGRN is set to 8."
+ CCCustom<"CC_AArch64NoMoreRegs">,
+
+ // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
+ // Alignment of the argument's type."
+ //
+ // PCS: "C.13: If the argument is a composite type then the argument is copied
+ // to memory at the adjusted NSAA. The NSAA is by the size of the
+ // argument. The argument has now been allocated."
+ //
+ // Note that the effect of this corresponds to a memcpy rather than register
+ // stores so that the struct ends up correctly addressable at the adjusted
+ // NSAA.
+
+ // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
+ // of the argument is set to 8 bytes. The effect is as if the argument was
+ // copied to the least significant bits of a 64-bit register and the remaining
+ // bits filled with unspecified values."
+ //
+ // Integer types were widened above. Floating-point and composite types have
+ // already been allocated completely. Nothing to do.
+
+ // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
+ // is incremented by the size of the argument. The argument has now been
+ // allocated."
+ CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+ CCIfType<[i64], CCAssignToStack<8, 8>>
+
+]>;
+
+// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits
+// of vector registers (8-15) are callee-saved. The order here is is picked up
+// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of
+// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at
+// [sp-16], ...
+def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
+ (sequence "D%u", 15, 8))>;
+
+
+// TLS descriptor calls are extremely restricted in their changes, to allow
+// optimisations in the (hopefully) more common fast path where no real action
+// is needed. They actually have to preserve all registers, except for the
+// unavoidable X30 and the return register X0.
+def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
+ (sequence "Q%u", 31, 0))>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
new file mode 100644
index 000000000000..dc41f2f60525
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -0,0 +1,633 @@
+//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64InstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
+ uint64_t &Initial,
+ uint64_t &Residual) const {
+ // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
+ // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
+ // 0x1f8, but stack adjustment should always be a multiple of 16.
+ if (Total <= 0x1f0) {
+ Initial = Total;
+ Residual = 0;
+ } else {
+ Initial = 0x1f0;
+ Residual = Total - Initial;
+ }
+}
+
+void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ bool NeedsFrameMoves = MMI.hasDebugInfo()
+ || MF.getFunction()->needsUnwindTableEntry();
+
+ uint64_t NumInitialBytes, NumResidualBytes;
+
+ // Currently we expect the stack to be laid out by
+ // sub sp, sp, #initial
+ // stp x29, x30, [sp, #offset]
+ // ...
+ // str xxx, [sp, #offset]
+ // sub sp, sp, #rest (possibly via extra instructions).
+ if (MFI->getCalleeSavedInfo().size()) {
+ // If there are callee-saved registers, we want to store them efficiently as
+ // a block, and virtual base assignment happens too early to do it for us so
+ // we adjust the stack in two phases: first just for callee-saved fiddling,
+ // then to allocate the rest of the frame.
+ splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
+ } else {
+ // If there aren't any callee-saved registers, two-phase adjustment is
+ // inefficient. It's more efficient to adjust with NumInitialBytes too
+ // because when we're in a "callee pops argument space" situation, that pop
+ // must be tacked onto Initial for correctness.
+ NumInitialBytes = MFI->getStackSize();
+ NumResidualBytes = 0;
+ }
+
+ // Tell everyone else how much adjustment we're expecting them to use. In
+ // particular if an adjustment is required for a tail call the epilogue could
+ // have a different view of things.
+ FuncInfo->setInitialStackAdjust(NumInitialBytes);
+
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
+ MachineInstr::FrameSetup);
+
+ if (NeedsFrameMoves && NumInitialBytes) {
+ // We emit this update even if the CFA is set from a frame pointer later so
+ // that the CFA is valid in the interim.
+ MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(SPLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumInitialBytes);
+ Moves.push_back(MachineMove(SPLabel, Dst, Src));
+ }
+
+ // Otherwise we need to set the frame pointer and/or add a second stack
+ // adjustment.
+
+ bool FPNeedsSetting = hasFP(MF);
+ for (; MBBI != MBB.end(); ++MBBI) {
+ // Note that this search makes strong assumptions about the operation used
+ // to store the frame-pointer: it must be "STP x29, x30, ...". This could
+ // change in future, but until then there's no point in implementing
+ // untestable more generic cases.
+ if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
+ && MBBI->getOperand(0).getReg() == AArch64::X29) {
+ int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
+ FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));
+
+ ++MBBI;
+ emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
+ AArch64::X29,
+ NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
+ MachineInstr::FrameSetup);
+
+ // The offset adjustment used when emitting debugging locations relative
+ // to whatever frame base is set. AArch64 uses the default frame base (FP
+ // or SP) and this adjusts the calculations to be correct.
+ MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
+ - MFI->getStackSize());
+
+ if (NeedsFrameMoves) {
+ MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(FPLabel);
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx));
+ Moves.push_back(MachineMove(FPLabel, Dst, Src));
+ }
+
+ FPNeedsSetting = false;
+ }
+
+ if (!MBBI->getFlag(MachineInstr::FrameSetup))
+ break;
+ }
+
+ assert(!FPNeedsSetting && "Frame pointer couldn't be set");
+
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
+ MachineInstr::FrameSetup);
+
+ // Now we emit the rest of the frame setup information, if necessary: we've
+ // already noted the FP and initial SP moves so we're left with the prologue's
+ // final SP update and callee-saved register locations.
+ if (!NeedsFrameMoves)
+ return;
+
+ // Reuse the label if appropriate, so create it in this outer scope.
+ MCSymbol *CSLabel = 0;
+
+ // The rest of the stack adjustment
+ if (!hasFP(MF) && NumResidualBytes) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+
+ // And any callee-saved registers (it's fine to leave them to the end here,
+ // because the old values are still valid at this point.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.size()) {
+ if (!CSLabel) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+ }
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ MachineLocation Dst(MachineLocation::VirtualFP,
+ MFI->getObjectOffset(I->getFrameIdx()));
+ MachineLocation Src(I->getReg());
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+ }
+}
+
+void
+AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc DL = MBBI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ // Initial and residual are named for consitency with the prologue. Note that
+ // in the epilogue, the residual adjustment is executed first.
+ uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
+ uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
+ uint64_t ArgumentPopSize = 0;
+ if (RetOpcode == AArch64::TC_RETURNdi ||
+ RetOpcode == AArch64::TC_RETURNxi) {
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+
+ MachineInstrBuilder MIB;
+ if (RetOpcode == AArch64::TC_RETURNdi) {
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
+ if (JumpTarget.isGlobal()) {
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else {
+ assert(JumpTarget.isSymbol() && "unexpected tail call destination");
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else {
+ assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
+ && "Unexpected tail call");
+
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
+ MIB.addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ // Add the extra operands onto the new tail call instruction even though
+ // they're not used directly (so that liveness is tracked properly etc).
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ MIB->addOperand(MBBI->getOperand(i));
+
+
+ // Delete the pseudo instruction TC_RETURN.
+ MachineInstr *NewMI = prior(MBBI);
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+
+ // For a tail-call in a callee-pops-arguments environment, some or all of
+ // the stack may actually be in use for the call's arguments, this is
+ // calculated during LowerCall and consumed here...
+ ArgumentPopSize = StackAdjust.getImm();
+ } else {
+ // ... otherwise the amount to pop is *all* of the argument space,
+ // conveniently stored in the MachineFunctionInfo by
+ // LowerFormalArguments. This will, of course, be zero for the C calling
+ // convention.
+ ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
+ }
+
+ assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
+ && "refusing to adjust stack by misaligned amt");
+
+ // We may need to address callee-saved registers differently, so find out the
+ // bound on the frame indices.
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The "residual" stack update comes first from this direction and guarantees
+ // that SP is NumInitialBytes below its value on function entry, either by a
+ // direct update or restoring it from the frame pointer.
+ if (NumInitialBytes + ArgumentPopSize != 0) {
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
+ NumInitialBytes + ArgumentPopSize);
+ --MBBI;
+ }
+
+
+ // MBBI now points to the instruction just past the last callee-saved
+ // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
+ // otherwise).
+
+ // Now we need to find out where to put the bulk of the stack adjustment
+ MachineBasicBlock::iterator FirstEpilogue = MBBI;
+ while (MBBI != MBB.begin()) {
+ --MBBI;
+
+ unsigned FrameOp;
+ for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
+ if (MBBI->getOperand(FrameOp).isFI())
+ break;
+ }
+
+ // If this instruction doesn't have a frame index we've reached the end of
+ // the callee-save restoration.
+ if (FrameOp == MBBI->getNumOperands())
+ break;
+
+ // Likewise if it *is* a local reference, but not to a callee-saved object.
+ int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
+ if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
+ break;
+
+ FirstEpilogue = MBBI;
+ }
+
+ if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ int64_t StaticFrameBase;
+ StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
+ emitRegUpdate(MBB, FirstEpilogue, DL, TII,
+ AArch64::XSP, AArch64::X29, AArch64::NoRegister,
+ StaticFrameBase);
+ } else {
+ emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes);
+ }
+}
+
+int64_t
+AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
+ int FrameIndex,
+ unsigned &FrameReg,
+ int SPAdj,
+ bool IsCalleeSaveOp) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);
+
+ assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
+ && "callee-saved register in unexpected place");
+
+ // If the frame for this function is particularly large, we adjust the stack
+ // in two phases which means the callee-save related operations see a
+ // different (intermediate) stack size.
+ int64_t FrameRegPos;
+ if (IsCalleeSaveOp) {
+ FrameReg = AArch64::XSP;
+ FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
+ } else if (useFPForAddressing(MF)) {
+ // Have to use the frame pointer since we have no idea where SP is.
+ FrameReg = AArch64::X29;
+ FrameRegPos = FuncInfo->getFramePointerOffset();
+ } else {
+ FrameReg = AArch64::XSP;
+ FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
+ }
+
+ return TopOfFrameOffset - FrameRegPos;
+}
+
+void
+AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ const AArch64RegisterInfo *RegInfo =
+ static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64InstrInfo &TII =
+ *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(AArch64::X29);
+ MF.getRegInfo().setPhysRegUsed(AArch64::X30);
+ }
+
+ // If addressing of local variables is going to be more complicated than
+ // shoving a base register and an offset into the instruction then we may well
+ // need to scavenge registers. We should either specifically add an
+ // callee-save register for this purpose or allocate an extra spill slot.
+
+ bool BigStack =
+ (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
+ || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
+
+ if (!BigStack)
+ return;
+
+ // We certainly need some slack space for the scavenger, preferably an extra
+ // register.
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ uint16_t ExtraReg = AArch64::NoRegister;
+
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
+ !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
+ ExtraReg = CSRegs[i];
+ break;
+ }
+ }
+
+ if (ExtraReg != 0) {
+ MF.getRegInfo().setPhysRegUsed(ExtraReg);
+ } else {
+ // Create a stack slot for scavenging purposes. PrologEpilogInserter
+ // helpfully places it near either SP or FP for us to avoid
+ // infinitely-regression during scavenging.
+ const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
+ unsigned Reg) const {
+ // If @llvm.returnaddress is called then it will refer to X30 by some means;
+ // the prologue store does not kill the register.
+ if (Reg == AArch64::X30) {
+ if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
+ && MBB.getParent()->getRegInfo().isLiveIn(Reg))
+ return false;
+ }
+
+ // In all other cases, physical registers are dead after they've been saved
+ // but live at the beginning of the prologue block.
+ MBB.addLiveIn(Reg);
+ return true;
+}
+
+void
+AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI,
+ LoadStoreMethod PossClasses[],
+ unsigned NumClasses) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // A certain amount of implicit contract is present here. The actual stack
+ // offsets haven't been allocated officially yet, so for strictly correct code
+ // we rely on the fact that the elements of CSI are allocated in order
+ // starting at SP, purely as dictated by size and alignment. In practice since
+ // this function handles the only accesses to those slots it's not quite so
+ // important.
+ //
+ // We have also ordered the Callee-saved register list in AArch64CallingConv
+ // so that the above scheme puts registers in order: in particular we want
+ // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
+ for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ // First we need to find out which register class the register belongs to so
+ // that we can use the correct load/store instrucitons.
+ unsigned ClassIdx;
+ for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
+ if (PossClasses[ClassIdx].RegClass->contains(Reg))
+ break;
+ }
+ assert(ClassIdx != NumClasses
+ && "Asked to store register in unexpected class");
+ const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
+
+ // Now we need to decide whether it's possible to emit a paired instruction:
+ // for this we want the next register to be in the same class.
+ MachineInstrBuilder NewMI;
+ bool Pair = false;
+ if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
+ Pair = true;
+ unsigned StLow = 0, StHigh = 0;
+ if (isPrologue) {
+ // Most of these registers will be live-in to the MBB and killed by our
+ // store, though there are exceptions (see determinePrologueDeath).
+ StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
+ StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ } else {
+ StLow = RegState::Define;
+ StHigh = RegState::Define;
+ }
+
+ NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
+ .addReg(CSI[i+1].getReg(), StLow)
+ .addReg(CSI[i].getReg(), StHigh);
+
+ // If it's a paired op, we've consumed two registers
+ ++i;
+ } else {
+ unsigned State;
+ if (isPrologue) {
+ State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ } else {
+ State = RegState::Define;
+ }
+
+ NewMI = BuildMI(MBB, MBBI, DL,
+ TII.get(PossClasses[ClassIdx].SingleOpcode))
+ .addReg(CSI[i].getReg(), State);
+ }
+
+ // Note that the FrameIdx refers to the second register in a pair: it will
+ // be allocated the smaller numeric address and so is the one an LDP/STP
+ // address must use.
+ int FrameIdx = CSI[i].getFrameIdx();
+ MachineMemOperand::MemOperandFlags Flags;
+ Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ Flags,
+ Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
+ MFI.getObjectAlignment(FrameIdx));
+
+ NewMI.addFrameIndex(FrameIdx)
+ .addImm(0) // address-register offset
+ .addMemOperand(MMO);
+
+ if (isPrologue)
+ NewMI.setMIFlags(MachineInstr::FrameSetup);
+
+ // For aesthetic reasons, during an epilogue we want to emit complementary
+ // operations to the prologue, but in the opposite order. So we still
+ // iterate through the CalleeSavedInfo list in order, but we put the
+ // instructions successively earlier in the MBB.
+ if (!isPrologue)
+ --MBBI;
+ }
+}
+
+bool
+AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ static LoadStoreMethod PossibleClasses[] = {
+ {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
+ {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
+ };
+ unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+ emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
+ PossibleClasses, NumClasses);
+
+ return true;
+}
+
+bool
+AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ if (CSI.empty())
+ return false;
+
+ static LoadStoreMethod PossibleClasses[] = {
+ {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
+ {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
+ };
+ unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+ emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
+ PossibleClasses, NumClasses);
+
+ return true;
+}
+
+bool
+AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // This is a decision of ABI compliance. The AArch64 PCS gives various options
+ // for conformance, and even at the most stringent level more or less permits
+ // elimination for leaf functions because there's no loss of functionality
+ // (for debugging etc)..
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
+ return true;
+
+ // The following are hard-limits: incorrect code will be generated if we try
+ // to omit the frame.
+ return (RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool
+AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
+ return MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+bool
+AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Of the various reasons for having a frame pointer, it's actually only
+ // variable-sized objects that prevent reservation of a call frame.
+ return !(hasFP(MF) && MFI->hasVarSizedObjects());
+}
+
+void
+AArch64FrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ const AArch64InstrInfo &TII =
+ *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MI->getDebugLoc();
+ int Opcode = MI->getOpcode();
+ bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;
+
+ if (!hasReservedCallFrame(MF)) {
+ unsigned Align = getStackAlignment();
+
+ int64_t Amount = MI->getOperand(0).getImm();
+ Amount = RoundUpToAlignment(Amount, Align);
+ if (!IsDestroy) Amount = -Amount;
+
+ // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
+ // doesn't have to pop anything), then the first operand will be zero too so
+ // this adjustment is a no-op.
+ if (CalleePopAmount == 0) {
+ // FIXME: in-function stack adjustment for calls is limited to 12-bits
+ // because there's no guaranteed temporary register available. Mostly call
+ // frames will be allocated at the start of a function so this is OK, but
+ // it is a limitation that needs dealing with.
+ assert(Amount > -0xfff && Amount < 0xfff && "call frame too large");
+ emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
+ }
+ } else if (CalleePopAmount != 0) {
+ // If the calling convention demands that the callee pops arguments from the
+ // stack, we want to add it back if we have a reserved call frame.
+ assert(CalleePopAmount < 0xfff && "call frame too large");
+ emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
+ }
+
+ MBB.erase(MI);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
new file mode 100644
index 000000000000..45ea0ec8e071
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -0,0 +1,108 @@
+//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the AArch64-specific parts of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_FRAMEINFO_H
+#define LLVM_AARCH64_FRAMEINFO_H
+
+#include "AArch64Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class AArch64Subtarget;
+
+class AArch64FrameLowering : public TargetFrameLowering {
+private:
+ // In order to unify the spilling and restoring of callee-saved registers into
+ // emitFrameMemOps, we need to be able to specify which instructions to use
+ // for the relevant memory operations on each register class. An array of the
+ // following struct is populated and passed in to achieve this.
+ struct LoadStoreMethod {
+ const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
+
+ // The preferred instruction.
+ unsigned PairOpcode; // E.g. LSPair64_STR
+
+ // Sometimes only a single register can be handled at once.
+ unsigned SingleOpcode; // E.g. LS64_STR
+ };
+protected:
+ const AArch64Subtarget &STI;
+
+public:
+ explicit AArch64FrameLowering(const AArch64Subtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ /// Decides how much stack adjustment to perform in each phase of the prologue
+ /// and epilogue.
+ void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
+ uint64_t &Residual) const;
+
+ int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
+ unsigned &FrameReg, int SPAdj,
+ bool IsCalleeSaveOp) const;
+
+ virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// If the register is X30 (i.e. LR) and the return address is used in the
+ /// function then the callee-save store doesn't actually kill the register,
+ /// otherwise it does.
+ bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
+
+ /// This function emits the loads or stores required during prologue and
+ /// epilogue as efficiently as possible.
+ ///
+ /// The operations involved in setting up and tearing down the frame are
+ /// similar enough to warrant a shared function, particularly as discrepancies
+ /// between the two would be disastrous.
+ void emitFrameMemOps(bool isStore, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI,
+ LoadStoreMethod PossibleClasses[],
+ unsigned NumClasses) const;
+
+
+ virtual bool hasFP(const MachineFunction &MF) const;
+
+ virtual bool useFPForAddressing(const MachineFunction &MF) const;
+
+ /// On AA
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..46b822152a00
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -0,0 +1,415 @@
+//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===--------------------------------------------------------------------===//
+/// AArch64 specific code to select AArch64 machine instructions for
+/// SelectionDAG operations.
+///
+namespace {
+
+class AArch64DAGToDAGISel : public SelectionDAGISel {
+ AArch64TargetMachine &TM;
+ const AArch64InstrInfo *TII;
+
+ /// Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const AArch64Subtarget *Subtarget;
+
+public:
+ explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
+ TII(static_cast<const AArch64InstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "AArch64 Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "AArch64GenDAGISel.inc"
+
+ template<unsigned MemSize>
+ bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN || CN->getZExtValue() % MemSize != 0
+ || CN->getZExtValue() / MemSize > 0xfff)
+ return false;
+
+ UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
+ return true;
+ }
+
+ template<unsigned RegWidth>
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
+ return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
+
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth);
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ bool SelectLogicalImm(SDValue N, SDValue &Imm);
+
+ template<unsigned RegWidth>
+ bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
+ return SelectTSTBOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
+
+ SDNode *TrySelectToMoveImm(SDNode *N);
+ SDNode *LowerToFPLitPool(SDNode *Node);
+ SDNode *SelectToLitPool(SDNode *N);
+
+ SDNode* Select(SDNode*);
+private:
+};
+}
+
+bool
+AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ if (!CN) return false;
+
+ // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
+ // is between 1 and 32 for a destination w-register, or 1 and 64 for an
+ // x-register.
+ //
+ // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
+ // want THIS_NODE to be 2^fbits. This is much easier to deal with using
+ // integers.
+ bool IsExact;
+
+ // fbits is between 1 and 64 in the worst-case, which means the fmul
+ // could have 2^64 as an actual operand. Need 65 bits of precision.
+ APSInt IntVal(65, true);
+ CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
+
+ // N.b. isPowerOf2 also checks for > 0.
+ if (!IsExact || !IntVal.isPowerOf2()) return false;
+ unsigned FBits = IntVal.logBase2();
+
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding FBits, but it must still be in range.
+ if (FBits == 0 || FBits > RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
+ return true;
+}
+
+bool
+AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ switch (ConstraintCode) {
+ default: llvm_unreachable("Unrecognised AArch64 memory constraint");
+ case 'm':
+ // FIXME: more freedom is actually permitted for 'm'. We can go
+ // hunting for a base and an offset if we want. Of course, since
+ // we don't really know how the operand is going to be used we're
+ // probably restricted to the load/store pair's simm7 as an offset
+ // range anyway.
+ case 'Q':
+ OutOps.push_back(Op);
+ }
+
+ return false;
+}
+
+bool
+AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
+ ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
+ if (!Imm || !Imm->getValueAPF().isPosZero())
+ return false;
+
+ // Doesn't actually carry any information, but keeps TableGen quiet.
+ Dummy = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
+ uint32_t Bits;
+ uint32_t RegWidth = N.getValueType().getSizeInBits();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) return false;
+
+ if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
+ return false;
+
+ Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
+ return true;
+}
+
+SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
+ SDNode *ResNode;
+ DebugLoc dl = Node->getDebugLoc();
+ EVT DestType = Node->getValueType(0);
+ unsigned DestWidth = DestType.getSizeInBits();
+
+ unsigned MOVOpcode;
+ EVT MOVType;
+ int UImm16, Shift;
+ uint32_t LogicalBits;
+
+ uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
+ if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
+ MOVType = DestType;
+ MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
+ } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
+ MOVType = DestType;
+ MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
+ } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
+ // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
+ // use a 32-bit instruction: "movn w0, 0xedbc".
+ MOVType = MVT::i32;
+ MOVOpcode = AArch64::MOVNwii;
+ } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
+ MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
+ uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
+
+ return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
+ CurDAG->getRegister(ZR, DestType),
+ CurDAG->getTargetConstant(LogicalBits, MVT::i32));
+ } else {
+ // Can't handle it in one instruction. There's scope for permitting two (or
+ // more) instructions, but that'll need more thought.
+ return NULL;
+ }
+
+ ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
+ CurDAG->getTargetConstant(UImm16, MVT::i32),
+ CurDAG->getTargetConstant(Shift, MVT::i32));
+
+ if (MOVType != DestType) {
+ ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
+ MVT::i64, MVT::i32, MVT::Other,
+ CurDAG->getTargetConstant(0, MVT::i64),
+ SDValue(ResNode, 0),
+ CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
+ }
+
+ return ResNode;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
+ DebugLoc DL = Node->getDebugLoc();
+ uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
+ int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
+ EVT DestType = Node->getValueType(0);
+ EVT PtrVT = TLI.getPointerTy();
+
+ // Since we may end up loading a 64-bit constant from a 32-bit entry the
+ // constant in the pool may have a different type to the eventual node.
+ ISD::LoadExtType Extension;
+ EVT MemType;
+
+ assert((DestType == MVT::i64 || DestType == MVT::i32)
+ && "Only expect integer constants at the moment");
+
+ if (DestType == MVT::i32) {
+ Extension = ISD::NON_EXTLOAD;
+ MemType = MVT::i32;
+ } else if (UnsignedVal <= UINT32_MAX) {
+ Extension = ISD::ZEXTLOAD;
+ MemType = MVT::i32;
+ } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
+ Extension = ISD::SEXTLOAD;
+ MemType = MVT::i32;
+ } else {
+ Extension = ISD::NON_EXTLOAD;
+ MemType = MVT::i64;
+ }
+
+ Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
+ MemType.getSizeInBits()),
+ UnsignedVal);
+ SDValue PoolAddr;
+ unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType());
+ PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ CurDAG->getConstant(Alignment, MVT::i32));
+
+ return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
+ PoolAddr,
+ MachinePointerInfo::getConstantPool(), MemType,
+ /* isVolatile = */ false,
+ /* isNonTemporal = */ false,
+ Alignment).getNode();
+}
+
+SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
+ DebugLoc DL = Node->getDebugLoc();
+ const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
+ EVT PtrVT = TLI.getPointerTy();
+ EVT DestType = Node->getValueType(0);
+
+ unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType());
+ SDValue PoolAddr;
+
+ assert(TM.getCodeModel() == CodeModel::Small &&
+ "Only small code model supported");
+ PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ CurDAG->getConstant(Alignment, MVT::i32));
+
+ return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /* isVolatile = */ false,
+ /* isNonTemporal = */ false,
+ /* isInvariant = */ true,
+ Alignment).getNode();
+}
+
+bool
+AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) return false;
+
+ uint64_t Val = CN->getZExtValue();
+
+ if (!isPowerOf2_64(Val)) return false;
+
+ unsigned TestedBit = Log2_64(Val);
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding TestedBit, but it must still be in range.
+ if (TestedBit >= RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
+ return true;
+}
+
+SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
+ // Dump information about the Node being selected
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
+
+ if (Node->isMachineOpcode()) {
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
+ return NULL;
+ }
+
+ switch (Node->getOpcode()) {
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT PtrTy = TLI.getPointerTy();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
+ TFI, CurDAG->getTargetConstant(0, PtrTy));
+ }
+ case ISD::ConstantPool: {
+ // Constant pools are fine, just create a Target entry.
+ ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
+ const Constant *C = CN->getConstVal();
+ SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
+
+ ReplaceUses(SDValue(Node, 0), CP);
+ return NULL;
+ }
+ case ISD::Constant: {
+ SDNode *ResNode = 0;
+ if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
+ // XZR and WZR are probably even better than an actual move: most of the
+ // time they can be folded into another instruction with *no* cost.
+
+ EVT Ty = Node->getValueType(0);
+ assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
+ uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
+ ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ Node->getDebugLoc(),
+ Register, Ty).getNode();
+ }
+
+ // Next best option is a move-immediate, see if we can do that.
+ if (!ResNode) {
+ ResNode = TrySelectToMoveImm(Node);
+ }
+
+ if (ResNode)
+ return ResNode;
+
+ // If even that fails we fall back to a lit-pool entry at the moment. Future
+ // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
+ ResNode = SelectToLitPool(Node);
+ assert(ResNode && "We need *some* way to materialise a constant");
+
+ // We want to continue selection at this point since the litpool access
+ // generated used generic nodes for simplicity.
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+ Node = ResNode;
+ break;
+ }
+ case ISD::ConstantFP: {
+ if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
+ // FMOV will take care of it from TableGen
+ break;
+ }
+
+ SDNode *ResNode = LowerToFPLitPool(Node);
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+
+ // We want to continue selection at this point since the litpool access
+ // generated used generic nodes for simplicity.
+ Node = ResNode;
+ break;
+ }
+ default:
+ break; // Let generic code handle it
+ }
+
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << "\n");
+
+ return ResNode;
+}
+
+/// This pass converts a legalized DAG into a AArch64-specific DAG, ready for
+/// instruction scheduling.
+FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new AArch64DAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
new file mode 100644
index 000000000000..e9f449709c40
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -0,0 +1,2975 @@
+//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "AArch64TargetObjectFile.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+
+using namespace llvm;
+
+static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
+ const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+
+ if (Subtarget->isTargetLinux())
+ return new AArch64LinuxTargetObjectFile();
+ if (Subtarget->isTargetELF())
+ return new TargetLoweringObjectFileELF();
+ llvm_unreachable("unknown subtarget type");
+}
+
+
+AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
+ RegInfo(TM.getRegisterInfo()),
+ Itins(TM.getInstrItineraryData()) {
+
+ // SIMD compares set the entire lane's bits to 1
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ // Scalar register <-> type mapping
+ addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
+ addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
+ addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
+
+ computeRegisterProperties();
+
+ // Some atomic operations can be folded into load-acquire or store-release
+ // instructions on AArch64. It's marginally simpler to let LLVM expand
+ // everything out to a barrier and then recombine the (few) barriers we can.
+ setInsertFencesForAtomic(true);
+ setTargetDAGCombine(ISD::ATOMIC_FENCE);
+ setTargetDAGCombine(ISD::ATOMIC_STORE);
+
+ // We combine OR nodes for bitfield and NEON BSL operations.
+ setTargetDAGCombine(ISD::OR);
+
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::SRA);
+
+ // AArch64 does not have i1 loads, or much of anything for i1 really.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+
+ setStackPointerRegisterToSaveRestore(AArch64::XSP);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+
+ // We'll lower globals to wrappers for selection.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ // A64 instructions have the comparison predicate attached to the user of the
+ // result, but having a separate comparison is valuable for matching.
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ // Legal floating-point operations.
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f64, Legal);
+
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FNEG, MVT::f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::f64, Legal);
+
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
+
+ // Illegal floating-point operations.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f64, Expand);
+
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+
+ // Virtually no operation on f128 is legal, but LLVM can't expand them when
+ // there's a valid register class, so we need custom operations in most cases.
+ setOperationAction(ISD::FABS, MVT::f128, Expand);
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+ setOperationAction(ISD::SETCC, MVT::f128, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f128, Custom);
+ setOperationAction(ISD::SELECT, MVT::f128, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+
+ // Lowering for many of the conversions is actually specified by the non-f128
+ // type. The LowerXXX function will be trivial when f128 isn't involved.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+
+ // This prevents LLVM trying to compress double constants into a floating
+ // constant-pool entry and trying to load from there. It's of doubtful benefit
+ // for A64: we'd need LDR followed by FCVT, I believe.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+
+ setExceptionPointerRegister(AArch64::X0);
+ setExceptionSelectorRegister(AArch64::X1);
+}
+
+EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
+ // It's reasonably important that this value matches the "natural" legal
+ // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
+ // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
+ if (!VT.isVector()) return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
+ unsigned &strOpc) {
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for atomic binary op!");
+ case 1:
+ ldrOpc = AArch64::LDXR_byte;
+ strOpc = AArch64::STXR_byte;
+ break;
+ case 2:
+ ldrOpc = AArch64::LDXR_hword;
+ strOpc = AArch64::STXR_hword;
+ break;
+ case 4:
+ ldrOpc = AArch64::LDXR_word;
+ strOpc = AArch64::STXR_word;
+ break;
+ case 8:
+ ldrOpc = AArch64::LDXR_dword;
+ strOpc = AArch64::STXR_dword;
+ break;
+ }
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ const TargetRegisterClass *TRC
+ = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldxr dest, ptr
+ // <binop> scratch, dest, incr
+ // stxr stxr_status, scratch, ptr
+ // cbnz stxr_status, loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (BinOpcode) {
+ // All arithmetic operations we'll be creating are designed to take an extra
+ // shift or extend operand, which we can conveniently set to zero.
+
+ // Operand order needs to go the other way for NAND.
+ if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
+ BuildMI(BB, dl, TII->get(BinOpcode), scratch)
+ .addReg(incr).addReg(dest).addImm(0);
+ else
+ BuildMI(BB, dl, TII->get(BinOpcode), scratch)
+ .addReg(dest).addReg(incr).addImm(0);
+ }
+
+ // From the stxr, the register is GPR32; from the cmp it's GPR32wsp
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loopMBB);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned CmpOp,
+ A64CC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *TRC, *TRCsp;
+ if (Size == 8) {
+ TRC = &AArch64::GPR64RegClass;
+ TRCsp = &AArch64::GPR64xspRegClass;
+ } else {
+ TRC = &AArch64::GPR32RegClass;
+ TRCsp = &AArch64::GPR32wspRegClass;
+ }
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ MRI.constrainRegClass(scratch, TRCsp);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldxr dest, ptr
+ // cmp incr, dest (, sign extend if necessary)
+ // csel scratch, dest, incr, cond
+ // stxr stxr_status, scratch, ptr
+ // cbnz stxr_status, loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+ // Build compare and cmov instructions.
+ MRI.constrainRegClass(incr, TRCsp);
+ BuildMI(BB, dl, TII->get(CmpOp))
+ .addReg(incr).addReg(oldval).addImm(0);
+
+ BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
+ scratch)
+ .addReg(oldval).addReg(incr).addImm(Cond);
+
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status)
+ .addReg(scratch).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loopMBB);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *TRCsp;
+ TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldxr dest, [ptr]
+ // cmp dest, oldval
+ // b.ne exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+ unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
+ MRI.constrainRegClass(dest, TRCsp);
+ BuildMI(BB, dl, TII->get(CmpOp))
+ .addReg(dest).addReg(oldval).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(exitMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex stxr_status, newval, [ptr]
+ // cbnz stxr_status, loop1MBB
+ BB = loop2MBB;
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loop1MBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // We materialise the F128CSEL pseudo-instruction using conditional branches
+ // and loads, giving an instruciton sequence like:
+ // str q0, [sp]
+ // b.ne IfTrue
+ // b Finish
+ // IfTrue:
+ // str q1, [sp]
+ // Finish:
+ // ldr q0, [sp]
+ //
+ // Using virtual registers would probably not be beneficial since COPY
+ // instructions are expensive for f128 (there's no actual instruction to
+ // implement them).
+ //
+ // An alternative would be to do an integer-CSEL on some address. E.g.:
+ // mov x0, sp
+ // add x1, sp, #16
+ // str q0, [x0]
+ // str q1, [x1]
+ // csel x0, x0, x1, ne
+ // ldr q0, [x0]
+ //
+ // It's unclear which approach is actually optimal.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineFunction *MF = MBB->getParent();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned IfTrueReg = MI->getOperand(1).getReg();
+ unsigned IfFalseReg = MI->getOperand(2).getReg();
+ unsigned CondCode = MI->getOperand(3).getImm();
+ bool NZCVKilled = MI->getOperand(4).isKill();
+
+ MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, TrueBB);
+ MF->insert(It, EndBB);
+
+ // Transfer rest of current basic-block to EndBB
+ EndBB->splice(EndBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // We need somewhere to store the f128 value needed.
+ int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
+
+ // [... start of incoming MBB ...]
+ // str qIFFALSE, [sp]
+ // b.cc IfTrue
+ // b Done
+ BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
+ .addReg(IfFalseReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+ BuildMI(MBB, DL, TII->get(AArch64::Bcc))
+ .addImm(CondCode)
+ .addMBB(TrueBB);
+ BuildMI(MBB, DL, TII->get(AArch64::Bimm))
+ .addMBB(EndBB);
+ MBB->addSuccessor(TrueBB);
+ MBB->addSuccessor(EndBB);
+
+ // IfTrue:
+ // str qIFTRUE, [sp]
+ BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
+ .addReg(IfTrueReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+
+ // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
+ // blocks.
+ TrueBB->addSuccessor(EndBB);
+
+ // Done:
+ // ldr qDEST, [sp]
+ // [... rest of incoming MBB ...]
+ if (!NZCVKilled)
+ EndBB->addLiveIn(AArch64::NZCV);
+ MachineInstr *StartOfEnd = EndBB->begin();
+ BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+
+ MI->eraseFromParent();
+ return EndBB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unhandled instruction with custom inserter");
+ case AArch64::F128CSEL:
+ return EmitF128CSEL(MI, MBB);
+ case AArch64::ATOMIC_LOAD_ADD_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_SUB_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_AND_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_OR_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_XOR_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_NAND_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_MIN_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
+
+ case AArch64::ATOMIC_LOAD_MAX_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
+
+ case AArch64::ATOMIC_LOAD_UMIN_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
+
+ case AArch64::ATOMIC_LOAD_UMAX_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
+
+ case AArch64::ATOMIC_SWAP_I8:
+ return emitAtomicBinary(MI, MBB, 1, 0);
+ case AArch64::ATOMIC_SWAP_I16:
+ return emitAtomicBinary(MI, MBB, 2, 0);
+ case AArch64::ATOMIC_SWAP_I32:
+ return emitAtomicBinary(MI, MBB, 4, 0);
+ case AArch64::ATOMIC_SWAP_I64:
+ return emitAtomicBinary(MI, MBB, 8, 0);
+
+ case AArch64::ATOMIC_CMP_SWAP_I8:
+ return emitAtomicCmpSwap(MI, MBB, 1);
+ case AArch64::ATOMIC_CMP_SWAP_I16:
+ return emitAtomicCmpSwap(MI, MBB, 2);
+ case AArch64::ATOMIC_CMP_SWAP_I32:
+ return emitAtomicCmpSwap(MI, MBB, 4);
+ case AArch64::ATOMIC_CMP_SWAP_I64:
+ return emitAtomicCmpSwap(MI, MBB, 8);
+ }
+}
+
+
+const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
+ case AArch64ISD::Call: return "AArch64ISD::Call";
+ case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
+ case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
+ case AArch64ISD::BFI: return "AArch64ISD::BFI";
+ case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
+ case AArch64ISD::Ret: return "AArch64ISD::Ret";
+ case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
+ case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
+ case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
+ case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
+ case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
+ case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
+ case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
+
+ default: return NULL;
+ }
+}
+
+static const uint16_t AArch64FPRArgRegs[] = {
+ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
+ AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
+};
+static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
+
+static const uint16_t AArch64ArgRegs[] = {
+ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
+ AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
+};
+static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
+
+static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ // Mark all remaining general purpose registers as allocated. We don't
+ // backtrack: if (for example) an i128 gets put on the stack, no subsequent
+ // i64 will go in registers (C.11).
+ for (unsigned i = 0; i < NumArgRegs; ++i)
+ State.AllocateReg(AArch64ArgRegs[i]);
+
+ return false;
+}
+
+#include "AArch64GenCallingConv.inc"
+
+CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
+
+ switch(CC) {
+ default: llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return CC_A64_APCS;
+ }
+}
+
+void
+AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc DL, SDValue &Chain) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+
+ SmallVector<SDValue, 8> MemOps;
+
+ unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
+ NumArgRegs);
+ unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
+ NumFPRArgRegs);
+
+ unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
+ int GPRIdx = 0;
+ if (GPRSaveSize != 0) {
+ GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
+
+ SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 8),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(8, getPointerTy()));
+ }
+ }
+
+ unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
+ int FPRIdx = 0;
+ if (FPRSaveSize != 0) {
+ FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
+
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
+ &AArch64::FPR128RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 16),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(16, getPointerTy()));
+ }
+ }
+
+ int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
+
+ FuncInfo->setVariadicStackIdx(StackIdx);
+ FuncInfo->setVariadicGPRIdx(GPRIdx);
+ FuncInfo->setVariadicGPRSize(GPRSaveSize);
+ FuncInfo->setVariadicFPRIdx(FPRIdx);
+ FuncInfo->setVariadicFPRSize(FPRSaveSize);
+
+ if (!MemOps.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+ MemOps.size());
+ }
+}
+
+
+SDValue
+AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+
+ SmallVector<SDValue, 16> ArgValues;
+
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Byval is used for small structs and HFAs in the PCS, but the system
+ // should work in a non-compliant manner for larger structs.
+ EVT PtrTy = getPointerTy();
+ int Size = Flags.getByValSize();
+ unsigned NumRegs = (Size + 7) / 8;
+
+ unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
+ VA.getLocMemOffset(),
+ false);
+ SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
+ InVals.push_back(FrameIdxN);
+
+ continue;
+ } else if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC = getRegClassFor(RegVT);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ } else { // VA.isRegLoc()
+ assert(VA.isMemLoc());
+
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+
+
+ }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt: {
+ unsigned DestSize = VA.getValVT().getSizeInBits();
+ unsigned DestSubReg;
+
+ switch (DestSize) {
+ case 8: DestSubReg = AArch64::sub_8; break;
+ case 16: DestSubReg = AArch64::sub_16; break;
+ case 32: DestSubReg = AArch64::sub_32; break;
+ case 64: DestSubReg = AArch64::sub_64; break;
+ default: llvm_unreachable("Unexpected argument promotion");
+ }
+
+ ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
+ VA.getValVT(), ArgValue,
+ DAG.getTargetConstant(DestSubReg, MVT::i32)),
+ 0);
+ break;
+ }
+ }
+
+ InVals.push_back(ArgValue);
+ }
+
+ if (isVarArg)
+ SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
+
+ unsigned StackArgSize = CCInfo.getNextStackOffset();
+ if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
+ // This is a non-standard ABI so by fiat I say we're allowed to make full
+ // use of the stack area to be popped, which must be aligned to 16 bytes in
+ // any case:
+ StackArgSize = RoundUpToAlignment(StackArgSize, 16);
+
+ // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
+ // a multiple of 16.
+ FuncInfo->setArgumentStackToRestore(StackArgSize);
+
+ // This realignment carries over to the available bytes below. Our own
+ // callers will guarantee the space is free by giving an aligned value to
+ // CALLSEQ_START.
+ }
+ // Even if we're not expected to free up the space, it's useful to know how
+ // much is there while considering tail calls (because we can reuse it).
+ FuncInfo->setBytesInStackArgArea(StackArgSize);
+
+ return Chain;
+}
+
+SDValue
+AArch64TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ // PCS: "If the type, T, of the result of a function is such that
+ // void func(T arg) would require that arg be passed as a value in a
+ // register (or set of registers) according to the rules in 5.4, then the
+ // result is returned in the same registers as would be used for such an
+ // argument.
+ //
+ // Otherwise, the caller shall reserve a block of memory of sufficient
+ // size and alignment to hold the result. The address of the memory block
+ // shall be passed as an additional argument to the function in x8."
+ //
+ // This is implemented in two places. The register-return values are dealt
+ // with here, more complex returns are passed as an sret parameter, which
+ // means we don't have to worry about it during actual return.
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
+
+
+ SDValue Arg = OutVals[i];
+
+ // There's no convenient note in the ABI about this as there is for normal
+ // arguments, but it says return values are passed in the same registers as
+ // an argument would be. I believe that includes the comments about
+ // unspecified higher bits, putting the burden of widening on the *caller*
+ // for return values.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt:
+ // Floating-point values should only be extended when they're going into
+ // memory, which can't happen here so an integer extend is acceptable.
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
+}
+
+SDValue
+AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+ bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+
+ if (IsTailCall) {
+ IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+
+ // A sibling call is one where we're under the usual C ABI and not planning
+ // to change that but can still do a tail call:
+ if (!TailCallOpt && IsTailCall)
+ IsSibCall = true;
+ }
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+ // On AArch64 (and all other architectures I'm aware of) the most this has to
+ // do is adjust the stack pointer.
+ unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
+ if (IsSibCall) {
+ // Since we're not changing the ABI to make this a tail call, the memory
+ // operands are already available in the caller's incoming argument space.
+ NumBytes = 0;
+ }
+
+ // FPDiff is the byte offset of the call's argument area from the callee's.
+ // Stores to callee stack arguments will be placed in FixedStackSlots offset
+ // by this amount for a tail call. In a sibling call it must be 0 because the
+ // caller will deallocate the entire stack and the callee still expects its
+ // arguments to begin at SP+0. Completely unused for non-tail calls.
+ int FPDiff = 0;
+
+ if (IsTailCall && !IsSibCall) {
+ unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+
+ // FPDiff will be negative if this tail call requires more space than we
+ // would automatically have in our incoming argument space. Positive if we
+ // can actually shrink the stack.
+ FPDiff = NumReusableBytes - NumBytes;
+
+ // The stack pointer must be 16-byte aligned at all times it's used for a
+ // memory operation, which in practice means at *all* times and in
+ // particular across call boundaries. Therefore our own arguments started at
+ // a 16-byte aligned SP and the delta applied for the tail call should
+ // satisfy the same constraint.
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+ }
+
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
+ getPointerTy());
+
+ SmallVector<SDValue, 8> MemOpChains;
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue Arg = OutVals[i];
+
+ // Callee does the actual widening, so all extensions just use an implicit
+ // definition of the rest of the Loc. Aesthetically, this would be nicer as
+ // an ANY_EXTEND, but that isn't valid for floating-point types and this
+ // alternative works on integer types too.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt: {
+ unsigned SrcSize = VA.getValVT().getSizeInBits();
+ unsigned SrcSubReg;
+
+ switch (SrcSize) {
+ case 8: SrcSubReg = AArch64::sub_8; break;
+ case 16: SrcSubReg = AArch64::sub_16; break;
+ case 32: SrcSubReg = AArch64::sub_32; break;
+ case 64: SrcSubReg = AArch64::sub_64; break;
+ default: llvm_unreachable("Unexpected argument promotion");
+ }
+
+ Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
+ VA.getLocVT(),
+ DAG.getUNDEF(VA.getLocVT()),
+ Arg,
+ DAG.getTargetConstant(SrcSubReg, MVT::i32)),
+ 0);
+
+ break;
+ }
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ // A normal register (sub-) argument. For now we just note it down because
+ // we want to copy things into registers as late as possible to avoid
+ // register-pressure (and possibly worse).
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc() && "unexpected argument location");
+
+ SDValue DstAddr;
+ MachinePointerInfo DstInfo;
+ if (IsTailCall) {
+ uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
+ VA.getLocVT().getSizeInBits();
+ OpSize = (OpSize + 7) / 8;
+ int32_t Offset = VA.getLocMemOffset() + FPDiff;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+
+ DstAddr = DAG.getFrameIndex(FI, getPointerTy());
+ DstInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // Make sure any stack arguments overlapping with where we're storing are
+ // loaded before this eventual operation. Otherwise they'll be clobbered.
+ Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
+ } else {
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
+
+ DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
+ }
+
+ if (Flags.isByVal()) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
+ SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile = */ false,
+ /*alwaysInline = */ false,
+ DstInfo, MachinePointerInfo(0));
+ MemOpChains.push_back(Cpy);
+ } else {
+ // Normal stack argument, put it where it's needed.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
+ false, false, 0);
+ MemOpChains.push_back(Store);
+ }
+ }
+
+ // The loads and stores generated above shouldn't clash with each
+ // other. Combining them with this TokenFactor notes that fact for the rest of
+ // the backend.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Most of the rest of the instructions need to be glued together; we don't
+ // want assignments to actual registers used by a call to be rearranged by a
+ // well-meaning scheduler.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // The linker is responsible for inserting veneers when necessary to put a
+ // function call destination in range, so we don't need to bother with a
+ // wrapper here.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ }
+
+ // We don't usually want to end the call-sequence here because we would tidy
+ // the frame up *after* the call, however in the ABI-changing tail-call case
+ // we've carefully laid out the parameters so that when sp is reset they'll be
+ // in the correct location.
+ if (IsTailCall && !IsSibCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // We produce the following DAG scheme for the actual call instruction:
+ // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?
+ //
+ // Most arguments aren't going to be used and just keep the values live as
+ // far as LLVM is concerned. It's expected to be selected as simply "bl
+ // callee" (for a direct, non-tail call).
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (IsTailCall) {
+ // Each tail call may have to adjust the stack by a different amount, so
+ // this information must travel along with the operation for eventual
+ // consumption by emitEpilogue.
+ Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
+ }
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+
+ // Add a register mask operand representing the call-preserved registers. This
+ // is used later in codegen to constrain register-allocation.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // If we needed glue, put it in as the last argument.
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ if (IsTailCall) {
+ return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Now we can reclaim the stack, just as well do it before working out where
+ // our return value is.
+ if (!IsSibCall) {
+ uint64_t CalleePopBytes
+ = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(CalleePopBytes, true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ return LowerCallResult(Chain, InFlag, CallConv,
+ IsVarArg, Ins, dl, DAG, InVals);
+}
+
+SDValue
+AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
+
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ // Return values that are too big to fit into registers should use an sret
+ // pointer, so this can be a lot simpler than the main argument code.
+ assert(VA.isRegLoc() && "Memory locations not expected for call return");
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ case CCValAssign::SExt:
+ case CCValAssign::AExt:
+ // Floating-point arguments only get extended/truncated if they're going
+ // in memory, so using the integer operation is acceptable here.
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+bool
+AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+
+ // For CallingConv::C this function knows whether the ABI needs
+ // changing. That's not true for other conventions so they will have to opt in
+ // manually.
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = MF.getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Byval parameters hand the function a pointer directly into the stack area
+ // we want to reuse during a tail call. Working around this *is* possible (see
+ // X86) but less efficient and uglier in LowerCall.
+ for (Function::const_arg_iterator i = CallerF->arg_begin(),
+ e = CallerF->arg_end(); i != e; ++i)
+ if (i->hasByValAttr())
+ return false;
+
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
+ if (IsTailCallConvention(CalleeCC) && CCMatch)
+ return true;
+ return false;
+ }
+
+ // Now we search for cases where we can use a tail call without changing the
+ // ABI. Sibcall is used in some places (particularly gcc) to refer to this
+ // concept.
+
+ // I want anyone implementing a new calling convention to think long and hard
+ // about this assert.
+ assert((!IsVarArg || CalleeCC == CallingConv::C)
+ && "Unexpected variadic calling convention");
+
+ if (IsVarArg && !Outs.empty()) {
+ // At least two cases here: if caller is fastcc then we can't have any
+ // memory arguments (we'd be expected to clean up the stack afterwards). If
+ // caller is C then we could potentially use its argument area.
+
+ // FIXME: for now we take the most conservative of these in both cases:
+ // disallow all variadic memory operands.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // Nothing more to check if the callee is taking no arguments
+ if (Outs.empty())
+ return true;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+
+ const AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+
+ // If the stack arguments for this call would fit into our own save area then
+ // the call can be made tail.
+ return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
+}
+
+bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
+ bool TailCallOpt) const {
+ return CallCC == CallingConv::Fast && TailCallOpt;
+}
+
+bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
+ return CallCC == CallingConv::Fast;
+}
+
+SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
+ SelectionDAG &DAG,
+ MachineFrameInfo *MFI,
+ int ClobberedFI) const {
+ SmallVector<SDValue, 8> ArgChains;
+ int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
+ int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each stack argument corresponding
+ for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
+ UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0) {
+ int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
+ int64_t InLastByte = InFirstByte;
+ InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
+
+ if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
+ (FirstByte <= InFirstByte && InFirstByte <= LastByte))
+ ArgChains.push_back(SDValue(L, 1));
+ }
+
+ // Build a tokenfactor for all the chains.
+ return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
+
+static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETEQ: return A64CC::EQ;
+ case ISD::SETGT: return A64CC::GT;
+ case ISD::SETGE: return A64CC::GE;
+ case ISD::SETLT: return A64CC::LT;
+ case ISD::SETLE: return A64CC::LE;
+ case ISD::SETNE: return A64CC::NE;
+ case ISD::SETUGT: return A64CC::HI;
+ case ISD::SETUGE: return A64CC::HS;
+ case ISD::SETULT: return A64CC::LO;
+ case ISD::SETULE: return A64CC::LS;
+ default: llvm_unreachable("Unexpected condition code");
+ }
+}
+
+bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
+ // icmp is implemented using adds/subs immediate, which take an unsigned
+ // 12-bit immediate, optionally shifted left by 12 bits.
+
+ // Symmetric by using adds/subs
+ if (Val < 0)
+ Val = -Val;
+
+ return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
+}
+
+SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &A64cc,
+ SelectionDAG &DAG, DebugLoc &dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ int64_t C = 0;
+ EVT VT = RHSC->getValueType(0);
+ bool knownInvalid = false;
+
+ // I'm not convinced the rest of LLVM handles these edge cases properly, but
+ // we can at least get it right.
+ if (isSignedIntSetCC(CC)) {
+ C = RHSC->getSExtValue();
+ } else if (RHSC->getZExtValue() > INT64_MAX) {
+ // A 64-bit constant not representable by a signed 64-bit integer is far
+ // too big to fit into a SUBS immediate anyway.
+ knownInvalid = true;
+ } else {
+ C = RHSC->getZExtValue();
+ }
+
+ if (!knownInvalid && !isLegalICmpImmediate(C)) {
+ // Constant does not fit, try adjusting it by one?
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, VT);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, VT);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, VT);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, VT);
+ }
+ break;
+ }
+ }
+ }
+
+ A64CC::CondCodes CondCode = IntCCToA64CC(CC);
+ A64cc = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+}
+
+static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
+ A64CC::CondCodes &Alternative) {
+ A64CC::CondCodes CondCode = A64CC::Invalid;
+ Alternative = A64CC::Invalid;
+
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = A64CC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = A64CC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = A64CC::GE; break;
+ case ISD::SETOLT: CondCode = A64CC::MI; break;
+ case ISD::SETOLE: CondCode = A64CC::LS; break;
+ case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
+ case ISD::SETO: CondCode = A64CC::VC; break;
+ case ISD::SETUO: CondCode = A64CC::VS; break;
+ case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
+ case ISD::SETUGT: CondCode = A64CC::HI; break;
+ case ISD::SETUGE: CondCode = A64CC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = A64CC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = A64CC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = A64CC::NE; break;
+ }
+ return CondCode;
+}
+
+SDValue
+AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small
+ && "Only small code model supported at the moment");
+
+ // The most efficient code is PC-relative anyway for the small memory model,
+ // so we don't need to worry about relocation model.
+ return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_NO_FLAG),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_LO12),
+ DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+}
+
+
+// (BRCOND chain, val, dest)
+SDValue
+AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0);
+ SDValue TheBit = Op.getOperand(1);
+ SDValue DestBB = Op.getOperand(2);
+
+ // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
+ // that as the consumer we are responsible for ignoring rubbish in higher
+ // bits.
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
+ DAG.getConstant(0, TheBit.getValueType()),
+ DAG.getCondCode(ISD::SETNE));
+
+ return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
+ A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
+ DestBB);
+}
+
+// (BR_CC chain, condcode, lhs, rhs, dest)
+SDValue
+AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue DestBB = Op.getOperand(4);
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons are lowered to runtime calls by a routine which sets
+ // LHS, RHS and CC appropriately for the rest of this function to continue.
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (RHS.getNode() == 0) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ Chain, CmpOp, A64cc, DestBB);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ Chain, SetCC, A64cc, DestBB);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ A64BR_CC, SetCC, A64cc, DestBB);
+
+ }
+
+ return A64BR_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const {
+ ArgListTy Args;
+ ArgListEntry Entry;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
+
+ Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
+
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ // isTailCall may be true since the callee does not reference caller stack
+ // frame. Check if it's in the right position.
+ SDValue TCChain = InChain;
+ bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
+ if (isTailCall)
+ InChain = TCChain;
+
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
+ 0, getLibcallCallingConv(Call), isTailCall,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
+ return CallInfo.first;
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, Op.getDebugLoc());
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
+ // we make that distinction here.
+
+ // We support the small memory model for now.
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small);
+
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GN->getGlobal();
+ unsigned Alignment = GV->getAlignment();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
+ // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
+ // to zero when they remain undefined. In PIC mode the GOT can take care of
+ // this, but in absolute mode we use a constant pool load.
+ SDValue PoolAddr;
+ PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+ DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ DAG.getConstant(8, MVT::i32));
+ SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /*isVolatile=*/ false,
+ /*isNonTemporal=*/ true,
+ /*isInvariant=*/ true, 8);
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalAddr;
+ }
+
+ if (Alignment == 0) {
+ const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
+ if (GVPtrTy->getElementType()->isSized()) {
+ Alignment
+ = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
+ } else {
+ // Be conservative if we can't guess, not that it really matters:
+ // functions and labels aren't valid for loads, and the methods used to
+ // actually calculate an address work with any alignment.
+ Alignment = 1;
+ }
+ }
+
+ unsigned char HiFixup, LoFixup;
+ bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM);
+
+ if (UseGOT) {
+ HiFixup = AArch64II::MO_GOT;
+ LoFixup = AArch64II::MO_GOT_LO12;
+ Alignment = 8;
+ } else {
+ HiFixup = AArch64II::MO_NO_FLAG;
+ LoFixup = AArch64II::MO_LO12;
+ }
+
+ // AArch64's small model demands the following sequence:
+ // ADRP x0, somewhere
+ // ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
+ SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ HiFixup),
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ LoFixup),
+ DAG.getConstant(Alignment, MVT::i32));
+
+ if (UseGOT) {
+ GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
+ GlobalRef);
+ }
+
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalRef;
+}
+
+SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
+ SDValue DescAddr,
+ DebugLoc DL,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+
+ // The function we need to call is simply the first entry in the GOT for this
+ // descriptor, load it in preparation.
+ SDValue Func, Chain;
+ Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
+ DescAddr);
+
+ // The function takes only one argument: the address of the descriptor itself
+ // in X0.
+ SDValue Glue;
+ Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
+ Glue = Chain.getValue(1);
+
+ // Finally, there's a special calling-convention which means that the lookup
+ // must preserve all registers (except X0, obviously).
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const AArch64RegisterInfo *A64RI
+ = static_cast<const AArch64RegisterInfo *>(TRI);
+ const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
+
+ // We're now ready to populate the argument list, as with a normal call:
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Func);
+ Ops.push_back(SymAddr);
+ Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ Ops.push_back(Glue);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0],
+ Ops.size());
+ Glue = Chain.getValue(1);
+
+ // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
+ // back to the generic handling code.
+ return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ SDValue TPOff;
+ EVT PtrVT = getPointerTy();
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+
+ SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
+
+ if (Model == TLSModel::InitialExec) {
+ TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_GOTTPREL),
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_GOTTPREL_LO12),
+ DAG.getConstant(8, MVT::i32));
+ TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
+ TPOff);
+ } else if (Model == TLSModel::LocalExec) {
+ SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_TPREL_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_TPREL_G0_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
+ TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
+ // Accesses used in this sequence go via the TLS descriptor which lives in
+ // the GOT. Prepare an address we can use to handle this.
+ SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_TLSDESC);
+ SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_TLSDESC_LO12);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ HiDesc, LoDesc,
+ DAG.getConstant(8, MVT::i32));
+ SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
+
+ TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ } else if (Model == TLSModel::LocalDynamic) {
+ // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
+ // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
+ // the beginning of the module's TLS region, followed by a DTPREL offset
+ // calculation.
+
+ // These accesses will need deduplicating if there's more than one.
+ AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
+ .getInfo<AArch64MachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+
+ // Get the location of _TLS_MODULE_BASE_:
+ SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLSDESC);
+ SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLSDESC_LO12);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ HiDesc, LoDesc,
+ DAG.getConstant(8, MVT::i32));
+ SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
+
+ ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+
+ // Get the variable's offset from _TLS_MODULE_BASE_
+ SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_DTPREL_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_DTPREL_G0_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
+ TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ } else
+ llvm_unreachable("Unsupported TLS access model");
+
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
+}
+
+SDValue
+AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned) const {
+ if (Op.getValueType() != MVT::f128) {
+ // Legal for everything except f128.
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+
+SDValue
+AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ DebugLoc dl = JT->getDebugLoc();
+
+ // When compiling PIC, jump tables get put in the code section so a static
+ // relocation-style is acceptable for both cases.
+ return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(),
+ DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()),
+ DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+ AArch64II::MO_LO12),
+ DAG.getConstant(1, MVT::i32));
+}
+
+// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
+SDValue
+AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue IfTrue = Op.getOperand(2);
+ SDValue IfFalse = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons are lowered to libcalls, but slot in nicely here
+ // afterwards.
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (RHS.getNode() == 0) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ CmpOp, IfTrue, IfFalse, A64cc);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
+ Op.getValueType(),
+ SetCC, IfTrue, IfFalse, A64cc);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ SetCC, IfTrue, A64SELECT_CC, A64cc);
+
+ }
+
+ return A64SELECT_CC;
+}
+
+// (SELECT testbit, iftrue, iffalse)
+SDValue
+AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue TheBit = Op.getOperand(0);
+ SDValue IfTrue = Op.getOperand(1);
+ SDValue IfFalse = Op.getOperand(2);
+
+ // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
+ // that as the consumer we are responsible for ignoring rubbish in higher
+ // bits.
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
+ DAG.getConstant(1, MVT::i32));
+ SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
+ DAG.getConstant(0, TheBit.getValueType()),
+ DAG.getCondCode(ISD::SETNE));
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ A64CMP, IfTrue, IfFalse,
+ DAG.getConstant(A64CC::NE, MVT::i32));
+}
+
+// (SETCC lhs, rhs, condcode)
+SDValue
+AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ EVT VT = Op.getValueType();
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
+ // for the rest of the function (some i32 or i64 values).
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (RHS.getNode() == 0) {
+ assert(LHS.getValueType() == Op.getValueType() &&
+ "Unexpected setcc expansion!");
+ return LHS;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+ CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ A64cc);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+ CmpOp, DAG.getConstant(1, VT),
+ DAG.getConstant(0, VT), A64cc);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
+ DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
+ }
+
+ return A64SELECT_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+ const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+
+ // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
+ // rather than just 8.
+ return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(32, MVT::i32), 8, false, false,
+ MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue
+AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // The layout of the va_list struct is specified in the AArch64 Procedure Call
+ // Standard, section B.3.
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue VAList = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ SmallVector<SDValue, 4> MemOps;
+
+ // void *__stack at offset 0
+ SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
+ getPointerTy());
+ MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
+ MachinePointerInfo(SV), false, false, 0));
+
+ // void *__gr_top at offset 8
+ int GPRSize = FuncInfo->getVariadicGPRSize();
+ if (GPRSize > 0) {
+ SDValue GRTop, GRTopAddr;
+
+ GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(8, getPointerTy()));
+
+ GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
+ GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
+ DAG.getConstant(GPRSize, getPointerTy()));
+
+ MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
+ MachinePointerInfo(SV, 8),
+ false, false, 0));
+ }
+
+ // void *__vr_top at offset 16
+ int FPRSize = FuncInfo->getVariadicFPRSize();
+ if (FPRSize > 0) {
+ SDValue VRTop, VRTopAddr;
+ VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(16, getPointerTy()));
+
+ VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
+ VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
+ DAG.getConstant(FPRSize, getPointerTy()));
+
+ MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
+ MachinePointerInfo(SV, 16),
+ false, false, 0));
+ }
+
+ // int __gr_offs at offset 24
+ SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(24, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
+ GROffsAddr, MachinePointerInfo(SV, 24),
+ false, false, 0));
+
+ // int __vr_offs at offset 28
+ SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(28, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
+ VROffsAddr, MachinePointerInfo(SV, 28),
+ false, false, 0));
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+ MemOps.size());
+}
+
+SDValue
+AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
+ case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
+ case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
+ case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // We're looking for an SRA/SHL pair which form an SBFX.
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t TruncMask = N->getConstantOperandVal(1);
+ if (!isMask_64(TruncMask))
+ return SDValue();
+
+ uint64_t Width = CountPopulation_64(TruncMask);
+ SDValue Shift = N->getOperand(0);
+
+ if (Shift.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ return SDValue();
+ uint64_t LSB = Shift->getConstantOperandVal(1);
+
+ if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
+
+static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // An atomic operation followed by an acquiring atomic fence can be reduced to
+ // an acquiring load. The atomic operation provides a convenient pointer to
+ // load from. If the original operation was a load anyway we can actually
+ // combine the two operations into an acquiring load.
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue AtomicOp = FenceNode->getOperand(0);
+ AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
+
+ // A fence on its own can't be optimised
+ if (!AtomicNode)
+ return SDValue();
+
+ AtomicOrdering FenceOrder
+ = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
+ SynchronizationScope FenceScope
+ = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
+
+ if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
+ return SDValue();
+
+ // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
+ // the chain we use should be its input, otherwise we'll put our store after
+ // it so we use its output chain.
+ SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
+ AtomicNode->getChain() : AtomicOp;
+
+ // We have an acquire fence with a handy atomic operation nearby, we can
+ // convert the fence into a load-acquire, discarding the result.
+ DebugLoc DL = FenceNode->getDebugLoc();
+ SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
+ AtomicNode->getValueType(0),
+ Chain, // Chain
+ AtomicOp.getOperand(1), // Pointer
+ AtomicNode->getMemOperand(), Acquire,
+ FenceScope);
+
+ if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
+ DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
+
+ return Op.getValue(1);
+}
+
+static SDValue PerformATOMIC_STORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // A releasing atomic fence followed by an atomic store can be combined into a
+ // single store operation.
+ SelectionDAG &DAG = DCI.DAG;
+ AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
+ SDValue FenceOp = AtomicNode->getOperand(0);
+
+ if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
+ return SDValue();
+
+ AtomicOrdering FenceOrder
+ = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
+ SynchronizationScope FenceScope
+ = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
+
+ if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
+ return SDValue();
+
+ DebugLoc DL = AtomicNode->getDebugLoc();
+ return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
+ FenceOp.getOperand(0), // Chain
+ AtomicNode->getOperand(1), // Pointer
+ AtomicNode->getOperand(2), // Value
+ AtomicNode->getMemOperand(), Release,
+ FenceScope);
+}
+
+/// For a true bitfield insert, the bits getting into that contiguous mask
+/// should come from the low part of an existing value: they must be formed from
+/// a compatible SHL operation (unless they're already low). This function
+/// checks that condition and returns the least-significant bit that's
+/// intended. If the operation not a field preparation, -1 is returned.
+static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+ SDValue &MaskedVal, uint64_t Mask) {
+ if (!isShiftedMask_64(Mask))
+ return -1;
+
+ // Now we need to alter MaskedVal so that it is an appropriate input for a BFI
+ // instruction. BFI will do a left-shift by LSB before applying the mask we've
+ // spotted, so in general we should pre-emptively "undo" that by making sure
+ // the incoming bits have had a right-shift applied to them.
+ //
+ // This right shift, however, will combine with existing left/right shifts. In
+ // the simplest case of a completely straight bitfield operation, it will be
+ // expected to completely cancel out with an existing SHL. More complicated
+ // cases (e.g. bitfield to bitfield copy) may still need a real shift before
+ // the BFI.
+
+ uint64_t LSB = CountTrailingZeros_64(Mask);
+ int64_t ShiftRightRequired = LSB;
+ if (MaskedVal.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
+ ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
+ MaskedVal = MaskedVal.getOperand(0);
+ } else if (MaskedVal.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
+ ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
+ MaskedVal = MaskedVal.getOperand(0);
+ }
+
+ if (ShiftRightRequired > 0)
+ MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
+ DAG.getConstant(ShiftRightRequired, MVT::i64));
+ else if (ShiftRightRequired < 0) {
+ // We could actually end up with a residual left shift, for example with
+ // "struc.bitfield = val << 1".
+ MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
+ DAG.getConstant(-ShiftRightRequired, MVT::i64));
+ }
+
+ return LSB;
+}
+
+/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
+/// a mask and an extension. Returns true if a BFI was found and provides
+/// information on its surroundings.
+static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
+ bool &Extended) {
+ Extended = false;
+ if (N.getOpcode() == ISD::ZERO_EXTEND) {
+ Extended = true;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+ Mask = N->getConstantOperandVal(1);
+ N = N.getOperand(0);
+ } else {
+ // Mask is the whole width.
+ Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
+ }
+
+ if (N.getOpcode() == AArch64ISD::BFI) {
+ BFI = N;
+ return true;
+ }
+
+ return false;
+}
+
+/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
+/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
+/// can often be further combined with a larger mask. Ultimately, we want mask
+/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
+static SDValue tryCombineToBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+ // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
+ // abandon the effort.
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::AND)
+ return SDValue();
+
+ uint64_t LHSMask;
+ if (isa<ConstantSDNode>(LHS.getOperand(1)))
+ LHSMask = LHS->getConstantOperandVal(1);
+ else
+ return SDValue();
+
+ // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
+ // is or abandon the effort.
+ SDValue RHS = N->getOperand(1);
+ if (RHS.getOpcode() != ISD::AND)
+ return SDValue();
+
+ uint64_t RHSMask;
+ if (isa<ConstantSDNode>(RHS.getOperand(1)))
+ RHSMask = RHS->getConstantOperandVal(1);
+ else
+ return SDValue();
+
+ // Can't do anything if the masks are incompatible.
+ if (LHSMask & RHSMask)
+ return SDValue();
+
+ // Now we need one of the masks to be a contiguous field. Without loss of
+ // generality that should be the RHS one.
+ SDValue Bitfield = LHS.getOperand(0);
+ if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
+ // We know that LHS is a candidate new value, and RHS isn't already a better
+ // one.
+ std::swap(LHS, RHS);
+ std::swap(LHSMask, RHSMask);
+ }
+
+ // We've done our best to put the right operands in the right places, all we
+ // can do now is check whether a BFI exists.
+ Bitfield = RHS.getOperand(0);
+ int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
+ if (LSB == -1)
+ return SDValue();
+
+ uint32_t Width = CountPopulation_64(RHSMask);
+ assert(Width && "Expected non-zero bitfield width");
+
+ SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+ LHS.getOperand(0), Bitfield,
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(Width, MVT::i64));
+
+ // Mask is trivial
+ if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
+ return BFI;
+
+ return DAG.getNode(ISD::AND, DL, VT, BFI,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+}
+
+/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
+/// original input. This is surprisingly common because SROA splits things up
+/// into i8 chunks, so the originally detected MaskedBFI may actually only act
+/// on the low (say) byte of a word. This is then orred into the rest of the
+/// word afterwards.
+///
+/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
+///
+/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
+/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
+/// involved.
+static SDValue tryCombineToLargerBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // First job is to hunt for a MaskedBFI on either the left or right. Swap
+ // operands if it's actually on the right.
+ SDValue BFI;
+ SDValue PossExtraMask;
+ uint64_t ExistingMask = 0;
+ bool Extended = false;
+ if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
+ PossExtraMask = N->getOperand(1);
+ else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
+ PossExtraMask = N->getOperand(0);
+ else
+ return SDValue();
+
+ // We can only combine a BFI with another compatible mask.
+ if (PossExtraMask.getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
+ return SDValue();
+
+ uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
+
+ // Masks must be compatible.
+ if (ExtraMask & ExistingMask)
+ return SDValue();
+
+ SDValue OldBFIVal = BFI.getOperand(0);
+ SDValue NewBFIVal = BFI.getOperand(1);
+ if (Extended) {
+ // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
+ // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
+ // need to be made compatible.
+ assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
+ && "Invalid types for BFI");
+ OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
+ NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
+ }
+
+ // We need the MaskedBFI to be combined with a mask of the *same* value.
+ if (PossExtraMask.getOperand(0) != OldBFIVal)
+ return SDValue();
+
+ BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+ OldBFIVal, NewBFIVal,
+ BFI.getOperand(2), BFI.getOperand(3));
+
+ // If the masking is trivial, we don't need to create it.
+ if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
+ return BFI;
+
+ return DAG.getNode(ISD::AND, DL, VT, BFI,
+ DAG.getConstant(ExtraMask | ExistingMask, VT));
+}
+
+/// An EXTR instruction is made up of two shifts, ORed together. This helper
+/// searches for and classifies those shifts.
+static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
+ bool &FromHi) {
+ if (N.getOpcode() == ISD::SHL)
+ FromHi = false;
+ else if (N.getOpcode() == ISD::SRL)
+ FromHi = true;
+ else
+ return false;
+
+ if (!isa<ConstantSDNode>(N.getOperand(1)))
+ return false;
+
+ ShiftAmount = N->getConstantOperandVal(1);
+ Src = N->getOperand(0);
+ return true;
+}
+
+/// EXTR instruction extracts a contiguous chunk of bits from two existing
+/// registers viewed as a high/low pair. This function looks for the pattern:
+/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
+/// EXTR. Can't quite be done in TableGen because the two immediates aren't
+/// independent.
+static SDValue tryCombineToEXTR(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ SDValue LHS;
+ uint32_t ShiftLHS = 0;
+ bool LHSFromHi = 0;
+ if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
+ return SDValue();
+
+ SDValue RHS;
+ uint32_t ShiftRHS = 0;
+ bool RHSFromHi = 0;
+ if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
+ return SDValue();
+
+ // If they're both trying to come from the high part of the register, they're
+ // not really an EXTR.
+ if (LHSFromHi == RHSFromHi)
+ return SDValue();
+
+ if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
+ return SDValue();
+
+ if (LHSFromHi) {
+ std::swap(LHS, RHS);
+ std::swap(ShiftLHS, ShiftRHS);
+ }
+
+ return DAG.getNode(AArch64ISD::EXTR, DL, VT,
+ LHS, RHS,
+ DAG.getConstant(ShiftRHS, MVT::i64));
+}
+
+/// Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ // Attempt to recognise bitfield-insert operations.
+ SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
+ if (Res.getNode())
+ return Res;
+
+ // Attempt to combine an existing MaskedBFI operation into one with a larger
+ // mask.
+ Res = tryCombineToLargerBFI(N, DCI, Subtarget);
+ if (Res.getNode())
+ return Res;
+
+ Res = tryCombineToEXTR(N, DCI);
+ if (Res.getNode())
+ return Res;
+
+ return SDValue();
+}
+
+/// Target-specific dag combine xforms for ISD::SRA
+static SDValue PerformSRACombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // We're looking for an SRA/SHL pair which form an SBFX.
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t ExtraSignBits = N->getConstantOperandVal(1);
+ SDValue Shift = N->getOperand(0);
+
+ if (Shift.getOpcode() != ISD::SHL)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ return SDValue();
+
+ uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
+ uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
+ uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
+
+ if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
+
+
+SDValue
+AArch64TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::AND: return PerformANDCombine(N, DCI);
+ case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
+ case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::SRA: return PerformSRACombine(N, DCI);
+ }
+ return SDValue();
+}
+
+AArch64TargetLowering::ConstraintType
+AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'w': // An FP/SIMD vector register
+ return C_RegisterClass;
+ case 'I': // Constant that can be used with an ADD instruction
+ case 'J': // Constant that can be used with a SUB instruction
+ case 'K': // Constant that can be used with a 32-bit logical instruction
+ case 'L': // Constant that can be used with a 64-bit logical instruction
+ case 'M': // Constant that can be used as a 32-bit MOV immediate
+ case 'N': // Constant that can be used as a 64-bit MOV immediate
+ case 'Y': // Floating point constant zero
+ case 'Z': // Integer constant zero
+ return C_Other;
+ case 'Q': // A memory reference with base register and no offset
+ return C_Memory;
+ case 'S': // A symbolic address
+ return C_Other;
+ }
+ }
+
+ // FIXME: Ump, Utf, Usa, Ush
+ // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
+ // whatever they may be
+ // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
+ // Usa: An absolute symbolic address
+ // Ush: The high part (bits 32:12) of a pc-relative symbolic address
+ assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
+ && Constraint != "Ush" && "Unimplemented constraints");
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+TargetLowering::ConstraintWeight
+AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const {
+
+ llvm_unreachable("Constraint weight unimplemented");
+}
+
+void
+AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only length 1 constraints are C_Other.
+ if (Constraint.size() != 1) return;
+
+ // Only C_Other constraints get lowered like this. That means constants for us
+ // so return early if there's no hope the constraint can be lowered.
+
+ switch(Constraint[0]) {
+ default: break;
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'Z': {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ uint64_t CVal = C->getZExtValue();
+ uint32_t Bits;
+
+ switch (Constraint[0]) {
+ default:
+ // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
+ // is a peculiarly useless SUB constraint.
+ llvm_unreachable("Unimplemented C_Other constraint");
+ case 'I':
+ if (CVal <= 0xfff)
+ break;
+ return;
+ case 'K':
+ if (A64Imms::isLogicalImm(32, CVal, Bits))
+ break;
+ return;
+ case 'L':
+ if (A64Imms::isLogicalImm(64, CVal, Bits))
+ break;
+ return;
+ case 'Z':
+ if (CVal == 0)
+ break;
+ return;
+ }
+
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+ case 'S': {
+ // An absolute symbolic address or label reference.
+ if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(),
+ GA->getValueType(0));
+ } else if (const BlockAddressSDNode *BA
+ = dyn_cast<BlockAddressSDNode>(Op)) {
+ Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
+ BA->getValueType(0));
+ } else if (const ExternalSymbolSDNode *ES
+ = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
+ ES->getValueType(0));
+ } else
+ return;
+ break;
+ }
+ case 'Y':
+ if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ if (CFP->isExactlyValue(0.0)) {
+ Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
+ break;
+ }
+ }
+ return;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // It's an unknown constraint for us. Let generic code have a go.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+AArch64TargetLowering::getRegForInlineAsmConstraint(
+ const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ if (VT.getSizeInBits() <= 32)
+ return std::make_pair(0U, &AArch64::GPR32RegClass);
+ else if (VT == MVT::i64)
+ return std::make_pair(0U, &AArch64::GPR64RegClass);
+ break;
+ case 'w':
+ if (VT == MVT::f16)
+ return std::make_pair(0U, &AArch64::FPR16RegClass);
+ else if (VT == MVT::f32)
+ return std::make_pair(0U, &AArch64::FPR32RegClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, &AArch64::FPR64RegClass);
+ else if (VT.getSizeInBits() == 64)
+ return std::make_pair(0U, &AArch64::VPR64RegClass);
+ else if (VT == MVT::f128)
+ return std::make_pair(0U, &AArch64::FPR128RegClass);
+ else if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &AArch64::VPR128RegClass);
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
new file mode 100644
index 000000000000..4960d286e9de
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -0,0 +1,247 @@
+//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
+#define LLVM_TARGET_AARCH64_ISELLOWERING_H
+
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+
+namespace llvm {
+namespace AArch64ISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // This is a conditional branch which also notes the flag needed
+ // (eq/sgt/...). A64 puts this information on the branches rather than
+ // compares as LLVM does.
+ BR_CC,
+
+ // A node to be selected to an actual call operation: either BL or BLR in
+ // the absence of tail calls.
+ Call,
+
+ // Indicates a floating-point immediate which fits into the format required
+ // by the FMOV instructions. First (and only) operand is the 8-bit encoded
+ // value of that immediate.
+ FPMOV,
+
+ // Corresponds directly to an EXTR instruction. Operands are an LHS an RHS
+ // and an LSB.
+ EXTR,
+
+ // Wraps a load from the GOT, which should always be performed with a 64-bit
+ // load instruction. This prevents the DAG combiner folding a truncate to
+ // form a smaller memory access.
+ GOTLoad,
+
+ // Performs a bitfield insert. Arguments are: the value being inserted into;
+ // the value being inserted; least significant bit changed; width of the
+ // field.
+ BFI,
+
+ // Simply a convenient node inserted during ISelLowering to represent
+ // procedure return. Will almost certainly be selected to "RET".
+ Ret,
+
+ /// Extracts a field of contiguous bits from the source and sign extends
+ /// them into a single register. Arguments are: source; immr; imms. Note
+ /// these are pre-encoded since DAG matching can't cope with combining LSB
+ /// and Width into these values itself.
+ SBFX,
+
+ /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
+ /// main difference is that it only has the values and an A64 condition,
+ /// which will be produced by a setcc instruction.
+ SELECT_CC,
+
+ /// This serves most of the functions of the LLVM SETCC instruction, for two
+ /// purposes. First, it prevents optimisations from fiddling with the
+ /// compare after we've moved the CondCode information onto the SELECT_CC or
+ /// BR_CC instructions. Second, it gives a legal instruction for the actual
+ /// comparison.
+ ///
+ /// It keeps a record of the condition flags asked for because certain
+ /// instructions are only valid for a subset of condition codes.
+ SETCC,
+
+ // Designates a node which is a tail call: both a call and a return
+ // instruction as far as selction is concerned. It should be selected to an
+ // unconditional branch. Has the usual plethora of call operands, but: 1st
+ // is callee, 2nd is stack adjustment required immediately before branch.
+ TC_RETURN,
+
+ // Designates a call used to support the TLS descriptor ABI. The call itself
+ // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
+ // var") must be attached somehow during code generation. It takes two
+ // operands: the callee and the symbol to be relocated against.
+ TLSDESCCALL,
+
+ // Leaf node which will be lowered to an appropriate MRS to obtain the
+ // thread pointer: TPIDR_EL0.
+ THREAD_POINTER,
+
+ /// Extracts a field of contiguous bits from the source and zero extends
+ /// them into a single register. Arguments are: source; immr; imms. Note
+ /// these are pre-encoded since DAG matching can't cope with combining LSB
+ /// and Width into these values itself.
+ UBFX,
+
+ // Wraps an address which the ISelLowering phase has decided should be
+ // created using the small absolute memory model: i.e. adrp/add or
+ // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
+ // get selected.
+ WrapperSmall
+ };
+}
+
+
+class AArch64Subtarget;
+class AArch64TargetMachine;
+
+class AArch64TargetLowering : public TargetLowering {
+public:
+ explicit AArch64TargetLowering(AArch64TargetMachine &TM);
+
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;
+
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc DL, SDValue &Chain) const;
+
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ /// Finds the incoming stack arguments which overlap the given fixed stack
+ /// object and incorporates their load into the current chain. This prevents
+ /// an upcoming store from clobbering the stack argument before it's used.
+ SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
+ MachineFrameInfo *MFI, int ClobberedFI) const;
+
+ EVT getSetCCResultType(EVT VT) const;
+
+ bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
+
+ bool IsTailCallConvention(CallingConv::ID CallCC) const;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ bool isLegalICmpImmediate(int64_t Val) const;
+ SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *
+ emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Size, unsigned Opcode) const;
+
+ MachineBasicBlock *
+ emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned CmpOp,
+ A64CC::CondCodes Cond) const;
+ MachineBasicBlock *
+ emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size) const;
+
+ MachineBasicBlock *
+ EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL,
+ SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const;
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+private:
+ const AArch64Subtarget *Subtarget;
+ const TargetRegisterInfo *RegInfo;
+ const InstrItineraryData *Itins;
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
new file mode 100644
index 000000000000..cb93471058df
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -0,0 +1,961 @@
+//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file describes AArch64 instruction formats, down to the level of the
+// instruction's overall class.
+// ===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// A64 Instruction Format Definitions.
+//===----------------------------------------------------------------------===//
+
+// A64 is currently the only instruction set supported by the AArch64
+// architecture.
+class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : Instruction {
+ // All A64 instructions are 32-bit. This field will be filled in
+ // gradually going down the hierarchy.
+ field bits<32> Inst;
+
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
+
+ // LLVM-level model of the AArch64/A64 distinction.
+ let Namespace = "AArch64";
+ let DecoderNamespace = "A64";
+ let Size = 4;
+
+ // Set the templated fields
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = patterns;
+ let Itinerary = itin;
+}
+
+class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction {
+ let Namespace = "AArch64";
+
+ let OutOperandList = outs;
+ let InOperandList= ins;
+ let Pattern = patterns;
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// Represents a pseudo-instruction that represents a single A64 instruction for
+// whatever reason, the eventual result will be a 32-bit real instruction.
+class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
+ : PseudoInst<outs, ins, patterns> {
+ let Size = 4;
+}
+
+// As above, this will be a single A64 instruction, but we can actually give the
+// expansion in TableGen.
+class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result>
+ : A64PseudoInst<outs, ins, patterns>,
+ PseudoInstExpansion<Result>;
+
+
+// First, some common cross-hierarchy register formats.
+
+class A64InstRd<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rd;
+
+ let Inst{4-0} = Rd;
+}
+
+class A64InstRt<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rt;
+
+ let Inst{4-0} = Rt;
+}
+
+
+class A64InstRdn<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ // Inherit rdt
+ bits<5> Rn;
+
+ let Inst{9-5} = Rn;
+}
+
+class A64InstRtn<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ // Inherit rdt
+ bits<5> Rn;
+
+ let Inst{9-5} = Rn;
+}
+
+// Instructions taking Rt,Rt2,Rn
+class A64InstRtt2n<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rt2;
+
+ let Inst{14-10} = Rt2;
+}
+
+class A64InstRdnm<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rm;
+
+ let Inst{20-16} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Actual A64 Instruction Formats
+//
+
+// Format for Add-subtract (extended register) instructions.
+class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<3> Imm3;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = opt;
+ let Inst{21} = 0b1;
+ // Rm inherited in 20-16
+ let Inst{15-13} = option;
+ let Inst{12-10} = Imm3;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (immediate) instructions.
+class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<12> Imm12;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b10001;
+ let Inst{23-22} = shift;
+ let Inst{21-10} = Imm12;
+}
+
+// Format for Add-subtract (shifted register) instructions.
+class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> Imm6;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = shift;
+ let Inst{21} = 0b0;
+ // Rm inherited in 20-16
+ let Inst{15-10} = Imm6;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (with carry) instructions.
+class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010000;
+ // Rm inherited in 20-16
+ let Inst{15-10} = opcode2;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+
+// Format for Bitfield instructions
+class A64I_bitfield<bit sf, bits<2> opc, bit n,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<6> ImmR;
+ bits<6> ImmS;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100110;
+ let Inst{22} = n;
+ let Inst{21-16} = ImmR;
+ let Inst{15-10} = ImmS;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for compare and branch (immediate) instructions.
+class A64I_cmpbr<bit sf, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ bits<19> Label;
+
+ let Inst{31} = sf;
+ let Inst{30-25} = 0b011010;
+ let Inst{24} = op;
+ let Inst{23-5} = Label;
+ // Inherit Rt in 4-0
+}
+
+// Format for conditional branch (immediate) instructions.
+class A64I_condbr<bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<19> Label;
+ bits<4> Cond;
+
+ let Inst{31-25} = 0b0101010;
+ let Inst{24} = o1;
+ let Inst{23-5} = Label;
+ let Inst{4} = o0;
+ let Inst{3-0} = Cond;
+}
+
+// Format for conditional compare (immediate) instructions.
+class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> UImm5;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010010;
+ let Inst{20-16} = UImm5;
+ let Inst{15-12} = Cond;
+ let Inst{11} = 0b1;
+ let Inst{10} = o2;
+ let Inst{9-5} = Rn;
+ let Inst{4} = o3;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional compare (register) instructions.
+class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010010;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = Cond;
+ let Inst{11} = 0b0;
+ let Inst{10} = o2;
+ let Inst{9-5} = Rn;
+ let Inst{4} = o3;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional select instructions.
+class A64I_condsel<bit sf, bit op, bit s, bits<2> op2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010100;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = op2;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for data processing (1 source) instructions
+class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode,
+ string asmstr, dag outs, dag ins,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b1;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010110;
+ let Inst{20-16} = opcode2;
+ let Inst{15-10} = opcode;
+}
+
+// Format for data processing (2 source) instructions
+class A64I_dp_2src<bit sf, bits<6> opcode, bit S,
+ string asmstr, dag outs, dag ins,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010110;
+ let Inst{15-10} = opcode;
+}
+
+// Format for data-processing (3 source) instructions
+
+class A64I_dp3<bit sf, bits<6> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<5> Ra;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opcode{5-4};
+ let Inst{28-24} = 0b11011;
+ let Inst{23-21} = opcode{3-1};
+ // Inherits Rm in 20-16
+ let Inst{15} = opcode{0};
+ let Inst{14-10} = Ra;
+ // Inherits Rn in 9-5
+ // Inherits Rd in 4-0
+}
+
+// Format for exception generation instructions
+class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<16> UImm16;
+
+ let Inst{31-24} = 0b11010100;
+ let Inst{23-21} = opc;
+ let Inst{20-5} = UImm16;
+ let Inst{4-2} = op2;
+ let Inst{1-0} = ll;
+}
+
+// Format for extract (immediate) instructions
+class A64I_extract<bit sf, bits<3> op, bit n,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> LSB;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = op{2-1};
+ let Inst{28-23} = 0b100111;
+ let Inst{22} = n;
+ let Inst{21} = op{0};
+ // Inherits Rm in bits 20-16
+ let Inst{15-10} = LSB;
+ // Inherits Rn in 9-5
+ // Inherits Rd in 4-0
+}
+
+// Format for floating-point compare instructions.
+class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = op;
+ let Inst{13-10} = 0b1000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = opcode2;
+}
+
+// Format for floating-point conditional compare instructions.
+class A64I_fpccmp<bit m, bit s, bits<2> type, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4} = op;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for floating-point conditional select instructions.
+class A64I_fpcondsel<bit m, bit s, bits<2> type,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<4> Cond;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = 0b11;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format for floating-point data-processing (1 source) instructions.
+class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-15} = opcode;
+ let Inst{14-10} = 0b10000;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point data-processing (2 sources) instructions.
+class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point data-processing (3 sources) instructions.
+class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<5> Ra;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11111;
+ let Inst{23-22} = type;
+ let Inst{21} = o1;
+ // Inherit Rm in 20-16
+ let Inst{15} = o0;
+ let Inst{14-10} = Ra;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point <-> fixed-point conversion instructions.
+class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<6> Scale;
+
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b0;
+ let Inst{20-19} = mode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = Scale;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point <-> integer conversion instructions.
+class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format for floating-point immediate instructions.
+class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<8> Imm8;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-13} = Imm8;
+ let Inst{12-10} = 0b100;
+ let Inst{9-5} = imm5;
+ // Inherit Rd in 4-0
+}
+
+// Format for load-register (literal) instructions.
+class A64I_LDRlit<bits<2> opc, bit v,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ bits<19> Imm19;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b011;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-5} = Imm19;
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store exclusive instructions.
+class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-30} = size;
+ let Inst{29-24} = 0b001000;
+ let Inst{23} = o2;
+ let Inst{22} = L;
+ let Inst{21} = o1;
+ let Inst{15} = o0;
+}
+
+class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rt2;
+ let Inst{14-10} = Rt2;
+}
+
+class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rs;
+ let Inst{20-16} = Rs;
+}
+
+class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rt2;
+ let Inst{14-10} = Rt2;
+}
+
+// Format for load-store register (immediate post-indexed) instructions
+class A64I_LSpostind<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b01;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store register (immediate pre-indexed) instructions
+class A64I_LSpreind<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b11;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store register (unprivileged) instructions
+class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store (unscaled immediate) instructions.
+class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+
+// Format for load-store (unsigned immediate) instructions.
+class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<12> UImm12;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b01;
+ let Inst{23-22} = opc;
+ let Inst{21-10} = UImm12;
+}
+
+// Format for load-store register (register offset) instructions.
+class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rm;
+
+ // Complex operand selection needed for these instructions, so they
+ // need an "addr" field for encoding/decoding to be generated.
+ bits<3> Ext;
+ // OptionHi = Ext{2-1}
+ // S = Ext{0}
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = Ext{2-1};
+ let Inst{13} = optionlo;
+ let Inst{12} = Ext{0};
+ let Inst{11-10} = 0b10;
+ // Inherits Rn in 9-5
+ // Inherits Rt in 4-0
+
+ let AddedComplexity = 50;
+}
+
+// Format for Load-store register pair (offset) instructions
+class A64I_LSPoffset<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b010;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store register pair (post-indexed) instructions
+class A64I_LSPpostind<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b001;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store register pair (pre-indexed) instructions
+class A64I_LSPpreind<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b011;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store non-temporal register pair (offset) instructions
+class A64I_LSPnontemp<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b000;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Logical (immediate) instructions
+class A64I_logicalimm<bit sf, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bit N;
+ bits<6> ImmR;
+ bits<6> ImmS;
+
+ // N, ImmR and ImmS have no separate existence in any assembly syntax (or for
+ // selection), so we'll combine them into a single field here.
+ bits<13> Imm;
+ // N = Imm{12};
+ // ImmR = Imm{11-6};
+ // ImmS = Imm{5-0};
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100100;
+ let Inst{22} = Imm{12};
+ let Inst{21-16} = Imm{11-6};
+ let Inst{15-10} = Imm{5-0};
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Logical (shifted register) instructions
+class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> Imm6;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-24} = 0b01010;
+ let Inst{23-22} = shift;
+ let Inst{21} = N;
+ // Rm inherited
+ let Inst{15-10} = Imm6;
+ // Rn inherited
+ // Rd inherited
+}
+
+// Format for Move wide (immediate)
+class A64I_movw<bit sf, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<16> UImm16;
+ bits<2> Shift; // Called "hw" officially
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100101;
+ let Inst{22-21} = Shift;
+ let Inst{20-5} = UImm16;
+ // Inherits Rd in 4-0
+}
+
+// Format for PC-relative addressing instructions, ADR and ADRP.
+class A64I_PCADR<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<21> Label;
+
+ let Inst{31} = op;
+ let Inst{30-29} = Label{1-0};
+ let Inst{28-24} = 0b10000;
+ let Inst{23-5} = Label{20-2};
+}
+
+// Format for system instructions
+class A64I_system<bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<2> Op0;
+ bits<3> Op1;
+ bits<4> CRn;
+ bits<4> CRm;
+ bits<3> Op2;
+ bits<5> Rt;
+
+ let Inst{31-22} = 0b1101010100;
+ let Inst{21} = l;
+ let Inst{20-19} = Op0;
+ let Inst{18-16} = Op1;
+ let Inst{15-12} = CRn;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = Op2;
+ let Inst{4-0} = Rt;
+
+ // These instructions can do horrible things.
+ let hasSideEffects = 1;
+}
+
+// Format for unconditional branch (immediate) instructions
+class A64I_Bimm<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ // Doubly special in not even sharing register fields with other
+ // instructions, so we create our own Rn here.
+ bits<26> Label;
+
+ let Inst{31} = op;
+ let Inst{30-26} = 0b00101;
+ let Inst{25-0} = Label;
+}
+
+// Format for Test & branch (immediate) instructions
+class A64I_TBimm<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ // Doubly special in not even sharing register fields with other
+ // instructions, so we create our own Rn here.
+ bits<6> Imm;
+ bits<14> Label;
+
+ let Inst{31} = Imm{5};
+ let Inst{30-25} = 0b011011;
+ let Inst{24} = op;
+ let Inst{23-19} = Imm{4-0};
+ let Inst{18-5} = Label;
+ // Inherit Rt in 4-0
+}
+
+// Format for Unconditional branch (register) instructions, including
+// RET. Shares no fields with instructions further up the hierarchy
+// so top-level.
+class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ // Doubly special in not even sharing register fields with other
+ // instructions, so we create our own Rn here.
+ bits<5> Rn;
+
+ let Inst{31-25} = 0b1101011;
+ let Inst{24-21} = opc;
+ let Inst{20-16} = op2;
+ let Inst{15-10} = op3;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = op4;
+}
+
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
new file mode 100644
index 000000000000..cf3a2c3707d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -0,0 +1,822 @@
+//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#include <algorithm>
+
+#define GET_INSTRINFO_CTOR
+#include "AArch64GenInstrInfo.inc"
+
+using namespace llvm;
+
+AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
+ : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
+ RI(*this, STI), Subtarget(STI) {}
+
+void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0;
+ unsigned ZeroReg = 0;
+ if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
+ // E.g. ADD xDst, xsp, #0 (, lsl #0)
+ BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return;
+ } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
+ // E.g. ADD wDST, wsp, #0 (, lsl #0)
+ BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return;
+ } else if (DestReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ // E.g. MSR NZCV, xDST
+ BuildMI(MBB, I, DL, get(AArch64::MSRix))
+ .addImm(A64SysReg::NZCV)
+ .addReg(SrcReg);
+ } else if (SrcReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(DestReg));
+ // E.g. MRS xDST, NZCV
+ BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
+ .addImm(A64SysReg::NZCV);
+ } else if (AArch64::GPR64RegClass.contains(DestReg)) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ Opc = AArch64::ORRxxx_lsl;
+ ZeroReg = AArch64::XZR;
+ } else if (AArch64::GPR32RegClass.contains(DestReg)) {
+ assert(AArch64::GPR32RegClass.contains(SrcReg));
+ Opc = AArch64::ORRwww_lsl;
+ ZeroReg = AArch64::WZR;
+ } else if (AArch64::FPR32RegClass.contains(DestReg)) {
+ assert(AArch64::FPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else if (AArch64::FPR64RegClass.contains(DestReg)) {
+ assert(AArch64::FPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else if (AArch64::FPR128RegClass.contains(DestReg)) {
+ assert(AArch64::FPR128RegClass.contains(SrcReg));
+
+ // FIXME: there's no good way to do this, at least without NEON:
+ // + There's no single move instruction for q-registers
+ // + We can't create a spill slot and use normal STR/LDR because stack
+ // allocation has already happened
+ // + We can't go via X-registers with FMOV because register allocation has
+ // already happened.
+ // This may not be efficient, but at least it works.
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+ .addReg(SrcReg)
+ .addReg(AArch64::XSP)
+ .addImm(0x1ff & -16);
+
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+ .addReg(AArch64::XSP, RegState::Define)
+ .addReg(AArch64::XSP)
+ .addImm(16);
+ return;
+ } else {
+ llvm_unreachable("Unknown register class in copyPhysReg");
+ }
+
+ // E.g. ORR xDst, xzr, xSrc, lsl #0
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(ZeroReg)
+ .addReg(SrcReg)
+ .addImm(0);
+}
+
+MachineInstr *
+AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0)
+ .addImm(Offset)
+ .addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Does the Opcode represent a conditional branch that we can remove and re-add
+/// at the end of a basic block?
+static bool isCondBranch(unsigned Opc) {
+ return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
+ Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
+ Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
+ Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
+}
+
+/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
+/// setting TBB to the destination basic block and populating the Cond vector
+/// with data necessary to recreate the conditional branch at a later
+/// date. First element will be the opcode, and subsequent ones define the
+/// conditions being branched on in an instruction-specific manner.
+static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ switch(I->getOpcode()) {
+ case AArch64::Bcc:
+ case AArch64::CBZw:
+ case AArch64::CBZx:
+ case AArch64::CBNZw:
+ case AArch64::CBNZx:
+ // These instructions just have one predicate operand in position 0 (either
+ // a condition code or a register being compared).
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ TBB = I->getOperand(1).getMBB();
+ return;
+ case AArch64::TBZwii:
+ case AArch64::TBZxii:
+ case AArch64::TBNZwii:
+ case AArch64::TBNZxii:
+ // These have two predicate operands: a register and a bit position.
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ Cond.push_back(I->getOperand(1));
+ TBB = I->getOperand(2).getMBB();
+ return;
+ default:
+ llvm_unreachable("Unknown conditional branch to classify");
+ }
+}
+
+
+bool
+AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastOpc == AArch64::Bimm) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranch(LastOpc)) {
+ classifyCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && LastOpc == AArch64::Bimm) {
+ while (SecondLastOpc == AArch64::Bimm) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ // Return now the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (LastOpc == AArch64::Bimm) {
+ if (SecondLastOpc == AArch64::Bcc) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(SecondLastOpc)) {
+ classifyCondBranch(SecondLastInst, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+bool AArch64InstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ switch (Cond[0].getImm()) {
+ case AArch64::Bcc: {
+ A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
+ CC = A64InvertCondCode(CC);
+ Cond[1].setImm(CC);
+ return false;
+ }
+ case AArch64::CBZw:
+ Cond[0].setImm(AArch64::CBNZw);
+ return false;
+ case AArch64::CBZx:
+ Cond[0].setImm(AArch64::CBNZx);
+ return false;
+ case AArch64::CBNZw:
+ Cond[0].setImm(AArch64::CBZw);
+ return false;
+ case AArch64::CBNZx:
+ Cond[0].setImm(AArch64::CBZx);
+ return false;
+ case AArch64::TBZwii:
+ Cond[0].setImm(AArch64::TBNZwii);
+ return false;
+ case AArch64::TBZxii:
+ Cond[0].setImm(AArch64::TBNZxii);
+ return false;
+ case AArch64::TBNZwii:
+ Cond[0].setImm(AArch64::TBZwii);
+ return false;
+ case AArch64::TBNZxii:
+ Cond[0].setImm(AArch64::TBZxii);
+ return false;
+ default:
+ llvm_unreachable("Unknown branch type");
+ }
+}
+
+
+unsigned
+AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ if (FBB == 0 && Cond.empty()) {
+ BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
+ return 1;
+ } else if (FBB == 0) {
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ for (int i = 1, e = Cond.size(); i != e; ++i)
+ MIB.addOperand(Cond[i]);
+ MIB.addMBB(TBB);
+ return 1;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ for (int i = 1, e = Cond.size(); i != e; ++i)
+ MIB.addOperand(Cond[i]);
+ MIB.addMBB(TBB);
+
+ BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
+ return 2;
+}
+
+unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool
+AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AArch64::TLSDESC_BLRx: {
+ MachineInstr *NewMI =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
+ .addOperand(MI.getOperand(1));
+ MI.setDesc(get(AArch64::BLRx));
+
+ llvm::finalizeBundle(MBB, NewMI, *++MBBI);
+ return true;
+ }
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+void
+AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+ MachineMemOperand *MMO
+ = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ Align);
+
+ unsigned StoreOp = 0;
+ if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+ switch(RC->getSize()) {
+ case 4: StoreOp = AArch64::LS32_STR; break;
+ case 8: StoreOp = AArch64::LS64_STR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ } else {
+ assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+ RC->hasType(MVT::f128))
+ && "Expected integer or floating type for store");
+ switch (RC->getSize()) {
+ case 4: StoreOp = AArch64::LSFP32_STR; break;
+ case 8: StoreOp = AArch64::LSFP64_STR; break;
+ case 16: StoreOp = AArch64::LSFP128_STR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ }
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
+ NewMI.addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx)
+ .addImm(0)
+ .addMemOperand(MMO);
+
+}
+
+void
+AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+ MachineMemOperand *MMO
+ = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ Align);
+
+ unsigned LoadOp = 0;
+ if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+ switch(RC->getSize()) {
+ case 4: LoadOp = AArch64::LS32_LDR; break;
+ case 8: LoadOp = AArch64::LS64_LDR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ } else {
+ assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
+ || RC->hasType(MVT::f128))
+ && "Expected integer or floating type for store");
+ switch (RC->getSize()) {
+ case 4: LoadOp = AArch64::LSFP32_LDR; break;
+ case 8: LoadOp = AArch64::LSFP64_LDR; break;
+ case 16: LoadOp = AArch64::LSFP128_LDR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ }
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
+ NewMI.addFrameIndex(FrameIdx)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
+unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
+ unsigned Limit = (1 << 16) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
+ // is the largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
+ Limit = std::min(Limit, 0xfffu);
+ break;
+ }
+
+ int AccessScale, MinOffset, MaxOffset;
+ getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
+ Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
+
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
+ int &AccessScale, int &MinOffset,
+ int &MaxOffset) const {
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unkown load/store kind");
+ case TargetOpcode::DBG_VALUE:
+ AccessScale = 1;
+ MinOffset = INT_MIN;
+ MaxOffset = INT_MAX;
+ return;
+ case AArch64::LS8_LDR: case AArch64::LS8_STR:
+ case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
+ case AArch64::LDRSBw:
+ case AArch64::LDRSBx:
+ AccessScale = 1;
+ MinOffset = 0;
+ MaxOffset = 0xfff;
+ return;
+ case AArch64::LS16_LDR: case AArch64::LS16_STR:
+ case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
+ case AArch64::LDRSHw:
+ case AArch64::LDRSHx:
+ AccessScale = 2;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LS32_LDR: case AArch64::LS32_STR:
+ case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
+ case AArch64::LDRSWx:
+ case AArch64::LDPSWx:
+ AccessScale = 4;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LS64_LDR: case AArch64::LS64_STR:
+ case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
+ case AArch64::PRFM:
+ AccessScale = 8;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
+ AccessScale = 16;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
+ case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
+ AccessScale = 4;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
+ case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
+ AccessScale = 8;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
+ AccessScale = 16;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ }
+}
+
+unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ const MCInstrDesc &MCID = MI.getDesc();
+ const MachineBasicBlock &MBB = *MI.getParent();
+ const MachineFunction &MF = *MBB.getParent();
+ const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
+
+ if (MCID.getSize())
+ return MCID.getSize();
+
+ if (MI.getOpcode() == AArch64::INLINEASM)
+ return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
+
+ if (MI.isLabel())
+ return 0;
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case AArch64::TLSDESCCALL:
+ return 0;
+ default:
+ llvm_unreachable("Unknown instruction class");
+ }
+}
+
+unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += getInstSizeInBytes(*I);
+ }
+ return Size;
+}
+
+bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo &TII) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MFI.getObjectOffset(FrameRegIdx);
+ llvm_unreachable("Unimplemented rewriteFrameIndex");
+}
+
+void llvm::emitRegUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+ int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
+ if (NumBytes == 0 && DstReg == SrcReg)
+ return;
+ else if (abs64(NumBytes) & ~0xffffff) {
+ // Generically, we have to materialize the offset into a temporary register
+ // and subtract it. There are a couple of ways this could be done, for now
+ // we'll use a movz/movk or movn/movk sequence.
+ uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
+ .addImm(0xffff & Bits).addImm(0)
+ .setMIFlags(MIFlags);
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(1)
+ .setMIFlags(MIFlags);
+ }
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(2)
+ .setMIFlags(MIFlags);
+ }
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(3)
+ .setMIFlags(MIFlags);
+ }
+
+ // ADD DST, SRC, xTMP (, lsl #0)
+ unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
+ BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(0)
+ .setMIFlag(MIFlags);
+ return;
+ }
+
+ // Now we know that the adjustment can be done in at most two add/sub
+ // (immediate) instructions, which is always more efficient than a
+ // literal-pool load, or even a hypothetical movz/movk/add sequence
+
+ // Decide whether we're doing addition or subtraction
+ unsigned LowOp, HighOp;
+ if (NumBytes >= 0) {
+ LowOp = AArch64::ADDxxi_lsl0_s;
+ HighOp = AArch64::ADDxxi_lsl12_s;
+ } else {
+ LowOp = AArch64::SUBxxi_lsl0_s;
+ HighOp = AArch64::SUBxxi_lsl12_s;
+ NumBytes = abs64(NumBytes);
+ }
+
+ // If we're here, at the very least a move needs to be produced, which just
+ // happens to be materializable by an ADD.
+ if ((NumBytes & 0xfff) || NumBytes == 0) {
+ BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(NumBytes & 0xfff)
+ .setMIFlag(MIFlags);
+
+ // Next update should use the register we've just defined.
+ SrcReg = DstReg;
+ }
+
+ if (NumBytes & 0xfff000) {
+ BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(NumBytes >> 12)
+ .setMIFlag(MIFlags);
+ }
+}
+
+void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned ScratchReg, int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags) {
+ emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
+ NumBytes, MIFlags);
+}
+
+
+namespace {
+ struct LDTLSCleanup : public MachineFunctionPass {
+ static char ID;
+ LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ AArch64MachineFunctionInfo* MFI
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there isn't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+ }
+
+ // Visit the dominator subtree rooted at Node in pre-order.
+ // If TLSBaseAddrReg is non-null, then use that to replace any
+ // TLS_base_addr instructions. Otherwise, create the register
+ // when the first such instruction is seen, and then use it
+ // as we encounter more instructions.
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ switch (I->getOpcode()) {
+ case AArch64::TLSDESC_BLRx:
+ // Make sure it's a local dynamic access.
+ if (!I->getOperand(1).isSymbol() ||
+ strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ break;
+
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+ I != E; ++I) {
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+ }
+
+ return Changed;
+ }
+
+ // Replace the TLS_base_addr instruction I with a copy from
+ // TLSBaseAddrReg, returning the new instruction.
+ MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+ // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
+ // code sequence assumes the address will be.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ AArch64::X0)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_base_addr instruction.
+ I->eraseFromParent();
+
+ return Copy;
+ }
+
+ // Create a virtal register in *TLSBaseAddrReg, and populate it by
+ // inserting a copy instruction after I. Returns the new instruction.
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
+
+ // Insert a copy from X0 to TLSBaseAddrReg for later.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ *TLSBaseAddrReg)
+ .addReg(AArch64::X0);
+
+ return Copy;
+ }
+
+ virtual const char *getPassName() const {
+ return "Local Dynamic TLS Access Clean-up";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char LDTLSCleanup::ID = 0;
+FunctionPass*
+llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
new file mode 100644
index 000000000000..22a2ab4cf60a
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -0,0 +1,112 @@
+//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64INSTRINFO_H
+#define LLVM_TARGET_AARCH64INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AArch64RegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AArch64GenInstrInfo.inc"
+
+namespace llvm {
+
+class AArch64Subtarget;
+
+class AArch64InstrInfo : public AArch64GenInstrInfo {
+ const AArch64RegisterInfo RI;
+ const AArch64Subtarget &Subtarget;
+public:
+ explicit AArch64InstrInfo(const AArch64Subtarget &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ const AArch64Subtarget &getSubTarget() const { return Subtarget; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ /// Look through the instructions in this function and work out the largest
+ /// the stack frame can be while maintaining the ability to address local
+ /// slots with no complexities.
+ unsigned estimateRSStackLimit(MachineFunction &MF) const;
+
+ /// getAddressConstraints - For loads and stores (and PRFMs) taking an
+ /// immediate offset, this function determines the constraints required for
+ /// the immediate. It must satisfy:
+ /// + MinOffset <= imm <= MaxOffset
+ /// + imm % OffsetScale == 0
+ void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
+ int &MinOffset, int &MaxOffset) const;
+
+
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const;
+
+ unsigned getInstBundleLength(const MachineInstr &MI) const;
+
+};
+
+bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo &TII);
+
+
+void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+ int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned ScratchReg, int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
new file mode 100644
index 000000000000..37be5e4892e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -0,0 +1,5099 @@
+//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the AArch64 scalar instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "AArch64InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Target-specific ISD nodes and profiles
+//===----------------------------------------------------------------------===//
+
+def SDT_A64ret : SDTypeProfile<0, 0, []>;
+def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain,
+ SDNPOptInGlue,
+ SDNPVariadic]>;
+
+// (ins NZCV, Condition, Dest)
+def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>;
+def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>;
+
+// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition)
+def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<2, 3>]>;
+def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>;
+
+// (outs NZCV), (ins LHS, RHS, Condition)
+def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisSameAs<1, 2>]>;
+def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>;
+
+
+// (outs GPR64), (ins)
+def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
+
+// A64 compares don't care about the cond really (they set all flags) so a
+// simple binary operator is useful.
+def A64cmp : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64setcc node:$lhs, node:$rhs, cond)>;
+
+
+// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN
+// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C
+// and V flags can be set differently by this operation. It comes down to
+// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are
+// then everything is fine. If not then the optimization is wrong. Thus general
+// comparisons are only valid if op2 != 0.
+
+// So, finally, the only LLVM-native comparisons that don't mention C and V are
+// SETEQ and SETNE. They're the only ones we can safely use CMN for in the
+// absence of information about op2.
+def equality_cond : PatLeaf<(cond), [{
+ return N->get() == ISD::SETEQ || N->get() == ISD::SETNE;
+}]>;
+
+def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>;
+
+// There are two layers of indirection here, driven by the following
+// considerations.
+// + TableGen does not know CodeModel or Reloc so that decision should be
+// made for a variable/address at ISelLowering.
+// + The output of ISelLowering should be selectable (hence the Wrapper,
+// rather than a bare target opcode)
+def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i32>,
+ SDTCisPtrTy<0>]>;
+
+def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>;
+
+
+def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad,
+ [SDNPHasChain]>;
+
+
+// (A64BFI LHS, RHS, LSB, Width)
+def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>,
+ SDTCisVT<4, i64>]>;
+
+def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>;
+
+// (A64EXTR HiReg, LoReg, LSB)
+def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>]>;
+def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
+
+// (A64[SU]BFX Field, ImmR, ImmS).
+//
+// Note that ImmR and ImmS are already encoded for the actual instructions. The
+// more natural LSB and Width mix together to form ImmR and ImmS, something
+// which TableGen can't handle.
+def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
+def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
+
+def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
+
+//===----------------------------------------------------------------------===//
+// Call sequence pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+
+def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// The TLSDESCCALL node is a variant call which goes to an indirectly calculated
+// destination but needs a relocation against a fixed symbol. As such it has two
+// certain operands: the callee and the relocated variable.
+//
+// The TLS ABI only allows it to be selected to a BLR instructin (with
+// appropriate relocation).
+def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+
+def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
+
+def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>;
+def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>;
+def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+
+
+// These pseudo-instructions have special semantics by virtue of being passed to
+// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by
+// LowerCall to (in our case) tell the back-end about stack adjustments for
+// arguments passed on the stack. Here we select those markers to
+// pseudo-instructions which explicitly set the stack, and finally in the
+// RegisterInfo we convert them to a true stack adjustment.
+let Defs = [XSP], Uses = [XSP] in {
+ def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt),
+ [(AArch64callseq_start timm:$amt)]>;
+
+ def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic operation pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1 in {
+multiclass AtomicSizes<string opname> {
+ def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>;
+ def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>;
+ def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>;
+ def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
+ [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>;
+}
+}
+
+defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">;
+defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">;
+defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
+defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
+defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
+defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
+defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
+let Defs = [NZCV] in {
+ // These operations need a CMP to calculate the correct value
+ defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
+ defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
+ defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
+ defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
+}
+
+let usesCustomInserter = 1, Defs = [NZCV] in {
+def ATOMIC_CMP_SWAP_I8
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>;
+def ATOMIC_CMP_SWAP_I16
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>;
+def ATOMIC_CMP_SWAP_I32
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>;
+def ATOMIC_CMP_SWAP_I64
+ : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
+ [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (extended register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP
+
+// The RHS of these operations is conceptually a sign/zero-extended
+// register, optionally shifted left by 1-4. The extension can be a
+// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but
+// must be specified with one exception:
+
+// If one of the registers is sp/wsp then LSL is an alias for UXTW in
+// 32-bit instructions and UXTX in 64-bit versions, the shift amount
+// is not optional in that case (but can explicitly be 0), and the
+// entire suffix can be skipped (e.g. "add sp, x3, x2").
+
+multiclass extend_operands<string PREFIX, string Diag> {
+ def _asmoperand : AsmOperandClass {
+ let Name = PREFIX;
+ let RenderMethod = "addRegExtendOperands";
+ let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">";
+ let DiagnosticType = "AddSubRegExtend" # Diag;
+ }
+
+ def _operand : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> {
+ let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">";
+ let DecoderMethod = "DecodeRegExtendOperand";
+ let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand");
+ }
+}
+
+defm UXTB : extend_operands<"UXTB", "Small">;
+defm UXTH : extend_operands<"UXTH", "Small">;
+defm UXTW : extend_operands<"UXTW", "Small">;
+defm UXTX : extend_operands<"UXTX", "Large">;
+defm SXTB : extend_operands<"SXTB", "Small">;
+defm SXTH : extend_operands<"SXTH", "Small">;
+defm SXTW : extend_operands<"SXTW", "Small">;
+defm SXTX : extend_operands<"SXTX", "Large">;
+
+def LSL_extasmoperand : AsmOperandClass {
+ let Name = "RegExtendLSL";
+ let RenderMethod = "addRegExtendOperands";
+ let DiagnosticType = "AddSubRegExtendLarge";
+}
+
+def LSL_extoperand : Operand<i64> {
+ let ParserMatchClass = LSL_extasmoperand;
+}
+
+
+// The patterns for various sign-extensions are a little ugly and
+// non-uniform because everything has already been promoted to the
+// legal i64 and i32 types. We'll wrap the various variants up in a
+// class for use later.
+class extend_types {
+ dag uxtb; dag uxth; dag uxtw; dag uxtx;
+ dag sxtb; dag sxth; dag sxtw; dag sxtx;
+ ValueType ty;
+ RegisterClass GPR;
+}
+
+def extends_to_i64 : extend_types {
+ let uxtb = (and (anyext i32:$Rm), 255);
+ let uxth = (and (anyext i32:$Rm), 65535);
+ let uxtw = (zext i32:$Rm);
+ let uxtx = (i64 $Rm);
+
+ let sxtb = (sext_inreg (anyext i32:$Rm), i8);
+ let sxth = (sext_inreg (anyext i32:$Rm), i16);
+ let sxtw = (sext i32:$Rm);
+ let sxtx = (i64 $Rm);
+
+ let ty = i64;
+ let GPR = GPR64xsp;
+}
+
+
+def extends_to_i32 : extend_types {
+ let uxtb = (and i32:$Rm, 255);
+ let uxth = (and i32:$Rm, 65535);
+ let uxtw = (i32 i32:$Rm);
+ let uxtx = (i32 i32:$Rm);
+
+ let sxtb = (sext_inreg i32:$Rm, i8);
+ let sxth = (sext_inreg i32:$Rm, i16);
+ let sxtw = (i32 i32:$Rm);
+ let sxtx = (i32 i32:$Rm);
+
+ let ty = i32;
+ let GPR = GPR32wsp;
+}
+
+// Now, six of the extensions supported are easy and uniform: if the source size
+// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate
+// those instructions in one block.
+
+// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me:
+// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would
+// be impossible.
+// + Patterns are very different as well.
+// + Passing different registers would be ugly (more fields in extend_types
+// would probably be the best option).
+multiclass addsub_exts<bit sf, bit op, bit S, string asmop,
+ SDPatternOperator opfrag,
+ dag outs, extend_types exts> {
+ def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
+ NoItinerary>;
+ def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
+ NoItinerary>;
+ def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
+ NoItinerary>;
+
+ def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
+ NoItinerary>;
+ def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
+ NoItinerary>;
+ def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
+ NoItinerary>;
+}
+
+// These two could be merge in with the above, but their patterns aren't really
+// necessary and the naming-scheme would necessarily break:
+multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag,
+ dag outs> {
+ def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011,
+ outs,
+ (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))],
+ NoItinerary>;
+
+ def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
+ outs,
+ (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No Pattern: same as uxtx */],
+ NoItinerary>;
+}
+
+multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> {
+ def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011,
+ outs,
+ (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No pattern: probably same as uxtw */],
+ NoItinerary>;
+
+ def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111,
+ outs,
+ (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No Pattern: probably same as uxtw */],
+ NoItinerary>;
+}
+
+class SetRD<RegisterClass RC, SDPatternOperator op>
+ : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>;
+class SetNZCV<SDPatternOperator op>
+ : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>;
+
+defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
+ (outs GPR64xsp:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
+ (outs GPR64xsp:$Rd)>;
+defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>,
+ (outs GPR32wsp:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b0, 0b0, "add\t$Rd, ",
+ (outs GPR32wsp:$Rd)>;
+defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
+ (outs GPR64xsp:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
+ (outs GPR64xsp:$Rd)>;
+defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>,
+ (outs GPR32wsp:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ",
+ (outs GPR32wsp:$Rd)>;
+
+let Defs = [NZCV] in {
+defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
+ (outs GPR64:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
+ (outs GPR64:$Rd)>;
+defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>,
+ (outs GPR32:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ",
+ (outs GPR32:$Rd)>;
+defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
+ (outs GPR64:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
+ (outs GPR64:$Rd)>;
+defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
+ (outs GPR32:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ",
+ (outs GPR32:$Rd)>;
+
+
+let Rd = 0b11111, isCompare = 1 in {
+defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
+ (outs), extends_to_i64>,
+ addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
+defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
+ (outs), extends_to_i32>,
+ addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>;
+defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
+ (outs), extends_to_i64>,
+ addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>;
+defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
+ (outs), extends_to_i32>,
+ addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>;
+}
+}
+
+// Now patterns for the operation without a shift being needed. No patterns are
+// created for uxtx/sxtx since they're non-uniform and it's expected that
+// add/sub (shifted register) will handle those cases anyway.
+multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop,
+ extend_types exts> {
+ def : Pat<(nodeop exts.ty:$Rn, exts.uxtb),
+ (!cast<Instruction>(prefix # "w_uxtb") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.uxth),
+ (!cast<Instruction>(prefix # "w_uxth") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.uxtw),
+ (!cast<Instruction>(prefix # "w_uxtw") $Rn, $Rm, 0)>;
+
+ def : Pat<(nodeop exts.ty:$Rn, exts.sxtb),
+ (!cast<Instruction>(prefix # "w_sxtb") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.sxth),
+ (!cast<Instruction>(prefix # "w_sxth") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.sxtw),
+ (!cast<Instruction>(prefix # "w_sxtw") $Rn, $Rm, 0)>;
+}
+
+defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>;
+defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>;
+defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>;
+defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>;
+
+defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>;
+defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>;
+
+// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is
+// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the
+// operation. Also permitted in this case is complete omission of the argument,
+// which implies "lsl #0".
+multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd,
+ RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
+ def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+ def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"),
+ (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+
+}
+
+defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+
+// Rd cannot be sp for flag-setting variants so only half of the aliases are
+// needed.
+defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>;
+defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>;
+
+// CMP unfortunately has to be different because the instruction doesn't have a
+// dest register.
+multiclass cmp_lsl_aliases<string asmop, Instruction inst,
+ RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
+ def : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+ (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+ def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"),
+ (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+}
+
+defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
+defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
+
+// These instructions accept a 12-bit unsigned immediate, optionally shifted
+// left by 12 bits. Official assembly format specifies a 12 bit immediate with
+// one of "", "LSL #0", "LSL #12" supplementary operands.
+
+// There are surprisingly few ways to make this work with TableGen, so this
+// implementation has separate instructions for the "LSL #0" and "LSL #12"
+// variants.
+
+// If the MCInst retained a single combined immediate (which could be 0x123000,
+// for example) then both components (imm & shift) would have to be delegated to
+// a single assembly operand. This would entail a separate operand parser
+// (because the LSL would have to live in the same AArch64Operand as the
+// immediate to be accessible); assembly parsing is rather complex and
+// error-prone C++ code.
+//
+// By splitting the immediate, we can delegate handling this optional operand to
+// an InstAlias. Supporting functions to generate the correct MCInst are still
+// required, but these are essentially trivial and parsing can remain generic.
+//
+// Rejected plans with rationale:
+// ------------------------------
+//
+// In an ideal world you'de have two first class immediate operands (in
+// InOperandList, specifying imm12 and shift). Unfortunately this is not
+// selectable by any means I could discover.
+//
+// An Instruction with two MCOperands hidden behind a single entry in
+// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
+// but required more C++ code to handle encoding/decoding. Parsing (the intended
+// main beneficiary) ended up equally complex because of the optional nature of
+// "LSL #0".
+//
+// Attempting to circumvent the need for a custom OperandParser above by giving
+// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
+// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
+// should be parsed: there was no way to accommodate an "lsl #12".
+
+let ParserMethod = "ParseImmWithLSLOperand",
+ RenderMethod = "addImmWithLSLOperands" in {
+ // Derived PredicateMethod fields are different for each
+ def addsubimm_lsl0_asmoperand : AsmOperandClass {
+ let Name = "AddSubImmLSL0";
+ // If an error is reported against this operand, instruction could also be a
+ // register variant.
+ let DiagnosticType = "AddSubSecondSource";
+ }
+
+ def addsubimm_lsl12_asmoperand : AsmOperandClass {
+ let Name = "AddSubImmLSL12";
+ let DiagnosticType = "AddSubSecondSource";
+ }
+}
+
+def shr_12_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
+}]>;
+
+def shr_12_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
+}]>;
+
+def neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
+}]>;
+
+
+multiclass addsub_imm_operands<ValueType ty> {
+ let PrintMethod = "printAddSubImmLSL0Operand",
+ EncoderMethod = "getAddSubImmOpValue",
+ ParserMatchClass = addsubimm_lsl0_asmoperand in {
+ def _posimm_lsl0 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
+ def _negimm_lsl0 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }],
+ neg_XFORM>;
+ }
+
+ let PrintMethod = "printAddSubImmLSL12Operand",
+ EncoderMethod = "getAddSubImmOpValue",
+ ParserMatchClass = addsubimm_lsl12_asmoperand in {
+ def _posimm_lsl12 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
+ shr_12_XFORM>;
+
+ def _negimm_lsl12 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
+ shr_12_neg_XFORM>;
+ }
+}
+
+// The add operands don't need any transformation
+defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
+defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
+
+multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
+ string asmop, string cmpasmop,
+ Operand imm_operand, Operand cmp_imm_operand,
+ RegisterClass GPR, RegisterClass GPRsp,
+ AArch64Reg ZR, ValueType Ty> {
+ // All registers for non-S variants allow SP
+ def _s : A64I_addsubimm<sf, op, 0b0, shift,
+ (outs GPRsp:$Rd),
+ (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
+ [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))],
+ NoItinerary>;
+
+
+ // S variants can read SP but would write to ZR
+ def _S : A64I_addsubimm<sf, op, 0b1, shift,
+ (outs GPR:$Rd),
+ (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
+ [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))],
+ NoItinerary> {
+ let Defs = [NZCV];
+ }
+
+ // Note that the pattern here for ADDS is subtle. Canonically CMP
+ // a, b becomes SUBS a, b. If b < 0 then this is equivalent to
+ // ADDS a, (-b). This is not true in general.
+ def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
+ (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(cmpasmop, " $Rn, $Imm12"),
+ [(set NZCV,
+ (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
+ NoItinerary> {
+ let Rd = 0b11111;
+ let Defs = [NZCV];
+ let isCompare = 1;
+ }
+}
+
+
+multiclass addsubimm_shifts<string prefix, bit sf, bit op,
+ string asmop, string cmpasmop, string operand, string cmpoperand,
+ RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR,
+ ValueType Ty> {
+ defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
+ asmop, cmpasmop,
+ !cast<Operand>(operand # "_lsl0"),
+ !cast<Operand>(cmpoperand # "_lsl0"),
+ GPR, GPRsp, ZR, Ty>;
+
+ defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
+ asmop, cmpasmop,
+ !cast<Operand>(operand # "_lsl12"),
+ !cast<Operand>(cmpoperand # "_lsl12"),
+ GPR, GPRsp, ZR, Ty>;
+}
+
+defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
+ "addsubimm_operand_i32_posimm",
+ "addsubimm_operand_i32_negimm",
+ GPR32, GPR32wsp, WZR, i32>;
+defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
+ "addsubimm_operand_i64_posimm",
+ "addsubimm_operand_i64_negimm",
+ GPR64, GPR64xsp, XZR, i64>;
+defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
+ "addsubimm_operand_i32_negimm",
+ "addsubimm_operand_i32_posimm",
+ GPR32, GPR32wsp, WZR, i32>;
+defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
+ "addsubimm_operand_i64_negimm",
+ "addsubimm_operand_i64_posimm",
+ GPR64, GPR64xsp, XZR, i64>;
+
+multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
+ def _fromsp : InstAlias<"mov $Rd, $Rn",
+ (addop GPRsp:$Rd, SP:$Rn, 0),
+ 0b1>;
+
+ def _tosp : InstAlias<"mov $Rd, $Rn",
+ (addop SP:$Rd, GPRsp:$Rn, 0),
+ 0b1>;
+}
+
+// Recall Rxsp is a RegisterClass containing *just* xsp.
+defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
+defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
+
+//===-------------------------------
+// 1. The "shifed register" operands. Shared with logical insts.
+//===-------------------------------
+
+multiclass shift_operands<string prefix, string form> {
+ def _asmoperand_i32 : AsmOperandClass {
+ let Name = "Shift" # form # "i32";
+ let RenderMethod = "addShiftOperands";
+ let PredicateMethod = "isShift<A64SE::" # form # ", false>";
+ let DiagnosticType = "AddSubRegShift32";
+ }
+
+ // Note that the operand type is intentionally i64 because the DAGCombiner
+ // puts these into a canonical form.
+ def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
+ let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+ let DecoderMethod = "Decode32BitShiftOperand";
+ }
+
+ def _asmoperand_i64 : AsmOperandClass {
+ let Name = "Shift" # form # "i64";
+ let RenderMethod = "addShiftOperands";
+ let PredicateMethod = "isShift<A64SE::" # form # ", true>";
+ let DiagnosticType = "AddSubRegShift64";
+ }
+
+ def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
+ let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+ }
+}
+
+defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
+defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
+defm asr_operand : shift_operands<"asr_operand", "ASR">;
+
+// Not used for add/sub, but defined here for completeness. The "logical
+// (shifted register)" instructions *do* have an ROR variant.
+defm ror_operand : shift_operands<"ror_operand", "ROR">;
+
+//===-------------------------------
+// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
+//===-------------------------------
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
+ string asmop, SDPatternOperator opfrag, ValueType ty,
+ RegisterClass GPR, list<Register> defs> {
+ let isCommutable = commutable, Defs = defs in {
+ def _lsl : A64I_addsubshift<sf, op, s, 0b00,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_addsubshift<sf, op, s, 0b01,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_addsubshift<sf, op, s, 0b10,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+ GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag ty:$Rn, ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
+ string asmop, SDPatternOperator opfrag,
+ list<Register> defs> {
+ defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
+ commutable, asmop, opfrag, i64, GPR64, defs>;
+ defm www : addsub_shifts<prefix # "www", 0b0, op, s,
+ commutable, asmop, opfrag, i32, GPR32, defs>;
+}
+
+
+defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
+defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
+
+defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
+defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
+
+//===-------------------------------
+// 1. The NEG/NEGS aliases
+//===-------------------------------
+
+multiclass neg_alias<Instruction INST, RegisterClass GPR, Register ZR,
+ ValueType ty, Operand shift_operand, SDNode shiftop> {
+ def : InstAlias<"neg $Rd, $Rm, $Imm6",
+ (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+ def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)),
+ (INST ZR, $Rm, shift_operand:$Imm6)>;
+}
+
+defm : neg_alias<SUBwww_lsl, GPR32, WZR, i32, lsl_operand_i32, shl>;
+defm : neg_alias<SUBwww_lsr, GPR32, WZR, i32, lsr_operand_i32, srl>;
+defm : neg_alias<SUBwww_asr, GPR32, WZR, i32, asr_operand_i32, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>;
+
+defm : neg_alias<SUBxxx_lsl, GPR64, XZR, i64, lsl_operand_i64, shl>;
+defm : neg_alias<SUBxxx_lsr, GPR64, XZR, i64, lsr_operand_i64, srl>;
+defm : neg_alias<SUBxxx_asr, GPR64, XZR, i64, asr_operand_i64, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>;
+
+// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
+// be involved.
+class negs_alias<Instruction INST, RegisterClass GPR,
+ Register ZR, Operand shift_operand, SDNode shiftop>
+ : InstAlias<"negs $Rd, $Rm, $Imm6",
+ (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+
+//===-------------------------------
+// 1. The CMP/CMN aliases
+//===-------------------------------
+
+multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
+ string asmop, SDPatternOperator opfrag, ValueType ty,
+ RegisterClass GPR> {
+ let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
+ def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag ty:$Rn, ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>;
+defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>;
+
+defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>;
+defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (with carry) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
+
+multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> {
+ let Uses = [NZCV] in {
+ def www : A64I_addsubcarry<0b0, op, s, 0b000000,
+ (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [], NoItinerary>;
+
+ def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
+ (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [], NoItinerary>;
+ }
+}
+
+let isCommutable = 1 in {
+ defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
+}
+
+defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
+
+let Defs = [NZCV] in {
+ let isCommutable = 1 in {
+ defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
+ }
+
+ defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
+}
+
+def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+
+// Note that adde and sube can form a chain longer than two (e.g. for 256-bit
+// addition). So the flag-setting instructions are appropriate.
+def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>;
+def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>;
+def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>;
+def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield
+//===----------------------------------------------------------------------===//
+// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL,
+// UBFIZ, UBFX
+
+// Because of the rather complicated nearly-overlapping aliases, the decoding of
+// this range of instructions is handled manually. The architectural
+// instructions are BFM, SBFM and UBFM but a disassembler should never produce
+// these.
+//
+// In the end, the best option was to use BFM instructions for decoding under
+// almost all circumstances, but to create aliasing *Instructions* for each of
+// the canonical forms and specify a completely custom decoder which would
+// substitute the correct MCInst as needed.
+//
+// This also simplifies instruction selection, parsing etc because the MCInsts
+// have a shape that's closer to their use in code.
+
+//===-------------------------------
+// 1. The architectural BFM instructions
+//===-------------------------------
+
+def uimm5_asmoperand : AsmOperandClass {
+ let Name = "UImm5";
+ let PredicateMethod = "isUImm<5>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm5";
+}
+
+def uimm6_asmoperand : AsmOperandClass {
+ let Name = "UImm6";
+ let PredicateMethod = "isUImm<6>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm6";
+}
+
+def bitfield32_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> {
+ let ParserMatchClass = uimm5_asmoperand;
+
+ let DecoderMethod = "DecodeBitfield32ImmOperand";
+}
+
+
+def bitfield64_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
+ let ParserMatchClass = uimm6_asmoperand;
+
+ // Default decoder works in 64-bit case: the 6-bit field can take any value.
+}
+
+multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ }
+}
+
+defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">;
+defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">;
+
+// BFM instructions modify the destination register rather than defining it
+// completely.
+def BFMwwii :
+ A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+ "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ let Constraints = "$src = $Rd";
+}
+
+def BFMxxii :
+ A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+ "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ let Constraints = "$src = $Rd";
+}
+
+
+//===-------------------------------
+// 2. Extend aliases to 64-bit dest
+//===-------------------------------
+
+// Unfortunately the extensions that end up as 64-bits cannot be handled by an
+// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs
+// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias is
+// not capable of such a map as far as I'm aware
+
+// Note that these instructions are strictly more specific than the
+// BFM ones (in ImmR) so they can handle their own decoding.
+class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, ValueType dty,
+ string asmop, bits<6> imms, dag pattern>
+ : A64I_bitfield<sf, opc, sf,
+ (outs GPRDest:$Rd), (ins GPR32:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [(set dty:$Rd, pattern)], NoItinerary> {
+ let ImmR = 0b000000;
+ let ImmS = imms;
+}
+
+// Signed extensions
+def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7,
+ (sext_inreg (anyext i32:$Rn), i8)>;
+def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7,
+ (sext_inreg i32:$Rn, i8)>;
+def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15,
+ (sext_inreg (anyext i32:$Rn), i16)>;
+def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15,
+ (sext_inreg i32:$Rn, i16)>;
+def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>;
+
+// Unsigned extensions
+def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7,
+ (and i32:$Rn, 255)>;
+def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15,
+ (and i32:$Rn, 65535)>;
+
+// The 64-bit unsigned variants are not strictly architectural but recommended
+// for consistency.
+let isAsmParserOnly = 1 in {
+ def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7,
+ (and (anyext i32:$Rn), 255)>;
+ def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15,
+ (and (anyext i32:$Rn), 65535)>;
+}
+
+// Extra patterns for when the source register is actually 64-bits
+// too. There's no architectural difference here, it's just LLVM
+// shinanigans. There's no need for equivalent zero-extension patterns
+// because they'll already be caught by logical (immediate) matching.
+def : Pat<(sext_inreg i64:$Rn, i8),
+ (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>;
+def : Pat<(sext_inreg i64:$Rn, i16),
+ (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>;
+def : Pat<(sext_inreg i64:$Rn, i32),
+ (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>;
+
+
+//===-------------------------------
+// 3. Aliases for ASR and LSR (the simple shifts)
+//===-------------------------------
+
+// These also handle their own decoding because ImmS being set makes
+// them take precedence over BFM.
+multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
+ def wwi : A64I_bitfield<0b0, opc, 0b0,
+ (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+ [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))],
+ NoItinerary> {
+ let ImmS = 31;
+ }
+
+ def xxi : A64I_bitfield<0b1, opc, 0b1,
+ (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+ [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))],
+ NoItinerary> {
+ let ImmS = 63;
+ }
+
+}
+
+defm ASR : A64I_shift<0b00, "asr", sra>;
+defm LSR : A64I_shift<0b10, "lsr", srl>;
+
+//===-------------------------------
+// 4. Aliases for LSL
+//===-------------------------------
+
+// Unfortunately LSL and subsequent aliases are much more complicated. We need
+// to be able to say certain output instruction fields depend in a complex
+// manner on combinations of input assembly fields).
+//
+// MIOperandInfo *might* have been able to do it, but at the cost of
+// significantly more C++ code.
+
+// N.b. contrary to usual practice these operands store the shift rather than
+// the machine bits in an MCInst. The complexity overhead of consistency
+// outweighed the benefits in this case (custom asmparser, printer and selection
+// vs custom encoder).
+def bitfield32_lsl_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let ParserMatchClass = uimm5_asmoperand;
+ let EncoderMethod = "getBitfield32LSLOpValue";
+}
+
+def bitfield64_lsl_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let ParserMatchClass = uimm6_asmoperand;
+ let EncoderMethod = "getBitfield64LSLOpValue";
+}
+
+class A64I_bitfield_lsl<bit sf, RegisterClass GPR, ValueType ty,
+ Operand operand>
+ : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
+ "lsl\t$Rd, $Rn, $FullImm",
+ [(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))],
+ NoItinerary> {
+ bits<12> FullImm;
+ let ImmR = FullImm{5-0};
+ let ImmS = FullImm{11-6};
+
+ // No disassembler allowed because it would overlap with BFM which does the
+ // actual work.
+ let isAsmParserOnly = 1;
+}
+
+def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>;
+def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>;
+
+//===-------------------------------
+// 5. Aliases for bitfield extract instructions
+//===-------------------------------
+
+def bfx32_width_asmoperand : AsmOperandClass {
+ let Name = "BFX32Width";
+ let PredicateMethod = "isBitfieldWidth<32>";
+ let RenderMethod = "addBFXWidthOperands";
+ let DiagnosticType = "Width32";
+}
+
+def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> {
+ let PrintMethod = "printBFXWidthOperand";
+ let ParserMatchClass = bfx32_width_asmoperand;
+}
+
+def bfx64_width_asmoperand : AsmOperandClass {
+ let Name = "BFX64Width";
+ let PredicateMethod = "isBitfieldWidth<64>";
+ let RenderMethod = "addBFXWidthOperands";
+ let DiagnosticType = "Width64";
+}
+
+def bfx64_width : Operand<i64> {
+ let PrintMethod = "printBFXWidthOperand";
+ let ParserMatchClass = bfx64_width_asmoperand;
+}
+
+
+multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))],
+ NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))],
+ NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+}
+
+defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
+defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
+
+// Again, variants based on BFM modify Rd so need it as an input too.
+def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+ "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+ "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+// SBFX instructions can do a 1-instruction sign-extension of boolean values.
+def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>;
+def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>;
+def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
+ (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>;
+
+// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
+// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
+def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
+ sub_32)>;
+
+//===-------------------------------
+// 6. Aliases for bitfield insert instructions
+//===-------------------------------
+
+def bfi32_lsb_asmoperand : AsmOperandClass {
+ let Name = "BFI32LSB";
+ let PredicateMethod = "isUImm<5>";
+ let RenderMethod = "addBFILSBOperands<32>";
+ let DiagnosticType = "UImm5";
+}
+
+def bfi32_lsb : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let PrintMethod = "printBFILSBOperand<32>";
+ let ParserMatchClass = bfi32_lsb_asmoperand;
+}
+
+def bfi64_lsb_asmoperand : AsmOperandClass {
+ let Name = "BFI64LSB";
+ let PredicateMethod = "isUImm<6>";
+ let RenderMethod = "addBFILSBOperands<64>";
+ let DiagnosticType = "UImm6";
+}
+
+def bfi64_lsb : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let PrintMethod = "printBFILSBOperand<64>";
+ let ParserMatchClass = bfi64_lsb_asmoperand;
+}
+
+// Width verification is performed during conversion so width operand can be
+// shared between 32/64-bit cases. Still needed for the print method though
+// because ImmR encodes "width - 1".
+def bfi32_width_asmoperand : AsmOperandClass {
+ let Name = "BFI32Width";
+ let PredicateMethod = "isBitfieldWidth<32>";
+ let RenderMethod = "addBFIWidthOperands";
+ let DiagnosticType = "Width32";
+}
+
+def bfi32_width : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> {
+ let PrintMethod = "printBFIWidthOperand";
+ let ParserMatchClass = bfi32_width_asmoperand;
+}
+
+def bfi64_width_asmoperand : AsmOperandClass {
+ let Name = "BFI64Width";
+ let PredicateMethod = "isBitfieldWidth<64>";
+ let RenderMethod = "addBFIWidthOperands";
+ let DiagnosticType = "Width64";
+}
+
+def bfi64_width : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> {
+ let PrintMethod = "printBFIWidthOperand";
+ let ParserMatchClass = bfi64_width_asmoperand;
+}
+
+multiclass A64I_bitfield_insert<bits<2> opc, string asmop> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+}
+
+defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">;
+defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">;
+
+
+def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+ "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+ "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+//===----------------------------------------------------------------------===//
+// Compare and branch (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: CBZ, CBNZ
+
+class label_asmoperand<int width, int scale> : AsmOperandClass {
+ let Name = "Label" # width # "_" # scale;
+ let PredicateMethod = "isLabel<" # width # "," # scale # ">";
+ let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
+ let DiagnosticType = "Label";
+}
+
+def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
+
+// All conditional immediate branches are the same really: 19 signed bits scaled
+// by the instruction-size (4).
+def bcc_target : Operand<OtherVT> {
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let ParserMatchClass = label_wid19_scal4_asmoperand;
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>";
+ let OperandType = "OPERAND_PCREL";
+}
+
+multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> {
+ let isBranch = 1, isTerminator = 1 in {
+ def x : A64I_cmpbr<0b1, op,
+ (outs),
+ (ins GPR64:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+
+ def w : A64I_cmpbr<0b0, op,
+ (outs),
+ (ins GPR32:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+ }
+}
+
+defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{
+ return Imm == A64CC::EQ;
+}]> >;
+defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{
+ return Imm == A64CC::NE;
+}]> >;
+
+//===----------------------------------------------------------------------===//
+// Conditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B.cc
+
+def cond_code_asmoperand : AsmOperandClass {
+ let Name = "CondCode";
+ let DiagnosticType = "CondCode";
+}
+
+def cond_code : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 15;
+}]> {
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_asmoperand;
+}
+
+def Bcc : A64I_condbr<0b0, 0b0, (outs),
+ (ins cond_code:$Cond, bcc_target:$Label),
+ "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
+ NoItinerary> {
+ let Uses = [NZCV];
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+def uimm4_asmoperand : AsmOperandClass {
+ let Name = "UImm4";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm4";
+}
+
+def uimm4 : Operand<i32> {
+ let ParserMatchClass = uimm4_asmoperand;
+}
+
+def uimm5 : Operand<i32> {
+ let ParserMatchClass = uimm5_asmoperand;
+}
+
+// The only difference between this operand and the one for instructions like
+// B.cc is that it's parsed manually. The other get parsed implicitly as part of
+// the mnemonic handling.
+def cond_code_op_asmoperand : AsmOperandClass {
+ let Name = "CondCodeOp";
+ let RenderMethod = "addCondCodeOperands";
+ let PredicateMethod = "isCondCode";
+ let ParserMethod = "ParseCondCodeOperand";
+ let DiagnosticType = "CondCode";
+}
+
+def cond_code_op : Operand<i32> {
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_op_asmoperand;
+}
+
+class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+ : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
+ (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+ : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
+
+// Condition code which is encoded as the inversion (semantically rather than
+// bitwise) in the instruction.
+def inv_cond_code_op_asmoperand : AsmOperandClass {
+ let Name = "InvCondCodeOp";
+ let RenderMethod = "addInvCondCodeOperands";
+ let PredicateMethod = "isCondCode";
+ let ParserMethod = "ParseCondCodeOperand";
+ let DiagnosticType = "CondCode";
+}
+
+def inv_cond_code_op : Operand<i32> {
+ let ParserMatchClass = inv_cond_code_op_asmoperand;
+}
+
+// Having a separate operand for the selectable use-case is debatable, but gives
+// consistency with cond_code.
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+ A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
+ return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
+}]>;
+
+def inv_cond_code
+ : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
+
+
+multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
+ SDPatternOperator select> {
+ let Uses = [NZCV] in {
+ def wwwc : A64I_condsel<0b0, op, 0b0, op2,
+ (outs GPR32:$Rd),
+ (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+ [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
+ NoItinerary>;
+
+
+ def xxxc : A64I_condsel<0b1, op, 0b0, op2,
+ (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+ [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
+ NoItinerary>;
+ }
+}
+
+def simple_select
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
+
+class complex_select<SDPatternOperator opnode>
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
+
+
+defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
+defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
+ complex_select<PatFrag<(ops node:$val),
+ (add node:$val, 1)>>>;
+defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
+defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
+
+// Now the instruction aliases, which fit nicely into LLVM's model:
+
+def : InstAlias<"cset $Rd, $Cond",
+ (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cset $Rd, $Cond",
+ (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"csetm $Rd, $Cond",
+ (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"csetm $Rd, $Cond",
+ (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinc $Rd, $Rn, $Cond",
+ (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinc $Rd, $Rn, $Cond",
+ (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinv $Rd, $Rn, $Cond",
+ (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinv $Rd, $Rn, $Cond",
+ (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cneg $Rd, $Rn, $Cond",
+ (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cneg $Rd, $Rn, $Cond",
+ (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+
+// Finally some helper patterns.
+
+// For CSET (a.k.a. zero-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
+ (CSINCwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
+ (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
+ (CSINCxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
+ (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// For CSETM (a.k.a. sign-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+ (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+ (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+ (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+ (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
+// commutativity. The instructions are to complex for isCommutable to be used,
+// so we have to create the patterns manually:
+
+// No commutable pattern for CSEL since the commuted version is isomorphic.
+
+// CSINC
+def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
+ (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
+ (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+// CSINV
+def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
+ (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
+ (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+// CSNEG
+def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
+ (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
+ (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (1 source) instructions
+//===----------------------------------------------------------------------===//
+// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
+
+// We define an unary operator which always fails. We will use this to
+// define unary operators that cannot be matched.
+
+class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
+ list<dag> patterns, RegisterClass GPRrc,
+ InstrItinClass itin>:
+ A64I_dp_1src<sf,
+ 0,
+ 0b00000,
+ opcode,
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ (outs GPRrc:$Rd),
+ (ins GPRrc:$Rn),
+ patterns,
+ itin>;
+
+multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
+ let hasSideEffects = 0 in {
+ def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
+ def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
+ }
+}
+
+defm RBIT : A64I_dp_1src<0b000000, "rbit">;
+defm CLS : A64I_dp_1src<0b000101, "cls">;
+defm CLZ : A64I_dp_1src<0b000100, "clz">;
+
+def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>;
+def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>;
+def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>;
+def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>;
+
+def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>;
+def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>;
+def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>;
+def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>;
+
+
+def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
+ [(set i32:$Rd, (bswap i32:$Rn))],
+ GPR32, NoItinerary>;
+def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
+ [(set i64:$Rd, (bswap i64:$Rn))],
+ GPR64, NoItinerary>;
+def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
+ [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))],
+ GPR64, NoItinerary>;
+def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
+ [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))],
+ GPR32,
+ NoItinerary>;
+def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL,
+// LSR, ASR, ROR
+
+
+class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
+ RegisterClass GPRsp,
+ InstrItinClass itin>:
+ A64I_dp_2src<sf,
+ opcode,
+ 0,
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ (outs GPRsp:$Rd),
+ (ins GPRsp:$Rn, GPRsp:$Rm),
+ patterns,
+ itin>;
+
+multiclass dp_2src_crc<bit c, string asmop> {
+ def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0},
+ !strconcat(asmop, "b"), [], GPR32, NoItinerary>;
+ def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1},
+ !strconcat(asmop, "h"), [], GPR32, NoItinerary>;
+ def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0},
+ !strconcat(asmop, "w"), [], GPR32, NoItinerary>;
+ def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
+ !strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
+ (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
+ NoItinerary>;
+}
+
+multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+ [(set i32:$Rd,
+ (op i32:$Rn, (i64 (zext i32:$Rm))))],
+ GPR32,
+ NoItinerary>;
+ def xxx : dp_2src_impl<0b1,
+ opcode,
+ asmop,
+ [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
+ GPR64,
+ NoItinerary>;
+}
+
+
+multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+ [(set i32:$Rd, (op i32:$Rn, i32:$Rm))],
+ GPR32,
+ NoItinerary>;
+ def xxx : dp_2src_impl<0b1,
+ opcode,
+ asmop,
+ [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
+ GPR64,
+ NoItinerary>;
+}
+
+// Here we define the data processing 2 source instructions.
+defm CRC32 : dp_2src_crc<0b0, "crc32">;
+defm CRC32C : dp_2src_crc<0b1, "crc32c">;
+
+defm UDIV : dp_2src<0b000010, "udiv", udiv>;
+defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
+
+defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
+defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
+defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
+defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
+
+// Extra patterns for an incoming 64-bit value for a 32-bit
+// operation. Since the LLVM operations are undefined (as in C) if the
+// RHS is out of range, it's perfectly permissible to discard the high
+// bits of the GPR64.
+def : Pat<(shl i32:$Rn, i64:$Rm),
+ (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(srl i32:$Rn, i64:$Rm),
+ (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(sra i32:$Rn, i64:$Rm),
+ (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(rotr i32:$Rn, i64:$Rm),
+ (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+
+// Here we define the aliases for the data processing 2 source instructions.
+def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
+def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
+def ASR_menmonic : MnemonicAlias<"asrv", "asr">;
+def ROR_menmonic : MnemonicAlias<"rorv", "ror">;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
+// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
+
+class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
+ ValueType AccTy, RegisterClass SrcReg,
+ string asmop, dag pattern>
+ : A64I_dp3<sf, opcode,
+ (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
+ [(set AccTy:$Rd, pattern)], NoItinerary> {
+ RegisterClass AccGPR = AccReg;
+ RegisterClass SrcGPR = SrcReg;
+}
+
+def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd",
+ (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
+def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd",
+ (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
+
+def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub",
+ (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
+def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub",
+ (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
+
+def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl",
+ (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
+def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl",
+ (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
+
+def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl",
+ (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
+def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl",
+ (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
+
+let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
+ def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm),
+ "umulh\t$Rd, $Rn, $Rm",
+ [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))],
+ NoItinerary>;
+
+ def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm),
+ "smulh\t$Rd, $Rn, $Rm",
+ [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))],
+ NoItinerary>;
+}
+
+multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
+ Register ZR, dag pattern> {
+ def : InstAlias<asmop # " $Rd, $Rn, $Rm",
+ (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
+
+ def : Pat<pattern, (INST $Rn, $Rm, ZR)>;
+}
+
+defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>;
+defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>;
+
+defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
+ (sub 0, (mul i32:$Rn, i32:$Rm))>;
+defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
+ (sub 0, (mul i64:$Rn, i64:$Rm))>;
+
+defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
+ (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>;
+defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
+ (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
+
+defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
+ (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>;
+defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
+ (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
+
+
+//===----------------------------------------------------------------------===//
+// Exception generation
+//===----------------------------------------------------------------------===//
+// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
+
+def uimm16_asmoperand : AsmOperandClass {
+ let Name = "UImm16";
+ let PredicateMethod = "isUImm<16>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm16";
+}
+
+def uimm16 : Operand<i32> {
+ let ParserMatchClass = uimm16_asmoperand;
+}
+
+class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
+ : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
+ !strconcat(asmop, "\t$UImm16"), [], NoItinerary> {
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
+def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
+def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
+def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
+def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;
+
+def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
+def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
+def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;
+
+// The immediate is optional for the DCPS instructions, defaulting to 0.
+def : InstAlias<"dcps1", (DCPS1i 0)>;
+def : InstAlias<"dcps2", (DCPS2i 0)>;
+def : InstAlias<"dcps3", (DCPS3i 0)>;
+
+//===----------------------------------------------------------------------===//
+// Extract (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: EXTR + alias ROR
+
+def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
+ (outs GPR32:$Rd),
+ (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
+ "extr\t$Rd, $Rn, $Rm, $LSB",
+ [(set i32:$Rd,
+ (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))],
+ NoItinerary>;
+def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
+ (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
+ "extr\t$Rd, $Rn, $Rm, $LSB",
+ [(set i64:$Rd,
+ (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))],
+ NoItinerary>;
+
+def : InstAlias<"ror $Rd, $Rs, $LSB",
+ (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
+def : InstAlias<"ror $Rd, $Rs, $LSB",
+ (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
+
+def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB),
+ (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>;
+def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB),
+ (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point compare instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCMP, FCMPE
+
+def fpzero_asmoperand : AsmOperandClass {
+ let Name = "FPZero";
+ let ParserMethod = "ParseFPImmOperand";
+ let DiagnosticType = "FPZero";
+}
+
+def fpz32 : Operand<f32>,
+ ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
+}
+
+def fpz64 : Operand<f64>,
+ ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
+}
+
+multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
+ def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
+ (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
+ NoItinerary> {
+ let Defs = [NZCV];
+ }
+
+ def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
+ (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> {
+ let Defs = [NZCV];
+ }
+}
+
+defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
+ (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>;
+defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
+ (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>;
+
+// What would be Rm should be written as 0; note that even though it's called
+// "$Rm" here to fit in with the InstrFormats, it's actually an immediate.
+defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
+ (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>;
+
+defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
+ (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional compare instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCCMP, FCCMPE
+
+class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
+ : A64I_fpccmp<0b0, 0b0, type, op,
+ (outs),
+ (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
+def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
+def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
+def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCSEL
+
+let Uses = [NZCV] in {
+ def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
+ "fcsel\t$Rd, $Rn, $Rm, $Cond",
+ [(set f32:$Rd,
+ (simple_select f32:$Rn, f32:$Rm))],
+ NoItinerary>;
+
+
+ def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
+ (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
+ "fcsel\t$Rd, $Rn, $Rm, $Cond",
+ [(set f64:$Rd,
+ (simple_select f64:$Rn, f64:$Rm))],
+ NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (1 source)
+//===----------------------------------------------------------------------===//
+// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
+
+def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
+ [{ (void)N; return false; }]>;
+
+// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
+// syntax. Default to no pattern because most are odd enough not to have one.
+multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
+ SDPatternOperator opnode = FPNoUnop> {
+ def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
+ !strconcat(asmstr, "\t$Rd, $Rn"),
+ [(set f32:$Rd, (opnode f32:$Rn))],
+ NoItinerary>;
+
+ def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
+ !strconcat(asmstr, "\t$Rd, $Rn"),
+ [(set f64:$Rd, (opnode f64:$Rn))],
+ NoItinerary>;
+}
+
+defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">;
+defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
+defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
+defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
+
+defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
+defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
+defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
+defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
+defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
+defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
+defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
+
+// The FCVT instrucitons have different source and destination register-types,
+// but the fields are uniform everywhere a D-register (say) crops up. Package
+// this information in a Record.
+class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> {
+ RegisterClass Class = rc;
+ ValueType VT = vt;
+ bit t1 = fld{1};
+ bit t0 = fld{0};
+}
+
+def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
+def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
+def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;
+
+class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
+ : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
+ {0,0,0,1, DestReg.t1, DestReg.t0},
+ (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
+ "fcvt\t$Rd, $Rn",
+ [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>;
+
+def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
+def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
+def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
+def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
+def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
+def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL
+
+def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
+ [{ (void)N; return false; }]>;
+
+multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
+ SDPatternOperator opnode> {
+ def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
+ (outs FPR32:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm),
+ !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
+ [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))],
+ NoItinerary>;
+
+ def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
+ (outs FPR64:$Rd),
+ (ins FPR64:$Rn, FPR64:$Rm),
+ !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
+ [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))],
+ NoItinerary>;
+}
+
+let isCommutable = 1 in {
+ defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
+ defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
+
+ // No patterns for these.
+ defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
+ defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
+ defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
+ defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
+
+ defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (fneg (fmul node:$lhs, node:$rhs))> >;
+}
+
+defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
+defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMADD, FMSUB, FNMADD, FNMSUB
+
+def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
+def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
+def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
+
+class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
+ bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
+ : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
+ (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
+ !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
+ [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))],
+ NoItinerary>;
+
+def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>;
+def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
+def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
+def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
+
+def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>;
+def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
+def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
+def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> fixed-point conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
+
+// #1-#32 allowed, encoded as "64 - <specified imm>
+def fixedpos_asmoperand_i32 : AsmOperandClass {
+ let Name = "CVTFixedPos32";
+ let RenderMethod = "addCVTFixedPosOperands";
+ let PredicateMethod = "isCVTFixedPos<32>";
+ let DiagnosticType = "CVTFixedPos32";
+}
+
+// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
+def fixedpos_asmoperand_i64 : AsmOperandClass {
+ let Name = "CVTFixedPos64";
+ let RenderMethod = "addCVTFixedPosOperands";
+ let PredicateMethod = "isCVTFixedPos<64>";
+ let DiagnosticType = "CVTFixedPos64";
+}
+
+// We need the cartesian product of f32/f64 i32/i64 operands for
+// conversions:
+// + Selection needs to use operands of correct floating type
+// + Assembly parsing and decoding depend on integer width
+class cvtfix_i32_op<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
+ let ParserMatchClass = fixedpos_asmoperand_i32;
+ let DecoderMethod = "DecodeCVT32FixedPosOperand";
+ let PrintMethod = "printCVTFixedPosOperand";
+}
+
+class cvtfix_i64_op<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
+ let ParserMatchClass = fixedpos_asmoperand_i64;
+ let PrintMethod = "printCVTFixedPosOperand";
+}
+
+// Because of the proliferation of weird operands, it's not really
+// worth going for a multiclass here. Oh well.
+
+class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
+ RegisterClass GPR, RegisterClass FPR,
+ ValueType DstTy, ValueType SrcTy,
+ Operand scale_op, string asmop, SDNode cvtop>
+ : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
+ (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
+ !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
+ [(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))],
+ NoItinerary>;
+
+def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32,
+ cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
+def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32,
+ cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
+def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32,
+ cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
+def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32,
+ cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
+
+def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64,
+ cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
+def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64,
+ cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
+def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64,
+ cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
+def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64,
+ cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
+
+
+class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
+ RegisterClass FPR, RegisterClass GPR,
+ ValueType DstTy, ValueType SrcTy,
+ Operand scale_op, string asmop, SDNode cvtop>
+ : A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
+ (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
+ !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
+ [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))],
+ NoItinerary>;
+
+def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32,
+ cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
+def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64,
+ cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
+def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32,
+ cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
+def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64,
+ cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
+def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32,
+ cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
+def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64,
+ cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
+def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32,
+ cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
+def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64,
+ cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> integer conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
+
+class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
+ RegisterClass DestPR, RegisterClass SrcPR, string asmop>
+ : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>;
+
+multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
+ def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0},
+ GPR32, FPR32, asmop # "s">;
+ def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0},
+ GPR64, FPR32, asmop # "s">;
+ def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1},
+ GPR32, FPR32, asmop # "u">;
+ def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1},
+ GPR64, FPR32, asmop # "u">;
+
+ def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0},
+ GPR32, FPR64, asmop # "s">;
+ def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0},
+ GPR64, FPR64, asmop # "s">;
+ def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1},
+ GPR32, FPR64, asmop # "u">;
+ def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1},
+ GPR64, FPR64, asmop # "u">;
+}
+
+defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
+defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
+defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
+defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
+defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
+
+def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
+def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
+def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
+def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>;
+def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
+def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
+def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
+def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
+
+multiclass A64I_inttofp<bit o0, string asmop> {
+ def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
+ def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
+ def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
+ def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
+}
+
+defm S : A64I_inttofp<0b0, "scvtf">;
+defm U : A64I_inttofp<0b1, "ucvtf">;
+
+def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
+def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
+def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
+def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>;
+def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
+def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
+def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
+def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
+
+def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
+def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
+def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
+def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
+
+def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
+def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
+def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
+def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
+
+def lane1_asmoperand : AsmOperandClass {
+ let Name = "Lane1";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "Lane1";
+}
+
+def lane1 : Operand<i32> {
+ let ParserMatchClass = lane1_asmoperand;
+ let PrintMethod = "printBareImmOperand";
+}
+
+let DecoderMethod = "DecodeFMOVLaneInstruction" in {
+ def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
+ (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
+ "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>;
+
+ def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
+ (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
+ "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
+}
+
+def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
+ (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
+
+def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
+ (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point immediate instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMOV
+
+def fpimm_asmoperand : AsmOperandClass {
+ let Name = "FMOVImm";
+ let ParserMethod = "ParseFPImmOperand";
+ let DiagnosticType = "FPImm";
+}
+
+// The MCOperand for these instructions are the encoded 8-bit values.
+def SDXF_fpimm : SDNodeXForm<fpimm, [{
+ uint32_t Imm8;
+ A64Imms::isFPImm(N->getValueAPF(), Imm8);
+ return CurDAG->getTargetConstant(Imm8, MVT::i32);
+}]>;
+
+class fmov_operand<ValueType FT>
+ : Operand<i32>,
+ PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
+ SDXF_fpimm> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = fpimm_asmoperand;
+}
+
+def fmov32_operand : fmov_operand<f32>;
+def fmov64_operand : fmov_operand<f64>;
+
+class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
+ Operand fmov_operand>
+ : A64I_fpimm<0b0, 0b0, type, 0b00000,
+ (outs Reg:$Rd),
+ (ins fmov_operand:$Imm8),
+ "fmov\t$Rd, $Imm8",
+ [(set VT:$Rd, fmov_operand:$Imm8)],
+ NoItinerary>;
+
+def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
+def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
+
+//===----------------------------------------------------------------------===//
+// Load-register (literal) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDR, LDRSW, PRFM
+
+def ldrlit_label_asmoperand : AsmOperandClass {
+ let Name = "LoadLitLabel";
+ let RenderMethod = "addLabelOperands<19, 4>";
+ let DiagnosticType = "Label";
+}
+
+def ldrlit_label : Operand<i64> {
+ let EncoderMethod = "getLoadLitLabelOpValue";
+
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let ParserMatchClass = ldrlit_label_asmoperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Various instructions take an immediate value (which can always be used),
+// where some numbers have a symbolic name to make things easier. These operands
+// and the associated functions abstract away the differences.
+multiclass namedimm<string prefix, string mapper> {
+ def _asmoperand : AsmOperandClass {
+ let Name = "NamedImm" # prefix;
+ let PredicateMethod = "isUImm";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
+ let DiagnosticType = "NamedImm_" # prefix;
+ }
+
+ def _op : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printNamedImmOperand<" # mapper # ">";
+ let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
+ }
+}
+
+defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
+
+class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
+ list<dag> patterns = []>
+ : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
+ "ldr\t$Rt, $Imm19", patterns, NoItinerary>;
+
+let mayLoad = 1 in {
+ def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
+ def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
+}
+
+def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
+def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
+
+let mayLoad = 1 in {
+ def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
+
+
+ def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins ldrlit_label:$Imm19),
+ "ldrsw\t$Rt, $Imm19",
+ [], NoItinerary>;
+
+ def PRFM_lit : A64I_LDRlit<0b11, 0b0,
+ (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
+ "prfm\t$Rt, $Imm19",
+ [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load-store exclusive instructions
+//===----------------------------------------------------------------------===//
+// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR. STXP, LDXP, STLXRB,
+// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
+// STLRH, STLR, LDARB, LDARH, LDAR
+
+// Since these instructions have the undefined register bits set to 1 in
+// their canonical form, we need a post encoder method to set those bits
+// to 1 when encoding these instructions. We do this using the
+// fixLoadStoreExclusive function. This function has template parameters:
+//
+// fixLoadStoreExclusive<int hasRs, int hasRt2>
+//
+// hasRs indicates that the instruction uses the Rs field, so we won't set
+// it to 1 (and the same for Rt2). We don't need template parameters for
+// the other register fiels since Rt and Rn are always used.
+
+// This operand parses a GPR64xsp register, followed by an optional immediate
+// #0.
+def GPR64xsp0_asmoperand : AsmOperandClass {
+ let Name = "GPR64xsp0";
+ let PredicateMethod = "isWrappedReg";
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "ParseLSXAddressOperand";
+ // Diagnostics are provided by ParserMethod
+}
+
+def GPR64xsp0 : RegisterOperand<GPR64xsp> {
+ let ParserMatchClass = GPR64xsp0_asmoperand;
+}
+
+//===----------------------------------
+// Store-exclusive (releasing & normal)
+//===----------------------------------
+
+class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_stn <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rs, $Rt, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+}
+
+multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
+ def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [],NoItinerary>;
+
+ def _word: A64I_SRexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
+ (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm STXR : A64I_SRex<"stxr", 0b000, "STXR">;
+defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
+
+//===----------------------------------
+// Loads
+//===----------------------------------
+
+class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tn <size,
+ opcode{2}, 1, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, [$Rn]"),
+ pat, itin> {
+ let mayLoad = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+multiclass A64I_LRex<string asmstr, bits<3> opcode> {
+ def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _word: A64I_LRexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
+ (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm LDXR : A64I_LRex<"ldxr", 0b000>;
+defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
+defm LDAR : A64I_LRex<"ldar", 0b101>;
+
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+}]>;
+
+def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
+
+def : Pat<(atomic_load_acquire_8 i64:$Rn), (LDAR_byte $Rn)>;
+def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>;
+def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word $Rn)>;
+def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>;
+
+//===----------------------------------
+// Store-release (no exclusivity)
+//===----------------------------------
+
+class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tn <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ return cast<AtomicSDNode>(N)->getOrdering() == Release;
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+def atomic_store_release_64 : releasing_store<atomic_store_64>;
+
+multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
+ def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_8 i64:$Rn, i32:$Rt)],
+ NoItinerary>;
+
+ def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_16 i64:$Rn, i32:$Rt)],
+ NoItinerary>;
+
+ def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_32 i64:$Rn, i32:$Rt)],
+ NoItinerary>;
+
+ def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
+ (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_64 i64:$Rn, i64:$Rt)],
+ NoItinerary>;
+}
+
+defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
+
+//===----------------------------------
+// Store-exclusive pair (releasing & normal)
+//===----------------------------------
+
+class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_stt2n <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+}
+
+
+multiclass A64I_SPex<string asmstr, bits<3> opcode> {
+ def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
+ (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
+ GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
+ (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
+ GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm STXP : A64I_SPex<"stxp", 0b010>;
+defm STLXP : A64I_SPex<"stlxp", 0b011>;
+
+//===----------------------------------
+// Load-exclusive pair (acquiring & normal)
+//===----------------------------------
+
+class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tt2n <size,
+ opcode{2}, 1, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
+ pat, itin>{
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
+}
+
+multiclass A64I_LPex<string asmstr, bits<3> opcode> {
+ def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rt, GPR32:$Rt2),
+ (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm LDXP : A64I_LPex<"ldxp", 0b010>;
+defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unscaled immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDURB, LDURH, LDRUSB, LDRUSH, LDRUSW, STUR, STURB, STURH and PRFUM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (register offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (unsigned immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+
+// Note that patterns are much later on in a completely separate section (they
+// need ADRPxi to be defined).
+
+//===-------------------------------
+// 1. Various operands needed
+//===-------------------------------
+
+//===-------------------------------
+// 1.1 Unsigned 12-bit immediate operands
+//===-------------------------------
+// The addressing mode for these instructions consists of an unsigned 12-bit
+// immediate which is scaled by the size of the memory access.
+//
+// We represent this in the MC layer by two operands:
+// 1. A base register.
+// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]"
+// would have '1' in this field.
+// This means that separate functions are needed for converting representations
+// which *are* aware of the intended access size.
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+//===-------------------------------
+// 1.1 Unsigned 12-bit immediate operands
+//===-------------------------------
+
+multiclass offsets_uimm12<int MemSize, string prefix> {
+ def uimm12_asmoperand : AsmOperandClass {
+ let Name = "OffsetUImm12_" # MemSize;
+ let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
+ let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreUImm12_" # MemSize;
+ }
+
+ // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
+ // complicates things beyond TableGen's ken.
+ def uimm12 : Operand<i64>,
+ ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
+
+ let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
+ let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
+ }
+}
+
+defm byte_ : offsets_uimm12<1, "byte_">;
+defm hword_ : offsets_uimm12<2, "hword_">;
+defm word_ : offsets_uimm12<4, "word_">;
+defm dword_ : offsets_uimm12<8, "dword_">;
+defm qword_ : offsets_uimm12<16, "qword_">;
+
+//===-------------------------------
+// 1.1 Signed 9-bit immediate operands
+//===-------------------------------
+
+// The MCInst is expected to store the bit-wise encoding of the value,
+// which amounts to lopping off the extended sign bits.
+def SDXF_simm9 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
+}]>;
+
+def simm9_asmoperand : AsmOperandClass {
+ let Name = "SImm9";
+ let PredicateMethod = "isSImm<9>";
+ let RenderMethod = "addSImmOperands<9>";
+ let DiagnosticType = "LoadStoreSImm9";
+}
+
+def simm9 : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
+ SDXF_simm9> {
+ let PrintMethod = "printOffsetSImm9Operand";
+ let ParserMatchClass = simm9_asmoperand;
+}
+
+
+//===-------------------------------
+// 1.3 Register offset extensions
+//===-------------------------------
+
+// The assembly-syntax for these addressing-modes is:
+// [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
+//
+// The essential semantics are:
+// + <amount> is a shift: #<log(transfer size)> or #0
+// + <R> can be W or X.
+// + If <R> is W, <extend> can be UXTW or SXTW
+// + If <R> is X, <extend> can be LSL or SXTX
+//
+// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
+// which will need separate instructions for LLVM type-consistency. We'll also
+// need separate operands, of course.
+multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
+ string Rm, string prefix> {
+ def regext_asmoperand : AsmOperandClass {
+ let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
+ let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
+ let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize;
+ }
+
+ def regext : Operand<i64> {
+ let PrintMethod
+ = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
+
+ let DecoderMethod = "DecodeAddrRegExtendOperand";
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # regext_asmoperand);
+ }
+}
+
+multiclass regexts_wx<int MemSize, string prefix> {
+ // Rm is an X-register if LSL or SXTX are specified as the shift.
+ defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
+
+ // Rm is a W-register if UXTW or SXTW are specified as the shift.
+ defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
+}
+
+defm byte_ : regexts_wx<1, "byte_">;
+defm hword_ : regexts_wx<2, "hword_">;
+defm word_ : regexts_wx<4, "word_">;
+defm dword_ : regexts_wx<8, "dword_">;
+defm qword_ : regexts_wx<16, "qword_">;
+
+
+//===------------------------------
+// 2. The instructions themselves.
+//===------------------------------
+
+// We have the following instructions to implement:
+// | | B | H | W | X |
+// |-----------------+-------+-------+-------+--------|
+// | unsigned str | STRB | STRH | STR | STR |
+// | unsigned ldr | LDRB | LDRH | LDR | LDR |
+// | signed ldr to W | LDRSB | LDRSH | - | - |
+// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) |
+
+// This will instantiate the LDR/STR instructions you'd expect to use for an
+// unsigned datatype (first two rows above) or floating-point register, which is
+// reasonably uniform across all access sizes.
+
+
+//===------------------------------
+// 2.1 Regular instructions
+//===------------------------------
+
+// This class covers the basic unsigned or irrelevantly-signed loads and stores,
+// to general-purpose and floating-point registers.
+
+class AddrParams<string prefix> {
+ Operand uimm12 = !cast<Operand>(prefix # "_uimm12");
+
+ Operand regextWm = !cast<Operand>(prefix # "_Wm_regext");
+ Operand regextXm = !cast<Operand>(prefix # "_Xm_regext");
+}
+
+def byte_addrparams : AddrParams<"byte">;
+def hword_addrparams : AddrParams<"hword">;
+def word_addrparams : AddrParams<"word">;
+def dword_addrparams : AddrParams<"dword">;
+def qword_addrparams : AddrParams<"qword">;
+
+multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
+ bit high_opc, string asmsuffix,
+ RegisterClass GPR, AddrParams params> {
+ // Unsigned immediate
+ def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+ def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Register offset (four of these: load/store and Wm/Xm).
+ let mayLoad = 1 in {
+ def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0,
+ (outs GPR:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
+ (outs GPR:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ let mayStore = 1 in {
+ def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
+ params.regextWm:$Ext),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
+ params.regextXm:$Ext),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ // Unaligned immediate
+ def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+ def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Post-indexed
+ def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0},
+ (outs GPR64xsp:$Rn_wb),
+ (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let mayStore = 1;
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ // Pre-indexed
+ def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0},
+ (outs GPR64xsp:$Rn_wb),
+ (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let mayStore = 1;
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+}
+
+// STRB/LDRB: First define the instructions
+defm LS8
+ : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>;
+
+// STRH/LDRH
+defm LS16
+ : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>;
+
+
+// STR/LDR to/from a W register
+defm LS32
+ : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>;
+
+// STR/LDR to/from an X register
+defm LS64
+ : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
+
+// STR/LDR to/from a B register
+defm LSFP8
+ : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
+
+// STR/LDR to/from an H register
+defm LSFP16
+ : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>;
+
+// STR/LDR to/from an S register
+defm LSFP32
+ : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>;
+// STR/LDR to/from a D register
+defm LSFP64
+ : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>;
+// STR/LDR to/from a Q register
+defm LSFP128
+ : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128,
+ qword_addrparams>;
+
+//===------------------------------
+// 2.3 Signed loads
+//===------------------------------
+
+// Byte and half-word signed loads can both go into either an X or a W register,
+// so it's worth factoring out. Signed word loads don't fit because there is no
+// W version.
+multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
+ string prefix> {
+ // Unsigned offset
+ def w : A64I_LSunsigimm<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def x : A64I_LSunsigimm<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Register offset
+ let mayLoad = 1 in {
+ def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+
+ let mayLoad = 1 in {
+ // Unaligned offset
+ def w_U : A64I_LSunalimm<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+ def x_U : A64I_LSunalimm<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+
+ // Post-indexed
+ def w_PostInd : A64I_LSpostind<size, 0b0, 0b11,
+ (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def x_PostInd : A64I_LSpostind<size, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ // Pre-indexed
+ def w_PreInd : A64I_LSpreind<size, 0b0, 0b11,
+ (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def x_PreInd : A64I_LSpreind<size, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+ } // let mayLoad = 1
+}
+
+// LDRSB
+defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">;
+// LDRSH
+defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">;
+
+// LDRSW: load a 32-bit register, sign-extending to 64-bits.
+def LDRSWx
+ : A64I_LSunsigimm<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, word_uimm12:$UImm12),
+ "ldrsw\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in {
+ def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
+ "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
+ "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
+ (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
+
+
+def LDURSWx
+ : A64I_LSunalimm<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldursw\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+def LDRSWx_PostInd
+ : A64I_LSpostind<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrsw\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrsw\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+//===------------------------------
+// 2.4 Prefetch operations
+//===------------------------------
+
+def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
+ "prfm\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"prfm $Rt, [$Rn]",
+ (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in {
+ def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR32:$Rm, dword_Wm_regext:$Ext),
+ "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, dword_Xm_regext:$Ext),
+ "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+}
+
+def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
+ (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+
+def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "prfum\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"prfum $Rt, [$Rn]",
+ (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unprivileged) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH
+
+// These instructions very much mirror the "unscaled immediate" loads, but since
+// there are no floating-point variants we need to split them out into their own
+// section to avoid instantiation of "ldtr d0, [sp]" etc.
+
+multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR,
+ string prefix> {
+ def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+
+ def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01,
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+
+ def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+}
+
+// STTRB/LDTRB: First define the instructions
+defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">;
+
+// STTRH/LDTRH
+defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">;
+
+// STTR/LDTR to/from a W register
+defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">;
+
+// STTR/LDTR to/from an X register
+defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">;
+
+// Now a class for the signed instructions that can go to either 32 or 64
+// bits...
+multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> {
+ let mayLoad = 1 in {
+ def w : A64I_LSunpriv<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+ def x : A64I_LSunpriv<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+ }
+
+ def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+}
+
+// LDTRSB
+defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">;
+// LDTRSH
+defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">;
+
+// And finally LDTRSW which only goes to 64 bits.
+def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrsw\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register pair (offset) instructions
+//===----------------------------------------------------------------------===//
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register pair (post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STP, LDP, LDPSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register pair (pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STP, LDP, LDPSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store non-temporal register pair (offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STNP, LDNP
+
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+multiclass offsets_simm7<string MemSize, string prefix> {
+ // The bare signed 7-bit immediate is used in post-indexed instructions, but
+ // because of the scaling performed a generic "simm7" operand isn't
+ // appropriate here either.
+ def simm7_asmoperand : AsmOperandClass {
+ let Name = "SImm7_Scaled" # MemSize;
+ let PredicateMethod = "isSImm7Scaled<" # MemSize # ">";
+ let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreSImm7_" # MemSize;
+ }
+
+ def simm7 : Operand<i64> {
+ let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">";
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand");
+ }
+}
+
+defm word_ : offsets_simm7<"4", "word_">;
+defm dword_ : offsets_simm7<"8", "dword_">;
+defm qword_ : offsets_simm7<"16", "qword_">;
+
+multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
+ Operand simm7, string prefix> {
+ def _STR : A64I_LSPoffset<opc, v, 0b0, (outs),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayStore = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"stp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _LDR : A64I_LSPoffset<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"ldp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0,
+ (outs GPR64xsp:$Rn_wb),
+ (ins SomeReg:$Rt, SomeReg:$Rt2,
+ GPR64xsp:$Rn,
+ simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayStore = 1;
+ let Constraints = "$Rn = $Rn_wb";
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayStore = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayStore = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"stnp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+}
+
+
+defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
+defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
+defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
+defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
+defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
+ "LSFPPair128">;
+
+
+def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]",
+ (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>;
+
+def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+//===----------------------------------------------------------------------===//
+// Logical (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV
+
+multiclass logical_imm_operands<string prefix, string note,
+ int size, ValueType VT> {
+ def _asmoperand : AsmOperandClass {
+ let Name = "LogicalImm" # note # size;
+ let PredicateMethod = "isLogicalImm" # note # "<" # size # ">";
+ let RenderMethod = "addLogicalImmOperands<" # size # ">";
+ let DiagnosticType = "LogicalSecondSource";
+ }
+
+ def _operand
+ : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printLogicalImmOperand<" # size # ">";
+ let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">";
+ }
+}
+
+defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>;
+defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>;
+
+// The mov versions only differ in assembly parsing, where they
+// exclude values representable with either MOVZ or MOVN.
+defm logical_imm32_mov
+ : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>;
+defm logical_imm64_mov
+ : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>;
+
+
+multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> {
+ def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd),
+ (ins GPR32:$Rn, logical_imm32_operand:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [(set i32:$Rd,
+ (opnode i32:$Rn, logical_imm32_operand:$Imm))],
+ NoItinerary>;
+
+ def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
+ (ins GPR64:$Rn, logical_imm64_operand:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [(set i64:$Rd,
+ (opnode i64:$Rn, logical_imm64_operand:$Imm))],
+ NoItinerary>;
+}
+
+defm AND : A64I_logimmSizes<0b00, "and", and>;
+defm ORR : A64I_logimmSizes<0b01, "orr", or>;
+defm EOR : A64I_logimmSizes<0b10, "eor", xor>;
+
+let Defs = [NZCV] in {
+ def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, logical_imm32_operand:$Imm),
+ "ands\t$Rd, $Rn, $Imm",
+ [], NoItinerary>;
+
+ def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, logical_imm64_operand:$Imm),
+ "ands\t$Rd, $Rn, $Imm",
+ [], NoItinerary>;
+}
+
+
+def : InstAlias<"tst $Rn, $Imm",
+ (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>;
+def : InstAlias<"tst $Rn, $Imm",
+ (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>;
+def : InstAlias<"mov $Rd, $Imm",
+ (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>;
+def : InstAlias<"mov $Rd, $Imm",
+ (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>;
+
+//===----------------------------------------------------------------------===//
+// Logical (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV
+
+// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS"
+// behaves differently for unsigned comparisons, so we defensively only allow
+// signed or n/a as the operand. In practice "unsigned greater than 0" is "not
+// equal to 0" and LLVM gives us this.
+def signed_cond : PatLeaf<(cond), [{
+ return !isUnsignedIntSetCC(N->get());
+}]>;
+
+
+// These instructions share their "shift" operands with add/sub (shifted
+// register instructions). They are defined there.
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
+ bit N, bit commutable,
+ string asmop, SDPatternOperator opfrag, ValueType ty,
+ RegisterClass GPR, list<Register> defs> {
+ let isCommutable = commutable, Defs = defs in {
+ def _lsl : A64I_logicalshift<sf, opc, 0b00, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, opc, 0b10, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, opc, 0b11, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+ GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag ty:$Rn, ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable,
+ string asmop, SDPatternOperator opfrag,
+ list<Register> defs> {
+ defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N,
+ commutable, asmop, opfrag, i64, GPR64, defs>;
+ defm www : logical_shifts<prefix # "www", 0b0, opc, N,
+ commutable, asmop, opfrag, i32, GPR32, defs>;
+}
+
+
+defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>;
+defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>;
+defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>;
+defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands",
+ PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs),
+ [{ (void)N; return false; }]>,
+ [NZCV]>;
+
+defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (and node:$lhs, (not node:$rhs))>, []>;
+defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (or node:$lhs, (not node:$rhs))>, []>;
+defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (xor node:$lhs, (not node:$rhs))>, []>;
+defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (and node:$lhs, (not node:$rhs)),
+ [{ (void)N; return false; }]>,
+ [NZCV]>;
+
+multiclass tst_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
+ let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in {
+ def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+
+ def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+ }
+
+ def _noshift : InstAlias<"tst $Rn, $Rm",
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>;
+defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>;
+
+
+multiclass mvn_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
+ let isCommutable = 0, Rn = 0b11111 in {
+ def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+
+
+ def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (rotr ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+ }
+
+ def _noshift : InstAlias<"mvn $Rn, $Rm",
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(not ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rm, 0)>;
+}
+
+defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>;
+defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>;
+
+def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Move wide (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MOVN, MOVZ, MOVK + MOV aliases
+
+// A wide variety of different relocations are needed for variants of these
+// instructions, so it turns out that we need a different operand for all of
+// them.
+multiclass movw_operands<string prefix, string instname, int width> {
+ def _imm_asmoperand : AsmOperandClass {
+ let Name = instname # width # "Shifted" # shift;
+ let PredicateMethod = "is" # instname # width # "Imm";
+ let RenderMethod = "addMoveWideImmOperands";
+ let ParserMethod = "ParseImmWithLSLOperand";
+ let DiagnosticType = "MOVWUImm16";
+ }
+
+ def _imm : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
+ let PrintMethod = "printMoveWideImmOperand";
+ let EncoderMethod = "getMoveWideImmOpValue";
+ let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">";
+
+ let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+ }
+}
+
+defm movn32 : movw_operands<"movn32", "MOVN", 32>;
+defm movn64 : movw_operands<"movn64", "MOVN", 64>;
+defm movz32 : movw_operands<"movz32", "MOVZ", 32>;
+defm movz64 : movw_operands<"movz64", "MOVZ", 64>;
+defm movk32 : movw_operands<"movk32", "MOVK", 32>;
+defm movk64 : movw_operands<"movk64", "MOVK", 64>;
+
+multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit,
+ dag ins64bit> {
+
+ def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
+ !strconcat(asmop, "\t$Rd, $FullImm"),
+ [], NoItinerary> {
+ bits<18> FullImm;
+ let UImm16 = FullImm{15-0};
+ let Shift = FullImm{17-16};
+ }
+
+ def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
+ !strconcat(asmop, "\t$Rd, $FullImm"),
+ [], NoItinerary> {
+ bits<18> FullImm;
+ let UImm16 = FullImm{15-0};
+ let Shift = FullImm{17-16};
+ }
+}
+
+let isMoveImm = 1, isReMaterializable = 1,
+ isAsCheapAsAMove = 1, hasSideEffects = 0 in {
+ defm MOVN : A64I_movwSizes<0b00, "movn",
+ (ins movn32_imm:$FullImm),
+ (ins movn64_imm:$FullImm)>;
+
+ // Some relocations are able to convert between a MOVZ and a MOVN. If these
+ // are applied the instruction must be emitted with the corresponding bits as
+ // 0, which means a MOVZ needs to override that bit from the default.
+ let PostEncoderMethod = "fixMOVZ" in
+ defm MOVZ : A64I_movwSizes<0b10, "movz",
+ (ins movz32_imm:$FullImm),
+ (ins movz64_imm:$FullImm)>;
+}
+
+let Constraints = "$src = $Rd" in
+defm MOVK : A64I_movwSizes<0b11, "movk",
+ (ins GPR32:$src, movk32_imm:$FullImm),
+ (ins GPR64:$src, movk64_imm:$FullImm)>;
+
+
+// And now the "MOV" aliases. These also need their own operands because what
+// they accept is completely different to what the base instructions accept.
+multiclass movalias_operand<string prefix, string basename,
+ string immpredicate, int width> {
+ def _asmoperand : AsmOperandClass {
+ let Name = basename # width # "MovAlias";
+ let PredicateMethod
+ = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">";
+ let RenderMethod
+ = "addMoveWideMovAliasOperands<" # width # ", "
+ # "A64Imms::" # immpredicate # ">";
+ }
+
+ def _movimm : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+
+ let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+ }
+}
+
+defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>;
+defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>;
+defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>;
+defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>;
+
+// FIXME: these are officially canonical aliases, but TableGen is too limited to
+// print them at the moment. I believe in this case an "AliasPredicate" method
+// will need to be implemented. to allow it, as well as the more generally
+// useful handling of non-register, non-constant operands.
+class movalias<Instruction INST, RegisterClass GPR, Operand operand>
+ : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>;
+
+def : movalias<MOVZwii, GPR32, movz32_movimm>;
+def : movalias<MOVZxii, GPR64, movz64_movimm>;
+def : movalias<MOVNwii, GPR32, movn32_movimm>;
+def : movalias<MOVNxii, GPR64, movn64_movimm>;
+
+//===----------------------------------------------------------------------===//
+// PC-relative addressing instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADR, ADRP
+
+def adr_label : Operand<i64> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>";
+
+ // This label is a 21-bit offset from PC, unscaled
+ let PrintMethod = "printLabelOperand<21, 1>";
+ let ParserMatchClass = label_asmoperand<21, 1>;
+ let OperandType = "OPERAND_PCREL";
+}
+
+def adrp_label_asmoperand : AsmOperandClass {
+ let Name = "AdrpLabel";
+ let RenderMethod = "addLabelOperands<21, 4096>";
+ let DiagnosticType = "Label";
+}
+
+def adrp_label : Operand<i64> {
+ let EncoderMethod = "getAdrpLabelOpValue";
+
+ // This label is a 21-bit offset from PC, scaled by the page-size: 4096.
+ let PrintMethod = "printLabelOperand<21, 4096>";
+ let ParserMatchClass = adrp_label_asmoperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+let hasSideEffects = 0 in {
+ def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
+ "adr\t$Rd, $Label", [], NoItinerary>;
+
+ def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label),
+ "adrp\t$Rd, $Label", [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// System instructions
+//===----------------------------------------------------------------------===//
+// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS
+// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL
+
+// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values.
+def uimm3_asmoperand : AsmOperandClass {
+ let Name = "UImm3";
+ let PredicateMethod = "isUImm<3>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm3";
+}
+
+def uimm3 : Operand<i32> {
+ let ParserMatchClass = uimm3_asmoperand;
+}
+
+// The HINT alias can accept a simple unsigned 7-bit immediate.
+def uimm7_asmoperand : AsmOperandClass {
+ let Name = "UImm7";
+ let PredicateMethod = "isUImm<7>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm7";
+}
+
+def uimm7 : Operand<i32> {
+ let ParserMatchClass = uimm7_asmoperand;
+}
+
+// Multiclass namedimm is defined with the prefetch operands. Most of these fit
+// into the NamedImmMapper scheme well: they either accept a named operand or
+// any immediate under a particular value (which may be 0, implying no immediate
+// is allowed).
+defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">;
+defm isb : namedimm<"isb", "A64ISB::ISBMapper">;
+defm ic : namedimm<"ic", "A64IC::ICMapper">;
+defm dc : namedimm<"dc", "A64DC::DCMapper">;
+defm at : namedimm<"at", "A64AT::ATMapper">;
+defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">;
+
+// However, MRS and MSR are more complicated for a few reasons:
+// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an
+// implementation-defined effect
+// * Most registers are shared, but some are read-only or write-only.
+// * There is a variant of MSR which accepts the same register name (SPSel),
+// but which would have a different encoding.
+
+// In principle these could be resolved in with more complicated subclasses of
+// NamedImmMapper, however that imposes an overhead on other "named
+// immediates". Both in concrete terms with virtual tables and in unnecessary
+// abstraction.
+
+// The solution adopted here is to take the MRS/MSR Mappers out of the usual
+// hierarchy (they're not derived from NamedImmMapper) and to add logic for
+// their special situation.
+def mrs_asmoperand : AsmOperandClass {
+ let Name = "MRS";
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MRS";
+}
+
+def mrs_op : Operand<i32> {
+ let ParserMatchClass = mrs_asmoperand;
+ let PrintMethod = "printMRSOperand";
+ let DecoderMethod = "DecodeMRSOperand";
+}
+
+def msr_asmoperand : AsmOperandClass {
+ let Name = "MSRWithReg";
+
+ // Note that SPSel is valid for both this and the pstate operands, but with
+ // different immediate encodings. This is why these operands provide a string
+ // AArch64Operand rather than an immediate. The overlap is small enough that
+ // it could be resolved with hackery now, but who can say in future?
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MSR";
+}
+
+def msr_op : Operand<i32> {
+ let ParserMatchClass = msr_asmoperand;
+ let PrintMethod = "printMSROperand";
+ let DecoderMethod = "DecodeMSROperand";
+}
+
+def pstate_asmoperand : AsmOperandClass {
+ let Name = "MSRPState";
+ // See comment above about parser.
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MSR";
+}
+
+def pstate_op : Operand<i32> {
+ let ParserMatchClass = pstate_asmoperand;
+ let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>";
+ let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>";
+}
+
+// When <CRn> is specified, an assembler should accept something like "C4", not
+// the usual "#4" immediate.
+def CRx_asmoperand : AsmOperandClass {
+ let Name = "CRx";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseCRxOperand";
+ // Diagnostics are handled in all cases by ParseCRxOperand.
+}
+
+def CRx : Operand<i32> {
+ let ParserMatchClass = CRx_asmoperand;
+ let PrintMethod = "printCRxOperand";
+}
+
+
+// Finally, we can start defining the instructions.
+
+// HINT is straightforward, with a few aliases.
+def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
+ [], NoItinerary> {
+ bits<7> UImm7;
+ let CRm = UImm7{6-3};
+ let Op2 = UImm7{2-0};
+
+ let Op0 = 0b00;
+ let Op1 = 0b011;
+ let CRn = 0b0010;
+ let Rt = 0b11111;
+}
+
+def : InstAlias<"nop", (HINTi 0)>;
+def : InstAlias<"yield", (HINTi 1)>;
+def : InstAlias<"wfe", (HINTi 2)>;
+def : InstAlias<"wfi", (HINTi 3)>;
+def : InstAlias<"sev", (HINTi 4)>;
+def : InstAlias<"sevl", (HINTi 5)>;
+
+// Quite a few instructions then follow a similar pattern of fixing common
+// fields in the bitpattern, we'll define a helper-class for them.
+class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
+ Operand operand, string asmop>
+ : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
+ [], NoItinerary> {
+ let Op0 = op0;
+ let Op1 = op1;
+ let CRn = crn;
+ let Op2 = op2;
+ let Rt = 0b11111;
+}
+
+
+def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
+def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
+def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
+def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
+
+def : InstAlias<"clrex", (CLREXi 0b1111)>;
+def : InstAlias<"isb", (ISBi 0b1111)>;
+
+// (DMBi 0xb) is a "DMB ISH" instruciton, appropriate for Linux SMP
+// configurations at least.
+def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
+
+// Any SYS bitpattern can be represented with a complex and opaque "SYS"
+// instruction.
+def SYSiccix : A64I_system<0b0, (outs),
+ (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
+ uimm3:$Op2, GPR64:$Rt),
+ "sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
+ [], NoItinerary> {
+ let Op0 = 0b01;
+}
+
+// You can skip the Xt argument whether it makes sense or not for the generic
+// SYS instruction.
+def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
+ (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
+
+
+// But many have aliases, which obviously don't fit into
+class SYSalias<dag ins, string asmstring>
+ : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> {
+ let isAsmParserOnly = 1;
+
+ bits<14> SysOp;
+ let Op0 = 0b01;
+ let Op1 = SysOp{13-11};
+ let CRn = SysOp{10-7};
+ let CRm = SysOp{6-3};
+ let Op2 = SysOp{2-0};
+}
+
+def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
+
+def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> {
+ let Rt = 0b11111;
+}
+
+def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
+def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
+
+def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
+
+def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> {
+ let Rt = 0b11111;
+}
+
+
+def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
+ (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
+ "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
+ [], NoItinerary> {
+ let Op0 = 0b01;
+}
+
+// The instructions themselves are rather simple for MSR and MRS.
+def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
+ "msr\t$SysReg, $Rt", [], NoItinerary> {
+ bits<16> SysReg;
+ let Op0 = SysReg{15-14};
+ let Op1 = SysReg{13-11};
+ let CRn = SysReg{10-7};
+ let CRm = SysReg{6-3};
+ let Op2 = SysReg{2-0};
+}
+
+def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
+ "mrs\t$Rt, $SysReg", [], NoItinerary> {
+ bits<16> SysReg;
+ let Op0 = SysReg{15-14};
+ let Op1 = SysReg{13-11};
+ let CRn = SysReg{10-7};
+ let CRm = SysReg{6-3};
+ let Op2 = SysReg{2-0};
+}
+
+def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
+ "msr\t$PState, $CRm", [], NoItinerary> {
+ bits<6> PState;
+
+ let Op0 = 0b00;
+ let Op1 = PState{5-3};
+ let CRn = 0b0100;
+ let Op2 = PState{2-0};
+ let Rt = 0b11111;
+}
+
+//===----------------------------------------------------------------------===//
+// Test & branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: TBZ, TBNZ
+
+// The bit to test is a simple unsigned 6-bit immediate in the X-register
+// versions.
+def uimm6 : Operand<i64> {
+ let ParserMatchClass = uimm6_asmoperand;
+}
+
+def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>;
+
+def tbimm_target : Operand<OtherVT> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>";
+
+ // This label is a 14-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<14, 4>";
+ let ParserMatchClass = label_wid14_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>;
+def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>;
+
+// These instructions correspond to patterns involving "and" with a power of
+// two, which we need to be able to select.
+def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">;
+def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">;
+
+let isBranch = 1, isTerminator = 1 in {
+ def TBZxii : A64I_TBimm<0b0, (outs),
+ (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
+ "tbz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
+ A64eq, bb:$Label)],
+ NoItinerary>;
+
+ def TBNZxii : A64I_TBimm<0b1, (outs),
+ (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
+ "tbnz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
+ A64ne, bb:$Label)],
+ NoItinerary>;
+
+
+ // Note, these instructions overlap with the above 64-bit patterns. This is
+ // intentional, "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would both
+ // do the same thing and are both permitted assembly. They also both have
+ // sensible DAG patterns.
+ def TBZwii : A64I_TBimm<0b0, (outs),
+ (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
+ "tbz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
+ A64eq, bb:$Label)],
+ NoItinerary> {
+ let Imm{5} = 0b0;
+ }
+
+ def TBNZwii : A64I_TBimm<0b1, (outs),
+ (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
+ "tbnz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
+ A64ne, bb:$Label)],
+ NoItinerary> {
+ let Imm{5} = 0b0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B, BL
+
+def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>;
+
+def bimm_target : Operand<OtherVT> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>";
+
+ // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<26, 4>";
+ let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+def blimm_target : Operand<i64> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>";
+
+ // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<26, 4>";
+ let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
+ : A64I_Bimm<op, (outs), (ins lbl_type:$Label),
+ !strconcat(asmop, "\t$Label"), patterns,
+ NoItinerary>;
+
+let isBranch = 1 in {
+ def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ }
+
+ def BLimm : A64I_BimmImpl<0b1, "bl",
+ [(AArch64Call tglobaladdr:$Label)], blimm_target> {
+ let isCall = 1;
+ let Defs = [X30];
+ }
+}
+
+def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>;
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: BR, BLR, RET, ERET, DRP.
+
+// Most of the notional opcode fields in the A64I_Breg format are fixed in A64
+// at the moment.
+class A64I_BregImpl<bits<4> opc,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin = NoItinerary>
+ : A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
+ outs, ins, asmstr, patterns, itin> {
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// Note that these are not marked isCall or isReturn because as far as LLVM is
+// concerned they're not. "ret" is just another jump unless it has been selected
+// by LLVM as the function's return.
+
+let isBranch = 1 in {
+ def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn),
+ "br\t$Rn", [(brind i64:$Rn)]> {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ }
+
+ def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
+ "blr\t$Rn", [(AArch64Call i64:$Rn)]> {
+ let isBarrier = 0;
+ let isCall = 1;
+ let Defs = [X30];
+ }
+
+ def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),
+ "ret\t$Rn", []> {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isReturn = 1;
+ }
+
+ // Create a separate pseudo-instruction for codegen to use so that we don't
+ // flag x30 as used in every function. It'll be restored before the RET by the
+ // epilogue if it's legitimately used.
+ def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isReturn = 1;
+ }
+
+ def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> {
+ let Rn = 0b11111;
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isReturn = 1;
+ }
+
+ def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> {
+ let Rn = 0b11111;
+ let isBarrier = 1;
+ }
+}
+
+def RETAlias : InstAlias<"ret", (RETx X30)>;
+
+
+//===----------------------------------------------------------------------===//
+// Address generation patterns
+//===----------------------------------------------------------------------===//
+
+// Primary method of address generation for the small/absolute memory model is
+// an ADRP/ADR pair:
+// ADRP x0, some_variable
+// ADD x0, x0, #:lo12:some_variable
+//
+// The load/store elision of the ADD is accomplished when selecting
+// addressing-modes. This just mops up the cases where that doesn't work and we
+// really need an address in some register.
+
+// This wrapper applies a LO12 modifier to the address. Otherwise we could just
+// use the same address.
+
+class ADRP_ADD<SDNode Wrapper, SDNode addrop>
+ : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)),
+ (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>;
+
+def : ADRP_ADD<A64WrapperSmall, tblockaddress>;
+def : ADRP_ADD<A64WrapperSmall, texternalsym>;
+def : ADRP_ADD<A64WrapperSmall, tglobaladdr>;
+def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>;
+def : ADRP_ADD<A64WrapperSmall, tjumptable>;
+
+//===----------------------------------------------------------------------===//
+// GOT access patterns
+//===----------------------------------------------------------------------===//
+
+// FIXME: Wibble
+
+class GOTLoadSmall<SDNode addrfrag>
+ : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
+ (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
+
+def : GOTLoadSmall<texternalsym>;
+def : GOTLoadSmall<tglobaladdr>;
+def : GOTLoadSmall<tglobaltlsaddr>;
+
+//===----------------------------------------------------------------------===//
+// Tail call handling
+//===----------------------------------------------------------------------===//
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in {
+ def TC_RETURNdi
+ : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff),
+ [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>;
+
+ def TC_RETURNxi
+ : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff),
+ [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>;
+}
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ Uses = [XSP] in {
+ def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [],
+ (Bimm bimm_target:$Label)>;
+
+ def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [],
+ (BRx GPR64:$Rd)>;
+}
+
+
+def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
+ (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>;
+
+//===----------------------------------------------------------------------===//
+// Thread local storage
+//===----------------------------------------------------------------------===//
+
+// This is a pseudo-instruction representing the ".tlsdesccall" directive in
+// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the
+// current location. It should always be immediately followed by a BLR
+// instruction, and is intended solely for relaxation by the linker.
+
+def : Pat<(A64threadpointer), (MRSxi 0xde82)>;
+
+def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> {
+ let hasSideEffects = 1;
+}
+
+def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
+ [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> {
+ let isCall = 1;
+ let Defs = [X30];
+}
+
+def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var),
+ (TLSDESC_BLRx $Rn, texternalsym:$Var)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield patterns
+//===----------------------------------------------------------------------===//
+
+def bfi32_lsb_to_immr : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64);
+}]>;
+
+def bfi64_lsb_to_immr : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64);
+}]>;
+
+def bfi_width_to_imms : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64);
+}]>;
+
+
+// The simpler patterns deal with cases where no AND mask is actually needed
+// (either all bits are used or the low 32 bits are used).
+let AddedComplexity = 10 in {
+
+def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
+ (BFIxxii $src, $Rn,
+ (bfi64_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS)))>;
+
+def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS),
+ (BFIwwii $src, $Rn,
+ (bfi32_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS)))>;
+
+
+def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
+ (i64 4294967295)),
+ (SUBREG_TO_REG (i64 0),
+ (BFIwwii (EXTRACT_SUBREG $src, sub_32),
+ (EXTRACT_SUBREG $Rn, sub_32),
+ (bfi32_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS))),
+ sub_32)>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous patterns
+//===----------------------------------------------------------------------===//
+
+// Truncation from 64 to 32-bits just involves renaming your register.
+def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>;
+
+// Similarly, extension where we don't care about the high bits is
+// just a rename.
+def : Pat<(i64 (anyext i32:$val)),
+ (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>;
+
+// SELECT instructions providing f128 types need to be handled by a
+// pseudo-instruction since the eventual code will need to introduce basic
+// blocks and control flow.
+def F128CSEL : PseudoInst<(outs FPR128:$Rd),
+ (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
+ [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> {
+ let Uses = [NZCV];
+ let usesCustomInserter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Load/store patterns
+//===----------------------------------------------------------------------===//
+
+// There are lots of patterns here, because we need to allow at least three
+// parameters to vary independently.
+// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ...
+// 2. LLVM source: zextloadi8, anyextloadi8, ...
+// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ...
+//
+// The biggest problem turns out to be the address-generation variable. At the
+// point of instantiation we need to produce two DAGs, one for the pattern and
+// one for the instruction. Doing this at the lowest level of classes doesn't
+// work.
+//
+// Consider the simple uimm12 addressing mode, and the desire to match both (add
+// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the
+// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or
+// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this
+// operation, and PatFrags are for selection not output.
+//
+// As a result, the address-generation patterns are the final
+// instantiations. However, we do still need to vary the operand for the address
+// further down (At the point we're deciding A64WrapperSmall, we don't know
+// the memory width of the operation).
+
+//===------------------------------
+// 1. Basic infrastructural defs
+//===------------------------------
+
+// First, some simple classes for !foreach and !subst to use:
+class Decls {
+ dag pattern;
+}
+
+def decls : Decls;
+def ALIGN;
+def INST;
+def OFFSET;
+def SHIFT;
+
+// You can't use !subst on an actual immediate, but you *can* use it on an
+// operand record that happens to match a single immediate. So we do.
+def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>;
+def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>;
+def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>;
+def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>;
+def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>;
+
+// If the low bits of a pointer are known to be 0 then an "or" is just as good
+// as addition for computing an offset. This fragment forwards that check for
+// TableGen's use.
+def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),
+[{
+ return CurDAG->isBaseWithConstantOffset(SDValue(N, 0));
+}]>;
+
+// Load/store (unsigned immediate) operations with relocations against global
+// symbols (for lo12) are only valid if those symbols have correct alignment
+// (since the immediate offset is divided by the access scale, it can't have a
+// remainder).
+//
+// The guaranteed alignment is provided as part of the WrapperSmall
+// operation, and checked against one of these.
+def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>;
+def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>;
+def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>;
+def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>;
+def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>;
+
+// "Normal" load/store instructions can be used on atomic operations, provided
+// the ordering parameter is at most "monotonic". Anything above that needs
+// special handling with acquire/release instructions.
+class simple_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
+}]>;
+
+def atomic_load_simple_i8 : simple_load<atomic_load_8>;
+def atomic_load_simple_i16 : simple_load<atomic_load_16>;
+def atomic_load_simple_i32 : simple_load<atomic_load_32>;
+def atomic_load_simple_i64 : simple_load<atomic_load_64>;
+
+class simple_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
+}]>;
+
+def atomic_store_simple_i8 : simple_store<atomic_store_8>;
+def atomic_store_simple_i16 : simple_store<atomic_store_16>;
+def atomic_store_simple_i32 : simple_store<atomic_store_32>;
+def atomic_store_simple_i64 : simple_store<atomic_store_64>;
+
+//===------------------------------
+// 2. UImm12 and SImm9
+//===------------------------------
+
+// These instructions have two operands providing the address so they can be
+// treated similarly for most purposes.
+
+//===------------------------------
+// 2.1 Base patterns covering extend/truncate semantics
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes, a
+// quick multiclass here allows reuse.
+multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, ValueType transty,
+ ValueType sty> {
+ def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+ (LOAD Base, Offset)>;
+
+ def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
+ (STORE $Rt, Base, Offset)>;
+}
+
+// Instructions accessing a memory chunk smaller than a register (or, in a
+// pinch, the same size) have a characteristic set of patterns they want to
+// match: extending loads and truncating stores. This class deals with the
+// sign-neutral version of those patterns.
+//
+// It will be instantiated across multiple addressing-modes.
+multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset,
+ dag address, ValueType sty>
+ : ls_atomic_pats<LOAD, STORE, Base, Offset, address, i32, sty> {
+ def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>;
+
+ def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>;
+
+ // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+ // register was actually set.
+ def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
+
+ def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
+
+ def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
+ (STORE $Rt, Base, Offset)>;
+
+ // For truncating store from 64-bits, we have to manually tell LLVM to
+ // ignore the high bits of the x register.
+ def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
+ (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
+}
+
+// Next come patterns for sign-extending loads.
+multiclass load_signed_pats<string T, string U, dag Base, dag Offset,
+ dag address, ValueType sty> {
+ def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>;
+
+ def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>;
+
+}
+
+// and finally "natural-width" loads and stores come next.
+multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, ValueType sty> {
+ def : Pat<(sty (load address)), (LOAD Base, Offset)>;
+ def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>;
+}
+
+// Integer operations also get atomic instructions to select for.
+multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, ValueType sty>
+ : ls_neutral_pats<LOAD, STORE, Base, Offset, address, sty>,
+ ls_atomic_pats<LOAD, STORE, Base, Offset, address, sty, sty>;
+
+//===------------------------------
+// 2.2. Addressing-mode instantiations
+//===------------------------------
+
+multiclass uimm12_pats<dag address, dag Base, dag Offset> {
+ defm : ls_small_pats<LS8_LDR, LS8_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, byte_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, byte_uimm12,
+ !subst(ALIGN, any_align, decls.pattern))),
+ i8>;
+ defm : ls_small_pats<LS16_LDR, LS16_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ i16>;
+ defm : ls_small_pats<LS32_LDR, LS32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ i32>;
+
+ defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ i32>;
+
+ defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, dword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, dword_uimm12,
+ !subst(ALIGN, min_align8, decls.pattern))),
+ i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ f16>;
+
+ defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ f32>;
+
+ defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, dword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, dword_uimm12,
+ !subst(ALIGN, min_align8, decls.pattern))),
+ f64>;
+
+ defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, qword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, qword_uimm12,
+ !subst(ALIGN, min_align16, decls.pattern))),
+ f128>;
+
+ defm : load_signed_pats<"B", "", Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, byte_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, byte_uimm12,
+ !subst(ALIGN, any_align, decls.pattern))),
+ i8>;
+
+ defm : load_signed_pats<"H", "", Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ i16>;
+
+ def : Pat<(sextloadi32 !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern)))),
+ (LDRSWx Base, !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)))>;
+}
+
+// Straightforward patterns of last resort: a pointer with or without an
+// appropriate offset.
+defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>;
+defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12),
+ (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
+
+// The offset could be hidden behind an "or", of course:
+defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12),
+ (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
+
+// Global addresses under the small-absolute model should use these
+// instructions. There are ELF relocations specifically for it.
+defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
+ (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12,
+ ALIGN),
+ (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
+
+// External symbols that make it this far should also get standard relocations.
+defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12,
+ ALIGN),
+ (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
+ (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
+
+// We also want to use uimm12 instructions for local variables at the moment.
+def tframeindex_XFORM : SDNodeXForm<frameindex, [{
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->getTargetFrameIndex(FI, MVT::i64);
+}]>;
+
+defm : uimm12_pats<(i64 frameindex:$Rn),
+ (tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
+
+// These can be much simpler than uimm12 because we don't to change the operand
+// type (e.g. LDURB and LDURH take the same operands).
+multiclass simm9_pats<dag address, dag Base, dag Offset> {
+ defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
+ defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
+
+ defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, i32>;
+ defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, f16>;
+ defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, f32>;
+ defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, f64>;
+ defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
+ f128>;
+
+ def : Pat<(i64 (zextloadi32 address)),
+ (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
+
+ def : Pat<(truncstorei32 i64:$Rt, address),
+ (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
+
+ defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
+ defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
+ def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
+}
+
+defm : simm9_pats<(add i64:$Rn, simm9:$SImm9),
+ (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9),
+ (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+
+//===------------------------------
+// 3. Register offset patterns
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes, a
+// quick multiclass here allows reuse.
+multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag Extend, dag address,
+ ValueType transty, ValueType sty> {
+ def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
+ (STORE $Rt, Base, Offset, Extend)>;
+}
+
+// The register offset instructions take three operands giving the instruction,
+// and have an annoying split between instructions where Rm is 32-bit and
+// 64-bit. So we need a special hierarchy to describe them. Other than that the
+// same operations should be supported as for simm9 and uimm12 addressing.
+
+multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend,
+ dag address, ValueType sty>
+ : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, i32, sty> {
+ def : Pat<(!cast<SDNode>(zextload # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ def : Pat<(!cast<SDNode>(extload # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+ // register was actually set.
+ def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+ def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+ def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
+ (STORE $Rt, Base, Offset, Extend)>;
+
+ // For truncating store from 64-bits, we have to manually tell LLVM to
+ // ignore the high bits of the x register.
+ def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
+ (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>;
+
+}
+
+// Next come patterns for sign-extending loads.
+multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
+ dag address, ValueType sty> {
+ def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+
+ def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+}
+
+// and finally "natural-width" loads and stores come next.
+multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend, dag address,
+ ValueType sty> {
+ def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>;
+ def : Pat<(store sty:$Rt, address),
+ (STORE $Rt, Base, Offset, Extend)>;
+}
+
+multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend, dag address,
+ ValueType sty>
+ : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, sty>,
+ ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, sty, sty>;
+
+multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
+ dag Extend> {
+ defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq0, decls.pattern)),
+ i8>;
+ defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ i16>;
+ defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ i32>;
+
+ defm : ro_int_neutral_pats<
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ i32>;
+
+ defm : ro_int_neutral_pats<
+ !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq3, decls.pattern)),
+ i64>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ f16>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ f32>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq3, decls.pattern)),
+ f64>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq4, decls.pattern)),
+ f128>;
+
+ defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq0, decls.pattern)),
+ i8>;
+
+ defm : ro_signed_pats<"H", Rm, Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ i16>;
+
+ def : Pat<(sextloadi32 !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern))),
+ (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+}
+
+
+// Finally we're in a position to tell LLVM exactly what addresses are reachable
+// using register-offset instructions. Essentially a base plus a possibly
+// extended, possibly shifted (by access size) offset.
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>;
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>;
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>;
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>;
+
+defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
+ (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>;
+
+defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
+ (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
new file mode 100644
index 000000000000..c96bf85a716c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -0,0 +1,140 @@
+//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower AArch64 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64AsmPrinter.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MCOperand
+AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Sym) const {
+ const MCExpr *Expr = 0;
+
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext);
+
+ switch (MO.getTargetFlags()) {
+ case AArch64II::MO_GOT:
+ Expr = AArch64MCExpr::CreateGOT(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOT_LO12:
+ Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_LO12:
+ Expr = AArch64MCExpr::CreateLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_DTPREL_G1:
+ Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext);
+ break;
+ case AArch64II::MO_DTPREL_G0_NC:
+ Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOTTPREL:
+ Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOTTPREL_LO12:
+ Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_TLSDESC:
+ Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext);
+ break;
+ case AArch64II::MO_TLSDESC_LO12:
+ Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_TPREL_G1:
+ Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext);
+ break;
+ case AArch64II::MO_TPREL_G0_NC:
+ Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_NO_FLAG:
+ // Expr is already correct
+ break;
+ default:
+ llvm_unreachable("Unexpected MachineOperand flag");
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(),
+ OutContext),
+ OutContext);
+
+ return MCOperand::CreateExpr(Expr);
+}
+
+bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ if (MO.isImplicit())
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers
+ return false;
+
+ }
+
+ return true;
+}
+
+void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI,
+ MCInst &OutMI,
+ AArch64AsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (AP.lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..f45d8f784f42
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -0,0 +1,18 @@
+//===-- AArch64MachineFuctionInfo.cpp - AArch64 machine function info -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file just contains the anchor for the AArch64MachineFunctionInfo to
+// force vtable emission.
+//
+//===----------------------------------------------------------------------===//
+#include "AArch64MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void AArch64MachineFunctionInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
new file mode 100644
index 000000000000..33da54f97fda
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -0,0 +1,149 @@
+//=- AArch64MachineFuctionInfo.h - AArch64 machine function info -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AArch64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AARCH64MACHINEFUNCTIONINFO_H
+#define AARCH64MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// This class is derived from MachineFunctionInfo and contains private AArch64
+/// target-specific information for each MachineFunction.
+class AArch64MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// Number of bytes of arguments this function has on the stack. If the callee
+ /// is expected to restore the argument stack this should be a multiple of 16,
+ /// all usable during a tail call.
+ ///
+ /// The alternative would forbid tail call optimisation in some cases: if we
+ /// want to transfer control from a function with 8-bytes of stack-argument
+ /// space to a function with 16-bytes then misalignment of this value would
+ /// make a stack adjustment necessary, which could not be undone by the
+ /// callee.
+ unsigned BytesInStackArgArea;
+
+ /// The number of bytes to restore to deallocate space for incoming
+ /// arguments. Canonically 0 in the C calling convention, but non-zero when
+ /// callee is expected to pop the args.
+ unsigned ArgumentStackToRestore;
+
+ /// If the stack needs to be adjusted on frame entry in two stages, this
+ /// records the size of the first adjustment just prior to storing
+ /// callee-saved registers. The callee-saved slots are addressed assuming
+ /// SP == <incoming-SP> - InitialStackAdjust.
+ unsigned InitialStackAdjust;
+
+ /// Number of local-dynamic TLS accesses.
+ unsigned NumLocalDynamics;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of the area where LowerFormalArguments puts the
+ /// general-purpose registers that might contain variadic parameters.
+ int VariadicGPRIdx;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The size of the frame object used to store the general-purpose registers
+ /// which might contain variadic arguments. This is the offset from
+ /// VariadicGPRIdx to what's stored in __gr_top.
+ unsigned VariadicGPRSize;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of the area where LowerFormalArguments puts the
+ /// floating-point registers that might contain variadic parameters.
+ int VariadicFPRIdx;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The size of the frame object used to store the floating-point registers
+ /// which might contain variadic arguments. This is the offset from
+ /// VariadicFPRIdx to what's stored in __vr_top.
+ unsigned VariadicFPRSize;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of an object pointing just past the last known stacked
+ /// argument on entry to a variadic function. This goes into the __stack field
+ /// of the va_list type.
+ int VariadicStackIdx;
+
+ /// The offset of the frame pointer from the stack pointer on function
+ /// entry. This is expected to be negative.
+ int FramePointerOffset;
+
+public:
+ AArch64MachineFunctionInfo()
+ : BytesInStackArgArea(0),
+ ArgumentStackToRestore(0),
+ InitialStackAdjust(0),
+ NumLocalDynamics(0),
+ VariadicGPRIdx(0),
+ VariadicGPRSize(0),
+ VariadicFPRIdx(0),
+ VariadicFPRSize(0),
+ VariadicStackIdx(0),
+ FramePointerOffset(0) {}
+
+ explicit AArch64MachineFunctionInfo(MachineFunction &MF)
+ : BytesInStackArgArea(0),
+ ArgumentStackToRestore(0),
+ InitialStackAdjust(0),
+ NumLocalDynamics(0),
+ VariadicGPRIdx(0),
+ VariadicGPRSize(0),
+ VariadicFPRIdx(0),
+ VariadicFPRSize(0),
+ VariadicStackIdx(0),
+ FramePointerOffset(0) {}
+
+ unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
+ void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;}
+
+ unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; }
+ void setArgumentStackToRestore(unsigned bytes) {
+ ArgumentStackToRestore = bytes;
+ }
+
+ unsigned getInitialStackAdjust() const { return InitialStackAdjust; }
+ void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; }
+
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+
+ int getVariadicGPRIdx() const { return VariadicGPRIdx; }
+ void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; }
+
+ unsigned getVariadicGPRSize() const { return VariadicGPRSize; }
+ void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; }
+
+ int getVariadicFPRIdx() const { return VariadicFPRIdx; }
+ void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; }
+
+ unsigned getVariadicFPRSize() const { return VariadicFPRSize; }
+ void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; }
+
+ int getVariadicStackIdx() const { return VariadicStackIdx; }
+ void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; }
+
+ int getFramePointerOffset() const { return FramePointerOffset; }
+ void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
new file mode 100644
index 000000000000..20b0dcf86f46
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -0,0 +1,171 @@
+//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AArch64RegisterInfo.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "AArch64GenRegisterInfo.inc"
+
+using namespace llvm;
+
+AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti)
+ : AArch64GenRegisterInfo(AArch64::X30), TII(tii) {
+}
+
+const uint16_t *
+AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ return CSR_PCS_SaveList;
+}
+
+const uint32_t*
+AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ return CSR_PCS_RegMask;
+}
+
+const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const {
+ return TLSDesc_RegMask;
+}
+
+const TargetRegisterClass *
+AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::FlagClassRegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+}
+
+
+
+BitVector
+AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ Reserved.set(AArch64::XSP);
+ Reserved.set(AArch64::WSP);
+
+ Reserved.set(AArch64::XZR);
+ Reserved.set(AArch64::WZR);
+
+ if (TFI->hasFP(MF)) {
+ Reserved.set(AArch64::X29);
+ Reserved.set(AArch64::W29);
+ }
+
+ return Reserved;
+}
+
+void
+AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
+ int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet");
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64FrameLowering *TFI =
+ static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering());
+
+ // In order to work out the base and offset for addressing, the FrameLowering
+ // code needs to know (sometimes) whether the instruction is storing/loading a
+ // callee-saved register, or whether it's a more generic
+ // operation. Fortunately the frame indices are used *only* for that purpose
+ // and are contiguous, so we can check here.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI;
+
+ unsigned FrameReg;
+ int64_t Offset;
+ Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj,
+ IsCalleeSaveOp);
+
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+
+ // DBG_VALUE instructions have no real restrictions so they can be handled
+ // easily.
+ if (MI.isDebugValue()) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ int MinOffset, MaxOffset, OffsetScale;
+ if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) {
+ MinOffset = 0;
+ MaxOffset = 0xfff;
+ OffsetScale = 1;
+ } else {
+ // Load/store of a stack object
+ TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset);
+ }
+
+ // The frame lowering has told us a base and offset it thinks we should use to
+ // access this variable, but it's still up to us to make sure the values are
+ // legal for the instruction in question.
+ if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) {
+ unsigned BaseReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII,
+ BaseReg, FrameReg, BaseReg, Offset);
+ FrameReg = BaseReg;
+ Offset = 0;
+ }
+
+ // Negative offsets are expected if we address from FP, but for
+ // now this checks nothing has gone horribly wrong.
+ assert(Offset >= 0 && "Unexpected negative offset from SP");
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale);
+}
+
+unsigned
+AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
+ return AArch64::X29;
+ else
+ return AArch64::XSP;
+}
+
+bool
+AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const AArch64FrameLowering *AFI
+ = static_cast<const AArch64FrameLowering*>(TFI);
+ return AFI->useFPForAddressing(MF);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
new file mode 100644
index 000000000000..bb64fd55b2c3
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -0,0 +1,76 @@
+//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the MCRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
+#define LLVM_TARGET_AARCH64REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AArch64GenRegisterInfo.inc"
+
+namespace llvm {
+
+class AArch64InstrInfo;
+class AArch64Subtarget;
+
+struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
+private:
+ const AArch64InstrInfo &TII;
+
+public:
+ AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti);
+
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+
+ const uint32_t *getTLSDescCallPreservedMask() const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *Rs = NULL) const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+ /// in the specified class to or from. Returns original class if it is
+ /// possible to copy between a two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ /// getLargestLegalSuperClass - Returns the largest super class of RC that is
+ /// legal to use in the current sub-target and has the same spill size.
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::tcGPR64RegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+ }
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_AARCH64REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
new file mode 100644
index 000000000000..bd79546371c5
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -0,0 +1,203 @@
+//===- AArch64RegisterInfo.td - ARM Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations that describe the AArch64 register file
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AArch64" in {
+def sub_128 : SubRegIndex;
+def sub_64 : SubRegIndex;
+def sub_32 : SubRegIndex;
+def sub_16 : SubRegIndex;
+def sub_8 : SubRegIndex;
+
+// The VPR registers are handled as sub-registers of FPR equivalents, but
+// they're really the same thing. We give this concept a special index.
+def sub_alias : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
+class AArch64Reg<bits<16> enc, string n> : Register<n> {
+ let HWEncoding = enc;
+ let Namespace = "AArch64";
+}
+
+class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [],
+ list<SubRegIndex> inds = []>
+ : AArch64Reg<enc, n> {
+ let SubRegs = subregs;
+ let SubRegIndices = inds;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-30 in {
+ def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>;
+}
+
+def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
+def WZR : AArch64Reg<31, "wzr">;
+
+// Could be combined with previous loop, but this way leaves w and x registers
+// consecutive as LLVM register numbers, which makes for easier debugging.
+foreach Index = 0-30 in {
+ def X#Index : AArch64RegWithSubs<Index, "x"#Index,
+ [!cast<Register>("W"#Index)], [sub_32]>,
+ DwarfRegNum<[Index]>;
+}
+
+def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>;
+def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>;
+
+// Most instructions treat register 31 as zero for reads and a black-hole for
+// writes.
+
+// Note that the order of registers is important for the Disassembler here:
+// tablegen uses it to form MCRegisterClass::getRegister, which we assume can
+// take an encoding value.
+def GPR32 : RegisterClass<"AArch64", [i32], 32,
+ (add (sequence "W%u", 0, 30), WZR)> {
+}
+
+def GPR64 : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 30), XZR)> {
+}
+
+def GPR32nowzr : RegisterClass<"AArch64", [i32], 32,
+ (sequence "W%u", 0, 30)> {
+}
+
+def GPR64noxzr : RegisterClass<"AArch64", [i64], 64,
+ (sequence "X%u", 0, 30)> {
+}
+
+// For tail calls, we can't use callee-saved registers or the structure-return
+// register, as they are supposed to be live across function calls and may be
+// clobbered by the epilogue.
+def tcGPR64 : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 7),
+ (sequence "X%u", 9, 18))> {
+}
+
+
+// Certain addressing-useful instructions accept sp directly. Again the order of
+// registers is important to the Disassembler.
+def GPR32wsp : RegisterClass<"AArch64", [i32], 32,
+ (add (sequence "W%u", 0, 30), WSP)> {
+}
+
+def GPR64xsp : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 30), XSP)> {
+}
+
+// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and
+// non-SP variants). We can't use a bare register in those patterns because
+// TableGen doesn't like it, so we need a class containing just stack registers
+def Rxsp : RegisterClass<"AArch64", [i64], 64,
+ (add XSP)> {
+}
+
+def Rwsp : RegisterClass<"AArch64", [i32], 32,
+ (add WSP)> {
+}
+
+//===----------------------------------------------------------------------===//
+// Scalar registers in the vector unit:
+// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-31 in {
+ def B # Index : AArch64Reg< Index, "b" # Index>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def H # Index : AArch64RegWithSubs<Index, "h" # Index,
+ [!cast<Register>("B" # Index)], [sub_8]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def S # Index : AArch64RegWithSubs<Index, "s" # Index,
+ [!cast<Register>("H" # Index)], [sub_16]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def D # Index : AArch64RegWithSubs<Index, "d" # Index,
+ [!cast<Register>("S" # Index)], [sub_32]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def Q # Index : AArch64RegWithSubs<Index, "q" # Index,
+ [!cast<Register>("D" # Index)], [sub_64]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+
+def FPR8 : RegisterClass<"AArch64", [i8], 8,
+ (sequence "B%u", 0, 31)> {
+}
+
+def FPR16 : RegisterClass<"AArch64", [f16], 16,
+ (sequence "H%u", 0, 31)> {
+}
+
+def FPR32 : RegisterClass<"AArch64", [f32], 32,
+ (sequence "S%u", 0, 31)> {
+}
+
+def FPR64 : RegisterClass<"AArch64", [f64], 64,
+ (sequence "D%u", 0, 31)> {
+}
+
+def FPR128 : RegisterClass<"AArch64", [f128], 128,
+ (sequence "Q%u", 0, 31)> {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector registers:
+//===----------------------------------------------------------------------===//
+
+// NEON registers simply specify the overall vector, and it's expected that
+// Instructions will individually specify the acceptable data layout. In
+// principle this leaves two approaches open:
+// + An operand, giving a single ADDvvv instruction (for example). This turns
+// out to be unworkable in the assembly parser (without every Instruction
+// having a "cvt" function, at least) because the constraints can't be
+// properly enforced. It also complicates specifying patterns since each
+// instruction will accept many types.
+// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
+// details about NEON registers, but simplifies most other details.
+//
+// The second approach was taken.
+
+foreach Index = 0-31 in {
+ def V # Index : AArch64RegWithSubs<Index, "v" # Index,
+ [!cast<Register>("Q" # Index)],
+ [sub_alias]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+// These two classes contain the same registers, which should be reasonably
+// sensible for MC and allocation purposes, but allows them to be treated
+// separately for things like stack spilling.
+def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
+ (sequence "V%u", 0, 31)>;
+
+def VPR128 : RegisterClass<"AArch64",
+ [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
+ (sequence "V%u", 0, 31)>;
+
+// Flags register
+def NZCV : Register<"nzcv"> {
+ let Namespace = "AArch64";
+}
+
+def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
+ let CopyCost = -1;
+ let isAllocatable = 0;
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td b/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
new file mode 100644
index 000000000000..e17cdaa1f6d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
@@ -0,0 +1,10 @@
+//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def GenericItineraries : ProcessorItineraries<[], [], []>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..6bbe075a1b61
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -0,0 +1,25 @@
+//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-selectiondag-info"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+}
+
+AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
new file mode 100644
index 000000000000..d412ed2be180
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -0,0 +1,32 @@
+//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64SELECTIONDAGINFO_H
+#define LLVM_AARCH64SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class AArch64TargetMachine;
+
+class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
+ const AArch64Subtarget *Subtarget;
+public:
+ explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM);
+ ~AArch64SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
new file mode 100644
index 000000000000..d17b73820994
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -0,0 +1,43 @@
+//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64Subtarget.h"
+#include "AArch64RegisterInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
+ : AArch64GenSubtargetInfo(TT, CPU, FS)
+ , HasNEON(true)
+ , HasCrypto(true)
+ , TargetTriple(TT) {
+
+ ParseSubtargetFeatures(CPU, FS);
+}
+
+bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility();
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
new file mode 100644
index 000000000000..2e9205fc9924
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -0,0 +1,54 @@
+//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H
+#define LLVM_TARGET_AARCH64_SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AArch64GenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+class GlobalValue;
+
+class AArch64Subtarget : public AArch64GenSubtargetInfo {
+protected:
+ bool HasNEON;
+ bool HasCrypto;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_AARCH64_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
new file mode 100644
index 000000000000..df599d599dd6
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -0,0 +1,81 @@
+//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the AArch64TargetMachine
+// methods. Principally just setting up the passes needed to generate correct
+// code on this architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAArch64Target() {
+ RegisterTargetMachine<AArch64TargetMachine> X(TheAArch64Target);
+}
+
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ InstrInfo(Subtarget),
+ DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+}
+
+namespace {
+/// AArch64 Code Generator Pass Configuration Options.
+class AArch64PassConfig : public TargetPassConfig {
+public:
+ AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AArch64TargetMachine &getAArch64TargetMachine() const {
+ return getTM<AArch64TargetMachine>();
+ }
+
+ const AArch64Subtarget &getAArch64Subtarget() const {
+ return *getAArch64TargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AArch64PassConfig(this, PM);
+}
+
+bool AArch64PassConfig::addPreEmitPass() {
+ addPass(&UnpackMachineBundlesID);
+ addPass(createAArch64BranchFixupPass());
+ return true;
+}
+
+bool AArch64PassConfig::addInstSelector() {
+ addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));
+
+ // For ELF, cleanup any local-dynamic TLS accesses.
+ if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64CleanupLocalDynamicTLSPass());
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
new file mode 100644
index 000000000000..c1f47c2e5372
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -0,0 +1,69 @@
+//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64TARGETMACHINE_H
+#define LLVM_AARCH64TARGETMACHINE_H
+
+#include "AArch64FrameLowering.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64SelectionDAGInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64TargetMachine : public LLVMTargetMachine {
+ AArch64Subtarget Subtarget;
+ AArch64InstrInfo InstrInfo;
+ const DataLayout DL;
+ AArch64TargetLowering TLInfo;
+ AArch64SelectionDAGInfo TSInfo;
+ AArch64FrameLowering FrameLowering;
+
+public:
+ AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ const AArch64InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+
+ const AArch64FrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ const AArch64TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ const DataLayout *getDataLayout() const { return &DL; }
+
+ const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
new file mode 100644
index 000000000000..b4452f514590
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -0,0 +1,24 @@
+//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file deals with any AArch64 specific requirements on object files.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AArch64TargetObjectFile.h"
+
+using namespace llvm;
+
+void
+AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
new file mode 100644
index 000000000000..bf0565a79ec8
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -0,0 +1,31 @@
+//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file deals with any AArch64 specific requirements on object files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+ /// AArch64LinuxTargetObjectFile - This implementation is used for linux
+ /// AArch64.
+ class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
new file mode 100644
index 000000000000..69bb80a48537
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -0,0 +1,2197 @@
+//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the (GNU-style) assembly parser for the AArch64
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AArch64Operand;
+
+class AArch64AsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+#define GET_ASSEMBLER_HEADER
+#include "AArch64GenAsmMatcher.inc"
+
+public:
+ enum AArch64MatchResultTy {
+ Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "AArch64GenAsmMatcher.inc"
+ };
+
+ AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ MCAsmParserExtension::Initialize(_Parser);
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ // These are the public interface of the MCTargetAsmParser
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool ParseDirective(AsmToken DirectiveID);
+ bool ParseDirectiveTLSDescCall(SMLoc L);
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer&Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ // The rest of the sub-parsers have more freedom over interface: they return
+ // an OperandMatchResultTy because it's less ambiguous than true/false or
+ // -1/0/1 even if it is more verbose
+ OperandMatchResultTy
+ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic);
+
+ OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal);
+
+ OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind);
+
+ OperandMatchResultTy
+ ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t NumLanes);
+
+ OperandMatchResultTy
+ ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes);
+
+ OperandMatchResultTy
+ ParseImmWithLSLOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseCondCodeOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseCRxOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseFPImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ template<typename SomeNamedImmMapper> OperandMatchResultTy
+ ParseNamedImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return ParseNamedImmOperand(SomeNamedImmMapper(), Operands);
+ }
+
+ OperandMatchResultTy
+ ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseLSXAddressOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseShiftExtend(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ /// Scan the next token (which had better be an identifier) and determine
+ /// whether it represents a general-purpose or vector register. It returns
+ /// true if an identifier was found and populates its reference arguments. It
+ /// does not consume the token.
+ bool
+ IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec,
+ SMLoc &LayoutLoc) const;
+
+};
+
+}
+
+namespace {
+
+/// Instances of this class represent a parsed AArch64 machine instruction.
+class AArch64Operand : public MCParsedAsmOperand {
+private:
+ enum KindTy {
+ k_ImmWithLSL, // #uimm {, LSL #amt }
+ k_CondCode, // eq/ne/...
+ k_FPImmediate, // Limited-precision floating-point imm
+ k_Immediate, // Including expressions referencing symbols
+ k_Register,
+ k_ShiftExtend,
+ k_SysReg, // The register operand of MRS and MSR instructions
+ k_Token, // The mnemonic; other raw tokens the auto-generated
+ k_WrappedRegister // Load/store exclusive permit a wrapped register.
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ struct ImmWithLSLOp {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ };
+
+ struct CondCodeOp {
+ A64CC::CondCodes Code;
+ };
+
+ struct FPImmOp {
+ double Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ShiftExtendOp {
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ };
+
+ struct SysRegOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ union {
+ struct ImmWithLSLOp ImmWithLSL;
+ struct CondCodeOp CondCode;
+ struct FPImmOp FPImm;
+ struct ImmOp Imm;
+ struct RegOp Reg;
+ struct ShiftExtendOp ShiftExtend;
+ struct SysRegOp SysReg;
+ struct TokOp Tok;
+ };
+
+ AArch64Operand(KindTy K, SMLoc S, SMLoc E)
+ : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {}
+
+public:
+ AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() {
+ }
+
+ SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getEndLoc() const { return EndLoc; }
+ void print(raw_ostream&) const;
+ void dump() const;
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register || Kind == k_WrappedRegister)
+ && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == k_Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ A64CC::CondCodes getCondCode() const {
+ assert(Kind == k_CondCode && "Invalid access!");
+ return CondCode.Code;
+ }
+
+ static bool isNonConstantExpr(const MCExpr *E,
+ AArch64MCExpr::VariantKind &Variant) {
+ if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) {
+ Variant = A64E->getKind();
+ return true;
+ } else if (!isa<MCConstantExpr>(E)) {
+ Variant = AArch64MCExpr::VK_AARCH64_None;
+ return true;
+ }
+
+ return false;
+ }
+
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isReg() const { return Kind == k_Register; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isMem() const { return false; }
+ bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isShiftOrExtend() const { return Kind == k_ShiftExtend; }
+ bool isSysReg() const { return Kind == k_SysReg; }
+ bool isImmWithLSL() const { return Kind == k_ImmWithLSL; }
+ bool isWrappedReg() const { return Kind == k_WrappedRegister; }
+
+ bool isAddSubImmLSL0() const {
+ if (!isImmWithLSL()) return false;
+ if (ImmWithLSL.ShiftAmount != 0) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC
+ || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
+
+ bool isAddSubImmLSL12() const {
+ if (!isImmWithLSL()) return false;
+ if (ImmWithLSL.ShiftAmount != 12) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
+
+ template<unsigned MemSize, unsigned RmSize> bool isAddrRegExtend() const {
+ if (!isShiftOrExtend()) return false;
+
+ A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType;
+ if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW))
+ return false;
+
+ if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX))
+ return false;
+
+ return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0;
+ }
+
+ bool isAdrpLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOT
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL
+ || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC;
+ }
+
+ return isLabel<21, 4096>();
+ }
+
+ template<unsigned RegWidth> bool isBitfieldWidth() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ }
+
+ template<int RegWidth>
+ bool isCVTFixedPos() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ }
+
+ bool isFMOVImm() const {
+ if (!isFPImm()) return false;
+
+ APFloat RealVal(FPImm.Val);
+ uint32_t ImmVal;
+ return A64Imms::isFPImm(RealVal, ImmVal);
+ }
+
+ bool isFPZero() const {
+ if (!isFPImm()) return false;
+
+ APFloat RealVal(FPImm.Val);
+ return RealVal.isPosZero();
+ }
+
+ template<unsigned field_width, unsigned scale>
+ bool isLabel() const {
+ if (!isImm()) return false;
+
+ if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) {
+ return true;
+ } else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Min = - (scale * (1LL << (field_width - 1)));
+ int64_t Max = scale * ((1LL << (field_width - 1)) - 1);
+ return (Val % scale) == 0 && Val >= Min && Val <= Max;
+ }
+
+ // N.b. this disallows explicit relocation specifications via an
+ // AArch64MCExpr. Users needing that behaviour
+ return false;
+ }
+
+ bool isLane1() const {
+ if (!isImm()) return false;
+
+ // Because it's come through custom assembly parsing, it must always be a
+ // constant expression.
+ return cast<MCConstantExpr>(getImm())->getValue() == 1;
+ }
+
+ bool isLoadLitLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL;
+ }
+
+ return isLabel<19, 4>();
+ }
+
+ template<unsigned RegWidth> bool isLogicalImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+ if (!CE) return false;
+
+ uint32_t Bits;
+ return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+ }
+
+ template<unsigned RegWidth> bool isLogicalImmMOV() const {
+ if (!isLogicalImm<RegWidth>()) return false;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ // The move alias for ORR is only valid if the immediate cannot be
+ // represented with a move (immediate) instruction; they take priority.
+ int UImm16, Shift;
+ return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift)
+ && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift);
+ }
+
+ template<int MemSize>
+ bool isOffsetUImm12() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ // Assume they know what they're doing for now if they've given us a
+ // non-constant expression. In principle we could check for ridiculous
+ // things that can't possibly work or relocations that would almost
+ // certainly break resulting code.
+ if (!CE)
+ return true;
+
+ int64_t Val = CE->getValue();
+
+ // Must be a multiple of the access size in bytes.
+ if ((Val & (MemSize - 1)) != 0) return false;
+
+ // Must be 12-bit unsigned
+ return Val >= 0 && Val <= 0xfff * MemSize;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind, bool is64Bit>
+ bool isShift() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
+ return is64Bit ? ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31;
+ }
+
+ bool isMOVN32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVN64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G2,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+
+ bool isMOVZ32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0,
+ AArch64MCExpr::VK_AARCH64_ABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVZ64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0,
+ AArch64MCExpr::VK_AARCH64_ABS_G1,
+ AArch64MCExpr::VK_AARCH64_ABS_G2,
+ AArch64MCExpr::VK_AARCH64_ABS_G3,
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G2,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVK32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVK64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G2_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G3,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMoveWideImm(unsigned RegWidth,
+ AArch64MCExpr::VariantKind *PermittedModifiers,
+ unsigned NumModifiers) const {
+ if (!isImmWithLSL()) return false;
+
+ if (ImmWithLSL.ShiftAmount % 16 != 0) return false;
+ if (ImmWithLSL.ShiftAmount >= RegWidth) return false;
+
+ AArch64MCExpr::VariantKind Modifier;
+ if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) {
+ // E.g. "#:abs_g0:sym, lsl #16" makes no sense.
+ if (!ImmWithLSL.ImplicitAmount) return false;
+
+ for (unsigned i = 0; i < NumModifiers; ++i)
+ if (PermittedModifiers[i] == Modifier) return true;
+
+ return false;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff;
+ }
+
+ template<int RegWidth, bool (*isValidImm)(int, uint64_t, int&, int&)>
+ bool isMoveWideMovAlias() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ int UImm16, Shift;
+ uint64_t Value = CE->getValue();
+
+ // If this is a 32-bit instruction then all bits above 32 should be the
+ // same: either of these is fine because signed/unsigned values should be
+ // permitted.
+ if (RegWidth == 32) {
+ if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff)
+ return false;
+
+ Value &= 0xffffffffULL;
+ }
+
+ return isValidImm(RegWidth, Value, UImm16, Shift);
+ }
+
+ bool isMSRWithReg() const {
+ if (!isSysReg()) return false;
+
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64SysReg::MSRMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isMSRPState() const {
+ if (!isSysReg()) return false;
+
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64PState::PStateMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isMRS() const {
+ if (!isSysReg()) return false;
+
+ // First check against specific MSR-only (write-only) registers
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64SysReg::MRSMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isPRFM() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ if (!CE)
+ return false;
+
+ return CE->getValue() >= 0 && CE->getValue() <= 31;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind> bool isRegExtend() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
+ return ShiftExtend.Amount <= 4;
+ }
+
+ bool isRegExtendLSL() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != A64SE::LSL)
+ return false;
+
+ return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
+ }
+
+ template<int MemSize> bool isSImm7Scaled() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ int64_t Val = CE->getValue();
+ if (Val % MemSize != 0) return false;
+
+ Val /= MemSize;
+
+ return Val >= -64 && Val < 64;
+ }
+
+ template<int BitWidth>
+ bool isSImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= -(1LL << (BitWidth - 1))
+ && CE->getValue() < (1LL << (BitWidth - 1));
+ }
+
+ template<int bitWidth>
+ bool isUImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth);
+ }
+
+ bool isUImm() const {
+ if (!isImm()) return false;
+
+ return isa<MCConstantExpr>(getImm());
+ }
+
+ static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
+ unsigned ShiftAmount,
+ bool ImplicitAmount,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E);
+ Op->ImmWithLSL.Val = Val;
+ Op->ImmWithLSL.ShiftAmount = ShiftAmount;
+ Op->ImmWithLSL.ImplicitAmount = ImplicitAmount;
+ return Op;
+ }
+
+ static AArch64Operand *CreateCondCode(A64CC::CondCodes Code,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E);
+ Op->CondCode.Code = Code;
+ return Op;
+ }
+
+ static AArch64Operand *CreateFPImm(double Val,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E);
+ Op->FPImm.Val = Val;
+ return Op;
+ }
+
+ static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E);
+ Op->Imm.Val = Val;
+ return Op;
+ }
+
+ static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_Register, S, E);
+ Op->Reg.RegNum = RegNum;
+ return Op;
+ }
+
+ static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E);
+ Op->Reg.RegNum = RegNum;
+ return Op;
+ }
+
+ static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp,
+ unsigned Amount,
+ bool ImplicitAmount,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E);
+ Op->ShiftExtend.ShiftType = ShiftTyp;
+ Op->ShiftExtend.Amount = Amount;
+ Op->ShiftExtend.ImplicitAmount = ImplicitAmount;
+ return Op;
+ }
+
+ static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) {
+ AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ return Op;
+ }
+
+ static AArch64Operand *CreateToken(StringRef Str, SMLoc S) {
+ AArch64Operand *Op = new AArch64Operand(k_Token, S, S);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ return Op;
+ }
+
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ template<unsigned RegWidth>
+ void addBFILSBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth;
+ Inst.addOperand(MCOperand::CreateImm(EncodedVal));
+ }
+
+ void addBFIWidthOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
+ void addBFXWidthOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm();
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+
+ Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCondCode()));
+ }
+
+ void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue()));
+ }
+
+ void addFMOVImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ APFloat RealVal(FPImm.Val);
+ uint32_t ImmVal;
+ A64Imms::isFPImm(RealVal, ImmVal);
+
+ Inst.addOperand(MCOperand::CreateImm(ImmVal));
+ }
+
+ void addFPZeroOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ Inst.addOperand(MCOperand::CreateImm(0));
+ }
+
+ void addInvCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned Encoded = A64InvertCondCode(getCondCode());
+ Inst.addOperand(MCOperand::CreateImm(Encoded));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ template<int MemSize>
+ void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Val = CE->getValue() / MemSize;
+ Inst.addOperand(MCOperand::CreateImm(Val & 0x7f));
+ }
+
+ template<int BitWidth>
+ void addSImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1)));
+ }
+
+ void addImmWithLSLOperands(MCInst &Inst, unsigned N) const {
+ assert (N == 1 && "Invalid number of operands!");
+
+ addExpr(Inst, ImmWithLSL.Val);
+ }
+
+ template<unsigned field_width, unsigned scale>
+ void addLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+
+ if (!CE) {
+ addExpr(Inst, Imm.Val);
+ return;
+ }
+
+ int64_t Val = CE->getValue();
+ assert(Val % scale == 0 && "Unaligned immediate in instruction");
+ Val /= scale;
+
+ Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1)));
+ }
+
+ template<int MemSize>
+ void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize));
+ } else {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ }
+ }
+
+ template<unsigned RegWidth>
+ void addLogicalImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ uint32_t Bits;
+ A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMRSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMSRWithRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMSRPStateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMoveWideImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ addExpr(Inst, ImmWithLSL.Val);
+
+ AArch64MCExpr::VariantKind Variant;
+ if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16));
+ return;
+ }
+
+ // We know it's relocated
+ switch (Variant) {
+ case AArch64MCExpr::VK_AARCH64_ABS_G0:
+ case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1:
+ case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
+ Inst.addOperand(MCOperand::CreateImm(1));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2:
+ case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ Inst.addOperand(MCOperand::CreateImm(2));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G3:
+ Inst.addOperand(MCOperand::CreateImm(3));
+ break;
+ default: llvm_unreachable("Inappropriate move wide relocation");
+ }
+ }
+
+ template<int RegWidth, bool isValidImm(int, uint64_t, int&, int&)>
+ void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int UImm16, Shift;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+
+ if (RegWidth == 32) {
+ Value &= 0xffffffffULL;
+ }
+
+ bool Valid = isValidImm(RegWidth, Value, UImm16, Shift);
+ (void)Valid;
+ assert(Valid && "Invalid immediates should have been weeded out by now");
+
+ Inst.addOperand(MCOperand::CreateImm(UImm16));
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+ }
+
+ void addPRFMOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ assert(CE->getValue() >= 0 && CE->getValue() <= 31
+ && "PRFM operand should be 5-bits");
+
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ // For Add-sub (extended register) operands.
+ void addRegExtendOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ }
+
+ // For the extend in load-store (register offset) instructions.
+ template<unsigned MemSize>
+ void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const {
+ addAddrRegExtendOperands(Inst, N, MemSize);
+ }
+
+ void addAddrRegExtendOperands(MCInst &Inst, unsigned N,
+ unsigned MemSize) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ // First bit of Option is set in instruction classes, the high two bits are
+ // as follows:
+ unsigned OptionHi = 0;
+ switch (ShiftExtend.ShiftType) {
+ case A64SE::UXTW:
+ case A64SE::LSL:
+ OptionHi = 1;
+ break;
+ case A64SE::SXTW:
+ case A64SE::SXTX:
+ OptionHi = 3;
+ break;
+ default:
+ llvm_unreachable("Invalid extend type for register offset");
+ }
+
+ unsigned S = 0;
+ if (MemSize == 1 && !ShiftExtend.ImplicitAmount)
+ S = 1;
+ else if (MemSize != 1 && ShiftExtend.Amount != 0)
+ S = 1;
+
+ Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S));
+ }
+ void addShiftOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ }
+};
+
+} // end anonymous namespace.
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+
+ // See if the operand has a custom parser
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+ // It could either succeed, fail or just not care.
+ if (ResTy != MatchOperand_NoMatch)
+ return ResTy;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return MatchOperand_ParseFail;
+ case AsmToken::Identifier: {
+ // It might be in the LSL/UXTB family ...
+ OperandMatchResultTy GotShift = ParseShiftExtend(Operands);
+
+ // We can only continue if no tokens were eaten.
+ if (GotShift != MatchOperand_NoMatch)
+ return GotShift;
+
+ // ... or it might be a register ...
+ uint32_t NumLanes = 0;
+ OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes);
+ assert(GotReg != MatchOperand_ParseFail
+ && "register parsing shouldn't partially succeed");
+
+ if (GotReg == MatchOperand_Success) {
+ if (Parser.getTok().is(AsmToken::LBrac))
+ return ParseNEONLane(Operands, NumLanes);
+ else
+ return MatchOperand_Success;
+ }
+
+ // ... or it might be a symbolish thing
+ }
+ // Fall through
+ case AsmToken::LParen: // E.g. (strcmp-4)
+ case AsmToken::Integer: // 1f, 2b labels
+ case AsmToken::String: // quoted labels
+ case AsmToken::Dot: // . is Current location
+ case AsmToken::Dollar: // $ is PC
+ case AsmToken::Colon: {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ const MCExpr *ImmVal = 0;
+
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Hash: { // Immediates
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ const MCExpr *ImmVal = 0;
+ Parser.Lex();
+
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
+ return MatchOperand_Success;
+ }
+ case AsmToken::LBrac: {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("[", Loc));
+ Parser.Lex(); // Eat '['
+
+ // There's no comma after a '[', so we can parse the next operand
+ // immediately.
+ return ParseOperand(Operands, Mnemonic);
+ }
+ // The following will likely be useful later, but not in very early cases
+ case AsmToken::LCurly: // Weird SIMD lists
+ llvm_unreachable("Don't know how to deal with '{' in operand");
+ return MatchOperand_ParseFail;
+ }
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) {
+ if (getLexer().is(AsmToken::Colon)) {
+ AArch64MCExpr::VariantKind RefKind;
+
+ OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind);
+ if (ResTy != MatchOperand_Success)
+ return ResTy;
+
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return MatchOperand_ParseFail;
+
+ ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext());
+ return MatchOperand_Success;
+ }
+
+ // No weird AArch64MCExpr prefix
+ return getParser().parseExpression(ExprVal)
+ ? MatchOperand_ParseFail : MatchOperand_Success;
+}
+
+// A lane attached to a NEON register. "[N]", which should yield three tokens:
+// '[', N, ']'. A hash is not allowed to precede the immediate here.
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t NumLanes) {
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand");
+ Operands.push_back(AArch64Operand::CreateToken("[", Loc));
+ Parser.Lex(); // Eat '['
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "expected lane number");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().getIntVal() >= NumLanes) {
+ Error(Parser.getTok().getLoc(), "lane number incompatible with layout");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(),
+ getContext());
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat actual lane
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImm(Lane, S, E));
+
+
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(Parser.getTok().getLoc(), "expected ']' after lane");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Parser.Lex(); // Eat ']'
+
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) {
+ assert(getLexer().is(AsmToken::Colon) && "expected a ':'");
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(),
+ "expected relocation specifier in operand after ':'");
+ return MatchOperand_ParseFail;
+ }
+
+ std::string LowerCase = Parser.getTok().getIdentifier().lower();
+ RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase)
+ .Case("got", AArch64MCExpr::VK_AARCH64_GOT)
+ .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12)
+ .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12)
+ .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0)
+ .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC)
+ .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1)
+ .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC)
+ .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2)
+ .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC)
+ .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3)
+ .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0)
+ .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1)
+ .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2)
+ .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2)
+ .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1)
+ .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC)
+ .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0)
+ .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC)
+ .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12)
+ .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12)
+ .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC)
+ .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1)
+ .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC)
+ .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL)
+ .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12)
+ .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2)
+ .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1)
+ .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC)
+ .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0)
+ .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC)
+ .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12)
+ .Case("tprel_lo12", AArch64MCExpr::VK_AARCH64_TPREL_LO12)
+ .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC)
+ .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC)
+ .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12)
+ .Default(AArch64MCExpr::VK_AARCH64_None);
+
+ if (RefKind == AArch64MCExpr::VK_AARCH64_None) {
+ Error(Parser.getTok().getLoc(),
+ "expected relocation specifier in operand after ':'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat identifier
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(),
+ "expected ':' after relocation specifier");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseImmWithLSLOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // FIXME?: I want to live in a world where immediates must start with
+ // #. Please don't dash my hopes (well, do if you have a good reason).
+ if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '#'
+
+ const MCExpr *Imm;
+ if (ParseImmediate(Imm) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ else if (Parser.getTok().isNot(AsmToken::Comma)) {
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E));
+ return MatchOperand_Success;
+ }
+
+ // Eat ','
+ Parser.Lex();
+
+ // The optional operand must be "lsl #N" where N is non-negative.
+ if (Parser.getTok().is(AsmToken::Identifier)
+ && Parser.getTok().getIdentifier().lower() == "lsl") {
+ Parser.Lex();
+
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
+ return MatchOperand_ParseFail;
+ }
+ }
+ }
+
+ int64_t ShiftAmount = Parser.getTok().getIntVal();
+
+ if (ShiftAmount < 0) {
+ Error(Parser.getTok().getLoc(), "positive shift amount required");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the number
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount,
+ false, S, E));
+ return MatchOperand_Success;
+}
+
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseCondCodeOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ StringRef Tok = Parser.getTok().getIdentifier();
+ A64CC::CondCodes CondCode = A64StringToCondCode(Tok);
+
+ if (CondCode == A64CC::Invalid)
+ return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat condition code
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseCRxOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ std::string LowerTok = Parser.getTok().getIdentifier().lower();
+ StringRef Tok(LowerTok);
+ if (Tok[0] != 'c') {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ uint32_t CRNum;
+ bool BadNum = Tok.drop_front().getAsInteger(10, CRNum);
+ if (BadNum || CRNum > 15) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext());
+
+ Parser.Lex();
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseFPImmOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ // FIXME?: I want to live in a world where immediates must start with
+ // #. Please don't dash my hopes (well, do if you have a good reason).
+ if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '#'
+
+ bool Negative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Negative = true;
+ Parser.Lex(); // Eat '-'
+ } else if (Parser.getTok().is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat '+'
+ }
+
+ if (Parser.getTok().isNot(AsmToken::Real)) {
+ Error(S, "Expected floating-point immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString());
+ if (Negative) RealVal.changeSign();
+ double DblVal = RealVal.convertToDouble();
+
+ Parser.Lex(); // Eat real number
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E));
+ return MatchOperand_Success;
+}
+
+
+// Automatically generated
+static unsigned MatchRegisterName(StringRef Name);
+
+bool
+AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
+ StringRef &Layout,
+ SMLoc &LayoutLoc) const {
+ const AsmToken &Tok = Parser.getTok();
+
+ if (Tok.isNot(AsmToken::Identifier))
+ return false;
+
+ std::string LowerReg = Tok.getString().lower();
+ size_t DotPos = LowerReg.find('.');
+
+ RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
+ if (RegNum == AArch64::NoRegister) {
+ RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
+ .Case("ip0", AArch64::X16)
+ .Case("ip1", AArch64::X17)
+ .Case("fp", AArch64::X29)
+ .Case("lr", AArch64::X30)
+ .Default(AArch64::NoRegister);
+ }
+ if (RegNum == AArch64::NoRegister)
+ return false;
+
+ SMLoc S = Tok.getLoc();
+ RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos);
+
+ if (DotPos == StringRef::npos) {
+ Layout = StringRef();
+ } else {
+ // Everything afterwards needs to be a literal token, expected to be
+ // '.2d','.b' etc for vector registers.
+
+ // This StringSwitch validates the input and (perhaps more importantly)
+ // gives us a permanent string to use in the token (a pointer into LowerReg
+ // would go out of scope when we return).
+ LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
+ std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos);
+ Layout = StringSwitch<const char *>(LayoutText)
+ .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d")
+ .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s")
+ .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h")
+ .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b")
+ .Default("");
+
+ if (Layout.size() == 0) {
+ // Malformed register
+ return false;
+ }
+ }
+
+ return true;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes) {
+ unsigned RegNum;
+ StringRef Layout;
+ SMLoc RegEndLoc, LayoutLoc;
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc));
+
+ if (Layout.size() != 0) {
+ unsigned long long TmpLanes = 0;
+ llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes);
+ if (TmpLanes != 0) {
+ NumLanes = TmpLanes;
+ } else {
+ // If the number of lanes isn't specified explicitly, a valid instruction
+ // will have an element specifier and be capable of acting on the entire
+ // vector register.
+ switch (Layout.back()) {
+ default: llvm_unreachable("Invalid layout specifier");
+ case 'b': NumLanes = 16; break;
+ case 'h': NumLanes = 8; break;
+ case 's': NumLanes = 4; break;
+ case 'd': NumLanes = 2; break;
+ }
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc));
+ }
+
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
+bool
+AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ // This callback is used for things like DWARF frame directives in
+ // assembly. They don't care about things like NEON layouts or lanes, they
+ // just want to be able to produce the DWARF register number.
+ StringRef LayoutSpec;
+ SMLoc RegEndLoc, LayoutLoc;
+ StartLoc = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc))
+ return true;
+
+ Parser.Lex();
+ EndLoc = Parser.getTok().getLoc();
+
+ return false;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Since these operands occur in very limited circumstances, without
+ // alternatives, we actually signal an error if there is no match. If relaxing
+ // this, beware of unintended consequences: an immediate will be accepted
+ // during matching, no matter how it gets into the AArch64Operand.
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ if (Tok.is(AsmToken::Identifier)) {
+ bool ValidName;
+ uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName);
+
+ if (!ValidName) {
+ Error(S, "operand specifier not recognised");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // We're done with the identifier. Eat it
+
+ SMLoc E = Parser.getTok().getLoc();
+ const MCExpr *Imm = MCConstantExpr::Create(Code, getContext());
+ Operands.push_back(AArch64Operand::CreateImm(Imm, S, E));
+ return MatchOperand_Success;
+ } else if (Tok.is(AsmToken::Hash)) {
+ Parser.Lex();
+
+ const MCExpr *ImmVal;
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) {
+ Error(S, "Invalid immediate for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E));
+ return MatchOperand_Success;
+ }
+
+ Error(S, "unexpected operand for instruction");
+ return MatchOperand_ParseFail;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseSysRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+
+ // Any MSR/MRS operand will be an identifier, and we want to store it as some
+ // kind of string: SPSel is valid for two different forms of MSR with two
+ // different encodings. There's no collision at the moment, but the potential
+ // is there.
+ if (!Tok.is(AsmToken::Identifier)) {
+ return MatchOperand_NoMatch;
+ }
+
+ SMLoc S = Tok.getLoc();
+ Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S));
+ Parser.Lex(); // Eat identifier
+
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseLSXAddressOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ unsigned RegNum;
+ SMLoc RegEndLoc, LayoutLoc;
+ StringRef Layout;
+ if(!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)
+ || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum)
+ || Layout.size() != 0) {
+ // Check Layout.size because we don't want to let "x3.4s" or similar
+ // through.
+ return MatchOperand_NoMatch;
+ }
+ Parser.Lex(); // Eat register
+
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // We're done
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
+ return MatchOperand_Success;
+ }
+
+ // Otherwise, only ", #0" is valid
+
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "expected ',' or ']' after register");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat ','
+
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "expected '#0'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '#'
+
+ if (Parser.getTok().isNot(AsmToken::Integer)
+ || Parser.getTok().getIntVal() != 0 ) {
+ Error(Parser.getTok().getLoc(), "expected '#0'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '0'
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseShiftExtend(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ std::string LowerID = IDVal.lower();
+
+ A64SE::ShiftExtSpecifiers Spec =
+ StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID)
+ .Case("lsl", A64SE::LSL)
+ .Case("lsr", A64SE::LSR)
+ .Case("asr", A64SE::ASR)
+ .Case("ror", A64SE::ROR)
+ .Case("uxtb", A64SE::UXTB)
+ .Case("uxth", A64SE::UXTH)
+ .Case("uxtw", A64SE::UXTW)
+ .Case("uxtx", A64SE::UXTX)
+ .Case("sxtb", A64SE::SXTB)
+ .Case("sxth", A64SE::SXTH)
+ .Case("sxtw", A64SE::SXTW)
+ .Case("sxtx", A64SE::SXTX)
+ .Default(A64SE::Invalid);
+
+ if (Spec == A64SE::Invalid)
+ return MatchOperand_NoMatch;
+
+ // Eat the shift
+ SMLoc S, E;
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+
+ if (Spec != A64SE::LSL && Spec != A64SE::LSR &&
+ Spec != A64SE::ASR && Spec != A64SE::ROR) {
+ // The shift amount can be omitted for the extending versions, but not real
+ // shifts:
+ // add x0, x0, x0, uxtb
+ // is valid, and equivalent to
+ // add x0, x0, x0, uxtb #0
+
+ if (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::EndOfStatement) ||
+ Parser.getTok().is(AsmToken::RBrac)) {
+ Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true,
+ S, E));
+ return MatchOperand_Success;
+ }
+ }
+
+ // Eat # at beginning of immediate
+ if (!Parser.getTok().is(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(),
+ "expected #imm after shift specifier");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ // Make sure we do actually have a number
+ if (!Parser.getTok().is(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(),
+ "expected integer shift amount");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Amount = Parser.getTok().getIntVal();
+ Parser.Lex();
+ E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false,
+ S, E));
+
+ return MatchOperand_Success;
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool AArch64AsmParser::
+validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ switch (Inst.getOpcode()) {
+ case AArch64::BFIwwii:
+ case AArch64::BFIxxii:
+ case AArch64::SBFIZwwii:
+ case AArch64::SBFIZxxii:
+ case AArch64::UBFIZwwii:
+ case AArch64::UBFIZxxii: {
+ unsigned ImmOps = Inst.getNumOperands() - 2;
+ int64_t ImmR = Inst.getOperand(ImmOps).getImm();
+ int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
+
+ if (ImmR != 0 && ImmS >= ImmR) {
+ return Error(Operands[4]->getStartLoc(),
+ "requested insert overflows register");
+ }
+ return false;
+ }
+ case AArch64::BFXILwwii:
+ case AArch64::BFXILxxii:
+ case AArch64::SBFXwwii:
+ case AArch64::SBFXxxii:
+ case AArch64::UBFXwwii:
+ case AArch64::UBFXxxii: {
+ unsigned ImmOps = Inst.getNumOperands() - 2;
+ int64_t ImmR = Inst.getOperand(ImmOps).getImm();
+ int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
+ int64_t RegWidth = 0;
+ switch (Inst.getOpcode()) {
+ case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii:
+ RegWidth = 64;
+ break;
+ case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii:
+ RegWidth = 32;
+ break;
+ }
+
+ if (ImmS >= RegWidth || ImmS < ImmR) {
+ return Error(Operands[4]->getStartLoc(),
+ "requested extract overflows register");
+ }
+ return false;
+ }
+ case AArch64::ICix: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
+ if (!A64IC::NeedsRegister(ICOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified IC op does not use a register");
+ }
+ return false;
+ }
+ case AArch64::ICi: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
+ if (A64IC::NeedsRegister(ICOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified IC op requires a register");
+ }
+ return false;
+ }
+ case AArch64::TLBIix: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
+ if (!A64TLBI::NeedsRegister(TLBIOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified TLBI op does not use a register");
+ }
+ return false;
+ }
+ case AArch64::TLBIi: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
+ if (A64TLBI::NeedsRegister(TLBIOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified TLBI op requires a register");
+ }
+ return false;
+ }
+ }
+
+ return false;
+}
+
+
+// Parses the instruction *together with* all operands, appending each parsed
+// operand to the "Operands" list
+bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ size_t CondCodePos = Name.find('.');
+
+ StringRef Mnemonic = Name.substr(0, CondCodePos);
+ Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc));
+
+ if (CondCodePos != StringRef::npos) {
+ // We have a condition code
+ SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1);
+ StringRef CondStr = Name.substr(CondCodePos + 1, StringRef::npos);
+ A64CC::CondCodes Code;
+
+ Code = A64StringToCondCode(CondStr);
+
+ if (Code == A64CC::Invalid) {
+ Error(S, "invalid condition code");
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos);
+
+ Operands.push_back(AArch64Operand::CreateToken(".", DotL));
+ SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3);
+ Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E));
+ }
+
+ // Now we parse the operands of this instruction
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+
+ // After successfully parsing some operands there are two special cases to
+ // consider (i.e. notional operands not separated by commas). Both are due
+ // to memory specifiers:
+ // + An RBrac will end an address for load/store/prefetch
+ // + An '!' will indicate a pre-indexed operation.
+ //
+ // It's someone else's responsibility to make sure these tokens are sane
+ // in the given context!
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Parser.Lex();
+ }
+
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("!", Loc));
+ Parser.Lex();
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "expected comma before next operand");
+ }
+
+ // Eat the EndOfStatement
+ Parser.Lex();
+
+ return false;
+}
+
+bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".hword")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ else if (IDVal == ".word")
+ return ParseDirectiveWord(4, DirectiveID.getLoc());
+ else if (IDVal == ".xword")
+ return ParseDirectiveWord(8, DirectiveID.getLoc());
+ else if (IDVal == ".tlsdesccall")
+ return ParseDirectiveTLSDescCall(DirectiveID.getLoc());
+
+ return true;
+}
+
+/// parseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+// parseDirectiveTLSDescCall:
+// ::= .tlsdesccall symbol
+bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return Error(L, "expected symbol after directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
+
+ MCInst Inst;
+ Inst.setOpcode(AArch64::TLSDESCCALL);
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+
+ getParser().getStreamer().EmitInstruction(Inst);
+ return false;
+}
+
+
+bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+
+ if (ErrorInfo != ~0U && ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ switch (MatchResult) {
+ default: break;
+ case Match_Success:
+ if (validateInstruction(Inst, Operands))
+ return true;
+
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+
+ case Match_AddSubRegExtendSmall:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]");
+ case Match_AddSubRegExtendLarge:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]");
+ case Match_AddSubRegShift32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]");
+ case Match_AddSubRegShift64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]");
+ case Match_AddSubSecondSource:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register, symbol or integer in range [0, 4095]");
+ case Match_CVTFixedPos32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 32]");
+ case Match_CVTFixedPos64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 64]");
+ case Match_CondCode:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected AArch64 condition code");
+ case Match_FPImm:
+ // Any situation which allows a nontrivial floating-point constant also
+ // allows a register.
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register or floating-point constant");
+ case Match_FPZero:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected floating-point constant #0.0");
+ case Match_Label:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected label or encodable integer pc offset");
+ case Match_Lane1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected lane specifier '[1]'");
+ case Match_LoadStoreExtend32_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0");
+ case Match_LoadStoreExtend32_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1");
+ case Match_LoadStoreExtend32_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2");
+ case Match_LoadStoreExtend32_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3");
+ case Match_LoadStoreExtend32_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtw' with optional shift of #0 or #4");
+ case Match_LoadStoreExtend64_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0");
+ case Match_LoadStoreExtend64_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #1");
+ case Match_LoadStoreExtend64_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #2");
+ case Match_LoadStoreExtend64_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #3");
+ case Match_LoadStoreExtend64_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #4");
+ case Match_LoadStoreSImm7_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 4 in range [-256, 252]");
+ case Match_LoadStoreSImm7_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 8 in range [-512, 508]");
+ case Match_LoadStoreSImm7_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 16 in range [-1024, 1016]");
+ case Match_LoadStoreSImm9:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [-256, 255]");
+ case Match_LoadStoreUImm12_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 4095]");
+ case Match_LoadStoreUImm12_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 8190]");
+ case Match_LoadStoreUImm12_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 16380]");
+ case Match_LoadStoreUImm12_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 32760]");
+ case Match_LoadStoreUImm12_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 65520]");
+ case Match_LogicalSecondSource:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register or logical immediate");
+ case Match_MOVWUImm16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected relocated symbol or integer in range [0, 65535]");
+ case Match_MRS:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected readable system register");
+ case Match_MSR:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected writable system register or pstate");
+ case Match_NamedImm_at:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]");
+ case Match_NamedImm_dbarrier:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15] or symbolic barrier operand");
+ case Match_NamedImm_dc:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic 'dc' operand");
+ case Match_NamedImm_ic:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'");
+ case Match_NamedImm_isb:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15] or 'sy'");
+ case Match_NamedImm_prefetch:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected prefetch hint: p(ld|st|i)l[123](strm|keep)");
+ case Match_NamedImm_tlbi:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected translation buffer invalidation operand");
+ case Match_UImm16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 65535]");
+ case Match_UImm3:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 7]");
+ case Match_UImm4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15]");
+ case Match_UImm5:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 31]");
+ case Match_UImm6:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 63]");
+ case Match_UImm7:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 127]");
+ case Match_Width32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [<lsb>, 31]");
+ case Match_Width64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [<lsb>, 63]");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+void AArch64Operand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case k_CondCode:
+ OS << "<CondCode: " << CondCode.Code << ">";
+ break;
+ case k_FPImmediate:
+ OS << "<fpimm: " << FPImm.Val << ">";
+ break;
+ case k_ImmWithLSL:
+ OS << "<immwithlsl: imm=" << ImmWithLSL.Val
+ << ", shift=" << ImmWithLSL.ShiftAmount << ">";
+ break;
+ case k_Immediate:
+ getImm()->print(OS);
+ break;
+ case k_Register:
+ OS << "<register " << getReg() << '>';
+ break;
+ case k_Token:
+ OS << '\'' << getToken() << '\'';
+ break;
+ case k_ShiftExtend:
+ OS << "<shift: type=" << ShiftExtend.ShiftType
+ << ", amount=" << ShiftExtend.Amount << ">";
+ break;
+ case k_SysReg: {
+ StringRef Name(SysReg.Data, SysReg.Length);
+ OS << "<sysreg: " << Name << '>';
+ break;
+ }
+ default:
+ llvm_unreachable("No idea how to print this kind of operand");
+ break;
+ }
+}
+
+void AArch64Operand::dump() const {
+ print(errs());
+}
+
+
+/// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmParser() {
+ RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "AArch64GenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
new file mode 100644
index 000000000000..12c1b8f4c81a
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -0,0 +1,803 @@
+//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the functions necessary to decode AArch64 instruction
+// bitpatterns into MCInsts (with the help of TableGenerated information from
+// the instruction definitions).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-disassembler"
+
+#include "AArch64.h"
+#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+/// AArch64 disassembler for all AArch64 platforms.
+class AArch64Disassembler : public MCDisassembler {
+ const MCRegisterInfo *RegInfo;
+public:
+ /// Initializes the disassembler.
+ ///
+ AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info)
+ : MCDisassembler(STI), RegInfo(Info) {
+ }
+
+ ~AArch64Disassembler() {
+ }
+
+ /// See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+};
+
+}
+
+// Forward-declarations used in the auto-generated files.
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
+ unsigned OptionHiS,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+ unsigned RmBits,
+ uint64_t Address,
+ const void *Decoder);
+
+template<int RegWidth>
+static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
+ unsigned FullImm,
+ uint64_t Address,
+ const void *Decoder);
+
+template<int RegWidth>
+static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
+ unsigned Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+template<typename SomeNamedImmMapper>
+static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus
+DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper,
+ llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static bool Check(DecodeStatus &Out, DecodeStatus In);
+
+#include "AArch64GenDisassemblerTables.inc"
+#include "AArch64GenInstrInfo.inc"
+
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+ switch (In) {
+ case MCDisassembler::Success:
+ // Out stays the same.
+ return true;
+ case MCDisassembler::SoftFail:
+ Out = In;
+ return true;
+ case MCDisassembler::Fail:
+ Out = In;
+ return false;
+ }
+ llvm_unreachable("Invalid DecodeStatus!");
+}
+
+DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // Encoded as a small-endian 32-bit word in the stream.
+ uint32_t insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
+
+ // Calling the auto-generated decoder function.
+ DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D);
+ return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo);
+}
+
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
+ unsigned OptionHiS,
+ uint64_t Address,
+ const void *Decoder) {
+ // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1},
+ // S}. Hence we want to check bit 1.
+ if (!(OptionHiS & 2))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(OptionHiS));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be
+ // between 0 and 31.
+ if (Imm6Bits > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32.
+ if (Imm6Bits < 32)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+ unsigned RmBits,
+ uint64_t Address,
+ const void *Decoder) {
+ // Any bits are valid in the instruction (they're architecturally ignored),
+ // but a code generator should insert 0.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return MCDisassembler::Success;
+}
+
+
+
+template<int RegWidth>
+static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
+ unsigned FullImm,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Imm16 = FullImm & 0xffff;
+ unsigned Shift = FullImm >> 16;
+
+ if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm16));
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+ return MCDisassembler::Success;
+}
+
+template<int RegWidth>
+static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
+ unsigned Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ uint64_t Imm;
+ if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder) {
+ // Only values 0-4 are valid for this 3-bit field
+ if (ShiftAmount > 4)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder) {
+ // Only values below 32 are valid for a 32-bit register
+ if (ShiftAmount > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned ImmS = fieldFromInstruction(Insn, 10, 6);
+ unsigned ImmR = fieldFromInstruction(Insn, 16, 6);
+ unsigned SF = fieldFromInstruction(Insn, 31, 1);
+
+ // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise
+ // out assertions that it thinks should never be hit.
+ enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc;
+ Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2);
+
+ if (!SF) {
+ // ImmR and ImmS must be between 0 and 31 for 32-bit instructions.
+ if (ImmR > 31 || ImmS > 31)
+ return MCDisassembler::Fail;
+ }
+
+ if (SF) {
+ DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ // BFM MCInsts use Rd as a source too.
+ if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
+ } else {
+ DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
+ // BFM MCInsts use Rd as a source too.
+ if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // ASR and LSR have more specific patterns so they won't get here:
+ assert(!(ImmS == 31 && !SF && Opc != BFM)
+ && "shift should have used auto decode");
+ assert(!(ImmS == 63 && SF && Opc != BFM)
+ && "shift should have used auto decode");
+
+ // Extension instructions similarly:
+ if (Opc == SBFM && ImmR == 0) {
+ assert((ImmS != 7 && ImmS != 15) && "extension got here");
+ assert((ImmS != 31 || SF == 0) && "extension got here");
+ } else if (Opc == UBFM && ImmR == 0) {
+ assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here");
+ }
+
+ if (Opc == UBFM) {
+ // It might be a LSL instruction, which actually takes the shift amount
+ // itself as an MCInst operand.
+ if (SF && (ImmS + 1) % 64 == ImmR) {
+ Inst.setOpcode(AArch64::LSLxxi);
+ Inst.addOperand(MCOperand::CreateImm(63 - ImmS));
+ return MCDisassembler::Success;
+ } else if (!SF && (ImmS + 1) % 32 == ImmR) {
+ Inst.setOpcode(AArch64::LSLwwi);
+ Inst.addOperand(MCOperand::CreateImm(31 - ImmS));
+ return MCDisassembler::Success;
+ }
+ }
+
+ // Otherwise it's definitely either an extract or an insert depending on which
+ // of ImmR or ImmS is larger.
+ unsigned ExtractOp, InsertOp;
+ switch (Opc) {
+ default: llvm_unreachable("unexpected instruction trying to decode bitfield");
+ case SBFM:
+ ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii;
+ InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii;
+ break;
+ case BFM:
+ ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii;
+ InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii;
+ break;
+ case UBFM:
+ ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii;
+ InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii;
+ break;
+ }
+
+ // Otherwise it's a boring insert or extract
+ Inst.addOperand(MCOperand::CreateImm(ImmR));
+ Inst.addOperand(MCOperand::CreateImm(ImmS));
+
+
+ if (ImmS < ImmR)
+ Inst.setOpcode(InsertOp);
+ else
+ Inst.setOpcode(ExtractOp);
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // This decoder exists to add the dummy Lane operand to the MCInst, which must
+ // be 1 in assembly but has no other real manifestation.
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned IsToVec = fieldFromInstruction(Insn, 16, 1);
+
+ if (IsToVec) {
+ DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
+ } else {
+ DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // Add the lane
+ Inst.addOperand(MCOperand::CreateImm(1));
+
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus Result = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(Insn, 10, 5);
+ unsigned SImm7 = fieldFromInstruction(Insn, 15, 7);
+ unsigned L = fieldFromInstruction(Insn, 22, 1);
+ unsigned V = fieldFromInstruction(Insn, 26, 1);
+ unsigned Opc = fieldFromInstruction(Insn, 30, 2);
+
+ // Not an official name, but it turns out that bit 23 distinguishes indexed
+ // from non-indexed operations.
+ unsigned Indexed = fieldFromInstruction(Insn, 23, 1);
+
+ if (Indexed && L == 0) {
+ // The MCInst for an indexed store has an out operand and 4 ins:
+ // Rn_wb, Rt, Rt2, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // You shouldn't load to the same register twice in an instruction...
+ if (L && Rt == Rt2)
+ Result = MCDisassembler::SoftFail;
+
+ // ... or do any operation that writes-back to a transfer register. But note
+ // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different.
+ if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn))
+ Result = MCDisassembler::SoftFail;
+
+ // Exactly how we decode the MCInst's registers depends on the Opc and V
+ // fields of the instruction. These also obviously determine the size of the
+ // operation so we can fill in that information while we're at it.
+ if (V) {
+ // The instruction operates on the FP/SIMD registers
+ switch (Opc) {
+ default: return MCDisassembler::Fail;
+ case 0:
+ DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 1:
+ DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 2:
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ }
+ } else {
+ switch (Opc) {
+ default: return MCDisassembler::Fail;
+ case 0:
+ DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 1:
+ assert(L && "unexpected \"store signed\" attempt");
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 2:
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ }
+ }
+
+ if (Indexed && L == 1) {
+ // The MCInst for an indexed load has 3 out operands and an 3 ins:
+ // Rt, Rt2, Rn_wb, Rt2, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(SImm7));
+
+ return Result;
+}
+
+static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
+ uint32_t Val,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Val, 0, 5);
+ unsigned Rn = fieldFromInstruction(Val, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(Val, 10, 5);
+ unsigned MemSize = fieldFromInstruction(Val, 30, 2);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (Rt == Rt2) S = MCDisassembler::SoftFail;
+
+ switch (MemSize) {
+ case 2:
+ if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case 3:
+ if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction");
+ }
+
+ if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+template<typename SomeNamedImmMapper>
+static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ SomeNamedImmMapper Mapper;
+ bool ValidNamed;
+ Mapper.toString(Val, ValidNamed);
+ if (ValidNamed || Mapper.validImm(Val)) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ bool ValidNamed;
+ Mapper.toString(Val, ValidNamed);
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+
+ return ValidNamed ? MCDisassembler::Success : MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address,
+ Decoder);
+}
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address,
+ Decoder);
+}
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Imm9 = fieldFromInstruction(Insn, 12, 9);
+
+ unsigned Opc = fieldFromInstruction(Insn, 22, 2);
+ unsigned V = fieldFromInstruction(Insn, 26, 1);
+ unsigned Size = fieldFromInstruction(Insn, 30, 2);
+
+ if (Opc == 0 || (V == 1 && Opc == 2)) {
+ // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ if (V == 0 && (Opc == 2 || Size == 3)) {
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ } else if (V == 0) {
+ DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+ } else if (V == 1 && (Opc & 2)) {
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ } else {
+ switch (Size) {
+ case 0:
+ DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 1:
+ DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ }
+ }
+
+ if (Opc != 0 && (V != 1 || Opc != 2)) {
+ // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(Imm9));
+
+ // N.b. The official documentation says undpredictable if Rt == Rn, but this
+ // takes place at the architectural rather than encoding level:
+ //
+ // "STR xzr, [sp], #4" is perfectly valid.
+ if (V == 0 && Rt == Rn && Rn != 31)
+ return MCDisassembler::SoftFail;
+ else
+ return MCDisassembler::Success;
+}
+
+static MCDisassembler *createAArch64Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new AArch64Disassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeAArch64Disassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheAArch64Target,
+ createAArch64Disassembler);
+}
+
+
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
new file mode 100644
index 000000000000..82ce80c8b1a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -0,0 +1,408 @@
+//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "AArch64GenAsmWriter.inc"
+
+static int64_t unpackSignedImm(int BitWidth, uint64_t Value) {
+ assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit");
+ if (Value & (1ULL << (BitWidth - 1)))
+ return static_cast<int64_t>(Value) - (1LL << BitWidth);
+ else
+ return Value;
+}
+
+AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI, MII, MRI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void
+AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+ int32_t Imm = unpackSignedImm(9, MOImm.getImm());
+
+ O << '#' << Imm;
+}
+
+void
+AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned MemSize,
+ unsigned RmSize) {
+ unsigned ExtImm = MI->getOperand(OpNum).getImm();
+ unsigned OptionHi = ExtImm >> 1;
+ unsigned S = ExtImm & 1;
+ bool IsLSL = OptionHi == 1 && RmSize == 64;
+
+ const char *Ext;
+ switch (OptionHi) {
+ case 1:
+ Ext = (RmSize == 32) ? "uxtw" : "lsl";
+ break;
+ case 3:
+ Ext = (RmSize == 32) ? "sxtw" : "sxtx";
+ break;
+ default:
+ llvm_unreachable("Incorrect Option on load/store (reg offset)");
+ }
+ O << Ext;
+
+ if (S) {
+ unsigned ShiftAmt = Log2_32(MemSize);
+ O << " #" << ShiftAmt;
+ } else if (IsLSL) {
+ O << " #0";
+ }
+}
+
+void
+AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O) {
+ const MCOperand &Imm12Op = MI->getOperand(OpNum);
+
+ if (Imm12Op.isImm()) {
+ int64_t Imm12 = Imm12Op.getImm();
+ assert(Imm12 >= 0 && "Invalid immediate for add/sub imm");
+ O << "#" << Imm12;
+ } else {
+ assert(Imm12Op.isExpr() && "Unexpected shift operand type");
+ O << "#" << *Imm12Op.getExpr();
+ }
+}
+
+void
+AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+
+ printAddSubImmLSL0Operand(MI, OpNum, O);
+
+ O << ", lsl #12";
+}
+
+void
+AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << MO.getImm();
+}
+
+template<unsigned RegWidth> void
+AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmROp = MI->getOperand(OpNum);
+ unsigned LSB = ImmROp.getImm() == 0 ? 0 : RegWidth - ImmROp.getImm();
+
+ O << '#' << LSB;
+}
+
+void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmSOp = MI->getOperand(OpNum);
+ unsigned Width = ImmSOp.getImm() + 1;
+
+ O << '#' << Width;
+}
+
+void
+AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmSOp = MI->getOperand(OpNum);
+ const MCOperand &ImmROp = MI->getOperand(OpNum - 1);
+
+ unsigned ImmR = ImmROp.getImm();
+ unsigned ImmS = ImmSOp.getImm();
+
+ assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract");
+
+ O << '#' << (ImmS - ImmR + 1);
+}
+
+void
+AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &CRx = MI->getOperand(OpNum);
+
+ O << 'c' << CRx.getImm();
+}
+
+
+void
+AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ScaleOp = MI->getOperand(OpNum);
+
+ O << '#' << (64 - ScaleOp.getImm());
+}
+
+
+void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o) {
+ const MCOperand &MOImm8 = MI->getOperand(OpNum);
+
+ assert(MOImm8.isImm()
+ && "Immediate operand required for floating-point immediate inst");
+
+ uint32_t Imm8 = MOImm8.getImm();
+ uint32_t Fraction = Imm8 & 0xf;
+ uint32_t Exponent = (Imm8 >> 4) & 0x7;
+ uint32_t Negative = (Imm8 >> 7) & 0x1;
+
+ float Val = 1.0f + Fraction / 16.0f;
+
+ // That is:
+ // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4,
+ // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0
+ if (Exponent & 0x4) {
+ Val /= 1 << (7 - Exponent);
+ } else {
+ Val *= 1 << (Exponent + 1);
+ }
+
+ Val = Negative ? -Val : Val;
+
+ o << '#' << format("%.8f", Val);
+}
+
+void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o) {
+ o << "#0.0";
+}
+
+void
+AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm()));
+}
+
+template <unsigned field_width, unsigned scale> void
+AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (!MO.isImm()) {
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ // The immediate of LDR (lit) instructions is a signed 19-bit immediate, which
+ // is multiplied by 4 (because all A64 instructions are 32-bits wide).
+ uint64_t UImm = MO.getImm();
+ uint64_t Sign = UImm & (1LL << (field_width - 1));
+ int64_t SImm = scale * ((UImm & ~Sign) - Sign);
+
+ O << "#" << SImm;
+}
+
+template<unsigned RegWidth> void
+AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ uint64_t Val;
+ A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val);
+ O << "#0x";
+ O.write_hex(Val);
+}
+
+void
+AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, int MemSize) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+
+ if (MOImm.isImm()) {
+ uint32_t Imm = MOImm.getImm() * MemSize;
+
+ O << "#" << Imm;
+ } else {
+ O << "#" << *MOImm.getExpr();
+ }
+}
+
+void
+AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O,
+ A64SE::ShiftExtSpecifiers Shift) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ // LSL #0 is not printed
+ if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0)
+ return;
+
+ switch (Shift) {
+ case A64SE::LSL: O << "lsl"; break;
+ case A64SE::LSR: O << "lsr"; break;
+ case A64SE::ASR: O << "asr"; break;
+ case A64SE::ROR: O << "ror"; break;
+ default: llvm_unreachable("Invalid shift specifier in logical instruction");
+ }
+
+ O << " #" << MO.getImm();
+}
+
+void
+AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &UImm16MO = MI->getOperand(OpNum);
+ const MCOperand &ShiftMO = MI->getOperand(OpNum + 1);
+
+ if (UImm16MO.isImm()) {
+ O << '#' << UImm16MO.getImm();
+
+ if (ShiftMO.getImm() != 0)
+ O << ", lsl #" << (ShiftMO.getImm() * 16);
+
+ return;
+ }
+
+ O << "#" << *UImm16MO.getExpr();
+}
+
+void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ bool ValidName;
+ const MCOperand &MO = MI->getOperand(OpNum);
+ StringRef Name = Mapper.toString(MO.getImm(), ValidName);
+
+ if (ValidName)
+ O << Name;
+ else
+ O << '#' << MO.getImm();
+}
+
+void
+AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ bool ValidName;
+ std::string Name = Mapper.toString(MO.getImm(), ValidName);
+ if (ValidName) {
+ O << Name;
+ return;
+ }
+}
+
+
+void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O,
+ A64SE::ShiftExtSpecifiers Ext) {
+ // FIXME: In principle TableGen should be able to detect this itself far more
+ // easily. We will only accumulate more of these hacks.
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+
+ if (isStackReg(Reg0) || isStackReg(Reg1)) {
+ A64SE::ShiftExtSpecifiers LSLEquiv;
+
+ if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP)
+ LSLEquiv = A64SE::UXTX;
+ else
+ LSLEquiv = A64SE::UXTW;
+
+ if (Ext == LSLEquiv) {
+ O << "lsl #" << MI->getOperand(OpNum).getImm();
+ return;
+ }
+ }
+
+ switch (Ext) {
+ case A64SE::UXTB: O << "uxtb"; break;
+ case A64SE::UXTH: O << "uxth"; break;
+ case A64SE::UXTW: O << "uxtw"; break;
+ case A64SE::UXTX: O << "uxtx"; break;
+ case A64SE::SXTB: O << "sxtb"; break;
+ case A64SE::SXTH: O << "sxth"; break;
+ case A64SE::SXTW: O << "sxtw"; break;
+ case A64SE::SXTX: O << "sxtx"; break;
+ default: llvm_unreachable("Unexpected shift type for printing");
+ }
+
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getImm() != 0)
+ O << " #" << MO.getImm();
+}
+
+template<int MemScale> void
+AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+ int32_t Imm = unpackSignedImm(7, MOImm.getImm());
+
+ O << "#" << (Imm * MemScale);
+}
+
+void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg);
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
+ }
+}
+
+
+void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ if (MI->getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a special assembler directive which applies an
+ // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed
+ // form outside the normal TableGenerated scheme.
+ O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr();
+ } else if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
+
+ printAnnotation(O, Annot);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
new file mode 100644
index 000000000000..639fa869c016
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -0,0 +1,172 @@
+//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64INSTPRINTER_H
+#define LLVM_AARCH64INSTPRINTER_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class AArch64InstPrinter : public MCInstPrinter {
+public:
+ AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+
+ // Autogenerated by tblgen
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printRegName(raw_ostream &O, unsigned RegNum) const;
+
+ template<unsigned MemSize, unsigned RmSize>
+ void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize);
+ }
+
+
+ void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned MemSize,
+ unsigned RmSize);
+
+ void printAddSubImmLSL0Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O);
+ void printAddSubImmLSL12Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O);
+
+ void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<unsigned RegWidth>
+ void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+
+ void printCondCodeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printCRxOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
+
+ void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
+
+ template<int MemScale>
+ void printOffsetUImm12Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &o) {
+ printOffsetUImm12Operand(MI, OpNum, o, MemScale);
+ }
+
+ void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o, int MemScale);
+
+ template<unsigned field_width, unsigned scale>
+ void printLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ template<unsigned RegWidth>
+ void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<typename SomeNamedImmMapper>
+ void printNamedImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O);
+ }
+
+ void printNamedImmOperand(const NamedImmMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printMRSOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O);
+ }
+
+ void printMSROperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O);
+ }
+
+ void printShiftOperand(const char *name, const MCInst *MI,
+ unsigned OpIdx, raw_ostream &O);
+
+ void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("lsr", MI, OpNum, O);
+ }
+ void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("asr", MI, OpNum, O);
+ }
+ void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("ror", MI, OpNum, O);
+ }
+
+ template<A64SE::ShiftExtSpecifiers Shift>
+ void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand(MI, OpNum, O, Shift);
+ }
+
+ void printShiftOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, A64SE::ShiftExtSpecifiers Sh);
+
+
+ void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ template<int MemSize> void
+ printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<A64SE::ShiftExtSpecifiers EXT>
+ void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printRegExtendOperand(MI, OpNum, O, EXT);
+ }
+
+ void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+ bool isStackReg(unsigned RegNo) {
+ return RegNo == AArch64::XSP || RegNo == AArch64::WSP;
+ }
+
+
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
new file mode 100644
index 000000000000..a3373b1087bb
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -0,0 +1,585 @@
+//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the MCAsmBackend class,
+// which is principally concerned with relaxation of the various fixup kinds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class AArch64AsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo* STI;
+public:
+ AArch64AsmBackend(const Target &T, const StringRef TT)
+ : MCAsmBackend(),
+ STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", ""))
+ {}
+
+
+ ~AArch64AsmBackend() {
+ delete STI;
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ virtual void processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved);
+};
+} // end anonymous namespace
+
+void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ // The ADRP instruction adds some multiple of 0x1000 to the current PC &
+ // ~0xfff. This means that the required offset to reach a symbol can vary by
+ // up to one step depending on where the ADRP is in memory. For example:
+ //
+ // ADRP x0, there
+ // there:
+ //
+ // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
+ // we'll need that as an offset. At any other address "there" will be in the
+ // same page as the ADRP and the instruction should encode 0x0. Assuming the
+ // section isn't 0x1000-aligned, we therefore need to delegate this decision
+ // to the linker -- a relocation!
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page)
+ IsResolved = false;
+}
+
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value);
+
+namespace {
+
+class ELFAArch64AsmBackend : public AArch64AsmBackend {
+public:
+ uint8_t OSABI;
+ ELFAArch64AsmBackend(const Target &T, const StringRef TT,
+ uint8_t _OSABI)
+ : AArch64AsmBackend(T, TT), OSABI(_OSABI) { }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const;
+
+ unsigned int getNumFixupKinds() const {
+ return AArch64::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// AArch64FixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_add_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst128_lo12", 0, 32, 0 },
+{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_movw_uabs_g0", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g1", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g2", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g3", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g0", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g1", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g2", 0, 32, 0 },
+{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_movw_tprel_g2", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g0", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_add_tprel_hi12", 0, 32, 0 },
+{ "fixup_a64_add_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_call", 0, 0, 0 }
+ };
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8;
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
+ }
+
+ bool mayNeedRelaxation(const MCInst&) const {
+ return false;
+ }
+
+ void relaxInstruction(const MCInst&, llvm::MCInst&) const {
+ llvm_unreachable("Cannot relax instructions");
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createAArch64ELFObjectWriter(OS, OSABI);
+ }
+};
+
+} // end anonymous namespace
+
+bool
+ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Correct for now. With all instructions 32-bit only very low-level
+ // considerations could make you select something which may fail.
+ return false;
+}
+
+
+bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // Can't emit NOP with size not multiple of 32-bits
+ if (Count % 4 != 0)
+ return false;
+
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0xd503201f);
+
+ return true;
+}
+
+static unsigned ADRImmBits(unsigned Value) {
+ unsigned lo2 = Value & 0x3;
+ unsigned hi19 = (Value & 0x1fffff) >> 2;
+
+ return (hi19 << 5) | (lo2 << 29);
+}
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_2:
+ assert((int64_t)Value >= -32768 &&
+ (int64_t)Value <= 65536 &&
+ "Out of range ABS16 fixup");
+ return Value;
+ case FK_Data_4:
+ assert((int64_t)Value >= -(1LL << 31) &&
+ (int64_t)Value <= (1LL << 32) - 1 &&
+ "Out of range ABS32 fixup");
+ return Value;
+ case FK_Data_8:
+ return Value;
+
+ case AArch64::fixup_a64_ld_gottprel_prel19:
+ // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F
+ // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20.
+ case AArch64::fixup_a64_ld_prel:
+ // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits
+ // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup");
+ return (Value & 0x1ffffc) << 3;
+
+ case AArch64::fixup_a64_adr_prel:
+ // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of
+ // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range ADR fixup");
+ return ADRImmBits(Value & 0x1fffff);
+
+ case AArch64::fixup_a64_adr_prel_page:
+ // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF
+ // F000 of the result of the operation, checking that -2^32 <= result <
+ // 2^32.
+ assert((int64_t)Value >= -(1LL << 32) &&
+ (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+ return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
+
+ case AArch64::fixup_a64_add_dtprel_hi12:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
+ // FF F000 of DTPREL(S+A), check 0 <= X < 2^24.
+ case AArch64::fixup_a64_add_tprel_hi12:
+ // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits
+ // FF F000 of TPREL(S+A), check 0 <= X < 2^24.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 24) && "Out of range ADD fixup");
+ return (Value & 0xfff000) >> 2;
+
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_add_tprel_lo12:
+ // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 12) && "Out of range ADD fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+ // R_AARCH64_TLSLD_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits
+ // FFF of G(TLSDESC(S+A)), with no overflow check.
+ case AArch64::fixup_a64_add_lo12:
+ // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of
+ // S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_lo12:
+ // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF
+ // of S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_lo12:
+ // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE
+ // of S+A, with no overflow check.
+ return (Value & 0xffe) << 9;
+
+ case AArch64::fixup_a64_ldst32_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst32_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst32_lo12:
+ // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC
+ // of S+A, with no overflow check.
+ return (Value & 0xffc) << 8;
+
+ case AArch64::fixup_a64_ldst64_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst64_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst64_lo12:
+ // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8
+ // of S+A, with no overflow check.
+ return (Value & 0xff8) << 7;
+
+ case AArch64::fixup_a64_ldst128_lo12:
+ // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0
+ // of S+A, with no overflow check.
+ return (Value & 0xff0) << 6;
+
+ case AArch64::fixup_a64_movw_uabs_g0:
+ // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A
+ // with a check that S+A < 2^16
+ assert(Value <= 0xffff && "Out of range move wide fixup");
+ return (Value & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g0_nc:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_movw_gottprel_g0_nc:
+ // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of G(TPREL(S+A)) - GOT with no overflow check.
+ case AArch64::fixup_a64_movw_tprel_g0_nc:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_movw_uabs_g0_nc:
+ // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of
+ // S+A with no overflow check.
+ return (Value & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g1:
+ // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of
+ // S+A with a check that S+A < 2^32
+ assert(Value <= 0xffffffffull && "Out of range move wide fixup");
+ return ((Value >> 16) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g1_nc:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field
+ // to bits FFFF0000 of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_movw_tprel_g1_nc:
+ // R_AARCH64_TLSLD_MOVW_TPREL_G1_NC: Set a MOVK immediate field
+ // to bits FFFF0000 of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_movw_uabs_g1_nc:
+ // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits
+ // FFFF0000 of S+A with no overflow check.
+ return ((Value >> 16) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g2:
+ // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000
+ // 0000 of S+A with a check that S+A < 2^48
+ assert(Value <= 0xffffffffffffull && "Out of range move wide fixup");
+ return ((Value >> 32) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g2_nc:
+ // R_AARCH64_MOVW_UABS_G2: Sets a MOVK immediate field to bits FFFF 0000
+ // 0000 of S+A with no overflow check.
+ return ((Value >> 32) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g3:
+ // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000
+ // 0000 0000 of S+A (no overflow check needed)
+ return ((Value >> 48) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g0:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field
+ // to bits FFFF of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_tprel_g0:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to
+ // bits FFFF of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g0: {
+ // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of
+ // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we
+ // should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 16) && Signed < (1LL << 16)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = (Value & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ // MCCodeEmitter should have encoded a MOVN, which is fine.
+ Value = (~Value & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_movw_dtprel_g1:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field
+ // to bits FFFF0000 of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_gottprel_g1:
+ // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field
+ // to bits FFFF0000 of G(TPREL(S+A)) - GOT.
+ case AArch64::fixup_a64_movw_tprel_g1:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to
+ // bits FFFF0000 of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g1: {
+ // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000
+ // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we
+ // should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 32) && Signed < (1LL << 32)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = ((Value >> 16) & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ Value = ((~Value >> 16) & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_movw_dtprel_g2:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field
+ // to bits FFFF 0000 0000 of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_tprel_g2:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to
+ // bits FFFF 0000 0000 of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g2: {
+ // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000
+ // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that
+ // we should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 48) && Signed < (1LL << 48)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = ((Value >> 32) & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ Value = ((~Value >> 32) & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_tstbr:
+ // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to
+ // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15.
+ assert((int64_t)Value >= -(1LL << 15) &&
+ (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup");
+ return (Value & 0xfffc) << (5 - 2);
+
+ case AArch64::fixup_a64_condbr:
+ // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch
+ // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup");
+ return (Value & 0x1ffffc) << (5 - 2);
+
+ case AArch64::fixup_a64_uncondbr:
+ // R_AARCH64_JUMP26 same as below (except to a linker, possibly).
+ case AArch64::fixup_a64_call:
+ // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P,
+ // checking that -2^27 <= S+A-P < 2^27.
+ assert((int64_t)Value >= -(1LL << 27) &&
+ (int64_t)Value < (1LL << 27) && "Out of range branch fixup");
+ return (Value & 0xffffffc) >> 2;
+
+ case AArch64::fixup_a64_adr_gottprel_page:
+ // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits
+ // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32.
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000
+ // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32.
+ case AArch64::fixup_a64_adr_prel_got_page:
+ // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits
+ // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) <
+ // 2^32.
+ assert((int64_t)Value >= -(1LL << 32) &&
+ (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+ return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
+
+ case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+ // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8
+ // of X, with no overflow check. Check that X & 7 == 0.
+ case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+ // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of
+ // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0.
+ case AArch64::fixup_a64_ld64_got_lo12_nc:
+ // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of
+ // G(S) with no overflow check. Check X & 7 == 0
+ assert(((int64_t)Value & 7) == 0 && "Misaligned fixup");
+ return (Value & 0xff8) << 7;
+
+ case AArch64::fixup_a64_tlsdesc_call:
+ // R_AARCH64_TLSDESC_CALL: For relaxation only.
+ return 0;
+ }
+}
+
+MCAsmBackend *
+llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+ Triple TheTriple(TT);
+
+ return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS());
+}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
new file mode 100644
index 000000000000..4bcc65dfca27
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -0,0 +1,292 @@
+//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file handles ELF-specific object emission, converting LLVM's internal
+// fixups into the appropriate relocations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ AArch64ELFObjectWriter(uint8_t OSABI);
+
+ virtual ~AArch64ELFObjectWriter();
+
+protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+private:
+};
+}
+
+AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
+ /*HasRelocationAddend*/ true)
+{}
+
+AArch64ELFObjectWriter::~AArch64ELFObjectWriter()
+{}
+
+unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_8:
+ return ELF::R_AARCH64_PREL64;
+ case FK_Data_4:
+ return ELF::R_AARCH64_PREL32;
+ case FK_Data_2:
+ return ELF::R_AARCH64_PREL16;
+ case AArch64::fixup_a64_ld_prel:
+ Type = ELF::R_AARCH64_LD_PREL_LO19;
+ break;
+ case AArch64::fixup_a64_adr_prel:
+ Type = ELF::R_AARCH64_ADR_PREL_LO21;
+ break;
+ case AArch64::fixup_a64_adr_prel_page:
+ Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
+ break;
+ case AArch64::fixup_a64_adr_prel_got_page:
+ Type = ELF::R_AARCH64_ADR_GOT_PAGE;
+ break;
+ case AArch64::fixup_a64_tstbr:
+ Type = ELF::R_AARCH64_TSTBR14;
+ break;
+ case AArch64::fixup_a64_condbr:
+ Type = ELF::R_AARCH64_CONDBR19;
+ break;
+ case AArch64::fixup_a64_uncondbr:
+ Type = ELF::R_AARCH64_JUMP26;
+ break;
+ case AArch64::fixup_a64_call:
+ Type = ELF::R_AARCH64_CALL26;
+ break;
+ case AArch64::fixup_a64_adr_gottprel_page:
+ Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
+ break;
+ case AArch64::fixup_a64_ld_gottprel_prel19:
+ Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
+ break;
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_8:
+ return ELF::R_AARCH64_ABS64;
+ case FK_Data_4:
+ return ELF::R_AARCH64_ABS32;
+ case FK_Data_2:
+ return ELF::R_AARCH64_ABS16;
+ case AArch64::fixup_a64_add_lo12:
+ Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ld64_got_lo12_nc:
+ Type = ELF::R_AARCH64_LD64_GOT_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_lo12:
+ Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_lo12:
+ Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_lo12:
+ Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_lo12:
+ Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst128_lo12:
+ Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g0:
+ Type = ELF::R_AARCH64_MOVW_UABS_G0;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g0_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G0_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g1:
+ Type = ELF::R_AARCH64_MOVW_UABS_G1;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g1_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g2:
+ Type = ELF::R_AARCH64_MOVW_UABS_G2;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g2_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G2_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g3:
+ Type = ELF::R_AARCH64_MOVW_UABS_G3;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g0:
+ Type = ELF::R_AARCH64_MOVW_SABS_G0;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g1:
+ Type = ELF::R_AARCH64_MOVW_SABS_G1;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g2:
+ Type = ELF::R_AARCH64_MOVW_SABS_G2;
+ break;
+
+ // TLS Local-dynamic block
+ case AArch64::fixup_a64_movw_dtprel_g2:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g1:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g1_nc:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g0:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_add_dtprel_hi12:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12;
+ break;
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
+ break;
+
+ // TLS initial-exec block
+ case AArch64::fixup_a64_movw_gottprel_g1:
+ Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_gottprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
+ break;
+
+ // TLS local-exec block
+ case AArch64::fixup_a64_movw_tprel_g2:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g1:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g1_nc:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g0:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_add_tprel_hi12:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12;
+ break;
+ case AArch64::fixup_a64_add_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
+ break;
+
+ // TLS general-dynamic block
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ break;
+ case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+ Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
+ break;
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
+ break;
+ case AArch64::fixup_a64_tlsdesc_call:
+ Type = ELF::R_AARCH64_TLSDESC_CALL;
+ break;
+ }
+ }
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
new file mode 100644
index 000000000000..b83577af45c6
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -0,0 +1,160 @@
+//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits AArch64 ELF .o object files. Different
+// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit
+// regions of data and code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// AArch64 ELF ABI:
+/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf
+///
+/// In brief: $x or $d should be emitted at the start of each contiguous region
+/// of A64 code or data in a section. In practice, this emission does not rely
+/// on explicit assembler directives but on inherent properties of the
+/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an
+/// instruction).
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
+class AArch64ELFStreamer : public MCELFStreamer {
+public:
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, TAB, OS, Emitter),
+ MappingSymbolCounter(0), LastEMS(EMS_None) {
+ }
+
+ ~AArch64ELFStreamer() {}
+
+ virtual void ChangeSection(const MCSection *Section) {
+ // We have to keep track of the mapping symbol state of any sections we
+ // use. Each one should start off as EMS_None, which is provided as the
+ // default constructor by DenseMap::lookup.
+ LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ virtual void EmitInstruction(const MCInst& Inst) {
+ EmitA64MappingSymbol();
+ MCELFStreamer::EmitInstruction(Inst);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
+ /// if necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data, AddrSpace);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
+ /// if necessary.
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_A64,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitA64MappingSymbol() {
+ if (LastEMS == EMS_A64) return;
+ EmitMappingSymbol("$x");
+ LastEMS = EMS_A64;
+ }
+
+ void EmitMappingSymbol(StringRef Name) {
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+
+ MCSymbol *Symbol =
+ getContext().GetOrCreateSymbol(Name + "." +
+ Twine(MappingSymbolCounter++));
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ Symbol->setSection(*getCurrentSection());
+
+ const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ Symbol->setVariableValue(Value);
+ }
+
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+ /// @}
+};
+}
+
+namespace llvm {
+ MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+ }
+}
+
+
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
new file mode 100644
index 000000000000..5a89ca50cee8
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -0,0 +1,27 @@
+//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF streamer information for the AArch64 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_ELF_STREAMER_H
+#define LLVM_AARCH64_ELF_STREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+
+ MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack);
+}
+
+#endif // AArch64_ELF_STREAMER_H
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
new file mode 100644
index 000000000000..eeb122d38494
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -0,0 +1,113 @@
+//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the LLVM fixups applied to MCInsts in the AArch64
+// backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H
+#define LLVM_AARCH64_AARCH64FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+ namespace AArch64 {
+ enum Fixups {
+ fixup_a64_ld_prel = FirstTargetFixupKind,
+ fixup_a64_adr_prel,
+ fixup_a64_adr_prel_page,
+
+ fixup_a64_add_lo12,
+
+ fixup_a64_ldst8_lo12,
+ fixup_a64_ldst16_lo12,
+ fixup_a64_ldst32_lo12,
+ fixup_a64_ldst64_lo12,
+ fixup_a64_ldst128_lo12,
+
+ fixup_a64_tstbr,
+ fixup_a64_condbr,
+ fixup_a64_uncondbr,
+ fixup_a64_call,
+
+ fixup_a64_movw_uabs_g0,
+ fixup_a64_movw_uabs_g0_nc,
+ fixup_a64_movw_uabs_g1,
+ fixup_a64_movw_uabs_g1_nc,
+ fixup_a64_movw_uabs_g2,
+ fixup_a64_movw_uabs_g2_nc,
+ fixup_a64_movw_uabs_g3,
+
+ fixup_a64_movw_sabs_g0,
+ fixup_a64_movw_sabs_g1,
+ fixup_a64_movw_sabs_g2,
+
+ fixup_a64_adr_prel_got_page,
+ fixup_a64_ld64_got_lo12_nc,
+
+ // Produce offsets relative to the module's dynamic TLS area.
+ fixup_a64_movw_dtprel_g2,
+ fixup_a64_movw_dtprel_g1,
+ fixup_a64_movw_dtprel_g1_nc,
+ fixup_a64_movw_dtprel_g0,
+ fixup_a64_movw_dtprel_g0_nc,
+ fixup_a64_add_dtprel_hi12,
+ fixup_a64_add_dtprel_lo12,
+ fixup_a64_add_dtprel_lo12_nc,
+ fixup_a64_ldst8_dtprel_lo12,
+ fixup_a64_ldst8_dtprel_lo12_nc,
+ fixup_a64_ldst16_dtprel_lo12,
+ fixup_a64_ldst16_dtprel_lo12_nc,
+ fixup_a64_ldst32_dtprel_lo12,
+ fixup_a64_ldst32_dtprel_lo12_nc,
+ fixup_a64_ldst64_dtprel_lo12,
+ fixup_a64_ldst64_dtprel_lo12_nc,
+
+ // Produce the GOT entry containing a variable's address in TLS's
+ // initial-exec mode.
+ fixup_a64_movw_gottprel_g1,
+ fixup_a64_movw_gottprel_g0_nc,
+ fixup_a64_adr_gottprel_page,
+ fixup_a64_ld64_gottprel_lo12_nc,
+ fixup_a64_ld_gottprel_prel19,
+
+ // Produce offsets relative to the thread pointer: TPIDR_EL0.
+ fixup_a64_movw_tprel_g2,
+ fixup_a64_movw_tprel_g1,
+ fixup_a64_movw_tprel_g1_nc,
+ fixup_a64_movw_tprel_g0,
+ fixup_a64_movw_tprel_g0_nc,
+ fixup_a64_add_tprel_hi12,
+ fixup_a64_add_tprel_lo12,
+ fixup_a64_add_tprel_lo12_nc,
+ fixup_a64_ldst8_tprel_lo12,
+ fixup_a64_ldst8_tprel_lo12_nc,
+ fixup_a64_ldst16_tprel_lo12,
+ fixup_a64_ldst16_tprel_lo12_nc,
+ fixup_a64_ldst32_tprel_lo12,
+ fixup_a64_ldst32_tprel_lo12_nc,
+ fixup_a64_ldst64_tprel_lo12,
+ fixup_a64_ldst64_tprel_lo12_nc,
+
+ // Produce the special fixups used by the general-dynamic TLS model.
+ fixup_a64_tlsdesc_adr_page,
+ fixup_a64_tlsdesc_ld64_lo12_nc,
+ fixup_a64_tlsdesc_add_lo12_nc,
+ fixup_a64_tlsdesc_call,
+
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
new file mode 100644
index 000000000000..8ec8cbf1c525
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AArch64MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCAsmInfo.h"
+
+using namespace llvm;
+
+AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() {
+ PointerSize = 8;
+
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "//";
+ PrivateGlobalPrefix = ".L";
+ Code32Directive = ".code\t32";
+
+ Data16bitsDirective = "\t.hword\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = "\t.xword\t";
+
+ UseDataRegionDirectives = true;
+
+ WeakRefDirective = "\t.weak\t";
+
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
new file mode 100644
index 000000000000..a20bc471c20d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -0,0 +1,27 @@
+//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AArch64MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64TARGETASMINFO_H
+#define LLVM_AARCH64TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+
+ struct AArch64ELFMCAsmInfo : public MCAsmInfo {
+ explicit AArch64ELFMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
new file mode 100644
index 000000000000..a5c591eee800
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -0,0 +1,502 @@
+//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class AArch64MCCodeEmitter : public MCCodeEmitter {
+ AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ MCContext &Ctx;
+
+public:
+ AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {}
+
+ ~AArch64MCCodeEmitter() {}
+
+ unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ template<int MemSize>
+ unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return getOffsetUImm12OpValue(MI, OpIdx, Fixups, MemSize);
+ }
+
+ unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int MemSize) const;
+
+ unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ // Labels are handled mostly the same way: a symbol is needed, and
+ // just gets some fixup attached.
+ template<AArch64::Fixups fixupDesired>
+ unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ unsigned getAddressWithFixup(const MCOperand &MO,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitInstruction(uint32_t Val, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != 4; ++i) {
+ EmitByte(Val & 0xff, OS);
+ Val >>= 8;
+ }
+ }
+
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ template<int hasRs, int hasRt2> unsigned
+ fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const;
+
+ unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue) const;
+
+ unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue) const;
+
+
+};
+
+} // end anonymous namespace
+
+unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (!MO.isExpr()) {
+ // This can occur for manually decoded or constructed MCInsts, but neither
+ // the assembly-parser nor instruction selection will currently produce an
+ // MCInst that's not a symbol reference.
+ assert(MO.isImm() && "Unexpected address requested");
+ return MO.getImm();
+ }
+
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ return 0;
+}
+
+unsigned AArch64MCCodeEmitter::
+getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int MemSize) const {
+ const MCOperand &ImmOp = MI.getOperand(OpIdx);
+ if (ImmOp.isImm())
+ return ImmOp.getImm();
+
+ assert(ImmOp.isExpr() && "Unexpected operand type");
+ const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr());
+ unsigned FixupKind;
+
+
+ switch (Expr->getKind()) {
+ default: llvm_unreachable("Unexpected operand modifier");
+ case AArch64MCExpr::VK_AARCH64_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12,
+ AArch64::fixup_a64_ldst16_lo12,
+ AArch64::fixup_a64_ldst32_lo12,
+ AArch64::fixup_a64_ldst64_lo12,
+ AArch64::fixup_a64_ldst128_lo12 };
+ assert(MemSize <= 16 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_GOT_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc;
+ break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12,
+ AArch64::fixup_a64_ldst16_dtprel_lo12,
+ AArch64::fixup_a64_ldst32_dtprel_lo12,
+ AArch64::fixup_a64_ldst64_dtprel_lo12 };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst16_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst32_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst64_dtprel_lo12_nc };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc;
+ break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12,
+ AArch64::fixup_a64_ldst16_tprel_lo12,
+ AArch64::fixup_a64_ldst32_tprel_lo12,
+ AArch64::fixup_a64_ldst64_tprel_lo12 };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst16_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst32_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst64_tprel_lo12_nc };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc;
+ break;
+ }
+
+ return getAddressWithFixup(ImmOp, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ unsigned FixupKind = 0;
+ switch(cast<AArch64MCExpr>(MO.getExpr())->getKind()) {
+ default: llvm_unreachable("Invalid expression modifier");
+ case AArch64MCExpr::VK_AARCH64_LO12:
+ FixupKind = AArch64::fixup_a64_add_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_HI12:
+ FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12:
+ FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC:
+ FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_HI12:
+ FixupKind = AArch64::fixup_a64_add_tprel_hi12; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12:
+ FixupKind = AArch64::fixup_a64_add_tprel_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC:
+ FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
+ FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break;
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ unsigned Modifier = AArch64MCExpr::VK_AARCH64_None;
+ if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr()))
+ Modifier = Expr->getKind();
+
+ unsigned FixupKind = 0;
+ switch(Modifier) {
+ case AArch64MCExpr::VK_AARCH64_None:
+ FixupKind = AArch64::fixup_a64_adr_prel_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_GOT:
+ FixupKind = AArch64::fixup_a64_adr_prel_got_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL:
+ FixupKind = AArch64::fixup_a64_adr_gottprel_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_TLSDESC:
+ FixupKind = AArch64::fixup_a64_tlsdesc_adr_page;
+ break;
+ default:
+ llvm_unreachable("Unknown symbol reference kind for ADRP instruction");
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Only immediate expected for shift");
+
+ return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6;
+}
+
+unsigned
+AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Only immediate expected for shift");
+
+ return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
+}
+
+
+template<AArch64::Fixups fixupDesired> unsigned
+AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
+ unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ if (MO.isExpr())
+ return getAddressWithFixup(MO, fixupDesired, Fixups);
+
+ assert(MO.isImm());
+ return MO.getImm();
+}
+
+unsigned
+AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI,
+ unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ if (MO.isImm())
+ return MO.getImm();
+
+ assert(MO.isExpr());
+
+ unsigned FixupKind;
+ if (isa<AArch64MCExpr>(MO.getExpr())) {
+ assert(dyn_cast<AArch64MCExpr>(MO.getExpr())->getKind()
+ == AArch64MCExpr::VK_AARCH64_GOTTPREL
+ && "Invalid symbol modifier for literal load");
+ FixupKind = AArch64::fixup_a64_ld_gottprel_prel19;
+ } else {
+ FixupKind = AArch64::fixup_a64_ld_prel;
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+
+unsigned
+AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+ return 0;
+}
+
+unsigned
+AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &UImm16MO = MI.getOperand(OpIdx);
+ const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1);
+
+ unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16;
+
+ if (UImm16MO.isImm()) {
+ Result |= UImm16MO.getImm();
+ return Result;
+ }
+
+ const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
+ AArch64::Fixups requestedFixup;
+ switch (A64E->getKind()) {
+ default: llvm_unreachable("unexpected expression modifier");
+ case AArch64MCExpr::VK_AARCH64_ABS_G0:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G3:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break;
+ }
+
+ return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups);
+}
+
+template<int hasRs, int hasRt2> unsigned
+AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (!hasRs) EncodedValue |= 0x001F0000;
+ if (!hasRt2) EncodedValue |= 0x00007C00;
+
+ return EncodedValue;
+}
+
+unsigned
+AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue) const {
+ // If one of the signed fixup kinds is applied to a MOVZ instruction, the
+ // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's
+ // job to ensure that any bits possibly affected by this are 0. This means we
+ // must zero out bit 30 (essentially emitting a MOVN).
+ MCOperand UImm16MO = MI.getOperand(1);
+
+ // Nothing to do if there's no fixup.
+ if (UImm16MO.isImm())
+ return EncodedValue;
+
+ const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
+ switch (A64E->getKind()) {
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ return EncodedValue & ~(1u << 30);
+ default:
+ // Nothing to do for an unsigned fixup.
+ return EncodedValue;
+ }
+
+ llvm_unreachable("Should have returned by now");
+}
+
+unsigned
+AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI,
+ unsigned EncodedValue) const {
+ // The Ra field of SMULH and UMULH is unused: it should be assembled as 31
+ // (i.e. all bits 1) but is ignored by the processor.
+ EncodedValue |= 0x1f << 10;
+ return EncodedValue;
+}
+
+MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new AArch64MCCodeEmitter(Ctx);
+}
+
+void AArch64MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MI.getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
+ // following (BLR) instruction. It doesn't emit any code itself so it
+ // doesn't go through the normal TableGenerated channels.
+ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call);
+ const MCExpr *Expr;
+ Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx);
+ Fixups.push_back(MCFixup::Create(0, Expr, Fixup));
+ return;
+ }
+
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+
+ EmitInstruction(Binary, OS);
+}
+
+
+#include "AArch64GenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
new file mode 100644
index 000000000000..c1abfe74dfdd
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -0,0 +1,178 @@
+//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the assembly expression modifiers
+// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64mcexpr"
+#include "AArch64MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/Object/ELF.h"
+
+using namespace llvm;
+
+const AArch64MCExpr*
+AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) AArch64MCExpr(Kind, Expr);
+}
+
+void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_AARCH64_GOT: OS << ":got:"; break;
+ case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break;
+ case VK_AARCH64_LO12: OS << ":lo12:"; break;
+ case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break;
+ case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break;
+ case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break;
+ case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break;
+ case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break;
+ case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break;
+ case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break;
+ case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break;
+ case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break;
+ case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break;
+ case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break;
+ case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break;
+ case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break;
+ case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break;
+ case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break;
+ case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break;
+ case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break;
+ case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break;
+ case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break;
+ case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break;
+ case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break;
+ case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break;
+ case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break;
+ case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break;
+ case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break;
+ case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break;
+ case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break;
+ case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break;
+ case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break;
+ case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break;
+ case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break;
+ case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break;
+
+ }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+bool
+AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return getSubExpr()->EvaluateAsRelocatable(Res, *Layout);
+}
+
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+ switch (Expr->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expression");
+ break;
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ // We're known to be under a TLS fixup, so any symbol should be
+ // modified. There should be only one.
+ const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+ MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+ switch (getKind()) {
+ default:
+ return;
+ case VK_AARCH64_DTPREL_G2:
+ case VK_AARCH64_DTPREL_G1:
+ case VK_AARCH64_DTPREL_G1_NC:
+ case VK_AARCH64_DTPREL_G0:
+ case VK_AARCH64_DTPREL_G0_NC:
+ case VK_AARCH64_DTPREL_HI12:
+ case VK_AARCH64_DTPREL_LO12:
+ case VK_AARCH64_DTPREL_LO12_NC:
+ case VK_AARCH64_GOTTPREL_G1:
+ case VK_AARCH64_GOTTPREL_G0_NC:
+ case VK_AARCH64_GOTTPREL:
+ case VK_AARCH64_GOTTPREL_LO12:
+ case VK_AARCH64_TPREL_G2:
+ case VK_AARCH64_TPREL_G1:
+ case VK_AARCH64_TPREL_G1_NC:
+ case VK_AARCH64_TPREL_G0:
+ case VK_AARCH64_TPREL_G0_NC:
+ case VK_AARCH64_TPREL_HI12:
+ case VK_AARCH64_TPREL_LO12:
+ case VK_AARCH64_TPREL_LO12_NC:
+ case VK_AARCH64_TLSDESC:
+ case VK_AARCH64_TLSDESC_LO12:
+ break;
+ }
+
+ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+// FIXME: really do above: now that two backends are using it.
+static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbolsImpl(BE->getLHS(), Asm);
+ AddValueSymbolsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbolsImpl(getSubExpr(), Asm);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
new file mode 100644
index 000000000000..c0e3b29474d1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -0,0 +1,167 @@
+//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes AArch64-specific MCExprs, used for modifiers like
+// ":lo12:" or ":gottprel_g1:".
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64MCEXPR_H
+#define LLVM_AARCH64MCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class AArch64MCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_AARCH64_None,
+ VK_AARCH64_GOT, // :got: modifier in assembly
+ VK_AARCH64_GOT_LO12, // :got_lo12:
+ VK_AARCH64_LO12, // :lo12:
+
+ VK_AARCH64_ABS_G0, // :abs_g0:
+ VK_AARCH64_ABS_G0_NC, // :abs_g0_nc:
+ VK_AARCH64_ABS_G1,
+ VK_AARCH64_ABS_G1_NC,
+ VK_AARCH64_ABS_G2,
+ VK_AARCH64_ABS_G2_NC,
+ VK_AARCH64_ABS_G3,
+
+ VK_AARCH64_SABS_G0, // :abs_g0_s:
+ VK_AARCH64_SABS_G1,
+ VK_AARCH64_SABS_G2,
+
+ VK_AARCH64_DTPREL_G2, // :dtprel_g2:
+ VK_AARCH64_DTPREL_G1,
+ VK_AARCH64_DTPREL_G1_NC,
+ VK_AARCH64_DTPREL_G0,
+ VK_AARCH64_DTPREL_G0_NC,
+ VK_AARCH64_DTPREL_HI12,
+ VK_AARCH64_DTPREL_LO12,
+ VK_AARCH64_DTPREL_LO12_NC,
+
+ VK_AARCH64_GOTTPREL_G1, // :gottprel:
+ VK_AARCH64_GOTTPREL_G0_NC,
+ VK_AARCH64_GOTTPREL,
+ VK_AARCH64_GOTTPREL_LO12,
+
+ VK_AARCH64_TPREL_G2, // :tprel:
+ VK_AARCH64_TPREL_G1,
+ VK_AARCH64_TPREL_G1_NC,
+ VK_AARCH64_TPREL_G0,
+ VK_AARCH64_TPREL_G0_NC,
+ VK_AARCH64_TPREL_HI12,
+ VK_AARCH64_TPREL_LO12,
+ VK_AARCH64_TPREL_LO12_NC,
+
+ VK_AARCH64_TLSDESC, // :tlsdesc:
+ VK_AARCH64_TLSDESC_LO12
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_GOT, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOT_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOTTPREL, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TLSDESC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TPREL_G1, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const AArch64MCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
new file mode 100644
index 000000000000..7960db08c8d6
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -0,0 +1,194 @@
+//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AArch64 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCTargetDesc.h"
+#include "AArch64ELFStreamer.h"
+#include "AArch64MCAsmInfo.h"
+#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_REGINFO_MC_DESC
+#include "AArch64GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AArch64GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitAArch64MCSubtargetInfo(X, TT, CPU, "");
+ return X;
+}
+
+
+static MCInstrInfo *createAArch64MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAArch64MCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAArch64MCRegisterInfo(X, AArch64::X30);
+ return X;
+}
+
+static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ MCAsmInfo *MAI = new AArch64ELFMCAsmInfo();
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) {
+ // On ELF platforms the default static relocation model has a smart enough
+ // linker to cope with referencing external symbols defined in a shared
+ // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
+ RM = Reloc::Static;
+ }
+
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+
+static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new AArch64InstPrinter(MAI, MII, MRI, STI);
+ return 0;
+}
+
+namespace {
+
+class AArch64MCInstrAnalysis : public MCInstrAnalysis {
+public:
+ AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
+
+ virtual bool isUnconditionalBranch(const MCInst &Inst) const {
+ if (Inst.getOpcode() == AArch64::Bcc
+ && Inst.getOperand(0).getImm() == A64CC::AL)
+ return true;
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ virtual bool isConditionalBranch(const MCInst &Inst) const {
+ if (Inst.getOpcode() == AArch64::Bcc
+ && Inst.getOperand(0).getImm() == A64CC::AL)
+ return false;
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0;
+ // FIXME: We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType
+ != MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(LblOperand).getImm();
+
+ return Addr + Imm;
+ }
+};
+
+}
+
+static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) {
+ return new AArch64MCInstrAnalysis(Info);
+}
+
+
+
+extern "C" void LLVMInitializeAArch64TargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn A(TheAArch64Target, createAArch64MCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheAArch64Target,
+ createAArch64MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheAArch64Target,
+ createAArch64MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheAArch64Target,
+ createAArch64MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ using AArch64_MC::createAArch64MCSubtargetInfo;
+ TargetRegistry::RegisterMCSubtargetInfo(TheAArch64Target,
+ createAArch64MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheAArch64Target,
+ createAArch64MCInstrAnalysis);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheAArch64Target,
+ createAArch64MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheAArch64Target,
+ createAArch64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheAArch64Target,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheAArch64Target,
+ createAArch64MCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
new file mode 100644
index 000000000000..3849fe379513
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -0,0 +1,65 @@
+//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AArch64 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64MCTARGETDESC_H
+#define LLVM_AARCH64MCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class raw_ostream;
+
+extern Target TheAArch64Target;
+
+namespace AArch64_MC {
+ MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI);
+
+MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+
+} // End llvm namespace
+
+// Defines symbolic names for AArch64 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "AArch64GenRegisterInfo.inc"
+
+// Defines symbolic names for the AArch64 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "AArch64GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AArch64GenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/README.txt b/contrib/llvm/lib/Target/AArch64/README.txt
new file mode 100644
index 000000000000..601990f17dee
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/README.txt
@@ -0,0 +1,2 @@
+This file will contain changes that need to be made before AArch64 can become an
+officially supported target. Currently a placeholder.
diff --git a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
new file mode 100644
index 000000000000..b8099cb26b0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -0,0 +1,24 @@
+//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the key registration step for the architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheAArch64Target;
+
+extern "C" void LLVMInitializeAArch64TargetInfo() {
+ RegisterTarget<Triple::aarch64>
+ X(TheAArch64Target, "aarch64", "AArch64");
+}
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
new file mode 100644
index 000000000000..1678559aa084
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -0,0 +1,1103 @@
+//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides basic encoding and assembly information for AArch64.
+//
+//===----------------------------------------------------------------------===//
+#include "AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Regex.h"
+
+using namespace llvm;
+
+StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
+ for (unsigned i = 0; i < NumPairs; ++i) {
+ if (Pairs[i].Value == Value) {
+ Valid = true;
+ return Pairs[i].Name;
+ }
+ }
+
+ Valid = false;
+ return StringRef();
+}
+
+uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
+ std::string LowerCaseName = Name.lower();
+ for (unsigned i = 0; i < NumPairs; ++i) {
+ if (Pairs[i].Name == LowerCaseName) {
+ Valid = true;
+ return Pairs[i].Value;
+ }
+ }
+
+ Valid = false;
+ return -1;
+}
+
+bool NamedImmMapper::validImm(uint32_t Value) const {
+ return Value < TooBigImm;
+}
+
+const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = {
+ {"s1e1r", S1E1R},
+ {"s1e2r", S1E2R},
+ {"s1e3r", S1E3R},
+ {"s1e1w", S1E1W},
+ {"s1e2w", S1E2W},
+ {"s1e3w", S1E3W},
+ {"s1e0r", S1E0R},
+ {"s1e0w", S1E0W},
+ {"s12e1r", S12E1R},
+ {"s12e1w", S12E1W},
+ {"s12e0r", S12E0R},
+ {"s12e0w", S12E0W},
+};
+
+A64AT::ATMapper::ATMapper()
+ : NamedImmMapper(ATPairs, 0) {}
+
+const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = {
+ {"oshld", OSHLD},
+ {"oshst", OSHST},
+ {"osh", OSH},
+ {"nshld", NSHLD},
+ {"nshst", NSHST},
+ {"nsh", NSH},
+ {"ishld", ISHLD},
+ {"ishst", ISHST},
+ {"ish", ISH},
+ {"ld", LD},
+ {"st", ST},
+ {"sy", SY}
+};
+
+A64DB::DBarrierMapper::DBarrierMapper()
+ : NamedImmMapper(DBarrierPairs, 16u) {}
+
+const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = {
+ {"zva", ZVA},
+ {"ivac", IVAC},
+ {"isw", ISW},
+ {"cvac", CVAC},
+ {"csw", CSW},
+ {"cvau", CVAU},
+ {"civac", CIVAC},
+ {"cisw", CISW}
+};
+
+A64DC::DCMapper::DCMapper()
+ : NamedImmMapper(DCPairs, 0) {}
+
+const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = {
+ {"ialluis", IALLUIS},
+ {"iallu", IALLU},
+ {"ivau", IVAU}
+};
+
+A64IC::ICMapper::ICMapper()
+ : NamedImmMapper(ICPairs, 0) {}
+
+const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = {
+ {"sy", SY},
+};
+
+A64ISB::ISBMapper::ISBMapper()
+ : NamedImmMapper(ISBPairs, 16) {}
+
+const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = {
+ {"pldl1keep", PLDL1KEEP},
+ {"pldl1strm", PLDL1STRM},
+ {"pldl2keep", PLDL2KEEP},
+ {"pldl2strm", PLDL2STRM},
+ {"pldl3keep", PLDL3KEEP},
+ {"pldl3strm", PLDL3STRM},
+ {"plil1keep", PLIL1KEEP},
+ {"plil1strm", PLIL1STRM},
+ {"plil2keep", PLIL2KEEP},
+ {"plil2strm", PLIL2STRM},
+ {"plil3keep", PLIL3KEEP},
+ {"plil3strm", PLIL3STRM},
+ {"pstl1keep", PSTL1KEEP},
+ {"pstl1strm", PSTL1STRM},
+ {"pstl2keep", PSTL2KEEP},
+ {"pstl2strm", PSTL2STRM},
+ {"pstl3keep", PSTL3KEEP},
+ {"pstl3strm", PSTL3STRM}
+};
+
+A64PRFM::PRFMMapper::PRFMMapper()
+ : NamedImmMapper(PRFMPairs, 32) {}
+
+const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = {
+ {"spsel", SPSel},
+ {"daifset", DAIFSet},
+ {"daifclr", DAIFClr}
+};
+
+A64PState::PStateMapper::PStateMapper()
+ : NamedImmMapper(PStatePairs, 0) {}
+
+const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
+ {"mdccsr_el0", MDCCSR_EL0},
+ {"dbgdtrrx_el0", DBGDTRRX_EL0},
+ {"mdrar_el1", MDRAR_EL1},
+ {"oslsr_el1", OSLSR_EL1},
+ {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1},
+ {"pmceid0_el0", PMCEID0_EL0},
+ {"pmceid1_el0", PMCEID1_EL0},
+ {"midr_el1", MIDR_EL1},
+ {"ccsidr_el1", CCSIDR_EL1},
+ {"clidr_el1", CLIDR_EL1},
+ {"ctr_el0", CTR_EL0},
+ {"mpidr_el1", MPIDR_EL1},
+ {"revidr_el1", REVIDR_EL1},
+ {"aidr_el1", AIDR_EL1},
+ {"dczid_el0", DCZID_EL0},
+ {"id_pfr0_el1", ID_PFR0_EL1},
+ {"id_pfr1_el1", ID_PFR1_EL1},
+ {"id_dfr0_el1", ID_DFR0_EL1},
+ {"id_afr0_el1", ID_AFR0_EL1},
+ {"id_mmfr0_el1", ID_MMFR0_EL1},
+ {"id_mmfr1_el1", ID_MMFR1_EL1},
+ {"id_mmfr2_el1", ID_MMFR2_EL1},
+ {"id_mmfr3_el1", ID_MMFR3_EL1},
+ {"id_isar0_el1", ID_ISAR0_EL1},
+ {"id_isar1_el1", ID_ISAR1_EL1},
+ {"id_isar2_el1", ID_ISAR2_EL1},
+ {"id_isar3_el1", ID_ISAR3_EL1},
+ {"id_isar4_el1", ID_ISAR4_EL1},
+ {"id_isar5_el1", ID_ISAR5_EL1},
+ {"id_aa64pfr0_el1", ID_AA64PFR0_EL1},
+ {"id_aa64pfr1_el1", ID_AA64PFR1_EL1},
+ {"id_aa64dfr0_el1", ID_AA64DFR0_EL1},
+ {"id_aa64dfr1_el1", ID_AA64DFR1_EL1},
+ {"id_aa64afr0_el1", ID_AA64AFR0_EL1},
+ {"id_aa64afr1_el1", ID_AA64AFR1_EL1},
+ {"id_aa64isar0_el1", ID_AA64ISAR0_EL1},
+ {"id_aa64isar1_el1", ID_AA64ISAR1_EL1},
+ {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1},
+ {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1},
+ {"mvfr0_el1", MVFR0_EL1},
+ {"mvfr1_el1", MVFR1_EL1},
+ {"mvfr2_el1", MVFR2_EL1},
+ {"rvbar_el1", RVBAR_EL1},
+ {"rvbar_el2", RVBAR_EL2},
+ {"rvbar_el3", RVBAR_EL3},
+ {"isr_el1", ISR_EL1},
+ {"cntpct_el0", CNTPCT_EL0},
+ {"cntvct_el0", CNTVCT_EL0},
+
+ // Trace registers
+ {"trcstatr", TRCSTATR},
+ {"trcidr8", TRCIDR8},
+ {"trcidr9", TRCIDR9},
+ {"trcidr10", TRCIDR10},
+ {"trcidr11", TRCIDR11},
+ {"trcidr12", TRCIDR12},
+ {"trcidr13", TRCIDR13},
+ {"trcidr0", TRCIDR0},
+ {"trcidr1", TRCIDR1},
+ {"trcidr2", TRCIDR2},
+ {"trcidr3", TRCIDR3},
+ {"trcidr4", TRCIDR4},
+ {"trcidr5", TRCIDR5},
+ {"trcidr6", TRCIDR6},
+ {"trcidr7", TRCIDR7},
+ {"trcoslsr", TRCOSLSR},
+ {"trcpdsr", TRCPDSR},
+ {"trcdevaff0", TRCDEVAFF0},
+ {"trcdevaff1", TRCDEVAFF1},
+ {"trclsr", TRCLSR},
+ {"trcauthstatus", TRCAUTHSTATUS},
+ {"trcdevarch", TRCDEVARCH},
+ {"trcdevid", TRCDEVID},
+ {"trcdevtype", TRCDEVTYPE},
+ {"trcpidr4", TRCPIDR4},
+ {"trcpidr5", TRCPIDR5},
+ {"trcpidr6", TRCPIDR6},
+ {"trcpidr7", TRCPIDR7},
+ {"trcpidr0", TRCPIDR0},
+ {"trcpidr1", TRCPIDR1},
+ {"trcpidr2", TRCPIDR2},
+ {"trcpidr3", TRCPIDR3},
+ {"trccidr0", TRCCIDR0},
+ {"trccidr1", TRCCIDR1},
+ {"trccidr2", TRCCIDR2},
+ {"trccidr3", TRCCIDR3},
+
+ // GICv3 registers
+ {"icc_iar1_el1", ICC_IAR1_EL1},
+ {"icc_iar0_el1", ICC_IAR0_EL1},
+ {"icc_hppir1_el1", ICC_HPPIR1_EL1},
+ {"icc_hppir0_el1", ICC_HPPIR0_EL1},
+ {"icc_rpr_el1", ICC_RPR_EL1},
+ {"ich_vtr_el2", ICH_VTR_EL2},
+ {"ich_eisr_el2", ICH_EISR_EL2},
+ {"ich_elsr_el2", ICH_ELSR_EL2}
+};
+
+A64SysReg::MRSMapper::MRSMapper() {
+ InstPairs = &MRSPairs[0];
+ NumInstPairs = llvm::array_lengthof(MRSPairs);
+}
+
+const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = {
+ {"dbgdtrtx_el0", DBGDTRTX_EL0},
+ {"oslar_el1", OSLAR_EL1},
+ {"pmswinc_el0", PMSWINC_EL0},
+
+ // Trace registers
+ {"trcoslar", TRCOSLAR},
+ {"trclar", TRCLAR},
+
+ // GICv3 registers
+ {"icc_eoir1_el1", ICC_EOIR1_EL1},
+ {"icc_eoir0_el1", ICC_EOIR0_EL1},
+ {"icc_dir_el1", ICC_DIR_EL1},
+ {"icc_sgi1r_el1", ICC_SGI1R_EL1},
+ {"icc_asgi1r_el1", ICC_ASGI1R_EL1},
+ {"icc_sgi0r_el1", ICC_SGI0R_EL1}
+};
+
+A64SysReg::MSRMapper::MSRMapper() {
+ InstPairs = &MSRPairs[0];
+ NumInstPairs = llvm::array_lengthof(MSRPairs);
+}
+
+
+const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = {
+ {"osdtrrx_el1", OSDTRRX_EL1},
+ {"osdtrtx_el1", OSDTRTX_EL1},
+ {"teecr32_el1", TEECR32_EL1},
+ {"mdccint_el1", MDCCINT_EL1},
+ {"mdscr_el1", MDSCR_EL1},
+ {"dbgdtr_el0", DBGDTR_EL0},
+ {"oseccr_el1", OSECCR_EL1},
+ {"dbgvcr32_el2", DBGVCR32_EL2},
+ {"dbgbvr0_el1", DBGBVR0_EL1},
+ {"dbgbvr1_el1", DBGBVR1_EL1},
+ {"dbgbvr2_el1", DBGBVR2_EL1},
+ {"dbgbvr3_el1", DBGBVR3_EL1},
+ {"dbgbvr4_el1", DBGBVR4_EL1},
+ {"dbgbvr5_el1", DBGBVR5_EL1},
+ {"dbgbvr6_el1", DBGBVR6_EL1},
+ {"dbgbvr7_el1", DBGBVR7_EL1},
+ {"dbgbvr8_el1", DBGBVR8_EL1},
+ {"dbgbvr9_el1", DBGBVR9_EL1},
+ {"dbgbvr10_el1", DBGBVR10_EL1},
+ {"dbgbvr11_el1", DBGBVR11_EL1},
+ {"dbgbvr12_el1", DBGBVR12_EL1},
+ {"dbgbvr13_el1", DBGBVR13_EL1},
+ {"dbgbvr14_el1", DBGBVR14_EL1},
+ {"dbgbvr15_el1", DBGBVR15_EL1},
+ {"dbgbcr0_el1", DBGBCR0_EL1},
+ {"dbgbcr1_el1", DBGBCR1_EL1},
+ {"dbgbcr2_el1", DBGBCR2_EL1},
+ {"dbgbcr3_el1", DBGBCR3_EL1},
+ {"dbgbcr4_el1", DBGBCR4_EL1},
+ {"dbgbcr5_el1", DBGBCR5_EL1},
+ {"dbgbcr6_el1", DBGBCR6_EL1},
+ {"dbgbcr7_el1", DBGBCR7_EL1},
+ {"dbgbcr8_el1", DBGBCR8_EL1},
+ {"dbgbcr9_el1", DBGBCR9_EL1},
+ {"dbgbcr10_el1", DBGBCR10_EL1},
+ {"dbgbcr11_el1", DBGBCR11_EL1},
+ {"dbgbcr12_el1", DBGBCR12_EL1},
+ {"dbgbcr13_el1", DBGBCR13_EL1},
+ {"dbgbcr14_el1", DBGBCR14_EL1},
+ {"dbgbcr15_el1", DBGBCR15_EL1},
+ {"dbgwvr0_el1", DBGWVR0_EL1},
+ {"dbgwvr1_el1", DBGWVR1_EL1},
+ {"dbgwvr2_el1", DBGWVR2_EL1},
+ {"dbgwvr3_el1", DBGWVR3_EL1},
+ {"dbgwvr4_el1", DBGWVR4_EL1},
+ {"dbgwvr5_el1", DBGWVR5_EL1},
+ {"dbgwvr6_el1", DBGWVR6_EL1},
+ {"dbgwvr7_el1", DBGWVR7_EL1},
+ {"dbgwvr8_el1", DBGWVR8_EL1},
+ {"dbgwvr9_el1", DBGWVR9_EL1},
+ {"dbgwvr10_el1", DBGWVR10_EL1},
+ {"dbgwvr11_el1", DBGWVR11_EL1},
+ {"dbgwvr12_el1", DBGWVR12_EL1},
+ {"dbgwvr13_el1", DBGWVR13_EL1},
+ {"dbgwvr14_el1", DBGWVR14_EL1},
+ {"dbgwvr15_el1", DBGWVR15_EL1},
+ {"dbgwcr0_el1", DBGWCR0_EL1},
+ {"dbgwcr1_el1", DBGWCR1_EL1},
+ {"dbgwcr2_el1", DBGWCR2_EL1},
+ {"dbgwcr3_el1", DBGWCR3_EL1},
+ {"dbgwcr4_el1", DBGWCR4_EL1},
+ {"dbgwcr5_el1", DBGWCR5_EL1},
+ {"dbgwcr6_el1", DBGWCR6_EL1},
+ {"dbgwcr7_el1", DBGWCR7_EL1},
+ {"dbgwcr8_el1", DBGWCR8_EL1},
+ {"dbgwcr9_el1", DBGWCR9_EL1},
+ {"dbgwcr10_el1", DBGWCR10_EL1},
+ {"dbgwcr11_el1", DBGWCR11_EL1},
+ {"dbgwcr12_el1", DBGWCR12_EL1},
+ {"dbgwcr13_el1", DBGWCR13_EL1},
+ {"dbgwcr14_el1", DBGWCR14_EL1},
+ {"dbgwcr15_el1", DBGWCR15_EL1},
+ {"teehbr32_el1", TEEHBR32_EL1},
+ {"osdlr_el1", OSDLR_EL1},
+ {"dbgprcr_el1", DBGPRCR_EL1},
+ {"dbgclaimset_el1", DBGCLAIMSET_EL1},
+ {"dbgclaimclr_el1", DBGCLAIMCLR_EL1},
+ {"csselr_el1", CSSELR_EL1},
+ {"vpidr_el2", VPIDR_EL2},
+ {"vmpidr_el2", VMPIDR_EL2},
+ {"sctlr_el1", SCTLR_EL1},
+ {"sctlr_el2", SCTLR_EL2},
+ {"sctlr_el3", SCTLR_EL3},
+ {"actlr_el1", ACTLR_EL1},
+ {"actlr_el2", ACTLR_EL2},
+ {"actlr_el3", ACTLR_EL3},
+ {"cpacr_el1", CPACR_EL1},
+ {"hcr_el2", HCR_EL2},
+ {"scr_el3", SCR_EL3},
+ {"mdcr_el2", MDCR_EL2},
+ {"sder32_el3", SDER32_EL3},
+ {"cptr_el2", CPTR_EL2},
+ {"cptr_el3", CPTR_EL3},
+ {"hstr_el2", HSTR_EL2},
+ {"hacr_el2", HACR_EL2},
+ {"mdcr_el3", MDCR_EL3},
+ {"ttbr0_el1", TTBR0_EL1},
+ {"ttbr0_el2", TTBR0_EL2},
+ {"ttbr0_el3", TTBR0_EL3},
+ {"ttbr1_el1", TTBR1_EL1},
+ {"tcr_el1", TCR_EL1},
+ {"tcr_el2", TCR_EL2},
+ {"tcr_el3", TCR_EL3},
+ {"vttbr_el2", VTTBR_EL2},
+ {"vtcr_el2", VTCR_EL2},
+ {"dacr32_el2", DACR32_EL2},
+ {"spsr_el1", SPSR_EL1},
+ {"spsr_el2", SPSR_EL2},
+ {"spsr_el3", SPSR_EL3},
+ {"elr_el1", ELR_EL1},
+ {"elr_el2", ELR_EL2},
+ {"elr_el3", ELR_EL3},
+ {"sp_el0", SP_EL0},
+ {"sp_el1", SP_EL1},
+ {"sp_el2", SP_EL2},
+ {"spsel", SPSel},
+ {"nzcv", NZCV},
+ {"daif", DAIF},
+ {"currentel", CurrentEL},
+ {"spsr_irq", SPSR_irq},
+ {"spsr_abt", SPSR_abt},
+ {"spsr_und", SPSR_und},
+ {"spsr_fiq", SPSR_fiq},
+ {"fpcr", FPCR},
+ {"fpsr", FPSR},
+ {"dspsr_el0", DSPSR_EL0},
+ {"dlr_el0", DLR_EL0},
+ {"ifsr32_el2", IFSR32_EL2},
+ {"afsr0_el1", AFSR0_EL1},
+ {"afsr0_el2", AFSR0_EL2},
+ {"afsr0_el3", AFSR0_EL3},
+ {"afsr1_el1", AFSR1_EL1},
+ {"afsr1_el2", AFSR1_EL2},
+ {"afsr1_el3", AFSR1_EL3},
+ {"esr_el1", ESR_EL1},
+ {"esr_el2", ESR_EL2},
+ {"esr_el3", ESR_EL3},
+ {"fpexc32_el2", FPEXC32_EL2},
+ {"far_el1", FAR_EL1},
+ {"far_el2", FAR_EL2},
+ {"far_el3", FAR_EL3},
+ {"hpfar_el2", HPFAR_EL2},
+ {"par_el1", PAR_EL1},
+ {"pmcr_el0", PMCR_EL0},
+ {"pmcntenset_el0", PMCNTENSET_EL0},
+ {"pmcntenclr_el0", PMCNTENCLR_EL0},
+ {"pmovsclr_el0", PMOVSCLR_EL0},
+ {"pmselr_el0", PMSELR_EL0},
+ {"pmccntr_el0", PMCCNTR_EL0},
+ {"pmxevtyper_el0", PMXEVTYPER_EL0},
+ {"pmxevcntr_el0", PMXEVCNTR_EL0},
+ {"pmuserenr_el0", PMUSERENR_EL0},
+ {"pmintenset_el1", PMINTENSET_EL1},
+ {"pmintenclr_el1", PMINTENCLR_EL1},
+ {"pmovsset_el0", PMOVSSET_EL0},
+ {"mair_el1", MAIR_EL1},
+ {"mair_el2", MAIR_EL2},
+ {"mair_el3", MAIR_EL3},
+ {"amair_el1", AMAIR_EL1},
+ {"amair_el2", AMAIR_EL2},
+ {"amair_el3", AMAIR_EL3},
+ {"vbar_el1", VBAR_EL1},
+ {"vbar_el2", VBAR_EL2},
+ {"vbar_el3", VBAR_EL3},
+ {"rmr_el1", RMR_EL1},
+ {"rmr_el2", RMR_EL2},
+ {"rmr_el3", RMR_EL3},
+ {"contextidr_el1", CONTEXTIDR_EL1},
+ {"tpidr_el0", TPIDR_EL0},
+ {"tpidr_el2", TPIDR_EL2},
+ {"tpidr_el3", TPIDR_EL3},
+ {"tpidrro_el0", TPIDRRO_EL0},
+ {"tpidr_el1", TPIDR_EL1},
+ {"cntfrq_el0", CNTFRQ_EL0},
+ {"cntvoff_el2", CNTVOFF_EL2},
+ {"cntkctl_el1", CNTKCTL_EL1},
+ {"cnthctl_el2", CNTHCTL_EL2},
+ {"cntp_tval_el0", CNTP_TVAL_EL0},
+ {"cnthp_tval_el2", CNTHP_TVAL_EL2},
+ {"cntps_tval_el1", CNTPS_TVAL_EL1},
+ {"cntp_ctl_el0", CNTP_CTL_EL0},
+ {"cnthp_ctl_el2", CNTHP_CTL_EL2},
+ {"cntps_ctl_el1", CNTPS_CTL_EL1},
+ {"cntp_cval_el0", CNTP_CVAL_EL0},
+ {"cnthp_cval_el2", CNTHP_CVAL_EL2},
+ {"cntps_cval_el1", CNTPS_CVAL_EL1},
+ {"cntv_tval_el0", CNTV_TVAL_EL0},
+ {"cntv_ctl_el0", CNTV_CTL_EL0},
+ {"cntv_cval_el0", CNTV_CVAL_EL0},
+ {"pmevcntr0_el0", PMEVCNTR0_EL0},
+ {"pmevcntr1_el0", PMEVCNTR1_EL0},
+ {"pmevcntr2_el0", PMEVCNTR2_EL0},
+ {"pmevcntr3_el0", PMEVCNTR3_EL0},
+ {"pmevcntr4_el0", PMEVCNTR4_EL0},
+ {"pmevcntr5_el0", PMEVCNTR5_EL0},
+ {"pmevcntr6_el0", PMEVCNTR6_EL0},
+ {"pmevcntr7_el0", PMEVCNTR7_EL0},
+ {"pmevcntr8_el0", PMEVCNTR8_EL0},
+ {"pmevcntr9_el0", PMEVCNTR9_EL0},
+ {"pmevcntr10_el0", PMEVCNTR10_EL0},
+ {"pmevcntr11_el0", PMEVCNTR11_EL0},
+ {"pmevcntr12_el0", PMEVCNTR12_EL0},
+ {"pmevcntr13_el0", PMEVCNTR13_EL0},
+ {"pmevcntr14_el0", PMEVCNTR14_EL0},
+ {"pmevcntr15_el0", PMEVCNTR15_EL0},
+ {"pmevcntr16_el0", PMEVCNTR16_EL0},
+ {"pmevcntr17_el0", PMEVCNTR17_EL0},
+ {"pmevcntr18_el0", PMEVCNTR18_EL0},
+ {"pmevcntr19_el0", PMEVCNTR19_EL0},
+ {"pmevcntr20_el0", PMEVCNTR20_EL0},
+ {"pmevcntr21_el0", PMEVCNTR21_EL0},
+ {"pmevcntr22_el0", PMEVCNTR22_EL0},
+ {"pmevcntr23_el0", PMEVCNTR23_EL0},
+ {"pmevcntr24_el0", PMEVCNTR24_EL0},
+ {"pmevcntr25_el0", PMEVCNTR25_EL0},
+ {"pmevcntr26_el0", PMEVCNTR26_EL0},
+ {"pmevcntr27_el0", PMEVCNTR27_EL0},
+ {"pmevcntr28_el0", PMEVCNTR28_EL0},
+ {"pmevcntr29_el0", PMEVCNTR29_EL0},
+ {"pmevcntr30_el0", PMEVCNTR30_EL0},
+ {"pmccfiltr_el0", PMCCFILTR_EL0},
+ {"pmevtyper0_el0", PMEVTYPER0_EL0},
+ {"pmevtyper1_el0", PMEVTYPER1_EL0},
+ {"pmevtyper2_el0", PMEVTYPER2_EL0},
+ {"pmevtyper3_el0", PMEVTYPER3_EL0},
+ {"pmevtyper4_el0", PMEVTYPER4_EL0},
+ {"pmevtyper5_el0", PMEVTYPER5_EL0},
+ {"pmevtyper6_el0", PMEVTYPER6_EL0},
+ {"pmevtyper7_el0", PMEVTYPER7_EL0},
+ {"pmevtyper8_el0", PMEVTYPER8_EL0},
+ {"pmevtyper9_el0", PMEVTYPER9_EL0},
+ {"pmevtyper10_el0", PMEVTYPER10_EL0},
+ {"pmevtyper11_el0", PMEVTYPER11_EL0},
+ {"pmevtyper12_el0", PMEVTYPER12_EL0},
+ {"pmevtyper13_el0", PMEVTYPER13_EL0},
+ {"pmevtyper14_el0", PMEVTYPER14_EL0},
+ {"pmevtyper15_el0", PMEVTYPER15_EL0},
+ {"pmevtyper16_el0", PMEVTYPER16_EL0},
+ {"pmevtyper17_el0", PMEVTYPER17_EL0},
+ {"pmevtyper18_el0", PMEVTYPER18_EL0},
+ {"pmevtyper19_el0", PMEVTYPER19_EL0},
+ {"pmevtyper20_el0", PMEVTYPER20_EL0},
+ {"pmevtyper21_el0", PMEVTYPER21_EL0},
+ {"pmevtyper22_el0", PMEVTYPER22_EL0},
+ {"pmevtyper23_el0", PMEVTYPER23_EL0},
+ {"pmevtyper24_el0", PMEVTYPER24_EL0},
+ {"pmevtyper25_el0", PMEVTYPER25_EL0},
+ {"pmevtyper26_el0", PMEVTYPER26_EL0},
+ {"pmevtyper27_el0", PMEVTYPER27_EL0},
+ {"pmevtyper28_el0", PMEVTYPER28_EL0},
+ {"pmevtyper29_el0", PMEVTYPER29_EL0},
+ {"pmevtyper30_el0", PMEVTYPER30_EL0},
+
+ // Trace registers
+ {"trcprgctlr", TRCPRGCTLR},
+ {"trcprocselr", TRCPROCSELR},
+ {"trcconfigr", TRCCONFIGR},
+ {"trcauxctlr", TRCAUXCTLR},
+ {"trceventctl0r", TRCEVENTCTL0R},
+ {"trceventctl1r", TRCEVENTCTL1R},
+ {"trcstallctlr", TRCSTALLCTLR},
+ {"trctsctlr", TRCTSCTLR},
+ {"trcsyncpr", TRCSYNCPR},
+ {"trcccctlr", TRCCCCTLR},
+ {"trcbbctlr", TRCBBCTLR},
+ {"trctraceidr", TRCTRACEIDR},
+ {"trcqctlr", TRCQCTLR},
+ {"trcvictlr", TRCVICTLR},
+ {"trcviiectlr", TRCVIIECTLR},
+ {"trcvissctlr", TRCVISSCTLR},
+ {"trcvipcssctlr", TRCVIPCSSCTLR},
+ {"trcvdctlr", TRCVDCTLR},
+ {"trcvdsacctlr", TRCVDSACCTLR},
+ {"trcvdarcctlr", TRCVDARCCTLR},
+ {"trcseqevr0", TRCSEQEVR0},
+ {"trcseqevr1", TRCSEQEVR1},
+ {"trcseqevr2", TRCSEQEVR2},
+ {"trcseqrstevr", TRCSEQRSTEVR},
+ {"trcseqstr", TRCSEQSTR},
+ {"trcextinselr", TRCEXTINSELR},
+ {"trccntrldvr0", TRCCNTRLDVR0},
+ {"trccntrldvr1", TRCCNTRLDVR1},
+ {"trccntrldvr2", TRCCNTRLDVR2},
+ {"trccntrldvr3", TRCCNTRLDVR3},
+ {"trccntctlr0", TRCCNTCTLR0},
+ {"trccntctlr1", TRCCNTCTLR1},
+ {"trccntctlr2", TRCCNTCTLR2},
+ {"trccntctlr3", TRCCNTCTLR3},
+ {"trccntvr0", TRCCNTVR0},
+ {"trccntvr1", TRCCNTVR1},
+ {"trccntvr2", TRCCNTVR2},
+ {"trccntvr3", TRCCNTVR3},
+ {"trcimspec0", TRCIMSPEC0},
+ {"trcimspec1", TRCIMSPEC1},
+ {"trcimspec2", TRCIMSPEC2},
+ {"trcimspec3", TRCIMSPEC3},
+ {"trcimspec4", TRCIMSPEC4},
+ {"trcimspec5", TRCIMSPEC5},
+ {"trcimspec6", TRCIMSPEC6},
+ {"trcimspec7", TRCIMSPEC7},
+ {"trcrsctlr2", TRCRSCTLR2},
+ {"trcrsctlr3", TRCRSCTLR3},
+ {"trcrsctlr4", TRCRSCTLR4},
+ {"trcrsctlr5", TRCRSCTLR5},
+ {"trcrsctlr6", TRCRSCTLR6},
+ {"trcrsctlr7", TRCRSCTLR7},
+ {"trcrsctlr8", TRCRSCTLR8},
+ {"trcrsctlr9", TRCRSCTLR9},
+ {"trcrsctlr10", TRCRSCTLR10},
+ {"trcrsctlr11", TRCRSCTLR11},
+ {"trcrsctlr12", TRCRSCTLR12},
+ {"trcrsctlr13", TRCRSCTLR13},
+ {"trcrsctlr14", TRCRSCTLR14},
+ {"trcrsctlr15", TRCRSCTLR15},
+ {"trcrsctlr16", TRCRSCTLR16},
+ {"trcrsctlr17", TRCRSCTLR17},
+ {"trcrsctlr18", TRCRSCTLR18},
+ {"trcrsctlr19", TRCRSCTLR19},
+ {"trcrsctlr20", TRCRSCTLR20},
+ {"trcrsctlr21", TRCRSCTLR21},
+ {"trcrsctlr22", TRCRSCTLR22},
+ {"trcrsctlr23", TRCRSCTLR23},
+ {"trcrsctlr24", TRCRSCTLR24},
+ {"trcrsctlr25", TRCRSCTLR25},
+ {"trcrsctlr26", TRCRSCTLR26},
+ {"trcrsctlr27", TRCRSCTLR27},
+ {"trcrsctlr28", TRCRSCTLR28},
+ {"trcrsctlr29", TRCRSCTLR29},
+ {"trcrsctlr30", TRCRSCTLR30},
+ {"trcrsctlr31", TRCRSCTLR31},
+ {"trcssccr0", TRCSSCCR0},
+ {"trcssccr1", TRCSSCCR1},
+ {"trcssccr2", TRCSSCCR2},
+ {"trcssccr3", TRCSSCCR3},
+ {"trcssccr4", TRCSSCCR4},
+ {"trcssccr5", TRCSSCCR5},
+ {"trcssccr6", TRCSSCCR6},
+ {"trcssccr7", TRCSSCCR7},
+ {"trcsscsr0", TRCSSCSR0},
+ {"trcsscsr1", TRCSSCSR1},
+ {"trcsscsr2", TRCSSCSR2},
+ {"trcsscsr3", TRCSSCSR3},
+ {"trcsscsr4", TRCSSCSR4},
+ {"trcsscsr5", TRCSSCSR5},
+ {"trcsscsr6", TRCSSCSR6},
+ {"trcsscsr7", TRCSSCSR7},
+ {"trcsspcicr0", TRCSSPCICR0},
+ {"trcsspcicr1", TRCSSPCICR1},
+ {"trcsspcicr2", TRCSSPCICR2},
+ {"trcsspcicr3", TRCSSPCICR3},
+ {"trcsspcicr4", TRCSSPCICR4},
+ {"trcsspcicr5", TRCSSPCICR5},
+ {"trcsspcicr6", TRCSSPCICR6},
+ {"trcsspcicr7", TRCSSPCICR7},
+ {"trcpdcr", TRCPDCR},
+ {"trcacvr0", TRCACVR0},
+ {"trcacvr1", TRCACVR1},
+ {"trcacvr2", TRCACVR2},
+ {"trcacvr3", TRCACVR3},
+ {"trcacvr4", TRCACVR4},
+ {"trcacvr5", TRCACVR5},
+ {"trcacvr6", TRCACVR6},
+ {"trcacvr7", TRCACVR7},
+ {"trcacvr8", TRCACVR8},
+ {"trcacvr9", TRCACVR9},
+ {"trcacvr10", TRCACVR10},
+ {"trcacvr11", TRCACVR11},
+ {"trcacvr12", TRCACVR12},
+ {"trcacvr13", TRCACVR13},
+ {"trcacvr14", TRCACVR14},
+ {"trcacvr15", TRCACVR15},
+ {"trcacatr0", TRCACATR0},
+ {"trcacatr1", TRCACATR1},
+ {"trcacatr2", TRCACATR2},
+ {"trcacatr3", TRCACATR3},
+ {"trcacatr4", TRCACATR4},
+ {"trcacatr5", TRCACATR5},
+ {"trcacatr6", TRCACATR6},
+ {"trcacatr7", TRCACATR7},
+ {"trcacatr8", TRCACATR8},
+ {"trcacatr9", TRCACATR9},
+ {"trcacatr10", TRCACATR10},
+ {"trcacatr11", TRCACATR11},
+ {"trcacatr12", TRCACATR12},
+ {"trcacatr13", TRCACATR13},
+ {"trcacatr14", TRCACATR14},
+ {"trcacatr15", TRCACATR15},
+ {"trcdvcvr0", TRCDVCVR0},
+ {"trcdvcvr1", TRCDVCVR1},
+ {"trcdvcvr2", TRCDVCVR2},
+ {"trcdvcvr3", TRCDVCVR3},
+ {"trcdvcvr4", TRCDVCVR4},
+ {"trcdvcvr5", TRCDVCVR5},
+ {"trcdvcvr6", TRCDVCVR6},
+ {"trcdvcvr7", TRCDVCVR7},
+ {"trcdvcmr0", TRCDVCMR0},
+ {"trcdvcmr1", TRCDVCMR1},
+ {"trcdvcmr2", TRCDVCMR2},
+ {"trcdvcmr3", TRCDVCMR3},
+ {"trcdvcmr4", TRCDVCMR4},
+ {"trcdvcmr5", TRCDVCMR5},
+ {"trcdvcmr6", TRCDVCMR6},
+ {"trcdvcmr7", TRCDVCMR7},
+ {"trccidcvr0", TRCCIDCVR0},
+ {"trccidcvr1", TRCCIDCVR1},
+ {"trccidcvr2", TRCCIDCVR2},
+ {"trccidcvr3", TRCCIDCVR3},
+ {"trccidcvr4", TRCCIDCVR4},
+ {"trccidcvr5", TRCCIDCVR5},
+ {"trccidcvr6", TRCCIDCVR6},
+ {"trccidcvr7", TRCCIDCVR7},
+ {"trcvmidcvr0", TRCVMIDCVR0},
+ {"trcvmidcvr1", TRCVMIDCVR1},
+ {"trcvmidcvr2", TRCVMIDCVR2},
+ {"trcvmidcvr3", TRCVMIDCVR3},
+ {"trcvmidcvr4", TRCVMIDCVR4},
+ {"trcvmidcvr5", TRCVMIDCVR5},
+ {"trcvmidcvr6", TRCVMIDCVR6},
+ {"trcvmidcvr7", TRCVMIDCVR7},
+ {"trccidcctlr0", TRCCIDCCTLR0},
+ {"trccidcctlr1", TRCCIDCCTLR1},
+ {"trcvmidcctlr0", TRCVMIDCCTLR0},
+ {"trcvmidcctlr1", TRCVMIDCCTLR1},
+ {"trcitctrl", TRCITCTRL},
+ {"trcclaimset", TRCCLAIMSET},
+ {"trcclaimclr", TRCCLAIMCLR},
+
+ // GICv3 registers
+ {"icc_bpr1_el1", ICC_BPR1_EL1},
+ {"icc_bpr0_el1", ICC_BPR0_EL1},
+ {"icc_pmr_el1", ICC_PMR_EL1},
+ {"icc_ctlr_el1", ICC_CTLR_EL1},
+ {"icc_ctlr_el3", ICC_CTLR_EL3},
+ {"icc_sre_el1", ICC_SRE_EL1},
+ {"icc_sre_el2", ICC_SRE_EL2},
+ {"icc_sre_el3", ICC_SRE_EL3},
+ {"icc_igrpen0_el1", ICC_IGRPEN0_EL1},
+ {"icc_igrpen1_el1", ICC_IGRPEN1_EL1},
+ {"icc_igrpen1_el3", ICC_IGRPEN1_EL3},
+ {"icc_seien_el1", ICC_SEIEN_EL1},
+ {"icc_ap0r0_el1", ICC_AP0R0_EL1},
+ {"icc_ap0r1_el1", ICC_AP0R1_EL1},
+ {"icc_ap0r2_el1", ICC_AP0R2_EL1},
+ {"icc_ap0r3_el1", ICC_AP0R3_EL1},
+ {"icc_ap1r0_el1", ICC_AP1R0_EL1},
+ {"icc_ap1r1_el1", ICC_AP1R1_EL1},
+ {"icc_ap1r2_el1", ICC_AP1R2_EL1},
+ {"icc_ap1r3_el1", ICC_AP1R3_EL1},
+ {"ich_ap0r0_el2", ICH_AP0R0_EL2},
+ {"ich_ap0r1_el2", ICH_AP0R1_EL2},
+ {"ich_ap0r2_el2", ICH_AP0R2_EL2},
+ {"ich_ap0r3_el2", ICH_AP0R3_EL2},
+ {"ich_ap1r0_el2", ICH_AP1R0_EL2},
+ {"ich_ap1r1_el2", ICH_AP1R1_EL2},
+ {"ich_ap1r2_el2", ICH_AP1R2_EL2},
+ {"ich_ap1r3_el2", ICH_AP1R3_EL2},
+ {"ich_hcr_el2", ICH_HCR_EL2},
+ {"ich_misr_el2", ICH_MISR_EL2},
+ {"ich_vmcr_el2", ICH_VMCR_EL2},
+ {"ich_vseir_el2", ICH_VSEIR_EL2},
+ {"ich_lr0_el2", ICH_LR0_EL2},
+ {"ich_lr1_el2", ICH_LR1_EL2},
+ {"ich_lr2_el2", ICH_LR2_EL2},
+ {"ich_lr3_el2", ICH_LR3_EL2},
+ {"ich_lr4_el2", ICH_LR4_EL2},
+ {"ich_lr5_el2", ICH_LR5_EL2},
+ {"ich_lr6_el2", ICH_LR6_EL2},
+ {"ich_lr7_el2", ICH_LR7_EL2},
+ {"ich_lr8_el2", ICH_LR8_EL2},
+ {"ich_lr9_el2", ICH_LR9_EL2},
+ {"ich_lr10_el2", ICH_LR10_EL2},
+ {"ich_lr11_el2", ICH_LR11_EL2},
+ {"ich_lr12_el2", ICH_LR12_EL2},
+ {"ich_lr13_el2", ICH_LR13_EL2},
+ {"ich_lr14_el2", ICH_LR14_EL2},
+ {"ich_lr15_el2", ICH_LR15_EL2}
+};
+
+uint32_t
+A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+ // First search the registers shared by all
+ std::string NameLower = Name.lower();
+ for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+ if (SysRegPairs[i].Name == NameLower) {
+ Valid = true;
+ return SysRegPairs[i].Value;
+ }
+ }
+
+ // Now try the instruction-specific registers (either read-only or
+ // write-only).
+ for (unsigned i = 0; i < NumInstPairs; ++i) {
+ if (InstPairs[i].Name == NameLower) {
+ Valid = true;
+ return InstPairs[i].Value;
+ }
+ }
+
+ // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits
+ // are: 11 xxx 1x11 xxxx xxx
+ Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$");
+
+ SmallVector<StringRef, 4> Ops;
+ if (!GenericRegPattern.match(NameLower, &Ops)) {
+ Valid = false;
+ return -1;
+ }
+
+ uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
+ uint32_t Bits;
+ Ops[1].getAsInteger(10, Op1);
+ Ops[2].getAsInteger(10, CRn);
+ Ops[3].getAsInteger(10, CRm);
+ Ops[4].getAsInteger(10, Op2);
+ Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
+
+ Valid = true;
+ return Bits;
+}
+
+std::string
+A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+ for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+ if (SysRegPairs[i].Value == Bits) {
+ Valid = true;
+ return SysRegPairs[i].Name;
+ }
+ }
+
+ for (unsigned i = 0; i < NumInstPairs; ++i) {
+ if (InstPairs[i].Value == Bits) {
+ Valid = true;
+ return InstPairs[i].Name;
+ }
+ }
+
+ uint32_t Op0 = (Bits >> 14) & 0x3;
+ uint32_t Op1 = (Bits >> 11) & 0x7;
+ uint32_t CRn = (Bits >> 7) & 0xf;
+ uint32_t CRm = (Bits >> 3) & 0xf;
+ uint32_t Op2 = Bits & 0x7;
+
+ // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic
+ // name.
+ if (Op0 != 3 || (CRn != 11 && CRn != 15)) {
+ Valid = false;
+ return "";
+ }
+
+ assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg");
+
+ Valid = true;
+ return "s3_" + utostr(Op1) + "_c" + utostr(CRn)
+ + "_c" + utostr(CRm) + "_" + utostr(Op2);
+}
+
+const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = {
+ {"ipas2e1is", IPAS2E1IS},
+ {"ipas2le1is", IPAS2LE1IS},
+ {"vmalle1is", VMALLE1IS},
+ {"alle2is", ALLE2IS},
+ {"alle3is", ALLE3IS},
+ {"vae1is", VAE1IS},
+ {"vae2is", VAE2IS},
+ {"vae3is", VAE3IS},
+ {"aside1is", ASIDE1IS},
+ {"vaae1is", VAAE1IS},
+ {"alle1is", ALLE1IS},
+ {"vale1is", VALE1IS},
+ {"vale2is", VALE2IS},
+ {"vale3is", VALE3IS},
+ {"vmalls12e1is", VMALLS12E1IS},
+ {"vaale1is", VAALE1IS},
+ {"ipas2e1", IPAS2E1},
+ {"ipas2le1", IPAS2LE1},
+ {"vmalle1", VMALLE1},
+ {"alle2", ALLE2},
+ {"alle3", ALLE3},
+ {"vae1", VAE1},
+ {"vae2", VAE2},
+ {"vae3", VAE3},
+ {"aside1", ASIDE1},
+ {"vaae1", VAAE1},
+ {"alle1", ALLE1},
+ {"vale1", VALE1},
+ {"vale2", VALE2},
+ {"vale3", VALE3},
+ {"vmalls12e1", VMALLS12E1},
+ {"vaale1", VAALE1}
+};
+
+A64TLBI::TLBIMapper::TLBIMapper()
+ : NamedImmMapper(TLBIPairs, 0) {}
+
+bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) {
+ const fltSemantics &Sem = Val.getSemantics();
+ unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1;
+
+ uint32_t ExpMask;
+ switch (FracBits) {
+ case 10: // IEEE half-precision
+ ExpMask = 0x1f;
+ break;
+ case 23: // IEEE single-precision
+ ExpMask = 0xff;
+ break;
+ case 52: // IEEE double-precision
+ ExpMask = 0x7ff;
+ break;
+ case 112: // IEEE quad-precision
+ // No immediates are valid for double precision.
+ return false;
+ default:
+ llvm_unreachable("Only half, single and double precision supported");
+ }
+
+ uint32_t ExpStart = FracBits;
+ uint64_t FracMask = (1ULL << FracBits) - 1;
+
+ uint32_t Sign = Val.isNegative();
+
+ uint64_t Bits= Val.bitcastToAPInt().getLimitedValue();
+ uint64_t Fraction = Bits & FracMask;
+ int32_t Exponent = ((Bits >> ExpStart) & ExpMask);
+ Exponent -= ExpMask >> 1;
+
+ // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19)
+ // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48)
+ // This translates to: only 4 bits of fraction; -3 <= exp <= 4.
+ uint64_t A64FracStart = FracBits - 4;
+ uint64_t A64FracMask = 0xf;
+
+ // Are there too many fraction bits?
+ if (Fraction & ~(A64FracMask << A64FracStart))
+ return false;
+
+ if (Exponent < -3 || Exponent > 4)
+ return false;
+
+ uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask;
+ uint32_t PackedExp = (Exponent + 7) & 0x7;
+
+ Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction;
+ return true;
+}
+
+// Encoding of the immediate for logical (immediate) instructions:
+//
+// | N | imms | immr | size | R | S |
+// |---+--------+--------+------+--------------+--------------|
+// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) |
+// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) |
+// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) |
+// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) |
+// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) |
+// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) |
+// | 0 | 11111x | - | | UNALLOCATED | |
+//
+// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in
+// which the lower S+1 bits are ones and the remaining bits are zero, then
+// rotated right by R bits, which is then replicated across the datapath.
+//
+// + Values of 'N', 'imms' and 'immr' which do not match the above table are
+// RESERVED.
+// + If all 's' bits in the imms field are set then the instruction is
+// RESERVED.
+// + The 'x' bits in the 'immr' field are IGNORED.
+
+bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) {
+ int RepeatWidth;
+ int Rotation = 0;
+ int Num1s = 0;
+
+ // Because there are S+1 ones in the replicated mask, an immediate of all
+ // zeros is not allowed. Filtering it here is probably more efficient.
+ if (Imm == 0) return false;
+
+ for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) {
+ uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1;
+ uint64_t ReplicatedMask = Imm & RepeatMask;
+
+ if (ReplicatedMask == 0) continue;
+
+ // First we have to make sure the mask is actually repeated in each slot for
+ // this width-specifier.
+ bool IsReplicatedMask = true;
+ for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) {
+ if (((Imm >> i) & RepeatMask) != ReplicatedMask) {
+ IsReplicatedMask = false;
+ break;
+ }
+ }
+ if (!IsReplicatedMask) continue;
+
+ // Now we have to work out the amount of rotation needed. The first part of
+ // this calculation is actually independent of RepeatWidth, but the complex
+ // case will depend on it.
+ Rotation = CountTrailingZeros_64(Imm);
+ if (Rotation == 0) {
+ // There were no leading zeros, which means it's either in place or there
+ // are 1s at each end (e.g. 0x8003 needs rotating).
+ Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm)
+ : CountLeadingOnes_32(Imm);
+ Rotation = RepeatWidth - Rotation;
+ }
+
+ uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation)
+ | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+ // Of course, they may not actually be ones, so we have to check that:
+ if (!isMask_64(ReplicatedOnes))
+ continue;
+
+ Num1s = CountTrailingOnes_64(ReplicatedOnes);
+
+ // We know we've got an almost valid encoding (certainly, if this is invalid
+ // no other parameters would work).
+ break;
+ }
+
+ // The encodings which would produce all 1s are RESERVED.
+ if (RepeatWidth == 1 || Num1s == RepeatWidth) return false;
+
+ uint32_t N = RepeatWidth == 64;
+ uint32_t ImmR = RepeatWidth - Rotation;
+ uint32_t ImmS = Num1s - 1;
+
+ switch (RepeatWidth) {
+ default: break; // No action required for other valid rotations.
+ case 16: ImmS |= 0x20; break; // 10ssss
+ case 8: ImmS |= 0x30; break; // 110sss
+ case 4: ImmS |= 0x38; break; // 1110ss
+ case 2: ImmS |= 0x3c; break; // 11110s
+ }
+
+ Bits = ImmS | (ImmR << 6) | (N << 12);
+
+ return true;
+}
+
+
+bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits,
+ uint64_t &Imm) {
+ uint32_t N = Bits >> 12;
+ uint32_t ImmR = (Bits >> 6) & 0x3f;
+ uint32_t ImmS = Bits & 0x3f;
+
+ // N=1 encodes a 64-bit replication and is invalid for the 32-bit
+ // instructions.
+ if (RegWidth == 32 && N != 0) return false;
+
+ int Width = 0;
+ if (N == 1)
+ Width = 64;
+ else if ((ImmS & 0x20) == 0)
+ Width = 32;
+ else if ((ImmS & 0x10) == 0)
+ Width = 16;
+ else if ((ImmS & 0x08) == 0)
+ Width = 8;
+ else if ((ImmS & 0x04) == 0)
+ Width = 4;
+ else if ((ImmS & 0x02) == 0)
+ Width = 2;
+ else {
+ // ImmS is 0b11111x: UNALLOCATED
+ return false;
+ }
+
+ int Num1s = (ImmS & (Width - 1)) + 1;
+
+ // All encodings which would map to -1 (signed) are RESERVED.
+ if (Num1s == Width) return false;
+
+ int Rotation = (ImmR & (Width - 1));
+ uint64_t Mask = (1ULL << Num1s) - 1;
+ uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1;
+ Mask = (Mask >> Rotation)
+ | ((Mask << (Width - Rotation)) & WidthMask);
+
+ Imm = 0;
+ for (unsigned i = 0; i < RegWidth / Width; ++i) {
+ Imm |= Mask;
+ Mask <<= Width;
+ }
+
+ return true;
+}
+
+bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
+ // If high bits are set then a 32-bit MOVZ can't possibly work.
+ if (RegWidth == 32 && (Value & ~0xffffffffULL))
+ return false;
+
+ for (int i = 0; i < RegWidth; i += 16) {
+ // If the value is 0 when we mask out all the bits that could be set with
+ // the current LSL value then it's representable.
+ if ((Value & ~(0xffffULL << i)) == 0) {
+ Shift = i / 16;
+ UImm16 = (Value >> i) & 0xffff;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
+ // MOVN is defined to set its register to NOT(LSL(imm16, shift)).
+
+ // We have to be a little careful about a 32-bit register: 0xffff_1234 *is*
+ // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not
+ // a valid input for isMOVZImm.
+ if (RegWidth == 32 && (Value & ~0xffffffffULL))
+ return false;
+
+ uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value;
+
+ return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift);
+}
+
+bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
+ int &UImm16, int &Shift) {
+ if (isMOVZImm(RegWidth, Value, UImm16, Shift))
+ return false;
+
+ return isMOVNImm(RegWidth, Value, UImm16, Shift);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
new file mode 100644
index 000000000000..1b773d632ebe
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -0,0 +1,1068 @@
+//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the AArch64 target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_BASEINFO_H
+#define LLVM_AARCH64_BASEINFO_H
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// // Enums corresponding to AArch64 condition codes
+namespace A64CC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ = 0, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Unsigned higher or same >, ==, or unordered
+ LO, // Unsigned lower or same Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Ordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Signed greater than Greater than
+ LE, // Signed less than or equal <, ==, or unordered
+ AL, // Always (unconditional) Always (unconditional)
+ NV, // Always (unconditional) Always (unconditional)
+ // Note the NV exists purely to disassemble 0b1111. Execution
+ // is "always".
+ Invalid
+ };
+
+} // namespace A64CC
+
+inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case A64CC::EQ: return "eq";
+ case A64CC::NE: return "ne";
+ case A64CC::HS: return "hs";
+ case A64CC::LO: return "lo";
+ case A64CC::MI: return "mi";
+ case A64CC::PL: return "pl";
+ case A64CC::VS: return "vs";
+ case A64CC::VC: return "vc";
+ case A64CC::HI: return "hi";
+ case A64CC::LS: return "ls";
+ case A64CC::GE: return "ge";
+ case A64CC::LT: return "lt";
+ case A64CC::GT: return "gt";
+ case A64CC::LE: return "le";
+ case A64CC::AL: return "al";
+ case A64CC::NV: return "nv";
+ }
+}
+
+inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
+ return StringSwitch<A64CC::CondCodes>(CondStr.lower())
+ .Case("eq", A64CC::EQ)
+ .Case("ne", A64CC::NE)
+ .Case("ne", A64CC::NE)
+ .Case("hs", A64CC::HS)
+ .Case("cs", A64CC::HS)
+ .Case("lo", A64CC::LO)
+ .Case("cc", A64CC::LO)
+ .Case("mi", A64CC::MI)
+ .Case("pl", A64CC::PL)
+ .Case("vs", A64CC::VS)
+ .Case("vc", A64CC::VC)
+ .Case("hi", A64CC::HI)
+ .Case("ls", A64CC::LS)
+ .Case("ge", A64CC::GE)
+ .Case("lt", A64CC::LT)
+ .Case("gt", A64CC::GT)
+ .Case("le", A64CC::LE)
+ .Case("al", A64CC::AL)
+ .Case("nv", A64CC::NV)
+ .Default(A64CC::Invalid);
+}
+
+inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
+ // It turns out that the condition codes have been designed so that in order
+ // to reverse the intent of the condition you only have to invert the low bit:
+
+ return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
+}
+
+/// Instances of this class can perform bidirectional mapping from random
+/// identifier strings to operand encodings. For example "MSR" takes a named
+/// system-register which must be encoded somehow and decoded for printing. This
+/// central location means that the information for those transformations is not
+/// duplicated and remains in sync.
+///
+/// FIXME: currently the algorithm is a completely unoptimised linear
+/// search. Obviously this could be improved, but we would probably want to work
+/// out just how often these instructions are emitted before working on it. It
+/// might even be optimal to just reorder the tables for the common instructions
+/// rather than changing the algorithm.
+struct NamedImmMapper {
+ struct Mapping {
+ const char *Name;
+ uint32_t Value;
+ };
+
+ template<int N>
+ NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
+ : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+
+ StringRef toString(uint32_t Value, bool &Valid) const;
+ uint32_t fromString(StringRef Name, bool &Valid) const;
+
+ /// Many of the instructions allow an alternative assembly form consisting of
+ /// a simple immediate. Currently the only valid forms are ranges [0, N) where
+ /// N being 0 indicates no immediate syntax-form is allowed.
+ bool validImm(uint32_t Value) const;
+protected:
+ const Mapping *Pairs;
+ size_t NumPairs;
+ uint32_t TooBigImm;
+};
+
+namespace A64AT {
+ enum ATValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ S1E1R = 0x43c0, // 01 000 0111 1000 000
+ S1E2R = 0x63c0, // 01 100 0111 1000 000
+ S1E3R = 0x73c0, // 01 110 0111 1000 000
+ S1E1W = 0x43c1, // 01 000 0111 1000 001
+ S1E2W = 0x63c1, // 01 100 0111 1000 001
+ S1E3W = 0x73c1, // 01 110 0111 1000 001
+ S1E0R = 0x43c2, // 01 000 0111 1000 010
+ S1E0W = 0x43c3, // 01 000 0111 1000 011
+ S12E1R = 0x63c4, // 01 100 0111 1000 100
+ S12E1W = 0x63c5, // 01 100 0111 1000 101
+ S12E0R = 0x63c6, // 01 100 0111 1000 110
+ S12E0W = 0x63c7 // 01 100 0111 1000 111
+ };
+
+ struct ATMapper : NamedImmMapper {
+ const static Mapping ATPairs[];
+
+ ATMapper();
+ };
+
+}
+namespace A64DB {
+ enum DBValues {
+ Invalid = -1,
+ OSHLD = 0x1,
+ OSHST = 0x2,
+ OSH = 0x3,
+ NSHLD = 0x5,
+ NSHST = 0x6,
+ NSH = 0x7,
+ ISHLD = 0x9,
+ ISHST = 0xa,
+ ISH = 0xb,
+ LD = 0xd,
+ ST = 0xe,
+ SY = 0xf
+ };
+
+ struct DBarrierMapper : NamedImmMapper {
+ const static Mapping DBarrierPairs[];
+
+ DBarrierMapper();
+ };
+}
+
+namespace A64DC {
+ enum DCValues {
+ Invalid = -1, // Op1 CRn CRm Op2
+ ZVA = 0x5ba1, // 01 011 0111 0100 001
+ IVAC = 0x43b1, // 01 000 0111 0110 001
+ ISW = 0x43b2, // 01 000 0111 0110 010
+ CVAC = 0x5bd1, // 01 011 0111 1010 001
+ CSW = 0x43d2, // 01 000 0111 1010 010
+ CVAU = 0x5bd9, // 01 011 0111 1011 001
+ CIVAC = 0x5bf1, // 01 011 0111 1110 001
+ CISW = 0x43f2 // 01 000 0111 1110 010
+ };
+
+ struct DCMapper : NamedImmMapper {
+ const static Mapping DCPairs[];
+
+ DCMapper();
+ };
+
+}
+
+namespace A64IC {
+ enum ICValues {
+ Invalid = -1, // Op1 CRn CRm Op2
+ IALLUIS = 0x0388, // 000 0111 0001 000
+ IALLU = 0x03a8, // 000 0111 0101 000
+ IVAU = 0x1ba9 // 011 0111 0101 001
+ };
+
+
+ struct ICMapper : NamedImmMapper {
+ const static Mapping ICPairs[];
+
+ ICMapper();
+ };
+
+ static inline bool NeedsRegister(ICValues Val) {
+ return Val == IVAU;
+ }
+}
+
+namespace A64ISB {
+ enum ISBValues {
+ Invalid = -1,
+ SY = 0xf
+ };
+ struct ISBMapper : NamedImmMapper {
+ const static Mapping ISBPairs[];
+
+ ISBMapper();
+ };
+}
+
+namespace A64PRFM {
+ enum PRFMValues {
+ Invalid = -1,
+ PLDL1KEEP = 0x00,
+ PLDL1STRM = 0x01,
+ PLDL2KEEP = 0x02,
+ PLDL2STRM = 0x03,
+ PLDL3KEEP = 0x04,
+ PLDL3STRM = 0x05,
+ PLIL1KEEP = 0x08,
+ PLIL1STRM = 0x09,
+ PLIL2KEEP = 0x0a,
+ PLIL2STRM = 0x0b,
+ PLIL3KEEP = 0x0c,
+ PLIL3STRM = 0x0d,
+ PSTL1KEEP = 0x10,
+ PSTL1STRM = 0x11,
+ PSTL2KEEP = 0x12,
+ PSTL2STRM = 0x13,
+ PSTL3KEEP = 0x14,
+ PSTL3STRM = 0x15
+ };
+
+ struct PRFMMapper : NamedImmMapper {
+ const static Mapping PRFMPairs[];
+
+ PRFMMapper();
+ };
+}
+
+namespace A64PState {
+ enum PStateValues {
+ Invalid = -1,
+ SPSel = 0x05,
+ DAIFSet = 0x1e,
+ DAIFClr = 0x1f
+ };
+
+ struct PStateMapper : NamedImmMapper {
+ const static Mapping PStatePairs[];
+
+ PStateMapper();
+ };
+
+}
+
+namespace A64SE {
+ enum ShiftExtSpecifiers {
+ Invalid = -1,
+ LSL,
+ LSR,
+ ASR,
+ ROR,
+
+ UXTB,
+ UXTH,
+ UXTW,
+ UXTX,
+
+ SXTB,
+ SXTH,
+ SXTW,
+ SXTX
+ };
+}
+
+namespace A64SysReg {
+ enum SysRegROValues {
+ MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000
+ DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000
+ MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000
+ OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100
+ DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110
+ PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110
+ PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111
+ MIDR_EL1 = 0xc000, // 11 000 0000 0000 000
+ CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000
+ CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001
+ CTR_EL0 = 0xd801, // 11 011 0000 0000 001
+ MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101
+ REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110
+ AIDR_EL1 = 0xc807, // 11 001 0000 0000 111
+ DCZID_EL0 = 0xd807, // 11 011 0000 0000 111
+ ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000
+ ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001
+ ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010
+ ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011
+ ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100
+ ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101
+ ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110
+ ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111
+ ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000
+ ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001
+ ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010
+ ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011
+ ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100
+ ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101
+ ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000
+ ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001
+ ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000
+ ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001
+ ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100
+ ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101
+ ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000
+ ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001
+ ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000
+ ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001
+ MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000
+ MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001
+ MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010
+ RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001
+ RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001
+ RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001
+ ISR_EL1 = 0xc608, // 11 000 1100 0001 000
+ CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001
+ CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010
+
+ // Trace registers
+ TRCSTATR = 0x8818, // 10 001 0000 0011 000
+ TRCIDR8 = 0x8806, // 10 001 0000 0000 110
+ TRCIDR9 = 0x880e, // 10 001 0000 0001 110
+ TRCIDR10 = 0x8816, // 10 001 0000 0010 110
+ TRCIDR11 = 0x881e, // 10 001 0000 0011 110
+ TRCIDR12 = 0x8826, // 10 001 0000 0100 110
+ TRCIDR13 = 0x882e, // 10 001 0000 0101 110
+ TRCIDR0 = 0x8847, // 10 001 0000 1000 111
+ TRCIDR1 = 0x884f, // 10 001 0000 1001 111
+ TRCIDR2 = 0x8857, // 10 001 0000 1010 111
+ TRCIDR3 = 0x885f, // 10 001 0000 1011 111
+ TRCIDR4 = 0x8867, // 10 001 0000 1100 111
+ TRCIDR5 = 0x886f, // 10 001 0000 1101 111
+ TRCIDR6 = 0x8877, // 10 001 0000 1110 111
+ TRCIDR7 = 0x887f, // 10 001 0000 1111 111
+ TRCOSLSR = 0x888c, // 10 001 0001 0001 100
+ TRCPDSR = 0x88ac, // 10 001 0001 0101 100
+ TRCDEVAFF0 = 0x8bd6, // 10 001 0111 1010 110
+ TRCDEVAFF1 = 0x8bde, // 10 001 0111 1011 110
+ TRCLSR = 0x8bee, // 10 001 0111 1101 110
+ TRCAUTHSTATUS = 0x8bf6, // 10 001 0111 1110 110
+ TRCDEVARCH = 0x8bfe, // 10 001 0111 1111 110
+ TRCDEVID = 0x8b97, // 10 001 0111 0010 111
+ TRCDEVTYPE = 0x8b9f, // 10 001 0111 0011 111
+ TRCPIDR4 = 0x8ba7, // 10 001 0111 0100 111
+ TRCPIDR5 = 0x8baf, // 10 001 0111 0101 111
+ TRCPIDR6 = 0x8bb7, // 10 001 0111 0110 111
+ TRCPIDR7 = 0x8bbf, // 10 001 0111 0111 111
+ TRCPIDR0 = 0x8bc7, // 10 001 0111 1000 111
+ TRCPIDR1 = 0x8bcf, // 10 001 0111 1001 111
+ TRCPIDR2 = 0x8bd7, // 10 001 0111 1010 111
+ TRCPIDR3 = 0x8bdf, // 10 001 0111 1011 111
+ TRCCIDR0 = 0x8be7, // 10 001 0111 1100 111
+ TRCCIDR1 = 0x8bef, // 10 001 0111 1101 111
+ TRCCIDR2 = 0x8bf7, // 10 001 0111 1110 111
+ TRCCIDR3 = 0x8bff, // 10 001 0111 1111 111
+
+ // GICv3 registers
+ ICC_IAR1_EL1 = 0xc660, // 11 000 1100 1100 000
+ ICC_IAR0_EL1 = 0xc640, // 11 000 1100 1000 000
+ ICC_HPPIR1_EL1 = 0xc662, // 11 000 1100 1100 010
+ ICC_HPPIR0_EL1 = 0xc642, // 11 000 1100 1000 010
+ ICC_RPR_EL1 = 0xc65b, // 11 000 1100 1011 011
+ ICH_VTR_EL2 = 0xe659, // 11 100 1100 1011 001
+ ICH_EISR_EL2 = 0xe65b, // 11 100 1100 1011 011
+ ICH_ELSR_EL2 = 0xe65d // 11 100 1100 1011 101
+ };
+
+ enum SysRegWOValues {
+ DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000
+ OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100
+ PMSWINC_EL0 = 0xdce4, // 11 011 1001 1100 100
+
+ // Trace Registers
+ TRCOSLAR = 0x8884, // 10 001 0001 0000 100
+ TRCLAR = 0x8be6, // 10 001 0111 1100 110
+
+ // GICv3 registers
+ ICC_EOIR1_EL1 = 0xc661, // 11 000 1100 1100 001
+ ICC_EOIR0_EL1 = 0xc641, // 11 000 1100 1000 001
+ ICC_DIR_EL1 = 0xc659, // 11 000 1100 1011 001
+ ICC_SGI1R_EL1 = 0xc65d, // 11 000 1100 1011 101
+ ICC_ASGI1R_EL1 = 0xc65e, // 11 000 1100 1011 110
+ ICC_SGI0R_EL1 = 0xc65f // 11 000 1100 1011 111
+ };
+
+ enum SysRegValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010
+ OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010
+ TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000
+ MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000
+ MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010
+ DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000
+ OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010
+ DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000
+ DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100
+ DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100
+ DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100
+ DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100
+ DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100
+ DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100
+ DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100
+ DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100
+ DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100
+ DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100
+ DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100
+ DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100
+ DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100
+ DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100
+ DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100
+ DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100
+ DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101
+ DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101
+ DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101
+ DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101
+ DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101
+ DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101
+ DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101
+ DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101
+ DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101
+ DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101
+ DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101
+ DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101
+ DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101
+ DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101
+ DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101
+ DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101
+ DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110
+ DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110
+ DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110
+ DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110
+ DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110
+ DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110
+ DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110
+ DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110
+ DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110
+ DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110
+ DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110
+ DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110
+ DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110
+ DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110
+ DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110
+ DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110
+ DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111
+ DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111
+ DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111
+ DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111
+ DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111
+ DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111
+ DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111
+ DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111
+ DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111
+ DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111
+ DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111
+ DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111
+ DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111
+ DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111
+ DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111
+ DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111
+ TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000
+ OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100
+ DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100
+ DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110
+ DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110
+ CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000
+ VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000
+ VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101
+ CPACR_EL1 = 0xc082, // 11 000 0001 0000 010
+ SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000
+ SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000
+ SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000
+ ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001
+ ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001
+ ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001
+ HCR_EL2 = 0xe088, // 11 100 0001 0001 000
+ SCR_EL3 = 0xf088, // 11 110 0001 0001 000
+ MDCR_EL2 = 0xe089, // 11 100 0001 0001 001
+ SDER32_EL3 = 0xf089, // 11 110 0001 0001 001
+ CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010
+ CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010
+ HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011
+ HACR_EL2 = 0xe08f, // 11 100 0001 0001 111
+ MDCR_EL3 = 0xf099, // 11 110 0001 0011 001
+ TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000
+ TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000
+ TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000
+ TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001
+ TCR_EL1 = 0xc102, // 11 000 0010 0000 010
+ TCR_EL2 = 0xe102, // 11 100 0010 0000 010
+ TCR_EL3 = 0xf102, // 11 110 0010 0000 010
+ VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000
+ VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010
+ DACR32_EL2 = 0xe180, // 11 100 0011 0000 000
+ SPSR_EL1 = 0xc200, // 11 000 0100 0000 000
+ SPSR_EL2 = 0xe200, // 11 100 0100 0000 000
+ SPSR_EL3 = 0xf200, // 11 110 0100 0000 000
+ ELR_EL1 = 0xc201, // 11 000 0100 0000 001
+ ELR_EL2 = 0xe201, // 11 100 0100 0000 001
+ ELR_EL3 = 0xf201, // 11 110 0100 0000 001
+ SP_EL0 = 0xc208, // 11 000 0100 0001 000
+ SP_EL1 = 0xe208, // 11 100 0100 0001 000
+ SP_EL2 = 0xf208, // 11 110 0100 0001 000
+ SPSel = 0xc210, // 11 000 0100 0010 000
+ NZCV = 0xda10, // 11 011 0100 0010 000
+ DAIF = 0xda11, // 11 011 0100 0010 001
+ CurrentEL = 0xc212, // 11 000 0100 0010 010
+ SPSR_irq = 0xe218, // 11 100 0100 0011 000
+ SPSR_abt = 0xe219, // 11 100 0100 0011 001
+ SPSR_und = 0xe21a, // 11 100 0100 0011 010
+ SPSR_fiq = 0xe21b, // 11 100 0100 0011 011
+ FPCR = 0xda20, // 11 011 0100 0100 000
+ FPSR = 0xda21, // 11 011 0100 0100 001
+ DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000
+ DLR_EL0 = 0xda29, // 11 011 0100 0101 001
+ IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001
+ AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000
+ AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000
+ AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000
+ AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001
+ AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001
+ AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001
+ ESR_EL1 = 0xc290, // 11 000 0101 0010 000
+ ESR_EL2 = 0xe290, // 11 100 0101 0010 000
+ ESR_EL3 = 0xf290, // 11 110 0101 0010 000
+ FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000
+ FAR_EL1 = 0xc300, // 11 000 0110 0000 000
+ FAR_EL2 = 0xe300, // 11 100 0110 0000 000
+ FAR_EL3 = 0xf300, // 11 110 0110 0000 000
+ HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100
+ PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000
+ PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000
+ PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001
+ PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010
+ PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011
+ PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101
+ PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000
+ PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001
+ PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010
+ PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000
+ PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001
+ PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010
+ PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011
+ MAIR_EL1 = 0xc510, // 11 000 1010 0010 000
+ MAIR_EL2 = 0xe510, // 11 100 1010 0010 000
+ MAIR_EL3 = 0xf510, // 11 110 1010 0010 000
+ AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000
+ AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000
+ AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000
+ VBAR_EL1 = 0xc600, // 11 000 1100 0000 000
+ VBAR_EL2 = 0xe600, // 11 100 1100 0000 000
+ VBAR_EL3 = 0xf600, // 11 110 1100 0000 000
+ RMR_EL1 = 0xc602, // 11 000 1100 0000 010
+ RMR_EL2 = 0xe602, // 11 100 1100 0000 010
+ RMR_EL3 = 0xf602, // 11 110 1100 0000 010
+ CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001
+ TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010
+ TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010
+ TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010
+ TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011
+ TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100
+ CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000
+ CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011
+ CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000
+ CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000
+ CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000
+ CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000
+ CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000
+ CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001
+ CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001
+ CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001
+ CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010
+ CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010
+ CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010
+ CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000
+ CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001
+ CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010
+ PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000
+ PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001
+ PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010
+ PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011
+ PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100
+ PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101
+ PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110
+ PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111
+ PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000
+ PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001
+ PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010
+ PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011
+ PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100
+ PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101
+ PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110
+ PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111
+ PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000
+ PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001
+ PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010
+ PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011
+ PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100
+ PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101
+ PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110
+ PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111
+ PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000
+ PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001
+ PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010
+ PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011
+ PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100
+ PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101
+ PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110
+ PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111
+ PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000
+ PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001
+ PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010
+ PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011
+ PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100
+ PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101
+ PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110
+ PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111
+ PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000
+ PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001
+ PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010
+ PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011
+ PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100
+ PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101
+ PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110
+ PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111
+ PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000
+ PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001
+ PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010
+ PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011
+ PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100
+ PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101
+ PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110
+ PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111
+ PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000
+ PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001
+ PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010
+ PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011
+ PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100
+ PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101
+ PMEVTYPER30_EL0 = 0xdf7e, // 11 011 1110 1111 110
+
+ // Trace registers
+ TRCPRGCTLR = 0x8808, // 10 001 0000 0001 000
+ TRCPROCSELR = 0x8810, // 10 001 0000 0010 000
+ TRCCONFIGR = 0x8820, // 10 001 0000 0100 000
+ TRCAUXCTLR = 0x8830, // 10 001 0000 0110 000
+ TRCEVENTCTL0R = 0x8840, // 10 001 0000 1000 000
+ TRCEVENTCTL1R = 0x8848, // 10 001 0000 1001 000
+ TRCSTALLCTLR = 0x8858, // 10 001 0000 1011 000
+ TRCTSCTLR = 0x8860, // 10 001 0000 1100 000
+ TRCSYNCPR = 0x8868, // 10 001 0000 1101 000
+ TRCCCCTLR = 0x8870, // 10 001 0000 1110 000
+ TRCBBCTLR = 0x8878, // 10 001 0000 1111 000
+ TRCTRACEIDR = 0x8801, // 10 001 0000 0000 001
+ TRCQCTLR = 0x8809, // 10 001 0000 0001 001
+ TRCVICTLR = 0x8802, // 10 001 0000 0000 010
+ TRCVIIECTLR = 0x880a, // 10 001 0000 0001 010
+ TRCVISSCTLR = 0x8812, // 10 001 0000 0010 010
+ TRCVIPCSSCTLR = 0x881a, // 10 001 0000 0011 010
+ TRCVDCTLR = 0x8842, // 10 001 0000 1000 010
+ TRCVDSACCTLR = 0x884a, // 10 001 0000 1001 010
+ TRCVDARCCTLR = 0x8852, // 10 001 0000 1010 010
+ TRCSEQEVR0 = 0x8804, // 10 001 0000 0000 100
+ TRCSEQEVR1 = 0x880c, // 10 001 0000 0001 100
+ TRCSEQEVR2 = 0x8814, // 10 001 0000 0010 100
+ TRCSEQRSTEVR = 0x8834, // 10 001 0000 0110 100
+ TRCSEQSTR = 0x883c, // 10 001 0000 0111 100
+ TRCEXTINSELR = 0x8844, // 10 001 0000 1000 100
+ TRCCNTRLDVR0 = 0x8805, // 10 001 0000 0000 101
+ TRCCNTRLDVR1 = 0x880d, // 10 001 0000 0001 101
+ TRCCNTRLDVR2 = 0x8815, // 10 001 0000 0010 101
+ TRCCNTRLDVR3 = 0x881d, // 10 001 0000 0011 101
+ TRCCNTCTLR0 = 0x8825, // 10 001 0000 0100 101
+ TRCCNTCTLR1 = 0x882d, // 10 001 0000 0101 101
+ TRCCNTCTLR2 = 0x8835, // 10 001 0000 0110 101
+ TRCCNTCTLR3 = 0x883d, // 10 001 0000 0111 101
+ TRCCNTVR0 = 0x8845, // 10 001 0000 1000 101
+ TRCCNTVR1 = 0x884d, // 10 001 0000 1001 101
+ TRCCNTVR2 = 0x8855, // 10 001 0000 1010 101
+ TRCCNTVR3 = 0x885d, // 10 001 0000 1011 101
+ TRCIMSPEC0 = 0x8807, // 10 001 0000 0000 111
+ TRCIMSPEC1 = 0x880f, // 10 001 0000 0001 111
+ TRCIMSPEC2 = 0x8817, // 10 001 0000 0010 111
+ TRCIMSPEC3 = 0x881f, // 10 001 0000 0011 111
+ TRCIMSPEC4 = 0x8827, // 10 001 0000 0100 111
+ TRCIMSPEC5 = 0x882f, // 10 001 0000 0101 111
+ TRCIMSPEC6 = 0x8837, // 10 001 0000 0110 111
+ TRCIMSPEC7 = 0x883f, // 10 001 0000 0111 111
+ TRCRSCTLR2 = 0x8890, // 10 001 0001 0010 000
+ TRCRSCTLR3 = 0x8898, // 10 001 0001 0011 000
+ TRCRSCTLR4 = 0x88a0, // 10 001 0001 0100 000
+ TRCRSCTLR5 = 0x88a8, // 10 001 0001 0101 000
+ TRCRSCTLR6 = 0x88b0, // 10 001 0001 0110 000
+ TRCRSCTLR7 = 0x88b8, // 10 001 0001 0111 000
+ TRCRSCTLR8 = 0x88c0, // 10 001 0001 1000 000
+ TRCRSCTLR9 = 0x88c8, // 10 001 0001 1001 000
+ TRCRSCTLR10 = 0x88d0, // 10 001 0001 1010 000
+ TRCRSCTLR11 = 0x88d8, // 10 001 0001 1011 000
+ TRCRSCTLR12 = 0x88e0, // 10 001 0001 1100 000
+ TRCRSCTLR13 = 0x88e8, // 10 001 0001 1101 000
+ TRCRSCTLR14 = 0x88f0, // 10 001 0001 1110 000
+ TRCRSCTLR15 = 0x88f8, // 10 001 0001 1111 000
+ TRCRSCTLR16 = 0x8881, // 10 001 0001 0000 001
+ TRCRSCTLR17 = 0x8889, // 10 001 0001 0001 001
+ TRCRSCTLR18 = 0x8891, // 10 001 0001 0010 001
+ TRCRSCTLR19 = 0x8899, // 10 001 0001 0011 001
+ TRCRSCTLR20 = 0x88a1, // 10 001 0001 0100 001
+ TRCRSCTLR21 = 0x88a9, // 10 001 0001 0101 001
+ TRCRSCTLR22 = 0x88b1, // 10 001 0001 0110 001
+ TRCRSCTLR23 = 0x88b9, // 10 001 0001 0111 001
+ TRCRSCTLR24 = 0x88c1, // 10 001 0001 1000 001
+ TRCRSCTLR25 = 0x88c9, // 10 001 0001 1001 001
+ TRCRSCTLR26 = 0x88d1, // 10 001 0001 1010 001
+ TRCRSCTLR27 = 0x88d9, // 10 001 0001 1011 001
+ TRCRSCTLR28 = 0x88e1, // 10 001 0001 1100 001
+ TRCRSCTLR29 = 0x88e9, // 10 001 0001 1101 001
+ TRCRSCTLR30 = 0x88f1, // 10 001 0001 1110 001
+ TRCRSCTLR31 = 0x88f9, // 10 001 0001 1111 001
+ TRCSSCCR0 = 0x8882, // 10 001 0001 0000 010
+ TRCSSCCR1 = 0x888a, // 10 001 0001 0001 010
+ TRCSSCCR2 = 0x8892, // 10 001 0001 0010 010
+ TRCSSCCR3 = 0x889a, // 10 001 0001 0011 010
+ TRCSSCCR4 = 0x88a2, // 10 001 0001 0100 010
+ TRCSSCCR5 = 0x88aa, // 10 001 0001 0101 010
+ TRCSSCCR6 = 0x88b2, // 10 001 0001 0110 010
+ TRCSSCCR7 = 0x88ba, // 10 001 0001 0111 010
+ TRCSSCSR0 = 0x88c2, // 10 001 0001 1000 010
+ TRCSSCSR1 = 0x88ca, // 10 001 0001 1001 010
+ TRCSSCSR2 = 0x88d2, // 10 001 0001 1010 010
+ TRCSSCSR3 = 0x88da, // 10 001 0001 1011 010
+ TRCSSCSR4 = 0x88e2, // 10 001 0001 1100 010
+ TRCSSCSR5 = 0x88ea, // 10 001 0001 1101 010
+ TRCSSCSR6 = 0x88f2, // 10 001 0001 1110 010
+ TRCSSCSR7 = 0x88fa, // 10 001 0001 1111 010
+ TRCSSPCICR0 = 0x8883, // 10 001 0001 0000 011
+ TRCSSPCICR1 = 0x888b, // 10 001 0001 0001 011
+ TRCSSPCICR2 = 0x8893, // 10 001 0001 0010 011
+ TRCSSPCICR3 = 0x889b, // 10 001 0001 0011 011
+ TRCSSPCICR4 = 0x88a3, // 10 001 0001 0100 011
+ TRCSSPCICR5 = 0x88ab, // 10 001 0001 0101 011
+ TRCSSPCICR6 = 0x88b3, // 10 001 0001 0110 011
+ TRCSSPCICR7 = 0x88bb, // 10 001 0001 0111 011
+ TRCPDCR = 0x88a4, // 10 001 0001 0100 100
+ TRCACVR0 = 0x8900, // 10 001 0010 0000 000
+ TRCACVR1 = 0x8910, // 10 001 0010 0010 000
+ TRCACVR2 = 0x8920, // 10 001 0010 0100 000
+ TRCACVR3 = 0x8930, // 10 001 0010 0110 000
+ TRCACVR4 = 0x8940, // 10 001 0010 1000 000
+ TRCACVR5 = 0x8950, // 10 001 0010 1010 000
+ TRCACVR6 = 0x8960, // 10 001 0010 1100 000
+ TRCACVR7 = 0x8970, // 10 001 0010 1110 000
+ TRCACVR8 = 0x8901, // 10 001 0010 0000 001
+ TRCACVR9 = 0x8911, // 10 001 0010 0010 001
+ TRCACVR10 = 0x8921, // 10 001 0010 0100 001
+ TRCACVR11 = 0x8931, // 10 001 0010 0110 001
+ TRCACVR12 = 0x8941, // 10 001 0010 1000 001
+ TRCACVR13 = 0x8951, // 10 001 0010 1010 001
+ TRCACVR14 = 0x8961, // 10 001 0010 1100 001
+ TRCACVR15 = 0x8971, // 10 001 0010 1110 001
+ TRCACATR0 = 0x8902, // 10 001 0010 0000 010
+ TRCACATR1 = 0x8912, // 10 001 0010 0010 010
+ TRCACATR2 = 0x8922, // 10 001 0010 0100 010
+ TRCACATR3 = 0x8932, // 10 001 0010 0110 010
+ TRCACATR4 = 0x8942, // 10 001 0010 1000 010
+ TRCACATR5 = 0x8952, // 10 001 0010 1010 010
+ TRCACATR6 = 0x8962, // 10 001 0010 1100 010
+ TRCACATR7 = 0x8972, // 10 001 0010 1110 010
+ TRCACATR8 = 0x8903, // 10 001 0010 0000 011
+ TRCACATR9 = 0x8913, // 10 001 0010 0010 011
+ TRCACATR10 = 0x8923, // 10 001 0010 0100 011
+ TRCACATR11 = 0x8933, // 10 001 0010 0110 011
+ TRCACATR12 = 0x8943, // 10 001 0010 1000 011
+ TRCACATR13 = 0x8953, // 10 001 0010 1010 011
+ TRCACATR14 = 0x8963, // 10 001 0010 1100 011
+ TRCACATR15 = 0x8973, // 10 001 0010 1110 011
+ TRCDVCVR0 = 0x8904, // 10 001 0010 0000 100
+ TRCDVCVR1 = 0x8924, // 10 001 0010 0100 100
+ TRCDVCVR2 = 0x8944, // 10 001 0010 1000 100
+ TRCDVCVR3 = 0x8964, // 10 001 0010 1100 100
+ TRCDVCVR4 = 0x8905, // 10 001 0010 0000 101
+ TRCDVCVR5 = 0x8925, // 10 001 0010 0100 101
+ TRCDVCVR6 = 0x8945, // 10 001 0010 1000 101
+ TRCDVCVR7 = 0x8965, // 10 001 0010 1100 101
+ TRCDVCMR0 = 0x8906, // 10 001 0010 0000 110
+ TRCDVCMR1 = 0x8926, // 10 001 0010 0100 110
+ TRCDVCMR2 = 0x8946, // 10 001 0010 1000 110
+ TRCDVCMR3 = 0x8966, // 10 001 0010 1100 110
+ TRCDVCMR4 = 0x8907, // 10 001 0010 0000 111
+ TRCDVCMR5 = 0x8927, // 10 001 0010 0100 111
+ TRCDVCMR6 = 0x8947, // 10 001 0010 1000 111
+ TRCDVCMR7 = 0x8967, // 10 001 0010 1100 111
+ TRCCIDCVR0 = 0x8980, // 10 001 0011 0000 000
+ TRCCIDCVR1 = 0x8990, // 10 001 0011 0010 000
+ TRCCIDCVR2 = 0x89a0, // 10 001 0011 0100 000
+ TRCCIDCVR3 = 0x89b0, // 10 001 0011 0110 000
+ TRCCIDCVR4 = 0x89c0, // 10 001 0011 1000 000
+ TRCCIDCVR5 = 0x89d0, // 10 001 0011 1010 000
+ TRCCIDCVR6 = 0x89e0, // 10 001 0011 1100 000
+ TRCCIDCVR7 = 0x89f0, // 10 001 0011 1110 000
+ TRCVMIDCVR0 = 0x8981, // 10 001 0011 0000 001
+ TRCVMIDCVR1 = 0x8991, // 10 001 0011 0010 001
+ TRCVMIDCVR2 = 0x89a1, // 10 001 0011 0100 001
+ TRCVMIDCVR3 = 0x89b1, // 10 001 0011 0110 001
+ TRCVMIDCVR4 = 0x89c1, // 10 001 0011 1000 001
+ TRCVMIDCVR5 = 0x89d1, // 10 001 0011 1010 001
+ TRCVMIDCVR6 = 0x89e1, // 10 001 0011 1100 001
+ TRCVMIDCVR7 = 0x89f1, // 10 001 0011 1110 001
+ TRCCIDCCTLR0 = 0x8982, // 10 001 0011 0000 010
+ TRCCIDCCTLR1 = 0x898a, // 10 001 0011 0001 010
+ TRCVMIDCCTLR0 = 0x8992, // 10 001 0011 0010 010
+ TRCVMIDCCTLR1 = 0x899a, // 10 001 0011 0011 010
+ TRCITCTRL = 0x8b84, // 10 001 0111 0000 100
+ TRCCLAIMSET = 0x8bc6, // 10 001 0111 1000 110
+ TRCCLAIMCLR = 0x8bce, // 10 001 0111 1001 110
+
+ // GICv3 registers
+ ICC_BPR1_EL1 = 0xc663, // 11 000 1100 1100 011
+ ICC_BPR0_EL1 = 0xc643, // 11 000 1100 1000 011
+ ICC_PMR_EL1 = 0xc230, // 11 000 0100 0110 000
+ ICC_CTLR_EL1 = 0xc664, // 11 000 1100 1100 100
+ ICC_CTLR_EL3 = 0xf664, // 11 110 1100 1100 100
+ ICC_SRE_EL1 = 0xc665, // 11 000 1100 1100 101
+ ICC_SRE_EL2 = 0xe64d, // 11 100 1100 1001 101
+ ICC_SRE_EL3 = 0xf665, // 11 110 1100 1100 101
+ ICC_IGRPEN0_EL1 = 0xc666, // 11 000 1100 1100 110
+ ICC_IGRPEN1_EL1 = 0xc667, // 11 000 1100 1100 111
+ ICC_IGRPEN1_EL3 = 0xf667, // 11 110 1100 1100 111
+ ICC_SEIEN_EL1 = 0xc668, // 11 000 1100 1101 000
+ ICC_AP0R0_EL1 = 0xc644, // 11 000 1100 1000 100
+ ICC_AP0R1_EL1 = 0xc645, // 11 000 1100 1000 101
+ ICC_AP0R2_EL1 = 0xc646, // 11 000 1100 1000 110
+ ICC_AP0R3_EL1 = 0xc647, // 11 000 1100 1000 111
+ ICC_AP1R0_EL1 = 0xc648, // 11 000 1100 1001 000
+ ICC_AP1R1_EL1 = 0xc649, // 11 000 1100 1001 001
+ ICC_AP1R2_EL1 = 0xc64a, // 11 000 1100 1001 010
+ ICC_AP1R3_EL1 = 0xc64b, // 11 000 1100 1001 011
+ ICH_AP0R0_EL2 = 0xe640, // 11 100 1100 1000 000
+ ICH_AP0R1_EL2 = 0xe641, // 11 100 1100 1000 001
+ ICH_AP0R2_EL2 = 0xe642, // 11 100 1100 1000 010
+ ICH_AP0R3_EL2 = 0xe643, // 11 100 1100 1000 011
+ ICH_AP1R0_EL2 = 0xe648, // 11 100 1100 1001 000
+ ICH_AP1R1_EL2 = 0xe649, // 11 100 1100 1001 001
+ ICH_AP1R2_EL2 = 0xe64a, // 11 100 1100 1001 010
+ ICH_AP1R3_EL2 = 0xe64b, // 11 100 1100 1001 011
+ ICH_HCR_EL2 = 0xe658, // 11 100 1100 1011 000
+ ICH_MISR_EL2 = 0xe65a, // 11 100 1100 1011 010
+ ICH_VMCR_EL2 = 0xe65f, // 11 100 1100 1011 111
+ ICH_VSEIR_EL2 = 0xe64c, // 11 100 1100 1001 100
+ ICH_LR0_EL2 = 0xe660, // 11 100 1100 1100 000
+ ICH_LR1_EL2 = 0xe661, // 11 100 1100 1100 001
+ ICH_LR2_EL2 = 0xe662, // 11 100 1100 1100 010
+ ICH_LR3_EL2 = 0xe663, // 11 100 1100 1100 011
+ ICH_LR4_EL2 = 0xe664, // 11 100 1100 1100 100
+ ICH_LR5_EL2 = 0xe665, // 11 100 1100 1100 101
+ ICH_LR6_EL2 = 0xe666, // 11 100 1100 1100 110
+ ICH_LR7_EL2 = 0xe667, // 11 100 1100 1100 111
+ ICH_LR8_EL2 = 0xe668, // 11 100 1100 1101 000
+ ICH_LR9_EL2 = 0xe669, // 11 100 1100 1101 001
+ ICH_LR10_EL2 = 0xe66a, // 11 100 1100 1101 010
+ ICH_LR11_EL2 = 0xe66b, // 11 100 1100 1101 011
+ ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100
+ ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101
+ ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110
+ ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111
+ };
+
+ // Note that these do not inherit from NamedImmMapper. This class is
+ // sufficiently different in its behaviour that I don't believe it's worth
+ // burdening the common NamedImmMapper with abstractions only needed in
+ // this one case.
+ struct SysRegMapper {
+ static const NamedImmMapper::Mapping SysRegPairs[];
+
+ const NamedImmMapper::Mapping *InstPairs;
+ size_t NumInstPairs;
+
+ SysRegMapper() {}
+ uint32_t fromString(StringRef Name, bool &Valid) const;
+ std::string toString(uint32_t Bits, bool &Valid) const;
+ };
+
+ struct MSRMapper : SysRegMapper {
+ static const NamedImmMapper::Mapping MSRPairs[];
+ MSRMapper();
+ };
+
+ struct MRSMapper : SysRegMapper {
+ static const NamedImmMapper::Mapping MRSPairs[];
+ MRSMapper();
+ };
+
+ uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
+}
+
+namespace A64TLBI {
+ enum TLBIValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ IPAS2E1IS = 0x6401, // 01 100 1000 0000 001
+ IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101
+ VMALLE1IS = 0x4418, // 01 000 1000 0011 000
+ ALLE2IS = 0x6418, // 01 100 1000 0011 000
+ ALLE3IS = 0x7418, // 01 110 1000 0011 000
+ VAE1IS = 0x4419, // 01 000 1000 0011 001
+ VAE2IS = 0x6419, // 01 100 1000 0011 001
+ VAE3IS = 0x7419, // 01 110 1000 0011 001
+ ASIDE1IS = 0x441a, // 01 000 1000 0011 010
+ VAAE1IS = 0x441b, // 01 000 1000 0011 011
+ ALLE1IS = 0x641c, // 01 100 1000 0011 100
+ VALE1IS = 0x441d, // 01 000 1000 0011 101
+ VALE2IS = 0x641d, // 01 100 1000 0011 101
+ VALE3IS = 0x741d, // 01 110 1000 0011 101
+ VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110
+ VAALE1IS = 0x441f, // 01 000 1000 0011 111
+ IPAS2E1 = 0x6421, // 01 100 1000 0100 001
+ IPAS2LE1 = 0x6425, // 01 100 1000 0100 101
+ VMALLE1 = 0x4438, // 01 000 1000 0111 000
+ ALLE2 = 0x6438, // 01 100 1000 0111 000
+ ALLE3 = 0x7438, // 01 110 1000 0111 000
+ VAE1 = 0x4439, // 01 000 1000 0111 001
+ VAE2 = 0x6439, // 01 100 1000 0111 001
+ VAE3 = 0x7439, // 01 110 1000 0111 001
+ ASIDE1 = 0x443a, // 01 000 1000 0111 010
+ VAAE1 = 0x443b, // 01 000 1000 0111 011
+ ALLE1 = 0x643c, // 01 100 1000 0111 100
+ VALE1 = 0x443d, // 01 000 1000 0111 101
+ VALE2 = 0x643d, // 01 100 1000 0111 101
+ VALE3 = 0x743d, // 01 110 1000 0111 101
+ VMALLS12E1 = 0x643e, // 01 100 1000 0111 110
+ VAALE1 = 0x443f // 01 000 1000 0111 111
+ };
+
+ struct TLBIMapper : NamedImmMapper {
+ const static Mapping TLBIPairs[];
+
+ TLBIMapper();
+ };
+
+ static inline bool NeedsRegister(TLBIValues Val) {
+ switch (Val) {
+ case VMALLE1IS:
+ case ALLE2IS:
+ case ALLE3IS:
+ case ALLE1IS:
+ case VMALLS12E1IS:
+ case VMALLE1:
+ case ALLE2:
+ case ALLE3:
+ case ALLE1:
+ case VMALLS12E1:
+ return false;
+ default:
+ return true;
+ }
+ }
+}
+
+namespace AArch64II {
+
+ enum TOF {
+ //===--------------------------------------------------------------===//
+ // AArch64 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ // MO_GOT - Represents a relocation referring to the GOT entry of a given
+ // symbol. Used in adrp.
+ MO_GOT,
+
+ // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the
+ // GOT entry of a given symbol. Used in ldr only.
+ MO_GOT_LO12,
+
+ // MO_DTPREL_* - Represents a relocation referring to the offset from a
+ // module's dynamic thread pointer. Used in the local-dynamic TLS access
+ // model.
+ MO_DTPREL_G1,
+ MO_DTPREL_G0_NC,
+
+ // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry
+ // providing the offset of a variable from the thread-pointer. Used in
+ // initial-exec TLS model where this offset is assigned in the static thread
+ // block and thus known by the dynamic linker.
+ MO_GOTTPREL,
+ MO_GOTTPREL_LO12,
+
+ // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing
+ // a TLS descriptor chosen by the dynamic linker. Used for the
+ // general-dynamic and local-dynamic TLS access models where very littls is
+ // known at link-time.
+ MO_TLSDESC,
+ MO_TLSDESC_LO12,
+
+ // MO_TPREL_* - Represents a relocation referring to the offset of a
+ // variable from the thread pointer itself. Used in the local-exec TLS
+ // access model.
+ MO_TPREL_G1,
+ MO_TPREL_G0_NC,
+
+ // MO_LO12 - On a symbol operand, this represents a relocation containing
+ // lower 12 bits of the address. Used in add/sub/ldr/str.
+ MO_LO12
+ };
+}
+
+class APFloat;
+
+namespace A64Imms {
+ bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits);
+
+ inline bool isFPImm(const APFloat &Val) {
+ uint32_t Imm8;
+ return isFPImm(Val, Imm8);
+ }
+
+ bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits);
+ bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm);
+
+ bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+ bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+ // We sometimes want to know whether the immediate is representable with a
+ // MOVN but *not* with a MOVZ (because that would take priority).
+ bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
new file mode 100644
index 000000000000..f0d4dbe2bfb3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -0,0 +1,704 @@
+//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Cortex-A15 processor employs a tracking scheme in its register renaming
+// in order to process each instruction's micro-ops speculatively and
+// out-of-order with appropriate forwarding. The ARM architecture allows VFP
+// instructions to read and write 32-bit S-registers. Each S-register
+// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
+//
+// There are several instruction patterns which can be used to provide this
+// capability which can provide higher performance than other, potentially more
+// direct patterns, specifically around when one micro-op reads a D-register
+// operand that has recently been written as one or more S-register results.
+//
+// This file defines a pre-regalloc pass which looks for SPR producers which
+// are going to be used by a DPR (or QPR) consumers and creates the more
+// optimized access pattern.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "a15-sd-optimizer"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <set>
+
+using namespace llvm;
+
+namespace {
+ struct A15SDOptimizer : public MachineFunctionPass {
+ static char ID;
+ A15SDOptimizer() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM A15 S->D optimizer";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ bool runOnInstruction(MachineInstr *MI);
+
+ //
+ // Instruction builder helpers
+ //
+ unsigned createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane,
+ bool QPR=false);
+
+ unsigned createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC);
+
+ unsigned createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1);
+
+ unsigned createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2);
+
+ unsigned createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert);
+
+ unsigned createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL);
+
+ //
+ // Various property checkers
+ //
+ bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
+ bool hasPartialWrite(MachineInstr *MI);
+ SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
+ unsigned getDPRLaneFromSPR(unsigned SReg);
+
+ //
+ // Methods used for getting the definitions of partial registers
+ //
+
+ MachineInstr *elideCopies(MachineInstr *MI);
+ void elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs);
+
+ //
+ // Pattern optimization methods
+ //
+ unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
+ unsigned optimizeSDPattern(MachineInstr *MI);
+ unsigned getPrefSPRLane(unsigned SReg);
+
+ //
+ // Sanitizing method - used to make sure if don't leave dead code around.
+ //
+ void eraseInstrWithNoUses(MachineInstr *MI);
+
+ //
+ // A map used to track the changes done by this pass.
+ //
+ std::map<MachineInstr*, unsigned> Replacements;
+ std::set<MachineInstr *> DeadInstr;
+ };
+ char A15SDOptimizer::ID = 0;
+} // end anonymous namespace
+
+// Returns true if this is a use of a SPR register.
+bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
+ const TargetRegisterClass *TRC) {
+ if (!MO.isReg())
+ return false;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
+ else
+ return TRC->contains(Reg);
+}
+
+unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
+ &ARM::DPRRegClass);
+ if (DReg != ARM::NoRegister) return ARM::ssub_1;
+ return ARM::ssub_0;
+}
+
+// Get the subreg type that is most likely to be coalesced
+// for an SPR register that will be used in VDUP32d pseudo.
+unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
+ if (!TRI->isVirtualRegister(SReg))
+ return getDPRLaneFromSPR(SReg);
+
+ MachineInstr *MI = MRI->getVRegDef(SReg);
+ if (!MI) return ARM::ssub_0;
+ MachineOperand *MO = MI->findRegisterDefOperand(SReg);
+
+ assert(MO->isReg() && "Non register operand found!");
+ if (!MO) return ARM::ssub_0;
+
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ SReg = MI->getOperand(1).getReg();
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+ if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
+ return ARM::ssub_0;
+ }
+ return getDPRLaneFromSPR(SReg);
+}
+
+// MI is known to be dead. Figure out what instructions
+// are also made dead by this and mark them for removal.
+void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
+ SmallVector<MachineInstr *, 8> Front;
+ DeadInstr.insert(MI);
+
+ DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
+ Front.push_back(MI);
+
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // MI is already known to be dead. We need to see
+ // if other instructions can also be removed.
+ for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if ((!MO.isReg()) || (!MO.isUse()))
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+ MachineOperand *Op = MI->findRegisterDefOperand(Reg);
+
+ if (!Op)
+ continue;
+
+ MachineInstr *Def = Op->getParent();
+
+ // We don't need to do anything if we have already marked
+ // this instruction as being dead.
+ if (DeadInstr.find(Def) != DeadInstr.end())
+ continue;
+
+ // Check if all the uses of this instruction are marked as
+ // dead. If so, we can also mark this instruction as being
+ // dead.
+ bool IsDead = true;
+ for (unsigned int j = 0; j < Def->getNumOperands(); ++j) {
+ MachineOperand &MODef = Def->getOperand(j);
+ if ((!MODef.isReg()) || (!MODef.isDef()))
+ continue;
+ unsigned DefReg = MODef.getReg();
+ if (!TRI->isVirtualRegister(DefReg)) {
+ IsDead = false;
+ break;
+ }
+ for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg),
+ EE = MRI->use_end();
+ II != EE; ++II) {
+ // We don't care about self references.
+ if (&*II == Def)
+ continue;
+ if (DeadInstr.find(&*II) == DeadInstr.end()) {
+ IsDead = false;
+ break;
+ }
+ }
+ }
+
+ if (!IsDead) continue;
+
+ DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
+ DeadInstr.insert(Def);
+ }
+ }
+}
+
+// Creates the more optimized patterns and generally does all the code
+// transformations in this pass.
+unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
+ }
+
+ if (MI->isInsertSubreg()) {
+ unsigned DPRReg = MI->getOperand(1).getReg();
+ unsigned SPRReg = MI->getOperand(2).getReg();
+
+ if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
+ MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
+
+ if (DPRMI && SPRMI) {
+ // See if the first operand of this insert_subreg is IMPLICIT_DEF
+ MachineInstr *ECDef = elideCopies(DPRMI);
+ if (ECDef != 0 && ECDef->isImplicitDef()) {
+ // Another corner case - if we're inserting something that is purely
+ // a subreg copy of a DPR, just use that DPR.
+
+ MachineInstr *EC = elideCopies(SPRMI);
+ // Is it a subreg copy of ssub_0?
+ if (EC && EC->isCopy() &&
+ EC->getOperand(1).getSubReg() == ARM::ssub_0) {
+ DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
+
+ // Find the thing we're subreg copying out of - is it of the same
+ // regclass as DPRMI? (i.e. a DPR or QPR).
+ unsigned FullReg = SPRMI->getOperand(1).getReg();
+ const TargetRegisterClass *TRC =
+ MRI->getRegClass(MI->getOperand(1).getReg());
+ if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
+ DEBUG(dbgs() << "Subreg copy is compatible - returning ");
+ DEBUG(dbgs() << PrintReg(FullReg) << "\n");
+ eraseInstrWithNoUses(MI);
+ return FullReg;
+ }
+ }
+
+ return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
+ }
+ }
+ }
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ // See if all bar one of the operands are IMPLICIT_DEF and insert the
+ // optimizer pattern accordingly.
+ unsigned NumImplicit = 0, NumTotal = 0;
+ unsigned NonImplicitReg = ~0U;
+
+ for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
+ if (!MI->getOperand(I).isReg())
+ continue;
+ ++NumTotal;
+ unsigned OpReg = MI->getOperand(I).getReg();
+
+ if (!TRI->isVirtualRegister(OpReg))
+ break;
+
+ MachineInstr *Def = MRI->getVRegDef(OpReg);
+ if (!Def)
+ break;
+ if (Def->isImplicitDef())
+ ++NumImplicit;
+ else
+ NonImplicitReg = MI->getOperand(I).getReg();
+ }
+
+ if (NumImplicit == NumTotal - 1)
+ return optimizeAllLanesPattern(MI, NonImplicitReg);
+ else
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ assert(0 && "Unhandled update pattern!");
+ return 0;
+}
+
+// Return true if this MachineInstr inserts a scalar (SPR) value into
+// a D or Q register.
+bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
+ // The only way we can do a partial register update is through a COPY,
+ // INSERT_SUBREG or REG_SEQUENCE.
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
+ &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ return false;
+}
+
+// Looks through full copies to get the instruction that defines the input
+// operand for MI.
+MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
+ if (!MI->isFullCopy())
+ return MI;
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ return NULL;
+ MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!Def)
+ return NULL;
+ return elideCopies(Def);
+}
+
+// Look through full copies and PHIs to get the set of non-copy MachineInstrs
+// that can produce MI.
+void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs) {
+ // Looking through PHIs may create loops so we need to track what
+ // instructions we have visited before.
+ std::set<MachineInstr *> Reached;
+ SmallVector<MachineInstr *, 8> Front;
+ Front.push_back(MI);
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // If we have already explored this MachineInstr, ignore it.
+ if (Reached.find(MI) != Reached.end())
+ continue;
+ Reached.insert(MI);
+ if (MI->isPHI()) {
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ unsigned Reg = MI->getOperand(I).getReg();
+ if (!TRI->isVirtualRegister(Reg)) {
+ continue;
+ }
+ MachineInstr *NewMI = MRI->getVRegDef(Reg);
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ }
+ } else if (MI->isFullCopy()) {
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ continue;
+ MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ } else {
+ DEBUG(dbgs() << "Found partial copy" << *MI <<"\n");
+ Outs.push_back(MI);
+ }
+ }
+}
+
+// Return the DPR virtual registers that are read by this machine instruction
+// (if any).
+SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
+ if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
+ MI->isKill())
+ return SmallVector<unsigned, 8>();
+
+ SmallVector<unsigned, 8> Defs;
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ if (!usesRegClass(MO, &ARM::DPRRegClass) &&
+ !usesRegClass(MO, &ARM::QPRRegClass))
+ continue;
+
+ Defs.push_back(MO.getReg());
+ }
+ return Defs;
+}
+
+// Creates a DPR register from an SPR one by using a VDUP.
+unsigned
+A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane, bool QPR) {
+ unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
+ &ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
+ Out)
+ .addReg(Reg)
+ .addImm(Lane));
+
+ return Out;
+}
+
+// Creates a SPR register from a DPR by copying the value in lane 0.
+unsigned
+A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC) {
+ unsigned Out = MRI->createVirtualRegister(TRC);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::COPY), Out)
+ .addReg(DReg, 0, Lane);
+
+ return Out;
+}
+
+// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
+unsigned
+A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::REG_SEQUENCE), Out)
+ .addReg(Reg1)
+ .addImm(ARM::dsub_0)
+ .addReg(Reg2)
+ .addImm(ARM::dsub_1);
+ return Out;
+}
+
+// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
+// and merges them into one DPR register.
+unsigned
+A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(ARM::VEXTd32), Out)
+ .addReg(Ssub0)
+ .addReg(Ssub1)
+ .addImm(1));
+ return Out;
+}
+
+unsigned
+A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::INSERT_SUBREG), Out)
+ .addReg(DReg)
+ .addReg(ToInsert)
+ .addImm(Lane);
+
+ return Out;
+}
+
+unsigned
+A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::IMPLICIT_DEF), Out);
+ return Out;
+}
+
+// This function inserts instructions in order to optimize interactions between
+// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
+// lanes, and the using VEXT instructions to recompose the result.
+unsigned
+A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
+ MachineBasicBlock::iterator InsertPt(MI);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock &MBB = *MI->getParent();
+ InsertPt++;
+ unsigned Out;
+
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
+ unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_0, &ARM::DPRRegClass);
+ unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_1, &ARM::DPRRegClass);
+
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
+ unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
+ Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
+
+ Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
+
+ } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ } else {
+ assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
+ "Found unexpected regclass!");
+
+ unsigned PrefLane = getPrefSPRLane(Reg);
+ unsigned Lane;
+ switch (PrefLane) {
+ case ARM::ssub_0: Lane = 0; break;
+ case ARM::ssub_1: Lane = 1; break;
+ default: llvm_unreachable("Unknown preferred lane!");
+ }
+
+ bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
+
+ Out = createImplicitDef(MBB, InsertPt, DL);
+ Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
+ Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
+ eraseInstrWithNoUses(MI);
+ }
+ return Out;
+}
+
+bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
+ // We look for instructions that write S registers that are then read as
+ // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
+ // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
+ // merge two SPR values to form a DPR register. In order avoid false
+ // positives we make sure that there is an SPR producer so we look past
+ // COPY and PHI nodes to find it.
+ //
+ // The best code pattern for when an SPR producer is going to be used by a
+ // DPR or QPR consumer depends on whether the other lanes of the
+ // corresponding DPR/QPR are currently defined.
+ //
+ // We can handle these efficiently, depending on the type of
+ // pseudo-instruction that is producing the pattern
+ //
+ // * COPY: * VDUP all lanes and merge the results together
+ // using VEXTs.
+ //
+ // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
+ // lane, and the other lane(s) of the DPR/QPR register
+ // that we are inserting in are undefined, use the
+ // original DPR/QPR value.
+ // * Otherwise, fall back on the same stategy as COPY.
+ //
+ // * REG_SEQUENCE: * If all except one of the input operands are
+ // IMPLICIT_DEFs, insert the VDUP pattern for just the
+ // defined input operand
+ // * Otherwise, fall back on the same stategy as COPY.
+ //
+
+ // First, get all the reads of D-registers done by this instruction.
+ SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
+ bool Modified = false;
+
+ for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ // Follow the def-use chain for this DPR through COPYs, and also through
+ // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
+ // we can end up with multiple defs of this DPR.
+
+ SmallVector<MachineInstr *, 8> DefSrcs;
+ if (!TRI->isVirtualRegister(*I))
+ continue;
+ MachineInstr *Def = MRI->getVRegDef(*I);
+ if (!Def)
+ continue;
+
+ elideCopiesAndPHIs(Def, DefSrcs);
+
+ for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(),
+ EE = DefSrcs.end(); II != EE; ++II) {
+ MachineInstr *MI = *II;
+
+ // If we've already analyzed and replaced this operand, don't do
+ // anything.
+ if (Replacements.find(MI) != Replacements.end())
+ continue;
+
+ // Now, work out if the instruction causes a SPR->DPR dependency.
+ if (!hasPartialWrite(MI))
+ continue;
+
+ // Collect all the uses of this MI's DPR def for updating later.
+ SmallVector<MachineOperand*, 8> Uses;
+ unsigned DPRDefReg = MI->getOperand(0).getReg();
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
+ E = MRI->use_end(); I != E; ++I)
+ Uses.push_back(&I.getOperand());
+
+ // We can optimize this.
+ unsigned NewReg = optimizeSDPattern(MI);
+
+ if (NewReg != 0) {
+ Modified = true;
+ for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ DEBUG(dbgs() << "Replacing operand "
+ << **I << " with "
+ << PrintReg(NewReg) << "\n");
+ (*I)->substVirtReg(NewReg, 0, *TRI);
+ }
+ }
+ Replacements[MI] = NewReg;
+ }
+ }
+ return Modified;
+}
+
+bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ bool Modified = false;
+
+ DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n");
+
+ DeadInstr.clear();
+ Replacements.clear();
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+
+ for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end();
+ MI != ME;) {
+ Modified |= runOnInstruction(MI++);
+ }
+
+ }
+
+ for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
+ E = DeadInstr.end();
+ I != E; ++I) {
+ (*I)->eraseFromParent();
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createA15SDOptimizerPass() {
+ return new A15SDOptimizer();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
new file mode 100644
index 000000000000..80e5f37eb086
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -0,0 +1,56 @@
+//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class ARMAsmPrinter;
+class ARMBaseTargetMachine;
+class FunctionPass;
+class JITCodeEmitter;
+class MachineInstr;
+class MCInst;
+
+FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+FunctionPass *createA15SDOptimizerPass();
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
+FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalBaseRegPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
+FunctionPass *createARMConstantIslandPass();
+FunctionPass *createMLxExpansionPass();
+FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createThumb2SizeReductionPass();
+
+/// \brief Creates an ARM-specific Target Transformation Info pass.
+ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
+
+void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
new file mode 100644
index 000000000000..68380847a022
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -0,0 +1,320 @@
+//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget state.
+//
+
+def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
+ "Thumb mode">;
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
+ "Enable VFP2 instructions">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
+ "Enable VFP3 instructions",
+ [FeatureVFP2]>;
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable NEON instructions",
+ [FeatureVFP3]>;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
+ "Enable Thumb2 instructions">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution">;
+def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
+ "Enable half-precision floating point">;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3, FeatureFP16]>;
+def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
+ "Restrict VFP3 to 16 double registers">;
+def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+ "Enable divide instructions">;
+def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
+ "HasHardwareDivideInARM", "true",
+ "Enable divide instructions in ARM mode">;
+def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+ "Enable Thumb2 extract and pack instructions">;
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb / dsb) instructions">;
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+ "FP compare + branch is slow">;
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports single precision only">;
+
+// Some processors have FP multiply-accumulate instructions that don't
+// play nicely with other VFP / NEON instructions, and it's generally better
+// to just not use them.
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+ "Disable VFP / NEON MAC instructions">;
+
+// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
+ "HasVMLxForwarding", "true",
+ "Has multiplier accumulator forwarding">;
+
+// Some processors benefit from using NEON instructions for scalar
+// single-precision FP operations.
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
+
+// Disable 32-bit to 16-bit narrowing for experimentation.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+ "Prefer 32-bit Thumb instrs">;
+
+/// Some instructions update CPSR partially, which can add false dependency for
+/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
+/// mapped to a separate physical register. Avoid partial CPSR update for these
+/// processors.
+def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
+ "AvoidCPSRPartialUpdate", "true",
+ "Avoid CPSR partial update for OOO execution">;
+
+def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
+ "AvoidMOVsShifterOperand", "true",
+ "Avoid movs instructions with shifter operand">;
+
+// Some processors perform return stack prediction. CodeGen should avoid issue
+// "normal" call instructions to callees which do not return.
+def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
+ "Has return address stack">;
+
+/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
+def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
+ "Supports v7 DSP instructions in Thumb2">;
+
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+ "Supports Multiprocessing extension">;
+
+// M-series ISA?
+def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true",
+ "Is microcontroller profile ('M' series)">;
+
+// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
+// See ARMInstrInfo.td for details.
+def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
+ "NaCl trap">;
+
+// ARM ISAs.
+def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
+ "Support ARM v4T instructions">;
+def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
+ "Support ARM v5T instructions",
+ [HasV4TOps]>;
+def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
+ "Support ARM v5TE, v5TEj, and v5TExp instructions",
+ [HasV5TOps]>;
+def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
+ "Support ARM v6 instructions",
+ [HasV5TEOps]>;
+def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
+ "Support ARM v6t2 instructions",
+ [HasV6Ops, FeatureThumb2]>;
+def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
+ "Support ARM v7 instructions",
+ [HasV6T2Ops]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+include "ARMSchedule.td"
+
+// ARM processor families.
+def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
+ "Cortex-A5 ARM processors",
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+ "Cortex-A8 ARM processors",
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+ "Cortex-A9 ARM processors",
+ [FeatureVMLxForwarding,
+ FeatureT2XtPk, FeatureFP16,
+ FeatureAvoidPartialCPSR]>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+ "Swift ARM processors",
+ [FeatureNEONForFP, FeatureT2XtPk,
+ FeatureVFP4, FeatureMP, FeatureHWDiv,
+ FeatureHWDivARM, FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
+ FeatureHasSlowFPVMLx]>;
+
+// FIXME: It has not been determined if A15 has these features.
+def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
+ "Cortex-A15 ARM processors",
+ [FeatureT2XtPk, FeatureFP16,
+ FeatureAvoidPartialCPSR]>;
+def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
+ "Cortex-R5 ARM processors",
+ [FeatureSlowFPBrcc, FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+// V4 Processors.
+def : ProcNoItin<"generic", []>;
+def : ProcNoItin<"arm8", []>;
+def : ProcNoItin<"arm810", []>;
+def : ProcNoItin<"strongarm", []>;
+def : ProcNoItin<"strongarm110", []>;
+def : ProcNoItin<"strongarm1100", []>;
+def : ProcNoItin<"strongarm1110", []>;
+
+// V4T Processors.
+def : ProcNoItin<"arm7tdmi", [HasV4TOps]>;
+def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>;
+def : ProcNoItin<"arm710t", [HasV4TOps]>;
+def : ProcNoItin<"arm720t", [HasV4TOps]>;
+def : ProcNoItin<"arm9", [HasV4TOps]>;
+def : ProcNoItin<"arm9tdmi", [HasV4TOps]>;
+def : ProcNoItin<"arm920", [HasV4TOps]>;
+def : ProcNoItin<"arm920t", [HasV4TOps]>;
+def : ProcNoItin<"arm922t", [HasV4TOps]>;
+def : ProcNoItin<"arm940t", [HasV4TOps]>;
+def : ProcNoItin<"ep9312", [HasV4TOps]>;
+
+// V5T Processors.
+def : ProcNoItin<"arm10tdmi", [HasV5TOps]>;
+def : ProcNoItin<"arm1020t", [HasV5TOps]>;
+
+// V5TE Processors.
+def : ProcNoItin<"arm9e", [HasV5TEOps]>;
+def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm946e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm966e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm968e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm10e", [HasV5TEOps]>;
+def : ProcNoItin<"arm1020e", [HasV5TEOps]>;
+def : ProcNoItin<"arm1022e", [HasV5TEOps]>;
+def : ProcNoItin<"xscale", [HasV5TEOps]>;
+def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
+
+// V6 Processors.
+def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// V6M Processors.
+def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
+ FeatureDB, FeatureMClass]>;
+
+// V6T2 Processors.
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
+ FeatureDSPThumb2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx,
+ FeatureDSPThumb2]>;
+
+// V7a Processors.
+// FIXME: A5 has currently the same Schedule model as A8
+def : ProcessorModel<"cortex-a5", CortexA8Model,
+ [ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureVFP4, FeatureDSPThumb2,
+ FeatureHasRAS]>;
+def : ProcessorModel<"cortex-a8", CortexA8Model,
+ [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureHasRAS]>;
+def : ProcessorModel<"cortex-a9", CortexA9Model,
+ [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureHasRAS]>;
+def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
+ [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureMP,
+ FeatureHasRAS]>;
+// FIXME: A15 has currently the same ProcessorModel as A9.
+def : ProcessorModel<"cortex-a15", CortexA9Model,
+ [ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureHasRAS]>;
+// FIXME: R5 has currently the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r5", CortexA8Model,
+ [ProcR5, HasV7Ops, FeatureDB,
+ FeatureVFP3, FeatureDSPThumb2,
+ FeatureHasRAS]>;
+
+// V7M Processors.
+def : ProcNoItin<"cortex-m3", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureMClass]>;
+
+// V7EM Processors.
+def : ProcNoItin<"cortex-m4", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureDSPThumb2,
+ FeatureT2XtPk, FeatureVFP4,
+ FeatureVFPOnlySP, FeatureMClass]>;
+
+// Swift uArch Processors.
+def : ProcessorModel<"swift", SwiftModel,
+ [ProcSwift, HasV7Ops, FeatureNEON,
+ FeatureDB, FeatureDSPThumb2,
+ FeatureHasRAS]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+
+include "ARMCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+
+def ARMInstrInfo : InstrInfo;
+
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// ARM Uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def ARMAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def ARM : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = ARMInstrInfo;
+
+ let AssemblyWriters = [ARMAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
new file mode 100644
index 000000000000..13ec2087938a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -0,0 +1,1945 @@
+//===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format ARM assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARMAsmPrinter.h"
+#include "ARM.h"
+#include "ARMBuildAttrs.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+ // Per section and per symbol attributes are not supported.
+ // To implement them we would need the ability to delay this emission
+ // until the assembly file is fully parsed/generated as only then do we
+ // know the symbol and section numbers.
+ class AttributeEmitter {
+ public:
+ virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
+ virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
+ virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
+ virtual void Finish() = 0;
+ virtual ~AttributeEmitter() {}
+ };
+
+ class AsmAttributeEmitter : public AttributeEmitter {
+ MCStreamer &Streamer;
+
+ public:
+ AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
+ void MaybeSwitchVendor(StringRef Vendor) { }
+
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ Streamer.EmitRawText("\t.eabi_attribute " +
+ Twine(Attribute) + ", " + Twine(Value));
+ }
+
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ switch (Attribute) {
+ default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
+ case ARMBuildAttrs::CPU_name:
+ Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower());
+ break;
+ /* GAS requires .fpu to be emitted regardless of EABI attribute */
+ case ARMBuildAttrs::Advanced_SIMD_arch:
+ case ARMBuildAttrs::VFP_arch:
+ Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower());
+ break;
+ }
+ }
+ void Finish() { }
+ };
+
+ class ObjectAttributeEmitter : public AttributeEmitter {
+ // This structure holds all attributes, accounting for
+ // their string/numeric value, so we can later emmit them
+ // in declaration order, keeping all in the same vector
+ struct AttributeItemType {
+ enum {
+ HiddenAttribute = 0,
+ NumericAttribute,
+ TextAttribute
+ } Type;
+ unsigned Tag;
+ unsigned IntValue;
+ StringRef StringValue;
+ } AttributeItem;
+
+ MCObjectStreamer &Streamer;
+ StringRef CurrentVendor;
+ SmallVector<AttributeItemType, 64> Contents;
+
+ // Account for the ULEB/String size of each item,
+ // not just the number of items
+ size_t ContentsSize;
+ // FIXME: this should be in a more generic place, but
+ // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
+ size_t getULEBSize(int Value) {
+ size_t Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t); // Is this really necessary?
+ } while (Value);
+ return Size;
+ }
+
+ public:
+ ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
+ Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { }
+
+ void MaybeSwitchVendor(StringRef Vendor) {
+ assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+ if (CurrentVendor.empty())
+ CurrentVendor = Vendor;
+ else if (CurrentVendor == Vendor)
+ return;
+ else
+ Finish();
+
+ CurrentVendor = Vendor;
+
+ assert(Contents.size() == 0);
+ }
+
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ AttributeItemType attr = {
+ AttributeItemType::NumericAttribute,
+ Attribute,
+ Value,
+ StringRef("")
+ };
+ ContentsSize += getULEBSize(Attribute);
+ ContentsSize += getULEBSize(Value);
+ Contents.push_back(attr);
+ }
+
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ AttributeItemType attr = {
+ AttributeItemType::TextAttribute,
+ Attribute,
+ 0,
+ String
+ };
+ ContentsSize += getULEBSize(Attribute);
+ // String + \0
+ ContentsSize += String.size()+1;
+
+ Contents.push_back(attr);
+ }
+
+ void Finish() {
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+ Streamer.EmitBytes(CurrentVendor);
+ Streamer.EmitIntValue(0, 1); // '\0'
+
+ Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+ Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+ // Size should have been accounted for already, now
+ // emit each field as its type (ULEB or String)
+ for (unsigned int i=0; i<Contents.size(); ++i) {
+ AttributeItemType item = Contents[i];
+ Streamer.EmitULEB128IntValue(item.Tag);
+ switch (item.Type) {
+ default: llvm_unreachable("Invalid attribute type");
+ case AttributeItemType::NumericAttribute:
+ Streamer.EmitULEB128IntValue(item.IntValue);
+ break;
+ case AttributeItemType::TextAttribute:
+ Streamer.EmitBytes(item.StringValue.upper());
+ Streamer.EmitIntValue(0, 1); // '\0'
+ break;
+ }
+ }
+
+ Contents.clear();
+ }
+ };
+
+} // end of anonymous namespace
+
+MachineLocation ARMAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1)
+ AsmPrinter::EmitDwarfRegOp(MLoc);
+ else {
+ unsigned Reg = MLoc.getReg();
+ if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+ assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
+ // S registers are described as bit-pieces of a register
+ // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
+ // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
+
+ unsigned SReg = Reg - ARM::S0;
+ bool odd = SReg & 0x1;
+ unsigned Rx = 256 + (SReg >> 1);
+
+ OutStreamer.AddComment("DW_OP_regx for S register");
+ EmitInt8(dwarf::DW_OP_regx);
+
+ OutStreamer.AddComment(Twine(SReg));
+ EmitULEB128(Rx);
+
+ if (odd) {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 32");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(32);
+ } else {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 0");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(0);
+ }
+ } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
+ assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
+ // Q registers Q0-Q15 are described by composing two D registers together.
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1)
+ // DW_OP_piece(8)
+
+ unsigned QReg = Reg - ARM::Q0;
+ unsigned D1 = 256 + 2 * QReg;
+ unsigned D2 = D1 + 1;
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D1");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D1);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D2");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D2);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+ }
+ }
+}
+
+void ARMAsmPrinter::EmitFunctionBodyEnd() {
+ // Make sure to terminate any constant pools that were at the end
+ // of the function.
+ if (!InConstantPool)
+ return;
+ InConstantPool = false;
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+}
+
+void ARMAsmPrinter::EmitFunctionEntryLabel() {
+ if (AFI->isThumbFunction()) {
+ OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer.EmitThumbFunc(CurrentFnSym);
+ }
+
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+ assert(Size && "C++ constructor pointer had zero size!");
+
+ const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
+ assert(GV && "C++ constructor pointer was not a GlobalValue!");
+
+ const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ (Subtarget->isTargetDarwin()
+ ? MCSymbolRefExpr::VK_None
+ : MCSymbolRefExpr::VK_ARM_TARGET1),
+ OutContext);
+
+ OutStreamer.EmitValue(E, Size);
+}
+
+/// runOnMachineFunction - This uses the EmitInstruction()
+/// method to print assembly for each instruction.
+///
+bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ MCP = MF.getConstantPool();
+
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned TF = MO.getTargetFlags();
+
+ switch (MO.getType()) {
+ default: llvm_unreachable("<unknown operand type>");
+ case MachineOperand::MO_Register: {
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ if(ARM::GPRPairRegClass.contains(Reg)) {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ Reg = TRI->getSubReg(Reg, ARM::gsub_0);
+ }
+ O << ARMInstPrinter::getRegisterName(Reg);
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ int64_t Imm = MO.getImm();
+ O << '#';
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF == ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF == ARMII::MO_HI16))
+ O << ":upper16:";
+ O << Imm;
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF & ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF & ARMII::MO_HI16))
+ O << ":upper16:";
+ O << *Mang->getSymbol(GV);
+
+ printOffset(MO.getOffset(), O);
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ break;
+ }
+}
+
+//===--------------------------------------------------------------------===//
+
+MCSymbol *ARMAsmPrinter::
+GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << uid << '_' << uid2;
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
+ << getFunctionNumber();
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+ case 'a': // Print as a memory address.
+ if (MI->getOperand(OpNum).isReg()) {
+ O << "["
+ << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg())
+ << "]";
+ return false;
+ }
+ // Fallthrough
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'P': // Print a VFP double precision register.
+ case 'q': // Print a NEON quad precision register.
+ printOperand(MI, OpNum, O);
+ return false;
+ case 'y': // Print a VFP single precision register as indexed double.
+ if (MI->getOperand(OpNum).isReg()) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ // Find the 'd' register that has this 's' register as a sub-register,
+ // and determine the lane number.
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
+ if (!ARM::DPRRegClass.contains(*SR))
+ continue;
+ bool Lane0 = TRI->getSubReg(*SR, ARM::ssub_0) == Reg;
+ O << ARMInstPrinter::getRegisterName(*SR) << (Lane0 ? "[0]" : "[1]");
+ return false;
+ }
+ }
+ return true;
+ case 'B': // Bitwise inverse of integer or symbol without a preceding #.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << ~(MI->getOperand(OpNum).getImm());
+ return false;
+ case 'L': // The low 16 bits of an immediate constant.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << (MI->getOperand(OpNum).getImm() & 0xffff);
+ return false;
+ case 'M': { // A register range suitable for LDM/STM.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned RegBegin = MO.getReg();
+ // This takes advantage of the 2 operand-ness of ldm/stm and that we've
+ // already got the operands in registers that are operands to the
+ // inline asm statement.
+
+ O << "{" << ARMInstPrinter::getRegisterName(RegBegin);
+
+ // FIXME: The register allocator not only may not have given us the
+ // registers in sequence, but may not be in ascending registers. This
+ // will require changes in the register allocator that'll need to be
+ // propagated down here if the operands change.
+ unsigned RegOps = OpNum + 1;
+ while (MI->getOperand(RegOps).isReg()) {
+ O << ", "
+ << ARMInstPrinter::getRegisterName(MI->getOperand(RegOps).getReg());
+ RegOps++;
+ }
+
+ O << "}";
+
+ return false;
+ }
+ case 'R': // The most significant register of a pair.
+ case 'Q': { // The least significant register of a pair.
+ if (OpNum == 0)
+ return true;
+ const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1);
+ if (!FlagsOP.isImm())
+ return true;
+ unsigned Flags = FlagsOP.getImm();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ if (NumVals != 2)
+ return true;
+ unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1;
+ if (RegOp >= MI->getNumOperands())
+ return true;
+ const MachineOperand &MO = MI->getOperand(RegOp);
+ if (!MO.isReg())
+ return true;
+ unsigned Reg = MO.getReg();
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+
+ case 'e': // The low doubleword register of a NEON quad register.
+ case 'f': { // The high doubleword register of a NEON quad register.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (!ARM::QPRRegClass.contains(Reg))
+ return true;
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+ ARM::dsub_0 : ARM::dsub_1);
+ O << ARMInstPrinter::getRegisterName(SubReg);
+ return false;
+ }
+
+ // This modifier is not yet supported.
+ case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
+ return true;
+ case 'H': { // The highest-numbered register of a pair.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.isReg())
+ return true;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ unsigned Reg = MO.getReg();
+ if(!ARM::GPRPairRegClass.contains(Reg))
+ return false;
+ Reg = TRI->getSubReg(Reg, ARM::gsub_1);
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+ }
+ }
+
+ printOperand(MI, OpNum, O);
+ return false;
+}
+
+bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ case 'A': // A memory operand for a VLD1/VST1 instruction.
+ default: return true; // Unknown modifier.
+ case 'm': // The base register of a memory operand.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ O << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg());
+ return false;
+ }
+ }
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]";
+ return false;
+}
+
+void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
+ // Declare all the text sections up front (before the DWARF sections
+ // emitted by AsmPrinter::doInitialization) so the assembler will keep
+ // them together at the beginning of the object file. This helps
+ // avoid out-of-range branches that are due a fundamental limitation of
+ // the way symbol offsets are encoded with the current Darwin ARM
+ // relocations.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(
+ getObjFileLowering());
+
+ // Collect the set of sections our functions will go into.
+ SetVector<const MCSection *, SmallVector<const MCSection *, 8>,
+ SmallPtrSet<const MCSection *, 8> > TextSections;
+ // Default text section comes first.
+ TextSections.insert(TLOFMacho.getTextSection());
+ // Now any user defined text sections from function attributes.
+ for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F)
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
+ TextSections.insert(TLOFMacho.SectionForGlobal(F, Mang, TM));
+ // Now the coalescable sections.
+ TextSections.insert(TLOFMacho.getTextCoalSection());
+ TextSections.insert(TLOFMacho.getConstTextCoalSection());
+
+ // Emit the sections in the .s file header to fix the order.
+ for (unsigned i = 0, e = TextSections.size(); i != e; ++i)
+ OutStreamer.SwitchSection(TextSections[i]);
+
+ if (RelocM == Reloc::DynamicNoPIC) {
+ const MCSection *sect =
+ OutContext.getMachOSection("__TEXT", "__symbol_stub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 12, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ } else {
+ const MCSection *sect =
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 16, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ }
+ const MCSection *StaticInitSect =
+ OutContext.getMachOSection("__TEXT", "__StaticInit",
+ MCSectionMachO::S_REGULAR |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ OutStreamer.SwitchSection(StaticInitSect);
+ }
+ }
+
+ // Use unified assembler syntax.
+ OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
+
+ // Emit ARM Build Attributes
+ if (Subtarget->isTargetELF())
+ emitAttributes();
+}
+
+
+void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ // All darwin targets use mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output non-lazy-pointers for external and common global variables.
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
+
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(2);
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs; however, sometimes the types are local to the file.
+ // We need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ }
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer))
+ MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+// FIXME:
+// The following seem like one-off assembler flags, but they actually need
+// to appear in the .ARM.attributes section in ELF.
+// Instead of subclassing the MCELFStreamer, we do the work here.
+
+void ARMAsmPrinter::emitAttributes() {
+
+ emitARMAttributeSection();
+
+ /* GAS expect .fpu to be emitted, regardless of VFP build attribute */
+ bool emitFPU = false;
+ AttributeEmitter *AttrEmitter;
+ if (OutStreamer.hasRawTextSupport()) {
+ AttrEmitter = new AsmAttributeEmitter(OutStreamer);
+ emitFPU = true;
+ } else {
+ MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
+ AttrEmitter = new ObjectAttributeEmitter(O);
+ }
+
+ AttrEmitter->MaybeSwitchVendor("aeabi");
+
+ std::string CPUString = Subtarget->getCPUString();
+
+ if (CPUString == "cortex-a8" ||
+ Subtarget->isCortexA8()) {
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ // Fixme: figure out when this is emitted.
+ //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
+ // ARMBuildAttrs::AllowWMMXv1);
+ //
+
+ /// ADD additional Else-cases here!
+ } else if (CPUString == "xscale") {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TEJ);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ } else if (CPUString == "generic") {
+ // For a generic CPU, we assume a standard v7a architecture in Subtarget.
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ } else if (Subtarget->hasV7Ops()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ } else if (Subtarget->hasV6T2Ops())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2);
+ else if (Subtarget->hasV6Ops())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6);
+ else if (Subtarget->hasV5TEOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE);
+ else if (Subtarget->hasV5TOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T);
+ else if (Subtarget->hasV4TOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+
+ if (Subtarget->hasNEON() && emitFPU) {
+ /* NEON is not exactly a VFP architecture, but GAS emit one of
+ * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (Subtarget->hasVFP4())
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ "neon-vfpv4");
+ else
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+ /* If emitted for NEON, omit from VFP below, since you can have both
+ * NEON and VFP in build attributes but only one .fpu */
+ emitFPU = false;
+ }
+
+ /* VFPv4 + .fpu */
+ if (Subtarget->hasVFP4()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
+
+ /* VFPv3 + .fpu */
+ } else if (Subtarget->hasVFP3()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3");
+
+ /* VFPv2 + .fpu */
+ } else if (Subtarget->hasVFP2()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv2);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2");
+ }
+
+ /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
+ * since NEON can have 1 (allowed) or 2 (MAC operations) */
+ if (Subtarget->hasNEON()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::Allowed);
+ }
+
+ // Signal various FP modes.
+ if (!TM.Options.UnsafeFPMath) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Allowed);
+ }
+
+ if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::Allowed);
+ else
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::AllowIEE754);
+
+ // FIXME: add more flags to ARMBuildAttrs.h
+ // 8-bytes alignment stuff.
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+ // Hard float. Use both S and D registers and conform to AAPCS-VFP.
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
+ }
+ // FIXME: Should we signal R9 usage?
+
+ if (Subtarget->hasDivide())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+
+ AttrEmitter->Finish();
+ delete AttrEmitter;
+}
+
+void ARMAsmPrinter::emitARMAttributeSection() {
+ // <format-version>
+ // [ <section-length> "vendor-name"
+ // [ <file-tag> <size> <attribute>*
+ // | <section-tag> <size> <section-number>* 0 <attribute>*
+ // | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+ // ]+
+ // ]*
+
+ if (OutStreamer.hasRawTextSupport())
+ return;
+
+ const ARMElfTargetObjectFile &TLOFELF =
+ static_cast<const ARMElfTargetObjectFile &>
+ (getObjFileLowering());
+
+ OutStreamer.SwitchSection(TLOFELF.getAttributesSection());
+
+ // Format version
+ OutStreamer.EmitIntValue(0x41, 1);
+}
+
+//===----------------------------------------------------------------------===//
+
+static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+ unsigned LabelId, MCContext &Ctx) {
+
+ MCSymbol *Label = Ctx.GetOrCreateSymbol(Twine(Prefix)
+ + "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId));
+ return Label;
+}
+
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
+ switch (Modifier) {
+ case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
+ case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD;
+ case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF;
+ case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_ARM_GOTTPOFF;
+ case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT;
+ case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF;
+ }
+ llvm_unreachable("Invalid ARMCPModifier!");
+}
+
+MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
+ bool isIndirect = Subtarget->isTargetDarwin() &&
+ Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+ if (!isIndirect)
+ return Mang->getSymbol(GV);
+
+ // FIXME: Remove this when Darwin transition to @GOT like syntax.
+ MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoMachO &MMIMachO =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
+ MMIMachO.getGVStubEntry(MCSym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ return MCSym;
+}
+
+void ARMAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+
+ MCSymbol *MCSym;
+ if (ACPV->isLSDA()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
+ MCSym = OutContext.GetOrCreateSymbol(OS.str());
+ } else if (ACPV->isBlockAddress()) {
+ const BlockAddress *BA =
+ cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
+ MCSym = GetBlockAddressSymbol(BA);
+ } else if (ACPV->isGlobalValue()) {
+ const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
+ MCSym = GetARMGVSymbol(GV);
+ } else if (ACPV->isMachineBasicBlock()) {
+ const MachineBasicBlock *MBB = cast<ARMConstantPoolMBB>(ACPV)->getMBB();
+ MCSym = MBB->getSymbol();
+ } else {
+ assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
+ const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ MCSym = GetExternalSymbolSymbol(Sym);
+ }
+
+ // Create an MCSymbol for the reference.
+ const MCExpr *Expr =
+ MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+ OutContext);
+
+ if (ACPV->getPCAdjustment()) {
+ MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ ACPV->getLabelId(),
+ OutContext);
+ const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext);
+ PCRelExpr =
+ MCBinaryExpr::CreateAdd(PCRelExpr,
+ MCConstantExpr::Create(ACPV->getPCAdjustment(),
+ OutContext),
+ OutContext);
+ if (ACPV->mustAddCurrentAddress()) {
+ // We want "(<expr> - .)", but MC doesn't have a concept of the '.'
+ // label, so just emit a local label end reference that instead.
+ MCSymbol *DotSym = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(DotSym);
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
+ }
+ Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
+ }
+ OutStreamer.EmitValue(Expr, Size);
+}
+
+void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = 1;
+ if (Opcode == ARM::BR_JTadd)
+ OpNum = 2;
+ else if (Opcode == ARM::BR_JTm)
+ OpNum = 3;
+
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Mark the jump table as data-in-code.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ // Construct an MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr)
+ //
+ // For example, a table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .word (LBB0 - LJTI_0_0)
+ // .word (LBB1 - LJTI_0_0)
+ const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
+ OutContext),
+ OutContext);
+ // If we're generating a table of Thumb addresses in static relocation
+ // model, we need to add one to keep interworking correctly.
+ else if (AFI->isThumbFunction())
+ Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, 4);
+ }
+ // Mark the end of jump table data-in-code region.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+}
+
+void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ unsigned OffsetWidth = 4;
+ if (MI->getOpcode() == ARM::t2TBB_JT) {
+ OffsetWidth = 1;
+ // Mark the jump table as data-in-code.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT8);
+ } else if (MI->getOpcode() == ARM::t2TBH_JT) {
+ OffsetWidth = 2;
+ // Mark the jump table as data-in-code.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT16);
+ }
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
+ OutContext);
+ // If this isn't a TBB or TBH, the entries are direct branch instructions.
+ if (OffsetWidth == 4) {
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2B)
+ .addExpr(MBBSymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ continue;
+ }
+ // Otherwise it's an offset from the dispatch instruction. Construct an
+ // MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr) / 2
+ //
+ // For example, a TBB table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .byte (LBB0 - LJTI_0_0) / 2
+ // .byte (LBB1 - LJTI_0_0) / 2
+ const MCExpr *Expr =
+ MCBinaryExpr::CreateSub(MBBSymbolExpr,
+ MCSymbolRefExpr::Create(JTISymbol, OutContext),
+ OutContext);
+ Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, OffsetWidth);
+ }
+ // Mark the end of jump table data-in-code region. 32-bit offsets use
+ // actual branch instructions here, so we don't mark those as a data-region
+ // at all.
+ if (OffsetWidth != 4)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+}
+
+void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+}
+
+void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
+ assert(MI->getFlag(MachineInstr::FrameSetup) &&
+ "Only instruction which are involved into frame setup code are allowed");
+
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
+
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned Opc = MI->getOpcode();
+ unsigned SrcReg, DstReg;
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) {
+ // Two special cases:
+ // 1) tPUSH does not have src/dst regs.
+ // 2) for Thumb1 code we sometimes materialize the constant via constpool
+ // load. Yes, this is pretty fragile, but for now I don't see better
+ // way... :(
+ SrcReg = DstReg = ARM::SP;
+ } else {
+ SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI->getOperand(0).getReg();
+ }
+
+ // Try to figure out the unwinding opcode out of src / dst regs.
+ if (MI->mayStore()) {
+ // Register saves.
+ assert(DstReg == ARM::SP &&
+ "Only stack pointer as a destination reg is supported");
+
+ SmallVector<unsigned, 4> RegList;
+ // Skip src & dst reg, and pred ops.
+ unsigned StartOp = 2 + 2;
+ // Use all the operands.
+ unsigned NumOffset = 0;
+
+ switch (Opc) {
+ default:
+ MI->dump();
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ case ARM::tPUSH:
+ // Special case here: no src & dst reg, but two extra imp ops.
+ StartOp = 2; NumOffset = 2;
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ assert(SrcReg == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
+ i != NumOps; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Actually, there should never be any impdef stuff here. Skip it
+ // temporary to workaround PR11902.
+ if (MO.isImplicit())
+ continue;
+ RegList.push_back(MO.getReg());
+ }
+ break;
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ case ARM::t2STR_PRE:
+ assert(MI->getOperand(2).getReg() == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ RegList.push_back(SrcReg);
+ break;
+ }
+ OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ } else {
+ // Changes of stack / frame pointer.
+ if (SrcReg == ARM::SP) {
+ int64_t Offset = 0;
+ switch (Opc) {
+ default:
+ MI->dump();
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ case ARM::MOVr:
+ case ARM::tMOVr:
+ Offset = 0;
+ break;
+ case ARM::ADDri:
+ Offset = -MI->getOperand(2).getImm();
+ break;
+ case ARM::SUBri:
+ case ARM::t2SUBri:
+ Offset = MI->getOperand(2).getImm();
+ break;
+ case ARM::tSUBspi:
+ Offset = MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tADDspi:
+ case ARM::tADDrSPi:
+ Offset = -MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tLDRpci: {
+ // Grab the constpool index and check, whether it corresponds to
+ // original or cloned constpool entry.
+ unsigned CPI = MI->getOperand(1).getIndex();
+ const MachineConstantPool *MCP = MF.getConstantPool();
+ if (CPI >= MCP->getConstants().size())
+ CPI = AFI.getOriginalCPIdx(CPI);
+ assert(CPI != -1U && "Invalid constpool index");
+
+ // Derive the actual offset.
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+ assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
+ // FIXME: Check for user, it should be "add" instruction!
+ Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+ break;
+ }
+ }
+
+ if (DstReg == FramePtr && FramePtr != ARM::SP)
+ // Set-up of the frame pointer. Positive values correspond to "add"
+ // instruction.
+ OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset);
+ else if (DstReg == ARM::SP) {
+ // Change of SP by an offset. Positive values correspond to "sub"
+ // instruction.
+ OutStreamer.EmitPad(Offset);
+ } else {
+ MI->dump();
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ }
+ } else if (DstReg == ARM::SP) {
+ // FIXME: .movsp goes here
+ MI->dump();
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ }
+ else {
+ MI->dump();
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ }
+ }
+}
+
+extern cl::opt<bool> EnableARMEHABI;
+
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
+#include "ARMGenMCPseudoLowering.inc"
+
+void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ // If we just ended a constant pool, mark it as such.
+ if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ InConstantPool = false;
+ }
+
+ // Emit unwinding stuff for frame-related instructions
+ if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
+ EmitUnwindingInstruction(MI);
+
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
+ assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
+ "Pseudo flag setting opcode should be expanded early");
+
+ // Check for manual lowerings.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass");
+ case ARM::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ case ARM::LEApcrel:
+ case ARM::tLEApcrel:
+ case ARM::t2LEApcrel: {
+ // FIXME: Need to also handle globals and externals
+ MCSymbol *CPISymbol = GetCPISymbol(MI->getOperand(1).getIndex());
+ OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg()));
+ return;
+ }
+ case ARM::LEApcrelJT:
+ case ARM::tLEApcrelJT:
+ case ARM::t2LEApcrelJT: {
+ MCSymbol *JTIPICSymbol =
+ GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+ MI->getOperand(2).getImm());
+ OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
+ return;
+ }
+ // Darwin call instructions are just normal call instructions with different
+ // clobber semantics (they clobber R9).
+ case ARM::BX_CALL: {
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX)
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
+ case ARM::tBX_CALL: {
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ case ARM::BMOVPCRX_CALL: {
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+ return;
+ }
+ case ARM::BMOVPCB_CALL: {
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc)
+ .addExpr(GVSymExpr)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+
+ unsigned TF = MI->getOperand(1).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(1).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel
+ ? ARM::MOVTi16 : ARM::t2MOVTi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+
+ unsigned TF = MI->getOperand(2).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(2).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(3).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::tPICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the add.
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDhirr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ case ARM::PICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc, r0
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the add.
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg())
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+ return;
+ }
+ case ARM::PICSTR:
+ case ARM::PICSTRB:
+ case ARM::PICSTRH:
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICLDRH:
+ case ARM::PICLDRSB:
+ case ARM::PICLDRSH: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // OP r0, [pc, r0]
+ // The LCP0 label is referenced by a constant pool entry in order to get
+ // a PC-relative address at the ldr instruction.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the load
+ unsigned Opcode;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case ARM::PICSTR: Opcode = ARM::STRrs; break;
+ case ARM::PICSTRB: Opcode = ARM::STRBrs; break;
+ case ARM::PICSTRH: Opcode = ARM::STRH; break;
+ case ARM::PICLDR: Opcode = ARM::LDRrs; break;
+ case ARM::PICLDRB: Opcode = ARM::LDRBrs; break;
+ case ARM::PICLDRH: Opcode = ARM::LDRH; break;
+ case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
+ case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
+ }
+ OutStreamer.EmitInstruction(MCInstBuilder(Opcode)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
+
+ return;
+ }
+ case ARM::CONSTPOOL_ENTRY: {
+ /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
+ /// in the function. The first operand is the ID# for this instruction, the
+ /// second is the index into the MachineConstantPool that this is, the third
+ /// is the size in bytes of this constant pool entry.
+ /// The required alignment is specified on the basic block holding this MI.
+ unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+
+ // If this is the first entry of the pool, mark it.
+ if (!InConstantPool) {
+ OutStreamer.EmitDataRegion(MCDR_DataRegion);
+ InConstantPool = true;
+ }
+
+ OutStreamer.EmitLabel(GetCPISymbol(LabelId));
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+ return;
+ }
+ case ARM::t2BR_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::t2TBB_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBB)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ // Make sure the next instruction is 2-byte aligned.
+ EmitAlignment(1);
+ return;
+ }
+ case ARM::t2TBH_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBH)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::tBR_JTr:
+ case ARM::BR_JTr: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // mov pc, target
+ MCInst TmpInst;
+ unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
+ ARM::MOVr : ARM::tMOVr;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ if (Opc == ARM::MOVr)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Make sure the Thumb jump table is 4-byte aligned.
+ if (Opc == ARM::tMOVr)
+ EmitAlignment(2);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTm: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // ldr pc, target
+ MCInst TmpInst;
+ if (MI->getOperand(1).getReg() == 0) {
+ // literal offset
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
+ } else {
+ TmpInst.setOpcode(ARM::LDRrs);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTadd: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // add pc, target, idx
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::TRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.long 0xe7ffdefe @ trap
+ uint32_t Val = 0xe7ffdefeUL;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 4);
+ return;
+ }
+ break;
+ }
+ case ARM::TRAPNaCl: {
+ //.long 0xe7fedef0 @ trap
+ uint32_t Val = 0xe7fedef0UL;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 4);
+ return;
+ }
+ case ARM::tTRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.short 57086 @ trap
+ uint16_t Val = 0xdefe;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 2);
+ return;
+ }
+ break;
+ }
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // mov $val, pc
+ // adds $val, #7
+ // str $val, [$src, #4]
+ // movs r0, #0
+ // b 1f
+ // movs r0, #1
+ // 1:
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+ MCSymbol *Label = GetARMSJLJEHLabel();
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDi3)
+ .addReg(ValReg)
+ // 's' bit operand
+ .addReg(ARM::CPSR)
+ .addReg(ValReg)
+ .addImm(7)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tSTRi)
+ .addReg(ValReg)
+ .addReg(SrcReg)
+ // The offset immediate is #4. The operand value is scaled by 4 for the
+ // tSTR instruction.
+ .addImm(1)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(0)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tB)
+ .addExpr(SymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(1)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitLabel(Label);
+ return;
+ }
+
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::Int_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // add $val, pc, #8
+ // str $val, [$src, #+4]
+ // mov r0, #0
+ // add pc, pc, #0
+ // mov r0, #1
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
+ .addImm(8)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // 's' bit operand (always reg0 for this).
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::STRi12)
+ .addReg(ValReg)
+ .addReg(SrcReg)
+ .addImm(4)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(0)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // 's' bit operand (always reg0 for this).
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri)
+ .addReg(ARM::PC)
+ .addReg(ARM::PC)
+ .addImm(0)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // 's' bit operand (always reg0 for this).
+ .addReg(0));
+
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(1)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // 's' bit operand (always reg0 for this).
+ .addReg(0));
+ return;
+ }
+ case ARM::Int_eh_sjlj_longjmp: {
+ // ldr sp, [$src, #8]
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::SP)
+ .addReg(SrcReg)
+ .addImm(8)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(4)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX)
+ .addReg(ScratchReg)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ case ARM::tInt_eh_sjlj_longjmp: {
+ // ldr $scratch, [$src, #8]
+ // mov sp, $scratch
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ // The offset immediate is #8. The operand value is scaled by 4 for the
+ // tLDR instruction.
+ .addImm(2)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::SP)
+ .addReg(ScratchReg)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(1)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX)
+ .addReg(ScratchReg)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMAsmPrinter() {
+ RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
+ RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
new file mode 100644
index 000000000000..c945e4f28699
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -0,0 +1,132 @@
+//===-- ARMAsmPrinter.h - ARM implementation of AsmPrinter ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMASMPRINTER_H
+#define ARMASMPRINTER_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+}
+
+class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction.
+ ARMFunctionInfo *AFI;
+
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+ /// InConstantPool - Maintain state when emitting a sequence of constant
+ /// pool entries so we can properly mark them as data regions.
+ bool InConstantPool;
+public:
+ explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL), InConstantPool(false) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
+
+ virtual const char *getPassName() const LLVM_OVERRIDE {
+ return "ARM Assembly / Object Emitter";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) LLVM_OVERRIDE;
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) LLVM_OVERRIDE;
+
+ void EmitJumpTable(const MachineInstr *MI);
+ void EmitJump2Table(const MachineInstr *MI);
+ virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE;
+ virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE;
+
+ virtual void EmitConstantPool() LLVM_OVERRIDE {
+ // we emit constant pools customly!
+ }
+ virtual void EmitFunctionBodyEnd() LLVM_OVERRIDE;
+ virtual void EmitFunctionEntryLabel() LLVM_OVERRIDE;
+ virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE;
+ virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE;
+ virtual void EmitXXStructor(const Constant *CV) LLVM_OVERRIDE;
+
+ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+
+private:
+ // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+ void emitAttributes();
+
+ // Helper for ELF .o only
+ void emitARMAttributeSection();
+
+ // Generic helper used to emit e.g. ARMv5 mul pseudos
+ void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+
+ void EmitUnwindingInstruction(const MachineInstr *MI);
+
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+public:
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ virtual MachineLocation
+ getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE;
+
+ /// EmitDwarfRegOp - Emit dwarf register operation.
+ virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE;
+
+ virtual unsigned getISAEncoding() LLVM_OVERRIDE {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
+ }
+
+private:
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
+
+ MCSymbol *GetARMSJLJEHLabel() const;
+
+ MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+
+public:
+ /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+ /// the .s file.
+ virtual void
+ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) LLVM_OVERRIDE;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
new file mode 100644
index 000000000000..9e68ff44890e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -0,0 +1,4137 @@
+//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseInstrInfo.h"
+#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMHazardRecognizer.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "ARMGenInstrInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+static cl::opt<bool>
+WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
+ cl::desc("Widen ARM vmovs to vmovd when possible"));
+
+static cl::opt<unsigned>
+SwiftPartialUpdateClearance("swift-partial-update-clearance",
+ cl::Hidden, cl::init(12),
+ cl::desc("Clearance before partial register updates"));
+
+/// ARM_MLxEntry - Record information about MLA / MLS instructions.
+struct ARM_MLxEntry {
+ uint16_t MLxOpc; // MLA / MLS opcode
+ uint16_t MulOpc; // Expanded multiplication opcode
+ uint16_t AddSubOpc; // Expanded add / sub opcode
+ bool NegAcc; // True if the acc is negated before the add / sub.
+ bool HasLane; // True if instruction has an extra "lane" operand.
+};
+
+static const ARM_MLxEntry ARM_MLxTable[] = {
+ // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
+ // fp scalar ops
+ { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
+ { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
+ { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
+ { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
+ { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
+ { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
+
+ // fp SIMD ops
+ { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
+ { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
+ { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
+ { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
+ { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
+ { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
+ { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
+ { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
+};
+
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
+ : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ Subtarget(STI) {
+ for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+ if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
+ }
+}
+
+// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
+// currently defaults to no prepass hazard recognizer.
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ if (usePreRAHazardRecognizer()) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
+ }
+ return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
+}
+
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ return (ScheduleHazardRecognizer *)
+ new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+ return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+MachineInstr *
+ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ // FIXME: Thumb2 support.
+
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ uint64_t TSFlags = MI->getDesc().TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+ // Try splitting an indexed load/store to an un-indexed one plus an add/sub
+ // operation.
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ bool isLoad = !MI->mayStore();
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default: llvm_unreachable("Unknown indexed op!");
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ if (ARM_AM::getSOImmVal(Amt) == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ case ARMII::AddrMode3 : {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+ // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ if (LV) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV->addVirtualRegisterDead(Reg, NewMI);
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ if (!NewMI->readsRegister(Reg))
+ continue;
+ LV->addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
+
+// Branch analysis.
+bool
+ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ // Return now the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ Cond.push_back(SecondLastInst->getOperand(2));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // ...likewise if it ends with a branch table followed by an unconditional
+ // branch. The branch folder can create these, and we must get rid of them for
+ // correctness of Thumb constant islands.
+ if ((isJumpTableBranchOpcode(SecondLastOpc) ||
+ isIndirectBranchOpcode(SecondLastOpc)) &&
+ isUncondBranchOpcode(LastOpc)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!isUncondBranchOpcode(I->getOpcode()) &&
+ !isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
+ int BOpc = !AFI->isThumbFunction()
+ ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
+ int BccOpc = !AFI->isThumbFunction()
+ ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+ bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) { // Unconditional branch?
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMBaseInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+ if (MI->isBundle()) {
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ int PIdx = I->findFirstPredOperandIdx();
+ if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+ return true;
+ }
+ return false;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
+bool ARMBaseInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (isUncondBranchOpcode(Opc)) {
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImm());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+ if (CC1 == CC2)
+ return true;
+
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
+ (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+/// isPredicable - Return true if the specified instruction can be predicated.
+/// By default, this returns true for every instruction with a
+/// PredicateOperand.
+bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
+ if (!MI->isPredicable())
+ return false;
+
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ return AFI->isThumb2Function();
+ }
+ return true;
+}
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
+LLVM_ATTRIBUTE_NOINLINE
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI);
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ assert(JTI < JT.size());
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getSize())
+ return MCID.getSize();
+
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI->isLabel())
+ return 0;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ return 20;
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr:
+ case ARM::t2BR_JT:
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+ // entry is one byte; TBH two byte each.
+ unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+ ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
+ unsigned NumOps = MCID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ assert(MJTI != 0);
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+ // 4 aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) *2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+ unsigned NumEntries = getNumJTEntries(JT, JTI);
+ if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction
+ // instead.
+ ++NumEntries;
+ return NumEntries * EntrySize + InstSize;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+}
+
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += GetInstSizeInBytes(&*I);
+ }
+ return Size;
+}
+
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
+ }
+
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc = 0;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq;
+
+ if (Opc) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc == ARM::VORRq)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPred(MIB);
+ return;
+ }
+
+ // Handle register classes that require multiple instructions.
+ unsigned BeginIdx = 0;
+ unsigned SubRegs = 0;
+ int Spacing = 1;
+
+ // Use VORRq when possible.
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
+ // Fall back to VMOVD.
+ else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
+ else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
+ else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
+ else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;
+
+ else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
+ else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
+ else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
+
+ assert(Opc && "Impossible reg-to-reg copy");
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineInstrBuilder Mov;
+
+ // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
+ if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
+ BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
+ Spacing = -Spacing;
+ }
+#ifndef NDEBUG
+ SmallSet<unsigned, 4> DstRegs;
+#endif
+ for (unsigned i = 0; i != SubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ assert(Dst && Src && "Bad sub-register");
+#ifndef NDEBUG
+ assert(!DstRegs.count(Src) && "destructive vector copy");
+ DstRegs.insert(Dst);
+#endif
+ Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
+ .addReg(Src);
+ // VORR takes two source operands.
+ if (Opc == ARM::VORRq)
+ Mov.addReg(Src);
+ Mov = AddDefaultPred(Mov);
+ }
+ // Add implicit super-register defs and kills to the last instruction.
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
+}
+
+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+ unsigned Reg, unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
+
+void ARMBaseInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ switch (RC->getSize()) {
+ case 4:
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 8:
+ if (ARM::DPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 16:
+ if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 64:
+ if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ default:
+ llvm_unreachable("Unknown reg class!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STRrs:
+ case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::STRi12:
+ case ARM::t2STRi12:
+ case ARM::tSTRspi:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VST1q64:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST1d64QPseudo:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(2).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ case ARM::VSTMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ const MachineMemOperand *Dummy;
+ return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+}
+
+void ARMBaseInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ Align);
+
+ switch (RC->getSize()) {
+ case 4:
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+
+ } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 8:
+ if (ARM::DPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 16:
+ if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 64:
+ if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ default:
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::LDRi12:
+ case ARM::t2LDRi12:
+ case ARM::tLDRspi:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLD1q64:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64QPseudo:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLDMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ const MachineMemOperand *Dummy;
+ return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+}
+
+bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
+ // This hook gets to expand COPY instructions before they become
+ // copyPhysReg() calls. Look for VMOVS instructions that can legally be
+ // widened to VMOVD. We prefer the VMOVD when possible because it may be
+ // changed into a VORR that can go down the NEON pipeline.
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
+ return false;
+
+ // Look for a copy between even S-registers. That is where we keep floats
+ // when using NEON v2f32 instructions for f32 arithmetic.
+ unsigned DstRegS = MI->getOperand(0).getReg();
+ unsigned SrcRegS = MI->getOperand(1).getReg();
+ if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
+ return false;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DstRegD || !SrcRegD)
+ return false;
+
+ // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
+ // legal if the COPY already defines the full DstRegD, and it isn't a
+ // sub-register insertion.
+ if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
+ return false;
+
+ // A dead copy shouldn't show up here, but reject it just in case.
+ if (MI->getOperand(0).isDead())
+ return false;
+
+ // All clear, widen the COPY.
+ DEBUG(dbgs() << "widening: " << *MI);
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+
+ // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
+ // or some other super-register.
+ int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
+ if (ImpDefIdx != -1)
+ MI->RemoveOperand(ImpDefIdx);
+
+ // Change the opcode and operands.
+ MI->setDesc(get(ARM::VMOVD));
+ MI->getOperand(0).setReg(DstRegD);
+ MI->getOperand(1).setReg(SrcRegD);
+ AddDefaultPred(MIB);
+
+ // We are now reading SrcRegD instead of SrcRegS. This may upset the
+ // register scavenger and machine verifier, so we need to indicate that we
+ // are reading an undefined value from SrcRegD, but a proper value from
+ // SrcRegS.
+ MI->getOperand(1).setIsUndef();
+ MIB.addReg(SrcRegS, RegState::Implicit);
+
+ // SrcRegD may actually contain an unrelated value in the ssub_1
+ // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
+ if (MI->getOperand(1).isKill()) {
+ MI->getOperand(1).setIsKill(false);
+ MI->addRegisterKilled(SrcRegS, TRI, true);
+ }
+
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+}
+
+MachineInstr*
+ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Create a copy of a const pool value. Update CPI to the new index and return
+/// the label UID.
+static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
+ MachineConstantPool *MCP = MF.getConstantPool();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+ // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
+ // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
+ if (ACPV->isGlobalValue())
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
+ else if (ACPV->isExtSymbol())
+ NewCPV = ARMConstantPoolSymbol::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
+ else if (ACPV->isBlockAddress())
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
+ else if (ACPV->isMachineBasicBlock())
+ NewCPV = ARMConstantPoolMBB::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
+ else
+ llvm_unreachable("Unexpected ARM constantpool value type!!");
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ return PCLabelId;
+}
+
+void ARMBaseInstrInfo::
+reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default: {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+ break;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ MachineFunction &MF = *MBB.getParent();
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
+ DestReg)
+ .addConstantPoolIndex(CPI).addImm(PCLabelId);
+ MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ break;
+ }
+ }
+}
+
+MachineInstr *
+ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
+ MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
+ switch(Orig->getOpcode()) {
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ Orig->getOperand(1).setIndex(CPI);
+ Orig->getOperand(2).setImm(PCLabelId);
+ break;
+ }
+ }
+ return MI;
+}
+
+bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ int Opcode = MI0->getOpcode();
+ if (Opcode == ARM::t2LDRpci ||
+ Opcode == ARM::t2LDRpci_pic ||
+ Opcode == ARM::tLDRpci ||
+ Opcode == ARM::tLDRpci_pic ||
+ Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ const MachineOperand &MO0 = MI0->getOperand(1);
+ const MachineOperand &MO1 = MI1->getOperand(1);
+ if (MO0.getOffset() != MO1.getOffset())
+ return false;
+
+ if (Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel)
+ // Ignore the PC labels.
+ return MO0.getGlobal() == MO1.getGlobal();
+
+ const MachineFunction *MF = MI0->getParent()->getParent();
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ int CPI0 = MO0.getIndex();
+ int CPI1 = MO1.getIndex();
+ const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
+ const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
+ bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
+ bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
+ if (isARMCP0 && isARMCP1) {
+ ARMConstantPoolValue *ACPV0 =
+ static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+ ARMConstantPoolValue *ACPV1 =
+ static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+ return ACPV0->hasSameValue(ACPV1);
+ } else if (!isARMCP0 && !isARMCP1) {
+ return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
+ }
+ return false;
+ } else if (Opcode == ARM::PICLDR) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ unsigned Addr0 = MI0->getOperand(1).getReg();
+ unsigned Addr1 = MI1->getOperand(1).getReg();
+ if (Addr0 != Addr1) {
+ if (!MRI ||
+ !TargetRegisterInfo::isVirtualRegister(Addr0) ||
+ !TargetRegisterInfo::isVirtualRegister(Addr1))
+ return false;
+
+ // This assumes SSA form.
+ MachineInstr *Def0 = MRI->getVRegDef(Addr0);
+ MachineInstr *Def1 = MRI->getVRegDef(Addr1);
+ // Check if the loaded value, e.g. a constantpool of a global address, are
+ // the same.
+ if (!produceSameValue(Def0, Def1, MRI))
+ return false;
+ }
+
+ for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
+ // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
+ const MachineOperand &MO0 = MI0->getOperand(i);
+ const MachineOperand &MO1 = MI1->getOperand(i);
+ if (!MO0.isIdenticalTo(MO1))
+ return false;
+ }
+ return true;
+ }
+
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only differences
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+
+ switch (Load1->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ switch (Load2->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ // Check if base addresses and chain operands match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+
+ // Index should be Reg0.
+ if (Load1->getOperand(3) != Load2->getOperand(3))
+ return false;
+
+ // Determine the offsets.
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+ isa<ConstantSDNode>(Load2->getOperand(1))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled togther. On some targets if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ assert(Offset2 > Offset1);
+
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ return false; // FIXME: overly conservative?
+
+ // Four loads in a row should be sufficient.
+ if (NumLoads >= 3)
+ return false;
+
+ return true;
+}
+
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // due to the special treatment of IT instructions below, otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. It should be the actual
+ // instruction preceding the dbg_value instruction(s), just like it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->isTerminator() || MI->isLabel())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ // Make sure to skip any dbg_value instructions
+ while (++I != MBB->end() && I->isDebugValue())
+ ;
+ if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ // Calls don't actually change the stack pointer, even if they have imp-defs.
+ // No ARM calling conventions change the stack pointer. (X86 calling
+ // conventions sometimes do).
+ if (!MI->isCall() && MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ if (!NumCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ unsigned UnpredCost = Probability.getNumerator() * NumCycles;
+ UnpredCost /= Probability.getDenominator();
+ UnpredCost += 1; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+
+ return (NumCycles + ExtraPredCycles) <= UnpredCost;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned TCycles, unsigned TExtra,
+ MachineBasicBlock &FMBB,
+ unsigned FCycles, unsigned FExtra,
+ const BranchProbability &Probability) const {
+ if (!TCycles || !FCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ unsigned TUnpredCost = Probability.getNumerator() * TCycles;
+ TUnpredCost /= Probability.getDenominator();
+
+ uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
+ unsigned FUnpredCost = Comp * FCycles;
+ FUnpredCost /= Probability.getDenominator();
+
+ unsigned UnpredCost = TUnpredCost + FUnpredCost;
+ UnpredCost += 1; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+
+ return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
+}
+
+bool
+ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ // Reduce false anti-dependencies to let Swift's out-of-order execution
+ // engine do its thing.
+ return Subtarget.isSwift();
+}
+
+/// getInstrPredicate - If instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes
+llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMCC::AL;
+ }
+
+ PredReg = MI->getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+}
+
+
+int llvm::getMatchingCondBranchOpcode(int Opc) {
+ if (Opc == ARM::B)
+ return ARM::Bcc;
+ if (Opc == ARM::tB)
+ return ARM::tBcc;
+ if (Opc == ARM::t2B)
+ return ARM::t2Bcc;
+
+ llvm_unreachable("Unknown unconditional branch opcode!");
+}
+
+/// commuteInstruction - Handle commutable instructions.
+MachineInstr *
+ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ switch (MI->getOpcode()) {
+ case ARM::MOVCCr:
+ case ARM::t2MOVCCr: {
+ // MOVCC can be commuted by inverting the condition.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
+ // MOVCC AL can't be inverted. Shouldn't happen.
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return NULL;
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ if (!MI)
+ return NULL;
+ // After swapping the MOVCC operands, also invert the condition.
+ MI->getOperand(MI->findFirstPredOperandIdx())
+ .setImm(ARMCC::getOppositeCondition(CC));
+ return MI;
+ }
+ }
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+}
+
+/// Identify instructions that can be folded into a MOVCC instruction, and
+/// return the defining instruction.
+static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return 0;
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return 0;
+ MachineInstr *MI = MRI.getVRegDef(Reg);
+ if (!MI)
+ return 0;
+ // MI is folded into the MOVCC by predicating it.
+ if (!MI->isPredicable())
+ return 0;
+ // Check if MI has any non-dead defs or physreg uses. This also detects
+ // predicated instructions which will be reading CPSR.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Reject frame index operands, PEI can't handle the predicated pseudos.
+ if (MO.isFI() || MO.isCPI() || MO.isJTI())
+ return 0;
+ if (!MO.isReg())
+ continue;
+ // MI can't have any tied operands, that would conflict with predication.
+ if (MO.isTied())
+ return 0;
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ return 0;
+ if (MO.isDef() && !MO.isDead())
+ return 0;
+ }
+ bool DontMoveAcrossStores = true;
+ if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
+ return 0;
+ return MI;
+}
+
+bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const {
+ assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ "Unknown select instruction");
+ // MOVCC operands:
+ // 0: Def.
+ // 1: True use.
+ // 2: False use.
+ // 3: Condition code.
+ // 4: CPSR use.
+ TrueOp = 1;
+ FalseOp = 2;
+ Cond.push_back(MI->getOperand(3));
+ Cond.push_back(MI->getOperand(4));
+ // We can always fold a def.
+ Optimizable = true;
+ return false;
+}
+
+MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
+ bool PreferFalse) const {
+ assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ "Unknown select instruction");
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
+ bool Invert = !DefMI;
+ if (!DefMI)
+ DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
+ if (!DefMI)
+ return 0;
+
+ // Create a new predicated version of DefMI.
+ // Rfalse is the first use.
+ MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ DefMI->getDesc(),
+ MI->getOperand(0).getReg());
+
+ // Copy all the DefMI operands, excluding its (null) predicate.
+ const MCInstrDesc &DefDesc = DefMI->getDesc();
+ for (unsigned i = 1, e = DefDesc.getNumOperands();
+ i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
+ NewMI.addOperand(DefMI->getOperand(i));
+
+ unsigned CondCode = MI->getOperand(3).getImm();
+ if (Invert)
+ NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
+ else
+ NewMI.addImm(CondCode);
+ NewMI.addOperand(MI->getOperand(4));
+
+ // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
+ if (NewMI->hasOptionalDef())
+ AddDefaultCC(NewMI);
+
+ // The output register value when the predicate is false is an implicit
+ // register operand tied to the first def.
+ // The tie makes the register allocator ensure the FalseReg is allocated the
+ // same register as operand 0.
+ MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
+ FalseReg.setImplicit();
+ NewMI.addOperand(FalseReg);
+ NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
+
+ // The caller will erase MI, but not DefMI.
+ DefMI->eraseFromParent();
+ return NewMI;
+}
+
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
+/// instruction is encoded with an 'S' bit is determined by the optional CPSR
+/// def operand.
+///
+/// This will go away once we can teach tblgen how to set the optional CPSR def
+/// operand itself.
+struct AddSubFlagsOpcodePair {
+ uint16_t PseudoOpc;
+ uint16_t MachineOpc;
+};
+
+static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+ {ARM::ADDSri, ARM::ADDri},
+ {ARM::ADDSrr, ARM::ADDrr},
+ {ARM::ADDSrsi, ARM::ADDrsi},
+ {ARM::ADDSrsr, ARM::ADDrsr},
+
+ {ARM::SUBSri, ARM::SUBri},
+ {ARM::SUBSrr, ARM::SUBrr},
+ {ARM::SUBSrsi, ARM::SUBrsi},
+ {ARM::SUBSrsr, ARM::SUBrsr},
+
+ {ARM::RSBSri, ARM::RSBri},
+ {ARM::RSBSrsi, ARM::RSBrsi},
+ {ARM::RSBSrsr, ARM::RSBrsr},
+
+ {ARM::t2ADDSri, ARM::t2ADDri},
+ {ARM::t2ADDSrr, ARM::t2ADDrr},
+ {ARM::t2ADDSrs, ARM::t2ADDrs},
+
+ {ARM::t2SUBSri, ARM::t2SUBri},
+ {ARM::t2SUBSrr, ARM::t2SUBrr},
+ {ARM::t2SUBSrs, ARM::t2SUBrs},
+
+ {ARM::t2RSBSri, ARM::t2RSBri},
+ {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
+ for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
+ if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
+ return AddSubFlagsOpcodeMap[i].MachineOpc;
+ return 0;
+}
+
+void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from offset, clear them.
+ NumBytes &= ~ThisVal;
+
+ assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ BaseReg = DestReg;
+ }
+}
+
+bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrMode2.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrMode2;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::MOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(FrameRegIdx+1);
+ Offset = 0;
+ return true;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::SUBri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getSOImmVal(Offset) != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, pull as much of the immedidate into this ADDri/SUBri
+ // as possible.
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case ARMII::AddrMode_i12: {
+ ImmIdx = FrameRegIdx + 1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Can't fold any offset even if it's zero.
+ return false;
+ case ARMII::AddrMode5: {
+ ImmIdx = FrameRegIdx+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+
+ // Attempt to fold address comp. if opcode has offset bits
+ if (NumBits > 0) {
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // FIXME: When addrmode2 goes away, this will simplify (like the
+ // T2 version), as the LDR.i12 versions don't need the encoding
+ // tricks for the offset value.
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
+
+/// analyzeCompare - For a comparison instruction, return the source registers
+/// in SrcReg and SrcReg2 if having two register operands, and the value it
+/// compares against in CmpValue. Return true if the comparison instruction
+/// can be analyzed.
+bool ARMBaseInstrInfo::
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::t2CMPri:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ case ARM::CMPrr:
+ case ARM::t2CMPrr:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case ARM::TSTri:
+ case ARM::t2TSTri:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ CmpMask = MI->getOperand(1).getImm();
+ CmpValue = 0;
+ return true;
+ }
+
+ return false;
+}
+
+/// isSuitableForMask - Identify a suitable 'and' instruction that
+/// operates on the given source register and applies the same mask
+/// as a 'tst' instruction. Provide a limited look-through for copies.
+/// When successful, MI will hold the found instruction.
+static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+ int CmpMask, bool CommonUse) {
+ switch (MI->getOpcode()) {
+ case ARM::ANDri:
+ case ARM::t2ANDri:
+ if (CmpMask != MI->getOperand(2).getImm())
+ return false;
+ if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
+ return true;
+ break;
+ case ARM::COPY: {
+ // Walk down one instruction which is potentially an 'and'.
+ const MachineInstr &Copy = *MI;
+ MachineBasicBlock::iterator AND(
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ if (AND == MI->getParent()->end()) return false;
+ MI = AND;
+ return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
+ CmpMask, true);
+ }
+ }
+
+ return false;
+}
+
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: return ARMCC::AL;
+ case ARMCC::EQ: return ARMCC::EQ;
+ case ARMCC::NE: return ARMCC::NE;
+ case ARMCC::HS: return ARMCC::LS;
+ case ARMCC::LO: return ARMCC::HI;
+ case ARMCC::HI: return ARMCC::LO;
+ case ARMCC::LS: return ARMCC::HS;
+ case ARMCC::GE: return ARMCC::LE;
+ case ARMCC::LT: return ARMCC::GT;
+ case ARMCC::GT: return ARMCC::LT;
+ case ARMCC::LE: return ARMCC::GE;
+ }
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// CMPri can be made redundant by SUBri if the operands are the same.
+/// This function can be extended later on.
+inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if ((CmpI->getOpcode() == ARM::CMPrr ||
+ CmpI->getOpcode() == ARM::t2CMPrr) &&
+ (OI->getOpcode() == ARM::SUBrr ||
+ OI->getOpcode() == ARM::t2SUBrr) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
+
+ if ((CmpI->getOpcode() == ARM::CMPri ||
+ CmpI->getOpcode() == ARM::t2CMPri) &&
+ (OI->getOpcode() == ARM::SUBri ||
+ OI->getOpcode() == ARM::t2SUBri) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
+
+/// optimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register;
+/// Remove a redundant Compare instruction if an earlier instruction can set the
+/// flags in the same way as Compare.
+/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
+/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
+/// condition code of instructions which use the flags.
+bool ARMBaseInstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
+
+ // Masked compares sometimes use the same register as the corresponding 'and'.
+ if (CmpMask != ~0) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
+ MI = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ if (UI->getParent() != CmpInstr->getParent()) continue;
+ MachineInstr *PotentialAND = &*UI;
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
+ isPredicated(PotentialAND))
+ continue;
+ MI = PotentialAND;
+ break;
+ }
+ if (!MI) return false;
+ }
+ }
+
+ // Get ready to iterate backward from CmpInstr.
+ MachineBasicBlock::iterator I = CmpInstr, E = MI,
+ B = CmpInstr->getParent()->begin();
+
+ // Early exit if CmpInstr is at the beginning of the BB.
+ if (I == B) return false;
+
+ // There are two possible candidates which can be changed to set CPSR:
+ // One is MI, the other is a SUB instruction.
+ // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+ // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
+ MachineInstr *Sub = NULL;
+ if (SrcReg2 != 0)
+ // MI is not a candidate for CMPrr.
+ MI = NULL;
+ else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
+ // Conservatively refuse to convert an instruction which isn't in the same
+ // BB as the comparison.
+ // For CMPri, we need to check Sub, thus we can't return here.
+ if (CmpInstr->getOpcode() == ARM::CMPri ||
+ CmpInstr->getOpcode() == ARM::t2CMPri)
+ MI = NULL;
+ else
+ return false;
+ }
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change. At the same time, search for Sub.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
+ Instr.readsRegister(ARM::CPSR, TRI))
+ // This instruction modifies or uses CPSR after the one we want to
+ // change. We can't do this transformation.
+ return false;
+
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
+ Sub = &*I;
+ break;
+ }
+
+ if (I == B)
+ // The 'and' is below the comparison instruction.
+ return false;
+ }
+
+ // Return false if no candidates exist.
+ if (!MI && !Sub)
+ return false;
+
+ // The single candidate is called MI.
+ if (!MI) MI = Sub;
+
+ // We can't use a predicated instruction - it doesn't always write the flags.
+ if (isPredicated(MI))
+ return false;
+
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::RSBrr:
+ case ARM::RSBri:
+ case ARM::RSCrr:
+ case ARM::RSCri:
+ case ARM::ADDrr:
+ case ARM::ADDri:
+ case ARM::ADCrr:
+ case ARM::ADCri:
+ case ARM::SUBrr:
+ case ARM::SUBri:
+ case ARM::SBCrr:
+ case ARM::SBCri:
+ case ARM::t2RSBri:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDri:
+ case ARM::t2ADCrr:
+ case ARM::t2ADCri:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBri:
+ case ARM::t2SBCrr:
+ case ARM::t2SBCri:
+ case ARM::ANDrr:
+ case ARM::ANDri:
+ case ARM::t2ANDrr:
+ case ARM::t2ANDri:
+ case ARM::ORRrr:
+ case ARM::ORRri:
+ case ARM::t2ORRrr:
+ case ARM::t2ORRri:
+ case ARM::EORrr:
+ case ARM::EORri:
+ case ARM::t2EORrr:
+ case ARM::t2EORri: {
+ // Scan forward for the use of CPSR
+ // When checking against MI: if it's a conditional code requires
+ // checking of V bit, then this is not safe to do.
+ // It is safe to remove CmpInstr if CPSR is redefined or killed.
+ // If we are done with the basic block, we need to check whether CPSR is
+ // live-out.
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
+ bool isSafe = false;
+ I = CmpInstr;
+ E = CmpInstr->getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+ isSafe = true;
+ break;
+ }
+ if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+ continue;
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // Condition code is after the operand before CPSR.
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
+ return false;
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg)
+ OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
+ NewCC));
+ }
+ else
+ switch (CC) {
+ default:
+ // CPSR can be used multiple times, we should continue.
+ break;
+ case ARMCC::VS:
+ case ARMCC::VC:
+ case ARMCC::GE:
+ case ARMCC::LT:
+ case ARMCC::GT:
+ case ARMCC::LE:
+ return false;
+ }
+ }
+ }
+
+ // If CPSR is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!isSafe) {
+ MachineBasicBlock *MBB = CmpInstr->getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(ARM::CPSR))
+ return false;
+ }
+
+ // Toggle the optional operand to CPSR.
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
+ assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
+ CmpInstr->eraseFromParent();
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
+ MachineInstr *DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const {
+ // Fold large immediates into add, sub, or, xor.
+ unsigned DefOpc = DefMI->getOpcode();
+ if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+ return false;
+ if (!DefMI->getOperand(1).isImm())
+ // Could be t2MOVi32imm <ga:xx>
+ return false;
+
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ const MCInstrDesc &DefMCID = DefMI->getDesc();
+ if (DefMCID.hasOptionalDef()) {
+ unsigned NumOps = DefMCID.getNumOperands();
+ const MachineOperand &MO = DefMI->getOperand(NumOps-1);
+ if (MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If DefMI defines CPSR and it is not dead, it's obviously not safe
+ // to delete DefMI.
+ return false;
+ }
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ if (UseMCID.hasOptionalDef()) {
+ unsigned NumOps = UseMCID.getNumOperands();
+ if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
+ // If the instruction sets the flag, do not attempt this optimization
+ // since it may change the semantics of the code.
+ return false;
+ }
+
+ unsigned UseOpc = UseMI->getOpcode();
+ unsigned NewUseOpc = 0;
+ uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
+ uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
+ bool Commute = false;
+ switch (UseOpc) {
+ default: return false;
+ case ARM::SUBrr:
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr:
+ case ARM::t2SUBrr:
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ Commute = UseMI->getOperand(2).getReg() != Reg;
+ switch (UseOpc) {
+ default: break;
+ case ARM::SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::SUBri;
+ // Fallthrough
+ }
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr: {
+ if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
+ case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
+ case ARM::EORrr: NewUseOpc = ARM::EORri; break;
+ }
+ break;
+ }
+ case ARM::t2SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::t2SUBri;
+ // Fallthrough
+ }
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
+ case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
+ case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ unsigned OpIdx = Commute ? 2 : 1;
+ unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
+ bool isKill = UseMI->getOperand(OpIdx).isKill();
+ unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+ AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
+ UseMI, UseMI->getDebugLoc(),
+ get(NewUseOpc), NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)));
+ UseMI->setDesc(get(NewUseOpc));
+ UseMI->getOperand(1).setReg(NewReg);
+ UseMI->getOperand(1).setIsKill();
+ UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
+ DefMI->eraseFromParent();
+ return true;
+}
+
+static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: {
+ const MCInstrDesc &Desc = MI->getDesc();
+ int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
+ assert(UOps >= 0 && "bad # UOps");
+ return UOps;
+ }
+
+ case ARM::LDRrs:
+ case ARM::LDRBrs:
+ case ARM::STRrs:
+ case ARM::STRBrs: {
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRH:
+ case ARM::STRH: {
+ if (!MI->getOperand(2).getReg())
+ return 1;
+
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
+
+ case ARM::LDRSB_POST:
+ case ARM::LDRSH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 4 : 3;
+ }
+
+ case ARM::LDR_PRE_REG:
+ case ARM::LDRB_PRE_REG: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rt == Rm)
+ return 3;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::STR_PRE_REG:
+ case ARM::STRB_PRE_REG: {
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::LDRH_PRE:
+ case ARM::STRH_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (!Rm)
+ return 2;
+ if (Rt == Rm)
+ return 3;
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
+ ? 3 : 2;
+ }
+
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 3 : 2;
+ }
+
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDRB_POST_IMM:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRH_POST:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STR_PRE_IMM:
+ return 2;
+
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSH_PRE: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm == 0)
+ return 3;
+ unsigned Rt = MI->getOperand(0).getReg();
+ if (Rt == Rm)
+ return 4;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 3;
+ return 4;
+ }
+
+ case ARM::LDRD: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::STRD: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return 2;
+ }
+
+ case ARM::LDRD_POST:
+ case ARM::t2LDRD_POST:
+ return 3;
+
+ case ARM::STRD_POST:
+ case ARM::t2STRD_POST:
+ return 4;
+
+ case ARM::LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::t2LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::STRD_PRE: {
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return 3;
+ }
+
+ case ARM::t2STRD_PRE:
+ return 3;
+
+ case ARM::t2LDR_POST:
+ case ARM::t2LDRB_POST:
+ case ARM::t2LDRB_PRE:
+ case ARM::t2LDRSBi12:
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBpci:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRH_POST:
+ case ARM::t2LDRH_PRE:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSB_POST:
+ case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSH_POST:
+ case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHpci:
+ case ARM::t2LDRSHs:
+ return 2;
+
+ case ARM::t2LDRDi8: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::t2STRB_POST:
+ case ARM::t2STRB_PRE:
+ case ARM::t2STRBs:
+ case ARM::t2STRDi8:
+ case ARM::t2STRH_POST:
+ case ARM::t2STRH_PRE:
+ case ARM::t2STRHs:
+ case ARM::t2STR_POST:
+ case ARM::t2STR_PRE:
+ case ARM::t2STRs:
+ return 2;
+ }
+}
+
+// Return the number of 32-bit words loaded by LDM or stored by STM. If this
+// can't be easily determined return 0 (missing MachineMemOperand).
+//
+// FIXME: The current MachineInstr design does not support relying on machine
+// mem operands to determine the width of a memory access. Instead, we expect
+// the target to provide this information based on the instruction opcode and
+// operands. However, using MachineMemOperand is a the best solution now for
+// two reasons:
+//
+// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
+// operands. This is much more dangerous than using the MachineMemOperand
+// sizes because CodeGen passes can insert/remove optional machine operands. In
+// fact, it's totally incorrect for preRA passes and appears to be wrong for
+// postRA passes as well.
+//
+// 2) getNumLDMAddresses is only used by the scheduling machine model and any
+// machine model that calls this should handle the unknown (zero size) case.
+//
+// Long term, we should require a target hook that verifies MachineMemOperand
+// sizes during MC lowering. That target hook should be local to MC lowering
+// because we can't ensure that it is aware of other MI forms. Doing this will
+// ensure that MachineMemOperands are correctly propagated through all passes.
+unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ Size += (*I)->getSize();
+ }
+ return Size / 4;
+}
+
+unsigned
+ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned Class = Desc.getSchedClass();
+ int ItinUOps = ItinData->getNumMicroOps(Class);
+ if (ItinUOps >= 0) {
+ if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
+ return getNumMicroOpsSwiftLdSt(ItinData, MI);
+
+ return ItinUOps;
+ }
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unexpected multi-uops instruction!");
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+
+ // The number of uOps for load / store multiple are determined by the number
+ // registers.
+ //
+ // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+ // same cycle. The scheduling for the first load / store must be done
+ // separately by assuming the address is not 64-bit aligned.
+ //
+ // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+ // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
+ // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+ return (NumRegs / 2) + (NumRegs % 2) + 1;
+ }
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+ if (Subtarget.isSwift()) {
+ int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
+ switch (Opc) {
+ default: break;
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ ++UOps; // One for base register writeback.
+ break;
+ case ARM::LDMIA_RET:
+ case ARM::tPOP_RET:
+ case ARM::t2LDMIA_RET:
+ UOps += 2; // One for base reg wb, one for write to pc.
+ break;
+ }
+ return UOps;
+ } else if (Subtarget.isCortexA8()) {
+ if (NumRegs < 4)
+ return 2;
+ // 4 registers would be issued: 2, 2.
+ // 5 registers would be issued: 2, 2, 1.
+ int A8UOps = (NumRegs / 2);
+ if (NumRegs % 2)
+ ++A8UOps;
+ return A8UOps;
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
+ int A9UOps = (NumRegs / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((NumRegs % 2) ||
+ !MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 8)
+ ++A9UOps;
+ return A9UOps;
+ } else {
+ // Assume the worst.
+ return NumRegs;
+ }
+ }
+ }
+}
+
+int
+ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ DefCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++DefCycle;
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
+ DefCycle = RegNo;
+ bool isSLoad = false;
+
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ isSLoad = true;
+ break;
+ }
+
+ // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
+ ++DefCycle;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ DefCycle = RegNo / 2;
+ if (DefCycle < 1)
+ DefCycle = 1;
+ // Result latency is issue cycle + 2: E2.
+ DefCycle += 2;
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
+ DefCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || DefAlign < 8)
+ ++DefCycle;
+ // Result latency is AGU cycles + 2.
+ DefCycle += 2;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ UseCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++UseCycle;
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
+ UseCycle = RegNo;
+ bool isSStore = false;
+
+ switch (UseMCID.getOpcode()) {
+ default: break;
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ isSStore = true;
+ break;
+ }
+
+ // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSStore && (RegNo % 2)) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = RegNo + 2;
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ UseCycle = RegNo / 2;
+ if (UseCycle < 2)
+ UseCycle = 2;
+ // Read in E3.
+ UseCycle += 2;
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
+ UseCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = 1;
+ }
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const {
+ unsigned DefClass = DefMCID.getSchedClass();
+ unsigned UseClass = UseMCID.getSchedClass();
+
+ if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+
+ // This may be a def / use of a variable_ops instruction, the operand
+ // latency might be determinable dynamically. Let the target try to
+ // figure it out.
+ int DefCycle = -1;
+ bool LdmBypass = false;
+ switch (DefMCID.getOpcode()) {
+ default:
+ DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ break;
+
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
+ break;
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ LdmBypass = 1;
+ DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
+ break;
+ }
+
+ if (DefCycle == -1)
+ // We can't seem to determine the result latency of the def, assume it's 2.
+ DefCycle = 2;
+
+ int UseCycle = -1;
+ switch (UseMCID.getOpcode()) {
+ default:
+ UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
+ break;
+
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
+ break;
+
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
+ break;
+ }
+
+ if (UseCycle == -1)
+ // Assume it's read in the first stage.
+ UseCycle = 1;
+
+ UseCycle = DefCycle - UseCycle + 1;
+ if (UseCycle > 0) {
+ if (LdmBypass) {
+ // It's a variable_ops instruction so we can't use DefIdx here. Just use
+ // first def operand.
+ if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
+ UseClass, UseIdx))
+ --UseCycle;
+ } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
+ UseClass, UseIdx)) {
+ --UseCycle;
+ }
+ }
+
+ return UseCycle;
+}
+
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &DefIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_iterator I = MI; ++I;
+ MachineBasicBlock::const_instr_iterator II =
+ llvm::prior(I.getInstrIterator());
+ assert(II->isInsideBundle() && "Empty bundle?");
+
+ int Idx = -1;
+ while (II->isInsideBundle()) {
+ Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (Idx != -1)
+ break;
+ --II;
+ ++Dist;
+ }
+
+ assert(Idx != -1 && "Cannot find bundled definition!");
+ DefIdx = Idx;
+ return II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &UseIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ assert(II->isInsideBundle() && "Empty bundle?");
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+ // FIXME: This doesn't properly handle multiple uses.
+ int Idx = -1;
+ while (II != E && II->isInsideBundle()) {
+ Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+ if (Idx != -1)
+ break;
+ if (II->getOpcode() != ARM::t2IT)
+ ++Dist;
+ ++II;
+ }
+
+ if (Idx == -1) {
+ Dist = 0;
+ return 0;
+ }
+
+ UseIdx = Idx;
+ return II;
+}
+
+/// Return the number of cycles to add to (or subtract from) the static
+/// itinerary based on the def opcode and alignment. The caller will ensure that
+/// adjusted latency is at least one cycle.
+static int adjustDefLatency(const ARMSubtarget &Subtarget,
+ const MachineInstr *DefMI,
+ const MCInstrDesc *DefMCID, unsigned DefAlign) {
+ int Adjust = 0;
+ if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Adjust;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Adjust;
+ break;
+ }
+ }
+ } else if (Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ Adjust -= 2;
+ else if (!isSub &&
+ ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Adjust;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
+ Adjust -= 2;
+ break;
+ }
+ }
+ }
+
+ if (DefAlign < 8 && Subtarget.isLikeA9()) {
+ switch (DefMCID->getOpcode()) {
+ default: break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD1d64T:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d64Twb_register:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD1d64Q:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d64Qwb_register:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD4DUPd8:
+ case ARM::VLD4DUPd16:
+ case ARM::VLD4DUPd32:
+ case ARM::VLD4DUPd8_UPD:
+ case ARM::VLD4DUPd16_UPD:
+ case ARM::VLD4DUPd32_UPD:
+ case ARM::VLD1LNd8:
+ case ARM::VLD1LNd16:
+ case ARM::VLD1LNd32:
+ case ARM::VLD1LNd8_UPD:
+ case ARM::VLD1LNd16_UPD:
+ case ARM::VLD1LNd32_UPD:
+ case ARM::VLD2LNd8:
+ case ARM::VLD2LNd16:
+ case ARM::VLD2LNd32:
+ case ARM::VLD2LNq16:
+ case ARM::VLD2LNq32:
+ case ARM::VLD2LNd8_UPD:
+ case ARM::VLD2LNd16_UPD:
+ case ARM::VLD2LNd32_UPD:
+ case ARM::VLD2LNq16_UPD:
+ case ARM::VLD2LNq32_UPD:
+ case ARM::VLD4LNd8:
+ case ARM::VLD4LNd16:
+ case ARM::VLD4LNd32:
+ case ARM::VLD4LNq16:
+ case ARM::VLD4LNq32:
+ case ARM::VLD4LNd8_UPD:
+ case ARM::VLD4LNd16_UPD:
+ case ARM::VLD4LNd32_UPD:
+ case ARM::VLD4LNq16_UPD:
+ case ARM::VLD4LNq32_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increases by one.
+ ++Adjust;
+ break;
+ }
+ }
+ return Adjust;
+}
+
+
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
+ // No operand latency. The caller may fall back to getInstrLatency.
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ unsigned Reg = DefMO.getReg();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ DefMCID = &DefMI->getDesc();
+ }
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef()) {
+ return 1;
+ }
+
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (!NewUseMI)
+ return -1;
+
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+
+ if (Reg == ARM::CPSR) {
+ if (DefMI->getOpcode() == ARM::FMSTAT) {
+ // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+ return Subtarget.isLikeA9() ? 1 : 20;
+ }
+
+ // CPSR set and branch can be paired in the same cycle.
+ if (UseMI->isBranch())
+ return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ unsigned Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
+ }
+
+ if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
+ return -1;
+
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+
+ // Get the itinerary's latency if possible, and handle variable_ops.
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ // Unable to find operand latency. The caller may resort to getInstrLatency.
+ if (Latency < 0)
+ return Latency;
+
+ // Adjust for IT block position.
+ int Adj = DefAdj + UseAdj;
+
+ // Adjust for dynamic def-side opcode variants not captured by the itinerary.
+ Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
+ if (Adj >= 0 || (int)Latency > -Adj) {
+ return Latency + Adj;
+ }
+ // Return the itinerary latency, which may be zero but not less than zero.
+ return Latency;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!DefNode->isMachineOpcode())
+ return 1;
+
+ const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
+
+ if (isZeroCost(DefMCID.Opcode))
+ return 0;
+
+ if (!ItinData || ItinData->isEmpty())
+ return DefMCID.mayLoad() ? 3 : 1;
+
+ if (!UseNode->isMachineOpcode()) {
+ int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
+ if (Subtarget.isLikeA9() || Subtarget.isSwift())
+ return Latency <= 2 ? 1 : Latency - 1;
+ else
+ return Latency <= 3 ? 1 : Latency - 2;
+ }
+
+ const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
+ const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+ unsigned DefAlign = !DefMN->memoperands_empty()
+ ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+ unsigned UseAlign = !UseMN->memoperands_empty()
+ ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ Latency -= 2;
+ else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl 0-3 only.
+ Latency -= 2;
+ break;
+ }
+ }
+ }
+
+ if (DefAlign < 8 && Subtarget.isLikeA9())
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increases by one.
+ ++Latency;
+ break;
+ }
+
+ return Latency;
+}
+
+unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 1;
+
+ // An instruction scheduler typically runs on unbundled instructions, however
+ // other passes may query the latency of a bundled instruction.
+ if (MI->isBundle()) {
+ unsigned Latency = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ if (I->getOpcode() != ARM::t2IT)
+ Latency += getInstrLatency(ItinData, I, PredCost);
+ }
+ return Latency;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
+ // When predicated, CPSR is an additional source operand for CPSR updating
+ // instructions, this apparently increases their latencies.
+ *PredCost = 1;
+ }
+ // Be sure to call getStageLatency for an empty itinerary in case it has a
+ // valid MinLatency property.
+ if (!ItinData)
+ return MI->mayLoad() ? 3 : 1;
+
+ unsigned Class = MCID.getSchedClass();
+
+ // For instructions with variable uops, use uops as latency.
+ if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
+ return getNumMicroOps(ItinData, MI);
+
+ // For the common case, fall back on the itinerary's latency.
+ unsigned Latency = ItinData->getStageLatency(Class);
+
+ // Adjust for dynamic def-side opcode variants not captured by the itinerary.
+ unsigned DefAlign = MI->hasOneMemOperand()
+ ? (*MI->memoperands_begin())->getAlignment() : 0;
+ int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
+ if (Adj >= 0 || (int)Latency > -Adj) {
+ return Latency + Adj;
+ }
+ return Latency;
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const {
+ if (!Node->isMachineOpcode())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Opcode = Node->getMachineOpcode();
+ switch (Opcode) {
+ default:
+ return ItinData->getStageLatency(get(Opcode).getSchedClass());
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+ }
+}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.isCortexA8() &&
+ (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+ // CortexA8 VFP instructions are not pipelined.
+ return true;
+
+ // Hoist VFP / NEON instructions with 4 or higher latency.
+ int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx,
+ /*FindMin=*/false);
+ if (Latency < 0)
+ Latency = getInstrLatency(ItinData, DefMI);
+ if (Latency <= 3)
+ return false;
+ return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+ UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}
+
+bool ARMBaseInstrInfo::
+hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (DDomain == ARMII::DomainGeneral) {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 2);
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const {
+ if (convertAddSubFlagsOpcode(MI->getOpcode())) {
+ ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
+ return false;
+ }
+ return true;
+}
+
+bool
+ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc,
+ bool &NegAcc, bool &HasLane) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
+ if (I == MLxEntryMap.end())
+ return false;
+
+ const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
+ MulOpc = Entry.MulOpc;
+ AddSubOpc = Entry.AddSubOpc;
+ NegAcc = Entry.NegAcc;
+ HasLane = Entry.HasLane;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Execution domains.
+//===----------------------------------------------------------------------===//
+//
+// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
+// and some can go down both. The vmov instructions go down the VFP pipeline,
+// but they can be changed to vorr equivalents that are executed by the NEON
+// pipeline.
+//
+// We use the following execution domain numbering:
+//
+enum ARMExeDomain {
+ ExeGeneric = 0,
+ ExeVFP = 1,
+ ExeNEON = 2
+};
+//
+// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
+//
+std::pair<uint16_t, uint16_t>
+ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
+ // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+ // if they are not predicated.
+ if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+
+ // CortexA9 is particularly picky about mixing the two and wants these
+ // converted.
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ (MI->getOpcode() == ARM::VMOVRS ||
+ MI->getOpcode() == ARM::VMOVSR ||
+ MI->getOpcode() == ARM::VMOVS))
+ return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+
+ // No other instructions can be swizzled, so just determine their domain.
+ unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
+
+ if (Domain & ARMII::DomainNEON)
+ return std::make_pair(ExeNEON, 0);
+
+ // Certain instructions can go either way on Cortex-A8.
+ // Treat them as NEON instructions.
+ if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
+ return std::make_pair(ExeNEON, 0);
+
+ if (Domain & ARMII::DomainVFP)
+ return std::make_pair(ExeVFP, 0);
+
+ return std::make_pair(ExeGeneric, 0);
+}
+
+static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
+ unsigned SReg, unsigned &Lane) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+
+ if (DReg != ARM::NoRegister)
+ return DReg;
+
+ Lane = 1;
+ DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
+
+ assert(DReg && "S-register with no D super-register?");
+ return DReg;
+}
+
+/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
+/// set ImplicitSReg to a register number that must be marked as implicit-use or
+/// zero if no register needs to be defined as implicit-use.
+///
+/// If the function cannot determine if an SPR should be marked implicit use or
+/// not, it returns false.
+///
+/// This function handles cases where an instruction is being modified from taking
+/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
+/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
+/// lane of the DPR).
+///
+/// If the other SPR is defined, an implicit-use of it should be added. Else,
+/// (including the case where the DPR itself is defined), it should not.
+///
+static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
+ MachineInstr *MI,
+ unsigned DReg, unsigned Lane,
+ unsigned &ImplicitSReg) {
+ // If the DPR is defined or used already, the other SPR lane will be chained
+ // correctly, so there is nothing to be done.
+ if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
+ ImplicitSReg = 0;
+ return true;
+ }
+
+ // Otherwise we need to go searching to see if the SPR is set explicitly.
+ ImplicitSReg = TRI->getSubReg(DReg,
+ (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
+ MachineBasicBlock::LivenessQueryResult LQR =
+ MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
+
+ if (LQR == MachineBasicBlock::LQR_Live)
+ return true;
+ else if (LQR == MachineBasicBlock::LQR_Unknown)
+ return false;
+
+ // If the register is known not to be live, there is no need to add an
+ // implicit-use.
+ ImplicitSReg = 0;
+ return true;
+}
+
+void
+ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
+ unsigned DstReg, SrcReg, DReg;
+ unsigned Lane;
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("cannot handle opcode!");
+ break;
+ case ARM::VMOVD:
+ if (Domain != ExeNEON)
+ break;
+
+ // Zap the predicate operands.
+ assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+
+ // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VORRd));
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(SrcReg)
+ .addReg(SrcReg));
+ break;
+ case ARM::VMOVRS:
+ if (Domain != ExeNEON)
+ break;
+ assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+
+ // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
+
+ // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
+ // Note that DSrc has been widened and the other lane may be undef, which
+ // contaminates the entire register.
+ MI->setDesc(get(ARM::VGETLNi32));
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(DReg, RegState::Undef)
+ .addImm(Lane));
+
+ // The old source should be an implicit use, otherwise we might think it
+ // was dead before here.
+ MIB.addReg(SrcReg, RegState::Implicit);
+ break;
+ case ARM::VMOVSR: {
+ if (Domain != ExeNEON)
+ break;
+ assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+
+ // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
+
+ unsigned ImplicitSReg;
+ if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
+ break;
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
+ // Again DDst may be undefined at the beginning of this instruction.
+ MI->setDesc(get(ARM::VSETLNi32));
+ MIB.addReg(DReg, RegState::Define)
+ .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
+ .addReg(SrcReg)
+ .addImm(Lane);
+ AddDefaultPred(MIB);
+
+ // The narrower destination must be marked as set to keep previous chains
+ // in place.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
+ break;
+ }
+ case ARM::VMOVS: {
+ if (Domain != ExeNEON)
+ break;
+
+ // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
+ DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
+ DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
+
+ unsigned ImplicitSReg;
+ if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
+ break;
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ if (DSrc == DDst) {
+ // Destination can be:
+ // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VDUPLN32d));
+ MIB.addReg(DDst, RegState::Define)
+ .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
+ .addImm(SrcLane);
+ AddDefaultPred(MIB);
+
+ // Neither the source or the destination are naturally represented any
+ // more, so add them in manually.
+ MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
+ MIB.addReg(SrcReg, RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
+ break;
+ }
+
+ // In general there's no single instruction that can perform an S <-> S
+ // move in NEON space, but a pair of VEXT instructions *can* do the
+ // job. It turns out that the VEXTs needed will only use DSrc once, with
+ // the position based purely on the combination of lane-0 and lane-1
+ // involved. For example
+ // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
+ // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
+ // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
+ // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
+ //
+ // Pattern of the MachineInstrs is:
+ // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
+ MachineInstrBuilder NewMIB;
+ NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::VEXTd32), DDst);
+
+ // On the first instruction, both DSrc and DDst may be <undef> if present.
+ // Specifically when the original instruction didn't have them as an
+ // <imp-use>.
+ unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
+ bool CurUndef = !MI->readsRegister(CurReg, TRI);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
+ CurUndef = !MI->readsRegister(CurReg, TRI);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ NewMIB.addImm(1);
+ AddDefaultPred(NewMIB);
+
+ if (SrcLane == DstLane)
+ NewMIB.addReg(SrcReg, RegState::Implicit);
+
+ MI->setDesc(get(ARM::VEXTd32));
+ MIB.addReg(DDst, RegState::Define);
+
+ // On the second instruction, DDst has definitely been defined above, so
+ // it is not <undef>. DSrc, if present, can be <undef> as above.
+ CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
+ CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
+ CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ MIB.addImm(1);
+ AddDefaultPred(MIB);
+
+ if (SrcLane != DstLane)
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ // As before, the original destination is no longer represented, add it
+ // implicitly.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
+ break;
+ }
+ }
+
+}
+
+//===----------------------------------------------------------------------===//
+// Partial register updates
+//===----------------------------------------------------------------------===//
+//
+// Swift renames NEON registers with 64-bit granularity. That means any
+// instruction writing an S-reg implicitly reads the containing D-reg. The
+// problem is mostly avoided by translating f32 operations to v2f32 operations
+// on D-registers, but f32 loads are still a problem.
+//
+// These instructions can load an f32 into a NEON register:
+//
+// VLDRS - Only writes S, partial D update.
+// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
+// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
+//
+// FCONSTD can be used as a dependency-breaking instruction.
+unsigned ARMBaseInstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (!SwiftPartialUpdateClearance ||
+ !(Subtarget.isSwift() || Subtarget.isCortexA15()))
+ return 0;
+
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.readsReg())
+ return 0;
+ unsigned Reg = MO.getReg();
+ int UseOp = -1;
+
+ switch(MI->getOpcode()) {
+ // Normal instructions writing only an S-register.
+ case ARM::VLDRS:
+ case ARM::FCONSTS:
+ case ARM::VMOVSR:
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv2f32:
+ case ARM::VMOVv1i64:
+ UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
+ break;
+
+ // Explicitly reads the dependency.
+ case ARM::VLD1LNd32:
+ UseOp = 3;
+ break;
+ default:
+ return 0;
+ }
+
+ // If this instruction actually reads a value from Reg, there is no unwanted
+ // dependency.
+ if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
+ return 0;
+
+ // We must be able to clobber the whole D-reg.
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Virtual register must be a foo:ssub_0<def,undef> operand.
+ if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ // Physical register: MI must define the full D-reg.
+ unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DReg || !MI->definesRegister(DReg, TRI))
+ return 0;
+ }
+
+ // MI has an unwanted D-register dependency.
+ // Avoid defs in the previous N instructrions.
+ return SwiftPartialUpdateClearance;
+}
+
+// Break a partial register dependency after getPartialRegUpdateClearance
+// returned non-zero.
+void ARMBaseInstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Can't break virtual register dependencies.");
+ unsigned DReg = Reg;
+
+ // If MI defines an S-reg, find the corresponding D super-register.
+ if (ARM::SPRRegClass.contains(Reg)) {
+ DReg = ARM::D0 + (Reg - ARM::S0) / 2;
+ assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
+ }
+
+ assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
+ assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
+
+ // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
+ // the full D-register by loading the same value to both lanes. The
+ // instruction is micro-coded with 2 uops, so don't do this until we can
+ // properly schedule micro-coded instuctions. The dispatcher stalls cause
+ // too big regressions.
+
+ // Insert the dependency-breaking FCONSTD before MI.
+ // 96 is the encoding of 0.5, but the actual value doesn't matter here.
+ AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::FCONSTD), DReg).addImm(96));
+ MI->addRegisterKilled(DReg, TRI, true);
+}
+
+bool ARMBaseInstrInfo::hasNOP() const {
+ return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+}
+
+bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
+ // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
+ if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
+ ((ShImm == 1 || ShImm == 2) &&
+ ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
+ return true;
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
new file mode 100644
index 000000000000..7c107bb41951
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -0,0 +1,419 @@
+//===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINSTRUCTIONINFO_H
+#define ARMBASEINSTRUCTIONINFO_H
+
+#include "ARM.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "ARMGenInstrInfo.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseRegisterInfo;
+
+class ARMBaseInstrInfo : public ARMGenInstrInfo {
+ const ARMSubtarget &Subtarget;
+
+protected:
+ // Can be only subclassed.
+ explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+
+public:
+ // Return whether the target has an explicit NOP encoding.
+ bool hasNOP() const;
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
+
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+ const ARMSubtarget &getSubtarget() const { return Subtarget; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const;
+
+ ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+ : ARMCC::AL;
+ }
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const;
+
+ MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
+
+ MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+
+ virtual bool produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+ /// determine if two loads are loading from the same base address. It should
+ /// only return true if the base pointers are the same and the only
+ /// differences between the two addresses is the offset. It also returns the
+ /// offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2)const;
+
+ /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
+ /// should be scheduled togther. On some targets if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ const BranchProbability
+ &Probability) const {
+ return NumCycles == 1;
+ }
+
+ virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
+ /// analyzeCompare - For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if having two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &CmpMask,
+ int &CmpValue) const;
+
+ /// optimizeCompareInstr - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero"; Remove a redundant CMP
+ /// instruction if the flags can be updated in the same way by an earlier
+ /// instruction such as SUB.
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const;
+
+ virtual bool analyzeSelect(const MachineInstr *MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const;
+
+ virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const;
+
+ /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+ /// instruction, try to fold the immediate into the use instruction.
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const;
+
+ virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const;
+
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const;
+
+ /// VFP/NEON execution domains.
+ std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const;
+ void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+
+ unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned,
+ const TargetRegisterInfo*) const;
+ void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
+ const TargetRegisterInfo *TRI) const;
+ /// Get the number of addresses by LDM or VLDM or zero for unknown.
+ unsigned getNumLDMAddresses(const MachineInstr *MI) const;
+
+private:
+ unsigned getInstBundleLength(const MachineInstr *MI) const;
+
+ int getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const;
+
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const;
+
+ /// verifyInstruction - Perform target specific instruction verification.
+ bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const;
+
+private:
+ /// Modeling special VFP / NEON fp MLA / MLS hazards.
+
+ /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal
+ /// MLx table.
+ DenseMap<unsigned, unsigned> MLxEntryMap;
+
+ /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause
+ /// stalls when scheduled together with fp MLA / MLS opcodes.
+ SmallSet<unsigned, 16> MLxHazardOpcodes;
+
+public:
+ /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS
+ /// instruction.
+ bool isFpMLxInstruction(unsigned Opcode) const {
+ return MLxEntryMap.count(Opcode);
+ }
+
+ /// isFpMLxInstruction - This version also returns the multiply opcode and the
+ /// addition / subtraction opcode to expand to. Return true for 'HasLane' for
+ /// the MLX instructions with an extra lane operand.
+ bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc, bool &NegAcc,
+ bool &HasLane) const;
+
+ /// canCauseFpMLxStall - Return true if an instruction of the specified opcode
+ /// will cause stalls when scheduled after (within 4-cycle window) a fp
+ /// MLA / MLS instruction.
+ bool canCauseFpMLxStall(unsigned Opcode) const {
+ return MLxHazardOpcodes.count(Opcode);
+ }
+
+ /// Returns true if the instruction has a shift by immediate that can be
+ /// executed in one cycle less.
+ bool isSwiftFastImmShift(const MachineInstr *MI) const;
+};
+
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
+ bool isDead = false) {
+ return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
+}
+
+static inline
+const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+bool isUncondBranchOpcode(int Opc) {
+ return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
+}
+
+static inline
+bool isCondBranchOpcode(int Opc) {
+ return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
+}
+
+static inline
+bool isJumpTableBranchOpcode(int Opc) {
+ return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd ||
+ Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT;
+}
+
+static inline
+bool isIndirectBranchOpcode(int Opc) {
+ return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
+}
+
+/// getInstrPredicate - If instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+int getMatchingCondBranchOpcode(int Opc);
+
+/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
+/// opcode of the SSA instruction representing the conditional MI.
+unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
+ MachineInstr *&MI,
+ const MachineRegisterInfo &MRI);
+
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
+/// the instruction is encoded with an 'S' bit is determined by the optional
+/// CPSR def operand.
+unsigned convertAddSubFlagsOpcode(unsigned OldOpc);
+
+/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
+/// instructions to materializea destreg = basereg + immediate in ARM / Thumb2
+/// code.
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
+
+void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags = 0);
+
+
+/// rewriteARMFrameIndex / rewriteT2FrameIndex -
+/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
+/// offset could not be handled directly in MI, and return the left-over
+/// portion by reference.
+bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
new file mode 100644
index 000000000000..b6b27f849a23
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -0,0 +1,740 @@
+//===-- ARMBaseRegisterInfo.cpp - ARM Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseRegisterInfo.h"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "ARMGenRegisterInfo.inc"
+
+using namespace llvm;
+
+ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti),
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
+ BasePtr(ARM::R6) {
+}
+
+const uint16_t*
+ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ bool ghcCall = false;
+
+ if (MF) {
+ const Function *F = MF->getFunction();
+ ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+ }
+
+ if (ghcCall) {
+ return CSR_GHC_SaveList;
+ }
+ else {
+ return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ }
+}
+
+const uint32_t*
+ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
+}
+
+const uint32_t*
+ARMBaseRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
+BitVector ARMBaseRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // FIXME: avoid re-calculating this every time.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(ARM::SP);
+ Reserved.set(ARM::PC);
+ Reserved.set(ARM::FPSCR);
+ if (TFI->hasFP(MF))
+ Reserved.set(FramePtr);
+ if (hasBasePointer(MF))
+ Reserved.set(BasePtr);
+ // Some targets reserve R9.
+ if (STI.isR9Reserved())
+ Reserved.set(ARM::R9);
+ // Reserve D16-D31 if the subtarget doesn't support them.
+ if (!STI.hasVFP3() || STI.hasD16()) {
+ assert(ARM::D31 == ARM::D16 + 15);
+ for (unsigned i = 0; i != 16; ++i)
+ Reserved.set(ARM::D16 + i);
+ }
+ const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
+ for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I)
+ for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
+ if (Reserved.test(*SI)) Reserved.set(*I);
+
+ return Reserved;
+}
+
+const TargetRegisterClass*
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+ const {
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
+ do {
+ switch (Super->getID()) {
+ case ARM::GPRRegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::DPRRegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QQPRRegClassID:
+ case ARM::QQQQPRRegClassID:
+ case ARM::GPRPairRegClassID:
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ return &ARM::GPRRegClass;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &ARM::CCRRegClass)
+ return 0; // Can't copy CCR registers.
+ return RC;
+}
+
+unsigned
+ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return TFI->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (STI.isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
+
+// Get the other register in a GPRPair.
+static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) {
+ for (MCSuperRegIterator Supers(Reg, RI); Supers.isValid(); ++Supers)
+ if (ARM::GPRPairRegClass.contains(*Supers))
+ return RI->getSubReg(*Supers, Odd ? ARM::gsub_1 : ARM::gsub_0);
+ return 0;
+}
+
+// Resolve the RegPairEven / RegPairOdd register allocator hints.
+void
+ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ unsigned Odd;
+ switch (Hint.first) {
+ case ARMRI::RegPairEven:
+ Odd = 0;
+ break;
+ case ARMRI::RegPairOdd:
+ Odd = 1;
+ break;
+ default:
+ TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
+ return;
+ }
+
+ // This register should preferably be even (Odd == 0) or odd (Odd == 1).
+ // Check if the other part of the pair has already been assigned, and provide
+ // the paired register as the first hint.
+ unsigned PairedPhys = 0;
+ if (VRM && VRM->hasPhys(Hint.second)) {
+ PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this);
+ if (PairedPhys && MRI.isReserved(PairedPhys))
+ PairedPhys = 0;
+ }
+
+ // First prefer the paired physreg.
+ if (PairedPhys &&
+ std::find(Order.begin(), Order.end(), PairedPhys) != Order.end())
+ Hints.push_back(PairedPhys);
+
+ // Then prefer even or odd registers.
+ for (unsigned I = 0, E = Order.size(); I != E; ++I) {
+ unsigned Reg = Order[I];
+ if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
+ continue;
+ // Don't provide hints that are paired to a reserved register.
+ unsigned Paired = getPairedGPR(Reg, !Odd, this);
+ if (!Paired || MRI.isReserved(Paired))
+ continue;
+ Hints.push_back(Reg);
+ }
+}
+
+void
+ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+ Hint.first == (unsigned)ARMRI::RegPairEven) &&
+ TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+ // If 'Reg' is one of the even / odd register pair and it's now changed
+ // (e.g. coalesced) into a different register. The other register of the
+ // pair allocation hint must be updated to reflect the relationship
+ // change.
+ unsigned OtherReg = Hint.second;
+ Hint = MRI->getRegAllocationHint(OtherReg);
+ if (Hint.second == Reg)
+ // Make sure the pair has not already divorced.
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ }
+}
+
+bool
+ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+ // CortexA9 has a Write-after-write hazard for NEON registers.
+ if (!STI.isLikeA9())
+ return false;
+
+ switch (RC->getID()) {
+ case ARM::DPRRegClassID:
+ case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::SPR_8RegClassID:
+ // Avoid reusing S, D, and Q registers.
+ // Don't increase register pressure for QQ and QQQQ.
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // When outgoing call frames are so large that we adjust the stack pointer
+ // around the call, we can no longer use the stack pointer to reach the
+ // emergency spill slot.
+ if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF))
+ return true;
+
+ // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+ // negative range for ldr/str (255), and thumb1 is positive offsets only.
+ // It's going to be better to use the SP or Base Pointer instead. When there
+ // are variable sized objects, we can't reference off of the SP, so we
+ // reserve a Base Pointer.
+ if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach. If a function has a smallish
+ // frame, it's less likely to have lots of spills and callee saved
+ // space, so it's all more likely to be within range of the frame pointer.
+ // If it's wrong, the scavenger will still enable access to work, it just
+ // won't be optimal.
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // We can't realign the stack if:
+ // 1. Dynamic stack realignment is explicitly disabled,
+ // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+ // 3. There are VLAs in the function and the base pointer is disabled.
+ if (!MF.getTarget().Options.RealignStack)
+ return false;
+ if (AFI->isThumb1OnlyFunction())
+ return false;
+ // Stack realignment requires a frame pointer. If we already started
+ // register allocation with frame pointer elimination, it is too late now.
+ if (!MRI->canReserveReg(FramePtr))
+ return false;
+ // We may also need a base pointer if there are dynamic allocas or stack
+ // pointer adjustments around calls.
+ if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
+ return true;
+ // A base pointer is required and allowed. Check that it isn't too late to
+ // reserve it.
+ return MRI->canReserveReg(BasePtr);
+}
+
+bool ARMBaseRegisterInfo::
+needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
+bool ARMBaseRegisterInfo::
+cannotEliminateFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
+ return true;
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ || needsStackRealignment(MF);
+}
+
+unsigned
+ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
+ return FramePtr;
+ return ARM::SP;
+}
+
+unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+}
+
+unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ARMBaseRegisterInfo::
+emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C =
+ ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .addImm(0).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
+}
+
+bool ARMBaseRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return true;
+}
+
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ int64_t InstrOffs = 0;
+ int Scale = 1;
+ unsigned ImmIdx = 0;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ case ARMII::AddrMode_i12:
+ InstrOffs = MI->getOperand(Idx+1).getImm();
+ Scale = 1;
+ break;
+ case ARMII::AddrMode5: {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI->getOperand(Idx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrModeT1_s: {
+ ImmIdx = Idx+1;
+ InstrOffs = MI->getOperand(ImmIdx).getImm();
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ return InstrOffs * Scale;
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12:
+ case ARM::STRi12: case ARM::STRH: case ARM::STRBi12:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2STRi12: case ARM::t2STRi8:
+ case ARM::VLDRS: case ARM::VLDRD:
+ case ARM::VSTRS: case ARM::VSTRD:
+ case ARM::tSTRspi: case ARM::tLDRspi:
+ break;
+ default:
+ return false;
+ }
+
+ // Without a virtual base register, if the function has variable sized
+ // objects, all fixed-size local references will be via the frame pointer,
+ // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all callee-saved registers get pushed. R4-R6
+ // will be earlier than the FP, so we ignore those.
+ // R7, LR
+ int64_t FPOffset = Offset - 8;
+ // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+ if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+ FPOffset -= 80;
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relating to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset = -Offset;
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there is a frame pointer, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ unsigned StackAlign = TFI->getStackAlignment();
+ if (TFI->hasFP(MF) &&
+ !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ if (isFrameOffsetLegal(MI, FPOffset))
+ return false;
+ }
+ // If we can reference via the stack pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ return false;
+
+ // The offset likely isn't legal, we want to allocate a virtual base register.
+ return true;
+}
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const MachineFunction &MF = *MBB->getParent();
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
+
+ MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset));
+
+ if (!AFI->isThumb1OnlyFunction())
+ AddDefaultCC(MIB);
+}
+
+void
+ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This resolveFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ }
+ assert (Done && "Unable to resolve frame index!");
+ (void)Done;
+}
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ unsigned i = 0;
+
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return Offset == 0;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ bool isSigned = true;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign, consider the appropriate instruction
+ Scale = 1;
+ if (Offset < 0) {
+ NumBits = 8;
+ Offset = -Offset;
+ } else {
+ NumBits = 12;
+ }
+ break;
+ case ARMII::AddrMode5:
+ // VFP address mode.
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode_i12:
+ case ARMII::AddrMode2:
+ NumBits = 12;
+ break;
+ case ARMII::AddrMode3:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT1_s:
+ NumBits = 5;
+ Scale = 4;
+ isSigned = false;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ Offset += getFrameIndexInstrOffset(MI, i);
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ if ((Offset & (Scale-1)) != 0)
+ return false;
+
+ if (isSigned && Offset < 0)
+ Offset = -Offset;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ return false;
+}
+
+void
+ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMFrameLowering *TFI =
+ static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateFrameIndex does not support Thumb1!");
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ unsigned FrameReg;
+
+ int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+
+ // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+ // call frame setup/destroy instructions have already been eliminated. That
+ // means the stack pointer cannot be used to access the emergency spill slot
+ // when !hasReservedCallFrame().
+#ifndef NDEBUG
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
+ assert(TFI->hasReservedCallFrame(MF) &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions without a reserved call frame");
+ assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions with variable sized frame objects");
+ }
+#endif // NDEBUG
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
+ }
+ if (Done)
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert((Offset ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
+ "This code isn't needed if offset already handled!");
+
+ unsigned ScratchReg = 0;
+ int PIdx = MI.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+ unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ if (Offset == 0)
+ // Must be addrmode4/6.
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false);
+ else {
+ ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass);
+ if (!AFI->isThumbFunction())
+ emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ }
+ // Update the original instruction to use the scratch register.
+ MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
new file mode 100644
index 000000000000..725033b7e573
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -0,0 +1,178 @@
+//===-- ARMBaseRegisterInfo.h - ARM Register Information Impl ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEREGISTERINFO_H
+#define ARMBASEREGISTERINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "ARMGenRegisterInfo.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+/// Register allocation hints.
+namespace ARMRI {
+ enum {
+ RegPairOdd = 1,
+ RegPairEven = 2
+ };
+}
+
+/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
+/// or a stack/pc register that we should push/pop.
+static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ case LR: case SP: case PC:
+ return true;
+ case R8: case R9: case R10: case R11:
+ // For iOS we want r7 and lr to be next to each other.
+ return !isIOS;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
+ using namespace ARM;
+ switch (Reg) {
+ case R8: case R9: case R10: case R11:
+ // iOS has this second area.
+ return isIOS;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
+ using namespace ARM;
+ switch (Reg) {
+ case D15: case D14: case D13: case D12:
+ case D11: case D10: case D9: case D8:
+ return true;
+ default:
+ return false;
+ }
+}
+
+class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+protected:
+ const ARMBaseInstrInfo &TII;
+ const ARMSubtarget &STI;
+
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+ /// BasePtr - ARM physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
+ /// variable size stack objects.
+ unsigned BasePtr;
+
+ // Can be only subclassed.
+ explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &STI);
+
+ // Return the opcode that implements 'Op', or 0 if no opcode
+ unsigned getOpcode(int Op) const;
+
+public:
+ /// Code Generation virtual methods...
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ const uint32_t *getNoPreservedMask() const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ const TargetRegisterClass*
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
+ const TargetRegisterClass*
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ void getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const;
+
+ void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const;
+
+ virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+
+ bool hasBasePointer(const MachineFunction &MF) const;
+
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const;
+ int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+
+ bool cannotEliminateFrame(const MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getBaseRegister() const { return BasePtr; }
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ bool isLowRegister(unsigned Reg) const;
+
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ virtual void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags)const;
+
+ /// Code Generation virtual methods...
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+
+ virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
new file mode 100644
index 000000000000..11bd6a4a8dbc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
@@ -0,0 +1,131 @@
+//===-- ARMBuildAttrs.h - ARM Build Attributes ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains enumerations and support routines for ARM build attributes
+// as defined in ARM ABI addenda document (ABI release 2.08).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __TARGET_ARMBUILDATTRS_H__
+#define __TARGET_ARMBUILDATTRS_H__
+
+namespace ARMBuildAttrs {
+ enum SpecialAttr {
+ // This is for the .cpu asm attr. It translates into one or more
+ // AttrType (below) entries in the .ARM.attributes section in the ELF.
+ SEL_CPU
+ };
+
+ enum AttrType {
+ // Rest correspond to ELF/.ARM.attributes
+ File = 1,
+ Section = 2,
+ Symbol = 3,
+ CPU_raw_name = 4,
+ CPU_name = 5,
+ CPU_arch = 6,
+ CPU_arch_profile = 7,
+ ARM_ISA_use = 8,
+ THUMB_ISA_use = 9,
+ VFP_arch = 10,
+ WMMX_arch = 11,
+ Advanced_SIMD_arch = 12,
+ PCS_config = 13,
+ ABI_PCS_R9_use = 14,
+ ABI_PCS_RW_data = 15,
+ ABI_PCS_RO_data = 16,
+ ABI_PCS_GOT_use = 17,
+ ABI_PCS_wchar_t = 18,
+ ABI_FP_rounding = 19,
+ ABI_FP_denormal = 20,
+ ABI_FP_exceptions = 21,
+ ABI_FP_user_exceptions = 22,
+ ABI_FP_number_model = 23,
+ ABI_align8_needed = 24,
+ ABI_align8_preserved = 25,
+ ABI_enum_size = 26,
+ ABI_HardFP_use = 27,
+ ABI_VFP_args = 28,
+ ABI_WMMX_args = 29,
+ ABI_optimization_goals = 30,
+ ABI_FP_optimization_goals = 31,
+ compatibility = 32,
+ CPU_unaligned_access = 34,
+ VFP_HP_extension = 36,
+ ABI_FP_16bit_format = 38,
+ MPextension_use = 42, // was 70, 2.08 ABI
+ DIV_use = 44,
+ nodefaults = 64,
+ also_compatible_with = 65,
+ T2EE_use = 66,
+ conformance = 67,
+ Virtualization_use = 68,
+ MPextension_use_old = 70
+ };
+
+ // Magic numbers for .ARM.attributes
+ enum AttrMagic {
+ Format_Version = 0x41
+ };
+
+ // Legal Values for CPU_arch, (=6), uleb128
+ enum CPUArch {
+ Pre_v4 = 0,
+ v4 = 1, // e.g. SA110
+ v4T = 2, // e.g. ARM7TDMI
+ v5T = 3, // e.g. ARM9TDMI
+ v5TE = 4, // e.g. ARM946E_S
+ v5TEJ = 5, // e.g. ARM926EJ_S
+ v6 = 6, // e.g. ARM1136J_S
+ v6KZ = 7, // e.g. ARM1176JZ_S
+ v6T2 = 8, // e.g. ARM1156T2F_S
+ v6K = 9, // e.g. ARM1136J_S
+ v7 = 10, // e.g. Cortex A8, Cortex M3
+ v6_M = 11, // e.g. Cortex M1
+ v6S_M = 12, // v6_M with the System extensions
+ v7E_M = 13 // v7_M with DSP extensions
+ };
+
+ enum CPUArchProfile { // (=7), uleb128
+ Not_Applicable = 0, // pre v7, or cross-profile code
+ ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8)
+ RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4)
+ MicroControllerProfile = (0x4D), // 'M' (e.g. for Cortex M3)
+ SystemProfile = (0x53) // 'S' Application or real-time profile
+ };
+
+ // The following have a lot of common use cases
+ enum {
+ //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128
+ Not_Allowed = 0,
+ Allowed = 1,
+
+ // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+ AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA)
+ AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA)
+ AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
+ AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
+ AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31
+
+ // Tag_WMMX_arch, (=11), uleb128
+ AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
+
+ // Tag_WMMX_arch, (=11), uleb128
+ AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v2
+
+ // Tag_ABI_FP_denormal, (=20), uleb128
+ PreserveFPSign = 2, // sign when flushed-to-zero is preserved
+
+ // Tag_ABI_FP_number_model, (=23), uleb128
+ AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI])
+ AllowIEE754 = 3 // this code to use all the IEEE 754-defined FP encodings
+ };
+}
+
+#endif // __TARGET_ARMBUILDATTRS_H__
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
new file mode 100644
index 000000000000..e6e8c3d5fac6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -0,0 +1,159 @@
+//=== ARMCallingConv.h - ARM Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMCALLINGCONV_H
+#define ARMCALLINGCONV_H
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else {
+ // For the 2nd half of a v2f64, do not fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4, 4),
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
+ if (Reg == 0) {
+ // For the 2nd half of a v2f64, do not just fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 8),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo, CCState &State) {
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
new file mode 100644
index 000000000000..b378b9662682
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -0,0 +1,206 @@
+//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for ARM architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfAlign - Match of the original alignment of the arg
+class CCIfAlign<string Align, CCAction A>:
+ CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_APCS : CallingConv<[
+
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 4>>
+]>;
+
+def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+def FastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetFastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_APCS>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_APCS_GHC : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+ CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
+ CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
+ CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention, common parts
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_Common : CallingConv<[
+
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // i64/f64 is passed in even pairs of GPRs
+ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
+ // (and the same is true for f64 if VFP is not enabled)
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
+ CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
+ "ArgFlags.getOrigAlign() != 8",
+ CCAssignToReg<[R0, R1, R2, R3]>>>,
+
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 8>>
+]>;
+
+def RetCC_ARM_AAPCS_Common : CallingConv<[
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS-VFP (EABI) Calling Convention
+// Also used for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
+ (sequence "D%u", 15, 8))>;
+
+// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
+// Also save R7-R4 first to match the stack frame fixed spill areas.
+def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+
+// GHC set of callee saved regs is empty as all those regs are
+// used for passing STG regs around
+// add is a workaround for not being able to compile empty list:
+// def CSR_GHC : CalleeSavedRegs<()>;
+def CSR_GHC : CalleeSavedRegs<(add)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
new file mode 100644
index 000000000000..5e8e1739a984
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -0,0 +1,1916 @@
+//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the ARM machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+ class ARMCodeEmitter : public MachineFunctionPass {
+ ARMJITInfo *JTI;
+ const ARMBaseInstrInfo *II;
+ const DataLayout *TD;
+ const ARMSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+ bool IsThumb;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+ public:
+ ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0),
+ II((const ARMBaseInstrInfo *)tm.getInstrInfo()),
+ TD(tm.getDataLayout()), TM(tm),
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ void emitWordLE(unsigned Binary);
+ void emitDWordLE(uint64_t Binary);
+ void emitConstPoolInstruction(const MachineInstr &MI);
+ void emitMOVi32immInstruction(const MachineInstr &MI);
+ void emitMOVi2piecesInstruction(const MachineInstr &MI);
+ void emitLEApcrelJTInstruction(const MachineInstr &MI);
+ void emitPseudoMoveInstruction(const MachineInstr &MI);
+ void addPCLabel(unsigned LabelID);
+ void emitPseudoInstruction(const MachineInstr &MI);
+ unsigned getMachineSoRegOpValue(const MachineInstr &MI,
+ const MCInstrDesc &MCID,
+ const MachineOperand &MO,
+ unsigned OpIdx);
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm);
+ unsigned getAddrModeSBit(const MachineInstr &MI,
+ const MCInstrDesc &MCID) const;
+
+ void emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitMulFrmInstruction(const MachineInstr &MI);
+
+ void emitExtendInstruction(const MachineInstr &MI);
+
+ void emitMiscArithInstruction(const MachineInstr &MI);
+
+ void emitSaturateInstruction(const MachineInstr &MI);
+
+ void emitBranchInstruction(const MachineInstr &MI);
+
+ void emitInlineJumpTable(unsigned JTIndex);
+
+ void emitMiscBranchInstruction(const MachineInstr &MI);
+
+ void emitVFPArithInstruction(const MachineInstr &MI);
+
+ void emitVFPConversionInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitNEONLaneInstruction(const MachineInstr &MI);
+ void emitNEONDupInstruction(const MachineInstr &MI);
+ void emitNEON1RegModImmInstruction(const MachineInstr &MI);
+ void emitNEON2RegInstruction(const MachineInstr &MI);
+ void emitNEON3RegInstruction(const MachineInstr &MI);
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+ unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the
+ // TableGen'erated getBinaryCodeForInstr() function to encode any
+ // operand values, instead querying getMachineOpValue() directly for
+ // each operand it needs to encode. Thus, any of the new encoder
+ // helper functions can simply return 0 as the values the return
+ // are already handled elsewhere. They are placeholders to allow this
+ // encoder to continue to function until the MC encoder is sufficiently
+ // far along that this one can be eliminated entirely.
+ unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
+ const { return 0; }
+ unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI,
+ unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getSsatBitPosValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const {return 0; }
+ uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0; }
+
+ unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+ uint32_t Binary;
+ Binary = Imm12 & 0xfff;
+ if (Imm12 >= 0)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
+
+ unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
+ return 0;
+ }
+
+ uint32_t getAddrMode2OpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeSOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+
+ // Special value for #-0
+ if (Imm12 == INT32_MIN)
+ Imm12 = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs
+ // sub.
+ bool isAdd = true;
+ if (Imm12 < 0) {
+ Imm12 = -Imm12;
+ isAdd = false;
+ }
+
+ uint32_t Binary = Imm12 & 0xfff;
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
+ unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+ /// machine operand requires relocation, record the relocation and return
+ /// zero.
+ unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+ unsigned Reloc);
+
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(unsigned Imm) const ;
+
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV = 0) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
+ intptr_t JTBase = 0) const;
+ unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const;
+ };
+}
+
+char ARMCodeEmitter::ID = 0;
+
+/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
+/// code to the specified MCE object.
+FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new ARMCodeEmitter(TM, JCE);
+}
+
+bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget());
+
+ assert((Target.getRelocationModel() != Reloc::Default ||
+ Target.getRelocationModel() != Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+
+ JTI = static_cast<ARMJITInfo*>(Target.getJITInfo());
+ II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo());
+ TD = Target.getDataLayout();
+
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = 0;
+ if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
+ JTI->Initialize(MF, IsPIC);
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getName() << "'\n");
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+///
+unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
+ switch (ARM_AM::getAM2ShiftOpc(Imm)) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+}
+
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+ const MachineOperand &MO,
+ unsigned Reloc) {
+ assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+ && "Relocation to this function should be for movt or movw");
+
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), Reloc);
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable("Unsupported operand type for movw/movt");
+ }
+ return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return II->getRegisterInfo().getEncodingValue(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
+ else if (MO.isCPI()) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ // For VFP load, the immediate offset is multiplied by 4.
+ unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+ ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
+ emitConstPoolAddress(MO.getIndex(), Reloc);
+ } else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream.
+///
+void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV) const {
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV),
+ ACPV, MayNeedFarStub)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), ACPV,
+ MayNeedFarStub);
+ MCE.addRelocation(MR);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES));
+}
+
+/// emitConstPoolAddress - Arrange for the address of an constant pool
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ // Tell JIT emitter we'll resolve the address.
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, true));
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, true));
+}
+
+/// emitMachineBasicBlock - Emit the specified address basic block.
+void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc,
+ intptr_t JTBase) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB, JTBase));
+}
+
+void ARMCodeEmitter::emitWordLE(unsigned Binary) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
+ MCE.emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitDWordLE(uint64_t Binary) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
+ MCE.emitDWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ ++NumEmitted; // Keep track of the # of mi's emitted
+ switch (MI.getDesc().TSFlags & ARMII::FormMask) {
+ default: {
+ llvm_unreachable("Unhandled instruction encoding format!");
+ }
+ case ARMII::MiscFrm:
+ if (MI.getOpcode() == ARM::LEApcrelJT) {
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ }
+ llvm_unreachable("Unhandled instruction encoding!");
+ case ARMII::Pseudo:
+ emitPseudoInstruction(MI);
+ break;
+ case ARMII::DPFrm:
+ case ARMII::DPSoRegFrm:
+ emitDataProcessingInstruction(MI);
+ break;
+ case ARMII::LdFrm:
+ case ARMII::StFrm:
+ emitLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdMiscFrm:
+ case ARMII::StMiscFrm:
+ emitMiscLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdStMulFrm:
+ emitLoadStoreMultipleInstruction(MI);
+ break;
+ case ARMII::MulFrm:
+ emitMulFrmInstruction(MI);
+ break;
+ case ARMII::ExtFrm:
+ emitExtendInstruction(MI);
+ break;
+ case ARMII::ArithMiscFrm:
+ emitMiscArithInstruction(MI);
+ break;
+ case ARMII::SatFrm:
+ emitSaturateInstruction(MI);
+ break;
+ case ARMII::BrFrm:
+ emitBranchInstruction(MI);
+ break;
+ case ARMII::BrMiscFrm:
+ emitMiscBranchInstruction(MI);
+ break;
+ // VFP instructions.
+ case ARMII::VFPUnaryFrm:
+ case ARMII::VFPBinaryFrm:
+ emitVFPArithInstruction(MI);
+ break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ emitVFPConversionInstruction(MI);
+ break;
+ case ARMII::VFPLdStFrm:
+ emitVFPLoadStoreInstruction(MI);
+ break;
+ case ARMII::VFPLdStMulFrm:
+ emitVFPLoadStoreMultipleInstruction(MI);
+ break;
+
+ // NEON instructions.
+ case ARMII::NGetLnFrm:
+ case ARMII::NSetLnFrm:
+ emitNEONLaneInstruction(MI);
+ break;
+ case ARMII::NDupFrm:
+ emitNEONDupInstruction(MI);
+ break;
+ case ARMII::N1RegModImmFrm:
+ emitNEON1RegModImmInstruction(MI);
+ break;
+ case ARMII::N2RegFrm:
+ emitNEON2RegInstruction(MI);
+ break;
+ case ARMII::N3RegFrm:
+ emitNEON3RegInstruction(MI);
+ break;
+ }
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
+ unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index.
+ unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
+ const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
+
+ // Remember the CONSTPOOL_ENTRY address for later relocation.
+ JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
+
+ // Emit constpool island entry. In most cases, the actual values will be
+ // resolved and relocated after code emission.
+ if (MCPE.isMachineConstantPoolEntry()) {
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
+
+ assert(ACPV->isGlobalValue() && "unsupported constant pool value");
+ const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
+ if (GV) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
+ isa<Function>(GV),
+ Subtarget->GVIsIndirectSymbol(GV, RelocM),
+ (intptr_t)ACPV);
+ } else {
+ const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute);
+ }
+ emitWordLE(0);
+ } else {
+ const Constant *CV = MCPE.Val.ConstVal;
+
+ DEBUG({
+ errs() << " ** Constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " ";
+ if (const Function *F = dyn_cast<Function>(CV))
+ errs() << F->getName();
+ else
+ errs() << *CV;
+ errs() << '\n';
+ });
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
+ emitWordLE(0);
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ uint32_t Val = uint32_t(*CI->getValue().getRawData());
+ emitWordLE(Val);
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (CFP->getType()->isFloatTy())
+ emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else if (CFP->getType()->isDoubleTy())
+ emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else {
+ llvm_unreachable("Unable to handle this constantpool entry!");
+ }
+ } else {
+ llvm_unreachable("Unable to handle this constantpool entry!");
+ }
+ }
+}
+
+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+
+ // Emit the 'movw' instruction.
+ unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000
+
+ unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12
+ Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12
+ Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
+ emitWordLE(Binary);
+
+ unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+ // Emit the 'movt' instruction.
+ Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm1, same as movw above.
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+ assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) &&
+ "Not a valid so_imm value!");
+ unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
+ unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
+
+ // Emit the 'mov' instruction.
+ unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(V1);
+ emitWordLE(Binary);
+
+ // Now the 'orr' instruction.
+ Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(V2);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
+ // It's basically add r, pc, (LJTI - $+8)
+
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Emit the 'add' instruction.
+ unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode Rn which is PC.
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift;
+
+ // Encode the displacement.
+ Binary |= 1 << ARMII::I_BitShift;
+ emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag)
+ Binary |= 1 << ARMII::S_BitShift;
+
+ // Encode register def if there is one.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode the shift operation.
+ switch (Opcode) {
+ default: break;
+ case ARM::RRX:
+ // rrx
+ Binary |= 0x6 << 4;
+ break;
+ case ARM::MOVsrl_flag:
+ // lsr #1
+ Binary |= (0x2 << 4) | (1 << 7);
+ break;
+ case ARM::MOVsra_flag:
+ // asr #1
+ Binary |= (0x4 << 4) | (1 << 7);
+ break;
+ }
+
+ // Encode register Rm.
+ Binary |= getMachineOpValue(MI, 1);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::addPCLabel(unsigned LabelID) {
+ DEBUG(errs() << " ** LPC" << LabelID << " @ "
+ << (void*)MCE.getCurrentPCValue() << '\n');
+ JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
+}
+
+void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+ switch (Opcode) {
+ default:
+ llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+ case ARM::BX_CALL:
+ case ARM::BMOVPCRX_CALL: {
+ // First emit mov lr, pc
+ unsigned Binary = 0x01a0e00f;
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ emitWordLE(Binary);
+
+ // and then emit the branch.
+ emitMiscBranchInstruction(MI);
+ break;
+ }
+ case TargetOpcode::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ report_fatal_error("JIT does not support inline asm!");
+ }
+ break;
+ }
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ // Do nothing.
+ break;
+ case ARM::CONSTPOOL_ENTRY:
+ emitConstPoolInstruction(MI);
+ break;
+ case ARM::PICADD: {
+ // Remember of the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // PICADD is just an add instruction that implicitly read pc.
+ emitDataProcessingInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICSTR:
+ case ARM::PICSTRB: {
+ // Remember of the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitLoadStoreInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDRH:
+ case ARM::PICLDRSH:
+ case ARM::PICLDRSB:
+ case ARM::PICSTRH: {
+ // Remember of the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitMiscLoadStoreInstruction(MI, ARM::PC);
+ break;
+ }
+
+ case ARM::MOVi32imm:
+ // Two instructions to materialize a constant.
+ if (Subtarget->hasV6T2Ops())
+ emitMOVi32immInstruction(MI);
+ else
+ emitMOVi2piecesInstruction(MI);
+ break;
+
+ case ARM::LEApcrelJT:
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ case ARM::RRX:
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag:
+ emitPseudoMoveInstruction(MI);
+ break;
+ }
+}
+
+unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
+ const MCInstrDesc &MCID,
+ const MachineOperand &MO,
+ unsigned OpIdx) {
+ unsigned Binary = getMachineOpValue(MI, MO);
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
+unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+}
+
+unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
+ const MCInstrDesc &MCID) const {
+ for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){
+ const MachineOperand &MO = MI.getOperand(i-1);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
+ return 1 << ARMII::S_BitShift;
+ }
+ return 0;
+}
+
+void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // Encode register def if there is one.
+ unsigned NumDefs = MCID.getNumDefs();
+ unsigned OpIdx = 0;
+ if (NumDefs)
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+ else if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
+
+ if (MCID.Opcode == ARM::MOVi16) {
+ // Get immediate from MI.
+ unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movw);
+ // Encode imm which is the same as in emitMOVi32immInstruction().
+ Binary |= Lo16 & 0xFFF;
+ Binary |= ((Lo16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if(MCID.Opcode == ARM::MOVTi16) {
+ unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movt) >> 16);
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
+ uint32_t v = ~MI.getOperand(2).getImm();
+ int32_t lsb = CountTrailingZeros_32(v);
+ int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+ // Instr{20-16} = msb, Instr{11-7} = lsb
+ Binary |= (msb & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) {
+ // Encode Rn in Instr{0-3}
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+ uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+ // Instr{20-16} = widthm1, Instr{11-7} = lsb
+ Binary |= (widthm1 & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ }
+
+ // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode first non-shifter register operand if there is one.
+ bool isUnary = MCID.TSFlags & ARMII::UnaryDP;
+ if (!isUnary) {
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
+ else {
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
+ ++OpIdx;
+ }
+ }
+
+ // Encode shifter operand.
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
+ // Encode SoReg.
+ emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx));
+ return;
+ }
+
+ if (MO.isReg()) {
+ // Encode register Rm.
+ emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg()));
+ return;
+ }
+
+ // Encode so_imm.
+ Binary |= getMachineSoImmOpValue((unsigned)MO.getImm());
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done.
+ if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp ||
+ MI.getOpcode() == ARM::STRi12) {
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDR_PRE.
+ if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM2Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative).
+ Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+ if (!MO2.getReg()) { // is immediate
+ if (ARM_AM::getAM2Offset(AM2Opc))
+ // Set the value of offset_12 field
+ Binary |= ARM_AM::getAM2Offset(AM2Opc);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set bit I(25), because this is not in immediate encoding.
+ Binary |= 1 << ARMII::I_BitShift;
+ assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ // Set bit[3:0] to the corresponding Rm register
+ Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
+
+ // If this instr is in scaled register offset/index instruction, set
+ // shift_immed(bit[11:7]) and shift(bit[6:5]) fields.
+ if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) {
+ Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift
+ Binary |= ShImm << ARMII::ShiftShift; // shift_immed
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StMiscFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Skip LDRD and STRD's second operand.
+ if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD)
+ ++OpIdx;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDRH_POST.
+ if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM3Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative)
+ Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+
+ // If this instr is in register offset/index encoding, set bit[3:0]
+ // to the corresponding Rm register.
+ if (MO2.getReg()) {
+ Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
+ emitWordLE(Binary);
+ return;
+ }
+
+ // This instr is in immediate offset/index encoding, set bit 22 to 1.
+ Binary |= 1 << ARMII::AM3_I_BitShift;
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) {
+ // Set operands
+ Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH
+ Binary |= (ImmOffs & 0xF); // immedL
+ }
+
+ emitWordLE(Binary);
+}
+
+static unsigned getAddrModeUPBits(unsigned Mode) {
+ unsigned Binary = 0;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ // IA - Increment after - bit U = 1 and bit P = 0
+ // IB - Increment before - bit U = 1 and bit P = 1
+ // DA - Decrement after - bit U = 0 and bit P = 0
+ // DB - Decrement before - bit U = 0 and bit P = 1
+ switch (Mode) {
+ default: llvm_unreachable("Unknown addressing sub-mode!");
+ case ARM_AM::da: break;
+ case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break;
+ case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break;
+ case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break;
+ }
+
+ return Binary;
+}
+
+void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Skip operand 0 of an instruction with base register update.
+ unsigned OpIdx = 0;
+ if (IsUpdating)
+ ++OpIdx;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+ // Set bit W(21)
+ if (IsUpdating)
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // Set registers
+ for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg());
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ RegNum < 16);
+ Binary |= 0x1 << RegNum;
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // 32x32->64bit operations have two destination registers. The number
+ // of register definitions will tell us if that's what we're dealing with.
+ unsigned OpIdx = 0;
+ if (MCID.getNumDefs() == 2)
+ Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode Rs
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift;
+
+ // Many multiple instructions (e.g. MLA) have three src operands. Encode
+ // it as Rn (for multiply, that's in the same offset as RdLo.
+ if (MCID.getNumOperands() > OpIdx &&
+ !MCID.OpInfo[OpIdx].isPredicate() &&
+ !MCID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx++);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ if (MO2.isReg()) {
+ // Two register operand form.
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, MO2);
+ ++OpIdx;
+ } else {
+ Binary |= getMachineOpValue(MI, MO1);
+ }
+
+ // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
+ if (MI.getOperand(OpIdx).isImm() &&
+ !MCID.OpInfo[OpIdx].isPredicate() &&
+ !MCID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // PKH instructions are finished at this point
+ if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) {
+ emitWordLE(Binary);
+ return;
+ }
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO = MI.getOperand(OpIdx++);
+ if (OpIdx == MCID.getNumOperands() ||
+ MCID.OpInfo[OpIdx].isPredicate() ||
+ MCID.OpInfo[OpIdx].isOptionalDef()) {
+ // Encode Rm and it's done.
+ Binary |= getMachineOpValue(MI, MO);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode shift_imm.
+ unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ if (MCID.Opcode == ARM::PKHTB) {
+ assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
+ if (ShiftAmt == 32)
+ ShiftAmt = 0;
+ }
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode saturate bit position.
+ unsigned Pos = MI.getOperand(1).getImm();
+ if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16)
+ Pos -= 1;
+ assert((Pos < 16 || (Pos < 32 &&
+ MCID.Opcode != ARM::SSAT16 &&
+ MCID.Opcode != ARM::USAT16)) &&
+ "saturate bit position out of range");
+ Binary |= Pos << 16;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, 2);
+
+ // Encode shift_imm.
+ if (MCID.getNumOperands() == 4) {
+ unsigned ShiftOp = MI.getOperand(3).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ if (Opc == ARM_AM::asr)
+ Binary |= (1 << 6);
+ unsigned ShiftAmt = MI.getOperand(3).getImm();
+ if (ShiftAmt == 32 && Opc == ARM_AM::asr)
+ ShiftAmt = 0;
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ if (MCID.Opcode == ARM::TPsoft) {
+ llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
+ }
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set signed_immed_24 field
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
+ // Remember the base address of the inline jump table.
+ uintptr_t JTBase = MCE.getCurrentPCValue();
+ JTI->addJumpTableBaseAddr(JTIndex, JTBase);
+ DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase
+ << '\n');
+
+ // Now emit the jump table entries.
+ const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ if (IsPIC)
+ // DestBB address - JT base.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
+ else
+ // Absolute DestBB address.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
+ emitWordLE(0);
+ }
+}
+
+void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Handle jump tables.
+ if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) {
+ // First emit a ldr pc, [] instruction.
+ emitDataProcessingInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ unsigned JTIndex =
+ (MCID.Opcode == ARM::BR_JTr)
+ ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
+ emitInlineJumpTable(JTIndex);
+ return;
+ } else if (MCID.Opcode == ARM::BR_JTm) {
+ // First emit a ldr pc, [] instruction.
+ emitLoadStoreInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ emitInlineJumpTable(MI.getOperand(3).getIndex());
+ return;
+ }
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
+ // The return register is LR.
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR);
+ else
+ // otherwise, set the return register
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI,
+ unsigned OpIdx) const {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegClass.contains(RegD);
+ RegD = II->getRegisterInfo().getEncodingValue(RegD);
+ if (!isSPVFP)
+ Binary |= RegD << ARMII::RegRdShift;
+ else {
+ Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
+ Binary |= (RegD & 0x01) << ARMII::D_BitShift;
+ }
+ return Binary;
+}
+
+unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI,
+ unsigned OpIdx) const {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegClass.contains(RegN);
+ RegN = II->getRegisterInfo().getEncodingValue(RegN);
+ if (!isSPVFP)
+ Binary |= RegN << ARMII::RegRnShift;
+ else {
+ Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
+ Binary |= (RegN & 0x01) << ARMII::N_BitShift;
+ }
+ return Binary;
+}
+
+unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI,
+ unsigned OpIdx) const {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegClass.contains(RegM);
+ RegM = II->getRegisterInfo().getEncodingValue(RegM);
+ if (!isSPVFP)
+ Binary |= RegM;
+ else {
+ Binary |= ((RegM & 0x1E) >> 1);
+ Binary |= (RegM & 0x01) << ARMII::M_BitShift;
+ }
+ return Binary;
+}
+
+void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+ assert((Binary & ARMII::D_BitShift) == 0 &&
+ (Binary & ARMII::N_BitShift) == 0 &&
+ (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!");
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // If this is a two-address operand, skip it, e.g. FMACD.
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode Dn / Sn.
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+ Binary |= encodeVFPRn(MI, OpIdx++);
+
+ if (OpIdx == MCID.getNumOperands() ||
+ MCID.OpInfo[OpIdx].isPredicate() ||
+ MCID.OpInfo[OpIdx].isOptionalDef()) {
+ // FCMPEZD etc. has only one operand.
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, OpIdx);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 0);
+ break;
+ case ARMII::VFPConv4Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 0);
+ break;
+ case ARMII::VFPConv5Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 0);
+ break;
+ }
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 1);
+ break;
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 1);
+ break;
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 1);
+ break;
+ }
+
+ if (Form == ARMII::VFPConv5Frm)
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 2);
+ else if (Form == ARMII::VFPConv3Frm)
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 2);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // Encode address base.
+ const MachineOperand &Base = MI.getOperand(OpIdx++);
+ Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
+
+ // If there is a non-zero immediate offset, encode it.
+ if (Base.isReg()) {
+ const MachineOperand &Offset = MI.getOperand(OpIdx);
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
+ if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
+ Binary |= 1 << ARMII::U_BitShift;
+ Binary |= ImmOffs;
+ emitWordLE(Binary);
+ return;
+ }
+ }
+
+ // If immediate offset is omitted, default to +0.
+ Binary |= 1 << ARMII::U_BitShift;
+
+ emitWordLE(Binary);
+}
+
+void
+ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of binary is determined by TableGn.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Skip operand 0 of an instruction with base register update.
+ unsigned OpIdx = 0;
+ if (IsUpdating)
+ ++OpIdx;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+ // Set bit W(21)
+ if (IsUpdating)
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // First register is encoded in Dd.
+ Binary |= encodeVFPRd(MI, OpIdx+2);
+
+ // Count the number of registers.
+ unsigned NumRegs = 1;
+ for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ ++NumRegs;
+ }
+ // Bit 8 will be set if <list> is consecutive 64-bit registers (e.g., D0)
+ // Otherwise, it will be 0, in the case of 32-bit registers.
+ if(Binary & 0x100)
+ Binary |= NumRegs * 2;
+ else
+ Binary |= NumRegs;
+
+ emitWordLE(Binary);
+}
+
+unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI,
+ unsigned OpIdx) const {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegD = II->getRegisterInfo().getEncodingValue(RegD);
+ Binary |= (RegD & 0xf) << ARMII::RegRdShift;
+ Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
+ return Binary;
+}
+
+unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI,
+ unsigned OpIdx) const {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegN = II->getRegisterInfo().getEncodingValue(RegN);
+ Binary |= (RegN & 0xf) << ARMII::RegRnShift;
+ Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
+ return Binary;
+}
+
+unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI,
+ unsigned OpIdx) const {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegM = II->getRegisterInfo().getEncodingValue(RegM);
+ Binary |= (RegM & 0xf);
+ Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
+ return Binary;
+}
+
+/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
+/// data-processing instruction to the corresponding Thumb encoding.
+static unsigned convertNEONDataProcToThumb(unsigned Binary) {
+ assert((Binary & 0xfe000000) == 0xf2000000 &&
+ "not an ARM NEON data-processing instruction");
+ unsigned UBit = (Binary >> 24) & 1;
+ return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
+}
+
+void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
+ const MCInstrDesc &MCID = MI.getDesc();
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+ RegTOpIdx = 0;
+ RegNOpIdx = 1;
+ LnOpIdx = 2;
+ } else { // ARMII::NSetLnFrm
+ RegTOpIdx = 2;
+ RegNOpIdx = 0;
+ LnOpIdx = 3;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
+ RegT = II->getRegisterInfo().getEncodingValue(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, RegNOpIdx);
+
+ unsigned LaneShift;
+ if ((Binary & (1 << 22)) != 0)
+ LaneShift = 0; // 8-bit elements
+ else if ((Binary & (1 << 5)) != 0)
+ LaneShift = 1; // 16-bit elements
+ else
+ LaneShift = 2; // 32-bit elements
+
+ unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
+ unsigned Opc1 = Lane >> 2;
+ unsigned Opc2 = Lane & 3;
+ assert((Opc1 & 3) == 0 && "out-of-range lane number operand");
+ Binary |= (Opc1 << 21);
+ Binary |= (Opc2 << 5);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(1).getReg();
+ RegT = II->getRegisterInfo().getEncodingValue(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, 0);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd.
+ Binary |= encodeNEONRd(MI, 0);
+ // Immediate fields: Op, Cmode, I, Imm3, Imm4
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Op = (Imm >> 12) & 1;
+ unsigned Cmode = (Imm >> 8) & 0xf;
+ unsigned I = (Imm >> 7) & 1;
+ unsigned Imm3 = (Imm >> 4) & 0x7;
+ unsigned Imm4 = Imm & 0xf;
+ Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source register in Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VDUPfdf or VDUPfqf.
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source registers in Dn and Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRn(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VMOVDneon or VMOVQ.
+ emitWordLE(Binary);
+}
+
+#include "ARMGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 000000000000..4891609b336f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,2061 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered through-out the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+STATISTIC(NumTBs, "Number of table branches generated");
+STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
+STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
+STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
+
+// FIXME: This option should be removed once it has received sufficient testing.
+static cl::opt<bool>
+AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
+ cl::desc("Align constant islands in code"));
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+namespace {
+ /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class ARMConstantIslands : public MachineFunctionPass {
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = CountTrailingZeros_32(Size);
+ return Bits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return PO + UnknownPadding(LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of know bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+ };
+
+ std::vector<BasicBlockInfo> BBInfo;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+ private:
+ unsigned MaxDisp;
+ public:
+ bool NegOk;
+ bool IsSoImm;
+ bool KnownAlignment;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
+ bool neg, bool soimm)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
+ KnownAlignment(false) {
+ HighWaterMark = CPEMI->getParent();
+ }
+ /// getMaxDisp - Returns the maximum displacement supported by MI.
+ /// Correct for unknown alignment.
+ /// Conservatively subtract 2 bytes to handle weird alignment effects.
+ unsigned getMaxDisp() const {
+ return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
+ }
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+ ///
+ SmallVector<MachineInstr*, 4> PushPopMIs;
+
+ /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
+ SmallVector<MachineInstr*, 4> T2JumpTables;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
+ const ARMBaseInstrInfo *TII;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+ bool isThumb;
+ bool isThumb1;
+ bool isThumb2;
+ public:
+ static char ID;
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM constant island placement and branch shortening pass";
+ }
+
+ private:
+ void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void scanFunctionJumpTables();
+ void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
+ bool findAvailableWater(CPUser&U, unsigned UserOffset,
+ water_iterator &WaterIter);
+ void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool handleConstantPoolUser(unsigned CPUserIndex);
+ void removeDeadCPEMI(MachineInstr *CPEMI);
+ bool removeUnusedCPEntries();
+ bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U, unsigned &Growth);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+ bool fixupUnconditionalBr(ImmBranch &Br);
+ bool undoLRSpillRestore();
+ bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const;
+ bool optimizeThumb2Instructions();
+ bool optimizeThumb2Branches();
+ bool reorderThumb2JumpTables();
+ bool optimizeThumb2JumpTables();
+ MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ unsigned getUserOffset(CPUser&) const;
+ void dumpBBs();
+ void verify();
+
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset,
+ U.getMaxDisp(), U.NegOk, U.IsSoImm);
+ }
+ };
+ char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify() {
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ unsigned MBBId = MBB->getNumber();
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
+ }
+ DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = getUserOffset(U);
+ // Verify offset using the real max displacement without the safety
+ // adjustment.
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
+ /* DoDump = */ true)) {
+ DEBUG(dbgs() << "OK\n");
+ continue;
+ }
+ DEBUG(dbgs() << "Out of range.\n");
+ dumpBBs();
+ DEBUG(MF->dump());
+ llvm_unreachable("Constant pool entry out of range!");
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << " pa=" << unsigned(BBI.PostAlign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
+
+ DEBUG(dbgs() << "***** ARMConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo();
+ AFI = MF->getInfo<ARMFunctionInfo>();
+ STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
+
+ isThumb = AFI->isThumbFunction();
+ isThumb1 = AFI->isThumb1OnlyFunction();
+ isThumb2 = AFI->isThumb2Function();
+
+ HasFarJump = false;
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ scanFunctionJumpTables();
+ MadeChange |= reorderThumb2JumpTables();
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF->RenumberBlocks();
+ }
+
+ // Thumb1 functions containing constant pools get 4-byte alignment.
+ // This is so we can keep exact track of where the alignment padding goes.
+
+ // ARM and Thumb2 functions need to be 4-byte aligned.
+ if (!isThumb1)
+ MF->ensureAlignment(2); // 2 = log2(4)
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP->isEmpty())
+ doInitialPlacement(CPEMIs);
+
+ /// The next UID to take is the first unused one.
+ AFI->initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ initializeFunctionInfo(CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+
+ /// Remove dead constant pool entries.
+ MadeChange |= removeUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= handleConstantPoolUser(i);
+ if (CPChange && ++NoCPIters > 30)
+ report_fatal_error("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // Shrink 32-bit Thumb2 branch, load, and store instructions.
+ if (isThumb2 && !STI->prefers32BitThumb())
+ MadeChange |= optimizeThumb2Instructions();
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
+ if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
+ MadeChange |= undoLRSpillRestore();
+
+ // Save the mapping between original and cloned constpool entries.
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
+ const CPEntry & CPE = CPEntries[i][j];
+ AFI->recordCPEClone(i, CPE.CPI);
+ }
+ }
+
+ DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ PushPopMIs.clear();
+ T2JumpTables.clear();
+
+ return MadeChange;
+}
+
+/// doInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void
+ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->ensureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
+
+ const DataLayout &TD = *MF->getTarget().getDataLayout();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
+ MachineInstr *CPEMI =
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ ++NumCPEs;
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
+ << Size << ", align = " << Align <<'\n');
+ }
+ DEBUG(BB->dump());
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fallthrough
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
+/// scanFunctionJumpTables - Do a scan of the function, building up
+/// information about the sizes of each block and the locations of all
+/// the jump tables.
+void ARMConstantIslands::scanFunctionJumpTables() {
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
+/// initializeFunctionInfo - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::
+initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ adjustBBOffsetsAfter(MF->begin());
+
+ // Now go back through the instructions and build up our data structures.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' that a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ int Opc = I->getOpcode();
+ if (I->isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
+ case ARM::t2BR_JT:
+ T2JumpTables.push_back(I);
+ continue; // Does not get an entry in ImmBranches
+ case ARM::Bcc:
+ isCond = true;
+ UOpc = ARM::B;
+ // Fallthrough
+ case ARM::B:
+ Bits = 24;
+ Scale = 4;
+ break;
+ case ARM::tBcc:
+ isCond = true;
+ UOpc = ARM::tB;
+ Bits = 8;
+ Scale = 2;
+ break;
+ case ARM::tB:
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc:
+ isCond = true;
+ UOpc = ARM::t2B;
+ Bits = 20;
+ Scale = 2;
+ break;
+ case ARM::t2B:
+ Bits = 24;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+ PushPopMIs.push_back(I);
+
+ if (Opc == ARM::CONSTPOOL_ENTRY)
+ continue;
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ bool NegOk = false;
+ bool IsSoImm = false;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
+
+ // Taking the address of a CP entry.
+ case ARM::LEApcrel:
+ // This takes a SoImm, which is 8 bit immediate rotated. We'll
+ // pretend the maximum offset is 255 * 4. Since each instruction
+ // 4 byte wide, this is always correct. We'll check for other
+ // displacements that fits in a SoImm as well.
+ Bits = 8;
+ Scale = 4;
+ NegOk = true;
+ IsSoImm = true;
+ break;
+ case ARM::t2LEApcrel:
+ Bits = 12;
+ NegOk = true;
+ break;
+ case ARM::tLEApcrel:
+ Bits = 8;
+ Scale = 4;
+ break;
+
+ case ARM::LDRi12:
+ case ARM::LDRcp:
+ case ARM::t2LDRpci:
+ Bits = 12; // +-offset_12
+ NegOk = true;
+ break;
+
+ case ARM::tLDRpci:
+ Bits = 8;
+ Scale = 4; // +(offset_8*4)
+ break;
+
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ NegOk = true;
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+ }
+}
+
+/// computeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->GetInstSizeInBytes(I);
+ // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ // Also consider instructions that may be shrunk later.
+ else if (isThumb && mayOptimizeThumb2Instruction(I))
+ BBI.Unalign = 1;
+ }
+
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->ensureAlignment(2);
+ }
+}
+
+/// getOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+ return Offset;
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// updateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add NewMBB as having
+ // available water after it.
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF->insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ else
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB)
+ .addImm(ARMCC::AL).addReg(0);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ NewBB->transferSuccessors(OrigBB);
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as updateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add OrigMBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(llvm::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ computeBlockSize(OrigBB);
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ computeBlockSize(NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBBOffsetsAfter(OrigBB);
+
+ return NewBB;
+}
+
+/// getUserOffset - Compute the offset of U.MI as seen by the hardware
+/// displacement computation. Update U.KnownAlignment to match its current
+/// basic block location.
+unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
+ unsigned UserOffset = getOffsetOf(U.MI);
+ const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
+ unsigned KnownBits = BBI.internalKnownBits();
+
+ // The value read from PC is offset from the actual instruction address.
+ UserOffset += (isThumb ? 4 : 8);
+
+ // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
+ // Make sure U.getMaxDisp() returns a constrained range.
+ U.KnownAlignment = (KnownBits >= 2);
+
+ // On Thumb, offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // For unknown alignments, getMaxDisp() constrains the range instead.
+ if (isThumb && U.KnownAlignment)
+ UserOffset &= ~3u;
+
+ return UserOffset;
+}
+
+/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+/// UserOffset is computed by getUserOffset above to include PC adjustments. If
+/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
+/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
+bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp,
+ bool NegativeOK, bool IsSoImm) {
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset - UserOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use full range of soimm values.
+ } else if (NegativeOK) {
+ if (UserOffset - TrialOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use full range of soimm values.
+ }
+ return false;
+}
+
+/// isWaterInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specific MI.
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
+bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // that the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return isOffsetInRange(UserOffset, CPEOffset, U);
+}
+
+/// isCPEntryInRange - Returns true if the distance between specific MI and
+/// specific ConstPool entry instruction can fit in MI's displacement field.
+bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned MaxDisp,
+ bool NegOk, bool DoDump) {
+ unsigned CPEOffset = getOffsetOf(CPEMI);
+
+ if (DoDump) {
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
+ }
+
+ return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true of the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
+ || PredMI->getOpcode() == ARM::t2B)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. This is the maximum number of blocks
+ // changed before calling this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
+ }
+}
+
+/// decrementCPEReferenceCount - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
+ MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ removeDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+/// LookForCPEntryInRange - see if the currently referenced CPE is in range;
+/// if not, see if an in-range clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
+ true)) {
+ DEBUG(dbgs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
+ U.NegOk)) {
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specific unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ switch (Opc) {
+ case ARM::tB:
+ return ((1<<10)-1)*2;
+ case ARM::t2B:
+ return ((1<<23)-1)*2;
+ default:
+ break;
+ }
+
+ return ((1<<23)-1)*4;
+}
+
+/// findAvailableWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, WaterIter
+/// is set to the WaterList entry. For Thumb, prefer water that will not
+/// introduce padding to water that will. To ensure that this pass
+/// terminates, the CPE location for a particular CPUser is only allowed to
+/// move to a lower address, so search backward from the end of the list and
+/// prefer the first water that is in range.
+bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ unsigned Growth;
+ if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
+ return true;
+ }
+ if (IP == B)
+ break;
+ }
+ return BestGrowth != ~0u;
+}
+
+/// createNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there. (The addition
+ // below is for the unconditional branch we will be adding: 4 bytes on ARM +
+ // Thumb2, 2 on Thumb1.
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = isThumb1 ? 2 : 4;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
+
+ if (isOffsetInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += Delta;
+ adjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
+
+ // What a big block. Find a place within the block to split it. This is a
+ // little tricky on Thumb1 since instructions are 2 bytes and constant pool
+ // entries are 4 bytes: if instruction I references island CPE, and
+ // instruction I+1 references CPE', it will not work well to put CPE as far
+ // forward as possible, since then CPE' cannot immediately follow it (that
+ // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
+ // need to create a new island. So, we make a first guess, then walk through
+ // the instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions that
+ // reference CPEs will be able to use the same island area; if not, we back
+ // up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then align to
+ // LogAlign which is the largest possible alignment in the function.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // The 4 in the following is for the unconditional branch we'll be inserting
+ // (allows for long branch on Thumb1). Alignment of the island is handled
+ // inside isOffsetInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ BaseInsertOffset = UserBBI.postOffset() - UPad - 8;
+ DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
+ unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
+ // Shift intertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
+
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+ NewMBB = splitBlockBeforeInstr(MI);
+}
+
+/// handleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive.
+ unsigned UserOffset = getUserOffset(U);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = findInRangeCPEntry(U, UserOffset);
+ if (result==1) return false;
+ else if (result==2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = AFI->createPICLabelUId();
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (findAvailableWater(U, UserOffset, IP)) {
+ DEBUG(dbgs() << "Found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.erase(WaterBB))
+ NewWaterList.insert(NewIsland);
+
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+
+ } else {
+ // No water found.
+ DEBUG(dbgs() << "No water found\n");
+ createNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // splitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF->insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ updateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ decrementCPEReferenceCount(CPI, CPEMI);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ ++NumCPEs;
+
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
+ // Increase the size of the island block to account for the new entry.
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
+
+ return true;
+}
+
+/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBInfo[CPEBB->getNumber()].Size -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ adjustBBOffsetsAfter(CPEBB);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::removeUnusedCPEntries() {
+ unsigned MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ removeDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// isBBInRange - Returns true if the distance between specific MI and
+/// specific BB can fit in MI's displacement field.
+bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = getOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return fixupUnconditionalBr(Br);
+ return fixupConditionalBr(Br);
+}
+
+/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled in the epilogue, then we can use BL to implement a far jump.
+/// Otherwise, add an intermediate branch instruction to a branch.
+bool
+ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!isThumb1)
+ llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");
+
+ // Use BL to implement far jump.
+ Br.MaxDisp = (1 << 21) * 2;
+ MI->setDesc(TII->get(ARM::tBfar));
+ BBInfo[MBB->getNumber()].Size += 2;
+ adjustBBOffsetsAfter(MBB);
+ HasFarJump = true;
+ ++NumUBrFixed;
+
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
+
+ return true;
+}
+
+/// fixupConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
+ CC = ARMCC::getOppositeCondition(CC);
+ unsigned CCReg = MI->getOperand(2).getReg();
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ MI->getOperand(1).setImm(CC);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ splitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size -= delta;
+ MBB->back().eraseFromParent();
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
+ .addMBB(NextBB).addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ if (isThumb)
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
+ .addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+ adjustBBOffsetsAfter(MBB);
+ return true;
+}
+
+/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills
+/// LR / restores LR to pc. FIXME: This is done here because it's only possible
+/// to do this if tBfar is not used.
+bool ARMConstantIslands::undoLRSpillRestore() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
+ MachineInstr *MI = PushPopMIs[i];
+ // First two operands are predicates.
+ if (MI->getOpcode() == ARM::tPOP_RET &&
+ MI->getOperand(2).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 3) {
+ // Create the new insn and copy the predicate from the old.
+ BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
+
+// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
+// below may shrink MI.
+bool
+ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ // optimizeThumb2Instructions.
+ case ARM::t2LEApcrel:
+ case ARM::t2LDRpci:
+ // optimizeThumb2Branches.
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ case ARM::tBcc:
+ // optimizeThumb2JumpTables.
+ case ARM::t2BR_JT:
+ return true;
+ }
+ return false;
+}
+
+bool ARMConstantIslands::optimizeThumb2Instructions() {
+ bool MadeChange = false;
+
+ // Shrink ADR and LDR from constantpool.
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned Opcode = U.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2LEApcrel:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLEApcrel;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ case ARM::t2LDRpci:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLDRpci;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ }
+
+ if (!NewOpc)
+ continue;
+
+ unsigned UserOffset = getUserOffset(U);
+ unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+
+ // Be conservative with inline asm.
+ if (!U.KnownAlignment)
+ MaxOffs -= 2;
+
+ // FIXME: Check if offset is multiple of scale if scale is not 4.
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ DEBUG(dbgs() << "Shrink: " << *U.MI);
+ U.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = U.MI->getParent();
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
+ ++NumT2CPShrunk;
+ MadeChange = true;
+ }
+ }
+
+ MadeChange |= optimizeThumb2Branches();
+ MadeChange |= optimizeThumb2JumpTables();
+ return MadeChange;
+}
+
+bool ARMConstantIslands::optimizeThumb2Branches() {
+ bool MadeChange = false;
+
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
+ ImmBranch &Br = ImmBranches[i];
+ unsigned Opcode = Br.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2B:
+ NewOpc = ARM::tB;
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc: {
+ NewOpc = ARM::tBcc;
+ Bits = 8;
+ Scale = 2;
+ break;
+ }
+ }
+ if (NewOpc) {
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
+ DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
+ Br.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
+ ++NumT2BrShrunk;
+ MadeChange = true;
+ }
+ }
+
+ Opcode = Br.MI->getOpcode();
+ if (Opcode != ARM::tBcc)
+ continue;
+
+ // If the conditional branch doesn't kill CPSR, then CPSR can be liveout
+ // so this transformation is not safe.
+ if (!Br.MI->killsRegister(ARM::CPSR))
+ continue;
+
+ NewOpc = 0;
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
+ if (Pred == ARMCC::EQ)
+ NewOpc = ARM::tCBZ;
+ else if (Pred == ARMCC::NE)
+ NewOpc = ARM::tCBNZ;
+ if (!NewOpc)
+ continue;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ // Check if the distance is within 126. Subtract starting offset by 2
+ // because the cmp will be eliminated.
+ unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
+ if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
+ MachineBasicBlock::iterator CmpMI = Br.MI;
+ if (CmpMI != Br.MI->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPi8) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ Pred = getInstrPredicate(CmpMI, PredReg);
+ if (Pred == ARMCC::AL &&
+ CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg)) {
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
+ MachineInstr *NewBR =
+ BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
+ CmpMI->eraseFromParent();
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
+ ++NumCBZ;
+ MadeChange = true;
+ }
+ }
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// jumptables when it's possible.
+bool ARMConstantIslands::optimizeThumb2JumpTables() {
+ bool MadeChange = false;
+
+ // FIXME: After the tables are shrunk, can we get rid some of the
+ // constantpool tables?
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ bool ByteOk = true;
+ bool HalfWordOk = true;
+ unsigned JTOffset = getOffsetOf(MI) + 4;
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
+ // Negative offset is not ok. FIXME: We should change BB layout to make
+ // sure all the branches are forward.
+ if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
+ ByteOk = false;
+ unsigned TBHLimit = ((1<<16)-1)*2;
+ if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
+ HalfWordOk = false;
+ if (!ByteOk && !HalfWordOk)
+ break;
+ }
+
+ if (ByteOk || HalfWordOk) {
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ bool BaseRegKill = MI->getOperand(0).isKill();
+ if (!BaseRegKill)
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
+
+ // Scan backwards to find the instruction that defines the base
+ // register. Due to post-RA scheduling, we can't count on it
+ // immediately preceding the branch instruction.
+ MachineBasicBlock::iterator PrevI = MI;
+ MachineBasicBlock::iterator B = MBB->begin();
+ while (PrevI != B && !PrevI->definesRegister(BaseReg))
+ --PrevI;
+
+ // If for some reason we didn't find it, we can't do anything, so
+ // just skip this one.
+ if (!PrevI->definesRegister(BaseReg))
+ continue;
+
+ MachineInstr *AddrMI = PrevI;
+ bool OptOk = true;
+ // Examine the instruction that calculates the jumptable entry address.
+ // Make sure it only defines the base register and kills any uses
+ // other than the index register.
+ for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
+ const MachineOperand &MO = AddrMI->getOperand(k);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() != BaseReg) {
+ OptOk = false;
+ break;
+ }
+ if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
+ OptOk = false;
+ break;
+ }
+ }
+ if (!OptOk)
+ continue;
+
+ // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
+ // that gave us the initial base register definition.
+ for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
+ ;
+
+ // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
+ // to delete it as well.
+ MachineInstr *LeaMI = PrevI;
+ if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
+ LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
+ LeaMI->getOperand(0).getReg() != BaseReg)
+ OptOk = false;
+
+ if (!OptOk)
+ continue;
+
+ DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI
+ << " lea: " << *LeaMI);
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(MI->getOperand(JTOpIdx+1).getImm());
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
+ // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
+ // is 2-byte aligned. For now, asm printer will fix it up.
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
+ OrigSize += TII->GetInstSizeInBytes(LeaMI);
+ OrigSize += TII->GetInstSizeInBytes(MI);
+
+ AddrMI->eraseFromParent();
+ LeaMI->eraseFromParent();
+ MI->eraseFromParent();
+
+ int delta = OrigSize - NewSize;
+ BBInfo[MBB->getNumber()].Size -= delta;
+ adjustBBOffsetsAfter(MBB);
+
+ ++NumTBs;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// jump tables always branch forwards, since that's what tbb and tbh need.
+bool ARMConstantIslands::reorderThumb2JumpTables() {
+ bool MadeChange = false;
+
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ // We prefer if target blocks for the jump table come after the jump
+ // instruction so we can use TB[BH]. Loop through the target blocks
+ // and try to adjust them such that that's true.
+ int JTNumber = MI->getParent()->getNumber();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ int DTNumber = MBB->getNumber();
+
+ if (DTNumber < JTNumber) {
+ // The destination precedes the switch. Try to move the block forward
+ // so we have a positive offset.
+ MachineBasicBlock *NewBB =
+ adjustJTTargetBlockForward(MBB, MI->getParent());
+ if (NewBB)
+ MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+MachineBasicBlock *ARMConstantIslands::
+adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
+ // If the destination block is terminated by an unconditional branch,
+ // try to move it; otherwise, create a new block following the jump
+ // table that branches back to the actual target. This is a very simple
+ // heuristic. FIXME: We can definitely improve it.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineOperand, 4> CondPrior;
+ MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator OldPrior = prior(BBi);
+
+ // If the block terminator isn't analyzable, don't try to move the block
+ bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+
+ // If the block ends in an unconditional branch, move it. The prior block
+ // has to have an analyzable terminator for us to move this one. Be paranoid
+ // and make sure we're not trying to move the entry block of the function.
+ if (!B && Cond.empty() && BB != MF->begin() &&
+ !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ BB->moveAfter(JTBB);
+ OldPrior->updateTerminator();
+ BB->updateTerminator();
+ // Update numbering to account for the block being moved.
+ MF->RenumberBlocks();
+ ++NumJTMoved;
+ return NULL;
+ }
+
+ // Create a new MBB for the code after the jump BB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MF->insert(MBBI, NewBB);
+
+ // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond directly to anything in the source.
+ assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
+ .addImm(ARMCC::AL).addReg(0);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Update the CFG.
+ NewBB->addSuccessor(BB);
+ JTBB->removeSuccessor(BB);
+ JTBB->addSuccessor(NewBB);
+
+ ++NumJTInserted;
+ return NewBB;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 000000000000..4e703ec3c1a8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,306 @@
+//===-- ARMConstantPoolValue.cpp - ARM constantpool value -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolValue
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolValue::ARMConstantPoolValue(Type *Ty, unsigned id,
+ ARMCP::ARMCPKind kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier modifier,
+ bool addCurrentAddress)
+ : MachineConstantPoolValue(Ty), LabelId(id), Kind(kind),
+ PCAdjust(PCAdj), Modifier(modifier),
+ AddCurrentAddress(addCurrentAddress) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id,
+ ARMCP::ARMCPKind kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier modifier,
+ bool addCurrentAddress)
+ : MachineConstantPoolValue((Type*)Type::getInt32Ty(C)),
+ LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier),
+ AddCurrentAddress(addCurrentAddress) {}
+
+ARMConstantPoolValue::~ARMConstantPoolValue() {}
+
+const char *ARMConstantPoolValue::getModifierText() const {
+ switch (Modifier) {
+ // FIXME: Are these case sensitive? It'd be nice to lower-case all the
+ // strings if that's legal.
+ case ARMCP::no_modifier: return "none";
+ case ARMCP::TLSGD: return "tlsgd";
+ case ARMCP::GOT: return "GOT";
+ case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOTTPOFF: return "gottpoff";
+ case ARMCP::TPOFF: return "tpoff";
+ }
+ llvm_unreachable("Unknown modifier!");
+}
+
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ llvm_unreachable("Shouldn't be calling this directly!");
+}
+
+void
+ARMConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddInteger(LabelId);
+ ID.AddInteger(PCAdjust);
+}
+
+bool
+ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
+ if (ACPV->Kind == Kind &&
+ ACPV->PCAdjust == PCAdjust &&
+ ACPV->Modifier == Modifier) {
+ if (ACPV->LabelId == LabelId)
+ return true;
+ // Two PC relative constpool entries containing the same GV address or
+ // external symbols. FIXME: What about blockaddress?
+ if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol)
+ return true;
+ }
+ return false;
+}
+
+void ARMConstantPoolValue::dump() const {
+ errs() << " " << *this;
+}
+
+void ARMConstantPoolValue::print(raw_ostream &O) const {
+ if (Modifier) O << "(" << getModifierText() << ")";
+ if (PCAdjust != 0) {
+ O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
+ if (AddCurrentAddress) O << "-.";
+ O << ")";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolConstant
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolConstant::ARMConstantPoolConstant(Type *Ty,
+ const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(Ty, ID, Kind, PCAdj, Modifier, AddCurrentAddress),
+ CVal(C) {}
+
+ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue((Type*)C->getType(), ID, Kind, PCAdj, Modifier,
+ AddCurrentAddress),
+ CVal(C) {}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) {
+ return new ARMConstantPoolConstant(C, ID, ARMCP::CPValue, 0,
+ ARMCP::no_modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const GlobalValue *GV,
+ ARMCP::ARMCPModifier Modifier) {
+ return new ARMConstantPoolConstant((Type*)Type::getInt32Ty(GV->getContext()),
+ GV, 0, ARMCP::CPValue, 0,
+ Modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind, unsigned char PCAdj) {
+ return new ARMConstantPoolConstant(C, ID, Kind, PCAdj,
+ ARMCP::no_modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind, unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress) {
+ return new ARMConstantPoolConstant(C, ID, Kind, PCAdj, Modifier,
+ AddCurrentAddress);
+}
+
+const GlobalValue *ARMConstantPoolConstant::getGV() const {
+ return dyn_cast_or_null<GlobalValue>(CVal);
+}
+
+const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
+ return dyn_cast_or_null<BlockAddress>(CVal);
+}
+
+int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV);
+ if (!APC) continue;
+ if (APC->CVal == CVal && equals(APC))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolConstant *ACPC = dyn_cast<ARMConstantPoolConstant>(ACPV);
+ return ACPC && ACPC->CVal == CVal && ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolConstant::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(CVal);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolConstant::print(raw_ostream &O) const {
+ O << CVal->getName();
+ ARMConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolSymbol
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
+ unsigned id,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
+ AddCurrentAddress),
+ S(s) {}
+
+ARMConstantPoolSymbol *
+ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
+ unsigned ID, unsigned char PCAdj) {
+ return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
+}
+
+int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
+ if (!APS) continue;
+
+ if (APS->S == S && equals(APS))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV);
+ return ACPS && ACPS->S == S && ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddString(S);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolSymbol::print(raw_ostream &O) const {
+ O << S;
+ ARMConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolMBB
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolMBB::ARMConstantPoolMBB(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned id, unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(C, id, ARMCP::CPMachineBasicBlock, PCAdj,
+ Modifier, AddCurrentAddress),
+ MBB(mbb) {}
+
+ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned ID,
+ unsigned char PCAdj) {
+ return new ARMConstantPoolMBB(C, mbb, ID, PCAdj, ARMCP::no_modifier, false);
+}
+
+int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV);
+ if (!APMBB) continue;
+
+ if (APMBB->MBB == MBB && equals(APMBB))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolMBB *ACPMBB = dyn_cast<ARMConstantPoolMBB>(ACPV);
+ return ACPMBB && ACPMBB->MBB == MBB &&
+ ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(MBB);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolMBB::print(raw_ostream &O) const {
+ O << "BB#" << MBB->getNumber();
+ ARMConstantPoolValue::print(O);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 000000000000..93812fe6bb37
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,226 @@
+//===-- ARMConstantPoolValue.h - ARM constantpool value ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+
+namespace llvm {
+
+class BlockAddress;
+class Constant;
+class GlobalValue;
+class LLVMContext;
+class MachineBasicBlock;
+
+namespace ARMCP {
+ enum ARMCPKind {
+ CPValue,
+ CPExtSymbol,
+ CPBlockAddress,
+ CPLSDA,
+ CPMachineBasicBlock
+ };
+
+ enum ARMCPModifier {
+ no_modifier,
+ TLSGD,
+ GOT,
+ GOTOFF,
+ GOTTPOFF,
+ TPOFF
+ };
+}
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent PC-relative displacement between the address of the load
+/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ unsigned LabelId; // Label id of the load.
+ ARMCP::ARMCPKind Kind; // Kind of constant.
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative.
+ // 8 for ARM, 4 for Thumb.
+ ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ bool AddCurrentAddress;
+
+protected:
+ ARMConstantPoolValue(Type *Ty, unsigned id, ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+public:
+ virtual ~ARMConstantPoolValue();
+
+ ARMCP::ARMCPModifier getModifier() const { return Modifier; }
+ const char *getModifierText() const;
+ bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
+
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+
+ unsigned getLabelId() const { return LabelId; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+
+ bool isGlobalValue() const { return Kind == ARMCP::CPValue; }
+ bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; }
+ bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; }
+ bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
+ bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
+
+ virtual unsigned getRelocationInfo() const { return 2; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ bool equals(const ARMConstantPoolValue *A) const {
+ return this->LabelId == A->LabelId &&
+ this->PCAdjust == A->PCAdjust &&
+ this->Modifier == A->Modifier;
+ }
+
+ virtual void print(raw_ostream &O) const;
+ void print(raw_ostream *O) const { if (O) print(*O); }
+ void dump() const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+/// ARMConstantPoolConstant - ARM-specific constant pool values for Constants,
+/// Functions, and BlockAddresses.
+class ARMConstantPoolConstant : public ARMConstantPoolValue {
+ const Constant *CVal; // Constant being loaded.
+
+ ARMConstantPoolConstant(const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+ ARMConstantPoolConstant(Type *Ty, const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID);
+ static ARMConstantPoolConstant *Create(const GlobalValue *GV,
+ ARMCP::ARMCPModifier Modifier);
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj);
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ const GlobalValue *getGV() const;
+ const BlockAddress *getBlockAddress() const;
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ virtual void print(raw_ostream &O) const;
+ static bool classof(const ARMConstantPoolValue *APV) {
+ return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
+ }
+};
+
+/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
+/// symbols.
+class ARMConstantPoolSymbol : public ARMConstantPoolValue {
+ const std::string S; // ExtSymbol being loaded.
+
+ ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
+ unsigned ID, unsigned char PCAdj);
+
+ const char *getSymbol() const { return S.c_str(); }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void print(raw_ostream &O) const;
+
+ static bool classof(const ARMConstantPoolValue *ACPV) {
+ return ACPV->isExtSymbol();
+ }
+};
+
+/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
+/// block.
+class ARMConstantPoolMBB : public ARMConstantPoolValue {
+ const MachineBasicBlock *MBB; // Machine basic block.
+
+ ARMConstantPoolMBB(LLVMContext &C, const MachineBasicBlock *mbb, unsigned id,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolMBB *Create(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned ID, unsigned char PCAdj);
+
+ const MachineBasicBlock *getMBB() const { return MBB; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void print(raw_ostream &O) const;
+
+ static bool classof(const ARMConstantPoolValue *ACPV) {
+ return ACPV->isMachineBasicBlock();
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
new file mode 100644
index 000000000000..beb843ca9aa8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -0,0 +1,1249 @@
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-pseudo"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
+ cl::desc("Verify machine code after expanding ARM pseudos"));
+
+namespace {
+ class ARMExpandPseudo : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMExpandPseudo() : MachineFunctionPass(ID) {}
+
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pseudo instruction expansion pass";
+ }
+
+ private:
+ void TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
+ bool ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool ExpandMBB(MachineBasicBlock &MBB);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI);
+ void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
+ void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt);
+ void ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ };
+ char ARMExpandPseudo::ID = 0;
+}
+
+/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
+/// the instructions created from the expansion.
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI,
+ MachineInstrBuilder &DefMI) {
+ const MCInstrDesc &Desc = OldMI.getDesc();
+ for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = OldMI.getOperand(i);
+ assert(MO.isReg() && MO.getReg());
+ if (MO.isUse())
+ UseMI.addOperand(MO);
+ else
+ DefMI.addOperand(MO);
+ }
+}
+
+namespace {
+ // Constants for register spacing in NEON load/store instructions.
+ // For quad-register load-lane and store-lane pseudo instructors, the
+ // spacing is initially assumed to be EvenDblSpc, and that is changed to
+ // OddDblSpc depending on the lane number operand.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
+ // Entries for NEON load/store information table. The table is sorted by
+ // PseudoOpc for fast binary-search lookups.
+ struct NEONLdStTableEntry {
+ uint16_t PseudoOpc;
+ uint16_t RealOpc;
+ bool IsLoad;
+ bool isUpdating;
+ bool hasWritebackOperand;
+ uint8_t RegSpacing; // One of type NEONRegSpacing
+ uint8_t NumRegs; // D registers loaded or stored
+ uint8_t RegElts; // elements per D register; used for lane ops
+ // FIXME: Temporary flag to denote whether the real instruction takes
+ // a single register (like the encoding) or all of the registers in
+ // the list (like the asm syntax and the isel DAG). When all definitions
+ // are converted to take only the single encoded register, this will
+ // go away.
+ bool copyAllListRegs;
+
+ // Comparison methods for binary search of the table.
+ bool operator<(const NEONLdStTableEntry &TE) const {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
+ return TE.PseudoOpc < PseudoOpc;
+ }
+ friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ };
+}
+
+static const NEONLdStTableEntry NEONLdStTable[] = {
+{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
+{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true},
+{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
+
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true},
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true},
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true},
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true},
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true},
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true},
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
+
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true},
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true},
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true},
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
+
+{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true},
+{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true},
+{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true},
+{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true},
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
+
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true},
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true},
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true},
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true},
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true},
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true},
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true},
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true},
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true},
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true},
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true},
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true},
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
+};
+
+/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
+/// load or store pseudo instruction.
+static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
+ const unsigned NumEntries = array_lengthof(NEONLdStTable);
+
+#ifndef NDEBUG
+ // Make sure the table is sorted.
+ static bool TableChecked = false;
+ if (!TableChecked) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
+ "NEONLdStTable is not sorted!");
+ TableChecked = true;
+ }
+#endif
+
+ const NEONLdStTableEntry *I =
+ std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
+ if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+ return I;
+ return NULL;
+}
+
+/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
+/// corresponding to the specified register spacing. Not all of the results
+/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
+static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
+ const TargetRegisterInfo *TRI, unsigned &D0,
+ unsigned &D1, unsigned &D2, unsigned &D3) {
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_4);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing");
+ D0 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_3);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_5);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_7);
+ }
+}
+
+/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VLD instructions with D register operands.
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+
+ if (TableEntry->isUpdating)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->hasWritebackOperand)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // For an instruction writing double-spaced subregs, the pseudo instruction
+ // has an extra operand that is a use of the super-register. Record the
+ // operand index and skip over it.
+ unsigned SrcOpIdx = 0;
+ if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc)
+ SrcOpIdx = OpIdx++;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source operand used for double-spaced subregs over
+ // to the new instruction as an implicit operand.
+ if (SrcOpIdx != 0) {
+ MachineOperand MO = MI.getOperand(SrcOpIdx);
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ }
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+}
+
+/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VST instructions with D register operands.
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ if (TableEntry->isUpdating)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->hasWritebackOperand)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, getUndefRegState(SrcIsUndef));
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1, getUndefRegState(SrcIsUndef));
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
+ MIB.addReg(D2, getUndefRegState(SrcIsUndef));
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
+ MIB.addReg(D3, getUndefRegState(SrcIsUndef));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+}
+
+/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+ unsigned RegElts = TableEntry->RegElts;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ // The lane operand is always the 3rd from last operand, before the 2
+ // predicate operands.
+ unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
+
+ // Adjust the lane and spacing as needed for Q registers.
+ assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
+ if (RegSpc == EvenDblSpc && Lane >= RegElts) {
+ RegSpc = OddDblSpc;
+ Lane -= RegElts;
+ }
+ assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
+
+ unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
+ unsigned DstReg = 0;
+ bool DstIsDead = false;
+ if (TableEntry->IsLoad) {
+ DstIsDead = MI.getOperand(OpIdx).isDead();
+ DstReg = MI.getOperand(OpIdx++).getReg();
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+ }
+
+ if (TableEntry->isUpdating)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->hasWritebackOperand)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Grab the super-register source.
+ MachineOperand MO = MI.getOperand(OpIdx++);
+ if (!TableEntry->IsLoad)
+ GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
+
+ // Add the subregs as sources of the new instruction.
+ unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
+ getKillRegState(MO.isKill()));
+ MIB.addReg(D0, SrcFlags);
+ if (NumRegs > 1)
+ MIB.addReg(D1, SrcFlags);
+ if (NumRegs > 2)
+ MIB.addReg(D2, SrcFlags);
+ if (NumRegs > 3)
+ MIB.addReg(D3, SrcFlags);
+
+ // Add the lane number operand.
+ MIB.addImm(Lane);
+ OpIdx += 1;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source to be an implicit source.
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ if (TableEntry->IsLoad)
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MI.eraseFromParent();
+}
+
+/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ // Transfer the destination register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ if (IsExt)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0);
+
+ // Copy the other source register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
+ const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+ MachineInstrBuilder LO16, HI16;
+
+ if (!STI->hasV6T2Ops() &&
+ (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+ // Expand into a movi + orr.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
+ unsigned ImmVal = (unsigned)MO.getImm();
+ unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ LO16 = LO16.addImm(SOImmValV1);
+ HI16 = HI16.addImm(SOImmValV2);
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg).addReg(0);
+ HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+ return;
+ }
+
+ unsigned LO16Opc = 0;
+ unsigned HI16Opc = 0;
+ if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
+ LO16Opc = ARM::t2MOVi16;
+ HI16Opc = ARM::t2MOVTi16;
+ } else {
+ LO16Opc = ARM::MOVi16;
+ HI16Opc = ARM::MOVTi16;
+ }
+
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ }
+
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg);
+ HI16.addImm(Pred).addReg(PredReg);
+
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+}
+
+bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::VMOVScc:
+ case ARM::VMOVDcc: {
+ unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
+ MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCr:
+ case ARM::MOVCCr: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCsi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ (MI.getOperand(1).getReg()))
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .addReg(MI.getOperand(5).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVCCsr: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
+ (MI.getOperand(1).getReg()))
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addReg(MI.getOperand(3).getReg(),
+ getKillRegState(MI.getOperand(3).isKill()))
+ .addImm(MI.getOperand(4).getImm())
+ .addImm(MI.getOperand(5).getImm()) // 'pred'
+ .addReg(MI.getOperand(6).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCi16: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCi:
+ case ARM::MOVCCi: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MVNCCi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::Int_eh_sjlj_dispatchsetup: {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMBaseInstrInfo *AII =
+ static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+ // For functions using a base pointer, we rematerialize it (via the frame
+ // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
+ // for us. Otherwise, expand to nothing.
+ if (RI.hasBasePointer(MF)) {
+ int32_t NumBytes = AFI->getFramePtrSpillOffset();
+ unsigned FramePtr = RI.getFrameRegister(MF);
+ assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "base pointer without frame pointer?");
+
+ if (AFI->isThumb2Function()) {
+ emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+ } else if (AFI->isThumbFunction()) {
+ emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *TII, RI);
+ } else {
+ emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0,
+ *TII);
+ }
+ // If there's dynamic realignment, adjust for it.
+ if (RI.needsStackRealignment(MF)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ // Emit bic r6, r6, MaxAlign
+ unsigned bicOpc = AFI->isThumbFunction() ?
+ ARM::t2BICri : ARM::BICri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ }
+
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag: {
+ // These are just fancy MOVs insructions.
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
+ ARM_AM::lsr : ARM_AM::asr),
+ 1)))
+ .addReg(ARM::CPSR, RegState::Define);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::RRX: {
+ // This encodes as "MOVs Rd, Rm, rrx
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
+ .addReg(0);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tTPsoft:
+ case ARM::TPsoft: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL))
+ .addExternalSymbol("__aeabi_read_tp", 0);
+
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
+ ? ARM::tLDRpci : ARM::t2LDRpci;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MachineInstrBuilder MIB1 =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::tPICADD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOV_ga_dyn:
+ case ARM::MOV_ga_pcrel:
+ case ARM::MOV_ga_pcrel_ldr:
+ case ARM::t2MOV_ga_dyn:
+ case ARM::t2MOV_ga_pcrel: {
+ // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode.
+ unsigned LabelId = AFI->createPICLabelUId();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ const MachineOperand &MO1 = MI.getOperand(1);
+ const GlobalValue *GV = MO1.getGlobal();
+ unsigned TF = MO1.getTargetFlags();
+ bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode!=ARM::t2MOV_ga_dyn);
+ bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
+ unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
+ unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
+ unsigned LO16TF = isPIC
+ ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
+ unsigned HI16TF = isPIC
+ ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY;
+ unsigned PICAddOpc = isARM
+ ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
+ : ARM::tPICADD;
+ MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(LO16Opc), DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
+ .addImm(LabelId);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(HI16Opc), DstReg)
+ .addReg(DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
+ .addImm(LabelId);
+ if (!isPIC) {
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(PICAddOpc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg).addImm(LabelId);
+ if (isARM) {
+ AddDefaultPred(MIB3);
+ if (Opcode == ARM::MOV_ga_pcrel_ldr)
+ MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ }
+ TransferImpOps(MI, MIB1, MIB3);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVi32imm:
+ case ARM::MOVCCi32imm:
+ case ARM::t2MOVi32imm:
+ case ARM::t2MOVCCi32imm:
+ ExpandMOV32BitImm(MBB, MBBI);
+ return true;
+
+ case ARM::VLDMQIA: {
+ unsigned NewOpc = ARM::VLDMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register destination.
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the source register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the destination operands (D subregs).
+ unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VSTMQIA: {
+ unsigned NewOpc = ARM::VSTMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register source.
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the destination register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the source operands (D subregs).
+ unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ MIB.addReg(D0).addReg(D1);
+
+ if (SrcIsKill) // Add an implicit kill for the Q register.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+
+ TransferImpOps(MI, MIB, MIB);
+ MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::VDUPfqf:
+ case ARM::VDUPfdf:{
+ unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLN32q :
+ ARM::VDUPLN32d;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Lane = TRI->getEncodingValue(SrcReg) & 1;
+ unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
+ Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
+ &ARM::DPR_VFP2RegClass);
+ // The lane is [0,1] for the containing DReg superregister.
+ // Copy the dst/src register operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addReg(DReg);
+ ++OpIdx;
+ // Add the lane select operand.
+ MIB.addImm(Lane);
+ // Add the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD3DUPd8Pseudo:
+ case ARM::VLD3DUPd16Pseudo:
+ case ARM::VLD3DUPd32Pseudo:
+ case ARM::VLD3DUPd8Pseudo_UPD:
+ case ARM::VLD3DUPd16Pseudo_UPD:
+ case ARM::VLD3DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ ExpandVLD(MBBI);
+ return true;
+
+ case ARM::VST2q8Pseudo:
+ case ARM::VST2q16Pseudo:
+ case ARM::VST2q32Pseudo:
+ case ARM::VST2q8PseudoWB_fixed:
+ case ARM::VST2q16PseudoWB_fixed:
+ case ARM::VST2q32PseudoWB_fixed:
+ case ARM::VST2q8PseudoWB_register:
+ case ARM::VST2q16PseudoWB_register:
+ case ARM::VST2q32PseudoWB_register:
+ case ARM::VST3d8Pseudo:
+ case ARM::VST3d16Pseudo:
+ case ARM::VST3d32Pseudo:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST3d8Pseudo_UPD:
+ case ARM::VST3d16Pseudo_UPD:
+ case ARM::VST3d32Pseudo_UPD:
+ case ARM::VST1d64TPseudoWB_fixed:
+ case ARM::VST1d64TPseudoWB_register:
+ case ARM::VST3q8Pseudo_UPD:
+ case ARM::VST3q16Pseudo_UPD:
+ case ARM::VST3q32Pseudo_UPD:
+ case ARM::VST3q8oddPseudo:
+ case ARM::VST3q16oddPseudo:
+ case ARM::VST3q32oddPseudo:
+ case ARM::VST3q8oddPseudo_UPD:
+ case ARM::VST3q16oddPseudo_UPD:
+ case ARM::VST3q32oddPseudo_UPD:
+ case ARM::VST4d8Pseudo:
+ case ARM::VST4d16Pseudo:
+ case ARM::VST4d32Pseudo:
+ case ARM::VST1d64QPseudo:
+ case ARM::VST4d8Pseudo_UPD:
+ case ARM::VST4d16Pseudo_UPD:
+ case ARM::VST4d32Pseudo_UPD:
+ case ARM::VST1d64QPseudoWB_fixed:
+ case ARM::VST1d64QPseudoWB_register:
+ case ARM::VST4q8Pseudo_UPD:
+ case ARM::VST4q16Pseudo_UPD:
+ case ARM::VST4q32Pseudo_UPD:
+ case ARM::VST4q8oddPseudo:
+ case ARM::VST4q16oddPseudo:
+ case ARM::VST4q32oddPseudo:
+ case ARM::VST4q8oddPseudo_UPD:
+ case ARM::VST4q16oddPseudo_UPD:
+ case ARM::VST4q32oddPseudo_UPD:
+ ExpandVST(MBBI);
+ return true;
+
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD3LNd8Pseudo:
+ case ARM::VLD3LNd16Pseudo:
+ case ARM::VLD3LNd32Pseudo:
+ case ARM::VLD3LNq16Pseudo:
+ case ARM::VLD3LNq32Pseudo:
+ case ARM::VLD3LNd8Pseudo_UPD:
+ case ARM::VLD3LNd16Pseudo_UPD:
+ case ARM::VLD3LNd32Pseudo_UPD:
+ case ARM::VLD3LNq16Pseudo_UPD:
+ case ARM::VLD3LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ case ARM::VST1LNq8Pseudo:
+ case ARM::VST1LNq16Pseudo:
+ case ARM::VST1LNq32Pseudo:
+ case ARM::VST1LNq8Pseudo_UPD:
+ case ARM::VST1LNq16Pseudo_UPD:
+ case ARM::VST1LNq32Pseudo_UPD:
+ case ARM::VST2LNd8Pseudo:
+ case ARM::VST2LNd16Pseudo:
+ case ARM::VST2LNd32Pseudo:
+ case ARM::VST2LNq16Pseudo:
+ case ARM::VST2LNq32Pseudo:
+ case ARM::VST2LNd8Pseudo_UPD:
+ case ARM::VST2LNd16Pseudo_UPD:
+ case ARM::VST2LNd32Pseudo_UPD:
+ case ARM::VST2LNq16Pseudo_UPD:
+ case ARM::VST2LNq32Pseudo_UPD:
+ case ARM::VST3LNd8Pseudo:
+ case ARM::VST3LNd16Pseudo:
+ case ARM::VST3LNd32Pseudo:
+ case ARM::VST3LNq16Pseudo:
+ case ARM::VST3LNq32Pseudo:
+ case ARM::VST3LNd8Pseudo_UPD:
+ case ARM::VST3LNd16Pseudo_UPD:
+ case ARM::VST3LNd32Pseudo_UPD:
+ case ARM::VST3LNq16Pseudo_UPD:
+ case ARM::VST3LNq32Pseudo_UPD:
+ case ARM::VST4LNd8Pseudo:
+ case ARM::VST4LNd16Pseudo:
+ case ARM::VST4LNd32Pseudo:
+ case ARM::VST4LNq16Pseudo:
+ case ARM::VST4LNq32Pseudo:
+ case ARM::VST4LNd8Pseudo_UPD:
+ case ARM::VST4LNd16Pseudo_UPD:
+ case ARM::VST4LNd32Pseudo_UPD:
+ case ARM::VST4LNq16Pseudo_UPD:
+ case ARM::VST4LNq32Pseudo_UPD:
+ ExpandLaneOp(MBBI);
+ return true;
+
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
+ }
+}
+
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ Modified |= ExpandMI(MBB, MBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+ STI = &TM.getSubtarget<ARMSubtarget>();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+ ++MFI)
+ Modified |= ExpandMBB(*MFI);
+ if (VerifyARMPseudo)
+ MF.verify(this, "After expanding ARM pseudo instructions.");
+ return Modified;
+}
+
+/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
+FunctionPass *llvm::createARMExpandPseudoPass() {
+ return new ARMExpandPseudo();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 000000000000..29fcd4009af3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,2973 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMCallingConv.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+extern cl::opt<bool> EnableARMLongCalls;
+
+namespace {
+
+ // All possible address modes, plus some.
+ typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ int Offset;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0) {
+ Base.Reg = 0;
+ }
+ } Address;
+
+class ARMFastISel : public FastISel {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ ARMFunctionInfo *AFI;
+
+ // Convenience variables to avoid some queries.
+ bool isThumb2;
+ LLVMContext *Context;
+
+ public:
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb2 = AFI->isThumbFunction();
+ Context = &funcInfo.Fn->getContext();
+ }
+
+ // Code from FastISel.cpp.
+ private:
+ unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill);
+ unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2);
+
+ unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
+
+ // Backend specific FastISel code.
+ private:
+ virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+ virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
+ virtual bool FastLowerArguments();
+ private:
+ #include "ARMGenFastISel.inc"
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectIndirectBr(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectIToFP(const Instruction *I, bool isSigned);
+ bool SelectFPToI(const Instruction *I, bool isSigned);
+ bool SelectDiv(const Instruction *I, bool isSigned);
+ bool SelectRem(const Instruction *I, bool isSigned);
+ bool SelectCall(const Instruction *I, const char *IntrMemName);
+ bool SelectIntrinsicCall(const IntrinsicInst &I);
+ bool SelectSelect(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
+ bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt);
+ bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0, bool isZExt = true,
+ bool allocReg = true);
+ bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
+ bool ARMComputeAddress(const Value *Obj, Address &Addr);
+ void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
+ bool ARMIsMemCpySmall(uint64_t Len);
+ bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
+ unsigned Alignment);
+ unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, MVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
+ unsigned ARMSelectCallOp(bool UseReg);
+ unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
+
+ // Call handling routines.
+ private:
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg);
+ bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool isVarArg);
+ unsigned getLibcallReg(const Twine &Name);
+ bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool isVarArg);
+ bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
+
+ // OptionalDef handling routines.
+ private:
+ bool isARMNEONPred(const MachineInstr *MI);
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+ void AddLoadStoreOperands(MVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags, bool useAM3);
+};
+
+} // end anonymous namespace
+
+#include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ if (!MI->hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ // If we're a thumb2 or not NEON function we were handled via isPredicable.
+ if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+ AFI->isThumb2Function())
+ return false;
+
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return true;
+
+ return false;
+}
+
+// If the machine is predicable go ahead and add the predicate operands, if
+// it needs default CC operands add those.
+// TODO: If we want to support thumb1 then we'll need to deal with optional
+// CPSR defs that need to be added before the remaining operands. See s_cc_out
+// for descriptions why.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+ // Do we use a predicate? or...
+ // Are we NEON in ARM mode and have a predicate operand? If so, I know
+ // we're not predicable but add it anyways.
+ if (TII.isPredicable(MI) || isARMNEONPred(MI))
+ AddDefaultPred(MIB);
+
+ // Do we optionally set a predicate? Preds is size > 0 iff the predicate
+ // defines CPSR. All other OptionalDefines in ARM are the CCR register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
+
+unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm1).addImm(Imm2));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ return ResultReg;
+}
+
+// TODO: Don't worry about 64-bit now, but when this is fixed remove the
+// checks from the various callers.
+unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
+ if (VT == MVT::f64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
+ if (VT == MVT::i64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+// For double width floating point we need to materialize two constants
+// (the high and the low) into integer registers then use a move to get
+// the combined constant into an FP reg.
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
+ const APFloat Val = CFP->getValueAPF();
+ bool is64bit = VT == MVT::f64;
+
+ // This checks to see if we can use VFP3 instructions to materialize
+ // a constant, otherwise we have to go through the constant pool.
+ if (TLI.isFPImmLegal(Val, VT)) {
+ int Imm;
+ unsigned Opc;
+ if (is64bit) {
+ Imm = ARM_AM::getFP64Imm(Val);
+ Opc = ARM::FCONSTD;
+ } else {
+ Imm = ARM_AM::getFP32Imm(Val);
+ Opc = ARM::FCONSTS;
+ }
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addImm(Imm));
+ return DestReg;
+ }
+
+ // Require VFP2 for loading fp constants.
+ if (!Subtarget->hasVFP2()) return false;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(CFP->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
+
+ // The extra reg is for addrmode5.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0));
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
+
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
+ return false;
+
+ // If we can do this in a single instruction without a constant pool entry
+ // do so now.
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
+ unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
+ &ARM::GPRRegClass;
+ unsigned ImmReg = createResultReg(RC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ImmReg)
+ .addImm(CI->getZExtValue()));
+ return ImmReg;
+ }
+
+ // Use MVN to emit negative constants.
+ if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
+ unsigned Imm = (unsigned)~(CI->getSExtValue());
+ bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ if (UseImm) {
+ unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
+ unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ImmReg)
+ .addImm(Imm));
+ return ImmReg;
+ }
+ }
+
+ // Load from constant pool. For now 32-bit only.
+ if (VT != MVT::i32)
+ return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+
+ if (isThumb2)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg)
+ .addConstantPoolIndex(Idx));
+ else
+ // The extra immediate is for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::LDRcp), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
+ // For now 32-bit only.
+ if (VT != MVT::i32) return 0;
+
+ Reloc::Model RelocM = TM.getRelocationModel();
+ bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
+ const TargetRegisterClass *RC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ unsigned DestReg = createResultReg(RC);
+
+ // Use movw+movt when possible, it avoids constant pool entries.
+ // Darwin targets don't support movt with Reloc::Static, see
+ // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support
+ // static movt relocations.
+ if (Subtarget->useMovt() &&
+ Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) {
+ unsigned Opc;
+ switch (RelocM) {
+ case Reloc::PIC_:
+ Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
+ break;
+ case Reloc::DynamicNoPIC:
+ Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
+ break;
+ default:
+ Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
+ break;
+ }
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg).addGlobalAddress(GV));
+ } else {
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(GV->getType());
+ }
+
+ if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
+ return ARMLowerPICELF(GV, Align, VT);
+
+ // Grab index.
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
+ (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
+ ARMCP::CPValue,
+ PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ if (isThumb2) {
+ unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ AddOptionalDefs(MIB);
+ } else {
+ // The extra immediate is for addrmode2.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0);
+ AddOptionalDefs(MIB);
+
+ if (RelocM == Reloc::PIC_) {
+ unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), NewDestReg)
+ .addReg(DestReg)
+ .addImm(Id);
+ AddOptionalDefs(MIB);
+ return NewDestReg;
+ }
+ }
+ }
+
+ if (IsIndirect) {
+ MachineInstrBuilder MIB;
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb2)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRi12), NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ else
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
+ NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ DestReg = NewDestReg;
+ AddOptionalDefs(MIB);
+ }
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple()) return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return ARMMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return ARMMaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return ARMMaterializeInt(C, VT);
+
+ return 0;
+}
+
+// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
+
+unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ // This will get lowered later into the correct offsets and registers
+ // via rewriteXFrameIndex.
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(SI->second)
+ .addImm(0));
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
+ EVT evt = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (evt == MVT::Other || !evt.isSimple()) return false;
+ VT = evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+ // If this is a type than can be sign or zero-extended to a basic operation
+ // go ahead and accept it now.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the address to get to an object.
+bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast: {
+ // Look through bitcasts.
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ }
+ case Instruction::IntToPtr: {
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ int TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
+ return Addr.Base.Reg != 0;
+}
+
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
+ bool needsLowering = false;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unhandled load/store type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (!useAM3) {
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ // Handle negative offsets.
+ if (needsLowering && isThumb2)
+ needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
+ Addr.Offset > -256);
+ } else {
+ // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
+ needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ // Floating point operands handle 8-bit offsets.
+ needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
+ break;
+ }
+
+ // If this is a stack pointer and the offset needs to be simplified then
+ // put the alloca address into a register, set the base type back to
+ // register and continue. This should almost never happen.
+ if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
+ const TargetRegisterClass *RC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(Addr.Base.FI)
+ .addImm(0));
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ // Since the offset is too large for the load/store instruction
+ // get the reg+offset into a register.
+ if (needsLowering) {
+ Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
+ /*Op0IsKill*/false, Addr.Offset, MVT::i32);
+ Addr.Offset = 0;
+ }
+}
+
+void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags, bool useAM3) {
+ // addrmode5 output depends on the selection dag addressing dividing the
+ // offset by 4 that it then later multiplies. Do this here as well.
+ if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
+ Addr.Offset /= 4;
+
+ // Frame base works a bit differently. Handle it separately.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ int FI = Addr.Base.FI;
+ int Offset = Addr.Offset;
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ // Now add the rest of the operands.
+ MIB.addFrameIndex(FI);
+
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
+ if (useAM3) {
+ signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ MIB.addReg(0);
+ MIB.addImm(Imm);
+ } else {
+ MIB.addImm(Addr.Offset);
+ }
+ MIB.addMemOperand(MMO);
+ } else {
+ // Now add the rest of the operands.
+ MIB.addReg(Addr.Base.Reg);
+
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
+ if (useAM3) {
+ signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ MIB.addReg(0);
+ MIB.addImm(Imm);
+ } else {
+ MIB.addImm(Addr.Offset);
+ }
+ }
+ AddOptionalDefs(MIB);
+}
+
+bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment, bool isZExt, bool allocReg) {
+ unsigned Opc;
+ bool useAM3 = false;
+ bool needVMOV = false;
+ const TargetRegisterClass *RC;
+ switch (VT.SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
+ else
+ Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
+ } else {
+ if (isZExt) {
+ Opc = ARM::LDRBi12;
+ } else {
+ Opc = ARM::LDRSB;
+ useAM3 = true;
+ }
+ }
+ RC = &ARM::GPRRegClass;
+ break;
+ case MVT::i16:
+ if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ return false;
+
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
+ else
+ Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
+ } else {
+ Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
+ useAM3 = true;
+ }
+ RC = &ARM::GPRRegClass;
+ break;
+ case MVT::i32:
+ if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ return false;
+
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = ARM::t2LDRi8;
+ else
+ Opc = ARM::t2LDRi12;
+ } else {
+ Opc = ARM::LDRi12;
+ }
+ RC = &ARM::GPRRegClass;
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned loads need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ needVMOV = true;
+ VT = MVT::i32;
+ Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = &ARM::GPRRegClass;
+ } else {
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ }
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned loads need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
+ Opc = ARM::VLDRD;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT, useAM3);
+
+ // Create the base instruction, then add the operands.
+ if (allocReg)
+ ResultReg = createResultReg(RC);
+ assert (ResultReg > 255 && "Expected an allocated virtual register.");
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+ // If we had an unaligned load of a float we've converted it to an regular
+ // load. Now we must move from the GRP to the FP register.
+ if (needVMOV) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(ResultReg));
+ ResultReg = MoveReg;
+ }
+ return true;
+}
+
+bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment) {
+ unsigned StrOpc;
+ bool useAM3 = false;
+ switch (VT.SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i1: {
+ unsigned Res = createResultReg(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
+ unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), Res)
+ .addReg(SrcReg).addImm(1));
+ SrcReg = Res;
+ } // Fallthrough here.
+ case MVT::i8:
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRBi8;
+ else
+ StrOpc = ARM::t2STRBi12;
+ } else {
+ StrOpc = ARM::STRBi12;
+ }
+ break;
+ case MVT::i16:
+ if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ return false;
+
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRHi8;
+ else
+ StrOpc = ARM::t2STRHi12;
+ } else {
+ StrOpc = ARM::STRH;
+ useAM3 = true;
+ }
+ break;
+ case MVT::i32:
+ if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ return false;
+
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRi8;
+ else
+ StrOpc = ARM::t2STRi12;
+ } else {
+ StrOpc = ARM::STRi12;
+ }
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned stores need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ SrcReg = MoveReg;
+ VT = MVT::i32;
+ StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+ } else {
+ StrOpc = ARM::VSTRS;
+ }
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned stores need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
+ StrOpc = ARM::VSTRD;
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT, useAM3);
+
+ // Create the base instruction, then add the operands.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc))
+ .addReg(SrcReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
+ return true;
+}
+
+bool ARMFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Atomic stores need special handling.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0) return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ return false;
+ return true;
+}
+
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // Needs two compares...
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UEQ:
+ default:
+ // AL is our "false" for now. The other two need more compares.
+ return ARMCC::AL;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ return ARMCC::EQ;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ return ARMCC::GT;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ return ARMCC::GE;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ return ARMCC::HI;
+ case CmpInst::FCMP_OLT:
+ return ARMCC::MI;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ return ARMCC::LS;
+ case CmpInst::FCMP_ORD:
+ return ARMCC::VC;
+ case CmpInst::FCMP_UNO:
+ return ARMCC::VS;
+ case CmpInst::FCMP_UGE:
+ return ARMCC::PL;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ return ARMCC::LT;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ return ARMCC::LE;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ return ARMCC::NE;
+ case CmpInst::ICMP_UGE:
+ return ARMCC::HS;
+ case CmpInst::ICMP_ULT:
+ return ARMCC::LO;
+ }
+}
+
+bool ARMFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+
+ // If we can, avoid recomputing the compare - redoing it could lead to wonky
+ // behavior.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
+
+ // Get the compare predicate.
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ ARMCC::CondCodes ARMPred = getComparePred(Predicate);
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
+
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TstOpc))
+ .addReg(OpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ FastEmitBranch(Target, DL);
+ return true;
+ }
+
+ unsigned CmpReg = getRegForValue(BI->getCondition());
+ if (CmpReg == 0) return false;
+
+ // We've been divorced from our compare! Our block was split, and
+ // now our compare lives in a predecessor block. We musn't
+ // re-compare here, as the children of the compare aren't guaranteed
+ // live across the block boundary (we *could* check for this).
+ // Regardless, the compare has been done in the predecessor block,
+ // and it left a value for us in a virtual register. Ergo, we test
+ // the one-bit value left in the virtual register.
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
+ .addReg(CmpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+}
+
+bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
+ unsigned AddrReg = getRegForValue(I->getOperand(0));
+ if (AddrReg == 0) return false;
+
+ unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ .addReg(AddrReg));
+
+ const IndirectBrInst *IB = cast<IndirectBrInst>(I);
+ for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+
+ return true;
+}
+
+bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt) {
+ Type *Ty = Src1Value->getType();
+ EVT SrcEVT = TLI.getValueType(Ty, true);
+ if (!SrcEVT.isSimple()) return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ // Check to see if the 2nd operand is a constant that we can encode directly
+ // in the compare.
+ int Imm = 0;
+ bool UseImm = false;
+ bool isNegativeImm = false;
+ // FIXME: At -O0 we don't have anything that canonicalizes operand order.
+ // Thus, Src1Value may be a ConstantInt, but we're missing it.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
+ if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
+ SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
+ // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
+ // then a cmn, because there is no way to represent 2147483648 as a
+ // signed 32-bit int.
+ if (Imm < 0 && Imm != (int)0x80000000) {
+ isNegativeImm = true;
+ Imm = -Imm;
+ }
+ UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ }
+ } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
+ if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
+ if (ConstFP->isZero() && !ConstFP->isNegative())
+ UseImm = true;
+ }
+
+ unsigned CmpOpc;
+ bool isICmp = true;
+ bool needsExt = false;
+ switch (SrcVT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ isICmp = false;
+ CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
+ break;
+ case MVT::f64:
+ isICmp = false;
+ CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
+ break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ needsExt = true;
+ // Intentional fall-through.
+ case MVT::i32:
+ if (isThumb2) {
+ if (!UseImm)
+ CmpOpc = ARM::t2CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
+ } else {
+ if (!UseImm)
+ CmpOpc = ARM::CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
+ }
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(Src1Value);
+ if (SrcReg1 == 0) return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(Src2Value);
+ if (SrcReg2 == 0) return false;
+ }
+
+ // We have i1, i8, or i16, we need to either zero extend or sign extend.
+ if (needsExt) {
+ SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
+ if (SrcReg1 == 0) return false;
+ if (!UseImm) {
+ SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
+ if (SrcReg2 == 0) return false;
+ }
+ }
+
+ if (!UseImm) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CmpOpc))
+ .addReg(SrcReg1).addReg(SrcReg2));
+ } else {
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(SrcReg1);
+
+ // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
+ if (isICmp)
+ MIB.addImm(Imm);
+ AddOptionalDefs(MIB);
+ }
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (Ty->isFloatTy() || Ty->isDoubleTy())
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+ return true;
+}
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
+
+ // Now set a register based on the comparison. Explicitly set the predicates
+ // here.
+ unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ const TargetRegisterClass *RC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ unsigned DestReg = createResultReg(RC);
+ Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
+ .addReg(ZeroReg).addImm(1)
+ .addImm(ARMPred).addReg(ARM::CPSR);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPExt(const Instruction *I) {
+ // Make sure we have VFP and that we're extending float to double.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!I->getType()->isDoubleTy() ||
+ !V->getType()->isFloatTy()) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(&ARM::DPRRegClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTDS), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
+ // Make sure we have VFP and that we're truncating double to float.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!(I->getType()->isFloatTy() &&
+ V->getType()->isDoubleTy())) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(&ARM::SPRRegClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTSD), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, DstVT))
+ return false;
+
+ Value *Src = I->getOperand(0);
+ EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0) return false;
+
+ // Handle sign-extension.
+ if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
+ SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
+ /*isZExt*/!isSigned);
+ if (SrcReg == 0) return false;
+ }
+
+ // The conversion routine works on fp-reg to fp-reg and the operand above
+ // was an integer, move it to the fp registers if possible.
+ unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
+ if (FP == 0) return false;
+
+ unsigned Opc;
+ if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
+ else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
+ else return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(FP));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ Type *RetTy = I->getType();
+ if (!isTypeLegal(RetTy, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ unsigned Opc;
+ Type *OpTy = I->getOperand(0)->getType();
+ if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
+ else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
+ else return false;
+
+ // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(Op));
+
+ // This result needs to be in an integer register, but the conversion only
+ // takes place in fp-regs.
+ unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
+ if (IntReg == 0) return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ // Things need to be register sized for register moves.
+ if (VT != MVT::i32) return false;
+
+ unsigned CondReg = getRegForValue(I->getOperand(0));
+ if (CondReg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+
+ // Check to see if we can use an immediate in the conditional move.
+ int Imm = 0;
+ bool UseImm = false;
+ bool isNegativeImm = false;
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
+ assert (VT == MVT::i32 && "Expecting an i32.");
+ Imm = (int)ConstInt->getValue().getZExtValue();
+ if (Imm < 0) {
+ isNegativeImm = true;
+ Imm = ~Imm;
+ }
+ UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ }
+
+ unsigned Op2Reg = 0;
+ if (!UseImm) {
+ Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+ }
+
+ unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CondReg).addImm(0));
+
+ unsigned MovCCOpc;
+ const TargetRegisterClass *RC;
+ if (!UseImm) {
+ RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+ MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
+ } else {
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
+ if (!isNegativeImm)
+ MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ else
+ MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
+ }
+ unsigned ResultReg = createResultReg(RC);
+ if (!UseImm)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
+ MVT VT;
+ Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ // If we have integer div support we should have selected this automagically.
+ // In case we have a real miss go ahead and return false and we'll pick
+ // it up later.
+ if (Subtarget->hasDivide()) return false;
+
+ // Otherwise emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
+ else if (VT == MVT::i16)
+ LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
+ LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
+ MVT VT;
+ Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
+ else if (VT == MVT::i16)
+ LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
+ LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we have a binary operation on a non-legal
+ // type and the target independent selector doesn't know how to handle it.
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned Opc;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::ADD:
+ Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
+ break;
+ case ISD::OR:
+ Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
+ break;
+ case ISD::SUB:
+ Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ if (SrcReg1 == 0) return false;
+
+ // TODO: Often the 2nd operand is an immediate, which can be encoded directly
+ // in the instruction, rather then materializing the value in a register.
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ if (SrcReg2 == 0) return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addReg(SrcReg2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT FPVT = TLI.getValueType(I->getType(), true);
+ if (!FPVT.isSimple()) return false;
+ MVT VT = FPVT.getSimpleVT();
+
+ // We can get here in the case when we want to use NEON for our fp
+ // operations, but can't figure out how to. Just use the vfp instructions
+ // if we have them.
+ // FIXME: It'd be nice to use NEON instructions.
+ Type *Ty = I->getType();
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned Opc;
+ bool is64bit = VT == MVT::f64 || VT == MVT::i64;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::FADD:
+ Opc = is64bit ? ARM::VADDD : ARM::VADDS;
+ break;
+ case ISD::FSUB:
+ Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
+ break;
+ case ISD::FMUL:
+ Opc = is64bit ? ARM::VMULD : ARM::VMULS;
+ break;
+ }
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Op1).addReg(Op2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Call Handling Code
+
+// This is largely taken directly from CCAssignFnForNode
+// TODO: We may not support all of this.
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
+ // Fallthrough
+ case CallingConv::C:
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ if (!isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ // Fall through to soft float variant, variadic functions don't
+ // use hard floating point ABI.
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::GHC:
+ if (Return)
+ llvm_unreachable("Can't return in GHC call convention");
+ else
+ return CC_ARM_APCS_GHC;
+ }
+}
+
+bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool isVarArg) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
+ CCAssignFnForCall(CC, false, isVarArg));
+
+ // Check that we can handle all of the arguments. If we can't, then bail out
+ // now before we add code to the MBB.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ continue;
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64 ||
+ // TODO: Only handle register args for now.
+ !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
+ return false;
+ } else {
+ switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ }
+ }
+ }
+
+ // At the point, we are able to handle the call's arguments in fast isel.
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackDown))
+ .addImm(NumBytes));
+
+ // Process the args.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
+ "We don't handle NEON/vector parameters yet.");
+
+ // Handle arg promotion, etc.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ MVT DestVT = VA.getLocVT();
+ Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
+ assert (Arg != 0 && "Failed to emit a sext");
+ ArgVT = DestVT;
+ break;
+ }
+ case CCValAssign::AExt:
+ // Intentional fall-through. Handle AExt and ZExt.
+ case CCValAssign::ZExt: {
+ MVT DestVT = VA.getLocVT();
+ Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
+ assert (Arg != 0 && "Failed to emit a sext");
+ ArgVT = DestVT;
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
+ /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ default: llvm_unreachable("Unknown arg promotion!");
+ }
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg())
+ .addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ assert(VA.getLocVT() == MVT::f64 &&
+ "Custom lowering for v2f64 args not available");
+
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "We only handle register args!");
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRRD), VA.getLocReg())
+ .addReg(NextVA.getLocReg(), RegState::Define)
+ .addReg(Arg));
+ RegArgs.push_back(VA.getLocReg());
+ RegArgs.push_back(NextVA.getLocReg());
+ } else {
+ assert(VA.isMemLoc());
+ // Need to store on the stack.
+ Address Addr;
+ Addr.BaseType = Address::RegBase;
+ Addr.Base.Reg = ARM::SP;
+ Addr.Offset = VA.getLocMemOffset();
+
+ bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
+ assert(EmitRet && "Could not emit a store for argument!");
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool isVarArg) {
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0));
+
+ // Now the return value.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
+
+ // Copy all of the result registers out of their specified physreg.
+ if (RVLocs.size() == 2 && RetVT == MVT::f64) {
+ // For this move we copy into two registers and then move into the
+ // double fp reg we want.
+ MVT DestVT = RVLocs[0].getValVT();
+ const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ unsigned ResultReg = createResultReg(DstRC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVDRR), ResultReg)
+ .addReg(RVLocs[0].getLocReg())
+ .addReg(RVLocs[1].getLocReg()));
+
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[1].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ } else {
+ assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
+ MVT CopyVT = RVLocs[0].getValVT();
+
+ // Special handling for extended integers.
+ if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
+ CopyVT = MVT::i32;
+
+ const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ unsigned ResultReg = createResultReg(DstRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
+ F.isVarArg()));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+
+ unsigned SrcReg = Reg + VA.getValNo();
+ EVT RVEVT = TLI.getValueType(RV->getType());
+ if (!RVEVT.isSimple()) return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (RVVT != DestVT) {
+ if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
+ return false;
+
+ assert(DestVT == MVT::i32 && "ARM should always ext to i32");
+
+ // Perform extension if flagged as either zext or sext. Otherwise, do
+ // nothing.
+ if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
+ SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
+ if (SrcReg == 0) return false;
+ }
+ }
+
+ // Make the copy.
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
+ }
+
+ unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc));
+ AddOptionalDefs(MIB);
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
+ return true;
+}
+
+unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
+ if (UseReg)
+ return isThumb2 ? ARM::tBLXr : ARM::BLX;
+ else
+ return isThumb2 ? ARM::tBL : ARM::BL;
+}
+
+unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
+ GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
+ GlobalValue::ExternalLinkage, 0, Name);
+ EVT LCREVT = TLI.getValueType(GV->getType());
+ if (!LCREVT.isSimple()) return 0;
+ return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
+}
+
+// A quick function that will emit a call for a named libcall in F with the
+// vector of passed arguments for the Instruction in I. We can assume that we
+// can emit a call for any libcall we can produce. This is an abridged version
+// of the full call infrastructure since we won't need to worry about things
+// like computed function pointers or strange arguments at call sites.
+// TODO: Try to unify this and the normal call bits for ARM, then try to unify
+// with X86.
+bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
+ CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
+
+ // Handle *simple* calls for now.
+ Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // Can't handle non-double multi-reg retvals.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
+ if (RVLocs.size() >= 2 && RetVT != MVT::f64)
+ return false;
+ }
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(I->getNumOperands());
+ ArgRegs.reserve(I->getNumOperands());
+ ArgVTs.reserve(I->getNumOperands());
+ ArgFlags.reserve(I->getNumOperands());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value *Op = I->getOperand(i);
+ unsigned Arg = getRegForValue(Op);
+ if (Arg == 0) return false;
+
+ Type *ArgTy = Op->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT)) return false;
+
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(Op);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, false))
+ return false;
+
+ unsigned CalleeReg = 0;
+ if (EnableARMLongCalls) {
+ CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
+ if (CalleeReg == 0) return false;
+ }
+
+ // Issue the call.
+ unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(CallOpc));
+ // BL / BLX don't take a predicate, but tBL / tBLX do.
+ if (isThumb2)
+ AddDefaultPred(MIB);
+ if (EnableARMLongCalls)
+ MIB.addReg(CalleeReg);
+ else
+ MIB.addExternalSymbol(TLI.getLibcallName(Call));
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i], RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::SelectCall(const Instruction *I,
+ const char *IntrMemName = 0) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm.
+ if (isa<InlineAsm>(Callee)) return false;
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (CI->isTailCall()) return false;
+
+ // Check the calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ // TODO: Avoid some calling conventions?
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ bool isVarArg = FTy->isVarArg();
+
+ // Handle *simple* calls for now.
+ Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8 && RetVT != MVT::i1)
+ return false;
+
+ // Can't handle non-double multi-reg retvals.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
+ RetVT != MVT::i16 && RetVT != MVT::i32) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
+ if (RVLocs.size() >= 2 && RetVT != MVT::f64)
+ return false;
+ }
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ unsigned arg_size = CS.arg_size();
+ Args.reserve(arg_size);
+ ArgRegs.reserve(arg_size);
+ ArgVTs.reserve(arg_size);
+ ArgFlags.reserve(arg_size);
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ // If we're lowering a memory intrinsic instead of a regular call, skip the
+ // last two arguments, which shouldn't be passed to the underlying function.
+ if (IntrMemName && e-i <= 2)
+ break;
+
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ // FIXME: Only handle *easy* calls for now.
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ return false;
+
+ Type *ArgTy = (*i)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
+ ArgVT != MVT::i1)
+ return false;
+
+ unsigned Arg = getRegForValue(*i);
+ if (Arg == 0)
+ return false;
+
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*i);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, isVarArg))
+ return false;
+
+ bool UseReg = false;
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV || EnableARMLongCalls) UseReg = true;
+
+ unsigned CalleeReg = 0;
+ if (UseReg) {
+ if (IntrMemName)
+ CalleeReg = getLibcallReg(IntrMemName);
+ else
+ CalleeReg = getRegForValue(Callee);
+
+ if (CalleeReg == 0) return false;
+ }
+
+ // Issue the call.
+ unsigned CallOpc = ARMSelectCallOp(UseReg);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(CallOpc));
+
+ // ARM calls don't take a predicate, but tBL / tBLX do.
+ if(isThumb2)
+ AddDefaultPred(MIB);
+ if (UseReg)
+ MIB.addReg(CalleeReg);
+ else if (!IntrMemName)
+ MIB.addGlobalAddress(GV, 0, 0);
+ else
+ MIB.addExternalSymbol(IntrMemName, 0);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i], RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
+ return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
+ return Len <= 16;
+}
+
+bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
+ uint64_t Len, unsigned Alignment) {
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!ARMIsMemCpySmall(Len))
+ return false;
+
+ while (Len) {
+ MVT VT;
+ if (!Alignment || Alignment >= 4) {
+ if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert (Len == 1 && "Expected a length of 1!");
+ VT = MVT::i8;
+ }
+ } else {
+ // Bound based on alignment.
+ if (Len >= 2 && Alignment == 2)
+ VT = MVT::i16;
+ else {
+ VT = MVT::i8;
+ }
+ }
+
+ bool RV;
+ unsigned ResultReg;
+ RV = ARMEmitLoad(VT, ResultReg, Src);
+ assert (RV == true && "Should be able to handle this load.");
+ RV = ARMEmitStore(VT, ResultReg, Dest);
+ assert (RV == true && "Should be able to handle this store.");
+ (void)RV;
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ Dest.Offset += Size;
+ Src.Offset += Size;
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::frameaddress: {
+ MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ unsigned LdrOpc;
+ const TargetRegisterClass *RC;
+ if (isThumb2) {
+ LdrOpc = ARM::t2LDRi12;
+ RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
+ } else {
+ LdrOpc = ARM::LDRi12;
+ RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
+ }
+
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
+ unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ unsigned SrcReg = FramePtr;
+
+ // Recursively load frame address
+ // ldr r0 [fp]
+ // ldr r0 [r0]
+ // ldr r0 [r0]
+ // ...
+ unsigned DestReg;
+ unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
+ while (Depth--) {
+ DestReg = createResultReg(RC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(LdrOpc), DestReg)
+ .addReg(SrcReg).addImm(0));
+ SrcReg = DestReg;
+ }
+ UpdateValueMap(&I, SrcReg);
+ return true;
+ }
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ const MemTransferInst &MTI = cast<MemTransferInst>(I);
+ // Don't handle volatile.
+ if (MTI.isVolatile())
+ return false;
+
+ // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
+ // we would emit dead code because we don't currently handle memmoves.
+ bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
+ if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+ // Small memcpy's are common enough that we want to do them without a call
+ // if possible.
+ uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
+ if (ARMIsMemCpySmall(Len)) {
+ Address Dest, Src;
+ if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
+ !ARMComputeAddress(MTI.getRawSource(), Src))
+ return false;
+ unsigned Alignment = MTI.getAlignment();
+ if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
+ return true;
+ }
+ }
+
+ if (!MTI.getLength()->getType()->isIntegerTy(32))
+ return false;
+
+ if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+ return false;
+
+ const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
+ return SelectCall(&I, IntrMemName);
+ }
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+ // Don't handle volatile.
+ if (MSI.isVolatile())
+ return false;
+
+ if (!MSI.getLength()->getType()->isIntegerTy(32))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return SelectCall(&I, "memset");
+ }
+ case Intrinsic::trap: {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(
+ Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
+ return true;
+ }
+ }
+}
+
+bool ARMFastISel::SelectTrunc(const Instruction *I) {
+ // The high bits for a type smaller than the register size are assumed to be
+ // undefined.
+ Value *Op = I->getOperand(0);
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(Op->getType(), true);
+ DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ bool isZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return 0;
+
+ unsigned Opc;
+ bool isBoolZext = false;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ switch (SrcVT.SimpleTy) {
+ default: return 0;
+ case MVT::i16:
+ if (!Subtarget->hasV6Ops()) return 0;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
+ if (isZExt)
+ Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
+ else
+ Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
+ break;
+ case MVT::i8:
+ if (!Subtarget->hasV6Ops()) return 0;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
+ if (isZExt)
+ Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
+ else
+ Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
+ break;
+ case MVT::i1:
+ if (isZExt) {
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
+ Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
+ isBoolZext = true;
+ break;
+ }
+ return 0;
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(SrcReg);
+ if (isBoolZext)
+ MIB.addImm(1);
+ else
+ MIB.addImm(0);
+ AddOptionalDefs(MIB);
+ return ResultReg;
+}
+
+bool ARMFastISel::SelectIntExt(const Instruction *I) {
+ // On ARM, in general, integer casts don't involve legal types; this code
+ // handles promotable integers.
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ bool isZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple()) return false;
+ if (!DestEVT.isSimple()) return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
+ unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
+ if (ResultReg == 0) return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectShift(const Instruction *I,
+ ARM_AM::ShiftOpc ShiftTy) {
+ // We handle thumb2 mode by target independent selector
+ // or SelectionDAG ISel.
+ if (isThumb2)
+ return false;
+
+ // Only handle i32 now.
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+ if (DestVT != MVT::i32)
+ return false;
+
+ unsigned Opc = ARM::MOVsr;
+ unsigned ShiftImm;
+ Value *Src2Value = I->getOperand(1);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
+ ShiftImm = CI->getZExtValue();
+
+ // Fall back to selection DAG isel if the shift amount
+ // is zero or greater than the width of the value type.
+ if (ShiftImm == 0 || ShiftImm >=32)
+ return false;
+
+ Opc = ARM::MOVsi;
+ }
+
+ Value *Src1Value = I->getOperand(0);
+ unsigned Reg1 = getRegForValue(Src1Value);
+ if (Reg1 == 0) return false;
+
+ unsigned Reg2 = 0;
+ if (Opc == ARM::MOVsr) {
+ Reg2 = getRegForValue(Src2Value);
+ if (Reg2 == 0) return false;
+ }
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ if(ResultReg == 0) return false;
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg1);
+
+ if (Opc == ARM::MOVsi)
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
+ else if (Opc == ARM::MOVsr) {
+ MIB.addReg(Reg2);
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
+ }
+
+ AddOptionalDefs(MIB);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::IndirectBr:
+ return SelectIndirectBr(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return SelectCmp(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectIToFP(I, /*isSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*isSigned*/ false);
+ case Instruction::FPToSI:
+ return SelectFPToI(I, /*isSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*isSigned*/ false);
+ case Instruction::Add:
+ return SelectBinaryIntOp(I, ISD::ADD);
+ case Instruction::Or:
+ return SelectBinaryIntOp(I, ISD::OR);
+ case Instruction::Sub:
+ return SelectBinaryIntOp(I, ISD::SUB);
+ case Instruction::FAdd:
+ return SelectBinaryFPOp(I, ISD::FADD);
+ case Instruction::FSub:
+ return SelectBinaryFPOp(I, ISD::FSUB);
+ case Instruction::FMul:
+ return SelectBinaryFPOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectDiv(I, /*isSigned*/ true);
+ case Instruction::UDiv:
+ return SelectDiv(I, /*isSigned*/ false);
+ case Instruction::SRem:
+ return SelectRem(I, /*isSigned*/ true);
+ case Instruction::URem:
+ return SelectRem(I, /*isSigned*/ false);
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return SelectIntrinsicCall(*II);
+ return SelectCall(I);
+ case Instruction::Select:
+ return SelectSelect(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ case Instruction::Trunc:
+ return SelectTrunc(I);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntExt(I);
+ case Instruction::Shl:
+ return SelectShift(I, ARM_AM::lsl);
+ case Instruction::LShr:
+ return SelectShift(I, ARM_AM::lsr);
+ case Instruction::AShr:
+ return SelectShift(I, ARM_AM::asr);
+ default: break;
+ }
+ return false;
+}
+
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// successful.
+bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(LI->getType(), VT))
+ return false;
+
+ // Combine load followed by zero- or sign-extend.
+ // ldrb r1, [r0] ldrb r1, [r0]
+ // uxtb r2, r1 =>
+ // mov r3, r2 mov r3, r1
+ bool isZExt = true;
+ switch(MI->getOpcode()) {
+ default: return false;
+ case ARM::SXTH:
+ case ARM::t2SXTH:
+ isZExt = false;
+ case ARM::UXTH:
+ case ARM::t2UXTH:
+ if (VT != MVT::i16)
+ return false;
+ break;
+ case ARM::SXTB:
+ case ARM::t2SXTB:
+ isZExt = false;
+ case ARM::UXTB:
+ case ARM::t2UXTB:
+ if (VT != MVT::i8)
+ return false;
+ break;
+ }
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg = MI->getOperand(0).getReg();
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
+ return false;
+ MI->eraseFromParent();
+ return true;
+}
+
+unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
+ unsigned Align, MVT VT) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolConstant *CPV =
+ ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ unsigned Opc;
+ unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
+ // Load value.
+ if (isThumb2) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg1)
+ .addConstantPoolIndex(Idx));
+ Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
+ } else {
+ // The extra immediate is for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(ARM::LDRcp), DestReg1)
+ .addConstantPoolIndex(Idx).addImm(0));
+ Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
+ }
+
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ if (GlobalBaseReg == 0) {
+ GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
+ AFI->setGlobalBaseReg(GlobalBaseReg);
+ }
+
+ unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), DestReg2)
+ .addReg(DestReg1)
+ .addReg(GlobalBaseReg);
+ if (!UseGOTOFF)
+ MIB.addImm(0);
+ AddOptionalDefs(MIB);
+
+ return DestReg2;
+}
+
+bool ARMFastISel::FastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ switch (CC) {
+ default:
+ return false;
+ case CallingConv::Fast:
+ case CallingConv::C:
+ case CallingConv::ARM_AAPCS_VFP:
+ case CallingConv::ARM_AAPCS:
+ case CallingConv::ARM_APCS:
+ break;
+ }
+
+ // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
+ // which are passed in r0 - r3.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (Idx > 4)
+ return false;
+
+ if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ return false;
+
+ Type *ArgTy = I->getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ default:
+ return false;
+ }
+ }
+
+
+ static const uint16_t GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ Idx = 0;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (I->use_empty())
+ continue;
+ unsigned SrcReg = GPRArgRegs[Idx];
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(I, ResultReg);
+ }
+
+ return true;
+}
+
+namespace llvm {
+ FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
+ // Completely untested on non-iOS.
+ const TargetMachine &TM = funcInfo.MF->getTarget();
+
+ // Darwin and thumb1 only for now.
+ const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only())
+ return new ARMFastISel(funcInfo, libInfo);
+ return 0;
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
new file mode 100644
index 000000000000..7a02adf24633
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -0,0 +1,1430 @@
+//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMFrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
+ cl::desc("Align ARM NEON spills in prolog and epilog"));
+
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs);
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // iOS requires FP not to be clobbered for backtracing purpose.
+ if (STI.isTargetIOS())
+ return true;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Always eliminate non-leaf frame pointers.
+ return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ MFI->hasCalls()) ||
+ RegInfo->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function. This eliminates the need for
+/// add/sub sp brackets around call sites. Returns true if the call frame is
+/// included as part of the stack frame.
+bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has small immediate offset to
+ // address the stack frame. So a large call frame can cause poor codegen
+ // and may even makes it impossible to scavenge a register.
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+/// call frame pseudos can be simplified. Unlike most targets, having a FP
+/// is not sufficient here since we still may reference some objects via SP
+/// even when FP is available in Thumb2 mode.
+bool
+ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+ return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI,
+ const ARMBaseInstrInfo &TII,
+ const uint16_t *CSRegs) {
+ // Integer spill area is handled with "pop".
+ if (MI->getOpcode() == ARM::LDMIA_RET ||
+ MI->getOpcode() == ARM::t2LDMIA_RET ||
+ MI->getOpcode() == ARM::LDMIA_UPD ||
+ MI->getOpcode() == ARM::t2LDMIA_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ if ((MI->getOpcode() == ARM::LDR_POST_IMM ||
+ MI->getOpcode() == ARM::LDR_POST_REG ||
+ MI->getOpcode() == ARM::t2LDR_POST) &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
+ MI->getOperand(1).getReg() == ARM::SP)
+ return true;
+
+ return false;
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII, MIFlags);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII, MIFlags);
+}
+
+void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitPrologue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Determine the sizes of each callee-save spill areas and record which frame
+ // belongs to which callee-save spill areas.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+ int D8SpillFI = 0;
+
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
+ // Allocate the vararg register save area. This is not counted in NumBytes.
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+ MachineInstr::FrameSetup);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetIOS()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) {
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+ }
+
+ // Move past area 1.
+ if (GPRCS1Size > 0) MBBI++;
+
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For iOS, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not iOS, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it is
+ // now safe to emit this assignment.
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ AddDefaultCC(AddDefaultPred(MIB));
+ }
+
+ // Move past area 2.
+ if (GPRCS2Size > 0) MBBI++;
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ // Move past area 3.
+ if (DPRCSSize > 0) {
+ MBBI++;
+ // Since vpush register list cannot have gaps, there may be multiple vpush
+ // instructions in the prologue.
+ while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+ MBBI++;
+ }
+
+ // Move past the aligned DPRCS2 area.
+ if (AFI->getNumAlignedDPRCS2Regs() > 0) {
+ MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
+ // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
+ // leaves the stack pointer pointing to the DPRCS2 area.
+ //
+ // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
+ NumBytes += MFI->getObjectOffset(D8SpillFI);
+ } else
+ NumBytes = DPRCSOffset;
+
+ if (NumBytes) {
+ // Adjust SP after all the callee-save spills.
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+ if (HasFP && isARM)
+ // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
+ // Note it's not safe to do this in Thumb2 mode because it would have
+ // taken two instructions:
+ // mov sp, r7
+ // sub sp, #24
+ // If an interrupt is taken between the two instructions, then sp is in
+ // an inconsistent state (pointing to the middle of callee-saved area).
+ // The interrupt handler can end up clobbering the registers.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+ // sure if we also have VLAs, we have a base pointer for frame access.
+ // If aligned NEON registers were spilled, the stack has already been
+ // realigned.
+ if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ if (!AFI->isThumbFunction()) {
+ // Emit bic sp, sp, MaxAlign
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::BICri), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ } else {
+ // We cannot use sp as source/dest register here, thus we're emitting the
+ // following sequence:
+ // mov r4, sp
+ // bic r4, r4, MaxAlign
+ // mov sp, r4
+ // FIXME: It will be better just to find spare register here.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill));
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::t2BICri), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill));
+ }
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ // FIXME: Clarify FrameSetup flags here.
+ if (RegInfo->hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ RegInfo->getBaseRegister())
+ .addReg(ARM::SP));
+ }
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitEpilogue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ if (!isCSRestore(MBBI, TII, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else {
+ // It's not possible to restore SP from FP in a single instruction.
+ // For iOS, this looks like:
+ // mov sp, r7
+ // sub sp, #24
+ // This is bad, if an interrupt is taken after the mov, sp is in an
+ // inconsistent state.
+ // Use the first callee-saved register as a scratch register.
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
+ }
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
+ }
+ } else if (NumBytes)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+
+ // Increment past our save areas.
+ if (AFI->getDPRCalleeSavedAreaSize()) {
+ MBBI++;
+ // Since vpop register list cannot have gaps, there may be multiple vpop
+ // instructions in the epilogue.
+ while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+ MBBI++;
+ }
+ if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+ }
+
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi) {
+ unsigned TCOpcode = STI.isThumb() ?
+ (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
+ ARM::TAILJMPd;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+
+ // Add the default predicate in Thumb mode.
+ if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+ }
+
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+}
+
+/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
+/// debug info. It's the same as what we use for resolving the code-gen
+/// references for now. FIXME: This can go wrong when references are
+/// SP-relative and simple call frames aren't used.
+int
+ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg,
+ int SPAdj) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
+ FrameReg = ARM::SP;
+ Offset += SPAdj;
+ if (AFI->isGPRCalleeSavedArea1Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FI))
+ return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+ // SP can move around if there are allocas. We may also lose track of SP
+ // when emergency spilling inside a non-reserved call frame setup.
+ bool hasMovingSP = !hasReservedCallFrame(MF);
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (RegInfo->needsStackRealignment(MF)) {
+ assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ if (isFixed) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ Offset = FPOffset;
+ } else if (hasMovingSP) {
+ assert(RegInfo->hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = RegInfo->getBaseRegister();
+ }
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use frame pointer to reference fixed objects. Use it for locals if
+ // there are VLAs (and thus the SP isn't reliable as a base).
+ if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ } else if (hasMovingSP) {
+ assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
+ if (AFI->isThumb2Function()) {
+ // Try to use the frame pointer if we can, else use the base pointer
+ // since it's available. This is handy for the emergency spill slot, in
+ // particular.
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ } else if (AFI->isThumb2Function()) {
+ // Use add <rd>, sp, #<imm8>
+ // ldr <rd>, [sp, #<imm8>]
+ // if at all possible to save space.
+ if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
+ return Offset;
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out of range references. ldr <rt>,[<rn>, #-<imm8>]
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ // Use the base pointer if we have one.
+ if (RegInfo->hasBasePointer(MF))
+ FrameReg = RegInfo->getBaseRegister();
+ return Offset;
+}
+
+int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ unsigned FrameReg;
+ return getFrameIndexReference(MF, FI, FrameReg);
+}
+
+void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned StmOpc, unsigned StrOpc,
+ bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs,
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ SmallVector<std::pair<unsigned,bool>, 4> Regs;
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetIOS())) continue;
+
+ // D-registers in the aligned area DPRCS2 are NOT spilled here.
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ continue;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for
+ // @llvm.returnaddress then it's already added to the function and
+ // entry block live-in sets.
+ bool isKill = true;
+ if (Reg == ARM::LR) {
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ // If NoGap is true, push consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+ LastReg = Reg;
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || StrOpc== 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+ .addReg(ARM::SP).setMIFlags(MIFlags));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
+ } else if (Regs.size() == 1) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
+ ARM::SP)
+ .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(ARM::SP).setMIFlags(MIFlags)
+ .addImm(-4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
+void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned LdmOpc, unsigned LdrOpc,
+ bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
+ RetOpcode == ARM::TCRETURNri);
+
+ SmallVector<unsigned, 4> Regs;
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ bool DeleteRet = false;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetIOS())) continue;
+
+ // The aligned reloads from area DPRCS2 are not inserted here.
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ continue;
+
+ if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
+ Reg = ARM::PC;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ // Fold the return instruction into the LDM.
+ DeleteRet = true;
+ }
+
+ // If NoGap is true, pop consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+
+ LastReg = Reg;
+ Regs.push_back(Reg);
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || LdrOpc == 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+ .addReg(ARM::SP));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i], getDefRegState(true));
+ if (DeleteRet) {
+ MIB.copyImplicitOps(&*MI);
+ MI->eraseFromParent();
+ }
+ MI = MIB;
+ } else if (Regs.size() == 1) {
+ // If we adjusted the reg to PC from LR above, switch it back here. We
+ // only do that for LDM.
+ if (Regs[0] == ARM::PC)
+ Regs[0] = ARM::LR;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
+/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. Also insert stack realignment code and leave the stack
+/// pointer pointing to the d8 spill slot.
+static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ // Mark the D-register spill slots as properly aligned. Since MFI computes
+ // stack slot layout backwards, this can actually mean that the d-reg stack
+ // slot offsets can be wrong. The offset for d8 will always be correct.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned DNum = CSI[i].getReg() - ARM::D8;
+ if (DNum >= 8)
+ continue;
+ int FI = CSI[i].getFrameIdx();
+ // The even-numbered registers will be 16-byte aligned, the odd-numbered
+ // registers will be 8-byte aligned.
+ MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
+
+ // The stack slot for D8 needs to be maximally aligned because this is
+ // actually the point where we align the stack pointer. MachineFrameInfo
+ // computes all offsets relative to the incoming stack pointer which is a
+ // bit weird when realigning the stack. Any extra padding for this
+ // over-alignment is not realized because the code inserted below adjusts
+ // the stack pointer by numregs * 8 before aligning the stack pointer.
+ if (DNum == 0)
+ MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
+ }
+
+ // Move the stack pointer to the d8 spill slot, and align it at the same
+ // time. Leave the stack slot address in the scratch register r4.
+ //
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ //
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+ AFI->setShouldRestoreSPFromFP(true);
+
+ // sub r4, sp, #numregs * 8
+ // The immediate is <= 64, so it doesn't need any special encoding.
+ unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::SP)
+ .addImm(8 * NumAlignedDPRCS2Regs)));
+
+ // bic r4, r4, #align-1
+ Opc = isThumb ? ARM::t2BICri : ARM::BICri;
+ unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign - 1)));
+
+ // mov sp, r4
+ // The stack pointer must be adjusted before spilling anything, otherwise
+ // the stack slots could be clobbered by an interrupt handler.
+ // Leave r4 live, it is used below.
+ Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
+ .addReg(ARM::R4);
+ MIB = AddDefaultPred(MIB);
+ if (!isThumb)
+ AddDefaultCC(MIB);
+
+ // Now spill NumAlignedDPRCS2Regs registers starting from d8.
+ // r4 holds the stack slot address.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
+ // The writeback is only needed when emitting two vst1.64 instructions.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QQPRRegClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
+ ARM::R4)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // We won't modify r4 beyond this point. It currently points to the next
+ // register to be spilled.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QQPRRegClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
+ .addReg(ARM::R4).addImm(16).addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vst1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QPRRegClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
+ .addReg(ARM::R4).addImm(16).addReg(SupReg));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vstr.64 for the odd last register.
+ if (NumAlignedDPRCS2Regs) {
+ MBB.addLiveIn(NextReg);
+ // vstr.64 uses addrmode5 which has an offset scale of 4.
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
+ .addReg(NextReg)
+ .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
+ }
+
+ // The last spill instruction inserted should kill the scratch register r4.
+ llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
+/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
+/// iterator to the following instruction.
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs) {
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ ++MI; ++MI; ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+
+ // These switches all fall through.
+ switch(NumAlignedDPRCS2Regs) {
+ case 7:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ default:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ case 1:
+ case 2:
+ case 4:
+ assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
+ ++MI;
+ }
+ return MI;
+}
+
+/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. These instructions are assumed to execute while the
+/// stack is still aligned, unlike the code inserted by emitPopInst.
+static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Find the frame index assigned to d8.
+ int D8SpillFI = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ if (CSI[i].getReg() == ARM::D8) {
+ D8SpillFI = CSI[i].getFrameIdx();
+ break;
+ }
+
+ // Materialize the address of the d8 spill slot into the scratch register r4.
+ // This can be fairly complicated if the stack frame is large, so just use
+ // the normal frame index elimination mechanism to do it. This code runs as
+ // the initial part of the epilog where the stack and base pointers haven't
+ // been changed yet.
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addFrameIndex(D8SpillFI).addImm(0)));
+
+ // Now restore NumAlignedDPRCS2Regs registers starting from d8.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vld1.64 with 4 d-regs and writeback.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QQPRRegClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
+ .addReg(ARM::R4, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // We won't modify r4 beyond this point. It currently points to the next
+ // register to be spilled.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QQPRRegClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
+ .addReg(ARM::R4).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vld1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QPRRegClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
+ .addReg(ARM::R4).addImm(16));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vldr.64 for the remaining odd register.
+ if (NumAlignedDPRCS2Regs)
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
+ .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
+
+ // Last store kills r4.
+ llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
+bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
+ unsigned PushOneOpc = AFI->isThumbFunction() ?
+ ARM::t2STR_PRE : ARM::STR_PRE_IMM;
+ unsigned FltOpc = ARM::VSTMDDB_UPD;
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+
+ // The code above does not insert spill code for the aligned DPRCS2 registers.
+ // The stack realignment code will be inserted between the push instructions
+ // and these spills.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
+
+ return true;
+}
+
+bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+
+ // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
+ // registers. Do that here instead.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
+
+ unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
+ unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
+ unsigned FltOpc = ARM::VLDMDIA_UPD;
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea2Register, 0);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea1Register, 0);
+
+ return true;
+}
+
+// FIXME: Make generic?
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += TII.GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+// FIXME: Move to TII?
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
+ const TargetFrameLowering *TFI) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned Limit = (1 << 12) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDri to get the address of a stack object, 255 is the
+ // largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == ARM::ADDri) {
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ }
+
+ // Otherwise check the addressing mode.
+ switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+ case ARMII::AddrMode3:
+ case ARMII::AddrModeT2_i8:
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode5:
+ case ARMII::AddrModeT2_i8s4:
+ Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+ break;
+ case ARMII::AddrModeT2_i12:
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (TFI->hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Addressing modes 4 & 6 (load/store) instructions can't encode an
+ // immediate offset for stack references.
+ return 0;
+ default:
+ break;
+ }
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+// In functions that realign the stack, it can be an advantage to spill the
+// callee-saved vector registers after realigning the stack. The vst1 and vld1
+// instructions take alignment hints that can improve performance.
+//
+static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
+ if (!SpillAlignedNEONRegs)
+ return;
+
+ // Naked functions don't spill callee-saved registers.
+ if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
+ return;
+
+ // We are planning to use NEON instructions vst1 / vld1.
+ if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
+ return;
+
+ // Don't bother if the default stack alignment is sufficiently high.
+ if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8)
+ return;
+
+ // Aligned spills require stack realignment.
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (!RegInfo->canRealignStack(MF))
+ return;
+
+ // We always spill contiguous d-registers starting from d8. Count how many
+ // needs spilling. The register allocator will almost always use the
+ // callee-saved registers in order, but it can happen that there are holes in
+ // the range. Registers above the hole will be spilled to the standard DPRCS
+ // area.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned NumSpills = 0;
+ for (; NumSpills < 8; ++NumSpills)
+ if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
+ break;
+
+ // Don't do this for just one d-register. It's not worth it.
+ if (NumSpills < 2)
+ return;
+
+ // Spill the first NumSpills D-registers after realigning the stack.
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
+
+ // A scratch register is required for the vst1 / vld1 instructions.
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+}
+
+void
+ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // This tells PEI to spill the FP as if it is any other callee-save register
+ // to take advantage the eliminateFrameIndex machinery. This also ensures it
+ // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+ // to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Spill R4 if Thumb2 function requires stack realignment - it will be used as
+ // scratch register. Also spill R4 if Thumb2 function has varsized objects,
+ // since it's not always possible to restore sp from fp in a single
+ // instruction.
+ // FIXME: It will be better just to find spare register here.
+ if (AFI->isThumb2Function() &&
+ (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+ MRI.setPhysRegUsed(ARM::R4);
+
+ if (AFI->isThumb1OnlyFunction()) {
+ // Spill LR if Thumb1 function uses variable length argument lists.
+ if (AFI->getVarArgsRegSaveSize() > 0)
+ MRI.setPhysRegUsed(ARM::LR);
+
+ // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
+ // for sure what the stack size will be, but for this, an estimate is good
+ // enough. If there anything changes it, it'll be a spill, which implies
+ // we've used all the registers and so R4 is already used, so not marking
+ // it here will be OK.
+ // FIXME: It will be better just to find spare register here.
+ unsigned StackSize = MFI->estimateStackSize(MF);
+ if (MFI->hasVarSizedObjects() || StackSize > 508)
+ MRI.setPhysRegUsed(ARM::R4);
+ }
+
+ // See if we can spill vector registers to aligned stack.
+ checkNumAlignedDPRCS2Regs(MF);
+
+ // Spill the BasePtr if it's used.
+ if (RegInfo->hasBasePointer(MF))
+ MRI.setPhysRegUsed(RegInfo->getBaseRegister());
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MRI.isPhysRegUsed(Reg)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+
+ if (!ARM::GPRRegClass.contains(Reg))
+ continue;
+
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetIOS()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetIOS()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+ // Force LR to be spilled if the Thumb function size is > 2048. This enables
+ // use of BL to implement far jump. If it turns out that it's not needed
+ // then the branch fix up path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ // If any of the stack slot references may be out of range of an immediate
+ // offset, make sure a register (or a spill slot) is available for the
+ // register scavenger. Note that if we're indexing off the frame pointer, the
+ // effective stack size is 4 bytes larger since the FP points to the stack
+ // slot of the previous FP. Also, if we have variable sized objects in the
+ // function, stack slot references will often be negative, and some of
+ // our instructions are positive-offset only, so conservatively consider
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
+ // FIXME: We could add logic to be more precise about negative offsets
+ // and which instructions will need a scratch register for them. Is it
+ // worth the effort and added fragility?
+ bool BigStack =
+ (RS &&
+ (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF, this)))
+ || MFI->hasVarSizedObjects()
+ || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+
+ bool ExtraCSSpill = false;
+ if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
+ // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MRI.setPhysRegUsed(ARM::LR);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ if (hasFP(MF)) {
+ MRI.setPhysRegUsed(FramePtr);
+ NumGPRSpills++;
+ }
+
+ // If stack and double are 8-byte aligned and we are spilling an odd number
+ // of GPRs, spill one extra callee save GPR so we won't have to pad between
+ // the integer and double callee save areas.
+ unsigned TargetAlign = getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill high register if the function is thumb1
+ if (!AFI->isThumb1OnlyFunction() ||
+ isARMLowRegister(Reg) || Reg == ARM::LR) {
+ MRI.setPhysRegUsed(Reg);
+ if (!MRI.isReserved(Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ MRI.setPhysRegUsed(Reg);
+ if (!MRI.isReserved(Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging. Thumb1 needs a spill slot for stack pointer
+ // adjustments also, even when the frame itself is small.
+ if (BigStack && !ExtraCSSpill) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!MRI.isReserved(Reg) &&
+ (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+ Reg == ARM::LR)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ // For non-Thumb1 functions, also check for hi-reg CS registers
+ if (!AFI->isThumb1OnlyFunction()) {
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!MRI.isReserved(Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MRI.setPhysRegUsed(Extras[i]);
+ }
+ } else if (!AFI->isThumb1OnlyFunction()) {
+ // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
+ // closest to SP or frame pointer.
+ const TargetRegisterClass *RC = &ARM::GPRRegClass;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ MRI.setPhysRegUsed(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
+
+
+void ARMFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = Old->getOperand(2).getReg();
+ emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
+ Pred, PredReg);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
+ Pred, PredReg);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
new file mode 100644
index 000000000000..efa255a5574a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -0,0 +1,82 @@
+//==-- ARMTargetFrameLowering.h - Define frame lowering for ARM --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMFrameLowering : public TargetFrameLowering {
+protected:
+ const ARMSubtarget &STI;
+
+public:
+ explicit ARMFrameLowering(const ARMSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const;
+ int ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg, int SPAdj) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ private:
+ void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
+ unsigned StrOpc, bool NoGap,
+ bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs,
+ unsigned MIFlags = 0) const;
+ void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
+ unsigned LdrOpc, bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs) const;
+
+ virtual void eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
new file mode 100644
index 000000000000..1240169e84ed
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -0,0 +1,98 @@
+//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMHazardRecognizer.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
+ const TargetRegisterInfo &TRI) {
+ // FIXME: Detect integer instructions properly.
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MI->mayStore())
+ return false;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ return false;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
+
+ MachineInstr *MI = SU->getInstr();
+
+ if (!MI->isDebugValue()) {
+ // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
+ // a VMLA / VMLS will cause 4 cycle stall.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+ MachineInstr *DefMI = LastMI;
+ const MCInstrDesc &LastMCID = LastMI->getDesc();
+ // Skip over one non-VFP / NEON instruction.
+ if (!LastMI->isBarrier() &&
+ // On A9, AGU and NEON/FPU are muxed.
+ !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
+ (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+ MachineBasicBlock::iterator I = LastMI;
+ if (I != LastMI->getParent()->begin()) {
+ I = llvm::prior(I);
+ DefMI = &*I;
+ }
+ }
+
+ if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
+ (TII.canCauseFpMLxStall(MI->getOpcode()) ||
+ hasRAWHazard(DefMI, MI, TRI))) {
+ // Try to schedule another instruction for the next 4 cycles.
+ if (FpMLxStalls == 0)
+ FpMLxStalls = 4;
+ return Hazard;
+ }
+ }
+ }
+
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void ARMHazardRecognizer::Reset() {
+ LastMI = 0;
+ FpMLxStalls = 0;
+ ScoreboardHazardRecognizer::Reset();
+}
+
+void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ if (!MI->isDebugValue()) {
+ LastMI = MI;
+ FpMLxStalls = 0;
+ }
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void ARMHazardRecognizer::AdvanceCycle() {
+ if (FpMLxStalls && --FpMLxStalls == 0)
+ // Stalled for 4 cycles but still can't schedule any other instructions.
+ LastMI = 0;
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void ARMHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("reverse ARM hazard checking unsupported");
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
new file mode 100644
index 000000000000..98bfc4cf0cc5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -0,0 +1,56 @@
+//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling ARM functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMHAZARDRECOGNIZER_H
+#define ARMHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+
+namespace llvm {
+
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
+class MachineInstr;
+
+/// ARMHazardRecognizer handles special constraints that are not expressed in
+/// the scheduling itinerary. This is only used during postRA scheduling. The
+/// ARM preRA scheduler uses an unspecialized instance of the
+/// ScoreboardHazardRecognizer.
+class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ARMBaseInstrInfo &TII;
+ const ARMBaseRegisterInfo &TRI;
+ const ARMSubtarget &STI;
+
+ MachineInstr *LastMI;
+ unsigned FpMLxStalls;
+
+public:
+ ARMHazardRecognizer(const InstrItineraryData *ItinData,
+ const ARMBaseInstrInfo &tii,
+ const ARMBaseRegisterInfo &tri,
+ const ARMSubtarget &sti,
+ const ScheduleDAG *DAG) :
+ ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
+ TRI(tri), STI(sti), LastMI(0) {}
+
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void RecedeCycle();
+};
+
+} // end namespace llvm
+
+#endif // ARMHAZARDRECOGNIZER_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
new file mode 100644
index 000000000000..2c51de23f7dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -0,0 +1,3603 @@
+//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
+static cl::opt<bool>
+CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
+ cl::desc("Check fp vmla / vmls hazard at isel time"),
+ cl::init(true));
+
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+
+enum AddrMode2Type {
+ AM2_BASE, // Simple AM2 (+-imm12)
+ AM2_SHOP // Shifter-op AM2
+};
+
+class ARMDAGToDAGISel : public SelectionDAGISel {
+ ARMBaseTargetMachine &TM;
+ const ARMBaseInstrInfo *TII;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
+ TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Instruction Selection";
+ }
+
+ virtual void PreprocessISelDAG();
+
+ /// getI32Imm - Return a target constant of type i32 with the specified
+ /// value.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ SDNode *Select(SDNode *N);
+
+
+ bool hasNoVMLxHazardUse(SDNode *N) const;
+ bool isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
+ bool SelectRegShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C,
+ bool CheckProfitability = true);
+ bool SelectImmShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, bool CheckProfitability = true);
+ bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C) {
+ // Don't apply the profitability check
+ return SelectRegShifterOperand(N, A, B, C, false);
+ }
+ bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
+ SDValue &B) {
+ // Don't apply the profitability check
+ return SelectImmShifterOperand(N, A, B, false);
+ }
+
+ bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
+
+ AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
+ }
+
+ bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
+ }
+
+ bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ SelectAddrMode2Worker(N, Base, Offset, Opc);
+// return SelectAddrMode2ShOp(N, Base, Offset, Opc);
+ // This always matches one way or another.
+ return true;
+ }
+
+ bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
+ bool SelectAddrMode3(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode5(SDValue N, SDValue &Base,
+ SDValue &Offset);
+ bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+ bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
+
+ bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
+
+ // Thumb Addressing Modes:
+ bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
+ unsigned Scale);
+ bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
+
+ // Thumb 2 Addressing Modes:
+ bool SelectT2ShifterOperandReg(SDValue N,
+ SDValue &BaseReg, SDValue &Opc);
+ bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm);
+ bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm);
+
+ inline bool is_so_imm(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }
+
+ inline bool is_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(~Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(~Imm) != -1;
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+
+private:
+ /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
+ /// ARM.
+ SDNode *SelectARMIndexedLoad(SDNode *N);
+ SDNode *SelectT2IndexedLoad(SDNode *N);
+
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// loads of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
+
+ /// SelectVST - Select NEON store intrinsics. NumVecs should
+ /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// stores of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
+
+ /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
+ /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// load/store of D registers and Q registers.
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes, const uint16_t *QOpcodes);
+
+ /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
+ /// should be 2, 3 or 4. The opcode array specifies the instructions used
+ /// for loading D registers. (Q registers are not supported.)
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes);
+
+ /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
+ /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
+ /// generated to force the table registers to be consecutive.
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
+
+ /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
+ SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
+
+ /// SelectCMOVOp - Select CMOV instructions for ARM.
+ SDNode *SelectCMOVOp(SDNode *N);
+ SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+
+ // Select special operations if node forms integer ABS pattern
+ SDNode *SelectABSOp(SDNode *N);
+
+ SDNode *SelectInlineAsm(SDNode *N);
+
+ SDNode *SelectConcatVector(SDNode *N);
+
+ SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ // Form pairs of consecutive R, S, D, or Q registers.
+ SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
+
+ // Form sequences of 4 consecutive S, D, or Q registers.
+ SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+ // Get the alignment operand for a NEON VLD or VST instruction.
+ SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
+};
+}
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if a constant operand.
+// If so Imm will receive the 32 bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has a immediate integer right operand.
+// If so Imm will receive the 32 bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc &&
+ isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+/// \brief Check whether a particular node is a constant value representable as
+/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
+///
+/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
+static bool isScaledConstantInRange(SDValue Node, int Scale,
+ int RangeMin, int RangeMax,
+ int &ScaledConstant) {
+ assert(Scale > 0 && "Invalid scale!");
+
+ // Check that this is a constant.
+ const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
+ if (!C)
+ return false;
+
+ ScaledConstant = (int) C->getZExtValue();
+ if ((ScaledConstant % Scale) != 0)
+ return false;
+
+ ScaledConstant /= Scale;
+ return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
+}
+
+void ARMDAGToDAGISel::PreprocessISelDAG() {
+ if (!Subtarget->hasV6T2Ops())
+ return;
+
+ bool isThumb2 = Subtarget->isThumb();
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (N->getOpcode() != ISD::ADD)
+ continue;
+
+ // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
+ // leading zeros, followed by consecutive set bits, followed by 1 or 2
+ // trailing zeros, e.g. 1020.
+ // Transform the expression to
+ // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
+ // of trailing zeros of c2. The left shift would be folded as an shifter
+ // operand of 'add' and the 'and' and 'srl' would become a bits extraction
+ // node (UBFX).
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned And_imm = 0;
+ if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
+ if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
+ std::swap(N0, N1);
+ }
+ if (!And_imm)
+ continue;
+
+ // Check if the AND mask is an immediate of the form: 000.....1111111100
+ unsigned TZ = CountTrailingZeros_32(And_imm);
+ if (TZ != 1 && TZ != 2)
+ // Be conservative here. Shifter operands aren't always free. e.g. On
+ // Swift, left shifter operand of 1 / 2 for free but others are not.
+ // e.g.
+ // ubfx r3, r1, #16, #8
+ // ldr.w r3, [r0, r3, lsl #2]
+ // vs.
+ // mov.w r9, #1020
+ // and.w r2, r9, r1, lsr #14
+ // ldr r2, [r0, r2]
+ continue;
+ And_imm >>= TZ;
+ if (And_imm & (And_imm + 1))
+ continue;
+
+ // Look for (and (srl X, c1), c2).
+ SDValue Srl = N1.getOperand(0);
+ unsigned Srl_imm = 0;
+ if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
+ (Srl_imm <= 2))
+ continue;
+
+ // Make sure first operand is not a shifter operand which would prevent
+ // folding of the left shift.
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (isThumb2) {
+ if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
+ continue;
+ } else {
+ if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
+ SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
+ continue;
+ }
+
+ // Now make the transformation.
+ Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32,
+ Srl.getOperand(0),
+ CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
+ N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32,
+ Srl, CurDAG->getConstant(And_imm, MVT::i32));
+ N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32,
+ N1, CurDAG->getConstant(TZ, MVT::i32));
+ CurDAG->UpdateNodeOperands(N, N0, N1);
+ }
+}
+
+/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
+/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
+/// least on current ARM implementations) which should be avoidded.
+bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
+ if (OptLevel == CodeGenOpt::None)
+ return true;
+
+ if (!CheckVMLxHazard)
+ return true;
+
+ if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() &&
+ !Subtarget->isSwift())
+ return true;
+
+ if (!N->hasOneUse())
+ return false;
+
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg)
+ return true;
+ if (Use->isMachineOpcode()) {
+ const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
+ if (MCID.mayStore())
+ return true;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return true;
+ // vmlx feeding into another vmlx. We actually want to unfold
+ // the use later in the MLxExpansion pass. e.g.
+ // vmla
+ // vmla (stall 8 cycles)
+ //
+ // vmul (5 cycles)
+ // vadd (5 cycles)
+ // vmla
+ // This adds up to about 18 - 19 cycles.
+ //
+ // vmla
+ // vmul (stall 4 cycles)
+ // vadd adds up to about 14 cycles.
+ return TII->isFpMLxInstruction(Opcode);
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal,
+ unsigned ShAmt) {
+ if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
+ return true;
+ if (Shift.hasOneUse())
+ return true;
+ // R << 2 is free.
+ return ShOpcVal == ARM_AM::lsl &&
+ (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
+}
+
+bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
+ SDValue &BaseReg,
+ SDValue &Opc,
+ bool CheckProfitability) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!RHS) return false;
+ ShImmVal = RHS->getZExtValue() & 31;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc,
+ bool CheckProfitability) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (RHS) return false;
+
+ ShReg = N.getOperand(1);
+ if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ return false;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
+ SDValue &Base,
+ SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+
+bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ISD::ADD.
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave simple R +/- imm12 operands for LDRi12
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) // 12 bits.
+ return false;
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
+ ARM_AM::ShiftOpc ShOpcVal =
+ ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+
+//-----
+
+AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
+ SDValue &Base,
+ SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return AM2_SHOP;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ADD.
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Match simple R +/- imm12 operands.
+ if (N.getOpcode() != ISD::SUB) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) { // 12 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+ }
+
+ if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
+ // Compute R +/- (R << N) and reuse it.
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal =
+ ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return AM2_SHOP;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
+ return false;
+
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+ Offset = N.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ if (AddSub == ARM_AM::sub) Val *= -1;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(Val, MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+ // X - C is canonicalize to X + -C, no need to handle it here.
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -256 + 1, 256, RHSC)) { // 8 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ return true;
+ }
+
+ Offset = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
+ -256 + 1, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ MVT::i32);
+ return true;
+ }
+
+ Base = N;
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
+ SDValue &Align) {
+ Addr = N;
+
+ unsigned Alignment = 0;
+ if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
+ // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
+ // The maximum alignment is equal to the memory size being referenced.
+ unsigned LSNAlign = LSN->getAlignment();
+ unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
+ if (LSNAlign >= MemSize && MemSize > 1)
+ Alignment = MemSize;
+ } else {
+ // All other uses of addrmode6 are for intrinsics. For now just record
+ // the raw alignment value; it will be refined later based on the legal
+ // alignment operands for the intrinsic.
+ Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
+ }
+
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+ SDValue &Offset) {
+ LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+ ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+ if (AM != ISD::POST_INC)
+ return false;
+ Offset = N;
+ if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+ if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ }
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
+ SDValue &Offset, SDValue &Label) {
+ if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+ Offset = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Addressing Modes
+//===----------------------------------------------------------------------===//
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
+ SDValue &Base, SDValue &Offset){
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
+ ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
+ if (!NC || !NC->isNullValue())
+ return false;
+
+ Base = Offset = N;
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
+ SDValue &Offset, unsigned Scale) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Thumb does not have [sp, r] address mode.
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP))
+ return false;
+
+ // FIXME: Why do we explicitly check for a match here and then return false?
+ // Presumably to allow something else to match, but shouldn't this be
+ // documented?
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
+ return false;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 1);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 2);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 4);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
+ SDValue &Base, SDValue &OffImm) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else {
+ Base = N;
+ }
+
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP)) {
+ ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
+ unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
+
+ // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
+ if (LHSC != 0 || RHSC != 0) return false;
+
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // If the RHS is + imm5 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
+ (LHSR && LHSR->getReg() == ARM::SP)) {
+ // If the RHS is + imm8 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Thumb 2 Addressing Modes
+//===----------------------------------------------------------------------===//
+
+
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
+ SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShImmVal = RHS->getZExtValue() & 31;
+ Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select t2LDRpci instead.
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ if (SelectT2AddrModeImm8(N, Base, OffImm))
+ // Let t2LDRi8 handle (R - imm8).
+ return false;
+
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R - imm8 operands.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getSExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm){
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ int RHSC;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
+ OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
+ ? CurDAG->getTargetConstant(RHSC, MVT::i32)
+ : CurDAG->getTargetConstant(-RHSC, MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
+ SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm) {
+ // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
+ return false;
+ else if (RHSC < 0 && RHSC >= -255) // 8 bits
+ return false;
+ }
+
+ // Look for (R + R) or (R + (R << [1,2,3])).
+ unsigned ShAmt = 0;
+ Base = N.getOperand(0);
+ OffReg = N.getOperand(1);
+
+ // Swap if it is ((R << c) + R).
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
+ if (ShOpcVal != ARM_AM::lsl) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
+ if (ShOpcVal == ARM_AM::lsl)
+ std::swap(Base, OffReg);
+ }
+
+ if (ShOpcVal == ARM_AM::lsl) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+ OffReg = OffReg.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
+
+ return true;
+}
+
+//===--------------------------------------------------------------------===//
+
+/// getAL - Returns a ARMCC::AL immediate node.
+static inline SDValue getAL(SelectionDAG *CurDAG) {
+ return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
+}
+
+SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return NULL;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Offset, AMOpc;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (LoadedVT == MVT::i32 && isPre &&
+ SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = ARM::LDR_PRE_IMM;
+ Match = true;
+ } else if (LoadedVT == MVT::i32 && !isPre &&
+ SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = ARM::LDR_POST_IMM;
+ Match = true;
+ } else if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
+ Match = true;
+
+ } else if (LoadedVT == MVT::i16 &&
+ SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+ ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+ : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+ } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+ if (LD->getExtensionType() == ISD::SEXTLOAD) {
+ if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+ }
+ } else {
+ if (isPre &&
+ SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = ARM::LDRB_PRE_IMM;
+ } else if (!isPre &&
+ SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = ARM::LDRB_POST_IMM;
+ } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
+ }
+ }
+ }
+
+ if (Match) {
+ if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ MVT::i32, MVT::Other, Ops, 5);
+ } else {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ MVT::i32, MVT::Other, Ops, 6);
+ }
+ }
+
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return NULL;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ SDValue Offset;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
+ break;
+ case MVT::i16:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
+ break;
+ case MVT::i8:
+ case MVT::i1:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
+ break;
+ default:
+ return NULL;
+ }
+ Match = true;
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+ MVT::Other, Ops, 5);
+ }
+
+ return NULL;
+}
+
+/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
+SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// \brief Form a D register from a pair of S registers.
+SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// \brief Form a quad register from a pair of D registers.
+SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// \brief Form 4 consecutive D registers from a pair of Q registers.
+SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// \brief Form 4 consecutive S registers.
+SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// \brief Form 4 consecutive D registers.
+SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// \brief Form 4 consecutive Q registers.
+SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
+/// of a NEON VLD or VST instruction. The supported values depend on the
+/// number of registers being loaded.
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
+ bool is64BitVector) {
+ unsigned NumRegs = NumVecs;
+ if (!is64BitVector && NumVecs < 3)
+ NumRegs *= 2;
+
+ unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ if (Alignment >= 32 && NumRegs == 4)
+ Alignment = 32;
+ else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
+ Alignment = 16;
+ else if (Alignment >= 8)
+ Alignment = 8;
+ else
+ Alignment = 0;
+
+ return CurDAG->getTargetConstant(Alignment, MVT::i32);
+}
+
+// Get the register stride update opcode of a VLD/VST instruction that
+// is otherwise equivalent to the given fixed stride updating instruction.
+static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
+ case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
+ case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
+ case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
+ case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
+ case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
+ case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
+ case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
+
+ case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
+ case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
+ case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
+ case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
+ case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
+ case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
+ case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
+ case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
+ case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
+ case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
+
+ case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
+ case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
+ case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
+ case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
+ case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
+ case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+
+ case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
+ case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
+ case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
+ case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+ case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+ case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
+
+ case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
+ case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
+ case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
+ }
+ return Opc; // If not one we handle, return it unchanged.
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
+ }
+
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
+ std::vector<EVT> ResTys;
+ ResTys.push_back(ResTy);
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDNode *VLd;
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VLD1/VLD2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+ } else {
+ // Otherwise, quad registers are loaded with two separate instructions,
+ // where one loads the even registers and the other loads the odd registers.
+ EVT AddrTy = MemAddr.getValueType();
+
+ // Load the even subregs. This is always an updating load, so that it
+ // provides the address to the second load for the odd subregs.
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
+
+ // Load the odd subregs.
+ Ops.push_back(SDValue(VLdA, 1));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VLD3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(SDValue(VLdA, 0));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ }
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // Extract out the subregisters.
+ SDValue SuperReg = SDValue(VLd, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VST1/VST2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ SDValue SrcReg;
+ if (NumVecs == 1) {
+ SrcReg = N->getOperand(Vec0Idx);
+ } else if (is64BitVector) {
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2)
+ SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ // If it's a vst3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // Form a QQ register.
+ SDValue Q0 = N->getOperand(Vec0Idx);
+ SDValue Q1 = N->getOperand(Vec0Idx + 1);
+ SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
+ }
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(SrcReg);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ SDNode *VSt =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+ // Transfer memoperands.
+ cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+
+ return VSt;
+ }
+
+ // Otherwise, quad registers are stored with two separate instructions,
+ // where one stores the even registers and the other stores the odd registers.
+
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
+
+ // Store the even D registers. This is always an updating store, so that it
+ // provides the address to the second store for the odd subregs.
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
+ SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ MemAddr.getValueType(),
+ MVT::Other, OpsA, 7);
+ cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops.push_back(SDValue(VStA, 0));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VST3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(RegSeq);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
+ return VStB;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ unsigned Lane =
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld/vst lane type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ // Quad-register operations:
+ case MVT::v8i16: OpcodeIndex = 0; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 1; break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (IsLoad) {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
+ MVT::i64, ResTyElts));
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+
+ SDValue SuperReg;
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2) {
+ if (is64BitVector)
+ SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
+ else
+ SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
+ } else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ if (is64BitVector)
+ SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
+ else
+ SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
+ }
+ Ops.push_back(SuperReg);
+ Ops.push_back(getI32Imm(Lane));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes[OpcodeIndex]);
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
+ Ops.data(), Ops.size());
+ cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ if (!IsLoad)
+ return VLdLn;
+
+ // Extract the subregisters.
+ SuperReg = SDValue(VLdLn, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld-dup type");
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ }
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
+ unsigned Opc = Opcodes[OpcodeIndex];
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ // fixed-stride update instructions don't have an explicit writeback
+ // operand. It's implicit in the opcode itself.
+ SDValue Inc = N->getOperand(2);
+ if (!isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(Inc);
+ // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
+ else if (NumVecs > 2)
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ std::vector<EVT> ResTys;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLdDup =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+ SuperReg = SDValue(VLdDup, 0);
+
+ // Extract the subregisters.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ unsigned SubIdx = ARM::dsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ unsigned FirstTblReg = IsExt ? 2 : 1;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(FirstTblReg + 0);
+ SDValue V1 = N->getOperand(FirstTblReg + 1);
+ if (NumVecs == 2)
+ RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(FirstTblReg + 2);
+ // If it's a vtbl3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(FirstTblReg + 3);
+ RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ SmallVector<SDValue, 6> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
+ Ops.push_back(getAL(CurDAG)); // predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+}
+
+SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
+ bool isSigned) {
+ if (!Subtarget->hasV6T2Ops())
+ return NULL;
+
+ unsigned Opc = isSigned
+ ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
+
+ // For unsigned extracts, check for a shift right and mask
+ unsigned And_imm = 0;
+ if (N->getOpcode() == ISD::AND) {
+ if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
+
+ // The immediate is a mask of the low bits iff imm & (imm+1) == 0
+ if (And_imm & (And_imm + 1))
+ return NULL;
+
+ unsigned Srl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
+ Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = CountTrailingOnes_32(And_imm) - 1;
+ unsigned LSB = Srl_imm;
+
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
+ // It's cheaper to use a right shift to extract the top bits.
+ if (Subtarget->isThumb()) {
+ Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ // ARM models shift instructions as MOVsi with shifter operand.
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
+ SDValue ShOpc =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB),
+ MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
+ getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5);
+ }
+
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+ }
+
+ // Otherwise, we're looking for a shift of a shift
+ unsigned Shl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
+ assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
+ unsigned Srl_imm = 0;
+ if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = 32 - Srl_imm - 1;
+ int LSB = Srl_imm - Shl_imm;
+ if (LSB < 0)
+ return NULL;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
+ unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
+ unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
+ unsigned Opc = 0;
+ switch (SOShOp) {
+ case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
+ case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
+ case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
+ case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
+ default:
+ llvm_unreachable("Unknown so_reg opcode!");
+ }
+ SDValue SOShImm =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6);
+ }
+
+ if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ if (is_t2_so_imm(TrueImm)) {
+ Opc = ARM::t2MOVCCi;
+ } else if (TrueImm <= 0xffff) {
+ Opc = ARM::t2MOVCCi16;
+ } else if (is_t2_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::t2MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
+ // Large immediate.
+ Opc = ARM::t2MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ bool isSoImm = is_so_imm(TrueImm);
+ if (isSoImm) {
+ Opc = ARM::MOVCCi;
+ } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
+ Opc = ARM::MOVCCi16;
+ } else if (is_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() &&
+ (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
+ // Large immediate.
+ Opc = ARM::MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+ SDValue CCR = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(CC.getOpcode() == ISD::Constant);
+ assert(CCR.getOpcode() == ISD::Register);
+ ARMCC::CondCodes CCVal =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also VMOVScc and VMOVDcc.
+ SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Illegal conditional move type!");
+ case MVT::i32:
+ Opc = Subtarget->isThumb()
+ ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
+ : ARM::MOVCCr;
+ break;
+ case MVT::f32:
+ Opc = ARM::VMOVScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::VMOVDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
+}
+
+/// Target-specific DAG combining for ISD::XOR.
+/// Target-independent combining lowers SELECT_CC nodes of the form
+/// select_cc setg[ge] X, 0, X, -X
+/// select_cc setgt X, -1, X, -X
+/// select_cc setl[te] X, 0, -X, X
+/// select_cc setlt X, 1, -X, X
+/// which represent Integer ABS into:
+/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// ARM instruction selection detects the latter and matches it to
+/// ARM::ABS or ARM::t2ABS machine node.
+SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
+ SDValue XORSrc0 = N->getOperand(0);
+ SDValue XORSrc1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ if (Subtarget->isThumb1Only())
+ return NULL;
+
+ if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
+ return NULL;
+
+ SDValue ADDSrc0 = XORSrc0.getOperand(0);
+ SDValue ADDSrc1 = XORSrc0.getOperand(1);
+ SDValue SRASrc0 = XORSrc1.getOperand(0);
+ SDValue SRASrc1 = XORSrc1.getOperand(1);
+ ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
+ EVT XType = SRASrc0.getValueType();
+ unsigned Size = XType.getSizeInBits() - 1;
+
+ if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
+ XType.isInteger() && SRAConstant != NULL &&
+ Size == SRAConstant->getZExtValue()) {
+ unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
+ return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+ }
+
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() || N->getNumOperands() != 2)
+ llvm_unreachable("unexpected CONCAT_VECTORS");
+ return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
+}
+
+SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(Node->getOperand(1)); // Ptr
+ Ops.push_back(Node->getOperand(2)); // Low part of Val1
+ Ops.push_back(Node->getOperand(3)); // High part of Val1
+ if (Opc == ARM::ATOMCMPXCHG6432) {
+ Ops.push_back(Node->getOperand(4)); // Low part of Val2
+ Ops.push_back(Node->getOperand(5)); // High part of Val2
+ }
+ Ops.push_back(Node->getOperand(0)); // Chain
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other,
+ Ops.data() ,Ops.size());
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ return ResNode;
+}
+
+SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::INLINEASM: {
+ SDNode *ResNode = SelectInlineAsm(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
+ case ISD::XOR: {
+ // Select special operations if XOR node forms integer ABS pattern
+ SDNode *ResNode = SelectABSOp(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::Constant: {
+ unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ bool UseCP = true;
+ if (Subtarget->hasThumb2())
+ // Thumb2-aware targets have the MOVT instruction, so all immediates can
+ // be done with MOV + MOVT, at worst.
+ UseCP = 0;
+ else {
+ if (Subtarget->isThumb()) {
+ UseCP = (Val > 255 && // MOV
+ ~Val > 255 && // MOV + MVN
+ !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
+ } else
+ UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
+ ARM_AM::getSOImmVal(~Val) == -1 && // MVN
+ !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
+ }
+
+ if (UseCP) {
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI.getPointerTy());
+
+ SDNode *ResNode;
+ if (Subtarget->isThumb1Only()) {
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
+ ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
+ Ops, 4);
+ } else {
+ SDValue Ops[] = {
+ CPIdx,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
+ ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
+ Ops, 5);
+ }
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
+ return NULL;
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::FrameIndex: {
+ // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ if (Subtarget->isThumb1Only()) {
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4);
+ } else {
+ unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
+ ARM::t2ADDri : ARM::ADDri);
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::SRL:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+ break;
+ case ISD::SRA:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
+ return I;
+ break;
+ case ISD::MUL:
+ if (Subtarget->isThumb1Only())
+ break;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned RHSV = C->getZExtValue();
+ if (!RHSV) break;
+ if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ unsigned ShImm = Log2_32(RHSV-1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7);
+ }
+ }
+ if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ unsigned ShImm = Log2_32(RHSV+1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7);
+ }
+ }
+ }
+ break;
+ case ISD::AND: {
+ // Check for unsigned bitfield extract
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+
+ // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
+ // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
+ // are entirely contributed by c2 and lower 16-bits are entirely contributed
+ // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
+ // Select it to: "movt x, ((c1 & 0xffff) >> 16)
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ break;
+ unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ? ARM::t2MOVTi16
+ : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
+ if (!Opc)
+ break;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (!N1C)
+ break;
+ if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
+ SDValue N2 = N0.getOperand(1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ if (!N2C)
+ break;
+ unsigned N1CVal = N1C->getZExtValue();
+ unsigned N2CVal = N2C->getZExtValue();
+ if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
+ (N1CVal & 0xffffU) == 0xffffU &&
+ (N2CVal & 0xffffU) == 0x0U) {
+ SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
+ MVT::i32);
+ SDValue Ops[] = { N0.getOperand(0), Imm16,
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
+ }
+ }
+ break;
+ }
+ case ARMISD::VMOVRRD:
+ return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
+ N->getOperand(0), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32));
+ case ISD::UMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMULL : ARM::UMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::SMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMULL : ARM::SMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ }
+ case ARMISD::UMLAL:{
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32)};
+ return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+ }else{
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMLAL : ARM::UMLALv5,
+ dl, MVT::i32, MVT::i32, Ops, 7);
+ }
+ }
+ case ARMISD::SMLAL:{
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32)};
+ return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+ }else{
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMLAL : ARM::SMLALv5,
+ dl, MVT::i32, MVT::i32, Ops, 7);
+ }
+ }
+ case ISD::LOAD: {
+ SDNode *ResNode = 0;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ResNode = SelectT2IndexedLoad(N);
+ else
+ ResNode = SelectARMIndexedLoad(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
+ case ARMISD::BRCOND: {
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ unsigned Opc = Subtarget->isThumb() ?
+ ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(N1.getOpcode() == ISD::BasicBlock);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+ MVT::Glue, Ops, 5);
+ Chain = SDValue(ResNode, 0);
+ if (N->getNumValues() == 2) {
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(N, 1), InFlag);
+ }
+ ReplaceUses(SDValue(N, 0),
+ SDValue(Chain.getNode(), Chain.getResNo()));
+ return NULL;
+ }
+ case ARMISD::CMOV:
+ return SelectCMOVOp(N);
+ case ARMISD::VZIP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VZIPd8; break;
+ case MVT::v4i16: Opc = ARM::VZIPd16; break;
+ case MVT::v2f32:
+ // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VZIPq8; break;
+ case MVT::v8i16: Opc = ARM::VZIPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VZIPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::VUZP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VUZPd8; break;
+ case MVT::v4i16: Opc = ARM::VUZPd16; break;
+ case MVT::v2f32:
+ // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VUZPq8; break;
+ case MVT::v8i16: Opc = ARM::VUZPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VUZPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::VTRN: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VTRNd8; break;
+ case MVT::v4i16: Opc = ARM::VTRNd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VTRNq8; break;
+ case MVT::v8i16: Opc = ARM::VTRNq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VTRNq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::BUILD_VECTOR: {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (EltVT == MVT::f64) {
+ assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
+ return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
+ }
+ assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
+ if (NumElts == 2)
+ return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
+ assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
+ return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
+
+ case ARMISD::VLD2DUP: {
+ static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32 };
+ return SelectVLDDup(N, false, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP: {
+ static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
+ ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo };
+ return SelectVLDDup(N, false, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP: {
+ static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
+ ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo };
+ return SelectVLDDup(N, false, 4, Opcodes);
+ }
+
+ case ARMISD::VLD2DUP_UPD: {
+ static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
+ ARM::VLD2DUPd16wb_fixed,
+ ARM::VLD2DUPd32wb_fixed };
+ return SelectVLDDup(N, true, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP_UPD: {
+ static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
+ ARM::VLD3DUPd16Pseudo_UPD,
+ ARM::VLD3DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP_UPD: {
+ static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
+ ARM::VLD4DUPd16Pseudo_UPD,
+ ARM::VLD4DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 4, Opcodes);
+ }
+
+ case ARMISD::VLD1_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
+ ARM::VLD1d16wb_fixed,
+ ARM::VLD1d32wb_fixed,
+ ARM::VLD1d64wb_fixed };
+ static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
+ ARM::VLD1q16wb_fixed,
+ ARM::VLD1q32wb_fixed,
+ ARM::VLD1q64wb_fixed };
+ return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD2_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
+ ARM::VLD2d16wb_fixed,
+ ARM::VLD2d32wb_fixed,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
+ return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD3_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
+ ARM::VLD3d16Pseudo_UPD,
+ ARM::VLD3d32Pseudo_UPD,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
+ return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD4_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
+ ARM::VLD4d16Pseudo_UPD,
+ ARM::VLD4d32Pseudo_UPD,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
+ return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD2LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
+ ARM::VLD2LNd16Pseudo_UPD,
+ ARM::VLD2LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+ ARM::VLD2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD3LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
+ ARM::VLD3LNd16Pseudo_UPD,
+ ARM::VLD3LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+ ARM::VLD3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD4LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
+ ARM::VLD4LNd16Pseudo_UPD,
+ ARM::VLD4LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+ ARM::VLD4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST1_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
+ ARM::VST1d16wb_fixed,
+ ARM::VST1d32wb_fixed,
+ ARM::VST1d64wb_fixed };
+ static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
+ ARM::VST1q16wb_fixed,
+ ARM::VST1q32wb_fixed,
+ ARM::VST1q64wb_fixed };
+ return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST2_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
+ ARM::VST2d16wb_fixed,
+ ARM::VST2d32wb_fixed,
+ ARM::VST1q64wb_fixed};
+ static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
+ return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST3_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
+ ARM::VST3d16Pseudo_UPD,
+ ARM::VST3d32Pseudo_UPD,
+ ARM::VST1d64TPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
+ return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST4_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
+ ARM::VST4d16Pseudo_UPD,
+ ARM::VST4d32Pseudo_UPD,
+ ARM::VST1d64QPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
+ return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST2LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
+ ARM::VST2LNd16Pseudo_UPD,
+ ARM::VST2LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+ ARM::VST2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST3LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
+ ARM::VST3LNd16Pseudo_UPD,
+ ARM::VST3LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+ ARM::VST3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST4LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
+ ARM::VST4LNd16Pseudo_UPD,
+ ARM::VST4LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+ ARM::VST4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_ldrexd: {
+ SDValue MemAddr = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ unsigned NewOpc = isThumb ? ARM::t2LDREXD :ARM::LDREXD;
+
+ // arm_ldrexd returns a i64 value in {i32, i32}
+ std::vector<EVT> ResTys;
+ if (isThumb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ } else
+ ResTys.push_back(MVT::Untyped);
+ ResTys.push_back(MVT::Other);
+
+ // Place arguments in the right order.
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(getAL(CurDAG));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
+ Ops.size());
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+
+ // Remap uses.
+ SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
+ if (!SDValue(N, 0).use_empty()) {
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 0);
+ else {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
+ Result = SDValue(ResNode,0);
+ }
+ ReplaceUses(SDValue(N, 0), Result);
+ }
+ if (!SDValue(N, 1).use_empty()) {
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 1);
+ else {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
+ Result = SDValue(ResNode,0);
+ }
+ ReplaceUses(SDValue(N, 1), Result);
+ }
+ ReplaceUses(SDValue(N, 2), OutChain);
+ return NULL;
+ }
+
+ case Intrinsic::arm_strexd: {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+ SDValue Val0 = N->getOperand(2);
+ SDValue Val1 = N->getOperand(3);
+ SDValue MemAddr = N->getOperand(4);
+
+ // Store exclusive double return a i32 value which is the return status
+ // of the issued store.
+ EVT ResTys[] = { MVT::i32, MVT::Other };
+
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ // Place arguments in the right order.
+ SmallVector<SDValue, 7> Ops;
+ if (isThumb) {
+ Ops.push_back(Val0);
+ Ops.push_back(Val1);
+ } else
+ // arm_strexd uses GPRPair.
+ Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
+ Ops.push_back(MemAddr);
+ Ops.push_back(getAL(CurDAG));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(Chain);
+
+ unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
+
+ SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
+ Ops.size());
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+
+ return St;
+ }
+
+ case Intrinsic::arm_neon_vld1: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64};
+ return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vld2: {
+ static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
+ ARM::VLD2d32, ARM::VLD1q64 };
+ static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
+ return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vld3: {
+ static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
+ ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo,
+ ARM::VLD1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+ ARM::VLD3q16oddPseudo,
+ ARM::VLD3q32oddPseudo };
+ return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld4: {
+ static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
+ ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo,
+ ARM::VLD1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+ ARM::VLD4q16oddPseudo,
+ ARM::VLD4q32oddPseudo };
+ return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld2lane: {
+ static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
+ ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
+ ARM::VLD2LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld3lane: {
+ static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
+ ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
+ ARM::VLD3LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld4lane: {
+ static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
+ ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
+ ARM::VLD4LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst1: {
+ static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
+ return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vst2: {
+ static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
+ ARM::VST2d32, ARM::VST1q64 };
+ static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
+ return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vst3: {
+ static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
+ ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo,
+ ARM::VST1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
+ ARM::VST3q16oddPseudo,
+ ARM::VST3q32oddPseudo };
+ return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst4: {
+ static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
+ ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo,
+ ARM::VST1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
+ ARM::VST4q16oddPseudo,
+ ARM::VST4q32oddPseudo };
+ return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst2lane: {
+ static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
+ ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
+ ARM::VST2LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst3lane: {
+ static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
+ ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
+ ARM::VST3LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst4lane: {
+ static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
+ ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
+ ARM::VST4LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vtbl2:
+ return SelectVTBL(N, false, 2, ARM::VTBL2);
+ case Intrinsic::arm_neon_vtbl3:
+ return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
+ case Intrinsic::arm_neon_vtbl4:
+ return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
+
+ case Intrinsic::arm_neon_vtbx2:
+ return SelectVTBL(N, true, 2, ARM::VTBX2);
+ case Intrinsic::arm_neon_vtbx3:
+ return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
+ case Intrinsic::arm_neon_vtbx4:
+ return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
+ }
+ break;
+ }
+
+ case ARMISD::VTBL1: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 6> Ops;
+
+ Ops.push_back(N->getOperand(0));
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+ }
+ case ARMISD::VTBL2: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
+ Ops.data(), Ops.size());
+ }
+
+ case ISD::CONCAT_VECTORS:
+ return SelectConcatVector(N);
+
+ case ARMISD::ATOMOR64_DAG:
+ return SelectAtomic64(N, ARM::ATOMOR6432);
+ case ARMISD::ATOMXOR64_DAG:
+ return SelectAtomic64(N, ARM::ATOMXOR6432);
+ case ARMISD::ATOMADD64_DAG:
+ return SelectAtomic64(N, ARM::ATOMADD6432);
+ case ARMISD::ATOMSUB64_DAG:
+ return SelectAtomic64(N, ARM::ATOMSUB6432);
+ case ARMISD::ATOMNAND64_DAG:
+ return SelectAtomic64(N, ARM::ATOMNAND6432);
+ case ARMISD::ATOMAND64_DAG:
+ return SelectAtomic64(N, ARM::ATOMAND6432);
+ case ARMISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(N, ARM::ATOMSWAP6432);
+ case ARMISD::ATOMCMPXCHG64_DAG:
+ return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
+
+ case ARMISD::ATOMMIN64_DAG:
+ return SelectAtomic64(N, ARM::ATOMMIN6432);
+ case ARMISD::ATOMUMIN64_DAG:
+ return SelectAtomic64(N, ARM::ATOMUMIN6432);
+ case ARMISD::ATOMMAX64_DAG:
+ return SelectAtomic64(N, ARM::ATOMMAX6432);
+ case ARMISD::ATOMUMAX64_DAG:
+ return SelectAtomic64(N, ARM::ATOMUMAX6432);
+ }
+
+ return SelectCode(N);
+}
+
+SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
+ std::vector<SDValue> AsmNodeOperands;
+ unsigned Flag, Kind;
+ bool Changed = false;
+ unsigned NumOps = N->getNumOperands();
+
+ ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(
+ N->getOperand(InlineAsm::Op_AsmString));
+ StringRef AsmString = StringRef(S->getSymbol());
+
+ // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
+ // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
+ // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
+ // respectively. Since there is no constraint to explicitly specify a
+ // reg pair, we search %H operand inside the asm string. If it is found, the
+ // transformation below enforces a GPRPair reg class for "%r" for 64-bit data.
+ if (AsmString.find(":H}") == StringRef::npos)
+ return NULL;
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Glue = N->getOperand(NumOps-1);
+
+ // Glue node will be appended late.
+ for(unsigned i = 0; i < NumOps -1; ++i) {
+ SDValue op = N->getOperand(i);
+ AsmNodeOperands.push_back(op);
+
+ if (i < InlineAsm::Op_FirstOperand)
+ continue;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
+ Flag = C->getZExtValue();
+ Kind = InlineAsm::getKind(Flag);
+ }
+ else
+ continue;
+
+ if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
+ && Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ continue;
+
+ unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag);
+ unsigned RC;
+ bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2)
+ continue;
+
+ assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm");
+ SDValue V0 = N->getOperand(i+1);
+ SDValue V1 = N->getOperand(i+2);
+ unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ SDValue PairedReg;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ if (Kind == InlineAsm::Kind_RegDef ||
+ Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
+ // the original GPRs.
+
+ unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
+ SDValue Chain = SDValue(N,0);
+
+ SDNode *GU = N->getGluedUser();
+ SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
+ Chain.getValue(1));
+
+ // Extract values from a GPRPair reg and copy to the original GPR reg.
+ SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
+ RegCopy);
+ SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
+ RegCopy);
+ SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
+ RegCopy.getValue(1));
+ SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
+
+ // Update the original glue user.
+ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
+ Ops.push_back(T1.getValue(1));
+ CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size());
+ GU = T1.getNode();
+ }
+ else {
+ // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ // GPRPair and then pass the GPRPair to the inline asm.
+ SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
+
+ // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
+ Chain.getValue(1));
+ SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
+ T0.getValue(1));
+ SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
+
+ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
+ // i32 VRs of inline asm with it.
+ unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
+ Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
+
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ Glue = Chain.getValue(1);
+ }
+
+ Changed = true;
+
+ if(PairedReg.getNode()) {
+ Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
+ // Replace the current flag.
+ AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
+ Flag, MVT::i32);
+ // Add the new register node and skip the original two GPRs.
+ AsmNodeOperands.push_back(PairedReg);
+ // Skip the next two GPRs.
+ i += 2;
+ }
+ }
+
+ AsmNodeOperands.push_back(Glue);
+ if (!Changed)
+ return NULL;
+
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
+ AsmNodeOperands.size());
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
+
+bool ARMDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ // Require the address to be in a register. That is safe for all ARM
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+}
+
+/// createARMISelDag - This pass converts a legalized DAG into a
+/// ARM-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new ARMDAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 000000000000..bb26090d2d8d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,10445 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARMISelLowering.h"
+#include "ARM.h"
+#include "ARMCallingConv.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMPerfectShuffle.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
+STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
+
+// This option should go away when tail calls fully work.
+static cl::opt<bool>
+EnableARMTailCalls("arm-tail-calls", cl::Hidden,
+ cl::desc("Generate tail calls (TEMPORARY OPTION)."),
+ cl::init(false));
+
+cl::opt<bool>
+EnableARMLongCalls("arm-long-calls", cl::Hidden,
+ cl::desc("Generate calls via indirect call instructions"),
+ cl::init(false));
+
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+ cl::desc("Enable / disable ARM interworking (for debugging only)"),
+ cl::init(true));
+
+namespace {
+ class ARMCCState : public CCState {
+ public:
+ ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &C, ParmContext PC)
+ : CCState(CC, isVarArg, MF, TM, locs, C) {
+ assert(((PC == Call) || (PC == Prologue)) &&
+ "ARMCCState users must specify whether their context is call"
+ "or prologue generation.");
+ CallOrPrologue = PC;
+ }
+ };
+}
+
+// The APCS parameter registers.
+static const uint16_t GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+};
+
+void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
+ MVT PromotedBitwiseVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
+
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
+ }
+
+ MVT ElemTy = VT.getVectorElementType();
+ if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ if (ElemTy == MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ if (VT.isInteger()) {
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ }
+
+ // Promote all bit-wise operations.
+ if (VT.isInteger() && VT != PromotedBitwiseVT) {
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
+ }
+
+ // Neon does not support vector divide/remainder operations.
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+}
+
+void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, &ARM::DPRRegClass);
+ addTypeForNEON(VT, MVT::f64, MVT::v2i32);
+}
+
+void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, &ARM::QPRRegClass);
+ addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
+}
+
+static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
+ if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
+ return new TargetLoweringObjectFileMachO();
+
+ return new ARMElfTargetObjectFile();
+}
+
+ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ RegInfo = TM.getRegisterInfo();
+ Itins = TM.getInstrItineraryData();
+
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ if (Subtarget->isTargetDarwin()) {
+ // Uses VFP for Thumb libfuncs if available.
+ if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ // Single-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+ setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+ setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+ setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+
+ // Double-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+ setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+ setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+ setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+
+ // Single-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
+ setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
+ setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
+ setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
+ setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
+ setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
+ setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
+ setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+
+ // Double-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
+ setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
+ setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
+ setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
+ setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
+ setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
+ setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
+ setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+
+ // Conversions between floating types.
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ }
+ }
+
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+
+ if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
+ setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
+ setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
+ setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
+ setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
+ setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
+ setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
+ setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
+ setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
+ setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
+ setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
+ setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
+ setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
+
+ // Memory operations
+ // RTABI chapter 4.3.4
+ setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
+ setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
+ setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
+ setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
+ }
+
+ // Use divmod compiler-rt calls for iOS 5.0 and later.
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+ !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+ setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+ setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+ }
+
+ if (Subtarget->isThumb1Only())
+ addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
+ else
+ addRegisterClass(MVT::i32, &ARM::GPRRegClass);
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
+ addRegisterClass(MVT::f32, &ARM::SPRRegClass);
+ if (!Subtarget->isFPOnlySP())
+ addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ }
+
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+
+ if (Subtarget->hasNEON()) {
+ addDRTypeForNEON(MVT::v2f32);
+ addDRTypeForNEON(MVT::v8i8);
+ addDRTypeForNEON(MVT::v4i16);
+ addDRTypeForNEON(MVT::v2i32);
+ addDRTypeForNEON(MVT::v1i64);
+
+ addQRTypeForNEON(MVT::v4f32);
+ addQRTypeForNEON(MVT::v2f64);
+ addQRTypeForNEON(MVT::v16i8);
+ addQRTypeForNEON(MVT::v8i16);
+ addQRTypeForNEON(MVT::v4i32);
+ addQRTypeForNEON(MVT::v2i64);
+
+ // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
+ // neither Neon nor VFP support any arithmetic operations on it.
+ // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
+ // supported for v4f32.
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ // FIXME: Code duplication: FDIV and FREM are expanded always, see
+ // ARMTargetLowering::addTypeForNEON method for details.
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ // FIXME: Create unittest.
+ // In another words, find a way when "copysign" appears in DAG with vector
+ // operands.
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ // FIXME: Code duplication: SETCC has custom operation action, see
+ // ARMTargetLowering::addTypeForNEON method for details.
+ setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FNEG and for FABS.
+ setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMA, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
+
+ // Mark v2f32 intrinsics.
+ setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
+
+ // Neon does not support some operations on v1i64 and v2i64 types.
+ setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // Custom handling for some vector types to avoid expensive expansions
+ setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
+ // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
+ // a destination type that is wider than the source, and nor does
+ // it have a FP_TO_[SU]INT instruction with a narrower destination than
+ // source.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+
+ // Custom expand long extensions to vectors.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+
+ // NEON does not have single instruction CTPOP for vectors with element
+ // types wider than 8-bits. However, custom lowering can leverage the
+ // v8i8/v16i8 vcnt instruction.
+ setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+
+ // NEON only has FMA instructions as of VFP4.
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::v2f32, Expand);
+ setOperationAction(ISD::FMA, MVT::v4f32, Expand);
+ }
+
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FDIV);
+
+ // It is legal to extload from v4i8 to v4i16 or v4i32.
+ MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
+ MVT::v4i16, MVT::v2i16,
+ MVT::v2i32};
+ for (unsigned i = 0; i < 6; ++i) {
+ setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
+ setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
+ }
+ }
+
+ // ARM and Thumb2 support UMLAL/SMLAL.
+ if (!Subtarget->isThumb1Only())
+ setTargetDAGCombine(ISD::ADDC);
+
+
+ computeRegisterProperties();
+
+ // ARM does not have f32 extending load.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // ARM does not have i1 sign extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // ARM supports all 4 flavors of integer indexed load / store.
+ if (!Subtarget->isThumb1Only()) {
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i1, Legal);
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i1, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ }
+ }
+
+ // i64 operation support.
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ if (Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ }
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
+ || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL, MVT::i64, Custom);
+ setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ if (!Subtarget->isThumb1Only()) {
+ // FIXME: We should do this for Thumb1 as well.
+ setOperationAction(ISD::ADDC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBC, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
+ }
+
+ // ARM does not have ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ // These just redirect to CTTZ and CTLZ on ARM.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+
+ // Only ARMv6 has BSWAP.
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
+ !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
+ // These are expanded into libcalls if the cpu doesn't have HW divider.
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ }
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ if (!Subtarget->isTargetDarwin()) {
+ // Non-Darwin platforms may return values in these registers via the
+ // personality function.
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setExceptionPointerRegister(ARM::R0);
+ setExceptionSelectorRegister(ARM::R1);
+ }
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
+ // the default expansion.
+ // FIXME: This should be checking for v6k, not just v6.
+ if (Subtarget->hasDataBarrier() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
+ // membarrier needs custom lowering; the rest are legal and handled
+ // normally.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ // Custom lowering for 64-bit ops
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+ // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ setInsertFencesForAtomic(true);
+ } else {
+ // Set them all for expansion, which will force libcalls.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
+ // Unordered/Monotonic case.
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
+ // Since the libcalls include locking, fold in the fences
+ setShouldFoldAtomicFences(true);
+ }
+
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
+ // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
+ if (!Subtarget->hasV6Ops()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ }
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
+ // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
+ // iff target supports vfp2.
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ if (Subtarget->isTargetDarwin()) {
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
+ }
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ // We don't support sin/cos/fmod/copysign/pow
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ }
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ }
+
+ // Various VFP goodness
+ if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
+ // int <-> fp are custom expanded into bit_convert + ARMISD ops.
+ if (Subtarget->hasVFP2()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ }
+ // Special handling for half-precision FP.
+ if (!Subtarget->hasFP16()) {
+ setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
+ setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
+ }
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::XOR);
+
+ if (Subtarget->hasV6Ops())
+ setTargetDAGCombine(ISD::SRL);
+
+ setStackPointerRegisterToSaveRestore(ARM::SP);
+
+ if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
+ !Subtarget->hasVFP2())
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Hybrid);
+
+ //// temporary - rewrite interface to use type
+ MaxStoresPerMemset = 8;
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+
+ // On ARM arguments smaller than 4 bytes are extended, so all arguments
+ // are at least 4 bytes aligned.
+ setMinStackArgumentAlignment(4);
+
+ // Prefer likely predicted branches to selects on out-of-order cores.
+ PredictableSelectIsExpensive = Subtarget->isLikeA9();
+
+ setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
+}
+
+// FIXME: It might make sense to define the representative register class as the
+// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
+// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
+// SPR's representative would be DPR_VFP2. This should work well if register
+// pressure tracking were modified such that a register use would increment the
+// pressure of the register class's representative and all of it's super
+// classes' representatives transitively. We have not implemented this because
+// of the difficulty prior to coalescing of modeling operand register classes
+// due to the common occurrence of cross class copies and subregister insertions
+// and extractions.
+std::pair<const TargetRegisterClass*, uint8_t>
+ARMTargetLowering::findRepresentativeClass(MVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ // Use DPR as representative register class for all floating point
+ // and vector types. Since there are 32 SPR registers and 32 DPR registers so
+ // the cost is 1 for both f32 and f64.
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
+ RRC = &ARM::DPRRegClass;
+ // When NEON is used for SP, only half of the register file is available
+ // because operations that define both SP and DP results will be constrained
+ // to the VFP2 class (D0-D15). We currently model this constraint prior to
+ // coalescing by double-counting the SP regs. See the FIXME above.
+ if (Subtarget->useNEONForSinglePrecisionFP())
+ Cost = 2;
+ break;
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ RRC = &ARM::DPRRegClass;
+ Cost = 2;
+ break;
+ case MVT::v4i64:
+ RRC = &ARM::DPRRegClass;
+ Cost = 4;
+ break;
+ case MVT::v8i64:
+ RRC = &ARM::DPRRegClass;
+ Cost = 8;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
+ case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
+ case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::CALL: return "ARMISD::CALL";
+ case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
+ case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tCALL: return "ARMISD::tCALL";
+ case ARMISD::BRCOND: return "ARMISD::BRCOND";
+ case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
+ case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMN: return "ARMISD::CMN";
+ case ARMISD::CMPZ: return "ARMISD::CMPZ";
+ case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
+ case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+
+ case ARMISD::CMOV: return "ARMISD::CMOV";
+
+ case ARMISD::RBIT: return "ARMISD::RBIT";
+
+ case ARMISD::FTOSI: return "ARMISD::FTOSI";
+ case ARMISD::FTOUI: return "ARMISD::FTOUI";
+ case ARMISD::SITOF: return "ARMISD::SITOF";
+ case ARMISD::UITOF: return "ARMISD::UITOF";
+
+ case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
+ case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
+ case ARMISD::RRX: return "ARMISD::RRX";
+
+ case ARMISD::ADDC: return "ARMISD::ADDC";
+ case ARMISD::ADDE: return "ARMISD::ADDE";
+ case ARMISD::SUBC: return "ARMISD::SUBC";
+ case ARMISD::SUBE: return "ARMISD::SUBE";
+
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
+
+ case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
+ case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+
+ case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
+
+ case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+
+ case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+
+ case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
+ case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
+
+ case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+
+ case ARMISD::VCEQ: return "ARMISD::VCEQ";
+ case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
+ case ARMISD::VCGE: return "ARMISD::VCGE";
+ case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
+ case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
+ case ARMISD::VCGEU: return "ARMISD::VCGEU";
+ case ARMISD::VCGT: return "ARMISD::VCGT";
+ case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
+ case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
+ case ARMISD::VCGTU: return "ARMISD::VCGTU";
+ case ARMISD::VTST: return "ARMISD::VTST";
+
+ case ARMISD::VSHL: return "ARMISD::VSHL";
+ case ARMISD::VSHRs: return "ARMISD::VSHRs";
+ case ARMISD::VSHRu: return "ARMISD::VSHRu";
+ case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
+ case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
+ case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
+ case ARMISD::VSHRN: return "ARMISD::VSHRN";
+ case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
+ case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
+ case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
+ case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
+ case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
+ case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
+ case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
+ case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
+ case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
+ case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
+ case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
+ case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
+ case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
+ case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
+ case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
+ case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
+ case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
+ case ARMISD::VDUP: return "ARMISD::VDUP";
+ case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
+ case ARMISD::VEXT: return "ARMISD::VEXT";
+ case ARMISD::VREV64: return "ARMISD::VREV64";
+ case ARMISD::VREV32: return "ARMISD::VREV32";
+ case ARMISD::VREV16: return "ARMISD::VREV16";
+ case ARMISD::VZIP: return "ARMISD::VZIP";
+ case ARMISD::VUZP: return "ARMISD::VUZP";
+ case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VTBL1: return "ARMISD::VTBL1";
+ case ARMISD::VTBL2: return "ARMISD::VTBL2";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::UMLAL: return "ARMISD::UMLAL";
+ case ARMISD::SMLAL: return "ARMISD::SMLAL";
+ case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
+ case ARMISD::FMAX: return "ARMISD::FMAX";
+ case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::BFI: return "ARMISD::BFI";
+ case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
+ case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
+ case ARMISD::VBSL: return "ARMISD::VBSL";
+ case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
+ case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
+ case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
+ case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
+ case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
+ case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
+ case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
+ case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
+ case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
+ case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
+ case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
+ case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
+ case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
+ case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
+ case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
+ case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
+ case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
+ case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
+ case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
+ case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
+ }
+}
+
+EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return getPointerTy();
+ return VT.changeVectorElementTypeToInteger();
+}
+
+/// getRegClassFor - Return the register class that should be used for the
+/// specified value type.
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
+ // Map v4i64 to QQ registers but do not make the type legal. Similarly map
+ // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
+ // load / store 4 to 8 consecutive D registers.
+ if (Subtarget->hasNEON()) {
+ if (VT == MVT::v4i64)
+ return &ARM::QQPRRegClass;
+ if (VT == MVT::v8i64)
+ return &ARM::QQQQPRRegClass;
+ }
+ return TargetLowering::getRegClassFor(VT);
+}
+
+// Create a fast isel object.
+FastISel *
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return ARM::createFastISel(funcInfo, libInfo);
+}
+
+/// getMaximalGlobalOffset - Returns the maximal possible offset which can
+/// be used for loads / stores from the global.
+unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
+ return (Subtarget->isThumb1Only() ? 127 : 4095);
+}
+
+Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
+ unsigned NumVals = N->getNumValues();
+ if (!NumVals)
+ return Sched::RegPressure;
+
+ for (unsigned i = 0; i != NumVals; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (VT.isFloatingPoint() || VT.isVector())
+ return Sched::ILP;
+ }
+
+ if (!N->isMachineOpcode())
+ return Sched::RegPressure;
+
+ // Load are scheduled for latency even if there instruction itinerary
+ // is not available.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+
+ if (MCID.getNumDefs() == 0)
+ return Sched::RegPressure;
+ if (!Itins->isEmpty() &&
+ Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
+ return Sched::ILP;
+
+ return Sched::RegPressure;
+}
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
+static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code!");
+ case ISD::SETNE: return ARMCC::NE;
+ case ISD::SETEQ: return ARMCC::EQ;
+ case ISD::SETGT: return ARMCC::GT;
+ case ISD::SETGE: return ARMCC::GE;
+ case ISD::SETLT: return ARMCC::LT;
+ case ISD::SETLE: return ARMCC::LE;
+ case ISD::SETUGT: return ARMCC::HI;
+ case ISD::SETUGE: return ARMCC::HS;
+ case ISD::SETULT: return ARMCC::LO;
+ case ISD::SETULE: return ARMCC::LS;
+ }
+}
+
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
+static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ ARMCC::CondCodes &CondCode2) {
+ CondCode2 = ARMCC::AL;
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = ARMCC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = ARMCC::GE; break;
+ case ISD::SETOLT: CondCode = ARMCC::MI; break;
+ case ISD::SETOLE: CondCode = ARMCC::LS; break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETO: CondCode = ARMCC::VC; break;
+ case ISD::SETUO: CondCode = ARMCC::VS; break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUGT: CondCode = ARMCC::HI; break;
+ case ISD::SETUGE: CondCode = ARMCC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = ARMCC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = ARMCC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "ARMGenCallingConv.inc"
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
+/// given CallingConvention value.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
+ // Fallthrough
+ case CallingConv::C: {
+ // Use target triple & subtarget features to do actual dispatch.
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ else if (Subtarget->hasVFP2() &&
+ getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
+ !isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ }
+ case CallingConv::ARM_AAPCS_VFP:
+ if (!isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ // Fallthrough
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ case CallingConv::GHC:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
+ }
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallResult(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ true,
+ isVarArg));
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val;
+ if (VA.needsCustom()) {
+ // Handle f64 or half of a v2f64.
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(0, MVT::i32));
+
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(1, MVT::i32));
+ }
+ } else {
+ Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack.
+SDValue
+ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+}
+
+void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const {
+
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+
+ if (NextVA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
+ else {
+ assert(NextVA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
+ dl, DAG, NextVA,
+ Flags));
+ }
+}
+
+/// LowerCall - Lowering a call into a callseq_start <-
+/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
+/// nodes.
+SDValue
+ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool doesNotRet = CLI.DoesNotReturn;
+ bool isVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+ // Disable tail calls if they're not supported.
+ if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
+ isTailCall = false;
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+ // We don't support GuaranteedTailCallOpt for ARM, only automatically
+ // detected sibcalls.
+ if (isTailCall) {
+ ++NumTailCalls;
+ IsSibCall = true;
+ }
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // For tail calls, memory operands are available in our caller's stack.
+ if (IsSibCall)
+ NumBytes = 0;
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ RegsToPassVector RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+
+ PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc()) {
+ PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+ } else {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
+ dl, DAG, VA, Flags));
+ }
+ } else {
+ PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
+ }
+ } else if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (isByVal) {
+ assert(VA.isMemLoc());
+ unsigned offset = 0;
+
+ // True if this byval aggregate will be split between registers
+ // and memory.
+ if (CCInfo.isFirstByValRegValid()) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned int i, j;
+ for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ SDValue Const = DAG.getConstant(4*i, MVT::i32);
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(j, Load));
+ }
+ offset = ARM::R4 - CCInfo.getFirstByValReg();
+ CCInfo.clearFirstByValReg();
+ }
+
+ if (Flags.getByValSize() - 4*offset > 0) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ StkPtrOff);
+ SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
+ MVT::i32);
+ SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
+
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
+ MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
+ Ops, array_lengthof(Ops)));
+ }
+ } else if (!IsSibCall) {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ // Tail call byval lowering might overwrite argument registers so in case of
+ // tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag =SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ bool isDirect = false;
+ bool isARMFunc = false;
+ bool isLocalARMFunc = false;
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (EnableARMLongCalls) {
+ assert (getTargetMachine().getRelocationModel() == Reloc::Static
+ && "long-calls with non-static relocation model!");
+ // Handle a global address or an external symbol. If it's not one of
+ // those, the target's already in a register, so we don't need to do
+ // anything extra.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
+
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+ ARMPCLabelIndex, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ }
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ isDirect = true;
+ bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
+ bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
+ // tBX takes a register source operand.
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else {
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ unsigned OpFlags = 0;
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ isDirect = true;
+ bool isStub = Subtarget->isTargetDarwin() &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // tBX takes a register source operand.
+ const char *Sym = S->getSymbol();
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+ ARMPCLabelIndex, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else {
+ unsigned OpFlags = 0;
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+ }
+ }
+
+ // FIXME: handle tail calls differently.
+ unsigned CallOpc;
+ bool HasMinSizeAttr = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ if (Subtarget->isThumb()) {
+ if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ } else {
+ if (!isDirect && !Subtarget->hasV5TOps())
+ CallOpc = ARMISD::CALL_NOLINK;
+ else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
+ // Emit regular call when code size is the priority
+ !HasMinSizeAttr)
+ // "mov lr, pc; b _foo" to avoid confusing the RSP
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
+ }
+
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (isTailCall)
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+}
+
+/// HandleByVal - Every parameter *after* a byval parameter is passed
+/// on the stack. Remember the next parameter register to allocate,
+/// and then confiscate the rest of the parameter registers to insure
+/// this.
+void
+ARMTargetLowering::HandleByVal(
+ CCState *State, unsigned &size, unsigned Align) const {
+ unsigned reg = State->AllocateReg(GPRArgRegs, 4);
+ assert((State->getCallOrPrologue() == Prologue ||
+ State->getCallOrPrologue() == Call) &&
+ "unhandled ParmContext");
+ if ((!State->isFirstByValRegValid()) &&
+ (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+ if (Subtarget->isAAPCS_ABI() && Align > 4) {
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ reg = State->AllocateReg(GPRArgRegs, 4);
+ }
+ if (reg != 0) {
+ State->setFirstByValReg(reg);
+ // At a call site, a byval parameter that is split between
+ // registers and memory needs its size truncated here. In a
+ // function prologue, such byval parameters are reassembled in
+ // memory, and are not truncated.
+ if (State->getCallOrPrologue() == Call) {
+ unsigned excess = 4 * (ARM::R4 - reg);
+ assert(size >= excess && "expected larger existing stack allocation");
+ size -= excess;
+ }
+ }
+ }
+ // Confiscate any remaining parameter registers to preclude their
+ // assignment to subsequent parameters.
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+}
+
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
+ if (isVarArg && !Outs.empty())
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
+ // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
+ // support in the assembler and linker to be used. This would need to be
+ // fixed to fully support tail calls in Thumb1.
+ //
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+ // LR. This means if we need to reload LR, it takes an extra instructions,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If Caller's vararg or byval argument has been split between registers and
+ // stack, do not perform tail call, since part of the argument is in caller's
+ // local frame.
+ const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
+ getInfo<ARMFunctionInfo>();
+ if (AFI_Caller->getVarArgsRegSaveSize())
+ return false;
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CalleeCC, false, isVarArg));
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split into multiple registers or
+ // register/stack-slot combinations. The types will not match
+ // the registers; give up on memory f64 refs until we figure
+ // out what to do about this.
+ if (!VA.isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (RegVT == MVT::v2f64) {
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ }
+ } else if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+bool
+ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
+ isVarArg));
+}
+
+SDValue
+ARMTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
+ isVarArg));
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = OutVals[realRVLocIdx];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ // Extract the first half and return it in two registers.
+ SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Half);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(1), Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ VA = RVLocs[++i]; // skip ahead to next loc
+
+ // Extract the 2nd half and fall through to handle it as an f64 value.
+ Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+ }
+ // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
+ // available.
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+
+ // Guarantee that all emitted copies are
+ // stuck together, avoiding something bad.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ // Update chain and glue.
+ RetOps[0] = Chain;
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
+ RetOps.data(), RetOps.size());
+}
+
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDValue TCChain = Chain;
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
+ SDNode *VMov = Copy;
+ // f64 returned in a pair of GPRs.
+ SmallPtrSet<SDNode*, 2> Copies;
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ Copies.insert(*UI);
+ }
+ if (Copies.size() > 2)
+ return false;
+
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
+ UI != UE; ++UI) {
+ SDValue UseChain = UI->getOperand(0);
+ if (Copies.count(UseChain.getNode()))
+ // Second CopyToReg
+ Copy = *UI;
+ else
+ // First CopyToReg
+ TCChain = UseChain;
+ }
+ } else if (Copy->getOpcode() == ISD::BITCAST) {
+ // f32 returned in a single GPR.
+ if (!Copy->hasOneUse())
+ return false;
+ Copy = *Copy->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
+ return false;
+ Chain = Copy->getOperand(0);
+ } else {
+ return false;
+ }
+
+ bool HasRet = false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
+}
+
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
+ return false;
+
+ if (!CI->isTailCall())
+ return false;
+
+ return !Subtarget->isThumb1Only();
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing mode. These wrapped nodes will be selected
+// into MOVi.
+static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ EVT PtrVT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
+}
+
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
+SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
+ } else {
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
+ ARMCP::CPBlockAddress, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ if (RelocM == Reloc::Static)
+ return Result;
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+SDValue
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = GA->getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
+ SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
+ Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ SDValue Chain = Argument.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
+
+ // call __tls_get_addr.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
+ Args.push_back(Entry);
+ // FIXME: is there useful debug info available here?
+ TargetLowering::CallLoweringInfo CLI(Chain,
+ (Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.first;
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or
+// "local exec" model.
+SDValue
+ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ TLSModel::Model model) const {
+ const GlobalValue *GV = GA->getGlobal();
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue Offset;
+ SDValue Chain = DAG.getEntryNode();
+ EVT PtrVT = getPointerTy();
+ // Get the Thread Pointer
+ SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+
+ if (model == TLSModel::InitialExec) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ // Initial exec model.
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
+ true);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ Chain = Offset.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
+
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ } else {
+ // local exec model
+ assert(model == TLSModel::LocalExec);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ }
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ // TODO: implement the "local dynamic" model
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+
+ TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic:
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModels(GA, DAG, model);
+ }
+ llvm_unreachable("bogus TLS model");
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV,
+ UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ SDValue Chain = Result.getValue(1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
+ if (!UseGOTOFF)
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result,
+ MachinePointerInfo::getGOT(),
+ false, false, false, 0);
+ return Result;
+ }
+
+ // If we have T2 ops, we can materialize the address directly via movt/movw
+ // pair. This is always cheaper.
+ if (Subtarget->useMovt()) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ } else {
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ }
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+
+ // FIXME: Enable this for static codegen when tool issues are fixed. Also
+ // update ARMFastISel::ARMMaterializeGV.
+ if (Subtarget->useMovt() && RelocM != Reloc::Static) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ if (RelocM == Reloc::Static)
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+
+ unsigned Wrapper = (RelocM == Reloc::PIC_)
+ ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
+ SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(),
+ false, false, false, 0);
+ return Result;
+ }
+
+ unsigned ARMPCLabelIndex = 0;
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ } else {
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
+ PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ SDValue Chain = Result.getValue(1);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
+ false, false, false, 0);
+
+ return Result;
+}
+
+SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() &&
+ "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex, PCAdj);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
+ Op.getOperand(1), Val);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(0, MVT::i32));
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_thread_pointer: {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+ }
+ case Intrinsic::eh_sjlj_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
+ ARMCP::CPLSDA, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+ return Result;
+ }
+ case Intrinsic::arm_neon_vmulls:
+ case Intrinsic::arm_neon_vmullu: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
+ ? ARMISD::VMULLs : ARMISD::VMULLu;
+ return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+}
+
+static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ DebugLoc dl = Op.getDebugLoc();
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 cpus can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue Op5 = Op.getOperand(5);
+ bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
+ unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
+
+ ARM_MB::MemBOpt DMBOpt;
+ if (isDeviceBarrier)
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
+ else
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
+ return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(DMBOpt, MVT::i32));
+}
+
+
+static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // FIXME: handle "fence singlethread" more efficiently.
+ DebugLoc dl = Op.getDebugLoc();
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 cpus can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(ARM_MB::ISH, MVT::i32));
+}
+
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // ARM pre v5TE and Thumb1 does not have preload instructions.
+ if (!(Subtarget->isThumb2() ||
+ (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+ if (!isRead &&
+ (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
+ // ARMv7 with MP extension has PLDW.
+ return Op.getOperand(0);
+
+ unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (Subtarget->isThumb()) {
+ // Invert the bits.
+ isRead = ~isRead & 1;
+ isData = ~isData & 1;
+ }
+
+ return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
+ DAG.getConstant(isData, MVT::i32));
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+SDValue
+ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ const TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = &ARM::tGPRRegClass;
+ else
+ RC = &ARM::GPRRegClass;
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+
+ SDValue ArgValue2;
+ if (NextVA.isMemLoc()) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
+
+ // Create load node to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ } else {
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+ ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+ }
+
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
+}
+
+void
+ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize)
+ const {
+ unsigned NumGPRs;
+ if (CCInfo.isFirstByValRegValid())
+ NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
+ else {
+ unsigned int firstUnalloced;
+ firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
+ sizeof(GPRArgRegs) /
+ sizeof(GPRArgRegs[0]));
+ NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
+ }
+
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ VARegSize = NumGPRs * 4;
+ VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+}
+
+// The remaining GPRs hold either the beginning of variable-argument
+// data, or the beginning of an aggregate passed by value (usually
+// byval). Either way, we allocate stack slots adjacent to the data
+// provided by our caller, and store the unallocated registers there.
+// If this is a variadic function, the va_list pointer will begin with
+// these values; otherwise, this reassembles a (byval) structure that
+// was split between registers and memory.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned firstRegToSaveIndex;
+ if (CCInfo.isFirstByValRegValid())
+ firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
+ else {
+ firstRegToSaveIndex = CCInfo.getFirstUnallocated
+ (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ }
+
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ if (VARegSaveSize) {
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing
+ // the result of va_next.
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
+ ArgOffset + VARegSaveSize
+ - VARegSize,
+ false));
+ SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ SmallVector<SDValue, 4> MemOps;
+ for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
+ const TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = &ARM::tGPRRegClass;
+ else
+ RC = &ARM::GPRRegClass;
+
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed via stack.
+ AFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
+}
+
+SDValue
+ARMTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
+ CCInfo.AnalyzeFormalArguments(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ SmallVector<SDValue, 16> ArgValues;
+ int lastInsIndex = -1;
+ SDValue ArgValue;
+ Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
+ unsigned CurArgIdx = 0;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+
+ if (VA.needsCustom()) {
+ // f64 and vector types are split up into multiple registers or
+ // combinations of registers and stack slots.
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ SDValue ArgValue2;
+ if (VA.isMemLoc()) {
+ int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ } else {
+ ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ }
+ ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
+ } else
+ ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+
+ } else {
+ const TargetRegisterClass *RC;
+
+ if (RegVT == MVT::f32)
+ RC = &ARM::SPRRegClass;
+ else if (RegVT == MVT::f64)
+ RC = &ARM::DPRRegClass;
+ else if (RegVT == MVT::v2f64)
+ RC = &ARM::QPRRegClass;
+ else if (RegVT == MVT::i32)
+ RC = AFI->isThumb1OnlyFunction() ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ else
+ llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+ // Transform the arguments in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ }
+
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::ZExt:
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ }
+
+ InVals.push_back(ArgValue);
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+ assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
+
+ int index = ArgLocs[i].getValNo();
+
+ // Some Ins[] entries become multiple ArgLoc[] entries.
+ // Process them only once.
+ if (index != lastInsIndex)
+ {
+ ISD::ArgFlagsTy Flags = Ins[index].Flags;
+ // FIXME: For now, all byval parameter objects are marked mutable.
+ // This can be changed with more analysis.
+ // In case of tail call optimization mark all arguments mutable.
+ // Since they could be overwritten by lowering of arguments in case of
+ // a tail call.
+ if (Flags.isByVal()) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->getVarArgsFrameIndex()) {
+ VarArgStyleRegisters(CCInfo, DAG,
+ dl, Chain, CurOrigArg,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ true /*force mutable frames*/);
+ int VAFrameIndex = AFI->getVarArgsFrameIndex();
+ InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
+ } else {
+ int FI = MFI->CreateFixedObject(Flags.getByValSize(),
+ VA.getLocMemOffset(), false);
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ }
+ } else {
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0));
+ }
+ lastInsIndex = index;
+ }
+ }
+ }
+
+ // varargs
+ if (isVarArg)
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
+ CCInfo.getNextStackOffset());
+
+ return Chain;
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isPosZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+ SDValue WrapperOp = Op.getOperand(1).getOperand(0);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isPosZero();
+ }
+ }
+ return false;
+}
+
+/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
+/// the given operands.
+SDValue
+ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ unsigned C = RHSC->getZExtValue();
+ if (!isLegalICmpImmediate(C)) {
+ // Constant does not fit, try adjusting it by one?
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ }
+ }
+ }
+
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMISD::NodeType CompareType;
+ switch (CondCode) {
+ default:
+ CompareType = ARMISD::CMP;
+ break;
+ case ARMCC::EQ:
+ case ARMCC::NE:
+ // Uses only Z Flag
+ CompareType = ARMISD::CMPZ;
+ break;
+ }
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
+}
+
+/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ SDValue Cmp;
+ if (!isFloatingPointZero(RHS))
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ else
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
+}
+
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+ unsigned Opc = Cmp.getOpcode();
+ DebugLoc DL = Cmp.getDebugLoc();
+ if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+ return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+ assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+ Cmp = Cmp.getOperand(0);
+ Opc = Cmp.getOpcode();
+ if (Opc == ARMISD::CMPFP)
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ else {
+ assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
+}
+
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Op.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
+ assert(True.getValueType() == VT);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ }
+ }
+ }
+
+ // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
+ // undefined bits before doing a full-word comparison with zero.
+ Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+ ARMcc, CCR, Cmp);
+ if (CondCode2 != ARMCC::AL) {
+ SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ Result = DAG.getNode(ARMISD::CMOV, dl, VT,
+ Result, TrueVal, ARMcc2, CCR, Cmp2);
+ }
+ return Result;
+}
+
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+ const ARMSubtarget *Subtarget) {
+ SDNode *N = Op.getNode();
+ if (!N->hasOneUse())
+ // Otherwise it requires moving the value from fp to integer registers.
+ return false;
+ if (!N->getNumValues())
+ return false;
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+ // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+ // vmrs are very slow, e.g. cortex-a8.
+ return false;
+
+ if (isFloatingPointZero(Op)) {
+ SeenZero = true;
+ return true;
+ }
+ return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+ if (isFloatingPointZero(Op))
+ return DAG.getConstant(0, MVT::i32);
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+ SDValue &RetVal1, SDValue &RetVal2) {
+ if (isFloatingPointZero(Op)) {
+ RetVal1 = DAG.getConstant(0, MVT::i32);
+ RetVal2 = DAG.getConstant(0, MVT::i32);
+ return;
+ }
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+ SDValue Ptr = Ld->getBasePtr();
+ RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ptr,
+ Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment());
+
+ EVT PtrType = Ptr.getValueType();
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ PtrType, Ptr, DAG.getConstant(4, PtrType));
+ RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), NewPtr,
+ Ld->getPointerInfo().getWithOffset(4),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), NewAlign);
+ return;
+ }
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ bool LHSSeenZero = false;
+ bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
+ bool RHSSeenZero = false;
+ bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
+ if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
+ // If unsafe fp math optimization is enabled and there are no other uses of
+ // the CMP operands, and the condition code is EQ or NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+
+ SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
+ SDValue ARMcc;
+ if (LHS.getValueType() == MVT::f32) {
+ LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
+ bitcastf32Toi32(LHS, DAG), Mask);
+ RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
+ bitcastf32Toi32(RHS, DAG), Mask);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ SDValue LHS1, LHS2;
+ SDValue RHS1, RHS2;
+ expandf64Toi32(LHS, DAG, LHS1, LHS2);
+ expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
+ RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ }
+
+ return SDValue();
+}
+
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ if (getTargetMachine().Options.UnsafeFPMath &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE)) {
+ SDValue Result = OptimizeVFPBrcond(Op, DAG);
+ if (Result.getNode())
+ return Result;
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ if (CondCode2 != ARMCC::AL) {
+ ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ }
+ return Res;
+}
+
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PTy = getPointerTy();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+ Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
+ Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ if (Subtarget->isThumb2()) {
+ // Thumb2 uses a two-level jump. That is, it jumps into the jump table
+ // which does another jump to the destination. This also makes it easier
+ // to translate it to TBB / TBH later.
+ // FIXME: This might not work if the function is extremely large.
+ return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
+ Addr, Op.getOperand(2), JTI, UId);
+ }
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(),
+ false, false, false, 0);
+ Chain = Addr.getValue(1);
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ } else {
+ Addr = DAG.getLoad(PTy, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(),
+ false, false, false, 0);
+ Chain = Addr.getValue(1);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ }
+}
+
+static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getValueType().getVectorElementType() == MVT::i32) {
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
+ "Invalid type for custom lowering!");
+ if (VT != MVT::v4i16)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
+}
+
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorFP_TO_INT(Op, DAG);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode!");
+ case ISD::FP_TO_SINT:
+ Opc = ARMISD::FTOSI;
+ break;
+ case ISD::FP_TO_UINT:
+ Opc = ARMISD::FTOUI;
+ break;
+ }
+ Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+}
+
+static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
+ if (VT.getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
+ "Invalid type for custom lowering!");
+ if (VT != MVT::v4f32)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ unsigned CastOpc;
+ unsigned Opc;
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ CastOpc = ISD::SIGN_EXTEND;
+ Opc = ISD::SINT_TO_FP;
+ break;
+ case ISD::UINT_TO_FP:
+ CastOpc = ISD::ZERO_EXTEND;
+ Opc = ISD::UINT_TO_FP;
+ break;
+ }
+
+ Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorINT_TO_FP(Op, DAG);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ Opc = ARMISD::SITOF;
+ break;
+ case ISD::UINT_TO_FP:
+ Opc = ARMISD::UITOF;
+ break;
+ }
+
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ // Implement fcopysign with a fabs and a conditional fneg.
+ SDValue Tmp0 = Op.getOperand(0);
+ SDValue Tmp1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Tmp1.getValueType();
+ bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+ Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+ if (UseNEON) {
+ // Use VBSL to copy the sign bit.
+ unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+ DAG.getTargetConstant(EncodedVal, MVT::i32));
+ EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+ if (VT == MVT::f64)
+ Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+ DAG.getConstant(32, MVT::i32));
+ else /*if (VT == MVT::f32)*/
+ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+ if (SrcVT == MVT::f32) {
+ Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+ if (VT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ } else if (VT == MVT::f32)
+ Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+ SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+ SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+ if (VT == MVT::f32) {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+ }
+
+ return Res;
+ }
+
+ // Bitcast operand 1 to i32.
+ if (SrcVT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp1, 1).getValue(1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
+
+ // Or in the signbit with integer operations.
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+ }
+
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+}
+
+SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
+ ? ARM::R7 : ARM::R11;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
+/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
+/// and size(DestVec) > 128-bits.
+/// This is achieved by doing the one extension from the SrcVec, splitting the
+/// result, extending these parts, and then concatenating these into the
+/// destination.
+static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DestVT = N->getValueType(0);
+
+ assert(DestVT.getSizeInBits() > 128 &&
+ "Custom sext/zext expansion needs >128-bit vector.");
+ // If this is a normal length extension, use the default expansion.
+ if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
+ SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
+ return SDValue();
+
+ DebugLoc dl = N->getDebugLoc();
+ unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
+
+ EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts);
+ EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts/2);
+ EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
+ NumElts/2);
+
+ Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
+ SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(0));
+ SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(NumElts/2));
+ ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
+ ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
+}
+
+/// ExpandBITCAST - If the target supports VFP, this function is called to
+/// expand a bit convert where either the source or destination type is i64 to
+/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
+/// operand type is illegal (e.g., v2f32 for a target that doesn't support
+/// vectors), since the legalizer won't know what to do with that.
+static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+
+ // This function is only supposed to be called for i64 types, either as the
+ // source or destination of the bit convert.
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
+ "ExpandBITCAST called for non-i64 type");
+
+ // Turn i64->f64 into VMOVDRR.
+ if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, DstVT,
+ DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
+ }
+
+ // Turn f64->i64 into VMOVRRD.
+ if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
+ SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
+ }
+
+ return SDValue();
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+/// Zero vectors are used to represent vector negation and in those cases
+/// will be implemented with the NEON VNEG instruction. However, VNEG does
+/// not support i64 elements, so sometimes the zero vectors will need to be
+/// explicitly constructed. Regardless, use a canonical VMOV to create the
+/// zero vector.
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+ // The canonical modified immediate encoding of a zero vector is....0!
+ SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
+ EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+}
+
+/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
+/// i32 values and take a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+ unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
+
+ assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMcc, DAG, dl);
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
+/// i32 values and take a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+
+ assert(Op.getOpcode() == ISD::SHL_PARTS);
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMcc, DAG, dl);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+ // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
+ // so that the shift + and get folded into a bitfield extract.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, MVT::i32));
+}
+
+static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (!ST->hasV6T2Ops())
+ return SDValue();
+
+ SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+ return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
+}
+
+/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
+/// for each 16-bit element from operand, repeated. The basic idea is to
+/// leverage vcnt to get the 8-bit counts, gather and add the results.
+///
+/// Trace for v4i16:
+/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
+/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
+/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
+/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
+/// [b0 b1 b2 b3 b4 b5 b6 b7]
+/// +[b1 b0 b3 b2 b5 b4 b7 b6]
+/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
+/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
+static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
+ SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
+ SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
+ return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
+}
+
+/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
+/// bit-count for each 16-bit element from the operand. We need slightly
+/// different sequencing for v4i16 and v8i16 to stay within NEON's available
+/// 64/128-bit registers.
+///
+/// Trace for v4i16:
+/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
+/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
+/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
+/// v4i16:Extracted = [k0 k1 k2 k3 ]
+static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
+ if (VT.is64BitVector()) {
+ SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
+ DAG.getIntPtrConstant(0));
+ } else {
+ SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
+ BitCounts, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
+ }
+}
+
+/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
+/// bit-count for each 32-bit element from the operand. The idea here is
+/// to split the vector into 16-bit elements, leverage the 16-bit count
+/// routine, and then combine the results.
+///
+/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
+/// input = [v0 v1 ] (vi: 32-bit elements)
+/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
+/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
+/// vrev: N0 = [k1 k0 k3 k2 ]
+/// [k0 k1 k2 k3 ]
+/// N1 =+[k1 k0 k3 k2 ]
+/// [k0 k2 k1 k3 ]
+/// N2 =+[k1 k3 k0 k2 ]
+/// [k0 k2 k1 k3 ]
+/// Extended =+[k1 k3 k0 k2 ]
+/// [k0 k2 ]
+/// Extracted=+[k1 k3 ]
+///
+static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+
+ SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
+ SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
+ SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
+ SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
+ SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
+
+ if (VT.is64BitVector()) {
+ SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
+ DAG.getIntPtrConstant(0));
+ } else {
+ SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
+ }
+}
+
+static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+
+ assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
+ assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
+ VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
+
+ if (VT.getVectorElementType() == MVT::i32)
+ return lowerCTPOP32BitElements(N, DAG);
+ else
+ return lowerCTPOP16BitElements(N, DAG);
+}
+
+static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (!VT.isVector())
+ return SDValue();
+
+ // Lower vector shifts on NEON to use VSHL.
+ assert(ST->hasNEON() && "unexpected vector shift");
+
+ // Left shifts translate directly to the vshiftu intrinsic.
+ if (N->getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
+ N->getOperand(0), N->getOperand(1));
+
+ assert((N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+
+ // NEON uses the same intrinsics for both left and right shifts. For
+ // right shifts, the shift amounts are negative, so negate the vector of
+ // shift amounts.
+ EVT ShiftVT = N->getOperand(1).getValueType();
+ SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
+ getZeroVector(ShiftVT, DAG, dl),
+ N->getOperand(1));
+ Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
+ Intrinsic::arm_neon_vshifts :
+ Intrinsic::arm_neon_vshiftu);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(vshiftInt, MVT::i32),
+ N->getOperand(0), NegatedCount);
+}
+
+static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // We can get here for a node like i32 = ISD::SHL i32, i64
+ if (VT != MVT::i64)
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "Unknown shift to lower!");
+
+ // We only lower SRA, SRL of 1 here, all others use generic lowering.
+ if (!isa<ConstantSDNode>(N->getOperand(1)) ||
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ return SDValue();
+
+ // If we are in thumb mode, we don't have RRX.
+ if (ST->isThumb1Only()) return SDValue();
+
+ // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+
+ // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+ // captures the result into a carry flag.
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
+
+ // The low part is an ARMISD::RRX operand, which shifts the carry in.
+ Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+ SDValue TmpOp0, TmpOp1;
+ bool Invert = false;
+ bool Swap = false;
+ unsigned Opc = 0;
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(1).getValueType().isFloatingPoint()) {
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal FP comparison");
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; // Fallthrough
+ case ISD::SETOEQ:
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETOLT:
+ case ISD::SETLT: Swap = true; // Fallthrough
+ case ISD::SETOGT:
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETOLE:
+ case ISD::SETLE: Swap = true; // Fallthrough
+ case ISD::SETOGE:
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETUGE: Swap = true; // Fallthrough
+ case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
+ case ISD::SETUGT: Swap = true; // Fallthrough
+ case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
+ case ISD::SETUEQ: Invert = true; // Fallthrough
+ case ISD::SETONE:
+ // Expand this to (OLT | OGT).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
+ break;
+ case ISD::SETUO: Invert = true; // Fallthrough
+ case ISD::SETO:
+ // Expand this to (OLT | OGE).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
+ break;
+ }
+ } else {
+ // Integer comparisons.
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal integer comparison");
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETLE: Swap = true;
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
+ }
+
+ // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
+ if (Opc == ARMISD::VCEQ) {
+
+ SDValue AndOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ AndOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
+ AndOp = Op1;
+
+ // Ignore bitconvert.
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
+ AndOp = AndOp.getOperand(0);
+
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
+ Opc = ARMISD::VTST;
+ Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
+ Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
+ Invert = !Invert;
+ }
+ }
+ }
+
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // If one of the operands is a constant vector zero, attempt to fold the
+ // comparison to a specialized compare-against-zero form.
+ SDValue SingleOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ SingleOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+ if (Opc == ARMISD::VCGE)
+ Opc = ARMISD::VCLEZ;
+ else if (Opc == ARMISD::VCGT)
+ Opc = ARMISD::VCLTZ;
+ SingleOp = Op1;
+ }
+
+ SDValue Result;
+ if (SingleOp.getNode()) {
+ switch (Opc) {
+ case ARMISD::VCEQ:
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGE:
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLEZ:
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGT:
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLTZ:
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+ default:
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+ } else {
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
+
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return the encoded value.
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+ unsigned SplatBitSize, SelectionDAG &DAG,
+ EVT &VT, bool is128Bits, NEONModImmType type) {
+ unsigned OpCmode, Imm;
+
+ // SplatBitSize is set to the smallest size that splats the vector, so a
+ // zero vector will always have SplatBitSize == 8. However, NEON modified
+ // immediate instructions others than VMOV do not support the 8-bit encoding
+ // of a zero vector, and the default encoding of zero is supposed to be the
+ // 32-bit version.
+ if (SplatBits == 0)
+ SplatBitSize = 32;
+
+ switch (SplatBitSize) {
+ case 8:
+ if (type != VMOVModImm)
+ return SDValue();
+ // Any 1-byte value is OK. Op=0, Cmode=1110.
+ assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
+ OpCmode = 0xe;
+ Imm = SplatBits;
+ VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
+ break;
+
+ case 16:
+ // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
+ VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x00nn: Op=x, Cmode=100x.
+ OpCmode = 0x8;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0xnn00: Op=x, Cmode=101x.
+ OpCmode = 0xa;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ return SDValue();
+
+ case 32:
+ // NEON's 32-bit VMOV supports splat values where:
+ // * only one byte is nonzero, or
+ // * the least significant byte is 0xff and the second byte is nonzero, or
+ // * the least significant 2 bytes are 0xff and the third is nonzero.
+ VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x000000nn: Op=x, Cmode=000x.
+ OpCmode = 0;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0x0000nn00: Op=x, Cmode=001x.
+ OpCmode = 0x2;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ if ((SplatBits & ~0xff0000) == 0) {
+ // Value = 0x00nn0000: Op=x, Cmode=010x.
+ OpCmode = 0x4;
+ Imm = SplatBits >> 16;
+ break;
+ }
+ if ((SplatBits & ~0xff000000) == 0) {
+ // Value = 0xnn000000: Op=x, Cmode=011x.
+ OpCmode = 0x6;
+ Imm = SplatBits >> 24;
+ break;
+ }
+
+ // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
+ if (type == OtherModImm) return SDValue();
+
+ if ((SplatBits & ~0xffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+ // Value = 0x0000nnff: Op=x, Cmode=1100.
+ OpCmode = 0xc;
+ Imm = SplatBits >> 8;
+ SplatBits |= 0xff;
+ break;
+ }
+
+ if ((SplatBits & ~0xffffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+ // Value = 0x00nnffff: Op=x, Cmode=1101.
+ OpCmode = 0xd;
+ Imm = SplatBits >> 16;
+ SplatBits |= 0xffff;
+ break;
+ }
+
+ // Note: there are a few 32-bit splat values (specifically: 00ffff00,
+ // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
+ // VMOV.I32. A (very) minor optimization would be to replicate the value
+ // and fall through here to test for a valid 64-bit splat. But, then the
+ // caller would also need to check and handle the change in size.
+ return SDValue();
+
+ case 64: {
+ if (type != VMOVModImm)
+ return SDValue();
+ // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+ uint64_t BitMask = 0xff;
+ uint64_t Val = 0;
+ unsigned ImmMask = 1;
+ Imm = 0;
+ for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
+ Val |= BitMask;
+ Imm |= ImmMask;
+ } else if ((SplatBits & BitMask) != 0) {
+ return SDValue();
+ }
+ BitMask <<= 8;
+ ImmMask <<= 1;
+ }
+ // Op=1, Cmode=1110.
+ OpCmode = 0x1e;
+ SplatBits = Val;
+ VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
+ break;
+ }
+
+ default:
+ llvm_unreachable("unexpected size for isNEONModifiedImm");
+ }
+
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, MVT::i32);
+}
+
+SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ return SDValue();
+
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ assert(Op.getValueType() == MVT::f32 &&
+ "ConstantFP custom lowering should only occur for f32.");
+
+ // Try splatting with a VMOV.f32...
+ APFloat FPVal = CFP->getValueAPF();
+ int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ if (ImmVal != -1) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
+ NewVal);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ // If that fails, try a VMOV.i32
+ EVT VMovVT;
+ unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
+ SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
+ VMOVModImm);
+ if (NewVal != SDValue()) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
+ NewVal);
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ // Finally, try a VMVN.i32
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
+ VMVNModImm);
+ if (NewVal != SDValue()) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ return SDValue();
+}
+
+// check if an VEXT instruction can handle the shuffle mask when the
+// vector sources of the shuffle are the same.
+static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, just follow it
+ // back to index zero and keep going.
+ ++ExpectedElt;
+ if (ExpectedElt == NumElts)
+ ExpectedElt = 0;
+
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ return true;
+}
+
+
+static bool isVEXTMask(ArrayRef<int> M, EVT VT,
+ bool &ReverseVEXT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+ ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, it may still be
+ // a VEXT but the source vectors must be swapped.
+ ExpectedElt += 1;
+ if (ExpectedElt == NumElts * 2) {
+ ExpectedElt = 0;
+ ReverseVEXT = true;
+ }
+
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ // Adjust the index value if the source operands will be swapped.
+ if (ReverseVEXT)
+ Imm -= NumElts;
+
+ return true;
+}
+
+/// isVREVMask - Check if a vector shuffle corresponds to a VREV
+/// instruction with the specified blocksize. (The order of the elements
+/// within each block of the vector is reversed.)
+static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
+ assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
+ "Only possible block sizes for VREV are: 16, 32, 64");
+
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
+
+ if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
+static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
+ // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
+ // range, then 0 is placed into the resulting vector. So pretty much any mask
+ // of 8 elements can work here.
+ return VT == MVT::v8i8 && M.size() == 8;
+}
+
+static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != 2 * i + WhichResult)
+ return false;
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
+static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned Half = VT.getVectorNumElements() / 2;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned j = 0; j != 2; ++j) {
+ unsigned Idx = WhichResult;
+ for (unsigned i = 0; i != Half; ++i) {
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
+ return false;
+ Idx += 2;
+ }
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// \return true if this is a reverse operation on an vector.
+static bool isReverseMask(ArrayRef<int> M, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ // Make sure the mask has the right size.
+ if (NumElts != M.size())
+ return false;
+
+ // Look for <15, ..., 3, -1, 1, 0>.
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
+ return false;
+
+ return true;
+}
+
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction). Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST, DebugLoc dl) {
+ uint64_t Val;
+ if (!isa<ConstantSDNode>(N))
+ return SDValue();
+ Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (ST->isThumb1Only()) {
+ if (Val <= 255 || ~Val <= 255)
+ return DAG.getConstant(Val, MVT::i32);
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+ return DAG.getConstant(Val, MVT::i32);
+ }
+ return SDValue();
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it.
+SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ // Check if an immediate VMOV works.
+ EVT VmovVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(),
+ VMOVModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+
+ // Try an immediate VMVN.
+ uint64_t NegatedImm = (~SplatBits).getZExtValue();
+ Val = isNEONModifiedImm(NegatedImm,
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(),
+ VMVNModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+
+ // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
+ if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
+ int ImmVal = ARM_AM::getFP32Imm(SplatBits);
+ if (ImmVal != -1) {
+ SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
+ return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
+ }
+ }
+ }
+ }
+
+ // Scan through the operands to see if only one value is used.
+ //
+ // As an optimisation, even if more than one value is used it may be more
+ // profitable to splat with one value then change some lanes.
+ //
+ // Heuristically we decide to do this if the vector has a "dominant" value,
+ // defined as splatted to more than half of the lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool hasDominantValue = false;
+ bool isConstant = true;
+
+ // Map of the number of times a particular SDValue appears in the
+ // element list.
+ DenseMap<SDValue, unsigned> ValueCounts;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ ValueCounts.insert(std::make_pair(V, 0));
+ unsigned &Count = ValueCounts[V];
+
+ // Is this value dominant? (takes up more than half of the lanes)
+ if (++Count > (NumElts / 2)) {
+ hasDominantValue = true;
+ Value = V;
+ }
+ }
+ if (ValueCounts.size() != 1)
+ usesOnlyOneValue = false;
+ if (!Value.getNode() && ValueCounts.size() > 0)
+ Value = ValueCounts.begin()->first;
+
+ if (ValueCounts.size() == 0)
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (hasDominantValue && EltSize <= 32) {
+ if (!isConstant) {
+ SDValue N;
+
+ // If we are VDUPing a value that comes directly from a vector, that will
+ // cause an unnecessary move to and from a GPR, where instead we could
+ // just use VDUPLANE. We can only do this if the lane being extracted
+ // is at a constant index, as the VDUP from lane instructions only have
+ // constant-index forms.
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Value->getOperand(1))) {
+ // We need to create a new undef vector to use for the VDUPLANE if the
+ // size of the vector from which we get the value is different than the
+ // size of the vector that we need to create. We will insert the element
+ // such that the register coalescer will remove unnecessary copies.
+ if (VT != Value->getOperand(0).getValueType()) {
+ ConstantSDNode *constIndex;
+ constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
+ assert(constIndex && "The index is not a constant!");
+ unsigned index = constIndex->getAPIntValue().getLimitedValue() %
+ VT.getVectorNumElements();
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
+ Value, DAG.getConstant(index, MVT::i32)),
+ DAG.getConstant(index, MVT::i32));
+ } else
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ Value->getOperand(0), Value->getOperand(1));
+ } else
+ N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+ if (!usesOnlyOneValue) {
+ // The dominant value was splatted as 'N', but we now have to insert
+ // all differing elements.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ if (Op.getOperand(I) == Value)
+ continue;
+ SmallVector<SDValue, 3> Ops;
+ Ops.push_back(N);
+ Ops.push_back(Op.getOperand(I));
+ Ops.push_back(DAG.getConstant(I, MVT::i32));
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
+ }
+ }
+ return N;
+ }
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ Op.getOperand(i)));
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+ Val = LowerBUILD_VECTOR(Val, DAG, ST);
+ if (Val.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+ if (usesOnlyOneValue) {
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (isConstant && Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
+ if (isConstant)
+ return SDValue();
+
+ // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+ if (NumElts >= 4) {
+ SDValue shuffle = ReconstructShuffle(Op, DAG);
+ if (shuffle != SDValue())
+ return shuffle;
+ }
+
+ // Vectors with 32- or 64-bit elements can be built by directly assigning
+ // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
+ // will be legalized.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ return SDValue();
+}
+
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 2> SourceVecs;
+ SmallVector<unsigned, 2> MinElts;
+ SmallVector<unsigned, 2> MaxElts;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+ // A shuffle can only come from building a vector from various
+ // elements of other vectors.
+ return SDValue();
+ } else if (V.getOperand(0).getValueType().getVectorElementType() !=
+ VT.getVectorElementType()) {
+ // This code doesn't know how to handle shuffles where the vector
+ // element types do not match (this happens because type legalization
+ // promotes the return type of EXTRACT_VECTOR_ELT).
+ // FIXME: It might be appropriate to extend this code to handle
+ // mismatched types.
+ return SDValue();
+ }
+
+ // Record this extraction against the appropriate vector if possible...
+ SDValue SourceVec = V.getOperand(0);
+ // If the element number isn't a constant, we can't effectively
+ // analyze what's going on.
+ if (!isa<ConstantSDNode>(V.getOperand(1)))
+ return SDValue();
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ bool FoundSource = false;
+ for (unsigned j = 0; j < SourceVecs.size(); ++j) {
+ if (SourceVecs[j] == SourceVec) {
+ if (MinElts[j] > EltNo)
+ MinElts[j] = EltNo;
+ if (MaxElts[j] < EltNo)
+ MaxElts[j] = EltNo;
+ FoundSource = true;
+ break;
+ }
+ }
+
+ // Or record a new source if not...
+ if (!FoundSource) {
+ SourceVecs.push_back(SourceVec);
+ MinElts.push_back(EltNo);
+ MaxElts.push_back(EltNo);
+ }
+ }
+
+ // Currently only do something sane when at most two source vectors
+ // involved.
+ if (SourceVecs.size() > 2)
+ return SDValue();
+
+ SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+ int VEXTOffsets[2] = {0, 0};
+
+ // This loop extracts the usage patterns of the source vectors
+ // and prepares appropriate SDValues for a shuffle if possible.
+ for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+ if (SourceVecs[i].getValueType() == VT) {
+ // No VEXT necessary
+ ShuffleSrcs[i] = SourceVecs[i];
+ VEXTOffsets[i] = 0;
+ continue;
+ } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ // It probably isn't worth padding out a smaller vector just to
+ // break it down again in a shuffle.
+ return SDValue();
+ }
+
+ // Since only 64-bit and 128-bit vectors are legal on ARM and
+ // we've eliminated the other cases...
+ assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
+ "unexpected vector sizes in ReconstructShuffle");
+
+ if (MaxElts[i] - MinElts[i] >= NumElts) {
+ // Span too large for a VEXT to cope
+ return SDValue();
+ }
+
+ if (MinElts[i] >= NumElts) {
+ // The extraction can just take the second half
+ VEXTOffsets[i] = NumElts;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ } else if (MaxElts[i] < NumElts) {
+ // The extraction can just take the first half
+ VEXTOffsets[i] = 0;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ } else {
+ // An actual VEXT is needed
+ VEXTOffsets[i] = MinElts[i];
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
+ DAG.getConstant(VEXTOffsets[i], MVT::i32));
+ }
+ }
+
+ SmallVector<int, 8> Mask;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Entry = Op.getOperand(i);
+ if (Entry.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ SDValue ExtractVec = Entry.getOperand(0);
+ int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
+ .getOperand(1))->getSExtValue();
+ if (ExtractVec == SourceVecs[0]) {
+ Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ } else {
+ Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ }
+ }
+
+ // Final check before we try to produce nonsense...
+ if (isShuffleMaskLegal(Mask, VT))
+ return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
+ &Mask[0]);
+
+ return SDValue();
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ if (VT.getVectorNumElements() == 4 &&
+ (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (M[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = M[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return true;
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm, WhichResult;
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ return (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isVREVMask(M, VT, 64) ||
+ isVREVMask(M, VT, 32) ||
+ isVREVMask(M, VT, 16) ||
+ isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTBLMask(M, VT) ||
+ isVTRNMask(M, VT, WhichResult) ||
+ isVUZPMask(M, VT, WhichResult) ||
+ isVZIPMask(M, VT, WhichResult) ||
+ isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+ isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+ isVZIP_v_undef_Mask(M, VT, WhichResult) ||
+ ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+ EVT VT = OpLHS.getValueType();
+
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown shuffle opcode!");
+ case OP_VREV:
+ // VREV divides the vector in half and swaps within the half.
+ if (VT.getVectorElementType() == MVT::i32 ||
+ VT.getVectorElementType() == MVT::f32)
+ return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+ // vrev <4 x i16> -> VREV32
+ if (VT.getVectorElementType() == MVT::i16)
+ return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
+ // vrev <4 x i8> -> VREV16
+ assert(VT.getVectorElementType() == MVT::i8);
+ return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
+ case OP_VDUP0:
+ case OP_VDUP1:
+ case OP_VDUP2:
+ case OP_VDUP3:
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
+ case OP_VEXT1:
+ case OP_VEXT2:
+ case OP_VEXT3:
+ return DAG.getNode(ARMISD::VEXT, dl, VT,
+ OpLHS, OpRHS,
+ DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
+ case OP_VUZPL:
+ case OP_VUZPR:
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
+ case OP_VZIPL:
+ case OP_VZIPR:
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
+ case OP_VTRNL:
+ case OP_VTRNR:
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
+ }
+}
+
+static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
+ ArrayRef<int> ShuffleMask,
+ SelectionDAG &DAG) {
+ // Check to see if we can use the VTBL instruction.
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+
+ SmallVector<SDValue, 8> VTBLMask;
+ for (ArrayRef<int>::iterator
+ I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
+ VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
+
+ if (V2.getNode()->getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+
+ return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+}
+
+static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue OpLHS = Op.getOperand(0);
+ EVT VT = OpLHS.getValueType();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ "Expect an v8i16/v16i8 type");
+ OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
+ // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
+ // extract the first 8 bytes into the top double word and the last 8 bytes
+ // into the bottom double word. The v8i16 case is similar.
+ unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
+ return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
+ DAG.getConstant(ExtractNum, MVT::i32));
+}
+
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+
+ // Convert shuffles that are directly supported on NEON to target-specific
+ // DAG nodes, instead of keeping them as shuffles and matching them again
+ // during code selection. This is more efficient and avoids the possibility
+ // of inconsistencies between legalization and selection.
+ // FIXME: floating-point vectors should be canonicalized to integer vectors
+ // of the same time so that they get CSEd properly.
+ ArrayRef<int> ShuffleMask = SVN->getMask();
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize <= 32) {
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0;
+
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+ // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+ // reaches it).
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(0))) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i32));
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ if (V2->getOpcode() == ISD::UNDEF &&
+ isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ }
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (ShuffleMask[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = ShuffleMask[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (ShuffleMask[i] < 0)
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32)));
+ }
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+ return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+
+ if (VT == MVT::v8i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // INSERT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(2);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ return Op;
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(1);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ if (Op.getValueType() == MVT::i32 &&
+ Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+ }
+
+ return Op;
+}
+
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
+ "unexpected CONCAT_VECTORS");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getUNDEF(MVT::v2f64);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
+ DAG.getIntPtrConstant(0));
+ if (Op1.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
+}
+
+/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
+/// element has been zero/sign-extended, depending on the isSigned parameter,
+/// from an integer type half its size.
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+ bool isSigned) {
+ // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ if (BVN->getValueType(0) != MVT::v4i32 ||
+ BVN->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ unsigned HiElt = 1 - LoElt;
+ ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
+ ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
+ ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
+ ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
+ if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
+ return false;
+ if (isSigned) {
+ if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
+ Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
+ return true;
+ } else {
+ if (Hi0->isNullValue() && Hi1->isNullValue())
+ return true;
+ }
+ return false;
+ }
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned HalfSize = EltSize / 2;
+ if (isSigned) {
+ if (!isIntN(HalfSize, C->getSExtValue()))
+ return false;
+ } else {
+ if (!isUIntN(HalfSize, C->getZExtValue()))
+ return false;
+ }
+ continue;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/// isSignExtended - Check if a node is a vector value that is sign-extended
+/// or a constant BUILD_VECTOR with sign-extended elements.
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, true))
+ return true;
+ return false;
+}
+
+/// isZeroExtended - Check if a node is a vector value that is zero-extended
+/// or a constant BUILD_VECTOR with zero-extended elements.
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, false))
+ return true;
+ return false;
+}
+
+/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
+/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
+/// We insert the required extension here to get the vector to fill a D register.
+static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
+ const EVT &OrigTy,
+ const EVT &ExtTy,
+ unsigned ExtOpcode) {
+ // The vector originally had a size of OrigTy. It was then extended to ExtTy.
+ // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
+ // 64-bits we need to insert a new extension so that it will be 64-bits.
+ assert(ExtTy.is128BitVector() && "Unexpected extension size");
+ if (OrigTy.getSizeInBits() >= 64)
+ return N;
+
+ // Must extend size to at least 64 bits to be used as an operand for VMULL.
+ MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
+ EVT NewVT;
+ switch (OrigSimpleTy) {
+ default: llvm_unreachable("Unexpected Orig Vector Type");
+ case MVT::v2i8:
+ case MVT::v2i16:
+ NewVT = MVT::v2i32;
+ break;
+ case MVT::v4i8:
+ NewVT = MVT::v4i16;
+ break;
+ }
+ return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
+}
+
+/// SkipLoadExtensionForVMULL - return a load of the original vector size that
+/// does not do any sign/zero extension. If the original vector is less
+/// than 64 bits, an appropriate extension will be added after the load to
+/// reach a total size of 64 bits. We have to add the extension separately
+/// because ARM does not have a sign/zero extending load for vectors.
+static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
+ SDValue NonExtendingLoad =
+ DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
+ unsigned ExtOp = 0;
+ switch (LD->getExtensionType()) {
+ default: llvm_unreachable("Unexpected LoadExtType");
+ case ISD::EXTLOAD:
+ case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
+ }
+ MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
+ MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
+ return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
+ MemType, ExtType, ExtOp);
+}
+
+/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
+/// extending load, or BUILD_VECTOR with extended elements, return the
+/// unextended value. The unextended vector should be 64 bits so that it can
+/// be used as an operand to a VMULL instruction. If the original vector size
+/// before extension is less than 64 bits we add a an extension to resize
+/// the vector to 64 bits.
+static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
+ N->getOperand(0)->getValueType(0),
+ N->getValueType(0),
+ N->getOpcode());
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return SkipLoadExtensionForVMULL(LD, DAG);
+
+ // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
+ // have been legalized as a BITCAST from v4i32.
+ if (N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
+ BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
+ unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+ BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+ }
+ // Construct a new BUILD_VECTOR with elements truncated to half the size.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+ EVT VT = N->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT TruncVT = MVT::getIntegerVT(EltSize);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+ const APInt &CInt = C->getAPIntValue();
+ // Element types smaller than 32 bits are not legal, so use i32 elements.
+ // The values are implicitly truncated so sext vs. zext doesn't matter.
+ Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
+}
+
+static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ }
+ return false;
+}
+
+static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ }
+ return false;
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && VT.isInteger() &&
+ "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ bool isMLA = false;
+ bool isN0SExt = isSignExtended(N0, DAG);
+ bool isN1SExt = isSignExtended(N1, DAG);
+ if (isN0SExt && isN1SExt)
+ NewOpc = ARMISD::VMULLs;
+ else {
+ bool isN0ZExt = isZeroExtended(N0, DAG);
+ bool isN1ZExt = isZeroExtended(N1, DAG);
+ if (isN0ZExt && isN1ZExt)
+ NewOpc = ARMISD::VMULLu;
+ else if (isN1SExt || isN1ZExt) {
+ // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
+ // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
+ if (isN1SExt && isAddSubSExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLs;
+ isMLA = true;
+ } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
+ std::swap(N0, N1);
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ }
+ }
+
+ if (!NewOpc) {
+ if (VT == MVT::v2i64)
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ else
+ // Other vector multiplications are legal.
+ return Op;
+ }
+ }
+
+ // Legalize to a VMULL instruction.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Op0;
+ SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
+ if (!isMLA) {
+ Op0 = SkipExtensionForVMULL(N0, DAG);
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ }
+
+ // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
+ // isel lowering to take advantage of no-stall back to back vmul + vmla.
+ // vmull q0, d4, d6
+ // vmlal q0, d5, d6
+ // is faster than
+ // vaddl q0, d4, d5
+ // vmovl q1, d6
+ // vmul q0, q0, q1
+ SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
+ EVT Op1VT = Op1.getValueType();
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
+}
+
+static SDValue
+LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+ // Convert to float
+ // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
+ // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
+ X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
+ Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
+ // Get reciprocal estimate.
+ // float4 recip = vrecpeq_f32(yf);
+ Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
+ // Because char has a smaller range than uchar, we can actually get away
+ // without any newton steps. This requires that we use a weird bias
+ // of 0xb000, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
+ X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
+ Y = DAG.getConstant(0xb000, MVT::i32);
+ Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+ X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
+ // Convert back to short.
+ X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
+ X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
+ return X;
+}
+
+static SDValue
+LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+ SDValue N2;
+ // Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_s16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_s16(x));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and one refinement step.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Because short has a smaller range than ushort, we can actually get away
+ // with only a single newton step. This requires that we use a weird bias
+ // of 89, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0x89);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(0x89, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_s32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::SDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
+ return N0;
+ }
+ return LowerSDIV_v4i16(N0, N1, dl, DAG);
+}
+
+static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::UDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
+ DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
+ N0);
+ return N0;
+ }
+
+ // v4i16 sdiv ... Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_u16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_u16(x));
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and two refinement steps.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Simply multiplying by the reciprocal estimate can leave us a few ulps
+ // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
+ // and that it will never cause us to return an answer too large).
+ // float4 result = as_float4(as_int4(xf*recip) + 2);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(2, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_u32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Invalid code");
+ case ISD::ADDC: Opc = ARMISD::ADDC; break;
+ case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
+ case ISD::SUBC: Opc = ARMISD::SUBC; break;
+ case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
+
+static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
+ // Monotonic load/store is legal for all targets
+ if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
+ return Op;
+
+ // Aquire/Release load/store is not legal for targets without a
+ // dmb or equivalent available.
+ return SDValue();
+}
+
+
+static void
+ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG, unsigned NewOp) {
+ DebugLoc dl = Node->getDebugLoc();
+ assert (Node->getValueType(0) == MVT::i64 &&
+ "Only know how to expand i64 atomics");
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(Node->getOperand(0)); // Chain
+ Ops.push_back(Node->getOperand(1)); // Ptr
+ // Low part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0)));
+ // High part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1)));
+ if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
+ // High part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(3), DAG.getIntPtrConstant(0)));
+ // High part of Val2
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(3), DAG.getIntPtrConstant(1)));
+ }
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
+ cast<MemSDNode>(Node)->getMemOperand());
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
+}
+
+SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress:
+ return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
+ case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
+ Subtarget);
+ case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
+ case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::UDIV: return LowerUDIV(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+ }
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ SDValue Res;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this!");
+ case ISD::BITCAST:
+ Res = ExpandBITCAST(N, DAG);
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = ExpandVectorExtension(N, DAG);
+ break;
+ case ISD::SRL:
+ case ISD::SRA:
+ Res = Expand64BitShift(N, DAG, Subtarget);
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_AND:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_NAND:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_OR:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_SUB:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_XOR:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
+ return;
+ case ISD::ATOMIC_SWAP:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
+ return;
+ case ISD::ATOMIC_CMP_SWAP:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_MIN:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_UMIN:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_MAX:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_UMAX:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+ return;
+ }
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
+
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(oldval, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(newval, &ARM::rGPRRegClass);
+ }
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldrex dest, [ptr]
+ // cmp dest, oldval
+ // bne exitMBB
+ BB = loop1MBB;
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(dest).addReg(oldval));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex scratch, newval, [ptr]
+ // cmp scratch, #0
+ // bne loop1MBB
+ BB = loop2MBB;
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ }
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // <binop> scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ if (BinOpcode) {
+ // operand order needs to go the other way for NAND
+ if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(incr).addReg(dest)).addReg(0);
+ else
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(dest).addReg(incr)).addReg(0);
+ }
+
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ }
+
+ unsigned ldrOpc, strOpc, extendOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ extendOpc = 0;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch2 = MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // (sign extend dest, if required)
+ // cmp dest, incr
+ // cmov.cond scratch2, incr, dest
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+
+ // Sign extend the value, if necessary.
+ if (signExtend && extendOpc) {
+ oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
+ .addReg(dest)
+ .addImm(0));
+ }
+
+ // Build compare and cmov instructions.
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(oldval).addReg(incr));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
+ .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
+
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Op1, unsigned Op2,
+ bool NeedsCarry, bool IsCmpxchg,
+ bool IsMinMax, ARMCC::CondCodes CC) const {
+ // This also handles ATOMIC_SWAP, indicated by Op1==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned destlo = MI->getOperand(0).getReg();
+ unsigned desthi = MI->getOperand(1).getReg();
+ unsigned ptr = MI->getOperand(2).getReg();
+ unsigned vallo = MI->getOperand(3).getReg();
+ unsigned valhi = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *contBB = 0, *cont2BB = 0;
+ if (IsCmpxchg || IsMinMax)
+ contBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ if (IsCmpxchg)
+ cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MF->insert(It, loopMBB);
+ if (IsCmpxchg || IsMinMax) MF->insert(It, contBB);
+ if (IsCmpxchg) MF->insert(It, cont2BB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ unsigned storesuccess = MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrexd r2, r3, ptr
+ // <binopa> r0, r2, incr
+ // <binopb> r1, r3, incr
+ // strexd storesuccess, r0, r1, ptr
+ // cmp storesuccess, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+
+ // Load
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
+
+ unsigned StoreLo, StoreHi;
+ if (IsCmpxchg) {
+ // Add early exit
+ for (unsigned i = 0; i < 2; i++) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
+ ARM::CMPrr))
+ .addReg(i == 0 ? destlo : desthi)
+ .addReg(i == 0 ? vallo : valhi));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(i == 0 ? contBB : cont2BB);
+ BB = (i == 0 ? contBB : cont2BB);
+ }
+
+ // Copy to physregs for strexd
+ StoreLo = MI->getOperand(5).getReg();
+ StoreHi = MI->getOperand(6).getReg();
+ } else if (Op1) {
+ // Perform binary operation
+ unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo)
+ .addReg(destlo).addReg(vallo))
+ .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
+ unsigned tmpRegHi = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi)
+ .addReg(desthi).addReg(valhi))
+ .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
+
+ StoreLo = tmpRegLo;
+ StoreHi = tmpRegHi;
+ } else {
+ // Copy to physregs for strexd
+ StoreLo = vallo;
+ StoreHi = valhi;
+ }
+ if (IsMinMax) {
+ // Compare and branch to exit block.
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(contBB);
+ BB = contBB;
+ StoreLo = vallo;
+ StoreHi = valhi;
+ }
+
+ // Store
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ .addReg(StoreLo).addReg(StoreHi).addReg(ptr));
+ } else {
+ // Marshal a pair...
+ unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(UndefPair)
+ .addReg(StoreLo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
+ .addReg(r1)
+ .addReg(StoreHi)
+ .addImm(ARM::gsub_1);
+
+ // ...and store it
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ .addReg(StorePair).addReg(ptr));
+ }
+ // Cmp+jump
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(storesuccess).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
+/// registers the function context.
+void ARMTargetLowering::
+SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ MachineConstantPool *MCP = MF->getConstantPool();
+ ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+ const Function *F = MF->getFunction();
+
+ bool isThumb = Subtarget->isThumb();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
+ unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
+
+ const TargetRegisterClass *TRC = isThumb ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+
+ // Grab constant pool and fixed stack memory operands.
+ MachineMemOperand *CPMMO =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ MachineMemOperand *FIMMOSt =
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore, 4, 4);
+
+ // Load the address of the dispatch MBB into the jump buffer.
+ if (isThumb2) {
+ // Incoming value: jbuf
+ // ldr.n r5, LCPI1_1
+ // orr r5, r5, #1
+ // add r5, pc
+ // str r5, [$jbuf, #+4] ; &jbuf[1]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO));
+ // Set the low bit because of thumb mode.
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(0x01)));
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
+ .addReg(NewVReg2, RegState::Kill)
+ .addImm(PCLabelId);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
+ .addReg(NewVReg3, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt));
+ } else if (isThumb) {
+ // Incoming value: jbuf
+ // ldr.n r1, LCPI1_4
+ // add r1, pc
+ // mov r2, #1
+ // orrs r1, r2
+ // add r2, $jbuf, #+4 ; &jbuf[1]
+ // str r1, [r2]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO));
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId);
+ // Set the low bit because of thumb mode.
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addImm(1));
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3, RegState::Kill));
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
+ .addFrameIndex(FI)
+ .addImm(36)); // &jbuf[1] :: pc
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg5, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(FIMMOSt));
+ } else {
+ // Incoming value: jbuf
+ // ldr r1, LCPI1_1
+ // add r1, pc, r1
+ // str r1, [$jbuf, #+4] ; &jbuf[1]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addImm(0)
+ .addMemOperand(CPMMO));
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId));
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
+ .addReg(NewVReg2, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt));
+ }
+}
+
+MachineBasicBlock *ARMTargetLowering::
+EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ int FI = MFI->getFunctionContextIndex();
+
+ const TargetRegisterClass *TRC = Subtarget->isThumb() ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRnopcRegClass;
+
+ // Get a mapping of the call site numbers to all of the landing pads they're
+ // associated with.
+ DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
+ unsigned MaxCSNum = 0;
+ MachineModuleInfo &MMI = MF->getMMI();
+ for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
+ ++BB) {
+ if (!BB->isLandingPad()) continue;
+
+ // FIXME: We should assert that the EH_LABEL is the first MI in the landing
+ // pad.
+ for (MachineBasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+ if (!II->isEHLabel()) continue;
+
+ MCSymbol *Sym = II->getOperand(0).getMCSymbol();
+ if (!MMI.hasCallSiteLandingPad(Sym)) continue;
+
+ SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
+ for (SmallVectorImpl<unsigned>::iterator
+ CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
+ CSI != CSE; ++CSI) {
+ CallSiteNumToLPad[*CSI].push_back(BB);
+ MaxCSNum = std::max(MaxCSNum, *CSI);
+ }
+ break;
+ }
+ }
+
+ // Get an ordered list of the machine basic blocks for the jump table.
+ std::vector<MachineBasicBlock*> LPadList;
+ SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
+ LPadList.reserve(CallSiteNumToLPad.size());
+ for (unsigned I = 1; I <= MaxCSNum; ++I) {
+ SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
+ for (SmallVectorImpl<MachineBasicBlock*>::iterator
+ II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
+ LPadList.push_back(*II);
+ InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
+ }
+ }
+
+ assert(!LPadList.empty() &&
+ "No landing pad destinations for the dispatch jump table!");
+
+ // Create the jump table and associated information.
+ MachineJumpTableInfo *JTI =
+ MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
+ unsigned MJTI = JTI->createJumpTableIndex(LPadList);
+ unsigned UId = AFI->createJumpTableUId();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+
+ // Create the MBBs for the dispatch code.
+
+ // Shove the dispatch's address into the return slot in the function context.
+ MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
+ DispatchBB->setIsLandingPad();
+
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ unsigned trap_opcode;
+ if (Subtarget->isThumb())
+ trap_opcode = ARM::tTRAP;
+ else
+ trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
+
+ BuildMI(TrapBB, dl, TII->get(trap_opcode));
+ DispatchBB->addSuccessor(TrapBB);
+
+ MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
+ DispatchBB->addSuccessor(DispContBB);
+
+ // Insert and MBBs.
+ MF->insert(MF->end(), DispatchBB);
+ MF->insert(MF->end(), DispContBB);
+ MF->insert(MF->end(), TrapBB);
+
+ // Insert code into the entry block that creates and registers the function
+ // context.
+ SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
+
+ MachineMemOperand *FIMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOVolatile, 4, 4);
+
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
+
+ const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+
+ // Add a register mask with no preserved registers. This results in all
+ // registers being marked as clobbered.
+ MIB.addRegMask(RI.getNoPreservedMask());
+
+ unsigned NumLPads = LPadList.size();
+ if (Subtarget->isThumb2()) {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ } else {
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF));
+
+ unsigned VReg2 = VReg1;
+ if ((NumLPads & 0xFFFF0000) != 0) {
+ VReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16));
+ }
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2));
+ }
+
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg1)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else if (Subtarget->isThumb()) {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(1)
+ .addMemOperand(FIMMOLd));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(NumLPads));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getDataLayout()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
+ .addReg(NewVReg1)
+ .addReg(VReg1));
+ }
+
+ BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg1)
+ .addImm(2));
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3));
+
+ MachineMemOperand *JTMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
+ .addReg(NewVReg4, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(JTMMOLd));
+
+ unsigned NewVReg6 = NewVReg5;
+ if (RelocM == Reloc::PIC_) {
+ NewVReg6 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3));
+ }
+
+ BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
+ .addReg(NewVReg6, RegState::Kill)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(NumLPads));
+ } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF));
+
+ unsigned VReg2 = VReg1;
+ if ((NumLPads & 0xFFFF0000) != 0) {
+ VReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16));
+ }
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getDataLayout()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg1, RegState::Kill));
+ }
+
+ BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ MachineMemOperand *JTMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+ MachineMemOperand::MOLoad, 4, 4);
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4)
+ .addImm(0)
+ .addMemOperand(JTMMOLd));
+
+ if (RelocM == Reloc::PIC_) {
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg4)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else {
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
+ .addReg(NewVReg5, RegState::Kill)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ }
+ }
+
+ // Add the jump table entries as successors to the MBB.
+ SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
+ for (std::vector<MachineBasicBlock*>::iterator
+ I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
+ MachineBasicBlock *CurMBB = *I;
+ if (SeenMBBs.insert(CurMBB))
+ DispContBB->addSuccessor(CurMBB);
+ }
+
+ // N.B. the order the invoke BBs are processed in doesn't matter here.
+ const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
+ SmallVector<MachineBasicBlock*, 64> MBBLPads;
+ for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
+ I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
+ MachineBasicBlock *BB = *I;
+
+ // Remove the landing pad successor from the invoke block and replace it
+ // with the new dispatch block.
+ SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+ while (!Successors.empty()) {
+ MachineBasicBlock *SMBB = Successors.pop_back_val();
+ if (SMBB->isLandingPad()) {
+ BB->removeSuccessor(SMBB);
+ MBBLPads.push_back(SMBB);
+ }
+ }
+
+ BB->addSuccessor(DispatchBB);
+
+ // Find the invoke call and mark all of the callee-saved registers as
+ // 'implicit defined' so that they're spilled. This prevents code from
+ // moving instructions to before the EH block, where they will never be
+ // executed.
+ for (MachineBasicBlock::reverse_iterator
+ II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
+ if (!II->isCall()) continue;
+
+ DenseMap<unsigned, bool> DefRegs;
+ for (MachineInstr::mop_iterator
+ OI = II->operands_begin(), OE = II->operands_end();
+ OI != OE; ++OI) {
+ if (!OI->isReg()) continue;
+ DefRegs[OI->getReg()] = true;
+ }
+
+ MachineInstrBuilder MIB(*MF, &*II);
+
+ for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
+ unsigned Reg = SavedRegs[i];
+ if (Subtarget->isThumb2() &&
+ !ARM::tGPRRegClass.contains(Reg) &&
+ !ARM::hGPRRegClass.contains(Reg))
+ continue;
+ if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
+ continue;
+ if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
+ continue;
+ if (!DefRegs[Reg])
+ MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
+ }
+
+ break;
+ }
+ }
+
+ // Mark all former landing pads as non-landing pads. The dispatch is the only
+ // landing pad now.
+ for (SmallVectorImpl<MachineBasicBlock*>::iterator
+ I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
+ (*I)->setIsLandingPad(false);
+
+ // The instruction is gone now.
+ MI->eraseFromParent();
+
+ return MBB;
+}
+
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
+}
+
+MachineBasicBlock *ARMTargetLowering::
+EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+ // This pseudo instruction has 3 operands: dst, src, size
+ // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
+ // Otherwise, we will generate unrolled scalar copies.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned src = MI->getOperand(1).getReg();
+ unsigned SizeVal = MI->getOperand(2).getImm();
+ unsigned Align = MI->getOperand(3).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ bool isThumb2 = Subtarget->isThumb2();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned ldrOpc, strOpc, UnitSize = 0;
+
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *TRC_Vec = 0;
+
+ if (Align & 1) {
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+ UnitSize = 1;
+ } else if (Align & 2) {
+ ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
+ strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
+ UnitSize = 2;
+ } else {
+ // Check whether we can use NEON instructions.
+ if (!MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if ((Align % 16 == 0) && SizeVal >= 16) {
+ ldrOpc = ARM::VLD1q32wb_fixed;
+ strOpc = ARM::VST1q32wb_fixed;
+ UnitSize = 16;
+ TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
+ }
+ else if ((Align % 8 == 0) && SizeVal >= 8) {
+ ldrOpc = ARM::VLD1d32wb_fixed;
+ strOpc = ARM::VST1d32wb_fixed;
+ UnitSize = 8;
+ TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
+ }
+ }
+ // Can't use NEON instructions.
+ if (UnitSize == 0) {
+ ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ UnitSize = 4;
+ }
+ }
+
+ unsigned BytesLeft = SizeVal % UnitSize;
+ unsigned LoopSize = SizeVal - BytesLeft;
+
+ if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
+ // Use LDR and STR to copy.
+ // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
+ // [destOut] = STR_POST(scratch, destIn, UnitSize)
+ unsigned srcIn = src;
+ unsigned destIn = dest;
+ for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
+ unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (UnitSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(destIn).addImm(0).addReg(scratch));
+ } else if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addImm(UnitSize));
+ } else {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
+ .addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(UnitSize));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+
+ // Handle the leftover bytes with LDRB and STRB.
+ // [scratch, srcOut] = LDRB_POST(srcIn, 1)
+ // [destOut] = STRB_POST(scratch, destIn, 1)
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+ for (unsigned i = 0; i < BytesLeft; i++) {
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ } else {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn)
+ .addReg(0).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+ MI->eraseFromParent(); // The instruction is gone now.
+ return BB;
+ }
+
+ // Expand the pseudo op to a loop.
+ // thisMBB:
+ // ...
+ // movw varEnd, # --> with thumb2
+ // movt varEnd, #
+ // ldrcp varEnd, idx --> without thumb2
+ // fallthrough --> loopMBB
+ // loopMBB:
+ // PHI varPhi, varEnd, varLoop
+ // PHI srcPhi, src, srcLoop
+ // PHI destPhi, dst, destLoop
+ // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+ // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
+ // subs varLoop, varPhi, #UnitSize
+ // bne loopMBB
+ // fallthrough --> exitMBB
+ // exitMBB:
+ // epilogue to handle left-over bytes
+ // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+ // [destOut] = STRB_POST(scratch, destLoop, 1)
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Load an immediate to varEnd.
+ unsigned varEnd = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ unsigned VReg1 = varEnd;
+ if ((LoopSize & 0xFFFF0000) != 0)
+ VReg1 = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(LoopSize & 0xFFFF));
+
+ if ((LoopSize & 0xFFFF0000) != 0)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
+ .addReg(VReg1)
+ .addImm(LoopSize >> 16));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getDataLayout()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
+ .addReg(varEnd, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+ }
+ BB->addSuccessor(loopMBB);
+
+ // Generate the loop body:
+ // varPhi = PHI(varLoop, varEnd)
+ // srcPhi = PHI(srcLoop, src)
+ // destPhi = PHI(destLoop, dst)
+ MachineBasicBlock *entryBB = BB;
+ BB = loopMBB;
+ unsigned varLoop = MRI.createVirtualRegister(TRC);
+ unsigned varPhi = MRI.createVirtualRegister(TRC);
+ unsigned srcLoop = MRI.createVirtualRegister(TRC);
+ unsigned srcPhi = MRI.createVirtualRegister(TRC);
+ unsigned destLoop = MRI.createVirtualRegister(TRC);
+ unsigned destPhi = MRI.createVirtualRegister(TRC);
+
+ BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
+ .addReg(varLoop).addMBB(loopMBB)
+ .addReg(varEnd).addMBB(entryBB);
+ BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
+ .addReg(srcLoop).addMBB(loopMBB)
+ .addReg(src).addMBB(entryBB);
+ BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
+ .addReg(destLoop).addMBB(loopMBB)
+ .addReg(dest).addMBB(entryBB);
+
+ // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+ // [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
+ unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
+ if (UnitSize >= 8) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(destPhi).addImm(0).addReg(scratch));
+ } else if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(scratch).addReg(destPhi)
+ .addImm(UnitSize));
+ } else {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
+ .addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(scratch).addReg(destPhi)
+ .addReg(0).addImm(UnitSize));
+ }
+
+ // Decrement loop variable by UnitSize.
+ MachineInstrBuilder MIB = BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ // loopMBB can loop back to loopMBB or fall through to exitMBB.
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // Add epilogue to handle BytesLeft.
+ BB = exitMBB;
+ MachineInstr *StartOfExit = exitMBB->begin();
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+
+ // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+ // [destOut] = STRB_POST(scratch, destLoop, 1)
+ unsigned srcIn = srcLoop;
+ unsigned destIn = destLoop;
+ for (unsigned i = 0; i < BytesLeft; i++) {
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addImm(1));
+ } else {
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+
+ MI->eraseFromParent(); // The instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+ switch (MI->getOpcode()) {
+ default: {
+ MI->dump();
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+ // The Thumb2 pre-indexed stores have the same MI operands, they just
+ // define them differently in the .td files from the isel patterns, so
+ // they need pseudos.
+ case ARM::t2STR_preidx:
+ MI->setDesc(TII->get(ARM::t2STR_PRE));
+ return BB;
+ case ARM::t2STRB_preidx:
+ MI->setDesc(TII->get(ARM::t2STRB_PRE));
+ return BB;
+ case ARM::t2STRH_preidx:
+ MI->setDesc(TII->get(ARM::t2STRH_PRE));
+ return BB;
+
+ case ARM::STRi_preidx:
+ case ARM::STRBi_preidx: {
+ unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
+ ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
+ // Decode the offset.
+ unsigned Offset = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
+ Offset = ARM_AM::getAM2Offset(Offset);
+ if (isSub)
+ Offset = -Offset;
+
+ MachineMemOperand *MMO = *MI->memoperands_begin();
+ BuildMI(*BB, MI, dl, TII->get(NewOpc))
+ .addOperand(MI->getOperand(0)) // Rn_wb
+ .addOperand(MI->getOperand(1)) // Rt
+ .addOperand(MI->getOperand(2)) // Rn
+ .addImm(Offset) // offset (skip GPR==zero_reg)
+ .addOperand(MI->getOperand(5)) // pred
+ .addOperand(MI->getOperand(6))
+ .addMemOperand(MMO);
+ MI->eraseFromParent();
+ return BB;
+ }
+ case ARM::STRr_preidx:
+ case ARM::STRBr_preidx:
+ case ARM::STRH_preidx: {
+ unsigned NewOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
+ case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
+ case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
+ }
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i)
+ MIB.addOperand(MI->getOperand(i));
+ MI->eraseFromParent();
+ return BB;
+ }
+ case ARM::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+
+ case ARM::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+
+ case ARM::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+
+ case ARM::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+
+ case ARM::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+
+ case ARM::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+
+ case ARM::ATOMIC_LOAD_MIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
+
+ case ARM::ATOMIC_LOAD_MAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
+
+ case ARM::ATOMIC_LOAD_UMIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
+
+ case ARM::ATOMIC_LOAD_UMAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
+
+ case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
+ case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
+ case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
+
+ case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
+ case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
+ case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+
+
+ case ARM::ATOMADD6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
+ isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
+ /*NeedsCarry*/ true);
+ case ARM::ATOMSUB6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true);
+ case ARM::ATOMOR6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
+ isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMXOR6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
+ isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMAND6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
+ isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMSWAP6432:
+ return EmitAtomicBinary64(MI, BB, 0, 0, false);
+ case ARM::ATOMCMPXCHG6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ false, /*IsCmpxchg*/true);
+ case ARM::ATOMMIN6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::LT);
+ case ARM::ATOMMAX6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::GE);
+ case ARM::ATOMUMIN6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::LO);
+ case ARM::ATOMUMAX6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::HS);
+
+ case ARM::tMOVCCr_pseudo: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case ARM::BCCi64:
+ case ARM::BCCZi64: {
+ // If there is an unconditional branch to the other successor, remove it.
+ BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+
+ // Compare both parts that make up the double comparison separately for
+ // equality.
+ bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+ unsigned LHS1 = MI->getOperand(1).getReg();
+ unsigned LHS2 = MI->getOperand(2).getReg();
+ if (RHSisZero) {
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS2).addImm(0)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ } else {
+ unsigned RHS1 = MI->getOperand(3).getReg();
+ unsigned RHS2 = MI->getOperand(4).getReg();
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS2).addReg(RHS2)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
+
+ MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+ if (MI->getOperand(0).getImm() == ARMCC::NE)
+ std::swap(destMBB, exitMBB);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ if (isThumb2)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
+ else
+ BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ EmitSjLjDispatchBlock(MI, BB);
+ return BB;
+
+ case ARM::ABS:
+ case ARM::t2ABS: {
+ // To insert an ABS instruction, we have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // source vreg to test against 0, the destination vreg to set,
+ // the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ // It transforms
+ // V1 = ABS V0
+ // into
+ // V2 = MOVS V0
+ // BCC (branch to SinkBB if V0 >= 0)
+ // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
+ // SinkBB: V1 = PHI(V2, V3)
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator BBI = BB;
+ ++BBI;
+ MachineFunction *Fn = BB->getParent();
+ MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ Fn->insert(BBI, RSBBB);
+ Fn->insert(BBI, SinkBB);
+
+ unsigned int ABSSrcReg = MI->getOperand(1).getReg();
+ unsigned int ABSDstReg = MI->getOperand(0).getReg();
+ bool isThumb2 = Subtarget->isThumb2();
+ MachineRegisterInfo &MRI = Fn->getRegInfo();
+ // In Thumb mode S must not be specified if source register is the SP or
+ // PC and if destination register is the SP, so restrict register class
+ unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ SinkBB->splice(SinkBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ SinkBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(RSBBB);
+ BB->addSuccessor(SinkBB);
+
+ // fall through to SinkMBB
+ RSBBB->addSuccessor(SinkBB);
+
+ // insert a cmp at the end of BB
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(ABSSrcReg).addImm(0));
+
+ // insert a bcc with opposite CC to ARMCC::MI at the end of BB
+ BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
+ .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
+
+ // insert rsbri in RSBBB
+ // Note: BCC and rsbri will be converted into predicated rsbmi
+ // by if-conversion pass
+ BuildMI(*RSBBB, RSBBB->begin(), dl,
+ TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+ .addReg(ABSSrcReg, RegState::Kill)
+ .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+
+ // insert PHI in SinkBB,
+ // reuse ABSDstReg to not change uses of ABS instruction
+ BuildMI(*SinkBB, SinkBB->begin(), dl,
+ TII->get(ARM::PHI), ABSDstReg)
+ .addReg(NewRsbDstReg).addMBB(RSBBB)
+ .addReg(ABSSrcReg).addMBB(BB);
+
+ // remove ABS instruction
+ MI->eraseFromParent();
+
+ // return last added BB
+ return SinkBB;
+ }
+ case ARM::COPY_STRUCT_BYVAL_I32:
+ ++NumLoopByVals;
+ return EmitStructByval(MI, BB);
+ }
+}
+
+void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ if (!MI->hasPostISelHook()) {
+ assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
+ "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
+ return;
+ }
+
+ const MCInstrDesc *MCID = &MI->getDesc();
+ // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
+ // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
+ // operand is still set to noreg. If needed, set the optional operand's
+ // register to CPSR, and remove the redundant implicit def.
+ //
+ // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+
+ // Rename pseudo opcodes.
+ unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
+ if (NewOpc) {
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
+ MCID = &TII->get(NewOpc);
+
+ assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
+ "converted opcode should be the same except for cc_out");
+
+ MI->setDesc(*MCID);
+
+ // Add the optional cc_out operand
+ MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
+ }
+ unsigned ccOutIdx = MCID->getNumOperands() - 1;
+
+ // Any ARM instruction that sets the 's' bit should specify an optional
+ // "cc_out" operand in the last operand position.
+ if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
+ assert(!NewOpc && "Optional cc_out operand required");
+ return;
+ }
+ // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
+ // since we already have an optional CPSR def.
+ bool definesCPSR = false;
+ bool deadCPSR = false;
+ for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
+ definesCPSR = true;
+ if (MO.isDead())
+ deadCPSR = true;
+ MI->RemoveOperand(i);
+ break;
+ }
+ }
+ if (!definesCPSR) {
+ assert(!NewOpc && "Optional cc_out operand required");
+ return;
+ }
+ assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
+ if (deadCPSR) {
+ assert(!MI->getOperand(ccOutIdx).getReg() &&
+ "expect uninitialized optional cc_out operand");
+ return;
+ }
+
+ // If this instruction was defined with an optional CPSR def and its dag node
+ // had a live implicit CPSR def, then activate the optional CPSR def.
+ MachineOperand &MO = MI->getOperand(ccOutIdx);
+ MO.setReg(ARM::CPSR);
+ MO.setIsDef(true);
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+// Helper function that checks if N is a null or all ones constant.
+static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ if (!C)
+ return false;
+ return AllOnes ? C->isAllOnesValue() : C->isNullValue();
+}
+
+// Return true if N is conditionally 0 or all ones.
+// Detects these expressions where cc is an i1 value:
+//
+// (select cc 0, y) [AllOnes=0]
+// (select cc y, 0) [AllOnes=0]
+// (zext cc) [AllOnes=0]
+// (sext cc) [AllOnes=0/1]
+// (select cc -1, y) [AllOnes=1]
+// (select cc y, -1) [AllOnes=1]
+//
+// Invert is set when N is the null/all ones constant when CC is false.
+// OtherOp is set to the alternative value of N.
+static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
+ SDValue &CC, bool &Invert,
+ SDValue &OtherOp,
+ SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default: return false;
+ case ISD::SELECT: {
+ CC = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ if (isZeroOrAllOnes(N1, AllOnes)) {
+ Invert = false;
+ OtherOp = N2;
+ return true;
+ }
+ if (isZeroOrAllOnes(N2, AllOnes)) {
+ Invert = true;
+ OtherOp = N1;
+ return true;
+ }
+ return false;
+ }
+ case ISD::ZERO_EXTEND:
+ // (zext cc) can never be the all ones value.
+ if (AllOnes)
+ return false;
+ // Fall through.
+ case ISD::SIGN_EXTEND: {
+ EVT VT = N->getValueType(0);
+ CC = N->getOperand(0);
+ if (CC.getValueType() != MVT::i1)
+ return false;
+ Invert = !AllOnes;
+ if (AllOnes)
+ // When looking for an AllOnes constant, N is an sext, and the 'other'
+ // value is 0.
+ OtherOp = DAG.getConstant(0, VT);
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ // When looking for a 0 constant, N can be zext or sext.
+ OtherOp = DAG.getConstant(1, VT);
+ else
+ OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ return true;
+ }
+ }
+}
+
+// Combine a constant select operand into its use:
+//
+// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
+// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
+// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
+//
+// The transform is rejected if the select doesn't have a constant operand that
+// is null, or all ones when AllOnes is set.
+//
+// Also recognize sext/zext from i1:
+//
+// (add (zext cc), x) -> (select cc (add x, 1), x)
+// (add (sext cc), x) -> (select cc (add x, -1), x)
+//
+// These transformations eventually create predicated instructions.
+//
+// @param N The node to transform.
+// @param Slct The N operand that is a select.
+// @param OtherOp The other N operand (x above).
+// @param DCI Context.
+// @param AllOnes Require the select constant to be all ones instead of null.
+// @returns The new node, or SDValue() on failure.
+static
+SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ TargetLowering::DAGCombinerInfo &DCI,
+ bool AllOnes = false) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ SDValue NonConstantVal;
+ SDValue CCOp;
+ bool SwapSelectOps;
+ if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
+ NonConstantVal, DAG))
+ return SDValue();
+
+ // Slct is now know to be the desired identity constant when CC is true.
+ SDValue TrueVal = OtherOp;
+ SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ OtherOp, NonConstantVal);
+ // Unless SwapSelectOps says CC should be false.
+ if (SwapSelectOps)
+ std::swap(TrueVal, FalseVal);
+
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, TrueVal, FalseVal);
+}
+
+// Attempt combineSelectAndUse on each operand of a commutative operator N.
+static
+SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
+ if (Result.getNode())
+ return Result;
+ }
+ if (N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
+ if (Result.getNode())
+ return Result;
+ }
+ return SDValue();
+}
+
+// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
+// (only after legalization).
+static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ // Only perform optimization if after legalize, and if NEON is available. We
+ // also expected both operands to be BUILD_VECTORs.
+ if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
+ || N0.getOpcode() != ISD::BUILD_VECTOR
+ || N1.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Check output type since VPADDL operand elements can only be 8, 16, or 32.
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Check that the vector operands are of the right form.
+ // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
+ // operands, where N is the size of the formed vector.
+ // Each EXTRACT_VECTOR should have the same input vector and odd or even
+ // index such that we have a pair wise add pattern.
+
+ // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
+ if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Vec = N0->getOperand(0)->getOperand(0);
+ SDNode *V = Vec.getNode();
+ unsigned nextIndex = 0;
+
+ // For each operands to the ADD which are BUILD_VECTORs,
+ // check to see if each of their operands are an EXTRACT_VECTOR with
+ // the same vector and appropriate index.
+ for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
+ if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
+ && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ SDValue ExtVec0 = N0->getOperand(i);
+ SDValue ExtVec1 = N1->getOperand(i);
+
+ // First operand is the vector, verify its the same.
+ if (V != ExtVec0->getOperand(0).getNode() ||
+ V != ExtVec1->getOperand(0).getNode())
+ return SDValue();
+
+ // Second is the constant, verify its correct.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
+
+ // For the constant, we want to see all the even or all the odd.
+ if (!C0 || !C1 || C0->getZExtValue() != nextIndex
+ || C1->getZExtValue() != nextIndex+1)
+ return SDValue();
+
+ // Increment index.
+ nextIndex+=2;
+ } else
+ return SDValue();
+ }
+
+ // Create VPADDL node.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
+ TLI.getPointerTy()));
+
+ // Input is the vector.
+ Ops.push_back(Vec);
+
+ // Get widened type and narrowed type.
+ MVT widenType;
+ unsigned numElem = VT.getVectorNumElements();
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
+ case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
+ case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
+ default:
+ llvm_unreachable("Invalid vector element type for padd optimization.");
+ }
+
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ widenType, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+}
+
+static SDValue findMUL_LOHI(SDValue V) {
+ if (V->getOpcode() == ISD::UMUL_LOHI ||
+ V->getOpcode() == ISD::SMUL_LOHI)
+ return V;
+ return SDValue();
+}
+
+static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ if (Subtarget->isThumb1Only()) return SDValue();
+
+ // Only perform the checks after legalize when the pattern is available.
+ if (DCI.isBeforeLegalize()) return SDValue();
+
+ // Look for multiply add opportunities.
+ // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
+ // each add nodes consumes a value from ISD::UMUL_LOHI and there is
+ // a glue link from the first add to the second add.
+ // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
+ // a S/UMLAL instruction.
+ // loAdd UMUL_LOHI
+ // \ / :lo \ :hi
+ // \ / \ [no multiline comment]
+ // ADDC | hiAdd
+ // \ :glue / /
+ // \ / /
+ // ADDE
+ //
+ assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
+ SDValue AddcOp0 = AddcNode->getOperand(0);
+ SDValue AddcOp1 = AddcNode->getOperand(1);
+
+ // Check if the two operands are from the same mul_lohi node.
+ if (AddcOp0.getNode() == AddcOp1.getNode())
+ return SDValue();
+
+ assert(AddcNode->getNumValues() == 2 &&
+ AddcNode->getValueType(0) == MVT::i32 &&
+ AddcNode->getValueType(1) == MVT::Glue &&
+ "Expect ADDC with two result values: i32, glue");
+
+ // Check that the ADDC adds the low result of the S/UMUL_LOHI.
+ if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
+ AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
+ AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
+ AddcOp1->getOpcode() != ISD::SMUL_LOHI)
+ return SDValue();
+
+ // Look for the glued ADDE.
+ SDNode* AddeNode = AddcNode->getGluedUser();
+ if (AddeNode == NULL)
+ return SDValue();
+
+ // Make sure it is really an ADDE.
+ if (AddeNode->getOpcode() != ISD::ADDE)
+ return SDValue();
+
+ assert(AddeNode->getNumOperands() == 3 &&
+ AddeNode->getOperand(2).getValueType() == MVT::Glue &&
+ "ADDE node has the wrong inputs");
+
+ // Check for the triangle shape.
+ SDValue AddeOp0 = AddeNode->getOperand(0);
+ SDValue AddeOp1 = AddeNode->getOperand(1);
+
+ // Make sure that the ADDE operands are not coming from the same node.
+ if (AddeOp0.getNode() == AddeOp1.getNode())
+ return SDValue();
+
+ // Find the MUL_LOHI node walking up ADDE's operands.
+ bool IsLeftOperandMUL = false;
+ SDValue MULOp = findMUL_LOHI(AddeOp0);
+ if (MULOp == SDValue())
+ MULOp = findMUL_LOHI(AddeOp1);
+ else
+ IsLeftOperandMUL = true;
+ if (MULOp == SDValue())
+ return SDValue();
+
+ // Figure out the right opcode.
+ unsigned Opc = MULOp->getOpcode();
+ unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
+
+ // Figure out the high and low input values to the MLAL node.
+ SDValue* HiMul = &MULOp;
+ SDValue* HiAdd = NULL;
+ SDValue* LoMul = NULL;
+ SDValue* LowAdd = NULL;
+
+ if (IsLeftOperandMUL)
+ HiAdd = &AddeOp1;
+ else
+ HiAdd = &AddeOp0;
+
+
+ if (AddcOp0->getOpcode() == Opc) {
+ LoMul = &AddcOp0;
+ LowAdd = &AddcOp1;
+ }
+ if (AddcOp1->getOpcode() == Opc) {
+ LoMul = &AddcOp1;
+ LowAdd = &AddcOp0;
+ }
+
+ if (LoMul == NULL)
+ return SDValue();
+
+ if (LoMul->getNode() != HiMul->getNode())
+ return SDValue();
+
+ // Create the merged node.
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(LoMul->getOperand(0));
+ Ops.push_back(LoMul->getOperand(1));
+ Ops.push_back(*LowAdd);
+ Ops.push_back(*HiAdd);
+
+ SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(),
+ DAG.getVTList(MVT::i32, MVT::i32),
+ &Ops[0], Ops.size());
+
+ // Replace the ADDs' nodes uses by the MLA node's values.
+ SDValue HiMLALResult(MLALNode.getNode(), 1);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
+
+ SDValue LoMLALResult(MLALNode.getNode(), 0);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
+
+ // Return original node to notify the driver to stop replacing.
+ SDValue resNode(AddcNode, 0);
+ return resNode;
+}
+
+/// PerformADDCCombine - Target-specific dag combine transform from
+/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
+static SDValue PerformADDCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ return AddCombineTo64bitMLAL(N, DCI, Subtarget);
+
+}
+
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget){
+
+ // Attempt to create vpaddl for this add.
+ SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
+ // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+ if (N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+ if (Result.getNode()) return Result;
+ }
+ return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
+static SDValue PerformSUBCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+ if (N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformVMULCombine
+/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
+/// special multiplier accumulator forwarding.
+/// vmul d3, d0, d2
+/// vmla d3, d1, d2
+/// is faster than
+/// vadd d3, d0, d1
+/// vmul d3, d3, d2
+static SDValue PerformVMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasVMLxForwarding())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB) {
+ Opcode = N1.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB)
+ return SDValue();
+ std::swap(N0, N1);
+ }
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ return DAG.getNode(Opcode, DL, VT,
+ DAG.getNode(ISD::MUL, DL, VT, N00, N1),
+ DAG.getNode(ISD::MUL, DL, VT, N01, N1));
+}
+
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (Subtarget->isThumb1Only())
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT.is64BitVector() || VT.is128BitVector())
+ return PerformVMULCombine(N, DCI, Subtarget);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ int64_t MulAmt = C->getSExtValue();
+ unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+
+ ShiftAmt = ShiftAmt & (32 - 1);
+ SDValue V = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Res;
+ MulAmt >>= ShiftAmt;
+
+ if (MulAmt >= 0) {
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt - 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt + 1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+ } else {
+ uint64_t MulAmtAbs = -MulAmt;
+ if (isPowerOf2_32(MulAmtAbs + 1)) {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs + 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmtAbs - 1)) {
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs-1),
+ MVT::i32)));
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, MVT::i32),Res);
+
+ } else
+ return SDValue();
+ }
+
+ if (ShiftAmt != 0)
+ Res = DAG.getNode(ISD::SHL, DL, VT,
+ Res, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ // Attempt to use immediate-form VBIC
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VbicVT;
+ SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VbicVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+ SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+ }
+ }
+ }
+
+ if (!Subtarget->isThumb1Only()) {
+ // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
+ if (Result.getNode())
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Attempt to use immediate-form VORR
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN && Subtarget->hasNEON() &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VorrVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VorrVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+ SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+ }
+ }
+ }
+
+ if (!Subtarget->isThumb1Only()) {
+ // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // The code below optimizes (or (and X, Y), Z).
+ // The AND operand needs to have a single user to make these optimizations
+ // profitable.
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+ return SDValue();
+ SDValue N1 = N->getOperand(1);
+
+ // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+ if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+ APInt SplatBits0;
+ if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ APInt SplatBits1;
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1), N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
+ }
+
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+ // 1) or (and A, mask), val => ARMbfi A, val, mask
+ // iff (val & mask) == val
+ //
+ // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+ // && mask == ~mask2
+ // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+ // && ~mask == mask2
+ // (i.e., copy a bitfield value into another bitfield of the same width)
+
+ if (VT != MVT::i32)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ SDValue MaskOp = N0.getOperand(1);
+ ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
+ if (!MaskC)
+ return SDValue();
+ unsigned Mask = MaskC->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ SDValue Res;
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N1C) {
+ unsigned Val = N1C->getZExtValue();
+ if ((Val & ~Mask) != Val)
+ return SDValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask)) {
+ Val >>= CountTrailingZeros_32(~Mask);
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ } else if (N1.getOpcode() == ISD::AND) {
+ // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned Mask2 = N11C->getZExtValue();
+
+ // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
+ // as is to match.
+ if (ARM::isBitFieldInvertedMask(Mask) &&
+ (Mask == ~Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask == 0xffff || Mask == 0xffff0000))
+ return SDValue();
+ // 2a
+ unsigned amt = CountTrailingZeros_32(Mask2);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+ DAG.getConstant(amt, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
+ DAG.getConstant(Mask, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+ (~Mask == Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return SDValue();
+ // 2b
+ unsigned lsb = CountTrailingZeros_32(Mask);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N00,
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+ DAG.getConstant(Mask2, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ }
+
+ if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
+ N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+ ARM::isBitFieldInvertedMask(~Mask)) {
+ // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+ // where lsb(mask) == #shamt and masked bits of B are known zero.
+ SDValue ShAmt = N00.getOperand(1);
+ unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(Mask);
+ if (ShAmtC != LSB)
+ return SDValue();
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
+ DAG.getConstant(~Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformXORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ if (!Subtarget->isThumb1Only()) {
+ // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
+ if (Result.getNode())
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+/// the bits being cleared by the AND are not demanded by the BFI.
+static SDValue PerformBFICombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::AND) {
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(~InvMask);
+ unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+ unsigned Mask = (1 << Width)-1;
+ unsigned Mask2 = N11C->getZExtValue();
+ if ((Mask & (~Mask2)) == 0)
+ return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N1.getOperand(0),
+ N->getOperand(2));
+ }
+ return SDValue();
+}
+
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVRRD.
+static SDValue PerformVMOVRRDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // vmovrrd(vmovdrr x, y) -> x,y
+ SDValue InDouble = N->getOperand(0);
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR)
+ return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+
+ // vmovrrd(load f64) -> (load i32), (load i32)
+ SDNode *InNode = InDouble.getNode();
+ if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
+ InNode->getValueType(0) == MVT::f64 &&
+ InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
+ !cast<LoadSDNode>(InNode)->isVolatile()) {
+ // TODO: Should this be done for non-FrameIndex operands?
+ LoadSDNode *LD = cast<LoadSDNode>(InNode);
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = LD->getDebugLoc();
+ SDValue BasePtr = LD->getBasePtr();
+ SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->isInvariant(),
+ std::min(4U, LD->getAlignment() / 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
+ DCI.RemoveFromWorklist(LD);
+ DAG.DeleteNode(LD);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
+ // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::BITCAST)
+ Op1 = Op1.getOperand(0);
+ if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+ Op0.getNode() == Op1.getNode() &&
+ Op0.getResNo() == 0 && Op1.getResNo() == 1)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Op0.getOperand(0));
+ return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // pack all of the elements in one place. Next, store to memory in fewer
+ // chunks.
+ SDValue StVal = St->getValue();
+ EVT VT = StVal.getValueType();
+ if (St->isTruncatingStore() && VT.isVector()) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ DebugLoc DL = St->getDebugLoc();
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
+ }
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(I));
+ SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
+ Chains.size());
+ }
+
+ if (!ISD::isNormalStore(St))
+ return SDValue();
+
+ // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
+ // ARM stores of arguments in the same cache line.
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse()) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = St->getDebugLoc();
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+ StVal.getNode()->getOperand(0), BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
+ if (StVal.getValueType() != MVT::i64 ||
+ StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = StVal.getDebugLoc();
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = N->getDebugLoc();
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getTBAAInfo());
+}
+
+/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
+/// are normal, non-volatile loads. If so, it is profitable to bitcast an
+/// i64 vector to have f64 elements, since the value can then be loaded
+/// directly into a VFP register.
+static bool hasNormalLoadOperand(SDNode *N) {
+ unsigned NumElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
+ return true;
+ }
+ return false;
+}
+
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI){
+ // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+ // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
+ // into a pair of GPRs, which is fine when the value is used as a scalar,
+ // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+ SelectionDAG &DAG = DCI.DAG;
+ if (N->getNumOperands() == 2) {
+ SDValue RV = PerformVMOVDRRCombine(N, DAG);
+ if (RV.getNode())
+ return RV;
+ }
+
+ // Load i64 elements as f64 values so that type legalization does not split
+ // them up into i32 values.
+ EVT VT = N->getValueType(0);
+ if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
+ return SDValue();
+ DebugLoc dl = N->getDebugLoc();
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
+ Ops.push_back(V);
+ // Make the DAGCombiner fold the bitcast.
+ DCI.AddToWorklist(V.getNode());
+ }
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+/// PerformInsertEltCombine - Target-specific dag combine xforms for
+/// ISD::INSERT_VECTOR_ELT.
+static SDValue PerformInsertEltCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 load inserted into a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ EVT VT = N->getValueType(0);
+ SDNode *Elt = N->getOperand(1).getNode();
+ if (VT.getVectorElementType() != MVT::i64 ||
+ !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ VT.getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(V.getNode());
+ SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
+ Vec, V, N->getOperand(2));
+ return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
+}
+
+/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
+/// ISD::VECTOR_SHUFFLE.
+static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
+ // The LLVM shufflevector instruction does not require the shuffle mask
+ // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
+ // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
+ // operands do not match the mask length, they are extended by concatenating
+ // them with undef vectors. That is probably the right thing for other
+ // targets, but for NEON it is better to concatenate two double-register
+ // size vector operands into a single quad-register size vector. Do that
+ // transformation here:
+ // shuffle(concat(v1, undef), concat(v2, undef)) ->
+ // shuffle(concat(v1, v2), undef)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op1.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op0.getNumOperands() != 2 ||
+ Op1.getNumOperands() != 2)
+ return SDValue();
+ SDValue Concat0Op1 = Op0.getOperand(1);
+ SDValue Concat1Op1 = Op1.getOperand(1);
+ if (Concat0Op1.getOpcode() != ISD::UNDEF ||
+ Concat1Op1.getOpcode() != ISD::UNDEF)
+ return SDValue();
+ // Skip the transformation if any of the types are illegal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ if (!TLI.isTypeLegal(VT) ||
+ !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
+ !TLI.isTypeLegal(Concat1Op1.getValueType()))
+ return SDValue();
+
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ Op0.getOperand(0), Op1.getOperand(0));
+ // Translate the shuffle mask.
+ SmallVector<int, 16> NewMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfElts = NumElts/2;
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned n = 0; n < NumElts; ++n) {
+ int MaskElt = SVN->getMaskElt(n);
+ int NewElt = -1;
+ if (MaskElt < (int)HalfElts)
+ NewElt = MaskElt;
+ else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
+ NewElt = HalfElts + MaskElt - NumElts;
+ NewMask.push_back(NewElt);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+ DAG.getUNDEF(VT), NewMask.data());
+}
+
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
+/// NEON load/store intrinsics to merge base address updates.
+static SDValue CombineBaseUpdate(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ SDValue Addr = N->getOperand(AddrOpIdx);
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle.
+ if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoad = true;
+ bool isLaneOp = false;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: llvm_unreachable("unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; break;
+ case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+ NumVecs = 2; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+ NumVecs = 3; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+ NumVecs = 4; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+ NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+ NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+ NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode for Neon base update");
+ case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+ case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+ case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoad)
+ VecTy = N->getValueType(0);
+ else
+ VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint64_t IncVal = CInc->getZExtValue();
+ if (IncVal != NumBytes)
+ continue;
+ } else if (NumBytes >= 3 * 16) {
+ // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+ // separate instructions that make it harder to use a non-constant update.
+ continue;
+ }
+
+ // Create the new updating load/store node.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = VecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(AddrOpIdx));
+ Ops.push_back(Inc);
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+ Ops.data(), Ops.size(),
+ MemInt->getMemoryVT(),
+ MemInt->getMemOperand());
+
+ // Update the uses.
+ std::vector<SDValue> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i) {
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+ }
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+ return SDValue();
+}
+
+/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
+/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
+/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
+/// return true.
+static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // vldN-dup instructions only support 64-bit vectors for N > 1.
+ if (!VT.is64BitVector())
+ return false;
+
+ // Check if the VDUPLANE operand is a vldN-dup intrinsic.
+ SDNode *VLD = N->getOperand(0).getNode();
+ if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return false;
+ unsigned NumVecs = 0;
+ unsigned NewOpc = 0;
+ unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::arm_neon_vld2lane) {
+ NumVecs = 2;
+ NewOpc = ARMISD::VLD2DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+ NumVecs = 3;
+ NewOpc = ARMISD::VLD3DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+ NumVecs = 4;
+ NewOpc = ARMISD::VLD4DUP;
+ } else {
+ return false;
+ }
+
+ // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+ // numbers match the load.
+ unsigned VLDLaneNo =
+ cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ // Ignore uses of the chain result.
+ if (UI.getUse().getResNo() == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ if (User->getOpcode() != ARMISD::VDUPLANE ||
+ VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ return false;
+ }
+
+ // Create the vldN-dup node.
+ EVT Tys[5];
+ unsigned n;
+ for (n = 0; n < NumVecs; ++n)
+ Tys[n] = VT;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+ SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+ MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+ Ops, 2, VLDMemInt->getMemoryVT(),
+ VLDMemInt->getMemOperand());
+
+ // Update the uses.
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ unsigned ResNo = UI.getUse().getResNo();
+ // Ignore uses of the chain result.
+ if (ResNo == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+ }
+
+ // Now the vldN-lane intrinsic is dead except for its chain result.
+ // Update uses of the chain.
+ std::vector<SDValue> VLDDupResults;
+ for (unsigned n = 0; n < NumVecs; ++n)
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+ DCI.CombineTo(VLD, VLDDupResults);
+
+ return true;
+}
+
+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op = N->getOperand(0);
+
+ // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
+ // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
+ if (CombineVLDDUP(N, DCI))
+ return SDValue(N, 0);
+
+ // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+ // redundant. Ignore bit_converts for now; element sizes are checked below.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+ return SDValue();
+
+ // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+ unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+ // The canonical VMOV for a zero vector uses a 32-bit element size.
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned EltBits;
+ if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+ EltSize = 8;
+ EVT VT = N->getValueType(0);
+ if (EltSize > VT.getVectorElementType().getSizeInBits())
+ return SDValue();
+
+ return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+}
+
+// isConstVecPow2 - Return true if each vector element is a power of 2, all
+// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
+static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
+{
+ integerPart cN;
+ integerPart c0 = 0;
+ for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
+ I != E; I++) {
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
+ if (!C)
+ return false;
+
+ bool isExact;
+ APFloat APF = C->getValueAPF();
+ if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
+ != APFloat::opOK || !isExact)
+ return false;
+
+ c0 = (I == 0) ? cN : c0;
+ if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
+ return false;
+ }
+ C = c0;
+ return true;
+}
+
+/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
+/// can replace combinations of VMUL and VCVT (floating-point to integer)
+/// when the VMUL has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vmul.f32 d16, d17, d16
+/// vcvt.s32.f32 d16, d16
+/// becomes:
+/// vcvt.s32.f32 d16, d16, #3
+static SDValue PerformVCVTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+
+ if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
+ Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ uint64_t C;
+ SDValue N0 = Op->getOperand(0);
+ SDValue ConstVec = Op->getOperand(1);
+ bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
+ Intrinsic::arm_neon_vcvtfp2fxu;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ N->getValueType(0),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
+ DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
+/// can replace combinations of VCVT (integer to floating-point) and VDIV
+/// when the VDIV has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vcvt.f32.s32 d16, d16
+/// vdiv.f32 d16, d17, d16
+/// becomes:
+/// vcvt.f32.s32 d16, d16, #3
+static SDValue PerformVDIVCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+ unsigned OpOpcode = Op.getNode()->getOpcode();
+
+ if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+ (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
+ return SDValue();
+
+ uint64_t C;
+ SDValue ConstVec = N->getOperand(1);
+ bool isSigned = OpOpcode == ISD::SINT_TO_FP;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
+ Intrinsic::arm_neon_vcvtfxu2fp;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ Op.getValueType(),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// Getvshiftimm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+ HasAnyUndefs, ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits for a left shift; or
+/// 0 <= Value <= ElementBits for a long left shift.
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation. For a shift opcode, the value
+/// is positive, but for an intrinsic the value count must be negative. The
+/// absolute value must be in the range:
+/// 1 <= |Value| <= ElementBits for a right shift; or
+/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+ int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ if (isIntrinsic)
+ Cnt = -Cnt;
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+}
+
+/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ // Don't do anything for most intrinsics.
+ break;
+
+ // Vector shifts: check for immediate versions and lower them.
+ // Note: This is done during DAG combining instead of DAG legalizing because
+ // the build_vectors for 64-bit vector element shift counts are generally
+ // not legal, and it is hard to see their values after they get legalized to
+ // loads from a constant pool.
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ case Intrinsic::arm_neon_vshiftn:
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ case Intrinsic::arm_neon_vqshiftsu:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
+ VShiftOpc = ARMISD::VSHL;
+ break;
+ }
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ break;
+ }
+ return SDValue();
+
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vshll intrinsic");
+
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshiftsu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vqshlu intrinsic");
+
+ case Intrinsic::arm_neon_vshiftn:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ // Narrowing shifts require an immediate right shift.
+ if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for narrowing vector shift "
+ "intrinsic");
+
+ default:
+ llvm_unreachable("unhandled vector shift");
+ }
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ // Opcode already set above.
+ break;
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ if (Cnt == VT.getVectorElementType().getSizeInBits())
+ VShiftOpc = ARMISD::VSHLLi;
+ else
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
+ ARMISD::VSHLLs : ARMISD::VSHLLu);
+ break;
+ case Intrinsic::arm_neon_vshiftn:
+ VShiftOpc = ARMISD::VSHRN; break;
+ case Intrinsic::arm_neon_vrshifts:
+ VShiftOpc = ARMISD::VRSHRs; break;
+ case Intrinsic::arm_neon_vrshiftu:
+ VShiftOpc = ARMISD::VRSHRu; break;
+ case Intrinsic::arm_neon_vrshiftn:
+ VShiftOpc = ARMISD::VRSHRN; break;
+ case Intrinsic::arm_neon_vqshifts:
+ VShiftOpc = ARMISD::VQSHLs; break;
+ case Intrinsic::arm_neon_vqshiftu:
+ VShiftOpc = ARMISD::VQSHLu; break;
+ case Intrinsic::arm_neon_vqshiftsu:
+ VShiftOpc = ARMISD::VQSHLsu; break;
+ case Intrinsic::arm_neon_vqshiftns:
+ VShiftOpc = ARMISD::VQSHRNs; break;
+ case Intrinsic::arm_neon_vqshiftnu:
+ VShiftOpc = ARMISD::VQSHRNu; break;
+ case Intrinsic::arm_neon_vqshiftnsu:
+ VShiftOpc = ARMISD::VQSHRNsu; break;
+ case Intrinsic::arm_neon_vqrshiftns:
+ VShiftOpc = ARMISD::VQRSHRNs; break;
+ case Intrinsic::arm_neon_vqrshiftnu:
+ VShiftOpc = ARMISD::VQRSHRNu; break;
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ VShiftOpc = ARMISD::VQRSHRNsu; break;
+ }
+
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vshiftins: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
+ VShiftOpc = ARMISD::VSLI;
+ else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
+ VShiftOpc = ARMISD::VSRI;
+ else {
+ llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
+ }
+
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vqrshifts:
+ case Intrinsic::arm_neon_vqrshiftu:
+ // No immediate versions of these to check for.
+ break;
+ }
+
+ return SDValue();
+}
+
+/// PerformShiftCombine - Checks for immediate versions of vector shifts and
+/// lowers them. As with the vector shift intrinsics, this is done during DAG
+/// combining instead of DAG legalizing because the build_vectors for 64-bit
+/// vector element shift counts are generally not legal, and it is hard to see
+/// their values after they get legalized to loads from a constant pool.
+static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
+ // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
+ // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
+ SDValue N1 = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ SDValue N0 = N->getOperand(0);
+ if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
+ DAG.MaskedValueIsZero(N0.getOperand(0),
+ APInt::getHighBitsSet(32, 16)))
+ return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
+
+ // Nothing to be done for scalar shifts.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!VT.isVector() || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ assert(ST->hasNEON() && "unexpected vector shift");
+ int64_t Cnt;
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected shift opcode");
+
+ case ISD::SHL:
+ if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
+ return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ break;
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+ unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+ }
+ return SDValue();
+}
+
+/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
+/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
+static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue N0 = N->getOperand(0);
+
+ // Check for sign- and zero-extensions of vector extract operations of 8-
+ // and 16-bit vector elements. NEON supports these directly. They are
+ // handled during DAG combining because type legalization will promote them
+ // to 32-bit types and it is messy to recognize the operations after that.
+ if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue Vec = N0.getOperand(0);
+ SDValue Lane = N0.getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EltVT = N0.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (VT == MVT::i32 &&
+ (EltVT == MVT::i8 || EltVT == MVT::i16) &&
+ TLI.isTypeLegal(Vec.getValueType()) &&
+ isa<ConstantSDNode>(Lane)) {
+
+ unsigned Opc = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ISD::SIGN_EXTEND:
+ Opc = ARMISD::VGETLANEs;
+ break;
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Opc = ARMISD::VGETLANEu;
+ break;
+ }
+ return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ // If the target supports NEON, try to use vmax/vmin instructions for f32
+ // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
+ // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
+ // a NaN; only do the transformation when it matches that behavior.
+
+ // For now only do this when using NEON for FP operations; if using VFP, it
+ // is not obvious that the benefit outweighs the cost of switching to the
+ // NEON pipeline.
+ if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+ N->getValueType(0) != MVT::f32)
+ return SDValue();
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode = 0;
+ bool IsReversed;
+ if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
+ IsReversed = true ; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered;
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
+ // will return -0, so vmin can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
+ // will return +0, so vmax can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ }
+
+ if (!Opcode)
+ return SDValue();
+ return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
+/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
+SDValue
+ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+ SDValue Cmp = N->getOperand(4);
+ if (Cmp.getOpcode() != ARMISD::CMPZ)
+ // Only looking at EQ and NE cases.
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = Cmp.getOperand(0);
+ SDValue RHS = Cmp.getOperand(1);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue ARMcc = N->getOperand(2);
+ ARMCC::CondCodes CC =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+ // Simplify
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, y
+ // moveq r0, x
+ // to
+ // cmp r0, x
+ // movne r0, y
+ //
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, x
+ // movne r0, y
+ // to
+ // cmp r0, x
+ // movne r0, y
+ /// FIXME: Turn this into a target neutral optimization?
+ SDValue Res;
+ if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+ N->getOperand(3), Cmp);
+ } else if (CC == ARMCC::EQ && TrueVal == RHS) {
+ SDValue ARMcc;
+ SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+ N->getOperand(3), NewCmp);
+ }
+
+ if (Res.getNode()) {
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
+ // Capture demanded bits information that would be otherwise lost.
+ if (KnownZero == 0xfffffffe)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i1));
+ else if (KnownZero == 0xffffff00)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i8));
+ else if (KnownZero == 0xffff0000)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i16));
+ }
+
+ return Res;
+}
+
+SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget);
+ case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
+ case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
+ case ARMISD::BFI: return PerformBFICombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+ case ISD::STORE: return PerformSTORECombine(N, DCI);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+ case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+ case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
+ case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
+ case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ARMISD::VLD2DUP:
+ case ARMISD::VLD3DUP:
+ case ARMISD::VLD4DUP:
+ return CombineBaseUpdate(N, DCI);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane:
+ return CombineBaseUpdate(N, DCI);
+ default: break;
+ }
+ break;
+ }
+ return SDValue();
+}
+
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+ EVT VT) const {
+ return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
+
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus
+ bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32: {
+ // Unaligned access can use (for example) LRDB, LRDH, LDR
+ if (AllowsUnaligned) {
+ if (Fast)
+ *Fast = Subtarget->hasV7Ops();
+ return true;
+ }
+ return false;
+ }
+ case MVT::f64:
+ case MVT::v2f64: {
+ // For any little-endian targets with neon, we can support unaligned ld/st
+ // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
+ // A big-endian target may also explictly support unaligned accesses
+ if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+ return false;
+ }
+ }
+}
+
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+ unsigned AlignCheck) {
+ return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+ (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+
+ // See if we can use NEON instructions for this...
+ if ((!IsMemset || ZeroMemset) &&
+ Subtarget->hasNEON() &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) {
+ bool Fast;
+ if (Size >= 16 &&
+ (memOpAlign(SrcAlign, DstAlign, 16) ||
+ (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
+ return MVT::v2f64;
+ } else if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
+ return MVT::f64;
+ }
+ }
+
+ // Lowering to i32/i16 if the size permits.
+ if (Size >= 4)
+ return MVT::i32;
+ else if (Size >= 2)
+ return MVT::i16;
+
+ // Let the target-independent logic figure it out.
+ return MVT::Other;
+}
+
+bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ EVT VT1 = Val.getValueType();
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
+ return true;
+ }
+
+ return false;
+}
+
+static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
+ if (V < 0)
+ return false;
+
+ unsigned Scale = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ // Scale == 1;
+ break;
+ case MVT::i16:
+ // Scale == 2;
+ Scale = 2;
+ break;
+ case MVT::i32:
+ // Scale == 4;
+ Scale = 4;
+ break;
+ }
+
+ if ((V & (Scale - 1)) != 0)
+ return false;
+ V /= Scale;
+ return V == (V & ((1LL << 5) - 1));
+}
+
+static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ bool isNeg = false;
+ if (V < 0) {
+ isNeg = true;
+ V = - V;
+ }
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // + imm12 or - imm8
+ if (isNeg)
+ return V == (V & ((1LL << 8) - 1));
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ // Same as ARM mode. FIXME: NEON?
+ if (!Subtarget->hasVFP2())
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+static bool isLegalAddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ if (V == 0)
+ return true;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb1Only())
+ return isLegalT1AddressImmediate(V, VT);
+ else if (Subtarget->isThumb2())
+ return isLegalT2AddressImmediate(V, VT, Subtarget);
+
+ // ARM mode.
+ if (V < 0)
+ V = - V;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ // +- imm12
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::i16:
+ // +- imm8
+ return V == (V & ((1LL << 8) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) // FIXME: NEON?
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
+ EVT VT) const {
+ int Scale = AM.Scale;
+ if (Scale < 0)
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ Scale = Scale & ~1;
+ return Scale == 2 || Scale == 4 || Scale == 8;
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because arm allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ EVT VT = getValueType(Ty, true);
+ if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
+ return false;
+
+ // Can never fold addr of global into load/store.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // no scale reg, must be "r+i" or "r", or "i".
+ break;
+ case 1:
+ if (Subtarget->isThumb1Only())
+ return false;
+ // FALL THROUGH.
+ default:
+ // ARM doesn't support any R+R*scale+imm addr modes.
+ if (AM.BaseOffs)
+ return false;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb2())
+ return isLegalT2ScaledAddressingMode(AM, VT);
+
+ int Scale = AM.Scale;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ if (Scale < 0) Scale = -Scale;
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ return isPowerOf2_32(Scale & ~1);
+ case MVT::i16:
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because arm allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+ }
+ return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // Thumb2 and ARM modes can use cmn for negative immediates.
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
+ // Thumb1 doesn't have cmn, and only 8-bit immediates.
+ return Imm >= 0 && Imm <= 255;
+}
+
+/// isLegalAddImmediate - Return true if the specified immediate is a legal add
+/// *or sub* immediate, that is the target has add or sub instructions which can
+/// add a register with the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ // Same encoding for add/sub, just flip the sign.
+ int64_t AbsImm = llvm::abs64(Imm);
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(AbsImm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(AbsImm) != -1;
+ // Thumb1 only has 8-bit unsigned immediate.
+ return AbsImm >= 0 && AbsImm <= 255;
+}
+
+static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+ // AddressingMode 3
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -256) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Offset = Ptr->getOperand(1);
+ return true;
+ } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+ // AddressingMode 2
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x1000) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Base = Ptr->getOperand(0);
+ return true;
+ }
+ }
+
+ if (Ptr->getOpcode() == ISD::ADD) {
+ isInc = true;
+ ARM_AM::ShiftOpc ShOpcVal=
+ ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
+ if (ShOpcVal != ARM_AM::no_shift) {
+ Base = Ptr->getOperand(1);
+ Offset = Ptr->getOperand(0);
+ } else {
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ }
+ return true;
+ }
+
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ return true;
+ }
+
+ // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
+ return false;
+}
+
+static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
+ isInc = Ptr->getOpcode() == ISD::ADD;
+ Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ if (Ptr != Base) {
+ // Swap base ptr and offset to catch more post-index load / store when
+ // it's legal. In Thumb2 mode, offset must be an immediate.
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+ !Subtarget->isThumb2())
+ std::swap(Base, Offset);
+
+ // Post-indexed load / store update the base pointer.
+ if (Ptr != Base)
+ return false;
+ }
+
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+}
+
+void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case ARMISD::CMOV: {
+ // Bits are known zero/one if known on the LHS and RHS.
+ DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ if (KnownZero == 0 && KnownOne == 0) return;
+
+ APInt KnownZeroRHS, KnownOneRHS;
+ DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
+ KnownZero &= KnownZeroRHS;
+ KnownOne &= KnownOneRHS;
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ // Looking for "rev" which is V6+.
+ if (!Subtarget->hasV6Ops())
+ return false;
+
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::string AsmStr = IA->getAsmString();
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t,");
+
+ // rev $0, $1
+ if (AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
+ IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (Ty && Ty->getBitWidth() == 32)
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ break;
+ }
+
+ return false;
+}
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'l': return C_RegisterClass;
+ case 'w': return C_RegisterClass;
+ case 'h': return C_RegisterClass;
+ case 'x': return C_RegisterClass;
+ case 't': return C_RegisterClass;
+ case 'j': return C_Other; // Constant for movw.
+ // An address with a single base register. Due to the way we
+ // currently handle addresses it is the same as an 'r' memory constraint.
+ case 'Q': return C_Memory;
+ }
+ } else if (Constraint.size() == 2) {
+ switch (Constraint[0]) {
+ default: break;
+ // All 'U+' constraints are addresses.
+ case 'U': return C_Memory;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ARMTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'l':
+ if (type->isIntegerTy()) {
+ if (Subtarget->isThumb())
+ weight = CW_SpecificReg;
+ else
+ weight = CW_Register;
+ }
+ break;
+ case 'w':
+ if (type->isFloatingPointTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
+RCPair
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC ARM Constraint Letters
+ switch (Constraint[0]) {
+ case 'l': // Low regs or general regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, &ARM::tGPRRegClass);
+ return RCPair(0U, &ARM::GPRRegClass);
+ case 'h': // High regs or no regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, &ARM::hGPRRegClass);
+ break;
+ case 'r':
+ return RCPair(0U, &ARM::GPRRegClass);
+ case 'w':
+ if (VT == MVT::f32)
+ return RCPair(0U, &ARM::SPRRegClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, &ARM::DPRRegClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, &ARM::QPRRegClass);
+ break;
+ case 'x':
+ if (VT == MVT::f32)
+ return RCPair(0U, &ARM::SPR_8RegClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, &ARM::DPR_8RegClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, &ARM::QPR_8RegClass);
+ break;
+ case 't':
+ if (VT == MVT::f32)
+ return RCPair(0U, &ARM::SPRRegClass);
+ break;
+ }
+ }
+ if (StringRef("{cc}").equals_lower(Constraint))
+ return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Currently only support length 1 constraints.
+ if (Constraint.length() != 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'j':
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O':
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ int64_t CVal64 = C->getSExtValue();
+ int CVal = (int) CVal64;
+ // None of these constraints allow values larger than 32 bits. Check
+ // that the value fits in an int.
+ if (CVal != CVal64)
+ return;
+
+ switch (ConstraintLetter) {
+ case 'j':
+ // Constant suitable for movw, must be between 0 and
+ // 65535.
+ if (Subtarget->hasV6T2Ops())
+ if (CVal >= 0 && CVal <= 65535)
+ break;
+ return;
+ case 'I':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between 0 and 255, for ADD
+ // immediates.
+ if (CVal >= 0 && CVal <= 255)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getT2SOImmVal(CVal) != -1)
+ break;
+ } else {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getSOImmVal(CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'J':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between -255 and -1, for negated ADD
+ // immediates. This can be used in GCC with an "n" modifier that
+ // prints the negated value, for use with SUB instructions. It is
+ // not useful otherwise but is implemented for compatibility.
+ if (CVal >= -255 && CVal <= -1)
+ break;
+ } else {
+ // This must be a constant between -4095 and 4095. It is not clear
+ // what this constraint is intended for. Implemented for
+ // compatibility with GCC.
+ if (CVal >= -4095 && CVal <= 4095)
+ break;
+ }
+ return;
+
+ case 'K':
+ if (Subtarget->isThumb1Only()) {
+ // A 32-bit value where only one byte has a nonzero value. Exclude
+ // zero to match GCC. This constraint is used by GCC internally for
+ // constants that can be loaded with a move/shift combination.
+ // It is not useful otherwise but is implemented for compatibility.
+ if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getT2SOImmVal(~CVal) != -1)
+ break;
+ } else {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getSOImmVal(~CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'L':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between -7 and 7,
+ // for 3-operand ADD/SUB immediate instructions.
+ if (CVal >= -7 && CVal < 7)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getT2SOImmVal(-CVal) != -1)
+ break;
+ } else {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getSOImmVal(-CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'M':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between 0 and 1020, for
+ // ADD sp + immediate.
+ if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
+ break;
+ } else {
+ // A power of two or a constant between 0 and 32. This is used in
+ // GCC for the shift amount on shifted register operands, but it is
+ // useful in general for any shift amounts.
+ if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
+ break;
+ }
+ return;
+
+ case 'N':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between 0 and 31, for shift amounts.
+ if (CVal >= 0 && CVal <= 31)
+ break;
+ }
+ return;
+
+ case 'O':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between -508 and 508, for
+ // ADD/SUB sp = sp + immediate.
+ if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
+ break;
+ }
+ return;
+ }
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+bool
+ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The ARM target isn't yet aware of offsets.
+ return false;
+}
+
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+ return 0;
+ // there can be 1's on either or both "outsides", all the "inside"
+ // bits must be 0's
+ unsigned int lsb = 0, msb = 31;
+ while (v & (1 << msb)) --msb;
+ while (v & (1 << lsb)) ++lsb;
+ for (unsigned int i = lsb; i <= msb; ++i) {
+ if (v & (1 << i))
+ return 0;
+ }
+ return 1;
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (!Subtarget->hasVFP3())
+ return false;
+ if (VT == MVT::f32)
+ return ARM_AM::getFP32Imm(Imm) != -1;
+ if (VT == MVT::f64)
+ return ARM_AM::getFP64Imm(Imm) != -1;
+ return false;
+}
+
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
+ uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_strexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_ldrexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 000000000000..9ee17f0781b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,578 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <vector>
+
+namespace llvm {
+ class ARMConstantPoolValue;
+
+ namespace ARMISD {
+ // ARM Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
+ // TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperDYN, // WrapperDYN - A wrapper node for TargetGlobalAddress in
+ // DYN mode.
+ WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
+ // PIC mode.
+ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+
+ // Add pseudo op to model memcpy for struct byval.
+ COPY_STRUCT_BYVAL,
+
+ CALL, // Function call.
+ CALL_PRED, // Function call that's predicable.
+ CALL_NOLINK, // Function call with branch not branch-and-link.
+ tCALL, // Thumb function call.
+ BRCOND, // Conditional branch.
+ BR_JT, // Jumptable branch.
+ BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
+ RET_FLAG, // Return with a flag operand.
+
+ PIC_ADD, // Add with a PC operand and a PIC label.
+
+ CMP, // ARM compare instructions.
+ CMN, // ARM CMN instructions.
+ CMPZ, // ARM compare that sets only Z flag.
+ CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ FMSTAT, // ARM fmstat instruction.
+
+ CMOV, // ARM conditional move instructions.
+
+ BCC_i64,
+
+ RBIT, // ARM bitreverse instruction
+
+ FTOSI, // FP to sint within a FP register.
+ FTOUI, // FP to uint within a FP register.
+ SITOF, // sint to FP within a FP register.
+ UITOF, // uint to FP within a FP register.
+
+ SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+ SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+ ADDC, // Add with carry
+ ADDE, // Add using carry
+ SUBC, // Sub with carry
+ SUBE, // Sub using carry
+
+ VMOVRRD, // double to two gprs.
+ VMOVDRR, // Two gprs to double.
+
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+
+ TC_RETURN, // Tail call return pseudo.
+
+ THREAD_POINTER,
+
+ DYN_ALLOC, // Dynamic allocation on the stack.
+
+ MEMBARRIER, // Memory barrier (DMB)
+ MEMBARRIER_MCR, // Memory barrier (MCR)
+
+ PRELOAD, // Preload
+
+ VCEQ, // Vector compare equal.
+ VCEQZ, // Vector compare equal to zero.
+ VCGE, // Vector compare greater than or equal.
+ VCGEZ, // Vector compare greater than or equal to zero.
+ VCLEZ, // Vector compare less than or equal to zero.
+ VCGEU, // Vector compare unsigned greater than or equal.
+ VCGT, // Vector compare greater than.
+ VCGTZ, // Vector compare greater than zero.
+ VCLTZ, // Vector compare less than zero.
+ VCGTU, // Vector compare unsigned greater than.
+ VTST, // Vector test bits.
+
+ // Vector shift by immediate:
+ VSHL, // ...left
+ VSHRs, // ...right (signed)
+ VSHRu, // ...right (unsigned)
+ VSHLLs, // ...left long (signed)
+ VSHLLu, // ...left long (unsigned)
+ VSHLLi, // ...left long (with maximum shift count)
+ VSHRN, // ...right narrow
+
+ // Vector rounding shift by immediate:
+ VRSHRs, // ...right (signed)
+ VRSHRu, // ...right (unsigned)
+ VRSHRN, // ...right narrow
+
+ // Vector saturating shift by immediate:
+ VQSHLs, // ...left (signed)
+ VQSHLu, // ...left (unsigned)
+ VQSHLsu, // ...left (signed to unsigned)
+ VQSHRNs, // ...right narrow (signed)
+ VQSHRNu, // ...right narrow (unsigned)
+ VQSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector saturating rounding shift by immediate:
+ VQRSHRNs, // ...right narrow (signed)
+ VQRSHRNu, // ...right narrow (unsigned)
+ VQRSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector shift and insert:
+ VSLI, // ...left
+ VSRI, // ...right
+
+ // Vector get lane (VMOV scalar to ARM core register)
+ // (These are used for 8- and 16-bit element types only.)
+ VGETLANEu, // zero-extend vector extract element
+ VGETLANEs, // sign-extend vector extract element
+
+ // Vector move immediate and move negated immediate:
+ VMOVIMM,
+ VMVNIMM,
+
+ // Vector move f32 immediate:
+ VMOVFPIMM,
+
+ // Vector duplicate:
+ VDUP,
+ VDUPLANE,
+
+ // Vector shuffles:
+ VEXT, // extract
+ VREV64, // reverse elements within 64-bit doublewords
+ VREV32, // reverse elements within 32-bit words
+ VREV16, // reverse elements within 16-bit halfwords
+ VZIP, // zip (interleave)
+ VUZP, // unzip (deinterleave)
+ VTRN, // transpose
+ VTBL1, // 1-register shuffle with mask
+ VTBL2, // 2-register shuffle with mask
+
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
+ UMLAL, // 64bit Unsigned Accumulate Multiply
+ SMLAL, // 64bit Signed Accumulate Multiply
+
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
+ // Floating-point max and min:
+ FMAX,
+ FMIN,
+
+ // Bit-field insert
+ BFI,
+
+ // Vector OR with immediate
+ VORRIMM,
+ // Vector AND with NOT of immediate
+ VBICIMM,
+
+ // Vector bitwise select
+ VBSL,
+
+ // Vector load N-element structure to all lanes:
+ VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD3DUP,
+ VLD4DUP,
+
+ // NEON loads with post-increment base updates:
+ VLD1_UPD,
+ VLD2_UPD,
+ VLD3_UPD,
+ VLD4_UPD,
+ VLD2LN_UPD,
+ VLD3LN_UPD,
+ VLD4LN_UPD,
+ VLD2DUP_UPD,
+ VLD3DUP_UPD,
+ VLD4DUP_UPD,
+
+ // NEON stores with post-increment base updates:
+ VST1_UPD,
+ VST2_UPD,
+ VST3_UPD,
+ VST4_UPD,
+ VST2LN_UPD,
+ VST3LN_UPD,
+ VST4LN_UPD,
+
+ // 64-bit atomic ops (value split into two registers)
+ ATOMADD64_DAG,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG,
+ ATOMCMPXCHG64_DAG,
+ ATOMMIN64_DAG,
+ ATOMUMIN64_DAG,
+ ATOMMAX64_DAG,
+ ATOMUMAX64_DAG
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace ARM {
+ bool isBitFieldInvertedMask(unsigned v);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+ class ARMTargetLowering : public TargetLowering {
+ public:
+ explicit ARMTargetLowering(TargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding() const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual bool isSelectSupported(SelectSupportKind Kind) const {
+ // ARM does not support scalar condition selects on vectors.
+ return (Kind != ScalarCondVectorVal);
+ }
+
+ /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual void
+ AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const;
+
+ SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// unaligned memory accesses of the specified type. Returns whether it
+ /// is "fast" by reference in the second argument.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual EVT getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
+ using TargetLowering::isZExtFree;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
+ bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// add immediate, that is the target has add instructions which can
+ /// add a register and the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalAddImmediate(int64_t Imm) const;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// getPostIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if this node can be
+ /// combined with a load / store to form a post-indexed load / store.
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const;
+
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraint of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ const ARMSubtarget* getSubtarget() const {
+ return Subtarget;
+ }
+
+ /// getRegClassFor - Return the register class that should be used for the
+ /// specified value type.
+ virtual const TargetRegisterClass *getRegClassFor(MVT VT) const;
+
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+ /// be used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const;
+
+ Sched::Preference getSchedulingPreference(SDNode *N) const;
+
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(MVT VT) const;
+
+ private:
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ const TargetRegisterInfo *RegInfo;
+
+ const InstrItineraryData *Itins;
+
+ /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+ ///
+ unsigned ARMPCLabelIndex;
+
+ void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
+ void addDRTypeForNEON(MVT VT);
+ void addQRTypeForNEON(MVT VT);
+
+ typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
+ void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const;
+
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
+ bool isVarArg) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ TLSModel::Model model) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
+
+ SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable = false)
+ const;
+
+ void computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize) const;
+
+ virtual SDValue
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ /// HandleByVal - Target-specific cleanup for ByVal support.
+ virtual void HandleByVal(CCState *, unsigned &, unsigned) const;
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ virtual bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
+
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+ SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
+
+ SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
+
+ MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const;
+ MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Op1,
+ unsigned Op2,
+ bool NeedsCarry = false,
+ bool IsCmpxchg = false,
+ bool IsMinMax = false,
+ ARMCC::CondCodes CC = ARMCC::AL) const;
+ MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const;
+
+ void SetupEntryBlockForSjLj(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI) const;
+
+ MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+ };
+
+ enum NEONModImmType {
+ VMOVModImm,
+ VMVNModImm,
+ OtherModImm
+ };
+
+
+ namespace ARM {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
+ }
+}
+
+#endif // ARMISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
new file mode 100644
index 000000000000..67a6820932fc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -0,0 +1,2094 @@
+//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// ARM Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>;
+def MulFrm : Format<1>;
+def BrFrm : Format<2>;
+def BrMiscFrm : Format<3>;
+
+def DPFrm : Format<4>;
+def DPSoRegRegFrm : Format<5>;
+
+def LdFrm : Format<6>;
+def StFrm : Format<7>;
+def LdMiscFrm : Format<8>;
+def StMiscFrm : Format<9>;
+def LdStMulFrm : Format<10>;
+
+def LdStExFrm : Format<11>;
+
+def ArithMiscFrm : Format<12>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
+
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
+def DPSoRegImmFrm : Format<42>;
+
+// Misc flags.
+
+// The instruction has an Rn register operand.
+// UnaryDP - Indicates this is a unary data processing instruction, i.e.
+// it doesn't have a Rn operand.
+class UnaryDP { bit isUnaryDataProc = 1; }
+
+// Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+// a 16-bit Thumb instruction if certain conditions are met.
+class Xform16Bit { bit canXformTo16Bit = 1; }
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags. These need to match ARMBaseInstrInfo.h.
+//
+
+// FIXME: Once the JIT is MC-ized, these can go away.
+// Addressing mode.
+class AddrMode<bits<5> val> {
+ bits<5> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
+def AddrModeT2_i8s4 : AddrMode<15>;
+def AddrMode_i12 : AddrMode<16>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+ bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+def IndexModeUpd : IndexMode<3>;
+
+// Instruction execution domain.
+class Domain<bits<3> val> {
+ bits<3> Value = val;
+}
+def GenericDomain : Domain<0>;
+def VFPDomain : Domain<1>; // Instructions in VFP domain only
+def NeonDomain : Domain<2>; // Instructions in Neon domain only
+def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
+
+//===----------------------------------------------------------------------===//
+// ARM special operands.
+//
+
+// ARM imod and iflag operands, used only by the CPS instruction.
+def imod_op : Operand<i32> {
+ let PrintMethod = "printCPSIMod";
+}
+
+def ProcIFlagsOperand : AsmOperandClass {
+ let Name = "ProcIFlags";
+ let ParserMethod = "parseProcIFlagsOperand";
+}
+def iflags_op : Operand<i32> {
+ let PrintMethod = "printCPSIFlag";
+ let ParserMatchClass = ProcIFlagsOperand;
+}
+
+// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
+// register whose default is 0 (no register).
+def CondCodeOperand : AsmOperandClass { let Name = "CondCode"; }
+def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm),
+ (ops (i32 14), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
+ let DecoderMethod = "DecodePredicateOperand";
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+def CCOutOperand : AsmOperandClass { let Name = "CCOut"; }
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+ let DecoderMethod = "DecodeCCOutOperand";
+}
+
+// Same as cc_out except it defaults to setting CPSR.
+def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+ let DecoderMethod = "DecodeCCOutOperand";
+}
+
+// ARM special operands for disassembly only.
+//
+def SetEndAsmOperand : ImmAsmOperand {
+ let Name = "SetEndImm";
+ let ParserMethod = "parseSetEndImm";
+}
+def setend_op : Operand<i32> {
+ let PrintMethod = "printSetendOperand";
+ let ParserMatchClass = SetEndAsmOperand;
+}
+
+def MSRMaskOperand : AsmOperandClass {
+ let Name = "MSRMask";
+ let ParserMethod = "parseMSRMaskOperand";
+}
+def msr_mask : Operand<i32> {
+ let PrintMethod = "printMSRMaskOperand";
+ let DecoderMethod = "DecodeMSRMask";
+ let ParserMatchClass = MSRMaskOperand;
+}
+
+// Shift Right Immediate - A shift right immediate is encoded differently from
+// other shift immediates. The imm6 field is encoded like so:
+//
+// Offset Encoding
+// 8 imm6<5:3> = '001', 8 - <imm> is encoded in imm6<2:0>
+// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
+// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
+// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
+def shr_imm8 : Operand<i32> {
+ let EncoderMethod = "getShiftRight8Imm";
+ let DecoderMethod = "DecodeShiftRight8Imm";
+ let ParserMatchClass = shr_imm8_asm_operand;
+}
+def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
+def shr_imm16 : Operand<i32> {
+ let EncoderMethod = "getShiftRight16Imm";
+ let DecoderMethod = "DecodeShiftRight16Imm";
+ let ParserMatchClass = shr_imm16_asm_operand;
+}
+def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
+def shr_imm32 : Operand<i32> {
+ let EncoderMethod = "getShiftRight32Imm";
+ let DecoderMethod = "DecodeShiftRight32Imm";
+ let ParserMatchClass = shr_imm32_asm_operand;
+}
+def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
+def shr_imm64 : Operand<i32> {
+ let EncoderMethod = "getShiftRight64Imm";
+ let DecoderMethod = "DecodeShiftRight64Imm";
+ let ParserMatchClass = shr_imm64_asm_operand;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Assembler alias templates.
+//
+class ARMInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsARM]>;
+class tInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsThumb]>;
+class t2InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>;
+class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
+class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>;
+
+
+class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasVFP2]>;
+class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasNEON]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction templates.
+//
+
+
+class InstTemplate<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : Instruction {
+ let Namespace = "ARM";
+
+ AddrMode AM = am;
+ int Size = sz;
+ IndexMode IM = im;
+ bits<2> IndexModeBits = IM.Value;
+ Format F = f;
+ bits<6> Form = F.Value;
+ Domain D = d;
+ bit isUnaryDataProc = 0;
+ bit canXformTo16Bit = 0;
+ // The instruction is a 16-bit flag setting Thumb instruction. Used
+ // by the parser to determine whether to require the 'S' suffix on the
+ // mnemonic (when not in an IT block) or preclude it (when in an IT block).
+ bit thumbArithFlagSetting = 0;
+
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+ // The layout of TSFlags should be kept in sync with ARMBaseInfo.h.
+ let TSFlags{4-0} = AM.Value;
+ let TSFlags{6-5} = IndexModeBits;
+ let TSFlags{12-7} = Form;
+ let TSFlags{13} = isUnaryDataProc;
+ let TSFlags{14} = canXformTo16Bit;
+ let TSFlags{17-15} = D.Value;
+ let TSFlags{18} = thumbArithFlagSetting;
+
+ let Constraints = cstr;
+ let Itinerary = itin;
+}
+
+class Encoding {
+ field bits<32> Inst;
+ // Mask of bits that cause an encoding to be UNPREDICTABLE.
+ // If a bit is set, then if the corresponding bit in the
+ // target encoding differs from its value in the "Inst" field,
+ // the instruction is UNPREDICTABLE (SoftFail in abstract parlance).
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
+}
+
+class InstARM<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding {
+ let DecoderNamespace = "ARM";
+}
+
+// This Encoding-less class is used by Thumb1 to specify the encoding bits later
+// on by adding flavors to specific instructions.
+class InstThumb<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin> {
+ let DecoderNamespace = "Thumb";
+}
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class AsmPseudoInst<string asm, dag iops>
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
+ "", NoItinerary> {
+ let OutOperandList = (outs);
+ let InOperandList = iops;
+ let Pattern = [];
+ let isCodeGenOnly = 0; // So we get asm matcher for it.
+ let AsmString = asm;
+ let isPseudo = 1;
+}
+
+class ARMAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsARM]>;
+class tAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsThumb]>;
+class t2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsThumb2]>;
+class VFP2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[HasVFP2]>;
+class NEONAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[HasNEON]>;
+
+// Pseudo instructions for the code generator.
+class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
+ GenericDomain, "", itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = pattern;
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// PseudoInst that's ARM-mode only.
+class ARMPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// PseudoInst that's Thumb-mode only.
+class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// PseudoInst that's Thumb2-mode only.
+class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+class ARMPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : ARMPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class tPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : tPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class t2PseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : t2PseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+// Almost all ARM instructions are predicable.
+class I<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// A few are not predicable
+class InoP<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = !strconcat(opc, asm);
+ let Pattern = pattern;
+ let isPredicable = 0;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
+class sI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p; // Predicate operand
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{31-28} = p;
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Special cases
+class XI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+class AI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AsI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ asm, "", pattern>;
+class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : InoP<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+
+// Ctrl flow instructions
+class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
+ asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+
+// BR_JT instructions
+class JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin,
+ asm, "", pattern>;
+
+// Atomic load/store instructions
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = 0b111110011111;
+}
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b11111001;
+ let Inst{3-0} = Rt;
+}
+class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
+ : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> addr;
+ let Inst{27-23} = 0b00010;
+ let Inst{22} = b;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b00001001;
+ let Inst{3-0} = Rt2;
+
+ let Unpredictable{11-8} = 0b1111;
+ let DecoderMethod = "DecodeSwap";
+}
+
+// addrmode1 instructions
+class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+
+// loads
+
+// LDR/LDRB/STR/STRB/...
+class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
+ Format f, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : I<oops, iops, am, 4, IndexModeNone, f, itin, opc, asm,
+ "", pattern> {
+ let Inst{27-25} = op;
+ let Inst{24} = 1; // 24 == P
+ // 23 == U
+ let Inst{22} = isByte;
+ let Inst{21} = 0; // 21 == W
+ let Inst{20} = isLd;
+}
+// Indexed load/stores
+class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-26} = 0b01;
+ let Inst{24} = isPre; // P bit
+ let Inst{22} = isByte; // B bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = isLd; // L bit
+ let Inst{15-12} = Rt;
+}
+class AI2stridx_reg<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+}
+
+class AI2stridx_imm<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
+}
+
+
+// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB
+// but for now use this class for STRT and STRBT.
+class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+}
+
+// addrmode3 instructions
+class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+class AI3ldstidx<bits<4> op, bit op20, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = isPre; // P bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{15-12} = Rt; // Rt
+ let Inst{7-4} = op;
+}
+
+// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB
+// but for now use this class for LDRSBT, LDRHT, LDSHT.
+class AI3ldstidxT<bits<4> op, bit isLoad, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, im, f, itin, opc, asm, cstr, pattern> {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 0; // P bit
+ let Inst{21} = 1;
+ let Inst{20} = isLoad; // L bit
+ let Inst{19-16} = addr; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{7-4} = op;
+}
+
+// stores
+class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = 0; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+// addrmode4 instructions
+class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, 4, im, f, itin, asm, cstr, pattern> {
+ bits<4> p;
+ bits<16> regs;
+ bits<4> Rn;
+ let Inst{31-28} = p;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0; // S bit
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+}
+
+// Unsigned multiply, multiply-accumulate instructions.
+class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{20} = 0; // S bit
+ let Inst{27-21} = opcod;
+}
+class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{27-21} = opcod;
+}
+
+// Most significant word multiply
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{7-4} = opc7_4;
+ let Inst{20} = 1;
+ let Inst{27-21} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+// MSW multiple w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{4} = 0;
+ let Inst{7} = 1;
+ let Inst{20} = 0;
+ let Inst{27-21} = opcod;
+ let Inst{6-5} = bit6_5;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+// Extend instructions.
+class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ExtFrm, itin,
+ opc, asm, "", pattern> {
+ // All AExtI instructions have Rd and Rm register operands.
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{7-4} = 0b0111;
+ let Inst{9-8} = 0b00;
+ let Inst{27-20} = opcod;
+
+ let Unpredictable{9-8} = 0b11;
+}
+
+// Misc Arithmetic instructions.
+class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+// Division instructions.
+class ADivA1I<bits<3> opcod, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-23} = 0b01110;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = Rm;
+ let Inst{7-4} = 0b0001;
+ let Inst{3-0} = Rn;
+}
+
+// PKH instructions
+def PKHLSLAsmOperand : ImmAsmOperand {
+ let Name = "PKHLSLImm";
+ let ParserMethod = "parsePKHLSLImm";
+}
+def pkh_lsl_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]>{
+ let PrintMethod = "printPKHLSLShiftImm";
+ let ParserMatchClass = PKHLSLAsmOperand;
+}
+def PKHASRAsmOperand : AsmOperandClass {
+ let Name = "PKHASRImm";
+ let ParserMethod = "parsePKHASRImm";
+}
+def pkh_asr_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]>{
+ let PrintMethod = "printPKHASRShiftImm";
+ let ParserMatchClass = PKHASRAsmOperand;
+}
+
+class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<5> sh;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh;
+ let Inst{6} = tb;
+ let Inst{5-4} = 0b01;
+ let Inst{3-0} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM];
+}
+class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5T];
+}
+class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE];
+}
+// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps.
+class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE, UseMulOps];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV6];
+}
+
+//===----------------------------------------------------------------------===//
+// Thumb Instruction Format Definitions.
+//
+
+class ThumbI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// TI - Thumb instruction.
+class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "$lhs = $dst",
+ pattern>;
+
+// tBL, tBX 32-bit instructions
+class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
+ dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>,
+ Encoding {
+ let Inst{31-27} = opcod1;
+ let Inst{15-14} = opcod2;
+ let Inst{12} = opcod3;
+}
+
+// BR_JT instructions
+class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
+
+// Thumb1 only
+class Thumb1I<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1I<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
+class T1Ix2<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
+
+// Two-address instructions
+class T1It<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin,
+ asm, cstr, pattern>;
+
+// Thumb1 instruction that can either be predicated or set CPSR.
+class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = !con(oops, (outs s_cc_out:$s));
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ let thumbArithFlagSetting = 1;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ let DecoderNamespace = "ThumbSBit";
+}
+
+class T1sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1sIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+// Thumb1 instruction that can be predicated.
+class Thumb1pI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1pI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1pIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+class T1pIs<dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeT1_s, 2, itin, opc, asm, "", pattern>;
+
+class Encoding16 : Encoding {
+ let Inst{31-16} = 0x0000;
+}
+
+// A6.2 16-bit Thumb instruction encoding
+class T1Encoding<bits<6> opcode> : Encoding16 {
+ let Inst{15-10} = opcode;
+}
+
+// A6.2.1 Shift (immediate), add, subtract, move, and compare encoding.
+class T1General<bits<5> opcode> : Encoding16 {
+ let Inst{15-14} = 0b00;
+ let Inst{13-9} = opcode;
+}
+
+// A6.2.2 Data-processing encoding.
+class T1DataProcessing<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010000;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.3 Special data instructions and branch and exchange encoding.
+class T1Special<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010001;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.4 Load/store single data item encoding.
+class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
+ let Inst{15-12} = opA;
+ let Inst{11-9} = opB;
+}
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+
+class T1BranchCond<bits<4> opcode> : Encoding16 {
+ let Inst{15-12} = opcode;
+}
+
+// Helper classes to encode Thumb1 loads and stores. For immediates, the
+// following bits are used for "opA" (see A6.2.4):
+//
+// 0b0110 => Immediate, 4 bytes
+// 0b1000 => Immediate, 2 bytes
+// 0b0111 => Immediate, 1 byte
+class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
+ T1LoadStore<0b0101, opcode> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{8-6} = addr{5-3}; // Rm
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
+ T1LoadStore<opA, {opB,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-6} = addr{7-3}; // imm5
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+
+// A6.2.5 Miscellaneous 16-bit instructions encoding.
+class T1Misc<bits<7> opcode> : Encoding16 {
+ let Inst{15-12} = 0b1011;
+ let Inst{11-5} = opcode;
+}
+
+// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
+class Thumb2I<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+}
+
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
+// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
+// more consistent.
+class Thumb2sI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+}
+
+// Special cases
+class Thumb2XI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+}
+
+class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ let DecoderNamespace = "Thumb";
+}
+
+class T2I<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+class T2Ii12<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
+class T2Ii8<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>;
+class T2Iso<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>;
+class T2Ipc<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>;
+class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr,
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<13> addr;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = addr{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = addr{7-0};
+}
+class T2Ii8s4post<bit P, bit W, bit isLoad, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr,
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> addr;
+ bits<9> imm;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = imm{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2sI<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+
+class T2XI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
+class T2JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
+
+// Move to/from coprocessor instructions
+class T2Cop<bits<4> opc, dag oops, dag iops, string asm, list<dag> pattern>
+ : T2XI <oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2]> {
+ let Inst{31-28} = opc;
+}
+
+// Two-address instructions
+class T2XIt<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>;
+
+// T2Ipreldst - Thumb2 pre-indexed load / store instructions.
+class T2Ipreldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{9} = addr{8}; // Sign bit
+ let Inst{8} = 1; // The W bit.
+ let Inst{7-0} = addr{7-0};
+
+ let DecoderMethod = "DecodeT2LdStPre";
+}
+
+// T2Ipostldst - Thumb2 post-indexed load / store instructions.
+class T2Ipostldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+
+ bits<4> Rt;
+ bits<4> Rn;
+ bits<9> offset;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{9} = offset{8}; // Sign bit
+ let Inst{8} = 1; // The W bit.
+ let Inst{7-0} = offset{7-0};
+
+ let DecoderMethod = "DecodeT2LdStPre";
+}
+
+// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
+class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
+}
+
+// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
+class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+// T2v6Pat - Same as Pat<>, but requires V6T2 Thumb2 mode.
+class T2v6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, HasV6T2];
+}
+
+// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
+class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// Almost all VFP instructions are predicable.
+class VFPI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ let DecoderNamespace = "VFP";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Special cases
+class VFPXI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ let DecoderNamespace = "VFP";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+}
+
+// ARM VFP addrmode5 loads and stores
+class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Dd{4};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Dd{3-0};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
+class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Sd{0};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Sd{4-1};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
+// VFP Load / store multiple pseudo instructions.
+class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrMode4, 4, IndexModeNone, Pseudo, VFPNeonDomain,
+ cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Load / store multiple
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{12};
+ let Inst{15-12} = regs{11-8};
+ let Inst{7-0} = regs{7-0};
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+}
+
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{8};
+ let Inst{15-12} = regs{12-9};
+ let Inst{7-0} = regs{7-0};
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+}
+
+// Double precision, unary
+class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Double precision, binary
+class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision, unary
+class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Single precision unary, if no NEON. Same as ASuI except not available if
+// NEON is enabled.
+class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+// Single precision, binary
+class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision binary, if no NEON. Same as ASbI except not available if
+// NEON is enabled.
+class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// VFP conversion instructions
+class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> {
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-8} = opcod4;
+ let Inst{6} = 1;
+ let Inst{4} = 0;
+}
+
+// VFP conversion between floating-point and fixed-point
+class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
+ bits<5> fbits;
+ // size (fixed-point number): sx == 0 ? 16 : 32
+ let Inst{7} = op5; // sx
+ let Inst{5} = fbits{0};
+ let Inst{3-0} = fbits{4-1};
+}
+
+// VFP conversion instructions, if no NEON
+class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, f, itin, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{4} = 1;
+}
+
+class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, itin, opc, asm, pattern>;
+
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, itin, opc, asm, pattern>;
+
+class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, itin, opc, asm, pattern>;
+
+class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, itin, opc, asm, pattern>;
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM NEON Instruction templates.
+//
+
+class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
+}
+
+// Same as NeonI except it does not have a "data type" specifier.
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
+}
+
+class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
+ cstr, pattern> {
+ let Inst{31-24} = 0b11110100;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+
+ let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+ let DecoderNamespace = "NEONLoadStore";
+
+ bits<5> Vd;
+ bits<6> Rn;
+ bits<4> Rm;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{19-16} = Rn{3-0};
+ let Inst{3-0} = Rm{3-0};
+}
+
+class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NLdSt<op23, op21_20, op11_8, op7_4, oops, iops, itin, opc,
+ dt, asm, cstr, pattern> {
+ bits<3> lane;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrModeNone, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
+ pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+ let DecoderNamespace = "NEONData";
+}
+
+class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
+ cstr, pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+ let DecoderNamespace = "NEONData";
+}
+
+// NEON "one register and a modified immediate" format.
+class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
+ bit op5, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{23} = op23;
+ let Inst{21-19} = op21_19;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<13> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{24} = SIMM{7};
+ let Inst{18-16} = SIMM{6-4};
+ let Inst{3-0} = SIMM{3-0};
+ let DecoderMethod = "DecodeNEONModImmInstruction";
+}
+
+// NEON 2 vector register format.
+class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, N2RegFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// Same as N2V except it doesn't have a datatype suffix.
+class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, N2RegFrm, itin, opc, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON 2 vector register with immediate.
+class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<6> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{21-16} = SIMM{5-0};
+}
+
+// NEON 3 vector register format.
+
+class N3VCommon<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bit lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = lane;
+}
+
+class N3VLane16<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bits<2> lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{2-0} = Vm{2-0};
+ let Inst{5} = lane{1};
+ let Inst{3} = lane{0};
+}
+
+// Same as N3V except it doesn't have a data type suffix.
+class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON VMOVs between scalar and core registers.
+class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, 4, IndexModeNone, f, NeonDomain,
+ "", itin> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
+ // A8.6.303, A8.6.328, A8.6.329
+ let Inst{3-0} = 0b0000;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+
+ let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+ let DecoderNamespace = "NEONDup";
+
+ bits<5> V;
+ bits<4> R;
+ bits<4> p;
+ bits<4> lane;
+
+ let Inst{31-28} = p{3-0};
+ let Inst{7} = V{4};
+ let Inst{19-16} = V{3-0};
+ let Inst{15-12} = R{3-0};
+}
+class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin,
+ opc, dt, asm, pattern>;
+
+// Vector Duplicate Lane (from scalar to all elements)
+class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
+ InstrItinClass itin, string opc, string dt, string asm,
+ list<dag> pattern>
+ : NDataI<oops, iops, NVDupLnFrm, itin, opc, dt, asm, "", pattern> {
+ let Inst{24-23} = 0b11;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op19_16;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
+
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+}
+
+// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
+// for single-precision FP.
+class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasNEON,UseNEONForFP];
+}
+
+// VFP/NEON Instruction aliases for type suffices.
+class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
+
+multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+
+multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result> {
+ let Predicates = [HasNEON] in {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+}
+
+// The same alias classes using AsmPseudo instead, for the more complex
+// stuff in NEON that InstAlias can't quite handle.
+// Note that we can't use anonymous defm references here like we can
+// above, as we care about the ultimate instruction enum names generated, unlike
+// for instalias defs.
+class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> :
+ AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>;
+
+// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
+def : TokenAlias<".s8", ".i8">;
+def : TokenAlias<".u8", ".i8">;
+def : TokenAlias<".s16", ".i16">;
+def : TokenAlias<".u16", ".i16">;
+def : TokenAlias<".s32", ".i32">;
+def : TokenAlias<".u32", ".i32">;
+def : TokenAlias<".s64", ".i64">;
+def : TokenAlias<".u64", ".i64">;
+
+def : TokenAlias<".i8", ".8">;
+def : TokenAlias<".i16", ".16">;
+def : TokenAlias<".i32", ".32">;
+def : TokenAlias<".i64", ".64">;
+
+def : TokenAlias<".p8", ".8">;
+def : TokenAlias<".p16", ".16">;
+
+def : TokenAlias<".f32", ".32">;
+def : TokenAlias<".f64", ".64">;
+def : TokenAlias<".f", ".f32">;
+def : TokenAlias<".d", ".f64">;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
new file mode 100644
index 000000000000..80f0ec74376a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -0,0 +1,148 @@
+//===-- ARMInstrInfo.cpp - ARM Instruction Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+using namespace llvm;
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ if (hasNOP()) {
+ NopInst.setOpcode(ARM::HINT);
+ NopInst.addOperand(MCOperand::CreateImm(0));
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ } else {
+ NopInst.setOpcode(ARM::MOVr);
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ }
+}
+
+unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ switch (Opc) {
+ default: break;
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ return ARM::LDRi12;
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ return ARM::LDRH;
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ return ARM::LDRBi12;
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ return ARM::LDRSH;
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ return ARM::LDRSB;
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ return ARM::STRi12;
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ return ARM::STRH;
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ return ARM::STRBi12;
+ }
+
+ return 0;
+}
+
+namespace {
+ /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for ARM ELF.
+ struct ARMCGBR : public MachineFunctionPass {
+ static char ID;
+ ARMCGBR() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->getGlobalBaseReg() == 0)
+ return false;
+
+ const ARMTargetMachine *TM =
+ static_cast<const ARMTargetMachine *>(&MF.getTarget());
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ LLVMContext* Context = &MF.getFunction()->getContext();
+ GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
+ GlobalValue::ExternalLinkage, 0,
+ "_GLOBAL_OFFSET_TABLE_");
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id);
+ unsigned Align = TM->getDataLayout()->getPrefTypeAlignment(GV->getType());
+ unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
+
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
+ ARM::t2LDRpci : ARM::LDRcp;
+ const TargetInstrInfo &TII = *TM->getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
+ TII.get(Opc), GlobalBaseReg)
+ .addConstantPoolIndex(Idx);
+ if (Opc == ARM::LDRcp)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char ARMCGBR::ID = 0;
+FunctionPass*
+llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); }
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
new file mode 100644
index 000000000000..5d3e059b7038
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -0,0 +1,46 @@
+//===-- ARMInstrInfo.h - ARM Instruction Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTRUCTIONINFO_H
+#define ARMINSTRUCTIONINFO_H
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMInstrInfo : public ARMBaseInstrInfo {
+ ARMRegisterInfo RI;
+public:
+ explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const ARMRegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
new file mode 100644
index 000000000000..11550c5ae678
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -0,0 +1,5233 @@
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
+
+// Type profiles.
+def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
+def SDT_ARMStructByVal : SDTypeProfile<0, 4,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMBr2JT : SDTypeProfile<0, 4,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+ SDTCisVT<5, OtherVT>]>;
+
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
+ SDTCisInt<1>]>;
+
+def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+
+def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >;
+def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>;
+def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>;
+
+// Node definitions.
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
+def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPOptInGlue, SDNPOutGlue]>;
+def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" ,
+ SDT_ARMStructByVal,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPMayStore, SDNPMayLoad]>;
+
+def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInGlue]>;
+
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+ [SDNPHasChain]>;
+def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
+ [SDNPHasChain]>;
+
+def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+ [SDNPHasChain]>;
+
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutGlue]>;
+
+def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp,
+ [SDNPOutGlue]>;
+
+def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
+ [SDNPOutGlue, SDNPCommutative]>;
+
+def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+
+def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>;
+def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>;
+def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;
+
+def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
+ SDT_ARMEH_SJLJ_Setjmp,
+ [SDNPHasChain, SDNPSideEffect]>;
+def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
+ SDT_ARMEH_SJLJ_Longjmp,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain, SDNPSideEffect]>;
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
+ [SDNPHasChain, SDNPSideEffect]>;
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
+
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
+ AssemblerPredicate<"HasV4TOps", "armv4t">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
+ AssemblerPredicate<"HasV5TEOps", "armv5te">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
+ AssemblerPredicate<"HasV6Ops", "armv6">;
+def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
+ AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
+ AssemblerPredicate<"HasV7Ops", "armv7">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
+ AssemblerPredicate<"FeatureVFP2", "VFP2">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
+ AssemblerPredicate<"FeatureVFP3", "VFP3">;
+def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
+ AssemblerPredicate<"FeatureVFP4", "VFP4">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">,
+ AssemblerPredicate<"FeatureNEON", "NEON">;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">,
+ AssemblerPredicate<"FeatureFP16","half-float">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">,
+ AssemblerPredicate<"FeatureHWDiv", "divide">;
+def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
+ AssemblerPredicate<"FeatureHWDivARM">;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
+ AssemblerPredicate<"FeatureT2XtPk",
+ "pack/extract">;
+def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">,
+ AssemblerPredicate<"FeatureDSPThumb2",
+ "thumb2-dsp">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
+ AssemblerPredicate<"FeatureDB",
+ "data-barriers">;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+ AssemblerPredicate<"FeatureMP",
+ "mp-extensions">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">,
+ AssemblerPredicate<"ModeThumb", "thumb">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
+ AssemblerPredicate<"ModeThumb,FeatureThumb2",
+ "thumb2">;
+def IsMClass : Predicate<"Subtarget->isMClass()">,
+ AssemblerPredicate<"FeatureMClass", "armv7m">;
+def IsARClass : Predicate<"!Subtarget->isMClass()">,
+ AssemblerPredicate<"!FeatureMClass",
+ "armv7a/r">;
+def IsARM : Predicate<"!Subtarget->isThumb()">,
+ AssemblerPredicate<"!ModeThumb", "arm-mode">;
+def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
+def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
+ AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
+def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
+
+// FIXME: Eventually this will be just "hasV6T2Ops".
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+def UseMulOps : Predicate<"Subtarget->useMulOps()">;
+
+// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
+// But only select them if more precision in FP computation is allowed.
+// Do not use them for Darwin platforms.
+def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast) && "
+ "!Subtarget->isTargetDarwin()">;
+def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
+ "Subtarget->isTargetDarwin()">;
+
+// VGETLNi32 is microcoded on Swift - prefer VMOV.
+def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">;
+
+// VDUP.32 is microcoded on Swift - prefer VMOV.
+def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
+
+// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as
+// this allows more effective execution domain optimization. See
+// setExecutionDomain().
+def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
+
+def IsLE : Predicate<"TLI.isLittleEndian()">;
+def IsBE : Predicate<"TLI.isBigEndian()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// imm_neg_XFORM - Return the negation of an i32 immediate value.
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+// imm_not_XFORM - Return the complement of a i32 immediate value.
+def imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
+}]>;
+
+def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
+def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return Value && ARM_AM::getSOImmVal(Value) != -1;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = so_imm_neg_asmoperand;
+}
+
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use so_imm.
+def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
+def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
+ }], imm_not_XFORM> {
+ let ParserMatchClass = so_imm_not_asmoperand;
+}
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
+
+/// Split a 32-bit immediate into two 16 bit parts.
+def hi16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
+}]>;
+
+def lo16AllZero : PatLeaf<(i32 imm), [{
+ // Returns true if all low 16-bits are 0.
+ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
+}], hi16>;
+
+class BinOpWithFlagFrag<dag res> :
+ PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'fmul' node with a single use.
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
+ return N->hasOneUse();
+}]>;
+
+// An 'fadd' node which checks for single non-hazardous use.
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+// An 'fsub' node which checks for single non-hazardous use.
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Immediate operands with a shared generic asm render method.
+class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+
+// Branch target.
+// FIXME: rename brtarget to t2_brtarget
+def brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeT2BROperand";
+}
+
+// FIXME: get rid of this one?
+def uncondbrtarget : Operand<OtherVT> {
+ let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Branch target for ARM. Handles conditional/unconditional
+def br_target : Operand<OtherVT> {
+ let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Call target.
+// FIXME: rename bltarget to t2_bl_target?
+def bltarget : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Call target for ARM. Handles conditional/unconditional
+// FIXME: rename bl_target to t2_bltarget?
+def bl_target : Operand<i32> {
+ let EncoderMethod = "getARMBLTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+def blx_target : Operand<i32> {
+ let EncoderMethod = "getARMBLXTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// A list of registers separated by comma. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass { let Name = "RegList"; }
+def reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = RegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeRegListOperand";
+}
+
+def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">;
+
+def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; }
+def dpr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = DPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeDPRRegListOperand";
+}
+
+def SPRRegListAsmOperand : AsmOperandClass { let Name = "SPRRegList"; }
+def spr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = SPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeSPRRegListOperand";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+ let PrintMethod = "printPCLabel";
+}
+
+// ADR instruction labels.
+def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; }
+def adrlabel : Operand<i32> {
+ let EncoderMethod = "getAdrLabelOpValue";
+ let ParserMatchClass = AdrLabelAsmOperand;
+ let PrintMethod = "printAdrLabelOperand";
+}
+
+def neon_vcvt_imm32 : Operand<i32> {
+ let EncoderMethod = "getNEONVcvtImm32OpValue";
+ let DecoderMethod = "DecodeVCVTImmOperand";
+}
+
+// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
+def rot_imm_XFORM: SDNodeXForm<imm, [{
+ switch (N->getZExtValue()){
+ default: assert(0);
+ case 0: return CurDAG->getTargetConstant(0, MVT::i32);
+ case 8: return CurDAG->getTargetConstant(1, MVT::i32);
+ case 16: return CurDAG->getTargetConstant(2, MVT::i32);
+ case 24: return CurDAG->getTargetConstant(3, MVT::i32);
+ }
+}]>;
+def RotImmAsmOperand : AsmOperandClass {
+ let Name = "RotImm";
+ let ParserMethod = "parseRotImm";
+}
+def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
+ int32_t v = N->getZExtValue();
+ return v == 8 || v == 16 || v == 24; }],
+ rot_imm_XFORM> {
+ let PrintMethod = "printRotImmOperand";
+ let ParserMatchClass = RotImmAsmOperand;
+}
+
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (asr or lsl). The 6-bit immediate encodes as:
+// {5} 0 ==> lsl
+// 1 asr
+// {4-0} imm5 shift amount.
+// asr #32 encoded as imm5 == 0.
+def ShifterImmAsmOperand : AsmOperandClass {
+ let Name = "ShifterImm";
+ let ParserMethod = "parseShifterImm";
+}
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterImmAsmOperand;
+}
+
+// shifter_operand operands: so_reg_reg, so_reg_imm, and so_imm.
+def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; }
+def so_reg_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectRegShifterOperand",
+ [shl, srl, sra, rotr]> {
+ let EncoderMethod = "getSORegRegOpValue";
+ let PrintMethod = "printSORegRegOperand";
+ let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, GPRnopc, i32imm);
+}
+
+def ShiftedImmAsmOperand : AsmOperandClass { let Name = "RegShiftedImm"; }
+def so_reg_imm : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectImmShifterOperand",
+ [shl, srl, sra, rotr]> {
+ let EncoderMethod = "getSORegImmOpValue";
+ let PrintMethod = "printSORegImmOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// FIXME: Does this need to be distinct from so_reg?
+def shift_so_reg_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShiftRegShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegRegOpValue";
+ let PrintMethod = "printSORegRegOperand";
+ let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+
+// FIXME: Does this need to be distinct from so_reg?
+def shift_so_reg_imm : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 2, "SelectShiftImmShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegImmOpValue";
+ let PrintMethod = "printSORegImmOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits.
+def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; }
+def so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }]> {
+ let EncoderMethod = "getSOImmOpValue";
+ let ParserMatchClass = SOImmAsmOperand;
+ let DecoderMethod = "DecodeSOImmOperand";
+}
+
+// Break so_imm's up into two pieces. This handles immediates with up to 16
+// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
+// get the first/second pieces.
+def so_imm2part : PatLeaf<(imm), [{
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
+
+/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
+///
+def arm_i32imm : PatLeaf<(imm), [{
+ if (Subtarget->hasV6T2Ops())
+ return true;
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
+
+/// imm0_1 predicate - Immediate in the range [0,1].
+def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
+
+/// imm0_3 predicate - Immediate in the range [0,3].
+def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+
+/// imm0_7 predicate - Immediate in the range [0,7].
+def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
+def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 8;
+}]> {
+ let ParserMatchClass = Imm0_7AsmOperand;
+}
+
+/// imm8 predicate - Immediate is exactly 8.
+def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
+ let ParserMatchClass = Imm8AsmOperand;
+}
+
+/// imm16 predicate - Immediate is exactly 16.
+def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
+ let ParserMatchClass = Imm16AsmOperand;
+}
+
+/// imm32 predicate - Immediate is exactly 32.
+def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
+ let ParserMatchClass = Imm32AsmOperand;
+}
+
+/// imm1_7 predicate - Immediate in the range [1,7].
+def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
+ let ParserMatchClass = Imm1_7AsmOperand;
+}
+
+/// imm1_15 predicate - Immediate in the range [1,15].
+def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
+ let ParserMatchClass = Imm1_15AsmOperand;
+}
+
+/// imm1_31 predicate - Immediate in the range [1,31].
+def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
+ let ParserMatchClass = Imm1_31AsmOperand;
+}
+
+/// imm0_15 predicate - Immediate in the range [0,15].
+def Imm0_15AsmOperand: ImmAsmOperand {
+ let Name = "Imm0_15";
+ let DiagnosticType = "ImmRange0_15";
+}
+def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 16;
+}]> {
+ let ParserMatchClass = Imm0_15AsmOperand;
+}
+
+/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; }
+def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]> {
+ let ParserMatchClass = Imm0_31AsmOperand;
+}
+
+/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32].
+def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; }
+def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]> {
+ let ParserMatchClass = Imm0_32AsmOperand;
+}
+
+/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
+def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 64;
+}]> {
+ let ParserMatchClass = Imm0_63AsmOperand;
+}
+
+/// imm0_255 predicate - Immediate in the range [0,255].
+def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+ let ParserMatchClass = Imm0_255AsmOperand;
+}
+
+/// imm0_65535 - An immediate is in the range [0.65535].
+def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 65536;
+}]> {
+ let ParserMatchClass = Imm0_65535AsmOperand;
+}
+
+// imm0_65535_neg - An immediate whose negative value is in the range [0.65535].
+def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{
+ return -Imm >= 0 && -Imm < 65536;
+}]>;
+
+// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
+// a relocatable expression.
+//
+// FIXME: This really needs a Thumb version separate from the ARM version.
+// While the range is the same, and can thus use the same match class,
+// the encoding is different so it should have a different encoder method.
+def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; }
+def imm0_65535_expr : Operand<i32> {
+ let EncoderMethod = "getHiLo16ImmOpValue";
+ let ParserMatchClass = Imm0_65535ExprAsmOperand;
+}
+
+/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
+def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
+def imm24b : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 0xffffff;
+}]> {
+ let ParserMatchClass = Imm24bitAsmOperand;
+}
+
+
+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
+/// e.g., 0xf000ffff
+def BitfieldAsmOperand : AsmOperandClass {
+ let Name = "Bitfield";
+ let ParserMethod = "parseBitfield";
+}
+
+def bf_inv_mask_imm : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
+}] > {
+ let EncoderMethod = "getBitfieldInvertedMaskOpValue";
+ let PrintMethod = "printBitfieldInvMaskImmOperand";
+ let DecoderMethod = "DecodeBitfieldMaskOperand";
+ let ParserMatchClass = BitfieldAsmOperand;
+}
+
+def imm1_32_XFORM: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32);
+}]>;
+def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; }
+def imm1_32 : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
+ return Imm > 0 && Imm <= 32;
+ }],
+ imm1_32_XFORM> {
+ let PrintMethod = "printImmPlusOneOperand";
+ let ParserMatchClass = Imm1_32AsmOperand;
+}
+
+def imm1_16_XFORM: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32);
+}]>;
+def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; }
+def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
+ imm1_16_XFORM> {
+ let PrintMethod = "printImmPlusOneOperand";
+ let ParserMatchClass = Imm1_16AsmOperand;
+}
+
+// Define ARM specific addressing modes.
+// addrmode_imm12 := reg +/- imm12
+//
+def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
+def addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
+ // 12-bit immediate operand. Note that instructions using this encode
+ // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
+ // immediate values are as normal.
+
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printAddrModeImm12Operand";
+ let DecoderMethod = "DecodeAddrModeImm12Operand";
+ let ParserMatchClass = MemImm12OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+// ldst_so_reg := reg +/- reg shop imm
+//
+def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
+def ldst_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
+ let EncoderMethod = "getLdStSORegOpValue";
+ // FIXME: Simplify the printer
+ let PrintMethod = "printAddrMode2Operand";
+ let DecoderMethod = "DecodeSORegMemOperand";
+ let ParserMatchClass = MemRegOffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPRnopc:$offsreg, i32imm:$shift);
+}
+
+// postidx_imm8 := +/- [0,255]
+//
+// 9 bit value:
+// {8} 1 is imm8 is non-negative. 0 otherwise.
+// {7-0} [0,255] imm8 value.
+def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; }
+def postidx_imm8 : Operand<i32> {
+ let PrintMethod = "printPostIdxImm8Operand";
+ let ParserMatchClass = PostIdxImm8AsmOperand;
+ let MIOperandInfo = (ops i32imm);
+}
+
+// postidx_imm8s4 := +/- [0,1020]
+//
+// 9 bit value:
+// {8} 1 is imm8 is non-negative. 0 otherwise.
+// {7-0} [0,255] imm8 value, scaled by 4.
+def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; }
+def postidx_imm8s4 : Operand<i32> {
+ let PrintMethod = "printPostIdxImm8s4Operand";
+ let ParserMatchClass = PostIdxImm8s4AsmOperand;
+ let MIOperandInfo = (ops i32imm);
+}
+
+
+// postidx_reg := +/- reg
+//
+def PostIdxRegAsmOperand : AsmOperandClass {
+ let Name = "PostIdxReg";
+ let ParserMethod = "parsePostIdxReg";
+}
+def postidx_reg : Operand<i32> {
+ let EncoderMethod = "getPostIdxRegOpValue";
+ let DecoderMethod = "DecodePostIdxReg";
+ let PrintMethod = "printPostIdxRegOperand";
+ let ParserMatchClass = PostIdxRegAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, i32imm);
+}
+
+
+// addrmode2 := reg +/- imm12
+// := reg +/- reg shop imm
+//
+// FIXME: addrmode2 should be refactored the rest of the way to always
+// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg).
+def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; }
+def addrmode2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let EncoderMethod = "getAddrMode2OpValue";
+ let PrintMethod = "printAddrMode2Operand";
+ let ParserMatchClass = AddrMode2AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def PostIdxRegShiftedAsmOperand : AsmOperandClass {
+ let Name = "PostIdxRegShifted";
+ let ParserMethod = "parsePostIdxReg";
+}
+def am2offset_reg : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ // When using this for assembly, it's always as a post-index offset.
+ let ParserMatchClass = PostIdxRegShiftedAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, i32imm);
+}
+
+// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
+// the GPR is purely vestigal at this point.
+def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; }
+def am2offset_imm : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ let ParserMatchClass = AM2OffsetImmAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, i32imm);
+}
+
+
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+// FIXME: split into imm vs. reg versions.
+def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
+def addrmode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let EncoderMethod = "getAddrMode3OpValue";
+ let PrintMethod = "printAddrMode3Operand";
+ let ParserMatchClass = AddrMode3AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+// FIXME: split into imm vs. reg versions.
+// FIXME: parser method to handle +/- register.
+def AM3OffsetAsmOperand : AsmOperandClass {
+ let Name = "AM3Offset";
+ let ParserMethod = "parseAM3Offset";
+}
+def am3offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode3OffsetOpValue";
+ let PrintMethod = "printAddrMode3OffsetOperand";
+ let ParserMatchClass = AM3OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// ldstm_mode := {ia, ib, da, db}
+//
+def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
+ let EncoderMethod = "getLdStmModeOpValue";
+ let PrintMethod = "printLdStmModeOperand";
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
+def addrmode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+ let PrintMethod = "printAddrMode5Operand";
+ let EncoderMethod = "getAddrMode5OpValue";
+ let DecoderMethod = "DecodeAddrMode5Operand";
+ let ParserMatchClass = AddrMode5AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm);
+}
+
+// addrmode6 := reg with optional alignment
+//
+def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
+def addrmode6 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
+ let EncoderMethod = "getAddrMode6AddressOpValue";
+ let DecoderMethod = "DecodeAddrMode6Operand";
+ let ParserMatchClass = AddrMode6AsmOperand;
+}
+
+def am6offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printAddrMode6OffsetOperand";
+ let MIOperandInfo = (ops GPR);
+ let EncoderMethod = "getAddrMode6OffsetOpValue";
+ let DecoderMethod = "DecodeGPRRegisterClass";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VST1/VLD1
+// (single element from one lane) for size 32.
+def addrmode6oneL32 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6OneLane32AddressOpValue";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VLD-dup
+// instructions, specifically VLD4-dup.
+def addrmode6dup : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6DupAddressOpValue";
+ // FIXME: This is close, but not quite right. The alignment specifier is
+ // different.
+ let ParserMatchClass = AddrMode6AsmOperand;
+}
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+ let PrintMethod = "printAddrModePCOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addr_offset_none := reg
+//
+def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; }
+def addr_offset_none : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> {
+ let PrintMethod = "printAddrMode7Operand";
+ let DecoderMethod = "DecodeAddrMode7Operand";
+ let ParserMatchClass = MemNoOffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base);
+}
+
+def nohash_imm : Operand<i32> {
+ let PrintMethod = "printNoHashImmediate";
+}
+
+def CoprocNumAsmOperand : AsmOperandClass {
+ let Name = "CoprocNum";
+ let ParserMethod = "parseCoprocNumOperand";
+}
+def p_imm : Operand<i32> {
+ let PrintMethod = "printPImmediate";
+ let ParserMatchClass = CoprocNumAsmOperand;
+ let DecoderMethod = "DecodeCoprocessor";
+}
+
+def pf_imm : Operand<i32> {
+ let PrintMethod = "printPImmediate";
+ let ParserMatchClass = CoprocNumAsmOperand;
+}
+
+def CoprocRegAsmOperand : AsmOperandClass {
+ let Name = "CoprocReg";
+ let ParserMethod = "parseCoprocRegOperand";
+}
+def c_imm : Operand<i32> {
+ let PrintMethod = "printCImmediate";
+ let ParserMatchClass = CoprocRegAsmOperand;
+}
+def CoprocOptionAsmOperand : AsmOperandClass {
+ let Name = "CoprocOption";
+ let ParserMethod = "parseCoprocOptionOperand";
+}
+def coproc_option_imm : Operand<i32> {
+ let PrintMethod = "printCoprocOptionImm";
+ let ParserMatchClass = CoprocOptionAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
+multiclass AsI1_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+}
+
+/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands are
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the AsI1_bin_irs counterpart.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
+multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+}
+
+/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]>;
+
+ def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
+ 4, iir,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ let isCommutable = Commutable;
+ }
+ def rsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
+ so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]>;
+
+ def rsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
+ so_reg_reg:$shift))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
+}
+}
+
+/// AsI1_rbin_s_is - Same as AsI1_bin_s_irs, except selection DAG
+/// operands are reversed.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]>;
+
+ def rsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift,
+ GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]>;
+
+ def rsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift,
+ GPR:$Rn))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
+}
+}
+
+/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
+/// a explicit result, only implicitly set CPSR.
+let isCompare = 1, Defs = [CPSR] in {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+ opc, "\t$Rn, $imm",
+ [(opnode GPR:$Rn, so_imm:$imm)]>,
+ Sched<[WriteCMP, ReadALU]> {
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
+
+ let Unpredictable{15-12} = 0b1111;
+ }
+ def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
+ opc, "\t$Rn, $Rm",
+ [(opnode GPR:$Rn, GPR:$Rm)]>,
+ Sched<[WriteCMP, ReadALU, ReadALU]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = Commutable;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{15-12} = 0b1111;
+ }
+ def rsi : AI1<opcod, (outs),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg_imm:$shift)]>,
+ Sched<[WriteCMPsi, ReadALU]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+ }
+ def rsr : AI1<opcod, (outs),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]>,
+ Sched<[WriteCMPsr, ReadALU]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+ }
+
+}
+}
+
+/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
+class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
+ [(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{3-0} = Rm;
+}
+
+class AI_ext_rrot_np<bits<8> opcod, string opc>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = rot;
+}
+
+/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot",
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn,
+ (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+}
+
+class AI_exta_rrot_np<bits<8> opcod, string opc>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+}
+
+/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AsI1<opcod, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPRnopc:$Rd, CPSR,
+ (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+ }
+}
+
+/// AI1_rsc_irs - Define instructions and patterns for rsc
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
+multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+ }
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt),
+ (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt),
+ (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+
+multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPR:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPRnopc:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPRnopc:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs),
+ (ins GPRnopc:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function. The first operand is the ID# for this instruction, the second
+/// is the index into the MachineConstantPool that this is, the third is the
+/// size in bytes of this constant pool entry.
+let neverHasSideEffects = 1, isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def ADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
+ [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
+ [(ARMcallseq_start timm:$amt)]>;
+}
+
+// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
+// (These pseudos use a hand-written selection code).
+let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
+def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
+ GPR:$set1, GPR:$set2),
+ NoItinerary, []>;
+def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+}
+
+def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
+ "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
+ bits<8> imm;
+ let Inst{27-8} = 0b00110010000011110000;
+ let Inst{7-0} = imm;
+}
+
+def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+
+def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
+ "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{27-20} = 0b01101000;
+ let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
+ let Unpredictable{11-8} = 0b1111;
+}
+
+// The 16-bit operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "bkpt", "\t$val", []>, Requires<[IsARM]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
+ let Inst{27-20} = 0b00010010;
+ let Inst{7-4} = 0b0111;
+}
+
+// Change Processor State
+// FIXME: We should use InstAlias to handle the optional operands.
+class CPS<dag iops, string asm_ops>
+ : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
+ []>, Requires<[IsARM]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010000;
+ let Inst{19-18} = imod;
+ let Inst{17} = M; // Enabled if mode is set;
+ let Inst{16-9} = 0b00000000;
+ let Inst{8-6} = iflags;
+ let Inst{5} = 0;
+ let Inst{4-0} = mode;
+}
+
+let DecoderMethod = "DecodeCPSInstruction" in {
+let M = 1 in
+ def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, imm0_31:$mode),
+ "$imod\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
+
+let imod = 0, iflags = 0, M = 1 in
+ def CPS1p : CPS<(ins imm0_31:$mode), "\t$mode">;
+}
+
+// Preload signals the memory system of possible future data/instruction access.
+multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
+
+ def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$addr"),
+ [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 0; // 0 for immediate form
+ let Inst{24} = data;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$shift"),
+ [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> {
+ bits<17> shift;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 1; // 1 for register form
+ let Inst{24} = data;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = shift{11-0};
+ let Inst{4} = 0;
+ }
+}
+
+defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>;
+defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
+
+def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary,
+ "setend\t$end", []>, Requires<[IsARM]> {
+ bits<1> end;
+ let Inst{31-10} = 0b1111000100000001000000;
+ let Inst{9} = end;
+ let Inst{8-0} = 0;
+}
+
+def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
+ []>, Requires<[IsARM, HasV7]> {
+ bits<4> opt;
+ let Inst{27-4} = 0b001100100000111100001111;
+ let Inst{3-0} = opt;
+}
+
+/*
+ * A5.4 Permanently UNDEFINED instructions.
+ *
+ * For most targets use UDF #65006, for which the OS will generate SIGTRAP.
+ * Other UDF encodings generate SIGILL.
+ *
+ * NaCl's OS instead chooses an ARM UDF encoding that's also a UDF in Thumb.
+ * Encoding A1:
+ * 1110 0111 1111 iiii iiii iiii 1111 iiii
+ * Encoding T1:
+ * 1101 1110 iiii iiii
+ * It uses the following encoding:
+ * 1110 0111 1111 1110 1101 1110 1111 0000
+ * - In ARM: UDF #60896;
+ * - In Thumb: UDF #254 followed by a branch-to-self.
+ */
+let isBarrier = 1, isTerminator = 1 in
+def TRAPNaCl : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM,UseNaClTrap]> {
+ let Inst = 0xe7fedef0;
+}
+let isBarrier = 1, isTerminator = 1 in
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM,DontUseNaClTrap]> {
+ let Inst = 0xe7ffdefe;
+}
+
+// Address computation and loads and stores in PIC mode.
+let isNotDuplicable = 1 in {
+def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+ 4, IIC_iALUr,
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+
+let AddedComplexity = 10 in {
+def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_r,
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
+
+def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
+
+def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
+}
+let AddedComplexity = 10 in {
+def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+ addrmodepc:$addr)]>;
+
+def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+}
+} // isNotDuplicable = 1
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+// The 'adr' mnemonic encodes differently if the label is before or after
+// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
+// know until then which form of the instruction will be used.
+def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<14> label;
+ let Inst{27-25} = 0b001;
+ let Inst{24} = 0;
+ let Inst{23-22} = label{13-12};
+ let Inst{21} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = label{11-0};
+}
+
+let hasSideEffects = 1 in {
+def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>;
+
+def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 4, IIC_iALUi, []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ // ARMV4T and above
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "bx", "\tlr", [(ARMretflag)]>,
+ Requires<[IsARM, HasV4T]> {
+ let Inst{27-0} = 0b0001001011111111111100011110;
+ }
+
+ // ARMV4 only
+ def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "mov", "\tpc, lr", [(ARMretflag)]>,
+ Requires<[IsARM, NoV4T]> {
+ let Inst{27-0} = 0b0001101000001111000000001110;
+ }
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ // ARMV4T and above
+ def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
+ [(brind GPR:$dst)]>,
+ Requires<[IsARM, HasV4T]> {
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+
+ def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br,
+ "bx", "\t$dst", [/* pattern left blank */]>,
+ Requires<[IsARM, HasV4T]> {
+ bits<4> dst;
+ let Inst{27-4} = 0b000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+}
+
+// SP is marked as a use to prevent stack-pointer assignments that appear
+// immediately before calls from potentially appearing dead.
+let isCall = 1,
+ // FIXME: Do we really need a non-predicated version? If so, it should
+ // at least be a pseudo instruction expanding to the predicated version
+ // at MC lowering time.
+ Defs = [LR], Uses = [SP] in {
+ def BL : ABXI<0b1011, (outs), (ins bl_target:$func),
+ IIC_Br, "bl\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsARM]> {
+ let Inst{31-28} = 0b1110;
+ bits<24> func;
+ let Inst{23-0} = func;
+ let DecoderMethod = "DecodeBranchImmInstruction";
+ }
+
+ def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func),
+ IIC_Br, "bl", "\t$func",
+ [(ARMcall_pred tglobaladdr:$func)]>,
+ Requires<[IsARM]> {
+ bits<24> func;
+ let Inst{23-0} = func;
+ let DecoderMethod = "DecodeBranchImmInstruction";
+ }
+
+ // ARMv5T and above
+ def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm,
+ IIC_Br, "blx\t$func",
+ [(ARMcall GPR:$func)]>,
+ Requires<[IsARM, HasV5T]> {
+ bits<4> func;
+ let Inst{31-4} = 0b1110000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm,
+ IIC_Br, "blx", "\t$func",
+ [(ARMcall_pred GPR:$func)]>,
+ Requires<[IsARM, HasV5T]> {
+ bits<4> func;
+ let Inst{27-4} = 0b000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ // ARMv4T
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T]>;
+
+ // ARMv4
+ def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T]>;
+
+ // mov lr, pc; b if callee is marked noreturn to avoid confusing the
+ // return stack predictor.
+ def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func),
+ 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
+ Requires<[IsARM]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
+ IIC_Br, "b", "\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+ bits<24> target;
+ let Inst{23-0} = target;
+ let DecoderMethod = "DecodeBranchImmInstruction";
+ }
+
+ let isBarrier = 1 in {
+ // B is "predicable" since it's just a Bcc with an 'always' condition.
+ let isPredicable = 1 in
+ // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly
+ // should be sufficient.
+ // FIXME: Is B really a Barrier? That doesn't seem right.
+ def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br,
+ [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>;
+
+ let isNotDuplicable = 1, isIndirectBranch = 1 in {
+ def BR_JTr : ARMPseudoInst<(outs),
+ (ins GPR:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+ // FIXME: This shouldn't use the generic "addrmode2," but rather be split
+ // into i12 and rs suffixed versions.
+ def BR_JTm : ARMPseudoInst<(outs),
+ (ins addrmode2:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+ imm:$id)]>;
+ def BR_JTadd : ARMPseudoInst<(outs),
+ (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
+ imm:$id)]>;
+ } // isNotDuplicable = 1, isIndirectBranch = 1
+ } // isBarrier = 1
+
+}
+
+// BLX (immediate)
+def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
+ "blx\t$target", []>,
+ Requires<[IsARM, HasV5T]> {
+ let Inst{31-25} = 0b1111101;
+ bits<25> target;
+ let Inst{23-0} = target{24-1};
+ let Inst{24} = target{0};
+}
+
+// Branch and Exchange Jazelle
+def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* pattern left blank */]> {
+ bits<4> func;
+ let Inst{23-20} = 0b0010;
+ let Inst{19-8} = 0xfff;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = func;
+}
+
+// Tail calls.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>;
+
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>;
+
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM]>;
+
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM]>;
+}
+
+// Secure Monitor Call is a system instruction.
+def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+ []> {
+ bits<4> opt;
+ let Inst{23-4} = 0b01100000000000000111;
+ let Inst{3-0} = opt;
+}
+
+// Supervisor Call (Software Interrupt)
+let isCall = 1, Uses = [SP] in {
+def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> {
+ bits<24> svc;
+ let Inst{23-0} = svc;
+}
+}
+
+// Store Return State
+class SRSI<bit wb, string asm>
+ : XI<(outs), (ins imm0_31:$mode), AddrModeNone, 4, IndexModeNone, BrFrm,
+ NoItinerary, asm, "", []> {
+ bits<5> mode;
+ let Inst{31-28} = 0b1111;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 1;
+ let Inst{21} = wb;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1101; // SP
+ let Inst{15-5} = 0b00000101000;
+ let Inst{4-0} = mode;
+}
+
+def SRSDA : SRSI<0, "srsda\tsp, $mode"> {
+ let Inst{24-23} = 0;
+}
+def SRSDA_UPD : SRSI<1, "srsda\tsp!, $mode"> {
+ let Inst{24-23} = 0;
+}
+def SRSDB : SRSI<0, "srsdb\tsp, $mode"> {
+ let Inst{24-23} = 0b10;
+}
+def SRSDB_UPD : SRSI<1, "srsdb\tsp!, $mode"> {
+ let Inst{24-23} = 0b10;
+}
+def SRSIA : SRSI<0, "srsia\tsp, $mode"> {
+ let Inst{24-23} = 0b01;
+}
+def SRSIA_UPD : SRSI<1, "srsia\tsp!, $mode"> {
+ let Inst{24-23} = 0b01;
+}
+def SRSIB : SRSI<0, "srsib\tsp, $mode"> {
+ let Inst{24-23} = 0b11;
+}
+def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> {
+ let Inst{24-23} = 0b11;
+}
+
+def : ARMInstAlias<"srsda $mode", (SRSDA imm0_31:$mode)>;
+def : ARMInstAlias<"srsda $mode!", (SRSDA_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsdb $mode", (SRSDB imm0_31:$mode)>;
+def : ARMInstAlias<"srsdb $mode!", (SRSDB_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsia $mode", (SRSIA imm0_31:$mode)>;
+def : ARMInstAlias<"srsia $mode!", (SRSIA_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsib $mode", (SRSIB imm0_31:$mode)>;
+def : ARMInstAlias<"srsib $mode!", (SRSIB_UPD imm0_31:$mode)>;
+
+// Return From Exception
+class RFEI<bit wb, string asm>
+ : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm,
+ NoItinerary, asm, "", []> {
+ bits<4> Rn;
+ let Inst{31-28} = 0b1111;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0;
+ let Inst{21} = wb;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xa00;
+}
+
+def RFEDA : RFEI<0, "rfeda\t$Rn"> {
+ let Inst{24-23} = 0;
+}
+def RFEDA_UPD : RFEI<1, "rfeda\t$Rn!"> {
+ let Inst{24-23} = 0;
+}
+def RFEDB : RFEI<0, "rfedb\t$Rn"> {
+ let Inst{24-23} = 0b10;
+}
+def RFEDB_UPD : RFEI<1, "rfedb\t$Rn!"> {
+ let Inst{24-23} = 0b10;
+}
+def RFEIA : RFEI<0, "rfeia\t$Rn"> {
+ let Inst{24-23} = 0b01;
+}
+def RFEIA_UPD : RFEI<1, "rfeia\t$Rn!"> {
+ let Inst{24-23} = 0b01;
+}
+def RFEIB : RFEI<0, "rfeib\t$Rn"> {
+ let Inst{24-23} = 0b11;
+}
+def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
+ let Inst{24-23} = 0b11;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / Store Instructions.
+//
+
+// Load
+
+
+defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
+defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+ isReMaterializable = 1, isCodeGenOnly = 1 in
+def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
+ []> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+}
+
+// Loads with zero extension
+def LDRH : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Load doubleword
+def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
+ (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_d_r, "ldrd", "\t$Rd, $dst2, $addr",
+ []>, Requires<[IsARM, HasV5TE]>;
+}
+
+// Indexed loads
+multiclass AI2_ldridx<bit isByte, string opc,
+ InstrItinClass iii, InstrItinClass iir> {
+ def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 0;
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{16-13};
+ let Inst{11-0} = addr{11-0};
+ let DecoderMethod = "DecodeLDRPreImm";
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrModeImm12";
+ }
+
+ def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins ldst_so_reg:$addr), IndexModePre, LdFrm, iir,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 1;
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{16-13};
+ let Inst{11-0} = addr{11-0};
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeLDRPreReg";
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrMode2";
+ }
+
+ def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, iir,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+ def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, iii,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+// FIXME: for LDR_PRE_REG etc. the itineray should be either IIC_iLoad_ru or
+// IIC_iLoad_siu depending on whether it the offset register is shifted.
+defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
+}
+
+multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
+ def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3";
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ }
+ def _POST : AI3ldstidx<op, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, LdMiscFrm, itin,
+ opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ }
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
+let hasExtraDefRegAllocReq = 1 in {
+def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ let AsmMatchConverter = "cvtLdrdPre";
+}
+def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+} // hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRT_POST_IMM : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRBT_POST_REG : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRBT_POST_IMM : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+multiclass AI3ldrT<bits<4> op, string opc> {
+ def i : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addr_offset_none:$addr, postidx_imm8:$offset),
+ IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
+ "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> {
+ bits<9> offset;
+ let Inst{23} = offset{8};
+ let Inst{22} = 1;
+ let Inst{11-8} = offset{7-4};
+ let Inst{3-0} = offset{3-0};
+ let AsmMatchConverter = "cvtLdExtTWriteBackImm";
+ }
+ def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb),
+ (ins addr_offset_none:$addr, postidx_reg:$Rm),
+ IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
+ "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
+ bits<5> Rm;
+ let Inst{23} = Rm{4};
+ let Inst{22} = 0;
+ let Inst{11-8} = 0;
+ let Unpredictable{11-8} = 0b1111;
+ let Inst{3-0} = Rm{3-0};
+ let AsmMatchConverter = "cvtLdExtTWriteBackReg";
+ let DecoderMethod = "DecodeLDR";
+ }
+}
+
+defm LDRSBT : AI3ldrT<0b1101, "ldrsbt">;
+defm LDRHT : AI3ldrT<0b1011, "ldrht">;
+defm LDRSHT : AI3ldrT<0b1111, "ldrsht">;
+}
+
+// Store
+
+// Stores with truncate
+def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
+ IIC_iStore_bh_r, "strh", "\t$Rt, $addr",
+ [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
+
+// Store doubleword
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
+ StMiscFrm, IIC_iStore_d_r,
+ "strd", "\t$Rt, $src2, $addr", []>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{21} = 0;
+}
+
+// Indexed stores
+multiclass AI2_stridx<bit isByte, string opc,
+ InstrItinClass iii, InstrItinClass iir> {
+ def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
+ StFrm, iii,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 0;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0}; // imm12
+ let AsmMatchConverter = "cvtStWriteBackRegAddrModeImm12";
+ let DecoderMethod = "DecodeSTRPreImm";
+ }
+
+ def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, ldst_so_reg:$addr),
+ IndexModePre, StFrm, iir,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 1;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0};
+ let Inst{4} = 0; // Inst{4} = 0
+ let AsmMatchConverter = "cvtStWriteBackRegAddrMode2";
+ let DecoderMethod = "DecodeSTRPreReg";
+ }
+ def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, iir,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let Inst{4} = 0;
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+ def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, iii,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+}
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+// FIXME: for STR_PRE_REG etc. the itineray should be either IIC_iStore_ru or
+// IIC_iStore_siu depending on whether it the offset register is shifted.
+defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>;
+defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_iu, IIC_iStore_bh_ru>;
+}
+
+def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset),
+ (STR_POST_REG GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset)>;
+def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset),
+ (STR_POST_IMM GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset)>;
+def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset),
+ (STRB_POST_REG GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset)>;
+def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset),
+ (STRB_POST_IMM GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset)>;
+
+// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
+// put the patterns on the instruction definitions directly as ISel wants
+// the address base and offset to be separate operands, not a single
+// complex operand like we represent the instructions themselves. The
+// pseudos map between the two.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in {
+def STRi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>;
+def STRr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>;
+def STRBi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>;
+def STRBr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>;
+def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
+}
+
+
+
+def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addrmode3:$addr), IndexModePre,
+ StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "cvtStWriteBackRegAddrMode3";
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
+ addr_offset_none:$addr,
+ am3offset:$offset))]> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+ IndexModePre, StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ let AsmMatchConverter = "cvtStrdPre";
+}
+
+def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr,
+ am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$Rt, $Rt2, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// STRT, STRBT, and STRHT
+
+def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRBT_POST_IMM : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRT_POST_IMM : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+}
+
+
+multiclass AI3strT<bits<4> op, string opc> {
+ def i : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, postidx_imm8:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc,
+ "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> {
+ bits<9> offset;
+ let Inst{23} = offset{8};
+ let Inst{22} = 1;
+ let Inst{11-8} = offset{7-4};
+ let Inst{3-0} = offset{3-0};
+ let AsmMatchConverter = "cvtStExtTWriteBackImm";
+ }
+ def r : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, postidx_reg:$Rm),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc,
+ "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
+ bits<5> Rm;
+ let Inst{23} = Rm{4};
+ let Inst{22} = 0;
+ let Inst{11-8} = 0;
+ let Inst{3-0} = Rm{3-0};
+ let AsmMatchConverter = "cvtStExtTWriteBackReg";
+ }
+}
+
+
+defm STRHT : AI3strT<0b1011, "strht">;
+
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // IA is the default, so no need for an explicit suffix on the
+ // mnemonic here. Without it is the canonical spelling.
+ def IA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+ def DA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+ def DB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+ def IB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
+
+} // neverHasSideEffects
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in
+def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+// A version for the smaller set of tail call registers.
+let neverHasSideEffects = 1 in
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
+ IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
+ DPSoRegRegFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src",
+ [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-8} = src{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = src{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = src{3-0};
+ let Inst{25} = 0;
+}
+
+def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
+ DPSoRegImmFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>,
+ UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-5} = src{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = src{3-0};
+ let Inst{25} = 0;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-0} = imm;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm),
+ DPFrm, IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set GPR:$Rd, imm0_65535:$imm)]>,
+ Requires<[IsARM, HasV6T2]>, UnaryDP {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+ let DecoderMethod = "DecodeArmMOVTWInstruction";
+}
+
+def : InstAlias<"mov${p} $Rd, $imm",
+ (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p)>,
+ Requires<[IsARM]>;
+
+def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
+ (ins GPR:$src, imm0_65535_expr:$imm),
+ DPFrm, IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set GPRnopc:$Rd,
+ (or (and GPR:$src, 0xffff),
+ lo16AllZero:$imm))]>, UnaryDP,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+ let DecoderMethod = "DecodeArmMOVTWInstruction";
+}
+
+def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+} // Constraints
+
+def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
+ Requires<[IsARM, HasV6T2]>;
+
+let Uses = [CPSR] in
+def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
+ Requires<[IsARM]>;
+
+// These aren't really mov instructions, but we have to define them this way
+// due to flag operands.
+
+let Defs = [CPSR] in {
+def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+def SXTB : AI_ext_rrot<0b01101010,
+ "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+def SXTH : AI_ext_rrot<0b01101011,
+ "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+
+def SXTAB : AI_exta_rrot<0b01101010,
+ "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+def SXTAH : AI_exta_rrot<0b01101011,
+ "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
+
+def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+def UXTB : AI_ext_rrot<0b01101110,
+ "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+def UXTH : AI_ext_rrot<0b01101111,
+ "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+def UXTB16 : AI_ext_rrot<0b01101100,
+ "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+// (UXTB16r_rot GPR:$Src, 3)>;
+def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
+ (UXTB16 GPR:$Src, 1)>;
+
+def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+def UXTAH : AI_exta_rrot<0b01101111, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
+
+// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
+def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
+
+
+def SBFX : I<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111101;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+def UBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111111;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm ADD : AsI1_bin_irs<0b0100, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+defm SUB : AsI1_bin_irs<0b0010, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set.
+//
+// Currently, ADDS/SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real ADD/SUB opcodes by
+// AdjustInstrPostInstrSelection where we determine whether or not to
+// set the "s" bit based on CPSR liveness.
+//
+// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen
+// support for an optional CPSR definition that corresponds to the DAG
+// node's second value. We can then eliminate the implicit def of CPSR.
+defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+defm ADC : AI1_adde_sube_irs<0b0101, "adc",
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
+defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+
+defm RSB : AsI1_rbin_irs<0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// FIXME: Eliminate them if we can write def : Pat patterns which defines
+// CPSR and the implicit def of CPSR is not needed.
+defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+defm RSC : AI1_rsc_irs<0b0111, "rsc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
+ (SUBSri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm),
+ (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
+def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
+ (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
+
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR),
+ (SBCri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR),
+ (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>;
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that a xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
+
+// ARM Arithmetic Instruction
+// GPR:$dst = GPR:$a op GPR:$b
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
+ list<dag> pattern = [],
+ dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = op27_20;
+ let Inst{11-4} = op11_4;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{11-8} = 0b1111;
+}
+
+// Saturating add/subtract
+
+def QADD : AAI<0b00010000, 0b00000101, "qadd",
+ [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+def QSUB : AAI<0b00010010, 0b00000101, "qsub",
+ [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+
+def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
+
+// Signed/Unsigned add/subtract
+
+def SASX : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
+
+// Signed/Unsigned halving add/subtract
+
+def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">;
+def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
+def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
+
+// Unsigned Sum of Absolute Differences [and Accumulate].
+
+def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ MulFrm /* for convenience */, NoItinerary, "usad8",
+ "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-20} = 0b01111000;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ MulFrm /* for convenience */, NoItinerary, "usada8",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+ let Inst{27-20} = 0b01111000;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Signed/Unsigned saturate
+
+def SSAT : AI<(outs GPRnopc:$Rd),
+ (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110101;
+ let Inst{5-4} = 0b01;
+ let Inst{20-16} = sat_imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{4-0};
+ let Inst{6} = sh{5};
+ let Inst{3-0} = Rn;
+}
+
+def SSAT16 : AI<(outs GPRnopc:$Rd),
+ (ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm,
+ NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101010;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT : AI<(outs GPRnopc:$Rd),
+ (ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110111;
+ let Inst{5-4} = 0b01;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{4-0};
+ let Inst{6} = sh{5};
+ let Inst{20-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT16 : AI<(outs GPRnopc:$Rd),
+ (ins imm0_15:$sat_imm, GPRnopc:$Rn), SatFrm,
+ NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101110;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos),
+ (SSAT imm:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
+ (USAT imm:$pos, GPRnopc:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm AND : AsI1_bin_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm ORR : AsI1_bin_irs<0b1100, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+defm EOR : AsI1_bin_irs<0b0001, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+defm BIC : AsI1_bin_irs<0b1110, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+
+// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just
+// like in the actual instruction encoding. The complexity of mapping the mask
+// to the lsb/msb pair should be handled by ISel, not encapsulated in the
+// instruction description.
+def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfc", "\t$Rd, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-0} = 0b0011111;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // msb
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding A1)
+def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+ [(set GPRnopc:$Rd, (ARMbfi GPRnopc:$src, GPR:$Rn,
+ bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+ let Inst{3-0} = Rn;
+}
+
+def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
+ "mvn", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+}
+def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+ IIC_iMVNi, "mvn", "\t$Rd, $imm",
+ [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+}
+
+def : ARMPat<(and GPR:$src, so_imm_not:$imm),
+ (BICri GPR:$src, so_imm_not:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AsMla1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// FIXME: The v5 pseudos are only necessary for the additional Constraint
+// property. Remove them when it's possible to add those properties
+// on an individual MachineInstr, not just an instruction description.
+let isCommutable = 1, TwoOperandAliasConstraint = "$Rn = $Rd" in {
+def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b0000;
+ let Unpredictable{15-12} = 0b1111;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
+ pred:$p, cc_out:$s),
+ 4, IIC_iMUL32,
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
+ (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6, UseMulOps]>;
+}
+
+def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6, UseMulOps]> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+ 4, IIC_iMAC32,
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))],
+ (MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6T2, UseMulOps]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<4> Ra;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMUL64, [],
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMUL64, [],
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+}
+}
+
+// Multiply + accumulate
+def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+let Constraints = "$RLo = $RdLo,$RHi = $RdHi" in {
+def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
+ pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
+ pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+}
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p),
+ 4, IIC_iMAC64, [],
+ (UMAAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p)>,
+ Requires<[IsARM, NoV6]>;
+}
+
+} // neverHasSideEffects
+
+// Most significant word multiply
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6, UseMulOps]>;
+
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6, UseMulOps]>;
+
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]>;
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+ def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+
+multiclass AI_smla<string opc, PatFrag opnode> {
+ let DecoderMethod = "DecodeSMLAInstruction" in {
+ def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (sext_inreg GPRnopc:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+
+ def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (sra GPRnopc:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+
+ def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (sext_inreg GPRnopc:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+
+ def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (sra GPRnopc:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+
+ def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
+ (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+
+ def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
+ (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+ }
+}
+
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiply accumulate long: SMLAL<x><y>.
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+// Helper class for AI_smld.
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-23} = 0b01110;
+ let Inst{22} = long;
+ let Inst{21-20} = 0b00;
+ let Inst{11-8} = Rm;
+ let Inst{7} = 0;
+ let Inst{6} = sub;
+ let Inst{5} = swap;
+ let Inst{4} = 1;
+ let Inst{3-0} = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Ra;
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+multiclass AI_smld<bit sub, string opc> {
+
+ def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+
+ def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
+
+}
+
+defm SMLA : AI_smld<0, "smla">;
+defm SMLS : AI_smld<1, "smls">;
+
+multiclass AI_sdml<bit sub, string opc> {
+
+ def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+}
+
+defm SMUA : AI_sdml<0, "smua">;
+defm SMUS : AI_sdml<1, "smus">;
+
+//===----------------------------------------------------------------------===//
+// Division Instructions (ARMv7-A with virtualization extension)
+//
+def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "clz", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>,
+ Sched<[WriteALU]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+ Requires<[IsARM, HasV6T2]>,
+ Sched<[WriteALU]>;
+
+def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
+
+let AddedComplexity = 5 in
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
+
+let AddedComplexity = 5 in
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
+
+def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl GPR:$Rm, (i32 8)), 0xFF)),
+ (REVSH GPR:$Rm)>;
+
+def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_lsl_amt:$sh),
+ IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF),
+ (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALUsi, ReadALU]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)),
+ (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (shl GPRnopc:$Rm, imm16_31:$sh)),
+ (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, imm16_31:$sh)>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_asr_amt:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000),
+ (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALUsi, ReadALU]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (srl GPRnopc:$src2, imm16_31:$sh)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>;
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+defm CMP : AI1_cmp_irs<0b1010, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+// ARMcmpZ can re-use the above instruction definitions.
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
+ (CMPri GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
+ (CMPrr GPR:$src, GPR:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs),
+ (CMPrsi GPR:$src, so_reg_imm:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
+ (CMPrsr GPR:$src, so_reg_reg:$rhs)>;
+
+// CMN register-integer
+let isCompare = 1, Defs = [CPSR] in {
+def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi,
+ "cmn", "\t$Rn, $imm",
+ [(ARMcmn GPR:$Rn, so_imm:$imm)]> {
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+// CMN register-register/shift
+def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPR:$Rn, GPR:$Rm)]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = 1;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+def CMNzrsi : AI1<0b1011, (outs),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr,
+ "cmn", "\t$Rn, $shift",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPR:$Rn, so_reg_imm:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+def CMNzrsr : AI1<0b1011, (outs),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr,
+ "cmn", "\t$Rn, $shift",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, so_reg_reg:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+}
+
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+// Note that TST/TEQ don't set all the same flags that CMP does!
+defm TST : AI1_cmp_irs<0b1000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
+
+// Pseudo i64 compares for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+ Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
+
+let isCommutable = 1, isSelect = 1 in
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
+ 4, IIC_iCMOVr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg_imm:$shift, pred:$p),
+ 4, IIC_iCMOVsr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg_reg:$shift, pred:$p),
+ 4, IIC_iCMOVsr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+
+let isMoveImm = 1 in
+def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
+ 4, IIC_iMOVi,
+ []>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
+
+let isMoveImm = 1 in
+def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+// Two instruction predicate mov immediate.
+let isMoveImm = 1 in
+def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, pred:$p),
+ 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+} // neverHasSideEffects
+
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+def MemBarrierOptOperand : AsmOperandClass {
+ let Name = "MemBarrierOpt";
+ let ParserMethod = "parseMemBarrierOptOperand";
+}
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+ let ParserMatchClass = MemBarrierOptOperand;
+ let DecoderMethod = "DecodeMemBarrierOption";
+}
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff05;
+ let Inst{3-0} = opt;
+}
+}
+
+def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dsb", "\t$opt", []>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff04;
+ let Inst{3-0} = opt;
+}
+
+// ISB has only full system option
+def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "isb", "\t$opt", []>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff06;
+ let Inst{3-0} = opt;
+}
+
+// Pseudo instruction that combines movs + predicated rsbmi
+// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR] in
+def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
+
+let usesCustomInserter = 1 in {
+ let Defs = [CPSR] in {
+ def ATOMIC_LOAD_ADD_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_ADD_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_ADD_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
+
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
+}
+}
+
+let usesCustomInserter = 1 in {
+ def COPY_STRUCT_BYVAL_I32 : PseudoInst<
+ (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
+ NoItinerary,
+ [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
+}
+
+let mayLoad = 1 in {
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary,
+ "ldrexb", "\t$Rt, $addr", []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrexh", "\t$Rt, $addr", []>;
+def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrex", "\t$Rt, $addr", []>;
+let hasExtraDefRegAllocReq = 1 in
+def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegLoad";
+}
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
+def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
+def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
+let hasExtraSrcRegAllocReq = 1 in
+def STREXD : AIstrex<0b01, (outs GPR:$Rd),
+ (ins GPRPairOp:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegStore";
+}
+}
+
+
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-0} = 0b11110101011111111111000000011111;
+}
+
+// SWP/SWPB are deprecated in V6/V7.
+let mayLoad = 1, mayStore = 1 in {
+def SWP : AIswp<0, (outs GPRnopc:$Rt),
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>;
+def SWPB: AIswp<1, (outs GPRnopc:$Rt),
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Coprocessor Instructions.
+//
+
+def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def CDP2 : ABXI<0b1110, (outs), (ins pf_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{31-28} = 0b1111;
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+class ACI<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
+ : I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ opc, asm, "", []> {
+ let Inst{27-25} = 0b110;
+}
+class ACInoP<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
+ : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ opc, asm, "", []> {
+ let Inst{31-28} = 0b1111;
+ let Inst{27-25} = 0b110;
+}
+multiclass LdStCop<bit load, bit Dbit, string asm> {
+ def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : ACI<(outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
+ def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : ACInoP<(outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+
+defm LDC : LdStCop <1, 0, "ldc">;
+defm LDCL : LdStCop <1, 1, "ldcl">;
+defm STC : LdStCop <0, 0, "stc">;
+defm STCL : LdStCop <0, 1, "stcl">;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2">;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l">;
+defm STC2 : LdSt2Cop<0, 0, "stc2">;
+defm STC2L : LdSt2Cop<0, 1, "stc2l">;
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register.
+//
+
+class MovRCopro<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : ABI<0b1110, oops, iops, NoItinerary, opc,
+ "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", pattern> {
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
+def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
+ imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
+
+def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+class MovRCopro2<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : ABXI<0b1110, oops, iops, NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> {
+ let Inst{31-28} = 0b1111;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
+def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
+ imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
+
+def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
+ imm:$CRm, imm:$opc2),
+ (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+class MovRRCopro<string opc, bit direction, list<dag> pattern = []>
+ : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm),
+ NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, imm:$CRm)]>;
+def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+
+class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
+ : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ let Inst{31-28} = 0b1111;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+
+ let DecoderMethod = "DecodeMRRC2";
+}
+
+def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, imm:$CRm)]>;
+def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register
+//
+
+// Move to ARM core register from Special Register
+def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary,
+ "mrs", "\t$Rd, apsr", []> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b00001111;
+ let Unpredictable{19-17} = 0b111;
+
+ let Inst{15-12} = Rd;
+
+ let Inst{11-0} = 0b000000000000;
+ let Unpredictable{11-0} = 0b110100001111;
+}
+
+def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>,
+ Requires<[IsARM]>;
+
+// The MRSsys instruction is the MRS instruction from the ARM ARM,
+// section B9.3.9, with the R bit set to 1.
+def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary,
+ "mrs", "\t$Rd, spsr", []> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b01001111;
+ let Unpredictable{19-16} = 0b1111;
+
+ let Inst{15-12} = Rd;
+
+ let Inst{11-0} = 0b000000000000;
+ let Unpredictable{11-0} = 0b110100001111;
+}
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions, the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
+// the mask with the fields to be accessed in the special register.
+def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
+ "msr", "\t$mask, $Rn", []> {
+ bits<5> mask;
+ bits<4> Rn;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rn;
+}
+
+def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
+ "msr", "\t$mask, $a", []> {
+ bits<5> mask;
+ bits<12> a;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = a;
+}
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+ def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved resgisters, which is exactly what we want.
+// A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-IOS version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+ NoItinerary,
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsARM, IsIOS]>;
+}
+
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for both ARM and Thumb. Any differences are handled when
+// the pseudo is expanded (which happens before any passes that need the
+// instruction size).
+let isBarrier = 1 in
+def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
+
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ARMv4 indirect branch using (MOVr PC, dst)
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
+ def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst),
+ 4, IIC_Br, [(brind GPR:$dst)],
+ (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
+ Requires<[IsARM, NoV4T]>;
+
+// Large immediate handling.
+
+// 32-bit immediate using two piece so_imms or movw + movt.
+// This is a single pseudo instruction, the benefit is that it can be remat'd
+// as a single unit instead of having to handle reg inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set GPR:$dst, (arm_i32imm:$src))]>,
+ Requires<[IsARM]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2ld,
+ [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+ Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+// Tail calls. These patterns also apply to Thumb mode.
+def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>;
+def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>;
+def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>;
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>;
+def : ARMPat<(ARMcall_nolink texternalsym:$func),
+ (BMOVPCB_CALL texternalsym:$func)>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+
+// Pre-v7 uses MCR for synchronization barriers.
+def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
+ Requires<[IsARM, HasV6]>;
+
+// SXT/UXT with no rotate
+let AddedComplexity = 16 in {
+def : ARMV6Pat<(and GPR:$Src, 0x000000FF), (UXTB GPR:$Src, 0)>;
+def : ARMV6Pat<(and GPR:$Src, 0x0000FFFF), (UXTH GPR:$Src, 0)>;
+def : ARMV6Pat<(and GPR:$Src, 0x00FF00FF), (UXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0x00FF)),
+ (UXTAB GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)),
+ (UXTAH GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>;
+def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>;
+
+def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)),
+ (SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i16)),
+ (SXTAH GPR:$Rn, GPRnopc:$Rm, 0)>;
+
+// Atomic load/store patterns
+def : ARMPat<(atomic_load_8 ldst_so_reg:$src),
+ (LDRBrs ldst_so_reg:$src)>;
+def : ARMPat<(atomic_load_8 addrmode_imm12:$src),
+ (LDRBi12 addrmode_imm12:$src)>;
+def : ARMPat<(atomic_load_16 addrmode3:$src),
+ (LDRH addrmode3:$src)>;
+def : ARMPat<(atomic_load_32 ldst_so_reg:$src),
+ (LDRrs ldst_so_reg:$src)>;
+def : ARMPat<(atomic_load_32 addrmode_imm12:$src),
+ (LDRi12 addrmode_imm12:$src)>;
+def : ARMPat<(atomic_store_8 ldst_so_reg:$ptr, GPR:$val),
+ (STRBrs GPR:$val, ldst_so_reg:$ptr)>;
+def : ARMPat<(atomic_store_8 addrmode_imm12:$ptr, GPR:$val),
+ (STRBi12 GPR:$val, addrmode_imm12:$ptr)>;
+def : ARMPat<(atomic_store_16 addrmode3:$ptr, GPR:$val),
+ (STRH GPR:$val, addrmode3:$ptr)>;
+def : ARMPat<(atomic_store_32 ldst_so_reg:$ptr, GPR:$val),
+ (STRrs GPR:$val, ldst_so_reg:$ptr)>;
+def : ARMPat<(atomic_store_32 addrmode_imm12:$ptr, GPR:$val),
+ (STRi12 GPR:$val, addrmode_imm12:$ptr)>;
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON) Support
+//
+
+include "ARMInstrNEON.td"
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Memory barriers
+def : InstAlias<"dmb", (DMB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"dsb", (DSB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"isb", (ISB 0xf)>, Requires<[IsARM, HasDB]>;
+
+// System instructions
+def : MnemonicAlias<"swi", "svc">;
+
+// Load / Store Multiple
+def : MnemonicAlias<"ldmfd", "ldm">;
+def : MnemonicAlias<"ldmia", "ldm">;
+def : MnemonicAlias<"ldmea", "ldmdb">;
+def : MnemonicAlias<"stmfd", "stmdb">;
+def : MnemonicAlias<"stmia", "stm">;
+def : MnemonicAlias<"stmea", "stm">;
+
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
+// shift amount is zero (i.e., unspecified).
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ Requires<[IsARM, HasV6]>;
+def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ Requires<[IsARM, HasV6]>;
+
+// PUSH/POP aliases for STM/LDM
+def : ARMInstAlias<"push${p} $regs", (STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : ARMInstAlias<"pop${p} $regs", (LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+
+// SSAT/USAT optional shift operand.
+def : ARMInstAlias<"ssat${p} $Rd, $sat_imm, $Rn",
+ (SSAT GPRnopc:$Rd, imm1_32:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>;
+def : ARMInstAlias<"usat${p} $Rd, $sat_imm, $Rn",
+ (USAT GPRnopc:$Rd, imm0_31:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>;
+
+
+// Extend instruction optional rotate operand.
+def : ARMInstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (SXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (SXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (SXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtb${p} $Rd, $Rm",
+ (SXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtb16${p} $Rd, $Rm",
+ (SXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxth${p} $Rd, $Rm",
+ (SXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+
+def : ARMInstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (UXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (UXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (UXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtb${p} $Rd, $Rm",
+ (UXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtb16${p} $Rd, $Rm",
+ (UXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxth${p} $Rd, $Rm",
+ (UXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+
+
+// RFE aliases
+def : MnemonicAlias<"rfefa", "rfeda">;
+def : MnemonicAlias<"rfeea", "rfedb">;
+def : MnemonicAlias<"rfefd", "rfeia">;
+def : MnemonicAlias<"rfeed", "rfeib">;
+def : MnemonicAlias<"rfe", "rfeia">;
+
+// SRS aliases
+def : MnemonicAlias<"srsfa", "srsda">;
+def : MnemonicAlias<"srsea", "srsdb">;
+def : MnemonicAlias<"srsfd", "srsia">;
+def : MnemonicAlias<"srsed", "srsib">;
+def : MnemonicAlias<"srs", "srsia">;
+
+// QSAX == QSUBADDX
+def : MnemonicAlias<"qsubaddx", "qsax">;
+// SASX == SADDSUBX
+def : MnemonicAlias<"saddsubx", "sasx">;
+// SHASX == SHADDSUBX
+def : MnemonicAlias<"shaddsubx", "shasx">;
+// SHSAX == SHSUBADDX
+def : MnemonicAlias<"shsubaddx", "shsax">;
+// SSAX == SSUBADDX
+def : MnemonicAlias<"ssubaddx", "ssax">;
+// UASX == UADDSUBX
+def : MnemonicAlias<"uaddsubx", "uasx">;
+// UHASX == UHADDSUBX
+def : MnemonicAlias<"uhaddsubx", "uhasx">;
+// UHSAX == UHSUBADDX
+def : MnemonicAlias<"uhsubaddx", "uhsax">;
+// UQASX == UQADDSUBX
+def : MnemonicAlias<"uqaddsubx", "uqasx">;
+// UQSAX == UQSUBADDX
+def : MnemonicAlias<"uqsubaddx", "uqsax">;
+// USAX == USUBADDX
+def : MnemonicAlias<"usubaddx", "usax">;
+
+// "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like
+// for isel.
+def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
+ (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+ (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+// Same for AND <--> BIC
+def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+ (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+ (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+
+// Likewise, "add Rd, so_imm_neg" -> sub
+def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+ (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via so_imm_neg
+def : ARMInstAlias<"cmp${p} $Rd, $imm",
+ (CMNri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+def : ARMInstAlias<"cmn${p} $Rd, $imm",
+ (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+
+// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
+// LSR, ROR, and RRX instructions.
+// FIXME: We need C++ parser hooks to map the alias to the MOV
+// encoding. It seems we should be able to do that sort of thing
+// in tblgen, but it could get ugly.
+let TwoOperandAliasConstraint = "$Rm = $Rd" in {
+def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+ cc_out:$s)>;
+def LSRi : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+ cc_out:$s)>;
+def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+ cc_out:$s)>;
+def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+ cc_out:$s)>;
+}
+def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+let TwoOperandAliasConstraint = "$Rn = $Rd" in {
+def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+}
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+ (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+ Requires<[IsARM, NoV6]>;
+
+// UMULL/SMULL are available on all arches, but the instruction definitions
+// need difference constraints pre-v6. Use these aliases for the assembly
+// parsing on pre-v6.
+def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+// 'it' blocks in ARM mode just validate the predicates. The IT itself
+// is discarded.
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
new file mode 100644
index 000000000000..0411ac4e282a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -0,0 +1,6806 @@
+//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM NEON instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NEON-specific Operands.
+//===----------------------------------------------------------------------===//
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+}
+
+def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
+def nImmSplatI8 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI8AsmOperand;
+}
+def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
+def nImmSplatI16 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI16AsmOperand;
+}
+def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
+def nImmSplatI32 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI32AsmOperand;
+}
+def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
+def nImmVMOVI32 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32AsmOperand;
+}
+def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
+def nImmVMOVI32Neg : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32NegAsmOperand;
+}
+def nImmVMOVF32 : Operand<i32> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
+def nImmSplatI64 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI64AsmOperand;
+}
+
+def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
+def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
+def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
+def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 8;
+}]> {
+ let ParserMatchClass = VectorIndex8Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 4;
+}]> {
+ let ParserMatchClass = VectorIndex16Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = VectorIndex32Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+
+// Register list of one D register.
+def VecListOneDAsmOperand : AsmOperandClass {
+ let Name = "VecListOneD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
+ let ParserMatchClass = VecListOneDAsmOperand;
+}
+// Register list of two sequential D registers.
+def VecListDPairAsmOperand : AsmOperandClass {
+ let Name = "VecListDPair";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
+ let ParserMatchClass = VecListDPairAsmOperand;
+}
+// Register list of three sequential D registers.
+def VecListThreeDAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
+ let ParserMatchClass = VecListThreeDAsmOperand;
+}
+// Register list of four sequential D registers.
+def VecListFourDAsmOperand : AsmOperandClass {
+ let Name = "VecListFourD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
+ let ParserMatchClass = VecListFourDAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpaced";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
+ let ParserMatchClass = VecListDPairSpacedAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three Q registers).
+def VecListThreeQAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
+ let ParserMatchClass = VecListThreeQAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three Q registers).
+def VecListFourQAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
+ let ParserMatchClass = VecListFourQAsmOperand;
+}
+
+// Register list of one D register, with "all lanes" subscripting.
+def VecListOneDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
+ let ParserMatchClass = VecListOneDAllLanesAsmOperand;
+}
+// Register list of two D registers, with "all lanes" subscripting.
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListDPairAllLanesAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpacedAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoSpacedAllLanes"> {
+ let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
+}
+// Register list of three D registers, with "all lanes" subscripting.
+def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeDAllLanes : RegisterOperand<DPR,
+ "printVectorListThreeAllLanes"> {
+ let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three sequential Q regs).
+def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQAllLanes : RegisterOperand<DPR,
+ "printVectorListThreeSpacedAllLanes"> {
+ let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
+}
+// Register list of four D registers, with "all lanes" subscripting.
+def VecListFourDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
+ let ParserMatchClass = VecListFourDAllLanesAsmOperand;
+}
+// Register list of four D registers spaced by 2 (four sequential Q regs).
+def VecListFourQAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourQAllLanes : RegisterOperand<DPR,
+ "printVectorListFourSpacedAllLanes"> {
+ let ParserMatchClass = VecListFourQAllLanesAsmOperand;
+}
+
+
+// Register list of one D register, with byte lane subscripting.
+def VecListOneDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListOneDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+// Register list of two D registers with byte lane subscripting.
+def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of two Q registers with half-word lane subscripting.
+def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+
+// Register list of three D registers with byte lane subscripting.
+def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of three Q registers with half-word lane subscripting.
+def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+// Register list of four D registers with byte lane subscripting.
+def VecListFourDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of four Q registers with half-word lane subscripting.
+def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 8;
+}]>;
+def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 4;
+}]>;
+def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 4;
+}]>;
+def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 2;
+}]>;
+def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 2;
+}]>;
+def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 1;
+}]>;
+def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 1;
+}]>;
+def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+
+//===----------------------------------------------------------------------===//
+// NEON-specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
+
+def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
+def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
+def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
+def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
+def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
+def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
+
+// Types for vector shift by immediates. The "SHX" version is for long and
+// narrow operations where the source and destination vectors have different
+// types. The "SHINS" version is for shift and insert operations.
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+
+def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
+def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
+def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
+def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
+def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
+def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
+def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
+
+def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
+def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
+def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
+
+def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
+def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
+def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
+def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
+def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
+def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
+
+def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
+def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
+def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
+
+def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
+def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
+
+def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
+def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
+
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
+
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
+
+def NEONvbsl : SDNode<"ARMISD::VBSL",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>>;
+
+def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
+
+// VDUPLANE can produce a quad-register result from a double-register source,
+// so the result is not constrained to match the source.
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisVT<2, i32>]>>;
+
+def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
+
+def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+
+def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
+def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
+def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
+def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+
+def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 32 && EltVal == 0);
+}]>;
+
+def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// NEON load / store instructions
+//===----------------------------------------------------------------------===//
+
+// Use VLDM to load a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
+def VLDMQIA
+ : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
+ IIC_fpLoad_m, "",
+ [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
+
+// Use VSTM to store a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
+def VSTMQIA
+ : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
+ IIC_fpStore_m, "",
+ [(store (v2f64 DPair:$src), GPR:$Rn)]>;
+
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+class VLDQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+
+class VLDQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
+ "$src = $dst">;
+class VLDQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb, $src = $dst">;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// VLD1 : Vector Load (multiple single elements)
+class VLD1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
+ (ins addrmode6:$Rn), IIC_VLD1,
+ "vld1", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+class VLD1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
+ (ins addrmode6:$Rn), IIC_VLD1x2,
+ "vld1", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
+def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
+def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
+def VLD1d64 : VLD1D<{1,1,0,?}, "64">;
+
+def VLD1q8 : VLD1Q<{0,0,?,?}, "8">;
+def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
+def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
+def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
+
+// ...with address register writeback:
+multiclass VLD1DWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
+}
+multiclass VLD1QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
+}
+
+defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">;
+defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
+defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
+defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
+defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">;
+defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
+defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
+defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
+
+// ...with 3 registers
+class VLD1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
+ (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
+ "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
+}
+
+def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
+def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
+def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
+def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
+
+defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">;
+defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
+defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
+defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
+
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+
+// ...with 4 registers
+class VLD1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
+ (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
+ "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
+}
+
+def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
+def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
+def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
+def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
+
+defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">;
+defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
+defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
+defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
+
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+
+// VLD2 : Vector Load (multiple 2-element structures)
+class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;
+
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
+
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+
+// ...with address register writeback:
+multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy, InstrItinClass itin> {
+ def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), itin,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
+}
+
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;
+
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
+
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+
+// ...with double-spaced registers
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;
+
+// VLD3 : Vector Load (multiple 3-element structures)
+class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD3,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
+def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
+def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
+
+// ...with address register writeback:
+class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+
+// ...with double-spaced registers:
+def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
+def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
+def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
+def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// VLD4 : Vector Load (multiple 4-element structures)
+class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD4,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
+def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
+def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
+
+// ...with address register writeback:
+class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+}
+
+def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+
+// ...with double-spaced registers:
+def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
+def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
+def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
+def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// Classes for VLD*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst),
+ (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst),
+ (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst),
+ (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+
+// VLD1LN : Vector Load (single element to one lane)
+class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD1LN";
+}
+class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6oneL32:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD1LN";
+}
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+ let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
+ (i32 (LoadOp addrmode6:$addr)),
+ imm:$lane))];
+}
+
+def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{5-4} = Rn{5-4};
+}
+def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
+def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
+def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
+
+def : Pat<(vector_insert (v2f32 DPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v4f32 QPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$src = $Vd, $Rn.addr = $wb", []> {
+ let DecoderMethod = "DecodeVLD1LN";
+}
+
+def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+
+// VLD2LN : Vector Load (single 2-element structure to one lane)
+class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2LN";
+}
+
+def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+
+// ...with double-spaced registers:
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+
+// ...with address register writeback:
+class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
+ "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2LN";
+}
+
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+
+// VLD3LN : Vector Load (single 3-element structure to one lane)
+class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD3LN";
+}
+
+def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+
+// ...with double-spaced registers:
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+
+// ...with address register writeback:
+class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VLD3lnu, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
+ []> {
+ let DecoderMethod = "DecodeVLD3LN";
+}
+
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+
+// VLD4LN : Vector Load (single 4-element structure to one lane)
+class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4LN";
+}
+
+def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+
+// ...with double-spaced registers:
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+
+// ...with address register writeback:
+class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VLD4lnu, "vld4", Dt,
+"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
+"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
+ []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4LN" ;
+}
+
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// VLD1DUP : Vector Load (single element to all lanes)
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
+ (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListOneDAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+}
+def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPd32 addrmode6:$addr)>;
+
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
+ (ins addrmode6dup:$Rn), IIC_VLD1dup,
+ "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListDPairAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+}
+
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
+
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32 addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// ...with address register writeback:
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
+}
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
+}
+
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
+
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
+
+// VLD2DUP : Vector Load (single 2-element structure to all lanes)
+class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
+ (ins addrmode6dup:$Rn), IIC_VLD2dup,
+ "vld2", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+}
+
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
+
+// ...with double-spaced registers
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
+
+// ...with address register writeback:
+multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
+ def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
+ (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD2dupu,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1101, op7_4,
+ (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
+}
+
+defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
+defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
+defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
+
+defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
+
+// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6dup:$Rn), IIC_VLD3dup,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeVLD3DupInstruction";
+}
+
+def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeVLD3DupInstruction";
+}
+
+def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+
+def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
+// VLD4DUP : Vector Load (single 4-element structure to all lanes)
+class VLD4DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6dup:$Rn), IIC_VLD4dup,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4DupInstruction";
+}
+
+def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
+def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
+def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+// ...with address register writeback:
+class VLD4DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4DupInstruction";
+}
+
+def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
+def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
+def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+
+class VSTQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb">;
+
+// VST1 : Vector Store (multiple single elements)
+class VST1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
+ IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+class VST1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
+ IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST1d8 : VST1D<{0,0,0,?}, "8">;
+def VST1d16 : VST1D<{0,1,0,?}, "16">;
+def VST1d32 : VST1D<{1,0,0,?}, "32">;
+def VST1d64 : VST1D<{1,1,0,?}, "64">;
+
+def VST1q8 : VST1Q<{0,0,?,?}, "8">;
+def VST1q16 : VST1Q<{0,1,?,?}, "16">;
+def VST1q32 : VST1Q<{1,0,?,?}, "32">;
+def VST1q64 : VST1Q<{1,1,?,?}, "64">;
+
+// ...with address register writeback:
+multiclass VST1DWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
+ IIC_VLD1u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
+}
+multiclass VST1QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
+ IIC_VLD1x2u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
+}
+
+defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">;
+defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
+defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
+defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;
+
+defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">;
+defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
+defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
+defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
+
+// ...with 3 registers
+class VST1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
+ (ins addrmode6:$Rn, VecListThreeD:$Vd),
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+multiclass VST1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+ IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
+}
+
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
+
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
+
+// ...with 4 registers
+class VST1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+ (ins addrmode6:$Rn, VecListFourD:$Vd),
+ IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
+ []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+multiclass VST1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
+}
+
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
+
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
+
+// VST2 : Vector Store (multiple 2-element structures)
+class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
+ itin, "vst2", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;
+
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
+
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+
+// ...with address register writeback:
+multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy> {
+ def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
+}
+multiclass VST2QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
+}
+
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;
+
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
+
+def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+
+// ...with double-spaced registers
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;
+
+// VST3 : Vector Store (multiple 3-element structures)
+class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
+def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
+def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+
+// ...with address register writeback:
+class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+
+// ...with double-spaced registers:
+def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
+def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
+def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
+def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// VST4 : Vector Store (multiple 4-element structures)
+class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
+def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
+def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+
+// ...with address register writeback:
+class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
+ "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+}
+
+def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+
+// ...with double-spaced registers:
+def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
+def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
+def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
+def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+
+// VST1LN : Vector Store (single element from one lane)
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST1LN";
+}
+class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNPseudo<IIC_VST1ln> {
+ let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr)];
+}
+
+def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
+ NEONvgetlaneu, addrmode6> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
+ NEONvgetlaneu, addrmode6> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
+ addrmode6oneL32> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+
+def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins AdrMode:$Rn, am6offset:$Rm,
+ DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb",
+ [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+ AdrMode:$Rn, am6offset:$Rm))]> {
+ let DecoderMethod = "DecodeVST1LN";
+}
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNWBPseudo<IIC_VST1lnu> {
+ let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr, am6offset:$offset))];
+}
+
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+ NEONvgetlaneu, addrmode6> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+ NEONvgetlaneu, addrmode6> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+ extractelt, addrmode6oneL32> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// VST2LN : Vector Store (single 2-element structure from one lane)
+class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
+ IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST2LN";
+}
+
+def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+
+// ...with double-spaced registers:
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+
+// ...with address register writeback:
+class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST2LN";
+}
+
+def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+
+// VST3LN : Vector Store (single 3-element structure from one lane)
+class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST3LN";
+}
+
+def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+
+// ...with double-spaced registers:
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+
+// ...with address register writeback:
+class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VST3lnu, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let DecoderMethod = "DecodeVST3LN";
+}
+
+def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+
+// VST4LN : Vector Store (single 4-element structure from one lane)
+class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST4LN";
+}
+
+def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+
+// ...with double-spaced registers:
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+
+// ...with address register writeback:
+class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VST4lnu, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST4LN";
+}
+
+def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// Use vld1/vst1 for unaligned f64 load / store
+def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
+ (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
+ (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
+ (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
+def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
+
+// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
+// load / store if it's legal.
+def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
+ (VLD1q64 addrmode6:$addr)>;
+def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q64 addrmode6:$addr, QPR:$value)>;
+def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
+ (VLD1q32 addrmode6:$addr)>;
+def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q32 addrmode6:$addr, QPR:$value)>;
+def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
+ (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
+ (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+
+//===----------------------------------------------------------------------===//
+// NEON pattern fragments
+//===----------------------------------------------------------------------===//
+
+// Extract D sub-registers of Q registers.
+def DSubReg_i8_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
+}]>;
+def DSubReg_i16_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
+}]>;
+def DSubReg_i32_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
+}]>;
+def DSubReg_f64_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers.
+def SSubReg_f32_reg : SDNodeXForm<imm, [{
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Translate lane numbers from Q registers to D subregs.
+def SubReg_i8_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
+}]>;
+def SubReg_i16_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
+}]>;
+def SubReg_i32_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes
+//===----------------------------------------------------------------------===//
+
+// Basic 2-register operations: double- and quad-register.
+class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
+class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
+
+// Basic 2-register intrinsics, both double- and quad-register.
+class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
+
+// Narrow 2-register intrinsics.
+class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
+
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
+
+// Long 2-register intrinsics.
+class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
+
+// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
+ (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+ OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
+ InstrItinClass itin, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
+ (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+
+// Basic 3-register operations: double- and quad-register.
+class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+// Same as N3VD but no data type.
+class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = 0;
+}
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = 0;
+}
+
+class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = 0;
+}
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = 0;
+}
+
+// Basic 3-register intrinsics, both double- and quad-register.
+class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
+ let TwoOperandAliasConstraint = "$Vm = $Vd";
+ let isCommutable = 0;
+}
+
+class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+ let TwoOperandAliasConstraint = "$Vm = $Vd";
+ let isCommutable = 0;
+}
+
+// Multiply-Add/Sub operations: double- and quad-register.
+class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
+
+class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode ShOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
+ SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
+class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
+ SDNode MulOp, SDNode ShOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
+
+// Neon 3-argument intrinsics, both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
+ (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
+ (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm),
+ imm:$lane))))))]>;
+
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))))]>;
+
+// Neon Long 3-argument intrinsic. The destination register is
+// a quad-register and is also used as the first source operand register.
+class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
+class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Narrowing 3-register intrinsics.
+class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm))))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics.
+class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+
+// Pairwise long 2-register intrinsics, both double- and quad-register.
+class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Pairwise long 2-register accumulate intrinsics,
+// both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
+class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
+
+// Shift by immediate,
+// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
+class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
+}
+
+// Long shift by immediate.
+class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
+ (i32 imm:$SIMM))))]>;
+
+// Narrow shift by immediate.
+class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
+ (i32 imm:$SIMM))))]>;
+
+// Shift right by immediate and accumulate,
+// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
+class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (add DPR:$src1,
+ (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
+class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (add QPR:$src1,
+ (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
+}
+
+// Shift by immediate and insert,
+// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
+class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+ ValueType Ty,SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
+class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+ ValueType Ty,SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
+}
+
+// Convert, with fractional bits immediate,
+// both double- and quad-register.
+class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
+
+//===----------------------------------------------------------------------===//
+// Multiclasses
+//===----------------------------------------------------------------------===//
+
+// Abbreviations used in multiclass suffixes:
+// Q = quarter int (8 bit) elements
+// H = half int (16 bit) elements
+// S = single int (32 bit) elements
+// D = double int (64 bit) elements
+
+// Neon 2-register vector operations and intrinsics.
+
+// Neon 2-register comparisons.
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4, string opc, string Dt,
+ string asm, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
+ def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
+ def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
+ def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+
+ // 128-bit vector types.
+ def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
+ def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
+ def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
+ def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+}
+
+
+// Neon 2-register vector intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
+ def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
+ def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
+ def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
+}
+
+
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
+// Neon Narrowing 2-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDPatternOperator IntOp> {
+ def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp>;
+ def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp>;
+ def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp>;
+}
+
+
+// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+
+// Neon 3-register vector operations.
+
+// First with only element sizes of 8, 16 and 32 bits:
+multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, OpNode, Commutable>;
+ def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, OpNode, Commutable>;
+ def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, OpNode, Commutable>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, OpNode, Commutable>;
+ def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, OpNode, Commutable>;
+ def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, OpNode, Commutable>;
+}
+
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
+ v4i32, v2i32, ShOp>;
+}
+
+// ....then also with element size 64 bits:
+multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0>
+ : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
+ OpcodeStr, Dt, OpNode, Commutable> {
+ def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, OpNode, Commutable>;
+ def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, OpNode, Commutable>;
+}
+
+
+// Neon 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp, Commutable>;
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp, Commutable>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp, Commutable>;
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp, Commutable>;
+}
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp> {
+ // 64-bit vector types.
+ def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp>;
+}
+
+multiclass N3VIntSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
+ def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
+ def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+ def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0>
+ : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp, Commutable>;
+ def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp>
+ : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp>;
+ def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp>;
+}
+
+
+// ....then also with element size of 64 bits:
+multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0>
+ : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp, Commutable>;
+ def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp>
+ : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp>;
+ def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp>;
+}
+
+// Neon Narrowing 3-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0> {
+ def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp, Commutable>;
+ def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp, Commutable>;
+ def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp, Commutable>;
+}
+
+
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+// Neon Long 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0> {
+ def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, Commutable>;
+ def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, Commutable>;
+}
+
+multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDPatternOperator IntOp> {
+ def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0>
+ : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
+ IntOp, Commutable> {
+ def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, Commutable>;
+}
+
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
+
+// Neon Wide 3-register vector intrinsics,
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+
+// Neon Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
+ def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
+ def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
+ def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
+ def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
+}
+
+multiclass N3VMulOpSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
+ def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
+ def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
+ mul, ShOp>;
+ def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
+ mul, ShOp>;
+}
+
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
+// Neon 3-argument intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
+}
+
+
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+ !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+
+// Neon Long 3-argument intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
+ OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp>
+ : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
+ def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
+}
+
+// ....with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
+
+// Neon Pairwise long 2-register intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon Pairwise long 2-register accumulate intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon 2-register vector shift by immediate,
+// with f of either N2RegVShLFrm or N2RegVShRFrm
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ string baseOpc, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Accumulate vector operations,
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Insert vector operations,
+// with f of either N2RegVShLFrm or N2RegVShRFrm
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift Long operations,
+// element sizes of 8, 16, 32 bits:
+multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
+
+// Neon Shift Narrow operations,
+// element sizes of 16, 32, 64 bits:
+multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, shr_imm8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, shr_imm16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, shr_imm32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+//===----------------------------------------------------------------------===//
+
+// Vector Add Operations.
+
+// VADD : Vector Add (integer and floating-point)
+defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
+ add, 1>;
+def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
+ v2f32, v2f32, fadd, 1>;
+def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
+ v4f32, v4f32, fadd, 1>;
+// VADDL : Vector Add Long (Q = D + D)
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
+// VADDW : Vector Add Wide (Q = Q + D)
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
+// VHADD : Vector Halving Add
+defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "s", int_arm_neon_vhadds, 1>;
+defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "u", int_arm_neon_vhaddu, 1>;
+// VRHADD : Vector Rounding Halving Add
+defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "s", int_arm_neon_vrhadds, 1>;
+defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
+// VQADD : Vector Saturating Add
+defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "s", int_arm_neon_vqadds, 1>;
+defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "u", int_arm_neon_vqaddu, 1>;
+// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
+ int_arm_neon_vaddhn, 1>;
+// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
+defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
+ int_arm_neon_vraddhn, 1>;
+
+// Vector Multiply Operations.
+
+// VMUL : Vector Multiply (integer, polynomial and floating-point)
+defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
+ "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
+ "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
+ v2f32, v2f32, fmul, 1>;
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
+ v4f32, v4f32, fmul, 1>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
+ v2f32, fmul>;
+
+def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
+ (v4f32 (VMULslfq (v4f32 QPR:$src1),
+ (v2f32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
+defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
+defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
+defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
+ IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
+defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
+defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
+def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
+
+// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
+defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vqdmull", "s", int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
+ "vqdmull", "s", int_arm_neon_vqdmull>;
+
+// Vector Multiply-Accumulate and Multiply-Subtract Operations.
+
+// VMLA : Vector Multiply Accumulate (integer and floating-point)
+defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
+ v4f32, v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+def : Pat<(v8i16 (add (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (add (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLAslfq (v4f32 QPR:$src1),
+ (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
+
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+
+// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
+defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlal", "s", int_arm_neon_vqdmlal>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
+
+// VMLS : Vector Multiply Subtract (integer and floating-point)
+defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
+ v4f32, v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
+
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
+
+// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
+defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+
+// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
+def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+// Fused Vector Multiply Subtract (floating-point)
+def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
+ (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
+ (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
+ (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
+ (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
+
+// Vector Subtract Operations.
+
+// VSUB : Vector Subtract (integer and floating-point)
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
+ "vsub", "i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
+ v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
+ v4f32, v4f32, fsub, 0>;
+// VSUBL : Vector Subtract Long (Q = D - D)
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
+// VSUBW : Vector Subtract Wide (Q = Q - D)
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
+// VHSUB : Vector Halving Subtract
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "u", int_arm_neon_vhsubu, 0>;
+// VQSUB : Vector Saturing Subtract
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "u", int_arm_neon_vqsubu, 0>;
+// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
+ int_arm_neon_vsubhn, 0>;
+// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
+defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
+ int_arm_neon_vrsubhn, 0>;
+
+// Vector Comparisons.
+
+// VCEQ : Vector Compare Equal
+defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
+def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+ NEONvceq, 1>;
+def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+ NEONvceq, 1>;
+
+let TwoOperandAliasConstraint = "$Vm = $Vd" in
+defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
+ "$Vd, $Vm, #0", NEONvceqz>;
+
+// VCGE : Vector Compare Greater Than or Equal
+defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
+defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
+ NEONvcge, 0>;
+def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+ NEONvcge, 0>;
+
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
+defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
+ "$Vd, $Vm, #0", NEONvcgez>;
+defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
+ "$Vd, $Vm, #0", NEONvclez>;
+}
+
+// VCGT : Vector Compare Greater Than
+defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
+defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
+def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+ NEONvcgt, 0>;
+def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+ NEONvcgt, 0>;
+
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
+defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
+ "$Vd, $Vm, #0", NEONvcgtz>;
+defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
+ "$Vd, $Vm, #0", NEONvcltz>;
+}
+
+// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
+def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
+def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
+// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
+def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
+// VTST : Vector Test Bits
+defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
+
+// Vector Bitwise Operations.
+
+def vnotd : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+def vnotq : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
+
+
+// VAND : Vector Bitwise AND
+def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
+ v2i32, v2i32, and, 1>;
+def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
+ v4i32, v4i32, and, 1>;
+
+// VEOR : Vector Bitwise Exclusive OR
+def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
+ v2i32, v2i32, xor, 1>;
+def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
+ v4i32, v4i32, xor, 1>;
+
+// VORR : Vector Bitwise OR
+def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
+ v2i32, v2i32, or, 1>;
+def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
+ v4i32, v4i32, or, 1>;
+
+def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+
+// VBIC : Vector Bitwise Bit Clear (AND NOT)
+let TwoOperandAliasConstraint = "$Vn = $Vd" in {
+def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+}
+
+def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+// VORN : Vector Bitwise OR NOT
+def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+// VMVN : Vector Bitwise NOT (Immediate)
+
+let isReMaterializable = 1 in {
+
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+}
+
+// VMVN : Vector Bitwise NOT
+def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
+def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
+def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
+
+// VBSL : Vector Bitwise Select
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VCNTiD,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
+ (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
+ (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
+ (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
+ (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
+ (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VCNTiQ,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+
+def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
+ (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
+ (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
+ (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
+ (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
+ (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+// VBIF : Vector Bitwise Insert if False
+// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+
+// VBIT : Vector Bitwise Insert if True
+// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+
+// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
+// for equivalent operations with different register constraints; it just
+// inserts copies.
+
+// Vector Absolute Differences.
+
+// VABD : Vector Absolute Difference
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "s", int_arm_neon_vabds, 1>;
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "u", int_arm_neon_vabdu, 1>;
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+
+// VABDL : Vector Absolute Difference Long (Q = | D - D |)
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
+
+// VABA : Vector Absolute Difference and Accumulate
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
+
+// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
+
+// Vector Maximum and Minimum.
+
+// VMAX : Vector Maximum
+defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "s", int_arm_neon_vmaxs, 1>;
+defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f32",
+ v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f32",
+ v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+
+// VMIN : Vector Minimum
+defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "s", int_arm_neon_vmins, 1>;
+defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f32",
+ v2f32, v2f32, int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f32",
+ v4f32, v4f32, int_arm_neon_vmins, 1>;
+
+// Vector Pairwise Operations.
+
+// VPADD : Vector Pairwise Add
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i8",
+ v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i16",
+ v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i32",
+ v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f32",
+ v2f32, v2f32, int_arm_neon_vpadd, 0>;
+
+// VPADDL : Vector Pairwise Add Long
+defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
+ int_arm_neon_vpaddls>;
+defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
+ int_arm_neon_vpaddlu>;
+
+// VPADAL : Vector Pairwise Add and Accumulate Long
+defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
+ int_arm_neon_vpadals>;
+defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
+ int_arm_neon_vpadalu>;
+
+// VPMAX : Vector Pairwise Maximum
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+ "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+
+// VPMIN : Vector Pairwise Minimum
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+ "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+
+// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
+
+// VRECPE : Vector Reciprocal Estimate
+def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAD, "vrecpe", "u32",
+ v2i32, v2i32, int_arm_neon_vrecpe>;
+def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAQ, "vrecpe", "u32",
+ v4i32, v4i32, int_arm_neon_vrecpe>;
+def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe", "f32",
+ v2f32, v2f32, int_arm_neon_vrecpe>;
+def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe", "f32",
+ v4f32, v4f32, int_arm_neon_vrecpe>;
+
+// VRECPS : Vector Reciprocal Step
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrecps", "f32",
+ v2f32, v2f32, int_arm_neon_vrecps, 1>;
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrecps", "f32",
+ v4f32, v4f32, int_arm_neon_vrecps, 1>;
+
+// VRSQRTE : Vector Reciprocal Square Root Estimate
+def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAD, "vrsqrte", "u32",
+ v2i32, v2i32, int_arm_neon_vrsqrte>;
+def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAQ, "vrsqrte", "u32",
+ v4i32, v4i32, int_arm_neon_vrsqrte>;
+def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrte>;
+def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrte>;
+
+// VRSQRTS : Vector Reciprocal Square Root Step
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrsqrts", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrsqrts", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
+
+// Vector Shifts.
+
+// VSHL : Vector Shift
+defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "s", int_arm_neon_vshifts>;
+defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "u", int_arm_neon_vshiftu>;
+
+// VSHL : Vector Shift Left (Immediate)
+defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+
+// VSHR : Vector Shift Right (Immediate)
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
+ NEONvshrs>;
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
+ NEONvshru>;
+
+// VSHLL : Vector Shift Left Long
+defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
+defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
+
+// VSHLL : Vector Shift Left Long (with maximum shift count)
+class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
+ bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
+ ValueType OpTy, Operand ImmTy, SDNode OpNode>
+ : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
+ ResTy, OpTy, ImmTy, OpNode> {
+ let Inst{21-16} = op21_16;
+ let DecoderMethod = "DecodeVSHLMaxInstruction";
+}
+def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
+ v8i16, v8i8, imm8, NEONvshlli>;
+def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
+ v4i32, v4i16, imm16, NEONvshlli>;
+def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
+ v2i64, v2i32, imm32, NEONvshlli>;
+
+// VSHRN : Vector Shift Right and Narrow
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
+ NEONvshrn>;
+
+// VRSHL : Vector Rounding Shift
+defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "s", int_arm_neon_vrshifts>;
+defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "u", int_arm_neon_vrshiftu>;
+// VRSHR : Vector Rounding Shift Right
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
+ NEONvrshrs>;
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
+ NEONvrshru>;
+
+// VRSHRN : Vector Rounding Shift Right and Narrow
+defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
+ NEONvrshrn>;
+
+// VQSHL : Vector Saturating Shift
+defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "s", int_arm_neon_vqshifts>;
+defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "u", int_arm_neon_vqshiftu>;
+// VQSHL : Vector Saturating Shift Left (Immediate)
+defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
+defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+
+// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
+defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
+
+// VQSHRN : Vector Saturating Shift Right and Narrow
+defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
+ NEONvqshrns>;
+defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
+ NEONvqshrnu>;
+
+// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
+defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
+ NEONvqshrnsu>;
+
+// VQRSHL : Vector Saturating Rounding Shift
+defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "s", int_arm_neon_vqrshifts>;
+defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "u", int_arm_neon_vqrshiftu>;
+
+// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
+defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
+ NEONvqrshrns>;
+defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
+ NEONvqrshrnu>;
+
+// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
+defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
+ NEONvqrshrnsu>;
+
+// VSRA : Vector Shift Right and Accumulate
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
+// VRSRA : Vector Rounding Shift Right and Accumulate
+defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
+defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
+
+// VSLI : Vector Shift Left and Insert
+defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
+
+// VSRI : Vector Shift Right and Insert
+defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
+
+// Vector Absolute and Saturating Absolute.
+
+// VABS : Vector Absolute Value
+defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
+ IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
+ int_arm_neon_vabs>;
+def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v2f32, v2f32, fabs>;
+def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v4f32, v4f32, fabs>;
+
+def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
+def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
+
+// VQABS : Vector Saturating Absolute Value
+defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
+ int_arm_neon_vqabs>;
+
+// Vector Negate.
+
+def vnegd : PatFrag<(ops node:$in),
+ (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+def vnegq : PatFrag<(ops node:$in),
+ (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
+
+class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
+class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
+ IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
+
+// VNEG : Vector Negate (integer)
+def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
+def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
+def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
+def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
+def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
+def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
+
+// VNEG : Vector Negate (floating-point)
+def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
+def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+
+def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
+
+// VQNEG : Vector Saturating Negate
+defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
+ int_arm_neon_vqneg>;
+
+// Vector Bit Counting Operations.
+
+// VCLS : Vector Count Leading Sign Bits
+defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
+ int_arm_neon_vcls>;
+// VCLZ : Vector Count Leading Zeros
+defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
+ ctlz>;
+// VCNT : Vector Count One Bits
+def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiD, "vcnt", "8",
+ v8i8, v8i8, ctpop>;
+def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiQ, "vcnt", "8",
+ v16i8, v16i8, ctpop>;
+
+// Vector Swap
+def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
+ (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
+ NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
+ []>;
+def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
+ (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
+ NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
+ []>;
+
+// Vector Move Operations.
+
+// VMOV : Vector Move (Register)
+def : InstAlias<"vmov${p} $Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : InstAlias<"vmov${p} $Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VMOV : Vector Move (Immediate)
+
+let isReMaterializable = 1 in {
+def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
+def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
+} // isReMaterializable
+
+// VMOV : Vector Get Lane (move scalar to ARM core register)
+
+def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
+ IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
+ [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
+ IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
+ [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
+ IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
+ [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
+ IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
+ [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
+ (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
+ IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
+ [(set GPR:$R, (extractelt (v2i32 DPR:$V),
+ imm:$lane))]>,
+ Requires<[HasNEON, HasFastVGETLNi32]> {
+ let Inst{21} = lane{0};
+}
+// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
+def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
+ (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
+ (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
+ (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
+ (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane))>,
+ Requires<[HasNEON, HasFastVGETLNi32]>;
+def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
+def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
+// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+
+
+// VMOV : Vector Set Lane (move ARM core register to scalar)
+
+let Constraints = "$src1 = $V" in {
+def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
+ IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
+ [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
+ IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
+ [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
+ IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
+ [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
+}
+def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
+ (v16i8 (INSERT_SUBREG QPR:$src1,
+ (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i8_reg imm:$lane))),
+ GPR:$src2, (SubReg_i8_lane imm:$lane))),
+ (DSubReg_i8_reg imm:$lane)))>;
+def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
+ (v8i16 (INSERT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i16_reg imm:$lane))),
+ GPR:$src2, (SubReg_i16_lane imm:$lane))),
+ (DSubReg_i16_reg imm:$lane)))>;
+def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
+ (v4i32 (INSERT_SUBREG QPR:$src1,
+ (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i32_reg imm:$lane))),
+ GPR:$src2, (SubReg_i32_lane imm:$lane))),
+ (DSubReg_i32_reg imm:$lane)))>;
+
+def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+
+//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+ (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+
+def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
+def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+
+def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+
+def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+
+// VDUP : Vector Duplicate (from ARM core register to all elements)
+
+class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+
+def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
+def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
+ Requires<[HasNEON, HasFastVDUP32]>;
+def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
+def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
+def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
+
+// NEONvdup patterns for uarchs with fast VDUP.32.
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
+ Requires<[HasNEON,HasFastVDUP32]>;
+def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+
+// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
+def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+
+// VDUP : Vector Duplicate Lane (from scalar to all elements)
+
+class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType Ty, Operand IdxTy>
+ : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
+ [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
+
+class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Operand IdxTy>
+ : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
+ IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
+ [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
+ VectorIndex32:$lane)))]>;
+
+// Inst{19-16} is partially specified depending on the element size.
+
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
+ bits<3> lane;
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
+ bits<2> lane;
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
+ bits<1> lane;
+ let Inst{19} = lane{0};
+}
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
+ bits<3> lane;
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
+ bits<2> lane;
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
+ bits<1> lane;
+ let Inst{19} = lane{0};
+}
+
+def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32d DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32q DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+ (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane)))>;
+def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+ (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+ (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+ (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
+ [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
+def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
+ [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
+
+// VMOVN : Vector Narrowing Move
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
+ "vmovn", "i", trunc>;
+// VQMOVN : Vector Saturating Narrowing Move
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
+ "vqmovn", "s", int_arm_neon_vqmovns>;
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
+ "vqmovn", "u", int_arm_neon_vqmovnu>;
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
+ "vqmovun", "s", int_arm_neon_vqmovnsu>;
+// VMOVL : Vector Lengthening Move
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
+def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
+def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
+
+// Vector Conversions.
+
+// VCVT : Vector Convert Between Floating-Point and Integers
+def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v2i32, v2f32, fp_to_sint>;
+def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v2i32, v2f32, fp_to_uint>;
+def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v2f32, v2i32, sint_to_fp>;
+def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v2f32, v2i32, uint_to_fp>;
+
+def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v4i32, v4f32, fp_to_sint>;
+def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v4i32, v4f32, fp_to_uint>;
+def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v4f32, v4i32, sint_to_fp>;
+def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v4f32, v4i32, uint_to_fp>;
+
+// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+let DecoderMethod = "DecodeVCVTD" in {
+def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+}
+
+let DecoderMethod = "DecodeVCVTQ" in {
+def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+}
+
+// VCVT : Vector Convert Between Half-Precision and Single-Precision.
+def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
+ IIC_VUNAQ, "vcvt", "f16.f32",
+ v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
+ Requires<[HasNEON, HasFP16]>;
+def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
+ IIC_VUNAQ, "vcvt", "f32.f16",
+ v4f32, v4i16, int_arm_neon_vcvthf2fp>,
+ Requires<[HasNEON, HasFP16]>;
+
+// Vector Reverse.
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
+
+def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
+
+def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
+
+def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
+
+def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
+
+// Other Vector Shuffles.
+
+// Aligned extractions: really just dropping registers
+
+class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
+ : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
+ (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+
+def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
+
+def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
+
+def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
+
+def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+
+
+// VEXT : Vector Extract
+
+
+// All of these have a two-operand InstAlias.
+let TwoOperandAliasConstraint = "$Vn = $Vd" in {
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
+ : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
+ IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
+ (Ty DPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
+ : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
+ IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
+ (Ty QPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+}
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
+ (v2f32 DPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
+ (v4f32 QPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
+// VTRN : Vector Transpose
+
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
+
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
+
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
+
+// Vector Table Lookup and Table Extension.
+
+// VTBL : Vector Table Lookup
+let DecoderMethod = "DecodeTBLInstruction" in {
+def VTBL1
+ : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
+ (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBL2
+ : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
+ (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
+def VTBL3
+ : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
+ (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
+def VTBL4
+ : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
+ (ins VecListFourD:$Vn, DPR:$Vm),
+ NVTBLFrm, IIC_VTB4,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBL3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
+def VTBL4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
+
+// VTBX : Vector Table Extension
+def VTBX1
+ : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
+ DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBX2
+ : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
+def VTBX3
+ : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
+ NVTBLFrm, IIC_VTBX3,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
+ "$orig = $Vd", []>;
+def VTBX4
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
+ "$orig = $Vd", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBX3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX3, "$orig = $dst", []>;
+def VTBX4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX4, "$orig = $dst", []>;
+} // DecoderMethod = "DecodeTBLInstruction"
+
+//===----------------------------------------------------------------------===//
+// NEON instructions for single-precision FP math
+//===----------------------------------------------------------------------===//
+
+class N2VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$acc, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+def : N3VSPat<fadd, VADDfd>;
+def : N3VSPat<fsub, VSUBfd>;
+def : N3VSPat<fmul, VMULfd>;
+def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
+ Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
+ Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
+def : N2VSPat<fabs, VABSfd>;
+def : N2VSPat<fneg, VNEGfd>;
+def : N3VSPat<NEONfmax, VMAXfd>;
+def : N3VSPat<NEONfmin, VMINfd>;
+def : N2VSPat<arm_ftosi, VCVTf2sd>;
+def : N2VSPat<arm_ftoui, VCVTf2ud>;
+def : N2VSPat<arm_sitof, VCVTs2fd>;
+def : N2VSPat<arm_uitof, VCVTu2fd>;
+
+// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
+def : Pat<(f32 (bitconvert GPR:$a)),
+ (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
+ Requires<[HasNEON, DontUseVMOVSR]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// bit_convert
+def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+
+// Fold extracting an element out of a v2i32 into a vfp register.
+def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
+ (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
+// Vector lengthening move with load, matching extending loads.
+
+// extload, zextload and sextload for a standard lengthening load. Example:
+// Lengthen_Single<"8", "i16", "8"> =
+// Pat<(v8i16 (extloadvi8 addrmode6:$addr))
+// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0)))>;
+multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
+ let AddedComplexity = 10 in {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
+ (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+ }
+}
+
+// extload, zextload and sextload for a lengthening load which only uses
+// half the lanes available. Example:
+// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
+// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0))),
+// dsub_0)>;
+multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
+ string InsnLanes, string InsnTy> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)>;
+}
+
+// extload, zextload and sextload for a lengthening load followed by another
+// lengthening load, to quadruple the initial length.
+//
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
+// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)),
+// (i32 0))),
+// dsub_0)),
+// dsub_0)>;
+multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
+}
+
+// extload, zextload and sextload for a lengthening load followed by another
+// lengthening load, to quadruple the initial length, but which ends up only
+// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
+//
+// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
+// Pat<(v2i32 (extloadvi8 addrmode6:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0))),
+// dsub_0)),
+// dsub_0)>;
+multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
+ dsub_0)>;
+}
+
+defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
+defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
+defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
+
+defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+
+// Double lengthening - v4i8 -> v4i16 -> v4i32
+defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
+// v2i8 -> v2i16 -> v2i32
+defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
+// v2i16 -> v2i32 -> v2i64
+defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
+
+// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
+def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
+def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
+
+// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
+ (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
+ (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
+ (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
+ (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+// ... two-operand aliases
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VLD1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD1LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD1LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD1LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST1LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST1LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST1LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD2LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST2LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD3 all-lanes pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3DUPdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VLD3 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD3 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VST3 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST3LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST3 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VST3dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD4 all-lanes pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4DUPdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VLD4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+
+// VLD4 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VST4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST4 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VMOV takes an optional datatype suffix
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
+ (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
+ (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
+ (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
+ (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
+ (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
+ (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
+ (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
+ (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
+ (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
+ (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
+ (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
+ (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
+ (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
+ (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
+ (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
+ (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
+ (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
+ (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
+ (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
+ (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
+ (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
+ (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
+ (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
+ (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
+ (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
+ (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
+ (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
+ (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// VSWP allows, but does not require, a type suffix.
+defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
+ (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
+ (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
+
+// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
+ (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
+ (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
+ (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
+ (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+
+// "vmov Rd, #-imm" can be handled via "vmvn".
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
+ (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
+ (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
+ (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
+ (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+
+// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
+// these should restrict to just the Q register variants, but the register
+// classes are enough to match correctly regardless, so we keep it simple
+// and just use MnemonicAlias.
+def : NEONMnemonicAlias<"vbicq", "vbic">;
+def : NEONMnemonicAlias<"vandq", "vand">;
+def : NEONMnemonicAlias<"veorq", "veor">;
+def : NEONMnemonicAlias<"vorrq", "vorr">;
+
+def : NEONMnemonicAlias<"vmovq", "vmov">;
+def : NEONMnemonicAlias<"vmvnq", "vmvn">;
+// Explicit versions for floating point so that the FPImm variants get
+// handled early. The parser gets confused otherwise.
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
+def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
+
+def : NEONMnemonicAlias<"vaddq", "vadd">;
+def : NEONMnemonicAlias<"vsubq", "vsub">;
+
+def : NEONMnemonicAlias<"vminq", "vmin">;
+def : NEONMnemonicAlias<"vmaxq", "vmax">;
+
+def : NEONMnemonicAlias<"vmulq", "vmul">;
+
+def : NEONMnemonicAlias<"vabsq", "vabs">;
+
+def : NEONMnemonicAlias<"vshlq", "vshl">;
+def : NEONMnemonicAlias<"vshrq", "vshr">;
+
+def : NEONMnemonicAlias<"vcvtq", "vcvt">;
+
+def : NEONMnemonicAlias<"vcleq", "vcle">;
+def : NEONMnemonicAlias<"vceqq", "vceq">;
+
+def : NEONMnemonicAlias<"vzipq", "vzip">;
+def : NEONMnemonicAlias<"vswpq", "vswp">;
+
+def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
+def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
+
+
+// Alias for loading floating point immediates that aren't representable
+// using the vmov.f32 encoding but the bitpattern is representable using
+// the .i32 encoding.
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
+ (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
+ (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 000000000000..ae7a5c00bd74
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,1417 @@
+//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def imm_sr_XFORM: SDNodeXForm<imm, [{
+ unsigned Imm = N->getZExtValue();
+ return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), MVT::i32);
+}]>;
+def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; }
+def imm_sr : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
+ return Imm > 0 && Imm <= 32;
+}], imm_sr_XFORM> {
+ let PrintMethod = "printThumbSRImm";
+ let ParserMatchClass = ThumbSRImmAsmOperand;
+}
+
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getZExtValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+ return ~((uint32_t)N->getZExtValue()) < 256;
+}]>;
+
+def imm8_255 : ImmLeaf<i32, [{
+ return Imm >= 8 && Imm < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+ unsigned Val = -N->getZExtValue();
+ return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break imm's up into two pieces: an immediate + a left shift. This uses
+// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt
+// to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+ let EncoderMethod = "getThumbAdrLabelOpValue";
+}
+
+// Scaled 4 immediate.
+def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; }
+def t_imm0_1020s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+ let ParserMatchClass = t_imm0_1020s4_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def t_imm0_508s4_asmoperand: AsmOperandClass { let Name = "Imm0_508s4"; }
+def t_imm0_508s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+ let ParserMatchClass = t_imm0_508s4_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+// Alias use only, so no printer is necessary.
+def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; }
+def t_imm0_508s4_neg : Operand<i32> {
+ let ParserMatchClass = t_imm0_508s4_neg_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// Define Thumb specific addressing modes.
+
+let OperandType = "OPERAND_PCREL" in {
+def t_brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getThumbBRTargetOpValue";
+ let DecoderMethod = "DecodeThumbBROperand";
+}
+
+def t_bcctarget : Operand<i32> {
+ let EncoderMethod = "getThumbBCCTargetOpValue";
+ let DecoderMethod = "DecodeThumbBCCTargetOperand";
+}
+
+def t_cbtarget : Operand<i32> {
+ let EncoderMethod = "getThumbCBTargetOpValue";
+ let DecoderMethod = "DecodeThumbCmpBROperand";
+}
+
+def t_bltarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLTargetOpValue";
+ let DecoderMethod = "DecodeThumbBLTargetOperand";
+}
+
+def t_blxtarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLXTargetOpValue";
+ let DecoderMethod = "DecodeThumbBLXOffset";
+}
+}
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; }
+def t_addrmode_rr : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_rrs := reg + reg
+//
+// We use separate scaled versions because the Select* functions need
+// to explicitly check for a matching constant and return false here so that
+// the reg+imm forms will match instead. This is a horrible way to do that,
+// as it forces tight coupling between the methods, but it's how selectiondag
+// currently works.
+def t_addrmode_rrs1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+def t_addrmode_rrs2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+def t_addrmode_rrs4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_is4 := reg + imm5 * 4
+//
+def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; }
+def t_addrmode_is4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
+ let PrintMethod = "printThumbAddrModeImm5S4Operand";
+ let ParserMatchClass = t_addrmode_is4_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_is2 := reg + imm5 * 2
+//
+def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; }
+def t_addrmode_is2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
+ let PrintMethod = "printThumbAddrModeImm5S2Operand";
+ let ParserMatchClass = t_addrmode_is2_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_is1 := reg + imm5
+//
+def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; }
+def t_addrmode_is1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
+ let PrintMethod = "printThumbAddrModeImm5S1Operand";
+ let ParserMatchClass = t_addrmode_is1_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+// FIXME: This really shouldn't have an explicit SP operand at all. It should
+// be implicit, just like in the instruction encoding itself.
+def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; }
+def t_addrmode_sp : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let EncoderMethod = "getAddrModeThumbSPOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeSP";
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let ParserMatchClass = t_addrmode_sp_asm_operand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_pc := <label> => pc + imm8 * 4
+//
+def t_addrmode_pc : Operand<i32> {
+ let EncoderMethod = "getAddrModePCOpValue";
+ let DecoderMethod = "DecodeThumbAddrModePC";
+ let PrintMethod = "printThumbLdrLabelOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def tADJCALLSTACKUP :
+ PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
+ [(ARMcallseq_end imm:$amt1, imm:$amt2)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+def tADJCALLSTACKDOWN :
+ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
+ [(ARMcallseq_start imm:$amt)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+}
+
+class T1SystemEncoding<bits<8> opc>
+ : T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = opc;
+}
+
+def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>,
+ T1SystemEncoding<0x00>, // A8.6.110
+ Requires<[IsThumb2]>;
+
+def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>,
+ T1SystemEncoding<0x10>, // A8.6.410
+ Requires<[IsThumb2]>;
+
+def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>,
+ T1SystemEncoding<0x20>, // A8.6.408
+ Requires<[IsThumb2]>;
+
+def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>,
+ T1SystemEncoding<0x30>, // A8.6.409
+ Requires<[IsThumb2]>;
+
+def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>,
+ T1SystemEncoding<0x40>, // A8.6.157
+ Requires<[IsThumb2]>;
+
+// The imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val",
+ []>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b10;
+ // A8.6.22
+ bits<8> val;
+ let Inst{7-0} = val;
+}
+
+def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end",
+ []>, T1Encoding<0b101101> {
+ bits<1> end;
+ // A8.6.156
+ let Inst{9-5} = 0b10010;
+ let Inst{4} = 1;
+ let Inst{3} = end;
+ let Inst{2-0} = 0b000;
+}
+
+// Change Processor State is a system instruction -- for disassembly only.
+def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
+ NoItinerary, "cps$imod $iflags", []>,
+ T1Misc<0b0110011> {
+ // A8.6.38 & B6.1.1
+ bit imod;
+ bits<3> iflags;
+
+ let Inst{4} = imod;
+ let Inst{3} = 0;
+ let Inst{2-0} = iflags;
+ let DecoderMethod = "DecodeThumbCPS";
+}
+
+// For both thumb1 and thumb2.
+let isNotDuplicable = 1, isCodeGenOnly = 1 in
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6
+ bits<3> dst;
+ let Inst{6-3} = 0b1111; // Rm = pc
+ let Inst{2-0} = dst;
+}
+
+// ADD <Rd>, sp, #<imm8>
+// FIXME: This should not be marked as having side effects, and it should be
+// rematerializable. Clearing the side effect bit causes miscompilations,
+// probably because the instruction can be moved around.
+def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
+ IIC_iALUi, "add", "\t$dst, $sp, $imm", []>,
+ T1Encoding<{1,0,1,0,1,?}> {
+ // A6.2 & A8.6.8
+ bits<3> dst;
+ bits<8> imm;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
+}
+
+// ADD sp, sp, #<imm7>
+def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
+ IIC_iALUi, "add", "\t$Rdn, $imm", []>,
+ T1Misc<{0,0,0,0,0,?,?}> {
+ // A6.2.5 & A8.6.8
+ bits<7> imm;
+ let Inst{6-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSPImm";
+}
+
+// SUB sp, sp, #<imm7>
+// FIXME: The encoding and the ASM string don't match up.
+def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
+ IIC_iALUi, "sub", "\t$Rdn, $imm", []>,
+ T1Misc<{0,0,0,0,1,?,?}> {
+ // A6.2.5 & A8.6.214
+ bits<7> imm;
+ let Inst{6-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSPImm";
+}
+
+def : tInstAlias<"add${p} sp, $imm",
+ (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
+def : tInstAlias<"add${p} sp, sp, $imm",
+ (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
+
+// Can optionally specify SP as a three operand instruction.
+def : tInstAlias<"add${p} sp, sp, $imm",
+ (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>;
+def : tInstAlias<"sub${p} sp, sp, $imm",
+ (tSUBspi SP, t_imm0_508s4:$imm, pred:$p)>;
+
+// ADD <Rm>, sp
+def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr,
+ "add", "\t$Rdn, $sp, $Rn", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.9 Encoding T1
+ bits<4> Rdn;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = 0b1101;
+ let Inst{2-0} = Rdn{2-0};
+ let DecoderMethod = "DecodeThumbAddSPReg";
+}
+
+// ADD sp, <Rm>
+def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.9 Encoding T2
+ bits<4> Rm;
+ let Inst{7} = 1;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b101;
+ let DecoderMethod = "DecodeThumbAddSPReg";
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>,
+ T1Special<{1,1,0,?}> {
+ // A6.2.3 & A8.6.25
+ bits<4> Rm;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b000;
+ let Unpredictable{2-0} = 0b111;
+ }
+}
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
+ [(ARMretflag)], (tBX LR, pred:$p)>;
+
+ // Alternative return instruction used by vararg functions.
+ def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
+ 2, IIC_Br, [],
+ (tBX GPR:$Rm, pred:$p)>;
+}
+
+// All calls clobber the non-callee saved registers. SP is marked as a use to
+// prevent stack-pointer assignments that appear immediately before calls from
+// potentially appearing dead.
+let isCall = 1,
+ Defs = [LR], Uses = [SP] in {
+ // Also used for Thumb2
+ def tBL : TIx2<0b11110, 0b11, 1,
+ (outs), (ins pred:$p, t_bltarget:$func), IIC_Br,
+ "bl${p}\t$func",
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb]> {
+ bits<24> func;
+ let Inst{26} = func{23};
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = func{22};
+ let Inst{11} = func{21};
+ let Inst{10-0} = func{10-0};
+ }
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi : TIx2<0b11110, 0b11, 0,
+ (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br,
+ "blx${p}\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T]> {
+ bits<24> func;
+ let Inst{26} = func{23};
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = func{22};
+ let Inst{11} = func{21};
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
+
+ // Also used for Thumb2
+ def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br,
+ "blx${p}\t$func",
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T]>,
+ T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
+ bits<4> func;
+ let Inst{6-3} = func;
+ let Inst{2-0} = 0b000;
+ }
+
+ // ARMv4T
+ def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func),
+ 4, IIC_Br,
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ let isPredicable = 1 in
+ def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br,
+ "b", "\t$target", [(br bb:$target)]>,
+ T1Encoding<{1,1,1,0,0,?}> {
+ bits<11> target;
+ let Inst{10-0} = target;
+ }
+
+ // Far jump
+ // Just a pseudo for a tBL instruction. Needed to let regalloc know about
+ // the clobber of LR.
+ let Defs = [LR] in
+ def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p),
+ 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>;
+
+ def tBR_JTr : tPseudoInst<(outs),
+ (ins tGPR:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ }
+}
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+ def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
+ "b${p}\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>,
+ T1BranchCond<{1,1,0,1}> {
+ bits<4> p;
+ bits<8> target;
+ let Inst{11-8} = p;
+ let Inst{7-0} = target;
+}
+
+// Tail calls
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // IOS versions.
+ let Uses = [SP] in {
+ def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst),
+ 4, IIC_Br, [],
+ (tBX GPR:$dst, (ops 14, zero_reg))>,
+ Requires<[IsThumb]>;
+ }
+ // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls
+ // on IOS), so it's in ARMInstrThumb2.td.
+ // Non-IOS version:
+ let Uses = [SP] in {
+ def tTAILJMPdND : tPseudoExpand<(outs),
+ (ins t_brtarget:$dst, pred:$p),
+ 4, IIC_Br, [],
+ (tB t_brtarget:$dst, pred:$p)>,
+ Requires<[IsThumb, IsNotIOS]>;
+ }
+}
+
+
+// A8.6.218 Supervisor Call (Software Interrupt)
+// A8.6.16 B: Encoding T1
+// If Inst{11-8} == 0b1111 then SEE SVC
+let isCall = 1, Uses = [SP] in
+def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br,
+ "svc", "\t$imm", []>, Encoding16 {
+ bits<8> imm;
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = imm;
+}
+
+// The assembler uses 0xDEFE for a trap instruction.
+let isBarrier = 1, isTerminator = 1 in
+def tTRAP : TI<(outs), (ins), IIC_Br,
+ "trap", [(trap)]>, Encoding16 {
+ let Inst = 0xdefe;
+}
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+// Loads: reg/reg and reg/imm5
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 1 /* Load */,
+ (outs tGPR:$Rt), (ins AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+}
+// Stores: reg/reg and reg/imm5
+multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 0 /* Store */,
+ (outs), (ins tGPR:$Rt, AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+}
+
+// A8.6.57 & A8.6.60
+defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iLoad_r, IIC_iLoad_i, "ldr",
+ UnOpFrag<(load node:$Src)>>;
+
+// A8.6.64 & A8.6.61
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// A8.6.76 & A8.6.73
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+
+let AddedComplexity = 10 in
+def tLDRSB : // A8.6.80
+ T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
+ AddrModeT1_1, IIC_iLoad_bh_r,
+ "ldrsb", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+let AddedComplexity = 10 in
+def tLDRSH : // A8.6.84
+ T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
+ AddrModeT1_2, IIC_iLoad_bh_r,
+ "ldrsh", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;
+
+let canFoldAsLoad = 1 in
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// Load tconstpool
+// FIXME: Use ldr.n to work around a darwin assembler bug.
+let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", ".n\t$Rt, $addr",
+ [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// FIXME: Remove this entry when the above ldr.n workaround is fixed.
+// For assembly/disassembly use only.
+def tLDRpciASM : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr", []>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// A8.6.194 & A8.6.192
+defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iStore_r, IIC_iStore_i, "str",
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+
+// A8.6.197 & A8.6.195
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// A8.6.207 & A8.6.205
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$Rt, $addr",
+ [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+ T1LdStSP<{0,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+// These require base address to be written back or one of the loaded regs.
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+}
+
+// Writeback version is just a pseudo, as there's no encoding difference.
+// Writeback happens iff the base register is not in the destination register
+// list.
+def tLDMIA_UPD :
+ InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
+ "$Rn = $wb", IIC_iLoad_mu>,
+ PseudoInstExpansion<(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)> {
+ let Size = 2;
+ let OutOperandList = (outs GPR:$wb);
+ let InOperandList = (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops);
+ let Pattern = [];
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// There is no non-writeback version of STM for Thumb.
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+def tSTMIA_UPD : Thumb1I<(outs GPR:$wb),
+ (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ AddrModeNone, 2, IIC_iStore_mu,
+ "stm${p}\t$Rn!, $regs", "$Rn = $wb", []>,
+ T1Encoding<{1,1,0,0,0,?}> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+}
+
+} // neverHasSideEffects
+
+def : InstAlias<"ldm${p} $Rn!, $regs",
+ (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iPop,
+ "pop${p}\t$regs", []>,
+ T1Misc<{1,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{15};
+ let Inst{7-0} = regs{7-0};
+}
+
+let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iStore_m,
+ "push${p}\t$regs", []>,
+ T1Misc<{0,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{14};
+ let Inst{7-0} = regs{7-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+// Helper classes for encoding T1pI patterns:
+class T1pIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rn;
+}
+class T1pIMiscEncode<bits<7> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1Misc<opA> {
+ bits<3> Rm;
+ bits<3> Rd;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sI patterns:
+class T1sIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncode<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ bits<3> Rd;
+ let Inst{8-6} = Rm;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sIt patterns:
+class T1sItDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rdn;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rdn;
+}
+class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rdn;
+ bits<8> imm8;
+ let Inst{10-8} = Rdn;
+ let Inst{7-0} = imm8;
+}
+
+// Add with carry register
+let isCommutable = 1, Uses = [CPSR] in
+def tADC : // A8.6.2
+ T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
+ "adc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>;
+
+// Add immediate
+def tADDi3 : // A8.6.4 T1
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ IIC_iALUi,
+ "add", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tADDi8 : // A8.6.4 T2
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
+ "add", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
+
+// Add register
+let isCommutable = 1 in
+def tADDrr : // A8.6.6 T1
+ T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "add", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>;
+
+let neverHasSideEffects = 1 in
+def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6 T2
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+}
+
+// AND register
+let isCommutable = 1 in
+def tAND : // A8.6.12
+ T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "and", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>;
+
+// ASR immediate
+def tASRri : // A8.6.14
+ T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
+ IIC_iMOVsi,
+ "asr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// ASR register
+def tASRrr : // A8.6.15
+ T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "asr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>;
+
+// BIC register
+def tBIC : // A8.6.20
+ T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "bic", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>;
+
+// CMN register
+let isCompare = 1, Defs = [CPSR] in {
+//FIXME: Disable CMN, as CCodes are backwards from compare expectations
+// Compare-to-zero still works out, just not the relationals
+//def tCMN : // A8.6.33
+// T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs),
+// IIC_iCMPr,
+// "cmn", "\t$lhs, $rhs",
+// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMNz : // A8.6.33
+ T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>;
+
+} // isCompare = 1, Defs = [CPSR]
+
+// CMP immediate
+let isCompare = 1, Defs = [CPSR] in {
+def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi,
+ "cmp", "\t$Rn, $imm8",
+ [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
+ T1General<{1,0,1,?,?}> {
+ // A8.6.35
+ bits<3> Rn;
+ bits<8> imm8;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = imm8;
+}
+
+// CMP register
+def tCMPr : // A8.6.36 T1
+ T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm",
+ [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>;
+
+def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm", []>,
+ T1Special<{0,1,?,?}> {
+ // A8.6.36 T2
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{7} = Rn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rn{2-0};
+}
+} // isCompare = 1, Defs = [CPSR]
+
+
+// XOR register
+let isCommutable = 1 in
+def tEOR : // A8.6.45
+ T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "eor", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>;
+
+// LSL immediate
+def tLSLri : // A8.6.88
+ T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5),
+ IIC_iMOVsi,
+ "lsl", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSL register
+def tLSLrr : // A8.6.89
+ T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsl", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>;
+
+// LSR immediate
+def tLSRri : // A8.6.90
+ T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
+ IIC_iMOVsi,
+ "lsr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSR register
+def tLSRrr : // A8.6.91
+ T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move register
+let isMoveImm = 1 in
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
+ "mov", "\t$Rd, $imm8",
+ [(set tGPR:$Rd, imm0_255:$imm8)]>,
+ T1General<{1,0,0,?,?}> {
+ // A8.6.96
+ bits<3> Rd;
+ bits<8> imm8;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = imm8;
+}
+// Because we have an explicit tMOVSr below, we need an alias to handle
+// the immediate "movs" form here. Blech.
+def : tInstAlias <"movs $Rdn, $imm",
+ (tMOVi8 tGPR:$Rdn, CPSR, imm0_255:$imm, 14, 0)>;
+
+// A7-73: MOV(2) - mov setting flag.
+
+let neverHasSideEffects = 1 in {
+def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
+ 2, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", "", []>,
+ T1Special<{1,0,?,?}> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{7} = Rd{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rd{2-0};
+}
+let Defs = [CPSR] in
+def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "movs\t$Rd, $Rm", []>, Encoding16 {
+ // A8.6.97
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{15-6} = 0b0000000000;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+} // neverHasSideEffects
+
+// Multiply register
+let isCommutable = 1 in
+def tMUL : // A8.6.105 T1
+ Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2,
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd",
+ [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>,
+ T1DataProcessing<0b1101> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+ let AsmMatchConverter = "cvtThumbMultiply";
+}
+
+def :tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn,
+ pred:$p)>;
+
+// Move inverse register
+def tMVN : // A8.6.107
+ T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr,
+ "mvn", "\t$Rd, $Rn",
+ [(set tGPR:$Rd, (not tGPR:$Rn))]>;
+
+// Bitwise or register
+let isCommutable = 1 in
+def tORR : // A8.6.114
+ T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "orr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>;
+
+// Swaps
+def tREV : // A8.6.134
+ T1pIMiscEncode<{1,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (bswap tGPR:$Rm))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREV16 : // A8.6.135
+ T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev16", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREVSH : // A8.6.136
+ T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "revsh", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Rotate right register
+def tROR : // A8.6.139
+ T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "ror", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>;
+
+// Negate register
+def tRSB : // A8.6.141
+ T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn),
+ IIC_iALUi,
+ "rsb", "\t$Rd, $Rn, #0",
+ [(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
+
+// Subtract with carry register
+let Uses = [CPSR] in
+def tSBC : // A8.6.151
+ T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sbc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>;
+
+// Subtract immediate
+def tSUBi3 : // A8.6.210 T1
+ T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ IIC_iALUi,
+ "sub", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tSUBi8 : // A8.6.210 T2
+ T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
+ "sub", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
+
+// Subtract register
+def tSUBrr : // A8.6.212
+ T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sub", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
+
+// Sign-extend byte
+def tSXTB : // A8.6.222
+ T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Sign-extend short
+def tSXTH : // A8.6.224
+ T1pIMiscEncode<{0,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Test
+let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
+def tTST : // A8.6.230
+ T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
+ "tst", "\t$Rn, $Rm",
+ [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>;
+
+// Zero-extend byte
+def tUXTB : // A8.6.262
+ T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Zero-extend short
+def tUXTH : // A8.6.264
+ T1pIMiscEncode<{0,0,1,0,1,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
+// Expanded after instruction selection into a branch sequence.
+let usesCustomInserter = 1 in // Expanded after instruction selection.
+ def tMOVCCr_pseudo :
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
+ NoItinerary,
+ [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+
+// tLEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+
+def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}\t$Rd, $addr", []>,
+ T1Encoding<{1,0,1,0,0,?}> {
+ bits<3> Rd;
+ bits<8> addr;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = addr;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
+ 2, IIC_iALUi, []>;
+
+let hasSideEffects = 1 in
+def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 2, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
+def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+//
+
+// eh_sjlj_setjmp() is an instruction sequence to store the return address and
+// save #0 in R0 for the non-longjmp case. Since by its nature we may be coming
+// from some other function to get here, and we're using the stack frame for the
+// containing function to save/restore registers, we can't keep anything live in
+// regs across the eh_sjlj_setjmp(), else it will almost certainly have been
+// tromped upon when we get here from a longjmp(). We force everything out of
+// registers except for our own input by listing the relevant registers in
+// Defs. By doing so, we also cause the prologue/epilogue code to actively
+// preserve all of the callee-saved resgisters, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
+def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "","",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
+
+// FIXME: Non-IOS version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
+ Defs = [ R7, LR, SP ] in
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+ AddrModeNone, 0, IndexModeNone,
+ Pseudo, NoItinerary, "", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsThumb, IsIOS]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Comparisons
+def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
+ (tCMPi8 tGPR:$Rn, imm0_255:$imm8)>;
+def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
+ (tCMPr tGPR:$Rn, tGPR:$Rm)>;
+
+// Add with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
+ (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
+ (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
+ (tADDrr tGPR:$lhs, tGPR:$rhs)>;
+
+// Subtract with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
+ (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
+ (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
+def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
+ (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
+
+// ConstantPool, GlobalAddress
+def : T1Pat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+
+// JumpTable
+def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Direct calls
+def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
+ Requires<[IsThumb]>;
+
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T]>;
+
+// Indirect calls to ARM routines
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
+ Requires<[IsThumb, HasV5T]>;
+
+// zextload i1 -> zextload i8
+def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
+ (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
+ (tLDRBi t_addrmode_is1:$addr)>;
+
+// extload -> zextload
+def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+
+// If it's impossible to use [r,r] address mode for sextload, select to
+// ldr{b|h} + sxt{b|h} instead.
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tSXTB (tLDRBi t_addrmode_is1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tSXTH (tLDRHi t_addrmode_is2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
+
+def : T1Pat<(atomic_load_8 t_addrmode_is1:$src),
+ (tLDRBi t_addrmode_is1:$src)>;
+def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src),
+ (tLDRBr t_addrmode_rrs1:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_is2:$src),
+ (tLDRHi t_addrmode_is2:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src),
+ (tLDRHr t_addrmode_rrs2:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_is4:$src),
+ (tLDRi t_addrmode_is4:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src),
+ (tLDRr t_addrmode_rrs4:$src)>;
+def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val),
+ (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>;
+def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val),
+ (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val),
+ (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val),
+ (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val),
+ (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val),
+ (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : T1Pat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : T1Pat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary,
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+// Pseudo-instruction for merged POP and return.
+// FIXME: remove when we have a way to marking a MI with these properties.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1 in
+def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ 2, IIC_iPop_Br, [],
+ (tPOP pred:$p, reglist:$regs)>;
+
+// Indirect branch using "mov pc, $Rm"
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p),
+ 2, IIC_Br, [(brind GPR:$Rm)],
+ (tMOVr PC, GPR:$Rm, pred:$p)>;
+}
+
+
+// In Thumb1, "nop" is encoded as a "mov r8, r8". Technically, the bf00
+// encoding is available on ARMv6K, but we don't differentiate that finely.
+def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
+
+
+// For round-trip assembly/disassembly, we have to handle a CPS instruction
+// without any iflags. That's not, strictly speaking, valid syntax, but it's
+// a useful extension and assembles to defined behaviour (the insn does
+// nothing).
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
+
+// Implied destination operand forms for shifts.
+def : tInstAlias<"lsl${s}${p} $Rdm, $imm",
+ (tLSLri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm0_31:$imm, pred:$p)>;
+def : tInstAlias<"lsr${s}${p} $Rdm, $imm",
+ (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
+def : tInstAlias<"asr${s}${p} $Rdm, $imm",
+ (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
new file mode 100644
index 000000000000..c9d709eb5222
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -0,0 +1,4327 @@
+//===-- ARMInstrThumb2.td - Thumb2 support for ARM ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb2 instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+// IT block predicate field
+def it_pred_asmoperand : AsmOperandClass {
+ let Name = "ITCondCode";
+ let ParserMethod = "parseITCondCode";
+}
+def it_pred : Operand<i32> {
+ let PrintMethod = "printMandatoryPredicateOperand";
+ let ParserMatchClass = it_pred_asmoperand;
+}
+
+// IT block condition mask
+def it_mask_asmoperand : AsmOperandClass { let Name = "ITMask"; }
+def it_mask : Operand<i32> {
+ let PrintMethod = "printThumbITMask";
+ let ParserMatchClass = it_mask_asmoperand;
+}
+
+// t2_shift_imm: An integer that encodes a shift amount and the type of shift
+// (asr or lsl). The 6-bit immediate encodes as:
+// {5} 0 ==> lsl
+// 1 asr
+// {4-0} imm5 shift amount.
+// asr #32 not allowed
+def t2_shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterImmAsmOperand;
+ let DecoderMethod = "DecodeT2ShifterImmOperand";
+}
+
+// Shifted operands. No register controlled shifts for Thumb2.
+// Note: We do not support rrx shifted operands yet.
+def t2_so_reg : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getT2SORegOpValue";
+ let PrintMethod = "printT2SOOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
+ let MIOperandInfo = (ops rGPR, i32imm);
+}
+
+// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
+def t2_so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+// t2_so_imm_neg_XFORM - Return the negation of a t2_so_imm value
+def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32);
+}]>;
+
+// so_imm_notSext_XFORM - Return a so_imm value packed into the format
+// described for so_imm_notSext def below, with sign extension from 16
+// bits.
+def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{
+ APInt apIntN = N->getAPIntValue();
+ unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
+ return CurDAG->getTargetConstant(~N16bitSignExt, MVT::i32);
+}]>;
+
+// t2_so_imm - Match a 32-bit immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
+// immediate splatted into multiple bytes of the word.
+def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; }
+def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }]> {
+ let ParserMatchClass = t2_so_imm_asmoperand;
+ let EncoderMethod = "getT2SOImmOpValue";
+ let DecoderMethod = "DecodeT2SOImm";
+}
+
+// t2_so_imm_not - Match an immediate that is a complement
+// of a t2_so_imm.
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use t2_so_imm.
+def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; }
+def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_not_XFORM> {
+ let ParserMatchClass = t2_so_imm_not_asmoperand;
+}
+
+// t2_so_imm_notSext - match an immediate that is a complement of a t2_so_imm
+// if the upper 16 bits are zero.
+def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{
+ APInt apIntN = N->getAPIntValue();
+ if (!apIntN.isIntN(16)) return false;
+ unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
+ return ARM_AM::getT2SOImmVal(~N16bitSignExt) != -1;
+ }], t2_so_imm_notSext16_XFORM> {
+ let ParserMatchClass = t2_so_imm_not_asmoperand;
+}
+
+// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
+def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
+def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ int64_t Value = -(int)N->getZExtValue();
+ return Value && ARM_AM::getT2SOImmVal(Value) != -1;
+}], t2_so_imm_neg_XFORM> {
+ let ParserMatchClass = t2_so_imm_neg_asmoperand;
+}
+
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
+def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; }
+def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 4096;
+}]> {
+ let ParserMatchClass = imm0_4095_asmoperand;
+}
+
+def imm0_4095_neg_asmoperand: AsmOperandClass { let Name = "Imm0_4095Neg"; }
+def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 4096;
+}], imm_neg_XFORM> {
+ let ParserMatchClass = imm0_4095_neg_asmoperand;
+}
+
+def imm1_255_neg : PatLeaf<(i32 imm), [{
+ uint32_t Val = -N->getZExtValue();
+ return (Val > 0 && Val < 255);
+}], imm_neg_XFORM>;
+
+def imm0_255_not : PatLeaf<(i32 imm), [{
+ return (uint32_t)(~N->getZExtValue()) < 255;
+}], imm_comp_XFORM>;
+
+def lo5AllOne : PatLeaf<(i32 imm), [{
+ // Returns true if all low 5-bits are 1.
+ return (((uint32_t)N->getZExtValue()) & 0x1FUL) == 0x1FUL;
+}]>;
+
+// Define Thumb2 specific addressing modes.
+
+// t2addrmode_imm12 := reg + imm12
+def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
+def t2addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
+ let PrintMethod = "printAddrModeImm12Operand";
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm12";
+ let ParserMatchClass = t2addrmode_imm12_asmoperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2ldrlabel := imm12
+def t2ldrlabel : Operand<i32> {
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printThumbLdrLabelOperand";
+}
+
+def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";}
+def t2ldr_pcrel_imm12 : Operand<i32> {
+ let ParserMatchClass = t2ldr_pcrel_imm12_asmoperand;
+ // used for assembler pseudo instruction and maps to t2ldrlabel, so
+ // doesn't need encoder or print methods of its own.
+}
+
+// ADR instruction labels.
+def t2adrlabel : Operand<i32> {
+ let EncoderMethod = "getT2AdrLabelOpValue";
+ let PrintMethod = "printAdrLabelOperand";
+}
+
+
+// t2addrmode_posimm8 := reg + imm8
+def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
+def t2addrmode_posimm8 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemPosImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_negimm8 := reg - imm8
+def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
+def t2addrmode_negimm8 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemNegImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_imm8 := reg +/- imm8
+def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
+def t2addrmode_imm8 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+def t2am_imm8_offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+ let EncoderMethod = "getT2AddrModeImm8OffsetOpValue";
+ let DecoderMethod = "DecodeT2Imm8";
+}
+
+// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
+def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
+def t2addrmode_imm8s4 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8s4Operand";
+ let EncoderMethod = "getT2AddrModeImm8s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8s4";
+ let ParserMatchClass = MemImm8s4OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
+def t2am_imm8s4_offset : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+ let EncoderMethod = "getT2Imm8s4OpValue";
+ let DecoderMethod = "DecodeT2Imm8S4";
+}
+
+// t2addrmode_imm0_1020s4 := reg + (imm8 << 2)
+def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
+ let Name = "MemImm0_1020s4Offset";
+}
+def t2addrmode_imm0_1020s4 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm0_1020s4Operand";
+ let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm0_1020s4";
+ let ParserMatchClass = MemImm0_1020s4OffsetAsmOperand;
+ let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_so_reg := reg + (reg << imm2)
+def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";}
+def t2addrmode_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
+ let PrintMethod = "printT2AddrModeSoRegOperand";
+ let EncoderMethod = "getT2AddrModeSORegOpValue";
+ let DecoderMethod = "DecodeT2AddrModeSOReg";
+ let ParserMatchClass = t2addrmode_so_reg_asmoperand;
+ let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
+}
+
+// Addresses for the TBB/TBH instructions.
+def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; }
+def addrmode_tbb : Operand<i32> {
+ let PrintMethod = "printAddrModeTBB";
+ let ParserMatchClass = addrmode_tbb_asmoperand;
+ let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
+}
+def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; }
+def addrmode_tbh : Operand<i32> {
+ let PrintMethod = "printAddrModeTBH";
+ let ParserMatchClass = addrmode_tbh_asmoperand;
+ let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+
+class T2OneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2sOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2OneRegCmpImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2OneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sOneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2OneRegCmpShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2TwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2sTwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegCmp<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+
+class T2TwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2sTwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2TwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2sTwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sTwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2FourReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// binary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0,
+ string wide = ""> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>;
+}
+
+/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
+// the ".w" suffix to indicate that they are wide.
+multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> {
+ // Assembler aliases w/ the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
+ // Assembler aliases w/o the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"),
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rd, rGPR:$Rn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
+
+ // and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm,
+ pred:$p, cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"),
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
+}
+
+/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ def ri : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ t2_so_imm:$imm))]>;
+ // register
+ def rr : t2PseudoInst<(outs rGPR:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm, pred:$p),
+ 4, iir,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ }
+ // shifted register
+ def rs : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ 4, iis,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ t2_so_reg:$ShiftedRm))]>;
+}
+}
+
+/// T2I_rbin_s_is - Same as T2I_bin_s_irs, except selection DAG
+/// operands are reversed.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<PatFrag opnode> {
+ // shifted imm
+ def ri : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p),
+ 4, IIC_iALUi,
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm,
+ rGPR:$Rn))]>;
+ // shifted register
+ def rs : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ 4, IIC_iALUsi,
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm,
+ rGPR:$Rn))]>;
+}
+}
+
+/// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg})
+/// patterns for a binary operation that produces a value.
+multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : T2sTwoRegImm<
+ (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{15} = 0;
+ }
+ }
+ // 12-bit imm
+ def ri12 : T2I<
+ (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+ !strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = imm{11};
+ let Inst{25-24} = 0b10;
+ let Inst{23-21} = op23_21;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-12} = imm{10-8};
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = imm{7-0};
+ }
+ // register
+ def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm),
+ IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ }
+}
+
+/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
+/// for a binary operation that produces a value and use the carry
+/// bit. It's not predicable.
+let Defs = [CPSR], Uses = [CPSR] in {
+multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>,
+ Requires<[IsThumb2]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+}
+
+/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
+// rotate operation that produces a value.
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
+ // 5-bit imm
+ def ri : T2sTwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
+ opc, ".w\t$Rd, $Rm, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-21} = 0b010010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-21} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0000;
+ }
+
+ // Optional destination register
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+
+ // Assembler aliases w/o the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+
+ // and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+}
+
+/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to T2I_bin_irs except the instruction does not produce
+/// a explicit result, only implicitly set CPSR.
+multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode> {
+let isCompare = 1, Defs = [CPSR] in {
+ // shifted imm
+ def ri : T2OneRegCmpImm<
+ (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii,
+ opc, ".w\t$Rn, $imm",
+ [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ let Inst{11-8} = 0b1111; // Rd
+ }
+ // register
+ def rr : T2TwoRegCmp<
+ (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir,
+ opc, ".w\t$Rn, $Rm",
+ [(opnode GPRnopc:$Rn, rGPR:$Rm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{11-8} = 0b1111; // Rd
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rn, $ShiftedRm",
+ [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{11-8} = 0b1111; // Rd
+ }
+}
+
+ // Assembler aliases w/o the ".w" suffix.
+ // No alias here for 'rr' version as not all instantiations of this
+ // multiclass want one (CMP in particular, does not).
+ def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
+ (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
+ def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
+ (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
+}
+
+/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
+multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, RegisterClass target,
+ PatFrag opnode> {
+ def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = signed;
+ let Inst{23} = 1;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> {
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{9} = addr{8}; // U
+ let Inst{8} = 0; // The W bit.
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+
+ let DecoderMethod = "DecodeT2LoadShift";
+ }
+
+ // pci variant is very similar to i12, but supports negative offsets
+ // from the PC.
+ def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ let isReMaterializable = 1;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = ?; // add = (U == '1')
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = 0b1111; // Rn
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-0} = addr{11-0};
+ }
+}
+
+/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
+multiclass T2I_st<bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, RegisterClass target,
+ PatFrag opnode> {
+ def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_imm12:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0001;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+ }
+}
+
+/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm$rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+}
+
+// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
+class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
+// supported yet.
+class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[IsThumb2, HasT2ExtractPack]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
+ : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+class T2I_exta_rrot_np<bits<3> opcod, string opc>
+ : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<12> label;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = label{11};
+ let Inst{14-12} = label{10-8};
+ let Inst{7-0} = label{7-0};
+}
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
+ (ins t2adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-24} = 0b10;
+ // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
+ let Inst{22} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<13> addr;
+ let Inst{11-8} = Rd;
+ let Inst{23} = addr{12};
+ let Inst{21} = addr{12};
+ let Inst{26} = addr{11};
+ let Inst{14-12} = addr{10-8};
+ let Inst{7-0} = addr{7-0};
+
+ let DecoderMethod = "DecodeT2Adr";
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>;
+let hasSideEffects = 1 in
+def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 4, IIC_iALUi,
+ []>;
+
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR,
+ UnOpFrag<(load node:$Src)>>;
+
+// Loads with zero extension
+defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ rGPR, UnOpFrag<(zextloadi16 node:$Src)>>;
+defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ rGPR, UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// Loads with sign extension
+defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ rGPR, UnOpFrag<(sextloadi16 node:$Src)>>;
+defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ rGPR, UnOpFrag<(sextloadi8 node:$Src)>>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Load doubleword
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins t2addrmode_imm8s4:$addr),
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// zextload i1 -> zextload i8
+def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+// extload -> zextload
+// FIXME: Reduce the number of patterns by legalizing extload to zextload
+// earlier?
+def : T2Pat<(extloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_negimm8:$addr),
+ (t2LDRHi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
+ (t2LDRHpci tconstpool:$addr)>;
+
+// FIXME: The destination register of the loads and stores can't be PC, but
+// can be SP. We need another regclass (similar to rGPR) to represent
+// that. Not a pressing issue since these are selected manually,
+// not via pattern.
+
+// Indexed loads
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+
+def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
+// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
+class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt;
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW.
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
+def t2LDRBT : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>;
+def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
+
+// Store
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ rGPR, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+// Store doubleword
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
+ (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
+
+// Indexed stores
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins GPRnopc:$Rt, t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "str", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
+def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "strh", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
+
+def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
+} // mayStore = 1, neverHasSideEffects = 1
+
+def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins GPRnopc:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
+ "str", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_store GPRnopc:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strh", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
+// put the patterns on the instruction definitions directly as ISel wants
+// the address base and offset to be separate operands, not a single
+// complex operand like we represent the instructions themselves. The
+// pseudos map between the two.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in {
+def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+}
+
+// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
+// only.
+// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
+class T2IstT<bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = 0; // not signed
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 0; // store
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>;
+def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
+def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
+
+// ldrd / strd pre / post variants
+// For disassembly only.
+
+def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
+ (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
+ let AsmMatchConverter = "cvtT2LdrdPre";
+ let DecoderMethod = "DecodeT2LDRDPreInstruction";
+}
+
+def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
+ (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
+ IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
+ "$addr.base = $wb", []>;
+
+def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $wb", []> {
+ let AsmMatchConverter = "cvtT2StrdPre";
+ let DecoderMethod = "DecodeT2STRDPreInstruction";
+}
+
+def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
+ t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
+ "$addr.base = $wb", []>;
+
+// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
+// data/instruction access.
+// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
+// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
+multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
+
+ def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // U = 0
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1100;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{7-0} = addr{7-0}; // imm8
+ }
+
+ def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // add = TRUE for T1
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-6} = 0000000;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm2
+
+ let DecoderMethod = "DecodeT2LoadShift";
+ }
+ // FIXME: We should have a separate 'pci' variant here. As-is we represent
+ // it via the i12 variant, which it's related to, but that means we can
+ // represent negative immediates, which aren't legal for anything except
+ // the 'pci' case (Rn == 15).
+}
+
+defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+
+multiclass thumb2_st_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+}
+
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
+
+} // neverHasSideEffects
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in
+def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov", ".w\t$Rd, $Rm", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0000;
+}
+def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, zero_reg)>;
+def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, CPSR)>;
+def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, CPSR)>;
+
+// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+ AddedComplexity = 1 in
+def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
+ "mov", ".w\t$Rd, $imm",
+ [(set rGPR:$Rd, t2_so_imm:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+// cc_out is handled as part of the explicit mnemonic in the parser for 'mov'.
+// Use aliases to get that to play nice here.
+def : t2InstAlias<"movs${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, CPSR)>;
+def : t2InstAlias<"movs${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, CPSR)>;
+
+def : t2InstAlias<"mov${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, zero_reg)>;
+def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, zero_reg)>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set rGPR:$Rd, imm0_65535:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2MOVTWInstruction";
+}
+
+def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def t2MOVTi16 : T2I<(outs rGPR:$Rd),
+ (ins rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set rGPR:$Rd,
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0110;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2MOVTWInstruction";
+}
+
+def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+} // Constraints
+
+def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+def t2SXTB : T2I_ext_rrot<0b100, "sxtb",
+ UnOpFrag<(sext_inreg node:$Src, i8)>>;
+def t2SXTH : T2I_ext_rrot<0b000, "sxth",
+ UnOpFrag<(sext_inreg node:$Src, i16)>>;
+def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+
+def t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+def t2UXTB : T2I_ext_rrot<0b101, "uxtb",
+ UnOpFrag<(and node:$Src, 0x000000FF)>>;
+def t2UXTH : T2I_ext_rrot<0b001, "uxth",
+ UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
+ UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
+// (t2UXTB16 rGPR:$Src, 3)>,
+// Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
+ (t2UXTB16 rGPR:$Src, 1)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm t2ADD : T2I_bin_ii12rs<0b000, "add",
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
+//
+// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// AdjustInstrPostInstrSelection where we determine whether or not to
+// set the "s" bit based on CPSR liveness.
+//
+// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// support for an optional CPSR definition that corresponds to the DAG
+// node's second value. We can then eliminate the implicit def of CPSR.
+defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+let hasPostISelHook = 1 in {
+defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
+defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+}
+
+// RSB
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// FIXME: Eliminate them if we can write def : Pat patterns which defines
+// CPSR and the implicit def of CPSR is not needed.
+defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+// The AddedComplexity preferences the first variant over the others since
+// it can be shrunk to a 16-bit wide encoding, while the others cannot.
+let AddedComplexity = 1 in
+def : T2Pat<(add GPR:$src, imm1_255_neg:$imm),
+ (t2SUBri GPR:$src, imm1_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
+ (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
+ (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+
+let AddedComplexity = 1 in
+def : T2Pat<(ARMaddc rGPR:$src, imm1_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm1_255_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm),
+ (t2SUBSrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+let AddedComplexity = 1 in
+def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
+ (t2SBCri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
+ (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
+ (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>;
+
+// Select Bytes -- for disassembly only
+
+def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b010;
+ let Inst{23} = 0b1;
+ let Inst{22-20} = 0b010;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 0b1;
+ let Inst{6-4} = 0b000;
+}
+
+// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
+// And Miscellaneous operations -- for disassembly only
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pat = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins rGPR:$Rn, rGPR:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0101;
+ let Inst{22-20} = op22_20;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = op7_4;
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
+ [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
+def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
+def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
+def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
+ [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
+def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
+def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
+def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">;
+def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">;
+def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">;
+def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">;
+def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def t2SASX : T2I_pam<0b010, 0b0000, "sasx">;
+def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">;
+def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">;
+def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">;
+def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">;
+def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">;
+def t2UASX : T2I_pam<0b010, 0b0100, "uasx">;
+def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">;
+def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">;
+def t2USAX : T2I_pam<0b110, 0b0100, "usax">;
+def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">;
+def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">;
+def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">;
+def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">;
+def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">;
+def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">;
+def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">;
+def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">;
+def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">;
+def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">;
+def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
+def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
+def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+
+// Helper class for disassembly only
+// A6.3.16 & A6.3.17
+// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
+class T2ThreeReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2FourReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+// Unsigned Sum of Absolute Differences [and Accumulate].
+def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
+ "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+// Signed/Unsigned saturate.
+class T2SatI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> sat_imm;
+ bits<7> sh;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{4-0} = sat_imm;
+ let Inst{21} = sh{5};
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+def t2SSAT: T2SatI<
+ (outs rGPR:$Rd),
+ (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{5} = 0;
+}
+
+def t2SSAT16: T2SatI<
+ (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
+ "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+ let Inst{5-4} = 0b00;
+}
+
+def t2USAT: T2SatI<
+ (outs rGPR:$Rd),
+ (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+}
+
+def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
+ NoItinerary,
+ "usat16", "\t$Rd, $sat_imm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-22} = 0b1111001110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+ let Inst{5-4} = 0b00;
+}
+
+def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate Instructions.
+//
+
+defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31,
+ BinOpFrag<(shl node:$LHS, node:$RHS)>>;
+defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr,
+ BinOpFrag<(srl node:$LHS, node:$RHS)>>;
+defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr,
+ BinOpFrag<(sra node:$LHS, node:$RHS)>>;
+defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
+ BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+
+// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
+def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
+ (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
+
+let Uses = [CPSR] in {
+def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "rrx", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0011;
+}
+}
+
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def t2MOVsrl_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b01; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+def t2MOVsra_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b10; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm t2AND : T2I_bin_w_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+
+defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+
+class T2BitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<5> msb;
+ bits<5> lsb;
+
+ let Inst{11-8} = Rd;
+ let Inst{4-0} = msb{4-0};
+ let Inst{14-12} = lsb{4-2};
+ let Inst{7-6} = lsb{1-0};
+}
+
+class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2BitFI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+
+ let Inst{19-16} = Rn;
+}
+
+let Constraints = "$src = $Rd" in
+def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
+ IIC_iUNAsi, "bfc", "\t$Rd, $imm",
+ [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+}
+
+def t2SBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
+ IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10100;
+ let Inst{15} = 0;
+}
+
+def t2UBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
+ IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b11100;
+ let Inst{15} = 0;
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding T1)
+let Constraints = "$src = $Rd" in {
+ def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
+ bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+ }
+}
+
+defm t2ORN : T2I_bin_irs<0b0011, "orn",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">;
+
+/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// unary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode,
+ bit Cheap = 0, bit ReMat = 0, bit MoveImm = 0> {
+ // shifted imm
+ def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
+ let isAsCheapAsAMove = Cheap;
+ let isReMaterializable = ReMat;
+ let isMoveImm = MoveImm;
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ }
+ // register
+ def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rd, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ }
+}
+
+// Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version
+let AddedComplexity = 1 in
+defm t2MVN : T2I_un_irs <0b0011, "mvn",
+ IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
+ UnOpFrag<(not node:$Src)>, 1, 1, 1>;
+
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
+
+// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 rGPR:$src), [{
+ return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
+// so_imm_notSext is needed instead of so_imm_not, as the value of imm
+// will match the extended, not the original bitWidth for $src.
+def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>;
+
+
+// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
+ Requires<[IsThumb2]>;
+
+def : T2Pat<(t2_so_imm_not:$src),
+ (t2MVNi t2_so_imm_not:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+let isCommutable = 1 in
+def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "mul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLA: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>,
+ Requires<[IsThumb2, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0001; // Multiply and Subtract
+}
+
+// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+def t2SMULL : T2MulLong<0b000, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMULL : T2MulLong<0b010, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+} // isCommutable
+
+// Multiply + accumulate
+def t2SMLAL : T2MlaLong<0b100, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
+
+def t2UMLAL : T2MlaLong<0b110, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
+
+def t2UMAAL : T2MulLong<0b110, 0b0110,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+} // neverHasSideEffects
+
+// Rounding variants of the below included for disassembly only
+
+// Most significant word multiply
+def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmulr", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLA : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLAR: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLSR:T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+multiclass T2I_smul<string opc, PatFrag opnode> {
+ def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+
+multiclass T2I_smla<string opc, PatFrag opnode> {
+ def BB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra,
+ (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiple accumulate long: SMLAL<x><y>
+def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+def t2SMUAD: T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUADX:T2ThreeReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSD: T2ThreeReg_mac<
+ 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSDX:T2ThreeReg_mac<
+ 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMLAD : T2FourReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLADX : T2FourReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+//===----------------------------------------------------------------------===//
+// Division Instructions.
+// Signed and unsigned division on v7-M
+//
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011100;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011101;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-22} = 0b01010;
+ let Inst{21-20} = op1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-6} = 0b10;
+ let Inst{5-4} = op2;
+ let Rn{3-0} = Rm;
+}
+
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>;
+
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rbit", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>;
+
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>;
+
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev16", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>;
+
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "revsh", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>;
+
+def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
+ (t2REVSH rGPR:$Rm)>;
+
+def t2PKHBT : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_lsl_amt:$sh),
+ IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
+ (and (shl rGPR:$Rm, pkh_lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 0; // BT form
+ let Inst{4} = 0;
+
+ bits<5> sh;
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_asr_amt:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
+ (and (sra rGPR:$Rm, pkh_asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 1; // TB form
+ let Inst{4} = 0;
+
+ bits<5> sh;
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
+ (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs),
+ (t2CMPrr GPRnopc:$lhs, rGPR:$rhs)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs),
+ (t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>;
+
+let isCompare = 1, Defs = [CPSR] in {
+ // shifted imm
+ def t2CMNri : T2OneRegCmpImm<
+ (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi,
+ "cmn", ".w\t$Rn, $imm",
+ [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ let Inst{11-8} = 0b1111; // Rd
+ }
+ // register
+ def t2CMNzrr : T2TwoRegCmp<
+ (outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr,
+ "cmn", ".w\t$Rn, $Rm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, rGPR:$Rm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{11-8} = 0b1111; // Rd
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def t2CMNzrs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi,
+ "cmn", ".w\t$Rn, $ShiftedRm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{11-8} = 0b1111; // Rd
+ }
+}
+
+// Assembler aliases w/o the ".w" suffix.
+// No alias here for 'rr' version as not all instantiations of this multiclass
+// want one (CMP in particular, does not).
+def : t2InstAlias<"cmn${p} $Rn, $imm",
+ (t2CMNri GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rn, $shift",
+ (t2CMNzrs GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
+
+def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
+ (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+
+def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
+ (t2CMNri GPRnopc:$src, t2_so_imm_neg:$imm)>;
+
+defm t2TST : T2I_cmp_irs<0b0000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
+defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
+
+let isCommutable = 1, isSelect = 1 in
+def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, pred:$p),
+ 4, IIC_iCMOVr,
+ [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+// FIXME: Pseudo-ize these. For now, just mark codegen only.
+let isCodeGenOnly = 1 in {
+let isMoveImm = 1 in
+def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm),
+ IIC_iCMOVi,
+ "movw", "\t$Rd, $imm", []>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+let isMoveImm = 1 in
+def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
+ (ins rGPR:$false, i32imm:$src, pred:$p),
+ IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">;
+
+let isMoveImm = 1 in
+def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
+ IIC_iCMOVi, "mvn", "\t$Rd, $imm",
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0011;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod; // Shift type.
+}
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+} // isCodeGenOnly = 1
+
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f5;
+ let Inst{3-0} = opt;
+}
+}
+
+def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dsb", "\t$opt", []>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f4;
+ let Inst{3-0} = opt;
+}
+
+def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "isb", "\t$opt",
+ []>, Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f6;
+ let Inst{3-0} = opt;
+}
+
+class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{11-8} = rt2;
+ let Inst{7-6} = 0b01;
+ let Inst{5-4} = opcod;
+ let Inst{3-0} = 0b1111;
+
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{11-8} = rt2;
+ let Inst{7-6} = 0b01;
+ let Inst{5-4} = opcod;
+
+ bits<4> Rd;
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{3-0} = Rd;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+let mayLoad = 1 in {
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexb", "\t$Rt, $addr", "", []>;
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexh", "\t$Rt, $addr", "", []>;
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrex", "\t$Rt, $addr", "", []> {
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000101;
+ let Inst{19-16} = addr{11-8};
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = addr{7-0};
+}
+let hasExtraDefRegAllocReq = 1 in
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexd", "\t$Rt, $Rt2, $addr", "",
+ [], {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexb", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexh", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
+ t2addrmode_imm0_1020s4:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strex", "\t$Rd, $Rt, $addr", "",
+ []> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000100;
+ let Inst{19-16} = addr{11-8};
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = addr{7-0};
+}
+let hasExtraSrcRegAllocReq = 1 in
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
+ {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+}
+
+def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-16} = 0xf3bf;
+ let Inst{15-14} = 0b10;
+ let Inst{13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved resgisters, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, NoVFP]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control-Flow Instructions
+//
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (t2LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+let isPredicable = 1 in
+def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
+ "b", ".w\t$target",
+ [(br bb:$target)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 1;
+
+ bits<24> target;
+ let Inst{26} = target{19};
+ let Inst{11} = target{18};
+ let Inst{13} = target{17};
+ let Inst{25-16} = target{20-11};
+ let Inst{10-0} = target{10-0};
+ let DecoderMethod = "DecodeT2BInstruction";
+}
+
+let isNotDuplicable = 1, isIndirectBranch = 1 in {
+def t2BR_JT : t2PseudoInst<(outs),
+ (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
+
+// FIXME: Add a non-pc based case that can be predicated.
+def t2TBB_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
+
+def t2TBH_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
+
+def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
+ "tbb", "\t$addr", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 0; // B form
+ let Inst{3-0} = Rm;
+
+ let DecoderMethod = "DecodeThumbTableBranch";
+}
+
+def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br,
+ "tbh", "\t$addr", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 1; // H form
+ let Inst{3-0} = Rm;
+
+ let DecoderMethod = "DecodeThumbTableBranch";
+}
+} // isNotDuplicable, isIndirectBranch
+
+} // isBranch, isTerminator, isBarrier
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects ", "two operands. :(
+let isBranch = 1, isTerminator = 1 in
+def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
+ "b", ".w\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+
+ bits<4> p;
+ let Inst{25-22} = p;
+
+ bits<21> target;
+ let Inst{26} = target{20};
+ let Inst{11} = target{19};
+ let Inst{13} = target{18};
+ let Inst{21-16} = target{17-12};
+ let Inst{10-0} = target{11-1};
+
+ let DecoderMethod = "DecodeThumb2BCCInstruction";
+}
+
+// Tail calls. The IOS version of thumb tail calls uses a t2 branch, so
+// it goes here.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // IOS version.
+ let Uses = [SP] in
+ def tTAILJMPd: tPseudoExpand<(outs),
+ (ins uncondbrtarget:$dst, pred:$p),
+ 4, IIC_Br, [],
+ (t2B uncondbrtarget:$dst, pred:$p)>,
+ Requires<[IsThumb2, IsIOS]>;
+}
+
+// IT block
+let Defs = [ITSTATE] in
+def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
+ AddrModeNone, 2, IIC_iALUx,
+ "it$mask\t$cc", "", []> {
+ // 16-bit instruction.
+ let Inst{31-16} = 0x0000;
+ let Inst{15-8} = 0b10111111;
+
+ bits<4> cc;
+ bits<4> mask;
+ let Inst{7-4} = cc;
+ let Inst{3-0} = mask;
+
+ let DecoderMethod = "DecodeIT";
+}
+
+// Branch and Exchange Jazelle -- for disassembly only
+// Rm = Inst{19-16}
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []> {
+ bits<4> func;
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111100;
+ let Inst{19-16} = func;
+ let Inst{15-0} = 0b1000111100000000;
+}
+
+// Compare and branch on zero / non-zero
+let isBranch = 1, isTerminator = 1 in {
+ def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbz\t$Rn, $target", []>,
+ T1Misc<{0,0,?,1,?,?,?}>,
+ Requires<[IsThumb2]> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+
+ def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbnz\t$Rn, $target", []>,
+ T1Misc<{1,0,?,1,?,?,?}>,
+ Requires<[IsThumb2]> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+}
+
+
+// Change Processor State is a system instruction.
+// FIXME: Since the asm parser has currently no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
+ !strconcat("cps", asm_op), []> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111010;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-9} = imod;
+ let Inst{8} = M;
+ let Inst{7-5} = iflags;
+ let Inst{4-0} = mode;
+ let DecoderMethod = "DecodeT2CPSInstruction";
+}
+
+let M = 1 in
+ def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod.w\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
+ "$imod.w\t$iflags">;
+let imod = 0, iflags = 0, M = 1 in
+ def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">;
+
+// A6.3.4 Branches and miscellaneous control
+// Table A6-14 Change Processor State, and hint instructions
+def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{
+ bits<8> imm;
+ let Inst{31-8} = 0b111100111010111110000000;
+ let Inst{7-0} = imm;
+}
+
+def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>;
+def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
+def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
+def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
+def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>;
+def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>;
+
+def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
+ bits<4> opt;
+ let Inst{31-20} = 0b111100111010;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-8} = 0b10000000;
+ let Inst{7-4} = 0b1111;
+ let Inst{3-0} = opt;
+}
+
+// Secure Monitor Call is a system instruction.
+// Option = Inst{19-16}
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111111;
+ let Inst{15-12} = 0b1000;
+
+ bits<4> opt;
+ let Inst{19-16} = opt;
+}
+
+class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<5> mode;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24-23} = Op;
+ let Inst{22} = 0;
+ let Inst{21} = W;
+ let Inst{20-16} = 0b01101;
+ let Inst{15-5} = 0b11000000000;
+ let Inst{4-0} = mode{4-0};
+}
+
+// Store Return State is a system instruction.
+def t2SRSDB_UPD : T2SRS<0b00, 1, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsdb", "\tsp!, $mode", []>;
+def t2SRSDB : T2SRS<0b00, 0, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsdb","\tsp, $mode", []>;
+def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsia","\tsp!, $mode", []>;
+def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsia","\tsp, $mode", []>;
+
+
+def : t2InstAlias<"srsdb${p} $mode", (t2SRSDB imm0_31:$mode, pred:$p)>;
+def : t2InstAlias<"srsdb${p} $mode!", (t2SRSDB_UPD imm0_31:$mode, pred:$p)>;
+
+def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>;
+def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>;
+
+// Return From Exception is a system instruction.
+class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xc000;
+}
+
+def t2RFEDBW : T2RFE<0b111010000011,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEDB : T2RFE<0b111010000001,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIAW : T2RFE<0b111010011011,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIA : T2RFE<0b111010011001,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// 32-bit immediate using movw + movt.
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set rGPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsThumb, HasV6T2]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if pic).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+}
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsThumb2, DontUseMovt]>;
+def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ IIC_iLoadiALU,
+ [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb2]>;
+
+// Pseudo isntruction that combines movs + predicated rsbmi
+// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR] in {
+def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
+ NoItinerary, []>, Requires<[IsThumb2]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Coprocessor load/store -- for disassembly only
+//
+class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm>
+ : T2I<oops, iops, NoItinerary, opc, asm, []> {
+ let Inst{31-28} = op31_28;
+ let Inst{27-25} = 0b110;
+}
+
+multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
+ def _OFFSET : T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset"> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : T2CI<op31_28, (outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+
+defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
+defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
+defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
+defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
+
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+// Move to ARM core register from Special Register
+
+// A/R class MRS.
+//
+// A/R class can only move from CPSR or SPSR.
+def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
+ []>, Requires<[IsThumb2,IsARClass]> {
+ bits<4> Rd;
+ let Inst{31-12} = 0b11110011111011111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = 0b0000;
+}
+
+def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
+
+def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
+ []>, Requires<[IsThumb2,IsARClass]> {
+ bits<4> Rd;
+ let Inst{31-12} = 0b11110011111111111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = 0b0000;
+}
+
+// M class MRS.
+//
+// This MRS has a mask field in bits 7-0 and can take more values than
+// the A/R class (a full msr_mask).
+def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
+ "mrs", "\t$Rd, $mask", []>,
+ Requires<[IsThumb,IsMClass]> {
+ bits<4> Rd;
+ bits<8> mask;
+ let Inst{31-12} = 0b11110011111011111000;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = 0b1111;
+ let Inst{7-0} = mask;
+}
+
+
+// Move from ARM core register to Special Register
+//
+// A/R class MSR.
+//
+// No need to have both system and application versions, the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
+// the mask with the fields to be accessed in the special register.
+def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
+ NoItinerary, "msr", "\t$mask, $Rn", []>,
+ Requires<[IsThumb2,IsARClass]> {
+ bits<5> mask;
+ bits<4> Rn;
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = mask{4}; // R Bit
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-8} = mask{3-0};
+ let Inst{7-0} = 0;
+}
+
+// M class MSR.
+//
+// Move from ARM core register to Special Register
+def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
+ NoItinerary, "msr", "\t$SYSm, $Rn", []>,
+ Requires<[IsThumb,IsMClass]> {
+ bits<12> SYSm;
+ bits<4> Rn;
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = 0b0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-0} = SYSm;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register
+//
+
+class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : T2Cop<Op, oops, iops,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ pattern> {
+ let Inst{27-24} = 0b1110;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+class t2MovRRCopro<bits<4> Op, string opc, bit direction,
+ list<dag> pattern = []>
+ : T2Cop<Op, (outs),
+ (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+/* from ARM core register to coprocessor */
+def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
+def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
+ (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
+
+/* from coprocessor to ARM core register */
+def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
+ (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
+
+def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
+ (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
+
+def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+
+/* from ARM core register to coprocessor */
+def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
+ GPR:$Rt2, imm:$CRm)]>;
+/* from coprocessor to ARM core register */
+def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
+
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>;
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions.
+//
+
+def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// SXT/UXT with no rotate
+let AddedComplexity = 16 in {
+def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+}
+
+def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Atomic load/store patterns
+def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_8 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_negimm8:$addr),
+ (t2LDRHi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_imm12:$addr),
+ (t2LDRi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_negimm8:$addr),
+ (t2LDRi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_so_reg:$addr),
+ (t2LDRs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRBi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRBi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRBs GPR:$val, t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRHi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRHi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRHs GPR:$val, t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
+
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Aliases for ADC without the ".w" optional width specifier.
+def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $Rm",
+ (t2ADCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2ADCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for SBC without the ".w" optional width specifier.
+def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $Rm",
+ (t2SBCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2SBCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for ADD without the ".w" optional width specifier.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
+ (t2ADDrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2ADDri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2ADDri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $Rm",
+ (t2ADDrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm",
+ (t2ADDrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// add w/ negative immediates is just a sub.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
+def : t2InstAlias<"add${s}${p}.w $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"addw${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p}.w $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"addw${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
+
+// Aliases for SUB without the ".w" optional width specifier.
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm",
+ (t2SUBrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"sub${s}${p} $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm",
+ (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rdn, $Rm",
+ (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm",
+ (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Alias for compares without the ".w" optional width specifier.
+def : t2InstAlias<"cmn${p} $Rn, $Rm",
+ (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"teq${p} $Rn, $Rm",
+ (t2TEQrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"tst${p} $Rn, $Rm",
+ (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+
+// Memory barriers
+def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb, HasDB]>;
+def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb, HasDB]>;
+def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb, HasDB]>;
+
+// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
+// width specifier.
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+
+// Alias for MVN with(out) the ".w" optional width specifier.
+def : t2InstAlias<"mvn${s}${p}.w $Rd, $imm",
+ (t2MVNi rGPR:$Rd, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"mvn${s}${p} $Rd, $Rm",
+ (t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
+ (t2MVNs rGPR:$Rd, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>;
+
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
+// shift amount is zero (i.e., unspecified).
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// PUSH/POP aliases for STM/LDM
+def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+
+// STMIA/STMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stm${p} $Rn!, $regs",
+ (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"ldm${p} $Rn, $regs",
+ (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldm${p} $Rn!, $regs",
+ (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// STMDB/STMDB_UPD aliases w/ the optional .w suffix
+def : t2InstAlias<"stmdb${p}.w $Rn, $regs",
+ (t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stmdb${p}.w $Rn!, $regs",
+ (t2STMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMDB/LDMDB_UPD aliases w/ the optional .w suffix
+def : t2InstAlias<"ldmdb${p}.w $Rn, $regs",
+ (t2LDMDB GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldmdb${p}.w $Rn!, $regs",
+ (t2LDMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// Alias for REV/REV16/REVSH without the ".w" optional width specifier.
+def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"revsh${p} $Rd, $Rm", (t2REVSH rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+
+
+// Alias for RSB without the ".w" optional width specifier, and with optional
+// implied destination register.
+def : t2InstAlias<"rsb${s}${p} $Rd, $Rn, $imm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $imm",
+ (t2RSBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $Rm",
+ (t2RSBrr rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $ShiftedRm",
+ (t2RSBrs rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$ShiftedRm, pred:$p,
+ cc_out:$s)>;
+
+// SSAT/USAT optional shift operand.
+def : t2InstAlias<"ssat${p} $Rd, $sat_imm, $Rn",
+ (t2SSAT rGPR:$Rd, imm1_32:$sat_imm, rGPR:$Rn, 0, pred:$p)>;
+def : t2InstAlias<"usat${p} $Rd, $sat_imm, $Rn",
+ (t2USAT rGPR:$Rd, imm0_31:$sat_imm, rGPR:$Rn, 0, pred:$p)>;
+
+// STM w/o the .w suffix.
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// Alias for STR, STRB, and STRH without the ".w" optional
+// width specifier.
+def : t2InstAlias<"str${p} $Rt, $addr",
+ (t2STRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"strb${p} $Rt, $addr",
+ (t2STRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"strh${p} $Rt, $addr",
+ (t2STRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"str${p} $Rt, $addr",
+ (t2STRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"strb${p} $Rt, $addr",
+ (t2STRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"strh${p} $Rt, $addr",
+ (t2STRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+
+// Extend instruction optional rotate operand.
+def : t2InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"sxtb${p} $Rd, $Rm",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtb16${p} $Rd, $Rm",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxth${p} $Rd, $Rm",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtb${p}.w $Rd, $Rm",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtb${p} $Rd, $Rm",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtb16${p} $Rd, $Rm",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxth${p} $Rd, $Rm",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"uxtb${p}.w $Rd, $Rm",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxth${p}.w $Rd, $Rm",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+// Extend instruction w/o the ".w" optional width specifier.
+def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"uxtb16${p} $Rd, $Rm$rot",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+
+def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+
+
+// "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like
+// for isel.
+def : t2InstAlias<"mov${p} $Rd, $imm",
+ (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstAlias<"mvn${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+// Same for AND <--> BIC
+def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+// Likewise, "add Rd, t2_so_imm_neg" -> sub
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via t2_so_imm_neg
+def : t2InstAlias<"cmp${p} $Rd, $imm",
+ (t2CMNri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rd, $imm",
+ (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+
+
+// Wide 'mul' encoding can be specified with only two operands.
+def : t2InstAlias<"mul${p} $Rn, $Rm",
+ (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
+// these, unfortunately.
+def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+
+def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+
+// ADR w/o the .w suffix
+def : t2InstAlias<"adr${p} $Rd, $addr",
+ (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
+
+// LDR(literal) w/ alternate [pc, #imm] syntax.
+def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRSBpcrel : t2AsmPseudo<"ldrsb${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+ // Version w/ the .w suffix.
+def : t2InstAlias<"ldr${p}.w $Rt, $addr",
+ (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p}.w $Rt, $addr",
+ (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p}.w $Rt, $addr",
+ (t2LDRHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p}.w $Rt, $addr",
+ (t2LDRSBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p}.w $Rt, $addr",
+ (t2LDRSHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"add${p} $Rd, pc, $imm",
+ (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 000000000000..b5a896c69985
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,1479 @@
+//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// 8-bit floating-point immediate encodings.
+def FPImmOperand : AsmOperandClass {
+ let Name = "FPImm";
+ let ParserMethod = "parseFPImm";
+}
+
+def vfp_f32imm : Operand<f32>,
+ PatLeaf<(f32 fpimm), [{
+ return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP32Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+
+def vfp_f64imm : Operand<f64>,
+ PatLeaf<(f64 fpimm), [{
+ return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP64Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+
+def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+// The VCVT to/from fixed-point instructions encode the 'fbits' operand
+// (the number of fixed bits) differently than it appears in the assembly
+// source. It's encoded as "Size - fbits" where Size is the size of the
+// fixed-point representation (32 or 16) and fbits is the value appearing
+// in the assembly source, an integer in [0,16] or (0,32], depending on size.
+def fbits32_asm_operand : AsmOperandClass { let Name = "FBits32"; }
+def fbits32 : Operand<i32> {
+ let PrintMethod = "printFBits32";
+ let ParserMatchClass = fbits32_asm_operand;
+}
+
+def fbits16_asm_operand : AsmOperandClass { let Name = "FBits16"; }
+def fbits16 : Operand<i32> {
+ let PrintMethod = "printFBits16";
+ let ParserMatchClass = fbits16_asm_operand;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", "\t$Dd, $addr",
+ [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
+
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
+ IIC_fpLoad32, "vldr", "\t$Sd, $addr",
+ [(set SPR:$Sd, (load addrmode5:$addr))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
+
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
+ IIC_fpStore64, "vstr", "\t$Dd, $addr",
+ [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
+
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
+ IIC_fpStore32, "vstr", "\t$Sd, $addr",
+ [(store SPR:$Sd, addrmode5:$addr)]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass vfp_ldst_mult<string asm, bit L_bit,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // Double Precision
+ def DIA :
+ AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DIA_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DDB_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+
+ // Single Precision
+ def SIA :
+ AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SIA_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SDB_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+} // neverHasSideEffects
+
+def : MnemonicAlias<"vldm", "vldmia">;
+def : MnemonicAlias<"vstm", "vstmia">;
+
+def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
+
+// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
+
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
+def VADDD : ADbI<0b11100, 0b11, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VADDS : ASbIn<0b11100, 0b11, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
+def VSUBD : ADbI<0b11100, 0b11, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
+def VDIVD : ADbI<0b11101, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VDIVS : ASbI<0b11101, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
+def VMULD : ADbI<0b11100, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VMULS : ASbIn<0b11100, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VNMULD : ADbI<0b11100, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+
+def VNMULS : ASbI<0b11100, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// Match reassociated forms only if not sign dependent rounding.
+def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
+ (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+def : Pat<(fmul (fneg SPR:$a), SPR:$b),
+ (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+
+// These are encoded as unary instructions.
+let Defs = [FPSCR_NZCV] in {
+def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+
+def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+} // Defs = [FPSCR_NZCV]
+
+//===----------------------------------------------------------------------===//
+// FP Unary Operations.
+//
+
+def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
+
+def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fabs SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+let Defs = [FPSCR_NZCV] in {
+def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+ [(arm_cmpfp0 SPR:$Sd)]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+} // Defs = [FPSCR_NZCV]
+
+def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+ [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+// Special case encoding: bits 11-8 is 0b1011.
+def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (fround DPR:$Dm))]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = 0b11101;
+ let Inst{21-16} = 0b110111;
+ let Inst{11-8} = 0b1011;
+ let Inst{7-6} = 0b11;
+ let Inst{4} = 0;
+}
+
+// Between half-precision and single-precision. For disassembly only.
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def : Pat<(f32_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
+def : Pat<(f16_to_f32 GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
+
+def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fneg SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+
+def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+
+let neverHasSideEffects = 1 in {
+def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+
+def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def VMOVRS : AVConv2I<0b11100001, 0b1010,
+ (outs GPR:$Rt), (ins SPR:$Sn),
+ IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ // Instruction operands.
+ bits<4> Rt;
+ bits<5> Sn;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+// Bitcast i32 -> f32. NEON prefers to use VMOVDRR.
+def VMOVSR : AVConv4I<0b11100000, 0b1010,
+ (outs SPR:$Sn), (ins GPR:$Rt),
+ IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
+ Requires<[HasVFP2, UseVMOVSR]> {
+ // Instruction operands.
+ bits<5> Sn;
+ bits<4> Rt;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+let neverHasSideEffects = 1 in {
+def VMOVRRD : AVConv3I<0b11000101, 0b1011,
+ (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
+ [/* FIXME: Can't write pattern for multiple result instr*/]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VMOVRRS : AVConv3I<0b11000101, 0b1010,
+ (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> src1;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = src1{4-1};
+ let Inst{5} = src1{0};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+ let DecoderMethod = "DecodeVMOVRRS";
+}
+} // neverHasSideEffects
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def VMOVDRR : AVConv5I<0b11000100, 0b1011,
+ (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
+ IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+let neverHasSideEffects = 1 in
+def VMOVSRR : AVConv5I<0b11000100, 0b1010,
+ (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
+ IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ // Instruction operands.
+ bits<5> dst1;
+ bits<4> src1;
+ bits<4> src2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = dst1{4-1};
+ let Inst{5} = dst1{0};
+ let Inst{15-12} = src1;
+ let Inst{19-16} = src2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+
+ let DecoderMethod = "DecodeVMOVSRR";
+}
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX: SPR system reg -> GPR
+// FMSRR: GPR -> SPR
+// FMXR: GPR -> VFP system reg
+
+
+// Int -> FP:
+
+class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
+ let Inst{7} = 1; // s32
+}
+
+def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd),(ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
+ let Inst{7} = 1; // s32
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
+ let Inst{7} = 0; // u32
+}
+
+def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
+ let Inst{7} = 0; // u32
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FP -> Int:
+
+class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
+let Uses = [FPSCR] in {
+// FIXME: Verify encoding after integrated assembler is working.
+def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+}
+
+// Convert between floating-point and fixed-point
+// Data type for fixed-point naming convention:
+// S16 (U=0, sx=0) -> SH
+// U16 (U=1, sx=0) -> UH
+// S32 (U=0, sx=1) -> SL
+// U32 (U=1, sx=1) -> UL
+
+let Constraints = "$a = $dst" in {
+
+// FP to Fixed-Point:
+
+// Single Precision register
+class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
+ bit op5, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{0};
+ let Inst{15-12} = dst{4-1};
+}
+
+// Double Precision register
+class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
+ bit op5, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{4};
+ let Inst{15-12} = dst{3-0};
+}
+
+def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
+
+def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
+
+def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
+
+def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
+
+// Fixed-Point to FP:
+
+def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
+
+def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
+
+def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
+
+def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
+
+} // End of 'let Constraints = "$a = $dst" in'
+
+//===----------------------------------------------------------------------===//
+// FP Multiply-Accumulate Operations.
+//
+
+def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+
+def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
+
+def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+
+def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
+def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+
+def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
+def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+
+def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
+//===----------------------------------------------------------------------===//
+// Fused FP Multiply-Accumulate Operations.
+//
+def VFMAD : ADbI<0b11101, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fma x, y, z) -> (vfms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
+ (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
+ (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFMSD : ADbI<0b11101, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fma (fneg x), y, z) -> (vfms z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma x, (fneg y), z) -> (vfms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fneg (fma x, y, z)) -> (vfnma z, x, y)
+def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+
+// (fma x, y, (fneg z)) -> (vfnms z, x, y))
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+//===----------------------------------------------------------------------===//
+// FP Conditional moves.
+//
+
+let neverHasSideEffects = 1 in {
+def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
+ 4, IIC_fpUNA64,
+ [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+ RegConstraint<"$Dn = $Dd">;
+
+def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
+ 4, IIC_fpUNA32,
+ [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+ RegConstraint<"$Sn = $Sd">;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Move from VFP System Register to ARM core register.
+//
+
+class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> Rt;
+
+ let Inst{27-20} = 0b11101111;
+ let Inst{19-16} = opc19_16;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{6-5} = 0b00;
+ let Inst{4} = 1;
+ let Inst{3-0} = 0b0000;
+}
+
+// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
+// to APSR.
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
+def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+ "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
+
+// Application level FPSCR -> GPR
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpscr",
+ [(set GPR:$Rt, (int_arm_get_fpscr))]>;
+
+// System level FPEXC, FPSID -> GPR
+let Uses = [FPSCR] in {
+ def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpexc", []>;
+ def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpsid", []>;
+ def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr0", []>;
+ def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr1", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move from ARM core register to VFP System Register.
+//
+
+class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> src;
+
+ // Encode instruction operand.
+ let Inst{15-12} = src;
+
+ let Inst{27-20} = 0b11101110;
+ let Inst{19-16} = opc19_16;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{4} = 1;
+}
+
+let Defs = [FPSCR] in {
+ // Application level GPR -> FPSCR
+ def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpscr, $src", [(int_arm_set_fpscr GPR:$src)]>;
+ // System level GPR -> FPEXC
+ def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpexc, $src", []>;
+ // System level GPR -> FPSID
+ def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpsid, $src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+// Materialize FP immediates. VFP3 only.
+let isReMaterializable = 1 in {
+def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
+ VFPMiscFrm, IIC_fpUNA64,
+ "vmov", ".f64\t$Dd, $imm",
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ bits<5> Dd;
+ bits<8> imm;
+
+ let Inst{27-23} = 0b11101;
+ let Inst{22} = Dd{4};
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision.
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
+}
+
+def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
+ VFPMiscFrm, IIC_fpUNA32,
+ "vmov", ".f32\t$Sd, $imm",
+ [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ bits<5> Sd;
+ bits<8> imm;
+
+ let Inst{27-23} = 0b11101;
+ let Inst{22} = Sd{0};
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision.
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases.
+//
+// A few mnemnoic aliases for pre-unifixed syntax. We don't guarantee to
+// support them all, but supporting at least some of the basics is
+// good to be friendly.
+def : VFP2MnemonicAlias<"flds", "vldr">;
+def : VFP2MnemonicAlias<"fldd", "vldr">;
+def : VFP2MnemonicAlias<"fmrs", "vmov">;
+def : VFP2MnemonicAlias<"fmsr", "vmov">;
+def : VFP2MnemonicAlias<"fsqrts", "vsqrt">;
+def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">;
+def : VFP2MnemonicAlias<"fadds", "vadd.f32">;
+def : VFP2MnemonicAlias<"faddd", "vadd.f64">;
+def : VFP2MnemonicAlias<"fmrdd", "vmov">;
+def : VFP2MnemonicAlias<"fmrds", "vmov">;
+def : VFP2MnemonicAlias<"fmrrd", "vmov">;
+def : VFP2MnemonicAlias<"fmdrr", "vmov">;
+def : VFP2MnemonicAlias<"fmuls", "vmul.f32">;
+def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
+def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
+def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
+def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
+def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">;
+def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
+def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">;
+def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
+def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">;
+def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
+def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">;
+def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">;
+def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">;
+def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">;
+def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">;
+def : VFP2MnemonicAlias<"fsts", "vstr">;
+def : VFP2MnemonicAlias<"fstd", "vstr">;
+def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
+def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
+def : VFP2MnemonicAlias<"fcpys", "vmov.f32">;
+def : VFP2MnemonicAlias<"fcpyd", "vmov.f64">;
+def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">;
+def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">;
+def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">;
+def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">;
+def : VFP2MnemonicAlias<"fmrx", "vmrs">;
+def : VFP2MnemonicAlias<"fmxr", "vmsr">;
+
+// Be friendly and accept the old form of zero-compare
+def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
+
+
+def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
+ (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
+ (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+
+// No need for the size suffix on VSQRT. It's implied by the register classes.
+def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+// VLDR/VSTR accept an optional type suffix.
+def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
+ (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
+ (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
+ (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
+ (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+
+// VMOV can accept optional 32-bit or less data type suffix suffix.
+def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.8 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+
+def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn",
+ (VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2",
+ (VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>;
+
+// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way
+// VMOVD does.
+def : VFP2InstAlias<"vmov${p} $Sd, $Sm",
+ (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 000000000000..351a290e2aa0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,336 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. (We need
+// to preserve more context and manipulate the stack directly). Instead,
+// write our own wrapper, which does things our way, so we have complete
+// control over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+ void ARMCompilationCallback();
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "ARMCompilationCallback\n"
+ ASMPREFIX "ARMCompilationCallback:\n"
+ // Save caller saved registers since they may contain stuff
+ // for the real target function right now. We have to act as if this
+ // whole compilation callback doesn't exist as far as the caller is
+ // concerned, so we can't just preserve the callee saved regs.
+ "stmdb sp!, {r0, r1, r2, r3, lr}\n"
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+ "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ // The LR contains the address of the stub function on entry.
+ // pass it as the argument to the C part of the callback
+ "mov r0, lr\n"
+ "sub sp, sp, #4\n"
+ // Call the C portion of the callback
+ "bl " ASMPREFIX "ARMCompilationCallbackC\n"
+ "add sp, sp, #4\n"
+ // Restoring the LR to the return address of the function that invoked
+ // the stub and de-allocating the stack space for it requires us to
+ // swap the two saved LR values on the stack, as they're backwards
+ // for what we need since the pop instruction has a pre-determined
+ // order for the registers.
+ // +--------+
+ // 0 | LR | Original return address
+ // +--------+
+ // 1 | LR | Stub address (start of stub)
+ // 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
+ // 6-20 | D0..D7 | Saved VFP registers
+ // +--------+
+ //
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+ // Restore VFP caller-saved registers.
+ "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ //
+ // We need to exchange the values in slots 0 and 1 so we can
+ // return to the address in slot 1 with the address in slot 0
+ // restored to the LR.
+ "ldr r0, [sp,#20]\n"
+ "ldr r1, [sp,#16]\n"
+ "str r1, [sp,#20]\n"
+ "str r0, [sp,#16]\n"
+ // Return to the (newly modified) stub to invoke the real function.
+ // The above twiddling of the saved return addresses allows us to
+ // deallocate everything, including the LR the stub saved, with two
+ // updating load instructions.
+ "ldmia sp!, {r0, r1, r2, r3, lr}\n"
+ "ldr pc, [sp], #4\n"
+ );
+#else // Not an ARM host
+ void ARMCompilationCallback() {
+ llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!");
+ }
+#endif
+}
+
+/// ARMCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call. We're replacing the first two instructions of the
+ // stub with:
+ // ldr pc, [pc,#-4]
+ // <addr>
+ if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
+ *(intptr_t *)(StubAddr+4) = NewVal;
+ if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+}
+
+TargetJITInfo::LazyResolverFn
+ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return ARMCompilationCallback;
+}
+
+void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
+ JITCodeEmitter &JCE) {
+ uint8_t Buffer[4];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr);
+ void *PtrAddr = JCE.allocIndirectGV(
+ GV, Buffer, sizeof(Buffer), /*Alignment=*/4);
+ addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
+ return PtrAddr;
+}
+
+TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
+ // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
+ // 4-byte address. See emitFunctionStub for details.
+ StubLayout Result = {16, 4};
+ return Result;
+}
+
+void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ void *Addr;
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
+ // Branch to the corresponding function addr.
+ if (IsPIC) {
+ // The stub is 16-byte size and 4-aligned.
+ intptr_t LazyPtr = getIndirectSymAddr(Fn);
+ if (!LazyPtr) {
+ // In PIC mode, the function stub is loading a lazy-ptr.
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE);
+ DEBUG(if (F)
+ errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
+ << "] for GV '" << F->getName() << "'\n";
+ else
+ errs() << "JIT: Stub emitted at [" << LazyPtr
+ << "] for external function at '" << Fn << "'\n");
+ }
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
+ JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
+ JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
+ JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ } else {
+ // The stub is 8-byte size and 4-aligned.
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ JCE.emitWordLE((intptr_t)Fn); // addr of function
+ sys::Memory::InvalidateInstructionCache(Addr, 8);
+ if (!sys::Memory::setRangeExecutable(Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ }
+ } else {
+ // The compilation callback will overwrite the first two words of this
+ // stub with indirect branch instructions targeting the compiled code.
+ // This stub sets the return address to restart the stub, so that
+ // the new branch will be invoked when we come back.
+ //
+ // Branch and link to the compilation callback.
+ // The stub is 16-byte size and 4-byte aligned.
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ // Save LR so the callback can determine which stub called it.
+ // The compilation callback is responsible for popping this prior
+ // to returning.
+ JCE.emitWordLE(0xe92d4000); // push {lr}
+ // Set the return address to go back to the start of this stub.
+ JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
+ // Invoke the compilation callback.
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ // The address of the compilation callback.
+ JCE.emitWordLE((intptr_t)ARMCompilationCallback);
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ }
+
+ return Addr;
+}
+
+intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
+ ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
+ switch (RT) {
+ default:
+ return (intptr_t)(MR->getResultPointer());
+ case ARM::reloc_arm_pic_jt:
+ // Destination address - jump table base.
+ return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
+ case ARM::reloc_arm_jt_base:
+ // Jump table base address.
+ return getJumpTableBaseAddr(MR->getJumpTableIndex());
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ // Constant pool entry address.
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex());
+ case ARM::reloc_arm_machine_cp_entry: {
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal();
+ assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) &&
+ "Can't handle this machine constant pool entry yet!");
+ intptr_t Addr = (intptr_t)(MR->getResultPointer());
+ Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment();
+ return Addr;
+ }
+ }
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = resolveRelocDestAddr(MR);
+ switch ((ARM::RelocationType)MR->getRelocationType()) {
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_relative: {
+ // It is necessary to calculate the correct PC relative value. We
+ // subtract the base addr from the target addr to form a byte offset.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ // If the result is positive, set bit U(23) to 1.
+ if (ResultPtr >= 0)
+ *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift;
+ else {
+ // Otherwise, obtain the absolute value and set bit U(23) to 0.
+ *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift);
+ ResultPtr = - ResultPtr;
+ }
+ // Set the immed value calculated.
+ // VFP immediate offset is multiplied by 4.
+ if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
+ ResultPtr = ResultPtr >> 2;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ // Set register Rn to PC (which is register 15 on all architectures).
+ // FIXME: This avoids the need for register info in the JIT class.
+ *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift;
+ break;
+ }
+ case ARM::reloc_arm_pic_jt:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_absolute: {
+ // These addresses have already been resolved.
+ *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_branch: {
+ // It is necessary to calculate the correct value of signed_immed_24
+ // field. We subtract the base addr from the target addr to form a
+ // byte offset, which must be inside the range -33554432 and +33554428.
+ // Then, we set the signed_immed_24 field of the instruction to bits
+ // [25:2] of the byte offset. More details ARM-ARM p. A4-11.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2;
+ assert(ResultPtr >= -33554432 && ResultPtr <= 33554428);
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_jt_base: {
+ // JT base - (instruction addr + 8)
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_movw: {
+ ResultPtr = ResultPtr & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ case ARM::reloc_arm_movt: {
+ ResultPtr = (ResultPtr >> 16) & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
new file mode 100644
index 000000000000..23a6a9b512f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
@@ -0,0 +1,183 @@
+//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMJITINFO_H
+#define ARMJITINFO_H
+
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class ARMTargetMachine;
+
+ class ARMJITInfo : public TargetJITInfo {
+ // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding
+ // CONSTPOOL_ENTRY addresses.
+ SmallVector<intptr_t, 16> ConstPoolId2AddrMap;
+
+ // JumpTableId2AddrMap - A map from inline jumptable ids to the
+ // corresponding inline jump table bases.
+ SmallVector<intptr_t, 16> JumpTableId2AddrMap;
+
+ // PCLabelMap - A map from PC labels to addresses.
+ DenseMap<unsigned, intptr_t> PCLabelMap;
+
+ // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol)
+ // addresses to their indirect symbol addresses.
+ DenseMap<void*, intptr_t> Sym2IndirectSymMap;
+
+ // IsPIC - True if the relocation model is PIC. This is used to determine
+ // how to codegen function stubs.
+ bool IsPIC;
+
+ public:
+ explicit ARMJITInfo() : IsPIC(false) { useGOT = false; }
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on ARM.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// hasCustomConstantPool - Allows a target to specify that constant
+ /// pool address resolution is handled by the target.
+ virtual bool hasCustomConstantPool() const { return true; }
+
+ /// hasCustomJumpTables - Allows a target to specify that jumptables
+ /// are emitted by the target.
+ virtual bool hasCustomJumpTables() const { return true; }
+
+ /// allocateSeparateGVMemory - If true, globals should be placed in
+ /// separately allocated heap memory rather than in the same
+ /// code memory allocated by JITCodeEmitter.
+ virtual bool allocateSeparateGVMemory() const {
+#ifdef __APPLE__
+ return true;
+#else
+ return false;
+#endif
+ }
+
+ /// Initialize - Initialize internal stage for the function being JITted.
+ /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize
+ /// jump table ids to jump table bases map; remember if codegen relocation
+ /// model is PIC.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
+ JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+ IsPIC = isPIC;
+ }
+
+ /// getConstantPoolEntryAddr - The ARM target puts all constant
+ /// pool entries into constant islands. This returns the address of the
+ /// constant pool entry of the specified index.
+ intptr_t getConstantPoolEntryAddr(unsigned CPI) const {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ return ConstPoolId2AddrMap[CPI];
+ }
+
+ /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address
+ /// where its associated value is stored. When relocations are processed,
+ /// this value will be used to resolve references to the constant.
+ void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ ConstPoolId2AddrMap[CPI] = Addr;
+ }
+
+ /// getJumpTableBaseAddr - The ARM target inline all jump tables within
+ /// text section of the function. This returns the address of the base of
+ /// the jump table of the specified index.
+ intptr_t getJumpTableBaseAddr(unsigned JTI) const {
+ assert(JTI < JumpTableId2AddrMap.size());
+ return JumpTableId2AddrMap[JTI];
+ }
+
+ /// addJumpTableBaseAddr - Map a jump table index to the address where
+ /// the corresponding inline jump table is emitted. When relocations are
+ /// processed, this value will be used to resolve references to the
+ /// jump table.
+ void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) {
+ assert(JTI < JumpTableId2AddrMap.size());
+ JumpTableId2AddrMap[JTI] = Addr;
+ }
+
+ /// getPCLabelAddr - Retrieve the address of the PC label of the
+ /// specified id.
+ intptr_t getPCLabelAddr(unsigned Id) const {
+ DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
+ assert(I != PCLabelMap.end());
+ return I->second;
+ }
+
+ /// addPCLabelAddr - Remember the address of the specified PC label.
+ void addPCLabelAddr(unsigned Id, intptr_t Addr) {
+ PCLabelMap.insert(std::make_pair(Id, Addr));
+ }
+
+ /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the
+ /// specified symbol located at address. Returns 0 if the indirect symbol
+ /// has not been emitted.
+ intptr_t getIndirectSymAddr(void *Addr) const {
+ DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr);
+ if (I != Sym2IndirectSymMap.end())
+ return I->second;
+ return 0;
+ }
+
+ /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to
+ /// its indirect symbol address.
+ void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) {
+ Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr));
+ }
+
+ private:
+ /// resolveRelocDestAddr - Resolve the resulting address of the relocation
+ /// if it's not already solved. Constantpool entries must be resolved by
+ /// ARM target.
+ intptr_t resolveRelocDestAddr(MachineRelocation *MR) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 000000000000..b7ac5d57c362
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,2062 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load / store related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-ldst-opt"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
+STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
+STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
+STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
+STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
+STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
+STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
+STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
+
+/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
+/// load / store instructions to form ldm / stm instructions.
+
+namespace {
+ struct ARMLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+ RegScavenger *RS;
+ bool isThumb2;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM load / store optimization pass";
+ }
+
+ private:
+ struct MemOpQueueEntry {
+ int Offset;
+ unsigned Reg;
+ bool isKill;
+ unsigned Position;
+ MachineBasicBlock::iterator MBBI;
+ bool Merged;
+ MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+ MachineBasicBlock::iterator i)
+ : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
+ };
+ class UnitRegsMap {
+ public:
+ UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {}
+ const SmallVector<unsigned, 4>& operator[](unsigned Reg) {
+ DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found =
+ Cache.find(Reg);
+ if (found != Cache.end())
+ return found->second;
+ else
+ return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg)))
+ .first->second;
+ }
+ private:
+ SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) {
+ SmallVector<unsigned, 4> Res;
+
+ const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg);
+ if (TRC == &ARM::QPRRegClass) {
+ if (Reg > ARM::Q7) {
+ Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0));
+ Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1));
+ return Res;
+ }
+
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3));
+
+ return Res;
+ }
+
+ if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) {
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
+
+ return Res;
+ }
+
+ Res.push_back(Reg);
+
+ return Res;
+
+ }
+ const TargetRegisterInfo* TRI;
+ DenseMap<unsigned, SmallVector<unsigned, 4> > Cache;
+ };
+ typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+ typedef MemOpQueue::iterator MemOpQueueIter;
+
+ bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill, int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+ DebugLoc dl,
+ ArrayRef<std::pair<unsigned, bool> > Regs,
+ ArrayRef<unsigned> ImpDefs);
+ void MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &MemOps,
+ unsigned memOpsBegin,
+ unsigned memOpsEnd,
+ unsigned insertAfter,
+ int Offset,
+ unsigned Base,
+ bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+ void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+ int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+
+ void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ unsigned AddMemOp(MemOpQueue& MemOps,
+ const MemOpQueueEntry newEntry,
+ UnitRegsMap& UnitRegsInfo,
+ SmallSet<unsigned, 4>& UsedUnitRegs,
+ unsigned At = -1U);
+ bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+ bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ };
+ char ARMLoadStoreOpt::ID = 0;
+}
+
+static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA;
+ case ARM_AM::da: return ARM::LDMDA;
+ case ARM_AM::db: return ARM::LDMDB;
+ case ARM_AM::ib: return ARM::LDMIB;
+ }
+ case ARM::STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA;
+ case ARM_AM::da: return ARM::STMDA;
+ case ARM_AM::db: return ARM::STMDB;
+ case ARM_AM::ib: return ARM::STMIB;
+ }
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA;
+ case ARM_AM::db: return ARM::t2LDMDB;
+ }
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA;
+ case ARM_AM::db: return ARM::t2STMDB;
+ }
+ case ARM::VLDRS:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA;
+ case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
+ }
+ case ARM::VSTRS:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA;
+ case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
+ }
+ case ARM::VLDRD:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA;
+ case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
+ }
+ case ARM::VSTRD:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA;
+ case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
+ }
+ }
+}
+
+namespace llvm {
+ namespace ARM_AM {
+
+AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMIA_UPD:
+ case ARM::STMIA:
+ case ARM::STMIA_UPD:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMIA_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ return ARM_AM::ia;
+
+ case ARM::LDMDA:
+ case ARM::LDMDA_UPD:
+ case ARM::STMDA:
+ case ARM::STMDA_UPD:
+ return ARM_AM::da;
+
+ case ARM::LDMDB:
+ case ARM::LDMDB_UPD:
+ case ARM::STMDB:
+ case ARM::STMDB_UPD:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMDB:
+ case ARM::t2STMDB_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ return ARM_AM::db;
+
+ case ARM::LDMIB:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIB:
+ case ARM::STMIB_UPD:
+ return ARM_AM::ib;
+ }
+}
+
+ } // end namespace ARM_AM
+} // end namespace llvm
+
+static bool isT2i32Load(unsigned Opc) {
+ return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
+}
+
+static bool isi32Load(unsigned Opc) {
+ return Opc == ARM::LDRi12 || isT2i32Load(Opc);
+}
+
+static bool isT2i32Store(unsigned Opc) {
+ return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
+}
+
+static bool isi32Store(unsigned Opc) {
+ return Opc == ARM::STRi12 || isT2i32Store(Opc);
+}
+
+/// MergeOps - Create and insert a LDM or STM with Base as base register and
+/// registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
+bool
+ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill,
+ int Opcode, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned Scratch, DebugLoc dl,
+ ArrayRef<std::pair<unsigned, bool> > Regs,
+ ArrayRef<unsigned> ImpDefs) {
+ // Only a single register to load / store. Don't bother.
+ unsigned NumRegs = Regs.size();
+ if (NumRegs <= 1)
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ // VFP and Thumb2 do not support IB or DA modes.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2;
+ if (Offset == 4 && haveIBAndDA)
+ Mode = ARM_AM::ib;
+ else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
+ Mode = ARM_AM::da;
+ else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
+ Mode = ARM_AM::db;
+ else if (Offset != 0) {
+ // Check if this is a supported opcode before we insert instructions to
+ // calculate a new base register.
+ if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
+
+ // If starting offset isn't zero, insert a MI to materialize a new base.
+ // But only do so if it is cost effective, i.e. merging more than two
+ // loads / stores.
+ if (NumRegs <= 2)
+ return false;
+
+ unsigned NewBase;
+ if (isi32Load(Opcode))
+ // If it is a load, then just use one of the destination register to
+ // use as the new base.
+ NewBase = Regs[NumRegs-1].first;
+ else {
+ // Use the scratch register to use as a new base.
+ NewBase = Scratch;
+ if (NewBase == 0)
+ return false;
+ }
+ int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
+ if (Offset < 0) {
+ BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
+ Offset = - Offset;
+ }
+ int ImmedOffset = isThumb2
+ ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset == -1)
+ // FIXME: Try t2ADDri12 or t2SUBri12?
+ return false; // Probably not worth it then.
+
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ Base = NewBase;
+ BaseKill = true; // New base is always killed right its use.
+ }
+
+ bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VLDRD);
+ Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
+ if (!Opcode) return false;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
+ | getKillRegState(Regs[i].second));
+
+ // Add implicit defs for super-registers.
+ for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
+ MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
+
+ return true;
+}
+
+// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
+// success.
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &memOps,
+ unsigned memOpsBegin, unsigned memOpsEnd,
+ unsigned insertAfter, int Offset,
+ unsigned Base, bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ // First calculate which of the registers should be killed by the merged
+ // instruction.
+ const unsigned insertPos = memOps[insertAfter].Position;
+ SmallSet<unsigned, 4> KilledRegs;
+ DenseMap<unsigned, unsigned> Killer;
+ for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
+ if (i == memOpsBegin) {
+ i = memOpsEnd;
+ if (i == e)
+ break;
+ }
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ SmallVector<unsigned, 8> ImpDefs;
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ unsigned Reg = memOps[i].Reg;
+ // If we are inserting the merged operation after an operation that
+ // uses the same register, make sure to transfer any kill flag.
+ bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
+ Regs.push_back(std::make_pair(Reg, isKill));
+
+ // Collect any implicit defs of super-registers. They must be preserved.
+ for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
+ continue;
+ unsigned DefReg = MO->getReg();
+ if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
+ ImpDefs.push_back(DefReg);
+ }
+ }
+
+ // Try to do the merge.
+ MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
+ ++Loc;
+ if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
+ Pred, PredReg, Scratch, dl, Regs, ImpDefs))
+ return;
+
+ // Merge succeeded, update records.
+ Merges.push_back(prior(Loc));
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ // Remove kill flags from any memops that come before insertPos.
+ if (Regs[i-memOpsBegin].second) {
+ unsigned Reg = Regs[i-memOpsBegin].first;
+ if (KilledRegs.count(Reg)) {
+ unsigned j = Killer[Reg];
+ int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
+ assert(Idx >= 0 && "Cannot find killing operand");
+ memOps[j].MBBI->getOperand(Idx).setIsKill(false);
+ memOps[j].isKill = false;
+ }
+ memOps[i].isKill = true;
+ }
+ MBB.erase(memOps[i].MBBI);
+ // Update this memop to refer to the merged instruction.
+ // We may need to move kill flags again.
+ memOps[i].Merged = true;
+ memOps[i].MBBI = Merges.back();
+ memOps[i].Position = insertPos;
+ }
+}
+
+/// MergeLDR_STR - Merge a number of load / store instructions into one or more
+/// load / store multiple instructions.
+void
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
+ unsigned Base, int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ int Offset = MemOps[SIndex].Offset;
+ int SOffset = Offset;
+ unsigned insertAfter = SIndex;
+ MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+ DebugLoc dl = Loc->getDebugLoc();
+ const MachineOperand &PMO = Loc->getOperand(0);
+ unsigned PReg = PMO.getReg();
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
+ unsigned Count = 1;
+ unsigned Limit = ~0U;
+
+ // vldm / vstm limit are 32 for S variants, 16 for D variants.
+
+ switch (Opcode) {
+ default: break;
+ case ARM::VSTRS:
+ Limit = 32;
+ break;
+ case ARM::VSTRD:
+ Limit = 16;
+ break;
+ case ARM::VLDRD:
+ Limit = 16;
+ break;
+ case ARM::VLDRS:
+ Limit = 32;
+ break;
+ }
+
+ for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+ int NewOffset = MemOps[i].Offset;
+ const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
+ // Register numbers must be in ascending order. For VFP / NEON load and
+ // store multiples, the registers must also be consecutive and within the
+ // limit on the number of registers per instruction.
+ if (Reg != ARM::SP &&
+ NewOffset == Offset + (int)Size &&
+ ((isNotVFP && RegNum > PRegNum) ||
+ ((Count < Limit) && RegNum == PRegNum+1))) {
+ Offset += Size;
+ PRegNum = RegNum;
+ ++Count;
+ } else {
+ // Can't merge this in. Try merge the earlier ones first.
+ MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
+ Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
+ MemOps, Merges);
+ return;
+ }
+
+ if (MemOps[i].Position > MemOps[insertAfter].Position)
+ insertAfter = i;
+ }
+
+ bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
+ Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ return;
+}
+
+static bool definesCPSR(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If the instruction has live CPSR def, then it's not safe to fold it
+ // into load / store.
+ return true;
+ }
+
+ return false;
+}
+
+static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ unsigned MyPredReg = 0;
+ if (!MI)
+ return false;
+
+ bool CheckCPSRDef = false;
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::t2SUBri:
+ case ARM::SUBri:
+ CheckCPSRDef = true;
+ // fallthrough
+ case ARM::tSUBspi:
+ break;
+ }
+
+ // Make sure the offset fits in 8 bits.
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
+ if (!(MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg))
+ return false;
+
+ return CheckCPSRDef ? !definesCPSR(MI) : true;
+}
+
+static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ unsigned MyPredReg = 0;
+ if (!MI)
+ return false;
+
+ bool CheckCPSRDef = false;
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::t2ADDri:
+ case ARM::ADDri:
+ CheckCPSRDef = true;
+ // fallthrough
+ case ARM::tADDspi:
+ break;
+ }
+
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
+ // Make sure the offset fits in 8 bits.
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
+ if (!(MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg))
+ return false;
+
+ return CheckCPSRDef ? !definesCPSR(MI) : true;
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return 4;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return 8;
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::VLDMSIA:
+ case ARM::VSTMSIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
+ }
+}
+
+static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
+ ARM_AM::AMSubMode Mode) {
+ switch (Opc) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA_UPD;
+ case ARM_AM::ib: return ARM::LDMIB_UPD;
+ case ARM_AM::da: return ARM::LDMDA_UPD;
+ case ARM_AM::db: return ARM::LDMDB_UPD;
+ }
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA_UPD;
+ case ARM_AM::ib: return ARM::STMIB_UPD;
+ case ARM_AM::da: return ARM::STMDA_UPD;
+ case ARM_AM::db: return ARM::STMDB_UPD;
+ }
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA_UPD;
+ case ARM_AM::db: return ARM::t2LDMDB_UPD;
+ }
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA_UPD;
+ case ARM_AM::db: return ARM::t2STMDB_UPD;
+ }
+ case ARM::VLDMSIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA_UPD;
+ case ARM_AM::db: return ARM::VLDMSDB_UPD;
+ }
+ case ARM::VLDMDIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA_UPD;
+ case ARM_AM::db: return ARM::VLDMDDB_UPD;
+ }
+ case ARM::VSTMSIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA_UPD;
+ case ARM_AM::db: return ARM::VSTMSDB_UPD;
+ }
+ case ARM::VSTMDIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA_UPD;
+ case ARM_AM::db: return ARM::VSTMDDB_UPD;
+ }
+ }
+}
+
+/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
+/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(0).getReg();
+ bool BaseKill = MI->getOperand(0).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+
+ bool DoMerge = false;
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::da;
+ DoMerge = true;
+ }
+ if (DoMerge)
+ MBB.erase(PrevMBBI);
+ }
+
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+
+ // Transfer the rest of operands.
+ for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ MBB.erase(MBBI);
+ return true;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_PRE_IMM;
+ case ARM::STRi12:
+ return ARM::STR_PRE_IMM;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_PRE;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_PRE;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_POST_IMM;
+ case ARM::STRi12:
+ return ARM::STR_POST_IMM;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_POST;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_POST;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+}
+
+/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
+/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(1).getReg();
+ bool BaseKill = MI->getOperand(1).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
+ bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
+ if (isi32Load(Opcode) || isi32Store(Opcode))
+ if (MI->getOperand(2).getImm() != 0)
+ return false;
+ if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+ return false;
+
+ bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
+ if (MI->getOperand(0).getReg() == Base)
+ return false;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool DoMerge = false;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ unsigned NewOpc = 0;
+ // AM2 - 12 bits, thumb2 - 8 bits.
+ unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ } else if (!isAM5 &&
+ isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
+ MBB.erase(PrevMBBI);
+ }
+ }
+
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (!isAM5 &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ if (isAM5) {
+ // VLDM[SD}_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
+ MachineOperand &MO = MI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(isLd ? BaseKill : false))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
+ getKillRegState(MO.isKill())));
+ } else if (isLd) {
+ if (isAM2) {
+ // LDR_PRE, LDR_POST
+ if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ } else {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+ // t2LDR_PRE, t2LDR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ } else {
+ MachineOperand &MO = MI->getOperand(0);
+ // FIXME: post-indexed stores use am2offset_imm, which still encodes
+ // the vestigal zero-reg offset register. When that's fixed, this clause
+ // can be removed entirely.
+ if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
+ int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ // STR_PRE, STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+ // t2STR_PRE, t2STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ }
+ MBB.erase(MBBI);
+
+ return true;
+}
+
+/// isMemoryOp - Returns true if instruction is a memory operation that this
+/// pass is capable of operating on.
+static bool isMemoryOp(const MachineInstr *MI) {
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI->hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO->getAlignment() < 4)
+ return false;
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).isUndef())
+ return false;
+
+ int Opcode = MI->getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return MI->getOperand(1).isReg();
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return MI->getOperand(1).isReg();
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return MI->getOperand(1).isReg();
+ }
+ return false;
+}
+
+/// AdvanceRS - Advance register scavenger to just before the earliest memory
+/// op that is being merged.
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
+ MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
+ unsigned Position = MemOps[0].Position;
+ for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
+ if (MemOps[i].Position < Position) {
+ Position = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ if (Loc != MBB.begin())
+ RS->forward(prior(Loc));
+}
+
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+
+ if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
+ Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
+ return OffField;
+
+ int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM3) {
+ if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ return Offset;
+}
+
+static void InsertLDR_STR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Offset, bool isDef,
+ DebugLoc dl, unsigned NewOpc,
+ unsigned Reg, bool RegDeadKill, bool RegUndef,
+ unsigned BaseReg, bool BaseKill, bool BaseUndef,
+ bool OffKill, bool OffUndef,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo *TII, bool isT2) {
+ if (isDef) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+}
+
+bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
+ unsigned EvenReg = MI->getOperand(0).getReg();
+ unsigned OddReg = MI->getOperand(1).getReg();
+ unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+ unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
+ // ARM errata 602117: LDRD with base in list may result in incorrect base
+ // register when interrupted or faulted.
+ bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
+ if (!Errata602117 &&
+ ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
+ return false;
+
+ MachineBasicBlock::iterator NewBBI = MBBI;
+ bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
+ bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
+ bool EvenDeadKill = isLd ?
+ MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+ bool EvenUndef = MI->getOperand(0).isUndef();
+ bool OddDeadKill = isLd ?
+ MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+ bool OddUndef = MI->getOperand(1).isUndef();
+ bool BaseKill = BaseOp.isKill();
+ bool BaseUndef = BaseOp.isUndef();
+ bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
+ bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
+ int OffImm = getMemoryOpOffset(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+ if (OddRegNum > EvenRegNum && OffImm == 0) {
+ // Ascending register numbers and no offset. It's safe to change it to a
+ // ldm or stm.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+ : (isT2 ? ARM::t2STMIA : ARM::STMIA);
+ if (isLd) {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ ++NumLDRD2LDM;
+ } else {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg,
+ getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
+ .addReg(OddReg,
+ getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
+ ++NumSTRD2STM;
+ }
+ NewBBI = llvm::prior(MBBI);
+ } else {
+ // Split into two instructions.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
+ // so adjust and use t2LDRi12 here for that.
+ unsigned NewOpc2 = (isLd)
+ ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ DebugLoc dl = MBBI->getDebugLoc();
+ // If this is a load and base register is killed, it may have been
+ // re-defed by the load, make sure the first load does not clobber it.
+ if (isLd &&
+ (BaseKill || OffKill) &&
+ (TRI->regsOverlap(EvenReg, BaseReg))) {
+ assert(!TRI->regsOverlap(OddReg, BaseReg));
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
+ OddReg, OddDeadKill, false,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, false,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ } else {
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
+ }
+ // Never kill the base register in the first instruction.
+ if (EvenReg == BaseReg)
+ EvenDeadKill = false;
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, EvenUndef,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
+ OddReg, OddDeadKill, OddUndef,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ }
+ if (isLd)
+ ++NumLDRD2LDR;
+ else
+ ++NumSTRD2STR;
+ }
+
+ MBB.erase(MI);
+ MBBI = NewBBI;
+ return true;
+ }
+ return false;
+}
+
+/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti.
+/// It adds store mem ops with simple push_back/insert method,
+/// without any additional logic.
+/// For load operation it does the next:
+/// 1. Adds new load operation into MemOp collection at "At" position.
+/// 2. Removes any "load" operations from MemOps, that changes "Reg" register
+/// contents, prior to "At".
+/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector >
+/// UsedUnitRegs - set of unit-registers currently in use.
+/// At - position at which it would added, and prior which the clean-up
+/// should be made (for load operation).
+/// FIXME: The clean-up also should be made for store operations,
+/// but the memory address should be analyzed instead of unit registers.
+unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps,
+ const MemOpQueueEntry NewEntry,
+ UnitRegsMap& UnitRegsInfo,
+ SmallSet<unsigned, 4>& UsedUnitRegs,
+ unsigned At) {
+ unsigned Cleaned = 0;
+
+ if (At == -1U) {
+ At = MemOps.size();
+ MemOps.push_back(NewEntry);
+ } else
+ MemOps.insert(&MemOps[At], NewEntry);
+
+ // FIXME:
+ // If operation is not load, leave it as is by now,
+ // So 0 overridden ops would cleaned in this case.
+ if (!NewEntry.MBBI->mayLoad())
+ return 0;
+
+ const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg];
+
+ bool FoundOverriddenLoads = false;
+
+ for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i)
+ if (UsedUnitRegs.count(NewEntryUnitRegs[i])) {
+ FoundOverriddenLoads = true;
+ break;
+ }
+
+ // If we detect that this register is used by load operations that are
+ // predecessors for the new one, remove them from MemOps then.
+ if (FoundOverriddenLoads) {
+ MemOpQueue UpdatedMemOps;
+
+ // Scan through MemOps entries.
+ for (unsigned i = 0; i != At; ++i) {
+ MemOpQueueEntry& MemOpEntry = MemOps[i];
+
+ // FIXME: Skip non-load operations by now.
+ if (!MemOpEntry.MBBI->mayLoad())
+ continue;
+
+ const SmallVector<unsigned, 4>& MemOpUnitRegs =
+ UnitRegsInfo[MemOpEntry.Reg];
+
+ // Lookup entry that loads contents into register used by new entry.
+ bool ReleaseThisEntry = false;
+ for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) {
+ if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(),
+ MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) {
+ ReleaseThisEntry = true;
+ ++Cleaned;
+ break;
+ }
+ }
+
+ if (ReleaseThisEntry) {
+ const SmallVector<unsigned, 4>& RelesedRegs = UnitRegsInfo[MemOpEntry.Reg];
+ for (unsigned r = 0, er = RelesedRegs.size(); r != er; ++r)
+ UsedUnitRegs.erase(RelesedRegs[r]);
+ } else
+ UpdatedMemOps.push_back(MemOpEntry);
+ }
+
+ // Keep anything without changes after At position.
+ for (unsigned i = At, e = MemOps.size(); i != e; ++i)
+ UpdatedMemOps.push_back(MemOps[i]);
+
+ MemOps.swap(UpdatedMemOps);
+ }
+
+ UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end());
+
+ return Cleaned;
+}
+
+/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
+/// ops of the same base and incrementing offset into LDM / STM ops.
+bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+ unsigned NumMerges = 0;
+ unsigned NumMemOps = 0;
+ MemOpQueue MemOps;
+ UnitRegsMap UnitRegsInfo(TRI);
+ SmallSet<unsigned, 4> UsedRegUnits;
+ unsigned CurrBase = 0;
+ int CurrOpc = -1;
+ unsigned CurrSize = 0;
+ ARMCC::CondCodes CurrPred = ARMCC::AL;
+ unsigned CurrPredReg = 0;
+ unsigned Position = 0;
+ SmallVector<MachineBasicBlock::iterator,4> Merges;
+
+ RS->enterBasicBlock(&MBB);
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ if (FixInvalidRegPairOp(MBB, MBBI))
+ continue;
+
+ bool Advance = false;
+ bool TryMerge = false;
+ bool Clobber = false;
+
+ bool isMemOp = isMemoryOp(MBBI);
+ if (isMemOp) {
+ int Opcode = MBBI->getOpcode();
+ unsigned Size = getLSMultipleTransferSize(MBBI);
+ const MachineOperand &MO = MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ bool isKill = MO.isDef() ? false : MO.isKill();
+ unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
+ int Offset = getMemoryOpOffset(MBBI);
+ // Watch out for:
+ // r4 := ldr [r5]
+ // r5 := ldr [r5, #4]
+ // r6 := ldr [r5, #8]
+ //
+ // The second ldr has effectively broken the chain even though it
+ // looks like the later ldr(s) use the same base register. Try to
+ // merge the ldr's so far, including this one. But don't try to
+ // combine the following ldr(s).
+ Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+ if (CurrBase == 0 && !Clobber) {
+ // Start of a new chain.
+ CurrBase = Base;
+ CurrOpc = Opcode;
+ CurrSize = Size;
+ CurrPred = Pred;
+ CurrPredReg = PredReg;
+
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
+ ++NumMemOps;
+ const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg];
+ UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end());
+ Advance = true;
+ } else {
+ if (Clobber) {
+ TryMerge = true;
+ Advance = true;
+ }
+
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // No need to match PredReg.
+ // Continue adding to the queue.
+ if (Offset > MemOps.back().Offset) {
+ unsigned OverridesCleaned =
+ AddMemOp(MemOps,
+ MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI),
+ UnitRegsInfo, UsedRegUnits) != 0;
+ NumMemOps += 1 - OverridesCleaned;
+ Advance = true;
+ } else {
+ for (unsigned I = 0; I != NumMemOps; ++I) {
+ if (Offset < MemOps[I].Offset) {
+ MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI);
+ unsigned OverridesCleaned =
+ AddMemOp(MemOps, entry, UnitRegsInfo,
+ UsedRegUnits, I) != 0;
+ NumMemOps += 1 - OverridesCleaned;
+
+ Advance = true;
+ break;
+ } else if (Offset == MemOps[I].Offset) {
+ // Collision! This can't be merged!
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == E)
+ // Reach the end of the block, try merging the memory instructions.
+ TryMerge = true;
+ } else if (Advance) {
+ ++Position;
+ ++MBBI;
+ if (MBBI == E)
+ // Reach the end of the block, try merging the memory instructions.
+ TryMerge = true;
+ } else
+ TryMerge = true;
+
+ if (TryMerge) {
+ if (NumMemOps > 1) {
+ // Try to find a free register to use as a new base in case it's needed.
+ // First advance to the instruction just before the start of the chain.
+ AdvanceRS(MBB, MemOps);
+ // Find a scratch register.
+ unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
+ // Process the load / store instructions.
+ RS->forward(prior(MBBI));
+
+ // Merge ops.
+ Merges.clear();
+ MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+ CurrPred, CurrPredReg, Scratch, MemOps, Merges);
+
+ // Try folding preceding/trailing base inc/dec into the generated
+ // LDM/STM ops.
+ for (unsigned i = 0, e = Merges.size(); i < e; ++i)
+ if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
+ ++NumMerges;
+ NumMerges += Merges.size();
+
+ // Try folding preceding/trailing base inc/dec into those load/store
+ // that were not merged to form LDM/STM ops.
+ for (unsigned i = 0; i != NumMemOps; ++i)
+ if (!MemOps[i].Merged)
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
+ ++NumMerges;
+
+ // RS may be pointing to an instruction that's deleted.
+ RS->skipTo(prior(MBBI));
+ } else if (NumMemOps == 1) {
+ // Try folding preceding/trailing base inc/dec into the single
+ // load/store.
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+ ++NumMerges;
+ RS->forward(prior(MBBI));
+ }
+ }
+
+ CurrBase = 0;
+ CurrOpc = -1;
+ CurrSize = 0;
+ CurrPred = ARMCC::AL;
+ CurrPredReg = 0;
+ if (NumMemOps) {
+ MemOps.clear();
+ UsedRegUnits.clear();
+ NumMemOps = 0;
+ }
+
+ // If iterator hasn't been advanced and this is not a memory op, skip it.
+ // It can't start a new chain anyway.
+ if (!Advance && !isMemOp && MBBI != E) {
+ ++Position;
+ ++MBBI;
+ }
+ }
+ }
+ return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
+/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
+/// directly restore the value of LR into pc.
+/// ldmfd sp!, {..., lr}
+/// bx lr
+/// or
+/// ldmfd sp!, {..., lr}
+/// mov pc, lr
+/// =>
+/// ldmfd sp!, {..., pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ if (MBB.empty()) return false;
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI != MBB.begin() &&
+ (MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::MOVPCLR)) {
+ MachineInstr *PrevMI = prior(MBBI);
+ unsigned Opcode = PrevMI->getOpcode();
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
+ Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
+ MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ if (MO.getReg() != ARM::LR)
+ return false;
+ unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
+ assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
+ Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
+ PrevMI->setDesc(TII->get(NewOpc));
+ MO.setReg(ARM::PC);
+ PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
+ MBB.erase(MBBI);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ STI = &TM.getSubtarget<ARMSubtarget>();
+ RS = new RegScavenger();
+ isThumb2 = AFI->isThumb2Function();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= LoadStoreMultipleOpti(MBB);
+ if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
+ Modified |= MergeReturnIntoLDM(MBB);
+ }
+
+ delete RS;
+ return Modified;
+}
+
+
+/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
+/// load / stores from consecutive locations close to make it more
+/// likely they will be combined later.
+
+namespace {
+ struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
+ static char ID;
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ const DataLayout *TD;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ MachineRegisterInfo *MRI;
+ MachineFunction *MF;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pre- register allocation load / store optimization pass";
+ }
+
+ private:
+ bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset,
+ unsigned &PredReg, ARMCC::CondCodes &Pred,
+ bool &isT2);
+ bool RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+ bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ };
+ char ARMPreAllocLoadStoreOpt::ID = 0;
+}
+
+bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TD = Fn.getTarget().getDataLayout();
+ TII = Fn.getTarget().getInstrInfo();
+ TRI = Fn.getTarget().getRegisterInfo();
+ STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ MRI = &Fn.getRegInfo();
+ MF = &Fn;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI)
+ Modified |= RescheduleLoadStoreInstrs(MFI);
+
+ return Modified;
+}
+
+static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallSet<unsigned, 4> &MemRegs,
+ const TargetRegisterInfo *TRI) {
+ // Are there stores / loads / calls between them?
+ // FIXME: This is overly conservative. We should make use of alias information
+ // some day.
+ SmallSet<unsigned, 4> AddedRegPressure;
+ while (++I != E) {
+ if (I->isDebugValue() || MemOps.count(&*I))
+ continue;
+ if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
+ return false;
+ if (isLd && I->mayStore())
+ return false;
+ if (!isLd) {
+ if (I->mayLoad())
+ return false;
+ // It's not safe to move the first 'str' down.
+ // str r1, [r0]
+ // strh r5, [r0]
+ // str r4, [r0, #+4]
+ if (I->mayStore())
+ return false;
+ }
+ for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef() && TRI->regsOverlap(Reg, Base))
+ return false;
+ if (Reg != Base && !MemRegs.count(Reg))
+ AddedRegPressure.insert(Reg);
+ }
+ }
+
+ // Estimate register pressure increase due to the transformation.
+ if (MemRegs.size() <= 4)
+ // Ok if we are moving small number of instructions.
+ return true;
+ return AddedRegPressure.size() <= MemRegs.size() * 2;
+}
+
+
+/// Copy Op0 and Op1 operands into a new array assigned to MI.
+static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
+ MachineInstr *Op1) {
+ assert(MI->memoperands_empty() && "expected a new machineinstr");
+ size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
+ + (Op1->memoperands_end() - Op1->memoperands_begin());
+
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
+ MachineSDNode::mmo_iterator MemEnd =
+ std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
+ MemEnd =
+ std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
+ MI->setMemRefs(MemBegin, MemEnd);
+}
+
+bool
+ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
+ DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset, unsigned &PredReg,
+ ARMCC::CondCodes &Pred,
+ bool &isT2) {
+ // Make sure we're allowed to generate LDRD/STRD.
+ if (!STI->hasV5TEOps())
+ return false;
+
+ // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
+ unsigned Scale = 1;
+ unsigned Opcode = Op0->getOpcode();
+ if (Opcode == ARM::LDRi12)
+ NewOpc = ARM::LDRD;
+ else if (Opcode == ARM::STRi12)
+ NewOpc = ARM::STRD;
+ else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ NewOpc = ARM::t2LDRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
+ NewOpc = ARM::t2STRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else
+ return false;
+
+ // Make sure the base address satisfies i64 ld / st alignment requirement.
+ if (!Op0->hasOneMemOperand() ||
+ !(*Op0->memoperands_begin())->getValue() ||
+ (*Op0->memoperands_begin())->isVolatile())
+ return false;
+
+ unsigned Align = (*Op0->memoperands_begin())->getAlignment();
+ const Function *Func = MF->getFunction();
+ unsigned ReqAlign = STI->hasV6Ops()
+ ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
+ : 8; // Pre-v6 need 8-byte align
+ if (Align < ReqAlign)
+ return false;
+
+ // Then make sure the immediate offset fits.
+ int OffImm = getMemoryOpOffset(Op0);
+ if (isT2) {
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
+ return false;
+ Offset = OffImm;
+ } else {
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (OffImm < 0) {
+ AddSub = ARM_AM::sub;
+ OffImm = - OffImm;
+ }
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm & (Scale-1)))
+ return false;
+ Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
+ }
+ EvenReg = Op0->getOperand(0).getReg();
+ OddReg = Op1->getOperand(0).getReg();
+ if (EvenReg == OddReg)
+ return false;
+ BaseReg = Op0->getOperand(1).getReg();
+ Pred = getInstrPredicate(Op0, PredReg);
+ dl = Op0->getDebugLoc();
+ return true;
+}
+
+namespace {
+ struct OffsetCompare {
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ }
+ };
+}
+
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+ bool RetVal = false;
+
+ // Sort by offset (in reverse order).
+ std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+
+ // The loads / stores of the same base are in order. Scan them from first to
+ // last and check for the following:
+ // 1. Any def of base.
+ // 2. Any gaps.
+ while (Ops.size() > 1) {
+ unsigned FirstLoc = ~0U;
+ unsigned LastLoc = 0;
+ MachineInstr *FirstOp = 0;
+ MachineInstr *LastOp = 0;
+ int LastOffset = 0;
+ unsigned LastOpcode = 0;
+ unsigned LastBytes = 0;
+ unsigned NumMove = 0;
+ for (int i = Ops.size() - 1; i >= 0; --i) {
+ MachineInstr *Op = Ops[i];
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
+
+ unsigned LSMOpcode
+ = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
+ if (LastOpcode && LSMOpcode != LastOpcode)
+ break;
+
+ int Offset = getMemoryOpOffset(Op);
+ unsigned Bytes = getLSMultipleTransferSize(Op);
+ if (LastBytes) {
+ if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
+ break;
+ }
+ LastOffset = Offset;
+ LastBytes = Bytes;
+ LastOpcode = LSMOpcode;
+ if (++NumMove == 8) // FIXME: Tune this limit.
+ break;
+ }
+
+ if (NumMove <= 1)
+ Ops.pop_back();
+ else {
+ SmallPtrSet<MachineInstr*, 4> MemOps;
+ SmallSet<unsigned, 4> MemRegs;
+ for (int i = NumMove-1; i >= 0; --i) {
+ MemOps.insert(Ops[i]);
+ MemRegs.insert(Ops[i]->getOperand(0).getReg());
+ }
+
+ // Be conservative, if the instructions are too far apart, don't
+ // move them. We want to limit the increase of register pressure.
+ bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
+ if (DoMove)
+ DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
+ MemOps, MemRegs, TRI);
+ if (!DoMove) {
+ for (unsigned i = 0; i != NumMove; ++i)
+ Ops.pop_back();
+ } else {
+ // This is the new location for the loads / stores.
+ MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
+ while (InsertPos != MBB->end()
+ && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
+ ++InsertPos;
+
+ // If we are moving a pair of loads / stores, see if it makes sense
+ // to try to allocate a pair of registers that can form register pairs.
+ MachineInstr *Op0 = Ops.back();
+ MachineInstr *Op1 = Ops[Ops.size()-2];
+ unsigned EvenReg = 0, OddReg = 0;
+ unsigned BaseReg = 0, PredReg = 0;
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ bool isT2 = false;
+ unsigned NewOpc = 0;
+ int Offset = 0;
+ DebugLoc dl;
+ if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
+ EvenReg, OddReg, BaseReg,
+ Offset, PredReg, Pred, isT2)) {
+ Ops.pop_back();
+ Ops.pop_back();
+
+ const MCInstrDesc &MCID = TII->get(NewOpc);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
+ MRI->constrainRegClass(EvenReg, TRC);
+ MRI->constrainRegClass(OddReg, TRC);
+
+ // Form the pair instruction.
+ if (isLd) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
+ .addReg(EvenReg, RegState::Define)
+ .addReg(OddReg, RegState::Define)
+ .addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always by reg0 since we're transforming LDRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ concatenateMemOperands(MIB, Op0, Op1);
+ DEBUG(dbgs() << "Formed " << *MIB << "\n");
+ ++NumLDRDFormed;
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
+ .addReg(EvenReg)
+ .addReg(OddReg)
+ .addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always by reg0 since we're transforming STRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ concatenateMemOperands(MIB, Op0, Op1);
+ DEBUG(dbgs() << "Formed " << *MIB << "\n");
+ ++NumSTRDFormed;
+ }
+ MBB->erase(Op0);
+ MBB->erase(Op1);
+
+ // Add register allocation hints to form register pairs.
+ MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
+ MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
+ } else {
+ for (unsigned i = 0; i != NumMove; ++i) {
+ MachineInstr *Op = Ops.back();
+ Ops.pop_back();
+ MBB->splice(InsertPos, MBB, Op);
+ }
+ }
+
+ NumLdStMoved += NumMove;
+ RetVal = true;
+ }
+ }
+ }
+
+ return RetVal;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+ bool RetVal = false;
+
+ DenseMap<MachineInstr*, unsigned> MI2LocMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
+ SmallVector<unsigned, 4> LdBases;
+ SmallVector<unsigned, 4> StBases;
+
+ unsigned Loc = 0;
+ MachineBasicBlock::iterator MBBI = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ while (MBBI != E) {
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ if (MI->isCall() || MI->isTerminator()) {
+ // Stop at barriers.
+ ++MBBI;
+ break;
+ }
+
+ if (!MI->isDebugValue())
+ MI2LocMap[MI] = ++Loc;
+
+ if (!isMemoryOp(MI))
+ continue;
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ continue;
+
+ int Opc = MI->getOpcode();
+ bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+ unsigned Base = MI->getOperand(1).getReg();
+ int Offset = getMemoryOpOffset(MI);
+
+ bool StopHere = false;
+ if (isLd) {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2LdsMap.find(Base);
+ if (BI != Base2LdsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2LdsMap[Base] = MIs;
+ LdBases.push_back(Base);
+ }
+ } else {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2StsMap.find(Base);
+ if (BI != Base2StsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2StsMap[Base] = MIs;
+ StBases.push_back(Base);
+ }
+ }
+
+ if (StopHere) {
+ // Found a duplicate (a base+offset combination that's seen earlier).
+ // Backtrack.
+ --Loc;
+ break;
+ }
+ }
+
+ // Re-schedule loads.
+ for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
+ unsigned Base = LdBases[i];
+ SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+ if (Lds.size() > 1)
+ RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ }
+
+ // Re-schedule stores.
+ for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
+ unsigned Base = StBases[i];
+ SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+ if (Sts.size() > 1)
+ RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ }
+
+ if (MBBI != E) {
+ Base2LdsMap.clear();
+ Base2StsMap.clear();
+ LdBases.clear();
+ StBases.clear();
+ }
+ }
+
+ return RetVal;
+}
+
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
+ if (PreAlloc)
+ return new ARMPreAllocLoadStoreOpt();
+ return new ARMLoadStoreOpt();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
new file mode 100644
index 000000000000..b6414832003d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -0,0 +1,125 @@
+//===-- ARMMCInstLower.cpp - Convert ARM MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower ARM MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+
+MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
+ const MCExpr *Expr;
+ switch (MO.getTargetFlags()) {
+ default: {
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on symbol operand");
+ case 0:
+ break;
+ case ARMII::MO_LO16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::CreateLower16(Expr, OutContext);
+ break;
+ case ARMII::MO_HI16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::CreateUpper16(Expr, OutContext);
+ break;
+ }
+ break;
+ }
+
+ case ARMII::MO_PLT:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT,
+ OutContext);
+ break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(),
+ OutContext),
+ OutContext);
+ return MCOperand::CreateExpr(Expr);
+
+}
+
+bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all non-CPSR implicit register operands.
+ if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO,
+ GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers.
+ return false;
+ }
+ return true;
+}
+
+void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (AP.lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..af445e2f35aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void ARMFunctionInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 000000000000..88d96c0be8a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,275 @@
+//===-- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// isThumb - True if this function is compiled under Thumb mode.
+ /// Used to initialized Align, so must precede it.
+ bool isThumb;
+
+ /// hasThumb2 - True if the target architecture supports Thumb2. Do not use
+ /// to determine if function is compiled under Thumb mode, for that use
+ /// 'isThumb'.
+ bool hasThumb2;
+
+ /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+ ///
+ unsigned VarArgsRegSaveSize;
+
+ /// HasStackFrame - True if this function has a stack frame. Set by
+ /// processFunctionBeforeCalleeSavedScan().
+ bool HasStackFrame;
+
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
+ /// LRSpilledForFarJump - True if the LR register has been for spilled to
+ /// enable far jump.
+ bool LRSpilledForFarJump;
+
+ /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
+ /// spill stack offset.
+ unsigned FramePtrSpillOffset;
+
+ /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
+ /// register spills areas. For Mac OS X:
+ ///
+ /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+ /// --------------------------------------------
+ /// GPR callee-saved (2) : r8, r10, r11
+ /// --------------------------------------------
+ /// DPR callee-saved : d8 - d15
+ ///
+ /// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3.
+ /// Some may be spilled after the stack has been realigned.
+ unsigned GPRCS1Offset;
+ unsigned GPRCS2Offset;
+ unsigned DPRCSOffset;
+
+ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
+ /// areas.
+ unsigned GPRCS1Size;
+ unsigned GPRCS2Size;
+ unsigned DPRCSSize;
+
+ /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
+ /// which belong to these spill areas.
+ BitVector GPRCS1Frames;
+ BitVector GPRCS2Frames;
+ BitVector DPRCSFrames;
+
+ /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
+ /// the aligned portion of the stack frame. This is always a contiguous
+ /// sequence of D-registers starting from d8.
+ ///
+ /// We do not keep track of the frame indices used for these registers - they
+ /// behave like any other frame index in the aligned stack frame. These
+ /// registers also aren't included in DPRCSSize above.
+ unsigned NumAlignedDPRCS2Regs;
+
+ /// JumpTableUId - Unique id for jumptables.
+ ///
+ unsigned JumpTableUId;
+
+ unsigned PICLabelUId;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// HasITBlocks - True if IT blocks have been inserted.
+ bool HasITBlocks;
+
+ /// CPEClones - Track constant pool entries clones created by Constant Island
+ /// pass.
+ DenseMap<unsigned, unsigned> CPEClones;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+public:
+ ARMFunctionInfo() :
+ isThumb(false),
+ hasThumb2(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ NumAlignedDPRCS2Regs(0),
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
+
+ explicit ARMFunctionInfo(MachineFunction &MF) :
+ isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
+
+ bool isThumbFunction() const { return isThumb; }
+ bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
+ bool isThumb2Function() const { return isThumb && hasThumb2; }
+
+ unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
+ void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+
+ bool hasStackFrame() const { return HasStackFrame; }
+ void setHasStackFrame(bool s) { HasStackFrame = s; }
+
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
+ bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
+ void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
+
+ unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+ void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+
+ unsigned getNumAlignedDPRCS2Regs() const { return NumAlignedDPRCS2Regs; }
+ void setNumAlignedDPRCS2Regs(unsigned n) { NumAlignedDPRCS2Regs = n; }
+
+ unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+ unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
+
+ void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+ void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+ unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+
+ void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+ void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
+
+ bool isGPRCalleeSavedArea1Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS1Frames.size())
+ return false;
+ return GPRCS1Frames[fi];
+ }
+ bool isGPRCalleeSavedArea2Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS2Frames.size())
+ return false;
+ return GPRCS2Frames[fi];
+ }
+ bool isDPRCalleeSavedAreaFrame(int fi) const {
+ if (fi < 0 || fi >= (int)DPRCSFrames.size())
+ return false;
+ return DPRCSFrames[fi];
+ }
+
+ void addGPRCalleeSavedArea1Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS1Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS1Frames.resize(Size);
+ }
+ GPRCS1Frames[fi] = true;
+ }
+ }
+ void addGPRCalleeSavedArea2Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS2Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS2Frames.resize(Size);
+ }
+ GPRCS2Frames[fi] = true;
+ }
+ }
+ void addDPRCalleeSavedAreaFrame(int fi) {
+ if (fi >= 0) {
+ int Size = DPRCSFrames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ DPRCSFrames.resize(Size);
+ }
+ DPRCSFrames[fi] = true;
+ }
+ }
+
+ unsigned createJumpTableUId() {
+ return JumpTableUId++;
+ }
+
+ unsigned getNumJumpTables() const {
+ return JumpTableUId;
+ }
+
+ void initPICLabelUId(unsigned UId) {
+ PICLabelUId = UId;
+ }
+
+ unsigned getNumPICLabels() const {
+ return PICLabelUId;
+ }
+
+ unsigned createPICLabelUId() {
+ return PICLabelUId++;
+ }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasITBlocks() const { return HasITBlocks; }
+ void setHasITBlocks(bool h) { HasITBlocks = h; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
+ if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
+ assert(0 && "Duplicate entries!");
+ }
+
+ unsigned getOriginalCPIdx(unsigned CloneIdx) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = CPEClones.find(CloneIdx);
+ if (I != CPEClones.end())
+ return I->second;
+ else
+ return -1U;
+ }
+};
+} // End llvm namespace
+
+#endif // ARMMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
new file mode 100644
index 000000000000..efa22fbed9f7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle using neon instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 242 entries have cost 1
+// 1447 entries have cost 2
+// 3602 entries have cost 3
+// 1237 entries have cost 4
+// 2 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
+ 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
+ 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
+ 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+ 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
+ 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
+ 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
+ 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
+ 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
+ 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
+ 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
+ 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
+ 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
+ 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
+ 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
+ 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
+ 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
+ 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
+ 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
+ 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
+ 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
+ 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
+ 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
+ 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
+ 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
+ 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
+ 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
+ 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
+ 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
+ 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
+ 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
+ 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
+ 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
+ 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
+ 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
+ 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
+ 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
+ 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
+ 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
+ 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+ 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
+ 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
+ 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
+ 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
+ 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
+ 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
+ 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
+ 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
+ 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
+ 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
+ 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
+ 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
+ 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
+ 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
+ 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
+ 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
+ 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
+ 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
+ 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
+ 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
+ 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
+ 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
+ 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
+ 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
+ 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
+ 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
+ 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
+ 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
+ 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
+ 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
+ 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
+ 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
+ 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
+ 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
+ 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
+ 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
+ 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
+ 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+ 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
+ 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
+ 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
+ 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
+ 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
+ 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
+ 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
+ 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
+ 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
+ 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
+ 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
+ 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
+ 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
+ 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
+ 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
+ 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
+ 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
+ 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
+ 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
+ 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
+ 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
+ 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
+ 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
+ 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
+ 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
+ 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
+ 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
+ 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
+ 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
+ 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
+ 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
+ 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
+ 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
+ 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
+ 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
+ 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
+ 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
+ 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
+ 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
+ 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
+ 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
+ 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
+ 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
+ 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
+ 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
+ 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
+ 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
+ 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
+ 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
+ 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
+ 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
+ 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
+ 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
+ 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
+ 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
+ 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
+ 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
+ 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
+ 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
+ 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
+ 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
+ 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
+ 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
+ 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
+ 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
+ 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
+ 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
+ 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
+ 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
+ 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
+ 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
+ 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
+ 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
+ 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
+ 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
+ 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
+ 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
+ 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
+ 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
+ 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
+ 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
+ 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
+ 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
+ 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
+ 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
+ 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
+ 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
+ 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
+ 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
+ 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
+ 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
+ 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
+ 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
+ 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
+ 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
+ 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
+ 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
+ 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
+ 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
+ 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
+ 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
+ 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
+ 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
+ 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
+ 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
+ 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
+ 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
+ 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
+ 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
+ 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
+ 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
+ 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
+ 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
+ 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
+ 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
+ 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
+ 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
+ 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
+ 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
+ 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
+ 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
+ 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+ 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
+ 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
+ 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
+ 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
+ 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
+ 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
+ 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
+ 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
+ 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
+ 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
+ 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
+ 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
+ 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
+ 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+ 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
+ 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
+ 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+ 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
+ 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
+ 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
+ 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
+ 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
+ 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
+ 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
+ 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
+ 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
+ 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
+ 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
+ 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
+ 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
+ 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
+ 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
+ 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
+ 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
+ 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
+ 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
+ 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
+ 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
+ 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
+ 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
+ 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
+ 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
+ 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+ 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
+ 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
+ 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
+ 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
+ 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
+ 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
+ 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
+ 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
+ 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
+ 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
+ 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
+ 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
+ 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
+ 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
+ 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+ 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
+ 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
+ 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
+ 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
+ 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
+ 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
+ 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
+ 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
+ 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
+ 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
+ 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
+ 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
+ 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
+ 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
+ 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
+ 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
+ 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
+ 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
+ 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
+ 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
+ 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
+ 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
+ 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
+ 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
+ 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
+ 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
+ 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
+ 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
+ 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
+ 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
+ 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
+ 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
+ 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
+ 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
+ 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
+ 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
+ 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
+ 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
+ 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
+ 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
+ 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
+ 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
+ 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
+ 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
+ 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
+ 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
+ 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
+ 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
+ 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
+ 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
+ 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+ 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+ 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
+ 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
+ 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+ 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
+ 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
+ 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
+ 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
+ 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
+ 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
+ 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
+ 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
+ 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
+ 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
+ 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
+ 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
+ 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
+ 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
+ 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
+ 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
+ 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
+ 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
+ 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
+ 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
+ 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
+ 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
+ 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
+ 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
+ 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
+ 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
+ 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
+ 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
+ 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
+ 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
+ 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
+ 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
+ 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
+ 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
+ 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
+ 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
+ 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
+ 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
+ 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
+ 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
+ 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
+ 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
+ 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
+ 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
+ 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
+ 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
+ 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
+ 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
+ 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
+ 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
+ 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
+ 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
+ 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+ 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
+ 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
+ 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
+ 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
+ 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
+ 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
+ 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
+ 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
+ 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
+ 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
+ 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
+ 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
+ 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
+ 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
+ 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
+ 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
+ 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
+ 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+ 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
+ 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
+ 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
+ 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
+ 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
+ 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
+ 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
+ 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
+ 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
+ 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
+ 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
+ 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+ 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+ 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+ 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
+ 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+ 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+ 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+ 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
+ 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
+ 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+ 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
+ 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
+ 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
+ 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
+ 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
+ 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
+ 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
+ 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
+ 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
+ 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+ 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+ 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
+ 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
+ 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
+ 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
+ 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
+ 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
+ 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
+ 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
+ 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
+ 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
+ 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
+ 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
+ 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
+ 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
+ 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
+ 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
+ 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
+ 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
+ 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
+ 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
+ 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
+ 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
+ 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
+ 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
+ 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
+ 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
+ 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
+ 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
+ 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
+ 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
+ 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
+ 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+ 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
+ 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
+ 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
+ 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
+ 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
+ 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
+ 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
+ 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
+ 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
+ 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
+ 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
+ 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
+ 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
+ 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
+ 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
+ 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
+ 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
+ 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
+ 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
+ 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+ 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
+ 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
+ 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
+ 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+ 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
+ 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
+ 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
+ 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
+ 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
+ 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
+ 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
+ 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
+ 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
+ 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
+ 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
+ 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
+ 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
+ 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
+ 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
+ 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
+ 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
+ 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+ 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
+ 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
+ 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
+ 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
+ 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
+ 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
+ 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
+ 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+ 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
+ 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
+ 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
+ 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
+ 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
+ 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
+ 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
+ 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
+ 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
+ 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
+ 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
+ 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
+ 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
+ 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
+ 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
+ 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
+ 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+ 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+ 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
+ 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
+ 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
+ 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
+ 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
+ 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
+ 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
+ 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
+ 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+ 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
+ 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+ 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
+ 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
+ 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
+ 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+ 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
+ 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
+ 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
+ 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+ 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
+ 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
+ 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
+ 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
+ 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
+ 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
+ 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
+ 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
+ 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
+ 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
+ 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
+ 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
+ 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
+ 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+ 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
+ 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
+ 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
+ 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
+ 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
+ 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
+ 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
+ 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
+ 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
+ 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
+ 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
+ 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
+ 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
+ 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
+ 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
+ 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
+ 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
+ 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
+ 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
+ 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
+ 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
+ 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
+ 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
+ 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
+ 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
+ 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
+ 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
+ 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
+ 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
+ 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
+ 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
+ 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
+ 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
+ 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
+ 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
+ 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
+ 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
+ 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
+ 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
+ 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
+ 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
+ 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
+ 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
+ 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
+ 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
+ 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
+ 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
+ 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
+ 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
+ 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
+ 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
+ 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
+ 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
+ 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
+ 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
+ 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
+ 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
+ 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
+ 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
+ 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
+ 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
+ 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
+ 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
+ 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
+ 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
+ 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+ 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
+ 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
+ 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
+ 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
+ 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
+ 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
+ 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
+ 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
+ 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
+ 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
+ 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+ 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
+ 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+ 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+ 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
+ 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
+ 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
+ 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+ 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
+ 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
+ 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
+ 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
+ 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
+ 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
+ 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
+ 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
+ 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
+ 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
+ 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
+ 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
+ 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
+ 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+ 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
+ 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
+ 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
+ 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
+ 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
+ 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
+ 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
+ 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
+ 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
+ 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
+ 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
+ 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
+ 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
+ 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
+ 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
+ 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
+ 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
+ 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
+ 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
+ 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
+ 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
+ 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
+ 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+ 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
+ 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
+ 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
+ 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
+ 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
+ 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
+ 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
+ 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
+ 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
+ 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
+ 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
+ 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
+ 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
+ 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
+ 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
+ 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
+ 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+ 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
+ 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
+ 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+ 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
+ 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
+ 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
+ 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+ 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
+ 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
+ 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
+ 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
+ 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
+ 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
+ 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
+ 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
+ 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
+ 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
+ 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
+ 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
+ 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
+ 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
+ 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
+ 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
+ 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
+ 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
+ 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
+ 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
+ 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
+ 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
+ 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
+ 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+ 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
+ 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
+ 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
+ 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
+ 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
+ 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
+ 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
+ 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
+ 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
+ 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
+ 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
+ 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
+ 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
+ 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
+ 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
+ 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
+ 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
+ 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
+ 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
+ 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
+ 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
+ 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
+ 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
+ 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
+ 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
+ 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
+ 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
+ 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
+ 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
+ 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
+ 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
+ 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
+ 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
+ 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
+ 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
+ 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
+ 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
+ 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
+ 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
+ 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
+ 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
+ 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+ 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
+ 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
+ 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
+ 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
+ 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+ 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
+ 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
+ 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
+ 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
+ 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
+ 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
+ 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
+ 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
+ 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
+ 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
+ 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
+ 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+ 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+ 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
+ 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
+ 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
+ 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
+ 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
+ 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
+ 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+ 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+ 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
+ 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
+ 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+ 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
+ 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
+ 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
+ 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
+ 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+ 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
+ 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
+ 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
+ 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
+ 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
+ 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
+ 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+ 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
+ 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
+ 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
+ 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
+ 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
+ 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
+ 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+ 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+ 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+ 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+ 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
+ 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
+ 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
+ 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
+ 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
+ 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
+ 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
+ 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
+ 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
+ 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
+ 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
+ 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
+ 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
+ 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
+ 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
+ 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
+ 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
+ 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
+ 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
+ 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+ 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
+ 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
+ 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
+ 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+ 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
+ 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
+ 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
+ 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+ 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
+ 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
+ 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
+ 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
+ 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
+ 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
+ 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
+ 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
+ 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
+ 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
+ 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
+ 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
+ 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
+ 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
+ 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
+ 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
+ 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
+ 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
+ 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
+ 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
+ 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+ 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
+ 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
+ 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
+ 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
+ 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
+ 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
+ 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
+ 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
+ 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
+ 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
+ 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
+ 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
+ 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
+ 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
+ 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
+ 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
+ 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
+ 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
+ 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
+ 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
+ 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
+ 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
+ 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
+ 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
+ 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
+ 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
+ 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
+ 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
+ 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
+ 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
+ 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
+ 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
+ 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
+ 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
+ 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
+ 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
+ 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
+ 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
+ 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
+ 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
+ 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
+ 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
+ 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
+ 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
+ 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
+ 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
+ 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
+ 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
+ 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
+ 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
+ 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
+ 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
+ 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
+ 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
+ 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
+ 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
+ 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
+ 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
+ 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
+ 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
+ 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+ 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
+ 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
+ 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+ 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
+ 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
+ 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
+ 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
+ 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
+ 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
+ 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
+ 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
+ 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
+ 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
+ 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
+ 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
+ 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
+ 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+ 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
+ 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
+ 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
+ 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
+ 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
+ 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+ 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
+ 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+ 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
+ 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+ 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
+ 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
+ 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
+ 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
+ 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
+ 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+ 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+ 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
+ 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
+ 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
+ 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+ 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
+ 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
+ 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
+ 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
+ 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
+ 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
+ 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
+ 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
+ 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
+ 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+ 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
+ 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
+ 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+ 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
+ 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
+ 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
+ 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
+ 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
+ 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
+ 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
+ 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
+ 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
+ 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
+ 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
+ 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
+ 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
+ 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
+ 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
+ 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
+ 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
+ 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
+ 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
+ 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+ 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
+ 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+ 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
+ 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
+ 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
+ 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
+ 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
+ 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
+ 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+ 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
+ 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
+ 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
+ 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
+ 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
+ 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
+ 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
+ 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
+ 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
+ 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
+ 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
+ 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
+ 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
+ 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
+ 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+ 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
+ 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
+ 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+ 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
+ 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
+ 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
+ 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
+ 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
+ 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+ 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
+ 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
+ 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
+ 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
+ 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
+ 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
+ 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
+ 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
+ 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
+ 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
+ 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
+ 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
+ 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
+ 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
+ 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
+ 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
+ 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
+ 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
+ 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
+ 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
+ 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
+ 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
+ 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+ 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
+ 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
+ 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
+ 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
+ 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
+ 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
+ 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
+ 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
+ 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+ 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+ 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
+ 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
+ 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
+ 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+ 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
+ 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
+ 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+ 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+ 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
+ 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
+ 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
+ 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
+ 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
+ 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+ 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
+ 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
+ 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+ 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
+ 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+ 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
+ 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
+ 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
+ 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
+ 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
+ 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
+ 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+ 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+ 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
+ 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
+ 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+ 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
+ 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
+ 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
+ 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+ 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
+ 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+ 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
+ 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
+ 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
+ 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
+ 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
+ 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+ 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+ 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+ 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
+ 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+ 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
+ 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
+ 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
+ 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
+ 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
+ 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
+ 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
+ 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
+ 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
+ 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
+ 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
+ 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+ 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
+ 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+ 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
+ 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+ 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+ 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
+ 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
+ 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
+ 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
+ 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
+ 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
+ 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
+ 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+ 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
+ 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
+ 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
+ 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
+ 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
+ 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
+ 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
+ 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
+ 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+ 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
+ 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
+ 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+ 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+ 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+ 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
+ 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+ 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
+ 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
+ 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
+ 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
+ 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+ 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
+ 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
+ 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
+ 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
+ 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
+ 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
+ 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
+ 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
+ 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
+ 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
+ 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
+ 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
+ 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
+ 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
+ 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
+ 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
+ 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
+ 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
+ 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
+ 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
+ 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
+ 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
+ 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
+ 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
+ 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
+ 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
+ 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
+ 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
+ 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
+ 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
+ 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
+ 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
+ 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+ 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
+ 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
+ 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
+ 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
+ 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
+ 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
+ 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
+ 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+ 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+ 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
+ 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
+ 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
+ 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+ 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
+ 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
+ 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
+ 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
+ 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
+ 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
+ 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+ 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
+ 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
+ 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
+ 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
+ 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
+ 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
+ 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
+ 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+ 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
+ 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+ 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
+ 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
+ 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
+ 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
+ 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+ 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
+ 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
+ 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
+ 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
+ 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
+ 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
+ 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
+ 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
+ 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
+ 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
+ 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
+ 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+ 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
+ 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
+ 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+ 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
+ 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
+ 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
+ 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
+ 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
+ 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
+ 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+ 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
+ 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
+ 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
+ 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
+ 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
+ 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
+ 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
+ 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
+ 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
+ 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
+ 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
+ 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
+ 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
+ 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
+ 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
+ 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
+ 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
+ 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
+ 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
+ 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
+ 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
+ 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+ 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
+ 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
+ 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
+ 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+ 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
+ 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
+ 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
+ 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
+ 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+ 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
+ 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
+ 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
+ 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
+ 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+ 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
+ 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
+ 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
+ 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
+ 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
+ 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
+ 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
+ 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
+ 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
+ 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
+ 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
+ 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
+ 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
+ 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
+ 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
+ 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
+ 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
+ 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
+ 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
+ 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
+ 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
+ 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+ 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
+ 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
+ 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
+ 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
+ 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
+ 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
+ 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+ 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
+ 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
+ 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+ 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
+ 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
+ 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
+ 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
+ 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
+ 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
+ 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+ 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
+ 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+ 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
+ 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
+ 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
+ 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
+ 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
+ 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
+ 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
+ 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
+ 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
+ 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
+ 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
+ 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
+ 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
+ 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
+ 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
+ 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
+ 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
+ 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
+ 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
+ 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
+ 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
+ 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
+ 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
+ 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
+ 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
+ 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+ 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+ 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
+ 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
+ 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
+ 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+ 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
+ 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
+ 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
+ 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
+ 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
+ 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
+ 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
+ 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
+ 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
+ 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
+ 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
+ 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
+ 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
+ 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
+ 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
+ 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
+ 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
+ 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
+ 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
+ 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
+ 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
+ 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
+ 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
+ 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+ 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
+ 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
+ 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
+ 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
+ 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+ 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
+ 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
+ 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
+ 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
+ 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
+ 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
+ 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
+ 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
+ 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
+ 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
+ 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
+ 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+ 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
+ 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
+ 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
+ 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
+ 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
+ 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
+ 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
+ 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
+ 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
+ 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
+ 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
+ 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
+ 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
+ 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+ 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+ 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
+ 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
+ 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
+ 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
+ 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
+ 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
+ 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
+ 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+ 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
+ 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
+ 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
+ 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+ 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+ 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
+ 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+ 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
+ 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
+ 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
+ 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
+ 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
+ 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
+ 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
+ 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
+ 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
+ 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
+ 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
+ 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
+ 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
+ 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
+ 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
+ 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
+ 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
+ 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
+ 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
+ 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
+ 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
+ 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
+ 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+ 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
+ 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
+ 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
+ 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
+ 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
+ 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
+ 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
+ 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
+ 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
+ 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
+ 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
+ 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
+ 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
+ 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
+ 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
+ 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
+ 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
+ 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
+ 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
+ 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
+ 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
+ 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
+ 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
+ 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
+ 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
+ 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
+ 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+ 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+ 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+ 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
+ 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+ 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
+ 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+ 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+ 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+ 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+ 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
+ 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
+ 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
+ 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
+ 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
+ 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
+ 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+ 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
+ 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
+ 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
+ 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
+ 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
+ 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+ 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
+ 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+ 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+ 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+ 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
+ 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
+ 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
+ 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
+ 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+ 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+ 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+ 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
+ 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
+ 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
+ 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
+ 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
+ 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
+ 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
+ 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
+ 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+ 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
+ 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
+ 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
+ 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
+ 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
+ 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
+ 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+ 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
+ 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
+ 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+ 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
+ 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+ 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
+ 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+ 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
+ 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
+ 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
+ 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
+ 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
+ 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
+ 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
+ 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
+ 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
+ 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
+ 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
+ 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
+ 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
+ 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
+ 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
+ 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
+ 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
+ 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
+ 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+ 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
+ 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+ 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
+ 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
+ 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
+ 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
+ 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
+ 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
+ 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
+ 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
+ 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
+ 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
+ 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+ 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
+ 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
+ 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
+ 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+ 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
+ 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+ 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+ 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+ 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
+ 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
+ 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
+ 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
+ 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
+ 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
+ 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
+ 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
+ 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
+ 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
+ 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
+ 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
+ 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
+ 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
+ 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
+ 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
+ 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
+ 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
+ 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
+ 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
+ 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
+ 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
+ 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
+ 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
+ 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+ 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
+ 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+ 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
+ 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
+ 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+ 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
+ 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+ 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
+ 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
+ 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
+ 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
+ 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
+ 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
+ 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
+ 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
+ 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+ 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
+ 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
+ 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
+ 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
+ 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+ 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
+ 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
+ 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
+ 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
+ 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+ 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
+ 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
+ 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
+ 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
+ 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+ 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+ 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+ 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
+ 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
+ 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+ 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
+ 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+ 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
+ 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
+ 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
+ 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
+ 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
+ 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
+ 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
+ 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
+ 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
+ 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
+ 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
+ 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
+ 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+ 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+ 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
+ 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
+ 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+ 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+ 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
+ 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+ 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+ 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+ 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
+ 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
+ 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
+ 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
+ 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+ 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
+ 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
+ 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
+ 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+ 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
+ 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
+ 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
+ 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+ 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
+ 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
+ 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
+ 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
+ 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+ 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
+ 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
+ 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
+ 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+ 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+ 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+ 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
+ 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
+ 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+ 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
+ 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
+ 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
+ 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
+ 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
+ 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+ 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
+ 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
+ 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+ 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
+ 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
+ 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+ 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+ 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
+ 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
+ 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
+ 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
+ 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+ 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
+ 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
+ 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+ 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
+ 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
+ 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
+ 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
+ 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
+ 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
+ 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
+ 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
+ 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+ 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+ 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
+ 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
+ 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+ 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+ 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+ 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
+ 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
+ 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
+ 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
+ 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
+ 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+ 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
+ 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
+ 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
+ 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
+ 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
+ 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+ 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
+ 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
+ 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
+ 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+ 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
+ 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+ 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
+ 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+ 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
+ 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
+ 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
+ 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+ 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+ 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
+ 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
+ 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+ 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
+ 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
+ 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
+ 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
+ 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
+ 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+ 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
+ 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
+ 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
+ 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
+ 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+ 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
+ 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
+ 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
+ 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
+ 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
+ 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
+ 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
+ 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
+ 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
+ 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
+ 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
+ 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
+ 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
+ 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+ 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
+ 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
+ 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
+ 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
+ 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
+ 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
+ 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
+ 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
+ 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+ 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
+ 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
+ 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
+ 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
+ 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
+ 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
+ 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+ 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
+ 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
+ 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+ 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
+ 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+ 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
+ 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
+ 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+ 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
+ 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
+ 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
+ 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+ 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
+ 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
+ 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
+ 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+ 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+ 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
+ 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+ 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
+ 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
+ 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+ 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
+ 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
+ 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
+ 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
+ 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
+ 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
+ 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
+ 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+ 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
+ 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+ 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
+ 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
+ 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
+ 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
+ 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
+ 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+ 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
+ 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
+ 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
+ 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
+ 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+ 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+ 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+ 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+ 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
+ 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
+ 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
+ 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
+ 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
+ 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
+ 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
+ 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
+ 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
+ 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
+ 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
+ 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
+ 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
+ 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
+ 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
+ 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+ 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+ 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
+ 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+ 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
+ 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+ 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
+ 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+ 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
+ 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
+ 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+ 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+ 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
+ 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+ 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+ 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
+ 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+ 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+ 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+ 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+ 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
+ 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
+ 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+ 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+ 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+ 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
+ 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
+ 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+ 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+ 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
+ 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+ 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+ 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+ 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
+ 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
+ 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
+ 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
+ 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
+ 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
+ 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+ 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+ 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+ 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
+ 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+ 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+ 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+ 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+ 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+ 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
+ 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
+ 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
+ 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
+ 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
+ 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
+ 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
+ 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
+ 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+ 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
+ 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+ 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+ 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
+ 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
+ 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+ 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
+ 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+ 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
+ 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
+ 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+ 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
+ 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
+ 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
+ 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+ 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+ 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+ 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
+ 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
+ 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
+ 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+ 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
+ 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
+ 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
+ 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
+ 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
+ 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
+ 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
+ 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
+ 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+ 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+ 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
+ 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+ 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
+ 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
+ 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
+ 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
+ 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
+ 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
+ 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
+ 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+ 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
+ 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
+ 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
+ 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+ 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
+ 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
+ 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+ 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+ 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+ 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+ 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
+ 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+ 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
+ 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
+ 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+ 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
+ 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+ 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
+ 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
+ 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
+ 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
+ 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
+ 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
+ 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+ 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+ 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
+ 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
+ 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+ 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
+ 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
+ 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
+ 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
+ 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+ 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
+ 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+ 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
+ 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
+ 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+ 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+ 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
+ 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+ 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
+ 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+ 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
+ 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+ 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+ 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+ 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
+ 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+ 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
+ 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
+ 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+ 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
+ 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+ 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+ 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
+ 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
+ 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
+ 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
+ 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
+ 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
+ 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+ 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+ 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+ 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
+ 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
+ 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
+ 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
+ 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
+ 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
+ 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
+ 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
+ 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+ 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+ 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
+ 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+ 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+ 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
+ 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+ 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
+ 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
+ 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+ 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
+ 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
+ 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
+ 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+ 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
+ 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
+ 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+ 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+ 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
+ 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+ 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
+ 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+ 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
+ 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
+ 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+ 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
+ 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+ 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+ 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
+ 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+ 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
+ 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+ 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
+ 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
+ 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
+ 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+ 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
+ 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
+ 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+ 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
+ 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
+ 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
+ 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+ 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
+ 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
+ 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
+ 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
+ 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
+ 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+ 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
+ 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
+ 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
+ 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+ 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
+ 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
+ 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
+ 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
+ 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
+ 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
+ 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
+ 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+ 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
+ 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
+ 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
+ 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
+ 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
+ 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
+ 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
+ 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
+ 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+ 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
+ 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+ 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
+ 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+ 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
+ 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+ 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
+ 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
+ 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
+ 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
+ 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
+ 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
+ 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
+ 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
+ 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
+ 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
+ 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
+ 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
+ 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+ 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+ 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+ 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+ 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
+ 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
+ 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+ 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+ 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
+ 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
+ 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+ 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+ 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
+ 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
+ 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
+ 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+ 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
+ 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
+ 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+ 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+ 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+ 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
+ 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
+ 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
+ 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
+ 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
+ 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+ 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
+ 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+ 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
+ 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
+ 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+ 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+ 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+ 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
+ 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+ 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+ 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
+ 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
+ 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
+ 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
+ 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
+ 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
+ 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
+ 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+ 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
+ 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
+ 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
+ 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
+ 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+ 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
+ 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
+ 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+ 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+ 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
+ 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
+ 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
+ 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+ 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
+ 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+ 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
+ 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
+ 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
+ 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
+ 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
+ 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
+ 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+ 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
+ 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
+ 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+ 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+ 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+ 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+ 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+ 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+ 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
+ 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
+ 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+ 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
+ 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
+ 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
+ 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
+ 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+ 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+ 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
+ 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
+ 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+ 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+ 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
+ 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
+ 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
+ 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
+ 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
+ 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+ 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+ 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+ 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+ 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+ 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
+ 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
+ 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
+ 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+ 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+ 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+ 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+ 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
+ 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
+ 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
+ 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
+ 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
+ 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
+ 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
+ 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+ 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+ 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+ 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+ 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
+ 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+ 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
+ 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+ 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
+ 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+ 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
+ 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
+ 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+ 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+ 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+ 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
+ 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+ 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
+ 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
+ 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
+ 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
+ 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
+ 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+ 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
+ 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
+ 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
+ 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+ 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+ 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+ 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+ 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+ 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+ 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+ 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
+ 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
+ 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
+ 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
+ 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
+ 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
+ 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
+ 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
+ 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+ 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+ 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
+ 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
+ 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
+ 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
+ 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+ 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+ 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
+ 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+ 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
+ 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+ 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+ 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+ 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+ 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+ 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+ 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+ 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
+ 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
+ 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
+ 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+ 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
+ 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
+ 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+ 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
+ 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
+ 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+ 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
+ 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
+ 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
+ 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+ 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
+ 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
+ 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
+ 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
+ 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
+ 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+ 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
+ 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
+ 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+ 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+ 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
+ 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+ 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+ 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
+ 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+ 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
+ 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
+ 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
+ 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
+ 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
+ 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
+ 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+ 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
+ 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+ 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
+ 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+ 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+ 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+ 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
+ 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+ 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+ 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+ 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
+ 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+ 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
+ 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
+ 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
+ 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
+ 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
+ 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
+ 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
+ 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
+ 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
+ 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+ 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
+ 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
+ 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+ 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
+ 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
+ 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
+ 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+ 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
+ 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
+ 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
+ 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+ 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
+ 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
+ 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
+ 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
+ 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
+ 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
+ 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
+ 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
+ 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
+ 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
+ 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
+ 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
+ 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
+ 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
+ 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
+ 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
+ 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
+ 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
+ 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
+ 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
+ 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
+ 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
+ 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
+ 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
+ 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
+ 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
+ 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
+ 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
+ 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
+ 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
+ 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
+ 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
+ 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
+ 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
+ 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
+ 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
+ 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
+ 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
+ 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
+ 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
+ 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
+ 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
+ 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
+ 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
+ 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
+ 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
+ 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+ 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
+ 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
+ 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
+ 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
+ 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
+ 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+ 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
+ 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
+ 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
+ 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
+ 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
+ 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
+ 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
+ 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
+ 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
+ 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
+ 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
+ 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
+ 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
+ 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
+ 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
+ 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+ 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+ 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+ 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
+ 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+ 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+ 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
+ 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
+ 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
+ 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
+ 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
+ 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
+ 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
+ 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
+ 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
+ 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
+ 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
+ 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
+ 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
+ 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
+ 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
+ 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
+ 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
+ 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
+ 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
+ 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
+ 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
+ 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
+ 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
+ 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
+ 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
+ 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
+ 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
+ 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
+ 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
+ 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
+ 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
+ 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
+ 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+ 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
+ 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
+ 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
+ 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
+ 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+ 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
+ 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+ 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
+ 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
+ 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
+ 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
+ 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
+ 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
+ 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
+ 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
+ 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
+ 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
+ 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
+ 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
+ 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
+ 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
+ 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
+ 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
+ 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
+ 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
+ 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
+ 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
+ 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
+ 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
+ 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
+ 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
+ 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
+ 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
+ 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
+ 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+ 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+ 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
+ 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
+ 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
+ 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
+ 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
+ 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
+ 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
+ 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
+ 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
+ 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
+ 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
+ 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
+ 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
+ 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
+ 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
+ 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
+ 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
+ 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+ 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
+ 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
+ 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
+ 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
+ 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
+ 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
+ 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
+ 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
+ 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
+ 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
+ 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
+ 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
+ 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
+ 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
+ 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+ 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
+ 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
+ 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
+ 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
+ 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
+ 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
+ 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
+ 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
+ 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+ 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
+ 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
+ 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
+ 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+ 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
+ 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+ 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
+ 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
+ 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
+ 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
+ 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
+ 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
+ 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
+ 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+ 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
+ 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
+ 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
+ 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
+ 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
+ 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
+ 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
+ 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
+ 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
+ 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
+ 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
+ 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
+ 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
+ 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
+ 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+ 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+ 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
+ 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
+ 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
+ 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
+ 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
+ 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
+ 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
+ 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
+ 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
+ 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
+ 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
+ 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
+ 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
+ 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
+ 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
+ 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
+ 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
+ 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
+ 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
+ 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+ 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
+ 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
+ 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
+ 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
+ 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
+ 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
+ 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
+ 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
+ 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
+ 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
+ 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
+ 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
+ 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
+ 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
+ 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
+ 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
+ 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
+ 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+ 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
+ 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
+ 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
+ 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
+ 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
+ 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
+ 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
+ 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
+ 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
+ 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
+ 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
+ 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+ 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
+ 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
+ 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
+ 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
+ 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
+ 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
+ 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
+ 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
+ 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
+ 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
+ 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
+ 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
+ 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
+ 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
+ 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
+ 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
+ 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
+ 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
+ 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
+ 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
+ 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
+ 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
+ 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+ 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
+ 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
+ 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
+ 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
+ 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
+ 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
+ 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
+ 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
+ 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
+ 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
+ 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
+ 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
+ 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
+ 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
+ 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
+ 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
+ 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
+ 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
+ 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
+ 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
+ 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
+ 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
+ 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
+ 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
+ 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
+ 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
+ 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
+ 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
+ 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
+ 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
+ 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
+ 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
+ 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
+ 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
+ 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
+ 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
+ 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
+ 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
+ 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
+ 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
+ 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
+ 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
+ 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
+ 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
+ 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
+ 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
+ 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
+ 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
+ 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
+ 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
+ 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
+ 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
+ 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
+ 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
+ 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
+ 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
+ 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
+ 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
+ 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
+ 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
+ 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
+ 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
+ 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
+ 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
+ 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
+ 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
+ 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
+ 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
+ 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
+ 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
+ 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
+ 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
+ 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
+ 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
+ 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
+ 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+ 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
+ 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
+ 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
+ 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
+ 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
+ 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
+ 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
+ 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
+ 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
+ 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
+ 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
+ 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
+ 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
+ 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
+ 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
+ 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
+ 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
+ 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
+ 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
+ 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
+ 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
+ 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
+ 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
+ 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
+ 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
+ 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
+ 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
+ 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
+ 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
+ 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
+ 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
+ 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
+ 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
+ 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
+ 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+ 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
+ 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
+ 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
+ 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
+ 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
+ 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
+ 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
+ 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
+ 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
+ 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
+ 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
+ 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
+ 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
+ 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
+ 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
+ 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
+ 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
+ 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+ 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
+ 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
+ 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
+ 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+ 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
+ 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
+ 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
+ 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
+ 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
+ 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
+ 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
+ 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
+ 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
+ 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
+ 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
+ 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
+ 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
+ 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
+ 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
+ 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
+ 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
+ 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
+ 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
+ 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
+ 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
+ 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
+ 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
+ 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
+ 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
+ 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
+ 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
+ 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
+ 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
+ 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
+ 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
+ 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
+ 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
+ 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
+ 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
+ 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
+ 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
+ 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
+ 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
+ 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
+ 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
+ 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
+ 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
+ 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
+ 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
+ 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
+ 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+ 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
+ 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+ 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
+ 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
+ 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
+ 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
+ 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
+ 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
+ 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
+ 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
+ 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
+ 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
+ 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
+ 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+ 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
+ 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
+ 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
+ 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
+ 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
+ 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
+ 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
+ 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
+ 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
+ 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+ 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
+ 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
+ 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
+ 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
+ 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
+ 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
+ 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
+ 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
+ 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
+ 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
+ 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
+ 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
+ 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
+ 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
+ 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
+ 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
+ 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
+ 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
+ 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
+ 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
+ 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
+ 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+ 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
+ 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
+ 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
+ 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
+ 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
+ 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
+ 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
+ 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
+ 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
+ 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
+ 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
+ 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
+ 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
+ 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
+ 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
+ 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
+ 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
+ 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
+ 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
+ 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
+ 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
+ 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
+ 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
+ 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
+ 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
+ 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
+ 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
+ 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
+ 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
+ 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
+ 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
+ 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
+ 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
+ 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
+ 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
+ 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
+ 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
+ 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
+ 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
+ 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+ 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+ 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+ 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+ 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+ 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+ 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
+ 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+ 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+ 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+ 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
+ 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+ 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+ 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+ 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+ 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
+ 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
+ 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+ 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+ 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+ 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+ 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+ 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+ 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+ 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+ 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+ 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+ 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+ 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+ 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+ 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+ 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+ 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+ 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+ 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+ 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+ 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+ 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+ 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+ 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+ 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+ 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+ 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+ 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+ 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+ 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+ 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+ 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+ 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+ 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+ 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+ 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+ 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+ 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+ 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+ 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+ 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+ 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+ 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+ 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+ 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+ 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+ 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+ 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+ 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+ 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+ 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+ 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+ 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+ 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+ 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+ 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+ 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+ 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+ 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+ 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+ 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+ 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+ 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+ 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+ 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+ 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+ 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+ 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+ 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+ 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+ 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+ 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+ 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+ 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+ 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+ 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+ 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+ 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+ 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+ 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+ 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+ 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+ 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+ 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+ 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+ 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+ 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+ 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+ 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+ 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+ 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+ 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+ 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+ 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+ 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+ 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+ 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+ 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+ 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+ 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+ 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+ 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+ 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+ 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+ 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+ 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+ 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+ 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+ 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+ 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+ 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+ 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+ 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+ 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+ 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+ 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+ 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+ 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+ 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+ 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+ 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+ 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+ 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+ 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+ 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+ 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+ 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+ 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+ 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+ 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+ 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+ 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+ 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+ 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+ 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+ 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+ 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+ 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+ 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+ 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+ 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+ 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+ 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+ 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+ 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+ 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+ 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+ 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+ 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+ 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+ 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+ 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+ 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+ 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+ 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+ 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+ 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+ 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+ 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+ 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+ 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+ 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+ 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+ 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+ 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+ 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+ 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+ 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+ 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+ 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+ 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+ 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+ 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+ 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+ 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+ 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+ 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+ 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+ 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+ 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+ 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+ 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+ 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+ 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+ 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+ 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+ 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+ 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+ 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+ 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+ 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+ 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+ 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+ 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+ 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+ 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+ 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+ 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+ 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+ 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+ 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+ 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+ 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+ 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+ 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+ 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+ 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+ 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+ 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+ 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+ 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+ 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+ 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+ 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+ 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+ 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+ 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+ 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+ 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+ 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+ 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+ 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+ 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+ 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+ 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+ 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+ 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+ 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+ 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+ 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+ 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+ 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+ 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+ 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+ 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+ 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+ 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+ 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+ 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+ 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+ 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+ 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+ 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+ 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+ 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+ 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+ 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+ 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+ 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+ 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+ 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+ 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+ 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+ 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+ 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+ 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+ 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+ 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+ 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+ 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+ 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+ 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+ 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+ 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+ 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+ 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+ 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+ 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+ 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+ 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+ 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+ 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+ 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+ 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+ 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+ 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+ 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+ 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+ 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+ 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+ 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+ 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+ 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+ 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+ 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+ 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+ 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+ 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+ 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+ 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+ 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+ 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+ 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+ 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+ 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+ 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+ 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+ 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+ 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+ 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+ 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+ 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+ 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+ 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+ 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+ 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+ 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+ 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+ 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+ 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+ 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+ 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+ 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+ 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+ 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+ 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+ 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+ 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+ 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+ 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+ 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+ 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+ 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+ 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+ 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+ 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+ 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+ 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+ 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+ 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+ 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+ 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+ 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+ 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+ 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+ 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+ 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+ 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+ 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+ 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+ 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+ 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+ 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+ 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+ 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+ 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+ 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+ 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+ 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+ 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+ 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+ 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+ 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+ 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+ 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+ 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+ 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+ 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+ 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+ 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+ 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+ 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+ 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+ 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+ 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+ 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+ 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+ 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+ 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+ 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+ 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+ 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+ 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+ 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+ 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+ 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+ 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+ 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+ 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+ 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+ 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+ 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+ 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+ 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+ 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+ 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+ 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+ 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+ 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+ 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+ 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+ 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+ 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+ 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+ 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+ 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+ 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+ 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+ 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+ 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+ 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+ 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+ 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+ 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+ 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+ 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+ 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+ 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+ 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+ 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+ 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+ 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+ 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+ 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+ 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+ 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+ 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+ 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+ 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+ 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+ 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+ 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+ 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+ 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+ 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+ 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+ 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+ 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+ 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+ 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+ 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+ 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+ 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+ 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+ 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+ 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+ 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+ 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+ 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+ 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+ 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+ 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+ 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+ 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+ 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+ 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+ 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+ 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+ 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+ 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+ 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+ 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+ 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+ 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+ 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+ 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+ 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+ 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+ 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+ 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+ 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+ 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+ 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+ 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+ 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+ 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+ 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+ 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+ 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+ 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+ 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+ 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+ 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+ 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+ 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+ 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+ 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+ 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+ 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+ 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+ 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+ 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+ 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+ 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+ 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+ 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+ 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+ 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+ 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+ 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+ 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+ 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+ 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+ 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+ 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+ 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+ 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+ 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+ 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+ 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+ 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+ 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+ 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+ 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+ 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+ 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+ 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+ 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+ 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+ 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+ 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+ 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+ 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+ 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+ 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+ 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+ 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+ 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+ 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+ 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+ 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+ 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+ 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+ 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+ 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+ 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+ 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+ 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+ 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+ 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+ 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+ 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+ 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+ 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+ 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+ 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+ 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+ 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+ 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+ 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+ 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+ 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+ 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+ 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+ 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+ 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+ 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+ 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+ 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+ 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+ 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+ 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+ 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+ 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+ 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+ 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+ 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+ 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+ 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+ 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+ 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+ 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+ 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+ 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+ 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+ 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+ 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+ 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+ 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+ 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+ 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+ 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+ 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+ 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+ 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+ 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+ 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+ 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+ 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+ 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+ 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+ 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+ 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+ 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+ 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+ 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+ 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+ 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+ 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+ 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+ 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+ 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+ 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+ 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+ 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+ 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+ 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+ 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+ 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+ 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+ 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+ 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+ 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+ 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+ 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+ 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+ 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+ 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+ 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+ 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+ 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+ 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+ 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+ 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+ 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+ 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+ 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+ 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+ 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+ 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+ 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+ 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+ 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+ 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+ 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+ 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+ 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+ 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+ 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+ 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+ 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+ 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+ 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+ 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+ 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+ 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+ 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+ 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+ 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+ 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+ 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+ 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+ 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+ 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+ 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+ 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+ 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+ 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+ 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+ 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+ 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+ 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+ 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+ 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+ 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+ 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+ 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+ 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+ 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+ 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+ 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+ 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+ 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+ 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+ 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+ 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+ 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+ 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+ 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+ 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+ 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+ 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+ 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+ 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+ 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+ 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+ 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+ 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+ 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+ 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+ 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+ 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+ 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+ 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+ 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+ 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+ 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+ 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+ 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+ 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+ 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+ 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+ 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+ 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+ 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+ 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+ 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+ 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+ 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+ 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+ 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+ 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+ 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+ 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+ 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+ 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+ 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+ 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+ 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+ 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+ 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+ 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+ 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+ 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+ 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+ 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+ 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+ 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+ 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+ 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+ 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+ 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+ 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+ 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+ 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+ 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+ 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+ 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+ 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+ 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+ 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+ 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+ 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
+ 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
+ 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
+ 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
+ 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+ 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
+ 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
+ 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
+ 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+ 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
+ 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+ 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
+ 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+ 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
+ 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
+ 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
+ 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
+ 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
+ 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
+ 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
+ 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
+ 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
+ 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
+ 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
+ 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+ 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
+ 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
+ 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
+ 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+ 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
+ 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
+ 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
+ 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
+ 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
+ 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
+ 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
+ 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
+ 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
+ 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
+ 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+ 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
+ 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+ 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
+ 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
+ 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+ 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
+ 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
+ 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
+ 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
+ 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
+ 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
+ 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
+ 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
+ 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
+ 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
+ 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
+ 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
+ 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
+ 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
+ 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
+ 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
+ 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
+ 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
+ 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
+ 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
+ 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
+ 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
+ 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
+ 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+ 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
+ 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+ 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
+ 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
+ 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
+ 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
+ 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
+ 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
+ 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
+ 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
+ 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
+ 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
+ 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
+ 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
+ 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
+ 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
+ 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
+ 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
+ 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+ 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
+ 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
+ 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
+ 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+ 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
+ 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
+ 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+ 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+ 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
+ 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+ 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
+ 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+ 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+ 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
+ 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+ 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
+ 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
+ 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
+ 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
+ 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
+ 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
+ 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
+ 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
+ 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+ 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
+ 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+ 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+ 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+ 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
+ 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+ 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
+ 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
+ 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
+ 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
+ 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
+ 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+ 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
+ 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+ 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
+ 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+ 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
+ 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
+ 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
+ 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
+ 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+ 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+ 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+ 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+ 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
+ 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
+ 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+ 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+ 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+ 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
+ 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+ 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
+ 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+ 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
+ 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+ 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+ 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
+ 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+ 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
+ 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+ 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
+ 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+ 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
+ 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+ 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
+ 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
+ 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
+ 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
+ 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+ 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
+ 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+ 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
+ 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+ 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+ 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+ 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
+ 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
+ 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
+ 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
+ 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+ 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+ 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
+ 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
+ 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
+ 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
+ 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+ 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
+ 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
+ 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+ 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+ 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+ 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+ 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
+ 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+ 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
+ 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
+ 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+ 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
+ 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+ 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+ 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
+ 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
+ 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
+ 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
+ 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
+ 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
+ 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+ 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+ 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+ 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
+ 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
+ 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+ 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+ 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+ 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
+ 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+ 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+ 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
+ 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+ 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
+ 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
+ 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
+ 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+ 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
+ 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
+ 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
+ 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+ 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
+ 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
+ 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+ 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+ 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+ 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+ 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
+ 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+ 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
+ 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
+ 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
+ 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
+ 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
+ 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
+ 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
+ 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
+ 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
+ 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
+ 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+ 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
+ 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
+ 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+ 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+ 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
+ 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+ 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+ 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+ 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+ 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
+ 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
+ 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
+ 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
+ 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+ 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+ 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
+ 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+ 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+ 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
+ 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
+ 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
+ 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+ 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
+ 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+ 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
+ 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
+ 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+ 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
+ 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
+ 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
+ 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+ 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
+ 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
+ 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
+ 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
+ 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
+ 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
+ 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
+ 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+ 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
+ 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
+ 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
+ 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+ 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
+ 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
+ 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
+ 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+ 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+ 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
+ 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
+ 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
+ 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+ 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+ 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
+ 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
+ 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+ 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
+ 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
+ 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
+ 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+ 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
+ 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+ 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
+ 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
+ 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+ 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+ 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
+ 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+ 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
+ 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
+ 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+ 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+ 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+ 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
+ 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
+ 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+ 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+ 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
+ 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
+ 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+ 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
+ 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+ 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+ 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
+ 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+ 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+ 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
+ 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
+ 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
+ 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+ 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
+ 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+ 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+ 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+ 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
+ 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
+ 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
+ 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
+ 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+ 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+ 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+ 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+ 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+ 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+ 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+ 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
+ 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+ 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
+ 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
+ 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
+ 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
+ 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
+ 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
+ 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
+ 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
+ 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
+ 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+ 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+ 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+ 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+ 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+ 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
+ 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
+ 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+ 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+ 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+ 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+ 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
+ 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+ 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+ 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+ 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
+ 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+ 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+ 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+ 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
+ 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
+ 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+ 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+ 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
+ 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+ 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
+ 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+ 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+ 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+ 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
+ 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
+ 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+ 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+ 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+ 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
+ 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
+ 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+ 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+ 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+ 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+ 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
+ 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
+ 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+ 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+ 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
+ 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+ 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
+ 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+ 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
+ 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
+ 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+ 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
+ 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+ 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+ 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+ 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+ 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+ 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
+ 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
+ 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
+ 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+ 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
+ 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+ 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+ 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+ 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
+ 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+ 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
+ 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+ 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+ 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+ 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+ 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
+ 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+ 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
+ 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+ 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
+ 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
+ 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+ 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+ 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+ 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+ 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+ 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
+ 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
+ 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+ 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
+ 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+ 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
+ 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
+ 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
+ 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+ 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+ 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
+ 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+ 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
+ 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
+ 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
+ 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
+ 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
+ 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
+ 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
+ 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+ 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
+ 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
+ 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
+ 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
+ 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
+ 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+ 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
+ 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
+ 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+ 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
+ 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+ 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
+ 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
+ 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
+ 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
+ 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
+ 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
+ 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
+ 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+ 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
+ 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
+ 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+ 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
+ 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
+ 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
+ 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+ 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
+ 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+ 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
+ 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+ 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
+ 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
+ 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
+ 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
+ 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+ 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
+ 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+ 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+ 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
+ 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
+ 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
+ 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
+ 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
+ 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
+ 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
+ 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
+ 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
+ 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
+ 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+ 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+ 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+ 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
+ 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+ 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+ 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
+ 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
+ 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+ 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
+ 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
+ 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
+ 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+ 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
+ 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
+ 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
+ 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+ 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
+ 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+ 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
+ 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
+ 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
+ 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
+ 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
+ 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
+ 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
+ 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
+ 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
+ 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
+ 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
+ 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
+ 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
+ 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
+ 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
+ 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
+ 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
+ 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+ 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
+ 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
+ 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
+ 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
+ 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
+ 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
+ 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+ 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+ 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
+ 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
+ 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
+ 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
+ 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+ 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+ 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
+ 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
+ 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
+ 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
+ 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
+ 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
+ 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
+ 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+ 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
+ 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
+ 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+ 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+ 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
+ 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+ 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+ 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
+ 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
+ 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+ 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+ 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
+ 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
+ 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
+ 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+ 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
+ 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+ 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+ 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
+ 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
+ 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
+ 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+ 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+ 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+ 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
+ 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
+ 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
+ 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+ 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
+ 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+ 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+ 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+ 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
+ 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
+ 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+ 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+ 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
+ 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+ 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
+ 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
+ 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+ 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+ 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
+ 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
+ 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
+ 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
+ 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+ 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+ 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
+ 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+ 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
+ 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
+ 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
+ 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+ 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+ 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+ 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
+ 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+ 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+ 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
+ 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
+ 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+ 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+ 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+ 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
+ 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
+ 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
+ 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+ 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+ 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+ 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+ 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
+ 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+ 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+ 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+ 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
+ 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
+ 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+ 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+ 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
+ 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
+ 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+ 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+ 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+ 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
+ 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+ 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
+ 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
+ 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+ 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+ 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
+ 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
+ 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+ 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+ 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
+ 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
+ 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
+ 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+ 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+ 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+ 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
+ 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
+ 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
+ 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
+ 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
+ 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
+ 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
+ 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
+ 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+ 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
+ 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
+ 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+ 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
+ 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
+ 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
+ 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+ 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
+ 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+ 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
+ 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
+ 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+ 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
+ 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
+ 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
+ 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
+ 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+ 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
+ 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
+ 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+ 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
+ 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+ 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
+ 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+ 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+ 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+ 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
+ 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
+ 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
+ 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
+ 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+ 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+ 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
+ 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
+ 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
+ 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
+ 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
+ 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+ 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
+ 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
+ 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
+ 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
+ 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
+ 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
+ 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+ 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
+ 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
+ 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+ 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
+ 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
+ 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+ 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
+ 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+ 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+ 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
+ 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
+ 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+ 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+ 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+ 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+ 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+ 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
+ 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+ 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
+ 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
+ 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
+ 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
+ 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
+ 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+ 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
+ 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+ 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+ 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
+ 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
+ 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+ 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+ 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
+ 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+ 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
+ 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+ 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
+ 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+ 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+ 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+ 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
+ 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+ 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+ 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+ 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+ 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+ 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+ 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
+ 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
+ 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+ 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+ 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
+ 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+ 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
+ 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
+ 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+ 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
+ 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
+ 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
+ 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+ 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
+ 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
+ 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
+ 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+ 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+ 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+ 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
+ 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+ 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+ 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+ 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
+ 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+ 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
+ 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+ 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
+ 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
+ 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
+ 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+ 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
+ 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
+ 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+ 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
+ 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+ 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
+ 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
+ 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
+ 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
+ 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
+ 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
+ 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
+ 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
+ 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
+ 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
+ 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
+ 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
+ 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
+ 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
+ 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
+ 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
+ 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
+ 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+ 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
+ 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
+ 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
+ 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
+ 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
+ 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
+ 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
+ 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
+ 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
+ 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
+ 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
+ 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
+ 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
+ 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
+ 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
+ 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
+ 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
+ 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+ 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+ 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
+ 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
+ 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
+ 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
+ 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
+ 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
+ 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
+ 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
+ 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
+ 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+ 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
+ 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
+ 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
+ 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
+ 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
+ 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+ 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+ 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
+ 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+ 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
+ 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
+ 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
+ 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
+ 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
+ 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
+ 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
+ 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
+ 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
+ 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
+ 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
+ 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+ 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
+ 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
+ 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
+ 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
+ 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
+ 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
+ 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
+ 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
+ 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
+ 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
+ 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
+ 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
+ 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
+ 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
+ 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
+ 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
+ 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
+ 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
+ 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
+ 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
+ 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
+ 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
+ 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
+ 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+ 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
+ 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
+ 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
+ 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
+ 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
+ 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
+ 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
+ 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
+ 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+ 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
+ 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
+ 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
+ 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
+ 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
+ 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
+ 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
+ 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
+ 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
+ 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
+ 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
+ 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+ 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
+ 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
+ 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
+ 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
+ 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
+ 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
+ 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+ 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
+ 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
+ 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
+ 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
+ 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
+ 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
+ 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
+ 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
+ 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
+ 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
+ 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
+ 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
+ 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
+ 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
+ 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
+ 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+ 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
+ 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
+ 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
+ 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
+ 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
+ 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
+ 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
+ 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+ 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+ 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
+ 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+ 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
+ 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
+ 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+ 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
+ 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
+ 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
+ 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+ 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
+ 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+ 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
+ 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+ 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+ 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
+ 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+ 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
+ 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
+ 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
+ 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
+ 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
+ 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
+ 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
+ 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
+ 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+ 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
+ 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
+ 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
+ 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
+ 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
+ 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
+ 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
+ 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
+ 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
+ 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
+ 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
+ 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
+ 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
+ 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
+ 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
+ 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+ 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
+ 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
+ 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
+ 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
+ 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+ 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+ 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
+ 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+ 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
+ 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+ 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
+ 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
+ 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
+ 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+ 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
+ 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
+ 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
+ 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
+ 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
+ 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
+ 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
+ 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
+ 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
+ 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+ 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
+ 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
+ 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+ 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
+ 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
+ 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
+ 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
+ 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
+ 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
+ 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
+ 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+ 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
+ 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
+ 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
+ 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
+ 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
+ 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
+ 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
+ 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
+ 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
+ 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
+ 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
+ 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
+ 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
+ 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
+ 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
+ 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
+ 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
+ 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
+ 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
+ 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
+ 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
+ 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
+ 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
+ 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
+ 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
+ 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
+ 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
+ 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+ 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
+ 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
+ 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
+ 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
+ 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
+ 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
+ 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
+ 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
+ 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
+ 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
+ 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
+ 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+ 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
+ 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
+ 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+ 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
+ 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
+ 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
+ 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
+ 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
+ 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
+ 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
+ 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
+ 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
+ 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+ 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
+ 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
+ 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
+ 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+ 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
+ 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
+ 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
+ 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
+ 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
+ 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
+ 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
+ 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+ 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
+ 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
+ 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
+ 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
+ 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
+ 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
+ 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
+ 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
+ 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
+ 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
+ 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+ 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
+ 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
+ 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
+ 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
+ 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
+ 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
+ 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
+ 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
+ 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
+ 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
+ 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
+ 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+ 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
+ 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
+ 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
+ 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
+ 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+ 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
+ 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
+ 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
+ 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
+ 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
+ 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
+ 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
+ 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
+ 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
+ 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
+ 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
+ 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
+ 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
+ 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+ 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
+ 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
+ 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
+ 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
+ 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+ 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
+ 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
+ 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
+ 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
+ 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+ 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
+ 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
+ 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+ 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
+ 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
+ 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
+ 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
+ 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
+ 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+ 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
+ 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
+ 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
+ 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
+ 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
+ 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
+ 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+ 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
+ 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
+ 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
+ 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
+ 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
+ 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
+ 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
+ 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+ 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
+ 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+ 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
+ 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
+ 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
+ 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+ 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
+ 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
+ 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
+ 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
+ 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
+ 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
+ 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
+ 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+ 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+ 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
+ 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
+ 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+ 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
+ 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+ 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
+ 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
+ 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+ 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
+ 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
+ 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
+ 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
+ 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+ 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+ 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+ 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+ 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+ 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+ 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+ 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+ 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+ 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+ 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
new file mode 100644
index 000000000000..6f3819afd0a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -0,0 +1,24 @@
+//===-- ARMRegisterInfo.cpp - ARM Register Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMRegisterInfo.h"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+using namespace llvm;
+
+void ARMRegisterInfo::anchor() { }
+
+ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
new file mode 100644
index 000000000000..8a248425c33c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
@@ -0,0 +1,33 @@
+//===-- ARMRegisterInfo.h - ARM Register Information Impl -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMREGISTERINFO_H
+#define ARMREGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+
+struct ARMRegisterInfo : public ARMBaseRegisterInfo {
+ virtual void anchor();
+public:
+ ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
new file mode 100644
index 000000000000..b0f576bc2b6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -0,0 +1,403 @@
+//===-- ARMRegisterInfo.td - ARM Register defs -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the ARM register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
+class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> {
+ let HWEncoding = Enc;
+ let Namespace = "ARM";
+ let SubRegs = subregs;
+ // All bits of ARM registers with sub-registers are covered by sub-registers.
+ let CoveredBySubRegs = 1;
+}
+
+class ARMFReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
+ let Namespace = "ARM";
+}
+
+// Subregister indices.
+let Namespace = "ARM" in {
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+
+// Note: Code depends on these having consecutive numbers.
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>;
+def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>;
+
+def dsub_0 : SubRegIndex;
+def dsub_1 : SubRegIndex;
+def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>;
+def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>;
+def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>;
+def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>;
+def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>;
+def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>;
+
+def ssub_0 : SubRegIndex;
+def ssub_1 : SubRegIndex;
+def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>;
+def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>;
+
+def gsub_0 : SubRegIndex;
+def gsub_1 : SubRegIndex;
+// Let TableGen synthesize the remaining 12 ssub_* indices.
+// We don't need to name them.
+}
+
+// Integer registers
+def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : ARMReg< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : ARMReg< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>;
+// These require 32-bit instructions.
+let CostPerUse = 1 in {
+def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
+def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>;
+def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>;
+def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>;
+}
+
+// Float registers
+def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
+def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
+def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
+def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
+def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+let SubRegIndices = [ssub_0, ssub_1] in {
+def D0 : ARMReg< 0, "d0", [S0, S1]>, DwarfRegNum<[256]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>, DwarfRegNum<[257]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>, DwarfRegNum<[258]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>, DwarfRegNum<[259]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>, DwarfRegNum<[260]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>, DwarfRegNum<[261]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>, DwarfRegNum<[262]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>, DwarfRegNum<[263]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>, DwarfRegNum<[264]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>, DwarfRegNum<[265]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>, DwarfRegNum<[266]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>, DwarfRegNum<[267]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>, DwarfRegNum<[268]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>, DwarfRegNum<[269]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>, DwarfRegNum<[270]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>;
+}
+
+// VFP3 defines 16 additional double registers
+def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>;
+def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>;
+def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>;
+def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>;
+def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>;
+def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>;
+def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>;
+def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>;
+def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>;
+def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>;
+def D26 : ARMFReg<26, "d26">, DwarfRegNum<[282]>;
+def D27 : ARMFReg<27, "d27">, DwarfRegNum<[283]>;
+def D28 : ARMFReg<28, "d28">, DwarfRegNum<[284]>;
+def D29 : ARMFReg<29, "d29">, DwarfRegNum<[285]>;
+def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>;
+def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>;
+
+// Advanced SIMD (NEON) defines 16 quad-word aliases
+let SubRegIndices = [dsub_0, dsub_1] in {
+def Q0 : ARMReg< 0, "q0", [D0, D1]>;
+def Q1 : ARMReg< 1, "q1", [D2, D3]>;
+def Q2 : ARMReg< 2, "q2", [D4, D5]>;
+def Q3 : ARMReg< 3, "q3", [D6, D7]>;
+def Q4 : ARMReg< 4, "q4", [D8, D9]>;
+def Q5 : ARMReg< 5, "q5", [D10, D11]>;
+def Q6 : ARMReg< 6, "q6", [D12, D13]>;
+def Q7 : ARMReg< 7, "q7", [D14, D15]>;
+}
+let SubRegIndices = [dsub_0, dsub_1] in {
+def Q8 : ARMReg< 8, "q8", [D16, D17]>;
+def Q9 : ARMReg< 9, "q9", [D18, D19]>;
+def Q10 : ARMReg<10, "q10", [D20, D21]>;
+def Q11 : ARMReg<11, "q11", [D22, D23]>;
+def Q12 : ARMReg<12, "q12", [D24, D25]>;
+def Q13 : ARMReg<13, "q13", [D26, D27]>;
+def Q14 : ARMReg<14, "q14", [D28, D29]>;
+def Q15 : ARMReg<15, "q15", [D30, D31]>;
+}
+
+// Current Program Status Register.
+// We model fpscr with two registers: FPSCR models the control bits and will be
+// reserved. FPSCR_NZCV models the flag bits and will be unreserved.
+def CPSR : ARMReg<0, "cpsr">;
+def APSR : ARMReg<1, "apsr">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
+ let Aliases = [FPSCR];
+}
+def ITSTATE : ARMReg<4, "itstate">;
+
+// Special Registers - only available in privileged mode.
+def FPSID : ARMReg<0, "fpsid">;
+def MVFR1 : ARMReg<6, "mvfr1">;
+def MVFR0 : ARMReg<7, "mvfr0">;
+def FPEXC : ARMReg<8, "fpexc">;
+
+// Register classes.
+//
+// pc == Program Counter
+// lr == Link Register
+// sp == Stack Pointer
+// r12 == ip (scratch)
+// r7 == Frame Pointer (thumb-style backtraces)
+// r9 == May be reserved as Thread Register
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
+//
+def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
+ SP, LR, PC)> {
+ // Allocate LR as the first CSR since it is always saved anyway.
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+ // know how to spill them. If we make our prologue/epilogue code smarter at
+ // some point, we can go back to using the above allocation orders for the
+ // Thumb1 instructions that know how to use hi regs.
+ let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// GPRs without the PC. Some ARM instructions do not allow the PC in
+// certain operand slots, particularly as the destination. Primarily
+// useful for disassembly.
+def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the
+// implied SP argument list.
+// FIXME: It would be better to not use this at all and refactor the
+// instructions to not have SP an an explicit argument. That makes
+// frame index resolution a bit trickier, though.
+def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>;
+
+// restricted GPR register class. Many Thumb2 instructions allow the full
+// register range for operands, but have undefined behaviours when PC
+// or SP (R13 or R15) are used. The ARM ISA refers to these operands
+// via the BadReg() pseudo-code description.
+def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
+ let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Thumb registers are R0-R7 normally. Some instructions can still use
+// the general GPR register class above (MOV, e.g.)
+def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
+
+// The high registers in thumb mode, R8-R15.
+def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
+
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
+// this class and the preceding one(!) This is what we want.
+def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
+ let AltOrders = [(and tcGPR, tGPR)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
+
+// Scalar single precision floating point register class..
+// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to
+// avoid partial-write dependencies on D registers (S registers are
+// renamed as portions of D registers).
+def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate
+ (sequence "S%u", 0, 31), 2),
+ (sequence "S%u", 0, 31))>;
+
+// Subset of SPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
+
+// Scalar double precision floating point / generic 64-bit vector register
+// class.
+// ARM requires only word alignment for double. It's more performant if it
+// is double-word alignment though.
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (sequence "D%u", 0, 31)> {
+ // Allocate non-VFP2 registers D16-D31 first.
+ let AltOrders = [(rotl DPR, 16)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of DPR that are accessible with VFP2 (and so that also have
+// 32-bit SPR subregs).
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (trunc DPR, 16)>;
+
+// Subset of DPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (trunc DPR, 8)>;
+
+// Generic 128-bit vector register class.
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ (sequence "Q%u", 0, 15)> {
+ // Allocate non-VFP2 aliases Q8-Q15 first.
+ let AltOrders = [(rotl QPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of QPR that have 32-bit SPR subregs.
+def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc QPR, 8)>;
+
+// Subset of QPR that have DPR_8 and SPR_8 subregs.
+def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc QPR, 4)>;
+
+// Pseudo-registers representing odd-even pairs of D registers. The even-odd
+// pairs are already represented by the Q registers.
+// These are needed by NEON instructions requiring two consecutive D registers.
+// There is no D31_D0 register as that is always an UNPREDICTABLE encoding.
+def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1],
+ [(decimate (shl DPR, 1), 2),
+ (decimate (shl DPR, 2), 2)]>;
+
+// Register class representing a pair of consecutive D registers.
+// Use the Q registers for the even-odd pairs.
+def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (interleave QPR, TuplesOE2D)> {
+ // Allocate starting at non-VFP2 registers D16-D31 first.
+ // Prefer even-odd pairs as they are easier to copy.
+ let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP.
+// These are needed by instructions (e.g. ldrexd/strexd) requiring even-odd GPRs.
+def Tuples2R : RegisterTuples<[gsub_0, gsub_1],
+ [(add R0, R2, R4, R6, R8, R10, R12),
+ (add R1, R3, R5, R7, R9, R11, SP)]>;
+
+// Register class representing a pair of even-odd GPRs.
+def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2R)> {
+ let Size = 64; // 2 x 32 bits, we have no predefined type of that size.
+}
+
+// Pseudo-registers representing 3 consecutive D registers.
+def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
+ [(shl DPR, 0),
+ (shl DPR, 1),
+ (shl DPR, 2)]>;
+
+// 3 consecutive D registers.
+def DTriple : RegisterClass<"ARM", [untyped], 64, (add Tuples3D)> {
+ let Size = 192; // 3 x 64 bits, we have no predefined type of that size.
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>;
+
+// Pseudo 256-bit vector register class to model pairs of Q registers
+// (4 consecutive D registers).
+def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> {
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Tuples of 4 D regs that isn't also a pair of Q regs.
+def TuplesOE4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
+ [(decimate (shl DPR, 1), 2),
+ (decimate (shl DPR, 2), 2),
+ (decimate (shl DPR, 3), 2),
+ (decimate (shl DPR, 4), 2)]>;
+
+// 4 consecutive D registers.
+def DQuad : RegisterClass<"ARM", [v4i64], 256,
+ (interleave Tuples2Q, TuplesOE4D)>;
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1],
+ [(shl QQPR, 0), (shl QQPR, 2)]>;
+
+// Pseudo 512-bit vector register class to model 4 consecutive Q registers
+// (8 consecutive D registers).
+def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> {
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQQQPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+
+// Pseudo-registers representing 2-spaced consecutive D registers.
+def Tuples2DSpc : RegisterTuples<[dsub_0, dsub_2],
+ [(shl DPR, 0),
+ (shl DPR, 2)]>;
+
+// Spaced pairs of D registers.
+def DPairSpc : RegisterClass<"ARM", [v2i64], 64, (add Tuples2DSpc)>;
+
+def Tuples3DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4],
+ [(shl DPR, 0),
+ (shl DPR, 2),
+ (shl DPR, 4)]>;
+
+// Spaced triples of D registers.
+def DTripleSpc : RegisterClass<"ARM", [untyped], 64, (add Tuples3DSpc)> {
+ let Size = 192; // 3 x 64 bits, we have no predefined type of that size.
+}
+
+def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6],
+ [(shl DPR, 0),
+ (shl DPR, 2),
+ (shl DPR, 4),
+ (shl DPR, 6)]>;
+
+// Spaced quads of D registers.
+def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples3DSpc)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMRelocations.h b/contrib/llvm/lib/Target/ARM/ARMRelocations.h
new file mode 100644
index 000000000000..21877fd9af37
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRelocations.h
@@ -0,0 +1,62 @@
+//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMRELOCATIONS_H
+#define ARMRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace ARM {
+ enum RelocationType {
+ // reloc_arm_absolute - Absolute relocation, just add the relocated value
+ // to the value already in memory.
+ reloc_arm_absolute,
+
+ // reloc_arm_relative - PC relative relocation, add the relocated value to
+ // the value already in memory, after we adjust it for where the PC is.
+ reloc_arm_relative,
+
+ // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose
+ // addresses are kept locally in a map.
+ reloc_arm_cp_entry,
+
+ // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset
+ // should be divided by 4.
+ reloc_arm_vfp_cp_entry,
+
+ // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool
+ // entry.
+ reloc_arm_machine_cp_entry,
+
+ // reloc_arm_jt_base - PC relative relocation for jump tables whose
+ // addresses are kept locally in a map.
+ reloc_arm_jt_base,
+
+ // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base.
+ reloc_arm_pic_jt,
+
+ // reloc_arm_branch - Branch address relocation.
+ reloc_arm_branch,
+
+ // reloc_arm_movt - MOVT immediate relocation.
+ reloc_arm_movt,
+
+ // reloc_arm_movw - MOVW immediate relocation.
+ reloc_arm_movw
+ };
+ }
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
new file mode 100644
index 000000000000..2d088de96e27
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -0,0 +1,336 @@
+//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction scheduling annotations for out-of-order CPUs.
+// These annotations are independent of the itinerary class defined below.
+// Here we define the subtarget independent read/write per-operand resources.
+// The subtarget schedule definitions will then map these to the subtarget's
+// resource usages.
+// For example:
+// The instruction cycle timings table might contain an entry for an operation
+// like the following:
+// Rd <- ADD Rn, Rm, <shift> Rs
+// Uops | Latency from register | Uops - resource requirements - latency
+// 2 | Rn: 1 Rm: 4 Rs: 4 | uop T0, Rm, Rs - P01 - 3
+// | | uopc Rd, Rn, T0 - P01 - 1
+// This is telling us that the result will be available in destination register
+// Rd after a minimum of three cycles after the result in Rm and Rs is available
+// and one cycle after the result in Rn is available. The micro-ops can execute
+// on resource P01.
+// To model this, we need to express that we need to dispatch two micro-ops,
+// that the resource P01 is needed and that the latency to Rn is different than
+// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
+// two.
+// We will do this by assigning (abstract) resources to register defs/uses.
+// ARMSchedule.td:
+// def WriteALUsr : SchedWrite;
+// def ReadAdvanceALUsr : ScheRead;
+//
+// ARMInstrInfo.td:
+// def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
+// ReadDefault]> { ...}
+// ReadAdvance read resources allow us to define "pipeline by-passes" or
+// shorter latencies to certain registers as needed in the example above.
+// The "ReadDefault" can be omitted.
+// Next, the subtarget td file assigns resources to the abstract resources
+// defined here.
+// ARMScheduleSubtarget.td:
+// // Resources.
+// def P01 : ProcResource<3>; // ALU unit (3 of it).
+// ...
+// // Resource usages.
+// def : WriteRes<WriteALUsr, [P01, P01]> {
+// Latency = 4; // Latency of 4.
+// NumMicroOps = 2; // Dispatch 2 micro-ops.
+// // The two instances of resource P01 are occupied for one cycle. It is one
+// // cycle because these resources happen to be pipelined.
+// ResourceCycles = [1, 1];
+// }
+// def : ReadAdvance<ReadAdvanceALUsr, 3>;
+
+// Basic ALU operation.
+def WriteALU : SchedWrite;
+def ReadALU : SchedRead;
+
+// Basic ALU with shifts.
+def WriteALUsi : SchedWrite; // Shift by immediate.
+def WriteALUsr : SchedWrite; // Shift by register.
+def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
+def ReadALUsr : SchedRead; // Some operands are read later.
+
+// Compares.
+def WriteCMP : SchedWrite;
+def WriteCMPsi : SchedWrite;
+def WriteCMPsr : SchedWrite;
+
+// Define TII for use in SchedVariant Predicates.
+def : PredicateProlog<[{
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
+
+def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(MI)}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for ARM
+//
+def IIC_iALUx : InstrItinClass;
+def IIC_iALUi : InstrItinClass;
+def IIC_iALUr : InstrItinClass;
+def IIC_iALUsi : InstrItinClass;
+def IIC_iALUsir : InstrItinClass;
+def IIC_iALUsr : InstrItinClass;
+def IIC_iBITi : InstrItinClass;
+def IIC_iBITr : InstrItinClass;
+def IIC_iBITsi : InstrItinClass;
+def IIC_iBITsr : InstrItinClass;
+def IIC_iUNAr : InstrItinClass;
+def IIC_iUNAsi : InstrItinClass;
+def IIC_iEXTr : InstrItinClass;
+def IIC_iEXTAr : InstrItinClass;
+def IIC_iEXTAsr : InstrItinClass;
+def IIC_iCMPi : InstrItinClass;
+def IIC_iCMPr : InstrItinClass;
+def IIC_iCMPsi : InstrItinClass;
+def IIC_iCMPsr : InstrItinClass;
+def IIC_iTSTi : InstrItinClass;
+def IIC_iTSTr : InstrItinClass;
+def IIC_iTSTsi : InstrItinClass;
+def IIC_iTSTsr : InstrItinClass;
+def IIC_iMOVi : InstrItinClass;
+def IIC_iMOVr : InstrItinClass;
+def IIC_iMOVsi : InstrItinClass;
+def IIC_iMOVsr : InstrItinClass;
+def IIC_iMOVix2 : InstrItinClass;
+def IIC_iMOVix2addpc : InstrItinClass;
+def IIC_iMOVix2ld : InstrItinClass;
+def IIC_iMVNi : InstrItinClass;
+def IIC_iMVNr : InstrItinClass;
+def IIC_iMVNsi : InstrItinClass;
+def IIC_iMVNsr : InstrItinClass;
+def IIC_iCMOVi : InstrItinClass;
+def IIC_iCMOVr : InstrItinClass;
+def IIC_iCMOVsi : InstrItinClass;
+def IIC_iCMOVsr : InstrItinClass;
+def IIC_iCMOVix2 : InstrItinClass;
+def IIC_iMUL16 : InstrItinClass;
+def IIC_iMAC16 : InstrItinClass;
+def IIC_iMUL32 : InstrItinClass;
+def IIC_iMAC32 : InstrItinClass;
+def IIC_iMUL64 : InstrItinClass;
+def IIC_iMAC64 : InstrItinClass;
+def IIC_iDIV : InstrItinClass;
+def IIC_iLoad_i : InstrItinClass;
+def IIC_iLoad_r : InstrItinClass;
+def IIC_iLoad_si : InstrItinClass;
+def IIC_iLoad_iu : InstrItinClass;
+def IIC_iLoad_ru : InstrItinClass;
+def IIC_iLoad_siu : InstrItinClass;
+def IIC_iLoad_bh_i : InstrItinClass;
+def IIC_iLoad_bh_r : InstrItinClass;
+def IIC_iLoad_bh_si : InstrItinClass;
+def IIC_iLoad_bh_iu : InstrItinClass;
+def IIC_iLoad_bh_ru : InstrItinClass;
+def IIC_iLoad_bh_siu : InstrItinClass;
+def IIC_iLoad_d_i : InstrItinClass;
+def IIC_iLoad_d_r : InstrItinClass;
+def IIC_iLoad_d_ru : InstrItinClass;
+def IIC_iLoad_m : InstrItinClass;
+def IIC_iLoad_mu : InstrItinClass;
+def IIC_iLoad_mBr : InstrItinClass;
+def IIC_iPop : InstrItinClass;
+def IIC_iPop_Br : InstrItinClass;
+def IIC_iLoadiALU : InstrItinClass;
+def IIC_iStore_i : InstrItinClass;
+def IIC_iStore_r : InstrItinClass;
+def IIC_iStore_si : InstrItinClass;
+def IIC_iStore_iu : InstrItinClass;
+def IIC_iStore_ru : InstrItinClass;
+def IIC_iStore_siu : InstrItinClass;
+def IIC_iStore_bh_i : InstrItinClass;
+def IIC_iStore_bh_r : InstrItinClass;
+def IIC_iStore_bh_si : InstrItinClass;
+def IIC_iStore_bh_iu : InstrItinClass;
+def IIC_iStore_bh_ru : InstrItinClass;
+def IIC_iStore_bh_siu : InstrItinClass;
+def IIC_iStore_d_i : InstrItinClass;
+def IIC_iStore_d_r : InstrItinClass;
+def IIC_iStore_d_ru : InstrItinClass;
+def IIC_iStore_m : InstrItinClass;
+def IIC_iStore_mu : InstrItinClass;
+def IIC_Preload : InstrItinClass;
+def IIC_Br : InstrItinClass;
+def IIC_fpSTAT : InstrItinClass;
+def IIC_fpUNA32 : InstrItinClass;
+def IIC_fpUNA64 : InstrItinClass;
+def IIC_fpCMP32 : InstrItinClass;
+def IIC_fpCMP64 : InstrItinClass;
+def IIC_fpCVTSD : InstrItinClass;
+def IIC_fpCVTDS : InstrItinClass;
+def IIC_fpCVTSH : InstrItinClass;
+def IIC_fpCVTHS : InstrItinClass;
+def IIC_fpCVTIS : InstrItinClass;
+def IIC_fpCVTID : InstrItinClass;
+def IIC_fpCVTSI : InstrItinClass;
+def IIC_fpCVTDI : InstrItinClass;
+def IIC_fpMOVIS : InstrItinClass;
+def IIC_fpMOVID : InstrItinClass;
+def IIC_fpMOVSI : InstrItinClass;
+def IIC_fpMOVDI : InstrItinClass;
+def IIC_fpALU32 : InstrItinClass;
+def IIC_fpALU64 : InstrItinClass;
+def IIC_fpMUL32 : InstrItinClass;
+def IIC_fpMUL64 : InstrItinClass;
+def IIC_fpMAC32 : InstrItinClass;
+def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpFMAC32 : InstrItinClass;
+def IIC_fpFMAC64 : InstrItinClass;
+def IIC_fpDIV32 : InstrItinClass;
+def IIC_fpDIV64 : InstrItinClass;
+def IIC_fpSQRT32 : InstrItinClass;
+def IIC_fpSQRT64 : InstrItinClass;
+def IIC_fpLoad32 : InstrItinClass;
+def IIC_fpLoad64 : InstrItinClass;
+def IIC_fpLoad_m : InstrItinClass;
+def IIC_fpLoad_mu : InstrItinClass;
+def IIC_fpStore32 : InstrItinClass;
+def IIC_fpStore64 : InstrItinClass;
+def IIC_fpStore_m : InstrItinClass;
+def IIC_fpStore_mu : InstrItinClass;
+def IIC_VLD1 : InstrItinClass;
+def IIC_VLD1x2 : InstrItinClass;
+def IIC_VLD1x3 : InstrItinClass;
+def IIC_VLD1x4 : InstrItinClass;
+def IIC_VLD1u : InstrItinClass;
+def IIC_VLD1x2u : InstrItinClass;
+def IIC_VLD1x3u : InstrItinClass;
+def IIC_VLD1x4u : InstrItinClass;
+def IIC_VLD1ln : InstrItinClass;
+def IIC_VLD1lnu : InstrItinClass;
+def IIC_VLD1dup : InstrItinClass;
+def IIC_VLD1dupu : InstrItinClass;
+def IIC_VLD2 : InstrItinClass;
+def IIC_VLD2x2 : InstrItinClass;
+def IIC_VLD2u : InstrItinClass;
+def IIC_VLD2x2u : InstrItinClass;
+def IIC_VLD2ln : InstrItinClass;
+def IIC_VLD2lnu : InstrItinClass;
+def IIC_VLD2dup : InstrItinClass;
+def IIC_VLD2dupu : InstrItinClass;
+def IIC_VLD3 : InstrItinClass;
+def IIC_VLD3ln : InstrItinClass;
+def IIC_VLD3u : InstrItinClass;
+def IIC_VLD3lnu : InstrItinClass;
+def IIC_VLD3dup : InstrItinClass;
+def IIC_VLD3dupu : InstrItinClass;
+def IIC_VLD4 : InstrItinClass;
+def IIC_VLD4ln : InstrItinClass;
+def IIC_VLD4u : InstrItinClass;
+def IIC_VLD4lnu : InstrItinClass;
+def IIC_VLD4dup : InstrItinClass;
+def IIC_VLD4dupu : InstrItinClass;
+def IIC_VST1 : InstrItinClass;
+def IIC_VST1x2 : InstrItinClass;
+def IIC_VST1x3 : InstrItinClass;
+def IIC_VST1x4 : InstrItinClass;
+def IIC_VST1u : InstrItinClass;
+def IIC_VST1x2u : InstrItinClass;
+def IIC_VST1x3u : InstrItinClass;
+def IIC_VST1x4u : InstrItinClass;
+def IIC_VST1ln : InstrItinClass;
+def IIC_VST1lnu : InstrItinClass;
+def IIC_VST2 : InstrItinClass;
+def IIC_VST2x2 : InstrItinClass;
+def IIC_VST2u : InstrItinClass;
+def IIC_VST2x2u : InstrItinClass;
+def IIC_VST2ln : InstrItinClass;
+def IIC_VST2lnu : InstrItinClass;
+def IIC_VST3 : InstrItinClass;
+def IIC_VST3u : InstrItinClass;
+def IIC_VST3ln : InstrItinClass;
+def IIC_VST3lnu : InstrItinClass;
+def IIC_VST4 : InstrItinClass;
+def IIC_VST4u : InstrItinClass;
+def IIC_VST4ln : InstrItinClass;
+def IIC_VST4lnu : InstrItinClass;
+def IIC_VUNAD : InstrItinClass;
+def IIC_VUNAQ : InstrItinClass;
+def IIC_VBIND : InstrItinClass;
+def IIC_VBINQ : InstrItinClass;
+def IIC_VPBIND : InstrItinClass;
+def IIC_VFMULD : InstrItinClass;
+def IIC_VFMULQ : InstrItinClass;
+def IIC_VMOV : InstrItinClass;
+def IIC_VMOVImm : InstrItinClass;
+def IIC_VMOVD : InstrItinClass;
+def IIC_VMOVQ : InstrItinClass;
+def IIC_VMOVIS : InstrItinClass;
+def IIC_VMOVID : InstrItinClass;
+def IIC_VMOVISL : InstrItinClass;
+def IIC_VMOVSI : InstrItinClass;
+def IIC_VMOVDI : InstrItinClass;
+def IIC_VMOVN : InstrItinClass;
+def IIC_VPERMD : InstrItinClass;
+def IIC_VPERMQ : InstrItinClass;
+def IIC_VPERMQ3 : InstrItinClass;
+def IIC_VMACD : InstrItinClass;
+def IIC_VMACQ : InstrItinClass;
+def IIC_VFMACD : InstrItinClass;
+def IIC_VFMACQ : InstrItinClass;
+def IIC_VRECSD : InstrItinClass;
+def IIC_VRECSQ : InstrItinClass;
+def IIC_VCNTiD : InstrItinClass;
+def IIC_VCNTiQ : InstrItinClass;
+def IIC_VUNAiD : InstrItinClass;
+def IIC_VUNAiQ : InstrItinClass;
+def IIC_VQUNAiD : InstrItinClass;
+def IIC_VQUNAiQ : InstrItinClass;
+def IIC_VBINiD : InstrItinClass;
+def IIC_VBINiQ : InstrItinClass;
+def IIC_VSUBiD : InstrItinClass;
+def IIC_VSUBiQ : InstrItinClass;
+def IIC_VBINi4D : InstrItinClass;
+def IIC_VBINi4Q : InstrItinClass;
+def IIC_VSUBi4D : InstrItinClass;
+def IIC_VSUBi4Q : InstrItinClass;
+def IIC_VABAD : InstrItinClass;
+def IIC_VABAQ : InstrItinClass;
+def IIC_VSHLiD : InstrItinClass;
+def IIC_VSHLiQ : InstrItinClass;
+def IIC_VSHLi4D : InstrItinClass;
+def IIC_VSHLi4Q : InstrItinClass;
+def IIC_VPALiD : InstrItinClass;
+def IIC_VPALiQ : InstrItinClass;
+def IIC_VMULi16D : InstrItinClass;
+def IIC_VMULi32D : InstrItinClass;
+def IIC_VMULi16Q : InstrItinClass;
+def IIC_VMULi32Q : InstrItinClass;
+def IIC_VMACi16D : InstrItinClass;
+def IIC_VMACi32D : InstrItinClass;
+def IIC_VMACi16Q : InstrItinClass;
+def IIC_VMACi32Q : InstrItinClass;
+def IIC_VEXTD : InstrItinClass;
+def IIC_VEXTQ : InstrItinClass;
+def IIC_VTB1 : InstrItinClass;
+def IIC_VTB2 : InstrItinClass;
+def IIC_VTB3 : InstrItinClass;
+def IIC_VTB4 : InstrItinClass;
+def IIC_VTBX1 : InstrItinClass;
+def IIC_VTBX2 : InstrItinClass;
+def IIC_VTBX3 : InstrItinClass;
+def IIC_VTBX4 : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "ARMScheduleV6.td"
+include "ARMScheduleA8.td"
+include "ARMScheduleA9.td"
+include "ARMScheduleSwift.td"
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
new file mode 100644
index 000000000000..2c6382542ab9
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
@@ -0,0 +1,1075 @@
+//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A8 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
+// Functional Units.
+def A8_Pipe0 : FuncUnit; // pipeline 0
+def A8_Pipe1 : FuncUnit; // pipeline 1
+def A8_LSPipe : FuncUnit; // Load / store pipeline
+def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe
+def A8_NLSPipe : FuncUnit; // NEON LS pipe
+//
+// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
+//
+def CortexA8Itineraries : ProcessorItineraries<
+ [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
+ [], [
+ // Two fully-pipelined integer ALU pipelines
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LSPipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+
+ // Integer multiply pipeline
+ // Result written in E5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ // FIXME: lsl by 2 takes 1 cycle.
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 2, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Push, def is the 3th operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
+
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [], [], -1>, // dynamic uops
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2], [], -1>, // dynamic uops
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<7, [A8_NPipe], 0>,
+ InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NPipe], 0>,
+ InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<9, [A8_NPipe], 0>,
+ InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<11, [A8_NPipe], 0>,
+ InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<20, [A8_NPipe], 0>,
+ InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 20, 1]>,
+
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1, 1, 2], [], -1>, // dynamic uops
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1, 1, 1, 2], [], -1>, // dynamic uops
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1], [], -1>, // dynamic uops
+ // NEON
+ // Issue through integer pipeline, and execute in NEON unit.
+ //
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1]>,
+ //
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1]>,
+ //
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2]>,
+ //
+ // Double-register FP Binary
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
+
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Quad-register Permute Move
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [2, 1]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register Fused FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// This following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Cortex-A8 machine model for scheduling and other instruction cost heuristics.
+def CortexA8Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 2; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 13; // Based on estimate of pipeline depth.
+
+ let Itineraries = CortexA8Itineraries;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
new file mode 100644
index 000000000000..9739ed20ce2e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -0,0 +1,2496 @@
+//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A9 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
+//
+// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
+// Reference Manual".
+//
+// Functional units
+def A9_Issue0 : FuncUnit; // Issue 0
+def A9_Issue1 : FuncUnit; // Issue 1
+def A9_Branch : FuncUnit; // Branch
+def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0
+def A9_ALU1 : FuncUnit; // ALU pipeline 1
+def A9_AGU : FuncUnit; // Address generation unit for ld / st
+def A9_NPipe : FuncUnit; // NEON pipeline
+def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
+def A9_LSUnit : FuncUnit; // L/S Unit
+def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
+def A9_DRegsN : FuncUnit; // FP register set, NEON side
+
+// Bypasses
+def A9_LdBypass : Bypass;
+
+def CortexA9Itineraries : ProcessorItineraries<
+ [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
+ A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
+ [A9_LdBypass], [
+ // Two fully-pipelined integer ALU pipelines
+
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1, 1],
+ [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1], [A9_LdBypass]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>],
+ [4, 5, 1, 1]>,
+ // Integer load pipeline
+ // FIXME: The timings are some rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1], [A9_LdBypass]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit], 0>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 1, 1], [A9_LdBypass]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 4, 1, 1], [A9_LdBypass]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 2, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+
+ // Integer store pipeline
+ ///
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [], [], -1>, // dynamic uops
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2], [], -1>, // dynamic uops
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
+ InstrStage<1, [A9_Issue1], 0>,
+ InstrStage<1, [A9_Branch]>]>,
+
+ // VFP and NEON shares the same register file. This means that every VFP
+ // instruction should wait for full completion of the consecutive NEON
+ // instruction and vice-versa. We model this behavior with two artificial FUs:
+ // DRegsVFP and DRegsVFP.
+ //
+ // Every VFP instruction:
+ // - Acquires DRegsVFP resource for 1 cycle
+ // - Reserves DRegsN resource for the whole duration (including time to
+ // register file writeback!).
+ // Every NEON instruction does the same but with FUs swapped.
+ //
+ // Since the reserved FU cannot be acquired, this models precisely
+ // "cross-domain" stalls.
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit.
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<6, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<7, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<16, [A9_DRegsN], 0, Reserved>,
+ InstrStage<10, [A9_NPipe]>],
+ [15, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<26, [A9_DRegsN], 0, Reserved>,
+ InstrStage<20, [A9_NPipe]>],
+ [25, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<18, [A9_DRegsN], 0, Reserved>,
+ InstrStage<13, [A9_NPipe]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<33, [A9_DRegsN], 0, Reserved>,
+ InstrStage<28, [A9_NPipe]>],
+ [32, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Load
+ // FIXME: Result latency is 1 if address is 64-bit aligned.
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Load Multiple + update
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Store Multiple + update
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
+ // NEON
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 2, 1]>,
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 2]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 3, 2, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1,1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 2]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 1]>,
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register Fused FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 10 cycles
+ InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [9, 2, 2]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 11 cycles
+ InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [10, 2, 2]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 4, 1, 1]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 3, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler and will eventually replace itineraries.
+
+
+// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
+def CortexA9Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let LoadLatency = 2; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let ILPWindow = 10; // Don't reschedule small blocks to hide
+ // latency. Minimum latency requirements are already
+ // modeled strictly by reserving resources.
+ let MispredictPenalty = 8; // Based on estimate of pipeline depth.
+
+ let Itineraries = CortexA9Itineraries;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+let SchedModel = CortexA9Model in {
+
+def A9UnitALU : ProcResource<2>;
+def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
+def A9UnitAGU : ProcResource<1>;
+def A9UnitLS : ProcResource<1>;
+def A9UnitFP : ProcResource<1> { let Buffered = 0; }
+def A9UnitB : ProcResource<1>;
+
+//===----------------------------------------------------------------------===//
+// Define scheduler read/write types with their resources and latency on A9.
+
+// Consume an issue slot, but no processor resources. This is useful when all
+// other writes associated with the operand have NumMicroOps = 0.
+def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; }
+
+// Write an integer register.
+def A9WriteI : SchedWriteRes<[A9UnitALU]>;
+// Write an integer shifted-by register
+def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+
+// Basic ALU.
+def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
+// ALU with operand shifted by immediate.
+def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
+// ALU with operand shifted by register.
+def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
+
+// Multiplication
+def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
+def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
+ let NumMicroOps = 0; }
+def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
+def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
+ let NumMicroOps = 0; }
+
+// Floating-point
+// Only one FP or AGU instruction may issue per cycle. We model this
+// by having FP instructions consume the AGU resource.
+def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
+def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
+def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
+def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
+def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
+def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
+def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
+def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
+def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
+
+// NEON has an odd mix of latencies. Simply name the write types by latency.
+def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
+def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
+def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
+def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
+def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
+def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
+def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
+def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
+
+// Reserve A9UnitFP for 2 consecutive cycles.
+def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 7;
+ let ResourceCycles = [2];
+}
+def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+// Branches don't have a def operand but still consume resources.
+def A9WriteB : SchedWriteRes<[A9UnitB]>;
+
+// Address generation.
+def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
+
+// Load Integer.
+def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
+// Load the upper 32-bits using the same micro-op.
+def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
+ let NumMicroOps = 0; }
+// Offset shifted by register.
+def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
+// Load (and zero extend) a byte.
+def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
+def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }
+
+// Load or Store Float, aligned.
+def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }
+
+// Store Integer.
+def A9WriteS : SchedWriteRes<[A9UnitLS]>;
+
+//===----------------------------------------------------------------------===//
+// Define resources dynamically for load multiple variants.
+
+// Define helpers for extra latency without consuming resources.
+def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
+foreach NumCycles = 2-8 in {
+def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
+} // foreach NumCycles
+
+// Define address generation sequences and predicates for 8 flavors of LDMs.
+foreach NumAddr = 1-8 in {
+
+// Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
+// latency for instructions that generate multiple loads or stores.
+def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
+
+// Define a predicate to select the LDM based on number of memory addresses.
+def A9LMAdr#NumAddr#Pred :
+ SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+
+} // foreach NumAddr
+
+// Fall-back for unknown LDMs.
+def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">;
+
+// LDM/VLDM/VLDn address generation latency & resources.
+// Dynamically select the A9WriteAdrN sequence using a predicate.
+def A9WriteLMAdr : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
+ // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
+ SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>;
+
+// Define LDM Resources.
+// These take no issue resource, so they can be combined with other
+// writes like WriteB.
+// A9WriteLMLo takes a single LS resource and 2 cycles.
+def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2;
+ let NumMicroOps = 0; }
+// Assuming aligned access, the upper half of each pair is free with
+// the same latency.
+def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2;
+ let NumMicroOps = 0; }
+// Each A9WriteL#N variant adds N cycles of latency without consuming
+// additional resources.
+foreach NumAddr = 1-8 in {
+def A9WriteL#NumAddr : WriteSequence<
+ [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+def A9WriteL#NumAddr#Hi : WriteSequence<
+ [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// LDM: Load multiple into 32-bit integer registers.
+
+// A9WriteLM variants expand into a pair of writes for each 64-bit
+// value loaded. When the number of registers is odd, the last
+// A9WriteLnHi is naturally ignored because the instruction has no
+// following def operands. These variants take no issue resource, so
+// they may need to be part of a WriteSequence that includes A9WriteIssue.
+def A9WriteLM : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteL1, A9WriteL1Hi]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi,
+ A9WriteL7, A9WriteL7Hi]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi,
+ A9WriteL7, A9WriteL7Hi,
+ A9WriteL8, A9WriteL8Hi]>,
+ // For unknown LDMs, define the maximum number of writes, but only
+ // make the first two consume resources.
+ SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3Hi, A9WriteL3Hi,
+ A9WriteL4Hi, A9WriteL4Hi,
+ A9WriteL5Hi, A9WriteL5Hi,
+ A9WriteL6Hi, A9WriteL6Hi,
+ A9WriteL7Hi, A9WriteL7Hi,
+ A9WriteL8Hi, A9WriteL8Hi]>]> {
+ let Variadic = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
+
+// A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources
+// so can be used in WriteSequences for in single-issue instructions that
+// encapsulate multiple loads.
+def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+
+foreach NumAddr = 1-8 in {
+
+// Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
+def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
+
+// A9WriteLfp1-8 definitions are statically expanded into a sequence of
+// A9WriteLfpOps with additive latency that takes a single issue slot.
+// Used directly to describe NEON VLDn.
+def A9WriteLfp#NumAddr : WriteSequence<
+ [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
+
+// A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
+// permuting loaded values.
+def A9WriteLfp#NumAddr#Mov : WriteSequence<
+ [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
+
+} // foreach NumAddr
+
+// Define VLDM/VSTM PreRA resources.
+// A9WriteLMfpPreRA are dynamically expanded into the correct
+// A9WriteLfp1-8 sequence based on a predicate. This supports the
+// preRA VLDM variants in which all 64-bit loads are written to the
+// same tuple of either single or double precision registers.
+def A9WriteLMfpPreRA : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
+ // For unknown VLDM/VSTM PreRA, assume 2xS registers.
+ SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
+
+// Define VLDM/VSTM PostRA Resources.
+// A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
+def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; }
+
+foreach NumAddr = 1-8 in {
+
+// Each A9WriteL#N variant adds N cycles of latency without consuming
+// additional resources.
+def A9WriteLMfp#NumAddr : WriteSequence<
+ [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+
+// Assuming aligned access, the upper half of each pair is free with
+// the same latency.
+def A9WriteLMfp#NumAddr#Hi : WriteSequence<
+ [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+
+} // foreach NumAddr
+
+// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
+// pair of writes for each 64-bit data loaded. When the number of
+// registers is odd, the last WriteLMfpnHi is naturally ignored because
+// the instruction has no following def operands.
+def A9WriteLMfpPostRA : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteLMfp1, A9WriteLMfp1Hi]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi,
+ A9WriteLMfp6, A9WriteLMfp6Hi]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi,
+ A9WriteLMfp6, A9WriteLMfp6Hi,
+ A9WriteLMfp7, A9WriteLMfp7Hi]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi,
+ A9WriteLMfp6, A9WriteLMfp6Hi,
+ A9WriteLMfp7, A9WriteLMfp7Hi,
+ A9WriteLMfp8, A9WriteLMfp8Hi]>,
+ // For unknown LDMs, define the maximum number of writes, but only
+ // make the first two consume resources.
+ SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3Hi, A9WriteLMfp3Hi,
+ A9WriteLMfp4Hi, A9WriteLMfp4Hi,
+ A9WriteLMfp5Hi, A9WriteLMfp5Hi,
+ A9WriteLMfp6Hi, A9WriteLMfp6Hi,
+ A9WriteLMfp7Hi, A9WriteLMfp7Hi,
+ A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
+ let Variadic = 1;
+}
+
+// Distinguish between our multiple MI-level forms of the same
+// VLDM/VSTM instructions.
+def A9PreRA : SchedPredicate<
+ "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
+def A9PostRA : SchedPredicate<
+ "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;
+
+// VLDM represents all destination registers as a single register
+// tuple, unlike LDM. So the number of write operands is not variadic.
+def A9WriteLMfp : SchedWriteVariant<[
+ SchedVar<A9PreRA, [A9WriteLMfpPreRA]>,
+ SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
+
+//===----------------------------------------------------------------------===//
+// Resources for other (non LDM/VLDM) Variants.
+
+// These mov immediate writers are unconditionally expanded with
+// additive latency.
+def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
+def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
+def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
+
+// Some ALU operations can read loaded integer values one cycle early.
+def A9ReadALU : SchedReadAdvance<1,
+ [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
+ A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
+ A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
+ A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
+ A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
+
+// Read types for operands that are unconditionally read in cycle N
+// after the instruction issues, decreases producer latency by N-1.
+def A9Read2 : SchedReadAdvance<1>;
+def A9Read3 : SchedReadAdvance<2>;
+def A9Read4 : SchedReadAdvance<3>;
+
+//===----------------------------------------------------------------------===//
+// Map itinerary classes to scheduler read/write resources per operand.
+//
+// For ARM, we piggyback scheduler resources on the Itinerary classes
+// to avoid perturbing the existing instruction definitions.
+
+// This table follows the ARM Cortex-A9 Technical Reference Manuals,
+// mostly in order.
+
+def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
+ IIC_iMVNi,IIC_iMVNsi,
+ IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
+def :ItinRW<[A9WriteI,A9ReadALU],[IIC_iMVNr]>;
+def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
+
+def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
+def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
+def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
+
+def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
+def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
+def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
+def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
+def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
+def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
+def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
+def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
+
+// A9WriteHi ignored for MUL32.
+def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
+ IIC_iMUL64,IIC_iMAC64]>;
+// FIXME: SMLALxx needs itin classes
+def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>;
+
+// TODO: For floating-point ops, we model the pipeline forwarding
+// latencies here. WAW latencies are sometimes longer.
+
+def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
+ IIC_fpUNA32, IIC_fpUNA64,
+ IIC_fpCMP32, IIC_fpCMP64]>;
+def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
+def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
+ IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
+ IIC_fpALU32, IIC_fpALU64]>;
+def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>;
+def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
+def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
+def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
+def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
+def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
+def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
+def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
+
+def :ItinRW<[A9WriteB], [IIC_Br]>;
+
+// A9 PLD is processed in a dedicated unit.
+def :ItinRW<[], [IIC_Preload]>;
+
+// Note: We must assume that loads are aligned, since the machine
+// model cannot know this statically and A9 ignores alignment hints.
+
+// A9WriteAdr consumes AGU regardless address writeback. But it's
+// latency is only relevant for users of an updated address.
+def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
+ IIC_iLoad_iu,IIC_iLoad_ru]>;
+def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
+def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
+ IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
+def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
+def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
+ IIC_iLoad_d_ru]>;
+// Store either has no def operands, or the one def for address writeback.
+def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
+ IIC_iStore_iu, IIC_iStore_ru,
+ IIC_iStore_d_i, IIC_iStore_d_r,
+ IIC_iStore_d_ru]>;
+def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
+ IIC_iStore_bh_i, IIC_iStore_bh_r,
+ IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
+def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
+
+// A9WriteML will be expanded into a separate write for each def
+// operand. Address generation consumes resources, but A9WriteLMAdr
+// is listed after all def operands, so has no effective latency.
+//
+// Note: A9WriteLM expands into an even number of def operands. The
+// actual number of def operands may be less by one.
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
+
+// Load multiple with address writeback has an extra def operand in
+// front of the loaded registers.
+//
+// Reuse the load-multiple variants for store-multiple because the
+// resources are identical, For stores only the address writeback
+// has a def operand so the WriteL latencies are unused.
+def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
+ IIC_iStore_m,
+ IIC_iStore_mu]>;
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
+
+def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
+
+def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
+def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
+def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
+ IIC_fpStore_m, IIC_fpStore_mu]>;
+
+// Note: Unlike VLDM, VLD1 expects the writeback operand after the
+// normal writes.
+def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
+ IIC_VLD1x2, IIC_VLD1x2u]>;
+def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
+ IIC_VLD1x4, IIC_VLD1x4u,
+ IIC_VLD4dup, IIC_VLD4dupu]>;
+def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
+ IIC_VLD2, IIC_VLD2u,
+ IIC_VLD2dup, IIC_VLD2dupu]>;
+def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
+ IIC_VLD2x2, IIC_VLD2x2u,
+ IIC_VLD2ln, IIC_VLD2lnu]>;
+def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
+ IIC_VLD3dup, IIC_VLD3dupu]>;
+def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
+ IIC_VLD4ln, IIC_VLD4lnu]>;
+def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
+
+// Vector stores use similar resources to vector loads, so use the
+// same write types. The address write must be first for stores with
+// address writeback.
+def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u,
+ IIC_VST1x2, IIC_VST1x2u,
+ IIC_VST1ln, IIC_VST1lnu,
+ IIC_VST2, IIC_VST2u,
+ IIC_VST2x2, IIC_VST2x2u,
+ IIC_VST2ln, IIC_VST2lnu]>;
+def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u,
+ IIC_VST1x4, IIC_VST1x4u,
+ IIC_VST3, IIC_VST3u,
+ IIC_VST3ln, IIC_VST3lnu,
+ IIC_VST4, IIC_VST4u,
+ IIC_VST4ln, IIC_VST4lnu]>;
+
+// NEON moves.
+def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
+def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
+def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;
+
+// NEON integer arithmetic
+//
+// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
+def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
+// VSUB/VMVN/VCLSD/VCLZD/VCNTD
+def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
+// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
+// ...
+// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
+// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
+def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
+// VQNEG/VQABS
+def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
+// VABS
+def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
+// VPADD/VPADDL are mapped later under IIC_SHLi.
+// ...
+// VCLSQ/VCLZQ/VCNTQ, takes two cycles.
+def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
+// VMOVimm/VMVNimm/VORRimm/VBICimm
+def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
+def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
+def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;
+
+// NEON integer multiply
+//
+// Note: these don't quite match the timing docs, but they do match
+// the original A9 itinerary.
+def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
+def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
+def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>;
+def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
+def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
+def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
+def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
+def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;
+
+// NEON integer shift
+// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
+def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
+def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
+
+// NEON permute
+def :ItinRW<[A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
+def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
+ [IIC_VPERMQ3, IIC_VEXTQ]>;
+def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
+def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
+ [IIC_VTBX4]>;
+
+// NEON floating-point
+def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
+def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
+def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
+def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
+def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
+def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
+def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
+def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+
+// Map SchedRWs that are identical for cortexa9 to existing resources.
+def : SchedAlias<WriteALU, A9WriteALU>;
+def : SchedAlias<WriteALUsr, A9WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
+def : SchedAlias<ReadALU, A9ReadALU>;
+def : SchedAlias<ReadALUsr, A9ReadALU>;
+// FIXME: need to special case AND, ORR, EOR, BIC because they don't read
+// advance. But our instrinfo claims it does.
+
+def : SchedAlias<WriteCMP, A9WriteALU>;
+def : SchedAlias<WriteCMPsi, A9WriteALU>;
+def : SchedAlias<WriteCMPsr, A9WriteALU>;
+} // SchedModel = CortexA9Model
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
new file mode 100644
index 000000000000..7c6df410706e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -0,0 +1,1144 @@
+//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Swift processor..
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
+def SW_DIS0 : FuncUnit;
+def SW_DIS1 : FuncUnit;
+def SW_DIS2 : FuncUnit;
+
+def SW_ALU0 : FuncUnit;
+def SW_ALU1 : FuncUnit;
+def SW_LS : FuncUnit;
+def SW_IDIV : FuncUnit;
+def SW_FDIV : FuncUnit;
+
+// FIXME: Need bypasses.
+// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
+// IIC_iMOVix2ld better.
+// FIXME: Model the special immediate shifts that are not microcoded.
+// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
+// to issue on pipe 1?
+// FIXME: Model the pipelined behavior of CMP / TST instructions.
+// FIXME: Better model the microcode stages of multiply instructions, especially
+// conditional variants.
+// FIXME: Add preload instruction when it is documented.
+// FIXME: Model non-pipelined nature of FP div / sqrt unit.
+
+def SwiftItineraries : ProcessorItineraries<
+ [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_LS]>],
+ [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0], 3>,
+ InstrStage<1, [SW_ALU0]>],
+ [5, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 6, 1, 1]>,
+ //
+ // Integer divide
+ InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 0>,
+ InstrStage<14, [SW_IDIV]>],
+ [14, 1, 1]>,
+
+ // Integer load pipeline
+ // FIXME: The timings are some rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3, 4, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [5, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3, 4, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [4, 1]>,
+
+ // Integer store pipeline
+ ///
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [], [], -1>, // dynamic uops
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [2], [], -1>, // dynamic uops
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>,
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ //
+ // Most floating-point moves get issued on ALU0.
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 4>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [12, 1, 1]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [12, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<15, [SW_FDIV]>],
+ [17, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<30, [SW_FDIV]>],
+ [32, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<15, [SW_FDIV]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<30, [SW_FDIV]>],
+ [32, 1, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 4], [], -1>, // dynamic uops
+ //
+ // FP Load Multiple + update
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1, 4], [], -1>, // dynamic uops
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Store Multiple + update
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
+ // NEON
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Count
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Integer to Lane Move
+ // FIXME: I think this is correct, but it is not clear from the tuning guide.
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Double-register FP Unary
+ // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Quad-register FP Unary
+ // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP Binary
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register FusedF P Multiple-Accumulate
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute (3 cycle issue on A9)
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// This following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Swift machine model for scheduling and other instruction cost heuristics.
+def SwiftModel : SchedMachineModel {
+ let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
+ let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let LoadLatency = 3;
+ let MispredictPenalty = 14; // A branch direction mispredict.
+
+ let Itineraries = SwiftItineraries;
+}
+
+// Swift predicates.
+def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
+
+// Swift resource mapping.
+let SchedModel = SwiftModel in {
+ // Processor resources.
+ def SwiftUnitP01 : ProcResource<2>; // ALU unit.
+ def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
+ def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
+ def SwiftUnitP2 : ProcResource<1>; // LS unit.
+ def SwiftUnitDiv : ProcResource<1>;
+
+ // Generic resource requirements.
+ def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
+ def SwiftWriteP01ThreeCycleTwoUops :
+ SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+ }
+
+ // 4.2.4 Arithmetic and Logical.
+ // ALU operation register shifted by immediate variant.
+ def SwiftWriteALUsi : SchedWriteVariant<[
+ // lsl #2, lsl #1, or lsr #1.
+ SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+ ]>;
+ def SwiftWriteALUsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+ def SwiftWriteALUSsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+ def SwiftReadAdvanceALUsr : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
+ SchedVar<NoSchedPred, [NoReadAdvance]>
+ ]>;
+ // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
+ // AND,BIC,EOR,ORN,ORR
+ // CLZ,RBIT,REV,REV16,REVSH,PKH
+ def : WriteRes<WriteALU, [SwiftUnitP01]>;
+ def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
+ def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
+ def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
+ def : ReadAdvance<ReadALU, 0>;
+ def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+
+ // 4.2.5 Integer comparison
+ def : WriteRes<WriteCMP, [SwiftUnitP01]>;
+ def : WriteRes<WriteCMPsi, [SwiftUnitP01]>;
+ def : WriteRes<WriteCMPsr, [SwiftUnitP01]>;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
new file mode 100644
index 000000000000..0ace9bc1796d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
@@ -0,0 +1,300 @@
+//===-- ARMScheduleV6.td - ARM v6 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM v6 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Model based on ARM1176
+//
+// Functional Units
+def V6_Pipe : FuncUnit; // pipeline
+
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
+//
+def ARMV6Itineraries : ProcessorItineraries<
+ [V6_Pipe], [], [
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [V6_Pipe]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iCMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [4]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_si, [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu, [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru, [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu, [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+
+ //
+ // Load multiple, def is the 5th operand.
+ InstrItinData<IIC_iLoad_m , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3, 1]>,
+
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_bh_i, [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_d_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r, [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_si, [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_iu, [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<3, [V6_Pipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [V6_Pipe]>], [10, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [V6_Pipe]>], [10, 10, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // FP Load Multiple
+ InstrItinData<IIC_fpLoad_m , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 5]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu, [InstrStage<3, [V6_Pipe]>], [3, 2, 1, 1, 5]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // Double-precision FP Store
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m, [InstrStage<3, [V6_Pipe]>], [2, 2, 2, 2]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<3, [V6_Pipe]>], [3, 2, 2, 2, 2]>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..41a7e0c2c8a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -0,0 +1,200 @@
+//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-selectiondag-info"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
+using namespace llvm;
+
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+}
+
+ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
+}
+
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff),
+ false, false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+// Adjust parameters for memset, EABI uses format (ptr, size, value),
+// GNU library uses (ptr, value, size)
+// See RTABI section 4.3.4
+SDValue ARMSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ // Use default for non AAPCS (or Darwin) subtargets
+ if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin())
+ return SDValue();
+
+ const ARMTargetLowering &TLI =
+ *static_cast<const ARMTargetLowering*>(DAG.getTarget().getTargetLowering());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // First argument: data pointer
+ Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+
+ // Second argument: buffer size
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+
+ // Third argument: value to fill
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.isSExt = true;
+ Args.push_back(Entry);
+
+ // Emit __eabi_memset call
+ TargetLowering::CallLoweringInfo CLI(Chain,
+ Type::getVoidTy(*DAG.getContext()), // return type
+ false, // return sign ext
+ false, // return zero ext
+ false, // is var arg
+ false, // is in regs
+ 0, // number of fixed arguments
+ TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv
+ false, // is tail call
+ false, // does not return
+ false, // is return val used
+ DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()), // callee
+ Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(CLI);
+
+ return CallResult.second;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
new file mode 100644
index 000000000000..6419a737295a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -0,0 +1,68 @@
+//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSELECTIONDAGINFO_H
+#define ARMSELECTIONDAGINFO_H
+
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+namespace ARM_AM {
+ static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) {
+ switch (Opcode) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+} // end namespace ARM_AM
+
+class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMSelectionDAGInfo(const TargetMachine &TM);
+ ~ARMSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+ // Adjust parameters for memset, see RTABI section 4.3.4
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 000000000000..739300e4eff9
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,241 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "ARMGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ReserveR9("arm-reserve-r9", cl::Hidden,
+ cl::desc("Reserve R9, making it unavailable as GPR"));
+
+static cl::opt<bool>
+DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+UseFusedMulOps("arm-use-mulops",
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+StrictAlign("arm-strict-align", cl::Hidden,
+ cl::desc("Disallow all unaligned memory accesses"));
+
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, const TargetOptions &Options)
+ : ARMGenSubtargetInfo(TT, CPU, FS)
+ , ARMProcFamily(Others)
+ , stackAlignment(4)
+ , CPUString(CPU)
+ , TargetTriple(TT)
+ , Options(Options)
+ , TargetABI(ARM_ABI_APCS) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+}
+
+void ARMSubtarget::initializeEnvironment() {
+ HasV4TOps = false;
+ HasV5TOps = false;
+ HasV5TEOps = false;
+ HasV6Ops = false;
+ HasV6T2Ops = false;
+ HasV7Ops = false;
+ HasVFPv2 = false;
+ HasVFPv3 = false;
+ HasVFPv4 = false;
+ HasNEON = false;
+ UseNEONForSinglePrecisionFP = false;
+ UseMulOps = UseFusedMulOps;
+ SlowFPVMLx = false;
+ HasVMLxForwarding = false;
+ SlowFPBrcc = false;
+ InThumbMode = false;
+ HasThumb2 = false;
+ IsMClass = false;
+ NoARM = false;
+ PostRAScheduler = false;
+ IsR9Reserved = ReserveR9;
+ UseMovt = false;
+ SupportsTailCall = false;
+ HasFP16 = false;
+ HasD16 = false;
+ HasHardwareDivide = false;
+ HasHardwareDivideInARM = false;
+ HasT2ExtractPack = false;
+ HasDataBarrier = false;
+ Pref32BitThumb = false;
+ AvoidCPSRPartialUpdate = false;
+ AvoidMOVsShifterOperand = false;
+ HasRAS = false;
+ HasMPExtension = false;
+ FPOnlySP = false;
+ AllowsUnalignedMem = false;
+ Thumb2DSP = false;
+ UseNaClTrap = false;
+ UnsafeFPMath = false;
+}
+
+void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+ !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ }
+}
+
+void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+ if (CPUString.empty())
+ CPUString = "generic";
+
+ // Insert the architecture feature derived from the target triple into the
+ // feature string. This is important for setting features that are implied
+ // based on the architecture version.
+ std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(),
+ CPUString);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+ ParseSubtargetFeatures(CPUString, ArchFS);
+
+ // Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify a
+ // ARM version or CPU and then remove this.
+ if (!HasV6T2Ops && hasThumb2())
+ HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+
+ // Keep a pointer to static instruction cost data for the specified CPU.
+ SchedModel = getSchedModelForCPU(CPUString);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+ if ((TargetTriple.getTriple().find("eabi") != std::string::npos) ||
+ (isTargetIOS() && isMClass()))
+ // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
+ // Darwin-EABI conforms to AACPS but not the rest of EABI.
+ TargetABI = ARM_ABI_AAPCS;
+
+ if (isAAPCS_ABI())
+ stackAlignment = 8;
+
+ if (!isTargetIOS())
+ UseMovt = hasV6T2Ops();
+ else {
+ IsR9Reserved = ReserveR9 | !HasV6Ops;
+ UseMovt = DarwinUseMOVT && hasV6T2Ops();
+ SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0);
+ }
+
+ if (!isThumb() || hasThumb2())
+ PostRAScheduler = true;
+
+ // v6+ may or may not support unaligned mem access depending on the system
+ // configuration.
+ if (!StrictAlign && hasV6Ops() && isTargetDarwin())
+ AllowsUnalignedMem = true;
+
+ // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
+ uint64_t Bits = getFeatureBits();
+ if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
+ (Options.UnsafeFPMath || isTargetDarwin()))
+ UseNEONForSinglePrecisionFP = true;
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
+bool
+ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+ // load from stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
+
+ if (!isTargetDarwin()) {
+ // Extra load is needed for all externally visible.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+ return true;
+ } else {
+ if (RelocM == Reloc::PIC_) {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage())
+ // Hidden $non_lazy_ptr reference.
+ return true;
+
+ return false;
+ } else {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned ARMSubtarget::getMispredictionPenalty() const {
+ return SchedModel->MispredictPenalty;
+}
+
+bool ARMSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 000000000000..5b5ee6aeb865
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,315 @@
+//===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "ARMGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+class TargetOptions;
+
+class ARMSubtarget : public ARMGenSubtargetInfo {
+protected:
+ enum ARMProcFamilyEnum {
+ Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift
+ };
+
+ /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+ ARMProcFamilyEnum ARMProcFamily;
+
+ /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops -
+ /// Specify whether target support specific ARM ISA variants.
+ bool HasV4TOps;
+ bool HasV5TOps;
+ bool HasV5TEOps;
+ bool HasV6Ops;
+ bool HasV6T2Ops;
+ bool HasV7Ops;
+
+ /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what
+ /// floating point ISAs are supported.
+ bool HasVFPv2;
+ bool HasVFPv3;
+ bool HasVFPv4;
+ bool HasNEON;
+
+ /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
+ /// specified. Use the method useNEONForSinglePrecisionFP() to
+ /// determine if NEON should actually be used.
+ bool UseNEONForSinglePrecisionFP;
+
+ /// UseMulOps - True if non-microcoded fused integer multiply-add and
+ /// multiply-subtract instructions should be used.
+ bool UseMulOps;
+
+ /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
+ /// whether the FP VML[AS] instructions are slow (if so, don't use them).
+ bool SlowFPVMLx;
+
+ /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
+ /// forwarding to allow mul + mla being issued back to back.
+ bool HasVMLxForwarding;
+
+ /// SlowFPBrcc - True if floating point compare + branch is slow.
+ bool SlowFPBrcc;
+
+ /// InThumbMode - True if compiling for Thumb, false for ARM.
+ bool InThumbMode;
+
+ /// HasThumb2 - True if Thumb2 instructions are supported.
+ bool HasThumb2;
+
+ /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
+ /// v6m, v7m for example.
+ bool IsMClass;
+
+ /// NoARM - True if subtarget does not support ARM mode execution.
+ bool NoARM;
+
+ /// PostRAScheduler - True if using post-register-allocation scheduler.
+ bool PostRAScheduler;
+
+ /// IsR9Reserved - True if R9 is a not available as general purpose register.
+ bool IsR9Reserved;
+
+ /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
+ /// imms (including global addresses).
+ bool UseMovt;
+
+ /// SupportsTailCall - True if the OS supports tail call. The dynamic linker
+ /// must be able to synthesize call stubs for interworking between ARM and
+ /// Thumb.
+ bool SupportsTailCall;
+
+ /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
+ /// only so far)
+ bool HasFP16;
+
+ /// HasD16 - True if subtarget is limited to 16 double precision
+ /// FP registers for VFPv3.
+ bool HasD16;
+
+ /// HasHardwareDivide - True if subtarget supports [su]div
+ bool HasHardwareDivide;
+
+ /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
+ bool HasHardwareDivideInARM;
+
+ /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
+ /// instructions.
+ bool HasT2ExtractPack;
+
+ /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+ /// instructions.
+ bool HasDataBarrier;
+
+ /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+ /// over 16-bit ones.
+ bool Pref32BitThumb;
+
+ /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions
+ /// that partially update CPSR and add false dependency on the previous
+ /// CPSR setting instruction.
+ bool AvoidCPSRPartialUpdate;
+
+ /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
+ /// movs with shifter operand (i.e. asr, lsl, lsr).
+ bool AvoidMOVsShifterOperand;
+
+ /// HasRAS - Some processors perform return stack prediction. CodeGen should
+ /// avoid issue "normal" call instructions to callees which do not return.
+ bool HasRAS;
+
+ /// HasMPExtension - True if the subtarget supports Multiprocessing
+ /// extension (ARMv7 only).
+ bool HasMPExtension;
+
+ /// FPOnlySP - If true, the floating point unit only supports single
+ /// precision.
+ bool FPOnlySP;
+
+ /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+ /// accesses for some types. For details, see
+ /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+ bool AllowsUnalignedMem;
+
+ /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
+ /// and such) instructions in Thumb2 code.
+ bool Thumb2DSP;
+
+ /// NaCl TRAP instruction is generated instead of the regular TRAP.
+ bool UseNaClTrap;
+
+ /// Target machine allowed unsafe FP math (such as use of NEON fp)
+ bool UnsafeFPMath;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// CPUString - String name of used CPU.
+ std::string CPUString;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+ /// SchedModel - Processor specific instruction costs.
+ const MCSchedModel *SchedModel;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Options passed via command line that could influence the target
+ const TargetOptions &Options;
+
+ public:
+ enum {
+ isELF, isDarwin
+ } TargetType;
+
+ enum {
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS // ARM EABI
+ } TargetABI;
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, const TargetOptions &Options);
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const {
+ // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1.
+ // Change this once Thumb1 ldmia / stmia support is added.
+ return isThumb1Only() ? 0 : 64;
+ }
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// \brief Reset the features for the ARM target.
+ virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
+ void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+public:
+ void computeIssueWidth();
+
+ bool hasV4TOps() const { return HasV4TOps; }
+ bool hasV5TOps() const { return HasV5TOps; }
+ bool hasV5TEOps() const { return HasV5TEOps; }
+ bool hasV6Ops() const { return HasV6Ops; }
+ bool hasV6T2Ops() const { return HasV6T2Ops; }
+ bool hasV7Ops() const { return HasV7Ops; }
+
+ bool isCortexA5() const { return ARMProcFamily == CortexA5; }
+ bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+ bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+ bool isCortexA15() const { return ARMProcFamily == CortexA15; }
+ bool isSwift() const { return ARMProcFamily == Swift; }
+ bool isCortexM3() const { return CPUString == "cortex-m3"; }
+ bool isLikeA9() const { return isCortexA9() || isCortexA15(); }
+ bool isCortexR5() const { return ARMProcFamily == CortexR5; }
+
+ bool hasARMOps() const { return !NoARM; }
+
+ bool hasVFP2() const { return HasVFPv2; }
+ bool hasVFP3() const { return HasVFPv3; }
+ bool hasVFP4() const { return HasVFPv4; }
+ bool hasNEON() const { return HasNEON; }
+ bool useNEONForSinglePrecisionFP() const {
+ return hasNEON() && UseNEONForSinglePrecisionFP; }
+
+ bool hasDivide() const { return HasHardwareDivide; }
+ bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
+ bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+ bool hasDataBarrier() const { return HasDataBarrier; }
+ bool useMulOps() const { return UseMulOps; }
+ bool useFPVMLx() const { return !SlowFPVMLx; }
+ bool hasVMLxForwarding() const { return HasVMLxForwarding; }
+ bool isFPBrccSlow() const { return SlowFPBrcc; }
+ bool isFPOnlySP() const { return FPOnlySP; }
+ bool prefers32BitThumb() const { return Pref32BitThumb; }
+ bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
+ bool hasRAS() const { return HasRAS; }
+ bool hasMPExtension() const { return HasMPExtension; }
+ bool hasThumb2DSP() const { return Thumb2DSP; }
+ bool useNaClTrap() const { return UseNaClTrap; }
+
+ bool hasFP16() const { return HasFP16; }
+ bool hasD16() const { return HasD16; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetNaCl() const {
+ return TargetTriple.getOS() == Triple::NaCl;
+ }
+ bool isTargetELF() const { return !isTargetDarwin(); }
+
+ bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
+ bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
+
+ bool isThumb() const { return InThumbMode; }
+ bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
+ bool isThumb2() const { return InThumbMode && HasThumb2; }
+ bool hasThumb2() const { return HasThumb2; }
+ bool isMClass() const { return IsMClass; }
+ bool isARClass() const { return !IsMClass; }
+
+ bool isR9Reserved() const { return IsR9Reserved; }
+
+ bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+ bool supportsTailCall() const { return SupportsTailCall; }
+
+ bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+
+ const std::string & getCPUString() const { return CPUString; }
+
+ unsigned getMispredictionPenalty() const;
+
+ /// enablePostRAScheduler - True at 'More' optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
+ /// symbol.
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+};
+} // End llvm namespace
+
+#endif // ARMSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
new file mode 100644
index 000000000000..42c7d2c437e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -0,0 +1,225 @@
+//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetMachine.h"
+#include "ARM.h"
+#include "ARMFrameLowering.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableGlobalMerge("global-merge", cl::Hidden,
+ cl::desc("Enable global merge pass"),
+ cl::init(true));
+
+static cl::opt<bool>
+DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
+ cl::desc("Inhibit optimization of S->D register accesses on A15"),
+ cl::init(false));
+
+extern "C" void LLVMInitializeARMTarget() {
+ // Register the target.
+ RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
+ RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
+}
+
+
+/// TargetMachine ctor - Create an ARM architecture model.
+///
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, Options),
+ JITInfo(),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+ // Default to soft float ABI
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Soft;
+}
+
+void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our ARM pass. This
+ // allows the ARM pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createARMTargetTransformInfoPass(this));
+}
+
+
+void ARMTargetMachine::anchor() { }
+
+ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ InstrInfo(Subtarget),
+ DL(Subtarget.isAPCS_ABI() ?
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
+ "v128:32:128-v64:32:64-n32-S32") :
+ Subtarget.isAAPCS_ABI() ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32-S64") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32-S32")),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+ if (!Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
+}
+
+void ThumbTargetMachine::anchor() { }
+
+ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ InstrInfo(Subtarget.hasThumb2()
+ ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
+ : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
+ DL(Subtarget.isAPCS_ABI() ?
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:32:128-v64:32:64-a:0:32-n32-S32") :
+ Subtarget.isAAPCS_ABI() ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32-S64") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32-S32")),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget.hasThumb2()
+ ? new ARMFrameLowering(Subtarget)
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+}
+
+namespace {
+/// ARM Code Generator Pass Configuration Options.
+class ARMPassConfig : public TargetPassConfig {
+public:
+ ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ ARMBaseTargetMachine &getARMTargetMachine() const {
+ return getTM<ARMBaseTargetMachine>();
+ }
+
+ const ARMSubtarget &getARMSubtarget() const {
+ return *getARMTargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new ARMPassConfig(this, PM);
+}
+
+bool ARMPassConfig::addPreISel() {
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
+ addPass(createGlobalMergePass(TM->getTargetLowering()));
+
+ return false;
+}
+
+bool ARMPassConfig::addInstSelector() {
+ addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
+
+ const ARMSubtarget *Subtarget = &getARMSubtarget();
+ if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() &&
+ TM->Options.EnableFastISel)
+ addPass(createARMGlobalBaseRegPass());
+ return false;
+}
+
+bool ARMPassConfig::addPreRegAlloc() {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
+ addPass(createARMLoadStoreOptimizationPass(true));
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9())
+ addPass(createMLxExpansionPass());
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
+ getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
+ addPass(createA15SDOptimizerPass());
+ }
+ return true;
+}
+
+bool ARMPassConfig::addPreSched2() {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!getARMSubtarget().isThumb1Only()) {
+ addPass(createARMLoadStoreOptimizationPass());
+ printAndVerify("After ARM load / store optimizer");
+ }
+ if (getARMSubtarget().hasNEON())
+ addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ }
+
+ // Expand some pseudo instructions into multiple instructions to allow
+ // proper scheduling.
+ addPass(createARMExpandPseudoPass());
+
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!getARMSubtarget().isThumb1Only())
+ addPass(&IfConverterID);
+ }
+ if (getARMSubtarget().isThumb2())
+ addPass(createThumb2ITBlockPass());
+
+ return true;
+}
+
+bool ARMPassConfig::addPreEmitPass() {
+ if (getARMSubtarget().isThumb2()) {
+ if (!getARMSubtarget().prefers32BitThumb())
+ addPass(createThumb2SizeReductionPass());
+
+ // Constant island pass work on unbundled instructions.
+ addPass(&UnpackMachineBundlesID);
+ }
+
+ addPass(createARMConstantIslandPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for ARM.
+ PM.add(createARMJITCodeEmitterPass(*this, JCE));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
new file mode 100644
index 000000000000..d4caf5ca6e19
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -0,0 +1,146 @@
+//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETMACHINE_H
+#define ARMTARGETMACHINE_H
+
+#include "ARMFrameLowering.h"
+#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSelectionDAGInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class ARMBaseTargetMachine : public LLVMTargetMachine {
+protected:
+ ARMSubtarget Subtarget;
+private:
+ ARMJITInfo JITInfo;
+ InstrItineraryData InstrItins;
+
+public:
+ ARMBaseTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ // Implemented by derived classes
+ llvm_unreachable("getTargetLowering not implemented");
+ }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ /// \brief Register ARM analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE);
+};
+
+/// ARMTargetMachine - ARM target machine.
+///
+class ARMTargetMachine : public ARMBaseTargetMachine {
+ virtual void anchor();
+ ARMInstrInfo InstrInfo;
+ const DataLayout DL; // Calculates type size & alignment
+ ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
+ ARMFrameLowering FrameLowering;
+ public:
+ ARMTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ virtual const ARMRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+};
+
+/// ThumbTargetMachine - Thumb target machine.
+/// Due to the way architectures are handled, this represents both
+/// Thumb-1 and Thumb-2.
+///
+class ThumbTargetMachine : public ARMBaseTargetMachine {
+ virtual void anchor();
+ // Either Thumb1InstrInfo or Thumb2InstrInfo.
+ OwningPtr<ARMBaseInstrInfo> InstrInfo;
+ const DataLayout DL; // Calculates type size & alignment
+ ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ OwningPtr<ARMFrameLowering> FrameLowering;
+public:
+ ThumbTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
+ virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ /// returns either Thumb1InstrInfo or Thumb2InstrInfo
+ virtual const ARMBaseInstrInfo *getInstrInfo() const {
+ return InstrInfo.get();
+ }
+ /// returns either Thumb1FrameLowering or ARMFrameLowering
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return FrameLowering.get();
+ }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
new file mode 100644
index 000000000000..dfdf6ab356a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -0,0 +1,53 @@
+//===-- llvm/Target/ARMTargetObjectFile.cpp - ARM Object Info Impl --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetObjectFile.h"
+#include "ARMSubtarget.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF Target
+//===----------------------------------------------------------------------===//
+
+void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ bool isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(isAAPCS_ABI);
+
+ if (isAAPCS_ABI) {
+ LSDASection = NULL;
+ }
+
+ AttributesSection =
+ getContext().getELFSection(".ARM.attributes",
+ ELF::SHT_ARM_ATTRIBUTES,
+ 0,
+ SectionKind::getMetadata());
+}
+
+const MCExpr *ARMElfTargetObjectFile::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
+
+ return MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ MCSymbolRefExpr::VK_ARM_TARGET2,
+ getContext());
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
new file mode 100644
index 000000000000..7f60727e5305
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -0,0 +1,43 @@
+//===-- llvm/Target/ARMTargetObjectFile.h - ARM Object Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+class MCContext;
+class TargetMachine;
+
+class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+protected:
+ const MCSection *AttributesSection;
+public:
+ ARMElfTargetObjectFile() :
+ TargetLoweringObjectFileELF(),
+ AttributesSection(NULL)
+ {}
+
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ const MCExpr *
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
+
+ virtual const MCSection *getAttributesSection() const {
+ return AttributesSection;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
new file mode 100644
index 000000000000..1019b972e957
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -0,0 +1,458 @@
+//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// ARM target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armtti"
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't havve a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeARMTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class ARMTTI : public ImmutablePass, public TargetTransformInfo {
+ const ARMBaseTargetMachine *TM;
+ const ARMSubtarget *ST;
+ const ARMTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ ARMTTI(const ARMBaseTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializeARMTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
+
+ /// @}
+
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector) const {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 16;
+ return 0;
+ }
+
+ if (ST->isThumb1Only())
+ return 8;
+ return 16;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+
+ return 32;
+ }
+
+ unsigned getMaximumUnrollFactor() const {
+ // These are out of order CPUs:
+ if (ST->isCortexA15() || ST->isSwift())
+ return 2;
+ return 1;
+ }
+
+ unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const;
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
+
+ unsigned getAddressComputationCost(Type *Val) const;
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti",
+ "ARM Target Transform Info", true, true, false)
+char ARMTTI::ID = 0;
+
+ImmutablePass *
+llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) {
+ return new ARMTTI(TM);
+}
+
+
+unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned Bits = Ty->getPrimitiveSizeInBits();
+ if (Bits == 0 || Bits > 32)
+ return 4;
+
+ int32_t SImmVal = Imm.getSExtValue();
+ uint32_t ZImmVal = Imm.getZExtValue();
+ if (!ST->isThumb()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getSOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ } else if (ST->isThumb2()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ } else /*Thumb1*/ {
+ if (SImmVal >= 0 && SImmVal < 256)
+ return 1;
+ if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
+ return 2;
+ // Load from constantpool.
+ return 3;
+ }
+ return 2;
+}
+
+unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Single to/from double precision conversions.
+ static const CostTblEntry<MVT> NEONFltDblTbl[] = {
+ // Vector fptrunc/fpext conversions.
+ { ISD::FP_ROUND, MVT::v2f64, 2 },
+ { ISD::FP_EXTEND, MVT::v2f32, 2 },
+ { ISD::FP_EXTEND, MVT::v4f32, 4 }
+ };
+
+ if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
+ ISD == ISD::FP_EXTEND)) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * NEONFltDblTbl[Idx].Cost;
+ }
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+ // Some arithmetic, load and store operations have specific instructions
+ // to cast up/down their types automatically at no extra cost.
+ // TODO: Get these tables to know at least what the related operations are.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+
+ // The number of vmovl instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
+ // Operations that we legalize using load/stores to the stack.
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+
+ // Vector float <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
+
+ // Vector double <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
+ };
+
+ if (SrcTy.isVector() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl,
+ array_lengthof(NEONVectorConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorConversionTbl[Idx].Cost;
+ }
+
+ // Scalar float to integer conversions.
+ static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = {
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
+ };
+ if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl,
+ array_lengthof(NEONFloatConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONFloatConversionTbl[Idx].Cost;
+ }
+
+ // Scalar integer to float conversions.
+ static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
+ };
+
+ if (SrcTy.isInteger() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl,
+ array_lengthof(NEONIntegerConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONIntegerConversionTbl[Idx].Cost;
+ }
+
+ // Scalar integer conversion costs.
+ static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = {
+ // i16 -> i64 requires two dependent operations.
+ { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
+
+ // Truncates on i64 are assumed to be free.
+ { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
+ };
+
+ if (SrcTy.isInteger()) {
+ int Idx =
+ ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl,
+ array_lengthof(ARMIntegerConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return ARMIntegerConversionTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) const {
+ // Penalize inserting into an D-subregister. We end up with a three times
+ // lower estimated throughput on swift.
+ if (ST->isSwift() &&
+ Opcode == Instruction::InsertElement &&
+ ValTy->isVectorTy() &&
+ ValTy->getScalarSizeInBits() <= 32)
+ return 3;
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
+}
+
+unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ // On NEON a a vector select gets lowered to vbsl.
+ if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ // Lowering of some vector selects is currently far from perfect.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
+ };
+
+ EVT SelCondTy = TLI->getValueType(CondTy);
+ EVT SelValTy = TLI->getValueType(ValTy);
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
+ array_lengthof(NEONVectorSelectTbl),
+ ISD, SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorSelectTbl[Idx].Cost;
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ return LT.first;
+ }
+
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+ // In many cases the address computation is not merged into the instruction
+ // addressing mode.
+ return 1;
+}
+
+unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ // We only handle costs of reverse shuffles for now.
+ if (Kind != SK_Reverse)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ static const CostTblEntry<MVT> NEONShuffleTbl[] = {
+ // Reverse shuffle cost one instruction if we are shuffling within a double
+ // word (vrev) or two if we shuffle a quad word (vrev, vext).
+ { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
+
+ { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
+ };
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+ int Idx = CostTableLookup<MVT>(NEONShuffleTbl, array_lengthof(NEONShuffleTbl),
+ ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ return LT.first * NEONShuffleTbl[Idx].Cost;
+}
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
new file mode 100644
index 000000000000..ed7b7ec9d2cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -0,0 +1,7874 @@
+//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class ARMOperand;
+
+enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
+
+class ARMAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+ const MCRegisterInfo *MRI;
+
+ // Map of register aliases registers via the .req directive.
+ StringMap<unsigned> RegisterReqs;
+
+ struct {
+ ARMCC::CondCodes Cond; // Condition for IT block.
+ unsigned Mask:4; // Condition mask for instructions.
+ // Starting at first 1 (from lsb).
+ // '1' condition as indicated in IT.
+ // '0' inverse of condition (else).
+ // Count of instructions in IT block is
+ // 4 - trailingzeroes(mask)
+
+ bool FirstCond; // Explicit flag for when we're parsing the
+ // First instruction in the IT block. It's
+ // implied in the mask, so needs special
+ // handling.
+
+ unsigned CurPosition; // Current position in parsing of IT
+ // block. In range [0,3]. Initialized
+ // according to count of instructions in block.
+ // ~0U if no active IT block.
+ } ITState;
+ bool inITBlock() { return ITState.CurPosition != ~0U;}
+ void forwardITPosition() {
+ if (!inITBlock()) return;
+ // Move to the next instruction in the IT block, if there is one. If not,
+ // mark the block as done.
+ unsigned TZ = CountTrailingZeros_32(ITState.Mask);
+ if (++ITState.CurPosition == 5 - TZ)
+ ITState.CurPosition = ~0U; // Done with the IT block after this.
+ }
+
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ bool Warning(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ return Parser.Warning(L, Msg, Ranges);
+ }
+ bool Error(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ return Parser.Error(L, Msg, Ranges);
+ }
+
+ int tryParseRegister();
+ bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+ int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+ bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
+ bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
+ unsigned &ShiftAmount);
+ bool parseDirectiveWord(unsigned Size, SMLoc L);
+ bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveARM(SMLoc L);
+ bool parseDirectiveThumbFunc(SMLoc L);
+ bool parseDirectiveCode(SMLoc L);
+ bool parseDirectiveSyntax(SMLoc L);
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
+ bool parseDirectiveArch(SMLoc L);
+ bool parseDirectiveEabiAttr(SMLoc L);
+
+ StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
+ bool &CarrySetting, unsigned &ProcessorIMod,
+ StringRef &ITMask);
+ void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode);
+
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ }
+ bool isThumbOne() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0;
+ }
+ bool isThumbTwo() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2);
+ }
+ bool hasV6Ops() const {
+ return STI.getFeatureBits() & ARM::HasV6Ops;
+ }
+ bool hasV7Ops() const {
+ return STI.getFeatureBits() & ARM::HasV7Ops;
+ }
+ void SwitchMode() {
+ unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+ setAvailableFeatures(FB);
+ }
+ bool isMClass() const {
+ return STI.getFeatureBits() & ARM::FeatureMClass;
+ }
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "ARMGenAsmMatcher.inc"
+
+ /// }
+
+ OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseCoprocNumOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseCoprocRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseCoprocOptionOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseMemBarrierOptOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseProcIFlagsOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseMSRMaskOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O,
+ StringRef Op, int Low, int High);
+ OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ return parsePKHImm(O, "lsl", 0, 31);
+ }
+ OperandMatchResultTy parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ return parsePKHImm(O, "asr", 1, 32);
+ }
+ OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
+ SMLoc &EndLoc);
+
+ // Asm Match Converter Methods
+ void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdWriteBackRegAddrMode2(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtStWriteBackRegAddrModeImm12(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtStWriteBackRegAddrMode2(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtStWriteBackRegAddrMode3(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdExtTWriteBackImm(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdExtTWriteBackReg(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtStExtTWriteBackImm(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtStExtTWriteBackReg(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdWriteBackRegAddrMode3(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtThumbMultiply(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtVLDwbFixed(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtVLDwbRegister(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtVSTwbFixed(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtVSTwbRegister(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ bool processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ bool shouldOmitCCOutOperand(StringRef Mnemonic,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+public:
+ enum ARMMatchResultTy {
+ Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
+ Match_RequiresNotITBlock,
+ Match_RequiresV6,
+ Match_RequiresThumb2,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "ARMGenAsmMatcher.inc"
+
+ };
+
+ ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ MCAsmParserExtension::Initialize(_Parser);
+
+ // Cache the MCRegisterInfo.
+ MRI = &getContext().getRegisterInfo();
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ // Not in an ITBlock to start with.
+ ITState.CurPosition = ~0U;
+
+ // Set ELF header flags.
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the statu/parseDirects quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer()))
+ MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+ }
+
+ // Implementation of the MCTargetAsmParser interface:
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool ParseDirective(AsmToken DirectiveID);
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
+ unsigned checkTargetMatchPredicate(MCInst &Inst);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
+};
+} // end anonymous namespace
+
+namespace {
+
+/// ARMOperand - Instances of this class represent a parsed ARM machine
+/// operand.
+class ARMOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ k_CondCode,
+ k_CCOut,
+ k_ITCondMask,
+ k_CoprocNum,
+ k_CoprocReg,
+ k_CoprocOption,
+ k_Immediate,
+ k_MemBarrierOpt,
+ k_Memory,
+ k_PostIndexRegister,
+ k_MSRMask,
+ k_ProcIFlags,
+ k_VectorIndex,
+ k_Register,
+ k_RegisterList,
+ k_DPRRegisterList,
+ k_SPRRegisterList,
+ k_VectorList,
+ k_VectorListAllLanes,
+ k_VectorListIndexed,
+ k_ShiftedRegister,
+ k_ShiftedImmediate,
+ k_ShifterImmediate,
+ k_RotateImmediate,
+ k_BitfieldDescriptor,
+ k_Token
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+ SmallVector<unsigned, 8> Registers;
+
+ struct CCOp {
+ ARMCC::CondCodes Val;
+ };
+
+ struct CopOp {
+ unsigned Val;
+ };
+
+ struct CoprocOptionOp {
+ unsigned Val;
+ };
+
+ struct ITMaskOp {
+ unsigned Mask:4;
+ };
+
+ struct MBOptOp {
+ ARM_MB::MemBOpt Val;
+ };
+
+ struct IFlagsOp {
+ ARM_PROC::IFlags Val;
+ };
+
+ struct MMaskOp {
+ unsigned Val;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ };
+
+ struct VectorIndexOp {
+ unsigned Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ /// Combined record for all forms of ARM address expressions.
+ struct MemoryOp {
+ unsigned BaseRegNum;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ };
+
+ struct PostIdxRegOp {
+ unsigned RegNum;
+ bool isAdd;
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ };
+
+ struct ShifterImmOp {
+ bool isASR;
+ unsigned Imm;
+ };
+
+ struct RegShiftedRegOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ };
+
+ struct RegShiftedImmOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ };
+
+ struct RotImmOp {
+ unsigned Imm;
+ };
+
+ struct BitfieldOp {
+ unsigned LSB;
+ unsigned Width;
+ };
+
+ union {
+ struct CCOp CC;
+ struct CopOp Cop;
+ struct CoprocOptionOp CoprocOption;
+ struct MBOptOp MBOpt;
+ struct ITMaskOp ITMask;
+ struct IFlagsOp IFlags;
+ struct MMaskOp MMask;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct VectorListOp VectorList;
+ struct VectorIndexOp VectorIndex;
+ struct ImmOp Imm;
+ struct MemoryOp Memory;
+ struct PostIdxRegOp PostIdxReg;
+ struct ShifterImmOp ShifterImm;
+ struct RegShiftedRegOp RegShiftedReg;
+ struct RegShiftedImmOp RegShiftedImm;
+ struct RotImmOp RotImm;
+ struct BitfieldOp Bitfield;
+ };
+
+ ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case k_CondCode:
+ CC = o.CC;
+ break;
+ case k_ITCondMask:
+ ITMask = o.ITMask;
+ break;
+ case k_Token:
+ Tok = o.Tok;
+ break;
+ case k_CCOut:
+ case k_Register:
+ Reg = o.Reg;
+ break;
+ case k_RegisterList:
+ case k_DPRRegisterList:
+ case k_SPRRegisterList:
+ Registers = o.Registers;
+ break;
+ case k_VectorList:
+ case k_VectorListAllLanes:
+ case k_VectorListIndexed:
+ VectorList = o.VectorList;
+ break;
+ case k_CoprocNum:
+ case k_CoprocReg:
+ Cop = o.Cop;
+ break;
+ case k_CoprocOption:
+ CoprocOption = o.CoprocOption;
+ break;
+ case k_Immediate:
+ Imm = o.Imm;
+ break;
+ case k_MemBarrierOpt:
+ MBOpt = o.MBOpt;
+ break;
+ case k_Memory:
+ Memory = o.Memory;
+ break;
+ case k_PostIndexRegister:
+ PostIdxReg = o.PostIdxReg;
+ break;
+ case k_MSRMask:
+ MMask = o.MMask;
+ break;
+ case k_ProcIFlags:
+ IFlags = o.IFlags;
+ break;
+ case k_ShifterImmediate:
+ ShifterImm = o.ShifterImm;
+ break;
+ case k_ShiftedRegister:
+ RegShiftedReg = o.RegShiftedReg;
+ break;
+ case k_ShiftedImmediate:
+ RegShiftedImm = o.RegShiftedImm;
+ break;
+ case k_RotateImmediate:
+ RotImm = o.RotImm;
+ break;
+ case k_BitfieldDescriptor:
+ Bitfield = o.Bitfield;
+ break;
+ case k_VectorIndex:
+ VectorIndex = o.VectorIndex;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+ /// getLocRange - Get the range between the first and last token of this
+ /// operand.
+ SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
+
+ ARMCC::CondCodes getCondCode() const {
+ assert(Kind == k_CondCode && "Invalid access!");
+ return CC.Val;
+ }
+
+ unsigned getCoproc() const {
+ assert((Kind == k_CoprocNum || Kind == k_CoprocReg) && "Invalid access!");
+ return Cop.Val;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register || Kind == k_CCOut) && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const SmallVectorImpl<unsigned> &getRegList() const {
+ assert((Kind == k_RegisterList || Kind == k_DPRRegisterList ||
+ Kind == k_SPRRegisterList) && "Invalid access!");
+ return Registers;
+ }
+
+ const MCExpr *getImm() const {
+ assert(isImm() && "Invalid access!");
+ return Imm.Val;
+ }
+
+ unsigned getVectorIndex() const {
+ assert(Kind == k_VectorIndex && "Invalid access!");
+ return VectorIndex.Val;
+ }
+
+ ARM_MB::MemBOpt getMemBarrierOpt() const {
+ assert(Kind == k_MemBarrierOpt && "Invalid access!");
+ return MBOpt.Val;
+ }
+
+ ARM_PROC::IFlags getProcIFlags() const {
+ assert(Kind == k_ProcIFlags && "Invalid access!");
+ return IFlags.Val;
+ }
+
+ unsigned getMSRMask() const {
+ assert(Kind == k_MSRMask && "Invalid access!");
+ return MMask.Val;
+ }
+
+ bool isCoprocNum() const { return Kind == k_CoprocNum; }
+ bool isCoprocReg() const { return Kind == k_CoprocReg; }
+ bool isCoprocOption() const { return Kind == k_CoprocOption; }
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isCCOut() const { return Kind == k_CCOut; }
+ bool isITMask() const { return Kind == k_ITCondMask; }
+ bool isITCondCode() const { return Kind == k_CondCode; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isFPImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
+ return Val != -1;
+ }
+ bool isFBits16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value <= 16;
+ }
+ bool isFBits32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 1 && Value <= 32;
+ }
+ bool isImm8s4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
+ }
+ bool isImm0_1020s4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
+ }
+ bool isImm0_508s4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
+ }
+ bool isImm0_508s4Neg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = -CE->getValue();
+ // explicitly exclude zero. we want that to use the normal 0_508 version.
+ return ((Value & 3) == 0) && Value > 0 && Value <= 508;
+ }
+ bool isImm0_255() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 256;
+ }
+ bool isImm0_4095() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4096;
+ }
+ bool isImm0_4095Neg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = -CE->getValue();
+ return Value > 0 && Value < 4096;
+ }
+ bool isImm0_1() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 2;
+ }
+ bool isImm0_3() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4;
+ }
+ bool isImm0_7() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 8;
+ }
+ bool isImm0_15() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 16;
+ }
+ bool isImm0_31() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 32;
+ }
+ bool isImm0_63() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 64;
+ }
+ bool isImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 8;
+ }
+ bool isImm16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 16;
+ }
+ bool isImm32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 32;
+ }
+ bool isShrImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 8;
+ }
+ bool isShrImm16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 16;
+ }
+ bool isShrImm32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isShrImm64() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 64;
+ }
+ bool isImm1_7() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 8;
+ }
+ bool isImm1_15() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 16;
+ }
+ bool isImm1_31() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 32;
+ }
+ bool isImm1_16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 17;
+ }
+ bool isImm1_32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 33;
+ }
+ bool isImm0_32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 33;
+ }
+ bool isImm0_65535() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 65536;
+ }
+ bool isImm0_65535Expr() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // If it's not a constant expression, it'll generate a fixup and be
+ // handled later.
+ if (!CE) return true;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 65536;
+ }
+ bool isImm24bit() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value <= 0xffffff;
+ }
+ bool isImmThumbSR() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 33;
+ }
+ bool isPKHLSLImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 32;
+ }
+ bool isPKHASRImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isAdrLabel() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, but it can't fit
+ // into shift immediate encoding, we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm())) return true;
+ else return (isARMSOImm() || isARMSOImmNeg());
+ }
+ bool isARMSOImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(Value) != -1;
+ }
+ bool isARMSOImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(~Value) != -1;
+ }
+ bool isARMSOImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // Only use this when not representable as a plain so_imm.
+ return ARM_AM::getSOImmVal(Value) == -1 &&
+ ARM_AM::getSOImmVal(-Value) != -1;
+ }
+ bool isT2SOImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(Value) != -1;
+ }
+ bool isT2SOImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(~Value) != -1;
+ }
+ bool isT2SOImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // Only use this when not representable as a plain so_imm.
+ return ARM_AM::getT2SOImmVal(Value) == -1 &&
+ ARM_AM::getT2SOImmVal(-Value) != -1;
+ }
+ bool isSetEndImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 1 || Value == 0;
+ }
+ bool isReg() const { return Kind == k_Register; }
+ bool isRegList() const { return Kind == k_RegisterList; }
+ bool isDPRRegList() const { return Kind == k_DPRRegisterList; }
+ bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
+ bool isMem() const { return Kind == k_Memory; }
+ bool isShifterImm() const { return Kind == k_ShifterImmediate; }
+ bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
+ bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; }
+ bool isRotImm() const { return Kind == k_RotateImmediate; }
+ bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
+ bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
+ bool isPostIdxReg() const {
+ return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift;
+ }
+ bool isMemNoOffset(bool alignOK = false) const {
+ if (!isMem())
+ return false;
+ // No offset of any kind.
+ return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
+ (alignOK || Memory.Alignment == 0);
+ }
+ bool isMemPCRelImm12() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Base register must be PC.
+ if (Memory.BaseRegNum != ARM::PC)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
+ }
+ bool isAlignedMemory() const {
+ return isMemNoOffset(true);
+ }
+ bool isAddrMode2() const {
+ if (!isMem() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return true;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -4096 && Val < 4096;
+ }
+ bool isAM2OffsetImm() const {
+ if (!isImm()) return false;
+ // Immediate offset in range [-4095, 4095].
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return Val > -4096 && Val < 4096;
+ }
+ bool isAddrMode3() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+ if (!isMem() || Memory.Alignment != 0) return false;
+ // No shifts are legal for AM3.
+ if (Memory.ShiftType != ARM_AM::no_shift) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return true;
+ // Immediate offset in range [-255, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ // The #-0 offset is encoded as INT32_MIN, and we have to check
+ // for this too.
+ return (Val > -256 && Val < 256) || Val == INT32_MIN;
+ }
+ bool isAM3Offset() const {
+ if (Kind != k_Immediate && Kind != k_PostIndexRegister)
+ return false;
+ if (Kind == k_PostIndexRegister)
+ return PostIdxReg.ShiftTy == ARM_AM::no_shift;
+ // Immediate offset in range [-255, 255].
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ // Special case, #-0 is INT32_MIN.
+ return (Val > -256 && Val < 256) || Val == INT32_MIN;
+ }
+ bool isAddrMode5() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+ if (!isMem() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return false;
+ // Immediate offset in range [-1020, 1020] and a multiple of 4.
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) ||
+ Val == INT32_MIN;
+ }
+ bool isMemTBB() const {
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
+ return false;
+ return true;
+ }
+ bool isMemTBH() const {
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 ||
+ Memory.Alignment != 0 )
+ return false;
+ return true;
+ }
+ bool isMemRegOffset() const {
+ if (!isMem() || !Memory.OffsetRegNum || Memory.Alignment != 0)
+ return false;
+ return true;
+ }
+ bool isT2MemRegOffset() const {
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.Alignment != 0)
+ return false;
+ // Only lsl #{0, 1, 2, 3} allowed.
+ if (Memory.ShiftType == ARM_AM::no_shift)
+ return true;
+ if (Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm > 3)
+ return false;
+ return true;
+ }
+ bool isMemThumbRR() const {
+ // Thumb reg+reg addressing is simple. Just two registers, a base and
+ // an offset. No shifts, negations or any other complicating factors.
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
+ return false;
+ return isARMLowRegister(Memory.BaseRegNum) &&
+ (!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum));
+ }
+ bool isMemThumbRIs4() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 124].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 124 && (Val % 4) == 0;
+ }
+ bool isMemThumbRIs2() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 62].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 62 && (Val % 2) == 0;
+ }
+ bool isMemThumbRIs1() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 31].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 31;
+ }
+ bool isMemThumbSPI() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
+ Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 1020 && (Val % 4) == 0;
+ }
+ bool isMemImm8s4Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset a multiple of 4 in range [-1020, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ // Special case, #-0 is INT32_MIN.
+ return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN;
+ }
+ bool isMemImm0_1020s4Offset() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset a multiple of 4 in range [0, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 1020 && (Val & 3) == 0;
+ }
+ bool isMemImm8Offset() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Base reg of PC isn't allowed for these encodings.
+ if (Memory.BaseRegNum == ARM::PC) return false;
+ // Immediate offset in range [-255, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val == INT32_MIN) || (Val > -256 && Val < 256);
+ }
+ bool isMemPosImm8Offset() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val < 256;
+ }
+ bool isMemNegImm8Offset() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Base reg of PC isn't allowed for these encodings.
+ if (Memory.BaseRegNum == ARM::PC) return false;
+ // Immediate offset in range [-255, -1].
+ if (!Memory.OffsetImm) return false;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val == INT32_MIN) || (Val > -256 && Val < 0);
+ }
+ bool isMemUImm12Offset() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= 0 && Val < 4096);
+ }
+ bool isMemImm12Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
+ }
+ bool isPostIdxImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return (Val > -256 && Val < 256) || (Val == INT32_MIN);
+ }
+ bool isPostIdxImm8s4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return ((Val & 3) == 0 && Val >= -1020 && Val <= 1020) ||
+ (Val == INT32_MIN);
+ }
+
+ bool isMSRMask() const { return Kind == k_MSRMask; }
+ bool isProcIFlags() const { return Kind == k_ProcIFlags; }
+
+ // NEON operands.
+ bool isSingleSpacedVectorList() const {
+ return Kind == k_VectorList && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorList() const {
+ return Kind == k_VectorList && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListDPair() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListThreeD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isVecListDPairSpaced() const {
+ if (isSingleSpacedVectorList()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListThreeQ() const {
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourQ() const {
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isSingleSpacedVectorAllLanes() const {
+ return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorAllLanes() const {
+ return Kind == k_VectorListAllLanes && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListDPairAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListDPairSpacedAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListThreeDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListThreeQAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isVecListFourQAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isSingleSpacedVectorIndexed() const {
+ return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorIndexed() const {
+ return Kind == k_VectorListIndexed && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListOneDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListOneDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListTwoDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListThreeDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListThreeDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListThreeQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListThreeQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListThreeDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListFourDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListFourDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListFourQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListFourQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListFourDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVectorIndex8() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 8;
+ }
+ bool isVectorIndex16() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 4;
+ }
+ bool isVectorIndex32() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 2;
+ }
+
+ bool isNEONi8splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i8 value splatted across 8 bytes. The immediate is just the 8 byte
+ // value.
+ return Value >= 0 && Value < 256;
+ }
+
+ bool isNEONi16splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i16 value in the range [0,255] or [0x0100, 0xff00]
+ return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00);
+ }
+
+ bool isNEONi32splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000);
+ }
+
+ bool isNEONi32vmov() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
+ // for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000) ||
+ (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) ||
+ (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff);
+ }
+ bool isNEONi32vmovNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = ~CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
+ // for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000) ||
+ (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) ||
+ (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff);
+ }
+
+ bool isNEONi64splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ uint64_t Value = CE->getValue();
+ // i64 value with each byte being either 0 or 0xff.
+ for (unsigned i = 0; i < 8; ++i)
+ if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) return false;
+ return true;
+ }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR;
+ Inst.addOperand(MCOperand::CreateReg(RegNum));
+ }
+
+ void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCoprocRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCoprocOptionOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(CoprocOption.Val));
+ }
+
+ void addITMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(ITMask.Mask));
+ }
+
+ void addITCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ }
+
+ void addCCOutOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ assert(isRegShiftedReg() &&
+ "addRegShiftedRegOperands() on non RegShiftedReg!");
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg));
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg));
+ Inst.addOperand(MCOperand::CreateImm(
+ ARM_AM::getSORegOpc(RegShiftedReg.ShiftTy, RegShiftedReg.ShiftImm)));
+ }
+
+ void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ assert(isRegShiftedImm() &&
+ "addRegShiftedImmOperands() on non RegShiftedImm!");
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg));
+ // Shift of #32 is encoded as 0 where permitted
+ unsigned Imm = (RegShiftedImm.ShiftImm == 32 ? 0 : RegShiftedImm.ShiftImm);
+ Inst.addOperand(MCOperand::CreateImm(
+ ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, Imm)));
+ }
+
+ void addShifterImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm((ShifterImm.isASR << 5) |
+ ShifterImm.Imm));
+ }
+
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ++I)
+ Inst.addOperand(MCOperand::CreateReg(*I));
+ }
+
+ void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addSPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addRotImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // Encoded as val>>3. The printer handles display as 8, 16, 24.
+ Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3));
+ }
+
+ void addBitfieldOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // Munge the lsb/width into a bitfield mask.
+ unsigned lsb = Bitfield.LSB;
+ unsigned width = Bitfield.Width;
+ // Make a 32-bit mask w/ the referenced bits clear and all other bits set.
+ uint32_t Mask = ~(((uint32_t)0xffffffff >> lsb) << (32 - width) >>
+ (32 - (lsb + width)));
+ Inst.addOperand(MCOperand::CreateImm(Mask));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addFBits16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(16 - CE->getValue()));
+ }
+
+ void addFBits32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(32 - CE->getValue()));
+ }
+
+ void addFPImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addImm8s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // FIXME: We really want to scale the value here, but the LDRD/STRD
+ // instruction don't encode operands that way yet.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ }
+
+ void addImm0_508s4NegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-(CE->getValue() / 4)));
+ }
+
+ void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ }
+
+ void addImm1_16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate-1, and we store in the instruction
+ // the bits as encoded, so subtract off one here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
+ void addImm1_32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate-1, and we store in the instruction
+ // the bits as encoded, so subtract off one here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
+ void addImmThumbSROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate, except for 32, which encodes as
+ // zero.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Imm = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm)));
+ }
+
+ void addPKHASRImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // An ASR value of 32 encodes as 0, so that's how we want to add it to
+ // the instruction as well.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val));
+ }
+
+ void addT2SOImmNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a t2_so_imm, but we have its bitwise
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
+ }
+
+ void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a t2_so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
+ void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually an imm0_4095, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
+ void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its bitwise
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
+ }
+
+ void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
+ void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
+ }
+
+ void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ }
+
+ void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ int32_t Imm = Memory.OffsetImm->getValue();
+ // FIXME: Handle #-0
+ if (Imm == INT32_MIN) Imm = 0;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ assert(isImm() && "Not an immediate!");
+
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup.
+ if (!isa<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ return;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Memory.Alignment));
+ }
+
+ void addAddrMode2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ if (!Memory.OffsetRegNum) {
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
+ } else {
+ // For register offset, we encode the shift type and negation flag
+ // here.
+ Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
+ }
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addAM2OffsetImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant AM2OffsetImm operand!");
+ int32_t Val = CE->getValue();
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addAddrMode3Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ if (!Memory.OffsetRegNum) {
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM3Opc(AddSub, Val);
+ } else {
+ // For register offset, we encode the shift type and negation flag
+ // here.
+ Val = ARM_AM::getAM3Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, 0);
+ }
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addAM3OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ if (Kind == k_PostIndexRegister) {
+ int32_t Val =
+ ARM_AM::getAM3Opc(PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub, 0);
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return;
+ }
+
+ // Constant offset.
+ const MCConstantExpr *CE = static_cast<const MCConstantExpr*>(getImm());
+ int32_t Val = CE->getValue();
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM3Opc(AddSub, Val);
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addAddrMode5Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
+ // The lower two bits are always zero and as such are not encoded.
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM5Opc(AddSub, Val);
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // The lower two bits are always zero and as such are not encoded.
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemPosImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ addMemImm8OffsetOperands(Inst, N);
+ }
+
+ void addMemNegImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ addMemImm8OffsetOperands(Inst, N);
+ }
+
+ void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If this is an immediate, it's a label reference.
+ if (isImm()) {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
+ // Otherwise, it's a normal memory reg+offset.
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If this is an immediate, it's a label reference.
+ if (isImm()) {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
+ // Otherwise, it's a normal memory reg+offset.
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemTBBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ }
+
+ void addMemTBHOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ }
+
+ void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ unsigned Val =
+ ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addT2MemRegOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Memory.ShiftImm));
+ }
+
+ void addMemThumbRROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ }
+
+ void addMemThumbRIs4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemThumbRIs2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 2) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemThumbRIs1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue()) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemThumbSPIOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addPostIdxImm8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant post-idx-imm8 operand!");
+ int Imm = CE->getValue();
+ bool isAdd = Imm >= 0;
+ if (Imm == INT32_MIN) Imm = 0;
+ Imm = (Imm < 0 ? -Imm : Imm) | (int)isAdd << 8;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addPostIdxImm8s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant post-idx-imm8s4 operand!");
+ int Imm = CE->getValue();
+ bool isAdd = Imm >= 0;
+ if (Imm == INT32_MIN) Imm = 0;
+ // Immediate is scaled by 4.
+ Imm = ((Imm < 0 ? -Imm : Imm) / 4) | (int)isAdd << 8;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addPostIdxRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(PostIdxReg.isAdd));
+ }
+
+ void addPostIdxRegShiftedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ // The sign, shift type, and shift amount are encoded in a single operand
+ // using the AM2 encoding helpers.
+ ARM_AM::AddrOpc opc = PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub;
+ unsigned Imm = ARM_AM::getAM2Opc(opc, PostIdxReg.ShiftImm,
+ PostIdxReg.ShiftTy);
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask())));
+ }
+
+ void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
+ }
+
+ void addVecListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ }
+
+ void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex));
+ }
+
+ void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
+ void addVectorIndex16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
+ void addVectorIndex32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
+ void addNEONi8splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ // Mask in that this is an i8 splat.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() | 0xe00));
+ }
+
+ void addNEONi16splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256)
+ Value = (Value >> 8) | 0xa00;
+ else
+ Value |= 0x800;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256 && Value <= 0xff00)
+ Value = (Value >> 8) | 0x200;
+ else if (Value > 0xffff && Value <= 0xff0000)
+ Value = (Value >> 16) | 0x400;
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256 && Value <= 0xffff)
+ Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
+ else if (Value > 0xffff && Value <= 0xffffff)
+ Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = ~CE->getValue();
+ if (Value >= 256 && Value <= 0xffff)
+ Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
+ else if (Value > 0xffff && Value <= 0xffffff)
+ Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi64splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+ unsigned Imm = 0;
+ for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
+ Imm |= (Value & 1) << i;
+ }
+ Inst.addOperand(MCOperand::CreateImm(Imm | 0x1e00));
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_ITCondMask);
+ Op->ITMask.Mask = Mask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_CondCode);
+ Op->CC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_CoprocNum);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_CoprocReg);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_CoprocOption);
+ Op->Cop.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_CCOut);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy,
+ unsigned SrcReg,
+ unsigned ShiftReg,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_ShiftedRegister);
+ Op->RegShiftedReg.ShiftTy = ShTy;
+ Op->RegShiftedReg.SrcReg = SrcReg;
+ Op->RegShiftedReg.ShiftReg = ShiftReg;
+ Op->RegShiftedReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy,
+ unsigned SrcReg,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_ShiftedImmediate);
+ Op->RegShiftedImm.ShiftTy = ShTy;
+ Op->RegShiftedImm.SrcReg = SrcReg;
+ Op->RegShiftedImm.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_ShifterImmediate);
+ Op->ShifterImm.isASR = isASR;
+ Op->ShifterImm.Imm = Imm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_RotateImmediate);
+ Op->RotImm.Imm = Imm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor);
+ Op->Bitfield.LSB = LSB;
+ Op->Bitfield.Width = Width;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *
+ CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ KindTy Kind = k_RegisterList;
+
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().first))
+ Kind = k_DPRRegisterList;
+ else if (ARMMCRegisterClasses[ARM::SPRRegClassID].
+ contains(Regs.front().first))
+ Kind = k_SPRRegisterList;
+
+ ARMOperand *Op = new ARMOperand(Kind);
+ for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ I = Regs.begin(), E = Regs.end(); I != E; ++I)
+ Op->Registers.push_back(I->first);
+ array_pod_sort(Op->Registers.begin(), Op->Registers.end());
+ Op->StartLoc = StartLoc;
+ Op->EndLoc = EndLoc;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorList);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
+ unsigned Index,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.LaneIndex = Index;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ ARMOperand *Op = new ARMOperand(k_VectorIndex);
+ Op->VectorIndex.Val = Idx;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateMem(unsigned BaseRegNum,
+ const MCConstantExpr *OffsetImm,
+ unsigned OffsetRegNum,
+ ARM_AM::ShiftOpc ShiftType,
+ unsigned ShiftImm,
+ unsigned Alignment,
+ bool isNegative,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_Memory);
+ Op->Memory.BaseRegNum = BaseRegNum;
+ Op->Memory.OffsetImm = OffsetImm;
+ Op->Memory.OffsetRegNum = OffsetRegNum;
+ Op->Memory.ShiftType = ShiftType;
+ Op->Memory.ShiftImm = ShiftImm;
+ Op->Memory.Alignment = Alignment;
+ Op->Memory.isNegative = isNegative;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd,
+ ARM_AM::ShiftOpc ShiftTy,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_PostIndexRegister);
+ Op->PostIdxReg.RegNum = RegNum;
+ Op->PostIdxReg.isAdd = isAdd;
+ Op->PostIdxReg.ShiftTy = ShiftTy;
+ Op->PostIdxReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_MemBarrierOpt);
+ Op->MBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_ProcIFlags);
+ Op->IFlags.Val = IFlags;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_MSRMask);
+ Op->MMask.Val = MMask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void ARMOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case k_CondCode:
+ OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
+ break;
+ case k_CCOut:
+ OS << "<ccout " << getReg() << ">";
+ break;
+ case k_ITCondMask: {
+ static const char *const MaskStr[] = {
+ "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)",
+ "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)"
+ };
+ assert((ITMask.Mask & 0xf) == ITMask.Mask);
+ OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
+ break;
+ }
+ case k_CoprocNum:
+ OS << "<coprocessor number: " << getCoproc() << ">";
+ break;
+ case k_CoprocReg:
+ OS << "<coprocessor register: " << getCoproc() << ">";
+ break;
+ case k_CoprocOption:
+ OS << "<coprocessor option: " << CoprocOption.Val << ">";
+ break;
+ case k_MSRMask:
+ OS << "<mask: " << getMSRMask() << ">";
+ break;
+ case k_Immediate:
+ getImm()->print(OS);
+ break;
+ case k_MemBarrierOpt:
+ OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
+ break;
+ case k_Memory:
+ OS << "<memory "
+ << " base:" << Memory.BaseRegNum;
+ OS << ">";
+ break;
+ case k_PostIndexRegister:
+ OS << "post-idx register " << (PostIdxReg.isAdd ? "" : "-")
+ << PostIdxReg.RegNum;
+ if (PostIdxReg.ShiftTy != ARM_AM::no_shift)
+ OS << ARM_AM::getShiftOpcStr(PostIdxReg.ShiftTy) << " "
+ << PostIdxReg.ShiftImm;
+ OS << ">";
+ break;
+ case k_ProcIFlags: {
+ OS << "<ARM_PROC::";
+ unsigned IFlags = getProcIFlags();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ OS << ARM_PROC::IFlagsToString(1 << i);
+ OS << ">";
+ break;
+ }
+ case k_Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case k_ShifterImmediate:
+ OS << "<shift " << (ShifterImm.isASR ? "asr" : "lsl")
+ << " #" << ShifterImm.Imm << ">";
+ break;
+ case k_ShiftedRegister:
+ OS << "<so_reg_reg "
+ << RegShiftedReg.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
+ << " " << RegShiftedReg.ShiftReg << ">";
+ break;
+ case k_ShiftedImmediate:
+ OS << "<so_reg_imm "
+ << RegShiftedImm.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
+ << " #" << RegShiftedImm.ShiftImm << ">";
+ break;
+ case k_RotateImmediate:
+ OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
+ break;
+ case k_BitfieldDescriptor:
+ OS << "<bitfield " << "lsb: " << Bitfield.LSB
+ << ", width: " << Bitfield.Width << ">";
+ break;
+ case k_RegisterList:
+ case k_DPRRegisterList:
+ case k_SPRRegisterList: {
+ OS << "<register_list ";
+
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ) {
+ OS << *I;
+ if (++I < E) OS << ", ";
+ }
+
+ OS << ">";
+ break;
+ }
+ case k_VectorList:
+ OS << "<vector_list " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListAllLanes:
+ OS << "<vector_list(all lanes) " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListIndexed:
+ OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
+ << VectorList.Count << " * " << VectorList.RegNum << ">";
+ break;
+ case k_Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case k_VectorIndex:
+ OS << "<vectorindex " << getVectorIndex() << ">";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+bool ARMAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
+ RegNo = tryParseRegister();
+
+ return (RegNo == (unsigned)-1);
+}
+
+/// Try to parse a register name. The token must be an Identifier when called,
+/// and if it is a register name the token is eaten and the register number is
+/// returned. Otherwise return -1.
+///
+int ARMAsmParser::tryParseRegister() {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) return -1;
+
+ std::string lowerCase = Tok.getString().lower();
+ unsigned RegNum = MatchRegisterName(lowerCase);
+ if (!RegNum) {
+ RegNum = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ // Additional register name aliases for 'gas' compatibility.
+ .Case("a1", ARM::R0)
+ .Case("a2", ARM::R1)
+ .Case("a3", ARM::R2)
+ .Case("a4", ARM::R3)
+ .Case("v1", ARM::R4)
+ .Case("v2", ARM::R5)
+ .Case("v3", ARM::R6)
+ .Case("v4", ARM::R7)
+ .Case("v5", ARM::R8)
+ .Case("v6", ARM::R9)
+ .Case("v7", ARM::R10)
+ .Case("v8", ARM::R11)
+ .Case("sb", ARM::R9)
+ .Case("sl", ARM::R10)
+ .Case("fp", ARM::R11)
+ .Default(0);
+ }
+ if (!RegNum) {
+ // Check for aliases registered via .req. Canonicalize to lower case.
+ // That's more consistent since register names are case insensitive, and
+ // it's how the original entry was passed in from MC/MCParser/AsmParser.
+ StringMap<unsigned>::const_iterator Entry = RegisterReqs.find(lowerCase);
+ // If no match, return failure.
+ if (Entry == RegisterReqs.end())
+ return -1;
+ Parser.Lex(); // Eat identifier token.
+ return Entry->getValue();
+ }
+
+ Parser.Lex(); // Eat identifier token.
+
+ return RegNum;
+}
+
+// Try to parse a shifter (e.g., "lsl <amt>"). On success, return 0.
+// If a recoverable error occurs, return 1. If an irrecoverable error
+// occurs, return -1. An irrecoverable error is one where tokens have been
+// consumed in the process of trying to parse the shifter (i.e., when it is
+// indeed a shifter operand, but malformed).
+int ARMAsmParser::tryParseShiftRegister(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ std::string lowerCase = Tok.getString().lower();
+ ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("asl", ARM_AM::lsl)
+ .Case("lsl", ARM_AM::lsl)
+ .Case("lsr", ARM_AM::lsr)
+ .Case("asr", ARM_AM::asr)
+ .Case("ror", ARM_AM::ror)
+ .Case("rrx", ARM_AM::rrx)
+ .Default(ARM_AM::no_shift);
+
+ if (ShiftTy == ARM_AM::no_shift)
+ return 1;
+
+ Parser.Lex(); // Eat the operator.
+
+ // The source register for the shift has already been added to the
+ // operand list, so we need to pop it off and combine it into the shifted
+ // register operand instead.
+ OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+ if (!PrevOp->isReg())
+ return Error(PrevOp->getStartLoc(), "shift must be of a register");
+ int SrcReg = PrevOp->getReg();
+
+ SMLoc EndLoc;
+ int64_t Imm = 0;
+ int ShiftReg = 0;
+ if (ShiftTy == ARM_AM::rrx) {
+ // RRX Doesn't have an explicit shift amount. The encoder expects
+ // the shift register to be the same as the source register. Seems odd,
+ // but OK.
+ ShiftReg = SrcReg;
+ } else {
+ // Figure out if this is shifted by a constant or a register (for non-RRX).
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
+ Parser.Lex(); // Eat hash.
+ SMLoc ImmLoc = Parser.getTok().getLoc();
+ const MCExpr *ShiftExpr = 0;
+ if (getParser().parseExpression(ShiftExpr, EndLoc)) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // The expression must be evaluatable as an immediate.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftExpr);
+ if (!CE) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((ShiftTy == ARM_AM::lsl || ShiftTy == ARM_AM::ror) && Imm > 31) ||
+ ((ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr) && Imm > 32)) {
+ Error(ImmLoc, "immediate shift value out of range");
+ return -1;
+ }
+ // shift by zero is a nop. Always send it through as lsl.
+ // ('as' compatibility)
+ if (Imm == 0)
+ ShiftTy = ARM_AM::lsl;
+ } else if (Parser.getTok().is(AsmToken::Identifier)) {
+ SMLoc L = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
+ ShiftReg = tryParseRegister();
+ if (ShiftReg == -1) {
+ Error (L, "expected immediate or register in shift operand");
+ return -1;
+ }
+ } else {
+ Error (Parser.getTok().getLoc(),
+ "expected immediate or register in shift operand");
+ return -1;
+ }
+ }
+
+ if (ShiftReg && ShiftTy != ARM_AM::rrx)
+ Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
+ ShiftReg, Imm,
+ S, EndLoc));
+ else
+ Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm,
+ S, EndLoc));
+
+ return 0;
+}
+
+
+/// Try to parse a register name. The token must be an Identifier when called.
+/// If it's a register, an AsmOperand is created. Another AsmOperand is created
+/// if there is a "writeback". 'true' if it's not a register.
+///
+/// TODO this is likely to change to allow different register types and or to
+/// parse for a specific register type.
+bool ARMAsmParser::
+tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &RegTok = Parser.getTok();
+ int RegNo = tryParseRegister();
+ if (RegNo == -1)
+ return true;
+
+ Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(),
+ RegTok.getEndLoc()));
+
+ const AsmToken &ExclaimTok = Parser.getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ return false;
+ }
+
+ // Also check for an index operand. This is only legal for vector registers,
+ // but that'll get caught OK in operand matching, so we don't need to
+ // explicitly filter everything else out here.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ SMLoc SIdx = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const MCExpr *ImmVal;
+ if (getParser().parseExpression(ImmVal))
+ return true;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE)
+ return TokError("immediate value expected for vector index");
+
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(Parser.getTok().getLoc(), "']' expected");
+
+ SMLoc E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
+ SIdx, E,
+ getContext()));
+ }
+
+ return false;
+}
+
+/// MatchCoprocessorOperandName - Try to parse an coprocessor related
+/// instruction with a symbolic operand name. Example: "p1", "p7", "c3",
+/// "c5", ...
+static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
+ // Use the same layout as the tablegen'erated register name matcher. Ugly,
+ // but efficient.
+ switch (Name.size()) {
+ default: return -1;
+ case 2:
+ if (Name[0] != CoprocOp)
+ return -1;
+ switch (Name[1]) {
+ default: return -1;
+ case '0': return 0;
+ case '1': return 1;
+ case '2': return 2;
+ case '3': return 3;
+ case '4': return 4;
+ case '5': return 5;
+ case '6': return 6;
+ case '7': return 7;
+ case '8': return 8;
+ case '9': return 9;
+ }
+ case 3:
+ if (Name[0] != CoprocOp || Name[1] != '1')
+ return -1;
+ switch (Name[2]) {
+ default: return -1;
+ case '0': return 10;
+ case '1': return 11;
+ case '2': return 12;
+ case '3': return 13;
+ case '4': return 14;
+ case '5': return 15;
+ }
+ }
+}
+
+/// parseITCondCode - Try to parse a condition code for an IT instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ unsigned CC = StringSwitch<unsigned>(Tok.getString().lower())
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC == ~0U)
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the token.
+
+ Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), S));
+
+ return MatchOperand_Success;
+}
+
+/// parseCoprocNumOperand - Try to parse an coprocessor number operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+ if (Num == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
+ return MatchOperand_Success;
+}
+
+/// parseCoprocRegOperand - Try to parse an coprocessor register operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
+ return MatchOperand_Success;
+}
+
+/// parseCoprocOptionOperand - Try to parse an coprocessor option operand.
+/// coproc_option : '{' imm0_255 '}'
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ // If this isn't a '{', this isn't a coprocessor immediate operand.
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the '{'
+
+ const MCExpr *Expr;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().parseExpression(Expr)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE || CE->getValue() < 0 || CE->getValue() > 255) {
+ Error(Loc, "coprocessor option must be an immediate in range [0, 255]");
+ return MatchOperand_ParseFail;
+ }
+ int Val = CE->getValue();
+
+ // Check for and consume the closing '}'
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return MatchOperand_ParseFail;
+ SMLoc E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat the '}'
+
+ Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
+ return MatchOperand_Success;
+}
+
+// For register list parsing, we need to map from raw GPR register numbering
+// to the enumeration values. The enumeration values aren't sorted by
+// register number due to our using "sp", "lr" and "pc" as canonical names.
+static unsigned getNextRegister(unsigned Reg) {
+ // If this is a GPR, we need to do it manually, otherwise we can rely
+ // on the sort ordering of the enumeration since the other reg-classes
+ // are sane.
+ if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ return Reg + 1;
+ switch(Reg) {
+ default: llvm_unreachable("Invalid GPR number!");
+ case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2;
+ case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4;
+ case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6;
+ case ARM::R6: return ARM::R7; case ARM::R7: return ARM::R8;
+ case ARM::R8: return ARM::R9; case ARM::R9: return ARM::R10;
+ case ARM::R10: return ARM::R11; case ARM::R11: return ARM::R12;
+ case ARM::R12: return ARM::SP; case ARM::SP: return ARM::LR;
+ case ARM::LR: return ARM::PC; case ARM::PC: return ARM::R0;
+ }
+}
+
+// Return the low-subreg of a given Q register.
+static unsigned getDRegFromQReg(unsigned QReg) {
+ switch (QReg) {
+ default: llvm_unreachable("expected a Q register!");
+ case ARM::Q0: return ARM::D0;
+ case ARM::Q1: return ARM::D2;
+ case ARM::Q2: return ARM::D4;
+ case ARM::Q3: return ARM::D6;
+ case ARM::Q4: return ARM::D8;
+ case ARM::Q5: return ARM::D10;
+ case ARM::Q6: return ARM::D12;
+ case ARM::Q7: return ARM::D14;
+ case ARM::Q8: return ARM::D16;
+ case ARM::Q9: return ARM::D18;
+ case ARM::Q10: return ARM::D20;
+ case ARM::Q11: return ARM::D22;
+ case ARM::Q12: return ARM::D24;
+ case ARM::Q13: return ARM::D26;
+ case ARM::Q14: return ARM::D28;
+ case ARM::Q15: return ARM::D30;
+ }
+}
+
+/// Parse a register list.
+bool ARMAsmParser::
+parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LCurly) &&
+ "Token is not a Left Curly Brace");
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '{' token.
+ SMLoc RegLoc = Parser.getTok().getLoc();
+
+ // Check the first register in the list to see what register class
+ // this is a list of.
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
+
+ // The reglist instructions have at most 16 registers, so reserve
+ // space for that many.
+ SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
+
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ ++Reg;
+ }
+ const MCRegisterClass *RC;
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::GPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::DPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::SPRRegClassID];
+ else
+ return Error(RegLoc, "invalid register in register list");
+
+ // Store the register.
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+
+ // This starts immediately after the first register token in the list,
+ // so we can see either a comma or a minus (range separator) as a legal
+ // next token.
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the minus.
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1)
+ return Error(AfterMinusLoc, "register expected");
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!RC->contains(EndReg))
+ return Error(AfterMinusLoc, "invalid register in register list");
+ // Ranges must go from low to high.
+ if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
+ return Error(AfterMinusLoc, "bad range in register list");
+
+ // Add all the registers in the range to the register list.
+ while (Reg != EndReg) {
+ Reg = getNextRegister(Reg);
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ }
+ continue;
+ }
+ Parser.Lex(); // Eat the comma.
+ RegLoc = Parser.getTok().getLoc();
+ int OldReg = Reg;
+ const AsmToken RegTok = Parser.getTok();
+ Reg = tryParseRegister();
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ bool isQReg = false;
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ isQReg = true;
+ }
+ // The register must be in the same register class as the first.
+ if (!RC->contains(Reg))
+ return Error(RegLoc, "invalid register in register list");
+ // List must be monotonically increasing.
+ if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ Warning(RegLoc, "register list not in ascending order");
+ else
+ return Error(RegLoc, "register list not in ascending order");
+ }
+ if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ continue;
+ }
+ // VFP register lists must also be contiguous.
+ // It's OK to use the enumeration values directly here rather, as the
+ // VFP register classes have the enum sorted properly.
+ if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
+ Reg != OldReg + 1)
+ return Error(RegLoc, "non-contiguous register range");
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ if (isQReg)
+ Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
+ }
+
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return Error(Parser.getTok().getLoc(), "'}' expected");
+ SMLoc E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat '}' token.
+
+ // Push the register list operand.
+ Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+
+ // The ARM system instruction variants for LDM/STM have a '^' token here.
+ if (Parser.getTok().is(AsmToken::Caret)) {
+ Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat '^' token.
+ }
+
+ return false;
+}
+
+// Helper function to parse the lane index for vector lists.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
+ Index = 0; // Always return a defined index value.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ Parser.Lex(); // Eat the '['.
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // "Dn[]" is the 'all lanes' syntax.
+ LaneKind = AllLanes;
+ EndLoc = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+
+ // There's an optional '#' token here. Normally there wouldn't be, but
+ // inline assemble puts one in, and it's friendly to accept that.
+ if (Parser.getTok().is(AsmToken::Hash))
+ Parser.Lex(); // Eat the '#'
+
+ const MCExpr *LaneIndex;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().parseExpression(LaneIndex)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LaneIndex);
+ if (!CE) {
+ Error(Loc, "lane index must be empty or an integer");
+ return MatchOperand_ParseFail;
+ }
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(Parser.getTok().getLoc(), "']' expected");
+ return MatchOperand_ParseFail;
+ }
+ EndLoc = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat the ']'.
+ int64_t Val = CE->getValue();
+
+ // FIXME: Make this range check context sensitive for .8, .16, .32.
+ if (Val < 0 || Val > 7) {
+ Error(Parser.getTok().getLoc(), "lane index out of range");
+ return MatchOperand_ParseFail;
+ }
+ Index = Val;
+ LaneKind = IndexedLane;
+ return MatchOperand_Success;
+ }
+ LaneKind = NoLanes;
+ return MatchOperand_Success;
+}
+
+// parse a vector register list
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ VectorLaneTy LaneKind;
+ unsigned LaneIndex;
+ SMLoc S = Parser.getTok().getLoc();
+ // As an extension (to match gas), support a plain D register or Q register
+ // (without encosing curly braces) as a single or double entry list,
+ // respectively.
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ SMLoc E = Parser.getTok().getEndLoc();
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ case NoLanes:
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
+ break;
+ case AllLanes:
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1,
+ LaneIndex,
+ false, S, E));
+ break;
+ }
+ return MatchOperand_Success;
+ }
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ case NoLanes:
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
+ break;
+ case AllLanes:
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2,
+ LaneIndex,
+ false, S, E));
+ break;
+ }
+ return MatchOperand_Success;
+ }
+ Error(S, "vector register expected");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat '{' token.
+ SMLoc RegLoc = Parser.getTok().getLoc();
+
+ int Reg = tryParseRegister();
+ if (Reg == -1) {
+ Error(RegLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Count = 1;
+ int Spacing = 0;
+ unsigned FirstReg = Reg;
+ // The list is of D registers, but we also allow Q regs and just interpret
+ // them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ FirstReg = Reg = getDRegFromQReg(Reg);
+ Spacing = 1; // double-spacing requires explicit D registers, otherwise
+ // it's ambiguous with four-register single spaced.
+ ++Reg;
+ ++Count;
+ }
+
+ SMLoc E;
+ if (parseVectorLane(LaneKind, LaneIndex, E) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(Parser.getTok().getLoc(),
+ "sequential registers in double spaced list");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the minus.
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1) {
+ Error(AfterMinusLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
+ Error(AfterMinusLoc, "invalid register in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Ranges must go from low to high.
+ if (Reg > EndReg) {
+ Error(AfterMinusLoc, "bad range in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(AfterMinusLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+
+ // Add all the registers in the range to the register list.
+ Count += EndReg - Reg;
+ Reg = EndReg;
+ continue;
+ }
+ Parser.Lex(); // Eat the comma.
+ RegLoc = Parser.getTok().getLoc();
+ int OldReg = Reg;
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ Error(RegLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // vector register lists must be contiguous.
+ // It's OK to use the enumeration values directly here rather, as the
+ // VFP register classes have the enum sorted properly.
+ //
+ // The list is of D registers, but we also allow Q regs and just interpret
+ // them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(RegLoc,
+ "invalid register in double-spaced list (must be 'D' register')");
+ return MatchOperand_ParseFail;
+ }
+ Reg = getDRegFromQReg(Reg);
+ if (Reg != OldReg + 1) {
+ Error(RegLoc, "non-contiguous register range");
+ return MatchOperand_ParseFail;
+ }
+ ++Reg;
+ Count += 2;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc LaneLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(LaneLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ continue;
+ }
+ // Normal D register.
+ // Figure out the register spacing (single or double) of the list if
+ // we don't know it already.
+ if (!Spacing)
+ Spacing = 1 + (Reg == OldReg + 2);
+
+ // Just check that it's contiguous and keep going.
+ if (Reg != OldReg + Spacing) {
+ Error(RegLoc, "non-contiguous register range");
+ return MatchOperand_ParseFail;
+ }
+ ++Count;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ if (Parser.getTok().isNot(AsmToken::RCurly)) {
+ Error(Parser.getTok().getLoc(), "'}' expected");
+ return MatchOperand_ParseFail;
+ }
+ E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat '}' token.
+
+ switch (LaneKind) {
+ case NoLanes:
+ // Two-register operands have been converted to the
+ // composite register classes.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
+
+ Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
+ (Spacing == 2), S, E));
+ break;
+ case AllLanes:
+ // Two-register operands have been converted to the
+ // composite register classes.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
+ (Spacing == 2),
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
+ LaneIndex,
+ (Spacing == 2),
+ S, E));
+ break;
+ }
+ return MatchOperand_Success;
+}
+
+/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ unsigned Opt;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef OptStr = Tok.getString();
+
+ Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("sh", ARM_MB::ISH)
+ .Case("ish", ARM_MB::ISH)
+ .Case("shst", ARM_MB::ISHST)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("un", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("unst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
+
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ } else if (Tok.is(AsmToken::Hash) ||
+ Tok.is(AsmToken::Dollar) ||
+ Tok.is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat the '#'.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ const MCExpr *MemBarrierID;
+ if (getParser().parseExpression(MemBarrierID)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+
+ int Val = CE->getValue();
+ if (Val & ~0xf) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Opt = ARM_MB::RESERVED_0 + Val;
+ } else
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
+ return MatchOperand_Success;
+}
+
+/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef IFlagsStr = Tok.getString();
+
+ // An iflags string of "none" is interpreted to mean that none of the AIF
+ // bits are set. Not a terribly useful instruction, but a valid encoding.
+ unsigned IFlags = 0;
+ if (IFlagsStr != "none") {
+ for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
+ .Case("a", ARM_PROC::A)
+ .Case("i", ARM_PROC::I)
+ .Case("f", ARM_PROC::F)
+ .Default(~0U);
+
+ // If some specific iflag is already set, it means that some letter is
+ // present more than once, this is not acceptable.
+ if (Flag == ~0U || (IFlags & Flag))
+ return MatchOperand_NoMatch;
+
+ IFlags |= Flag;
+ }
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateProcIFlags((ARM_PROC::IFlags)IFlags, S));
+ return MatchOperand_Success;
+}
+
+/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef Mask = Tok.getString();
+
+ if (isMClass()) {
+ // See ARMv6-M 10.1.1
+ std::string Name = Mask.lower();
+ unsigned FlagsVal = StringSwitch<unsigned>(Name)
+ // Note: in the documentation:
+ // ARM deprecates using MSR APSR without a _<bits> qualifier as an alias
+ // for MSR APSR_nzcvq.
+ // but we do make it an alias here. This is so to get the "mask encoding"
+ // bits correct on MSR APSR writes.
+ //
+ // FIXME: Note the 0xc00 "mask encoding" bits version of the registers
+ // should really only be allowed when writing a special register. Note
+ // they get dropped in the MRS instruction reading a special register as
+ // the SYSm field is only 8 bits.
+ //
+ // FIXME: the _g and _nzcvqg versions are only allowed if the processor
+ // includes the DSP extension but that is not checked.
+ .Case("apsr", 0x800)
+ .Case("apsr_nzcvq", 0x800)
+ .Case("apsr_g", 0x400)
+ .Case("apsr_nzcvqg", 0xc00)
+ .Case("iapsr", 0x801)
+ .Case("iapsr_nzcvq", 0x801)
+ .Case("iapsr_g", 0x401)
+ .Case("iapsr_nzcvqg", 0xc01)
+ .Case("eapsr", 0x802)
+ .Case("eapsr_nzcvq", 0x802)
+ .Case("eapsr_g", 0x402)
+ .Case("eapsr_nzcvqg", 0xc02)
+ .Case("xpsr", 0x803)
+ .Case("xpsr_nzcvq", 0x803)
+ .Case("xpsr_g", 0x403)
+ .Case("xpsr_nzcvqg", 0xc03)
+ .Case("ipsr", 0x805)
+ .Case("epsr", 0x806)
+ .Case("iepsr", 0x807)
+ .Case("msp", 0x808)
+ .Case("psp", 0x809)
+ .Case("primask", 0x810)
+ .Case("basepri", 0x811)
+ .Case("basepri_max", 0x812)
+ .Case("faultmask", 0x813)
+ .Case("control", 0x814)
+ .Default(~0U);
+
+ if (FlagsVal == ~0U)
+ return MatchOperand_NoMatch;
+
+ if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813)
+ // basepri, basepri_max and faultmask only valid for V7m.
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+ }
+
+ // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
+ size_t Start = 0, Next = Mask.find('_');
+ StringRef Flags = "";
+ std::string SpecReg = Mask.slice(Start, Next).lower();
+ if (Next != StringRef::npos)
+ Flags = Mask.slice(Next+1, Mask.size());
+
+ // FlagsVal contains the complete mask:
+ // 3-0: Mask
+ // 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ unsigned FlagsVal = 0;
+
+ if (SpecReg == "apsr") {
+ FlagsVal = StringSwitch<unsigned>(Flags)
+ .Case("nzcvq", 0x8) // same as CPSR_f
+ .Case("g", 0x4) // same as CPSR_s
+ .Case("nzcvqg", 0xc) // same as CPSR_fs
+ .Default(~0U);
+
+ if (FlagsVal == ~0U) {
+ if (!Flags.empty())
+ return MatchOperand_NoMatch;
+ else
+ FlagsVal = 8; // No flag
+ }
+ } else if (SpecReg == "cpsr" || SpecReg == "spsr") {
+ // cpsr_all is an alias for cpsr_fc, as is plain cpsr.
+ if (Flags == "all" || Flags == "")
+ Flags = "fc";
+ for (int i = 0, e = Flags.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
+ .Case("c", 1)
+ .Case("x", 2)
+ .Case("s", 4)
+ .Case("f", 8)
+ .Default(~0U);
+
+ // If some specific flag is already set, it means that some letter is
+ // present more than once, this is not acceptable.
+ if (FlagsVal == ~0U || (FlagsVal & Flag))
+ return MatchOperand_NoMatch;
+ FlagsVal |= Flag;
+ }
+ } else // No match for special register.
+ return MatchOperand_NoMatch;
+
+ // Special register without flags is NOT equivalent to "fc" flags.
+ // NOTE: This is a divergence from gas' behavior. Uncommenting the following
+ // two lines would enable gas compatibility at the expense of breaking
+ // round-tripping.
+ //
+ // if (!FlagsVal)
+ // FlagsVal = 0x9;
+
+ // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ if (SpecReg == "spsr")
+ FlagsVal |= 16;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
+ int Low, int High) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), Op + " operand expected.");
+ return MatchOperand_ParseFail;
+ }
+ StringRef ShiftName = Tok.getString();
+ std::string LowerOp = Op.lower();
+ std::string UpperOp = Op.upper();
+ if (ShiftName != LowerOp && ShiftName != UpperOp) {
+ Error(Parser.getTok().getLoc(), Op + " operand expected.");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat shift type token.
+
+ // There must be a '#' and a shift amount.
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc Loc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ int Val = CE->getValue();
+ if (Val < Low || Val > High) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreateImm(CE, Loc, EndLoc));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(S, "'be' or 'le' operand expected");
+ return MatchOperand_ParseFail;
+ }
+ int Val = StringSwitch<int>(Tok.getString())
+ .Case("be", 1)
+ .Case("le", 0)
+ .Default(-1);
+ Parser.Lex(); // Eat the token.
+
+ if (Val == -1) {
+ Error(S, "'be' or 'le' operand expected");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
+ getContext()),
+ S, Tok.getEndLoc()));
+ return MatchOperand_Success;
+}
+
+/// parseShifterImm - Parse the shifter immediate operand for SSAT/USAT
+/// instructions. Legal values are:
+/// lsl #n 'n' in [0,31]
+/// asr #n 'n' in [1,32]
+/// n == 32 encoded as n == 0.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(S, "shift operator 'asr' or 'lsl' expected");
+ return MatchOperand_ParseFail;
+ }
+ StringRef ShiftName = Tok.getString();
+ bool isASR;
+ if (ShiftName == "lsl" || ShiftName == "LSL")
+ isASR = false;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ isASR = true;
+ else {
+ Error(S, "shift operator 'asr' or 'lsl' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the operator.
+
+ // A '#' and a shift amount.
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
+
+ const MCExpr *ShiftAmount;
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed shift expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(ExLoc, "shift amount must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Val = CE->getValue();
+ if (isASR) {
+ // Shift amount must be in [1,32]
+ if (Val < 1 || Val > 32) {
+ Error(ExLoc, "'asr' shift amount must be in range [1,32]");
+ return MatchOperand_ParseFail;
+ }
+ // asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
+ if (isThumb() && Val == 32) {
+ Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode");
+ return MatchOperand_ParseFail;
+ }
+ if (Val == 32) Val = 0;
+ } else {
+ // Shift amount must be in [1,32]
+ if (Val < 0 || Val > 31) {
+ Error(ExLoc, "'lsr' shift amount must be in range [0,31]");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, EndLoc));
+
+ return MatchOperand_Success;
+}
+
+/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
+/// of instructions. Legal values are:
+/// ror #n 'n' in {0, 8, 16, 24}
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName != "ror" && ShiftName != "ROR")
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the operator.
+
+ // A '#' and a rotate amount.
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
+
+ const MCExpr *ShiftAmount;
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed rotate expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(ExLoc, "rotate amount must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Val = CE->getValue();
+ // Shift amount must be in {0, 8, 16, 24} (0 is undocumented extension)
+ // normally, zero is represented in asm by omitting the rotate operand
+ // entirely.
+ if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
+ Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreateRotImm(Val, S, EndLoc));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ // The bitfield descriptor is really two operands, the LSB and the width.
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *LSBExpr;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().parseExpression(LSBExpr)) {
+ Error(E, "malformed immediate expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LSBExpr);
+ if (!CE) {
+ Error(E, "'lsb' operand must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t LSB = CE->getValue();
+ // The LSB must be in the range [0,31]
+ if (LSB < 0 || LSB > 31) {
+ Error(E, "'lsb' operand must be in the range [0,31]");
+ return MatchOperand_ParseFail;
+ }
+ E = Parser.getTok().getLoc();
+
+ // Expect another immediate operand.
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "too few operands");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *WidthExpr;
+ SMLoc EndLoc;
+ if (getParser().parseExpression(WidthExpr, EndLoc)) {
+ Error(E, "malformed immediate expression");
+ return MatchOperand_ParseFail;
+ }
+ CE = dyn_cast<MCConstantExpr>(WidthExpr);
+ if (!CE) {
+ Error(E, "'width' operand must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Width = CE->getValue();
+ // The LSB must be in the range [1,32-lsb]
+ if (Width < 1 || Width > 32 - LSB) {
+ Error(E, "'width' operand must be in the range [1,32-lsb]");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, EndLoc));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Check for a post-index addressing register operand. Specifically:
+ // postidx_reg := '+' register {, shift}
+ // | '-' register {, shift}
+ // | register {, shift}
+
+ // This method must return MatchOperand_NoMatch without consuming any tokens
+ // in the case where there is no match, as other alternatives take other
+ // parse methods.
+ AsmToken Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ bool haveEaten = false;
+ bool isAdd = true;
+ if (Tok.is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat the '+' token.
+ haveEaten = true;
+ } else if (Tok.is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the '-' token.
+ isAdd = false;
+ haveEaten = true;
+ }
+
+ SMLoc E = Parser.getTok().getEndLoc();
+ int Reg = tryParseRegister();
+ if (Reg == -1) {
+ if (!haveEaten)
+ return MatchOperand_NoMatch;
+ Error(Parser.getTok().getLoc(), "register expected");
+ return MatchOperand_ParseFail;
+ }
+
+ ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
+ unsigned ShiftImm = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the ','.
+ if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
+ return MatchOperand_ParseFail;
+
+ // FIXME: Only approximates end...may include intervening whitespace.
+ E = Parser.getTok().getLoc();
+ }
+
+ Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
+ ShiftImm, S, E));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Check for a post-index addressing register operand. Specifically:
+ // am3offset := '+' register
+ // | '-' register
+ // | register
+ // | # imm
+ // | # + imm
+ // | # - imm
+
+ // This method must return MatchOperand_NoMatch without consuming any tokens
+ // in the case where there is no match, as other alternatives take other
+ // parse methods.
+ AsmToken Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ // Do immediates first, as we always parse those if we have a '#'.
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
+ Parser.Lex(); // Eat the '#'.
+ // Explicitly look for a '-', as we need to encode negative zero
+ // differently.
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
+ const MCExpr *Offset;
+ SMLoc E;
+ if (getParser().parseExpression(Offset, E))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
+ if (!CE) {
+ Error(S, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ // Negative zero is encoded as the flag value INT32_MIN.
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ Val = INT32_MIN;
+
+ Operands.push_back(
+ ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), S, E));
+
+ return MatchOperand_Success;
+ }
+
+
+ bool haveEaten = false;
+ bool isAdd = true;
+ if (Tok.is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat the '+' token.
+ haveEaten = true;
+ } else if (Tok.is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the '-' token.
+ isAdd = false;
+ haveEaten = true;
+ }
+
+ Tok = Parser.getTok();
+ int Reg = tryParseRegister();
+ if (Reg == -1) {
+ if (!haveEaten)
+ return MatchOperand_NoMatch;
+ Error(Tok.getLoc(), "register expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift,
+ 0, S, Tok.getEndLoc()));
+
+ return MatchOperand_Success;
+}
+
+/// cvtT2LdrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtT2LdrdPre(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateReg(0));
+ // addr
+ ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtT2StrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtT2StrdPre(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateReg(0));
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtLdWriteBackRegAddrMode2(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtLdWriteBackRegAddrModeImm12(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+
+/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtStWriteBackRegAddrModeImm12(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtStWriteBackRegAddrMode2(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtStWriteBackRegAddrMode3(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtLdExtTWriteBackImm(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtLdExtTWriteBackReg(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtStExtTWriteBackImm(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtStExtTWriteBackReg(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtLdrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtLdrdPre(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtStrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtStrdPre(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtLdWriteBackRegAddrMode3(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// cvtThumbMultiply - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+void ARMAsmParser::
+cvtThumbMultiply(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
+ // If we have a three-operand form, make sure to set Rn to be the operand
+ // that isn't the same as Rd.
+ unsigned RegOp = 4;
+ if (Operands.size() == 6 &&
+ ((ARMOperand*)Operands[4])->getReg() ==
+ ((ARMOperand*)Operands[3])->getReg())
+ RegOp = 5;
+ ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1);
+ Inst.addOperand(Inst.getOperand(0));
+ ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
+}
+
+void ARMAsmParser::
+cvtVLDwbFixed(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Vd
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+void ARMAsmParser::
+cvtVLDwbRegister(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Vd
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vm
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+void ARMAsmParser::
+cvtVSTwbFixed(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vt
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+void ARMAsmParser::
+cvtVSTwbRegister(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vm
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ // Vt
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+}
+
+/// Parse an ARM memory expression, return false if successful else return true
+/// or an error. The first token must be a '[' when called.
+bool ARMAsmParser::
+parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S, E;
+ assert(Parser.getTok().is(AsmToken::LBrac) &&
+ "Token is not a Left Bracket");
+ S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const AsmToken &BaseRegTok = Parser.getTok();
+ int BaseRegNum = tryParseRegister();
+ if (BaseRegNum == -1)
+ return Error(BaseRegTok.getLoc(), "register expected");
+
+ // The next token must either be a comma, a colon or a closing bracket.
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Colon) && !Tok.is(AsmToken::Comma) &&
+ !Tok.is(AsmToken::RBrac))
+ return Error(Tok.getLoc(), "malformed memory operand");
+
+ if (Tok.is(AsmToken::RBrac)) {
+ E = Tok.getEndLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
+ 0, 0, false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand. It's rather odd, but syntactically valid.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+ }
+
+ assert((Tok.is(AsmToken::Colon) || Tok.is(AsmToken::Comma)) &&
+ "Lost colon or comma in memory operand?!");
+ if (Tok.is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+ }
+
+ // If we have a ':', it's an alignment specifier.
+ if (Parser.getTok().is(AsmToken::Colon)) {
+ Parser.Lex(); // Eat the ':'.
+ E = Parser.getTok().getLoc();
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr))
+ return true;
+
+ // The expression has to be a constant. Memory references with relocations
+ // don't come through here, as they use the <label> forms of the relevant
+ // instructions.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE)
+ return Error (E, "constant expression expected");
+
+ unsigned Align = 0;
+ switch (CE->getValue()) {
+ default:
+ return Error(E,
+ "alignment specifier must be 16, 32, 64, 128, or 256 bits");
+ case 16: Align = 2; break;
+ case 32: Align = 4; break;
+ case 64: Align = 8; break;
+ case 128: Align = 16; break;
+ case 256: Align = 32; break;
+ }
+
+ // Now we should have the closing ']'
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ // Don't worry about range checking the value here. That's handled by
+ // the is*() predicates.
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0,
+ ARM_AM::no_shift, 0, Align,
+ false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+ }
+
+ // If we have a '#', it's an immediate offset, else assume it's a register
+ // offset. Be friendly and also accept a plain integer (without a leading
+ // hash) for gas compatibility.
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar) ||
+ Parser.getTok().is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat the '#'.
+ E = Parser.getTok().getLoc();
+
+ bool isNegative = getParser().getTok().is(AsmToken::Minus);
+ const MCExpr *Offset;
+ if (getParser().parseExpression(Offset))
+ return true;
+
+ // The expression has to be a constant. Memory references with relocations
+ // don't come through here, as they use the <label> forms of the relevant
+ // instructions.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
+ if (!CE)
+ return Error (E, "constant expression expected");
+
+ // If the constant was #-0, represent it as INT32_MIN.
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ CE = MCConstantExpr::Create(INT32_MIN, getContext());
+
+ // Now we should have the closing ']'
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ // Don't worry about range checking the value here. That's handled by
+ // the is*() predicates.
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, CE, 0,
+ ARM_AM::no_shift, 0, 0,
+ false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+ }
+
+ // The register offset is optionally preceded by a '+' or '-'
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex(); // Eat the '-'.
+ } else if (Parser.getTok().is(AsmToken::Plus)) {
+ // Nothing to do.
+ Parser.Lex(); // Eat the '+'.
+ }
+
+ E = Parser.getTok().getLoc();
+ int OffsetRegNum = tryParseRegister();
+ if (OffsetRegNum == -1)
+ return Error(E, "register expected");
+
+ // If there's a shift operator, handle it.
+ ARM_AM::ShiftOpc ShiftType = ARM_AM::no_shift;
+ unsigned ShiftImm = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the ','.
+ if (parseMemRegOffsetShift(ShiftType, ShiftImm))
+ return true;
+ }
+
+ // Now we should have the closing ']'
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
+ ShiftType, ShiftImm, 0, isNegative,
+ S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+}
+
+/// parseMemRegOffsetShift - one of these two:
+/// ( lsl | lsr | asr | ror ) , # shift_amount
+/// rrx
+/// return true if it parses a shift otherwise it returns false.
+bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
+ unsigned &Amount) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName == "lsl" || ShiftName == "LSL" ||
+ ShiftName == "asl" || ShiftName == "ASL")
+ St = ARM_AM::lsl;
+ else if (ShiftName == "lsr" || ShiftName == "LSR")
+ St = ARM_AM::lsr;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ St = ARM_AM::asr;
+ else if (ShiftName == "ror" || ShiftName == "ROR")
+ St = ARM_AM::ror;
+ else if (ShiftName == "rrx" || ShiftName == "RRX")
+ St = ARM_AM::rrx;
+ else
+ return Error(Loc, "illegal shift operator");
+ Parser.Lex(); // Eat shift type token.
+
+ // rrx stands alone.
+ Amount = 0;
+ if (St != ARM_AM::rrx) {
+ Loc = Parser.getTok().getLoc();
+ // A '#' and a shift amount.
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash) &&
+ HashTok.isNot(AsmToken::Dollar))
+ return Error(HashTok.getLoc(), "'#' expected");
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr))
+ return true;
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE)
+ return Error(Loc, "shift amount must be an immediate");
+ int64_t Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) ||
+ ((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32))
+ return Error(Loc, "immediate shift value out of range");
+ // If <ShiftTy> #0, turn it into a no_shift.
+ if (Imm == 0)
+ St = ARM_AM::lsl;
+ // For consistency, treat lsr #32 and asr #32 as having immediate value 0.
+ if (Imm == 32)
+ Imm = 0;
+ Amount = Imm;
+ }
+
+ return false;
+}
+
+/// parseFPImm - A floating point immediate expression operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Anything that can accept a floating point constant as an operand
+ // needs to go through here, as the regular parseExpression is
+ // integer only.
+ //
+ // This routine still creates a generic Immediate operand, containing
+ // a bitcast of the 64-bit floating point value. The various operands
+ // that accept floats can check whether the value is valid for them
+ // via the standard is*() predicates.
+
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ // Disambiguate the VMOV forms that can accept an FP immediate.
+ // vmov.f32 <sreg>, #imm
+ // vmov.f64 <dreg>, #imm
+ // vmov.f32 <dreg>, #imm @ vector f32x2
+ // vmov.f32 <qreg>, #imm @ vector f32x4
+ //
+ // There are also the NEON VMOV instructions which expect an
+ // integer constant. Make sure we don't try to parse an FPImm
+ // for these:
+ // vmov.i{8|16|32|64} <dreg|qreg>, #imm
+ ARMOperand *TyOp = static_cast<ARMOperand*>(Operands[2]);
+ if (!TyOp->isToken() || (TyOp->getToken() != ".f32" &&
+ TyOp->getToken() != ".f64"))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat the '#'.
+
+ // Handle negation, as that still comes through as a separate token.
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex();
+ }
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc Loc = Tok.getLoc();
+ if (Tok.is(AsmToken::Real)) {
+ APFloat RealVal(APFloat::IEEEsingle, Tok.getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ // If we had a '-' in front, toggle the sign bit.
+ IntVal ^= (uint64_t)isNegative << 31;
+ Parser.Lex(); // Eat the token.
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(IntVal, getContext()),
+ S, Parser.getTok().getLoc()));
+ return MatchOperand_Success;
+ }
+ // Also handle plain integers. Instructions which allow floating point
+ // immediates also allow a raw encoded 8-bit value.
+ if (Tok.is(AsmToken::Integer)) {
+ int64_t Val = Tok.getIntVal();
+ Parser.Lex(); // Eat the token.
+ if (Val > 255 || Val < 0) {
+ Error(Loc, "encoded floating point value out of range");
+ return MatchOperand_ParseFail;
+ }
+ double RealVal = ARM_AM::getFPImmFloat(Val);
+ Val = APFloat(APFloat::IEEEdouble, RealVal).bitcastToAPInt().getZExtValue();
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(Val, getContext()), S,
+ Parser.getTok().getLoc()));
+ return MatchOperand_Success;
+ }
+
+ Error(Loc, "invalid floating point immediate");
+ return MatchOperand_ParseFail;
+}
+
+/// Parse a arm instruction operand. For now this parses the operand regardless
+/// of the mnemonic.
+bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+ SMLoc S, E;
+
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
+ return false;
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return true;
+ case AsmToken::Identifier: {
+ // If we've seen a branch mnemonic, the next operand must be a label. This
+ // is true even if the label is a register name. So "br r1" means branch to
+ // label "r1".
+ bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl";
+ if (!ExpectLabel) {
+ if (!tryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = tryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
+ return false;
+ }
+ }
+
+ // Fall though for the Identifier case that is not a register or a
+ // special name.
+ }
+ case AsmToken::LParen: // parenthesized expressions like (_strcmp-4)
+ case AsmToken::Integer: // things like 1f and 2b as a branch targets
+ case AsmToken::String: // quoted label names.
+ case AsmToken::Dot: { // . as a branch target
+ // This was not a register so parse other operands that start with an
+ // identifier (like labels) as expressions and create them as immediates.
+ const MCExpr *IdVal;
+ S = Parser.getTok().getLoc();
+ if (getParser().parseExpression(IdVal))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
+ return false;
+ }
+ case AsmToken::LBrac:
+ return parseMemory(Operands);
+ case AsmToken::LCurly:
+ return parseRegisterList(Operands);
+ case AsmToken::Dollar:
+ case AsmToken::Hash: {
+ // #42 -> immediate.
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Colon)) {
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
+ const MCExpr *ImmVal;
+ if (getParser().parseExpression(ImmVal))
+ return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (CE) {
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
+ }
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
+
+ // There can be a trailing '!' on operands that we want as a separate
+ // '!' Token operand. Handle that here. For example, the compatibilty
+ // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ }
+ return false;
+ }
+ // w/ a ':' after the '#', it's just like a plain ':'.
+ // FALLTHROUGH
+ }
+ case AsmToken::Colon: {
+ // ":lower16:" and ":upper16:" expression prefixes
+ // FIXME: Check it's an expression prefix,
+ // e.g. (FOO - :lower16:BAR) isn't legal.
+ ARMMCExpr::VariantKind RefKind;
+ if (parsePrefix(RefKind))
+ return true;
+
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return true;
+
+ const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
+ getContext());
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
+ return false;
+ }
+ }
+}
+
+// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
+// :lower16: and :upper16:.
+bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
+ RefKind = ARMMCExpr::VK_ARM_None;
+
+ // :lower16: and :upper16: modifiers
+ assert(getLexer().is(AsmToken::Colon) && "expected a :");
+ Parser.Lex(); // Eat ':'
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "expected prefix identifier in operand");
+ return true;
+ }
+
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ if (IDVal == "lower16") {
+ RefKind = ARMMCExpr::VK_ARM_LO16;
+ } else if (IDVal == "upper16") {
+ RefKind = ARMMCExpr::VK_ARM_HI16;
+ } else {
+ Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
+ return true;
+ }
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "unexpected token after prefix");
+ return true;
+ }
+ Parser.Lex(); // Eat the last ':'
+ return false;
+}
+
+/// \brief Given a mnemonic, split out possible predication code and carry
+/// setting letters to form a canonical mnemonic and flags.
+//
+// FIXME: Would be nice to autogen this.
+// FIXME: This is a bit of a maze of special cases.
+StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
+ unsigned &PredicationCode,
+ bool &CarrySetting,
+ unsigned &ProcessorIMod,
+ StringRef &ITMask) {
+ PredicationCode = ARMCC::AL;
+ CarrySetting = false;
+ ProcessorIMod = 0;
+
+ // Ignore some mnemonics we know aren't predicated forms.
+ //
+ // FIXME: Would be nice to autogen this.
+ if ((Mnemonic == "movs" && isThumb()) ||
+ Mnemonic == "teq" || Mnemonic == "vceq" || Mnemonic == "svc" ||
+ Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+ Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
+ Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
+ Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
+ Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
+ Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
+ Mnemonic == "fmuls")
+ return Mnemonic;
+
+ // First, split out any predication code. Ignore mnemonics we know aren't
+ // predicated but do have a carry-set and so weren't caught above.
+ if (Mnemonic != "adcs" && Mnemonic != "bics" && Mnemonic != "movs" &&
+ Mnemonic != "muls" && Mnemonic != "smlals" && Mnemonic != "smulls" &&
+ Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" &&
+ Mnemonic != "sbcs" && Mnemonic != "rscs") {
+ unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+ PredicationCode = CC;
+ }
+ }
+
+ // Next, determine if we have a carry setting bit. We explicitly ignore all
+ // the instructions we know end in 's'.
+ if (Mnemonic.endswith("s") &&
+ !(Mnemonic == "cps" || Mnemonic == "mls" ||
+ Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
+ Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
+ Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
+ Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
+ Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
+ Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
+ Mnemonic == "vfms" || Mnemonic == "vfnms" ||
+ (Mnemonic == "movs" && isThumb()))) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
+ CarrySetting = true;
+ }
+
+ // The "cps" instruction can have a interrupt mode operand which is glued into
+ // the mnemonic. Check if this is the case, split it and parse the imod op
+ if (Mnemonic.startswith("cps")) {
+ // Split out any imod code.
+ unsigned IMod =
+ StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2, 2))
+ .Case("ie", ARM_PROC::IE)
+ .Case("id", ARM_PROC::ID)
+ .Default(~0U);
+ if (IMod != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size()-2);
+ ProcessorIMod = IMod;
+ }
+ }
+
+ // The "it" instruction has the condition mask on the end of the mnemonic.
+ if (Mnemonic.startswith("it")) {
+ ITMask = Mnemonic.slice(2, Mnemonic.size());
+ Mnemonic = Mnemonic.slice(0, 2);
+ }
+
+ return Mnemonic;
+}
+
+/// \brief Given a canonical mnemonic, determine if the instruction ever allows
+/// inclusion of carry set or predication code operands.
+//
+// FIXME: It would be nice to autogen this.
+void ARMAsmParser::
+getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode) {
+ if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
+ Mnemonic == "add" || Mnemonic == "adc" ||
+ Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
+ Mnemonic == "orr" || Mnemonic == "mvn" ||
+ Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
+ Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" ||
+ Mnemonic == "vfm" || Mnemonic == "vfnm" ||
+ (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" ||
+ Mnemonic == "mla" || Mnemonic == "smlal" ||
+ Mnemonic == "umlal" || Mnemonic == "umull"))) {
+ CanAcceptCarrySet = true;
+ } else
+ CanAcceptCarrySet = false;
+
+ if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" ||
+ Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" ||
+ Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" ||
+ Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" ||
+ Mnemonic == "dsb" || Mnemonic == "isb" || Mnemonic == "setend" ||
+ (Mnemonic == "clrex" && !isThumb()) ||
+ (Mnemonic == "nop" && isThumbOne()) ||
+ ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" ||
+ Mnemonic == "ldc2" || Mnemonic == "ldc2l" ||
+ Mnemonic == "stc2" || Mnemonic == "stc2l") && !isThumb()) ||
+ ((Mnemonic.startswith("rfe") || Mnemonic.startswith("srs")) &&
+ !isThumb()) ||
+ Mnemonic.startswith("cps") || (Mnemonic == "movs" && isThumbOne())) {
+ CanAcceptPredicationCode = false;
+ } else
+ CanAcceptPredicationCode = true;
+
+ if (isThumb()) {
+ if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
+ Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
+ CanAcceptPredicationCode = false;
+ }
+}
+
+bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // FIXME: This is all horribly hacky. We really need a better way to deal
+ // with optional operands like this in the matcher table.
+
+ // The 'mov' mnemonic is special. One variant has a cc_out operand, while
+ // another does not. Specifically, the MOVW instruction does not. So we
+ // special case it here and remove the defaulted (non-setting) cc_out
+ // operand if that's the instruction we're trying to match.
+ //
+ // We do this as post-processing of the explicit operands rather than just
+ // conditionally adding the cc_out in the first place because we need
+ // to check the type of the parsed immediate operand.
+ if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() &&
+ !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() &&
+ static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
+
+ // Register-register 'add' for thumb does not have a cc_out operand
+ // when there are only two register operands.
+ if (isThumb() && Mnemonic == "add" && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
+ // Register-register 'add' for thumb does not have a cc_out operand
+ // when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do
+ // have to check the immediate range here since Thumb2 has a variant
+ // that can handle a different range and has a cc_out operand.
+ if (((isThumb() && Mnemonic == "add") ||
+ (isThumbTwo() && Mnemonic == "sub")) &&
+ Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) ||
+ static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4()))
+ return true;
+ // For Thumb2, add/sub immediate does not have a cc_out operand for the
+ // imm0_4095 variant. That's the least-preferred variant when
+ // selecting via the generic "add" mnemonic, so to know that we
+ // should remove the cc_out operand, we have to explicitly check that
+ // it's not one of the other variants. Ugh.
+ if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ // Nest conditions rather than one big 'if' statement for readability.
+ //
+ // If either register is a high reg, it's either one of the SP
+ // variants (handled above) or a 32-bit encoding, so we just
+ // check against T3. If the second register is the PC, this is an
+ // alternate form of ADR, which uses encoding T4, so check for that too.
+ if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) &&
+ static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
+ static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
+ return false;
+ // If both registers are low, we're in an IT block, and the immediate is
+ // in range, we should use encoding T1 instead, which has a cc_out.
+ if (inITBlock() &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) &&
+ static_cast<ARMOperand*>(Operands[5])->isImm0_7())
+ return false;
+
+ // Otherwise, we use encoding T4, which does not have a cc_out
+ // operand.
+ return true;
+ }
+
+ // The thumb2 multiply instruction doesn't have a CCOut register, so
+ // if we have a "mul" mnemonic in Thumb mode, check if we'll be able to
+ // use the 16-bit encoding or not.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[5])->isReg() &&
+ // If the registers aren't low regs, the destination reg isn't the
+ // same as one of the source regs, or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) ||
+ !inITBlock() ||
+ (static_cast<ARMOperand*>(Operands[3])->getReg() !=
+ static_cast<ARMOperand*>(Operands[5])->getReg() &&
+ static_cast<ARMOperand*>(Operands[3])->getReg() !=
+ static_cast<ARMOperand*>(Operands[4])->getReg())))
+ return true;
+
+ // Also check the 'mul' syntax variant that doesn't specify an explicit
+ // destination register.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ // If the registers aren't low regs or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !inITBlock()))
+ return true;
+
+
+
+ // Register-register 'add/sub' for thumb does not have a cc_out operand
+ // when it's an ADD/SUB SP, #imm. Be lenient on count since there's also
+ // the "add/sub SP, SP, #imm" version. If the follow-up operands aren't
+ // right, this will result in better diagnostics (which operand is off)
+ // anyway.
+ if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ (Operands.size() == 5 || Operands.size() == 6) &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ (static_cast<ARMOperand*>(Operands[4])->isImm() ||
+ (Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[5])->isImm())))
+ return true;
+
+ return false;
+}
+
+static bool isDataTypeToken(StringRef Tok) {
+ return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
+ Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
+ Tok == ".u8" || Tok == ".u16" || Tok == ".u32" || Tok == ".u64" ||
+ Tok == ".s8" || Tok == ".s16" || Tok == ".s32" || Tok == ".s64" ||
+ Tok == ".p8" || Tok == ".p16" || Tok == ".f32" || Tok == ".f64" ||
+ Tok == ".f" || Tok == ".d";
+}
+
+// FIXME: This bit should probably be handled via an explicit match class
+// in the .td files that matches the suffix instead of having it be
+// a literal string token the way it is now.
+static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
+ return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
+}
+
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
+/// Parse an arm instruction mnemonic followed by its operands.
+bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Apply mnemonic aliases before doing anything else, as the destination
+ // mnemnonic may include suffices and we want to handle them normally.
+ // The generic tblgen'erated code does this later, at the start of
+ // MatchInstructionImpl(), but that's too late for aliases that include
+ // any sort of suffix.
+ unsigned AvailableFeatures = getAvailableFeatures();
+ applyMnemonicAliases(Name, AvailableFeatures);
+
+ // First check for the ARM-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the 'instruction."
+ return true;
+ }
+
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Mnemonic = Name.slice(Start, Next);
+
+ // Split out the predication code and carry setting flag from the mnemonic.
+ unsigned PredicationCode;
+ unsigned ProcessorIMod;
+ bool CarrySetting;
+ StringRef ITMask;
+ Mnemonic = splitMnemonic(Mnemonic, PredicationCode, CarrySetting,
+ ProcessorIMod, ITMask);
+
+ // In Thumb1, only the branch (B) instruction can be predicated.
+ if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
+ Parser.eatToEndOfStatement();
+ return Error(NameLoc, "conditional execution not supported in Thumb1");
+ }
+
+ Operands.push_back(ARMOperand::CreateToken(Mnemonic, NameLoc));
+
+ // Handle the IT instruction ITMask. Convert it to a bitmask. This
+ // is the mask as it will be for the IT encoding if the conditional
+ // encoding has a '1' as it's bit0 (i.e. 't' ==> '1'). In the case
+ // where the conditional bit0 is zero, the instruction post-processing
+ // will adjust the mask accordingly.
+ if (Mnemonic == "it") {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
+ if (ITMask.size() > 3) {
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "too many conditions on IT instruction");
+ }
+ unsigned Mask = 8;
+ for (unsigned i = ITMask.size(); i != 0; --i) {
+ char pos = ITMask[i - 1];
+ if (pos != 't' && pos != 'e') {
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
+ }
+ Mask >>= 1;
+ if (ITMask[i - 1] == 't')
+ Mask |= 8;
+ }
+ Operands.push_back(ARMOperand::CreateITMask(Mask, Loc));
+ }
+
+ // FIXME: This is all a pretty gross hack. We should automatically handle
+ // optional operands like this via tblgen.
+
+ // Next, add the CCOut and ConditionCode operands, if needed.
+ //
+ // For mnemonics which can ever incorporate a carry setting bit or predication
+ // code, our matching model involves us always generating CCOut and
+ // ConditionCode operands to match the mnemonic "as written" and then we let
+ // the matcher deal with finding the right instruction or generating an
+ // appropriate error.
+ bool CanAcceptCarrySet, CanAcceptPredicationCode;
+ getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode);
+
+ // If we had a carry-set on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptCarrySet && CarrySetting) {
+ Parser.eatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Mnemonic +
+ "' can not set flags, but 's' suffix specified");
+ }
+ // If we had a predication code on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) {
+ Parser.eatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Mnemonic +
+ "' is not predicable, but condition code specified");
+ }
+
+ // Add the carry setting operand, if necessary.
+ if (CanAcceptCarrySet) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size());
+ Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
+ Loc));
+ }
+
+ // Add the predication code operand, if necessary.
+ if (CanAcceptPredicationCode) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() +
+ CarrySetting);
+ Operands.push_back(ARMOperand::CreateCondCode(
+ ARMCC::CondCodes(PredicationCode), Loc));
+ }
+
+ // Add the processor imod operand, if necessary.
+ if (ProcessorIMod) {
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(ProcessorIMod, getContext()),
+ NameLoc, NameLoc));
+ }
+
+ // Add the remaining tokens in the mnemonic.
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ StringRef ExtraToken = Name.slice(Start, Next);
+
+ // Some NEON instructions have an optional datatype suffix that is
+ // completely ignored. Check for that.
+ if (isDataTypeToken(ExtraToken) &&
+ doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken))
+ continue;
+
+ if (ExtraToken != ".n") {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
+ Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc));
+ }
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (parseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (parseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ // Some instructions, mostly Thumb, have forms for the same mnemonic that
+ // do and don't have a cc_out optional-def operand. With some spot-checks
+ // of the operand list, we can figure out which variant we're trying to
+ // parse and adjust accordingly before actually matching. We shouldn't ever
+ // try to remove a cc_out operand that was explicitly set on the the
+ // mnemonic, of course (CarrySetting == true). Reason number #317 the
+ // table driven matcher doesn't fit well with the ARM instruction set.
+ if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete Op;
+ }
+
+ // ARM mode 'blx' need special handling, as the register operand version
+ // is predicable, but the label operand version is not. So, we can't rely
+ // on the Mnemonic based checking to correctly figure out when to put
+ // a k_CondCode operand in the list. If we're trying to match the label
+ // version, remove the k_CondCode operand here.
+ if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
+ static_cast<ARMOperand*>(Operands[2])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete Op;
+ }
+
+ // Adjust operands of ldrexd/strexd to MCK_GPRPair.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when parsing from asm, the two GRPs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (!isThumb() && Operands.size() > 4 &&
+ (Mnemonic == "ldrexd" || Mnemonic == "strexd")) {
+ bool isLoad = (Mnemonic == "ldrexd");
+ unsigned Idx = isLoad ? 2 : 3;
+ ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
+ ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
+
+ const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
+ // Adjust only if Op1 and Op2 are GPRs.
+ if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) &&
+ MRC.contains(Op2->getReg())) {
+ unsigned Reg1 = Op1->getReg();
+ unsigned Reg2 = Op2->getReg();
+ unsigned Rt = MRI->getEncodingValue(Reg1);
+ unsigned Rt2 = MRI->getEncodingValue(Reg2);
+
+ // Rt2 must be Rt + 1 and Rt must be even.
+ if (Rt + 1 != Rt2 || (Rt & 1)) {
+ Error(Op2->getStartLoc(), isLoad ?
+ "destination operands must be sequential" :
+ "source operands must be sequential");
+ return true;
+ }
+ unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
+ &(MRI->getRegClass(ARM::GPRPairRegClassID)));
+ Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2);
+ Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg(
+ NewReg, Op1->getStartLoc(), Op2->getEndLoc()));
+ delete Op1;
+ delete Op2;
+ }
+ }
+
+ return false;
+}
+
+// Validate context-sensitive operand constraints.
+
+// return 'true' if register list contains non-low GPR registers,
+// 'false' otherwise. If Reg is in the register list or is HiReg, set
+// 'containsReg' to true.
+static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,
+ unsigned HiReg, bool &containsReg) {
+ containsReg = false;
+ for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+ unsigned OpReg = Inst.getOperand(i).getReg();
+ if (OpReg == Reg)
+ containsReg = true;
+ // Anything other than a low register isn't legal here.
+ if (!isARMLowRegister(OpReg) && (!HiReg || OpReg != HiReg))
+ return true;
+ }
+ return false;
+}
+
+// Check if the specified regisgter is in the register list of the inst,
+// starting at the indicated operand number.
+static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
+ for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+ unsigned OpReg = Inst.getOperand(i).getReg();
+ if (OpReg == Reg)
+ return true;
+ }
+ return false;
+}
+
+// FIXME: We would really prefer to have MCInstrInfo (the wrapper around
+// the ARMInsts array) instead. Getting that here requires awkward
+// API changes, though. Better way?
+namespace llvm {
+extern const MCInstrDesc ARMInsts[];
+}
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
+ return ARMInsts[Opcode];
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool ARMAsmParser::
+validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ SMLoc Loc = Operands[0]->getStartLoc();
+ // Check the IT block state first.
+ // NOTE: BKPT instruction has the interesting property of being
+ // allowed in IT blocks, but not being predicable. It just always
+ // executes.
+ if (inITBlock() && Inst.getOpcode() != ARM::tBKPT &&
+ Inst.getOpcode() != ARM::BKPT) {
+ unsigned bit = 1;
+ if (ITState.FirstCond)
+ ITState.FirstCond = false;
+ else
+ bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+ // The instruction must be predicable.
+ if (!MCID.isPredicable())
+ return Error(Loc, "instructions in IT block must be predicable");
+ unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
+ unsigned ITCond = bit ? ITState.Cond :
+ ARMCC::getOppositeCondition(ITState.Cond);
+ if (Cond != ITCond) {
+ // Find the condition code Operand to get its SMLoc information.
+ SMLoc CondLoc;
+ for (unsigned i = 1; i < Operands.size(); ++i)
+ if (static_cast<ARMOperand*>(Operands[i])->isCondCode())
+ CondLoc = Operands[i]->getStartLoc();
+ return Error(CondLoc, "incorrect condition in IT block; got '" +
+ StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
+ "', but expected '" +
+ ARMCondCodeToString(ARMCC::CondCodes(ITCond)) + "'");
+ }
+ // Check for non-'al' condition codes outside of the IT block.
+ } else if (isThumbTwo() && MCID.isPredicable() &&
+ Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
+ ARMCC::AL && Inst.getOpcode() != ARM::tB &&
+ Inst.getOpcode() != ARM::t2B)
+ return Error(Loc, "predicated instructions must be in IT block");
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "destination operands must be sequential");
+ return false;
+ }
+ case ARM::STRD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "source operands must be sequential");
+ return false;
+ }
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "source operands must be sequential");
+ return false;
+ }
+ case ARM::SBFX:
+ case ARM::UBFX: {
+ // width must be in range [1, 32-lsb]
+ unsigned lsb = Inst.getOperand(2).getImm();
+ unsigned widthm1 = Inst.getOperand(3).getImm();
+ if (widthm1 >= 32 - lsb)
+ return Error(Operands[5]->getStartLoc(),
+ "bitfield width must be in range [1,32-lsb]");
+ return false;
+ }
+ case ARM::tLDMIA: {
+ // If we're parsing Thumb2, the .w variant is available and handles
+ // most cases that are normally illegal for a Thumb1 LDM
+ // instruction. We'll make the transformation in processInstruction()
+ // if necessary.
+ //
+ // Thumb LDM instructions are writeback iff the base register is not
+ // in the register list.
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool hasWritebackToken =
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) && !isThumbTwo())
+ return Error(Operands[3 + hasWritebackToken]->getStartLoc(),
+ "registers must be in range r0-r7");
+ // If we should have writeback, then there should be a '!' token.
+ if (!listContainsBase && !hasWritebackToken && !isThumbTwo())
+ return Error(Operands[2]->getStartLoc(),
+ "writeback operator '!' expected");
+ // If we should not have writeback, there must not be a '!'. This is
+ // true even for the 32-bit wide encodings.
+ if (listContainsBase && hasWritebackToken)
+ return Error(Operands[3]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
+
+ break;
+ }
+ case ARM::t2LDMIA_UPD: {
+ if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
+ return Error(Operands[4]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
+ break;
+ }
+ case ARM::tMUL: {
+ // The second source operand must be the same register as the destination
+ // operand.
+ //
+ // In this case, we must directly check the parsed operands because the
+ // cvtThumbMultiply() function is written in such a way that it guarantees
+ // this first statement is always true for the new Inst. Essentially, the
+ // destination is unconditionally copied into the second source operand
+ // without checking to see if it matches what we actually parsed.
+ if (Operands.size() == 6 &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[5])->getReg()) &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[4])->getReg())) {
+ return Error(Operands[3]->getStartLoc(),
+ "destination register must match source register");
+ }
+ break;
+ }
+ // Like for ldm/stm, push and pop have hi-reg handling version in Thumb2,
+ // so only issue a diagnostic for thumb1. The instructions will be
+ // switched to the t2 encodings in processInstruction() if necessary.
+ case ARM::tPOP: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase) &&
+ !isThumbTwo())
+ return Error(Operands[2]->getStartLoc(),
+ "registers must be in range r0-r7 or pc");
+ break;
+ }
+ case ARM::tPUSH: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase) &&
+ !isThumbTwo())
+ return Error(Operands[2]->getStartLoc(),
+ "registers must be in range r0-r7 or lr");
+ break;
+ }
+ case ARM::tSTMIA_UPD: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 4, 0, 0, listContainsBase) && !isThumbTwo())
+ return Error(Operands[4]->getStartLoc(),
+ "registers must be in range r0-r7");
+ break;
+ }
+ case ARM::tADDrSP: {
+ // If the non-SP source operand and the destination operand are not the
+ // same, we need thumb2 (for the wide encoding), or we have an error.
+ if (!isThumbTwo() &&
+ Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) {
+ return Error(Operands[4]->getStartLoc(),
+ "source register must be the same as destination");
+ }
+ break;
+ }
+ }
+
+ return false;
+}
+
+static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VST1LN
+ case ARM::VST1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdWB_register_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_register_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_register_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdAsm_8: Spacing = 1; return ARM::VST1LNd8;
+ case ARM::VST1LNdAsm_16: Spacing = 1; return ARM::VST1LNd16;
+ case ARM::VST1LNdAsm_32: Spacing = 1; return ARM::VST1LNd32;
+
+ // VST2LN
+ case ARM::VST2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+ case ARM::VST2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+ case ARM::VST2LNdWB_register_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_register_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_register_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNqWB_register_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+ case ARM::VST2LNqWB_register_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+ case ARM::VST2LNdAsm_8: Spacing = 1; return ARM::VST2LNd8;
+ case ARM::VST2LNdAsm_16: Spacing = 1; return ARM::VST2LNd16;
+ case ARM::VST2LNdAsm_32: Spacing = 1; return ARM::VST2LNd32;
+ case ARM::VST2LNqAsm_16: Spacing = 2; return ARM::VST2LNq16;
+ case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32;
+
+ // VST3LN
+ case ARM::VST3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+ case ARM::VST3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+ case ARM::VST3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNq16_UPD;
+ case ARM::VST3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+ case ARM::VST3LNdWB_register_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_register_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+ case ARM::VST3LNdWB_register_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+ case ARM::VST3LNqWB_register_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD;
+ case ARM::VST3LNqWB_register_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+ case ARM::VST3LNdAsm_8: Spacing = 1; return ARM::VST3LNd8;
+ case ARM::VST3LNdAsm_16: Spacing = 1; return ARM::VST3LNd16;
+ case ARM::VST3LNdAsm_32: Spacing = 1; return ARM::VST3LNd32;
+ case ARM::VST3LNqAsm_16: Spacing = 2; return ARM::VST3LNq16;
+ case ARM::VST3LNqAsm_32: Spacing = 2; return ARM::VST3LNq32;
+
+ // VST3
+ case ARM::VST3dWB_fixed_Asm_8: Spacing = 1; return ARM::VST3d8_UPD;
+ case ARM::VST3dWB_fixed_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+ case ARM::VST3dWB_fixed_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+ case ARM::VST3qWB_fixed_Asm_8: Spacing = 2; return ARM::VST3q8_UPD;
+ case ARM::VST3qWB_fixed_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+ case ARM::VST3qWB_fixed_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+ case ARM::VST3dWB_register_Asm_8: Spacing = 1; return ARM::VST3d8_UPD;
+ case ARM::VST3dWB_register_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+ case ARM::VST3dWB_register_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+ case ARM::VST3qWB_register_Asm_8: Spacing = 2; return ARM::VST3q8_UPD;
+ case ARM::VST3qWB_register_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+ case ARM::VST3qWB_register_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+ case ARM::VST3dAsm_8: Spacing = 1; return ARM::VST3d8;
+ case ARM::VST3dAsm_16: Spacing = 1; return ARM::VST3d16;
+ case ARM::VST3dAsm_32: Spacing = 1; return ARM::VST3d32;
+ case ARM::VST3qAsm_8: Spacing = 2; return ARM::VST3q8;
+ case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16;
+ case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32;
+
+ // VST4LN
+ case ARM::VST4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD;
+ case ARM::VST4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+ case ARM::VST4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+ case ARM::VST4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNq16_UPD;
+ case ARM::VST4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+ case ARM::VST4LNdWB_register_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD;
+ case ARM::VST4LNdWB_register_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+ case ARM::VST4LNdWB_register_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+ case ARM::VST4LNqWB_register_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+ case ARM::VST4LNqWB_register_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+ case ARM::VST4LNdAsm_8: Spacing = 1; return ARM::VST4LNd8;
+ case ARM::VST4LNdAsm_16: Spacing = 1; return ARM::VST4LNd16;
+ case ARM::VST4LNdAsm_32: Spacing = 1; return ARM::VST4LNd32;
+ case ARM::VST4LNqAsm_16: Spacing = 2; return ARM::VST4LNq16;
+ case ARM::VST4LNqAsm_32: Spacing = 2; return ARM::VST4LNq32;
+
+ // VST4
+ case ARM::VST4dWB_fixed_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
+ case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+ case ARM::VST4dWB_fixed_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+ case ARM::VST4qWB_fixed_Asm_8: Spacing = 2; return ARM::VST4q8_UPD;
+ case ARM::VST4qWB_fixed_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+ case ARM::VST4qWB_fixed_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+ case ARM::VST4dWB_register_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
+ case ARM::VST4dWB_register_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+ case ARM::VST4dWB_register_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+ case ARM::VST4qWB_register_Asm_8: Spacing = 2; return ARM::VST4q8_UPD;
+ case ARM::VST4qWB_register_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+ case ARM::VST4qWB_register_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+ case ARM::VST4dAsm_8: Spacing = 1; return ARM::VST4d8;
+ case ARM::VST4dAsm_16: Spacing = 1; return ARM::VST4d16;
+ case ARM::VST4dAsm_32: Spacing = 1; return ARM::VST4d32;
+ case ARM::VST4qAsm_8: Spacing = 2; return ARM::VST4q8;
+ case ARM::VST4qAsm_16: Spacing = 2; return ARM::VST4q16;
+ case ARM::VST4qAsm_32: Spacing = 2; return ARM::VST4q32;
+ }
+}
+
+static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VLD1LN
+ case ARM::VLD1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdAsm_8: Spacing = 1; return ARM::VLD1LNd8;
+ case ARM::VLD1LNdAsm_16: Spacing = 1; return ARM::VLD1LNd16;
+ case ARM::VLD1LNdAsm_32: Spacing = 1; return ARM::VLD1LNd32;
+
+ // VLD2LN
+ case ARM::VLD2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNq16_UPD;
+ case ARM::VLD2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+ case ARM::VLD2LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD;
+ case ARM::VLD2LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+ case ARM::VLD2LNdAsm_8: Spacing = 1; return ARM::VLD2LNd8;
+ case ARM::VLD2LNdAsm_16: Spacing = 1; return ARM::VLD2LNd16;
+ case ARM::VLD2LNdAsm_32: Spacing = 1; return ARM::VLD2LNd32;
+ case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16;
+ case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32;
+
+ // VLD3DUP
+ case ARM::VLD3DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
+ case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+ case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+ case ARM::VLD3DUPdAsm_8: Spacing = 1; return ARM::VLD3DUPd8;
+ case ARM::VLD3DUPdAsm_16: Spacing = 1; return ARM::VLD3DUPd16;
+ case ARM::VLD3DUPdAsm_32: Spacing = 1; return ARM::VLD3DUPd32;
+ case ARM::VLD3DUPqAsm_8: Spacing = 2; return ARM::VLD3DUPq8;
+ case ARM::VLD3DUPqAsm_16: Spacing = 2; return ARM::VLD3DUPq16;
+ case ARM::VLD3DUPqAsm_32: Spacing = 2; return ARM::VLD3DUPq32;
+
+ // VLD3LN
+ case ARM::VLD3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD;
+ case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+ case ARM::VLD3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+ case ARM::VLD3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNq16_UPD;
+ case ARM::VLD3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+ case ARM::VLD3LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD;
+ case ARM::VLD3LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+ case ARM::VLD3LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+ case ARM::VLD3LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD;
+ case ARM::VLD3LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+ case ARM::VLD3LNdAsm_8: Spacing = 1; return ARM::VLD3LNd8;
+ case ARM::VLD3LNdAsm_16: Spacing = 1; return ARM::VLD3LNd16;
+ case ARM::VLD3LNdAsm_32: Spacing = 1; return ARM::VLD3LNd32;
+ case ARM::VLD3LNqAsm_16: Spacing = 2; return ARM::VLD3LNq16;
+ case ARM::VLD3LNqAsm_32: Spacing = 2; return ARM::VLD3LNq32;
+
+ // VLD3
+ case ARM::VLD3dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD;
+ case ARM::VLD3dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+ case ARM::VLD3dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+ case ARM::VLD3qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD;
+ case ARM::VLD3qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+ case ARM::VLD3qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+ case ARM::VLD3dWB_register_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD;
+ case ARM::VLD3dWB_register_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+ case ARM::VLD3dWB_register_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+ case ARM::VLD3qWB_register_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD;
+ case ARM::VLD3qWB_register_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+ case ARM::VLD3qWB_register_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+ case ARM::VLD3dAsm_8: Spacing = 1; return ARM::VLD3d8;
+ case ARM::VLD3dAsm_16: Spacing = 1; return ARM::VLD3d16;
+ case ARM::VLD3dAsm_32: Spacing = 1; return ARM::VLD3d32;
+ case ARM::VLD3qAsm_8: Spacing = 2; return ARM::VLD3q8;
+ case ARM::VLD3qAsm_16: Spacing = 2; return ARM::VLD3q16;
+ case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32;
+
+ // VLD4LN
+ case ARM::VLD4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD;
+ case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+ case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+ case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNq16_UPD;
+ case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+ case ARM::VLD4LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD;
+ case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+ case ARM::VLD4LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+ case ARM::VLD4LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD;
+ case ARM::VLD4LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+ case ARM::VLD4LNdAsm_8: Spacing = 1; return ARM::VLD4LNd8;
+ case ARM::VLD4LNdAsm_16: Spacing = 1; return ARM::VLD4LNd16;
+ case ARM::VLD4LNdAsm_32: Spacing = 1; return ARM::VLD4LNd32;
+ case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16;
+ case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32;
+
+ // VLD4DUP
+ case ARM::VLD4DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD;
+ case ARM::VLD4DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+ case ARM::VLD4DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPq8_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPq16_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+ case ARM::VLD4DUPdAsm_8: Spacing = 1; return ARM::VLD4DUPd8;
+ case ARM::VLD4DUPdAsm_16: Spacing = 1; return ARM::VLD4DUPd16;
+ case ARM::VLD4DUPdAsm_32: Spacing = 1; return ARM::VLD4DUPd32;
+ case ARM::VLD4DUPqAsm_8: Spacing = 2; return ARM::VLD4DUPq8;
+ case ARM::VLD4DUPqAsm_16: Spacing = 2; return ARM::VLD4DUPq16;
+ case ARM::VLD4DUPqAsm_32: Spacing = 2; return ARM::VLD4DUPq32;
+
+ // VLD4
+ case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD;
+ case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+ case ARM::VLD4dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+ case ARM::VLD4qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD;
+ case ARM::VLD4qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+ case ARM::VLD4qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+ case ARM::VLD4dWB_register_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD;
+ case ARM::VLD4dWB_register_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+ case ARM::VLD4dWB_register_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+ case ARM::VLD4qWB_register_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD;
+ case ARM::VLD4qWB_register_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+ case ARM::VLD4qWB_register_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+ case ARM::VLD4dAsm_8: Spacing = 1; return ARM::VLD4d8;
+ case ARM::VLD4dAsm_16: Spacing = 1; return ARM::VLD4d16;
+ case ARM::VLD4dAsm_32: Spacing = 1; return ARM::VLD4d32;
+ case ARM::VLD4qAsm_8: Spacing = 2; return ARM::VLD4q8;
+ case ARM::VLD4qAsm_16: Spacing = 2; return ARM::VLD4q16;
+ case ARM::VLD4qAsm_32: Spacing = 2; return ARM::VLD4q32;
+ }
+}
+
+bool ARMAsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ switch (Inst.getOpcode()) {
+ // Alias for alternate form of 'ADR Rd, #imm' instruction.
+ case ARM::ADDri: {
+ if (Inst.getOperand(1).getReg() != ARM::PC ||
+ Inst.getOperand(5).getReg() != 0)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADR);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ // Aliases for alternate PC+imm syntax of LDR instructions.
+ case ARM::t2LDRpcrel:
+ // Select the narrow version if the immediate will fit.
+ if (Inst.getOperand(1).getImm() > 0 &&
+ Inst.getOperand(1).getImm() <= 0xff)
+ Inst.setOpcode(ARM::tLDRpci);
+ else
+ Inst.setOpcode(ARM::t2LDRpci);
+ return true;
+ case ARM::t2LDRBpcrel:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ return true;
+ case ARM::t2LDRHpcrel:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ return true;
+ case ARM::t2LDRSBpcrel:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ return true;
+ case ARM::t2LDRSHpcrel:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ return true;
+ // Handle NEON VST complex aliases.
+ case ARM::VST1LNdWB_register_Asm_8:
+ case ARM::VST1LNdWB_register_Asm_16:
+ case ARM::VST1LNdWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_register_Asm_8:
+ case ARM::VST2LNdWB_register_Asm_16:
+ case ARM::VST2LNdWB_register_Asm_32:
+ case ARM::VST2LNqWB_register_Asm_16:
+ case ARM::VST2LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdWB_register_Asm_8:
+ case ARM::VST3LNdWB_register_Asm_16:
+ case ARM::VST3LNdWB_register_Asm_32:
+ case ARM::VST3LNqWB_register_Asm_16:
+ case ARM::VST3LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdWB_register_Asm_8:
+ case ARM::VST4LNdWB_register_Asm_16:
+ case ARM::VST4LNdWB_register_Asm_32:
+ case ARM::VST4LNqWB_register_Asm_16:
+ case ARM::VST4LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST1LNdWB_fixed_Asm_8:
+ case ARM::VST1LNdWB_fixed_Asm_16:
+ case ARM::VST1LNdWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_fixed_Asm_8:
+ case ARM::VST2LNdWB_fixed_Asm_16:
+ case ARM::VST2LNdWB_fixed_Asm_32:
+ case ARM::VST2LNqWB_fixed_Asm_16:
+ case ARM::VST2LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdWB_fixed_Asm_8:
+ case ARM::VST3LNdWB_fixed_Asm_16:
+ case ARM::VST3LNdWB_fixed_Asm_32:
+ case ARM::VST3LNqWB_fixed_Asm_16:
+ case ARM::VST3LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdWB_fixed_Asm_8:
+ case ARM::VST4LNdWB_fixed_Asm_16:
+ case ARM::VST4LNdWB_fixed_Asm_32:
+ case ARM::VST4LNqWB_fixed_Asm_16:
+ case ARM::VST4LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST1LNdAsm_8:
+ case ARM::VST1LNdAsm_16:
+ case ARM::VST1LNdAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdAsm_8:
+ case ARM::VST2LNdAsm_16:
+ case ARM::VST2LNdAsm_32:
+ case ARM::VST2LNqAsm_16:
+ case ARM::VST2LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdAsm_8:
+ case ARM::VST3LNdAsm_16:
+ case ARM::VST3LNdAsm_32:
+ case ARM::VST3LNqAsm_16:
+ case ARM::VST3LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdAsm_8:
+ case ARM::VST4LNdAsm_16:
+ case ARM::VST4LNdAsm_32:
+ case ARM::VST4LNqAsm_16:
+ case ARM::VST4LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // Handle NEON VLD complex aliases.
+ case ARM::VLD1LNdWB_register_Asm_8:
+ case ARM::VLD1LNdWB_register_Asm_16:
+ case ARM::VLD1LNdWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_register_Asm_8:
+ case ARM::VLD2LNdWB_register_Asm_16:
+ case ARM::VLD2LNdWB_register_Asm_32:
+ case ARM::VLD2LNqWB_register_Asm_16:
+ case ARM::VLD2LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdWB_register_Asm_8:
+ case ARM::VLD3LNdWB_register_Asm_16:
+ case ARM::VLD3LNdWB_register_Asm_32:
+ case ARM::VLD3LNqWB_register_Asm_16:
+ case ARM::VLD3LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdWB_register_Asm_8:
+ case ARM::VLD4LNdWB_register_Asm_16:
+ case ARM::VLD4LNdWB_register_Asm_32:
+ case ARM::VLD4LNqWB_register_Asm_16:
+ case ARM::VLD4LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdWB_fixed_Asm_8:
+ case ARM::VLD1LNdWB_fixed_Asm_16:
+ case ARM::VLD1LNdWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_fixed_Asm_8:
+ case ARM::VLD2LNdWB_fixed_Asm_16:
+ case ARM::VLD2LNdWB_fixed_Asm_32:
+ case ARM::VLD2LNqWB_fixed_Asm_16:
+ case ARM::VLD2LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdWB_fixed_Asm_8:
+ case ARM::VLD3LNdWB_fixed_Asm_16:
+ case ARM::VLD3LNdWB_fixed_Asm_32:
+ case ARM::VLD3LNqWB_fixed_Asm_16:
+ case ARM::VLD3LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdWB_fixed_Asm_8:
+ case ARM::VLD4LNdWB_fixed_Asm_16:
+ case ARM::VLD4LNdWB_fixed_Asm_32:
+ case ARM::VLD4LNqWB_fixed_Asm_16:
+ case ARM::VLD4LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdAsm_8:
+ case ARM::VLD1LNdAsm_16:
+ case ARM::VLD1LNdAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdAsm_8:
+ case ARM::VLD2LNdAsm_16:
+ case ARM::VLD2LNdAsm_32:
+ case ARM::VLD2LNqAsm_16:
+ case ARM::VLD2LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdAsm_8:
+ case ARM::VLD3LNdAsm_16:
+ case ARM::VLD3LNdAsm_32:
+ case ARM::VLD3LNqAsm_16:
+ case ARM::VLD3LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdAsm_8:
+ case ARM::VLD4LNdAsm_16:
+ case ARM::VLD4LNdAsm_32:
+ case ARM::VLD4LNqAsm_16:
+ case ARM::VLD4LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD3DUP single 3-element structure to all lanes instructions.
+ case ARM::VLD3DUPdAsm_8:
+ case ARM::VLD3DUPdAsm_16:
+ case ARM::VLD3DUPdAsm_32:
+ case ARM::VLD3DUPqAsm_8:
+ case ARM::VLD3DUPqAsm_16:
+ case ARM::VLD3DUPqAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3DUPdWB_fixed_Asm_8:
+ case ARM::VLD3DUPdWB_fixed_Asm_16:
+ case ARM::VLD3DUPdWB_fixed_Asm_32:
+ case ARM::VLD3DUPqWB_fixed_Asm_8:
+ case ARM::VLD3DUPqWB_fixed_Asm_16:
+ case ARM::VLD3DUPqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3DUPdWB_register_Asm_8:
+ case ARM::VLD3DUPdWB_register_Asm_16:
+ case ARM::VLD3DUPdWB_register_Asm_32:
+ case ARM::VLD3DUPqWB_register_Asm_8:
+ case ARM::VLD3DUPqWB_register_Asm_16:
+ case ARM::VLD3DUPqWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD3 multiple 3-element structure instructions.
+ case ARM::VLD3dAsm_8:
+ case ARM::VLD3dAsm_16:
+ case ARM::VLD3dAsm_32:
+ case ARM::VLD3qAsm_8:
+ case ARM::VLD3qAsm_16:
+ case ARM::VLD3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_fixed_Asm_8:
+ case ARM::VLD3dWB_fixed_Asm_16:
+ case ARM::VLD3dWB_fixed_Asm_32:
+ case ARM::VLD3qWB_fixed_Asm_8:
+ case ARM::VLD3qWB_fixed_Asm_16:
+ case ARM::VLD3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_register_Asm_8:
+ case ARM::VLD3dWB_register_Asm_16:
+ case ARM::VLD3dWB_register_Asm_32:
+ case ARM::VLD3qWB_register_Asm_8:
+ case ARM::VLD3qWB_register_Asm_16:
+ case ARM::VLD3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4DUP single 3-element structure to all lanes instructions.
+ case ARM::VLD4DUPdAsm_8:
+ case ARM::VLD4DUPdAsm_16:
+ case ARM::VLD4DUPdAsm_32:
+ case ARM::VLD4DUPqAsm_8:
+ case ARM::VLD4DUPqAsm_16:
+ case ARM::VLD4DUPqAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4DUPdWB_fixed_Asm_8:
+ case ARM::VLD4DUPdWB_fixed_Asm_16:
+ case ARM::VLD4DUPdWB_fixed_Asm_32:
+ case ARM::VLD4DUPqWB_fixed_Asm_8:
+ case ARM::VLD4DUPqWB_fixed_Asm_16:
+ case ARM::VLD4DUPqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4DUPdWB_register_Asm_8:
+ case ARM::VLD4DUPdWB_register_Asm_16:
+ case ARM::VLD4DUPdWB_register_Asm_32:
+ case ARM::VLD4DUPqWB_register_Asm_8:
+ case ARM::VLD4DUPqWB_register_Asm_16:
+ case ARM::VLD4DUPqWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4 multiple 4-element structure instructions.
+ case ARM::VLD4dAsm_8:
+ case ARM::VLD4dAsm_16:
+ case ARM::VLD4dAsm_32:
+ case ARM::VLD4qAsm_8:
+ case ARM::VLD4qAsm_16:
+ case ARM::VLD4qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4dWB_fixed_Asm_8:
+ case ARM::VLD4dWB_fixed_Asm_16:
+ case ARM::VLD4dWB_fixed_Asm_32:
+ case ARM::VLD4qWB_fixed_Asm_8:
+ case ARM::VLD4qWB_fixed_Asm_16:
+ case ARM::VLD4qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4dWB_register_Asm_8:
+ case ARM::VLD4dWB_register_Asm_16:
+ case ARM::VLD4dWB_register_Asm_32:
+ case ARM::VLD4qWB_register_Asm_8:
+ case ARM::VLD4qWB_register_Asm_16:
+ case ARM::VLD4qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST3 multiple 3-element structure instructions.
+ case ARM::VST3dAsm_8:
+ case ARM::VST3dAsm_16:
+ case ARM::VST3dAsm_32:
+ case ARM::VST3qAsm_8:
+ case ARM::VST3qAsm_16:
+ case ARM::VST3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_fixed_Asm_8:
+ case ARM::VST3dWB_fixed_Asm_16:
+ case ARM::VST3dWB_fixed_Asm_32:
+ case ARM::VST3qWB_fixed_Asm_8:
+ case ARM::VST3qWB_fixed_Asm_16:
+ case ARM::VST3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_register_Asm_8:
+ case ARM::VST3dWB_register_Asm_16:
+ case ARM::VST3dWB_register_Asm_32:
+ case ARM::VST3qWB_register_Asm_8:
+ case ARM::VST3qWB_register_Asm_16:
+ case ARM::VST3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST4 multiple 3-element structure instructions.
+ case ARM::VST4dAsm_8:
+ case ARM::VST4dAsm_16:
+ case ARM::VST4dAsm_32:
+ case ARM::VST4qAsm_8:
+ case ARM::VST4qAsm_16:
+ case ARM::VST4qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4dWB_fixed_Asm_8:
+ case ARM::VST4dWB_fixed_Asm_16:
+ case ARM::VST4dWB_fixed_Asm_32:
+ case ARM::VST4qWB_fixed_Asm_8:
+ case ARM::VST4qWB_fixed_Asm_16:
+ case ARM::VST4qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4dWB_register_Asm_8:
+ case ARM::VST4dWB_register_Asm_16:
+ case ARM::VST4dWB_register_Asm_32:
+ case ARM::VST4qWB_register_Asm_8:
+ case ARM::VST4qWB_register_Asm_16:
+ case ARM::VST4qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // Handle encoding choice for the shift-immediate instructions.
+ case ARM::t2LSLri:
+ case ARM::t2LSRri:
+ case ARM::t2ASRri: {
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !(static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2LSLri: NewOpc = ARM::tLSLri; break;
+ case ARM::t2LSRri: NewOpc = ARM::tLSRri; break;
+ case ARM::t2ASRri: NewOpc = ARM::tASRri; break;
+ }
+ // The Thumb1 operands aren't in the same order. Awesome, eh?
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+
+ // Handle the Thumb2 mode MOV complex aliases.
+ case ARM::t2MOVsr:
+ case ARM::t2MOVSsr: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(3).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRrr : ARM::t2ASRrr; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRrr : ARM::t2LSRrr; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLrr : ARM::t2LSLrr; break;
+ case ARM_AM::ror: newOpc = isNarrow ? ARM::tROR : ARM::t2RORrr; break;
+ }
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2MOVsi:
+ case ARM::t2MOVSsi: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
+ }
+ unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ if (Amount == 32) Amount = 0;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (newOpc != ARM::t2RRX)
+ TmpInst.addOperand(MCOperand::CreateImm(Amount));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the ARM mode MOV complex aliases.
+ case ARM::ASRr:
+ case ARM::LSRr:
+ case ARM::LSLr:
+ case ARM::RORr: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRr: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRr: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLr: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORr: ShiftTy = ARM_AM::ror; break;
+ }
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsr);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::ASRi:
+ case ARM::LSRi:
+ case ARM::LSLi:
+ case ARM::RORi: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRi: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRi: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLi: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORi: ShiftTy = ARM_AM::ror; break;
+ }
+ // A shift by zero is a plain MOVr, not a MOVsi.
+ unsigned Amt = Inst.getOperand(2).getImm();
+ unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi;
+ // A shift by 32 should be encoded as 0 when permitted
+ if (Amt == 32 && (ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr))
+ Amt = 0;
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt);
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (Opc == ARM::MOVsi)
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::RRXi: {
+ unsigned Shifter = ARM_AM::getSORegOpc(ARM_AM::rrx, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsi);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2LDMIA_UPD: {
+ // If this is a load of a single register, then we should use
+ // a post-indexed LDR instruction instead, per the ARM ARM.
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2LDR_POST);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2STMDB_UPD: {
+ // If this is a store of a single register, then we should use
+ // a pre-indexed STR instruction instead, per the ARM ARM.
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2STR_PRE);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::LDMIA_UPD:
+ // If this is a load of a single register via a 'pop', then we should use
+ // a post-indexed LDR instruction instead, per the ARM ARM.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" &&
+ Inst.getNumOperands() == 5) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDR_POST_IMM);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // am2offset
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ break;
+ case ARM::STMDB_UPD:
+ // If this is a store of a single register via a 'push', then we should use
+ // a pre-indexed STR instruction instead, per the ARM ARM.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" &&
+ Inst.getNumOperands() == 5) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::STR_PRE_IMM);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // addrmode_imm12
+ TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ case ARM::t2ADDri12:
+ // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
+ // mnemonic was used (not "addw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2ADDri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
+ case ARM::t2SUBri12:
+ // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
+ // mnemonic was used (not "subw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2SUBri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
+ case ARM::tADDi8:
+ // If the immediate is in the range 0-7, we want tADDi3 iff Rd was
+ // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+ // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+ // to encoding T1 if <Rd> is omitted."
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
+ Inst.setOpcode(ARM::tADDi3);
+ return true;
+ }
+ break;
+ case ARM::tSUBi8:
+ // If the immediate is in the range 0-7, we want tADDi3 iff Rd was
+ // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+ // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+ // to encoding T1 if <Rd> is omitted."
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
+ Inst.setOpcode(ARM::tSUBi3);
+ return true;
+ }
+ break;
+ case ARM::t2ADDri:
+ case ARM::t2SUBri: {
+ // If the destination and first source operand are the same, and
+ // the flags are compatible with the current IT status, use encoding T2
+ // instead of T3. For compatibility with the system 'as'. Make sure the
+ // wide encoding wasn't explicit.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ !isARMLowRegister(Inst.getOperand(0).getReg()) ||
+ (unsigned)Inst.getOperand(2).getImm() > 255 ||
+ ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
+ ARM::tADDi8 : ARM::tSUBi8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2ADDrr: {
+ // If the destination and first source operand are the same, and
+ // there's no setting of the flags, use encoding T2 instead of T3.
+ // Note that this is only for ADD, not SUB. This mirrors the system
+ // 'as' behaviour. Make sure the wide encoding wasn't explicit.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ Inst.getOperand(5).getReg() != 0 ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDhirr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::tADDrSP: {
+ // If the non-SP source operand and the destination operand are not the
+ // same, we need to use the 32-bit encoding if it's available.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) {
+ Inst.setOpcode(ARM::t2ADDrr);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ return true;
+ }
+ break;
+ }
+ case ARM::tB:
+ // A Thumb conditional branch outside of an IT block is a tBcc.
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
+ Inst.setOpcode(ARM::tBcc);
+ return true;
+ }
+ break;
+ case ARM::t2B:
+ // A Thumb2 conditional branch outside of an IT block is a t2Bcc.
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()){
+ Inst.setOpcode(ARM::t2Bcc);
+ return true;
+ }
+ break;
+ case ARM::t2Bcc:
+ // If the conditional is AL or we're in an IT block, we really want t2B.
+ if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock()) {
+ Inst.setOpcode(ARM::t2B);
+ return true;
+ }
+ break;
+ case ARM::tBcc:
+ // If the conditional is AL, we really want tB.
+ if (Inst.getOperand(1).getImm() == ARMCC::AL) {
+ Inst.setOpcode(ARM::tB);
+ return true;
+ }
+ break;
+ case ARM::tLDMIA: {
+ // If the register list contains any high registers, or if the writeback
+ // doesn't match what tLDMIA can do, we need to use the 32-bit encoding
+ // instead if we're in Thumb2. Otherwise, this should have generated
+ // an error in validateInstruction().
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool hasWritebackToken =
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) ||
+ (!listContainsBase && !hasWritebackToken) ||
+ (listContainsBase && hasWritebackToken)) {
+ // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+ assert (isThumbTwo());
+ Inst.setOpcode(hasWritebackToken ? ARM::t2LDMIA_UPD : ARM::t2LDMIA);
+ // If we're switching to the updating version, we need to insert
+ // the writeback tied operand.
+ if (hasWritebackToken)
+ Inst.insert(Inst.begin(),
+ MCOperand::CreateReg(Inst.getOperand(0).getReg()));
+ return true;
+ }
+ break;
+ }
+ case ARM::tSTMIA_UPD: {
+ // If the register list contains any high registers, we need to use
+ // the 32-bit encoding instead if we're in Thumb2. Otherwise, this
+ // should have generated an error in validateInstruction().
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 4, Rn, 0, listContainsBase)) {
+ // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2STMIA_UPD);
+ return true;
+ }
+ break;
+ }
+ case ARM::tPOP: {
+ bool listContainsBase;
+ // If the register list contains any high registers, we need to use
+ // the 32-bit encoding instead if we're in Thumb2. Otherwise, this
+ // should have generated an error in validateInstruction().
+ if (!checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase))
+ return false;
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2LDMIA_UPD);
+ // Add the base register and writeback operands.
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ return true;
+ }
+ case ARM::tPUSH: {
+ bool listContainsBase;
+ if (!checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase))
+ return false;
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2STMDB_UPD);
+ // Add the base register and writeback operands.
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ return true;
+ }
+ case ARM::t2MOVi: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ (unsigned)Inst.getOperand(1).getImm() <= 255 &&
+ ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
+ Inst.getOperand(4).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ // The operands aren't in the same order for tMOVi8...
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ break;
+ }
+ case ARM::t2MOVr: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ Inst.getOperand(2).getImm() == ARMCC::AL &&
+ Inst.getOperand(4).getReg() == ARM::CPSR &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ // The operands aren't the same for tMOV[S]r... (no cc_out)
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ break;
+ }
+ case ARM::t2SXTH:
+ case ARM::t2SXTB:
+ case ARM::t2UXTH:
+ case ARM::t2UXTB: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ Inst.getOperand(2).getImm() == 0 &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Illegal opcode!");
+ case ARM::t2SXTH: NewOpc = ARM::tSXTH; break;
+ case ARM::t2SXTB: NewOpc = ARM::tSXTB; break;
+ case ARM::t2UXTH: NewOpc = ARM::tUXTH; break;
+ case ARM::t2UXTB: NewOpc = ARM::tUXTB; break;
+ }
+ // The operands aren't the same for thumb1 (no rotate operand).
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ break;
+ }
+ case ARM::MOVsi: {
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
+ // rrx shifts and asr/lsr of #32 is encoded as 0
+ if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr)
+ return false;
+ if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) {
+ // Shifting by zero is accepted as a vanilla 'MOVr'
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::ANDrsi:
+ case ARM::ORRrsi:
+ case ARM::EORrsi:
+ case ARM::BICrsi:
+ case ARM::SUBrsi:
+ case ARM::ADDrsi: {
+ unsigned newOpc;
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(3).getImm());
+ if (SOpc == ARM_AM::rrx) return false;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ANDrsi: newOpc = ARM::ANDrr; break;
+ case ARM::ORRrsi: newOpc = ARM::ORRrr; break;
+ case ARM::EORrsi: newOpc = ARM::EORrr; break;
+ case ARM::BICrsi: newOpc = ARM::BICrr; break;
+ case ARM::SUBrsi: newOpc = ARM::SUBrr; break;
+ case ARM::ADDrsi: newOpc = ARM::ADDrr; break;
+ }
+ // If the shift is by zero, use the non-shifted instruction definition.
+ // The exception is for right shifts, where 0 == 32
+ if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0 &&
+ !(SOpc == ARM_AM::lsr || SOpc == ARM_AM::asr)) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::ITasm:
+ case ARM::t2IT: {
+ // The mask bits for all but the first condition are represented as
+ // the low bit of the condition code value implies 't'. We currently
+ // always have 1 implies 't', so XOR toggle the bits if the low bit
+ // of the condition code is zero.
+ MCOperand &MO = Inst.getOperand(1);
+ unsigned Mask = MO.getImm();
+ unsigned OrigMask = Mask;
+ unsigned TZ = CountTrailingZeros_32(Mask);
+ if ((Inst.getOperand(0).getImm() & 1) == 0) {
+ assert(Mask && TZ <= 3 && "illegal IT mask value!");
+ for (unsigned i = 3; i != TZ; --i)
+ Mask ^= 1 << i;
+ }
+ MO.setImm(Mask);
+
+ // Set up the IT block state according to the IT instruction we just
+ // matched.
+ assert(!inITBlock() && "nested IT blocks?!");
+ ITState.Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
+ ITState.Mask = OrigMask; // Use the original mask, not the updated one.
+ ITState.CurPosition = 0;
+ ITState.FirstCond = true;
+ break;
+ }
+ case ARM::t2LSLrr:
+ case ARM::t2LSRrr:
+ case ARM::t2ASRrr:
+ case ARM::t2SBCrr:
+ case ARM::t2RORrr:
+ case ARM::t2BICrr:
+ {
+ // Assemblers should use the narrow encodings of these instructions when permissible.
+ if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg())) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
+ !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2LSLrr: NewOpc = ARM::tLSLrr; break;
+ case ARM::t2LSRrr: NewOpc = ARM::tLSRrr; break;
+ case ARM::t2ASRrr: NewOpc = ARM::tASRrr; break;
+ case ARM::t2SBCrr: NewOpc = ARM::tSBC; break;
+ case ARM::t2RORrr: NewOpc = ARM::tROR; break;
+ case ARM::t2BICrr: NewOpc = ARM::tBIC; break;
+ }
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::t2ANDrr:
+ case ARM::t2EORrr:
+ case ARM::t2ADCrr:
+ case ARM::t2ORRrr:
+ {
+ // Assemblers should use the narrow encodings of these instructions when permissible.
+ // These instructions are special in that they are commutable, so shorter encodings
+ // are available more often.
+ if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg())) &&
+ (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() ||
+ Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) &&
+ ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
+ !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2ADCrr: NewOpc = ARM::tADC; break;
+ case ARM::t2ANDrr: NewOpc = ARM::tAND; break;
+ case ARM::t2EORrr: NewOpc = ARM::tEOR; break;
+ case ARM::t2ORRrr: NewOpc = ARM::tORR; break;
+ }
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) {
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ } else {
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(1));
+ }
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ }
+ return false;
+}
+
+unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+ // 16-bit thumb arithmetic instructions either require or preclude the 'S'
+ // suffix depending on whether they're in an IT block or not.
+ unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &MCID = getInstDesc(Opc);
+ if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
+ assert(MCID.hasOptionalDef() &&
+ "optionally flag setting instruction missing optional def operand");
+ assert(MCID.NumOperands == Inst.getNumOperands() &&
+ "operand count mismatch!");
+ // Find the optional-def operand (cc_out).
+ unsigned OpNo;
+ for (OpNo = 0;
+ !MCID.OpInfo[OpNo].isOptionalDef() && OpNo < MCID.NumOperands;
+ ++OpNo)
+ ;
+ // If we're parsing Thumb1, reject it completely.
+ if (isThumbOne() && Inst.getOperand(OpNo).getReg() != ARM::CPSR)
+ return Match_MnemonicFail;
+ // If we're parsing Thumb2, which form is legal depends on whether we're
+ // in an IT block.
+ if (isThumbTwo() && Inst.getOperand(OpNo).getReg() != ARM::CPSR &&
+ !inITBlock())
+ return Match_RequiresITBlock;
+ if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
+ inITBlock())
+ return Match_RequiresNotITBlock;
+ }
+ // Some high-register supporting Thumb1 encodings only allow both registers
+ // to be from r0-r7 when in Thumb2.
+ else if (Opc == ARM::tADDhirr && isThumbOne() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()))
+ return Match_RequiresThumb2;
+ // Others only require ARMv6 or later.
+ else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() &&
+ isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()))
+ return Match_RequiresV6;
+ return Match_Success;
+}
+
+static const char *getSubtargetFeatureName(unsigned Val);
+bool ARMAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult;
+
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+ switch (MatchResult) {
+ default: break;
+ case Match_Success:
+ // Context sensitive operand constraints aren't handled by the matcher,
+ // so check them here.
+ if (validateInstruction(Inst, Operands)) {
+ // Still progress the IT block, otherwise one wrong condition causes
+ // nasty cascading errors.
+ forwardITPosition();
+ return true;
+ }
+
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other. E.g.,
+ // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
+ while (processInstruction(Inst, Operands))
+ ;
+
+ // Only move forward at the very end so that everything in validate
+ // and process gets a consistent answer about whether we're in an IT
+ // block.
+ forwardITPosition();
+
+ // ITasm is an ARM mode pseudo-instruction that just sets the ITblock and
+ // doesn't actually encode.
+ if (Inst.getOpcode() == ARM::ITasm)
+ return false;
+
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+ // Special case the error message for the very common case where only
+ // a single subtarget feature is missing (Thumb vs. ARM, e.g.).
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg);
+ }
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction",
+ ((ARMOperand*)Operands[0])->getLocRange());
+ case Match_RequiresNotITBlock:
+ return Error(IDLoc, "flag setting instruction only valid outside IT block");
+ case Match_RequiresITBlock:
+ return Error(IDLoc, "instruction only valid inside IT block");
+ case Match_RequiresV6:
+ return Error(IDLoc, "instruction variant requires ARMv6 or later");
+ case Match_RequiresThumb2:
+ return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_ImmRange0_15: {
+ SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
+ }
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+}
+
+/// parseDirective parses the arm specific directives
+bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return parseDirectiveWord(4, DirectiveID.getLoc());
+ else if (IDVal == ".thumb")
+ return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".arm")
+ return parseDirectiveARM(DirectiveID.getLoc());
+ else if (IDVal == ".thumb_func")
+ return parseDirectiveThumbFunc(DirectiveID.getLoc());
+ else if (IDVal == ".code")
+ return parseDirectiveCode(DirectiveID.getLoc());
+ else if (IDVal == ".syntax")
+ return parseDirectiveSyntax(DirectiveID.getLoc());
+ else if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
+ else if (IDVal == ".arch")
+ return parseDirectiveArch(DirectiveID.getLoc());
+ else if (IDVal == ".eabi_attribute")
+ return parseDirectiveEabiAttr(DirectiveID.getLoc());
+ return true;
+}
+
+/// parseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// parseDirectiveThumb
+/// ::= .thumb
+bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ return false;
+}
+
+/// parseDirectiveARM
+/// ::= .arm
+bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ return false;
+}
+
+/// parseDirectiveThumbFunc
+/// ::= .thumbfunc symbol_name
+bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
+ const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
+ bool isMachO = MAI.hasSubsectionsViaSymbols();
+ StringRef Name;
+ bool needFuncName = true;
+
+ // Darwin asm has (optionally) function name after .thumb_func direction
+ // ELF doesn't
+ if (isMachO) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::EndOfStatement)) {
+ if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
+ return Error(L, "unexpected token in .thumb_func directive");
+ Name = Tok.getIdentifier();
+ Parser.Lex(); // Consume the identifier token.
+ needFuncName = false;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+
+ // Eat the end of statement and any blank lines that follow.
+ while (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex();
+
+ // FIXME: assuming function name will be the line following .thumb_func
+ // We really should be checking the next symbol definition even if there's
+ // stuff in between.
+ if (needFuncName) {
+ Name = Parser.getTok().getIdentifier();
+ }
+
+ // Mark symbol as a thumb symbol.
+ MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
+ getParser().getStreamer().EmitThumbFunc(Func);
+ return false;
+}
+
+/// parseDirectiveSyntax
+/// ::= .syntax unified | divided
+bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(L, "unexpected token in .syntax directive");
+ StringRef Mode = Tok.getString();
+ if (Mode == "unified" || Mode == "UNIFIED")
+ Parser.Lex();
+ else if (Mode == "divided" || Mode == "DIVIDED")
+ return Error(L, "'.syntax divided' arm asssembly not supported");
+ else
+ return Error(L, "unrecognized syntax mode in .syntax directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.Lex();
+
+ // TODO tell the MC streamer the mode
+ // getParser().getStreamer().Emit???();
+ return false;
+}
+
+/// parseDirectiveCode
+/// ::= .code 16 | 32
+bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Integer))
+ return Error(L, "unexpected token in .code directive");
+ int64_t Val = Parser.getTok().getIntVal();
+ if (Val == 16)
+ Parser.Lex();
+ else if (Val == 32)
+ Parser.Lex();
+ else
+ return Error(L, "invalid operand to .code directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.Lex();
+
+ if (Val == 16) {
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ } else {
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ }
+
+ return false;
+}
+
+/// parseDirectiveReq
+/// ::= name .req registername
+bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ unsigned Reg;
+ SMLoc SRegLoc, ERegLoc;
+ if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
+ Parser.eatToEndOfStatement();
+ return Error(SRegLoc, "register name expected");
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Parser.eatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected input in .req directive.");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg)
+ return Error(SRegLoc, "redefinition of '" + Name +
+ "' does not match original.");
+
+ return false;
+}
+
+/// parseDirectiveUneq
+/// ::= .unreq registername
+bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.eatToEndOfStatement();
+ return Error(L, "unexpected input in .unreq directive.");
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
+/// parseDirectiveArch
+/// ::= .arch token
+bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
+ return true;
+}
+
+/// parseDirectiveEabiAttr
+/// ::= .eabi_attribute int, int
+bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
+ return true;
+}
+
+/// Force static initialization.
+extern "C" void LLVMInitializeARMAsmParser() {
+ RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget);
+ RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
+#define GET_MATCHER_IMPLEMENTATION
+#include "ARMGenAsmMatcher.inc"
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+ unsigned Kind) {
+ ARMOperand *Op = static_cast<ARMOperand*>(AsmOp);
+ // If the kind is a token for a literal immediate, check if our asm
+ // operand matches. This is for InstAliases which have a fixed-value
+ // immediate in the syntax.
+ if (Kind == MCK__35_0 && Op->isImm()) {
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (!CE)
+ return Match_InvalidOperand;
+ if (CE->getValue() == 0)
+ return Match_Success;
+ }
+ return Match_InvalidOperand;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
new file mode 100644
index 000000000000..2e009e55e3b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -0,0 +1,4553 @@
+//===-- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-disassembler"
+
+#include "llvm/MC/MCDisassembler.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+ // Handles the condition code status of instructions in IT blocks
+ class ITStatus
+ {
+ public:
+ // Returns the condition code for instruction in IT block
+ unsigned getITCC() {
+ unsigned CC = ARMCC::AL;
+ if (instrInITBlock())
+ CC = ITStates.back();
+ return CC;
+ }
+
+ // Advances the IT block state to the next T or E
+ void advanceITState() {
+ ITStates.pop_back();
+ }
+
+ // Returns true if the current instruction is in an IT block
+ bool instrInITBlock() {
+ return !ITStates.empty();
+ }
+
+ // Returns true if current instruction is the last instruction in an IT block
+ bool instrLastInITBlock() {
+ return ITStates.size() == 1;
+ }
+
+ // Called when decoding an IT instruction. Sets the IT state for the following
+ // instructions that for the IT block. Firstcond and Mask correspond to the
+ // fields in the IT instruction encoding.
+ void setITState(char Firstcond, char Mask) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned CondBit0 = Firstcond & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ // push condition codes onto the stack the correct order for the pops
+ for (unsigned Pos = NumTZ+1; Pos <= 3; ++Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ ITStates.push_back(CCBits);
+ else
+ ITStates.push_back(CCBits ^ 1);
+ }
+ ITStates.push_back(CCBits);
+ }
+
+ private:
+ std::vector<unsigned char> ITStates;
+ };
+}
+
+namespace {
+/// ARMDisassembler - ARM disassembler for all ARM platforms.
+class ARMDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ARMDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
+ }
+
+ ~ARMDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+};
+
+/// ThumbDisassembler - Thumb disassembler for all Thumb platforms.
+class ThumbDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ThumbDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
+ }
+
+ ~ThumbDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+private:
+ mutable ITStatus ITBlock;
+ DecodeStatus AddThumbPredicate(MCInst&) const;
+ void UpdateThumbVFPPredicate(MCInst&) const;
+};
+}
+
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+ switch (In) {
+ case MCDisassembler::Success:
+ // Out stays the same.
+ return true;
+ case MCDisassembler::SoftFail:
+ Out = In;
+ return true;
+ case MCDisassembler::Fail:
+ Out = In;
+ return false;
+ }
+ llvm_unreachable("Invalid DecodeStatus!");
+}
+
+
+// Forward declare these because the autogenerated code will reference them.
+// Definitions are further down.
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst,
+ unsigned Insn,
+ uint64_t Adddress,
+ const void *Decoder);
+static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+#include "ARMGenDisassemblerTables.inc"
+
+static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new ARMDisassembler(STI);
+}
+
+static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new ThumbDisassembler(STI);
+}
+
+DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ assert(!(STI.getFeatureBits() & ARM::ModeThumb) &&
+ "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!");
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // Encoded as a small-endian 32-bit word in the stream.
+ uint32_t insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
+
+ // Calling the auto-generated decoder function.
+ DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn,
+ Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ // VFP and NEON instructions, similarly, are shared between ARM
+ // and Thumb modes.
+ MI.clear();
+ result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+namespace llvm {
+extern const MCInstrDesc ARMInsts[];
+}
+
+/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
+/// immediate Value in the MCInst. The immediate Value has had any PC
+/// adjustment made by the caller. If the instruction is a branch instruction
+/// then isBranch is true, else false. If the getOpInfo() function was set as
+/// part of the setupForSymbolicDisassembly() call then that function is called
+/// to get any symbolic information at the Address for this instruction. If
+/// that returns non-zero then the symbolic information it returns is used to
+/// create an MCExpr and that is added as an operand to the MCInst. If
+/// getOpInfo() returns zero and isBranch is true then a symbol look up for
+/// Value is done and if a symbol is found an MCExpr is created with that, else
+/// an MCExpr with Value is created. This function returns true if it adds an
+/// operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
+ bool isBranch, uint64_t InstSize,
+ MCInst &MI, const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
+ struct LLVMOpInfo1 SymbolicOp;
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ SymbolicOp.Value = Value;
+ void *DisInfo = Dis->getDisInfoBlock();
+
+ if (!getOpInfo ||
+ !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+ // Clear SymbolicOp.Value from above and also all other fields.
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (!SymbolLookUp)
+ return false;
+ uint64_t ReferenceType;
+ if (isBranch)
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ else
+ ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+ const char *ReferenceName;
+ uint64_t SymbolValue = 0x00000000ffffffffULL & Value;
+ const char *Name = SymbolLookUp(DisInfo, SymbolValue, &ReferenceType,
+ Address, &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
+ }
+ // For branches always create an MCExpr so it gets printed as hex address.
+ else if (isBranch) {
+ SymbolicOp.Value = Value;
+ }
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
+ if (!Name && !isBranch)
+ return false;
+ }
+
+ MCContext *Ctx = Dis->getMCContext();
+ const MCExpr *Add = NULL;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Sub = NULL;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Off = NULL;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
+ else
+ LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
+ else
+ Expr = LHS;
+ } else if (Add) {
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off != 0)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, *Ctx);
+ }
+
+ if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+ else
+ llvm_unreachable("bad SymbolicOp.VariantKind");
+
+ return true;
+}
+
+/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
+/// referenced by a load instruction with the base register that is the Pc.
+/// These can often be values in a literal pool near the Address of the
+/// instruction. The Address of the instruction and its immediate Value are
+/// used as a possible literal pool entry. The SymbolLookUp call back will
+/// return the name of a symbol referenced by the literal pool's entry if
+/// the referenced address is that of a symbol. Or it will return a pointer to
+/// a literal 'C' string if the referenced address of the literal pool's entry
+/// is an address into a section with 'C' string literals.
+static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
+ const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (SymbolLookUp) {
+ void *DisInfo = Dis->getDisInfoBlock();
+ uint64_t ReferenceType;
+ ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
+ const char *ReferenceName;
+ (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr ||
+ ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+ (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
+ }
+}
+
+// Thumb1 instructions don't have explicit S bits. Rather, they
+// implicitly set CPSR. Since it's not represented in the encoding, the
+// auto-generated decoder won't inject the CPSR operand. We need to fix
+// that as a post-pass.
+static void AddThumb1SBit(MCInst &MI, bool InITBlock) {
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ MCInst::iterator I = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (I == MI.end()) break;
+ if (OpInfo[i].isOptionalDef() && OpInfo[i].RegClass == ARM::CCRRegClassID) {
+ if (i > 0 && OpInfo[i-1].isPredicate()) continue;
+ MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR));
+ return;
+ }
+ }
+
+ MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR));
+}
+
+// Most Thumb instructions don't have explicit predicates in the
+// encoding, but rather get their predicates from IT context. We need
+// to fix up the predicate operands using this context information as a
+// post-pass.
+MCDisassembler::DecodeStatus
+ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
+ MCDisassembler::DecodeStatus S = Success;
+
+ // A few instructions actually have predicates encoded in them. Don't
+ // try to overwrite it if we're seeing one of those.
+ switch (MI.getOpcode()) {
+ case ARM::tBcc:
+ case ARM::t2Bcc:
+ case ARM::tCBZ:
+ case ARM::tCBNZ:
+ case ARM::tCPS:
+ case ARM::t2CPS3p:
+ case ARM::t2CPS2p:
+ case ARM::t2CPS1p:
+ case ARM::tMOVSr:
+ case ARM::tSETEND:
+ // Some instructions (mostly conditional branches) are not
+ // allowed in IT blocks.
+ if (ITBlock.instrInITBlock())
+ S = SoftFail;
+ else
+ return Success;
+ break;
+ case ARM::tB:
+ case ARM::t2B:
+ case ARM::t2TBB:
+ case ARM::t2TBH:
+ // Some instructions (mostly unconditional branches) can
+ // only appears at the end of, or outside of, an IT.
+ if (ITBlock.instrInITBlock() && !ITBlock.instrLastInITBlock())
+ S = SoftFail;
+ break;
+ default:
+ break;
+ }
+
+ // If we're in an IT block, base the predicate on that. Otherwise,
+ // assume a predicate of AL.
+ unsigned CC;
+ CC = ITBlock.getITCC();
+ if (CC == 0xF)
+ CC = ARMCC::AL;
+ if (ITBlock.instrInITBlock())
+ ITBlock.advanceITState();
+
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ MCInst::iterator I = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (I == MI.end()) break;
+ if (OpInfo[i].isPredicate()) {
+ I = MI.insert(I, MCOperand::CreateImm(CC));
+ ++I;
+ if (CC == ARMCC::AL)
+ MI.insert(I, MCOperand::CreateReg(0));
+ else
+ MI.insert(I, MCOperand::CreateReg(ARM::CPSR));
+ return S;
+ }
+ }
+
+ I = MI.insert(I, MCOperand::CreateImm(CC));
+ ++I;
+ if (CC == ARMCC::AL)
+ MI.insert(I, MCOperand::CreateReg(0));
+ else
+ MI.insert(I, MCOperand::CreateReg(ARM::CPSR));
+
+ return S;
+}
+
+// Thumb VFP instructions are a special case. Because we share their
+// encodings between ARM and Thumb modes, and they are predicable in ARM
+// mode, the auto-generated decoder will give them an (incorrect)
+// predicate operand. We need to rewrite these operands based on the IT
+// context as a post-pass.
+void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
+ unsigned CC;
+ CC = ITBlock.getITCC();
+ if (ITBlock.instrInITBlock())
+ ITBlock.advanceITState();
+
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ MCInst::iterator I = MI.begin();
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (OpInfo[i].isPredicate() ) {
+ I->setImm(CC);
+ ++I;
+ if (CC == ARMCC::AL)
+ I->setReg(0);
+ else
+ I->setReg(ARM::CPSR);
+ return;
+ }
+ }
+}
+
+DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ assert((STI.getFeatureBits() & ARM::ModeThumb) &&
+ "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
+
+ // We want to read exactly 2 bytes of data.
+ if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint16_t insn16 = (bytes[1] << 8) | bytes[0];
+ DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16,
+ Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 2;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ MI.clear();
+ result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16,
+ Address, this, STI);
+ if (result) {
+ Size = 2;
+ bool InITBlock = ITBlock.instrInITBlock();
+ Check(result, AddThumbPredicate(MI));
+ AddThumb1SBit(MI, InITBlock);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeInstruction(DecoderTableThumb216, MI, insn16,
+ Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 2;
+
+ // Nested IT blocks are UNPREDICTABLE. Must be checked before we add
+ // the Thumb predicate.
+ if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock())
+ result = MCDisassembler::SoftFail;
+
+ Check(result, AddThumbPredicate(MI));
+
+ // If we find an IT instruction, we need to parse its condition
+ // code and mask operands so that we can apply them correctly
+ // to the subsequent instructions.
+ if (MI.getOpcode() == ARM::t2IT) {
+
+ unsigned Firstcond = MI.getOperand(0).getImm();
+ unsigned Mask = MI.getOperand(1).getImm();
+ ITBlock.setITState(Firstcond, Mask);
+ }
+
+ return result;
+ }
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint32_t insn32 = (bytes[3] << 8) |
+ (bytes[2] << 0) |
+ (bytes[1] << 24) |
+ (bytes[0] << 16);
+ MI.clear();
+ result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ bool InITBlock = ITBlock.instrInITBlock();
+ Check(result, AddThumbPredicate(MI));
+ AddThumb1SBit(MI, InITBlock);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ MI.clear();
+ result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ UpdateThumbVFPPredicate(MI);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ if (fieldFromInstruction(insn32, 24, 8) == 0xF9) {
+ MI.clear();
+ uint32_t NEONLdStInsn = insn32;
+ NEONLdStInsn &= 0xF0FFFFFF;
+ NEONLdStInsn |= 0x04000000;
+ result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn,
+ Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+ }
+
+ if (fieldFromInstruction(insn32, 24, 4) == 0xF) {
+ MI.clear();
+ uint32_t NEONDataInsn = insn32;
+ NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
+ NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
+ NEONDataInsn |= 0x12000000; // Set bits 28 and 25
+ result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn,
+ Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+ }
+
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+
+extern "C" void LLVMInitializeARMDisassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheARMTarget,
+ createARMDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
+ createThumbDisassembler);
+}
+
+static const uint16_t GPRDecoderTable[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, ARM::SP, ARM::LR, ARM::PC
+};
+
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15)
+ S = MCDisassembler::SoftFail;
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+
+ return S;
+}
+
+static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ unsigned Register = 0;
+ switch (RegNo) {
+ case 0:
+ Register = ARM::R0;
+ break;
+ case 1:
+ Register = ARM::R1;
+ break;
+ case 2:
+ Register = ARM::R2;
+ break;
+ case 3:
+ Register = ARM::R3;
+ break;
+ case 9:
+ Register = ARM::R9;
+ break;
+ case 12:
+ Register = ARM::R12;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static const uint16_t SPRDecoderTable[] = {
+ ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15,
+ ARM::S16, ARM::S17, ARM::S18, ARM::S19,
+ ARM::S20, ARM::S21, ARM::S22, ARM::S23,
+ ARM::S24, ARM::S25, ARM::S26, ARM::S27,
+ ARM::S28, ARM::S29, ARM::S30, ARM::S31
+};
+
+static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = SPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t DPRDecoderTable[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+ ARM::D12, ARM::D13, ARM::D14, ARM::D15,
+ ARM::D16, ARM::D17, ARM::D18, ARM::D19,
+ ARM::D20, ARM::D21, ARM::D22, ARM::D23,
+ ARM::D24, ARM::D25, ARM::D26, ARM::D27,
+ ARM::D28, ARM::D29, ARM::D30, ARM::D31
+};
+
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus
+DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static const uint16_t QPRDecoderTable[] = {
+ ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+ ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7,
+ ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
+ ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15
+};
+
+
+static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+ RegNo >>= 1;
+
+ unsigned Register = QPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t DPairDecoderTable[] = {
+ ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6,
+ ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12,
+ ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18,
+ ARM::Q9, ARM::D19_D20, ARM::Q10, ARM::D21_D22, ARM::Q11, ARM::D23_D24,
+ ARM::Q12, ARM::D25_D26, ARM::Q13, ARM::D27_D28, ARM::Q14, ARM::D29_D30,
+ ARM::Q15
+};
+
+static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 30)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPairDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t DPairSpacedDecoderTable[] = {
+ ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5,
+ ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9,
+ ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13,
+ ARM::D12_D14, ARM::D13_D15, ARM::D14_D16, ARM::D15_D17,
+ ARM::D16_D18, ARM::D17_D19, ARM::D18_D20, ARM::D19_D21,
+ ARM::D20_D22, ARM::D21_D23, ARM::D22_D24, ARM::D23_D25,
+ ARM::D24_D26, ARM::D25_D27, ARM::D26_D28, ARM::D27_D29,
+ ARM::D28_D30, ARM::D29_D31
+};
+
+static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 29)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPairSpacedDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0xF) return MCDisassembler::Fail;
+ // AL predicate is not allowed on Thumb1 branches.
+ if (Inst.getOpcode() == ARM::tBcc && Val == 0xE)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ if (Val == ARMCC::AL) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ } else
+ Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val)
+ Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ uint32_t imm = Val & 0xFF;
+ uint32_t rot = (Val & 0xF00) >> 7;
+ uint32_t rot_imm = (imm >> rot) | (imm << ((32-rot) & 0x1F));
+ Inst.addOperand(MCOperand::CreateImm(rot_imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned imm = fieldFromInstruction(Val, 7, 5);
+
+ // Register-immediate
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ Shift = ARM_AM::lsl;
+ break;
+ case 1:
+ Shift = ARM_AM::lsr;
+ break;
+ case 2:
+ Shift = ARM_AM::asr;
+ break;
+ case 3:
+ Shift = ARM_AM::ror;
+ break;
+ }
+
+ if (Shift == ARM_AM::ror && imm == 0)
+ Shift = ARM_AM::rrx;
+
+ unsigned Op = Shift | (imm << 3);
+ Inst.addOperand(MCOperand::CreateImm(Op));
+
+ return S;
+}
+
+static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned Rs = fieldFromInstruction(Val, 8, 4);
+
+ // Register-register
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rs, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ Shift = ARM_AM::lsl;
+ break;
+ case 1:
+ Shift = ARM_AM::lsr;
+ break;
+ case 2:
+ Shift = ARM_AM::asr;
+ break;
+ case 3:
+ Shift = ARM_AM::ror;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+
+ return S;
+}
+
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ bool writebackLoad = false;
+ unsigned writebackReg = 0;
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ writebackLoad = true;
+ writebackReg = Inst.getOperand(0).getReg();
+ break;
+ }
+
+ // Empty register lists are not allowed.
+ if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail;
+ for (unsigned i = 0; i < 16; ++i) {
+ if (Val & (1 << i)) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback not allowed if Rn is in the target list.
+ if (writebackLoad && writebackReg == Inst.end()[-1].getReg())
+ Check(S, MCDisassembler::SoftFail);
+ }
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Vd = fieldFromInstruction(Val, 8, 5);
+ unsigned regs = fieldFromInstruction(Val, 0, 8);
+
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < (regs - 1); ++i) {
+ if (!Check(S, DecodeSPRRegisterClass(Inst, ++Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Vd = fieldFromInstruction(Val, 8, 5);
+ unsigned regs = fieldFromInstruction(Val, 0, 8);
+
+ regs = regs >> 1;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < (regs - 1); ++i) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, ++Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ // This operand encodes a mask of contiguous zeros between a specified MSB
+ // and LSB. To decode it, we create the mask of all bits MSB-and-lower,
+ // the mask of all bits LSB-and-lower, and then xor them to create
+ // the mask of that's all ones on [msb, lsb]. Finally we not it to
+ // create the final mask.
+ unsigned msb = fieldFromInstruction(Val, 5, 5);
+ unsigned lsb = fieldFromInstruction(Val, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (lsb > msb) {
+ Check(S, MCDisassembler::SoftFail);
+ // The check above will cause the warning for the "potentially undefined
+ // instruction encoding" but we can't build a bad MCOperand value here
+ // with a lsb > msb or else printing the MCInst will cause a crash.
+ lsb = msb;
+ }
+
+ uint32_t msb_mask = 0xFFFFFFFF;
+ if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
+ uint32_t lsb_mask = (1U << lsb) - 1;
+
+ Inst.addOperand(MCOperand::CreateImm(~(msb_mask ^ lsb_mask)));
+ return S;
+}
+
+static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned CRd = fieldFromInstruction(Insn, 12, 4);
+ unsigned coproc = fieldFromInstruction(Insn, 8, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDC_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDC_POST:
+ case ARM::LDC_OPTION:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDCL_PRE:
+ case ARM::LDCL_POST:
+ case ARM::LDCL_OPTION:
+ case ARM::STC_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STC_POST:
+ case ARM::STC_OPTION:
+ case ARM::STCL_OFFSET:
+ case ARM::STCL_PRE:
+ case ARM::STCL_POST:
+ case ARM::STCL_OPTION:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDC_PRE:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDC_OPTION:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2LDCL_POST:
+ case ARM::t2LDCL_OPTION:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STC_PRE:
+ case ARM::t2STC_POST:
+ case ARM::t2STC_OPTION:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STCL_PRE:
+ case ARM::t2STCL_POST:
+ case ARM::t2STCL_OPTION:
+ if (coproc == 0xA || coproc == 0xB)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(coproc));
+ Inst.addOperand(MCOperand::CreateImm(CRd));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDC2_OFFSET:
+ case ARM::t2LDC2L_OFFSET:
+ case ARM::t2LDC2_PRE:
+ case ARM::t2LDC2L_PRE:
+ case ARM::t2STC2_OFFSET:
+ case ARM::t2STC2L_OFFSET:
+ case ARM::t2STC2_PRE:
+ case ARM::t2STC2L_PRE:
+ case ARM::LDC2_OFFSET:
+ case ARM::LDC2L_OFFSET:
+ case ARM::LDC2_PRE:
+ case ARM::LDC2L_PRE:
+ case ARM::STC2_OFFSET:
+ case ARM::STC2L_OFFSET:
+ case ARM::STC2_PRE:
+ case ARM::STC2L_PRE:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDC_PRE:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STC_PRE:
+ case ARM::t2STCL_PRE:
+ case ARM::LDC_OFFSET:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDCL_PRE:
+ case ARM::STC_OFFSET:
+ case ARM::STCL_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STCL_PRE:
+ imm = ARM_AM::getAM5Opc(U ? ARM_AM::add : ARM_AM::sub, imm);
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ case ARM::t2LDC2_POST:
+ case ARM::t2LDC2L_POST:
+ case ARM::t2STC2_POST:
+ case ARM::t2STC2L_POST:
+ case ARM::LDC2_POST:
+ case ARM::LDC2L_POST:
+ case ARM::STC2_POST:
+ case ARM::STC2L_POST:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDCL_POST:
+ case ARM::t2STC_POST:
+ case ARM::t2STCL_POST:
+ case ARM::LDC_POST:
+ case ARM::LDCL_POST:
+ case ARM::STC_POST:
+ case ARM::STCL_POST:
+ imm |= U << 8;
+ // fall through.
+ default:
+ // The 'option' variant doesn't encode 'U' in the immediate since
+ // the immediate is unsigned [0,255].
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDC_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDC_POST:
+ case ARM::LDC_OPTION:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDCL_PRE:
+ case ARM::LDCL_POST:
+ case ARM::LDCL_OPTION:
+ case ARM::STC_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STC_POST:
+ case ARM::STC_OPTION:
+ case ARM::STCL_OFFSET:
+ case ARM::STCL_PRE:
+ case ARM::STCL_POST:
+ case ARM::STCL_OPTION:
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus
+DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned reg = fieldFromInstruction(Insn, 25, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+
+ // On stores, the writeback operand precedes Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRT_POST_REG:
+ case ARM::STRT_POST_IMM:
+ case ARM::STRBT_POST_REG:
+ case ARM::STRBT_POST_IMM:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // On loads, the writeback operand comes after Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRBT_POST_REG:
+ case ARM::LDRBT_POST_IMM:
+ case ARM::LDRT_POST_REG:
+ case ARM::LDRT_POST_IMM:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::AddrOpc Op = ARM_AM::add;
+ if (!fieldFromInstruction(Insn, 23, 1))
+ Op = ARM_AM::sub;
+
+ bool writeback = (P == 0) || (W == 1);
+ unsigned idx_mode = 0;
+ if (P && writeback)
+ idx_mode = ARMII::IndexModePre;
+ else if (!P && writeback)
+ idx_mode = ARMII::IndexModePost;
+
+ if (writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail; // UNPREDICTABLE
+
+ if (reg) {
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ ARM_AM::ShiftOpc Opc = ARM_AM::lsl;
+ switch( fieldFromInstruction(Insn, 5, 2)) {
+ case 0:
+ Opc = ARM_AM::lsl;
+ break;
+ case 1:
+ Opc = ARM_AM::lsr;
+ break;
+ case 2:
+ Opc = ARM_AM::asr;
+ break;
+ case 3:
+ Opc = ARM_AM::ror;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ unsigned amt = fieldFromInstruction(Insn, 7, 5);
+ if (Opc == ARM_AM::ror && amt == 0)
+ Opc = ARM_AM::rrx;
+ unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ } else {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ unsigned tmp = ARM_AM::getAM2Opc(Op, imm, ARM_AM::lsl, idx_mode);
+ Inst.addOperand(MCOperand::CreateImm(tmp));
+ }
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned imm = fieldFromInstruction(Val, 7, 5);
+ unsigned U = fieldFromInstruction(Val, 12, 1);
+
+ ARM_AM::ShiftOpc ShOp = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ ShOp = ARM_AM::lsl;
+ break;
+ case 1:
+ ShOp = ARM_AM::lsr;
+ break;
+ case 2:
+ ShOp = ARM_AM::asr;
+ break;
+ case 3:
+ ShOp = ARM_AM::ror;
+ break;
+ }
+
+ if (ShOp == ARM_AM::ror && imm == 0)
+ ShOp = ARM_AM::rrx;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ unsigned shift;
+ if (U)
+ shift = ARM_AM::getAM2Opc(ARM_AM::add, imm, ShOp);
+ else
+ shift = ARM_AM::getAM2Opc(ARM_AM::sub, imm, ShOp);
+ Inst.addOperand(MCOperand::CreateImm(shift));
+
+ return S;
+}
+
+static DecodeStatus
+DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned type = fieldFromInstruction(Insn, 22, 1);
+ unsigned imm = fieldFromInstruction(Insn, 8, 4);
+ unsigned U = ((~fieldFromInstruction(Insn, 23, 1)) & 1) << 8;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
+ unsigned Rt2 = Rt + 1;
+
+ bool writeback = (W == 1) | (P == 0);
+
+ // For {LD,ST}RD, Rt must be even, else undefined.
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (Rt & 0x1) S = MCDisassembler::SoftFail;
+ break;
+ default:
+ break;
+ }
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ if (P == 0 && W == 1)
+ S = MCDisassembler::SoftFail;
+
+ if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2))
+ S = MCDisassembler::SoftFail;
+ if (type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ if (Rt2 == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && fieldFromInstruction(Insn, 8, 4))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::STRH:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ if (writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ if (!type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (type && Rn == 15){
+ if (Rt2 == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (P == 0 && W == 1)
+ S = MCDisassembler::SoftFail;
+ if (!type && (Rt2 == 15 || Rm == 15 || Rm == Rt || Rm == Rt2))
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && Rn == 15)
+ S = MCDisassembler::SoftFail;
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRH:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ if (type && Rn == 15){
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRSH:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ if (type && Rn == 15){
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (type && (Rt == 15 || (writeback && Rn == Rt)))
+ S = MCDisassembler::SoftFail;
+ if (!type && (Rt == 15 || Rm == 15))
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ break;
+ default:
+ break;
+ }
+
+ if (writeback) { // Writeback
+ if (P)
+ U |= ARMII::IndexModePre << 9;
+ else
+ U |= ARMII::IndexModePost << 9;
+
+ // On stores, the writeback operand precedes Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::STRH:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (writeback) {
+ // On loads, the writeback operand comes after Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ case ARM::LDRH:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ case ARM::LDRHTr:
+ case ARM::LDRSBTr:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (type) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(U | (imm << 4) | Rm));
+ } else {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(U));
+ }
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned mode = fieldFromInstruction(Insn, 23, 2);
+
+ switch (mode) {
+ case 0:
+ mode = ARM_AM::da;
+ break;
+ case 1:
+ mode = ARM_AM::ia;
+ break;
+ case 2:
+ mode = ARM_AM::db;
+ break;
+ case 3:
+ mode = ARM_AM::ib;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned reglist = fieldFromInstruction(Insn, 0, 16);
+
+ if (pred == 0xF) {
+ switch (Inst.getOpcode()) {
+ case ARM::LDMDA:
+ Inst.setOpcode(ARM::RFEDA);
+ break;
+ case ARM::LDMDA_UPD:
+ Inst.setOpcode(ARM::RFEDA_UPD);
+ break;
+ case ARM::LDMDB:
+ Inst.setOpcode(ARM::RFEDB);
+ break;
+ case ARM::LDMDB_UPD:
+ Inst.setOpcode(ARM::RFEDB_UPD);
+ break;
+ case ARM::LDMIA:
+ Inst.setOpcode(ARM::RFEIA);
+ break;
+ case ARM::LDMIA_UPD:
+ Inst.setOpcode(ARM::RFEIA_UPD);
+ break;
+ case ARM::LDMIB:
+ Inst.setOpcode(ARM::RFEIB);
+ break;
+ case ARM::LDMIB_UPD:
+ Inst.setOpcode(ARM::RFEIB_UPD);
+ break;
+ case ARM::STMDA:
+ Inst.setOpcode(ARM::SRSDA);
+ break;
+ case ARM::STMDA_UPD:
+ Inst.setOpcode(ARM::SRSDA_UPD);
+ break;
+ case ARM::STMDB:
+ Inst.setOpcode(ARM::SRSDB);
+ break;
+ case ARM::STMDB_UPD:
+ Inst.setOpcode(ARM::SRSDB_UPD);
+ break;
+ case ARM::STMIA:
+ Inst.setOpcode(ARM::SRSIA);
+ break;
+ case ARM::STMIA_UPD:
+ Inst.setOpcode(ARM::SRSIA_UPD);
+ break;
+ case ARM::STMIB:
+ Inst.setOpcode(ARM::SRSIB);
+ break;
+ case ARM::STMIB_UPD:
+ Inst.setOpcode(ARM::SRSIB_UPD);
+ break;
+ default:
+ if (!Check(S, MCDisassembler::Fail)) return MCDisassembler::Fail;
+ }
+
+ // For stores (which become SRS's, the only operand is the mode.
+ if (fieldFromInstruction(Insn, 20, 1) == 0) {
+ Inst.addOperand(
+ MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4)));
+ return S;
+ }
+
+ return DecodeRFEInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail; // Tied
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeRegListOperand(Inst, reglist, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction(Insn, 18, 2);
+ unsigned M = fieldFromInstruction(Insn, 17, 1);
+ unsigned iflags = fieldFromInstruction(Insn, 6, 3);
+ unsigned mode = fieldFromInstruction(Insn, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // imod == '01' --> UNPREDICTABLE
+ // NOTE: Even though this is technically UNPREDICTABLE, we choose to
+ // return failure here. The '01' imod value is unprintable, so there's
+ // nothing useful we could do even if we returned UNPREDICTABLE.
+
+ if (imod == 1) return MCDisassembler::Fail;
+
+ if (imod && M) {
+ Inst.setOpcode(ARM::CPS3p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ } else if (imod && !M) {
+ Inst.setOpcode(ARM::CPS2p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ if (mode) S = MCDisassembler::SoftFail;
+ } else if (!imod && M) {
+ Inst.setOpcode(ARM::CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (iflags) S = MCDisassembler::SoftFail;
+ } else {
+ // imod == '00' && M == '0' --> UNPREDICTABLE
+ Inst.setOpcode(ARM::CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ S = MCDisassembler::SoftFail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction(Insn, 9, 2);
+ unsigned M = fieldFromInstruction(Insn, 8, 1);
+ unsigned iflags = fieldFromInstruction(Insn, 5, 3);
+ unsigned mode = fieldFromInstruction(Insn, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // imod == '01' --> UNPREDICTABLE
+ // NOTE: Even though this is technically UNPREDICTABLE, we choose to
+ // return failure here. The '01' imod value is unprintable, so there's
+ // nothing useful we could do even if we returned UNPREDICTABLE.
+
+ if (imod == 1) return MCDisassembler::Fail;
+
+ if (imod && M) {
+ Inst.setOpcode(ARM::t2CPS3p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ } else if (imod && !M) {
+ Inst.setOpcode(ARM::t2CPS2p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ if (mode) S = MCDisassembler::SoftFail;
+ } else if (!imod && M) {
+ Inst.setOpcode(ARM::t2CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (iflags) S = MCDisassembler::SoftFail;
+ } else {
+ // imod == '00' && M == '0' --> UNPREDICTABLE
+ Inst.setOpcode(ARM::t2CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ S = MCDisassembler::SoftFail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 8, 4);
+ unsigned imm = 0;
+
+ imm |= (fieldFromInstruction(Insn, 0, 8) << 0);
+ imm |= (fieldFromInstruction(Insn, 12, 3) << 8);
+ imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
+ imm |= (fieldFromInstruction(Insn, 26, 1) << 11);
+
+ if (Inst.getOpcode() == ARM::t2MOVTi16)
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned imm = 0;
+
+ imm |= (fieldFromInstruction(Insn, 0, 12) << 0);
+ imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
+
+ if (Inst.getOpcode() == ARM::MOVTi16)
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 8, 4);
+ unsigned Ra = fieldFromInstruction(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Ra, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned add = fieldFromInstruction(Val, 12, 1);
+ unsigned imm = fieldFromInstruction(Val, 0, 12);
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!add) imm *= -1;
+ if (imm == 0 && !add) imm = INT32_MIN;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ if (Rn == 15)
+ tryAddingPcLoadReferenceComment(Address, Address + imm + 8, Decoder);
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned U = fieldFromInstruction(Val, 8, 1);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (U)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, imm)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, imm)));
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
+}
+
+static DecodeStatus
+DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus Status = MCDisassembler::Success;
+
+ // Note the J1 and J2 values are from the encoded instruction. So here
+ // change them to I1 and I2 values via as documented:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with one trailing zero as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32);
+ unsigned S = fieldFromInstruction(Insn, 26, 1);
+ unsigned J1 = fieldFromInstruction(Insn, 13, 1);
+ unsigned J2 = fieldFromInstruction(Insn, 11, 1);
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned imm10 = fieldFromInstruction(Insn, 16, 10);
+ unsigned imm11 = fieldFromInstruction(Insn, 0, 11);
+ unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11;
+ int imm32 = SignExtend32<24>(tmp << 1);
+ if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm32));
+
+ return Status;
+}
+
+static DecodeStatus
+DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 24) << 2;
+
+ if (pred == 0xF) {
+ Inst.setOpcode(ARM::BLXi);
+ imm |= fieldFromInstruction(Insn, 24, 1) << 1;
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
+ return S;
+ }
+
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+
+static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned align = fieldFromInstruction(Val, 4, 2);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!align)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else
+ Inst.addOperand(MCOperand::CreateImm(4 << align));
+
+ return S;
+}
+
+static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ // First output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8:
+ case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register:
+ case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8:
+ case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register:
+ case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2b16:
+ case ARM::VLD2b32:
+ case ARM::VLD2b8:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b8wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ // Second output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD3q8:
+ case ARM::VLD3q16:
+ case ARM::VLD3q32:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ default:
+ break;
+ }
+
+ // Third output register
+ switch(Inst.getOpcode()) {
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD3q8:
+ case ARM::VLD3q16:
+ case ARM::VLD3q32:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Fourth output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Writeback operand
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1d8wb_fixed:
+ case ARM::VLD1d16wb_fixed:
+ case ARM::VLD1d32wb_fixed:
+ case ARM::VLD1d64wb_fixed:
+ case ARM::VLD1d8wb_register:
+ case ARM::VLD1d16wb_register:
+ case ARM::VLD1d32wb_register:
+ case ARM::VLD1d64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD1d8Twb_fixed:
+ case ARM::VLD1d8Twb_register:
+ case ARM::VLD1d16Twb_fixed:
+ case ARM::VLD1d16Twb_register:
+ case ARM::VLD1d32Twb_fixed:
+ case ARM::VLD1d32Twb_register:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d64Twb_register:
+ case ARM::VLD1d8Qwb_fixed:
+ case ARM::VLD1d8Qwb_register:
+ case ARM::VLD1d16Qwb_fixed:
+ case ARM::VLD1d16Qwb_register:
+ case ARM::VLD1d32Qwb_fixed:
+ case ARM::VLD1d32Qwb_register:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d64Qwb_register:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b8wb_register:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_register:
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // AddrMode6 Base (register+alignment)
+ if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // AddrMode6 Offset (register)
+ switch (Inst.getOpcode()) {
+ default:
+ // The below have been updated to have explicit am6offset split
+ // between fixed and register offset. For those instructions not
+ // yet updated, we need to add an additional reg0 operand for the
+ // fixed variant.
+ //
+ // The fixed offset encodes as Rm == 0xd, so we check for that.
+ if (Rm == 0xd) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ break;
+ }
+ // Fall through to handle the register offset variant.
+ case ARM::VLD1d8wb_fixed:
+ case ARM::VLD1d16wb_fixed:
+ case ARM::VLD1d32wb_fixed:
+ case ARM::VLD1d64wb_fixed:
+ case ARM::VLD1d8Twb_fixed:
+ case ARM::VLD1d16Twb_fixed:
+ case ARM::VLD1d32Twb_fixed:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d8Qwb_fixed:
+ case ARM::VLD1d16Qwb_fixed:
+ case ARM::VLD1d32Qwb_fixed:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d8wb_register:
+ case ARM::VLD1d16wb_register:
+ case ARM::VLD1d32wb_register:
+ case ARM::VLD1d64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ // Writeback Operand
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8wb_fixed:
+ case ARM::VST1d16wb_fixed:
+ case ARM::VST1d32wb_fixed:
+ case ARM::VST1d64wb_fixed:
+ case ARM::VST1d8wb_register:
+ case ARM::VST1d16wb_register:
+ case ARM::VST1d32wb_register:
+ case ARM::VST1d64wb_register:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1q8wb_register:
+ case ARM::VST1q16wb_register:
+ case ARM::VST1q32wb_register:
+ case ARM::VST1q64wb_register:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Twb_register:
+ case ARM::VST1d16Twb_register:
+ case ARM::VST1d32Twb_register:
+ case ARM::VST1d64Twb_register:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST1d8Qwb_register:
+ case ARM::VST1d16Qwb_register:
+ case ARM::VST1d32Qwb_register:
+ case ARM::VST1d64Qwb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d8wb_register:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2q8wb_register:
+ case ARM::VST2q16wb_register:
+ case ARM::VST2q32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b8wb_register:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_register:
+ if (Rm == 0xF)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // AddrMode6 Base (register+alignment)
+ if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // AddrMode6 Offset (register)
+ switch (Inst.getOpcode()) {
+ default:
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ break;
+ case ARM::VST1d8wb_fixed:
+ case ARM::VST1d16wb_fixed:
+ case ARM::VST1d32wb_fixed:
+ case ARM::VST1d64wb_fixed:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ break;
+ }
+
+
+ // First input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1q16:
+ case ARM::VST1q32:
+ case ARM::VST1q64:
+ case ARM::VST1q8:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q16wb_register:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q32wb_register:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1q64wb_register:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q8wb_register:
+ case ARM::VST2d16:
+ case ARM::VST2d32:
+ case ARM::VST2d8:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST2b16:
+ case ARM::VST2b32:
+ case ARM::VST2b8:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b8wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ // Second input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST3q8:
+ case ARM::VST3q16:
+ case ARM::VST3q32:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Third input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST3q8:
+ case ARM::VST3q16:
+ case ARM::VST3q32:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Fourth input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+
+ if (size == 0 && align == 1)
+ return MCDisassembler::Fail;
+ align *= (1 << size);
+
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ }
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
+ unsigned size = 1 << fieldFromInstruction(Insn, 6, 2);
+ align *= 2*size;
+
+ switch (Inst.getOpcode()) {
+ case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2:
+ case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register:
+ case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register:
+ case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ }
+
+ if (Rm != 0xF)
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm != 0xD && Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
+
+ if (size == 0x3) {
+ if (align == 0)
+ return MCDisassembler::Fail;
+ size = 4;
+ align = 16;
+ } else {
+ if (size == 2) {
+ size = 1 << size;
+ align *= 8;
+ } else {
+ size = 1 << size;
+ align *= 4*size;
+ }
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus
+DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned imm = fieldFromInstruction(Insn, 0, 4);
+ imm |= fieldFromInstruction(Insn, 16, 3) << 4;
+ imm |= fieldFromInstruction(Insn, 24, 1) << 7;
+ imm |= fieldFromInstruction(Insn, 8, 4) << 8;
+ imm |= fieldFromInstruction(Insn, 5, 1) << 12;
+ unsigned Q = fieldFromInstruction(Insn, 6, 1);
+
+ if (Q) {
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ switch (Inst.getOpcode()) {
+ case ARM::VORRiv4i16:
+ case ARM::VORRiv2i32:
+ case ARM::VBICiv4i16:
+ case ARM::VBICiv2i32:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VORRiv8i16:
+ case ARM::VORRiv4i32:
+ case ARM::VBICiv8i16:
+ case ARM::VBICiv4i32:
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 18, 2);
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(8 << size));
+
+ return S;
+}
+
+static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(8 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(16 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(32 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 7, 1) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+ unsigned op = fieldFromInstruction(Insn, 6, 1);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (op) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail; // Writeback
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::VTBL2:
+ case ARM::VTBX2:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned dst = fieldFromInstruction(Insn, 8, 3);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ switch(Inst.getOpcode()) {
+ default:
+ return MCDisassembler::Fail;
+ case ARM::tADR:
+ break; // tADR does not explicitly represent the PC as an operand.
+ case ARM::tADDrSPi:
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ return S;
+}
+
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(Val << 1));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 0, 3);
+ unsigned Rm = fieldFromInstruction(Val, 3, 3);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 0, 3);
+ unsigned imm = fieldFromInstruction(Val, 3, 5);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ unsigned imm = Val << 2;
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ tryAddingPcLoadReferenceComment(Address, (Address & ~2u) + imm + 4, Decoder);
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 6, 4);
+ unsigned Rm = fieldFromInstruction(Val, 2, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 2);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2PLDs:
+ case ARM::t2PLDWs:
+ case ARM::t2PLIs:
+ break;
+ default: {
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ }
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ if (Rn == 0xF) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRBs:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRHs:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSHs:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ case ARM::t2LDRSBs:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2PLDs:
+ Inst.setOpcode(ARM::t2PLDi12);
+ Inst.addOperand(MCOperand::CreateReg(ARM::PC));
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ int imm = fieldFromInstruction(Insn, 0, 12);
+ if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+ }
+
+ unsigned addrmode = fieldFromInstruction(Insn, 4, 2);
+ addrmode |= fieldFromInstruction(Insn, 0, 4) << 2;
+ addrmode |= fieldFromInstruction(Insn, 16, 4) << 6;
+ if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0)
+ Inst.addOperand(MCOperand::CreateImm(INT32_MIN));
+ else {
+ int imm = Val & 0xFF;
+
+ if (!(Val & 0x100)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm * 4));
+ }
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 9);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm8S4(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 8, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ int imm = Val & 0xFF;
+ if (Val == 0)
+ imm = INT32_MIN;
+ else if (!(Val & 0x100))
+ imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 9);
+
+ // Some instructions always use an additive offset.
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRT:
+ case ARM::t2LDRBT:
+ case ARM::t2LDRHT:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSHT:
+ case ARM::t2STRT:
+ case ARM::t2STRBT:
+ case ARM::t2STRHT:
+ imm |= 0x100;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm8(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ addr |= fieldFromInstruction(Insn, 9, 1) << 8;
+ addr |= Rn << 9;
+ unsigned load = fieldFromInstruction(Insn, 20, 1);
+
+ if (!load) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (load) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeT2AddrModeImm8(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 12);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imm = fieldFromInstruction(Insn, 0, 7);
+
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (Inst.getOpcode() == ARM::tADDrSP) {
+ unsigned Rdm = fieldFromInstruction(Insn, 0, 3);
+ Rdm |= fieldFromInstruction(Insn, 7, 1) << 3;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else if (Inst.getOpcode() == ARM::tADDspr) {
+ unsigned Rm = fieldFromInstruction(Insn, 3, 4);
+
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2;
+ unsigned flags = fieldFromInstruction(Insn, 0, 3);
+
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(flags));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned add = fieldFromInstruction(Insn, 4, 1);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(add));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ // Val is passed in as S:J1:J2:imm10H:imm10L:'0'
+ // Note only one trailing zero not two. Also the J1 and J2 values are from
+ // the encoded instruction. So here change to I1 and I2 values via:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with two trailing zeros as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:'00', 32);
+ unsigned S = (Val >> 23) & 1;
+ unsigned J1 = (Val >> 22) & 1;
+ unsigned J2 = (Val >> 21) & 1;
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21);
+ int imm32 = SignExtend32<25>(tmp << 1);
+
+ if (!tryAddingSymbolicOperand(Address,
+ (Address & ~2u) + imm32 + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm32));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0xA || Val == 0xB)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ if (Rn == ARM::SP) S = MCDisassembler::SoftFail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus
+DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction(Insn, 22, 4);
+ if (pred == 0xE || pred == 0xF) {
+ unsigned opc = fieldFromInstruction(Insn, 4, 28);
+ switch (opc) {
+ default:
+ return MCDisassembler::Fail;
+ case 0xf3bf8f4:
+ Inst.setOpcode(ARM::t2DSB);
+ break;
+ case 0xf3bf8f5:
+ Inst.setOpcode(ARM::t2DMB);
+ break;
+ case 0xf3bf8f6:
+ Inst.setOpcode(ARM::t2ISB);
+ break;
+ }
+
+ unsigned imm = fieldFromInstruction(Insn, 0, 4);
+ return DecodeMemBarrierOption(Inst, imm, Address, Decoder);
+ }
+
+ unsigned brtarget = fieldFromInstruction(Insn, 0, 11) << 1;
+ brtarget |= fieldFromInstruction(Insn, 11, 1) << 19;
+ brtarget |= fieldFromInstruction(Insn, 13, 1) << 18;
+ brtarget |= fieldFromInstruction(Insn, 16, 6) << 12;
+ brtarget |= fieldFromInstruction(Insn, 26, 1) << 20;
+
+ if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+// Decode a shifted immediate operand. These basically consist
+// of an 8-bit value, and a 4-bit directive that specifies either
+// a splat operation or a rotation.
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ unsigned ctrl = fieldFromInstruction(Val, 10, 2);
+ if (ctrl == 0) {
+ unsigned byte = fieldFromInstruction(Val, 8, 2);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
+ switch (byte) {
+ case 0:
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ case 1:
+ Inst.addOperand(MCOperand::CreateImm((imm << 16) | imm));
+ break;
+ case 2:
+ Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 8)));
+ break;
+ case 3:
+ Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 16) |
+ (imm << 8) | imm));
+ break;
+ }
+ } else {
+ unsigned unrot = fieldFromInstruction(Val, 0, 7) | 0x80;
+ unsigned rot = fieldFromInstruction(Val, 7, 5);
+ unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ }
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder){
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<9>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder){
+ // Val is passed in as S:J1:J2:imm10:imm11
+ // Note no trailing zero after imm11. Also the J1 and J2 values are from
+ // the encoded instruction. So here change to I1 and I2 values via:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with one trailing zero as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32);
+ unsigned S = (Val >> 23) & 1;
+ unsigned J1 = (Val >> 22) & 1;
+ unsigned J2 = (Val >> 21) & 1;
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21);
+ int imm32 = SignExtend32<25>(tmp << 1);
+
+ if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm32));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val & ~0xf)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!Val) return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder){
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
+ if (Rd == Rn || Rd == Rt || Rd == Rt+1) return MCDisassembler::Fail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+ if (Rm == 0xF) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 6, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0 :
+ align = 0; break;
+ case 3:
+ align = 4; break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 6, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ align = 4; break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ index = fieldFromInstruction(Insn, 5, 3);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 2;
+ break;
+ case 1:
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 4;
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 1) != 0)
+ align = 8;
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ index = fieldFromInstruction(Insn, 5, 3);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 2;
+ break;
+ case 1:
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 4;
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 1) != 0)
+ align = 8;
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 4, 2))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 4, 2))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 4;
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 8;
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ return MCDisassembler::Fail;
+ default:
+ align = 4 << fieldFromInstruction(Insn, 4, 2); break;
+ }
+
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 4;
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 8;
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ return MCDisassembler::Fail;
+ default:
+ align = 4 << fieldFromInstruction(Insn, 4, 2); break;
+ }
+
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 5, 1);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
+
+ if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 5, 1);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
+
+ if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned pred = fieldFromInstruction(Insn, 4, 4);
+ unsigned mask = fieldFromInstruction(Insn, 0, 4);
+
+ if (pred == 0xF) {
+ pred = 0xE;
+ S = MCDisassembler::SoftFail;
+ }
+
+ if (mask == 0x0) {
+ mask |= 0x8;
+ S = MCDisassembler::SoftFail;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(pred));
+ Inst.addOperand(MCOperand::CreateImm(mask));
+ return S;
+}
+
+static DecodeStatus
+DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
+ bool writeback = (W == 1) | (P == 0);
+
+ addr |= (U << 8) | (Rn << 9);
+
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ Check(S, MCDisassembler::SoftFail);
+ if (Rt == Rt2)
+ Check(S, MCDisassembler::SoftFail);
+
+ // Rt
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt2
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback operand
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // addr
+ if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus
+DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
+ bool writeback = (W == 1) | (P == 0);
+
+ addr |= (U << 8) | (Rn << 9);
+
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ Check(S, MCDisassembler::SoftFail);
+
+ // Writeback operand
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt2
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // addr
+ if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned sign1 = fieldFromInstruction(Insn, 21, 1);
+ unsigned sign2 = fieldFromInstruction(Insn, 23, 1);
+ if (sign1 != sign2) return MCDisassembler::Fail;
+
+ unsigned Val = fieldFromInstruction(Insn, 0, 8);
+ Val |= fieldFromInstruction(Insn, 12, 3) << 8;
+ Val |= fieldFromInstruction(Insn, 26, 1) << 11;
+ Val |= sign1 << 12;
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val)));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ // Shift of "asr #32" is not allowed in Thumb2 mode.
+ if (Val == 0x20) S = MCDisassembler::SoftFail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return S;
+}
+
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (Rt == Rn || Rn == Rt2)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction(Insn, 8, 4);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // VMOVv2f32 is ambiguous with these decodings.
+ if (!(imm & 0x38) && cmode == 0xF) {
+ Inst.setOpcode(ARM::VMOVv2f32);
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction(Insn, 8, 4);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // VMOVv4f32 is ambiguous with these decodings.
+ if (!(imm & 0x38) && cmode == 0xF) {
+ Inst.setOpcode(ARM::VMOVv4f32);
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 16, 4);
+ unsigned Rt = fieldFromInstruction(Val, 12, 4);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ Rm |= (fieldFromInstruction(Val, 23, 1) << 4);
+ unsigned Cond = fieldFromInstruction(Val, 28, 4);
+
+ if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, Cond, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned CRm = fieldFromInstruction(Val, 0, 4);
+ unsigned opc1 = fieldFromInstruction(Val, 4, 4);
+ unsigned cop = fieldFromInstruction(Val, 8, 4);
+ unsigned Rt = fieldFromInstruction(Val, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Val, 16, 4);
+
+ if ((cop & ~0x1) == 0xa)
+ return MCDisassembler::Fail;
+
+ if (Rt == Rt2)
+ S = MCDisassembler::SoftFail;
+
+ Inst.addOperand(MCOperand::CreateImm(cop));
+ Inst.addOperand(MCOperand::CreateImm(opc1));
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(CRm));
+
+ return S;
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
new file mode 100644
index 000000000000..2afb20d6686a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -0,0 +1,1449 @@
+//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARMInstPrinter.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#include "ARMGenAsmWriter.inc"
+
+/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
+///
+/// getSORegOffset returns an integer from 0-31, representing '32' as 0.
+static unsigned translateShiftImm(unsigned imm) {
+ // lsr #32 and asr #32 exist, but should be encoded as a 0.
+ assert((imm & ~0x1f) == 0 && "Invalid shift encoding");
+
+ if (imm == 0)
+ return 32;
+ return imm;
+}
+
+/// Prints the shift value with an immediate value.
+static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
+ unsigned ShImm, bool UseMarkup) {
+ if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
+ return;
+ O << ", ";
+
+ assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
+ O << getShiftOpcStr(ShOpc);
+
+ if (ShOpc != ARM_AM::rrx) {
+ O << " ";
+ if (UseMarkup)
+ O << "<imm:";
+ O << "#" << translateShiftImm(ShImm);
+ if (UseMarkup)
+ O << ">";
+ }
+}
+
+ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI, MII, MRI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << markup("<reg:")
+ << getRegisterName(RegNo)
+ << markup(">");
+}
+
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ unsigned Opcode = MI->getOpcode();
+
+ // Check for HINT instructions w/ canonical names.
+ if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) {
+ switch (MI->getOperand(0).getImm()) {
+ case 0: O << "\tnop"; break;
+ case 1: O << "\tyield"; break;
+ case 2: O << "\twfe"; break;
+ case 3: O << "\twfi"; break;
+ case 4: O << "\tsev"; break;
+ default:
+ // Anything else should just print normally.
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ printPredicateOperand(MI, 1, O);
+ if (Opcode == ARM::t2HINT)
+ O << ".w";
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // Check for MOVs and print canonical forms, instead.
+ if (Opcode == ARM::MOVsr) {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+ const MCOperand &MO3 = MI->getOperand(3);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
+ printSBitModifierOperand(MI, 6, O);
+ printPredicateOperand(MI, 4, O);
+
+ O << '\t';
+ printRegName(O, Dst.getReg());
+ O << ", ";
+ printRegName(O, MO1.getReg());
+
+ O << ", ";
+ printRegName(O, MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ if (Opcode == ARM::MOVsi) {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()));
+ printSBitModifierOperand(MI, 5, O);
+ printPredicateOperand(MI, 3, O);
+
+ O << '\t';
+ printRegName(O, Dst.getReg());
+ O << ", ";
+ printRegName(O, MO1.getReg());
+
+ if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) {
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ O << ", "
+ << markup("<imm:")
+ << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()))
+ << markup(">");
+ printAnnotation(O, Annot);
+ return;
+ }
+
+
+ // A8.6.123 PUSH
+ if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getNumOperands() > 5) {
+ // Should only print PUSH if there are at least two registers in the list.
+ O << '\t' << "push";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2STMDB_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ if (Opcode == ARM::STR_PRE_IMM && MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(3).getImm() == -4) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 4, O);
+ O << "\t{";
+ printRegName(O, MI->getOperand(1).getReg());
+ O << "}";
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // A8.6.122 POP
+ if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getNumOperands() > 5) {
+ // Should only print POP if there are at least two registers in the list.
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2LDMIA_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ if (Opcode == ARM::LDR_POST_IMM && MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(4).getImm() == 4) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 5, O);
+ O << "\t{";
+ printRegName(O, MI->getOperand(0).getReg());
+ O << "}";
+ printAnnotation(O, Annot);
+ return;
+ }
+
+
+ // A8.6.355 VPUSH
+ if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // A8.6.354 VPOP
+ if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ if (Opcode == ARM::tLDMIA) {
+ bool Writeback = true;
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg)
+ Writeback = false;
+ }
+
+ O << "\tldm";
+
+ printPredicateOperand(MI, 1, O);
+ O << '\t';
+ printRegName(O, BaseReg);
+ if (Writeback) O << "!";
+ O << ", ";
+ printRegisterList(MI, 3, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // Thumb1 NOP
+ if (Opcode == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::R8 &&
+ MI->getOperand(1).getReg() == ARM::R8) {
+ O << "\tnop";
+ printPredicateOperand(MI, 2, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // Combine 2 GPRs from disassember into a GPRPair to match with instr def.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when decoding them, the two GRPs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) {
+ const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
+ bool isStore = Opcode == ARM::STREXD;
+ unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
+ if (MRC.contains(Reg)) {
+ MCInst NewMI;
+ MCOperand NewReg;
+ NewMI.setOpcode(Opcode);
+
+ if (isStore)
+ NewMI.addOperand(MI->getOperand(0));
+ NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0,
+ &MRI.getRegClass(ARM::GPRPairRegClassID)));
+ NewMI.addOperand(NewReg);
+
+ // Copy the rest operands into NewMI.
+ for(unsigned i= isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+ NewMI.addOperand(MI->getOperand(i));
+ printInstruction(&NewMI, O);
+ return;
+ }
+ }
+
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ printRegName(O, Reg);
+ } else if (Op.isImm()) {
+ O << markup("<imm:")
+ << '#' << formatImm(Op.getImm())
+ << markup(">");
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex. And only print 32 unsigned bits for the address.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex((uint32_t)Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
+ }
+}
+
+void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ if (MO1.isExpr())
+ O << *MO1.getExpr();
+ else if (MO1.isImm()) {
+ O << markup("<mem:") << "[pc, "
+ << markup("<imm:") << "#" << formatImm(MO1.getImm())
+ << markup(">]>", "]");
+ }
+ else
+ llvm_unreachable("Unknown LDR label operand?");
+}
+
+// so_reg is a 4-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms. This includes:
+// REG 0 0 - e.g. R5
+// REG REG 0,SH_OPC - e.g. R5, ROR R3
+// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ printRegName(O, MO1.getReg());
+
+ // Print the shift opc.
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc == ARM_AM::rrx)
+ return;
+
+ O << ' ';
+ printRegName(O, MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+}
+
+void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ printRegName(O, MO1.getReg());
+
+ // Print the shift opc.
+ printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
+ ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #2
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ if (!MO2.getReg()) {
+ if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0.
+ O << ", "
+ << markup("<imm:")
+ << "#"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ARM_AM::getAM2Offset(MO3.getImm())
+ << markup(">");
+ }
+ O << "]" << markup(">");
+ return;
+ }
+
+ O << ", ";
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()));
+ printRegName(O, MO2.getReg());
+
+ printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO3.getImm()),
+ ARM_AM::getAM2Offset(MO3.getImm()), UseMarkup);
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
+ O << ", lsl " << markup("<imm:") << "#1" << markup(">") << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+#ifndef NDEBUG
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+ unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
+ assert(IdxMode != ARMII::IndexModePost &&
+ "Should be pre or offset index op");
+#endif
+
+ printAM2PreOrOffsetIndexOp(MI, Op, O);
+}
+
+void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
+ O << markup("<imm:")
+ << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << ImmOffs
+ << markup(">");
+ return;
+ }
+
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()));
+ printRegName(O, MO1.getReg());
+
+ printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO2.getImm()),
+ ARM_AM::getAM2Offset(MO2.getImm()), UseMarkup);
+}
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #3
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << "], " << markup(">");
+
+ if (MO2.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO3.getImm());
+ printRegName(O, MO2.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ O << markup("<imm:")
+ << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ImmOffs
+ << markup(">");
+}
+
+void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << markup("<mem:") << '[';
+ printRegName(O, MO1.getReg());
+
+ if (MO2.getReg()) {
+ O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()));
+ printRegName(O, MO2.getReg());
+ O << ']' << markup(">");
+ return;
+ }
+
+ //If the op is sub we have to print the immediate even if it is 0
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
+
+ if (ImmOffs || (op == ARM_AM::sub)) {
+ O << ", "
+ << markup("<imm:")
+ << "#"
+ << ARM_AM::getAddrOpcStr(op)
+ << ImmOffs
+ << markup(">");
+ }
+ O << ']' << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+ unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
+
+ if (IdxMode == ARMII::IndexModePost) {
+ printAM3PostIndexOp(MI, Op, O);
+ return;
+ }
+ printAM3PreOrOffsetIndexOp(MI, Op, O);
+}
+
+void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (MO1.getReg()) {
+ O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()));
+ printRegName(O, MO1.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+ O << markup("<imm:")
+ << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
+ << markup(">");
+}
+
+void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ unsigned Imm = MO.getImm();
+ O << markup("<imm:")
+ << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
+ << markup(">");
+}
+
+void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << (MO2.getImm() ? "" : "-");
+ printRegName(O, MO1.getReg());
+}
+
+void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ unsigned Imm = MO.getImm();
+ O << markup("<imm:")
+ << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
+ << markup(">");
+}
+
+
+void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
+ .getImm());
+ O << ARM_AM::getAMSubModeStr(Mode);
+}
+
+void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
+ unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
+ if (ImmOffs || Op == ARM_AM::sub) {
+ O << ", "
+ << markup("<imm:")
+ << "#"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
+ << ImmOffs * 4
+ << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (MO2.getImm()) {
+ O << ":" << (MO2.getImm() << 3);
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getReg() == 0)
+ O << "!";
+ else {
+ O << ", ";
+ printRegName(O, MO.getReg());
+ }
+}
+
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ uint32_t v = ~MO.getImm();
+ int32_t lsb = CountTrailingZeros_32(v);
+ int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
+ assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
+ O << markup("<imm:") << '#' << lsb << markup(">")
+ << ", "
+ << markup("<imm:") << '#' << width << markup(">");
+}
+
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ bool isASR = (ShiftOp & (1 << 5)) != 0;
+ unsigned Amt = ShiftOp & 0x1f;
+ if (isASR) {
+ O << ", asr "
+ << markup("<imm:")
+ << "#" << (Amt == 0 ? 32 : Amt)
+ << markup(">");
+ }
+ else if (Amt) {
+ O << ", lsl "
+ << markup("<imm:")
+ << "#" << Amt
+ << markup(">");
+ }
+}
+
+void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ if (Imm == 0)
+ return;
+ assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
+ O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">");
+}
+
+void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ // A shift amount of 32 is encoded as 0.
+ if (Imm == 0)
+ Imm = 32;
+ assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
+ O << ", asr " << markup("<imm:") << "#" << Imm << markup(">");
+}
+
+void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{";
+ for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
+ if (i != OpNum) O << ", ";
+ printRegName(O, MI->getOperand(i).getReg());
+ }
+ O << "}";
+}
+
+void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0));
+ O << ", ";
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1));
+}
+
+
+void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ if (Op.getImm())
+ O << "be";
+ else
+ O << "le";
+}
+
+void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ O << ARM_PROC::IModToString(Op.getImm());
+}
+
+void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned IFlags = Op.getImm();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ O << ARM_PROC::IFlagsToString(1 << i);
+
+ if (IFlags == 0)
+ O << "none";
+}
+
+void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned SpecRegRBit = Op.getImm() >> 4;
+ unsigned Mask = Op.getImm() & 0xf;
+
+ if (getAvailableFeatures() & ARM::FeatureMClass) {
+ unsigned SYSm = Op.getImm();
+ unsigned Opcode = MI->getOpcode();
+ // For reads of the special registers ignore the "mask encoding" bits
+ // which are only for writes.
+ if (Opcode == ARM::t2MRS_M)
+ SYSm &= 0xff;
+ switch (SYSm) {
+ default: llvm_unreachable("Unexpected mask value!");
+ case 0:
+ case 0x800: O << "apsr"; return; // with _nzcvq bits is an alias for aspr
+ case 0x400: O << "apsr_g"; return;
+ case 0xc00: O << "apsr_nzcvqg"; return;
+ case 1:
+ case 0x801: O << "iapsr"; return; // with _nzcvq bits is an alias for iapsr
+ case 0x401: O << "iapsr_g"; return;
+ case 0xc01: O << "iapsr_nzcvqg"; return;
+ case 2:
+ case 0x802: O << "eapsr"; return; // with _nzcvq bits is an alias for eapsr
+ case 0x402: O << "eapsr_g"; return;
+ case 0xc02: O << "eapsr_nzcvqg"; return;
+ case 3:
+ case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr
+ case 0x403: O << "xpsr_g"; return;
+ case 0xc03: O << "xpsr_nzcvqg"; return;
+ case 5:
+ case 0x805: O << "ipsr"; return;
+ case 6:
+ case 0x806: O << "epsr"; return;
+ case 7:
+ case 0x807: O << "iepsr"; return;
+ case 8:
+ case 0x808: O << "msp"; return;
+ case 9:
+ case 0x809: O << "psp"; return;
+ case 0x10:
+ case 0x810: O << "primask"; return;
+ case 0x11:
+ case 0x811: O << "basepri"; return;
+ case 0x12:
+ case 0x812: O << "basepri_max"; return;
+ case 0x13:
+ case 0x813: O << "faultmask"; return;
+ case 0x14:
+ case 0x814: O << "control"; return;
+ }
+ }
+
+ // As special cases, CPSR_f, CPSR_s and CPSR_fs prefer printing as
+ // APSR_nzcvq, APSR_g and APSRnzcvqg, respectively.
+ if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
+ O << "APSR_";
+ switch (Mask) {
+ default: llvm_unreachable("Unexpected mask value!");
+ case 4: O << "g"; return;
+ case 8: O << "nzcvq"; return;
+ case 12: O << "nzcvqg"; return;
+ }
+ }
+
+ if (SpecRegRBit)
+ O << "SPSR";
+ else
+ O << "CPSR";
+
+ if (Mask) {
+ O << '_';
+ if (Mask & 8) O << 'f';
+ if (Mask & 4) O << 's';
+ if (Mask & 2) O << 'x';
+ if (Mask & 1) O << 'c';
+ }
+}
+
+void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ // Handle the undefined 15 CC value here for printing so we don't abort().
+ if ((unsigned)CC == 15)
+ O << "<und>";
+ else if (CC != ARMCC::AL)
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNum).getReg()) {
+ assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
+ "Expect ARM CPSR register!");
+ O << 's';
+ }
+}
+
+void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "p" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "c" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << MI->getOperand(OpNum).getImm() << "}";
+}
+
+void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
+}
+
+void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (MO.isExpr()) {
+ O << *MO.getExpr();
+ return;
+ }
+
+ int32_t OffImm = (int32_t)MO.getImm();
+
+ O << markup("<imm:");
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else
+ O << "#" << OffImm;
+ O << markup(">");
+}
+
+void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << markup("<imm:")
+ << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
+ << markup(">");
+}
+
+void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ O << markup("<imm:")
+ << "#" << formatImm((Imm == 0 ? 32 : Imm))
+ << markup(">");
+}
+
+void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned Mask = MI->getOperand(OpNum).getImm();
+ unsigned Firstcond = MI->getOperand(OpNum-1).getImm();
+ unsigned CondBit0 = Firstcond & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ O << 't';
+ else
+ O << 'e';
+ }
+}
+
+void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (unsigned RegNum = MO2.getReg()) {
+ O << ", ";
+ printRegName(O, RegNum);
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O,
+ unsigned Scale) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (unsigned ImmOffs = MO2.getImm()) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << formatImm(ImmOffs * Scale)
+ << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 1);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 2);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms.
+// REG 0 0 - e.g. R5
+// REG IMM, SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ unsigned Reg = MO1.getReg();
+ printRegName(O, Reg);
+
+ // Print the shift opc.
+ assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+ printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
+ ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
+}
+
+void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub) {
+ O << ", "
+ << markup("<imm:")
+ << "#-" << -OffImm
+ << markup(">");
+ }
+ else if (OffImm > 0) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << OffImm
+ << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ // Don't print +0.
+ if (OffImm != 0)
+ O << ", ";
+ if (OffImm != 0 && UseMarkup)
+ O << "<imm:";
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else if (OffImm > 0)
+ O << "#" << OffImm;
+ if (OffImm != 0 && UseMarkup)
+ O << ">";
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
+ // Don't print +0.
+ if (OffImm != 0)
+ O << ", ";
+ if (OffImm != 0 && UseMarkup)
+ O << "<imm:";
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else if (OffImm > 0)
+ O << "#" << OffImm;
+ if (OffImm != 0 && UseMarkup)
+ O << ">";
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (MO2.getImm()) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << formatImm(MO2.getImm() * 4)
+ << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm();
+ O << ", " << markup("<imm:");
+ if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else
+ O << "#" << OffImm;
+ O << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
+ // Don't print +0.
+ if (OffImm != 0)
+ O << ", ";
+ if (OffImm != 0 && UseMarkup)
+ O << "<imm:";
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else if (OffImm > 0)
+ O << "#" << OffImm;
+ if (OffImm != 0 && UseMarkup)
+ O << ">";
+}
+
+void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ assert(MO2.getReg() && "Invalid so_reg load / store address!");
+ O << ", ";
+ printRegName(O, MO2.getReg());
+
+ unsigned ShAmt = MO3.getImm();
+ if (ShAmt) {
+ assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+ O << ", lsl "
+ << markup("<imm:")
+ << "#" << ShAmt
+ << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << markup("<imm:")
+ << '#' << ARM_AM::getFPImmFloat(MO.getImm())
+ << markup(">");
+}
+
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << markup("<imm:")
+ << "#0x";
+ O.write_hex(Val);
+ O << markup(">");
+}
+
+void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ O << markup("<imm:")
+ << "#" << formatImm(Imm + 1)
+ << markup(">");
+}
+
+void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ if (Imm == 0)
+ return;
+ O << ", ror "
+ << markup("<imm:")
+ << "#";
+ switch (Imm) {
+ default: assert (0 && "illegal ror immediate!");
+ case 1: O << "8"; break;
+ case 2: O << "16"; break;
+ case 3: O << "24"; break;
+ }
+ O << markup(">");
+}
+
+void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << markup("<imm:")
+ << "#" << 16 - MI->getOperand(OpNum).getImm()
+ << markup(">");
+}
+
+void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << markup("<imm:")
+ << "#" << 32 - MI->getOperand(OpNum).getImm()
+ << markup(">");
+}
+
+void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "[" << MI->getOperand(OpNum).getImm() << "]";
+}
+
+void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{";
+ printRegName(O, Reg0);
+ O << ", ";
+ printRegName(O, Reg1);
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{";
+ printRegName(O, Reg0);
+ O << ", ";
+ printRegName(O, Reg1);
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 3);
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{";
+ printRegName(O, Reg0);
+ O << "[], ";
+ printRegName(O, Reg1);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 3);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{";
+ printRegName(O, Reg0);
+ O << "[], ";
+ printRegName(O, Reg1);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 6);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 6);
+ O << "}";
+}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
new file mode 100644
index 000000000000..edff75d886e9
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -0,0 +1,163 @@
+//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTPRINTER_H
+#define ARMINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class ARMInstPrinter : public MCInstPrinter {
+public:
+ ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeTBH(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O);
+ void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned Scale);
+ void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/LICENSE.TXT b/contrib/llvm/lib/Target/ARM/LICENSE.TXT
new file mode 100755
index 000000000000..68afea12ed44
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/LICENSE.TXT
@@ -0,0 +1,47 @@
+ARM Limited
+
+Software Grant License Agreement ("Agreement")
+
+Except for the license granted herein to you, ARM Limited ("ARM") reserves all
+right, title, and interest in and to the Software (defined below).
+
+Definition
+
+"Software" means the code and documentation as well as any original work of
+authorship, including any modifications or additions to an existing work, that
+is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for
+inclusion in, or documentation of, any of the products owned or managed by LLVM
+(the "Work"). For the purposes of this definition, "submitted" means any form of
+electronic, verbal, or written communication sent to LLVM or its
+representatives, including but not limited to communication on electronic
+mailing lists, source code control systems, and issue tracking systems that are
+managed by, or on behalf of, LLVM for the purpose of discussing and improving
+the Work, but excluding communication that is conspicuously marked otherwise.
+
+1. Grant of Copyright License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants to you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable copyright license to reproduce, prepare derivative
+ works of, publicly display, publicly perform, sublicense, and distribute the
+ Software and such derivative works.
+
+2. Grant of Patent License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable (except as stated in this section) patent license
+ to make, have made, use, offer to sell, sell, import, and otherwise transfer
+ the Work, where such license applies only to those patent claims licensable
+ by ARM that are necessarily infringed by ARM's Software alone or by
+ combination of the Software with the Work to which such Software was
+ submitted. If any entity institutes patent litigation against ARM or any
+ other entity (including a cross-claim or counterclaim in a lawsuit) alleging
+ that ARM's Software, or the Work to which ARM has contributed constitutes
+ direct or contributory patent infringement, then any patent licenses granted
+ to that entity under this Agreement for the Software or Work shall terminate
+ as of the date such litigation is filed.
+
+Unless required by applicable law or agreed to in writing, the software is
+provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+either express or implied, including, without limitation, any warranties or
+conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
new file mode 100644
index 000000000000..62473b2bfdee
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -0,0 +1,668 @@
+//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM_AM - ARM Addressing Mode Stuff
+namespace ARM_AM {
+ enum ShiftOpc {
+ no_shift = 0,
+ asr,
+ lsl,
+ lsr,
+ ror,
+ rrx
+ };
+
+ enum AddrOpc {
+ sub = 0,
+ add
+ };
+
+ static inline const char *getAddrOpcStr(AddrOpc Op) {
+ return Op == sub ? "-" : "";
+ }
+
+ static inline const char *getShiftOpcStr(ShiftOpc Op) {
+ switch (Op) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::asr: return "asr";
+ case ARM_AM::lsl: return "lsl";
+ case ARM_AM::lsr: return "lsr";
+ case ARM_AM::ror: return "ror";
+ case ARM_AM::rrx: return "rrx";
+ }
+ }
+
+ static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
+ switch (Op) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror: return 3;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: llvm_unreachable("Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+ // This is stored three operands [rega, regb, opc]. The first is the base
+ // reg, the second is the shift amount (or reg0 if not present or imm). The
+ // third operand encodes the shift opcode and the imm if a reg isn't present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+ /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+ /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = CountTrailingZeros_32(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should ignore the low 6 bits, then
+ // retry the hunt.
+ if (Imm & 63U) {
+ unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U);
+ unsigned RotAmt2 = TZ2 & ~1;
+ if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into an shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+ // If this can be handled with a single shifter_op, bail out.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+ // Mask out the first hunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
+
+ /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImmValShift(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially immediate operand with a shift
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+ /// by left shifting a 8-bit immediate.
+ static inline bool isThumbImmShiftedVal(unsigned V) {
+ // If this can be handled with
+ V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImm16ValShift(unsigned Imm) {
+ // 16-bit (or less) immediates are trivially immediate operand with a shift
+ // of zero.
+ if ((Imm & ~65535U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImm16ShiftedVal - Return true if the specified value can be
+ /// obtained by left shifting a 16-bit immediate.
+ static inline bool isThumbImm16ShiftedVal(unsigned V) {
+ // If this can be handled with
+ V = (~65535U << getThumbImm16ValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+ /// isThumbImmShiftedVal, return the non-shiftd value.
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+ return V >> getThumbImmValShift(V);
+ }
+
+
+ /// getT2SOImmValSplat - Return the 12-bit encoded representation
+ /// if the specified value can be obtained by splatting the low 8 bits
+ /// into every other byte or every byte of a 32-bit value. i.e.,
+ /// 00000000 00000000 00000000 abcdefgh control = 0
+ /// 00000000 abcdefgh 00000000 abcdefgh control = 1
+ /// abcdefgh 00000000 abcdefgh 00000000 control = 2
+ /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
+ /// Return -1 if none of the above apply.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValSplatVal(unsigned V) {
+ unsigned u, Vs, Imm;
+ // control = 0
+ if ((V & 0xffffff00) == 0)
+ return V;
+
+ // If the value is zeroes in the first byte, just shift those off
+ Vs = ((V & 0xff) == 0) ? V >> 8 : V;
+ // Any passing value only has 8 bits of payload, splatted across the word
+ Imm = Vs & 0xff;
+ // Likewise, any passing values have the payload splatted into the 3rd byte
+ u = Imm | (Imm << 16);
+
+ // control = 1 or 2
+ if (Vs == u)
+ return (((Vs == V) ? 1 : 2) << 8) | Imm;
+
+ // control = 3
+ if (Vs == (u | (u << 8)))
+ return (3 << 8) | Imm;
+
+ return -1;
+ }
+
+ /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
+ /// specified value is a rotated 8-bit value. Return -1 if no rotation
+ /// encoding is possible.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValRotateVal(unsigned V) {
+ unsigned RotAmt = CountLeadingZeros_32(V);
+ if (RotAmt >= 24)
+ return -1;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ if ((rotr32(0xff000000U, RotAmt) & V) == V)
+ return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
+
+ return -1;
+ }
+
+ /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
+ /// encoding for it. If not, return -1.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmVal(unsigned Arg) {
+ // If 'Arg' is an 8-bit splat, then get the encoded value.
+ int Splat = getT2SOImmValSplatVal(Arg);
+ if (Splat != -1)
+ return Splat;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ int Rot = getT2SOImmValRotateVal(Arg);
+ if (Rot != -1)
+ return Rot;
+
+ return -1;
+ }
+
+ static inline unsigned getT2SOImmValRotate(unsigned V) {
+ if ((V & ~255U) == 0) return 0;
+ // Use CTZ to compute the rotate amount.
+ unsigned RotAmt = CountTrailingZeros_32(V);
+ return (32 - RotAmt) & 31;
+ }
+
+ static inline bool isT2SOImmTwoPartVal (unsigned Imm) {
+ unsigned V = Imm;
+ // Passing values can be any combination of splat values and shifter
+ // values. If this can be handled with a single shifter or splat, bail
+ // out. Those should be handled directly, not with a two-part val.
+ if (getT2SOImmValSplatVal(V) != -1)
+ return false;
+ V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Likewise, try masking out a splat value first.
+ V = Imm;
+ if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
+ V &= ~0xff00ff00U;
+ else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
+ V &= ~0x00ff00ffU;
+ // If what's left can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Otherwise, do not accept.
+ return false;
+ }
+
+ static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
+ assert (isT2SOImmTwoPartVal(Imm) &&
+ "Immedate cannot be encoded as two part immediate!");
+ // Try a shifter operand as one part
+ unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
+ // If the rest is encodable as an immediate, then return it.
+ if (getT2SOImmVal(V) != -1) return V;
+
+ // Try masking out a splat value first.
+ if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
+ return Imm & 0xff00ff00U;
+
+ // The other splat is all that's left as an option.
+ assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
+ return Imm & 0x00ff00ffU;
+ }
+
+ static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
+ // Mask out the first hunk
+ Imm ^= getT2SOImmTwoPartFirst(Imm);
+ // Return what's left
+ assert (getT2SOImmVal(Imm) != -1 &&
+ "Unable to encode second part of T2 two part SO immediate");
+ return Imm;
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
+ // fourth operand 16-17 encodes the index mode.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
+ unsigned IdxMode = 0) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)((AM2Opc >> 13) & 7);
+ }
+ static inline unsigned getAM2IdxMode(unsigned AM2Opc) {
+ return (AM2Opc >> 16);
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
+ // index mode.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
+ unsigned IdxMode = 0) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset | (IdxMode << 9);
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
+ static inline unsigned getAM3IdxMode(unsigned AM3Opc) {
+ return (AM3Opc >> 9);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode) {
+ return (int)SubMode;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+ // The first operand is always a Reg. The second operand encodes the
+ // operation in bit 8 and the immediate in bits 0-7.
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #6
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for NEON load / store instructions.
+ //
+ // addrmode6 := reg with optional alignment
+ //
+ // This is stored in two operands [regaddr, align]. The first is the
+ // address register. The second operand is the value of the alignment
+ // specifier in bytes or zero if no explicit alignment.
+ // Valid alignments depend on the specific instruction.
+
+ //===--------------------------------------------------------------------===//
+ // NEON Modified Immediates
+ //===--------------------------------------------------------------------===//
+ //
+ // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // vector operand, where a small immediate encoded in the instruction
+ // specifies a full NEON vector value. These modified immediates are
+ // represented here as encoded integers. The low 8 bits hold the immediate
+ // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
+ // the "Cmode" field of the instruction. The interfaces below treat the
+ // Op and Cmode values as a single 5-bit value.
+
+ static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ return (OpCmode << 8) | Val;
+ }
+ static inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ return (ModImm >> 8) & 0x1f;
+ }
+ static inline unsigned getNEONModImmVal(unsigned ModImm) {
+ return ModImm & 0xff;
+ }
+
+ /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// element value and the element size in bits. (If the element size is
+ /// smaller than the vector, it is splatted into all the elements.)
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
+ unsigned OpCmode = getNEONModImmOpCmode(ModImm);
+ unsigned Imm8 = getNEONModImmVal(ModImm);
+ uint64_t Val = 0;
+
+ if (OpCmode == 0xe) {
+ // 8-bit vector elements
+ Val = Imm8;
+ EltBits = 8;
+ } else if ((OpCmode & 0xc) == 0x8) {
+ // 16-bit vector elements
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 16;
+ } else if ((OpCmode & 0x8) == 0) {
+ // 32-bit vector elements, zero with one byte set
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 32;
+ } else if ((OpCmode & 0xe) == 0xc) {
+ // 32-bit vector elements, one byte with low bits set
+ unsigned ByteNum = 1 + (OpCmode & 0x1);
+ Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
+ EltBits = 32;
+ } else if (OpCmode == 0x1e) {
+ // 64-bit vector elements
+ for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if ((ModImm >> ByteNum) & 1)
+ Val |= (uint64_t)0xff << (8 * ByteNum);
+ }
+ EltBits = 64;
+ } else {
+ llvm_unreachable("Unsupported NEON immediate");
+ }
+ return Val;
+ }
+
+ AMSubMode getLoadStoreMultipleSubMode(int Opcode);
+
+ //===--------------------------------------------------------------------===//
+ // Floating-point Immediates
+ //
+ static inline float getFPImmFloat(unsigned Imm) {
+ // We expect an 8-bit binary encoding of a floating-point number here.
+ union {
+ uint32_t I;
+ float F;
+ } FPUnion;
+
+ uint8_t Sign = (Imm >> 7) & 0x1;
+ uint8_t Exp = (Imm >> 4) & 0x7;
+ uint8_t Mantissa = Imm & 0xf;
+
+ // 8-bit FP iEEEE Float Encoding
+ // abcd efgh aBbbbbbc defgh000 00000000 00000000
+ //
+ // where B = NOT(b);
+
+ FPUnion.I = 0;
+ FPUnion.I |= Sign << 31;
+ FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ FPUnion.I |= (Exp & 0x3) << 23;
+ FPUnion.I |= Mantissa << 19;
+ return FPUnion.F;
+ }
+
+ /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP32Imm(const APInt &Imm) {
+ uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
+ int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x7ffff)
+ return -1;
+ Mantissa >>= 19;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP32Imm(const APFloat &FPImm) {
+ return getFP32Imm(FPImm.bitcastToAPInt());
+ }
+
+ /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP64Imm(const APInt &Imm) {
+ uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+ int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
+ uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0xffffffffffffULL)
+ return -1;
+ Mantissa >>= 48;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP64Imm(const APFloat &FPImm) {
+ return getFP64Imm(FPImm.bitcastToAPInt());
+ }
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
new file mode 100644
index 000000000000..e66e98567873
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -0,0 +1,688 @@
+//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ ARMELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+};
+
+class ARMAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo* STI;
+ bool isThumbMode; // Currently emitting Thumb code.
+public:
+ ARMAsmBackend(const Target &T, const StringRef TT)
+ : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
+ isThumbMode(TT.startswith("thumb")) {}
+
+ ~ARMAsmBackend() {
+ delete STI;
+ }
+
+ unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; }
+
+ bool hasNOP() const {
+ return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// ARMFixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
+{ "fixup_arm_movt_hi16", 0, 20, 0 },
+{ "fixup_arm_movw_lo16", 0, 20, 0 },
+{ "fixup_t2_movt_hi16", 0, 20, 0 },
+{ "fixup_t2_movw_lo16", 0, 20, 0 },
+{ "fixup_arm_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ /// processFixupValue - Target hook to process the literal value of a fixup
+ /// if necessary.
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved);
+
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ bool mayNeedRelaxation(const MCInst &Inst) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const;
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ void handleAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: break;
+ case MCAF_Code16:
+ setIsThumb(true);
+ break;
+ case MCAF_Code32:
+ setIsThumb(false);
+ break;
+ }
+ }
+
+ unsigned getPointerSize() const { return 4; }
+ bool isThumb() const { return isThumbMode; }
+ void setIsThumb(bool it) { isThumbMode = it; }
+};
+} // end anonymous namespace
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case ARM::tBcc: return ARM::t2Bcc;
+ case ARM::tLDRpciASM: return ARM::t2LDRpci;
+ case ARM::tADR: return ARM::t2ADR;
+ case ARM::tB: return ARM::t2B;
+ }
+}
+
+bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
+ if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
+ return false;
+}
+
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ switch ((unsigned)Fixup.getKind()) {
+ case ARM::fixup_arm_thumb_br: {
+ // Relaxing tB to t2B. tB has a signed 12-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 2046 || Offset < -2048;
+ }
+ case ARM::fixup_arm_thumb_bcc: {
+ // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 254 || Offset < -256;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ case ARM::fixup_arm_thumb_cp: {
+ // If the immediate is negative, greater than 1020, or not a multiple
+ // of four, the wide version of the instruction must be used.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 1020 || Offset < 0 || Offset & 3;
+ }
+ }
+ llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!");
+}
+
+void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ // Sanity check w/ diagnostic if we get here w/ a bogus instruction.
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ // The instructions we're relaxing have (so far) the same operands.
+ // We just need to update to the proper opcode.
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
+}
+
+bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
+ const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
+ const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
+ const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
+ if (isThumb()) {
+ const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
+ : Thumb1_16bitNopEncoding;
+ uint64_t NumNops = Count / 2;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write16(nopEncoding);
+ if (Count & 1)
+ OW->Write8(0);
+ return true;
+ }
+ // ARM mode
+ const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding
+ : ARMv4_NopEncoding;
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(nopEncoding);
+ // FIXME: should this function return false when unable to write exactly
+ // 'Count' bytes with NOP encodings?
+ switch (Count % 4) {
+ default: break; // No leftover bytes to write
+ case 1: OW->Write8(0); break;
+ case 2: OW->Write16(0); break;
+ case 3: OW->Write16(0); OW->Write8(0xa0); break;
+ }
+
+ return true;
+}
+
+static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ MCContext *Ctx = NULL) {
+ unsigned Kind = Fixup.getKind();
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ return Value;
+ case ARM::fixup_arm_movt_hi16:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_arm_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned Lo12 = Value & 0x0FFF;
+ // inst{19-16} = Hi4;
+ // inst{11-0} = Lo12;
+ Value = (Hi4 << 16) | (Lo12);
+ return Value;
+ }
+ case ARM::fixup_t2_movt_hi16:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16_pcrel: //FIXME: Shouldn't this be shifted like
+ // the other hi16 fixup?
+ case ARM::fixup_t2_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned i = (Value & 0x800) >> 11;
+ unsigned Mid3 = (Value & 0x700) >> 8;
+ unsigned Lo8 = Value & 0x0FF;
+ // inst{19-16} = Hi4;
+ // inst{26} = i;
+ // inst{14-12} = Mid3;
+ // inst{7-0} = Lo8;
+ Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_ldst_pcrel_12:
+ // ARM PC-relative values are offset by 8.
+ Value -= 4;
+ // FALLTHROUGH
+ case ARM::fixup_t2_ldst_pcrel_12: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value -= 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ if (Ctx && Value >= 4096)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_ldst_pcrel_12) {
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return ((Value - 4) >> 2) & 0xff;
+ case ARM::fixup_arm_adr_pcrel_12: {
+ // ARM PC-relative values are offset by 8.
+ Value -= 8;
+ unsigned opc = 4; // bits {24-21}. Default to add: 0b0100
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 2; // 0b0010
+ }
+ if (Ctx && ARM_AM::getSOImmVal(Value) == -1)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ // Encode the immediate and shift the opcode into place.
+ return ARM_AM::getSOImmVal(Value) | (opc << 21);
+ }
+
+ case ARM::fixup_t2_adr_pcrel_12: {
+ Value -= 4;
+ unsigned opc = 0;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 5;
+ }
+
+ uint32_t out = (opc << 21);
+ out |= (Value & 0x800) << 15;
+ out |= (Value & 0x700) << 4;
+ out |= (Value & 0x0FF);
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
+ // These values don't encode the low two bits since they're always zero.
+ // Offset by 8 just as above.
+ return 0xffffff & ((Value - 8) >> 2);
+ case ARM::fixup_t2_uncondbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint32_t out = 0;
+ bool I = Value & 0x800000;
+ bool J1 = Value & 0x400000;
+ bool J2 = Value & 0x200000;
+ J1 ^= I;
+ J2 ^= I;
+
+ out |= I << 26; // S bit
+ out |= !J1 << 13; // J1 bit
+ out |= !J2 << 11; // J2 bit
+ out |= (Value & 0x1FF800) << 5; // imm6 field
+ out |= (Value & 0x0007FF); // imm11 field
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_t2_condbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint64_t out = 0;
+ out |= (Value & 0x80000) << 7; // S bit
+ out |= (Value & 0x40000) >> 7; // J2 bit
+ out |= (Value & 0x20000) >> 4; // J1 bit
+ out |= (Value & 0x1F800) << 5; // imm6 field
+ out |= (Value & 0x007FF); // imm11 field
+
+ uint32_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_thumb_bl: {
+ // The value doesn't encode the low bit (always zero) and is offset by
+ // four. The 32-bit immediate value is encoded as
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:0)
+ // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
+ // The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+ // J = either J1 or J2 bit
+ //
+ // BL: xxxxxSIIIIIIIIII xxJxJIIIIIIIIIII
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ uint32_t offset = (Value - 4) >> 1;
+ uint32_t signBit = (offset & 0x800000) >> 23;
+ uint32_t I1Bit = (offset & 0x400000) >> 22;
+ uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+ uint32_t I2Bit = (offset & 0x200000) >> 21;
+ uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+ uint32_t imm10Bits = (offset & 0x1FF800) >> 11;
+ uint32_t imm11Bits = (offset & 0x000007FF);
+
+ uint32_t Binary = 0;
+ uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
+ uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ (uint16_t)imm11Bits);
+ Binary |= secondHalf << 16;
+ Binary |= firstHalf;
+ return Binary;
+
+ }
+ case ARM::fixup_arm_thumb_blx: {
+ // The value doesn't encode the low two bits (always zero) and is offset by
+ // four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as
+ // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00)
+ // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
+ // The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+ // J = either J1 or J2 bit, 0 = zero.
+ //
+ // BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ uint32_t offset = (Value - 2) >> 2;
+ uint32_t signBit = (offset & 0x400000) >> 22;
+ uint32_t I1Bit = (offset & 0x200000) >> 21;
+ uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+ uint32_t I2Bit = (offset & 0x100000) >> 20;
+ uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+ uint32_t imm10HBits = (offset & 0xFFC00) >> 10;
+ uint32_t imm10LBits = (offset & 0x3FF);
+
+ uint32_t Binary = 0;
+ uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
+ uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ ((uint16_t)imm10LBits) << 1);
+ Binary |= secondHalf << 16;
+ Binary |= firstHalf;
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_cp:
+ // Offset by 4, and don't encode the low two bits. Two bytes of that
+ // 'off by 4' is implicitly handled by the half-word ordering of the
+ // Thumb encoding, so we only need to adjust by 2 here.
+ return ((Value - 2) >> 2) & 0xff;
+ case ARM::fixup_arm_thumb_cb: {
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ uint32_t Binary = (Value - 4) >> 1;
+ return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
+ }
+ case ARM::fixup_arm_thumb_br:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0x7ff;
+ case ARM::fixup_arm_thumb_bcc:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0xff;
+ case ARM::fixup_arm_pcrel_10_unscaled: {
+ Value = Value - 8; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
+ if (Ctx && Value >= 256)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ Value = (Value & 0xf) | ((Value & 0xf0) << 4);
+ return Value | (isAdd << 23);
+ }
+ case ARM::fixup_arm_pcrel_10:
+ Value = Value - 4; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ // Fall through.
+ case ARM::fixup_t2_pcrel_10: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value = Value - 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // These values don't encode the low two bits since they're always zero.
+ Value >>= 2;
+ if (Ctx && Value >= 256)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
+ // swapped.
+ if (Kind == ARM::fixup_t2_pcrel_10) {
+ uint32_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ }
+}
+
+void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ // Some fixups to thumb function symbols need the low bit (thumb bit)
+ // twiddled.
+ if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
+ if (A) {
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Asm.isThumbFunc(&Sym))
+ Value |= 1;
+ }
+ }
+ // We must always generate a relocation for BL/BLX instructions if we have
+ // a symbol to reference, as the linker relies on knowing the destination
+ // symbol's thumb-ness to get interworking right.
+ if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
+ IsResolved = false;
+
+ // Try to get the encoded value for the fixup as-if we're mapping it into
+ // the instruction. This allows adjustFixupValue() to issue a diagnostic
+ // if the value aren't invalid.
+ (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
+}
+
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+
+ case FK_Data_1:
+ case ARM::fixup_arm_thumb_bcc:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return 1;
+
+ case FK_Data_2:
+ case ARM::fixup_arm_thumb_br:
+ case ARM::fixup_arm_thumb_cb:
+ return 2;
+
+ case ARM::fixup_arm_pcrel_10_unscaled:
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ return 3;
+
+ case FK_Data_4:
+ case ARM::fixup_t2_ldst_pcrel_12:
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_t2_pcrel_10:
+ case ARM::fixup_t2_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ return 4;
+ }
+}
+
+void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ Value = adjustFixupValue(Fixup, Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+ uint8_t OSABI;
+ ELFARMAsmBackend(const Target &T, const StringRef TT,
+ uint8_t _OSABI)
+ : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMELFObjectWriter(OS, OSABI);
+ }
+};
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+ const object::mach::CPUSubtypeARM Subtype;
+ DarwinARMAsmBackend(const Target &T, const StringRef TT,
+ object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T, TT), Subtype(st) {
+ HasDataInCodeSupport = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ Subtype);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+};
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ object::mach::CPUSubtypeARM CS =
+ StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName())
+ .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T)
+ .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ)
+ .Cases("armv6", "thumbv6", object::mach::CSARM_V6)
+ .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M)
+ .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM)
+ .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F)
+ .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K)
+ .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M)
+ .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S)
+ .Default(object::mach::CSARM_V7);
+
+ return new DarwinARMAsmBackend(T, TT, CS);
+ }
+
+ if (TheTriple.isOSWindows())
+ assert(0 && "Windows not supported on ARM");
+
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFARMAsmBackend(T, TT, OSABI);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
new file mode 100644
index 000000000000..de48a0e0f30a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -0,0 +1,421 @@
+//===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the ARM target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINFO_H
+#define ARMBASEINFO_H
+
+#include "ARMMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Carry set >, ==, or unordered
+ LO, // Carry clear Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Not unordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Greater than Greater than
+ LE, // Less than or equal <, ==, or unordered
+ AL // Always (unconditional) Always (unconditional)
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
+ }
+} // namespace ARMCC
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
+ }
+ llvm_unreachable("Unknown condition code");
+}
+
+namespace ARM_PROC {
+ enum IMod {
+ IE = 2,
+ ID = 3
+ };
+
+ enum IFlags {
+ F = 1,
+ I = 2,
+ A = 4
+ };
+
+ inline static const char *IFlagsToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown iflags operand");
+ case F: return "f";
+ case I: return "i";
+ case A: return "a";
+ }
+ }
+
+ inline static const char *IModToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown imod operand");
+ case IE: return "ie";
+ case ID: return "id";
+ }
+ }
+}
+
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ RESERVED_0 = 0,
+ RESERVED_1 = 1,
+ OSHST = 2,
+ OSH = 3,
+ RESERVED_4 = 4,
+ RESERVED_5 = 5,
+ NSHST = 6,
+ NSH = 7,
+ RESERVED_8 = 8,
+ RESERVED_9 = 9,
+ ISHST = 10,
+ ISH = 11,
+ RESERVED_12 = 12,
+ RESERVED_13 = 13,
+ ST = 14,
+ SY = 15
+ };
+
+ inline static const char *MemBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory operation");
+ case SY: return "sy";
+ case ST: return "st";
+ case RESERVED_13: return "#0xd";
+ case RESERVED_12: return "#0xc";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case RESERVED_9: return "#0x9";
+ case RESERVED_8: return "#0x8";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case RESERVED_5: return "#0x5";
+ case RESERVED_4: return "#0x4";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ case RESERVED_1: return "#0x1";
+ case RESERVED_0: return "#0x0";
+ }
+ }
+} // namespace ARM_MB
+
+/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
+///
+static inline bool isARMLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+
+ /// ARM Index Modes
+ enum IndexMode {
+ IndexModeNone = 0,
+ IndexModePre = 1,
+ IndexModePost = 2,
+ IndexModeUpd = 3
+ };
+
+ /// ARM Addressing Modes
+ enum AddrMode {
+ AddrModeNone = 0,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrMode6 = 6,
+ AddrModeT1_1 = 7,
+ AddrModeT1_2 = 8,
+ AddrModeT1_4 = 9,
+ AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
+ AddrModeT2_i12 = 11,
+ AddrModeT2_i8 = 12,
+ AddrModeT2_so = 13,
+ AddrModeT2_pc = 14, // +/- i12 for pc relative data
+ AddrModeT2_i8s4 = 15, // i8 * 4
+ AddrMode_i12 = 16
+ };
+
+ inline static const char *AddrModeToString(AddrMode addrmode) {
+ switch (addrmode) {
+ case AddrModeNone: return "AddrModeNone";
+ case AddrMode1: return "AddrMode1";
+ case AddrMode2: return "AddrMode2";
+ case AddrMode3: return "AddrMode3";
+ case AddrMode4: return "AddrMode4";
+ case AddrMode5: return "AddrMode5";
+ case AddrMode6: return "AddrMode6";
+ case AddrModeT1_1: return "AddrModeT1_1";
+ case AddrModeT1_2: return "AddrModeT1_2";
+ case AddrModeT1_4: return "AddrModeT1_4";
+ case AddrModeT1_s: return "AddrModeT1_s";
+ case AddrModeT2_i12: return "AddrModeT2_i12";
+ case AddrModeT2_i8: return "AddrModeT2_i8";
+ case AddrModeT2_so: return "AddrModeT2_so";
+ case AddrModeT2_pc: return "AddrModeT2_pc";
+ case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
+ case AddrMode_i12: return "AddrMode_i12";
+ }
+ }
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // ARM Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_LO16 - On a symbol operand, this represents a relocation containing
+ /// lower 16 bit of the address. Used only via movw instruction.
+ MO_LO16,
+
+ /// MO_HI16 - On a symbol operand, this represents a relocation containing
+ /// higher 16 bit of the address. Used only via movt instruction.
+ MO_HI16,
+
+ /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY,
+
+ /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr". Used only via movt instruction.
+ MO_HI16_NONLAZY,
+
+ /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY_PIC,
+
+ /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movt instruction.
+ MO_HI16_NONLAZY_PIC,
+
+ /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
+ /// call operand.
+ MO_PLT
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+ // This four-bit field describes the addressing mode used.
+ AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
+
+ // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
+ // and store ops only. Generic "updating" flag is used for ld/st multiple.
+ // The index mode enums are declared in ARMBaseInfo.h
+ IndexModeShift = 5,
+ IndexModeMask = 3 << IndexModeShift,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 7,
+ FormMask = 0x3f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ LdStExFrm = 11 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 14 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 25 << FormShift,
+
+ // Miscelleaneous format
+ MiscFrm = 26 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have a Rn operand.
+ UnaryDP = 1 << 13,
+
+ // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+ // a 16-bit Thumb instruction if certain conditions are met.
+ Xform16Bit = 1 << 14,
+
+ // ThumbArithFlagSetting - The instruction is a 16-bit flag setting Thumb
+ // instruction. Used by the parser to determine whether to require the 'S'
+ // suffix on the mnemonic (when not in an IT block) or preclude it (when
+ // in an IT block).
+ ThumbArithFlagSetting = 1 << 18,
+
+ //===------------------------------------------------------------------===//
+ // Code domain.
+ DomainShift = 15,
+ DomainMask = 7 << DomainShift,
+ DomainGeneral = 0 << DomainShift,
+ DomainVFP = 1 << DomainShift,
+ DomainNEON = 2 << DomainShift,
+ DomainNEONA8 = 4 << DomainShift,
+
+ //===------------------------------------------------------------------===//
+ // Field shifts - such shifts are used to set field while generating
+ // machine instructions.
+ //
+ // FIXME: This list will need adjusting/fixing as the MC code emitter
+ // takes shape and the ARMCodeEmitter.cpp bits go away.
+ ShiftTypeShift = 4,
+
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+
+} // end namespace ARMII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
new file mode 100644
index 000000000000..f98bbd204c7a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -0,0 +1,289 @@
+//===-- ARMELFObjectWriter.cpp - ARM ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+ enum { DefaultEABIVersion = 0x05000000U };
+ unsigned GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+
+ public:
+ ARMELFObjectWriter(uint8_t OSABI);
+
+ virtual ~ARMELFObjectWriter();
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ };
+}
+
+ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
+ ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+
+ARMELFObjectWriter::~ARMELFObjectWriter() {}
+
+// In ARM, _MergedGlobals and other most symbols get emitted directly.
+// I.e. not as an offset to a section symbol.
+// This code is an approximation of what ARM/gcc does.
+
+STATISTIC(PCRelCount, "Total number of PIC Relocations");
+STATISTIC(NonPCRelCount, "Total number of non-PIC relocations");
+
+const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol();
+ bool EmitThisSym = false;
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(Symbol.getSection());
+ bool InNormalSection = true;
+ unsigned RelocType = 0;
+ RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel);
+
+ DEBUG(
+ const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+ MCSymbolRefExpr::VariantKind Kind2;
+ Kind2 = Target.getSymB() ? Target.getSymB()->getKind() :
+ MCSymbolRefExpr::VK_None;
+ dbgs() << "considering symbol "
+ << Section.getSectionName() << "/"
+ << Symbol.getName() << "/"
+ << " Rel:" << (unsigned)RelocType
+ << " Kind: " << (int)Kind << "/" << (int)Kind2
+ << " Tmp:"
+ << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/"
+ << Symbol.isVariable() << "/" << Symbol.isTemporary()
+ << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n");
+
+ if (IsPCRel) { ++PCRelCount;
+ switch (RelocType) {
+ default:
+ // Most relocation types are emitted as explicit symbols
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = true;
+ break;
+ case ELF::R_ARM_ABS32:
+ // But things get strange with R_ARM_ABS32
+ // In this case, most things that go in .rodata show up
+ // as section relative relocations
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".rodata", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = false;
+ break;
+ }
+ } else {
+ NonPCRelCount++;
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".rodata", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+
+ switch (RelocType) {
+ default: EmitThisSym = true; break;
+ case ELF::R_ARM_ABS32: EmitThisSym = false; break;
+ case ELF::R_ARM_PREL31: EmitThisSym = false; break;
+ }
+ }
+
+ if (EmitThisSym)
+ return &Symbol;
+ if (! Symbol.isTemporary() && InNormalSection) {
+ return &Symbol;
+ }
+ return NULL;
+}
+
+// Need to examine the Fixup when determining whether to
+// emit the relocation as an explicit symbol or as a section relative
+// offset
+unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ return GetRelocTypeInner(Target, Fixup, IsPCRel);
+}
+
+unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ unsigned Type = 0;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("Unimplemented");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_REL32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ llvm_unreachable("unimplemented");
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_blx:
+ case ARM::fixup_arm_uncondbl:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_ARM_PLT:
+ Type = ELF::R_ARM_PLT32;
+ break;
+ default:
+ Type = ELF::R_ARM_CALL;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ Type = ELF::R_ARM_THM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ Type = ELF::R_ARM_MOVT_PREL;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ Type = ELF::R_ARM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Type = ELF::R_ARM_THM_MOVT_PREL;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Type = ELF::R_ARM_THM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ Type = ELF::R_ARM_THM_CALL;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_ARM_NONE:
+ Type = ELF::R_ARM_NONE;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOT:
+ Type = ELF::R_ARM_GOT_BREL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ Type = ELF::R_ARM_TLS_GD32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TPOFF:
+ Type = ELF::R_ARM_TLS_LE32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTOFF:
+ Type = ELF::R_ARM_GOTOFF32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TARGET1:
+ Type = ELF::R_ARM_TARGET1;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TARGET2:
+ Type = ELF::R_ARM_TARGET2;
+ break;
+ case MCSymbolRefExpr::VK_ARM_PREL31:
+ Type = ELF::R_ARM_PREL31;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_arm_thumb_br:
+ llvm_unreachable("Unimplemented");
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ Type = ELF::R_ARM_MOVT_ABS;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ Type = ELF::R_ARM_MOVW_ABS_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ break;
+ }
+ }
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
new file mode 100644
index 000000000000..418971df3292
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -0,0 +1,418 @@
+//===- lib/MC/ARMELFStreamer.cpp - ELF Object Output for ARM --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ARM ELF .o object files. Different
+// from generic ELF streamer in emitting mapping symbols ($a, $t and $d) to
+// delimit regions of data and code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMUnwindOp.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf.
+///
+/// In brief: $a, $t or $d should be emitted at the start of each contiguous
+/// region of ARM code, Thumb code or data in a section. In practice, this
+/// emission does not rely on explicit assembler directives but on inherent
+/// properties of the directives doing the emission (e.g. ".byte" is data, "add
+/// r0, r0, r0" an instruction).
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
+class ARMELFStreamer : public MCELFStreamer {
+public:
+ ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool IsThumb)
+ : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
+ IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0),
+ FnStart(0), Personality(0), CantUnwind(false) {}
+
+ ~ARMELFStreamer() {}
+
+ // ARM exception handling directives
+ virtual void EmitFnStart();
+ virtual void EmitFnEnd();
+ virtual void EmitCantUnwind();
+ virtual void EmitPersonality(const MCSymbol *Per);
+ virtual void EmitHandlerData();
+ virtual void EmitSetFP(unsigned NewFpReg,
+ unsigned NewSpReg,
+ int64_t Offset = 0);
+ virtual void EmitPad(int64_t Offset);
+ virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector);
+
+ virtual void ChangeSection(const MCSection *Section) {
+ // We have to keep track of the mapping symbol state of any sections we
+ // use. Each one should start off as EMS_None, which is provided as the
+ // default constructor by DenseMap::lookup.
+ LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ virtual void EmitInstruction(const MCInst& Inst) {
+ if (IsThumb)
+ EmitThumbMappingSymbol();
+ else
+ EmitARMMappingSymbol();
+
+ MCELFStreamer::EmitInstruction(Inst);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data, AddrSpace);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ MCELFStreamer::EmitAssemblerFlag(Flag);
+
+ switch (Flag) {
+ case MCAF_SyntaxUnified:
+ return; // no-op here.
+ case MCAF_Code16:
+ IsThumb = true;
+ return; // Change to Thumb mode
+ case MCAF_Code32:
+ IsThumb = false;
+ return; // Change to ARM mode
+ case MCAF_Code64:
+ return;
+ case MCAF_SubsectionsViaSymbols:
+ return;
+ }
+ }
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_ARMELFStreamer;
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_ARM,
+ EMS_Thumb,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitThumbMappingSymbol() {
+ if (LastEMS == EMS_Thumb) return;
+ EmitMappingSymbol("$t");
+ LastEMS = EMS_Thumb;
+ }
+
+ void EmitARMMappingSymbol() {
+ if (LastEMS == EMS_ARM) return;
+ EmitMappingSymbol("$a");
+ LastEMS = EMS_ARM;
+ }
+
+ void EmitMappingSymbol(StringRef Name) {
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+
+ MCSymbol *Symbol =
+ getContext().GetOrCreateSymbol(Name + "." +
+ Twine(MappingSymbolCounter++));
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ Symbol->setSection(*getCurrentSection());
+
+ const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ Symbol->setVariableValue(Value);
+ }
+
+ void EmitThumbFunc(MCSymbol *Func) {
+ // FIXME: Anything needed here to flag the function as thumb?
+
+ getAssembler().setIsThumbFunc(Func);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func);
+ SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
+ }
+
+ // Helper functions for ARM exception handling directives
+ void Reset();
+
+ void EmitPersonalityFixup(StringRef Name);
+
+ void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
+ SectionKind Kind, const MCSymbol &Fn);
+ void SwitchToExTabSection(const MCSymbol &FnStart);
+ void SwitchToExIdxSection(const MCSymbol &FnStart);
+
+ bool IsThumb;
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+ // ARM Exception Handling Frame Information
+ MCSymbol *ExTab;
+ MCSymbol *FnStart;
+ const MCSymbol *Personality;
+ bool CantUnwind;
+};
+}
+
+inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
+ unsigned Type,
+ unsigned Flags,
+ SectionKind Kind,
+ const MCSymbol &Fn) {
+ const MCSectionELF &FnSection =
+ static_cast<const MCSectionELF &>(Fn.getSection());
+
+ // Create the name for new section
+ StringRef FnSecName(FnSection.getSectionName());
+ SmallString<128> EHSecName(Prefix);
+ if (FnSecName != ".text") {
+ EHSecName += FnSecName;
+ }
+
+ // Get .ARM.extab or .ARM.exidx section
+ const MCSectionELF *EHSection = NULL;
+ if (const MCSymbol *Group = FnSection.getGroup()) {
+ EHSection = getContext().getELFSection(
+ EHSecName, Type, Flags | ELF::SHF_GROUP, Kind,
+ FnSection.getEntrySize(), Group->getName());
+ } else {
+ EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
+ }
+ assert(EHSection);
+
+ // Switch to .ARM.extab or .ARM.exidx section
+ SwitchSection(EHSection);
+ EmitCodeAlignment(4, 0);
+}
+
+inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.extab",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel(),
+ FnStart);
+}
+
+inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.exidx",
+ ELF::SHT_ARM_EXIDX,
+ ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
+ SectionKind::getDataRel(),
+ FnStart);
+}
+
+void ARMELFStreamer::Reset() {
+ ExTab = NULL;
+ FnStart = NULL;
+ Personality = NULL;
+ CantUnwind = false;
+}
+
+// Add the R_ARM_NONE fixup at the same position
+void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
+ const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
+
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(PersonalitySym,
+ MCSymbolRefExpr::VK_ARM_NONE,
+ getContext());
+
+ AddValueSymbols(PersonalityRef);
+ MCDataFragment *DF = getOrCreateDataFragment();
+ DF->getFixups().push_back(
+ MCFixup::Create(DF->getContents().size(), PersonalityRef,
+ MCFixup::getKindForSize(4, false)));
+}
+
+void ARMELFStreamer::EmitFnStart() {
+ assert(FnStart == 0);
+ FnStart = getContext().CreateTempSymbol();
+ EmitLabel(FnStart);
+}
+
+void ARMELFStreamer::EmitFnEnd() {
+ assert(FnStart && ".fnstart must preceeds .fnend");
+
+ // Emit unwind opcodes if there is no .handlerdata directive
+ int PersonalityIndex = -1;
+ if (!ExTab && !CantUnwind) {
+ // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
+ SwitchToExTabSection(*FnStart);
+
+ // Create .ARM.extab label for offset in .ARM.exidx
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+
+ PersonalityIndex = 1;
+
+ uint32_t Entry = 0;
+ uint32_t NumExtraEntryWords = 0;
+ Entry |= NumExtraEntryWords << 24;
+ Entry |= (EHT_COMPACT | PersonalityIndex) << 16;
+
+ // TODO: This should be generated according to .save, .vsave, .setfp
+ // directives. Currently, we are simply generating FINISH opcode.
+ Entry |= UNWIND_OPCODE_FINISH << 8;
+ Entry |= UNWIND_OPCODE_FINISH;
+
+ EmitIntValue(Entry, 4, 0);
+ }
+
+ // Emit the exception index table entry
+ SwitchToExIdxSection(*FnStart);
+
+ if (PersonalityIndex == 1)
+ EmitPersonalityFixup("__aeabi_unwind_cpp_pr1");
+
+ const MCSymbolRefExpr *FnStartRef =
+ MCSymbolRefExpr::Create(FnStart,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(FnStartRef, 4, 0);
+
+ if (CantUnwind) {
+ EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
+ } else {
+ const MCSymbolRefExpr *ExTabEntryRef =
+ MCSymbolRefExpr::Create(ExTab,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+ EmitValue(ExTabEntryRef, 4, 0);
+ }
+
+ // Clean exception handling frame information
+ Reset();
+}
+
+void ARMELFStreamer::EmitCantUnwind() {
+ CantUnwind = true;
+}
+
+void ARMELFStreamer::EmitHandlerData() {
+ SwitchToExTabSection(*FnStart);
+
+ // Create .ARM.extab label for offset in .ARM.exidx
+ assert(!ExTab);
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+
+ // Emit Personality
+ assert(Personality && ".personality directive must preceed .handlerdata");
+
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(Personality,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(PersonalityRef, 4, 0);
+
+ // Emit unwind opcodes
+ uint32_t Entry = 0;
+ uint32_t NumExtraEntryWords = 0;
+
+ // TODO: This should be generated according to .save, .vsave, .setfp
+ // directives. Currently, we are simply generating FINISH opcode.
+ Entry |= NumExtraEntryWords << 24;
+ Entry |= UNWIND_OPCODE_FINISH << 16;
+ Entry |= UNWIND_OPCODE_FINISH << 8;
+ Entry |= UNWIND_OPCODE_FINISH;
+
+ EmitIntValue(Entry, 4, 0);
+}
+
+void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
+ Personality = Per;
+}
+
+void ARMELFStreamer::EmitSetFP(unsigned NewFpReg,
+ unsigned NewSpReg,
+ int64_t Offset) {
+ // TODO: Not implemented
+}
+
+void ARMELFStreamer::EmitPad(int64_t Offset) {
+ // TODO: Not implemented
+}
+
+void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool IsVector) {
+ // TODO: Not implemented
+}
+
+namespace llvm {
+ MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack,
+ bool IsThumb) {
+ ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+ }
+
+}
+
+
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
new file mode 100644
index 000000000000..77ae5d23628e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
@@ -0,0 +1,27 @@
+//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF streamer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_STREAMER_H
+#define ARM_ELF_STREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+
+ MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack,
+ bool IsThumb);
+}
+
+#endif // ARM_ELF_STREAMER_H
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
new file mode 100644
index 000000000000..0085feb82069
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -0,0 +1,119 @@
+//===-- ARMFixupKinds.h - ARM Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARM_ARMFIXUPKINDS_H
+#define LLVM_ARM_ARMFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace ARM {
+enum Fixups {
+ // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
+ // addresses
+ fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
+
+ // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
+ // the 16-bit halfwords reordered.
+ fixup_t2_ldst_pcrel_12,
+
+ // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol
+ // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
+ fixup_arm_pcrel_10_unscaled,
+ // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
+ // used in VFP instructions where the lower 2 bits are not encoded
+ // (so it's encoded as an 8-bit immediate).
+ fixup_arm_pcrel_10,
+ // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
+ // the short-swapped encoding of Thumb2 instructions.
+ fixup_t2_pcrel_10,
+ // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
+ // addresses where the lower 2 bits are not encoded (so it's encoded as an
+ // 8-bit immediate).
+ fixup_thumb_adr_pcrel_10,
+ // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_arm_adr_pcrel_12,
+ // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_t2_adr_pcrel_12,
+ // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
+ // instructions.
+ fixup_arm_condbranch,
+ // fixup_arm_uncondbranch - 24-bit PC relative relocation for
+ // branch instructions. (unconditional)
+ fixup_arm_uncondbranch,
+ // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
+ // uconditional branch instructions.
+ fixup_t2_condbranch,
+ // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
+ // branch unconditional branch instructions.
+ fixup_t2_uncondbranch,
+
+ // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+ fixup_arm_thumb_br,
+
+ // The following fixups handle the ARM BL instructions. These can be
+ // conditionalised; however, the ARM ELF ABI requires a different relocation
+ // in that case: R_ARM_JUMP24 instead of R_ARM_CALL. The difference is that
+ // R_ARM_CALL is allowed to change the instruction to a BLX inline, which has
+ // no conditional version; R_ARM_JUMP24 would have to insert a veneer.
+ //
+ // MachO does not draw a distinction between the two cases, so it will treat
+ // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups.
+
+ // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions.
+ fixup_arm_uncondbl,
+
+ // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial
+ // conditionalisation.
+ fixup_arm_condbl,
+
+ // fixup_arm_blx - Fixup for ARM BLX instructions.
+ fixup_arm_blx,
+
+ // fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
+ fixup_arm_thumb_bl,
+
+ // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+ fixup_arm_thumb_blx,
+
+ // fixup_arm_thumb_cb - Fixup for Thumb branch instructions.
+ fixup_arm_thumb_cb,
+
+ // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+ fixup_arm_thumb_cp,
+
+ // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+ fixup_arm_thumb_bcc,
+
+ // The next two are for the movt/movw pair
+ // the 16bit imm field are split into imm{15-12} and imm{11-0}
+ fixup_arm_movt_hi16, // :upper16:
+ fixup_arm_movw_lo16, // :lower16:
+ fixup_t2_movt_hi16, // :upper16:
+ fixup_t2_movw_lo16, // :lower16:
+
+ // It is possible to create an "immediate" that happens to be pcrel.
+ // movw r0, :lower16:Foo-(Bar+8) and movt r0, :upper16:Foo-(Bar+8)
+ // result in different reloc tags than the above two.
+ // Needed to support ELF::R_ARM_MOVT_PREL and ELF::R_ARM_MOVW_PREL_NC
+ fixup_arm_movt_hi16_pcrel, // :upper16:
+ fixup_arm_movw_lo16_pcrel, // :lower16:
+ fixup_t2_movt_hi16_pcrel, // :upper16:
+ fixup_t2_movw_lo16_pcrel, // :lower16:
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
new file mode 100644
index 000000000000..c1aab9c72cca
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -0,0 +1,60 @@
+//===-- ARMMCAsmInfo.cpp - ARM asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the ARMMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+cl::opt<bool>
+EnableARMEHABI("arm-enable-ehabi", cl::Hidden,
+ cl::desc("Generate ARM EHABI tables"),
+ cl::init(false));
+
+
+void ARMMCAsmInfoDarwin::anchor() { }
+
+ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
+ Data64bitsDirective = 0;
+ CommentString = "@";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+ UseDataRegionDirectives = true;
+
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::SjLj;
+}
+
+void ARMELFMCAsmInfo::anchor() { }
+
+ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ Data64bitsDirective = 0;
+ CommentString = "@";
+ PrivateGlobalPrefix = ".L";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+
+ WeakRefDirective = "\t.weak\t";
+
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ if (EnableARMEHABI)
+ ExceptionsType = ExceptionHandling::ARM;
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
new file mode 100644
index 000000000000..f0b289c6f3b6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -0,0 +1,35 @@
+//===-- ARMMCAsmInfo.h - ARM asm properties --------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARMTARGETASMINFO_H
+#define LLVM_ARMTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+ public:
+ explicit ARMMCAsmInfoDarwin();
+ };
+
+ class ARMELFMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
+ explicit ARMELFMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
new file mode 100644
index 000000000000..7a59a7dd5055
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -0,0 +1,1531 @@
+//===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
+
+namespace {
+class ARMMCCodeEmitter : public MCCodeEmitter {
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+ const MCContext &CTX;
+
+public:
+ ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti), CTX(ctx) {
+ }
+
+ ~ARMMCCodeEmitter() {}
+
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ }
+ bool isThumb2() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) != 0;
+ }
+ bool isTargetDarwin() const {
+ Triple TT(STI.getTargetTriple());
+ Triple::OSType OS = TT.getOS();
+ return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS;
+ }
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of
+ /// the specified operand. This is used for operands with :lower16: and
+ /// :upper16: prefixes.
+ uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx,
+ unsigned &Reg, unsigned &Imm,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLTargetOpValue - Return encoding info for Thumb immediate
+ /// BL branch target.
+ uint32_t getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+ /// BLX branch target.
+ uint32_t getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+ /// immediate Thumb2 direct branch target.
+ uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAdrLabelOpValue - Return encoding info for 12-bit immediate
+ /// ADR label target.
+ uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbAddrModeRegRegOpValue - Return encoding for 'reg + reg' operand.
+ uint32_t getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups)const;
+
+ /// getT2AddrModeImm8s4OpValue - Return encoding info for 'reg +/- imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2'
+ /// operand.
+ uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
+ /// operand as needed by load/store instructions.
+ uint32_t getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getLdStmModeOpValue - Return encoding for load/store multiple mode.
+ uint32_t getLdStmModeOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
+ switch (Mode) {
+ default: llvm_unreachable("Unknown addressing sub-mode!");
+ case ARM_AM::da: return 0;
+ case ARM_AM::ia: return 1;
+ case ARM_AM::db: return 2;
+ case ARM_AM::ib: return 3;
+ }
+ }
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
+ switch (ShOpc) {
+ case ARM_AM::no_shift:
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::asr: return 2;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ llvm_unreachable("Invalid ShiftOpc!");
+ }
+
+ /// getAddrMode2OpValue - Return encoding for addrmode2 operands.
+ uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
+ uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getPostIdxRegOpValue - Return encoding for postidx_reg operands.
+ uint32_t getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands.
+ uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OpValue - Return encoding for addrmode3 operands.
+ uint32_t getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeThumbSPOpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+ uint32_t getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+ uint32_t getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand.
+ uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getCCOutOpValue - Return encoding of the 's' bit.
+ unsigned getCCOutOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // The operand is either reg0 or CPSR. The 's' bit is encoded as '0' or
+ // '1' respectively.
+ return MI.getOperand(Op).getReg() == ARM::CPSR;
+ }
+
+ /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+ }
+
+ /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm);
+ assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
+ return Encoded;
+ }
+
+ unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getSORegOpValue - Return an encoded so_reg shifted register value.
+ unsigned getSORegRegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getSORegImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+ }
+
+ unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ unsigned VFPThumb2PostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ // NEON Thumb2 data-processsing encodings are very simple: bit 24 is moved
+ // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are
+ // set to 1111.
+ unsigned Bit24 = EncodedValue & 0x01000000;
+ unsigned Bit28 = Bit24 << 4;
+ EncodedValue &= 0xEFFFFFFF;
+ EncodedValue |= Bit28;
+ EncodedValue |= 0x0F000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2LoadStorePostEncoder - Post-process encoded NEON load/store
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0xF0FFFFFF;
+ EncodedValue |= 0x09000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2DupPostEncoder - Post-process encoded NEON vdup
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0x00FFFFFF;
+ EncodedValue |= 0xEE000000;
+ }
+
+ return EncodedValue;
+}
+
+/// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite
+/// them to their Thumb2 form if we are currently in Thumb2 mode.
+unsigned ARMMCCodeEmitter::
+VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0x0FFFFFFF;
+ EncodedValue |= 0xE0000000;
+ }
+ return EncodedValue;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
+
+ // Q registers are encoded as 2x their register number.
+ switch (Reg) {
+ default:
+ return RegNo;
+ case ARM::Q0: case ARM::Q1: case ARM::Q2: case ARM::Q3:
+ case ARM::Q4: case ARM::Q5: case ARM::Q6: case ARM::Q7:
+ case ARM::Q8: case ARM::Q9: case ARM::Q10: case ARM::Q11:
+ case ARM::Q12: case ARM::Q13: case ARM::Q14: case ARM::Q15:
+ return 2 * RegNo;
+ }
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+}
+
+/// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand.
+bool ARMMCCodeEmitter::
+EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
+ unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+
+ Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+
+ int32_t SImm = MO1.getImm();
+ bool isAdd = true;
+
+ // Special value for #-0
+ if (SImm == INT32_MIN) {
+ SImm = 0;
+ isAdd = false;
+ }
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (SImm < 0) {
+ SImm = -SImm;
+ isAdd = false;
+ }
+
+ Imm = SImm;
+ return isAdd;
+}
+
+/// getBranchTargetOpValue - Helper function to get the branch target operand,
+/// which is either an immediate or requires a fixup.
+static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm()) return MO.getImm();
+ assert(MO.isExpr() && "Unexpected branch target type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
+// Thumb BL and BLX use a strange offset encoding where bits 22 and 21 are
+// determined by negating them and XOR'ing them with bit 23.
+static int32_t encodeThumbBLOffset(int32_t offset) {
+ offset >>= 1;
+ uint32_t S = (offset & 0x800000) >> 23;
+ uint32_t J1 = (offset & 0x400000) >> 22;
+ uint32_t J2 = (offset & 0x200000) >> 21;
+ J1 = (~J1 & 0x1);
+ J2 = (~J2 & 0x1);
+ J1 ^= S;
+ J2 ^= S;
+
+ offset &= ~0x600000;
+ offset |= J1 << 22;
+ offset |= J2 << 21;
+
+ return offset;
+}
+
+/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl,
+ Fixups);
+ return encodeThumbBLOffset(MO.getImm());
+}
+
+/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+/// BLX branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx,
+ Fixups);
+ return encodeThumbBLOffset(MO.getImm());
+}
+
+/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br,
+ Fixups);
+ return (MO.getImm() >> 1);
+}
+
+/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc,
+ Fixups);
+ return (MO.getImm() >> 1);
+}
+
+/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+ return (MO.getImm() >> 1);
+}
+
+/// Return true if this branch has a non-always predication
+static bool HasConditionalBranch(const MCInst &MI) {
+ int NumOp = MI.getNumOperands();
+ if (NumOp >= 2) {
+ for (int i = 0; i < NumOp-1; ++i) {
+ const MCOperand &MCOp1 = MI.getOperand(i);
+ const MCOperand &MCOp2 = MI.getOperand(i + 1);
+ if (MCOp1.isImm() && MCOp2.isReg() &&
+ (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
+ if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // FIXME: This really, really shouldn't use TargetMachine. We don't want
+ // coupling between MC and TM anywhere we can help it.
+ if (isThumb2())
+ return
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups);
+ return getARMBranchTargetOpValue(MI, OpIdx, Fixups);
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr()) {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_uncondbranch, Fixups);
+ }
+
+ return MO.getImm() >> 2;
+}
+
+uint32_t ARMMCCodeEmitter::
+getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr()) {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbl, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups);
+ }
+
+ return MO.getImm() >> 2;
+}
+
+uint32_t ARMMCCodeEmitter::
+getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_blx, Fixups);
+
+ return MO.getImm() >> 1;
+}
+
+/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+/// immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Val =
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+ bool I = (Val & 0x800000);
+ bool J1 = (Val & 0x400000);
+ bool J2 = (Val & 0x200000);
+ if (I ^ J1)
+ Val &= ~0x400000;
+ else
+ Val |= 0x400000;
+
+ if (I ^ J2)
+ Val &= ~0x200000;
+ else
+ Val |= 0x200000;
+
+ return Val;
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit shifted-immediate
+/// ADR label target.
+uint32_t ARMMCCodeEmitter::
+getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+ Fixups);
+ int32_t offset = MO.getImm();
+ uint32_t Val = 0x2000;
+
+ int SoImmVal;
+ if (offset == INT32_MIN) {
+ Val = 0x1000;
+ SoImmVal = 0;
+ } else if (offset < 0) {
+ Val = 0x1000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ if(SoImmVal == -1) {
+ Val = 0x2000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ }
+ } else {
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ if(SoImmVal == -1) {
+ Val = 0x1000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ }
+ }
+
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ Val |= SoImmVal;
+ return Val;
+}
+
+/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+ Fixups);
+ int32_t Val = MO.getImm();
+ if (Val == INT32_MIN)
+ Val = 0x1000;
+ else if (Val < 0) {
+ Val *= -1;
+ Val |= 0x1000;
+ }
+ return Val;
+}
+
+/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+ Fixups);
+ return MO.getImm();
+}
+
+/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &) const {
+ // [Rn, Rm]
+ // {5-3} = Rm
+ // {2-0} = Rn
+ const MCOperand &MO1 = MI.getOperand(OpIdx);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
+ return (Rm << 3) | Rn;
+}
+
+/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ unsigned Reg, Imm12;
+ bool isAdd = true;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
+ Imm12 = 0;
+ isAdd = false ; // 'U' bit is set as part of the fixup.
+
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+
+ MCFixupKind Kind;
+ if (isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ } else {
+ Reg = ARM::PC;
+ int32_t Offset = MO.getImm();
+ // FIXME: Handle #-0.
+ if (Offset < 0) {
+ Offset *= -1;
+ isAdd = false;
+ }
+ Imm12 = Offset;
+ }
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups);
+
+ uint32_t Binary = Imm12 & 0xfff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+}
+
+/// getT2Imm8s4OpValue - Return encoding info for
+/// '+/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
+
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ int32_t Imm8 = MI.getOperand(OpIdx).getImm();
+ bool isAdd = Imm8 >= 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (Imm8 < 0)
+ Imm8 = -(uint32_t)Imm8;
+
+ // Scaled by 4.
+ Imm8 /= 4;
+
+ uint32_t Binary = Imm8 & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ return Binary;
+}
+
+/// getT2AddrModeImm8s4OpValue - Return encoding info for
+/// 'reg +/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd = true;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false ; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
+ uint32_t Binary = (Imm8 >> 2) & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+/// getT2AddrModeImm0_1020s4OpValue - Return encoding info for
+/// 'reg + imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {11-8} = reg
+ // {7-0} = imm8
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Imm8 = MO1.getImm();
+ return (Reg << 8) | Imm8;
+}
+
+// FIXME: This routine assumes that a binary
+// expression will always result in a PCRel expression
+// In reality, its only true if one or more subexpressions
+// is itself a PCRel (i.e. "." in asm or some other pcrel construct)
+// but this is good enough for now.
+static bool EvaluateAsPCRel(const MCExpr *Expr) {
+ switch (Expr->getKind()) {
+ default: llvm_unreachable("Unexpected expression type");
+ case MCExpr::SymbolRef: return false;
+ case MCExpr::Binary: return true;
+ }
+}
+
+uint32_t
+ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {20-16} = imm{15-12}
+ // {11-0} = imm{11-0}
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ // Hi / lo 16 bits already extracted during earlier passes.
+ return static_cast<unsigned>(MO.getImm());
+
+ // Handle :upper16: and :lower16: assembly prefixes.
+ const MCExpr *E = MO.getExpr();
+ MCFixupKind Kind;
+ if (E->getKind() == MCExpr::Target) {
+ const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
+ E = ARM16Expr->getSubExpr();
+
+ switch (ARM16Expr->getKind()) {
+ default: llvm_unreachable("Unsupported ARMFixup");
+ case ARMMCExpr::VK_ARM_HI16:
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movt_hi16_pcrel
+ : ARM::fixup_arm_movt_hi16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movt_hi16
+ : ARM::fixup_arm_movt_hi16);
+ break;
+ case ARMMCExpr::VK_ARM_LO16:
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16_pcrel
+ : ARM::fixup_arm_movw_lo16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
+ break;
+ }
+ Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
+ return 0;
+ }
+ // If the expression doesn't have :upper16: or :lower16: on it,
+ // it's just a plain immediate expression, and those evaluate to
+ // the lower 16 bits of the expression regardless of whether
+ // we have a movt or a movw.
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16_pcrel
+ : ARM::fixup_arm_movw_lo16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
+ Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
+ return 0;
+}
+
+uint32_t ARMMCCodeEmitter::
+getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
+ bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
+ unsigned SBits = getShiftOp(ShOp);
+
+ // While "lsr #32" and "asr #32" exist, they are encoded with a 0 in the shift
+ // amount. However, it would be an easy mistake to make so check here.
+ assert((ShImm & ~0x1f) == 0 && "Out of range shift amount");
+
+ // {16-13} = Rn
+ // {12} = isAdd
+ // {11-0} = shifter
+ // {3-0} = Rm
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+ uint32_t Binary = Rm;
+ Binary |= Rn << 13;
+ Binary |= SBits << 5;
+ Binary |= ShImm << 7;
+ if (isAdd)
+ Binary |= 1 << 12;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-14} Rn
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
+ Binary |= Rn << 14;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM2Op(Imm) == ARM_AM::add;
+ bool isReg = MO.getReg() != 0;
+ uint32_t Binary = ARM_AM::getAM2Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm12
+ if (isReg) {
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
+ Binary <<= 7; // Shift amount is bits [11:7]
+ Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
+ Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0]
+ }
+ return Binary | (isAdd << 12) | (isReg << 13);
+}
+
+uint32_t ARMMCCodeEmitter::
+getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {4} isAdd
+ // {3-0} Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ bool isAdd = MO1.getImm() != 0;
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {9} 1 == imm8, 0 == Rm
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ return Imm8 | (isAdd << 8) | (isImm << 9);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+
+ // If The first operand isn't a register, we have a label reference.
+ if (!MO.isReg()) {
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10_unscaled);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ return (Rn << 9) | (1 << 13);
+ }
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Imm = MO2.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO1.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
+}
+
+/// getAddrModeThumbSPOpValue - Encode the t_addrmode_sp operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [SP, #imm]
+ // {7-0} = imm8
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ assert(MI.getOperand(OpIdx).getReg() == ARM::SP &&
+ "Unexpected base register!");
+
+ // The immediate is already shifted for the implicit zeroes, so no change
+ // here.
+ return MO1.getImm() & 0xff;
+}
+
+/// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [Rn, #imm]
+ // {7-3} = imm5
+ // {2-0} = Rn
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Imm5 = MO1.getImm();
+ return ((Imm5 & 0x1f) << 3) | Rn;
+}
+
+/// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+ return (MO.getImm() >> 2);
+}
+
+/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is handled as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind;
+ if (isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ } else {
+ EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+ isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add;
+ }
+
+ uint32_t Binary = ARM_AM::getAM5Offset(Imm8);
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is Rs, the amount to shift by, and the third specifies
+ // the type of the shift.
+ //
+ // {3-0} = Rm.
+ // {4} = 1
+ // {6-5} = type
+ // {11-8} = Rs
+ // {7} = 0
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode Rm.
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ }
+ }
+
+ Binary |= SBits << 4;
+
+ // Encode the shift operation Rs.
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ // RRX - 110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ case ARM_AM::rrx:
+ Binary |= 0x60;
+ return Binary;
+ }
+
+ // Encode shift_imm bit[11:7].
+ Binary |= SBits << 4;
+ unsigned Offset = ARM_AM::getSORegOffset(MO1.getImm());
+ assert(Offset < 32 && "Offset must be in range 0-31!");
+ return Binary | (Offset << 7);
+}
+
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+ const MCOperand &MO3 = MI.getOperand(OpNum+2);
+
+ // Encoded as [Rn, Rm, imm].
+ // FIXME: Needs fixup support.
+ unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ Value <<= 4;
+ Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
+ Value <<= 2;
+ Value |= MO3.getImm();
+
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+
+ // Even though the immediate is 8 bits long, we need 9 bits in order
+ // to represent the (inverse of the) sign bit.
+ Value <<= 9;
+ int32_t tmp = (int32_t)MO2.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 4096; // Set the ADD bit
+ Value |= tmp & 4095;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::rrx: // FALLTHROUGH
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO1.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // 10 bits. lower 5 bits are are the lsb of the mask, high five bits are the
+ // msb of the mask.
+ const MCOperand &MO = MI.getOperand(Op);
+ uint32_t v = ~MO.getImm();
+ uint32_t lsb = CountTrailingZeros_32(v);
+ uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1;
+ assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+ return lsb | (msb << 5);
+}
+
+unsigned ARMMCCodeEmitter::
+getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // VLDM/VSTM:
+ // {12-8} = Vd
+ // {7-0} = Number of registers
+ //
+ // LDM/STM:
+ // {15-0} = Bitfield of GPRs.
+ unsigned Reg = MI.getOperand(Op).getReg();
+ bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
+ bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
+
+ unsigned Binary = 0;
+
+ if (SPRRegs || DPRRegs) {
+ // VLDM/VSTM
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
+ unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
+ Binary |= (RegNo & 0x1f) << 8;
+ if (SPRRegs)
+ Binary |= NumRegs;
+ else
+ Binary |= NumRegs * 2;
+ } else {
+ for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg());
+ Binary |= 1 << RegNo;
+ }
+ }
+
+ return Binary;
+}
+
+/// getAddrMode6AddressOpValue - Encode an addrmode6 register number along
+/// with the alignment operand.
+unsigned ARMMCCodeEmitter::
+getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x02; break;
+ case 32: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+/// getAddrMode6OneLane32AddressOpValue - Encode an addrmode6 register number
+/// along with the alignment operand for use in VST1 and VLD1 with size 32.
+unsigned ARMMCCodeEmitter::
+getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 8:
+ case 16:
+ case 32: // Default '0' value for invalid alignments of 8, 16, 32 bytes.
+ case 2: Align = 0x00; break;
+ case 4: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+
+/// getAddrMode6DupAddressOpValue - Encode an addrmode6 register number and
+/// alignment operand for use in VLD-dup instructions. This is the same as
+/// getAddrMode6AddressOpValue except for the alignment encoding, which is
+/// different for VLD4-dup.
+unsigned ARMMCCodeEmitter::
+getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+unsigned ARMMCCodeEmitter::
+getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ if (MO.getReg() == 0) return 0x0D;
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+}
+
+void ARMMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Pseudo instructions don't get encoded.
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+ if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo)
+ return;
+
+ int Size;
+ if (Desc.getSize() == 2 || Desc.getSize() == 4)
+ Size = Desc.getSize();
+ else
+ llvm_unreachable("Unexpected instruction size!");
+
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+ // Thumb 32-bit wide instructions need to emit the high order halfword
+ // first.
+ if (isThumb() && Size == 4) {
+ EmitConstant(Binary >> 16, 2, OS);
+ EmitConstant(Binary & 0xffff, 2, OS);
+ } else
+ EmitConstant(Binary, Size, OS);
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+#include "ARMGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
new file mode 100644
index 000000000000..fc8505b052bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -0,0 +1,72 @@
+//===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armmcexpr"
+#include "ARMMCExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+using namespace llvm;
+
+const ARMMCExpr*
+ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) ARMMCExpr(Kind, Expr);
+}
+
+void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_ARM_HI16: OS << ":upper16:"; break;
+ case VK_ARM_LO16: OS << ":lower16:"; break;
+ }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+bool
+ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return false;
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expr!");
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols_(BE->getLHS(), Asm);
+ AddValueSymbols_(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbols_(getSubExpr(), Asm);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
new file mode 100644
index 000000000000..cd4067a52955
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -0,0 +1,76 @@
+//===-- ARMMCExpr.h - ARM specific MC expression classes --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCEXPR_H
+#define ARMMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class ARMMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_ARM_None,
+ VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file)
+ VK_ARM_LO16 // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file)
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_HI16, Expr, Ctx);
+ }
+
+ static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_LO16, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ // There are no TLS ARMMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
new file mode 100644
index 000000000000..f09fb5a94fd8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -0,0 +1,298 @@
+//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseInfo.h"
+#include "ARMELFStreamer.h"
+#include "ARMMCAsmInfo.h"
+#include "ARMMCTargetDesc.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_REGINFO_MC_DESC
+#include "ARMGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "ARMGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
+ Triple triple(TT);
+
+ // Set the boolean corresponding to the current target triple, or the default
+ // if one cannot be determined, to true.
+ unsigned Len = TT.size();
+ unsigned Idx = 0;
+
+ // FIXME: Enhance Triple helper class to extract ARM version.
+ bool isThumb = false;
+ if (Len >= 5 && TT.substr(0, 4) == "armv")
+ Idx = 4;
+ else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
+ isThumb = true;
+ if (Len >= 7 && TT[5] == 'v')
+ Idx = 6;
+ }
+
+ bool NoCPU = CPU == "generic" || CPU.empty();
+ std::string ARMArchFeature;
+ if (Idx) {
+ unsigned SubVer = TT[Idx];
+ if (SubVer >= '7' && SubVer <= '9') {
+ if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ if (NoCPU)
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') {
+ if (NoCPU)
+ // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
+ // FeatureT2XtPk, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ } else if (Len >= Idx+2 && TT[Idx+1] == 's') {
+ if (NoCPU)
+ // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ // Swift
+ ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ } else {
+ // v7 CPUs have lots of different feature sets. If no CPU is specified,
+ // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
+ // the "minimum" feature set and use CPU string to figure out the exact
+ // features.
+ if (NoCPU)
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ }
+ } else if (SubVer == '6') {
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
+ ARMArchFeature = "+v6t2";
+ else if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ if (NoCPU)
+ // v6m: FeatureNoARM, FeatureMClass
+ ARMArchFeature = "+v6,+noarm,+mclass";
+ else
+ ARMArchFeature = "+v6";
+ } else
+ ARMArchFeature = "+v6";
+ } else if (SubVer == '5') {
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+ ARMArchFeature = "+v5te";
+ else
+ ARMArchFeature = "+v5t";
+ } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't')
+ ARMArchFeature = "+v4t";
+ }
+
+ if (isThumb) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+thumb-mode";
+ else
+ ARMArchFeature += ",+thumb-mode";
+ }
+
+ if (triple.isOSNaCl()) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+nacl-trap";
+ else
+ ARMArchFeature += ",+nacl-trap";
+ }
+
+ return ARMArchFeature;
+}
+
+MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitARMMCSubtargetInfo(X, TT, CPU, ArchFS);
+ return X;
+}
+
+static MCInstrInfo *createARMMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitARMMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);
+ return X;
+}
+
+static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return new ARMMCAsmInfoDarwin();
+
+ return new ARMELFMCAsmInfo();
+}
+
+static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default) {
+ Triple TheTriple(TT);
+ // Default relocation model on Darwin is PIC, not DynamicNoPIC.
+ RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
+ }
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false);
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("ARM does not support Windows COFF format");
+ }
+
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack,
+ TheTriple.getArch() == Triple::thumb);
+}
+
+static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new ARMInstPrinter(MAI, MII, MRI, STI);
+ return 0;
+}
+
+namespace {
+
+class ARMMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ ARMMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
+
+ virtual bool isUnconditionalBranch(const MCInst &Inst) const {
+ // BCCs with the "always" predicate are unconditional branches.
+ if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL)
+ return true;
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ virtual bool isConditionalBranch(const MCInst &Inst) const {
+ // BCCs with the "always" predicate are unconditional branches.
+ if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL)
+ return false;
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ // We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ // FIXME: This is not right for thumb.
+ return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ }
+};
+
+}
+
+static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new ARMMCInstrAnalysis(Info);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo);
+ RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheARMTarget, createARMMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheThumbTarget, createARMMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheARMTarget,
+ createARMMCInstrAnalysis);
+ TargetRegistry::RegisterMCInstrAnalysis(TheThumbTarget,
+ createARMMCInstrAnalysis);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheARMTarget, createARMAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheThumbTarget, createARMAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
new file mode 100644
index 000000000000..a89981e4f060
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -0,0 +1,77 @@
+//===-- ARMMCTargetDesc.h - ARM Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCTARGETDESC_H
+#define ARMMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class raw_ostream;
+
+extern Target TheARMTarget, TheThumbTarget;
+
+namespace ARM_MC {
+ std::string ParseARMTriple(StringRef TT, StringRef CPU);
+
+ /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU);
+
+/// createARMELFObjectWriter - Construct an ELF Mach-O object writer.
+MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI);
+
+/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+} // End llvm namespace
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "ARMGenRegisterInfo.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "ARMGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
new file mode 100644
index 000000000000..b9efe74b41e5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -0,0 +1,497 @@
+//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+ void RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
+
+ bool requiresExternRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCFragment &Fragment,
+ unsigned RelocType, const MCSymbolData *SD,
+ uint64_t FixedValue);
+
+public:
+ ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/true) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+};
+}
+
+static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
+ unsigned &Log2Size) {
+ RelocType = unsigned(macho::RIT_Vanilla);
+ Log2Size = ~0U;
+
+ switch (Kind) {
+ default:
+ return false;
+
+ case FK_Data_1:
+ Log2Size = llvm::Log2_32(1);
+ return true;
+ case FK_Data_2:
+ Log2Size = llvm::Log2_32(2);
+ return true;
+ case FK_Data_4:
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ case FK_Data_8:
+ Log2Size = llvm::Log2_32(8);
+ return true;
+
+ // Handle 24-bit branch kinds.
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
+ RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ // Handle Thumb branches.
+ case ARM::fixup_arm_thumb_br:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(2);
+ return true;
+
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ // For movw/movt r_type relocations they always have a pair following them and
+ // the r_length bits are used differently. The encoding of the r_length is as
+ // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 1;
+ return true;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 3;
+ return true;
+
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 0;
+ return true;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 2;
+ return true;
+ }
+}
+
+void ARMMachObjectWriter::
+RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_ARM_Half;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint32_t Value2 = 0;
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_ARM_HalfDifference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
+ //
+ // For these two r_type relocations they always have a pair following them and
+ // the r_length bits are used differently. The encoding of the r_length is as
+ // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
+ // the other half of the relocated expression is in the following pair
+ // relocation entry in the low 16 bits of r_address field.
+ unsigned ThumbBit = 0;
+ unsigned MovtBit = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ MovtBit = 1;
+ // The thumb bit shouldn't be set in the 'other-half' bit of the
+ // relocation, but it will be set in FixedValue if the base symbol
+ // is a thumb function. Clear it out here.
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
+ MovtBit = 1;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ ThumbBit = 1;
+ break;
+ }
+
+ if (Type == macho::RIT_ARM_HalfDifference) {
+ uint32_t OtherHalf = MovtBit
+ ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((OtherHalf << 0) |
+ (macho::RIT_Pair << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_Difference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCFragment &Fragment,
+ unsigned RelocType,
+ const MCSymbolData *SD,
+ uint64_t FixedValue) {
+ // Most cases can be identified purely from the symbol.
+ if (Writer->doesSymbolRequireExternRelocation(SD))
+ return true;
+ int64_t Value = (int64_t)FixedValue; // The displacement is signed.
+ int64_t Range;
+ switch (RelocType) {
+ default:
+ return false;
+ case macho::RIT_ARM_Branch24Bit:
+ // PC pre-adjustment of 8 for these instructions.
+ Value -= 8;
+ // ARM BL/BLX has a 25-bit offset.
+ Range = 0x1ffffff;
+ break;
+ case macho::RIT_ARM_ThumbBranch22Bit:
+ // PC pre-adjustment of 4 for these instructions.
+ Value -= 4;
+ // Thumb BL/BLX has a 24-bit offset.
+ Range = 0xffffff;
+ }
+ // BL/BLX also use external relocations when an internal relocation
+ // would result in the target being out of range. This gives the linker
+ // enough information to generate a branch island.
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Value += Writer->getSectionAddress(&SymSD);
+ Value -= Writer->getSectionAddress(Fragment.getParent());
+ // If the resultant value would be out of range for an internal relocation,
+ // use an external instead.
+ if (Value > Range || Value < -(Range + 1))
+ return true;
+ return false;
+}
+
+void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size;
+ unsigned RelocType = macho::RIT_Vanilla;
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
+ // If we failed to get fixup kind info, it's because there's no legal
+ // relocation type for the fixup kind. This happens when it's a fixup that's
+ // expected to always be resolvable at assembly time and not have any
+ // relocations needed.
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "unsupported relocation on symbol");
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB()) {
+ if (RelocType == macho::RIT_ARM_Half)
+ return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
+ Fixup, Target, FixedValue);
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ }
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // FIXME: For other platforms, we need to use scattered relocations for
+ // internal relocations with offsets. If this is an internal relocation with
+ // an offset, it also needs a scattered relocation entry.
+ //
+ // Is this right for ARM?
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel && RelocType == macho::RIT_Vanilla)
+ Offset += 1 << Log2Size;
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // FIXME!
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD,
+ FixedValue)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ // The type is determined by the fixup kind.
+ Type = RelocType;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+
+ // Even when it's not a scattered relocation, movw/movt always uses
+ // a PAIR relocation.
+ if (Type == macho::RIT_ARM_Half) {
+ // The other-half value only gets populated for the movt and movw
+ // relocation entries.
+ uint32_t Value = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Value = (FixedValue >> 16) & 0xffff;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Value = FixedValue & 0xffff;
+ break;
+ }
+ macho::RelocationEntry MREPair;
+ MREPair.Word0 = Value;
+ MREPair.Word1 = ((0xffffff) |
+ (Log2Size << 25) |
+ (macho::RIT_Pair << 28));
+
+ Writer->addRelocation(Fragment->getParent(), MREPair);
+ }
+
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
new file mode 100644
index 000000000000..dad5576df4cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
@@ -0,0 +1,112 @@
+//===-- ARMUnwindOp.h - ARM Unwind Opcodes ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the constants for the ARM unwind opcodes and exception
+// handling table entry kinds.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_UNWIND_OP_H
+#define ARM_UNWIND_OP_H
+
+namespace llvm {
+
+ /// ARM exception handling table entry kinds
+ enum ARMEHTEntryKind {
+ EHT_GENERIC = 0x00,
+ EHT_COMPACT = 0x80
+ };
+
+ enum {
+ /// Special entry for the function never unwind
+ EXIDX_CANTUNWIND = 0x1
+ };
+
+ /// ARM-defined frame unwinding opcodes
+ enum ARMUnwindOpcodes {
+ // Format: 00xxxxxx
+ // Purpose: vsp = vsp + ((x << 2) + 4)
+ UNWIND_OPCODE_INC_VSP = 0x00,
+
+ // Format: 01xxxxxx
+ // Purpose: vsp = vsp - ((x << 2) + 4)
+ UNWIND_OPCODE_DEC_VSP = 0x40,
+
+ // Format: 10000000 00000000
+ // Purpose: refuse to unwind
+ UNWIND_OPCODE_REFUSE = 0x8000,
+
+ // Format: 1000xxxx xxxxxxxx
+ // Purpose: pop r[15:12], r[11:4]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_REG_MASK_R4 = 0x8000,
+
+ // Format: 1001xxxx
+ // Purpose: vsp = r[x]
+ // Constraint: x != 13 && x != 15
+ UNWIND_OPCODE_SET_VSP = 0x90,
+
+ // Format: 10100xxx
+ // Purpose: pop r[(4+x):4]
+ UNWIND_OPCODE_POP_REG_RANGE_R4 = 0xa0,
+
+ // Format: 10101xxx
+ // Purpose: pop r14, r[(4+x):4]
+ UNWIND_OPCODE_POP_REG_RANGE_R4_R14 = 0xa8,
+
+ // Format: 10110000
+ // Purpose: finish
+ UNWIND_OPCODE_FINISH = 0xb0,
+
+ // Format: 10110001 0000xxxx
+ // Purpose: pop r[3:0]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_REG_MASK = 0xb100,
+
+ // Format: 10110010 x(uleb128)
+ // Purpose: vsp = vsp + ((x << 2) + 0x204)
+ UNWIND_OPCODE_INC_VSP_ULEB128 = 0xb2,
+
+ // Format: 10110011 xxxxyyyy
+ // Purpose: pop d[(x+y):x]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX = 0xb300,
+
+ // Format: 10111xxx
+ // Purpose: pop d[(8+x):8]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX_D8 = 0xb8,
+
+ // Format: 11000xxx
+ // Purpose: pop wR[(10+x):10]
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE_WR10 = 0xc0,
+
+ // Format: 11000110 xxxxyyyy
+ // Purpose: pop wR[(x+y):x]
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE = 0xc600,
+
+ // Format: 11000111 0000xxxx
+ // Purpose: pop wCGR[3:0]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_MASK = 0xc700,
+
+ // Format: 11001000 xxxxyyyy
+ // Purpose: pop d[(16+x+y):(16+x)]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 = 0xc800,
+
+ // Format: 11001001 xxxxyyyy
+ // Purpose: pop d[(x+y):x]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD = 0xc900,
+
+ // Format: 11010xxx
+ // Purpose: pop d[(8+x):8]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0
+ };
+
+}
+
+#endif // ARM_UNWIND_OP_H
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
new file mode 100644
index 000000000000..2e266c2e9624
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -0,0 +1,399 @@
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
+// multiple and add / sub instructions) when special VMLx hazards are detected.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mlx-expansion"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
+
+STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
+
+namespace {
+ struct MLxExpansion : public MachineFunctionPass {
+ static char ID;
+ MLxExpansion() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM MLA / MLS expansion pass";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ bool isLikeA9;
+ bool isSwift;
+ unsigned MIIdx;
+ MachineInstr* LastMIs[4];
+ SmallPtrSet<MachineInstr*, 4> IgnoreStall;
+
+ void clearStack();
+ void pushStack(MachineInstr *MI);
+ MachineInstr *getAccDefMI(MachineInstr *MI) const;
+ unsigned getDefReg(MachineInstr *MI) const;
+ bool hasLoopHazard(MachineInstr *MI) const;
+ bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
+ bool FindMLxHazard(MachineInstr *MI);
+ void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane);
+ bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
+ };
+ char MLxExpansion::ID = 0;
+}
+
+void MLxExpansion::clearStack() {
+ std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
+ MIIdx = 0;
+}
+
+void MLxExpansion::pushStack(MachineInstr *MI) {
+ LastMIs[MIIdx] = MI;
+ if (++MIIdx == 4)
+ MIIdx = 0;
+}
+
+MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
+ // Look past COPY and INSERT_SUBREG instructions to find the
+ // real definition MI. This is important for _sfp instructions.
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+ if (DefMI->getParent() != MBB)
+ break;
+ if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+ break;
+ }
+ return DefMI;
+}
+
+unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+
+ while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
+ Reg = UseMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+ UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+ }
+
+ return Reg;
+}
+
+/// hasLoopHazard - Check whether an MLx instruction is chained to itself across
+/// a single-MBB loop.
+bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+outer_continue:
+ if (DefMI->getParent() != MBB)
+ break;
+
+ if (DefMI->isPHI()) {
+ for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
+ if (DefMI->getOperand(i + 1).getMBB() == MBB) {
+ unsigned SrcReg = DefMI->getOperand(i).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DefMI = MRI->getVRegDef(SrcReg);
+ goto outer_continue;
+ }
+ }
+ }
+ } else if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ return DefMI == MI;
+}
+
+bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
+ // FIXME: Detect integer instructions properly.
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MI->mayStore())
+ return false;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(Reg, TRI);
+ return false;
+}
+
+static bool isFpMulInstruction(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::VMULS:
+ case ARM::VMULfd:
+ case ARM::VMULfq:
+ case ARM::VMULD:
+ case ARM::VMULslfd:
+ case ARM::VMULslfq:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
+ if (NumExpand >= ExpandLimit)
+ return false;
+
+ if (ForceExapnd)
+ return true;
+
+ MachineInstr *DefMI = getAccDefMI(MI);
+ if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
+ // r0 = vmla
+ // r3 = vmla r0, r1, r2
+ // takes 16 - 17 cycles
+ //
+ // r0 = vmla
+ // r4 = vmul r1, r2
+ // r3 = vadd r0, r4
+ // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+ IgnoreStall.insert(DefMI);
+ return true;
+ }
+
+ // On Swift, we mostly care about hazards from multiplication instructions
+ // writing the accumulator and the pipelining of loop iterations by out-of-
+ // order execution.
+ if (isSwift)
+ return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
+
+ if (IgnoreStall.count(MI))
+ return false;
+
+ // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
+ // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
+ // preserves the in-order retirement of the instructions.
+ // Look at the next few instructions, if *most* of them can cause hazards,
+ // then the scheduler can't *fix* this, we'd better break up the VMLA.
+ unsigned Limit1 = isLikeA9 ? 1 : 4;
+ unsigned Limit2 = isLikeA9 ? 1 : 4;
+ for (unsigned i = 1; i <= 4; ++i) {
+ int Idx = ((int)MIIdx - i + 4) % 4;
+ MachineInstr *NextMI = LastMIs[Idx];
+ if (!NextMI)
+ continue;
+
+ if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
+ if (i <= Limit1)
+ return true;
+ }
+
+ // Look for VMLx RAW hazard.
+ if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
+/// of MUL + ADD / SUB instructions.
+void
+MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool DstDead = MI->getOperand(0).isDead();
+ unsigned AccReg = MI->getOperand(1).getReg();
+ unsigned Src1Reg = MI->getOperand(2).getReg();
+ unsigned Src2Reg = MI->getOperand(3).getReg();
+ bool Src1Kill = MI->getOperand(2).isKill();
+ bool Src2Kill = MI->getOperand(3).isKill();
+ unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
+ unsigned NextOp = HasLane ? 5 : 4;
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
+ unsigned PredReg = MI->getOperand(++NextOp).getReg();
+
+ const MCInstrDesc &MCID1 = TII->get(MulOpc);
+ const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ unsigned TmpReg = MRI->createVirtualRegister(
+ TII->getRegClass(MCID1, 0, TRI, MF));
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
+ .addReg(Src1Reg, getKillRegState(Src1Kill))
+ .addReg(Src2Reg, getKillRegState(Src2Kill));
+ if (HasLane)
+ MIB.addImm(LaneImm);
+ MIB.addImm(Pred).addReg(PredReg);
+
+ MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
+
+ if (NegAcc) {
+ bool AccKill = MRI->hasOneNonDBGUse(AccReg);
+ MIB.addReg(TmpReg, getKillRegState(true))
+ .addReg(AccReg, getKillRegState(AccKill));
+ } else {
+ MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
+ }
+ MIB.addImm(Pred).addReg(PredReg);
+
+ DEBUG({
+ dbgs() << "Expanding: " << *MI;
+ dbgs() << " to:\n";
+ MachineBasicBlock::iterator MII = MI;
+ MII = llvm::prior(MII);
+ MachineInstr &MI2 = *MII;
+ MII = llvm::prior(MII);
+ MachineInstr &MI1 = *MII;
+ dbgs() << " " << MI1;
+ dbgs() << " " << MI2;
+ });
+
+ MI->eraseFromParent();
+ ++NumExpand;
+}
+
+bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ clearStack();
+ IgnoreStall.clear();
+
+ unsigned Skip = 0;
+ MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
+ while (MII != E) {
+ MachineInstr *MI = &*MII;
+
+ if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) {
+ ++MII;
+ continue;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->isBarrier()) {
+ clearStack();
+ Skip = 0;
+ ++MII;
+ continue;
+ }
+
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainGeneral) {
+ if (++Skip == 2)
+ // Assume dual issues of non-VFP / NEON instructions.
+ pushStack(0);
+ } else {
+ Skip = 0;
+
+ unsigned MulOpc, AddSubOpc;
+ bool NegAcc, HasLane;
+ if (!TII->isFpMLxInstruction(MCID.getOpcode(),
+ MulOpc, AddSubOpc, NegAcc, HasLane) ||
+ !FindMLxHazard(MI))
+ pushStack(MI);
+ else {
+ ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
+ E = MBB.rend(); // May have changed if MI was the 1st instruction.
+ Changed = true;
+ continue;
+ }
+ }
+
+ ++MII;
+ }
+
+ return Changed;
+}
+
+bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ isLikeA9 = STI->isLikeA9() || STI->isSwift();
+ isSwift = STI->isSwift();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= ExpandFPMLxInstructions(MBB);
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createMLxExpansionPass() {
+ return new MLxExpansion();
+}
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
new file mode 100644
index 000000000000..fa5681fb12bf
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- ARMTargetInfo.cpp - ARM Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheARMTarget, llvm::TheThumbTarget;
+
+extern "C" void LLVMInitializeARMTargetInfo() {
+ RegisterTarget<Triple::arm, /*HasJIT=*/true>
+ X(TheARMTarget, "arm", "ARM");
+
+ RegisterTarget<Triple::thumb, /*HasJIT=*/true>
+ Y(TheThumbTarget, "thumb", "Thumb");
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
new file mode 100644
index 000000000000..2c3388cc452c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -0,0 +1,407 @@
+//===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1FrameLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has small immediate offset to
+ // address the stack frame. So a large call frame can cause poor codegen
+ // and may even makes it impossible to scavenge a register.
+ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void
+emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, MIFlags);
+}
+
+
+void Thumb1FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned BasePtr = RegInfo->getBaseRegister();
+
+ // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+ NumBytes = (NumBytes + 3) & ~3;
+ MFI->setStackSize(NumBytes);
+
+ // Determine the sizes of each callee-save spill areas and record which frame
+ // belongs to which callee-save spill areas.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+ MachineInstr::FrameSetup);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetIOS()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ ++MBBI;
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ bool HasFP = hasFP(MF);
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+ NumBytes = DPRCSOffset;
+
+ // Adjust FP so it point to the stack slot that contains the previous FP.
+ if (HasFP) {
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup));
+ if (NumBytes > 508)
+ // If offset is > 508 then sp cannot be adjusted in a single instruction,
+ // try restoring from fp instead.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (NumBytes)
+ // Insert it after all the callee-save spills.
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
+
+ if (STI.isTargetELF() && HasFP)
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // Thumb1 does not currently support dynamic stack realignment. Report a
+ // fatal error rather then silently generate bad code.
+ if (RegInfo->needsStackRealignment(MF))
+ report_fatal_error("Dynamic stack realignment not supported for thumb1.");
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF))
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
+ .addReg(ARM::SP));
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
+ if (MI->getOpcode() == ARM::tLDRspi &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+ return true;
+ else if (MI->getOpcode() == ARM::tPOP) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert((MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET) &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot, the target is ELF and the function has FP, or
+ // the target uses var sized objects.
+ if (NumBytes) {
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ TII, *RegInfo);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
+ } else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
+ } else {
+ if (MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI &&
+ prior(MBBI)->getOpcode() == ARM::tPOP) {
+ MachineBasicBlock::iterator PMBBI = prior(MBBI);
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ }
+ }
+
+ if (VARegSaveSize) {
+ // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ // to LR, and we can't pop the value directly to the PC since
+ // we need to update the SP after popping the value. Therefore, we
+ // pop the old LR into R3 as a temporary.
+
+ // Move back past the callee-saved register restoration
+ while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ // Epilogue for vararg functions: pop LR to R3 and branch off it.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(ARM::R3, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill);
+ AddDefaultPred(MIB);
+ MIB.copyImplicitOps(&*MBBI);
+ // erase the old tBX_RET instruction
+ MBB.erase(MBBI);
+ }
+}
+
+bool Thumb1FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(MIB);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ MIB.addReg(Reg, getKillRegState(isKill));
+ }
+ MIB.setMIFlags(MachineInstr::FrameSetup);
+ return true;
+}
+
+bool Thumb1FrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
+ AddDefaultPred(MIB);
+
+ bool NumRegs = false;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ MIB.copyImplicitOps(&*MI);
+ MI = MBB.erase(MI);
+ }
+ MIB.addReg(Reg, getDefRegState(true));
+ NumRegs = true;
+ }
+
+ // It's illegal to emit pop instruction without operands.
+ if (NumRegs)
+ MBB.insert(MI, &*MIB);
+ else
+ MF.DeleteMachineInstr(MIB);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
new file mode 100644
index 000000000000..5a300afd5d36
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -0,0 +1,56 @@
+//===-- Thumb1FrameLowering.h - Thumb1-specific frame info stuff --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __THUMB_FRAMEINFO_H_
+#define __THUMB_FRAMEINFO_H_
+
+#include "ARM.h"
+#include "ARMFrameLowering.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1FrameLowering : public ARMFrameLowering {
+public:
+ explicit Thumb1FrameLowering(const ARMSubtarget &sti)
+ : ARMFrameLowering(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
new file mode 100644
index 000000000000..095736d52a88
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -0,0 +1,104 @@
+//===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1InstrInfo.h"
+#include "ARM.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(ARM::tMOVr);
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R8));
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R8));
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+}
+
+unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ return 0;
+}
+
+void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+ assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
+ "Thumb1 can only copy GPR registers");
+}
+
+void Thumb1InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == &ARM::tGPRRegClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) && "Unknown regclass!");
+
+ if (RC == &ARM::tGPRRegClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
+
+void Thumb1InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == &ARM::tGPRRegClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) && "Unknown regclass!");
+
+ if (RC == &ARM::tGPRRegClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
new file mode 100644
index 000000000000..36af20492d4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -0,0 +1,61 @@
+//===-- Thumb1InstrInfo.h - Thumb-1 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1INSTRUCTIONINFO_H
+#define THUMB1INSTRUCTIONINFO_H
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1InstrInfo : public ARMBaseInstrInfo {
+ Thumb1RegisterInfo RI;
+public:
+ explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb1RegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+};
+}
+
+#endif // THUMB1INSTRUCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
new file mode 100644
index 000000000000..7452fb776ebd
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -0,0 +1,679 @@
+//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1RegisterInfo.h"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+extern cl::opt<bool> ReuseFrameIndexVals;
+}
+
+using namespace llvm;
+
+Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
+
+const TargetRegisterClass*
+Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+ const {
+ if (ARM::tGPRRegClass.hasSubClassEq(RC))
+ return &ARM::tGPRRegClass;
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+}
+
+const TargetRegisterClass *
+Thumb1RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ return &ARM::tGPRRegClass;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void
+Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
+}
+
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, bool CanChangeCC,
+ const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags = MachineInstr::NoFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ bool isHigh = !isARMLowRegister(DestReg) ||
+ (BaseReg != 0 && !isARMLowRegister(BaseReg));
+ bool isSub = false;
+ // Subtract doesn't have high register version. Load the negative value
+ // if either base or dest register is a high register. Also, if do not
+ // issue sub as part of the sequence if condition register is to be
+ // preserved.
+ if (NumBytes < 0 && !isHigh && CanChangeCC) {
+ isSub = true;
+ NumBytes = -NumBytes;
+ }
+ unsigned LdReg = DestReg;
+ if (DestReg == ARM::SP) {
+ assert(BaseReg == ARM::SP && "Unexpected!");
+ LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+ }
+
+ if (NumBytes <= 255 && NumBytes >= 0)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ else if (NumBytes < 0 && NumBytes >= -255) {
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+ .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags);
+ } else
+ MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes,
+ ARMCC::AL, 0, MIFlags);
+
+ // Emit add / sub.
+ int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (Opc != ARM::tADDhirr)
+ MIB = AddDefaultT1CC(MIB);
+ if (DestReg == ARM::SP || isSub)
+ MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+ else
+ MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+ AddDefaultPred(MIB);
+}
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specific add / sub r, c instruction.
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+ unsigned NumBits, unsigned Scale) {
+ unsigned NumMIs = 0;
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+ if (Opc == ARM::tADDrSPi) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ NumMIs++;
+ NumBits = 8;
+ Scale = 1; // Followed by a number of tADDi8.
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ }
+
+ NumMIs += Bytes / Chunk;
+ if ((Bytes % Chunk) != 0)
+ NumMIs++;
+ if (ExtraOpc)
+ NumMIs++;
+ return NumMIs;
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ unsigned Bytes = (unsigned)NumBytes;
+ if (isSub) Bytes = -NumBytes;
+ bool isMul4 = (Bytes & 3) == 0;
+ bool isTwoAddr = false;
+ bool DstNotEqBase = false;
+ unsigned NumBits = 1;
+ unsigned Scale = 1;
+ int Opc = 0;
+ int ExtraOpc = 0;
+ bool NeedCC = false;
+
+ if (DestReg == BaseReg && BaseReg == ARM::SP) {
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ isTwoAddr = true;
+ } else if (!isSub && BaseReg == ARM::SP) {
+ // r1 = add sp, 403
+ // =>
+ // r1 = add sp, 100 * 4
+ // r1 = add r1, 3
+ if (!isMul4) {
+ Bytes &= ~3;
+ ExtraOpc = ARM::tADDi3;
+ }
+ NumBits = 8;
+ Scale = 4;
+ Opc = ARM::tADDrSPi;
+ } else {
+ // sp = sub sp, c
+ // r1 = sub sp, c
+ // r8 = sub sp, c
+ if (DestReg != BaseReg)
+ DstNotEqBase = true;
+ NumBits = 8;
+ if (DestReg == ARM::SP) {
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ } else {
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NumBits = 8;
+ NeedCC = true;
+ }
+ isTwoAddr = true;
+ }
+
+ unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+ unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+ if (NumMIs > Threshold) {
+ // This will expand into too many instructions. Load the immediate from a
+ // constpool entry.
+ emitThumbRegPlusImmInReg(MBB, MBBI, dl,
+ DestReg, BaseReg, NumBytes, true,
+ TII, MRI, MIFlags);
+ return;
+ }
+
+ if (DstNotEqBase) {
+ if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
+ // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
+ unsigned Chunk = (1 << 3) - 1;
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
+ const MachineInstrBuilder MIB =
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
+ .setMIFlags(MIFlags));
+ AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
+ } else {
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill))
+ .setMIFlags(MIFlags);
+ }
+ BaseReg = DestReg;
+ }
+
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+ while (Bytes) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ ThisVal /= Scale;
+ // Build the new tADD / tSUB.
+ if (isTwoAddr) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(DestReg).addImm(ThisVal);
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
+ } else {
+ bool isKill = BaseReg != ARM::SP;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
+
+ BaseReg = DestReg;
+ if (Opc == ARM::tADDrSPi) {
+ // r4 = add sp, imm
+ // r4 = add r4, imm
+ // ...
+ NumBits = 8;
+ Scale = 1;
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NeedCC = isTwoAddr = true;
+ }
+ }
+ }
+
+ if (ExtraOpc) {
+ const MCInstrDesc &MCID = TII.get(ExtraOpc);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
+ .addReg(DestReg, RegState::Kill)
+ .addImm(((unsigned)NumBytes) & 3)
+ .setMIFlags(MIFlags));
+ }
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
+static void emitThumbConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Imm,
+ const TargetInstrInfo &TII,
+ const Thumb1RegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isSub = Imm < 0;
+ if (isSub) Imm = -Imm;
+
+ int Chunk = (1 << 8) - 1;
+ int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+ Imm -= ThisVal;
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8),
+ DestReg))
+ .addImm(ThisVal));
+ if (Imm > 0)
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
+ if (isSub) {
+ const MCInstrDesc &MCID = TII.get(ARM::tRSB);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
+ .addReg(DestReg, RegState::Kill));
+ }
+}
+
+static void removeOperands(MachineInstr &MI, unsigned i) {
+ unsigned Op = i;
+ for (unsigned e = MI.getNumOperands(); i != e; ++i)
+ MI.RemoveOperand(Op);
+}
+
+/// convertToNonSPOpcode - Change the opcode to the non-SP version, because
+/// we're replacing the frame index with a non-SP register.
+static unsigned convertToNonSPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::tLDRspi:
+ return ARM::tLDRi;
+
+ case ARM::tSTRspi:
+ return ARM::tSTRi;
+ }
+
+ return Opcode;
+}
+
+bool Thumb1RegisterInfo::
+rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+
+ if (Opcode == ARM::tADDrSPi) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ // Can't use tADDrSPi if it's based off the frame pointer.
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (FrameReg != ARM::SP) {
+ Opcode = ARM::tADDi3;
+ NumBits = 3;
+ } else {
+ NumBits = 8;
+ Scale = 4;
+ assert((Offset & 3) == 0 &&
+ "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+ }
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset
+ MI.RemoveOperand(FrameRegIdx+1);
+ return true;
+ }
+
+ // Common case: small offset, fits into instruction.
+ unsigned Mask = (1 << NumBits) - 1;
+ if (((Offset / Scale) & ~Mask) == 0) {
+ // Replace the FrameIndex with sp / fp
+ if (Opcode == ARM::tADDi3) {
+ MI.setDesc(TII.get(Opcode));
+ removeOperands(MI, FrameRegIdx);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
+ .addImm(Offset / Scale));
+ } else {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
+ }
+ return true;
+ }
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+ unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+ // MI would expand into a large number of instructions. Don't try to
+ // simplify the immediate.
+ if (NumMIs > 2) {
+ emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+ *this);
+ MBB.erase(II);
+ return true;
+ }
+
+ if (Offset > 0) {
+ // Translate r0 = add sp, imm to
+ // r0 = add sp, 255*4
+ // r0 = add r0, (imm - 255*4)
+ if (Opcode == ARM::tADDi3) {
+ MI.setDesc(TII.get(Opcode));
+ removeOperands(MI, FrameRegIdx);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
+ } else {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
+ }
+ Offset = (Offset - Mask * Scale);
+ MachineBasicBlock::iterator NII = llvm::next(II);
+ emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
+ *this);
+ } else {
+ // Translate r0 = add sp, -imm to
+ // r0 = -imm (this is then translated into a series of instructons)
+ // r0 = add r0, sp
+ emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+
+ MI.setDesc(TII.get(ARM::tADDhirr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
+ }
+ return true;
+ } else {
+ if (AddrMode != ARMII::AddrModeT1_s)
+ llvm_unreachable("Unsupported addressing mode!");
+
+ unsigned ImmIdx = FrameRegIdx + 1;
+ int InstrOffs = MI.getOperand(ImmIdx).getImm();
+ unsigned NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+ unsigned Scale = 4;
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale - 1)) == 0 && "Can't encode this offset!");
+
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with the frame register (e.g., sp).
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ ImmOp.ChangeToImmediate(ImmedOffset);
+
+ // If we're using a register where sp was stored, convert the instruction
+ // to the non-SP version.
+ unsigned NewOpc = convertToNonSPOpcode(Opcode);
+ if (NewOpc != Opcode && FrameReg != ARM::SP)
+ MI.setDesc(TII.get(NewOpc));
+
+ return true;
+ }
+
+ NumBits = 5;
+ Mask = (1 << NumBits) - 1;
+
+ // If this is a thumb spill / restore, we will be using a constpool load to
+ // materialize the offset.
+ if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
+ ImmOp.ChangeToImmediate(0);
+ } else {
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask * Scale);
+ }
+ }
+
+ return Offset == 0;
+}
+
+void
+Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+ assert (Done && "Unable to resolve frame index!");
+ (void)Done;
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool
+Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ // Thumb1 can't use the emergency spill slot on the stack because
+ // ldr/str immediate offsets must be positive, and if we're referencing
+ // off the frame pointer (if, for example, there are alloca() calls in
+ // the function, the offset will be negative. Use R12 instead since that's
+ // a call clobbered register that we know won't be used in Thumb1 mode.
+ DebugLoc DL;
+ AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(Reg, RegState::Kill));
+
+ // The UseMI is where we would like to restore the register. If there's
+ // interference with R12 before then, however, we'll need to restore it
+ // before that instead and adjust the UseMI.
+ bool done = false;
+ for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
+ // If this instruction affects R12, adjust our restore point.
+ for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = II->getOperand(i);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (MO.getReg() == ARM::R12) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ }
+ }
+ // Restore the register from R12
+ AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill));
+
+ return true;
+}
+
+void
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ unsigned VReg = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc dl = MI.getDebugLoc();
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "Unexpected");
+ // There are alloca()'s in this function, must reference off the frame
+ // pointer or base pointer instead.
+ if (!hasBasePointer(MF)) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else
+ FrameReg = BasePtr;
+ }
+
+ // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+ // call frame setup/destroy instructions have already been eliminated. That
+ // means the stack pointer cannot be used to access the emergency spill slot
+ // when !hasReservedCallFrame().
+#ifndef NDEBUG
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
+ assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions without a reserved call frame");
+ assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions with variable sized frame objects");
+ }
+#endif // NDEBUG
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ assert(AFI->isThumbFunction() &&
+ "This eliminateFrameIndex only supports Thumb1!");
+ if (rewriteFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert(Offset && "This code isn't needed if offset already handled!");
+
+ unsigned Opcode = MI.getOpcode();
+
+ // Remove predicate first.
+ int PIdx = MI.findFirstPredOperandIdx();
+ if (PIdx != -1)
+ removeOperands(MI, PIdx);
+
+ if (MI.mayLoad()) {
+ // Use the destination register to materialize sp + offset.
+ unsigned TmpReg = MI.getOperand(0).getReg();
+ bool UseRR = false;
+ if (Opcode == ARM::tLDRspi) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
+ Offset, false, TII, *this);
+ else {
+ emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
+ UseRR = true;
+ }
+ } else {
+ emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
+ *this);
+ }
+
+ MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
+ MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
+ false);
+ } else if (MI.mayStore()) {
+ VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+ bool UseRR = false;
+
+ if (Opcode == ARM::tSTRspi) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
+ Offset, false, TII, *this);
+ else {
+ emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
+ *this);
+ MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
+ MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
+ false);
+ } else {
+ llvm_unreachable("Unexpected opcode!");
+ }
+
+ // Add predicate back if it's needed.
+ if (MI.isPredicable())
+ AddDefaultPred(MIB);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
new file mode 100644
index 000000000000..ebbab36dd7b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -0,0 +1,65 @@
+//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1REGISTERINFO_H
+#define THUMB1REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+
+struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+public:
+ Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ const TargetRegisterClass*
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
+
+ // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+ // however much remains to be handled. Return 'true' if no further
+ // work is required.
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+};
+}
+
+#endif // THUMB1REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
new file mode 100644
index 000000000000..97c254ce75a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -0,0 +1,274 @@
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "thumb2-it"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+using namespace llvm;
+
+STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
+
+namespace {
+ class Thumb2ITBlockPass : public MachineFunctionPass {
+ public:
+ static char ID;
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Thumb IT blocks insertion pass";
+ }
+
+ private:
+ bool MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses);
+ bool InsertITInstructions(MachineBasicBlock &MBB);
+ };
+ char Thumb2ITBlockPass::ID = 0;
+}
+
+/// TrackDefUses - Tracking what registers are being defined and used by
+/// instructions in the IT block. This also tracks "dependencies", i.e. uses
+/// in the IT block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallVector<unsigned, 4> LocalUses;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+ continue;
+ if (MO.isUse())
+ LocalUses.push_back(Reg);
+ else
+ LocalDefs.push_back(Reg);
+ }
+
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.insert(Reg);
+ for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
+ Uses.insert(*Subreg);
+ }
+
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ Defs.insert(Reg);
+ for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
+ Defs.insert(*Subreg);
+ if (Reg == ARM::CPSR)
+ continue;
+ }
+}
+
+static bool isCopy(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::MOVr:
+ case ARM::MOVr_TC:
+ case ARM::tMOVr:
+ case ARM::t2MOVr:
+ return true;
+ }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses) {
+ if (!isCopy(MI))
+ return false;
+ // llvm models select's as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+ assert(MI->getOperand(0).getSubReg() == 0 &&
+ MI->getOperand(1).getSubReg() == 0 &&
+ "Sub-register indices still around?");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // If the CPSR is defined by this copy, then we don't want to move it. E.g.,
+ // if we have:
+ //
+ // movs r1, r1
+ // rsb r1, 0
+ // movs r2, r2
+ // rsb r2, 0
+ //
+ // we don't want this to be converted to:
+ //
+ // movs r1, r1
+ // movs r2, r2
+ // itt mi
+ // rsb r1, 0
+ // rsb r2, 0
+ //
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
+ }
+ return false;
+}
+
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ SmallSet<unsigned, 4> Defs;
+ SmallSet<unsigned, 4> Uses;
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineInstr *MI = &*MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ if (CC == ARMCC::AL) {
+ ++MBBI;
+ continue;
+ }
+
+ Defs.clear();
+ Uses.clear();
+ TrackDefUses(MI, Defs, Uses, TRI);
+
+ // Insert an IT instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
+ .addImm(CC);
+
+ // Add implicit use of ITSTATE to IT block instructions.
+ MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+
+ MachineInstr *LastITMI = MI;
+ MachineBasicBlock::iterator InsertPos = MIB;
+ ++MBBI;
+
+ // Form IT block.
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ unsigned Mask = 0, Pos = 3;
+ // Branches, including tricky ones like LDM_RET, need to end an IT
+ // block so check the instruction we just put in the block.
+ for (; MBBI != E && Pos &&
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ MI = NMI;
+
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
+ Mask |= (NCC & 1) << Pos;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
+ break;
+ }
+ TrackDefUses(NMI, Defs, Uses, TRI);
+ --Pos;
+ }
+
+ // Finalize IT mask.
+ Mask |= (1 << Pos);
+ // Tag along (firstcond[0] << 4) with the mask.
+ Mask |= (CC & 1) << 4;
+ MIB.addImm(Mask);
+
+ // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
+ // Finalize the bundle.
+ MachineBasicBlock::instr_iterator LI = LastITMI;
+ finalizeBundle(MBB, InsertPos.getInstrIterator(), llvm::next(LI));
+
+ Modified = true;
+ ++NumITs;
+ }
+
+ return Modified;
+}
+
+bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+
+ if (!AFI->isThumbFunction())
+ return false;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
+ MachineBasicBlock &MBB = *MFI;
+ ++MFI;
+ Modified |= InsertITInstructions(MBB);
+ }
+
+ if (Modified)
+ AFI->setHasITBlocks(true);
+
+ return Modified;
+}
+
+/// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks
+/// insertion pass.
+FunctionPass *llvm::createThumb2ITBlockPass() {
+ return new Thumb2ITBlockPass();
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
new file mode 100644
index 000000000000..67e8ec7c5ff2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -0,0 +1,572 @@
+//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb2InstrInfo.h"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
+ cl::desc("Use old-style Thumb2 if-conversion heuristics"),
+ cl::init(false));
+
+Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(ARM::tNOP);
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+}
+
+unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ // FIXME
+ return 0;
+}
+
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ if (!AFI->hasITBlocks()) {
+ TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
+ return;
+ }
+
+ // If the first instruction of Tail is predicated, we may have to update
+ // the IT instruction.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg);
+ MachineBasicBlock::iterator MBBI = Tail;
+ if (CC != ARMCC::AL)
+ // Expecting at least the t2IT instruction before it.
+ --MBBI;
+
+ // Actually replace the tail.
+ TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
+
+ // Fix up IT.
+ if (CC != ARMCC::AL) {
+ MachineBasicBlock::iterator E = MBB->begin();
+ unsigned Count = 4; // At most 4 instructions in an IT block.
+ while (Count && MBBI != E) {
+ if (MBBI->isDebugValue()) {
+ --MBBI;
+ continue;
+ }
+ if (MBBI->getOpcode() == ARM::t2IT) {
+ unsigned Mask = MBBI->getOperand(1).getImm();
+ if (Count == 4)
+ MBBI->eraseFromParent();
+ else {
+ unsigned MaskOn = 1 << Count;
+ unsigned MaskOff = ~(MaskOn - 1);
+ MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+ }
+ return;
+ }
+ --MBBI;
+ --Count;
+ }
+
+ // Ctrl flow can reach here if branch folding is run before IT block
+ // formation pass.
+ }
+}
+
+bool
+Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ while (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == MBB.end())
+ return false;
+ }
+
+ unsigned PredReg = 0;
+ return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+}
+
+void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // Handle SPR, DPR, and QPR copies.
+ if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
+ return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
+
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+}
+
+void Thumb2InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
+ RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
+ RC == &ARM::GPRnopcRegClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
+}
+
+void Thumb2InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
+ RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
+ RC == &ARM::GPRnopcRegClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
+}
+
+void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ // If profitable, use a movw or movt to materialize the offset.
+ // FIXME: Use the scavenger to grab a scratch register.
+ if (DestReg != ARM::SP && DestReg != BaseReg &&
+ NumBytes >= 4096 &&
+ ARM_AM::getT2SOImmVal(NumBytes) == -1) {
+ bool Fits = false;
+ if (NumBytes < 65536) {
+ // Use a movw to materialize the 16-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
+ .addImm(NumBytes)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ Fits = true;
+ } else if ((NumBytes & 0xffff) == 0) {
+ // Use a movt to materialize the 32-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
+ .addReg(DestReg)
+ .addImm(NumBytes >> 16)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ Fits = true;
+ }
+
+ if (Fits) {
+ if (isSub) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addReg(DestReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ }
+ return;
+ }
+ }
+
+ while (NumBytes) {
+ unsigned ThisVal = NumBytes;
+ unsigned Opc = 0;
+ if (DestReg == ARM::SP && BaseReg != ARM::SP) {
+ // mov sp, rn. Note t2MOVr cannot be used.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg)
+ .addReg(BaseReg).setMIFlags(MIFlags));
+ BaseReg = ARM::SP;
+ continue;
+ }
+
+ bool HasCCOut = true;
+ if (BaseReg == ARM::SP) {
+ // sub sp, sp, #imm7
+ if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags));
+ NumBytes = 0;
+ continue;
+ }
+
+ // sub rd, sp, so_imm
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ } else {
+ assert(DestReg != ARM::SP && BaseReg != ARM::SP);
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ HasCCOut = false;
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ }
+
+ // Build the new ADD / SUB.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)).setMIFlags(MIFlags);
+ if (HasCCOut)
+ AddDefaultCC(MIB);
+
+ BaseReg = DestReg;
+ }
+}
+
+static unsigned
+negativeOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi12: return ARM::t2LDRi8;
+ case ARM::t2LDRHi12: return ARM::t2LDRHi8;
+ case ARM::t2LDRBi12: return ARM::t2LDRBi8;
+ case ARM::t2LDRSHi12: return ARM::t2LDRSHi8;
+ case ARM::t2LDRSBi12: return ARM::t2LDRSBi8;
+ case ARM::t2STRi12: return ARM::t2STRi8;
+ case ARM::t2STRBi12: return ARM::t2STRBi8;
+ case ARM::t2STRHi12: return ARM::t2STRHi8;
+
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+positiveOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi8: return ARM::t2LDRi12;
+ case ARM::t2LDRHi8: return ARM::t2LDRHi12;
+ case ARM::t2LDRBi8: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHi8: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBi8: return ARM::t2LDRSBi12;
+ case ARM::t2STRi8: return ARM::t2STRi12;
+ case ARM::t2STRBi8: return ARM::t2STRBi12;
+ case ARM::t2STRHi8: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+immediateOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRs: return ARM::t2LDRi12;
+ case ARM::t2LDRHs: return ARM::t2LDRHi12;
+ case ARM::t2LDRBs: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHs: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBs: return ARM::t2LDRSBi12;
+ case ARM::t2STRs: return ARM::t2STRi12;
+ case ARM::t2STRBs: return ARM::t2STRBi12;
+ case ARM::t2STRHs: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrModeT2_i12.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
+
+ if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset and remaining explicit predicate operands.
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1);
+ MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
+ AddDefaultPred(MIB);
+ return true;
+ }
+
+ bool HasCCOut = Opcode != ARM::t2ADDri12;
+
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::t2SUBri));
+ } else {
+ MI.setDesc(TII.get(ARM::t2ADDri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getT2SOImmVal(Offset) != -1) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+ Offset = 0;
+ return true;
+ }
+ // Another common case: imm12.
+ if (Offset < 4096 &&
+ (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) {
+ unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ MI.setDesc(TII.get(NewOpc));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Remove the cc_out operand.
+ if (HasCCOut)
+ MI.RemoveOperand(MI.getNumOperands()-1);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, extract 8 adjacent bits from the immediate into this
+ // t2ADDri/t2SUBri.
+ unsigned RotAmt = CountLeadingZeros_32(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+
+ } else {
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return false;
+
+ // AddrModeT2_so cannot handle any offset. If there is no offset
+ // register then we change to an immediate version.
+ unsigned NewOpc = Opcode;
+ if (AddrMode == ARMII::AddrModeT2_so) {
+ unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
+ if (OffsetReg != 0) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ return Offset == 0;
+ }
+
+ MI.RemoveOperand(FrameRegIdx+1);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0);
+ NewOpc = immediateOffsetOpcode(Opcode);
+ AddrMode = ARMII::AddrModeT2_i12;
+ }
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) {
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign convert Opcode to the appropriate
+ // instruction
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset < 0) {
+ NewOpc = negativeOffsetOpcode(Opcode);
+ NumBits = 8;
+ isSub = true;
+ Offset = -Offset;
+ } else {
+ NewOpc = positiveOffsetOpcode(Opcode);
+ NumBits = 12;
+ }
+ } else if (AddrMode == ARMII::AddrMode5) {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1);
+ int InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ Offset += InstrOffs * 4;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+ } else {
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ if (NewOpc != Opcode)
+ MI.setDesc(TII.get(NewOpc));
+
+ MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1);
+
+ // Attempt to fold address computation
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with fp/sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else
+ ImmedOffset = -ImmedOffset;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, offset doesn't fit. Pull in what we can to simplify
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else {
+ ImmedOffset = -ImmedOffset;
+ if (ImmedOffset == 0)
+ // Change the opcode back if the encoded offset is zero.
+ MI.setDesc(TII.get(positiveOffsetOpcode(NewOpc)));
+ }
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return getInstrPredicate(MI, PredReg);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
new file mode 100644
index 000000000000..2cdcd06e9350
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -0,0 +1,75 @@
+//===-- Thumb2InstrInfo.h - Thumb-2 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2INSTRUCTIONINFO_H
+#define THUMB2INSTRUCTIONINFO_H
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+
+namespace llvm {
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
+
+class Thumb2InstrInfo : public ARMBaseInstrInfo {
+ Thumb2RegisterInfo RI;
+public:
+ explicit Thumb2InstrInfo(const ARMSubtarget &STI);
+
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const;
+
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated", but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
+}
+
+#endif // THUMB2INSTRUCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
new file mode 100644
index 000000000000..1a7a4d450cfe
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -0,0 +1,52 @@
+//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb2RegisterInfo.h"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+using namespace llvm;
+
+Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void
+Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+ .setMIFlags(MIFlags);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
new file mode 100644
index 000000000000..6b397e869683
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -0,0 +1,42 @@
+//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2REGISTERINFO_H
+#define THUMB2REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+
+struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
+public:
+ Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
+};
+}
+
+#endif // THUMB2REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
new file mode 100644
index 000000000000..d50f5d972232
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -0,0 +1,1037 @@
+//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "t2-reduce-size"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h" // To access Function attributes
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
+STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
+STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
+
+static cl::opt<int> ReduceLimit("t2-reduce-limit",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
+ cl::init(-1), cl::Hidden);
+
+namespace {
+ /// ReduceTable - A static table with information on mapping from wide
+ /// opcodes to narrow
+ struct ReduceEntry {
+ uint16_t WideOpc; // Wide opcode
+ uint16_t NarrowOpc1; // Narrow opcode to transform to
+ uint16_t NarrowOpc2; // Narrow opcode when it's two-address
+ uint8_t Imm1Limit; // Limit of immediate field (bits)
+ uint8_t Imm2Limit; // Limit of immediate field when it's two-address
+ unsigned LowRegs1 : 1; // Only possible if low-registers are used
+ unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
+ unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa.
+ // 1 - No cc field.
+ // 2 - Always set CPSR.
+ unsigned PredCC2 : 2;
+ unsigned PartFlag : 1; // 16-bit instruction does partial flag update
+ unsigned Special : 1; // Needs to be dealt with specially
+ unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
+ };
+
+ static const ReduceEntry ReduceTable[] = {
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
+ //FIXME: Disable CMN, as CCodes are backwards from compare expectations
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
+ // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
+ };
+
+ class Thumb2SizeReduce : public MachineFunctionPass {
+ public:
+ static char ID;
+ Thumb2SizeReduce();
+
+ const Thumb2InstrInfo *TII;
+ const ARMSubtarget *STI;
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Thumb2 instruction size reduction pass";
+ }
+
+ private:
+ /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
+ DenseMap<unsigned, unsigned> ReduceOpcodeMap;
+
+ bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
+
+ bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead);
+
+ bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry);
+
+ bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
+
+ /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
+ /// instruction.
+ bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR,
+ bool IsSelfLoop);
+
+ /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
+ /// non-two-address instruction.
+ bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR,
+ bool IsSelfLoop);
+
+ /// ReduceMI - Attempt to reduce MI, return true on success.
+ bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, bool IsSelfLoop);
+
+ /// ReduceMBB - Reduce width of instructions in the specified basic block.
+ bool ReduceMBB(MachineBasicBlock &MBB);
+
+ bool OptimizeSize;
+ bool MinimizeSize;
+
+ // Last instruction to define CPSR in the current block.
+ MachineInstr *CPSRDef;
+ // Was CPSR last defined by a high latency instruction?
+ // When CPSRDef is null, this refers to CPSR defs in predecessors.
+ bool HighLatencyCPSR;
+
+ struct MBBInfo {
+ // The flags leaving this block have high latency.
+ bool HighLatencyCPSR;
+ // Has this block been visited yet?
+ bool Visited;
+
+ MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
+ };
+
+ SmallVector<MBBInfo, 8> BlockInfo;
+ };
+ char Thumb2SizeReduce::ID = 0;
+}
+
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+ OptimizeSize = MinimizeSize = false;
+ for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
+ unsigned FromOpc = ReduceTable[i].WideOpc;
+ if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ }
+}
+
+static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
+ for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
+ if (*Regs == ARM::CPSR)
+ return true;
+ return false;
+}
+
+// Check for a likely high-latency flag def.
+static bool isHighLatencyCPSR(MachineInstr *Def) {
+ switch(Def->getOpcode()) {
+ case ARM::FMSTAT:
+ case ARM::tMUL:
+ return true;
+ }
+ return false;
+}
+
+/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
+/// the 's' 16-bit instruction partially update CPSR. Abort the
+/// transformation to avoid adding false dependency on last CPSR setting
+/// instruction which hurts the ability for out-of-order execution engine
+/// to do register renaming magic.
+/// This function checks if there is a read-of-write dependency between the
+/// last instruction that defines the CPSR and the current instruction. If there
+/// is, then there is no harm done since the instruction cannot be retired
+/// before the CPSR setting instruction anyway.
+/// Note, we are not doing full dependency analysis here for the sake of compile
+/// time. We're not looking for cases like:
+/// r0 = muls ...
+/// r1 = add.w r0, ...
+/// ...
+/// = mul.w r1
+/// In this case it would have been ok to narrow the mul.w to muls since there
+/// are indirect RAW dependency between the muls and the mul.w
+bool
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
+ // Disable the check for -Oz (aka OptimizeForSizeHarder).
+ if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
+ return false;
+
+ if (!CPSRDef)
+ // If this BB loops back to itself, conservatively avoid narrowing the
+ // first instruction that does partial flag update.
+ return HighLatencyCPSR || FirstInSelfLoop;
+
+ SmallSet<unsigned, 2> Defs;
+ for (unsigned i = 0, e = CPSRDef->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = CPSRDef->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ Defs.insert(Reg);
+ }
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Use->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Defs.count(Reg))
+ return false;
+ }
+
+ // If the current CPSR has high latency, try to avoid the false dependency.
+ if (HighLatencyCPSR)
+ return true;
+
+ // tMOVi8 usually doesn't start long dependency chains, and there are a lot
+ // of them, so always shrink them when CPSR doesn't have high latency.
+ if (Use->getOpcode() == ARM::t2MOVi ||
+ Use->getOpcode() == ARM::t2MOVi16)
+ return false;
+
+ // No read-after-write dependency. The narrowing will add false dependency.
+ return true;
+}
+
+bool
+Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead) {
+ if ((is2Addr && Entry.PredCC2 == 0) ||
+ (!is2Addr && Entry.PredCC1 == 0)) {
+ if (Pred == ARMCC::AL) {
+ // Not predicated, must set CPSR.
+ if (!HasCC) {
+ // Original instruction was not setting CPSR, but CPSR is not
+ // currently live anyway. It's ok to set it. The CPSR def is
+ // dead though.
+ if (!LiveCPSR) {
+ HasCC = true;
+ CCDead = true;
+ return true;
+ }
+ return false;
+ }
+ } else {
+ // Predicated, must not set CPSR.
+ if (HasCC)
+ return false;
+ }
+ } else if ((is2Addr && Entry.PredCC2 == 2) ||
+ (!is2Addr && Entry.PredCC1 == 2)) {
+ /// Old opcode has an optional def of CPSR.
+ if (HasCC)
+ return true;
+ // If old opcode does not implicitly define CPSR, then it's not ok since
+ // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
+ if (!HasImplicitCPSRDef(MI->getDesc()))
+ return false;
+ HasCC = true;
+ } else {
+ // 16-bit instruction does not set CPSR.
+ if (HasCC)
+ return false;
+ }
+
+ return true;
+}
+
+static bool VerifyLowRegs(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA ||
+ Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
+ Opc == ARM::t2LDMDB_UPD);
+ bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
+ bool isSPOk = isPCOk || isLROk;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ if (isPCOk && Reg == ARM::PC)
+ continue;
+ if (isLROk && Reg == ARM::LR)
+ continue;
+ if (Reg == ARM::SP) {
+ if (isSPOk)
+ continue;
+ if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
+ // Special case for these ldr / str with sp as base register.
+ continue;
+ }
+ if (!isARMLowRegister(Reg))
+ return false;
+ }
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry) {
+ if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
+ return false;
+
+ unsigned Scale = 1;
+ bool HasImmOffset = false;
+ bool HasShift = false;
+ bool HasOffReg = true;
+ bool isLdStMul = false;
+ unsigned Opc = Entry.NarrowOpc1;
+ unsigned OpNum = 3; // First 'rest' of operands.
+ uint8_t ImmLimit = Entry.Imm1Limit;
+
+ switch (Entry.WideOpc) {
+ default:
+ llvm_unreachable("Unexpected Thumb2 load / store opcode!");
+ case ARM::t2LDRi12:
+ case ARM::t2STRi12:
+ if (MI->getOperand(1).getReg() == ARM::SP) {
+ Opc = Entry.NarrowOpc2;
+ ImmLimit = Entry.Imm2Limit;
+ HasOffReg = false;
+ }
+
+ Scale = 4;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRBi12:
+ case ARM::t2STRBi12:
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRHi12:
+ case ARM::t2STRHi12:
+ Scale = 2;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRSHs:
+ case ARM::t2STRs:
+ case ARM::t2STRBs:
+ case ARM::t2STRHs:
+ HasShift = true;
+ OpNum = 4;
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB: {
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
+ return false;
+
+ // For the non-writeback version (this one), the base register must be
+ // one of the registers being loaded.
+ bool isOK = false;
+ for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg) {
+ isOK = true;
+ break;
+ }
+ }
+
+ if (!isOK)
+ return false;
+
+ OpNum = 0;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_RET: {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg != ARM::SP)
+ return false;
+ Opc = Entry.NarrowOpc2; // tPOP_RET
+ OpNum = 2;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ OpNum = 0;
+
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == ARM::SP &&
+ (Entry.WideOpc == ARM::t2LDMIA_UPD ||
+ Entry.WideOpc == ARM::t2STMDB_UPD)) {
+ Opc = Entry.NarrowOpc2; // tPOP or tPUSH
+ OpNum = 2;
+ } else if (!isARMLowRegister(BaseReg) ||
+ (Entry.WideOpc != ARM::t2LDMIA_UPD &&
+ Entry.WideOpc != ARM::t2STMIA_UPD)) {
+ return false;
+ }
+
+ isLdStMul = true;
+ break;
+ }
+ }
+
+ unsigned OffsetReg = 0;
+ bool OffsetKill = false;
+ if (HasShift) {
+ OffsetReg = MI->getOperand(2).getReg();
+ OffsetKill = MI->getOperand(2).isKill();
+
+ if (MI->getOperand(3).getImm())
+ // Thumb1 addressing mode doesn't support shift.
+ return false;
+ }
+
+ unsigned OffsetImm = 0;
+ if (HasImmOffset) {
+ OffsetImm = MI->getOperand(2).getImm();
+ unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
+
+ if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
+ // Make sure the immediate field fits.
+ return false;
+ }
+
+ // Add the 16-bit load / store instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
+ if (!isLdStMul) {
+ MIB.addOperand(MI->getOperand(0));
+ MIB.addOperand(MI->getOperand(1));
+
+ if (HasImmOffset)
+ MIB.addImm(OffsetImm / Scale);
+
+ assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
+
+ if (HasOffReg)
+ MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
+ }
+
+ // Transfer the rest of operands.
+ for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase_instr(MI);
+ ++NumLdSts;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, bool IsSelfLoop) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::t2ADDri) {
+ // If the source register is SP, try to reduce to tADDrSPi, otherwise
+ // it's a normal reduce.
+ if (MI->getOperand(1).getReg() != ARM::SP) {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ }
+ // Try to reduce to tADDrSPi.
+ unsigned Imm = MI->getOperand(2).getImm();
+ // The immediate must be in range, the destination register must be a low
+ // reg, the predicate must be "always" and the condition flags must not
+ // be being set.
+ if (Imm & 3 || Imm > 1020)
+ return false;
+ if (!isARMLowRegister(MI->getOperand(0).getReg()))
+ return false;
+ if (MI->getOperand(3).getImm() != ARMCC::AL)
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(ARM::tADDrSPi))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
+ AddDefaultPred(MIB);
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
+
+ MBB.erase_instr(MI);
+ ++NumNarrows;
+ return true;
+ }
+
+ if (Entry.LowRegs1 && !VerifyLowRegs(MI))
+ return false;
+
+ if (MI->mayLoad() || MI->mayStore())
+ return ReduceLoadStore(MBB, MI, Entry);
+
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri:
+ case ARM::t2ADDSrr: {
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri: {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+ // fallthrough
+ }
+ case ARM::t2ADDSrr:
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ }
+ }
+ break;
+ }
+ case ARM::t2RSBri:
+ case ARM::t2RSBSri:
+ case ARM::t2SXTB:
+ case ARM::t2SXTH:
+ case ARM::t2UXTB:
+ case ARM::t2UXTH:
+ if (MI->getOperand(2).getImm() == 0)
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ break;
+ case ARM::t2MOVi16:
+ // Can convert only 'pure' immediate operands, not immediates obtained as
+ // globals' addresses.
+ if (MI->getOperand(1).isImm())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ break;
+ case ARM::t2CMPrr: {
+ // Try to reduce to the lo-reg only version first. Why there are two
+ // versions of the instruction is a mystery.
+ // It would be nice to just have two entries in the master table that
+ // are prioritized, but the table assumes a unique entry for each
+ // source insn opcode. So for now, we hack a local entry record to use.
+ static const ReduceEntry NarrowEntry =
+ { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ }
+ }
+ return false;
+}
+
+bool
+Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, bool IsSelfLoop) {
+
+ if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
+ return false;
+
+ if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+ STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing / minimizing for size.
+ return false;
+
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ // t2MUL is "special". The tied source operand is second, not first.
+ if (MI->getOpcode() == ARM::t2MUL) {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ // Early exit if the regs aren't all low regs.
+ if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
+ || !isARMLowRegister(Reg2))
+ return false;
+ if (Reg0 != Reg2) {
+ // If the other operand also isn't the same as the destination, we
+ // can't reduce.
+ if (Reg1 != Reg0)
+ return false;
+ // Try to commute the operands to make it a 2-address instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
+ } else if (Reg0 != Reg1) {
+ // Try to commute the operands to make it a 2-address instruction.
+ unsigned CommOpIdx1, CommOpIdx2;
+ if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ return false;
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
+ return false;
+ if (Entry.Imm2Limit) {
+ unsigned Imm = MI->getOperand(2).getImm();
+ unsigned Limit = (1 << Entry.Imm2Limit) - 1;
+ if (Imm > Limit)
+ return false;
+ } else {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
+ return false;
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewMCID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewMCID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+ // Avoid adding a false dependency on partial flag update by some 16-bit
+ // instructions which has the 's' bit set.
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(MI, IsSelfLoop))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = MCID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
+ continue;
+ if (SkipPred && MCID.OpInfo[i].isPredicate())
+ continue;
+ MIB.addOperand(MI->getOperand(i));
+ }
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase_instr(MI);
+ ++Num2Addrs;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, bool IsSelfLoop) {
+ if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
+ return false;
+
+ if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+ STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing / minimizing for size.
+ return false;
+
+ unsigned Limit = ~0U;
+ if (Entry.Imm1Limit)
+ Limit = (1 << Entry.Imm1Limit) - 1;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate())
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::CPSR)
+ continue;
+ if (Entry.LowRegs1 && !isARMLowRegister(Reg))
+ return false;
+ } else if (MO.isImm() &&
+ !MCID.OpInfo[i].isPredicate()) {
+ if (((unsigned)MO.getImm()) > Limit)
+ return false;
+ }
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewMCID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewMCID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+ // Avoid adding a false dependency on partial flag update by some 16-bit
+ // instructions which has the 's' bit set.
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(MI, IsSelfLoop))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = MCID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
+ continue;
+ if ((MCID.getOpcode() == ARM::t2RSBSri ||
+ MCID.getOpcode() == ARM::t2RSBri ||
+ MCID.getOpcode() == ARM::t2SXTB ||
+ MCID.getOpcode() == ARM::t2SXTH ||
+ MCID.getOpcode() == ARM::t2UXTB ||
+ MCID.getOpcode() == ARM::t2UXTH) && i == 2)
+ // Skip the zero immediate operand, it's now implicit.
+ continue;
+ bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
+ if (SkipPred && isPred)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
+ // Skip implicit def of CPSR. Either it's modeled as an optional
+ // def now or it's already an implicit def on the new instruction.
+ continue;
+ MIB.addOperand(MO);
+ }
+ if (!MCID.isPredicable() && NewMCID.isPredicable())
+ AddDefaultPred(MIB);
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase_instr(MI);
+ ++NumNarrows;
+ return true;
+}
+
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
+ bool HasDef = false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+
+ DefCPSR = true;
+ if (!MO.isDead())
+ HasDef = true;
+ }
+
+ return HasDef || LiveCPSR;
+}
+
+static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+ assert(LiveCPSR && "CPSR liveness tracking is wrong!");
+ if (MO.isKill()) {
+ LiveCPSR = false;
+ break;
+ }
+ }
+
+ return LiveCPSR;
+}
+
+bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, bool IsSelfLoop) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI == ReduceOpcodeMap.end())
+ return false;
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+
+ // Don't attempt normal reductions on "special" cases for now.
+ if (Entry.Special)
+ return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 &&
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+
+ // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 &&
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+
+ return false;
+}
+
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ // Yes, CPSR could be livein.
+ bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
+ MachineInstr *BundleMI = 0;
+
+ CPSRDef = 0;
+ HighLatencyCPSR = false;
+
+ // Check predecessors for the latest CPSRDef.
+ bool HasBackEdges = false;
+ for (MachineBasicBlock::pred_iterator
+ I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
+ const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
+ if (!PInfo.Visited) {
+ // Since blocks are visited in RPO, this must be a back-edge.
+ HasBackEdges = true;
+ continue;
+ }
+ if (PInfo.HighLatencyCPSR) {
+ HighLatencyCPSR = true;
+ break;
+ }
+ }
+
+ // If this BB loops back to itself, conservatively avoid narrowing the
+ // first instruction that does partial flag update.
+ bool IsSelfLoop = MBB.isSuccessor(&MBB);
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = llvm::next(MII);
+
+ MachineInstr *MI = &*MII;
+ if (MI->isBundle()) {
+ BundleMI = MI;
+ continue;
+ }
+ if (MI->isDebugValue())
+ continue;
+
+ LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
+
+ // Does NextMII belong to the same bundle as MI?
+ bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
+
+ if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
+ Modified = true;
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
+ MI = &*I;
+ // Removing and reinserting the first instruction in a bundle will break
+ // up the bundle. Fix the bundling if it was broken.
+ if (NextInSameBundle && !NextMII->isBundledWithPred())
+ NextMII->bundleWithPred();
+ }
+
+ if (!NextInSameBundle && MI->isInsideBundle()) {
+ // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
+ // marker is only on the BUNDLE instruction. Process the BUNDLE
+ // instruction as we finish with the bundled instruction to work around
+ // the inconsistency.
+ if (BundleMI->killsRegister(ARM::CPSR))
+ LiveCPSR = false;
+ MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
+ if (MO && !MO->isDead())
+ LiveCPSR = true;
+ }
+
+ bool DefCPSR = false;
+ LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
+ if (MI->isCall()) {
+ // Calls don't really set CPSR.
+ CPSRDef = 0;
+ HighLatencyCPSR = false;
+ IsSelfLoop = false;
+ } else if (DefCPSR) {
+ // This is the last CPSR defining instruction.
+ CPSRDef = MI;
+ HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
+ IsSelfLoop = false;
+ }
+ }
+
+ MBBInfo &Info = BlockInfo[MBB.getNumber()];
+ Info.HighLatencyCPSR = HighLatencyCPSR;
+ Info.Visited = true;
+ return Modified;
+}
+
+bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ STI = &TM.getSubtarget<ARMSubtarget>();
+
+ // Optimizing / minimizing size?
+ AttributeSet FnAttrs = MF.getFunction()->getAttributes();
+ OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+ MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize);
+
+ BlockInfo.clear();
+ BlockInfo.resize(MF.getNumBlockIDs());
+
+ // Visit blocks in reverse post-order so LastCPSRDef is known for all
+ // predecessors.
+ ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+ bool Modified = false;
+ for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+ Modified |= ReduceMBB(**I);
+ return Modified;
+}
+
+/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
+/// reduction pass.
+FunctionPass *llvm::createThumb2SizeReductionPass() {
+ return new Thumb2SizeReduce();
+}
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
new file mode 100644
index 000000000000..3e69098edcc3
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
@@ -0,0 +1,2114 @@
+//===-- CPPBackend.cpp - Library for converting LLVM code to C++ code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the writing of the LLVM IR as a set of C++ calls to the
+// LLVM IR interface. The input module is assumed to be verified.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cstdio>
+#include <map>
+#include <set>
+using namespace llvm;
+
+static cl::opt<std::string>
+FuncName("cppfname", cl::desc("Specify the name of the generated function"),
+ cl::value_desc("function name"));
+
+enum WhatToGenerate {
+ GenProgram,
+ GenModule,
+ GenContents,
+ GenFunction,
+ GenFunctions,
+ GenInline,
+ GenVariable,
+ GenType
+};
+
+static cl::opt<WhatToGenerate> GenerationType("cppgen", cl::Optional,
+ cl::desc("Choose what kind of output to generate"),
+ cl::init(GenProgram),
+ cl::values(
+ clEnumValN(GenProgram, "program", "Generate a complete program"),
+ clEnumValN(GenModule, "module", "Generate a module definition"),
+ clEnumValN(GenContents, "contents", "Generate contents of a module"),
+ clEnumValN(GenFunction, "function", "Generate a function definition"),
+ clEnumValN(GenFunctions,"functions", "Generate all function definitions"),
+ clEnumValN(GenInline, "inline", "Generate an inline function"),
+ clEnumValN(GenVariable, "variable", "Generate a variable definition"),
+ clEnumValN(GenType, "type", "Generate a type definition"),
+ clEnumValEnd
+ )
+);
+
+static cl::opt<std::string> NameToGenerate("cppfor", cl::Optional,
+ cl::desc("Specify the name of the thing to generate"),
+ cl::init("!bad!"));
+
+extern "C" void LLVMInitializeCppBackendTarget() {
+ // Register the target.
+ RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget);
+}
+
+namespace {
+ typedef std::vector<Type*> TypeList;
+ typedef std::map<Type*,std::string> TypeMap;
+ typedef std::map<const Value*,std::string> ValueMap;
+ typedef std::set<std::string> NameSet;
+ typedef std::set<Type*> TypeSet;
+ typedef std::set<const Value*> ValueSet;
+ typedef std::map<const Value*,std::string> ForwardRefMap;
+
+ /// CppWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C++ translation unit.
+ class CppWriter : public ModulePass {
+ formatted_raw_ostream &Out;
+ const Module *TheModule;
+ uint64_t uniqueNum;
+ TypeMap TypeNames;
+ ValueMap ValueNames;
+ NameSet UsedNames;
+ TypeSet DefinedTypes;
+ ValueSet DefinedValues;
+ ForwardRefMap ForwardRefs;
+ bool is_inline;
+ unsigned indent_level;
+
+ public:
+ static char ID;
+ explicit CppWriter(formatted_raw_ostream &o) :
+ ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+
+ virtual const char *getPassName() const { return "C++ backend"; }
+
+ bool runOnModule(Module &M);
+
+ void printProgram(const std::string& fname, const std::string& modName );
+ void printModule(const std::string& fname, const std::string& modName );
+ void printContents(const std::string& fname, const std::string& modName );
+ void printFunction(const std::string& fname, const std::string& funcName );
+ void printFunctions();
+ void printInline(const std::string& fname, const std::string& funcName );
+ void printVariable(const std::string& fname, const std::string& varName );
+ void printType(const std::string& fname, const std::string& typeName );
+
+ void error(const std::string& msg);
+
+
+ formatted_raw_ostream& nl(formatted_raw_ostream &Out, int delta = 0);
+ inline void in() { indent_level++; }
+ inline void out() { if (indent_level >0) indent_level--; }
+
+ private:
+ void printLinkageType(GlobalValue::LinkageTypes LT);
+ void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
+ void printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM);
+ void printCallingConv(CallingConv::ID cc);
+ void printEscapedString(const std::string& str);
+ void printCFP(const ConstantFP* CFP);
+
+ std::string getCppName(Type* val);
+ inline void printCppName(Type* val);
+
+ std::string getCppName(const Value* val);
+ inline void printCppName(const Value* val);
+
+ void printAttributes(const AttributeSet &PAL, const std::string &name);
+ void printType(Type* Ty);
+ void printTypes(const Module* M);
+
+ void printConstant(const Constant *CPV);
+ void printConstants(const Module* M);
+
+ void printVariableUses(const GlobalVariable *GV);
+ void printVariableHead(const GlobalVariable *GV);
+ void printVariableBody(const GlobalVariable *GV);
+
+ void printFunctionUses(const Function *F);
+ void printFunctionHead(const Function *F);
+ void printFunctionBody(const Function *F);
+ void printInstruction(const Instruction *I, const std::string& bbname);
+ std::string getOpName(const Value*);
+
+ void printModuleBody();
+ };
+} // end anonymous namespace.
+
+formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) {
+ Out << '\n';
+ if (delta >= 0 || indent_level >= unsigned(-delta))
+ indent_level += delta;
+ Out.indent(indent_level);
+ return Out;
+}
+
+static inline void sanitize(std::string &str) {
+ for (size_t i = 0; i < str.length(); ++i)
+ if (!isalnum(str[i]) && str[i] != '_')
+ str[i] = '_';
+}
+
+static std::string getTypePrefix(Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "void_";
+ case Type::IntegerTyID:
+ return "int" + utostr(cast<IntegerType>(Ty)->getBitWidth()) + "_";
+ case Type::FloatTyID: return "float_";
+ case Type::DoubleTyID: return "double_";
+ case Type::LabelTyID: return "label_";
+ case Type::FunctionTyID: return "func_";
+ case Type::StructTyID: return "struct_";
+ case Type::ArrayTyID: return "array_";
+ case Type::PointerTyID: return "ptr_";
+ case Type::VectorTyID: return "packed_";
+ default: return "other_";
+ }
+}
+
+void CppWriter::error(const std::string& msg) {
+ report_fatal_error(msg);
+}
+
+static inline std::string ftostr(const APFloat& V) {
+ std::string Buf;
+ if (&V.getSemantics() == &APFloat::IEEEdouble) {
+ raw_string_ostream(Buf) << V.convertToDouble();
+ return Buf;
+ } else if (&V.getSemantics() == &APFloat::IEEEsingle) {
+ raw_string_ostream(Buf) << (double)V.convertToFloat();
+ return Buf;
+ }
+ return "<unknown format in ftostr>"; // error
+}
+
+// printCFP - Print a floating point constant .. very carefully :)
+// This makes sure that conversion to/from floating yields the same binary
+// result so that we don't lose precision.
+void CppWriter::printCFP(const ConstantFP *CFP) {
+ bool ignored;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+ Out << "ConstantFP::get(mod->getContext(), ";
+ Out << "APFloat(";
+#if HAVE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%A", APF.convertToDouble());
+ if ((!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3)) &&
+ APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(" << Buffer << ")";
+ else
+ Out << "BitsToFloat((float)" << Buffer << ")";
+ Out << ")";
+ } else {
+#endif
+ std::string StrVal = ftostr(CFP->getValueAPF());
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like
+ // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if (((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
+ (CFP->isExactlyValue(atof(StrVal.c_str())))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << StrVal;
+ else
+ Out << StrVal << "f";
+ } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(0x"
+ << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
+ << "ULL) /* " << StrVal << " */";
+ else
+ Out << "BitsToFloat(0x"
+ << utohexstr((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue())
+ << "U) /* " << StrVal << " */";
+ Out << ")";
+#if HAVE_PRINTF_A
+ }
+#endif
+ Out << ")";
+}
+
+void CppWriter::printCallingConv(CallingConv::ID cc){
+ // Print the calling convention.
+ switch (cc) {
+ case CallingConv::C: Out << "CallingConv::C"; break;
+ case CallingConv::Fast: Out << "CallingConv::Fast"; break;
+ case CallingConv::Cold: Out << "CallingConv::Cold"; break;
+ case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break;
+ default: Out << cc; break;
+ }
+}
+
+void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
+ switch (LT) {
+ case GlobalValue::InternalLinkage:
+ Out << "GlobalValue::InternalLinkage"; break;
+ case GlobalValue::PrivateLinkage:
+ Out << "GlobalValue::PrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateLinkage:
+ Out << "GlobalValue::LinkerPrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "GlobalValue::AvailableExternallyLinkage "; break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ Out << "GlobalValue::LinkOnceAnyLinkage "; break;
+ case GlobalValue::LinkOnceODRLinkage:
+ Out << "GlobalValue::LinkOnceODRLinkage "; break;
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ Out << "GlobalValue::LinkOnceODRAutoHideLinkage"; break;
+ case GlobalValue::WeakAnyLinkage:
+ Out << "GlobalValue::WeakAnyLinkage"; break;
+ case GlobalValue::WeakODRLinkage:
+ Out << "GlobalValue::WeakODRLinkage"; break;
+ case GlobalValue::AppendingLinkage:
+ Out << "GlobalValue::AppendingLinkage"; break;
+ case GlobalValue::ExternalLinkage:
+ Out << "GlobalValue::ExternalLinkage"; break;
+ case GlobalValue::DLLImportLinkage:
+ Out << "GlobalValue::DLLImportLinkage"; break;
+ case GlobalValue::DLLExportLinkage:
+ Out << "GlobalValue::DLLExportLinkage"; break;
+ case GlobalValue::ExternalWeakLinkage:
+ Out << "GlobalValue::ExternalWeakLinkage"; break;
+ case GlobalValue::CommonLinkage:
+ Out << "GlobalValue::CommonLinkage"; break;
+ }
+}
+
+void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
+ switch (VisType) {
+ case GlobalValue::DefaultVisibility:
+ Out << "GlobalValue::DefaultVisibility";
+ break;
+ case GlobalValue::HiddenVisibility:
+ Out << "GlobalValue::HiddenVisibility";
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Out << "GlobalValue::ProtectedVisibility";
+ break;
+ }
+}
+
+void CppWriter::printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM) {
+ switch (TLM) {
+ case GlobalVariable::NotThreadLocal:
+ Out << "GlobalVariable::NotThreadLocal";
+ break;
+ case GlobalVariable::GeneralDynamicTLSModel:
+ Out << "GlobalVariable::GeneralDynamicTLSModel";
+ break;
+ case GlobalVariable::LocalDynamicTLSModel:
+ Out << "GlobalVariable::LocalDynamicTLSModel";
+ break;
+ case GlobalVariable::InitialExecTLSModel:
+ Out << "GlobalVariable::InitialExecTLSModel";
+ break;
+ case GlobalVariable::LocalExecTLSModel:
+ Out << "GlobalVariable::LocalExecTLSModel";
+ break;
+ }
+}
+
+// printEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+void CppWriter::printEscapedString(const std::string &Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '"' && C != '\\') {
+ Out << C;
+ } else {
+ Out << "\\x"
+ << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+ << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ }
+ }
+}
+
+std::string CppWriter::getCppName(Type* Ty) {
+ // First, handle the primitive types .. easy
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
+ }
+ case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
+ case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
+ case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
+ case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
+ case Type::X86_MMXTyID: return "Type::getX86_MMXTy(mod->getContext())";
+ default:
+ error("Invalid primitive type");
+ break;
+ }
+ // shouldn't be returned, but make it sensible
+ return "Type::getVoidTy(mod->getContext())";
+ }
+
+ // Now, see if we've seen the type before and return that
+ TypeMap::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end())
+ return I->second;
+
+ // Okay, let's build a new name for this type. Start with a prefix
+ const char* prefix = 0;
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: prefix = "FuncTy_"; break;
+ case Type::StructTyID: prefix = "StructTy_"; break;
+ case Type::ArrayTyID: prefix = "ArrayTy_"; break;
+ case Type::PointerTyID: prefix = "PointerTy_"; break;
+ case Type::VectorTyID: prefix = "VectorTy_"; break;
+ default: prefix = "OtherTy_"; break; // prevent breakage
+ }
+
+ // See if the type has a name in the symboltable and build accordingly
+ std::string name;
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (STy->hasName())
+ name = STy->getName();
+
+ if (name.empty())
+ name = utostr(uniqueNum++);
+
+ name = std::string(prefix) + name;
+ sanitize(name);
+
+ // Save the name
+ return TypeNames[Ty] = name;
+}
+
+void CppWriter::printCppName(Type* Ty) {
+ printEscapedString(getCppName(Ty));
+}
+
+std::string CppWriter::getCppName(const Value* val) {
+ std::string name;
+ ValueMap::iterator I = ValueNames.find(val);
+ if (I != ValueNames.end() && I->first == val)
+ return I->second;
+
+ if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
+ name = std::string("gvar_") +
+ getTypePrefix(GV->getType()->getElementType());
+ } else if (isa<Function>(val)) {
+ name = std::string("func_");
+ } else if (const Constant* C = dyn_cast<Constant>(val)) {
+ name = std::string("const_") + getTypePrefix(C->getType());
+ } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
+ if (is_inline) {
+ unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
+ Function::const_arg_iterator(Arg)) + 1;
+ name = std::string("arg_") + utostr(argNum);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ if (val->hasName())
+ name += val->getName();
+ else
+ name += utostr(uniqueNum++);
+ sanitize(name);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+}
+
+void CppWriter::printCppName(const Value* val) {
+ printEscapedString(getCppName(val));
+}
+
+void CppWriter::printAttributes(const AttributeSet &PAL,
+ const std::string &name) {
+ Out << "AttributeSet " << name << "_PAL;";
+ nl(Out);
+ if (!PAL.isEmpty()) {
+ Out << '{'; in(); nl(Out);
+ Out << "SmallVector<AttributeSet, 4> Attrs;"; nl(Out);
+ Out << "AttributeSet PAS;"; in(); nl(Out);
+ for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
+ unsigned index = PAL.getSlotIndex(i);
+ AttrBuilder attrs(PAL.getSlotAttributes(i), index);
+ Out << "{"; in(); nl(Out);
+ Out << "AttrBuilder B;"; nl(Out);
+
+#define HANDLE_ATTR(X) \
+ if (attrs.contains(Attribute::X)) { \
+ Out << "B.addAttribute(Attribute::" #X ");"; nl(Out); \
+ attrs.removeAttribute(Attribute::X); \
+ }
+
+ HANDLE_ATTR(SExt);
+ HANDLE_ATTR(ZExt);
+ HANDLE_ATTR(NoReturn);
+ HANDLE_ATTR(InReg);
+ HANDLE_ATTR(StructRet);
+ HANDLE_ATTR(NoUnwind);
+ HANDLE_ATTR(NoAlias);
+ HANDLE_ATTR(ByVal);
+ HANDLE_ATTR(Nest);
+ HANDLE_ATTR(ReadNone);
+ HANDLE_ATTR(ReadOnly);
+ HANDLE_ATTR(NoInline);
+ HANDLE_ATTR(AlwaysInline);
+ HANDLE_ATTR(OptimizeForSize);
+ HANDLE_ATTR(StackProtect);
+ HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(StackProtectStrong);
+ HANDLE_ATTR(NoCapture);
+ HANDLE_ATTR(NoRedZone);
+ HANDLE_ATTR(NoImplicitFloat);
+ HANDLE_ATTR(Naked);
+ HANDLE_ATTR(InlineHint);
+ HANDLE_ATTR(ReturnsTwice);
+ HANDLE_ATTR(UWTable);
+ HANDLE_ATTR(NonLazyBind);
+ HANDLE_ATTR(MinSize);
+#undef HANDLE_ATTR
+
+ if (attrs.contains(Attribute::StackAlignment)) {
+ Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment()<<')';
+ nl(Out);
+ attrs.removeAttribute(Attribute::StackAlignment);
+ }
+
+ Out << "PAS = AttributeSet::get(mod->getContext(), ";
+ if (index == ~0U)
+ Out << "~0U,";
+ else
+ Out << index << "U,";
+ Out << " B);"; out(); nl(Out);
+ Out << "}"; out(); nl(Out);
+ nl(Out);
+ Out << "Attrs.push_back(PAS);"; nl(Out);
+ }
+ Out << name << "_PAL = AttributeSet::get(mod->getContext(), Attrs);";
+ nl(Out);
+ out(); nl(Out);
+ Out << '}'; nl(Out);
+ }
+}
+
+void CppWriter::printType(Type* Ty) {
+ // We don't print definitions for primitive types
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return;
+
+ // If we already defined this type, we don't need to define it again.
+ if (DefinedTypes.find(Ty) != DefinedTypes.end())
+ return;
+
+ // Everything below needs the name for the type so get it now.
+ std::string typeName(getCppName(Ty));
+
+ // Print the type definition
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ FunctionType* FT = cast<FunctionType>(Ty);
+ Out << "std::vector<Type*>" << typeName << "_args;";
+ nl(Out);
+ FunctionType::param_iterator PI = FT->param_begin();
+ FunctionType::param_iterator PE = FT->param_end();
+ for (; PI != PE; ++PI) {
+ Type* argTy = static_cast<Type*>(*PI);
+ printType(argTy);
+ std::string argName(getCppName(argTy));
+ Out << typeName << "_args.push_back(" << argName;
+ Out << ");";
+ nl(Out);
+ }
+ printType(FT->getReturnType());
+ std::string retTypeName(getCppName(FT->getReturnType()));
+ Out << "FunctionType* " << typeName << " = FunctionType::get(";
+ in(); nl(Out) << "/*Result=*/" << retTypeName;
+ Out << ",";
+ nl(Out) << "/*Params=*/" << typeName << "_args,";
+ nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
+ out();
+ nl(Out);
+ break;
+ }
+ case Type::StructTyID: {
+ StructType* ST = cast<StructType>(Ty);
+ if (!ST->isLiteral()) {
+ Out << "StructType *" << typeName << " = mod->getTypeByName(\"";
+ printEscapedString(ST->getName());
+ Out << "\");";
+ nl(Out);
+ Out << "if (!" << typeName << ") {";
+ nl(Out);
+ Out << typeName << " = ";
+ Out << "StructType::create(mod->getContext(), \"";
+ printEscapedString(ST->getName());
+ Out << "\");";
+ nl(Out);
+ Out << "}";
+ nl(Out);
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+ }
+
+ Out << "std::vector<Type*>" << typeName << "_fields;";
+ nl(Out);
+ StructType::element_iterator EI = ST->element_begin();
+ StructType::element_iterator EE = ST->element_end();
+ for (; EI != EE; ++EI) {
+ Type* fieldTy = static_cast<Type*>(*EI);
+ printType(fieldTy);
+ std::string fieldName(getCppName(fieldTy));
+ Out << typeName << "_fields.push_back(" << fieldName;
+ Out << ");";
+ nl(Out);
+ }
+
+ if (ST->isLiteral()) {
+ Out << "StructType *" << typeName << " = ";
+ Out << "StructType::get(" << "mod->getContext(), ";
+ } else {
+ Out << "if (" << typeName << "->isOpaque()) {";
+ nl(Out);
+ Out << typeName << "->setBody(";
+ }
+
+ Out << typeName << "_fields, /*isPacked=*/"
+ << (ST->isPacked() ? "true" : "false") << ");";
+ nl(Out);
+ if (!ST->isLiteral()) {
+ Out << "}";
+ nl(Out);
+ }
+ break;
+ }
+ case Type::ArrayTyID: {
+ ArrayType* AT = cast<ArrayType>(Ty);
+ Type* ET = AT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "ArrayType* " << typeName << " = ArrayType::get("
+ << elemName
+ << ", " << utostr(AT->getNumElements()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Type::PointerTyID: {
+ PointerType* PT = cast<PointerType>(Ty);
+ Type* ET = PT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "PointerType* " << typeName << " = PointerType::get("
+ << elemName
+ << ", " << utostr(PT->getAddressSpace()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Type::VectorTyID: {
+ VectorType* PT = cast<VectorType>(Ty);
+ Type* ET = PT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "VectorType* " << typeName << " = VectorType::get("
+ << elemName
+ << ", " << utostr(PT->getNumElements()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ default:
+ error("Invalid TypeID");
+ }
+
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+
+ // Finally, separate the type definition from other with a newline.
+ nl(Out);
+}
+
+void CppWriter::printTypes(const Module* M) {
+ // Add all of the global variables to the value table.
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (I->hasInitializer())
+ printType(I->getInitializer()->getType());
+ printType(I->getType());
+ }
+
+ // Add all the functions to the table
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ printType(FI->getReturnType());
+ printType(FI->getFunctionType());
+ // Add all the function arguments
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ printType(BB->getType());
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ printType(I->getType());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ printType(I->getOperand(i)->getType());
+ }
+ }
+ }
+}
+
+
+// printConstant - Print out a constant pool entry...
+void CppWriter::printConstant(const Constant *CV) {
+ // First, if the constant is actually a GlobalValue (variable or function)
+ // or its already in the constant list then we've printed it already and we
+ // can just return.
+ if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
+ return;
+
+ std::string constName(getCppName(CV));
+ std::string typeName(getCppName(CV->getType()));
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ std::string constValue = CI->getValue().toString(10, true);
+ Out << "ConstantInt* " << constName
+ << " = ConstantInt::get(mod->getContext(), APInt("
+ << cast<IntegerType>(CI->getType())->getBitWidth()
+ << ", StringRef(\"" << constValue << "\"), 10));";
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ Out << "ConstantAggregateZero* " << constName
+ << " = ConstantAggregateZero::get(" << typeName << ");";
+ } else if (isa<ConstantPointerNull>(CV)) {
+ Out << "ConstantPointerNull* " << constName
+ << " = ConstantPointerNull::get(" << typeName << ");";
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ Out << "ConstantFP* " << constName << " = ";
+ printCFP(CFP);
+ Out << ";";
+ } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CA->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CA->getOperand(i)); // recurse to print operands
+ Out << constName << "_elems.push_back("
+ << getCppName(CA->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantArray::get("
+ << typeName << ", " << constName << "_elems);";
+ } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_fields;";
+ nl(Out);
+ unsigned N = CS->getNumOperands();
+ for (unsigned i = 0; i < N; i++) {
+ printConstant(CS->getOperand(i));
+ Out << constName << "_fields.push_back("
+ << getCppName(CS->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantStruct::get("
+ << typeName << ", " << constName << "_fields);";
+ } else if (const ConstantVector *CVec = dyn_cast<ConstantVector>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CVec->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CVec->getOperand(i));
+ Out << constName << "_elems.push_back("
+ << getCppName(CVec->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantVector::get("
+ << typeName << ", " << constName << "_elems);";
+ } else if (isa<UndefValue>(CV)) {
+ Out << "UndefValue* " << constName << " = UndefValue::get("
+ << typeName << ");";
+ } else if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(CV)) {
+ if (CDS->isString()) {
+ Out << "Constant *" << constName <<
+ " = ConstantDataArray::getString(mod->getContext(), \"";
+ StringRef Str = CDS->getAsString();
+ bool nullTerminate = false;
+ if (Str.back() == 0) {
+ Str = Str.drop_back();
+ nullTerminate = true;
+ }
+ printEscapedString(Str);
+ // Determine if we want null termination or not.
+ if (nullTerminate)
+ Out << "\", true);";
+ else
+ Out << "\", false);";// No null terminator
+ } else {
+ // TODO: Could generate more efficient code generating CDS calls instead.
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ for (unsigned i = 0; i != CDS->getNumElements(); ++i) {
+ Constant *Elt = CDS->getElementAsConstant(i);
+ printConstant(Elt);
+ Out << constName << "_elems.push_back(" << getCppName(Elt) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName;
+
+ if (isa<ArrayType>(CDS->getType()))
+ Out << " = ConstantArray::get(";
+ else
+ Out << " = ConstantVector::get(";
+ Out << typeName << ", " << constName << "_elems);";
+ }
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Out << "std::vector<Constant*> " << constName << "_indices;";
+ nl(Out);
+ printConstant(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
+ printConstant(CE->getOperand(i));
+ Out << constName << "_indices.push_back("
+ << getCppName(CE->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName
+ << " = ConstantExpr::getGetElementPtr("
+ << getCppName(CE->getOperand(0)) << ", "
+ << constName << "_indices);";
+ } else if (CE->isCast()) {
+ printConstant(CE->getOperand(0));
+ Out << "Constant* " << constName << " = ConstantExpr::getCast(";
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid cast opcode");
+ case Instruction::Trunc: Out << "Instruction::Trunc"; break;
+ case Instruction::ZExt: Out << "Instruction::ZExt"; break;
+ case Instruction::SExt: Out << "Instruction::SExt"; break;
+ case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
+ case Instruction::FPExt: Out << "Instruction::FPExt"; break;
+ case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
+ case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
+ case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
+ case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
+ case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
+ case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
+ case Instruction::BitCast: Out << "Instruction::BitCast"; break;
+ }
+ Out << ", " << getCppName(CE->getOperand(0)) << ", "
+ << getCppName(CE->getType()) << ");";
+ } else {
+ unsigned N = CE->getNumOperands();
+ for (unsigned i = 0; i < N; ++i ) {
+ printConstant(CE->getOperand(i));
+ }
+ Out << "Constant* " << constName << " = ConstantExpr::";
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << "getAdd("; break;
+ case Instruction::FAdd: Out << "getFAdd("; break;
+ case Instruction::Sub: Out << "getSub("; break;
+ case Instruction::FSub: Out << "getFSub("; break;
+ case Instruction::Mul: Out << "getMul("; break;
+ case Instruction::FMul: Out << "getFMul("; break;
+ case Instruction::UDiv: Out << "getUDiv("; break;
+ case Instruction::SDiv: Out << "getSDiv("; break;
+ case Instruction::FDiv: Out << "getFDiv("; break;
+ case Instruction::URem: Out << "getURem("; break;
+ case Instruction::SRem: Out << "getSRem("; break;
+ case Instruction::FRem: Out << "getFRem("; break;
+ case Instruction::And: Out << "getAnd("; break;
+ case Instruction::Or: Out << "getOr("; break;
+ case Instruction::Xor: Out << "getXor("; break;
+ case Instruction::ICmp:
+ Out << "getICmp(ICmpInst::ICMP_";
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "NE"; break;
+ case ICmpInst::ICMP_SLT: Out << "SLT"; break;
+ case ICmpInst::ICMP_ULT: Out << "ULT"; break;
+ case ICmpInst::ICMP_SGT: Out << "SGT"; break;
+ case ICmpInst::ICMP_UGT: Out << "UGT"; break;
+ case ICmpInst::ICMP_SLE: Out << "SLE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ULE"; break;
+ case ICmpInst::ICMP_SGE: Out << "SGE"; break;
+ case ICmpInst::ICMP_UGE: Out << "UGE"; break;
+ default: error("Invalid ICmp Predicate");
+ }
+ break;
+ case Instruction::FCmp:
+ Out << "getFCmp(FCmpInst::FCMP_";
+ switch (CE->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
+ case FCmpInst::FCMP_ORD: Out << "ORD"; break;
+ case FCmpInst::FCMP_UNO: Out << "UNO"; break;
+ case FCmpInst::FCMP_OEQ: Out << "OEQ"; break;
+ case FCmpInst::FCMP_UEQ: Out << "UEQ"; break;
+ case FCmpInst::FCMP_ONE: Out << "ONE"; break;
+ case FCmpInst::FCMP_UNE: Out << "UNE"; break;
+ case FCmpInst::FCMP_OLT: Out << "OLT"; break;
+ case FCmpInst::FCMP_ULT: Out << "ULT"; break;
+ case FCmpInst::FCMP_OGT: Out << "OGT"; break;
+ case FCmpInst::FCMP_UGT: Out << "UGT"; break;
+ case FCmpInst::FCMP_OLE: Out << "OLE"; break;
+ case FCmpInst::FCMP_ULE: Out << "ULE"; break;
+ case FCmpInst::FCMP_OGE: Out << "OGE"; break;
+ case FCmpInst::FCMP_UGE: Out << "UGE"; break;
+ case FCmpInst::FCMP_TRUE: Out << "TRUE"; break;
+ default: error("Invalid FCmp Predicate");
+ }
+ break;
+ case Instruction::Shl: Out << "getShl("; break;
+ case Instruction::LShr: Out << "getLShr("; break;
+ case Instruction::AShr: Out << "getAShr("; break;
+ case Instruction::Select: Out << "getSelect("; break;
+ case Instruction::ExtractElement: Out << "getExtractElement("; break;
+ case Instruction::InsertElement: Out << "getInsertElement("; break;
+ case Instruction::ShuffleVector: Out << "getShuffleVector("; break;
+ default:
+ error("Invalid constant expression");
+ break;
+ }
+ Out << getCppName(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i)
+ Out << ", " << getCppName(CE->getOperand(i));
+ Out << ");";
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "Constant* " << constName << " = ";
+ Out << "BlockAddress::get(" << getOpName(BA->getBasicBlock()) << ");";
+ } else {
+ error("Bad Constant");
+ Out << "Constant* " << constName << " = 0; ";
+ }
+ nl(Out);
+}
+
+void CppWriter::printConstants(const Module* M) {
+ // Traverse all the global variables looking for constant initializers
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ printConstant(I->getInitializer());
+
+ // Traverse the LLVM functions looking for constants
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
+ printConstant(C);
+ }
+ }
+ }
+ }
+ }
+}
+
+void CppWriter::printVariableUses(const GlobalVariable *GV) {
+ nl(Out) << "// Type Definitions";
+ nl(Out);
+ printType(GV->getType());
+ if (GV->hasInitializer()) {
+ const Constant *Init = GV->getInitializer();
+ printType(Init->getType());
+ if (const Function *F = dyn_cast<Function>(Init)) {
+ nl(Out)<< "/ Function Declarations"; nl(Out);
+ printFunctionHead(F);
+ } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ printVariableHead(gv);
+
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ printVariableBody(gv);
+ } else {
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstant(Init);
+ }
+ }
+}
+
+void CppWriter::printVariableHead(const GlobalVariable *GV) {
+ nl(Out) << "GlobalVariable* " << getCppName(GV);
+ if (is_inline) {
+ Out << " = mod->getGlobalVariable(mod->getContext(), ";
+ printEscapedString(GV->getName());
+ Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
+ nl(Out) << "if (!" << getCppName(GV) << ") {";
+ in(); nl(Out) << getCppName(GV);
+ }
+ Out << " = new GlobalVariable(/*Module=*/*mod, ";
+ nl(Out) << "/*Type=*/";
+ printCppName(GV->getType()->getElementType());
+ Out << ",";
+ nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
+ Out << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(GV->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Initializer=*/0, ";
+ if (GV->hasInitializer()) {
+ Out << "// has initializer, specified below";
+ }
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(GV->getName());
+ Out << "\");";
+ nl(Out);
+
+ if (GV->hasSection()) {
+ printCppName(GV);
+ Out << "->setSection(\"";
+ printEscapedString(GV->getSection());
+ Out << "\");";
+ nl(Out);
+ }
+ if (GV->getAlignment()) {
+ printCppName(GV);
+ Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
+ nl(Out);
+ }
+ if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(GV);
+ Out << "->setVisibility(";
+ printVisibilityType(GV->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (GV->isThreadLocal()) {
+ printCppName(GV);
+ Out << "->setThreadLocalMode(";
+ printThreadLocalMode(GV->getThreadLocalMode());
+ Out << ");";
+ nl(Out);
+ }
+ if (is_inline) {
+ out(); Out << "}"; nl(Out);
+ }
+}
+
+void CppWriter::printVariableBody(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ printCppName(GV);
+ Out << "->setInitializer(";
+ Out << getCppName(GV->getInitializer()) << ");";
+ nl(Out);
+ }
+}
+
+std::string CppWriter::getOpName(const Value* V) {
+ if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
+ return getCppName(V);
+
+ // See if its alread in the map of forward references, if so just return the
+ // name we already set up for it
+ ForwardRefMap::const_iterator I = ForwardRefs.find(V);
+ if (I != ForwardRefs.end())
+ return I->second;
+
+ // This is a new forward reference. Generate a unique name for it
+ std::string result(std::string("fwdref_") + utostr(uniqueNum++));
+
+ // Yes, this is a hack. An Argument is the smallest instantiable value that
+ // we can make as a placeholder for the real value. We'll replace these
+ // Argument instances later.
+ Out << "Argument* " << result << " = new Argument("
+ << getCppName(V->getType()) << ");";
+ nl(Out);
+ ForwardRefs[V] = result;
+ return result;
+}
+
+static StringRef ConvertAtomicOrdering(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ case NotAtomic: return "NotAtomic";
+ case Unordered: return "Unordered";
+ case Monotonic: return "Monotonic";
+ case Acquire: return "Acquire";
+ case Release: return "Release";
+ case AcquireRelease: return "AcquireRelease";
+ case SequentiallyConsistent: return "SequentiallyConsistent";
+ }
+ llvm_unreachable("Unknown ordering");
+}
+
+static StringRef ConvertAtomicSynchScope(SynchronizationScope SynchScope) {
+ switch (SynchScope) {
+ case SingleThread: return "SingleThread";
+ case CrossThread: return "CrossThread";
+ }
+ llvm_unreachable("Unknown synch scope");
+}
+
+// printInstruction - This member is called for each Instruction in a function.
+void CppWriter::printInstruction(const Instruction *I,
+ const std::string& bbname) {
+ std::string iName(getCppName(I));
+
+ // Before we emit this instruction, we need to take care of generating any
+ // forward references. So, we get the names of all the operands in advance
+ const unsigned Ops(I->getNumOperands());
+ std::string* opNames = new std::string[Ops];
+ for (unsigned i = 0; i < Ops; i++)
+ opNames[i] = getOpName(I->getOperand(i));
+
+ switch (I->getOpcode()) {
+ default:
+ error("Invalid instruction");
+ break;
+
+ case Instruction::Ret: {
+ const ReturnInst* ret = cast<ReturnInst>(I);
+ Out << "ReturnInst::Create(mod->getContext(), "
+ << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
+ break;
+ }
+ case Instruction::Br: {
+ const BranchInst* br = cast<BranchInst>(I);
+ Out << "BranchInst::Create(" ;
+ if (br->getNumOperands() == 3) {
+ Out << opNames[2] << ", "
+ << opNames[1] << ", "
+ << opNames[0] << ", ";
+
+ } else if (br->getNumOperands() == 1) {
+ Out << opNames[0] << ", ";
+ } else {
+ error("Branch with 2 operands?");
+ }
+ Out << bbname << ");";
+ break;
+ }
+ case Instruction::Switch: {
+ const SwitchInst *SI = cast<SwitchInst>(I);
+ Out << "SwitchInst* " << iName << " = SwitchInst::Create("
+ << getOpName(SI->getCondition()) << ", "
+ << getOpName(SI->getDefaultDest()) << ", "
+ << SI->getNumCases() << ", " << bbname << ");";
+ nl(Out);
+ for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ const IntegersSubset CaseVal = i.getCaseValueEx();
+ const BasicBlock *BB = i.getCaseSuccessor();
+ Out << iName << "->addCase("
+ << getOpName(CaseVal) << ", "
+ << getOpName(BB) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::IndirectBr: {
+ const IndirectBrInst *IBI = cast<IndirectBrInst>(I);
+ Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create("
+ << opNames[0] << ", " << IBI->getNumDestinations() << ");";
+ nl(Out);
+ for (unsigned i = 1; i != IBI->getNumOperands(); ++i) {
+ Out << iName << "->addDestination(" << opNames[i] << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Resume: {
+ Out << "ResumeInst::Create(mod->getContext(), " << opNames[0]
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Invoke: {
+ const InvokeInst* inv = cast<InvokeInst>(I);
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < inv->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back("
+ << getOpName(inv->getArgOperand(i)) << ");";
+ nl(Out);
+ }
+ // FIXME: This shouldn't use magic numbers -3, -2, and -1.
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create("
+ << getOpName(inv->getCalledFunction()) << ", "
+ << getOpName(inv->getNormalDest()) << ", "
+ << getOpName(inv->getUnwindDest()) << ", "
+ << iName << "_params, \"";
+ printEscapedString(inv->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(inv->getCallingConv());
+ Out << ");";
+ printAttributes(inv->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Unreachable: {
+ Out << "new UnreachableInst("
+ << "mod->getContext(), "
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:{
+ Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
+ switch (I->getOpcode()) {
+ case Instruction::Add: Out << "Instruction::Add"; break;
+ case Instruction::FAdd: Out << "Instruction::FAdd"; break;
+ case Instruction::Sub: Out << "Instruction::Sub"; break;
+ case Instruction::FSub: Out << "Instruction::FSub"; break;
+ case Instruction::Mul: Out << "Instruction::Mul"; break;
+ case Instruction::FMul: Out << "Instruction::FMul"; break;
+ case Instruction::UDiv:Out << "Instruction::UDiv"; break;
+ case Instruction::SDiv:Out << "Instruction::SDiv"; break;
+ case Instruction::FDiv:Out << "Instruction::FDiv"; break;
+ case Instruction::URem:Out << "Instruction::URem"; break;
+ case Instruction::SRem:Out << "Instruction::SRem"; break;
+ case Instruction::FRem:Out << "Instruction::FRem"; break;
+ case Instruction::And: Out << "Instruction::And"; break;
+ case Instruction::Or: Out << "Instruction::Or"; break;
+ case Instruction::Xor: Out << "Instruction::Xor"; break;
+ case Instruction::Shl: Out << "Instruction::Shl"; break;
+ case Instruction::LShr:Out << "Instruction::LShr"; break;
+ case Instruction::AShr:Out << "Instruction::AShr"; break;
+ default: Out << "Instruction::BadOpCode"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::FCmp: {
+ Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", ";
+ switch (cast<FCmpInst>(I)->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
+ case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
+ case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break;
+ case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break;
+ case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break;
+ case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break;
+ case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break;
+ case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break;
+ case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break;
+ case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break;
+ case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break;
+ case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break;
+ case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break;
+ case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
+ case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
+ case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
+ default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::ICmp: {
+ Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
+ switch (cast<ICmpInst>(I)->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
+ case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
+ case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
+ case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
+ case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
+ case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
+ case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
+ case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
+ default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst* allocaI = cast<AllocaInst>(I);
+ Out << "AllocaInst* " << iName << " = new AllocaInst("
+ << getCppName(allocaI->getAllocatedType()) << ", ";
+ if (allocaI->isArrayAllocation())
+ Out << opNames[0] << ", ";
+ Out << "\"";
+ printEscapedString(allocaI->getName());
+ Out << "\", " << bbname << ");";
+ if (allocaI->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << allocaI->getAlignment() << ");";
+ break;
+ }
+ case Instruction::Load: {
+ const LoadInst* load = cast<LoadInst>(I);
+ Out << "LoadInst* " << iName << " = new LoadInst("
+ << opNames[0] << ", \"";
+ printEscapedString(load->getName());
+ Out << "\", " << (load->isVolatile() ? "true" : "false" )
+ << ", " << bbname << ");";
+ if (load->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << load->getAlignment() << ");";
+ if (load->isAtomic()) {
+ StringRef Ordering = ConvertAtomicOrdering(load->getOrdering());
+ StringRef CrossThread = ConvertAtomicSynchScope(load->getSynchScope());
+ nl(Out) << iName << "->setAtomic("
+ << Ordering << ", " << CrossThread << ");";
+ }
+ break;
+ }
+ case Instruction::Store: {
+ const StoreInst* store = cast<StoreInst>(I);
+ Out << "StoreInst* " << iName << " = new StoreInst("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << (store->isVolatile() ? "true" : "false")
+ << ", " << bbname << ");";
+ if (store->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << store->getAlignment() << ");";
+ if (store->isAtomic()) {
+ StringRef Ordering = ConvertAtomicOrdering(store->getOrdering());
+ StringRef CrossThread = ConvertAtomicSynchScope(store->getSynchScope());
+ nl(Out) << iName << "->setAtomic("
+ << Ordering << ", " << CrossThread << ");";
+ }
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
+ if (gep->getNumOperands() <= 2) {
+ Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0];
+ if (gep->getNumOperands() == 2)
+ Out << ", " << opNames[1];
+ } else {
+ Out << "std::vector<Value*> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+ Out << iName << "_indices.push_back("
+ << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0] << ", " << iName << "_indices";
+ }
+ Out << ", \"";
+ printEscapedString(gep->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::PHI: {
+ const PHINode* phi = cast<PHINode>(I);
+
+ Out << "PHINode* " << iName << " = PHINode::Create("
+ << getCppName(phi->getType()) << ", "
+ << phi->getNumIncomingValues() << ", \"";
+ printEscapedString(phi->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out);
+ for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
+ Out << iName << "->addIncoming("
+ << opNames[PHINode::getOperandNumForIncomingValue(i)] << ", "
+ << getOpName(phi->getIncomingBlock(i)) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast: {
+ const CastInst* cst = cast<CastInst>(I);
+ Out << "CastInst* " << iName << " = new ";
+ switch (I->getOpcode()) {
+ case Instruction::Trunc: Out << "TruncInst"; break;
+ case Instruction::ZExt: Out << "ZExtInst"; break;
+ case Instruction::SExt: Out << "SExtInst"; break;
+ case Instruction::FPTrunc: Out << "FPTruncInst"; break;
+ case Instruction::FPExt: Out << "FPExtInst"; break;
+ case Instruction::FPToUI: Out << "FPToUIInst"; break;
+ case Instruction::FPToSI: Out << "FPToSIInst"; break;
+ case Instruction::UIToFP: Out << "UIToFPInst"; break;
+ case Instruction::SIToFP: Out << "SIToFPInst"; break;
+ case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
+ case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
+ case Instruction::BitCast: Out << "BitCastInst"; break;
+ default: llvm_unreachable("Unreachable");
+ }
+ Out << "(" << opNames[0] << ", "
+ << getCppName(cst->getType()) << ", \"";
+ printEscapedString(cst->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::Call: {
+ const CallInst* call = cast<CallInst>(I);
+ if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
+ Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
+ << getCppName(ila->getFunctionType()) << ", \""
+ << ila->getAsmString() << "\", \""
+ << ila->getConstraintString() << "\","
+ << (ila->hasSideEffects() ? "true" : "false") << ");";
+ nl(Out);
+ }
+ if (call->getNumArgOperands() > 1) {
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < call->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back(" << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", "
+ << iName << "_params, \"";
+ } else if (call->getNumArgOperands() == 1) {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \"";
+ } else {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", \"";
+ }
+ printEscapedString(call->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(call->getCallingConv());
+ Out << ");";
+ nl(Out) << iName << "->setTailCall("
+ << (call->isTailCall() ? "true" : "false");
+ Out << ");";
+ nl(Out);
+ printAttributes(call->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Select: {
+ const SelectInst* sel = cast<SelectInst>(I);
+ Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create(";
+ Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(sel->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::UserOp1:
+ /// FALL THROUGH
+ case Instruction::UserOp2: {
+ /// FIXME: What should be done here?
+ break;
+ }
+ case Instruction::VAArg: {
+ const VAArgInst* va = cast<VAArgInst>(I);
+ Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
+ << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
+ printEscapedString(va->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractElement: {
+ const ExtractElementInst* eei = cast<ExtractElementInst>(I);
+ Out << "ExtractElementInst* " << getCppName(eei)
+ << " = new ExtractElementInst(" << opNames[0]
+ << ", " << opNames[1] << ", \"";
+ printEscapedString(eei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertElement: {
+ const InsertElementInst* iei = cast<InsertElementInst>(I);
+ Out << "InsertElementInst* " << getCppName(iei)
+ << " = InsertElementInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(iei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
+ Out << "ShuffleVectorInst* " << getCppName(svi)
+ << " = new ShuffleVectorInst(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(svi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractValue: {
+ const ExtractValueInst *evi = cast<ExtractValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << evi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "ExtractValueInst* " << getCppName(evi)
+ << " = ExtractValueInst::Create(" << opNames[0]
+ << ", "
+ << iName << "_indices, \"";
+ printEscapedString(evi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertValue: {
+ const InsertValueInst *ivi = cast<InsertValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << ivi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "InsertValueInst* " << getCppName(ivi)
+ << " = InsertValueInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", "
+ << iName << "_indices, \"";
+ printEscapedString(ivi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::Fence: {
+ const FenceInst *fi = cast<FenceInst>(I);
+ StringRef Ordering = ConvertAtomicOrdering(fi->getOrdering());
+ StringRef CrossThread = ConvertAtomicSynchScope(fi->getSynchScope());
+ Out << "FenceInst* " << iName
+ << " = new FenceInst(mod->getContext(), "
+ << Ordering << ", " << CrossThread << ", " << bbname
+ << ");";
+ break;
+ }
+ case Instruction::AtomicCmpXchg: {
+ const AtomicCmpXchgInst *cxi = cast<AtomicCmpXchgInst>(I);
+ StringRef Ordering = ConvertAtomicOrdering(cxi->getOrdering());
+ StringRef CrossThread = ConvertAtomicSynchScope(cxi->getSynchScope());
+ Out << "AtomicCmpXchgInst* " << iName
+ << " = new AtomicCmpXchgInst("
+ << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", "
+ << Ordering << ", " << CrossThread << ", " << bbname
+ << ");";
+ nl(Out) << iName << "->setName(\"";
+ printEscapedString(cxi->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::AtomicRMW: {
+ const AtomicRMWInst *rmwi = cast<AtomicRMWInst>(I);
+ StringRef Ordering = ConvertAtomicOrdering(rmwi->getOrdering());
+ StringRef CrossThread = ConvertAtomicSynchScope(rmwi->getSynchScope());
+ StringRef Operation;
+ switch (rmwi->getOperation()) {
+ case AtomicRMWInst::Xchg: Operation = "AtomicRMWInst::Xchg"; break;
+ case AtomicRMWInst::Add: Operation = "AtomicRMWInst::Add"; break;
+ case AtomicRMWInst::Sub: Operation = "AtomicRMWInst::Sub"; break;
+ case AtomicRMWInst::And: Operation = "AtomicRMWInst::And"; break;
+ case AtomicRMWInst::Nand: Operation = "AtomicRMWInst::Nand"; break;
+ case AtomicRMWInst::Or: Operation = "AtomicRMWInst::Or"; break;
+ case AtomicRMWInst::Xor: Operation = "AtomicRMWInst::Xor"; break;
+ case AtomicRMWInst::Max: Operation = "AtomicRMWInst::Max"; break;
+ case AtomicRMWInst::Min: Operation = "AtomicRMWInst::Min"; break;
+ case AtomicRMWInst::UMax: Operation = "AtomicRMWInst::UMax"; break;
+ case AtomicRMWInst::UMin: Operation = "AtomicRMWInst::UMin"; break;
+ case AtomicRMWInst::BAD_BINOP: llvm_unreachable("Bad atomic operation");
+ }
+ Out << "AtomicRMWInst* " << iName
+ << " = new AtomicRMWInst("
+ << Operation << ", "
+ << opNames[0] << ", " << opNames[1] << ", "
+ << Ordering << ", " << CrossThread << ", " << bbname
+ << ");";
+ nl(Out) << iName << "->setName(\"";
+ printEscapedString(rmwi->getName());
+ Out << "\");";
+ break;
+ }
+ }
+ DefinedValues.insert(I);
+ nl(Out);
+ delete [] opNames;
+}
+
+// Print out the types, constants and declarations needed by one function
+void CppWriter::printFunctionUses(const Function* F) {
+ nl(Out) << "// Type Definitions"; nl(Out);
+ if (!is_inline) {
+ // Print the function's return type
+ printType(F->getReturnType());
+
+ // Print the function's function type
+ printType(F->getFunctionType());
+
+ // Print the types of each of the function's arguments
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+ }
+
+ // Print type definitions for every type referenced by an instruction and
+ // make a note of any global values or constants that are referenced
+ SmallPtrSet<GlobalValue*,64> gvs;
+ SmallPtrSet<Constant*,64> consts;
+ for (Function::const_iterator BB = F->begin(), BE = F->end();
+ BB != BE; ++BB){
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Print the type of the instruction itself
+ printType(I->getType());
+
+ // Print the type of each of the instruction's operands
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value* operand = I->getOperand(i);
+ printType(operand->getType());
+
+ // If the operand references a GVal or Constant, make a note of it
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GenerationType != GenFunction)
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ } else if (Constant* C = dyn_cast<Constant>(operand)) {
+ consts.insert(C);
+ for (unsigned j = 0; j < C->getNumOperands(); ++j) {
+ // If the operand references a GVal or Constant, make a note of it
+ Value* operand = C->getOperand(j);
+ printType(operand->getType());
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GenerationType != GenFunction)
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Print the function declarations for any functions encountered
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (Function* Fun = dyn_cast<Function>(*I)) {
+ if (!is_inline || Fun != F)
+ printFunctionHead(Fun);
+ }
+ }
+
+ // Print the global variable declarations for any variables encountered
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+ printVariableHead(F);
+ }
+
+ // Print the constants found
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
+ E = consts.end(); I != E; ++I) {
+ printConstant(*I);
+ }
+
+ // Process the global variables definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ if (GenerationType != GenFunction) {
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+ printVariableBody(GV);
+ }
+ }
+}
+
+void CppWriter::printFunctionHead(const Function* F) {
+ nl(Out) << "Function* " << getCppName(F);
+ Out << " = mod->getFunction(\"";
+ printEscapedString(F->getName());
+ Out << "\");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+
+ Out<< " = Function::Create(";
+ nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(F->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(F->getName());
+ Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
+ nl(Out,-1);
+ printCppName(F);
+ Out << "->setCallingConv(";
+ printCallingConv(F->getCallingConv());
+ Out << ");";
+ nl(Out);
+ if (F->hasSection()) {
+ printCppName(F);
+ Out << "->setSection(\"" << F->getSection() << "\");";
+ nl(Out);
+ }
+ if (F->getAlignment()) {
+ printCppName(F);
+ Out << "->setAlignment(" << F->getAlignment() << ");";
+ nl(Out);
+ }
+ if (F->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(F);
+ Out << "->setVisibility(";
+ printVisibilityType(F->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (F->hasGC()) {
+ printCppName(F);
+ Out << "->setGC(\"" << F->getGC() << "\");";
+ nl(Out);
+ }
+ Out << "}";
+ nl(Out);
+ printAttributes(F->getAttributes(), getCppName(F));
+ printCppName(F);
+ Out << "->setAttributes(" << getCppName(F) << "_PAL);";
+ nl(Out);
+}
+
+void CppWriter::printFunctionBody(const Function *F) {
+ if (F->isDeclaration())
+ return; // external functions have no bodies.
+
+ // Clear the DefinedValues and ForwardRefs maps because we can't have
+ // cross-function forward refs
+ ForwardRefs.clear();
+ DefinedValues.clear();
+
+ // Create all the argument values
+ if (!is_inline) {
+ if (!F->arg_empty()) {
+ Out << "Function::arg_iterator args = " << getCppName(F)
+ << "->arg_begin();";
+ nl(Out);
+ }
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << "Value* " << getCppName(AI) << " = args++;";
+ nl(Out);
+ if (AI->hasName()) {
+ Out << getCppName(AI) << "->setName(\"";
+ printEscapedString(AI->getName());
+ Out << "\");";
+ nl(Out);
+ }
+ }
+ }
+
+ // Create all the basic blocks
+ nl(Out);
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ Out << "BasicBlock* " << bbname <<
+ " = BasicBlock::Create(mod->getContext(), \"";
+ if (BI->hasName())
+ printEscapedString(BI->getName());
+ Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ nl(Out);
+ }
+
+ // Output all of its basic blocks... for the function
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ nl(Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
+ I != E; ++I) {
+ printInstruction(I,bbname);
+ }
+ }
+
+ // Loop over the ForwardRefs and resolve them now that all instructions
+ // are generated.
+ if (!ForwardRefs.empty()) {
+ nl(Out) << "// Resolve Forward References";
+ nl(Out);
+ }
+
+ while (!ForwardRefs.empty()) {
+ ForwardRefMap::iterator I = ForwardRefs.begin();
+ Out << I->second << "->replaceAllUsesWith("
+ << getCppName(I->first) << "); delete " << I->second << ";";
+ nl(Out);
+ ForwardRefs.erase(I);
+ }
+}
+
+void CppWriter::printInline(const std::string& fname,
+ const std::string& func) {
+ const Function* F = TheModule->getFunction(func);
+ if (!F) {
+ error(std::string("Function '") + func + "' not found in input module");
+ return;
+ }
+ if (F->isDeclaration()) {
+ error(std::string("Function '") + func + "' is external!");
+ return;
+ }
+ nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
+ << getCppName(F);
+ unsigned arg_count = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << ", Value* arg_" << arg_count;
+ }
+ Out << ") {";
+ nl(Out);
+ is_inline = true;
+ printFunctionUses(F);
+ printFunctionBody(F);
+ is_inline = false;
+ Out << "return " << getCppName(F->begin()) << ";";
+ nl(Out) << "}";
+ nl(Out);
+}
+
+void CppWriter::printModuleBody() {
+ // Print out all the type definitions
+ nl(Out) << "// Type Definitions"; nl(Out);
+ printTypes(TheModule);
+
+ // Functions can call each other and global variables can reference them so
+ // define all the functions first before emitting their function bodies.
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ printFunctionHead(I);
+
+ // Process the global variables declarations. We can't initialze them until
+ // after the constants are printed so just print a header for each global
+ nl(Out) << "// Global Variable Declarations\n"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableHead(I);
+ }
+
+ // Print out all the constants definitions. Constants don't recurse except
+ // through GlobalValues. All GlobalValues have been declared at this point
+ // so we can proceed to generate the constants.
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstants(TheModule);
+
+ // Process the global variables definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableBody(I);
+ }
+
+ // Finally, we can safely put out all of the function bodies.
+ nl(Out) << "// Function Definitions"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+ << ")";
+ nl(Out) << "{";
+ nl(Out,1);
+ printFunctionBody(I);
+ nl(Out,-1) << "}";
+ nl(Out);
+ }
+ }
+}
+
+void CppWriter::printProgram(const std::string& fname,
+ const std::string& mName) {
+ Out << "#include <llvm/Pass.h>\n";
+ Out << "#include <llvm/PassManager.h>\n";
+
+ Out << "#include <llvm/ADT/SmallVector.h>\n";
+ Out << "#include <llvm/Analysis/Verifier.h>\n";
+ Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <llvm/IR/BasicBlock.h>\n";
+ Out << "#include <llvm/IR/CallingConv.h>\n";
+ Out << "#include <llvm/IR/Constants.h>\n";
+ Out << "#include <llvm/IR/DerivedTypes.h>\n";
+ Out << "#include <llvm/IR/Function.h>\n";
+ Out << "#include <llvm/IR/GlobalVariable.h>\n";
+ Out << "#include <llvm/IR/InlineAsm.h>\n";
+ Out << "#include <llvm/IR/Instructions.h>\n";
+ Out << "#include <llvm/IR/LLVMContext.h>\n";
+ Out << "#include <llvm/IR/Module.h>\n";
+ Out << "#include <llvm/Support/FormattedStream.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
+ Out << "#include <algorithm>\n";
+ Out << "using namespace llvm;\n\n";
+ Out << "Module* " << fname << "();\n\n";
+ Out << "int main(int argc, char**argv) {\n";
+ Out << " Module* Mod = " << fname << "();\n";
+ Out << " verifyModule(*Mod, PrintMessageAction);\n";
+ Out << " PassManager PM;\n";
+ Out << " PM.add(createPrintModulePass(&outs()));\n";
+ Out << " PM.run(*Mod);\n";
+ Out << " return 0;\n";
+ Out << "}\n\n";
+ printModule(fname,mName);
+}
+
+void CppWriter::printModule(const std::string& fname,
+ const std::string& mName) {
+ nl(Out) << "Module* " << fname << "() {";
+ nl(Out,1) << "// Module Construction";
+ nl(Out) << "Module* mod = new Module(\"";
+ printEscapedString(mName);
+ Out << "\", getGlobalContext());";
+ if (!TheModule->getTargetTriple().empty()) {
+ nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+ }
+ if (!TheModule->getTargetTriple().empty()) {
+ nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
+ << "\");";
+ }
+
+ if (!TheModule->getModuleInlineAsm().empty()) {
+ nl(Out) << "mod->setModuleInlineAsm(\"";
+ printEscapedString(TheModule->getModuleInlineAsm());
+ Out << "\");";
+ }
+ nl(Out);
+
+ printModuleBody();
+ nl(Out) << "return mod;";
+ nl(Out,-1) << "}";
+ nl(Out);
+}
+
+void CppWriter::printContents(const std::string& fname,
+ const std::string& mName) {
+ Out << "\nModule* " << fname << "(Module *mod) {\n";
+ Out << "\nmod->setModuleIdentifier(\"";
+ printEscapedString(mName);
+ Out << "\");\n";
+ printModuleBody();
+ Out << "\nreturn mod;\n";
+ Out << "\n}\n";
+}
+
+void CppWriter::printFunction(const std::string& fname,
+ const std::string& funcName) {
+ const Function* F = TheModule->getFunction(funcName);
+ if (!F) {
+ error(std::string("Function '") + funcName + "' not found in input module");
+ return;
+ }
+ Out << "\nFunction* " << fname << "(Module *mod) {\n";
+ printFunctionUses(F);
+ printFunctionHead(F);
+ printFunctionBody(F);
+ Out << "return " << getCppName(F) << ";\n";
+ Out << "}\n";
+}
+
+void CppWriter::printFunctions() {
+ const Module::FunctionListType &funcs = TheModule->getFunctionList();
+ Module::const_iterator I = funcs.begin();
+ Module::const_iterator IE = funcs.end();
+
+ for (; I != IE; ++I) {
+ const Function &func = *I;
+ if (!func.isDeclaration()) {
+ std::string name("define_");
+ name += func.getName();
+ printFunction(name, func.getName());
+ }
+ }
+}
+
+void CppWriter::printVariable(const std::string& fname,
+ const std::string& varName) {
+ const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
+
+ if (!GV) {
+ error(std::string("Variable '") + varName + "' not found in input module");
+ return;
+ }
+ Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
+ printVariableUses(GV);
+ printVariableHead(GV);
+ printVariableBody(GV);
+ Out << "return " << getCppName(GV) << ";\n";
+ Out << "}\n";
+}
+
+void CppWriter::printType(const std::string &fname,
+ const std::string &typeName) {
+ Type* Ty = TheModule->getTypeByName(typeName);
+ if (!Ty) {
+ error(std::string("Type '") + typeName + "' not found in input module");
+ return;
+ }
+ Out << "\nType* " << fname << "(Module *mod) {\n";
+ printType(Ty);
+ Out << "return " << getCppName(Ty) << ";\n";
+ Out << "}\n";
+}
+
+bool CppWriter::runOnModule(Module &M) {
+ TheModule = &M;
+
+ // Emit a header
+ Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
+
+ // Get the name of the function we're supposed to generate
+ std::string fname = FuncName.getValue();
+
+ // Get the name of the thing we are to generate
+ std::string tgtname = NameToGenerate.getValue();
+ if (GenerationType == GenModule ||
+ GenerationType == GenContents ||
+ GenerationType == GenProgram ||
+ GenerationType == GenFunctions) {
+ if (tgtname == "!bad!") {
+ if (M.getModuleIdentifier() == "-")
+ tgtname = "<stdin>";
+ else
+ tgtname = M.getModuleIdentifier();
+ }
+ } else if (tgtname == "!bad!")
+ error("You must use the -for option with -gen-{function,variable,type}");
+
+ switch (WhatToGenerate(GenerationType)) {
+ case GenProgram:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printProgram(fname,tgtname);
+ break;
+ case GenModule:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printModule(fname,tgtname);
+ break;
+ case GenContents:
+ if (fname.empty())
+ fname = "makeLLVMModuleContents";
+ printContents(fname,tgtname);
+ break;
+ case GenFunction:
+ if (fname.empty())
+ fname = "makeLLVMFunction";
+ printFunction(fname,tgtname);
+ break;
+ case GenFunctions:
+ printFunctions();
+ break;
+ case GenInline:
+ if (fname.empty())
+ fname = "makeLLVMInline";
+ printInline(fname,tgtname);
+ break;
+ case GenVariable:
+ if (fname.empty())
+ fname = "makeLLVMVariable";
+ printVariable(fname,tgtname);
+ break;
+ case GenType:
+ if (fname.empty())
+ fname = "makeLLVMType";
+ printType(fname,tgtname);
+ break;
+ }
+
+ return false;
+}
+
+char CppWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
+ if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
+ PM.add(new CppWriter(o));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
new file mode 100644
index 000000000000..477e788ee2fd
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
@@ -0,0 +1,46 @@
+//===-- CPPTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C++ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CPPTARGETMACHINE_H
+#define CPPTARGETMACHINE_H
+
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class formatted_raw_ostream;
+
+struct CPPTargetMachine : public TargetMachine {
+ CPPTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, TT, CPU, FS, Options) {}
+
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter);
+
+ virtual const DataLayout *getDataLayout() const { return 0; }
+};
+
+extern Target TheCppBackendTarget;
+
+} // End llvm namespace
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
new file mode 100644
index 000000000000..1ca74a4895c4
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -0,0 +1,28 @@
+//===-- CppBackendTargetInfo.cpp - CppBackend Target Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCppBackendTarget;
+
+static unsigned CppBackend_TripleMatchQuality(const std::string &TT) {
+ // This class always works, but shouldn't be the default in most cases.
+ return 1;
+}
+
+extern "C" void LLVMInitializeCppBackendTargetInfo() {
+ TargetRegistry::RegisterTarget(TheCppBackendTarget, "cpp",
+ "C++ backend",
+ &CppBackend_TripleMatchQuality);
+}
+
+extern "C" void LLVMInitializeCppBackendTargetMC() {}
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
new file mode 100644
index 000000000000..dfbefc864283
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -0,0 +1,79 @@
+//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Hexagon back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_Hexagon_H
+#define TARGET_Hexagon_H
+
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class ModulePass;
+ class TargetMachine;
+ class MachineInstr;
+ class HexagonMCInst;
+ class HexagonAsmPrinter;
+ class HexagonTargetMachine;
+ class raw_ostream;
+
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+ FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
+ FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM);
+
+ FunctionPass *createHexagonSplitTFRCondSets(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonExpandPredSpillCode(HexagonTargetMachine &TM);
+
+ FunctionPass *createHexagonHardwareLoops();
+ FunctionPass *createHexagonPeephole();
+ FunctionPass *createHexagonFixupHwLoops();
+ FunctionPass *createHexagonPacketizer();
+ FunctionPass *createHexagonNewValueJump();
+
+
+/* TODO: object output.
+ MCCodeEmitter *createHexagonMCCodeEmitter(const Target &,
+ TargetMachine &TM,
+ MCContext &Ctx);
+*/
+/* TODO: assembler input.
+ TargetAsmBackend *createHexagonAsmBackend(const Target &,
+ const std::string &);
+*/
+ void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI,
+ HexagonAsmPrinter &AP);
+} // end namespace llvm;
+
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+// Normal instruction size (in bytes).
+#define HEXAGON_INSTR_SIZE 4
+
+// Maximum number of words and instructions in a packet.
+#define HEXAGON_PACKET_SIZE 4
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
new file mode 100644
index 000000000000..8a5ee40590bb
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -0,0 +1,177 @@
+//===-- Hexagon.td - Describe the Hexagon Target Machine --*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Subtarget features.
+//===----------------------------------------------------------------------===//
+
+// Hexagon Archtectures
+def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2",
+ "Hexagon v2">;
+def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3",
+ "Hexagon v3">;
+def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4",
+ "Hexagon v4">;
+def ArchV5 : SubtargetFeature<"v5", "HexagonArchVersion", "V5",
+ "Hexagon v5">;
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
+def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
+def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
+def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
+def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
+def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
+def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget.hasV5TOps()">;
+def NoV5T : Predicate<"!Subtarget.hasV5TOps()">;
+def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
+def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">;
+
+//===----------------------------------------------------------------------===//
+// Classes used for relation maps.
+//===----------------------------------------------------------------------===//
+// PredRel - Filter class used to relate non-predicated instructions with their
+// predicated forms.
+class PredRel;
+// PredNewRel - Filter class used to relate predicated instructions with their
+// predicate-new forms.
+class PredNewRel: PredRel;
+// ImmRegRel - Filter class used to relate instructions having reg-reg form
+// with their reg-imm counterparts.
+class ImmRegRel;
+// NewValueRel - Filter class used to relate regular store instructions with
+// their new-value store form.
+class NewValueRel: PredNewRel;
+// NewValueRel - Filter class used to relate load/store instructions having
+// different addressing modes with each other.
+class AddrModeRel: NewValueRel;
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate non-predicate instructions with their
+// predicated formats - true and false.
+//
+
+def getPredOpcode : InstrMapping {
+ let FilterClass = "PredRel";
+ // Instructions with the same BaseOpcode and isNVStore values form a row.
+ let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"];
+ // Instructions with the same predicate sense form a column.
+ let ColFields = ["PredSense"];
+ // The key column is the unpredicated instructions.
+ let KeyCol = [""];
+ // Value columns are PredSense=true and PredSense=false
+ let ValueCols = [["true"], ["false"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicated instructions with their .new
+// format.
+//
+def getPredNewOpcode : InstrMapping {
+ let FilterClass = "PredNewRel";
+ let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
+ let ColFields = ["PNewValue"];
+ let KeyCol = [""];
+ let ValueCols = [["new"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate store instructions with their new-value
+// format.
+//
+def getNewValueOpcode : InstrMapping {
+ let FilterClass = "NewValueRel";
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue"];
+ let ColFields = ["isNVStore"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+def getBasedWithImmOffset : InstrMapping {
+ let FilterClass = "AddrModeRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore",
+ "isMEMri", "isFloat"];
+ let ColFields = ["addrMode"];
+ let KeyCol = ["Absolute"];
+ let ValueCols = [["BaseImmOffset"]];
+}
+
+def getBaseWithRegOffset : InstrMapping {
+ let FilterClass = "AddrModeRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
+ let ColFields = ["addrMode"];
+ let KeyCol = ["BaseImmOffset"];
+ let ValueCols = [["BaseRegOffset"]];
+}
+
+def getRegForm : InstrMapping {
+ let FilterClass = "ImmRegRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue"];
+ let ColFields = ["InputType"];
+ let KeyCol = ["imm"];
+ let ValueCols = [["reg"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+include "HexagonSchedule.td"
+include "HexagonRegisterInfo.td"
+include "HexagonCallingConv.td"
+include "HexagonInstrInfo.td"
+include "HexagonIntrinsics.td"
+include "HexagonIntrinsicsDerived.td"
+
+def HexagonInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Hexagon processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, SchedMachineModel Model,
+ list<SubtargetFeature> Features>
+ : ProcessorModel<Name, Model, Features>;
+
+def : Proc<"hexagonv2", HexagonModel, [ArchV2]>;
+def : Proc<"hexagonv3", HexagonModel, [ArchV2, ArchV3]>;
+def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>;
+def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>;
+
+
+// Hexagon Uses the MC printer for assembler output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def HexagonAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Hexagon : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = HexagonInstrInfo;
+
+ let AssemblyWriters = [HexagonAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
new file mode 100644
index 000000000000..88cd3fbacea0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -0,0 +1,314 @@
+//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Hexagon assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Hexagon.h"
+#include "HexagonAsmPrinter.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "InstPrinter/HexagonInstPrinter.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool> AlignCalls(
+ "hexagon-align-calls", cl::Hidden, cl::init(true),
+ cl::desc("Insert falign after call instruction for Hexagon target"));
+
+void HexagonAsmPrinter::EmitAlignment(unsigned NumBits,
+ const GlobalValue *GV) const {
+ // For basic block level alignment, use ".falign".
+ if (!GV) {
+ OutStreamer.EmitRawText(StringRef("\t.falign"));
+ return;
+ }
+
+ AsmPrinter::EmitAlignment(NumBits, GV);
+}
+
+void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ default: llvm_unreachable ("<unknown operand type>");
+ case MachineOperand::MO_Register:
+ O << HexagonInstPrinter::getRegisterName(MO.getReg());
+ return;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ // FIXME: PIC relocation model.
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ // Computing the address of a global symbol, not calling it.
+ O << *Mang->getSymbol(MO.getGlobal());
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+}
+
+//
+// isBlockOnlyReachableByFallthrough - We need to override this since the
+// default AsmPrinter does not print labels for any basic block that
+// is only reachable by a fall through. That works for all cases except
+// for the case in which the basic block is reachable by a fall through but
+// through an indirect from a jump table. In this case, the jump table
+// will contain a label not defined by AsmPrinter.
+//
+bool HexagonAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ if (MBB->hasAddressTaken()) {
+ return false;
+ }
+ return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
+ case 'c': // Don't print "$" before a global var name or constant.
+ // Hexagon never has a prefix.
+ printOperand(MI, OpNo, OS);
+ return false;
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ OS << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, OS);
+ return false;
+}
+
+bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &Base = MI->getOperand(OpNo);
+ const MachineOperand &Offset = MI->getOperand(OpNo+1);
+
+ if (Base.isReg())
+ printOperand(MI, OpNo, O);
+ else
+ llvm_unreachable("Unimplemented");
+
+ if (Offset.isImm()) {
+ if (Offset.getImm())
+ O << " + #" << Offset.getImm();
+ }
+ else
+ llvm_unreachable("Unimplemented");
+
+ return false;
+}
+
+void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ llvm_unreachable("Unimplemented");
+}
+
+
+/// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to
+/// the current output stream.
+///
+void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (MI->isBundle()) {
+ std::vector<const MachineInstr*> BundleMIs;
+
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator MII = MI;
+ ++MII;
+ unsigned int IgnoreCount = 0;
+ while (MII != MBB->end() && MII->isInsideBundle()) {
+ const MachineInstr *MInst = MII;
+ if (MInst->getOpcode() == TargetOpcode::DBG_VALUE ||
+ MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ IgnoreCount++;
+ ++MII;
+ continue;
+ }
+ //BundleMIs.push_back(&*MII);
+ BundleMIs.push_back(MInst);
+ ++MII;
+ }
+ unsigned Size = BundleMIs.size();
+ assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
+ for (unsigned Index = 0; Index < Size; Index++) {
+ HexagonMCInst MCI;
+ MCI.setPacketStart(Index == 0);
+ MCI.setPacketEnd(Index == (Size-1));
+
+ HexagonLowerToMC(BundleMIs[Index], MCI, *this);
+ OutStreamer.EmitInstruction(MCI);
+ }
+ }
+ else {
+ HexagonMCInst MCI;
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+ MCI.setPacketStart(true);
+ MCI.setPacketEnd(true);
+ }
+ HexagonLowerToMC(MI, MCI, *this);
+ OutStreamer.EmitInstruction(MCI);
+ }
+
+ return;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \n or \0.
+// static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+// for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+// Name != E; ++Name)
+// if (isprint(*Name))
+// OS << *Name;
+// }
+
+
+void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI,
+ int OpNo, raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << HexagonInstPrinter::getRegisterName(MO1.getReg())
+ << " + #"
+ << MO2.getImm();
+}
+
+
+void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_GlobalAddress) &&
+ "Expecting global address");
+
+ O << *Mang->getSymbol(MO.getGlobal());
+ if (MO.getOffset() != 0) {
+ O << " + ";
+ O << MO.getOffset();
+ }
+}
+
+void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) &&
+ "Expecting jump table index");
+
+ // Hexagon_TODO: Do we need name mangling?
+ O << *GetJTISymbol(MO.getIndex());
+}
+
+void HexagonAsmPrinter::printConstantPool(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_ConstantPoolIndex) &&
+ "Expecting constant pool index");
+
+ // Hexagon_TODO: Do we need name mangling?
+ O << *GetCPISymbol(MO.getIndex());
+}
+
+static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return(new HexagonInstPrinter(MAI, MII, MRI));
+ else
+ return NULL;
+}
+
+extern "C" void LLVMInitializeHexagonAsmPrinter() {
+ RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
+
+ TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
+ createHexagonMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
new file mode 100644
index 000000000000..bc2af636124c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -0,0 +1,165 @@
+//===-- HexagonAsmPrinter.h - Print machine code to an Hexagon .s file ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hexagon Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONASMPRINTER_H
+#define HEXAGONASMPRINTER_H
+
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ class HexagonAsmPrinter : public AsmPrinter {
+ const HexagonSubtarget *Subtarget;
+
+ public:
+ explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<HexagonSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Hexagon Assembly Printer";
+ }
+
+ bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+ virtual void EmitAlignment(unsigned NumBits,
+ const GlobalValue *GV = 0) const;
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+
+ // void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, raw_ostream &O);
+
+ /// printRegister - Print register according to target requirements.
+ ///
+ void printRegister(const MachineOperand &MO, bool R0AsZero,
+ raw_ostream &O) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
+ O << getRegisterName(RegNo);
+ }
+
+ void printImmOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int value = MI->getOperand(OpNo).getImm();
+ O << value;
+ }
+
+ void printNegImmOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int value = MI->getOperand(OpNo).getImm();
+ O << -value;
+ }
+
+ void printMEMriOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << " + #"
+ << (int) MO2.getImm();
+ }
+
+ void printFrameIndexOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << ", #"
+ << MO2.getImm();
+ }
+
+ void printBranchOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ if (MI->getOperand(OpNo).isImm()) {
+ O << "$+" << MI->getOperand(OpNo).getImm()*4;
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ }
+
+ void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ }
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ O << "#HI(";
+ if (MI->getOperand(OpNo).isImm()) {
+ printImmOperand(MI, OpNo, O);
+ }
+ else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ O << ")";
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ O << "#HI(";
+ if (MI->getOperand(OpNo).isImm()) {
+ printImmOperand(MI, OpNo, O);
+ }
+ else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ O << ")";
+ }
+
+ void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O);
+
+#if 0
+ void printModuleLevelGV(const GlobalVariable* GVar, raw_ostream &O);
+#endif
+
+ void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo,
+ raw_ostream &O);
+
+ void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O);
+ void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O);
+ void printConstantPool(const MachineInstr *MI, int OpNo, raw_ostream &O);
+
+ static const char *getRegisterName(unsigned RegNo);
+
+#if 0
+ void EmitStartOfAsmFile(Module &M);
+#endif
+ };
+
+} // end of llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
new file mode 100644
index 000000000000..d4078ad28b60
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -0,0 +1,236 @@
+//===-- HexagonCFGOptimizer.cpp - CFG optimizations -----------------------===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon_cfg"
+#include "Hexagon.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonCFGOptimizer : public MachineFunctionPass {
+
+private:
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
+
+ public:
+ static char ID;
+ HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID),
+ QTM(TM),
+ QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon CFG Optimizer";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonCFGOptimizer::ID = 0;
+
+static bool IsConditionalBranch(int Opc) {
+ return (Opc == Hexagon::JMP_c) || (Opc == Hexagon::JMP_cNot)
+ || (Opc == Hexagon::JMP_cdnPt) || (Opc == Hexagon::JMP_cdnNotPt);
+}
+
+
+static bool IsUnconditionalJump(int Opc) {
+ return (Opc == Hexagon::JMP);
+}
+
+
+void
+HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
+ MachineBasicBlock* NewTarget) {
+ const HexagonInstrInfo *QII = QTM.getInstrInfo();
+ int NewOpcode = 0;
+ switch(MI->getOpcode()) {
+ case Hexagon::JMP_c:
+ NewOpcode = Hexagon::JMP_cNot;
+ break;
+
+ case Hexagon::JMP_cNot:
+ NewOpcode = Hexagon::JMP_c;
+ break;
+
+ case Hexagon::JMP_cdnPt:
+ NewOpcode = Hexagon::JMP_cdnNotPt;
+ break;
+
+ case Hexagon::JMP_cdnNotPt:
+ NewOpcode = Hexagon::JMP_cdnPt;
+ break;
+
+ default:
+ llvm_unreachable("Cannot handle this case");
+ }
+
+ MI->setDesc(QII->get(NewOpcode));
+ MI->getOperand(1).setMBB(NewTarget);
+}
+
+
+bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+
+ // Traverse the basic block.
+ MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
+ if (MII != MBB->end()) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (IsConditionalBranch(Opc)) {
+
+ //
+ // (Case 1) Transform the code if the following condition occurs:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...next block in layout is BB3...
+ // BB3: ...
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ //
+ // (Case 2) A variation occurs when BB3 contains a JMP to BB4:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...other basic blocks ...
+ // BB4:
+ // ...not a fall-thru
+ // BB3: ...
+ // jump BB4
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ // BB4: ...
+ //
+ unsigned NumSuccs = MBB->succ_size();
+ MachineBasicBlock::succ_iterator SI = MBB->succ_begin();
+ MachineBasicBlock* FirstSucc = *SI;
+ MachineBasicBlock* SecondSucc = *(++SI);
+ MachineBasicBlock* LayoutSucc = NULL;
+ MachineBasicBlock* JumpAroundTarget = NULL;
+
+ if (MBB->isLayoutSuccessor(FirstSucc)) {
+ LayoutSucc = FirstSucc;
+ JumpAroundTarget = SecondSucc;
+ } else if (MBB->isLayoutSuccessor(SecondSucc)) {
+ LayoutSucc = SecondSucc;
+ JumpAroundTarget = FirstSucc;
+ } else {
+ // Odd case...cannot handle.
+ }
+
+ // The target of the unconditional branch must be JumpAroundTarget.
+ // TODO: If not, we should not invert the unconditional branch.
+ MachineBasicBlock* CondBranchTarget = NULL;
+ if ((MI->getOpcode() == Hexagon::JMP_c) ||
+ (MI->getOpcode() == Hexagon::JMP_cNot)) {
+ CondBranchTarget = MI->getOperand(1).getMBB();
+ }
+
+ if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) {
+ continue;
+ }
+
+ if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) {
+
+ // Ensure that BB2 has one instruction -- an unconditional jump.
+ if ((LayoutSucc->size() == 1) &&
+ IsUnconditionalJump(LayoutSucc->front().getOpcode())) {
+ MachineBasicBlock* UncondTarget =
+ LayoutSucc->front().getOperand(0).getMBB();
+ // Check if the layout successor of BB2 is BB3.
+ bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget);
+ bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) &&
+ JumpAroundTarget->size() >= 1 &&
+ IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) &&
+ JumpAroundTarget->pred_size() == 1 &&
+ JumpAroundTarget->succ_size() == 1;
+
+ if (case1 || case2) {
+ InvertAndChangeJumpTarget(MI, UncondTarget);
+ MBB->removeSuccessor(JumpAroundTarget);
+ MBB->addSuccessor(UncondTarget);
+
+ // Remove the unconditional branch in LayoutSucc.
+ LayoutSucc->erase(LayoutSucc->begin());
+ LayoutSucc->removeSuccessor(UncondTarget);
+ LayoutSucc->addSuccessor(JumpAroundTarget);
+
+ // This code performs the conversion for case 2, which moves
+ // the block to the fall-thru case (BB3 in the code above).
+ if (case2 && !case1) {
+ JumpAroundTarget->moveAfter(LayoutSucc);
+ // only move a block if it doesn't have a fall-thru. otherwise
+ // the CFG will be incorrect.
+ if (!UncondTarget->canFallThrough()) {
+ UncondTarget->moveAfter(JumpAroundTarget);
+ }
+ }
+
+ //
+ // Correct live-in information. Is used by post-RA scheduler
+ // The live-in to LayoutSucc is now all values live-in to
+ // JumpAroundTarget.
+ //
+ std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(),
+ LayoutSucc->livein_end());
+ std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(),
+ JumpAroundTarget->livein_end());
+ for (unsigned i = 0; i < OrigLiveIn.size(); ++i) {
+ LayoutSucc->removeLiveIn(OrigLiveIn[i]);
+ }
+ for (unsigned i = 0; i < NewLiveIn.size(); ++i) {
+ LayoutSucc->addLiveIn(NewLiveIn[i]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) {
+ return new HexagonCFGOptimizer(TM);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td
new file mode 100644
index 000000000000..e61b2a7a58ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td
@@ -0,0 +1,35 @@
+//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Hexagon architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Hexagon 32-bit C return-value convention.
+def RetCC_Hexagon32 : CallingConv<[
+ CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64, f64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
+
+// Hexagon 32-bit C Calling convention.
+def CC_Hexagon32 : CallingConv<[
+ // All arguments get passed in integer registers if there is space.
+ CCIfType<[f32, i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[f64, i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp
new file mode 100644
index 000000000000..2c93d04f98e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -0,0 +1,204 @@
+//===-- llvm/CallingConvLower.cpp - Calling Convention lowering -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon_CCState class, used for lowering and
+// implementing calling conventions. Adapted from the machine independent
+// version of the class (CCState) but this handles calls to varargs functions
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonCallingConvLower.h"
+#include "Hexagon.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
+ const TargetMachine &tm,
+ SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &c)
+ : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(c) {
+ // No stack is used.
+ StackOffset = 0;
+
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
+void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ unsigned Offset = AllocateStack(Size, Align);
+
+ addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset,
+ LocVT.getSimpleVT(), LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void Hexagon_CCState::MarkAllocated(unsigned Reg) {
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI/32] |= 1 << (*AI&31);
+}
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void
+Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+ unsigned NumArgs = Ins.size();
+ unsigned i = 0;
+
+ // If the function returns a small struct in registers, skip
+ // over the first (dummy) argument.
+ if (SretValueInRegs != 0) {
+ ++i;
+ }
+
+
+ for (; i != NumArgs; ++i) {
+ EVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) {
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void
+Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+
+ // For Hexagon, Return small structures in registers.
+ if (SretValueInRegs != 0) {
+ if (SretValueInRegs <= 32) {
+ unsigned Reg = Hexagon::R0;
+ addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32,
+ CCValAssign::Full));
+ return;
+ }
+ if (SretValueInRegs <= 64) {
+ unsigned Reg = Hexagon::D0;
+ addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64,
+ CCValAssign::Full));
+ return;
+ }
+ }
+
+
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ Hexagon_CCAssignFn Fn,
+ int NonVarArgsParams,
+ unsigned SretValueSize) {
+ unsigned NumOps = Outs.size();
+
+ unsigned i = 0;
+ // If the called function returns a small struct in registers, skip
+ // the first actual parameter. We do not want to pass a pointer to
+ // the stack location.
+ if (SretValueSize != 0) {
+ ++i;
+ }
+
+ for (; i != NumOps; ++i) {
+ EVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this,
+ NonVarArgsParams, i+1, false)) {
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void
+Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ Hexagon_CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ EVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1,
+ false)) {
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) {
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1,
+ false)) {
+ dbgs() << "Call result has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h
new file mode 100644
index 000000000000..489b3a3e5985
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -0,0 +1,189 @@
+//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon_CCState class, used for lowering
+// and implementing calling conventions. Adapted from the target independent
+// version but this handles calls to varargs functions
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+//
+// Need to handle varargs.
+//
+namespace llvm {
+ class TargetRegisterInfo;
+ class TargetMachine;
+ class Hexagon_CCState;
+ class SDNode;
+
+
+/// Hexagon_CCAssignFn - This function assigns a location for Val, updating
+/// State to reflect the change.
+typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+/// CCState - This class holds information needed while lowering arguments and
+/// return values. It captures which registers are already assigned and which
+/// stack slots are used. It provides accessors to allocate these values.
+class Hexagon_CCState {
+ CallingConv::ID CallingConv;
+ bool IsVarArg;
+ const TargetMachine &TM;
+ const TargetRegisterInfo &TRI;
+ SmallVector<CCValAssign, 16> &Locs;
+ LLVMContext &Context;
+
+ unsigned StackOffset;
+ SmallVector<uint32_t, 16> UsedRegs;
+public:
+ Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
+ SmallVector<CCValAssign, 16> &locs, LLVMContext &c);
+
+ void addLoc(const CCValAssign &V) {
+ Locs.push_back(V);
+ }
+
+ LLVMContext &getContext() const { return Context; }
+ const TargetMachine &getTarget() const { return TM; }
+ unsigned getCallingConv() const { return CallingConv; }
+ bool isVarArg() const { return IsVarArg; }
+
+ unsigned getNextStackOffset() const { return StackOffset; }
+
+ /// isAllocated - Return true if the specified register (or an alias) is
+ /// allocated.
+ bool isAllocated(unsigned Reg) const {
+ return UsedRegs[Reg/32] & (1 << (Reg&31));
+ }
+
+ /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+ /// incorporating info about the formals into this state.
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+ /// incorporating info about the result values into this state.
+ void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+ /// about the passed values into this state.
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn, int NonVarArgsParams,
+ unsigned SretValueSize);
+
+ /// AnalyzeCallOperands - Same as above except it takes vectors of types
+ /// and argument flags.
+ void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ Hexagon_CCAssignFn Fn);
+
+ /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+ /// incorporating info about the passed values into this state.
+ void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeCallResult - Same as above except it's specialized for calls which
+ /// produce a single value.
+ void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn);
+
+ /// getFirstUnallocated - Return the first unallocated register in the set, or
+ /// NumRegs if they are all allocated.
+ unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (!isAllocated(Regs[i]))
+ return i;
+ return NumRegs;
+ }
+
+ /// AllocateReg - Attempt to allocate one register. If it is not available,
+ /// return zero. Otherwise, return the register, marking it and any aliases
+ /// as allocated.
+ unsigned AllocateReg(unsigned Reg) {
+ if (isAllocated(Reg)) return 0;
+ MarkAllocated(Reg);
+ return Reg;
+ }
+
+ /// Version of AllocateReg with extra register to be shadowed.
+ unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) {
+ if (isAllocated(Reg)) return 0;
+ MarkAllocated(Reg);
+ MarkAllocated(ShadowReg);
+ return Reg;
+ }
+
+ /// AllocateReg - Attempt to allocate one of the specified registers. If none
+ /// are available, return zero. Otherwise, return the first one available,
+ /// marking it and any aliases as allocated.
+ unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
+ unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+ if (FirstUnalloc == NumRegs)
+ return 0; // Didn't find the reg.
+
+ // Mark the register and any aliases as allocated.
+ unsigned Reg = Regs[FirstUnalloc];
+ MarkAllocated(Reg);
+ return Reg;
+ }
+
+ /// Version of AllocateReg with list of registers to be shadowed.
+ unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
+ unsigned NumRegs) {
+ unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+ if (FirstUnalloc == NumRegs)
+ return 0; // Didn't find the reg.
+
+ // Mark the register and any aliases as allocated.
+ unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
+ MarkAllocated(Reg);
+ MarkAllocated(ShadowReg);
+ return Reg;
+ }
+
+ /// AllocateStack - Allocate a chunk of stack space with the specified size
+ /// and alignment.
+ unsigned AllocateStack(unsigned Size, unsigned Align) {
+ assert(Align && ((Align-1) & Align) == 0); // Align is power of 2.
+ StackOffset = ((StackOffset + Align-1) & ~(Align-1));
+ unsigned Result = StackOffset;
+ StackOffset += Size;
+ return Result;
+ }
+
+ // HandleByVal - Allocate a stack slot large enough to pass an argument by
+ // value. The size and alignment information of the argument is encoded in its
+ // parameter attribute.
+ void HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
+
+private:
+ /// MarkAllocated - Mark a register and all of its aliases as allocated.
+ void MarkAllocated(unsigned Reg);
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
new file mode 100644
index 000000000000..08144217fd30
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -0,0 +1,180 @@
+//===-- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// The Hexagon processor has no instructions that load or store predicate
+// registers directly. So, when these registers must be spilled a general
+// purpose register must be found and the value copied to/from it from/to
+// the predicate register. This code currently does not use the register
+// scavenger mechanism available in the allocator. There are two registers
+// reserved to allow spilling/restoring predicate registers. One is used to
+// hold the predicate value. The other is used when stack frame offsets are
+// too large.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+
+namespace {
+
+class HexagonExpandPredSpillCode : public MachineFunctionPass {
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonExpandPredSpillCode(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Expand Predicate Spill Code";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonExpandPredSpillCode::ID = 0;
+
+
+bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
+
+ const HexagonInstrInfo *TII = QTM.getInstrInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::STriw_pred) {
+ // STriw_pred [R30], ofst, SrcReg;
+ unsigned FP = MI->getOperand(0).getReg();
+ assert(FP == QTM.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(1).isImm() && "Not an offset");
+ int Offset = MI->getOperand(1).getImm();
+ int SrcReg = MI->getOperand(2).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ "Not a predicate register");
+ if (!TII->isValidOffset(Hexagon::STriw_indexed, Offset)) {
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP).addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::STriw_indexed))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::STriw_indexed))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0)
+ .addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::STriw_indexed)).
+ addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::LDriw_pred) {
+ // DstReg = LDriw_pred [R30], ofst.
+ int DstReg = MI->getOperand(0).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
+ "Not a predicate register");
+ unsigned FP = MI->getOperand(1).getReg();
+ assert(FP == QTM.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(2).isImm() && "Not an offset");
+ int Offset = MI->getOperand(2).getImm();
+ if (!TII->isValidOffset(Hexagon::LDriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP)
+ .addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ }
+ }
+ }
+
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) {
+ return new HexagonExpandPredSpillCode(TM);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
new file mode 100644
index 000000000000..240cc9566648
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -0,0 +1,183 @@
+//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// The loop start address in the LOOPn instruction is encoded as a distance
+// from the LOOPn instruction itself. If the start address is too far from
+// the LOOPn instruction, the loop needs to be set up manually, i.e. via
+// direct transfers to SAn and LCn.
+// This pass will identify and convert such LOOPn instructions to a proper
+// form.
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeHexagonFixupHwLoopsPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonFixupHwLoops : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ HexagonFixupHwLoops() : MachineFunctionPass(ID) {
+ initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// \brief Maximum distance between the loop instr and the basic block.
+ /// Just an estimate.
+ static const unsigned MAX_LOOP_DISTANCE = 200;
+
+ /// \brief Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// \brief Add the instruction to set the LC and SA registers explicitly.
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
+ "Hexagon Hardware Loops Fixup", false, false)
+
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+
+/// \brief Returns true if the instruction is a hardware loop instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = fixupLoopInstrs(MF);
+ return Changed;
+}
+
+
+/// \brief For Hexagon, if the loop label is to far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of the basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Map for each basic block to it's first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ unsigned Dist = Sub > 0 ? Sub : -Sub;
+ if (Dist > MAX_LOOP_DISTANCE) {
+ // Convert to explicity setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+}
+
+
+/// \brief convert a loop instruction to a sequence of instructions that
+/// set the LC0 and SA0 register explicitly.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS) {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
+
+ // First, set the LC0 with the trip count.
+ if (MII->getOperand(1).isReg()) {
+ // Trip count is a register
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(MII->getOperand(1).getReg());
+ } else {
+ // Trip count is an immediate.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+ .addImm(MII->getOperand(1).getImm());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(Scratch);
+ }
+ // Then, set the SA0 with the loop start address.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+ .addMBB(MII->getOperand(0).getMBB());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0)
+ .addReg(Scratch);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
new file mode 100644
index 000000000000..d6a9329cd407
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -0,0 +1,348 @@
+//===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonFrameLowering.h"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDeallocRet(
+ "disable-hexagon-dealloc-ret",
+ cl::Hidden,
+ cl::desc("Disable Dealloc Return for Hexagon target"));
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target.
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign);
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = RoundUpToAlignment(FrameSize, TargetAlign);
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ const HexagonRegisterInfo *QRI =
+ static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ determineFrameLayout(MF);
+
+ // Check if frame moves are needed for EH.
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ !MF.getFunction()->needsUnwindTableEntry();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ int NumBytes = (int) MFI->getStackSize();
+
+ // LLVM expects allocframe not to be the first instruction in the
+ // basic block.
+ MachineBasicBlock::iterator InsertPt = MBB.begin();
+
+ //
+ // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset.
+ //
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ const std::vector<MachineInstr*>& AdjustRegs =
+ FuncInfo->getAllocaAdjustInsts();
+ for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
+ e = AdjustRegs.end();
+ i != e; ++i) {
+ MachineInstr* MI = *i;
+ assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
+ "Expected adjust alloca node");
+
+ MachineOperand& MO = MI->getOperand(2);
+ assert(MO.isImm() && "Expected immediate");
+ MO.setImm(MFI->getMaxCallFrameSize());
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ if (needsFrameMoves) {
+ // Advance CFA. DW_CFA_def_cfa
+ unsigned FPReg = QRI->getFrameRegister();
+ unsigned RAReg = QRI->getRARegister();
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, -8);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // R31 = (R31 - #4)
+ MachineLocation LRDst(RAReg, -4);
+ MachineLocation LRSrc(RAReg);
+ Moves.push_back(MachineMove(0, LRDst, LRSrc));
+
+ // R30 = (R30 - #8)
+ MachineLocation SPDst(FPReg, -8);
+ MachineLocation SPSrc(FPReg);
+ Moves.push_back(MachineMove(0, SPDst, SPSrc));
+ }
+
+ //
+ // Only insert ALLOCFRAME if we need to.
+ //
+ if (hasFP(MF)) {
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const int ALLOCFRAME_MAX = 16384;
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0);
+
+ // Subtract offset from frame pointer.
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(NumBytes);
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr),
+ QRI->getStackRegister()).
+ addReg(QRI->getStackRegister()).
+ addReg(HEXAGON_RESERVED_REG_1);
+ } else {
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+// Returns true if MBB has a machine instructions that indicates a tail call
+// in the block.
+bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
+
+void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ DebugLoc dl = MBBI->getDebugLoc();
+ //
+ // Only insert deallocframe if we need to.
+ //
+ if (hasFP(MF)) {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineBasicBlock::iterator MBBI_end = MBB.end();
+ //
+ // For Hexagon, we don't need the frame size.
+ //
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int NumBytes = (int) MFI->getStackSize();
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
+ // versions.
+ if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR
+ && !DisableDeallocRet) {
+ // Remove jumpr node.
+ MBB.erase(MBBI);
+ // Add dealloc_return.
+ BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::DEALLOC_RET_V4))
+ .addImm(NumBytes);
+ } else { // Add deallocframe for V2 and V3.
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+
+bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ return (MFI->hasCalls() || (MFI->getStackSize() > 0) ||
+ FuncInfo->hasClobberLR() );
+}
+
+static inline
+unsigned uniqueSuperReg(unsigned Reg, const TargetRegisterInfo *TRI) {
+ MCSuperRegIterator SRI(Reg, TRI);
+ assert(SRI.isValid() && "Expected a superreg");
+ unsigned SuperReg = *SRI;
+ ++SRI;
+ assert(!SRI.isValid() && "Expected exactly one superreg");
+ return SuperReg;
+}
+
+bool
+HexagonFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only schedule double loads if we spill contiguous callee-saved regs
+ // For instance, we cannot scheduled double-word loads if we spill r24,
+ // r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word store.
+ //
+ unsigned SuperReg = uniqueSuperReg(Reg, TRI);
+ bool CanUseDblStore = false;
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg);
+ CanUseDblStore = (SuperRegNext == SuperReg);
+ }
+
+
+ if (CanUseDblStore) {
+ TII.storeRegToStackSlot(MBB, MI, SuperReg, true,
+ CSI[i+1].getFrameIdx(), SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg);
+ ++i;
+ } else {
+ // Cannot use a double-word store.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC,
+ TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+
+bool HexagonFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only schedule double loads if we spill contiguous callee-saved regs
+ // For instance, we cannot scheduled double-word loads if we spill r24,
+ // r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word load.
+ //
+ unsigned SuperReg = uniqueSuperReg(Reg, TRI);
+ const TargetRegisterClass* SuperRegClass = 0;
+ bool CanUseDblLoad = false;
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg);
+ CanUseDblLoad = (SuperRegNext == SuperReg);
+ }
+
+
+ if (CanUseDblLoad) {
+ TII.loadRegFromStackSlot(MBB, MI, SuperReg, CSI[i+1].getFrameIdx(),
+ SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg);
+ ++i;
+ } else {
+ // Cannot use a double-word load.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+void HexagonFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
+ // Hexagon_TODO: add code
+ } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
+ // Hexagon_TODO: add code
+ } else {
+ llvm_unreachable("Cannot handle this call frame pseudo instruction");
+ }
+ MBB.erase(I);
+}
+
+int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ return MF.getFrameInfo()->getObjectOffset(FI);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
new file mode 100644
index 000000000000..a62c76aaf676
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -0,0 +1,55 @@
+//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGON_FRAMEINFO_H
+#define HEXAGON_FRAMEINFO_H
+
+#include "Hexagon.h"
+#include "HexagonSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class HexagonFrameLowering : public TargetFrameLowering {
+private:
+ const HexagonSubtarget &STI;
+ void determineFrameLayout(MachineFunction &MF) const;
+
+public:
+ explicit HexagonFrameLowering(const HexagonSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool
+ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ virtual bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasTailCall(MachineBasicBlock &MBB) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
new file mode 100644
index 000000000000..178662447a7f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -0,0 +1,1554 @@
+//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the Hexagon hardware
+// loop instruction. The hardware loop can perform loop branches with a
+// zero-cycle overhead.
+//
+// The pattern that defines the induction variable can changed depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for hardware loops:
+// - Countable loops (w/ ind. var for a trip count)
+// - Assumes loops are normalized by IndVarSimplify
+// - Try inner-most loops first
+// - No nested hardware loops.
+// - No function calls in loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hwloops"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+#include <algorithm>
+#include <vector>
+
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
+#endif
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace llvm {
+ void initializeHexagonHardwareLoopsPass(PassRegistry&);
+}
+
+namespace {
+ class CountValue;
+ struct HexagonHardwareLoops : public MachineFunctionPass {
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *MDT;
+ const HexagonTargetMachine *TM;
+ const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
+#ifndef NDEBUG
+ static int Counter;
+#endif
+
+ public:
+ static char ID;
+
+ HexagonHardwareLoops() : MachineFunctionPass(ID) {
+ initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loops"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// Kinds of comparisons in the compare instructions.
+ struct Comparison {
+ enum Kind {
+ EQ = 0x01,
+ NE = 0x02,
+ L = 0x04, // Less-than property.
+ G = 0x08, // Greater-than property.
+ U = 0x40, // Unsigned property.
+ LTs = L,
+ LEs = L | EQ,
+ GTs = G,
+ GEs = G | EQ,
+ LTu = L | U,
+ LEu = L | EQ | U,
+ GTu = G | U,
+ GEu = G | EQ | U
+ };
+
+ static Kind getSwappedComparison(Kind Cmp) {
+ assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
+ if ((Cmp & L) || (Cmp & G))
+ return (Kind)(Cmp ^ (L|G));
+ return Cmp;
+ }
+ };
+
+ /// \brief Find the register that contains the loop controlling
+ /// induction variable.
+ /// If successful, it will return true and set the \p Reg, \p IVBump
+ /// and \p IVOp arguments. Otherwise it will return false.
+ /// The returned induction register is the register R that follows the
+ /// following induction pattern:
+ /// loop:
+ /// R = phi ..., [ R.next, LatchBlock ]
+ /// R.next = R + #bump
+ /// if (R.next < #N) goto loop
+ /// IVBump is the immediate value added to R, and IVOp is the instruction
+ /// "R.next = R + #bump".
+ bool findInductionRegister(MachineLoop *L, unsigned &Reg,
+ int64_t &IVBump, MachineInstr *&IVOp) const;
+
+ /// \brief Analyze the statements in a loop to determine if the loop
+ /// has a computable trip count and, if so, return a value that represents
+ /// the trip count expression.
+ CountValue *getLoopTripCount(MachineLoop *L,
+ SmallVector<MachineInstr*, 2> &OldInsts);
+
+ /// \brief Return the expression that represents the number of times
+ /// a loop iterates. The function takes the operands that represent the
+ /// loop start value, loop end value, and induction value. Based upon
+ /// these operands, the function attempts to compute the trip count.
+ /// If the trip count is not directly available (as an immediate value,
+ /// or a register), the function will attempt to insert computation of it
+ /// to the loop's preheader.
+ CountValue *computeCount(MachineLoop *Loop,
+ const MachineOperand *Start,
+ const MachineOperand *End,
+ unsigned IVReg,
+ int64_t IVBump,
+ Comparison::Kind Cmp) const;
+
+ /// \brief Return true if the instruction is not valid within a hardware
+ /// loop.
+ bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+ /// \brief Return true if the loop contains an instruction that inhibits
+ /// using the hardware loop.
+ bool containsInvalidInstruction(MachineLoop *L) const;
+
+ /// \brief Given a loop, check if we can convert it to a hardware loop.
+ /// If so, then perform the conversion and return true.
+ bool convertToHardwareLoop(MachineLoop *L);
+
+ /// \brief Return true if the instruction is now dead.
+ bool isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr*, 1> &DeadPhis) const;
+
+ /// \brief Remove the instruction if it is now dead.
+ void removeIfDead(MachineInstr *MI);
+
+ /// \brief Make sure that the "bump" instruction executes before the
+ /// compare. We need that for the IV fixup, so that the compare
+ /// instruction would not use a bumped value that has not yet been
+ /// defined. If the instructions are out of order, try to reorder them.
+ bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
+
+ /// \brief Get the instruction that loads an immediate value into \p R,
+ /// or 0 if such an instruction does not exist.
+ MachineInstr *defWithImmediate(unsigned R);
+
+ /// \brief Get the immediate value referenced to by \p MO, either for
+ /// immediate operands, or for register operands, where the register
+ /// was defined with an immediate value.
+ int64_t getImmediate(MachineOperand &MO);
+
+ /// \brief Reset the given machine operand to now refer to a new immediate
+ /// value. Assumes that the operand was already referencing an immediate
+ /// value, either directly, or via a register.
+ void setImmediate(MachineOperand &MO, int64_t Val);
+
+ /// \brief Fix the data flow of the induction varible.
+ /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
+ /// |
+ /// +-> back to phi
+ /// where "bump" is the increment of the induction variable:
+ /// iv = iv + #const.
+ /// Due to some prior code transformations, the actual flow may look
+ /// like this:
+ /// phi -+-> bump ---> back to phi
+ /// |
+ /// +-> comparison-in-latch (against upper_bound-bump),
+ /// i.e. the comparison that controls the loop execution may be using
+ /// the value of the induction variable from before the increment.
+ ///
+ /// Return true if the loop's flow is the desired one (i.e. it's
+ /// either been fixed, or no fixing was necessary).
+ /// Otherwise, return false. This can happen if the induction variable
+ /// couldn't be identified, or if the value in the latch's comparison
+ /// cannot be adjusted to reflect the post-bump value.
+ bool fixupInductionVariable(MachineLoop *L);
+
+ /// \brief Given a loop, if it does not have a preheader, create one.
+ /// Return the block that is the preheader.
+ MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
+ };
+
+ char HexagonHardwareLoops::ID = 0;
+#ifndef NDEBUG
+ int HexagonHardwareLoops::Counter = 0;
+#endif
+
+ /// \brief Abstraction for a trip count of a loop. A smaller vesrsion
+ /// of the MachineOperand class without the concerns of changing the
+ /// operand representation.
+ class CountValue {
+ public:
+ enum CountValueType {
+ CV_Register,
+ CV_Immediate
+ };
+ private:
+ CountValueType Kind;
+ union Values {
+ struct {
+ unsigned Reg;
+ unsigned Sub;
+ } R;
+ unsigned ImmVal;
+ } Contents;
+
+ public:
+ explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
+ Kind = t;
+ if (Kind == CV_Register) {
+ Contents.R.Reg = v;
+ Contents.R.Sub = u;
+ } else {
+ Contents.ImmVal = v;
+ }
+ }
+ bool isReg() const { return Kind == CV_Register; }
+ bool isImm() const { return Kind == CV_Immediate; }
+
+ unsigned getReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.R.Reg;
+ }
+ unsigned getSubReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.R.Sub;
+ }
+ unsigned getImm() const {
+ assert(isImm() && "Wrong CountValue accessor");
+ return Contents.ImmVal;
+ }
+
+ void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+ if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
+ if (isImm()) { OS << Contents.ImmVal; }
+ }
+ };
+} // end anonymous namespace
+
+
+INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
+ "Hexagon Hardware Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
+ "Hexagon Hardware Loops", false, false)
+
+
+/// \brief Returns true if the instruction is a hardware loop instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+FunctionPass *llvm::createHexagonHardwareLoops() {
+ return new HexagonHardwareLoops();
+}
+
+
+bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
+
+ bool Changed = false;
+
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MRI = &MF.getRegInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
+ TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo());
+ TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo());
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I) {
+ MachineLoop *L = *I;
+ if (!L->getParentLoop())
+ Changed |= convertToHardwareLoop(L);
+ }
+
+ return Changed;
+}
+
+
+bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
+ unsigned &Reg,
+ int64_t &IVBump,
+ MachineInstr *&IVOp
+ ) const {
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ if (!Header || !Preheader || !Latch)
+ return false;
+
+ // This pair represents an induction register together with an immediate
+ // value that will be added to it in each loop iteration.
+ typedef std::pair<unsigned,int64_t> RegisterBump;
+
+ // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
+ // from an induction operation
+ // R.next = R + bump
+ // where bump is an immediate value.
+ typedef std::map<unsigned,RegisterBump> InductionMap;
+
+ InductionMap IndMap;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *Phi = &*I;
+
+ // Have a PHI instruction. Get the operand that corresponds to the
+ // latch block, and see if is a result of an addition of form "reg+imm",
+ // where the "reg" is defined by the PHI node we are looking at.
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+ if (Phi->getOperand(i+1).getMBB() != Latch)
+ continue;
+
+ unsigned PhiOpReg = Phi->getOperand(i).getReg();
+ MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
+ unsigned UpdOpc = DI->getOpcode();
+ bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+
+ if (isAdd) {
+ // If the register operand to the add is the PHI we're
+ // looking at, this meets the induction pattern.
+ unsigned IndReg = DI->getOperand(1).getReg();
+ if (MRI->getVRegDef(IndReg) == Phi) {
+ unsigned UpdReg = DI->getOperand(0).getReg();
+ int64_t V = DI->getOperand(2).getImm();
+ IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
+ }
+ }
+ } // for (i)
+ } // for (instr)
+
+ SmallVector<MachineOperand,2> Cond;
+ MachineBasicBlock *TB = 0, *FB = 0;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return false;
+
+ unsigned CSz = Cond.size();
+ assert (CSz == 1 || CSz == 2);
+ unsigned PredR = Cond[CSz-1].getReg();
+
+ MachineInstr *PredI = MRI->getVRegDef(PredR);
+ if (!PredI->isCompare())
+ return false;
+
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int CmpImm = 0, CmpMask = 0;
+ bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
+ CmpMask, CmpImm);
+ // Fail if the compare was not analyzed, or it's not comparing a register
+ // with an immediate value. Not checking the mask here, since we handle
+ // the individual compare opcodes (including CMPb) later on.
+ if (!CmpAnalyzed)
+ return false;
+
+ // Exactly one of the input registers to the comparison should be among
+ // the induction registers.
+ InductionMap::iterator IndMapEnd = IndMap.end();
+ InductionMap::iterator F = IndMapEnd;
+ if (CmpReg1 != 0) {
+ InductionMap::iterator F1 = IndMap.find(CmpReg1);
+ if (F1 != IndMapEnd)
+ F = F1;
+ }
+ if (CmpReg2 != 0) {
+ InductionMap::iterator F2 = IndMap.find(CmpReg2);
+ if (F2 != IndMapEnd) {
+ if (F != IndMapEnd)
+ return false;
+ F = F2;
+ }
+ }
+ if (F == IndMapEnd)
+ return false;
+
+ Reg = F->second.first;
+ IVBump = F->second.second;
+ IVOp = MRI->getVRegDef(F->first);
+ return true;
+}
+
+
+/// \brief Analyze the statements in a loop to determine if the loop has
+/// a computable trip count and, if so, return a value that represents
+/// the trip count expression.
+///
+/// This function iterates over the phi nodes in the loop to check for
+/// induction variable patterns that are used in the calculation for
+/// the number of time the loop is executed.
+CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
+ SmallVector<MachineInstr*, 2> &OldInsts) {
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+ assert(PI != TopMBB->pred_end() &&
+ "Loop must have more than one incoming edge!");
+ MachineBasicBlock *Backedge = *PI++;
+ if (PI == TopMBB->pred_end()) // dead loop?
+ return 0;
+ MachineBasicBlock *Incoming = *PI++;
+ if (PI != TopMBB->pred_end()) // multiple backedges?
+ return 0;
+
+ // Make sure there is one incoming and one backedge and determine which
+ // is which.
+ if (L->contains(Incoming)) {
+ if (L->contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!L->contains(Backedge))
+ return 0;
+
+ // Look for the cmp instruction to determine if we can get a useful trip
+ // count. The trip count can be either a register or an immediate. The
+ // location of the value depends upon the type (reg or imm).
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return 0;
+
+ unsigned IVReg = 0;
+ int64_t IVBump = 0;
+ MachineInstr *IVOp;
+ bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
+ if (!FoundIV)
+ return 0;
+
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+
+ MachineOperand *InitialValue = 0;
+ MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
+ for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
+ MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
+ if (MBB == Preheader)
+ InitialValue = &IV_Phi->getOperand(i);
+ else if (MBB == Latch)
+ IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
+ }
+ if (!InitialValue)
+ return 0;
+
+ SmallVector<MachineOperand,2> Cond;
+ MachineBasicBlock *TB = 0, *FB = 0;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return 0;
+
+ MachineBasicBlock *Header = L->getHeader();
+ // TB must be non-null. If FB is also non-null, one of them must be
+ // the header. Otherwise, branch to TB could be exiting the loop, and
+ // the fall through can go to the header.
+ assert (TB && "Latch block without a branch?");
+ assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
+ if (!TB || (FB && TB != Header && FB != Header))
+ return 0;
+
+ // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
+ // to put imm(0), followed by P in the vector Cond.
+ // If TB is not the header, it means that the "not-taken" path must lead
+ // to the header.
+ bool Negated = (Cond.size() > 1) ^ (TB != Header);
+ unsigned PredReg = Cond[Cond.size()-1].getReg();
+ MachineInstr *CondI = MRI->getVRegDef(PredReg);
+ unsigned CondOpc = CondI->getOpcode();
+
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int Mask = 0, ImmValue = 0;
+ bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2,
+ Mask, ImmValue);
+ if (!AnalyzedCmp)
+ return 0;
+
+ // The comparison operator type determines how we compute the loop
+ // trip count.
+ OldInsts.push_back(CondI);
+ OldInsts.push_back(IVOp);
+
+ // Sadly, the following code gets information based on the position
+ // of the operands in the compare instruction. This has to be done
+ // this way, because the comparisons check for a specific relationship
+ // between the operands (e.g. is-less-than), rather than to find out
+ // what relationship the operands are in (as on PPC).
+ Comparison::Kind Cmp;
+ bool isSwapped = false;
+ const MachineOperand &Op1 = CondI->getOperand(1);
+ const MachineOperand &Op2 = CondI->getOperand(2);
+ const MachineOperand *EndValue = 0;
+
+ if (Op1.isReg()) {
+ if (Op2.isImm() || Op1.getReg() == IVReg)
+ EndValue = &Op2;
+ else {
+ EndValue = &Op1;
+ isSwapped = true;
+ }
+ }
+
+ if (!EndValue)
+ return 0;
+
+ switch (CondOpc) {
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPEQrr:
+ Cmp = !Negated ? Comparison::EQ : Comparison::NE;
+ break;
+ case Hexagon::CMPLTrr:
+ Cmp = !Negated ? Comparison::LTs : Comparison::GEs;
+ break;
+ case Hexagon::CMPLTUrr:
+ Cmp = !Negated ? Comparison::LTu : Comparison::GEu;
+ break;
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTUrr:
+ Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
+ break;
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPGTrr:
+ Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
+ break;
+ // Very limited support for byte/halfword compares.
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPhEQri_V4: {
+ if (IVBump != 1)
+ return 0;
+
+ int64_t InitV, EndV;
+ // Since the comparisons are "ri", the EndValue should be an
+ // immediate. Check it just in case.
+ assert(EndValue->isImm() && "Unrecognized latch comparison");
+ EndV = EndValue->getImm();
+ // Allow InitialValue to be a register defined with an immediate.
+ if (InitialValue->isReg()) {
+ if (!defWithImmediate(InitialValue->getReg()))
+ return 0;
+ InitV = getImmediate(*InitialValue);
+ } else {
+ assert(InitialValue->isImm());
+ InitV = InitialValue->getImm();
+ }
+ if (InitV >= EndV)
+ return 0;
+ if (CondOpc == Hexagon::CMPbEQri_V4) {
+ if (!isInt<8>(InitV) || !isInt<8>(EndV))
+ return 0;
+ } else { // Hexagon::CMPhEQri_V4
+ if (!isInt<16>(InitV) || !isInt<16>(EndV))
+ return 0;
+ }
+ Cmp = !Negated ? Comparison::EQ : Comparison::NE;
+ break;
+ }
+ default:
+ return 0;
+ }
+
+ if (isSwapped)
+ Cmp = Comparison::getSwappedComparison(Cmp);
+
+ if (InitialValue->isReg()) {
+ unsigned R = InitialValue->getReg();
+ MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
+ if (!MDT->properlyDominates(DefBB, Header))
+ return 0;
+ OldInsts.push_back(MRI->getVRegDef(R));
+ }
+ if (EndValue->isReg()) {
+ unsigned R = EndValue->getReg();
+ MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
+ if (!MDT->properlyDominates(DefBB, Header))
+ return 0;
+ }
+
+ return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
+}
+
+/// \brief Helper function that returns the expression that represents the
+/// number of times a loop iterates. The function takes the operands that
+/// represent the loop start value, loop end value, and induction value.
+/// Based upon these operands, the function attempts to compute the trip count.
+CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
+ const MachineOperand *Start,
+ const MachineOperand *End,
+ unsigned IVReg,
+ int64_t IVBump,
+ Comparison::Kind Cmp) const {
+ // Cannot handle comparison EQ, i.e. while (A == B).
+ if (Cmp == Comparison::EQ)
+ return 0;
+
+ // Check if either the start or end values are an assignment of an immediate.
+ // If so, use the immediate value rather than the register.
+ if (Start->isReg()) {
+ const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
+ if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI)
+ Start = &StartValInstr->getOperand(1);
+ }
+ if (End->isReg()) {
+ const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
+ if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI)
+ End = &EndValInstr->getOperand(1);
+ }
+
+ assert (Start->isReg() || Start->isImm());
+ assert (End->isReg() || End->isImm());
+
+ bool CmpLess = Cmp & Comparison::L;
+ bool CmpGreater = Cmp & Comparison::G;
+ bool CmpHasEqual = Cmp & Comparison::EQ;
+
+ // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
+ // If loop executes while iv is "less" with the iv value going down, then
+ // the iv must wrap.
+ if (CmpLess && IVBump < 0)
+ return 0;
+ // If loop executes while iv is "greater" with the iv value going up, then
+ // the iv must wrap.
+ if (CmpGreater && IVBump > 0)
+ return 0;
+
+ if (Start->isImm() && End->isImm()) {
+ // Both, start and end are immediates.
+ int64_t StartV = Start->getImm();
+ int64_t EndV = End->getImm();
+ int64_t Dist = EndV - StartV;
+ if (Dist == 0)
+ return 0;
+
+ bool Exact = (Dist % IVBump) == 0;
+
+ if (Cmp == Comparison::NE) {
+ if (!Exact)
+ return 0;
+ if ((Dist < 0) ^ (IVBump < 0))
+ return 0;
+ }
+
+ // For comparisons that include the final value (i.e. include equality
+ // with the final value), we need to increase the distance by 1.
+ if (CmpHasEqual)
+ Dist = Dist > 0 ? Dist+1 : Dist-1;
+
+ // assert (CmpLess => Dist > 0);
+ assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
+ // assert (CmpGreater => Dist < 0);
+ assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
+
+ // "Normalized" distance, i.e. with the bump set to +-1.
+ int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump
+ : (-Dist + (-IVBump-1)) / (-IVBump);
+ assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
+
+ uint64_t Count = Dist1;
+
+ if (Count > 0xFFFFFFFFULL)
+ return 0;
+
+ return new CountValue(CountValue::CV_Immediate, Count);
+ }
+
+ // A general case: Start and End are some values, but the actual
+ // iteration count may not be available. If it is not, insert
+ // a computation of it into the preheader.
+
+ // If the induction variable bump is not a power of 2, quit.
+ // Othwerise we'd need a general integer division.
+ if (!isPowerOf2_64(abs64(IVBump)))
+ return 0;
+
+ MachineBasicBlock *PH = Loop->getLoopPreheader();
+ assert (PH && "Should have a preheader by now");
+ MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
+ DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
+ : DebugLoc();
+
+ // If Start is an immediate and End is a register, the trip count
+ // will be "reg - imm". Hexagon's "subtract immediate" instruction
+ // is actually "reg + -imm".
+
+ // If the loop IV is going downwards, i.e. if the bump is negative,
+ // then the iteration count (computed as End-Start) will need to be
+ // negated. To avoid the negation, just swap Start and End.
+ if (IVBump < 0) {
+ std::swap(Start, End);
+ IVBump = -IVBump;
+ }
+ // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
+ // Signedness, and "including equality" are preserved.
+
+ bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
+ bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
+
+ int64_t StartV = 0, EndV = 0;
+ if (Start->isImm())
+ StartV = Start->getImm();
+ if (End->isImm())
+ EndV = End->getImm();
+
+ int64_t AdjV = 0;
+ // To compute the iteration count, we would need this computation:
+ // Count = (End - Start + (IVBump-1)) / IVBump
+ // or, when CmpHasEqual:
+ // Count = (End - Start + (IVBump-1)+1) / IVBump
+ // The "IVBump-1" part is the adjustment (AdjV). We can avoid
+ // generating an instruction specifically to add it if we can adjust
+ // the immediate values for Start or End.
+
+ if (CmpHasEqual) {
+ // Need to add 1 to the total iteration count.
+ if (Start->isImm())
+ StartV--;
+ else if (End->isImm())
+ EndV++;
+ else
+ AdjV += 1;
+ }
+
+ if (Cmp != Comparison::NE) {
+ if (Start->isImm())
+ StartV -= (IVBump-1);
+ else if (End->isImm())
+ EndV += (IVBump-1);
+ else
+ AdjV += (IVBump-1);
+ }
+
+ unsigned R = 0, SR = 0;
+ if (Start->isReg()) {
+ R = Start->getReg();
+ SR = Start->getSubReg();
+ } else {
+ R = End->getReg();
+ SR = End->getSubReg();
+ }
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ // Hardware loops cannot handle 64-bit registers. If it's a double
+ // register, it has to have a subregister.
+ if (!SR && RC == &Hexagon::DoubleRegsRegClass)
+ return 0;
+ const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
+
+ // Compute DistR (register with the distance between Start and End).
+ unsigned DistR, DistSR;
+
+ // Avoid special case, where the start value is an imm(0).
+ if (Start->isImm() && StartV == 0) {
+ DistR = End->getReg();
+ DistSR = End->getSubReg();
+ } else {
+ const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) :
+ (RegToImm ? TII->get(Hexagon::SUB_ri) :
+ TII->get(Hexagon::ADD_ri));
+ unsigned SubR = MRI->createVirtualRegister(IntRC);
+ MachineInstrBuilder SubIB =
+ BuildMI(*PH, InsertPos, DL, SubD, SubR);
+
+ if (RegToReg) {
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ } else if (RegToImm) {
+ SubIB.addImm(EndV)
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ } else { // ImmToReg
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addImm(-StartV);
+ }
+ DistR = SubR;
+ DistSR = 0;
+ }
+
+ // From DistR, compute AdjR (register with the adjusted distance).
+ unsigned AdjR, AdjSR;
+
+ if (AdjV == 0) {
+ AdjR = DistR;
+ AdjSR = DistSR;
+ } else {
+ // Generate CountR = ADD DistR, AdjVal
+ unsigned AddR = MRI->createVirtualRegister(IntRC);
+ const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri);
+ BuildMI(*PH, InsertPos, DL, AddD, AddR)
+ .addReg(DistR, 0, DistSR)
+ .addImm(AdjV);
+
+ AdjR = AddR;
+ AdjSR = 0;
+ }
+
+ // From AdjR, compute CountR (register with the final count).
+ unsigned CountR, CountSR;
+
+ if (IVBump == 1) {
+ CountR = AdjR;
+ CountSR = AdjSR;
+ } else {
+ // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
+ unsigned Shift = Log2_32(IVBump);
+
+ // Generate NormR = LSR DistR, Shift.
+ unsigned LsrR = MRI->createVirtualRegister(IntRC);
+ const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri);
+ BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
+ .addReg(AdjR, 0, AdjSR)
+ .addImm(Shift);
+
+ CountR = LsrR;
+ CountSR = 0;
+ }
+
+ return new CountValue(CountValue::CV_Register, CountR, CountSR);
+}
+
+
+/// \brief Return true if the operation is invalid within hardware loop.
+bool HexagonHardwareLoops::isInvalidLoopOperation(
+ const MachineInstr *MI) const {
+
+ // call is not allowed because the callee may use a hardware loop
+ if (MI->getDesc().isCall())
+ return true;
+
+ // do not allow nested hardware loops
+ if (isHardwareLoop(MI))
+ return true;
+
+ // check if the instruction defines a hardware loop register
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
+ R == Hexagon::SA0 || R == Hexagon::SA1)
+ return true;
+ }
+ return false;
+}
+
+
+/// \brief - Return true if the loop contains an instruction that inhibits
+/// the use of the hardware loop function.
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
+ const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+ const MachineInstr *MI = &*MII;
+ if (isInvalidLoopOperation(MI))
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/// \brief Returns true if the instruction is dead. This was essentially
+/// copied from DeadMachineInstructionElim::isDead, but with special cases
+/// for inline asm, physical registers and instructions with side effects
+/// removed.
+bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr*, 1> &DeadPhis) const {
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (MRI->use_nodbg_empty(Reg))
+ continue;
+
+ typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
+
+ // This instruction has users, but if the only user is the phi node for the
+ // parent block, and the only use of that phi node is this instruction, then
+ // this instruction is dead: both it (and the phi node) can be removed.
+ use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
+ use_nodbg_iterator End = MRI->use_nodbg_end();
+ if (llvm::next(I) != End || !I.getOperand().getParent()->isPHI())
+ return false;
+
+ MachineInstr *OnePhi = I.getOperand().getParent();
+ for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
+ const MachineOperand &OPO = OnePhi->getOperand(j);
+ if (!OPO.isReg() || !OPO.isDef())
+ continue;
+
+ unsigned OPReg = OPO.getReg();
+ use_nodbg_iterator nextJ;
+ for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
+ J != End; J = nextJ) {
+ nextJ = llvm::next(J);
+ MachineOperand &Use = J.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+
+ // If the phi node has a user that is not MI, bail...
+ if (MI != UseMI)
+ return false;
+ }
+ }
+ DeadPhis.push_back(OnePhi);
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
+ // This procedure was essentially copied from DeadMachineInstructionElim.
+
+ SmallVector<MachineInstr*, 1> DeadPhis;
+ if (isDead(MI, DeadPhis)) {
+ DEBUG(dbgs() << "HW looping will remove: " << *MI);
+
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I != E; I = nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand &Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI == MI)
+ continue;
+ if (Use.isDebug())
+ UseMI->getOperand(0).setReg(0U);
+ // This may also be a "instr -> phi -> instr" case which can
+ // be removed too.
+ }
+ }
+
+ MI->eraseFromParent();
+ for (unsigned i = 0; i < DeadPhis.size(); ++i)
+ DeadPhis[i]->eraseFromParent();
+ }
+}
+
+/// \brief Check if the loop is a candidate for converting to a hardware
+/// loop. If so, then perform the transformation.
+///
+/// This function works on innermost loops first. A loop can be converted
+/// if it is a counting loop; either a register value or an immediate.
+///
+/// The code makes several assumptions about the representation of the loop
+/// in llvm.
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+ // This is just for sanity.
+ assert(L->getHeader() && "Loop without a header?");
+
+ bool Changed = false;
+ // Process nested loops first.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= convertToHardwareLoop(*I);
+
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (Changed)
+ return Changed;
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = HWLoopLimit;
+ if (Limit >= 0) {
+ if (Counter >= HWLoopLimit)
+ return false;
+ Counter++;
+ }
+#endif
+
+ // Does the loop contain any invalid instructions?
+ if (containsInvalidInstruction(L))
+ return false;
+
+ // Is the induction variable bump feeding the latch condition?
+ if (!fixupInductionVariable(L))
+ return false;
+
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate hw loop if the loop has more than one exit.
+ if (LastMBB == 0)
+ return false;
+
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+ if (LastI == LastMBB->end())
+ return false;
+
+ // Ensure the loop has a preheader: the loop instruction will be
+ // placed there.
+ bool NewPreheader = false;
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = createPreheaderForLoop(L);
+ if (!Preheader)
+ return false;
+ NewPreheader = true;
+ }
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+ SmallVector<MachineInstr*, 2> OldInsts;
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getLoopTripCount(L, OldInsts);
+ if (TripCount == 0)
+ return false;
+
+ // Is the trip count available in the preheader?
+ if (TripCount->isReg()) {
+ // There will be a use of the register inserted into the preheader,
+ // so make sure that the register is actually defined at that point.
+ MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
+ MachineBasicBlock *BBDef = TCDef->getParent();
+ if (!NewPreheader) {
+ if (!MDT->dominates(BBDef, Preheader))
+ return false;
+ } else {
+ // If we have just created a preheader, the dominator tree won't be
+ // aware of it. Check if the definition of the register dominates
+ // the header, but is not the header itself.
+ if (!MDT->properlyDominates(BBDef, L->getHeader()))
+ return false;
+ }
+ }
+
+ // Determine the loop start.
+ MachineBasicBlock *LoopStart = L->getTopBlock();
+ if (L->getLoopLatch() != LastMBB) {
+ // When the exit and latch are not the same, use the latch block as the
+ // start.
+ // The loop start address is used only after the 1st iteration, and the
+ // loop latch may contains instrs. that need to be executed after the
+ // first iteration.
+ LoopStart = L->getLoopLatch();
+ // Make sure the latch is a successor of the exit, otherwise it won't work.
+ if (!LastMBB->isSuccessor(LoopStart))
+ return false;
+ }
+
+ // Convert the loop to a hardware loop.
+ DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+ DebugLoc DL;
+ if (InsertPos != Preheader->end())
+ DL = InsertPos->getDebugLoc();
+
+ if (TripCount->isReg()) {
+ // Create a copy of the loop count register.
+ unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
+ .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
+ // Add the Loop instruction to the beginning of the loop.
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
+ .addMBB(LoopStart)
+ .addReg(CountReg);
+ } else {
+ assert(TripCount->isImm() && "Expecting immediate value for trip count");
+ // Add the Loop immediate instruction to the beginning of the loop,
+ // if the immediate fits in the instructions. Otherwise, we need to
+ // create a new virtual register.
+ int64_t CountImm = TripCount->getImm();
+ if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) {
+ unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg)
+ .addImm(CountImm);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
+ .addMBB(LoopStart).addReg(CountReg);
+ } else
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i))
+ .addMBB(LoopStart).addImm(CountImm);
+ }
+
+ // Make sure the loop start always has a reference in the CFG. We need
+ // to create a BlockAddress operand to get this mechanism to work both the
+ // MachineBasicBlock and BasicBlock objects need the flag set.
+ LoopStart->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object.
+ BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+ // Replace the loop branch with an endloop instruction.
+ DebugLoc LastIDL = LastI->getDebugLoc();
+ BuildMI(*LastMBB, LastI, LastIDL,
+ TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+
+ // The loop ends with either:
+ // - a conditional branch followed by an unconditional branch, or
+ // - a conditional branch to the loop start.
+ if (LastI->getOpcode() == Hexagon::JMP_c ||
+ LastI->getOpcode() == Hexagon::JMP_cNot) {
+ // Delete one and change/add an uncond. branch to out of the loop.
+ MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
+ LastI = LastMBB->erase(LastI);
+ if (!L->contains(BranchTarget)) {
+ if (LastI != LastMBB->end())
+ LastI = LastMBB->erase(LastI);
+ SmallVector<MachineOperand, 0> Cond;
+ TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL);
+ }
+ } else {
+ // Conditional branch to loop start; just delete it.
+ LastMBB->erase(LastI);
+ }
+ delete TripCount;
+
+ // The induction operation and the comparison may now be
+ // unneeded. If these are unneeded, then remove them.
+ for (unsigned i = 0; i < OldInsts.size(); ++i)
+ removeIfDead(OldInsts[i]);
+
+ ++NumHWLoops;
+ return true;
+}
+
+
+bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
+ MachineInstr *CmpI) {
+ assert (BumpI != CmpI && "Bump and compare in the same instruction?");
+
+ MachineBasicBlock *BB = BumpI->getParent();
+ if (CmpI->getParent() != BB)
+ return false;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ // Check if things are in order to begin with.
+ for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
+ if (&*I == CmpI)
+ return true;
+
+ // Out of order.
+ unsigned PredR = CmpI->getOperand(0).getReg();
+ bool FoundBump = false;
+ instr_iterator CmpIt = CmpI, NextIt = llvm::next(CmpIt);
+ for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
+ MachineInstr *In = &*I;
+ for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = In->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ if (MO.getReg() == PredR) // Found an intervening use of PredR.
+ return false;
+ }
+ }
+
+ if (In == BumpI) {
+ instr_iterator After = BumpI;
+ instr_iterator From = CmpI;
+ BB->splice(llvm::next(After), BB, From);
+ FoundBump = true;
+ break;
+ }
+ }
+ assert (FoundBump && "Cannot determine instruction order");
+ return FoundBump;
+}
+
+
+MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
+ MachineInstr *DI = MRI->getVRegDef(R);
+ unsigned DOpc = DI->getOpcode();
+ switch (DOpc) {
+ case Hexagon::TFRI:
+ case Hexagon::TFRI64:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::CONST64_Int_Real:
+ return DI;
+ }
+ return 0;
+}
+
+
+int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) {
+ if (MO.isImm())
+ return MO.getImm();
+ assert(MO.isReg());
+ unsigned R = MO.getReg();
+ MachineInstr *DI = defWithImmediate(R);
+ assert(DI && "Need an immediate operand");
+ // All currently supported "define-with-immediate" instructions have the
+ // actual immediate value in the operand(1).
+ int64_t v = DI->getOperand(1).getImm();
+ return v;
+}
+
+
+void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
+ if (MO.isImm()) {
+ MO.setImm(Val);
+ return;
+ }
+
+ assert(MO.isReg());
+ unsigned R = MO.getReg();
+ MachineInstr *DI = defWithImmediate(R);
+ if (MRI->hasOneNonDBGUse(R)) {
+ // If R has only one use, then just change its defining instruction to
+ // the new immediate value.
+ DI->getOperand(1).setImm(Val);
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ unsigned NewR = MRI->createVirtualRegister(RC);
+ MachineBasicBlock &B = *DI->getParent();
+ DebugLoc DL = DI->getDebugLoc();
+ BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR)
+ .addImm(Val);
+ MO.setReg(NewR);
+}
+
+
+bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+
+ if (!Header || !Preheader || !Latch)
+ return false;
+
+ // These data structures follow the same concept as the corresponding
+ // ones in findInductionRegister (where some comments are).
+ typedef std::pair<unsigned,int64_t> RegisterBump;
+ typedef std::pair<unsigned,RegisterBump> RegisterInduction;
+ typedef std::set<RegisterInduction> RegisterInductionSet;
+
+ // Register candidates for induction variables, with their associated bumps.
+ RegisterInductionSet IndRegs;
+
+ // Look for induction patterns:
+ // vreg1 = PHI ..., [ latch, vreg2 ]
+ // vreg2 = ADD vreg1, imm
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *Phi = &*I;
+
+ // Have a PHI instruction.
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+ if (Phi->getOperand(i+1).getMBB() != Latch)
+ continue;
+
+ unsigned PhiReg = Phi->getOperand(i).getReg();
+ MachineInstr *DI = MRI->getVRegDef(PhiReg);
+ unsigned UpdOpc = DI->getOpcode();
+ bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+
+ if (isAdd) {
+ // If the register operand to the add/sub is the PHI we are looking
+ // at, this meets the induction pattern.
+ unsigned IndReg = DI->getOperand(1).getReg();
+ if (MRI->getVRegDef(IndReg) == Phi) {
+ unsigned UpdReg = DI->getOperand(0).getReg();
+ int64_t V = DI->getOperand(2).getImm();
+ IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
+ }
+ }
+ } // for (i)
+ } // for (instr)
+
+ if (IndRegs.empty())
+ return false;
+
+ MachineBasicBlock *TB = 0, *FB = 0;
+ SmallVector<MachineOperand,2> Cond;
+ // AnalyzeBranch returns true if it fails to analyze branch.
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return false;
+
+ // Check if the latch branch is unconditional.
+ if (Cond.empty())
+ return false;
+
+ if (TB != Header && FB != Header)
+ // The latch does not go back to the header. Not a latch we know and love.
+ return false;
+
+ // Expecting a predicate register as a condition. It won't be a hardware
+ // predicate register at this point yet, just a vreg.
+ // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
+ // into Cond, followed by the predicate register. For non-negated branches
+ // it's just the register.
+ unsigned CSz = Cond.size();
+ if (CSz != 1 && CSz != 2)
+ return false;
+
+ unsigned P = Cond[CSz-1].getReg();
+ MachineInstr *PredDef = MRI->getVRegDef(P);
+
+ if (!PredDef->isCompare())
+ return false;
+
+ SmallSet<unsigned,2> CmpRegs;
+ MachineOperand *CmpImmOp = 0;
+
+ // Go over all operands to the compare and look for immediate and register
+ // operands. Assume that if the compare has a single register use and a
+ // single immediate operand, then the register is being compared with the
+ // immediate value.
+ for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg()) {
+ // Skip all implicit references. In one case there was:
+ // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
+ if (MO.isImplicit())
+ continue;
+ if (MO.isUse()) {
+ unsigned R = MO.getReg();
+ if (!defWithImmediate(R)) {
+ CmpRegs.insert(MO.getReg());
+ continue;
+ }
+ // Consider the register to be the "immediate" operand.
+ if (CmpImmOp)
+ return false;
+ CmpImmOp = &MO;
+ }
+ } else if (MO.isImm()) {
+ if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
+ return false;
+ CmpImmOp = &MO;
+ }
+ }
+
+ if (CmpRegs.empty())
+ return false;
+
+ // Check if the compared register follows the order we want. Fix if needed.
+ for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
+ I != E; ++I) {
+ // This is a success. If the register used in the comparison is one that
+ // we have identified as a bumped (updated) induction register, there is
+ // nothing to do.
+ if (CmpRegs.count(I->first))
+ return true;
+
+ // Otherwise, if the register being compared comes out of a PHI node,
+ // and has been recognized as following the induction pattern, and is
+ // compared against an immediate, we can fix it.
+ const RegisterBump &RB = I->second;
+ if (CmpRegs.count(RB.first)) {
+ if (!CmpImmOp)
+ return false;
+
+ int64_t CmpImm = getImmediate(*CmpImmOp);
+ int64_t V = RB.second;
+ if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit).
+ return false;
+ if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit).
+ return false;
+ CmpImm += V;
+ // Some forms of cmp-immediate allow u9 and s10. Assume the worst case
+ // scenario, i.e. an 8-bit value.
+ if (CmpImmOp->isImm() && !isInt<8>(CmpImm))
+ return false;
+
+ // Make sure that the compare happens after the bump. Otherwise,
+ // after the fixup, the compare would use a yet-undefined register.
+ MachineInstr *BumpI = MRI->getVRegDef(I->first);
+ bool Order = orderBumpCompare(BumpI, PredDef);
+ if (!Order)
+ return false;
+
+ // Finally, fix the compare instruction.
+ setImmediate(*CmpImmOp, CmpImm);
+ for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RB.first) {
+ MO.setReg(I->first);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+
+/// \brief Create a preheader for a given loop.
+MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
+ MachineLoop *L) {
+ if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
+ return TmpPH;
+
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineFunction *MF = Header->getParent();
+ DebugLoc DL;
+
+ if (!Latch || Header->hasAddressTaken())
+ return 0;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+
+ // Verify that all existing predecessors have analyzable branches
+ // (or no branches at all).
+ typedef std::vector<MachineBasicBlock*> MBBVector;
+ MBBVector Preds(Header->pred_begin(), Header->pred_end());
+ SmallVector<MachineOperand,2> Tmp1;
+ MachineBasicBlock *TB = 0, *FB = 0;
+
+ if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
+ return 0;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ if (PB != Latch) {
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
+ if (NotAnalyzed)
+ return 0;
+ }
+ }
+
+ MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
+ MF->insert(Header, NewPH);
+
+ if (Header->pred_size() > 2) {
+ // Ensure that the header has only two predecessors: the preheader and
+ // the loop latch. Any additional predecessors of the header should
+ // join at the newly created preheader. Inspect all PHI nodes from the
+ // header and create appropriate corresponding PHI nodes in the preheader.
+
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+
+ const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
+ MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
+ NewPH->insert(NewPH->end(), NewPN);
+
+ unsigned PR = PN->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(PR);
+ unsigned NewPR = MRI->createVirtualRegister(RC);
+ NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
+
+ // Copy all non-latch operands of a header's PHI node to the newly
+ // created PHI node in the preheader.
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ unsigned PredR = PN->getOperand(i).getReg();
+ MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
+ if (PredB == Latch)
+ continue;
+
+ NewPN->addOperand(MachineOperand::CreateReg(PredR, false));
+ NewPN->addOperand(MachineOperand::CreateMBB(PredB));
+ }
+
+ // Remove copied operands from the old PHI node and add the value
+ // coming from the preheader's PHI.
+ for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
+ MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
+ if (PredB != Latch) {
+ PN->RemoveOperand(i+1);
+ PN->RemoveOperand(i);
+ }
+ }
+ PN->addOperand(MachineOperand::CreateReg(NewPR, false));
+ PN->addOperand(MachineOperand::CreateMBB(NewPH));
+ }
+
+ } else {
+ assert(Header->pred_size() == 2);
+
+ // The header has only two predecessors, but the non-latch predecessor
+ // is not a preheader (e.g. it has other successors, etc.)
+ // In such a case we don't need any extra PHI nodes in the new preheader,
+ // all we need is to adjust existing PHIs in the header to now refer to
+ // the new preheader.
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ MachineOperand &MO = PN->getOperand(i+1);
+ if (MO.getMBB() != Latch)
+ MO.setMBB(NewPH);
+ }
+ }
+ }
+
+ // "Reroute" the CFG edges to link in the new preheader.
+ // If any of the predecessors falls through to the header, insert a branch
+ // to the new preheader in that place.
+ SmallVector<MachineOperand,1> Tmp2;
+ SmallVector<MachineOperand,1> EmptyCond;
+
+ TB = FB = 0;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ if (PB != Latch) {
+ Tmp2.clear();
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
+ (void)NotAnalyzed; // supress compiler warning
+ assert (!NotAnalyzed && "Should be analyzable!");
+ if (TB != Header && (Tmp2.empty() || FB != Header))
+ TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL);
+ PB->ReplaceUsesOfBlockWith(Header, NewPH);
+ }
+ }
+
+ // It can happen that the latch block will fall through into the header.
+ // Insert an unconditional branch to the header.
+ TB = FB = 0;
+ bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
+ (void)LatchNotAnalyzed; // supress compiler warning
+ assert (!LatchNotAnalyzed && "Should be analyzable!");
+ if (!TB && !FB)
+ TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL);
+
+ // Finally, the branch from the preheader to the header.
+ TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL);
+ NewPH->addSuccessor(Header);
+
+ return NewPH;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
new file mode 100644
index 000000000000..8fc9ba1ee8cf
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -0,0 +1,1663 @@
+//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-isel"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+static
+cl::opt<unsigned>
+MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
+ cl::Hidden, cl::init(2),
+ cl::desc("Maximum number of uses of a global address such that we still us a"
+ "constant extended instruction"));
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ void initializeHexagonDAGToDAGISelPass(PassRegistry&);
+}
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const HexagonSubtarget &Subtarget;
+
+ // Keep a reference to HexagonTargetMachine.
+ HexagonTargetMachine& TM;
+ const HexagonInstrInfo *TII;
+ DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
+public:
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(targetmachine, OptLevel),
+ Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
+ TM(targetmachine),
+ TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) {
+ initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+ }
+ bool hasNumUsesBelowThresGA(SDNode *N) const;
+
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern Selectors.
+ inline bool foldGlobalAddress(SDValue &N, SDValue &R);
+ inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
+ bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
+ bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+
+ virtual const char *getPassName() const {
+ return "Hexagon DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+ bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectBaseOffsetStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectIndexedStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectSHL(SDNode *N);
+ SDNode *SelectSelect(SDNode *N);
+ SDNode *SelectTruncate(SDNode *N);
+ SDNode *SelectMul(SDNode *N);
+ SDNode *SelectZeroExtend(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
+ SDNode *SelectIntrinsicWChain(SDNode *N);
+ SDNode *SelectConstant(SDNode *N);
+ SDNode *SelectConstantFP(SDNode *N);
+ SDNode *SelectAdd(SDNode *N);
+ bool isConstExtProfitable(SDNode *N) const;
+
+// XformMskToBitPosU5Imm - Returns the bit position which
+// the single bit 32 bit mask represents.
+// Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
+ int32_t bitPos;
+ bitPos = Log2_32(Imm);
+ assert(bitPos >= 0 && bitPos < 32 &&
+ "Constant out of range for 32 BitPos Memops");
+ return CurDAG->getTargetConstant(bitPos, MVT::i32);
+}
+
+// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit
+// mask represents. Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+}
+
+// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit
+// mask represents. Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+}
+
+// Return true if there is exactly one bit set in V, i.e., if V is one of the
+// following integers: 2^0, 2^1, ..., 2^31.
+bool ImmIsSingleBit(uint32_t v) const {
+ uint32_t c = CountPopulation_64(v);
+ // Only return true if we counted 1 bit.
+ return c == 1;
+}
+
+// XformM5ToU5Imm - Return a target constant with the specified value, of type
+// i32 where the negative literal is transformed into a positive literal for
+// use in -= memops.
+inline SDValue XformM5ToU5Imm(signed Imm) {
+ assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+ return CurDAG->getTargetConstant( - Imm, MVT::i32);
+}
+
+
+// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+// [1..128], used in cmpb.gtu instructions.
+inline SDValue XformU7ToU7M1Imm(signed Imm) {
+ assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
+}
+
+// Include the pieces autogenerated from the target description.
+#include "HexagonGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+
+/// createHexagonISelDag - This pass converts a legalized DAG into a
+/// Hexagon-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new HexagonDAGToDAGISel(TM, OptLevel);
+}
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "hexagon-isel",
+ &SelectionDAGISel::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+static bool IsS11_0_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}
+
+
+static bool IsS11_1_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}
+
+
+static bool IsS11_2_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}
+
+
+static bool IsS11_3_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}
+
+
+static bool IsU6_0_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}
+
+
+static bool IsU6_1_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}
+
+
+static bool IsU6_2_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}
+
+
+// Intrinsics that return a a predicate.
+static unsigned doesIntrinsicReturnPredicate(unsigned ID)
+{
+ switch (ID) {
+ default:
+ return 0;
+ case Intrinsic::hexagon_C2_cmpeq:
+ case Intrinsic::hexagon_C2_cmpgt:
+ case Intrinsic::hexagon_C2_cmpgtu:
+ case Intrinsic::hexagon_C2_cmpgtup:
+ case Intrinsic::hexagon_C2_cmpgtp:
+ case Intrinsic::hexagon_C2_cmpeqp:
+ case Intrinsic::hexagon_C2_bitsset:
+ case Intrinsic::hexagon_C2_bitsclr:
+ case Intrinsic::hexagon_C2_cmpeqi:
+ case Intrinsic::hexagon_C2_cmpgti:
+ case Intrinsic::hexagon_C2_cmpgtui:
+ case Intrinsic::hexagon_C2_cmpgei:
+ case Intrinsic::hexagon_C2_cmpgeui:
+ case Intrinsic::hexagon_C2_cmplt:
+ case Intrinsic::hexagon_C2_cmpltu:
+ case Intrinsic::hexagon_C2_bitsclri:
+ case Intrinsic::hexagon_C2_and:
+ case Intrinsic::hexagon_C2_or:
+ case Intrinsic::hexagon_C2_xor:
+ case Intrinsic::hexagon_C2_andn:
+ case Intrinsic::hexagon_C2_not:
+ case Intrinsic::hexagon_C2_orn:
+ case Intrinsic::hexagon_C2_pxfer_map:
+ case Intrinsic::hexagon_C2_any8:
+ case Intrinsic::hexagon_C2_all8:
+ case Intrinsic::hexagon_A2_vcmpbeq:
+ case Intrinsic::hexagon_A2_vcmpbgtu:
+ case Intrinsic::hexagon_A2_vcmpheq:
+ case Intrinsic::hexagon_A2_vcmphgt:
+ case Intrinsic::hexagon_A2_vcmphgtu:
+ case Intrinsic::hexagon_A2_vcmpweq:
+ case Intrinsic::hexagon_A2_vcmpwgt:
+ case Intrinsic::hexagon_A2_vcmpwgtu:
+ case Intrinsic::hexagon_C2_tfrrp:
+ case Intrinsic::hexagon_S2_tstbit_i:
+ case Intrinsic::hexagon_S2_tstbit_r:
+ return 1;
+ }
+}
+
+
+// Intrinsics that have predicate operands.
+static unsigned doesIntrinsicContainPredicate(unsigned ID)
+{
+ switch (ID) {
+ default:
+ return 0;
+ case Intrinsic::hexagon_C2_tfrpr:
+ return Hexagon::TFR_RsPd;
+ case Intrinsic::hexagon_C2_and:
+ return Hexagon::AND_pp;
+ case Intrinsic::hexagon_C2_xor:
+ return Hexagon::XOR_pp;
+ case Intrinsic::hexagon_C2_or:
+ return Hexagon::OR_pp;
+ case Intrinsic::hexagon_C2_not:
+ return Hexagon::NOT_p;
+ case Intrinsic::hexagon_C2_any8:
+ return Hexagon::ANY_pp;
+ case Intrinsic::hexagon_C2_all8:
+ return Hexagon::ALL_pp;
+ case Intrinsic::hexagon_C2_vitpack:
+ return Hexagon::VITPACK_pp;
+ case Intrinsic::hexagon_C2_mask:
+ return Hexagon::MASK_p;
+ case Intrinsic::hexagon_C2_mux:
+ return Hexagon::MUX_rr;
+
+ // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but
+ // that's how it's mapped in q6protos.h.
+ case Intrinsic::hexagon_C2_muxir:
+ return Hexagon::MUX_ri;
+
+ // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but
+ // that's how it's mapped in q6protos.h.
+ case Intrinsic::hexagon_C2_muxri:
+ return Hexagon::MUX_ir;
+
+ case Intrinsic::hexagon_C2_muxii:
+ return Hexagon::MUX_ii;
+ case Intrinsic::hexagon_C2_vmux:
+ return Hexagon::VMUX_prr64;
+ case Intrinsic::hexagon_S2_valignrb:
+ return Hexagon::VALIGN_rrp;
+ case Intrinsic::hexagon_S2_vsplicerb:
+ return Hexagon::VSPLICE_rrp;
+ }
+}
+
+
+static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
+ if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i8 && isInt<11>(Offset)) {
+ return true;
+ }
+ return false;
+}
+
+
+//
+// Try to lower loads of GlobalAdresses into base+offset loads. Custom
+// lowering for GlobalAddress nodes has already turned it into a
+// CONST32.
+//
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) {
+ SDValue Chain = LD->getChain();
+ SDNode* Const32 = LD->getBasePtr().getNode();
+ unsigned Opcode = 0;
+
+ if (Const32->getOpcode() == HexagonISD::CONST32 &&
+ ISD::isNormalLoad(LD)) {
+ SDValue Base = Const32->getOperand(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+ // Figure out base + offset opcode
+ if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed;
+ else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed;
+ else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed;
+ else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed;
+ else llvm_unreachable("unknown memory type");
+
+ // Build indexed load.
+ SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other,
+ SDValue(NewBase,0),
+ TargetConstOff,
+ Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(LD, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl,
+ MVT::i64, SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl,
+ MVT::i32, Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0), // Load value.
+ SDValue(Result_4, 0), // New address.
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT LoadedVT = LD->getMemoryVT();
+ unsigned Opcode = 0;
+
+ // Check for zero ext loads.
+ bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
+
+ // Figure out the opcode.
+ if (LoadedVT == MVT::i64) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::POST_LDrid;
+ else
+ Opcode = Hexagon::LDrid;
+ } else if (LoadedVT == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::POST_LDriw;
+ else
+ Opcode = Hexagon::LDriw;
+ } else if (LoadedVT == MVT::i16) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih;
+ else
+ Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih;
+ } else if (LoadedVT == MVT::i8) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib;
+ else
+ Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib;
+ } else
+ llvm_unreachable("unknown memory type");
+
+ // For zero ext i64 loads, we need to add combine instructions.
+ if (LD->getValueType(0) == MVT::i64 &&
+ LD->getExtensionType() == ISD::ZEXTLOAD) {
+ return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
+ }
+ if (LD->getValueType(0) == MVT::i64 &&
+ LD->getExtensionType() == ISD::SEXTLOAD) {
+ // Handle sign ext i64 loads.
+ return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
+ }
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 1),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result;
+ } else {
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_1, 0),
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_1;
+ }
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
+ SDNode *result;
+ DebugLoc dl = N->getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+
+ // Handle indexed loads.
+ if (AM != ISD::UNINDEXED) {
+ result = SelectIndexedLoad(LD, dl);
+ } else {
+ result = SelectBaseOffsetLoad(LD, dl);
+ }
+
+ return result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDValue Base = ST->getBasePtr();
+ SDValue Offset = ST->getOffset();
+ SDValue Value = ST->getValue();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT StoredVT = ST->getMemoryVT();
+
+ // Offset value must be within representable range
+ // and must have correct alignment properties.
+ if (TII->isValidAutoIncImm(StoredVT, Val)) {
+ SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
+ Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the post inc version of opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri;
+ else llvm_unreachable("unknown memory type");
+
+ // Build post increment store.
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(ST, Result);
+ ReplaceUses(SDValue(ST,1), SDValue(Result,1));
+ return Result;
+ }
+
+ // Note: Order of operands matches the def of instruction:
+ // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+ // and it differs for POST_ST* for instance.
+ SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
+ Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib;
+ else llvm_unreachable("unknown memory type");
+
+ // Build regular store.
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops,
+ 4);
+ // Build splitted incriment instruction.
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base,
+ TargetConstVal,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+ ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+ return Result_2;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
+ DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDNode* Const32 = ST->getBasePtr().getNode();
+ SDValue Value = ST->getValue();
+ unsigned Opcode = 0;
+
+ // Try to lower stores of GlobalAdresses into indexed stores. Custom
+ // lowering for GlobalAddress nodes has already turned it into a
+ // CONST32. Avoid truncating stores for the moment. Post-inc stores
+ // do the same. Don't think there's a reason for it, so will file a
+ // bug to fix.
+ if ((Const32->getOpcode() == HexagonISD::CONST32) &&
+ !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
+ SDValue Base = Const32->getOperand(0);
+ if (Base.getOpcode() == ISD::TargetGlobalAddress) {
+ EVT StoredVT = ST->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+
+ // Figure out base + offset opcode
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed;
+ else llvm_unreachable("unknown memory type");
+
+ SDValue Ops[] = {SDValue(NewBase,0),
+ CurDAG->getTargetConstant(Offset,PointerTy),
+ Value, Chain};
+ // build indexed store
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(ST, Result);
+ return Result;
+ }
+ }
+ }
+
+ return SelectCode(ST);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+
+ // Handle indexed stores.
+ if (AM != ISD::UNINDEXED) {
+ return SelectIndexedStore(ST, dl);
+ }
+
+ return SelectBaseOffsetStore(ST, dl);
+}
+
+SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ //
+ // --- match with the following ---
+ //
+ // %mul.i = mpy (%tmp1, %add)
+ //
+
+ if (N->getValueType(0) == MVT::i64) {
+ // Shifting a i64 signed multiply.
+ SDValue MulOp0 = N->getOperand(0);
+ SDValue MulOp1 = N->getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload.
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SDValue N000 = N00.getOperand(0);
+ SDValue N001 = N00.getOperand(1);
+ if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
+ SDValue N01 = N0.getOperand(1);
+ SDValue N02 = N0.getOperand(2);
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Shift = N->getOperand(0);
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ // %shr5.i = lshr i64 %mul.i, 32
+ // %conv3.i = trunc i64 %shr5.i to i32
+ //
+ // --- match with the following ---
+ //
+ // %conv3.i = mpy (%tmp1, %add)
+ //
+ // Trunc to i32.
+ if (N->getValueType(0) == MVT::i32) {
+ // Trunc from i64.
+ if (Shift.getNode()->getValueType(0) == MVT::i64) {
+ // Trunc child is logical shift right.
+ if (Shift.getOpcode() != ISD::SRL) {
+ return SelectCode(N);
+ }
+
+ SDValue ShiftOp0 = Shift.getOperand(0);
+ SDValue ShiftOp1 = Shift.getOperand(1);
+
+ // Shift by const 32
+ if (ShiftOp1.getOpcode() != ISD::Constant) {
+ return SelectCode(N);
+ }
+
+ int32_t ShiftConst =
+ cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue();
+ if (ShiftConst != 32) {
+ return SelectCode(N);
+ }
+
+ // Shifting a i64 signed multiply
+ SDValue Mul = ShiftOp0;
+ if (Mul.getOpcode() != ISD::MUL) {
+ return SelectCode(N);
+ }
+
+ SDValue MulOp0 = Mul.getOperand(0);
+ SDValue MulOp1 = Mul.getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(),
+ TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32)
+ return SelectCode(N);
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(),
+ TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i32) {
+ SDValue Shl_0 = N->getOperand(0);
+ SDValue Shl_1 = N->getOperand(1);
+ // RHS is const.
+ if (Shl_1.getOpcode() == ISD::Constant) {
+ if (Shl_0.getOpcode() == ISD::MUL) {
+ SDValue Mul_0 = Shl_0.getOperand(0); // Val
+ SDValue Mul_1 = Shl_0.getOperand(1); // Const
+ // RHS of mul is const.
+ if (Mul_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t MulConst =
+ cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+ int32_t ValConst = MulConst << ShlConst;
+ SDValue Val = CurDAG->getTargetConstant(ValConst,
+ MVT::i32);
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl,
+ MVT::i32, Mul_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ }
+ } else if (Shl_0.getOpcode() == ISD::SUB) {
+ SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+ SDValue Sub_1 = Shl_0.getOperand(1); // Val
+ if (Sub_0.getOpcode() == ISD::Constant) {
+ int32_t SubConst =
+ cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+ if (SubConst == 0) {
+ if (Sub_1.getOpcode() == ISD::SHL) {
+ SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+ SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+ if (Shl2_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t Shl2Const =
+ cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+ int32_t ValConst = 1 << (ShlConst+Shl2Const);
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+ if (ConstantSDNode *CN =
+ dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32,
+ Shl2_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// If there is an zero_extend followed an intrinsic in DAG (this means - the
+// result of the intrinsic is predicate); convert the zero_extend to
+// transfer instruction.
+//
+// Zero extend -> transfer is lowered here. Otherwise, zero_extend will be
+// converted into a MUX as predicate registers defined as 1 bit in the
+// compiler. Architecture defines them as 8-bit registers.
+// We want to preserve all the lower 8-bits and, not just 1 LSB bit.
+//
+SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDNode *IsIntrinsic = N->getOperand(0).getNode();
+ if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
+ unsigned ID =
+ cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue();
+ if (doesIntrinsicReturnPredicate(ID)) {
+ // Now we need to differentiate target data types.
+ if (N->getValueType(0) == MVT::i64) {
+ // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl,
+ MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 0));
+ ReplaceUses(N, Result_3);
+ return Result_3;
+ }
+ if (N->getValueType(0) == MVT::i32) {
+ // Convert the zero_extend to Rs = Pd
+ SDNode* RsPd = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ ReplaceUses(N, RsPd);
+ return RsPd;
+ }
+ llvm_unreachable("Unexpected value type");
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// Checking for intrinsics which have predicate registers as operand(s)
+// and lowering to the actual intrinsic.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID);
+
+ // We are concerned with only those intrinsics that have predicate registers
+ // as at least one of the operands.
+ if (IntrinsicWithPred) {
+ SmallVector<SDValue, 8> Ops;
+ const MCInstrDesc &MCID = TII->get(IntrinsicWithPred);
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ // Iterate over all the operands of the intrinsics.
+ // For PredRegs, do the transfer.
+ // For Double/Int Regs, just preserve the value
+ // For immediates, lower it.
+ for (unsigned i = 1; i < N->getNumOperands(); ++i) {
+ SDNode *Arg = N->getOperand(i).getNode();
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI, *MF);
+
+ if (RC == &Hexagon::IntRegsRegClass ||
+ RC == &Hexagon::DoubleRegsRegClass) {
+ Ops.push_back(SDValue(Arg, 0));
+ } else if (RC == &Hexagon::PredRegsRegClass) {
+ // Do the transfer.
+ SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDValue(Arg, 0));
+ Ops.push_back(SDValue(PdRs,0));
+ } else if (RC == NULL && (dyn_cast<ConstantSDNode>(Arg) != NULL)) {
+ // This is immediate operand. Lower it here making sure that we DO have
+ // const SDNode for immediate value.
+ int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
+ SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ Ops.push_back(SDVal);
+ } else {
+ llvm_unreachable("Unimplemented");
+ }
+ }
+ EVT ReturnValueVT = N->getValueType(0);
+ SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
+ ReturnValueVT,
+ Ops.data(), Ops.size());
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ return SelectCode(N);
+}
+
+//
+// Map floating point constant values.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ APFloat APF = CN->getValueAPF();
+ if (N->getValueType(0) == MVT::f32) {
+ return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32,
+ CurDAG->getTargetConstantFP(APF.convertToFloat(), MVT::f32));
+ }
+ else if (N->getValueType(0) == MVT::f64) {
+ return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64,
+ CurDAG->getTargetConstantFP(APF.convertToDouble(), MVT::f64));
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Map predicate true (encoded as -1 in LLVM) to a XOR.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i1) {
+ SDNode* Result;
+ int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (Val == -1) {
+ // Create the IntReg = 1 node.
+ SDNode* IntRegTFR =
+ CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32));
+
+ // Pd = IntReg
+ SDNode* Pd = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDValue(IntRegTFR, 0));
+
+ // not(Pd)
+ SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_p, dl, MVT::i1,
+ SDValue(Pd, 0));
+
+ // xor(not(Pd))
+ Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1,
+ SDValue(Pd, 0), SDValue(NotPd, 0));
+
+ // We have just built:
+ // Rs = Pd
+ // Pd = xor(not(Pd), Pd)
+
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Map add followed by a asr -> asr +=.
+//
+SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+ // Identify nodes of the form: add(asr(...)).
+ SDNode* Src1 = N->getOperand(0).getNode();
+ if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse()
+ || Src1->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
+ // Rd and Rd' are assigned to the same register
+ SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_ADD_rr, dl, MVT::i32,
+ N->getOperand(1),
+ Src1->getOperand(0),
+ Src1->getOperand(1));
+ ReplaceUses(N, Result);
+
+ return Result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ return SelectConstant(N);
+
+ case ISD::ConstantFP:
+ return SelectConstantFP(N);
+
+ case ISD::ADD:
+ return SelectAdd(N);
+
+ case ISD::SHL:
+ return SelectSHL(N);
+
+ case ISD::LOAD:
+ return SelectLoad(N);
+
+ case ISD::STORE:
+ return SelectStore(N);
+
+ case ISD::SELECT:
+ return SelectSelect(N);
+
+ case ISD::TRUNCATE:
+ return SelectTruncate(N);
+
+ case ISD::MUL:
+ return SelectMul(N);
+
+ case ISD::ZERO_EXTEND:
+ return SelectZeroExtend(N);
+
+ case ISD::INTRINSIC_WO_CHAIN:
+ return SelectIntrinsicWOChain(N);
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way
+// to define these instructions.
+//
+bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_0_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_0_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_1_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_1_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_2_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_0_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_0_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_1_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_1_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_2_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+
+ if (Addr.getOpcode() != ISD::ADD) {
+ return(SelectADDRriS11_2(Addr, Base, Offset));
+ }
+
+ return SelectADDRriS11_2(Addr, Base, Offset);
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_3_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_3_Offset(Offset.getNode()));
+}
+
+bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
+ SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+
+ return true;
+}
+
+
+// Handle generic address case. It is accessed from inlined asm =m constraints,
+// which could have any kind of pointer.
+bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+bool HexagonDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+
+ switch (ConstraintCode) {
+ case 'o': // Offsetable.
+ case 'v': // Not offsetable.
+ default: return true;
+ case 'm': // Memory.
+ if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
+bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
+ unsigned UseCount = 0;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ UseCount++;
+ }
+
+ return (UseCount <= 1);
+
+}
+
+//===--------------------------------------------------------------------===//
+// Return 'true' if use count of the global address is below threshold.
+//===--------------------------------------------------------------------===//
+bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const {
+ assert(N->getOpcode() == ISD::TargetGlobalAddress &&
+ "Expecting a target global address");
+
+ // Always try to fold the address.
+ if (TM.getOptLevel() == CodeGenOpt::Aggressive)
+ return true;
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ DenseMap<const GlobalValue *, unsigned>::const_iterator GI =
+ GlobalAddressUseCountMap.find(GA->getGlobal());
+
+ if (GI == GlobalAddressUseCountMap.end())
+ return false;
+
+ return GI->second <= MaxNumOfUsesForConstExtenders;
+}
+
+//===--------------------------------------------------------------------===//
+// Return true if the non GP-relative global address can be folded.
+//===--------------------------------------------------------------------===//
+inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) {
+ return foldGlobalAddressImpl(N, R, false);
+}
+
+//===--------------------------------------------------------------------===//
+// Return true if the GP-relative global address can be folded.
+//===--------------------------------------------------------------------===//
+inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) {
+ return foldGlobalAddressImpl(N, R, true);
+}
+
+//===--------------------------------------------------------------------===//
+// Fold offset of the global address if number of uses are below threshold.
+//===--------------------------------------------------------------------===//
+bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R,
+ bool ShouldLookForGP) {
+ if (N.getOpcode() == ISD::ADD) {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) ||
+ (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1);
+ GlobalAddressSDNode *GA =
+ dyn_cast<GlobalAddressSDNode>(N0.getOperand(0));
+
+ if (Const && GA &&
+ (GA->getOpcode() == ISD::TargetGlobalAddress)) {
+ if ((N0.getOpcode() == HexagonISD::CONST32) &&
+ !hasNumUsesBelowThresGA(GA))
+ return false;
+ R = CurDAG->getTargetGlobalAddress(GA->getGlobal(),
+ Const->getDebugLoc(),
+ N.getValueType(),
+ GA->getOffset() +
+ (uint64_t)Const->getSExtValue());
+ return true;
+ }
+ }
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
new file mode 100644
index 000000000000..15858a9368ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -0,0 +1,1687 @@
+//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Hexagon uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonISelLowering.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonTargetObjectFile.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+const unsigned Hexagon_MAX_RET_SIZE = 64;
+
+static cl::opt<bool>
+EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+ cl::desc("Control jump table emission on Hexagon target"));
+
+int NumNamedVarArgParams = -1;
+
+// Implement calling convention for Hexagon.
+static bool
+CC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ // NumNamedVarArgParams can not be zero for a VarArg function.
+ assert ( (NumNamedVarArgParams > 0) &&
+ "NumNamedVarArgParams is not bigger than zero.");
+
+ if ( (int)ValNo < NumNamedVarArgParams ) {
+ // Deal with named arguments.
+ return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+ }
+
+ // Deal with un-named arguments.
+ unsigned ofst;
+ if (ArgFlags.isByVal()) {
+ // If pass-by-value, the size allocated on stack is decided
+ // by ArgFlags.getByValSize(), not by the size of LocVT.
+ assert ((ArgFlags.getByValSize() > 8) &&
+ "ByValSize must be bigger than 8 bytes");
+ ofst = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+ ofst = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+ ofst = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ llvm_unreachable(0);
+}
+
+
+static bool
+CC_Hexagon (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (ArgFlags.isByVal()) {
+ // Passed on stack.
+ assert ((ArgFlags.getByValSize() > 8) &&
+ "ByValSize must be bigger than 8 bytes");
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+ if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+ if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+
+static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const uint16_t RegList[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ static const uint16_t RegList1[] = {
+ Hexagon::D1, Hexagon::D2
+ };
+ static const uint16_t RegList2[] = {
+ Hexagon::R1, Hexagon::R3
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+
+ if (LocVT == MVT::i1 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+ if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+ if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::R0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+SDValue
+HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
+const {
+ return SDValue();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter. Sometimes what we are copying is the end of a
+/// larger object, the part that does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+
+// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
+// passed by value, the function prototype is modified to return void and
+// the value is stored in memory pointed by a pointer passed by caller.
+SDValue
+HexagonTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
+}
+
+
+
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns a SDNode with the same number of values as the
+/// ISD::CALL.
+SDValue
+HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+/// LowerCall - Functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+SDValue
+HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Check for varargs.
+ NumNamedVarArgParams = -1;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
+ {
+ const Function* CalleeFn = NULL;
+ Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
+ if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
+ {
+ // If a function has zero args and is a vararg function, that's
+ // disallowed so it must be an undeclared function. Do not assume
+ // varargs if the callee is undefined.
+ if (CalleeFn->isVarArg() &&
+ CalleeFn->getFunctionType()->getNumParams() != 0) {
+ NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
+ }
+ }
+ }
+
+ if (NumNamedVarArgParams > 0)
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
+ else
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
+
+
+ if(isTailCall) {
+ bool StructAttrFlag =
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr();
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet,
+ StructAttrFlag,
+ Outs, OutVals, Ins, DAG);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isMemLoc()) {
+ isTailCall = false;
+ break;
+ }
+ }
+ if (isTailCall) {
+ DEBUG(dbgs () << "Eligible for Tail Call\n");
+ } else {
+ DEBUG(dbgs () <<
+ "Argument must be passed on stack. Not eligible for Tail Call\n");
+ }
+ }
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(),
+ getPointerTy());
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ // Loc info must be one of Full, SExt, ZExt, or AExt.
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isMemLoc()) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+
+ if (Flags.isByVal()) {
+ // The argument is a struct passed by value. According to LLVM, "Arg"
+ // is is pointer.
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain,
+ Flags, DAG, dl));
+ } else {
+ // The argument is not passed by value. "Arg" is a buildin type. It is
+ // not a pointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false,
+ 0));
+ }
+ continue;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0],
+ MemOpChains.size());
+ }
+
+ if (!isTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ //
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag =SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (flag_aligned_memcpy) {
+ const char *MemcpyName =
+ "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+ Callee =
+ DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+ flag_aligned_memcpy = false;
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ if (isTailCall)
+ return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, OutVals, Callee);
+}
+
+static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD)
+ return false;
+
+ if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ // Ensure that Offset is a constant.
+ return (isa<ConstantSDNode>(Offset));
+ }
+
+ return false;
+}
+
+// TODO: Put this function along with the other isS* functions in
+// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
+// functions defined in HexagonOperands.td.
+static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // immS4 predicate - True if the immediate fits in a 4-bit sign extended.
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ int64_t m = 0;
+ if (ShiftAmount > 0) {
+ m = v % ShiftAmount;
+ v = v >> ShiftAmount;
+ }
+ return (v <= 7) && (v >= -8) && (m == 0);
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const
+{
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ bool isInc = false;
+ bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ // ShiftAmount = number of left-shifted bits in the Hexagon instruction.
+ int ShiftAmount = VT.getSizeInBits() / 16;
+ if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
+
+ return false;
+}
+
+SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ switch (Node->getOpcode()) {
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the flag operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ if (FuncInfo->hasClobberLR())
+ break;
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_Mem: {
+ for (; NumVals; --NumVals, ++i) {}
+ break;
+ }
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg =
+ cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+
+ // Check it to be lr
+ if (Reg == TM.getRegisterInfo()->getRARegister()) {
+ FuncInfo->setHasClobberLR(true);
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+ } // Node->getOpcode
+ return Op;
+}
+
+
+//
+// Taken from the XCore backend.
+//
+SDValue HexagonTargetLowering::
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ // Mark all jump table targets as address taken.
+ const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ MBB->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object.
+ BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
+ }
+
+ SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+ getPointerTy(), TargetJT);
+ SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(2, MVT::i32));
+ SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
+ ShiftIndex);
+ SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
+ MachinePointerInfo(), false, false, false,
+ 0);
+ return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
+}
+
+
+SDValue
+HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = getStackPointerRegisterToSaveRestore();
+
+ // Get a reference to the stack pointer.
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+
+ // Subtract the dynamic size from the actual stack size to
+ // obtain the new stack size.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+ //
+ // For Hexagon, the outgoing memory arguments area should be on top of the
+ // alloca area on the stack i.e., the outgoing memory arguments should be
+ // at a lower address than the alloca area. Move the alloca area down the
+ // stack by adding back the space reserved for outgoing arguments to SP
+ // here.
+ //
+ // We do not know what the size of the outgoing args is at this point.
+ // So, we add a pseudo instruction ADJDYNALLOC that will adjust the
+ // stack pointer. We patch this instruction with the correct, known
+ // offset in emitPrologue().
+ //
+ // Use a placeholder immediate (zero) for now. This will be patched up
+ // by emitPrologue().
+ SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl,
+ MVT::i32,
+ Sub,
+ DAG.getConstant(0, MVT::i32));
+
+ // The Sub result contains the new stack start address, so it
+ // must be placed in the stack pointer register.
+ SDValue CopyChain = DAG.getCopyToReg(Chain, dl,
+ TM.getRegisterInfo()->getStackRegister(),
+ Sub);
+
+ SDValue Ops[2] = { ArgAdjust, CopyChain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue
+HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+ // For LLVM, in the case when returning a struct by value (>8byte),
+ // the first argument is a pointer that points to the location on caller's
+ // stack where the return value will be stored. For Hexagon, the location on
+ // caller's stack is passed only when the struct size is smaller than (and
+ // equal to) 8 bytes. If not, no address will be passed into callee and
+ // callee return the result direclty through R0/R1.
+
+ SmallVector<SDValue, 4> MemOps;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ unsigned ObjSize;
+ unsigned StackLocation;
+ int FI;
+
+ if ( (VA.isRegLoc() && !Flags.isByVal())
+ || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
+ // Arguments passed in registers
+ // 1. int, long long, ptr args that get allocated in register.
+ // 2. Large struct that gets an register to put its address in.
+ EVT RegVT = VA.getLocVT();
+ if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
+ RegVT == MVT::i32 || RegVT == MVT::f32) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::i64) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else {
+ assert (0);
+ }
+ } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
+ assert (0 && "ByValSize must be bigger than 8 bytes");
+ } else {
+ // Sanity check.
+ assert(VA.isMemLoc());
+
+ if (Flags.isByVal()) {
+ // If it's a byval parameter, then we need to compute the
+ // "real" size, not the size of the pointer.
+ ObjSize = Flags.getByValSize();
+ } else {
+ ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
+ }
+
+ StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+ // Create the frame index object for this incoming parameter...
+ FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+ // Create the SelectionDAG nodes cordl, responding to a load
+ // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+
+ if (Flags.isByVal()) {
+ // If it's a pass-by-value aggregate, then do not dereference the stack
+ // location. Instead, we should generate a reference to the stack
+ // location.
+ InVals.push_back(FIN);
+ } else {
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo(), false, false,
+ false, 0));
+ }
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0],
+ MemOps.size());
+
+ if (isVarArg) {
+ // This will point to the next argument passed via stack.
+ int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize,
+ HEXAGON_LRFP_SIZE +
+ CCInfo.getNextStackOffset(),
+ true);
+ FuncInfo->setVarArgsFrameIndex(FrameIndex);
+ }
+
+ return Chain;
+}
+
+SDValue
+HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // VASTART stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), Addr,
+ Op.getOperand(1), MachinePointerInfo(SV), false,
+ false, 0);
+}
+
+SDValue
+HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue CC = Op.getOperand(4);
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode* OpNode = Op.getNode();
+ EVT SVT = OpNode->getValueType(0);
+
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i1, LHS, RHS, CC);
+ return DAG.getNode(ISD::SELECT, dl, SVT, Cond, TrueVal, FalseVal);
+}
+
+SDValue
+HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ EVT ValTy = Op.getValueType();
+
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), ValTy,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy,
+ CP->getAlignment());
+ return DAG.getNode(HexagonISD::CONST32, dl, ValTy, Res);
+}
+
+SDValue
+HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue
+HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ const HexagonRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ TRI->getFrameRegister(), VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
+
+SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op,
+ SelectionDAG& DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG& DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Result;
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ DebugLoc dl = Op.getDebugLoc();
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+
+ const HexagonTargetObjectFile &TLOF =
+ static_cast<const HexagonTargetObjectFile &>(getObjFileLowering());
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
+ }
+
+ return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
+}
+
+SDValue
+HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32);
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), BA_SD);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
+ &targetmachine)
+ : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+ TM(targetmachine) {
+
+ const HexagonRegisterInfo* QRI = TM.getRegisterInfo();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+
+ if (QRI->Subtarget.hasV5TOps()) {
+ addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
+ }
+
+ addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
+
+ computeRegisterProperties();
+
+ // Align loop entry
+ setPrefLoopAlignment(4);
+
+ // Limits for inline expansion of memcpy/memmove
+ MaxStoresPerMemcpy = 6;
+ MaxStoresPerMemmove = 6;
+
+ //
+ // Library calls for unsupported operations
+ //
+
+ setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+
+ setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+ setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+
+ setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+ setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+ setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+ setOperationAction(ISD::FDIV, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+
+ if (QRI->Subtarget.hasV5TOps()) {
+ // Hexagon V5 Support.
+ setOperationAction(ISD::FADD, MVT::f32, Legal);
+ setOperationAction(ISD::FADD, MVT::f64, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUGE, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOGT, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Legal);
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
+
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+
+ setOperationAction(ISD::FNEG, MVT::f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ } else {
+
+ // Expand fp<->uint.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setOperationAction(ISD::FADD, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+ setOperationAction(ISD::MUL, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+ setOperationAction(ISD::SUB, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+ setOperationAction(ISD::SUB, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+
+ setOperationAction(ISD::FABS, MVT::f32, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::f32, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ }
+
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // Turn FP extload into load/fextend.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Hexagon has a i1 sign extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into CONST32.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ // Truncate action?
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+
+ // Hexagon doesn't have sext_inreg, replace them with shl/sra.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Hexagon has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ // Lower SELECT_CC to SETCC and SELECT.
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+ if (QRI->Subtarget.hasV5TOps()) {
+
+ // We need to make the operation type of SELECT node to be Custom,
+ // such that we don't go into the infinite loop of
+ // select -> setcc -> select_cc -> select loop.
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ } else {
+
+ // Hexagon has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // This is a workaround documented in DAGCombiner.cpp:2892 We don't
+ // support SELECT_CC on every type.
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ }
+
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ if (EmitJumpTables) {
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ }
+ // Increase jump tables cutover to 5, was 4.
+ setMinimumJumpTableEntries(5);
+
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+
+ // In V4, we have double word add/sub with carry. The problem with
+ // modelling this instruction is that it produces 2 results - Rdd and Px.
+ // To model update of Px, we will have to use Defs[p0..p3] which will
+ // cause any predicate live range to spill. So, we pretend we dont't
+ // have these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Expand);
+
+ if (TM.getSubtargetImpl()->isSubtargetV2()) {
+ setExceptionPointerRegister(Hexagon::R20);
+ setExceptionSelectorRegister(Hexagon::R21);
+ } else {
+ setExceptionPointerRegister(Hexagon::R0);
+ setExceptionSelectorRegister(Hexagon::R1);
+ }
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::INLINEASM , MVT::Other, Custom);
+
+ setMinFunctionAlignment(2);
+
+ // Needed for DYNAMIC_STACKALLOC expansion.
+ unsigned StackRegister = TM.getRegisterInfo()->getStackRegister();
+ setStackPointerRegisterToSaveRestore(StackRegister);
+ setSchedulingPreference(Sched::VLIW);
+}
+
+
+const char*
+HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
+ case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
+ case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
+ case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
+ case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
+ case HexagonISD::BRICC: return "HexagonISD::BRICC";
+ case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
+ case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
+ case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
+ case HexagonISD::Hi: return "HexagonISD::Hi";
+ case HexagonISD::Lo: return "HexagonISD::Lo";
+ case HexagonISD::FTOI: return "HexagonISD::FTOI";
+ case HexagonISD::ITOF: return "HexagonISD::ITOF";
+ case HexagonISD::CALL: return "HexagonISD::CALL";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ }
+}
+
+bool
+HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ EVT MTy1 = EVT::getEVT(Ty1);
+ EVT MTy2 = EVT::getEVT(Ty2);
+ if (!MTy1.isSimple() || !MTy2.isSimple()) {
+ return false;
+ }
+ return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32));
+}
+
+bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isSimple() || !VT2.isSimple()) {
+ return false;
+ }
+ return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32));
+}
+
+SDValue
+HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ // Frame & Return address. Currently unimplemented.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Hexagon.");
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SELECT: return Op;
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
+
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Hexagon Scheduler Hooks
+//===----------------------------------------------------------------------===//
+MachineBasicBlock *
+HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB)
+const {
+ switch (MI->getOpcode()) {
+ case Hexagon::ADJDYNALLOC: {
+ MachineFunction *MF = BB->getParent();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF->getInfo<HexagonMachineFunctionInfo>();
+ FuncInfo->addAllocaAdjustInst(MI);
+ return BB;
+ }
+ default: llvm_unreachable("Unexpected instr type to insert");
+ } // switch
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+HexagonTargetLowering::getRegForInlineAsmConstraint(const
+ std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r': // R0-R31
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::i32:
+ case MVT::i16:
+ case MVT::i8:
+ case MVT::f32:
+ return std::make_pair(0U, &Hexagon::IntRegsRegClass);
+ case MVT::i64:
+ case MVT::f64:
+ return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
+ }
+ default:
+ llvm_unreachable("Unknown asm register class");
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ const HexagonRegisterInfo* QRI = TM.getRegisterInfo();
+ return QRI->Subtarget.hasV5TOps();
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented by
+/// AM is legal for this target, for a load/store of the specified type.
+bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // Allows a signed-extended 11-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) {
+ return false;
+ }
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV) {
+ return false;
+ }
+
+ int Scale = AM.Scale;
+ if (Scale < 0) Scale = -Scale;
+ switch (Scale) {
+ case 0: // No scale reg, "r+i", "r", or just "i".
+ break;
+ default: // No scaled addressing mode.
+ return false;
+ }
+ return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return Imm >= -512 && Imm <= 511;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // ***************************************************************************
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes.
+ // ***************************************************************************
+
+ // If this is a tail call via a function pointer, then don't do it!
+ if (!(dyn_cast<GlobalAddressSDNode>(Callee))
+ && !(dyn_cast<ExternalSymbolSDNode>(Callee))) {
+ return false;
+ }
+
+ // Do not optimize if the calling conventions do not match.
+ if (!CCMatch)
+ return false;
+
+ // Do not tail call optimize vararg calls.
+ if (isVarArg)
+ return false;
+
+ // Also avoid tail call optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // In addition to the cases above, we also disable Tail Call Optimization if
+ // the calling convention code that at least one outgoing argument needs to
+ // go on the stack. We cannot check that here because at this point that
+ // information is not available.
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
new file mode 100644
index 000000000000..3279cc652434
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -0,0 +1,173 @@
+//===-- HexagonISelLowering.h - Hexagon DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Hexagon uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_ISELLOWERING_H
+#define Hexagon_ISELLOWERING_H
+
+#include "Hexagon.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace HexagonISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ CONST32,
+ CONST32_GP, // For marking data present in GP.
+ CONST32_Int_Real,
+ FCONST32,
+ SETCC,
+ ADJDYNALLOC,
+ ARGEXTEND,
+
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ BR_JT, // Jump table.
+ BARRIER, // Memory barrier.
+ WrapperJT,
+ WrapperCP,
+ WrapperCombineII,
+ WrapperCombineRR,
+ WrapperCombineRI_V4,
+ WrapperCombineIR_V4,
+ WrapperPackhl,
+ WrapperSplatB,
+ WrapperSplatH,
+ WrapperShuffEB,
+ WrapperShuffEH,
+ WrapperShuffOB,
+ WrapperShuffOH,
+ TC_RETURN
+ };
+ }
+
+ class HexagonTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+
+ bool CanReturnSmallStruct(const Function* CalleeFn,
+ unsigned& RetSize) const;
+
+ public:
+ HexagonTargetMachine &TM;
+ explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine);
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock
+ *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ virtual EVT getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+ }
+
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ // Intrinsics
+ virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const;
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ /// The type may be VoidTy, in which case only return true if the addressing
+ /// mode is legal for a load/store of any legal type.
+ /// TODO: Handle pre/postinc as well.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+ };
+} // end namespace llvm
+
+#endif // Hexagon_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
new file mode 100644
index 000000000000..587fa7d7f10e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -0,0 +1,379 @@
+//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Hexagon Intruction Flags +
+//
+// *** Must match HexagonBaseInfo.h ***
+//===----------------------------------------------------------------------===//
+
+class IType<bits<5> t> {
+ bits<5> Value = t;
+}
+def TypePSEUDO : IType<0>;
+def TypeALU32 : IType<1>;
+def TypeCR : IType<2>;
+def TypeJR : IType<3>;
+def TypeJ : IType<4>;
+def TypeLD : IType<5>;
+def TypeST : IType<6>;
+def TypeSYSTEM : IType<7>;
+def TypeXTYPE : IType<8>;
+def TypeENDLOOP: IType<31>;
+
+// Maintain list of valid subtargets for each instruction.
+class SubTarget<bits<4> value> {
+ bits<4> Value = value;
+}
+
+def HasV2SubT : SubTarget<0xf>;
+def HasV2SubTOnly : SubTarget<0x1>;
+def NoV2SubT : SubTarget<0x0>;
+def HasV3SubT : SubTarget<0xe>;
+def HasV3SubTOnly : SubTarget<0x2>;
+def NoV3SubT : SubTarget<0x1>;
+def HasV4SubT : SubTarget<0xc>;
+def NoV4SubT : SubTarget<0x3>;
+def HasV5SubT : SubTarget<0x8>;
+def NoV5SubT : SubTarget<0x7>;
+
+// Addressing modes for load/store instructions
+class AddrModeType<bits<3> value> {
+ bits<3> Value = value;
+}
+
+def NoAddrMode : AddrModeType<0>; // No addressing mode
+def Absolute : AddrModeType<1>; // Absolute addressing mode
+def AbsoluteSet : AddrModeType<2>; // Absolute set addressing mode
+def BaseImmOffset : AddrModeType<3>; // Indirect with offset
+def BaseLongOffset : AddrModeType<4>; // Indirect with long offset
+def BaseRegOffset : AddrModeType<5>; // Indirect with register offset
+
+class MemAccessSize<bits<3> value> {
+ bits<3> Value = value;
+}
+
+def NoMemAccess : MemAccessSize<0>;// Not a memory acces instruction.
+def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
+def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
+def WordAccess : MemAccessSize<3>;// Word access instrution (memw).
+def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
+
+
+//===----------------------------------------------------------------------===//
+// Intruction Class Declaration +
+//===----------------------------------------------------------------------===//
+
+class OpcodeHexagon {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+ bits<2> IParse = 0; // Parse bits.
+
+ let Inst{31-28} = IClass;
+ let Inst{15-14} = IParse;
+
+ bits<1> zero = 0;
+}
+
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr, InstrItinClass itin, IType type>
+ : Instruction, OpcodeHexagon {
+ let Namespace = "Hexagon";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Constraints = cstr;
+ let Itinerary = itin;
+ let Size = 4;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ // Instruction type according to the ISA.
+ IType Type = type;
+ let TSFlags{4-0} = Type.Value;
+
+ // Solo instructions, i.e., those that cannot be in a packet with others.
+ bits<1> isSolo = 0;
+ let TSFlags{5} = isSolo;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{6} = isPredicated;
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{7} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{8} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{9} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{10} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ bits<2> opNewBits = 0;
+ let TSFlags{15-14} = opNewBits; // New-value opcode bits location: 0, 8, 16.
+ bits<1> isNVStorable = 0;
+ let TSFlags{16} = isNVStorable; // Store that can become new-value store.
+ bits<1> isNVStore = 0;
+ let TSFlags{17} = isNVStore; // New-value store insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{18} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{19} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{22-20} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{23} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+ let TSFlags{28-24} = opExtentBits; //Number of bits of range before extending.
+
+ // If an instruction is valid on a subtarget (v2-v5), set the corresponding
+ // bit from validSubTargets. v2 is the least significant bit.
+ // By default, instruction is valid on all subtargets.
+ SubTarget validSubTargets = HasV2SubT;
+ let TSFlags{32-29} = validSubTargets.Value;
+
+ // Addressing mode for load/store instructions.
+ AddrModeType addrMode = NoAddrMode;
+ let TSFlags{35-33} = addrMode.Value;
+
+ // Memory access size for mem access instructions (load/store)
+ MemAccessSize accessSize = NoMemAccess;
+ let TSFlags{38-36} = accessSize.Value;
+
+ // Fields used for relation models.
+ string BaseOpcode = "";
+ string CextOpcode = "";
+ string PredSense = "";
+ string PNewValue = "";
+ string InputType = ""; // Input is "imm" or "reg" type.
+ string isMEMri = "false"; // Set to "true" for load/store with MEMri operand.
+ string isFloat = "false"; // Set to "true" for the floating-point load/store.
+ string isBrTaken = ""; // Set to "true"/"false" for jump instructions
+
+ let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"),
+ "");
+ let PNewValue = !if(isPredicatedNew, "new", "");
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+}
+
+//===----------------------------------------------------------------------===//
+// Intruction Classes Definitions +
+//===----------------------------------------------------------------------===//
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD>;
+
+let mayLoad = 1 in
+class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+let mayStore = 1 in
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST>;
+
+class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayStore = 1 in
+class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST0, TypeST>;
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
+
+// SYSTEM Instruction Class in V4 can take SLOT0 only
+// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
+class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, SYS, TypeSYSTEM>;
+
+// ALU32 Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU32, TypeALU32>;
+
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE>;
+
+class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE>;
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : MInst<outs, ins, asmstr, pattern, cstr>;
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE>;
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : SInst<outs, ins, asmstr, pattern, cstr>;
+
+// J Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, J, TypeJ>;
+
+// JR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, JR, TypeJR>;
+
+// CR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, CR, TypeCR>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ENDLOOP, TypeENDLOOP>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr="">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>;
+
+//===----------------------------------------------------------------------===//
+// Intruction Classes Definitions -
+//===----------------------------------------------------------------------===//
+
+
+//
+// ALU32 patterns
+//.
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+
+//
+// ALU64 patterns.
+//
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+
+class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+
+// Post increment ST Instruction.
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayStore = 1 in
+class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
+
+// Post increment LD Instruction.
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
new file mode 100644
index 000000000000..9fda0da91612
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -0,0 +1,66 @@
+//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Hexagon Intruction Flags +
+//
+// *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeMEMOP : IType<9>;
+def TypeNV : IType<10>;
+def TypePREFIX : IType<30>;
+
+//----------------------------------------------------------------------------//
+// Intruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
+//
+// NV type instructions.
+//
+class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV>;
+
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
+
+// Definition of Post increment new value store.
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
+
+// Post increment ST Instruction.
+let mayStore = 1 in
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
+
+// New-value conditional branch.
+class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1, mayStore = 1 in
+class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, MEM_V4, TypeMEMOP>;
+
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : MEMInst<outs, ins, asmstr, pattern, cstr>;
+
+let isCodeGenOnly = 1 in
+class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
new file mode 100644
index 000000000000..60b12ac01c9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -0,0 +1,2576 @@
+//===-- HexagonInstrInfo.cpp - Hexagon Instruction Information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/MathExtras.h"
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRMAP_INFO
+#include "HexagonGenInstrInfo.inc"
+#include "HexagonGenDFAPacketizer.inc"
+
+using namespace llvm;
+
+///
+/// Constants for Hexagon instructions.
+///
+const int Hexagon_MEMW_OFFSET_MAX = 4095;
+const int Hexagon_MEMW_OFFSET_MIN = -4096;
+const int Hexagon_MEMD_OFFSET_MAX = 8191;
+const int Hexagon_MEMD_OFFSET_MIN = -8192;
+const int Hexagon_MEMH_OFFSET_MAX = 2047;
+const int Hexagon_MEMH_OFFSET_MIN = -2048;
+const int Hexagon_MEMB_OFFSET_MAX = 1023;
+const int Hexagon_MEMB_OFFSET_MIN = -1024;
+const int Hexagon_ADDI_OFFSET_MAX = 32767;
+const int Hexagon_ADDI_OFFSET_MIN = -32768;
+const int Hexagon_MEMD_AUTOINC_MAX = 56;
+const int Hexagon_MEMD_AUTOINC_MIN = -64;
+const int Hexagon_MEMW_AUTOINC_MAX = 28;
+const int Hexagon_MEMW_AUTOINC_MIN = -32;
+const int Hexagon_MEMH_AUTOINC_MAX = 14;
+const int Hexagon_MEMH_AUTOINC_MIN = -16;
+const int Hexagon_MEMB_AUTOINC_MAX = 7;
+const int Hexagon_MEMB_AUTOINC_MIN = -8;
+
+
+HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+
+
+ switch (MI->getOpcode()) {
+ default: break;
+ case Hexagon::LDriw:
+ case Hexagon::LDrid:
+ case Hexagon::LDrih:
+ case Hexagon::LDrib:
+ case Hexagon::LDriub:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case Hexagon::STriw:
+ case Hexagon::STrid:
+ case Hexagon::STrih:
+ case Hexagon::STrib:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+
+unsigned
+HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const{
+
+ int BOpc = Hexagon::JMP;
+ int BccOpc = Hexagon::JMP_c;
+
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ int regPos = 0;
+ // Check if ReverseBranchCondition has asked to reverse this branch
+ // If we want to reverse the branch an odd number of times, we want
+ // JMP_cNot.
+ if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+ BccOpc = Hexagon::JMP_cNot;
+ regPos = 1;
+ }
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ // Due to a bug in TailMerging/CFG Optimization, we need to add a
+ // special case handling of a predicated jump followed by an
+ // unconditional jump. If not, Tail Merging and CFG Optimization go
+ // into an infinite loop.
+ MachineBasicBlock *NewTBB, *NewFBB;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineInstr *Term = MBB.getFirstTerminator();
+ if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
+ false)) {
+ MachineBasicBlock *NextBB =
+ llvm::next(MachineFunction::iterator(&MBB));
+ if (NewTBB == NextBB) {
+ ReverseBranchCondition(Cond);
+ RemoveBranch(MBB);
+ return InsertBranch(MBB, TBB, 0, Cond, DL);
+ }
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else {
+ BuildMI(&MBB, DL,
+ get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ }
+ return 1;
+ }
+
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+ return 2;
+}
+
+
+bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ TBB = NULL;
+ FBB = NULL;
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+
+ // A basic block may looks like this:
+ //
+ // [ insn
+ // EH_LABEL
+ // insn
+ // insn
+ // insn
+ // EH_LABEL
+ // insn ]
+ //
+ // It has two succs but does not have a terminator
+ // Don't know how to handle it.
+ do {
+ --I;
+ if (I->isEHLabel())
+ return true;
+ } while (I != MBB.begin());
+
+ I = MBB.end();
+ --I;
+
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (LastInst->getOpcode() == Hexagon::JMP_c) {
+ // Block ends with fall-through true condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ if (LastInst->getOpcode() == Hexagon::JMP_cNot) {
+ // Block ends with fall-through false condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Hexagon::BRCOND and Hexagon:JMP, handle it.
+ if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) ||
+ (SecondLastInst->getOpcode() == Hexagon::JMP_c)) &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with Hexagon::JMP_cNot and Hexagon:JMP, handle it.
+ if ((SecondLastInst->getOpcode() == Hexagon::JMP_cNot) &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two Hexagon:JMPs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Hexagon::JMP &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ int BOpc = Hexagon::JMP;
+ int BccOpc = Hexagon::JMP_c;
+ int BccOpcNot = Hexagon::JMP_cNot;
+
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
+ I->getOpcode() != BccOpcNot)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+
+/// \brief For a comparison instruction, return the source registers in
+/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
+/// compares against in CmpValue. Return true if the comparison instruction
+/// can be analyzed.
+bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ unsigned Opc = MI->getOpcode();
+
+ // Set mask and the first source register.
+ switch (Opc) {
+ case Hexagon::CMPEHexagon4rr:
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPEQrr:
+ case Hexagon::CMPGT64rr:
+ case Hexagon::CMPGTU64rr:
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTUrr:
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPGTrr:
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPLTrr:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = ~0;
+ break;
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPbEQrr_sbsb_V4:
+ case Hexagon::CMPbEQrr_ubub_V4:
+ case Hexagon::CMPbGTUri_V4:
+ case Hexagon::CMPbGTUrr_V4:
+ case Hexagon::CMPbGTrr_V4:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFF;
+ break;
+ case Hexagon::CMPhEQri_V4:
+ case Hexagon::CMPhEQrr_shl_V4:
+ case Hexagon::CMPhEQrr_xor_V4:
+ case Hexagon::CMPhGTUri_V4:
+ case Hexagon::CMPhGTUrr_V4:
+ case Hexagon::CMPhGTrr_shl_V4:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFFFF;
+ break;
+ }
+
+ // Set the value/second source register.
+ switch (Opc) {
+ case Hexagon::CMPEHexagon4rr:
+ case Hexagon::CMPEQrr:
+ case Hexagon::CMPGT64rr:
+ case Hexagon::CMPGTU64rr:
+ case Hexagon::CMPGTUrr:
+ case Hexagon::CMPGTrr:
+ case Hexagon::CMPbEQrr_sbsb_V4:
+ case Hexagon::CMPbEQrr_ubub_V4:
+ case Hexagon::CMPbGTUrr_V4:
+ case Hexagon::CMPbGTrr_V4:
+ case Hexagon::CMPhEQrr_shl_V4:
+ case Hexagon::CMPhEQrr_xor_V4:
+ case Hexagon::CMPhGTUrr_V4:
+ case Hexagon::CMPhGTrr_shl_V4:
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPLTrr:
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPbGTUri_V4:
+ case Hexagon::CMPhEQri_V4:
+ case Hexagon::CMPhGTUri_V4:
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+
+void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR64), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
+ // Map Pd = Ps to Pd = or(Ps, Ps).
+ BuildMI(MBB, I, DL, get(Hexagon::OR_pp),
+ DestReg).addReg(SrcReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(DestReg) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg)) {
+ // We can have an overlap between single and double reg: r1:0 = r0.
+ if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
+ // r1:0 = r0
+ BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ } else {
+ // r1:0 = r1 or no overlap.
+ BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg,
+ Hexagon::subreg_loreg))).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ }
+ return;
+ }
+ if (Hexagon::CRRegsRegClass.contains(DestReg) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ Hexagon::IntRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_RsPd), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ Hexagon::PredRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_PdRs), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ llvm_unreachable("Unimplemented");
+}
+
+
+void HexagonInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STrid))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else {
+ llvm_unreachable("Unimplemented");
+ }
+}
+
+
+void HexagonInstrInfo::storeRegToAddr(
+ MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ llvm_unreachable("Unimplemented");
+}
+
+
+void HexagonInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ Align);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == &Hexagon::DoubleRegsRegClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == &Hexagon::PredRegsRegClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else {
+ llvm_unreachable("Can't store this register to stack slot");
+ }
+}
+
+
+void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("Unimplemented");
+}
+
+
+MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ // Hexagon_TODO: Implement.
+ return(0);
+}
+
+MachineInstr*
+HexagonInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Hexagon::DBG_VALUE))
+ .addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *TRC;
+ if (VT == MVT::i1) {
+ TRC = &Hexagon::PredRegsRegClass;
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
+ TRC = &Hexagon::IntRegsRegClass;
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
+ TRC = &Hexagon::DoubleRegsRegClass;
+ } else {
+ llvm_unreachable("Cannot handle this register class");
+ }
+
+ unsigned NewReg = RegInfo.createVirtualRegister(TRC);
+ return NewReg;
+}
+
+bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
+
+ const MCInstrDesc &MID = MI->getDesc();
+ const uint64_t F = MID.TSFlags;
+ if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
+ return true;
+
+ // TODO: This is largely obsolete now. Will need to be removed
+ // in consecutive patches.
+ switch(MI->getOpcode()) {
+ // TFR_FI Remains a special case.
+ case Hexagon::TFR_FI:
+ return true;
+ default:
+ return false;
+ }
+ return false;
+}
+
+// This returns true in two cases:
+// - The OP code itself indicates that this is an extended instruction.
+// - One of MOs has been marked with HMOTF_ConstExtended flag.
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+ // First check if this is permanently extended op code.
+ const uint64_t F = MI->getDesc().TSFlags;
+ if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+ return true;
+ // Use MO operand flags to determine if one of MI's operands
+ // has HMOTF_ConstExtended flag set.
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended)
+ return true;
+ }
+ return false;
+}
+
+bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_nv_V4:
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+ case Hexagon::JMP_EQriPt_ie_nv_V4:
+ case Hexagon::JMP_EQriPnt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
+
+ // JMP_EQri - with -1
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+ case Hexagon::JMP_EQriPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriPntneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+ case Hexagon::JMP_EQrrPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrPnt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_nv_V4:
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+ case Hexagon::JMP_GTriPt_ie_nv_V4:
+ case Hexagon::JMP_GTriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
+
+ // JMP_GTri - with -1
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ case Hexagon::JMP_GTriPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriPntneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+ case Hexagon::JMP_GTrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+ case Hexagon::JMP_GTUriPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ case Hexagon::JMP_GTUrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ // Store Byte
+ case Hexagon::STrib_nv_V4:
+ case Hexagon::STrib_indexed_nv_V4:
+ case Hexagon::STrib_indexed_shl_nv_V4:
+ case Hexagon::STrib_shl_nv_V4:
+ case Hexagon::STb_GP_nv_V4:
+ case Hexagon::POST_STbri_nv_V4:
+ case Hexagon::STrib_cPt_nv_V4:
+ case Hexagon::STrib_cdnPt_nv_V4:
+ case Hexagon::STrib_cNotPt_nv_V4:
+ case Hexagon::STrib_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_cPt_nv_V4:
+ case Hexagon::STrib_indexed_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_cNotPt_nv_V4:
+ case Hexagon::STrib_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_STbri_cPt_nv_V4:
+ case Hexagon::POST_STbri_cdnPt_nv_V4:
+ case Hexagon::POST_STbri_cNotPt_nv_V4:
+ case Hexagon::POST_STbri_cdnNotPt_nv_V4:
+ case Hexagon::STb_GP_cPt_nv_V4:
+ case Hexagon::STb_GP_cNotPt_nv_V4:
+ case Hexagon::STb_GP_cdnPt_nv_V4:
+ case Hexagon::STb_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrib_abs_nv_V4:
+ case Hexagon::STrib_abs_cPt_nv_V4:
+ case Hexagon::STrib_abs_cdnPt_nv_V4:
+ case Hexagon::STrib_abs_cNotPt_nv_V4:
+ case Hexagon::STrib_abs_cdnNotPt_nv_V4:
+ case Hexagon::STrib_imm_abs_nv_V4:
+ case Hexagon::STrib_imm_abs_cPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cdnNotPt_nv_V4:
+
+ // Store Halfword
+ case Hexagon::STrih_nv_V4:
+ case Hexagon::STrih_indexed_nv_V4:
+ case Hexagon::STrih_indexed_shl_nv_V4:
+ case Hexagon::STrih_shl_nv_V4:
+ case Hexagon::STh_GP_nv_V4:
+ case Hexagon::POST_SThri_nv_V4:
+ case Hexagon::STrih_cPt_nv_V4:
+ case Hexagon::STrih_cdnPt_nv_V4:
+ case Hexagon::STrih_cNotPt_nv_V4:
+ case Hexagon::STrih_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cPt_nv_V4:
+ case Hexagon::STrih_indexed_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_cNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_SThri_cPt_nv_V4:
+ case Hexagon::POST_SThri_cdnPt_nv_V4:
+ case Hexagon::POST_SThri_cNotPt_nv_V4:
+ case Hexagon::POST_SThri_cdnNotPt_nv_V4:
+ case Hexagon::STh_GP_cPt_nv_V4:
+ case Hexagon::STh_GP_cNotPt_nv_V4:
+ case Hexagon::STh_GP_cdnPt_nv_V4:
+ case Hexagon::STh_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrih_abs_nv_V4:
+ case Hexagon::STrih_abs_cPt_nv_V4:
+ case Hexagon::STrih_abs_cdnPt_nv_V4:
+ case Hexagon::STrih_abs_cNotPt_nv_V4:
+ case Hexagon::STrih_abs_cdnNotPt_nv_V4:
+ case Hexagon::STrih_imm_abs_nv_V4:
+ case Hexagon::STrih_imm_abs_cPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cdnNotPt_nv_V4:
+
+ // Store Word
+ case Hexagon::STriw_nv_V4:
+ case Hexagon::STriw_indexed_nv_V4:
+ case Hexagon::STriw_indexed_shl_nv_V4:
+ case Hexagon::STriw_shl_nv_V4:
+ case Hexagon::STw_GP_nv_V4:
+ case Hexagon::POST_STwri_nv_V4:
+ case Hexagon::STriw_cPt_nv_V4:
+ case Hexagon::STriw_cdnPt_nv_V4:
+ case Hexagon::STriw_cNotPt_nv_V4:
+ case Hexagon::STriw_cdnNotPt_nv_V4:
+ case Hexagon::STriw_indexed_cPt_nv_V4:
+ case Hexagon::STriw_indexed_cdnPt_nv_V4:
+ case Hexagon::STriw_indexed_cNotPt_nv_V4:
+ case Hexagon::STriw_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_STwri_cPt_nv_V4:
+ case Hexagon::POST_STwri_cdnPt_nv_V4:
+ case Hexagon::POST_STwri_cNotPt_nv_V4:
+ case Hexagon::POST_STwri_cdnNotPt_nv_V4:
+ case Hexagon::STw_GP_cPt_nv_V4:
+ case Hexagon::STw_GP_cNotPt_nv_V4:
+ case Hexagon::STw_GP_cdnPt_nv_V4:
+ case Hexagon::STw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STriw_abs_nv_V4:
+ case Hexagon::STriw_abs_cPt_nv_V4:
+ case Hexagon::STriw_abs_cdnPt_nv_V4:
+ case Hexagon::STriw_abs_cNotPt_nv_V4:
+ case Hexagon::STriw_abs_cdnNotPt_nv_V4:
+ case Hexagon::STriw_imm_abs_nv_V4:
+ case Hexagon::STriw_imm_abs_cPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cdnNotPt_nv_V4:
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
+ switch (MI->getOpcode())
+ {
+ default: return false;
+ // Load Byte
+ case Hexagon::POST_LDrib:
+ case Hexagon::POST_LDrib_cPt:
+ case Hexagon::POST_LDrib_cNotPt:
+ case Hexagon::POST_LDrib_cdnPt_V4:
+ case Hexagon::POST_LDrib_cdnNotPt_V4:
+
+ // Load unsigned byte
+ case Hexagon::POST_LDriub:
+ case Hexagon::POST_LDriub_cPt:
+ case Hexagon::POST_LDriub_cNotPt:
+ case Hexagon::POST_LDriub_cdnPt_V4:
+ case Hexagon::POST_LDriub_cdnNotPt_V4:
+
+ // Load halfword
+ case Hexagon::POST_LDrih:
+ case Hexagon::POST_LDrih_cPt:
+ case Hexagon::POST_LDrih_cNotPt:
+ case Hexagon::POST_LDrih_cdnPt_V4:
+ case Hexagon::POST_LDrih_cdnNotPt_V4:
+
+ // Load unsigned halfword
+ case Hexagon::POST_LDriuh:
+ case Hexagon::POST_LDriuh_cPt:
+ case Hexagon::POST_LDriuh_cNotPt:
+ case Hexagon::POST_LDriuh_cdnPt_V4:
+ case Hexagon::POST_LDriuh_cdnNotPt_V4:
+
+ // Load word
+ case Hexagon::POST_LDriw:
+ case Hexagon::POST_LDriw_cPt:
+ case Hexagon::POST_LDriw_cNotPt:
+ case Hexagon::POST_LDriw_cdnPt_V4:
+ case Hexagon::POST_LDriw_cdnNotPt_V4:
+
+ // Load double word
+ case Hexagon::POST_LDrid:
+ case Hexagon::POST_LDrid_cPt:
+ case Hexagon::POST_LDrid_cNotPt:
+ case Hexagon::POST_LDrid_cdnPt_V4:
+ case Hexagon::POST_LDrid_cdnNotPt_V4:
+
+ // Store byte
+ case Hexagon::POST_STbri:
+ case Hexagon::POST_STbri_cPt:
+ case Hexagon::POST_STbri_cNotPt:
+ case Hexagon::POST_STbri_cdnPt_V4:
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+
+ // Store halfword
+ case Hexagon::POST_SThri:
+ case Hexagon::POST_SThri_cPt:
+ case Hexagon::POST_SThri_cNotPt:
+ case Hexagon::POST_SThri_cdnPt_V4:
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+
+ // Store word
+ case Hexagon::POST_STwri:
+ case Hexagon::POST_STwri_cPt:
+ case Hexagon::POST_STwri_cNotPt:
+ case Hexagon::POST_STwri_cdnPt_V4:
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+
+ // Store double word
+ case Hexagon::POST_STdri:
+ case Hexagon::POST_STdri_cPt:
+ case Hexagon::POST_STdri_cNotPt:
+ case Hexagon::POST_STdri_cdnPt_V4:
+ case Hexagon::POST_STdri_cdnNotPt_V4:
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
+ if (isNewValueJump(MI))
+ return true;
+
+ if (isNewValueStore(MI))
+ return true;
+
+ return false;
+}
+
+bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
+ return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
+}
+
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+ bool isPred = MI->getDesc().isPredicable();
+
+ if (!isPred)
+ return false;
+
+ const int Opc = MI->getOpcode();
+
+ switch(Opc) {
+ case Hexagon::TFRI:
+ return isInt<12>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrid:
+ case Hexagon::STrid_indexed:
+ return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
+
+ case Hexagon::STriw:
+ case Hexagon::STriw_indexed:
+ case Hexagon::STriw_nv_V4:
+ return isShiftedUInt<6,2>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrih:
+ case Hexagon::STrih_indexed:
+ case Hexagon::STrih_nv_V4:
+ return isShiftedUInt<6,1>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrib:
+ case Hexagon::STrib_indexed:
+ case Hexagon::STrib_nv_V4:
+ return isUInt<6>(MI->getOperand(1).getImm());
+
+ case Hexagon::LDrid:
+ case Hexagon::LDrid_indexed:
+ return isShiftedUInt<6,3>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDriw:
+ case Hexagon::LDriw_indexed:
+ return isShiftedUInt<6,2>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDrih:
+ case Hexagon::LDriuh:
+ case Hexagon::LDrih_indexed:
+ case Hexagon::LDriuh_indexed:
+ return isShiftedUInt<6,1>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDrib:
+ case Hexagon::LDriub:
+ case Hexagon::LDrib_indexed:
+ case Hexagon::LDriub_indexed:
+ return isUInt<6>(MI->getOperand(2).getImm());
+
+ case Hexagon::POST_LDrid:
+ return isShiftedInt<4,3>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDriw:
+ return isShiftedInt<4,2>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDrih:
+ case Hexagon::POST_LDriuh:
+ return isShiftedInt<4,1>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDrib:
+ case Hexagon::POST_LDriub:
+ return isInt<4>(MI->getOperand(3).getImm());
+
+ case Hexagon::STrib_imm_V4:
+ case Hexagon::STrih_imm_V4:
+ case Hexagon::STriw_imm_V4:
+ return (isUInt<6>(MI->getOperand(1).getImm()) &&
+ isInt<6>(MI->getOperand(2).getImm()));
+
+ case Hexagon::ADD_ri:
+ return isInt<8>(MI->getOperand(2).getImm());
+
+ case Hexagon::ASLH:
+ case Hexagon::ASRH:
+ case Hexagon::SXTB:
+ case Hexagon::SXTH:
+ case Hexagon::ZXTB:
+ case Hexagon::ZXTH:
+ return Subtarget.hasV4TOps();
+
+ case Hexagon::JMPR:
+ return false;
+ }
+
+ return true;
+}
+
+// This function performs the following inversiones:
+//
+// cPt ---> cNotPt
+// cNotPt ---> cPt
+//
+// however, these inversiones are NOT included:
+//
+// cdnPt -X-> cdnNotPt
+// cdnNotPt -X-> cdnPt
+// cPt_nv -X-> cNotPt_nv (new value stores)
+// cNotPt_nv -X-> cPt_nv (new value stores)
+//
+// because only the following transformations are allowed:
+//
+// cNotPt ---> cdnNotPt
+// cPt ---> cdnPt
+// cNotPt ---> cNotPt_nv
+// cPt ---> cPt_nv
+unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+ switch(Opc) {
+ default: llvm_unreachable("Unexpected predicated instruction");
+ case Hexagon::TFR_cPt:
+ return Hexagon::TFR_cNotPt;
+ case Hexagon::TFR_cNotPt:
+ return Hexagon::TFR_cPt;
+
+ case Hexagon::TFRI_cPt:
+ return Hexagon::TFRI_cNotPt;
+ case Hexagon::TFRI_cNotPt:
+ return Hexagon::TFRI_cPt;
+
+ case Hexagon::JMP_c:
+ return Hexagon::JMP_cNot;
+ case Hexagon::JMP_cNot:
+ return Hexagon::JMP_c;
+
+ case Hexagon::ADD_ri_cPt:
+ return Hexagon::ADD_ri_cNotPt;
+ case Hexagon::ADD_ri_cNotPt:
+ return Hexagon::ADD_ri_cPt;
+
+ case Hexagon::ADD_rr_cPt:
+ return Hexagon::ADD_rr_cNotPt;
+ case Hexagon::ADD_rr_cNotPt:
+ return Hexagon::ADD_rr_cPt;
+
+ case Hexagon::XOR_rr_cPt:
+ return Hexagon::XOR_rr_cNotPt;
+ case Hexagon::XOR_rr_cNotPt:
+ return Hexagon::XOR_rr_cPt;
+
+ case Hexagon::AND_rr_cPt:
+ return Hexagon::AND_rr_cNotPt;
+ case Hexagon::AND_rr_cNotPt:
+ return Hexagon::AND_rr_cPt;
+
+ case Hexagon::OR_rr_cPt:
+ return Hexagon::OR_rr_cNotPt;
+ case Hexagon::OR_rr_cNotPt:
+ return Hexagon::OR_rr_cPt;
+
+ case Hexagon::SUB_rr_cPt:
+ return Hexagon::SUB_rr_cNotPt;
+ case Hexagon::SUB_rr_cNotPt:
+ return Hexagon::SUB_rr_cPt;
+
+ case Hexagon::COMBINE_rr_cPt:
+ return Hexagon::COMBINE_rr_cNotPt;
+ case Hexagon::COMBINE_rr_cNotPt:
+ return Hexagon::COMBINE_rr_cPt;
+
+ case Hexagon::ASLH_cPt_V4:
+ return Hexagon::ASLH_cNotPt_V4;
+ case Hexagon::ASLH_cNotPt_V4:
+ return Hexagon::ASLH_cPt_V4;
+
+ case Hexagon::ASRH_cPt_V4:
+ return Hexagon::ASRH_cNotPt_V4;
+ case Hexagon::ASRH_cNotPt_V4:
+ return Hexagon::ASRH_cPt_V4;
+
+ case Hexagon::SXTB_cPt_V4:
+ return Hexagon::SXTB_cNotPt_V4;
+ case Hexagon::SXTB_cNotPt_V4:
+ return Hexagon::SXTB_cPt_V4;
+
+ case Hexagon::SXTH_cPt_V4:
+ return Hexagon::SXTH_cNotPt_V4;
+ case Hexagon::SXTH_cNotPt_V4:
+ return Hexagon::SXTH_cPt_V4;
+
+ case Hexagon::ZXTB_cPt_V4:
+ return Hexagon::ZXTB_cNotPt_V4;
+ case Hexagon::ZXTB_cNotPt_V4:
+ return Hexagon::ZXTB_cPt_V4;
+
+ case Hexagon::ZXTH_cPt_V4:
+ return Hexagon::ZXTH_cNotPt_V4;
+ case Hexagon::ZXTH_cNotPt_V4:
+ return Hexagon::ZXTH_cPt_V4;
+
+
+ case Hexagon::JMPR_cPt:
+ return Hexagon::JMPR_cNotPt;
+ case Hexagon::JMPR_cNotPt:
+ return Hexagon::JMPR_cPt;
+
+ // V4 indexed+scaled load.
+ case Hexagon::LDrid_indexed_shl_cPt_V4:
+ return Hexagon::LDrid_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4:
+ return Hexagon::LDrid_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cPt_V4:
+ return Hexagon::LDrib_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4:
+ return Hexagon::LDrib_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cPt_V4:
+ return Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4:
+ return Hexagon::LDriub_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cPt_V4:
+ return Hexagon::LDrih_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4:
+ return Hexagon::LDrih_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cPt_V4:
+ return Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4:
+ return Hexagon::LDriuh_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cPt_V4:
+ return Hexagon::LDriw_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4:
+ return Hexagon::LDriw_indexed_shl_cPt_V4;
+
+ // Byte.
+ case Hexagon::POST_STbri_cPt:
+ return Hexagon::POST_STbri_cNotPt;
+ case Hexagon::POST_STbri_cNotPt:
+ return Hexagon::POST_STbri_cPt;
+
+ case Hexagon::STrib_cPt:
+ return Hexagon::STrib_cNotPt;
+ case Hexagon::STrib_cNotPt:
+ return Hexagon::STrib_cPt;
+
+ case Hexagon::STrib_indexed_cPt:
+ return Hexagon::STrib_indexed_cNotPt;
+ case Hexagon::STrib_indexed_cNotPt:
+ return Hexagon::STrib_indexed_cPt;
+
+ case Hexagon::STrib_imm_cPt_V4:
+ return Hexagon::STrib_imm_cNotPt_V4;
+ case Hexagon::STrib_imm_cNotPt_V4:
+ return Hexagon::STrib_imm_cPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ return Hexagon::STrib_indexed_shl_cNotPt_V4;
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cPt_V4;
+
+ // Halfword.
+ case Hexagon::POST_SThri_cPt:
+ return Hexagon::POST_SThri_cNotPt;
+ case Hexagon::POST_SThri_cNotPt:
+ return Hexagon::POST_SThri_cPt;
+
+ case Hexagon::STrih_cPt:
+ return Hexagon::STrih_cNotPt;
+ case Hexagon::STrih_cNotPt:
+ return Hexagon::STrih_cPt;
+
+ case Hexagon::STrih_indexed_cPt:
+ return Hexagon::STrih_indexed_cNotPt;
+ case Hexagon::STrih_indexed_cNotPt:
+ return Hexagon::STrih_indexed_cPt;
+
+ case Hexagon::STrih_imm_cPt_V4:
+ return Hexagon::STrih_imm_cNotPt_V4;
+ case Hexagon::STrih_imm_cNotPt_V4:
+ return Hexagon::STrih_imm_cPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ return Hexagon::STrih_indexed_shl_cNotPt_V4;
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cPt_V4;
+
+ // Word.
+ case Hexagon::POST_STwri_cPt:
+ return Hexagon::POST_STwri_cNotPt;
+ case Hexagon::POST_STwri_cNotPt:
+ return Hexagon::POST_STwri_cPt;
+
+ case Hexagon::STriw_cPt:
+ return Hexagon::STriw_cNotPt;
+ case Hexagon::STriw_cNotPt:
+ return Hexagon::STriw_cPt;
+
+ case Hexagon::STriw_indexed_cPt:
+ return Hexagon::STriw_indexed_cNotPt;
+ case Hexagon::STriw_indexed_cNotPt:
+ return Hexagon::STriw_indexed_cPt;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ return Hexagon::STriw_indexed_shl_cNotPt_V4;
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cPt_V4;
+
+ case Hexagon::STriw_imm_cPt_V4:
+ return Hexagon::STriw_imm_cNotPt_V4;
+ case Hexagon::STriw_imm_cNotPt_V4:
+ return Hexagon::STriw_imm_cPt_V4;
+
+ // Double word.
+ case Hexagon::POST_STdri_cPt:
+ return Hexagon::POST_STdri_cNotPt;
+ case Hexagon::POST_STdri_cNotPt:
+ return Hexagon::POST_STdri_cPt;
+
+ case Hexagon::STrid_cPt:
+ return Hexagon::STrid_cNotPt;
+ case Hexagon::STrid_cNotPt:
+ return Hexagon::STrid_cPt;
+
+ case Hexagon::STrid_indexed_cPt:
+ return Hexagon::STrid_indexed_cNotPt;
+ case Hexagon::STrid_indexed_cNotPt:
+ return Hexagon::STrid_indexed_cPt;
+
+ case Hexagon::STrid_indexed_shl_cPt_V4:
+ return Hexagon::STrid_indexed_shl_cNotPt_V4;
+ case Hexagon::STrid_indexed_shl_cNotPt_V4:
+ return Hexagon::STrid_indexed_shl_cPt_V4;
+
+ // V4 Store to global address.
+ case Hexagon::STd_GP_cPt_V4:
+ return Hexagon::STd_GP_cNotPt_V4;
+ case Hexagon::STd_GP_cNotPt_V4:
+ return Hexagon::STd_GP_cPt_V4;
+
+ case Hexagon::STb_GP_cPt_V4:
+ return Hexagon::STb_GP_cNotPt_V4;
+ case Hexagon::STb_GP_cNotPt_V4:
+ return Hexagon::STb_GP_cPt_V4;
+
+ case Hexagon::STh_GP_cPt_V4:
+ return Hexagon::STh_GP_cNotPt_V4;
+ case Hexagon::STh_GP_cNotPt_V4:
+ return Hexagon::STh_GP_cPt_V4;
+
+ case Hexagon::STw_GP_cPt_V4:
+ return Hexagon::STw_GP_cNotPt_V4;
+ case Hexagon::STw_GP_cNotPt_V4:
+ return Hexagon::STw_GP_cPt_V4;
+
+ // Load.
+ case Hexagon::LDrid_cPt:
+ return Hexagon::LDrid_cNotPt;
+ case Hexagon::LDrid_cNotPt:
+ return Hexagon::LDrid_cPt;
+
+ case Hexagon::LDriw_cPt:
+ return Hexagon::LDriw_cNotPt;
+ case Hexagon::LDriw_cNotPt:
+ return Hexagon::LDriw_cPt;
+
+ case Hexagon::LDrih_cPt:
+ return Hexagon::LDrih_cNotPt;
+ case Hexagon::LDrih_cNotPt:
+ return Hexagon::LDrih_cPt;
+
+ case Hexagon::LDriuh_cPt:
+ return Hexagon::LDriuh_cNotPt;
+ case Hexagon::LDriuh_cNotPt:
+ return Hexagon::LDriuh_cPt;
+
+ case Hexagon::LDrib_cPt:
+ return Hexagon::LDrib_cNotPt;
+ case Hexagon::LDrib_cNotPt:
+ return Hexagon::LDrib_cPt;
+
+ case Hexagon::LDriub_cPt:
+ return Hexagon::LDriub_cNotPt;
+ case Hexagon::LDriub_cNotPt:
+ return Hexagon::LDriub_cPt;
+
+ // Load Indexed.
+ case Hexagon::LDrid_indexed_cPt:
+ return Hexagon::LDrid_indexed_cNotPt;
+ case Hexagon::LDrid_indexed_cNotPt:
+ return Hexagon::LDrid_indexed_cPt;
+
+ case Hexagon::LDriw_indexed_cPt:
+ return Hexagon::LDriw_indexed_cNotPt;
+ case Hexagon::LDriw_indexed_cNotPt:
+ return Hexagon::LDriw_indexed_cPt;
+
+ case Hexagon::LDrih_indexed_cPt:
+ return Hexagon::LDrih_indexed_cNotPt;
+ case Hexagon::LDrih_indexed_cNotPt:
+ return Hexagon::LDrih_indexed_cPt;
+
+ case Hexagon::LDriuh_indexed_cPt:
+ return Hexagon::LDriuh_indexed_cNotPt;
+ case Hexagon::LDriuh_indexed_cNotPt:
+ return Hexagon::LDriuh_indexed_cPt;
+
+ case Hexagon::LDrib_indexed_cPt:
+ return Hexagon::LDrib_indexed_cNotPt;
+ case Hexagon::LDrib_indexed_cNotPt:
+ return Hexagon::LDrib_indexed_cPt;
+
+ case Hexagon::LDriub_indexed_cPt:
+ return Hexagon::LDriub_indexed_cNotPt;
+ case Hexagon::LDriub_indexed_cNotPt:
+ return Hexagon::LDriub_indexed_cPt;
+
+ // Post Inc Load.
+ case Hexagon::POST_LDrid_cPt:
+ return Hexagon::POST_LDrid_cNotPt;
+ case Hexagon::POST_LDriw_cNotPt:
+ return Hexagon::POST_LDriw_cPt;
+
+ case Hexagon::POST_LDrih_cPt:
+ return Hexagon::POST_LDrih_cNotPt;
+ case Hexagon::POST_LDrih_cNotPt:
+ return Hexagon::POST_LDrih_cPt;
+
+ case Hexagon::POST_LDriuh_cPt:
+ return Hexagon::POST_LDriuh_cNotPt;
+ case Hexagon::POST_LDriuh_cNotPt:
+ return Hexagon::POST_LDriuh_cPt;
+
+ case Hexagon::POST_LDrib_cPt:
+ return Hexagon::POST_LDrib_cNotPt;
+ case Hexagon::POST_LDrib_cNotPt:
+ return Hexagon::POST_LDrib_cPt;
+
+ case Hexagon::POST_LDriub_cPt:
+ return Hexagon::POST_LDriub_cNotPt;
+ case Hexagon::POST_LDriub_cNotPt:
+ return Hexagon::POST_LDriub_cPt;
+
+ // Dealloc_return.
+ case Hexagon::DEALLOC_RET_cPt_V4:
+ return Hexagon::DEALLOC_RET_cNotPt_V4;
+ case Hexagon::DEALLOC_RET_cNotPt_V4:
+ return Hexagon::DEALLOC_RET_cPt_V4;
+
+ // New Value Jump.
+ // JMPEQ_ri - with -1.
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ return Hexagon::JMP_EQriNotPtneg_nv_V4;
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ return Hexagon::JMP_EQriPtneg_nv_V4;
+
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ return Hexagon::JMP_EQriNotPntneg_nv_V4;
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+ return Hexagon::JMP_EQriPntneg_nv_V4;
+
+ // JMPEQ_ri.
+ case Hexagon::JMP_EQriPt_nv_V4:
+ return Hexagon::JMP_EQriNotPt_nv_V4;
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ return Hexagon::JMP_EQriPt_nv_V4;
+
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ return Hexagon::JMP_EQriNotPnt_nv_V4;
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+ return Hexagon::JMP_EQriPnt_nv_V4;
+
+ // JMPEQ_rr.
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ return Hexagon::JMP_EQrrNotPt_nv_V4;
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ return Hexagon::JMP_EQrrPt_nv_V4;
+
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ return Hexagon::JMP_EQrrNotPnt_nv_V4;
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+ return Hexagon::JMP_EQrrPnt_nv_V4;
+
+ // JMPGT_ri - with -1.
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ return Hexagon::JMP_GTriNotPtneg_nv_V4;
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ return Hexagon::JMP_GTriPtneg_nv_V4;
+
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ return Hexagon::JMP_GTriNotPntneg_nv_V4;
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ return Hexagon::JMP_GTriPntneg_nv_V4;
+
+ // JMPGT_ri.
+ case Hexagon::JMP_GTriPt_nv_V4:
+ return Hexagon::JMP_GTriNotPt_nv_V4;
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ return Hexagon::JMP_GTriPt_nv_V4;
+
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ return Hexagon::JMP_GTriNotPnt_nv_V4;
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+ return Hexagon::JMP_GTriPnt_nv_V4;
+
+ // JMPGT_rr.
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ return Hexagon::JMP_GTrrNotPt_nv_V4;
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ return Hexagon::JMP_GTrrPt_nv_V4;
+
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ return Hexagon::JMP_GTrrNotPnt_nv_V4;
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrPnt_nv_V4;
+
+ // JMPGT_rrdn.
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTrrdnPt_nv_V4;
+
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnPnt_nv_V4;
+
+ // JMPGTU_ri.
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ return Hexagon::JMP_GTUriNotPt_nv_V4;
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ return Hexagon::JMP_GTUriPt_nv_V4;
+
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ return Hexagon::JMP_GTUriNotPnt_nv_V4;
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+ return Hexagon::JMP_GTUriPnt_nv_V4;
+
+ // JMPGTU_rr.
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrPt_nv_V4;
+
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPnt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrPnt_nv_V4;
+
+ // JMPGTU_rrdn.
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPt_nv_V4;
+
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPnt_nv_V4;
+ }
+}
+
+
+int HexagonInstrInfo::
+getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+ enum Hexagon::PredSense inPredSense;
+ inPredSense = invertPredicate ? Hexagon::PredSense_false :
+ Hexagon::PredSense_true;
+ int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense);
+ if (CondOpcode >= 0) // Valid Conditional opcode/instruction
+ return CondOpcode;
+
+ // This switch case will be removed once all the instructions have been
+ // modified to use relation maps.
+ switch(Opc) {
+ case Hexagon::TFR:
+ return !invertPredicate ? Hexagon::TFR_cPt :
+ Hexagon::TFR_cNotPt;
+ case Hexagon::TFRI_f:
+ return !invertPredicate ? Hexagon::TFRI_cPt_f :
+ Hexagon::TFRI_cNotPt_f;
+ case Hexagon::TFRI:
+ return !invertPredicate ? Hexagon::TFRI_cPt :
+ Hexagon::TFRI_cNotPt;
+ case Hexagon::JMP:
+ return !invertPredicate ? Hexagon::JMP_c :
+ Hexagon::JMP_cNot;
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ return !invertPredicate ? Hexagon::JMP_EQrrPt_nv_V4 :
+ Hexagon::JMP_EQrrNotPt_nv_V4;
+ case Hexagon::JMP_EQriPt_nv_V4:
+ return !invertPredicate ? Hexagon::JMP_EQriPt_nv_V4 :
+ Hexagon::JMP_EQriNotPt_nv_V4;
+ case Hexagon::COMBINE_rr:
+ return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
+ Hexagon::COMBINE_rr_cNotPt;
+ case Hexagon::ASLH:
+ return !invertPredicate ? Hexagon::ASLH_cPt_V4 :
+ Hexagon::ASLH_cNotPt_V4;
+ case Hexagon::ASRH:
+ return !invertPredicate ? Hexagon::ASRH_cPt_V4 :
+ Hexagon::ASRH_cNotPt_V4;
+ case Hexagon::SXTB:
+ return !invertPredicate ? Hexagon::SXTB_cPt_V4 :
+ Hexagon::SXTB_cNotPt_V4;
+ case Hexagon::SXTH:
+ return !invertPredicate ? Hexagon::SXTH_cPt_V4 :
+ Hexagon::SXTH_cNotPt_V4;
+ case Hexagon::ZXTB:
+ return !invertPredicate ? Hexagon::ZXTB_cPt_V4 :
+ Hexagon::ZXTB_cNotPt_V4;
+ case Hexagon::ZXTH:
+ return !invertPredicate ? Hexagon::ZXTH_cPt_V4 :
+ Hexagon::ZXTH_cNotPt_V4;
+
+ case Hexagon::JMPR:
+ return !invertPredicate ? Hexagon::JMPR_cPt :
+ Hexagon::JMPR_cNotPt;
+
+ // V4 indexed+scaled load.
+ case Hexagon::LDrid_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 :
+ Hexagon::LDrid_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrib_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 :
+ Hexagon::LDrib_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+ Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrih_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 :
+ Hexagon::LDrih_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriw_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 :
+ Hexagon::LDriw_indexed_shl_cNotPt_V4;
+
+ // V4 Load from global address
+ case Hexagon::LDd_GP_V4:
+ return !invertPredicate ? Hexagon::LDd_GP_cPt_V4 :
+ Hexagon::LDd_GP_cNotPt_V4;
+ case Hexagon::LDb_GP_V4:
+ return !invertPredicate ? Hexagon::LDb_GP_cPt_V4 :
+ Hexagon::LDb_GP_cNotPt_V4;
+ case Hexagon::LDub_GP_V4:
+ return !invertPredicate ? Hexagon::LDub_GP_cPt_V4 :
+ Hexagon::LDub_GP_cNotPt_V4;
+ case Hexagon::LDh_GP_V4:
+ return !invertPredicate ? Hexagon::LDh_GP_cPt_V4 :
+ Hexagon::LDh_GP_cNotPt_V4;
+ case Hexagon::LDuh_GP_V4:
+ return !invertPredicate ? Hexagon::LDuh_GP_cPt_V4 :
+ Hexagon::LDuh_GP_cNotPt_V4;
+ case Hexagon::LDw_GP_V4:
+ return !invertPredicate ? Hexagon::LDw_GP_cPt_V4 :
+ Hexagon::LDw_GP_cNotPt_V4;
+
+ // Byte.
+ case Hexagon::POST_STbri:
+ return !invertPredicate ? Hexagon::POST_STbri_cPt :
+ Hexagon::POST_STbri_cNotPt;
+ case Hexagon::STrib:
+ return !invertPredicate ? Hexagon::STrib_cPt :
+ Hexagon::STrib_cNotPt;
+ case Hexagon::STrib_indexed:
+ return !invertPredicate ? Hexagon::STrib_indexed_cPt :
+ Hexagon::STrib_indexed_cNotPt;
+ case Hexagon::STrib_imm_V4:
+ return !invertPredicate ? Hexagon::STrib_imm_cPt_V4 :
+ Hexagon::STrib_imm_cNotPt_V4;
+ case Hexagon::STrib_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrib_indexed_shl_cPt_V4 :
+ Hexagon::STrib_indexed_shl_cNotPt_V4;
+ // Halfword.
+ case Hexagon::POST_SThri:
+ return !invertPredicate ? Hexagon::POST_SThri_cPt :
+ Hexagon::POST_SThri_cNotPt;
+ case Hexagon::STrih:
+ return !invertPredicate ? Hexagon::STrih_cPt :
+ Hexagon::STrih_cNotPt;
+ case Hexagon::STrih_indexed:
+ return !invertPredicate ? Hexagon::STrih_indexed_cPt :
+ Hexagon::STrih_indexed_cNotPt;
+ case Hexagon::STrih_imm_V4:
+ return !invertPredicate ? Hexagon::STrih_imm_cPt_V4 :
+ Hexagon::STrih_imm_cNotPt_V4;
+ case Hexagon::STrih_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrih_indexed_shl_cPt_V4 :
+ Hexagon::STrih_indexed_shl_cNotPt_V4;
+ // Word.
+ case Hexagon::POST_STwri:
+ return !invertPredicate ? Hexagon::POST_STwri_cPt :
+ Hexagon::POST_STwri_cNotPt;
+ case Hexagon::STriw:
+ return !invertPredicate ? Hexagon::STriw_cPt :
+ Hexagon::STriw_cNotPt;
+ case Hexagon::STriw_indexed:
+ return !invertPredicate ? Hexagon::STriw_indexed_cPt :
+ Hexagon::STriw_indexed_cNotPt;
+ case Hexagon::STriw_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STriw_indexed_shl_cPt_V4 :
+ Hexagon::STriw_indexed_shl_cNotPt_V4;
+ case Hexagon::STriw_imm_V4:
+ return !invertPredicate ? Hexagon::STriw_imm_cPt_V4 :
+ Hexagon::STriw_imm_cNotPt_V4;
+ // Double word.
+ case Hexagon::POST_STdri:
+ return !invertPredicate ? Hexagon::POST_STdri_cPt :
+ Hexagon::POST_STdri_cNotPt;
+ case Hexagon::STrid:
+ return !invertPredicate ? Hexagon::STrid_cPt :
+ Hexagon::STrid_cNotPt;
+ case Hexagon::STrid_indexed:
+ return !invertPredicate ? Hexagon::STrid_indexed_cPt :
+ Hexagon::STrid_indexed_cNotPt;
+ case Hexagon::STrid_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrid_indexed_shl_cPt_V4 :
+ Hexagon::STrid_indexed_shl_cNotPt_V4;
+
+ // V4 Store to global address
+ case Hexagon::STd_GP_V4:
+ return !invertPredicate ? Hexagon::STd_GP_cPt_V4 :
+ Hexagon::STd_GP_cNotPt_V4;
+ case Hexagon::STb_GP_V4:
+ return !invertPredicate ? Hexagon::STb_GP_cPt_V4 :
+ Hexagon::STb_GP_cNotPt_V4;
+ case Hexagon::STh_GP_V4:
+ return !invertPredicate ? Hexagon::STh_GP_cPt_V4 :
+ Hexagon::STh_GP_cNotPt_V4;
+ case Hexagon::STw_GP_V4:
+ return !invertPredicate ? Hexagon::STw_GP_cPt_V4 :
+ Hexagon::STw_GP_cNotPt_V4;
+
+ // Load.
+ case Hexagon::LDrid:
+ return !invertPredicate ? Hexagon::LDrid_cPt :
+ Hexagon::LDrid_cNotPt;
+ case Hexagon::LDriw:
+ return !invertPredicate ? Hexagon::LDriw_cPt :
+ Hexagon::LDriw_cNotPt;
+ case Hexagon::LDrih:
+ return !invertPredicate ? Hexagon::LDrih_cPt :
+ Hexagon::LDrih_cNotPt;
+ case Hexagon::LDriuh:
+ return !invertPredicate ? Hexagon::LDriuh_cPt :
+ Hexagon::LDriuh_cNotPt;
+ case Hexagon::LDrib:
+ return !invertPredicate ? Hexagon::LDrib_cPt :
+ Hexagon::LDrib_cNotPt;
+ case Hexagon::LDriub:
+ return !invertPredicate ? Hexagon::LDriub_cPt :
+ Hexagon::LDriub_cNotPt;
+ // Load Indexed.
+ case Hexagon::LDrid_indexed:
+ return !invertPredicate ? Hexagon::LDrid_indexed_cPt :
+ Hexagon::LDrid_indexed_cNotPt;
+ case Hexagon::LDriw_indexed:
+ return !invertPredicate ? Hexagon::LDriw_indexed_cPt :
+ Hexagon::LDriw_indexed_cNotPt;
+ case Hexagon::LDrih_indexed:
+ return !invertPredicate ? Hexagon::LDrih_indexed_cPt :
+ Hexagon::LDrih_indexed_cNotPt;
+ case Hexagon::LDriuh_indexed:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt :
+ Hexagon::LDriuh_indexed_cNotPt;
+ case Hexagon::LDrib_indexed:
+ return !invertPredicate ? Hexagon::LDrib_indexed_cPt :
+ Hexagon::LDrib_indexed_cNotPt;
+ case Hexagon::LDriub_indexed:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt :
+ Hexagon::LDriub_indexed_cNotPt;
+ // Post Increment Load.
+ case Hexagon::POST_LDrid:
+ return !invertPredicate ? Hexagon::POST_LDrid_cPt :
+ Hexagon::POST_LDrid_cNotPt;
+ case Hexagon::POST_LDriw:
+ return !invertPredicate ? Hexagon::POST_LDriw_cPt :
+ Hexagon::POST_LDriw_cNotPt;
+ case Hexagon::POST_LDrih:
+ return !invertPredicate ? Hexagon::POST_LDrih_cPt :
+ Hexagon::POST_LDrih_cNotPt;
+ case Hexagon::POST_LDriuh:
+ return !invertPredicate ? Hexagon::POST_LDriuh_cPt :
+ Hexagon::POST_LDriuh_cNotPt;
+ case Hexagon::POST_LDrib:
+ return !invertPredicate ? Hexagon::POST_LDrib_cPt :
+ Hexagon::POST_LDrib_cNotPt;
+ case Hexagon::POST_LDriub:
+ return !invertPredicate ? Hexagon::POST_LDriub_cPt :
+ Hexagon::POST_LDriub_cNotPt;
+ // DEALLOC_RETURN.
+ case Hexagon::DEALLOC_RET_V4:
+ return !invertPredicate ? Hexagon::DEALLOC_RET_cPt_V4 :
+ Hexagon::DEALLOC_RET_cNotPt_V4;
+ }
+ llvm_unreachable("Unexpected predicable instruction");
+}
+
+
+bool HexagonInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ int Opc = MI->getOpcode();
+ assert (isPredicable(MI) && "Expected predicable instruction");
+ bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
+ (Cond[0].getImm() == 0));
+
+ // This will change MI's opcode to its predicate version.
+ // However, its operand list is still the old one, i.e. the
+ // non-predicate one.
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
+
+ int oper = -1;
+ unsigned int GAIdx = 0;
+
+ // Indicates whether the current MI has a GlobalAddress operand
+ bool hasGAOpnd = false;
+ std::vector<MachineOperand> tmpOpnds;
+
+ // Indicates whether we need to shift operands to right.
+ bool needShift = true;
+
+ // The predicate is ALWAYS the FIRST input operand !!!
+ if (MI->getNumOperands() == 0) {
+ // The non-predicate version of MI does not take any operands,
+ // i.e. no outs and no ins. In this condition, the predicate
+ // operand will be directly placed at Operands[0]. No operand
+ // shift is needed.
+ // Example: BARRIER
+ needShift = false;
+ oper = -1;
+ }
+ else if ( MI->getOperand(MI->getNumOperands()-1).isReg()
+ && MI->getOperand(MI->getNumOperands()-1).isDef()
+ && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) {
+ // The non-predicate version of MI does not have any input operands.
+ // In this condition, we extend the length of Operands[] by one and
+ // copy the original last operand to the newly allocated slot.
+ // At this moment, it is just a place holder. Later, we will put
+ // predicate operand directly into it. No operand shift is needed.
+ // Example: r0=BARRIER (this is a faked insn used here for illustration)
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ needShift = false;
+ oper = MI->getNumOperands() - 2;
+ }
+ else {
+ // We need to right shift all input operands by one. Duplicate the
+ // last operand into the newly allocated slot.
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ }
+
+ if (needShift)
+ {
+ // Operands[ MI->getNumOperands() - 2 ] has been copied into
+ // Operands[ MI->getNumOperands() - 1 ], so we start from
+ // Operands[ MI->getNumOperands() - 3 ].
+ // oper is a signed int.
+ // It is ok if "MI->getNumOperands()-3" is -3, -2, or -1.
+ for (oper = MI->getNumOperands() - 3; oper >= 0; --oper)
+ {
+ MachineOperand &MO = MI->getOperand(oper);
+
+ // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4] Opnd[5] Opnd[6] Opnd[7]
+ // <Def0> <Def1> <Use0> <Use1> <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1>
+ // /\~
+ // /||\~
+ // ||
+ // Predicate Operand here
+ if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) {
+ break;
+ }
+ if (MO.isReg()) {
+ MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
+ MO.isImplicit(), MO.isKill(),
+ MO.isDead(), MO.isUndef(),
+ MO.isDebug());
+ }
+ else if (MO.isImm()) {
+ MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
+ }
+ else if (MO.isGlobal()) {
+ // MI can not have more than one GlobalAddress operand.
+ assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd");
+
+ // There is no member function called "ChangeToGlobalAddress" in the
+ // MachineOperand class (not like "ChangeToRegister" and
+ // "ChangeToImmediate"). So we have to remove them from Operands[] list
+ // first, and then add them back after we have inserted the predicate
+ // operand. tmpOpnds[] is to remember these operands before we remove
+ // them.
+ tmpOpnds.push_back(MO);
+
+ // Operands[oper] is a GlobalAddress operand;
+ // Operands[oper+1] has been copied into Operands[oper+2];
+ hasGAOpnd = true;
+ GAIdx = oper;
+ continue;
+ }
+ else {
+ assert(false && "Unexpected operand type");
+ }
+ }
+ }
+
+ int regPos = invertJump ? 1 : 0;
+ MachineOperand PredMO = Cond[regPos];
+
+ // [oper] now points to the last explicit Def. Predicate operand must be
+ // located at [oper+1]. See diagram above.
+ // This assumes that the predicate is always the first operand,
+ // i.e. Operands[0+numResults], in the set of inputs
+ // It is better to have an assert here to check this. But I don't know how
+ // to write this assert because findFirstPredOperandIdx() would return -1
+ if (oper < -1) oper = -1;
+ MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
+ PredMO.isImplicit(), PredMO.isKill(),
+ PredMO.isDead(), PredMO.isUndef(),
+ PredMO.isDebug());
+
+ if (hasGAOpnd)
+ {
+ unsigned int i;
+
+ // Operands[GAIdx] is the original GlobalAddress operand, which is
+ // already copied into tmpOpnds[0].
+ // Operands[GAIdx] now stores a copy of Operands[GAIdx-1]
+ // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2],
+ // so we start from [GAIdx+2]
+ for (i = GAIdx + 2; i < MI->getNumOperands(); ++i)
+ tmpOpnds.push_back(MI->getOperand(i));
+
+ // Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ]
+ // It is very important that we always remove from the end of Operands[]
+ // MI->getNumOperands() is at least 2 if program goes to here.
+ for (i = MI->getNumOperands() - 1; i > GAIdx; --i)
+ MI->RemoveOperand(i);
+
+ for (i = 0; i < tmpOpnds.size(); ++i)
+ MI->addOperand(tmpOpnds[i]);
+ }
+
+ return true;
+}
+
+
+bool
+HexagonInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+
+bool
+HexagonInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+}
+
+bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ assert(isPredicated(MI));
+ return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
+bool
+HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if (MO.isReg() && MO.isDef()) {
+ const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg());
+ if (RC == &Hexagon::PredRegsRegClass) {
+ Pred.push_back(MO);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+bool
+HexagonInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ // TODO: Fix this
+ return false;
+}
+
+
+//
+// We indicate that we want to reverse the branch by
+// inserting a 0 at the beginning of the Cond vector.
+//
+bool HexagonInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+ Cond.erase(Cond.begin());
+ } else {
+ Cond.insert(Cond.begin(), MachineOperand::CreateImm(0));
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs,
+ const BranchProbability &Probability) const {
+ return (NumInstrs <= 4);
+}
+
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::DEALLOC_RET_V4 :
+ case Hexagon::DEALLOC_RET_cPt_V4 :
+ case Hexagon::DEALLOC_RET_cNotPt_V4 :
+ case Hexagon::DEALLOC_RET_cdnPnt_V4 :
+ case Hexagon::DEALLOC_RET_cNotdnPnt_V4 :
+ case Hexagon::DEALLOC_RET_cdnPt_V4 :
+ case Hexagon::DEALLOC_RET_cNotdnPt_V4 :
+ return true;
+ }
+}
+
+
+bool HexagonInstrInfo::
+isValidOffset(const int Opcode, const int Offset) const {
+ // This function is to check whether the "Offset" is in the correct range of
+ // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is
+ // inserted to calculate the final address. Due to this reason, the function
+ // assumes that the "Offset" has correct alignment.
+ // We used to assert if the offset was not properly aligned, however,
+ // there are cases where a misaligned pointer recast can cause this
+ // problem, and we need to allow for it. The front end warns of such
+ // misaligns with respect to load size.
+
+ switch(Opcode) {
+
+ case Hexagon::LDriw:
+ case Hexagon::LDriw_indexed:
+ case Hexagon::LDriw_f:
+ case Hexagon::STriw_indexed:
+ case Hexagon::STriw:
+ case Hexagon::STriw_f:
+ return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMW_OFFSET_MAX);
+
+ case Hexagon::LDrid:
+ case Hexagon::LDrid_indexed:
+ case Hexagon::LDrid_f:
+ case Hexagon::STrid:
+ case Hexagon::STrid_indexed:
+ case Hexagon::STrid_f:
+ return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMD_OFFSET_MAX);
+
+ case Hexagon::LDrih:
+ case Hexagon::LDriuh:
+ case Hexagon::STrih:
+ return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMH_OFFSET_MAX);
+
+ case Hexagon::LDrib:
+ case Hexagon::STrib:
+ case Hexagon::LDriub:
+ return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMB_OFFSET_MAX);
+
+ case Hexagon::ADD_ri:
+ case Hexagon::TFR_FI:
+ return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
+ (Offset <= Hexagon_ADDI_OFFSET_MAX);
+
+ case Hexagon::MemOPw_ADDi_V4 :
+ case Hexagon::MemOPw_SUBi_V4 :
+ case Hexagon::MemOPw_ADDr_V4 :
+ case Hexagon::MemOPw_SUBr_V4 :
+ case Hexagon::MemOPw_ANDr_V4 :
+ case Hexagon::MemOPw_ORr_V4 :
+ return (0 <= Offset && Offset <= 255);
+
+ case Hexagon::MemOPh_ADDi_V4 :
+ case Hexagon::MemOPh_SUBi_V4 :
+ case Hexagon::MemOPh_ADDr_V4 :
+ case Hexagon::MemOPh_SUBr_V4 :
+ case Hexagon::MemOPh_ANDr_V4 :
+ case Hexagon::MemOPh_ORr_V4 :
+ return (0 <= Offset && Offset <= 127);
+
+ case Hexagon::MemOPb_ADDi_V4 :
+ case Hexagon::MemOPb_SUBi_V4 :
+ case Hexagon::MemOPb_ADDr_V4 :
+ case Hexagon::MemOPb_SUBr_V4 :
+ case Hexagon::MemOPb_ANDr_V4 :
+ case Hexagon::MemOPb_ORr_V4 :
+ return (0 <= Offset && Offset <= 63);
+
+ // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of
+ // any size. Later pass knows how to handle it.
+ case Hexagon::STriw_pred:
+ case Hexagon::LDriw_pred:
+ return true;
+
+ case Hexagon::LOOP0_i:
+ return isUInt<10>(Offset);
+
+ // INLINEASM is very special.
+ case Hexagon::INLINEASM:
+ return true;
+ }
+
+ llvm_unreachable("No offset range is defined for this opcode. "
+ "Please define it in the above switch statement!");
+}
+
+
+//
+// Check if the Offset is a valid auto-inc imm by Load/Store Type.
+//
+bool HexagonInstrInfo::
+isValidAutoIncImm(const EVT VT, const int Offset) const {
+
+ if (VT == MVT::i64) {
+ return (Offset >= Hexagon_MEMD_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMD_AUTOINC_MAX &&
+ (Offset & 0x7) == 0);
+ }
+ if (VT == MVT::i32) {
+ return (Offset >= Hexagon_MEMW_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMW_AUTOINC_MAX &&
+ (Offset & 0x3) == 0);
+ }
+ if (VT == MVT::i16) {
+ return (Offset >= Hexagon_MEMH_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMH_AUTOINC_MAX &&
+ (Offset & 0x1) == 0);
+ }
+ if (VT == MVT::i8) {
+ return (Offset >= Hexagon_MEMB_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMB_AUTOINC_MAX);
+ }
+ llvm_unreachable("Not an auto-inc opc!");
+}
+
+
+bool HexagonInstrInfo::
+isMemOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode())
+ {
+ default: return false;
+ case Hexagon::MemOPw_ADDi_V4 :
+ case Hexagon::MemOPw_SUBi_V4 :
+ case Hexagon::MemOPw_ADDr_V4 :
+ case Hexagon::MemOPw_SUBr_V4 :
+ case Hexagon::MemOPw_ANDr_V4 :
+ case Hexagon::MemOPw_ORr_V4 :
+ case Hexagon::MemOPh_ADDi_V4 :
+ case Hexagon::MemOPh_SUBi_V4 :
+ case Hexagon::MemOPh_ADDr_V4 :
+ case Hexagon::MemOPh_SUBr_V4 :
+ case Hexagon::MemOPh_ANDr_V4 :
+ case Hexagon::MemOPh_ORr_V4 :
+ case Hexagon::MemOPb_ADDi_V4 :
+ case Hexagon::MemOPb_SUBi_V4 :
+ case Hexagon::MemOPb_ADDr_V4 :
+ case Hexagon::MemOPb_SUBr_V4 :
+ case Hexagon::MemOPb_ANDr_V4 :
+ case Hexagon::MemOPb_ORr_V4 :
+ case Hexagon::MemOPb_SETBITi_V4:
+ case Hexagon::MemOPh_SETBITi_V4:
+ case Hexagon::MemOPw_SETBITi_V4:
+ case Hexagon::MemOPb_CLRBITi_V4:
+ case Hexagon::MemOPh_CLRBITi_V4:
+ case Hexagon::MemOPw_CLRBITi_V4:
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isSpillPredRegOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::STriw_pred :
+ case Hexagon::LDriw_pred :
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::CMPEQrr:
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPLTrr:
+ case Hexagon::CMPGTrr:
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPGTUrr:
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGEri:
+ case Hexagon::CMPGEUri:
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::
+isConditionalTransfer (const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::TFRI_cdnNotPt:
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ default: return false;
+ case Hexagon::ADD_ri_cPt:
+ case Hexagon::ADD_ri_cNotPt:
+ case Hexagon::ADD_rr_cPt:
+ case Hexagon::ADD_rr_cNotPt:
+ case Hexagon::XOR_rr_cPt:
+ case Hexagon::XOR_rr_cNotPt:
+ case Hexagon::AND_rr_cPt:
+ case Hexagon::AND_rr_cNotPt:
+ case Hexagon::OR_rr_cPt:
+ case Hexagon::OR_rr_cNotPt:
+ case Hexagon::SUB_rr_cPt:
+ case Hexagon::SUB_rr_cNotPt:
+ case Hexagon::COMBINE_rr_cPt:
+ case Hexagon::COMBINE_rr_cNotPt:
+ return true;
+ case Hexagon::ASLH_cPt_V4:
+ case Hexagon::ASLH_cNotPt_V4:
+ case Hexagon::ASRH_cPt_V4:
+ case Hexagon::ASRH_cNotPt_V4:
+ case Hexagon::SXTB_cPt_V4:
+ case Hexagon::SXTB_cNotPt_V4:
+ case Hexagon::SXTH_cPt_V4:
+ case Hexagon::SXTH_cNotPt_V4:
+ case Hexagon::ZXTB_cPt_V4:
+ case Hexagon::ZXTB_cNotPt_V4:
+ case Hexagon::ZXTH_cPt_V4:
+ case Hexagon::ZXTH_cNotPt_V4:
+ return QRI.Subtarget.hasV4TOps();
+ }
+}
+
+bool HexagonInstrInfo::
+isConditionalLoad (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ default: return false;
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ return true;
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ return QRI.Subtarget.hasV4TOps();
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ return QRI.Subtarget.hasV4TOps();
+ }
+}
+
+// Returns true if an instruction is a conditional store.
+//
+// Note: It doesn't include conditional new-value stores as they can't be
+// converted to .new predicate.
+//
+// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
+// ^ ^
+// / \ (not OK. it will cause new-value store to be
+// / X conditional on p0.new while R2 producer is
+// / \ on p0)
+// / \.
+// p.new store p.old NV store
+// [if(p0.new)memw(R0+#0)=R2] [if(p0)memw(R0+#0)=R2.new]
+// ^ ^
+// \ /
+// \ /
+// \ /
+// p.old store
+// [if (p0)memw(R0+#0)=R2]
+//
+// The above diagram shows the steps involoved in the conversion of a predicated
+// store instruction to its .new predicated new-value form.
+//
+// The following set of instructions further explains the scenario where
+// conditional new-value store becomes invalid when promoted to .new predicate
+// form.
+//
+// { 1) if (p0) r0 = add(r1, r2)
+// 2) p0 = cmp.eq(r3, #0) }
+//
+// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with
+// the first two instructions because in instr 1, r0 is conditional on old value
+// of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which
+// is not valid for new-value stores.
+bool HexagonInstrInfo::
+isConditionalStore (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ default: return false;
+ case Hexagon::STrib_imm_cPt_V4 :
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrib_cPt :
+ case Hexagon::STrib_cNotPt :
+ case Hexagon::POST_STbri_cPt :
+ case Hexagon::POST_STbri_cNotPt :
+ case Hexagon::STrid_indexed_cPt :
+ case Hexagon::STrid_indexed_cNotPt :
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ case Hexagon::POST_STdri_cPt :
+ case Hexagon::POST_STdri_cNotPt :
+ case Hexagon::STrih_cPt :
+ case Hexagon::STrih_cNotPt :
+ case Hexagon::STrih_indexed_cPt :
+ case Hexagon::STrih_indexed_cNotPt :
+ case Hexagon::STrih_imm_cPt_V4 :
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::POST_SThri_cPt :
+ case Hexagon::POST_SThri_cNotPt :
+ case Hexagon::STriw_cPt :
+ case Hexagon::STriw_cNotPt :
+ case Hexagon::STriw_indexed_cPt :
+ case Hexagon::STriw_indexed_cNotPt :
+ case Hexagon::STriw_imm_cPt_V4 :
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::POST_STwri_cPt :
+ case Hexagon::POST_STwri_cNotPt :
+ return QRI.Subtarget.hasV4TOps();
+
+ // V4 global address store before promoting to dot new.
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ return QRI.Subtarget.hasV4TOps();
+
+ // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
+ // from the "Conditional Store" list. Because a predicated new value store
+ // would NOT be promoted to a double dot new store. See diagram below:
+ // This function returns yes for those stores that are predicated but not
+ // yet promoted to predicate dot new instructions.
+ //
+ // +---------------------+
+ // /-----| if (p0) memw(..)=r0 |---------\~
+ // || +---------------------+ ||
+ // promote || /\ /\ || promote
+ // || /||\ /||\ ||
+ // \||/ demote || \||/
+ // \/ || || \/
+ // +-------------------------+ || +-------------------------+
+ // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new |
+ // +-------------------------+ || +-------------------------+
+ // || || ||
+ // || demote \||/
+ // promote || \/ NOT possible
+ // || || /\~
+ // \||/ || /||\~
+ // \/ || ||
+ // +-----------------------------+
+ // | if (p0.new) memw(..)=r0.new |
+ // +-----------------------------+
+ // Double Dot New Store
+ //
+ }
+}
+
+// Returns true, if any one of the operands is a dot new
+// insn, whether it is predicated dot new or register dot new.
+bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
+ return (isNewValueInst(MI) ||
+ (isPredicated(MI) && isPredicatedNew(MI)));
+}
+
+unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask);
+}
+
+/// immediateExtend - Changes the instruction in place to one using an immediate
+/// extender.
+void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
+ assert((isExtendable(MI)||isConstExtended(MI)) &&
+ "Instruction must be extendable");
+ // Find which operand is extendable.
+ short ExtOpNum = getCExtOpNum(MI);
+ MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // This needs to be something we understand.
+ assert((MO.isMBB() || MO.isImm()) &&
+ "Branch with unknown extendable field type");
+ // Mark given operand as extended.
+ MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
+}
+
+DFAPacketizer *HexagonInstrInfo::
+CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return TM->getSubtarget<HexagonGenSubtargetInfo>().createDFAPacketizer(II);
+}
+
+bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // due to the special treatment of IT instructions below, otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. It should be the actual
+ // instruction preceding the dbg_value instruction(s), just like it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel() || MI->isInlineAsm())
+ return true;
+
+ return false;
+}
+
+bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
+
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
+
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+ if (isExtended) // Instruction must be extended.
+ return true;
+
+ unsigned isExtendable = (F >> HexagonII::ExtendablePos)
+ & HexagonII::ExtendableMask;
+ if (!isExtendable)
+ return false;
+
+ short ExtOpNum = getCExtOpNum(MI);
+ const MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // Use MO operand flags to determine if MO
+ // has the HMOTF_ConstExtended flag set.
+ if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended)
+ return true;
+ // If this is a Machine BB address we are talking about, and it is
+ // not marked as extended, say so.
+ if (MO.isMBB())
+ return false;
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+ // We currently only handle isGlobal() because it is the only kind of
+ // object we are going to end up with here for now.
+ // In the future we probably should add isSymbol(), etc.
+ if (MO.isGlobal() || MO.isSymbol())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int MinValue = getMinValue(MI);
+ int MaxValue = getMaxValue(MI);
+ int ImmValue = MO.getImm();
+
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
+
+// Returns true if a particular operand is extendable for an instruction.
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
+ unsigned short OperandNum) const {
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
+
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
+}
+
+// Returns Operand Index for the constant extended instruction.
+unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+// Returns the min value that doesn't need to be extended.
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1 << (bits - 1);
+ else
+ return 0;
+}
+
+// Returns the max value that doesn't need to be extended.
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1 << (bits - 1));
+ else
+ return ~(-1 << bits);
+}
+
+// Returns true if an instruction can be converted into a non-extended
+// equivalent instruction.
+bool HexagonInstrInfo::NonExtEquivalentExists (const MachineInstr *MI) const {
+
+ short NonExtOpcode;
+ // Check if the instruction has a register form that uses register in place
+ // of the extended operand, if so return that as the non-extended form.
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
+ return true;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+ // Check addressing mode and retreive non-ext equivalent instruction.
+
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ // Load/store with absolute addressing mode can be converted into
+ // base+offset mode.
+ NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode());
+ break;
+ case HexagonII::BaseImmOffset :
+ // Load/store with base+offset addressing mode can be converted into
+ // base+register offset addressing mode. However left shift operand should
+ // be set to 0.
+ NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ break;
+ default:
+ return false;
+ }
+ if (NonExtOpcode < 0)
+ return false;
+ return true;
+ }
+ return false;
+}
+
+// Returns opcode of the non-extended equivalent instruction.
+short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
+
+ // Check if the instruction has a register form that uses register in place
+ // of the extended operand, if so return that as the non-extended form.
+ short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
+ if (NonExtOpcode >= 0)
+ return NonExtOpcode;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+ // Check addressing mode and retreive non-ext equivalent instruction.
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ return Hexagon::getBasedWithImmOffset(MI->getOpcode());
+ case HexagonII::BaseImmOffset :
+ return Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ default:
+ return -1;
+ }
+ }
+ return -1;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
new file mode 100644
index 000000000000..5df13a88b5d3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -0,0 +1,207 @@
+//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonINSTRUCTIONINFO_H
+#define HexagonINSTRUCTIONINFO_H
+
+#include "HexagonRegisterInfo.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+
+#define GET_INSTRINFO_HEADER
+#include "HexagonGenInstrInfo.inc"
+
+namespace llvm {
+
+class HexagonInstrInfo : public HexagonGenInstrInfo {
+ const HexagonRegisterInfo RI;
+ const HexagonSubtarget& Subtarget;
+public:
+ explicit HexagonInstrInfo(HexagonSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ unsigned createVR(MachineFunction* MF, MVT VT) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+ virtual bool
+ PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+ virtual bool isPredicatedNew(const MachineInstr *MI) const;
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ virtual bool
+ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles,
+ const BranchProbability &Probability) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+ virtual DFAPacketizer*
+ CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
+ bool isValidOffset(const int Opcode, const int Offset) const;
+ bool isValidAutoIncImm(const EVT VT, const int Offset) const;
+ bool isMemOp(const MachineInstr *MI) const;
+ bool isSpillPredRegOp(const MachineInstr *MI) const;
+ bool isU6_3Immediate(const int value) const;
+ bool isU6_2Immediate(const int value) const;
+ bool isU6_1Immediate(const int value) const;
+ bool isU6_0Immediate(const int value) const;
+ bool isS4_3Immediate(const int value) const;
+ bool isS4_2Immediate(const int value) const;
+ bool isS4_1Immediate(const int value) const;
+ bool isS4_0Immediate(const int value) const;
+ bool isS12_Immediate(const int value) const;
+ bool isU6_Immediate(const int value) const;
+ bool isS8_Immediate(const int value) const;
+ bool isS6_Immediate(const int value) const;
+
+ bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const;
+ bool isConditionalTransfer(const MachineInstr* MI) const;
+ bool isConditionalALU32 (const MachineInstr* MI) const;
+ bool isConditionalLoad (const MachineInstr* MI) const;
+ bool isConditionalStore(const MachineInstr* MI) const;
+ bool isNewValueInst(const MachineInstr* MI) const;
+ bool isDotNewInst(const MachineInstr* MI) const;
+ bool isDeallocRet(const MachineInstr *MI) const;
+ unsigned getInvertedPredicatedOpcode(const int Opc) const;
+ bool isExtendable(const MachineInstr* MI) const;
+ bool isExtended(const MachineInstr* MI) const;
+ bool isPostIncrement(const MachineInstr* MI) const;
+ bool isNewValueStore(const MachineInstr* MI) const;
+ bool isNewValueJump(const MachineInstr* MI) const;
+ bool isNewValueJumpCandidate(const MachineInstr *MI) const;
+
+
+ void immediateExtend(MachineInstr *MI) const;
+ bool isConstExtended(MachineInstr *MI) const;
+ unsigned getAddrMode(const MachineInstr* MI) const;
+ bool isOperandExtended(const MachineInstr *MI,
+ unsigned short OperandNum) const;
+ unsigned short getCExtOpNum(const MachineInstr *MI) const;
+ int getMinValue(const MachineInstr *MI) const;
+ int getMaxValue(const MachineInstr *MI) const;
+ bool NonExtEquivalentExists (const MachineInstr *MI) const;
+ short getNonExtOpcode(const MachineInstr *MI) const;
+private:
+ int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
new file mode 100644
index 000000000000..74dc0ca72a04
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -0,0 +1,2765 @@
+//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormats.td"
+include "HexagonOperands.td"
+
+// Multi-class for logical operators.
+multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
+ def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$b),
+ (i32 IntRegs:$c)))]>;
+ def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")),
+ [(set (i32 IntRegs:$dst), (OpNode s10Imm:$b,
+ (i32 IntRegs:$c)))]>;
+}
+
+// Multi-class for compare ops.
+let isCompare = 1 in {
+multiclass CMP64_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i64 DoubleRegs:$b), (i64 DoubleRegs:$c)))]>;
+}
+multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+}
+
+multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> {
+ let CextOpcode = CextOp in {
+ let InputType = "reg" in
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+
+ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1,
+ opExtentBits = 10, InputType = "imm" in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Ext:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), s10ExtPred:$c))]>;
+ }
+}
+
+multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> {
+ let CextOpcode = CextOp in {
+ let InputType = "reg" in
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+
+ let isExtendable = 1, opExtendable = 2, isExtentSigned = 0,
+ opExtentBits = 9, InputType = "imm" in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Ext:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), u9ExtPred:$c))]>;
+ }
+}
+
+multiclass CMP32_ri_u8<string OpcStr, PatFrag OpNode> {
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Ext:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
+ u8ExtPred:$c))]>;
+}
+
+multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
+ s8ExtPred:$c))]>;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// ALU32/ALU (Instructions with register-register form)
+//===----------------------------------------------------------------------===//
+multiclass ALU32_Pbase<string mnemonic, bit isNot,
+ bit isPredNew> {
+
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
+ ") $dst = ")#mnemonic#"($src2, $src3)",
+ []>;
+}
+
+multiclass ALU32_Pred<string mnemonic, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>;
+ }
+}
+
+let InputType = "reg" in
+multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_rr in {
+ let isPredicable = 1 in
+ def NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = "#mnemonic#"($src1, $src2)",
+ [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+ let neverHasSideEffects = 1, isPredicated = 1 in {
+ defm Pt : ALU32_Pred<mnemonic, 0>;
+ defm NotPt : ALU32_Pred<mnemonic, 1>;
+ }
+ }
+}
+
+let isCommutable = 1 in {
+ defm ADD_rr : ALU32_base<"add", "ADD", add>, ImmRegRel, PredNewRel;
+ defm AND_rr : ALU32_base<"and", "AND", and>, ImmRegRel, PredNewRel;
+ defm XOR_rr : ALU32_base<"xor", "XOR", xor>, ImmRegRel, PredNewRel;
+ defm OR_rr : ALU32_base<"or", "OR", or>, ImmRegRel, PredNewRel;
+}
+
+defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
+
+//===----------------------------------------------------------------------===//
+// ALU32/ALU (ADD with register-immediate form)
+//===----------------------------------------------------------------------===//
+multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
+ ") $dst = ")#mnemonic#"($src2, #$src3)",
+ []>;
+}
+
+multiclass ALU32ri_Pred<string mnemonic, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>;
+ }
+}
+
+let isExtendable = 1, InputType = "imm" in
+multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in {
+ let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16,
+ isPredicable = 1 in
+ def NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s16Ext:$src2),
+ "$dst = "#mnemonic#"($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
+ (s16ExtPred:$src2)))]>;
+
+ let opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+ neverHasSideEffects = 1, isPredicated = 1 in {
+ defm Pt : ALU32ri_Pred<mnemonic, 0>;
+ defm NotPt : ALU32ri_Pred<mnemonic, 1>;
+ }
+ }
+}
+
+defm ADD_ri : ALU32ri_base<"add", "ADD", add>, ImmRegRel, PredNewRel;
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
+CextOpcode = "OR", InputType = "imm" in
+def OR_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s10Ext:$src2),
+ "$dst = or($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
+ s10ExtPred:$src2))]>, ImmRegRel;
+
+def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1),
+ "$dst = not($src1)",
+ [(set (i32 IntRegs:$dst), (not (i32 IntRegs:$src1)))]>;
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
+InputType = "imm", CextOpcode = "AND" in
+def AND_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s10Ext:$src2),
+ "$dst = and($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
+ s10ExtPred:$src2))]>, ImmRegRel;
+// Negate.
+def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = neg($src1)",
+ [(set (i32 IntRegs:$dst), (ineg (i32 IntRegs:$src1)))]>;
+// Nop.
+let neverHasSideEffects = 1 in
+def NOP : ALU32_rr<(outs), (ins),
+ "nop",
+ []>;
+
+// Rd32=sub(#s10,Rs32)
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 10,
+CextOpcode = "SUB", InputType = "imm" in
+def SUB_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins s10Ext:$src1, IntRegs:$src2),
+ "$dst = sub(#$src1, $src2)",
+ [(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>,
+ ImmRegRel;
+
+
+multiclass TFR_Pred<bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
+ []>;
+ // Predicate new
+ let PNewValue = "new" in
+ def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
+ []>;
+ }
+}
+
+let InputType = "reg", neverHasSideEffects = 1 in
+multiclass TFR_base<string CextOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let isPredicable = 1 in
+ def NAME : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+
+ let isPredicated = 1 in {
+ defm Pt : TFR_Pred<0>;
+ defm NotPt : TFR_Pred<1>;
+ }
+ }
+}
+
+class T_TFR64_Pred<bit PredNot, bit isPredNew>
+ : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#
+ !if(isPredNew, ".new) ", ") ")#"$dst = $src2", []>
+{
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+
+ let IClass = 0b1111;
+ let Inst{27-24} = 0b1101;
+ let Inst{13} = isPredNew;
+ let Inst{7} = PredNot;
+ let Inst{4-0} = dst;
+ let Inst{6-5} = src1;
+ let Inst{20-17} = src2{4-1};
+ let Inst{16} = 0b1;
+ let Inst{12-9} = src2{4-1};
+ let Inst{8} = 0b0;
+}
+
+multiclass TFR64_Pred<bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : T_TFR64_Pred<PredNot, 0>;
+
+ let PNewValue = "new" in
+ def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new
+ }
+}
+
+let neverHasSideEffects = 1 in
+multiclass TFR64_base<string BaseName> {
+ let BaseOpcode = BaseName in {
+ let isPredicable = 1 in
+ def NAME : ALU32Inst <(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1),
+ "$dst = $src1" > {
+ bits<5> dst;
+ bits<5> src1;
+
+ let IClass = 0b1111;
+ let Inst{27-23} = 0b01010;
+ let Inst{4-0} = dst;
+ let Inst{20-17} = src1{4-1};
+ let Inst{16} = 0b1;
+ let Inst{12-9} = src1{4-1};
+ let Inst{8} = 0b0;
+ }
+
+ let isPredicated = 1 in {
+ defm Pt : TFR64_Pred<0>;
+ defm NotPt : TFR64_Pred<1>;
+ }
+ }
+}
+
+multiclass TFRI_Pred<bit PredNot> {
+ let isMoveImm = 1, PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Ext:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2",
+ []>;
+
+ // Predicate new
+ let PNewValue = "new" in
+ def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Ext:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2",
+ []>;
+ }
+}
+
+let InputType = "imm", isExtendable = 1, isExtentSigned = 1 in
+multiclass TFRI_base<string CextOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#I in {
+ let isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16,
+ isMoveImm = 1, isPredicable = 1, isReMaterializable = 1 in
+ def NAME : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
+ "$dst = #$src1",
+ [(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>;
+
+ let opExtendable = 2, opExtentBits = 12, neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ defm Pt : TFRI_Pred<0>;
+ defm NotPt : TFRI_Pred<1>;
+ }
+ }
+}
+
+defm TFRI : TFRI_base<"TFR">, ImmRegRel, PredNewRel;
+defm TFR : TFR_base<"TFR">, ImmRegRel, PredNewRel;
+defm TFR64 : TFR64_base<"TFR64">, PredNewRel;
+
+// Transfer control register.
+let neverHasSideEffects = 1 in
+def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine.
+
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+def HexagonWrapperCombineII :
+ SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
+def HexagonWrapperCombineRR :
+ SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
+
+// Combines the two integer registers SRC1 and SRC2 into a double register.
+let isPredicable = 1 in
+def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2),
+ "$dst = combine($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2))))]>;
+
+// Rd=combine(Rt.[HL], Rs.[HL])
+class COMBINE_halves<string A, string B>: ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1,
+ IntRegs:$src2),
+ "$dst = combine($src1."# A #", $src2."# B #")", []>;
+
+let isPredicable = 1 in {
+ def COMBINE_hh : COMBINE_halves<"H", "H">;
+ def COMBINE_hl : COMBINE_halves<"H", "L">;
+ def COMBINE_lh : COMBINE_halves<"L", "H">;
+ def COMBINE_ll : COMBINE_halves<"L", "L">;
+}
+
+def : Pat<(i32 (trunc (i64 (srl (i64 DoubleRegs:$a), (i32 16))))),
+ (COMBINE_lh (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_hireg),
+ (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_loreg))>;
+
+// Combines the two immediates SRC1 and SRC2 into a double register.
+class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
+ ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
+ "$dst = combine(#$src1, #$src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
+def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
+
+// Mux.
+def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ "$dst = vmux($src1, $src2, $src3)",
+ []>;
+
+let CextOpcode = "MUX", InputType = "reg" in
+def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst = mux($src1, $src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))]>, ImmRegRel;
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "MUX", InputType = "imm" in
+def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
+ IntRegs:$src3),
+ "$dst = mux($src1, #$src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), s8ExtPred:$src2,
+ (i32 IntRegs:$src3))))]>, ImmRegRel;
+
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "MUX", InputType = "imm" in
+def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
+ s8Ext:$src3),
+ "$dst = mux($src1, $src2, #$src3)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
+ s8ExtPred:$src3)))]>, ImmRegRel;
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
+def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, #$src2, #$src3)",
+ [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
+ s8ExtPred:$src2,
+ s8ImmPred:$src3)))]>;
+
+// ALU32 - aslh, asrh, sxtb, sxth, zxtb, zxth
+multiclass ALU32_2op_Pbase<string mnemonic, bit isNot, bit isPredNew> {
+ let isPredicatedNew = isPredNew in
+ def NAME : ALU32Inst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
+ ") $dst = ")#mnemonic#"($src2)">,
+ Requires<[HasV4T]>;
+}
+
+multiclass ALU32_2op_Pred<string mnemonic, bit PredNot> {
+ let isPredicatedFalse = PredNot in {
+ defm _c#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 1>;
+ }
+}
+
+multiclass ALU32_2op_base<string mnemonic> {
+ let BaseOpcode = mnemonic in {
+ let isPredicable = 1, neverHasSideEffects = 1 in
+ def NAME : ALU32Inst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1),
+ "$dst = "#mnemonic#"($src1)">;
+
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT, isPredicated = 1,
+ neverHasSideEffects = 1 in {
+ defm Pt_V4 : ALU32_2op_Pred<mnemonic, 0>;
+ defm NotPt_V4 : ALU32_2op_Pred<mnemonic, 1>;
+ }
+ }
+}
+
+defm ASLH : ALU32_2op_base<"aslh">, PredNewRel;
+defm ASRH : ALU32_2op_base<"asrh">, PredNewRel;
+defm SXTB : ALU32_2op_base<"sxtb">, PredNewRel;
+defm SXTH : ALU32_2op_base<"sxth">, PredNewRel;
+defm ZXTB : ALU32_2op_base<"zxtb">, PredNewRel;
+defm ZXTH : ALU32_2op_base<"zxth">, PredNewRel;
+
+def : Pat <(shl (i32 IntRegs:$src1), (i32 16)),
+ (ASLH IntRegs:$src1)>;
+
+def : Pat <(sra (i32 IntRegs:$src1), (i32 16)),
+ (ASRH IntRegs:$src1)>;
+
+def : Pat <(sext_inreg (i32 IntRegs:$src1), i8),
+ (SXTB IntRegs:$src1)>;
+
+def : Pat <(sext_inreg (i32 IntRegs:$src1), i16),
+ (SXTH IntRegs:$src1)>;
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+
+// Conditional combine.
+let neverHasSideEffects = 1, isPredicated = 1 in {
+def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = combine($src2, $src3)",
+ []>;
+
+let isPredicatedFalse = 1 in
+def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = combine($src2, $src3)",
+ []>;
+
+let isPredicatedNew = 1 in
+def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = combine($src2, $src3)",
+ []>;
+
+let isPredicatedNew = 1, isPredicatedFalse = 1 in
+def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = combine($src2, $src3)",
+ []>;
+}
+
+// Compare.
+defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel;
+defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel;
+defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPLTU : CMP32_rr<"cmp.ltu", setult>;
+defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel;
+defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
+defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>;
+
+def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = cl0($src1)",
+ [(set (i32 IntRegs:$dst), (ctlz (i32 IntRegs:$src1)))]>;
+
+def CTTZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = ct0($src1)",
+ [(set (i32 IntRegs:$dst), (cttz (i32 IntRegs:$src1)))]>;
+
+def CTLZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = cl0($src1)",
+ [(set (i32 IntRegs:$dst), (i32 (trunc (ctlz (i64 DoubleRegs:$src1)))))]>;
+
+def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = ct0($src1)",
+ [(set (i32 IntRegs:$dst), (i32 (trunc (cttz (i64 DoubleRegs:$src1)))))]>;
+
+def TSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>;
+
+def TSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (and (shl 1, (u5ImmPred:$src2)), (i32 IntRegs:$src1)), 0))]>;
+
+//===----------------------------------------------------------------------===//
+// ALU32/PRED -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst), (add (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))]>;
+
+// Add halfword.
+
+// Compare.
+defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>;
+defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>;
+defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>;
+
+// Logical operations.
+def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))]>;
+
+def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst), (or (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))]>;
+
+def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))]>;
+
+// Maximum.
+def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 (setlt (i32 IntRegs:$src2),
+ (i32 IntRegs:$src1))),
+ (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
+
+def MAXUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = maxu($src2, $src1)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 (setult (i32 IntRegs:$src2),
+ (i32 IntRegs:$src1))),
+ (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
+
+def MAXd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setlt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>;
+
+def MAXUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = maxu($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setult (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>;
+
+// Minimum.
+def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 (setgt (i32 IntRegs:$src2),
+ (i32 IntRegs:$src1))),
+ (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
+
+def MINUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = minu($src2, $src1)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 (setugt (i32 IntRegs:$src2),
+ (i32 IntRegs:$src1))),
+ (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
+
+def MINd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setgt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>;
+
+def MINUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = minu($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setugt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>;
+
+// Subtract.
+def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst), (sub (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))]>;
+
+// Subtract halfword.
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/BIT +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/PERM +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CR +
+//===----------------------------------------------------------------------===//
+// Logical reductions on predicates.
+
+// Looping instructions.
+
+// Pipelined looping instructions.
+
+// Logical operations on predicates.
+def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set (i1 PredRegs:$dst), (and (i1 PredRegs:$src1),
+ (i1 PredRegs:$src2)))]>;
+
+let neverHasSideEffects = 1 in
+def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = and($src1, !$src2)",
+ []>;
+
+def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = any8($src1)",
+ []>;
+
+def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = all8($src1)",
+ []>;
+
+def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = vitpack($src1, $src2)",
+ []>;
+
+def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = valignb($src1, $src2, $src3)",
+ []>;
+
+def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = vspliceb($src1, $src2, $src3)",
+ []>;
+
+def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1),
+ "$dst = mask($src1)",
+ []>;
+
+def NOT_p : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = not($src1)",
+ [(set (i1 PredRegs:$dst), (not (i1 PredRegs:$src1)))]>;
+
+def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set (i1 PredRegs:$dst), (or (i1 PredRegs:$src1),
+ (i1 PredRegs:$src2)))]>;
+
+def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set (i1 PredRegs:$dst), (xor (i1 PredRegs:$src1),
+ (i1 PredRegs:$src2)))]>;
+
+
+// User control register transfer.
+//===----------------------------------------------------------------------===//
+// CR -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Jump to address.
+let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in {
+ def JMP : JInst< (outs),
+ (ins brtarget:$offset),
+ "jump $offset",
+ [(br bb:$offset)]>;
+}
+
+// if (p0) jump
+let isBranch = 1, isTerminator=1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_c : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src) jump $offset",
+ [(brcond (i1 PredRegs:$src), bb:$offset)]>;
+}
+
+// if (!p0) jump
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cNot : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src) jump $offset",
+ []>;
+}
+
+let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst),
+ "if ($pred) jump $dst",
+ []>;
+}
+
+// Jump to address conditioned on new predicate.
+// if (p0) jump:t
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnPt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src.new) jump:t $offset",
+ []>;
+}
+
+// if (!p0) jump:t
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnNotPt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src.new) jump:t $offset",
+ []>;
+}
+
+// Not taken.
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnPnt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src.new) jump:nt $offset",
+ []>;
+}
+
+// Not taken.
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnNotPnt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src.new) jump:nt $offset",
+ []>;
+}
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// Jump to address from register.
+let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR: JRInst<(outs), (ins),
+ "jumpr r31",
+ [(retflag)]>;
+}
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if ($src1) jumpr r31",
+ []>;
+}
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if (!$src1) jumpr r31",
+ []>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+///
+// Load -- MEMri operand
+multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($addr)",
+ []>;
+}
+
+multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let isExtendable = 1, neverHasSideEffects = 1 in
+multiclass LD_MEMri<string mnemonic, string CextOp, RegisterClass RC,
+ bits<5> ImmBits, bits<5> PredImmBits> {
+
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME : LDInst2<(outs RC:$dst), (ins MEMri:$addr),
+ "$dst = "#mnemonic#"($addr)",
+ []>;
+
+ let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : LD_MEMri_Pred<mnemonic, RC, 0 >;
+ defm NotPt : LD_MEMri_Pred<mnemonic, RC, 1 >;
+ }
+ }
+}
+
+let addrMode = BaseImmOffset, isMEMri = "true" in {
+ defm LDrib: LD_MEMri < "memb", "LDrib", IntRegs, 11, 6>, AddrModeRel;
+ defm LDriub: LD_MEMri < "memub" , "LDriub", IntRegs, 11, 6>, AddrModeRel;
+ defm LDrih: LD_MEMri < "memh", "LDrih", IntRegs, 12, 7>, AddrModeRel;
+ defm LDriuh: LD_MEMri < "memuh", "LDriuh", IntRegs, 12, 7>, AddrModeRel;
+ defm LDriw: LD_MEMri < "memw", "LDriw", IntRegs, 13, 8>, AddrModeRel;
+ defm LDrid: LD_MEMri < "memd", "LDrid", DoubleRegs, 14, 9>, AddrModeRel;
+}
+
+def : Pat < (i32 (sextloadi8 ADDRriS11_0:$addr)),
+ (LDrib ADDRriS11_0:$addr) >;
+
+def : Pat < (i32 (zextloadi8 ADDRriS11_0:$addr)),
+ (LDriub ADDRriS11_0:$addr) >;
+
+def : Pat < (i32 (sextloadi16 ADDRriS11_1:$addr)),
+ (LDrih ADDRriS11_1:$addr) >;
+
+def : Pat < (i32 (zextloadi16 ADDRriS11_1:$addr)),
+ (LDriuh ADDRriS11_1:$addr) >;
+
+def : Pat < (i32 (load ADDRriS11_2:$addr)),
+ (LDriw ADDRriS11_2:$addr) >;
+
+def : Pat < (i64 (load ADDRriS11_3:$addr)),
+ (LDrid ADDRriS11_3:$addr) >;
+
+
+// Load - Base with Immediate offset addressing mode
+multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2+#$src3)",
+ []>;
+}
+
+multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
+ }
+}
+
+let isExtendable = 1, neverHasSideEffects = 1 in
+multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+ bits<5> PredImmBits> {
+
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1, AddedComplexity = 20 in
+ def NAME : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1+#$offset)",
+ []>;
+
+ let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 0 >;
+ defm NotPt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 1 >;
+ }
+ }
+}
+
+let addrMode = BaseImmOffset in {
+ defm LDrib_indexed: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext,
+ 11, 6>, AddrModeRel;
+ defm LDriub_indexed: LD_Idxd <"memub" , "LDriub", IntRegs, s11_0Ext, u6_0Ext,
+ 11, 6>, AddrModeRel;
+ defm LDrih_indexed: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext,
+ 12, 7>, AddrModeRel;
+ defm LDriuh_indexed: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext,
+ 12, 7>, AddrModeRel;
+ defm LDriw_indexed: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext,
+ 13, 8>, AddrModeRel;
+ defm LDrid_indexed: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext,
+ 14, 9>, AddrModeRel;
+}
+
+let AddedComplexity = 20 in {
+def : Pat < (i32 (sextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
+ (LDrib_indexed IntRegs:$src1, s11_0ExtPred:$offset) >;
+
+def : Pat < (i32 (zextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
+ (LDriub_indexed IntRegs:$src1, s11_0ExtPred:$offset) >;
+
+def : Pat < (i32 (sextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
+ (LDrih_indexed IntRegs:$src1, s11_1ExtPred:$offset) >;
+
+def : Pat < (i32 (zextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
+ (LDriuh_indexed IntRegs:$src1, s11_1ExtPred:$offset) >;
+
+def : Pat < (i32 (load (add IntRegs:$src1, s11_2ExtPred:$offset))),
+ (LDriw_indexed IntRegs:$src1, s11_2ExtPred:$offset) >;
+
+def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
+ (LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment load
+// Make sure that in post increment load, the first operand is always the post
+// increment operand.
+//===----------------------------------------------------------------------===//
+
+multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2++#$offset)",
+ [],
+ "$src2 = $dst2">;
+}
+
+multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME#_V4 : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
+
+multiclass LD_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
+
+ let BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+ let isPredicated = 1 in {
+ defm Pt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
+
+let hasCtrlDep = 1, neverHasSideEffects = 1 in {
+ defm POST_LDrib : LD_PostInc<"memb", "LDrib", IntRegs, s4_0Imm>,
+ PredNewRel;
+ defm POST_LDriub : LD_PostInc<"memub", "LDriub", IntRegs, s4_0Imm>,
+ PredNewRel;
+ defm POST_LDrih : LD_PostInc<"memh", "LDrih", IntRegs, s4_1Imm>,
+ PredNewRel;
+ defm POST_LDriuh : LD_PostInc<"memuh", "LDriuh", IntRegs, s4_1Imm>,
+ PredNewRel;
+ defm POST_LDriw : LD_PostInc<"memw", "LDriw", IntRegs, s4_2Imm>,
+ PredNewRel;
+ defm POST_LDrid : LD_PostInc<"memd", "LDrid", DoubleRegs, s4_3Imm>,
+ PredNewRel;
+}
+
+def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)),
+ (i32 (LDrib ADDRriS11_0:$addr)) >;
+
+// Load byte any-extend.
+def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
+ (i32 (LDrib ADDRriS11_0:$addr)) >;
+
+// Indexed load byte any-extend.
+let AddedComplexity = 20 in
+def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
+ (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >;
+
+def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
+ (i32 (LDrih ADDRriS11_1:$addr))>;
+
+let AddedComplexity = 20 in
+def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))),
+ (i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >;
+
+let AddedComplexity = 10 in
+def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
+ (i32 (LDriub ADDRriS11_0:$addr))>;
+
+let AddedComplexity = 20 in
+def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
+ (i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>;
+
+// Load predicate.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in
+def LDriw_pred : LDInst2<(outs PredRegs:$dst),
+ (ins MEMri:$addr),
+ "Error; should not emit",
+ []>;
+
+// Deallocate stack frame.
+let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
+ def DEALLOCFRAME : LDInst2<(outs), (ins),
+ "deallocframe",
+ []>;
+}
+
+// Load and unpack bytes to halfwords.
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH +
+//===----------------------------------------------------------------------===//
+// Multiply and use lower result.
+// Rd=+mpyi(Rs,#u8)
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
+def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Ext:$src2),
+ "$dst =+ mpyi($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
+ u8ExtPred:$src2))]>;
+
+// Rd=-mpyi(Rs,#u8)
+def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst =- mpyi($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (ineg (mul (i32 IntRegs:$src1),
+ u8ImmPred:$src2)))]>;
+
+// Rd=mpyi(Rs,#m9)
+// s9 is NOT the same as m9 - but it works.. so far.
+// Assembler maps to either Rd=+mpyi(Rs,#u8 or Rd=-mpyi(Rs,#u8)
+// depending on the value of m9. See Arch Spec.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
+CextOpcode = "MPYI", InputType = "imm" in
+def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
+ "$dst = mpyi($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
+ s9ExtPred:$src2))]>, ImmRegRel;
+
+// Rd=mpyi(Rs,Rt)
+let CextOpcode = "MPYI", InputType = "reg" in
+def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyi($src1, $src2)",
+ [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>, ImmRegRel;
+
+// Rx+=mpyi(Rs,#u8)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8,
+CextOpcode = "MPYI_acc", InputType = "imm" in
+def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3),
+ "$dst += mpyi($src2, #$src3)",
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), u8ExtPred:$src3),
+ (i32 IntRegs:$src1)))],
+ "$src1 = $dst">, ImmRegRel;
+
+// Rx+=mpyi(Rs,Rt)
+let CextOpcode = "MPYI_acc", InputType = "reg" in
+def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyi($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ (i32 IntRegs:$src1)))],
+ "$src1 = $dst">, ImmRegRel;
+
+// Rx-=mpyi(Rs,#u8)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8 in
+def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3),
+ "$dst -= mpyi($src2, #$src3)",
+ [(set (i32 IntRegs:$dst),
+ (sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+ u8ExtPred:$src3)))],
+ "$src1 = $dst">;
+
+// Multiply and use upper result.
+// Rd=mpy(Rs,Rt.H):<<1:rnd:sat
+// Rd=mpy(Rs,Rt.L):<<1:rnd:sat
+// Rd=mpy(Rs,Rt)
+def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set (i32 IntRegs:$dst), (mulhs (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+// Rd=mpy(Rs,Rt):rnd
+// Rd=mpyu(Rs,Rt)
+def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set (i32 IntRegs:$dst), (mulhu (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+// Multiply and use full result.
+// Rdd=mpyu(Rs,Rt)
+def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (mul (i64 (anyext (i32 IntRegs:$src1))),
+ (i64 (anyext (i32 IntRegs:$src2)))))]>;
+
+// Rdd=mpy(Rs,Rt)
+def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (mul (i64 (sext (i32 IntRegs:$src1))),
+ (i64 (sext (i32 IntRegs:$src2)))))]>;
+
+// Multiply and accumulate, use full result.
+// Rxx[+-]=mpy(Rs,Rt)
+// Rxx+=mpy(Rs,Rt)
+def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpy($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (add (mul (i64 (sext (i32 IntRegs:$src2))),
+ (i64 (sext (i32 IntRegs:$src3)))),
+ (i64 DoubleRegs:$src1)))],
+ "$src1 = $dst">;
+
+// Rxx-=mpy(Rs,Rt)
+def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst -= mpy($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (sext (i32 IntRegs:$src2))),
+ (i64 (sext (i32 IntRegs:$src3))))))],
+ "$src1 = $dst">;
+
+// Rxx[+-]=mpyu(Rs,Rt)
+// Rxx+=mpyu(Rs,Rt)
+def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (add (mul (i64 (anyext (i32 IntRegs:$src2))),
+ (i64 (anyext (i32 IntRegs:$src3)))),
+ (i64 DoubleRegs:$src1)))], "$src1 = $dst">;
+
+// Rxx-=mpyu(Rs,Rt)
+def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst -= mpyu($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (anyext (i32 IntRegs:$src2))),
+ (i64 (anyext (i32 IntRegs:$src3))))))],
+ "$src1 = $dst">;
+
+
+let InputType = "reg", CextOpcode = "ADD_acc" in
+def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += add($src2, $src3)",
+ [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3)),
+ (i32 IntRegs:$src1)))],
+ "$src1 = $dst">, ImmRegRel;
+
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+InputType = "imm", CextOpcode = "ADD_acc" in
+def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Ext:$src3),
+ "$dst += add($src2, #$src3)",
+ [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
+ s8_16ExtPred:$src3),
+ (i32 IntRegs:$src1)))],
+ "$src1 = $dst">, ImmRegRel;
+
+let CextOpcode = "SUB_acc", InputType = "reg" in
+def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst -= add($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">, ImmRegRel;
+
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "SUB_acc", InputType = "imm" in
+def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Ext:$src3),
+ "$dst -= add($src2, #$src3)",
+ [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1),
+ (add (i32 IntRegs:$src2),
+ s8_16ExtPred:$src3)))],
+ "$src1 = $dst">, ImmRegRel;
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VB +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+// Store doubleword.
+
+//===----------------------------------------------------------------------===//
+// Post increment store
+//===----------------------------------------------------------------------===//
+
+multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2PI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3",
+ [],
+ "$src2 = $dst">;
+}
+
+multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME# : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME#_V4 : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
+
+let hasCtrlDep = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
+
+ let hasCtrlDep = 1, BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME : STInst2PI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+ #mnemonic#"($src1++#$offset) = $src2",
+ [],
+ "$src1 = $dst">;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
+
+defm POST_STbri: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
+defm POST_SThri: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
+defm POST_STwri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
+
+let isNVStorable = 0 in
+defm POST_STdri: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm>, AddrModeRel;
+
+def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2,
+ s4_3ImmPred:$offset),
+ (POST_STbri IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>;
+
+def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2,
+ s4_3ImmPred:$offset),
+ (POST_SThri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
+
+def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset),
+ (POST_STwri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
+
+def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
+ s4_3ImmPred:$offset),
+ (POST_STdri IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>;
+
+//===----------------------------------------------------------------------===//
+// multiclass for the store instructions with MEMri operand.
+//===----------------------------------------------------------------------===//
+multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($addr) = $src2",
+ []>;
+}
+
+multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
+
+ // Predicate new
+ let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
+ defm _cdn#NAME#_V4 : ST_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_MEMri<string mnemonic, string CextOp, RegisterClass RC,
+ bits<5> ImmBits, bits<5> PredImmBits> {
+
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME : STInst2<(outs),
+ (ins MEMri:$addr, RC:$src),
+ mnemonic#"($addr) = $src",
+ []>;
+
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : ST_MEMri_Pred<mnemonic, RC, 0>;
+ defm NotPt : ST_MEMri_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+let addrMode = BaseImmOffset, isMEMri = "true" in {
+ defm STrib: ST_MEMri < "memb", "STrib", IntRegs, 11, 6>, AddrModeRel;
+ defm STrih: ST_MEMri < "memh", "STrih", IntRegs, 12, 7>, AddrModeRel;
+ defm STriw: ST_MEMri < "memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
+
+ let isNVStorable = 0 in
+ defm STrid: ST_MEMri < "memd", "STrid", DoubleRegs, 14, 9>, AddrModeRel;
+}
+
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr),
+ (STrib ADDRriS11_0:$addr, (i32 IntRegs:$src1))>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr),
+ (STrih ADDRriS11_1:$addr, (i32 IntRegs:$src1))>;
+
+def : Pat<(store (i32 IntRegs:$src1), ADDRriS11_2:$addr),
+ (STriw ADDRriS11_2:$addr, (i32 IntRegs:$src1))>;
+
+def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
+ (STrid ADDRriS11_3:$addr, (i64 DoubleRegs:$src1))>;
+
+
+//===----------------------------------------------------------------------===//
+// multiclass for the store instructions with base+immediate offset
+// addressing mode
+//===----------------------------------------------------------------------===//
+multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = $src4",
+ []>;
+}
+
+multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true"), isPredicated = 1 in {
+ defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
+
+ // Predicate new
+ let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
+ defm _cdn#NAME#_V4 : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
+ }
+}
+
+let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+ bits<5> PredImmBits> {
+
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME : STInst2<(outs),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2) = $src3",
+ []>;
+
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits in {
+ defm Pt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 0>;
+ defm NotPt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 1>;
+ }
+ }
+}
+
+let addrMode = BaseImmOffset, InputType = "reg" in {
+ defm STrib_indexed: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext,
+ u6_0Ext, 11, 6>, AddrModeRel, ImmRegRel;
+ defm STrih_indexed: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext,
+ u6_1Ext, 12, 7>, AddrModeRel, ImmRegRel;
+ defm STriw_indexed: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext,
+ u6_2Ext, 13, 8>, AddrModeRel, ImmRegRel;
+ let isNVStorable = 0 in
+ defm STrid_indexed: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext,
+ u6_3Ext, 14, 9>, AddrModeRel;
+}
+
+let AddedComplexity = 10 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), (add IntRegs:$src2,
+ s11_0ExtPred:$offset)),
+ (STrib_indexed IntRegs:$src2, s11_0ImmPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), (add IntRegs:$src2,
+ s11_1ExtPred:$offset)),
+ (STrih_indexed IntRegs:$src2, s11_1ImmPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+def : Pat<(store (i32 IntRegs:$src1), (add IntRegs:$src2,
+ s11_2ExtPred:$offset)),
+ (STriw_indexed IntRegs:$src2, s11_2ImmPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2,
+ s11_3ExtPred:$offset)),
+ (STrid_indexed IntRegs:$src2, s11_3ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>;
+}
+
+// memh(Rx++#s4:1)=Rt.H
+
+// Store word.
+// Store predicate.
+let Defs = [R10,R11,D5], neverHasSideEffects = 1 in
+def STriw_pred : STInst2<(outs),
+ (ins MEMri:$addr, PredRegs:$src1),
+ "Error; should not emit",
+ []>;
+
+// Allocate stack frame.
+let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
+ def ALLOCFRAME : STInst2<(outs),
+ (ins i32imm:$amt),
+ "allocframe(#$amt)",
+ []>;
+}
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/ALU +
+//===----------------------------------------------------------------------===//
+// Logical NOT.
+def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = not($src1)",
+ [(set (i64 DoubleRegs:$dst), (not (i64 DoubleRegs:$src1)))]>;
+
+
+// Sign extend word to doubleword.
+def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtw($src1)",
+ [(set (i64 DoubleRegs:$dst), (sext (i32 IntRegs:$src1)))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/BIT +
+//===----------------------------------------------------------------------===//
+// clrbit.
+def CLRBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = clrbit($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
+ (not
+ (shl 1, u5ImmPred:$src2))))]>;
+
+def CLRBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = clrbit($src1, #$src2)",
+ []>;
+
+// Map from r0 = and(r1, 2147483647) to r0 = clrbit(r1, #31).
+def : Pat <(and (i32 IntRegs:$src1), 2147483647),
+ (CLRBIT_31 (i32 IntRegs:$src1), 31)>;
+
+// setbit.
+def SETBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = setbit($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
+ (shl 1, u5ImmPred:$src2)))]>;
+
+// Map from r0 = or(r1, -2147483648) to r0 = setbit(r1, #31).
+def SETBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = setbit($src1, #$src2)",
+ []>;
+
+def : Pat <(or (i32 IntRegs:$src1), -2147483648),
+ (SETBIT_31 (i32 IntRegs:$src1), 31)>;
+
+// togglebit.
+def TOGBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = setbit($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (xor (i32 IntRegs:$src1),
+ (shl 1, u5ImmPred:$src2)))]>;
+
+// Map from r0 = xor(r1, -2147483648) to r0 = togglebit(r1, #31).
+def TOGBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = togglebit($src1, #$src2)",
+ []>;
+
+def : Pat <(xor (i32 IntRegs:$src1), -2147483648),
+ (TOGBIT_31 (i32 IntRegs:$src1), 31)>;
+
+// Predicate transfer.
+let neverHasSideEffects = 1 in
+def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1),
+ "$dst = $src1 /* Should almost never emit this. */",
+ []>;
+
+def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1 /* Should almost never emit this. */",
+ [(set (i1 PredRegs:$dst), (trunc (i32 IntRegs:$src1)))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+// Shift by immediate.
+def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1),
+ u5ImmPred:$src2))]>;
+
+def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1),
+ u6ImmPred:$src2))]>;
+
+def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asl($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
+ u5ImmPred:$src2))]>;
+
+def ASLd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asl($src1, #$src2)",
+ [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
+ u6ImmPred:$src2))]>;
+
+def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1),
+ u5ImmPred:$src2))]>;
+
+def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1),
+ u6ImmPred:$src2))]>;
+
+// Shift by immediate and add.
+let AddedComplexity = 100 in
+def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ u3Imm:$src3),
+ "$dst = addasl($src1, $src2, #$src3)",
+ [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u3ImmPred:$src3)))]>;
+
+// Shift by register.
+def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asl($src1, $src2)",
+ [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+def LSL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = lsl($src1, $src2)",
+ [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+def ASLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ "$dst = asl($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ "$dst = lsl($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VW +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER +
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/USER +
+//===----------------------------------------------------------------------===//
+def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
+ [SDNPHasChain]>;
+
+let hasSideEffects = 1, isSolo = 1 in
+def BARRIER : SYSInst<(outs), (ins),
+ "barrier",
+ [(HexagonBARRIER)]>;
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER -
+//===----------------------------------------------------------------------===//
+
+// TFRI64 - assembly mapped.
+let isReMaterializable = 1 in
+def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
+ "$dst = #$src1",
+ [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>;
+
+// Pseudo instruction to encode a set of conditional transfers.
+// This instruction is used instead of a mux and trades-off codesize
+// for performance. We conduct this transformation optimistically in
+// the hope that these instructions get promoted to dot-new transfers.
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "Error; should not emit",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1),
+ (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))]>;
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3),
+ "Error; should not emit",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
+ s12ImmPred:$src3)))]>;
+
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3),
+ "Error; should not emit",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
+ (i32 IntRegs:$src3))))]>;
+
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
+ "Error; should not emit",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
+ s12ImmPred:$src3)))]>;
+
+// Generate frameindex addresses.
+let isReMaterializable = 1 in
+def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
+ "$dst = add($src1)",
+ [(set (i32 IntRegs:$dst), ADDRri:$src1)]>;
+
+//
+// CR - Type.
+//
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2),
+ "loop0($offset, #$src2)",
+ []>;
+}
+
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
+ "loop0($offset, $src2)",
+ []>;
+}
+
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
+ Defs = [PC, LC0], Uses = [SA0, LC0] in {
+def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
+}
+
+// Support for generating global address.
+// Taken from X86InstrInfo.td.
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [
+ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+
+// HI/LO Instructions
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def LO : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def HI : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
+ "$dst.l = #LO($imm_value)",
+ []>;
+
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
+ "$dst.h = #HI($imm_value)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+ "$dst.l = #LO($jt)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+ "$dst.h = #HI($jt)",
+ []>;
+
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def LO_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst.l = #LO($label)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1 , neverHasSideEffects = 1 in
+def HI_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst.h = #HI($label)",
+ []>;
+
+// This pattern is incorrect. When we add small data, we should change
+// this pattern to use memw(#foo).
+// This is for sdata.
+let isMoveImm = 1 in
+def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set (i32 IntRegs:$dst),
+ (load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
+
+// This is for non-sdata.
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set (i32 IntRegs:$dst),
+ (HexagonCONST32 tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set_jt : LDInst2<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+ "$dst = CONST32(#$jt)",
+ [(set (i32 IntRegs:$dst),
+ (HexagonCONST32 tjumptable:$jt))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set (i32 IntRegs:$dst),
+ (HexagonCONST32_GP tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global),
+ "$dst = CONST32(#$global)",
+ [(set (i32 IntRegs:$dst), imm:$global) ]>;
+
+// Map BlockAddress lowering to CONST32_Int_Real
+def : Pat<(HexagonCONST32_GP tblockaddress:$addr),
+ (CONST32_Int_Real tblockaddress:$addr)>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst = CONST32($label)",
+ [(set (i32 IntRegs:$dst), (HexagonCONST32 bbl:$label))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST64_Int_Real : LDInst2<(outs DoubleRegs:$dst), (ins i64imm:$global),
+ "$dst = CONST64(#$global)",
+ [(set (i64 DoubleRegs:$dst), imm:$global) ]>;
+
+def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins),
+ "$dst = xor($dst, $dst)",
+ [(set (i1 PredRegs:$dst), 0)]>;
+
+def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set (i32 IntRegs:$dst),
+ (trunc (i64 (srl (i64 (mul (i64 (sext (i32 IntRegs:$src1))),
+ (i64 (sext (i32 IntRegs:$src2))))),
+ (i32 32)))))]>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def call : SDNode<"HexagonISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain,
+// Optional Flag and Variable Arguments.
+// Its 1 Operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Defs = [R29, R30], Uses = [R31, R30, R29] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "Should never be emitted",
+ [(callseq_start timm:$amt)]>;
+}
+
+let Defs = [R29, R30, R31], Uses = [R29] in {
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "Should never be emitted",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALL : JInst<(outs), (ins calltarget:$dst),
+ "call $dst", []>;
+}
+
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLR : JRInst<(outs), (ins IntRegs:$dst),
+ "callr $dst",
+ []>;
+ }
+
+// Tail Calls.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
+ def TCRETURNtg : JInst<(outs), (ins calltarget:$dst),
+ "jump $dst // TAILCALL", []>;
+}
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
+ def TCRETURNtext : JInst<(outs), (ins calltarget:$dst),
+ "jump $dst // TAILCALL", []>;
+}
+
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
+ def TCRETURNR : JInst<(outs), (ins IntRegs:$dst),
+ "jumpr $dst // TAILCALL", []>;
+}
+// Map call instruction.
+def : Pat<(call (i32 IntRegs:$dst)),
+ (CALLR (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
+//Tail calls.
+def : Pat<(HexagonTCRet tglobaladdr:$dst),
+ (TCRETURNtg tglobaladdr:$dst)>;
+def : Pat<(HexagonTCRet texternalsym:$dst),
+ (TCRETURNtext texternalsym:$dst)>;
+def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
+ (TCRETURNR (i32 IntRegs:$dst))>;
+
+// Atomic load and store support
+// 8 bit atomic load
+def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
+ (i32 (LDriub ADDRriS11_0:$src1))>;
+
+def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
+ (i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
+
+// 16 bit atomic load
+def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
+ (i32 (LDriuh ADDRriS11_1:$src1))>;
+
+def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
+ (i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
+
+def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
+ (i32 (LDriw ADDRriS11_2:$src1))>;
+
+def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
+ (i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
+
+// 64 bit atomic load
+def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
+ (i64 (LDrid ADDRriS11_3:$src1))>;
+
+def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
+ (i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
+
+
+def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
+ (STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STrib_indexed (i32 IntRegs:$src2), s11_0ImmPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+
+def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
+ (STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_16 (i32 IntRegs:$src1),
+ (add (i32 IntRegs:$src2), s11_1ImmPred:$offset)),
+ (STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
+ (STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_32 (add (i32 IntRegs:$src2), s11_2ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STriw_indexed (i32 IntRegs:$src2), s11_2ImmPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+
+
+
+def : Pat<(atomic_store_64 ADDRriS11_3:$src2, (i64 DoubleRegs:$src1)),
+ (STrid ADDRriS11_3:$src2, (i64 DoubleRegs:$src1))>;
+
+def : Pat<(atomic_store_64 (add (i32 IntRegs:$src2), s11_3ImmPred:$offset),
+ (i64 DoubleRegs:$src1)),
+ (STrid_indexed (i32 IntRegs:$src2), s11_3ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
+def : Pat <(and (i32 IntRegs:$src1), 65535),
+ (ZXTH (i32 IntRegs:$src1))>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def : Pat <(and (i32 IntRegs:$src1), 255),
+ (ZXTB (i32 IntRegs:$src1))>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced,
+// if it does, it got to be mapped to NOOP.
+def : Pat <(add (i1 PredRegs:$src1), -1),
+ (NOT_p (i1 PredRegs:$src1))>;
+
+// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
+// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
+def : Pat <(select (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i32 IntRegs:$src3),
+ (i32 IntRegs:$src4)),
+ (i32 (TFR_condset_rr (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ (i32 IntRegs:$src4), (i32 IntRegs:$src3)))>,
+ Requires<[HasV2TOnly]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ImmPred:$src3),
+ (i32 (TFR_condset_ii (i1 PredRegs:$src1), s8ImmPred:$src3,
+ s8ImmPred:$src2))>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = TFR_condset_ri(p0, r1, #i)
+def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2,
+ (i32 IntRegs:$src3)),
+ (i32 (TFR_condset_ri (i1 PredRegs:$src1), (i32 IntRegs:$src3),
+ s12ImmPred:$src2))>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = TFR_condset_ir(p0, #i, r1)
+def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, s12ImmPred:$src3),
+ (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3,
+ (i32 IntRegs:$src2)))>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
+ (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>;
+
+// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
+def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
+ (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+
+// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+let AddedComplexity = 10 in
+def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
+ (i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
+def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
+ (i64 (SXTW (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg))))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)).
+def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
+ (i64 (SXTW (i32 (SXTH (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg))))))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)).
+def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
+ (i64 (SXTW (i32 (SXTB (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg))))))>;
+
+// We want to prevent emitting pnot's as much as possible.
+// Map brcond with an unsupported setcc to a JMP_cNot.
+def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ bb:$offset),
+ (JMP_cNot (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
+ (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
+ (JMP_c (i1 PredRegs:$src1), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ bb:$offset),
+ (JMP_c (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ bb:$offset),
+ (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ bb:$offset),
+ (JMP_cNot (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ bb:$offset),
+ (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+ bb:$offset)>;
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support 64-bit MUXes; so emulate with combines.
+def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src3)),
+ (i64 (COMBINE_rr (i32 (MUX_rr (i1 PredRegs:$src1),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
+ subreg_hireg)))),
+ (i32 (MUX_rr (i1 PredRegs:$src1),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
+ subreg_loreg))))))>;
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def : Pat <(select (i1 PredRegs:$src1), (i1 PredRegs:$src2),
+ (i1 PredRegs:$src3)),
+ (OR_pp (AND_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)),
+ (AND_pp (NOT_p (i1 PredRegs:$src1)), (i1 PredRegs:$src3)))>;
+
+// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
+def : Pat<(i1 (load ADDRriS11_2:$addr)),
+ (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>;
+
+// Map for truncating from 64 immediates to 32 bit immediates.
+def : Pat<(i32 (trunc (i64 DoubleRegs:$src))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg))>;
+
+// Map for truncating from i64 immediates to i1 bit immediates.
+def : Pat<(i1 (trunc (i64 DoubleRegs:$src))),
+ (i1 (TFR_PdRs (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ subreg_loreg))))>;
+
+// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
+def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
+ (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ subreg_loreg)))>;
+
+// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
+def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
+ (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ subreg_loreg)))>;
+// Map memw(Rs) = Rdd -> memw(Rs) = Rt
+def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
+ (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ subreg_loreg)))>;
+
+// Map memw(Rs) = Rdd -> memw(Rs) = Rt.
+def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
+ (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ subreg_loreg)))>;
+
+// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0.
+def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+
+
+// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
+def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+
+// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt.
+def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0)) )>;
+
+// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs).
+// Hexagon_TODO: We can probably use combine but that will cost 2 instructions.
+// Better way to do this?
+def : Pat<(i64 (anyext (i32 IntRegs:$src1))),
+ (i64 (SXTW (i32 IntRegs:$src1)))>;
+
+// Map cmple -> cmpgt.
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ImmPred:$src2)),
+ (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ImmPred:$src2)))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (NOT_p (CMPGTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (NOT_p (CMPGT64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
+ (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2))))>;
+
+// Map cmpne(Rs) -> !cmpeqe(Rs).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (NOT_p (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>;
+
+// Convert setne back to xor for hexagon since we compute w/ pred registers.
+def : Pat <(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
+ (i1 (XOR_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+
+// Map cmpne(Rss) -> !cmpew(Rss).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (NOT_p (i1 (CMPEHexagon4rr (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))))>;
+
+// Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (NOT_p (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
+
+def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ImmPred:$src2)),
+ (i1 (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (NOT_p (i1 (CMPGT64rr (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1)))))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// rs < rt -> !(rs >= rt).
+def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
+ (i1 (NOT_p (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2)))>;
+
+// Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs).
+// rs < rt -> rt > rs.
+// We can let assembler map it, or we can do in the compiler itself.
+def : Pat <(i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
+
+// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
+// rss < rtt -> (rtt > rss).
+def : Pat <(i1 (setlt (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (CMPGT64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
+
+// Map from cmpltu(Rs, Rd) -> cmpgtu(Rd, Rs)
+// rs < rt -> rt > rs.
+// We can let assembler map it, or we can do in the compiler itself.
+def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
+
+// Map from cmpltu(Rss, Rdd) -> cmpgtu(Rdd, Rss).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
+
+// Generate cmpgeu(Rs, #u8)
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ImmPred:$src2)),
+ (i1 (CMPGEUri (i32 IntRegs:$src1), u8ImmPred:$src2))>;
+
+// Generate cmpgtu(Rs, #u9)
+def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)),
+ (i1 (CMPGTUri (i32 IntRegs:$src1), u9ImmPred:$src2))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1))))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>;
+
+// Map from cmpleu(Rs, Rs) -> !cmpgtu(Rs, Rs).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
+
+// Sign extends.
+// i1 -> i32
+def : Pat <(i32 (sext (i1 PredRegs:$src1))),
+ (i32 (MUX_ii (i1 PredRegs:$src1), -1, 0))>;
+
+// i1 -> i64
+def : Pat <(i64 (sext (i1 PredRegs:$src1))),
+ (i64 (COMBINE_rr (TFRI -1), (MUX_ii (i1 PredRegs:$src1), -1, 0)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert any-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i16 -> i64
+def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
+ (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i32 -> i64
+def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
+ (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>;
+
+
+// Zero extends.
+// i1 -> i32
+def : Pat <(i32 (zext (i1 PredRegs:$src1))),
+ (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
+
+// i1 -> i64
+def : Pat <(i64 (zext (i1 PredRegs:$src1))),
+ (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+ Requires<[NoV4T]>;
+
+// i32 -> i64
+def : Pat <(i64 (zext (i32 IntRegs:$src1))),
+ (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+ Requires<[NoV4T]>;
+
+// i8 -> i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
+
+// i1 -> i64
+def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
+
+// i16 -> i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
+
+// i32 -> i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
+
+def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
+ (i32 (LDriw ADDRriS11_0:$src1))>;
+
+// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
+def : Pat <(i32 (zext (i1 PredRegs:$src1))),
+ (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
+
+// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
+def : Pat <(i32 (anyext (i1 PredRegs:$src1))),
+ (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
+
+// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
+def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
+ (i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>;
+
+
+// Any extended 64-bit load.
+// anyext i32 -> i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
+
+// When there is an offset we should prefer the pattern below over the pattern above.
+// The complexity of the above is 13 (gleaned from HexagonGenDAGIsel.inc)
+// So this complexity below is comfortably higher to allow for choosing the below.
+// If this is not done then we generate addresses such as
+// ********************************************
+// r1 = add (r0, #4)
+// r1 = memw(r1 + #0)
+// instead of
+// r1 = memw(r0 + #4)
+// ********************************************
+let AddedComplexity = 100 in
+def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
+
+// anyext i16 -> i64.
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
+
+// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
+def : Pat<(i64 (zext (i32 IntRegs:$src1))),
+ (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+ Requires<[NoV4T]>;
+
+// Multiply 64-bit unsigned and use upper result.
+def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+ (i64
+ (MPYU64_acc
+ (i64
+ (COMBINE_rr
+ (TFRI 0),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (LSRd_ri
+ (i64
+ (MPYU64_acc
+ (i64
+ (MPYU64_acc
+ (i64
+ (COMBINE_rr (TFRI 0),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (LSRd_ri
+ (i64
+ (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)))), 32)),
+ subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))),
+ 32)), subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>;
+
+// Multiply 64-bit signed and use upper result.
+def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+ (i64
+ (MPY64_acc
+ (i64
+ (COMBINE_rr (TFRI 0),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (LSRd_ri
+ (i64
+ (MPY64_acc
+ (i64
+ (MPY64_acc
+ (i64
+ (COMBINE_rr (TFRI 0),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (LSRd_ri
+ (i64
+ (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)))), 32)),
+ subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))),
+ 32)), subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>;
+
+// Hexagon specific ISD nodes.
+//def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>;
+def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC",
+ SDTHexagonADJDYNALLOC>;
+// Needed to tag these instructions for stack layout.
+let usesCustomInserter = 1 in
+def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set (i32 IntRegs:$dst),
+ (Hexagon_ADJDYNALLOC (i32 IntRegs:$src1),
+ s16ImmPred:$src2))]>;
+
+def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
+def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ [(set (i32 IntRegs:$dst),
+ (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>;
+
+let AddedComplexity = 100 in
+def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
+ (COPY (i32 IntRegs:$src1))>;
+
+def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : JRInst<(outs), (ins IntRegs:$src),
+ "jumpr $src",
+ [(HexagonBR_JT (i32 IntRegs:$src))]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1 in
+def BRIND : JRInst<(outs), (ins IntRegs:$src),
+ "jumpr $src",
+ [(brind (i32 IntRegs:$src))]>;
+
+def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
+
+def : Pat<(HexagonWrapperJT tjumptable:$dst),
+ (i32 (CONST32_set_jt tjumptable:$dst))>;
+
+// XTYPE/SHIFT
+
+// Multi-class for logical operators :
+// Shift by immediate/register and accumulate/logical
+multiclass xtype_imm<string OpcStr, SDNode OpNode1, SDNode OpNode2> {
+ def _ri : SInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u5Imm:$src3),
+ !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")),
+ [(set (i32 IntRegs:$dst),
+ (OpNode2 (i32 IntRegs:$src1),
+ (OpNode1 (i32 IntRegs:$src2),
+ u5ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+ def d_ri : SInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, u6Imm:$src3),
+ !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")),
+ [(set (i64 DoubleRegs:$dst), (OpNode2 (i64 DoubleRegs:$src1),
+ (OpNode1 (i64 DoubleRegs:$src2), u6ImmPred:$src3)))],
+ "$src1 = $dst">;
+}
+
+// Multi-class for logical operators :
+// Shift by register and accumulate/logical (32/64 bits)
+multiclass xtype_reg<string OpcStr, SDNode OpNode1, SDNode OpNode2> {
+ def _rr : SInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")),
+ [(set (i32 IntRegs:$dst),
+ (OpNode2 (i32 IntRegs:$src1),
+ (OpNode1 (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">;
+
+ def d_rr : SInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")),
+ [(set (i64 DoubleRegs:$dst),
+ (OpNode2 (i64 DoubleRegs:$src1),
+ (OpNode1 (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">;
+
+}
+
+multiclass basic_xtype_imm<string OpcStr, SDNode OpNode> {
+let AddedComplexity = 100 in
+ defm _ADD : xtype_imm< !strconcat("+= ", OpcStr), OpNode, add>;
+ defm _SUB : xtype_imm< !strconcat("-= ", OpcStr), OpNode, sub>;
+ defm _AND : xtype_imm< !strconcat("&= ", OpcStr), OpNode, and>;
+ defm _OR : xtype_imm< !strconcat("|= ", OpcStr), OpNode, or>;
+}
+
+multiclass basic_xtype_reg<string OpcStr, SDNode OpNode> {
+let AddedComplexity = 100 in
+ defm _ADD : xtype_reg< !strconcat("+= ", OpcStr), OpNode, add>;
+ defm _SUB : xtype_reg< !strconcat("-= ", OpcStr), OpNode, sub>;
+ defm _AND : xtype_reg< !strconcat("&= ", OpcStr), OpNode, and>;
+ defm _OR : xtype_reg< !strconcat("|= ", OpcStr), OpNode, or>;
+}
+
+multiclass xtype_xor_imm<string OpcStr, SDNode OpNode> {
+let AddedComplexity = 100 in
+ defm _XOR : xtype_imm< !strconcat("^= ", OpcStr), OpNode, xor>;
+}
+
+defm ASL : basic_xtype_imm<"asl", shl>, basic_xtype_reg<"asl", shl>,
+ xtype_xor_imm<"asl", shl>;
+
+defm LSR : basic_xtype_imm<"lsr", srl>, basic_xtype_reg<"lsr", srl>,
+ xtype_xor_imm<"lsr", srl>;
+
+defm ASR : basic_xtype_imm<"asr", sra>, basic_xtype_reg<"asr", sra>;
+defm LSL : basic_xtype_reg<"lsl", shl>;
+
+// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
+def : Pat <(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
+ (i32 (MPYI_rin (i32 IntRegs:$src1), u8ImmPred:$src2))>;
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV3.td"
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V5 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV5.td"
+
+//===----------------------------------------------------------------------===//
+// V5 Instructions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
new file mode 100644
index 000000000000..157ab3d0e38c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -0,0 +1,137 @@
+//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLv3 : JInst<(outs), (ins calltarget:$dst),
+ "call $dst", []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst),
+ "callr $dst",
+ []>, Requires<[HasV3TOnly]>;
+ }
+
+
+// Jump to address from register
+// if(p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnNotPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// Not taken.
+// if(p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnNotPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 200 in
+def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setlt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>,
+Requires<[HasV3T]>;
+
+let AddedComplexity = 200 in
+def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setgt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>,
+Requires<[HasV3T]>;
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+
+
+
+//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset),
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+
+// Map call instruction
+def : Pat<(call (i32 IntRegs:$dst)),
+ (CALLRv3 (i32 IntRegs:$dst))>, Requires<[HasV3T]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>;
+def : Pat<(call texternalsym:$dst),
+ (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
new file mode 100644
index 000000000000..cd0e4758968c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -0,0 +1,3531 @@
+//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in
+class T_Immext<dag ins> :
+ EXTENDERInst<(outs), ins, "immext(#$imm)", []>,
+ Requires<[HasV4T]>;
+
+def IMMEXT_b : T_Immext<(ins brtarget:$imm)>;
+def IMMEXT_c : T_Immext<(ins calltarget:$imm)>;
+def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>;
+def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>;
+
+// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address.
+def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>;
+
+// Fold (add (CONST32_GP tglobaladdr:$addr) <offset>) into a global address.
+def FoldGlobalAddrGP : ComplexPattern<i32, 1, "foldGlobalAddressGP", [], []>;
+
+def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr),
+ (HexagonCONST32 node:$addr), [{
+ return hasNumUsesBelowThresGA(N->getOperand(0).getNode());
+}]>;
+
+// Hexagon V4 Architecture spec defines 8 instruction classes:
+// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the
+// compiler)
+
+// LD Instructions:
+// ========================================
+// Loads (8/16/32/64 bit)
+// Deallocframe
+
+// ST Instructions:
+// ========================================
+// Stores (8/16/32/64 bit)
+// Allocframe
+
+// ALU32 Instructions:
+// ========================================
+// Arithmetic / Logical (32 bit)
+// Vector Halfword
+
+// XTYPE Instructions (32/64 bit):
+// ========================================
+// Arithmetic, Logical, Bit Manipulation
+// Multiply (Integer, Fractional, Complex)
+// Permute / Vector Permute Operations
+// Predicate Operations
+// Shift / Shift with Add/Sub/Logical
+// Vector Byte ALU
+// Vector Halfword (ALU, Shift, Multiply)
+// Vector Word (ALU, Shift)
+
+// J Instructions:
+// ========================================
+// Jump/Call PC-relative
+
+// JR Instructions:
+// ========================================
+// Jump/Call Register
+
+// MEMOP Instructions:
+// ========================================
+// Operation on memory (8/16/32 bit)
+
+// NV Instructions:
+// ========================================
+// New-value Jumps
+// New-value Stores
+
+// CR Instructions:
+// ========================================
+// Control-Register Transfers
+// Hardware Loop Setup
+// Predicate Logicals & Reductions
+
+// SYSTEM Instructions (not implemented in the compiler):
+// ========================================
+// Prefetch
+// Cache Maintenance
+// Bus Operations
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 +
+//===----------------------------------------------------------------------===//
+// Generate frame index addresses.
+let neverHasSideEffects = 1, isReMaterializable = 1,
+isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in
+def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s32Imm:$offset),
+ "$dst = add($src1, ##$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=cmp.eq(Rs,#s8)
+let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
+isExtentSigned = 1, opExtentBits = 8 in
+def V4_A4_rcmpeqi : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = cmp.eq($Rs, #$s8)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
+ s8ExtPred:$s8)))))]>,
+ Requires<[HasV4T]>;
+
+// Preserve the TSTBIT generation
+def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
+ (i32 IntRegs:$src1))), 0)))),
+ (i32 (MUX_ii (i1 (TSTBIT_rr (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ 1, 0))>;
+
+// Interfered with tstbit generation, above pattern preserves, see : tstbit.ll
+// Rd=cmp.ne(Rs,#s8)
+let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
+isExtentSigned = 1, opExtentBits = 8 in
+def V4_A4_rcmpneqi : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = !cmp.eq($Rs, #$s8)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
+ s8ExtPred:$s8)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=cmp.eq(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def V4_A4_rcmpeq : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = cmp.eq($Rs, $Rt)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
+ IntRegs:$Rt)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=cmp.ne(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = !cmp.eq($Rs, $Rt)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
+ IntRegs:$Rt)))))]>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// ALU32 -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine
+// Rdd=combine(Rs, #s8)
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+ neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_rI_V4 : ALU32_ri<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s8Ext:$src2),
+ "$dst = combine($src1, #$src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rdd=combine(#s8, Rs)
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
+ neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
+ (ins s8Ext:$src1, IntRegs:$src2),
+ "$dst = combine(#$src1, $src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def HexagonWrapperCombineRI_V4 :
+ SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
+def HexagonWrapperCombineIR_V4 :
+ SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
+
+def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
+ (COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
+ (COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>,
+ Requires<[HasV4T]>;
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6,
+ neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
+ (ins s8Imm:$src1, u6Ext:$src2),
+ "$dst = combine(#$src1, #$src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+//
+// These absolute set addressing mode instructions accept immediate as
+// an operand. We have duplicated these patterns to take global address.
+
+let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in {
+def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memd($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Re=#U6)
+def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memb($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Re=#U6)
+def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memh($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memub(Re=#U6)
+def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memub($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memuh(Re=#U6)
+def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memuh($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Re=#U6)
+def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memw($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Following patterns are defined for absolute set addressing mode
+// instruction which take global address as operand.
+let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in {
+def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdressExt:$addr),
+ "$dst1 = memd($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Re=#U6)
+def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdressExt:$addr),
+ "$dst1 = memb($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Re=#U6)
+def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdressExt:$addr),
+ "$dst1 = memh($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memub(Re=#U6)
+def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdressExt:$addr),
+ "$dst1 = memub($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memuh(Re=#U6)
+def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdressExt:$addr),
+ "$dst1 = memuh($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Re=#U6)
+def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdressExt:$addr),
+ "$dst1 = memw($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// multiclass for load instructions with base + register offset
+// addressing mode
+multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$offset)",
+ []>, Requires<[HasV4T]>;
+}
+
+multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let neverHasSideEffects = 1 in
+multiclass ld_idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ let isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst = "#mnemonic#"($src1+$src2<<#$offset)",
+ []>, Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt_V4 : ld_idxd_shl_pred<mnemonic, RC, 0 >;
+ defm NotPt_V4 : ld_idxd_shl_pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+let addrMode = BaseRegOffset in {
+ defm LDrib_indexed_shl: ld_idxd_shl<"memb", "LDrib", IntRegs>, AddrModeRel;
+ defm LDriub_indexed_shl: ld_idxd_shl<"memub", "LDriub", IntRegs>, AddrModeRel;
+ defm LDrih_indexed_shl: ld_idxd_shl<"memh", "LDrih", IntRegs>, AddrModeRel;
+ defm LDriuh_indexed_shl: ld_idxd_shl<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+ defm LDriw_indexed_shl: ld_idxd_shl<"memw", "LDriw", IntRegs>, AddrModeRel;
+ defm LDrid_indexed_shl: ld_idxd_shl<"memd", "LDrid", DoubleRegs>, AddrModeRel;
+}
+
+// 'def pats' for load instructions with base + register offset and non-zero
+// immediate value. Immediate value is used to left-shift the second
+// register operand.
+let AddedComplexity = 40 in {
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDrib_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (extloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (sextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDrih_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (zextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (extloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (load (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriw_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i64 (load (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDrid_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
+ Requires<[HasV4T]>;
+}
+
+
+// 'def pats' for load instruction base + register offset and
+// zero immediate value.
+let AddedComplexity = 10 in {
+def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ Requires<[HasV4T]>;
+}
+
+// zext i1->i64
+def : Pat <(i64 (zext (i1 PredRegs:$src1))),
+ (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+ Requires<[HasV4T]>;
+
+// zext i32->i64
+def : Pat <(i64 (zext (i32 IntRegs:$src1))),
+ (i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>,
+ Requires<[HasV4T]>;
+// zext i8->i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zext i1->i64
+def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zext i16->i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// anyext i16->i64
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zext i32->i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 100 in
+def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// anyext i32->i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 100 in
+def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions::: ****** DO NOT IGNORE ********
+/// 1. Make sure that in post increment store, the zero'th operand is always the
+/// post increment operand.
+/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the
+/// last operand.
+///
+
+// memd(Re=#U)=Rtt
+let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
+def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins DoubleRegs:$src1, u0AlwaysExt:$src2),
+ "memd($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Re=#U)=Rs
+def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memb($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Re=#U)=Rs
+def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memh($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Re=#U)=Rs
+def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memw($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// memd(Re=#U)=Rtt
+let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
+def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins DoubleRegs:$src1, globaladdressExt:$src2),
+ "memd($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Re=#U)=Rs
+def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
+ "memb($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Re=#U)=Rs
+def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
+ "memh($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Re=#U)=Rs
+def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
+ "memw($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// multiclass for store instructions with base + register offset addressing
+// mode
+multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ RC:$src5),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let isNVStorable = 1 in
+multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ let isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
+ mnemonic#"($src1+$src2<<#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 0 >;
+ defm NotPt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+// multiclass for new-value store instructions with base + register offset
+// addressing mode.
+multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ RC:$src5),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let mayStore = 1, isNVStore = 1 in
+multiclass ST_Idxd_shl_nv<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ let isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
+ mnemonic#"($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 0 >;
+ defm NotPt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
+}
+
+let addrMode = BaseRegOffset, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in {
+ defm STrib_indexed_shl: ST_Idxd_shl<"memb", "STrib", IntRegs>,
+ ST_Idxd_shl_nv<"memb", "STrib", IntRegs>, AddrModeRel;
+
+ defm STrih_indexed_shl: ST_Idxd_shl<"memh", "STrih", IntRegs>,
+ ST_Idxd_shl_nv<"memh", "STrih", IntRegs>, AddrModeRel;
+
+ defm STriw_indexed_shl: ST_Idxd_shl<"memw", "STriw", IntRegs>,
+ ST_Idxd_shl_nv<"memw", "STriw", IntRegs>, AddrModeRel;
+
+ let isNVStorable = 0 in
+ defm STrid_indexed_shl: ST_Idxd_shl<"memd", "STrid", DoubleRegs>, AddrModeRel;
+}
+
+let Predicates = [HasV4T], AddedComplexity = 10 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2,
+ u2ImmPred:$src3))),
+ (STrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2,
+ u2ImmPred:$src3))),
+ (STrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(store (i32 IntRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
+ (STriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(store (i64 DoubleRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
+ (STrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, DoubleRegs:$src4)>;
+}
+
+// memd(Ru<<#u2+#U6)=Rtt
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10,
+validSubTargets = HasV4SubT in
+def STrid_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4),
+ "memd($src1<<#$src2+#$src3) = $src4",
+ [(store (i64 DoubleRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u0AlwaysExtPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memd(Rx++#s4:3)=Rtt
+// memd(Rx++#s4:3:circ(Mu))=Rtt
+// memd(Rx++I:circ(Mu))=Rtt
+// memd(Rx++Mu)=Rtt
+// memd(Rx++Mu:brev)=Rtt
+// memd(gp+#u16:3)=Rtt
+
+// Store doubleword conditionally.
+// if ([!]Pv[.new]) memd(#u6)=Rtt
+// TODO: needs to be implemented.
+
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with base + immediate offset
+// addressing mode and immediate stored value.
+// mem[bhw](Rx++#s4:3)=#s8
+// if ([!]Pv[.new]) mem[bhw](Rx++#s4:3)=#s6
+//===----------------------------------------------------------------------===//
+multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
+ }
+}
+
+let isExtendable = 1, isExtentSigned = 1, neverHasSideEffects = 1 in
+multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
+ let opExtendable = 2, opExtentBits = 8, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins IntRegs:$src1, OffsetOp:$src2, s8Ext:$src3),
+ mnemonic#"($src1+#$src2) = #$src3",
+ []>,
+ Requires<[HasV4T]>;
+
+ let opExtendable = 3, opExtentBits = 6, isPredicated = 1 in {
+ defm Pt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 0>;
+ defm NotPt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 1 >;
+ }
+ }
+}
+
+let addrMode = BaseImmOffset, InputType = "imm",
+ validSubTargets = HasV4SubT in {
+ defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel, PredNewRel;
+ defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel, PredNewRel;
+ defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel, PredNewRel;
+}
+
+let Predicates = [HasV4T], AddedComplexity = 10 in {
+def: Pat<(truncstorei8 s8ExtPred:$src3, (add IntRegs:$src1, u6_0ImmPred:$src2)),
+ (STrib_imm_V4 IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>;
+
+def: Pat<(truncstorei16 s8ExtPred:$src3, (add IntRegs:$src1,
+ u6_1ImmPred:$src2)),
+ (STrih_imm_V4 IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>;
+
+def: Pat<(store s8ExtPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2)),
+ (STriw_imm_V4 IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>;
+}
+
+let AddedComplexity = 6 in
+def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
+ (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Rt
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
+def STrib_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei8 (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u0AlwaysExtPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Rt
+// memb(Rx++I:circ(Mu))=Rt
+// memb(Rx++Mu)=Rt
+// memb(Rx++Mu:brev)=Rt
+// memb(gp+#u16:0)=Rt
+
+
+// Store halfword.
+// TODO: needs to be implemented
+// memh(Re=#U6)=Rt.H
+// memh(Rs+#s11:1)=Rt.H
+let AddedComplexity = 6 in
+def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
+ (STrih_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ Requires<[HasV4T]>;
+
+// memh(Rs+Ru<<#u2)=Rt.H
+// TODO: needs to be implemented.
+
+// memh(Ru<<#u2+#U6)=Rt.H
+// memh(Ru<<#u2+#U6)=Rt
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
+def STrih_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei16 (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u0AlwaysExtPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Rt.H
+// memh(Rx++#s4:1:circ(Mu))=Rt
+// memh(Rx++I:circ(Mu))=Rt.H
+// memh(Rx++I:circ(Mu))=Rt
+// memh(Rx++Mu)=Rt.H
+// memh(Rx++Mu)=Rt
+// memh(Rx++Mu:brev)=Rt.H
+// memh(Rx++Mu:brev)=Rt
+// memh(gp+#u16:1)=Rt
+// if ([!]Pv[.new]) memh(#u6)=Rt.H
+// if ([!]Pv[.new]) memh(#u6)=Rt
+
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
+// TODO: Needs to be implemented.
+
+// Store word.
+// memw(Re=#U6)=Rt
+// TODO: Needs to be implemented.
+
+// Store predicate:
+let neverHasSideEffects = 1 in
+def STriw_pred_V4 : STInst2<(outs),
+ (ins MEMri:$addr, PredRegs:$src1),
+ "Error; should not emit",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 6 in
+def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
+ (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ Requires<[HasV4T]>;
+
+// memw(Ru<<#u2+#U6)=Rt
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
+def STriw_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4",
+ [(store (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u0AlwaysExtPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2)=Rt
+// memw(Rx++#s4:2:circ(Mu))=Rt
+// memw(Rx++I:circ(Mu))=Rt
+// memw(Rx++Mu)=Rt
+// memw(Rx++Mu:brev)=Rt
+
+//===----------------------------------------------------------------------===
+// ST -
+//===----------------------------------------------------------------------===
+
+
+//===----------------------------------------------------------------------===//
+// NV/ST +
+//===----------------------------------------------------------------------===//
+
+// multiclass for new-value store instructions with base + immediate offset.
+//
+multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
+ Operand predImmOp, bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>;
+ }
+}
+
+let mayStore = 1, isNVStore = 1, neverHasSideEffects = 1, isExtendable = 1 in
+multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+ bits<5> PredImmBits> {
+
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 0>;
+ defm NotPt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 1>;
+ }
+ }
+}
+
+let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in {
+ defm STrib_indexed: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
+ u6_0Ext, 11, 6>, AddrModeRel;
+ defm STrih_indexed: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext,
+ u6_1Ext, 12, 7>, AddrModeRel;
+ defm STriw_indexed: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext,
+ u6_2Ext, 13, 8>, AddrModeRel;
+}
+
+// multiclass for new-value store instructions with base + immediate offset.
+// and MEMri operand.
+multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>;
+
+ // Predicate new
+ defm _cdn#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let mayStore = 1, isNVStore = 1, isExtendable = 1, neverHasSideEffects = 1 in
+multiclass ST_MEMri_nv<string mnemonic, string CextOp, RegisterClass RC,
+ bits<5> ImmBits, bits<5> PredImmBits> {
+
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins MEMri:$addr, RC:$src),
+ mnemonic#"($addr) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+ neverHasSideEffects = 1, isPredicated = 1 in {
+ defm Pt : ST_MEMri_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_MEMri_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
+}
+
+let addrMode = BaseImmOffset, isMEMri = "true", validSubTargets = HasV4SubT,
+mayStore = 1 in {
+ defm STrib: ST_MEMri_nv<"memb", "STrib", IntRegs, 11, 6>, AddrModeRel;
+ defm STrih: ST_MEMri_nv<"memh", "STrih", IntRegs, 12, 7>, AddrModeRel;
+ defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
+}
+
+// memb(Ru<<#u2+#U6)=Nt.new
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
+def STrib_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// Post increment store
+// mem[bhwd](Rx++#s4:[0123])=Nt.new
+//===----------------------------------------------------------------------===//
+
+multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
+
+let hasCtrlDep = 1, isNVStore = 1, neverHasSideEffects = 1 in
+multiclass ST_PostInc_nv<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
+
+ let BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+ mnemonic#"($src1++#$offset) = $src2.new",
+ [],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
+
+let validSubTargets = HasV4SubT in {
+defm POST_STbri: ST_PostInc_nv <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
+defm POST_SThri: ST_PostInc_nv <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
+defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
+}
+
+// memb(Rx++#s4:0:circ(Mu))=Nt.new
+// memb(Rx++I:circ(Mu))=Nt.new
+// memb(Rx++Mu)=Nt.new
+// memb(Rx++Mu:brev)=Nt.new
+// memh(Ru<<#u2+#U6)=Nt.new
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
+def STrih_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Nt.new
+// memh(Rx++I:circ(Mu))=Nt.new
+// memh(Rx++Mu)=Nt.new
+// memh(Rx++Mu:brev)=Nt.new
+
+// memw(Ru<<#u2+#U6)=Nt.new
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
+def STriw_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2:circ(Mu))=Nt.new
+// memw(Rx++I:circ(Mu))=Nt.new
+// memw(Rx++Mu)=Nt.new
+// memw(Rx++Mu:brev)=Nt.new
+
+//===----------------------------------------------------------------------===//
+// NV/ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// NV/J +
+//===----------------------------------------------------------------------===//
+
+multiclass NVJ_type_basic_reg<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, $src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, $src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_2ndDotNew<string NotStr, string OpcStr,
+ string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1, $src2.new)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1, $src2.new)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_imm<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_neg<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_tstbit<string NotStr, string OpcStr,
+ string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Multiclass for regular dot new of Ist operand register.
+multiclass NVJ_type_br_pred_reg<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_reg<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_reg<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for dot new of 2nd operand register.
+multiclass NVJ_type_br_pred_2ndDotNew<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for 2nd operand immediate, including -1.
+multiclass NVJ_type_br_pred_imm<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">;
+ defm Ptneg : NVJ_type_basic_neg<NotStr, OpcStr, "t">;
+ defm Pntneg : NVJ_type_basic_neg<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for 2nd operand immediate, excluding -1.
+multiclass NVJ_type_br_pred_imm_only<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for tstbit, where 2nd operand is always #0.
+multiclass NVJ_type_br_pred_tstbit<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_tstbit<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_tstbit<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for GT.
+multiclass NVJ_type_rr_ri<string OpcStr> {
+ defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
+ defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
+ defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>;
+ defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>;
+ defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>;
+ defm ri : NVJ_type_br_pred_imm<"", OpcStr>;
+}
+
+// Multiclass for EQ.
+multiclass NVJ_type_rr_ri_no_2ndDotNew<string OpcStr> {
+ defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
+ defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
+ defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>;
+ defm ri : NVJ_type_br_pred_imm<"", OpcStr>;
+}
+
+// Multiclass for GTU.
+multiclass NVJ_type_rr_ri_no_nOne<string OpcStr> {
+ defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
+ defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
+ defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>;
+ defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>;
+ defm riNot : NVJ_type_br_pred_imm_only<"!", OpcStr>;
+ defm ri : NVJ_type_br_pred_imm_only<"", OpcStr>;
+}
+
+// Multiclass for tstbit.
+multiclass NVJ_type_r0<string OpcStr> {
+ defm r0Not : NVJ_type_br_pred_tstbit<"!", OpcStr>;
+ defm r0 : NVJ_type_br_pred_tstbit<"", OpcStr>;
+ }
+
+// Base Multiclass for New Value Jump.
+multiclass NVJ_type {
+ defm GT : NVJ_type_rr_ri<"cmp.gt">;
+ defm EQ : NVJ_type_rr_ri_no_2ndDotNew<"cmp.eq">;
+ defm GTU : NVJ_type_rr_ri_no_nOne<"cmp.gtu">;
+ defm TSTBIT : NVJ_type_r0<"tstbit">;
+}
+
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
+ defm JMP_ : NVJ_type;
+}
+
+//===----------------------------------------------------------------------===//
+// NV/J -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU +
+//===----------------------------------------------------------------------===//
+
+// Add and accumulate.
+// Rd=add(Rs,add(Ru,#s6))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
+def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s6Ext:$src3),
+ "$dst = add($src1, add($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
+ s6_16ExtPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Rs,sub(#s6,Ru))
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
+def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3),
+ "$dst = add($src1, sub(#$src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2,
+ (i32 IntRegs:$src3))))]>,
+ Requires<[HasV4T]>;
+
+// Generates the same instruction as ADDr_SUBri_V4 but matches different
+// pattern.
+// Rd=add(Rs,sub(#s6,Ru))
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
+def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3),
+ "$dst = add($src1, sub(#$src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2),
+ (i32 IntRegs:$src3)))]>,
+ Requires<[HasV4T]>;
+
+
+// Add or subtract doublewords with carry.
+//TODO:
+// Rdd=add(Rss,Rtt,Px):carry
+//TODO:
+// Rdd=sub(Rss,Rtt,Px):carry
+
+
+// Logical doublewords.
+// Rdd=and(Rtt,~Rss)
+let validSubTargets = HasV4SubT in
+def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = and($src1, ~$src2)",
+ [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1),
+ (not (i64 DoubleRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// Rdd=or(Rtt,~Rss)
+let validSubTargets = HasV4SubT in
+def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = or($src1, ~$src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (or (i64 DoubleRegs:$src1), (not (i64 DoubleRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+
+// Logical-logical doublewords.
+// Rxx^=xor(Rss,Rtt)
+let validSubTargets = HasV4SubT in
+def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ "$dst ^= xor($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (xor (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Logical-logical words.
+// Rx=or(Ru,and(Rx,#s10))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT in
+def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
+ "$dst = or($src1, and($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ s10ExtPred:$src3)))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,Rt)
+// Rx&=and(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= and($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,Rt)
+let validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "reg" in
+def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= and($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// Rx^=and(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= and($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,~Rt)
+// Rx&=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
+def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= and($src2, ~$src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
+def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= and($src2, ~$src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
+def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= and($src2, ~$src3)",
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=or(Rs,Rt)
+// Rx&=or(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= or($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=or(Rs,Rt)
+let validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "reg" in
+def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= or($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// Rx^=or(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= or($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=xor(Rs,Rt)
+// Rx&=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= xor($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= xor($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= xor($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,#s10)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "imm" in
+def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
+ "$dst |= and($src2, #$src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ s10ExtPred:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// Rx|=or(Rs,#s10)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "imm" in
+def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
+ "$dst |= or($src2, #$src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ s10ExtPred:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>, ImmRegRel;
+
+
+// Modulo wrap
+// Rd=modwrap(Rs,Rt)
+// Round
+// Rd=cround(Rs,#u5)
+// Rd=cround(Rs,Rt)
+// Rd=round(Rs,#u5)[:sat]
+// Rd=round(Rs,Rt)[:sat]
+// Vector reduce add unsigned halfwords
+// Rd=vraddh(Rss,Rtt)
+// Vector add bytes
+// Rdd=vaddb(Rss,Rtt)
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+// Rxx+=vrcnegh(Rss,Rt)
+// Vector maximum bytes
+// Rdd=vmaxb(Rtt,Rss)
+// Vector reduce maximum halfwords
+// Rxx=vrmaxh(Rss,Ru)
+// Rxx=vrmaxuh(Rss,Ru)
+// Vector reduce maximum words
+// Rxx=vrmaxuw(Rss,Ru)
+// Rxx=vrmaxw(Rss,Ru)
+// Vector minimum bytes
+// Rdd=vminb(Rtt,Rss)
+// Vector reduce minimum halfwords
+// Rxx=vrminh(Rss,Ru)
+// Rxx=vrminuh(Rss,Ru)
+// Vector reduce minimum words
+// Rxx=vrminuw(Rss,Ru)
+// Rxx=vrminw(Rss,Ru)
+// Vector subtract bytes
+// Rdd=vsubb(Rss,Rtt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY +
+//===----------------------------------------------------------------------===//
+
+// Multiply and user lower result.
+// Rd=add(#u6,mpyi(Rs,#U6))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT in
+def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
+ (ins u6Ext:$src1, IntRegs:$src2, u6Imm:$src3),
+ "$dst = add(#$src1, mpyi($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
+ u6ExtPred:$src1))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(##,mpyi(Rs,#U6))
+def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
+ (HexagonCONST32 tglobaladdr:$src1)),
+ (i32 (ADDi_MPYri_V4 tglobaladdr:$src1, IntRegs:$src2,
+ u6ImmPred:$src3))>;
+
+// Rd=add(#u6,mpyi(Rs,Rt))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
+def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
+ (ins u6Ext:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst = add(#$src1, mpyi($src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ u6ExtPred:$src1))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// Rd=add(##,mpyi(Rs,Rt))
+def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ (HexagonCONST32 tglobaladdr:$src1)),
+ (i32 (ADDi_MPYrr_V4 tglobaladdr:$src1, IntRegs:$src2,
+ IntRegs:$src3))>;
+
+// Rd=add(Ru,mpyi(#u6:2,Rs))
+let validSubTargets = HasV4SubT in
+def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, mpyi(#$src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3),
+ u6_2ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(Rs,#u6))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
+def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u6Ext:$src3),
+ "$dst = add($src1, mpyi($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+ u6ExtPred:$src3)))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// Rx=add(Ru,mpyi(Rx,Rs))
+let validSubTargets = HasV4SubT, InputType = "reg", CextOpcode = "ADD_MPY" in
+def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst = add($src1, mpyi($src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>, ImmRegRel;
+
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+// Rxx^=pmpyw(Rs,Rt)
+
+// Vector reduce multiply word by signed half (32x16)
+// Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+// Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
+
+// Multiply and use upper result
+// Rd=mpy(Rs,Rt.H):<<1:sat
+// Rd=mpy(Rs,Rt.L):<<1:sat
+// Rd=mpy(Rs,Rt):<<1
+// Rd=mpy(Rs,Rt):<<1:sat
+// Rd=mpysu(Rs,Rt)
+// Rx+=mpy(Rs,Rt):<<1:sat
+// Rx-=mpy(Rs,Rt):<<1:sat
+
+// Vector multiply bytes
+// Rdd=vmpybsu(Rs,Rt)
+// Rdd=vmpybu(Rs,Rt)
+// Rxx+=vmpybsu(Rs,Rt)
+// Rxx+=vmpybu(Rs,Rt)
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+// Rxx^=vpmpyh(Rs,Rt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+
+// Shift by immediate and accumulate.
+// Rx=add(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
+def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = add(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ExtPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=add(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
+def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = add(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ExtPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=sub(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
+def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = sub(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ExtPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=sub(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
+def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = sub(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ExtPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//Shift by immediate and logical.
+//Rx=and(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
+def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = and(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (and (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ExtPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rx=and(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
+def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = and(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (and (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ExtPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rx=or(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+AddedComplexity = 30, validSubTargets = HasV4SubT in
+def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = or(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (or (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ExtPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rx=or(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+AddedComplexity = 30, validSubTargets = HasV4SubT in
+def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = or(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (or (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ExtPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//Shift by register.
+//Rd=lsl(#s6,Rt)
+let validSubTargets = HasV4SubT in {
+def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
+ "$dst = lsl(#$src1, $src2)",
+ [(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1,
+ (i32 IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+
+//Shift by register and logical.
+//Rxx^=asl(Rss,Rt)
+def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= asl($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rxx^=asr(Rss,Rt)
+def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= asr($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rxx^=lsl(Rss,Rt)
+def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= lsl($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1),
+ (shl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rxx^=lsr(Rss,Rt)
+def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= lsr($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word, Half, Byte
+//===----------------------------------------------------------------------===//
+
+def MEMOPIMM : SDNodeXForm<imm, [{
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int32_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def MEMOPIMM_HALF : SDNodeXForm<imm, [{
+ // -1 .. -31 represented as 65535..65515
+ // assigning to a short restores our desired signed value.
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int16_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def MEMOPIMM_BYTE : SDNodeXForm<imm, [{
+ // -1 .. -31 represented as 255..235
+ // assigning to a char restores our desired signed value.
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int8_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def SETMEMIMM : SDNodeXForm<imm, [{
+ // Return the bit position we will set [0-31].
+ // As an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformMskToBitPosU5Imm(imm);
+}]>;
+
+def CLRMEMIMM : SDNodeXForm<imm, [{
+ // Return the bit position we will clear [0-31].
+ // As an SDNode.
+ // we bit negate the value first
+ int32_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU5Imm(imm);
+}]>;
+
+def SETMEMIMM_SHORT : SDNodeXForm<imm, [{
+ // Return the bit position we will set [0-15].
+ // As an SDNode.
+ int16_t imm = N->getSExtValue();
+ return XformMskToBitPosU4Imm(imm);
+}]>;
+
+def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{
+ // Return the bit position we will clear [0-15].
+ // As an SDNode.
+ // we bit negate the value first
+ int16_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU4Imm(imm);
+}]>;
+
+def SETMEMIMM_BYTE : SDNodeXForm<imm, [{
+ // Return the bit position we will set [0-7].
+ // As an SDNode.
+ int8_t imm = N->getSExtValue();
+ return XformMskToBitPosU3Imm(imm);
+}]>;
+
+def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
+ // Return the bit position we will clear [0-7].
+ // As an SDNode.
+ // we bit negate the value first
+ int8_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU3Imm(imm);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Template class for MemOp instructions with the register value.
+//===----------------------------------------------------------------------===//
+class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
+ string memOp, bits<2> memOpBits> :
+ MEMInst_V4<(outs),
+ (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta),
+ opc#"($base+#$offset)"#memOp#"$delta",
+ []>,
+ Requires<[HasV4T, UseMEMOP]> {
+
+ bits<5> base;
+ bits<5> delta;
+ bits<32> offset;
+ bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+
+ let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+ !if (!eq(opcBits, 0b01), offset{6-1},
+ !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+ let IClass = 0b0011;
+ let Inst{27-24} = 0b1110;
+ let Inst{22-21} = opcBits;
+ let Inst{20-16} = base;
+ let Inst{13} = 0b0;
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = memOpBits;
+ let Inst{4-0} = delta;
+}
+
+//===----------------------------------------------------------------------===//
+// Template class for MemOp instructions with the immediate value.
+//===----------------------------------------------------------------------===//
+class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
+ string memOp, bits<2> memOpBits> :
+ MEMInst_V4 <(outs),
+ (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta),
+ opc#"($base+#$offset)"#memOp#"#$delta"
+ #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
+ []>,
+ Requires<[HasV4T, UseMEMOP]> {
+
+ bits<5> base;
+ bits<5> delta;
+ bits<32> offset;
+ bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+
+ let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+ !if (!eq(opcBits, 0b01), offset{6-1},
+ !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+ let IClass = 0b0011;
+ let Inst{27-24} = 0b1111;
+ let Inst{22-21} = opcBits;
+ let Inst{20-16} = base;
+ let Inst{13} = 0b0;
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = memOpBits;
+ let Inst{4-0} = delta;
+}
+
+// multiclass to define MemOp instructions with register operand.
+multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> {
+ def _ADD#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
+ def _SUB#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
+ def _AND#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
+ def _OR#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
+}
+
+// multiclass to define MemOp instructions with immediate Operand.
+multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> {
+ def _ADD#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
+ def _SUB#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
+ def _CLRBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =clrbit(", 0b10>;
+ def _SETBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =setbit(", 0b11>;
+}
+
+multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
+ defm r : MemOp_rr <opc, opcBits, ImmOp>;
+ defm i : MemOp_ri <opc, opcBits, ImmOp>;
+}
+
+// Define MemOp instructions.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0,
+validSubTargets =HasV4SubT in {
+ let opExtentBits = 6, accessSize = ByteAccess in
+ defm MemOPb : MemOp_base <"memb", 0b00, u6_0Ext>;
+
+ let opExtentBits = 7, accessSize = HalfWordAccess in
+ defm MemOPh : MemOp_base <"memh", 0b01, u6_1Ext>;
+
+ let opExtentBits = 8, accessSize = WordAccess in
+ defm MemOPw : MemOp_base <"memw", 0b10, u6_2Ext>;
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for ALU operations on the memory
+// Here value used for the ALU operation is an immediate value.
+// mem[bh](Rs+#0) += #U5
+// mem[bh](Rs+#u6) += #U5
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+ InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 180 in
+ def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, #0, u5ImmPred:$addend )>;
+
+ let AddedComplexity = 190 in
+ def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)),
+ u5ImmPred:$addend),
+ (add IntRegs:$base, ExtPred:$offset)),
+ (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>;
+}
+
+multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+ InstHexagon addMI, InstHexagon subMI> {
+ defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>;
+ defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>;
+}
+
+multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
+ MemOPh_ADDi_V4, MemOPh_SUBi_V4>;
+ // Byte
+ defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred,
+ MemOPb_ADDi_V4, MemOPb_SUBi_V4>;
+}
+
+let Predicates = [HasV4T, UseMEMOP] in {
+ defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
+
+ // Word
+ defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, MemOPw_ADDi_V4,
+ MemOPw_SUBi_V4>;
+}
+
+//===----------------------------------------------------------------------===//
+// multiclass to define 'Def Pats' for ALU operations on the memory.
+// Here value used for the ALU operation is a negative value.
+// mem[bh](Rs+#0) += #m5
+// mem[bh](Rs+#u6) += #m5
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+ PatLeaf immPred, ComplexPattern addrPred,
+ SDNodeXForm xformFunc, InstHexagon MI> {
+ let AddedComplexity = 190 in
+ def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>;
+
+ let AddedComplexity = 195 in
+ def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$subend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>;
+}
+
+multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred,
+ ADDRriU6_1, MEMOPIMM_HALF, MemOPh_SUBi_V4>;
+ // Byte
+ defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred,
+ ADDRriU6_0, MEMOPIMM_BYTE, MemOPb_SUBi_V4>;
+}
+
+let Predicates = [HasV4T, UseMEMOP] in {
+ defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
+
+ // Word
+ defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred,
+ ADDRriU6_2, MEMOPIMM, MemOPw_SUBi_V4>;
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for bit operations on the memory.
+// mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5)
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
+ PatLeaf extPred, ComplexPattern addrPred,
+ SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> {
+
+ // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5)
+ let AddedComplexity = 250 in
+ def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$bitend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>;
+
+ // mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+ let AddedComplexity = 225 in
+ def : Pat <(stOp (OpNode (ldOp addrPred:$addr), immPred:$bitend),
+ addrPred:$addr),
+ (MI IntRegs:$addr, #0, (xformFunc immPred:$bitend))>;
+}
+
+multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Byte - clrbit
+ defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred,
+ ADDRriU6_0, CLRMEMIMM_BYTE, MemOPb_CLRBITi_V4, and>;
+ // Byte - setbit
+ defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred,
+ ADDRriU6_0, SETMEMIMM_BYTE, MemOPb_SETBITi_V4, or>;
+ // Half Word - clrbit
+ defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred,
+ ADDRriU6_1, CLRMEMIMM_SHORT, MemOPh_CLRBITi_V4, and>;
+ // Half Word - setbit
+ defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred,
+ ADDRriU6_1, SETMEMIMM_SHORT, MemOPh_SETBITi_V4, or>;
+}
+
+let Predicates = [HasV4T, UseMEMOP] in {
+ // mem[bh](Rs+#0) = [clrbit|setbit](#U5)
+ // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5)
+ defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend
+
+ // memw(Rs+#0) = [clrbit|setbit](#U5)
+ // memw(Rs+#u6:2) = [clrbit|setbit](#U5)
+ defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2,
+ CLRMEMIMM, MemOPw_CLRBITi_V4, and>;
+ defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2,
+ SETMEMIMM, MemOPw_SETBITi_V4, or>;
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for ALU operations on the memory
+// where addend is a register.
+// mem[bhw](Rs+#0) [+-&|]= Rt
+// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred,
+ PatLeaf extPred, InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 141 in
+ // mem[bhw](Rs+#0) [+-&|]= Rt
+ def : Pat <(stOp (OpNode (ldOp addrPred:$addr), (i32 IntRegs:$addend)),
+ addrPred:$addr),
+ (MI IntRegs:$addr, #0, (i32 IntRegs:$addend) )>;
+
+ // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+ let AddedComplexity = 150 in
+ def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>;
+}
+
+multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp,
+ ComplexPattern addrPred, PatLeaf extPred,
+ InstHexagon addMI, InstHexagon subMI,
+ InstHexagon andMI, InstHexagon orMI > {
+
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>;
+}
+
+multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred,
+ MemOPh_ADDr_V4, MemOPh_SUBr_V4,
+ MemOPh_ANDr_V4, MemOPh_ORr_V4>;
+ // Byte
+ defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred,
+ MemOPb_ADDr_V4, MemOPb_SUBr_V4,
+ MemOPb_ANDr_V4, MemOPb_ORr_V4>;
+}
+
+// Define 'def Pats' for MemOps with register addend.
+let Predicates = [HasV4T, UseMEMOP] in {
+ // Byte, Half Word
+ defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend
+ // Word
+ defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, MemOPw_ADDr_V4,
+ MemOPw_SUBr_V4, MemOPw_ANDr_V4, MemOPw_ORr_V4 >;
+}
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED +
+//===----------------------------------------------------------------------===//
+
+// Hexagon V4 only supports these flavors of byte/half compare instructions:
+// EQ/GT/GTU. Other flavors like GE/GEU/LT/LTU/LE/LEU are not supported by
+// hardware. However, compiler can still implement these patterns through
+// appropriate patterns combinations based on current implemented patterns.
+// The implemented patterns are: EQ/GT/GTU.
+// Missing patterns are: GE/GEU/LT/LTU/LE/LEU.
+
+// Following instruction is not being extended as it results into the
+// incorrect code for negative numbers.
+// Pd=cmpb.eq(Rs,#u8)
+
+// p=!cmp.eq(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.eq(r1,#s10)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotEQ_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s10Ext:$src2),
+ "$dst = !cmp.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (i32 IntRegs:$src1), s10ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gt(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.gt($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gt(r1,#s10)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGT_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s10Ext:$src2),
+ "$dst = !cmp.gt($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setgt (i32 IntRegs:$src1), s10ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gtu(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gtu(r1,#u9)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGTU_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u9Ext:$src2),
+ "$dst = !cmp.gtu($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst = cmpb.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
+ bb:$offset)>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (xor (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)), 255), 0))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (shl (i32 IntRegs:$src1), (i32 24)),
+ (shl (i32 IntRegs:$src2), (i32 24))))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gt(Rs,Rt)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gt($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 24)),
+ (shl (i32 IntRegs:$src2), (i32 24))))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,#u7)
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
+isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in
+def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Ext:$src2),
+ "$dst = cmpb.gtu($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
+ u7ExtPred:$src2))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_BYTE : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformU7ToU7M1Imm(imm);
+}]>;
+
+// For the sequence
+// zext( seteq ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.eq(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setne ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.eq(Rs, #u8)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( seteq (Rs, and(Rt, 255)))
+// Generate
+// Pd=cmpb.eq(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt),
+ (i32 (and (i32 IntRegs:$Rs), 255)))))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setne (Rs, and(Rt, 255)))
+// Generate
+// Pd=cmpb.eq(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt),
+ (i32 (and (i32 IntRegs:$Rs), 255)))))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.gtu(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( and(Rs, 254), u8))
+// Generate
+// Pd=cmpb.gtu(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setult ( Rs, Rt))
+// Generate
+// Pd=cmp.ltu(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setlt ( Rs, Rt))
+// Generate
+// Pd=cmp.lt(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( Rs, Rt))
+// Generate
+// Pd=cmp.gtu(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// This pattern interefers with coremark performance, not implementing at this
+// time.
+// For the sequence
+// zext( setgt ( Rs, Rt))
+// Generate
+// Pd=cmp.gt(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+
+// For the sequence
+// zext( setuge ( Rs, Rt))
+// Generate
+// Pd=cmp.ltu(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setge ( Rs, Rt))
+// Generate
+// Pd=cmp.lt(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setule ( Rs, Rt))
+// Generate
+// Pd=cmp.gtu(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setle ( Rs, Rt))
+// Generate
+// Pd=cmp.gt(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setult ( and(Rs, 255), u8))
+// Use the isdigit transformation below
+
+// Generate code of the form 'mux_ii(cmpbgtu(Rdd, C-1),0,1)'
+// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;.
+// The isdigit transformation relies on two 'clever' aspects:
+// 1) The data type is unsigned which allows us to eliminate a zero test after
+// biasing the expression by 48. We are depending on the representation of
+// the unsigned types, and semantics.
+// 2) The front end has converted <= 9 into < 10 on entry to LLVM
+//
+// For the C code:
+// retval = ((c>='0') & (c<='9')) ? 1 : 0;
+// The code is transformed upstream of llvm into
+// retval = (c-48) < 10 ? 1 : 0;
+let AddedComplexity = 139 in
+def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
+ u7StrictPosImmPred:$src2)))),
+ (i32 (MUX_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1),
+ (DEC_CONST_BYTE u7StrictPosImmPred:$src2))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,Rt)
+let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU",
+InputType = "reg" in
+def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
+ (and (i32 IntRegs:$src2), 255)))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// Following instruction is not being extended as it results into the incorrect
+// code for negative numbers.
+
+// Signed half compare(.eq) ri.
+// Pd=cmph.eq(Rs,#s8)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = cmph.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 65535),
+ s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 1: xor + and, then compare:
+// r0=xor(r0,r1)
+// r0=and(r0,#0xffff)
+// p0=cmp.eq(r0,#0)
+// Pd=cmph.eq(Rs,Rt)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst), (seteq (and (xor (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)),
+ 65535), 0))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 2: shift left 16 bits then compare:
+// r0=asl(r0,16)
+// r1=asl(r1,16)
+// p0=cmp.eq(r0,r1)
+// Pd=cmph.eq(Rs,Rt)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (shl (i32 IntRegs:$src1), (i32 16)),
+ (shl (i32 IntRegs:$src2), (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+/* Incorrect Pattern -- immediate should be right shifted before being
+used in the cmph.gt instruction.
+// Signed half compare(.gt) ri.
+// Pd=cmph.gt(Rs,#s8)
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+isCompare = 1, validSubTargets = HasV4SubT in
+def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s8Ext:$src2),
+ "$dst = cmph.gt($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 16)),
+ s8ExtPred:$src2))]>,
+ Requires<[HasV4T]>;
+*/
+
+// Signed half compare(.gt) rr.
+// Pd=cmph.gt(Rs,Rt)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gt($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 16)),
+ (shl (i32 IntRegs:$src2), (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare rr (.gtu).
+// Pd=cmph.gtu(Rs,Rt)
+let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
+InputType = "reg" in
+def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setugt (and (i32 IntRegs:$src1), 65535),
+ (and (i32 IntRegs:$src2), 65535)))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// Unsigned half compare ri (.gtu).
+// Pd=cmph.gtu(Rs,#u7)
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
+isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
+InputType = "imm" in
+def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Ext:$src2),
+ "$dst = cmph.gtu($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535),
+ u7ExtPred:$src2))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+let validSubTargets = HasV4SubT in
+def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>,
+ Requires<[HasV4T]>;
+
+let validSubTargets = HasV4SubT in
+def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = !tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//Deallocate frame and return.
+// dealloc_return
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_V4 : NVInst_V4<(outs), (ins i32imm:$amt1),
+ "dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Restore registers and dealloc return function call.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs),
+ (ins calltarget:$dst),
+ "jump $dst // Restore_and_dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Restore registers and dealloc frame before a tail call.
+let isCall = 1, isBarrier = 1,
+ Defs = [R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs),
+ (ins calltarget:$dst),
+ "call $dst // Restore_and_dealloc_before_tailcall",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Save registers function call.
+let isCall = 1, isBarrier = 1,
+ Uses = [R29, R31] in {
+ def SAVE_REGISTERS_CALL_V4 : JInst<(outs),
+ (ins calltarget:$dst),
+ "call $dst // Save_calle_saved_registers",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, i32imm:$amt1),
+ "if ($src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cNotPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cNotdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cNotdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Load/Store with absolute addressing mode
+// memw(#u6)=Rt
+
+multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let isNVStorable = 1, isExtended = 1, neverHasSideEffects = 1 in
+multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins globaladdressExt:$absaddr, RC:$src),
+ mnemonic#"(##$absaddr) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+ let opExtendable = 1, isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"(##$absaddr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let mayStore = 1, isNVStore = 1, isExtended = 1, neverHasSideEffects = 1 in
+multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdressExt:$absaddr, RC:$src),
+ mnemonic#"(##$absaddr) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+ let opExtendable = 1, isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
+}
+
+let addrMode = Absolute in {
+ defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>,
+ ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel;
+
+ defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>,
+ ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel;
+
+ defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>,
+ ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel;
+
+ let isNVStorable = 0 in
+ defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel;
+}
+
+let Predicates = [HasV4T], AddedComplexity = 30 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+
+def : Pat<(store (i64 DoubleRegs:$src1),
+ (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>;
+}
+
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with GP-relative addressing mode.
+// mem[bhwd](#global)=Rt
+// if ([!]Pv[.new]) mem[bhwd](##global) = Rt
+//===----------------------------------------------------------------------===//
+multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins globaladdress:$global, RC:$src),
+ mnemonic#"(#$global) = $src",
+ []>;
+
+ // When GP-relative instructions are predicated, their addressing mode is
+ // changed to absolute and they are always constant extended.
+ let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
+ isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred <mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred <mnemonic, RC, 1>;
+ }
+}
+
+let mayStore = 1, isNVStore = 1 in
+multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp, isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins u0AlwaysExt:$global, RC:$src),
+ mnemonic#"(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+ // When GP-relative instructions are predicated, their addressing mode is
+ // changed to absolute and they are always constant extended.
+ let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
+ isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
+ }
+}
+
+let validSubTargets = HasV4SubT, validSubTargets = HasV4SubT in {
+defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>,
+ ST_GP_nv<"memd", "STd_GP", DoubleRegs>, NewValueRel ;
+defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>,
+ ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel ;
+defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>,
+ ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel ;
+defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>,
+ ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel ;
+}
+
+// 64 bit atomic store
+def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
+ (i64 DoubleRegs:$src1)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(store (i64 DoubleRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>;
+
+// 8 bit atomic store
+def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+// to "r0 = 1; memw(#foo) = r0"
+let AddedComplexity = 100 in
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>;
+
+def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// 32 bit atomic store
+def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass for the load instructions with absolute addressing mode.
+//===----------------------------------------------------------------------===//
+multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let isExtended = 1, neverHasSideEffects = 1 in
+multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 1, isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins globaladdressExt:$absaddr),
+ "$dst = "#mnemonic#"(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+ let opExtendable = 2, isPredicated = 1 in {
+ defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+let addrMode = Absolute in {
+ defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel;
+ defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel;
+ defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel;
+ defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+ defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel;
+ defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel;
+}
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriw_abs_V4 tglobaladdr: $absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDrib_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriub_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDrih_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriuh_abs_V4 tglobaladdr:$absaddr)>;
+
+//===----------------------------------------------------------------------===//
+// multiclass for load instructions with GP-relative addressing mode.
+// Rx=mem[bhwd](##global)
+// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
+//===----------------------------------------------------------------------===//
+let neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp in {
+ let isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins globaladdress:$global),
+ "$dst = "#mnemonic#"(#$global)",
+ []>;
+
+ let isExtended = 1, opExtendable = 2, isPredicated = 1 in {
+ defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>;
+defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>;
+defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>;
+defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>;
+defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>;
+defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>;
+
+def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memw(#foo + 0)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>;
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here in the following 3 patterns.
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memub(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memuh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+
+
+// Transfer global address into a register
+let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in
+def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
+ "$dst = ##$src1",
+ [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
+ Requires<[HasV4T]>;
+
+// Transfer a block address into a register
+def : Pat<(HexagonCONST32_GP tblockaddress:$src1),
+ (TFRI_V4 tblockaddress:$src1)>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if($src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if(!$src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if($src1.new) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if(!$src1.new) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 50, Predicates = [HasV4T] in
+def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
+ (TFRI_V4 tglobaladdr:$src1)>;
+
+
+// Load - Indirect with long offset: These instructions take global address
+// as an operand
+let AddedComplexity = 10 in
+def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ "$dst=memd($src1<<#$src2+##$offset)",
+ [(set (i64 DoubleRegs:$dst),
+ (load (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10 in
+multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
+ def _lo_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ !strconcat("$dst = ",
+ !strconcat(OpcStr, "($src1<<#$src2+##$offset)")),
+ [(set IntRegs:$dst,
+ (i32 (OpNode (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$offset)))))]>,
+ Requires<[HasV4T]>;
+}
+
+defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>;
+defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>;
+defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>;
+defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>;
+defm LDriw_ind : LD_indirect_lo<"memw", load>;
+
+// Store - Indirect with long offset: These instructions take global address
+// as an operand
+let AddedComplexity = 10 in
+def STrid_ind_lo_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
+ DoubleRegs:$src4),
+ "memd($src1<<#$src2+#$src3) = $src4",
+ [(store (i64 DoubleRegs:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10 in
+multiclass ST_indirect_lo<string OpcStr, PatFrag OpNode> {
+ def _lo_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
+ IntRegs:$src4),
+ !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"),
+ [(OpNode (i32 IntRegs:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))]>,
+ Requires<[HasV4T]>;
+}
+
+defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>;
+defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>;
+defm STriw_ind : ST_indirect_lo<"memw", store>;
+
+// Store - absolute addressing mode: These instruction take constant
+// value as the extended operand.
+multiclass ST_absimm<string OpcStr> {
+let isExtended = 1, opExtendable = 0, isPredicable = 1,
+validSubTargets = HasV4SubT in
+ def _abs_V4 : STInst2<(outs),
+ (ins u0AlwaysExt:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(##$src1) = $src2"),
+ []>,
+ Requires<[HasV4T]>;
+
+let isExtended = 1, opExtendable = 1, isPredicated = 1,
+validSubTargets = HasV4SubT in {
+ def _abs_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if ($src1.new)",
+ !strconcat(OpcStr, "(##$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1.new)",
+ !strconcat(OpcStr, "(##$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+let isExtended = 1, opExtendable = 0, mayStore = 1, isNVStore = 1,
+validSubTargets = HasV4SubT in
+ def _abs_nv_V4 : NVInst_V4<(outs),
+ (ins u0AlwaysExt:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(##$src1) = $src2.new"),
+ []>,
+ Requires<[HasV4T]>;
+
+let isExtended = 1, opExtendable = 1, mayStore = 1, isPredicated = 1,
+isNVStore = 1, validSubTargets = HasV4SubT in {
+ def _abs_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)",
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)",
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if ($src1.new)",
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1.new)",
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+}
+}
+
+defm STrib_imm : ST_absimm<"memb">;
+defm STrih_imm : ST_absimm<"memh">;
+defm STriw_imm : ST_absimm<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STrib_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STrih_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+
+def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STriw_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+}
+
+// Load - absolute addressing mode: These instruction take constant
+// value as the extended operand
+
+multiclass LD_absimm<string OpcStr> {
+let isExtended = 1, opExtendable = 1, isPredicable = 1,
+validSubTargets = HasV4SubT in
+ def _abs_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins u0AlwaysExt:$src),
+ !strconcat("$dst = ",
+ !strconcat(OpcStr, "(##$src)")),
+ []>,
+ Requires<[HasV4T]>;
+
+let isExtended = 1, opExtendable = 2, isPredicated = 1,
+validSubTargets = HasV4SubT in {
+ def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
+ !strconcat("if ($src1) $dst = ",
+ !strconcat(OpcStr, "(##$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
+ !strconcat("if (!$src1) $dst = ",
+ !strconcat(OpcStr, "(##$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
+ !strconcat("if ($src1.new) $dst = ",
+ !strconcat(OpcStr, "(##$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
+ !strconcat("if (!$src1.new) $dst = ",
+ !strconcat(OpcStr, "(##$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+}
+}
+
+defm LDrib_imm : LD_absimm<"memb">;
+defm LDriub_imm : LD_absimm<"memub">;
+defm LDrih_imm : LD_absimm<"memh">;
+defm LDriuh_imm : LD_absimm<"memuh">;
+defm LDriw_imm : LD_absimm<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in {
+def : Pat<(i32 (load u0AlwaysExtPred:$src)),
+ (LDriw_imm_abs_V4 u0AlwaysExtPred:$src)>;
+
+def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)),
+ (LDrib_imm_abs_V4 u0AlwaysExtPred:$src)>;
+
+def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)),
+ (LDriub_imm_abs_V4 u0AlwaysExtPred:$src)>;
+
+def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)),
+ (LDrih_imm_abs_V4 u0AlwaysExtPred:$src)>;
+
+def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)),
+ (LDriuh_imm_abs_V4 u0AlwaysExtPred:$src)>;
+}
+
+// Indexed store double word - global address.
+// memw(Rs+#u6:2)=#S8
+let AddedComplexity = 10 in
+def STriw_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3),
+ "memw($src1+#$src2) = ##$src3",
+ [(store (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+
+// Indexed store double word - global address.
+// memw(Rs+#u6:2)=#S8
+let AddedComplexity = 10 in
+def STrih_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, globaladdress:$src3),
+ "memh($src1+#$src2) = ##$src3",
+ [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+// Map from store(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i64 DoubleRegs:$src1),
+ FoldGlobalAddrGP:$addr),
+ (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr,
+ (i64 DoubleRegs:$src1)),
+ (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i64 (load FoldGlobalAddrGP:$addr)),
+ (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr),
+ (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+//let AddedComplexity = 100 in
+let AddedComplexity = 100 in
+def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr),
+ (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memub(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr),
+ (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
+ (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
+ (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
new file mode 100644
index 000000000000..92d098cc0446
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -0,0 +1,626 @@
+def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [
+ SDTCisVT<0, f32>,
+ SDTCisPtrTy<1>]>;
+def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set (f32 IntRegs:$dst),
+ (HexagonFCONST32 tglobaladdr:$global))]>,
+ Requires<[HasV5T]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1),
+ "$dst = CONST64(#$src1)",
+ [(set DoubleRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
+ "$dst = CONST32(#$src1)",
+ [(set IntRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+// Transfer immediate float.
+// Only works with single precision fp value.
+// For double precision, use CONST64_float_real, as 64bit transfer
+// can only hold 40-bit values - 32 from const ext + 8 bit immediate.
+let isMoveImm = 1, isReMaterializable = 1, isPredicable = 1 in
+def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32imm:$src1),
+ "$dst = ##$src1",
+ [(set IntRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32imm:$src2),
+ "if ($src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV5T]>;
+
+let isPredicated = 1 in
+def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32imm:$src2),
+ "if (!$src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV5T]>;
+
+// Convert single precision to double precision and vice-versa.
+def CONVERT_sf2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2df($src)",
+ [(set DoubleRegs:$dst, (fextend IntRegs:$src))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_df2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2sf($src)",
+ [(set IntRegs:$dst, (fround DoubleRegs:$src))]>,
+ Requires<[HasV5T]>;
+
+
+// Load.
+def LDrid_f : LDInst<(outs DoubleRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memd($addr)",
+ [(set DoubleRegs:$dst, (f64 (load ADDRriS11_3:$addr)))]>,
+ Requires<[HasV5T]>;
+
+
+let AddedComplexity = 20 in
+def LDrid_indexed_f : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s11_3Imm:$offset),
+ "$dst = memd($src1+#$offset)",
+ [(set DoubleRegs:$dst, (f64 (load (add IntRegs:$src1,
+ s11_3ImmPred:$offset))))]>,
+ Requires<[HasV5T]>;
+
+def LDriw_f : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr), "$dst = memw($addr)",
+ [(set IntRegs:$dst, (f32 (load ADDRriS11_2:$addr)))]>,
+ Requires<[HasV5T]>;
+
+
+let AddedComplexity = 20 in
+def LDriw_indexed_f : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_2Imm:$offset),
+ "$dst = memw($src1+#$offset)",
+ [(set IntRegs:$dst, (f32 (load (add IntRegs:$src1,
+ s11_2ImmPred:$offset))))]>,
+ Requires<[HasV5T]>;
+
+// Store.
+def STriw_f : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1",
+ [(store (f32 IntRegs:$src1), ADDRriS11_2:$addr)]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 10 in
+def STriw_indexed_f : STInst<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3",
+ [(store (f32 IntRegs:$src3),
+ (add IntRegs:$src1, s11_2ImmPred:$src2))]>,
+ Requires<[HasV5T]>;
+
+def STrid_f : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memd($addr) = $src1",
+ [(store (f64 DoubleRegs:$src1), ADDRriS11_2:$addr)]>,
+ Requires<[HasV5T]>;
+
+// Indexed store double word.
+let AddedComplexity = 10 in
+def STrid_indexed_f : STInst<(outs),
+ (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
+ "memd($src1+#$src2) = $src3",
+ [(store (f64 DoubleRegs:$src3),
+ (add IntRegs:$src1, s11_3ImmPred:$src2))]>,
+ Requires<[HasV5T]>;
+
+
+// Add
+let isCommutable = 1 in
+def fADD_rr : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfadd($src1, $src2)",
+ [(set IntRegs:$dst, (fadd IntRegs:$src1, IntRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+let isCommutable = 1 in
+def fADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = dfadd($src1, $src2)",
+ [(set DoubleRegs:$dst, (fadd DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+def fSUB_rr : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfsub($src1, $src2)",
+ [(set IntRegs:$dst, (fsub IntRegs:$src1, IntRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+def fSUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = dfsub($src1, $src2)",
+ [(set DoubleRegs:$dst, (fsub DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+let isCommutable = 1 in
+def fMUL_rr : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfmpy($src1, $src2)",
+ [(set IntRegs:$dst, (fmul IntRegs:$src1, IntRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+let isCommutable = 1 in
+def fMUL64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = dfmpy($src1, $src2)",
+ [(set DoubleRegs:$dst, (fmul DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+// Compare.
+let isCompare = 1 in {
+multiclass FCMP64_rr<string OpcStr, PatFrag OpNode> {
+ def _rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst,
+ (OpNode (f64 DoubleRegs:$b), (f64 DoubleRegs:$c)))]>,
+ Requires<[HasV5T]>;
+}
+
+multiclass FCMP32_rr<string OpcStr, PatFrag OpNode> {
+ def _rr : ALU64_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst,
+ (OpNode (f32 IntRegs:$b), (f32 IntRegs:$c)))]>,
+ Requires<[HasV5T]>;
+}
+}
+
+defm FCMPOEQ64 : FCMP64_rr<"dfcmp.eq", setoeq>;
+defm FCMPUEQ64 : FCMP64_rr<"dfcmp.eq", setueq>;
+defm FCMPOGT64 : FCMP64_rr<"dfcmp.gt", setogt>;
+defm FCMPUGT64 : FCMP64_rr<"dfcmp.gt", setugt>;
+defm FCMPOGE64 : FCMP64_rr<"dfcmp.ge", setoge>;
+defm FCMPUGE64 : FCMP64_rr<"dfcmp.ge", setuge>;
+
+defm FCMPOEQ32 : FCMP32_rr<"sfcmp.eq", setoeq>;
+defm FCMPUEQ32 : FCMP32_rr<"sfcmp.eq", setueq>;
+defm FCMPOGT32 : FCMP32_rr<"sfcmp.gt", setogt>;
+defm FCMPUGT32 : FCMP32_rr<"sfcmp.gt", setugt>;
+defm FCMPOGE32 : FCMP32_rr<"sfcmp.ge", setoge>;
+defm FCMPUGE32 : FCMP32_rr<"sfcmp.ge", setuge>;
+
+// olt.
+def : Pat <(i1 (setolt (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (FCMPOGT32_rr IntRegs:$src2, IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setolt (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPOGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (FCMPOGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPOGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
+ (f64 DoubleRegs:$src1)))>,
+ Requires<[HasV5T]>;
+
+// gt.
+def : Pat <(i1 (setugt (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGT64_rr (f64 DoubleRegs:$src1),
+ (f64 (CONST64_Float_Real fpimm:$src2))))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setugt (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGT32_rr (f32 IntRegs:$src1), (f32 (TFRI_f fpimm:$src2))))>,
+ Requires<[HasV5T]>;
+
+// ult.
+def : Pat <(i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setult (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
+ (f64 DoubleRegs:$src1)))>,
+ Requires<[HasV5T]>;
+
+// le.
+// rs <= rt -> rt >= rs.
+def : Pat<(i1 (setole (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (FCMPOGE32_rr IntRegs:$src2, IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setole (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPOGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+
+// Rss <= Rtt -> Rtt >= Rss.
+def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (FCMPOGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPOGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
+ DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+// rs <= rt -> rt >= rs.
+def : Pat<(i1 (setule (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (FCMPUGE32_rr IntRegs:$src2, IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setule (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+// Rss <= Rtt -> Rtt >= Rss.
+def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (FCMPUGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
+ DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+// ne.
+def : Pat<(i1 (setone (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setune (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setone (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1,
+ (f64 (CONST64_Float_Real fpimm:$src2)))))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setune (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1,
+ (f64 (CONST64_Float_Real fpimm:$src2)))))>,
+ Requires<[HasV5T]>;
+
+// Convert Integer to Floating Point.
+def CONVERT_d2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_d2sf($src)",
+ [(set (f32 IntRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_ud2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_ud2sf($src)",
+ [(set (f32 IntRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_uw2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_uw2sf($src)",
+ [(set (f32 IntRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_w2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_w2sf($src)",
+ [(set (f32 IntRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_d2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_d2df($src)",
+ [(set (f64 DoubleRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_ud2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_ud2df($src)",
+ [(set (f64 DoubleRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_uw2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_uw2df($src)",
+ [(set (f64 DoubleRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_w2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_w2df($src)",
+ [(set (f64 DoubleRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+// Convert Floating Point to Integer - default.
+def CONVERT_df2uw : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2uw($src):chop",
+ [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_df2w : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2w($src):chop",
+ [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_sf2uw : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2uw($src):chop",
+ [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_sf2w : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2w($src):chop",
+ [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_df2d : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2d($src):chop",
+ [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_df2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2ud($src):chop",
+ [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_sf2d : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2d($src):chop",
+ [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_sf2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2ud($src):chop",
+ [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+// Convert Floating Point to Integer: non-chopped.
+let AddedComplexity = 20 in
+def CONVERT_df2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2uw($src)",
+ [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_df2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2w($src)",
+ [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_sf2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2uw($src)",
+ [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_sf2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2w($src)",
+ [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_df2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2d($src)",
+ [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_df2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2ud($src)",
+ [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_sf2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2d($src)",
+ [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_sf2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2ud($src)",
+ [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+
+
+// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
+def : Pat <(i32 (bitconvert (f32 IntRegs:$src))),
+ (i32 (TFR IntRegs:$src))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(f32 (bitconvert (i32 IntRegs:$src))),
+ (f32 (TFR IntRegs:$src))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i64 (bitconvert (f64 DoubleRegs:$src))),
+ (i64 (TFR64 DoubleRegs:$src))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(f64 (bitconvert (i64 DoubleRegs:$src))),
+ (f64 (TFR64 DoubleRegs:$src))>,
+ Requires<[HasV5T]>;
+
+// Floating point fused multiply-add.
+def FMADD_dp : ALU64_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ "$dst += dfmpy($src2, $src3)",
+ [(set (f64 DoubleRegs:$dst),
+ (fma DoubleRegs:$src2, DoubleRegs:$src3, DoubleRegs:$src1))],
+ "$src1 = $dst">,
+ Requires<[HasV5T]>;
+
+def FMADD_sp : ALU64_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += sfmpy($src2, $src3)",
+ [(set (f32 IntRegs:$dst),
+ (fma IntRegs:$src2, IntRegs:$src3, IntRegs:$src1))],
+ "$src1 = $dst">,
+ Requires<[HasV5T]>;
+
+
+// Floating point max/min.
+let AddedComplexity = 100 in
+def FMAX_dp : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = dfmax($src1, $src2)",
+ [(set DoubleRegs:$dst, (f64 (select (i1 (setolt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100 in
+def FMAX_sp : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfmax($src1, $src2)",
+ [(set IntRegs:$dst, (f32 (select (i1 (setolt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100 in
+def FMIN_dp : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = dfmin($src1, $src2)",
+ [(set DoubleRegs:$dst, (f64 (select (i1 (setogt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100 in
+def FMIN_sp : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfmin($src1, $src2)",
+ [(set IntRegs:$dst, (f32 (select (i1 (setogt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+// Pseudo instruction to encode a set of conditional transfers.
+// This instruction is used instead of a mux and trades-off codesize
+// for performance. We conduct this transformation optimistically in
+// the hope that these instructions get promoted to dot-new transfers.
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_rr_f : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (f32 (select PredRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_rr64_f : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ "Error; should not emit",
+ [(set DoubleRegs:$dst, (f64 (select PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3)))]>,
+ Requires<[HasV5T]>;
+
+
+
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_ri_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, f32imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst,
+ (f32 (select PredRegs:$src1, IntRegs:$src2, fpimm:$src3)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_ir_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32imm:$src2, IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst,
+ (f32 (select PredRegs:$src1, fpimm:$src2, IntRegs:$src3)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_ii_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32imm:$src2, f32imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (f32 (select PredRegs:$src1,
+ fpimm:$src2,
+ fpimm:$src3)))]>,
+ Requires<[HasV5T]>;
+
+
+def : Pat <(select (i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (f32 IntRegs:$src3),
+ (f32 IntRegs:$src4)),
+ (TFR_condset_rr_f (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1), IntRegs:$src4,
+ IntRegs:$src3)>, Requires<[HasV5T]>;
+
+def : Pat <(select (i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (f64 DoubleRegs:$src3),
+ (f64 DoubleRegs:$src4)),
+ (TFR_condset_rr64_f (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1),
+ DoubleRegs:$src4, DoubleRegs:$src3)>, Requires<[HasV5T]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not PredRegs:$src1), fpimm:$src2, fpimm:$src3),
+ (TFR_condset_ii_f PredRegs:$src1, fpimm:$src3, fpimm:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = TFR_condset_ri(p0, r1, #i)
+def : Pat <(select (not PredRegs:$src1), fpimm:$src2, IntRegs:$src3),
+ (TFR_condset_ri_f PredRegs:$src1, IntRegs:$src3, fpimm:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = TFR_condset_ir(p0, #i, r1)
+def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, fpimm:$src3),
+ (TFR_condset_ir_f PredRegs:$src1, fpimm:$src3, IntRegs:$src2)>;
+
+def : Pat <(i32 (fp_to_sint (f64 DoubleRegs:$src1))),
+ (i32 (EXTRACT_SUBREG (i64 (CONVERT_df2d (f64 DoubleRegs:$src1))), subreg_loreg))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fabs (f32 IntRegs:$src1)),
+ (CLRBIT_31 (f32 IntRegs:$src1), 31)>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fneg (f32 IntRegs:$src1)),
+ (TOGBIT_31 (f32 IntRegs:$src1), 31)>,
+ Requires<[HasV5T]>;
+
+/*
+def : Pat <(fabs (f64 DoubleRegs:$src1)),
+ (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fabs (f64 DoubleRegs:$src1)),
+ (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
+ Requires<[HasV5T]>;
+ */
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
new file mode 100644
index 000000000000..99f59d5ea669
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -0,0 +1,3503 @@
+//===-- HexagonIntrinsics.td - Instruction intrinsics ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V2 Architecture
+// Application-Level Specification
+// 80-V9418-8 Rev. B
+// March 4, 2008
+//===----------------------------------------------------------------------===//
+
+//
+// ALU 32 types.
+//
+
+class qi_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_siu8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_qisisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_ALU32_qis8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+class si_ALU32_qisis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_ALU32_qis8s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+class si_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sisi_sat<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sisi_rnd<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sis16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_s10si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+class si_lo_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.l = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_hi_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.h = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_s16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set IntRegs:$dst, (IntID imm:$src1))]>;
+
+class di_ALU32_s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
+
+class di_ALU64_di<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_ALU32_si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_ALU32_si_tfr<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+//
+// ALU 64 types.
+//
+
+class si_ALU64_si_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_ALU64_sidi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_qididi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3))]>;
+
+class di_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_ALU64_didi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_rnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_crnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class qi_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+//
+// SInst classes.
+//
+
+class qi_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class qi_SInst_qi_pxfer<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class qi_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_qiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class di_SInst_di_sat<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_SInst_di_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class di_SInst_disi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_didi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class si_SInst_sisiu3<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_SInst_diu5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_disi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_sidi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_SInst_disisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_siu5<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_SInst_siu6<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_si_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class di_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_diu6<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5_rnd<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5u5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+class si_SInst_sisisi_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1, IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5u5<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisidi<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6u6<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2, u6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class di_SInst_dididi<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_diu6u6<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2,
+ u6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2,
+ imm:$src3))]>;
+
+class di_SInst_didiqi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_didiu3<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ u3Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ imm:$src3))]>;
+
+class di_SInst_didisi_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_xor<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+
+class si_SInst_sisiu5_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_xor<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1, imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+
+//
+// MInst classes.
+//
+
+class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_disisi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_s8s8<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>;
+
+class si_MInst_sis9<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_MInst_sisi<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_lh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hl<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_ll<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+
+class si_MInst_sisi_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_up<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class si_SInst_sisi_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_didi_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_SInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisisi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisis8_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisis8_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu4u5<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u4Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu8_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ u8Imm:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu8_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ u8Imm:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+
+class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_didi<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+//
+// LDInst classes.
+//
+let mayLoad = 1, neverHasSideEffects = 1 in
+class di_LDInstPI_diu4<string opc, Intrinsic IntID>
+ : LDInstPI<(outs IntRegs:$dst, DoubleRegs:$dst2),
+ (ins IntRegs:$src1, IntRegs:$src2, CRRegs:$src3, s4Imm:$offset),
+ "$dst2 = memd($src1++#$offset:circ($src3))",
+ [],
+ "$src1 = $dst">;
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Add.
+def HEXAGON_A2_add:
+ si_ALU32_sisi <"add", int_hexagon_A2_add>;
+def HEXAGON_A2_addi:
+ si_ALU32_sis16 <"add", int_hexagon_A2_addi>;
+
+// ALU32 / ALU / Logical operations.
+def HEXAGON_A2_and:
+ si_ALU32_sisi <"and", int_hexagon_A2_and>;
+def HEXAGON_A2_andir:
+ si_ALU32_sis10 <"and", int_hexagon_A2_andir>;
+def HEXAGON_A2_not:
+ si_ALU32_si <"not", int_hexagon_A2_not>;
+def HEXAGON_A2_or:
+ si_ALU32_sisi <"or", int_hexagon_A2_or>;
+def HEXAGON_A2_orir:
+ si_ALU32_sis10 <"or", int_hexagon_A2_orir>;
+def HEXAGON_A2_xor:
+ si_ALU32_sisi <"xor", int_hexagon_A2_xor>;
+
+// ALU32 / ALU / Negate.
+def HEXAGON_A2_neg:
+ si_ALU32_si <"neg", int_hexagon_A2_neg>;
+
+// ALU32 / ALU / Subtract.
+def HEXAGON_A2_sub:
+ si_ALU32_sisi <"sub", int_hexagon_A2_sub>;
+def HEXAGON_A2_subri:
+ si_ALU32_s10si <"sub", int_hexagon_A2_subri>;
+
+// ALU32 / ALU / Transfer Immediate.
+def HEXAGON_A2_tfril:
+ si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
+def HEXAGON_A2_tfrih:
+ si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
+def HEXAGON_A2_tfrsi:
+ si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
+def HEXAGON_A2_tfrpi:
+ di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
+
+// ALU32 / ALU / Transfer Register.
+def HEXAGON_A2_tfr:
+ si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine.
+def HEXAGON_A2_combinew:
+ di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
+def HEXAGON_A2_combine_hh:
+ si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
+def HEXAGON_A2_combine_lh:
+ si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
+def HEXAGON_A2_combine_hl:
+ si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
+def HEXAGON_A2_combine_ll:
+ si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
+def HEXAGON_A2_combineii:
+ di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
+
+// ALU32 / PERM / Mux.
+def HEXAGON_C2_mux:
+ si_ALU32_qisisi <"mux", int_hexagon_C2_mux>;
+def HEXAGON_C2_muxri:
+ si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>;
+def HEXAGON_C2_muxir:
+ si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>;
+def HEXAGON_C2_muxii:
+ si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>;
+
+// ALU32 / PERM / Shift halfword.
+def HEXAGON_A2_aslh:
+ si_ALU32_si <"aslh", int_hexagon_A2_aslh>;
+def HEXAGON_A2_asrh:
+ si_ALU32_si <"asrh", int_hexagon_A2_asrh>;
+def SI_to_SXTHI_asrh:
+ si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>;
+
+// ALU32 / PERM / Sign/zero extend.
+def HEXAGON_A2_sxth:
+ si_ALU32_si <"sxth", int_hexagon_A2_sxth>;
+def HEXAGON_A2_sxtb:
+ si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>;
+def HEXAGON_A2_zxth:
+ si_ALU32_si <"zxth", int_hexagon_A2_zxth>;
+def HEXAGON_A2_zxtb:
+ si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>;
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Compare.
+def HEXAGON_C2_cmpeq:
+ qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
+def HEXAGON_C2_cmpeqi:
+ qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
+def HEXAGON_C2_cmpgei:
+ qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
+def HEXAGON_C2_cmpgeui:
+ qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
+def HEXAGON_C2_cmpgt:
+ qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
+def HEXAGON_C2_cmpgti:
+ qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
+def HEXAGON_C2_cmpgtu:
+ qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
+def HEXAGON_C2_cmpgtui:
+ qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
+def HEXAGON_C2_cmplt:
+ qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
+def HEXAGON_C2_cmpltu:
+ qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
+
+/********************************************************************
+* ALU32/VH *
+*********************************************************************/
+
+// ALU32 / VH / Vector add halfwords.
+// Rd32=vadd[u]h(Rs32,Rt32:sat]
+def HEXAGON_A2_svaddh:
+ si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
+def HEXAGON_A2_svaddhs:
+ si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
+def HEXAGON_A2_svadduhs:
+ si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
+
+// ALU32 / VH / Vector average halfwords.
+def HEXAGON_A2_svavgh:
+ si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
+def HEXAGON_A2_svavghs:
+ si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
+def HEXAGON_A2_svnavgh:
+ si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
+
+// ALU32 / VH / Vector subtract halfwords.
+def HEXAGON_A2_svsubh:
+ si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
+def HEXAGON_A2_svsubhs:
+ si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
+def HEXAGON_A2_svsubuhs:
+ si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def HEXAGON_A2_addp:
+ di_ALU64_didi <"add", int_hexagon_A2_addp>;
+def HEXAGON_A2_addsat:
+ si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
+
+// ALU64 / ALU / Add halfword.
+// Even though the definition says hl, it should be lh -
+//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits.
+def HEXAGON_A2_addh_l16_hl:
+ si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
+def HEXAGON_A2_addh_l16_ll:
+ si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
+
+def HEXAGON_A2_addh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
+def HEXAGON_A2_addh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
+
+def HEXAGON_A2_addh_h16_hh:
+ si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
+def HEXAGON_A2_addh_h16_hl:
+ si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
+def HEXAGON_A2_addh_h16_lh:
+ si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
+def HEXAGON_A2_addh_h16_ll:
+ si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
+
+def HEXAGON_A2_addh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
+def HEXAGON_A2_addh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
+def HEXAGON_A2_addh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
+def HEXAGON_A2_addh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
+
+// ALU64 / ALU / Compare.
+def HEXAGON_C2_cmpeqp:
+ qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
+def HEXAGON_C2_cmpgtp:
+ qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
+def HEXAGON_C2_cmpgtup:
+ qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
+
+// ALU64 / ALU / Logical operations.
+def HEXAGON_A2_andp:
+ di_ALU64_didi <"and", int_hexagon_A2_andp>;
+def HEXAGON_A2_orp:
+ di_ALU64_didi <"or", int_hexagon_A2_orp>;
+def HEXAGON_A2_xorp:
+ di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
+
+// ALU64 / ALU / Maximum.
+def HEXAGON_A2_max:
+ si_ALU64_sisi <"max", int_hexagon_A2_max>;
+def HEXAGON_A2_maxu:
+ si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
+
+// ALU64 / ALU / Minimum.
+def HEXAGON_A2_min:
+ si_ALU64_sisi <"min", int_hexagon_A2_min>;
+def HEXAGON_A2_minu:
+ si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
+
+// ALU64 / ALU / Subtract.
+def HEXAGON_A2_subp:
+ di_ALU64_didi <"sub", int_hexagon_A2_subp>;
+def HEXAGON_A2_subsat:
+ si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
+
+// ALU64 / ALU / Subtract halfword.
+// Even though the definition says hl, it should be lh -
+//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits.
+def HEXAGON_A2_subh_l16_hl:
+ si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>;
+def HEXAGON_A2_subh_l16_ll:
+ si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>;
+
+def HEXAGON_A2_subh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>;
+def HEXAGON_A2_subh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>;
+
+def HEXAGON_A2_subh_h16_hh:
+ si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>;
+def HEXAGON_A2_subh_h16_hl:
+ si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>;
+def HEXAGON_A2_subh_h16_lh:
+ si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>;
+def HEXAGON_A2_subh_h16_ll:
+ si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>;
+
+def HEXAGON_A2_subh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>;
+def HEXAGON_A2_subh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>;
+def HEXAGON_A2_subh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>;
+def HEXAGON_A2_subh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>;
+
+// ALU64 / ALU / Transfer register.
+def HEXAGON_A2_tfrp:
+ di_ALU64_di <"", int_hexagon_A2_tfrp>;
+
+/********************************************************************
+* ALU64/BIT *
+*********************************************************************/
+
+// ALU64 / BIT / Masked parity.
+def HEXAGON_S2_parityp:
+ si_ALU64_didi <"parity", int_hexagon_S2_parityp>;
+
+/********************************************************************
+* ALU64/PERM *
+*********************************************************************/
+
+// ALU64 / PERM / Vector pack high and low halfwords.
+def HEXAGON_S2_packhl:
+ di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>;
+
+/********************************************************************
+* ALU64/VB *
+*********************************************************************/
+
+// ALU64 / VB / Vector add unsigned bytes.
+def HEXAGON_A2_vaddub:
+ di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>;
+def HEXAGON_A2_vaddubs:
+ di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>;
+
+// ALU64 / VB / Vector average unsigned bytes.
+def HEXAGON_A2_vavgub:
+ di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>;
+def HEXAGON_A2_vavgubr:
+ di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>;
+
+// ALU64 / VB / Vector compare unsigned bytes.
+def HEXAGON_A2_vcmpbeq:
+ qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>;
+def HEXAGON_A2_vcmpbgtu:
+ qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>;
+
+// ALU64 / VB / Vector maximum/minimum unsigned bytes.
+def HEXAGON_A2_vmaxub:
+ di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>;
+def HEXAGON_A2_vminub:
+ di_ALU64_didi <"vminub", int_hexagon_A2_vminub>;
+
+// ALU64 / VB / Vector subtract unsigned bytes.
+def HEXAGON_A2_vsubub:
+ di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>;
+def HEXAGON_A2_vsububs:
+ di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>;
+
+// ALU64 / VB / Vector mux.
+def HEXAGON_C2_vmux:
+ di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>;
+
+
+/********************************************************************
+* ALU64/VH *
+*********************************************************************/
+
+// ALU64 / VH / Vector add halfwords.
+// Rdd64=vadd[u]h(Rss64,Rtt64:sat]
+def HEXAGON_A2_vaddh:
+ di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
+def HEXAGON_A2_vaddhs:
+ di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
+def HEXAGON_A2_vadduhs:
+ di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
+
+// ALU64 / VH / Vector average halfwords.
+// Rdd64=v[n]avg[u]h(Rss64,Rtt64:rnd/:crnd][:sat]
+def HEXAGON_A2_vavgh:
+ di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
+def HEXAGON_A2_vavghcr:
+ di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
+def HEXAGON_A2_vavghr:
+ di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
+def HEXAGON_A2_vavguh:
+ di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
+def HEXAGON_A2_vavguhr:
+ di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
+def HEXAGON_A2_vnavgh:
+ di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
+def HEXAGON_A2_vnavghcr:
+ di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
+def HEXAGON_A2_vnavghr:
+ di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
+
+// ALU64 / VH / Vector compare halfwords.
+def HEXAGON_A2_vcmpheq:
+ qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
+def HEXAGON_A2_vcmphgt:
+ qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
+def HEXAGON_A2_vcmphgtu:
+ qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>;
+
+// ALU64 / VH / Vector maximum halfwords.
+def HEXAGON_A2_vmaxh:
+ di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
+def HEXAGON_A2_vmaxuh:
+ di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
+
+// ALU64 / VH / Vector minimum halfwords.
+def HEXAGON_A2_vminh:
+ di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
+def HEXAGON_A2_vminuh:
+ di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
+
+// ALU64 / VH / Vector subtract halfwords.
+def HEXAGON_A2_vsubh:
+ di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
+def HEXAGON_A2_vsubhs:
+ di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
+def HEXAGON_A2_vsubuhs:
+ di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
+
+
+/********************************************************************
+* ALU64/VW *
+*********************************************************************/
+
+// ALU64 / VW / Vector add words.
+// Rdd32=vaddw(Rss32,Rtt32)[:sat]
+def HEXAGON_A2_vaddw:
+ di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
+def HEXAGON_A2_vaddws:
+ di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
+
+// ALU64 / VW / Vector average words.
+def HEXAGON_A2_vavguw:
+ di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>;
+def HEXAGON_A2_vavguwr:
+ di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>;
+def HEXAGON_A2_vavgw:
+ di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>;
+def HEXAGON_A2_vavgwcr:
+ di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>;
+def HEXAGON_A2_vavgwr:
+ di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;
+def HEXAGON_A2_vnavgw:
+ di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>;
+def HEXAGON_A2_vnavgwcr:
+ di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>;
+def HEXAGON_A2_vnavgwr:
+ di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>;
+
+// ALU64 / VW / Vector compare words.
+def HEXAGON_A2_vcmpweq:
+ qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>;
+def HEXAGON_A2_vcmpwgt:
+ qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>;
+def HEXAGON_A2_vcmpwgtu:
+ qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>;
+
+// ALU64 / VW / Vector maximum words.
+def HEXAGON_A2_vmaxw:
+ di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>;
+def HEXAGON_A2_vmaxuw:
+ di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>;
+
+// ALU64 / VW / Vector minimum words.
+def HEXAGON_A2_vminw:
+ di_ALU64_didi <"vminw", int_hexagon_A2_vminw>;
+def HEXAGON_A2_vminuw:
+ di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>;
+
+// ALU64 / VW / Vector subtract words.
+def HEXAGON_A2_vsubw:
+ di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>;
+def HEXAGON_A2_vsubws:
+ di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Logical reductions on predicates.
+def HEXAGON_C2_all8:
+ qi_SInst_qi <"all8", int_hexagon_C2_all8>;
+def HEXAGON_C2_any8:
+ qi_SInst_qi <"any8", int_hexagon_C2_any8>;
+
+// CR / Logical operations on predicates.
+def HEXAGON_C2_pxfer_map:
+ qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>;
+def HEXAGON_C2_and:
+ qi_SInst_qiqi <"and", int_hexagon_C2_and>;
+def HEXAGON_C2_andn:
+ qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>;
+def HEXAGON_C2_not:
+ qi_SInst_qi <"not", int_hexagon_C2_not>;
+def HEXAGON_C2_or:
+ qi_SInst_qiqi <"or", int_hexagon_C2_or>;
+def HEXAGON_C2_orn:
+ qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>;
+def HEXAGON_C2_xor:
+ qi_SInst_qiqi <"xor", int_hexagon_C2_xor>;
+
+
+/********************************************************************
+* MTYPE/ALU *
+*********************************************************************/
+
+// MTYPE / ALU / Add and accumulate.
+def HEXAGON_M2_acci:
+ si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>;
+def HEXAGON_M2_accii:
+ si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>;
+def HEXAGON_M2_nacci:
+ si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>;
+def HEXAGON_M2_naccii:
+ si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>;
+
+// MTYPE / ALU / Subtract and accumulate.
+def HEXAGON_M2_subacc:
+ si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>;
+
+// MTYPE / ALU / Vector absolute difference.
+def HEXAGON_M2_vabsdiffh:
+ di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>;
+def HEXAGON_M2_vabsdiffw:
+ di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>;
+
+// MTYPE / ALU / XOR and xor with destination.
+def HEXAGON_M2_xor_xacc:
+ si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>;
+
+
+/********************************************************************
+* MTYPE/COMPLEX *
+*********************************************************************/
+
+// MTYPE / COMPLEX / Complex multiply.
+// Rdd[-+]=cmpy(Rs, Rt:<<1]:sat
+def HEXAGON_M2_cmpys_s1:
+ di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
+def HEXAGON_M2_cmpys_s0:
+ di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
+def HEXAGON_M2_cmpysc_s1:
+ di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
+def HEXAGON_M2_cmpysc_s0:
+ di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
+
+def HEXAGON_M2_cmacs_s1:
+ di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
+def HEXAGON_M2_cmacs_s0:
+ di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
+def HEXAGON_M2_cmacsc_s1:
+ di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
+def HEXAGON_M2_cmacsc_s0:
+ di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
+
+def HEXAGON_M2_cnacs_s1:
+ di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
+def HEXAGON_M2_cnacs_s0:
+ di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
+def HEXAGON_M2_cnacsc_s1:
+ di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
+def HEXAGON_M2_cnacsc_s0:
+ di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
+
+// MTYPE / COMPLEX / Complex multiply real or imaginary.
+def HEXAGON_M2_cmpyr_s0:
+ di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
+def HEXAGON_M2_cmacr_s0:
+ di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
+
+def HEXAGON_M2_cmpyi_s0:
+ di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
+def HEXAGON_M2_cmaci_s0:
+ di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
+
+// MTYPE / COMPLEX / Complex multiply with round and pack.
+// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat
+def HEXAGON_M2_cmpyrs_s0:
+ si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
+def HEXAGON_M2_cmpyrs_s1:
+ si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
+
+def HEXAGON_M2_cmpyrsc_s0:
+ si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
+def HEXAGON_M2_cmpyrsc_s1:
+ si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
+
+//MTYPE / COMPLEX / Vector complex multiply real or imaginary.
+def HEXAGON_M2_vcmpy_s0_sat_i:
+ di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
+def HEXAGON_M2_vcmpy_s1_sat_i:
+ di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
+
+def HEXAGON_M2_vcmpy_s0_sat_r:
+ di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
+def HEXAGON_M2_vcmpy_s1_sat_r:
+ di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
+
+def HEXAGON_M2_vcmac_s0_sat_i:
+ di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
+def HEXAGON_M2_vcmac_s0_sat_r:
+ di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
+
+//MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def HEXAGON_M2_vrcmpyi_s0:
+ di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>;
+def HEXAGON_M2_vrcmpyr_s0:
+ di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>;
+
+def HEXAGON_M2_vrcmpyi_s0c:
+ di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>;
+def HEXAGON_M2_vrcmpyr_s0c:
+ di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>;
+
+def HEXAGON_M2_vrcmaci_s0:
+ di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>;
+def HEXAGON_M2_vrcmacr_s0:
+ di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>;
+
+def HEXAGON_M2_vrcmaci_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>;
+def HEXAGON_M2_vrcmacr_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>;
+
+
+/********************************************************************
+* MTYPE/MPYH *
+*********************************************************************/
+
+// MTYPE / MPYH / Multiply and use lower result.
+//def HEXAGON_M2_mpysmi:
+//FIXME: Hexagon_M2_mpysmi should really by of the type si_MInst_sim9,
+// not si_MInst_sis9 - but for now, we will use s9.
+// def Hexagon_M2_mpysmi:
+// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>;
+def Hexagon_M2_mpysmi:
+ si_MInst_sis9 <"mpyi", int_hexagon_M2_mpysmi>;
+def HEXAGON_M2_mpyi:
+ si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>;
+def HEXAGON_M2_mpyui:
+ si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>;
+def HEXAGON_M2_macsip:
+ si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>;
+def HEXAGON_M2_maci:
+ si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>;
+def HEXAGON_M2_macsin:
+ si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>;
+
+// MTYPE / MPYH / Multiply word by half (32x16).
+//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat]
+//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat]
+def HEXAGON_M2_mmpyl_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>;
+def HEXAGON_M2_mmpyl_s1:
+ di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>;
+def HEXAGON_M2_mmpyl_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>;
+def HEXAGON_M2_mmpyl_s0:
+ di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>;
+def HEXAGON_M2_mmpyh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>;
+def HEXAGON_M2_mmpyh_s1:
+ di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>;
+def HEXAGON_M2_mmpyh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>;
+def HEXAGON_M2_mmpyh_s0:
+ di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>;
+def HEXAGON_M2_mmacls_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>;
+def HEXAGON_M2_mmacls_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>;
+def HEXAGON_M2_mmacls_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>;
+def HEXAGON_M2_mmacls_s0:
+ di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>;
+def HEXAGON_M2_mmachs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>;
+def HEXAGON_M2_mmachs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>;
+def HEXAGON_M2_mmachs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>;
+def HEXAGON_M2_mmachs_s0:
+ di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>;
+
+// MTYPE / MPYH / Multiply word by unsigned half (32x16).
+//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat]
+//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat]
+def HEXAGON_M2_mmpyul_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>;
+def HEXAGON_M2_mmpyul_s1:
+ di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>;
+def HEXAGON_M2_mmpyul_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>;
+def HEXAGON_M2_mmpyul_s0:
+ di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>;
+def HEXAGON_M2_mmpyuh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>;
+def HEXAGON_M2_mmpyuh_s1:
+ di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>;
+def HEXAGON_M2_mmpyuh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>;
+def HEXAGON_M2_mmpyuh_s0:
+ di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>;
+def HEXAGON_M2_mmaculs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>;
+def HEXAGON_M2_mmaculs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>;
+def HEXAGON_M2_mmaculs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>;
+def HEXAGON_M2_mmaculs_s0:
+ di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>;
+def HEXAGON_M2_mmacuhs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>;
+def HEXAGON_M2_mmacuhs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>;
+def HEXAGON_M2_mmacuhs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>;
+def HEXAGON_M2_mmacuhs_s0:
+ di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>;
+
+// MTYPE / MPYH / Multiply and use upper result.
+def HEXAGON_M2_hmmpyh_rs1:
+ si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>;
+def HEXAGON_M2_hmmpyl_rs1:
+ si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>;
+def HEXAGON_M2_mpy_up:
+ si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>;
+def HEXAGON_M2_dpmpyss_rnd_s0:
+ si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>;
+def HEXAGON_M2_mpyu_up:
+ si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>;
+
+// MTYPE / MPYH / Multiply and use full result.
+def HEXAGON_M2_dpmpyuu_s0:
+ di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>;
+def HEXAGON_M2_dpmpyuu_acc_s0:
+ di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>;
+def HEXAGON_M2_dpmpyuu_nac_s0:
+ di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>;
+def HEXAGON_M2_dpmpyss_s0:
+ di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>;
+def HEXAGON_M2_dpmpyss_acc_s0:
+ di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>;
+def HEXAGON_M2_dpmpyss_nac_s0:
+ di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>;
+
+
+/********************************************************************
+* MTYPE/MPYS *
+*********************************************************************/
+
+// MTYPE / MPYS / Scalar 16x16 multiply signed.
+//Rd=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]|
+// [:<<0[:rnd|:sat|:rnd:sat]|:<<1[:rnd|:sat|:rnd:sat]]]
+def HEXAGON_M2_mpy_hh_s0:
+ si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>;
+def HEXAGON_M2_mpy_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>;
+def HEXAGON_M2_mpy_rnd_hh_s1:
+ si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>;
+def HEXAGON_M2_mpy_sat_rnd_hh_s1:
+ si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;
+def HEXAGON_M2_mpy_sat_hh_s1:
+ si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>;
+def HEXAGON_M2_mpy_rnd_hh_s0:
+ si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>;
+def HEXAGON_M2_mpy_sat_rnd_hh_s0:
+ si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>;
+def HEXAGON_M2_mpy_sat_hh_s0:
+ si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>;
+
+def HEXAGON_M2_mpy_hl_s0:
+ si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>;
+def HEXAGON_M2_mpy_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>;
+def HEXAGON_M2_mpy_rnd_hl_s1:
+ si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>;
+def HEXAGON_M2_mpy_sat_rnd_hl_s1:
+ si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>;
+def HEXAGON_M2_mpy_sat_hl_s1:
+ si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>;
+def HEXAGON_M2_mpy_rnd_hl_s0:
+ si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>;
+def HEXAGON_M2_mpy_sat_rnd_hl_s0:
+ si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>;
+def HEXAGON_M2_mpy_sat_hl_s0:
+ si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>;
+
+def HEXAGON_M2_mpy_lh_s0:
+ si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>;
+def HEXAGON_M2_mpy_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>;
+def HEXAGON_M2_mpy_rnd_lh_s1:
+ si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>;
+def HEXAGON_M2_mpy_sat_rnd_lh_s1:
+ si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>;
+def HEXAGON_M2_mpy_sat_lh_s1:
+ si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>;
+def HEXAGON_M2_mpy_rnd_lh_s0:
+ si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>;
+def HEXAGON_M2_mpy_sat_rnd_lh_s0:
+ si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>;
+def HEXAGON_M2_mpy_sat_lh_s0:
+ si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>;
+
+def HEXAGON_M2_mpy_ll_s0:
+ si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>;
+def HEXAGON_M2_mpy_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>;
+def HEXAGON_M2_mpy_rnd_ll_s1:
+ si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>;
+def HEXAGON_M2_mpy_sat_rnd_ll_s1:
+ si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>;
+def HEXAGON_M2_mpy_sat_ll_s1:
+ si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>;
+def HEXAGON_M2_mpy_rnd_ll_s0:
+ si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>;
+def HEXAGON_M2_mpy_sat_rnd_ll_s0:
+ si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>;
+def HEXAGON_M2_mpy_sat_ll_s0:
+ si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>;
+
+//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]]
+def HEXAGON_M2_mpyd_hh_s0:
+ di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>;
+def HEXAGON_M2_mpyd_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>;
+def HEXAGON_M2_mpyd_rnd_hh_s1:
+ di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
+def HEXAGON_M2_mpyd_rnd_hh_s0:
+ di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>;
+
+def HEXAGON_M2_mpyd_hl_s0:
+ di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>;
+def HEXAGON_M2_mpyd_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>;
+def HEXAGON_M2_mpyd_rnd_hl_s1:
+ di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>;
+def HEXAGON_M2_mpyd_rnd_hl_s0:
+ di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>;
+
+def HEXAGON_M2_mpyd_lh_s0:
+ di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>;
+def HEXAGON_M2_mpyd_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>;
+def HEXAGON_M2_mpyd_rnd_lh_s1:
+ di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>;
+def HEXAGON_M2_mpyd_rnd_lh_s0:
+ di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>;
+
+def HEXAGON_M2_mpyd_ll_s0:
+ di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>;
+def HEXAGON_M2_mpyd_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>;
+def HEXAGON_M2_mpyd_rnd_ll_s1:
+ di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>;
+def HEXAGON_M2_mpyd_rnd_ll_s0:
+ di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>;
+
+//Rx+=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]]
+def HEXAGON_M2_mpy_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>;
+def HEXAGON_M2_mpy_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>;
+def HEXAGON_M2_mpy_acc_sat_hh_s1:
+ si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>;
+def HEXAGON_M2_mpy_acc_sat_hh_s0:
+ si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>;
+
+def HEXAGON_M2_mpy_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>;
+def HEXAGON_M2_mpy_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>;
+def HEXAGON_M2_mpy_acc_sat_hl_s1:
+ si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>;
+def HEXAGON_M2_mpy_acc_sat_hl_s0:
+ si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>;
+
+def HEXAGON_M2_mpy_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>;
+def HEXAGON_M2_mpy_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>;
+def HEXAGON_M2_mpy_acc_sat_lh_s1:
+ si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>;
+def HEXAGON_M2_mpy_acc_sat_lh_s0:
+ si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>;
+
+def HEXAGON_M2_mpy_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>;
+def HEXAGON_M2_mpy_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>;
+def HEXAGON_M2_mpy_acc_sat_ll_s1:
+ si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>;
+def HEXAGON_M2_mpy_acc_sat_ll_s0:
+ si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>;
+
+//Rx-=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]]
+def HEXAGON_M2_mpy_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>;
+def HEXAGON_M2_mpy_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>;
+def HEXAGON_M2_mpy_nac_sat_hh_s1:
+ si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def HEXAGON_M2_mpy_nac_sat_hh_s0:
+ si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>;
+
+def HEXAGON_M2_mpy_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>;
+def HEXAGON_M2_mpy_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>;
+def HEXAGON_M2_mpy_nac_sat_hl_s1:
+ si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>;
+def HEXAGON_M2_mpy_nac_sat_hl_s0:
+ si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>;
+
+def HEXAGON_M2_mpy_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>;
+def HEXAGON_M2_mpy_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>;
+def HEXAGON_M2_mpy_nac_sat_lh_s1:
+ si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>;
+def HEXAGON_M2_mpy_nac_sat_lh_s0:
+ si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>;
+
+def HEXAGON_M2_mpy_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>;
+def HEXAGON_M2_mpy_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>;
+def HEXAGON_M2_mpy_nac_sat_ll_s1:
+ si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>;
+def HEXAGON_M2_mpy_nac_sat_ll_s0:
+ si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>;
+
+//Rx+=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]
+def HEXAGON_M2_mpyd_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;
+def HEXAGON_M2_mpyd_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>;
+
+def HEXAGON_M2_mpyd_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>;
+def HEXAGON_M2_mpyd_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>;
+
+def HEXAGON_M2_mpyd_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>;
+def HEXAGON_M2_mpyd_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>;
+
+def HEXAGON_M2_mpyd_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>;
+def HEXAGON_M2_mpyd_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>;
+
+//Rx-=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]
+def HEXAGON_M2_mpyd_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>;
+def HEXAGON_M2_mpyd_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>;
+
+def HEXAGON_M2_mpyd_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>;
+def HEXAGON_M2_mpyd_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>;
+
+def HEXAGON_M2_mpyd_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>;
+def HEXAGON_M2_mpyd_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>;
+
+def HEXAGON_M2_mpyd_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>;
+def HEXAGON_M2_mpyd_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>;
+
+// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
+//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def HEXAGON_M2_mpyu_hh_s0:
+ si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>;
+def HEXAGON_M2_mpyu_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>;
+def HEXAGON_M2_mpyu_hl_s0:
+ si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>;
+def HEXAGON_M2_mpyu_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>;
+def HEXAGON_M2_mpyu_lh_s0:
+ si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>;
+def HEXAGON_M2_mpyu_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>;
+def HEXAGON_M2_mpyu_ll_s0:
+ si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>;
+def HEXAGON_M2_mpyu_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>;
+
+//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def HEXAGON_M2_mpyud_hh_s0:
+ di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>;
+def HEXAGON_M2_mpyud_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>;
+def HEXAGON_M2_mpyud_hl_s0:
+ di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>;
+def HEXAGON_M2_mpyud_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>;
+def HEXAGON_M2_mpyud_lh_s0:
+ di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>;
+def HEXAGON_M2_mpyud_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>;
+def HEXAGON_M2_mpyud_ll_s0:
+ di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>;
+def HEXAGON_M2_mpyud_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>;
+
+//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def HEXAGON_M2_mpyu_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>;
+def HEXAGON_M2_mpyu_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>;
+def HEXAGON_M2_mpyu_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>;
+def HEXAGON_M2_mpyu_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>;
+def HEXAGON_M2_mpyu_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>;
+def HEXAGON_M2_mpyu_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>;
+def HEXAGON_M2_mpyu_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>;
+def HEXAGON_M2_mpyu_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>;
+
+//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def HEXAGON_M2_mpyu_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>;
+def HEXAGON_M2_mpyu_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>;
+def HEXAGON_M2_mpyu_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>;
+def HEXAGON_M2_mpyu_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>;
+def HEXAGON_M2_mpyu_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>;
+def HEXAGON_M2_mpyu_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>;
+def HEXAGON_M2_mpyu_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>;
+def HEXAGON_M2_mpyu_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>;
+
+//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def HEXAGON_M2_mpyud_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>;
+def HEXAGON_M2_mpyud_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>;
+def HEXAGON_M2_mpyud_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>;
+def HEXAGON_M2_mpyud_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>;
+def HEXAGON_M2_mpyud_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>;
+def HEXAGON_M2_mpyud_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>;
+def HEXAGON_M2_mpyud_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>;
+def HEXAGON_M2_mpyud_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>;
+
+//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def HEXAGON_M2_mpyud_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>;
+def HEXAGON_M2_mpyud_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>;
+def HEXAGON_M2_mpyud_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>;
+def HEXAGON_M2_mpyud_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>;
+def HEXAGON_M2_mpyud_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>;
+def HEXAGON_M2_mpyud_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>;
+def HEXAGON_M2_mpyud_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>;
+def HEXAGON_M2_mpyud_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>;
+
+
+/********************************************************************
+* MTYPE/VB *
+*********************************************************************/
+
+// MTYPE / VB / Vector reduce add unsigned bytes.
+def HEXAGON_A2_vraddub:
+ di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>;
+def HEXAGON_A2_vraddub_acc:
+ di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>;
+
+// MTYPE / VB / Vector sum of absolute differences unsigned bytes.
+def HEXAGON_A2_vrsadub:
+ di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>;
+def HEXAGON_A2_vrsadub_acc:
+ di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>;
+
+/********************************************************************
+* MTYPE/VH *
+*********************************************************************/
+
+// MTYPE / VH / Vector dual multiply.
+def HEXAGON_M2_vdmpys_s1:
+ di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>;
+def HEXAGON_M2_vdmpys_s0:
+ di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>;
+def HEXAGON_M2_vdmacs_s1:
+ di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>;
+def HEXAGON_M2_vdmacs_s0:
+ di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>;
+
+// MTYPE / VH / Vector dual multiply with round and pack.
+def HEXAGON_M2_vdmpyrs_s0:
+ si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>;
+def HEXAGON_M2_vdmpyrs_s1:
+ si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>;
+
+// MTYPE / VH / Vector multiply even halfwords.
+def HEXAGON_M2_vmpy2es_s1:
+ di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>;
+def HEXAGON_M2_vmpy2es_s0:
+ di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>;
+def HEXAGON_M2_vmac2es:
+ di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>;
+def HEXAGON_M2_vmac2es_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>;
+def HEXAGON_M2_vmac2es_s0:
+ di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>;
+
+// MTYPE / VH / Vector multiply halfwords.
+def HEXAGON_M2_vmpy2s_s0:
+ di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>;
+def HEXAGON_M2_vmpy2s_s1:
+ di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>;
+def HEXAGON_M2_vmac2:
+ di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>;
+def HEXAGON_M2_vmac2s_s0:
+ di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>;
+def HEXAGON_M2_vmac2s_s1:
+ di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>;
+
+// MTYPE / VH / Vector multiply halfwords with round and pack.
+def HEXAGON_M2_vmpy2s_s0pack:
+ si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>;
+def HEXAGON_M2_vmpy2s_s1pack:
+ si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>;
+
+// MTYPE / VH / Vector reduce multiply halfwords.
+// Rxx32+=vrmpyh(Rss32,Rtt32)
+def HEXAGON_M2_vrmpy_s0:
+ di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>;
+def HEXAGON_M2_vrmac_s0:
+ di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>;
+
+
+/********************************************************************
+* STYPE/ALU *
+*********************************************************************/
+
+// STYPE / ALU / Absolute value.
+def HEXAGON_A2_abs:
+ si_SInst_si <"abs", int_hexagon_A2_abs>;
+def HEXAGON_A2_absp:
+ di_SInst_di <"abs", int_hexagon_A2_absp>;
+def HEXAGON_A2_abssat:
+ si_SInst_si_sat <"abs", int_hexagon_A2_abssat>;
+
+// STYPE / ALU / Negate.
+def HEXAGON_A2_negp:
+ di_SInst_di <"neg", int_hexagon_A2_negp>;
+def HEXAGON_A2_negsat:
+ si_SInst_si_sat <"neg", int_hexagon_A2_negsat>;
+
+// STYPE / ALU / Logical Not.
+def HEXAGON_A2_notp:
+ di_SInst_di <"not", int_hexagon_A2_notp>;
+
+// STYPE / ALU / Sign extend word to doubleword.
+def HEXAGON_A2_sxtw:
+ di_SInst_si <"sxtw", int_hexagon_A2_sxtw>;
+
+
+/********************************************************************
+* STYPE/BIT *
+*********************************************************************/
+
+// STYPE / BIT / Count leading.
+def HEXAGON_S2_cl0:
+ si_SInst_si <"cl0", int_hexagon_S2_cl0>;
+def HEXAGON_S2_cl0p:
+ si_SInst_di <"cl0", int_hexagon_S2_cl0p>;
+def HEXAGON_S2_cl1:
+ si_SInst_si <"cl1", int_hexagon_S2_cl1>;
+def HEXAGON_S2_cl1p:
+ si_SInst_di <"cl1", int_hexagon_S2_cl1p>;
+def HEXAGON_S2_clb:
+ si_SInst_si <"clb", int_hexagon_S2_clb>;
+def HEXAGON_S2_clbp:
+ si_SInst_di <"clb", int_hexagon_S2_clbp>;
+def HEXAGON_S2_clbnorm:
+ si_SInst_si <"normamt", int_hexagon_S2_clbnorm>;
+
+// STYPE / BIT / Count trailing.
+def HEXAGON_S2_ct0:
+ si_SInst_si <"ct0", int_hexagon_S2_ct0>;
+def HEXAGON_S2_ct1:
+ si_SInst_si <"ct1", int_hexagon_S2_ct1>;
+
+// STYPE / BIT / Compare bit mask.
+def Hexagon_C2_bitsclr:
+ qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>;
+def Hexagon_C2_bitsclri:
+ qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>;
+def Hexagon_C2_bitsset:
+ qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>;
+
+// STYPE / BIT / Extract unsigned.
+// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm])
+def HEXAGON_S2_extractu:
+ si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>;
+def HEXAGON_S2_extractu_rp:
+ si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>;
+def HEXAGON_S2_extractup:
+ di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>;
+def HEXAGON_S2_extractup_rp:
+ di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>;
+
+// STYPE / BIT / Insert bitfield.
+def Hexagon_S2_insert:
+ si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>;
+def Hexagon_S2_insert_rp:
+ si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>;
+def Hexagon_S2_insertp:
+ di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>;
+def Hexagon_S2_insertp_rp:
+ di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>;
+
+// STYPE / BIT / Innterleave/deinterleave.
+def Hexagon_S2_interleave:
+ di_SInst_di <"interleave", int_hexagon_S2_interleave>;
+def Hexagon_S2_deinterleave:
+ di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>;
+
+// STYPE / BIT / Linear feedback-shift Iteration.
+def Hexagon_S2_lfsp:
+ di_SInst_didi <"lfs", int_hexagon_S2_lfsp>;
+
+// STYPE / BIT / Bit reverse.
+def Hexagon_S2_brev:
+ si_SInst_si <"brev", int_hexagon_S2_brev>;
+
+// STYPE / BIT / Set/Clear/Toggle Bit.
+def HEXAGON_S2_setbit_i:
+ si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>;
+def HEXAGON_S2_togglebit_i:
+ si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>;
+def HEXAGON_S2_clrbit_i:
+ si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>;
+def HEXAGON_S2_setbit_r:
+ si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>;
+def HEXAGON_S2_togglebit_r:
+ si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>;
+def HEXAGON_S2_clrbit_r:
+ si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>;
+
+// STYPE / BIT / Test Bit.
+def HEXAGON_S2_tstbit_i:
+ qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
+def HEXAGON_S2_tstbit_r:
+ qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
+
+
+/********************************************************************
+* STYPE/COMPLEX *
+*********************************************************************/
+
+// STYPE / COMPLEX / Vector Complex conjugate.
+def HEXAGON_A2_vconj:
+ di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
+
+// STYPE / COMPLEX / Vector Complex rotate.
+def HEXAGON_S2_vcrotate:
+ di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>;
+
+
+/********************************************************************
+* STYPE/PERM *
+*********************************************************************/
+
+// STYPE / PERM / Saturate.
+def HEXAGON_A2_sat:
+ si_SInst_di <"sat", int_hexagon_A2_sat>;
+def HEXAGON_A2_satb:
+ si_SInst_si <"satb", int_hexagon_A2_satb>;
+def HEXAGON_A2_sath:
+ si_SInst_si <"sath", int_hexagon_A2_sath>;
+def HEXAGON_A2_satub:
+ si_SInst_si <"satub", int_hexagon_A2_satub>;
+def HEXAGON_A2_satuh:
+ si_SInst_si <"satuh", int_hexagon_A2_satuh>;
+
+// STYPE / PERM / Swizzle bytes.
+def HEXAGON_A2_swiz:
+ si_SInst_si <"swiz", int_hexagon_A2_swiz>;
+
+// STYPE / PERM / Vector align.
+// Need custom lowering
+def HEXAGON_S2_valignib:
+ di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
+def HEXAGON_S2_valignrb:
+ di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
+
+// STYPE / PERM / Vector round and pack.
+def HEXAGON_S2_vrndpackwh:
+ si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>;
+def HEXAGON_S2_vrndpackwhs:
+ si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>;
+
+// STYPE / PERM / Vector saturate and pack.
+def HEXAGON_S2_svsathb:
+ si_SInst_si <"vsathb", int_hexagon_S2_svsathb>;
+def HEXAGON_S2_vsathb:
+ si_SInst_di <"vsathb", int_hexagon_S2_vsathb>;
+def HEXAGON_S2_svsathub:
+ si_SInst_si <"vsathub", int_hexagon_S2_svsathub>;
+def HEXAGON_S2_vsathub:
+ si_SInst_di <"vsathub", int_hexagon_S2_vsathub>;
+def HEXAGON_S2_vsatwh:
+ si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>;
+def HEXAGON_S2_vsatwuh:
+ si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>;
+
+// STYPE / PERM / Vector saturate without pack.
+def HEXAGON_S2_vsathb_nopack:
+ di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>;
+def HEXAGON_S2_vsathub_nopack:
+ di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>;
+def HEXAGON_S2_vsatwh_nopack:
+ di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>;
+def HEXAGON_S2_vsatwuh_nopack:
+ di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>;
+
+// STYPE / PERM / Vector shuffle.
+def HEXAGON_S2_shuffeb:
+ di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>;
+def HEXAGON_S2_shuffeh:
+ di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>;
+def HEXAGON_S2_shuffob:
+ di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>;
+def HEXAGON_S2_shuffoh:
+ di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>;
+
+// STYPE / PERM / Vector splat bytes.
+def HEXAGON_S2_vsplatrb:
+ si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>;
+
+// STYPE / PERM / Vector splat halfwords.
+def HEXAGON_S2_vsplatrh:
+ di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>;
+
+// STYPE / PERM / Vector splice.
+def Hexagon_S2_vsplicerb:
+ di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>;
+def Hexagon_S2_vspliceib:
+ di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>;
+
+// STYPE / PERM / Sign extend.
+def HEXAGON_S2_vsxtbh:
+ di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>;
+def HEXAGON_S2_vsxthw:
+ di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>;
+
+// STYPE / PERM / Truncate.
+def HEXAGON_S2_vtrunehb:
+ si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>;
+def HEXAGON_S2_vtrunohb:
+ si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>;
+def HEXAGON_S2_vtrunewh:
+ di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>;
+def HEXAGON_S2_vtrunowh:
+ di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>;
+
+// STYPE / PERM / Zero extend.
+def HEXAGON_S2_vzxtbh:
+ di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>;
+def HEXAGON_S2_vzxthw:
+ di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>;
+
+
+/********************************************************************
+* STYPE/PRED *
+*********************************************************************/
+
+// STYPE / PRED / Mask generate from predicate.
+def HEXAGON_C2_mask:
+ di_SInst_qi <"mask", int_hexagon_C2_mask>;
+
+// STYPE / PRED / Predicate transfer.
+def HEXAGON_C2_tfrpr:
+ si_SInst_qi <"", int_hexagon_C2_tfrpr>;
+def HEXAGON_C2_tfrrp:
+ qi_SInst_si <"", int_hexagon_C2_tfrrp>;
+
+// STYPE / PRED / Viterbi pack even and odd predicate bits.
+def HEXAGON_C2_vitpack:
+ si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>;
+
+
+/********************************************************************
+* STYPE/SHIFT *
+*********************************************************************/
+
+// STYPE / SHIFT / Shift by immediate.
+def HEXAGON_S2_asl_i_r:
+ si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>;
+def HEXAGON_S2_asr_i_r:
+ si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>;
+def HEXAGON_S2_lsr_i_r:
+ si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>;
+def HEXAGON_S2_asl_i_p:
+ di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>;
+def HEXAGON_S2_asr_i_p:
+ di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>;
+def HEXAGON_S2_lsr_i_p:
+ di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>;
+
+// STYPE / SHIFT / Shift by immediate and accumulate.
+def HEXAGON_S2_asl_i_r_acc:
+ si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>;
+def HEXAGON_S2_asr_i_r_acc:
+ si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>;
+def HEXAGON_S2_lsr_i_r_acc:
+ si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>;
+def HEXAGON_S2_asl_i_r_nac:
+ si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>;
+def HEXAGON_S2_asr_i_r_nac:
+ si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>;
+def HEXAGON_S2_lsr_i_r_nac:
+ si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>;
+def HEXAGON_S2_asl_i_p_acc:
+ di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>;
+def HEXAGON_S2_asr_i_p_acc:
+ di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>;
+def HEXAGON_S2_lsr_i_p_acc:
+ di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>;
+def HEXAGON_S2_asl_i_p_nac:
+ di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>;
+def HEXAGON_S2_asr_i_p_nac:
+ di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>;
+def HEXAGON_S2_lsr_i_p_nac:
+ di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>;
+
+// STYPE / SHIFT / Shift by immediate and add.
+def HEXAGON_S2_addasl_rrri:
+ si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>;
+
+// STYPE / SHIFT / Shift by immediate and logical.
+def HEXAGON_S2_asl_i_r_and:
+ si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>;
+def HEXAGON_S2_asr_i_r_and:
+ si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>;
+def HEXAGON_S2_lsr_i_r_and:
+ si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>;
+
+def HEXAGON_S2_asl_i_r_xacc:
+ si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>;
+def HEXAGON_S2_lsr_i_r_xacc:
+ si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>;
+
+def HEXAGON_S2_asl_i_r_or:
+ si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>;
+def HEXAGON_S2_asr_i_r_or:
+ si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>;
+def HEXAGON_S2_lsr_i_r_or:
+ si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>;
+
+def HEXAGON_S2_asl_i_p_and:
+ di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>;
+def HEXAGON_S2_asr_i_p_and:
+ di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>;
+def HEXAGON_S2_lsr_i_p_and:
+ di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>;
+
+def HEXAGON_S2_asl_i_p_xacc:
+ di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>;
+def HEXAGON_S2_lsr_i_p_xacc:
+ di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>;
+
+def HEXAGON_S2_asl_i_p_or:
+ di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>;
+def HEXAGON_S2_asr_i_p_or:
+ di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>;
+def HEXAGON_S2_lsr_i_p_or:
+ di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>;
+
+// STYPE / SHIFT / Shift right by immediate with rounding.
+def HEXAGON_S2_asr_i_r_rnd:
+ si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>;
+def HEXAGON_S2_asr_i_r_rnd_goodsyntax:
+ si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
+
+// STYPE / SHIFT / Shift left by immediate with saturation.
+def HEXAGON_S2_asl_i_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>;
+
+// STYPE / SHIFT / Shift by register.
+def HEXAGON_S2_asl_r_r:
+ si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>;
+def HEXAGON_S2_asr_r_r:
+ si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>;
+def HEXAGON_S2_lsl_r_r:
+ si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>;
+def HEXAGON_S2_lsr_r_r:
+ si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>;
+def HEXAGON_S2_asl_r_p:
+ di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>;
+def HEXAGON_S2_asr_r_p:
+ di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>;
+def HEXAGON_S2_lsl_r_p:
+ di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>;
+def HEXAGON_S2_lsr_r_p:
+ di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>;
+
+// STYPE / SHIFT / Shift by register and accumulate.
+def HEXAGON_S2_asl_r_r_acc:
+ si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>;
+def HEXAGON_S2_asr_r_r_acc:
+ si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>;
+def HEXAGON_S2_lsl_r_r_acc:
+ si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>;
+def HEXAGON_S2_lsr_r_r_acc:
+ si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>;
+def HEXAGON_S2_asl_r_p_acc:
+ di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>;
+def HEXAGON_S2_asr_r_p_acc:
+ di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>;
+def HEXAGON_S2_lsl_r_p_acc:
+ di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>;
+def HEXAGON_S2_lsr_r_p_acc:
+ di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>;
+
+def HEXAGON_S2_asl_r_r_nac:
+ si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>;
+def HEXAGON_S2_asr_r_r_nac:
+ si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>;
+def HEXAGON_S2_lsl_r_r_nac:
+ si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>;
+def HEXAGON_S2_lsr_r_r_nac:
+ si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>;
+def HEXAGON_S2_asl_r_p_nac:
+ di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>;
+def HEXAGON_S2_asr_r_p_nac:
+ di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>;
+def HEXAGON_S2_lsl_r_p_nac:
+ di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>;
+def HEXAGON_S2_lsr_r_p_nac:
+ di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>;
+
+// STYPE / SHIFT / Shift by register and logical.
+def HEXAGON_S2_asl_r_r_and:
+ si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>;
+def HEXAGON_S2_asr_r_r_and:
+ si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>;
+def HEXAGON_S2_lsl_r_r_and:
+ si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>;
+def HEXAGON_S2_lsr_r_r_and:
+ si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>;
+
+def HEXAGON_S2_asl_r_r_or:
+ si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>;
+def HEXAGON_S2_asr_r_r_or:
+ si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>;
+def HEXAGON_S2_lsl_r_r_or:
+ si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>;
+def HEXAGON_S2_lsr_r_r_or:
+ si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>;
+
+def HEXAGON_S2_asl_r_p_and:
+ di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>;
+def HEXAGON_S2_asr_r_p_and:
+ di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>;
+def HEXAGON_S2_lsl_r_p_and:
+ di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>;
+def HEXAGON_S2_lsr_r_p_and:
+ di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>;
+
+def HEXAGON_S2_asl_r_p_or:
+ di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>;
+def HEXAGON_S2_asr_r_p_or:
+ di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>;
+def HEXAGON_S2_lsl_r_p_or:
+ di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>;
+def HEXAGON_S2_lsr_r_p_or:
+ di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>;
+
+// STYPE / SHIFT / Shift by register with saturation.
+def HEXAGON_S2_asl_r_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>;
+def HEXAGON_S2_asr_r_r_sat:
+ si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>;
+
+// STYPE / SHIFT / Table Index.
+def Hexagon_S2_tableidxb_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>;
+def Hexagon_S2_tableidxd_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>;
+def Hexagon_S2_tableidxh_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>;
+def Hexagon_S2_tableidxw_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>;
+
+
+/********************************************************************
+* STYPE/VH *
+*********************************************************************/
+
+// STYPE / VH / Vector absolute value halfwords.
+// Rdd64=vabsh(Rss64)
+def HEXAGON_A2_vabsh:
+ di_SInst_di <"vabsh", int_hexagon_A2_vabsh>;
+def HEXAGON_A2_vabshsat:
+ di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>;
+
+// STYPE / VH / Vector shift halfwords by immediate.
+// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32)
+def HEXAGON_S2_asl_i_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>;
+def HEXAGON_S2_asr_i_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>;
+def HEXAGON_S2_lsr_i_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>;
+
+// STYPE / VH / Vector shift halfwords by register.
+// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32)
+def HEXAGON_S2_asl_r_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
+def HEXAGON_S2_asr_r_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
+def HEXAGON_S2_lsl_r_vh:
+ di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
+def HEXAGON_S2_lsr_r_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
+
+
+/********************************************************************
+* STYPE/VW *
+*********************************************************************/
+
+// STYPE / VW / Vector absolute value words.
+def HEXAGON_A2_vabsw:
+ di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
+def HEXAGON_A2_vabswsat:
+ di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
+
+// STYPE / VW / Vector shift words by immediate.
+// Rdd64=v[asl/vsl]w(Rss64,Rt32)
+def HEXAGON_S2_asl_i_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
+def HEXAGON_S2_asr_i_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
+def HEXAGON_S2_lsr_i_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
+
+// STYPE / VW / Vector shift words by register.
+// Rdd64=v[asl/vsl]w(Rss64,Rt32)
+def HEXAGON_S2_asl_r_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
+def HEXAGON_S2_asr_r_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
+def HEXAGON_S2_lsl_r_vw:
+ di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
+def HEXAGON_S2_lsr_r_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
+
+// STYPE / VW / Vector shift words with truncate and pack.
+def HEXAGON_S2_asr_r_svw_trun:
+ si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
+def HEXAGON_S2_asr_i_svw_trun:
+ si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
+
+// LD / Circular loads.
+def HEXAGON_circ_ldd:
+ di_LDInstPI_diu4 <"circ_ldd", int_hexagon_circ_ldd>;
+
+include "HexagonIntrinsicsV3.td"
+include "HexagonIntrinsicsV4.td"
+include "HexagonIntrinsicsV5.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
new file mode 100644
index 000000000000..2788101d5a66
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -0,0 +1,39 @@
+//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Multiply 64-bit and use lower result
+//
+// Optimized with intrinisics accumulates
+//
+def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
+ (i64
+ (COMBINE_rr
+ (HEXAGON_M2_maci
+ (HEXAGON_M2_maci
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)))),
+ subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg))),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)))), subreg_loreg))))>;
+
+
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
new file mode 100644
index 000000000000..2a54e62d20ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@@ -0,0 +1,50 @@
+//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpys_s1:
+ di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>;
+def Hexagon_M2_vrcmpys_acc_s1:
+ di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>;
+def Hexagon_M2_vrcmpys_s1rp:
+ si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>;
+
+
+
+
+/********************************************************************
+* MTYPE/VB *
+*********************************************************************/
+
+// MTYPE / VB / Vector reduce add unsigned bytes.
+def Hexagon_M2_vradduh:
+ si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>;
+
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addsp:
+ di_ALU64_sidi <"add", int_hexagon_A2_addsp>;
+def Hexagon_A2_addpsat:
+ di_ALU64_didi <"add", int_hexagon_A2_addpsat>;
+
+def Hexagon_A2_maxp:
+ di_ALU64_didi <"max", int_hexagon_A2_maxp>;
+def Hexagon_A2_maxup:
+ di_ALU64_didi <"maxu", int_hexagon_A2_maxup>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
new file mode 100644
index 000000000000..dd28ebb57231
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -0,0 +1,369 @@
+//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V4 Architecture Extensions
+// Application-Level Specification
+// 80-V9418-12 Rev. A
+// June 15, 2010
+
+
+//
+// ALU 32 types.
+//
+
+class si_ALU32_sisi_not<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_ALU32_s8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+class di_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_neg_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_neg_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_neg_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+//
+// SInst Classes.
+//
+class qi_neg_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, !$src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, $src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, or($src2, !$src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, or($src2, $src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_si_addsis6<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, add($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_SInst_si_subs6si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, sub(#$src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+class di_ALU64_didi_neg<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_MInst_dididi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_and<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_andn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_sisis10_andi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, #$src3))")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_MInst_sisisi_xor<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_xorn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_sisis10_or<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_MInst_sisisi_or<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_orn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_siu5_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Logical Operations.
+def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>;
+def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>;
+
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine Words Into Doublewords.
+def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>;
+def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>;
+
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Conditional Shift Halfword.
+// ALU32 / PRED / Conditional Sign Extend.
+// ALU32 / PRED / Conditional Zero Extend.
+// ALU32 / PRED / Compare.
+def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>;
+def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>;
+def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
+def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
+def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
+def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>;
+
+// ALU32 / PRED / cmpare To General Register.
+def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
+def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>;
+def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>;
+def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Corner Detection Acceleration.
+def Hexagon_C4_fastcorner9:
+ qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>;
+def Hexagon_C4_fastcorner9_not:
+ qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>;
+
+// CR / Logical Operations On Predicates.
+def Hexagon_C4_and_andn:
+ qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>;
+def Hexagon_C4_and_and:
+ qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>;
+def Hexagon_C4_and_orn:
+ qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>;
+def Hexagon_C4_and_or:
+ qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>;
+def Hexagon_C4_or_andn:
+ qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>;
+def Hexagon_C4_or_and:
+ qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>;
+def Hexagon_C4_or_orn:
+ qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>;
+def Hexagon_C4_or_or:
+ qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>;
+
+
+/********************************************************************
+* XTYPE/ALU *
+*********************************************************************/
+
+// XTYPE / ALU / Add And Accumulate.
+def Hexagon_S4_addaddi:
+ si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>;
+def Hexagon_S4_subaddi:
+ si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>;
+
+// XTYPE / ALU / Logical Doublewords.
+def Hexagon_S4_andnp:
+ di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>;
+def Hexagon_S4_ornp:
+ di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>;
+
+// XTYPE / ALU / Logical-logical Doublewords.
+def Hexagon_M4_xor_xacc:
+ di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>;
+
+// XTYPE / ALU / Logical-logical Words.
+def HEXAGON_M4_and_and:
+ si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>;
+def HEXAGON_M4_and_or:
+ si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>;
+def HEXAGON_M4_and_xor:
+ si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>;
+def HEXAGON_M4_and_andn:
+ si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>;
+def HEXAGON_M4_xor_and:
+ si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>;
+def HEXAGON_M4_xor_or:
+ si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>;
+def HEXAGON_M4_xor_andn:
+ si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>;
+def HEXAGON_M4_or_and:
+ si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>;
+def HEXAGON_M4_or_or:
+ si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>;
+def HEXAGON_M4_or_xor:
+ si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>;
+def HEXAGON_M4_or_andn:
+ si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>;
+def HEXAGON_S4_or_andix:
+ si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>;
+def HEXAGON_S4_or_andi:
+ si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>;
+def HEXAGON_S4_or_ori:
+ si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>;
+
+// XTYPE / ALU / Modulo wrap.
+def HEXAGON_A4_modwrapu:
+ si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>;
+
+// XTYPE / ALU / Round.
+def HEXAGON_A4_cround_ri:
+ si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>;
+def HEXAGON_A4_cround_rr:
+ si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>;
+def HEXAGON_A4_round_ri:
+ si_SInst_siu5 <"round", int_hexagon_A4_round_ri>;
+def HEXAGON_A4_round_rr:
+ si_SInst_sisi <"round", int_hexagon_A4_round_rr>;
+def HEXAGON_A4_round_ri_sat:
+ si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>;
+def HEXAGON_A4_round_rr_sat:
+ si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>;
+
+// XTYPE / ALU / Vector reduce add unsigned halfwords.
+// XTYPE / ALU / Vector add bytes.
+// XTYPE / ALU / Vector conditional negate.
+// XTYPE / ALU / Vector maximum bytes.
+// XTYPE / ALU / Vector reduce maximum halfwords.
+// XTYPE / ALU / Vector reduce maximum words.
+// XTYPE / ALU / Vector minimum bytes.
+// XTYPE / ALU / Vector reduce minimum halfwords.
+// XTYPE / ALU / Vector reduce minimum words.
+// XTYPE / ALU / Vector subtract bytes.
+
+
+/********************************************************************
+* XTYPE/BIT *
+*********************************************************************/
+
+// XTYPE / BIT / Count leading.
+// XTYPE / BIT / Count trailing.
+// XTYPE / BIT / Extract bitfield.
+// XTYPE / BIT / Masked parity.
+// XTYPE / BIT / Bit reverse.
+// XTYPE / BIT / Split bitfield.
+
+
+/********************************************************************
+* XTYPE/COMPLEX *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
+// XTYPE / COMPLEX / Complex add/sub words.
+// XTYPE / COMPLEX / Complex multiply 32x16.
+// XTYPE / COMPLEX / Vector reduce complex rotate.
+
+
+/********************************************************************
+* XTYPE/MPY *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
new file mode 100644
index 000000000000..1d44b526d298
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -0,0 +1,395 @@
+class sf_SInst_sf<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class si_SInst_sf<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class sf_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class sf_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+class sf_SInst_df<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+class si_SInst_df<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+class df_SInst_sf<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class di_SInst_sf<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class df_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class df_SInst_df<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+class di_SInst_df<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+
+class df_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+class sf_MInst_sfsf<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class df_MInst_dfdf<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class qi_ALU64_dfdf<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class qi_ALU64_dfu5<string opc, Intrinsic IntID>
+ : ALU64_ri<(outs PredRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+
+class sf_MInst_sfsfsf_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$dst2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class sf_MInst_sfsfsf_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$dst2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$dst2))],
+ "$dst2 = $dst">;
+
+
+class sf_MInst_sfsfsfsi_sc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2, $src3):scale")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class sf_MInst_sfsfsf_acc_lib<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$dst2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2):lib")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class sf_MInst_sfsfsf_nac_lib<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$dst2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2):lib")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class df_MInst_dfdfdf_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$dst2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2, DoubleRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class df_MInst_dfdfdf_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$dst2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2, DoubleRegs:$dst2))],
+ "$dst2 = $dst">;
+
+
+class df_MInst_dfdfdfsi_sc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2, IntRegs:$src3),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2, $src3):scale")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2, IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class df_MInst_dfdfdf_acc_lib<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$dst2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2):lib")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2, DoubleRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class df_MInst_dfdfdf_nac_lib<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$dst2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2):lib")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2, DoubleRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class qi_SInst_sfsf<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_sfu5<string opc, Intrinsic IntID>
+ : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class sf_ALU64_u10_pos<string opc, Intrinsic IntID>
+ : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")),
+ [(set IntRegs:$dst, (IntID imm:$src1))]>;
+
+class sf_ALU64_u10_neg<string opc, Intrinsic IntID>
+ : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")),
+ [(set IntRegs:$dst, (IntID imm:$src1))]>;
+
+class df_ALU64_u10_pos<string opc, Intrinsic IntID>
+ : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
+
+class df_ALU64_u10_neg<string opc, Intrinsic IntID>
+ : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
+
+class di_MInst_diu6<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class di_MInst_diu4_rnd<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_MInst_diu4_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_diu4_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+
+def HEXAGON_C4_fastcorner9:
+ qi_SInst_qiqi <"fastcorner9", int_hexagon_C4_fastcorner9>;
+def HEXAGON_C4_fastcorner9_not:
+ qi_SInst_qiqi <"!fastcorner9", int_hexagon_C4_fastcorner9_not>;
+def HEXAGON_M5_vrmpybuu:
+ di_MInst_didi <"vrmpybu", int_hexagon_M5_vrmpybuu>;
+def HEXAGON_M5_vrmacbuu:
+ di_MInst_dididi_acc <"vrmpybu", int_hexagon_M5_vrmacbuu>;
+def HEXAGON_M5_vrmpybsu:
+ di_MInst_didi <"vrmpybsu", int_hexagon_M5_vrmpybsu>;
+def HEXAGON_M5_vrmacbsu:
+ di_MInst_dididi_acc <"vrmpybsu", int_hexagon_M5_vrmacbsu>;
+def HEXAGON_M5_vmpybuu:
+ di_MInst_sisi <"vmpybu", int_hexagon_M5_vmpybuu>;
+def HEXAGON_M5_vmpybsu:
+ di_MInst_sisi <"vmpybsu", int_hexagon_M5_vmpybsu>;
+def HEXAGON_M5_vmacbuu:
+ di_MInst_disisi_acc <"vmpybu", int_hexagon_M5_vmacbuu>;
+def HEXAGON_M5_vmacbsu:
+ di_MInst_disisi_acc <"vmpybsu", int_hexagon_M5_vmacbsu>;
+def HEXAGON_M5_vdmpybsu:
+ di_MInst_didi_sat <"vdmpybsu", int_hexagon_M5_vdmpybsu>;
+def HEXAGON_M5_vdmacbsu:
+ di_MInst_dididi_acc_sat <"vdmpybsu", int_hexagon_M5_vdmacbsu>;
+def HEXAGON_A5_vaddhubs:
+ si_SInst_didi_sat <"vaddhub", int_hexagon_A5_vaddhubs>;
+def HEXAGON_S5_popcountp:
+ si_SInst_di <"popcount", int_hexagon_S5_popcountp>;
+def HEXAGON_S5_asrhub_rnd_sat_goodsyntax:
+ si_MInst_diu4_rnd_sat <"vasrhub", int_hexagon_S5_asrhub_rnd_sat_goodsyntax>;
+def HEXAGON_S5_asrhub_sat:
+ si_SInst_diu4_sat <"vasrhub", int_hexagon_S5_asrhub_sat>;
+def HEXAGON_S5_vasrhrnd_goodsyntax:
+ di_MInst_diu4_rnd <"vasrh", int_hexagon_S5_vasrhrnd_goodsyntax>;
+def HEXAGON_S2_asr_i_p_rnd:
+ di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p_rnd>;
+def HEXAGON_S2_asr_i_p_rnd_goodsyntax:
+ di_MInst_diu6 <"asrrnd", int_hexagon_S2_asr_i_p_rnd_goodsyntax>;
+def HEXAGON_F2_sfadd:
+ sf_MInst_sfsf <"sfadd", int_hexagon_F2_sfadd>;
+def HEXAGON_F2_sfsub:
+ sf_MInst_sfsf <"sfsub", int_hexagon_F2_sfsub>;
+def HEXAGON_F2_sfmpy:
+ sf_MInst_sfsf <"sfmpy", int_hexagon_F2_sfmpy>;
+def HEXAGON_F2_sffma:
+ sf_MInst_sfsfsf_acc <"sfmpy", int_hexagon_F2_sffma>;
+def HEXAGON_F2_sffma_sc:
+ sf_MInst_sfsfsfsi_sc <"sfmpy", int_hexagon_F2_sffma_sc>;
+def HEXAGON_F2_sffms:
+ sf_MInst_sfsfsf_nac <"sfmpy", int_hexagon_F2_sffms>;
+def HEXAGON_F2_sffma_lib:
+ sf_MInst_sfsfsf_acc_lib <"sfmpy", int_hexagon_F2_sffma_lib>;
+def HEXAGON_F2_sffms_lib:
+ sf_MInst_sfsfsf_nac_lib <"sfmpy", int_hexagon_F2_sffms_lib>;
+def HEXAGON_F2_sfcmpeq:
+ qi_SInst_sfsf <"sfcmp.eq", int_hexagon_F2_sfcmpeq>;
+def HEXAGON_F2_sfcmpgt:
+ qi_SInst_sfsf <"sfcmp.gt", int_hexagon_F2_sfcmpgt>;
+def HEXAGON_F2_sfcmpge:
+ qi_SInst_sfsf <"sfcmp.ge", int_hexagon_F2_sfcmpge>;
+def HEXAGON_F2_sfcmpuo:
+ qi_SInst_sfsf <"sfcmp.uo", int_hexagon_F2_sfcmpuo>;
+def HEXAGON_F2_sfmax:
+ sf_MInst_sfsf <"sfmax", int_hexagon_F2_sfmax>;
+def HEXAGON_F2_sfmin:
+ sf_MInst_sfsf <"sfmin", int_hexagon_F2_sfmin>;
+def HEXAGON_F2_sfclass:
+ qi_SInst_sfu5 <"sfclass", int_hexagon_F2_sfclass>;
+def HEXAGON_F2_sfimm_p:
+ sf_ALU64_u10_pos <"sfmake", int_hexagon_F2_sfimm_p>;
+def HEXAGON_F2_sfimm_n:
+ sf_ALU64_u10_neg <"sfmake", int_hexagon_F2_sfimm_n>;
+def HEXAGON_F2_sffixupn:
+ sf_MInst_sfsf <"sffixupn", int_hexagon_F2_sffixupn>;
+def HEXAGON_F2_sffixupd:
+ sf_MInst_sfsf <"sffixupd", int_hexagon_F2_sffixupd>;
+def HEXAGON_F2_sffixupr:
+ sf_SInst_sf <"sffixupr", int_hexagon_F2_sffixupr>;
+def HEXAGON_F2_dfadd:
+ df_MInst_dfdf <"dfadd", int_hexagon_F2_dfadd>;
+def HEXAGON_F2_dfsub:
+ df_MInst_dfdf <"dfsub", int_hexagon_F2_dfsub>;
+def HEXAGON_F2_dfmpy:
+ df_MInst_dfdf <"dfmpy", int_hexagon_F2_dfmpy>;
+def HEXAGON_F2_dffma:
+ df_MInst_dfdfdf_acc <"dfmpy", int_hexagon_F2_dffma>;
+def HEXAGON_F2_dffms:
+ df_MInst_dfdfdf_nac <"dfmpy", int_hexagon_F2_dffms>;
+def HEXAGON_F2_dffma_lib:
+ df_MInst_dfdfdf_acc_lib <"dfmpy", int_hexagon_F2_dffma_lib>;
+def HEXAGON_F2_dffms_lib:
+ df_MInst_dfdfdf_nac_lib <"dfmpy", int_hexagon_F2_dffms_lib>;
+def HEXAGON_F2_dffma_sc:
+ df_MInst_dfdfdfsi_sc <"dfmpy", int_hexagon_F2_dffma_sc>;
+def HEXAGON_F2_dfmax:
+ df_MInst_dfdf <"dfmax", int_hexagon_F2_dfmax>;
+def HEXAGON_F2_dfmin:
+ df_MInst_dfdf <"dfmin", int_hexagon_F2_dfmin>;
+def HEXAGON_F2_dfcmpeq:
+ qi_ALU64_dfdf <"dfcmp.eq", int_hexagon_F2_dfcmpeq>;
+def HEXAGON_F2_dfcmpgt:
+ qi_ALU64_dfdf <"dfcmp.gt", int_hexagon_F2_dfcmpgt>;
+def HEXAGON_F2_dfcmpge:
+ qi_ALU64_dfdf <"dfcmp.ge", int_hexagon_F2_dfcmpge>;
+def HEXAGON_F2_dfcmpuo:
+ qi_ALU64_dfdf <"dfcmp.uo", int_hexagon_F2_dfcmpuo>;
+def HEXAGON_F2_dfclass:
+ qi_ALU64_dfu5 <"dfclass", int_hexagon_F2_dfclass>;
+def HEXAGON_F2_dfimm_p:
+ df_ALU64_u10_pos <"dfmake", int_hexagon_F2_dfimm_p>;
+def HEXAGON_F2_dfimm_n:
+ df_ALU64_u10_neg <"dfmake", int_hexagon_F2_dfimm_n>;
+def HEXAGON_F2_dffixupn:
+ df_MInst_dfdf <"dffixupn", int_hexagon_F2_dffixupn>;
+def HEXAGON_F2_dffixupd:
+ df_MInst_dfdf <"dffixupd", int_hexagon_F2_dffixupd>;
+def HEXAGON_F2_dffixupr:
+ df_SInst_df <"dffixupr", int_hexagon_F2_dffixupr>;
+def HEXAGON_F2_conv_sf2df:
+ df_SInst_sf <"convert_sf2df", int_hexagon_F2_conv_sf2df>;
+def HEXAGON_F2_conv_df2sf:
+ sf_SInst_df <"convert_df2sf", int_hexagon_F2_conv_df2sf>;
+def HEXAGON_F2_conv_uw2sf:
+ sf_SInst_si <"convert_uw2sf", int_hexagon_F2_conv_uw2sf>;
+def HEXAGON_F2_conv_uw2df:
+ df_SInst_si <"convert_uw2df", int_hexagon_F2_conv_uw2df>;
+def HEXAGON_F2_conv_w2sf:
+ sf_SInst_si <"convert_w2sf", int_hexagon_F2_conv_w2sf>;
+def HEXAGON_F2_conv_w2df:
+ df_SInst_si <"convert_w2df", int_hexagon_F2_conv_w2df>;
+def HEXAGON_F2_conv_ud2sf:
+ sf_SInst_di <"convert_ud2sf", int_hexagon_F2_conv_ud2sf>;
+def HEXAGON_F2_conv_ud2df:
+ df_SInst_di <"convert_ud2df", int_hexagon_F2_conv_ud2df>;
+def HEXAGON_F2_conv_d2sf:
+ sf_SInst_di <"convert_d2sf", int_hexagon_F2_conv_d2sf>;
+def HEXAGON_F2_conv_d2df:
+ df_SInst_di <"convert_d2df", int_hexagon_F2_conv_d2df>;
+def HEXAGON_F2_conv_sf2uw:
+ si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw>;
+def HEXAGON_F2_conv_sf2w:
+ si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w>;
+def HEXAGON_F2_conv_sf2ud:
+ di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud>;
+def HEXAGON_F2_conv_sf2d:
+ di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d>;
+def HEXAGON_F2_conv_df2uw:
+ si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw>;
+def HEXAGON_F2_conv_df2w:
+ si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w>;
+def HEXAGON_F2_conv_df2ud:
+ di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud>;
+def HEXAGON_F2_conv_df2d:
+ di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d>;
+def HEXAGON_F2_conv_sf2uw_chop:
+ si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw_chop>;
+def HEXAGON_F2_conv_sf2w_chop:
+ si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w_chop>;
+def HEXAGON_F2_conv_sf2ud_chop:
+ di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud_chop>;
+def HEXAGON_F2_conv_sf2d_chop:
+ di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d_chop>;
+def HEXAGON_F2_conv_df2uw_chop:
+ si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw_chop>;
+def HEXAGON_F2_conv_df2w_chop:
+ si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w_chop>;
+def HEXAGON_F2_conv_df2ud_chop:
+ di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud_chop>;
+def HEXAGON_F2_conv_df2d_chop:
+ di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d_chop>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
new file mode 100644
index 000000000000..f011d51bd61a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -0,0 +1,95 @@
+//===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Hexagon MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonAsmPrinter.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
+ HexagonAsmPrinter& Printer) {
+ MCContext &MC = Printer.OutContext;
+ const MCExpr *ME;
+
+ ME = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, MC);
+
+ if (!MO.isJTI() && MO.getOffset())
+ ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC),
+ MC);
+
+ return (MCOperand::CreateExpr(ME));
+}
+
+// Create an MCInst from a MachineInstr
+void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
+ HexagonAsmPrinter& AP) {
+ MCI.setOpcode(MI->getOpcode());
+ MCI.setDesc(MI->getDesc());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCO;
+
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCO = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ // FP immediates are used only when setting GPRs, so they may be dealt
+ // with like regular immediates from this point on.
+ MCO = MCOperand::CreateImm(*Val.bitcastToAPInt().getRawData());
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCO = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCO = MCOperand::CreateExpr
+ (MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(),
+ AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCO = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP);
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()),
+ AP);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
+ break;
+ }
+
+ MCI.addOperand(MCO);
+ }
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
new file mode 100644
index 000000000000..0318c519e453
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -0,0 +1,75 @@
+//=- HexagonMachineFuctionInfo.h - Hexagon machine function info --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMACHINEFUNCTIONINFO_H
+#define HexagonMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ namespace Hexagon {
+ const unsigned int StartPacket = 0x1;
+ const unsigned int EndPacket = 0x2;
+ }
+
+
+/// Hexagon target-specific information for each MachineFunction.
+class HexagonMachineFunctionInfo : public MachineFunctionInfo {
+ // SRetReturnReg - Some subtargets require that sret lowering includes
+ // returning the value of the returned struct in a register. This field
+ // holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+ std::vector<MachineInstr*> AllocaAdjustInsts;
+ int VarArgsFrameIndex;
+ bool HasClobberLR;
+
+ std::map<const MachineInstr*, unsigned> PacketInfo;
+
+
+public:
+ HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0) {}
+
+ HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
+ HasClobberLR(0) {}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ void addAllocaAdjustInst(MachineInstr* MI) {
+ AllocaAdjustInsts.push_back(MI);
+ }
+ const std::vector<MachineInstr*>& getAllocaAdjustInsts() {
+ return AllocaAdjustInsts;
+ }
+
+ void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
+ int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+
+ void setStartPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::StartPacket;
+ }
+ void setEndPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::EndPacket;
+ }
+ bool isStartPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::StartPacket));
+ }
+ bool isEndPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::EndPacket));
+ }
+ void setHasClobberLR(bool v) { HasClobberLR = v; }
+ bool hasClobberLR() const { return HasClobberLR; }
+
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
new file mode 100644
index 000000000000..1388ad4f167d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -0,0 +1,692 @@
+//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "HexagonMachineScheduler.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+/// Platform specific modifications to DAG.
+void VLIWMachineScheduler::postprocessDAG() {
+ SUnit* LastSequentialCall = NULL;
+ // Currently we only catch the situation when compare gets scheduled
+ // before preceding call.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ // Remember the call.
+ if (SUnits[su].getInstr()->isCall())
+ LastSequentialCall = &(SUnits[su]);
+ // Look for a compare that defines a predicate.
+ else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall)
+ SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
+ }
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+/// It is _not_ precise (statefull), it is more like
+/// another heuristic. Many corner cases are figured
+/// empirically.
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getInstr())
+ return false;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(SU->getInstr()))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ break;
+ }
+
+ // Now see if there are no other dependencies to instructions already
+ // in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+ if (Packet[i]->Succs.size() == 0)
+ continue;
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order dependencies.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Keep track of available resources.
+bool VLIWResourceModel::reserveResources(SUnit *SU) {
+ bool startNewCycle = false;
+ // Artificially reset state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ TotalPackets++;
+ return false;
+ }
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU)) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ TotalPackets++;
+ startNewCycle = true;
+ }
+
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ ResourcesModel->reserveResources(SU->getInstr());
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ break;
+ }
+ Packet.push_back(SU);
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+ DEBUG(dbgs() << "\t[" << i << "] SU(");
+ DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
+ DEBUG(Packet[i]->getInstr()->dump());
+ }
+#endif
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= SchedModel->getIssueWidth()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ TotalPackets++;
+ startNewCycle = true;
+ }
+
+ return startNewCycle;
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+void VLIWMachineScheduler::schedule() {
+ DEBUG(dbgs()
+ << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
+ << " " << BB->getName()
+ << " in_func " << BB->getParent()->getFunction()->getName()
+ << " at loop depth " << MLI.getLoopDepth(BB)
+ << " \n");
+
+ buildDAGWithRegPressure();
+
+ // Postprocess the DAG to add platform specific artificial dependencies.
+ postprocessDAG();
+
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ SchedImpl->initialize(this);
+
+ // To view Height/Depth correctly, they should be accessed at least once.
+ //
+ // FIXME: SUnit::dumpAll always recompute depth and height now. The max
+ // depth/height could be computed directly from the roots and leaves.
+ DEBUG(unsigned maxH = 0;
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ if (SUnits[su].getHeight() > maxH)
+ maxH = SUnits[su].getHeight();
+ dbgs() << "Max Height " << maxH << "\n";);
+ DEBUG(unsigned maxD = 0;
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ if (SUnits[su].getDepth() > maxD)
+ maxD = SUnits[su].getDepth();
+ dbgs() << "Max Depth " << maxD << "\n";);
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+}
+
+void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
+ DAG = static_cast<VLIWMachineScheduler*>(dag);
+ SchedModel = DAG->getSchedModel();
+ TRI = DAG->TRI;
+
+ Top.init(DAG, SchedModel);
+ Bot.init(DAG, SchedModel);
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
+ const TargetMachine &TM = DAG->MF.getTarget();
+ delete Top.HazardRec;
+ delete Bot.HazardRec;
+ Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ Top.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel());
+ Bot.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel());
+
+ assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+}
+
+void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+ unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+ SU->TopReadyCycle = PredReadyCycle + MinLatency;
+ }
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;
+ }
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if (IssueCount + uops > SchedModel->getIssueWidth())
+ return true;
+
+ return false;
+}
+
+void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU,
+ unsigned ReadyCycle) {
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+ if (ReadyCycle > CurrCycle || checkHazard(SU))
+
+ Pending.push(SU);
+ else
+ Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
+ unsigned Width = SchedModel->getIssueWidth();
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ } else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+
+ DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+ bool startNewCycle = false;
+
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+
+ // Update DFA model.
+ startNewCycle = ResourceModel->reserveResources(SU);
+
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+ if (startNewCycle) {
+ DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ }
+ else
+ DEBUG(dbgs() << "*** IssueCount " << IssueCount
+ << " at cycle " << CurrCycle << '\n');
+}
+
+/// Release pending ready nodes in to the available queue. This makes them
+/// visible to heuristics.
+void ConvergingVLIWScheduler::SchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin()+i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin()+i);
+ --i; --e;
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return NULL.
+SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ for (unsigned i = 0; Available.empty(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard"); (void)i;
+ ResourceModel->reserveResources(0);
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return NULL;
+}
+
+#ifndef NDEBUG
+void ConvergingVLIWScheduler::traceCandidate(const char *Label,
+ const ReadyQueue &Q,
+ SUnit *SU, PressureElement P) {
+ dbgs() << Label << " " << Q.getName() << " ";
+ if (P.isValid())
+ dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
+ << " ";
+ else
+ dbgs() << " ";
+ SU->dump(DAG);
+}
+#endif
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+static SUnit *getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
+/// of SU, return it, otherwise return null.
+static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
+ SUnit *OnlyAvailableSucc = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ SUnit &Succ = *I->getSUnit();
+ if (!Succ.isScheduled) {
+ // We found an available, but not scheduled, successor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
+ return 0;
+ OnlyAvailableSucc = &Succ;
+ }
+ }
+ return OnlyAvailableSucc;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned FactorOne = 2;
+
+/// Single point to compute overall scheduling cost.
+/// TODO: More heuristics will be used soon.
+int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
+ SchedCandidate &Candidate,
+ RegPressureDelta &Delta,
+ bool verbose) {
+ // Initial trivial priority.
+ int ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (!SU || SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Critical path first.
+ if (Q.getID() == TopQID) {
+ ResCount += (SU->getHeight() * ScaleTwo);
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Top.ResourceModel->isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+ } else {
+ ResCount += (SU->getDepth() * ScaleTwo);
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Bot.ResourceModel->isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+ }
+
+ unsigned NumNodesBlocking = 0;
+ if (Q.getID() == TopQID) {
+ // How many SUs does it block from scheduling?
+ // Look at all of the successors of this node.
+ // Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ } else {
+ // How many unscheduled predecessors block this node?
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (getSingleUnscheduledSucc(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ }
+ ResCount += (NumNodesBlocking * ScaleTwo);
+
+ // Factor in reg pressure as a heuristic.
+ ResCount -= (Delta.Excess.UnitIncrease*PriorityThree);
+ ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree);
+
+ DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
+
+ return ResCount;
+}
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
+pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate) {
+ DEBUG(Q.dump());
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+ // BestSU remains NULL if no top candidates beat the best existing candidate.
+ CandResult FoundCandidate = NoCand;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
+
+ // Initialize the candidate if needed.
+ if (!Candidate.SU) {
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+
+ // Best cost.
+ if (CurrentCost > Candidate.SCost) {
+ DEBUG(traceCandidate("CCAND", Q, *I));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ continue;
+ }
+
+ // Fall through to original instruction order.
+ // Only consider node order if Candidate was chosen from this Q.
+ if (FoundCandidate == NoCand)
+ continue;
+ }
+ return FoundCandidate;
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ return SU;
+ }
+ SchedCandidate BotCand;
+ // Prefer bottom scheduling when heuristics are silent.
+ CandResult BotResult = pickNodeFromQueue(Bot.Available,
+ DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotResult == SingleExcess || BotResult == SingleCritical) {
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ SchedCandidate TopCand;
+ CandResult TopResult = pickNodeFromQueue(Top.Available,
+ DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+
+ if (TopResult == SingleExcess || TopResult == SingleCritical) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure pick it.
+ if (BotResult == SingleMax) {
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ if (TopResult == SingleMax) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ if (TopCand.SCost > BotCand.SCost) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate in node order.
+ IsTopNode = false;
+ return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return NULL;
+ }
+ SUnit *SU;
+ if (llvm::ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+ (void)TopResult;
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (llvm::ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate BotCand;
+ CandResult BotResult =
+ pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+ (void)BotResult;
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+ SU = pickNodeBidrectional(IsTopNode);
+ }
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling Instruction in cycle "
+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
+ SU->dump(DAG));
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update it's state based on the current cycle before MachineSchedStrategy
+/// does.
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ SU->TopReadyCycle = Top.CurrCycle;
+ Top.bumpNode(SU);
+ } else {
+ SU->BotReadyCycle = Bot.CurrCycle;
+ Bot.bumpNode(SU);
+ }
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
new file mode 100644
index 000000000000..f68dadf29210
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -0,0 +1,244 @@
+//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom Hexagon MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONASMPRINTER_H
+#define HEXAGONASMPRINTER_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+//===----------------------------------------------------------------------===//
+// ConvergingVLIWScheduler - Implementation of the standard
+// MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+class VLIWResourceModel {
+ /// ResourcesModel - Represents VLIW state.
+ /// Not limited to VLIW targets per say, but assumes
+ /// definition of DFA by a target.
+ DFAPacketizer *ResourcesModel;
+
+ const TargetSchedModel *SchedModel;
+
+ /// Local packet/bundle model. Purely
+ /// internal to the MI schedulre at the time.
+ std::vector<SUnit*> Packet;
+
+ /// Total packets created.
+ unsigned TotalPackets;
+
+public:
+VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) :
+ SchedModel(SM), TotalPackets(0) {
+ ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL);
+
+ // This hard requirement could be relaxed,
+ // but for now do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ Packet.resize(SchedModel->getIssueWidth());
+ Packet.clear();
+ ResourcesModel->clearResources();
+ }
+
+ ~VLIWResourceModel() {
+ delete ResourcesModel;
+ }
+
+ void resetPacketState() {
+ Packet.clear();
+ }
+
+ void resetDFA() {
+ ResourcesModel->clearResources();
+ }
+
+ void reset() {
+ Packet.clear();
+ ResourcesModel->clearResources();
+ }
+
+ bool isResourceAvailable(SUnit *SU);
+ bool reserveResources(SUnit *SU);
+ unsigned getTotalPackets() const { return TotalPackets; }
+};
+
+/// Extend the standard ScheduleDAGMI to provide more context and override the
+/// top-level schedule() driver.
+class VLIWMachineScheduler : public ScheduleDAGMI {
+public:
+ VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S):
+ ScheduleDAGMI(C, S) {}
+
+ /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
+ /// time to do some work.
+ virtual void schedule();
+ /// Perform platform specific DAG postprocessing.
+ void postprocessDAG();
+};
+
+/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics
+/// to balance the schedule.
+class ConvergingVLIWScheduler : public MachineSchedStrategy {
+
+ /// Store the state used by ConvergingVLIWScheduler heuristics, required
+ /// for the lifetime of one invocation of pickNode().
+ struct SchedCandidate {
+ // The best SUnit candidate.
+ SUnit *SU;
+
+ // Register pressure values for the best candidate.
+ RegPressureDelta RPDelta;
+
+ // Best scheduling cost.
+ int SCost;
+
+ SchedCandidate(): SU(NULL), SCost(0) {}
+ };
+ /// Represent the type of SchedCandidate found within a single queue.
+ enum CandResult {
+ NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
+ BestCost};
+
+ /// Each Scheduling boundary is associated with ready queues. It tracks the
+ /// current cycle in whichever direction at has moved, and maintains the state
+ /// of "hazards" and other interlocks at the current cycle.
+ struct SchedBoundary {
+ VLIWMachineScheduler *DAG;
+ const TargetSchedModel *SchedModel;
+
+ ReadyQueue Available;
+ ReadyQueue Pending;
+ bool CheckPending;
+
+ ScheduleHazardRecognizer *HazardRec;
+ VLIWResourceModel *ResourceModel;
+
+ unsigned CurrCycle;
+ unsigned IssueCount;
+
+ /// MinReadyCycle - Cycle of the soonest available instruction.
+ unsigned MinReadyCycle;
+
+ // Remember the greatest min operand latency.
+ unsigned MaxMinLatency;
+
+ /// Pending queues extend the ready queues with the same ID and the
+ /// PendingFlag set.
+ SchedBoundary(unsigned ID, const Twine &Name):
+ DAG(0), SchedModel(0), Available(ID, Name+".A"),
+ Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
+ CheckPending(false), HazardRec(0), ResourceModel(0),
+ CurrCycle(0), IssueCount(0),
+ MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+
+ ~SchedBoundary() {
+ delete ResourceModel;
+ delete HazardRec;
+ }
+
+ void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) {
+ DAG = dag;
+ SchedModel = smodel;
+ }
+
+ bool isTop() const {
+ return Available.getID() == ConvergingVLIWScheduler::TopQID;
+ }
+
+ bool checkHazard(SUnit *SU);
+
+ void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+ void bumpCycle();
+
+ void bumpNode(SUnit *SU);
+
+ void releasePending();
+
+ void removeReady(SUnit *SU);
+
+ SUnit *pickOnlyChoice();
+ };
+
+ VLIWMachineScheduler *DAG;
+ const TargetSchedModel *SchedModel;
+ const TargetRegisterInfo *TRI;
+
+ // State of the top and bottom scheduled instruction boundaries.
+ SchedBoundary Top;
+ SchedBoundary Bot;
+
+public:
+ /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+ enum {
+ TopQID = 1,
+ BotQID = 2,
+ LogMaxQID = 2
+ };
+
+ ConvergingVLIWScheduler():
+ DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+ virtual void initialize(ScheduleDAGMI *dag);
+
+ virtual SUnit *pickNode(bool &IsTopNode);
+
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+
+ virtual void releaseTopNode(SUnit *SU);
+
+ virtual void releaseBottomNode(SUnit *SU);
+
+ unsigned ReportPackets() {
+ return Top.ResourceModel->getTotalPackets() +
+ Bot.ResourceModel->getTotalPackets();
+ }
+
+protected:
+ SUnit *pickNodeBidrectional(bool &IsTopNode);
+
+ int SchedulingCost(ReadyQueue &Q,
+ SUnit *SU, SchedCandidate &Candidate,
+ RegPressureDelta &Delta, bool verbose);
+
+ CandResult pickNodeFromQueue(ReadyQueue &Q,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate);
+#ifndef NDEBUG
+ void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
+ PressureElement P = PressureElement());
+#endif
+};
+
+} // namespace
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
new file mode 100644
index 000000000000..5e80e48b01d5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -0,0 +1,645 @@
+//===----- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements NewValueJump pass in Hexagon.
+// Ideally, we should merge this as a Peephole pass prior to register
+// allocation, but because we have a spill in between the feeder and new value
+// jump instructions, we are forced to write after register allocation.
+// Having said that, we should re-attempt to pull this earlier at some point
+// in future.
+
+// The basic approach looks for sequence of predicated jump, compare instruciton
+// that genereates the predicate and, the feeder to the predicate. Once it finds
+// all, it collapses compare and jump instruction into a new valu jump
+// intstructions.
+//
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "hexagon-nvj"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
+
+static cl::opt<int>
+DbgNVJCount("nvj-count", cl::init(-1), cl::Hidden, cl::desc(
+ "Maximum number of predicated jumps to be converted to New Value Jump"));
+
+static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable New Value Jumps"));
+
+namespace {
+ struct HexagonNewValueJump : public MachineFunctionPass {
+ const HexagonInstrInfo *QII;
+ const HexagonRegisterInfo *QRI;
+
+ public:
+ static char ID;
+
+ HexagonNewValueJump() : MachineFunctionPass(ID) { }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Hexagon NewValueJump";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ private:
+
+ };
+
+} // end of anonymous namespace
+
+char HexagonNewValueJump::ID = 0;
+
+// We have identified this II could be feeder to NVJ,
+// verify that it can be.
+static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
+ const TargetRegisterInfo *TRI,
+ MachineBasicBlock::iterator II,
+ MachineBasicBlock::iterator end,
+ MachineBasicBlock::iterator skip,
+ MachineFunction &MF) {
+
+ // Predicated instruction can not be feeder to NVJ.
+ if (QII->isPredicated(II))
+ return false;
+
+ // Bail out if feederReg is a paired register (double regs in
+ // our case). One would think that we can check to see if a given
+ // register cmpReg1 or cmpReg2 is a sub register of feederReg
+ // using -- if (QRI->isSubRegister(feederReg, cmpReg1) logic
+ // before the callsite of this function
+ // But we can not as it comes in the following fashion.
+ // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
+ // %R0<def> = KILL %R0, %D0<imp-use,kill>
+ // %P0<def> = CMPEQri %R0<kill>, 0
+ // Hence, we need to check if it's a KILL instruction.
+ if (II->getOpcode() == TargetOpcode::KILL)
+ return false;
+
+
+ // Make sure there there is no 'def' or 'use' of any of the uses of
+ // feeder insn between it's definition, this MI and jump, jmpInst
+ // skipping compare, cmpInst.
+ // Here's the example.
+ // r21=memub(r22+r24<<#0)
+ // p0 = cmp.eq(r21, #0)
+ // r4=memub(r3+r21<<#0)
+ // if (p0.new) jump:t .LBB29_45
+ // Without this check, it will be converted into
+ // r4=memub(r3+r21<<#0)
+ // r21=memub(r22+r24<<#0)
+ // p0 = cmp.eq(r21, #0)
+ // if (p0.new) jump:t .LBB29_45
+ // and result WAR hazards if converted to New Value Jump.
+
+ for (unsigned i = 0; i < II->getNumOperands(); ++i) {
+ if (II->getOperand(i).isReg() &&
+ (II->getOperand(i).isUse() || II->getOperand(i).isDef())) {
+ MachineBasicBlock::iterator localII = II;
+ ++localII;
+ unsigned Reg = II->getOperand(i).getReg();
+ for (MachineBasicBlock::iterator localBegin = localII;
+ localBegin != end; ++localBegin) {
+ if (localBegin == skip ) continue;
+ // Check for Subregisters too.
+ if (localBegin->modifiesRegister(Reg, TRI) ||
+ localBegin->readsRegister(Reg, TRI))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// These are the common checks that need to performed
+// to determine if
+// 1. compare instruction can be moved before jump.
+// 2. feeder to the compare instruction can be moved before jump.
+static bool commonChecksToProhibitNewValueJump(bool afterRA,
+ MachineBasicBlock::iterator MII) {
+
+ // If store in path, bail out.
+ if (MII->getDesc().mayStore())
+ return false;
+
+ // if call in path, bail out.
+ if (MII->getOpcode() == Hexagon::CALLv3)
+ return false;
+
+ // if NVJ is running prior to RA, do the following checks.
+ if (!afterRA) {
+ // The following Target Opcode instructions are spurious
+ // to new value jump. If they are in the path, bail out.
+ // KILL sets kill flag on the opcode. It also sets up a
+ // single register, out of pair.
+ // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
+ // %R0<def> = KILL %R0, %D0<imp-use,kill>
+ // %P0<def> = CMPEQri %R0<kill>, 0
+ // PHI can be anything after RA.
+ // COPY can remateriaze things in between feeder, compare and nvj.
+ if (MII->getOpcode() == TargetOpcode::KILL ||
+ MII->getOpcode() == TargetOpcode::PHI ||
+ MII->getOpcode() == TargetOpcode::COPY)
+ return false;
+
+ // The following pseudo Hexagon instructions sets "use" and "def"
+ // of registers by individual passes in the backend. At this time,
+ // we don't know the scope of usage and definitions of these
+ // instructions.
+ if (MII->getOpcode() == Hexagon::TFR_condset_rr ||
+ MII->getOpcode() == Hexagon::TFR_condset_ii ||
+ MII->getOpcode() == Hexagon::TFR_condset_ri ||
+ MII->getOpcode() == Hexagon::TFR_condset_ir ||
+ MII->getOpcode() == Hexagon::LDriw_pred ||
+ MII->getOpcode() == Hexagon::STriw_pred)
+ return false;
+ }
+
+ return true;
+}
+
+static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
+ const TargetRegisterInfo *TRI,
+ MachineBasicBlock::iterator II,
+ unsigned pReg,
+ bool secondReg,
+ bool optLocation,
+ MachineBasicBlock::iterator end,
+ MachineFunction &MF) {
+
+ MachineInstr *MI = II;
+
+ // If the second operand of the compare is an imm, make sure it's in the
+ // range specified by the arch.
+ if (!secondReg) {
+ int64_t v = MI->getOperand(2).getImm();
+ if (MI->getOpcode() == Hexagon::CMPGEri ||
+ (MI->getOpcode() == Hexagon::CMPGEUri && v > 0))
+ --v;
+
+ if (!(isUInt<5>(v) ||
+ ((MI->getOpcode() == Hexagon::CMPEQri ||
+ MI->getOpcode() == Hexagon::CMPGTri ||
+ MI->getOpcode() == Hexagon::CMPGEri) &&
+ (v == -1))))
+ return false;
+ }
+
+ unsigned cmpReg1, cmpOp2;
+ cmpReg1 = MI->getOperand(1).getReg();
+
+ if (secondReg) {
+ cmpOp2 = MI->getOperand(2).getReg();
+
+ // Make sure that that second register is not from COPY
+ // At machine code level, we don't need this, but if we decide
+ // to move new value jump prior to RA, we would be needing this.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) {
+ MachineInstr *def = MRI.getVRegDef(cmpOp2);
+ if (def->getOpcode() == TargetOpcode::COPY)
+ return false;
+ }
+ }
+
+ // Walk the instructions after the compare (predicate def) to the jump,
+ // and satisfy the following conditions.
+ ++II ;
+ for (MachineBasicBlock::iterator localII = II; localII != end;
+ ++localII) {
+
+ // Check 1.
+ // If "common" checks fail, bail out.
+ if (!commonChecksToProhibitNewValueJump(optLocation, localII))
+ return false;
+
+ // Check 2.
+ // If there is a def or use of predicate (result of compare), bail out.
+ if (localII->modifiesRegister(pReg, TRI) ||
+ localII->readsRegister(pReg, TRI))
+ return false;
+
+ // Check 3.
+ // If there is a def of any of the use of the compare (operands of compare),
+ // bail out.
+ // Eg.
+ // p0 = cmp.eq(r2, r0)
+ // r2 = r4
+ // if (p0.new) jump:t .LBB28_3
+ if (localII->modifiesRegister(cmpReg1, TRI) ||
+ (secondReg && localII->modifiesRegister(cmpOp2, TRI)))
+ return false;
+ }
+ return true;
+}
+
+// Given a compare operator, return a matching New Value Jump
+// compare operator. Make sure that MI here is included in
+// HexagonInstrInfo.cpp::isNewValueJumpCandidate
+static unsigned getNewValueJumpOpcode(const MachineInstr *MI, int reg,
+ bool secondRegNewified) {
+ switch (MI->getOpcode()) {
+ case Hexagon::CMPEQrr:
+ return Hexagon::JMP_EQrrPt_nv_V4;
+
+ case Hexagon::CMPEQri: {
+ if (reg >= 0)
+ return Hexagon::JMP_EQriPt_nv_V4;
+ else
+ return Hexagon::JMP_EQriPtneg_nv_V4;
+ }
+
+ case Hexagon::CMPLTrr:
+ case Hexagon::CMPGTrr: {
+ if (secondRegNewified)
+ return Hexagon::JMP_GTrrdnPt_nv_V4;
+ else
+ return Hexagon::JMP_GTrrPt_nv_V4;
+ }
+
+ case Hexagon::CMPGEri: {
+ if (reg >= 1)
+ return Hexagon::JMP_GTriPt_nv_V4;
+ else
+ return Hexagon::JMP_GTriPtneg_nv_V4;
+ }
+
+ case Hexagon::CMPGTri: {
+ if (reg >= 0)
+ return Hexagon::JMP_GTriPt_nv_V4;
+ else
+ return Hexagon::JMP_GTriPtneg_nv_V4;
+ }
+
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPGTUrr: {
+ if (secondRegNewified)
+ return Hexagon::JMP_GTUrrdnPt_nv_V4;
+ else
+ return Hexagon::JMP_GTUrrPt_nv_V4;
+ }
+
+ case Hexagon::CMPGTUri:
+ return Hexagon::JMP_GTUriPt_nv_V4;
+
+ case Hexagon::CMPGEUri: {
+ if (reg == 0)
+ return Hexagon::JMP_EQrrPt_nv_V4;
+ else
+ return Hexagon::JMP_GTUriPt_nv_V4;
+ }
+
+ default:
+ llvm_unreachable("Could not find matching New Value Jump instruction.");
+ }
+ // return *some value* to avoid compiler warning
+ return 0;
+}
+
+bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
+ << "********** Function: "
+ << MF.getName() << "\n");
+
+#if 0
+ // for now disable this, if we move NewValueJump before register
+ // allocation we need this information.
+ LiveVariables &LVs = getAnalysis<LiveVariables>();
+#endif
+
+ QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
+ QRI =
+ static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+
+ if (!QRI->Subtarget.hasV4TOps() ||
+ DisableNewValueJumps) {
+ return false;
+ }
+
+ int nvjCount = DbgNVJCount;
+ int nvjGenerated = 0;
+
+ // Loop through all the bb's of the function
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+
+ DEBUG(dbgs() << "** dumping bb ** "
+ << MBB->getNumber() << "\n");
+ DEBUG(MBB->dump());
+ DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
+ bool foundJump = false;
+ bool foundCompare = false;
+ bool invertPredicate = false;
+ unsigned predReg = 0; // predicate reg of the jump.
+ unsigned cmpReg1 = 0;
+ int cmpOp2 = 0;
+ bool MO1IsKill = false;
+ bool MO2IsKill = false;
+ MachineBasicBlock::iterator jmpPos;
+ MachineBasicBlock::iterator cmpPos;
+ MachineInstr *cmpInstr = NULL, *jmpInstr = NULL;
+ MachineBasicBlock *jmpTarget = NULL;
+ bool afterRA = false;
+ bool isSecondOpReg = false;
+ bool isSecondOpNewified = false;
+ // Traverse the basic block - bottom up
+ for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
+ MII != E;) {
+ MachineInstr *MI = --MII;
+ if (MI->isDebugValue()) {
+ continue;
+ }
+
+ if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
+ break;
+
+ DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");
+
+ if (!foundJump &&
+ (MI->getOpcode() == Hexagon::JMP_c ||
+ MI->getOpcode() == Hexagon::JMP_cNot ||
+ MI->getOpcode() == Hexagon::JMP_cdnPt ||
+ MI->getOpcode() == Hexagon::JMP_cdnPnt ||
+ MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
+ MI->getOpcode() == Hexagon::JMP_cdnNotPnt)) {
+ // This is where you would insert your compare and
+ // instr that feeds compare
+ jmpPos = MII;
+ jmpInstr = MI;
+ predReg = MI->getOperand(0).getReg();
+ afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);
+
+ // If ifconverter had not messed up with the kill flags of the
+ // operands, the following check on the kill flag would suffice.
+ // if(!jmpInstr->getOperand(0).isKill()) break;
+
+ // This predicate register is live out out of BB
+ // this would only work if we can actually use Live
+ // variable analysis on phy regs - but LLVM does not
+ // provide LV analysis on phys regs.
+ //if(LVs.isLiveOut(predReg, *MBB)) break;
+
+ // Get all the successors of this block - which will always
+ // be 2. Check if the predicate register is live in in those
+ // successor. If yes, we can not delete the predicate -
+ // I am doing this only because LLVM does not provide LiveOut
+ // at the BB level.
+ bool predLive = false;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SIE = MBB->succ_end(); SI != SIE; ++SI) {
+ MachineBasicBlock* succMBB = *SI;
+ if (succMBB->isLiveIn(predReg)) {
+ predLive = true;
+ }
+ }
+ if (predLive)
+ break;
+
+ jmpTarget = MI->getOperand(1).getMBB();
+ foundJump = true;
+ if (MI->getOpcode() == Hexagon::JMP_cNot ||
+ MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
+ MI->getOpcode() == Hexagon::JMP_cdnNotPnt) {
+ invertPredicate = true;
+ }
+ continue;
+ }
+
+ // No new value jump if there is a barrier. A barrier has to be in its
+ // own packet. A barrier has zero operands. We conservatively bail out
+ // here if we see any instruction with zero operands.
+ if (foundJump && MI->getNumOperands() == 0)
+ break;
+
+ if (foundJump &&
+ !foundCompare &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(0).getReg() == predReg) {
+
+ // Not all compares can be new value compare. Arch Spec: 7.6.1.1
+ if (QII->isNewValueJumpCandidate(MI)) {
+
+ assert((MI->getDesc().isCompare()) &&
+ "Only compare instruction can be collapsed into New Value Jump");
+ isSecondOpReg = MI->getOperand(2).isReg();
+
+ if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
+ afterRA, jmpPos, MF))
+ break;
+
+ cmpInstr = MI;
+ cmpPos = MII;
+ foundCompare = true;
+
+ // We need cmpReg1 and cmpOp2(imm or reg) while building
+ // new value jump instruction.
+ cmpReg1 = MI->getOperand(1).getReg();
+ if (MI->getOperand(1).isKill())
+ MO1IsKill = true;
+
+ if (isSecondOpReg) {
+ cmpOp2 = MI->getOperand(2).getReg();
+ if (MI->getOperand(2).isKill())
+ MO2IsKill = true;
+ } else
+ cmpOp2 = MI->getOperand(2).getImm();
+ continue;
+ }
+ }
+
+ if (foundCompare && foundJump) {
+
+ // If "common" checks fail, bail out on this BB.
+ if (!commonChecksToProhibitNewValueJump(afterRA, MII))
+ break;
+
+ bool foundFeeder = false;
+ MachineBasicBlock::iterator feederPos = MII;
+ if (MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isDef() &&
+ (MI->getOperand(0).getReg() == cmpReg1 ||
+ (isSecondOpReg &&
+ MI->getOperand(0).getReg() == (unsigned) cmpOp2))) {
+
+ unsigned feederReg = MI->getOperand(0).getReg();
+
+ // First try to see if we can get the feeder from the first operand
+ // of the compare. If we can not, and if secondOpReg is true
+ // (second operand of the compare is also register), try that one.
+ // TODO: Try to come up with some heuristic to figure out which
+ // feeder would benefit.
+
+ if (feederReg == cmpReg1) {
+ if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
+ if (!isSecondOpReg)
+ break;
+ else
+ continue;
+ } else
+ foundFeeder = true;
+ }
+
+ if (!foundFeeder &&
+ isSecondOpReg &&
+ feederReg == (unsigned) cmpOp2)
+ if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
+ break;
+
+ if (isSecondOpReg) {
+ // In case of CMPLT, or CMPLTU, or EQ with the second register
+ // to newify, swap the operands.
+ if (cmpInstr->getOpcode() == Hexagon::CMPLTrr ||
+ cmpInstr->getOpcode() == Hexagon::CMPLTUrr ||
+ (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
+ feederReg == (unsigned) cmpOp2)) {
+ unsigned tmp = cmpReg1;
+ bool tmpIsKill = MO1IsKill;
+ cmpReg1 = cmpOp2;
+ MO1IsKill = MO2IsKill;
+ cmpOp2 = tmp;
+ MO2IsKill = tmpIsKill;
+ }
+
+ // Now we have swapped the operands, all we need to check is,
+ // if the second operand (after swap) is the feeder.
+ // And if it is, make a note.
+ if (feederReg == (unsigned)cmpOp2)
+ isSecondOpNewified = true;
+ }
+
+ // Now that we are moving feeder close the jump,
+ // make sure we are respecting the kill values of
+ // the operands of the feeder.
+
+ bool updatedIsKill = false;
+ for (unsigned i = 0; i < MI->getNumOperands(); i++) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned feederReg = MO.getReg();
+ for (MachineBasicBlock::iterator localII = feederPos,
+ end = jmpPos; localII != end; localII++) {
+ MachineInstr *localMI = localII;
+ for (unsigned j = 0; j < localMI->getNumOperands(); j++) {
+ MachineOperand &localMO = localMI->getOperand(j);
+ if (localMO.isReg() && localMO.isUse() &&
+ localMO.isKill() && feederReg == localMO.getReg()) {
+ // We found that there is kill of a use register
+ // Set up a kill flag on the register
+ localMO.setIsKill(false);
+ MO.setIsKill();
+ updatedIsKill = true;
+ break;
+ }
+ }
+ if (updatedIsKill) break;
+ }
+ }
+ if (updatedIsKill) break;
+ }
+
+ MBB->splice(jmpPos, MI->getParent(), MI);
+ MBB->splice(jmpPos, MI->getParent(), cmpInstr);
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstr *NewMI;
+
+ assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
+ "This compare is not a New Value Jump candidate.");
+ unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
+ isSecondOpNewified);
+ if (invertPredicate)
+ opc = QII->getInvertedPredicatedOpcode(opc);
+
+ // Manage the conversions from CMPGEUri to either CMPEQrr
+ // or CMPGTUri properly. See Arch spec for CMPGEUri instructions.
+ // This has to be after the getNewValueJumpOpcode function call as
+ // second operand of the compare could be modified in this logic.
+ if (cmpInstr->getOpcode() == Hexagon::CMPGEUri) {
+ if (cmpOp2 == 0) {
+ cmpOp2 = cmpReg1;
+ MO2IsKill = MO1IsKill;
+ isSecondOpReg = true;
+ } else
+ --cmpOp2;
+ }
+
+ // Manage the conversions from CMPGEri to CMPGTUri properly.
+ // See Arch spec for CMPGEri instructions.
+ if (cmpInstr->getOpcode() == Hexagon::CMPGEri)
+ --cmpOp2;
+
+ if (isSecondOpReg) {
+ NewMI = BuildMI(*MBB, jmpPos, dl,
+ QII->get(opc))
+ .addReg(cmpReg1, getKillRegState(MO1IsKill))
+ .addReg(cmpOp2, getKillRegState(MO2IsKill))
+ .addMBB(jmpTarget);
+ }
+ else {
+ NewMI = BuildMI(*MBB, jmpPos, dl,
+ QII->get(opc))
+ .addReg(cmpReg1, getKillRegState(MO1IsKill))
+ .addImm(cmpOp2)
+ .addMBB(jmpTarget);
+ }
+
+ assert(NewMI && "New Value Jump Instruction Not created!");
+ if (cmpInstr->getOperand(0).isReg() &&
+ cmpInstr->getOperand(0).isKill())
+ cmpInstr->getOperand(0).setIsKill(false);
+ if (cmpInstr->getOperand(1).isReg() &&
+ cmpInstr->getOperand(1).isKill())
+ cmpInstr->getOperand(1).setIsKill(false);
+ cmpInstr->eraseFromParent();
+ jmpInstr->eraseFromParent();
+ ++nvjGenerated;
+ ++NumNVJGenerated;
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+
+}
+
+FunctionPass *llvm::createHexagonNewValueJump() {
+ return new HexagonNewValueJump();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
new file mode 100644
index 000000000000..c79d78f21080
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
@@ -0,0 +1,858 @@
+//===- HexagonOperands.td - Hexagon immediate processing -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illnois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Immediate operands.
+
+let PrintMethod = "printImmOperand" in {
+ // f32Ext type is used to identify constant extended floating point immediates.
+ def f32Ext : Operand<f32>;
+ def s32Imm : Operand<i32>;
+ def s26_6Imm : Operand<i32>;
+ def s16Imm : Operand<i32>;
+ def s12Imm : Operand<i32>;
+ def s11Imm : Operand<i32>;
+ def s11_0Imm : Operand<i32>;
+ def s11_1Imm : Operand<i32>;
+ def s11_2Imm : Operand<i32>;
+ def s11_3Imm : Operand<i32>;
+ def s10Imm : Operand<i32>;
+ def s9Imm : Operand<i32>;
+ def m9Imm : Operand<i32>;
+ def s8Imm : Operand<i32>;
+ def s8Imm64 : Operand<i64>;
+ def s6Imm : Operand<i32>;
+ def s4Imm : Operand<i32>;
+ def s4_0Imm : Operand<i32>;
+ def s4_1Imm : Operand<i32>;
+ def s4_2Imm : Operand<i32>;
+ def s4_3Imm : Operand<i32>;
+ def u64Imm : Operand<i64>;
+ def u32Imm : Operand<i32>;
+ def u26_6Imm : Operand<i32>;
+ def u16Imm : Operand<i32>;
+ def u16_0Imm : Operand<i32>;
+ def u16_1Imm : Operand<i32>;
+ def u16_2Imm : Operand<i32>;
+ def u11_3Imm : Operand<i32>;
+ def u10Imm : Operand<i32>;
+ def u9Imm : Operand<i32>;
+ def u8Imm : Operand<i32>;
+ def u7Imm : Operand<i32>;
+ def u6Imm : Operand<i32>;
+ def u6_0Imm : Operand<i32>;
+ def u6_1Imm : Operand<i32>;
+ def u6_2Imm : Operand<i32>;
+ def u6_3Imm : Operand<i32>;
+ def u5Imm : Operand<i32>;
+ def u4Imm : Operand<i32>;
+ def u3Imm : Operand<i32>;
+ def u2Imm : Operand<i32>;
+ def u1Imm : Operand<i32>;
+ def n8Imm : Operand<i32>;
+ def m6Imm : Operand<i32>;
+}
+
+let PrintMethod = "printNOneImmOperand" in
+def nOneImm : Operand<i32>;
+
+//
+// Immediate predicates
+//
+def s32ImmPred : PatLeaf<(i32 imm), [{
+ // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s32_24ImmPred : PatLeaf<(i32 imm), [{
+ // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x1000000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<32,24>(v);
+}]>;
+
+def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
+ // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x10000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<24,16>(v);
+}]>;
+
+def s26_6ImmPred : PatLeaf<(i32 imm), [{
+ // s26_6ImmPred predicate - True if the immediate fits in a 32-bit
+ // sign extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<26,6>(v);
+}]>;
+
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+ // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<16>(v);
+}]>;
+
+
+def s13ImmPred : PatLeaf<(i32 imm), [{
+ // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<13>(v);
+}]>;
+
+
+def s12ImmPred : PatLeaf<(i32 imm), [{
+ // s12ImmPred predicate - True if the immediate fits in a 12-bit
+ // sign extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<12>(v);
+}]>;
+
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+ // s11_0ImmPred predicate - True if the immediate fits in a 11-bit
+ // sign extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}]>;
+
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+ // s11_1ImmPred predicate - True if the immediate fits in a 12-bit
+ // sign extended field and is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}]>;
+
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+ // s11_2ImmPred predicate - True if the immediate fits in a 13-bit
+ // sign extended field and is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}]>;
+
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+ // s11_3ImmPred predicate - True if the immediate fits in a 14-bit
+ // sign extended field and is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}]>;
+
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+ // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<10>(v);
+}]>;
+
+
+def s9ImmPred : PatLeaf<(i32 imm), [{
+ // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v);
+}]>;
+
+def m9ImmPred : PatLeaf<(i32 imm), [{
+ // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude
+ // field. The range of m9 is -255 to 255.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v) && (v != -256);
+}]>;
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+ // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+ // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+ // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+ // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<4>(v);
+}]>;
+
+
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+ // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,1>(v);
+}]>;
+
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+ // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,2>(v);
+}]>;
+
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+ // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,3>(v);
+}]>;
+
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+ // Adding "N ||" to suppress gcc unused warning.
+ return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+ // u32ImmPred predicate - True if the immediate fits in a 32-bit field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u26_6ImmPred : PatLeaf<(i32 imm), [{
+ // u26_6ImmPred - True if the immediate fits in a 32-bit field and
+ // is a multiple of 64.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<26,6>(v);
+}]>;
+
+def u16ImmPred : PatLeaf<(i32 imm), [{
+ // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
+def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+ // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign
+ // extended s8 field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<16,8>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+ // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<9>(v);
+}]>;
+
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+ // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<8>(v);
+}]>;
+
+def u7StrictPosImmPred : ImmLeaf<i32, [{
+ // u7StrictPosImmPred predicate - True if the immediate fits in an 7-bit
+ // unsigned field and is strictly greater than 0.
+ return isUInt<7>(Imm) && Imm > 0;
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+ // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<7>(v);
+}]>;
+
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+ // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+ // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field. Same as u6ImmPred.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+ // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+ // field that is 1 bit alinged - multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+ // u6_2ImmPred predicate - True if the immediate fits in a 8-bit unsigned
+ // field that is 2 bits alinged - multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+ // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field that is 3 bits alinged - multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+ // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<5>(v);
+}]>;
+
+
+def u3ImmPred : PatLeaf<(i32 imm), [{
+ // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<3>(v);
+}]>;
+
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+ // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<2>(v);
+}]>;
+
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+ // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
+def m5BImmPred : PatLeaf<(i32 imm), [{
+ // m5BImmPred predicate - True if the (char) number is in range -1 .. -31
+ // and will fit in a 5 bit field when made positive, for use in memops.
+ // this is specific to the zero extending of a negative by CombineInstr
+ int8_t v = (int8_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+def m5HImmPred : PatLeaf<(i32 imm), [{
+ // m5HImmPred predicate - True if the (short) number is in range -1 .. -31
+ // and will fit in a 5 bit field when made positive, for use in memops.
+ // this is specific to the zero extending of a negative by CombineInstr
+ int16_t v = (int16_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+def m5ImmPred : PatLeaf<(i32 imm), [{
+ // m5ImmPred predicate - True if the number is in range -1 .. -31
+ // and will fit in a 5 bit field when made positive, for use in memops.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+//InN means negative integers in [-(2^N - 1), 0]
+def n8ImmPred : PatLeaf<(i32 imm), [{
+ // n8ImmPred predicate - True if the immediate fits in a 8-bit signed
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-255 <= v && v <= 0);
+}]>;
+
+def nOneImmPred : PatLeaf<(i32 imm), [{
+ // nOneImmPred predicate - True if the immediate is -1.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-1 == v);
+}]>;
+
+def Set5ImmPred : PatLeaf<(i32 imm), [{
+ // Set5ImmPred predicate - True if the number is in the series of values.
+ // [ 2^0, 2^1, ... 2^31 ]
+ // For use in setbit immediate.
+ uint32_t v = (int32_t)N->getSExtValue();
+ // Constrain to 32 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr5ImmPred : PatLeaf<(i32 imm), [{
+ // Clr5ImmPred predicate - True if the number is in the series of
+ // bit negated values.
+ // [ 2^0, 2^1, ... 2^31 ]
+ // For use in clrbit immediate.
+ // Note: we are bit NOTing the value.
+ uint32_t v = ~ (int32_t)N->getSExtValue();
+ // Constrain to 32 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr5ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr5ImmPred predicate - True if the immediate is in range 0..31.
+ int32_t v = (int32_t)N->getSExtValue();
+ return (v >= 0 && v <= 31);
+}]>;
+
+def Set4ImmPred : PatLeaf<(i32 imm), [{
+ // Set4ImmPred predicate - True if the number is in the series of values:
+ // [ 2^0, 2^1, ... 2^15 ].
+ // For use in setbit immediate.
+ uint16_t v = (int16_t)N->getSExtValue();
+ // Constrain to 16 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr4ImmPred : PatLeaf<(i32 imm), [{
+ // Clr4ImmPred predicate - True if the number is in the series of
+ // bit negated values:
+ // [ 2^0, 2^1, ... 2^15 ].
+ // For use in setbit and clrbit immediate.
+ uint16_t v = ~ (int16_t)N->getSExtValue();
+ // Constrain to 16 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr4ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr4ImmPred predicate - True if the immediate is in the range 0..15.
+ int16_t v = (int16_t)N->getSExtValue();
+ return (v >= 0 && v <= 15);
+}]>;
+
+def Set3ImmPred : PatLeaf<(i32 imm), [{
+ // Set3ImmPred predicate - True if the number is in the series of values:
+ // [ 2^0, 2^1, ... 2^7 ].
+ // For use in setbit immediate.
+ uint8_t v = (int8_t)N->getSExtValue();
+ // Constrain to 8 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr3ImmPred : PatLeaf<(i32 imm), [{
+ // Clr3ImmPred predicate - True if the number is in the series of
+ // bit negated values:
+ // [ 2^0, 2^1, ... 2^7 ].
+ // For use in setbit and clrbit immediate.
+ uint8_t v = ~ (int8_t)N->getSExtValue();
+ // Constrain to 8 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr3ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr3ImmPred predicate - True if the immediate is in the range 0..7.
+ int8_t v = (int8_t)N->getSExtValue();
+ return (v >= 0 && v <= 7);
+}]>;
+
+
+// Extendable immediate operands.
+
+let PrintMethod = "printExtOperand" in {
+ def s16Ext : Operand<i32>;
+ def s12Ext : Operand<i32>;
+ def s10Ext : Operand<i32>;
+ def s9Ext : Operand<i32>;
+ def s8Ext : Operand<i32>;
+ def s6Ext : Operand<i32>;
+ def s11_0Ext : Operand<i32>;
+ def s11_1Ext : Operand<i32>;
+ def s11_2Ext : Operand<i32>;
+ def s11_3Ext : Operand<i32>;
+ def u6Ext : Operand<i32>;
+ def u7Ext : Operand<i32>;
+ def u8Ext : Operand<i32>;
+ def u9Ext : Operand<i32>;
+ def u10Ext : Operand<i32>;
+ def u6_0Ext : Operand<i32>;
+ def u6_1Ext : Operand<i32>;
+ def u6_2Ext : Operand<i32>;
+ def u6_3Ext : Operand<i32>;
+}
+
+let PrintMethod = "printImmOperand" in
+def u0AlwaysExt : Operand<i32>;
+
+// Predicates for constant extendable operands
+def s16ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 16-bit sign extended field.
+ return isInt<16>(v);
+ else {
+ if (isInt<16>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s10ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 10-bit sign extended field.
+ return isInt<10>(v);
+ else {
+ if (isInt<10>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s9ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 9-bit sign extended field.
+ return isInt<9>(v);
+ else {
+ if (isInt<9>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s8ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 8-bit sign extended field.
+ return isInt<8>(v);
+ else {
+ if (isInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s8_16ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate fits in a 8-bit sign extended field.
+ return isInt<8>(v);
+ else {
+ if (isInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can't fit in a 16-bit signed field. This is required to avoid
+ // unnecessary constant extenders.
+ return isConstExtProfitable(Node) && !isInt<16>(v);
+ }
+}]>;
+
+def s6ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 6-bit sign extended field.
+ return isInt<6>(v);
+ else {
+ if (isInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s6_16ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate fits in a 6-bit sign extended field.
+ return isInt<6>(v);
+ else {
+ if (isInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can't fit in a 16-bit signed field. This is required to avoid
+ // unnecessary constant extenders.
+ return isConstExtProfitable(Node) && !isInt<16>(v);
+ }
+}]>;
+
+def s6_10ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 6-bit sign extended field.
+ return isInt<6>(v);
+ else {
+ if (isInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can't fit in a 10-bit signed field. This is required to avoid
+ // unnecessary constant extenders.
+ return isConstExtProfitable(Node) && !isInt<10>(v);
+ }
+}]>;
+
+def s11_0ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 11-bit sign extended field.
+ return isShiftedInt<11,0>(v);
+ else {
+ if (isInt<11>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s11_1ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 12-bit sign extended field and
+ // is 2 byte aligned.
+ return isShiftedInt<11,1>(v);
+ else {
+ if (isInt<12>(v))
+ return isShiftedInt<11,1>(v);
+
+ // Return true if extending this immediate is profitable and the low 1 bit
+ // is zero (2-byte aligned).
+ return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0);
+ }
+}]>;
+
+def s11_2ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 13-bit sign extended field and
+ // is 4-byte aligned.
+ return isShiftedInt<11,2>(v);
+ else {
+ if (isInt<13>(v))
+ return isShiftedInt<11,2>(v);
+
+ // Return true if extending this immediate is profitable and the low 2-bits
+ // are zero (4-byte aligned).
+ return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0);
+ }
+}]>;
+
+def s11_3ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 14-bit sign extended field and
+ // is 8-byte aligned.
+ return isShiftedInt<11,3>(v);
+ else {
+ if (isInt<14>(v))
+ return isShiftedInt<11,3>(v);
+
+ // Return true if extending this immediate is profitable and the low 3-bits
+ // are zero (8-byte aligned).
+ return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0);
+ }
+}]>;
+
+def u0AlwaysExtPred : PatLeaf<(i32 imm), [{
+ // Predicate for an unsigned 32-bit value that always needs to be extended.
+ if (Subtarget.hasV4TOps()) {
+ if (isConstExtProfitable(Node)) {
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+ }
+ }
+ return false;
+}]>;
+
+def u6ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 6-bit unsigned field.
+ return isUInt<6>(v);
+ else {
+ if (isUInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u7ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 7-bit unsigned field.
+ return isUInt<7>(v);
+ else {
+ if (isUInt<7>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u8ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 8-bit unsigned field.
+ return isUInt<8>(v);
+ else {
+ if (isUInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u9ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 9-bit unsigned field.
+ return isUInt<9>(v);
+ else {
+ if (isUInt<9>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u6_1ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 7-bit unsigned field and
+ // is 2-byte aligned.
+ return isShiftedUInt<6,1>(v);
+ else {
+ if (isUInt<7>(v))
+ return isShiftedUInt<6,1>(v);
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0);
+ }
+}]>;
+
+def u6_2ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 8-bit unsigned field and
+ // is 4-byte aligned.
+ return isShiftedUInt<6,2>(v);
+ else {
+ if (isUInt<8>(v))
+ return isShiftedUInt<6,2>(v);
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0);
+ }
+}]>;
+
+def u6_3ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 9-bit unsigned field and
+ // is 8-byte aligned.
+ return isShiftedUInt<6,3>(v);
+ else {
+ if (isUInt<9>(v))
+ return isShiftedUInt<6,3>(v);
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0);
+ }
+}]>;
+
+// Addressing modes.
+
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
+
+// Address operands.
+
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMEMrrOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+ let PrintMethod = "printMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+def FrameIndex : Operand<i32> {
+ let PrintMethod = "printFrameIndexOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in {
+ def globaladdress : Operand<i32>;
+ def globaladdressExt : Operand<i32>;
+}
+
+let PrintMethod = "printJumpTable" in
+def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def brtargetExt : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
new file mode 100644
index 000000000000..576f1d7d0790
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -0,0 +1,323 @@
+//===-- HexagonPeephole.cpp - Hexagon Peephole Optimiztions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This peephole pass optimizes in the following cases.
+// 1. Optimizes redundant sign extends for the following case
+// Transform the following pattern
+// %vreg170<def> = SXTW %vreg166
+// ...
+// %vreg176<def> = COPY %vreg170:subreg_loreg
+//
+// Into
+// %vreg176<def> = COPY vreg166
+//
+// 2. Optimizes redundant negation of predicates.
+// %vreg15<def> = CMPGTrr %vreg6, %vreg2
+// ...
+// %vreg16<def> = NOT_p %vreg15<kill>
+// ...
+// JMP_c %vreg16<kill>, <BB#1>, %PC<imp-def,dead>
+//
+// Into
+// %vreg15<def> = CMPGTrr %vreg6, %vreg2;
+// ...
+// JMP_cNot %vreg15<kill>, <BB#1>, %PC<imp-def,dead>;
+//
+// Note: The peephole pass makes the instrucstions like
+// %vreg170<def> = SXTW %vreg166 or %vreg16<def> = NOT_p %vreg15<kill>
+// redundant and relies on some form of dead removal instrucions, like
+// DCE or DIE to actually eliminate them.
+
+
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-peephole"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Peephole Optimization"));
+
+static cl::opt<int>
+DbgPNPCount("pnp-count", cl::init(-1), cl::Hidden,
+ cl::desc("Maximum number of P=NOT(P) to be optimized"));
+
+static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Optimization of PNotP"));
+
+static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Optimization of Sign/Zero Extends"));
+
+namespace {
+ struct HexagonPeephole : public MachineFunctionPass {
+ const HexagonInstrInfo *QII;
+ const HexagonRegisterInfo *QRI;
+ const MachineRegisterInfo *MRI;
+
+ public:
+ static char ID;
+ HexagonPeephole() : MachineFunctionPass(ID) { }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "Hexagon optimize redundant zero and size extends";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src);
+ };
+}
+
+char HexagonPeephole::ID = 0;
+
+bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
+
+ QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().
+ getInstrInfo());
+ QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().
+ getRegisterInfo());
+ MRI = &MF.getRegInfo();
+
+ DenseMap<unsigned, unsigned> PeepholeMap;
+ DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap;
+
+ if (DisableHexagonPeephole) return false;
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ PeepholeMap.clear();
+ PeepholeDoubleRegsMap.clear();
+
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ // Look for sign extends:
+ // %vreg170<def> = SXTW %vreg166
+ if (!DisableOptSZExt && MI->getOpcode() == Hexagon::SXTW) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ // Just handle virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Map the following:
+ // %vreg170<def> = SXTW %vreg166
+ // PeepholeMap[170] = vreg166
+ PeepholeMap[DstReg] = SrcReg;
+ }
+ }
+
+ // Look for this sequence below
+ // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
+ // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
+ // and convert into
+ // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
+ if (MI->getOpcode() == Hexagon::LSRd_ri) {
+ assert(MI->getNumOperands() == 3);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src1 = MI->getOperand(1);
+ MachineOperand &Src2 = MI->getOperand(2);
+ if (Src2.getImm() != 32)
+ continue;
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src1.getReg();
+ PeepholeDoubleRegsMap[DstReg] =
+ std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/);
+ }
+
+ // Look for P=NOT(P).
+ if (!DisablePNotP &&
+ (MI->getOpcode() == Hexagon::NOT_p)) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ // Just handle virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Map the following:
+ // %vreg170<def> = NOT_xx %vreg166
+ // PeepholeMap[170] = vreg166
+ PeepholeMap[DstReg] = SrcReg;
+ }
+ }
+
+ // Look for copy:
+ // %vreg176<def> = COPY %vreg170:subreg_loreg
+ if (!DisableOptSZExt && MI->isCopy()) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+
+ // Make sure we are copying the lower 32 bits.
+ if (Src.getSubReg() != Hexagon::subreg_loreg)
+ continue;
+
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Try to find in the map.
+ if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
+ // Change the 1st operand.
+ MI->RemoveOperand(1);
+ MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
+ } else {
+ DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
+ PeepholeDoubleRegsMap.find(SrcReg);
+ if (DI != PeepholeDoubleRegsMap.end()) {
+ std::pair<unsigned,unsigned> PeepholeSrc = DI->second;
+ MI->RemoveOperand(1);
+ MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first,
+ false /*isDef*/,
+ false /*isImp*/,
+ false /*isKill*/,
+ false /*isDead*/,
+ false /*isUndef*/,
+ false /*isEarlyClobber*/,
+ PeepholeSrc.second));
+ }
+ }
+ }
+ }
+
+ // Look for Predicated instructions.
+ if (!DisablePNotP) {
+ bool Done = false;
+ if (QII->isPredicated(MI)) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ unsigned Reg0 = Op0.getReg();
+ const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0);
+ if (RC0->getID() == Hexagon::PredRegsRegClassID) {
+ // Handle instructions that have a prediate register in op0
+ // (most cases of predicable instructions).
+ if (TargetRegisterInfo::isVirtualRegister(Reg0)) {
+ // Try to find in the map.
+ if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
+ // Change the 1st operand and, flip the opcode.
+ MI->getOperand(0).setReg(PeepholeSrc);
+ int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode());
+ MI->setDesc(QII->get(NewOp));
+ Done = true;
+ }
+ }
+ }
+ }
+
+ if (!Done) {
+ // Handle special instructions.
+ unsigned Op = MI->getOpcode();
+ unsigned NewOp = 0;
+ unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices.
+
+ switch (Op) {
+ case Hexagon::TFR_condset_rr:
+ case Hexagon::TFR_condset_ii:
+ case Hexagon::MUX_ii:
+ case Hexagon::MUX_rr:
+ NewOp = Op;
+ break;
+ case Hexagon::TFR_condset_ri:
+ NewOp = Hexagon::TFR_condset_ir;
+ break;
+ case Hexagon::TFR_condset_ir:
+ NewOp = Hexagon::TFR_condset_ri;
+ break;
+ case Hexagon::MUX_ri:
+ NewOp = Hexagon::MUX_ir;
+ break;
+ case Hexagon::MUX_ir:
+ NewOp = Hexagon::MUX_ri;
+ break;
+ }
+ if (NewOp) {
+ unsigned PSrc = MI->getOperand(PR).getReg();
+ if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
+ MI->getOperand(PR).setReg(POrig);
+ MI->setDesc(QII->get(NewOp));
+ // Swap operands S1 and S2.
+ MachineOperand Op1 = MI->getOperand(S1);
+ MachineOperand Op2 = MI->getOperand(S2);
+ ChangeOpInto(MI->getOperand(S1), Op2);
+ ChangeOpInto(MI->getOperand(S2), Op1);
+ }
+ } // if (NewOp)
+ } // if (!Done)
+
+ } // if (!DisablePNotP)
+
+ } // Instruction
+ } // Basic Block
+ return true;
+}
+
+void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) {
+ assert (&Dst != &Src && "Cannot duplicate into itself");
+ switch (Dst.getType()) {
+ case MachineOperand::MO_Register:
+ if (Src.isReg()) {
+ Dst.setReg(Src.getReg());
+ } else if (Src.isImm()) {
+ Dst.ChangeToImmediate(Src.getImm());
+ } else {
+ llvm_unreachable("Unexpected src operand type");
+ }
+ break;
+
+ case MachineOperand::MO_Immediate:
+ if (Src.isImm()) {
+ Dst.setImm(Src.getImm());
+ } else if (Src.isReg()) {
+ Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(),
+ Src.isKill(), Src.isDead(), Src.isUndef(),
+ Src.isDebug());
+ } else {
+ llvm_unreachable("Unexpected src operand type");
+ }
+ break;
+
+ default:
+ llvm_unreachable("Unexpected dst operand type");
+ break;
+ }
+}
+
+FunctionPass *llvm::createHexagonPeephole() {
+ return new HexagonPeephole();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
new file mode 100644
index 000000000000..d8b4e2fcb368
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -0,0 +1,317 @@
+//===-- HexagonRegisterInfo.cpp - Hexagon Register Information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonRegisterInfo.h"
+#include "Hexagon.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+
+HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st,
+ const HexagonInstrInfo &tii)
+ : HexagonGenRegisterInfo(Hexagon::R31),
+ Subtarget(st),
+ TII(tii) {
+}
+
+const uint16_t* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction
+ *MF)
+ const {
+ static const uint16_t CalleeSavedRegsV2[] = {
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+ static const uint16_t CalleeSavedRegsV3[] = {
+ Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+
+ switch(Subtarget.getHexagonArchVersion()) {
+ case HexagonSubtarget::V1:
+ break;
+ case HexagonSubtarget::V2:
+ return CalleeSavedRegsV2;
+ case HexagonSubtarget::V3:
+ case HexagonSubtarget::V4:
+ case HexagonSubtarget::V5:
+ return CalleeSavedRegsV3;
+ }
+ llvm_unreachable("Callee saved registers requested for unknown architecture "
+ "version");
+}
+
+BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
+ const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(HEXAGON_RESERVED_REG_1);
+ Reserved.set(HEXAGON_RESERVED_REG_2);
+ Reserved.set(Hexagon::R29);
+ Reserved.set(Hexagon::R30);
+ Reserved.set(Hexagon::R31);
+ Reserved.set(Hexagon::D14);
+ Reserved.set(Hexagon::D15);
+ Reserved.set(Hexagon::LC0);
+ Reserved.set(Hexagon::LC1);
+ Reserved.set(Hexagon::SA0);
+ Reserved.set(Hexagon::SA1);
+ return Reserved;
+}
+
+
+const TargetRegisterClass* const*
+HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = {
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = {
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ };
+
+ switch(Subtarget.getHexagonArchVersion()) {
+ case HexagonSubtarget::V1:
+ break;
+ case HexagonSubtarget::V2:
+ return CalleeSavedRegClassesV2;
+ case HexagonSubtarget::V3:
+ case HexagonSubtarget::V4:
+ case HexagonSubtarget::V5:
+ return CalleeSavedRegClassesV3;
+ }
+ llvm_unreachable("Callee saved register classes requested for unknown "
+ "architecture version");
+}
+
+void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ //
+ // Hexagon_TODO: Do we need to enforce this for Hexagon?
+ assert(SPAdj == 0 && "Unexpected");
+
+ MachineInstr &MI = *II;
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+
+ // Addressable stack objects are accessed using neg. offsets from %fp.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ unsigned FrameReg = getFrameRegister(MF);
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasFP(MF)) {
+ // We will not reserve space on the stack for the lr and fp registers.
+ Offset -= 2 * Hexagon_WordSize;
+ }
+
+ const unsigned FrameSize = MFI.getStackSize();
+
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
+ !TII.isSpillPredRegOp(&MI)) {
+ // Replace frame index with a stack pointer reference.
+ MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), false,
+ false, true);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(FrameSize+Offset);
+ } else {
+ // Replace frame index with a frame pointer reference.
+ if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
+
+ // If the offset overflows, then correct it.
+ //
+ // For loads, we do not need a reserved register
+ // r0 = memw(r30 + #10000) to:
+ //
+ // r0 = add(r30, #10000)
+ // r0 = memw(r0)
+ if ( (MI.getOpcode() == Hexagon::LDriw) ||
+ (MI.getOpcode() == Hexagon::LDrid) ||
+ (MI.getOpcode() == Hexagon::LDrih) ||
+ (MI.getOpcode() == Hexagon::LDriuh) ||
+ (MI.getOpcode() == Hexagon::LDrib) ||
+ (MI.getOpcode() == Hexagon::LDriub) ||
+ (MI.getOpcode() == Hexagon::LDriw_f) ||
+ (MI.getOpcode() == Hexagon::LDrid_f)) {
+ unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ?
+ getSubReg(MI.getOperand(0).getReg(), Hexagon::subreg_loreg) :
+ MI.getOperand(0).getReg();
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ dstReg).addReg(FrameReg).addImm(Offset);
+ }
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ } else if ((MI.getOpcode() == Hexagon::STriw_indexed) ||
+ (MI.getOpcode() == Hexagon::STriw) ||
+ (MI.getOpcode() == Hexagon::STrid) ||
+ (MI.getOpcode() == Hexagon::STrih) ||
+ (MI.getOpcode() == Hexagon::STrib) ||
+ (MI.getOpcode() == Hexagon::STrid_f) ||
+ (MI.getOpcode() == Hexagon::STriw_f)) {
+ // For stores, we need a reserved register. Change
+ // memw(r30 + #10000) = r0 to:
+ //
+ // rs = add(r30, #10000);
+ // memw(rs) = r0
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ }
+ MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ } else if (TII.isMemOp(&MI)) {
+ // use the constant extender if the instruction provides it
+ // and we are V4TOps.
+ if (Subtarget.hasV4TOps()) {
+ if (TII.isConstExtended(&MI)) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+ TII.immediateExtend(&MI);
+ } else {
+ llvm_unreachable("Need to implement for memops");
+ }
+ } else {
+ // Only V3 and older instructions here.
+ unsigned ResReg = HEXAGON_RESERVED_REG_1;
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
+ false, false, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
+ } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr), ResReg).addReg(FrameReg).
+ addReg(ResReg);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+ true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg).
+ addImm(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+ true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ }
+ }
+ } else {
+ unsigned dstReg = MI.getOperand(0).getReg();
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ // Can we delete MI??? r2 = add (r2, #0).
+ MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ }
+ } else {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+ }
+ }
+
+}
+
+unsigned HexagonRegisterInfo::getRARegister() const {
+ return Hexagon::R31;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
+ &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (TFI->hasFP(MF)) {
+ return Hexagon::R30;
+ }
+
+ return Hexagon::R29;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister() const {
+ return Hexagon::R30;
+}
+
+unsigned HexagonRegisterInfo::getStackRegister() const {
+ return Hexagon::R29;
+}
+
+void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
+ &Moves) const
+{
+ // VirtualFP = (R30 + #0).
+ unsigned FPReg = getFrameRegister();
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+}
+
+unsigned HexagonRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "HexagonGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
new file mode 100644
index 000000000000..8a3f94a3fd12
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -0,0 +1,91 @@
+//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonREGISTERINFO_H
+#define HexagonREGISTERINFO_H
+
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "HexagonGenRegisterInfo.inc"
+
+//
+// We try not to hard code the reserved registers in our code,
+// so the following two macros were defined. However, there
+// are still a few places that R11 and R10 are hard wired.
+// See below. If, in the future, we decided to change the reserved
+// register. Don't forget changing the following places.
+//
+// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td
+// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td
+// 3. the definition of "IntRegs" in HexagonRegisterInfo.td
+// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td
+//
+#define HEXAGON_RESERVED_REG_1 Hexagon::R10
+#define HEXAGON_RESERVED_REG_2 Hexagon::R11
+
+namespace llvm {
+
+class HexagonSubtarget;
+class HexagonInstrInfo;
+class Type;
+
+struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
+ HexagonSubtarget &Subtarget;
+ const HexagonInstrInfo &TII;
+
+ HexagonRegisterInfo(HexagonSubtarget &st, const HexagonInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+
+ /// determineFrameLayout - Determine the size of the frame and maximum call
+ /// frame size.
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// requiresRegisterScavenging - returns true since we may need scavenging for
+ /// a temporary register when generating hardware loop instructions.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+ }
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister() const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ unsigned getStackRegister() const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
new file mode 100644
index 000000000000..fe41fc3bc60c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -0,0 +1,167 @@
+//===-- HexagonRegisterInfo.td - Hexagon Register defs -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Hexagon register file.
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Hexagon" in {
+
+ class HexagonReg<string n> : Register<n> {
+ field bits<5> Num;
+ }
+
+ class HexagonDoubleReg<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ }
+
+ // Registers are identified with 5-bit ID numbers.
+ // Ri - 32-bit integer registers.
+ class Ri<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+ // Rf - 32-bit floating-point registers.
+ class Rf<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+
+ // Rd - 64-bit registers.
+ class Rd<bits<5> num, string n, list<Register> subregs> :
+ HexagonDoubleReg<n, subregs> {
+ let Num = num;
+ let SubRegs = subregs;
+ }
+
+ // Rp - predicate registers
+ class Rp<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+ // Rc - control registers
+ class Rc<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+ // Rj - aliased integer registers
+ class Rj<string n, Ri R>: HexagonReg<n> {
+ let Num = R.Num;
+ let Aliases = [R];
+ }
+
+ def subreg_loreg : SubRegIndex;
+ def subreg_hireg : SubRegIndex;
+
+ // Integer registers.
+ def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+ def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+ def R12 : Ri<12, "r12">, DwarfRegNum<[12]>;
+ def R13 : Ri<13, "r13">, DwarfRegNum<[13]>;
+ def R14 : Ri<14, "r14">, DwarfRegNum<[14]>;
+ def R15 : Ri<15, "r15">, DwarfRegNum<[15]>;
+ def R16 : Ri<16, "r16">, DwarfRegNum<[16]>;
+ def R17 : Ri<17, "r17">, DwarfRegNum<[17]>;
+ def R18 : Ri<18, "r18">, DwarfRegNum<[18]>;
+ def R19 : Ri<19, "r19">, DwarfRegNum<[19]>;
+ def R20 : Ri<20, "r20">, DwarfRegNum<[20]>;
+ def R21 : Ri<21, "r21">, DwarfRegNum<[21]>;
+ def R22 : Ri<22, "r22">, DwarfRegNum<[22]>;
+ def R23 : Ri<23, "r23">, DwarfRegNum<[23]>;
+ def R24 : Ri<24, "r24">, DwarfRegNum<[24]>;
+ def R25 : Ri<25, "r25">, DwarfRegNum<[25]>;
+ def R26 : Ri<26, "r26">, DwarfRegNum<[26]>;
+ def R27 : Ri<27, "r27">, DwarfRegNum<[27]>;
+ def R28 : Ri<28, "r28">, DwarfRegNum<[28]>;
+ def R29 : Ri<29, "r29">, DwarfRegNum<[29]>;
+ def R30 : Ri<30, "r30">, DwarfRegNum<[30]>;
+ def R31 : Ri<31, "r31">, DwarfRegNum<[31]>;
+
+ def SP : Rj<"sp", R29>, DwarfRegNum<[29]>;
+ def FP : Rj<"fp", R30>, DwarfRegNum<[30]>;
+ def LR : Rj<"lr", R31>, DwarfRegNum<[31]>;
+
+ // Aliases of the R* registers used to hold 64-bit int values (doubles).
+ let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>;
+ def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>;
+ def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>;
+ def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>;
+ def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>;
+ def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>;
+ def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>;
+ def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>;
+ def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>;
+ def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>;
+ def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>;
+ def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>;
+ def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>;
+ def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>;
+ def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>;
+ def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>;
+ }
+
+ // Predicate registers.
+ def P0 : Rp<0, "p0">, DwarfRegNum<[63]>;
+ def P1 : Rp<1, "p1">, DwarfRegNum<[64]>;
+ def P2 : Rp<2, "p2">, DwarfRegNum<[65]>;
+ def P3 : Rp<3, "p3">, DwarfRegNum<[66]>;
+
+ // Control registers.
+ def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>;
+ def LC0 : Rc<1, "lc0">, DwarfRegNum<[68]>;
+
+ def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>;
+ def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>;
+
+ def M0 : Rc<6, "m0">, DwarfRegNum<[71]>;
+ def M1 : Rc<7, "m1">, DwarfRegNum<[72]>;
+
+ def PC : Rc<9, "pc">, DwarfRegNum<[32]>; // is the Dwarf number correct?
+ def GP : Rc<11, "gp">, DwarfRegNum<[33]>; // is the Dwarf number correct?
+}
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"Hexagon", [i32,f32], 32,
+ (add (sequence "R%u", 0, 9),
+ (sequence "R%u", 12, 28),
+ R10, R11, R29, R30, R31)> {
+}
+
+def DoubleRegs : RegisterClass<"Hexagon", [i64,f64], 64,
+ (add (sequence "D%u", 0, 4),
+ (sequence "D%u", 6, 13), D5, D14, D15)>;
+
+
+def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
+{
+ let Size = 32;
+}
+
+def CRRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add (sequence "LC%u", 0, 1),
+ (sequence "SA%u", 0, 1),
+ (sequence "M%u", 0, 1), PC, GP)> {
+ let Size = 32;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
new file mode 100644
index 000000000000..34bf4eacfdc0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -0,0 +1,83 @@
+//===- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends //
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass that removes sign extends for function parameters. These parameters
+// are already sign extended by the caller per Hexagon's ABI
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+namespace {
+ struct HexagonRemoveExtendArgs : public FunctionPass {
+ public:
+ static char ID;
+ HexagonRemoveExtendArgs() : FunctionPass(ID) {}
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Remove sign extends";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char HexagonRemoveExtendArgs::ID = 0;
+RegisterPass<HexagonRemoveExtendArgs> X("reargs",
+ "Remove Sign and Zero Extends for Args"
+ );
+
+
+
+bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
+ unsigned Idx = 1;
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
+ ++AI, ++Idx) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
+ Argument* Arg = AI;
+ if (!isa<PointerType>(Arg->getType())) {
+ for (Instruction::use_iterator UI = Arg->use_begin();
+ UI != Arg->use_end();) {
+ if (isa<SExtInst>(*UI)) {
+ Instruction* Use = cast<Instruction>(*UI);
+ SExtInst* SI = new SExtInst(Arg, Use->getType());
+ assert (EVT::getEVT(SI->getType()) ==
+ (EVT::getEVT(Use->getType())));
+ ++UI;
+ Use->replaceAllUsesWith(SI);
+ Instruction* First = F.getEntryBlock().begin();
+ SI->insertBefore(First);
+ Use->eraseFromParent();
+ } else {
+ ++UI;
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+
+
+FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) {
+ return new HexagonRemoveExtendArgs();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
new file mode 100644
index 000000000000..c2cfbb9710a6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -0,0 +1,69 @@
+//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Functional Units
+def LSUNIT : FuncUnit; // SLOT0
+def LUNIT : FuncUnit; // SLOT1
+def MUNIT : FuncUnit; // SLOT2
+def SUNIT : FuncUnit; // SLOT3
+def LOOPUNIT : FuncUnit;
+
+// Itinerary classes
+def ALU32 : InstrItinClass;
+def ALU64 : InstrItinClass;
+def CR : InstrItinClass;
+def J : InstrItinClass;
+def JR : InstrItinClass;
+def LD : InstrItinClass;
+def LD0 : InstrItinClass;
+def M : InstrItinClass;
+def ST : InstrItinClass;
+def ST0 : InstrItinClass;
+def S : InstrItinClass;
+def SYS : InstrItinClass;
+def ENDLOOP : InstrItinClass;
+def PSEUDO : InstrItinClass;
+def PSEUDOM : InstrItinClass;
+
+def HexagonItineraries :
+ ProcessorItineraries<[LSUNIT, LUNIT, MUNIT, SUNIT, LOOPUNIT], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
+ InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
+ InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
+ InstrItinData<LD0 , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<ST0 , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<SYS , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<ENDLOOP, [InstrStage<1, [LOOPUNIT]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [MUNIT, SUNIT], 0>,
+ InstrStage<1, [MUNIT, SUNIT]>]>
+ ]>;
+
+def HexagonModel : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItineraries;
+ let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
new file mode 100644
index 000000000000..ef72cf4068bf
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -0,0 +1,74 @@
+//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in Hexagon V4 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+// Functional Units.
+def SLOT0 : FuncUnit;
+def SLOT1 : FuncUnit;
+def SLOT2 : FuncUnit;
+def SLOT3 : FuncUnit;
+// Endloop is a pseudo instruction that is encoded with 2 bits in a packet
+// rather than taking an execution slot. This special unit is needed
+// to schedule an ENDLOOP with 4 other instructions.
+def SLOT_ENDLOOP: FuncUnit;
+
+// Itinerary classes.
+def NV_V4 : InstrItinClass;
+def MEM_V4 : InstrItinClass;
+// ALU64/M/S Instruction classes of V2 are collectively knownn as XTYPE in V4.
+def PREFIX : InstrItinClass;
+
+def HexagonItinerariesV4 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
+ InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<SYS , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<ENDLOOP, [InstrStage<1, [SLOT_ENDLOOP]>]>,
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>
+ ]>;
+
+def HexagonModelV4 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV4;
+ let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td
new file mode 100644
index 000000000000..d8feb89c0ab5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td
@@ -0,0 +1,121 @@
+//===-- HexagoSelectCCInfo.td - Selectcc mappings ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//
+// selectcc mappings.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGT)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGT)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULT)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLT)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for greater-equal-to Rs => greater-than Rs-1.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGE)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGE)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+//
+// selectcc mappings for predicate comparisons.
+//
+// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into:
+// pt = not(p1 xor p2)
+// Rd = mux(pt, true_val, false_val)
+// and similarly for SETNE
+//
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval,
+ IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (NOT_p (XOR_pp PredRegs:$lhs, PredRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for 64-bit operands are messy. Hexagon does not have a
+// MUX64 o, use this:
+// selectcc(Rss, Rdd, tval, fval, cond) ->
+// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi),
+// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo))
+
+// setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETGT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
+
+
+// setlt-64 -> setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETLT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..a52c604505b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -0,0 +1,46 @@
+//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-selectiondag-info"
+#include "HexagonTargetMachine.h"
+using namespace llvm;
+
+bool llvm::flag_aligned_memcpy;
+
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine
+ &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() {
+}
+
+SDValue
+HexagonSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ flag_aligned_memcpy = false;
+ if ((Align & 0x3) == 0) {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if ((SizeVal > 32) && ((SizeVal % 8) == 0))
+ flag_aligned_memcpy = true;
+ }
+ }
+
+ return SDValue();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
new file mode 100644
index 000000000000..0673e4d35472
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -0,0 +1,40 @@
+//===-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Hexagon subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonSELECTIONDAGINFO_H
+#define HexagonSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class HexagonTargetMachine;
+
+class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
+ ~HexagonSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
new file mode 100644
index 000000000000..814249fa6832
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -0,0 +1,216 @@
+//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+// This pass tries to provide opportunities for better optimization of muxes.
+// The default code generated for something like: flag = (a == b) ? 1 : 3;
+// would be:
+//
+// {p0 = cmp.eq(r0,r1)}
+// {r3 = mux(p0,#1,#3)}
+//
+// This requires two packets. If we use .new predicated immediate transfers,
+// then we can do this in a single packet, e.g.:
+//
+// {p0 = cmp.eq(r0,r1)
+// if (p0.new) r3 = #1
+// if (!p0.new) r3 = #3}
+//
+// Note that the conditional assignments are not generated in .new form here.
+// We assume opptimisically that they will be formed later.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xfer"
+#include "Hexagon.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonSplitTFRCondSets : public MachineFunctionPass {
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonSplitTFRCondSets(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Split TFRCondSets";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonSplitTFRCondSets::ID = 0;
+
+
+bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
+
+ const TargetInstrInfo *TII = QTM.getInstrInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc1, Opc2;
+ switch(MI->getOpcode()) {
+ case Hexagon::TFR_condset_rr:
+ case Hexagon::TFR_condset_rr_f:
+ case Hexagon::TFR_condset_rr64_f: {
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(2).getReg();
+ int SrcReg2 = MI->getOperand(3).getReg();
+
+ if (MI->getOpcode() == Hexagon::TFR_condset_rr ||
+ MI->getOpcode() == Hexagon::TFR_condset_rr_f) {
+ Opc1 = Hexagon::TFR_cPt;
+ Opc2 = Hexagon::TFR_cNotPt;
+ }
+ else if (MI->getOpcode() == Hexagon::TFR_condset_rr64_f) {
+ Opc1 = Hexagon::TFR64_cPt;
+ Opc2 = Hexagon::TFR64_cNotPt;
+ }
+
+ // Minor optimization: do not emit the predicated copy if the source
+ // and the destination is the same register.
+ if (DestReg != SrcReg1) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc1),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+ }
+ if (DestReg != SrcReg2) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc2),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ break;
+ }
+ case Hexagon::TFR_condset_ri:
+ case Hexagon::TFR_condset_ri_f: {
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(2).getReg();
+
+ // Do not emit the predicated copy if the source and the destination
+ // is the same register.
+ if (DestReg != SrcReg1) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFR_cPt), DestReg).
+ addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+ }
+ if (MI->getOpcode() == Hexagon::TFR_condset_ri ) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cNotPt), DestReg).
+ addReg(MI->getOperand(1).getReg()).
+ addImm(MI->getOperand(3).getImm());
+ } else if (MI->getOpcode() == Hexagon::TFR_condset_ri_f ) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
+ addReg(MI->getOperand(1).getReg()).
+ addFPImm(MI->getOperand(3).getFPImm());
+ }
+
+ MII = MBB->erase(MI);
+ --MII;
+ break;
+ }
+ case Hexagon::TFR_condset_ir:
+ case Hexagon::TFR_condset_ir_f: {
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg2 = MI->getOperand(3).getReg();
+
+ if (MI->getOpcode() == Hexagon::TFR_condset_ir ) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cPt), DestReg).
+ addReg(MI->getOperand(1).getReg()).
+ addImm(MI->getOperand(2).getImm());
+ } else if (MI->getOpcode() == Hexagon::TFR_condset_ir_f ) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cPt_f), DestReg).
+ addReg(MI->getOperand(1).getReg()).
+ addFPImm(MI->getOperand(2).getFPImm());
+ }
+
+ // Do not emit the predicated copy if the source and
+ // the destination is the same register.
+ if (DestReg != SrcReg2) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFR_cNotPt), DestReg).
+ addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ break;
+ }
+ case Hexagon::TFR_condset_ii:
+ case Hexagon::TFR_condset_ii_f: {
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(1).getReg();
+
+ if (MI->getOpcode() == Hexagon::TFR_condset_ii ) {
+ int Immed1 = MI->getOperand(2).getImm();
+ int Immed2 = MI->getOperand(3).getImm();
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cPt),
+ DestReg).addReg(SrcReg1).addImm(Immed1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cNotPt),
+ DestReg).addReg(SrcReg1).addImm(Immed2);
+ } else if (MI->getOpcode() == Hexagon::TFR_condset_ii_f ) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cPt_f), DestReg).
+ addReg(SrcReg1).
+ addFPImm(MI->getOperand(2).getFPImm());
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
+ addReg(SrcReg1).
+ addFPImm(MI->getOperand(3).getFPImm());
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ break;
+ }
+ }
+ }
+ }
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) {
+ return new HexagonSplitTFRCondSets(TM);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
new file mode 100644
index 000000000000..07d5ce1d8ab0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -0,0 +1,88 @@
+//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+static cl::opt<bool>
+EnableV3("enable-hexagon-v3", cl::Hidden,
+ cl::desc("Enable Hexagon V3 instructions."));
+
+static cl::opt<bool>
+EnableMemOps(
+ "enable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
+ cl::desc(
+ "Generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool>
+DisableMemOps(
+ "disable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
+ cl::desc(
+ "Do not generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool>
+EnableIEEERndNear(
+ "enable-hexagon-ieee-rnd-near",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Generate non-chopped conversion from fp to int."));
+
+HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
+ HexagonGenSubtargetInfo(TT, CPU, FS),
+ CPUString(CPU.str()) {
+
+ // If the programmer has not specified a Hexagon version, default to -mv4.
+ if (CPUString.empty())
+ CPUString = "hexagonv4";
+
+ if (CPUString == "hexagonv2") {
+ HexagonArchVersion = V2;
+ } else if (CPUString == "hexagonv3") {
+ EnableV3 = true;
+ HexagonArchVersion = V3;
+ } else if (CPUString == "hexagonv4") {
+ HexagonArchVersion = V4;
+ } else if (CPUString == "hexagonv5") {
+ HexagonArchVersion = V5;
+ } else {
+ llvm_unreachable("Unrecognized Hexagon processor version");
+ }
+
+ ParseSubtargetFeatures(CPUString, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+ // UseMemOps on by default unless disabled explicitly
+ if (DisableMemOps)
+ UseMemOps = false;
+ else if (EnableMemOps)
+ UseMemOps = true;
+ else
+ UseMemOps = false;
+
+ if (EnableIEEERndNear)
+ ModeIEEERndNear = true;
+ else
+ ModeIEEERndNear = false;
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
new file mode 100644
index 000000000000..76a8fba195f3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -0,0 +1,80 @@
+//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_SUBTARGET_H
+#define Hexagon_SUBTARGET_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "HexagonGenSubtargetInfo.inc"
+
+#define Hexagon_SMALL_DATA_THRESHOLD 8
+#define Hexagon_SLOTS 4
+
+namespace llvm {
+
+class HexagonSubtarget : public HexagonGenSubtargetInfo {
+
+ bool UseMemOps;
+ bool ModeIEEERndNear;
+
+public:
+ enum HexagonArchEnum {
+ V1, V2, V3, V4, V5
+ };
+
+ HexagonArchEnum HexagonArchVersion;
+ std::string CPUString;
+ InstrItineraryData InstrItins;
+
+public:
+ HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+ /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool hasV2TOps () const { return HexagonArchVersion >= V2; }
+ bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; }
+ bool hasV3TOps () const { return HexagonArchVersion >= V3; }
+ bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
+ bool hasV4TOps () const { return HexagonArchVersion >= V4; }
+ bool hasV4TOpsOnly () const { return HexagonArchVersion == V4; }
+ bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
+ bool hasV5TOps () const { return HexagonArchVersion >= V5; }
+ bool hasV5TOpsOnly () const { return HexagonArchVersion == V5; }
+ bool modeIEEERndNear () const { return ModeIEEERndNear; }
+
+ bool isSubtargetV2() const { return HexagonArchVersion == V2;}
+ const std::string &getCPUString () const { return CPUString; }
+
+ // Threshold for small data section
+ unsigned getSmallDataThreshold() const {
+ return Hexagon_SMALL_DATA_THRESHOLD;
+ }
+ const HexagonArchEnum &getHexagonArchVersion() const {
+ return HexagonArchVersion;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
new file mode 100644
index 000000000000..ce45c626f799
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -0,0 +1,180 @@
+//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Hexagon target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "HexagonMachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::
+opt<bool> DisableHardwareLoops(
+ "disable-hexagon-hwloops", cl::Hidden,
+ cl::desc("Disable Hardware Loops for Hexagon target"));
+
+static cl::
+opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon MI Scheduling"));
+
+static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon CFG Optimization"));
+
+/// HexagonTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int HexagonTargetMachineModule;
+int HexagonTargetMachineModule = 0;
+
+extern "C" void LLVMInitializeHexagonTarget() {
+ // Register the target.
+ RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
+}
+
+static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
+ return new VLIWMachineScheduler(C, new ConvergingVLIWScheduler());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
+ createVLIWMachineSched);
+
+/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
+///
+
+/// Hexagon_TODO: Do I need an aggregate alignment?
+///
+HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ DL("e-p:32:32:32-"
+ "i64:64:64-i32:32:32-i16:16:16-i1:32:32-"
+ "f64:64:64-f32:32:32-a0:0-n32") ,
+ Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget),
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ setMCUseCFI(false);
+}
+
+// addPassesForOptimizations - Allow the backend (target) to add Target
+// Independent Optimization passes to the Pass Manager.
+bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
+ if (getOptLevel() != CodeGenOpt::None) {
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopSimplifyPass());
+ PM.add(createDeadCodeEliminationPass());
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopUnrollPass());
+ PM.add(createLoopStrengthReducePass());
+ }
+ return true;
+}
+
+namespace {
+/// Hexagon Code Generator Pass Configuration Options.
+class HexagonPassConfig : public TargetPassConfig {
+public:
+ HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {
+ // Enable MI scheduler.
+ if (!DisableHexagonMISched) {
+ enablePass(&MachineSchedulerID);
+ MachineSchedRegistry::setDefault(createVLIWMachineSched);
+ }
+ }
+
+ HexagonTargetMachine &getHexagonTargetMachine() const {
+ return getTM<HexagonTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new HexagonPassConfig(this, PM);
+}
+
+bool HexagonPassConfig::addInstSelector() {
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+
+ addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel()));
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonPeephole());
+
+ return false;
+}
+
+
+bool HexagonPassConfig::addPreRegAlloc() {
+ if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonHardwareLoops());
+ return false;
+}
+
+bool HexagonPassConfig::addPostRegAlloc() {
+ if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
+ return true;
+}
+
+
+bool HexagonPassConfig::addPreSched2() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&IfConverterID);
+ return true;
+}
+
+bool HexagonPassConfig::addPreEmitPass() {
+
+ if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonFixupHwLoops());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonNewValueJump());
+
+ // Expand Spill code for predicate registers.
+ addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
+
+ // Split up TFRcondsets into conditional transfers.
+ addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
+
+ // Create Packets.
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonPacketizer());
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
new file mode 100644
index 000000000000..cf8f9aa3612f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -0,0 +1,83 @@
+//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETMACHINE_H
+#define HexagonTARGETMACHINE_H
+
+#include "HexagonFrameLowering.h"
+#include "HexagonISelLowering.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSelectionDAGInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class Module;
+
+class HexagonTargetMachine : public LLVMTargetMachine {
+ const DataLayout DL; // Calculates type size & alignment.
+ HexagonSubtarget Subtarget;
+ HexagonInstrInfo InstrInfo;
+ HexagonTargetLowering TLInfo;
+ HexagonSelectionDAGInfo TSInfo;
+ HexagonFrameLowering FrameLowering;
+ const InstrItineraryData* InstrItins;
+
+public:
+ HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ virtual const HexagonInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const HexagonSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const HexagonRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+
+ virtual const HexagonTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const HexagonFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration.
+ virtual bool addPassesForOptimizations(PassManagerBase &PM);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+extern bool flag_aligned_memcpy;
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
new file mode 100644
index 000000000000..993fcfaed43e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the HexagonTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold",
+ cl::init(8), cl::Hidden);
+
+void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+}
+
+// sdata/sbss support taken largely from the MIPS Backend.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= (uint64_t)SmallDataThreshold;
+}
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // If the primary definition of this global value is outside the current
+ // translation unit or the global value is available for inspection but not
+ // emission, then do nothing.
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ // Otherwise, Check if GV should be in sdata/sbss, when normally it would end
+ // up in getKindForGlobal(GV, TM).
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
+ Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+ }
+
+ return false;
+}
+
+const MCSection *HexagonTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
new file mode 100644
index 000000000000..693345081ee3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETOBJECTFILE_H
+#define HexagonTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+ class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSectionELF *SmallDataSection;
+ const MCSectionELF *SmallBSSSection;
+ public:
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection* SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
new file mode 100644
index 000000000000..c0d86da1c05e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -0,0 +1,3086 @@
+//===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple VLIW packetizer using DFA. The packetizer works on
+// machine basic blocks. For each instruction I in BB, the packetizer consults
+// the DFA to see if machine resources are available to execute I. If so, the
+// packetizer checks if I depends on any instruction J in the current packet.
+// If no dependency is found, I is added to current packet and machine resource
+// is marked as taken. If any dependency is found, a target API call is made to
+// prune the dependence.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "packets"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+
+#include <map>
+
+using namespace llvm;
+
+namespace {
+ class HexagonPacketizer : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ HexagonPacketizer() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Hexagon Packetizer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char HexagonPacketizer::ID = 0;
+
+ class HexagonPacketizerList : public VLIWPacketizerList {
+
+ private:
+
+ // Has the instruction been promoted to a dot-new instruction.
+ bool PromotedToDotNew;
+
+ // Has the instruction been glued to allocframe.
+ bool GlueAllocframeStore;
+
+ // Has the feeder instruction been glued to new value jump.
+ bool GlueToNewValueJump;
+
+ // Check if there is a dependence between some instruction already in this
+ // packet and this instruction.
+ bool Dependence;
+
+ // Only check for dependence if there are resources available to
+ // schedule this instruction.
+ bool FoundSequentialDependence;
+
+ public:
+ // Ctor.
+ HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT);
+
+ // initPacketizerState - initialize some internal flags.
+ void initPacketizerState();
+
+ // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+ bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB);
+
+ // isSoloInstruction - return true if instruction MI can not be packetized
+ // with any other instruction, which means that MI itself is a packet.
+ bool isSoloInstruction(MachineInstr *MI);
+
+ // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+ // together.
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ);
+
+ // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
+ // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ);
+
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI);
+ private:
+ bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg);
+ bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII);
+ bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit);
+ bool DemoteToDotOld(MachineInstr* MI);
+ bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2,
+ std::map <MachineInstr*, SUnit*> MIToSUnit);
+ bool RestrictingDepExistInPacket(MachineInstr*,
+ unsigned, std::map <MachineInstr*, SUnit*>);
+ bool isNewifiable(MachineInstr* MI);
+ bool isCondInst(MachineInstr* MI);
+ bool IsNewifyStore (MachineInstr* MI);
+ bool tryAllocateResourcesForConstExt(MachineInstr* MI);
+ bool canReserveResourcesForConstExt(MachineInstr *MI);
+ void reserveResourcesForConstExt(MachineInstr* MI);
+ bool isNewValueInst(MachineInstr* MI);
+ };
+}
+
+// HexagonPacketizerList Ctor.
+HexagonPacketizerList::HexagonPacketizerList(
+ MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT)
+ : VLIWPacketizerList(MF, MLI, MDT, true){
+}
+
+bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
+ // Instantiate the packetizer.
+ HexagonPacketizerList Packetizer(Fn, MLI, MDT);
+
+ // DFA state table should not be empty.
+ assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+
+ //
+ // Loop over all basic blocks and remove KILL pseudo-instructions
+ // These instructions confuse the dependence analysis. Consider:
+ // D0 = ... (Insn 0)
+ // R0 = KILL R0, D0 (Insn 1)
+ // R0 = ... (Insn 2)
+ // Here, Insn 1 will result in the dependence graph not emitting an output
+ // dependence between Insn 0 and Insn 2. This can lead to incorrect
+ // packetization
+ //
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ MachineBasicBlock::iterator End = MBB->end();
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != End) {
+ if (MI->isKill()) {
+ MachineBasicBlock::iterator DeleteMI = MI;
+ ++MI;
+ MBB->erase(DeleteMI);
+ End = MBB->end();
+ continue;
+ }
+ ++MI;
+ }
+ }
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // Find scheduling regions and schedule / packetize each region.
+ unsigned RemainingCount = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin();) {
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingCount) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
+ break;
+ }
+ I = MBB->begin();
+
+ // Skip empty scheduling regions.
+ if (I == RegionEnd) {
+ RegionEnd = llvm::prior(RegionEnd);
+ --RemainingCount;
+ continue;
+ }
+ // Skip regions with one instruction.
+ if (I == llvm::prior(RegionEnd)) {
+ RegionEnd = llvm::prior(RegionEnd);
+ continue;
+ }
+
+ Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+ RegionEnd = I;
+ }
+ }
+
+ return true;
+}
+
+
+static bool IsIndirectCall(MachineInstr* MI) {
+ return ((MI->getOpcode() == Hexagon::CALLR) ||
+ (MI->getOpcode() == Hexagon::CALLRv3));
+}
+
+// Reserve resources for constant extender. Trigure an assertion if
+// reservation fail.
+void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MI->getDebugLoc());
+
+ if (ResourceTracker->canReserveResources(PseudoMI)) {
+ ResourceTracker->reserveResources(PseudoMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ } else {
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ llvm_unreachable("can not reserve resources for constant extender.");
+ }
+ return;
+}
+
+bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
+ "Should only be called for constant extended instructions");
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MI->getDebugLoc());
+ bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
+ MF->DeleteMachineInstr(PseudoMI);
+ return CanReserve;
+}
+
+// Allocate resources (i.e. 4 bytes) for constant extender. If succeed, return
+// true, otherwise, return false.
+bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MI->getDebugLoc());
+
+ if (ResourceTracker->canReserveResources(PseudoMI)) {
+ ResourceTracker->reserveResources(PseudoMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ return true;
+ } else {
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ return false;
+ }
+}
+
+
+bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI,
+ SDep::Kind DepType,
+ unsigned DepReg) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ const HexagonRegisterInfo* QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+
+ // Check for lr dependence
+ if (DepReg == QRI->getRARegister()) {
+ return true;
+ }
+
+ if (QII->isDeallocRet(MI)) {
+ if (DepReg == QRI->getFrameRegister() ||
+ DepReg == QRI->getStackRegister())
+ return true;
+ }
+
+ // Check if this is a predicate dependence
+ const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg);
+ if (RC == &Hexagon::PredRegsRegClass) {
+ return true;
+ }
+
+ //
+ // Lastly check for an operand used in an indirect call
+ // If we had an attribute for checking if an instruction is an indirect call,
+ // then we could have avoided this relatively brittle implementation of
+ // IsIndirectCall()
+ //
+ // Assumes that the first operand of the CALLr is the function address
+ //
+ if (IsIndirectCall(MI) && (DepType == SDep::Data)) {
+ MachineOperand MO = MI->getOperand(0);
+ if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool IsRegDependence(const SDep::Kind DepType) {
+ return (DepType == SDep::Data || DepType == SDep::Anti ||
+ DepType == SDep::Output);
+}
+
+static bool IsDirectJump(MachineInstr* MI) {
+ return (MI->getOpcode() == Hexagon::JMP);
+}
+
+static bool IsSchedBarrier(MachineInstr* MI) {
+ switch (MI->getOpcode()) {
+ case Hexagon::BARRIER:
+ return true;
+ }
+ return false;
+}
+
+static bool IsControlFlow(MachineInstr* MI) {
+ return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
+}
+
+// Function returns true if an instruction can be promoted to the new-value
+// store. It will always return false for v2 and v3.
+// It lists all the conditional and unconditional stores that can be promoted
+// to the new-value stores.
+
+bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
+ const HexagonRegisterInfo* QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ // store byte
+ case Hexagon::STrib:
+ case Hexagon::STrib_indexed:
+ case Hexagon::STrib_indexed_shl_V4:
+ case Hexagon::STrib_shl_V4:
+ case Hexagon::STb_GP_V4:
+ case Hexagon::POST_STbri:
+ case Hexagon::STrib_cPt:
+ case Hexagon::STrib_cdnPt_V4:
+ case Hexagon::STrib_cNotPt:
+ case Hexagon::STrib_cdnNotPt_V4:
+ case Hexagon::STrib_indexed_cPt:
+ case Hexagon::STrib_indexed_cdnPt_V4:
+ case Hexagon::STrib_indexed_cNotPt:
+ case Hexagon::STrib_indexed_cdnNotPt_V4:
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ case Hexagon::STrib_indexed_shl_cdnPt_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_STbri_cPt:
+ case Hexagon::POST_STbri_cdnPt_V4:
+ case Hexagon::POST_STbri_cNotPt:
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+ case Hexagon::STb_GP_cPt_V4:
+ case Hexagon::STb_GP_cNotPt_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+
+ // store halfword
+ case Hexagon::STrih:
+ case Hexagon::STrih_indexed:
+ case Hexagon::STrih_indexed_shl_V4:
+ case Hexagon::STrih_shl_V4:
+ case Hexagon::STh_GP_V4:
+ case Hexagon::POST_SThri:
+ case Hexagon::STrih_cPt:
+ case Hexagon::STrih_cdnPt_V4:
+ case Hexagon::STrih_cNotPt:
+ case Hexagon::STrih_cdnNotPt_V4:
+ case Hexagon::STrih_indexed_cPt:
+ case Hexagon::STrih_indexed_cdnPt_V4:
+ case Hexagon::STrih_indexed_cNotPt:
+ case Hexagon::STrih_indexed_cdnNotPt_V4:
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ case Hexagon::STrih_indexed_shl_cdnPt_V4:
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_SThri_cPt:
+ case Hexagon::POST_SThri_cdnPt_V4:
+ case Hexagon::POST_SThri_cNotPt:
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+ case Hexagon::STh_GP_cPt_V4:
+ case Hexagon::STh_GP_cNotPt_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+
+ // store word
+ case Hexagon::STriw:
+ case Hexagon::STriw_indexed:
+ case Hexagon::STriw_indexed_shl_V4:
+ case Hexagon::STriw_shl_V4:
+ case Hexagon::STw_GP_V4:
+ case Hexagon::POST_STwri:
+ case Hexagon::STriw_cPt:
+ case Hexagon::STriw_cdnPt_V4:
+ case Hexagon::STriw_cNotPt:
+ case Hexagon::STriw_cdnNotPt_V4:
+ case Hexagon::STriw_indexed_cPt:
+ case Hexagon::STriw_indexed_cdnPt_V4:
+ case Hexagon::STriw_indexed_cNotPt:
+ case Hexagon::STriw_indexed_cdnNotPt_V4:
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ case Hexagon::STriw_indexed_shl_cdnPt_V4:
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_STwri_cPt:
+ case Hexagon::POST_STwri_cdnPt_V4:
+ case Hexagon::POST_STwri_cNotPt:
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+ case Hexagon::STw_GP_cPt_V4:
+ case Hexagon::STw_GP_cNotPt_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ return QRI->Subtarget.hasV4TOps();
+ }
+ return false;
+}
+
+static bool IsLoopN(MachineInstr *MI) {
+ return (MI->getOpcode() == Hexagon::LOOP0_i ||
+ MI->getOpcode() == Hexagon::LOOP0_r);
+}
+
+/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a
+/// callee-saved register.
+static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ for (const uint16_t *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
+ unsigned CalleeSavedReg = *CSR;
+ if (MI->modifiesRegister(CalleeSavedReg, TRI))
+ return true;
+ }
+ return false;
+}
+
+// Return the new value instruction for a given store.
+static int GetDotNewOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .new type");
+ // store new value byte
+ case Hexagon::STrib:
+ return Hexagon::STrib_nv_V4;
+
+ case Hexagon::STrib_indexed:
+ return Hexagon::STrib_indexed_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_V4:
+ return Hexagon::STrib_indexed_shl_nv_V4;
+
+ case Hexagon::STrib_shl_V4:
+ return Hexagon::STrib_shl_nv_V4;
+
+ case Hexagon::STb_GP_V4:
+ return Hexagon::STb_GP_nv_V4;
+
+ case Hexagon::POST_STbri:
+ return Hexagon::POST_STbri_nv_V4;
+
+ case Hexagon::STrib_cPt:
+ return Hexagon::STrib_cPt_nv_V4;
+
+ case Hexagon::STrib_cdnPt_V4:
+ return Hexagon::STrib_cdnPt_nv_V4;
+
+ case Hexagon::STrib_cNotPt:
+ return Hexagon::STrib_cNotPt_nv_V4;
+
+ case Hexagon::STrib_cdnNotPt_V4:
+ return Hexagon::STrib_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cPt:
+ return Hexagon::STrib_indexed_cPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cdnPt_V4:
+ return Hexagon::STrib_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cNotPt:
+ return Hexagon::STrib_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cdnNotPt_V4:
+ return Hexagon::STrib_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ return Hexagon::STrib_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnPt_V4:
+ return Hexagon::STrib_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cPt:
+ return Hexagon::POST_STbri_cPt_nv_V4;
+
+ case Hexagon::POST_STbri_cdnPt_V4:
+ return Hexagon::POST_STbri_cdnPt_nv_V4;
+
+ case Hexagon::POST_STbri_cNotPt:
+ return Hexagon::POST_STbri_cNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+ return Hexagon::POST_STbri_cdnNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cPt_V4:
+ return Hexagon::STb_GP_cPt_nv_V4;
+
+ case Hexagon::STb_GP_cNotPt_V4:
+ return Hexagon::STb_GP_cNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cdnPt_V4:
+ return Hexagon::STb_GP_cdnPt_nv_V4;
+
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ return Hexagon::STb_GP_cdnNotPt_nv_V4;
+
+ // store new value halfword
+ case Hexagon::STrih:
+ return Hexagon::STrih_nv_V4;
+
+ case Hexagon::STrih_indexed:
+ return Hexagon::STrih_indexed_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_V4:
+ return Hexagon::STrih_indexed_shl_nv_V4;
+
+ case Hexagon::STrih_shl_V4:
+ return Hexagon::STrih_shl_nv_V4;
+
+ case Hexagon::STh_GP_V4:
+ return Hexagon::STh_GP_nv_V4;
+
+ case Hexagon::POST_SThri:
+ return Hexagon::POST_SThri_nv_V4;
+
+ case Hexagon::STrih_cPt:
+ return Hexagon::STrih_cPt_nv_V4;
+
+ case Hexagon::STrih_cdnPt_V4:
+ return Hexagon::STrih_cdnPt_nv_V4;
+
+ case Hexagon::STrih_cNotPt:
+ return Hexagon::STrih_cNotPt_nv_V4;
+
+ case Hexagon::STrih_cdnNotPt_V4:
+ return Hexagon::STrih_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cPt:
+ return Hexagon::STrih_indexed_cPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cdnPt_V4:
+ return Hexagon::STrih_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cNotPt:
+ return Hexagon::STrih_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cdnNotPt_V4:
+ return Hexagon::STrih_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ return Hexagon::STrih_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnPt_V4:
+ return Hexagon::STrih_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cPt:
+ return Hexagon::POST_SThri_cPt_nv_V4;
+
+ case Hexagon::POST_SThri_cdnPt_V4:
+ return Hexagon::POST_SThri_cdnPt_nv_V4;
+
+ case Hexagon::POST_SThri_cNotPt:
+ return Hexagon::POST_SThri_cNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+ return Hexagon::POST_SThri_cdnNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cPt_V4:
+ return Hexagon::STh_GP_cPt_nv_V4;
+
+ case Hexagon::STh_GP_cNotPt_V4:
+ return Hexagon::STh_GP_cNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cdnPt_V4:
+ return Hexagon::STh_GP_cdnPt_nv_V4;
+
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ return Hexagon::STh_GP_cdnNotPt_nv_V4;
+
+ // store new value word
+ case Hexagon::STriw:
+ return Hexagon::STriw_nv_V4;
+
+ case Hexagon::STriw_indexed:
+ return Hexagon::STriw_indexed_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_V4:
+ return Hexagon::STriw_indexed_shl_nv_V4;
+
+ case Hexagon::STriw_shl_V4:
+ return Hexagon::STriw_shl_nv_V4;
+
+ case Hexagon::STw_GP_V4:
+ return Hexagon::STw_GP_nv_V4;
+
+ case Hexagon::POST_STwri:
+ return Hexagon::POST_STwri_nv_V4;
+
+ case Hexagon::STriw_cPt:
+ return Hexagon::STriw_cPt_nv_V4;
+
+ case Hexagon::STriw_cdnPt_V4:
+ return Hexagon::STriw_cdnPt_nv_V4;
+
+ case Hexagon::STriw_cNotPt:
+ return Hexagon::STriw_cNotPt_nv_V4;
+
+ case Hexagon::STriw_cdnNotPt_V4:
+ return Hexagon::STriw_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cPt:
+ return Hexagon::STriw_indexed_cPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cdnPt_V4:
+ return Hexagon::STriw_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cNotPt:
+ return Hexagon::STriw_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cdnNotPt_V4:
+ return Hexagon::STriw_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ return Hexagon::STriw_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnPt_V4:
+ return Hexagon::STriw_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cPt:
+ return Hexagon::POST_STwri_cPt_nv_V4;
+
+ case Hexagon::POST_STwri_cdnPt_V4:
+ return Hexagon::POST_STwri_cdnPt_nv_V4;
+
+ case Hexagon::POST_STwri_cNotPt:
+ return Hexagon::POST_STwri_cNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+ return Hexagon::POST_STwri_cdnNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cPt_V4:
+ return Hexagon::STw_GP_cPt_nv_V4;
+
+ case Hexagon::STw_GP_cNotPt_V4:
+ return Hexagon::STw_GP_cNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cdnPt_V4:
+ return Hexagon::STw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ return Hexagon::STw_GP_cdnNotPt_nv_V4;
+
+ }
+}
+
+// Return .new predicate version for an instruction
+static int GetDotNewPredOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .new type");
+ // Conditional stores
+ // Store byte conditionally
+ case Hexagon::STrib_cPt :
+ return Hexagon::STrib_cdnPt_V4;
+
+ case Hexagon::STrib_cNotPt :
+ return Hexagon::STrib_cdnNotPt_V4;
+
+ case Hexagon::STrib_indexed_cPt :
+ return Hexagon::STrib_indexed_cdnPt_V4;
+
+ case Hexagon::STrib_indexed_cNotPt :
+ return Hexagon::STrib_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrib_imm_cPt_V4 :
+ return Hexagon::STrib_imm_cdnPt_V4;
+
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ return Hexagon::STrib_imm_cdnNotPt_V4;
+
+ case Hexagon::POST_STbri_cPt :
+ return Hexagon::POST_STbri_cdnPt_V4;
+
+ case Hexagon::POST_STbri_cNotPt :
+ return Hexagon::POST_STbri_cdnNotPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ return Hexagon::STrib_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrib_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::STb_GP_cPt_V4 :
+ return Hexagon::STb_GP_cdnPt_V4;
+
+ case Hexagon::STb_GP_cNotPt_V4 :
+ return Hexagon::STb_GP_cdnNotPt_V4;
+
+ // Store doubleword conditionally
+ case Hexagon::STrid_cPt :
+ return Hexagon::STrid_cdnPt_V4;
+
+ case Hexagon::STrid_cNotPt :
+ return Hexagon::STrid_cdnNotPt_V4;
+
+ case Hexagon::STrid_indexed_cPt :
+ return Hexagon::STrid_indexed_cdnPt_V4;
+
+ case Hexagon::STrid_indexed_cNotPt :
+ return Hexagon::STrid_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ return Hexagon::STrid_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrid_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_STdri_cPt :
+ return Hexagon::POST_STdri_cdnPt_V4;
+
+ case Hexagon::POST_STdri_cNotPt :
+ return Hexagon::POST_STdri_cdnNotPt_V4;
+
+ case Hexagon::STd_GP_cPt_V4 :
+ return Hexagon::STd_GP_cdnPt_V4;
+
+ case Hexagon::STd_GP_cNotPt_V4 :
+ return Hexagon::STd_GP_cdnNotPt_V4;
+
+ // Store halfword conditionally
+ case Hexagon::STrih_cPt :
+ return Hexagon::STrih_cdnPt_V4;
+
+ case Hexagon::STrih_cNotPt :
+ return Hexagon::STrih_cdnNotPt_V4;
+
+ case Hexagon::STrih_indexed_cPt :
+ return Hexagon::STrih_indexed_cdnPt_V4;
+
+ case Hexagon::STrih_indexed_cNotPt :
+ return Hexagon::STrih_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrih_imm_cPt_V4 :
+ return Hexagon::STrih_imm_cdnPt_V4;
+
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ return Hexagon::STrih_imm_cdnNotPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ return Hexagon::STrih_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrih_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_SThri_cPt :
+ return Hexagon::POST_SThri_cdnPt_V4;
+
+ case Hexagon::POST_SThri_cNotPt :
+ return Hexagon::POST_SThri_cdnNotPt_V4;
+
+ case Hexagon::STh_GP_cPt_V4 :
+ return Hexagon::STh_GP_cdnPt_V4;
+
+ case Hexagon::STh_GP_cNotPt_V4 :
+ return Hexagon::STh_GP_cdnNotPt_V4;
+
+ // Store word conditionally
+ case Hexagon::STriw_cPt :
+ return Hexagon::STriw_cdnPt_V4;
+
+ case Hexagon::STriw_cNotPt :
+ return Hexagon::STriw_cdnNotPt_V4;
+
+ case Hexagon::STriw_indexed_cPt :
+ return Hexagon::STriw_indexed_cdnPt_V4;
+
+ case Hexagon::STriw_indexed_cNotPt :
+ return Hexagon::STriw_indexed_cdnNotPt_V4;
+
+ case Hexagon::STriw_imm_cPt_V4 :
+ return Hexagon::STriw_imm_cdnPt_V4;
+
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ return Hexagon::STriw_imm_cdnNotPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ return Hexagon::STriw_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ return Hexagon::STriw_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_STwri_cPt :
+ return Hexagon::POST_STwri_cdnPt_V4;
+
+ case Hexagon::POST_STwri_cNotPt :
+ return Hexagon::POST_STwri_cdnNotPt_V4;
+
+ case Hexagon::STw_GP_cPt_V4 :
+ return Hexagon::STw_GP_cdnPt_V4;
+
+ case Hexagon::STw_GP_cNotPt_V4 :
+ return Hexagon::STw_GP_cdnNotPt_V4;
+
+ // Condtional Jumps
+ case Hexagon::JMP_c:
+ return Hexagon::JMP_cdnPt;
+
+ case Hexagon::JMP_cNot:
+ return Hexagon::JMP_cdnNotPt;
+
+ case Hexagon::JMPR_cPt:
+ return Hexagon::JMPR_cdnPt_V3;
+
+ case Hexagon::JMPR_cNotPt:
+ return Hexagon::JMPR_cdnNotPt_V3;
+
+ // Conditional Transfers
+ case Hexagon::TFR_cPt:
+ return Hexagon::TFR_cdnPt;
+
+ case Hexagon::TFR_cNotPt:
+ return Hexagon::TFR_cdnNotPt;
+
+ case Hexagon::TFRI_cPt:
+ return Hexagon::TFRI_cdnPt;
+
+ case Hexagon::TFRI_cNotPt:
+ return Hexagon::TFRI_cdnNotPt;
+
+ // Load double word
+ case Hexagon::LDrid_cPt :
+ return Hexagon::LDrid_cdnPt;
+
+ case Hexagon::LDrid_cNotPt :
+ return Hexagon::LDrid_cdnNotPt;
+
+ case Hexagon::LDrid_indexed_cPt :
+ return Hexagon::LDrid_indexed_cdnPt;
+
+ case Hexagon::LDrid_indexed_cNotPt :
+ return Hexagon::LDrid_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrid_cPt :
+ return Hexagon::POST_LDrid_cdnPt_V4;
+
+ case Hexagon::POST_LDrid_cNotPt :
+ return Hexagon::POST_LDrid_cdnNotPt_V4;
+
+ // Load word
+ case Hexagon::LDriw_cPt :
+ return Hexagon::LDriw_cdnPt;
+
+ case Hexagon::LDriw_cNotPt :
+ return Hexagon::LDriw_cdnNotPt;
+
+ case Hexagon::LDriw_indexed_cPt :
+ return Hexagon::LDriw_indexed_cdnPt;
+
+ case Hexagon::LDriw_indexed_cNotPt :
+ return Hexagon::LDriw_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriw_cPt :
+ return Hexagon::POST_LDriw_cdnPt_V4;
+
+ case Hexagon::POST_LDriw_cNotPt :
+ return Hexagon::POST_LDriw_cdnNotPt_V4;
+
+ // Load halfword
+ case Hexagon::LDrih_cPt :
+ return Hexagon::LDrih_cdnPt;
+
+ case Hexagon::LDrih_cNotPt :
+ return Hexagon::LDrih_cdnNotPt;
+
+ case Hexagon::LDrih_indexed_cPt :
+ return Hexagon::LDrih_indexed_cdnPt;
+
+ case Hexagon::LDrih_indexed_cNotPt :
+ return Hexagon::LDrih_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrih_cPt :
+ return Hexagon::POST_LDrih_cdnPt_V4;
+
+ case Hexagon::POST_LDrih_cNotPt :
+ return Hexagon::POST_LDrih_cdnNotPt_V4;
+
+ // Load byte
+ case Hexagon::LDrib_cPt :
+ return Hexagon::LDrib_cdnPt;
+
+ case Hexagon::LDrib_cNotPt :
+ return Hexagon::LDrib_cdnNotPt;
+
+ case Hexagon::LDrib_indexed_cPt :
+ return Hexagon::LDrib_indexed_cdnPt;
+
+ case Hexagon::LDrib_indexed_cNotPt :
+ return Hexagon::LDrib_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrib_cPt :
+ return Hexagon::POST_LDrib_cdnPt_V4;
+
+ case Hexagon::POST_LDrib_cNotPt :
+ return Hexagon::POST_LDrib_cdnNotPt_V4;
+
+ // Load unsigned halfword
+ case Hexagon::LDriuh_cPt :
+ return Hexagon::LDriuh_cdnPt;
+
+ case Hexagon::LDriuh_cNotPt :
+ return Hexagon::LDriuh_cdnNotPt;
+
+ case Hexagon::LDriuh_indexed_cPt :
+ return Hexagon::LDriuh_indexed_cdnPt;
+
+ case Hexagon::LDriuh_indexed_cNotPt :
+ return Hexagon::LDriuh_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriuh_cPt :
+ return Hexagon::POST_LDriuh_cdnPt_V4;
+
+ case Hexagon::POST_LDriuh_cNotPt :
+ return Hexagon::POST_LDriuh_cdnNotPt_V4;
+
+ // Load unsigned byte
+ case Hexagon::LDriub_cPt :
+ return Hexagon::LDriub_cdnPt;
+
+ case Hexagon::LDriub_cNotPt :
+ return Hexagon::LDriub_cdnNotPt;
+
+ case Hexagon::LDriub_indexed_cPt :
+ return Hexagon::LDriub_indexed_cdnPt;
+
+ case Hexagon::LDriub_indexed_cNotPt :
+ return Hexagon::LDriub_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriub_cPt :
+ return Hexagon::POST_LDriub_cdnPt_V4;
+
+ case Hexagon::POST_LDriub_cNotPt :
+ return Hexagon::POST_LDriub_cdnNotPt_V4;
+
+ // V4 indexed+scaled load
+
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cdnNotPt_V4;
+
+ // V4 global address load
+
+ case Hexagon::LDd_GP_cPt_V4:
+ return Hexagon::LDd_GP_cdnPt_V4;
+
+ case Hexagon::LDd_GP_cNotPt_V4:
+ return Hexagon::LDd_GP_cdnNotPt_V4;
+
+ case Hexagon::LDb_GP_cPt_V4:
+ return Hexagon::LDb_GP_cdnPt_V4;
+
+ case Hexagon::LDb_GP_cNotPt_V4:
+ return Hexagon::LDb_GP_cdnNotPt_V4;
+
+ case Hexagon::LDub_GP_cPt_V4:
+ return Hexagon::LDub_GP_cdnPt_V4;
+
+ case Hexagon::LDub_GP_cNotPt_V4:
+ return Hexagon::LDub_GP_cdnNotPt_V4;
+
+ case Hexagon::LDh_GP_cPt_V4:
+ return Hexagon::LDh_GP_cdnPt_V4;
+
+ case Hexagon::LDh_GP_cNotPt_V4:
+ return Hexagon::LDh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDuh_GP_cPt_V4:
+ return Hexagon::LDuh_GP_cdnPt_V4;
+
+ case Hexagon::LDuh_GP_cNotPt_V4:
+ return Hexagon::LDuh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDw_GP_cPt_V4:
+ return Hexagon::LDw_GP_cdnPt_V4;
+
+ case Hexagon::LDw_GP_cNotPt_V4:
+ return Hexagon::LDw_GP_cdnNotPt_V4;
+
+ // Conditional store new-value byte
+ case Hexagon::STrib_cPt_nv_V4 :
+ return Hexagon::STrib_cdnPt_nv_V4;
+ case Hexagon::STrib_cNotPt_nv_V4 :
+ return Hexagon::STrib_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cPt_nv_V4 :
+ return Hexagon::STrib_indexed_cdnPt_nv_V4;
+ case Hexagon::STrib_indexed_cNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STrib_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cPt_nv_V4 :
+ return Hexagon::POST_STbri_cdnPt_nv_V4;
+ case Hexagon::POST_STbri_cNotPt_nv_V4 :
+ return Hexagon::POST_STbri_cdnNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cPt_nv_V4 :
+ return Hexagon::STb_GP_cdnPt_nv_V4;
+
+ case Hexagon::STb_GP_cNotPt_nv_V4 :
+ return Hexagon::STb_GP_cdnNotPt_nv_V4;
+
+ // Conditional store new-value halfword
+ case Hexagon::STrih_cPt_nv_V4 :
+ return Hexagon::STrih_cdnPt_nv_V4;
+ case Hexagon::STrih_cNotPt_nv_V4 :
+ return Hexagon::STrih_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cPt_nv_V4 :
+ return Hexagon::STrih_indexed_cdnPt_nv_V4;
+ case Hexagon::STrih_indexed_cNotPt_nv_V4 :
+ return Hexagon::STrih_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STrih_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cPt_nv_V4 :
+ return Hexagon::POST_SThri_cdnPt_nv_V4;
+ case Hexagon::POST_SThri_cNotPt_nv_V4 :
+ return Hexagon::POST_SThri_cdnNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cPt_nv_V4 :
+ return Hexagon::STh_GP_cdnPt_nv_V4;
+
+ case Hexagon::STh_GP_cNotPt_nv_V4 :
+ return Hexagon::STh_GP_cdnNotPt_nv_V4;
+
+ // Conditional store new-value word
+ case Hexagon::STriw_cPt_nv_V4 :
+ return Hexagon::STriw_cdnPt_nv_V4;
+ case Hexagon::STriw_cNotPt_nv_V4 :
+ return Hexagon::STriw_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cPt_nv_V4 :
+ return Hexagon::STriw_indexed_cdnPt_nv_V4;
+ case Hexagon::STriw_indexed_cNotPt_nv_V4 :
+ return Hexagon::STriw_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STriw_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cPt_nv_V4 :
+ return Hexagon::POST_STwri_cdnPt_nv_V4;
+ case Hexagon::POST_STwri_cNotPt_nv_V4:
+ return Hexagon::POST_STwri_cdnNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cPt_nv_V4 :
+ return Hexagon::STw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STw_GP_cNotPt_nv_V4 :
+ return Hexagon::STw_GP_cdnNotPt_nv_V4;
+
+ // Conditional add
+ case Hexagon::ADD_ri_cPt :
+ return Hexagon::ADD_ri_cdnPt;
+ case Hexagon::ADD_ri_cNotPt :
+ return Hexagon::ADD_ri_cdnNotPt;
+
+ case Hexagon::ADD_rr_cPt :
+ return Hexagon::ADD_rr_cdnPt;
+ case Hexagon::ADD_rr_cNotPt :
+ return Hexagon::ADD_rr_cdnNotPt;
+
+ // Conditional logical Operations
+ case Hexagon::XOR_rr_cPt :
+ return Hexagon::XOR_rr_cdnPt;
+ case Hexagon::XOR_rr_cNotPt :
+ return Hexagon::XOR_rr_cdnNotPt;
+
+ case Hexagon::AND_rr_cPt :
+ return Hexagon::AND_rr_cdnPt;
+ case Hexagon::AND_rr_cNotPt :
+ return Hexagon::AND_rr_cdnNotPt;
+
+ case Hexagon::OR_rr_cPt :
+ return Hexagon::OR_rr_cdnPt;
+ case Hexagon::OR_rr_cNotPt :
+ return Hexagon::OR_rr_cdnNotPt;
+
+ // Conditional Subtract
+ case Hexagon::SUB_rr_cPt :
+ return Hexagon::SUB_rr_cdnPt;
+ case Hexagon::SUB_rr_cNotPt :
+ return Hexagon::SUB_rr_cdnNotPt;
+
+ // Conditional combine
+ case Hexagon::COMBINE_rr_cPt :
+ return Hexagon::COMBINE_rr_cdnPt;
+ case Hexagon::COMBINE_rr_cNotPt :
+ return Hexagon::COMBINE_rr_cdnNotPt;
+
+ case Hexagon::ASLH_cPt_V4 :
+ return Hexagon::ASLH_cdnPt_V4;
+ case Hexagon::ASLH_cNotPt_V4 :
+ return Hexagon::ASLH_cdnNotPt_V4;
+
+ case Hexagon::ASRH_cPt_V4 :
+ return Hexagon::ASRH_cdnPt_V4;
+ case Hexagon::ASRH_cNotPt_V4 :
+ return Hexagon::ASRH_cdnNotPt_V4;
+
+ case Hexagon::SXTB_cPt_V4 :
+ return Hexagon::SXTB_cdnPt_V4;
+ case Hexagon::SXTB_cNotPt_V4 :
+ return Hexagon::SXTB_cdnNotPt_V4;
+
+ case Hexagon::SXTH_cPt_V4 :
+ return Hexagon::SXTH_cdnPt_V4;
+ case Hexagon::SXTH_cNotPt_V4 :
+ return Hexagon::SXTH_cdnNotPt_V4;
+
+ case Hexagon::ZXTB_cPt_V4 :
+ return Hexagon::ZXTB_cdnPt_V4;
+ case Hexagon::ZXTB_cNotPt_V4 :
+ return Hexagon::ZXTB_cdnNotPt_V4;
+
+ case Hexagon::ZXTH_cPt_V4 :
+ return Hexagon::ZXTH_cdnPt_V4;
+ case Hexagon::ZXTH_cNotPt_V4 :
+ return Hexagon::ZXTH_cdnNotPt_V4;
+ }
+}
+
+// Returns true if an instruction can be promoted to .new predicate
+// or new-value store.
+bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
+ if ( isCondInst(MI) || IsNewifyStore(MI))
+ return true;
+ else
+ return false;
+}
+
+bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ const MCInstrDesc& TID = MI->getDesc();
+ // bug 5670: until that is fixed,
+ // this portion is disabled.
+ if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
+ || QII->isConditionalTransfer(MI)
+ || QII->isConditionalALU32(MI)
+ || QII->isConditionalLoad(MI)
+ || QII->isConditionalStore(MI)) {
+ return true;
+ }
+ return false;
+}
+
+
+// Promote an instructiont to its .new form.
+// At this time, we have already made a call to CanPromoteToDotNew
+// and made sure that it can *indeed* be promoted.
+bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
+ SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+
+ assert (DepType == SDep::Data);
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ int NewOpcode;
+ if (RC == &Hexagon::PredRegsRegClass)
+ NewOpcode = GetDotNewPredOp(MI->getOpcode());
+ else
+ NewOpcode = GetDotNewOp(MI->getOpcode());
+ MI->setDesc(QII->get(NewOpcode));
+
+ return true;
+}
+
+// Returns the most basic instruction for the .new predicated instructions and
+// new-value stores.
+// For example, all of the following instructions will be converted back to the
+// same instruction:
+// 1) if (p0.new) memw(R0+#0) = R1.new --->
+// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1
+// 3) if (p0.new) memw(R0+#0) = R1 --->
+//
+// To understand the translation of instruction 1 to its original form, consider
+// a packet with 3 instructions.
+// { p0 = cmp.eq(R0,R1)
+// if (p0.new) R2 = add(R3, R4)
+// R5 = add (R3, R1)
+// }
+// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet
+//
+// This instruction can be part of the previous packet only if both p0 and R2
+// are promoted to .new values. This promotion happens in steps, first
+// predicate register is promoted to .new and in the next iteration R2 is
+// promoted. Therefore, in case of dependence check failure (due to R5) during
+// next iteration, it should be converted back to its most basic form.
+
+static int GetDotOldOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .old type");
+ case Hexagon::TFR_cdnPt:
+ return Hexagon::TFR_cPt;
+
+ case Hexagon::TFR_cdnNotPt:
+ return Hexagon::TFR_cNotPt;
+
+ case Hexagon::TFRI_cdnPt:
+ return Hexagon::TFRI_cPt;
+
+ case Hexagon::TFRI_cdnNotPt:
+ return Hexagon::TFRI_cNotPt;
+
+ case Hexagon::JMP_cdnPt:
+ return Hexagon::JMP_c;
+
+ case Hexagon::JMP_cdnNotPt:
+ return Hexagon::JMP_cNot;
+
+ case Hexagon::JMPR_cdnPt_V3:
+ return Hexagon::JMPR_cPt;
+
+ case Hexagon::JMPR_cdnNotPt_V3:
+ return Hexagon::JMPR_cNotPt;
+
+ // Load double word
+
+ case Hexagon::LDrid_cdnPt :
+ return Hexagon::LDrid_cPt;
+
+ case Hexagon::LDrid_cdnNotPt :
+ return Hexagon::LDrid_cNotPt;
+
+ case Hexagon::LDrid_indexed_cdnPt :
+ return Hexagon::LDrid_indexed_cPt;
+
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ return Hexagon::LDrid_indexed_cNotPt;
+
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ return Hexagon::POST_LDrid_cPt;
+
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ return Hexagon::POST_LDrid_cNotPt;
+
+ // Load word
+
+ case Hexagon::LDriw_cdnPt :
+ return Hexagon::LDriw_cPt;
+
+ case Hexagon::LDriw_cdnNotPt :
+ return Hexagon::LDriw_cNotPt;
+
+ case Hexagon::LDriw_indexed_cdnPt :
+ return Hexagon::LDriw_indexed_cPt;
+
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ return Hexagon::LDriw_indexed_cNotPt;
+
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ return Hexagon::POST_LDriw_cPt;
+
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ return Hexagon::POST_LDriw_cNotPt;
+
+ // Load half
+
+ case Hexagon::LDrih_cdnPt :
+ return Hexagon::LDrih_cPt;
+
+ case Hexagon::LDrih_cdnNotPt :
+ return Hexagon::LDrih_cNotPt;
+
+ case Hexagon::LDrih_indexed_cdnPt :
+ return Hexagon::LDrih_indexed_cPt;
+
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ return Hexagon::LDrih_indexed_cNotPt;
+
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ return Hexagon::POST_LDrih_cPt;
+
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ return Hexagon::POST_LDrih_cNotPt;
+
+ // Load byte
+
+ case Hexagon::LDrib_cdnPt :
+ return Hexagon::LDrib_cPt;
+
+ case Hexagon::LDrib_cdnNotPt :
+ return Hexagon::LDrib_cNotPt;
+
+ case Hexagon::LDrib_indexed_cdnPt :
+ return Hexagon::LDrib_indexed_cPt;
+
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ return Hexagon::LDrib_indexed_cNotPt;
+
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ return Hexagon::POST_LDrib_cPt;
+
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ return Hexagon::POST_LDrib_cNotPt;
+
+ // Load unsigned half
+
+ case Hexagon::LDriuh_cdnPt :
+ return Hexagon::LDriuh_cPt;
+
+ case Hexagon::LDriuh_cdnNotPt :
+ return Hexagon::LDriuh_cNotPt;
+
+ case Hexagon::LDriuh_indexed_cdnPt :
+ return Hexagon::LDriuh_indexed_cPt;
+
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ return Hexagon::LDriuh_indexed_cNotPt;
+
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ return Hexagon::POST_LDriuh_cPt;
+
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ return Hexagon::POST_LDriuh_cNotPt;
+
+ // Load unsigned byte
+ case Hexagon::LDriub_cdnPt :
+ return Hexagon::LDriub_cPt;
+
+ case Hexagon::LDriub_cdnNotPt :
+ return Hexagon::LDriub_cNotPt;
+
+ case Hexagon::LDriub_indexed_cdnPt :
+ return Hexagon::LDriub_indexed_cPt;
+
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ return Hexagon::LDriub_indexed_cNotPt;
+
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ return Hexagon::POST_LDriub_cPt;
+
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ return Hexagon::POST_LDriub_cNotPt;
+
+ // V4 indexed+scaled Load
+
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cNotPt_V4;
+
+ // V4 global address load
+
+ case Hexagon::LDd_GP_cdnPt_V4:
+ return Hexagon::LDd_GP_cPt_V4;
+
+ case Hexagon::LDd_GP_cdnNotPt_V4:
+ return Hexagon::LDd_GP_cNotPt_V4;
+
+ case Hexagon::LDb_GP_cdnPt_V4:
+ return Hexagon::LDb_GP_cPt_V4;
+
+ case Hexagon::LDb_GP_cdnNotPt_V4:
+ return Hexagon::LDb_GP_cNotPt_V4;
+
+ case Hexagon::LDub_GP_cdnPt_V4:
+ return Hexagon::LDub_GP_cPt_V4;
+
+ case Hexagon::LDub_GP_cdnNotPt_V4:
+ return Hexagon::LDub_GP_cNotPt_V4;
+
+ case Hexagon::LDh_GP_cdnPt_V4:
+ return Hexagon::LDh_GP_cPt_V4;
+
+ case Hexagon::LDh_GP_cdnNotPt_V4:
+ return Hexagon::LDh_GP_cNotPt_V4;
+
+ case Hexagon::LDuh_GP_cdnPt_V4:
+ return Hexagon::LDuh_GP_cPt_V4;
+
+ case Hexagon::LDuh_GP_cdnNotPt_V4:
+ return Hexagon::LDuh_GP_cNotPt_V4;
+
+ case Hexagon::LDw_GP_cdnPt_V4:
+ return Hexagon::LDw_GP_cPt_V4;
+
+ case Hexagon::LDw_GP_cdnNotPt_V4:
+ return Hexagon::LDw_GP_cNotPt_V4;
+
+ // Conditional add
+
+ case Hexagon::ADD_ri_cdnPt :
+ return Hexagon::ADD_ri_cPt;
+ case Hexagon::ADD_ri_cdnNotPt :
+ return Hexagon::ADD_ri_cNotPt;
+
+ case Hexagon::ADD_rr_cdnPt :
+ return Hexagon::ADD_rr_cPt;
+ case Hexagon::ADD_rr_cdnNotPt:
+ return Hexagon::ADD_rr_cNotPt;
+
+ // Conditional logical Operations
+
+ case Hexagon::XOR_rr_cdnPt :
+ return Hexagon::XOR_rr_cPt;
+ case Hexagon::XOR_rr_cdnNotPt :
+ return Hexagon::XOR_rr_cNotPt;
+
+ case Hexagon::AND_rr_cdnPt :
+ return Hexagon::AND_rr_cPt;
+ case Hexagon::AND_rr_cdnNotPt :
+ return Hexagon::AND_rr_cNotPt;
+
+ case Hexagon::OR_rr_cdnPt :
+ return Hexagon::OR_rr_cPt;
+ case Hexagon::OR_rr_cdnNotPt :
+ return Hexagon::OR_rr_cNotPt;
+
+ // Conditional Subtract
+
+ case Hexagon::SUB_rr_cdnPt :
+ return Hexagon::SUB_rr_cPt;
+ case Hexagon::SUB_rr_cdnNotPt :
+ return Hexagon::SUB_rr_cNotPt;
+
+ // Conditional combine
+
+ case Hexagon::COMBINE_rr_cdnPt :
+ return Hexagon::COMBINE_rr_cPt;
+ case Hexagon::COMBINE_rr_cdnNotPt :
+ return Hexagon::COMBINE_rr_cNotPt;
+
+// Conditional shift operations
+
+ case Hexagon::ASLH_cdnPt_V4 :
+ return Hexagon::ASLH_cPt_V4;
+ case Hexagon::ASLH_cdnNotPt_V4 :
+ return Hexagon::ASLH_cNotPt_V4;
+
+ case Hexagon::ASRH_cdnPt_V4 :
+ return Hexagon::ASRH_cPt_V4;
+ case Hexagon::ASRH_cdnNotPt_V4 :
+ return Hexagon::ASRH_cNotPt_V4;
+
+ case Hexagon::SXTB_cdnPt_V4 :
+ return Hexagon::SXTB_cPt_V4;
+ case Hexagon::SXTB_cdnNotPt_V4 :
+ return Hexagon::SXTB_cNotPt_V4;
+
+ case Hexagon::SXTH_cdnPt_V4 :
+ return Hexagon::SXTH_cPt_V4;
+ case Hexagon::SXTH_cdnNotPt_V4 :
+ return Hexagon::SXTH_cNotPt_V4;
+
+ case Hexagon::ZXTB_cdnPt_V4 :
+ return Hexagon::ZXTB_cPt_V4;
+ case Hexagon::ZXTB_cdnNotPt_V4 :
+ return Hexagon::ZXTB_cNotPt_V4;
+
+ case Hexagon::ZXTH_cdnPt_V4 :
+ return Hexagon::ZXTH_cPt_V4;
+ case Hexagon::ZXTH_cdnNotPt_V4 :
+ return Hexagon::ZXTH_cNotPt_V4;
+
+ // Store byte
+
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ return Hexagon::STrib_imm_cPt_V4;
+
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ return Hexagon::STrib_imm_cNotPt_V4;
+
+ case Hexagon::STrib_cdnPt_nv_V4 :
+ case Hexagon::STrib_cPt_nv_V4 :
+ case Hexagon::STrib_cdnPt_V4 :
+ return Hexagon::STrib_cPt;
+
+ case Hexagon::STrib_cdnNotPt_nv_V4 :
+ case Hexagon::STrib_cNotPt_nv_V4 :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ return Hexagon::STrib_cNotPt;
+
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cPt_nv_V4 :
+ case Hexagon::STrib_indexed_cdnPt_nv_V4 :
+ return Hexagon::STrib_indexed_cPt;
+
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cNotPt_nv_V4 :
+ case Hexagon::STrib_indexed_cdnNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_cNotPt;
+
+ case Hexagon::STrib_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrib_indexed_shl_cPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrib_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STbri_cdnPt_nv_V4 :
+ case Hexagon::POST_STbri_cPt_nv_V4 :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ return Hexagon::POST_STbri_cPt;
+
+ case Hexagon::POST_STbri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_STbri_cNotPt_nv_V4:
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ return Hexagon::POST_STbri_cNotPt;
+
+ case Hexagon::STb_GP_cdnPt_nv_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cPt_nv_V4:
+ return Hexagon::STb_GP_cPt_V4;
+
+ case Hexagon::STb_GP_cdnNotPt_nv_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ case Hexagon::STb_GP_cNotPt_nv_V4:
+ return Hexagon::STb_GP_cNotPt_V4;
+
+ // Store new-value byte - unconditional
+ case Hexagon::STrib_nv_V4:
+ return Hexagon::STrib;
+
+ case Hexagon::STrib_indexed_nv_V4:
+ return Hexagon::STrib_indexed;
+
+ case Hexagon::STrib_indexed_shl_nv_V4:
+ return Hexagon::STrib_indexed_shl_V4;
+
+ case Hexagon::STrib_shl_nv_V4:
+ return Hexagon::STrib_shl_V4;
+
+ case Hexagon::STb_GP_nv_V4:
+ return Hexagon::STb_GP_V4;
+
+ case Hexagon::POST_STbri_nv_V4:
+ return Hexagon::POST_STbri;
+
+ // Store halfword
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ return Hexagon::STrih_imm_cPt_V4;
+
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ return Hexagon::STrih_imm_cNotPt_V4;
+
+ case Hexagon::STrih_cdnPt_nv_V4 :
+ case Hexagon::STrih_cPt_nv_V4 :
+ case Hexagon::STrih_cdnPt_V4 :
+ return Hexagon::STrih_cPt;
+
+ case Hexagon::STrih_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_cNotPt_nv_V4 :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ return Hexagon::STrih_cNotPt;
+
+ case Hexagon::STrih_indexed_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_cPt_nv_V4 :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ return Hexagon::STrih_indexed_cPt;
+
+ case Hexagon::STrih_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ return Hexagon::STrih_indexed_cNotPt;
+
+ case Hexagon::STrih_indexed_shl_cdnPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrih_indexed_shl_cPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrih_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_SThri_cdnPt_nv_V4 :
+ case Hexagon::POST_SThri_cPt_nv_V4 :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ return Hexagon::POST_SThri_cPt;
+
+ case Hexagon::POST_SThri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_SThri_cNotPt_nv_V4 :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+ return Hexagon::POST_SThri_cNotPt;
+
+ case Hexagon::STh_GP_cdnPt_nv_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cPt_nv_V4:
+ return Hexagon::STh_GP_cPt_V4;
+
+ case Hexagon::STh_GP_cdnNotPt_nv_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ case Hexagon::STh_GP_cNotPt_nv_V4:
+ return Hexagon::STh_GP_cNotPt_V4;
+
+ // Store new-value halfword - unconditional
+
+ case Hexagon::STrih_nv_V4:
+ return Hexagon::STrih;
+
+ case Hexagon::STrih_indexed_nv_V4:
+ return Hexagon::STrih_indexed;
+
+ case Hexagon::STrih_indexed_shl_nv_V4:
+ return Hexagon::STrih_indexed_shl_V4;
+
+ case Hexagon::STrih_shl_nv_V4:
+ return Hexagon::STrih_shl_V4;
+
+ case Hexagon::STh_GP_nv_V4:
+ return Hexagon::STh_GP_V4;
+
+ case Hexagon::POST_SThri_nv_V4:
+ return Hexagon::POST_SThri;
+
+ // Store word
+
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ return Hexagon::STriw_imm_cPt_V4;
+
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ return Hexagon::STriw_imm_cNotPt_V4;
+
+ case Hexagon::STriw_cdnPt_nv_V4 :
+ case Hexagon::STriw_cPt_nv_V4 :
+ case Hexagon::STriw_cdnPt_V4 :
+ return Hexagon::STriw_cPt;
+
+ case Hexagon::STriw_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_cNotPt_nv_V4 :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ return Hexagon::STriw_cNotPt;
+
+ case Hexagon::STriw_indexed_cdnPt_nv_V4 :
+ case Hexagon::STriw_indexed_cPt_nv_V4 :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ return Hexagon::STriw_indexed_cPt;
+
+ case Hexagon::STriw_indexed_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_cNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ return Hexagon::STriw_indexed_cNotPt;
+
+ case Hexagon::STriw_indexed_shl_cdnPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ return Hexagon::STriw_indexed_shl_cPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STriw_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STwri_cdnPt_nv_V4 :
+ case Hexagon::POST_STwri_cPt_nv_V4 :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ return Hexagon::POST_STwri_cPt;
+
+ case Hexagon::POST_STwri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_STwri_cNotPt_nv_V4 :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+ return Hexagon::POST_STwri_cNotPt;
+
+ case Hexagon::STw_GP_cdnPt_nv_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cPt_nv_V4:
+ return Hexagon::STw_GP_cPt_V4;
+
+ case Hexagon::STw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ case Hexagon::STw_GP_cNotPt_nv_V4:
+ return Hexagon::STw_GP_cNotPt_V4;
+
+ // Store new-value word - unconditional
+
+ case Hexagon::STriw_nv_V4:
+ return Hexagon::STriw;
+
+ case Hexagon::STriw_indexed_nv_V4:
+ return Hexagon::STriw_indexed;
+
+ case Hexagon::STriw_indexed_shl_nv_V4:
+ return Hexagon::STriw_indexed_shl_V4;
+
+ case Hexagon::STriw_shl_nv_V4:
+ return Hexagon::STriw_shl_V4;
+
+ case Hexagon::STw_GP_nv_V4:
+ return Hexagon::STw_GP_V4;
+
+ case Hexagon::POST_STwri_nv_V4:
+ return Hexagon::POST_STwri;
+
+ // Store doubleword
+
+ case Hexagon::STrid_cdnPt_V4 :
+ return Hexagon::STrid_cPt;
+
+ case Hexagon::STrid_cdnNotPt_V4 :
+ return Hexagon::STrid_cNotPt;
+
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ return Hexagon::STrid_indexed_cPt;
+
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ return Hexagon::STrid_indexed_cNotPt;
+
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrid_indexed_shl_cPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrid_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ return Hexagon::POST_STdri_cPt;
+
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+ return Hexagon::POST_STdri_cNotPt;
+
+ case Hexagon::STd_GP_cdnPt_V4 :
+ return Hexagon::STd_GP_cPt_V4;
+
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ return Hexagon::STd_GP_cNotPt_V4;
+
+ }
+}
+
+bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ int NewOpcode = GetDotOldOp(MI->getOpcode());
+ MI->setDesc(QII->get(NewOpcode));
+ return true;
+}
+
+// Returns true if an instruction is predicated on p0 and false if it's
+// predicated on !p0.
+
+static bool GetPredicateSense(MachineInstr* MI,
+ const HexagonInstrInfo *QII) {
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown predicate sense of the instruction");
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::STrib_cPt :
+ case Hexagon::STrib_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cPt :
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STbri_cPt :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ case Hexagon::STrih_cPt :
+ case Hexagon::STrih_cdnPt_V4 :
+ case Hexagon::STrih_indexed_cPt :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_SThri_cPt :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ case Hexagon::STriw_cPt :
+ case Hexagon::STriw_cdnPt_V4 :
+ case Hexagon::STriw_indexed_cPt :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STwri_cPt :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ case Hexagon::STrib_imm_cPt_V4 :
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ case Hexagon::STrid_cPt :
+ case Hexagon::STrid_cdnPt_V4 :
+ case Hexagon::STrid_indexed_cPt :
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STdri_cPt :
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ case Hexagon::STrih_imm_cPt_V4 :
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ case Hexagon::STriw_imm_cPt_V4 :
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ case Hexagon::JMP_cdnPt :
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cdnPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cdnPt :
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cdnPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cdnPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cdnPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cdnPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cdnPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cdnPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cdnPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cdnPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cdnPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cdnPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::ADD_ri_cPt :
+ case Hexagon::ADD_ri_cdnPt :
+ case Hexagon::ADD_rr_cPt :
+ case Hexagon::ADD_rr_cdnPt :
+ case Hexagon::XOR_rr_cPt :
+ case Hexagon::XOR_rr_cdnPt :
+ case Hexagon::AND_rr_cPt :
+ case Hexagon::AND_rr_cdnPt :
+ case Hexagon::OR_rr_cPt :
+ case Hexagon::OR_rr_cdnPt :
+ case Hexagon::SUB_rr_cPt :
+ case Hexagon::SUB_rr_cdnPt :
+ case Hexagon::COMBINE_rr_cPt :
+ case Hexagon::COMBINE_rr_cdnPt :
+ case Hexagon::ASLH_cPt_V4 :
+ case Hexagon::ASLH_cdnPt_V4 :
+ case Hexagon::ASRH_cPt_V4 :
+ case Hexagon::ASRH_cdnPt_V4 :
+ case Hexagon::SXTB_cPt_V4 :
+ case Hexagon::SXTB_cdnPt_V4 :
+ case Hexagon::SXTH_cPt_V4 :
+ case Hexagon::SXTH_cdnPt_V4 :
+ case Hexagon::ZXTB_cPt_V4 :
+ case Hexagon::ZXTB_cdnPt_V4 :
+ case Hexagon::ZXTH_cPt_V4 :
+ case Hexagon::ZXTH_cdnPt_V4 :
+ case Hexagon::LDd_GP_cPt_V4 :
+ case Hexagon::LDb_GP_cPt_V4 :
+ case Hexagon::LDub_GP_cPt_V4 :
+ case Hexagon::LDh_GP_cPt_V4 :
+ case Hexagon::LDuh_GP_cPt_V4 :
+ case Hexagon::LDw_GP_cPt_V4 :
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::LDd_GP_cdnPt_V4 :
+ case Hexagon::LDb_GP_cdnPt_V4 :
+ case Hexagon::LDub_GP_cdnPt_V4 :
+ case Hexagon::LDh_GP_cdnPt_V4 :
+ case Hexagon::LDuh_GP_cdnPt_V4 :
+ case Hexagon::LDw_GP_cdnPt_V4 :
+ case Hexagon::STd_GP_cdnPt_V4 :
+ case Hexagon::STb_GP_cdnPt_V4 :
+ case Hexagon::STh_GP_cdnPt_V4 :
+ case Hexagon::STw_GP_cdnPt_V4 :
+ return true;
+
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFRI_cdnNotPt:
+ case Hexagon::STrib_cNotPt :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cNotPt :
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STbri_cNotPt :
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ case Hexagon::STrih_cNotPt :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_cNotPt :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_SThri_cNotPt :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+ case Hexagon::STriw_cNotPt :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_cNotPt :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STwri_cNotPt :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ case Hexagon::STrid_cNotPt :
+ case Hexagon::STrid_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_cNotPt :
+ case Hexagon::STrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STdri_cNotPt :
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ case Hexagon::JMP_cdnNotPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_cdnNotPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_cdnNotPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_cdnNotPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_cdnNotPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_cdnNotPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_cdnNotPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::ADD_ri_cNotPt :
+ case Hexagon::ADD_ri_cdnNotPt :
+ case Hexagon::ADD_rr_cNotPt :
+ case Hexagon::ADD_rr_cdnNotPt :
+ case Hexagon::XOR_rr_cNotPt :
+ case Hexagon::XOR_rr_cdnNotPt :
+ case Hexagon::AND_rr_cNotPt :
+ case Hexagon::AND_rr_cdnNotPt :
+ case Hexagon::OR_rr_cNotPt :
+ case Hexagon::OR_rr_cdnNotPt :
+ case Hexagon::SUB_rr_cNotPt :
+ case Hexagon::SUB_rr_cdnNotPt :
+ case Hexagon::COMBINE_rr_cNotPt :
+ case Hexagon::COMBINE_rr_cdnNotPt :
+ case Hexagon::ASLH_cNotPt_V4 :
+ case Hexagon::ASLH_cdnNotPt_V4 :
+ case Hexagon::ASRH_cNotPt_V4 :
+ case Hexagon::ASRH_cdnNotPt_V4 :
+ case Hexagon::SXTB_cNotPt_V4 :
+ case Hexagon::SXTB_cdnNotPt_V4 :
+ case Hexagon::SXTH_cNotPt_V4 :
+ case Hexagon::SXTH_cdnNotPt_V4 :
+ case Hexagon::ZXTB_cNotPt_V4 :
+ case Hexagon::ZXTB_cdnNotPt_V4 :
+ case Hexagon::ZXTH_cNotPt_V4 :
+ case Hexagon::ZXTH_cdnNotPt_V4 :
+
+ case Hexagon::LDd_GP_cNotPt_V4 :
+ case Hexagon::LDb_GP_cNotPt_V4 :
+ case Hexagon::LDub_GP_cNotPt_V4 :
+ case Hexagon::LDh_GP_cNotPt_V4 :
+ case Hexagon::LDuh_GP_cNotPt_V4 :
+ case Hexagon::LDw_GP_cNotPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ case Hexagon::LDd_GP_cdnNotPt_V4 :
+ case Hexagon::LDb_GP_cdnNotPt_V4 :
+ case Hexagon::LDub_GP_cdnNotPt_V4 :
+ case Hexagon::LDh_GP_cdnNotPt_V4 :
+ case Hexagon::LDuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDw_GP_cdnNotPt_V4 :
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ case Hexagon::STb_GP_cdnNotPt_V4 :
+ case Hexagon::STh_GP_cdnNotPt_V4 :
+ case Hexagon::STw_GP_cdnNotPt_V4 :
+ return false;
+ }
+ // return *some value* to avoid compiler warning
+ return false;
+}
+
+static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
+ const HexagonInstrInfo *QII) {
+ assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
+#ifndef NDEBUG
+ // Post Increment means duplicates. Use dense map to find duplicates in the
+ // list. Caution: Densemap initializes with the minimum of 64 buckets,
+ // whereas there are at most 5 operands in the post increment.
+ DenseMap<unsigned, unsigned> DefRegsSet;
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).isDef()) {
+ DefRegsSet[MI->getOperand(opNum).getReg()] = 1;
+ }
+
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).isUse()) {
+ if (DefRegsSet[MI->getOperand(opNum).getReg()]) {
+ return MI->getOperand(opNum);
+ }
+ }
+#else
+ if (MI->getDesc().mayLoad()) {
+ // The 2nd operand is always the post increment operand in load.
+ assert(MI->getOperand(1).isReg() &&
+ "Post increment operand has be to a register.");
+ return (MI->getOperand(1));
+ }
+ if (MI->getDesc().mayStore()) {
+ // The 1st operand is always the post increment operand in store.
+ assert(MI->getOperand(0).isReg() &&
+ "Post increment operand has be to a register.");
+ return (MI->getOperand(0));
+ }
+#endif
+ // we should never come here.
+ llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
+}
+
+// get the value being stored
+static MachineOperand& GetStoreValueOperand(MachineInstr *MI) {
+ // value being stored is always the last operand.
+ return (MI->getOperand(MI->getNumOperands()-1));
+}
+
+// can be new value store?
+// Following restrictions are to be respected in convert a store into
+// a new value store.
+// 1. If an instruction uses auto-increment, its address register cannot
+// be a new-value register. Arch Spec 5.4.2.1
+// 2. If an instruction uses absolute-set addressing mode,
+// its address register cannot be a new-value register.
+// Arch Spec 5.4.2.1.TODO: This is not enabled as
+// as absolute-set address mode patters are not implemented.
+// 3. If an instruction produces a 64-bit result, its registers cannot be used
+// as new-value registers. Arch Spec 5.4.2.2.
+// 4. If the instruction that sets a new-value register is conditional, then
+// the instruction that uses the new-value register must also be conditional,
+// and both must always have their predicates evaluate identically.
+// Arch Spec 5.4.2.3.
+// 5. There is an implied restriction of a packet can not have another store,
+// if there is a new value store in the packet. Corollary, if there is
+// already a store in a packet, there can not be a new value store.
+// Arch Spec: 3.4.4.2
+bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
+ MachineInstr *PacketMI, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit)
+{
+ // Make sure we are looking at the store
+ if (!IsNewifyStore(MI))
+ return false;
+
+ // Make sure there is dependency and can be new value'ed
+ if (GetStoreValueOperand(MI).isReg() &&
+ GetStoreValueOperand(MI).getReg() != DepReg)
+ return false;
+
+ const HexagonRegisterInfo* QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const MCInstrDesc& MCID = PacketMI->getDesc();
+ // first operand is always the result
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF);
+
+ // if there is already an store in the packet, no can do new value store
+ // Arch Spec 3.4.4.2.
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end();
+ (VI != VE); ++VI) {
+ SUnit* PacketSU = MIToSUnit[*VI];
+ if (PacketSU->getInstr()->getDesc().mayStore() ||
+ // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME,
+ // then we don't need this
+ PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::DEALLOCFRAME)
+ return false;
+ }
+
+ if (PacketRC == &Hexagon::DoubleRegsRegClass) {
+ // new value store constraint: double regs can not feed into new value store
+ // arch spec section: 5.4.2.2
+ return false;
+ }
+
+ // Make sure it's NOT the post increment register that we are going to
+ // new value.
+ if (QII->isPostIncrement(MI) &&
+ MI->getDesc().mayStore() &&
+ GetPostIncrementOperand(MI, QII).getReg() == DepReg) {
+ return false;
+ }
+
+ if (QII->isPostIncrement(PacketMI) &&
+ PacketMI->getDesc().mayLoad() &&
+ GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) {
+ // if source is post_inc, or absolute-set addressing,
+ // it can not feed into new value store
+ // r3 = memw(r2++#4)
+ // memw(r30 + #-1404) = r2.new -> can not be new value store
+ // arch spec section: 5.4.2.1
+ return false;
+ }
+
+ // If the source that feeds the store is predicated, new value store must
+ // also be also predicated.
+ if (QII->isPredicated(PacketMI)) {
+ if (!QII->isPredicated(MI))
+ return false;
+
+ // Check to make sure that they both will have their predicates
+ // evaluate identically
+ unsigned predRegNumSrc = 0;
+ unsigned predRegNumDst = 0;
+ const TargetRegisterClass* predRegClass = NULL;
+
+ // Get predicate register used in the source instruction
+ for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
+ if ( PacketMI->getOperand(opNum).isReg())
+ predRegNumSrc = PacketMI->getOperand(opNum).getReg();
+ predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc);
+ if (predRegClass == &Hexagon::PredRegsRegClass) {
+ break;
+ }
+ }
+ assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
+ ("predicate register not found in a predicated PacketMI instruction"));
+
+ // Get predicate register used in new-value store instruction
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+ if ( MI->getOperand(opNum).isReg())
+ predRegNumDst = MI->getOperand(opNum).getReg();
+ predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst);
+ if (predRegClass == &Hexagon::PredRegsRegClass) {
+ break;
+ }
+ }
+ assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
+ ("predicate register not found in a predicated MI instruction"));
+
+ // New-value register producer and user (store) need to satisfy these
+ // constraints:
+ // 1) Both instructions should be predicated on the same register.
+ // 2) If producer of the new-value register is .new predicated then store
+ // should also be .new predicated and if producer is not .new predicated
+ // then store should not be .new predicated.
+ // 3) Both new-value register producer and user should have same predicate
+ // sense, i.e, either both should be negated or both should be none negated.
+
+ if (( predRegNumDst != predRegNumSrc) ||
+ QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) ||
+ GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) {
+ return false;
+ }
+ }
+
+ // Make sure that other than the new-value register no other store instruction
+ // register has been modified in the same packet. Predicate registers can be
+ // modified by they should not be modified between the producer and the store
+ // instruction as it will make them both conditional on different values.
+ // We already know this to be true for all the instructions before and
+ // including PacketMI. Howerver, we need to perform the check for the
+ // remaining instructions in the packet.
+
+ std::vector<MachineInstr*>::iterator VI;
+ std::vector<MachineInstr*>::iterator VE;
+ unsigned StartCheck = 0;
+
+ for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end();
+ (VI != VE); ++VI) {
+ SUnit* TempSU = MIToSUnit[*VI];
+ MachineInstr* TempMI = TempSU->getInstr();
+
+ // Following condition is true for all the instructions until PacketMI is
+ // reached (StartCheck is set to 0 before the for loop).
+ // StartCheck flag is 1 for all the instructions after PacketMI.
+ if (TempMI != PacketMI && !StartCheck) // start processing only after
+ continue; // encountering PacketMI
+
+ StartCheck = 1;
+ if (TempMI == PacketMI) // We don't want to check PacketMI for dependence
+ continue;
+
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+ if (MI->getOperand(opNum).isReg() &&
+ TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(),
+ QRI))
+ return false;
+ }
+ }
+
+ // Make sure that for non POST_INC stores:
+ // 1. The only use of reg is DepReg and no other registers.
+ // This handles V4 base+index registers.
+ // The following store can not be dot new.
+ // Eg. r0 = add(r0, #3)a
+ // memw(r1+r0<<#2) = r0
+ if (!QII->isPostIncrement(MI) &&
+ GetStoreValueOperand(MI).isReg() &&
+ GetStoreValueOperand(MI).getReg() == DepReg) {
+ for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).getReg() == DepReg) {
+ return false;
+ }
+ }
+ // 2. If data definition is because of implicit definition of the register,
+ // do not newify the store. Eg.
+ // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
+ // STrih_indexed %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
+ for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
+ if (PacketMI->getOperand(opNum).isReg() &&
+ PacketMI->getOperand(opNum).getReg() == DepReg &&
+ PacketMI->getOperand(opNum).isDef() &&
+ PacketMI->getOperand(opNum).isImplicit()) {
+ return false;
+ }
+ }
+ }
+
+ // Can be dot new store.
+ return true;
+}
+
+// can this MI to promoted to either
+// new value store or new value jump
+bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI,
+ SUnit *PacketSU, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII)
+{
+
+ const HexagonRegisterInfo* QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ if (!QRI->Subtarget.hasV4TOps() ||
+ !IsNewifyStore(MI))
+ return false;
+
+ MachineInstr *PacketMI = PacketSU->getInstr();
+
+ // Check to see the store can be new value'ed.
+ if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit))
+ return true;
+
+ // Check to see the compare/jump can be new value'ed.
+ // This is done as a pass on its own. Don't need to check it here.
+ return false;
+}
+
+// Check to see if an instruction can be dot new
+// There are three kinds.
+// 1. dot new on predicate - V2/V3/V4
+// 2. dot new on stores NV/ST - V4
+// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
+bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
+ SUnit *PacketSU, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC )
+{
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ // Already a dot new instruction.
+ if (QII->isDotNewInst(MI) && !IsNewifyStore(MI))
+ return false;
+
+ if (!isNewifiable(MI))
+ return false;
+
+ // predicate .new
+ if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI))
+ return true;
+ else if (RC != &Hexagon::PredRegsRegClass &&
+ !IsNewifyStore(MI)) // MI is not a new-value store
+ return false;
+ else {
+ // Create a dot new machine instruction to see if resources can be
+ // allocated. If not, bail out now.
+ int NewOpcode = GetDotNewOp(MI->getOpcode());
+ const MCInstrDesc &desc = QII->get(NewOpcode);
+ DebugLoc dl;
+ MachineInstr *NewMI =
+ MI->getParent()->getParent()->CreateMachineInstr(desc, dl);
+ bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(NewMI);
+
+ if (!ResourcesAvailable)
+ return false;
+
+ // new value store only
+ // new new value jump generated as a passes
+ if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// Go through the packet instructions and search for anti dependency
+// between them and DepReg from MI
+// Consider this case:
+// Trying to add
+// a) %R1<def> = TFRI_cdNotPt %P3, 2
+// to this packet:
+// {
+// b) %P0<def> = OR_pp %P3<kill>, %P0<kill>
+// c) %P3<def> = TFR_PdRs %R23
+// d) %R1<def> = TFRI_cdnPt %P3, 4
+// }
+// The P3 from a) and d) will be complements after
+// a)'s P3 is converted to .new form
+// Anti Dep between c) and b) is irrelevant for this case
+bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ SUnit* PacketSUDep = MIToSUnit[MI];
+
+ for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
+ VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
+
+ // We only care for dependencies to predicated instructions
+ if(!QII->isPredicated(*VIN)) continue;
+
+ // Scheduling Unit for current insn in the packet
+ SUnit* PacketSU = MIToSUnit[*VIN];
+
+ // Look at dependencies between current members of the packet
+ // and predicate defining instruction MI.
+ // Make sure that dependency is on the exact register
+ // we care about.
+ if (PacketSU->isSucc(PacketSUDep)) {
+ for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+ if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) &&
+ (PacketSU->Succs[i].getKind() == SDep::Anti) &&
+ (PacketSU->Succs[i].getReg() == DepReg)) {
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+
+// Given two predicated instructions, this function detects whether
+// the predicates are complements
+bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
+ MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ // Currently can only reason about conditional transfers
+ if (!QII->isConditionalTransfer(MI1) || !QII->isConditionalTransfer(MI2)) {
+ return false;
+ }
+
+ // Scheduling unit for candidate
+ SUnit* SU = MIToSUnit[MI1];
+
+ // One corner case deals with the following scenario:
+ // Trying to add
+ // a) %R24<def> = TFR_cPt %P0, %R25
+ // to this packet:
+ //
+ // {
+ // b) %R25<def> = TFR_cNotPt %P0, %R24
+ // c) %P0<def> = CMPEQri %R26, 1
+ // }
+ //
+ // On general check a) and b) are complements, but
+ // presence of c) will convert a) to .new form, and
+ // then it is not a complement
+ // We attempt to detect it by analyzing existing
+ // dependencies in the packet
+
+ // Analyze relationships between all existing members of the packet.
+ // Look for Anti dependecy on the same predicate reg
+ // as used in the candidate
+ for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
+ VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
+
+ // Scheduling Unit for current insn in the packet
+ SUnit* PacketSU = MIToSUnit[*VIN];
+
+ // If this instruction in the packet is succeeded by the candidate...
+ if (PacketSU->isSucc(SU)) {
+ for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+ // The corner case exist when there is true data
+ // dependency between candidate and one of current
+ // packet members, this dep is on predicate reg, and
+ // there already exist anti dep on the same pred in
+ // the packet.
+ if (PacketSU->Succs[i].getSUnit() == SU &&
+ Hexagon::PredRegsRegClass.contains(
+ PacketSU->Succs[i].getReg()) &&
+ PacketSU->Succs[i].getKind() == SDep::Data &&
+ // Here I know that *VIN is predicate setting instruction
+ // with true data dep to candidate on the register
+ // we care about - c) in the above example.
+ // Now I need to see if there is an anti dependency
+ // from c) to any other instruction in the
+ // same packet on the pred reg of interest
+ RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(),
+ MIToSUnit)) {
+ return false;
+ }
+ }
+ }
+ }
+
+ // If the above case does not apply, check regular
+ // complement condition.
+ // Check that the predicate register is the same and
+ // that the predicate sense is different
+ // We also need to differentiate .old vs. .new:
+ // !p0 is not complimentary to p0.new
+ return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) &&
+ (GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) &&
+ (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2)));
+}
+
+// initPacketizerState - Initialize packetizer flags
+void HexagonPacketizerList::initPacketizerState() {
+
+ Dependence = false;
+ PromotedToDotNew = false;
+ GlueToNewValueJump = false;
+ GlueAllocframeStore = false;
+ FoundSequentialDependence = false;
+
+ return;
+}
+
+// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (MI->isDebugValue())
+ return true;
+
+ // We must print out inline assembly
+ if (MI->isInlineAsm())
+ return false;
+
+ // We check if MI has any functional units mapped to it.
+ // If it doesn't, we ignore the instruction.
+ const MCInstrDesc& TID = MI->getDesc();
+ unsigned SchedClass = TID.getSchedClass();
+ const InstrStage* IS =
+ ResourceTracker->getInstrItins()->beginStage(SchedClass);
+ unsigned FuncUnits = IS->getUnits();
+ return !FuncUnits;
+}
+
+// isSoloInstruction: - Returns true for instructions that must be
+// scheduled in their own packet.
+bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
+
+ if (MI->isInlineAsm())
+ return true;
+
+ if (MI->isEHLabel())
+ return true;
+
+ // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
+ // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
+ // They must not be grouped with other instructions in a packet.
+ if (IsSchedBarrier(MI))
+ return true;
+
+ return false;
+}
+
+// isLegalToPacketizeTogether:
+// SUI is the current instruction that is out side of the current packet.
+// SUJ is the current instruction inside the current packet against which that
+// SUI will be packetized.
+bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ MachineInstr *J = SUJ->getInstr();
+ assert(I && J && "Unable to packetize null instruction!");
+
+ const MCInstrDesc &MCIDI = I->getDesc();
+ const MCInstrDesc &MCIDJ = J->getDesc();
+
+ MachineBasicBlock::iterator II = I;
+
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+ const HexagonRegisterInfo* QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ // Inline asm cannot go in the packet.
+ if (I->getOpcode() == Hexagon::INLINEASM)
+ llvm_unreachable("Should not meet inline asm here!");
+
+ if (isSoloInstruction(I))
+ llvm_unreachable("Should not meet solo instr here!");
+
+ // A save callee-save register function call can only be in a packet
+ // with instructions that don't write to the callee-save registers.
+ if ((QII->isSaveCalleeSavedRegsCall(I) &&
+ DoesModifyCalleeSavedReg(J, QRI)) ||
+ (QII->isSaveCalleeSavedRegsCall(J) &&
+ DoesModifyCalleeSavedReg(I, QRI))) {
+ Dependence = true;
+ return false;
+ }
+
+ // Two control flow instructions cannot go in the same packet.
+ if (IsControlFlow(I) && IsControlFlow(J)) {
+ Dependence = true;
+ return false;
+ }
+
+ // A LoopN instruction cannot appear in the same packet as a jump or call.
+ if (IsLoopN(I) && ( IsDirectJump(J)
+ || MCIDJ.isCall()
+ || QII->isDeallocRet(J))) {
+ Dependence = true;
+ return false;
+ }
+ if (IsLoopN(J) && ( IsDirectJump(I)
+ || MCIDI.isCall()
+ || QII->isDeallocRet(I))) {
+ Dependence = true;
+ return false;
+ }
+
+ // dealloc_return cannot appear in the same packet as a conditional or
+ // unconditional jump.
+ if (QII->isDeallocRet(I) && ( MCIDJ.isBranch()
+ || MCIDJ.isCall()
+ || MCIDJ.isBarrier())) {
+ Dependence = true;
+ return false;
+ }
+
+
+ // V4 allows dual store. But does not allow second store, if the
+ // first store is not in SLOT0. New value store, new value jump,
+ // dealloc_return and memop always take SLOT0.
+ // Arch spec 3.4.4.2
+ if (QRI->Subtarget.hasV4TOps()) {
+ if (MCIDI.mayStore() && MCIDJ.mayStore() &&
+ (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
+ Dependence = true;
+ return false;
+ }
+
+ if ((QII->isMemOp(J) && MCIDI.mayStore())
+ || (MCIDJ.mayStore() && QII->isMemOp(I))
+ || (QII->isMemOp(J) && QII->isMemOp(I))) {
+ Dependence = true;
+ return false;
+ }
+
+ //if dealloc_return
+ if (MCIDJ.mayStore() && QII->isDeallocRet(I)){
+ Dependence = true;
+ return false;
+ }
+
+ // If an instruction feeds new value jump, glue it.
+ MachineBasicBlock::iterator NextMII = I;
+ ++NextMII;
+ MachineInstr *NextMI = NextMII;
+
+ if (QII->isNewValueJump(NextMI)) {
+
+ bool secondRegMatch = false;
+ bool maintainNewValueJump = false;
+
+ if (NextMI->getOperand(1).isReg() &&
+ I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+ secondRegMatch = true;
+ maintainNewValueJump = true;
+ }
+
+ if (!secondRegMatch &&
+ I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
+ maintainNewValueJump = true;
+ }
+
+ for (std::vector<MachineInstr*>::iterator
+ VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end();
+ (VI != VE && maintainNewValueJump); ++VI) {
+ SUnit* PacketSU = MIToSUnit[*VI];
+
+ // NVJ can not be part of the dual jump - Arch Spec: section 7.8
+ if (PacketSU->getInstr()->getDesc().isCall()) {
+ Dependence = true;
+ break;
+ }
+ // Validate
+ // 1. Packet does not have a store in it.
+ // 2. If the first operand of the nvj is newified, and the second
+ // operand is also a reg, it (second reg) is not defined in
+ // the same packet.
+ // 3. If the second operand of the nvj is newified, (which means
+ // first operand is also a reg), first reg is not defined in
+ // the same packet.
+ if (PacketSU->getInstr()->getDesc().mayStore() ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
+ // Check #2.
+ (!secondRegMatch && NextMI->getOperand(1).isReg() &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(1).getReg(), QRI)) ||
+ // Check #3.
+ (secondRegMatch &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(0).getReg(), QRI))) {
+ Dependence = true;
+ break;
+ }
+ }
+ if (!Dependence)
+ GlueToNewValueJump = true;
+ else
+ return false;
+ }
+ }
+
+ if (SUJ->isSucc(SUI)) {
+ for (unsigned i = 0;
+ (i < SUJ->Succs.size()) && !FoundSequentialDependence;
+ ++i) {
+
+ if (SUJ->Succs[i].getSUnit() != SUI) {
+ continue;
+ }
+
+ SDep::Kind DepType = SUJ->Succs[i].getKind();
+
+ // For direct calls:
+ // Ignore register dependences for call instructions for
+ // packetization purposes except for those due to r31 and
+ // predicate registers.
+ //
+ // For indirect calls:
+ // Same as direct calls + check for true dependences to the register
+ // used in the indirect call.
+ //
+ // We completely ignore Order dependences for call instructions
+ //
+ // For returns:
+ // Ignore register dependences for return instructions like jumpr,
+ // dealloc return unless we have dependencies on the explicit uses
+ // of the registers used by jumpr (like r31) or dealloc return
+ // (like r29 or r30).
+ //
+ // TODO: Currently, jumpr is handling only return of r31. So, the
+ // following logic (specificaly IsCallDependent) is working fine.
+ // We need to enable jumpr for register other than r31 and then,
+ // we need to rework the last part, where it handles indirect call
+ // of that (IsCallDependent) function. Bug 6216 is opened for this.
+ //
+ unsigned DepReg = 0;
+ const TargetRegisterClass* RC = NULL;
+ if (DepType == SDep::Data) {
+ DepReg = SUJ->Succs[i].getReg();
+ RC = QRI->getMinimalPhysRegClass(DepReg);
+ }
+ if ((MCIDI.isCall() || MCIDI.isReturn()) &&
+ (!IsRegDependence(DepType) ||
+ !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
+ /* do nothing */
+ }
+
+ // For instructions that can be promoted to dot-new, try to promote.
+ else if ((DepType == SDep::Data) &&
+ CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) &&
+ PromoteToDotNew(I, DepType, II, RC)) {
+ PromotedToDotNew = true;
+ /* do nothing */
+ }
+
+ else if ((DepType == SDep::Data) &&
+ (QII->isNewValueJump(I))) {
+ /* do nothing */
+ }
+
+ // For predicated instructions, if the predicates are complements
+ // then there can be no dependence.
+ else if (QII->isPredicated(I) &&
+ QII->isPredicated(J) &&
+ ArePredicatesComplements(I, J, MIToSUnit)) {
+ /* do nothing */
+
+ }
+ else if (IsDirectJump(I) &&
+ !MCIDJ.isBranch() &&
+ !MCIDJ.isCall() &&
+ (DepType == SDep::Order)) {
+ // Ignore Order dependences between unconditional direct branches
+ // and non-control-flow instructions
+ /* do nothing */
+ }
+ else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) &&
+ (DepType != SDep::Output)) {
+ // Ignore all dependences for jumps except for true and output
+ // dependences
+ /* do nothing */
+ }
+
+ // Ignore output dependences due to superregs. We can
+ // write to two different subregisters of R1:0 for instance
+ // in the same cycle
+ //
+
+ //
+ // Let the
+ // If neither I nor J defines DepReg, then this is a
+ // superfluous output dependence. The dependence must be of the
+ // form:
+ // R0 = ...
+ // R1 = ...
+ // and there is an output dependence between the two instructions
+ // with
+ // DepReg = D0
+ // We want to ignore these dependences.
+ // Ideally, the dependence constructor should annotate such
+ // dependences. We can then avoid this relatively expensive check.
+ //
+ else if (DepType == SDep::Output) {
+ // DepReg is the register that's responsible for the dependence.
+ unsigned DepReg = SUJ->Succs[i].getReg();
+
+ // Check if I and J really defines DepReg.
+ if (I->definesRegister(DepReg) ||
+ J->definesRegister(DepReg)) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ // We ignore Order dependences for
+ // 1. Two loads unless they are volatile.
+ // 2. Two stores in V4 unless they are volatile.
+ else if ((DepType == SDep::Order) &&
+ !I->hasOrderedMemoryRef() &&
+ !J->hasOrderedMemoryRef()) {
+ if (QRI->Subtarget.hasV4TOps() &&
+ // hexagonv4 allows dual store.
+ MCIDI.mayStore() && MCIDJ.mayStore()) {
+ /* do nothing */
+ }
+ // store followed by store-- not OK on V2
+ // store followed by load -- not OK on all (OK if addresses
+ // are not aliased)
+ // load followed by store -- OK on all
+ // load followed by load -- OK on all
+ else if ( !MCIDJ.mayStore()) {
+ /* do nothing */
+ }
+ else {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ // For V4, special case ALLOCFRAME. Even though there is dependency
+ // between ALLOCAFRAME and subsequent store, allow it to be
+ // packetized in a same packet. This implies that the store is using
+ // caller's SP. Hense, offset needs to be updated accordingly.
+ else if (DepType == SDep::Data
+ && QRI->Subtarget.hasV4TOps()
+ && J->getOpcode() == Hexagon::ALLOCFRAME
+ && (I->getOpcode() == Hexagon::STrid
+ || I->getOpcode() == Hexagon::STriw
+ || I->getOpcode() == Hexagon::STrib)
+ && I->getOperand(0).getReg() == QRI->getStackRegister()
+ && QII->isValidOffset(I->getOpcode(),
+ I->getOperand(1).getImm() -
+ (FrameSize + HEXAGON_LRFP_SIZE)))
+ {
+ GlueAllocframeStore = true;
+ // Since this store is to be glued with allocframe in the same
+ // packet, it will use SP of the previous stack frame, i.e
+ // caller's SP. Therefore, we need to recalculate offset according
+ // to this change.
+ I->getOperand(1).setImm(I->getOperand(1).getImm() -
+ (FrameSize + HEXAGON_LRFP_SIZE));
+ }
+
+ //
+ // Skip over anti-dependences. Two instructions that are
+ // anti-dependent can share a packet
+ //
+ else if (DepType != SDep::Anti) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ if (FoundSequentialDependence) {
+ Dependence = true;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// isLegalToPruneDependencies
+bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ assert(I && SUJ->getInstr() && "Unable to packetize null instruction!");
+
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+
+ if (Dependence) {
+
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old.
+ if (PromotedToDotNew) {
+ DemoteToDotOld(I);
+ }
+
+ // Check if the instruction (must be a store) was glued with an Allocframe
+ // instruction. If so, restore its offset to its original value, i.e. use
+ // curent SP instead of caller's SP.
+ if (GlueAllocframeStore) {
+ I->getOperand(1).setImm(I->getOperand(1).getImm() +
+ FrameSize + HEXAGON_LRFP_SIZE);
+ }
+
+ return false;
+ }
+ return true;
+}
+
+MachineBasicBlock::iterator
+HexagonPacketizerList::addToPacket(MachineInstr *MI) {
+
+ MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock *MBB = MI->getParent();
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ if (GlueToNewValueJump) {
+
+ ++MII;
+ MachineInstr *nvjMI = MII;
+ assert(ResourceTracker->canReserveResources(MI));
+ ResourceTracker->reserveResources(MI);
+ if ((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
+ !tryAllocateResourcesForConstExt(MI)) {
+ endPacket(MBB, MI);
+ ResourceTracker->reserveResources(MI);
+ assert(canReserveResourcesForConstExt(MI) &&
+ "Ensure that there is a slot");
+ reserveResourcesForConstExt(MI);
+ // Reserve resources for new value jump constant extender.
+ assert(canReserveResourcesForConstExt(MI) &&
+ "Ensure that there is a slot");
+ reserveResourcesForConstExt(nvjMI);
+ assert(ResourceTracker->canReserveResources(nvjMI) &&
+ "Ensure that there is a slot");
+
+ } else if ( // Extended instruction takes two slots in the packet.
+ // Try reserve and allocate 4-byte in the current packet first.
+ (QII->isExtended(nvjMI)
+ && (!tryAllocateResourcesForConstExt(nvjMI)
+ || !ResourceTracker->canReserveResources(nvjMI)))
+ || // For non-extended instruction, no need to allocate extra 4 bytes.
+ (!QII->isExtended(nvjMI) &&
+ !ResourceTracker->canReserveResources(nvjMI)))
+ {
+ endPacket(MBB, MI);
+ // A new and empty packet starts.
+ // We are sure that the resources requirements can be satisfied.
+ // Therefore, do not need to call "canReserveResources" anymore.
+ ResourceTracker->reserveResources(MI);
+ if (QII->isExtended(nvjMI))
+ reserveResourcesForConstExt(nvjMI);
+ }
+ // Here, we are sure that "reserveResources" would succeed.
+ ResourceTracker->reserveResources(nvjMI);
+ CurrentPacketMIs.push_back(MI);
+ CurrentPacketMIs.push_back(nvjMI);
+ } else {
+ if ( (QII->isExtended(MI) || QII->isConstExtended(MI))
+ && ( !tryAllocateResourcesForConstExt(MI)
+ || !ResourceTracker->canReserveResources(MI)))
+ {
+ endPacket(MBB, MI);
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old
+ if (PromotedToDotNew) {
+ DemoteToDotOld(MI);
+ }
+ reserveResourcesForConstExt(MI);
+ }
+ // In case that "MI" is not an extended insn,
+ // the resource availability has already been checked.
+ ResourceTracker->reserveResources(MI);
+ CurrentPacketMIs.push_back(MI);
+ }
+ return MII;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonPacketizer() {
+ return new HexagonPacketizer();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
new file mode 100644
index 000000000000..c607b5d35649
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
@@ -0,0 +1,141 @@
+//===-- HexagonVarargsCallingConvention.h - Calling Conventions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the functions that assign locations to outgoing function
+// arguments. Adapted from the target independent version but this handles
+// calls to varargs functions
+//
+//===----------------------------------------------------------------------===//
+//
+
+
+
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
+ unsigned ByValSize = 0;
+ if (ArgFlags.isByVal() &&
+ ((ByValSize = ArgFlags.getByValSize()) >
+ (MVT(MVT::i64).getSizeInBits() / 8))) {
+ ForceMem = true;
+ }
+
+
+ // Only assign registers for named (non varargs) arguments
+ if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <=
+ NonVarArgsParams))) {
+
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::i16 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::f32) {
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+ }
+
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getDataLayout()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getDataLayout()->getTypeSizeInBits(ArgTy) / 8;
+
+ // If it's passed by value, then we need the size of the aggregate not of
+ // the pointer.
+ if (ArgFlags.isByVal()) {
+ Size = ByValSize;
+
+ // Hexagon_TODO: Get the alignment of the contained type here.
+ Alignment = 8;
+ }
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
+
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
+
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::f32) {
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getDataLayout()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getDataLayout()->getTypeSizeInBits(ArgTy) / 8;
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
new file mode 100644
index 000000000000..36da6dfcc3d0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -0,0 +1,210 @@
+//===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly syntax -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an Hexagon MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "HexagonAsmPrinter.h"
+#include "Hexagon.h"
+#include "HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "HexagonGenAsmWriter.inc"
+
+const char HexagonInstPrinter::PacketPadding = '\t';
+
+StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return MII.getName(Opcode);
+}
+
+StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
+ return getRegisterName(RegNo);
+}
+
+void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInst((const HexagonMCInst*)(MI), O, Annot);
+}
+
+void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ const char startPacket = '{',
+ endPacket = '}';
+ // TODO: add outer HW loop when it's supported too.
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+ // Ending a harware loop is different from ending an regular packet.
+ assert(MI->isPacketEnd() && "Loop-end must also end the packet");
+
+ if (MI->isPacketStart()) {
+ // There must be a packet to end a loop.
+ // FIXME: when shuffling is always run, this shouldn't be needed.
+ HexagonMCInst Nop;
+ StringRef NoAnnot;
+
+ Nop.setOpcode (Hexagon::NOP);
+ Nop.setPacketStart (MI->isPacketStart());
+ printInst (&Nop, O, NoAnnot);
+ }
+
+ // Close the packet.
+ if (MI->isPacketEnd())
+ O << PacketPadding << endPacket;
+
+ printInstruction(MI, O);
+ }
+ else {
+ // Prefix the insn opening the packet.
+ if (MI->isPacketStart())
+ O << PacketPadding << startPacket << '\n';
+
+ printInstruction(MI, O);
+
+ // Suffix the insn closing the packet.
+ if (MI->isPacketEnd())
+ // Suffix the packet in a new line always, since the GNU assembler has
+ // issues with a closing brace on the same line as CONST{32,64}.
+ O << '\n' << PacketPadding << endPacket;
+ }
+
+ printAnnotation(O, Annot);
+}
+
+void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand& MO = MI->getOperand(OpNo);
+
+ if (MO.isReg()) {
+ O << getRegisterName(MO.getReg());
+ } else if(MO.isExpr()) {
+ O << *MO.getExpr();
+ } else if(MO.isImm()) {
+ printImmOperand(MI, OpNo, O);
+ } else {
+ llvm_unreachable("Unknown operand");
+ }
+}
+
+void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand& MO = MI->getOperand(OpNo);
+
+ if(MO.isExpr()) {
+ O << *MO.getExpr();
+ } else if(MO.isImm()) {
+ O << MI->getOperand(OpNo).getImm();
+ } else {
+ llvm_unreachable("Unknown operand");
+ }
+}
+
+void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const HexagonMCInst *HMCI = static_cast<const HexagonMCInst*>(MI);
+ if (HMCI->isConstExtended())
+ O << "#";
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI,
+ unsigned OpNo, raw_ostream &O) const {
+ O << MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << -MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << -1;
+}
+
+void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand& MO0 = MI->getOperand(OpNo);
+ const MCOperand& MO1 = MI->getOperand(OpNo + 1);
+
+ O << getRegisterName(MO0.getReg());
+ O << " + #" << MO1.getImm();
+}
+
+void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand& MO0 = MI->getOperand(OpNo);
+ const MCOperand& MO1 = MI->getOperand(OpNo + 1);
+
+ O << getRegisterName(MO0.getReg()) << ", #" << MO1.getImm();
+}
+
+void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ assert("Unknown branch operand.");
+}
+
+void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+}
+
+void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+}
+
+void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+}
+
+void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, bool hi) const {
+ const MCOperand& MO = MI->getOperand(OpNo);
+
+ O << '#' << (hi? "HI": "LO") << '(';
+ if (MO.isImm()) {
+ O << '#';
+ printOperand(MI, OpNo, O);
+ } else {
+ assert("Unknown symbol operand");
+ printOperand(MI, OpNo, O);
+ }
+ O << ')';
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
new file mode 100644
index 000000000000..d0cef683da95
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
@@ -0,0 +1,87 @@
+//===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an Hexagon MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONINSTPRINTER_H
+#define HEXAGONINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+namespace llvm {
+ class HexagonMCInst;
+
+ class HexagonInstPrinter : public MCInstPrinter {
+ public:
+ explicit HexagonInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI), MII(MII) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ StringRef getRegName(unsigned RegNo) const;
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+
+ void printConstantPool(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const;
+
+ void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
+ { printSymbol(MI, OpNo, O, true); }
+ void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
+ { printSymbol(MI, OpNo, O, false); }
+
+ const MCInstrInfo &getMII() const {
+ return MII;
+ }
+
+ protected:
+ void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi)
+ const;
+
+ static const char PacketPadding;
+
+ private:
+ const MCInstrInfo &MII;
+
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
new file mode 100644
index 000000000000..d4a93b5c87a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -0,0 +1,183 @@
+//===-- HexagonBaseInfo.h - Top level definitions for Hexagon --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the Hexagon target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONBASEINFO_H
+#define HEXAGONBASEINFO_H
+
+#include "HexagonMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// HexagonII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace HexagonII {
+ // *** The code below must match HexagonInstrFormat*.td *** //
+
+ // Insn types.
+ // *** Must match HexagonInstrFormat*.td ***
+ enum Type {
+ TypePSEUDO = 0,
+ TypeALU32 = 1,
+ TypeCR = 2,
+ TypeJR = 3,
+ TypeJ = 4,
+ TypeLD = 5,
+ TypeST = 6,
+ TypeSYSTEM = 7,
+ TypeXTYPE = 8,
+ TypeMEMOP = 9,
+ TypeNV = 10,
+ TypePREFIX = 30, // Such as extenders.
+ TypeENDLOOP = 31 // Such as end of a HW loop.
+ };
+
+ enum SubTarget {
+ HasV2SubT = 0xf,
+ HasV2SubTOnly = 0x1,
+ NoV2SubT = 0x0,
+ HasV3SubT = 0xe,
+ HasV3SubTOnly = 0x2,
+ NoV3SubT = 0x1,
+ HasV4SubT = 0xc,
+ NoV4SubT = 0x3,
+ HasV5SubT = 0x8,
+ NoV5SubT = 0x7
+ };
+
+ enum AddrMode {
+ NoAddrMode = 0, // No addressing mode
+ Absolute = 1, // Absolute addressing mode
+ AbsoluteSet = 2, // Absolute set addressing mode
+ BaseImmOffset = 3, // Indirect with offset
+ BaseLongOffset = 4, // Indirect with long offset
+ BaseRegOffset = 5 // Indirect with register offset
+ };
+
+ enum MemAccessSize {
+ NoMemAccess = 0, // Not a memory acces instruction.
+ ByteAccess = 1, // Byte access instruction (memb).
+ HalfWordAccess = 2, // Half word access instruction (memh).
+ WordAccess = 3, // Word access instrution (memw).
+ DoubleWordAccess = 4 // Double word access instruction (memd)
+ };
+
+ // MCInstrDesc TSFlags
+ // *** Must match HexagonInstrFormat*.td ***
+ enum {
+ // This 5-bit field describes the insn type.
+ TypePos = 0,
+ TypeMask = 0x1f,
+
+ // Solo instructions.
+ SoloPos = 5,
+ SoloMask = 0x1,
+
+ // Predicated instructions.
+ PredicatedPos = 6,
+ PredicatedMask = 0x1,
+ PredicatedFalsePos = 7,
+ PredicatedFalseMask = 0x1,
+ PredicatedNewPos = 8,
+ PredicatedNewMask = 0x1,
+
+ // New-Value consumer instructions.
+ NewValuePos = 9,
+ NewValueMask = 0x1,
+
+ // New-Value producer instructions.
+ hasNewValuePos = 10,
+ hasNewValueMask = 0x1,
+
+ // Which operand consumes or produces a new value.
+ NewValueOpPos = 11,
+ NewValueOpMask = 0x7,
+
+ // Which bits encode the new value.
+ NewValueBitsPos = 14,
+ NewValueBitsMask = 0x3,
+
+ // Stores that can become new-value stores.
+ mayNVStorePos = 16,
+ mayNVStoreMask = 0x1,
+
+ // New-value store instructions.
+ NVStorePos = 17,
+ NVStoreMask = 0x1,
+
+ // Extendable insns.
+ ExtendablePos = 18,
+ ExtendableMask = 0x1,
+
+ // Insns must be extended.
+ ExtendedPos = 19,
+ ExtendedMask = 0x1,
+
+ // Which operand may be extended.
+ ExtendableOpPos = 20,
+ ExtendableOpMask = 0x7,
+
+ // Signed or unsigned range.
+ ExtentSignedPos = 23,
+ ExtentSignedMask = 0x1,
+
+ // Number of bits of range before extending operand.
+ ExtentBitsPos = 24,
+ ExtentBitsMask = 0x1f,
+
+ // Valid subtargets
+ validSubTargetPos = 29,
+ validSubTargetMask = 0xf,
+
+ // Addressing mode for load/store instructions.
+ AddrModePos = 33,
+ AddrModeMask = 0x7,
+
+ // Access size of memory access instructions (load/store).
+ MemAccessSizePos = 36,
+ MemAccesSizeMask = 0x7
+ };
+
+ // *** The code above must match HexagonInstrFormat*.td *** //
+
+ // Hexagon specific MO operand flag mask.
+ enum HexagonMOTargetFlagVal {
+ //===------------------------------------------------------------------===//
+ // Hexagon Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ HMOTF_ConstExtended = 1,
+
+ /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation
+ /// Used for computing a global address for PIC compilations
+ MO_PCREL,
+
+ /// MO_GOT - Indicates a GOT-relative relocation
+ MO_GOT,
+
+ // Low or high part of a symbol.
+ MO_LO16, MO_HI16,
+
+ // Offset from the base of the SDA.
+ MO_GPREL
+ };
+
+} // End namespace HexagonII.
+
+} // End namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
new file mode 100644
index 000000000000..3deb8d1deb42
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -0,0 +1,37 @@
+//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the HexagonMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCAsmInfo.h"
+
+using namespace llvm;
+
+HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // .xword is only supported by V9.
+ ZeroDirective = "\t.skip\t";
+ CommentString = "//";
+ HasLEB128 = true;
+
+ PrivateGlobalPrefix = ".L";
+ LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
+ InlineAsmStart = "# InlineAsm Start";
+ InlineAsmEnd = "# InlineAsm End";
+ ZeroDirective = "\t.space\t";
+ AscizDirective = "\t.string\t";
+ WeakRefDirective = "\t.weak\t";
+
+ SupportsDebugInformation = true;
+ UsesELFSectionDirectiveForBSS = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
new file mode 100644
index 000000000000..d336cd5be917
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the HexagonMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMCASMINFO_H
+#define HexagonMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class HexagonMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit HexagonMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
new file mode 100644
index 000000000000..9260b4a27661
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
@@ -0,0 +1,175 @@
+//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some Hexagon VLIW annotations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+
+using namespace llvm;
+
+// Return the slots used by the insn.
+unsigned HexagonMCInst::getUnits(const HexagonTargetMachine* TM) const {
+ const HexagonInstrInfo* QII = TM->getInstrInfo();
+ const InstrItineraryData* II = TM->getInstrItineraryData();
+ const InstrStage*
+ IS = II->beginStage(QII->get(this->getOpcode()).getSchedClass());
+
+ return (IS->getUnits());
+}
+
+// Return the Hexagon ISA class for the insn.
+unsigned HexagonMCInst::getType() const {
+ const uint64_t F = MCID->TSFlags;
+
+ return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+}
+
+// Return whether the insn is an actual insn.
+bool HexagonMCInst::isCanon() const {
+ return (!MCID->isPseudo() &&
+ !isPrefix() &&
+ getType() != HexagonII::TypeENDLOOP);
+}
+
+// Return whether the insn is a prefix.
+bool HexagonMCInst::isPrefix() const {
+ return (getType() == HexagonII::TypePREFIX);
+}
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool HexagonMCInst::isSolo() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
+}
+
+// Return whether the insn is a new-value consumer.
+bool HexagonMCInst::isNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
+// Return whether the instruction is a legal new-value producer.
+bool HexagonMCInst::hasNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
+}
+
+// Return the operand that consumes or produces a new value.
+const MCOperand& HexagonMCInst::getNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ const unsigned O = (F >> HexagonII::NewValueOpPos) &
+ HexagonII::NewValueOpMask;
+ const MCOperand& MCO = getOperand(O);
+
+ assert ((isNewValue() || hasNewValue()) && MCO.isReg());
+ return (MCO);
+}
+
+// Return whether the instruction needs to be constant extended.
+// 1) Always return true if the instruction has 'isExtended' flag set.
+//
+// isExtendable:
+// 2) For immediate extended operands, return true only if the value is
+// out-of-range.
+// 3) For global address, always return true.
+
+bool HexagonMCInst::isConstExtended(void) const {
+ if (isExtended())
+ return true;
+
+ if (!isExtendable())
+ return false;
+
+ short ExtOpNum = getCExtOpNum();
+ int MinValue = getMinValue();
+ int MaxValue = getMaxValue();
+ const MCOperand& MO = getOperand(ExtOpNum);
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+ // We currently only handle isGlobal() because it is the only kind of
+ // object we are going to end up with here for now.
+ // In the future we probably should add isSymbol(), etc.
+ if (MO.isExpr())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int ImmValue = MO.getImm();
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
+
+// Return whether the instruction must be always extended.
+bool HexagonMCInst::isExtended(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+}
+
+// Return true if the instruction may be extended based on the operand value.
+bool HexagonMCInst::isExtendable(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+}
+
+// Return number of bits in the constant extended operand.
+unsigned HexagonMCInst::getBitCount(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
+}
+
+// Return constant extended operand number.
+unsigned short HexagonMCInst::getCExtOpNum(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+// Return whether the operand can be constant extended.
+bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
+}
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInst::getMinValue(void) const {
+ const uint64_t F = MCID->TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1 << (bits - 1);
+ else
+ return 0;
+}
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInst::getMaxValue(void) const {
+ const uint64_t F = MCID->TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1 << (bits - 1));
+ else
+ return ~(-1 << bits);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
new file mode 100644
index 000000000000..3ca71f00b241
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
@@ -0,0 +1,100 @@
+//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some VLIW annotations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCINST_H
+#define HEXAGONMCINST_H
+
+#include "HexagonTargetMachine.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class HexagonMCInst: public MCInst {
+ // MCID is set during instruction lowering.
+ // It is needed in order to access TSFlags for
+ // use in checking MC instruction properties.
+ const MCInstrDesc *MCID;
+
+ // Packet start and end markers
+ unsigned packetStart: 1, packetEnd: 1;
+
+ public:
+ explicit HexagonMCInst():
+ MCInst(), MCID(0), packetStart(0), packetEnd(0) {};
+ HexagonMCInst(const MCInstrDesc& mcid):
+ MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {};
+
+ bool isPacketStart() const { return (packetStart); };
+ bool isPacketEnd() const { return (packetEnd); };
+ void setPacketStart(bool Y) { packetStart = Y; };
+ void setPacketEnd(bool Y) { packetEnd = Y; };
+ void resetPacket() { setPacketStart(false); setPacketEnd(false); };
+
+ // Return the slots used by the insn.
+ unsigned getUnits(const HexagonTargetMachine* TM) const;
+
+ // Return the Hexagon ISA class for the insn.
+ unsigned getType() const;
+
+ void setDesc(const MCInstrDesc& mcid) { MCID = &mcid; };
+ const MCInstrDesc& getDesc(void) const { return *MCID; };
+
+ // Return whether the insn is an actual insn.
+ bool isCanon() const;
+
+ // Return whether the insn is a prefix.
+ bool isPrefix() const;
+
+ // Return whether the insn is solo, i.e., cannot be in a packet.
+ bool isSolo() const;
+
+ // Return whether the instruction needs to be constant extended.
+ bool isConstExtended() const;
+
+ // Return constant extended operand number.
+ unsigned short getCExtOpNum(void) const;
+
+ // Return whether the insn is a new-value consumer.
+ bool isNewValue() const;
+
+ // Return whether the instruction is a legal new-value producer.
+ bool hasNewValue() const;
+
+ // Return the operand that consumes or produces a new value.
+ const MCOperand& getNewValue() const;
+
+ // Return number of bits in the constant extended operand.
+ unsigned getBitCount(void) const;
+
+ private:
+ // Return whether the instruction must be always extended.
+ bool isExtended() const;
+
+ // Return true if the insn may be extended based on the operand value.
+ bool isExtendable() const;
+
+ // Return true if the operand can be constant extended.
+ bool isOperandExtended(const unsigned short OperandNum) const;
+
+ // Return the min value that a constant extendable operand can have
+ // without being extended.
+ int getMinValue() const;
+
+ // Return the max value that a constant extendable operand can have
+ // without being extended.
+ int getMaxValue() const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
new file mode 100644
index 000000000000..6b1d2d161958
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -0,0 +1,97 @@
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCTargetDesc.h"
+#include "HexagonMCAsmInfo.h"
+#include "InstPrinter/HexagonInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "HexagonGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createHexagonMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitHexagonMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitHexagonMCRegisterInfo(X, Hexagon::R0);
+ return X;
+}
+
+static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitHexagonMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createHexagonMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new HexagonMCAsmInfo(T, TT);
+
+ // VirtualFP = (R30 + #0).
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Hexagon::R30, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeHexagonTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget,
+ createHexagonMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, createHexagonMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
+ createHexagonMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
+ createHexagonMCSubtargetInfo);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
new file mode 100644
index 000000000000..2238b1ae5f35
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -0,0 +1,39 @@
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCTARGETDESC_H
+#define HEXAGONMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+
+extern Target TheHexagonTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Hexagon registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "HexagonGenRegisterInfo.inc"
+
+// Defines symbolic names for the Hexagon instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "HexagonGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
new file mode 100644
index 000000000000..40f6c8d23ea8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheHexagonTarget;
+
+extern "C" void LLVMInitializeHexagonTargetInfo() {
+ RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(TheHexagonTarget, "hexagon", "Hexagon");
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
new file mode 100644
index 000000000000..dda6e247ac4f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -0,0 +1,572 @@
+//===-- MBlazeAsmParser.cpp - Parse MBlaze asm to MCInst instructions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+struct MBlazeOperand;
+
+class MBlazeAsmParser : public MCTargetAsmParser {
+ MCAsmParser &Parser;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ MBlazeOperand *ParseRegister();
+ MBlazeOperand *ParseRegister(SMLoc &StartLoc, SMLoc &EndLoc);
+ MBlazeOperand *ParseImmediate();
+ MBlazeOperand *ParseFsl();
+ MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "MBlazeGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : MCTargetAsmParser(), Parser(_Parser) {}
+
+ virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+/// MBlazeOperand - Instances of this class represent a parsed MBlaze machine
+/// instruction.
+struct MBlazeOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate,
+ Register,
+ Memory,
+ Fsl
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ };
+
+ struct FslImmOp {
+ const MCExpr *Val;
+ };
+
+ union {
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
+ struct FslImmOp FslImm;
+ };
+
+ MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ MBlazeOperand(const MBlazeOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case Register:
+ Reg = o.Reg;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ case Memory:
+ Mem = o.Mem;
+ break;
+ case Fsl:
+ FslImm = o.FslImm;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getFslImm() const {
+ assert(Kind == Fsl && "Invalid access!");
+ return FslImm.Val;
+ }
+
+ unsigned getMemBase() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Base;
+ }
+
+ const MCExpr* getMemOff() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Off;
+ }
+
+ unsigned getMemOffReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.OffReg;
+ }
+
+ bool isToken() const { return Kind == Token; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isMem() const { return Kind == Memory; }
+ bool isFsl() const { return Kind == Fsl; }
+ bool isReg() const { return Kind == Register; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addFslOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getFslImm());
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBase()));
+
+ unsigned RegOff = getMemOffReg();
+ if (RegOff)
+ Inst.addOperand(MCOperand::CreateReg(RegOff));
+ else
+ addExpr(Inst, getMemOff());
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static MBlazeOperand *CreateToken(StringRef Str, SMLoc S) {
+ MBlazeOperand *Op = new MBlazeOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateFslImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Fsl);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateMem(unsigned Base, const MCExpr *Off, SMLoc S,
+ SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.Off = Off;
+ Op->Mem.OffReg = 0;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateMem(unsigned Base, unsigned Off, SMLoc S,
+ SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.OffReg = Off;
+ Op->Mem.Off = 0;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void MBlazeOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case Register:
+ OS << "<register R";
+ OS << getMBlazeRegisterNumbering(getReg()) << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case Memory: {
+ OS << "<memory R";
+ OS << getMBlazeRegisterNumbering(getMemBase());
+ OS << ", ";
+
+ unsigned RegOff = getMemOffReg();
+ if (RegOff)
+ OS << "R" << getMBlazeRegisterNumbering(RegOff);
+ else
+ OS << getMemOff();
+ OS << ">";
+ }
+ break;
+ case Fsl:
+ getFslImm()->print(OS);
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+//
+bool MBlazeAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm)) {
+ default: break;
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction use requires an option to be enabled");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((MBlazeOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Operands.size() != 4)
+ return 0;
+
+ MBlazeOperand &Base = *(MBlazeOperand*)Operands[2];
+ MBlazeOperand &Offset = *(MBlazeOperand*)Operands[3];
+
+ SMLoc S = Base.getStartLoc();
+ SMLoc O = Offset.getStartLoc();
+ SMLoc E = Offset.getEndLoc();
+
+ if (!Base.isReg()) {
+ Error(S, "base address must be a register");
+ return 0;
+ }
+
+ if (!Offset.isReg() && !Offset.isImm()) {
+ Error(O, "offset must be a register or immediate");
+ return 0;
+ }
+
+ MBlazeOperand *Op;
+ if (Offset.isReg())
+ Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getReg(), S, E);
+ else
+ Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getImm(), S, E);
+
+ delete Operands.pop_back_val();
+ delete Operands.pop_back_val();
+ Operands.push_back(Op);
+
+ return Op;
+}
+
+bool MBlazeAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ MBlazeOperand *Reg = ParseRegister(StartLoc, EndLoc);
+ if (!Reg)
+ return true;
+ RegNo = Reg->getReg();
+ return false;
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseRegister() {
+ SMLoc S, E;
+ return ParseRegister(S, E);
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseRegister(SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
+
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return 0;
+
+ unsigned RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
+ if (RegNo == 0)
+ return 0;
+
+ getLexer().Lex();
+ return MBlazeOperand::CreateReg(RegNo, StartLoc, EndLoc);
+}
+
+static unsigned MatchFslRegister(StringRef String) {
+ if (!String.startswith("rfsl"))
+ return -1;
+
+ unsigned regNum;
+ if (String.substr(4).getAsInteger(10,regNum))
+ return -1;
+
+ return regNum;
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseFsl() {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = Parser.getTok().getEndLoc();
+
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::Identifier:
+ unsigned reg = MatchFslRegister(getLexer().getTok().getIdentifier());
+ if (reg >= 16)
+ return 0;
+
+ getLexer().Lex();
+ const MCExpr *EVal = MCConstantExpr::Create(reg,getContext());
+ return MBlazeOperand::CreateFslImm(EVal,S,E);
+ }
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = Parser.getTok().getEndLoc();
+
+ const MCExpr *EVal;
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Identifier:
+ if (getParser().parseExpression(EVal))
+ return 0;
+
+ return MBlazeOperand::CreateImm(EVal, S, E);
+ }
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ MBlazeOperand *Op;
+
+ // Attempt to parse the next token as a register name
+ Op = ParseRegister();
+
+ // Attempt to parse the next token as an FSL immediate
+ if (!Op)
+ Op = ParseFsl();
+
+ // Attempt to parse the next token as an immediate
+ if (!Op)
+ Op = ParseImmediate();
+
+ // If the token could not be parsed then fail
+ if (!Op) {
+ Error(Parser.getTok().getLoc(), "unknown operand");
+ return 0;
+ }
+
+ // Push the parsed operand into the list of operands
+ Operands.push_back(Op);
+ return Op;
+}
+
+/// Parse an mblaze instruction mnemonic followed by its operands.
+bool MBlazeAsmParser::
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // The first operands is the token for the instruction name
+ size_t dotLoc = Name.find('.');
+ Operands.push_back(MBlazeOperand::CreateToken(Name.substr(0,dotLoc),NameLoc));
+ if (dotLoc < Name.size())
+ Operands.push_back(MBlazeOperand::CreateToken(Name.substr(dotLoc),NameLoc));
+
+ // If there are no more operands then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse the first operand
+ if (!ParseOperand(Operands))
+ return true;
+
+ while (getLexer().isNot(AsmToken::EndOfStatement) &&
+ getLexer().is(AsmToken::Comma)) {
+ // Consume the comma token
+ getLexer().Lex();
+
+ // Parse the next operand
+ if (!ParseOperand(Operands))
+ return true;
+ }
+
+ // If the instruction requires a memory operand then we need to
+ // replace the last two operands (base+offset) with a single
+ // memory operand.
+ if (Name.startswith("lw") || Name.startswith("sw") ||
+ Name.startswith("lh") || Name.startswith("sh") ||
+ Name.startswith("lb") || Name.startswith("sb"))
+ return (ParseMemory(Operands) == NULL);
+
+ return false;
+}
+
+/// ParseDirective parses the MBlaze specific directives
+bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmParser() {
+ RegisterMCAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MBlazeGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
new file mode 100644
index 000000000000..c03ab3803b60
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -0,0 +1,719 @@
+//===-- MBlazeDisassembler.cpp - Disassembler for MicroBlaze -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It contains code to translate
+// the data produced by the decoder into MCInsts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeDisassembler.h"
+#include "MBlaze.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+// #include "MBlazeGenDecoderTables.inc"
+// #include "MBlazeGenRegisterNames.inc"
+
+namespace llvm {
+extern const MCInstrDesc MBlazeInsts[];
+}
+
+using namespace llvm;
+
+const uint16_t UNSUPPORTED = -1;
+
+static const uint16_t mblazeBinary2Opcode[] = {
+ MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03
+ MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07
+ MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B
+ MBlaze::ADDIK, MBlaze::RSUBIK, MBlaze::ADDIKC, MBlaze::RSUBIKC, //0C,0D,0E,0F
+
+ MBlaze::MUL, MBlaze::BSRL, MBlaze::IDIV, MBlaze::GETD, //10,11,12,13
+ UNSUPPORTED, UNSUPPORTED, MBlaze::FADD, UNSUPPORTED, //14,15,16,17
+ MBlaze::MULI, MBlaze::BSRLI, UNSUPPORTED, MBlaze::GET, //18,19,1A,1B
+ UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, //1C,1D,1E,1F
+
+ MBlaze::OR, MBlaze::AND, MBlaze::XOR, MBlaze::ANDN, //20,21,22,23
+ MBlaze::SEXT8, MBlaze::MFS, MBlaze::BR, MBlaze::BEQ, //24,25,26,27
+ MBlaze::ORI, MBlaze::ANDI, MBlaze::XORI, MBlaze::ANDNI, //28,29,2A,2B
+ MBlaze::IMM, MBlaze::RTSD, MBlaze::BRI, MBlaze::BEQI, //2C,2D,2E,2F
+
+ MBlaze::LBU, MBlaze::LHU, MBlaze::LW, UNSUPPORTED, //30,31,32,33
+ MBlaze::SB, MBlaze::SH, MBlaze::SW, UNSUPPORTED, //34,35,36,37
+ MBlaze::LBUI, MBlaze::LHUI, MBlaze::LWI, UNSUPPORTED, //38,39,3A,3B
+ MBlaze::SBI, MBlaze::SHI, MBlaze::SWI, UNSUPPORTED, //3C,3D,3E,3F
+};
+
+static unsigned getRD(uint32_t insn) {
+ if (!isMBlazeRegister((insn>>21)&0x1F))
+ return UNSUPPORTED;
+ return getMBlazeRegisterFromNumbering((insn>>21)&0x1F);
+}
+
+static unsigned getRA(uint32_t insn) {
+ if (!getMBlazeRegisterFromNumbering((insn>>16)&0x1F))
+ return UNSUPPORTED;
+ return getMBlazeRegisterFromNumbering((insn>>16)&0x1F);
+}
+
+static unsigned getRB(uint32_t insn) {
+ if (!getMBlazeRegisterFromNumbering((insn>>11)&0x1F))
+ return UNSUPPORTED;
+ return getMBlazeRegisterFromNumbering((insn>>11)&0x1F);
+}
+
+static int64_t getRS(uint32_t insn) {
+ if (!isSpecialMBlazeRegister(insn&0x3FFF))
+ return UNSUPPORTED;
+ return getSpecialMBlazeRegisterFromNumbering(insn&0x3FFF);
+}
+
+static int64_t getIMM(uint32_t insn) {
+ int16_t val = (insn & 0xFFFF);
+ return val;
+}
+
+static int64_t getSHT(uint32_t insn) {
+ int16_t val = (insn & 0x1F);
+ return val;
+}
+
+static unsigned getFLAGS(int32_t insn) {
+ return (insn & 0x7FF);
+}
+
+static int64_t getFSL(uint32_t insn) {
+ int16_t val = (insn & 0xF);
+ return val;
+}
+
+static unsigned decodeMUL(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0: return MBlaze::MUL;
+ case 1: return MBlaze::MULH;
+ case 2: return MBlaze::MULHSU;
+ case 3: return MBlaze::MULHU;
+ }
+}
+
+static unsigned decodeSEXT(uint32_t insn) {
+ switch (insn&0x7FF) {
+ default: return UNSUPPORTED;
+ case 0x60: return MBlaze::SEXT8;
+ case 0x68: return MBlaze::WIC;
+ case 0x64: return MBlaze::WDC;
+ case 0x66: return MBlaze::WDCC;
+ case 0x74: return MBlaze::WDCF;
+ case 0x61: return MBlaze::SEXT16;
+ case 0x41: return MBlaze::SRL;
+ case 0x21: return MBlaze::SRC;
+ case 0x01: return MBlaze::SRA;
+ case 0xE0: return MBlaze::CLZ;
+ }
+}
+
+static unsigned decodeBEQ(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BEQ;
+ case 0x10: return MBlaze::BEQD;
+ case 0x05: return MBlaze::BGE;
+ case 0x15: return MBlaze::BGED;
+ case 0x04: return MBlaze::BGT;
+ case 0x14: return MBlaze::BGTD;
+ case 0x03: return MBlaze::BLE;
+ case 0x13: return MBlaze::BLED;
+ case 0x02: return MBlaze::BLT;
+ case 0x12: return MBlaze::BLTD;
+ case 0x01: return MBlaze::BNE;
+ case 0x11: return MBlaze::BNED;
+ }
+}
+
+static unsigned decodeBEQI(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BEQI;
+ case 0x10: return MBlaze::BEQID;
+ case 0x05: return MBlaze::BGEI;
+ case 0x15: return MBlaze::BGEID;
+ case 0x04: return MBlaze::BGTI;
+ case 0x14: return MBlaze::BGTID;
+ case 0x03: return MBlaze::BLEI;
+ case 0x13: return MBlaze::BLEID;
+ case 0x02: return MBlaze::BLTI;
+ case 0x12: return MBlaze::BLTID;
+ case 0x01: return MBlaze::BNEI;
+ case 0x11: return MBlaze::BNEID;
+ }
+}
+
+static unsigned decodeBR(uint32_t insn) {
+ switch ((insn>>16)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BR;
+ case 0x08: return MBlaze::BRA;
+ case 0x0C: return MBlaze::BRK;
+ case 0x10: return MBlaze::BRD;
+ case 0x14: return MBlaze::BRLD;
+ case 0x18: return MBlaze::BRAD;
+ case 0x1C: return MBlaze::BRALD;
+ }
+}
+
+static unsigned decodeBRI(uint32_t insn) {
+ switch (insn&0x3FFFFFF) {
+ default: break;
+ case 0x0020004: return MBlaze::IDMEMBAR;
+ case 0x0220004: return MBlaze::DMEMBAR;
+ case 0x0420004: return MBlaze::IMEMBAR;
+ }
+
+ switch ((insn>>16)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BRI;
+ case 0x08: return MBlaze::BRAI;
+ case 0x0C: return MBlaze::BRKI;
+ case 0x10: return MBlaze::BRID;
+ case 0x14: return MBlaze::BRLID;
+ case 0x18: return MBlaze::BRAID;
+ case 0x1C: return MBlaze::BRALID;
+ }
+}
+
+static unsigned decodeBSRL(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x2: return MBlaze::BSLL;
+ case 0x1: return MBlaze::BSRA;
+ case 0x0: return MBlaze::BSRL;
+ }
+}
+
+static unsigned decodeBSRLI(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x2: return MBlaze::BSLLI;
+ case 0x1: return MBlaze::BSRAI;
+ case 0x0: return MBlaze::BSRLI;
+ }
+}
+
+static unsigned decodeRSUBK(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::RSUBK;
+ case 0x1: return MBlaze::CMP;
+ case 0x3: return MBlaze::CMPU;
+ }
+}
+
+static unsigned decodeFADD(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::FADD;
+ case 0x080: return MBlaze::FRSUB;
+ case 0x100: return MBlaze::FMUL;
+ case 0x180: return MBlaze::FDIV;
+ case 0x200: return MBlaze::FCMP_UN;
+ case 0x210: return MBlaze::FCMP_LT;
+ case 0x220: return MBlaze::FCMP_EQ;
+ case 0x230: return MBlaze::FCMP_LE;
+ case 0x240: return MBlaze::FCMP_GT;
+ case 0x250: return MBlaze::FCMP_NE;
+ case 0x260: return MBlaze::FCMP_GE;
+ case 0x280: return MBlaze::FLT;
+ case 0x300: return MBlaze::FINT;
+ case 0x380: return MBlaze::FSQRT;
+ }
+}
+
+static unsigned decodeGET(uint32_t insn) {
+ switch ((insn>>10)&0x3F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::GET;
+ case 0x01: return MBlaze::EGET;
+ case 0x02: return MBlaze::AGET;
+ case 0x03: return MBlaze::EAGET;
+ case 0x04: return MBlaze::TGET;
+ case 0x05: return MBlaze::TEGET;
+ case 0x06: return MBlaze::TAGET;
+ case 0x07: return MBlaze::TEAGET;
+ case 0x08: return MBlaze::CGET;
+ case 0x09: return MBlaze::ECGET;
+ case 0x0A: return MBlaze::CAGET;
+ case 0x0B: return MBlaze::ECAGET;
+ case 0x0C: return MBlaze::TCGET;
+ case 0x0D: return MBlaze::TECGET;
+ case 0x0E: return MBlaze::TCAGET;
+ case 0x0F: return MBlaze::TECAGET;
+ case 0x10: return MBlaze::NGET;
+ case 0x11: return MBlaze::NEGET;
+ case 0x12: return MBlaze::NAGET;
+ case 0x13: return MBlaze::NEAGET;
+ case 0x14: return MBlaze::TNGET;
+ case 0x15: return MBlaze::TNEGET;
+ case 0x16: return MBlaze::TNAGET;
+ case 0x17: return MBlaze::TNEAGET;
+ case 0x18: return MBlaze::NCGET;
+ case 0x19: return MBlaze::NECGET;
+ case 0x1A: return MBlaze::NCAGET;
+ case 0x1B: return MBlaze::NECAGET;
+ case 0x1C: return MBlaze::TNCGET;
+ case 0x1D: return MBlaze::TNECGET;
+ case 0x1E: return MBlaze::TNCAGET;
+ case 0x1F: return MBlaze::TNECAGET;
+ case 0x20: return MBlaze::PUT;
+ case 0x22: return MBlaze::APUT;
+ case 0x24: return MBlaze::TPUT;
+ case 0x26: return MBlaze::TAPUT;
+ case 0x28: return MBlaze::CPUT;
+ case 0x2A: return MBlaze::CAPUT;
+ case 0x2C: return MBlaze::TCPUT;
+ case 0x2E: return MBlaze::TCAPUT;
+ case 0x30: return MBlaze::NPUT;
+ case 0x32: return MBlaze::NAPUT;
+ case 0x34: return MBlaze::TNPUT;
+ case 0x36: return MBlaze::TNAPUT;
+ case 0x38: return MBlaze::NCPUT;
+ case 0x3A: return MBlaze::NCAPUT;
+ case 0x3C: return MBlaze::TNCPUT;
+ case 0x3E: return MBlaze::TNCAPUT;
+ }
+}
+
+static unsigned decodeGETD(uint32_t insn) {
+ switch ((insn>>5)&0x3F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::GETD;
+ case 0x01: return MBlaze::EGETD;
+ case 0x02: return MBlaze::AGETD;
+ case 0x03: return MBlaze::EAGETD;
+ case 0x04: return MBlaze::TGETD;
+ case 0x05: return MBlaze::TEGETD;
+ case 0x06: return MBlaze::TAGETD;
+ case 0x07: return MBlaze::TEAGETD;
+ case 0x08: return MBlaze::CGETD;
+ case 0x09: return MBlaze::ECGETD;
+ case 0x0A: return MBlaze::CAGETD;
+ case 0x0B: return MBlaze::ECAGETD;
+ case 0x0C: return MBlaze::TCGETD;
+ case 0x0D: return MBlaze::TECGETD;
+ case 0x0E: return MBlaze::TCAGETD;
+ case 0x0F: return MBlaze::TECAGETD;
+ case 0x10: return MBlaze::NGETD;
+ case 0x11: return MBlaze::NEGETD;
+ case 0x12: return MBlaze::NAGETD;
+ case 0x13: return MBlaze::NEAGETD;
+ case 0x14: return MBlaze::TNGETD;
+ case 0x15: return MBlaze::TNEGETD;
+ case 0x16: return MBlaze::TNAGETD;
+ case 0x17: return MBlaze::TNEAGETD;
+ case 0x18: return MBlaze::NCGETD;
+ case 0x19: return MBlaze::NECGETD;
+ case 0x1A: return MBlaze::NCAGETD;
+ case 0x1B: return MBlaze::NECAGETD;
+ case 0x1C: return MBlaze::TNCGETD;
+ case 0x1D: return MBlaze::TNECGETD;
+ case 0x1E: return MBlaze::TNCAGETD;
+ case 0x1F: return MBlaze::TNECAGETD;
+ case 0x20: return MBlaze::PUTD;
+ case 0x22: return MBlaze::APUTD;
+ case 0x24: return MBlaze::TPUTD;
+ case 0x26: return MBlaze::TAPUTD;
+ case 0x28: return MBlaze::CPUTD;
+ case 0x2A: return MBlaze::CAPUTD;
+ case 0x2C: return MBlaze::TCPUTD;
+ case 0x2E: return MBlaze::TCAPUTD;
+ case 0x30: return MBlaze::NPUTD;
+ case 0x32: return MBlaze::NAPUTD;
+ case 0x34: return MBlaze::TNPUTD;
+ case 0x36: return MBlaze::TNAPUTD;
+ case 0x38: return MBlaze::NCPUTD;
+ case 0x3A: return MBlaze::NCAPUTD;
+ case 0x3C: return MBlaze::TNCPUTD;
+ case 0x3E: return MBlaze::TNCAPUTD;
+ }
+}
+
+static unsigned decodeIDIV(uint32_t insn) {
+ switch (insn&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::IDIV;
+ case 0x2: return MBlaze::IDIVU;
+ }
+}
+
+static unsigned decodeLBU(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LBU;
+ case 0x1: return MBlaze::LBUR;
+ }
+}
+
+static unsigned decodeLHU(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LHU;
+ case 0x1: return MBlaze::LHUR;
+ }
+}
+
+static unsigned decodeLW(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LW;
+ case 0x1: return MBlaze::LWR;
+ case 0x2: return MBlaze::LWX;
+ }
+}
+
+static unsigned decodeSB(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SB;
+ case 0x1: return MBlaze::SBR;
+ }
+}
+
+static unsigned decodeSH(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SH;
+ case 0x1: return MBlaze::SHR;
+ }
+}
+
+static unsigned decodeSW(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SW;
+ case 0x1: return MBlaze::SWR;
+ case 0x2: return MBlaze::SWX;
+ }
+}
+
+static unsigned decodeMFS(uint32_t insn) {
+ switch ((insn>>15)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0:
+ switch ((insn>>16)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::MSRSET;
+ case 0x1: return MBlaze::MSRCLR;
+ }
+ case 0x1:
+ switch ((insn>>14)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::MFS;
+ case 0x1: return MBlaze::MTS;
+ }
+ }
+}
+
+static unsigned decodeOR(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::OR;
+ case 0x400: return MBlaze::PCMPBF;
+ }
+}
+
+static unsigned decodeXOR(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::XOR;
+ case 0x400: return MBlaze::PCMPEQ;
+ }
+}
+
+static unsigned decodeANDN(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::ANDN;
+ case 0x400: return MBlaze::PCMPNE;
+ }
+}
+
+static unsigned decodeRTSD(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x10: return MBlaze::RTSD;
+ case 0x11: return MBlaze::RTID;
+ case 0x12: return MBlaze::RTBD;
+ case 0x14: return MBlaze::RTED;
+ }
+}
+
+static unsigned getOPCODE(uint32_t insn) {
+ unsigned opcode = mblazeBinary2Opcode[ (insn>>26)&0x3F ];
+ switch (opcode) {
+ case MBlaze::MUL: return decodeMUL(insn);
+ case MBlaze::SEXT8: return decodeSEXT(insn);
+ case MBlaze::BEQ: return decodeBEQ(insn);
+ case MBlaze::BEQI: return decodeBEQI(insn);
+ case MBlaze::BR: return decodeBR(insn);
+ case MBlaze::BRI: return decodeBRI(insn);
+ case MBlaze::BSRL: return decodeBSRL(insn);
+ case MBlaze::BSRLI: return decodeBSRLI(insn);
+ case MBlaze::RSUBK: return decodeRSUBK(insn);
+ case MBlaze::FADD: return decodeFADD(insn);
+ case MBlaze::GET: return decodeGET(insn);
+ case MBlaze::GETD: return decodeGETD(insn);
+ case MBlaze::IDIV: return decodeIDIV(insn);
+ case MBlaze::LBU: return decodeLBU(insn);
+ case MBlaze::LHU: return decodeLHU(insn);
+ case MBlaze::LW: return decodeLW(insn);
+ case MBlaze::SB: return decodeSB(insn);
+ case MBlaze::SH: return decodeSH(insn);
+ case MBlaze::SW: return decodeSW(insn);
+ case MBlaze::MFS: return decodeMFS(insn);
+ case MBlaze::OR: return decodeOR(insn);
+ case MBlaze::XOR: return decodeXOR(insn);
+ case MBlaze::ANDN: return decodeANDN(insn);
+ case MBlaze::RTSD: return decodeRTSD(insn);
+ default: return opcode;
+ }
+}
+
+//
+// Public interface for the disassembler
+//
+
+MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
+ // The machine instruction.
+ uint32_t insn;
+ uint64_t read;
+ uint8_t bytes[4];
+
+ // By default we consume 1 byte on failure
+ size = 1;
+
+ // We want to read exactly 4 bytes of data.
+ if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4)
+ return Fail;
+
+ // Encoded as a big-endian 32-bit word in the stream.
+ insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0);
+
+ // Get the MCInst opcode from the binary instruction and make sure
+ // that it is a valid instruction.
+ unsigned opcode = getOPCODE(insn);
+ if (opcode == UNSUPPORTED)
+ return Fail;
+
+ instr.setOpcode(opcode);
+
+ unsigned RD = getRD(insn);
+ unsigned RA = getRA(insn);
+ unsigned RB = getRB(insn);
+ unsigned RS = getRS(insn);
+
+ uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
+ switch ((tsFlags & MBlazeII::FormMask)) {
+ default:
+ return Fail;
+
+ case MBlazeII::FC:
+ break;
+
+ case MBlazeII::FRRRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FRRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FRI:
+ switch (opcode) {
+ default:
+ return Fail;
+ case MBlaze::MFS:
+ if (RD == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+ break;
+ case MBlaze::MTS:
+ if (RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+ case MBlaze::MSRSET:
+ case MBlaze::MSRCLR:
+ if (RD == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
+ break;
+ }
+ break;
+
+ case MBlazeII::FRRI:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ switch (opcode) {
+ default:
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+ case MBlaze::BSRLI:
+ case MBlaze::BSRAI:
+ case MBlaze::BSLLI:
+ instr.addOperand(MCOperand::CreateImm(insn&0x1F));
+ break;
+ }
+ break;
+
+ case MBlazeII::FCRR:
+ if (RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FCRI:
+ if (RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FRCR:
+ if (RD == UNSUPPORTED || RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRCI:
+ if (RD == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FCCR:
+ if (RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FCCI:
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FRRCI:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
+ break;
+
+ case MBlazeII::FRRC:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FRCX:
+ if (RD == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FRCS:
+ if (RD == UNSUPPORTED || RS == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RS));
+ break;
+
+ case MBlazeII::FCRCS:
+ if (RS == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RS));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FCRCX:
+ if (RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FCX:
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FCR:
+ if (RB == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRIR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+ }
+
+ // We always consume 4 bytes of data on success
+ size = 4;
+
+ return Success;
+}
+
+static MCDisassembler *createMBlazeDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new MBlazeDisassembler(STI);
+}
+
+extern "C" void LLVMInitializeMBlazeDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheMBlazeTarget,
+ createMBlazeDisassembler);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
new file mode 100644
index 000000000000..b8ff8f607265
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -0,0 +1,49 @@
+//===-- MBlazeDisassembler.h - Disassembler for MicroBlaze -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It it the header for
+// MBlazeDisassembler, a subclass of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEDISASSEMBLER_H
+#define MBLAZEDISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+/// MBlazeDisassembler - Disassembler for all MBlaze platforms.
+class MBlazeDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ MBlazeDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
+ }
+
+ ~MBlazeDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ MCDisassembler::DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
new file mode 100644
index 000000000000..fc2b3d51b44c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -0,0 +1,71 @@
+//===-- MBlazeInstPrinter.cpp - Convert MBlaze MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MBlazeInstPrinter.h"
+#include "MBlaze.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MBlazeGenAsmWriter.inc"
+
+void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << (int32_t)Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+void MBlazeInstPrinter::printFSLImm(const MCInst *MI, int OpNo,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (MO.isImm())
+ O << "rfsl" << MO.getImm();
+ else
+ printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printUnsignedImm(const MCInst *MI, int OpNo,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (MO.isImm())
+ O << (uint32_t)MO.getImm();
+ else
+ printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printMemOperand(const MCInst *MI, int OpNo,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, OpNo, O, NULL);
+ O << ", ";
+ printOperand(MI, OpNo+1, O, NULL);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
new file mode 100644
index 000000000000..51ba7c359a1b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -0,0 +1,43 @@
+//= MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax -*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTPRINTER_H
+#define MBLAZEINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class MBlazeInstPrinter : public MCInstPrinter {
+ public:
+ MBlazeInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printFSLImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int OpNo,raw_ostream &O,
+ const char *Modifier = 0);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.h b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
new file mode 100644
index 000000000000..1399b85d34d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
@@ -0,0 +1,32 @@
+//===-- MBlaze.h - Top-level interface for MBlaze ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MBlaze back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MBLAZE_H
+#define TARGET_MBLAZE_H
+
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MBlazeTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+
+ FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
+ FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.td b/contrib/llvm/lib/Target/MBlaze/MBlaze.td
new file mode 100644
index 000000000000..c2888553c5e3
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.td
@@ -0,0 +1,73 @@
+//===-- MBlaze.td - Describe the MBlaze Target Machine -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MBlaze target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MBlazeRegisterInfo.td"
+include "MBlazeSchedule.td"
+include "MBlazeIntrinsics.td"
+include "MBlazeInstrInfo.td"
+include "MBlazeCallingConv.td"
+
+def MBlazeInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Microblaze Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true",
+ "Implements barrel shifter">;
+def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true",
+ "Implements hardware divider">;
+def FeatureMul : SubtargetFeature<"mul", "HasMul", "true",
+ "Implements hardware multiplier">;
+def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true",
+ "Implements pattern compare instruction">;
+def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true",
+ "Implements floating point unit">;
+def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true",
+ "Implements multiplier with 64-bit result">;
+def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
+ "Implements sqrt and floating point convert">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze processors supported.
+//===----------------------------------------------------------------------===//
+
+def : Processor<"mblaze", NoItineraries, []>;
+def : Processor<"mblaze3", MBlazePipe3Itineraries, []>;
+def : Processor<"mblaze5", MBlazePipe5Itineraries, []>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+def MBlazeAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MBlaze : Target {
+ let InstructionSet = MBlazeInstrInfo;
+ let AssemblyWriters = [MBlazeAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
new file mode 100644
index 000000000000..7dafaef0af08
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -0,0 +1,326 @@
+//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MBlaze assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-asm-printer"
+
+#include "MBlaze.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeMCInstLower.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+ class MBlazeAsmPrinter : public AsmPrinter {
+ const MBlazeSubtarget *Subtarget;
+ public:
+ explicit MBlazeAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Assembly Printer";
+ }
+
+ void printSavedRegsBitmask();
+ void emitFrameDirective();
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual void EmitFunctionEntryLabel();
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+// #include "MBlazeGenAsmWriter.inc"
+
+//===----------------------------------------------------------------------===//
+//
+// MBlaze Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contain a little endian bitset indicating which registers are
+// saved on function prologue (e.g. with a 0x80000000 mask, the
+// assembler knows the register 31 (RA) is saved at prologue.
+// offset - the position before stack pointer subtraction indicating where
+// the first saved register on prologue is located. (e.g. with a
+//
+// Consider the following function prologue:
+//
+// .frame R19,48,R15
+// .mask 0xc0000000,-8
+// addiu R1, R1, -48
+// sw R15, 40(R1)
+// sw R19, 36(R1)
+//
+// With a 0xc0000000 mask, the assembler knows the register 15 (R15) and
+// 19 (R19) are saved at prologue. As the save order on prologue is from
+// left to right, R15 is saved first. A -8 offset means that after the
+// stack pointer subtration, the first register in the mask (R15) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+// Print a 32 bit hex number with all numbers.
+static void printHex32(unsigned int Value, raw_ostream &O) {
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O.write_hex((Value & (0xF << (i*4))) >> (i*4));
+}
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MBlazeAsmPrinter::printSavedRegsBitmask() {
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ // CPU Saved Registers Bitmasks
+ unsigned int CPUBitmask = 0;
+
+ // Set the CPU Bitmasks
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = getMBlazeRegisterNumbering(Reg);
+ if (MBlaze::GPRRegClass.contains(Reg))
+ CPUBitmask |= (1 << RegNum);
+ }
+
+ // Return Address and Frame registers must also be set in CPUBitmask.
+ if (TFI->hasFP(*MF))
+ CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getFrameRegister(*MF)));
+
+ if (MFI->adjustsStack())
+ CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getRARegister()));
+
+ // Print CPUBitmask
+ OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask));
+}
+
+/// Frame Directive
+void MBlazeAsmPrinter::emitFrameDirective() {
+ if (!OutStreamer.hasRawTextSupport())
+ return;
+
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ unsigned stkReg = RI.getFrameRegister(*MF);
+ unsigned retReg = RI.getRARegister();
+ unsigned stkSze = MF->getFrameInfo()->getStackSize();
+
+ OutStreamer.EmitRawText("\t.frame\t" +
+ Twine(MBlazeInstPrinter::getRegisterName(stkReg)) +
+ "," + Twine(stkSze) + "," +
+ Twine(MBlazeInstPrinter::getRegisterName(retReg)));
+}
+
+void MBlazeAsmPrinter::EmitFunctionEntryLabel() {
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ AsmPrinter::EmitFunctionEntryLabel();
+}
+
+void MBlazeAsmPrinter::EmitFunctionBodyStart() {
+ if (!OutStreamer.hasRawTextSupport())
+ return;
+
+ emitFrameDirective();
+ printSavedRegsBitmask();
+}
+
+void MBlazeAsmPrinter::EmitFunctionBodyEnd() {
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+}
+
+//===----------------------------------------------------------------------===//
+void MBlazeAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MBlazeMCInstLower MCInstLowering(OutContext, *this);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+// Print out an operand for an inline asm expression.
+bool MBlazeAsmPrinter::
+PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << MBlazeInstPrinter::getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int32_t)MO.getImm();
+ break;
+
+ case MachineOperand::MO_FPImmediate: {
+ const ConstantFP *fp = MO.getFPImm();
+ printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue(), O);
+ O << ";\t# immediate = " << *fp;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ if (MO.getOffset())
+ O << "+" << MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+}
+
+void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (uint32_t)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << "rfsl" << (unsigned int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier) {
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+}
+
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MBlazeAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->isTerminator())
+ ; // Noop
+ return I == Pred->end() || !I->isBarrier();
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmPrinter() {
+ RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td b/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
new file mode 100644
index 000000000000..00a4219d047b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -0,0 +1,24 @@
+//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MBlaze architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze ABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def RetCC_MBlaze : CallingConv<[
+ // i32 are returned in registers R3, R4
+ CCIfType<[i32,f32], CCAssignToReg<[R3, R4]>>
+]>;
+
+def CC_MBlaze : CallingConv<[
+ CCIfType<[i32,f32], CCCustom<"CC_MBlaze_AssignReg">>,
+ CCIfType<[i32,f32], CCAssignToStack<4, 4>>
+]>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
new file mode 100644
index 000000000000..3d0d1cecd1f1
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -0,0 +1,254 @@
+//===-- DelaySlotFiller.cpp - MBlaze delay slot filler --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that attempts to fill instructions with delay slots. If no
+// instructions can be moved into the delay slot then a NOP is placed there.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+static cl::opt<bool> MBDisableDelaySlotFiller(
+ "disable-mblaze-delay-filler",
+ cl::init(false),
+ cl::desc("Disable the MBlaze delay slot filter."),
+ cl::Hidden);
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) {
+ // Any instruction with an immediate mode operand greater than
+ // 16-bits requires an implicit IMM instruction.
+ unsigned numOper = candidate->getNumOperands();
+ for (unsigned op = 0; op < numOper; ++op) {
+ MachineOperand &mop = candidate->getOperand(op);
+
+ // The operand requires more than 16-bits to represent.
+ if (mop.isImm() && (mop.getImm() < -0x8000 || mop.getImm() > 0x7fff))
+ return true;
+
+ // We must assume that unknown immediate values require more than
+ // 16-bits to represent.
+ if (mop.isGlobal() || mop.isSymbol() || mop.isJTI() || mop.isCPI())
+ return true;
+
+ // FIXME: we could probably check to see if the FP value happens
+ // to not need an IMM instruction. For now we just always
+ // assume that FP values do.
+ if (mop.isFPImm())
+ return true;
+ }
+
+ return false;
+}
+
+static unsigned getLastRealOperand(MachineBasicBlock::iterator &instr) {
+ switch (instr->getOpcode()) {
+ default: return instr->getNumOperands();
+
+ // These instructions have a variable number of operands but the first two
+ // are the "real" operands that we care about during hazard detection.
+ case MBlaze::BRLID:
+ case MBlaze::BRALID:
+ case MBlaze::BRLD:
+ case MBlaze::BRALD:
+ return 2;
+ }
+}
+
+static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
+ MachineBasicBlock::iterator &slot) {
+ // Hazard check
+ MachineBasicBlock::iterator a = candidate;
+ MachineBasicBlock::iterator b = slot;
+
+ // MBB layout:-
+ // candidate := a0 = operation(a1, a2)
+ // ...middle bit...
+ // slot := b0 = operation(b1, b2)
+
+ // Possible hazards:-/
+ // 1. a1 or a2 was written during the middle bit
+ // 2. a0 was read or written during the middle bit
+ // 3. a0 is one or more of {b0, b1, b2}
+ // 4. b0 is one or more of {a1, a2}
+ // 5. a accesses memory, and the middle bit
+ // contains a store operation.
+ bool a_is_memory = candidate->mayLoad() || candidate->mayStore();
+
+ // Determine the number of operands in the slot instruction and in the
+ // candidate instruction.
+ const unsigned aend = getLastRealOperand(a);
+ const unsigned bend = getLastRealOperand(b);
+
+ // Check hazards type 1, 2 and 5 by scanning the middle bit
+ MachineBasicBlock::iterator m = a;
+ for (++m; m != b; ++m) {
+ for (unsigned aop = 0; aop<aend; ++aop) {
+ bool aop_is_reg = a->getOperand(aop).isReg();
+ if (!aop_is_reg) continue;
+
+ bool aop_is_def = a->getOperand(aop).isDef();
+ unsigned aop_reg = a->getOperand(aop).getReg();
+
+ const unsigned mend = getLastRealOperand(m);
+ for (unsigned mop = 0; mop<mend; ++mop) {
+ bool mop_is_reg = m->getOperand(mop).isReg();
+ if (!mop_is_reg) continue;
+
+ bool mop_is_def = m->getOperand(mop).isDef();
+ unsigned mop_reg = m->getOperand(mop).getReg();
+
+ if (aop_is_def && (mop_reg == aop_reg))
+ return true; // Hazard type 2, because aop = a0
+ else if (mop_is_def && (mop_reg == aop_reg))
+ return true; // Hazard type 1, because aop in {a1, a2}
+ }
+ }
+
+ // Check hazard type 5
+ if (a_is_memory && m->mayStore())
+ return true;
+ }
+
+ // Check hazard type 3 & 4
+ for (unsigned aop = 0; aop<aend; ++aop) {
+ if (a->getOperand(aop).isReg()) {
+ unsigned aop_reg = a->getOperand(aop).getReg();
+
+ for (unsigned bop = 0; bop<bend; ++bop) {
+ if (b->getOperand(bop).isReg() && !b->getOperand(bop).isImplicit()) {
+ unsigned bop_reg = b->getOperand(bop).getReg();
+ if (aop_reg == bop_reg)
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+static bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate) {
+ if (candidate == MBB.begin())
+ return false;
+
+ --candidate;
+ return (candidate->hasDelaySlot());
+}
+
+static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
+ if (!I->hasUnmodeledSideEffects())
+ return false;
+
+ unsigned op = I->getOpcode();
+ if (op == MBlaze::ADDK || op == MBlaze::ADDIK ||
+ op == MBlaze::ADDC || op == MBlaze::ADDIC ||
+ op == MBlaze::ADDKC || op == MBlaze::ADDIKC ||
+ op == MBlaze::RSUBK || op == MBlaze::RSUBIK ||
+ op == MBlaze::RSUBC || op == MBlaze::RSUBIC ||
+ op == MBlaze::RSUBKC || op == MBlaze::RSUBIKC)
+ return false;
+
+ return true;
+}
+
+static MachineBasicBlock::iterator
+findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
+ MachineBasicBlock::iterator I = slot;
+ while (true) {
+ if (I == MBB.begin())
+ break;
+
+ --I;
+ if (I->hasDelaySlot() || I->isBranch() || isDelayFiller(MBB,I) ||
+ I->isCall() || I->isReturn() || I->isBarrier() ||
+ hasUnknownSideEffects(I))
+ break;
+
+ if (hasImmInstruction(I) || delayHasHazard(I,slot))
+ continue;
+
+ return I;
+ }
+
+ return MBB.end();
+}
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->hasDelaySlot()) {
+ MachineBasicBlock::iterator D = MBB.end();
+ MachineBasicBlock::iterator J = I;
+
+ if (!MBDisableDelaySlotFiller)
+ D = findDelayInstr(MBB,I);
+
+ ++FilledSlots;
+ Changed = true;
+
+ if (D == MBB.end())
+ BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+ else
+ MBB.splice(++J, &MBB, D);
+ }
+ return Changed;
+}
+
+/// createMBlazeDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in MBlaze MachineFunctions
+FunctionPass *llvm::createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &tm) {
+ return new Filler(tm);
+}
+
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
new file mode 100644
index 000000000000..172304bd5b45
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -0,0 +1,488 @@
+//===-- MBlazeFrameLowering.cpp - MBlaze Frame Information ---------------====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-lowering"
+
+#include "MBlazeFrameLowering.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+static cl::opt<bool> MBDisableStackAdjust(
+ "disable-mblaze-stack-adjust",
+ cl::init(false),
+ cl::desc("Disable MBlaze stack layout adjustment."),
+ cl::Hidden);
+
+static void replaceFrameIndexes(MachineFunction &MF,
+ SmallVector<std::pair<int,int64_t>, 16> &FR) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const SmallVector<std::pair<int,int64_t>, 16>::iterator FRB = FR.begin();
+ const SmallVector<std::pair<int,int64_t>, 16>::iterator FRE = FR.end();
+
+ SmallVector<std::pair<int,int64_t>, 16>::iterator FRI = FRB;
+ for (; FRI != FRE; ++FRI) {
+ MFI->RemoveStackObject(FRI->first);
+ int NFI = MFI->CreateFixedObject(4, FRI->second, true);
+ MBlazeFI->recordReplacement(FRI->first, NFI);
+
+ for (MachineFunction::iterator MB=MF.begin(), ME=MF.end(); MB!=ME; ++MB) {
+ MachineBasicBlock::iterator MBB = MB->begin();
+ const MachineBasicBlock::iterator MBE = MB->end();
+
+ for (; MBB != MBE; ++MBB) {
+ MachineInstr::mop_iterator MIB = MBB->operands_begin();
+ const MachineInstr::mop_iterator MIE = MBB->operands_end();
+
+ for (MachineInstr::mop_iterator MII = MIB; MII != MIE; ++MII) {
+ if (!MII->isFI() || MII->getIndex() != FRI->first) continue;
+ DEBUG(dbgs() << "FOUND FI#" << MII->getIndex() << "\n");
+ MII->setIndex(NFI);
+ }
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up.
+//
+//===----------------------------------------------------------------------===//
+
+static void analyzeFrameIndexes(MachineFunction &MF) {
+ if (MBDisableStackAdjust) return;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ MachineRegisterInfo::livein_iterator LII = MRI.livein_begin();
+ MachineRegisterInfo::livein_iterator LIE = MRI.livein_end();
+ const SmallVector<int, 16> &LiveInFI = MBlazeFI->getLiveIn();
+ SmallVector<MachineInstr*, 16> EraseInstr;
+ SmallVector<std::pair<int,int64_t>, 16> FrameRelocate;
+
+ MachineBasicBlock *MBB = MF.getBlockNumbered(0);
+ MachineBasicBlock::iterator MIB = MBB->begin();
+ MachineBasicBlock::iterator MIE = MBB->end();
+
+ int StackAdjust = 0;
+ int StackOffset = -28;
+
+ // In this loop we are searching frame indexes that corrospond to incoming
+ // arguments that are already in the stack. We look for instruction sequences
+ // like the following:
+ //
+ // LWI REG, FI1, 0
+ // ...
+ // SWI REG, FI2, 0
+ //
+ // As long as there are no defs of REG in the ... part, we can eliminate
+ // the SWI instruction because the value has already been stored to the
+ // stack by the caller. All we need to do is locate FI at the correct
+ // stack location according to the calling convensions.
+ //
+ // Additionally, if the SWI operation kills the def of REG then we don't
+ // need the LWI operation so we can erase it as well.
+ for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i) {
+ for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+ if (I->getOpcode() != MBlaze::LWI || I->getNumOperands() != 3 ||
+ !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+ I->getOperand(1).getIndex() != LiveInFI[i]) continue;
+
+ unsigned FIReg = I->getOperand(0).getReg();
+ MachineBasicBlock::iterator SI = I;
+ for (SI++; SI != MIE; ++SI) {
+ if (!SI->getOperand(0).isReg() ||
+ !SI->getOperand(1).isFI() ||
+ SI->getOpcode() != MBlaze::SWI) continue;
+
+ int FI = SI->getOperand(1).getIndex();
+ if (SI->getOperand(0).getReg() != FIReg ||
+ MFI->isFixedObjectIndex(FI) ||
+ MFI->getObjectSize(FI) != 4) continue;
+
+ if (SI->getOperand(0).isDef()) break;
+
+ if (SI->getOperand(0).isKill()) {
+ DEBUG(dbgs() << "LWI for FI#" << I->getOperand(1).getIndex()
+ << " removed\n");
+ EraseInstr.push_back(I);
+ }
+
+ EraseInstr.push_back(SI);
+ DEBUG(dbgs() << "SWI for FI#" << FI << " removed\n");
+
+ FrameRelocate.push_back(std::make_pair(FI,StackOffset));
+ DEBUG(dbgs() << "FI#" << FI << " relocated to " << StackOffset << "\n");
+
+ StackOffset -= 4;
+ StackAdjust += 4;
+ break;
+ }
+ }
+ }
+
+ // In this loop we are searching for frame indexes that corrospond to
+ // incoming arguments that are in registers. We look for instruction
+ // sequences like the following:
+ //
+ // ... SWI REG, FI, 0
+ //
+ // As long as the ... part does not define REG and if REG is an incoming
+ // parameter register then we know that, according to ABI convensions, the
+ // caller has allocated stack space for it already. Instead of allocating
+ // stack space on our frame, we record the correct location in the callers
+ // frame.
+ for (MachineRegisterInfo::livein_iterator LI = LII; LI != LIE; ++LI) {
+ for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+ if (I->definesRegister(LI->first))
+ break;
+
+ if (I->getOpcode() != MBlaze::SWI || I->getNumOperands() != 3 ||
+ !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+ I->getOperand(1).getIndex() < 0) continue;
+
+ if (I->getOperand(0).getReg() == LI->first) {
+ int FI = I->getOperand(1).getIndex();
+ MBlazeFI->recordLiveIn(FI);
+
+ int FILoc = 0;
+ switch (LI->first) {
+ default: llvm_unreachable("invalid incoming parameter!");
+ case MBlaze::R5: FILoc = -4; break;
+ case MBlaze::R6: FILoc = -8; break;
+ case MBlaze::R7: FILoc = -12; break;
+ case MBlaze::R8: FILoc = -16; break;
+ case MBlaze::R9: FILoc = -20; break;
+ case MBlaze::R10: FILoc = -24; break;
+ }
+
+ StackAdjust += 4;
+ FrameRelocate.push_back(std::make_pair(FI,FILoc));
+ DEBUG(dbgs() << "FI#" << FI << " relocated to " << FILoc << "\n");
+ break;
+ }
+ }
+ }
+
+ // Go ahead and erase all of the instructions that we determined were
+ // no longer needed.
+ for (int i = 0, e = EraseInstr.size(); i < e; ++i)
+ MBB->erase(EraseInstr[i]);
+
+ // Replace all of the frame indexes that we have relocated with new
+ // fixed object frame indexes.
+ replaceFrameIndexes(MF, FrameRelocate);
+}
+
+static void interruptFrameLayout(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ CallingConv::ID CallConv = F->getCallingConv();
+
+ // If this function is not using either the interrupt_handler
+ // calling convention or the save_volatiles calling convention
+ // then we don't need to do any additional frame layout.
+ if (CallConv != CallingConv::MBLAZE_INTR &&
+ CallConv != CallingConv::MBLAZE_SVOL)
+ return;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ // Determine if the calling convention is the interrupt_handler
+ // calling convention. Some pieces of the prologue and epilogue
+ // only need to be emitted if we are lowering and interrupt handler.
+ bool isIntr = CallConv == CallingConv::MBLAZE_INTR;
+
+ // Determine where to put prologue and epilogue additions
+ MachineBasicBlock &MENT = MF.front();
+ MachineBasicBlock &MEXT = MF.back();
+
+ MachineBasicBlock::iterator MENTI = MENT.begin();
+ MachineBasicBlock::iterator MEXTI = prior(MEXT.end());
+
+ DebugLoc ENTDL = MENTI != MENT.end() ? MENTI->getDebugLoc() : DebugLoc();
+ DebugLoc EXTDL = MEXTI != MEXT.end() ? MEXTI->getDebugLoc() : DebugLoc();
+
+ // Store the frame indexes generated during prologue additions for use
+ // when we are generating the epilogue additions.
+ SmallVector<int, 10> VFI;
+
+ // Build the prologue SWI for R3 - R12 if needed. Note that R11 must
+ // always have a SWI because it is used when processing RMSR.
+ for (unsigned r = MBlaze::R3; r <= MBlaze::R12; ++r) {
+ if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
+
+ int FI = MFI->CreateStackObject(4,4,false,false);
+ VFI.push_back(FI);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), r)
+ .addFrameIndex(FI).addImm(0);
+ }
+
+ // Build the prologue SWI for R17, R18
+ int R17FI = MFI->CreateStackObject(4,4,false,false);
+ int R18FI = MFI->CreateStackObject(4,4,false,false);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R17)
+ .addFrameIndex(R17FI).addImm(0);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R18)
+ .addFrameIndex(R18FI).addImm(0);
+
+ // Buid the prologue SWI and the epilogue LWI for RMSR if needed
+ if (isIntr) {
+ int MSRFI = MFI->CreateStackObject(4,4,false,false);
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::MFS), MBlaze::R11)
+ .addReg(MBlaze::RMSR);
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R11)
+ .addFrameIndex(MSRFI).addImm(0);
+
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R11)
+ .addFrameIndex(MSRFI).addImm(0);
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::MTS), MBlaze::RMSR)
+ .addReg(MBlaze::R11);
+ }
+
+ // Build the epilogue LWI for R17, R18
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R18)
+ .addFrameIndex(R18FI).addImm(0);
+
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R17)
+ .addFrameIndex(R17FI).addImm(0);
+
+ // Build the epilogue LWI for R3 - R12 if needed
+ for (unsigned r = MBlaze::R12, i = VFI.size(); r >= MBlaze::R3; --r) {
+ if (!MRI.isPhysRegUsed(r)) continue;
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), r)
+ .addFrameIndex(VFI[--i]).addImm(0);
+ }
+}
+
+static void determineFrameLayout(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ // Replace the dummy '0' SPOffset by the negative offsets, as explained on
+ // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid
+ // the approach done by calculateFrameObjectOffsets to the stack frame.
+ MBlazeFI->adjustLoadArgsFI(MFI);
+ MBlazeFI->adjustStoreVarArgsFI(MFI);
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+ DEBUG(dbgs() << "Original Frame Size: " << FrameSize << "\n" );
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ // unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+ MFI->setStackSize(FrameSize);
+ DEBUG(dbgs() << "Aligned Frame Size: " << FrameSize << "\n" );
+}
+
+int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
+ const {
+ const MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->hasReplacement(FI))
+ FI = MBlazeFI->getReplacement(FI);
+ return TargetFrameLowering::getFrameIndexOffset(MF,FI);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects();
+}
+
+void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == CallingConv::MBLAZE_INTR;
+
+ // Determine the correct frame layout
+ determineFrameLayout(MF);
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack() && !requiresRA) return;
+
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ // Adjust stack : addi R1, R1, -imm
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDIK), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-StackSize);
+
+ // swi R15, R1, stack_loc
+ if (MFI->adjustsStack() || requiresRA) {
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R15).addReg(MBlaze::R1).addImm(RAOffset);
+ }
+
+ if (hasFP(MF)) {
+ // swi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R19).addReg(MBlaze::R1).addImm(FPOffset);
+
+ // add R19, R1, R0
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19)
+ .addReg(MBlaze::R1).addReg(MBlaze::R0);
+ }
+}
+
+void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == CallingConv::MBLAZE_INTR;
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ if (hasFP(MF)) {
+ // add R1, R19, R0
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1)
+ .addReg(MBlaze::R19).addReg(MBlaze::R0);
+
+ // lwi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19)
+ .addReg(MBlaze::R1).addImm(FPOffset);
+ }
+
+ // lwi R15, R1, stack_loc
+ if (MFI->adjustsStack() || requiresRA) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
+ .addReg(MBlaze::R1).addImm(RAOffset);
+ }
+
+ // Get the number of bytes from FrameInfo
+ int StackSize = (int) MFI->getStackSize();
+
+ // addi R1, R1, imm
+ if (StackSize) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDIK), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(StackSize);
+ }
+}
+
+// Eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
+void MBlazeFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have a frame pointer, turn the adjcallstackup instruction into a
+ // 'addi r1, r1, -<amt>' and the adjcallstackdown instruction into
+ // 'addi r1, r1, <amt>'
+ MachineInstr *Old = I;
+ int Amount = Old->getOperand(0).getImm() + 4;
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
+ New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
+ New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+
+void MBlazeFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == CallingConv::MBLAZE_INTR;
+
+ if (MFI->adjustsStack() || requiresRA) {
+ MBlazeFI->setRAStackOffset(0);
+ MFI->CreateFixedObject(4,0,true);
+ }
+
+ if (hasFP(MF)) {
+ MBlazeFI->setFPStackOffset(4);
+ MFI->CreateFixedObject(4,4,true);
+ }
+
+ interruptFrameLayout(MF);
+ analyzeFrameIndexes(MF);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
new file mode 100644
index 000000000000..f4228c5f0890
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -0,0 +1,56 @@
+//=- MBlazeFrameLowering.h - Define frame lowering for MicroBlaze -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_FRAMEINFO_H
+#define MBLAZE_FRAMEINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class MBlazeSubtarget;
+
+class MBlazeFrameLowering : public TargetFrameLowering {
+protected:
+ const MBlazeSubtarget &STI;
+
+public:
+ explicit MBlazeFrameLowering(const MBlazeSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 4, 0), STI(sti) {
+ }
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
new file mode 100644
index 000000000000..78ad24debb1b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -0,0 +1,277 @@
+//===-- MBlazeISelDAGToDAG.cpp - A dag to dag inst selector for MBlaze ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MBlaze target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-isel"
+#include "MBlaze.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlazeDAGToDAGISel - MBlaze specific code to select MBlaze machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class MBlazeDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MBlazeTargetMachine.
+ MBlazeTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MBlazeSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MBlazeSubtarget &Subtarget;
+
+public:
+ explicit MBlazeDAGToDAGISel(MBlazeTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MBlazeSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MBlaze DAG->DAG Pattern Instruction Selection";
+ }
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MBlazeGenDAGISel.inc"
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const MBlazeTargetMachine &getTargetMachine() {
+ return static_cast<const MBlazeTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const MBlazeInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *Select(SDNode *N);
+
+ // Address Selection
+ bool SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index);
+ bool SelectAddrRegImm(SDValue N, SDValue &Disp, SDValue &Base);
+
+ // getI32Imm - Return a target constant with the specified value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+};
+
+}
+
+/// isIntS32Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 32-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS32Immediate(SDNode *N, int32_t &Imm) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::Constant)
+ return false;
+
+ Imm = (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS32Immediate(SDValue Op, int32_t &Imm) {
+ return isIntS32Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified addressed, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool MBlazeDAGToDAGISel::
+SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index) {
+ if (N.getOpcode() == ISD::FrameIndex) return false;
+ if (N.getOpcode() == ISD::TargetExternalSymbol ||
+ N.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ int32_t imm = 0;
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ if (isIntS32Immediate(N.getOperand(1), imm))
+ return false; // r+i
+
+ if (N.getOperand(0).getOpcode() == ISD::TargetJumpTable ||
+ N.getOperand(1).getOpcode() == ISD::TargetJumpTable)
+ return false; // jump tables.
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 32-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool MBlazeDAGToDAGISel::
+SelectAddrRegImm(SDValue N, SDValue &Base, SDValue &Disp) {
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddrRegReg(N, Base, Disp))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int32_t imm = 0;
+ if (isIntS32Immediate(N.getOperand(1), imm)) {
+ Disp = CurDAG->getTargetConstant(imm, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+ uint32_t Imm = CN->getZExtValue();
+ Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0));
+ Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0));
+ return true;
+ }
+
+ Disp = CurDAG->getTargetConstant(0, TM.getTargetLowering()->getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// Select instructions not customized! Used for
+/// expanded, promoted and normal instructions
+SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode())
+ return NULL;
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ switch (Opcode) {
+ default: break;
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ SDValue imm = CurDAG->getTargetConstant(0, MVT::i32);
+ int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT VT = Node->getValueType(0);
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
+ unsigned Opc = MBlaze::ADDIK;
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm);
+ return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm);
+ }
+
+
+ /// Handle direct and indirect calls when using PIC. On PIC, when
+ /// GOT is smaller than about 64k (small code) the GA target is
+ /// loaded with only one instruction. Otherwise GA's target must
+ /// be loaded with 3 instructions.
+ case MBlazeISD::JmpLink: {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Callee = Node->getOperand(1);
+ SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32);
+ SDValue InFlag(0, 0);
+
+ if ((isa<GlobalAddressSDNode>(Callee)) ||
+ (isa<ExternalSymbolSDNode>(Callee)))
+ {
+ /// Direct call for global addresses and external symbols
+ SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32);
+
+ // Use load to get GOT target
+ SDValue Ops[] = { Callee, GPReg, Chain };
+ SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl,
+ MVT::i32, MVT::Other, Ops, 3), 0);
+ Chain = Load.getValue(1);
+
+ // Call target must be on T9
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Load, InFlag);
+ } else
+ /// Indirect call
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Callee, InFlag);
+
+ // Emit Jump and Link Register
+ SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other,
+ MVT::Glue, R20Reg, Chain);
+ Chain = SDValue(ResNode, 0);
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Node, 0), Chain);
+ ReplaceUses(SDValue(Node, 1), InFlag);
+ return ResNode;
+ }
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ return ResNode;
+}
+
+/// createMBlazeISelDag - This pass converts a legalized DAG into a
+/// MBlaze-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMBlazeISelDag(MBlazeTargetMachine &TM) {
+ return new MBlazeDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
new file mode 100644
index 000000000000..d4f943297acb
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -0,0 +1,1154 @@
+//===-- MBlazeISelLowering.cpp - MBlaze DAG Lowering Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-lower"
+#include "MBlazeISelLowering.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeTargetObjectFile.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink";
+ case MBlazeISD::GPRel : return "MBlazeISD::GPRel";
+ case MBlazeISD::Wrap : return "MBlazeISD::Wrap";
+ case MBlazeISD::ICmp : return "MBlazeISD::ICmp";
+ case MBlazeISD::Ret : return "MBlazeISD::Ret";
+ case MBlazeISD::Select_CC : return "MBlazeISD::Select_CC";
+ default : return NULL;
+ }
+}
+
+MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
+ : TargetLowering(TM, new MBlazeTargetObjectFile()) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+
+ // MBlaze does not have i1 type, so use i32 for
+ // setcc operations results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &MBlaze::GPRRegClass);
+ if (Subtarget->hasFPU()) {
+ addRegisterClass(MVT::f32, &MBlaze::GPRRegClass);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ }
+
+ // Floating point operations which are not supported
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+
+ // Load extented operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // Sign extended loads must be expanded
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // MBlaze has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // If the processor doesn't support multiply then expand it
+ if (!Subtarget->hasMul()) {
+ setOperationAction(ISD::MUL, MVT::i32, Expand);
+ }
+
+ // If the processor doesn't support 64-bit multiply then expand
+ if (!Subtarget->hasMul() || !Subtarget->hasMul64()) {
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ }
+
+ // If the processor doesn't support division then expand
+ if (!Subtarget->hasDiv()) {
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ }
+
+ // Expand unsupported conversions
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ // Expand SELECT_CC
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // MBlaze doesn't have MUL_LOHI
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ // Used by legalize types to correctly generate the setcc result.
+ // Without this, every float setcc comes with a AND/OR with the result,
+ // we don't want this, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT_CC, MVT::i1, MVT::i32);
+
+ // MBlaze Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Variable Argument support
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+
+
+ // Operations not directly supported by MBlaze.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ // MBlaze doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ setMinFunctionAlignment(2);
+
+ setStackPointerRegisterToSaveRestore(MBlaze::R1);
+ computeRegisterProperties();
+}
+
+EVT MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+SDValue MBlazeTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode())
+ {
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+MachineBasicBlock*
+MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB)
+ const {
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unexpected instr type to insert");
+
+ case MBlaze::ShiftRL:
+ case MBlaze::ShiftRA:
+ case MBlaze::ShiftL:
+ return EmitCustomShift(MI, MBB);
+
+ case MBlaze::Select_FCC:
+ case MBlaze::Select_CC:
+ return EmitCustomSelect(MI, MBB);
+
+ case MBlaze::CAS32:
+ case MBlaze::SWP32:
+ case MBlaze::LAA32:
+ case MBlaze::LAS32:
+ case MBlaze::LAD32:
+ case MBlaze::LAO32:
+ case MBlaze::LAX32:
+ case MBlaze::LAN32:
+ return EmitCustomAtomic(MI, MBB);
+
+ case MBlaze::MEMBARRIER:
+ // The Microblaze does not need memory barriers. Just delete the pseudo
+ // instruction and finish.
+ MI->eraseFromParent();
+ return MBB;
+ }
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // sra dst, dst
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = MBB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+ MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop);
+ F->insert(It, finish);
+
+ // Update machine-CFG edges by transferring adding all successors and
+ // remaining instructions from the current block to the new block which
+ // will contain the Phi node for the select.
+ finish->splice(finish->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ finish->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the true and fallthrough blocks as its successors.
+ MBB->addSuccessor(loop);
+ MBB->addSuccessor(finish);
+
+ // Next, add the finish block as a successor of the loop block
+ loop->addSuccessor(finish);
+ loop->addSuccessor(loop);
+
+ unsigned IAMT = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ BuildMI(MBB, dl, TII->get(MBlaze::ANDI), IAMT)
+ .addReg(MI->getOperand(2).getReg())
+ .addImm(31);
+
+ unsigned IVAL = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ BuildMI(MBB, dl, TII->get(MBlaze::ADDIK), IVAL)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0);
+
+ BuildMI(MBB, dl, TII->get(MBlaze::BEQID))
+ .addReg(IAMT)
+ .addMBB(finish);
+
+ unsigned DST = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ unsigned NDST = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
+ .addReg(IVAL).addMBB(MBB)
+ .addReg(NDST).addMBB(loop);
+
+ unsigned SAMT = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ unsigned NAMT = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
+ .addReg(IAMT).addMBB(MBB)
+ .addReg(NAMT).addMBB(loop);
+
+ if (MI->getOpcode() == MBlaze::ShiftL)
+ BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRA)
+ BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRL)
+ BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
+ else
+ llvm_unreachable("Cannot lower unknown shift instruction");
+
+ BuildMI(loop, dl, TII->get(MBlaze::ADDIK), NAMT)
+ .addReg(SAMT)
+ .addImm(-1);
+
+ BuildMI(loop, dl, TII->get(MBlaze::BNEID))
+ .addReg(NAMT)
+ .addMBB(loop);
+
+ BuildMI(*finish, finish->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(IVAL).addMBB(MBB)
+ .addReg(NDST).addMBB(loop);
+
+ // The pseudo instruction is no longer needed so remove it
+ MI->eraseFromParent();
+ return finish;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomSelect(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ unsigned Opc;
+ switch (MI->getOperand(4).getImm()) {
+ default: llvm_unreachable("Unknown branch condition");
+ case MBlazeCC::EQ: Opc = MBlaze::BEQID; break;
+ case MBlazeCC::NE: Opc = MBlaze::BNEID; break;
+ case MBlazeCC::GT: Opc = MBlaze::BGTID; break;
+ case MBlazeCC::LT: Opc = MBlaze::BLTID; break;
+ case MBlazeCC::GE: Opc = MBlaze::BGEID; break;
+ case MBlazeCC::LE: Opc = MBlaze::BLEID; break;
+ }
+
+ F->insert(It, flsBB);
+ F->insert(It, dneBB);
+
+ // Transfer the remainder of MBB and its successor edges to dneBB.
+ dneBB->splice(dneBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ dneBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ MBB->addSuccessor(flsBB);
+ MBB->addSuccessor(dneBB);
+ flsBB->addSuccessor(dneBB);
+
+ BuildMI(MBB, dl, TII->get(Opc))
+ .addReg(MI->getOperand(3).getReg())
+ .addMBB(dneBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
+ // .addReg(MI->getOperand(2).getReg()).addMBB(BB);
+
+ BuildMI(*dneBB, dneBB->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return dneBB;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // All atomic instructions on the Microblaze are implemented using the
+ // load-linked / store-conditional style atomic instruction sequences.
+ // Thus, all operations will look something like the following:
+ //
+ // start:
+ // lwx RV, RP, 0
+ // <do stuff>
+ // swx RV, RP, 0
+ // addic RC, R0, 0
+ // bneid RC, start
+ //
+ // exit:
+ //
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // sra dst, dst
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = MBB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+
+ // Create the start and exit basic blocks for the atomic operation
+ MachineBasicBlock *start = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exit = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, start);
+ F->insert(It, exit);
+
+ // Update machine-CFG edges by transferring adding all successors and
+ // remaining instructions from the current block to the new block which
+ // will contain the Phi node for the select.
+ exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ exit->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the fallthrough block as its successors.
+ MBB->addSuccessor(start);
+
+ BuildMI(start, dl, TII->get(MBlaze::LWX), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MBlaze::R0);
+
+ MachineBasicBlock *final = start;
+ unsigned finalReg = 0;
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Cannot lower unknown atomic instruction!");
+
+ case MBlaze::SWP32:
+ finalReg = MI->getOperand(2).getReg();
+ start->addSuccessor(exit);
+ start->addSuccessor(start);
+ break;
+
+ case MBlaze::LAN32:
+ case MBlaze::LAX32:
+ case MBlaze::LAO32:
+ case MBlaze::LAD32:
+ case MBlaze::LAS32:
+ case MBlaze::LAA32: {
+ unsigned opcode = 0;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Cannot lower unknown atomic load!");
+ case MBlaze::LAA32: opcode = MBlaze::ADDIK; break;
+ case MBlaze::LAS32: opcode = MBlaze::RSUBIK; break;
+ case MBlaze::LAD32: opcode = MBlaze::AND; break;
+ case MBlaze::LAO32: opcode = MBlaze::OR; break;
+ case MBlaze::LAX32: opcode = MBlaze::XOR; break;
+ case MBlaze::LAN32: opcode = MBlaze::AND; break;
+ }
+
+ finalReg = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ start->addSuccessor(exit);
+ start->addSuccessor(start);
+
+ BuildMI(start, dl, TII->get(opcode), finalReg)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg());
+
+ if (MI->getOpcode() == MBlaze::LAN32) {
+ unsigned tmp = finalReg;
+ finalReg = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ BuildMI(start, dl, TII->get(MBlaze::XORI), finalReg)
+ .addReg(tmp)
+ .addImm(-1);
+ }
+ break;
+ }
+
+ case MBlaze::CAS32: {
+ finalReg = MI->getOperand(3).getReg();
+ final = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, final);
+ start->addSuccessor(exit);
+ start->addSuccessor(final);
+ final->addSuccessor(exit);
+ final->addSuccessor(start);
+
+ unsigned CMP = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ BuildMI(start, dl, TII->get(MBlaze::CMP), CMP)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg());
+
+ BuildMI(start, dl, TII->get(MBlaze::BNEID))
+ .addReg(CMP)
+ .addMBB(exit);
+
+ final->moveAfter(start);
+ exit->moveAfter(final);
+ break;
+ }
+ }
+
+ unsigned CHK = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ BuildMI(final, dl, TII->get(MBlaze::SWX))
+ .addReg(finalReg)
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MBlaze::R0);
+
+ BuildMI(final, dl, TII->get(MBlaze::ADDIC), CHK)
+ .addReg(MBlaze::R0)
+ .addImm(0);
+
+ BuildMI(final, dl, TII->get(MBlaze::BNEID))
+ .addReg(CHK)
+ .addMBB(start);
+
+ // The pseudo instruction is no longer needed so remove it
+ MI->eraseFromParent();
+ return exit;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+//
+
+SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ Opc = MBlazeISD::Select_CC;
+ CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS)
+ .getValue(1);
+ } else {
+ llvm_unreachable("Cannot lower select_cc with unknown type");
+ }
+
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ CompareFlag);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("TLS not implemented for MicroBlaze.");
+}
+
+SDValue MBlazeTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ SDValue ResNode;
+ SDValue HiPart;
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 0);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI);
+}
+
+SDValue MBlazeTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ SDValue ResNode;
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = N->getConstVal();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), 0);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
+}
+
+SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MBlazeFunctionInfo *FuncInfo = MF.getInfo<MBlazeFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeGenCallingConv.inc"
+
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const uint16_t ArgRegs[] = {
+ MBlaze::R5, MBlaze::R6, MBlaze::R7,
+ MBlaze::R8, MBlaze::R9, MBlaze::R10
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+ unsigned Reg = State.AllocateReg(ArgRegs, NumArgRegs);
+ if (!Reg) return false;
+
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerCall - functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: isVarArg, isTailCall.
+SDValue MBlazeTargetLowering::
+LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
+ // MBlaze does not yet support tail call optimization
+ isTailCall = false;
+
+ // The MBlaze requires stack slots for arguments passed to var arg
+ // functions even if they are passed in registers.
+ bool needsRegArgSlots = isVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Variable argument function calls require a minimum of 24-bytes of stack
+ if (isVarArg && NumBytes < 24) NumBytes = 24;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Register can't get to this point...
+ assert(VA.isMemLoc());
+
+ // Since we are alread passing values on the stack we don't
+ // need to worry about creating additional slots for the
+ // values passed via registers.
+ needsRegArgSlots = false;
+
+ // Create the frame index object for this incoming parameter
+ unsigned ArgSize = VA.getValVT().getSizeInBits()/8;
+ unsigned StackLoc = VA.getLocMemOffset() + 4;
+ int FI = MFI->CreateFixedObject(ArgSize, StackLoc, true);
+
+ SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+ // emit ISD::STORE whichs stores the
+ // parameter value to a stack Location
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // If we need to reserve stack space for the arguments passed via registers
+ // then create a fixed stack object at the beginning of the stack.
+ if (needsRegArgSlots && TFI.hasReservedCallFrame(MF))
+ MFI->CreateFixedObject(28,0,true);
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, 0);
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ getPointerTy(), 0);
+
+ // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue MBlazeTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerFormalArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments places on the stack.
+SDValue MBlazeTargetLowering::
+LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+ MBlazeFI->setVarArgsFrameIndex(0);
+
+ // Used with vargs to acumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Keep track of the last register used for arguments
+ unsigned ArgRegEnd = 0;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze);
+ SDValue StackPtr;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ ArgRegEnd = VA.getLocReg();
+ const TargetRegisterClass *RC;
+
+ if (RegVT == MVT::i32)
+ RC = &MBlaze::GPRRegClass;
+ else if (RegVT == MVT::f32)
+ RC = &MBlaze::GPRRegClass;
+ else
+ llvm_unreachable("RegVT not supported by LowerFormalArguments");
+
+ // Transform the arguments stored on
+ // physical registers into virtual ones
+ unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size. If if is a floating point value
+ // then convert to the correct type.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ InVals.push_back(ArgValue);
+ } else { // VA.isRegLoc()
+ // sanity check
+ assert(VA.isMemLoc());
+
+ // The last argument is not a register
+ ArgRegEnd = 0;
+
+ // The stack pointer offset is relative to the caller stack frame.
+ // Since the real stack size is unknown here, a negative SPOffset
+ // is used so there's a way to adjust these offsets when the stack
+ // size get known (on EliminateFrameIndex). A dummy SPOffset is
+ // used instead of a direct negative address (which is recorded to
+ // be used on emitPrologue) to avoid mis-calc of the first stack
+ // offset on PEI::calculateFrameObjectOffsets.
+ // Arguments are always 32-bit.
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ unsigned StackLoc = VA.getLocMemOffset() + 4;
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true);
+ MBlazeFI->recordLoadArgsFI(FI, -StackLoc);
+ MBlazeFI->recordLiveIn(FI);
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0));
+ }
+ }
+
+ // To meet ABI, when VARARGS are passed on registers, the registers
+ // must have their values written to the caller stack frame. If the last
+ // argument was placed in the stack, there's no need to save any register.
+ if ((isVarArg) && ArgRegEnd) {
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+ // The last register argument that must be saved is MBlaze::R10
+ const TargetRegisterClass *RC = &MBlaze::GPRRegClass;
+
+ unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5);
+ unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1);
+ unsigned End = getMBlazeRegisterNumbering(MBlaze::R10);
+ unsigned StackLoc = Start - Begin + 1;
+
+ for (; Start <= End; ++Start, ++StackLoc) {
+ unsigned Reg = getMBlazeRegisterFromNumbering(Start);
+ unsigned LiveReg = MF.addLiveIn(Reg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
+
+ int FI = MFI->CreateFixedObject(4, 0, true);
+ MBlazeFI->recordStoreVarArgsFI(FI, -(StackLoc*4));
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ if (!MBlazeFI->getVarArgsFrameIndex())
+ MBlazeFI->setVarArgsFrameIndex(FI);
+ }
+ }
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens when on varg functions
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MBlazeTargetLowering::
+LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ // CCValAssign - represent the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analize return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // If this function is using the interrupt_handler calling convention
+ // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
+ unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
+ : MBlazeISD::Ret;
+ unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14
+ : MBlaze::R15;
+ RetOps.push_back(DAG.getRegister(Reg, MVT::i32));
+
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // guarantee that all emitted copies are
+ // stuck together, avoiding something bad
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(Ret, dl, MVT::Other, &RetOps[0], RetOps.size());
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MBlazeTargetLowering::ConstraintType MBlazeTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+ // MBlaze specific constrainy
+ //
+ // 'd' : An address register. Equivalent to r.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MBlazeTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, &MBlaze::GPRRegClass);
+ // TODO: These can't possibly be right, but match what was in
+ // getRegClassForInlineAsmConstraint.
+ case 'd':
+ case 'y':
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, &MBlaze::GPRRegClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool MBlazeTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The MBlaze target isn't yet aware of offsets.
+ return false;
+}
+
+bool MBlazeTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ return VT != MVT::f32;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
new file mode 100644
index 000000000000..f6b4095a93dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -0,0 +1,179 @@
+//===-- MBlazeISelLowering.h - MBlaze DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBlazeISELLOWERING_H
+#define MBlazeISELLOWERING_H
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace MBlazeCC {
+ enum CC {
+ FIRST = 0,
+ EQ,
+ NE,
+ GT,
+ LT,
+ GE,
+ LE
+ };
+
+ inline static CC getOppositeCondition(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case GT: return LE;
+ case LT: return GE;
+ case GE: return LT;
+ case LE: return GE;
+ }
+ }
+
+ inline static const char *MBlazeCCToString(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return "eq";
+ case NE: return "ne";
+ case GT: return "gt";
+ case LT: return "lt";
+ case GE: return "ge";
+ case LE: return "le";
+ }
+ }
+ }
+
+ namespace MBlazeISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // Select CC Pseudo Instruction
+ Select_CC,
+
+ // Wrap up multiple types of instructions
+ Wrap,
+
+ // Integer Compare
+ ICmp,
+
+ // Return from subroutine
+ Ret,
+
+ // Return from interrupt
+ IRet
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+
+ class MBlazeTargetLowering : public TargetLowering {
+ public:
+ explicit MBlazeTargetLowering(MBlazeTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ // DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ EVT getSetCCResultType(EVT VT) const;
+
+ private:
+ // Subtarget Info
+ const MBlazeSubtarget *Subtarget;
+
+
+ // Lower Operand helpers
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ // Lower Operand specifics
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomShift(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomSelect(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomAtomic(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ };
+}
+
+#endif // MBlazeISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
new file mode 100644
index 000000000000..3f145938728c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -0,0 +1,219 @@
+//===-- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze profiles and nodes
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IIC_MEMl>;
+
+class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs GPR:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
+
+class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIC_MEMs>;
+
+class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
+
+class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class CmpFN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class LogicFI<bits<6> op, string instr_asm> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+let rb=0 in {
+ class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+ class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+ class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPU Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+let Predicates=[HasFPU] in {
+ def FORI : LogicFI<0x28, "ori ">;
+ def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIC_FPU>;
+ def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIC_FPU>;
+ def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIC_FPU>;
+ def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIC_FPUd>;
+}
+
+let Predicates=[HasFPU], isCodeGenOnly=1 in {
+ def LWF : LoadFM<0x32, "lw ", load>;
+ def LWFI : LoadFMI<0x3A, "lwi ", load>;
+
+ def SWF : StoreFM<0x36, "sw ", store>;
+ def SWFI : StoreFMI<0x3E, "swi ", store>;
+}
+
+let Predicates=[HasFPU,HasSqrt] in {
+ def FLT : ArithIF<0x16, 0x280, "flt ", IIC_FPUf>;
+ def FINT : ArithFI<0x16, 0x300, "fint ", IIC_FPUi>;
+ def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIC_FPUs>;
+}
+
+let isAsCheapAsAMove = 1 in {
+ def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIC_FPUc>;
+ def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIC_FPUc>;
+ def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIC_FPUc>;
+ def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIC_FPUc>;
+ def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIC_FPUc>;
+ def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIC_FPUc>;
+ def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIC_FPUc>;
+}
+
+
+let usesCustomInserter = 1 in {
+ def Select_FCC : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC),
+ "; SELECT_FCC PSEUDO!",
+ []>;
+}
+
+// Floating point conversions
+let Predicates=[HasFPU] in {
+ def : Pat<(sint_to_fp GPR:$V), (FLT GPR:$V)>;
+ def : Pat<(fp_to_sint GPR:$V), (FINT GPR:$V)>;
+ def : Pat<(fsqrt GPR:$V), (FSQRT GPR:$V)>;
+}
+
+// SET_CC operations
+let Predicates=[HasFPU] in {
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 1)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (XOR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_NE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_GT GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_LT GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_GE GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_LE GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETO),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_UN GPR:$L, GPR:$R), 1)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUO),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_UN GPR:$L, GPR:$R), 2)>;
+}
+
+// SELECT operations
+def : Pat<(select (i32 GPR:$C), (f32 GPR:$T), (f32 GPR:$F)),
+ (Select_FCC GPR:$T, GPR:$F, GPR:$C, 2)>;
+
+//===----------------------------------------------------------------------===//
+// Patterns for Floating Point Instructions
+//===----------------------------------------------------------------------===//
+def : Pat<(f32 fpimm:$imm), (FORI (i32 R0), fpimm:$imm)>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
new file mode 100644
index 000000000000..91b69de05102
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -0,0 +1,229 @@
+//===-- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FSL Instruction Formats
+//===----------------------------------------------------------------------===//
+class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FRCX, (outs GPR:$dst), (ins fslimm:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set GPR:$dst, (OpNode immZExt4:$b))],IIC_FSLg>
+{
+ bits<5> rd;
+ bits<4> fslno;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16} = 0x0;
+ let Inst{17-21} = flags; // NCTAE
+ let Inst{22-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FRCR, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b))], IIC_FSLg>
+{
+ bits<5> rd;
+ bits<5> rb;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x0;
+ let Inst{22-26} = flags; // NCTAE
+ let Inst{27-31} = 0x0;
+}
+
+class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCRCX, (outs), (ins GPR:$v, fslimm:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode GPR:$v, immZExt4:$b)], IIC_FSLp>
+{
+ bits<5> ra;
+ bits<4> fslno;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = ra;
+ let Inst{16} = 0x1;
+ let Inst{17-20} = flags; // NCTA
+ let Inst{21-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCRR, (outs), (ins GPR:$v, GPR:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode GPR:$v, GPR:$b)], IIC_FSLp>
+{
+ bits<5> ra;
+ bits<5> rb;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x1;
+ let Inst{22-25} = flags; // NCTA
+ let Inst{26-31} = 0x0;
+}
+
+class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCX, (outs), (ins fslimm:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode immZExt4:$b)], IIC_FSLp>
+{
+ bits<4> fslno;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = 0x0;
+ let Inst{16} = 0x1;
+ let Inst{17-20} = flags; // NCTA
+ let Inst{21-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCR, (outs), (ins GPR:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode GPR:$b)], IIC_FSLp>
+{
+ bits<5> rb;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = 0x0;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x1;
+ let Inst{22-25} = flags; // NCTA
+ let Inst{26-31} = 0x0;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Get Instructions
+//===----------------------------------------------------------------------===//
+def GET : FSLGet<0x1B, 0x00, "get ", int_mblaze_fsl_get>;
+def AGET : FSLGet<0x1B, 0x02, "aget ", int_mblaze_fsl_aget>;
+def CGET : FSLGet<0x1B, 0x08, "cget ", int_mblaze_fsl_cget>;
+def CAGET : FSLGet<0x1B, 0x0A, "caget ", int_mblaze_fsl_caget>;
+def EGET : FSLGet<0x1B, 0x01, "eget ", int_mblaze_fsl_eget>;
+def EAGET : FSLGet<0x1B, 0x03, "eaget ", int_mblaze_fsl_eaget>;
+def ECGET : FSLGet<0x1B, 0x09, "ecget ", int_mblaze_fsl_ecget>;
+def ECAGET : FSLGet<0x1B, 0x0B, "ecaget ", int_mblaze_fsl_ecaget>;
+def TGET : FSLGet<0x1B, 0x04, "tget ", int_mblaze_fsl_tget>;
+def TAGET : FSLGet<0x1B, 0x06, "taget ", int_mblaze_fsl_taget>;
+def TCGET : FSLGet<0x1B, 0x0C, "tcget ", int_mblaze_fsl_tcget>;
+def TCAGET : FSLGet<0x1B, 0x0E, "tcaget ", int_mblaze_fsl_tcaget>;
+def TEGET : FSLGet<0x1B, 0x05, "teget ", int_mblaze_fsl_teget>;
+def TEAGET : FSLGet<0x1B, 0x07, "teaget ", int_mblaze_fsl_teaget>;
+def TECGET : FSLGet<0x1B, 0x0D, "tecget ", int_mblaze_fsl_tecget>;
+def TECAGET : FSLGet<0x1B, 0x0F, "tecaget ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+ def NGET : FSLGet<0x1B, 0x10, "nget ", int_mblaze_fsl_nget>;
+ def NAGET : FSLGet<0x1B, 0x12, "naget ", int_mblaze_fsl_naget>;
+ def NCGET : FSLGet<0x1B, 0x18, "ncget ", int_mblaze_fsl_ncget>;
+ def NCAGET : FSLGet<0x1B, 0x1A, "ncaget ", int_mblaze_fsl_ncaget>;
+ def NEGET : FSLGet<0x1B, 0x11, "neget ", int_mblaze_fsl_neget>;
+ def NEAGET : FSLGet<0x1B, 0x13, "neaget ", int_mblaze_fsl_neaget>;
+ def NECGET : FSLGet<0x1B, 0x19, "necget ", int_mblaze_fsl_necget>;
+ def NECAGET : FSLGet<0x1B, 0x1B, "necaget ", int_mblaze_fsl_necaget>;
+ def TNGET : FSLGet<0x1B, 0x14, "tnget ", int_mblaze_fsl_tnget>;
+ def TNAGET : FSLGet<0x1B, 0x16, "tnaget ", int_mblaze_fsl_tnaget>;
+ def TNCGET : FSLGet<0x1B, 0x1C, "tncget ", int_mblaze_fsl_tncget>;
+ def TNCAGET : FSLGet<0x1B, 0x1E, "tncaget ", int_mblaze_fsl_tncaget>;
+ def TNEGET : FSLGet<0x1B, 0x15, "tneget ", int_mblaze_fsl_tneget>;
+ def TNEAGET : FSLGet<0x1B, 0x17, "tneaget ", int_mblaze_fsl_tneaget>;
+ def TNECGET : FSLGet<0x1B, 0x1D, "tnecget ", int_mblaze_fsl_tnecget>;
+ def TNECAGET : FSLGet<0x1B, 0x1F, "tnecaget ", int_mblaze_fsl_tnecaget>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Get Instructions
+//===----------------------------------------------------------------------===//
+def GETD : FSLGetD<0x13, 0x00, "getd ", int_mblaze_fsl_get>;
+def AGETD : FSLGetD<0x13, 0x02, "agetd ", int_mblaze_fsl_aget>;
+def CGETD : FSLGetD<0x13, 0x08, "cgetd ", int_mblaze_fsl_cget>;
+def CAGETD : FSLGetD<0x13, 0x0A, "cagetd ", int_mblaze_fsl_caget>;
+def EGETD : FSLGetD<0x13, 0x01, "egetd ", int_mblaze_fsl_eget>;
+def EAGETD : FSLGetD<0x13, 0x03, "eagetd ", int_mblaze_fsl_eaget>;
+def ECGETD : FSLGetD<0x13, 0x09, "ecgetd ", int_mblaze_fsl_ecget>;
+def ECAGETD : FSLGetD<0x13, 0x0B, "ecagetd ", int_mblaze_fsl_ecaget>;
+def TGETD : FSLGetD<0x13, 0x04, "tgetd ", int_mblaze_fsl_tget>;
+def TAGETD : FSLGetD<0x13, 0x06, "tagetd ", int_mblaze_fsl_taget>;
+def TCGETD : FSLGetD<0x13, 0x0C, "tcgetd ", int_mblaze_fsl_tcget>;
+def TCAGETD : FSLGetD<0x13, 0x0E, "tcagetd ", int_mblaze_fsl_tcaget>;
+def TEGETD : FSLGetD<0x13, 0x05, "tegetd ", int_mblaze_fsl_teget>;
+def TEAGETD : FSLGetD<0x13, 0x07, "teagetd ", int_mblaze_fsl_teaget>;
+def TECGETD : FSLGetD<0x13, 0x0D, "tecgetd ", int_mblaze_fsl_tecget>;
+def TECAGETD : FSLGetD<0x13, 0x0F, "tecagetd ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+ def NGETD : FSLGetD<0x13, 0x10, "ngetd ", int_mblaze_fsl_nget>;
+ def NAGETD : FSLGetD<0x13, 0x12, "nagetd ", int_mblaze_fsl_naget>;
+ def NCGETD : FSLGetD<0x13, 0x18, "ncgetd ", int_mblaze_fsl_ncget>;
+ def NCAGETD : FSLGetD<0x13, 0x1A, "ncagetd ", int_mblaze_fsl_ncaget>;
+ def NEGETD : FSLGetD<0x13, 0x11, "negetd ", int_mblaze_fsl_neget>;
+ def NEAGETD : FSLGetD<0x13, 0x13, "neagetd ", int_mblaze_fsl_neaget>;
+ def NECGETD : FSLGetD<0x13, 0x19, "necgetd ", int_mblaze_fsl_necget>;
+ def NECAGETD : FSLGetD<0x13, 0x1B, "necagetd ", int_mblaze_fsl_necaget>;
+ def TNGETD : FSLGetD<0x13, 0x14, "tngetd ", int_mblaze_fsl_tnget>;
+ def TNAGETD : FSLGetD<0x13, 0x16, "tnagetd ", int_mblaze_fsl_tnaget>;
+ def TNCGETD : FSLGetD<0x13, 0x1C, "tncgetd ", int_mblaze_fsl_tncget>;
+ def TNCAGETD : FSLGetD<0x13, 0x1E, "tncagetd ", int_mblaze_fsl_tncaget>;
+ def TNEGETD : FSLGetD<0x13, 0x15, "tnegetd ", int_mblaze_fsl_tneget>;
+ def TNEAGETD : FSLGetD<0x13, 0x17, "tneagetd ", int_mblaze_fsl_tneaget>;
+ def TNECGETD : FSLGetD<0x13, 0x1D, "tnecgetd ", int_mblaze_fsl_tnecget>;
+ def TNECAGETD : FSLGetD<0x13, 0x1F, "tnecagetd", int_mblaze_fsl_tnecaget>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Put Instructions
+//===----------------------------------------------------------------------===//
+def PUT : FSLPut<0x1B, 0x0, "put ", int_mblaze_fsl_put>;
+def APUT : FSLPut<0x1B, 0x1, "aput ", int_mblaze_fsl_aput>;
+def CPUT : FSLPut<0x1B, 0x4, "cput ", int_mblaze_fsl_cput>;
+def CAPUT : FSLPut<0x1B, 0x5, "caput ", int_mblaze_fsl_caput>;
+def TPUT : FSLPutT<0x1B, 0x2, "tput ", int_mblaze_fsl_tput>;
+def TAPUT : FSLPutT<0x1B, 0x3, "taput ", int_mblaze_fsl_taput>;
+def TCPUT : FSLPutT<0x1B, 0x6, "tcput ", int_mblaze_fsl_tcput>;
+def TCAPUT : FSLPutT<0x1B, 0x7, "tcaput ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+ def NPUT : FSLPut<0x1B, 0x8, "nput ", int_mblaze_fsl_nput>;
+ def NAPUT : FSLPut<0x1B, 0x9, "naput ", int_mblaze_fsl_naput>;
+ def NCPUT : FSLPut<0x1B, 0xC, "ncput ", int_mblaze_fsl_ncput>;
+ def NCAPUT : FSLPut<0x1B, 0xD, "ncaput ", int_mblaze_fsl_ncaput>;
+ def TNPUT : FSLPutT<0x1B, 0xA, "tnput ", int_mblaze_fsl_tnput>;
+ def TNAPUT : FSLPutT<0x1B, 0xB, "tnaput ", int_mblaze_fsl_tnaput>;
+ def TNCPUT : FSLPutT<0x1B, 0xE, "tncput ", int_mblaze_fsl_tncput>;
+ def TNCAPUT : FSLPutT<0x1B, 0xF, "tncaput ", int_mblaze_fsl_tncaput>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Put Instructions
+//===----------------------------------------------------------------------===//
+def PUTD : FSLPutD<0x13, 0x0, "putd ", int_mblaze_fsl_put>;
+def APUTD : FSLPutD<0x13, 0x1, "aputd ", int_mblaze_fsl_aput>;
+def CPUTD : FSLPutD<0x13, 0x4, "cputd ", int_mblaze_fsl_cput>;
+def CAPUTD : FSLPutD<0x13, 0x5, "caputd ", int_mblaze_fsl_caput>;
+def TPUTD : FSLPutTD<0x13, 0x2, "tputd ", int_mblaze_fsl_tput>;
+def TAPUTD : FSLPutTD<0x13, 0x3, "taputd ", int_mblaze_fsl_taput>;
+def TCPUTD : FSLPutTD<0x13, 0x6, "tcputd ", int_mblaze_fsl_tcput>;
+def TCAPUTD : FSLPutTD<0x13, 0x7, "tcaputd ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+ def NPUTD : FSLPutD<0x13, 0x8, "nputd ", int_mblaze_fsl_nput>;
+ def NAPUTD : FSLPutD<0x13, 0x9, "naputd ", int_mblaze_fsl_naput>;
+ def NCPUTD : FSLPutD<0x13, 0xC, "ncputd ", int_mblaze_fsl_ncput>;
+ def NCAPUTD : FSLPutD<0x13, 0xD, "ncaputd ", int_mblaze_fsl_ncaput>;
+ def TNPUTD : FSLPutTD<0x13, 0xA, "tnputd ", int_mblaze_fsl_tnput>;
+ def TNAPUTD : FSLPutTD<0x13, 0xB, "tnaputd ", int_mblaze_fsl_tnaput>;
+ def TNCPUTD : FSLPutTD<0x13, 0xE, "tncputd ", int_mblaze_fsl_tncput>;
+ def TNCAPUTD : FSLPutTD<0x13, 0xF, "tncaputd ", int_mblaze_fsl_tncaput>;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
new file mode 100644
index 000000000000..e40432a1b9a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -0,0 +1,228 @@
+//===-- MBlazeInstrFormats.td - MB Instruction defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def FPseudo : Format<0>;
+def FRRR : Format<1>; // ADD, OR, etc.
+def FRRI : Format<2>; // ADDI, ORI, etc.
+def FCRR : Format<3>; // PUTD, WDC, WIC, BEQ, BNE, BGE, etc.
+def FCRI : Format<4>; // RTID, RTED, RTSD, BEQI, BNEI, BGEI, etc.
+def FRCR : Format<5>; // BRLD, BRALD, GETD
+def FRCI : Format<6>; // BRLID, BRALID, MSRCLR, MSRSET
+def FCCR : Format<7>; // BR, BRA, BRD, etc.
+def FCCI : Format<8>; // IMM, BRI, BRAI, BRID, etc.
+def FRRCI : Format<9>; // BSRLI, BSRAI, BSLLI
+def FRRC : Format<10>; // SEXT8, SEXT16, SRA, SRC, SRL, FLT, FINT, FSQRT
+def FRCX : Format<11>; // GET
+def FRCS : Format<12>; // MFS
+def FCRCS : Format<13>; // MTS
+def FCRCX : Format<14>; // PUT
+def FCX : Format<15>; // TPUT
+def FCR : Format<16>; // TPUTD
+def FRIR : Format<17>; // RSUBI
+def FRRRR : Format<18>; // RSUB, FRSUB
+def FRI : Format<19>; // RSUB, FRSUB
+def FC : Format<20>; // NOP
+def FRR : Format<21>; // CLZ
+
+//===----------------------------------------------------------------------===//
+// Describe MBlaze instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rd - dst reg.
+// ra - first src. reg.
+// rb - second src. reg.
+// imm16 - 16-bit immediate value.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic MBlaze Format
+class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> : Instruction {
+ let Namespace = "MBlaze";
+ field bits<32> Inst;
+
+ bits<6> opcode = op;
+ Format Form = form;
+ bits<6> FormBits = Form.Value;
+
+ // Top 6 bits are the 'opcode' field
+ let Inst{0-5} = opcode;
+
+ // If the instruction is marked as a pseudo, set isCodeGenOnly so that the
+ // assembler and disassmbler ignore it.
+ let isCodeGenOnly = !eq(!cast<string>(form), "FPseudo");
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ // TSFlags layout should be kept in sync with MBlazeInstrInfo.h.
+ let TSFlags{5-0} = FormBits;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instruction class
+//===----------------------------------------------------------------------===//
+class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIC_Pseudo>;
+
+//===----------------------------------------------------------------------===//
+// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
+//===----------------------------------------------------------------------===//
+
+class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op,FRRR,outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> rb;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+
+class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<16> imm16;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Type A instruction class in MBlaze but with the operands reversed
+// in the LLVM DAG : <|opcode|rd|ra|rb|flags|>
+//===----------------------------------------------------------------------===//
+
+class TAR<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ TA<op, flags, outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rrd;
+ bits<5> rrb;
+ bits<5> rra;
+
+ let Form = FRRRR;
+
+ let rd = rrd;
+ let ra = rra;
+ let rb = rrb;
+}
+
+//===----------------------------------------------------------------------===//
+// Type B instruction class in MBlaze but with the operands reversed in
+// the LLVM DAG : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+class TBR<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ TB<op, outs, ins, asmstr, pattern, itin> {
+ bits<5> rrd;
+ bits<16> rimm16;
+ bits<5> rra;
+
+ let Form = FRIR;
+
+ let rd = rrd;
+ let ra = rra;
+ let imm16 = rimm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Shift immediate instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+class SHT<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> imm5;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = 0x0;
+ let Inst{21-22} = flags;
+ let Inst{23-26} = 0x0;
+ let Inst{27-31} = imm5;
+}
+
+//===----------------------------------------------------------------------===//
+// Special instruction class in MBlaze : <|opcode|rd|imm14|>
+//===----------------------------------------------------------------------===//
+class SPC<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<14> imm14;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16-17} = flags;
+ let Inst{18-31} = imm14;
+}
+
+//===----------------------------------------------------------------------===//
+// MSR instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<15> imm15;
+
+ let Inst{6-10} = rd;
+ let Inst{11-16} = flags;
+ let Inst{17-31} = imm15;
+}
+
+//===----------------------------------------------------------------------===//
+// TCLZ instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class TCLZ<bits<6> op, bits<16> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRR, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// MBAR instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class MBAR<bits<6> op, bits<26> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FC, outs, ins, asmstr, pattern, itin> {
+ let Inst{6-31} = flags;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
new file mode 100644
index 000000000000..79449f73f74e
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -0,0 +1,297 @@
+//===-- MBlazeInstrInfo.cpp - MBlaze Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeInstrInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MBlazeGenInstrInfo.inc"
+
+using namespace llvm;
+
+MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm)
+ : MBlazeGenInstrInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MBlazeInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::LWI) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MBlazeInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::SWI) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - If data hazard condition is found insert the target nop
+/// instruction.
+void MBlazeInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(MBlaze::NOP));
+}
+
+void MBlazeInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ llvm::BuildMI(MBB, I, DL, get(MBlaze::ADDK), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0);
+}
+
+void MBlazeInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI);
+}
+
+void MBlazeInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg)
+ .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI);
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+bool MBlazeInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (MBlaze::isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (MBlaze::isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with something like BEQID then BRID, handle it.
+ if (MBlaze::isCondBranchOpcode(SecondLastInst->getOpcode()) &&
+ MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it.
+ // The second one is not executed, so remove it.
+ if (MBlaze::isUncondBranchOpcode(SecondLastInst->getOpcode()) &&
+ MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned MBlazeInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "MBlaze branch conditions have two components!");
+
+ unsigned Opc = MBlaze::BRID;
+ if (!Cond.empty())
+ Opc = (unsigned)Cond[0].getImm();
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(MBlaze::BRID)).addMBB(FBB);
+ return 2;
+}
+
+unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+
+ if (!MBlaze::isUncondBranchOpcode(I->getOpcode()) &&
+ !MBlaze::isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!MBlaze::isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand>
+ &Cond) const {
+ assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!");
+ switch (Cond[0].getImm()) {
+ default: return true;
+ case MBlaze::BEQ: Cond[0].setImm(MBlaze::BNE); return false;
+ case MBlaze::BNE: Cond[0].setImm(MBlaze::BEQ); return false;
+ case MBlaze::BGT: Cond[0].setImm(MBlaze::BLE); return false;
+ case MBlaze::BGE: Cond[0].setImm(MBlaze::BLT); return false;
+ case MBlaze::BLT: Cond[0].setImm(MBlaze::BGE); return false;
+ case MBlaze::BLE: Cond[0].setImm(MBlaze::BGT); return false;
+ case MBlaze::BEQI: Cond[0].setImm(MBlaze::BNEI); return false;
+ case MBlaze::BNEI: Cond[0].setImm(MBlaze::BEQI); return false;
+ case MBlaze::BGTI: Cond[0].setImm(MBlaze::BLEI); return false;
+ case MBlaze::BGEI: Cond[0].setImm(MBlaze::BLTI); return false;
+ case MBlaze::BLTI: Cond[0].setImm(MBlaze::BGEI); return false;
+ case MBlaze::BLEI: Cond[0].setImm(MBlaze::BGTI); return false;
+ case MBlaze::BEQD: Cond[0].setImm(MBlaze::BNED); return false;
+ case MBlaze::BNED: Cond[0].setImm(MBlaze::BEQD); return false;
+ case MBlaze::BGTD: Cond[0].setImm(MBlaze::BLED); return false;
+ case MBlaze::BGED: Cond[0].setImm(MBlaze::BLTD); return false;
+ case MBlaze::BLTD: Cond[0].setImm(MBlaze::BGED); return false;
+ case MBlaze::BLED: Cond[0].setImm(MBlaze::BGTD); return false;
+ case MBlaze::BEQID: Cond[0].setImm(MBlaze::BNEID); return false;
+ case MBlaze::BNEID: Cond[0].setImm(MBlaze::BEQID); return false;
+ case MBlaze::BGTID: Cond[0].setImm(MBlaze::BLEID); return false;
+ case MBlaze::BGEID: Cond[0].setImm(MBlaze::BLTID); return false;
+ case MBlaze::BLTID: Cond[0].setImm(MBlaze::BGEID); return false;
+ case MBlaze::BLEID: Cond[0].setImm(MBlaze::BGTID); return false;
+ }
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// the global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>();
+ unsigned GlobalBaseReg = MBlazeFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(&MBlaze::GPRRegClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(MBlaze::R20);
+ RegInfo.addLiveIn(MBlaze::R20);
+
+ MBlazeFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
new file mode 100644
index 000000000000..5252147b48e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -0,0 +1,240 @@
+//===-- MBlazeInstrInfo.h - MBlaze Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTRUCTIONINFO_H
+#define MBLAZEINSTRUCTIONINFO_H
+
+#include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MBlazeGenInstrInfo.inc"
+
+namespace llvm {
+
+namespace MBlaze {
+
+ // MBlaze Branch Codes
+ enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+ };
+
+ // MBlaze Condition Codes
+ enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_EQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+ // This conditions have the same mnemonic as the
+ // above ones, but are used with a branch False;
+ FCOND_T,
+ FCOND_OR,
+ FCOND_NEQ,
+ FCOND_OGL,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT,
+
+ // Only integer conditions
+ COND_EQ,
+ COND_GT,
+ COND_GE,
+ COND_LT,
+ COND_LE,
+ COND_NE,
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ inline static unsigned GetCondBranchFromCond(CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case COND_EQ: return MBlaze::BEQID;
+ case COND_NE: return MBlaze::BNEID;
+ case COND_GT: return MBlaze::BGTID;
+ case COND_GE: return MBlaze::BGEID;
+ case COND_LT: return MBlaze::BLTID;
+ case COND_LE: return MBlaze::BLEID;
+ }
+ }
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ // CondCode GetOppositeBranchCondition(MBlaze::CondCode CC);
+
+ /// MBlazeCCToString - Map each FP condition code to its string
+ inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_EQ:
+ case FCOND_NEQ: return "eq";
+ case FCOND_UEQ:
+ case FCOND_OGL: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "ge";
+ case FCOND_LE:
+ case FCOND_NLE: return "nle";
+ case FCOND_NGT:
+ case FCOND_GT: return "gt";
+ }
+ }
+
+ inline static bool isUncondBranchOpcode(int Opc) {
+ switch (Opc) {
+ default: return false;
+ case MBlaze::BRI:
+ case MBlaze::BRAI:
+ case MBlaze::BRID:
+ case MBlaze::BRAID:
+ return true;
+ }
+ }
+
+ inline static bool isCondBranchOpcode(int Opc) {
+ switch (Opc) {
+ default: return false;
+ case MBlaze::BEQI: case MBlaze::BEQID:
+ case MBlaze::BNEI: case MBlaze::BNEID:
+ case MBlaze::BGTI: case MBlaze::BGTID:
+ case MBlaze::BGEI: case MBlaze::BGEID:
+ case MBlaze::BLTI: case MBlaze::BLTID:
+ case MBlaze::BLEI: case MBlaze::BLEID:
+ return true;
+ }
+ }
+}
+
+class MBlazeInstrInfo : public MBlazeGenInstrInfo {
+ MBlazeTargetMachine &TM;
+ const MBlazeRegisterInfo RI;
+public:
+ explicit MBlazeInstrInfo(MBlazeTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ /// Insert nop instruction when hazard condition is found
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// the global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
new file mode 100644
index 000000000000..f86bc0b0b5a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -0,0 +1,1052 @@
+//===-- MBlazeInstrInfo.td - MBlaze Instruction defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze type profiles
+//===----------------------------------------------------------------------===//
+
+// def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
+def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeIRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeJmpLink : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze specific nodes
+//===----------------------------------------------------------------------===//
+
+def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
+ [SDNPHasChain,SDNPOptInGlue,SDNPOutGlue,
+ SDNPVariadic]>;
+
+def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+// def HasPipe3 : Predicate<"Subtarget.hasPipe3()">;
+def HasBarrel : Predicate<"Subtarget.hasBarrel()">;
+// def NoBarrel : Predicate<"!Subtarget.hasBarrel()">;
+def HasDiv : Predicate<"Subtarget.hasDiv()">;
+def HasMul : Predicate<"Subtarget.hasMul()">;
+// def HasFSL : Predicate<"Subtarget.hasFSL()">;
+// def HasEFSL : Predicate<"Subtarget.hasEFSL()">;
+// def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">;
+// def HasException : Predicate<"Subtarget.hasException()">;
+def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">;
+def HasFPU : Predicate<"Subtarget.hasFPU()">;
+// def HasESR : Predicate<"Subtarget.hasESR()">;
+// def HasPVR : Predicate<"Subtarget.hasPVR()">;
+def HasMul64 : Predicate<"Subtarget.hasMul64()">;
+def HasSqrt : Predicate<"Subtarget.hasSqrt()">;
+// def HasMMU : Predicate<"Subtarget.hasMMU()">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+def MBlazeMemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let SuperClasses = [];
+}
+
+def MBlazeFslAsmOperand : AsmOperandClass {
+ let Name = "Fsl";
+ let SuperClasses = [];
+}
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def uimm5 : Operand<i32>;
+def uimm15 : Operand<i32>;
+def fimm : Operand<f32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// FSL Operand
+def fslimm : Operand<i32> {
+ let PrintMethod = "printFSLImm";
+ let ParserMatchClass = MBlazeFslAsmOperand;
+}
+
+// Address operand
+def memri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR, simm16);
+ let ParserMatchClass = MBlazeMemAsmOperand;
+}
+
+def memrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR, GPR);
+ let ParserMatchClass = MBlazeMemAsmOperand;
+}
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+def immSExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}]>;
+
+// Node immediate fits as 16-bit zero extended on target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are caught.
+// e.g. addiu, sltiu
+def immZExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}]>;
+
+// FSL immediate field must fit in 4 bits.
+def immZExt4 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
+}]>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+}]>;
+
+// MBlaze Address Mode. SDNode frameindex could possibily be a match
+// since load and store instructions from stack used it.
+def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>;
+def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack alignment is always done with addiu, we need a 16-bit immediate
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt),
+ "#ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MBlazePseudo<(outs),
+ (ins uimm16:$amt1, simm16:$amt2),
+ "#ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_ALU>;
+
+class ArithI32<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class ShiftI<bits<6> op, bits<2> flags, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ SHT<op, flags, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_SHT>;
+
+class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIC_ALU>;
+
+class ArithN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$c, GPR:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TBR<op, (outs GPR:$dst), (ins Od:$c, GPR:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Misc Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIC_ALU>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))],
+ IIC_ALU>;
+
+class LogicI32<bits<6> op, string instr_asm> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class PatCmp<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+
+let mayLoad = 1 in {
+class LoadM<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs GPR:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [], IIC_MEMl>;
+}
+
+class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs GPR:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
+
+let mayStore = 1 in {
+class StoreM<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [], IIC_MEMs>;
+}
+
+class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs), (ins GPR:$dst, memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$target),
+ !strconcat(instr_asm, " $target"),
+ [], IIC_BR> {
+ let rd = 0x0;
+ let ra = br;
+ let Form = FCCR;
+}
+
+class BranchI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, " $target"),
+ [], IIC_BR> {
+ let rd = 0;
+ let ra = br;
+ let Form = FCCI;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch and Link Instructions
+//===----------------------------------------------------------------------===//
+class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$link, GPR:$target),
+ !strconcat(instr_asm, " $link, $target"),
+ [], IIC_BRl> {
+ let ra = br;
+ let Form = FRCR;
+}
+
+class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins GPR:$link, calltarget:$target),
+ !strconcat(instr_asm, " $link, $target"),
+ [], IIC_BRl> {
+ let ra = br;
+ let Form = FRCI;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional Branch Instructions
+//===----------------------------------------------------------------------===//
+class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs),
+ (ins GPR:$a, GPR:$b),
+ !strconcat(instr_asm, " $a, $b"),
+ [], IIC_BRc> {
+ let rd = br;
+ let Form = FCRR;
+}
+
+class BranchCI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins GPR:$a, brtarget:$offset),
+ !strconcat(instr_asm, " $a, $offset"),
+ [], IIC_BRc> {
+ let rd = br;
+ let Form = FCRI;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1, isAsCheapAsAMove = 1 in {
+ def ADDK : Arith<0x04, 0x000, "addk ", add, IIC_ALU>;
+ def AND : Logic<0x21, 0x000, "and ", and>;
+ def OR : Logic<0x20, 0x000, "or ", or>;
+ def XOR : Logic<0x22, 0x000, "xor ", xor>;
+
+ let Predicates=[HasPatCmp] in {
+ def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">;
+ def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">;
+ def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">;
+ }
+
+ let Defs = [CARRY] in {
+ def ADD : Arith<0x00, 0x000, "add ", addc, IIC_ALU>;
+
+ let Uses = [CARRY] in {
+ def ADDC : Arith<0x02, 0x000, "addc ", adde, IIC_ALU>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def ADDKC : ArithN<0x06, 0x000, "addkc ", IIC_ALU>;
+ }
+}
+
+let isAsCheapAsAMove = 1 in {
+ def ANDN : ArithN<0x23, 0x000, "andn ", IIC_ALU>;
+ def CMP : ArithN<0x05, 0x001, "cmp ", IIC_ALU>;
+ def CMPU : ArithN<0x05, 0x003, "cmpu ", IIC_ALU>;
+ def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIC_ALU>;
+
+ let Defs = [CARRY] in {
+ def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIC_ALU>;
+
+ let Uses = [CARRY] in {
+ def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIC_ALU>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIC_ALU>;
+ }
+}
+
+let isCommutable = 1, Predicates=[HasMul] in {
+ def MUL : Arith<0x10, 0x000, "mul ", mul, IIC_ALUm>;
+}
+
+let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
+ def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIC_ALUm>;
+ def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIC_ALUm>;
+}
+
+let Predicates=[HasMul,HasMul64] in {
+ def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIC_ALUm>;
+}
+
+let Predicates=[HasBarrel] in {
+ def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIC_SHT>;
+ def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIC_SHT>;
+ def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIC_SHT>;
+ def BSRLI : ShiftI<0x19, 0x0, "bsrli ", srl, uimm5, immZExt5>;
+ def BSRAI : ShiftI<0x19, 0x1, "bsrai ", sra, uimm5, immZExt5>;
+ def BSLLI : ShiftI<0x19, 0x2, "bslli ", shl, uimm5, immZExt5>;
+}
+
+let Predicates=[HasDiv] in {
+ def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIC_ALUd>;
+ def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIC_ALUd>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze immediate mode arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isAsCheapAsAMove = 1 in {
+ def ADDIK : ArithI<0x0C, "addik ", add, simm16, immSExt16>;
+ def RSUBIK : ArithRI<0x0D, "rsubik ", sub, simm16, immSExt16>;
+ def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>;
+ def ANDI : LogicI<0x29, "andi ", and>;
+ def ORI : LogicI<0x28, "ori ", or>;
+ def XORI : LogicI<0x2A, "xori ", xor>;
+
+ let Defs = [CARRY] in {
+ def ADDI : ArithI<0x08, "addi ", addc, simm16, immSExt16>;
+ def RSUBI : ArithRI<0x09, "rsubi ", subc, simm16, immSExt16>;
+
+ let Uses = [CARRY] in {
+ def ADDIC : ArithI<0x0A, "addic ", adde, simm16, immSExt16>;
+ def RSUBIC : ArithRI<0x0B, "rsubic ", sube, simm16, immSExt16>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def ADDIKC : ArithNI<0x0E, "addikc ", simm16, immSExt16>;
+ def RSUBIKC : ArithRNI<0x0F, "rsubikc", simm16, immSExt16>;
+ }
+}
+
+let Predicates=[HasMul] in {
+ def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze memory access instructions
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ let neverHasSideEffects = 1 in {
+ def LBU : LoadM<0x30, 0x000, "lbu ">;
+ def LBUR : LoadM<0x30, 0x200, "lbur ">;
+
+ def LHU : LoadM<0x31, 0x000, "lhu ">;
+ def LHUR : LoadM<0x31, 0x200, "lhur ">;
+
+ def LW : LoadM<0x32, 0x000, "lw ">;
+ def LWR : LoadM<0x32, 0x200, "lwr ">;
+
+ let Defs = [CARRY] in {
+ def LWX : LoadM<0x32, 0x400, "lwx ">;
+ }
+ }
+
+ def LBUI : LoadMI<0x38, "lbui ", zextloadi8>;
+ def LHUI : LoadMI<0x39, "lhui ", zextloadi16>;
+ def LWI : LoadMI<0x3A, "lwi ", load>;
+}
+
+def SB : StoreM<0x34, 0x000, "sb ">;
+def SBR : StoreM<0x34, 0x200, "sbr ">;
+
+def SH : StoreM<0x35, 0x000, "sh ">;
+def SHR : StoreM<0x35, 0x200, "shr ">;
+
+def SW : StoreM<0x36, 0x000, "sw ">;
+def SWR : StoreM<0x36, 0x200, "swr ">;
+
+let Defs = [CARRY] in {
+ def SWX : StoreM<0x36, 0x400, "swx ">;
+}
+
+def SBI : StoreMI<0x3C, "sbi ", truncstorei8>;
+def SHI : StoreMI<0x3D, "shi ", truncstorei16>;
+def SWI : StoreMI<0x3E, "swi ", store>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze branch instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BRI : BranchI<0x2E, 0x00, "bri ">;
+ def BRAI : BranchI<0x2E, 0x08, "brai ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BEQI : BranchCI<0x2F, 0x00, "beqi ">;
+ def BNEI : BranchCI<0x2F, 0x01, "bnei ">;
+ def BLTI : BranchCI<0x2F, 0x02, "blti ">;
+ def BLEI : BranchCI<0x2F, 0x03, "blei ">;
+ def BGTI : BranchCI<0x2F, 0x04, "bgti ">;
+ def BGEI : BranchCI<0x2F, 0x05, "bgei ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1,
+ isBarrier = 1 in {
+ def BR : Branch<0x26, 0x00, 0x000, "br ">;
+ def BRA : Branch<0x26, 0x08, 0x000, "bra ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BEQ : BranchC<0x27, 0x00, 0x000, "beq ">;
+ def BNE : BranchC<0x27, 0x01, 0x000, "bne ">;
+ def BLT : BranchC<0x27, 0x02, 0x000, "blt ">;
+ def BLE : BranchC<0x27, 0x03, 0x000, "ble ">;
+ def BGT : BranchC<0x27, 0x04, 0x000, "bgt ">;
+ def BGE : BranchC<0x27, 0x05, 0x000, "bge ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+ isBarrier = 1 in {
+ def BRID : BranchI<0x2E, 0x10, "brid ">;
+ def BRAID : BranchI<0x2E, 0x18, "braid ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BEQID : BranchCI<0x2F, 0x10, "beqid ">;
+ def BNEID : BranchCI<0x2F, 0x11, "bneid ">;
+ def BLTID : BranchCI<0x2F, 0x12, "bltid ">;
+ def BLEID : BranchCI<0x2F, 0x13, "bleid ">;
+ def BGTID : BranchCI<0x2F, 0x14, "bgtid ">;
+ def BGEID : BranchCI<0x2F, 0x15, "bgeid ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BRD : Branch<0x26, 0x10, 0x000, "brd ">;
+ def BRAD : Branch<0x26, 0x18, 0x000, "brad ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ">;
+ def BNED : BranchC<0x27, 0x11, 0x000, "bned ">;
+ def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ">;
+ def BLED : BranchC<0x27, 0x13, 0x000, "bled ">;
+ def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ">;
+ def BGED : BranchC<0x27, 0x15, 0x000, "bged ">;
+}
+
+let isCall =1, hasDelaySlot = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+ Uses = [R1] in {
+ def BRLID : BranchLI<0x2E, 0x14, "brlid ">;
+ def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+}
+
+let isCall = 1, hasDelaySlot = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+ Uses = [R1] in {
+ def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">;
+ def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x10, Form=FCRI in {
+ def RTSD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtsd $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x11, Form=FCRI in {
+ def RTID : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtid $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x12, Form=FCRI in {
+ def RTBD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtbd $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x14, Form=FCRI in {
+ def RTED : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rted $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze misc instructions
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+ def NOP : MBlazeInst<0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
+}
+
+let Predicates=[HasPatCmp] in {
+ def CLZ : TCLZ<0x24, 0x00E0, (outs GPR:$dst), (ins GPR:$src),
+ "clz $dst, $src", [], IIC_ALU>;
+}
+
+def IMEMBAR : MBAR<0x2E, 0x0420004, (outs), (ins), "mbar 2", [], IIC_ALU>;
+def DMEMBAR : MBAR<0x2E, 0x0220004, (outs), (ins), "mbar 1", [], IIC_ALU>;
+def IDMEMBAR : MBAR<0x2E, 0x0020004, (outs), (ins), "mbar 0", [], IIC_ALU>;
+
+let usesCustomInserter = 1 in {
+ def Select_CC : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
+ "; SELECT_CC PSEUDO!",
+ []>;
+
+ def ShiftL : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftL PSEUDO!",
+ []>;
+
+ def ShiftRA : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftRA PSEUDO!",
+ []>;
+
+ def ShiftRL : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftRL PSEUDO!",
+ []>;
+}
+
+let rb = 0 in {
+ def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src),
+ "sext16 $dst, $src", [], IIC_ALU>;
+ def SEXT8 : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src),
+ "sext8 $dst, $src", [], IIC_ALU>;
+ let Defs = [CARRY] in {
+ def SRL : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src),
+ "srl $dst, $src", [], IIC_ALU>;
+ def SRA : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src),
+ "sra $dst, $src", [], IIC_ALU>;
+ let Uses = [CARRY] in {
+ def SRC : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src),
+ "src $dst, $src", [], IIC_ALU>;
+ }
+ }
+}
+
+let isCodeGenOnly=1 in {
+ def ADDIK32 : ArithI32<0x08, "addik ", simm16, immSExt16>;
+ def ORI32 : LogicI32<0x28, "ori ">;
+ def BRLID32 : BranchLI<0x2E, 0x14, "brlid ">;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. instructions
+//===----------------------------------------------------------------------===//
+let Form=FRCS in {
+ def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src),
+ "mfs $dst, $src", [], IIC_ALU>;
+}
+
+let Form=FCRCS in {
+ def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src),
+ "mts $dst, $src", [], IIC_ALU>;
+}
+
+def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set),
+ "msrset $dst, $set", [], IIC_ALU>;
+
+def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr),
+ "msrclr $dst, $clr", [], IIC_ALU>;
+
+let rd=0x0, Form=FCRR in {
+ def WDC : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b),
+ "wdc $a, $b", [], IIC_WDC>;
+ def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b),
+ "wdc.flush $a, $b", [], IIC_WDC>;
+ def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b),
+ "wdc.clear $a, $b", [], IIC_WDC>;
+ def WIC : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b),
+ "wic $a, $b", [], IIC_WDC>;
+}
+
+def BRK : BranchL<0x26, 0x0C, 0x000, "brk ">;
+def BRKI : BranchLI<0x2E, 0x0C, "brki ">;
+
+def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm),
+ "imm $imm", [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions for atomic operations
+//===----------------------------------------------------------------------===//
+let usesCustomInserter=1 in {
+ def CAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$cmp, GPR:$swp),
+ "# atomic compare and swap",
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$cmp, GPR:$swp))]>;
+
+ def SWP32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$swp),
+ "# atomic swap",
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$swp))]>;
+
+ def LAA32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and add",
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and sub",
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAD32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and and",
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAO32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and or",
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAX32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and xor",
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAN32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and nand",
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>;
+
+ def MEMBARRIER : MBlazePseudo<(outs), (ins),
+ "# memory barrier",
+ [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 0), (ADDK (i32 R0), (i32 R0))>;
+def : Pat<(i32 immSExt16:$imm), (ADDIK (i32 R0), imm:$imm)>;
+def : Pat<(i32 immZExt16:$imm), (ORI (i32 R0), imm:$imm)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm), (ADDIK (i32 R0), imm:$imm)>;
+
+// In register sign extension
+def : Pat<(sext_inreg GPR:$src, i16), (SEXT16 GPR:$src)>;
+def : Pat<(sext_inreg GPR:$src, i8), (SEXT8 GPR:$src)>;
+
+// Call
+def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)),
+ (BRLID (i32 R15), tglobaladdr:$dst)>;
+
+def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),
+ (BRLID (i32 R15), texternalsym:$dst)>;
+
+def : Pat<(MBlazeJmpLink GPR:$dst),
+ (BRALD (i32 R15), GPR:$dst)>;
+
+// Shift Instructions
+def : Pat<(shl GPR:$L, GPR:$R), (ShiftL GPR:$L, GPR:$R)>;
+def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
+def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
+
+// SET_CC operations
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 1)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 2)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 6)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 6)>;
+
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 6)>;
+
+// SELECT operations
+def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
+ (Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
+
+// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 1)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 2)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 6)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 6)>;
+
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 6)>;
+
+// Ret instructions
+def : Pat<(MBlazeRet GPR:$target), (RTSD GPR:$target, 0x8)>;
+def : Pat<(MBlazeIRet GPR:$target), (RTID GPR:$target, 0x0)>;
+
+// BR instructions
+def : Pat<(br bb:$T), (BRID bb:$T)>;
+def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
+
+// BRCOND instructions
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETEQ), bb:$T),
+ (BEQID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETNE), bb:$T),
+ (BNEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGT), bb:$T),
+ (BGTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLT), bb:$T),
+ (BLTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGE), bb:$T),
+ (BGEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLE), bb:$T),
+ (BLEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGT), bb:$T),
+ (BGTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULT), bb:$T),
+ (BLTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGE), bb:$T),
+ (BGEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULE), bb:$T),
+ (BLEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (i32 GPR:$C), bb:$T),
+ (BNEID GPR:$C, bb:$T)>;
+
+// Jump tables, global addresses, and constant pools
+def : Pat<(MBWrapper tglobaladdr:$in), (ORI (i32 R0), tglobaladdr:$in)>;
+def : Pat<(MBWrapper tjumptable:$in), (ORI (i32 R0), tjumptable:$in)>;
+def : Pat<(MBWrapper tconstpool:$in), (ORI (i32 R0), tconstpool:$in)>;
+
+// Misc instructions
+def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>;
+
+// Convert any extend loads into zero extend loads
+def : Pat<(extloadi8 iaddr:$src), (i32 (LBUI iaddr:$src))>;
+def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>;
+def : Pat<(extloadi8 xaddr:$src), (i32 (LBU xaddr:$src))>;
+def : Pat<(extloadi16 xaddr:$src), (i32 (LHU xaddr:$src))>;
+
+// 32-bit load and store
+def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
+def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
+
+// 16-bit load and store
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$ad), (SH GPR:$dst, xaddr:$ad)>;
+def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
+
+// 8-bit load and store
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$ad), (SB GPR:$dst, xaddr:$ad)>;
+def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
+
+// Peepholes
+def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>;
+
+// Atomic fence
+def : Pat<(atomic_fence (imm), (imm)), (MEMBARRIER)>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFSL.td"
+include "MBlazeInstrFPU.td"
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
new file mode 100644
index 000000000000..8d262a01e706
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -0,0 +1,112 @@
+//===-- MBlazeIntrinsicInfo.cpp - Intrinsic Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeIntrinsicInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+using namespace llvm;
+
+namespace mblazeIntrinsic {
+
+ enum ID {
+ last_non_mblaze_intrinsic = Intrinsic::num_intrinsics-1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_mblaze_intrinsics
+ };
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+}
+
+std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics)
+ return 0;
+ assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics &&
+ "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupName(const char *Name, unsigned Len) const {
+ if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+ || Name[2] != 'v' || Name[3] != 'm')
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupGCCName(const char *Name) const {
+ return mblazeIntrinsic::getIntrinsicForGCCBuiltin("mblaze",Name);
+}
+
+bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
+ if (IntrID == 0)
+ return false;
+
+ unsigned id = IntrID - Intrinsic::num_intrinsics + 1;
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+/// This defines the "getAttributes(LLVMContext &C, ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+static FunctionType *getType(LLVMContext &Context, unsigned id) {
+ Type *ResultTy = NULL;
+ SmallVector<Type*, 8> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTy) const {
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ AttributeSet AList = getAttributes(M->getContext(),
+ (mblazeIntrinsic::ID) IntrID);
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ getType(M->getContext(), IntrID),
+ AList));
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
new file mode 100644
index 000000000000..34f379230def
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
@@ -0,0 +1,33 @@
+//===-- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MBLAZEINTRINSICS_H
+#define MBLAZEINTRINSICS_H
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+
+ class MBlazeIntrinsicInfo : public TargetIntrinsicInfo {
+ public:
+ std::string getName(unsigned IntrID, Type **Tys = 0,
+ unsigned numTys = 0) const;
+ unsigned lookupName(const char *Name, unsigned Len) const;
+ unsigned lookupGCCName(const char *Name) const;
+ bool isOverloaded(unsigned IID) const;
+ Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0,
+ unsigned numTys = 0) const;
+ };
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
new file mode 100644
index 000000000000..b5dc59547bbf
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -0,0 +1,131 @@
+//===-- IntrinsicsMBlaze.td - Defines MBlaze intrinsics ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the MicroBlaze-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Definitions for all MBlaze intrinsics.
+//
+
+// MBlaze intrinsic classes.
+let TargetPrefix = "mblaze", isTarget = 1 in {
+ class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+
+ class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
+
+ class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Get Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_get : GCCBuiltin<"__builtin_mblaze_fsl_get">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_aget : GCCBuiltin<"__builtin_mblaze_fsl_aget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_cget : GCCBuiltin<"__builtin_mblaze_fsl_cget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_caget : GCCBuiltin<"__builtin_mblaze_fsl_caget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eget : GCCBuiltin<"__builtin_mblaze_fsl_eget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eaget : GCCBuiltin<"__builtin_mblaze_fsl_eaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecget : GCCBuiltin<"__builtin_mblaze_fsl_ecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecaget : GCCBuiltin<"__builtin_mblaze_fsl_ecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_nget : GCCBuiltin<"__builtin_mblaze_fsl_nget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_naget : GCCBuiltin<"__builtin_mblaze_fsl_naget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncget : GCCBuiltin<"__builtin_mblaze_fsl_ncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncaget : GCCBuiltin<"__builtin_mblaze_fsl_ncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neget : GCCBuiltin<"__builtin_mblaze_fsl_neget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neaget : GCCBuiltin<"__builtin_mblaze_fsl_neaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necget : GCCBuiltin<"__builtin_mblaze_fsl_necget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necaget : GCCBuiltin<"__builtin_mblaze_fsl_necaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tget : GCCBuiltin<"__builtin_mblaze_fsl_tget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_taget : GCCBuiltin<"__builtin_mblaze_fsl_taget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcget : GCCBuiltin<"__builtin_mblaze_fsl_tcget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcaget : GCCBuiltin<"__builtin_mblaze_fsl_tcaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teget : GCCBuiltin<"__builtin_mblaze_fsl_teget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teaget : GCCBuiltin<"__builtin_mblaze_fsl_teaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecget : GCCBuiltin<"__builtin_mblaze_fsl_tecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecaget : GCCBuiltin<"__builtin_mblaze_fsl_tecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnget : GCCBuiltin<"__builtin_mblaze_fsl_tnget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnaget : GCCBuiltin<"__builtin_mblaze_fsl_tnaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncget : GCCBuiltin<"__builtin_mblaze_fsl_tncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncaget : GCCBuiltin<"__builtin_mblaze_fsl_tncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneget : GCCBuiltin<"__builtin_mblaze_fsl_tneget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneaget : GCCBuiltin<"__builtin_mblaze_fsl_tneaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecget : GCCBuiltin<"__builtin_mblaze_fsl_tnecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecaget : GCCBuiltin<"__builtin_mblaze_fsl_tnecaget">,
+ MBFSL_Get_Intrinsic;
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Put Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_put : GCCBuiltin<"__builtin_mblaze_fsl_put">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_aput : GCCBuiltin<"__builtin_mblaze_fsl_aput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_cput : GCCBuiltin<"__builtin_mblaze_fsl_cput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_caput : GCCBuiltin<"__builtin_mblaze_fsl_caput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_nput : GCCBuiltin<"__builtin_mblaze_fsl_nput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_naput : GCCBuiltin<"__builtin_mblaze_fsl_naput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncput : GCCBuiltin<"__builtin_mblaze_fsl_ncput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncaput : GCCBuiltin<"__builtin_mblaze_fsl_ncaput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_tput : GCCBuiltin<"__builtin_mblaze_fsl_tput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_taput : GCCBuiltin<"__builtin_mblaze_fsl_taput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcput : GCCBuiltin<"__builtin_mblaze_fsl_tcput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcaput : GCCBuiltin<"__builtin_mblaze_fsl_tcaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnput : GCCBuiltin<"__builtin_mblaze_fsl_tnput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnaput : GCCBuiltin<"__builtin_mblaze_fsl_tnaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncput : GCCBuiltin<"__builtin_mblaze_fsl_tncput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncaput : GCCBuiltin<"__builtin_mblaze_fsl_tncaput">,
+ MBFSL_PutT_Intrinsic;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
new file mode 100644
index 000000000000..ad414ac40fd7
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -0,0 +1,167 @@
+//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MBlaze MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCInstLower.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+MCSymbol *MBlazeMCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MBlazeMCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ bool ignored;
+ APFloat FVal = MO.getFPImm()->getValueAPF();
+ FVal.convert(APFloat::IEEEsingle, APFloat::rmTowardZero, &ignored);
+
+ APInt IVal = FVal.bitcastToAPInt();
+ uint64_t Val = *IVal.getRawData();
+ MCOp = MCOperand::CreateImm(Val);
+ break;
+ }
+ case MachineOperand::MO_RegisterMask:
+ continue;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
new file mode 100644
index 000000000000..8ab2c9a3a633
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -0,0 +1,47 @@
+//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MCINSTLOWER_H
+#define MBLAZE_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class AsmPrinter;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+
+ /// MBlazeMCInstLower - This class is used to lower an MachineInstr
+ /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MBlazeMCInstLower {
+ MCContext &Ctx;
+
+ AsmPrinter &Printer;
+public:
+ MBlazeMCInstLower(MCContext &ctx, AsmPrinter &printer)
+ : Ctx(ctx), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.cpp
new file mode 100644
index 000000000000..2217b5477d6b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.cpp
@@ -0,0 +1,14 @@
+//===-- MBlazeMachineFunctionInfo.cpp - Private data ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMachineFunction.h"
+
+using namespace llvm;
+
+void MBlazeFunctionInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
new file mode 100644
index 000000000000..10d507f37bbc
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -0,0 +1,169 @@
+//===-- MBlazeMachineFunctionInfo.h - Private data --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MACHINE_FUNCTION_INFO_H
+#define MBLAZE_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// MBlazeFunctionInfo - This class is derived from MachineFunction private
+/// MBlaze target-specific information for each MachineFunction.
+class MBlazeFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// Holds for each function where on the stack the Frame Pointer must be
+ /// saved. This is used on Prologue and Epilogue to emit FP save/restore
+ int FPStackOffset;
+
+ /// Holds for each function where on the stack the Return Address must be
+ /// saved. This is used on Prologue and Epilogue to emit RA save/restore
+ int RAStackOffset;
+
+ /// MBlazeFIHolder - Holds a FrameIndex and it's Stack Pointer Offset
+ struct MBlazeFIHolder {
+
+ int FI;
+ int SPOffset;
+
+ MBlazeFIHolder(int FrameIndex, int StackPointerOffset)
+ : FI(FrameIndex), SPOffset(StackPointerOffset) {}
+ };
+
+ /// When PIC is used the GP must be saved on the stack on the function
+ /// prologue and must be reloaded from this stack location after every
+ /// call. A reference to its stack location and frame index must be kept
+ /// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
+ MBlazeFIHolder GPHolder;
+
+ /// On LowerFormalArguments the stack size is unknown, so the Stack
+ /// Pointer Offset calculation of "not in register arguments" must be
+ /// postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 16> FnLoadArgs;
+ bool HasLoadArgs;
+
+ // When VarArgs, we must write registers back to caller stack, preserving
+ // on register arguments. Since the stack size is unknown on
+ // LowerFormalArguments, the Stack Pointer Offset calculation must be
+ // postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs;
+ bool HasStoreVarArgs;
+
+ // When determining the final stack layout some of the frame indexes may
+ // be replaced by new frame indexes that reside in the caller's stack
+ // frame. The replacements are recorded in this structure.
+ DenseMap<int,int> FIReplacements;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ // VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// LiveInFI - keeps track of the frame indexes in a callers stack
+ /// frame that are live into a function.
+ SmallVector<int, 16> LiveInFI;
+
+public:
+ MBlazeFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0), GPHolder(-1,-1), HasLoadArgs(false),
+ HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
+ VarArgsFrameIndex(0), LiveInFI()
+ {}
+
+ int getFPStackOffset() const { return FPStackOffset; }
+ void setFPStackOffset(int Off) { FPStackOffset = Off; }
+
+ int getRAStackOffset() const { return RAStackOffset; }
+ void setRAStackOffset(int Off) { RAStackOffset = Off; }
+
+ int getGPStackOffset() const { return GPHolder.SPOffset; }
+ int getGPFI() const { return GPHolder.FI; }
+ void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
+ void setGPFI(int FI) { GPHolder.FI = FI; }
+ bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
+
+ bool hasLoadArgs() const { return HasLoadArgs; }
+ bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+ void recordLiveIn(int FI) {
+ LiveInFI.push_back(FI);
+ }
+
+ bool isLiveIn(int FI) {
+ for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i)
+ if (FI == LiveInFI[i]) return true;
+
+ return false;
+ }
+
+ const SmallVector<int, 16>& getLiveIn() const { return LiveInFI; }
+
+ void recordReplacement(int OFI, int NFI) {
+ FIReplacements.insert(std::make_pair(OFI,NFI));
+ }
+
+ bool hasReplacement(int OFI) const {
+ return FIReplacements.find(OFI) != FIReplacements.end();
+ }
+
+ int getReplacement(int OFI) const {
+ return FIReplacements.lookup(OFI);
+ }
+
+ void recordLoadArgsFI(int FI, int SPOffset) {
+ if (!HasLoadArgs) HasLoadArgs=true;
+ FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+
+ void recordStoreVarArgsFI(int FI, int SPOffset) {
+ if (!HasStoreVarArgs) HasStoreVarArgs=true;
+ FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+
+ void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasLoadArgs()) return;
+ for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+ MFI->setObjectOffset(FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset);
+ }
+
+ void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasStoreVarArgs()) return;
+ for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+ MFI->setObjectOffset(FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset);
+ }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+};
+
+} // end of namespace llvm
+
+#endif // MBLAZE_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
new file mode 100644
index 000000000000..bd83afc1cc83
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -0,0 +1,145 @@
+//===-- MBlazeRegisterInfo.cpp - MBlaze Register Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-info"
+
+#include "MBlazeRegisterInfo.h"
+#include "MBlaze.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
+using namespace llvm;
+
+MBlazeRegisterInfo::
+MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
+ : MBlazeGenRegisterInfo(MBlaze::R15), Subtarget(ST), TII(tii) {}
+
+unsigned MBlazeRegisterInfo::getPICCallReg() {
+ return MBlaze::R20;
+}
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// MBlaze Callee Saved Registers
+const uint16_t* MBlazeRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+ // MBlaze callee-save register range is R20 - R31
+ static const uint16_t CalleeSavedRegs[] = {
+ MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23,
+ MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27,
+ MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+BitVector MBlazeRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(MBlaze::R0);
+ Reserved.set(MBlaze::R1);
+ Reserved.set(MBlaze::R2);
+ Reserved.set(MBlaze::R13);
+ Reserved.set(MBlaze::R14);
+ Reserved.set(MBlaze::R15);
+ Reserved.set(MBlaze::R16);
+ Reserved.set(MBlaze::R17);
+ Reserved.set(MBlaze::R18);
+ Reserved.set(MBlaze::R19);
+ return Reserved;
+}
+
+// FrameIndex represent objects inside a abstract stack.
+// We must replace FrameIndex with an stack/frame pointer
+// direct reference.
+void MBlazeRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum, RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned OFIOperandNum = FIOperandNum == 2 ? 1 : 2;
+
+ DEBUG(dbgs() << "\nFunction : " << MF.getName() << "\n";
+ dbgs() << "<--------->\n" << MI);
+
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ int stackSize = MFI->getStackSize();
+ int spOffset = MFI->getObjectOffset(FrameIndex);
+
+ DEBUG(MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ dbgs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n"
+ << "isFixed : " << MFI->isFixedObjectIndex(FrameIndex) << "\n"
+ << "isLiveIn : " << MBlazeFI->isLiveIn(FrameIndex) << "\n"
+ << "isSpill : " << MFI->isSpillSlotObjectIndex(FrameIndex)
+ << "\n" );
+
+ // as explained on LowerFormalArguments, detect negative offsets
+ // and adjust SPOffsets considering the final stack size.
+ int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset;
+ Offset += MI.getOperand(OFIOperandNum).getImm();
+
+ DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ MI.getOperand(OFIOperandNum).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
+}
+
+void MBlazeRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const {
+ // Set the stack offset where GP must be saved/loaded from.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->needGPSaveRestore())
+ MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset());
+}
+
+unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? MBlaze::R19 : MBlaze::R1;
+}
+
+unsigned MBlazeRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+}
+
+unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
new file mode 100644
index 000000000000..497f3866c9ca
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -0,0 +1,71 @@
+//===-- MBlazeRegisterInfo.h - MBlaze Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEREGISTERINFO_H
+#define MBLAZEREGISTERINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MBlazeGenRegisterInfo.inc"
+
+namespace llvm {
+class MBlazeSubtarget;
+class TargetInstrInfo;
+class Type;
+
+namespace MBlaze {
+ /// SubregIndex - The index of various sized subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
+ /// MBlazeRegisterInfo.td file.
+ enum SubregIndex {
+ SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
+ };
+}
+
+struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
+ const MBlazeSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget,
+ const TargetInstrInfo &tii);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg();
+
+ /// Code Generation virtual methods...
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// Stack Frame Processing Methods
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ /// Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
new file mode 100644
index 000000000000..64cae5cff85d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -0,0 +1,148 @@
+//===-- MBlazeRegisterInfo.td - MBlaze Register defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MicroBlaze register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MBlazeReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "MBlaze";
+}
+
+// Special purpose registers have 15-bit values
+class MBlazeSReg<string n> : Register<n> {
+ field bits<15> Num;
+ let Namespace = "MBlaze";
+}
+
+// MBlaze general purpose registers
+class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
+ let Num = num;
+}
+
+// MBlaze special purpose registers
+class MBlazeSPRReg<bits<15> num, string n> : MBlazeSReg<n> {
+ let Num = num;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "MBlaze" in {
+ // General Purpose Registers
+ def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : MBlazeGPRReg< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : MBlazeGPRReg< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : MBlazeGPRReg< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : MBlazeGPRReg< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : MBlazeGPRReg< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : MBlazeGPRReg< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : MBlazeGPRReg< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : MBlazeGPRReg< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : MBlazeGPRReg< 10, "r10">, DwarfRegNum<[10]>;
+ def R11 : MBlazeGPRReg< 11, "r11">, DwarfRegNum<[11]>;
+ def R12 : MBlazeGPRReg< 12, "r12">, DwarfRegNum<[12]>;
+ def R13 : MBlazeGPRReg< 13, "r13">, DwarfRegNum<[13]>;
+ def R14 : MBlazeGPRReg< 14, "r14">, DwarfRegNum<[14]>;
+ def R15 : MBlazeGPRReg< 15, "r15">, DwarfRegNum<[15]>;
+ def R16 : MBlazeGPRReg< 16, "r16">, DwarfRegNum<[16]>;
+ def R17 : MBlazeGPRReg< 17, "r17">, DwarfRegNum<[17]>;
+ def R18 : MBlazeGPRReg< 18, "r18">, DwarfRegNum<[18]>;
+ def R19 : MBlazeGPRReg< 19, "r19">, DwarfRegNum<[19]>;
+ def R20 : MBlazeGPRReg< 20, "r20">, DwarfRegNum<[20]>;
+ def R21 : MBlazeGPRReg< 21, "r21">, DwarfRegNum<[21]>;
+ def R22 : MBlazeGPRReg< 22, "r22">, DwarfRegNum<[22]>;
+ def R23 : MBlazeGPRReg< 23, "r23">, DwarfRegNum<[23]>;
+ def R24 : MBlazeGPRReg< 24, "r24">, DwarfRegNum<[24]>;
+ def R25 : MBlazeGPRReg< 25, "r25">, DwarfRegNum<[25]>;
+ def R26 : MBlazeGPRReg< 26, "r26">, DwarfRegNum<[26]>;
+ def R27 : MBlazeGPRReg< 27, "r27">, DwarfRegNum<[27]>;
+ def R28 : MBlazeGPRReg< 28, "r28">, DwarfRegNum<[28]>;
+ def R29 : MBlazeGPRReg< 29, "r29">, DwarfRegNum<[29]>;
+ def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>;
+ def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>;
+
+ // Special Purpose Registers
+ def RPC : MBlazeSPRReg<0x0000, "rpc">, DwarfRegNum<[32]>;
+ def RMSR : MBlazeSPRReg<0x0001, "rmsr">, DwarfRegNum<[33]>;
+ def REAR : MBlazeSPRReg<0x0003, "rear">, DwarfRegNum<[34]>;
+ def RESR : MBlazeSPRReg<0x0005, "resr">, DwarfRegNum<[35]>;
+ def RFSR : MBlazeSPRReg<0x0007, "rfsr">, DwarfRegNum<[36]>;
+ def RBTR : MBlazeSPRReg<0x000B, "rbtr">, DwarfRegNum<[37]>;
+ def REDR : MBlazeSPRReg<0x000D, "redr">, DwarfRegNum<[38]>;
+ def RPID : MBlazeSPRReg<0x1000, "rpid">, DwarfRegNum<[39]>;
+ def RZPR : MBlazeSPRReg<0x1001, "rzpr">, DwarfRegNum<[40]>;
+ def RTLBX : MBlazeSPRReg<0x1002, "rtlbx">, DwarfRegNum<[41]>;
+ def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>;
+ def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>;
+ def RTLBSX : MBlazeSPRReg<0x1004, "rtlbsx">, DwarfRegNum<[44]>;
+ def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[45]>;
+ def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[46]>;
+ def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[47]>;
+ def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[48]>;
+ def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[49]>;
+ def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[50]>;
+ def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[51]>;
+ def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[52]>;
+ def RPVR8 : MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[53]>;
+ def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[54]>;
+ def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[55]>;
+ def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[56]>;
+
+ // The carry bit. In the Microblaze this is really bit 29 of the
+ // MSR register but this is the only bit of that register that we
+ // are interested in modeling.
+ def CARRY : MBlazeSPRReg<0x0000, "rmsr[c]">;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def GPR : RegisterClass<"MBlaze", [i32,f32], 32, (sequence "R%u", 0, 31)>;
+
+def SPR : RegisterClass<"MBlaze", [i32], 32, (add
+ // Reserved
+ RPC,
+ RMSR,
+ REAR,
+ RESR,
+ RFSR,
+ RBTR,
+ REDR,
+ RPID,
+ RZPR,
+ RTLBX,
+ RTLBLO,
+ RTLBHI,
+ RPVR0,
+ RPVR1,
+ RPVR2,
+ RPVR3,
+ RPVR4,
+ RPVR5,
+ RPVR6,
+ RPVR7,
+ RPVR8,
+ RPVR9,
+ RPVR10,
+ RPVR11
+ )>
+{
+ // None of the special purpose registers are allocatable.
+ let isAllocatable = 0;
+}
+
+def CRC : RegisterClass<"MBlaze", [i32], 32, (add CARRY)> {
+ let CopyCost = -1;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
new file mode 100644
index 000000000000..6387ee23ec9b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
@@ -0,0 +1,47 @@
+//===-- MBlazeRelocations.h - MBlaze Code Relocations -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZERELOCATIONS_H
+#define MBLAZERELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace MBlaze {
+ enum RelocationType {
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
+ reloc_picrel_word = 1,
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_word = 2,
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
new file mode 100644
index 000000000000..cd5691ce644f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
@@ -0,0 +1,50 @@
+//===-- MBlazeSchedule.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze functional units.
+//===----------------------------------------------------------------------===//
+def IF : FuncUnit;
+def ID : FuncUnit;
+def EX : FuncUnit;
+def MA : FuncUnit;
+def WB : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for MBlaze
+//===----------------------------------------------------------------------===//
+def IIC_ALU : InstrItinClass;
+def IIC_ALUm : InstrItinClass;
+def IIC_ALUd : InstrItinClass;
+def IIC_SHT : InstrItinClass;
+def IIC_FSLg : InstrItinClass;
+def IIC_FSLp : InstrItinClass;
+def IIC_MEMs : InstrItinClass;
+def IIC_MEMl : InstrItinClass;
+def IIC_FPU : InstrItinClass;
+def IIC_FPUd : InstrItinClass;
+def IIC_FPUf : InstrItinClass;
+def IIC_FPUi : InstrItinClass;
+def IIC_FPUs : InstrItinClass;
+def IIC_FPUc : InstrItinClass;
+def IIC_BR : InstrItinClass;
+def IIC_BRc : InstrItinClass;
+def IIC_BRl : InstrItinClass;
+def IIC_WDC : InstrItinClass;
+def IIC_Pseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for three stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule3.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for five stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule5.td"
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td
new file mode 100644
index 000000000000..20257a60a0fa
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td
@@ -0,0 +1,236 @@
+//===-- MBlazeSchedule3.td - MBlaze Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the three stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe3Itineraries : ProcessorItineraries<
+ [IF,ID,EX], [], [
+
+ // ALU instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the stages. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU multiply instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes three cycles. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALUm,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<3,[EX]>], // three cycles in execute stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU divide instruction with one destination register two register source
+ // operands. The instruction takes one cycle to execute in each the pipeline
+ // stages except the execute stage, which takes 34 cycles. The two
+ // source operands are read during the decode stage and the result is ready
+ // after the execute stage.
+ InstrItinData< IIC_ALUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<34,[EX]>], // 34 cycles in execute stage
+ [ 35 // result ready after 35 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Shift instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the pipeline stages
+ // except the execute stage, which takes two cycles. The two source operands
+ // are read during the decode stage and the result is ready after the execute
+ // stage.
+ InstrItinData< IIC_SHT,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch instruction with one source operand register. The instruction takes
+ // one cycle to execute in each of the pipeline stages. The source operand is
+ // read during the decode stage.
+ InstrItinData< IIC_BR,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 1 ]>, // first operand read after one cycle
+
+ // Conditional branch instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages. The
+ // two source operands are read during the decode stage.
+ InstrItinData< IIC_BRc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch and link instruction with one destination register and one source
+ // operand register. The instruction takes one cycle to execute in each of
+ // the pipeline stages. The source operand is read during the decode stage
+ // and the destination register is ready after the execute stage.
+ InstrItinData< IIC_BRl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 2 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Cache control instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages
+ // except the memory access stage, which takes two cycles. The source
+ // operands are read during the decode stage.
+ InstrItinData< IIC_WDC,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point instruction with one destination register and two source
+ // operand registers. The instruction takes one cycle to execute in each of
+ // the pipeline stages except the execute stage, which takes six cycles. The
+ // source operands are read during the decode stage and the results are ready
+ // after the execute stage.
+ InstrItinData< IIC_FPU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<6,[EX]>], // six cycles in execute stage
+ [ 7 // result ready after seven cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point divide instruction with one destination register and two
+ // source operand registers. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes 30
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<30,[EX]>], // one cycle in execute stage
+ [ 31 // result ready after 31 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Convert floating point to integer instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the execute stage,
+ // which takes seven cycles. The source operands are read during the decode
+ // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUi,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<7,[EX]>], // seven cycles in execute stage
+ [ 8 // result ready after eight cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Convert integer to floating point instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the execute stage,
+ // which takes six cycles. The source operands are read during the decode
+ // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUf,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<6,[EX]>], // six cycles in execute stage
+ [ 7 // result ready after seven cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point square root instruction with one destination register and
+ // one source operand register. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes 29
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<29,[EX]>], // 29 cycles in execute stage
+ [ 30 // result ready after 30 cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point comparison instruction with one destination register and
+ // two source operand registers. The instruction takes one cycle to execute
+ // in each of the pipeline stages except the execute stage, which takes three
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<3,[EX]>], // three cycles in execute stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // FSL get instruction with one register or immediate source operand and one
+ // destination register. The instruction takes one cycle to execute in each
+ // of the pipeline stages except the execute stage, which takes two cycles.
+ // The one source operand is read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_FSLg,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // FSL put instruction with either two register source operands or one
+ // register source operand and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes two
+ // cycles. The two source operands are read during the decode stage.
+ InstrItinData< IIC_FSLp,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Memory store instruction with either three register source operands or two
+ // register source operands and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes two
+ // cycles. All of the source operands are read during the decode stage.
+ InstrItinData< IIC_MEMs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 // second operand read after one cycle
+ , 1 ]>, // third operand read after one cycle
+
+ // Memory load instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes two cycles. All of
+ // the source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_MEMl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after four cycles
+ , 1 // second operand read after one cycle
+ , 1 ]> // third operand read after one cycle
+]>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td
new file mode 100644
index 000000000000..ab53b424ded3
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td
@@ -0,0 +1,267 @@
+//===-- MBlazeSchedule5.td - MBlaze Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the five stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe5Itineraries : ProcessorItineraries<
+ [IF,ID,EX,MA,WB], [], [
+
+ // ALU instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the stages. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU multiply instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages. The two source operands are read during the decode stage
+ // and the result is ready after the execute stage.
+ InstrItinData< IIC_ALUm,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU divide instruction with one destination register two register source
+ // operands. The instruction takes one cycle to execute in each the pipeline
+ // stages except the memory access stage, which takes 31 cycles. The two
+ // source operands are read during the decode stage and the result is ready
+ // after the memory access stage.
+ InstrItinData< IIC_ALUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<31,[MA]> // 31 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 33 // result ready after 33 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Shift instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the pipeline stages.
+ // The two source operands are read during the decode stage and the result is
+ // ready after the memory access stage.
+ InstrItinData< IIC_SHT,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch instruction with one source operand register. The instruction takes
+ // one cycle to execute in each of the pipeline stages. The source operand is
+ // read during the decode stage.
+ InstrItinData< IIC_BR,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 ]>, // first operand read after one cycle
+
+ // Conditional branch instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages. The
+ // two source operands are read during the decode stage.
+ InstrItinData< IIC_BRc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch and link instruction with one destination register and one source
+ // operand register. The instruction takes one cycle to execute in each of
+ // the pipeline stages. The source operand is read during the decode stage
+ // and the destination register is ready after the writeback stage.
+ InstrItinData< IIC_BRl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 4 // result ready after four cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Cache control instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages
+ // except the memory access stage, which takes two cycles. The source
+ // operands are read during the decode stage.
+ InstrItinData< IIC_WDC,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point instruction with one destination register and two source
+ // operand registers. The instruction takes one cycle to execute in each of
+ // the pipeline stages except the memory access stage, which takes two
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 5 // result ready after five cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point divide instruction with one destination register and two
+ // source operand registers. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the memory access stage, which takes 26
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<26,[MA]> // 26 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 29 // result ready after 29 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Convert floating point to integer instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the memory access stage,
+ // which takes three cycles. The source operands are read during the decode
+ // stage and the results are ready after the writeback stage.
+ InstrItinData< IIC_FPUi,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<3,[MA]> // three cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 6 // result ready after six cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Convert integer to floating point instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the memory access stage,
+ // which takes two cycles. The source operands are read during the decode
+ // stage and the results are ready after the writeback stage.
+ InstrItinData< IIC_FPUf,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 5 // result ready after five cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point square root instruction with one destination register and
+ // one source operand register. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the memory access stage, which takes 25
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPUs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<25,[MA]> // 25 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 28 // result ready after 28 cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point comparison instruction with one destination register and
+ // two source operand registers. The instruction takes one cycle to execute
+ // in each of the pipeline stages. The source operands are read during the
+ // decode stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // FSL get instruction with one register or immediate source operand and one
+ // destination register. The instruction takes one cycle to execute in each
+ // of the pipeline stages. The one source operand is read during the decode
+ // stage and the result is ready after the execute stage.
+ InstrItinData< IIC_FSLg,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // FSL put instruction with either two register source operands or one
+ // register source operand and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages. The two source operands are read during the
+ // decode stage.
+ InstrItinData< IIC_FSLp,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Memory store instruction with either three register source operands or two
+ // register source operands and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages. All of the source operands are read during
+ // the decode stage.
+ InstrItinData< IIC_MEMs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 // second operand read after one cycle
+ , 1 ]>, // third operand read after one cycle
+
+ // Memory load instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages. All of the source operands are read during the decode
+ // stage and the result is ready after the writeback stage.
+ InstrItinData< IIC_MEMl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 4 // result ready after four cycles
+ , 1 // second operand read after one cycle
+ , 1 ]> // third operand read after one cycle
+]>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..6a115b22bd4a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MBlazeSelectionDAGInfo.cpp - MBlaze SelectionDAG Info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-selectiondag-info"
+#include "MBlazeTargetMachine.h"
+using namespace llvm;
+
+MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
new file mode 100644
index 000000000000..9f8e2aad4b20
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MBlazeSelectionDAGInfo.h - MBlaze SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESELECTIONDAGINFO_H
+#define MBLAZESELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MBlazeTargetMachine;
+
+class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM);
+ ~MBlazeSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
new file mode 100644
index 000000000000..dc2ad29be2a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -0,0 +1,56 @@
+//===-- MBlazeSubtarget.cpp - MBlaze Subtarget Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlaze specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeSubtarget.h"
+#include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MBlazeGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MBlazeSubtarget::MBlazeSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS):
+ MBlazeGenSubtargetInfo(TT, CPU, FS),
+ HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false),
+ HasFPU(false), HasMul64(false), HasSqrt(false)
+{
+ // Parse features string.
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "mblaze";
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Only use instruction scheduling if the selected CPU has an instruction
+ // itinerary (the default CPU is the only one that doesn't).
+ HasItin = CPUName != "mblaze";
+ DEBUG(dbgs() << "CPU " << CPUName << "(" << HasItin << ")\n");
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+}
+
+bool MBlazeSubtarget::
+enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&MBlaze::GPRRegClass);
+ return HasItin && OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h
new file mode 100644
index 000000000000..ed43d21f30c5
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -0,0 +1,75 @@
+//===-- MBlazeSubtarget.h - Define Subtarget for the MBlaze ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESUBTARGET_H
+#define MBLAZESUBTARGET_H
+
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MBlazeGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class MBlazeSubtarget : public MBlazeGenSubtargetInfo {
+
+protected:
+ bool HasBarrel;
+ bool HasDiv;
+ bool HasMul;
+ bool HasPatCmp;
+ bool HasFPU;
+ bool HasMul64;
+ bool HasSqrt;
+ bool HasItin;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ MBlazeSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// Compute the number of maximum number of issues per cycle for the
+ /// MBlaze scheduling itineraries.
+ void computeIssueWidth();
+
+ /// enablePostRAScheduler - True at 'More' optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// getInstrItins - Return the instruction itineraies based on subtarget.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ bool hasItin() const { return HasItin; }
+ bool hasPCMP() const { return HasPatCmp; }
+ bool hasFPU() const { return HasFPU; }
+ bool hasSqrt() const { return HasSqrt; }
+ bool hasMul() const { return HasMul; }
+ bool hasMul64() const { return HasMul64; }
+ bool hasDiv() const { return HasDiv; }
+ bool hasBarrel() const { return HasBarrel; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
new file mode 100644
index 000000000000..bcdd32fed947
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -0,0 +1,81 @@
+//===-- MBlazeTargetMachine.cpp - Define TargetMachine for MBlaze ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about MBlaze target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeTargetMachine.h"
+#include "MBlaze.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMBlazeTarget() {
+ // Register the target.
+ RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
+// The stack is always 8 byte aligned
+// On function prologue, the stack is created by decrementing
+// its pointer. Once decremented, all references are done with positive
+// offset from the stack/frame pointer, using StackGrowsUp enables
+// an easier handling.
+MBlazeTargetMachine::
+MBlazeTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ DL("E-p:32:32:32-i8:8:8-i16:16:16"),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+}
+
+namespace {
+/// MBlaze Code Generator Pass Configuration Options.
+class MBlazePassConfig : public TargetPassConfig {
+public:
+ MBlazePassConfig(MBlazeTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ MBlazeTargetMachine &getMBlazeTargetMachine() const {
+ return getTM<MBlazeTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *MBlazeTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new MBlazePassConfig(this, PM);
+}
+
+// Install an instruction selector pass using
+// the ISelDag to gen MBlaze code.
+bool MBlazePassConfig::addInstSelector() {
+ addPass(createMBlazeISelDag(getMBlazeTargetMachine()));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. return true if -print-machineinstrs should
+// print out the code after the passes.
+bool MBlazePassConfig::addPreEmitPass() {
+ addPass(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine()));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
new file mode 100644
index 000000000000..956794dddaf9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -0,0 +1,80 @@
+//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_TARGETMACHINE_H
+#define MBLAZE_TARGETMACHINE_H
+
+#include "MBlazeFrameLowering.h"
+#include "MBlazeISelLowering.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeIntrinsicInfo.h"
+#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class formatted_raw_ostream;
+
+ class MBlazeTargetMachine : public LLVMTargetMachine {
+ MBlazeSubtarget Subtarget;
+ const DataLayout DL; // Calculates type size & alignment
+ MBlazeInstrInfo InstrInfo;
+ MBlazeFrameLowering FrameLowering;
+ MBlazeTargetLowering TLInfo;
+ MBlazeSelectionDAGInfo TSInfo;
+ MBlazeIntrinsicInfo IntrinsicInfo;
+ InstrItineraryData InstrItins;
+
+ public:
+ MBlazeTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ virtual const MBlazeInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+
+ virtual const InstrItineraryData *getInstrItineraryData() const
+ { return &InstrItins; }
+
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return &FrameLowering; }
+
+ virtual const MBlazeSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+
+ virtual const DataLayout *getDataLayout() const
+ { return &DL;}
+
+ virtual const MBlazeRegisterInfo *getRegisterInfo() const
+ { return &InstrInfo.getRegisterInfo(); }
+
+ virtual const MBlazeTargetLowering *getTargetLowering() const
+ { return &TLInfo; }
+
+ virtual const MBlazeSelectionDAGInfo* getSelectionDAGInfo() const
+ { return &TSInfo; }
+
+ const TargetIntrinsicInfo *getIntrinsicInfo() const
+ { return &IntrinsicInfo; }
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
new file mode 100644
index 000000000000..a7a0a68b1612
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -0,0 +1,90 @@
+//===-- MBlazeTargetObjectFile.cpp - MBlaze object files ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+void MBlazeTargetObjectFile::
+Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+}
+
+// A address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
+// gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= 8;
+}
+
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+ // If this is a internal constant string, there is a special
+ // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+}
+
+const MCSection *MBlazeTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
new file mode 100644
index 000000000000..c313722427db
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- llvm/Target/MBlazeTargetObjectFile.h - MBlaze Obj. Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class MBlazeTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
new file mode 100644
index 000000000000..6f9752c42951
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -0,0 +1,171 @@
+//===-- MBlazeAsmBackend.cpp - MBlaze Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static unsigned getFixupKindSize(unsigned Kind) {
+ switch (Kind) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_1: return 1;
+ case FK_PCRel_2:
+ case FK_Data_2: return 2;
+ case FK_PCRel_4:
+ case FK_Data_4: return 4;
+ case FK_Data_8: return 8;
+ }
+}
+
+
+namespace {
+
+class MBlazeAsmBackend : public MCAsmBackend {
+public:
+ MBlazeAsmBackend(const Target &T)
+ : MCAsmBackend() {
+ }
+
+ unsigned getNumFixupKinds() const {
+ return 2;
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const;
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ unsigned getPointerSize() const {
+ return 4;
+ }
+};
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case MBlaze::ADDIK: return MBlaze::ADDIK32;
+ case MBlaze::ORI: return MBlaze::ORI32;
+ case MBlaze::BRLID: return MBlaze::BRLID32;
+ }
+}
+
+bool MBlazeAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
+ if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
+
+ bool hasExprOrImm = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+ hasExprOrImm |= Inst.getOperand(i).isExpr();
+
+ return hasExprOrImm;
+}
+
+bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME: Is this right? It's what the "generic" code was doing before,
+ // but is X86 specific. Is it actually true for MBlaze also, or was it
+ // just close enough to not be a big deal?
+ //
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
+void MBlazeAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ Res = Inst;
+ Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
+}
+
+bool MBlazeAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ if ((Count % 4) != 0)
+ return false;
+
+ for (uint64_t i = 0; i < Count; i += 4)
+ OW->Write32(0x00000000);
+
+ return true;
+}
+} // end anonymous namespace
+
+namespace {
+class ELFMBlazeAsmBackend : public MBlazeAsmBackend {
+public:
+ uint8_t OSABI;
+ ELFMBlazeAsmBackend(const Target &T, uint8_t _OSABI)
+ : MBlazeAsmBackend(T), OSABI(_OSABI) { }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createMBlazeELFObjectWriter(OS, OSABI);
+ }
+};
+
+void ELFMBlazeAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned Size = getFixupKindSize(Fixup.getKind());
+
+ assert(Fixup.getOffset() + Size <= DataSize &&
+ "Invalid fixup offset!");
+
+ char *data = Data + Fixup.getOffset();
+ switch (Size) {
+ default: llvm_unreachable("Cannot fixup unknown value.");
+ case 1: llvm_unreachable("Cannot fixup 1 byte value.");
+ case 8: llvm_unreachable("Cannot fixup 8 byte value.");
+
+ case 4:
+ *(data+7) = uint8_t(Value);
+ *(data+6) = uint8_t(Value >> 8);
+ *(data+3) = uint8_t(Value >> 16);
+ *(data+2) = uint8_t(Value >> 24);
+ break;
+
+ case 2:
+ *(data+3) = uint8_t(Value >> 0);
+ *(data+2) = uint8_t(Value >> 8);
+ }
+}
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ assert(0 && "Mac not supported on MBlaze");
+
+ if (TheTriple.isOSWindows())
+ assert(0 && "Windows not supported on MBlaze");
+
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFMBlazeAsmBackend(T, OSABI);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
new file mode 100644
index 000000000000..437026e7bbc0
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
@@ -0,0 +1,237 @@
+//===-- MBlazeBaseInfo.h - Top level definitions for MBlaze -- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the MBlaze target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBlazeBASEINFO_H
+#define MBlazeBASEINFO_H
+
+#include "MBlazeMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// MBlazeII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MBlazeII {
+ enum {
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ FPseudo = 0,
+ FRRR,
+ FRRI,
+ FCRR,
+ FCRI,
+ FRCR,
+ FRCI,
+ FCCR,
+ FCCI,
+ FRRCI,
+ FRRC,
+ FRCX,
+ FRCS,
+ FCRCS,
+ FCRCX,
+ FCX,
+ FCR,
+ FRIR,
+ FRRRR,
+ FRI,
+ FC,
+ FRR,
+ FormMask = 63
+
+ //===------------------------------------------------------------------===//
+ // MBlaze Specific MachineOperand flags.
+ // MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address the relocation entry symbol resides during execution.
+ // MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ // MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ // MO_GPREL,
+
+ /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
+ /// address.
+ // MO_ABS_HILO
+
+ };
+}
+
+static inline bool isMBlazeRegister(unsigned Reg) {
+ return Reg <= 31;
+}
+
+static inline bool isSpecialMBlazeRegister(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 :
+ case 0x0007 : case 0x000B : case 0x000D : case 0x1000 :
+ case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 :
+ case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 :
+ case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 :
+ case 0x2008 : case 0x2009 : case 0x200A : case 0x200B :
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/// getMBlazeRegisterNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+static inline unsigned getMBlazeRegisterNumbering(unsigned RegEnum) {
+ switch (RegEnum) {
+ case MBlaze::R0 : return 0;
+ case MBlaze::R1 : return 1;
+ case MBlaze::R2 : return 2;
+ case MBlaze::R3 : return 3;
+ case MBlaze::R4 : return 4;
+ case MBlaze::R5 : return 5;
+ case MBlaze::R6 : return 6;
+ case MBlaze::R7 : return 7;
+ case MBlaze::R8 : return 8;
+ case MBlaze::R9 : return 9;
+ case MBlaze::R10 : return 10;
+ case MBlaze::R11 : return 11;
+ case MBlaze::R12 : return 12;
+ case MBlaze::R13 : return 13;
+ case MBlaze::R14 : return 14;
+ case MBlaze::R15 : return 15;
+ case MBlaze::R16 : return 16;
+ case MBlaze::R17 : return 17;
+ case MBlaze::R18 : return 18;
+ case MBlaze::R19 : return 19;
+ case MBlaze::R20 : return 20;
+ case MBlaze::R21 : return 21;
+ case MBlaze::R22 : return 22;
+ case MBlaze::R23 : return 23;
+ case MBlaze::R24 : return 24;
+ case MBlaze::R25 : return 25;
+ case MBlaze::R26 : return 26;
+ case MBlaze::R27 : return 27;
+ case MBlaze::R28 : return 28;
+ case MBlaze::R29 : return 29;
+ case MBlaze::R30 : return 30;
+ case MBlaze::R31 : return 31;
+ case MBlaze::RPC : return 0x0000;
+ case MBlaze::RMSR : return 0x0001;
+ case MBlaze::REAR : return 0x0003;
+ case MBlaze::RESR : return 0x0005;
+ case MBlaze::RFSR : return 0x0007;
+ case MBlaze::RBTR : return 0x000B;
+ case MBlaze::REDR : return 0x000D;
+ case MBlaze::RPID : return 0x1000;
+ case MBlaze::RZPR : return 0x1001;
+ case MBlaze::RTLBX : return 0x1002;
+ case MBlaze::RTLBLO : return 0x1003;
+ case MBlaze::RTLBHI : return 0x1004;
+ case MBlaze::RPVR0 : return 0x2000;
+ case MBlaze::RPVR1 : return 0x2001;
+ case MBlaze::RPVR2 : return 0x2002;
+ case MBlaze::RPVR3 : return 0x2003;
+ case MBlaze::RPVR4 : return 0x2004;
+ case MBlaze::RPVR5 : return 0x2005;
+ case MBlaze::RPVR6 : return 0x2006;
+ case MBlaze::RPVR7 : return 0x2007;
+ case MBlaze::RPVR8 : return 0x2008;
+ case MBlaze::RPVR9 : return 0x2009;
+ case MBlaze::RPVR10 : return 0x200A;
+ case MBlaze::RPVR11 : return 0x200B;
+ default: llvm_unreachable("Unknown register number!");
+ }
+}
+
+/// getRegisterFromNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+static inline unsigned getMBlazeRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0 : return MBlaze::R0;
+ case 1 : return MBlaze::R1;
+ case 2 : return MBlaze::R2;
+ case 3 : return MBlaze::R3;
+ case 4 : return MBlaze::R4;
+ case 5 : return MBlaze::R5;
+ case 6 : return MBlaze::R6;
+ case 7 : return MBlaze::R7;
+ case 8 : return MBlaze::R8;
+ case 9 : return MBlaze::R9;
+ case 10 : return MBlaze::R10;
+ case 11 : return MBlaze::R11;
+ case 12 : return MBlaze::R12;
+ case 13 : return MBlaze::R13;
+ case 14 : return MBlaze::R14;
+ case 15 : return MBlaze::R15;
+ case 16 : return MBlaze::R16;
+ case 17 : return MBlaze::R17;
+ case 18 : return MBlaze::R18;
+ case 19 : return MBlaze::R19;
+ case 20 : return MBlaze::R20;
+ case 21 : return MBlaze::R21;
+ case 22 : return MBlaze::R22;
+ case 23 : return MBlaze::R23;
+ case 24 : return MBlaze::R24;
+ case 25 : return MBlaze::R25;
+ case 26 : return MBlaze::R26;
+ case 27 : return MBlaze::R27;
+ case 28 : return MBlaze::R28;
+ case 29 : return MBlaze::R29;
+ case 30 : return MBlaze::R30;
+ case 31 : return MBlaze::R31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+}
+
+static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : return MBlaze::RPC;
+ case 0x0001 : return MBlaze::RMSR;
+ case 0x0003 : return MBlaze::REAR;
+ case 0x0005 : return MBlaze::RESR;
+ case 0x0007 : return MBlaze::RFSR;
+ case 0x000B : return MBlaze::RBTR;
+ case 0x000D : return MBlaze::REDR;
+ case 0x1000 : return MBlaze::RPID;
+ case 0x1001 : return MBlaze::RZPR;
+ case 0x1002 : return MBlaze::RTLBX;
+ case 0x1003 : return MBlaze::RTLBLO;
+ case 0x1004 : return MBlaze::RTLBHI;
+ case 0x2000 : return MBlaze::RPVR0;
+ case 0x2001 : return MBlaze::RPVR1;
+ case 0x2002 : return MBlaze::RPVR2;
+ case 0x2003 : return MBlaze::RPVR3;
+ case 0x2004 : return MBlaze::RPVR4;
+ case 0x2005 : return MBlaze::RPVR5;
+ case 0x2006 : return MBlaze::RPVR6;
+ case 0x2007 : return MBlaze::RPVR7;
+ case 0x2008 : return MBlaze::RPVR8;
+ case 0x2009 : return MBlaze::RPVR9;
+ case 0x200A : return MBlaze::RPVR10;
+ case 0x200B : return MBlaze::RPVR11;
+ default: llvm_unreachable("Unknown register number!");
+ }
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp
new file mode 100644
index 000000000000..2824b3c35cf1
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp
@@ -0,0 +1,77 @@
+//===-- MBlazeELFObjectWriter.cpp - MBlaze ELF Writer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ MBlazeELFObjectWriter(uint8_t OSABI);
+
+ virtual ~MBlazeELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ };
+}
+
+MBlazeELFObjectWriter::MBlazeELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_MBLAZE,
+ /*HasRelocationAddend*/ false) {}
+
+MBlazeELFObjectWriter::~MBlazeELFObjectWriter() {
+}
+
+unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ // determine the type of the relocation
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case FK_PCRel_4:
+ Type = ELF::R_MICROBLAZE_64_PCREL;
+ break;
+ case FK_PCRel_2:
+ Type = ELF::R_MICROBLAZE_32_PCREL;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ Type = ((IsRelocWithSymbol || Addend !=0)
+ ? ELF::R_MICROBLAZE_32
+ : ELF::R_MICROBLAZE_64);
+ break;
+ case FK_Data_2:
+ Type = ELF::R_MICROBLAZE_32;
+ break;
+ }
+ }
+ return Type;
+}
+
+
+
+MCObjectWriter *llvm::createMBlazeELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new MBlazeELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/ false);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
new file mode 100644
index 000000000000..8231f07dfa80
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
@@ -0,0 +1,26 @@
+//===-- MBlazeMCAsmInfo.cpp - MBlaze asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MBlazeMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCAsmInfo.h"
+using namespace llvm;
+
+void MBlazeMCAsmInfo::anchor() { }
+
+MBlazeMCAsmInfo::MBlazeMCAsmInfo() {
+ IsLittleEndian = false;
+ StackGrowsUp = false;
+ SupportsDebugInformation = true;
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ GPRel32Directive = "\t.gpword\t";
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
new file mode 100644
index 000000000000..977f9a6866d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- MBlazeMCAsmInfo.h - MBlaze asm properties --------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MBlazeMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZETARGETASMINFO_H
+#define MBLAZETARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class MBlazeMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
+ explicit MBlazeMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
new file mode 100644
index 000000000000..8faff6ade441
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
@@ -0,0 +1,222 @@
+//===-- MBlazeMCCodeEmitter.cpp - Convert MBlaze code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class MBlazeMCCodeEmitter : public MCCodeEmitter {
+ MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const MBlazeMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCInstrInfo &MCII;
+
+public:
+ MBlazeMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii) {
+ }
+
+ ~MBlazeMCCodeEmitter() {}
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO) const;
+ unsigned getMachineOpValue(const MCInst &MI, unsigned OpIdx) const {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ static unsigned GetMBlazeRegNum(const MCOperand &MO) {
+ // FIXME: getMBlazeRegisterNumbering() is sufficient?
+ llvm_unreachable("MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet "
+ "implemented.");
+ }
+
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ // The MicroBlaze uses a bit reversed format so we need to reverse the
+ // order of the bits. Taken from:
+ // http://graphics.stanford.edu/~seander/bithacks.html
+ C = ((C * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
+
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitRawByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const {
+ assert(Size <= 8 && "size too big in emit constant");
+
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const;
+ void EmitIMM(const MCInst &MI, unsigned &CurByte, raw_ostream &OS) const;
+
+ void EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+
+MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MBlazeMCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO) const {
+ if (MO.isReg())
+ return getMBlazeRegisterNumbering(MO.getReg());
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ if (MO.isExpr())
+ return 0; // The relocation has already been recorded at this point.
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable(0);
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const {
+ int32_t val = (int32_t)imm.getImm();
+ if (val > 32767 || val < -32768) {
+ EmitByte(0x0D, CurByte, OS);
+ EmitByte(0x00, CurByte, OS);
+ EmitRawByte((val >> 24) & 0xFF, CurByte, OS);
+ EmitRawByte((val >> 16) & 0xFF, CurByte, OS);
+ }
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCInst &MI, unsigned &CurByte,raw_ostream &OS) const {
+ switch (MI.getOpcode()) {
+ default: break;
+
+ case MBlaze::ADDIK32:
+ case MBlaze::ORI32:
+ case MBlaze::BRLID32:
+ EmitByte(0x0D, CurByte, OS);
+ EmitByte(0x00, CurByte, OS);
+ EmitRawByte(0, CurByte, OS);
+ EmitRawByte(0, CurByte, OS);
+ }
+}
+
+void MBlazeMCCodeEmitter::
+EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel, unsigned &CurByte,
+ raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getNumOperands()>opNo && "Not enought operands for instruction");
+
+ MCOperand oper = MI.getOperand(opNo);
+
+ if (oper.isImm()) {
+ EmitIMM(oper, CurByte, OS);
+ } else if (oper.isExpr()) {
+ MCFixupKind FixupKind;
+ switch (MI.getOpcode()) {
+ default:
+ FixupKind = pcrel ? FK_PCRel_2 : FK_Data_2;
+ Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+ break;
+ case MBlaze::ORI32:
+ case MBlaze::ADDIK32:
+ case MBlaze::BRLID32:
+ FixupKind = pcrel ? FK_PCRel_4 : FK_Data_4;
+ Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+ break;
+ }
+ }
+}
+
+
+
+void MBlazeMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Emit an IMM instruction if the instruction we are encoding requires it
+ EmitIMM(MI,CurByte,OS);
+
+ switch ((TSFlags & MBlazeII::FormMask)) {
+ default: break;
+ case MBlazeII::FPseudo:
+ // Pseudo instructions don't get encoded.
+ return;
+ case MBlazeII::FRRI:
+ EmitImmediate(MI, 2, false, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FRIR:
+ EmitImmediate(MI, 1, false, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FCRI:
+ EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FRCI:
+ EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+ case MBlazeII::FCCI:
+ EmitImmediate(MI, 0, true, CurByte, OS, Fixups);
+ break;
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted
+ unsigned Value = getBinaryCodeForInstr(MI);
+ EmitConstant(Value, 4, CurByte, OS);
+}
+
+// FIXME: These #defines shouldn't be necessary. Instead, tblgen should
+// be able to generate code emitter helpers for either variant, like it
+// does for the AsmWriter.
+#define MBlazeCodeEmitter MBlazeMCCodeEmitter
+#define MachineInstr MCInst
+#include "MBlazeGenCodeEmitter.inc"
+#undef MBlazeCodeEmitter
+#undef MachineInstr
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
new file mode 100644
index 000000000000..380750d50f4c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -0,0 +1,141 @@
+//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCTargetDesc.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "MBlazeMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MBlazeGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createMBlazeMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMBlazeMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createMBlazeMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMBlazeMCRegisterInfo(X, MBlaze::R15);
+ return X;
+}
+
+static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMBlazeMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ default:
+ return new MBlazeMCAsmInfo();
+ }
+}
+
+static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::Static;
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ llvm_unreachable("MBlaze does not support Darwin MACH-O format");
+ }
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("MBlaze does not support Windows COFF format");
+ }
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new MBlazeInstPrinter(MAI, MII, MRI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMBlazeTarget,
+ createMBlazeMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMBlazeTarget,
+ createMBlazeMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget,
+ createMBlazeMCSubtargetInfo);
+
+ // Register the MC code emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheMBlazeTarget,
+ llvm::createMBlazeMCCodeEmitter);
+
+ // Register the asm backend
+ TargetRegistry::RegisterMCAsmBackend(TheMBlazeTarget,
+ createMBlazeAsmBackend);
+
+ // Register the object streamer
+ TargetRegistry::RegisterMCObjectStreamer(TheMBlazeTarget,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
+ createMBlazeMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
new file mode 100644
index 000000000000..7bc7d8f20724
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -0,0 +1,56 @@
+//===-- MBlazeMCTargetDesc.h - MBlaze Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEMCTARGETDESC_H
+#define MBLAZEMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCContext;
+class MCCodeEmitter;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+class raw_ostream;
+
+extern Target TheMBlazeTarget;
+
+MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMBlazeAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+
+MCObjectWriter *createMBlazeELFObjectWriter(raw_ostream &OS, uint8_t OSABI);
+} // End llvm namespace
+
+// Defines symbolic names for MBlaze registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MBlazeGenRegisterInfo.inc"
+
+// Defines symbolic names for the MBlaze instructions.
+#define GET_INSTRINFO_ENUM
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MBlazeGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
new file mode 100644
index 000000000000..323a7f647d56
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- MBlazeTargetInfo.cpp - MBlaze Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMBlazeTarget;
+
+extern "C" void LLVMInitializeMBlazeTargetInfo() {
+ RegisterTarget<Triple::mblaze> X(TheMBlazeTarget, "mblaze", "MBlaze");
+}
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
new file mode 100644
index 000000000000..4b12aeadd3e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -0,0 +1,114 @@
+//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430InstPrinter.h"
+#include "MSP430.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MSP430GenAsmWriter.inc"
+
+void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << '#' << *Op.getExpr();
+ }
+}
+
+void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ const MCOperand &Base = MI->getOperand(OpNo);
+ const MCOperand &Disp = MI->getOperand(OpNo+1);
+
+ // Print displacement first
+
+ // If the global address expression is a part of displacement field with a
+ // register base, we should not emit any prefix symbol here, e.g.
+ // mov.w &foo, r1
+ // vs
+ // mov.w glb(r1), r2
+ // Otherwise (!) msp430-as will silently miscompile the output :(
+ if (!Base.getReg())
+ O << '&';
+
+ if (Disp.isExpr())
+ O << *Disp.getExpr();
+ else {
+ assert(Disp.isImm() && "Expected immediate in displacement field");
+ O << Disp.getImm();
+ }
+
+ // Print register base field
+ if (Base.getReg())
+ O << '(' << getRegisterName(Base.getReg()) << ')';
+}
+
+void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CC = MI->getOperand(OpNo).getImm();
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported CC code");
+ case MSP430CC::COND_E:
+ O << "eq";
+ break;
+ case MSP430CC::COND_NE:
+ O << "ne";
+ break;
+ case MSP430CC::COND_HS:
+ O << "hs";
+ break;
+ case MSP430CC::COND_LO:
+ O << "lo";
+ break;
+ case MSP430CC::COND_GE:
+ O << "ge";
+ break;
+ case MSP430CC::COND_L:
+ O << 'l';
+ break;
+ }
+}
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
new file mode 100644
index 000000000000..d32eb3a21a37
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -0,0 +1,44 @@
+//= MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax -*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430INSTPRINTER_H
+#define MSP430INSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class MSP430InstPrinter : public MCInstPrinter {
+ public:
+ MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
new file mode 100644
index 000000000000..3c9576056946
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -0,0 +1,31 @@
+//===-- MSP430MCAsmInfo.cpp - MSP430 asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MSP430MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCAsmInfo.h"
+#include "llvm/ADT/StringRef.h"
+using namespace llvm;
+
+void MSP430MCAsmInfo::anchor() { }
+
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
+ PointerSize = CalleeSaveStackSlotSize = 2;
+
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective ="\t.weak\t";
+ PCSymbol=".";
+ CommentString = ";";
+
+ AlignmentIsInBytes = false;
+ AllowNameToStartWithDigit = true;
+ UsesELFSectionDirectiveForBSS = true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
new file mode 100644
index 000000000000..e5c2fc283b17
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -0,0 +1,31 @@
+//===-- MSP430MCAsmInfo.h - MSP430 asm properties --------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MSP430MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430TARGETASMINFO_H
+#define MSP430TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class StringRef;
+ class Target;
+
+ class MSP430MCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
+ explicit MSP430MCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
new file mode 100644
index 000000000000..530e6aae92fd
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -0,0 +1,94 @@
+//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCTargetDesc.h"
+#include "InstPrinter/MSP430InstPrinter.h"
+#include "MSP430MCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MSP430GenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MSP430GenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createMSP430MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMSP430MCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMSP430MCRegisterInfo(X, MSP430::PCW);
+ return X;
+}
+
+static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMSP430MCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new MSP430InstPrinter(MAI, MII, MRI);
+ return 0;
+}
+
+extern "C" void LLVMInitializeMSP430TargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMSP430Target,
+ createMSP430MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMSP430Target,
+ createMSP430MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
+ createMSP430MCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+ createMSP430MCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
new file mode 100644
index 000000000000..7f3505ca5514
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -0,0 +1,36 @@
+//===-- MSP430MCTargetDesc.h - MSP430 Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MCTARGETDESC_H
+#define MSP430MCTARGETDESC_H
+
+namespace llvm {
+class Target;
+
+extern Target TheMSP430Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for MSP430 registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "MSP430GenRegisterInfo.inc"
+
+// Defines symbolic names for the MSP430 instructions.
+#define GET_INSTRINFO_ENUM
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MSP430GenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.h b/contrib/llvm/lib/Target/MSP430/MSP430.h
new file mode 100644
index 000000000000..4574ce5f98b7
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.h
@@ -0,0 +1,47 @@
+//==-- MSP430.h - Top-level interface for MSP430 representation --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MSP430 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_H
+#define LLVM_TARGET_MSP430_H
+
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace MSP430CC {
+ // MSP430 specific condition code.
+ enum CondCodes {
+ COND_E = 0, // aka COND_Z
+ COND_NE = 1, // aka COND_NZ
+ COND_HS = 2, // aka COND_C
+ COND_LO = 3, // aka COND_NC
+ COND_GE = 4,
+ COND_L = 5,
+
+ COND_INVALID = -1
+ };
+}
+
+namespace llvm {
+ class MSP430TargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createMSP430BranchSelectionPass();
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.td b/contrib/llvm/lib/Target/MSP430/MSP430.td
new file mode 100644
index 000000000000..c6796b3789ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.td
@@ -0,0 +1,66 @@
+//===-- MSP430.td - Describe the MSP430 Target Machine -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MSP430 target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureX
+ : SubtargetFeature<"ext", "ExtendedInsts", "true",
+ "Enable MSP430-X extensions">;
+
+//===----------------------------------------------------------------------===//
+// MSP430 supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrInfo.td"
+
+def MSP430InstrInfo : InstrInfo;
+
+def MSP430InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MSP430 : Target {
+ let InstructionSet = MSP430InstrInfo;
+ let AssemblyWriters = [MSP430InstPrinter];
+}
+
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
new file mode 100644
index 000000000000..0a04e5ddb75d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -0,0 +1,167 @@
+//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the MSP430 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "InstPrinter/MSP430InstPrinter.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MCInstLower.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+namespace {
+ class MSP430AsmPrinter : public AsmPrinter {
+ public:
+ MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "MSP430 Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char* Modifier = 0);
+ void printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+
+void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default: llvm_unreachable("Not implemented yet!");
+ case MachineOperand::MO_Register:
+ O << MSP430InstPrinter::getRegisterName(MO.getReg());
+ return;
+ case MachineOperand::MO_Immediate:
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << '#';
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ uint64_t Offset = MO.getOffset();
+
+ // If the global address expression is a part of displacement field with a
+ // register base, we should not emit any prefix symbol here, e.g.
+ // mov.w &foo, r1
+ // vs
+ // mov.w glb(r1), r2
+ // Otherwise (!) msp430-as will silently miscompile the output :(
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << (isMemOp ? '&' : '#');
+ if (Offset)
+ O << '(' << Offset << '+';
+
+ O << *Mang->getSymbol(MO.getGlobal());
+
+ if (Offset)
+ O << ')';
+
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ O << (isMemOp ? '&' : '#');
+ O << MAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ }
+ }
+}
+
+void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Disp = MI->getOperand(OpNum+1);
+
+ // Print displacement first
+
+ // Imm here is in fact global address - print extra modifier.
+ if (Disp.isImm() && !Base.getReg())
+ O << '&';
+ printOperand(MI, OpNum+1, O, "nohash");
+
+ // Print register base field
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum, O);
+ O << ')';
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ return true; // Unknown modifier.
+ }
+ printSrcMemOperand(MI, OpNo, O);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MSP430MCInstLower MCInstLowering(OutContext, *this);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMSP430AsmPrinter() {
+ RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
new file mode 100644
index 000000000000..f128427f8066
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -0,0 +1,180 @@
+//===-- MSP430BranchSelector.cpp - Emit long conditional branches ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 10 bits of displacement to reach their
+// target basic block. It does this in two passes; a calculation of basic block
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
+// pass should be run last, just before the assembly printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-branch-select"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace {
+ struct MSP430BSel : public MachineFunctionPass {
+ static char ID;
+ MSP430BSel() : MachineFunctionPass(ID) {}
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "MSP430 Branch Selector";
+ }
+ };
+ char MSP430BSel::ID = 0;
+}
+
+/// createMSP430BranchSelectionPass - returns an instance of the Branch
+/// Selection Pass
+///
+FunctionPass *llvm::createMSP430BranchSelectionPass() {
+ return new MSP430BSel();
+}
+
+bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
+ const MSP430InstrInfo *TII =
+ static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+ // If the entire function is smaller than the displacement of a branch field,
+ // we know we don't need to shrink any branches in this function. This is a
+ // common case.
+ if (FuncSize < (1 << 9)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+6
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if ((I->getOpcode() != MSP430::JCC || I->getOperand(0).isImm()) &&
+ I->getOpcode() != MSP430::JMP) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(0).getMBB();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt<10>(BranchSize)) {
+ MBBStartOffset += 2;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ unsigned NewSize;
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ if (I->getOpcode() == MSP430::JMP) {
+ NewSize = 4;
+ } else {
+ // The BCC operands are:
+ // 0. MSP430 branch predicate
+ // 1. Target MBB
+ SmallVector<MachineOperand, 1> Cond;
+ Cond.push_back(I->getOperand(1));
+
+ // Jump over the uncond branch inst (i.e. $+6) on opposite condition.
+ TII->ReverseBranchCondition(Cond);
+ BuildMI(MBB, I, dl, TII->get(MSP430::JCC))
+ .addImm(4).addOperand(Cond[0]);
+
+ NewSize = 6;
+ }
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+ // Remember that this instruction is NewSize bytes, increase the size of the
+ // block by NewSize-2, remember to iterate.
+ BlockSizes[MBB.getNumber()] += NewSize-2;
+ MBBStartOffset += NewSize;
+
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td b/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td
new file mode 100644
index 000000000000..b448cc4ed9b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td
@@ -0,0 +1,40 @@
+//==- MSP430CallingConv.td - Calling Conventions for MSP430 -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MSP430 architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MSP430 Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_MSP430 : CallingConv<[
+ // i8 are returned in registers R15B, R14B, R13B, R12B
+ CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
+
+ // i16 are returned in registers R15, R14, R13, R12
+ CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_MSP430 : CallingConv<[
+ // Pass by value if the byval attribute is given
+ CCIfByVal<CCPassByVal<2, 2>>,
+
+ // Promote i8 arguments to i16.
+ CCIfType<[i8], CCPromoteToType<i16>>,
+
+ // The first 4 integer arguments of non-varargs functions are passed in
+ // integer registers.
+ CCIfNotVarArg<CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>>,
+
+ // Integer values get stored in stack slots that are 2 bytes in
+ // size and 2-byte aligned.
+ CCIfType<[i16], CCAssignToStack<2, 2>>
+]>;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
new file mode 100644
index 000000000000..e504011dfdc8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -0,0 +1,297 @@
+//===-- MSP430FrameLowering.cpp - MSP430 Frame Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430FrameLowering.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool MSP430FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+
+ uint64_t NumBytes = 0;
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the EBP register... which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save FPW into the appropriate stack slot...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(MSP430::FPW, RegState::Kill);
+
+ // Update FPW with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
+ .addReg(MSP430::SPW);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(MSP430::FPW);
+
+ } else
+ NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ if (NumBytes) { // adjust stack pointer: SPW -= numbytes
+ // If there is an SUB16ri of SPW immediately before this instruction, merge
+ // the two.
+ //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
+ // If there is an ADD16ri or SUB16ri of SPW immediately after this
+ // instruction, merge the two instructions.
+ // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
+
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ switch (RetOpcode) {
+ case MSP430::RET:
+ case MSP430::RETI: break; // These are ok
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - CSSize;
+
+ // pop FPW.
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+ } else
+ NumBytes = StackSize - CSSize;
+
+ // Skip the callee-saved pop instructions.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if (Opc != MSP430::POP16r && !PI->isTerminator())
+ break;
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD16ri or SUB16ri of SPW immediately before this
+ // instruction, merge the two instructions.
+ //if (NumBytes || MFI->hasVarSizedObjects())
+ // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ if (MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
+ if (CSSize) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(CSSize);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ // adjust stack pointer back: SPW += numbytes
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+// FIXME: Can we eleminate these in favour of generic code?
+bool
+MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
+ MFI->setCalleeSavedFrameSize(CSI.size() * 2);
+
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ BuildMI(MBB, MI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool
+MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
+
+ return true;
+}
+
+void MSP430FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+ unsigned StackAlign = getStackAlignment();
+
+ if (!hasReservedCallFrame(MF)) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackup instruction into a 'sub SPW, <amt>' and the
+ // adjcallstackdown instruction into 'add SPW, <amt>'
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount)
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ }
+
+ if (New) {
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
+ MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void
+MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *) const {
+ // Create a frame entry for the FPW register that must be saved.
+ if (hasFP(MF)) {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
+ (void)FrameIdx;
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for FPW register must be last in order to be found!");
+ }
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
new file mode 100644
index 000000000000..c673f59b5efc
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
@@ -0,0 +1,59 @@
+//==- MSP430FrameLowering.h - Define frame lowering for MSP430 --*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_FRAMEINFO_H
+#define MSP430_FRAMEINFO_H
+
+#include "MSP430.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MSP430Subtarget;
+
+class MSP430FrameLowering : public TargetFrameLowering {
+protected:
+ const MSP430Subtarget &STI;
+
+public:
+ explicit MSP430FrameLowering(const MSP430Subtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..1566c096037e
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -0,0 +1,492 @@
+//===-- MSP430ISelDAGToDAG.cpp - A dag to dag inst selector for MSP430 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+ struct MSP430ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ int16_t Disp;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+
+ MSP430ISelAddressMode()
+ : BaseType(RegBase), Disp(0), GV(0), CP(0), BlockAddr(0),
+ ES(0), JT(-1), Align(0) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+ }
+
+ void dump() {
+ errs() << "MSP430ISelAddressMode " << this << '\n';
+ if (BaseType == RegBase && Base.Reg.getNode() != 0) {
+ errs() << "Base.Reg ";
+ Base.Reg.getNode()->dump();
+ } else if (BaseType == FrameIndexBase) {
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
+ }
+ errs() << " Disp " << Disp << '\n';
+ if (GV) {
+ errs() << "GV ";
+ GV->dump();
+ } else if (CP) {
+ errs() << " CP ";
+ CP->dump();
+ errs() << " Align" << Align << '\n';
+ } else if (ES) {
+ errs() << "ES ";
+ errs() << ES << '\n';
+ } else if (JT != -1)
+ errs() << " JT" << JT << " Align" << Align << '\n';
+ }
+ };
+}
+
+/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class MSP430DAGToDAGISel : public SelectionDAGISel {
+ const MSP430TargetLowering &Lowering;
+ const MSP430Subtarget &Subtarget;
+
+ public:
+ MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ virtual const char *getPassName() const {
+ return "MSP430 DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool MatchAddress(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
+
+ virtual bool
+ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ // Include the pieces autogenerated from the target description.
+ #include "MSP430GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ SDNode *SelectIndexedLoad(SDNode *Op);
+ SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16);
+
+ bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Disp);
+ };
+} // end anonymous namespace
+
+/// createMSP430ISelDag - This pass converts a legalized DAG into a
+/// MSP430-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new MSP430DAGToDAGISel(TM, OptLevel);
+}
+
+
+/// MatchWrapper - Try to match MSP430ISD::Wrapper node into an addressing mode.
+/// These wrap things that will resolve down into a symbol reference. If no
+/// match is possible, this returns true, otherwise it returns false.
+bool MSP430DAGToDAGISel::MatchWrapper(SDValue N, MSP430ISelAddressMode &AM) {
+ // If the addressing mode already has a symbol as the displacement, we can
+ // never match another symbol.
+ if (AM.hasSymbolicDisplacement())
+ return true;
+
+ SDValue N0 = N.getOperand(0);
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ AM.GV = G->getGlobal();
+ AM.Disp += G->getOffset();
+ //AM.SymbolFlags = G->getTargetFlags();
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ //AM.SymbolFlags = CP->getTargetFlags();
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ //AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ //AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ }
+ return false;
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != MSP430ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = MSP430ISelAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
+ DEBUG(errs() << "MatchAddress: "; AM.dump());
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ AM.Disp += Val;
+ return false;
+ }
+
+ case MSP430ISD::Wrapper:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase
+ && AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = MSP430ISelAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::ADD: {
+ MSP430ISelAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM))
+ return false;
+ AM = Backup;
+
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ MSP430ISelAddressMode Backup = AM;
+ uint64_t Offset = CN->getSExtValue();
+ // Start with the LHS as an addr mode.
+ if (!MatchAddress(N.getOperand(0), AM) &&
+ // Address could not have picked a GV address for the displacement.
+ AM.GV == NULL &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp += Offset;
+ return false;
+ }
+ AM = Backup;
+ }
+ break;
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// SelectAddr - returns true if it is able pattern match an addressing mode.
+/// It returns the operands which make up the maximal addressing mode it can
+/// match by reference.
+bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
+ SDValue &Base, SDValue &Disp) {
+ MSP430ISelAddressMode AM;
+
+ if (MatchAddress(N, AM))
+ return false;
+
+ EVT VT = N.getValueType();
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
+
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, N->getDebugLoc(),
+ MVT::i16, AM.Disp,
+ 0/*AM.SymbolFlags*/);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16,
+ AM.Align, AM.Disp, 0/*AM.SymbolFlags*/);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.BlockAddr)
+ Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, 0,
+ 0/*AM.SymbolFlags*/);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16);
+
+ return true;
+}
+
+bool MSP430DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(Op, Op0, Op1))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
+static bool isValidIndexedLoad(const LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1)
+ return false;
+
+ break;
+ case MVT::i16:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2)
+ return false;
+
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+
+ unsigned Opcode = 0;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ Opcode = MSP430::MOV8rm_POST;
+ break;
+ case MVT::i16:
+ Opcode = MSP430::MOV16rm_POST;
+ break;
+ default:
+ return NULL;
+ }
+
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(),
+ VT, MVT::i16, MVT::Other,
+ LD->getBasePtr(), LD->getChain());
+}
+
+SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
+ SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16) {
+ if (N1.getOpcode() == ISD::LOAD &&
+ N1.hasOneUse() &&
+ IsLegalToFold(N1, Op, Op, OptLevel)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N1);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+ unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8);
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
+ SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
+ SDNode *ResNode =
+ CurDAG->SelectNodeTo(Op, Opc,
+ VT, MVT::i16, MVT::Other,
+ Ops0, 3);
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ // Transfer chain.
+ ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2));
+ // Transfer writeback.
+ ReplaceUses(SDValue(N1.getNode(), 1), SDValue(ResNode, 1));
+ return ResNode;
+ }
+
+ return NULL;
+}
+
+
+SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: ");
+ DEBUG(Node->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== ";
+ Node->dump(CurDAG);
+ errs() << "\n");
+ return NULL;
+ }
+
+ // Few custom selection stuff.
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::FrameIndex: {
+ assert(Node->getValueType(0) == MVT::i16);
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16);
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ }
+ case ISD::LOAD:
+ if (SDNode *ResNode = SelectIndexedLoad(Node))
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ case ISD::ADD:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SUB:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::SUB8rm_POST, MSP430::SUB16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::AND:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::OR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::XOR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ return ResNode;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
new file mode 100644
index 000000000000..09cdf3268553
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -0,0 +1,1247 @@
+//===-- MSP430ISelLowering.cpp - MSP430 DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-lower"
+
+#include "MSP430ISelLowering.h"
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430Subtarget.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+typedef enum {
+ NoHWMult,
+ HWMultIntr,
+ HWMultNoIntr
+} HWMultUseMode;
+
+static cl::opt<HWMultUseMode>
+HWMultMode("msp430-hwmult-mode",
+ cl::desc("Hardware multiplier use mode"),
+ cl::init(HWMultNoIntr),
+ cl::values(
+ clEnumValN(NoHWMult, "no",
+ "Do not use hardware multiplier"),
+ clEnumValN(HWMultIntr, "interrupts",
+ "Assume hardware multiplier can be used inside interrupts"),
+ clEnumValN(HWMultNoIntr, "use",
+ "Assume hardware multiplier cannot be used inside interrupts"),
+ clEnumValEnd));
+
+MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
+ TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(*tm.getSubtargetImpl()) {
+
+ TD = getDataLayout();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, &MSP430::GR8RegClass);
+ addRegisterClass(MVT::i16, &MSP430::GR16RegClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Provide all sorts of operation actions
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+ setStackPointerRegisterToSaveRestore(MSP430::SPW);
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ // We have post-incremented loads / stores.
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // We don't have any truncstores
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i16, Custom);
+ setOperationAction(ISD::SHL, MVT::i16, Custom);
+ setOperationAction(ISD::SRL, MVT::i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i16, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::i16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
+
+ setOperationAction(ISD::CTTZ, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i8, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i16, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // FIXME: Implement efficiently multiplication by a constant
+ setOperationAction(ISD::MUL, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MUL, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+
+ // varargs support
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+
+ // Libcalls names.
+ if (HWMultMode == HWMultIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw");
+ } else if (HWMultMode == HWMultNoIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw_noint");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw_noint");
+ }
+
+ setMinFunctionAlignment(1);
+ setPrefFunctionAlignment(2);
+}
+
+SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::SHL: // FALLTHROUGH
+ case ISD::SRL:
+ case ISD::SRA: return LowerShifts(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operand");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+TargetLowering::ConstraintType
+MSP430TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return C_RegisterClass;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+MSP430TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ if (VT == MVT::i8)
+ return std::make_pair(0U, &MSP430::GR8RegClass);
+
+ return std::make_pair(0U, &MSP430::GR16RegClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MSP430GenCallingConv.inc"
+
+SDValue
+MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ if (Ins.empty())
+ return Chain;
+ report_fatal_error("ISRs cannot have arguments");
+ }
+}
+
+SDValue
+MSP430TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
+ // MSP430 target does not yet support tail call optimization.
+ isTailCall = false;
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ report_fatal_error("ISRs cannot be called directly");
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into virtual registers and
+/// generate load operations for arguments places on the stack.
+// FIXME: struct return stuff
+SDValue
+MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
+
+ // Create frame index for the start of the first vararg value
+ if (isVarArg) {
+ unsigned Offset = CCInfo.getNextStackOffset();
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, Offset, true));
+ }
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
+ default:
+ {
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ case MVT::i16:
+ unsigned VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
+
+ // If this is an 8-bit value, it is really passed promoted to 16
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ }
+ } else {
+ // Sanity check
+ assert(VA.isMemLoc());
+
+ SDValue InVal;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ int FI = MFI->CreateFixedObject(Flags.getByValSize(),
+ VA.getLocMemOffset(), true);
+ InVal = DAG.getFrameIndex(FI, getPointerTy());
+ } else {
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > 2) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ //from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
+ InVal = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ }
+
+ InVals.push_back(InVal);
+ }
+ }
+
+ return Chain;
+}
+
+SDValue
+MSP430TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // ISRs cannot return any value.
+ if (CallConv == CallingConv::MSP430_INTR && !Outs.empty())
+ report_fatal_error("ISRs cannot return any value");
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analize return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together,
+ // avoiding something bad.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
+ MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(Opc, dl, MVT::Other, &RetOps[0], RetOps.size());
+}
+
+/// LowerCCCCallTo - functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: sret.
+SDValue
+MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_MSP430);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain ,DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());
+
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+ SDValue MemOp;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i16);
+ MemOp = DAG.getMemcpy(Chain, dl, PtrOff, Arg, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile*/false,
+ /*AlwaysInline=*/true,
+ MachinePointerInfo(),
+ MachinePointerInfo());
+ } else {
+ MemOp = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ MemOpChains.push_back(MemOp);
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+ // flag operands which copy the outgoing args into registers. The InFlag in
+ // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+ DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MSP430);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ SDNode* N = Op.getNode();
+ EVT VT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand non-constant shifts to loops:
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ switch (Opc) {
+ default: llvm_unreachable("Invalid shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(MSP430ISD::SHL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(MSP430ISD::SRA, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(MSP430ISD::SRL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ }
+
+ uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+
+ // Expand the stuff into sequence of shifts.
+ // FIXME: for some shift amounts this might be done better!
+ // E.g.: foo >> (8 + N) => sxt(swpb(foo)) >> N
+ SDValue Victim = N->getOperand(0);
+
+ if (Opc == ISD::SRL && ShiftAmount) {
+ // Emit a special goodness here:
+ // srl A, 1 => clrc; rrc A
+ Victim = DAG.getNode(MSP430ISD::RRC, dl, VT, Victim);
+ ShiftAmount -= 1;
+ }
+
+ while (ShiftAmount--)
+ Victim = DAG.getNode((Opc == ISD::SHL ? MSP430ISD::RLA : MSP430ISD::RRA),
+ dl, VT, Victim);
+
+ return Victim;
+}
+
+SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ // Create the TargetGlobalAddress node, folding in the constant offset.
+ SDValue Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ getPointerTy(), Offset);
+ return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
+ getPointerTy(), Result);
+}
+
+SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+
+ return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+}
+
+SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy());
+
+ return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+}
+
+static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
+ ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) {
+ // FIXME: Handle bittests someday
+ assert(!LHS.getValueType().isFloatingPoint() && "We don't handle FP yet");
+
+ // FIXME: Handle jump negative someday
+ MSP430CC::CondCodes TCC = MSP430CC::COND_INVALID;
+ switch (CC) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ:
+ TCC = MSP430CC::COND_E; // aka COND_Z
+ // Minor optimization: if LHS is a constant, swap operands, then the
+ // constant can be folded into comparison.
+ if (LHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
+ break;
+ case ISD::SETNE:
+ TCC = MSP430CC::COND_NE; // aka COND_NZ
+ // Minor optimization: if LHS is a constant, swap operands, then the
+ // constant can be folded into comparison.
+ if (LHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
+ break;
+ case ISD::SETULE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETUGE:
+ // Turn lhs u>= rhs with lhs constant into rhs u< lhs+1, this allows us to
+ // fold constant into instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_LO;
+ break;
+ }
+ TCC = MSP430CC::COND_HS; // aka COND_C
+ break;
+ case ISD::SETUGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETULT:
+ // Turn lhs u< rhs with lhs constant into rhs u>= lhs+1, this allows us to
+ // fold constant into instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_HS;
+ break;
+ }
+ TCC = MSP430CC::COND_LO; // aka COND_NC
+ break;
+ case ISD::SETLE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETGE:
+ // Turn lhs >= rhs with lhs constant into rhs < lhs+1, this allows us to
+ // fold constant into instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_L;
+ break;
+ }
+ TCC = MSP430CC::COND_GE;
+ break;
+ case ISD::SETGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETLT:
+ // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows us to
+ // fold constant into instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_GE;
+ break;
+ }
+ TCC = MSP430CC::COND_L;
+ break;
+ }
+
+ TargetCC = DAG.getConstant(TCC, MVT::i8);
+ return DAG.getNode(MSP430ISD::CMP, dl, MVT::Glue, LHS, RHS);
+}
+
+
+SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ return DAG.getNode(MSP430ISD::BR_CC, dl, Op.getValueType(),
+ Chain, Dest, TargetCC, Flag);
+}
+
+SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we are doing an AND and testing against zero, then the CMP
+ // will not be generated. The AND (or BIT) will generate the condition codes,
+ // but they are different from CMP.
+ // FIXME: since we're doing a post-processing, use a pseudoinstr here, so
+ // lowering & isel wouldn't diverge.
+ bool andCC = false;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (RHSC->isNullValue() && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::AND ||
+ (LHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getOpcode() == ISD::AND))) {
+ andCC = true;
+ }
+ }
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ // Get the condition codes directly from the status register, if its easy.
+ // Otherwise a branch will be generated. Note that the AND and BIT
+ // instructions generate different flags than CMP, the carry bit can be used
+ // for NE/EQ.
+ bool Invert = false;
+ bool Shift = false;
+ bool Convert = true;
+ switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) {
+ default:
+ Convert = false;
+ break;
+ case MSP430CC::COND_HS:
+ // Res = SRW & 1, no processing is required
+ break;
+ case MSP430CC::COND_LO:
+ // Res = ~(SRW & 1)
+ Invert = true;
+ break;
+ case MSP430CC::COND_NE:
+ if (andCC) {
+ // C = ~Z, thus Res = SRW & 1, no processing is required
+ } else {
+ // Res = ~((SRW >> 1) & 1)
+ Shift = true;
+ Invert = true;
+ }
+ break;
+ case MSP430CC::COND_E:
+ Shift = true;
+ // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however,
+ // Res = (SRW >> 1) & 1 is 1 word shorter.
+ break;
+ }
+ EVT VT = Op.getValueType();
+ SDValue One = DAG.getConstant(1, VT);
+ if (Convert) {
+ SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW,
+ MVT::i16, Flag);
+ if (Shift)
+ // FIXME: somewhere this is turned into a SRL, lower it MSP specific?
+ SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One);
+ SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One);
+ if (Invert)
+ SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One);
+ return SR;
+ } else {
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(One);
+ Ops.push_back(Zero);
+ Ops.push_back(TargetCC);
+ Ops.push_back(Flag);
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+ }
+}
+
+SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueV);
+ Ops.push_back(FalseV);
+ Ops.push_back(TargetCC);
+ Ops.push_back(Flag);
+
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Val = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert(VT == MVT::i16 && "Only support i16 for now!");
+
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Val),
+ DAG.getValueType(Val.getValueType()));
+}
+
+SDValue
+MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ true);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(), MVT::i16);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
+}
+
+SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ MSP430::FPW, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
+SDValue MSP430TargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+
+ // Frame index of first vararg argument
+ SDValue FrameIndex = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Create a store of the frame index to the location operand
+ return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), FrameIndex,
+ Op.getOperand(1), MachinePointerInfo(SV),
+ false, false, 0);
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+ if (VT != MVT::i8 && VT != MVT::i16)
+ return false;
+
+ if (Op->getOpcode() != ISD::ADD)
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+ uint64_t RHSC = RHS->getZExtValue();
+ if ((VT == MVT::i16 && RHSC != 2) ||
+ (VT == MVT::i8 && RHSC != 1))
+ return false;
+
+ Base = Op->getOperand(0);
+ Offset = DAG.getConstant(RHSC, VT);
+ AM = ISD::POST_INC;
+ return true;
+ }
+
+ return false;
+}
+
+
+const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
+ case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG";
+ case MSP430ISD::RRA: return "MSP430ISD::RRA";
+ case MSP430ISD::RLA: return "MSP430ISD::RLA";
+ case MSP430ISD::RRC: return "MSP430ISD::RRC";
+ case MSP430ISD::CALL: return "MSP430ISD::CALL";
+ case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper";
+ case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
+ case MSP430ISD::CMP: return "MSP430ISD::CMP";
+ case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
+ case MSP430ISD::SHL: return "MSP430ISD::SHL";
+ case MSP430ISD::SRA: return "MSP430ISD::SRA";
+ }
+}
+
+bool MSP430TargetLowering::isTruncateFree(Type *Ty1,
+ Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+
+ return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits());
+}
+
+bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+
+ return (VT1.getSizeInBits() > VT2.getSizeInBits());
+}
+
+bool MSP430TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
+ // MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
+ return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16);
+}
+
+bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
+ // MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
+ return 0 && VT1 == MVT::i8 && VT2 == MVT::i16;
+}
+
+bool MSP430TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ return isZExtFree(Val.getValueType(), VT2);
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock*
+MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &RI = F->getRegInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+
+ unsigned Opc;
+ const TargetRegisterClass * RC;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Invalid shift opcode!");
+ case MSP430::Shl8:
+ Opc = MSP430::SHL8r1;
+ RC = &MSP430::GR8RegClass;
+ break;
+ case MSP430::Shl16:
+ Opc = MSP430::SHL16r1;
+ RC = &MSP430::GR16RegClass;
+ break;
+ case MSP430::Sra8:
+ Opc = MSP430::SAR8r1;
+ RC = &MSP430::GR8RegClass;
+ break;
+ case MSP430::Sra16:
+ Opc = MSP430::SAR16r1;
+ RC = &MSP430::GR16RegClass;
+ break;
+ case MSP430::Srl8:
+ Opc = MSP430::SAR8r1c;
+ RC = &MSP430::GR8RegClass;
+ break;
+ case MSP430::Srl16:
+ Opc = MSP430::SAR16r1c;
+ RC = &MSP430::GR16RegClass;
+ break;
+ }
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // Create loop block
+ MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(I, LoopBB);
+ F->insert(I, RemBB);
+
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the block containing instructions after shift.
+ RemBB->splice(RemBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ RemBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
+ BB->addSuccessor(LoopBB);
+ BB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ unsigned ShiftAmtReg = RI.createVirtualRegister(&MSP430::GR8RegClass);
+ unsigned ShiftAmtReg2 = RI.createVirtualRegister(&MSP430::GR8RegClass);
+ unsigned ShiftReg = RI.createVirtualRegister(RC);
+ unsigned ShiftReg2 = RI.createVirtualRegister(RC);
+ unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // BB:
+ // cmp 0, N
+ // je RemBB
+ BuildMI(BB, dl, TII.get(MSP430::CMP8ri))
+ .addReg(ShiftAmtSrcReg).addImm(0);
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(RemBB)
+ .addImm(MSP430CC::COND_E);
+
+ // LoopBB:
+ // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
+ // ShiftReg2 = shift ShiftReg
+ // ShiftAmt2 = ShiftAmt - 1;
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
+ .addReg(ShiftAmtSrcReg).addMBB(BB)
+ .addReg(ShiftAmtReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg);
+ BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
+ .addReg(ShiftAmtReg).addImm(1);
+ BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
+ .addMBB(LoopBB)
+ .addImm(MSP430CC::COND_NE);
+
+ // RemBB:
+ // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
+ BuildMI(*RemBB, RemBB->begin(), dl, TII.get(MSP430::PHI), DstReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return RemBB;
+}
+
+MachineBasicBlock*
+MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ unsigned Opc = MI->getOpcode();
+
+ if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
+ Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
+ Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
+ return EmitShiftInstr(MI, BB);
+
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(I, copy0MBB);
+ F->insert(I, copy1MBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(copy1MBB);
+
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(copy1MBB)
+ .addImm(MI->getOperand(3).getImm());
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to copy1MBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(copy1MBB);
+
+ // copy1MBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = copy1MBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(MSP430::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
new file mode 100644
index 000000000000..e0ed870f5653
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -0,0 +1,178 @@
+//===-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MSP430 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_ISELLOWERING_H
+#define LLVM_TARGET_MSP430_ISELLOWERING_H
+
+#include "MSP430.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace MSP430ISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ /// Same as RET_FLAG, but used for returning from ISRs.
+ RETI_FLAG,
+
+ /// Y = R{R,L}A X, rotate right (left) arithmetically
+ RRA, RLA,
+
+ /// Y = RRC X, rotate right via carry
+ RRC,
+
+ /// CALL - These operations represent an abstract call
+ /// instruction, which includes a bunch of information.
+ CALL,
+
+ /// Wrapper - A wrapper node for TargetConstantPool, TargetExternalSymbol,
+ /// and TargetGlobalAddress.
+ Wrapper,
+
+ /// CMP - Compare instruction.
+ CMP,
+
+ /// SetCC - Operand 0 is condition code, and operand 1 is the flag
+ /// operand produced by a CMP instruction.
+ SETCC,
+
+ /// MSP430 conditional branches. Operand 0 is the chain operand, operand 1
+ /// is the block to branch if condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// instruction.
+ BR_CC,
+
+ /// SELECT_CC - Operand 0 and operand 1 are selection variable, operand 3
+ /// is condition code and operand 4 is flag operand.
+ SELECT_CC,
+
+ /// SHL, SRA, SRL - Non-constant shifts.
+ SHL, SRA, SRL
+ };
+ }
+
+ class MSP430Subtarget;
+ class MSP430TargetMachine;
+
+ class MSP430TargetLowering : public TargetLowering {
+ public:
+ explicit MSP430TargetLowering(MSP430TargetMachine &TM);
+
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+
+ TargetLowering::ConstraintType
+ getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of type
+ /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in
+ /// register R15W to i8 by referencing its sub-register R15B.
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ /// isZExtFree - Return true if any actual instruction that defines a value
+ /// of type Ty1 implicit zero-extends the value to Ty2 in the result
+ /// register. This does not necessarily include registers defined in unknown
+ /// ways, such as incoming arguments, or copies from unknown virtual
+ /// registers. Also, if isTruncateFree(Ty2, Ty1) is true, this does not
+ /// necessarily apply to truncate instructions. e.g. on msp430, all
+ /// instructions that define 8-bit values implicit zero-extend the result
+ /// out to 16 bits.
+ virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+
+ MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock* EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ private:
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ const MSP430Subtarget &Subtarget;
+ const DataLayout *TD;
+ };
+} // namespace llvm
+
+#endif // LLVM_TARGET_MSP430_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
new file mode 100644
index 000000000000..a9e87dad0cd8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
@@ -0,0 +1,211 @@
+//===-- MSP430InstrFormats.td - MSP430 Instruction Formats -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MSP430 instructions format here
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<2> val> {
+ bits<2> Value = val;
+}
+
+def PseudoFrm : Format<0>;
+def SingleOpFrm : Format<1>;
+def DoubleOpFrm : Format<2>;
+def CondJumpFrm : Format<3>;
+
+class SourceMode<bits<2> val> {
+ bits<2> Value = val;
+}
+
+def SrcReg : SourceMode<0>;
+def SrcMem : SourceMode<1>;
+def SrcIndReg : SourceMode<2>;
+def SrcPostInc : SourceMode<3>;
+def SrcImm : SourceMode<3>;
+
+class DestMode<bit val> {
+ bit Value = val;
+}
+
+def DstReg : DestMode<0>;
+def DstMem : DestMode<1>;
+
+class SizeVal<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def SizeUnknown : SizeVal<0>; // Unknown / unset size
+def SizeSpecial : SizeVal<1>; // Special instruction, e.g. pseudo
+def Size2Bytes : SizeVal<2>;
+def Size4Bytes : SizeVal<3>;
+def Size6Bytes : SizeVal<4>;
+
+// Generic MSP430 Format
+class MSP430Inst<dag outs, dag ins, SizeVal sz, Format f,
+ string asmstr> : Instruction {
+ field bits<16> Inst;
+
+ let Namespace = "MSP430";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ Format Form = f;
+ SizeVal Sz = sz;
+
+ // Define how we want to layout our TargetSpecific information field... This
+ // should be kept up-to-date with the fields in the MSP430InstrInfo.h file.
+ let TSFlags{1-0} = Form.Value;
+ let TSFlags{4-2} = Sz.Value;
+
+ let AsmString = asmstr;
+}
+
+// FIXME: Create different classes for different addressing modes.
+
+// MSP430 Double Operand (Format I) Instructions
+class IForm<bits<4> opcode, DestMode dest, bit bw, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, sz, DoubleOpFrm, asmstr> {
+ let Pattern = pattern;
+
+ DestMode ad = dest;
+ SourceMode as = src;
+
+ let Inst{12-15} = opcode;
+ let Inst{7} = ad.Value;
+ let Inst{6} = bw;
+ let Inst{4-5} = as.Value;
+}
+
+// 8 bit IForm instructions
+class IForm8<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm<opcode, dest, 1, src, sz, outs, ins, asmstr, pattern>;
+
+class I8rr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class I8ri<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8rm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8mr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8mi<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+
+class I8mm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+
+// 16 bit IForm instructions
+class IForm16<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm<opcode, dest, 0, src, sz, outs, ins, asmstr, pattern>;
+
+class I16rr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class I16ri<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16rm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16mr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16mi<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+
+class I16mm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+
+// MSP430 Single Operand (Format II) Instructions
+class IIForm<bits<9> opcode, bit bw, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, sz, SingleOpFrm, asmstr> {
+ let Pattern = pattern;
+
+ SourceMode as = src;
+
+ let Inst{7-15} = opcode;
+ let Inst{6} = bw;
+ let Inst{4-5} = as.Value;
+}
+
+// 8 bit IIForm instructions
+class IIForm8<bits<9> opcode, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm<opcode, 1, src, sz, outs, ins, asmstr, pattern>;
+
+class II8r<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class II8m<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class II8i<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+// 16 bit IIForm instructions
+class IIForm16<bits<9> opcode, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm<opcode, 0, src, sz, outs, ins, asmstr, pattern>;
+
+class II16r<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class II16m<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class II16i<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+// MSP430 Conditional Jumps Instructions
+class CJForm<bits<3> opcode, bits<3> cond,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, Size2Bytes, CondJumpFrm, asmstr> {
+ let Pattern = pattern;
+
+ let Inst{13-15} = opcode;
+ let Inst{10-12} = cond;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, SizeSpecial, PseudoFrm, asmstr> {
+ let Pattern = pattern;
+ let Inst{15-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
new file mode 100644
index 000000000000..a6b5f2f6d0bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -0,0 +1,326 @@
+//===-- MSP430InstrInfo.cpp - MSP430 Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430InstrInfo.h"
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MSP430GenInstrInfo.inc"
+
+using namespace llvm;
+
+MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
+ : MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
+ RI(tm, *this) {}
+
+void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else
+ llvm_unreachable("Cannot store this register to stack slot!");
+}
+
+void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const{
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
+ else
+ llvm_unreachable("Cannot store this register to stack slot!");
+}
+
+void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (MSP430::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV16rr;
+ else if (MSP430::GR8RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV8rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != MSP430::JMP &&
+ I->getOpcode() != MSP430::JCC &&
+ I->getOpcode() != MSP430::Br &&
+ I->getOpcode() != MSP430::Bm)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+bool MSP430InstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid Xbranch condition!");
+
+ MSP430CC::CondCodes CC = static_cast<MSP430CC::CondCodes>(Cond[0].getImm());
+
+ switch (CC) {
+ default: llvm_unreachable("Invalid branch condition!");
+ case MSP430CC::COND_E:
+ CC = MSP430CC::COND_NE;
+ break;
+ case MSP430CC::COND_NE:
+ CC = MSP430CC::COND_E;
+ break;
+ case MSP430CC::COND_L:
+ CC = MSP430CC::COND_GE;
+ break;
+ case MSP430CC::COND_GE:
+ CC = MSP430CC::COND_L;
+ break;
+ case MSP430CC::COND_HS:
+ CC = MSP430CC::COND_LO;
+ break;
+ case MSP430CC::COND_LO:
+ CC = MSP430CC::COND_HS;
+ break;
+ }
+
+ Cond[0].setImm(CC);
+ return false;
+}
+
+bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+ if (!MI->isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->isBranch())
+ return true;
+
+ // Cannot handle indirect branches.
+ if (I->getOpcode() == MSP430::Br ||
+ I->getOpcode() == MSP430::Bm)
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == MSP430::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditinal destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ assert(I->getOpcode() == MSP430::JCC && "Invalid conditional branch");
+ MSP430CC::CondCodes BranchCode =
+ static_cast<MSP430CC::CondCodes>(I->getOperand(1).getImm());
+ if (BranchCode == MSP430CC::COND_INVALID)
+ return true; // Can't handle weird stuff.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination.
+ assert(Cond.size() == 1);
+ assert(TBB);
+
+ // Only handle the case where all conditional branches branch to
+ // the same destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ MSP430CC::CondCodes OldBranchCode = (MSP430CC::CondCodes)Cond[0].getImm();
+ // If the conditions are the same, we can leave them alone.
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+unsigned
+MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "MSP430 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(MSP430::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ BuildMI(&MBB, DL, get(MSP430::JCC)).addMBB(TBB).addImm(Cond[0].getImm());
+ ++Count;
+
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(MSP430::JMP)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+/// GetInstSize - Return the number of bytes of code the specified
+/// instruction may be. This returns the maximum number of bytes.
+///
+unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+
+ switch (Desc.TSFlags & MSP430II::SizeMask) {
+ default:
+ switch (Desc.getOpcode()) {
+ default: llvm_unreachable("Unknown instruction size!");
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::INLINEASM: {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ return TII.getInlineAsmLength(MI->getOperand(0).getSymbolName(),
+ *MF->getTarget().getMCAsmInfo());
+ }
+ }
+ case MSP430II::SizeSpecial:
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown instruction size!");
+ case MSP430::SAR8r1c:
+ case MSP430::SAR16r1c:
+ return 4;
+ }
+ case MSP430II::Size2Bytes:
+ return 2;
+ case MSP430II::Size4Bytes:
+ return 4;
+ case MSP430II::Size6Bytes:
+ return 6;
+ }
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
new file mode 100644
index 000000000000..d79f99245e71
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -0,0 +1,91 @@
+//===-- MSP430InstrInfo.h - MSP430 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430INSTRINFO_H
+#define LLVM_TARGET_MSP430INSTRINFO_H
+
+#include "MSP430RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MSP430GenInstrInfo.inc"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+/// MSP430II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MSP430II {
+ enum {
+ SizeShift = 2,
+ SizeMask = 7 << SizeShift,
+
+ SizeUnknown = 0 << SizeShift,
+ SizeSpecial = 1 << SizeShift,
+ Size2Bytes = 2 << SizeShift,
+ Size4Bytes = 3 << SizeShift,
+ Size6Bytes = 4 << SizeShift
+ };
+}
+
+class MSP430InstrInfo : public MSP430GenInstrInfo {
+ const MSP430RegisterInfo RI;
+public:
+ explicit MSP430InstrInfo(MSP430TargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ // Branch folding goodness
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
new file mode 100644
index 000000000000..e45780d05803
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
@@ -0,0 +1,1211 @@
+//===-- MSP430InstrInfo.td - MSP430 Instruction defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the MSP430 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
+def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
+def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>]>;
+def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>]>;
+def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisI8<2>]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
+def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
+def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
+
+def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+def MSP430callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def MSP430callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_MSP430CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
+def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutGlue]>;
+def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
+ [SDNPHasChain, SDNPInGlue]>;
+def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
+ [SDNPInGlue]>;
+def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
+def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
+def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+def memsrc : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+def memdst : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+// Short jump targets have OtherVT type and are printed as pcrel imm values.
+def jmptarget : Operand<OtherVT> {
+ let PrintMethod = "printPCRelImmOperand";
+}
+
+// Operand for printing out a condition code.
+def cc : Operand<i8> {
+ let PrintMethod = "printCCOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Complex Pattern Definitions.
+//===----------------------------------------------------------------------===//
+
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+//===----------------------------------------------------------------------===//
+// Instruction list..
+
+// ADJCALLSTACKDOWN/UP implicitly use/def SP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber SRW.
+let Defs = [SPW, SRW], Uses = [SPW] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(MSP430callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let usesCustomInserter = 1 in {
+ def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
+ "# Select8 PSEUDO",
+ [(set GR8:$dst,
+ (MSP430selectcc GR8:$src, GR8:$src2, imm:$cc))]>;
+ def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR16:$src2, i8imm:$cc),
+ "# Select16 PSEUDO",
+ [(set GR16:$dst,
+ (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>;
+ let Defs = [SRW] in {
+ def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Shl8 PSEUDO",
+ [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
+ def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Shl16 PSEUDO",
+ [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>;
+ def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Sra8 PSEUDO",
+ [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>;
+ def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Sra16 PSEUDO",
+ [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>;
+ def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Srl8 PSEUDO",
+ [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>;
+ def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Srl16 PSEUDO",
+ [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>;
+
+ }
+}
+
+let neverHasSideEffects = 1 in
+def NOP : Pseudo<(outs), (ins), "nop", []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def RET : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs), (ins), "ret", [(MSP430retflag)]>;
+ def RETI : II16r<0x0, (outs), (ins), "reti", [(MSP430retiflag)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+
+// FIXME: expand opcode & cond field for branches!
+
+// Direct branch
+let isBarrier = 1 in {
+ // Short branch
+ def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst),
+ "jmp\t$dst",
+ [(br bb:$dst)]>;
+ let isIndirectBranch = 1 in {
+ // Long branches
+ def Bi : I16ri<0, (outs), (ins i16imm:$brdst),
+ "br\t$brdst",
+ [(brind tblockaddress:$brdst)]>;
+ def Br : I16rr<0, (outs), (ins GR16:$brdst),
+ "mov.w\t{$brdst, pc}",
+ [(brind GR16:$brdst)]>;
+ def Bm : I16rm<0, (outs), (ins memsrc:$brdst),
+ "mov.w\t{$brdst, pc}",
+ [(brind (load addr:$brdst))]>;
+ }
+}
+
+// Conditional branches
+let Uses = [SRW] in
+ def JCC : CJForm<0, 0,
+ (outs), (ins jmptarget:$dst, cc:$cc),
+ "j$cc\t$dst",
+ [(MSP430brcc bb:$dst, imm:$cc)]>;
+} // isBranch, isTerminator
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. SPW is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [R12W, R13W, R14W, R15W, SRW],
+ Uses = [SPW] in {
+ def CALLi : II16i<0x0,
+ (outs), (ins i16imm:$dst),
+ "call\t$dst", [(MSP430call imm:$dst)]>;
+ def CALLr : II16r<0x0,
+ (outs), (ins GR16:$dst),
+ "call\t$dst", [(MSP430call GR16:$dst)]>;
+ def CALLm : II16m<0x0,
+ (outs), (ins memsrc:$dst),
+ "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>;
+ }
+
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+let Defs = [SPW], Uses = [SPW], neverHasSideEffects=1 in {
+let mayLoad = 1 in
+def POP16r : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$reg), (ins), "pop.w\t$reg", []>;
+
+let mayStore = 1 in
+def PUSH16r : II16r<0x0,
+ (outs), (ins GR16:$reg), "push.w\t$reg",[]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+// FIXME: Provide proper encoding!
+let neverHasSideEffects = 1 in {
+def MOV8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ []>;
+def MOV16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "mov.w\t{$src, $dst}",
+ []>;
+}
+
+// FIXME: Provide proper encoding!
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, imm:$src)]>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def MOV8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, (load addr:$src))]>;
+def MOV16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, (load addr:$src))]>;
+}
+
+def MOVZX16rr8 : I8rr<0x0,
+ (outs GR16:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext GR8:$src))]>;
+def MOVZX16rm8 : I8rm<0x0,
+ (outs GR16:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in {
+def MOV8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.b\t{@$base+, $dst}", []>;
+def MOV16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.w\t{@$base+, $dst}", []>;
+}
+
+// Any instruction that defines a 8-bit result leaves the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
+// be copying from a truncate, but any other 8-bit operation will zero-extend
+// up to 16 bits.
+def def8 : PatLeaf<(i8 GR8:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
+// In the case of a 8-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i16 (zext def8:$src)),
+ (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
+
+def MOV8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 imm:$src), addr:$dst)]>;
+
+def MOV8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "mov.w\t{$src, $dst}",
+ [(store GR16:$src, addr:$dst)]>;
+
+def MOV8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 (load addr:$src)), addr:$dst)]>;
+def MOV16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 (load addr:$src)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+let Constraints = "$src = $dst" in {
+
+let Defs = [SRW] in {
+
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+
+def ADD8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def ADD16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def ADD8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADD16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "add.b\t{@$base+, $dst}", []>;
+def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "add.w\t{@$base+, $dst}", []>;
+}
+
+
+def ADD8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def ADD16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def ADD8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+
+let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
+def ADC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def ADC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+} // isCommutable
+
+def ADC8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def ADC16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def ADC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def ADC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
+def AND8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def AND16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def AND8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def AND16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def AND8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def AND16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "and.b\t{@$base+, $dst}", []>;
+def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "and.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def AND8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def AND16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def AND16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def AND16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>;
+def OR16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>;
+}
+
+def OR8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, imm:$src2))]>;
+def OR16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, imm:$src2))]>;
+
+def OR8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>;
+def OR16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "bis.b\t{@$base+, $dst}", []>;
+def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "bis.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def OR8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
+def OR16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
+
+def OR8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def OR16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
+
+def OR8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (i8 (load addr:$dst)),
+ (i8 (load addr:$src))), addr:$dst)]>;
+def OR16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (i16 (load addr:$dst)),
+ (i16 (load addr:$src))), addr:$dst)]>;
+}
+
+// bic does not modify condition codes
+def BIC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>;
+def BIC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>;
+
+def BIC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>;
+def BIC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>;
+
+let Constraints = "" in {
+def BIC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>;
+def BIC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>;
+
+def BIC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (not (i8 (load addr:$src)))), addr:$dst)]>;
+def BIC16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (not (i16 (load addr:$src)))), addr:$dst)]>;
+}
+
+let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
+def XOR8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def XOR16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def XOR8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def XOR16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def XOR8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def XOR16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "xor.b\t{@$base+, $dst}", []>;
+def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "xor.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def XOR8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+
+def SUB8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def SUB16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SUB8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def SUB16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def SUB8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def SUB16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "sub.b\t{@$base+, $dst}", []>;
+def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "sub.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def SUB8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+def SBC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def SBC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SBC8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def SBC16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def SBC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def SBC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def SBC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+// FIXME: memory variant!
+def SAR8r1 : II8r<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "rra.b\t$dst",
+ [(set GR8:$dst, (MSP430rra GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1 : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "rra.w\t$dst",
+ [(set GR16:$dst, (MSP430rra GR16:$src)),
+ (implicit SRW)]>;
+
+def SHL8r1 : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "rla.b\t$dst",
+ [(set GR8:$dst, (MSP430rla GR8:$src)),
+ (implicit SRW)]>;
+def SHL16r1 : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "rla.w\t$dst",
+ [(set GR16:$dst, (MSP430rla GR16:$src)),
+ (implicit SRW)]>;
+
+def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "clrc\n\t"
+ "rrc.b\t$dst",
+ [(set GR8:$dst, (MSP430rrc GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "clrc\n\t"
+ "rrc.w\t$dst",
+ [(set GR16:$dst, (MSP430rrc GR16:$src)),
+ (implicit SRW)]>;
+
+// FIXME: Memory sext's ?
+def SEXT16r : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "sxt\t$dst",
+ [(set GR16:$dst, (sext_inreg GR16:$src, i8)),
+ (implicit SRW)]>;
+
+} // Defs = [SRW]
+
+def ZEXT16r : I8rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext (trunc GR16:$src)))]>;
+
+// FIXME: Memory bitswaps?
+def SWPB16r : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "swpb\t$dst",
+ [(set GR16:$dst, (bswap GR16:$src))]>;
+
+} // Constraints = "$src = $dst"
+
+// Integer comparisons
+let Defs = [SRW] in {
+def CMP8rr : I8rr<0x0,
+ (outs), (ins GR8:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>;
+def CMP16rr : I16rr<0x0,
+ (outs), (ins GR16:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>;
+
+def CMP8ri : I8ri<0x0,
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>;
+def CMP16ri : I16ri<0x0,
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>;
+
+def CMP8mi : I8mi<0x0,
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
+ (i8 imm:$src2)), (implicit SRW)]>;
+def CMP16mi : I16mi<0x0,
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
+ (i16 imm:$src2)), (implicit SRW)]>;
+
+def CMP8rm : I8rm<0x0,
+ (outs), (ins GR8:$src, memsrc:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, (load addr:$src2)),
+ (implicit SRW)]>;
+def CMP16rm : I16rm<0x0,
+ (outs), (ins GR16:$src, memsrc:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, (load addr:$src2)),
+ (implicit SRW)]>;
+
+def CMP8mr : I8mr<0x0,
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR8:$src2),
+ (implicit SRW)]>;
+def CMP16mr : I16mr<0x0,
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR16:$src2),
+ (implicit SRW)]>;
+
+
+// BIT TESTS, just sets condition codes
+// Note that the C condition is set differently than when using CMP.
+let isCommutable = 1 in {
+def BIT8rr : I8rr<0x0,
+ (outs), (ins GR8:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
+ (implicit SRW)]>;
+def BIT16rr : I16rr<0x0,
+ (outs), (ins GR16:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
+ (implicit SRW)]>;
+}
+def BIT8ri : I8ri<0x0,
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
+ (implicit SRW)]>;
+def BIT16ri : I16ri<0x0,
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
+ (implicit SRW)]>;
+
+def BIT8rm : I8rm<0x0,
+ (outs), (ins GR8:$src, memdst:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
+ (implicit SRW)]>;
+def BIT16rm : I16rm<0x0,
+ (outs), (ins GR16:$src, memdst:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
+ (implicit SRW)]>;
+
+def BIT8mr : I8mr<0x0,
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
+ (implicit SRW)]>;
+def BIT16mr : I16mr<0x0,
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
+ (implicit SRW)]>;
+
+def BIT8mi : I8mi<0x0,
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
+ (implicit SRW)]>;
+def BIT16mi : I16mi<0x0,
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
+ (implicit SRW)]>;
+
+def BIT8mm : I8mm<0x0,
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (i8 (load addr:$src)),
+ (load addr:$src2)),
+ 0),
+ (implicit SRW)]>;
+def BIT16mm : I16mm<0x0,
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (i16 (load addr:$src)),
+ (load addr:$src2)),
+ 0),
+ (implicit SRW)]>;
+} // Defs = [SRW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+
+// extload
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+
+// anyext
+def : Pat<(i16 (anyext GR8:$src)),
+ (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
+
+// truncs
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, subreg_8bit)>;
+
+// GlobalAddress, ExternalSymbol
+def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>;
+def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>;
+def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>;
+
+def : Pat<(add GR16:$src, (MSP430Wrapper tglobaladdr :$src2)),
+ (ADD16ri GR16:$src, tglobaladdr:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper texternalsym:$src2)),
+ (ADD16ri GR16:$src, texternalsym:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper tblockaddress:$src2)),
+ (ADD16ri GR16:$src, tblockaddress:$src2)>;
+
+def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst),
+ (MOV16mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tblockaddress:$src)>;
+
+// calls
+def : Pat<(MSP430call (i16 tglobaladdr:$dst)),
+ (CALLi tglobaladdr:$dst)>;
+def : Pat<(MSP430call (i16 texternalsym:$dst)),
+ (CALLi texternalsym:$dst)>;
+
+// add and sub always produce carry
+def : Pat<(addc GR16:$src, GR16:$src2),
+ (ADD16rr GR16:$src, GR16:$src2)>;
+def : Pat<(addc GR16:$src, (load addr:$src2)),
+ (ADD16rm GR16:$src, addr:$src2)>;
+def : Pat<(addc GR16:$src, imm:$src2),
+ (ADD16ri GR16:$src, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst),
+ (ADD16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (ADD16mm addr:$dst, addr:$src)>;
+
+def : Pat<(addc GR8:$src, GR8:$src2),
+ (ADD8rr GR8:$src, GR8:$src2)>;
+def : Pat<(addc GR8:$src, (load addr:$src2)),
+ (ADD8rm GR8:$src, addr:$src2)>;
+def : Pat<(addc GR8:$src, imm:$src2),
+ (ADD8ri GR8:$src, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst),
+ (ADD8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (ADD8mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR16:$src, GR16:$src2),
+ (SUB16rr GR16:$src, GR16:$src2)>;
+def : Pat<(subc GR16:$src, (load addr:$src2)),
+ (SUB16rm GR16:$src, addr:$src2)>;
+def : Pat<(subc GR16:$src, imm:$src2),
+ (SUB16ri GR16:$src, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR16:$src), addr:$dst),
+ (SUB16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (SUB16mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR8:$src, GR8:$src2),
+ (SUB8rr GR8:$src, GR8:$src2)>;
+def : Pat<(subc GR8:$src, (load addr:$src2)),
+ (SUB8rm GR8:$src, addr:$src2)>;
+def : Pat<(subc GR8:$src, imm:$src2),
+ (SUB8ri GR8:$src, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
+ (SUB8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (SUB8mm addr:$dst, addr:$src)>;
+
+// peephole patterns
+def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
+def : Pat<(MSP430cmp (trunc (and_su GR16:$src, GR16:$src2)), 0),
+ (BIT8rr (EXTRACT_SUBREG GR16:$src, subreg_8bit),
+ (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
new file mode 100644
index 000000000000..043e5becadbb
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -0,0 +1,153 @@
+//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MSP430 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCInstLower.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+MCSymbol *MSP430MCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MSP430MCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_RegisterMask:
+ continue;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
new file mode 100644
index 000000000000..794aa56bf04c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
@@ -0,0 +1,47 @@
+//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_MCINSTLOWER_H
+#define MSP430_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class AsmPrinter;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+
+ /// MSP430MCInstLower - This class is used to lower an MachineInstr
+ /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
+ MCContext &Ctx;
+
+ AsmPrinter &Printer;
+public:
+ MSP430MCInstLower(MCContext &ctx, AsmPrinter &printer)
+ : Ctx(ctx), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..0f7539908458
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- MSP430MachineFuctionInfo.cpp - MSP430 machine function info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void MSP430MachineFunctionInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
new file mode 100644
index 000000000000..d1697f478cc2
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -0,0 +1,54 @@
+//===- MSP430MachineFuctionInfo.h - MSP430 machine function info -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares MSP430-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MACHINEFUNCTIONINFO_H
+#define MSP430MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// MSP430MachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private MSP430 target-specific information for each MachineFunction.
+class MSP430MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+public:
+ MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
+
+ explicit MSP430MachineFunctionInfo(MachineFunction &MF)
+ : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex;}
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
new file mode 100644
index 000000000000..0b3e9e259649
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -0,0 +1,162 @@
+//===-- MSP430RegisterInfo.cpp - MSP430 Register Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-reg-info"
+
+#include "MSP430RegisterInfo.h"
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MSP430GenRegisterInfo.inc"
+
+using namespace llvm;
+
+// FIXME: Provide proper call frame setup / destroy opcodes.
+MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : MSP430GenRegisterInfo(MSP430::PCW), TM(tm), TII(tii) {
+ StackAlign = TM.getFrameLowering()->getStackAlignment();
+}
+
+const uint16_t*
+MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
+ const Function* F = MF->getFunction();
+ static const uint16_t CalleeSavedRegs[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ 0
+ };
+ static const uint16_t CalleeSavedRegsFP[] = {
+ MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ 0
+ };
+ static const uint16_t CalleeSavedRegsIntr[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ 0
+ };
+ static const uint16_t CalleeSavedRegsIntrFP[] = {
+ MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ 0
+ };
+
+ if (TFI->hasFP(*MF))
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegsIntrFP : CalleeSavedRegsFP);
+ else
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegsIntr : CalleeSavedRegs);
+
+}
+
+BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Mark 4 special registers with subregisters as reserved.
+ Reserved.set(MSP430::PCB);
+ Reserved.set(MSP430::SPB);
+ Reserved.set(MSP430::SRB);
+ Reserved.set(MSP430::CGB);
+ Reserved.set(MSP430::PCW);
+ Reserved.set(MSP430::SPW);
+ Reserved.set(MSP430::SRW);
+ Reserved.set(MSP430::CGW);
+
+ // Mark frame pointer as reserved if needed.
+ if (TFI->hasFP(MF))
+ Reserved.set(MSP430::FPW);
+
+ return Reserved;
+}
+
+const TargetRegisterClass *
+MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ return &MSP430::GR16RegClass;
+}
+
+void
+MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc dl = MI.getDebugLoc();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+
+ unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ // Skip the saved PC
+ Offset += 2;
+
+ if (!TFI->hasFP(MF))
+ Offset += MF.getFrameInfo()->getStackSize();
+ else
+ Offset += 2; // Skip the saved FPW
+
+ // Fold imm into offset
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+
+ if (MI.getOpcode() == MSP430::ADD16ri) {
+ // This is actually "load effective address" of the stack slot
+ // instruction. We have only two-address instructions, thus we need to
+ // expand it into mov + add
+
+ MI.setDesc(TII.get(MSP430::MOV16rr));
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+
+ if (Offset == 0)
+ return;
+
+ // We need to materialize the offset via add instruction.
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (Offset < 0)
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
+ .addReg(DstReg).addImm(-Offset);
+ else
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
+ .addReg(DstReg).addImm(Offset);
+
+ return;
+ }
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+}
+
+unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
new file mode 100644
index 000000000000..69cccb275259
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -0,0 +1,55 @@
+//===-- MSP430RegisterInfo.h - MSP430 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430REGISTERINFO_H
+#define LLVM_TARGET_MSP430REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MSP430GenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class MSP430TargetMachine;
+
+struct MSP430RegisterInfo : public MSP430GenRegisterInfo {
+private:
+ MSP430TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ /// StackAlign - Default stack alignment.
+ ///
+ unsigned StackAlign;
+public:
+ MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ const TargetRegisterClass*
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
new file mode 100644
index 000000000000..07619d0675b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -0,0 +1,81 @@
+//===-- MSP430RegisterInfo.td - MSP430 Register defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MSP430 register file
+//===----------------------------------------------------------------------===//
+
+class MSP430Reg<bits<4> num, string n> : Register<n> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+def PCB : MSP430Reg<0, "r0">;
+def SPB : MSP430Reg<1, "r1">;
+def SRB : MSP430Reg<2, "r2">;
+def CGB : MSP430Reg<3, "r3">;
+def FPB : MSP430Reg<4, "r4">;
+def R5B : MSP430Reg<5, "r5">;
+def R6B : MSP430Reg<6, "r6">;
+def R7B : MSP430Reg<7, "r7">;
+def R8B : MSP430Reg<8, "r8">;
+def R9B : MSP430Reg<9, "r9">;
+def R10B : MSP430Reg<10, "r10">;
+def R11B : MSP430Reg<11, "r11">;
+def R12B : MSP430Reg<12, "r12">;
+def R13B : MSP430Reg<13, "r13">;
+def R14B : MSP430Reg<14, "r14">;
+def R15B : MSP430Reg<15, "r15">;
+
+def subreg_8bit : SubRegIndex { let Namespace = "MSP430"; }
+
+let SubRegIndices = [subreg_8bit] in {
+def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
+def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
+def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
+def CGW : MSP430RegWithSubregs<3, "r3", [CGB]>;
+def FPW : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def R5W : MSP430RegWithSubregs<5, "r5", [R5B]>;
+def R6W : MSP430RegWithSubregs<6, "r6", [R6B]>;
+def R7W : MSP430RegWithSubregs<7, "r7", [R7B]>;
+def R8W : MSP430RegWithSubregs<8, "r8", [R8B]>;
+def R9W : MSP430RegWithSubregs<9, "r9", [R9B]>;
+def R10W : MSP430RegWithSubregs<10, "r10", [R10B]>;
+def R11W : MSP430RegWithSubregs<11, "r11", [R11B]>;
+def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
+def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
+def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
+def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
+}
+
+def GR8 : RegisterClass<"MSP430", [i8], 8,
+ // Volatile registers
+ (add R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
+ // Frame pointer, sometimes allocable
+ FPB,
+ // Volatile, but not allocable
+ PCB, SPB, SRB, CGB)>;
+
+def GR16 : RegisterClass<"MSP430", [i16], 16,
+ // Volatile registers
+ (add R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+ // Frame pointer, sometimes allocable
+ FPW,
+ // Volatile, but not allocable
+ PCW, SPW, SRW, CGW)>;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..24f45fa3881b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-selectiondag-info"
+#include "MSP430TargetMachine.h"
+using namespace llvm;
+
+MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
new file mode 100644
index 000000000000..fa8194830ff8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MSP430 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430SELECTIONDAGINFO_H
+#define MSP430SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM);
+ ~MSP430SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
new file mode 100644
index 000000000000..edeaf34676bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -0,0 +1,34 @@
+//===-- MSP430Subtarget.cpp - MSP430 Subtarget Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430Subtarget.h"
+#include "MSP430.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MSP430GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+void MSP430Subtarget::anchor() { }
+
+MSP430Subtarget::MSP430Subtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS) :
+ MSP430GenSubtargetInfo(TT, CPU, FS) {
+ std::string CPUName = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
new file mode 100644
index 000000000000..4d8792eede7f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
@@ -0,0 +1,42 @@
+//===-- MSP430Subtarget.h - Define Subtarget for the MSP430 ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
+#define LLVM_TARGET_MSP430_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MSP430GenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class MSP430Subtarget : public MSP430GenSubtargetInfo {
+ virtual void anchor();
+ bool ExtendedInsts;
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ MSP430Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_MSP430_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
new file mode 100644
index 000000000000..164e351df952
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -0,0 +1,71 @@
+//===-- MSP430TargetMachine.cpp - Define TargetMachine for MSP430 ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430TargetMachine.h"
+#include "MSP430.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMSP430Target() {
+ // Register the target.
+ RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
+}
+
+MSP430TargetMachine::MSP430TargetMachine(const Target &T,
+ StringRef TT,
+ StringRef CPU,
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ // FIXME: Check DataLayout string.
+ DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(Subtarget) { }
+
+namespace {
+/// MSP430 Code Generator Pass Configuration Options.
+class MSP430PassConfig : public TargetPassConfig {
+public:
+ MSP430PassConfig(MSP430TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ MSP430TargetMachine &getMSP430TargetMachine() const {
+ return getTM<MSP430TargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new MSP430PassConfig(this, PM);
+}
+
+bool MSP430PassConfig::addInstSelector() {
+ // Install an instruction selector.
+ addPass(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
+ return false;
+}
+
+bool MSP430PassConfig::addPreEmitPass() {
+ // Must run branch selection immediately preceding the asm printer.
+ addPass(createMSP430BranchSelectionPass());
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
new file mode 100644
index 000000000000..be695a211109
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
@@ -0,0 +1,69 @@
+//===-- MSP430TargetMachine.h - Define TargetMachine for MSP430 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
+#define LLVM_TARGET_MSP430_TARGETMACHINE_H
+
+#include "MSP430FrameLowering.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430SelectionDAGInfo.h"
+#include "MSP430Subtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// MSP430TargetMachine
+///
+class MSP430TargetMachine : public LLVMTargetMachine {
+ MSP430Subtarget Subtarget;
+ const DataLayout DL; // Calculates type size & alignment
+ MSP430InstrInfo InstrInfo;
+ MSP430TargetLowering TLInfo;
+ MSP430SelectionDAGInfo TSInfo;
+ MSP430FrameLowering FrameLowering;
+
+public:
+ MSP430TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const DataLayout *getDataLayout() const { return &DL;}
+ virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const MSP430TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+}; // MSP430TargetMachine.
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430_TARGETMACHINE_H
diff --git a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
new file mode 100644
index 000000000000..0d71d04ebe22
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- MSP430TargetInfo.cpp - MSP430 Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMSP430Target;
+
+extern "C" void LLVMInitializeMSP430TargetInfo() {
+ RegisterTarget<Triple::msp430>
+ X(TheMSP430Target, "msp430", "MSP430 [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp
new file mode 100644
index 000000000000..edfd421d8532
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mangler.cpp
@@ -0,0 +1,237 @@
+//===-- Mangler.cpp - Self-contained c/asm llvm name mangler --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Unified name mangler for assembly backends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
+ if ((C < 'a' || C > 'z') &&
+ (C < 'A' || C > 'Z') &&
+ (C < '0' || C > '9') &&
+ C != '_' && C != '$' && C != '@' &&
+ !(AllowPeriod && C == '.') &&
+ !(AllowUTF8 && (C & 0x80)))
+ return false;
+ return true;
+}
+
+static char HexDigit(int V) {
+ return V < 10 ? V+'0' : V+'A'-10;
+}
+
+static void MangleLetter(SmallVectorImpl<char> &OutName, unsigned char C) {
+ OutName.push_back('_');
+ OutName.push_back(HexDigit(C >> 4));
+ OutName.push_back(HexDigit(C & 15));
+ OutName.push_back('_');
+}
+
+/// NameNeedsEscaping - Return true if the identifier \p Str needs quotes
+/// for this assembler.
+static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
+ assert(!Str.empty() && "Cannot create an empty MCSymbol");
+
+ // If the first character is a number and the target does not allow this, we
+ // need quotes.
+ if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9')
+ return true;
+
+ // If any of the characters in the string is an unacceptable character, force
+ // quotes.
+ bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ bool AllowUTF8 = MAI.doesAllowUTF8();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
+ return true;
+ return false;
+}
+
+/// appendMangledName - Add the specified string in mangled form if it uses
+/// any unusual characters.
+static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
+ const MCAsmInfo &MAI) {
+ // The first character is not allowed to be a number unless the target
+ // explicitly allows it.
+ if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') {
+ MangleLetter(OutName, Str[0]);
+ Str = Str.substr(1);
+ }
+
+ bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ bool AllowUTF8 = MAI.doesAllowUTF8();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
+ MangleLetter(OutName, Str[i]);
+ else
+ OutName.push_back(Str[i]);
+ }
+}
+
+
+/// appendMangledQuotedName - On systems that support quoted symbols, we still
+/// have to escape some (obscure) characters like " and \n which would break the
+/// assembler's lexing.
+static void appendMangledQuotedName(SmallVectorImpl<char> &OutName,
+ StringRef Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] == '"' || Str[i] == '\n')
+ MangleLetter(OutName, Str[i]);
+ else
+ OutName.push_back(Str[i]);
+ }
+}
+
+
+/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+/// and the specified name as the global variable name. GVName must not be
+/// empty.
+void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
+ const Twine &GVName, ManglerPrefixTy PrefixTy) {
+ SmallString<256> TmpData;
+ StringRef Name = GVName.toStringRef(TmpData);
+ assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
+
+ const MCAsmInfo &MAI = Context.getAsmInfo();
+
+ // If the global name is not led with \1, add the appropriate prefixes.
+ if (Name[0] == '\1') {
+ Name = Name.substr(1);
+ } else {
+ if (PrefixTy == Mangler::Private) {
+ const char *Prefix = MAI.getPrivateGlobalPrefix();
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+ } else if (PrefixTy == Mangler::LinkerPrivate) {
+ const char *Prefix = MAI.getLinkerPrivateGlobalPrefix();
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+ }
+
+ const char *Prefix = MAI.getGlobalPrefix();
+ if (Prefix[0] == 0)
+ ; // Common noop, no prefix.
+ else if (Prefix[1] == 0)
+ OutName.push_back(Prefix[0]); // Common, one character prefix.
+ else
+ OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary length prefix.
+ }
+
+ // If this is a simple string that doesn't need escaping, just append it.
+ if (!NameNeedsEscaping(Name, MAI) ||
+ // If quotes are supported, they can be used unless the string contains
+ // a quote or newline.
+ (MAI.doesAllowQuotesInName() &&
+ Name.find_first_of("\n\"") == StringRef::npos)) {
+ OutName.append(Name.begin(), Name.end());
+ return;
+ }
+
+ // On systems that do not allow quoted names, we need to mangle most
+ // strange characters.
+ if (!MAI.doesAllowQuotesInName())
+ return appendMangledName(OutName, Name, MAI);
+
+ // Okay, the system allows quoted strings. We can quote most anything, the
+ // only characters that need escaping are " and \n.
+ assert(Name.find_first_of("\n\"") != StringRef::npos);
+ return appendMangledQuotedName(OutName, Name);
+}
+
+/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
+/// a suffix on their name indicating the number of words of arguments they
+/// take.
+static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName,
+ const Function *F, const DataLayout &TD) {
+ // Calculate arguments size total.
+ unsigned ArgWords = 0;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Type *Ty = AI->getType();
+ // 'Dereference' type in case of byval parameter attribute
+ if (AI->hasByValAttr())
+ Ty = cast<PointerType>(Ty)->getElementType();
+ // Size should be aligned to DWORD boundary
+ ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+ }
+
+ raw_svector_ostream(OutName) << '@' << ArgWords;
+}
+
+
+/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+/// and the specified global variable's name. If the global variable doesn't
+/// have a name, this fills in a unique name for the global.
+void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
+ const GlobalValue *GV,
+ bool isImplicitlyPrivate) {
+ ManglerPrefixTy PrefixTy = Mangler::Default;
+ if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
+ PrefixTy = Mangler::Private;
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
+ PrefixTy = Mangler::LinkerPrivate;
+
+ // If this global has a name, handle it simply.
+ if (GV->hasName()) {
+ getNameWithPrefix(OutName, GV->getName(), PrefixTy);
+ } else {
+ // Get the ID for the global, assigning a new one if we haven't got one
+ // already.
+ unsigned &ID = AnonGlobalIDs[GV];
+ if (ID == 0) ID = NextAnonGlobalID++;
+
+ // Must mangle the global into a unique ID.
+ getNameWithPrefix(OutName, "__unnamed_" + Twine(ID), PrefixTy);
+ }
+
+ // If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
+ // add it.
+ if (Context.getAsmInfo().hasMicrosoftFastStdCallMangling()) {
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ CallingConv::ID CC = F->getCallingConv();
+
+ // fastcall functions need to start with @.
+ // FIXME: This logic seems unlikely to be right.
+ if (CC == CallingConv::X86_FastCall) {
+ if (OutName[0] == '_')
+ OutName[0] = '@';
+ else
+ OutName.insert(OutName.begin(), '@');
+ }
+
+ // fastcall and stdcall functions usually need @42 at the end to specify
+ // the argument info.
+ FunctionType *FT = F->getFunctionType();
+ if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
+ // "Pure" variadic functions do not receive @0 suffix.
+ (!FT->isVarArg() || FT->getNumParams() == 0 ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr())))
+ AddFastCallStdCallSuffix(OutName, F, TD);
+ }
+ }
+}
+
+/// getSymbol - Return the MCSymbol for the specified global value. This
+/// symbol is the main label that is the address of the global.
+MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
+ SmallString<60> NameStr;
+ getNameWithPrefix(NameStr, GV, false);
+ return Context.GetOrCreateSymbol(NameStr.str());
+}
+
+
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
new file mode 100644
index 000000000000..c403f216b0d6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -0,0 +1,1775 @@
+//===-- MipsAsmParser.cpp - Parse Mips assembly to MCInst instructions ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+class MipsAssemblerOptions {
+public:
+ MipsAssemblerOptions():
+ aTReg(1), reorder(true), macro(true) {
+ }
+
+ unsigned getATRegNum() {return aTReg;}
+ bool setATReg(unsigned Reg);
+
+ bool isReorder() {return reorder;}
+ void setReorder() {reorder = true;}
+ void setNoreorder() {reorder = false;}
+
+ bool isMacro() {return macro;}
+ void setMacro() {macro = true;}
+ void setNomacro() {macro = false;}
+
+private:
+ unsigned aTReg;
+ bool reorder;
+ bool macro;
+};
+}
+
+namespace {
+class MipsAsmParser : public MCTargetAsmParser {
+
+ enum FpFormatTy {
+ FP_FORMAT_NONE = -1,
+ FP_FORMAT_S,
+ FP_FORMAT_D,
+ FP_FORMAT_L,
+ FP_FORMAT_W
+ } FpFormat;
+
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+ MipsAssemblerOptions Options;
+
+
+#define GET_ASSEMBLER_HEADER
+#include "MipsGenAsmMatcher.inc"
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool parseMathOperation(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool ParseDirective(AsmToken DirectiveID);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterClass);
+
+ bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
+ StringRef Mnemonic);
+
+ int tryParseRegister(bool is64BitReg);
+
+ bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ bool is64BitReg);
+
+ bool needsExpansion(MCInst &Inst);
+
+ void expandInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+ void expandLoadImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+ void expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+ void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+ void expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad,bool isImmOpnd);
+ bool reportParseError(StringRef ErrorMsg);
+
+ bool parseMemOffset(const MCExpr *&Res);
+ bool parseRelocOperand(const MCExpr *&Res);
+
+ bool parseDirectiveSet();
+
+ bool parseSetAtDirective();
+ bool parseSetNoAtDirective();
+ bool parseSetMacroDirective();
+ bool parseSetNoMacroDirective();
+ bool parseSetReorderDirective();
+ bool parseSetNoReorderDirective();
+
+ bool parseSetAssignment();
+
+ bool parseDirectiveWord(unsigned Size, SMLoc L);
+
+ MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
+
+ bool isMips64() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips64) != 0;
+ }
+
+ bool isFP64() const {
+ return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0;
+ }
+
+ int matchRegisterName(StringRef Symbol, bool is64BitReg);
+
+ int matchCPURegisterName(StringRef Symbol);
+
+ int matchRegisterByNumber(unsigned RegNum, unsigned RegClass);
+
+ void setFpFormat(FpFormatTy Format) {
+ FpFormat = Format;
+ }
+
+ void setDefaultFpFormat();
+
+ void setFpFormat(StringRef Format);
+
+ FpFormatTy getFpFormat() {return FpFormat;}
+
+ bool requestsDoubleOperand(StringRef Mnemonic);
+
+ unsigned getReg(int RC,int RegNo);
+
+ int getATReg();
+
+ bool processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+public:
+ MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+};
+}
+
+namespace {
+
+/// MipsOperand - Instances of this class represent a parsed Mips machine
+/// instruction.
+class MipsOperand : public MCParsedAsmOperand {
+
+public:
+ enum RegisterKind {
+ Kind_None,
+ Kind_CPURegs,
+ Kind_CPU64Regs,
+ Kind_HWRegs,
+ Kind_HW64Regs,
+ Kind_FGR32Regs,
+ Kind_FGR64Regs,
+ Kind_AFGR64Regs,
+ Kind_CCRRegs
+ };
+
+private:
+ enum KindTy {
+ k_CondCode,
+ k_CoprocNum,
+ k_Immediate,
+ k_Memory,
+ k_PostIndexRegister,
+ k_Register,
+ k_Token
+ } Kind;
+
+ MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+ struct Token {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ RegisterKind Kind;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ const MCExpr *Off;
+ };
+
+ union {
+ struct Token Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
+ };
+
+ SMLoc StartLoc, EndLoc;
+
+public:
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const{
+ // Add as immediate when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCExpr *Expr = getImm();
+ addExpr(Inst,Expr);
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBase()));
+
+ const MCExpr *Expr = getMemOff();
+ addExpr(Inst,Expr);
+ }
+
+ bool isReg() const { return Kind == k_Register; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isMem() const { return Kind == k_Memory; }
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ void setRegKind(RegisterKind RegKind) {
+ assert((Kind == k_Register) && "Invalid access!");
+ Reg.Kind = RegKind;
+ }
+
+ const MCExpr *getImm() const {
+ assert((Kind == k_Immediate) && "Invalid access!");
+ return Imm.Val;
+ }
+
+ unsigned getMemBase() const {
+ assert((Kind == k_Memory) && "Invalid access!");
+ return Mem.Base;
+ }
+
+ const MCExpr *getMemOff() const {
+ assert((Kind == k_Memory) && "Invalid access!");
+ return Mem.Off;
+ }
+
+ static MipsOperand *CreateToken(StringRef Str, SMLoc S) {
+ MipsOperand *Op = new MipsOperand(k_Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static MipsOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ MipsOperand *Op = new MipsOperand(k_Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MipsOperand *Op = new MipsOperand(k_Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MipsOperand *CreateMem(unsigned Base, const MCExpr *Off,
+ SMLoc S, SMLoc E) {
+ MipsOperand *Op = new MipsOperand(k_Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.Off = Off;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ bool isCPURegsAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_CPURegs;
+ }
+ void addCPURegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isCPU64RegsAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_CPU64Regs;
+ }
+ void addCPU64RegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isHWRegsAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_HWRegs;
+ }
+ void addHWRegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isHW64RegsAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_HW64Regs;
+ }
+ void addHW64RegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ void addCCRAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isCCRAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_CCRRegs;
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ virtual void print(raw_ostream &OS) const {
+ llvm_unreachable("unimplemented!");
+ }
+};
+}
+
+namespace llvm {
+extern const MCInstrDesc MipsInsts[];
+}
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
+ return MipsInsts[Opcode];
+}
+
+bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ Inst.setLoc(IDLoc);
+ if (MCID.mayLoad() || MCID.mayStore()) {
+ // Check the offset of memory operand, if it is a symbol
+ // reference or immediate we may have to expand instructions
+ for (unsigned i=0;i<MCID.getNumOperands();i++) {
+ const MCOperandInfo &OpInfo = MCID.OpInfo[i];
+ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
+ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+ MCOperand &Op = Inst.getOperand(i);
+ if (Op.isImm()) {
+ int MemOffset = Op.getImm();
+ if (MemOffset < -32768 || MemOffset > 32767) {
+ // Offset can't exceed 16bit value
+ expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),true);
+ return false;
+ }
+ } else if (Op.isExpr()) {
+ const MCExpr *Expr = Op.getExpr();
+ if (Expr->getKind() == MCExpr::SymbolRef){
+ const MCSymbolRefExpr *SR =
+ static_cast<const MCSymbolRefExpr*>(Expr);
+ if (SR->getKind() == MCSymbolRefExpr::VK_None) {
+ // Expand symbol
+ expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),false);
+ return false;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (needsExpansion(Inst))
+ expandInstruction(Inst, IDLoc, Instructions);
+ else
+ Instructions.push_back(Inst);
+
+ return false;
+}
+
+bool MipsAsmParser::needsExpansion(MCInst &Inst) {
+
+ switch(Inst.getOpcode()) {
+ case Mips::LoadImm32Reg:
+ case Mips::LoadAddr32Imm:
+ case Mips::LoadAddr32Reg:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions){
+ switch(Inst.getOpcode()) {
+ case Mips::LoadImm32Reg:
+ return expandLoadImm(Inst, IDLoc, Instructions);
+ case Mips::LoadAddr32Imm:
+ return expandLoadAddressImm(Inst,IDLoc,Instructions);
+ case Mips::LoadAddr32Reg:
+ return expandLoadAddressReg(Inst,IDLoc,Instructions);
+ }
+}
+
+void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions){
+ MCInst tmpInst;
+ const MCOperand &ImmOp = Inst.getOperand(1);
+ assert(ImmOp.isImm() && "expected immediate operand kind");
+ const MCOperand &RegOp = Inst.getOperand(0);
+ assert(RegOp.isReg() && "expected register operand kind");
+
+ int ImmValue = ImmOp.getImm();
+ tmpInst.setLoc(IDLoc);
+ if ( 0 <= ImmValue && ImmValue <= 65535) {
+ // for 0 <= j <= 65535.
+ // li d,j => ori d,$zero,j
+ tmpInst.setOpcode(Mips::ORi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(
+ MCOperand::CreateReg(Mips::ZERO));
+ tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
+ Instructions.push_back(tmpInst);
+ } else if ( ImmValue < 0 && ImmValue >= -32768) {
+ // for -32768 <= j < 0.
+ // li d,j => addiu d,$zero,j
+ tmpInst.setOpcode(Mips::ADDiu);
+ tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(
+ MCOperand::CreateReg(Mips::ZERO));
+ tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
+ Instructions.push_back(tmpInst);
+ } else {
+ // for any other value of j that is representable as a 32-bit integer.
+ // li d,j => lui d,hi16(j)
+ // ori d,d,lo16(j)
+ tmpInst.setOpcode(Mips::LUi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
+ Instructions.push_back(tmpInst);
+ tmpInst.clear();
+ tmpInst.setOpcode(Mips::ORi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+ }
+}
+
+void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions){
+ MCInst tmpInst;
+ const MCOperand &ImmOp = Inst.getOperand(2);
+ assert(ImmOp.isImm() && "expected immediate operand kind");
+ const MCOperand &SrcRegOp = Inst.getOperand(1);
+ assert(SrcRegOp.isReg() && "expected register operand kind");
+ const MCOperand &DstRegOp = Inst.getOperand(0);
+ assert(DstRegOp.isReg() && "expected register operand kind");
+ int ImmValue = ImmOp.getImm();
+ if ( -32768 <= ImmValue && ImmValue <= 65535) {
+ //for -32768 <= j <= 65535.
+ //la d,j(s) => addiu d,s,j
+ tmpInst.setOpcode(Mips::ADDiu);
+ tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
+ Instructions.push_back(tmpInst);
+ } else {
+ //for any other value of j that is representable as a 32-bit integer.
+ //la d,j(s) => lui d,hi16(j)
+ // ori d,d,lo16(j)
+ // addu d,d,s
+ tmpInst.setOpcode(Mips::LUi);
+ tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
+ Instructions.push_back(tmpInst);
+ tmpInst.clear();
+ tmpInst.setOpcode(Mips::ORi);
+ tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
+ Instructions.push_back(tmpInst);
+ tmpInst.clear();
+ tmpInst.setOpcode(Mips::ADDu);
+ tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
+ Instructions.push_back(tmpInst);
+ }
+}
+
+void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions){
+ MCInst tmpInst;
+ const MCOperand &ImmOp = Inst.getOperand(1);
+ assert(ImmOp.isImm() && "expected immediate operand kind");
+ const MCOperand &RegOp = Inst.getOperand(0);
+ assert(RegOp.isReg() && "expected register operand kind");
+ int ImmValue = ImmOp.getImm();
+ if ( -32768 <= ImmValue && ImmValue <= 65535) {
+ //for -32768 <= j <= 65535.
+ //la d,j => addiu d,$zero,j
+ tmpInst.setOpcode(Mips::ADDiu);
+ tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(
+ MCOperand::CreateReg(Mips::ZERO));
+ tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
+ Instructions.push_back(tmpInst);
+ } else {
+ //for any other value of j that is representable as a 32-bit integer.
+ //la d,j => lui d,hi16(j)
+ // ori d,d,lo16(j)
+ tmpInst.setOpcode(Mips::LUi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
+ Instructions.push_back(tmpInst);
+ tmpInst.clear();
+ tmpInst.setOpcode(Mips::ORi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
+ Instructions.push_back(tmpInst);
+ }
+}
+
+void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad,bool isImmOpnd) {
+ const MCSymbolRefExpr *SR;
+ MCInst TempInst;
+ unsigned ImmOffset,HiOffset,LoOffset;
+ const MCExpr *ExprOffset;
+ unsigned TmpRegNum;
+ unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID:
+ Mips::CPURegsRegClassID,
+ getATReg());
+ // 1st operand is either source or dst register
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+ unsigned RegOpNum = Inst.getOperand(0).getReg();
+ // 2nd operand is base register
+ assert(Inst.getOperand(1).isReg() && "expected register operand kind");
+ unsigned BaseRegNum = Inst.getOperand(1).getReg();
+ // 3rd operand is either immediate or expression
+ if (isImmOpnd) {
+ assert(Inst.getOperand(2).isImm() && "expected immediate operand kind");
+ ImmOffset = Inst.getOperand(2).getImm();
+ LoOffset = ImmOffset & 0x0000ffff;
+ HiOffset = (ImmOffset & 0xffff0000) >> 16;
+ // If msb of LoOffset is 1(negative number) we must increment HiOffset
+ if (LoOffset & 0x8000)
+ HiOffset++;
+ }
+ else
+ ExprOffset = Inst.getOperand(2).getExpr();
+ // All instructions will have the same location
+ TempInst.setLoc(IDLoc);
+ // 1st instruction in expansion is LUi. For load instruction we can use
+ // the dst register as a temporary if base and dst are different,
+ // but for stores we must use $at
+ TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum;
+ TempInst.setOpcode(Mips::LUi);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(HiOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+ SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_HI,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ }
+ }
+ // Add the instruction to the list
+ Instructions.push_back(TempInst);
+ // and prepare TempInst for next instruction
+ TempInst.clear();
+ // which is add temp register to base
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+ // and finaly, create original instruction with low part
+ // of offset and new base
+ TempInst.setOpcode(Inst.getOpcode());
+ TempInst.addOperand(MCOperand::CreateReg(RegOpNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(LoOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_LO,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+ }
+ }
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+}
+
+bool MipsAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ SmallVector<MCInst, 8> Instructions;
+ unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+
+ switch (MatchResult) {
+ default: break;
+ case Match_Success: {
+ if (processInstruction(Inst,IDLoc,Instructions))
+ return true;
+ for(unsigned i =0; i < Instructions.size(); i++)
+ Out.EmitInstruction(Instructions[i]);
+ return false;
+ }
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((MipsOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+ }
+ return true;
+}
+
+int MipsAsmParser::matchCPURegisterName(StringRef Name) {
+ int CC;
+
+ if (Name == "at")
+ return getATReg();
+
+ CC = StringSwitch<unsigned>(Name)
+ .Case("zero", 0)
+ .Case("a0", 4)
+ .Case("a1", 5)
+ .Case("a2", 6)
+ .Case("a3", 7)
+ .Case("v0", 2)
+ .Case("v1", 3)
+ .Case("s0", 16)
+ .Case("s1", 17)
+ .Case("s2", 18)
+ .Case("s3", 19)
+ .Case("s4", 20)
+ .Case("s5", 21)
+ .Case("s6", 22)
+ .Case("s7", 23)
+ .Case("k0", 26)
+ .Case("k1", 27)
+ .Case("sp", 29)
+ .Case("fp", 30)
+ .Case("gp", 28)
+ .Case("ra", 31)
+ .Case("t0", 8)
+ .Case("t1", 9)
+ .Case("t2", 10)
+ .Case("t3", 11)
+ .Case("t4", 12)
+ .Case("t5", 13)
+ .Case("t6", 14)
+ .Case("t7", 15)
+ .Case("t8", 24)
+ .Case("t9", 25)
+ .Default(-1);
+
+ // Although SGI documentation just cut out t0-t3 for n32/n64,
+ // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
+ // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7.
+ if (isMips64() && 8 <= CC && CC <= 11)
+ CC += 4;
+
+ if (CC == -1 && isMips64())
+ CC = StringSwitch<unsigned>(Name)
+ .Case("a4", 8)
+ .Case("a5", 9)
+ .Case("a6", 10)
+ .Case("a7", 11)
+ .Case("kt0", 26)
+ .Case("kt1", 27)
+ .Case("s8", 30)
+ .Default(-1);
+
+ return CC;
+}
+int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
+
+ int CC;
+ CC = matchCPURegisterName(Name);
+ if (CC != -1)
+ return matchRegisterByNumber(CC,is64BitReg?Mips::CPU64RegsRegClassID:
+ Mips::CPURegsRegClassID);
+
+ if (Name[0] == 'f') {
+ StringRef NumString = Name.substr(1);
+ unsigned IntVal;
+ if( NumString.getAsInteger(10, IntVal))
+ return -1; // not integer
+ if (IntVal > 31)
+ return -1;
+
+ FpFormatTy Format = getFpFormat();
+
+ if (Format == FP_FORMAT_S || Format == FP_FORMAT_W)
+ return getReg(Mips::FGR32RegClassID, IntVal);
+ if (Format == FP_FORMAT_D) {
+ if(isFP64()) {
+ return getReg(Mips::FGR64RegClassID, IntVal);
+ }
+ // only even numbers available as register pairs
+ if (( IntVal > 31) || (IntVal%2 != 0))
+ return -1;
+ return getReg(Mips::AFGR64RegClassID, IntVal/2);
+ }
+ }
+
+ return -1;
+}
+void MipsAsmParser::setDefaultFpFormat() {
+
+ if (isMips64() || isFP64())
+ FpFormat = FP_FORMAT_D;
+ else
+ FpFormat = FP_FORMAT_S;
+}
+
+bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic){
+
+ bool IsDouble = StringSwitch<bool>(Mnemonic.lower())
+ .Case("ldxc1", true)
+ .Case("ldc1", true)
+ .Case("sdxc1", true)
+ .Case("sdc1", true)
+ .Default(false);
+
+ return IsDouble;
+}
+void MipsAsmParser::setFpFormat(StringRef Format) {
+
+ FpFormat = StringSwitch<FpFormatTy>(Format.lower())
+ .Case(".s", FP_FORMAT_S)
+ .Case(".d", FP_FORMAT_D)
+ .Case(".l", FP_FORMAT_L)
+ .Case(".w", FP_FORMAT_W)
+ .Default(FP_FORMAT_NONE);
+}
+
+bool MipsAssemblerOptions::setATReg(unsigned Reg) {
+ if (Reg > 31)
+ return false;
+
+ aTReg = Reg;
+ return true;
+}
+
+int MipsAsmParser::getATReg() {
+ return Options.getATRegNum();
+}
+
+unsigned MipsAsmParser::getReg(int RC,int RegNo) {
+ return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo);
+}
+
+int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
+
+ if (RegNum > 31)
+ return -1;
+
+ return getReg(RegClass, RegNum);
+}
+
+int MipsAsmParser::tryParseRegister(bool is64BitReg) {
+ const AsmToken &Tok = Parser.getTok();
+ int RegNum = -1;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ std::string lowerCase = Tok.getString().lower();
+ RegNum = matchRegisterName(lowerCase, is64BitReg);
+ } else if (Tok.is(AsmToken::Integer))
+ RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
+ is64BitReg ? Mips::CPU64RegsRegClassID
+ : Mips::CPURegsRegClassID);
+ return RegNum;
+}
+
+bool MipsAsmParser::
+ tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ bool is64BitReg){
+
+ SMLoc S = Parser.getTok().getLoc();
+ int RegNo = -1;
+
+ RegNo = tryParseRegister(is64BitReg);
+ if (RegNo == -1)
+ return true;
+
+ Operands.push_back(MipsOperand::CreateReg(RegNo, S,
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat register token.
+ return false;
+}
+
+bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
+ StringRef Mnemonic) {
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
+ return false;
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return true;
+ case AsmToken::Dollar: {
+ // parse register
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat dollar token.
+ // parse register operand
+ if (!tryParseRegisterOperand(Operands, isMips64())) {
+ if (getLexer().is(AsmToken::LParen)) {
+ // check if it is indexed addressing operand
+ Operands.push_back(MipsOperand::CreateToken("(", S));
+ Parser.Lex(); // eat parenthesis
+ if (getLexer().isNot(AsmToken::Dollar))
+ return true;
+
+ Parser.Lex(); // eat dollar
+ if (tryParseRegisterOperand(Operands, isMips64()))
+ return true;
+
+ if (!getLexer().is(AsmToken::RParen))
+ return true;
+
+ S = Parser.getTok().getLoc();
+ Operands.push_back(MipsOperand::CreateToken(")", S));
+ Parser.Lex();
+ }
+ return false;
+ }
+ // maybe it is a symbol reference
+ StringRef Identifier;
+ if (Parser.parseIdentifier(Identifier))
+ return true;
+
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
+
+ // Otherwise create a symbol ref.
+ const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ getContext());
+
+ Operands.push_back(MipsOperand::CreateImm(Res, S, E));
+ return false;
+ }
+ case AsmToken::Identifier:
+ // Look for the existing symbol, we should check if
+ // we need to assigne the propper RegisterKind
+ if (searchSymbolAlias(Operands,MipsOperand::Kind_None))
+ return false;
+ //else drop to expression parsing
+ case AsmToken::LParen:
+ case AsmToken::Minus:
+ case AsmToken::Plus:
+ case AsmToken::Integer:
+ case AsmToken::String: {
+ // quoted label names
+ const MCExpr *IdVal;
+ SMLoc S = Parser.getTok().getLoc();
+ if (getParser().parseExpression(IdVal))
+ return true;
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
+ return false;
+ }
+ case AsmToken::Percent: {
+ // it is a symbol reference or constant expression
+ const MCExpr *IdVal;
+ SMLoc S = Parser.getTok().getLoc(); // start location of the operand
+ if (parseRelocOperand(IdVal))
+ return true;
+
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
+ return false;
+ } // case AsmToken::Percent
+ } // switch(getLexer().getKind())
+ return true;
+}
+
+bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
+
+ Parser.Lex(); // eat % token
+ const AsmToken &Tok = Parser.getTok(); // get next token, operation
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+
+ std::string Str = Tok.getIdentifier().str();
+
+ Parser.Lex(); // eat identifier
+ // now make expression from the rest of the operand
+ const MCExpr *IdVal;
+ SMLoc EndLoc;
+
+ if (getLexer().getKind() == AsmToken::LParen) {
+ while (1) {
+ Parser.Lex(); // eat '(' token
+ if (getLexer().getKind() == AsmToken::Percent) {
+ Parser.Lex(); // eat % token
+ const AsmToken &nextTok = Parser.getTok();
+ if (nextTok.isNot(AsmToken::Identifier))
+ return true;
+ Str += "(%";
+ Str += nextTok.getIdentifier();
+ Parser.Lex(); // eat identifier
+ if (getLexer().getKind() != AsmToken::LParen)
+ return true;
+ } else
+ break;
+ }
+ if (getParser().parseParenExpression(IdVal,EndLoc))
+ return true;
+
+ while (getLexer().getKind() == AsmToken::RParen)
+ Parser.Lex(); // eat ')' token
+
+ } else
+ return true; // parenthesis must follow reloc operand
+
+ // Check the type of the expression
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) {
+ // It's a constant, evaluate lo or hi value
+ if (Str == "lo") {
+ short Val = MCE->getValue();
+ Res = MCConstantExpr::Create(Val, getContext());
+ } else if (Str == "hi") {
+ int Val = MCE->getValue();
+ int LoSign = Val & 0x8000;
+ Val = (Val & 0xffff0000) >> 16;
+ // Lower part is treated as a signed int, so if it is negative
+ // we must add 1 to the hi part to compensate
+ if (LoSign)
+ Val++;
+ Res = MCConstantExpr::Create(Val, getContext());
+ }
+ return false;
+ }
+
+ if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) {
+ // It's a symbol, create symbolic expression from symbol
+ StringRef Symbol = MSRE->getSymbol().getName();
+ MCSymbolRefExpr::VariantKind VK = getVariantKind(Str);
+ Res = MCSymbolRefExpr::Create(Symbol,VK,getContext());
+ return false;
+ }
+ return true;
+}
+
+bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+
+ StartLoc = Parser.getTok().getLoc();
+ RegNo = tryParseRegister(isMips64());
+ EndLoc = Parser.getTok().getLoc();
+ return (RegNo == (unsigned)-1);
+}
+
+bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
+
+ SMLoc S;
+
+ switch(getLexer().getKind()) {
+ default:
+ return true;
+ case AsmToken::Identifier:
+ case AsmToken::Integer:
+ case AsmToken::Minus:
+ case AsmToken::Plus:
+ return (getParser().parseExpression(Res));
+ case AsmToken::Percent:
+ return parseRelocOperand(Res);
+ case AsmToken::LParen:
+ return false; // it's probably assuming 0
+ }
+ return true;
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&Operands) {
+
+ const MCExpr *IdVal = 0;
+ SMLoc S;
+ // first operand is the offset
+ S = Parser.getTok().getLoc();
+
+ if (parseMemOffset(IdVal))
+ return MatchOperand_ParseFail;
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.isNot(AsmToken::LParen)) {
+ MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
+ if (Mnemonic->getToken() == "la") {
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1);
+ Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
+ return MatchOperand_Success;
+ }
+ Error(Parser.getTok().getLoc(), "'(' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat '(' token.
+
+ const AsmToken &Tok1 = Parser.getTok(); // get next token
+ if (Tok1.is(AsmToken::Dollar)) {
+ Parser.Lex(); // Eat '$' token.
+ if (tryParseRegisterOperand(Operands, isMips64())) {
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return MatchOperand_ParseFail;
+ }
+
+ } else {
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return MatchOperand_ParseFail;
+ }
+
+ const AsmToken &Tok2 = Parser.getTok(); // get next token
+ if (Tok2.isNot(AsmToken::RParen)) {
+ Error(Parser.getTok().getLoc(), "')' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ Parser.Lex(); // Eat ')' token.
+
+ if (IdVal == 0)
+ IdVal = MCConstantExpr::Create(0, getContext());
+
+ // now replace register operand with the mem operand
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ int RegNo = op->getReg();
+ // remove register from operands
+ Operands.pop_back();
+ // and add memory operand
+ Operands.push_back(MipsOperand::CreateMem(RegNo, IdVal, S, E));
+ delete op;
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
+ // if the first token is not '$' we have an error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat $
+ if(!tryParseRegisterOperand(Operands, true)) {
+ // set the proper register kind
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ op->setRegKind(MipsOperand::Kind_CPU64Regs);
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+bool MipsAsmParser::
+searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterKind) {
+
+ MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
+ if (Sym) {
+ SMLoc S = Parser.getTok().getLoc();
+ const MCExpr *Expr;
+ if (Sym->isVariable())
+ Expr = Sym->getVariableValue();
+ else
+ return false;
+ if (Expr->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const StringRef DefSymbol = Ref->getSymbol().getName();
+ if (DefSymbol.startswith("$")) {
+ // Lookup for the register with corresponding name
+ int RegNum = matchRegisterName(DefSymbol.substr(1),isMips64());
+ if (RegNum > -1) {
+ Parser.Lex();
+ MipsOperand *op = MipsOperand::CreateReg(RegNum,S,
+ Parser.getTok().getLoc());
+ op->setRegKind((MipsOperand::RegisterKind)RegisterKind);
+ Operands.push_back(op);
+ return true;
+ }
+ }
+ } else if (Expr->getKind() == MCExpr::Constant) {
+ Parser.Lex();
+ const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr);
+ MipsOperand *op = MipsOperand::CreateImm(Const,S,
+ Parser.getTok().getLoc());
+ Operands.push_back(op);
+ return true;
+ }
+ }
+ return false;
+}
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
+ // if the first token is not '$' we have an error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat $
+ if(!tryParseRegisterOperand(Operands, false)) {
+ // set the propper register kind
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ op->setRegKind(MipsOperand::Kind_CPURegs);
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (isMips64())
+ return MatchOperand_NoMatch;
+
+ // if the first token is not '$' we have error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned RegNum = Tok.getIntVal();
+ // at the moment only hwreg29 is supported
+ if (RegNum != 29)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_HWRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ //if the first token is not '$' we have error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned RegNum = Tok.getIntVal();
+ // at the moment only hwreg29 is supported
+ if (RegNum != 29)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_HW64Regs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ unsigned RegNum;
+ //if the first token is not '$' we have error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.is(AsmToken::Integer)) {
+ RegNum = Tok.getIntVal();
+ // at the moment only fcc0 is supported
+ if (RegNum != 0)
+ return MatchOperand_ParseFail;
+ } else if (Tok.is(AsmToken::Identifier)) {
+ // at the moment only fcc0 is supported
+ if (Tok.getIdentifier() != "fcc0")
+ return MatchOperand_ParseFail;
+ } else
+ return MatchOperand_NoMatch;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_CCRRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
+
+ MCSymbolRefExpr::VariantKind VK
+ = StringSwitch<MCSymbolRefExpr::VariantKind>(Symbol)
+ .Case("hi", MCSymbolRefExpr::VK_Mips_ABS_HI)
+ .Case("lo", MCSymbolRefExpr::VK_Mips_ABS_LO)
+ .Case("gp_rel", MCSymbolRefExpr::VK_Mips_GPREL)
+ .Case("call16", MCSymbolRefExpr::VK_Mips_GOT_CALL)
+ .Case("got", MCSymbolRefExpr::VK_Mips_GOT)
+ .Case("tlsgd", MCSymbolRefExpr::VK_Mips_TLSGD)
+ .Case("tlsldm", MCSymbolRefExpr::VK_Mips_TLSLDM)
+ .Case("dtprel_hi", MCSymbolRefExpr::VK_Mips_DTPREL_HI)
+ .Case("dtprel_lo", MCSymbolRefExpr::VK_Mips_DTPREL_LO)
+ .Case("gottprel", MCSymbolRefExpr::VK_Mips_GOTTPREL)
+ .Case("tprel_hi", MCSymbolRefExpr::VK_Mips_TPREL_HI)
+ .Case("tprel_lo", MCSymbolRefExpr::VK_Mips_TPREL_LO)
+ .Case("got_disp", MCSymbolRefExpr::VK_Mips_GOT_DISP)
+ .Case("got_page", MCSymbolRefExpr::VK_Mips_GOT_PAGE)
+ .Case("got_ofst", MCSymbolRefExpr::VK_Mips_GOT_OFST)
+ .Case("hi(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_HI)
+ .Case("lo(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_LO)
+ .Default(MCSymbolRefExpr::VK_None);
+
+ return VK;
+}
+
+static int ConvertCcString(StringRef CondString) {
+ int CC = StringSwitch<unsigned>(CondString)
+ .Case(".f", 0)
+ .Case(".un", 1)
+ .Case(".eq", 2)
+ .Case(".ueq", 3)
+ .Case(".olt", 4)
+ .Case(".ult", 5)
+ .Case(".ole", 6)
+ .Case(".ule", 7)
+ .Case(".sf", 8)
+ .Case(".ngle", 9)
+ .Case(".seq", 10)
+ .Case(".ngl", 11)
+ .Case(".lt", 12)
+ .Case(".nge", 13)
+ .Case(".le", 14)
+ .Case(".ngt", 15)
+ .Default(-1);
+
+ return CC;
+}
+
+bool MipsAsmParser::
+parseMathOperation(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // split the format
+ size_t Start = Name.find('.'), Next = Name.rfind('.');
+ StringRef Format1 = Name.slice(Start, Next);
+ // and add the first format to the operands
+ Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc));
+ // now for the second format
+ StringRef Format2 = Name.slice(Next, StringRef::npos);
+ Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc));
+
+ // set the format for the first register
+ setFpFormat(Format1);
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Name)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+
+ }
+ Parser.Lex(); // Eat the comma.
+
+ //set the format for the first register
+ setFpFormat(Format2);
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Name)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+
+bool MipsAsmParser::
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef Mnemonic;
+ // floating point instructions: should register be treated as double?
+ if (requestsDoubleOperand(Name)) {
+ setFpFormat(FP_FORMAT_D);
+ Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
+ Mnemonic = Name;
+ }
+ else {
+ setDefaultFpFormat();
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ Mnemonic = Name.slice(Start, Next);
+
+ Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc));
+
+ if (Next != StringRef::npos) {
+ // there is a format token in mnemonic
+ // StringRef Rest = Name.slice(Next, StringRef::npos);
+ size_t Dot = Name.find('.', Next+1);
+ StringRef Format = Name.slice(Next, Dot);
+ if (Dot == StringRef::npos) //only one '.' in a string, it's a format
+ Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
+ else {
+ if (Name.startswith("c.")){
+ // floating point compare, add '.' and immediate represent for cc
+ Operands.push_back(MipsOperand::CreateToken(".", NameLoc));
+ int Cc = ConvertCcString(Format);
+ if (Cc == -1) {
+ return Error(NameLoc, "Invalid conditional code");
+ }
+ SMLoc E = SMLoc::getFromPointer(
+ Parser.getTok().getLoc().getPointer() -1 );
+ Operands.push_back(MipsOperand::CreateImm(
+ MCConstantExpr::Create(Cc, getContext()), NameLoc, E));
+ } else {
+ // trunc, ceil, floor ...
+ return parseMathOperation(Name, NameLoc, Operands);
+ }
+
+ // the rest is a format
+ Format = Name.slice(Dot, StringRef::npos);
+ Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
+ }
+
+ setFpFormat(Format);
+ }
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+ while (getLexer().is(AsmToken::Comma) ) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Name)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+
+bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, ErrorMsg);
+}
+
+bool MipsAsmParser::parseSetNoAtDirective() {
+ // Line should look like:
+ // .set noat
+ // set at reg to 0
+ Options.setATReg(0);
+ // eat noat
+ Parser.Lex();
+ // If this is not the end of the statement, report error
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+bool MipsAsmParser::parseSetAtDirective() {
+ // line can be
+ // .set at - defaults to $1
+ // or .set at=$reg
+ int AtRegNo;
+ getParser().Lex();
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ Options.setATReg(1);
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+ } else if (getLexer().is(AsmToken::Equal)) {
+ getParser().Lex(); // eat '='
+ if (getLexer().isNot(AsmToken::Dollar)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ Parser.Lex(); // Eat '$'
+ const AsmToken &Reg = Parser.getTok();
+ if (Reg.is(AsmToken::Identifier)) {
+ AtRegNo = matchCPURegisterName(Reg.getIdentifier());
+ } else if (Reg.is(AsmToken::Integer)) {
+ AtRegNo = Reg.getIntVal();
+ } else {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+
+ if ( AtRegNo < 1 || AtRegNo > 31) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+
+ if (!Options.setATReg(AtRegNo)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ getParser().Lex(); // Eat reg
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+ } else {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+}
+
+bool MipsAsmParser::parseSetReorderDirective() {
+ Parser.Lex();
+ // If this is not the end of the statement, report error
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ Options.setReorder();
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+
+bool MipsAsmParser::parseSetNoReorderDirective() {
+ Parser.Lex();
+ // if this is not the end of the statement, report error
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ Options.setNoreorder();
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+
+bool MipsAsmParser::parseSetMacroDirective() {
+ Parser.Lex();
+ // if this is not the end of the statement, report error
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ Options.setMacro();
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+
+bool MipsAsmParser::parseSetNoMacroDirective() {
+ Parser.Lex();
+ // if this is not the end of the statement, report error
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("`noreorder' must be set before `nomacro'");
+ return false;
+ }
+ if (Options.isReorder()) {
+ reportParseError("`noreorder' must be set before `nomacro'");
+ return false;
+ }
+ Options.setNomacro();
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+
+bool MipsAsmParser::parseSetAssignment() {
+ StringRef Name;
+ const MCExpr *Value;
+
+ if (Parser.parseIdentifier(Name))
+ reportParseError("expected identifier after .set");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return reportParseError("unexpected token in .set directive");
+ Lex(); //eat comma
+
+ if (Parser.parseExpression(Value))
+ reportParseError("expected valid expression after comma");
+
+ // check if the Name already exists as a symbol
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+ if (Sym) {
+ return reportParseError("symbol already defined");
+ }
+ Sym = getContext().GetOrCreateSymbol(Name);
+ Sym->setVariableValue(Value);
+
+ return false;
+}
+bool MipsAsmParser::parseDirectiveSet() {
+
+ // get next token
+ const AsmToken &Tok = Parser.getTok();
+
+ if (Tok.getString() == "noat") {
+ return parseSetNoAtDirective();
+ } else if (Tok.getString() == "at") {
+ return parseSetAtDirective();
+ } else if (Tok.getString() == "reorder") {
+ return parseSetReorderDirective();
+ } else if (Tok.getString() == "noreorder") {
+ return parseSetNoReorderDirective();
+ } else if (Tok.getString() == "macro") {
+ return parseSetMacroDirective();
+ } else if (Tok.getString() == "nomacro") {
+ return parseSetNoMacroDirective();
+ } else if (Tok.getString() == "nomips16") {
+ // ignore this directive for now
+ Parser.eatToEndOfStatement();
+ return false;
+ } else if (Tok.getString() == "nomicromips") {
+ // ignore this directive for now
+ Parser.eatToEndOfStatement();
+ return false;
+ } else {
+ // it is just an identifier, look for assignment
+ parseSetAssignment();
+ return false;
+ }
+
+ return true;
+}
+
+/// parseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
+
+ StringRef IDVal = DirectiveID.getString();
+
+ if ( IDVal == ".ent") {
+ // ignore this directive for now
+ Parser.Lex();
+ return false;
+ }
+
+ if (IDVal == ".end") {
+ // ignore this directive for now
+ Parser.Lex();
+ return false;
+ }
+
+ if (IDVal == ".frame") {
+ // ignore this directive for now
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ if (IDVal == ".set") {
+ return parseDirectiveSet();
+ }
+
+ if (IDVal == ".fmask") {
+ // ignore this directive for now
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ if (IDVal == ".mask") {
+ // ignore this directive for now
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ if (IDVal == ".gpword") {
+ // ignore this directive for now
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ if (IDVal == ".word") {
+ parseDirectiveWord(4, DirectiveID.getLoc());
+ return false;
+ }
+
+ return true;
+}
+
+extern "C" void LLVMInitializeMipsAsmParser() {
+ RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget);
+ RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget);
+ RegisterMCAsmParser<MipsAsmParser> A(TheMips64Target);
+ RegisterMCAsmParser<MipsAsmParser> B(TheMips64elTarget);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MipsGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
new file mode 100644
index 000000000000..59e49d8ddc6c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -0,0 +1,556 @@
+//===- MipsDisassembler.cpp - Disassembler for Mips -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the Mips Disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+
+/// MipsDisassemblerBase - a disasembler class for Mips.
+class MipsDisassemblerBase : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ MipsDisassemblerBase(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+ bool bigEndian) :
+ MCDisassembler(STI), RegInfo(Info), isBigEndian(bigEndian) {}
+
+ virtual ~MipsDisassemblerBase() {}
+
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+
+private:
+ const MCRegisterInfo *RegInfo;
+protected:
+ bool isBigEndian;
+};
+
+/// MipsDisassembler - a disasembler class for Mips32.
+class MipsDisassembler : public MipsDisassemblerBase {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ MipsDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+ bool bigEndian) :
+ MipsDisassemblerBase(STI, Info, bigEndian) {}
+
+ /// getInstruction - See MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+};
+
+
+/// Mips64Disassembler - a disasembler class for Mips64.
+class Mips64Disassembler : public MipsDisassemblerBase {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ Mips64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+ bool bigEndian) :
+ MipsDisassemblerBase(STI, Info, bigEndian) {}
+
+ /// getInstruction - See MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+};
+
+} // end anonymous namespace
+
+// Forward declare these because the autogenerated code will reference them.
+// Definitions are further down.
+static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeBranchTarget(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeBC1(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeJumpTarget(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMem(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeSimm16(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCondCode(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeInsSize(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeExtSize(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+namespace llvm {
+extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
+ TheMips64elTarget;
+}
+
+static MCDisassembler *createMipsDisassembler(
+ const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new MipsDisassembler(STI, T.createMCRegInfo(""), true);
+}
+
+static MCDisassembler *createMipselDisassembler(
+ const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new MipsDisassembler(STI, T.createMCRegInfo(""), false);
+}
+
+static MCDisassembler *createMips64Disassembler(
+ const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new Mips64Disassembler(STI, T.createMCRegInfo(""), true);
+}
+
+static MCDisassembler *createMips64elDisassembler(
+ const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new Mips64Disassembler(STI, T.createMCRegInfo(""), false);
+}
+
+extern "C" void LLVMInitializeMipsDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheMipsTarget,
+ createMipsDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheMipselTarget,
+ createMipselDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheMips64Target,
+ createMips64Disassembler);
+ TargetRegistry::RegisterMCDisassembler(TheMips64elTarget,
+ createMips64elDisassembler);
+}
+
+
+#include "MipsGenDisassemblerTables.inc"
+
+ /// readInstruction - read four bytes from the MemoryObject
+ /// and return 32 bit word sorted according to the given endianess
+static DecodeStatus readInstruction32(const MemoryObject &region,
+ uint64_t address,
+ uint64_t &size,
+ uint32_t &insn,
+ bool isBigEndian) {
+ uint8_t Bytes[4];
+
+ // We want to read exactly 4 Bytes of data.
+ if (region.readBytes(address, 4, (uint8_t*)Bytes, NULL) == -1) {
+ size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ if (isBigEndian) {
+ // Encoded as a big-endian 32-bit word in the stream.
+ insn = (Bytes[3] << 0) |
+ (Bytes[2] << 8) |
+ (Bytes[1] << 16) |
+ (Bytes[0] << 24);
+ }
+ else {
+ // Encoded as a small-endian 32-bit word in the stream.
+ insn = (Bytes[0] << 0) |
+ (Bytes[1] << 8) |
+ (Bytes[2] << 16) |
+ (Bytes[3] << 24);
+ }
+
+ return MCDisassembler::Success;
+}
+
+DecodeStatus
+MipsDisassembler::getInstruction(MCInst &instr,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
+ uint32_t Insn;
+
+ DecodeStatus Result = readInstruction32(Region, Address, Size,
+ Insn, isBigEndian);
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+DecodeStatus
+Mips64Disassembler::getInstruction(MCInst &instr,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
+ uint32_t Insn;
+
+ DecodeStatus Result = readInstruction32(Region, Address, Size,
+ Insn, isBigEndian);
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTableMips6432, instr, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ // If we fail to decode in Mips64 decoder space we can try in Mips32
+ Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const MipsDisassemblerBase *Dis = static_cast<const MipsDisassemblerBase*>(D);
+ return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
+}
+
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+
+ return MCDisassembler::Fail;
+
+}
+
+static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::CPU64RegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, Mips::CPURegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeCPURegsRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::FGR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::FGR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMem(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg);
+ Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
+
+ if(Inst.getOpcode() == Mips::SC){
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ }
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFMem(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg);
+ Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ // Currently only hardware register 29 is supported.
+ if (RegNo != 29)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(Mips::HWR29));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCondCode(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int CondCode = Insn & 0xf;
+ Inst.addOperand(MCOperand::CreateImm(CondCode));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 30 || RegNo %2)
+ return MCDisassembler::Fail;
+
+ ;
+ unsigned Reg = getReg(Decoder, Mips::AFGR64RegClassID, RegNo /2);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ //Currently only hardware register 29 is supported
+ if (RegNo != 29)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(Mips::HWR29_64));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo >= 4)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::ACRegsDSPRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBranchTarget(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned BranchOffset = Offset & 0xffff;
+ BranchOffset = SignExtend32<18>(BranchOffset << 2) + 4;
+ Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBC1(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned BranchOffset = Insn & 0xffff;
+ BranchOffset = SignExtend32<18>(BranchOffset << 2) + 4;
+ Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeJumpTarget(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+
+ unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
+ Inst.addOperand(MCOperand::CreateImm(JumpOffset));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeSimm16(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Insn)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeInsSize(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // First we need to grab the pos(lsb) from MCInst.
+ int Pos = Inst.getOperand(2).getImm();
+ int Size = (int) Insn - Pos + 1;
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Size)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeExtSize(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Size = (int) Insn + 1;
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Size)));
+ return MCDisassembler::Success;
+}
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
new file mode 100644
index 000000000000..fc23cd380352
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -0,0 +1,211 @@
+//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MipsInstPrinter.h"
+#include "MipsInstrInfo.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define PRINT_ALIAS_INSTR
+#include "MipsGenAsmWriter.inc"
+
+const char* Mips::MipsFCCToString(Mips::CondCode CC) {
+ switch (CC) {
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_OEQ:
+ case FCOND_UNE: return "eq";
+ case FCOND_UEQ:
+ case FCOND_ONE: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "nge";
+ case FCOND_LE:
+ case FCOND_NLE: return "le";
+ case FCOND_NGT:
+ case FCOND_GT: return "ngt";
+ }
+ llvm_unreachable("Impossible condition code!");
+}
+
+void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << '$' << StringRef(getRegisterName(RegNo)).lower();
+}
+
+void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case Mips::RDHWR:
+ case Mips::RDHWR64:
+ O << "\t.set\tpush\n";
+ O << "\t.set\tmips32r2\n";
+ }
+
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case Mips::RDHWR:
+ case Mips::RDHWR64:
+ O << "\n\t.set\tpop";
+ }
+}
+
+static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
+ int Offset = 0;
+ const MCSymbolRefExpr *SRE;
+
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+ SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+ assert(SRE && CE && "Binary expression must be sym+const.");
+ Offset = CE->getValue();
+ }
+ else if (!(SRE = dyn_cast<MCSymbolRefExpr>(Expr)))
+ assert(false && "Unexpected MCExpr type.");
+
+ MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
+
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case MCSymbolRefExpr::VK_None: break;
+ case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT16: OS << "%got("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break;
+ case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break;
+ case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break;
+ case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break;
+ case MCSymbolRefExpr::VK_Mips_TLSLDM: OS << "%tlsldm("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_HI: OS << "%dtprel_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_LO: OS << "%dtprel_lo("; break;
+ case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
+ case MCSymbolRefExpr::VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break;
+ case MCSymbolRefExpr::VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_DISP: OS << "%got_disp("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_PAGE: OS << "%got_page("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
+ case MCSymbolRefExpr::VK_Mips_HIGHER: OS << "%higher("; break;
+ case MCSymbolRefExpr::VK_Mips_HIGHEST: OS << "%highest("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_HI16: OS << "%got_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_LO16: OS << "%got_lo("; break;
+ case MCSymbolRefExpr::VK_Mips_CALL_HI16: OS << "%call_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_CALL_LO16: OS << "%call_lo("; break;
+ }
+
+ OS << SRE->getSymbol();
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+
+ if ((Kind == MCSymbolRefExpr::VK_Mips_GPOFF_HI) ||
+ (Kind == MCSymbolRefExpr::VK_Mips_GPOFF_LO))
+ OS << ")))";
+ else if (Kind != MCSymbolRefExpr::VK_None)
+ OS << ')';
+}
+
+void MipsInstPrinter::printCPURegs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printRegName(O, MI->getOperand(OpNo).getReg());
+}
+
+void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ printExpr(Op.getExpr(), O);
+}
+
+void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MipsInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ // Load/Store memory operands -- imm($reg)
+ // If PIC target the target is loaded as the
+ // pattern lw $25,%call16($28)
+ printOperand(MI, opNum+1, O);
+ O << "(";
+ printOperand(MI, opNum, O);
+ O << ")";
+}
+
+void MipsInstPrinter::
+printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
+ // when using stack locations for not load/store instructions
+ // print the same way as all normal 3 operand instructions.
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+}
+
+void MipsInstPrinter::
+printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ const MCOperand& MO = MI->getOperand(opNum);
+ O << MipsFCCToString((Mips::CondCode)MO.getImm());
+}
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
new file mode 100644
index 000000000000..d1b561f9764e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -0,0 +1,103 @@
+//=== MipsInstPrinter.h - Convert Mips MCInst to assembly syntax -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTPRINTER_H
+#define MIPSINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+// These enumeration declarations were originally in MipsInstrInfo.h but
+// had to be moved here to avoid circular dependencies between
+// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
+namespace Mips {
+// Mips Branch Codes
+enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+};
+
+// Mips Condition Codes
+enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_OEQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+ // This conditions have the same mnemonic as the
+ // above ones, but are used with a branch False;
+ FCOND_T,
+ FCOND_OR,
+ FCOND_UNE,
+ FCOND_ONE,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT
+};
+
+const char *MipsFCCToString(Mips::CondCode CC);
+} // end namespace Mips
+
+class TargetMachine;
+
+class MipsInstPrinter : public MCInstPrinter {
+public:
+ MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printCPURegs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+ void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
+ void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
new file mode 100644
index 000000000000..0b13607a572d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -0,0 +1,279 @@
+//===-- MipsASMBackend.cpp - Mips Asm Backend ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsAsmBackend and MipsELFObjectWriter classes.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "MipsFixupKinds.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Prepare value for the target space for it
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+
+ // Add/subtract and shift
+ switch (Kind) {
+ default:
+ return 0;
+ case FK_GPRel_4:
+ case FK_Data_4:
+ case FK_Data_8:
+ case Mips::fixup_Mips_LO16:
+ case Mips::fixup_Mips_GPREL16:
+ case Mips::fixup_Mips_GPOFF_HI:
+ case Mips::fixup_Mips_GPOFF_LO:
+ case Mips::fixup_Mips_GOT_PAGE:
+ case Mips::fixup_Mips_GOT_OFST:
+ case Mips::fixup_Mips_GOT_DISP:
+ case Mips::fixup_Mips_GOT_LO16:
+ case Mips::fixup_Mips_CALL_LO16:
+ break;
+ case Mips::fixup_Mips_PC16:
+ // So far we are only using this type for branches.
+ // For branches we start 1 instruction after the branch
+ // so the displacement will be one instruction size less.
+ Value -= 4;
+ // The displacement is then divided by 4 to give us an 18 bit
+ // address range.
+ Value >>= 2;
+ break;
+ case Mips::fixup_Mips_26:
+ // So far we are only using this type for jumps.
+ // The displacement is then divided by 4 to give us an 28 bit
+ // address range.
+ Value >>= 2;
+ break;
+ case Mips::fixup_Mips_HI16:
+ case Mips::fixup_Mips_GOT_Local:
+ case Mips::fixup_Mips_GOT_HI16:
+ case Mips::fixup_Mips_CALL_HI16:
+ // Get the 2nd 16-bits. Also add 1 if bit 15 is 1.
+ Value = ((Value + 0x8000) >> 16) & 0xffff;
+ break;
+ case Mips::fixup_Mips_HIGHER:
+ // Get the 3rd 16-bits.
+ Value = ((Value + 0x80008000LL) >> 32) & 0xffff;
+ break;
+ case Mips::fixup_Mips_HIGHEST:
+ // Get the 4th 16-bits.
+ Value = ((Value + 0x800080008000LL) >> 48) & 0xffff;
+ break;
+ }
+
+ return Value;
+}
+
+namespace {
+class MipsAsmBackend : public MCAsmBackend {
+ Triple::OSType OSType;
+ bool IsLittle; // Big or little endian
+ bool Is64Bit; // 32 or 64 bit words
+
+public:
+ MipsAsmBackend(const Target &T, Triple::OSType _OSType,
+ bool _isLittle, bool _is64Bit)
+ :MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createMipsELFObjectWriter(OS,
+ MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit);
+ }
+
+ /// ApplyFixup - Apply the \p Value for given \p Fixup into the provided
+ /// data fragment, at the offset specified by the fixup and following the
+ /// fixup kind as appropriate.
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ MCFixupKind Kind = Fixup.getKind();
+ Value = adjustFixupValue((unsigned)Kind, Value);
+
+ if (!Value)
+ return; // Doesn't change encoding.
+
+ // Where do we start in the object
+ unsigned Offset = Fixup.getOffset();
+ // Number of bytes we need to fixup
+ unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
+ // Used to point to big endian bytes
+ unsigned FullSize;
+
+ switch ((unsigned)Kind) {
+ case Mips::fixup_Mips_16:
+ FullSize = 2;
+ break;
+ case Mips::fixup_Mips_64:
+ FullSize = 8;
+ break;
+ default:
+ FullSize = 4;
+ break;
+ }
+
+ // Grab current value, if any, from bits.
+ uint64_t CurVal = 0;
+
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+ CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
+ }
+
+ uint64_t Mask = ((uint64_t)(-1) >>
+ (64 - getFixupKindInfo(Kind).TargetSize));
+ CurVal |= Value & Mask;
+
+ // Write out the fixed up bytes back to the code/data bits.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+ Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff);
+ }
+ }
+
+ unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = {
+ // This table *must* be in same the order of fixup_* kinds in
+ // MipsFixupKinds.h.
+ //
+ // name offset bits flags
+ { "fixup_Mips_16", 0, 16, 0 },
+ { "fixup_Mips_32", 0, 32, 0 },
+ { "fixup_Mips_REL32", 0, 32, 0 },
+ { "fixup_Mips_26", 0, 26, 0 },
+ { "fixup_Mips_HI16", 0, 16, 0 },
+ { "fixup_Mips_LO16", 0, 16, 0 },
+ { "fixup_Mips_GPREL16", 0, 16, 0 },
+ { "fixup_Mips_LITERAL", 0, 16, 0 },
+ { "fixup_Mips_GOT_Global", 0, 16, 0 },
+ { "fixup_Mips_GOT_Local", 0, 16, 0 },
+ { "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Mips_CALL16", 0, 16, 0 },
+ { "fixup_Mips_GPREL32", 0, 32, 0 },
+ { "fixup_Mips_SHIFT5", 6, 5, 0 },
+ { "fixup_Mips_SHIFT6", 6, 5, 0 },
+ { "fixup_Mips_64", 0, 64, 0 },
+ { "fixup_Mips_TLSGD", 0, 16, 0 },
+ { "fixup_Mips_GOTTPREL", 0, 16, 0 },
+ { "fixup_Mips_TPREL_HI", 0, 16, 0 },
+ { "fixup_Mips_TPREL_LO", 0, 16, 0 },
+ { "fixup_Mips_TLSLDM", 0, 16, 0 },
+ { "fixup_Mips_DTPREL_HI", 0, 16, 0 },
+ { "fixup_Mips_DTPREL_LO", 0, 16, 0 },
+ { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Mips_GPOFF_HI", 0, 16, 0 },
+ { "fixup_Mips_GPOFF_LO", 0, 16, 0 },
+ { "fixup_Mips_GOT_PAGE", 0, 16, 0 },
+ { "fixup_Mips_GOT_OFST", 0, 16, 0 },
+ { "fixup_Mips_GOT_DISP", 0, 16, 0 },
+ { "fixup_Mips_HIGHER", 0, 16, 0 },
+ { "fixup_Mips_HIGHEST", 0, 16, 0 },
+ { "fixup_Mips_GOT_HI16", 0, 16, 0 },
+ { "fixup_Mips_GOT_LO16", 0, 16, 0 },
+ { "fixup_Mips_CALL_HI16", 0, 16, 0 },
+ { "fixup_Mips_CALL_LO16", 0, 16, 0 }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ /// @name Target Relaxation Interfaces
+ /// @{
+
+ /// MayNeedRelaxation - Check whether the given instruction may need
+ /// relaxation.
+ ///
+ /// \param Inst - The instruction to test.
+ bool mayNeedRelaxation(const MCInst &Inst) const {
+ return false;
+ }
+
+ /// fixupNeedsRelaxation - Target specific predicate for whether a given
+ /// fixup requires the associated instruction to be relaxed.
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ return false;
+ }
+
+ /// RelaxInstruction - Relax the instruction in the given fragment
+ /// to the next wider instruction.
+ ///
+ /// \param Inst - The instruction to relax, which may be the same
+ /// as the output.
+ /// \param [out] Res On return, the relaxed instruction.
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ }
+
+ /// @}
+
+ /// WriteNopData - Write an (optimal) nop sequence of Count bytes
+ /// to the given output. If the target cannot generate such a sequence,
+ /// it should return an error.
+ ///
+ /// \return - True on success.
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // Check for a less than instruction size number of bytes
+ // FIXME: 16 bit instructions are not handled yet here.
+ // We shouldn't be using a hard coded number for instruction size.
+ if (Count % 4) return false;
+
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0);
+ return true;
+ }
+}; // class MipsAsmBackend
+
+} // namespace
+
+// MCAsmBackend
+MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T, StringRef TT,
+ StringRef CPU) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/true, /*Is64Bit*/false);
+}
+
+MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T, StringRef TT,
+ StringRef CPU) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/false, /*Is64Bit*/false);
+}
+
+MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T, StringRef TT,
+ StringRef CPU) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/true, /*Is64Bit*/true);
+}
+
+MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, StringRef TT,
+ StringRef CPU) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/false, /*Is64Bit*/true);
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
new file mode 100644
index 000000000000..7a55efd5c330
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -0,0 +1,153 @@
+//===-- MipsBaseInfo.h - Top level definitions for MIPS MC ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the Mips target useful for the compiler back-end and the MC libraries.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MIPSBASEINFO_H
+#define MIPSBASEINFO_H
+
+#include "MipsFixupKinds.h"
+#include "MipsMCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// MipsII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MipsII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // Mips Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT16 - Represents the offset into the global offset table at which
+ /// the address the relocation entry symbol resides during execution.
+ MO_GOT16,
+ MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ MO_GPREL,
+
+ /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
+ /// address.
+ MO_ABS_HI,
+ MO_ABS_LO,
+
+ /// MO_TLSGD - Represents the offset into the global offset table at which
+ // the module ID and TSL block offset reside during execution (General
+ // Dynamic TLS).
+ MO_TLSGD,
+
+ /// MO_TLSLDM - Represents the offset into the global offset table at which
+ // the module ID and TSL block offset reside during execution (Local
+ // Dynamic TLS).
+ MO_TLSLDM,
+ MO_DTPREL_HI,
+ MO_DTPREL_LO,
+
+ /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
+ // Exec TLS).
+ MO_GOTTPREL,
+
+ /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from
+ // the thread pointer (Local Exec TLS).
+ MO_TPREL_HI,
+ MO_TPREL_LO,
+
+ // N32/64 Flags.
+ MO_GPOFF_HI,
+ MO_GPOFF_LO,
+ MO_GOT_DISP,
+ MO_GOT_PAGE,
+ MO_GOT_OFST,
+
+ /// MO_HIGHER/HIGHEST - Represents the highest or higher half word of a
+ /// 64-bit symbol address.
+ MO_HIGHER,
+ MO_HIGHEST,
+
+ /// MO_GOT_HI16/LO16, MO_CALL_HI16/LO16 - Relocations used for large GOTs.
+ MO_GOT_HI16,
+ MO_GOT_LO16,
+ MO_CALL_HI16,
+ MO_CALL_LO16
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for
+ // Mips instructions.
+ //
+
+ // Pseudo - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// FrmR - This form is for instructions of the format R.
+ FrmR = 1,
+ /// FrmI - This form is for instructions of the format I.
+ FrmI = 2,
+ /// FrmJ - This form is for instructions of the format J.
+ FrmJ = 3,
+ /// FrmFR - This form is for instructions of the format FR.
+ FrmFR = 4,
+ /// FrmFI - This form is for instructions of the format FI.
+ FrmFI = 5,
+ /// FrmOther - This form is for instructions that have no specific format.
+ FrmOther = 6,
+
+ FormMask = 15
+ };
+}
+
+inline static std::pair<const MCSymbolRefExpr*, int64_t>
+MipsGetSymAndOffset(const MCFixup &Fixup) {
+ MCFixupKind FixupKind = Fixup.getKind();
+
+ if ((FixupKind < FirstTargetFixupKind) ||
+ (FixupKind >= MCFixupKind(Mips::LastTargetFixupKind)))
+ return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0);
+
+ const MCExpr *Expr = Fixup.getValue();
+ MCExpr::ExprKind Kind = Expr->getKind();
+
+ if (Kind == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Expr);
+ const MCExpr *LHS = BE->getLHS();
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+
+ if ((LHS->getKind() != MCExpr::SymbolRef) || !CE)
+ return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0);
+
+ return std::make_pair(cast<MCSymbolRefExpr>(LHS), CE->getValue());
+ }
+
+ if (Kind != MCExpr::SymbolRef)
+ return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0);
+
+ return std::make_pair(cast<MCSymbolRefExpr>(Expr), 0);
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.cpp
new file mode 100644
index 000000000000..15c4282030db
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.cpp
@@ -0,0 +1,81 @@
+//===-- MipsDirectObjLower.cpp - Mips LLVM direct object lowering -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Mips MCInst records that are normally
+// left to the assembler to lower such as large shifts.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsInstrInfo.h"
+#include "MCTargetDesc/MipsDirectObjLower.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+// If the D<shift> instruction has a shift amount that is greater
+// than 31 (checked in calling routine), lower it to a D<shift>32 instruction
+void Mips::LowerLargeShift(MCInst& Inst) {
+
+ assert(Inst.getNumOperands() == 3 && "Invalid no. of operands for shift!");
+ assert(Inst.getOperand(2).isImm());
+
+ int64_t Shift = Inst.getOperand(2).getImm();
+ if (Shift <= 31)
+ return; // Do nothing
+ Shift -= 32;
+
+ // saminus32
+ Inst.getOperand(2).setImm(Shift);
+
+ switch (Inst.getOpcode()) {
+ default:
+ // Calling function is not synchronized
+ llvm_unreachable("Unexpected shift instruction");
+ case Mips::DSLL:
+ Inst.setOpcode(Mips::DSLL32);
+ return;
+ case Mips::DSRL:
+ Inst.setOpcode(Mips::DSRL32);
+ return;
+ case Mips::DSRA:
+ Inst.setOpcode(Mips::DSRA32);
+ return;
+ }
+}
+
+// Pick a DEXT or DINS instruction variant based on the pos and size operands
+void Mips::LowerDextDins(MCInst& InstIn) {
+ int Opcode = InstIn.getOpcode();
+
+ if (Opcode == Mips::DEXT)
+ assert(InstIn.getNumOperands() == 4 &&
+ "Invalid no. of machine operands for DEXT!");
+ else // Only DEXT and DINS are possible
+ assert(InstIn.getNumOperands() == 5 &&
+ "Invalid no. of machine operands for DINS!");
+
+ assert(InstIn.getOperand(2).isImm());
+ int64_t pos = InstIn.getOperand(2).getImm();
+ assert(InstIn.getOperand(3).isImm());
+ int64_t size = InstIn.getOperand(3).getImm();
+
+ if (size <= 32) {
+ if (pos < 32) // DEXT/DINS, do nothing
+ return;
+ // DEXTU/DINSU
+ InstIn.getOperand(2).setImm(pos - 32);
+ InstIn.setOpcode((Opcode == Mips::DEXT) ? Mips::DEXTU : Mips::DINSU);
+ return;
+ }
+ // DEXTM/DINSM
+ assert(pos < 32 && "DEXT/DINS cannot have both size and pos > 32");
+ InstIn.getOperand(3).setImm(size - 32);
+ InstIn.setOpcode((Opcode == Mips::DEXT) ? Mips::DEXTM : Mips::DINSM);
+ return;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.h
new file mode 100644
index 000000000000..8813cc9ac7a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.h
@@ -0,0 +1,28 @@
+//===-- MipsDirectObjLower.h - Mips LLVM direct object lowering *- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSDIRECTOBJLOWER_H
+#define MIPSDIRECTOBJLOWER_H
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCInst;
+ class MCStreamer;
+
+ namespace Mips {
+ /// MipsDirectObjLower - This name space is used to lower MCInstr in cases
+ // where the assembler usually finishes the lowering
+ // such as large shifts.
+ void LowerLargeShift(MCInst &Inst);
+ void LowerDextDins(MCInst &Inst);
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
new file mode 100644
index 000000000000..6471b51583ce
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -0,0 +1,285 @@
+//===-- MipsELFObjectWriter.cpp - Mips ELF Writer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsFixupKinds.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <list>
+
+using namespace llvm;
+
+namespace {
+ struct RelEntry {
+ RelEntry(const ELFRelocationEntry &R, const MCSymbol *S, int64_t O) :
+ Reloc(R), Sym(S), Offset(O) {}
+ ELFRelocationEntry Reloc;
+ const MCSymbol *Sym;
+ int64_t Offset;
+ };
+
+ typedef std::list<RelEntry> RelLs;
+ typedef RelLs::iterator RelLsIter;
+
+ class MipsELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
+ bool _isN64, bool IsLittleEndian);
+
+ virtual ~MipsELFObjectWriter();
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs);
+ };
+}
+
+MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
+ bool _isN64, bool IsLittleEndian)
+ : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
+ /*HasRelocationAddend*/ (_isN64) ? true : false,
+ /*IsN64*/ _isN64) {}
+
+MipsELFObjectWriter::~MipsELFObjectWriter() {}
+
+const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ assert(Target.getSymA() && "SymA cannot be 0.");
+ const MCSymbol &Sym = Target.getSymA()->getSymbol().AliasedSymbol();
+
+ if (Sym.getSection().getKind().isMergeableCString() ||
+ Sym.getSection().getKind().isMergeableConst())
+ return &Sym;
+
+ return NULL;
+}
+
+unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ // determine the type of the relocation
+ unsigned Type = (unsigned)ELF::R_MIPS_NONE;
+ unsigned Kind = (unsigned)Fixup.getKind();
+
+ switch (Kind) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ Type = ELF::R_MIPS_32;
+ break;
+ case FK_Data_8:
+ Type = ELF::R_MIPS_64;
+ break;
+ case FK_GPRel_4:
+ if (isN64()) {
+ Type = setRType((unsigned)ELF::R_MIPS_GPREL32, Type);
+ Type = setRType2((unsigned)ELF::R_MIPS_64, Type);
+ Type = setRType3((unsigned)ELF::R_MIPS_NONE, Type);
+ }
+ else
+ Type = ELF::R_MIPS_GPREL32;
+ break;
+ case Mips::fixup_Mips_GPREL16:
+ Type = ELF::R_MIPS_GPREL16;
+ break;
+ case Mips::fixup_Mips_26:
+ Type = ELF::R_MIPS_26;
+ break;
+ case Mips::fixup_Mips_CALL16:
+ Type = ELF::R_MIPS_CALL16;
+ break;
+ case Mips::fixup_Mips_GOT_Global:
+ case Mips::fixup_Mips_GOT_Local:
+ Type = ELF::R_MIPS_GOT16;
+ break;
+ case Mips::fixup_Mips_HI16:
+ Type = ELF::R_MIPS_HI16;
+ break;
+ case Mips::fixup_Mips_LO16:
+ Type = ELF::R_MIPS_LO16;
+ break;
+ case Mips::fixup_Mips_TLSGD:
+ Type = ELF::R_MIPS_TLS_GD;
+ break;
+ case Mips::fixup_Mips_GOTTPREL:
+ Type = ELF::R_MIPS_TLS_GOTTPREL;
+ break;
+ case Mips::fixup_Mips_TPREL_HI:
+ Type = ELF::R_MIPS_TLS_TPREL_HI16;
+ break;
+ case Mips::fixup_Mips_TPREL_LO:
+ Type = ELF::R_MIPS_TLS_TPREL_LO16;
+ break;
+ case Mips::fixup_Mips_TLSLDM:
+ Type = ELF::R_MIPS_TLS_LDM;
+ break;
+ case Mips::fixup_Mips_DTPREL_HI:
+ Type = ELF::R_MIPS_TLS_DTPREL_HI16;
+ break;
+ case Mips::fixup_Mips_DTPREL_LO:
+ Type = ELF::R_MIPS_TLS_DTPREL_LO16;
+ break;
+ case Mips::fixup_Mips_Branch_PCRel:
+ case Mips::fixup_Mips_PC16:
+ Type = ELF::R_MIPS_PC16;
+ break;
+ case Mips::fixup_Mips_GOT_PAGE:
+ Type = ELF::R_MIPS_GOT_PAGE;
+ break;
+ case Mips::fixup_Mips_GOT_OFST:
+ Type = ELF::R_MIPS_GOT_OFST;
+ break;
+ case Mips::fixup_Mips_GOT_DISP:
+ Type = ELF::R_MIPS_GOT_DISP;
+ break;
+ case Mips::fixup_Mips_GPOFF_HI:
+ Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
+ Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
+ Type = setRType3((unsigned)ELF::R_MIPS_HI16, Type);
+ break;
+ case Mips::fixup_Mips_GPOFF_LO:
+ Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
+ Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
+ Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type);
+ break;
+ case Mips::fixup_Mips_HIGHER:
+ Type = ELF::R_MIPS_HIGHER;
+ break;
+ case Mips::fixup_Mips_HIGHEST:
+ Type = ELF::R_MIPS_HIGHEST;
+ break;
+ case Mips::fixup_Mips_GOT_HI16:
+ Type = ELF::R_MIPS_GOT_HI16;
+ break;
+ case Mips::fixup_Mips_GOT_LO16:
+ Type = ELF::R_MIPS_GOT_LO16;
+ break;
+ case Mips::fixup_Mips_CALL_HI16:
+ Type = ELF::R_MIPS_CALL_HI16;
+ break;
+ case Mips::fixup_Mips_CALL_LO16:
+ Type = ELF::R_MIPS_CALL_LO16;
+ break;
+ }
+ return Type;
+}
+
+// Return true if R is either a GOT16 against a local symbol or HI16.
+static bool NeedsMatchingLo(const MCAssembler &Asm, const RelEntry &R) {
+ if (!R.Sym)
+ return false;
+
+ MCSymbolData &SD = Asm.getSymbolData(R.Sym->AliasedSymbol());
+
+ return ((R.Reloc.Type == ELF::R_MIPS_GOT16) && !SD.isExternal()) ||
+ (R.Reloc.Type == ELF::R_MIPS_HI16);
+}
+
+static bool HasMatchingLo(const MCAssembler &Asm, RelLsIter I, RelLsIter Last) {
+ if (I == Last)
+ return false;
+
+ RelLsIter Hi = I++;
+
+ return (I->Reloc.Type == ELF::R_MIPS_LO16) && (Hi->Sym == I->Sym) &&
+ (Hi->Offset == I->Offset);
+}
+
+static bool HasSameSymbol(const RelEntry &R0, const RelEntry &R1) {
+ return R0.Sym == R1.Sym;
+}
+
+static int CompareOffset(const RelEntry &R0, const RelEntry &R1) {
+ return (R0.Offset > R1.Offset) ? 1 : ((R0.Offset == R1.Offset) ? 0 : -1);
+}
+
+void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ // Call the default function first. Relocations are sorted in descending
+ // order of r_offset.
+ MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
+
+ RelLs RelocLs;
+ std::vector<RelLsIter> Unmatched;
+
+ // Fill RelocLs. Traverse Relocs backwards so that relocations in RelocLs
+ // are in ascending order of r_offset.
+ for (std::vector<ELFRelocationEntry>::reverse_iterator R = Relocs.rbegin();
+ R != Relocs.rend(); ++R) {
+ std::pair<const MCSymbolRefExpr*, int64_t> P =
+ MipsGetSymAndOffset(*R->Fixup);
+ RelocLs.push_back(RelEntry(*R, P.first ? &P.first->getSymbol() : 0,
+ P.second));
+ }
+
+ // Get list of unmatched HI16 and GOT16.
+ for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R)
+ if (NeedsMatchingLo(Asm, *R) && !HasMatchingLo(Asm, R, --RelocLs.end()))
+ Unmatched.push_back(R);
+
+ // Insert unmatched HI16 and GOT16 immediately before their matching LO16.
+ for (std::vector<RelLsIter>::iterator U = Unmatched.begin();
+ U != Unmatched.end(); ++U) {
+ RelLsIter LoPos = RelocLs.end(), HiPos = *U;
+ bool MatchedLo = false;
+
+ for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R) {
+ if ((R->Reloc.Type == ELF::R_MIPS_LO16) && HasSameSymbol(*HiPos, *R) &&
+ (CompareOffset(*R, *HiPos) >= 0) &&
+ ((LoPos == RelocLs.end()) || ((CompareOffset(*R, *LoPos) < 0)) ||
+ (!MatchedLo && !CompareOffset(*R, *LoPos))))
+ LoPos = R;
+
+ MatchedLo = NeedsMatchingLo(Asm, *R) &&
+ HasMatchingLo(Asm, R, --RelocLs.end());
+ }
+
+ // If a matching LoPos was found, move HiPos and insert it before LoPos.
+ // Make the offsets of HiPos and LoPos match.
+ if (LoPos != RelocLs.end()) {
+ HiPos->Offset = LoPos->Offset;
+ RelocLs.insert(LoPos, *HiPos);
+ RelocLs.erase(HiPos);
+ }
+ }
+
+ // Put the sorted list back in reverse order.
+ assert(Relocs.size() == RelocLs.size());
+ unsigned I = RelocLs.size();
+
+ for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R)
+ Relocs[--I] = R->Reloc;
+}
+
+MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian,
+ bool Is64Bit) {
+ MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI,
+ (Is64Bit) ? true : false,
+ IsLittleEndian);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
new file mode 100644
index 000000000000..c33bc9ae3034
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -0,0 +1,89 @@
+//===-- MipsELFStreamer.cpp - MipsELFStreamer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsELFStreamer.h"
+#include "MipsSubtarget.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+ MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ MipsELFStreamer *S = new MipsELFStreamer(Context, TAB, OS, Emitter,
+ RelaxAll, NoExecStack);
+ return S;
+ }
+
+ // For llc. Set a group of ELF header flags
+ void
+ MipsELFStreamer::emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget) {
+
+ if (hasRawTextSupport())
+ return;
+
+ // Update e_header flags
+ MCAssembler& MCA = getAssembler();
+ unsigned EFlags = MCA.getELFHeaderEFlags();
+
+ if (Subtarget.inMips16Mode())
+ EFlags |= ELF::EF_MIPS_ARCH_ASE_M16;
+ else
+ EFlags |= ELF::EF_MIPS_NOREORDER;
+
+ // Architecture
+ if (Subtarget.hasMips64r2())
+ EFlags |= ELF::EF_MIPS_ARCH_64R2;
+ else if (Subtarget.hasMips64())
+ EFlags |= ELF::EF_MIPS_ARCH_64;
+ else if (Subtarget.hasMips32r2())
+ EFlags |= ELF::EF_MIPS_ARCH_32R2;
+ else
+ EFlags |= ELF::EF_MIPS_ARCH_32;
+
+ if (Subtarget.inMicroMipsMode())
+ EFlags |= ELF::EF_MIPS_MICROMIPS;
+
+ // ABI
+ if (Subtarget.isABI_O32())
+ EFlags |= ELF::EF_MIPS_ABI_O32;
+
+ // Relocation Model
+ Reloc::Model RM = Subtarget.getRelocationModel();
+ if (RM == Reloc::PIC_ || RM == Reloc::Default)
+ EFlags |= ELF::EF_MIPS_PIC;
+ else if (RM == Reloc::Static)
+ ; // Do nothing for Reloc::Static
+ else
+ llvm_unreachable("Unsupported relocation model for e_flags");
+
+ MCA.setELFHeaderEFlags(EFlags);
+ }
+
+ // For llc. Set a symbol's STO flags
+ void
+ MipsELFStreamer::emitMipsSTOCG(const MipsSubtarget &Subtarget,
+ MCSymbol *Sym,
+ unsigned Val) {
+
+ if (hasRawTextSupport())
+ return;
+
+ MCSymbolData &Data = getOrCreateSymbolData(Sym);
+ // The "other" values are stored in the last 6 bits of the second byte
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ MCELF::setOther(Data, Val >> 2);
+ }
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
new file mode 100644
index 000000000000..b10ccc78e665
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -0,0 +1,43 @@
+//=== MipsELFStreamer.h - MipsELFStreamer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENCE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+#ifndef MIPSELFSTREAMER_H_
+#define MIPSELFSTREAMER_H_
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+class MipsAsmPrinter;
+class MipsSubtarget;
+class MCSymbol;
+
+class MipsELFStreamer : public MCELFStreamer {
+public:
+ MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack)
+ : MCELFStreamer(SK_MipsELFStreamer, Context, TAB, OS, Emitter) {
+ }
+
+ ~MipsELFStreamer() {}
+ void emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget);
+ void emitMipsSTOCG(const MipsSubtarget &Subtarget,
+ MCSymbol *Sym,
+ unsigned Val);
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_MipsELFStreamer;
+ }
+};
+
+ MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack);
+}
+
+#endif /* MIPSELFSTREAMER_H_ */
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
new file mode 100644
index 000000000000..f96390043a3b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -0,0 +1,139 @@
+//===-- MipsFixupKinds.h - Mips Specific Fixup Entries ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MIPS_MIPSFIXUPKINDS_H
+#define LLVM_MIPS_MIPSFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace Mips {
+ // Although most of the current fixup types reflect a unique relocation
+ // one can have multiple fixup types for a given relocation and thus need
+ // to be uniquely named.
+ //
+ // This table *must* be in the save order of
+ // MCFixupKindInfo Infos[Mips::NumTargetFixupKinds]
+ // in MipsAsmBackend.cpp.
+ //
+ enum Fixups {
+ // Branch fixups resulting in R_MIPS_16.
+ fixup_Mips_16 = FirstTargetFixupKind,
+
+ // Pure 32 bit data fixup resulting in - R_MIPS_32.
+ fixup_Mips_32,
+
+ // Full 32 bit data relative data fixup resulting in - R_MIPS_REL32.
+ fixup_Mips_REL32,
+
+ // Jump 26 bit fixup resulting in - R_MIPS_26.
+ fixup_Mips_26,
+
+ // Pure upper 16 bit fixup resulting in - R_MIPS_HI16.
+ fixup_Mips_HI16,
+
+ // Pure lower 16 bit fixup resulting in - R_MIPS_LO16.
+ fixup_Mips_LO16,
+
+ // 16 bit fixup for GP offest resulting in - R_MIPS_GPREL16.
+ fixup_Mips_GPREL16,
+
+ // 16 bit literal fixup resulting in - R_MIPS_LITERAL.
+ fixup_Mips_LITERAL,
+
+ // Global symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Global,
+
+ // Local symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Local,
+
+ // PC relative branch fixup resulting in - R_MIPS_PC16.
+ fixup_Mips_PC16,
+
+ // resulting in - R_MIPS_CALL16.
+ fixup_Mips_CALL16,
+
+ // resulting in - R_MIPS_GPREL32.
+ fixup_Mips_GPREL32,
+
+ // resulting in - R_MIPS_SHIFT5.
+ fixup_Mips_SHIFT5,
+
+ // resulting in - R_MIPS_SHIFT6.
+ fixup_Mips_SHIFT6,
+
+ // Pure 64 bit data fixup resulting in - R_MIPS_64.
+ fixup_Mips_64,
+
+ // resulting in - R_MIPS_TLS_GD.
+ fixup_Mips_TLSGD,
+
+ // resulting in - R_MIPS_TLS_GOTTPREL.
+ fixup_Mips_GOTTPREL,
+
+ // resulting in - R_MIPS_TLS_TPREL_HI16.
+ fixup_Mips_TPREL_HI,
+
+ // resulting in - R_MIPS_TLS_TPREL_LO16.
+ fixup_Mips_TPREL_LO,
+
+ // resulting in - R_MIPS_TLS_LDM.
+ fixup_Mips_TLSLDM,
+
+ // resulting in - R_MIPS_TLS_DTPREL_HI16.
+ fixup_Mips_DTPREL_HI,
+
+ // resulting in - R_MIPS_TLS_DTPREL_LO16.
+ fixup_Mips_DTPREL_LO,
+
+ // PC relative branch fixup resulting in - R_MIPS_PC16
+ fixup_Mips_Branch_PCRel,
+
+ // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16
+ fixup_Mips_GPOFF_HI,
+
+ // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16
+ fixup_Mips_GPOFF_LO,
+
+ // resulting in - R_MIPS_PAGE
+ fixup_Mips_GOT_PAGE,
+
+ // resulting in - R_MIPS_GOT_OFST
+ fixup_Mips_GOT_OFST,
+
+ // resulting in - R_MIPS_GOT_DISP
+ fixup_Mips_GOT_DISP,
+
+ // resulting in - R_MIPS_GOT_HIGHER
+ fixup_Mips_HIGHER,
+
+ // resulting in - R_MIPS_HIGHEST
+ fixup_Mips_HIGHEST,
+
+ // resulting in - R_MIPS_GOT_HI16
+ fixup_Mips_GOT_HI16,
+
+ // resulting in - R_MIPS_GOT_LO16
+ fixup_Mips_GOT_LO16,
+
+ // resulting in - R_MIPS_CALL_HI16
+ fixup_Mips_CALL_HI16,
+
+ // resulting in - R_MIPS_CALL_LO16
+ fixup_Mips_CALL_LO16,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
+} // namespace Mips
+} // namespace llvm
+
+
+#endif // LLVM_MIPS_MIPSFIXUPKINDS_H
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
new file mode 100644
index 000000000000..5d4b32d30578
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -0,0 +1,47 @@
+//===-- MipsMCAsmInfo.cpp - Mips Asm Properties ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MipsMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+void MipsMCAsmInfo::anchor() { }
+
+MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ if ((TheTriple.getArch() == Triple::mips) ||
+ (TheTriple.getArch() == Triple::mips64))
+ IsLittleEndian = false;
+
+ if ((TheTriple.getArch() == Triple::mips64el) ||
+ (TheTriple.getArch() == Triple::mips64)) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+
+ AlignmentIsInBytes = false;
+ Data16bitsDirective = "\t.2byte\t";
+ Data32bitsDirective = "\t.4byte\t";
+ Data64bitsDirective = "\t.8byte\t";
+ PrivateGlobalPrefix = "$";
+ CommentString = "#";
+ ZeroDirective = "\t.space\t";
+ GPRel32Directive = "\t.gpword\t";
+ GPRel64Directive = "\t.gpdword\t";
+ WeakRefDirective = "\t.weak\t";
+ DebugLabelSuffix = "=.";
+ SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+ HasLEB128 = true;
+ DwarfRegNumForCFI = true;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
new file mode 100644
index 000000000000..e1d878936f31
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -0,0 +1,31 @@
+//===-- MipsMCAsmInfo.h - Mips Asm Info ------------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETASMINFO_H
+#define MIPSTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class StringRef;
+ class Target;
+
+ class MipsMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
+ explicit MipsMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
new file mode 100644
index 000000000000..e198a7c983f0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -0,0 +1,344 @@
+//===-- MipsMCCodeEmitter.cpp - Convert Mips Code to Machine Code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+//
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsDirectObjLower.h"
+#include "MCTargetDesc/MipsFixupKinds.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class MipsMCCodeEmitter : public MCCodeEmitter {
+ MipsMCCodeEmitter(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCInstrInfo &MCII;
+ MCContext &Ctx;
+ bool IsLittleEndian;
+
+public:
+ MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_,
+ const MCSubtargetInfo &sti, bool IsLittle) :
+ MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
+
+ ~MipsMCCodeEmitter() {}
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+ // Output the instruction encoding in little endian byte order.
+ for (unsigned i = 0; i < Size; ++i) {
+ unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
+ EmitByte((Val >> Shift) & 0xff, OS);
+ }
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBranchJumpOpValue - Return binary encoding of the jump
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBranchTargetOpValue - Return binary encoding of the branch
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getMachineOpValue - Return binary encoding of operand. If the machin
+ // operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getMemEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+}; // class MipsMCCodeEmitter
+} // namespace
+
+MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx)
+{
+ return new MipsMCCodeEmitter(MCII, Ctx, STI, false);
+}
+
+MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx)
+{
+ return new MipsMCCodeEmitter(MCII, Ctx, STI, true);
+}
+
+/// EncodeInstruction - Emit the instruction.
+/// Size the instruction (currently only 4 bytes
+void MipsMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const
+{
+
+ // Non-pseudo instructions that get changed for direct object
+ // only based on operand values.
+ // If this list of instructions get much longer we will move
+ // the check to a function call. Until then, this is more efficient.
+ MCInst TmpInst = MI;
+ switch (MI.getOpcode()) {
+ // If shift amount is >= 32 it the inst needs to be lowered further
+ case Mips::DSLL:
+ case Mips::DSRL:
+ case Mips::DSRA:
+ Mips::LowerLargeShift(TmpInst);
+ break;
+ // Double extract instruction is chosen by pos and size operands
+ case Mips::DEXT:
+ case Mips::DINS:
+ Mips::LowerDextDins(TmpInst);
+ }
+
+ uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups);
+
+ // Check for unimplemented opcodes.
+ // Unfortunately in MIPS both NOP and SLL will come in with Binary == 0
+ // so we have to special check for them.
+ unsigned Opcode = TmpInst.getOpcode();
+ if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary)
+ llvm_unreachable("unimplemented opcode in EncodeInstruction()");
+
+ const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
+
+ // Get byte count of instruction
+ unsigned Size = Desc.getSize();
+ if (!Size)
+ llvm_unreachable("Desc.getSize() returns 0");
+
+ EmitInstruction(Binary, Size, OS);
+}
+
+/// getBranchTargetOpValue - Return binary encoding of the branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm() >> 2;
+
+ assert(MO.isExpr() &&
+ "getBranchTargetOpValue expects only expressions or immediates");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_Mips_PC16)));
+ return 0;
+}
+
+/// getJumpTargetOpValue - Return binary encoding of the jump
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm()>>2;
+
+ assert(MO.isExpr() &&
+ "getJumpTargetOpValue expects only expressions or an immediate");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_Mips_26)));
+ return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg);
+ return RegNo;
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+
+ // MO must be an Expr.
+ assert(MO.isExpr());
+
+ const MCExpr *Expr = MO.getExpr();
+ MCExpr::ExprKind Kind = Expr->getKind();
+
+ if (Kind == MCExpr::Binary) {
+ Expr = static_cast<const MCBinaryExpr*>(Expr)->getLHS();
+ Kind = Expr->getKind();
+ }
+
+ assert (Kind == MCExpr::SymbolRef);
+
+ Mips::Fixups FixupKind = Mips::Fixups(0);
+
+ switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
+ default: llvm_unreachable("Unknown fixup kind!");
+ break;
+ case MCSymbolRefExpr::VK_Mips_GPOFF_HI :
+ FixupKind = Mips::fixup_Mips_GPOFF_HI;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GPOFF_LO :
+ FixupKind = Mips::fixup_Mips_GPOFF_LO;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_PAGE :
+ FixupKind = Mips::fixup_Mips_GOT_PAGE;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_OFST :
+ FixupKind = Mips::fixup_Mips_GOT_OFST;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_DISP :
+ FixupKind = Mips::fixup_Mips_GOT_DISP;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GPREL:
+ FixupKind = Mips::fixup_Mips_GPREL16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_CALL:
+ FixupKind = Mips::fixup_Mips_CALL16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT16:
+ FixupKind = Mips::fixup_Mips_GOT_Global;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT:
+ FixupKind = Mips::fixup_Mips_GOT_Local;
+ break;
+ case MCSymbolRefExpr::VK_Mips_ABS_HI:
+ FixupKind = Mips::fixup_Mips_HI16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_ABS_LO:
+ FixupKind = Mips::fixup_Mips_LO16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TLSGD:
+ FixupKind = Mips::fixup_Mips_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TLSLDM:
+ FixupKind = Mips::fixup_Mips_TLSLDM;
+ break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_HI:
+ FixupKind = Mips::fixup_Mips_DTPREL_HI;
+ break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_LO:
+ FixupKind = Mips::fixup_Mips_DTPREL_LO;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOTTPREL:
+ FixupKind = Mips::fixup_Mips_GOTTPREL;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_HI:
+ FixupKind = Mips::fixup_Mips_TPREL_HI;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_LO:
+ FixupKind = Mips::fixup_Mips_TPREL_LO;
+ break;
+ case MCSymbolRefExpr::VK_Mips_HIGHER:
+ FixupKind = Mips::fixup_Mips_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_Mips_HIGHEST:
+ FixupKind = Mips::fixup_Mips_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_HI16:
+ FixupKind = Mips::fixup_Mips_GOT_HI16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_LO16:
+ FixupKind = Mips::fixup_Mips_GOT_LO16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_CALL_HI16:
+ FixupKind = Mips::fixup_Mips_CALL_HI16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_CALL_LO16:
+ FixupKind = Mips::fixup_Mips_CALL_LO16;
+ break;
+ } // switch
+
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
+/// getMemEncoding - Return binary encoding of memory related operand.
+/// If the offset operand requires relocation, record the relocation.
+unsigned
+MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups) << 16;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups);
+
+ return (OffBits & 0xFFFF) | RegBits;
+}
+
+unsigned
+MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpNo).isImm());
+ unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+ return SizeEncoding - 1;
+}
+
+// FIXME: should be called getMSBEncoding
+//
+unsigned
+MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpNo-1).isImm());
+ assert(MI.getOperand(OpNo).isImm());
+ unsigned Position = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups);
+ unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+
+ return Position + Size - 1;
+}
+
+#include "MipsGenMCCodeEmitter.inc"
+
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
new file mode 100644
index 000000000000..be83b54b6124
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -0,0 +1,215 @@
+//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MipsELFStreamer.h"
+#include "MipsMCTargetDesc.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MipsGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MipsGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static std::string ParseMipsTriple(StringRef TT, StringRef CPU) {
+ std::string MipsArchFeature;
+ size_t DashPosition = 0;
+ StringRef TheTriple;
+
+ // Let's see if there is a dash, like mips-unknown-linux.
+ DashPosition = TT.find('-');
+
+ if (DashPosition == StringRef::npos) {
+ // No dash, we check the string size.
+ TheTriple = TT.substr(0);
+ } else {
+ // We are only interested in substring before dash.
+ TheTriple = TT.substr(0,DashPosition);
+ }
+
+ if (TheTriple == "mips" || TheTriple == "mipsel") {
+ if (CPU.empty() || CPU == "mips32") {
+ MipsArchFeature = "+mips32";
+ } else if (CPU == "mips32r2") {
+ MipsArchFeature = "+mips32r2";
+ }
+ } else {
+ if (CPU.empty() || CPU == "mips64") {
+ MipsArchFeature = "+mips64";
+ } else if (CPU == "mips64r2") {
+ MipsArchFeature = "+mips64r2";
+ }
+ }
+ return MipsArchFeature;
+}
+
+static MCInstrInfo *createMipsMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMipsMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMipsMCRegisterInfo(X, Mips::RA);
+ return X;
+}
+
+static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = ParseMipsTriple(TT,CPU);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMipsMCSubtargetInfo(X, TT, CPU, ArchFS);
+ return X;
+}
+
+static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new MipsMCAsmInfo(T, TT);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Mips::SP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (CM == CodeModel::JITDefault)
+ RM = Reloc::Static;
+ else if (RM == Reloc::Default)
+ RM = Reloc::PIC_;
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new MipsInstPrinter(MAI, MII, MRI);
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ return createMipsELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+extern "C" void LLVMInitializeMipsTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn Y(TheMipselTarget, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn A(TheMips64Target, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn B(TheMips64elTarget, createMipsMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMipsTarget,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMipselTarget,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMips64Target,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMips64elTarget,
+ createMipsMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget,
+ createMipsMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMipselTarget, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMips64Target, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMips64elTarget,
+ createMipsMCRegisterInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget,
+ createMipsMCCodeEmitterEB);
+ TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget,
+ createMipsMCCodeEmitterEL);
+ TargetRegistry::RegisterMCCodeEmitter(TheMips64Target,
+ createMipsMCCodeEmitterEB);
+ TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget,
+ createMipsMCCodeEmitterEL);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget,
+ createMCStreamer);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
+ createMipsAsmBackendEB32);
+ TargetRegistry::RegisterMCAsmBackend(TheMipselTarget,
+ createMipsAsmBackendEL32);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64Target,
+ createMipsAsmBackendEB64);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget,
+ createMipsAsmBackendEL64);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
+ createMipsMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheMipselTarget,
+ createMipsMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheMips64Target,
+ createMipsMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheMips64elTarget,
+ createMipsMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMipsTarget,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMips64Target,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMips64elTarget,
+ createMipsMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
new file mode 100644
index 000000000000..71954a4bd862
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -0,0 +1,72 @@
+//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCTARGETDESC_H
+#define MIPSMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class raw_ostream;
+
+extern Target TheMipsTarget;
+extern Target TheMipselTarget;
+extern Target TheMips64Target;
+extern Target TheMips64elTarget;
+
+MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMipsAsmBackendEB32(const Target &T, StringRef TT,
+ StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEL32(const Target &T, StringRef TT,
+ StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEB64(const Target &T, StringRef TT,
+ StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEL64(const Target &T, StringRef TT,
+ StringRef CPU);
+
+MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian,
+ bool Is64Bit);
+} // End llvm namespace
+
+// Defines symbolic names for Mips registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MipsGenRegisterInfo.inc"
+
+// Defines symbolic names for the Mips instructions.
+#define GET_INSTRINFO_ENUM
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MipsGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp
new file mode 100644
index 000000000000..1dc9bcb36a5f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp
@@ -0,0 +1,80 @@
+//===-- MipsReginfo.cpp - Registerinfo handling --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// .reginfo
+// Elf32_Word ri_gprmask
+// Elf32_Word ri_cprmask[4]
+// Elf32_Word ri_gp_value
+//
+// .MIPS.options - N64
+// Elf64_Byte kind (ODK_REGINFO)
+// Elf64_Byte size (40 bytes)
+// Elf64_Section section (0)
+// Elf64_Word info (unused)
+// Elf64_Word ri_gprmask ()
+// Elf64_Word ri_pad ()
+// Elf64_Word[4] ri_cprmask ()
+// Elf64_Addr ri_gp_value ()
+//
+// .MIPS.options - N32
+// Elf32_Byte kind (ODK_REGINFO)
+// Elf32_Byte size (36 bytes)
+// Elf32_Section section (0)
+// Elf32_Word info (unused)
+// Elf32_Word ri_gprmask ()
+// Elf32_Word ri_pad ()
+// Elf32_Word[4] ri_cprmask ()
+// Elf32_Addr ri_gp_value ()
+//
+//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsReginfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetObjectFile.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+// Integrated assembler version
+void
+MipsReginfo::emitMipsReginfoSectionCG(MCStreamer &OS,
+ const TargetLoweringObjectFile &TLOF,
+ const MipsSubtarget &MST) const
+{
+
+ if (OS.hasRawTextSupport())
+ return;
+
+ const MipsTargetObjectFile &TLOFELF =
+ static_cast<const MipsTargetObjectFile &>(TLOF);
+ OS.SwitchSection(TLOFELF.getReginfoSection());
+
+ // .reginfo
+ if (MST.isABI_O32()) {
+ OS.EmitIntValue(0, 4); // ri_gprmask
+ OS.EmitIntValue(0, 4); // ri_cpr[0]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[1]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[2]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[3]mask
+ OS.EmitIntValue(0, 4); // ri_gp_value
+ }
+ // .MIPS.options
+ else if (MST.isABI_N64()) {
+ OS.EmitIntValue(1, 1); // kind
+ OS.EmitIntValue(40, 1); // size
+ OS.EmitIntValue(0, 2); // section
+ OS.EmitIntValue(0, 4); // info
+ OS.EmitIntValue(0, 4); // ri_gprmask
+ OS.EmitIntValue(0, 4); // pad
+ OS.EmitIntValue(0, 4); // ri_cpr[0]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[1]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[2]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[3]mask
+ OS.EmitIntValue(0, 8); // ri_gp_value
+ }
+ else llvm_unreachable("Unsupported abi for reginfo");
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsReginfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsReginfo.h
new file mode 100644
index 000000000000..039b8eaaf287
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsReginfo.h
@@ -0,0 +1,31 @@
+//=== MipsReginfo.h - MipsReginfo -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENCE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGINFO_H
+#define MIPSREGINFO_H
+
+namespace llvm {
+ class MCStreamer;
+ class TargetLoweringObjectFile;
+ class MipsSubtarget;
+
+ class MipsReginfo {
+ void anchor();
+ public:
+ MipsReginfo() {}
+
+ void emitMipsReginfoSectionCG(MCStreamer &OS,
+ const TargetLoweringObjectFile &TLOF,
+ const MipsSubtarget &MST) const;
+ };
+
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h
new file mode 100644
index 000000000000..8c65bb4020b5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips.h
@@ -0,0 +1,34 @@
+//===-- Mips.h - Top-level interface for Mips representation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM Mips back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MIPS_H
+#define TARGET_MIPS_H
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MipsTargetMachine;
+ class FunctionPass;
+
+ FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+ FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
+ JITCodeEmitter &JCE);
+ FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td
new file mode 100644
index 000000000000..eefb02a494ca
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips.td
@@ -0,0 +1,118 @@
+//===-- Mips.td - Describe the Mips Target Machine ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the Mips target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MipsRegisterInfo.td"
+include "MipsSchedule.td"
+include "MipsInstrInfo.td"
+include "MipsCallingConv.td"
+
+def MipsInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Mips Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
+ "General Purpose Registers are 64-bit wide.">;
+def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
+ "Support 64-bit FP registers.">;
+def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
+ "true", "Only supports single precision float">;
+def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
+ "Enable o32 ABI">;
+def FeatureN32 : SubtargetFeature<"n32", "MipsABI", "N32",
+ "Enable n32 ABI">;
+def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
+ "Enable n64 ABI">;
+def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
+ "Enable eabi ABI">;
+def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
+ "true", "Enable vector FPU instructions.">;
+def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
+ "Enable 'signext in register' instructions.">;
+def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
+ "Enable 'conditional move' instructions.">;
+def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
+ "Enable 'byte/half swap' instructions.">;
+def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
+ "Enable 'count leading bits' instructions.">;
+def FeatureFPIdx : SubtargetFeature<"FPIdx", "HasFPIdx", "true",
+ "Enable 'FP indexed load/store' instructions.">;
+def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
+ "Mips32 ISA Support",
+ [FeatureCondMov, FeatureBitCount]>;
+def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
+ "Mips32r2", "Mips32r2 ISA Support",
+ [FeatureMips32, FeatureSEInReg, FeatureSwap,
+ FeatureFPIdx]>;
+def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
+ "Mips64", "Mips64 ISA Support",
+ [FeatureGP64Bit, FeatureFP64Bit,
+ FeatureMips32, FeatureFPIdx]>;
+def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
+ "Mips64r2", "Mips64r2 ISA Support",
+ [FeatureMips64, FeatureMips32r2]>;
+
+def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true",
+ "Mips16 mode">;
+
+def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">;
+def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true",
+ "Mips DSP-R2 ASE", [FeatureDSP]>;
+
+def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
+ "microMips mode">;
+
+//===----------------------------------------------------------------------===//
+// Mips processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, MipsGenericItineraries, Features>;
+
+def : Proc<"mips32", [FeatureMips32]>;
+def : Proc<"mips32r2", [FeatureMips32r2]>;
+def : Proc<"mips64", [FeatureMips64]>;
+def : Proc<"mips64r2", [FeatureMips64r2]>;
+def : Proc<"mips16", [FeatureMips16]>;
+
+def MipsAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def MipsAsmParser : AsmParser {
+ let ShouldEmitMatchRegisterName = 0;
+}
+
+def MipsAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "$";
+}
+
+def Mips : Target {
+ let InstructionSet = MipsInstrInfo;
+ let AssemblyParsers = [MipsAsmParser];
+ let AssemblyWriters = [MipsAsmWriter];
+ let AssemblyParserVariants = [MipsAsmParserVariant];
+}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
new file mode 100644
index 000000000000..1bb6fe46295b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -0,0 +1,180 @@
+//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16FrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "Mips16InstrInfo.h"
+#include "MipsInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ uint64_t StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
+ // Adjust stack.
+ TII.makeFrame(Mips::SP, StackSize, MBB, MBBI);
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP, -8);
+ SrcML = MachineLocation(Mips::S1);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+
+ DstML = MachineLocation(MachineLocation::VirtualFP, -12);
+ SrcML = MachineLocation(Mips::S0);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+
+ DstML = MachineLocation(MachineLocation::VirtualFP, -4);
+ SrcML = MachineLocation(Mips::RA);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+
+ if (hasFP(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0)
+ .addReg(Mips::SP);
+
+}
+
+void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (!StackSize)
+ return;
+
+ if (hasFP(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::Move32R16), Mips::SP)
+ .addReg(Mips::S0);
+
+ // Adjust stack.
+ // assumes stacksize multiple of 8
+ TII.restoreFrame(Mips::SP, StackSize, MBB, MBBI);
+}
+
+bool Mips16FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ MachineBasicBlock *EntryBlock = MF->begin();
+
+ //
+ // Registers RA, S0,S1 are the callee saved registers and they
+ // will be saved with the "save" instruction
+ // during emitPrologue
+ //
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in. Do not add if the register is
+ // RA and return address is taken, because it has already been added in
+ // method MipsTargetLowering::LowerRETURNADDR.
+ // It's killed at the spill, unless the register is RA and return address
+ // is taken.
+ unsigned Reg = CSI[i].getReg();
+ bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA)
+ && MF->getFrameInfo()->isReturnAddressTaken();
+ if (!IsRAAndRetAddrIsTaken)
+ EntryBlock->addLiveIn(Reg);
+ }
+
+ return true;
+}
+
+bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ //
+ // Registers RA,S0,S1 are the callee saved registers and they will be restored
+ // with the restore instruction during emitEpilogue.
+ // We need to override this virtual function, otherwise llvm will try and
+ // restore the registers on it's on from the stack.
+ //
+
+ return true;
+}
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void Mips16FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ TII.adjustStackPtr(Mips::SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
+
+bool
+Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Reserve call frame if the size of the maximum call frame fits into 15-bit
+ // immediate field and there are no variable sized objects on the stack.
+ return isInt<15>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
+}
+
+void Mips16FrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MF.getRegInfo().setPhysRegUsed(Mips::RA);
+ MF.getRegInfo().setPhysRegUsed(Mips::S0);
+ MF.getRegInfo().setPhysRegUsed(Mips::S1);
+}
+
+const MipsFrameLowering *
+llvm::createMips16FrameLowering(const MipsSubtarget &ST) {
+ return new Mips16FrameLowering(ST);
+}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
new file mode 100644
index 000000000000..54fdb7871466
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
@@ -0,0 +1,52 @@
+//===-- Mips16FrameLowering.h - Mips16 frame lowering ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16_FRAMEINFO_H
+#define MIPS16_FRAMEINFO_H
+
+#include "MipsFrameLowering.h"
+
+namespace llvm {
+class Mips16FrameLowering : public MipsFrameLowering {
+public:
+ explicit Mips16FrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI, 8) {}
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..00b3449300c5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -0,0 +1,308 @@
+//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips16ISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = Mips::Mflo16;
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = Mips::Mfhi16;
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
+void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC =
+ (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+ V2 = RegInfo.createVirtualRegister(RC);
+
+ BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
+ BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
+ .addReg(V1).addReg(V2);
+}
+
+// Insert instructions to initialize the Mips16 SP Alias register in the
+// first MBB of the function.
+//
+void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->mips16SPAliasRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
+
+ BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
+ .addReg(Mips::SP);
+}
+
+void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+ initMips16SPAliasReg(MF);
+}
+
+/// getMips16SPAliasReg - Output the instructions required to put the
+/// SP into a Mips16 accessible aliased register.
+SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() {
+ unsigned Mips16SPAliasReg =
+ MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
+ return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
+}
+
+void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
+ SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
+ if (Parent) {
+ switch (Parent->getOpcode()) {
+ case ISD::LOAD: {
+ LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF)?
+ AliasFPReg: getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ case ISD::STORE: {
+ StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF)?
+ AliasFPReg: getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ }
+ }
+ AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
+ return;
+
+}
+
+bool Mips16DAGToDAGISel::selectAddr16(
+ SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
+ SDValue &Alias) {
+ EVT ValTy = Addr.getValueType();
+
+ Alias = CurDAG->getTargetConstant(0, ValTy);
+
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ return true;
+ }
+ // on PIC code Load GA
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ }
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+
+ // If an indexed floating point load/store can be emitted, return false.
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
+
+ if (LS &&
+ (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ Subtarget.hasFPIdx())
+ return false;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+}
+
+/// Select instructions not customized! Used for
+/// expanded, promoted and normal instructions
+std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ EVT NodeTy = Node->getValueType(0);
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::AdduRxRyRz16;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SubuRxRyRz16;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ EVT VT = LHS.getValueType();
+
+ unsigned Sltu_op = Mips::SltuRxRyRz16;
+ SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2);
+ unsigned Addu_op = Mips::AdduRxRyRz16;
+ SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT,
+ SDValue(Carry,0), RHS);
+
+ SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry,0));
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) {
+ return new Mips16DAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
new file mode 100644
index 000000000000..baa85877d957
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -0,0 +1,51 @@
+//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16ISELDAGTODAG_H
+#define MIPS16ISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class Mips16DAGToDAGISel : public MipsDAGToDAGISel {
+public:
+ explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDValue getMips16SPAliasReg();
+
+ void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg);
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+
+ void initMips16SPAliasReg(MachineFunction &MF);
+};
+
+FunctionPass *createMips16ISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
new file mode 100644
index 000000000000..23eb5375ac94
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -0,0 +1,689 @@
+//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-lower"
+#include "Mips16ISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <set>
+
+using namespace llvm;
+
+static cl::opt<bool>
+Mips16HardFloat("mips16-hard-float", cl::NotHidden,
+ cl::desc("MIPS: mips16 hard float enable."),
+ cl::init(false));
+
+static cl::opt<bool> DontExpandCondPseudos16(
+ "mips16-dont-expand-cond-pseudo",
+ cl::init(false),
+ cl::desc("Dont expand conditional move related "
+ "pseudos for Mips 16"),
+ cl::Hidden);
+
+namespace {
+ std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded;
+}
+
+Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ //
+ // set up as if mips32 and then revert so we can test the mechanism
+ // for switching
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+ computeRegisterProperties();
+ clearRegisterClasses();
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
+
+ if (Mips16HardFloat)
+ setMips16HardFloatLibCalls();
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMips16TargetLowering(MipsTargetMachine &TM) {
+ return new Mips16TargetLowering(TM);
+}
+
+bool
+Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ return false;
+}
+
+MachineBasicBlock *
+Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::SelBeqZ:
+ return emitSel16(Mips::BeqzRxImm16, MI, BB);
+ case Mips::SelBneZ:
+ return emitSel16(Mips::BnezRxImm16, MI, BB);
+ case Mips::SelTBteqZCmpi:
+ return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSlti:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSltiu:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBtneZCmpi:
+ return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSlti:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSltiu:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBteqZCmp:
+ return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBteqZSlt:
+ return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBteqZSltu:
+ return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::SelTBtneZCmp:
+ return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBtneZSlt:
+ return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBtneZSltu:
+ return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BteqzT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BteqzT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BtnezT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BtnezT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BtnezT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ break;
+ case Mips::SltCCRxRy16:
+ return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
+ break;
+ case Mips::SltiCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SltiuCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltuCCRxRy16:
+ return emitFEXT_CCRX16_ins
+ (Mips::SltuRxRy16, MI, BB);
+ }
+}
+
+bool Mips16TargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ // No tail call optimization for mips16.
+ return false;
+}
+
+void Mips16TargetLowering::setMips16LibcallName
+ (RTLIB::Libcall L, const char *Name) {
+ setLibcallName(L, Name);
+ NoHelperNeeded.insert(Name);
+}
+
+void Mips16TargetLowering::setMips16HardFloatLibCalls() {
+ setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
+ setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
+ setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
+ setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
+ setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
+ setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
+ setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
+ setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
+ setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
+ setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
+ setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
+ setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
+ setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
+ setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
+ setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
+ setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
+ setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
+ setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
+ setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
+ setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
+ setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
+ setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
+ setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
+ setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
+ setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
+ setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
+}
+
+
+//
+// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
+// cleaner way to do all of this but it will have to wait until the traditional
+// gcc mechanism is completed.
+//
+// For Pic, in order for Mips16 code to call Mips32 code which according the abi
+// have either arguments or returned values placed in floating point registers,
+// we use a set of helper functions. (This includes functions which return type
+// complex which on Mips are returned in a pair of floating point registers).
+//
+// This is an encoding that we inherited from gcc.
+// In Mips traditional O32, N32 ABI, floating point numbers are passed in
+// floating point argument registers 1,2 only when the first and optionally
+// the second arguments are float (sf) or double (df).
+// For Mips16 we are only concerned with the situations where floating point
+// arguments are being passed in floating point registers by the ABI, because
+// Mips16 mode code cannot execute floating point instructions to load those
+// values and hence helper functions are needed.
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df)
+// the helper function suffixs for these are:
+// 0, 1, 5, 9, 2, 6, 10
+// this suffix can then be calculated as follows:
+// for a given argument Arg:
+// Arg1x, Arg2x = 1 : Arg is sf
+// 2 : Arg is df
+// 0: Arg is neither sf or df
+// So this stub is the string for number Arg1x + Arg2x*4.
+// However not all numbers between 0 and 10 are possible, we check anyway and
+// assert if the impossible exists.
+//
+
+unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
+ (ArgListTy &Args) const {
+ unsigned int resultNum = 0;
+ if (Args.size() >= 1) {
+ Type *t = Args[0].Ty;
+ if (t->isFloatTy()) {
+ resultNum = 1;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum = 2;
+ }
+ }
+ if (resultNum) {
+ if (Args.size() >=2) {
+ Type *t = Args[1].Ty;
+ if (t->isFloatTy()) {
+ resultNum += 4;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum += 8;
+ }
+ }
+ }
+ return resultNum;
+}
+
+//
+// prefixs are attached to stub numbers depending on the return type .
+// return type: float sf_
+// double df_
+// single complex sc_
+// double complext dc_
+// others NO PREFIX
+//
+//
+// The full name of a helper function is__mips16_call_stub +
+// return type dependent prefix + stub number
+//
+//
+// This is something that probably should be in a different source file and
+// perhaps done differently but my main purpose is to not waste runtime
+// on something that we can enumerate in the source. Another possibility is
+// to have a python script to generate these mapping tables. This will do
+// for now. There are a whole series of helper function mapping arrays, one
+// for each return type class as outlined above. There there are 11 possible
+// entries. Ones with 0 are ones which should never be selected
+//
+// All the arrays are similar except for ones which return neither
+// sf, df, sc, dc, in which only care about ones which have sf or df as a
+// first parameter.
+//
+#define P_ "__mips16_call_stub_"
+#define MAX_STUB_NUMBER 10
+#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
+#define T P "0" , T1
+#define P P_
+static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
+ {0, T1 };
+#undef P
+#define P P_ "sf_"
+static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "df_"
+static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "sc_"
+static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "dc_"
+static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#undef P_
+
+
+const char* Mips16TargetLowering::
+ getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
+ const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
+#ifndef NDEBUG
+ const unsigned int maxStubNum = 10;
+ assert(stubNum <= maxStubNum);
+ const bool validStubNum[maxStubNum+1] =
+ {true, true, true, false, false, true, true, false, false, true, true};
+ assert(validStubNum[stubNum]);
+#endif
+ const char *result;
+ if (RetTy->isFloatTy()) {
+ result = sfMips16Helper[stubNum];
+ }
+ else if (RetTy ->isDoubleTy()) {
+ result = dfMips16Helper[stubNum];
+ }
+ else if (RetTy->isStructTy()) {
+ // check if it's complex
+ if (RetTy->getNumContainedTypes() == 2) {
+ if ((RetTy->getContainedType(0)->isFloatTy()) &&
+ (RetTy->getContainedType(1)->isFloatTy())) {
+ result = scMips16Helper[stubNum];
+ }
+ else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
+ (RetTy->getContainedType(1)->isDoubleTy())) {
+ result = dcMips16Helper[stubNum];
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ if (stubNum == 0) {
+ needHelper = false;
+ return "";
+ }
+ result = vMips16Helper[stubNum];
+ }
+ needHelper = true;
+ return result;
+}
+
+void Mips16TargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ SelectionDAG &DAG = CLI.DAG;
+ const char* Mips16HelperFunction = 0;
+ bool NeedMips16Helper = false;
+
+ if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) {
+ //
+ // currently we don't have symbols tagged with the mips16 or mips32
+ // qualifier so we will assume that we don't know what kind it is.
+ // and generate the helper
+ //
+ bool LookupHelper = true;
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+ if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) {
+ LookupHelper = false;
+ }
+ }
+ if (LookupHelper) Mips16HelperFunction =
+ getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
+
+ }
+
+ SDValue JumpTarget = Callee;
+
+ // T9 should contain the address of the callee function if
+ // -reloction-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned V0Reg = Mips::V0;
+ if (NeedMips16Helper) {
+ RegsToPass.push_front(std::make_pair(V0Reg, Callee));
+ JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
+ JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
+ } else
+ RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
+ }
+
+ Ops.push_back(JumpTarget);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+MachineBasicBlock *Mips16TargetLowering::
+emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSelT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addReg(MI->getOperand(4).getReg());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSeliT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addImm(MI->getOperand(4).getImm());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
+
+MachineBasicBlock
+ *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ unsigned regY = MI->getOperand(1).getReg();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ int64_t imm = MI->getOperand(1).getImm();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ unsigned CmpOpc;
+ if (isUInt<8>(imm))
+ CmpOpc = CmpiOpc;
+ else if (isUInt<16>(imm))
+ CmpOpc = CmpiXOpc;
+ else
+ llvm_unreachable("immediate field not usable");
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+static unsigned Mips16WhichOp8uOr16simm
+ (unsigned shortOp, unsigned longOp, int64_t Imm) {
+ if (isUInt<8>(Imm))
+ return shortOp;
+ else if (isInt<16>(Imm))
+ return longOp;
+ else
+ llvm_unreachable("immediate field not usable");
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ unsigned regY = MI->getOperand(2).getReg();
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB )const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ int64_t Imm = MI->getOperand(2).getImm();
+ unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addImm(Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.h b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.h
new file mode 100644
index 000000000000..b23e2a1f37db
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.h
@@ -0,0 +1,80 @@
+//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Mips16ISELLOWERING_H
+#define Mips16ISELLOWERING_H
+
+#include "MipsISelLowering.h"
+
+namespace llvm {
+ class Mips16TargetLowering : public MipsTargetLowering {
+ public:
+ explicit Mips16TargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ void setMips16LibcallName(RTLIB::Libcall, const char *Name);
+
+ void setMips16HardFloatLibCalls();
+
+ unsigned int
+ getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+
+ const char *getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB )const;
+ };
+}
+
+#endif // Mips16ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrFormats.td b/contrib/llvm/lib/Target/Mips/Mips16InstrFormats.td
new file mode 100644
index 000000000000..4ff62ef3b6f9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrFormats.td
@@ -0,0 +1,626 @@
+//===- Mips16InstrFormats.td - Mips Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MIPS instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// funct or f Function field
+//
+// immediate 4-,5-,8- or 11-bit immediate, branch displacement, or
+// or imm address displacement
+//
+// op 5-bit major operation code
+//
+// rx 3-bit source or destination register
+//
+// ry 3-bit source or destination register
+//
+// rz 3-bit source or destination register
+//
+// sa 3- or 5-bit shift amount
+//
+//===----------------------------------------------------------------------===//
+
+
+// Base class for Mips 16 Format
+// This class does not depend on the instruction size
+//
+class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: Instruction
+{
+
+ let Namespace = "Mips";
+
+ let OutOperandList = outs;
+ let InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ let Predicates = [InMips16Mode];
+}
+
+//
+// Generic Mips 16 Format
+//
+class MipsInst16<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>:
+ MipsInst16_Base<outs, ins, asmstr, pattern, itin>
+{
+ field bits<16> Inst;
+ bits<5> Opcode = 0;
+
+ // Top 5 bits are the 'opcode' field
+ let Inst{15-11} = Opcode;
+
+ let Size=2;
+ field bits<16> SoftFail = 0;
+}
+
+//
+// For 32 bit extended instruction forms.
+//
+class MipsInst16_32<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>:
+ MipsInst16_Base<outs, ins, asmstr, pattern, itin>
+{
+ field bits<32> Inst;
+
+ let Size=4;
+ field bits<32> SoftFail = 0;
+}
+
+class MipsInst16_EXTEND<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>:
+ MipsInst16_32<outs, ins, asmstr, pattern, itin>
+{
+ let Inst{31-27} = 0b11110;
+}
+
+
+
+// Mips Pseudo Instructions Format
+class MipsPseudo16<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst16<outs, ins, asmstr, pattern, IIPseudo> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Mips : <|opcode|imm11|>
+//===----------------------------------------------------------------------===//
+
+class FI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<11> imm11;
+
+ let Opcode = op;
+
+ let Inst{10-0} = imm11;
+}
+
+//===----------------------------------------------------------------------===//
+// Format RI instruction class in Mips : <|opcode|rx|imm8|>
+//===----------------------------------------------------------------------===//
+
+class FRI16<bits<5> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> rx;
+ bits<8> imm8;
+
+ let Opcode = op;
+
+ let Inst{10-8} = rx;
+ let Inst{7-0} = imm8;
+}
+
+//===----------------------------------------------------------------------===//
+// Format RR instruction class in Mips : <|opcode|rx|ry|funct|>
+//===----------------------------------------------------------------------===//
+
+class FRR16<bits<5> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> rx;
+ bits<3> ry;
+ bits<5> funct;
+
+ let Opcode = 0b11101;
+ let funct = _funct;
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-0} = funct;
+}
+
+//
+// For conversion functions.
+//
+class FRR_SF16<bits<5> _funct, bits<3> _subfunct, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> rx;
+ bits<3> subfunct;
+ bits<5> funct;
+
+ let Opcode = 0b11101; // RR
+ let funct = _funct;
+ let subfunct = _subfunct;
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = subfunct;
+ let Inst{4-0} = funct;
+}
+
+//
+// just used for breakpoint (hardware and software) instructions.
+//
+class FC16<bits<5> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<6> _code; // code is a keyword in tablegen
+ bits<5> funct;
+
+ let Opcode = 0b11101; // RR
+ let funct = _funct;
+
+ let Inst{10-5} = _code;
+ let Inst{4-0} = funct;
+}
+
+//
+// J(AL)R(C) subformat
+//
+class FRR16_JALRC<bits<1> _nd, bits<1> _l, bits<1> r_a,
+ dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> rx;
+ bits<1> nd;
+ bits<1> l;
+ bits<1> ra;
+
+ let nd = _nd;
+ let l = _l;
+ let ra = r_a;
+
+ let Opcode = 0b11101;
+
+ let Inst{10-8} = rx;
+ let Inst{7} = nd;
+ let Inst{6} = l;
+ let Inst{5} = ra;
+ let Inst{4-0} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Format RRI instruction class in Mips : <|opcode|rx|ry|imm5|>
+//===----------------------------------------------------------------------===//
+
+class FRRI16<bits<5> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> rx;
+ bits<3> ry;
+ bits<5> imm5;
+
+ let Opcode = op;
+
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-0} = imm5;
+}
+
+//===----------------------------------------------------------------------===//
+// Format RRR instruction class in Mips : <|opcode|rx|ry|rz|f|>
+//===----------------------------------------------------------------------===//
+
+class FRRR16<bits<2> _f, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> rx;
+ bits<3> ry;
+ bits<3> rz;
+ bits<2> f;
+
+ let Opcode = 0b11100;
+ let f = _f;
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-2} = rz;
+ let Inst{1-0} = f;
+}
+
+//===----------------------------------------------------------------------===//
+// Format RRI-A instruction class in Mips : <|opcode|rx|ry|f|imm4|>
+//===----------------------------------------------------------------------===//
+
+class FRRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> rx;
+ bits<3> ry;
+ bits<1> f;
+ bits<4> imm4;
+
+ let Opcode = 0b01000;
+ let f = _f;
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4} = f;
+ let Inst{3-0} = imm4;
+}
+
+//===----------------------------------------------------------------------===//
+// Format Shift instruction class in Mips : <|opcode|rx|ry|sa|f|>
+//===----------------------------------------------------------------------===//
+
+class FSHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> rx;
+ bits<3> ry;
+ bits<3> sa;
+ bits<2> f;
+
+ let Opcode = 0b00110;
+ let f = _f;
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-2} = sa;
+ let Inst{1-0} = f;
+}
+
+//===----------------------------------------------------------------------===//
+// Format i8 instruction class in Mips : <|opcode|funct|imm8>
+//===----------------------------------------------------------------------===//
+
+class FI816<bits<3> _func, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> func;
+ bits<8> imm8;
+
+ let Opcode = 0b01100;
+ let func = _func;
+
+ let Inst{10-8} = func;
+ let Inst{7-0} = imm8;
+}
+
+//===----------------------------------------------------------------------===//
+// Format i8_MOVR32 instruction class in Mips : <|opcode|func|ry|r32>
+//===----------------------------------------------------------------------===//
+
+class FI8_MOVR3216<dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+
+ bits<4> ry;
+ bits<4> r32;
+
+ let Opcode = 0b01100;
+
+ let Inst{10-8} = 0b111;
+ let Inst{7-4} = ry;
+ let Inst{3-0} = r32;
+
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Format i8_MOV32R instruction class in Mips : <|opcode|func|r32|rz>
+//===----------------------------------------------------------------------===//
+
+class FI8_MOV32R16<dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+
+ bits<3> func;
+ bits<5> r32;
+ bits<3> rz;
+
+
+ let Opcode = 0b01100;
+
+ let Inst{10-8} = 0b101;
+ let Inst{7-5} = r32{2-0};
+ let Inst{4-3} = r32{4-3};
+ let Inst{2-0} = rz;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format i8_SVRS instruction class in Mips :
+// <|opcode|svrs|s|ra|s0|s1|framesize>
+//===----------------------------------------------------------------------===//
+
+class FI8_SVRS16<bits<1> _s, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin>
+{
+ bits<1> s;
+ bits<1> ra = 0;
+ bits<1> s0 = 0;
+ bits<1> s1 = 0;
+ bits<4> framesize = 0;
+
+ let s =_s;
+ let Opcode = 0b01100;
+
+ let Inst{10-8} = 0b100;
+ let Inst{7} = s;
+ let Inst{6} = ra;
+ let Inst{5} = s0;
+ let Inst{4} = s1;
+ let Inst{3-0} = framesize;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format JAL instruction class in Mips16 :
+// <|opcode|svrs|s|ra|s0|s1|framesize>
+//===----------------------------------------------------------------------===//
+
+class FJAL16<bits<1> _X, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_32<outs, ins, asmstr, pattern, itin>
+{
+ bits<1> X;
+ bits<26> imm26;
+
+
+ let X = _X;
+
+ let Inst{31-27} = 0b00011;
+ let Inst{26} = X;
+ let Inst{25-21} = imm26{20-16};
+ let Inst{20-16} = imm26{25-21};
+ let Inst{15-0} = imm26{15-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-I instruction class in Mips16 :
+// <|EXTEND|imm10:5|imm15:11|op|0|0|0|0|0|0|imm4:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_I16<bits<5> _eop, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
+{
+ bits<16> imm16;
+ bits<5> eop;
+
+ let eop = _eop;
+
+ let Inst{26-21} = imm16{10-5};
+ let Inst{20-16} = imm16{15-11};
+ let Inst{15-11} = eop;
+ let Inst{10-5} = 0;
+ let Inst{4-0} = imm16{4-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format ASMACRO instruction class in Mips16 :
+// <EXTEND|select|p4|p3|RRR|p2|p1|p0>
+//===----------------------------------------------------------------------===//
+
+class FASMACRO16<dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> select;
+ bits<3> p4;
+ bits<5> p3;
+ bits<5> RRR = 0b11100;
+ bits<3> p2;
+ bits<3> p1;
+ bits<5> p0;
+
+
+ let Inst{26-24} = select;
+ let Inst{23-21} = p4;
+ let Inst{20-16} = p3;
+ let Inst{15-11} = RRR;
+ let Inst{10-8} = p2;
+ let Inst{7-5} = p1;
+ let Inst{4-0} = p0;
+
+}
+
+
+//===----------------------------------------------------------------------===//
+// Format EXT-RI instruction class in Mips16 :
+// <|EXTEND|imm10:5|imm15:11|op|rx|0|0|0|imm4:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_RI16<bits<5> _op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
+{
+ bits<16> imm16;
+ bits<5> op;
+ bits<3> rx;
+
+ let op = _op;
+
+ let Inst{26-21} = imm16{10-5};
+ let Inst{20-16} = imm16{15-11};
+ let Inst{15-11} = op;
+ let Inst{10-8} = rx;
+ let Inst{7-5} = 0;
+ let Inst{4-0} = imm16{4-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-RRI instruction class in Mips16 :
+// <|EXTEND|imm10:5|imm15:11|op|rx|ry|imm4:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_RRI16<bits<5> _op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> op;
+ bits<16> imm16;
+ bits<3> rx;
+ bits<3> ry;
+
+ let op=_op;
+
+ let Inst{26-21} = imm16{10-5};
+ let Inst{20-16} = imm16{15-11};
+ let Inst{15-11} = op;
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-0} = imm16{4-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-RRI-A instruction class in Mips16 :
+// <|EXTEND|imm10:4|imm14:11|RRI-A|rx|ry|f|imm3:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_RRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
+{
+ bits<15> imm15;
+ bits<3> rx;
+ bits<3> ry;
+ bits<1> f;
+
+ let f = _f;
+
+ let Inst{26-20} = imm15{10-4};
+ let Inst{19-16} = imm15{14-11};
+ let Inst{15-11} = 0b01000;
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4} = f;
+ let Inst{3-0} = imm15{3-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-SHIFT instruction class in Mips16 :
+// <|EXTEND|sa 4:0|s5|0|SHIFT|rx|ry|0|f>
+//===----------------------------------------------------------------------===//
+
+class FEXT_SHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
+{
+ bits<6> sa6;
+ bits<3> rx;
+ bits<3> ry;
+ bits<2> f;
+
+ let f = _f;
+
+ let Inst{26-22} = sa6{4-0};
+ let Inst{21} = sa6{5};
+ let Inst{20-16} = 0;
+ let Inst{15-11} = 0b00110;
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-2} = 0;
+ let Inst{1-0} = f;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-I8 instruction class in Mips16 :
+// <|EXTEND|imm10:5|imm15:11|I8|funct|0|imm4:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
+{
+ bits<16> imm16;
+ bits<5> I8;
+ bits<3> funct;
+
+ let funct = _funct;
+ let I8 = 0b0110;
+
+ let Inst{26-21} = imm16{10-5};
+ let Inst{20-16} = imm16{15-11};
+ let Inst{15-11} = I8;
+ let Inst{10-8} = funct;
+ let Inst{7-5} = 0;
+ let Inst{4-0} = imm16{4-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-I8_SVRS instruction class in Mips16 :
+// <|EXTEND|xsregs|framesize7:4|aregs|I8|SVRS|s|ra|s0|s1|framesize3:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_I8_SVRS16<bits<1> s_, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
+{
+ bits<3> xsregs =0;
+ bits<8> framesize =0;
+ bits<3> aregs =0;
+ bits<5> I8 = 0b01100;
+ bits<3> SVRS = 0b100;
+ bits<1> s;
+ bits<1> ra = 0;
+ bits<1> s0 = 0;
+ bits<1> s1 = 0;
+
+ let s= s_;
+
+ let Inst{26-24} = xsregs;
+ let Inst{23-20} = framesize{7-4};
+ let Inst{19} = 0;
+ let Inst{18-16} = aregs;
+ let Inst{15-11} = I8;
+ let Inst{10-8} = SVRS;
+ let Inst{7} = s;
+ let Inst{6} = ra;
+ let Inst{5} = s0;
+ let Inst{4} = s1;
+ let Inst{3-0} = framesize{3-0};
+
+
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
new file mode 100644
index 000000000000..17dd2c07967a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -0,0 +1,417 @@
+//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16InstrInfo.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+static cl::opt<bool> NeverUseSaveRestore(
+ "mips16-never-use-save-restore",
+ cl::init(false),
+ cl::desc("For testing ability to adjust stack pointer "
+ "without save/restore instruction"),
+ cl::Hidden);
+
+
+Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
+ : MipsInstrInfo(tm, Mips::BimmX16),
+ RI(*tm.getSubtargetImpl(), *this) {}
+
+const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned Mips16InstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned Mips16InstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ return 0;
+}
+
+void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0;
+
+ if (Mips::CPU16RegsRegClass.contains(DestReg) &&
+ Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::MoveR3216;
+ else if (Mips::CPURegsRegClass.contains(DestReg) &&
+ Mips::CPU16RegsRegClass.contains(SrcReg))
+ Opc = Mips::Move32R16;
+ else if ((SrcReg == Mips::HI) &&
+ (Mips::CPU16RegsRegClass.contains(DestReg)))
+ Opc = Mips::Mfhi16, SrcReg = 0;
+
+ else if ((SrcReg == Mips::LO) &&
+ (Mips::CPU16RegsRegClass.contains(DestReg)))
+ Opc = Mips::Mflo16, SrcReg = 0;
+
+
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void Mips16InstrInfo::
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+ int64_t Offset) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+ unsigned Opc = 0;
+ if (Mips::CPU16RegsRegClass.hasSubClassEq(RC))
+ Opc = Mips::SwRxSpImmX16;
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
+}
+
+void Mips16InstrInfo::
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI, int64_t Offset) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
+ unsigned Opc = 0;
+
+ if (Mips::CPU16RegsRegClass.hasSubClassEq(RC))
+ Opc = Mips::LwRxSpImmX16;
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
+ .addMemOperand(MMO);
+}
+
+bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+ switch(MI->getDesc().getOpcode()) {
+ default:
+ return false;
+ case Mips::RetRA16:
+ ExpandRetRA16(MBB, MI, Mips::JrcRa16);
+ break;
+ }
+
+ MBB.erase(MI);
+ return true;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16;
+ case Mips::BnezRxImmX16: return Mips::BeqzRxImmX16;
+ case Mips::BteqzT8CmpX16: return Mips::BtnezT8CmpX16;
+ case Mips::BteqzT8SltX16: return Mips::BtnezT8SltX16;
+ case Mips::BteqzT8SltiX16: return Mips::BtnezT8SltiX16;
+ case Mips::BtnezX16: return Mips::BteqzX16;
+ case Mips::BtnezT8CmpiX16: return Mips::BteqzT8CmpiX16;
+ case Mips::BtnezT8SltuX16: return Mips::BteqzT8SltuX16;
+ case Mips::BtnezT8SltiuX16: return Mips::BteqzT8SltiuX16;
+ case Mips::BteqzX16: return Mips::BtnezX16;
+ case Mips::BteqzT8CmpiX16: return Mips::BtnezT8CmpiX16;
+ case Mips::BteqzT8SltuX16: return Mips::BtnezT8SltuX16;
+ case Mips::BteqzT8SltiuX16: return Mips::BtnezT8SltiuX16;
+ case Mips::BtnezT8CmpX16: return Mips::BteqzT8CmpX16;
+ case Mips::BtnezT8SltX16: return Mips::BteqzT8SltX16;
+ case Mips::BtnezT8SltiX16: return Mips::BteqzT8SltiX16;
+ }
+ assert(false && "Implement this function.");
+ return 0;
+}
+
+// Adjust SP by FrameSize bytes. Save RA, S0, S1
+void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (!NeverUseSaveRestore) {
+ if (isUInt<11>(FrameSize))
+ BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(FrameSize);
+ else {
+ int Base = 2040; // should create template function like isUInt that
+ // returns largest possible n bit unsigned integer
+ int64_t Remainder = FrameSize - Base;
+ BuildMI(MBB, I, DL, get(Mips::SaveRaF16)). addImm(Base);
+ if (isInt<16>(-Remainder))
+ BuildAddiuSpImm(MBB, I, -Remainder);
+ else
+ adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1);
+ }
+
+ }
+ else {
+ //
+ // sw ra, -4[sp]
+ // sw s1, -8[sp]
+ // sw s0, -12[sp]
+
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::RA);
+ MIB1.addReg(Mips::SP);
+ MIB1.addImm(-4);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::S1);
+ MIB2.addReg(Mips::SP);
+ MIB2.addImm(-8);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::S0);
+ MIB3.addReg(Mips::SP);
+ MIB3.addImm(-12);
+ adjustStackPtrBig(SP, -FrameSize, MBB, I, Mips::V0, Mips::V1);
+ }
+}
+
+// Adjust SP by FrameSize bytes. Restore RA, S0, S1
+void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (!NeverUseSaveRestore) {
+ if (isUInt<11>(FrameSize))
+ BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(FrameSize);
+ else {
+ int Base = 2040; // should create template function like isUInt that
+ // returns largest possible n bit unsigned integer
+ int64_t Remainder = FrameSize - Base;
+ if (isInt<16>(Remainder))
+ BuildAddiuSpImm(MBB, I, Remainder);
+ else
+ adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1);
+ BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)). addImm(Base);
+ }
+ }
+ else {
+ adjustStackPtrBig(SP, FrameSize, MBB, I, Mips::A0, Mips::A1);
+ // lw ra, -4[sp]
+ // lw s1, -8[sp]
+ // lw s0, -12[sp]
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::A0);
+ MIB1.addReg(Mips::SP);
+ MIB1.addImm(-4);
+ MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16),
+ Mips::RA);
+ MIB0.addReg(Mips::A0);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::S1);
+ MIB2.addReg(Mips::SP);
+ MIB2.addImm(-8);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::S0);
+ MIB3.addReg(Mips::SP);
+ MIB3.addImm(-12);
+ }
+
+}
+
+// Adjust SP by Amount bytes where bytes can be up to 32bit number.
+// This can only be called at times that we know that there is at least one free
+// register.
+// This is clearly safe at prologue and epilogue.
+//
+void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Reg1, unsigned Reg2) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+// MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+// unsigned Reg1 = RegInfo.createVirtualRegister(&Mips::CPU16RegsRegClass);
+// unsigned Reg2 = RegInfo.createVirtualRegister(&Mips::CPU16RegsRegClass);
+ //
+ // li reg1, constant
+ // move reg2, sp
+ // add reg1, reg1, reg2
+ // move sp, reg1
+ //
+ //
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwConstant32), Reg1);
+ MIB1.addImm(Amount);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::MoveR3216), Reg2);
+ MIB2.addReg(Mips::SP, RegState::Kill);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::AdduRxRyRz16), Reg1);
+ MIB3.addReg(Reg1);
+ MIB3.addReg(Reg2, RegState::Kill);
+ MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16),
+ Mips::SP);
+ MIB4.addReg(Reg1, RegState::Kill);
+}
+
+void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ assert(false && "adjust stack pointer amount exceeded");
+}
+
+/// Adjust SP by Amount bytes.
+void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16>
+ BuildAddiuSpImm(MBB, I, Amount);
+ else
+ adjustStackPtrBigUnrestricted(SP, Amount, MBB, I);
+}
+
+/// This function generates the sequence of instructions needed to get the
+/// result of adding register REG and immediate IMM.
+unsigned
+Mips16InstrInfo::loadImmediate(unsigned FrameReg,
+ int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned &NewImm) const {
+ //
+ // given original instruction is:
+ // Instr rx, T[offset] where offset is too big.
+ //
+ // lo = offset & 0xFFFF
+ // hi = ((offset >> 16) + (lo >> 15)) & 0xFFFF;
+ //
+ // let T = temporary register
+ // li T, hi
+ // shl T, 16
+ // add T, Rx, T
+ //
+ RegScavenger rs;
+ int32_t lo = Imm & 0xFFFF;
+ int32_t hi = ((Imm >> 16) + (lo >> 15)) & 0xFFFF;
+ NewImm = lo;
+ unsigned Reg =0;
+ unsigned SpReg = 0;
+ rs.enterBasicBlock(&MBB);
+ rs.forward(II);
+ //
+ // we use T0 for the first register, if we need to save something away.
+ // we use T1 for the second register, if we need to save something away.
+ //
+ unsigned FirstRegSaved =0, SecondRegSaved=0;
+ unsigned FirstRegSavedTo = 0, SecondRegSavedTo = 0;
+
+ Reg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass);
+ if (Reg == 0) {
+ FirstRegSaved = Reg = Mips::V0;
+ FirstRegSavedTo = Mips::T0;
+ copyPhysReg(MBB, II, DL, FirstRegSavedTo, FirstRegSaved, true);
+ }
+ else
+ rs.setUsed(Reg);
+ BuildMI(MBB, II, DL, get(Mips::LiRxImmX16), Reg).addImm(hi);
+ BuildMI(MBB, II, DL, get(Mips::SllX16), Reg).addReg(Reg).
+ addImm(16);
+ if (FrameReg == Mips::SP) {
+ SpReg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass);
+ if (SpReg == 0) {
+ if (Reg != Mips::V1) {
+ SecondRegSaved = SpReg = Mips::V1;
+ SecondRegSavedTo = Mips::T1;
+ }
+ else {
+ SecondRegSaved = SpReg = Mips::V0;
+ SecondRegSavedTo = Mips::T0;
+ }
+ copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true);
+ }
+ else
+ rs.setUsed(SpReg);
+
+ copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false);
+ BuildMI(MBB, II, DL, get(Mips:: AdduRxRyRz16), Reg).addReg(SpReg)
+ .addReg(Reg);
+ }
+ else
+ BuildMI(MBB, II, DL, get(Mips:: AdduRxRyRz16), Reg).addReg(FrameReg)
+ .addReg(Reg, RegState::Kill);
+ if (FirstRegSaved || SecondRegSaved) {
+ II = llvm::next(II);
+ if (FirstRegSaved)
+ copyPhysReg(MBB, II, DL, FirstRegSaved, FirstRegSavedTo, true);
+ if (SecondRegSaved)
+ copyPhysReg(MBB, II, DL, SecondRegSaved, SecondRegSavedTo, true);
+ }
+ return Reg;
+}
+
+unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
+ return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 ||
+ Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 ||
+ Opc == Mips::BteqzT8CmpX16 || Opc == Mips::BteqzT8CmpiX16 ||
+ Opc == Mips::BteqzT8SltX16 || Opc == Mips::BteqzT8SltuX16 ||
+ Opc == Mips::BteqzT8SltiX16 || Opc == Mips::BteqzT8SltiuX16 ||
+ Opc == Mips::BtnezX16 || Opc == Mips::BtnezT8CmpX16 ||
+ Opc == Mips::BtnezT8CmpiX16 || Opc == Mips::BtnezT8SltX16 ||
+ Opc == Mips::BtnezT8SltuX16 || Opc == Mips::BtnezT8SltiX16 ||
+ Opc == Mips::BtnezT8SltiuX16 ) ? Opc : 0;
+}
+
+void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opc) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
+}
+
+
+const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const {
+ if (validSpImm8(Imm))
+ return get(Mips::AddiuSpImm16);
+ else
+ return get(Mips::AddiuSpImmX16);
+}
+
+void Mips16InstrInfo::BuildAddiuSpImm
+ (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm);
+}
+
+const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
+ return new Mips16InstrInfo(TM);
+}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
new file mode 100644
index 000000000000..a77a9043bb17
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
@@ -0,0 +1,124 @@
+//===-- Mips16InstrInfo.h - Mips16 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16INSTRUCTIONINFO_H
+#define MIPS16INSTRUCTIONINFO_H
+
+#include "Mips16RegisterInfo.h"
+#include "MipsInstrInfo.h"
+
+namespace llvm {
+
+class Mips16InstrInfo : public MipsInstrInfo {
+ const Mips16RegisterInfo RI;
+
+public:
+ explicit Mips16InstrInfo(MipsTargetMachine &TM);
+
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
+
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+
+ // Adjust SP by FrameSize bytes. Save RA, S0, S1
+ void makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ // Adjust SP by FrameSize bytes. Restore RA, S0, S1
+ void restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+
+ /// Adjust SP by Amount bytes.
+ void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Emit a series of instructions to load an immediate.
+ // This is to adjust some FrameReg. We return the new register to be used
+ // in place of FrameReg and the adjusted immediate field (&NewImm)
+ //
+ unsigned loadImmediate(unsigned FrameReg,
+ int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned &NewImm) const;
+
+ static bool validSpImm8(int offset) {
+ return ((offset & 7) == 0) && isInt<11>(offset);
+ }
+
+ //
+ // build the proper one based on the Imm field
+ //
+
+ const MCInstrDesc& AddiuSpImm(int64_t Imm) const;
+
+ void BuildAddiuSpImm
+ (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
+
+ void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned Opc) const;
+
+ // Adjust SP by Amount bytes where bytes can be up to 32bit number.
+ void adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Reg1, unsigned Reg2) const;
+
+ // Adjust SP by Amount bytes where bytes can be up to 32bit number.
+ void adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td
new file mode 100644
index 000000000000..aa51aaf46565
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td
@@ -0,0 +1,1791 @@
+//===- Mips16InstrInfo.td - Target Description for Mips16 -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips16 instructions.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+// Mips Address
+//
+def addr16 :
+ ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>;
+
+//
+// Address operand
+def mem16 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPU16Regs, simm16, CPU16Regs);
+ let EncoderMethod = "getMemEncoding";
+}
+
+def mem16_ea : Operand<i32> {
+ let PrintMethod = "printMemOperandEA";
+ let MIOperandInfo = (ops CPU16Regs, simm16);
+ let EncoderMethod = "getMemEncoding";
+}
+
+//
+//
+// I8 instruction format
+//
+
+class FI816_ins_base<bits<3> _func, string asmstr,
+ string asmstr2, InstrItinClass itin>:
+ FI816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2),
+ [], itin>;
+
+
+class FI816_SP_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FI816_ins_base<_func, asmstr, "\t$$sp, $imm # 16 bit inst", itin>;
+
+//
+// RI instruction format
+//
+
+
+class FRI16_ins_base<bits<5> op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FRI16<op, (outs CPU16Regs:$rx), (ins simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FRI16_ins<bits<5> op, string asmstr,
+ InstrItinClass itin>:
+ FRI16_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>;
+
+class FRI16R_ins_base<bits<5> op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FRI16<op, (outs), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FRI16R_ins<bits<5> op, string asmstr,
+ InstrItinClass itin>:
+ FRI16R_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>;
+
+class F2RI16_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FRI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm\t# 16 bit inst"), [], itin> {
+ let Constraints = "$rx_ = $rx";
+}
+
+class FRI16_B_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FRI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
+ !strconcat(asmstr, "\t$rx, $imm # 16 bit inst"), [], itin>;
+//
+// Compare a register and immediate and place result in CC
+// Implicit use of T8
+//
+// EXT-CCRR Instruction format
+//
+class FEXT_CCRXI16_ins<string asmstr>:
+ MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), []> {
+ let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
+}
+
+// JAL and JALX instruction format
+//
+class FJAL16_ins<bits<1> _X, string asmstr,
+ InstrItinClass itin>:
+ FJAL16<_X, (outs), (ins simm20:$imm),
+ !strconcat(asmstr, "\t$imm\n\tnop"),[],
+ itin> {
+ let isCodeGenOnly=1;
+}
+//
+// EXT-I instruction format
+//
+class FEXT_I16_ins<bits<5> eop, string asmstr, InstrItinClass itin> :
+ FEXT_I16<eop, (outs), (ins brtarget:$imm16),
+ !strconcat(asmstr, "\t$imm16"),[], itin>;
+
+//
+// EXT-I8 instruction format
+//
+
+class FEXT_I816_ins_base<bits<3> _func, string asmstr,
+ string asmstr2, InstrItinClass itin>:
+ FEXT_I816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2),
+ [], itin>;
+
+class FEXT_I816_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FEXT_I816_ins_base<_func, asmstr, "\t$imm", itin>;
+
+class FEXT_I816_SP_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FEXT_I816_ins_base<_func, asmstr, "\t$$sp, $imm", itin>;
+
+//
+// Assembler formats in alphabetical order.
+// Natural and pseudos are mixed together.
+//
+// Compare two registers and place result in CC
+// Implicit use of T8
+//
+// CC-RR Instruction format
+//
+class FCCRR16_ins<string asmstr> :
+ MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), []> {
+ let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
+}
+
+//
+// EXT-RI instruction format
+//
+
+class FEXT_RI16_ins_base<bits<5> _op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FEXT_RI16_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $imm", itin>;
+
+class FEXT_RI16R_ins_base<bits<5> _op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs ), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FEXT_RI16R_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16R_ins_base<_op, asmstr, "\t$rx, $imm", itin>;
+
+class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>:
+ FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
+
+class FEXT_RI16_B_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
+ !strconcat(asmstr, "\t$rx, $imm"), [], itin>;
+
+class FEXT_2RI16_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm"), [], itin> {
+ let Constraints = "$rx_ = $rx";
+}
+
+
+// this has an explicit sp argument that we ignore to work around a problem
+// in the compiler
+class FEXT_RI16_SP_explicit_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPUSPReg:$ry, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm ( $ry ); "), [], itin>;
+
+//
+// EXT-RRI instruction format
+//
+
+class FEXT_RRI16_mem_ins<bits<5> op, string asmstr, Operand MemOpnd,
+ InstrItinClass itin>:
+ FEXT_RRI16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr),
+ !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd,
+ InstrItinClass itin>:
+ FEXT_RRI16<op, (outs ), (ins CPU16Regs:$ry, MemOpnd:$addr),
+ !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+//
+//
+// EXT-RRI-A instruction format
+//
+
+class FEXT_RRI_A16_mem_ins<bits<1> op, string asmstr, Operand MemOpnd,
+ InstrItinClass itin>:
+ FEXT_RRI_A16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr),
+ !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+//
+// EXT-SHIFT instruction format
+//
+class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
+ FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
+ !strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
+
+//
+// EXT-T8I8
+//
+class FEXT_T8I816_ins<string asmstr, string asmstr2>:
+ MipsPseudo16<(outs),
+ (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm),
+ !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t",
+ !strconcat(asmstr, "\t$imm"))),[]> {
+ let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
+}
+
+//
+// EXT-T8I8I
+//
+class FEXT_T8I8I16_ins<string asmstr, string asmstr2>:
+ MipsPseudo16<(outs),
+ (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ),
+ !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t",
+ !strconcat(asmstr, "\t$targ"))), []> {
+ let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
+}
+//
+
+
+//
+// I8_MOVR32 instruction format (used only by the MOVR32 instructio
+//
+class FI8_MOVR3216_ins<string asmstr, InstrItinClass itin>:
+ FI8_MOVR3216<(outs CPU16Regs:$rz), (ins CPURegs:$r32),
+ !strconcat(asmstr, "\t$rz, $r32"), [], itin>;
+
+//
+// I8_MOV32R instruction format (used only by MOV32R instruction)
+//
+
+class FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>:
+ FI8_MOV32R16<(outs CPURegs:$r32), (ins CPU16Regs:$rz),
+ !strconcat(asmstr, "\t$r32, $rz"), [], itin>;
+
+//
+// This are pseudo formats for multiply
+// This first one can be changed to non pseudo now.
+//
+// MULT
+//
+class FMULT16_ins<string asmstr, InstrItinClass itin> :
+ MipsPseudo16<(outs), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), []>;
+
+//
+// MULT-LO
+//
+class FMULT16_LO_ins<string asmstr, InstrItinClass itin> :
+ MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry\n\tmflo\t$rz"), []> {
+ let isCodeGenOnly=1;
+}
+
+//
+// RR-type instruction format
+//
+
+class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRR16R_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRRTR16_ins<string asmstr> :
+ MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), []> ;
+
+//
+// maybe refactor but need a $zero as a dummy first parameter
+//
+class FRR16_div_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs ), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$$zero, $rx, $ry"), [], itin> ;
+
+class FUnaryRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> ;
+
+
+class FRR16_M_ins<bits<5> f, string asmstr,
+ InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rx), (ins),
+ !strconcat(asmstr, "\t$rx"), [], itin>;
+
+class FRxRxRy16_ins<bits<5> f, string asmstr,
+ InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rz, $ry"),
+ [], itin> {
+ let Constraints = "$rx = $rz";
+}
+
+let rx=0 in
+class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
+ string asmstr, InstrItinClass itin>:
+ FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"),
+ [], itin> ;
+
+
+class FRR16_JALRC_ins<bits<1> nd, bits<1> l, bits<1> ra,
+ string asmstr, InstrItinClass itin>:
+ FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
+ !strconcat(asmstr, "\t $rx"), [], itin> ;
+
+//
+// RRR-type instruction format
+//
+
+class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> :
+ FRRR16<_f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rz, $rx, $ry"), [], itin>;
+
+//
+// These Sel patterns support the generation of conditional move
+// pseudo instructions.
+//
+// The nomenclature uses the components making up the pseudo and may
+// be a bit counter intuitive when compared with the end result we seek.
+// For example using a bqez in the example directly below results in the
+// conditional move being done if the tested register is not zero.
+// I considered in easier to check by keeping the pseudo consistent with
+// it's components but it could have been done differently.
+//
+// The simplest case is when can test and operand directly and do the
+// conditional move based on a simple mips16 conditional
+// branch instruction.
+// for example:
+// if $op == beqz or bnez:
+//
+// $op1 $rt, .+4
+// move $rd, $rs
+//
+// if $op == beqz, then if $rt != 0, then the conditional assignment
+// $rd = $rs is done.
+
+// if $op == bnez, then if $rt == 0, then the conditional assignment
+// $rd = $rs is done.
+//
+// So this pseudo class only has one operand, i.e. op
+//
+class Sel<string op>:
+ MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+ CPU16Regs:$rt),
+ !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), []> {
+ //let isCodeGenOnly=1;
+ let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
+}
+
+//
+// The next two instruction classes allow for an operand which tests
+// two operands and returns a value in register T8 and
+//then does a conditional branch based on the value of T8
+//
+
+// op2 can be cmpi or slti/sltiu
+// op1 can bteqz or btnez
+// the operands for op2 are a register and a signed constant
+//
+// $op2 $t, $imm ;test register t and branch conditionally
+// $op1 .+4 ;op1 is a conditional branch
+// move $rd, $rs
+//
+//
+class SeliT<string op1, string op2>:
+ MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+ CPU16Regs:$rl, simm16:$imm),
+ !strconcat(op2,
+ !strconcat("\t$rl, $imm\n\t",
+ !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> {
+ let isCodeGenOnly=1;
+ let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
+}
+
+//
+// op2 can be cmp or slt/sltu
+// op1 can be bteqz or btnez
+// the operands for op2 are two registers
+// op1 is a conditional branch
+//
+//
+// $op2 $rl, $rr ;test registers rl,rr
+// $op1 .+4 ;op2 is a conditional branch
+// move $rd, $rs
+//
+//
+class SelT<string op1, string op2>:
+ MipsPseudo16<(outs CPU16Regs:$rd_),
+ (ins CPU16Regs:$rd, CPU16Regs:$rs,
+ CPU16Regs:$rl, CPU16Regs:$rr),
+ !strconcat(op2,
+ !strconcat("\t$rl, $rr\n\t",
+ !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> {
+ let isCodeGenOnly=1;
+ let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
+}
+
+//
+// 32 bit constant
+//
+def imm32: Operand<i32>;
+
+def Constant32:
+ MipsPseudo16<(outs), (ins imm32:$imm), "\t.word $imm", []>;
+
+def LwConstant32:
+ MipsPseudo16<(outs), (ins CPU16Regs:$rx, imm32:$imm),
+ "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
+
+
+//
+// Some general instruction class info
+//
+//
+
+class ArithLogic16Defs<bit isCom=0> {
+ bits<5> shamt = 0;
+ bit isCommutable = isCom;
+ bit isReMaterializable = 1;
+ bit neverHasSideEffects = 1;
+}
+
+class branch16 {
+ bit isBranch = 1;
+ bit isTerminator = 1;
+ bit isBarrier = 1;
+}
+
+class cbranch16 {
+ bit isBranch = 1;
+ bit isTerminator = 1;
+}
+
+class MayLoad {
+ bit mayLoad = 1;
+}
+
+class MayStore {
+ bit mayStore = 1;
+}
+//
+
+
+// Format: ADDIU rx, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
+// To add a constant to a 32-bit integer.
+//
+def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+
+def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>,
+ ArithLogic16Defs<0> {
+ let AddedComplexity = 5;
+}
+def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
+ ArithLogic16Defs<0> {
+ let isCodeGenOnly = 1;
+}
+
+def AddiuRxRyOffMemX16:
+ FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>;
+
+//
+
+// Format: ADDIU rx, pc, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended)
+// To add a constant to the program counter.
+//
+def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
+
+//
+// Format: ADDIU sp, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (2-Operand, SP-Relative, Extended)
+// To add a constant to the stack pointer.
+//
+def AddiuSpImm16
+ : FI816_SP_ins<0b011, "addiu", IIAlu> {
+ let Defs = [SP];
+ let Uses = [SP];
+ let AddedComplexity = 5;
+}
+
+def AddiuSpImmX16
+ : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> {
+ let Defs = [SP];
+ let Uses = [SP];
+}
+
+//
+// Format: ADDU rz, rx, ry MIPS16e
+// Purpose: Add Unsigned Word (3-Operand)
+// To add 32-bit integers.
+//
+
+def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: AND rx, ry MIPS16e
+// Purpose: AND
+// To do a bitwise logical AND.
+
+def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
+
+
+//
+// Format: BEQZ rx, offset MIPS16e
+// Purpose: Branch on Equal to Zero
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+
+
+//
+// Format: BEQZ rx, offset MIPS16e
+// Purpose: Branch on Equal to Zero (Extended)
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+
+// Format: B offset MIPS16e
+// Purpose: Unconditional Branch
+// To do an unconditional PC-relative branch.
+//
+def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16;
+
+//
+// Format: BNEZ rx, offset MIPS16e
+// Purpose: Branch on Not Equal to Zero
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+
+//
+// Format: BNEZ rx, offset MIPS16e
+// Purpose: Branch on Not Equal to Zero (Extended)
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+
+//
+// Format: BTEQZ offset MIPS16e
+// Purpose: Branch on T Equal to Zero (Extended)
+// To test special register T then do a PC-relative conditional branch.
+//
+def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
+ let Uses = [T8];
+}
+
+def BteqzT8CmpX16: FEXT_T8I816_ins<"bteqz", "cmp">, cbranch16;
+
+def BteqzT8CmpiX16: FEXT_T8I8I16_ins<"bteqz", "cmpi">,
+ cbranch16;
+
+def BteqzT8SltX16: FEXT_T8I816_ins<"bteqz", "slt">, cbranch16;
+
+def BteqzT8SltuX16: FEXT_T8I816_ins<"bteqz", "sltu">, cbranch16;
+
+def BteqzT8SltiX16: FEXT_T8I8I16_ins<"bteqz", "slti">, cbranch16;
+
+def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">,
+ cbranch16;
+
+//
+// Format: BTNEZ offset MIPS16e
+// Purpose: Branch on T Not Equal to Zero (Extended)
+// To test special register T then do a PC-relative conditional branch.
+//
+def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 {
+ let Uses = [T8];
+}
+
+def BtnezT8CmpX16: FEXT_T8I816_ins<"btnez", "cmp">, cbranch16;
+
+def BtnezT8CmpiX16: FEXT_T8I8I16_ins<"btnez", "cmpi">, cbranch16;
+
+def BtnezT8SltX16: FEXT_T8I816_ins<"btnez", "slt">, cbranch16;
+
+def BtnezT8SltuX16: FEXT_T8I816_ins<"btnez", "sltu">, cbranch16;
+
+def BtnezT8SltiX16: FEXT_T8I8I16_ins<"btnez", "slti">, cbranch16;
+
+def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">,
+ cbranch16;
+
+//
+// Format: CMP rx, ry MIPS16e
+// Purpose: Compare
+// To compare the contents of two GPRs.
+//
+def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: CMPI rx, immediate MIPS16e
+// Purpose: Compare Immediate
+// To compare a constant with the contents of a GPR.
+//
+def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: CMPI rx, immediate MIPS16e
+// Purpose: Compare Immediate (Extended)
+// To compare a constant with the contents of a GPR.
+//
+def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> {
+ let Defs = [T8];
+}
+
+
+//
+// Format: DIV rx, ry MIPS16e
+// Purpose: Divide Word
+// To divide 32-bit signed integers.
+//
+def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
+ let Defs = [HI, LO];
+}
+
+//
+// Format: DIVU rx, ry MIPS16e
+// Purpose: Divide Unsigned Word
+// To divide 32-bit unsigned integers.
+//
+def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
+ let Defs = [HI, LO];
+}
+//
+// Format: JAL target MIPS16e
+// Purpose: Jump and Link
+// To execute a procedure call within the current 256 MB-aligned
+// region and preserve the current ISA.
+//
+
+def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> {
+ let isBranch = 1;
+ let hasDelaySlot = 0; // not true, but we add the nop for now
+ let isTerminator=1;
+ let isBarrier=1;
+}
+
+//
+// Format: JR ra MIPS16e
+// Purpose: Jump Register Through Register ra
+// To execute a branch to the instruction address in the return
+// address register.
+//
+
+def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> {
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+ let hasDelaySlot = 1;
+ let isTerminator=1;
+ let isBarrier=1;
+}
+
+def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIAlu> {
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+ let isTerminator=1;
+ let isBarrier=1;
+}
+
+def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> {
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+ let isTerminator=1;
+ let isBarrier=1;
+}
+//
+// Format: LB ry, offset(rx) MIPS16e
+// Purpose: Load Byte (Extended)
+// To load a byte from memory as a signed value.
+//
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad{
+ let isCodeGenOnly = 1;
+}
+
+//
+// Format: LBU ry, offset(rx) MIPS16e
+// Purpose: Load Byte Unsigned (Extended)
+// To load a byte from memory as a unsigned value.
+//
+def LbuRxRyOffMemX16:
+ FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
+
+//
+// Format: LH ry, offset(rx) MIPS16e
+// Purpose: Load Halfword signed (Extended)
+// To load a halfword from memory as a signed value.
+//
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad{
+ let isCodeGenOnly = 1;
+}
+
+//
+// Format: LHU ry, offset(rx) MIPS16e
+// Purpose: Load Halfword unsigned (Extended)
+// To load a halfword from memory as an unsigned value.
+//
+def LhuRxRyOffMemX16:
+ FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
+
+//
+// Format: LI rx, immediate MIPS16e
+// Purpose: Load Immediate
+// To load a constant into a GPR.
+//
+def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>;
+
+//
+// Format: LI rx, immediate MIPS16e
+// Purpose: Load Immediate (Extended)
+// To load a constant into a GPR.
+//
+def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
+
+//
+// Format: LW ry, offset(rx) MIPS16e
+// Purpose: Load Word (Extended)
+// To load a word from memory as a signed value.
+//
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad{
+ let isCodeGenOnly = 1;
+}
+
+// Format: LW rx, offset(sp) MIPS16e
+// Purpose: Load Word (SP-Relative, Extended)
+// To load an SP-relative word from memory as a signed value.
+//
+def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad{
+ let Uses = [SP];
+}
+
+//
+// Format: MOVE r32, rz MIPS16e
+// Purpose: Move
+// To move the contents of a GPR to a GPR.
+//
+def Move32R16: FI8_MOV32R16_ins<"move", IIAlu>;
+
+//
+// Format: MOVE ry, r32 MIPS16e
+//Purpose: Move
+// To move the contents of a GPR to a GPR.
+//
+def MoveR3216: FI8_MOVR3216_ins<"move", IIAlu>;
+
+//
+// Format: MFHI rx MIPS16e
+// Purpose: Move From HI Register
+// To copy the special purpose HI register to a GPR.
+//
+def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
+ let Uses = [HI];
+ let neverHasSideEffects = 1;
+}
+
+//
+// Format: MFLO rx MIPS16e
+// Purpose: Move From LO Register
+// To copy the special purpose LO register to a GPR.
+//
+def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
+ let Uses = [LO];
+ let neverHasSideEffects = 1;
+}
+
+//
+// Pseudo Instruction for mult
+//
+def MultRxRy16: FMULT16_ins<"mult", IIAlu> {
+ let isCommutable = 1;
+ let neverHasSideEffects = 1;
+ let Defs = [HI, LO];
+}
+
+def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
+ let isCommutable = 1;
+ let neverHasSideEffects = 1;
+ let Defs = [HI, LO];
+}
+
+//
+// Format: MULT rx, ry MIPS16e
+// Purpose: Multiply Word
+// To multiply 32-bit signed integers.
+//
+def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
+ let isCommutable = 1;
+ let neverHasSideEffects = 1;
+ let Defs = [HI, LO];
+}
+
+//
+// Format: MULTU rx, ry MIPS16e
+// Purpose: Multiply Unsigned Word
+// To multiply 32-bit unsigned integers.
+//
+def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> {
+ let isCommutable = 1;
+ let neverHasSideEffects = 1;
+ let Defs = [HI, LO];
+}
+
+//
+// Format: NEG rx, ry MIPS16e
+// Purpose: Negate
+// To negate an integer value.
+//
+def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIAlu>;
+
+//
+// Format: NOT rx, ry MIPS16e
+// Purpose: Not
+// To complement an integer value
+//
+def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIAlu>;
+
+//
+// Format: OR rx, ry MIPS16e
+// Purpose: Or
+// To do a bitwise logical OR.
+//
+def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize}
+// (All args are optional) MIPS16e
+// Purpose: Restore Registers and Deallocate Stack Frame
+// To deallocate a stack frame before exit from a subroutine,
+// restoring return address and static registers, and adjusting
+// stack
+//
+
+// fixed form for restoring RA and the frame
+// for direct object emitter, encoding needs to be adjusted for the
+// frame size
+//
+let ra=1, s=0,s0=1,s1=1 in
+def RestoreRaF16:
+ FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+ "restore\t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad {
+ let isCodeGenOnly = 1;
+ let Defs = [S0, S1, RA, SP];
+ let Uses = [SP];
+}
+
+// Use Restore to increment SP since SP is not a Mip 16 register, this
+// is an easy way to do that which does not require a register.
+//
+let ra=0, s=0,s0=0,s1=0 in
+def RestoreIncSpF16:
+ FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+ "restore\t$frame_size", [], IILoad >, MayLoad {
+ let isCodeGenOnly = 1;
+ let Defs = [SP];
+ let Uses = [SP];
+}
+
+//
+// Format: SAVE {ra,}{s0/s1/s0-1,}{framesize} (All arguments are optional)
+// MIPS16e
+// Purpose: Save Registers and Set Up Stack Frame
+// To set up a stack frame on entry to a subroutine,
+// saving return address and static registers, and adjusting stack
+//
+let ra=1, s=1,s0=1,s1=1 in
+def SaveRaF16:
+ FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+ "save\t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore {
+ let isCodeGenOnly = 1;
+ let Uses = [RA, SP, S0, S1];
+ let Defs = [SP];
+}
+
+//
+// Use Save to decrement the SP by a constant since SP is not
+// a Mips16 register.
+//
+let ra=0, s=0,s0=0,s1=0 in
+def SaveDecSpF16:
+ FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+ "save\t$frame_size", [], IIStore >, MayStore {
+ let isCodeGenOnly = 1;
+ let Uses = [SP];
+ let Defs = [SP];
+}
+//
+// Format: SB ry, offset(rx) MIPS16e
+// Purpose: Store Byte (Extended)
+// To store a byte to memory.
+//
+def SbRxRyOffMemX16:
+ FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIStore>, MayStore;
+
+//
+// The Sel(T) instructions are pseudos
+// T means that they use T8 implicitly.
+//
+//
+// Format: SelBeqZ rd, rs, rt
+// Purpose: if rt==0, do nothing
+// else rs = rt
+//
+def SelBeqZ: Sel<"beqz">;
+
+//
+// Format: SelTBteqZCmp rd, rs, rl, rr
+// Purpose: b = Cmp rl, rr.
+// If b==0 then do nothing.
+// if b!=0 then rd = rs
+//
+def SelTBteqZCmp: SelT<"bteqz", "cmp">;
+
+//
+// Format: SelTBteqZCmpi rd, rs, rl, rr
+// Purpose: b = Cmpi rl, imm.
+// If b==0 then do nothing.
+// if b!=0 then rd = rs
+//
+def SelTBteqZCmpi: SeliT<"bteqz", "cmpi">;
+
+//
+// Format: SelTBteqZSlt rd, rs, rl, rr
+// Purpose: b = Slt rl, rr.
+// If b==0 then do nothing.
+// if b!=0 then rd = rs
+//
+def SelTBteqZSlt: SelT<"bteqz", "slt">;
+
+//
+// Format: SelTBteqZSlti rd, rs, rl, rr
+// Purpose: b = Slti rl, imm.
+// If b==0 then do nothing.
+// if b!=0 then rd = rs
+//
+def SelTBteqZSlti: SeliT<"bteqz", "slti">;
+
+//
+// Format: SelTBteqZSltu rd, rs, rl, rr
+// Purpose: b = Sltu rl, rr.
+// If b==0 then do nothing.
+// if b!=0 then rd = rs
+//
+def SelTBteqZSltu: SelT<"bteqz", "sltu">;
+
+//
+// Format: SelTBteqZSltiu rd, rs, rl, rr
+// Purpose: b = Sltiu rl, imm.
+// If b==0 then do nothing.
+// if b!=0 then rd = rs
+//
+def SelTBteqZSltiu: SeliT<"bteqz", "sltiu">;
+
+//
+// Format: SelBnez rd, rs, rt
+// Purpose: if rt!=0, do nothing
+// else rs = rt
+//
+def SelBneZ: Sel<"bnez">;
+
+//
+// Format: SelTBtneZCmp rd, rs, rl, rr
+// Purpose: b = Cmp rl, rr.
+// If b!=0 then do nothing.
+// if b0=0 then rd = rs
+//
+def SelTBtneZCmp: SelT<"btnez", "cmp">;
+
+//
+// Format: SelTBtnezCmpi rd, rs, rl, rr
+// Purpose: b = Cmpi rl, imm.
+// If b!=0 then do nothing.
+// if b==0 then rd = rs
+//
+def SelTBtneZCmpi: SeliT<"btnez", "cmpi">;
+
+//
+// Format: SelTBtneZSlt rd, rs, rl, rr
+// Purpose: b = Slt rl, rr.
+// If b!=0 then do nothing.
+// if b==0 then rd = rs
+//
+def SelTBtneZSlt: SelT<"btnez", "slt">;
+
+//
+// Format: SelTBtneZSlti rd, rs, rl, rr
+// Purpose: b = Slti rl, imm.
+// If b!=0 then do nothing.
+// if b==0 then rd = rs
+//
+def SelTBtneZSlti: SeliT<"btnez", "slti">;
+
+//
+// Format: SelTBtneZSltu rd, rs, rl, rr
+// Purpose: b = Sltu rl, rr.
+// If b!=0 then do nothing.
+// if b==0 then rd = rs
+//
+def SelTBtneZSltu: SelT<"btnez", "sltu">;
+
+//
+// Format: SelTBtneZSltiu rd, rs, rl, rr
+// Purpose: b = Slti rl, imm.
+// If b!=0 then do nothing.
+// if b==0 then rd = rs
+//
+def SelTBtneZSltiu: SeliT<"btnez", "sltiu">;
+//
+//
+// Format: SH ry, offset(rx) MIPS16e
+// Purpose: Store Halfword (Extended)
+// To store a halfword to memory.
+//
+def ShRxRyOffMemX16:
+ FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIStore>, MayStore;
+
+//
+// Format: SLL rx, ry, sa MIPS16e
+// Purpose: Shift Word Left Logical (Extended)
+// To execute a left-shift of a word by a fixed number of bits—0 to 31 bits.
+//
+def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
+
+//
+// Format: SLLV ry, rx MIPS16e
+// Purpose: Shift Word Left Logical Variable
+// To execute a left-shift of a word by a variable number of bits.
+//
+def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
+
+// Format: SLTI rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: SLTI rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate (Extended)
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIAlu> {
+ let Defs = [T8];
+}
+
+def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">;
+
+// Format: SLTIU rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate Unsigned
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: SLTI rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate Unsigned (Extended)
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIAlu> {
+ let Defs = [T8];
+}
+//
+// Format: SLTIU rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate Unsigned (Extended)
+// To record the result of a less-than comparison with a constant.
+//
+def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">;
+
+//
+// Format: SLT rx, ry MIPS16e
+// Purpose: Set on Less Than
+// To record the result of a less-than comparison.
+//
+def SltRxRy16: FRR16R_ins<0b00010, "slt", IIAlu>{
+ let Defs = [T8];
+}
+
+def SltCCRxRy16: FCCRR16_ins<"slt">;
+
+// Format: SLTU rx, ry MIPS16e
+// Purpose: Set on Less Than Unsigned
+// To record the result of an unsigned less-than comparison.
+//
+def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIAlu>{
+ let Defs = [T8];
+}
+
+def SltuRxRyRz16: FRRTR16_ins<"sltu"> {
+ let isCodeGenOnly=1;
+ let Defs = [T8];
+}
+
+
+def SltuCCRxRy16: FCCRR16_ins<"sltu">;
+//
+// Format: SRAV ry, rx MIPS16e
+// Purpose: Shift Word Right Arithmetic Variable
+// To execute an arithmetic right-shift of a word by a variable
+// number of bits.
+//
+def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
+
+
+//
+// Format: SRA rx, ry, sa MIPS16e
+// Purpose: Shift Word Right Arithmetic (Extended)
+// To execute an arithmetic right-shift of a word by a fixed
+// number of bits—1 to 8 bits.
+//
+def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
+
+
+//
+// Format: SRLV ry, rx MIPS16e
+// Purpose: Shift Word Right Logical Variable
+// To execute a logical right-shift of a word by a variable
+// number of bits.
+//
+def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
+
+
+//
+// Format: SRL rx, ry, sa MIPS16e
+// Purpose: Shift Word Right Logical (Extended)
+// To execute a logical right-shift of a word by a fixed
+// number of bits—1 to 31 bits.
+//
+def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>;
+
+//
+// Format: SUBU rz, rx, ry MIPS16e
+// Purpose: Subtract Unsigned Word
+// To subtract 32-bit integers
+//
+def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>;
+
+//
+// Format: SW ry, offset(rx) MIPS16e
+// Purpose: Store Word (Extended)
+// To store a word to memory.
+//
+def SwRxRyOffMemX16:
+ FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIStore>, MayStore;
+
+//
+// Format: SW rx, offset(sp) MIPS16e
+// Purpose: Store Word rx (SP-Relative)
+// To store an SP-relative word to memory.
+//
+def SwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b11010, "sw", IIStore>, MayStore;
+
+//
+//
+// Format: XOR rx, ry MIPS16e
+// Purpose: Xor
+// To do a bitwise logical XOR.
+//
+def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>;
+
+class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> {
+ let Predicates = [InMips16Mode];
+}
+
+// Unary Arith/Logic
+//
+class ArithLogicU_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r),
+ (I CPU16Regs:$r)>;
+
+def: ArithLogicU_pat<not, NotRxRy16>;
+def: ArithLogicU_pat<ineg, NegRxRy16>;
+
+class ArithLogic16_pat<SDNode OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$l, CPU16Regs:$r),
+ (I CPU16Regs:$l, CPU16Regs:$r)>;
+
+def: ArithLogic16_pat<add, AdduRxRyRz16>;
+def: ArithLogic16_pat<and, AndRxRxRy16>;
+def: ArithLogic16_pat<mul, MultRxRyRz16>;
+def: ArithLogic16_pat<or, OrRxRxRy16>;
+def: ArithLogic16_pat<sub, SubuRxRyRz16>;
+def: ArithLogic16_pat<xor, XorRxRxRy16>;
+
+// Arithmetic and logical instructions with 2 register operands.
+
+class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm),
+ (I CPU16Regs:$in, imm_type:$imm)>;
+
+def: ArithLogicI16_pat<add, immSExt8, AddiuRxRxImm16>;
+def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>;
+def: ArithLogicI16_pat<shl, immZExt5, SllX16>;
+def: ArithLogicI16_pat<srl, immZExt5, SrlX16>;
+def: ArithLogicI16_pat<sra, immZExt5, SraX16>;
+
+class shift_rotate_reg16_pat<SDNode OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r, CPU16Regs:$ra),
+ (I CPU16Regs:$r, CPU16Regs:$ra)>;
+
+def: shift_rotate_reg16_pat<shl, SllvRxRy16>;
+def: shift_rotate_reg16_pat<sra, SravRxRy16>;
+def: shift_rotate_reg16_pat<srl, SrlvRxRy16>;
+
+class LoadM16_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode addr16:$addr), (I addr16:$addr)>;
+
+def: LoadM16_pat<sextloadi8, LbRxRyOffMemX16>;
+def: LoadM16_pat<zextloadi8, LbuRxRyOffMemX16>;
+def: LoadM16_pat<sextloadi16, LhRxRyOffMemX16>;
+def: LoadM16_pat<zextloadi16, LhuRxRyOffMemX16>;
+def: LoadM16_pat<load, LwRxRyOffMemX16>;
+
+class StoreM16_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r, addr16:$addr),
+ (I CPU16Regs:$r, addr16:$addr)>;
+
+def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>;
+def: StoreM16_pat<truncstorei16, ShRxRyOffMemX16>;
+def: StoreM16_pat<store, SwRxRyOffMemX16>;
+
+// Unconditional branch
+class UncondBranch16_pat<SDNode OpNode, Instruction I>:
+ Mips16Pat<(OpNode bb:$imm16), (I bb:$imm16)> {
+ let Predicates = [InMips16Mode];
+ }
+
+def : Mips16Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (Jal16 tglobaladdr:$dst)>;
+
+def : Mips16Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (Jal16 texternalsym:$dst)>;
+
+// Indirect branch
+def: Mips16Pat<
+ (brind CPU16Regs:$rs),
+ (JrcRx16 CPU16Regs:$rs)>;
+
+// Jump and Link (Call)
+let isCall=1, hasDelaySlot=0 in
+def JumpLinkReg16:
+ FRR16_JALRC<0, 0, 0, (outs), (ins CPU16Regs:$rs),
+ "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>;
+
+// Mips16 pseudos
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1,
+ hasExtraSrcRegAllocReq = 1 in
+def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
+
+
+// setcc patterns
+
+class SetCC_R16<PatFrag cond_op, Instruction I>:
+ Mips16Pat<(cond_op CPU16Regs:$rx, CPU16Regs:$ry),
+ (I CPU16Regs:$rx, CPU16Regs:$ry)>;
+
+class SetCC_I16<PatFrag cond_op, PatLeaf imm_type, Instruction I>:
+ Mips16Pat<(cond_op CPU16Regs:$rx, imm_type:$imm16),
+ (I CPU16Regs:$rx, imm_type:$imm16)>;
+
+
+def: Mips16Pat<(i32 addr16:$addr),
+ (AddiuRxRyOffMemX16 addr16:$addr)>;
+
+
+// Large (>16 bit) immediate loads
+def : Mips16Pat<(i32 imm:$imm),
+ (OrRxRxRy16 (SllX16 (LiRxImmX16 (HI16 imm:$imm)), 16),
+ (LiRxImmX16 (LO16 imm:$imm)))>;
+
+// Carry MipsPatterns
+def : Mips16Pat<(subc CPU16Regs:$lhs, CPU16Regs:$rhs),
+ (SubuRxRyRz16 CPU16Regs:$lhs, CPU16Regs:$rhs)>;
+def : Mips16Pat<(addc CPU16Regs:$lhs, CPU16Regs:$rhs),
+ (AdduRxRyRz16 CPU16Regs:$lhs, CPU16Regs:$rhs)>;
+def : Mips16Pat<(addc CPU16Regs:$src, immSExt16:$imm),
+ (AddiuRxRxImmX16 CPU16Regs:$src, imm:$imm)>;
+
+//
+// Some branch conditional patterns are not generated by llvm at this time.
+// Some are for seemingly arbitrary reasons not used: i.e. with signed number
+// comparison they are used and for unsigned a different pattern is used.
+// I am pushing upstream from the full mips16 port and it seemed that I needed
+// these earlier and the mips32 port has these but now I cannot create test
+// cases that use these patterns. While I sort this all out I will leave these
+// extra patterns commented out and if I can be sure they are really not used,
+// I will delete the code. I don't want to check the code in uncommented without
+// a valid test case. In some cases, the compiler is generating patterns with
+// setcc instead and earlier I had implemented setcc first so may have masked
+// the problem. The setcc variants are suboptimal for mips16 so I may wantto
+// figure out how to enable the brcond patterns or else possibly new
+// combinations of of brcond and setcc.
+//
+//
+// bcond-seteq
+//
+def: Mips16Pat
+ <(brcond (i32 (seteq CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BteqzT8CmpX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+
+def: Mips16Pat
+ <(brcond (i32 (seteq CPU16Regs:$rx, immZExt16:$imm)), bb:$targ16),
+ (BteqzT8CmpiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$targ16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (seteq CPU16Regs:$rx, 0)), bb:$targ16),
+ (BeqzRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ >;
+
+//
+// bcond-setgt (do we need to have this pair of setlt, setgt??)
+//
+def: Mips16Pat
+ <(brcond (i32 (setgt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BtnezT8SltX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+ >;
+
+//
+// bcond-setge
+//
+def: Mips16Pat
+ <(brcond (i32 (setge CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BteqzT8SltX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+//
+// never called because compiler transforms a >= k to a > (k-1)
+def: Mips16Pat
+ <(brcond (i32 (setge CPU16Regs:$rx, immSExt16:$imm)), bb:$imm16),
+ (BteqzT8SltiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$imm16)
+ >;
+
+//
+// bcond-setlt
+//
+def: Mips16Pat
+ <(brcond (i32 (setlt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BtnezT8SltX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (setlt CPU16Regs:$rx, immSExt16:$imm)), bb:$imm16),
+ (BtnezT8SltiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$imm16)
+ >;
+
+//
+// bcond-setle
+//
+def: Mips16Pat
+ <(brcond (i32 (setle CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BteqzT8SltX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+ >;
+
+//
+// bcond-setne
+//
+def: Mips16Pat
+ <(brcond (i32 (setne CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BtnezT8CmpX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (setne CPU16Regs:$rx, immZExt16:$imm)), bb:$targ16),
+ (BtnezT8CmpiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$targ16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (setne CPU16Regs:$rx, 0)), bb:$targ16),
+ (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ >;
+
+//
+// This needs to be there but I forget which code will generate it
+//
+def: Mips16Pat
+ <(brcond CPU16Regs:$rx, bb:$targ16),
+ (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ >;
+
+//
+
+//
+// bcond-setugt
+//
+//def: Mips16Pat
+// <(brcond (i32 (setugt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+// (BtnezT8SltuX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+// >;
+
+//
+// bcond-setuge
+//
+//def: Mips16Pat
+// <(brcond (i32 (setuge CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+// (BteqzT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+// >;
+
+
+//
+// bcond-setult
+//
+//def: Mips16Pat
+// <(brcond (i32 (setult CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+// (BtnezT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+// >;
+
+def: UncondBranch16_pat<br, BimmX16>;
+
+// Small immediates
+def: Mips16Pat<(i32 immSExt16:$in),
+ (AddiuRxRxImmX16 (Move32R16 ZERO), immSExt16:$in)>;
+
+def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
+
+//
+// MipsDivRem
+//
+def: Mips16Pat
+ <(MipsDivRem16 CPU16Regs:$rx, CPU16Regs:$ry),
+ (DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
+
+//
+// MipsDivRemU
+//
+def: Mips16Pat
+ <(MipsDivRemU16 CPU16Regs:$rx, CPU16Regs:$ry),
+ (DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
+
+// signed a,b
+// x = (a>=b)?x:y
+//
+// if !(a < b) x = y
+//
+def : Mips16Pat<(select (i32 (setge CPU16Regs:$a, CPU16Regs:$b)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBteqZSlt CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$a, CPU16Regs:$b)>;
+
+// signed a,b
+// x = (a>b)?x:y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setgt CPU16Regs:$a, CPU16Regs:$b)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBtneZSlt CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$b, CPU16Regs:$a)>;
+
+// unsigned a,b
+// x = (a>=b)?x:y
+//
+// if !(a < b) x = y;
+//
+def : Mips16Pat<
+ (select (i32 (setuge CPU16Regs:$a, CPU16Regs:$b)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBteqZSltu CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$a, CPU16Regs:$b)>;
+
+// unsigned a,b
+// x = (a>b)?x:y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setugt CPU16Regs:$a, CPU16Regs:$b)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBtneZSltu CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$b, CPU16Regs:$a)>;
+
+// signed
+// x = (a >= k)?x:y
+// due to an llvm optimization, i don't think that this will ever
+// be used. This is transformed into x = (a > k-1)?x:y
+//
+//
+
+//def : Mips16Pat<
+// (select (i32 (setge CPU16Regs:$lhs, immSExt16:$rhs)),
+// CPU16Regs:$T, CPU16Regs:$F),
+// (SelTBteqZSlti CPU16Regs:$T, CPU16Regs:$F,
+// CPU16Regs:$lhs, immSExt16:$rhs)>;
+
+//def : Mips16Pat<
+// (select (i32 (setuge CPU16Regs:$lhs, immSExt16:$rhs)),
+// CPU16Regs:$T, CPU16Regs:$F),
+// (SelTBteqZSltiu CPU16Regs:$T, CPU16Regs:$F,
+// CPU16Regs:$lhs, immSExt16:$rhs)>;
+
+// signed
+// x = (a < k)?x:y
+//
+// if !(a < k) x = y;
+//
+def : Mips16Pat<
+ (select (i32 (setlt CPU16Regs:$a, immSExt16:$b)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBtneZSlti CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$a, immSExt16:$b)>;
+
+
+//
+//
+// signed
+// x = (a <= b)? x : y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setle CPU16Regs:$a, CPU16Regs:$b)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBteqZSlt CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// unnsigned
+// x = (a <= b)? x : y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setule CPU16Regs:$a, CPU16Regs:$b)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBteqZSltu CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// signed/unsigned
+// x = (a == b)? x : y
+//
+// if (a != b) x = y
+//
+def : Mips16Pat<(select (i32 (seteq CPU16Regs:$a, CPU16Regs:$b)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBteqZCmp CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// signed/unsigned
+// x = (a == 0)? x : y
+//
+// if (a != 0) x = y
+//
+def : Mips16Pat<(select (i32 (seteq CPU16Regs:$a, 0)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelBeqZ CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$a)>;
+
+
+//
+// signed/unsigned
+// x = (a == k)? x : y
+//
+// if (a != k) x = y
+//
+def : Mips16Pat<(select (i32 (seteq CPU16Regs:$a, immZExt16:$k)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBteqZCmpi CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$a, immZExt16:$k)>;
+
+
+//
+// signed/unsigned
+// x = (a != b)? x : y
+//
+// if (a == b) x = y
+//
+//
+def : Mips16Pat<(select (i32 (setne CPU16Regs:$a, CPU16Regs:$b)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBtneZCmp CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// signed/unsigned
+// x = (a != 0)? x : y
+//
+// if (a == 0) x = y
+//
+def : Mips16Pat<(select (i32 (setne CPU16Regs:$a, 0)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelBneZ CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$a)>;
+
+// signed/unsigned
+// x = (a)? x : y
+//
+// if (!a) x = y
+//
+def : Mips16Pat<(select CPU16Regs:$a,
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelBneZ CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$a)>;
+
+
+//
+// signed/unsigned
+// x = (a != k)? x : y
+//
+// if (a == k) x = y
+//
+def : Mips16Pat<(select (i32 (setne CPU16Regs:$a, immZExt16:$k)),
+ CPU16Regs:$x, CPU16Regs:$y),
+ (SelTBtneZCmpi CPU16Regs:$x, CPU16Regs:$y,
+ CPU16Regs:$a, immZExt16:$k)>;
+
+//
+// When writing C code to test setxx these patterns,
+// some will be transformed into
+// other things. So we test using C code but using -O3 and -O0
+//
+// seteq
+//
+def : Mips16Pat
+ <(seteq CPU16Regs:$lhs,CPU16Regs:$rhs),
+ (SltiuCCRxImmX16 (XorRxRxRy16 CPU16Regs:$lhs, CPU16Regs:$rhs), 1)>;
+
+def : Mips16Pat
+ <(seteq CPU16Regs:$lhs, 0),
+ (SltiuCCRxImmX16 CPU16Regs:$lhs, 1)>;
+
+
+//
+// setge
+//
+
+def: Mips16Pat
+ <(setge CPU16Regs:$lhs, CPU16Regs:$rhs),
+ (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$lhs, CPU16Regs:$rhs),
+ (LiRxImmX16 1))>;
+
+//
+// For constants, llvm transforms this to:
+// x > (k -1) and then reverses the operands to use setlt. So this pattern
+// is not used now by the compiler. (Presumably checking that k-1 does not
+// overflow). The compiler never uses this at a the current time, due to
+// other optimizations.
+//
+//def: Mips16Pat
+// <(setge CPU16Regs:$lhs, immSExt16:$rhs),
+// (XorRxRxRy16 (SltiCCRxImmX16 CPU16Regs:$lhs, immSExt16:$rhs),
+// (LiRxImmX16 1))>;
+
+// This catches the x >= -32768 case by transforming it to x > -32769
+//
+def: Mips16Pat
+ <(setgt CPU16Regs:$lhs, -32769),
+ (XorRxRxRy16 (SltiCCRxImmX16 CPU16Regs:$lhs, -32768),
+ (LiRxImmX16 1))>;
+
+//
+// setgt
+//
+//
+
+def: Mips16Pat
+ <(setgt CPU16Regs:$lhs, CPU16Regs:$rhs),
+ (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs)>;
+
+//
+// setle
+//
+def: Mips16Pat
+ <(setle CPU16Regs:$lhs, CPU16Regs:$rhs),
+ (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImm16 1))>;
+
+//
+// setlt
+//
+def: SetCC_R16<setlt, SltCCRxRy16>;
+
+def: SetCC_I16<setlt, immSExt16, SltiCCRxImmX16>;
+
+//
+// setne
+//
+def : Mips16Pat
+ <(setne CPU16Regs:$lhs,CPU16Regs:$rhs),
+ (SltuCCRxRy16 (LiRxImmX16 0),
+ (XorRxRxRy16 CPU16Regs:$lhs, CPU16Regs:$rhs))>;
+
+
+//
+// setuge
+//
+def: Mips16Pat
+ <(setuge CPU16Regs:$lhs, CPU16Regs:$rhs),
+ (XorRxRxRy16 (SltuCCRxRy16 CPU16Regs:$lhs, CPU16Regs:$rhs),
+ (LiRxImmX16 1))>;
+
+// this pattern will never be used because the compiler will transform
+// x >= k to x > (k - 1) and then use SLT
+//
+//def: Mips16Pat
+// <(setuge CPU16Regs:$lhs, immZExt16:$rhs),
+// (XorRxRxRy16 (SltiuCCRxImmX16 CPU16Regs:$lhs, immZExt16:$rhs),
+// (LiRxImmX16 1))>;
+
+//
+// setugt
+//
+def: Mips16Pat
+ <(setugt CPU16Regs:$lhs, CPU16Regs:$rhs),
+ (SltuCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs)>;
+
+//
+// setule
+//
+def: Mips16Pat
+ <(setule CPU16Regs:$lhs, CPU16Regs:$rhs),
+ (XorRxRxRy16 (SltuCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImmX16 1))>;
+
+//
+// setult
+//
+def: SetCC_R16<setult, SltuCCRxRy16>;
+
+def: SetCC_I16<setult, immSExt16, SltiuCCRxImmX16>;
+
+def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
+ (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>;
+
+// hi/lo relocs
+
+def : Mips16Pat<(MipsHi tglobaladdr:$in),
+ (SllX16 (LiRxImmX16 tglobaladdr:$in), 16)>;
+def : Mips16Pat<(MipsHi tjumptable:$in),
+ (SllX16 (LiRxImmX16 tjumptable:$in), 16)>;
+def : Mips16Pat<(MipsHi tglobaltlsaddr:$in),
+ (SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>;
+
+// wrapper_pic
+class Wrapper16Pat<SDNode node, Instruction ADDiuOp, RegisterClass RC>:
+ Mips16Pat<(MipsWrapper RC:$gp, node:$in),
+ (ADDiuOp RC:$gp, node:$in)>;
+
+
+def : Wrapper16Pat<tglobaladdr, AddiuRxRxImmX16, CPU16Regs>;
+def : Wrapper16Pat<tglobaltlsaddr, AddiuRxRxImmX16, CPU16Regs>;
+
+def : Mips16Pat<(i32 (extloadi8 addr16:$src)),
+ (LbuRxRyOffMemX16 addr16:$src)>;
+def : Mips16Pat<(i32 (extloadi16 addr16:$src)),
+ (LhuRxRyOffMemX16 addr16:$src)>;
diff --git a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
new file mode 100644
index 000000000000..6cca2276856d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -0,0 +1,151 @@
+
+//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16RegisterInfo.h"
+#include "Mips16InstrInfo.h"
+#include "Mips.h"
+#include "Mips16InstrInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST,
+ const Mips16InstrInfo &I)
+ : MipsRegisterInfo(ST), TII(I) {}
+
+bool Mips16RegisterInfo::requiresRegisterScavenging
+ (const MachineFunction &MF) const {
+ return true;
+}
+bool Mips16RegisterInfo::requiresFrameIndexScavenging
+ (const MachineFunction &MF) const {
+ return true;
+}
+
+bool Mips16RegisterInfo::useFPForScavengingIndex
+ (const MachineFunction &MF) const {
+ return false;
+}
+
+bool Mips16RegisterInfo::saveScavengerRegister
+ (MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ DebugLoc DL;
+ TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true);
+ TII.copyPhysReg(MBB, UseMI, DL, Reg, Mips::T0, true);
+ return true;
+}
+
+const TargetRegisterClass *
+Mips16RegisterInfo::intRegClass(unsigned Size) const {
+ assert(Size == 4);
+ return &Mips::CPU16RegsRegClass;
+}
+
+void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
+ unsigned OpNo, int FrameIndex,
+ uint64_t StackSize,
+ int64_t SPOffset) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The following stack frame objects are always
+ // referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
+ FrameReg = Mips::SP;
+ else {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (TFI->hasFP(MF)) {
+ FrameReg = Mips::S0;
+ }
+ else {
+ if ((MI.getNumOperands()> OpNo+2) && MI.getOperand(OpNo+2).isReg())
+ FrameReg = MI.getOperand(OpNo+2).getReg();
+ else
+ FrameReg = Mips::SP;
+ }
+ }
+ // Calculate final offset.
+ // - There is no need to change the offset if the frame object
+ // is one of the
+ // following: an outgoing argument, pointer to a dynamically allocated
+ // stack space or a $gp restore location,
+ // - If the frame object is any of the following,
+ // its offset must be adjusted
+ // by adding the size of the stack:
+ // incoming argument, callee-saved register location or local variable.
+ int64_t Offset;
+ bool IsKill = false;
+ Offset = SPOffset + (int64_t)StackSize;
+ Offset += MI.getOperand(OpNo + 1).getImm();
+
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ if (!MI.isDebugValue() && ( ((FrameReg != Mips::SP) && !isInt<16>(Offset)) ||
+ ((FrameReg == Mips::SP) && !isInt<15>(Offset)) )) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned NewImm;
+ FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm);
+ Offset = SignExtend64<16>(NewImm);
+ IsKill = true;
+ }
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill);
+ MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
+
+
+}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h
new file mode 100644
index 000000000000..2b3d2b1a4ecb
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h
@@ -0,0 +1,50 @@
+//===-- Mips16RegisterInfo.h - Mips16 Register Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16REGISTERINFO_H
+#define MIPS16REGISTERINFO_H
+
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+class Mips16InstrInfo;
+
+class Mips16RegisterInfo : public MipsRegisterInfo {
+ const Mips16InstrInfo &TII;
+public:
+ Mips16RegisterInfo(const MipsSubtarget &Subtarget,
+ const Mips16InstrInfo &TII);
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
+
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
new file mode 100644
index 000000000000..846a8224af35
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -0,0 +1,392 @@
+//===- Mips64InstrInfo.td - Mips64 Instruction Information -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips64 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def shamt_64 : Operand<i64>;
+
+// Unsigned Operand
+def uimm16_64 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// Transformation Function - get Imm - 32.
+def Subtract32 : SDNodeXForm<imm, [{
+ return getImm(N, (unsigned)N->getZExtValue() - 32);
+}]>;
+
+// shamt must fit in 6 bits.
+def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+let DecoderNamespace = "Mips64" in {
+
+multiclass Atomic2Ops64<PatFrag Op> {
+ def NAME : Atomic2Ops<Op, CPU64Regs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Atomic2Ops<Op, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
+ let isCodeGenOnly = 1;
+ }
+}
+
+multiclass AtomicCmpSwap64<PatFrag Op> {
+ def NAME : AtomicCmpSwap<Op, CPU64Regs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : AtomicCmpSwap<Op, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
+ let isCodeGenOnly = 1;
+ }
+}
+}
+let usesCustomInserter = 1, Predicates = [HasStdEnc],
+ DecoderNamespace = "Mips64" in {
+ defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64>;
+ defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64>;
+ defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64>;
+ defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64>;
+ defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64>;
+ defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64>;
+ defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64>;
+ defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>;
+}
+
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC128 : LoadM<"load_ac128", ACRegs128>;
+ defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>;
+}
+
+def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>;
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+let DecoderNamespace = "Mips64" in {
+/// Arithmetic Instructions (ALU Immediate)
+def DADDi : ArithLogicI<"daddi", simm16_64, CPU64RegsOpnd>, ADDI_FM<0x18>;
+def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64RegsOpnd, immSExt16, add>,
+ ADDI_FM<0x19>, IsAsCheapAsAMove;
+def DANDi : ArithLogicI<"andi", uimm16_64, CPU64RegsOpnd, immZExt16, and>,
+ ADDI_FM<0xc>;
+def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, CPU64Regs>,
+ SLTI_FM<0xa>;
+def SLTiu64 : SetCC_I<"sltiu", setult, simm16_64, immSExt16, CPU64Regs>,
+ SLTI_FM<0xb>;
+def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64RegsOpnd, immZExt16, or>,
+ ADDI_FM<0xd>;
+def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64RegsOpnd, immZExt16, xor>,
+ ADDI_FM<0xe>;
+def LUi64 : LoadUpper<"lui", CPU64Regs, uimm16_64>, LUI_FM;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def DADD : ArithLogicR<"dadd", CPU64RegsOpnd>, ADD_FM<0, 0x2c>;
+def DADDu : ArithLogicR<"daddu", CPU64RegsOpnd, 1, IIAlu, add>,
+ ADD_FM<0, 0x2d>;
+def DSUBu : ArithLogicR<"dsubu", CPU64RegsOpnd, 0, IIAlu, sub>,
+ ADD_FM<0, 0x2f>;
+def SLT64 : SetCC_R<"slt", setlt, CPU64Regs>, ADD_FM<0, 0x2a>;
+def SLTu64 : SetCC_R<"sltu", setult, CPU64Regs>, ADD_FM<0, 0x2b>;
+def AND64 : ArithLogicR<"and", CPU64RegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
+def OR64 : ArithLogicR<"or", CPU64RegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
+def XOR64 : ArithLogicR<"xor", CPU64RegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
+def NOR64 : LogicNOR<"nor", CPU64RegsOpnd>, ADD_FM<0, 0x27>;
+
+/// Shift Instructions
+def DSLL : shift_rotate_imm<"dsll", shamt, CPU64RegsOpnd, shl, immZExt6>,
+ SRA_FM<0x38, 0>;
+def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64RegsOpnd, srl, immZExt6>,
+ SRA_FM<0x3a, 0>;
+def DSRA : shift_rotate_imm<"dsra", shamt, CPU64RegsOpnd, sra, immZExt6>,
+ SRA_FM<0x3b, 0>;
+def DSLLV : shift_rotate_reg<"dsllv", CPU64RegsOpnd, shl>, SRLV_FM<0x14, 0>;
+def DSRLV : shift_rotate_reg<"dsrlv", CPU64RegsOpnd, srl>, SRLV_FM<0x16, 0>;
+def DSRAV : shift_rotate_reg<"dsrav", CPU64RegsOpnd, sra>, SRLV_FM<0x17, 0>;
+def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64RegsOpnd>, SRA_FM<0x3c, 0>;
+def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64RegsOpnd>, SRA_FM<0x3e, 0>;
+def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64RegsOpnd>, SRA_FM<0x3f, 0>;
+}
+// Rotate Instructions
+let Predicates = [HasMips64r2, HasStdEnc],
+ DecoderNamespace = "Mips64" in {
+ def DROTR : shift_rotate_imm<"drotr", shamt, CPU64RegsOpnd, rotr, immZExt6>,
+ SRA_FM<0x3a, 1>;
+ def DROTRV : shift_rotate_reg<"drotrv", CPU64RegsOpnd, rotr>,
+ SRLV_FM<0x16, 1>;
+}
+
+let DecoderNamespace = "Mips64" in {
+/// Load and Store Instructions
+/// aligned
+defm LB64 : LoadM<"lb", CPU64Regs, sextloadi8>, LW_FM<0x20>;
+defm LBu64 : LoadM<"lbu", CPU64Regs, zextloadi8>, LW_FM<0x24>;
+defm LH64 : LoadM<"lh", CPU64Regs, sextloadi16>, LW_FM<0x21>;
+defm LHu64 : LoadM<"lhu", CPU64Regs, zextloadi16>, LW_FM<0x25>;
+defm LW64 : LoadM<"lw", CPU64Regs, sextloadi32>, LW_FM<0x23>;
+defm LWu64 : LoadM<"lwu", CPU64Regs, zextloadi32>, LW_FM<0x27>;
+defm SB64 : StoreM<"sb", CPU64Regs, truncstorei8>, LW_FM<0x28>;
+defm SH64 : StoreM<"sh", CPU64Regs, truncstorei16>, LW_FM<0x29>;
+defm SW64 : StoreM<"sw", CPU64Regs, truncstorei32>, LW_FM<0x2b>;
+defm LD : LoadM<"ld", CPU64Regs, load>, LW_FM<0x37>;
+defm SD : StoreM<"sd", CPU64Regs, store>, LW_FM<0x3f>;
+
+/// load/store left/right
+defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>;
+defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, LW_FM<0x26>;
+defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>;
+defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>;
+
+defm LDL : LoadLeftRightM<"ldl", MipsLDL, CPU64Regs>, LW_FM<0x1a>;
+defm LDR : LoadLeftRightM<"ldr", MipsLDR, CPU64Regs>, LW_FM<0x1b>;
+defm SDL : StoreLeftRightM<"sdl", MipsSDL, CPU64Regs>, LW_FM<0x2c>;
+defm SDR : StoreLeftRightM<"sdr", MipsSDR, CPU64Regs>, LW_FM<0x2d>;
+
+/// Load-linked, Store-conditional
+let Predicates = [NotN64, HasStdEnc] in {
+ def LLD : LLBase<"lld", CPU64RegsOpnd, mem>, LW_FM<0x34>;
+ def SCD : SCBase<"scd", CPU64RegsOpnd, mem>, LW_FM<0x3c>;
+}
+
+let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in {
+ def LLD_P8 : LLBase<"lld", CPU64RegsOpnd, mem64>, LW_FM<0x34>;
+ def SCD_P8 : SCBase<"scd", CPU64RegsOpnd, mem64>, LW_FM<0x3c>;
+}
+
+/// Jump and Branch Instructions
+def JR64 : IndirectBranch<CPU64Regs>, MTLO_FM<8>;
+def BEQ64 : CBranch<"beq", seteq, CPU64Regs>, BEQ_FM<4>;
+def BNE64 : CBranch<"bne", setne, CPU64Regs>, BEQ_FM<5>;
+def BGEZ64 : CBranchZero<"bgez", setge, CPU64Regs>, BGEZ_FM<1, 1>;
+def BGTZ64 : CBranchZero<"bgtz", setgt, CPU64Regs>, BGEZ_FM<7, 0>;
+def BLEZ64 : CBranchZero<"blez", setle, CPU64Regs>, BGEZ_FM<6, 0>;
+def BLTZ64 : CBranchZero<"bltz", setlt, CPU64Regs>, BGEZ_FM<1, 0>;
+}
+let DecoderNamespace = "Mips64" in
+def JALR64 : JumpLinkReg<"jalr", CPU64Regs>, JALR_FM;
+def JALR64Pseudo : JumpLinkRegPseudo<CPU64Regs, JALR64, RA_64>;
+def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, MTLO_FM<8>, IsTailCall;
+
+let DecoderNamespace = "Mips64" in {
+/// Multiply and Divide Instructions.
+def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
+ MULT_FM<0, 0x1c>;
+def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
+ MULT_FM<0, 0x1d>;
+def PseudoDMULT : MultDivPseudo<DMULT, ACRegs128, CPU64RegsOpnd, MipsMult,
+ IIImul>;
+def PseudoDMULTu : MultDivPseudo<DMULTu, ACRegs128, CPU64RegsOpnd, MipsMultu,
+ IIImul>;
+def DSDIV : Div<"ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1e>;
+def DUDIV : Div<"ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1f>;
+def PseudoDSDIV : MultDivPseudo<DSDIV, ACRegs128, CPU64RegsOpnd, MipsDivRem,
+ IIIdiv, 0>;
+def PseudoDUDIV : MultDivPseudo<DUDIV, ACRegs128, CPU64RegsOpnd, MipsDivRemU,
+ IIIdiv, 0>;
+
+def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>;
+def MTLO64 : MoveToLOHI<"mtlo", CPU64Regs, [LO64]>, MTLO_FM<0x13>;
+def MFHI64 : MoveFromLOHI<"mfhi", CPU64Regs, [HI64]>, MFLO_FM<0x10>;
+def MFLO64 : MoveFromLOHI<"mflo", CPU64Regs, [LO64]>, MFLO_FM<0x12>;
+
+/// Sign Ext In Register Instructions.
+def SEB64 : SignExtInReg<"seb", i8, CPU64Regs>, SEB_FM<0x10, 0x20>;
+def SEH64 : SignExtInReg<"seh", i16, CPU64Regs>, SEB_FM<0x18, 0x20>;
+
+/// Count Leading
+def DCLZ : CountLeading0<"dclz", CPU64RegsOpnd>, CLO_FM<0x24>;
+def DCLO : CountLeading1<"dclo", CPU64RegsOpnd>, CLO_FM<0x25>;
+
+/// Double Word Swap Bytes/HalfWords
+def DSBH : SubwordSwap<"dsbh", CPU64RegsOpnd>, SEB_FM<2, 0x24>;
+def DSHD : SubwordSwap<"dshd", CPU64RegsOpnd>, SEB_FM<5, 0x24>;
+
+def LEA_ADDiu64 : EffectiveAddress<"daddiu", CPU64Regs, mem_ea_64>, LW_FM<0x19>;
+
+}
+let DecoderNamespace = "Mips64" in {
+def RDHWR64 : ReadHardware<CPU64Regs, HW64RegsOpnd>, RDHWR_FM;
+
+def DEXT : ExtBase<"dext", CPU64RegsOpnd>, EXT_FM<3>;
+let Pattern = []<dag> in {
+ def DEXTU : ExtBase<"dextu", CPU64RegsOpnd>, EXT_FM<2>;
+ def DEXTM : ExtBase<"dextm", CPU64RegsOpnd>, EXT_FM<1>;
+}
+def DINS : InsBase<"dins", CPU64RegsOpnd>, EXT_FM<7>;
+let Pattern = []<dag> in {
+ def DINSU : InsBase<"dinsu", CPU64RegsOpnd>, EXT_FM<6>;
+ def DINSM : InsBase<"dinsm", CPU64RegsOpnd>, EXT_FM<5>;
+}
+
+let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
+ def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "dsll\t$rd, $rt, 32", [], IIAlu>;
+ def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+ def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+}
+}
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// extended loads
+let Predicates = [NotN64, HasStdEnc] in {
+ def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>;
+}
+let Predicates = [IsN64, HasStdEnc] in {
+ def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64_P8 addr:$src)>;
+}
+
+// hi/lo relocs
+def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
+def : MipsPat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
+def : MipsPat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
+def : MipsPat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
+def : MipsPat<(MipsHi texternalsym:$in), (LUi64 texternalsym:$in)>;
+
+def : MipsPat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : MipsPat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
+def : MipsPat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
+def : MipsPat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+def : MipsPat<(MipsLo tglobaltlsaddr:$in),
+ (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
+def : MipsPat<(MipsLo texternalsym:$in), (DADDiu ZERO_64, texternalsym:$in)>;
+
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
+ (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
+ (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
+ (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
+
+def : WrapperPat<tglobaladdr, DADDiu, CPU64Regs>;
+def : WrapperPat<tconstpool, DADDiu, CPU64Regs>;
+def : WrapperPat<texternalsym, DADDiu, CPU64Regs>;
+def : WrapperPat<tblockaddress, DADDiu, CPU64Regs>;
+def : WrapperPat<tjumptable, DADDiu, CPU64Regs>;
+def : WrapperPat<tglobaltlsaddr, DADDiu, CPU64Regs>;
+
+defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
+ ZERO_64>;
+
+// setcc patterns
+defm : SeteqPats<CPU64Regs, SLTiu64, XOR64, SLTu64, ZERO_64>;
+defm : SetlePats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgtPats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
+
+// truncate
+def : MipsPat<(i32 (trunc CPU64Regs:$src)),
+ (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>,
+ Requires<[IsN64, HasStdEnc]>;
+
+// 32-to-64-bit extension
+def : MipsPat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : MipsPat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
+def : MipsPat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+
+// Sign extend in register
+def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)),
+ (SLL64_64 CPU64Regs:$src)>;
+
+// bswap MipsPattern
+def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
+
+// mflo/hi patterns.
+def : MipsPat<(i64 (ExtractLOHI ACRegs128:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegs128:$ac, imm:$lohi_idx)>;
+
+//===----------------------------------------------------------------------===//
+// Instruction aliases
+//===----------------------------------------------------------------------===//
+def : InstAlias<"move $dst, $src",
+ (DADDu CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"move $dst, $src",
+ (OR64 CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"and $rs, $rt, $imm",
+ (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"slt $rs, $rt, $imm",
+ (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"xor $rs, $rt, $imm",
+ (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"not $rt, $rs",
+ (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>;
+def : InstAlias<"jalr $rs", (JALR64 RA_64, CPU64Regs:$rs)>,
+ Requires<[HasMips64]>;
+def : InstAlias<"jal $rs", (JALR64 RA_64, CPU64Regs:$rs), 0>,
+ Requires<[HasMips64]>;
+def : InstAlias<"jal $rd,$rs", (JALR64 CPU64Regs:$rd, CPU64Regs:$rs), 0>,
+ Requires<[HasMips64]>;
+def : InstAlias<"daddu $rs, $rt, $imm",
+ (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
+ 1>;
+def : InstAlias<"dadd $rs, $rt, $imm",
+ (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
+ 1>;
+def : InstAlias<"or $rs, $rt, $imm",
+ (ORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>, Requires<[HasMips64]>;
+/// Move between CPU and coprocessor registers
+
+let DecoderNamespace = "Mips64" in {
+def DMFC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt),
+ (ins CPU64RegsOpnd:$rd, uimm16:$sel),
+ "dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>;
+def DMTC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel),
+ (ins CPU64RegsOpnd:$rt),
+ "dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>;
+def DMFC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt),
+ (ins CPU64RegsOpnd:$rd, uimm16:$sel),
+ "dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>;
+def DMTC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel),
+ (ins CPU64RegsOpnd:$rt),
+ "dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>;
+}
+
+// Two operand (implicit 0 selector) versions:
+def : InstAlias<"dmfc0 $rt, $rd",
+ (DMFC0_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>;
+def : InstAlias<"dmtc0 $rt, $rd",
+ (DMTC0_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>;
+def : InstAlias<"dmfc2 $rt, $rd",
+ (DMFC2_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>;
+def : InstAlias<"dmtc2 $rt, $rd",
+ (DMTC2_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>;
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp
new file mode 100644
index 000000000000..99b163ec33ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -0,0 +1,153 @@
+//===-- MipsAnalyzeImmediate.cpp - Analyze Immediates ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsAnalyzeImmediate.h"
+#include "Mips.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+MipsAnalyzeImmediate::Inst::Inst(unsigned O, unsigned I) : Opc(O), ImmOpnd(I) {}
+
+// Add I to the instruction sequences.
+void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) {
+ // Add an instruction seqeunce consisting of just I.
+ if (SeqLs.empty()) {
+ SeqLs.push_back(InstSeq(1, I));
+ return;
+ }
+
+ for (InstSeqLs::iterator Iter = SeqLs.begin(); Iter != SeqLs.end(); ++Iter)
+ Iter->push_back(I);
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ GetInstSeqLs((Imm + 0x8000ULL) & 0xffffffffffff0000ULL, RemSize, SeqLs);
+ AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffffULL));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsORi(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ GetInstSeqLs(Imm & 0xffffffffffff0000ULL, RemSize, SeqLs);
+ AddInstr(SeqLs, Inst(ORi, Imm & 0xffffULL));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ unsigned Shamt = CountTrailingZeros_64(Imm);
+ GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs);
+ AddInstr(SeqLs, Inst(SLL, Shamt));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLs(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ uint64_t MaskedImm = Imm & (0xffffffffffffffffULL >> (64 - Size));
+
+ // Do nothing if Imm is 0.
+ if (!MaskedImm)
+ return;
+
+ // A single ADDiu will do if RemSize <= 16.
+ if (RemSize <= 16) {
+ AddInstr(SeqLs, Inst(ADDiu, MaskedImm));
+ return;
+ }
+
+ // Shift if the lower 16-bit is cleared.
+ if (!(Imm & 0xffff)) {
+ GetInstSeqLsSLL(Imm, RemSize, SeqLs);
+ return;
+ }
+
+ GetInstSeqLsADDiu(Imm, RemSize, SeqLs);
+
+ // If bit 15 is cleared, it doesn't make a difference whether the last
+ // instruction is an ADDiu or ORi. In that case, do not call GetInstSeqLsORi.
+ if (Imm & 0x8000) {
+ InstSeqLs SeqLsORi;
+ GetInstSeqLsORi(Imm, RemSize, SeqLsORi);
+ SeqLs.insert(SeqLs.end(), SeqLsORi.begin(), SeqLsORi.end());
+ }
+}
+
+// Replace a ADDiu & SLL pair with a LUi.
+// e.g. the following two instructions
+// ADDiu 0x0111
+// SLL 18
+// are replaced with
+// LUi 0x444
+void MipsAnalyzeImmediate::ReplaceADDiuSLLWithLUi(InstSeq &Seq) {
+ // Check if the first two instructions are ADDiu and SLL and the shift amount
+ // is at least 16.
+ if ((Seq.size() < 2) || (Seq[0].Opc != ADDiu) ||
+ (Seq[1].Opc != SLL) || (Seq[1].ImmOpnd < 16))
+ return;
+
+ // Sign-extend and shift operand of ADDiu and see if it still fits in 16-bit.
+ int64_t Imm = SignExtend64<16>(Seq[0].ImmOpnd);
+ int64_t ShiftedImm = (uint64_t)Imm << (Seq[1].ImmOpnd - 16);
+
+ if (!isInt<16>(ShiftedImm))
+ return;
+
+ // Replace the first instruction and erase the second.
+ Seq[0].Opc = LUi;
+ Seq[0].ImmOpnd = (unsigned)(ShiftedImm & 0xffff);
+ Seq.erase(Seq.begin() + 1);
+}
+
+void MipsAnalyzeImmediate::GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts) {
+ InstSeqLs::iterator ShortestSeq = SeqLs.end();
+ // The length of an instruction sequence is at most 7.
+ unsigned ShortestLength = 8;
+
+ for (InstSeqLs::iterator S = SeqLs.begin(); S != SeqLs.end(); ++S) {
+ ReplaceADDiuSLLWithLUi(*S);
+ assert(S->size() <= 7);
+
+ if (S->size() < ShortestLength) {
+ ShortestSeq = S;
+ ShortestLength = S->size();
+ }
+ }
+
+ Insts.clear();
+ Insts.append(ShortestSeq->begin(), ShortestSeq->end());
+}
+
+const MipsAnalyzeImmediate::InstSeq
+&MipsAnalyzeImmediate::Analyze(uint64_t Imm, unsigned Size,
+ bool LastInstrIsADDiu) {
+ this->Size = Size;
+
+ if (Size == 32) {
+ ADDiu = Mips::ADDiu;
+ ORi = Mips::ORi;
+ SLL = Mips::SLL;
+ LUi = Mips::LUi;
+ } else {
+ ADDiu = Mips::DADDiu;
+ ORi = Mips::ORi64;
+ SLL = Mips::DSLL;
+ LUi = Mips::LUi64;
+ }
+
+ InstSeqLs SeqLs;
+
+ // Get the list of instruction sequences.
+ if (LastInstrIsADDiu | !Imm)
+ GetInstSeqLsADDiu(Imm, Size, SeqLs);
+ else
+ GetInstSeqLs(Imm, Size, SeqLs);
+
+ // Set Insts to the shortest instruction sequence.
+ GetShortestSeq(SeqLs, Insts);
+
+ return Insts;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.h b/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.h
new file mode 100644
index 000000000000..a094ddae45de
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -0,0 +1,63 @@
+//===-- MipsAnalyzeImmediate.h - Analyze Immediates ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MIPS_ANALYZE_IMMEDIATE_H
+#define MIPS_ANALYZE_IMMEDIATE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+ class MipsAnalyzeImmediate {
+ public:
+ struct Inst {
+ unsigned Opc, ImmOpnd;
+ Inst(unsigned Opc, unsigned ImmOpnd);
+ };
+ typedef SmallVector<Inst, 7 > InstSeq;
+
+ /// Analyze - Get an instrucion sequence to load immediate Imm. The last
+ /// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is
+ /// true;
+ const InstSeq &Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu);
+ private:
+ typedef SmallVector<InstSeq, 5> InstSeqLs;
+
+ /// AddInstr - Add I to all instruction sequences in SeqLs.
+ void AddInstr(InstSeqLs &SeqLs, const Inst &I);
+
+ /// GetInstSeqLsADDiu - Get instrucion sequences which end with an ADDiu to
+ /// load immediate Imm
+ void GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// GetInstSeqLsORi - Get instrucion sequences which end with an ORi to
+ /// load immediate Imm
+ void GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// GetInstSeqLsSLL - Get instrucion sequences which end with a SLL to
+ /// load immediate Imm
+ void GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// GetInstSeqLs - Get instrucion sequences to load immediate Imm.
+ void GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi.
+ void ReplaceADDiuSLLWithLUi(InstSeq &Seq);
+
+ /// GetShortestSeq - Find the shortest instruction sequence in SeqLs and
+ /// return it in Insts.
+ void GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts);
+
+ unsigned Size;
+ unsigned ADDiu, ORi, SLL, LUi;
+ InstSeq Insts;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
new file mode 100644
index 000000000000..1876cb6ffae4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -0,0 +1,597 @@
+//===-- MipsAsmPrinter.cpp - Mips LLVM Assembly Printer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MIPS assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-asm-printer"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsELFStreamer.h"
+#include "Mips.h"
+#include "MipsAsmPrinter.h"
+#include "MipsInstrInfo.h"
+#include "MipsMCInstLower.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ MipsFI = MF.getInfo<MipsFunctionInfo>();
+ AsmPrinter::runOnMachineFunction(MF);
+ return true;
+}
+
+bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) {
+ MCOp = MCInstLowering.LowerOperand(MO);
+ return MCOp.isValid();
+}
+
+#include "MipsGenMCPseudoLowering.inc"
+
+void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (MI->isDebugValue()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ PrintDebugValueComment(MI, OS);
+ return;
+ }
+
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+ do {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, &*I))
+ continue;
+
+ // The inMips16Mode() test is not permanent.
+ // Some instructions are marked as pseudo right now which
+ // would make the test fail for the wrong reason but
+ // that will be fixed soon. We need this here because we are
+ // removing another test for this situation downstream in the
+ // callchain.
+ //
+ if (I->isPseudo() && !Subtarget->inMips16Mode())
+ llvm_unreachable("Pseudo opcode found in EmitInstruction()");
+
+ MCInst TmpInst0;
+ MCInstLowering.Lower(I, TmpInst0);
+ OutStreamer.EmitInstruction(TmpInst0);
+ } while ((++I != E) && I->isInsideBundle()); // Delay slot check
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Mips Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "(f)mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contain a little endian bitset indicating which registers are
+// saved on function prologue (e.g. with a 0x80000000 mask, the
+// assembler knows the register 31 (RA) is saved at prologue.
+// offset - the position before stack pointer subtraction indicating where
+// the first saved register on prologue is located. (e.g. with a
+//
+// Consider the following function prologue:
+//
+// .frame $fp,48,$ra
+// .mask 0xc0000000,-8
+// addiu $sp, $sp, -48
+// sw $ra, 40($sp)
+// sw $fp, 36($sp)
+//
+// With a 0xc0000000 mask, the assembler knows the register 31 (RA) and
+// 30 (FP) are saved at prologue. As the save order on prologue is from
+// left to right, RA is saved first. A -8 offset means that after the
+// stack pointer subtration, the first register in the mask (RA) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mask directives
+//===----------------------------------------------------------------------===//
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
+ // CPU and FPU Saved Registers Bitmasks
+ unsigned CPUBitmask = 0, FPUBitmask = 0;
+ int CPUTopSavedRegOff, FPUTopSavedRegOff;
+
+ // Set the CPU and FPU Bitmasks
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ // size of stack area to which FP callee-saved regs are saved.
+ unsigned CPURegSize = Mips::CPURegsRegClass.getSize();
+ unsigned FGR32RegSize = Mips::FGR32RegClass.getSize();
+ unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize();
+ bool HasAFGR64Reg = false;
+ unsigned CSFPRegsSize = 0;
+ unsigned i, e = CSI.size();
+
+ // Set FPU Bitmask.
+ for (i = 0; i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Mips::CPURegsRegClass.contains(Reg))
+ break;
+
+ unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
+ if (Mips::AFGR64RegClass.contains(Reg)) {
+ FPUBitmask |= (3 << RegNum);
+ CSFPRegsSize += AFGR64RegSize;
+ HasAFGR64Reg = true;
+ continue;
+ }
+
+ FPUBitmask |= (1 << RegNum);
+ CSFPRegsSize += FGR32RegSize;
+ }
+
+ // Set CPU Bitmask.
+ for (; i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
+ CPUBitmask |= (1 << RegNum);
+ }
+
+ // FP Regs are saved right below where the virtual frame pointer points to.
+ FPUTopSavedRegOff = FPUBitmask ?
+ (HasAFGR64Reg ? -AFGR64RegSize : -FGR32RegSize) : 0;
+
+ // CPU Regs are saved below FP Regs.
+ CPUTopSavedRegOff = CPUBitmask ? -CSFPRegsSize - CPURegSize : 0;
+
+ // Print CPUBitmask
+ O << "\t.mask \t"; printHex32(CPUBitmask, O);
+ O << ',' << CPUTopSavedRegOff << '\n';
+
+ // Print FPUBitmask
+ O << "\t.fmask\t"; printHex32(FPUBitmask, O);
+ O << "," << FPUTopSavedRegOff << '\n';
+}
+
+// Print a 32 bit hex number with all numbers.
+void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) {
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O.write_hex((Value & (0xF << (i*4))) >> (i*4));
+}
+
+//===----------------------------------------------------------------------===//
+// Frame and Set directives
+//===----------------------------------------------------------------------===//
+
+/// Frame Directive
+void MipsAsmPrinter::emitFrameDirective() {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ unsigned stackReg = RI.getFrameRegister(*MF);
+ unsigned returnReg = RI.getRARegister();
+ unsigned stackSize = MF->getFrameInfo()->getStackSize();
+
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.frame\t$" +
+ StringRef(MipsInstPrinter::getRegisterName(stackReg)).lower() +
+ "," + Twine(stackSize) + ",$" +
+ StringRef(MipsInstPrinter::getRegisterName(returnReg)).lower());
+}
+
+/// Emit Set directives.
+const char *MipsAsmPrinter::getCurrentABIString() const {
+ switch (Subtarget->getTargetABI()) {
+ case MipsSubtarget::O32: return "abi32";
+ case MipsSubtarget::N32: return "abiN32";
+ case MipsSubtarget::N64: return "abi64";
+ case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
+ default: llvm_unreachable("Unknown Mips ABI");
+ }
+}
+
+void MipsAsmPrinter::EmitFunctionEntryLabel() {
+ if (OutStreamer.hasRawTextSupport()) {
+ if (Subtarget->inMips16Mode())
+ OutStreamer.EmitRawText(StringRef("\t.set\tmips16"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomips16"));
+ // leave out until FSF available gas has micromips changes
+ // OutStreamer.EmitRawText(StringRef("\t.set\tnomicromips"));
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ }
+
+ if (Subtarget->inMicroMipsMode())
+ if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
+ MES->emitMipsSTOCG(*Subtarget, CurrentFnSym,
+ (unsigned)ELF::STO_MIPS_MICROMIPS);
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void MipsAsmPrinter::EmitFunctionBodyStart() {
+ MCInstLowering.Initialize(Mang, &MF->getContext());
+
+ emitFrameDirective();
+
+ if (OutStreamer.hasRawTextSupport()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printSavedRegsBitmask(OS);
+ OutStreamer.EmitRawText(OS.str());
+ if (!Subtarget->inMips16Mode()) {
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ }
+ }
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void MipsAsmPrinter::EmitFunctionBodyEnd() {
+ // There are instruction for this macros, but they must
+ // always be at the function end, and we can't emit and
+ // break with BB logic.
+ if (OutStreamer.hasRawTextSupport()) {
+ if (!Subtarget->inMips16Mode()) {
+ OutStreamer.EmitRawText(StringRef("\t.set\tat"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ }
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+ }
+}
+
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const {
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *MBB->pred_begin();
+
+ // If the predecessor is a switch statement, assume a jump table
+ // implementation, so it is not a fall through.
+ if (const BasicBlock *bb = Pred->getBasicBlock())
+ if (isa<SwitchInst>(bb->getTerminator()))
+ return false;
+
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Otherwise, check the last instruction.
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->isTerminator()) ;
+
+ return !I->isBarrier();
+}
+
+// Print out an operand for an inline asm expression.
+bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI,OpNum,AsmVariant,ExtraCode,O);
+ case 'X': // hex const int
+ if ((MO.getType()) != MachineOperand::MO_Immediate)
+ return true;
+ O << "0x" << StringRef(utohexstr(MO.getImm())).lower();
+ return false;
+ case 'x': // hex const int (low 16 bits)
+ if ((MO.getType()) != MachineOperand::MO_Immediate)
+ return true;
+ O << "0x" << StringRef(utohexstr(MO.getImm() & 0xffff)).lower();
+ return false;
+ case 'd': // decimal const int
+ if ((MO.getType()) != MachineOperand::MO_Immediate)
+ return true;
+ O << MO.getImm();
+ return false;
+ case 'm': // decimal const int minus 1
+ if ((MO.getType()) != MachineOperand::MO_Immediate)
+ return true;
+ O << MO.getImm() - 1;
+ return false;
+ case 'z': {
+ // $0 if zero, regular printing otherwise
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ int64_t Val = MO.getImm();
+ if (Val)
+ O << Val;
+ else
+ O << "$0";
+ return false;
+ }
+ case 'D': // Second part of a double word register operand
+ case 'L': // Low order register of a double word register operand
+ case 'M': // High order register of a double word register operand
+ {
+ if (OpNum == 0)
+ return true;
+ const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1);
+ if (!FlagsOP.isImm())
+ return true;
+ unsigned Flags = FlagsOP.getImm();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ // Number of registers represented by this operand. We are looking
+ // for 2 for 32 bit mode and 1 for 64 bit mode.
+ if (NumVals != 2) {
+ if (Subtarget->isGP64bit() && NumVals == 1 && MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ O << '$' << MipsInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+ return true;
+ }
+
+ unsigned RegOp = OpNum;
+ if (!Subtarget->isGP64bit()){
+ // Endianess reverses which register holds the high or low value
+ // between M and L.
+ switch(ExtraCode[0]) {
+ case 'M':
+ RegOp = (Subtarget->isLittle()) ? OpNum + 1 : OpNum;
+ break;
+ case 'L':
+ RegOp = (Subtarget->isLittle()) ? OpNum : OpNum + 1;
+ break;
+ case 'D': // Always the second part
+ RegOp = OpNum + 1;
+ }
+ if (RegOp >= MI->getNumOperands())
+ return true;
+ const MachineOperand &MO = MI->getOperand(RegOp);
+ if (!MO.isReg())
+ return true;
+ unsigned Reg = MO.getReg();
+ O << '$' << MipsInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+ }
+ }
+ }
+
+ printOperand(MI, OpNum, O);
+ return false;
+}
+
+bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+
+ return false;
+}
+
+void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ bool closeP = false;
+
+ if (MO.getTargetFlags())
+ closeP = true;
+
+ switch(MO.getTargetFlags()) {
+ case MipsII::MO_GPREL: O << "%gp_rel("; break;
+ case MipsII::MO_GOT_CALL: O << "%call16("; break;
+ case MipsII::MO_GOT: O << "%got("; break;
+ case MipsII::MO_ABS_HI: O << "%hi("; break;
+ case MipsII::MO_ABS_LO: O << "%lo("; break;
+ case MipsII::MO_TLSGD: O << "%tlsgd("; break;
+ case MipsII::MO_GOTTPREL: O << "%gottprel("; break;
+ case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break;
+ case MipsII::MO_TPREL_LO: O << "%tprel_lo("; break;
+ case MipsII::MO_GPOFF_HI: O << "%hi(%neg(%gp_rel("; break;
+ case MipsII::MO_GPOFF_LO: O << "%lo(%neg(%gp_rel("; break;
+ case MipsII::MO_GOT_DISP: O << "%got_disp("; break;
+ case MipsII::MO_GOT_PAGE: O << "%got_page("; break;
+ case MipsII::MO_GOT_OFST: O << "%got_ofst("; break;
+ }
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << '$'
+ << StringRef(MipsInstPrinter::getRegisterName(MO.getReg())).lower();
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress: {
+ MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress());
+ O << BA->getName();
+ break;
+ }
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ if (MO.getOffset())
+ O << "+" << MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ if (closeP) O << ")";
+}
+
+void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MipsAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ // Load/Store memory operands -- imm($reg)
+ // If PIC target the target is loaded as the
+ // pattern lw $25,%call16($28)
+ printOperand(MI, opNum+1, O);
+ O << "(";
+ printOperand(MI, opNum, O);
+ O << ")";
+}
+
+void MipsAsmPrinter::
+printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ // when using stack locations for not load/store instructions
+ // print the same way as all normal 3 operand instructions.
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+}
+
+void MipsAsmPrinter::
+printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
+}
+
+void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ // FIXME: Use SwitchSection.
+
+ // Tell the assembler which ABI we are using
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.section .mdebug." +
+ Twine(getCurrentABIString()));
+
+ // TODO: handle O64 ABI
+ if (OutStreamer.hasRawTextSupport()) {
+ if (Subtarget->isABI_EABI()) {
+ if (Subtarget->isGP32bit())
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64"));
+ }
+ }
+
+ // return to previous section
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t.previous"));
+
+}
+
+void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
+
+ if (OutStreamer.hasRawTextSupport()) return;
+
+ // Emit Mips ELF register info
+ Subtarget->getMReginfo().emitMipsReginfoSectionCG(
+ OutStreamer, getObjFileLowering(), *Subtarget);
+ if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
+ MES->emitELFHeaderFlagsCG(*Subtarget);
+}
+
+MachineLocation
+MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // Handles frame addresses emitted in MipsInstrInfo::emitFrameIndexDebugValue.
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ // TODO: implement
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMipsAsmPrinter() {
+ RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
+ RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
+ RegisterAsmPrinter<MipsAsmPrinter> A(TheMips64Target);
+ RegisterAsmPrinter<MipsAsmPrinter> B(TheMips64elTarget);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
new file mode 100644
index 000000000000..dbdaf266b75f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -0,0 +1,90 @@
+//===-- MipsAsmPrinter.h - Mips LLVM Assembly Printer ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Mips Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSASMPRINTER_H
+#define MIPSASMPRINTER_H
+
+#include "MipsMCInstLower.h"
+#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class MCStreamer;
+class MachineInstr;
+class MachineBasicBlock;
+class Module;
+class raw_ostream;
+
+class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
+
+ void EmitInstrWithMacroNoAT(const MachineInstr *MI);
+
+private:
+ // tblgen'erated function.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+
+public:
+
+ const MipsSubtarget *Subtarget;
+ const MipsFunctionInfo *MipsFI;
+ MipsMCInstLower MCInstLowering;
+
+ explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), MCInstLowering(*this) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Mips Assembly Printer";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ void EmitInstruction(const MachineInstr *MI);
+ void printSavedRegsBitmask(raw_ostream &O);
+ void printHex32(unsigned int Value, raw_ostream &O);
+ void emitFrameDirective();
+ const char *getCurrentABIString() const;
+ virtual void EmitFunctionEntryLabel();
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+ void EmitStartOfAsmFile(Module &M);
+ void EmitEndOfAsmFile(Module &M);
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
new file mode 100644
index 000000000000..462def76cc80
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -0,0 +1,225 @@
+//===-- MipsCallingConv.td - Calling Conventions for Mips --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for Mips architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Mips O32 Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Only the return rules are defined here for O32. The rules for argument
+// passing are defined in MipsISelLowering.cpp.
+def RetCC_MipsO32 : CallingConv<[
+ // i32 are returned in registers V0, V1, A0, A1
+ CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
+
+ // f32 are returned in registers F0, F2
+ CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
+
+ // f64 are returned in register D0, D1
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0, D1]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips N32/64 Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsN : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToRegWithShadow<[A0, A1, A2, A3,
+ T0, T1, T2, T3],
+ [F12, F13, F14, F15,
+ F16, F17, F18, F19]>>,
+
+ CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64],
+ [D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64, D19_64]>>,
+
+ // f32 arguments are passed in single precision FP registers.
+ CCIfType<[f32], CCAssignToRegWithShadow<[F12, F13, F14, F15,
+ F16, F17, F18, F19],
+ [A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // f64 arguments are passed in double precision FP registers.
+ CCIfType<[f64], CCAssignToRegWithShadow<[D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64, D19_64],
+ [A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
+
+// N32/64 variable arguments.
+// All arguments are passed in integer registers.
+def CC_MipsN_VarArg : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ CCIfType<[i32, f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+
+ CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
+
+def RetCC_MipsN : CallingConv<[
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // i64 are returned in registers V0_64, V1_64
+ CCIfType<[i64], CCAssignToReg<[V0_64, V1_64]>>,
+
+ // f32 are returned in registers F0, F2
+ CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
+
+ // f64 are returned in registers D0, D2
+ CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>>
+]>;
+
+// In soft-mode, register A0_64, instead of V1_64, is used to return a long
+// double value.
+def RetCC_F128Soft : CallingConv<[
+ CCIfType<[i64], CCAssignToReg<[V0_64, A0_64]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips EABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsEABI : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+
+ // Single fp arguments are passed in pairs within 32-bit mode
+ CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
+ CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
+
+ CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[F12, F14, F16, F18]>>>,
+
+ // The first 4 double fp arguments are passed in single fp registers.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[D6, D7, D8, D9]>>>,
+
+ // Integer values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Integer values get stored in stack slots that are 8 bytes in
+ // size and 8-byte aligned.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToStack<8, 8>>>
+]>;
+
+def RetCC_MipsEABI : CallingConv<[
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // f32 are returned in registers F0, F1
+ CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+ // f64 are returned in register D0
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips FastCC Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_MipsO32_FastCC : CallingConv<[
+ // f64 arguments are passed in double-precision floating pointer registers.
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7, D8, D9]>>,
+
+ // Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_MipsN_FastCC : CallingConv<[
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, T0_64, T1_64,
+ T2_64, T3_64, T4_64, T5_64, T6_64, T7_64,
+ T8_64, V1_64]>>,
+
+ // f64 arguments are passed in double-precision floating pointer registers.
+ CCIfType<[f64], CCAssignToReg<[D0_64, D1_64, D2_64, D3_64, D4_64, D5_64,
+ D6_64, D7_64, D8_64, D9_64, D10_64, D11_64,
+ D12_64, D13_64, D14_64, D15_64, D16_64, D17_64,
+ D18_64, D19_64]>>,
+
+ // Stack parameter slots for i64 and f64 are 64-bit doublewords and
+ // 8-byte aligned.
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_Mips_FastCC : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers. All scratch registers,
+ // except for AT, V0 and T9, are available to be used as argument registers.
+ CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6,
+ T7, T8, V1]>>,
+
+ // f32 arguments are passed in single-precision floating pointer registers.
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10,
+ F11, F12, F13, F14, F15, F16, F17, F18, F19]>>,
+
+ // Stack parameter slots for i32 and f32 are 32-bit words and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>,
+ CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FastCC>>,
+ CCDelegateTo<CC_MipsN_FastCC>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def RetCC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
+ CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
+ CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
+ CCDelegateTo<RetCC_MipsO32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
+ (sequence "S%u", 7, 0))>;
+
+def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
+ (sequence "S%u", 7, 0))>;
+
+def CSR_N32 : CalleeSavedRegs<(add D31_64, D29_64, D27_64, D25_64, D24_64,
+ D23_64, D22_64, D21_64, RA_64, FP_64, GP_64,
+ (sequence "S%u_64", 7, 0))>;
+
+def CSR_N64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, FP_64,
+ GP_64, (sequence "S%u_64", 7, 0))>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp
new file mode 100644
index 000000000000..1d86d903c12e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -0,0 +1,363 @@
+//===-- Mips/MipsCodeEmitter.cpp - Convert Mips Code to Machine Code ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the Mips machine instructions
+// into relocatable machine code.
+//
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsInstrInfo.h"
+#include "MipsRelocations.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+class MipsCodeEmitter : public MachineFunctionPass {
+ MipsJITInfo *JTI;
+ const MipsInstrInfo *II;
+ const DataLayout *TD;
+ const MipsSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo> ();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+public:
+ MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0),
+ II((const MipsInstrInfo *) tm.getInstrInfo()), TD(tm.getDataLayout()),
+ TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Mips Machine Code Emitter";
+ }
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ void emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB);
+
+private:
+
+ void emitWord(unsigned Word);
+
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ unsigned getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+
+ unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
+ int Offset) const;
+
+ /// Expand pseudo instructions with accumulator register operands.
+ void expandACCInstr(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB, unsigned Opc) const;
+
+ /// \brief Expand pseudo instruction. Return true if MI was expanded.
+ bool expandPseudos(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB) const;
+};
+}
+
+char MipsCodeEmitter::ID = 0;
+
+bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ MipsTargetMachine &Target = static_cast<MipsTargetMachine &>(
+ const_cast<TargetMachine &>(MF.getTarget()));
+
+ JTI = Target.getJITInfo();
+ II = Target.getInstrInfo();
+ TD = Target.getDataLayout();
+ Subtarget = &TM.getSubtarget<MipsSubtarget> ();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = 0;
+ if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ JTI->Initialize(MF, IsPIC, Subtarget->isLittle());
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getName() << "'\n");
+ MCE.startFunction(MF);
+
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB){
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
+ E = MBB->instr_end(); I != E;)
+ emitInstruction(*I++, *MBB);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ // NOTE: This relocations are for static.
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ uint64_t Form = TSFlags & MipsII::FormMask;
+ if (Form == MipsII::FrmJ)
+ return Mips::reloc_mips_26;
+ if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
+ && MI.isBranch())
+ return Mips::reloc_mips_pc16;
+ if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
+ return Mips::reloc_mips_hi;
+ return Mips::reloc_mips_lo;
+}
+
+unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI,
+ unsigned OpNo) const {
+ MachineOperand MO = MI.getOperand(OpNo);
+ if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ else
+ llvm_unreachable("Unexpected jump target operand kind.");
+ return 0;
+}
+
+unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI,
+ unsigned OpNo) const {
+ MachineOperand MO = MI.getOperand(OpNo);
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ return 0;
+}
+
+unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo)) << 16;
+ return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits;
+}
+
+unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // size is encoded as size-1.
+ return getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
+}
+
+unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // size is encoded as pos+size-1.
+ return getMachineOpValue(MI, MI.getOperand(OpNo-1)) +
+ getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO));
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+
+void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), 0,
+ MayNeedFarStub));
+}
+
+void MipsCodeEmitter::emitGlobalAddressUnaligned(const GlobalValue *GV,
+ unsigned Reloc, int Offset) const {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), 0, false));
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset() + Offset,
+ Reloc, const_cast<GlobalValue *>(GV), 0, false));
+}
+
+void MipsCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, 0, 0));
+}
+
+void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, false));
+}
+
+void MipsCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, false));
+}
+
+void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB));
+}
+
+void MipsCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);
+
+ // Expand pseudo instruction. Skip if MI was not expanded.
+ if (((MI->getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) &&
+ !expandPseudos(MI, MBB))
+ return;
+
+ MCE.processDebugLoc(MI->getDebugLoc(), true);
+
+ emitWord(getBinaryCodeForInstr(*MI));
+ ++NumEmitted; // Keep track of the # of mi's emitted
+
+ MCE.processDebugLoc(MI->getDebugLoc(), false);
+}
+
+void MipsCodeEmitter::emitWord(unsigned Word) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Word) << "\n");
+ if (Subtarget->isLittle())
+ MCE.emitWordLE(Word);
+ else
+ MCE.emitWordBE(Word);
+}
+
+void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB,
+ unsigned Opc) const {
+ // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1".
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opc))
+ .addReg(MI->getOperand(1).getReg()).addReg(MI->getOperand(2).getReg());
+}
+
+bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB) const {
+ switch (MI->getOpcode()) {
+ case Mips::NOP:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO)
+ .addReg(Mips::ZERO).addImm(0);
+ break;
+ case Mips::JALRPseudo:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA)
+ .addReg(MI->getOperand(0).getReg());
+ break;
+ case Mips::PseudoMULT:
+ expandACCInstr(MI, MBB, Mips::MULT);
+ break;
+ case Mips::PseudoMULTu:
+ expandACCInstr(MI, MBB, Mips::MULTu);
+ break;
+ case Mips::PseudoSDIV:
+ expandACCInstr(MI, MBB, Mips::SDIV);
+ break;
+ case Mips::PseudoUDIV:
+ expandACCInstr(MI, MBB, Mips::UDIV);
+ break;
+ case Mips::PseudoMADD:
+ expandACCInstr(MI, MBB, Mips::MADD);
+ break;
+ case Mips::PseudoMADDU:
+ expandACCInstr(MI, MBB, Mips::MADDU);
+ break;
+ case Mips::PseudoMSUB:
+ expandACCInstr(MI, MBB, Mips::MSUB);
+ break;
+ case Mips::PseudoMSUBU:
+ expandACCInstr(MI, MBB, Mips::MSUBU);
+ break;
+ default:
+ return false;
+ }
+
+ (MI--)->eraseFromBundle();
+ return true;
+}
+
+/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
+/// code to the specified MCE object.
+FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new MipsCodeEmitter(TM, JCE);
+}
+
+#include "MipsGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/Mips/MipsCondMov.td b/contrib/llvm/lib/Target/Mips/MipsCondMov.td
new file mode 100644
index 000000000000..42e4c99f05d6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsCondMov.td
@@ -0,0 +1,253 @@
+//===-- MipsCondMov.td - Describe Mips Conditional Moves --*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the Conditional Moves implementation.
+//
+//===----------------------------------------------------------------------===//
+
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
+// conditional move instructions.
+// cond:int, data:int
+class CMov_I_I_FT<string opstr, RegisterClass CRC, RegisterClass DRC,
+ InstrItinClass Itin> :
+ InstSE<(outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
+ !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR> {
+ let Constraints = "$F = $rd";
+}
+
+// cond:int, data:float
+class CMov_I_F_FT<string opstr, RegisterClass CRC, RegisterClass DRC,
+ InstrItinClass Itin> :
+ InstSE<(outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F),
+ !strconcat(opstr, "\t$fd, $fs, $rt"), [], Itin, FrmFR> {
+ let Constraints = "$F = $fd";
+}
+
+// cond:float, data:int
+class CMov_F_I_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$rd), (ins RC:$rs, RC:$F),
+ !strconcat(opstr, "\t$rd, $rs, $$fcc0"),
+ [(set RC:$rd, (OpNode RC:$rs, RC:$F))], Itin, FrmFR> {
+ let Uses = [FCR31];
+ let Constraints = "$F = $rd";
+}
+
+// cond:float, data:float
+class CMov_F_F_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fs, RC:$F),
+ !strconcat(opstr, "\t$fd, $fs, $$fcc0"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$F))], Itin, FrmFR> {
+ let Uses = [FCR31];
+ let Constraints = "$F = $fd";
+}
+
+// select patterns
+multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC,
+ Instruction MOVZInst, Instruction SLTOp,
+ Instruction SLTuOp, Instruction SLTiOp,
+ Instruction SLTiuOp> {
+ def : MipsPat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
+ def : MipsPat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setgt CRC:$lhs, immSExt16Plus1:$rhs)),
+ DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, (Plus1 imm:$rhs)), DRC:$F)>;
+ def : MipsPat<(select (i32 (setugt CRC:$lhs, immSExt16Plus1:$rhs)),
+ DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lhs, (Plus1 imm:$rhs)),
+ DRC:$F)>;
+}
+
+multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC,
+ Instruction MOVZInst, Instruction XOROp> {
+ def : MipsPat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
+}
+
+multiclass MovzPats2<RegisterClass CRC, RegisterClass DRC,
+ Instruction MOVZInst, Instruction XORiOp> {
+ def : MipsPat<
+ (select (i32 (seteq CRC:$lhs, immZExt16:$uimm16)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (XORiOp CRC:$lhs, immZExt16:$uimm16), DRC:$F)>;
+}
+
+multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
+ Instruction XOROp> {
+ def : MipsPat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select CRC:$cond, DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
+ def : MipsPat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
+}
+
+// Instantiation of instructions.
+def MOVZ_I_I : CMov_I_I_FT<"movz", CPURegs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xa>;
+let Predicates = [HasStdEnc],
+ DecoderNamespace = "Mips64" in {
+ def MOVZ_I_I64 : CMov_I_I_FT<"movz", CPURegs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xa>;
+ def MOVZ_I64_I : CMov_I_I_FT<"movz", CPU64Regs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xa> {
+ let isCodeGenOnly = 1;
+ }
+ def MOVZ_I64_I64 : CMov_I_I_FT<"movz", CPU64Regs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xa> {
+ let isCodeGenOnly = 1;
+ }
+}
+
+def MOVN_I_I : CMov_I_I_FT<"movn", CPURegs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xb>;
+let Predicates = [HasStdEnc],
+ DecoderNamespace = "Mips64" in {
+ def MOVN_I_I64 : CMov_I_I_FT<"movn", CPURegs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xb>;
+ def MOVN_I64_I : CMov_I_I_FT<"movn", CPU64Regs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xb> {
+ let isCodeGenOnly = 1;
+ }
+ def MOVN_I64_I64 : CMov_I_I_FT<"movn", CPU64Regs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xb> {
+ let isCodeGenOnly = 1;
+ }
+}
+
+def MOVZ_I_S : CMov_I_F_FT<"movz.s", CPURegs, FGR32, IIFmove>,
+ CMov_I_F_FM<18, 16>;
+def MOVZ_I64_S : CMov_I_F_FT<"movz.s", CPU64Regs, FGR32, IIFmove>,
+ CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+}
+
+def MOVN_I_S : CMov_I_F_FT<"movn.s", CPURegs, FGR32, IIFmove>,
+ CMov_I_F_FM<19, 16>;
+def MOVN_I64_S : CMov_I_F_FT<"movn.s", CPU64Regs, FGR32, IIFmove>,
+ CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+}
+
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", CPURegs, AFGR64, IIFmove>,
+ CMov_I_F_FM<18, 17>;
+ def MOVN_I_D32 : CMov_I_F_FT<"movn.d", CPURegs, AFGR64, IIFmove>,
+ CMov_I_F_FM<19, 17>;
+}
+let Predicates = [IsFP64bit, HasStdEnc],
+ DecoderNamespace = "Mips64" in {
+ def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", CPURegs, FGR64, IIFmove>,
+ CMov_I_F_FM<18, 17>;
+ def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", CPU64Regs, FGR64, IIFmove>,
+ CMov_I_F_FM<18, 17> {
+ let isCodeGenOnly = 1;
+ }
+ def MOVN_I_D64 : CMov_I_F_FT<"movn.d", CPURegs, FGR64, IIFmove>,
+ CMov_I_F_FM<19, 17>;
+ def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", CPU64Regs, FGR64, IIFmove>,
+ CMov_I_F_FM<19, 17> {
+ let isCodeGenOnly = 1;
+ }
+}
+
+def MOVT_I : CMov_F_I_FT<"movt", CPURegs, IIAlu, MipsCMovFP_T>, CMov_F_I_FM<1>;
+def MOVT_I64 : CMov_F_I_FT<"movt", CPU64Regs, IIAlu, MipsCMovFP_T>,
+ CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+}
+
+def MOVF_I : CMov_F_I_FT<"movf", CPURegs, IIAlu, MipsCMovFP_F>, CMov_F_I_FM<0>;
+def MOVF_I64 : CMov_F_I_FT<"movf", CPU64Regs, IIAlu, MipsCMovFP_F>,
+ CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+}
+
+def MOVT_S : CMov_F_F_FT<"movt.s", FGR32, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<16, 1>;
+def MOVF_S : CMov_F_F_FT<"movf.s", FGR32, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<16, 0>;
+
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<17, 1>;
+ def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<17, 0>;
+}
+let Predicates = [IsFP64bit, HasStdEnc],
+ DecoderNamespace = "Mips64" in {
+ def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<17, 1>;
+ def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<17, 0>;
+}
+
+// Instantiation of conditional move patterns.
+defm : MovzPats0<CPURegs, CPURegs, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
+defm : MovzPats1<CPURegs, CPURegs, MOVZ_I_I, XOR>;
+defm : MovzPats2<CPURegs, CPURegs, MOVZ_I_I, XORi>;
+let Predicates = [HasMips64, HasStdEnc] in {
+ defm : MovzPats0<CPURegs, CPU64Regs, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>;
+ defm : MovzPats0<CPU64Regs, CPURegs, MOVZ_I_I, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats0<CPU64Regs, CPU64Regs, MOVZ_I_I64, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats1<CPURegs, CPU64Regs, MOVZ_I_I64, XOR>;
+ defm : MovzPats1<CPU64Regs, CPURegs, MOVZ_I64_I, XOR64>;
+ defm : MovzPats1<CPU64Regs, CPU64Regs, MOVZ_I64_I64, XOR64>;
+ defm : MovzPats2<CPURegs, CPU64Regs, MOVZ_I_I64, XORi>;
+ defm : MovzPats2<CPU64Regs, CPURegs, MOVZ_I64_I, XORi64>;
+ defm : MovzPats2<CPU64Regs, CPU64Regs, MOVZ_I64_I64, XORi64>;
+}
+
+defm : MovnPats<CPURegs, CPURegs, MOVN_I_I, XOR>;
+let Predicates = [HasMips64, HasStdEnc] in {
+ defm : MovnPats<CPURegs, CPU64Regs, MOVN_I_I64, XOR>;
+ defm : MovnPats<CPU64Regs, CPURegs, MOVN_I64_I, XOR64>;
+ defm : MovnPats<CPU64Regs, CPU64Regs, MOVN_I64_I64, XOR64>;
+}
+
+defm : MovzPats0<CPURegs, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
+defm : MovzPats1<CPURegs, FGR32, MOVZ_I_S, XOR>;
+defm : MovnPats<CPURegs, FGR32, MOVN_I_S, XOR>;
+let Predicates = [HasMips64, HasStdEnc] in {
+ defm : MovzPats0<CPU64Regs, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats1<CPU64Regs, FGR32, MOVZ_I64_S, XOR64>;
+ defm : MovnPats<CPU64Regs, FGR32, MOVN_I64_S, XOR64>;
+}
+
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ defm : MovzPats0<CPURegs, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>;
+ defm : MovzPats1<CPURegs, AFGR64, MOVZ_I_D32, XOR>;
+ defm : MovnPats<CPURegs, AFGR64, MOVN_I_D32, XOR>;
+}
+let Predicates = [IsFP64bit, HasStdEnc] in {
+ defm : MovzPats0<CPURegs, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>;
+ defm : MovzPats0<CPU64Regs, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats1<CPURegs, FGR64, MOVZ_I_D64, XOR>;
+ defm : MovzPats1<CPU64Regs, FGR64, MOVZ_I64_D64, XOR64>;
+ defm : MovnPats<CPURegs, FGR64, MOVN_I_D64, XOR>;
+ defm : MovnPats<CPU64Regs, FGR64, MOVN_I64_D64, XOR64>;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
new file mode 100644
index 000000000000..b5de1ebad22b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -0,0 +1,85 @@
+//===-- MipsConstantIslandPass.cpp - Emit Pc Relative loads----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+// This pass is used to make Pc relative loads of constants.
+// For now, only Mips16 will use this. While it has the same name and
+// uses many ideas from the LLVM ARM Constant Island Pass, it's not intended
+// to reuse any of the code from the ARM version.
+//
+// Loading constants inline is expensive on Mips16 and it's in general better
+// to place the constant nearby in code space and then it can be loaded with a
+// simple 16 bit load instruction.
+//
+// The constants can be not just numbers but addresses of functions and labels.
+// This can be particularly helpful in static relocation mode for embedded
+// non linux targets.
+//
+//
+
+#define DEBUG_TYPE "mips-constant-islands"
+
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+
+ class MipsConstantIslands : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ MipsConstantIslands(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm),
+ TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_),
+ ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()) {}
+
+ virtual const char *getPassName() const {
+ return "Mips Constant Islands";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+
+
+ const TargetMachine &TM;
+ const MipsInstrInfo *TII;
+ bool IsPIC;
+ unsigned ABI;
+
+ };
+
+ char MipsConstantIslands::ID = 0;
+} // end of anonymous namespace
+
+/// createMipsLongBranchPass - Returns a pass that converts branches to long
+/// branches.
+FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) {
+ return new MipsConstantIslands(tm);
+}
+
+bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) {
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
new file mode 100644
index 000000000000..a72a763fde06
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -0,0 +1,310 @@
+//===- MipsDSPInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def HasDSP : Predicate<"Subtarget.hasDSP()">,
+ AssemblerPredicate<"FeatureDSP">;
+def HasDSPR2 : Predicate<"Subtarget.hasDSPR2()">,
+ AssemblerPredicate<"FeatureDSPR2">;
+
+// Fields.
+class Field6<bits<6> val> {
+ bits<6> V = val;
+}
+
+def SPECIAL3_OPCODE : Field6<0b011111>;
+def REGIMM_OPCODE : Field6<0b000001>;
+
+class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
+ let Predicates = [HasDSP];
+}
+
+class PseudoDSP<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo>:
+ MipsPseudo<outs, ins, pattern, itin> {
+ let Predicates = [HasDSP];
+}
+
+// ADDU.QB sub-class format.
+class ADDU_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010000;
+}
+
+class RADDU_W_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010000;
+}
+
+// CMPU.EQ.QB sub-class format.
+class CMP_EQ_QB_R2_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = 0;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010001;
+}
+
+class CMP_EQ_QB_R3_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> rd;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010001;
+}
+
+class PRECR_SRA_PH_W_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> sa;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = sa;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010001;
+}
+
+// ABSQ_S.PH sub-class format.
+class ABSQ_S_PH_R2_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010010;
+}
+
+
+class REPL_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<10> imm;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-16} = imm;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010010;
+}
+
+// SHLL.QB sub-class format.
+class SHLL_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> rs_sa;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs_sa;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010011;
+}
+
+// LX sub-class format.
+class LX_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> base;
+ bits<5> index;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = index;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b001010;
+}
+
+// ADDUH.QB sub-class format.
+class ADDUH_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b011000;
+}
+
+// APPEND sub-class format.
+class APPEND_FMT<bits<5> op> : DSPInst {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> sa;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = sa;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b110001;
+}
+
+// DPA.W.PH sub-class format.
+class DPA_W_PH_FMT<bits<5> op> : DSPInst {
+ bits<2> ac;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b110000;
+}
+
+// MULT sub-class format.
+class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst {
+ bits<2> ac;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = opcode;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+// EXTR.W sub-class format (type 1).
+class EXTR_W_TY1_FMT<bits<5> op> : DSPInst {
+ bits<5> rt;
+ bits<2> ac;
+ bits<5> shift_rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = shift_rs;
+ let Inst{20-16} = rt;
+ let Inst{15-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+// SHILO sub-class format.
+class SHILO_R1_FMT<bits<5> op> : DSPInst {
+ bits<2> ac;
+ bits<6> shift;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-20} = shift;
+ let Inst{19-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class SHILO_R2_FMT<bits<5> op> : DSPInst {
+ bits<2> ac;
+ bits<5> rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class RDDSP_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<10> mask;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-16} = mask;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class WRDSP_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<10> mask;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-11} = mask;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class BPOSGE32_FMT<bits<5> op> : DSPInst {
+ bits<16> offset;
+
+ let Opcode = REGIMM_OPCODE.V;
+
+ let Inst{25-21} = 0;
+ let Inst{20-16} = op;
+ let Inst{15-0} = offset;
+}
+
+// INSV sub-class format.
+class INSV_FMT<bits<6> op> : DSPInst {
+ bits<5> rt;
+ bits<5> rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = 0;
+ let Inst{5-0} = op;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
new file mode 100644
index 000000000000..3c116e1264b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -0,0 +1,1271 @@
+//===- MipsDSPInstrInfo.td - DSP ASE instructions -*- tablegen ------------*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips DSP ASE instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// ImmLeaf
+def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>;
+def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>;
+def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
+def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
+def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
+def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
+
+// Mips-specific dsp nodes
+def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, untyped>]>;
+def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>;
+def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+class MipsDSPBase<string Opc, SDTypeProfile Prof> :
+ SDNode<!strconcat("MipsISD::", Opc), Prof>;
+
+class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> :
+ SDNode<!strconcat("MipsISD::", Opc), Prof, [SDNPHasChain, SDNPSideEffect]>;
+
+def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>;
+def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>;
+def MipsEXTR_S_H : MipsDSPSideEffectBase<"EXTR_S_H", SDT_MipsExtr>;
+def MipsEXTR_W : MipsDSPSideEffectBase<"EXTR_W", SDT_MipsExtr>;
+def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>;
+def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>;
+
+def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>;
+def MipsMTHLIP : MipsDSPSideEffectBase<"MTHLIP", SDT_MipsShilo>;
+
+def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>;
+def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>;
+def MipsMAQ_S_W_PHR : MipsDSPSideEffectBase<"MAQ_S_W_PHR", SDT_MipsDPA>;
+def MipsMAQ_SA_W_PHL : MipsDSPSideEffectBase<"MAQ_SA_W_PHL", SDT_MipsDPA>;
+def MipsMAQ_SA_W_PHR : MipsDSPSideEffectBase<"MAQ_SA_W_PHR", SDT_MipsDPA>;
+
+def MipsDPAU_H_QBL : MipsDSPBase<"DPAU_H_QBL", SDT_MipsDPA>;
+def MipsDPAU_H_QBR : MipsDSPBase<"DPAU_H_QBR", SDT_MipsDPA>;
+def MipsDPSU_H_QBL : MipsDSPBase<"DPSU_H_QBL", SDT_MipsDPA>;
+def MipsDPSU_H_QBR : MipsDSPBase<"DPSU_H_QBR", SDT_MipsDPA>;
+def MipsDPAQ_S_W_PH : MipsDSPSideEffectBase<"DPAQ_S_W_PH", SDT_MipsDPA>;
+def MipsDPSQ_S_W_PH : MipsDSPSideEffectBase<"DPSQ_S_W_PH", SDT_MipsDPA>;
+def MipsDPAQ_SA_L_W : MipsDSPSideEffectBase<"DPAQ_SA_L_W", SDT_MipsDPA>;
+def MipsDPSQ_SA_L_W : MipsDSPSideEffectBase<"DPSQ_SA_L_W", SDT_MipsDPA>;
+
+def MipsDPA_W_PH : MipsDSPBase<"DPA_W_PH", SDT_MipsDPA>;
+def MipsDPS_W_PH : MipsDSPBase<"DPS_W_PH", SDT_MipsDPA>;
+def MipsDPAQX_S_W_PH : MipsDSPSideEffectBase<"DPAQX_S_W_PH", SDT_MipsDPA>;
+def MipsDPAQX_SA_W_PH : MipsDSPSideEffectBase<"DPAQX_SA_W_PH", SDT_MipsDPA>;
+def MipsDPAX_W_PH : MipsDSPBase<"DPAX_W_PH", SDT_MipsDPA>;
+def MipsDPSX_W_PH : MipsDSPBase<"DPSX_W_PH", SDT_MipsDPA>;
+def MipsDPSQX_S_W_PH : MipsDSPSideEffectBase<"DPSQX_S_W_PH", SDT_MipsDPA>;
+def MipsDPSQX_SA_W_PH : MipsDSPSideEffectBase<"DPSQX_SA_W_PH", SDT_MipsDPA>;
+def MipsMULSA_W_PH : MipsDSPBase<"MULSA_W_PH", SDT_MipsDPA>;
+
+def MipsMULT : MipsDSPBase<"MULT", SDT_MipsDPA>;
+def MipsMULTU : MipsDSPBase<"MULTU", SDT_MipsDPA>;
+def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>;
+def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>;
+def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>;
+def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>;
+
+// Flags.
+class UseAC {
+ list<Register> Uses = [AC0];
+}
+
+class UseDSPCtrl {
+ list<Register> Uses = [DSPCtrl];
+}
+
+class ClearDefs {
+ list<Register> Defs = [];
+}
+
+// Instruction encoding.
+class ADDU_QB_ENC : ADDU_QB_FMT<0b00000>;
+class ADDU_S_QB_ENC : ADDU_QB_FMT<0b00100>;
+class SUBU_QB_ENC : ADDU_QB_FMT<0b00001>;
+class SUBU_S_QB_ENC : ADDU_QB_FMT<0b00101>;
+class ADDQ_PH_ENC : ADDU_QB_FMT<0b01010>;
+class ADDQ_S_PH_ENC : ADDU_QB_FMT<0b01110>;
+class SUBQ_PH_ENC : ADDU_QB_FMT<0b01011>;
+class SUBQ_S_PH_ENC : ADDU_QB_FMT<0b01111>;
+class ADDQ_S_W_ENC : ADDU_QB_FMT<0b10110>;
+class SUBQ_S_W_ENC : ADDU_QB_FMT<0b10111>;
+class ADDSC_ENC : ADDU_QB_FMT<0b10000>;
+class ADDWC_ENC : ADDU_QB_FMT<0b10001>;
+class MODSUB_ENC : ADDU_QB_FMT<0b10010>;
+class RADDU_W_QB_ENC : RADDU_W_QB_FMT<0b10100>;
+class ABSQ_S_PH_ENC : ABSQ_S_PH_R2_FMT<0b01001>;
+class ABSQ_S_W_ENC : ABSQ_S_PH_R2_FMT<0b10001>;
+class PRECRQ_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01100>;
+class PRECRQ_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10100>;
+class PRECRQ_RS_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10101>;
+class PRECRQU_S_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01111>;
+class PRECEQ_W_PHL_ENC : ABSQ_S_PH_R2_FMT<0b01100>;
+class PRECEQ_W_PHR_ENC : ABSQ_S_PH_R2_FMT<0b01101>;
+class PRECEQU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b00100>;
+class PRECEQU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b00101>;
+class PRECEQU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b00110>;
+class PRECEQU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b00111>;
+class PRECEU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b11100>;
+class PRECEU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b11101>;
+class PRECEU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b11110>;
+class PRECEU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b11111>;
+class SHLL_QB_ENC : SHLL_QB_FMT<0b00000>;
+class SHLLV_QB_ENC : SHLL_QB_FMT<0b00010>;
+class SHRL_QB_ENC : SHLL_QB_FMT<0b00001>;
+class SHRLV_QB_ENC : SHLL_QB_FMT<0b00011>;
+class SHLL_PH_ENC : SHLL_QB_FMT<0b01000>;
+class SHLLV_PH_ENC : SHLL_QB_FMT<0b01010>;
+class SHLL_S_PH_ENC : SHLL_QB_FMT<0b01100>;
+class SHLLV_S_PH_ENC : SHLL_QB_FMT<0b01110>;
+class SHRA_PH_ENC : SHLL_QB_FMT<0b01001>;
+class SHRAV_PH_ENC : SHLL_QB_FMT<0b01011>;
+class SHRA_R_PH_ENC : SHLL_QB_FMT<0b01101>;
+class SHRAV_R_PH_ENC : SHLL_QB_FMT<0b01111>;
+class SHLL_S_W_ENC : SHLL_QB_FMT<0b10100>;
+class SHLLV_S_W_ENC : SHLL_QB_FMT<0b10110>;
+class SHRA_R_W_ENC : SHLL_QB_FMT<0b10101>;
+class SHRAV_R_W_ENC : SHLL_QB_FMT<0b10111>;
+class MULEU_S_PH_QBL_ENC : ADDU_QB_FMT<0b00110>;
+class MULEU_S_PH_QBR_ENC : ADDU_QB_FMT<0b00111>;
+class MULEQ_S_W_PHL_ENC : ADDU_QB_FMT<0b11100>;
+class MULEQ_S_W_PHR_ENC : ADDU_QB_FMT<0b11101>;
+class MULQ_RS_PH_ENC : ADDU_QB_FMT<0b11111>;
+class MULSAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00110>;
+class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>;
+class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>;
+class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>;
+class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>;
+class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>;
+class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>;
+class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>;
+class DPSU_H_QBR_ENC : DPA_W_PH_FMT<0b01111>;
+class DPAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00100>;
+class DPSQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00101>;
+class DPAQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01100>;
+class DPSQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01101>;
+class MULT_DSP_ENC : MULT_FMT<0b000000, 0b011000>;
+class MULTU_DSP_ENC : MULT_FMT<0b000000, 0b011001>;
+class MADD_DSP_ENC : MULT_FMT<0b011100, 0b000000>;
+class MADDU_DSP_ENC : MULT_FMT<0b011100, 0b000001>;
+class MSUB_DSP_ENC : MULT_FMT<0b011100, 0b000100>;
+class MSUBU_DSP_ENC : MULT_FMT<0b011100, 0b000101>;
+class CMPU_EQ_QB_ENC : CMP_EQ_QB_R2_FMT<0b00000>;
+class CMPU_LT_QB_ENC : CMP_EQ_QB_R2_FMT<0b00001>;
+class CMPU_LE_QB_ENC : CMP_EQ_QB_R2_FMT<0b00010>;
+class CMPGU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b00100>;
+class CMPGU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b00101>;
+class CMPGU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b00110>;
+class CMP_EQ_PH_ENC : CMP_EQ_QB_R2_FMT<0b01000>;
+class CMP_LT_PH_ENC : CMP_EQ_QB_R2_FMT<0b01001>;
+class CMP_LE_PH_ENC : CMP_EQ_QB_R2_FMT<0b01010>;
+class BITREV_ENC : ABSQ_S_PH_R2_FMT<0b11011>;
+class PACKRL_PH_ENC : CMP_EQ_QB_R3_FMT<0b01110>;
+class REPL_QB_ENC : REPL_FMT<0b00010>;
+class REPL_PH_ENC : REPL_FMT<0b01010>;
+class REPLV_QB_ENC : ABSQ_S_PH_R2_FMT<0b00011>;
+class REPLV_PH_ENC : ABSQ_S_PH_R2_FMT<0b01011>;
+class PICK_QB_ENC : CMP_EQ_QB_R3_FMT<0b00011>;
+class PICK_PH_ENC : CMP_EQ_QB_R3_FMT<0b01011>;
+class LWX_ENC : LX_FMT<0b00000>;
+class LHX_ENC : LX_FMT<0b00100>;
+class LBUX_ENC : LX_FMT<0b00110>;
+class BPOSGE32_ENC : BPOSGE32_FMT<0b11100>;
+class INSV_ENC : INSV_FMT<0b001100>;
+
+class EXTP_ENC : EXTR_W_TY1_FMT<0b00010>;
+class EXTPV_ENC : EXTR_W_TY1_FMT<0b00011>;
+class EXTPDP_ENC : EXTR_W_TY1_FMT<0b01010>;
+class EXTPDPV_ENC : EXTR_W_TY1_FMT<0b01011>;
+class EXTR_W_ENC : EXTR_W_TY1_FMT<0b00000>;
+class EXTRV_W_ENC : EXTR_W_TY1_FMT<0b00001>;
+class EXTR_R_W_ENC : EXTR_W_TY1_FMT<0b00100>;
+class EXTRV_R_W_ENC : EXTR_W_TY1_FMT<0b00101>;
+class EXTR_RS_W_ENC : EXTR_W_TY1_FMT<0b00110>;
+class EXTRV_RS_W_ENC : EXTR_W_TY1_FMT<0b00111>;
+class EXTR_S_H_ENC : EXTR_W_TY1_FMT<0b01110>;
+class EXTRV_S_H_ENC : EXTR_W_TY1_FMT<0b01111>;
+class SHILO_ENC : SHILO_R1_FMT<0b11010>;
+class SHILOV_ENC : SHILO_R2_FMT<0b11011>;
+class MTHLIP_ENC : SHILO_R2_FMT<0b11111>;
+
+class RDDSP_ENC : RDDSP_FMT<0b10010>;
+class WRDSP_ENC : WRDSP_FMT<0b10011>;
+class ADDU_PH_ENC : ADDU_QB_FMT<0b01000>;
+class ADDU_S_PH_ENC : ADDU_QB_FMT<0b01100>;
+class SUBU_PH_ENC : ADDU_QB_FMT<0b01001>;
+class SUBU_S_PH_ENC : ADDU_QB_FMT<0b01101>;
+class CMPGDU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b11000>;
+class CMPGDU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b11001>;
+class CMPGDU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b11010>;
+class ABSQ_S_QB_ENC : ABSQ_S_PH_R2_FMT<0b00001>;
+class ADDUH_QB_ENC : ADDUH_QB_FMT<0b00000>;
+class ADDUH_R_QB_ENC : ADDUH_QB_FMT<0b00010>;
+class SUBUH_QB_ENC : ADDUH_QB_FMT<0b00001>;
+class SUBUH_R_QB_ENC : ADDUH_QB_FMT<0b00011>;
+class ADDQH_PH_ENC : ADDUH_QB_FMT<0b01000>;
+class ADDQH_R_PH_ENC : ADDUH_QB_FMT<0b01010>;
+class SUBQH_PH_ENC : ADDUH_QB_FMT<0b01001>;
+class SUBQH_R_PH_ENC : ADDUH_QB_FMT<0b01011>;
+class ADDQH_W_ENC : ADDUH_QB_FMT<0b10000>;
+class ADDQH_R_W_ENC : ADDUH_QB_FMT<0b10010>;
+class SUBQH_W_ENC : ADDUH_QB_FMT<0b10001>;
+class SUBQH_R_W_ENC : ADDUH_QB_FMT<0b10011>;
+class MUL_PH_ENC : ADDUH_QB_FMT<0b01100>;
+class MUL_S_PH_ENC : ADDUH_QB_FMT<0b01110>;
+class MULQ_S_W_ENC : ADDUH_QB_FMT<0b10110>;
+class MULQ_RS_W_ENC : ADDUH_QB_FMT<0b10111>;
+class MULQ_S_PH_ENC : ADDU_QB_FMT<0b11110>;
+class DPA_W_PH_ENC : DPA_W_PH_FMT<0b00000>;
+class DPS_W_PH_ENC : DPA_W_PH_FMT<0b00001>;
+class DPAQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11000>;
+class DPAQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11010>;
+class DPAX_W_PH_ENC : DPA_W_PH_FMT<0b01000>;
+class DPSX_W_PH_ENC : DPA_W_PH_FMT<0b01001>;
+class DPSQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11001>;
+class DPSQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11011>;
+class MULSA_W_PH_ENC : DPA_W_PH_FMT<0b00010>;
+class PRECR_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01101>;
+class PRECR_SRA_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11110>;
+class PRECR_SRA_R_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11111>;
+class SHRA_QB_ENC : SHLL_QB_FMT<0b00100>;
+class SHRAV_QB_ENC : SHLL_QB_FMT<0b00110>;
+class SHRA_R_QB_ENC : SHLL_QB_FMT<0b00101>;
+class SHRAV_R_QB_ENC : SHLL_QB_FMT<0b00111>;
+class SHRL_PH_ENC : SHLL_QB_FMT<0b11001>;
+class SHRLV_PH_ENC : SHLL_QB_FMT<0b11011>;
+class APPEND_ENC : APPEND_FMT<0b00000>;
+class BALIGN_ENC : APPEND_FMT<0b10000>;
+class PREPEND_ENC : APPEND_FMT<0b00001>;
+
+// Instruction desc.
+class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS, RegisterClass RCT = RCS> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS = RCD> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCS,
+ RegisterClass RCT = RCS> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $rt");
+ list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS, RegisterClass RCT = RCS> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCT,
+ RegisterClass RCS = RCT> {
+ dag OutOperandList = (outs RCT:$rt);
+ dag InOperandList = (ins RCS:$rs, shamt:$sa, RCS:$src);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
+ list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$src = $rt";
+}
+
+class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCT = RCD> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ImmLeaf immPat, InstrItinClass itin, RegisterClass RC> {
+ dag OutOperandList = (outs RC:$rd);
+ dag InOperandList = (ins uimm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $imm");
+ list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RC> {
+ dag OutOperandList = (outs RC:$rd);
+ dag InOperandList = (ins RC:$rt, CPURegs:$rs_sa);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
+ list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs_sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SDPatternOperator ImmPat, InstrItinClass itin,
+ RegisterClass RC> {
+ dag OutOperandList = (outs RC:$rd);
+ dag InOperandList = (ins RC:$rt, uimm16:$rs_sa);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
+ list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rd);
+ dag InOperandList = (ins CPURegs:$base, CPURegs:$index);
+ string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})");
+ list<dag> Pattern = [(set CPURegs:$rd,
+ (OpNode CPURegs:$base, CPURegs:$index))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+ bit mayLoad = 1;
+}
+
+class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS = RCD, RegisterClass RCT = RCD> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SDPatternOperator ImmOp, InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins CPURegs:$rs, shamt:$sa, CPURegs:$src);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
+ list<dag> Pattern = [(set CPURegs:$rt,
+ (OpNode CPURegs:$src, CPURegs:$rs, ImmOp:$sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$src = $rt";
+}
+
+class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins simm16:$shift, ACRegsDSP:$acin);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode immSExt6:$shift, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$acin = $ac";
+}
+
+class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$acin = $ac";
+}
+
+class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
+ list<Register> Uses = [DSPCtrl];
+ string Constraints = "$acin = $ac";
+}
+
+class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rd);
+ dag InOperandList = (ins uimm16:$mask);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
+ list<dag> Pattern = [(set CPURegs:$rd, (OpNode immZExt10:$mask))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Uses = [DSPCtrl];
+}
+
+class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins CPURegs:$rs, uimm16:$mask);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
+ list<dag> Pattern = [(OpNode CPURegs:$rs, immZExt10:$mask)];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$acin = $ac";
+}
+
+class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+ list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, CPURegs:$rt))];
+ InstrItinClass Itinerary = itin;
+ int AddedComplexity = 20;
+ bit isCommutable = 1;
+}
+
+class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
+ InstrItinClass Itinerary = itin;
+ int AddedComplexity = 20;
+ string Constraints = "$acin = $ac";
+}
+
+class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
+ MipsPseudo<(outs CPURegs:$dst), (ins), [(set CPURegs:$dst, (OpNode))]> {
+ list<Register> Uses = [DSPCtrl];
+ bit usesCustomInserter = 1;
+}
+
+class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins brtarget:$offset);
+ string AsmString = !strconcat(instr_asm, "\t$offset");
+ InstrItinClass Itinerary = itin;
+ list<Register> Uses = [DSPCtrl];
+ bit isBranch = 1;
+ bit isTerminator = 1;
+ bit hasDelaySlot = 1;
+}
+
+class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins CPURegs:$src, CPURegs:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
+ list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Uses = [DSPCtrl];
+ string Constraints = "$src = $rt";
+}
+
+//===----------------------------------------------------------------------===//
+// MIPS DSP Rev 1
+//===----------------------------------------------------------------------===//
+
+// Addition/subtraction
+class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable;
+
+class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary,
+ DSPRegs, DSPRegs>;
+
+class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable;
+
+class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary,
+ DSPRegs, DSPRegs>;
+
+class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w,
+ NoItinerary, CPURegs, CPURegs>,
+ IsCommutable;
+
+class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w,
+ NoItinerary, CPURegs, CPURegs>;
+
+class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary,
+ CPURegs, CPURegs>, IsCommutable;
+
+class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary,
+ CPURegs, CPURegs>,
+ IsCommutable, UseDSPCtrl;
+
+class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary,
+ CPURegs, CPURegs>, ClearDefs;
+
+class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ ClearDefs;
+
+// Absolute value
+class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph,
+ NoItinerary, DSPRegs>;
+
+class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w,
+ NoItinerary, CPURegs>;
+
+// Precision reduce/expand
+class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph",
+ int_mips_precrq_qb_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs;
+
+class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w",
+ int_mips_precrq_ph_w,
+ NoItinerary, DSPRegs, CPURegs>,
+ ClearDefs;
+
+class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w",
+ int_mips_precrq_rs_ph_w,
+ NoItinerary, DSPRegs,
+ CPURegs>;
+
+class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph",
+ int_mips_precrqu_s_qb_ph,
+ NoItinerary, DSPRegs,
+ DSPRegs>;
+
+class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl",
+ int_mips_preceq_w_phl,
+ NoItinerary, CPURegs, DSPRegs>,
+ ClearDefs;
+
+class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr",
+ int_mips_preceq_w_phr,
+ NoItinerary, CPURegs, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl",
+ int_mips_precequ_ph_qbl,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr",
+ int_mips_precequ_ph_qbr,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla",
+ int_mips_precequ_ph_qbla,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra",
+ int_mips_precequ_ph_qbra,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl",
+ int_mips_preceu_ph_qbl,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr",
+ int_mips_preceu_ph_qbr,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla",
+ int_mips_preceu_ph_qbla,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra",
+ int_mips_preceu_ph_qbra,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+// Shift
+class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3,
+ NoItinerary, DSPRegs>;
+
+class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
+ NoItinerary, DSPRegs>;
+
+class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4,
+ NoItinerary, DSPRegs>;
+
+class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
+ NoItinerary, DSPRegs>;
+
+class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph,
+ immZExt4, NoItinerary, DSPRegs>;
+
+class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
+ NoItinerary, DSPRegs>;
+
+class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph,
+ immZExt4, NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w,
+ immZExt5, NoItinerary, CPURegs>;
+
+class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
+ NoItinerary, CPURegs>;
+
+class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w,
+ immZExt5, NoItinerary, CPURegs>,
+ ClearDefs;
+
+class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
+ NoItinerary, CPURegs>;
+
+// Multiplication
+class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl",
+ int_mips_muleu_s_ph_qbl,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr",
+ int_mips_muleu_s_ph_qbr,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl",
+ int_mips_muleq_s_w_phl,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr",
+ int_mips_muleq_s_w_phr,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph",
+ MipsMULSAQ_S_W_PH>;
+
+class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>;
+
+class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>;
+
+class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>;
+
+class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>;
+
+// Dot product with accumulate/subtract
+class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>;
+
+class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr", MipsDPAU_H_QBR>;
+
+class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>;
+
+class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>;
+
+class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>;
+
+class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>;
+
+class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>;
+
+class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>;
+
+class MULT_DSP_DESC : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>;
+class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>;
+class MADD_DSP_DESC : MADD_DESC_BASE<"madd", MipsMAdd, NoItinerary>;
+class MADDU_DSP_DESC : MADD_DESC_BASE<"maddu", MipsMAddu, NoItinerary>;
+class MSUB_DSP_DESC : MADD_DESC_BASE<"msub", MipsMSub, NoItinerary>;
+class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>;
+
+// Comparison
+class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
+ int_mips_cmpu_eq_qb, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb",
+ int_mips_cmpu_lt_qb, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb",
+ int_mips_cmpu_le_qb, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb",
+ int_mips_cmpgu_eq_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb",
+ int_mips_cmpgu_lt_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb",
+ int_mips_cmpgu_le_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph,
+ NoItinerary, DSPRegs>,
+ IsCommutable;
+
+class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph,
+ NoItinerary, DSPRegs>,
+ IsCommutable;
+
+class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph,
+ NoItinerary, DSPRegs>,
+ IsCommutable;
+
+// Misc
+class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev,
+ NoItinerary, CPURegs>, ClearDefs;
+
+class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs;
+
+class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
+ NoItinerary, DSPRegs, CPURegs>,
+ ClearDefs;
+
+class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
+ NoItinerary, DSPRegs, CPURegs>,
+ ClearDefs;
+
+class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs, UseDSPCtrl;
+
+class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs, UseDSPCtrl;
+
+class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>, ClearDefs;
+
+class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>, ClearDefs;
+
+class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>, ClearDefs;
+
+class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", NoItinerary>;
+
+// Extr
+class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>;
+
+class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>;
+
+class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>;
+
+class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP,
+ NoItinerary>;
+
+class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>;
+
+class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W,
+ NoItinerary>;
+
+class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W,
+ NoItinerary>;
+
+class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W,
+ NoItinerary>;
+
+class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W,
+ NoItinerary>;
+
+class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W,
+ NoItinerary>;
+
+class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
+ NoItinerary>;
+
+class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
+ NoItinerary>;
+
+class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>;
+
+class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>;
+
+class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>;
+
+class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>;
+
+class WRDSP_DESC : WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>;
+
+class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// MIPS DSP Rev 2
+// Addition/subtraction
+class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable;
+
+class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary,
+ DSPRegs, DSPRegs>;
+
+class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w,
+ NoItinerary, CPURegs>,
+ ClearDefs, IsCommutable;
+
+class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w,
+ NoItinerary, CPURegs>,
+ ClearDefs, IsCommutable;
+
+class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w,
+ NoItinerary, CPURegs>, ClearDefs;
+
+class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w,
+ NoItinerary, CPURegs>, ClearDefs;
+
+// Comparison
+class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb",
+ int_mips_cmpgdu_eq_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb",
+ int_mips_cmpgdu_lt_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb",
+ int_mips_cmpgdu_le_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+// Absolute
+class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb,
+ NoItinerary, DSPRegs>;
+
+// Multiplication
+class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph,
+ NoItinerary, DSPRegs>, IsCommutable;
+
+class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w,
+ NoItinerary, CPURegs>, IsCommutable;
+
+class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w,
+ NoItinerary, CPURegs>, IsCommutable;
+
+class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+// Dot product with accumulate/subtract
+class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>;
+
+class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>;
+
+class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>;
+
+class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph",
+ MipsDPAQX_SA_W_PH>;
+
+class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>;
+
+class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>;
+
+class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>;
+
+class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph",
+ MipsDPSQX_SA_W_PH>;
+
+class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>;
+
+// Precision reduce/expand
+class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
+ int_mips_precr_qb_ph,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w",
+ int_mips_precr_sra_ph_w,
+ NoItinerary, DSPRegs,
+ CPURegs>, ClearDefs;
+
+class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
+ int_mips_precr_sra_r_ph_w,
+ NoItinerary, DSPRegs,
+ CPURegs>, ClearDefs;
+
+// Shift
+class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
+ immZExt3, NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+// Misc
+class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5,
+ NoItinerary>, ClearDefs;
+
+class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2,
+ NoItinerary>, ClearDefs;
+
+class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5,
+ NoItinerary>, ClearDefs;
+
+// Pseudos.
+def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, NoItinerary>;
+
+// Instruction defs.
+// MIPS DSP Rev 1
+def ADDU_QB : ADDU_QB_ENC, ADDU_QB_DESC;
+def ADDU_S_QB : ADDU_S_QB_ENC, ADDU_S_QB_DESC;
+def SUBU_QB : SUBU_QB_ENC, SUBU_QB_DESC;
+def SUBU_S_QB : SUBU_S_QB_ENC, SUBU_S_QB_DESC;
+def ADDQ_PH : ADDQ_PH_ENC, ADDQ_PH_DESC;
+def ADDQ_S_PH : ADDQ_S_PH_ENC, ADDQ_S_PH_DESC;
+def SUBQ_PH : SUBQ_PH_ENC, SUBQ_PH_DESC;
+def SUBQ_S_PH : SUBQ_S_PH_ENC, SUBQ_S_PH_DESC;
+def ADDQ_S_W : ADDQ_S_W_ENC, ADDQ_S_W_DESC;
+def SUBQ_S_W : SUBQ_S_W_ENC, SUBQ_S_W_DESC;
+def ADDSC : ADDSC_ENC, ADDSC_DESC;
+def ADDWC : ADDWC_ENC, ADDWC_DESC;
+def MODSUB : MODSUB_ENC, MODSUB_DESC;
+def RADDU_W_QB : RADDU_W_QB_ENC, RADDU_W_QB_DESC;
+def ABSQ_S_PH : ABSQ_S_PH_ENC, ABSQ_S_PH_DESC;
+def ABSQ_S_W : ABSQ_S_W_ENC, ABSQ_S_W_DESC;
+def PRECRQ_QB_PH : PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC;
+def PRECRQ_PH_W : PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC;
+def PRECRQ_RS_PH_W : PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC;
+def PRECRQU_S_QB_PH : PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC;
+def PRECEQ_W_PHL : PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC;
+def PRECEQ_W_PHR : PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC;
+def PRECEQU_PH_QBL : PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC;
+def PRECEQU_PH_QBR : PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC;
+def PRECEQU_PH_QBLA : PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC;
+def PRECEQU_PH_QBRA : PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC;
+def PRECEU_PH_QBL : PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC;
+def PRECEU_PH_QBR : PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC;
+def PRECEU_PH_QBLA : PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC;
+def PRECEU_PH_QBRA : PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC;
+def SHLL_QB : SHLL_QB_ENC, SHLL_QB_DESC;
+def SHLLV_QB : SHLLV_QB_ENC, SHLLV_QB_DESC;
+def SHRL_QB : SHRL_QB_ENC, SHRL_QB_DESC;
+def SHRLV_QB : SHRLV_QB_ENC, SHRLV_QB_DESC;
+def SHLL_PH : SHLL_PH_ENC, SHLL_PH_DESC;
+def SHLLV_PH : SHLLV_PH_ENC, SHLLV_PH_DESC;
+def SHLL_S_PH : SHLL_S_PH_ENC, SHLL_S_PH_DESC;
+def SHLLV_S_PH : SHLLV_S_PH_ENC, SHLLV_S_PH_DESC;
+def SHRA_PH : SHRA_PH_ENC, SHRA_PH_DESC;
+def SHRAV_PH : SHRAV_PH_ENC, SHRAV_PH_DESC;
+def SHRA_R_PH : SHRA_R_PH_ENC, SHRA_R_PH_DESC;
+def SHRAV_R_PH : SHRAV_R_PH_ENC, SHRAV_R_PH_DESC;
+def SHLL_S_W : SHLL_S_W_ENC, SHLL_S_W_DESC;
+def SHLLV_S_W : SHLLV_S_W_ENC, SHLLV_S_W_DESC;
+def SHRA_R_W : SHRA_R_W_ENC, SHRA_R_W_DESC;
+def SHRAV_R_W : SHRAV_R_W_ENC, SHRAV_R_W_DESC;
+def MULEU_S_PH_QBL : MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC;
+def MULEU_S_PH_QBR : MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC;
+def MULEQ_S_W_PHL : MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC;
+def MULEQ_S_W_PHR : MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC;
+def MULQ_RS_PH : MULQ_RS_PH_ENC, MULQ_RS_PH_DESC;
+def MULSAQ_S_W_PH : MULSAQ_S_W_PH_ENC, MULSAQ_S_W_PH_DESC;
+def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC;
+def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC;
+def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC;
+def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC;
+def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC;
+def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC;
+def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC;
+def DPSU_H_QBR : DPSU_H_QBR_ENC, DPSU_H_QBR_DESC;
+def DPAQ_S_W_PH : DPAQ_S_W_PH_ENC, DPAQ_S_W_PH_DESC;
+def DPSQ_S_W_PH : DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC;
+def DPAQ_SA_L_W : DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC;
+def DPSQ_SA_L_W : DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC;
+def MULT_DSP : MULT_DSP_ENC, MULT_DSP_DESC;
+def MULTU_DSP : MULTU_DSP_ENC, MULTU_DSP_DESC;
+def MADD_DSP : MADD_DSP_ENC, MADD_DSP_DESC;
+def MADDU_DSP : MADDU_DSP_ENC, MADDU_DSP_DESC;
+def MSUB_DSP : MSUB_DSP_ENC, MSUB_DSP_DESC;
+def MSUBU_DSP : MSUBU_DSP_ENC, MSUBU_DSP_DESC;
+def CMPU_EQ_QB : CMPU_EQ_QB_ENC, CMPU_EQ_QB_DESC;
+def CMPU_LT_QB : CMPU_LT_QB_ENC, CMPU_LT_QB_DESC;
+def CMPU_LE_QB : CMPU_LE_QB_ENC, CMPU_LE_QB_DESC;
+def CMPGU_EQ_QB : CMPGU_EQ_QB_ENC, CMPGU_EQ_QB_DESC;
+def CMPGU_LT_QB : CMPGU_LT_QB_ENC, CMPGU_LT_QB_DESC;
+def CMPGU_LE_QB : CMPGU_LE_QB_ENC, CMPGU_LE_QB_DESC;
+def CMP_EQ_PH : CMP_EQ_PH_ENC, CMP_EQ_PH_DESC;
+def CMP_LT_PH : CMP_LT_PH_ENC, CMP_LT_PH_DESC;
+def CMP_LE_PH : CMP_LE_PH_ENC, CMP_LE_PH_DESC;
+def BITREV : BITREV_ENC, BITREV_DESC;
+def PACKRL_PH : PACKRL_PH_ENC, PACKRL_PH_DESC;
+def REPL_QB : REPL_QB_ENC, REPL_QB_DESC;
+def REPL_PH : REPL_PH_ENC, REPL_PH_DESC;
+def REPLV_QB : REPLV_QB_ENC, REPLV_QB_DESC;
+def REPLV_PH : REPLV_PH_ENC, REPLV_PH_DESC;
+def PICK_QB : PICK_QB_ENC, PICK_QB_DESC;
+def PICK_PH : PICK_PH_ENC, PICK_PH_DESC;
+def LWX : LWX_ENC, LWX_DESC;
+def LHX : LHX_ENC, LHX_DESC;
+def LBUX : LBUX_ENC, LBUX_DESC;
+def BPOSGE32 : BPOSGE32_ENC, BPOSGE32_DESC;
+def INSV : INSV_ENC, INSV_DESC;
+def EXTP : EXTP_ENC, EXTP_DESC;
+def EXTPV : EXTPV_ENC, EXTPV_DESC;
+def EXTPDP : EXTPDP_ENC, EXTPDP_DESC;
+def EXTPDPV : EXTPDPV_ENC, EXTPDPV_DESC;
+def EXTR_W : EXTR_W_ENC, EXTR_W_DESC;
+def EXTRV_W : EXTRV_W_ENC, EXTRV_W_DESC;
+def EXTR_R_W : EXTR_R_W_ENC, EXTR_R_W_DESC;
+def EXTRV_R_W : EXTRV_R_W_ENC, EXTRV_R_W_DESC;
+def EXTR_RS_W : EXTR_RS_W_ENC, EXTR_RS_W_DESC;
+def EXTRV_RS_W : EXTRV_RS_W_ENC, EXTRV_RS_W_DESC;
+def EXTR_S_H : EXTR_S_H_ENC, EXTR_S_H_DESC;
+def EXTRV_S_H : EXTRV_S_H_ENC, EXTRV_S_H_DESC;
+def SHILO : SHILO_ENC, SHILO_DESC;
+def SHILOV : SHILOV_ENC, SHILOV_DESC;
+def MTHLIP : MTHLIP_ENC, MTHLIP_DESC;
+def RDDSP : RDDSP_ENC, RDDSP_DESC;
+def WRDSP : WRDSP_ENC, WRDSP_DESC;
+
+// MIPS DSP Rev 2
+let Predicates = [HasDSPR2] in {
+
+def ADDU_PH : ADDU_PH_ENC, ADDU_PH_DESC;
+def ADDU_S_PH : ADDU_S_PH_ENC, ADDU_S_PH_DESC;
+def SUBU_PH : SUBU_PH_ENC, SUBU_PH_DESC;
+def SUBU_S_PH : SUBU_S_PH_ENC, SUBU_S_PH_DESC;
+def CMPGDU_EQ_QB : CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC;
+def CMPGDU_LT_QB : CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC;
+def CMPGDU_LE_QB : CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC;
+def ABSQ_S_QB : ABSQ_S_QB_ENC, ABSQ_S_QB_DESC;
+def ADDUH_QB : ADDUH_QB_ENC, ADDUH_QB_DESC;
+def ADDUH_R_QB : ADDUH_R_QB_ENC, ADDUH_R_QB_DESC;
+def SUBUH_QB : SUBUH_QB_ENC, SUBUH_QB_DESC;
+def SUBUH_R_QB : SUBUH_R_QB_ENC, SUBUH_R_QB_DESC;
+def ADDQH_PH : ADDQH_PH_ENC, ADDQH_PH_DESC;
+def ADDQH_R_PH : ADDQH_R_PH_ENC, ADDQH_R_PH_DESC;
+def SUBQH_PH : SUBQH_PH_ENC, SUBQH_PH_DESC;
+def SUBQH_R_PH : SUBQH_R_PH_ENC, SUBQH_R_PH_DESC;
+def ADDQH_W : ADDQH_W_ENC, ADDQH_W_DESC;
+def ADDQH_R_W : ADDQH_R_W_ENC, ADDQH_R_W_DESC;
+def SUBQH_W : SUBQH_W_ENC, SUBQH_W_DESC;
+def SUBQH_R_W : SUBQH_R_W_ENC, SUBQH_R_W_DESC;
+def MUL_PH : MUL_PH_ENC, MUL_PH_DESC;
+def MUL_S_PH : MUL_S_PH_ENC, MUL_S_PH_DESC;
+def MULQ_S_W : MULQ_S_W_ENC, MULQ_S_W_DESC;
+def MULQ_RS_W : MULQ_RS_W_ENC, MULQ_RS_W_DESC;
+def MULQ_S_PH : MULQ_S_PH_ENC, MULQ_S_PH_DESC;
+def DPA_W_PH : DPA_W_PH_ENC, DPA_W_PH_DESC;
+def DPS_W_PH : DPS_W_PH_ENC, DPS_W_PH_DESC;
+def DPAQX_S_W_PH : DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC;
+def DPAQX_SA_W_PH : DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC;
+def DPAX_W_PH : DPAX_W_PH_ENC, DPAX_W_PH_DESC;
+def DPSX_W_PH : DPSX_W_PH_ENC, DPSX_W_PH_DESC;
+def DPSQX_S_W_PH : DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC;
+def DPSQX_SA_W_PH : DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC;
+def MULSA_W_PH : MULSA_W_PH_ENC, MULSA_W_PH_DESC;
+def PRECR_QB_PH : PRECR_QB_PH_ENC, PRECR_QB_PH_DESC;
+def PRECR_SRA_PH_W : PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC;
+def PRECR_SRA_R_PH_W : PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC;
+def SHRA_QB : SHRA_QB_ENC, SHRA_QB_DESC;
+def SHRAV_QB : SHRAV_QB_ENC, SHRAV_QB_DESC;
+def SHRA_R_QB : SHRA_R_QB_ENC, SHRA_R_QB_DESC;
+def SHRAV_R_QB : SHRAV_R_QB_ENC, SHRAV_R_QB_DESC;
+def SHRL_PH : SHRL_PH_ENC, SHRL_PH_DESC;
+def SHRLV_PH : SHRLV_PH_ENC, SHRLV_PH_DESC;
+def APPEND : APPEND_ENC, APPEND_DESC;
+def BALIGN : BALIGN_ENC, BALIGN_DESC;
+def PREPEND : PREPEND_ENC, PREPEND_DESC;
+
+}
+
+// Pseudos.
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSP>;
+ defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>;
+}
+
+def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>;
+
+// Patterns.
+class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
+ Pat<pattern, result>, Requires<[pred]>;
+
+class BitconvertPat<ValueType DstVT, ValueType SrcVT, RegisterClass DstRC,
+ RegisterClass SrcRC> :
+ DSPPat<(DstVT (bitconvert (SrcVT SrcRC:$src))),
+ (COPY_TO_REGCLASS SrcRC:$src, DstRC)>;
+
+def : BitconvertPat<i32, v2i16, CPURegs, DSPRegs>;
+def : BitconvertPat<i32, v4i8, CPURegs, DSPRegs>;
+def : BitconvertPat<v2i16, i32, DSPRegs, CPURegs>;
+def : BitconvertPat<v4i8, i32, DSPRegs, CPURegs>;
+
+def : DSPPat<(v2i16 (load addr:$a)),
+ (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
+def : DSPPat<(v4i8 (load addr:$a)),
+ (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
+def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a),
+ (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
+def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
+ (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
+
+// Extr patterns.
+class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
+ DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)),
+ (Instr ACRegsDSP:$ac, CPURegs:$rs)>;
+
+class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> :
+ DSPPat<(i32 (OpNode immZExt5:$shift, ACRegsDSP:$ac)),
+ (Instr ACRegsDSP:$ac, immZExt5:$shift)>;
+
+def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTPDP, EXTPDP>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTPDP, EXTPDPV>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_W, EXTR_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_W, EXTRV_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_R_W, EXTR_R_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_R_W, EXTRV_R_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>;
+
+// mflo/hi patterns.
+let AddedComplexity = 20 in
+def : DSPPat<(i32 (ExtractLOHI ACRegsDSP:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegsDSP:$ac, imm:$lohi_idx)>;
+
+// Indexed load patterns.
+class IndexedLoadPat<SDPatternOperator LoadNode, Instruction Instr> :
+ DSPPat<(i32 (LoadNode (add i32:$base, i32:$index))),
+ (Instr i32:$base, i32:$index)>;
+
+let AddedComplexity = 20 in {
+ def : IndexedLoadPat<zextloadi8, LBUX>;
+ def : IndexedLoadPat<sextloadi16, LHX>;
+ def : IndexedLoadPat<load, LWX>;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
new file mode 100644
index 000000000000..d07a595af38a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -0,0 +1,721 @@
+//===-- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple pass to fill delay slots with useful instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
+ " are not NOP.");
+
+static cl::opt<bool> DisableDelaySlotFiller(
+ "disable-mips-delay-filler",
+ cl::init(false),
+ cl::desc("Fill all delay slots with NOPs."),
+ cl::Hidden);
+
+static cl::opt<bool> DisableForwardSearch(
+ "disable-mips-df-forward-search",
+ cl::init(true),
+ cl::desc("Disallow MIPS delay filler to search forward."),
+ cl::Hidden);
+
+static cl::opt<bool> DisableSuccBBSearch(
+ "disable-mips-df-succbb-search",
+ cl::init(true),
+ cl::desc("Disallow MIPS delay filler to search successor basic blocks."),
+ cl::Hidden);
+
+static cl::opt<bool> DisableBackwardSearch(
+ "disable-mips-df-backward-search",
+ cl::init(false),
+ cl::desc("Disallow MIPS delay filler to search backward."),
+ cl::Hidden);
+
+namespace {
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+ typedef SmallDenseMap<MachineBasicBlock*, MachineInstr*, 2> BB2BrMap;
+
+ /// \brief A functor comparing edge weight of two blocks.
+ struct CmpWeight {
+ CmpWeight(const MachineBasicBlock &S,
+ const MachineBranchProbabilityInfo &P) : Src(S), Prob(P) {}
+
+ bool operator()(const MachineBasicBlock *Dst0,
+ const MachineBasicBlock *Dst1) const {
+ return Prob.getEdgeWeight(&Src, Dst0) < Prob.getEdgeWeight(&Src, Dst1);
+ }
+
+ const MachineBasicBlock &Src;
+ const MachineBranchProbabilityInfo &Prob;
+ };
+
+ class RegDefsUses {
+ public:
+ RegDefsUses(TargetMachine &TM);
+ void init(const MachineInstr &MI);
+
+ /// This function sets all caller-saved registers in Defs.
+ void setCallerSaved(const MachineInstr &MI);
+
+ /// This function sets all unallocatable registers in Defs.
+ void setUnallocatableRegs(const MachineFunction &MF);
+
+ /// Set bits in Uses corresponding to MBB's live-out registers except for
+ /// the registers that are live-in to SuccBB.
+ void addLiveOut(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &SuccBB);
+
+ bool update(const MachineInstr &MI, unsigned Begin, unsigned End);
+
+ private:
+ bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg,
+ bool IsDef) const;
+
+ /// Returns true if Reg or its alias is in RegSet.
+ bool isRegInSet(const BitVector &RegSet, unsigned Reg) const;
+
+ const TargetRegisterInfo &TRI;
+ BitVector Defs, Uses;
+ };
+
+ /// Base class for inspecting loads and stores.
+ class InspectMemInstr {
+ public:
+ InspectMemInstr(bool ForbidMemInstr_)
+ : OrigSeenLoad(false), OrigSeenStore(false), SeenLoad(false),
+ SeenStore(false), ForbidMemInstr(ForbidMemInstr_) {}
+
+ /// Return true if MI cannot be moved to delay slot.
+ bool hasHazard(const MachineInstr &MI);
+
+ virtual ~InspectMemInstr() {}
+
+ protected:
+ /// Flags indicating whether loads or stores have been seen.
+ bool OrigSeenLoad, OrigSeenStore, SeenLoad, SeenStore;
+
+ /// Memory instructions are not allowed to move to delay slot if this flag
+ /// is true.
+ bool ForbidMemInstr;
+
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI) = 0;
+ };
+
+ /// This subclass rejects any memory instructions.
+ class NoMemInstr : public InspectMemInstr {
+ public:
+ NoMemInstr() : InspectMemInstr(true) {}
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI) { return true; }
+ };
+
+ /// This subclass accepts loads from stacks and constant loads.
+ class LoadFromStackOrConst : public InspectMemInstr {
+ public:
+ LoadFromStackOrConst() : InspectMemInstr(false) {}
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI);
+ };
+
+ /// This subclass uses memory dependence information to determine whether a
+ /// memory instruction can be moved to a delay slot.
+ class MemDefsUses : public InspectMemInstr {
+ public:
+ MemDefsUses(const MachineFrameInfo *MFI);
+
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI);
+
+ /// Update Defs and Uses. Return true if there exist dependences that
+ /// disqualify the delay slot candidate between V and values in Uses and
+ /// Defs.
+ bool updateDefsUses(const Value *V, bool MayStore);
+
+ /// Get the list of underlying objects of MI's memory operand.
+ bool getUnderlyingObjects(const MachineInstr &MI,
+ SmallVectorImpl<const Value *> &Objects) const;
+
+ const MachineFrameInfo *MFI;
+ SmallPtrSet<const Value*, 4> Uses, Defs;
+
+ /// Flags indicating whether loads or stores with no underlying objects have
+ /// been seen.
+ bool SeenNoObjLoad, SeenNoObjStore;
+ };
+
+ class Filler : public MachineFunctionPass {
+ public:
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips Delay Slot Filler";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ /// This function checks if it is valid to move Candidate to the delay slot
+ /// and returns true if it isn't. It also updates memory and register
+ /// dependence information.
+ bool delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
+ InspectMemInstr &IM) const;
+
+ /// This function searches range [Begin, End) for an instruction that can be
+ /// moved to the delay slot. Returns true on success.
+ template<typename IterTy>
+ bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
+ RegDefsUses &RegDU, InspectMemInstr &IM,
+ IterTy &Filler) const;
+
+ /// This function searches in the backward direction for an instruction that
+ /// can be moved to the delay slot. Returns true on success.
+ bool searchBackward(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// This function searches MBB in the forward direction for an instruction
+ /// that can be moved to the delay slot. Returns true on success.
+ bool searchForward(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// This function searches one of MBB's successor blocks for an instruction
+ /// that can be moved to the delay slot and inserts clones of the
+ /// instruction into the successor's predecessor blocks.
+ bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// Pick a successor block of MBB. Return NULL if MBB doesn't have a
+ /// successor block that is not a landing pad.
+ MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const;
+
+ /// This function analyzes MBB and returns an instruction with an unoccupied
+ /// slot that branches to Dst.
+ std::pair<MipsInstrInfo::BranchType, MachineInstr *>
+ getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const;
+
+ /// Examine Pred and see if it is possible to insert an instruction into
+ /// one of its branches delay slot or its end.
+ bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
+ RegDefsUses &RegDU, bool &HasMultipleSuccs,
+ BB2BrMap &BrMap) const;
+
+ bool terminateSearch(const MachineInstr &Candidate) const;
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+static bool hasUnoccupiedSlot(const MachineInstr *MI) {
+ return MI->hasDelaySlot() && !MI->isBundledWithSucc();
+}
+
+/// This function inserts clones of Filler into predecessor blocks.
+static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) {
+ MachineFunction *MF = Filler->getParent()->getParent();
+
+ for (BB2BrMap::const_iterator I = BrMap.begin(); I != BrMap.end(); ++I) {
+ if (I->second) {
+ MIBundleBuilder(I->second).append(MF->CloneMachineInstr(&*Filler));
+ ++UsefulSlots;
+ } else {
+ I->first->insert(I->first->end(), MF->CloneMachineInstr(&*Filler));
+ }
+ }
+}
+
+/// This function adds registers Filler defines to MBB's live-in register list.
+static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
+ for (unsigned I = 0, E = Filler->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Filler->getOperand(I);
+ unsigned R;
+
+ if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg()))
+ continue;
+
+#ifndef NDEBUG
+ const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getTarget().getRegisterInfo()->getAllocatableSet(MF).test(R) &&
+ "Shouldn't move an instruction with unallocatable registers across "
+ "basic block boundaries.");
+#endif
+
+ if (!MBB.isLiveIn(R))
+ MBB.addLiveIn(R);
+ }
+}
+
+RegDefsUses::RegDefsUses(TargetMachine &TM)
+ : TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false),
+ Uses(TRI.getNumRegs(), false) {}
+
+void RegDefsUses::init(const MachineInstr &MI) {
+ // Add all register operands which are explicit and non-variadic.
+ update(MI, 0, MI.getDesc().getNumOperands());
+
+ // If MI is a call, add RA to Defs to prevent users of RA from going into
+ // delay slot.
+ if (MI.isCall())
+ Defs.set(Mips::RA);
+
+ // Add all implicit register operands of branch instructions except
+ // register AT.
+ if (MI.isBranch()) {
+ update(MI, MI.getDesc().getNumOperands(), MI.getNumOperands());
+ Defs.reset(Mips::AT);
+ }
+}
+
+void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
+ assert(MI.isCall());
+
+ // If MI is a call, add all caller-saved registers to Defs.
+ BitVector CallerSavedRegs(TRI.getNumRegs(), true);
+
+ CallerSavedRegs.reset(Mips::ZERO);
+ CallerSavedRegs.reset(Mips::ZERO_64);
+
+ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(); *R; ++R)
+ for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI)
+ CallerSavedRegs.reset(*AI);
+
+ Defs |= CallerSavedRegs;
+}
+
+void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
+ BitVector AllocSet = TRI.getAllocatableSet(MF);
+
+ for (int R = AllocSet.find_first(); R != -1; R = AllocSet.find_next(R))
+ for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
+ AllocSet.set(*AI);
+
+ AllocSet.set(Mips::ZERO);
+ AllocSet.set(Mips::ZERO_64);
+
+ Defs |= AllocSet.flip();
+}
+
+void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &SuccBB) {
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ if (*SI != &SuccBB)
+ for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
+ LE = (*SI)->livein_end(); LI != LE; ++LI)
+ Uses.set(*LI);
+}
+
+bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
+ BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs());
+ bool HasHazard = false;
+
+ for (unsigned I = Begin; I != End; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+
+ if (MO.isReg() && MO.getReg())
+ HasHazard |= checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef());
+ }
+
+ Defs |= NewDefs;
+ Uses |= NewUses;
+
+ return HasHazard;
+}
+
+bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses,
+ unsigned Reg, bool IsDef) const {
+ if (IsDef) {
+ NewDefs.set(Reg);
+ // check whether Reg has already been defined or used.
+ return (isRegInSet(Defs, Reg) || isRegInSet(Uses, Reg));
+ }
+
+ NewUses.set(Reg);
+ // check whether Reg has already been defined.
+ return isRegInSet(Defs, Reg);
+}
+
+bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const {
+ // Check Reg and all aliased Registers.
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ if (RegSet.test(*AI))
+ return true;
+ return false;
+}
+
+bool InspectMemInstr::hasHazard(const MachineInstr &MI) {
+ if (!MI.mayStore() && !MI.mayLoad())
+ return false;
+
+ if (ForbidMemInstr)
+ return true;
+
+ OrigSeenLoad = SeenLoad;
+ OrigSeenStore = SeenStore;
+ SeenLoad |= MI.mayLoad();
+ SeenStore |= MI.mayStore();
+
+ // If MI is an ordered or volatile memory reference, disallow moving
+ // subsequent loads and stores to delay slot.
+ if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) {
+ ForbidMemInstr = true;
+ return true;
+ }
+
+ return hasHazard_(MI);
+}
+
+bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
+ if (MI.mayStore())
+ return true;
+
+ if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ return true;
+
+ const Value *V = (*MI.memoperands_begin())->getValue();
+
+ if (isa<FixedStackPseudoSourceValue>(V))
+ return false;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V))
+ return !PSV->PseudoSourceValue::isConstant(0) &&
+ (V != PseudoSourceValue::getStack());
+
+ return true;
+}
+
+MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
+ : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false),
+ SeenNoObjStore(false) {}
+
+bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
+ bool HasHazard = false;
+ SmallVector<const Value *, 4> Objs;
+
+ // Check underlying object list.
+ if (getUnderlyingObjects(MI, Objs)) {
+ for (SmallVector<const Value *, 4>::const_iterator I = Objs.begin();
+ I != Objs.end(); ++I)
+ HasHazard |= updateDefsUses(*I, MI.mayStore());
+
+ return HasHazard;
+ }
+
+ // No underlying objects found.
+ HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore);
+ HasHazard |= MI.mayLoad() || OrigSeenStore;
+
+ SeenNoObjLoad |= MI.mayLoad();
+ SeenNoObjStore |= MI.mayStore();
+
+ return HasHazard;
+}
+
+bool MemDefsUses::updateDefsUses(const Value *V, bool MayStore) {
+ if (MayStore)
+ return !Defs.insert(V) || Uses.count(V) || SeenNoObjStore || SeenNoObjLoad;
+
+ Uses.insert(V);
+ return Defs.count(V) || SeenNoObjStore;
+}
+
+bool MemDefsUses::
+getUnderlyingObjects(const MachineInstr &MI,
+ SmallVectorImpl<const Value *> &Objects) const {
+ if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ return false;
+
+ const Value *V = (*MI.memoperands_begin())->getValue();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+
+ for (SmallVector<Value*, 4>::iterator I = Objs.begin(), E = Objs.end();
+ I != E; ++I) {
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(*I)) {
+ if (PSV->isAliased(MFI))
+ return false;
+ } else if (!isIdentifiedObject(V))
+ return false;
+
+ Objects.push_back(*I);
+ }
+
+ return true;
+}
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// We assume there is only one delay slot per delayed instruction.
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
+ if (!hasUnoccupiedSlot(&*I))
+ continue;
+
+ ++FilledSlots;
+ Changed = true;
+
+ // Delay slot filling is disabled at -O0.
+ if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) {
+ if (searchBackward(MBB, I))
+ continue;
+
+ if (I->isTerminator()) {
+ if (searchSuccBBs(MBB, I))
+ continue;
+ } else if (searchForward(MBB, I)) {
+ continue;
+ }
+ }
+
+ // Bundle the NOP to the instruction with the delay slot.
+ BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+ MIBundleBuilder(MBB, I, llvm::next(llvm::next(I)));
+ }
+
+ return Changed;
+}
+
+/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Mips MachineFunctions
+FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
+ return new Filler(tm);
+}
+
+template<typename IterTy>
+bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
+ RegDefsUses &RegDU, InspectMemInstr& IM,
+ IterTy &Filler) const {
+ for (IterTy I = Begin; I != End; ++I) {
+ // skip debug value
+ if (I->isDebugValue())
+ continue;
+
+ if (terminateSearch(*I))
+ break;
+
+ assert((!I->isCall() && !I->isReturn() && !I->isBranch()) &&
+ "Cannot put calls, returns or branches in delay slot.");
+
+ if (delayHasHazard(*I, RegDU, IM))
+ continue;
+
+ Filler = I;
+ return true;
+ }
+
+ return false;
+}
+
+bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
+ if (DisableBackwardSearch)
+ return false;
+
+ RegDefsUses RegDU(TM);
+ MemDefsUses MemDU(MBB.getParent()->getFrameInfo());
+ ReverseIter Filler;
+
+ RegDU.init(*Slot);
+
+ if (searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler)) {
+ MBB.splice(llvm::next(Slot), &MBB, llvm::next(Filler).base());
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
+ }
+
+ return false;
+}
+
+bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
+ // Can handle only calls.
+ if (DisableForwardSearch || !Slot->isCall())
+ return false;
+
+ RegDefsUses RegDU(TM);
+ NoMemInstr NM;
+ Iter Filler;
+
+ RegDU.setCallerSaved(*Slot);
+
+ if (searchRange(MBB, llvm::next(Slot), MBB.end(), RegDU, NM, Filler)) {
+ MBB.splice(llvm::next(Slot), &MBB, Filler);
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
+ }
+
+ return false;
+}
+
+bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
+ if (DisableSuccBBSearch)
+ return false;
+
+ MachineBasicBlock *SuccBB = selectSuccBB(MBB);
+
+ if (!SuccBB)
+ return false;
+
+ RegDefsUses RegDU(TM);
+ bool HasMultipleSuccs = false;
+ BB2BrMap BrMap;
+ OwningPtr<InspectMemInstr> IM;
+ Iter Filler;
+
+ // Iterate over SuccBB's predecessor list.
+ for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
+ PE = SuccBB->pred_end(); PI != PE; ++PI)
+ if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
+ return false;
+
+ // Do not allow moving instructions which have unallocatable register operands
+ // across basic block boundaries.
+ RegDU.setUnallocatableRegs(*MBB.getParent());
+
+ // Only allow moving loads from stack or constants if any of the SuccBB's
+ // predecessors have multiple successors.
+ if (HasMultipleSuccs) {
+ IM.reset(new LoadFromStackOrConst());
+ } else {
+ const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
+ IM.reset(new MemDefsUses(MFI));
+ }
+
+ if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler))
+ return false;
+
+ insertDelayFiller(Filler, BrMap);
+ addLiveInRegs(Filler, *SuccBB);
+ Filler->eraseFromParent();
+
+ return true;
+}
+
+MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
+ if (B.succ_empty())
+ return NULL;
+
+ // Select the successor with the larget edge weight.
+ CmpWeight Cmp(B, getAnalysis<MachineBranchProbabilityInfo>());
+ MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(), Cmp);
+ return S->isLandingPad() ? NULL : S;
+}
+
+std::pair<MipsInstrInfo::BranchType, MachineInstr *>
+Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
+ const MipsInstrInfo *TII =
+ static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ MachineBasicBlock *TrueBB = 0, *FalseBB = 0;
+ SmallVector<MachineInstr*, 2> BranchInstrs;
+ SmallVector<MachineOperand, 2> Cond;
+
+ MipsInstrInfo::BranchType R =
+ TII->AnalyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs);
+
+ if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch))
+ return std::make_pair(R, (MachineInstr*)NULL);
+
+ if (R != MipsInstrInfo::BT_CondUncond) {
+ if (!hasUnoccupiedSlot(BranchInstrs[0]))
+ return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+
+ assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst)));
+
+ return std::make_pair(R, BranchInstrs[0]);
+ }
+
+ assert((TrueBB == &Dst) || (FalseBB == &Dst));
+
+ // Examine the conditional branch. See if its slot is occupied.
+ if (hasUnoccupiedSlot(BranchInstrs[0]))
+ return std::make_pair(MipsInstrInfo::BT_Cond, BranchInstrs[0]);
+
+ // If that fails, try the unconditional branch.
+ if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst))
+ return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]);
+
+ return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+}
+
+bool Filler::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
+ RegDefsUses &RegDU, bool &HasMultipleSuccs,
+ BB2BrMap &BrMap) const {
+ std::pair<MipsInstrInfo::BranchType, MachineInstr *> P =
+ getBranch(Pred, Succ);
+
+ // Return if either getBranch wasn't able to analyze the branches or there
+ // were no branches with unoccupied slots.
+ if (P.first == MipsInstrInfo::BT_None)
+ return false;
+
+ if ((P.first != MipsInstrInfo::BT_Uncond) &&
+ (P.first != MipsInstrInfo::BT_NoBranch)) {
+ HasMultipleSuccs = true;
+ RegDU.addLiveOut(Pred, Succ);
+ }
+
+ BrMap[&Pred] = P.second;
+ return true;
+}
+
+bool Filler::delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
+ InspectMemInstr &IM) const {
+ bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill());
+
+ HasHazard |= IM.hasHazard(Candidate);
+ HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands());
+
+ return HasHazard;
+}
+
+bool Filler::terminateSearch(const MachineInstr &Candidate) const {
+ return (Candidate.isTerminator() || Candidate.isCall() ||
+ Candidate.isLabel() || Candidate.isInlineAsm() ||
+ Candidate.hasUnmodeledSideEffects());
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
new file mode 100644
index 000000000000..eb9d49fefb2f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
@@ -0,0 +1,134 @@
+//===-- MipsFrameLowering.cpp - Mips Frame Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsFrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done thought a positive offset
+// from the stack/frame pointer, so the stack is considering
+// to grow up! Otherwise terrible hacks would have to be made
+// to get this stack ABI compliant :)
+//
+// The stack frame required by the ABI (after call):
+// Offset
+//
+// 0 ----------
+// 4 Args to pass
+// . saved $GP (used in PIC)
+// . Alloca allocations
+// . Local Area
+// . CPU "Callee Saved" Registers
+// . saved FP
+// . saved RA
+// . FPU "Callee Saved" Registers
+// StackSize -----------
+//
+// Offset - offset from sp after stack allocation on function prologue
+//
+// The sp is the stack pointer subtracted/added from the stack size
+// at the Prologue/Epilogue
+//
+// References to the previous stack (to obtain arguments) are done
+// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1))
+//
+// Examples:
+// - reference to the actual stack frame
+// for any local area var there is smt like : FI >= 0, StackOffset: 4
+// sw REGX, 4(SP)
+//
+// - reference to previous stack frame
+// suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16.
+// The emitted instruction will be something like:
+// lw REGX, 16+StackSize(SP)
+//
+// Since the total stack size is unknown on LowerFormalArguments, all
+// stack references (ObjectOffset) created to reference the function
+// arguments, are negative numbers. This way, on eliminateFrameIndex it's
+// possible to detect those references and the offsets are adjusted to
+// their real location.
+//
+//===----------------------------------------------------------------------===//
+
+const MipsFrameLowering *MipsFrameLowering::create(MipsTargetMachine &TM,
+ const MipsSubtarget &ST) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16FrameLowering(ST);
+
+ return llvm::createMipsSEFrameLowering(ST);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+}
+
+uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+
+ int64_t Offset = 0;
+
+ // Iterate over fixed sized objects.
+ for (int I = MFI->getObjectIndexBegin(); I != 0; ++I)
+ Offset = std::max(Offset, -MFI->getObjectOffset(I));
+
+ // Conservatively assume all callee-saved registers will be saved.
+ for (const uint16_t *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
+ unsigned Size = TRI.getMinimalPhysRegClass(*R)->getSize();
+ Offset = RoundUpToAlignment(Offset + Size, Size);
+ }
+
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Check that MaxAlign is not zero if there is a stack object that is not a
+ // callee-saved spill.
+ assert(!MFI->getObjectIndexEnd() || MaxAlign);
+
+ // Iterate over other objects.
+ for (unsigned I = 0, E = MFI->getObjectIndexEnd(); I != E; ++I)
+ Offset = RoundUpToAlignment(Offset + MFI->getObjectSize(I), MaxAlign);
+
+ // Call frame.
+ if (MFI->adjustsStack() && hasReservedCallFrame(MF))
+ Offset = RoundUpToAlignment(Offset + MFI->getMaxCallFrameSize(),
+ std::max(MaxAlign, getStackAlignment()));
+
+ return RoundUpToAlignment(Offset, getStackAlignment());
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
new file mode 100644
index 000000000000..6a5f79d0dfc4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
@@ -0,0 +1,47 @@
+//===-- MipsFrameLowering.h - Define frame lowering for Mips ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_FRAMEINFO_H
+#define MIPS_FRAMEINFO_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MipsSubtarget;
+
+class MipsFrameLowering : public TargetFrameLowering {
+protected:
+ const MipsSubtarget &STI;
+
+public:
+ explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment)
+ : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {}
+
+ static const MipsFrameLowering *create(MipsTargetMachine &TM,
+ const MipsSubtarget &ST);
+
+ bool hasFP(const MachineFunction &MF) const;
+
+protected:
+ uint64_t estimateStackSize(const MachineFunction &MF) const;
+};
+
+/// Create MipsFrameLowering objects.
+const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
+const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
new file mode 100644
index 000000000000..77b08cb11e0c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -0,0 +1,154 @@
+//===-- MipsISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "MipsISelDAGToDAG.h"
+#include "Mips16ISelDAGToDAG.h"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+
+bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
+
+ processFunctionAfterISel(MF);
+
+ return Ret;
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg();
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// ComplexPattern used on MipsInstrInfo
+/// Used on Mips Load/Store instructions
+bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias) {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+/// Select instructions not customized! Used for
+/// expanded, promoted and normal instructions
+SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ return NULL;
+ }
+
+ // See if subclasses can handle this node.
+ std::pair<bool, SDNode*> Ret = selectNode(Node);
+
+ if (Ret.first)
+ return Ret.second;
+
+ switch(Opcode) {
+ default: break;
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+
+#ifndef NDEBUG
+ case ISD::LOAD:
+ case ISD::STORE:
+ assert(cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <=
+ cast<MemSDNode>(Node)->getAlignment() &&
+ "Unexpected unaligned loads/stores.");
+ break;
+#endif
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ return ResNode;
+}
+
+bool MipsDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ OutOps.push_back(Op);
+ return false;
+}
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16ISelDag(TM);
+
+ return llvm::createMipsSEISelDag(TM);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
new file mode 100644
index 000000000000..cf0f9c58aa9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -0,0 +1,93 @@
+//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSISELDAGTODAG_H
+#define MIPSISELDAGTODAG_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class MipsDAGToDAGISel : public SelectionDAGISel {
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &TM)
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+protected:
+ SDNode *getGlobalBaseReg();
+
+ /// Keep a pointer to the MipsSubtarget around so that we can make the right
+ /// decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ // Complex Pattern.
+ /// (reg + imm).
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Fall back on this function if all else fails.
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Match integer address pattern.
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual SDNode *Select(SDNode *N);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;
+
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ }
+
+ virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
new file mode 100644
index 000000000000..e2219f257ecd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -0,0 +1,3596 @@
+//===-- MipsISelLowering.cpp - Mips DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-lower"
+#include "MipsISelLowering.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "MipsTargetObjectFile.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+static cl::opt<bool>
+LargeGOT("mxgot", cl::Hidden,
+ cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false));
+
+static const uint16_t O32IntRegs[4] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+};
+
+static const uint16_t Mips64IntRegs[8] = {
+ Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
+ Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64
+};
+
+static const uint16_t Mips64DPRegs[8] = {
+ Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
+ Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
+};
+
+// If I is a shifted mask, set the size (Size) and the first bit of the
+// mask (Pos), and return true.
+// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
+static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
+ if (!isShiftedMask_64(I))
+ return false;
+
+ Size = CountPopulation_64(I);
+ Pos = CountTrailingZeros_64(I);
+ return true;
+}
+
+SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
+ MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
+ return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
+}
+
+static SDValue getTargetNode(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
+ EVT Ty = Op.getValueType();
+
+ if (GlobalAddressSDNode *N = dyn_cast<GlobalAddressSDNode>(Op))
+ return DAG.getTargetGlobalAddress(N->getGlobal(), Op.getDebugLoc(), Ty, 0,
+ Flag);
+ if (ExternalSymbolSDNode *N = dyn_cast<ExternalSymbolSDNode>(Op))
+ return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
+ if (BlockAddressSDNode *N = dyn_cast<BlockAddressSDNode>(Op))
+ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
+ if (JumpTableSDNode *N = dyn_cast<JumpTableSDNode>(Op))
+ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
+ if (ConstantPoolSDNode *N = dyn_cast<ConstantPoolSDNode>(Op))
+ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+ N->getOffset(), Flag);
+
+ llvm_unreachable("Unexpected node type.");
+ return SDValue();
+}
+
+static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = Op.getValueType();
+ SDValue Hi = getTargetNode(Op, DAG, MipsII::MO_ABS_HI);
+ SDValue Lo = getTargetNode(Op, DAG, MipsII::MO_ABS_LO);
+ return DAG.getNode(ISD::ADD, DL, Ty,
+ DAG.getNode(MipsISD::Hi, DL, Ty, Hi),
+ DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
+}
+
+SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG,
+ bool HasMips64) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = Op.getValueType();
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
+ getTargetNode(Op, DAG, GOTFlag));
+ SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
+ MachinePointerInfo::getGOT(), false, false, false,
+ 0);
+ unsigned LoFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, getTargetNode(Op, DAG, LoFlag));
+ return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
+}
+
+SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG,
+ unsigned Flag) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = Op.getValueType();
+ SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
+ getTargetNode(Op, DAG, Flag));
+ return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+}
+
+SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag,
+ unsigned LoFlag) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = Op.getValueType();
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag));
+ Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
+ SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
+ getTargetNode(Op, DAG, LoFlag));
+ return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+}
+
+const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case MipsISD::JmpLink: return "MipsISD::JmpLink";
+ case MipsISD::TailCall: return "MipsISD::TailCall";
+ case MipsISD::Hi: return "MipsISD::Hi";
+ case MipsISD::Lo: return "MipsISD::Lo";
+ case MipsISD::GPRel: return "MipsISD::GPRel";
+ case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
+ case MipsISD::Ret: return "MipsISD::Ret";
+ case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN";
+ case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
+ case MipsISD::FPCmp: return "MipsISD::FPCmp";
+ case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
+ case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F";
+ case MipsISD::FPRound: return "MipsISD::FPRound";
+ case MipsISD::ExtractLOHI: return "MipsISD::ExtractLOHI";
+ case MipsISD::InsertLOHI: return "MipsISD::InsertLOHI";
+ case MipsISD::Mult: return "MipsISD::Mult";
+ case MipsISD::Multu: return "MipsISD::Multu";
+ case MipsISD::MAdd: return "MipsISD::MAdd";
+ case MipsISD::MAddu: return "MipsISD::MAddu";
+ case MipsISD::MSub: return "MipsISD::MSub";
+ case MipsISD::MSubu: return "MipsISD::MSubu";
+ case MipsISD::DivRem: return "MipsISD::DivRem";
+ case MipsISD::DivRemU: return "MipsISD::DivRemU";
+ case MipsISD::DivRem16: return "MipsISD::DivRem16";
+ case MipsISD::DivRemU16: return "MipsISD::DivRemU16";
+ case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
+ case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
+ case MipsISD::Wrapper: return "MipsISD::Wrapper";
+ case MipsISD::Sync: return "MipsISD::Sync";
+ case MipsISD::Ext: return "MipsISD::Ext";
+ case MipsISD::Ins: return "MipsISD::Ins";
+ case MipsISD::LWL: return "MipsISD::LWL";
+ case MipsISD::LWR: return "MipsISD::LWR";
+ case MipsISD::SWL: return "MipsISD::SWL";
+ case MipsISD::SWR: return "MipsISD::SWR";
+ case MipsISD::LDL: return "MipsISD::LDL";
+ case MipsISD::LDR: return "MipsISD::LDR";
+ case MipsISD::SDL: return "MipsISD::SDL";
+ case MipsISD::SDR: return "MipsISD::SDR";
+ case MipsISD::EXTP: return "MipsISD::EXTP";
+ case MipsISD::EXTPDP: return "MipsISD::EXTPDP";
+ case MipsISD::EXTR_S_H: return "MipsISD::EXTR_S_H";
+ case MipsISD::EXTR_W: return "MipsISD::EXTR_W";
+ case MipsISD::EXTR_R_W: return "MipsISD::EXTR_R_W";
+ case MipsISD::EXTR_RS_W: return "MipsISD::EXTR_RS_W";
+ case MipsISD::SHILO: return "MipsISD::SHILO";
+ case MipsISD::MTHLIP: return "MipsISD::MTHLIP";
+ case MipsISD::MULT: return "MipsISD::MULT";
+ case MipsISD::MULTU: return "MipsISD::MULTU";
+ case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP";
+ case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP";
+ case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP";
+ case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP";
+ default: return NULL;
+ }
+}
+
+MipsTargetLowering::
+MipsTargetLowering(MipsTargetMachine &TM)
+ : TargetLowering(TM, new MipsTargetObjectFile()),
+ Subtarget(&TM.getSubtarget<MipsSubtarget>()),
+ HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()),
+ IsO32(Subtarget->isABI_O32()) {
+ // Mips does not have i1 type, so use i32 for
+ // setcc operations results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ // Load extented operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // MIPS doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Used by legalize types to correctly generate the setcc result.
+ // Without this, every float setcc comes with a AND/OR with the result,
+ // we don't want this, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+
+ // Mips Custom Operations
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+
+ if (!TM.Options.NoNaNsFPMath) {
+ setOperationAction(ISD::FABS, MVT::f32, Custom);
+ setOperationAction(ISD::FABS, MVT::f64, Custom);
+ }
+
+ if (HasMips64) {
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
+ }
+
+ if (!HasMips64) {
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ }
+
+ setOperationAction(ISD::ADD, MVT::i32, Custom);
+ if (HasMips64)
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // Operations not directly supported by Mips.
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ if (!Subtarget->hasMips32r2())
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+
+ if (!Subtarget->hasMips64r2())
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ if (!TM.Options.NoNaNsFPMath) {
+ setOperationAction(ISD::FNEG, MVT::f32, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ }
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+
+ setInsertFencesForAtomic(true);
+
+ if (!Subtarget->hasSEInReg()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ }
+
+ if (!Subtarget->hasBitCount()) {
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i64, Expand);
+ }
+
+ if (!Subtarget->hasSwap()) {
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+ }
+
+ if (HasMips64) {
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i32, Custom);
+ setTruncStoreAction(MVT::i64, MVT::i32, Custom);
+ }
+
+ setTargetDAGCombine(ISD::SDIVREM);
+ setTargetDAGCombine(ISD::UDIVREM);
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::ADD);
+
+ setMinFunctionAlignment(HasMips64 ? 3 : 2);
+
+ setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
+
+ setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
+ setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
+
+ MaxStoresPerMemcpy = 16;
+}
+
+const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16TargetLowering(TM);
+
+ return llvm::createMipsSETargetLowering(TM);
+}
+
+EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ EVT Ty = N->getValueType(0);
+ unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
+ unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
+ unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 :
+ MipsISD::DivRemU16;
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue,
+ N->getOperand(0), N->getOperand(1));
+ SDValue InChain = DAG.getEntryNode();
+ SDValue InGlue = DivRem;
+
+ // insert MFLO
+ if (N->hasAnyUseOfValue(0)) {
+ SDValue CopyFromLo = DAG.getCopyFromReg(InChain, DL, LO, Ty,
+ InGlue);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
+ InChain = CopyFromLo.getValue(1);
+ InGlue = CopyFromLo.getValue(2);
+ }
+
+ // insert MFHI
+ if (N->hasAnyUseOfValue(1)) {
+ SDValue CopyFromHi = DAG.getCopyFromReg(InChain, DL,
+ HI, Ty, InGlue);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
+ }
+
+ return SDValue();
+}
+
+static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return Mips::FCOND_OEQ;
+ case ISD::SETUNE: return Mips::FCOND_UNE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return Mips::FCOND_OLT;
+ case ISD::SETGT:
+ case ISD::SETOGT: return Mips::FCOND_OGT;
+ case ISD::SETLE:
+ case ISD::SETOLE: return Mips::FCOND_OLE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return Mips::FCOND_OGE;
+ case ISD::SETULT: return Mips::FCOND_ULT;
+ case ISD::SETULE: return Mips::FCOND_ULE;
+ case ISD::SETUGT: return Mips::FCOND_UGT;
+ case ISD::SETUGE: return Mips::FCOND_UGE;
+ case ISD::SETUO: return Mips::FCOND_UN;
+ case ISD::SETO: return Mips::FCOND_OR;
+ case ISD::SETNE:
+ case ISD::SETONE: return Mips::FCOND_ONE;
+ case ISD::SETUEQ: return Mips::FCOND_UEQ;
+ }
+}
+
+
+/// This function returns true if the floating point conditional branches and
+/// conditional moves which use condition code CC should be inverted.
+static bool invertFPCondCodeUser(Mips::CondCode CC) {
+ if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+ return false;
+
+ assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) &&
+ "Illegal Condition Code");
+
+ return true;
+}
+
+// Creates and returns an FPCmp node from a setcc node.
+// Returns Op if setcc is not a floating point comparison.
+static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) {
+ // must be a SETCC node
+ if (Op.getOpcode() != ISD::SETCC)
+ return Op;
+
+ SDValue LHS = Op.getOperand(0);
+
+ if (!LHS.getValueType().isFloatingPoint())
+ return Op;
+
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of
+ // node if necessary.
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS,
+ DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+}
+
+// Creates and returns a CMovFPT/F node.
+static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
+ SDValue False, DebugLoc DL) {
+ ConstantSDNode *CC = cast<ConstantSDNode>(Cond.getOperand(2));
+ bool invert = invertFPCondCodeUser((Mips::CondCode)CC->getSExtValue());
+
+ return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL,
+ True.getValueType(), True, False, Cond);
+}
+
+static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue SetCC = N->getOperand(0);
+
+ if ((SetCC.getOpcode() != ISD::SETCC) ||
+ !SetCC.getOperand(0).getValueType().isInteger())
+ return SDValue();
+
+ SDValue False = N->getOperand(2);
+ EVT FalseTy = False.getValueType();
+
+ if (!FalseTy.isInteger())
+ return SDValue();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(False);
+
+ if (!CN || CN->getZExtValue())
+ return SDValue();
+
+ const DebugLoc DL = N->getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ SDValue True = N->getOperand(1);
+
+ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
+ SetCC.getOperand(1), ISD::getSetCCInverse(CC, true));
+
+ return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
+}
+
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ // Pattern match EXT.
+ // $dst = and ((sra or srl) $src , pos), (2**size - 1)
+ // => ext $dst, $src, size, pos
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ return SDValue();
+
+ SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
+ unsigned ShiftRightOpc = ShiftRight.getOpcode();
+
+ // Op's first operand must be a shift right.
+ if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL)
+ return SDValue();
+
+ // The second operand of the shift must be an immediate.
+ ConstantSDNode *CN;
+ if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
+ return SDValue();
+
+ uint64_t Pos = CN->getZExtValue();
+ uint64_t SMPos, SMSize;
+
+ // Op's second operand must be a shifted mask.
+ if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
+ !isShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ return SDValue();
+
+ // Return if the shifted mask does not start at bit 0 or the sum of its size
+ // and Pos exceeds the word's size.
+ EVT ValTy = N->getValueType(0);
+ if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
+ return SDValue();
+
+ return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), ValTy,
+ ShiftRight.getOperand(0), DAG.getConstant(Pos, MVT::i32),
+ DAG.getConstant(SMSize, MVT::i32));
+}
+
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ // Pattern match INS.
+ // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
+ // where mask1 = (2**size - 1) << pos, mask0 = ~mask1
+ // => ins $dst, $src, size, pos, $src1
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ return SDValue();
+
+ SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
+ uint64_t SMPos0, SMSize0, SMPos1, SMSize1;
+ ConstantSDNode *CN;
+
+ // See if Op's first operand matches (and $src1 , mask0).
+ if (And0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
+ !isShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
+ return SDValue();
+
+ // See if Op's second operand matches (and (shl $src, pos), mask1).
+ if (And1.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
+ !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
+ return SDValue();
+
+ // The shift masks must have the same position and size.
+ if (SMPos0 != SMPos1 || SMSize0 != SMSize1)
+ return SDValue();
+
+ SDValue Shl = And1.getOperand(0);
+ if (Shl.getOpcode() != ISD::SHL)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1))))
+ return SDValue();
+
+ unsigned Shamt = CN->getZExtValue();
+
+ // Return if the shift amount and the first bit position of mask are not the
+ // same.
+ EVT ValTy = N->getValueType(0);
+ if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
+ return SDValue();
+
+ return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), ValTy, Shl.getOperand(0),
+ DAG.getConstant(SMPos0, MVT::i32),
+ DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0));
+}
+
+static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
+
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Add = N->getOperand(1);
+
+ if (Add.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ SDValue Lo = Add.getOperand(1);
+
+ if ((Lo.getOpcode() != MipsISD::Lo) ||
+ (Lo.getOperand(0).getOpcode() != ISD::TargetJumpTable))
+ return SDValue();
+
+ EVT ValTy = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Add1 = DAG.getNode(ISD::ADD, DL, ValTy, N->getOperand(0),
+ Add.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo);
+}
+
+SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
+ const {
+ SelectionDAG &DAG = DCI.DAG;
+ unsigned Opc = N->getOpcode();
+
+ switch (Opc) {
+ default: break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ return performDivRemCombine(N, DAG, DCI, Subtarget);
+ case ISD::SELECT:
+ return performSELECTCombine(N, DAG, DCI, Subtarget);
+ case ISD::AND:
+ return performANDCombine(N, DAG, DCI, Subtarget);
+ case ISD::OR:
+ return performORCombine(N, DAG, DCI, Subtarget);
+ case ISD::ADD:
+ return performADDCombine(N, DAG, DCI, Subtarget);
+ }
+
+ return SDValue();
+}
+
+void
+MipsTargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+ for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
+ Results.push_back(Res.getValue(I));
+}
+
+void
+MipsTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+ for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
+ Results.push_back(Res.getValue(I));
+}
+
+SDValue MipsTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ switch (Op.getOpcode())
+ {
+ case ISD::BR_JT: return lowerBR_JT(Op, DAG);
+ case ISD::BRCOND: return lowerBRCOND(Op, DAG);
+ case ISD::ConstantPool: return lowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return lowerBlockAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return lowerJumpTable(Op, DAG);
+ case ISD::SELECT: return lowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return lowerSETCC(Op, DAG);
+ case ISD::VASTART: return lowerVASTART(Op, DAG);
+ case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG);
+ case ISD::FABS: return lowerFABS(Op, DAG);
+ case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
+ case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG);
+ case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG);
+ case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true);
+ case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false);
+ case ISD::LOAD: return lowerLOAD(Op, DAG);
+ case ISD::STORE: return lowerSTORE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::ADD: return lowerADD(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+
+// addLiveIn - This helper function adds the specified physical register to the
+// MachineFunction as a live in value. It also creates a corresponding
+// virtual register for it.
+static unsigned
+addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
+{
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
+ case Mips::ATOMIC_LOAD_ADD_I8:
+ case Mips::ATOMIC_LOAD_ADD_I8_P8:
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I16:
+ case Mips::ATOMIC_LOAD_ADD_I16_P8:
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I32:
+ case Mips::ATOMIC_LOAD_ADD_I32_P8:
+ return emitAtomicBinary(MI, BB, 4, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I64:
+ case Mips::ATOMIC_LOAD_ADD_I64_P8:
+ return emitAtomicBinary(MI, BB, 8, Mips::DADDu);
+
+ case Mips::ATOMIC_LOAD_AND_I8:
+ case Mips::ATOMIC_LOAD_AND_I8_P8:
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I16:
+ case Mips::ATOMIC_LOAD_AND_I16_P8:
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I32:
+ case Mips::ATOMIC_LOAD_AND_I32_P8:
+ return emitAtomicBinary(MI, BB, 4, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I64:
+ case Mips::ATOMIC_LOAD_AND_I64_P8:
+ return emitAtomicBinary(MI, BB, 8, Mips::AND64);
+
+ case Mips::ATOMIC_LOAD_OR_I8:
+ case Mips::ATOMIC_LOAD_OR_I8_P8:
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I16:
+ case Mips::ATOMIC_LOAD_OR_I16_P8:
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I32:
+ case Mips::ATOMIC_LOAD_OR_I32_P8:
+ return emitAtomicBinary(MI, BB, 4, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I64:
+ case Mips::ATOMIC_LOAD_OR_I64_P8:
+ return emitAtomicBinary(MI, BB, 8, Mips::OR64);
+
+ case Mips::ATOMIC_LOAD_XOR_I8:
+ case Mips::ATOMIC_LOAD_XOR_I8_P8:
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I16:
+ case Mips::ATOMIC_LOAD_XOR_I16_P8:
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I32:
+ case Mips::ATOMIC_LOAD_XOR_I32_P8:
+ return emitAtomicBinary(MI, BB, 4, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I64:
+ case Mips::ATOMIC_LOAD_XOR_I64_P8:
+ return emitAtomicBinary(MI, BB, 8, Mips::XOR64);
+
+ case Mips::ATOMIC_LOAD_NAND_I8:
+ case Mips::ATOMIC_LOAD_NAND_I8_P8:
+ return emitAtomicBinaryPartword(MI, BB, 1, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I16:
+ case Mips::ATOMIC_LOAD_NAND_I16_P8:
+ return emitAtomicBinaryPartword(MI, BB, 2, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I32:
+ case Mips::ATOMIC_LOAD_NAND_I32_P8:
+ return emitAtomicBinary(MI, BB, 4, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I64:
+ case Mips::ATOMIC_LOAD_NAND_I64_P8:
+ return emitAtomicBinary(MI, BB, 8, 0, true);
+
+ case Mips::ATOMIC_LOAD_SUB_I8:
+ case Mips::ATOMIC_LOAD_SUB_I8_P8:
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I16:
+ case Mips::ATOMIC_LOAD_SUB_I16_P8:
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I32:
+ case Mips::ATOMIC_LOAD_SUB_I32_P8:
+ return emitAtomicBinary(MI, BB, 4, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I64:
+ case Mips::ATOMIC_LOAD_SUB_I64_P8:
+ return emitAtomicBinary(MI, BB, 8, Mips::DSUBu);
+
+ case Mips::ATOMIC_SWAP_I8:
+ case Mips::ATOMIC_SWAP_I8_P8:
+ return emitAtomicBinaryPartword(MI, BB, 1, 0);
+ case Mips::ATOMIC_SWAP_I16:
+ case Mips::ATOMIC_SWAP_I16_P8:
+ return emitAtomicBinaryPartword(MI, BB, 2, 0);
+ case Mips::ATOMIC_SWAP_I32:
+ case Mips::ATOMIC_SWAP_I32_P8:
+ return emitAtomicBinary(MI, BB, 4, 0);
+ case Mips::ATOMIC_SWAP_I64:
+ case Mips::ATOMIC_SWAP_I64_P8:
+ return emitAtomicBinary(MI, BB, 8, 0);
+
+ case Mips::ATOMIC_CMP_SWAP_I8:
+ case Mips::ATOMIC_CMP_SWAP_I8_P8:
+ return emitAtomicCmpSwapPartword(MI, BB, 1);
+ case Mips::ATOMIC_CMP_SWAP_I16:
+ case Mips::ATOMIC_CMP_SWAP_I16_P8:
+ return emitAtomicCmpSwapPartword(MI, BB, 2);
+ case Mips::ATOMIC_CMP_SWAP_I32:
+ case Mips::ATOMIC_CMP_SWAP_I32_P8:
+ return emitAtomicCmpSwap(MI, BB, 4);
+ case Mips::ATOMIC_CMP_SWAP_I64:
+ case Mips::ATOMIC_CMP_SWAP_I64_P8:
+ return emitAtomicCmpSwap(MI, BB, 8);
+ }
+}
+
+// This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and
+// Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true)
+MachineBasicBlock *
+MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode,
+ bool Nand) const {
+ assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned LL, SC, AND, NOR, ZERO, BEQ;
+
+ if (Size == 4) {
+ LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+ AND = Mips::AND;
+ NOR = Mips::NOR;
+ ZERO = Mips::ZERO;
+ BEQ = Mips::BEQ;
+ }
+ else {
+ LL = IsN64 ? Mips::LLD_P8 : Mips::LLD;
+ SC = IsN64 ? Mips::SCD_P8 : Mips::SCD;
+ AND = Mips::AND64;
+ NOR = Mips::NOR64;
+ ZERO = Mips::ZERO_64;
+ BEQ = Mips::BEQ64;
+ }
+
+ unsigned OldVal = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Incr = MI->getOperand(2).getReg();
+
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned AndRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(exitMBB);
+
+ // loopMBB:
+ // ll oldval, 0(ptr)
+ // <binop> storeval, oldval, incr
+ // sc success, storeval, 0(ptr)
+ // beq success, $0, loopMBB
+ BB = loopMBB;
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
+ if (Nand) {
+ // and andres, oldval, incr
+ // nor storeval, $0, andres
+ BuildMI(BB, DL, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, DL, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes);
+ } else if (BinOpcode) {
+ // <binop> storeval, oldval, incr
+ BuildMI(BB, DL, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
+ } else {
+ StoreVal = Incr;
+ }
+ BuildMI(BB, DL, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return exitMBB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode,
+ bool Nand) const {
+ assert((Size == 1 || Size == 2) &&
+ "Unsupported size for EmitAtomicBinaryPartial.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Incr = MI->getOperand(2).getReg();
+
+ unsigned AlignedAddr = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
+ unsigned Mask = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
+ unsigned AndRes = RegInfo.createVirtualRegister(RC);
+ unsigned BinOpRes = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
+ unsigned SrlRes = RegInfo.createVirtualRegister(RC);
+ unsigned SllRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loopMBB);
+ MF->insert(It, sinkMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(sinkMBB);
+ sinkMBB->addSuccessor(exitMBB);
+
+ // thisMBB:
+ // addiu masklsb2,$0,-4 # 0xfffffffc
+ // and alignedaddr,ptr,masklsb2
+ // andi ptrlsb2,ptr,3
+ // sll shiftamt,ptrlsb2,3
+ // ori maskupper,$0,255 # 0xff
+ // sll mask,maskupper,shiftamt
+ // nor mask2,$0,mask
+ // sll incr2,incr,shiftamt
+
+ int64_t MaskImm = (Size == 1) ? 255 : 65535;
+ BuildMI(BB, DL, TII->get(Mips::ADDiu), MaskLSB2)
+ .addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr)
+ .addReg(Ptr).addReg(MaskLSB2);
+ BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper)
+ .addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Mask)
+ .addReg(ShiftAmt).addReg(MaskUpper);
+ BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
+
+ // atomic.load.binop
+ // loopMBB:
+ // ll oldval,0(alignedaddr)
+ // binop binopres,oldval,incr2
+ // and newval,binopres,mask
+ // and maskedoldval0,oldval,mask2
+ // or storeval,maskedoldval0,newval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loopMBB
+
+ // atomic.swap
+ // loopMBB:
+ // ll oldval,0(alignedaddr)
+ // and newval,incr2,mask
+ // and maskedoldval0,oldval,mask2
+ // or storeval,maskedoldval0,newval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loopMBB
+
+ BB = loopMBB;
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ if (Nand) {
+ // and andres, oldval, incr2
+ // nor binopres, $0, andres
+ // and newval, binopres, mask
+ BuildMI(BB, DL, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, DL, TII->get(Mips::NOR), BinOpRes)
+ .addReg(Mips::ZERO).addReg(AndRes);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ } else if (BinOpcode) {
+ // <binop> binopres, oldval, incr2
+ // and newval, binopres, mask
+ BuildMI(BB, DL, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ } else {// atomic.swap
+ // and newval, incr2, mask
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
+ }
+
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
+ .addReg(OldVal).addReg(Mask2);
+ BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
+ .addReg(MaskedOldVal0).addReg(NewVal);
+ BuildMI(BB, DL, TII->get(SC), Success)
+ .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(Mips::BEQ))
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
+
+ // sinkMBB:
+ // and maskedoldval1,oldval,mask
+ // srl srlres,maskedoldval1,shiftamt
+ // sll sllres,srlres,24
+ // sra dest,sllres,24
+ BB = sinkMBB;
+ int64_t ShiftImm = (Size == 1) ? 24 : 16;
+
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
+ .addReg(OldVal).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
+ .addReg(ShiftAmt).addReg(MaskedOldVal1);
+ BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
+ .addReg(SrlRes).addImm(ShiftImm);
+ BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
+ .addReg(SllRes).addImm(ShiftImm);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return exitMBB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned LL, SC, ZERO, BNE, BEQ;
+
+ if (Size == 4) {
+ LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+ ZERO = Mips::ZERO;
+ BNE = Mips::BNE;
+ BEQ = Mips::BEQ;
+ }
+ else {
+ LL = IsN64 ? Mips::LLD_P8 : Mips::LLD;
+ SC = IsN64 ? Mips::SCD_P8 : Mips::SCD;
+ ZERO = Mips::ZERO_64;
+ BNE = Mips::BNE64;
+ BEQ = Mips::BEQ64;
+ }
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned OldVal = MI->getOperand(2).getReg();
+ unsigned NewVal = MI->getOperand(3).getReg();
+
+ unsigned Success = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+ loop1MBB->addSuccessor(exitMBB);
+ loop1MBB->addSuccessor(loop2MBB);
+ loop2MBB->addSuccessor(loop1MBB);
+ loop2MBB->addSuccessor(exitMBB);
+
+ // loop1MBB:
+ // ll dest, 0(ptr)
+ // bne dest, oldval, exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(BNE))
+ .addReg(Dest).addReg(OldVal).addMBB(exitMBB);
+
+ // loop2MBB:
+ // sc success, newval, 0(ptr)
+ // beq success, $0, loop1MBB
+ BB = loop2MBB;
+ BuildMI(BB, DL, TII->get(SC), Success)
+ .addReg(NewVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(BEQ))
+ .addReg(Success).addReg(ZERO).addMBB(loop1MBB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return exitMBB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ assert((Size == 1 || Size == 2) &&
+ "Unsupported size for EmitAtomicCmpSwapPartial.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned CmpVal = MI->getOperand(2).getReg();
+ unsigned NewVal = MI->getOperand(3).getReg();
+
+ unsigned AlignedAddr = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
+ unsigned Mask = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned SrlRes = RegInfo.createVirtualRegister(RC);
+ unsigned SllRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, sinkMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(loop1MBB);
+ loop1MBB->addSuccessor(sinkMBB);
+ loop1MBB->addSuccessor(loop2MBB);
+ loop2MBB->addSuccessor(loop1MBB);
+ loop2MBB->addSuccessor(sinkMBB);
+ sinkMBB->addSuccessor(exitMBB);
+
+ // FIXME: computation of newval2 can be moved to loop2MBB.
+ // thisMBB:
+ // addiu masklsb2,$0,-4 # 0xfffffffc
+ // and alignedaddr,ptr,masklsb2
+ // andi ptrlsb2,ptr,3
+ // sll shiftamt,ptrlsb2,3
+ // ori maskupper,$0,255 # 0xff
+ // sll mask,maskupper,shiftamt
+ // nor mask2,$0,mask
+ // andi maskedcmpval,cmpval,255
+ // sll shiftedcmpval,maskedcmpval,shiftamt
+ // andi maskednewval,newval,255
+ // sll shiftednewval,maskednewval,shiftamt
+ int64_t MaskImm = (Size == 1) ? 255 : 65535;
+ BuildMI(BB, DL, TII->get(Mips::ADDiu), MaskLSB2)
+ .addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr)
+ .addReg(Ptr).addReg(MaskLSB2);
+ BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper)
+ .addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Mask)
+ .addReg(ShiftAmt).addReg(MaskUpper);
+ BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedCmpVal)
+ .addReg(CmpVal).addImm(MaskImm);
+ BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedCmpVal)
+ .addReg(ShiftAmt).addReg(MaskedCmpVal);
+ BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedNewVal)
+ .addReg(NewVal).addImm(MaskImm);
+ BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal)
+ .addReg(ShiftAmt).addReg(MaskedNewVal);
+
+ // loop1MBB:
+ // ll oldval,0(alginedaddr)
+ // and maskedoldval0,oldval,mask
+ // bne maskedoldval0,shiftedcmpval,sinkMBB
+ BB = loop1MBB;
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
+ .addReg(OldVal).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::BNE))
+ .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
+
+ // loop2MBB:
+ // and maskedoldval1,oldval,mask2
+ // or storeval,maskedoldval1,shiftednewval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loop1MBB
+ BB = loop2MBB;
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
+ .addReg(OldVal).addReg(Mask2);
+ BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
+ .addReg(MaskedOldVal1).addReg(ShiftedNewVal);
+ BuildMI(BB, DL, TII->get(SC), Success)
+ .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(Mips::BEQ))
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
+
+ // sinkMBB:
+ // srl srlres,maskedoldval0,shiftamt
+ // sll sllres,srlres,24
+ // sra dest,sllres,24
+ BB = sinkMBB;
+ int64_t ShiftImm = (Size == 1) ? 24 : 16;
+
+ BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
+ .addReg(ShiftAmt).addReg(MaskedOldVal0);
+ BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
+ .addReg(SrlRes).addImm(ShiftImm);
+ BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
+ .addReg(SllRes).addImm(ShiftImm);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return exitMBB;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PTy = getPointerTy();
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(*getDataLayout());
+
+ Index = DAG.getNode(ISD::MUL, DL, PTy, Index,
+ DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT, false, false,
+ 0);
+ Chain = Addr.getValue(1);
+
+ if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || IsN64) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, DL, PTy, Addr,
+ getPICJumpTableRelocBase(Table, DAG));
+ }
+
+ return DAG.getNode(ISD::BRIND, DL, MVT::Other, Chain, Addr);
+}
+
+SDValue MipsTargetLowering::
+lowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+ // The first operand is the chain, the second is the condition, the third is
+ // the block to branch to if the condition is true.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue CondRes = createFPCmp(DAG, Op.getOperand(1));
+
+ // Return if flag is not set by a floating point comparison.
+ if (CondRes.getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ SDValue CCNode = CondRes.getOperand(2);
+ Mips::CondCode CC =
+ (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
+ unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T;
+ SDValue BrCode = DAG.getConstant(Opc, MVT::i32);
+ return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode,
+ Dest, CondRes);
+}
+
+SDValue MipsTargetLowering::
+lowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond = createFPCmp(DAG, Op.getOperand(0));
+
+ // Return if flag is not set by a floating point comparison.
+ if (Cond.getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ return createCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
+ Op.getDebugLoc());
+}
+
+SDValue MipsTargetLowering::
+lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = Op.getOperand(0).getValueType();
+ SDValue Cond = DAG.getNode(ISD::SETCC, DL, getSetCCResultType(Ty),
+ Op.getOperand(0), Op.getOperand(1),
+ Op.getOperand(4));
+
+ return DAG.getNode(ISD::SELECT, DL, Op.getValueType(), Cond, Op.getOperand(2),
+ Op.getOperand(3));
+}
+
+SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = createFPCmp(DAG, Op);
+
+ assert(Cond.getOpcode() == MipsISD::FPCmp &&
+ "Floating point operand expected.");
+
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ return createCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
+}
+
+SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ // FIXME there isn't actually debug info here
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
+ const MipsTargetObjectFile &TLOF =
+ (const MipsTargetObjectFile&)getObjFileLowering();
+
+ // %gp_rel relocation
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
+ MipsII::MO_GPREL);
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, DL,
+ DAG.getVTList(MVT::i32), &GA, 1);
+ SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
+ return DAG.getNode(ISD::ADD, DL, MVT::i32, GPReg, GPRelNode);
+ }
+
+ // %hi/%lo relocation
+ return getAddrNonPIC(Op, DAG);
+ }
+
+ if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
+ return getAddrLocal(Op, DAG, HasMips64);
+
+ if (LargeGOT)
+ return getAddrGlobalLargeGOT(Op, DAG, MipsII::MO_GOT_HI16,
+ MipsII::MO_GOT_LO16);
+
+ return getAddrGlobal(Op, DAG,
+ HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16);
+}
+
+SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
+ return getAddrNonPIC(Op, DAG);
+
+ return getAddrLocal(Op, DAG, HasMips64);
+}
+
+SDValue MipsTargetLowering::
+lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ // If the relocation model is PIC, use the General Dynamic TLS Model or
+ // Local Dynamic TLS model, otherwise use the Initial Exec or
+ // Local Exec TLS Model.
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ DebugLoc DL = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy();
+
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+
+ if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) {
+ // General Dynamic and Local Dynamic TLS Model.
+ unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM
+ : MipsII::MO_TLSGD;
+
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, Flag);
+ SDValue Argument = DAG.getNode(MipsISD::Wrapper, DL, PtrVT,
+ getGlobalReg(DAG, PtrVT), TGA);
+ unsigned PtrSize = PtrVT.getSizeInBits();
+ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize);
+
+ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = PtrTy;
+ Args.push_back(Entry);
+
+ TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy,
+ false, false, false, false, 0, CallingConv::C,
+ /*IsTailCall=*/false, /*doesNotRet=*/false,
+ /*isReturnValueUsed=*/true,
+ TlsGetAddr, Args, DAG, DL);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ SDValue Ret = CallResult.first;
+
+ if (model != TLSModel::LocalDynamic)
+ return Ret;
+
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ MipsII::MO_DTPREL_HI);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ MipsII::MO_DTPREL_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Ret);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Add, Lo);
+ }
+
+ SDValue Offset;
+ if (model == TLSModel::InitialExec) {
+ // Initial Exec TLS Model
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ MipsII::MO_GOTTPREL);
+ TGA = DAG.getNode(MipsISD::Wrapper, DL, PtrVT, getGlobalReg(DAG, PtrVT),
+ TGA);
+ Offset = DAG.getLoad(PtrVT, DL,
+ DAG.getEntryNode(), TGA, MachinePointerInfo(),
+ false, false, false, 0);
+ } else {
+ // Local Exec TLS Model
+ assert(model == TLSModel::LocalExec);
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ MipsII::MO_TPREL_HI);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ MipsII::MO_TPREL_LO);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo);
+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+ }
+
+ SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue MipsTargetLowering::
+lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
+ return getAddrNonPIC(Op, DAG);
+
+ return getAddrLocal(Op, DAG, HasMips64);
+}
+
+SDValue MipsTargetLowering::
+lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ // gp_rel relocation
+ // FIXME: we should reference the constant pool using small data sections,
+ // but the asm printer currently doesn't support this feature without
+ // hacking it. This feature should come soon so we can uncomment the
+ // stuff below.
+ //if (IsInSmallSection(C->getType())) {
+ // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
+ // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
+ return getAddrNonPIC(Op, DAG);
+
+ return getAddrLocal(Op, DAG, HasMips64);
+}
+
+SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
+
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ EVT TyX = Op.getOperand(0).getValueType();
+ EVT TyY = Op.getOperand(1).getValueType();
+ SDValue Const1 = DAG.getConstant(1, MVT::i32);
+ SDValue Const31 = DAG.getConstant(31, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Res;
+
+ // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it
+ // to i32.
+ SDValue X = (TyX == MVT::f32) ?
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) :
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+ Const1);
+ SDValue Y = (TyY == MVT::f32) ?
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(1)) :
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(1),
+ Const1);
+
+ if (HasR2) {
+ // ext E, Y, 31, 1 ; extract bit31 of Y
+ // ins X, E, 31, 1 ; insert extracted bit at bit31 of X
+ SDValue E = DAG.getNode(MipsISD::Ext, DL, MVT::i32, Y, Const31, Const1);
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32, E, Const31, Const1, X);
+ } else {
+ // sll SllX, X, 1
+ // srl SrlX, SllX, 1
+ // srl SrlY, Y, 31
+ // sll SllY, SrlX, 31
+ // or Or, SrlX, SllY
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
+ SDValue SrlX = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
+ SDValue SrlY = DAG.getNode(ISD::SRL, DL, MVT::i32, Y, Const31);
+ SDValue SllY = DAG.getNode(ISD::SHL, DL, MVT::i32, SrlY, Const31);
+ Res = DAG.getNode(ISD::OR, DL, MVT::i32, SrlX, SllY);
+ }
+
+ if (TyX == MVT::f32)
+ return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Res);
+
+ SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Op.getOperand(0), DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
+}
+
+static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ unsigned WidthX = Op.getOperand(0).getValueSizeInBits();
+ unsigned WidthY = Op.getOperand(1).getValueSizeInBits();
+ EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY);
+ SDValue Const1 = DAG.getConstant(1, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Bitcast to integer nodes.
+ SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0));
+ SDValue Y = DAG.getNode(ISD::BITCAST, DL, TyY, Op.getOperand(1));
+
+ if (HasR2) {
+ // ext E, Y, width(Y) - 1, 1 ; extract bit width(Y)-1 of Y
+ // ins X, E, width(X) - 1, 1 ; insert extracted bit at bit width(X)-1 of X
+ SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y,
+ DAG.getConstant(WidthY - 1, MVT::i32), Const1);
+
+ if (WidthX > WidthY)
+ E = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, E);
+ else if (WidthY > WidthX)
+ E = DAG.getNode(ISD::TRUNCATE, DL, TyX, E);
+
+ SDValue I = DAG.getNode(MipsISD::Ins, DL, TyX, E,
+ DAG.getConstant(WidthX - 1, MVT::i32), Const1, X);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), I);
+ }
+
+ // (d)sll SllX, X, 1
+ // (d)srl SrlX, SllX, 1
+ // (d)srl SrlY, Y, width(Y)-1
+ // (d)sll SllY, SrlX, width(Y)-1
+ // or Or, SrlX, SllY
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, TyX, X, Const1);
+ SDValue SrlX = DAG.getNode(ISD::SRL, DL, TyX, SllX, Const1);
+ SDValue SrlY = DAG.getNode(ISD::SRL, DL, TyY, Y,
+ DAG.getConstant(WidthY - 1, MVT::i32));
+
+ if (WidthX > WidthY)
+ SrlY = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, SrlY);
+ else if (WidthY > WidthX)
+ SrlY = DAG.getNode(ISD::TRUNCATE, DL, TyX, SrlY);
+
+ SDValue SllY = DAG.getNode(ISD::SHL, DL, TyX, SrlY,
+ DAG.getConstant(WidthX - 1, MVT::i32));
+ SDValue Or = DAG.getNode(ISD::OR, DL, TyX, SrlX, SllY);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Or);
+}
+
+SDValue
+MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasMips64())
+ return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
+
+ return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
+}
+
+static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+
+ // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it
+ // to i32.
+ SDValue X = (Op.getValueType() == MVT::f32) ?
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) :
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+ Const1);
+
+ // Clear MSB.
+ if (HasR2)
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32,
+ DAG.getRegister(Mips::ZERO, MVT::i32),
+ DAG.getConstant(31, MVT::i32), Const1, X);
+ else {
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
+ }
+
+ if (Op.getValueType() == MVT::f32)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Res);
+
+ SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Op.getOperand(0), DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
+}
+
+static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Bitcast to integer node.
+ SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));
+
+ // Clear MSB.
+ if (HasR2)
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64,
+ DAG.getRegister(Mips::ZERO_64, MVT::i64),
+ DAG.getConstant(63, MVT::i32), Const1, X);
+ else {
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i64, X, Const1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i64, SllX, Const1);
+ }
+
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Res);
+}
+
+SDValue
+MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64))
+ return lowerFABS64(Op, DAG, Subtarget->hasMips32r2());
+
+ return lowerFABS32(Op, DAG, Subtarget->hasMips32r2());
+}
+
+SDValue MipsTargetLowering::
+lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ // check the depth
+ assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
+ "Frame address can only be determined for current frame.");
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
+ IsN64 ? Mips::FP_64 : Mips::FP, VT);
+ return FrameAddr;
+}
+
+SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ // check the depth
+ assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
+ "Return address can be determined only for current frame.");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MVT VT = Op.getSimpleValueType();
+ unsigned RA = IsN64 ? Mips::RA_64 : Mips::RA;
+ MFI->setReturnAddressIsTaken(true);
+
+ // Return RA, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), Op.getDebugLoc(), Reg, VT);
+}
+
+// An EH_RETURN is the result of lowering llvm.eh.return which in turn is
+// generated from __builtin_eh_return (offset, handler)
+// The effect of this is to adjust the stack pointer by "offset"
+// and then branch to "handler".
+SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ MipsFI->setCallsEhReturn();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+
+ // Store stack offset in V1, store jump target in V0. Glue CopyToReg and
+ // EH_RETURN nodes, so that instructions are emitted back-to-back.
+ unsigned OffsetReg = IsN64 ? Mips::V1_64 : Mips::V1;
+ unsigned AddrReg = IsN64 ? Mips::V0_64 : Mips::V0;
+ Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
+ Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
+ return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
+ DAG.getRegister(OffsetReg, Ty),
+ DAG.getRegister(AddrReg, getPointerTy()),
+ Chain.getValue(1));
+}
+
+// TODO: set SType according to the desired memory barrier behavior.
+SDValue
+MipsTargetLowering::lowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
+ unsigned SType = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(SType, MVT::i32));
+}
+
+SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ // FIXME: Need pseudo-fence for 'singlethread' fences
+ // FIXME: Set SType for weaker fences where supported/appropriate.
+ unsigned SType = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(SType, MVT::i32));
+}
+
+SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+
+ // if shamt < 32:
+ // lo = (shl lo, shamt)
+ // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt))
+ // else:
+ // lo = 0
+ // hi = (shl lo, shamt[4:0])
+ SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt,
+ DAG.getConstant(-1, MVT::i32));
+ SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo,
+ DAG.getConstant(1, MVT::i32));
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, ShiftRight1Lo,
+ Not);
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi, Shamt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo);
+ SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, Shamt);
+ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
+ DAG.getConstant(0x20, MVT::i32));
+ Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond,
+ DAG.getConstant(0, MVT::i32), ShiftLeftLo);
+ Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or);
+
+ SDValue Ops[2] = {Lo, Hi};
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+ bool IsSRA) const {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+
+ // if shamt < 32:
+ // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt))
+ // if isSRA:
+ // hi = (sra hi, shamt)
+ // else:
+ // hi = (srl hi, shamt)
+ // else:
+ // if isSRA:
+ // lo = (sra hi, shamt[4:0])
+ // hi = (sra hi, 31)
+ // else:
+ // lo = (srl hi, shamt[4:0])
+ // hi = 0
+ SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt,
+ DAG.getConstant(-1, MVT::i32));
+ SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi,
+ DAG.getConstant(1, MVT::i32));
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, ShiftLeft1Hi, Not);
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo, Shamt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo);
+ SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, MVT::i32,
+ Hi, Shamt);
+ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
+ DAG.getConstant(0x20, MVT::i32));
+ SDValue Shift31 = DAG.getNode(ISD::SRA, DL, MVT::i32, Hi,
+ DAG.getConstant(31, MVT::i32));
+ Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftRightHi, Or);
+ Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond,
+ IsSRA ? Shift31 : DAG.getConstant(0, MVT::i32),
+ ShiftRightHi);
+
+ SDValue Ops[2] = {Lo, Hi};
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
+ SDValue Chain, SDValue Src, unsigned Offset) {
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT();
+ EVT BasePtrVT = Ptr.getValueType();
+ DebugLoc DL = LD->getDebugLoc();
+ SDVTList VTList = DAG.getVTList(VT, MVT::Other);
+
+ if (Offset)
+ Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr,
+ DAG.getConstant(Offset, BasePtrVT));
+
+ SDValue Ops[] = { Chain, Ptr, Src };
+ return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT,
+ LD->getMemOperand());
+}
+
+// Expand an unaligned 32 or 64-bit integer load node.
+SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ EVT MemVT = LD->getMemoryVT();
+
+ // Return if load is aligned or if MemVT is neither i32 nor i64.
+ if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) ||
+ ((MemVT != MVT::i32) && (MemVT != MVT::i64)))
+ return SDValue();
+
+ bool IsLittle = Subtarget->isLittle();
+ EVT VT = Op.getValueType();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Chain = LD->getChain(), Undef = DAG.getUNDEF(VT);
+
+ assert((VT == MVT::i32) || (VT == MVT::i64));
+
+ // Expand
+ // (set dst, (i64 (load baseptr)))
+ // to
+ // (set tmp, (ldl (add baseptr, 7), undef))
+ // (set dst, (ldr baseptr, tmp))
+ if ((VT == MVT::i64) && (ExtType == ISD::NON_EXTLOAD)) {
+ SDValue LDL = CreateLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef,
+ IsLittle ? 7 : 0);
+ return CreateLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL,
+ IsLittle ? 0 : 7);
+ }
+
+ SDValue LWL = CreateLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef,
+ IsLittle ? 3 : 0);
+ SDValue LWR = CreateLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL,
+ IsLittle ? 0 : 3);
+
+ // Expand
+ // (set dst, (i32 (load baseptr))) or
+ // (set dst, (i64 (sextload baseptr))) or
+ // (set dst, (i64 (extload baseptr)))
+ // to
+ // (set tmp, (lwl (add baseptr, 3), undef))
+ // (set dst, (lwr baseptr, tmp))
+ if ((VT == MVT::i32) || (ExtType == ISD::SEXTLOAD) ||
+ (ExtType == ISD::EXTLOAD))
+ return LWR;
+
+ assert((VT == MVT::i64) && (ExtType == ISD::ZEXTLOAD));
+
+ // Expand
+ // (set dst, (i64 (zextload baseptr)))
+ // to
+ // (set tmp0, (lwl (add baseptr, 3), undef))
+ // (set tmp1, (lwr baseptr, tmp0))
+ // (set tmp2, (shl tmp1, 32))
+ // (set dst, (srl tmp2, 32))
+ DebugLoc DL = LD->getDebugLoc();
+ SDValue Const32 = DAG.getConstant(32, MVT::i32);
+ SDValue SLL = DAG.getNode(ISD::SHL, DL, MVT::i64, LWR, Const32);
+ SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i64, SLL, Const32);
+ SDValue Ops[] = { SRL, LWR.getValue(1) };
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
+ SDValue Chain, unsigned Offset) {
+ SDValue Ptr = SD->getBasePtr(), Value = SD->getValue();
+ EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType();
+ DebugLoc DL = SD->getDebugLoc();
+ SDVTList VTList = DAG.getVTList(MVT::Other);
+
+ if (Offset)
+ Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr,
+ DAG.getConstant(Offset, BasePtrVT));
+
+ SDValue Ops[] = { Chain, Value, Ptr };
+ return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT,
+ SD->getMemOperand());
+}
+
+// Expand an unaligned 32 or 64-bit integer store node.
+SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode *SD = cast<StoreSDNode>(Op);
+ EVT MemVT = SD->getMemoryVT();
+
+ // Return if store is aligned or if MemVT is neither i32 nor i64.
+ if ((SD->getAlignment() >= MemVT.getSizeInBits() / 8) ||
+ ((MemVT != MVT::i32) && (MemVT != MVT::i64)))
+ return SDValue();
+
+ bool IsLittle = Subtarget->isLittle();
+ SDValue Value = SD->getValue(), Chain = SD->getChain();
+ EVT VT = Value.getValueType();
+
+ // Expand
+ // (store val, baseptr) or
+ // (truncstore val, baseptr)
+ // to
+ // (swl val, (add baseptr, 3))
+ // (swr val, baseptr)
+ if ((VT == MVT::i32) || SD->isTruncatingStore()) {
+ SDValue SWL = CreateStoreLR(MipsISD::SWL, DAG, SD, Chain,
+ IsLittle ? 3 : 0);
+ return CreateStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3);
+ }
+
+ assert(VT == MVT::i64);
+
+ // Expand
+ // (store val, baseptr)
+ // to
+ // (sdl val, (add baseptr, 7))
+ // (sdr val, baseptr)
+ SDValue SDL = CreateStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0);
+ return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
+}
+
+static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) {
+ SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+ DAG.getConstant(0, MVT::i32));
+ SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
+}
+
+static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) {
+ SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+ DAG.getConstant(Mips::sub_lo, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+ DAG.getConstant(Mips::sub_hi, MVT::i32));
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+}
+
+// This function expands mips intrinsic nodes which have 64-bit input operands
+// or output values.
+//
+// out64 = intrinsic-node in64
+// =>
+// lo = copy (extract-element (in64, 0))
+// hi = copy (extract-element (in64, 1))
+// mips-specific-node
+// v0 = copy lo
+// v1 = copy hi
+// out64 = merge-values (v0, v1)
+//
+static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
+ DebugLoc DL = Op.getDebugLoc();
+ bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
+ SmallVector<SDValue, 3> Ops;
+ unsigned OpNo = 0;
+
+ // See if Op has a chain input.
+ if (HasChainIn)
+ Ops.push_back(Op->getOperand(OpNo++));
+
+ // The next operand is the intrinsic opcode.
+ assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
+
+ // See if the next operand has type i64.
+ SDValue Opnd = Op->getOperand(++OpNo), In64;
+
+ if (Opnd.getValueType() == MVT::i64)
+ In64 = initAccumulator(Opnd, DL, DAG);
+ else
+ Ops.push_back(Opnd);
+
+ // Push the remaining operands.
+ for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
+ Ops.push_back(Op->getOperand(OpNo));
+
+ // Add In64 to the end of the list.
+ if (In64.getNode())
+ Ops.push_back(In64);
+
+ // Scan output.
+ SmallVector<EVT, 2> ResTys;
+
+ for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
+ I != E; ++I)
+ ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
+
+ // Create node.
+ SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
+ SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
+
+ if (!HasChainIn)
+ return Out;
+
+ assert(Val->getValueType(1) == MVT::Other);
+ SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
+ return DAG.getMergeValues(Vals, 2, DL);
+}
+
+SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
+ default:
+ return SDValue();
+ case Intrinsic::mips_shilo:
+ return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
+ case Intrinsic::mips_dpau_h_qbl:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
+ case Intrinsic::mips_dpau_h_qbr:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
+ case Intrinsic::mips_dpsu_h_qbl:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
+ case Intrinsic::mips_dpsu_h_qbr:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
+ case Intrinsic::mips_dpa_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
+ case Intrinsic::mips_dps_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
+ case Intrinsic::mips_dpax_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
+ case Intrinsic::mips_dpsx_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
+ case Intrinsic::mips_mulsa_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
+ case Intrinsic::mips_mult:
+ return lowerDSPIntr(Op, DAG, MipsISD::Mult);
+ case Intrinsic::mips_multu:
+ return lowerDSPIntr(Op, DAG, MipsISD::Multu);
+ case Intrinsic::mips_madd:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
+ case Intrinsic::mips_maddu:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
+ case Intrinsic::mips_msub:
+ return lowerDSPIntr(Op, DAG, MipsISD::MSub);
+ case Intrinsic::mips_msubu:
+ return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
+ }
+}
+
+SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
+ default:
+ return SDValue();
+ case Intrinsic::mips_extp:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
+ case Intrinsic::mips_extpdp:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
+ case Intrinsic::mips_extr_w:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
+ case Intrinsic::mips_extr_r_w:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
+ case Intrinsic::mips_extr_rs_w:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
+ case Intrinsic::mips_extr_s_h:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
+ case Intrinsic::mips_mthlip:
+ return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
+ case Intrinsic::mips_mulsaq_s_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
+ case Intrinsic::mips_maq_s_w_phl:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
+ case Intrinsic::mips_maq_s_w_phr:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
+ case Intrinsic::mips_maq_sa_w_phl:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
+ case Intrinsic::mips_maq_sa_w_phr:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
+ case Intrinsic::mips_dpaq_s_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
+ case Intrinsic::mips_dpsq_s_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
+ case Intrinsic::mips_dpaq_sa_l_w:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
+ case Intrinsic::mips_dpsq_sa_l_w:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
+ case Intrinsic::mips_dpaqx_s_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
+ case Intrinsic::mips_dpaqx_sa_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
+ case Intrinsic::mips_dpsqx_s_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
+ case Intrinsic::mips_dpsqx_sa_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
+ }
+}
+
+SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op->getOperand(0).getOpcode() != ISD::FRAMEADDR
+ || cast<ConstantSDNode>
+ (Op->getOperand(0).getOperand(0))->getZExtValue() != 0
+ || Op->getOperand(1).getOpcode() != ISD::FRAME_TO_ARGS_OFFSET)
+ return SDValue();
+
+ // The pattern
+ // (add (frameaddr 0), (frame_to_args_offset))
+ // results from lowering llvm.eh.dwarf.cfa intrinsic. Transform it to
+ // (add FrameObject, 0)
+ // where FrameObject is a fixed StackObject with offset 0 which points to
+ // the old stack pointer.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ EVT ValTy = Op->getValueType(0);
+ int FI = MFI->CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false);
+ SDValue InArgsAddr = DAG.getFrameIndex(FI, ValTy);
+ return DAG.getNode(ISD::ADD, Op->getDebugLoc(), ValTy, InArgsAddr,
+ DAG.getConstant(0, ValTy));
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TODO: Implement a generic logic using tblgen that can support this.
+// Mips O32 ABI rules:
+// ---
+// i32 - Passed in A0, A1, A2, A3 and stack
+// f32 - Only passed in f32 registers if no int reg has been used yet to hold
+// an argument. Otherwise, passed in A1, A2, A3 and stack.
+// f64 - Only passed in two aliased f32 registers if no int reg has been used
+// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
+// not used, it must be shadowed. If only A3 is avaiable, shadow it and
+// go to stack.
+//
+// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
+//===----------------------------------------------------------------------===//
+
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const unsigned IntRegsSize=4, FloatRegsSize=2;
+
+ static const uint16_t IntRegs[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const uint16_t F32Regs[] = {
+ Mips::F12, Mips::F14
+ };
+ static const uint16_t F64Regs[] = {
+ Mips::D6, Mips::D7
+ };
+
+ // Do not process byval args here.
+ if (ArgFlags.isByVal())
+ return true;
+
+ // Promote i8 and i16
+ if (LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ unsigned Reg;
+
+ // f32 and f64 are allocated in A0, A1, A2, A3 when either of the following
+ // is true: function is vararg, argument is 3rd or higher, there is previous
+ // argument which is not f32 or f64.
+ bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1
+ || State.getFirstUnallocated(F32Regs, FloatRegsSize) != ValNo;
+ unsigned OrigAlign = ArgFlags.getOrigAlign();
+ bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
+
+ if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ // If this is the first part of an i64 arg,
+ // the allocated register must be either A0 or A2.
+ if (isI64 && (Reg == Mips::A1 || Reg == Mips::A3))
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) {
+ // Allocate int register and shadow next int register. If first
+ // available register is Mips::A1 or Mips::A3, shadow it too.
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg == Mips::A1 || Reg == Mips::A3)
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) {
+ // we are guaranteed to find an available float register
+ if (ValVT == MVT::f32) {
+ Reg = State.AllocateReg(F32Regs, FloatRegsSize);
+ // Shadow int register
+ State.AllocateReg(IntRegs, IntRegsSize);
+ } else {
+ Reg = State.AllocateReg(F64Regs, FloatRegsSize);
+ // Shadow int registers
+ unsigned Reg2 = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg2 == Mips::A1 || Reg2 == Mips::A3)
+ State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs, IntRegsSize);
+ }
+ } else
+ llvm_unreachable("Cannot handle this ValVT.");
+
+ if (!Reg) {
+ unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() >> 3,
+ OrigAlign);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ } else
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+ return false;
+}
+
+#include "MipsGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+static const unsigned O32IntRegsSize = 4;
+
+// Return next O32 integer argument register.
+static unsigned getNextIntArgReg(unsigned Reg) {
+ assert((Reg == Mips::A0) || (Reg == Mips::A2));
+ return (Reg == Mips::A0) ? Mips::A1 : Mips::A3;
+}
+
+SDValue
+MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
+ SDValue Chain, SDValue Arg, DebugLoc DL,
+ bool IsTailCall, SelectionDAG &DAG) const {
+ if (!IsTailCall) {
+ SDValue PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr,
+ DAG.getIntPtrConstant(Offset));
+ return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo(), false,
+ false, 0);
+ }
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ int FI = MFI->CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(),
+ /*isVolatile=*/ true, false, 0);
+}
+
+void MipsTargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // Insert node "GP copy globalreg" before call to function.
+ //
+ // R_MIPS_CALL* operators (emitted when non-internal functions are called
+ // in PIC mode) allow symbols to be resolved via lazy binding.
+ // The lazy binding stub requires GP to point to the GOT.
+ if (IsPICCall && !InternalLinkage) {
+ unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+ RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(CLI.DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+}
+
+/// LowerCall - functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+SDValue
+MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &DL = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+
+ MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
+ getTargetMachine().Options.UseSoftFloat,
+ Callee.getNode(), CLI.Args);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NextStackOffset = CCInfo.getNextStackOffset();
+
+ // Check if it's really possible to do a tail call.
+ if (IsTailCall)
+ IsTailCall =
+ isEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
+ *MF.getInfo<MipsFunctionInfo>());
+
+ if (IsTailCall)
+ ++NumTailCalls;
+
+ // Chain is the output chain of the last Load/Store or CopyToReg node.
+ // ByValChain is the output chain of the last Memcpy node created for copying
+ // byval arguments to the stack.
+ unsigned StackAlignment = TFL->getStackAlignment();
+ NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment);
+ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
+
+ if (!IsTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, DL,
+ IsN64 ? Mips::SP_64 : Mips::SP,
+ getPointerTy());
+
+ // With EABI is it possible to have 16 args on registers.
+ std::deque< std::pair<unsigned, SDValue> > RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue Arg = OutVals[i];
+ CCValAssign &VA = ArgLocs[i];
+ MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT();
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // ByVal Arg.
+ if (Flags.isByVal()) {
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ assert(ByValArg != MipsCCInfo.byval_end());
+ assert(!IsTailCall &&
+ "Do not tail-call optimize if there is a byval argument.");
+ passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
+ MipsCCInfo, *ByValArg, Flags, Subtarget->isLittle());
+ ++ByValArg;
+ continue;
+ }
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ if (VA.isRegLoc()) {
+ if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
+ (ValVT == MVT::f64 && LocVT == MVT::i64) ||
+ (ValVT == MVT::i64 && LocVT == MVT::f64))
+ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
+ else if (ValVT == MVT::f64 && LocVT == MVT::i32) {
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Arg, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Arg, DAG.getConstant(1, MVT::i32));
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
+ unsigned LocRegLo = VA.getLocReg();
+ unsigned LocRegHigh = getNextIntArgReg(LocRegLo);
+ RegsToPass.push_back(std::make_pair(LocRegLo, Lo));
+ RegsToPass.push_back(std::make_pair(LocRegHigh, Hi));
+ continue;
+ }
+ }
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ // Register can't get to this point...
+ assert(VA.isMemLoc());
+
+ // emit ISD::STORE whichs stores the
+ // parameter value to a stack Location
+ MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(),
+ Chain, Arg, DL, IsTailCall, DAG));
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
+ bool GlobalOrExternal = false, InternalLinkage = false;
+ SDValue CalleeLo;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (IsPICCall) {
+ InternalLinkage = G->getGlobal()->hasInternalLinkage();
+
+ if (InternalLinkage)
+ Callee = getAddrLocal(Callee, DAG, HasMips64);
+ else if (LargeGOT)
+ Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
+ MipsII::MO_CALL_LO16);
+ else
+ Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
+ } else
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
+ MipsII::MO_NO_FLAG);
+ GlobalOrExternal = true;
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ if (!IsN64 && !IsPIC) // !N64 && static
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ MipsII::MO_NO_FLAG);
+ else if (LargeGOT)
+ Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
+ MipsII::MO_CALL_LO16);
+ else // N64 || PIC
+ Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
+
+ GlobalOrExternal = true;
+ }
+
+ SmallVector<SDValue, 8> Ops(1, Chain);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
+ CLI, Callee, Chain);
+
+ if (IsTailCall)
+ return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size());
+
+ Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size());
+ SDValue InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg,
+ Ins, DL, DAG, InVals, CLI.Callee.getNode(), CLI.RetTy);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SDNode *CallNode,
+ const Type *RetTy) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+
+ MipsCCInfo.analyzeCallResult(Ins, getTargetMachine().Options.UseSoftFloat,
+ CallNode, RetTy);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(),
+ RVLocs[i].getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
+ Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getValVT(), Val);
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+/// LowerFormalArguments - transform physical registers into virtual registers
+/// and generate load operations for arguments places on the stack.
+SDValue
+MipsTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ MipsFI->setVarArgsFrameIndex(0);
+
+ // Used with vargs to acumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+ Function::const_arg_iterator FuncArg =
+ DAG.getMachineFunction().getFunction()->arg_begin();
+ bool UseSoftFloat = getTargetMachine().Options.UseSoftFloat;
+
+ MipsCCInfo.analyzeFormalArguments(Ins, UseSoftFloat, FuncArg);
+ MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
+ MipsCCInfo.hasByValArg());
+
+ unsigned CurArgIdx = 0;
+ MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ std::advance(FuncArg, Ins[i].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[i].OrigArgIndex;
+ EVT ValVT = VA.getValVT();
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ bool IsRegLoc = VA.isRegLoc();
+
+ if (Flags.isByVal()) {
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ assert(ByValArg != MipsCCInfo.byval_end());
+ copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg,
+ MipsCCInfo, *ByValArg);
+ ++ByValArg;
+ continue;
+ }
+
+ // Arguments stored on registers
+ if (IsRegLoc) {
+ EVT RegVT = VA.getLocVT();
+ unsigned ArgReg = VA.getLocReg();
+ const TargetRegisterClass *RC;
+
+ if (RegVT == MVT::i32)
+ RC = Subtarget->inMips16Mode()? &Mips::CPU16RegsRegClass :
+ &Mips::CPURegsRegClass;
+ else if (RegVT == MVT::i64)
+ RC = &Mips::CPU64RegsRegClass;
+ else if (RegVT == MVT::f32)
+ RC = &Mips::FGR32RegClass;
+ else if (RegVT == MVT::f64)
+ RC = HasMips64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
+ else
+ llvm_unreachable("RegVT not supported by FormalArguments Lowering");
+
+ // Transform the arguments stored on
+ // physical registers into virtual ones
+ unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, DL, RegVT, ArgValue,
+ DAG.getValueType(ValVT));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
+ }
+
+ // Handle floating point arguments passed in integer registers and
+ // long double arguments passed in floating point registers.
+ if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
+ (RegVT == MVT::i64 && ValVT == MVT::f64) ||
+ (RegVT == MVT::f64 && ValVT == MVT::i64))
+ ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
+ else if (IsO32 && RegVT == MVT::i32 && ValVT == MVT::f64) {
+ unsigned Reg2 = addLiveIn(DAG.getMachineFunction(),
+ getNextIntArgReg(ArgReg), RC);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT);
+ if (!Subtarget->isLittle())
+ std::swap(ArgValue, ArgValue2);
+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64,
+ ArgValue, ArgValue2);
+ }
+
+ InVals.push_back(ArgValue);
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+
+ // The stack pointer offset is relative to the caller stack frame.
+ int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(ValVT, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0));
+ }
+ }
+
+ // The mips ABIs for returning structs by value requires that we copy
+ // the sret argument into $v0 for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ unsigned Reg = MipsFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().
+ createVirtualRegister(getRegClassFor(IsN64 ? MVT::i64 : MVT::i32));
+ MipsFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
+ }
+
+ if (IsVarArg)
+ writeVarArgRegs(OutChains, MipsCCInfo, Chain, DL, DAG);
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens when on varg functions
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+bool
+MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_Mips);
+}
+
+SDValue
+MipsTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const {
+ // CCValAssign - represent the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
+ *DAG.getContext());
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+
+ // Analyze return values.
+ MipsCCInfo.analyzeReturn(Outs, getTargetMachine().Options.UseSoftFloat,
+ MF.getFunction()->getReturnType());
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ SDValue Val = OutVals[i];
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
+ Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val);
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ // The mips ABIs for returning structs by value requires that we copy
+ // the sret argument into $v0 for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into $v0.
+ if (MF.getFunction()->hasStructRetAttr()) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ unsigned Reg = MipsFI->getSRetReturnReg();
+
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+ unsigned V0 = IsN64 ? Mips::V0_64 : Mips::V0;
+
+ Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(V0, getPointerTy()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ // Return on Mips is always a "jr $ra"
+ return DAG.getNode(MipsISD::Ret, DL, MVT::Other, &RetOps[0], RetOps.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Mips Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MipsTargetLowering::ConstraintType MipsTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+ // Mips specific constrainy
+ // GCC config/mips/constraints.md
+ //
+ // 'd' : An address register. Equivalent to r
+ // unless generating MIPS16 code.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'c' : A register suitable for use in an indirect
+ // jump. This will always be $25 for -mabicalls.
+ // 'l' : The lo register. 1 word storage.
+ // 'x' : The hilo register pair. Double word storage.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ case 'c':
+ case 'l':
+ case 'x':
+ return C_RegisterClass;
+ case 'R':
+ return C_Memory;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MipsTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ case 'c': // $25 for indirect jumps
+ case 'l': // lo register
+ case 'x': // hilo register pair
+ if (type->isIntegerTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'I': // signed 16 bit immediate
+ case 'J': // integer zero
+ case 'K': // unsigned 16 bit immediate
+ case 'L': // signed 32 bit immediate where lower 16 bits are 0
+ case 'N': // immediate in the range of -65535 to -1 (inclusive)
+ case 'O': // signed 15 bit immediate (+- 16383)
+ case 'P': // immediate in the range of 65535 to 1 (inclusive)
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'R':
+ weight = CW_Memory;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
+{
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
+ case 'y': // Same as 'r'. Exists for compatibility.
+ case 'r':
+ if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+ if (Subtarget->inMips16Mode())
+ return std::make_pair(0U, &Mips::CPU16RegsRegClass);
+ return std::make_pair(0U, &Mips::CPURegsRegClass);
+ }
+ if (VT == MVT::i64 && !HasMips64)
+ return std::make_pair(0U, &Mips::CPURegsRegClass);
+ if (VT == MVT::i64 && HasMips64)
+ return std::make_pair(0U, &Mips::CPU64RegsRegClass);
+ // This will generate an error message
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, &Mips::FGR32RegClass);
+ if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) {
+ if (Subtarget->isFP64bit())
+ return std::make_pair(0U, &Mips::FGR64RegClass);
+ return std::make_pair(0U, &Mips::AFGR64RegClass);
+ }
+ break;
+ case 'c': // register suitable for indirect jump
+ if (VT == MVT::i32)
+ return std::make_pair((unsigned)Mips::T9, &Mips::CPURegsRegClass);
+ assert(VT == MVT::i64 && "Unexpected type.");
+ return std::make_pair((unsigned)Mips::T9_64, &Mips::CPU64RegsRegClass);
+ case 'l': // register suitable for indirect jump
+ if (VT == MVT::i32)
+ return std::make_pair((unsigned)Mips::LO, &Mips::HILORegClass);
+ return std::make_pair((unsigned)Mips::LO64, &Mips::HILO64RegClass);
+ case 'x': // register suitable for indirect jump
+ // Fixme: Not triggering the use of both hi and low
+ // This will generate an error message
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only support length 1 constraints for now.
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break; // This will fall through to the generic implementation
+ case 'I': // Signed 16 bit constant
+ // If this fails, the parent routine will give an error
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getSExtValue();
+ if (isInt<16>(Val)) {
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ case 'J': // integer zero
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getZExtValue();
+ if (Val == 0) {
+ Result = DAG.getTargetConstant(0, Type);
+ break;
+ }
+ }
+ return;
+ case 'K': // unsigned 16 bit immediate
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ uint64_t Val = (uint64_t)C->getZExtValue();
+ if (isUInt<16>(Val)) {
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ case 'L': // signed 32 bit immediate where lower 16 bits are 0
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getSExtValue();
+ if ((isInt<32>(Val)) && ((Val & 0xffff) == 0)){
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ case 'N': // immediate in the range of -65535 to -1 (inclusive)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getSExtValue();
+ if ((Val >= -65535) && (Val <= -1)) {
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ case 'O': // signed 15 bit immediate
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getSExtValue();
+ if ((isInt<15>(Val))) {
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ case 'P': // immediate in the range of 1 to 65535 (inclusive)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getSExtValue();
+ if ((Val <= 65535) && (Val >= 1)) {
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+bool
+MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM, Type *Ty) const {
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (!AM.HasBaseReg) // allow "r+i".
+ break;
+ return false; // disallow "r+r" or "r+r+i".
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+bool
+MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Mips target isn't yet aware of offsets.
+ return false;
+}
+
+EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ if (Subtarget->hasMips64())
+ return MVT::i64;
+
+ return MVT::i32;
+}
+
+bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+ if (Imm.isNegZero())
+ return false;
+ return Imm.isZero();
+}
+
+unsigned MipsTargetLowering::getJumpTableEncoding() const {
+ if (IsN64)
+ return MachineJumpTableInfo::EK_GPRel64BlockAddress;
+
+ return TargetLowering::getJumpTableEncoding();
+}
+
+/// This function returns true if CallSym is a long double emulation routine.
+static bool isF128SoftLibCall(const char *CallSym) {
+ const char *const LibCalls[] =
+ {"__addtf3", "__divtf3", "__eqtf2", "__extenddftf2", "__extendsftf2",
+ "__fixtfdi", "__fixtfsi", "__fixtfti", "__fixunstfdi", "__fixunstfsi",
+ "__fixunstfti", "__floatditf", "__floatsitf", "__floattitf",
+ "__floatunditf", "__floatunsitf", "__floatuntitf", "__getf2", "__gttf2",
+ "__letf2", "__lttf2", "__multf3", "__netf2", "__powitf2", "__subtf3",
+ "__trunctfdf2", "__trunctfsf2", "__unordtf2",
+ "ceill", "copysignl", "cosl", "exp2l", "expl", "floorl", "fmal", "fmodl",
+ "log10l", "log2l", "logl", "nearbyintl", "powl", "rintl", "sinl", "sqrtl",
+ "truncl"};
+
+ const char * const *End = LibCalls + array_lengthof(LibCalls);
+
+ // Check that LibCalls is sorted alphabetically.
+ MipsTargetLowering::LTStr Comp;
+
+#ifndef NDEBUG
+ for (const char * const *I = LibCalls; I < End - 1; ++I)
+ assert(Comp(*I, *(I + 1)));
+#endif
+
+ return std::binary_search(LibCalls, End, CallSym, Comp);
+}
+
+/// This function returns true if Ty is fp128 or i128 which was originally a
+/// fp128.
+static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
+ if (Ty->isFP128Ty())
+ return true;
+
+ const ExternalSymbolSDNode *ES =
+ dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode);
+
+ // If the Ty is i128 and the function being called is a long double emulation
+ // routine, then the original type is f128.
+ return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
+}
+
+MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CC, bool IsO32_,
+ CCState &Info)
+ : CCInfo(Info), CallConv(CC), IsO32(IsO32_) {
+ // Pre-allocate reserved argument area.
+ CCInfo.AllocateStack(reservedArgArea(), 1);
+}
+
+void MipsTargetLowering::MipsCC::
+analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
+ bool IsVarArg, bool IsSoftFloat, const SDNode *CallNode,
+ std::vector<ArgListEntry> &FuncArgs) {
+ assert((CallConv != CallingConv::Fast || !IsVarArg) &&
+ "CallingConv::Fast shouldn't be used for vararg functions.");
+
+ unsigned NumOpnds = Args.size();
+ llvm::CCAssignFn *FixedFn = fixedArgFn(), *VarFn = varArgFn();
+
+ for (unsigned I = 0; I != NumOpnds; ++I) {
+ MVT ArgVT = Args[I].VT;
+ ISD::ArgFlagsTy ArgFlags = Args[I].Flags;
+ bool R;
+
+ if (ArgFlags.isByVal()) {
+ handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags);
+ continue;
+ }
+
+ if (IsVarArg && !Args[I].IsFixed)
+ R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+ else {
+ MVT RegVT = getRegVT(ArgVT, FuncArgs[Args[I].OrigArgIndex].Ty, CallNode,
+ IsSoftFloat);
+ R = FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo);
+ }
+
+ if (R) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << I << " has unhandled type "
+ << EVT(ArgVT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+void MipsTargetLowering::MipsCC::
+analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
+ bool IsSoftFloat, Function::const_arg_iterator FuncArg) {
+ unsigned NumArgs = Args.size();
+ llvm::CCAssignFn *FixedFn = fixedArgFn();
+ unsigned CurArgIdx = 0;
+
+ for (unsigned I = 0; I != NumArgs; ++I) {
+ MVT ArgVT = Args[I].VT;
+ ISD::ArgFlagsTy ArgFlags = Args[I].Flags;
+ std::advance(FuncArg, Args[I].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Args[I].OrigArgIndex;
+
+ if (ArgFlags.isByVal()) {
+ handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags);
+ continue;
+ }
+
+ MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), 0, IsSoftFloat);
+
+ if (!FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo))
+ continue;
+
+#ifndef NDEBUG
+ dbgs() << "Formal Arg #" << I << " has unhandled type "
+ << EVT(ArgVT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+}
+
+template<typename Ty>
+void MipsTargetLowering::MipsCC::
+analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const {
+ CCAssignFn *Fn;
+
+ if (IsSoftFloat && originalTypeIsF128(RetTy, CallNode))
+ Fn = RetCC_F128Soft;
+ else
+ Fn = RetCC_Mips;
+
+ for (unsigned I = 0, E = RetVals.size(); I < E; ++I) {
+ MVT VT = RetVals[I].VT;
+ ISD::ArgFlagsTy Flags = RetVals[I].Flags;
+ MVT RegVT = this->getRegVT(VT, RetTy, CallNode, IsSoftFloat);
+
+ if (Fn(I, VT, RegVT, CCValAssign::Full, Flags, this->CCInfo)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << I << " has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+void MipsTargetLowering::MipsCC::
+analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const {
+ analyzeReturn(Ins, IsSoftFloat, CallNode, RetTy);
+}
+
+void MipsTargetLowering::MipsCC::
+analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat,
+ const Type *RetTy) const {
+ analyzeReturn(Outs, IsSoftFloat, 0, RetTy);
+}
+
+void
+MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
+ MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags) {
+ assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0.");
+
+ struct ByValArgInfo ByVal;
+ unsigned RegSize = regSize();
+ unsigned ByValSize = RoundUpToAlignment(ArgFlags.getByValSize(), RegSize);
+ unsigned Align = std::min(std::max(ArgFlags.getByValAlign(), RegSize),
+ RegSize * 2);
+
+ if (useRegsForByval())
+ allocateRegs(ByVal, ByValSize, Align);
+
+ // Allocate space on caller's stack.
+ ByVal.Address = CCInfo.AllocateStack(ByValSize - RegSize * ByVal.NumRegs,
+ Align);
+ CCInfo.addLoc(CCValAssign::getMem(ValNo, ValVT, ByVal.Address, LocVT,
+ LocInfo));
+ ByValArgs.push_back(ByVal);
+}
+
+unsigned MipsTargetLowering::MipsCC::numIntArgRegs() const {
+ return IsO32 ? array_lengthof(O32IntRegs) : array_lengthof(Mips64IntRegs);
+}
+
+unsigned MipsTargetLowering::MipsCC::reservedArgArea() const {
+ return (IsO32 && (CallConv != CallingConv::Fast)) ? 16 : 0;
+}
+
+const uint16_t *MipsTargetLowering::MipsCC::intArgRegs() const {
+ return IsO32 ? O32IntRegs : Mips64IntRegs;
+}
+
+llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const {
+ if (CallConv == CallingConv::Fast)
+ return CC_Mips_FastCC;
+
+ return IsO32 ? CC_MipsO32 : CC_MipsN;
+}
+
+llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const {
+ return IsO32 ? CC_MipsO32 : CC_MipsN_VarArg;
+}
+
+const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const {
+ return IsO32 ? O32IntRegs : Mips64DPRegs;
+}
+
+void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
+ unsigned ByValSize,
+ unsigned Align) {
+ unsigned RegSize = regSize(), NumIntArgRegs = numIntArgRegs();
+ const uint16_t *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs();
+ assert(!(ByValSize % RegSize) && !(Align % RegSize) &&
+ "Byval argument's size and alignment should be a multiple of"
+ "RegSize.");
+
+ ByVal.FirstIdx = CCInfo.getFirstUnallocated(IntArgRegs, NumIntArgRegs);
+
+ // If Align > RegSize, the first arg register must be even.
+ if ((Align > RegSize) && (ByVal.FirstIdx % 2)) {
+ CCInfo.AllocateReg(IntArgRegs[ByVal.FirstIdx], ShadowRegs[ByVal.FirstIdx]);
+ ++ByVal.FirstIdx;
+ }
+
+ // Mark the registers allocated.
+ for (unsigned I = ByVal.FirstIdx; ByValSize && (I < NumIntArgRegs);
+ ByValSize -= RegSize, ++I, ++ByVal.NumRegs)
+ CCInfo.AllocateReg(IntArgRegs[I], ShadowRegs[I]);
+}
+
+MVT MipsTargetLowering::MipsCC::getRegVT(MVT VT, const Type *OrigTy,
+ const SDNode *CallNode,
+ bool IsSoftFloat) const {
+ if (IsSoftFloat || IsO32)
+ return VT;
+
+ // Check if the original type was fp128.
+ if (originalTypeIsF128(OrigTy, CallNode)) {
+ assert(VT == MVT::i64);
+ return MVT::f64;
+ }
+
+ return VT;
+}
+
+void MipsTargetLowering::
+copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
+ SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
+ SmallVectorImpl<SDValue> &InVals, const Argument *FuncArg,
+ const MipsCC &CC, const ByValArgInfo &ByVal) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned RegAreaSize = ByVal.NumRegs * CC.regSize();
+ unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize);
+ int FrameObjOffset;
+
+ if (RegAreaSize)
+ FrameObjOffset = (int)CC.reservedArgArea() -
+ (int)((CC.numIntArgRegs() - ByVal.FirstIdx) * CC.regSize());
+ else
+ FrameObjOffset = ByVal.Address;
+
+ // Create frame object.
+ EVT PtrTy = getPointerTy();
+ int FI = MFI->CreateFixedObject(FrameObjSize, FrameObjOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+ InVals.push_back(FIN);
+
+ if (!ByVal.NumRegs)
+ return;
+
+ // Copy arg registers.
+ MVT RegTy = MVT::getIntegerVT(CC.regSize() * 8);
+ const TargetRegisterClass *RC = getRegClassFor(RegTy);
+
+ for (unsigned I = 0; I < ByVal.NumRegs; ++I) {
+ unsigned ArgReg = CC.intArgRegs()[ByVal.FirstIdx + I];
+ unsigned VReg = addLiveIn(MF, ArgReg, RC);
+ unsigned Offset = I * CC.regSize();
+ SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN,
+ DAG.getConstant(Offset, PtrTy));
+ SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy),
+ StorePtr, MachinePointerInfo(FuncArg, Offset),
+ false, false, 0);
+ OutChains.push_back(Store);
+ }
+}
+
+// Copy byVal arg to registers and stack.
+void MipsTargetLowering::
+passByValArg(SDValue Chain, DebugLoc DL,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
+ MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
+ const MipsCC &CC, const ByValArgInfo &ByVal,
+ const ISD::ArgFlagsTy &Flags, bool isLittle) const {
+ unsigned ByValSize = Flags.getByValSize();
+ unsigned Offset = 0; // Offset in # of bytes from the beginning of struct.
+ unsigned RegSize = CC.regSize();
+ unsigned Alignment = std::min(Flags.getByValAlign(), RegSize);
+ EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSize * 8);
+
+ if (ByVal.NumRegs) {
+ const uint16_t *ArgRegs = CC.intArgRegs();
+ bool LeftoverBytes = (ByVal.NumRegs * RegSize > ByValSize);
+ unsigned I = 0;
+
+ // Copy words to registers.
+ for (; I < ByVal.NumRegs - LeftoverBytes; ++I, Offset += RegSize) {
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
+ DAG.getConstant(Offset, PtrTy));
+ SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
+ MachinePointerInfo(), false, false, false,
+ Alignment);
+ MemOpChains.push_back(LoadVal.getValue(1));
+ unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I];
+ RegsToPass.push_back(std::make_pair(ArgReg, LoadVal));
+ }
+
+ // Return if the struct has been fully copied.
+ if (ByValSize == Offset)
+ return;
+
+ // Copy the remainder of the byval argument with sub-word loads and shifts.
+ if (LeftoverBytes) {
+ assert((ByValSize > Offset) && (ByValSize < Offset + RegSize) &&
+ "Size of the remainder should be smaller than RegSize.");
+ SDValue Val;
+
+ for (unsigned LoadSize = RegSize / 2, TotalSizeLoaded = 0;
+ Offset < ByValSize; LoadSize /= 2) {
+ unsigned RemSize = ByValSize - Offset;
+
+ if (RemSize < LoadSize)
+ continue;
+
+ // Load subword.
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
+ DAG.getConstant(Offset, PtrTy));
+ SDValue LoadVal =
+ DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr,
+ MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8),
+ false, false, Alignment);
+ MemOpChains.push_back(LoadVal.getValue(1));
+
+ // Shift the loaded value.
+ unsigned Shamt;
+
+ if (isLittle)
+ Shamt = TotalSizeLoaded;
+ else
+ Shamt = (RegSize - (TotalSizeLoaded + LoadSize)) * 8;
+
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal,
+ DAG.getConstant(Shamt, MVT::i32));
+
+ if (Val.getNode())
+ Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift);
+ else
+ Val = Shift;
+
+ Offset += LoadSize;
+ TotalSizeLoaded += LoadSize;
+ Alignment = std::min(Alignment, LoadSize);
+ }
+
+ unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I];
+ RegsToPass.push_back(std::make_pair(ArgReg, Val));
+ return;
+ }
+ }
+
+ // Copy remainder of byval arg to it with memcpy.
+ unsigned MemCpySize = ByValSize - Offset;
+ SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
+ DAG.getConstant(Offset, PtrTy));
+ SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
+ DAG.getIntPtrConstant(ByVal.Address));
+ Chain = DAG.getMemcpy(Chain, DL, Dst, Src,
+ DAG.getConstant(MemCpySize, PtrTy), Alignment,
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+ MemOpChains.push_back(Chain);
+}
+
+void
+MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
+ const MipsCC &CC, SDValue Chain,
+ DebugLoc DL, SelectionDAG &DAG) const {
+ unsigned NumRegs = CC.numIntArgRegs();
+ const uint16_t *ArgRegs = CC.intArgRegs();
+ const CCState &CCInfo = CC.getCCInfo();
+ unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs);
+ unsigned RegSize = CC.regSize();
+ MVT RegTy = MVT::getIntegerVT(RegSize * 8);
+ const TargetRegisterClass *RC = getRegClassFor(RegTy);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ // Offset of the first variable argument from stack pointer.
+ int VaArgOffset;
+
+ if (NumRegs == Idx)
+ VaArgOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), RegSize);
+ else
+ VaArgOffset =
+ (int)CC.reservedArgArea() - (int)(RegSize * (NumRegs - Idx));
+
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ int FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true);
+ MipsFI->setVarArgsFrameIndex(FI);
+
+ // Copy the integer registers that have not been used for argument passing
+ // to the argument register save area. For O32, the save area is allocated
+ // in the caller's stack frame, while for N32/64, it is allocated in the
+ // callee's stack frame.
+ for (unsigned I = Idx; I < NumRegs; ++I, VaArgOffset += RegSize) {
+ unsigned Reg = addLiveIn(MF, ArgRegs[I], RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
+ FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true);
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue(0);
+ OutChains.push_back(Store);
+ }
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
new file mode 100644
index 000000000000..cab71a61e07a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -0,0 +1,465 @@
+//===-- MipsISelLowering.h - Mips DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsISELLOWERING_H
+#define MipsISELLOWERING_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetLowering.h"
+#include <deque>
+#include <string>
+
+namespace llvm {
+ namespace MipsISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Tail call
+ TailCall,
+
+ // Get the Higher 16 bits from a 32-bit immediate
+ // No relation with Mips Hi register
+ Hi,
+
+ // Get the Lower 16 bits from a 32-bit immediate
+ // No relation with Mips Lo register
+ Lo,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // Thread Pointer
+ ThreadPointer,
+
+ // Floating Point Branch Conditional
+ FPBrcond,
+
+ // Floating Point Compare
+ FPCmp,
+
+ // Floating Point Conditional Moves
+ CMovFP_T,
+ CMovFP_F,
+
+ // Floating Point Rounding
+ FPRound,
+
+ // Return
+ Ret,
+
+ EH_RETURN,
+
+ // Node used to extract integer from accumulator.
+ ExtractLOHI,
+
+ // Node used to insert integers to accumulator.
+ InsertLOHI,
+
+ // Mult nodes.
+ Mult,
+ Multu,
+
+ // MAdd/Sub nodes
+ MAdd,
+ MAddu,
+ MSub,
+ MSubu,
+
+ // DivRem(u)
+ DivRem,
+ DivRemU,
+ DivRem16,
+ DivRemU16,
+
+ BuildPairF64,
+ ExtractElementF64,
+
+ Wrapper,
+
+ DynAlloc,
+
+ Sync,
+
+ Ext,
+ Ins,
+
+ // EXTR.W instrinsic nodes.
+ EXTP,
+ EXTPDP,
+ EXTR_S_H,
+ EXTR_W,
+ EXTR_R_W,
+ EXTR_RS_W,
+ SHILO,
+ MTHLIP,
+
+ // DPA.W intrinsic nodes.
+ MULSAQ_S_W_PH,
+ MAQ_S_W_PHL,
+ MAQ_S_W_PHR,
+ MAQ_SA_W_PHL,
+ MAQ_SA_W_PHR,
+ DPAU_H_QBL,
+ DPAU_H_QBR,
+ DPSU_H_QBL,
+ DPSU_H_QBR,
+ DPAQ_S_W_PH,
+ DPSQ_S_W_PH,
+ DPAQ_SA_L_W,
+ DPSQ_SA_L_W,
+ DPA_W_PH,
+ DPS_W_PH,
+ DPAQX_S_W_PH,
+ DPAQX_SA_W_PH,
+ DPAX_W_PH,
+ DPSX_W_PH,
+ DPSQX_S_W_PH,
+ DPSQX_SA_W_PH,
+ MULSA_W_PH,
+
+ MULT,
+ MULTU,
+ MADD_DSP,
+ MADDU_DSP,
+ MSUB_DSP,
+ MSUBU_DSP,
+
+ // Load/Store Left/Right nodes.
+ LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LWR,
+ SWL,
+ SWR,
+ LDL,
+ LDR,
+ SDL,
+ SDR
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class MipsFunctionInfo;
+
+ class MipsTargetLowering : public TargetLowering {
+ public:
+ explicit MipsTargetLowering(MipsTargetMachine &TM);
+
+ static const MipsTargetLowering *create(MipsTargetMachine &TM);
+
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ virtual void LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ // DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ EVT getSetCCResultType(EVT VT) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ struct LTStr {
+ bool operator()(const char *S1, const char *S2) const {
+ return strcmp(S1, S2) < 0;
+ }
+ };
+
+ protected:
+ SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
+
+ SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const;
+
+ SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const;
+
+ SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag, unsigned LoFlag) const;
+
+ /// This function fills Ops, which is the list of operands that will later
+ /// be used when a function call node is created. It also generates
+ /// copyToReg nodes to set up argument registers.
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ /// ByValArgInfo - Byval argument information.
+ struct ByValArgInfo {
+ unsigned FirstIdx; // Index of the first register used.
+ unsigned NumRegs; // Number of registers used for this argument.
+ unsigned Address; // Offset of the stack area used to pass this argument.
+
+ ByValArgInfo() : FirstIdx(0), NumRegs(0), Address(0) {}
+ };
+
+ /// MipsCC - This class provides methods used to analyze formal and call
+ /// arguments and inquire about calling convention information.
+ class MipsCC {
+ public:
+ MipsCC(CallingConv::ID CallConv, bool IsO32, CCState &Info);
+
+ void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ bool IsVarArg, bool IsSoftFloat,
+ const SDNode *CallNode,
+ std::vector<ArgListEntry> &FuncArgs);
+ void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ bool IsSoftFloat,
+ Function::const_arg_iterator FuncArg);
+
+ void analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ bool IsSoftFloat, const SDNode *CallNode,
+ const Type *RetTy) const;
+
+ void analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ bool IsSoftFloat, const Type *RetTy) const;
+
+ const CCState &getCCInfo() const { return CCInfo; }
+
+ /// hasByValArg - Returns true if function has byval arguments.
+ bool hasByValArg() const { return !ByValArgs.empty(); }
+
+ /// regSize - Size (in number of bits) of integer registers.
+ unsigned regSize() const { return IsO32 ? 4 : 8; }
+
+ /// numIntArgRegs - Number of integer registers available for calls.
+ unsigned numIntArgRegs() const;
+
+ /// reservedArgArea - The size of the area the caller reserves for
+ /// register arguments. This is 16-byte if ABI is O32.
+ unsigned reservedArgArea() const;
+
+ /// Return pointer to array of integer argument registers.
+ const uint16_t *intArgRegs() const;
+
+ typedef SmallVector<ByValArgInfo, 2>::const_iterator byval_iterator;
+ byval_iterator byval_begin() const { return ByValArgs.begin(); }
+ byval_iterator byval_end() const { return ByValArgs.end(); }
+
+ private:
+ void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags);
+
+ /// useRegsForByval - Returns true if the calling convention allows the
+ /// use of registers to pass byval arguments.
+ bool useRegsForByval() const { return CallConv != CallingConv::Fast; }
+
+ /// Return the function that analyzes fixed argument list functions.
+ llvm::CCAssignFn *fixedArgFn() const;
+
+ /// Return the function that analyzes variable argument list functions.
+ llvm::CCAssignFn *varArgFn() const;
+
+ const uint16_t *shadowRegs() const;
+
+ void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize,
+ unsigned Align);
+
+ /// Return the type of the register which is used to pass an argument or
+ /// return a value. This function returns f64 if the argument is an i64
+ /// value which has been generated as a result of softening an f128 value.
+ /// Otherwise, it just returns VT.
+ MVT getRegVT(MVT VT, const Type *OrigTy, const SDNode *CallNode,
+ bool IsSoftFloat) const;
+
+ template<typename Ty>
+ void analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const;
+
+ CCState &CCInfo;
+ CallingConv::ID CallConv;
+ bool IsO32;
+ SmallVector<ByValArgInfo, 2> ByValArgs;
+ };
+
+ // Subtarget Info
+ const MipsSubtarget *Subtarget;
+
+ bool HasMips64, IsN64, IsO32;
+
+ private:
+ // Lower Operand helpers
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SDNode *CallNode, const Type *RetTy) const;
+
+ // Lower Operand specifics
+ SDValue lowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
+ bool IsSRA) const;
+ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
+
+ /// isEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization.
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const = 0;
+
+ /// copyByValArg - Copy argument registers which were used to pass a byval
+ /// argument to the stack. Create a stack frame object for the byval
+ /// argument.
+ void copyByValRegs(SDValue Chain, DebugLoc DL,
+ std::vector<SDValue> &OutChains, SelectionDAG &DAG,
+ const ISD::ArgFlagsTy &Flags,
+ SmallVectorImpl<SDValue> &InVals,
+ const Argument *FuncArg,
+ const MipsCC &CC, const ByValArgInfo &ByVal) const;
+
+ /// passByValArg - Pass a byval argument in registers or on stack.
+ void passByValArg(SDValue Chain, DebugLoc DL,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
+ MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
+ const MipsCC &CC, const ByValArgInfo &ByVal,
+ const ISD::ArgFlagsTy &Flags, bool isLittle) const;
+
+ /// writeVarArgRegs - Write variable function arguments passed in registers
+ /// to the stack. Also create a stack frame object for the first variable
+ /// argument.
+ void writeVarArgRegs(std::vector<SDValue> &OutChains, const MipsCC &CC,
+ SDValue Chain, DebugLoc DL, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain,
+ SDValue Arg, DebugLoc DL, bool IsTailCall,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraint of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ virtual unsigned getJumpTableEncoding() const;
+
+ MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode, bool Nand = false) const;
+ MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
+ bool Nand = false) const;
+ MachineBasicBlock *emitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size) const;
+ MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size) const;
+ };
+
+ /// Create MipsTargetLowering objects.
+ const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM);
+ const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM);
+}
+
+#endif // MipsISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
new file mode 100644
index 000000000000..6b23057c9cdb
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -0,0 +1,529 @@
+//===-- MipsInstrFPU.td - Mips FPU Instruction Information -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Mips FPU instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+// ------------------------
+// * 64bit fp:
+// - 32 64-bit registers (default mode)
+// - 16 even 32-bit registers (32-bit compatible mode) for
+// single and double access.
+// * 32bit fp:
+// - 16 even 32-bit registers - single and double (aliased)
+// - 32 32-bit registers (within single-only mode)
+//===----------------------------------------------------------------------===//
+
+// Floating Point Compare and Branch
+def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>,
+ SDTCisVT<1, OtherVT>]>;
+def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>,
+ SDTCisVT<2, i32>]>;
+def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
+ SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, f64>,
+ SDTCisVT<2, i32>]>;
+
+def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>;
+def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>;
+def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>;
+def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>;
+def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64",
+ SDT_MipsExtractElementF64>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in
+ def condcode : Operand<i32>;
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+def IsFP64bit : Predicate<"Subtarget.isFP64bit()">,
+ AssemblerPredicate<"FeatureFP64Bit">;
+def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">,
+ AssemblerPredicate<"!FeatureFP64Bit">;
+def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">,
+ AssemblerPredicate<"FeatureSingleFloat">;
+def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">,
+ AssemblerPredicate<"!FeatureSingleFloat">;
+
+// FP immediate patterns.
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimm0neg : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//
+// A set of multiclasses is used to address the register usage.
+//
+// S32 - single precision in 16 32bit even fp registers
+// single precision in 32 32bit fp registers in SingleOnly mode
+// S64 - single precision in 32 64bit fp registers (In64BitMode)
+// D32 - double precision in 16 32bit even fp registers
+// D64 - double precision in 32 64bit fp registers (In64BitMode)
+//
+// Only S32 and D32 are supported right now.
+//===----------------------------------------------------------------------===//
+
+class ADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fs, $ft"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$ft))], Itin, FrmFR> {
+ let isCommutable = IsComm;
+}
+
+multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode = null_frag> {
+ def _D32 : ADDS_FT<opstr, AFGR64, Itin, IsComm, OpNode>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+ def _D64 : ADDS_FT<opstr, FGR64, Itin, IsComm, OpNode>,
+ Requires<[IsFP64bit, HasStdEnc]> {
+ string DecoderNamespace = "Mips64";
+ }
+}
+
+class ABSS_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"),
+ [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>,
+ NeverHasSideEffects;
+
+multiclass ABSS_M<string opstr, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> {
+ def _D32 : ABSS_FT<opstr, AFGR64, AFGR64, Itin, OpNode>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+ def _D64 : ABSS_FT<opstr, FGR64, FGR64, Itin, OpNode>,
+ Requires<[IsFP64bit, HasStdEnc]> {
+ string DecoderNamespace = "Mips64";
+ }
+}
+
+multiclass ROUND_M<string opstr, InstrItinClass Itin> {
+ def _D32 : ABSS_FT<opstr, FGR32, AFGR64, Itin>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+ def _D64 : ABSS_FT<opstr, FGR32, FGR64, Itin>,
+ Requires<[IsFP64bit, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+ }
+}
+
+class MFC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>;
+
+class MTC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
+
+class MFC1_FT_CCR<string opstr, RegisterClass DstRC, RegisterOperand SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>;
+
+class MTC1_FT_CCR<string opstr, RegisterOperand DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
+
+class LW_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> {
+ let DecoderMethod = "DecodeFMem";
+}
+
+class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> {
+ let DecoderMethod = "DecodeFMem";
+}
+
+class MADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))], Itin, FrmFR>;
+
+class NMADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))],
+ Itin, FrmFR>;
+
+class LWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC,
+ InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fd, ${index}(${base})"),
+ [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI> {
+ let AddedComplexity = 20;
+}
+
+class SWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC,
+ InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fs, ${index}(${base})"),
+ [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI> {
+ let AddedComplexity = 20;
+}
+
+class BC1F_FT<string opstr, InstrItinClass Itin,
+ SDPatternOperator Op = null_frag> :
+ InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"),
+ [(MipsFPBrcond Op, bb:$offset)], Itin, FrmFI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let Defs = [AT];
+ let Uses = [FCR31];
+}
+
+class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs), (ins RC:$fs, RC:$ft, condcode:$cond),
+ !strconcat("c.$cond.", typestr, "\t$fs, $ft"),
+ [(OpNode RC:$fs, RC:$ft, imm:$cond)], Itin, FrmFR> {
+ let Defs = [FCR31];
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+//===----------------------------------------------------------------------===//
+def ROUND_W_S : ABSS_FT<"round.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xc, 16>;
+def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xd, 16>;
+def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xe, 16>;
+def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xf, 16>;
+def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>;
+
+defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>;
+defm TRUNC_W : ROUND_M<"trunc.w.d", IIFcvt>, ABSS_FM<0xd, 17>;
+defm CEIL_W : ROUND_M<"ceil.w.d", IIFcvt>, ABSS_FM<0xe, 17>;
+defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>;
+defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>;
+
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def ROUND_L_S : ABSS_FT<"round.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x8, 16>;
+ def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0x8, 17>;
+ def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x9, 16>;
+ def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0x9, 17>;
+ def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xa, 16>;
+ def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0xa, 17>;
+ def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xb, 16>;
+ def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0xb, 17>;
+}
+
+def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32, FGR32, IIFcvt>, ABSS_FM<0x20, 20>;
+def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>;
+def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>;
+
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32, AFGR64, IIFcvt>, ABSS_FM<0x20, 17>;
+ def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>;
+ def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>;
+}
+
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 17>;
+ def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 21>;
+ def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>;
+ def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>;
+ def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64, FGR64, IIFcvt>, ABSS_FM<0x21, 21>;
+}
+
+let Predicates = [NoNaNsFPMath, HasStdEnc] in {
+ def FABS_S : ABSS_FT<"abs.s", FGR32, FGR32, IIFcvt, fabs>, ABSS_FM<0x5, 16>;
+ def FNEG_S : ABSS_FT<"neg.s", FGR32, FGR32, IIFcvt, fneg>, ABSS_FM<0x7, 16>;
+ defm FABS : ABSS_M<"abs.d", IIFcvt, fabs>, ABSS_FM<0x5, 17>;
+ defm FNEG : ABSS_M<"neg.d", IIFcvt, fneg>, ABSS_FM<0x7, 17>;
+}
+
+def FSQRT_S : ABSS_FT<"sqrt.s", FGR32, FGR32, IIFsqrtSingle, fsqrt>,
+ ABSS_FM<0x4, 16>;
+defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>;
+
+// The odd-numbered registers are only referenced when doing loads,
+// stores, and moves between floating-point and integer registers.
+// When defining instructions, we reference all 32-bit registers,
+// regardless of register aliasing.
+
+/// Move Control Registers From/To CPU Registers
+def CFC1 : MFC1_FT_CCR<"cfc1", CPURegs, CCROpnd, IIFmove>, MFC1_FM<2>;
+def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegs, IIFmove>, MFC1_FM<6>;
+def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmove, bitconvert>, MFC1_FM<0>;
+def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmove, bitconvert>, MFC1_FM<4>;
+def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmove, bitconvert>, MFC1_FM<1>;
+def DMTC1 : MTC1_FT<"dmtc1", FGR64, CPU64Regs, IIFmove, bitconvert>, MFC1_FM<5>;
+
+def FMOV_S : ABSS_FT<"mov.s", FGR32, FGR32, IIFmove>, ABSS_FM<0x6, 16>;
+def FMOV_D32 : ABSS_FT<"mov.d", AFGR64, AFGR64, IIFmove>, ABSS_FM<0x6, 17>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+def FMOV_D64 : ABSS_FT<"mov.d", FGR64, FGR64, IIFmove>, ABSS_FM<0x6, 17>,
+ Requires<[IsFP64bit, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+}
+
+/// Floating Point Memory Instructions
+let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def LWC1_P8 : LW_FT<"lwc1", FGR32, IILoad, mem64, load>, LW_FM<0x31>;
+ def SWC1_P8 : SW_FT<"swc1", FGR32, IIStore, mem64, store>, LW_FM<0x39>;
+ def LDC164_P8 : LW_FT<"ldc1", FGR64, IILoad, mem64, load>, LW_FM<0x35> {
+ let isCodeGenOnly =1;
+ }
+ def SDC164_P8 : SW_FT<"sdc1", FGR64, IIStore, mem64, store>, LW_FM<0x3d> {
+ let isCodeGenOnly =1;
+ }
+}
+
+let Predicates = [NotN64, HasStdEnc] in {
+ def LWC1 : LW_FT<"lwc1", FGR32, IILoad, mem, load>, LW_FM<0x31>;
+ def SWC1 : SW_FT<"swc1", FGR32, IIStore, mem, store>, LW_FM<0x39>;
+}
+
+let Predicates = [NotN64, HasMips64, HasStdEnc],
+ DecoderNamespace = "Mips64" in {
+ def LDC164 : LW_FT<"ldc1", FGR64, IILoad, mem, load>, LW_FM<0x35>;
+ def SDC164 : SW_FT<"sdc1", FGR64, IIStore, mem, store>, LW_FM<0x3d>;
+}
+
+let Predicates = [NotN64, NotMips64, HasStdEnc] in {
+ def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem, load>, LW_FM<0x35>;
+ def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem, store>, LW_FM<0x3d>;
+}
+
+// Indexed loads and stores.
+let Predicates = [HasFPIdx, HasStdEnc] in {
+ def LWXC1 : LWXC1_FT<"lwxc1", FGR32, CPURegs, IILoad, load>, LWXC1_FM<0>;
+ def SWXC1 : SWXC1_FT<"swxc1", FGR32, CPURegs, IIStore, store>, SWXC1_FM<8>;
+}
+
+let Predicates = [HasMips32r2, NotMips64, HasStdEnc] in {
+ def LDXC1 : LWXC1_FT<"ldxc1", AFGR64, CPURegs, IILoad, load>, LWXC1_FM<1>;
+ def SDXC1 : SWXC1_FT<"sdxc1", AFGR64, CPURegs, IIStore, store>, SWXC1_FM<9>;
+}
+
+let Predicates = [HasMips64, NotN64, HasStdEnc], DecoderNamespace="Mips64" in {
+ def LDXC164 : LWXC1_FT<"ldxc1", FGR64, CPURegs, IILoad, load>, LWXC1_FM<1>;
+ def SDXC164 : SWXC1_FT<"sdxc1", FGR64, CPURegs, IIStore, store>, SWXC1_FM<9>;
+}
+
+// n64
+let Predicates = [IsN64, HasStdEnc], isCodeGenOnly=1 in {
+ def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32, CPU64Regs, IILoad, load>, LWXC1_FM<0>;
+ def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64, CPU64Regs, IILoad, load>,
+ LWXC1_FM<1>;
+ def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32, CPU64Regs, IIStore, store>,
+ SWXC1_FM<8>;
+ def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64, CPU64Regs, IIStore, store>,
+ SWXC1_FM<9>;
+}
+
+// Load/store doubleword indexed unaligned.
+let Predicates = [NotMips64, HasStdEnc] in {
+ def LUXC1 : LWXC1_FT<"luxc1", AFGR64, CPURegs, IILoad>, LWXC1_FM<0x5>;
+ def SUXC1 : SWXC1_FT<"suxc1", AFGR64, CPURegs, IIStore>, SWXC1_FM<0xd>;
+}
+
+let Predicates = [HasMips64, HasStdEnc],
+ DecoderNamespace="Mips64" in {
+ def LUXC164 : LWXC1_FT<"luxc1", FGR64, CPURegs, IILoad>, LWXC1_FM<0x5>;
+ def SUXC164 : SWXC1_FT<"suxc1", FGR64, CPURegs, IIStore>, SWXC1_FM<0xd>;
+}
+
+/// Floating-point Aritmetic
+def FADD_S : ADDS_FT<"add.s", FGR32, IIFadd, 1, fadd>, ADDS_FM<0x00, 16>;
+defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>;
+def FDIV_S : ADDS_FT<"div.s", FGR32, IIFdivSingle, 0, fdiv>, ADDS_FM<0x03, 16>;
+defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>;
+def FMUL_S : ADDS_FT<"mul.s", FGR32, IIFmulSingle, 1, fmul>, ADDS_FM<0x02, 16>;
+defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>;
+def FSUB_S : ADDS_FT<"sub.s", FGR32, IIFadd, 0, fsub>, ADDS_FM<0x01, 16>;
+defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>;
+
+let Predicates = [HasMips32r2, HasStdEnc] in {
+ def MADD_S : MADDS_FT<"madd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<4, 0>;
+ def MSUB_S : MADDS_FT<"msub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<5, 0>;
+}
+
+let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in {
+ def NMADD_S : NMADDS_FT<"nmadd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<6, 0>;
+ def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<7, 0>;
+}
+
+let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in {
+ def MADD_D32 : MADDS_FT<"madd.d", AFGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>;
+ def MSUB_D32 : MADDS_FT<"msub.d", AFGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>;
+}
+
+let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in {
+ def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64, IIFmulDouble, fadd>,
+ MADDS_FM<6, 1>;
+ def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64, IIFmulDouble, fsub>,
+ MADDS_FM<7, 1>;
+}
+
+let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in {
+ def MADD_D64 : MADDS_FT<"madd.d", FGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>;
+ def MSUB_D64 : MADDS_FT<"msub.d", FGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>;
+}
+
+let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc],
+ isCodeGenOnly=1 in {
+ def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64, IIFmulDouble, fadd>,
+ MADDS_FM<6, 1>;
+ def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64, IIFmulDouble, fsub>,
+ MADDS_FM<7, 1>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Branch Codes
+//===----------------------------------------------------------------------===//
+// Mips branch codes. These correspond to condcode in MipsInstrInfo.h.
+// They must be kept in synch.
+def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
+def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
+
+let DecoderMethod = "DecodeBC1" in {
+def BC1F : BC1F_FT<"bc1f", IIBranch, MIPS_BRANCH_F>, BC1F_FM<0, 0>;
+def BC1T : BC1F_FT<"bc1t", IIBranch, MIPS_BRANCH_T>, BC1F_FM<0, 1>;
+}
+//===----------------------------------------------------------------------===//
+// Floating Point Flag Conditions
+//===----------------------------------------------------------------------===//
+// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
+// They must be kept in synch.
+def MIPS_FCOND_F : PatLeaf<(i32 0)>;
+def MIPS_FCOND_UN : PatLeaf<(i32 1)>;
+def MIPS_FCOND_OEQ : PatLeaf<(i32 2)>;
+def MIPS_FCOND_UEQ : PatLeaf<(i32 3)>;
+def MIPS_FCOND_OLT : PatLeaf<(i32 4)>;
+def MIPS_FCOND_ULT : PatLeaf<(i32 5)>;
+def MIPS_FCOND_OLE : PatLeaf<(i32 6)>;
+def MIPS_FCOND_ULE : PatLeaf<(i32 7)>;
+def MIPS_FCOND_SF : PatLeaf<(i32 8)>;
+def MIPS_FCOND_NGLE : PatLeaf<(i32 9)>;
+def MIPS_FCOND_SEQ : PatLeaf<(i32 10)>;
+def MIPS_FCOND_NGL : PatLeaf<(i32 11)>;
+def MIPS_FCOND_LT : PatLeaf<(i32 12)>;
+def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
+def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
+def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
+
+/// Floating Point Compare
+def FCMP_S32 : CEQS_FT<"s", FGR32, IIFcmp, MipsFPCmp>, CEQS_FM<16>;
+def FCMP_D32 : CEQS_FT<"d", AFGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+let DecoderNamespace = "Mips64" in
+def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
+ Requires<[IsFP64bit, HasStdEnc]>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Pseudo-Instructions
+//===----------------------------------------------------------------------===//
+def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCROpnd:$src), []>;
+
+// This pseudo instr gets expanded into 2 mtc1 instrs after register
+// allocation.
+def BuildPairF64 :
+ PseudoSE<(outs AFGR64:$dst),
+ (ins CPURegs:$lo, CPURegs:$hi),
+ [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
+
+// This pseudo instr gets expanded into 2 mfc1 instrs after register
+// allocation.
+// if n is 0, lower part of src is extracted.
+// if n is 1, higher part of src is extracted.
+def ExtractElementF64 :
+ PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n),
+ [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Patterns
+//===----------------------------------------------------------------------===//
+def : MipsPat<(f32 fpimm0), (MTC1 ZERO)>;
+def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
+
+def : MipsPat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
+def : MipsPat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
+
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def : MipsPat<(f64 (sint_to_fp CPURegs:$src)),
+ (CVT_D32_W (MTC1 CPURegs:$src))>;
+ def : MipsPat<(i32 (fp_to_sint AFGR64:$src)),
+ (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
+ def : MipsPat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>;
+ def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
+}
+
+let Predicates = [IsFP64bit, HasStdEnc] in {
+ def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>;
+ def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
+
+ def : MipsPat<(f64 (sint_to_fp CPURegs:$src)),
+ (CVT_D64_W (MTC1 CPURegs:$src))>;
+ def : MipsPat<(f32 (sint_to_fp CPU64Regs:$src)),
+ (CVT_S_L (DMTC1 CPU64Regs:$src))>;
+ def : MipsPat<(f64 (sint_to_fp CPU64Regs:$src)),
+ (CVT_D64_L (DMTC1 CPU64Regs:$src))>;
+
+ def : MipsPat<(i32 (fp_to_sint FGR64:$src)),
+ (MFC1 (TRUNC_W_D64 FGR64:$src))>;
+ def : MipsPat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>;
+ def : MipsPat<(i64 (fp_to_sint FGR64:$src)),
+ (DMFC1 (TRUNC_L_D64 FGR64:$src))>;
+
+ def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
+ def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
+}
+
+// Patterns for loads/stores with a reg+imm operand.
+let AddedComplexity = 40 in {
+ let Predicates = [IsN64, HasStdEnc] in {
+ def : LoadRegImmPat<LWC1_P8, f32, load>;
+ def : StoreRegImmPat<SWC1_P8, f32>;
+ def : LoadRegImmPat<LDC164_P8, f64, load>;
+ def : StoreRegImmPat<SDC164_P8, f64>;
+ }
+
+ let Predicates = [NotN64, HasStdEnc] in {
+ def : LoadRegImmPat<LWC1, f32, load>;
+ def : StoreRegImmPat<SWC1, f32>;
+ }
+
+ let Predicates = [NotN64, HasMips64, HasStdEnc] in {
+ def : LoadRegImmPat<LDC164, f64, load>;
+ def : StoreRegImmPat<SDC164, f64>;
+ }
+
+ let Predicates = [NotN64, NotMips64, HasStdEnc] in {
+ def : LoadRegImmPat<LDC1, f64, load>;
+ def : StoreRegImmPat<SDC1, f64>;
+ }
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
new file mode 100644
index 000000000000..ee432c875355
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -0,0 +1,665 @@
+//===-- MipsInstrFormats.td - Mips Instruction Formats -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MIPS instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rs - src reg.
+// rt - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr).
+// rd - dst reg, only used on 3 regs instr.
+// shamt - only used on shift instructions, contains the shift amount.
+// funct - combined with opcode field give us an operation code.
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<4> val> {
+ bits<4> Value = val;
+}
+
+def Pseudo : Format<0>;
+def FrmR : Format<1>;
+def FrmI : Format<2>;
+def FrmJ : Format<3>;
+def FrmFR : Format<4>;
+def FrmFI : Format<5>;
+def FrmOther : Format<6>; // Instruction w/ a custom format
+
+// Generic Mips Format
+class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin, Format f>: Instruction
+{
+ field bits<32> Inst;
+ Format Form = f;
+
+ let Namespace = "Mips";
+
+ let Size = 4;
+
+ bits<6> Opcode = 0;
+
+ // Top 6 bits are the 'opcode' field
+ let Inst{31-26} = Opcode;
+
+ let OutOperandList = outs;
+ let InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ //
+ // Attributes specific to Mips instructions...
+ //
+ bits<4> FormBits = Form.Value;
+
+ // TSFlags layout should be kept in sync with MipsInstrInfo.h.
+ let TSFlags{3-0} = FormBits;
+
+ let DecoderNamespace = "Mips";
+
+ field bits<32> SoftFail = 0;
+}
+
+// Mips32/64 Instruction Format
+class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin, Format f>:
+ MipsInst<outs, ins, asmstr, pattern, itin, f> {
+ let Predicates = [HasStdEnc];
+}
+
+// Mips Pseudo Instructions Format
+class MipsPseudo<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo> :
+ MipsInst<outs, ins, "", pattern, itin, Pseudo> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// Mips32/64 Pseudo Instruction Format
+class PseudoSE<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo>:
+ MipsPseudo<outs, ins, pattern, itin> {
+ let Predicates = [HasStdEnc];
+}
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class MipsAsmPseudoInst<dag outs, dag ins, string asmstr>:
+ MipsInst<outs, ins, asmstr, [], IIPseudo, Pseudo> {
+ let isPseudo = 1;
+ let Pattern = [];
+}
+//===----------------------------------------------------------------------===//
+// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
+//===----------------------------------------------------------------------===//
+
+class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ InstSE<outs, ins, asmstr, pattern, itin, FrmR>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> shamt;
+ bits<6> funct;
+
+ let Opcode = op;
+ let funct = _funct;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Mips : <|opcode|rs|rt|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmI>
+{
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ let Opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ InstSE<outs, ins, asmstr, pattern, itin, FrmI>
+{
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+
+ let Opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Format J instruction class in Mips : <|opcode|address|>
+//===----------------------------------------------------------------------===//
+
+class FJ<bits<6> op>
+{
+ bits<26> target;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-0} = target;
+}
+
+//===----------------------------------------------------------------------===//
+// MFC instruction class in Mips : <|op|mf|rt|rd|0000000|sel|>
+//===----------------------------------------------------------------------===//
+class MFC3OP_FM<bits<6> op, bits<5> mfmt>
+{
+ bits<5> rt;
+ bits<5> rd;
+ bits<3> sel;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = mfmt;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-3} = 0;
+ let Inst{2-0} = sel;
+}
+
+class ADD_FM<bits<6> op, bits<6> funct> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class ADDI_FM<bits<6> op> {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class SRA_FM<bits<6> funct, bit rotate> {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> shamt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-22} = 0;
+ let Inst{21} = rotate;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
+
+class SRLV_FM<bits<6> funct, bit rotate> {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-7} = 0;
+ let Inst{6} = rotate;
+ let Inst{5-0} = funct;
+}
+
+class BEQ_FM<bits<6> op> {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = offset;
+}
+
+class BGEZ_FM<bits<6> op, bits<5> funct> {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = offset;
+}
+
+class B_FM {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 4;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = offset;
+}
+
+class SLTI_FM<bits<6> op> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class MFLO_FM<bits<6> funct> {
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class MTLO_FM<bits<6> funct> {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class SEB_FM<bits<5> funct, bits<6> funct2> {
+ bits<5> rd;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = funct;
+ let Inst{5-0} = funct2;
+}
+
+class CLO_FM<bits<6> funct> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1c;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+ let rt = rd;
+}
+
+class LUI_FM {
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0xf;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class JALR_FM {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 9;
+}
+
+class BAL_FM {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 1;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0x11;
+ let Inst{15-0} = offset;
+}
+
+class BGEZAL_FM<bits<5> funct> {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 1;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = offset;
+}
+
+class SYNC_FM {
+ bits<5> stype;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{10-6} = stype;
+ let Inst{5-0} = 0xf;
+}
+
+class MULT_FM<bits<6> op, bits<6> funct> {
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class EXT_FM<bits<6> funct> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> pos;
+ bits<5> size;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = size;
+ let Inst{10-6} = pos;
+ let Inst{5-0} = funct;
+}
+
+class RDHWR_FM {
+ bits<5> rt;
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 0x3b;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// FLOATING POINT INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// fs - src reg.
+// ft - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr).
+// fd - dst reg, only used on 3 regs instr.
+// fmt - double or single precision.
+// funct - combined with opcode field give us an operation code.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFI>
+{
+ bits<5> ft;
+ bits<5> base;
+ bits<16> imm16;
+
+ let Opcode = op;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = ft;
+ let Inst{15-0} = imm16;
+}
+
+class ADDS_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> ft;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+class ABSS_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+class MFC1_FM<bits<5> funct> {
+ bits<5> rt;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = fs;
+ let Inst{10-0} = 0;
+}
+
+class LW_FM<bits<6> op> {
+ bits<5> rt;
+ bits<21> addr;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = addr{20-16};
+ let Inst{20-16} = rt;
+ let Inst{15-0} = addr{15-0};
+}
+
+class MADDS_FM<bits<3> funct, bits<3> fmt> {
+ bits<5> fd;
+ bits<5> fr;
+ bits<5> fs;
+ bits<5> ft;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x13;
+ let Inst{25-21} = fr;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-3} = funct;
+ let Inst{2-0} = fmt;
+}
+
+class LWXC1_FM<bits<6> funct> {
+ bits<5> fd;
+ bits<5> base;
+ bits<5> index;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x13;
+ let Inst{25-21} = base;
+ let Inst{20-16} = index;
+ let Inst{15-11} = 0;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+class SWXC1_FM<bits<6> funct> {
+ bits<5> fs;
+ bits<5> base;
+ bits<5> index;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x13;
+ let Inst{25-21} = base;
+ let Inst{20-16} = index;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class BC1F_FM<bit nd, bit tf> {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = 0x8;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = nd;
+ let Inst{16} = tf;
+ let Inst{15-0} = offset;
+}
+
+class CEQS_FM<bits<5> fmt> {
+ bits<5> fs;
+ bits<5> ft;
+ bits<4> cond;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-8} = 0; // cc
+ let Inst{7-4} = 0x3;
+ let Inst{3-0} = cond;
+}
+
+class CMov_I_F_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+class CMov_F_I_FM<bit tf> {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 1;
+}
+
+class CMov_F_F_FM<bits<5> fmt, bit tf> {
+ bits<5> fd;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = 0x11;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
new file mode 100644
index 000000000000..ad92d41209e9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -0,0 +1,284 @@
+//===-- MipsInstrInfo.cpp - Mips Instruction Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsInstrInfo.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MipsGenInstrInfo.inc"
+
+using namespace llvm;
+
+MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr)
+ : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
+ TM(tm), UncondBrOpc(UncondBr) {}
+
+const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16InstrInfo(TM);
+
+ return llvm::createMipsSEInstrInfo(TM);
+}
+
+bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// insertNoop - If data hazard condition is found insert the target nop
+/// instruction.
+void MipsInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
+{
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Mips::NOP));
+}
+
+MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
+ unsigned Flag) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flag,
+ MFI.getObjectSize(FI), Align);
+}
+
+MachineInstr*
+MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Mips::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
+ int NumOp = Inst->getNumExplicitOperands();
+
+ // for both int and fp branches, the last explicit operand is the
+ // MBB.
+ BB = Inst->getOperand(NumOp-1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(Opc));
+
+ for (int i=0; i<NumOp-1; i++)
+ Cond.push_back(Inst->getOperand(i));
+}
+
+bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ SmallVector<MachineInstr*, 2> BranchInstrs;
+ BranchType BT = AnalyzeBranch(MBB, TBB, FBB, Cond, AllowModify, BranchInstrs);
+
+ return (BT == BT_None) || (BT == BT_Indirect);
+}
+
+void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond)
+ const {
+ unsigned Opc = Cond[0].getImm();
+ const MCInstrDesc &MCID = get(Opc);
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
+
+ for (unsigned i = 1; i < Cond.size(); ++i) {
+ if (Cond[i].isReg())
+ MIB.addReg(Cond[i].getReg());
+ else if (Cond[i].isImm())
+ MIB.addImm(Cond[i].getImm());
+ else
+ assert(true && "Cannot copy operand");
+ }
+ MIB.addMBB(TBB);
+}
+
+unsigned MipsInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ // # of condition operands:
+ // Unconditional branches: 0
+ // Floating point branches: 1 (opc)
+ // Int BranchZero: 2 (opc, reg)
+ // Int Branch: 3 (opc, reg0, reg1)
+ assert((Cond.size() <= 3) &&
+ "# of Mips branch conditions must be <= 3!");
+
+ // Two-way Conditional branch.
+ if (FBB) {
+ BuildCondBr(MBB, TBB, DL, Cond);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB);
+ return 2;
+ }
+
+ // One way branch.
+ // Unconditional branch.
+ if (Cond.empty())
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB);
+ else // Conditional branch.
+ BuildCondBr(MBB, TBB, DL, Cond);
+ return 1;
+}
+
+unsigned MipsInstrInfo::
+RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+ MachineBasicBlock::reverse_iterator FirstBr;
+ unsigned removed;
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ FirstBr = I;
+
+ // Up to 2 branches are removed.
+ // Note that indirect branches are not removed.
+ for(removed = 0; I != REnd && removed < 2; ++I, ++removed)
+ if (!GetAnalyzableBrOpc(I->getOpcode()))
+ break;
+
+ MBB.erase(I.base(), FirstBr.base());
+
+ return removed;
+}
+
+/// ReverseBranchCondition - Return the inverse opcode of the
+/// specified Branch instruction.
+bool MipsInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+ assert( (Cond.size() && Cond.size() <= 3) &&
+ "Invalid Mips branch condition!");
+ Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm()));
+ return false;
+}
+
+MipsInstrInfo::BranchType MipsInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify,
+ SmallVectorImpl<MachineInstr*> &BranchInstrs) const {
+
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ if (I == REnd || !isUnpredicatedTerminator(&*I)) {
+ // This block ends with no branches (it just falls through to its succ).
+ // Leave TBB/FBB null.
+ TBB = FBB = NULL;
+ return BT_NoBranch;
+ }
+
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+ BranchInstrs.push_back(LastInst);
+
+ // Not an analyzable branch (e.g., indirect jump).
+ if (!GetAnalyzableBrOpc(LastOpc))
+ return LastInst->isIndirectBranch() ? BT_Indirect : BT_None;
+
+ // Get the second to last instruction in the block.
+ unsigned SecondLastOpc = 0;
+ MachineInstr *SecondLastInst = NULL;
+
+ if (++I != REnd) {
+ SecondLastInst = &*I;
+ SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
+ return BT_None;
+ }
+
+ // If there is only one terminator instruction, process it.
+ if (!SecondLastOpc) {
+ // Unconditional branch
+ if (LastOpc == UncondBrOpc) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return BT_Uncond;
+ }
+
+ // Conditional branch
+ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+ return BT_Cond;
+ }
+
+ // If we reached here, there are two branches.
+ // If there are three terminators, we don't know what sort of block this is.
+ if (++I != REnd && isUnpredicatedTerminator(&*I))
+ return BT_None;
+
+ BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst);
+
+ // If second to last instruction is an unconditional branch,
+ // analyze it and remove the last instruction.
+ if (SecondLastOpc == UncondBrOpc) {
+ // Return if the last instruction cannot be removed.
+ if (!AllowModify)
+ return BT_None;
+
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ LastInst->eraseFromParent();
+ BranchInstrs.pop_back();
+ return BT_Uncond;
+ }
+
+ // Conditional branch followed by an unconditional branch.
+ // The last one must be unconditional.
+ if (LastOpc != UncondBrOpc)
+ return BT_None;
+
+ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+
+ return BT_CondUncond;
+}
+
+/// Return the number of bytes of code the specified instruction may be.
+unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MI->getDesc().getSize();
+ case TargetOpcode::INLINEASM: { // Inline Asm: Variable size.
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
new file mode 100644
index 000000000000..8c05d97beac2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -0,0 +1,142 @@
+//===-- MipsInstrInfo.h - Mips Instruction Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTRUCTIONINFO_H
+#define MIPSINSTRUCTIONINFO_H
+
+#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MipsGenInstrInfo.inc"
+
+namespace llvm {
+
+class MipsInstrInfo : public MipsGenInstrInfo {
+protected:
+ MipsTargetMachine &TM;
+ unsigned UncondBrOpc;
+
+public:
+ enum BranchType {
+ BT_None, // Couldn't analyze branch.
+ BT_NoBranch, // No branches found.
+ BT_Uncond, // One unconditional branch.
+ BT_Cond, // One conditional branch.
+ BT_CondUncond, // A conditional branch followed by an unconditional branch.
+ BT_Indirect // One indirct branch.
+ };
+
+ explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc);
+
+ static const MipsInstrInfo *create(MipsTargetMachine &TM);
+
+ /// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ BranchType AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify,
+ SmallVectorImpl<MachineInstr*> &BranchInstrs) const;
+
+ virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ /// Insert nop instruction when hazard condition is found
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MipsRegisterInfo &getRegisterInfo() const = 0;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0;
+
+ /// Return the number of bytes of code the specified instruction may be.
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0);
+ }
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0);
+ }
+
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const = 0;
+
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const = 0;
+
+protected:
+ bool isZeroImm(const MachineOperand &op) const;
+
+ MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI,
+ unsigned Flag) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0;
+
+ void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand> &Cond) const;
+
+ void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond) const;
+};
+
+/// Create MipsInstrInfo objects.
+const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM);
+const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
new file mode 100644
index 000000000000..3a82e8171301
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -0,0 +1,1328 @@
+//===- MipsInstrInfo.td - Target Description for Mips Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// Mips profiles and nodes
+//===----------------------------------------------------------------------===//
+
+def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<3, 4>,
+ SDTCisInt<4>]>;
+def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDT_ExtractLOHI : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, untyped>,
+ SDTCisVT<2, i32>]>;
+def SDT_InsertLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsMultDiv : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsMAddMSub : SDTypeProfile<1, 3,
+ [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsDivRem16 : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
+
+def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+
+def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>;
+def SDT_Ins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>,
+ SDTCisSameAs<0, 4>]>;
+
+def SDTMipsLoadLR : SDTypeProfile<1, 2,
+ [SDTCisInt<0>, SDTCisPtrTy<1>,
+ SDTCisSameAs<0, 2>]>;
+
+// Call
+def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
+// Tail call
+def MipsTailCall : SDNode<"MipsISD::TailCall", SDT_MipsJmpLink,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// Hi and Lo nodes are used to handle global addresses. Used on
+// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol
+// static model. (nothing to do with Mips Registers Hi and Lo)
+def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
+def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
+def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
+
+// TlsGd node is used to handle General Dynamic TLS
+def MipsTlsGd : SDNode<"MipsISD::TlsGd", SDTIntUnaryOp>;
+
+// TprelHi and TprelLo nodes are used to handle Local Exec TLS
+def MipsTprelHi : SDNode<"MipsISD::TprelHi", SDTIntUnaryOp>;
+def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>;
+
+// Thread pointer
+def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>;
+
+// Return
+def MipsRet : SDNode<"MipsISD::Ret", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPOptInGlue, SDNPOutGlue]>;
+
+// Node used to extract integer from LO/HI register.
+def ExtractLOHI : SDNode<"MipsISD::ExtractLOHI", SDT_ExtractLOHI>;
+
+// Node used to insert 32-bit integers to LOHI register pair.
+def InsertLOHI : SDNode<"MipsISD::InsertLOHI", SDT_InsertLOHI>;
+
+// Mult nodes.
+def MipsMult : SDNode<"MipsISD::Mult", SDT_MipsMultDiv>;
+def MipsMultu : SDNode<"MipsISD::Multu", SDT_MipsMultDiv>;
+
+// MAdd*/MSub* nodes
+def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub>;
+def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub>;
+def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub>;
+def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub>;
+
+// DivRem(u) nodes
+def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsMultDiv>;
+def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsMultDiv>;
+def MipsDivRem16 : SDNode<"MipsISD::DivRem16", SDT_MipsDivRem16, [SDNPOutGlue]>;
+def MipsDivRemU16 : SDNode<"MipsISD::DivRemU16", SDT_MipsDivRem16,
+ [SDNPOutGlue]>;
+
+// Target constant nodes that are not part of any isel patterns and remain
+// unchanged can cause instructions with illegal operands to be emitted.
+// Wrapper node patterns give the instruction selector a chance to replace
+// target constant nodes that would otherwise remain unchanged with ADDiu
+// nodes. Without these wrapper node patterns, the following conditional move
+// instrucion is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is
+// compiled:
+// movn %got(d)($gp), %got(c)($gp), $4
+// This instruction is illegal since movn can take only register operands.
+
+def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntBinOp>;
+
+def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain,SDNPSideEffect]>;
+
+def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>;
+def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>;
+
+def MipsLWL : SDNode<"MipsISD::LWL", SDTMipsLoadLR,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def MipsLWR : SDNode<"MipsISD::LWR", SDTMipsLoadLR,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def MipsSWL : SDNode<"MipsISD::SWL", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def MipsSWR : SDNode<"MipsISD::SWR", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def MipsLDL : SDNode<"MipsISD::LDL", SDTMipsLoadLR,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def MipsLDR : SDNode<"MipsISD::LDR", SDTMipsLoadLR,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def MipsSDL : SDNode<"MipsISD::SDL", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def MipsSDR : SDNode<"MipsISD::SDR", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+//===----------------------------------------------------------------------===//
+// Mips Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">,
+ AssemblerPredicate<"FeatureSEInReg">;
+def HasBitCount : Predicate<"Subtarget.hasBitCount()">,
+ AssemblerPredicate<"FeatureBitCount">;
+def HasSwap : Predicate<"Subtarget.hasSwap()">,
+ AssemblerPredicate<"FeatureSwap">;
+def HasCondMov : Predicate<"Subtarget.hasCondMov()">,
+ AssemblerPredicate<"FeatureCondMov">;
+def HasFPIdx : Predicate<"Subtarget.hasFPIdx()">,
+ AssemblerPredicate<"FeatureFPIdx">;
+def HasMips32 : Predicate<"Subtarget.hasMips32()">,
+ AssemblerPredicate<"FeatureMips32">;
+def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">,
+ AssemblerPredicate<"FeatureMips32r2">;
+def HasMips64 : Predicate<"Subtarget.hasMips64()">,
+ AssemblerPredicate<"FeatureMips64">;
+def NotMips64 : Predicate<"!Subtarget.hasMips64()">,
+ AssemblerPredicate<"!FeatureMips64">;
+def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">,
+ AssemblerPredicate<"FeatureMips64r2">;
+def IsN64 : Predicate<"Subtarget.isABI_N64()">,
+ AssemblerPredicate<"FeatureN64">;
+def NotN64 : Predicate<"!Subtarget.isABI_N64()">,
+ AssemblerPredicate<"!FeatureN64">;
+def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">,
+ AssemblerPredicate<"FeatureMips16">;
+def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">,
+ AssemblerPredicate<"FeatureMips32">;
+def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
+ AssemblerPredicate<"FeatureMips32">;
+def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">,
+ AssemblerPredicate<"FeatureMips32">;
+def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">,
+ AssemblerPredicate<"!FeatureMips16">;
+
+class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
+ let Predicates = [HasStdEnc];
+}
+
+class IsCommutable {
+ bit isCommutable = 1;
+}
+
+class IsBranch {
+ bit isBranch = 1;
+}
+
+class IsReturn {
+ bit isReturn = 1;
+}
+
+class IsCall {
+ bit isCall = 1;
+}
+
+class IsTailCall {
+ bit isCall = 1;
+ bit isTerminator = 1;
+ bit isReturn = 1;
+ bit isBarrier = 1;
+ bit hasExtraSrcRegAllocReq = 1;
+ bit isCodeGenOnly = 1;
+}
+
+class IsAsCheapAsAMove {
+ bit isAsCheapAsAMove = 1;
+}
+
+class NeverHasSideEffects {
+ bit neverHasSideEffects = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def jmptarget : Operand<OtherVT> {
+ let EncoderMethod = "getJumpTargetOpValue";
+}
+def brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeBranchTarget";
+}
+def calltarget : Operand<iPTR> {
+ let EncoderMethod = "getJumpTargetOpValue";
+}
+def calltarget64: Operand<i64>;
+def simm16 : Operand<i32> {
+ let DecoderMethod= "DecodeSimm16";
+}
+
+def simm20 : Operand<i32> {
+}
+
+def simm16_64 : Operand<i64>;
+def shamt : Operand<i32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+def MipsMemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let ParserMethod = "parseMemOperand";
+}
+
+// Address operand
+def mem : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPURegs, simm16);
+ let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+def mem64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPU64Regs, simm16_64);
+ let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+def mem_ea : Operand<i32> {
+ let PrintMethod = "printMemOperandEA";
+ let MIOperandInfo = (ops CPURegs, simm16);
+ let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
+}
+
+def mem_ea_64 : Operand<i64> {
+ let PrintMethod = "printMemOperandEA";
+ let MIOperandInfo = (ops CPU64Regs, simm16_64);
+ let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
+}
+
+// size operand of ext instruction
+def size_ext : Operand<i32> {
+ let EncoderMethod = "getSizeExtEncoding";
+ let DecoderMethod = "DecodeExtSize";
+}
+
+// size operand of ins instruction
+def size_ins : Operand<i32> {
+ let EncoderMethod = "getSizeInsEncoding";
+ let DecoderMethod = "DecodeInsSize";
+}
+
+// Transformation Function - get the lower 16 bits.
+def LO16 : SDNodeXForm<imm, [{
+ return getImm(N, N->getZExtValue() & 0xFFFF);
+}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HI16 : SDNodeXForm<imm, [{
+ return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF);
+}]>;
+
+// Plus 1.
+def Plus1 : SDNodeXForm<imm, [{ return getImm(N, N->getSExtValue() + 1); }]>;
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+// e.g. addi, andi
+def immSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>;
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+// e.g. addi, andi
+def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
+
+// Node immediate fits as 15-bit sign extended on target immediate.
+// e.g. addi, andi
+def immSExt15 : PatLeaf<(imm), [{ return isInt<15>(N->getSExtValue()); }]>;
+
+// Node immediate fits as 16-bit zero extended on target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are caught.
+// e.g. addiu, sltiu
+def immZExt16 : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i32)
+ return (uint32_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+ else
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// Immediate can be loaded with LUi (32-bit int with lower 16-bit cleared).
+def immLow16Zero : PatLeaf<(imm), [{
+ int64_t Val = N->getSExtValue();
+ return isInt<32>(Val) && !(Val & 0xffff);
+}]>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
+
+// True if (N + 1) fits in 16-bit field.
+def immSExt16Plus1 : PatLeaf<(imm), [{
+ return isInt<17>(N->getSExtValue()) && isInt<16>(N->getSExtValue() + 1);
+}]>;
+
+// Mips Address Mode! SDNode frameindex could possibily be a match
+// since load and store instructions from stack used it.
+def addr :
+ ComplexPattern<iPTR, 2, "selectIntAddr", [frameindex]>;
+
+def addrRegImm :
+ ComplexPattern<iPTR, 2, "selectAddrRegImm", [frameindex]>;
+
+def addrDefault :
+ ComplexPattern<iPTR, 2, "selectAddrDefault", [frameindex]>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+// Arithmetic and logical instructions with 3 register operands.
+class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0,
+ InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag>:
+ InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
+ let isCommutable = isComm;
+ let isReMaterializable = 1;
+ string BaseOpcode;
+ string Arch;
+}
+
+// Arithmetic and logical instructions with 2 register operands.
+class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
+ SDPatternOperator imm_type = null_frag,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16),
+ !strconcat(opstr, "\t$rt, $rs, $imm16"),
+ [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> {
+ let isReMaterializable = 1;
+}
+
+// Arithmetic Multiply ADD/SUB
+class MArithR<string opstr, bit isComm = 0> :
+ InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt),
+ !strconcat(opstr, "\t$rs, $rt"), [], IIImul, FrmR> {
+ let Defs = [HI, LO];
+ let Uses = [HI, LO];
+ let isCommutable = isComm;
+}
+
+// Logical
+class LogicNOR<string opstr, RegisterOperand RC>:
+ InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> {
+ let isCommutable = 1;
+}
+
+// Shifts
+class shift_rotate_imm<string opstr, Operand ImmOpnd,
+ RegisterOperand RC, SDPatternOperator OpNode = null_frag,
+ SDPatternOperator PF = null_frag> :
+ InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
+ !strconcat(opstr, "\t$rd, $rt, $shamt"),
+ [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>;
+
+class shift_rotate_reg<string opstr, RegisterOperand RC,
+ SDPatternOperator OpNode = null_frag>:
+ InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rt, $rs"),
+ [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>;
+
+// Load Upper Imediate
+class LoadUpper<string opstr, RegisterClass RC, Operand Imm>:
+ InstSE<(outs RC:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
+ [], IIAlu, FrmI>, IsAsCheapAsAMove {
+ let neverHasSideEffects = 1;
+ let isReMaterializable = 1;
+}
+
+class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: FFI<op, outs, ins, asmstr, pattern> {
+ bits<21> addr;
+ let Inst{25-21} = addr{20-16};
+ let Inst{15-0} = addr{15-0};
+ let DecoderMethod = "DecodeMem";
+}
+
+// Memory Load/Store
+class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC,
+ Operand MemOpnd, ComplexPattern Addr> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+ let canFoldAsLoad = 1;
+ let mayLoad = 1;
+}
+
+class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC,
+ Operand MemOpnd, ComplexPattern Addr> :
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+ let mayStore = 1;
+}
+
+multiclass LoadM<string opstr, RegisterClass RC,
+ SDPatternOperator OpNode = null_frag,
+ ComplexPattern Addr = addr> {
+ def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Load<opstr, OpNode, RC, mem64, Addr>,
+ Requires<[IsN64, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+ let isCodeGenOnly = 1;
+ }
+}
+
+multiclass StoreM<string opstr, RegisterClass RC,
+ SDPatternOperator OpNode = null_frag,
+ ComplexPattern Addr = addr> {
+ def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Store<opstr, OpNode, RC, mem64, Addr>,
+ Requires<[IsN64, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+ let isCodeGenOnly = 1;
+ }
+}
+
+// Load/Store Left/Right
+let canFoldAsLoad = 1 in
+class LoadLeftRight<string opstr, SDNode OpNode, RegisterClass RC,
+ Operand MemOpnd> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr, RC:$src),
+ !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addr:$addr, RC:$src))], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+ string Constraints = "$src = $rt";
+}
+
+class StoreLeftRight<string opstr, SDNode OpNode, RegisterClass RC,
+ Operand MemOpnd>:
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+}
+
+multiclass LoadLeftRightM<string opstr, SDNode OpNode, RegisterClass RC> {
+ def NAME : LoadLeftRight<opstr, OpNode, RC, mem>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : LoadLeftRight<opstr, OpNode, RC, mem64>,
+ Requires<[IsN64, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+ let isCodeGenOnly = 1;
+ }
+}
+
+multiclass StoreLeftRightM<string opstr, SDNode OpNode, RegisterClass RC> {
+ def NAME : StoreLeftRight<opstr, OpNode, RC, mem>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : StoreLeftRight<opstr, OpNode, RC, mem64>,
+ Requires<[IsN64, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+ let isCodeGenOnly = 1;
+ }
+}
+
+// Conditional Branch
+class CBranch<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs), (ins RC:$rs, RC:$rt, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $rt, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$offset)], IIBranch,
+ FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let Defs = [AT];
+}
+
+class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs), (ins RC:$rs, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$offset)], IIBranch, FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let Defs = [AT];
+}
+
+// SetCC
+class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>;
+
+class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
+ RegisterClass RC>:
+ InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16),
+ !strconcat(opstr, "\t$rt, $rs, $imm16"),
+ [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))],
+ IIAlu, FrmI>;
+
+// Jump
+class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
+ SDPatternOperator targetoperator> :
+ InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
+ [(operator targetoperator:$target)], IIBranch, FrmJ> {
+ let isTerminator=1;
+ let isBarrier=1;
+ let hasDelaySlot = 1;
+ let DecoderMethod = "DecodeJumpTarget";
+ let Defs = [AT];
+}
+
+// Unconditional branch
+class UncondBranch<string opstr> :
+ InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"),
+ [(br bb:$offset)], IIBranch, FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocPIC, HasStdEnc];
+ let Defs = [AT];
+}
+
+// Base class for indirect branch and return instruction classes.
+let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+class JumpFR<RegisterClass RC, SDPatternOperator operator = null_frag>:
+ InstSE<(outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch, FrmR>;
+
+// Indirect branch
+class IndirectBranch<RegisterClass RC>: JumpFR<RC, brind> {
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// Return instruction
+class RetBase<RegisterClass RC>: JumpFR<RC> {
+ let isReturn = 1;
+ let isCodeGenOnly = 1;
+ let hasCtrlDep = 1;
+ let hasExtraSrcRegAllocReq = 1;
+}
+
+// Jump and Link (Call)
+let isCall=1, hasDelaySlot=1, Defs = [RA] in {
+ class JumpLink<string opstr> :
+ InstSE<(outs), (ins calltarget:$target), !strconcat(opstr, "\t$target"),
+ [(MipsJmpLink imm:$target)], IIBranch, FrmJ> {
+ let DecoderMethod = "DecodeJumpTarget";
+ }
+
+ class JumpLinkRegPseudo<RegisterClass RC, Instruction JALRInst,
+ Register RetReg>:
+ PseudoSE<(outs), (ins RC:$rs), [(MipsJmpLink RC:$rs)], IIBranch>,
+ PseudoInstExpansion<(JALRInst RetReg, RC:$rs)>;
+
+ class JumpLinkReg<string opstr, RegisterClass RC>:
+ InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [], IIBranch, FrmR>;
+
+ class BGEZAL_FT<string opstr, RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI>;
+
+}
+
+class BAL_FT :
+ InstSE<(outs), (ins brtarget:$offset), "bal\t$offset", [], IIBranch, FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Defs = [RA];
+}
+
+// Sync
+let hasSideEffects = 1 in
+class SYNC_FT :
+ InstSE<(outs), (ins i32imm:$stype), "sync $stype", [(MipsSync imm:$stype)],
+ NoItinerary, FrmOther>;
+
+// Mul, Div
+class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
+ list<Register> DefRegs> :
+ InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [],
+ itin, FrmR> {
+ let isCommutable = 1;
+ let Defs = DefRegs;
+ let neverHasSideEffects = 1;
+}
+
+// Pseudo multiply/divide instruction with explicit accumulator register
+// operands.
+class MultDivPseudo<Instruction RealInst, RegisterClass R0, RegisterOperand R1,
+ SDPatternOperator OpNode, InstrItinClass Itin,
+ bit IsComm = 1, bit HasSideEffects = 0> :
+ PseudoSE<(outs R0:$ac), (ins R1:$rs, R1:$rt),
+ [(set R0:$ac, (OpNode R1:$rs, R1:$rt))], Itin>,
+ PseudoInstExpansion<(RealInst R1:$rs, R1:$rt)> {
+ let isCommutable = IsComm;
+ let hasSideEffects = HasSideEffects;
+}
+
+// Pseudo multiply add/sub instruction with explicit accumulator register
+// operands.
+class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode>
+ : PseudoSE<(outs ACRegs:$ac),
+ (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin),
+ [(set ACRegs:$ac,
+ (OpNode CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin))],
+ IIImul>,
+ PseudoInstExpansion<(RealInst CPURegsOpnd:$rs, CPURegsOpnd:$rt)> {
+ string Constraints = "$acin = $ac";
+}
+
+class Div<string opstr, InstrItinClass itin, RegisterOperand RO,
+ list<Register> DefRegs> :
+ InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$$zero, $rs, $rt"),
+ [], itin, FrmR> {
+ let Defs = DefRegs;
+}
+
+// Move from Hi/Lo
+class MoveFromLOHI<string opstr, RegisterClass RC, list<Register> UseRegs>:
+ InstSE<(outs RC:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, FrmR> {
+ let Uses = UseRegs;
+ let neverHasSideEffects = 1;
+}
+
+class MoveToLOHI<string opstr, RegisterClass RC, list<Register> DefRegs>:
+ InstSE<(outs), (ins RC:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo, FrmR> {
+ let Defs = DefRegs;
+ let neverHasSideEffects = 1;
+}
+
+class EffectiveAddress<string opstr, RegisterClass RC, Operand Mem> :
+ InstSE<(outs RC:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+ let isCodeGenOnly = 1;
+ let DecoderMethod = "DecodeMem";
+}
+
+// Count Leading Ones/Zeros in Word
+class CountLeading0<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [(set RO:$rd, (ctlz RO:$rs))], IIAlu, FrmR>,
+ Requires<[HasBitCount, HasStdEnc]>;
+
+class CountLeading1<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [(set RO:$rd, (ctlz (not RO:$rs)))], IIAlu, FrmR>,
+ Requires<[HasBitCount, HasStdEnc]>;
+
+
+// Sign Extend in Register.
+class SignExtInReg<string opstr, ValueType vt, RegisterClass RC> :
+ InstSE<(outs RC:$rd), (ins RC:$rt), !strconcat(opstr, "\t$rd, $rt"),
+ [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary, FrmR> {
+ let Predicates = [HasSEInReg, HasStdEnc];
+}
+
+// Subword Swap
+class SubwordSwap<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
+ NoItinerary, FrmR> {
+ let Predicates = [HasSwap, HasStdEnc];
+ let neverHasSideEffects = 1;
+}
+
+// Read Hardware
+class ReadHardware<RegisterClass CPURegClass, RegisterOperand RO> :
+ InstSE<(outs CPURegClass:$rt), (ins RO:$rd), "rdhwr\t$rt, $rd", [],
+ IIAlu, FrmR>;
+
+// Ext and Ins
+class ExtBase<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size),
+ !strconcat(opstr, " $rt, $rs, $pos, $size"),
+ [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary,
+ FrmR> {
+ let Predicates = [HasMips32r2, HasStdEnc];
+}
+
+class InsBase<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src),
+ !strconcat(opstr, " $rt, $rs, $pos, $size"),
+ [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))],
+ NoItinerary, FrmR> {
+ let Predicates = [HasMips32r2, HasStdEnc];
+ let Constraints = "$src = $rt";
+}
+
+// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
+class Atomic2Ops<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
+ PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
+
+multiclass Atomic2Ops32<PatFrag Op> {
+ def NAME : Atomic2Ops<Op, CPURegs, CPURegs>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Atomic2Ops<Op, CPURegs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+ }
+}
+
+// Atomic Compare & Swap.
+class AtomicCmpSwap<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
+ PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
+
+multiclass AtomicCmpSwap32<PatFrag Op> {
+ def NAME : AtomicCmpSwap<Op, CPURegs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : AtomicCmpSwap<Op, CPURegs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
+ let DecoderNamespace = "Mips64";
+ }
+}
+
+class LLBase<string opstr, RegisterOperand RO, Operand Mem> :
+ InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+ let mayLoad = 1;
+}
+
+class SCBase<string opstr, RegisterOperand RO, Operand Mem> :
+ InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+ let mayStore = 1;
+ let Constraints = "$rt = $dst";
+}
+
+class MFC3OP<dag outs, dag ins, string asmstr> :
+ InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// Return RA.
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
+def RetRA : PseudoSE<(outs), (ins), [(MipsRet)]>;
+
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt),
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let usesCustomInserter = 1 in {
+ defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8>;
+ defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16>;
+ defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32>;
+ defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8>;
+ defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16>;
+ defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32>;
+ defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8>;
+ defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16>;
+ defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32>;
+ defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8>;
+ defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16>;
+ defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32>;
+ defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8>;
+ defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16>;
+ defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32>;
+ defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8>;
+ defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16>;
+ defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32>;
+
+ defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8>;
+ defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16>;
+ defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32>;
+
+ defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8>;
+ defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16>;
+ defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>;
+}
+
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC64 : LoadM<"load_ac64", ACRegs>;
+ defm STORE_AC64 : StoreM<"store_ac64", ACRegs>;
+}
+
+def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>;
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MipsI Instructions
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
+ ADDI_FM<0x9>, IsAsCheapAsAMove;
+def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
+def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>;
+def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>;
+def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
+ ADDI_FM<0xc>;
+def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
+ ADDI_FM<0xd>;
+def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
+ ADDI_FM<0xe>;
+def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>;
+def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>;
+def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>;
+def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
+def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
+def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
+def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
+def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
+def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
+def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
+def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
+
+/// Shift Instructions
+def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>,
+ SRA_FM<0, 0>;
+def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>,
+ SRA_FM<2, 0>;
+def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>,
+ SRA_FM<3, 0>;
+def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>;
+def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>;
+def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>;
+
+// Rotate Instructions
+let Predicates = [HasMips32r2, HasStdEnc] in {
+ def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>,
+ SRA_FM<2, 1>;
+ def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>;
+}
+
+/// Load and Store Instructions
+/// aligned
+defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>;
+defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>;
+defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>;
+defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>;
+defm LW : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>;
+defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>;
+defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>;
+defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>;
+
+/// load/store left/right
+defm LWL : LoadLeftRightM<"lwl", MipsLWL, CPURegs>, LW_FM<0x22>;
+defm LWR : LoadLeftRightM<"lwr", MipsLWR, CPURegs>, LW_FM<0x26>;
+defm SWL : StoreLeftRightM<"swl", MipsSWL, CPURegs>, LW_FM<0x2a>;
+defm SWR : StoreLeftRightM<"swr", MipsSWR, CPURegs>, LW_FM<0x2e>;
+
+def SYNC : SYNC_FT, SYNC_FM;
+
+/// Load-linked, Store-conditional
+let Predicates = [NotN64, HasStdEnc] in {
+ def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>;
+ def SC : SCBase<"sc", CPURegsOpnd, mem>, LW_FM<0x38>;
+}
+
+let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def LL_P8 : LLBase<"ll", CPURegsOpnd, mem64>, LW_FM<0x30>;
+ def SC_P8 : SCBase<"sc", CPURegsOpnd, mem64>, LW_FM<0x38>;
+}
+
+/// Jump and Branch Instructions
+def J : JumpFJ<jmptarget, "j", br, bb>, FJ<2>,
+ Requires<[RelocStatic, HasStdEnc]>, IsBranch;
+def JR : IndirectBranch<CPURegs>, MTLO_FM<8>;
+def B : UncondBranch<"b">, B_FM;
+def BEQ : CBranch<"beq", seteq, CPURegs>, BEQ_FM<4>;
+def BNE : CBranch<"bne", setne, CPURegs>, BEQ_FM<5>;
+def BGEZ : CBranchZero<"bgez", setge, CPURegs>, BGEZ_FM<1, 1>;
+def BGTZ : CBranchZero<"bgtz", setgt, CPURegs>, BGEZ_FM<7, 0>;
+def BLEZ : CBranchZero<"blez", setle, CPURegs>, BGEZ_FM<6, 0>;
+def BLTZ : CBranchZero<"bltz", setlt, CPURegs>, BGEZ_FM<1, 0>;
+
+def BAL_BR: BAL_FT, BAL_FM;
+
+def JAL : JumpLink<"jal">, FJ<3>;
+def JALR : JumpLinkReg<"jalr", CPURegs>, JALR_FM;
+def JALRPseudo : JumpLinkRegPseudo<CPURegs, JALR, RA>;
+def BGEZAL : BGEZAL_FT<"bgezal", CPURegsOpnd>, BGEZAL_FM<0x11>;
+def BLTZAL : BGEZAL_FT<"bltzal", CPURegsOpnd>, BGEZAL_FM<0x10>;
+def TAILCALL : JumpFJ<calltarget, "j", MipsTailCall, imm>, FJ<2>, IsTailCall;
+def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, MTLO_FM<8>, IsTailCall;
+
+def RET : RetBase<CPURegs>, MTLO_FM<8>;
+
+// Exception handling related node and instructions.
+// The conversion sequence is:
+// ISD::EH_RETURN -> MipsISD::EH_RETURN ->
+// MIPSeh_return -> (stack change + indirect branch)
+//
+// MIPSeh_return takes the place of regular return instruction
+// but takes two arguments (V1, V0) which are used for storing
+// the offset and return address respectively.
+def SDT_MipsEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
+
+def MIPSehret : SDNode<"MipsISD::EH_RETURN", SDT_MipsEHRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ def MIPSeh_return32 : MipsPseudo<(outs), (ins CPURegs:$spoff, CPURegs:$dst),
+ [(MIPSehret CPURegs:$spoff, CPURegs:$dst)]>;
+ def MIPSeh_return64 : MipsPseudo<(outs), (ins CPU64Regs:$spoff,
+ CPU64Regs:$dst),
+ [(MIPSehret CPU64Regs:$spoff, CPU64Regs:$dst)]>;
+}
+
+/// Multiply and Divide Instructions.
+def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>;
+def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>;
+def PseudoMULT : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>;
+def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>;
+def SDIV : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>;
+def UDIV : Div<"divu", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1b>;
+def PseudoSDIV : MultDivPseudo<SDIV, ACRegs, CPURegsOpnd, MipsDivRem, IIIdiv, 0>;
+def PseudoUDIV : MultDivPseudo<UDIV, ACRegs, CPURegsOpnd, MipsDivRemU, IIIdiv,
+ 0>;
+
+def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>;
+def MTLO : MoveToLOHI<"mtlo", CPURegs, [LO]>, MTLO_FM<0x13>;
+def MFHI : MoveFromLOHI<"mfhi", CPURegs, [HI]>, MFLO_FM<0x10>;
+def MFLO : MoveFromLOHI<"mflo", CPURegs, [LO]>, MFLO_FM<0x12>;
+
+/// Sign Ext In Register Instructions.
+def SEB : SignExtInReg<"seb", i8, CPURegs>, SEB_FM<0x10, 0x20>;
+def SEH : SignExtInReg<"seh", i16, CPURegs>, SEB_FM<0x18, 0x20>;
+
+/// Count Leading
+def CLZ : CountLeading0<"clz", CPURegsOpnd>, CLO_FM<0x20>;
+def CLO : CountLeading1<"clo", CPURegsOpnd>, CLO_FM<0x21>;
+
+/// Word Swap Bytes Within Halfwords
+def WSBH : SubwordSwap<"wsbh", CPURegsOpnd>, SEB_FM<2, 0x20>;
+
+/// No operation.
+def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
+
+// FrameIndexes are legalized when they are operands from load/store
+// instructions. The same not happens for stack address copies, so an
+// add op with mem ComplexPattern is used and the stack address copy
+// can be matched. It's similar to Sparc LEA_ADDRi
+def LEA_ADDiu : EffectiveAddress<"addiu", CPURegs, mem_ea>, LW_FM<9>;
+
+// MADD*/MSUB*
+def MADD : MArithR<"madd", 1>, MULT_FM<0x1c, 0>;
+def MADDU : MArithR<"maddu", 1>, MULT_FM<0x1c, 1>;
+def MSUB : MArithR<"msub">, MULT_FM<0x1c, 4>;
+def MSUBU : MArithR<"msubu">, MULT_FM<0x1c, 5>;
+def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd>;
+def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu>;
+def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub>;
+def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu>;
+
+def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM;
+
+def EXT : ExtBase<"ext", CPURegsOpnd>, EXT_FM<0>;
+def INS : InsBase<"ins", CPURegsOpnd>, EXT_FM<4>;
+
+/// Move Control Registers From/To CPU Registers
+def MFC0_3OP : MFC3OP<(outs CPURegsOpnd:$rt),
+ (ins CPURegsOpnd:$rd, uimm16:$sel),
+ "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>;
+
+def MTC0_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel),
+ (ins CPURegsOpnd:$rt),
+ "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>;
+
+def MFC2_3OP : MFC3OP<(outs CPURegsOpnd:$rt),
+ (ins CPURegsOpnd:$rd, uimm16:$sel),
+ "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>;
+
+def MTC2_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel),
+ (ins CPURegsOpnd:$rt),
+ "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>;
+
+//===----------------------------------------------------------------------===//
+// Instruction aliases
+//===----------------------------------------------------------------------===//
+def : InstAlias<"move $dst, $src",
+ (ADDu CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>,
+ Requires<[NotMips64]>;
+def : InstAlias<"move $dst, $src",
+ (OR CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>,
+ Requires<[NotMips64]>;
+def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset), 1>;
+def : InstAlias<"addu $rs, $rt, $imm",
+ (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"add $rs, $rt, $imm",
+ (ADDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"and $rs, $rt, $imm",
+ (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"jalr $rs", (JALR RA, CPURegs:$rs)>, Requires<[NotMips64]>;
+def : InstAlias<"jal $rs", (JALR RA, CPURegs:$rs), 0>, Requires<[NotMips64]>;
+def : InstAlias<"jal $rd,$rs", (JALR CPURegs:$rd, CPURegs:$rs), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"not $rt, $rs",
+ (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO), 1>;
+def : InstAlias<"neg $rt, $rs",
+ (SUB CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>;
+def : InstAlias<"negu $rt, $rs",
+ (SUBu CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>;
+def : InstAlias<"slt $rs, $rt, $imm",
+ (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>;
+def : InstAlias<"xor $rs, $rt, $imm",
+ (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"or $rs, $rt, $imm",
+ (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
+def : InstAlias<"mfc0 $rt, $rd",
+ (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>;
+def : InstAlias<"mtc0 $rt, $rd",
+ (MTC0_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>;
+def : InstAlias<"mfc2 $rt, $rd",
+ (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>;
+def : InstAlias<"mtc2 $rt, $rd",
+ (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>;
+
+//===----------------------------------------------------------------------===//
+// Assembler Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+class LoadImm32< string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
+ !strconcat(instr_asm, "\t$rt, $imm32")> ;
+def LoadImm32Reg : LoadImm32<"li", shamt,CPURegsOpnd>;
+
+class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr")> ;
+def LoadAddr32Reg : LoadAddress<"la", mem, CPURegsOpnd>;
+
+class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
+ !strconcat(instr_asm, "\t$rt, $imm32")> ;
+def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegsOpnd>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Load/store pattern templates.
+class LoadRegImmPat<Instruction LoadInst, ValueType ValTy, PatFrag Node> :
+ MipsPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>;
+
+class StoreRegImmPat<Instruction StoreInst, ValueType ValTy> :
+ MipsPat<(store ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>;
+
+// Small immediates
+def : MipsPat<(i32 immSExt16:$in),
+ (ADDiu ZERO, imm:$in)>;
+def : MipsPat<(i32 immZExt16:$in),
+ (ORi ZERO, imm:$in)>;
+def : MipsPat<(i32 immLow16Zero:$in),
+ (LUi (HI16 imm:$in))>;
+
+// Arbitrary immediates
+def : MipsPat<(i32 imm:$imm),
+ (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
+// Carry MipsPatterns
+def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs),
+ (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
+def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs),
+ (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+def : MipsPat<(addc CPURegs:$src, immSExt16:$imm),
+ (ADDiu CPURegs:$src, imm:$imm)>;
+
+// Call
+def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (JAL tglobaladdr:$dst)>;
+def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+//def : MipsPat<(MipsJmpLink CPURegs:$dst),
+// (JALR CPURegs:$dst)>;
+
+// Tail call
+def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
+ (TAILCALL tglobaladdr:$dst)>;
+def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
+ (TAILCALL texternalsym:$dst)>;
+// hi/lo relocs
+def : MipsPat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : MipsPat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : MipsPat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+def : MipsPat<(MipsHi texternalsym:$in), (LUi texternalsym:$in)>;
+
+def : MipsPat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
+def : MipsPat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
+def : MipsPat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
+def : MipsPat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
+def : MipsPat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
+def : MipsPat<(MipsLo texternalsym:$in), (ADDiu ZERO, texternalsym:$in)>;
+
+def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
+ (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
+ (ADDiu CPURegs:$hi, tjumptable:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
+ (ADDiu CPURegs:$hi, tconstpool:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
+
+// gp_rel relocs
+def : MipsPat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
+ (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
+def : MipsPat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
+ (ADDiu CPURegs:$gp, tconstpool:$in)>;
+
+// wrapper_pic
+class WrapperPat<SDNode node, Instruction ADDiuOp, RegisterClass RC>:
+ MipsPat<(MipsWrapper RC:$gp, node:$in),
+ (ADDiuOp RC:$gp, node:$in)>;
+
+def : WrapperPat<tglobaladdr, ADDiu, CPURegs>;
+def : WrapperPat<tconstpool, ADDiu, CPURegs>;
+def : WrapperPat<texternalsym, ADDiu, CPURegs>;
+def : WrapperPat<tblockaddress, ADDiu, CPURegs>;
+def : WrapperPat<tjumptable, ADDiu, CPURegs>;
+def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>;
+
+// Mips does not have "not", so we expand our way
+def : MipsPat<(not CPURegs:$in),
+ (NOR CPURegsOpnd:$in, ZERO)>;
+
+// extended loads
+let Predicates = [NotN64, HasStdEnc] in {
+ def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
+ def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
+ def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>;
+}
+let Predicates = [IsN64, HasStdEnc] in {
+ def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
+ def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
+ def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu_P8 addr:$src)>;
+}
+
+// peepholes
+let Predicates = [NotN64, HasStdEnc] in {
+ def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+}
+let Predicates = [IsN64, HasStdEnc] in {
+ def : MipsPat<(store (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
+}
+
+// brcond patterns
+multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
+ Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp,
+ Instruction SLTiuOp, Register ZEROReg> {
+def : MipsPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst),
+ (BNEOp RC:$lhs, ZEROReg, bb:$dst)>;
+def : MipsPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst),
+ (BEQOp RC:$lhs, ZEROReg, bb:$dst)>;
+
+def : MipsPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+ (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+ (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+
+def : MipsPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+
+def : MipsPat<(brcond RC:$cond, bb:$dst),
+ (BNEOp RC:$cond, ZEROReg, bb:$dst)>;
+}
+
+defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
+
+// setcc patterns
+multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp,
+ Instruction SLTuOp, Register ZEROReg> {
+ def : MipsPat<(seteq RC:$lhs, RC:$rhs),
+ (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>;
+ def : MipsPat<(setne RC:$lhs, RC:$rhs),
+ (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>;
+}
+
+multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : MipsPat<(setle RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>;
+ def : MipsPat<(setule RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>;
+}
+
+multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : MipsPat<(setgt RC:$lhs, RC:$rhs),
+ (SLTOp RC:$rhs, RC:$lhs)>;
+ def : MipsPat<(setugt RC:$lhs, RC:$rhs),
+ (SLTuOp RC:$rhs, RC:$lhs)>;
+}
+
+multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : MipsPat<(setge RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>;
+ def : MipsPat<(setuge RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>;
+}
+
+multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp,
+ Instruction SLTiuOp> {
+ def : MipsPat<(setge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>;
+ def : MipsPat<(setuge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>;
+}
+
+defm : SeteqPats<CPURegs, SLTiu, XOR, SLTu, ZERO>;
+defm : SetlePats<CPURegs, SLT, SLTu>;
+defm : SetgtPats<CPURegs, SLT, SLTu>;
+defm : SetgePats<CPURegs, SLT, SLTu>;
+defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
+
+// bswap pattern
+def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
+
+// mflo/hi patterns.
+def : MipsPat<(i32 (ExtractLOHI ACRegs:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegs:$ac, imm:$lohi_idx)>;
+
+// Load halfword/word patterns.
+let AddedComplexity = 40 in {
+ let Predicates = [NotN64, HasStdEnc] in {
+ def : LoadRegImmPat<LBu, i32, zextloadi8>;
+ def : LoadRegImmPat<LH, i32, sextloadi16>;
+ def : LoadRegImmPat<LW, i32, load>;
+ }
+ let Predicates = [IsN64, HasStdEnc] in {
+ def : LoadRegImmPat<LBu_P8, i32, zextloadi8>;
+ def : LoadRegImmPat<LH_P8, i32, sextloadi16>;
+ def : LoadRegImmPat<LW_P8, i32, load>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFPU.td"
+include "Mips64InstrInfo.td"
+include "MipsCondMov.td"
+
+//
+// Mips16
+
+include "Mips16InstrFormats.td"
+include "Mips16InstrInfo.td"
+
+// DSP
+include "MipsDSPInstrFormats.td"
+include "MipsDSPInstrInfo.td"
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp
new file mode 100644
index 000000000000..1b2a325d3ce6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp
@@ -0,0 +1,285 @@
+//===-- MipsJITInfo.cpp - Implement the Mips JIT Interface ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the Mips target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "MipsJITInfo.h"
+#include "MipsInstrInfo.h"
+#include "MipsRelocations.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+using namespace llvm;
+
+
+void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)Old;
+ const unsigned NopInstr = 0x0;
+
+ // If the functions are in the same memory segment, insert PC-region branch.
+ if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) {
+ unsigned *OldInstruction = (unsigned *)Old;
+ *OldInstruction = 0x08000000;
+ unsigned JTargetAddr = NewAddr & 0x0FFFFFFC;
+
+ JTargetAddr >>= 2;
+ *OldInstruction |= JTargetAddr;
+
+ // Insert a NOP.
+ OldInstruction++;
+ *OldInstruction = NopInstr;
+
+ sys::Memory::InvalidateInstructionCache(Old, 2 * 4);
+ } else {
+ // We need to clear hint bits from the instruction, in case it is 'jr ra'.
+ const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008;
+ unsigned* CurrentInstr = (unsigned*)Old;
+ unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask;
+ unsigned* NextInstr = CurrentInstr + 1;
+ unsigned NextInstrHintClear = (*NextInstr) & HintMask;
+
+ // Do absolute jump if there are 2 or more instructions before return from
+ // the old function.
+ if ((CurrInstrHintClear != ReturnSequence) &&
+ (NextInstrHintClear != ReturnSequence)) {
+ const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000;
+ const unsigned JrT0Instr = 0x01000008;
+ // lui t0, high 16 bit of the NewAddr
+ (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16);
+ // addiu t0, t0, low 16 bit of the NewAddr
+ (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff);
+ // jr t0
+ (*(CurrentInstr++)) = JrT0Instr;
+ (*CurrentInstr) = NopInstr;
+
+ sys::Memory::InvalidateInstructionCache(Old, 4 * 4);
+ } else {
+ // Unsupported case
+ report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
+ }
+ }
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring. This code saves registers, calls
+// MipsCompilationCallbackC and restores registers.
+extern "C" {
+#if defined (__mips__)
+void MipsCompilationCallback();
+
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "MipsCompilationCallback\n"
+ ASMPREFIX "MipsCompilationCallback:\n"
+ ".ent " ASMPREFIX "MipsCompilationCallback\n"
+ ".frame $sp, 32, $ra\n"
+ ".set noreorder\n"
+ ".cpload $t9\n"
+
+ "addiu $sp, $sp, -64\n"
+ ".cprestore 16\n"
+
+ // Save argument registers a0, a1, a2, a3, f12, f14 since they may contain
+ // stuff for the real target function right now. We have to act as if this
+ // whole compilation callback doesn't exist as far as the caller is
+ // concerned. We also need to save the ra register since it contains the
+ // original return address, and t8 register since it contains the address
+ // of the end of function stub.
+ "sw $a0, 20($sp)\n"
+ "sw $a1, 24($sp)\n"
+ "sw $a2, 28($sp)\n"
+ "sw $a3, 32($sp)\n"
+ "sw $ra, 36($sp)\n"
+ "sw $t8, 40($sp)\n"
+ "sdc1 $f12, 48($sp)\n"
+ "sdc1 $f14, 56($sp)\n"
+
+ // t8 points at the end of function stub. Pass the beginning of the stub
+ // to the MipsCompilationCallbackC.
+ "addiu $a0, $t8, -16\n"
+ "jal " ASMPREFIX "MipsCompilationCallbackC\n"
+ "nop\n"
+
+ // Restore registers.
+ "lw $a0, 20($sp)\n"
+ "lw $a1, 24($sp)\n"
+ "lw $a2, 28($sp)\n"
+ "lw $a3, 32($sp)\n"
+ "lw $ra, 36($sp)\n"
+ "lw $t8, 40($sp)\n"
+ "ldc1 $f12, 48($sp)\n"
+ "ldc1 $f14, 56($sp)\n"
+ "addiu $sp, $sp, 64\n"
+
+ // Jump to the (newly modified) stub to invoke the real function.
+ "addiu $t8, $t8, -16\n"
+ "jr $t8\n"
+ "nop\n"
+
+ ".set reorder\n"
+ ".end " ASMPREFIX "MipsCompilationCallback\n"
+ );
+#else // host != Mips
+ void MipsCompilationCallback() {
+ llvm_unreachable(
+ "Cannot call MipsCompilationCallback() on a non-Mips arch!");
+ }
+#endif
+}
+
+/// MipsCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void MipsCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr);
+
+ // Rewrite the function stub so that we don't end up here every time we
+ // execute the call. We're replacing the first four instructions of the
+ // stub with code that jumps to the compiled function:
+ // lui $t9, %hi(NewVal)
+ // addiu $t9, $t9, %lo(NewVal)
+ // jr $t9
+ // nop
+
+ int Hi = ((unsigned)NewVal & 0xffff0000) >> 16;
+ if ((NewVal & 0x8000) != 0)
+ Hi++;
+ int Lo = (int)(NewVal & 0xffff);
+
+ *(intptr_t *)(StubAddr) = 0xf << 26 | 25 << 16 | Hi;
+ *(intptr_t *)(StubAddr + 4) = 9 << 26 | 25 << 21 | 25 << 16 | Lo;
+ *(intptr_t *)(StubAddr + 8) = 25 << 21 | 8;
+ *(intptr_t *)(StubAddr + 12) = 0;
+
+ sys::Memory::InvalidateInstructionCache((void*) StubAddr, 16);
+}
+
+TargetJITInfo::LazyResolverFn MipsJITInfo::getLazyResolverFunction(
+ JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return MipsCompilationCallback;
+}
+
+TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() {
+ // The stub contains 4 4-byte instructions, aligned at 4 bytes. See
+ // emitFunctionStub for details.
+ StubLayout Result = { 4*4, 4 };
+ return Result;
+}
+
+void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE) {
+ JCE.emitAlignment(4);
+ void *Addr = (void*) (JCE.getCurrentPCValue());
+ if (!sys::Memory::setRangeWritable(Addr, 16))
+ llvm_unreachable("ERROR: Unable to mark stub writable.");
+
+ intptr_t EmittedAddr;
+ if (Fn != (void*)(intptr_t)MipsCompilationCallback)
+ EmittedAddr = (intptr_t)Fn;
+ else
+ EmittedAddr = (intptr_t)MipsCompilationCallback;
+
+
+ int Hi = ((unsigned)EmittedAddr & 0xffff0000) >> 16;
+ if ((EmittedAddr & 0x8000) != 0)
+ Hi++;
+ int Lo = (int)(EmittedAddr & 0xffff);
+
+ // lui t9, %hi(EmittedAddr)
+ // addiu t9, t9, %lo(EmittedAddr)
+ // jalr t8, t9
+ // nop
+ if (IsLittleEndian) {
+ JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi);
+ JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
+ JCE.emitWordLE(25 << 21 | 24 << 11 | 9);
+ JCE.emitWordLE(0);
+ } else {
+ JCE.emitWordBE(0xf << 26 | 25 << 16 | Hi);
+ JCE.emitWordBE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
+ JCE.emitWordBE(25 << 21 | 24 << 11 | 9);
+ JCE.emitWordBE(0);
+ }
+
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16))
+ llvm_unreachable("ERROR: Unable to mark stub executable.");
+
+ return Addr;
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char *GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+
+ void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
+
+ switch ((Mips::RelocationType) MR->getRelocationType()) {
+ case Mips::reloc_mips_pc16:
+ ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_26:
+ ResultPtr = (ResultPtr & 0x0fffffff) >> 2;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_hi:
+ ResultPtr = ResultPtr >> 16;
+ if ((((intptr_t) (MR->getResultPointer()) & 0xffff) >> 15) == 1) {
+ ResultPtr += 1;
+ }
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_lo: {
+ // Addend is needed for unaligned load/store instructions, where offset
+ // for the second load/store in the expanded instruction sequence must
+ // be modified by +1 or +3. Otherwise, Addend is 0.
+ int Addend = *((unsigned*) RelocPos) & 0xffff;
+ ResultPtr = (ResultPtr + Addend) & 0xffff;
+ *((unsigned*) RelocPos) &= 0xffff0000;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsJITInfo.h b/contrib/llvm/lib/Target/Mips/MipsJITInfo.h
new file mode 100644
index 000000000000..ecda3101a003
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsJITInfo.h
@@ -0,0 +1,71 @@
+//===- MipsJITInfo.h - Mips Implementation of the JIT Interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSJITINFO_H
+#define MIPSJITINFO_H
+
+#include "MipsMachineFunction.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+class MipsTargetMachine;
+
+class MipsJITInfo : public TargetJITInfo {
+
+ bool IsPIC;
+ bool IsLittleEndian;
+
+ public:
+ explicit MipsJITInfo() :
+ IsPIC(false), IsLittleEndian(true) {}
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on Mips.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char *GOTBase);
+
+ /// Initialize - Initialize internal stage for the function being JITted.
+ void Initialize(const MachineFunction &MF, bool isPIC,
+ bool isLittleEndian) {
+ IsPIC = isPIC;
+ IsLittleEndian = isLittleEndian;
+ }
+
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
new file mode 100644
index 000000000000..2efe534053a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
@@ -0,0 +1,455 @@
+//===-- MipsLongBranch.cpp - Emit long branches ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands a branch or jump instruction into a long branch if its
+// offset is too large to fit into its immediate field.
+//
+// FIXME:
+// 1. Fix pc-region jump instructions which cross 256MB segment boundaries.
+// 2. If program has inline assembly statements whose size cannot be
+// determined accurately, load branch target addresses from the GOT.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-long-branch"
+
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(LongBranches, "Number of long branches.");
+
+static cl::opt<bool> SkipLongBranch(
+ "skip-mips-long-branch",
+ cl::init(false),
+ cl::desc("MIPS: Skip long branch pass."),
+ cl::Hidden);
+
+static cl::opt<bool> ForceLongBranch(
+ "force-mips-long-branch",
+ cl::init(false),
+ cl::desc("MIPS: Expand all branches to long format."),
+ cl::Hidden);
+
+namespace {
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+
+ struct MBBInfo {
+ uint64_t Size, Address;
+ bool HasLongBranch;
+ MachineInstr *Br;
+
+ MBBInfo() : Size(0), HasLongBranch(false), Br(0) {}
+ };
+
+ class MipsLongBranch : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ MipsLongBranch(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm),
+ TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_),
+ ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
+ LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 13 : 9)) {}
+
+ virtual const char *getPassName() const {
+ return "Mips Long Branch";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+ void splitMBB(MachineBasicBlock *MBB);
+ void initMBBInfo();
+ int64_t computeOffset(const MachineInstr *Br);
+ void replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL,
+ MachineBasicBlock *MBBOpnd);
+ void expandToLongBranch(MBBInfo &Info);
+
+ const TargetMachine &TM;
+ const MipsInstrInfo *TII;
+ MachineFunction *MF;
+ SmallVector<MBBInfo, 16> MBBInfos;
+ bool IsPIC;
+ unsigned ABI;
+ unsigned LongBranchSeqSize;
+ };
+
+ char MipsLongBranch::ID = 0;
+} // end of anonymous namespace
+
+/// createMipsLongBranchPass - Returns a pass that converts branches to long
+/// branches.
+FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) {
+ return new MipsLongBranch(tm);
+}
+
+/// Iterate over list of Br's operands and search for a MachineBasicBlock
+/// operand.
+static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) {
+ for (unsigned I = 0, E = Br.getDesc().getNumOperands(); I < E; ++I) {
+ const MachineOperand &MO = Br.getOperand(I);
+
+ if (MO.isMBB())
+ return MO.getMBB();
+ }
+
+ assert(false && "This instruction does not have an MBB operand.");
+ return 0;
+}
+
+// Traverse the list of instructions backwards until a non-debug instruction is
+// found or it reaches E.
+static ReverseIter getNonDebugInstr(ReverseIter B, ReverseIter E) {
+ for (; B != E; ++B)
+ if (!B->isDebugValue())
+ return B;
+
+ return E;
+}
+
+// Split MBB if it has two direct jumps/branches.
+void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) {
+ ReverseIter End = MBB->rend();
+ ReverseIter LastBr = getNonDebugInstr(MBB->rbegin(), End);
+
+ // Return if MBB has no branch instructions.
+ if ((LastBr == End) ||
+ (!LastBr->isConditionalBranch() && !LastBr->isUnconditionalBranch()))
+ return;
+
+ ReverseIter FirstBr = getNonDebugInstr(llvm::next(LastBr), End);
+
+ // MBB has only one branch instruction if FirstBr is not a branch
+ // instruction.
+ if ((FirstBr == End) ||
+ (!FirstBr->isConditionalBranch() && !FirstBr->isUnconditionalBranch()))
+ return;
+
+ assert(!FirstBr->isIndirectBranch() && "Unexpected indirect branch found.");
+
+ // Create a new MBB. Move instructions in MBB to the newly created MBB.
+ MachineBasicBlock *NewMBB =
+ MF->CreateMachineBasicBlock(MBB->getBasicBlock());
+
+ // Insert NewMBB and fix control flow.
+ MachineBasicBlock *Tgt = getTargetMBB(*FirstBr);
+ NewMBB->transferSuccessors(MBB);
+ NewMBB->removeSuccessor(Tgt);
+ MBB->addSuccessor(NewMBB);
+ MBB->addSuccessor(Tgt);
+ MF->insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB);
+
+ NewMBB->splice(NewMBB->end(), MBB, (++LastBr).base(), MBB->end());
+}
+
+// Fill MBBInfos.
+void MipsLongBranch::initMBBInfo() {
+ // Split the MBBs if they have two branches. Each basic block should have at
+ // most one branch after this loop is executed.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E;)
+ splitMBB(I++);
+
+ MF->RenumberBlocks();
+ MBBInfos.clear();
+ MBBInfos.resize(MF->size());
+
+ for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(I);
+
+ // Compute size of MBB.
+ for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin();
+ MI != MBB->instr_end(); ++MI)
+ MBBInfos[I].Size += TII->GetInstSizeInBytes(&*MI);
+
+ // Search for MBB's branch instruction.
+ ReverseIter End = MBB->rend();
+ ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
+
+ if ((Br != End) && !Br->isIndirectBranch() &&
+ (Br->isConditionalBranch() ||
+ (Br->isUnconditionalBranch() &&
+ TM.getRelocationModel() == Reloc::PIC_)))
+ MBBInfos[I].Br = (++Br).base();
+ }
+}
+
+// Compute offset of branch in number of bytes.
+int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) {
+ int64_t Offset = 0;
+ int ThisMBB = Br->getParent()->getNumber();
+ int TargetMBB = getTargetMBB(*Br)->getNumber();
+
+ // Compute offset of a forward branch.
+ if (ThisMBB < TargetMBB) {
+ for (int N = ThisMBB + 1; N < TargetMBB; ++N)
+ Offset += MBBInfos[N].Size;
+
+ return Offset + 4;
+ }
+
+ // Compute offset of a backward branch.
+ for (int N = ThisMBB; N >= TargetMBB; --N)
+ Offset += MBBInfos[N].Size;
+
+ return -Offset + 4;
+}
+
+// Replace Br with a branch which has the opposite condition code and a
+// MachineBasicBlock operand MBBOpnd.
+void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
+ DebugLoc DL, MachineBasicBlock *MBBOpnd) {
+ unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode());
+ const MCInstrDesc &NewDesc = TII->get(NewOpc);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc);
+
+ for (unsigned I = 0, E = Br->getDesc().getNumOperands(); I < E; ++I) {
+ MachineOperand &MO = Br->getOperand(I);
+
+ if (!MO.isReg()) {
+ assert(MO.isMBB() && "MBB operand expected.");
+ break;
+ }
+
+ MIB.addReg(MO.getReg());
+ }
+
+ MIB.addMBB(MBBOpnd);
+
+ Br->eraseFromParent();
+}
+
+// Expand branch instructions to long branches.
+void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
+ MachineBasicBlock::iterator Pos;
+ MachineBasicBlock *MBB = I.Br->getParent(), *TgtMBB = getTargetMBB(*I.Br);
+ DebugLoc DL = I.Br->getDebugLoc();
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator FallThroughMBB = ++MachineFunction::iterator(MBB);
+ MachineBasicBlock *LongBrMBB = MF->CreateMachineBasicBlock(BB);
+
+ MF->insert(FallThroughMBB, LongBrMBB);
+ MBB->removeSuccessor(TgtMBB);
+ MBB->addSuccessor(LongBrMBB);
+
+ if (IsPIC) {
+ MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(FallThroughMBB, BalTgtMBB);
+ LongBrMBB->addSuccessor(BalTgtMBB);
+ BalTgtMBB->addSuccessor(TgtMBB);
+
+ int64_t TgtAddress = MBBInfos[TgtMBB->getNumber()].Address;
+ unsigned BalTgtMBBSize = 5;
+ int64_t Offset = TgtAddress - (I.Address + I.Size - BalTgtMBBSize * 4);
+ int64_t Lo = SignExtend64<16>(Offset & 0xffff);
+ int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff);
+
+ if (ABI != MipsSubtarget::N64) {
+ // $longbr:
+ // addiu $sp, $sp, -8
+ // sw $ra, 0($sp)
+ // bal $baltgt
+ // lui $at, %hi($tgt - $baltgt)
+ // $baltgt:
+ // addiu $at, $at, %lo($tgt - $baltgt)
+ // addu $at, $ra, $at
+ // lw $ra, 0($sp)
+ // jr $at
+ // addiu $sp, $sp, 8
+ // $fallthrough:
+ //
+
+ Pos = LongBrMBB->begin();
+
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(-8);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA)
+ .addReg(Mips::SP).addImm(0);
+
+ MIBundleBuilder(*LongBrMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi));
+
+ Pos = BalTgtMBB->begin();
+
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::AT)
+ .addReg(Mips::AT).addImm(Lo);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDu), Mips::AT)
+ .addReg(Mips::RA).addReg(Mips::AT);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA)
+ .addReg(Mips::SP).addImm(0);
+
+ MIBundleBuilder(*BalTgtMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
+ .append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(8));
+ } else {
+ // $longbr:
+ // daddiu $sp, $sp, -16
+ // sd $ra, 0($sp)
+ // lui64 $at, %highest($tgt - $baltgt)
+ // daddiu $at, $at, %higher($tgt - $baltgt)
+ // dsll $at, $at, 16
+ // daddiu $at, $at, %hi($tgt - $baltgt)
+ // bal $baltgt
+ // dsll $at, $at, 16
+ // $baltgt:
+ // daddiu $at, $at, %lo($tgt - $baltgt)
+ // daddu $at, $ra, $at
+ // ld $ra, 0($sp)
+ // jr64 $at
+ // daddiu $sp, $sp, 16
+ // $fallthrough:
+ //
+
+ int64_t Higher = SignExtend64<16>(((Offset + 0x80008000) >> 32) & 0xffff);
+ int64_t Highest =
+ SignExtend64<16>(((Offset + 0x800080008000LL) >> 48) & 0xffff);
+
+ Pos = LongBrMBB->begin();
+
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
+ .addReg(Mips::SP_64).addImm(-16);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SD)).addReg(Mips::RA_64)
+ .addReg(Mips::SP_64).addImm(0);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi64), Mips::AT_64)
+ .addImm(Highest);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(Higher);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(16);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(Hi);
+
+ MIBundleBuilder(*LongBrMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(16));
+
+ Pos = BalTgtMBB->begin();
+
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(Lo);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDu), Mips::AT_64)
+ .addReg(Mips::RA_64).addReg(Mips::AT_64);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64)
+ .addReg(Mips::SP_64).addImm(0);
+
+ MIBundleBuilder(*BalTgtMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64))
+ .append(BuildMI(*MF, DL, TII->get(Mips::DADDiu), Mips::SP_64)
+ .addReg(Mips::SP_64).addImm(16));
+ }
+
+ assert(BalTgtMBBSize == BalTgtMBB->size());
+ assert(LongBrMBB->size() + BalTgtMBBSize == LongBranchSeqSize);
+ } else {
+ // $longbr:
+ // j $tgt
+ // nop
+ // $fallthrough:
+ //
+ Pos = LongBrMBB->begin();
+ LongBrMBB->addSuccessor(TgtMBB);
+ MIBundleBuilder(*LongBrMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::J)).addMBB(TgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::NOP)));
+
+ assert(LongBrMBB->size() == LongBranchSeqSize);
+ }
+
+ if (I.Br->isUnconditionalBranch()) {
+ // Change branch destination.
+ assert(I.Br->getDesc().getNumOperands() == 1);
+ I.Br->RemoveOperand(0);
+ I.Br->addOperand(MachineOperand::CreateMBB(LongBrMBB));
+ } else
+ // Change branch destination and reverse condition.
+ replaceBranch(*MBB, I.Br, DL, FallThroughMBB);
+}
+
+static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
+ MachineBasicBlock &MBB = F.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ DebugLoc DL = MBB.findDebugLoc(MBB.begin());
+ BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII->get(Mips::ADDiu), Mips::V0)
+ .addReg(Mips::V0).addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ MBB.removeLiveIn(Mips::V0);
+}
+
+bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
+ if ((TM.getRelocationModel() == Reloc::PIC_) &&
+ TM.getSubtarget<MipsSubtarget>().isABI_O32() &&
+ F.getInfo<MipsFunctionInfo>()->globalBaseRegSet())
+ emitGPDisp(F, TII);
+
+ if (SkipLongBranch)
+ return true;
+
+ MF = &F;
+ initMBBInfo();
+
+ SmallVector<MBBInfo, 16>::iterator I, E = MBBInfos.end();
+ bool EverMadeChange = false, MadeChange = true;
+
+ while (MadeChange) {
+ MadeChange = false;
+
+ for (I = MBBInfos.begin(); I != E; ++I) {
+ // Skip if this MBB doesn't have a branch or the branch has already been
+ // converted to a long branch.
+ if (!I->Br || I->HasLongBranch)
+ continue;
+
+ // Check if offset fits into 16-bit immediate field of branches.
+ if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / 4))
+ continue;
+
+ I->HasLongBranch = true;
+ I->Size += LongBranchSeqSize * 4;
+ ++LongBranches;
+ EverMadeChange = MadeChange = true;
+ }
+ }
+
+ if (!EverMadeChange)
+ return true;
+
+ // Compute basic block addresses.
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ uint64_t Address = 0;
+
+ for (I = MBBInfos.begin(); I != E; Address += I->Size, ++I)
+ I->Address = Address;
+ }
+
+ // Do the expansion.
+ for (I = MBBInfos.begin(); I != E; ++I)
+ if (I->HasLongBranch)
+ expandToLongBranch(*I);
+
+ MF->RenumberBlocks();
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
new file mode 100644
index 000000000000..d836975eb7d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -0,0 +1,166 @@
+//===-- MipsMCInstLower.cpp - Convert Mips MachineInstr to MCInst ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Mips MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsMCInstLower.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAsmPrinter.h"
+#include "MipsInstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter)
+ : AsmPrinter(asmprinter) {}
+
+void MipsMCInstLower::Initialize(Mangler *M, MCContext *C) {
+ Mang = M;
+ Ctx = C;
+}
+
+MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy,
+ unsigned Offset) const {
+ MCSymbolRefExpr::VariantKind Kind;
+ const MCSymbol *Symbol;
+
+ switch(MO.getTargetFlags()) {
+ default: llvm_unreachable("Invalid target flag!");
+ case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break;
+ case MipsII::MO_GPREL: Kind = MCSymbolRefExpr::VK_Mips_GPREL; break;
+ case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+ case MipsII::MO_GOT16: Kind = MCSymbolRefExpr::VK_Mips_GOT16; break;
+ case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break;
+ case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break;
+ case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break;
+ case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break;
+ case MipsII::MO_TLSLDM: Kind = MCSymbolRefExpr::VK_Mips_TLSLDM; break;
+ case MipsII::MO_DTPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_DTPREL_HI; break;
+ case MipsII::MO_DTPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_DTPREL_LO; break;
+ case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break;
+ case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break;
+ case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break;
+ case MipsII::MO_GPOFF_HI: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_HI; break;
+ case MipsII::MO_GPOFF_LO: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_LO; break;
+ case MipsII::MO_GOT_DISP: Kind = MCSymbolRefExpr::VK_Mips_GOT_DISP; break;
+ case MipsII::MO_GOT_PAGE: Kind = MCSymbolRefExpr::VK_Mips_GOT_PAGE; break;
+ case MipsII::MO_GOT_OFST: Kind = MCSymbolRefExpr::VK_Mips_GOT_OFST; break;
+ case MipsII::MO_HIGHER: Kind = MCSymbolRefExpr::VK_Mips_HIGHER; break;
+ case MipsII::MO_HIGHEST: Kind = MCSymbolRefExpr::VK_Mips_HIGHEST; break;
+ case MipsII::MO_GOT_HI16: Kind = MCSymbolRefExpr::VK_Mips_GOT_HI16; break;
+ case MipsII::MO_GOT_LO16: Kind = MCSymbolRefExpr::VK_Mips_GOT_LO16; break;
+ case MipsII::MO_CALL_HI16: Kind = MCSymbolRefExpr::VK_Mips_CALL_HI16; break;
+ case MipsII::MO_CALL_LO16: Kind = MCSymbolRefExpr::VK_Mips_CALL_LO16; break;
+ }
+
+ switch (MOTy) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ Offset += MO.getOffset();
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ Offset += MO.getOffset();
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ Offset += MO.getOffset();
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ Offset += MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
+
+ if (!Offset)
+ return MCOperand::CreateExpr(MCSym);
+
+ // Assume offset is never negative.
+ assert(Offset > 0);
+
+ const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
+ const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ return MCOperand::CreateExpr(Add);
+}
+
+/*
+static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand &Opnd0,
+ const MCOperand &Opnd1,
+ const MCOperand &Opnd2 = MCOperand()) {
+ Inst.setOpcode(Opc);
+ Inst.addOperand(Opnd0);
+ Inst.addOperand(Opnd1);
+ if (Opnd2.isValid())
+ Inst.addOperand(Opnd2);
+}
+*/
+
+MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO,
+ unsigned offset) const {
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) break;
+ return MCOperand::CreateReg(MO.getReg());
+ case MachineOperand::MO_Immediate:
+ return MCOperand::CreateImm(MO.getImm() + offset);
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(MO, MOTy, offset);
+ case MachineOperand::MO_RegisterMask:
+ break;
+ }
+
+ return MCOperand();
+}
+
+void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp = LowerOperand(MO);
+
+ if (MCOp.isValid())
+ OutMI.addOperand(MCOp);
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
new file mode 100644
index 000000000000..c4a6016105b2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
@@ -0,0 +1,44 @@
+//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCINSTLOWER_H
+#define MIPSMCINSTLOWER_H
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MachineInstr;
+ class MachineFunction;
+ class Mangler;
+ class MipsAsmPrinter;
+
+/// MipsMCInstLower - This class is used to lower an MachineInstr into an
+// MCInst.
+class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
+ typedef MachineOperand::MachineOperandType MachineOperandType;
+ MCContext *Ctx;
+ Mangler *Mang;
+ MipsAsmPrinter &AsmPrinter;
+public:
+ MipsMCInstLower(MipsAsmPrinter &asmprinter);
+ void Initialize(Mangler *mang, MCContext *C);
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
+
+private:
+ MCOperand LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy, unsigned Offset) const;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
new file mode 100644
index 000000000000..59b23f7ad7c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -0,0 +1,75 @@
+//===-- MipsMachineFunctionInfo.cpp - Private data used for Mips ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
+ cl::desc("Always use $gp as the global base register."));
+
+bool MipsFunctionInfo::globalBaseRegSet() const {
+ return GlobalBaseReg;
+}
+
+unsigned MipsFunctionInfo::getGlobalBaseReg() {
+ // Return if it has already been initialized.
+ if (GlobalBaseReg)
+ return GlobalBaseReg;
+
+ const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
+
+ const TargetRegisterClass *RC;
+ if (ST.inMips16Mode())
+ RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+ else
+ RC = ST.isABI_N64() ?
+ (const TargetRegisterClass*)&Mips::CPU64RegsRegClass :
+ (const TargetRegisterClass*)&Mips::CPURegsRegClass;
+ return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC);
+}
+
+bool MipsFunctionInfo::mips16SPAliasRegSet() const {
+ return Mips16SPAliasReg;
+}
+unsigned MipsFunctionInfo::getMips16SPAliasReg() {
+ // Return if it has already been initialized.
+ if (Mips16SPAliasReg)
+ return Mips16SPAliasReg;
+
+ const TargetRegisterClass *RC;
+ RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+ return Mips16SPAliasReg = MF.getRegInfo().createVirtualRegister(RC);
+}
+
+void MipsFunctionInfo::createEhDataRegsFI() {
+ for (int I = 0; I < 4; ++I) {
+ const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
+ const TargetRegisterClass *RC = ST.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ EhDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment(), false);
+ }
+}
+
+bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
+ return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1]
+ || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
+}
+
+void MipsFunctionInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
new file mode 100644
index 000000000000..b05b348037d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -0,0 +1,99 @@
+//===-- MipsMachineFunctionInfo.h - Private data used for Mips ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_MACHINE_FUNCTION_INFO_H
+#define MIPS_MACHINE_FUNCTION_INFO_H
+
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include <utility>
+
+namespace llvm {
+
+/// MipsFunctionInfo - This class is derived from MachineFunction private
+/// Mips target-specific information for each MachineFunction.
+class MipsFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ MachineFunction& MF;
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ /// Mips16SPAliasReg - keeps track of the virtual register initialized for
+ /// use as an alias for SP for use in load/store of halfword/byte from/to
+ /// the stack
+ unsigned Mips16SPAliasReg;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// True if function has a byval argument.
+ bool HasByvalArg;
+
+ /// Size of incoming argument area.
+ unsigned IncomingArgSize;
+
+ /// CallsEhReturn - Whether the function calls llvm.eh.return.
+ bool CallsEhReturn;
+
+ /// Frame objects for spilling eh data registers.
+ int EhDataRegFI[4];
+
+public:
+ MipsFunctionInfo(MachineFunction& MF)
+ : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0),
+ VarArgsFrameIndex(0), CallsEhReturn(false)
+ {}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ bool globalBaseRegSet() const;
+ unsigned getGlobalBaseReg();
+
+ bool mips16SPAliasRegSet() const;
+ unsigned getMips16SPAliasReg();
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasByvalArg() const { return HasByvalArg; }
+ void setFormalArgInfo(unsigned Size, bool HasByval) {
+ IncomingArgSize = Size;
+ HasByvalArg = HasByval;
+ }
+
+ unsigned getIncomingArgSize() const { return IncomingArgSize; }
+
+ bool callsEhReturn() const { return CallsEhReturn; }
+ void setCallsEhReturn() { CallsEhReturn = true; }
+
+ void createEhDataRegsFI();
+ int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; }
+ bool isEhDataRegFI(int FI) const;
+
+};
+
+} // end of namespace llvm
+
+#endif // MIPS_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
new file mode 100644
index 000000000000..32507334e9c6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -0,0 +1,219 @@
+//===-- MipsRegisterInfo.cpp - MIPS Register Information -== --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-reg-info"
+
+#include "MipsRegisterInfo.h"
+#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MipsGenRegisterInfo.inc"
+
+using namespace llvm;
+
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
+ : MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {}
+
+unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
+
+
+unsigned
+MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case Mips::CPURegsRegClassID:
+ case Mips::CPU64RegsRegClassID:
+ case Mips::DSPRegsRegClassID: {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return 28 - TFI->hasFP(MF);
+ }
+ case Mips::FGR32RegClassID:
+ return 32;
+ case Mips::AFGR64RegClassID:
+ return 16;
+ case Mips::FGR64RegClassID:
+ return 32;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// Mips Callee Saved Registers
+const uint16_t* MipsRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+ if (Subtarget.isSingleFloat())
+ return CSR_SingleFloatOnly_SaveList;
+ else if (!Subtarget.hasMips64())
+ return CSR_O32_SaveList;
+ else if (Subtarget.isABI_N32())
+ return CSR_N32_SaveList;
+
+ assert(Subtarget.isABI_N64());
+ return CSR_N64_SaveList;
+}
+
+const uint32_t*
+MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ if (Subtarget.isSingleFloat())
+ return CSR_SingleFloatOnly_RegMask;
+ else if (!Subtarget.hasMips64())
+ return CSR_O32_RegMask;
+ else if (Subtarget.isABI_N32())
+ return CSR_N32_RegMask;
+
+ assert(Subtarget.isABI_N64());
+ return CSR_N64_RegMask;
+}
+
+BitVector MipsRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ static const uint16_t ReservedCPURegs[] = {
+ Mips::ZERO, Mips::K0, Mips::K1, Mips::SP
+ };
+
+ static const uint16_t ReservedCPU64Regs[] = {
+ Mips::ZERO_64, Mips::K0_64, Mips::K1_64, Mips::SP_64
+ };
+
+ BitVector Reserved(getNumRegs());
+ typedef TargetRegisterClass::const_iterator RegIter;
+
+ for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I)
+ Reserved.set(ReservedCPURegs[I]);
+
+ for (unsigned I = 0; I < array_lengthof(ReservedCPU64Regs); ++I)
+ Reserved.set(ReservedCPU64Regs[I]);
+
+ if (Subtarget.hasMips64()) {
+ // Reserve all registers in AFGR64.
+ for (RegIter Reg = Mips::AFGR64RegClass.begin(),
+ EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg)
+ Reserved.set(*Reg);
+ } else {
+ // Reserve all registers in FGR64.
+ for (RegIter Reg = Mips::FGR64RegClass.begin(),
+ EReg = Mips::FGR64RegClass.end(); Reg != EReg; ++Reg)
+ Reserved.set(*Reg);
+ }
+ // Reserve FP if this function should have a dedicated frame pointer register.
+ if (MF.getTarget().getFrameLowering()->hasFP(MF)) {
+ if (Subtarget.inMips16Mode())
+ Reserved.set(Mips::S0);
+ else {
+ Reserved.set(Mips::FP);
+ Reserved.set(Mips::FP_64);
+ }
+ }
+
+ // Reserve hardware registers.
+ Reserved.set(Mips::HWR29);
+ Reserved.set(Mips::HWR29_64);
+
+ // Reserve DSP control register.
+ Reserved.set(Mips::DSPCtrl);
+
+ // Reserve RA if in mips16 mode.
+ if (Subtarget.inMips16Mode()) {
+ Reserved.set(Mips::RA);
+ Reserved.set(Mips::RA_64);
+ }
+
+ // Reserve GP if small section is used.
+ if (Subtarget.useSmallSection()) {
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::GP_64);
+ }
+
+ return Reserved;
+}
+
+bool
+MipsRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool
+MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+}
+
+// FrameIndex represent objects inside a abstract stack.
+// We must replace FrameIndex with an stack/frame pointer
+// direct reference.
+void MipsRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum, RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+
+ DEBUG(errs() << "\nFunction : " << MF.getName() << "\n";
+ errs() << "<--------->\n" << MI);
+
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ uint64_t stackSize = MF.getFrameInfo()->getStackSize();
+ int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n");
+
+ eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
+}
+
+unsigned MipsRegisterInfo::
+getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ bool IsN64 = Subtarget.isABI_N64();
+
+ if (Subtarget.inMips16Mode())
+ return TFI->hasFP(MF) ? Mips::S0 : Mips::SP;
+ else
+ return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) :
+ (IsN64 ? Mips::SP_64 : Mips::SP);
+
+}
+
+unsigned MipsRegisterInfo::
+getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+}
+
+unsigned MipsRegisterInfo::
+getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
new file mode 100644
index 000000000000..5ed51241391f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -0,0 +1,82 @@
+//===-- MipsRegisterInfo.h - Mips Register Information Impl -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGISTERINFO_H
+#define MIPSREGISTERINFO_H
+
+#include "Mips.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MipsGenRegisterInfo.inc"
+
+namespace llvm {
+class MipsSubtarget;
+class Type;
+
+class MipsRegisterInfo : public MipsGenRegisterInfo {
+protected:
+ const MipsSubtarget &Subtarget;
+
+public:
+ MipsRegisterInfo(const MipsSubtarget &Subtarget);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// Mips::RA, return the number that it corresponds to (e.g. 31).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg();
+
+ /// Adjust the Mips stack frame.
+ void adjustMipsStackFrame(MachineFunction &MF) const;
+
+ /// Code Generation virtual methods...
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+
+ /// Stack Frame Processing Methods
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ /// Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ /// \brief Return GPR register class.
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
+
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const = 0;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
new file mode 100644
index 000000000000..64458bcef7ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -0,0 +1,406 @@
+//===-- MipsRegisterInfo.td - Mips Register defs -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MIPS register file
+//===----------------------------------------------------------------------===//
+let Namespace = "Mips" in {
+def sub_fpeven : SubRegIndex;
+def sub_fpodd : SubRegIndex;
+def sub_32 : SubRegIndex;
+def sub_lo : SubRegIndex;
+def sub_hi : SubRegIndex;
+}
+
+class Unallocatable {
+ bit isAllocatable = 0;
+}
+
+// We have banks of 32 registers each.
+class MipsReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
+ let Namespace = "Mips";
+}
+
+class MipsRegWithSubRegs<bits<16> Enc, string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ let HWEncoding = Enc;
+ let Namespace = "Mips";
+}
+
+// Mips CPU Registers
+class MipsGPRReg<bits<16> Enc, string n> : MipsReg<Enc, n>;
+
+// Mips 64-bit CPU Registers
+class Mips64GPRReg<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
+ let SubRegIndices = [sub_32];
+}
+
+// Mips 32-bit FPU Registers
+class FPR<bits<16> Enc, string n> : MipsReg<Enc, n>;
+
+// Mips 64-bit (aliased) FPU Registers
+class AFPR<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
+ let SubRegIndices = [sub_fpeven, sub_fpodd];
+ let CoveredBySubRegs = 1;
+}
+
+class AFPR64<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
+ let SubRegIndices = [sub_32];
+}
+
+// Accumulator Registers
+class ACC<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
+ let SubRegIndices = [sub_lo, sub_hi];
+ let CoveredBySubRegs = 1;
+}
+
+// Mips Hardware Registers
+class HWR<bits<16> Enc, string n> : MipsReg<Enc, n>;
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Mips" in {
+ // General Purpose Registers
+ def ZERO : MipsGPRReg< 0, "zero">, DwarfRegNum<[0]>;
+ def AT : MipsGPRReg< 1, "1">, DwarfRegNum<[1]>;
+ def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<[2]>;
+ def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<[3]>;
+ def A0 : MipsGPRReg< 4, "4">, DwarfRegNum<[4]>;
+ def A1 : MipsGPRReg< 5, "5">, DwarfRegNum<[5]>;
+ def A2 : MipsGPRReg< 6, "6">, DwarfRegNum<[6]>;
+ def A3 : MipsGPRReg< 7, "7">, DwarfRegNum<[7]>;
+ def T0 : MipsGPRReg< 8, "8">, DwarfRegNum<[8]>;
+ def T1 : MipsGPRReg< 9, "9">, DwarfRegNum<[9]>;
+ def T2 : MipsGPRReg< 10, "10">, DwarfRegNum<[10]>;
+ def T3 : MipsGPRReg< 11, "11">, DwarfRegNum<[11]>;
+ def T4 : MipsGPRReg< 12, "12">, DwarfRegNum<[12]>;
+ def T5 : MipsGPRReg< 13, "13">, DwarfRegNum<[13]>;
+ def T6 : MipsGPRReg< 14, "14">, DwarfRegNum<[14]>;
+ def T7 : MipsGPRReg< 15, "15">, DwarfRegNum<[15]>;
+ def S0 : MipsGPRReg< 16, "16">, DwarfRegNum<[16]>;
+ def S1 : MipsGPRReg< 17, "17">, DwarfRegNum<[17]>;
+ def S2 : MipsGPRReg< 18, "18">, DwarfRegNum<[18]>;
+ def S3 : MipsGPRReg< 19, "19">, DwarfRegNum<[19]>;
+ def S4 : MipsGPRReg< 20, "20">, DwarfRegNum<[20]>;
+ def S5 : MipsGPRReg< 21, "21">, DwarfRegNum<[21]>;
+ def S6 : MipsGPRReg< 22, "22">, DwarfRegNum<[22]>;
+ def S7 : MipsGPRReg< 23, "23">, DwarfRegNum<[23]>;
+ def T8 : MipsGPRReg< 24, "24">, DwarfRegNum<[24]>;
+ def T9 : MipsGPRReg< 25, "25">, DwarfRegNum<[25]>;
+ def K0 : MipsGPRReg< 26, "26">, DwarfRegNum<[26]>;
+ def K1 : MipsGPRReg< 27, "27">, DwarfRegNum<[27]>;
+ def GP : MipsGPRReg< 28, "gp">, DwarfRegNum<[28]>;
+ def SP : MipsGPRReg< 29, "sp">, DwarfRegNum<[29]>;
+ def FP : MipsGPRReg< 30, "fp">, DwarfRegNum<[30]>;
+ def RA : MipsGPRReg< 31, "ra">, DwarfRegNum<[31]>;
+
+ // General Purpose 64-bit Registers
+ def ZERO_64 : Mips64GPRReg< 0, "zero", [ZERO]>, DwarfRegNum<[0]>;
+ def AT_64 : Mips64GPRReg< 1, "1", [AT]>, DwarfRegNum<[1]>;
+ def V0_64 : Mips64GPRReg< 2, "2", [V0]>, DwarfRegNum<[2]>;
+ def V1_64 : Mips64GPRReg< 3, "3", [V1]>, DwarfRegNum<[3]>;
+ def A0_64 : Mips64GPRReg< 4, "4", [A0]>, DwarfRegNum<[4]>;
+ def A1_64 : Mips64GPRReg< 5, "5", [A1]>, DwarfRegNum<[5]>;
+ def A2_64 : Mips64GPRReg< 6, "6", [A2]>, DwarfRegNum<[6]>;
+ def A3_64 : Mips64GPRReg< 7, "7", [A3]>, DwarfRegNum<[7]>;
+ def T0_64 : Mips64GPRReg< 8, "8", [T0]>, DwarfRegNum<[8]>;
+ def T1_64 : Mips64GPRReg< 9, "9", [T1]>, DwarfRegNum<[9]>;
+ def T2_64 : Mips64GPRReg< 10, "10", [T2]>, DwarfRegNum<[10]>;
+ def T3_64 : Mips64GPRReg< 11, "11", [T3]>, DwarfRegNum<[11]>;
+ def T4_64 : Mips64GPRReg< 12, "12", [T4]>, DwarfRegNum<[12]>;
+ def T5_64 : Mips64GPRReg< 13, "13", [T5]>, DwarfRegNum<[13]>;
+ def T6_64 : Mips64GPRReg< 14, "14", [T6]>, DwarfRegNum<[14]>;
+ def T7_64 : Mips64GPRReg< 15, "15", [T7]>, DwarfRegNum<[15]>;
+ def S0_64 : Mips64GPRReg< 16, "16", [S0]>, DwarfRegNum<[16]>;
+ def S1_64 : Mips64GPRReg< 17, "17", [S1]>, DwarfRegNum<[17]>;
+ def S2_64 : Mips64GPRReg< 18, "18", [S2]>, DwarfRegNum<[18]>;
+ def S3_64 : Mips64GPRReg< 19, "19", [S3]>, DwarfRegNum<[19]>;
+ def S4_64 : Mips64GPRReg< 20, "20", [S4]>, DwarfRegNum<[20]>;
+ def S5_64 : Mips64GPRReg< 21, "21", [S5]>, DwarfRegNum<[21]>;
+ def S6_64 : Mips64GPRReg< 22, "22", [S6]>, DwarfRegNum<[22]>;
+ def S7_64 : Mips64GPRReg< 23, "23", [S7]>, DwarfRegNum<[23]>;
+ def T8_64 : Mips64GPRReg< 24, "24", [T8]>, DwarfRegNum<[24]>;
+ def T9_64 : Mips64GPRReg< 25, "25", [T9]>, DwarfRegNum<[25]>;
+ def K0_64 : Mips64GPRReg< 26, "26", [K0]>, DwarfRegNum<[26]>;
+ def K1_64 : Mips64GPRReg< 27, "27", [K1]>, DwarfRegNum<[27]>;
+ def GP_64 : Mips64GPRReg< 28, "gp", [GP]>, DwarfRegNum<[28]>;
+ def SP_64 : Mips64GPRReg< 29, "sp", [SP]>, DwarfRegNum<[29]>;
+ def FP_64 : Mips64GPRReg< 30, "fp", [FP]>, DwarfRegNum<[30]>;
+ def RA_64 : Mips64GPRReg< 31, "ra", [RA]>, DwarfRegNum<[31]>;
+
+ /// Mips Single point precision FPU Registers
+ def F0 : FPR< 0, "f0">, DwarfRegNum<[32]>;
+ def F1 : FPR< 1, "f1">, DwarfRegNum<[33]>;
+ def F2 : FPR< 2, "f2">, DwarfRegNum<[34]>;
+ def F3 : FPR< 3, "f3">, DwarfRegNum<[35]>;
+ def F4 : FPR< 4, "f4">, DwarfRegNum<[36]>;
+ def F5 : FPR< 5, "f5">, DwarfRegNum<[37]>;
+ def F6 : FPR< 6, "f6">, DwarfRegNum<[38]>;
+ def F7 : FPR< 7, "f7">, DwarfRegNum<[39]>;
+ def F8 : FPR< 8, "f8">, DwarfRegNum<[40]>;
+ def F9 : FPR< 9, "f9">, DwarfRegNum<[41]>;
+ def F10 : FPR<10, "f10">, DwarfRegNum<[42]>;
+ def F11 : FPR<11, "f11">, DwarfRegNum<[43]>;
+ def F12 : FPR<12, "f12">, DwarfRegNum<[44]>;
+ def F13 : FPR<13, "f13">, DwarfRegNum<[45]>;
+ def F14 : FPR<14, "f14">, DwarfRegNum<[46]>;
+ def F15 : FPR<15, "f15">, DwarfRegNum<[47]>;
+ def F16 : FPR<16, "f16">, DwarfRegNum<[48]>;
+ def F17 : FPR<17, "f17">, DwarfRegNum<[49]>;
+ def F18 : FPR<18, "f18">, DwarfRegNum<[50]>;
+ def F19 : FPR<19, "f19">, DwarfRegNum<[51]>;
+ def F20 : FPR<20, "f20">, DwarfRegNum<[52]>;
+ def F21 : FPR<21, "f21">, DwarfRegNum<[53]>;
+ def F22 : FPR<22, "f22">, DwarfRegNum<[54]>;
+ def F23 : FPR<23, "f23">, DwarfRegNum<[55]>;
+ def F24 : FPR<24, "f24">, DwarfRegNum<[56]>;
+ def F25 : FPR<25, "f25">, DwarfRegNum<[57]>;
+ def F26 : FPR<26, "f26">, DwarfRegNum<[58]>;
+ def F27 : FPR<27, "f27">, DwarfRegNum<[59]>;
+ def F28 : FPR<28, "f28">, DwarfRegNum<[60]>;
+ def F29 : FPR<29, "f29">, DwarfRegNum<[61]>;
+ def F30 : FPR<30, "f30">, DwarfRegNum<[62]>;
+ def F31 : FPR<31, "f31">, DwarfRegNum<[63]>;
+
+ /// Mips Double point precision FPU Registers (aliased
+ /// with the single precision to hold 64 bit values)
+ def D0 : AFPR< 0, "f0", [F0, F1]>;
+ def D1 : AFPR< 2, "f2", [F2, F3]>;
+ def D2 : AFPR< 4, "f4", [F4, F5]>;
+ def D3 : AFPR< 6, "f6", [F6, F7]>;
+ def D4 : AFPR< 8, "f8", [F8, F9]>;
+ def D5 : AFPR<10, "f10", [F10, F11]>;
+ def D6 : AFPR<12, "f12", [F12, F13]>;
+ def D7 : AFPR<14, "f14", [F14, F15]>;
+ def D8 : AFPR<16, "f16", [F16, F17]>;
+ def D9 : AFPR<18, "f18", [F18, F19]>;
+ def D10 : AFPR<20, "f20", [F20, F21]>;
+ def D11 : AFPR<22, "f22", [F22, F23]>;
+ def D12 : AFPR<24, "f24", [F24, F25]>;
+ def D13 : AFPR<26, "f26", [F26, F27]>;
+ def D14 : AFPR<28, "f28", [F28, F29]>;
+ def D15 : AFPR<30, "f30", [F30, F31]>;
+
+ /// Mips Double point precision FPU Registers in MFP64 mode.
+ def D0_64 : AFPR64<0, "f0", [F0]>, DwarfRegNum<[32]>;
+ def D1_64 : AFPR64<1, "f1", [F1]>, DwarfRegNum<[33]>;
+ def D2_64 : AFPR64<2, "f2", [F2]>, DwarfRegNum<[34]>;
+ def D3_64 : AFPR64<3, "f3", [F3]>, DwarfRegNum<[35]>;
+ def D4_64 : AFPR64<4, "f4", [F4]>, DwarfRegNum<[36]>;
+ def D5_64 : AFPR64<5, "f5", [F5]>, DwarfRegNum<[37]>;
+ def D6_64 : AFPR64<6, "f6", [F6]>, DwarfRegNum<[38]>;
+ def D7_64 : AFPR64<7, "f7", [F7]>, DwarfRegNum<[39]>;
+ def D8_64 : AFPR64<8, "f8", [F8]>, DwarfRegNum<[40]>;
+ def D9_64 : AFPR64<9, "f9", [F9]>, DwarfRegNum<[41]>;
+ def D10_64 : AFPR64<10, "f10", [F10]>, DwarfRegNum<[42]>;
+ def D11_64 : AFPR64<11, "f11", [F11]>, DwarfRegNum<[43]>;
+ def D12_64 : AFPR64<12, "f12", [F12]>, DwarfRegNum<[44]>;
+ def D13_64 : AFPR64<13, "f13", [F13]>, DwarfRegNum<[45]>;
+ def D14_64 : AFPR64<14, "f14", [F14]>, DwarfRegNum<[46]>;
+ def D15_64 : AFPR64<15, "f15", [F15]>, DwarfRegNum<[47]>;
+ def D16_64 : AFPR64<16, "f16", [F16]>, DwarfRegNum<[48]>;
+ def D17_64 : AFPR64<17, "f17", [F17]>, DwarfRegNum<[49]>;
+ def D18_64 : AFPR64<18, "f18", [F18]>, DwarfRegNum<[50]>;
+ def D19_64 : AFPR64<19, "f19", [F19]>, DwarfRegNum<[51]>;
+ def D20_64 : AFPR64<20, "f20", [F20]>, DwarfRegNum<[52]>;
+ def D21_64 : AFPR64<21, "f21", [F21]>, DwarfRegNum<[53]>;
+ def D22_64 : AFPR64<22, "f22", [F22]>, DwarfRegNum<[54]>;
+ def D23_64 : AFPR64<23, "f23", [F23]>, DwarfRegNum<[55]>;
+ def D24_64 : AFPR64<24, "f24", [F24]>, DwarfRegNum<[56]>;
+ def D25_64 : AFPR64<25, "f25", [F25]>, DwarfRegNum<[57]>;
+ def D26_64 : AFPR64<26, "f26", [F26]>, DwarfRegNum<[58]>;
+ def D27_64 : AFPR64<27, "f27", [F27]>, DwarfRegNum<[59]>;
+ def D28_64 : AFPR64<28, "f28", [F28]>, DwarfRegNum<[60]>;
+ def D29_64 : AFPR64<29, "f29", [F29]>, DwarfRegNum<[61]>;
+ def D30_64 : AFPR64<30, "f30", [F30]>, DwarfRegNum<[62]>;
+ def D31_64 : AFPR64<31, "f31", [F31]>, DwarfRegNum<[63]>;
+
+ // Hi/Lo registers
+ def HI : Register<"hi">, DwarfRegNum<[64]>;
+ def HI1 : Register<"hi1">, DwarfRegNum<[176]>;
+ def HI2 : Register<"hi2">, DwarfRegNum<[178]>;
+ def HI3 : Register<"hi3">, DwarfRegNum<[180]>;
+ def LO : Register<"lo">, DwarfRegNum<[65]>;
+ def LO1 : Register<"lo1">, DwarfRegNum<[177]>;
+ def LO2 : Register<"lo2">, DwarfRegNum<[179]>;
+ def LO3 : Register<"lo3">, DwarfRegNum<[181]>;
+
+ let SubRegIndices = [sub_32] in {
+ def HI64 : RegisterWithSubRegs<"hi", [HI]>;
+ def LO64 : RegisterWithSubRegs<"lo", [LO]>;
+ }
+
+ // Status flags register
+ def FCR31 : Register<"31">;
+
+ // fcc0 register
+ def FCC0 : MipsReg<0, "fcc0">;
+
+ // PC register
+ def PC : Register<"pc">;
+
+ // Hardware register $29
+ def HWR29 : MipsReg<29, "29">;
+ def HWR29_64 : MipsReg<29, "29">;
+
+ // Accum registers
+ def AC0 : ACC<0, "ac0", [LO, HI]>;
+ def AC1 : ACC<1, "ac1", [LO1, HI1]>;
+ def AC2 : ACC<2, "ac2", [LO2, HI2]>;
+ def AC3 : ACC<3, "ac3", [LO3, HI3]>;
+
+ def AC0_64 : ACC<0, "ac0", [LO64, HI64]>;
+
+ def DSPCtrl : Register<"dspctrl">;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+class CPURegsClass<list<ValueType> regTypes> :
+ RegisterClass<"Mips", regTypes, 32, (add
+ // Reserved
+ ZERO, AT,
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3,
+ // Not preserved across procedure calls
+ T0, T1, T2, T3, T4, T5, T6, T7,
+ // Callee save
+ S0, S1, S2, S3, S4, S5, S6, S7,
+ // Not preserved across procedure calls
+ T8, T9,
+ // Reserved
+ K0, K1, GP, SP, FP, RA)>;
+
+def CPURegs : CPURegsClass<[i32]>;
+def DSPRegs : CPURegsClass<[v4i8, v2i16]>;
+
+def CPU64Regs : RegisterClass<"Mips", [i64], 64, (add
+// Reserved
+ ZERO_64, AT_64,
+ // Return Values and Arguments
+ V0_64, V1_64, A0_64, A1_64, A2_64, A3_64,
+ // Not preserved across procedure calls
+ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64,
+ // Callee save
+ S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64,
+ // Not preserved across procedure calls
+ T8_64, T9_64,
+ // Reserved
+ K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)>;
+
+def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3,
+ // Callee save
+ S0, S1)>;
+
+def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable;
+
+def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
+
+// 64bit fp:
+// * FGR64 - 32 64-bit registers
+// * AFGR64 - 16 32-bit even registers (32-bit FP Mode)
+//
+// 32bit fp:
+// * FGR32 - 16 32-bit even registers
+// * FGR32 - 32 32-bit registers (single float only mode)
+def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
+
+def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
+ // Return Values and Arguments
+ D0, D1,
+ // Not preserved across procedure calls
+ D2, D3, D4, D5,
+ // Return Values and Arguments
+ D6, D7,
+ // Not preserved across procedure calls
+ D8, D9,
+ // Callee save
+ D10, D11, D12, D13, D14, D15)>;
+
+def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
+
+// Condition Register for floating point operations
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable;
+
+// Hi/Lo Registers
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable;
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable;
+
+// Hardware registers
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
+def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable;
+
+// Accumulator Registers
+def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> {
+ let Size = 64;
+}
+
+def ACRegs128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> {
+ let Size = 128;
+}
+
+def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
+ let Size = 64;
+}
+
+def CPURegsAsmOperand : AsmOperandClass {
+ let Name = "CPURegsAsm";
+ let ParserMethod = "parseCPURegs";
+}
+
+def CPU64RegsAsmOperand : AsmOperandClass {
+ let Name = "CPU64RegsAsm";
+ let ParserMethod = "parseCPU64Regs";
+}
+
+def CCRAsmOperand : AsmOperandClass {
+ let Name = "CCRAsm";
+ let ParserMethod = "parseCCRRegs";
+}
+
+def CPURegsOpnd : RegisterOperand<CPURegs, "printCPURegs"> {
+ let ParserMatchClass = CPURegsAsmOperand;
+}
+
+def CPU64RegsOpnd : RegisterOperand<CPU64Regs, "printCPURegs"> {
+ let ParserMatchClass = CPU64RegsAsmOperand;
+}
+
+def CCROpnd : RegisterOperand<CCR, "printCPURegs"> {
+ let ParserMatchClass = CCRAsmOperand;
+}
+
+def HWRegsAsmOperand : AsmOperandClass {
+ let Name = "HWRegsAsm";
+ let ParserMethod = "parseHWRegs";
+}
+
+def HW64RegsAsmOperand : AsmOperandClass {
+ let Name = "HW64RegsAsm";
+ let ParserMethod = "parseHW64Regs";
+}
+
+def HWRegsOpnd : RegisterOperand<HWRegs, "printCPURegs"> {
+ let ParserMatchClass = HWRegsAsmOperand;
+}
+
+def HW64RegsOpnd : RegisterOperand<HWRegs64, "printCPURegs"> {
+ let ParserMatchClass = HW64RegsAsmOperand;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRelocations.h b/contrib/llvm/lib/Target/Mips/MipsRelocations.h
new file mode 100644
index 000000000000..0787ed399d5f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRelocations.h
@@ -0,0 +1,41 @@
+//===-- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Mips target-specific relocation types
+// (for relocation-model=static).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSRELOCATIONS_H_
+#define MIPSRELOCATIONS_H_
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace Mips{
+ enum RelocationType {
+ // reloc_mips_pc16 - pc relative relocation for branches. The lower 18
+ // bits of the difference between the branch target and the branch
+ // instruction, shifted right by 2.
+ reloc_mips_pc16 = 1,
+
+ // reloc_mips_hi - upper 16 bits of the address (modified by +1 if the
+ // lower 16 bits of the address is negative).
+ reloc_mips_hi = 2,
+
+ // reloc_mips_lo - lower 16 bits of the address.
+ reloc_mips_lo = 3,
+
+ // reloc_mips_26 - lower 28 bits of the address, shifted right by 2.
+ reloc_mips_26 = 4
+ };
+ }
+}
+
+#endif /* MIPSRELOCATIONS_H_ */
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
new file mode 100644
index 000000000000..68ec92188802
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -0,0 +1,468 @@
+//===-- MipsSEFrameLowering.cpp - Mips32/64 Frame Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSEFrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsSEInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+namespace {
+typedef MachineBasicBlock::iterator Iter;
+
+/// Helper class to expand accumulator pseudos.
+class ExpandACCPseudo {
+public:
+ ExpandACCPseudo(MachineFunction &MF);
+ bool expand();
+
+private:
+ bool expandInstr(MachineBasicBlock &MBB, Iter I);
+ void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+
+ MachineFunction &MF;
+ const MipsSEInstrInfo &TII;
+ const MipsRegisterInfo &RegInfo;
+ MachineRegisterInfo &MRI;
+};
+}
+
+ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_)
+ : MF(MF_),
+ TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())),
+ RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {}
+
+bool ExpandACCPseudo::expand() {
+ bool Expanded = false;
+
+ for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end();
+ BB != BBEnd; ++BB)
+ for (Iter I = BB->begin(), End = BB->end(); I != End;)
+ Expanded |= expandInstr(*BB, I++);
+
+ return Expanded;
+}
+
+bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
+ switch(I->getOpcode()) {
+ case Mips::LOAD_AC64:
+ case Mips::LOAD_AC64_P8:
+ case Mips::LOAD_AC_DSP:
+ case Mips::LOAD_AC_DSP_P8:
+ expandLoad(MBB, I, 4);
+ break;
+ case Mips::LOAD_AC128:
+ case Mips::LOAD_AC128_P8:
+ expandLoad(MBB, I, 8);
+ break;
+ case Mips::STORE_AC64:
+ case Mips::STORE_AC64_P8:
+ case Mips::STORE_AC_DSP:
+ case Mips::STORE_AC_DSP_P8:
+ expandStore(MBB, I, 4);
+ break;
+ case Mips::STORE_AC128:
+ case Mips::STORE_AC128_P8:
+ expandStore(MBB, I, 8);
+ break;
+ case Mips::COPY_AC64:
+ case Mips::COPY_AC_DSP:
+ expandCopy(MBB, I, 4);
+ break;
+ case Mips::COPY_AC128:
+ expandCopy(MBB, I, 8);
+ break;
+ default:
+ return false;
+ }
+
+ MBB.erase(I);
+ return true;
+}
+
+void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // load $vr0, FI
+ // copy lo, $vr0
+ // load $vr1, FI + 4
+ // copy hi, $vr1
+
+ assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+ unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+
+ TII.loadRegFromStack(MBB, I, VR0, FI, RC, &RegInfo, 0);
+ BuildMI(MBB, I, DL, Desc, Lo).addReg(VR0, RegState::Kill);
+ TII.loadRegFromStack(MBB, I, VR1, FI, RC, &RegInfo, RegSize);
+ BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill);
+}
+
+void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // copy $vr0, lo
+ // store $vr0, FI
+ // copy $vr1, hi
+ // store $vr1, FI + 4
+
+ assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ unsigned SrcKill = getKillRegState(I->getOperand(0).isKill());
+ unsigned Lo = RegInfo.getSubReg(Src, Mips::sub_lo);
+ unsigned Hi = RegInfo.getSubReg(Src, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(Lo, SrcKill);
+ TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(Hi, SrcKill);
+ TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize);
+}
+
+void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // copy $vr0, src_lo
+ // copy dst_lo, $vr0
+ // copy $vr1, src_hi
+ // copy dst_hi, $vr1
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
+ unsigned SrcKill = getKillRegState(I->getOperand(1).isKill());
+ unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+ unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+ unsigned SrcLo = RegInfo.getSubReg(Src, Mips::sub_lo);
+ unsigned SrcHi = RegInfo.getSubReg(Src, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(SrcLo, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstLo)
+ .addReg(VR0, RegState::Kill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi)
+ .addReg(VR1, RegState::Kill);
+}
+
+unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
+ static const unsigned EhDataReg[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const unsigned EhDataReg64[] = {
+ Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64
+ };
+
+ return STI.isABI_N64() ? EhDataReg64[I] : EhDataReg[I];
+}
+
+void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+
+ // First, compute final stack size.
+ uint64_t StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
+ // Adjust stack.
+ TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ if (CSI.size()) {
+ // Find the instruction past the last instruction that saves a callee-saved
+ // register to the stack.
+ for (unsigned i = 0; i < CSI.size(); ++i)
+ ++MBBI;
+
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+
+ // If Reg is a double precision register, emit two cfa_offsets,
+ // one for each of the paired single precision registers.
+ if (Mips::AFGR64RegClass.contains(Reg)) {
+ MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
+ MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
+ MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven));
+ MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd));
+
+ if (!STI.isLittle())
+ std::swap(SrcML0, SrcML1);
+
+ Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
+ Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
+ } else {
+ // Reg is either in CPURegs or FGR32.
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(Reg);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+ }
+ }
+ }
+
+ if (MipsFI->callsEhReturn()) {
+ const TargetRegisterClass *RC = STI.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ // Insert instructions that spill eh data registers.
+ for (int I = 0; I < 4; ++I) {
+ if (!MBB.isLiveIn(ehDataReg(I)))
+ MBB.addLiveIn(ehDataReg(I));
+ TII.storeRegToStackSlot(MBB, MBBI, ehDataReg(I), false,
+ MipsFI->getEhDataRegFI(I), RC, RegInfo);
+ }
+
+ // Emit .cfi_offset directives for eh data registers.
+ MCSymbol *CSLabel2 = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel2);
+ for (int I = 0; I < 4; ++I) {
+ int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I));
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(ehDataReg(I));
+ Moves.push_back(MachineMove(CSLabel2, DstML, SrcML));
+ }
+ }
+
+ // if framepointer enabled, set it to point to the stack pointer.
+ if (hasFP(MF)) {
+ // Insert instruction "move $fp, $sp" at this location.
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
+
+ // emit ".cfi_def_cfa_register $fp"
+ MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
+ DstML = MachineLocation(FP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
+ }
+}
+
+void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+
+ // if framepointer enabled, restore the stack pointer.
+ if (hasFP(MF)) {
+ // Find the first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instruction "move $sp, $fp" at this location.
+ BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
+ }
+
+ if (MipsFI->callsEhReturn()) {
+ const TargetRegisterClass *RC = STI.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ // Find first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instructions that restore eh data registers.
+ for (int J = 0; J < 4; ++J) {
+ TII.loadRegFromStackSlot(MBB, I, ehDataReg(J), MipsFI->getEhDataRegFI(J),
+ RC, RegInfo);
+ }
+ }
+
+ // Get the number of bytes from FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (!StackSize)
+ return;
+
+ // Adjust stack.
+ TII.adjustStackPtr(SP, StackSize, MBB, MBBI);
+}
+
+bool MipsSEFrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ MachineBasicBlock *EntryBlock = MF->begin();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in. Do not add if the register is
+ // RA and return address is taken, because it has already been added in
+ // method MipsTargetLowering::LowerRETURNADDR.
+ // It's killed at the spill, unless the register is RA and return address
+ // is taken.
+ unsigned Reg = CSI[i].getReg();
+ bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
+ && MF->getFrameInfo()->isReturnAddressTaken();
+ if (!IsRAAndRetAddrIsTaken)
+ EntryBlock->addLiveIn(Reg);
+
+ // Insert the spill to the stack frame.
+ bool IsKill = !IsRAAndRetAddrIsTaken;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+
+ return true;
+}
+
+bool
+MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Reserve call frame if the size of the maximum call frame fits into 16-bit
+ // immediate field and there are no variable sized objects on the stack.
+ // Make sure the second register scavenger spill slot can be accessed with one
+ // instruction.
+ return isInt<16>(MFI->getMaxCallFrameSize() + getStackAlignment()) &&
+ !MFI->hasVarSizedObjects();
+}
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void MipsSEFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ TII.adjustStackPtr(SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
+
+void MipsSEFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+
+ // Mark $fp as used if function has dedicated frame pointer.
+ if (hasFP(MF))
+ MRI.setPhysRegUsed(FP);
+
+ // Create spill slots for eh data registers if function calls eh_return.
+ if (MipsFI->callsEhReturn())
+ MipsFI->createEhDataRegsFI();
+
+ // Expand pseudo instructions which load, store or copy accumulators.
+ // Add an emergency spill slot if a pseudo was expanded.
+ if (ExpandACCPseudo(MF).expand()) {
+ // The spill slot should be half the size of the accumulator. If target is
+ // mips64, it should be 64-bit, otherwise it should be 32-bt.
+ const TargetRegisterClass *RC = STI.hasMips64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+ int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment(), false);
+ RS->addScavengingFrameIndex(FI);
+ }
+
+ // Set scavenging frame index if necessary.
+ uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() +
+ estimateStackSize(MF);
+
+ if (isInt<16>(MaxSPOffset))
+ return;
+
+ const TargetRegisterClass *RC = STI.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+ int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment(), false);
+ RS->addScavengingFrameIndex(FI);
+}
+
+const MipsFrameLowering *
+llvm::createMipsSEFrameLowering(const MipsSubtarget &ST) {
+ return new MipsSEFrameLowering(ST);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
new file mode 100644
index 000000000000..193a66cc65a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
@@ -0,0 +1,49 @@
+//===-- MipsSEFrameLowering.h - Mips32/64 frame lowering --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSE_FRAMEINFO_H
+#define MIPSSE_FRAMEINFO_H
+
+#include "MipsFrameLowering.h"
+
+namespace llvm {
+
+class MipsSEFrameLowering : public MipsFrameLowering {
+public:
+ explicit MipsSEFrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI, STI.hasMips64() ? 16 : 8) {}
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+ unsigned ehDataReg(unsigned I) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
new file mode 100644
index 000000000000..d6d220750c61
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -0,0 +1,473 @@
+//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+
+bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E;) {
+ MachineOperand &MO = U.getOperand();
+ unsigned OpNo = U.getOperandNo();
+ MachineInstr *MI = MO.getParent();
+ ++U;
+
+ // Do not replace if it is a phi's operand or is tied to def operand.
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
+ continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
+
+void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC;
+
+ if (Subtarget.isABI_N64())
+ RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+ else
+ RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+
+ if (Subtarget.isABI_N64()) {
+ MF.getRegInfo().addLiveIn(Mips::T9_64);
+ MBB.addLiveIn(Mips::T9_64);
+
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // daddu $v1, $v0, $t9
+ // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+ .addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+ // Set global register to __gnu_local_gp.
+ //
+ // lui $v0, %hi(__gnu_local_gp)
+ // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ return;
+ }
+
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (Subtarget.isABI_N32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ assert(Subtarget.isABI_O32());
+
+ // For O32 ABI, the following instruction sequence is emitted to initialize
+ // the global base register:
+ //
+ // 0. lui $2, %hi(_gp_disp)
+ // 1. addiu $2, $2, %lo(_gp_disp)
+ // 2. addu $globalbasereg, $2, $t9
+ //
+ // We emit only the last instruction here.
+ //
+ // GNU linker requires that the first two instructions appear at the beginning
+ // of a function and no instructions be inserted before or between them.
+ // The two instructions are emitted during lowering to MC layer in order to
+ // avoid any reordering.
+ //
+ // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
+ // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
+ // reads it.
+ MF.getRegInfo().addLiveIn(Mips::V0);
+ MBB.addLiveIn(Mips::V0);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+ .addReg(Mips::V0).addReg(Mips::T9);
+}
+
+void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ replaceUsesWithZeroReg(MRI, *I);
+}
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
+SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
+ SDValue CmpLHS, DebugLoc DL,
+ SDNode *Node) const {
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+ SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
+ SDValue(Carry, 0), RHS);
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry, 0));
+}
+
+/// ComplexPattern used on MipsInstrInfo
+/// Used on Mips Load/Store instructions
+bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ EVT ValTy = Addr.getValueType();
+
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+ }
+
+ // on PIC code Load GA
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
+ return true;
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ return selectAddrRegImm(Addr, Base, Offset) ||
+ selectAddrDefault(Addr, Base, Offset);
+}
+
+std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ EVT NodeTy = Node->getValueType(0);
+ SDNode *Result;
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
+
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ /// Special Muls
+ case ISD::MUL: {
+ // Mips32 has a 32-bit three operand mul instruction.
+ if (Subtarget.hasMips32() && NodeTy == MVT::i32)
+ break;
+ MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
+ return std::make_pair(true, Result);
+ }
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
+
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ if (Subtarget.hasMips64()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO_64, MVT::i64);
+ Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
+ } else {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO, MVT::i32);
+ Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero,
+ Zero);
+ }
+
+ return std::make_pair(true, Result);
+ }
+ break;
+ }
+
+ case ISD::Constant: {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ unsigned Size = CN->getValueSizeInBits(0);
+
+ if (Size == 32)
+ break;
+
+ MipsAnalyzeImmediate AnalyzeImm;
+ int64_t Imm = CN->getSExtValue();
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, false);
+
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+ DebugLoc DL = CN->getDebugLoc();
+ SDNode *RegOpnd;
+ SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+
+ // The first instruction can be a LUi which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == Mips::LUi64)
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
+ else
+ RegOpnd =
+ CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
+ ImmOpnd);
+
+ // The remaining instructions in the sequence are handled here.
+ for (++Inst; Inst != Seq.end(); ++Inst) {
+ ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ SDValue(RegOpnd, 0), ImmOpnd);
+ }
+
+ return std::make_pair(true, RegOpnd);
+ }
+
+ case MipsISD::ThreadPointer: {
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
+ }
+
+ SDNode *Rdhwr =
+ CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0),
+ CurDAG->getRegister(SrcReg, PtrVT));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return std::make_pair(true, ResNode.getNode());
+ }
+
+ case MipsISD::InsertLOHI: {
+ unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID :
+ Mips::ACRegsRegClassID;
+ SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32);
+ SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32);
+ SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32);
+ const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx,
+ Node->getOperand(1), HiIdx };
+ SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+ MVT::Untyped, Ops, 5);
+ return std::make_pair(true, Res);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
+ return new MipsSEDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
new file mode 100644
index 000000000000..6137ab040bbc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -0,0 +1,57 @@
+//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEISELDAGTODAG_H
+#define MIPSSEISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
+
+public:
+ explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
+
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
+ DebugLoc DL, SDNode *Node) const;
+
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+};
+
+FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
new file mode 100644
index 000000000000..4f219218d31f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -0,0 +1,442 @@
+//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsSEISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
+ cl::desc("MIPS: Enable tail calls."), cl::init(false));
+
+MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+
+ if (HasMips64)
+ addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
+
+ if (Subtarget->hasDSP()) {
+ MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
+
+ for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
+ addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, VecTys[i], Expand);
+
+ setOperationAction(ISD::LOAD, VecTys[i], Legal);
+ setOperationAction(ISD::STORE, VecTys[i], Legal);
+ setOperationAction(ISD::BITCAST, VecTys[i], Legal);
+ }
+ }
+
+ if (!TM.Options.UseSoftFloat) {
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
+ else
+ addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
+ }
+ }
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::i32, Custom);
+
+ if (HasMips64)
+ setOperationAction(ISD::MUL, MVT::i64, Custom);
+
+ setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ setTargetDAGCombine(ISD::ADDE);
+ setTargetDAGCombine(ISD::SUBE);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
+ return new MipsSETargetLowering(TM);
+}
+
+
+bool
+MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+
+ switch (SVT) {
+ case MVT::i64:
+ case MVT::i32:
+ if (Fast)
+ *Fast = true;
+ return true;
+ default:
+ return false;
+ }
+}
+
+SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch(Op.getOpcode()) {
+ case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
+ case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
+ case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
+ case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
+ case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
+ case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
+ case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG);
+ }
+
+ return MipsTargetLowering::LowerOperation(Op, DAG);
+}
+
+// selectMADD -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
+ // ADDENode's second operand must be a flag output of an ADDC node in order
+ // for the matching to be successful.
+ SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
+
+ if (ADDCNode->getOpcode() != ISD::ADDC)
+ return false;
+
+ SDValue MultHi = ADDENode->getOperand(0);
+ SDValue MultLo = ADDCNode->getOperand(0);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo amd MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than ADDENode or ADDCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MADD instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ DebugLoc DL = ADDENode->getDebugLoc();
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ ADDCNode->getOperand(1),
+ ADDENode->getOperand(1));
+
+ // create MipsMAdd(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+ SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of adde and addc here
+ if (!SDValue(ADDCNode, 0).use_empty()) {
+ SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
+ SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
+ LoIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
+ }
+ if (!SDValue(ADDENode, 0).use_empty()) {
+ SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
+ SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
+ HiIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
+ }
+
+ return true;
+}
+
+// selectMSUB -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc Lo0, multLo), (sube Hi0, multHi),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
+ // SUBENode's second operand must be a flag output of an SUBC node in order
+ // for the matching to be successful.
+ SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
+
+ if (SUBCNode->getOpcode() != ISD::SUBC)
+ return false;
+
+ SDValue MultHi = SUBENode->getOperand(1);
+ SDValue MultLo = SUBCNode->getOperand(1);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo amd MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than SUBENode or SUBCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MSUB instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ DebugLoc DL = SUBENode->getDebugLoc();
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ SUBCNode->getOperand(0),
+ SUBENode->getOperand(0));
+
+ // create MipsSub(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+ SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of sube and subc here
+ if (!SDValue(SUBCNode, 0).use_empty()) {
+ SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
+ SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
+ LoIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
+ }
+ if (!SDValue(SUBENode, 0).use_empty()) {
+ SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
+ SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
+ HiIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
+ }
+
+ return true;
+}
+
+static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ selectMADD(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ selectMSUB(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue
+MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ case ISD::ADDE:
+ return performADDECombine(N, DAG, DCI, Subtarget);
+ case ISD::SUBE:
+ return performSUBECombine(N, DAG, DCI, Subtarget);
+ default:
+ return MipsTargetLowering::PerformDAGCombine(N, DCI);
+ }
+}
+
+MachineBasicBlock *
+MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::BPOSGE32_PSEUDO:
+ return emitBPOSGE32(MI, BB);
+ }
+}
+
+bool MipsSETargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ if (!EnableMipsTailCalls)
+ return false;
+
+ // Return false if either the callee or caller has a byval argument.
+ if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
+ return false;
+
+ // Return true if the callee's argument area is no larger than the
+ // caller's.
+ return NextStackOffset <= FI.getIncomingArgSize();
+}
+
+void MipsSETargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // T9 should contain the address of the callee function if
+ // -reloction-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
+ RegsToPass.push_front(std::make_pair(T9Reg, Callee));
+ } else
+ Ops.push_back(Callee);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
+ bool HasLo, bool HasHi,
+ SelectionDAG &DAG) const {
+ EVT Ty = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Lo, Hi;
+
+ if (HasLo)
+ Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
+ DAG.getConstant(Mips::sub_lo, MVT::i32));
+ if (HasHi)
+ Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
+ DAG.getConstant(Mips::sub_hi, MVT::i32));
+
+ if (!HasLo || !HasHi)
+ return HasLo ? Lo : Hi;
+
+ SDValue Vals[] = { Lo, Hi };
+ return DAG.getMergeValues(Vals, 2, DL);
+}
+
+MachineBasicBlock * MipsSETargetLowering::
+emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
+ // $bb:
+ // bposge32_pseudo $vr0
+ // =>
+ // $bb:
+ // bposge32 $tbb
+ // $fbb:
+ // li $vr2, 0
+ // b $sink
+ // $tbb:
+ // li $vr1, 1
+ // $sink:
+ // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+ // Insert the real bposge32 instruction to $BB.
+ BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned VR2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned VR1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
new file mode 100644
index 000000000000..186f6a343dee
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
@@ -0,0 +1,62 @@
+//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsSEISELLOWERING_H
+#define MipsSEISELLOWERING_H
+
+#include "MipsISelLowering.h"
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+ class MipsSETargetLowering : public MipsTargetLowering {
+ public:
+ explicit MipsSETargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
+ if (VT == MVT::Untyped)
+ return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass :
+ &Mips::ACRegsRegClass;
+
+ return TargetLowering::getRepRegClassFor(VT);
+ }
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
+ SelectionDAG &DAG) const;
+
+ MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // MipsSEISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
new file mode 100644
index 000000000000..ca0315ed9f6e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -0,0 +1,410 @@
+//===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSEInstrInfo.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm)
+ : MipsInstrInfo(tm,
+ tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J),
+ RI(*tm.getSubtargetImpl(), *this),
+ IsN64(tm.getSubtarget<MipsSubtarget>().isABI_N64()) {}
+
+const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MipsSEInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
+ (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
+ (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
+ (Opc == Mips::LDC164_P8)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MipsSEInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
+ (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
+ (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
+ (Opc == Mips::SDC164_P8)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0, ZeroReg = 0;
+
+ if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+ if (Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::OR, ZeroReg = Mips::ZERO;
+ else if (Mips::CCRRegClass.contains(SrcReg))
+ Opc = Mips::CFC1;
+ else if (Mips::FGR32RegClass.contains(SrcReg))
+ Opc = Mips::MFC1;
+ else if (SrcReg == Mips::HI)
+ Opc = Mips::MFHI, SrcReg = 0;
+ else if (SrcReg == Mips::LO)
+ Opc = Mips::MFLO, SrcReg = 0;
+ }
+ else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
+ if (Mips::CCRRegClass.contains(DestReg))
+ Opc = Mips::CTC1;
+ else if (Mips::FGR32RegClass.contains(DestReg))
+ Opc = Mips::MTC1;
+ else if (DestReg == Mips::HI)
+ Opc = Mips::MTHI, DestReg = 0;
+ else if (DestReg == Mips::LO)
+ Opc = Mips::MTLO, DestReg = 0;
+ }
+ else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_S;
+ else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D32;
+ else if (Mips::FGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D64;
+ else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::MOVCCRToCCR;
+ else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
+ if (Mips::CPU64RegsRegClass.contains(SrcReg))
+ Opc = Mips::OR64, ZeroReg = Mips::ZERO_64;
+ else if (SrcReg == Mips::HI64)
+ Opc = Mips::MFHI64, SrcReg = 0;
+ else if (SrcReg == Mips::LO64)
+ Opc = Mips::MFLO64, SrcReg = 0;
+ else if (Mips::FGR64RegClass.contains(SrcReg))
+ Opc = Mips::DMFC1;
+ }
+ else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
+ if (DestReg == Mips::HI64)
+ Opc = Mips::MTHI64, DestReg = 0;
+ else if (DestReg == Mips::LO64)
+ Opc = Mips::MTLO64, DestReg = 0;
+ else if (Mips::FGR64RegClass.contains(DestReg))
+ Opc = Mips::DMTC1;
+ }
+ else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC64;
+ else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC_DSP;
+ else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC128;
+
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
+}
+
+void MipsSEInstrInfo::
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+ int64_t Offset) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+
+ unsigned Opc = 0;
+
+ if (Mips::CPURegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
+ else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
+ else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC64_P8 : Mips::STORE_AC64;
+ else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP;
+ else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128;
+ else if (Mips::FGR32RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
+ else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
+ Opc = Mips::SDC1;
+ else if (Mips::FGR64RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
+}
+
+void MipsSEInstrInfo::
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI, int64_t Offset) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
+ unsigned Opc = 0;
+
+ if (Mips::CPURegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
+ else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
+ else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC64_P8 : Mips::LOAD_AC64;
+ else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP;
+ else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128;
+ else if (Mips::FGR32RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
+ else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
+ Opc = Mips::LDC1;
+ else if (Mips::FGR64RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
+ .addMemOperand(MMO);
+}
+
+bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ switch(MI->getDesc().getOpcode()) {
+ default:
+ return false;
+ case Mips::RetRA:
+ ExpandRetRA(MBB, MI, Mips::RET);
+ break;
+ case Mips::BuildPairF64:
+ ExpandBuildPairF64(MBB, MI);
+ break;
+ case Mips::ExtractElementF64:
+ ExpandExtractElementF64(MBB, MI);
+ break;
+ case Mips::MIPSeh_return32:
+ case Mips::MIPSeh_return64:
+ ExpandEhReturn(MBB, MI);
+ break;
+ }
+
+ MBB.erase(MI);
+ return true;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BEQ: return Mips::BNE;
+ case Mips::BNE: return Mips::BEQ;
+ case Mips::BGTZ: return Mips::BLEZ;
+ case Mips::BGEZ: return Mips::BLTZ;
+ case Mips::BLTZ: return Mips::BGEZ;
+ case Mips::BLEZ: return Mips::BGTZ;
+ case Mips::BEQ64: return Mips::BNE64;
+ case Mips::BNE64: return Mips::BEQ64;
+ case Mips::BGTZ64: return Mips::BLEZ64;
+ case Mips::BGEZ64: return Mips::BLTZ64;
+ case Mips::BLTZ64: return Mips::BGEZ64;
+ case Mips::BLEZ64: return Mips::BGTZ64;
+ case Mips::BC1T: return Mips::BC1F;
+ case Mips::BC1F: return Mips::BC1T;
+ }
+}
+
+/// Adjust SP by Amount bytes.
+void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+
+ if (isInt<16>(Amount))// addi sp, sp, amount
+ BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
+ else { // Expand immediate that doesn't fit in 16-bit.
+ unsigned Reg = loadImmediate(Amount, MBB, I, DL, 0);
+ BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(Reg, RegState::Kill);
+ }
+}
+
+/// This function generates the sequence of instructions needed to get the
+/// result of adding register REG and immediate IMM.
+unsigned
+MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned *NewImm) const {
+ MipsAnalyzeImmediate AnalyzeImm;
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+ unsigned Size = STI.isABI_N64() ? 64 : 32;
+ unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi;
+ unsigned ZEROReg = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ const TargetRegisterClass *RC = STI.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+ bool LastInstrIsADDiu = NewImm;
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu);
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+
+ assert(Seq.size() && (!LastInstrIsADDiu || (Seq.size() > 1)));
+
+ // The first instruction can be a LUi, which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ unsigned Reg = RegInfo.createVirtualRegister(RC);
+
+ if (Inst->Opc == LUi)
+ BuildMI(MBB, II, DL, get(LUi), Reg).addImm(SignExtend64<16>(Inst->ImmOpnd));
+ else
+ BuildMI(MBB, II, DL, get(Inst->Opc), Reg).addReg(ZEROReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ // Build the remaining instructions in Seq.
+ for (++Inst; Inst != Seq.end() - LastInstrIsADDiu; ++Inst)
+ BuildMI(MBB, II, DL, get(Inst->Opc), Reg).addReg(Reg, RegState::Kill)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ if (LastInstrIsADDiu)
+ *NewImm = Inst->ImmOpnd;
+
+ return Reg;
+}
+
+unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
+ return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
+ Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+ Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
+ Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
+ Opc == Mips::J) ?
+ Opc : 0;
+}
+
+void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opc) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA);
+}
+
+void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned SrcReg = I->getOperand(1).getReg();
+ unsigned N = I->getOperand(2).getImm();
+ const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1);
+ DebugLoc dl = I->getDebugLoc();
+
+ assert(N < 2 && "Invalid immediate");
+ unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven;
+ unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx);
+
+ BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg);
+}
+
+void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
+ const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
+ DebugLoc dl = I->getDebugLoc();
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven))
+ .addReg(LoReg);
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd))
+ .addReg(HiReg);
+}
+
+void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // This pseudo instruction is generated as part of the lowering of
+ // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg, and
+ // indirect jump to TargetReg
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned OR = STI.isABI_N64() ? Mips::OR64 : Mips::OR;
+ unsigned JR = STI.isABI_N64() ? Mips::JR64 : Mips::JR;
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA;
+ unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned OffsetReg = I->getOperand(0).getReg();
+ unsigned TargetReg = I->getOperand(1).getReg();
+
+ // or $ra, $v0, $zero
+ // addu $sp, $sp, $v1
+ // jr $ra
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), T9)
+ .addReg(TargetReg).addReg(ZERO);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), RA)
+ .addReg(TargetReg).addReg(ZERO);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP)
+ .addReg(SP).addReg(OffsetReg);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(JR)).addReg(RA);
+}
+
+const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) {
+ return new MipsSEInstrInfo(TM);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
new file mode 100644
index 000000000000..0bf7876f0fe0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -0,0 +1,96 @@
+//===-- MipsSEInstrInfo.h - Mips32/64 Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEINSTRUCTIONINFO_H
+#define MIPSSEINSTRUCTIONINFO_H
+
+#include "MipsInstrInfo.h"
+#include "MipsSERegisterInfo.h"
+
+namespace llvm {
+
+class MipsSEInstrInfo : public MipsInstrInfo {
+ const MipsSERegisterInfo RI;
+ bool IsN64;
+
+public:
+ explicit MipsSEInstrInfo(MipsTargetMachine &TM);
+
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
+
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+
+ /// Adjust SP by Amount bytes.
+ void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Emit a series of instructions to load an immediate. If NewImm is a
+ /// non-NULL parameter, the last instruction is not emitted, but instead
+ /// its immediate operand is returned in NewImm.
+ unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned *NewImm) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
+
+ void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned Opc) const;
+ void ExpandExtractElementF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ void ExpandBuildPairF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ void ExpandEhReturn(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
new file mode 100644
index 000000000000..96967380b29d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -0,0 +1,134 @@
+//===-- MipsSERegisterInfo.cpp - MIPS32/64 Register Information -== -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS32/64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSERegisterInfo.h"
+#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsSEInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST,
+ const MipsSEInstrInfo &I)
+ : MipsRegisterInfo(ST), TII(I) {}
+
+bool MipsSERegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool MipsSERegisterInfo::
+requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+const TargetRegisterClass *
+MipsSERegisterInfo::intRegClass(unsigned Size) const {
+ if (Size == 4)
+ return &Mips::CPURegsRegClass;
+
+ assert(Size == 8);
+ return &Mips::CPU64RegsRegClass;
+}
+
+void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
+ unsigned OpNo, int FrameIndex,
+ uint64_t StackSize,
+ int64_t SPOffset) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex);
+
+ // The following stack frame objects are always referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // 4. Locations for eh data registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI)
+ FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ else
+ FrameReg = getFrameRegister(MF);
+
+ // Calculate final offset.
+ // - There is no need to change the offset if the frame object is one of the
+ // following: an outgoing argument, pointer to a dynamically allocated
+ // stack space or a $gp restore location,
+ // - If the frame object is any of the following, its offset must be adjusted
+ // by adding the size of the stack:
+ // incoming argument, callee-saved register location or local variable.
+ bool IsKill = false;
+ int64_t Offset;
+
+ Offset = SPOffset + (int64_t)StackSize;
+ Offset += MI.getOperand(OpNo + 1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
+ // field.
+ if (!MI.isDebugValue() && !isInt<16>(Offset)) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned NewImm;
+
+ unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm);
+ BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
+ .addReg(Reg, RegState::Kill);
+
+ FrameReg = Reg;
+ Offset = SignExtend64<16>(NewImm);
+ IsKill = true;
+ }
+
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill);
+ MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h
new file mode 100644
index 000000000000..2f7c37bb460d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h
@@ -0,0 +1,44 @@
+//===-- MipsSERegisterInfo.h - Mips32/64 Register Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEREGISTERINFO_H
+#define MIPSSEREGISTERINFO_H
+
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+class MipsSEInstrInfo;
+
+class MipsSERegisterInfo : public MipsRegisterInfo {
+ const MipsSEInstrInfo &TII;
+
+public:
+ MipsSERegisterInfo(const MipsSubtarget &Subtarget,
+ const MipsSEInstrInfo &TII);
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
+
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
new file mode 100644
index 000000000000..1add02ff83e9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -0,0 +1,63 @@
+//===-- MipsSchedule.td - Mips Scheduling Definitions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across Mips chips sets. Based on GCC/Mips backend files.
+//===----------------------------------------------------------------------===//
+def ALU : FuncUnit;
+def IMULDIV : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Mips
+//===----------------------------------------------------------------------===//
+def IIAlu : InstrItinClass;
+def IILoad : InstrItinClass;
+def IIStore : InstrItinClass;
+def IIXfer : InstrItinClass;
+def IIBranch : InstrItinClass;
+def IIHiLo : InstrItinClass;
+def IIImul : InstrItinClass;
+def IIIdiv : InstrItinClass;
+def IIFcvt : InstrItinClass;
+def IIFmove : InstrItinClass;
+def IIFcmp : InstrItinClass;
+def IIFadd : InstrItinClass;
+def IIFmulSingle : InstrItinClass;
+def IIFmulDouble : InstrItinClass;
+def IIFdivSingle : InstrItinClass;
+def IIFdivDouble : InstrItinClass;
+def IIFsqrtSingle : InstrItinClass;
+def IIFsqrtDouble : InstrItinClass;
+def IIFrecipFsqrtStep : InstrItinClass;
+def IIPseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Mips Generic instruction itineraries.
+//===----------------------------------------------------------------------===//
+def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
+ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>,
+ InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>,
+ InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
+ InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>,
+ InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>,
+ InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>,
+ InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>,
+ InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>,
+ InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>,
+ InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>,
+ InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..e4d70fcec591
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-selectiondag-info"
+#include "MipsTargetMachine.h"
+using namespace llvm;
+
+MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
new file mode 100644
index 000000000000..6cafb558b35a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Mips subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSELECTIONDAGINFO_H
+#define MIPSSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MipsTargetMachine;
+
+class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM);
+ ~MipsSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
new file mode 100644
index 000000000000..e11e5d142b74
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -0,0 +1,74 @@
+//===-- MipsSubtarget.cpp - Mips Subtarget Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Mips specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSubtarget.h"
+#include "Mips.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MipsGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+void MipsSubtarget::anchor() { }
+
+MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool little,
+ Reloc::Model _RM) :
+ MipsGenSubtargetInfo(TT, CPU, FS),
+ MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
+ IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
+ IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
+ HasBitCount(false), HasFPIdx(false),
+ InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
+ RM(_RM)
+{
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "mips32";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // Set MipsABI if it hasn't been set yet.
+ if (MipsABI == UnknownABI)
+ MipsABI = hasMips64() ? N64 : O32;
+
+ // Check if Architecture and ABI are compatible.
+ assert(((!hasMips64() && (isABI_O32() || isABI_EABI())) ||
+ (hasMips64() && (isABI_N32() || isABI_N64()))) &&
+ "Invalid Arch & ABI pair.");
+
+ // Is the target system Linux ?
+ if (TT.find("linux") == std::string::npos)
+ IsLinux = false;
+
+ // Set UseSmallSection.
+ UseSmallSection = !IsLinux && (RM == Reloc::Static);
+}
+
+bool
+MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ RegClassVector &CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_NONE;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(hasMips64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass);
+ return OptLevel >= CodeGenOpt::Aggressive;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
new file mode 100644
index 000000000000..7a2e47ce5a9d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -0,0 +1,164 @@
+//===-- MipsSubtarget.h - Define Subtarget for the Mips ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSUBTARGET_H
+#define MIPSSUBTARGET_H
+
+#include "MCTargetDesc/MipsReginfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MipsGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class MipsSubtarget : public MipsGenSubtargetInfo {
+ virtual void anchor();
+
+public:
+ // NOTE: O64 will not be supported.
+ enum MipsABIEnum {
+ UnknownABI, O32, N32, N64, EABI
+ };
+
+protected:
+
+ enum MipsArchEnum {
+ Mips32, Mips32r2, Mips64, Mips64r2
+ };
+
+ // Mips architecture version
+ MipsArchEnum MipsArchVersion;
+
+ // Mips supported ABIs
+ MipsABIEnum MipsABI;
+
+ // IsLittle - The target is Little Endian
+ bool IsLittle;
+
+ // IsSingleFloat - The target only supports single precision float
+ // point operations. This enable the target to use all 32 32-bit
+ // floating point registers instead of only using even ones.
+ bool IsSingleFloat;
+
+ // IsFP64bit - The target processor has 64-bit floating point registers.
+ bool IsFP64bit;
+
+ // IsFP64bit - General-purpose registers are 64 bits wide
+ bool IsGP64bit;
+
+ // HasVFPU - Processor has a vector floating point unit.
+ bool HasVFPU;
+
+ // isLinux - Target system is Linux. Is false we consider ELFOS for now.
+ bool IsLinux;
+
+ // UseSmallSection - Small section is used.
+ bool UseSmallSection;
+
+ /// Features related to the presence of specific instructions.
+
+ // HasSEInReg - SEB and SEH (signext in register) instructions.
+ bool HasSEInReg;
+
+ // HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
+ bool HasCondMov;
+
+ // HasSwap - Byte and half swap instructions.
+ bool HasSwap;
+
+ // HasBitCount - Count leading '1' and '0' bits.
+ bool HasBitCount;
+
+ // HasFPIdx -- Floating point indexed load/store instructions.
+ bool HasFPIdx;
+
+ // InMips16 -- can process Mips16 instructions
+ bool InMips16Mode;
+
+ // InMicroMips -- can process MicroMips instructions
+ bool InMicroMipsMode;
+
+ // HasDSP, HasDSPR2 -- supports DSP ASE.
+ bool HasDSP, HasDSPR2;
+
+ InstrItineraryData InstrItins;
+
+ // The instance to the register info section object
+ MipsReginfo MRI;
+
+ // Relocation Model
+ Reloc::Model RM;
+
+public:
+ virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// Only O32 and EABI supported right now.
+ bool isABI_EABI() const { return MipsABI == EABI; }
+ bool isABI_N64() const { return MipsABI == N64; }
+ bool isABI_N32() const { return MipsABI == N32; }
+ bool isABI_O32() const { return MipsABI == O32; }
+ unsigned getTargetABI() const { return MipsABI; }
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ MipsSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool little, Reloc::Model RM);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool hasMips32() const { return MipsArchVersion >= Mips32; }
+ bool hasMips32r2() const { return MipsArchVersion == Mips32r2 ||
+ MipsArchVersion == Mips64r2; }
+ bool hasMips64() const { return MipsArchVersion >= Mips64; }
+ bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
+
+ bool isLittle() const { return IsLittle; }
+ bool isFP64bit() const { return IsFP64bit; }
+ bool isGP64bit() const { return IsGP64bit; }
+ bool isGP32bit() const { return !IsGP64bit; }
+ bool isSingleFloat() const { return IsSingleFloat; }
+ bool isNotSingleFloat() const { return !IsSingleFloat; }
+ bool hasVFPU() const { return HasVFPU; }
+ bool inMips16Mode() const { return InMips16Mode; }
+ bool inMicroMipsMode() const { return InMicroMipsMode; }
+ bool hasDSP() const { return HasDSP; }
+ bool hasDSPR2() const { return HasDSPR2; }
+ bool isLinux() const { return IsLinux; }
+ bool useSmallSection() const { return UseSmallSection; }
+
+ bool hasStandardEncoding() const { return !inMips16Mode(); }
+
+ /// Features related to the presence of specific instructions.
+ bool hasSEInReg() const { return HasSEInReg; }
+ bool hasCondMov() const { return HasCondMov; }
+ bool hasSwap() const { return HasSwap; }
+ bool hasBitCount() const { return HasBitCount; }
+ bool hasFPIdx() const { return HasFPIdx; }
+
+ // Grab MipsRegInfo object
+ const MipsReginfo &getMReginfo() const { return MRI; }
+
+ // Grab relocation model
+ Reloc::Model getRelocationModel() const {return RM;}
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
new file mode 100644
index 000000000000..33363580aba7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -0,0 +1,130 @@
+//===-- MipsTargetMachine.cpp - Define TargetMachine for Mips -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Mips target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetMachine.h"
+#include "Mips.h"
+#include "MipsFrameLowering.h"
+#include "MipsInstrInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMipsTarget() {
+ // Register the target.
+ RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
+ RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
+ RegisterTargetMachine<MipsebTargetMachine> A(TheMips64Target);
+ RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
+// The stack is always 8 byte aligned
+// On function prologue, the stack is created by decrementing
+// its pointer. Once decremented, all references are done with positive
+// offset from the stack/frame pointer, using StackGrowsUp enables
+// an easier handling.
+// Using CodeModel::Large enables different CALL behavior.
+MipsTargetMachine::
+MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool isLittle)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, isLittle, RM),
+ DL(isLittle ?
+ (Subtarget.isABI_N64() ?
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
+ "n32:64-S128" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64") :
+ (Subtarget.isABI_N64() ?
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
+ "n32:64-S128" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
+ InstrInfo(MipsInstrInfo::create(*this)),
+ FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
+ TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() {
+}
+
+void MipsebTargetMachine::anchor() { }
+
+MipsebTargetMachine::
+MipsebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+
+void MipselTargetMachine::anchor() { }
+
+MipselTargetMachine::
+MipselTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+
+namespace {
+/// Mips Code Generator Pass Configuration Options.
+class MipsPassConfig : public TargetPassConfig {
+public:
+ MipsPassConfig(MipsTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ MipsTargetMachine &getMipsTargetMachine() const {
+ return getTM<MipsTargetMachine>();
+ }
+
+ const MipsSubtarget &getMipsSubtarget() const {
+ return *getMipsTargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new MipsPassConfig(this, PM);
+}
+
+// Install an instruction selector pass using
+// the ISelDag to gen Mips code.
+bool MipsPassConfig::addInstSelector() {
+ addPass(createMipsISelDag(getMipsTargetMachine()));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. return true if -print-machineinstrs should
+// print out the code after the passes.
+bool MipsPassConfig::addPreEmitPass() {
+ MipsTargetMachine &TM = getMipsTargetMachine();
+ addPass(createMipsDelaySlotFillerPass(TM));
+
+ // NOTE: long branch has not been implemented for mips16.
+ if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding())
+ addPass(createMipsLongBranchPass(TM));
+ if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+ addPass(createMipsConstantIslandPass(TM));
+
+ return true;
+}
+
+bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for Mips.
+ PM.add(createMipsJITCodeEmitterPass(*this, JCE));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
new file mode 100644
index 000000000000..7e5f19226433
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -0,0 +1,102 @@
+//===-- MipsTargetMachine.h - Define TargetMachine for Mips -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETMACHINE_H
+#define MIPSTARGETMACHINE_H
+
+#include "MipsFrameLowering.h"
+#include "MipsISelLowering.h"
+#include "MipsInstrInfo.h"
+#include "MipsJITInfo.h"
+#include "MipsSelectionDAGInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class formatted_raw_ostream;
+class MipsRegisterInfo;
+
+class MipsTargetMachine : public LLVMTargetMachine {
+ MipsSubtarget Subtarget;
+ const DataLayout DL; // Calculates type size & alignment
+ OwningPtr<const MipsInstrInfo> InstrInfo;
+ OwningPtr<const MipsFrameLowering> FrameLowering;
+ OwningPtr<const MipsTargetLowering> TLInfo;
+ MipsSelectionDAGInfo TSInfo;
+ MipsJITInfo JITInfo;
+
+public:
+ MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool isLittle);
+
+ virtual ~MipsTargetMachine() {}
+
+ virtual const MipsInstrInfo *getInstrInfo() const
+ { return InstrInfo.get(); }
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return FrameLowering.get(); }
+ virtual const MipsSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+ virtual const DataLayout *getDataLayout() const
+ { return &DL;}
+ virtual MipsJITInfo *getJITInfo()
+ { return &JITInfo; }
+
+ virtual const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
+ virtual const MipsTargetLowering *getTargetLowering() const {
+ return TLInfo.get();
+ }
+
+ virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
+};
+
+/// MipsebTargetMachine - Mips32/64 big endian target machine.
+///
+class MipsebTargetMachine : public MipsTargetMachine {
+ virtual void anchor();
+public:
+ MipsebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+/// MipselTargetMachine - Mips32/64 little endian target machine.
+///
+class MipselTargetMachine : public MipsTargetMachine {
+ virtual void anchor();
+public:
+ MipselTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
new file mode 100644
index 000000000000..4c748c5b57cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -0,0 +1,118 @@
+//===-- MipsTargetObjectFile.cpp - Mips Object Files ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetObjectFile.h"
+#include "MipsSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+SSThreshold("mips-ssection-threshold", cl::Hidden,
+ cl::desc("Small data and bss section threshold size (default=8)"),
+ cl::init(8));
+
+void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+ // Register info information
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ if (Subtarget.isABI_N64() || Subtarget.isABI_N32())
+ ReginfoSection =
+ getContext().getELFSection(".MIPS.options",
+ ELF::SHT_MIPS_OPTIONS,
+ ELF::SHF_ALLOC |ELF::SHF_MIPS_NOSTRIP,
+ SectionKind::getMetadata());
+ else
+ ReginfoSection =
+ getContext().getELFSection(".reginfo",
+ ELF::SHT_MIPS_REGINFO,
+ ELF::SHF_ALLOC,
+ SectionKind::getMetadata());
+}
+
+// A address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
+// gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= SSThreshold;
+}
+
+bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MipsTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+
+ // Return if small section is not available.
+ if (!Subtarget.useSmallSection())
+ return false;
+
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+ // If this is a internal constant string, there is a special
+ // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+}
+
+
+
+const MCSection *MipsTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
new file mode 100644
index 000000000000..c0e9140c829c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -0,0 +1,43 @@
+//===-- llvm/Target/MipsTargetObjectFile.h - Mips Object Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ const MCSection *ReginfoSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM, SectionKind Kind)const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+
+ // TODO: Classify globals as mips wishes.
+ const MCSection *getReginfoSection() const { return ReginfoSection; }
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
new file mode 100644
index 000000000000..3615c146a527
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -0,0 +1,31 @@
+//===-- MipsTargetInfo.cpp - Mips Target Implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMipsTarget, llvm::TheMipselTarget;
+Target llvm::TheMips64Target, llvm::TheMips64elTarget;
+
+extern "C" void LLVMInitializeMipsTargetInfo() {
+ RegisterTarget<Triple::mips,
+ /*HasJIT=*/true> X(TheMipsTarget, "mips", "Mips");
+
+ RegisterTarget<Triple::mipsel,
+ /*HasJIT=*/true> Y(TheMipselTarget, "mipsel", "Mipsel");
+
+ RegisterTarget<Triple::mips64,
+ /*HasJIT=*/false> A(TheMips64Target, "mips64", "Mips64 [experimental]");
+
+ RegisterTarget<Triple::mips64el,
+ /*HasJIT=*/false> B(TheMips64elTarget,
+ "mips64el", "Mips64el [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
new file mode 100644
index 000000000000..10051c768058
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -0,0 +1 @@
+// Placeholder
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
new file mode 100644
index 000000000000..b3e8b5d2622d
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -0,0 +1,86 @@
+//===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the NVPTX target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXBASEINFO_H
+#define NVPTXBASEINFO_H
+
+namespace llvm {
+
+enum AddressSpace {
+ ADDRESS_SPACE_GENERIC = 0,
+ ADDRESS_SPACE_GLOBAL = 1,
+ ADDRESS_SPACE_CONST_NOT_GEN = 2, // Not part of generic space
+ ADDRESS_SPACE_SHARED = 3,
+ ADDRESS_SPACE_CONST = 4,
+ ADDRESS_SPACE_LOCAL = 5,
+
+ // NVVM Internal
+ ADDRESS_SPACE_PARAM = 101
+};
+
+enum PropertyAnnotation {
+ PROPERTY_MAXNTID_X = 0,
+ PROPERTY_MAXNTID_Y,
+ PROPERTY_MAXNTID_Z,
+ PROPERTY_REQNTID_X,
+ PROPERTY_REQNTID_Y,
+ PROPERTY_REQNTID_Z,
+ PROPERTY_MINNCTAPERSM,
+ PROPERTY_ISTEXTURE,
+ PROPERTY_ISSURFACE,
+ PROPERTY_ISSAMPLER,
+ PROPERTY_ISREADONLY_IMAGE_PARAM,
+ PROPERTY_ISWRITEONLY_IMAGE_PARAM,
+ PROPERTY_ISKERNEL_FUNCTION,
+ PROPERTY_ALIGN,
+
+ // last property
+ PROPERTY_LAST
+};
+
+const unsigned AnnotationNameLen = 8; // length of each annotation name
+const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
+ "maxntidx", // PROPERTY_MAXNTID_X
+ "maxntidy", // PROPERTY_MAXNTID_Y
+ "maxntidz", // PROPERTY_MAXNTID_Z
+ "reqntidx", // PROPERTY_REQNTID_X
+ "reqntidy", // PROPERTY_REQNTID_Y
+ "reqntidz", // PROPERTY_REQNTID_Z
+ "minctasm", // PROPERTY_MINNCTAPERSM
+ "texture", // PROPERTY_ISTEXTURE
+ "surface", // PROPERTY_ISSURFACE
+ "sampler", // PROPERTY_ISSAMPLER
+ "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
+ "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+ "kernel", // PROPERTY_ISKERNEL_FUNCTION
+ "align", // PROPERTY_ALIGN
+
+ // last property
+ "proplast", // PROPERTY_LAST
+};
+
+// name of named metadata used for global annotations
+#if defined(__GNUC__)
+// As this is declared to be static but some of the .cpp files that
+// include NVVM.h do not use this array, gcc gives a warning when
+// compiling those .cpp files, hence __attribute__((unused)).
+__attribute__((unused))
+#endif
+ static const char *NamedMDForAnnotations = "nvvm.annotations";
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
new file mode 100644
index 000000000000..459cd96cb0cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -0,0 +1,63 @@
+//===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the NVPTXMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+bool CompileForDebugging;
+
+// -debug-compile - Command line option to inform opt and llc passes to
+// compile for debugging
+static cl::opt<bool, true>
+Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
+ cl::location(CompileForDebugging), cl::init(false));
+
+void NVPTXMCAsmInfo::anchor() {}
+
+NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::nvptx64) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+
+ CommentString = "//";
+
+ PrivateGlobalPrefix = "$L__";
+
+ AllowPeriodsInName = false;
+
+ HasSetDirective = false;
+
+ HasSingleParameterDotFile = false;
+
+ InlineAsmStart = " inline asm";
+ InlineAsmEnd = " inline asm";
+
+ SupportsDebugInformation = CompileForDebugging;
+ HasDotTypeDotSizeDirective = false;
+
+ Data8bitsDirective = " .b8 ";
+ Data16bitsDirective = " .b16 ";
+ Data32bitsDirective = " .b32 ";
+ Data64bitsDirective = " .b64 ";
+ PrivateGlobalPrefix = "";
+ ZeroDirective = " .b8";
+ AsciiDirective = " .b8";
+ AscizDirective = " .b8";
+
+ // @TODO: Can we just disable this?
+ GlobalDirective = "\t// .globl\t";
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
new file mode 100644
index 000000000000..82097daa4ae5
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the NVPTXMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_MCASM_INFO_H
+#define NVPTX_MCASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+class Target;
+class StringRef;
+
+class NVPTXMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+public:
+ explicit NVPTXMCAsmInfo(const Target &T, const StringRef &TT);
+};
+} // namespace llvm
+
+#endif // NVPTX_MCASM_INFO_H
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
new file mode 100644
index 000000000000..ccd29705df72
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -0,0 +1,88 @@
+//===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides NVPTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXMCTargetDesc.h"
+#include "NVPTXMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "NVPTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "NVPTXGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "NVPTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createNVPTXMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitNVPTXMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ // PTX does not have a return address register.
+ InitNVPTXMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *
+createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
+ StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeNVPTXTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
+ createNVPTXMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
+ createNVPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
+ createNVPTXMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
+ createNVPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
+ createNVPTXMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
+ createNVPTXMCSubtargetInfo);
+
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
new file mode 100644
index 000000000000..af95c76f92b2
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -0,0 +1,36 @@
+//===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides NVPTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXMCTARGETDESC_H
+#define NVPTXMCTARGETDESC_H
+
+namespace llvm {
+class Target;
+
+extern Target TheNVPTXTarget32;
+extern Target TheNVPTXTarget64;
+
+} // End llvm namespace
+
+// Defines symbolic names for PTX registers.
+#define GET_REGINFO_ENUM
+#include "NVPTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the PTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "NVPTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "NVPTXGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/ManagedStringPool.h b/contrib/llvm/lib/Target/NVPTX/ManagedStringPool.h
new file mode 100644
index 000000000000..d6c79b5110cc
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/ManagedStringPool.h
@@ -0,0 +1,48 @@
+//===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The strings allocated from a managed string pool are owned by the string
+// pool and will be deleted together with the managed string pool.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MANAGED_STRING_H
+#define LLVM_SUPPORT_MANAGED_STRING_H
+
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+
+namespace llvm {
+
+/// ManagedStringPool - The strings allocated from a managed string pool are
+/// owned by the string pool and will be deleted together with the managed
+/// string pool.
+class ManagedStringPool {
+ SmallVector<std::string *, 8> Pool;
+
+public:
+ ManagedStringPool() {}
+ ~ManagedStringPool() {
+ SmallVector<std::string *, 8>::iterator Current = Pool.begin();
+ while (Current != Pool.end()) {
+ delete *Current;
+ Current++;
+ }
+ }
+
+ std::string *getManagedString(const char *S) {
+ std::string *Str = new std::string(S);
+ Pool.push_back(Str);
+ return Str;
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
new file mode 100644
index 000000000000..6a53a443bfb6
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -0,0 +1,141 @@
+//===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM NVPTX back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_NVPTX_H
+#define LLVM_TARGET_NVPTX_H
+
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <iosfwd>
+
+namespace llvm {
+class NVPTXTargetMachine;
+class FunctionPass;
+class formatted_raw_ostream;
+
+namespace NVPTXCC {
+enum CondCodes {
+ EQ,
+ NE,
+ LT,
+ LE,
+ GT,
+ GE
+};
+}
+
+inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
+ switch (CC) {
+ case NVPTXCC::NE:
+ return "ne";
+ case NVPTXCC::EQ:
+ return "eq";
+ case NVPTXCC::LT:
+ return "lt";
+ case NVPTXCC::LE:
+ return "le";
+ case NVPTXCC::GT:
+ return "gt";
+ case NVPTXCC::GE:
+ return "ge";
+ }
+ llvm_unreachable("Unknown condition code");
+}
+
+FunctionPass *
+createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
+FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
+FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
+FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
+
+bool isImageOrSamplerVal(const Value *, const Module *);
+
+extern Target TheNVPTXTarget32;
+extern Target TheNVPTXTarget64;
+
+namespace NVPTX {
+enum DrvInterface {
+ NVCL,
+ CUDA,
+ TEST
+};
+
+// A field inside TSFlags needs a shift and a mask. The usage is
+// always as follows :
+// ((TSFlags & fieldMask) >> fieldShift)
+// The enum keeps the mask, the shift, and all valid values of the
+// field in one place.
+enum VecInstType {
+ VecInstTypeShift = 0,
+ VecInstTypeMask = 0xF,
+
+ VecNOP = 0,
+ VecLoad = 1,
+ VecStore = 2,
+ VecBuild = 3,
+ VecShuffle = 4,
+ VecExtract = 5,
+ VecInsert = 6,
+ VecDest = 7,
+ VecOther = 15
+};
+
+enum SimpleMove {
+ SimpleMoveMask = 0x10,
+ SimpleMoveShift = 4
+};
+enum LoadStore {
+ isLoadMask = 0x20,
+ isLoadShift = 5,
+ isStoreMask = 0x40,
+ isStoreShift = 6
+};
+
+namespace PTXLdStInstCode {
+enum AddressSpace {
+ GENERIC = 0,
+ GLOBAL = 1,
+ CONSTANT = 2,
+ SHARED = 3,
+ PARAM = 4,
+ LOCAL = 5
+};
+enum FromType {
+ Unsigned = 0,
+ Signed,
+ Float
+};
+enum VecType {
+ Scalar = 1,
+ V2 = 2,
+ V4 = 4
+};
+}
+}
+} // end namespace llvm;
+
+// Defines symbolic names for NVPTX registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "NVPTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the NVPTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "NVPTXGenInstrInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
new file mode 100644
index 000000000000..d78b4e81a3e5
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
@@ -0,0 +1,62 @@
+//===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the NVPTX target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+include "NVPTXRegisterInfo.td"
+include "NVPTXInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+// - We use the SM version number instead of explicit feature table.
+// - Need at least one feature to avoid generating zero sized array by
+// TableGen in NVPTXGenSubtarget.inc.
+//===----------------------------------------------------------------------===//
+
+// SM Versions
+def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20",
+ "Target SM 2.0">;
+def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
+ "Target SM 2.1">;
+def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30",
+ "Target SM 3.0">;
+def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35",
+ "Target SM 3.5">;
+
+// PTX Versions
+def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30",
+ "Use PTX version 3.0">;
+def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
+ "Use PTX version 3.1">;
+
+//===----------------------------------------------------------------------===//
+// NVPTX supported processors.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"sm_20", [SM20]>;
+def : Proc<"sm_21", [SM21]>;
+def : Proc<"sm_30", [SM30]>;
+def : Proc<"sm_35", [SM35]>;
+
+
+def NVPTXInstrInfo : InstrInfo {
+}
+
+def NVPTX : Target {
+ let InstructionSet = NVPTXInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
new file mode 100644
index 000000000000..0f792ec6826e
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -0,0 +1,46 @@
+//===-- AllocaHoisting.cpp - Hoist allocas to the entry block --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hoist the alloca instructions in the non-entry blocks to the entry blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXAllocaHoisting.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
+ bool functionModified = false;
+ Function::iterator I = function.begin();
+ TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
+
+ for (Function::iterator E = function.end(); I != E; ++I) {
+ for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+ AllocaInst *allocaInst = dyn_cast<AllocaInst>(BI++);
+ if (allocaInst && isa<ConstantInt>(allocaInst->getArraySize())) {
+ allocaInst->moveBefore(firstTerminatorInst);
+ functionModified = true;
+ }
+ }
+ }
+
+ return functionModified;
+}
+
+char NVPTXAllocaHoisting::ID = 1;
+RegisterPass<NVPTXAllocaHoisting>
+X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
+ "blocks to the entry block");
+
+FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
new file mode 100644
index 000000000000..19d73c5783cb
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -0,0 +1,49 @@
+//===-- AllocaHoisting.h - Hosist allocas to the entry block ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hoist the alloca instructions in the non-entry blocks to the entry blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_ALLOCA_HOISTING_H_
+#define NVPTX_ALLOCA_HOISTING_H_
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class FunctionPass;
+class Function;
+
+// Hoisting the alloca instructions in the non-entry blocks to the entry
+// block.
+class NVPTXAllocaHoisting : public FunctionPass {
+public:
+ static char ID; // Pass ID
+ NVPTXAllocaHoisting() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DataLayout>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ }
+
+ virtual const char *getPassName() const {
+ return "NVPTX specific alloca hoisting";
+ }
+
+ virtual bool runOnFunction(Function &function);
+};
+
+extern FunctionPass *createAllocaHoisting();
+
+} // end namespace llvm
+
+#endif // NVPTX_ALLOCA_HOISTING_H_
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
new file mode 100644
index 000000000000..ce5d78afa332
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -0,0 +1,2117 @@
+//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to NVPTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXAsmPrinter.h"
+#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "NVPTX.h"
+#include "NVPTXInstrInfo.h"
+#include "NVPTXNumRegisters.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXTargetMachine.h"
+#include "NVPTXUtilities.h"
+#include "cl_common_defines.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TimeValue.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <sstream>
+using namespace llvm;
+
+#include "NVPTXGenAsmWriter.inc"
+
+bool RegAllocNilUsed = true;
+
+#define DEPOTNAME "__local_depot"
+
+static cl::opt<bool>
+EmitLineNumbers("nvptx-emit-line-numbers",
+ cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
+ cl::init(true));
+
+namespace llvm { bool InterleaveSrcInPtx = false; }
+
+static cl::opt<bool, true>
+InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Emit source line in ptx file"),
+ cl::location(llvm::InterleaveSrcInPtx));
+
+namespace {
+/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
+/// depends.
+void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ Globals.insert(GV);
+ else {
+ if (User *U = dyn_cast<User>(V)) {
+ for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
+ DiscoverDependentGlobals(U->getOperand(i), Globals);
+ }
+ }
+ }
+}
+
+/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
+/// instances to be emitted, but only after any dependents have been added
+/// first.
+void VisitGlobalVariableForEmission(
+ GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
+ DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
+ // Have we already visited this one?
+ if (Visited.count(GV))
+ return;
+
+ // Do we have a circular dependency?
+ if (Visiting.count(GV))
+ report_fatal_error("Circular dependency found in global variable set");
+
+ // Start visiting this global
+ Visiting.insert(GV);
+
+ // Make sure we visit all dependents first
+ DenseSet<GlobalVariable *> Others;
+ for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
+ DiscoverDependentGlobals(GV->getOperand(i), Others);
+
+ for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
+ E = Others.end();
+ I != E; ++I)
+ VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
+
+ // Now we can visit ourself
+ Order.push_back(GV);
+ Visited.insert(GV);
+ Visiting.erase(GV);
+}
+}
+
+// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
+// cannot just link to the existing version.
+/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
+///
+using namespace nvptx;
+const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (CE == 0)
+ llvm_unreachable("Unknown constant value to lower!");
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using DataLayout as a
+ // last resort before giving up.
+ if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+ if (C != CE)
+ return LowerConstant(C, AP);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/ false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ case Instruction::GetElementPtr: {
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ // Generate a symbolic expression for the byte address
+ APInt OffsetAI(TD.getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
+
+ const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
+ if (!OffsetAI)
+ return Base;
+
+ int64_t Offset = OffsetAI.getSExtValue();
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return LowerConstant(CE->getOperand(0), AP);
+
+ case Instruction::IntToPtr: {
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ false /*ZExt*/);
+ return LowerConstant(Op, AP);
+ }
+
+ case Instruction::PtrToInt: {
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = LowerConstant(Op, AP);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr =
+ MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
+ const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
+ switch (CE->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add:
+ return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub:
+ return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul:
+ return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv:
+ return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem:
+ return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl:
+ return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And:
+ return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or:
+ return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
+ case Instruction::Xor:
+ return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
+void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
+ if (!EmitLineNumbers)
+ return;
+ if (ignoreLoc(MI))
+ return;
+
+ DebugLoc curLoc = MI.getDebugLoc();
+
+ if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
+ return;
+
+ if (prevDebugLoc == curLoc)
+ return;
+
+ prevDebugLoc = curLoc;
+
+ if (curLoc.isUnknown())
+ return;
+
+ const MachineFunction *MF = MI.getParent()->getParent();
+ //const TargetMachine &TM = MF->getTarget();
+
+ const LLVMContext &ctx = MF->getFunction()->getContext();
+ DIScope Scope(curLoc.getScope(ctx));
+
+ if (!Scope.Verify())
+ return;
+
+ StringRef fileName(Scope.getFilename());
+ StringRef dirName(Scope.getDirectory());
+ SmallString<128> FullPathName = dirName;
+ if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
+ sys::path::append(FullPathName, fileName);
+ fileName = FullPathName.str();
+ }
+
+ if (filenameMap.find(fileName.str()) == filenameMap.end())
+ return;
+
+ // Emit the line from the source file.
+ if (llvm::InterleaveSrcInPtx)
+ this->emitSrcInText(fileName.str(), curLoc.getLine());
+
+ std::stringstream temp;
+ temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+ << " " << curLoc.getCol();
+ OutStreamer.EmitRawText(Twine(temp.str().c_str()));
+}
+
+void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ emitLineNumberAsDotLoc(*MI);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+}
+
+void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
+ const DataLayout *TD = TM.getDataLayout();
+ const TargetLowering *TLI = TM.getTargetLowering();
+
+ Type *Ty = F->getReturnType();
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ if (Ty->getTypeID() == Type::VoidTyID)
+ return;
+
+ O << " (";
+
+ if (isABI) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+ unsigned size = 0;
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+ size = ITy->getBitWidth();
+ if (size < 32)
+ size = 32;
+ } else {
+ assert(Ty->isFloatingPointTy() && "Floating point type expected here");
+ size = Ty->getPrimitiveSizeInBits();
+ }
+
+ O << ".param .b" << size << " func_retval0";
+ } else if (isa<PointerType>(Ty)) {
+ O << ".param .b" << TLI->getPointerTy().getSizeInBits()
+ << " func_retval0";
+ } else {
+ if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*TLI, Ty, vtparts);
+ unsigned totalsz = 0;
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+ for (unsigned j = 0, je = elems; j != je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ totalsz += sz / 8;
+ }
+ }
+ unsigned retAlignment = 0;
+ if (!llvm::getAlign(*F, 0, retAlignment))
+ retAlignment = TD->getABITypeAlignment(Ty);
+ O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+ << "]";
+ } else
+ assert(false && "Unknown return type");
+ }
+ } else {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*TLI, Ty, vtparts);
+ unsigned idx = 0;
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j = 0, je = elems; j != je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
+ O << ".reg .b" << sz << " func_retval" << idx;
+ if (j < je - 1)
+ O << ", ";
+ ++idx;
+ }
+ if (i < e - 1)
+ O << ", ";
+ }
+ }
+ O << ") ";
+ return;
+}
+
+void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
+ raw_ostream &O) {
+ const Function *F = MF.getFunction();
+ printReturnValStr(F, O);
+}
+
+void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ // Set up
+ MRI = &MF->getRegInfo();
+ F = MF->getFunction();
+ emitLinkageDirective(F, O);
+ if (llvm::isKernelFunction(*F))
+ O << ".entry ";
+ else {
+ O << ".func ";
+ printReturnValStr(*MF, O);
+ }
+
+ O << *CurrentFnSym;
+
+ emitFunctionParamList(*MF, O);
+
+ if (llvm::isKernelFunction(*F))
+ emitKernelFunctionDirectives(*F, O);
+
+ OutStreamer.EmitRawText(O.str());
+
+ prevDebugLoc = DebugLoc();
+}
+
+void NVPTXAsmPrinter::EmitFunctionBodyStart() {
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ unsigned numRegClasses = TRI.getNumRegClasses();
+ VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
+ OutStreamer.EmitRawText(StringRef("{\n"));
+ setAndEmitFunctionVirtualRegisters(*MF);
+
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+ emitDemotedVars(MF->getFunction(), O);
+ OutStreamer.EmitRawText(O.str());
+}
+
+void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText(StringRef("}\n"));
+ delete[] VRidGlobal2LocalMap;
+}
+
+void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
+ raw_ostream &O) const {
+ // If the NVVM IR has some of reqntid* specified, then output
+ // the reqntid directive, and set the unspecified ones to 1.
+ // If none of reqntid* is specified, don't output reqntid directive.
+ unsigned reqntidx, reqntidy, reqntidz;
+ bool specified = false;
+ if (llvm::getReqNTIDx(F, reqntidx) == false)
+ reqntidx = 1;
+ else
+ specified = true;
+ if (llvm::getReqNTIDy(F, reqntidy) == false)
+ reqntidy = 1;
+ else
+ specified = true;
+ if (llvm::getReqNTIDz(F, reqntidz) == false)
+ reqntidz = 1;
+ else
+ specified = true;
+
+ if (specified)
+ O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+ << "\n";
+
+ // If the NVVM IR has some of maxntid* specified, then output
+ // the maxntid directive, and set the unspecified ones to 1.
+ // If none of maxntid* is specified, don't output maxntid directive.
+ unsigned maxntidx, maxntidy, maxntidz;
+ specified = false;
+ if (llvm::getMaxNTIDx(F, maxntidx) == false)
+ maxntidx = 1;
+ else
+ specified = true;
+ if (llvm::getMaxNTIDy(F, maxntidy) == false)
+ maxntidy = 1;
+ else
+ specified = true;
+ if (llvm::getMaxNTIDz(F, maxntidz) == false)
+ maxntidz = 1;
+ else
+ specified = true;
+
+ if (specified)
+ O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+ << "\n";
+
+ unsigned mincta;
+ if (llvm::getMinCTASm(F, mincta))
+ O << ".minnctapersm " << mincta << "\n";
+}
+
+void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
+ raw_ostream &O) {
+ const TargetRegisterClass *RC = MRI->getRegClass(vr);
+ unsigned id = RC->getID();
+
+ std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
+ unsigned mapped_vr = regmap[vr];
+
+ if (!isVec) {
+ O << getNVPTXRegClassStr(RC) << mapped_vr;
+ return;
+ }
+ report_fatal_error("Bad register!");
+}
+
+void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
+ raw_ostream &O) {
+ getVirtualRegisterName(vr, isVec, O);
+}
+
+void NVPTXAsmPrinter::printVecModifiedImmediate(
+ const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
+ static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
+ int Imm = (int) MO.getImm();
+ if (0 == strcmp(Modifier, "vecelem"))
+ O << "_" << vecelem[Imm];
+ else if (0 == strcmp(Modifier, "vecv4comm1")) {
+ if ((Imm < 0) || (Imm > 3))
+ O << "//";
+ } else if (0 == strcmp(Modifier, "vecv4comm2")) {
+ if ((Imm < 4) || (Imm > 7))
+ O << "//";
+ } else if (0 == strcmp(Modifier, "vecv4pos")) {
+ if (Imm < 0)
+ Imm = 0;
+ O << "_" << vecelem[Imm % 4];
+ } else if (0 == strcmp(Modifier, "vecv2comm1")) {
+ if ((Imm < 0) || (Imm > 1))
+ O << "//";
+ } else if (0 == strcmp(Modifier, "vecv2comm2")) {
+ if ((Imm < 2) || (Imm > 3))
+ O << "//";
+ } else if (0 == strcmp(Modifier, "vecv2pos")) {
+ if (Imm < 0)
+ Imm = 0;
+ O << "_" << vecelem[Imm % 2];
+ } else
+ llvm_unreachable("Unknown Modifier on immediate operand");
+}
+
+void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MO.getReg() == NVPTX::VRDepot)
+ O << DEPOTNAME << getFunctionNumber();
+ else
+ O << getRegisterName(MO.getReg());
+ } else {
+ if (!Modifier)
+ emitVirtualRegister(MO.getReg(), false, O);
+ else {
+ if (strcmp(Modifier, "vecfull") == 0)
+ emitVirtualRegister(MO.getReg(), true, O);
+ else
+ llvm_unreachable(
+ "Don't know how to handle the modifier on virtual register.");
+ }
+ }
+ return;
+
+ case MachineOperand::MO_Immediate:
+ if (!Modifier)
+ O << MO.getImm();
+ else if (strstr(Modifier, "vec") == Modifier)
+ printVecModifiedImmediate(MO, Modifier, O);
+ else
+ llvm_unreachable(
+ "Don't know how to handle modifier on immediate operand");
+ return;
+
+ case MachineOperand::MO_FPImmediate:
+ printFPConstant(MO.getFPImm(), O);
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol: {
+ const char *symbname = MO.getSymbolName();
+ if (strstr(symbname, ".PARAM") == symbname) {
+ unsigned index;
+ sscanf(symbname + 6, "%u[];", &index);
+ printParamName(index, O);
+ } else if (strstr(symbname, ".HLPPARAM") == symbname) {
+ unsigned index;
+ sscanf(symbname + 9, "%u[];", &index);
+ O << *CurrentFnSym << "_param_" << index << "_offset";
+ } else
+ O << symbname;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ default:
+ llvm_unreachable("Operand type not supported.");
+ }
+}
+
+void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI,
+ raw_ostream &O) const {
+#ifndef __OPTIMIZE__
+ O << "\t// Implicit def :";
+ //printOperand(MI, 0);
+ O << "\n";
+#endif
+}
+
+void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, O);
+
+ if (Modifier && !strcmp(Modifier, "add")) {
+ O << ", ";
+ printOperand(MI, opNum + 1, O);
+ } else {
+ if (MI->getOperand(opNum + 1).isImm() &&
+ MI->getOperand(opNum + 1).getImm() == 0)
+ return; // don't print ',0' or '+0'
+ O << "+";
+ printOperand(MI, opNum + 1, O);
+ }
+}
+
+void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ if (Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ int Imm = (int) MO.getImm();
+ if (!strcmp(Modifier, "volatile")) {
+ if (Imm)
+ O << ".volatile";
+ } else if (!strcmp(Modifier, "addsp")) {
+ switch (Imm) {
+ case NVPTX::PTXLdStInstCode::GLOBAL:
+ O << ".global";
+ break;
+ case NVPTX::PTXLdStInstCode::SHARED:
+ O << ".shared";
+ break;
+ case NVPTX::PTXLdStInstCode::LOCAL:
+ O << ".local";
+ break;
+ case NVPTX::PTXLdStInstCode::PARAM:
+ O << ".param";
+ break;
+ case NVPTX::PTXLdStInstCode::CONSTANT:
+ O << ".const";
+ break;
+ case NVPTX::PTXLdStInstCode::GENERIC:
+ if (!nvptxSubtarget.hasGenericLdSt())
+ O << ".global";
+ break;
+ default:
+ llvm_unreachable("Wrong Address Space");
+ }
+ } else if (!strcmp(Modifier, "sign")) {
+ if (Imm == NVPTX::PTXLdStInstCode::Signed)
+ O << "s";
+ else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
+ O << "u";
+ else
+ O << "f";
+ } else if (!strcmp(Modifier, "vec")) {
+ if (Imm == NVPTX::PTXLdStInstCode::V2)
+ O << ".v2";
+ else if (Imm == NVPTX::PTXLdStInstCode::V4)
+ O << ".v4";
+ } else
+ llvm_unreachable("Unknown Modifier");
+ } else
+ llvm_unreachable("Empty Modifier");
+}
+
+void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
+
+ emitLinkageDirective(F, O);
+ if (llvm::isKernelFunction(*F))
+ O << ".entry ";
+ else
+ O << ".func ";
+ printReturnValStr(F, O);
+ O << *CurrentFnSym << "\n";
+ emitFunctionParamList(F, O);
+ O << ";\n";
+}
+
+static bool usedInGlobalVarDef(const Constant *C) {
+ if (!C)
+ return false;
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->getName().str() == "llvm.used")
+ return false;
+ return true;
+ }
+
+ for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+ ui != ue; ++ui) {
+ const Constant *C = dyn_cast<Constant>(*ui);
+ if (usedInGlobalVarDef(C))
+ return true;
+ }
+ return false;
+}
+
+static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
+ if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
+ if (othergv->getName().str() == "llvm.used")
+ return true;
+ }
+
+ if (const Instruction *instr = dyn_cast<Instruction>(U)) {
+ if (instr->getParent() && instr->getParent()->getParent()) {
+ const Function *curFunc = instr->getParent()->getParent();
+ if (oneFunc && (curFunc != oneFunc))
+ return false;
+ oneFunc = curFunc;
+ return true;
+ } else
+ return false;
+ }
+
+ if (const MDNode *md = dyn_cast<MDNode>(U))
+ if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
+ (md->getName().str() == "llvm.dbg.sp")))
+ return true;
+
+ for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end();
+ ui != ue; ++ui) {
+ if (usedInOneFunc(*ui, oneFunc) == false)
+ return false;
+ }
+ return true;
+}
+
+/* Find out if a global variable can be demoted to local scope.
+ * Currently, this is valid for CUDA shared variables, which have local
+ * scope and global lifetime. So the conditions to check are :
+ * 1. Is the global variable in shared address space?
+ * 2. Does it have internal linkage?
+ * 3. Is the global variable referenced only in one function?
+ */
+static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
+ if (gv->hasInternalLinkage() == false)
+ return false;
+ const PointerType *Pty = gv->getType();
+ if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
+ return false;
+
+ const Function *oneFunc = 0;
+
+ bool flag = usedInOneFunc(gv, oneFunc);
+ if (flag == false)
+ return false;
+ if (!oneFunc)
+ return false;
+ f = oneFunc;
+ return true;
+}
+
+static bool useFuncSeen(const Constant *C,
+ llvm::DenseMap<const Function *, bool> &seenMap) {
+ for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+ ui != ue; ++ui) {
+ if (const Constant *cu = dyn_cast<Constant>(*ui)) {
+ if (useFuncSeen(cu, seenMap))
+ return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
+ const BasicBlock *bb = I->getParent();
+ if (!bb)
+ continue;
+ const Function *caller = bb->getParent();
+ if (!caller)
+ continue;
+ if (seenMap.find(caller) != seenMap.end())
+ return true;
+ }
+ }
+ return false;
+}
+
+void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
+ llvm::DenseMap<const Function *, bool> seenMap;
+ for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+ const Function *F = FI;
+
+ if (F->isDeclaration()) {
+ if (F->use_empty())
+ continue;
+ if (F->getIntrinsicID())
+ continue;
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ continue;
+ }
+ for (Value::const_use_iterator iter = F->use_begin(),
+ iterEnd = F->use_end();
+ iter != iterEnd; ++iter) {
+ if (const Constant *C = dyn_cast<Constant>(*iter)) {
+ if (usedInGlobalVarDef(C)) {
+ // The use is in the initialization of a global variable
+ // that is a function pointer, so print a declaration
+ // for the original function
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ break;
+ }
+ // Emit a declaration of this function if the function that
+ // uses this constant expr has already been seen.
+ if (useFuncSeen(C, seenMap)) {
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ break;
+ }
+ }
+
+ if (!isa<Instruction>(*iter))
+ continue;
+ const Instruction *instr = cast<Instruction>(*iter);
+ const BasicBlock *bb = instr->getParent();
+ if (!bb)
+ continue;
+ const Function *caller = bb->getParent();
+ if (!caller)
+ continue;
+
+ // If a caller has already been seen, then the caller is
+ // appearing in the module before the callee. so print out
+ // a declaration for the callee.
+ if (seenMap.find(caller) != seenMap.end()) {
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ break;
+ }
+ }
+ seenMap[F] = true;
+ }
+}
+
+void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+
+ unsigned i = 1;
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end();
+ I != E; ++I) {
+ DICompileUnit DIUnit(*I);
+ StringRef Filename(DIUnit.getFilename());
+ StringRef Dirname(DIUnit.getDirectory());
+ SmallString<128> FullPathName = Dirname;
+ if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
+ sys::path::append(FullPathName, Filename);
+ Filename = FullPathName.str();
+ }
+ if (filenameMap.find(Filename.str()) != filenameMap.end())
+ continue;
+ filenameMap[Filename.str()] = i;
+ OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
+ ++i;
+ }
+
+ for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+ E = DbgFinder.subprogram_end();
+ I != E; ++I) {
+ DISubprogram SP(*I);
+ StringRef Filename(SP.getFilename());
+ StringRef Dirname(SP.getDirectory());
+ SmallString<128> FullPathName = Dirname;
+ if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
+ sys::path::append(FullPathName, Filename);
+ Filename = FullPathName.str();
+ }
+ if (filenameMap.find(Filename.str()) != filenameMap.end())
+ continue;
+ filenameMap[Filename.str()] = i;
+ ++i;
+ }
+}
+
+bool NVPTXAsmPrinter::doInitialization(Module &M) {
+
+ SmallString<128> Str1;
+ raw_svector_ostream OS1(Str1);
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MMI->AnalyzeModule(M);
+
+ // We need to call the parent's one explicitly.
+ //bool Result = AsmPrinter::doInitialization(M);
+
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getDataLayout());
+
+ // Emit header before any dwarf directives are emitted below.
+ emitHeader(M, OS1);
+ OutStreamer.EmitRawText(OS1.str());
+
+ // Already commented out
+ //bool Result = AsmPrinter::doInitialization(M);
+
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ recordAndEmitFilenames(M);
+
+ SmallString<128> Str2;
+ raw_svector_ostream OS2(Str2);
+
+ emitDeclarations(M, OS2);
+
+ // As ptxas does not support forward references of globals, we need to first
+ // sort the list of module-level globals in def-use order. We visit each
+ // global variable in order, and ensure that we emit it *after* its dependent
+ // globals. We use a little extra memory maintaining both a set and a list to
+ // have fast searches while maintaining a strict ordering.
+ SmallVector<GlobalVariable *, 8> Globals;
+ DenseSet<GlobalVariable *> GVVisited;
+ DenseSet<GlobalVariable *> GVVisiting;
+
+ // Visit each global variable, in order
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
+ ++I)
+ VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
+
+ assert(GVVisited.size() == M.getGlobalList().size() &&
+ "Missed a global variable");
+ assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
+
+ // Print out module-level global variables in proper order
+ for (unsigned i = 0, e = Globals.size(); i != e; ++i)
+ printModuleLevelGV(Globals[i], OS2);
+
+ OS2 << '\n';
+
+ OutStreamer.EmitRawText(OS2.str());
+ return false; // success
+}
+
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
+ O << "//\n";
+ O << "// Generated by LLVM NVPTX Back-End\n";
+ O << "//\n";
+ O << "\n";
+
+ unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
+ O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
+
+ O << ".target ";
+ O << nvptxSubtarget.getTargetName();
+
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
+ O << ", texmode_independent";
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+ if (!nvptxSubtarget.hasDouble())
+ O << ", map_f64_to_f32";
+ }
+
+ if (MAI->doesSupportDebugInformation())
+ O << ", debug";
+
+ O << "\n";
+
+ O << ".address_size ";
+ if (nvptxSubtarget.is64Bit())
+ O << "64";
+ else
+ O << "32";
+ O << "\n";
+
+ O << "\n";
+}
+
+bool NVPTXAsmPrinter::doFinalization(Module &M) {
+ // XXX Temproarily remove global variables so that doFinalization() will not
+ // emit them again (global variables are emitted at beginning).
+
+ Module::GlobalListType &global_list = M.getGlobalList();
+ int i, n = global_list.size();
+ GlobalVariable **gv_array = new GlobalVariable *[n];
+
+ // first, back-up GlobalVariable in gv_array
+ i = 0;
+ for (Module::global_iterator I = global_list.begin(), E = global_list.end();
+ I != E; ++I)
+ gv_array[i++] = &*I;
+
+ // second, empty global_list
+ while (!global_list.empty())
+ global_list.remove(global_list.begin());
+
+ // call doFinalization
+ bool ret = AsmPrinter::doFinalization(M);
+
+ // now we restore global variables
+ for (i = 0; i < n; i++)
+ global_list.insert(global_list.end(), gv_array[i]);
+
+ delete[] gv_array;
+ return ret;
+
+ //bool Result = AsmPrinter::doFinalization(M);
+ // Instead of calling the parents doFinalization, we may
+ // clone parents doFinalization and customize here.
+ // Currently, we if NVISA out the EmitGlobals() in
+ // parent's doFinalization, which is too intrusive.
+ //
+ // Same for the doInitialization.
+ //return Result;
+}
+
+// This function emits appropriate linkage directives for
+// functions and global variables.
+//
+// extern function declaration -> .extern
+// extern function definition -> .visible
+// external global variable with init -> .visible
+// external without init -> .extern
+// appending -> not allowed, assert.
+
+void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
+ raw_ostream &O) {
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+ if (V->hasExternalLinkage()) {
+ if (isa<GlobalVariable>(V)) {
+ const GlobalVariable *GVar = cast<GlobalVariable>(V);
+ if (GVar) {
+ if (GVar->hasInitializer())
+ O << ".visible ";
+ else
+ O << ".extern ";
+ }
+ } else if (V->isDeclaration())
+ O << ".extern ";
+ else
+ O << ".visible ";
+ } else if (V->hasAppendingLinkage()) {
+ std::string msg;
+ msg.append("Error: ");
+ msg.append("Symbol ");
+ if (V->hasName())
+ msg.append(V->getName().str());
+ msg.append("has unsupported appending linkage type");
+ llvm_unreachable(msg.c_str());
+ }
+ }
+}
+
+void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
+ bool processDemoted) {
+
+ // Skip meta data
+ if (GVar->hasSection()) {
+ if (GVar->getSection() == "llvm.metadata")
+ return;
+ }
+
+ const DataLayout *TD = TM.getDataLayout();
+
+ // GlobalVariables are always constant pointers themselves.
+ const PointerType *PTy = GVar->getType();
+ Type *ETy = PTy->getElementType();
+
+ if (GVar->hasExternalLinkage()) {
+ if (GVar->hasInitializer())
+ O << ".visible ";
+ else
+ O << ".extern ";
+ }
+
+ if (llvm::isTexture(*GVar)) {
+ O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
+ return;
+ }
+
+ if (llvm::isSurface(*GVar)) {
+ O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
+ return;
+ }
+
+ if (GVar->isDeclaration()) {
+ // (extern) declarations, no definition or initializer
+ // Currently the only known declaration is for an automatic __local
+ // (.shared) promoted to global.
+ emitPTXGlobalVariable(GVar, O);
+ O << ";\n";
+ return;
+ }
+
+ if (llvm::isSampler(*GVar)) {
+ O << ".global .samplerref " << llvm::getSamplerName(*GVar);
+
+ Constant *Initializer = NULL;
+ if (GVar->hasInitializer())
+ Initializer = GVar->getInitializer();
+ ConstantInt *CI = NULL;
+ if (Initializer)
+ CI = dyn_cast<ConstantInt>(Initializer);
+ if (CI) {
+ unsigned sample = CI->getZExtValue();
+
+ O << " = { ";
+
+ for (int i = 0,
+ addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
+ i < 3; i++) {
+ O << "addr_mode_" << i << " = ";
+ switch (addr) {
+ case 0:
+ O << "wrap";
+ break;
+ case 1:
+ O << "clamp_to_border";
+ break;
+ case 2:
+ O << "clamp_to_edge";
+ break;
+ case 3:
+ O << "wrap";
+ break;
+ case 4:
+ O << "mirror";
+ break;
+ }
+ O << ", ";
+ }
+ O << "filter_mode = ";
+ switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
+ case 0:
+ O << "nearest";
+ break;
+ case 1:
+ O << "linear";
+ break;
+ case 2:
+ assert(0 && "Anisotropic filtering is not supported");
+ default:
+ O << "nearest";
+ break;
+ }
+ if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
+ O << ", force_unnormalized_coords = 1";
+ }
+ O << " }";
+ }
+
+ O << ";\n";
+ return;
+ }
+
+ if (GVar->hasPrivateLinkage()) {
+
+ if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
+ return;
+
+ // FIXME - need better way (e.g. Metadata) to avoid generating this global
+ if (!strncmp(GVar->getName().data(), "filename", 8))
+ return;
+ if (GVar->use_empty())
+ return;
+ }
+
+ const Function *demotedFunc = 0;
+ if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
+ O << "// " << GVar->getName().str() << " has been demoted\n";
+ if (localDecls.find(demotedFunc) != localDecls.end())
+ localDecls[demotedFunc].push_back(GVar);
+ else {
+ std::vector<GlobalVariable *> temp;
+ temp.push_back(GVar);
+ localDecls[demotedFunc] = temp;
+ }
+ return;
+ }
+
+ O << ".";
+ emitPTXAddressSpace(PTy->getAddressSpace(), O);
+ if (GVar->getAlignment() == 0)
+ O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ else
+ O << " .align " << GVar->getAlignment();
+
+ if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
+ O << " .";
+ O << getPTXFundamentalTypeStr(ETy, false);
+ O << " ";
+ O << *Mang->getSymbol(GVar);
+
+ // Ptx allows variable initilization only for constant and global state
+ // spaces.
+ if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+ GVar->hasInitializer()) {
+ Constant *Initializer = GVar->getInitializer();
+ if (!Initializer->isNullValue()) {
+ O << " = ";
+ printScalarConstant(Initializer, O);
+ }
+ }
+ } else {
+ unsigned int ElementSize = 0;
+
+ // Although PTX has direct support for struct type and array type and
+ // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
+ // targets that support these high level field accesses. Structs, arrays
+ // and vectors are lowered into arrays of bytes.
+ switch (ETy->getTypeID()) {
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ ElementSize = TD->getTypeStoreSize(ETy);
+ // Ptx allows variable initilization only for constant and
+ // global state spaces.
+ if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+ GVar->hasInitializer()) {
+ Constant *Initializer = GVar->getInitializer();
+ if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
+ AggBuffer aggBuffer(ElementSize, O, *this);
+ bufferAggregateConstant(Initializer, &aggBuffer);
+ if (aggBuffer.numSymbols) {
+ if (nvptxSubtarget.is64Bit()) {
+ O << " .u64 " << *Mang->getSymbol(GVar) << "[";
+ O << ElementSize / 8;
+ } else {
+ O << " .u32 " << *Mang->getSymbol(GVar) << "[";
+ O << ElementSize / 4;
+ }
+ O << "]";
+ } else {
+ O << " .b8 " << *Mang->getSymbol(GVar) << "[";
+ O << ElementSize;
+ O << "]";
+ }
+ O << " = {";
+ aggBuffer.print();
+ O << "}";
+ } else {
+ O << " .b8 " << *Mang->getSymbol(GVar);
+ if (ElementSize) {
+ O << "[";
+ O << ElementSize;
+ O << "]";
+ }
+ }
+ } else {
+ O << " .b8 " << *Mang->getSymbol(GVar);
+ if (ElementSize) {
+ O << "[";
+ O << ElementSize;
+ O << "]";
+ }
+ }
+ break;
+ default:
+ assert(0 && "type not supported yet");
+ }
+
+ }
+ O << ";\n";
+}
+
+void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
+ if (localDecls.find(f) == localDecls.end())
+ return;
+
+ std::vector<GlobalVariable *> &gvars = localDecls[f];
+
+ for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
+ O << "\t// demoted variable\n\t";
+ printModuleLevelGV(gvars[i], O, true);
+ }
+}
+
+void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
+ raw_ostream &O) const {
+ switch (AddressSpace) {
+ case llvm::ADDRESS_SPACE_LOCAL:
+ O << "local";
+ break;
+ case llvm::ADDRESS_SPACE_GLOBAL:
+ O << "global";
+ break;
+ case llvm::ADDRESS_SPACE_CONST:
+ // This logic should be consistent with that in
+ // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp)
+ if (nvptxSubtarget.hasGenericLdSt())
+ O << "global";
+ else
+ O << "const";
+ break;
+ case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
+ O << "const";
+ break;
+ case llvm::ADDRESS_SPACE_SHARED:
+ O << "shared";
+ break;
+ default:
+ report_fatal_error("Bad address space found while emitting PTX");
+ break;
+ }
+}
+
+std::string
+NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
+ switch (Ty->getTypeID()) {
+ default:
+ llvm_unreachable("unexpected type");
+ break;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return "pred";
+ else if (NumBits <= 64) {
+ std::string name = "u";
+ return name + utostr(NumBits);
+ } else {
+ llvm_unreachable("Integer too large");
+ break;
+ }
+ break;
+ }
+ case Type::FloatTyID:
+ return "f32";
+ case Type::DoubleTyID:
+ return "f64";
+ case Type::PointerTyID:
+ if (nvptxSubtarget.is64Bit())
+ if (useB4PTR)
+ return "b64";
+ else
+ return "u64";
+ else if (useB4PTR)
+ return "b32";
+ else
+ return "u32";
+ }
+ llvm_unreachable("unexpected type");
+ return NULL;
+}
+
+void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
+ raw_ostream &O) {
+
+ const DataLayout *TD = TM.getDataLayout();
+
+ // GlobalVariables are always constant pointers themselves.
+ const PointerType *PTy = GVar->getType();
+ Type *ETy = PTy->getElementType();
+
+ O << ".";
+ emitPTXAddressSpace(PTy->getAddressSpace(), O);
+ if (GVar->getAlignment() == 0)
+ O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ else
+ O << " .align " << GVar->getAlignment();
+
+ if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
+ O << " .";
+ O << getPTXFundamentalTypeStr(ETy);
+ O << " ";
+ O << *Mang->getSymbol(GVar);
+ return;
+ }
+
+ int64_t ElementSize = 0;
+
+ // Although PTX has direct support for struct type and array type and LLVM IR
+ // is very similar to PTX, the LLVM CodeGen does not support for targets that
+ // support these high level field accesses. Structs and arrays are lowered
+ // into arrays of bytes.
+ switch (ETy->getTypeID()) {
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ ElementSize = TD->getTypeStoreSize(ETy);
+ O << " .b8 " << *Mang->getSymbol(GVar) << "[";
+ if (ElementSize) {
+ O << itostr(ElementSize);
+ }
+ O << "]";
+ break;
+ default:
+ assert(0 && "type not supported yet");
+ }
+ return;
+}
+
+static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
+ return TD->getPrefTypeAlignment(Ty);
+
+ const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ if (ATy)
+ return getOpenCLAlignment(TD, ATy->getElementType());
+
+ const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (VTy) {
+ Type *ETy = VTy->getElementType();
+ unsigned int numE = VTy->getNumElements();
+ unsigned int alignE = TD->getPrefTypeAlignment(ETy);
+ if (numE == 3)
+ return 4 * alignE;
+ else
+ return numE * alignE;
+ }
+
+ const StructType *STy = dyn_cast<StructType>(Ty);
+ if (STy) {
+ unsigned int alignStruct = 1;
+ // Go through each element of the struct and find the
+ // largest alignment.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
+ Type *ETy = STy->getElementType(i);
+ unsigned int align = getOpenCLAlignment(TD, ETy);
+ if (align > alignStruct)
+ alignStruct = align;
+ }
+ return alignStruct;
+ }
+
+ const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
+ if (FTy)
+ return TD->getPointerPrefAlignment();
+ return TD->getPrefTypeAlignment(Ty);
+}
+
+void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
+ int paramIndex, raw_ostream &O) {
+ if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
+ (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ else {
+ std::string argName = I->getName();
+ const char *p = argName.c_str();
+ while (*p) {
+ if (*p == '.')
+ O << "_";
+ else
+ O << *p;
+ p++;
+ }
+ }
+}
+
+void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
+ Function::const_arg_iterator I, E;
+ int i = 0;
+
+ if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
+ (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ return;
+ }
+
+ for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
+ if (i == paramIndex) {
+ printParamName(I, paramIndex, O);
+ return;
+ }
+ }
+ llvm_unreachable("paramIndex out of bound");
+}
+
+void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
+ const DataLayout *TD = TM.getDataLayout();
+ const AttributeSet &PAL = F->getAttributes();
+ const TargetLowering *TLI = TM.getTargetLowering();
+ Function::const_arg_iterator I, E;
+ unsigned paramIndex = 0;
+ bool first = true;
+ bool isKernelFunc = llvm::isKernelFunction(*F);
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ MVT thePointerTy = TLI->getPointerTy();
+
+ O << "(\n";
+
+ for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
+ Type *Ty = I->getType();
+
+ if (!first)
+ O << ",\n";
+
+ first = false;
+
+ // Handle image/sampler parameters
+ if (llvm::isSampler(*I) || llvm::isImage(*I)) {
+ if (llvm::isImage(*I)) {
+ std::string sname = I->getName();
+ if (llvm::isImageWriteOnly(*I))
+ O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
+ else // Default image is read_only
+ O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
+ } else // Should be llvm::isSampler(*I)
+ O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
+ << paramIndex;
+ continue;
+ }
+
+ if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+ if (Ty->isVectorTy()) {
+ // Just print .param .b8 .align <a> .param[size];
+ // <a> = PAL.getparamalignment
+ // size = typeallocsize of element type
+ unsigned align = PAL.getParamAlignment(paramIndex + 1);
+ if (align == 0)
+ align = TD->getABITypeAlignment(Ty);
+
+ unsigned sz = TD->getTypeAllocSize(Ty);
+ O << "\t.param .align " << align << " .b8 ";
+ printParamName(I, paramIndex, O);
+ O << "[" << sz << "]";
+
+ continue;
+ }
+ // Just a scalar
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (isKernelFunc) {
+ if (PTy) {
+ // Special handling for pointer arguments to kernel
+ O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
+
+ if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
+ Type *ETy = PTy->getElementType();
+ int addrSpace = PTy->getAddressSpace();
+ switch (addrSpace) {
+ default:
+ O << ".ptr ";
+ break;
+ case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
+ O << ".ptr .const ";
+ break;
+ case llvm::ADDRESS_SPACE_SHARED:
+ O << ".ptr .shared ";
+ break;
+ case llvm::ADDRESS_SPACE_GLOBAL:
+ case llvm::ADDRESS_SPACE_CONST:
+ O << ".ptr .global ";
+ break;
+ }
+ O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
+ }
+ printParamName(I, paramIndex, O);
+ continue;
+ }
+
+ // non-pointer scalar to kernel func
+ O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " ";
+ printParamName(I, paramIndex, O);
+ continue;
+ }
+ // Non-kernel function, just print .param .b<size> for ABI
+ // and .reg .b<size> for non ABY
+ unsigned sz = 0;
+ if (isa<IntegerType>(Ty)) {
+ sz = cast<IntegerType>(Ty)->getBitWidth();
+ if (sz < 32)
+ sz = 32;
+ } else if (isa<PointerType>(Ty))
+ sz = thePointerTy.getSizeInBits();
+ else
+ sz = Ty->getPrimitiveSizeInBits();
+ if (isABI)
+ O << "\t.param .b" << sz << " ";
+ else
+ O << "\t.reg .b" << sz << " ";
+ printParamName(I, paramIndex, O);
+ continue;
+ }
+
+ // param has byVal attribute. So should be a pointer
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ assert(PTy && "Param with byval attribute should be a pointer type");
+ Type *ETy = PTy->getElementType();
+
+ if (isABI || isKernelFunc) {
+ // Just print .param .b8 .align <a> .param[size];
+ // <a> = PAL.getparamalignment
+ // size = typeallocsize of element type
+ unsigned align = PAL.getParamAlignment(paramIndex + 1);
+ if (align == 0)
+ align = TD->getABITypeAlignment(ETy);
+
+ unsigned sz = TD->getTypeAllocSize(ETy);
+ O << "\t.param .align " << align << " .b8 ";
+ printParamName(I, paramIndex, O);
+ O << "[" << sz << "]";
+ continue;
+ } else {
+ // Split the ETy into constituent parts and
+ // print .param .b<size> <name> for each part.
+ // Further, if a part is vector, print the above for
+ // each vector element.
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*TLI, ETy, vtparts);
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j = 0, je = elems; j != je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
+ O << "\t.reg .b" << sz << " ";
+ printParamName(I, paramIndex, O);
+ if (j < je - 1)
+ O << ",\n";
+ ++paramIndex;
+ }
+ if (i < e - 1)
+ O << ",\n";
+ }
+ --paramIndex;
+ continue;
+ }
+ }
+
+ O << "\n)\n";
+}
+
+void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
+ raw_ostream &O) {
+ const Function *F = MF.getFunction();
+ emitFunctionParamList(F, O);
+}
+
+void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
+ const MachineFunction &MF) {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ // Map the global virtual register number to a register class specific
+ // virtual register number starting from 1 with that class.
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ //unsigned numRegClasses = TRI->getNumRegClasses();
+
+ // Emit the Fake Stack Object
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int NumBytes = (int) MFI->getStackSize();
+ if (NumBytes) {
+ O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
+ << getFunctionNumber() << "[" << NumBytes << "];\n";
+ if (nvptxSubtarget.is64Bit()) {
+ O << "\t.reg .b64 \t%SP;\n";
+ O << "\t.reg .b64 \t%SPL;\n";
+ } else {
+ O << "\t.reg .b32 \t%SP;\n";
+ O << "\t.reg .b32 \t%SPL;\n";
+ }
+ }
+
+ // Go through all virtual registers to establish the mapping between the
+ // global virtual
+ // register number and the per class virtual register number.
+ // We use the per class virtual register number in the ptx output.
+ unsigned int numVRs = MRI->getNumVirtRegs();
+ for (unsigned i = 0; i < numVRs; i++) {
+ unsigned int vr = TRI->index2VirtReg(i);
+ const TargetRegisterClass *RC = MRI->getRegClass(vr);
+ std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
+ int n = regmap.size();
+ regmap.insert(std::make_pair(vr, n + 1));
+ }
+
+ // Emit register declarations
+ // @TODO: Extract out the real register usage
+ O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
+
+ // Emit declaration of the virtual registers or 'physical' registers for
+ // each register class
+ //for (unsigned i=0; i< numRegClasses; i++) {
+ // std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[i];
+ // const TargetRegisterClass *RC = TRI->getRegClass(i);
+ // std::string rcname = getNVPTXRegClassName(RC);
+ // std::string rcStr = getNVPTXRegClassStr(RC);
+ // //int n = regmap.size();
+ // if (!isNVPTXVectorRegClass(RC)) {
+ // O << "\t.reg " << rcname << " \t" << rcStr << "<"
+ // << NVPTXNumRegisters << ">;\n";
+ // }
+
+ // Only declare those registers that may be used. And do not emit vector
+ // registers as
+ // they are all elementized to scalar registers.
+ //if (n && !isNVPTXVectorRegClass(RC)) {
+ // if (RegAllocNilUsed) {
+ // O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
+ // << ">;\n";
+ // }
+ // else {
+ // O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr)
+ // << "<" << 32 << ">;\n";
+ // }
+ //}
+ //}
+
+ OutStreamer.EmitRawText(O.str());
+}
+
+void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
+ APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
+ bool ignored;
+ unsigned int numHex;
+ const char *lead;
+
+ if (Fp->getType()->getTypeID() == Type::FloatTyID) {
+ numHex = 8;
+ lead = "0f";
+ APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
+ } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
+ numHex = 16;
+ lead = "0d";
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+ } else
+ llvm_unreachable("unsupported fp type");
+
+ APInt API = APF.bitcastToAPInt();
+ std::string hexstr(utohexstr(API.getZExtValue()));
+ O << lead;
+ if (hexstr.length() < numHex)
+ O << std::string(numHex - hexstr.length(), '0');
+ O << utohexstr(API.getZExtValue());
+}
+
+void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ O << CI->getValue();
+ return;
+ }
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
+ printFPConstant(CFP, O);
+ return;
+ }
+ if (isa<ConstantPointerNull>(CPV)) {
+ O << "0";
+ return;
+ }
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ O << *Mang->getSymbol(GVar);
+ return;
+ }
+ if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ Value *v = Cexpr->stripPointerCasts();
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ O << *Mang->getSymbol(GVar);
+ return;
+ } else {
+ O << *LowerConstant(CPV, *this);
+ return;
+ }
+ }
+ llvm_unreachable("Not scalar type found in printScalarConstant()");
+}
+
+void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
+ AggBuffer *aggBuffer) {
+
+ const DataLayout *TD = TM.getDataLayout();
+
+ if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
+ int s = TD->getTypeAllocSize(CPV->getType());
+ if (s < Bytes)
+ s = Bytes;
+ aggBuffer->addZeros(s);
+ return;
+ }
+
+ unsigned char *ptr;
+ switch (CPV->getType()->getTypeID()) {
+
+ case Type::IntegerTyID: {
+ const Type *ETy = CPV->getType();
+ if (ETy == Type::getInt8Ty(CPV->getContext())) {
+ unsigned char c =
+ (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ ptr = &c;
+ aggBuffer->addBytes(ptr, 1, Bytes);
+ } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
+ short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ ptr = (unsigned char *)&int16;
+ aggBuffer->addBytes(ptr, 2, Bytes);
+ } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ int int32 = (int)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int32;
+ aggBuffer->addBytes(ptr, 4, Bytes);
+ break;
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ ConstantFoldConstantExpression(Cexpr, TD))) {
+ int int32 = (int)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int32;
+ aggBuffer->addBytes(ptr, 4, Bytes);
+ break;
+ }
+ if (Cexpr->getOpcode() == Instruction::PtrToInt) {
+ Value *v = Cexpr->getOperand(0)->stripPointerCasts();
+ aggBuffer->addSymbol(v);
+ aggBuffer->addZeros(4);
+ break;
+ }
+ }
+ llvm_unreachable("unsupported integer const type");
+ } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ long long int64 = (long long)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int64;
+ aggBuffer->addBytes(ptr, 8, Bytes);
+ break;
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ ConstantFoldConstantExpression(Cexpr, TD))) {
+ long long int64 = (long long)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int64;
+ aggBuffer->addBytes(ptr, 8, Bytes);
+ break;
+ }
+ if (Cexpr->getOpcode() == Instruction::PtrToInt) {
+ Value *v = Cexpr->getOperand(0)->stripPointerCasts();
+ aggBuffer->addSymbol(v);
+ aggBuffer->addZeros(8);
+ break;
+ }
+ }
+ llvm_unreachable("unsupported integer const type");
+ } else
+ llvm_unreachable("unsupported integer const type");
+ break;
+ }
+ case Type::FloatTyID:
+ case Type::DoubleTyID: {
+ ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
+ const Type *Ty = CFP->getType();
+ if (Ty == Type::getFloatTy(CPV->getContext())) {
+ float float32 = (float) CFP->getValueAPF().convertToFloat();
+ ptr = (unsigned char *)&float32;
+ aggBuffer->addBytes(ptr, 4, Bytes);
+ } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
+ double float64 = CFP->getValueAPF().convertToDouble();
+ ptr = (unsigned char *)&float64;
+ aggBuffer->addBytes(ptr, 8, Bytes);
+ } else {
+ llvm_unreachable("unsupported fp const type");
+ }
+ break;
+ }
+ case Type::PointerTyID: {
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ aggBuffer->addSymbol(GVar);
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ Value *v = Cexpr->stripPointerCasts();
+ aggBuffer->addSymbol(v);
+ }
+ unsigned int s = TD->getTypeAllocSize(CPV->getType());
+ aggBuffer->addZeros(s);
+ break;
+ }
+
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ case Type::StructTyID: {
+ if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
+ isa<ConstantStruct>(CPV)) {
+ int ElementSize = TD->getTypeAllocSize(CPV->getType());
+ bufferAggregateConstant(CPV, aggBuffer);
+ if (Bytes > ElementSize)
+ aggBuffer->addZeros(Bytes - ElementSize);
+ } else if (isa<ConstantAggregateZero>(CPV))
+ aggBuffer->addZeros(Bytes);
+ else
+ llvm_unreachable("Unexpected Constant type");
+ break;
+ }
+
+ default:
+ llvm_unreachable("unsupported type");
+ }
+}
+
+void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
+ AggBuffer *aggBuffer) {
+ const DataLayout *TD = TM.getDataLayout();
+ int Bytes;
+
+ // Old constants
+ if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
+ if (CPV->getNumOperands())
+ for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
+ bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
+ return;
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(CPV)) {
+ if (CDS->getNumElements())
+ for (unsigned i = 0; i < CDS->getNumElements(); ++i)
+ bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
+ aggBuffer);
+ return;
+ }
+
+ if (isa<ConstantStruct>(CPV)) {
+ if (CPV->getNumOperands()) {
+ StructType *ST = cast<StructType>(CPV->getType());
+ for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
+ if (i == (e - 1))
+ Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
+ TD->getTypeAllocSize(ST) -
+ TD->getStructLayout(ST)->getElementOffset(i);
+ else
+ Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
+ TD->getStructLayout(ST)->getElementOffset(i);
+ bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
+ }
+ }
+ return;
+ }
+ llvm_unreachable("unsupported constant type in printAggregateConstant()");
+}
+
+// buildTypeNameMap - Run through symbol table looking for type names.
+//
+
+bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
+
+ std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
+
+ if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
+ !PI->second.compare("struct._image2d_t") ||
+ !PI->second.compare("struct._image3d_t")))
+ return true;
+
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
+ const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
+
+bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case NVPTX::CallArgBeginInst:
+ case NVPTX::CallArgEndInst0:
+ case NVPTX::CallArgEndInst1:
+ case NVPTX::CallArgF32:
+ case NVPTX::CallArgF64:
+ case NVPTX::CallArgI16:
+ case NVPTX::CallArgI32:
+ case NVPTX::CallArgI32imm:
+ case NVPTX::CallArgI64:
+ case NVPTX::CallArgI8:
+ case NVPTX::CallArgParam:
+ case NVPTX::CallVoidInst:
+ case NVPTX::CallVoidInstReg:
+ case NVPTX::Callseq_End:
+ case NVPTX::CallVoidInstReg64:
+ case NVPTX::DeclareParamInst:
+ case NVPTX::DeclareRetMemInst:
+ case NVPTX::DeclareRetRegInst:
+ case NVPTX::DeclareRetScalarInst:
+ case NVPTX::DeclareScalarParamInst:
+ case NVPTX::DeclareScalarRegInst:
+ case NVPTX::StoreParamF32:
+ case NVPTX::StoreParamF64:
+ case NVPTX::StoreParamI16:
+ case NVPTX::StoreParamI32:
+ case NVPTX::StoreParamI64:
+ case NVPTX::StoreParamI8:
+ case NVPTX::StoreParamS32I8:
+ case NVPTX::StoreParamU32I8:
+ case NVPTX::StoreParamS32I16:
+ case NVPTX::StoreParamU32I16:
+ case NVPTX::StoreRetvalF32:
+ case NVPTX::StoreRetvalF64:
+ case NVPTX::StoreRetvalI16:
+ case NVPTX::StoreRetvalI32:
+ case NVPTX::StoreRetvalI64:
+ case NVPTX::StoreRetvalI8:
+ case NVPTX::LastCallArgF32:
+ case NVPTX::LastCallArgF64:
+ case NVPTX::LastCallArgI16:
+ case NVPTX::LastCallArgI32:
+ case NVPTX::LastCallArgI32imm:
+ case NVPTX::LastCallArgI64:
+ case NVPTX::LastCallArgI8:
+ case NVPTX::LastCallArgParam:
+ case NVPTX::LoadParamMemF32:
+ case NVPTX::LoadParamMemF64:
+ case NVPTX::LoadParamMemI16:
+ case NVPTX::LoadParamMemI32:
+ case NVPTX::LoadParamMemI64:
+ case NVPTX::LoadParamMemI8:
+ case NVPTX::LoadParamRegF32:
+ case NVPTX::LoadParamRegF64:
+ case NVPTX::LoadParamRegI16:
+ case NVPTX::LoadParamRegI32:
+ case NVPTX::LoadParamRegI64:
+ case NVPTX::LoadParamRegI8:
+ case NVPTX::PrototypeInst:
+ case NVPTX::DBG_VALUE:
+ return true;
+ }
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
+ RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
+ RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
+}
+
+void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
+ std::stringstream temp;
+ LineReader *reader = this->getReader(filename.str());
+ temp << "\n//";
+ temp << filename.str();
+ temp << ":";
+ temp << line;
+ temp << " ";
+ temp << reader->readLine(line);
+ temp << "\n";
+ this->OutStreamer.EmitRawText(Twine(temp.str()));
+}
+
+LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
+ if (reader == NULL) {
+ reader = new LineReader(filename);
+ }
+
+ if (reader->fileName() != filename) {
+ delete reader;
+ reader = new LineReader(filename);
+ }
+
+ return reader;
+}
+
+std::string LineReader::readLine(unsigned lineNum) {
+ if (lineNum < theCurLine) {
+ theCurLine = 0;
+ fstr.seekg(0, std::ios::beg);
+ }
+ while (theCurLine < lineNum) {
+ fstr.getline(buff, 500);
+ theCurLine++;
+ }
+ return buff;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeNVPTXAsmPrinter() {
+ RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
+ RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
new file mode 100644
index 000000000000..6dc9fc0ffeff
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -0,0 +1,298 @@
+//===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to NVPTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXASMPRINTER_H
+#define NVPTXASMPRINTER_H
+
+#include "NVPTX.h"
+#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include <fstream>
+
+// The ptx syntax and format is very different from that usually seem in a .s
+// file,
+// therefore we are not able to use the MCAsmStreamer interface here.
+//
+// We are handcrafting the output method here.
+//
+// A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
+// (subclass of MCStreamer).
+
+// This is defined in AsmPrinter.cpp.
+// Used to process the constant expressions in initializers.
+namespace nvptx {
+const llvm::MCExpr *
+LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
+}
+
+namespace llvm {
+
+class LineReader {
+private:
+ unsigned theCurLine;
+ std::ifstream fstr;
+ char buff[512];
+ std::string theFileName;
+ SmallVector<unsigned, 32> lineOffset;
+public:
+ LineReader(std::string filename) {
+ theCurLine = 0;
+ fstr.open(filename.c_str());
+ theFileName = filename;
+ }
+ std::string fileName() { return theFileName; }
+ ~LineReader() { fstr.close(); }
+ std::string readLine(unsigned line);
+};
+
+class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
+
+ class AggBuffer {
+ // Used to buffer the emitted string for initializing global
+ // aggregates.
+ //
+ // Normally an aggregate (array, vector or structure) is emitted
+ // as a u8[]. However, if one element/field of the aggregate
+ // is a non-NULL address, then the aggregate is emitted as u32[]
+ // or u64[].
+ //
+ // We first layout the aggregate in 'buffer' in bytes, except for
+ // those symbol addresses. For the i-th symbol address in the
+ //aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
+ // are filled with 0s. symbolPosInBuffer[i-1] records its position
+ // in 'buffer', and Symbols[i-1] records the Value*.
+ //
+ // Once we have this AggBuffer setup, we can choose how to print
+ // it out.
+ public:
+ unsigned size; // size of the buffer in bytes
+ unsigned char *buffer; // the buffer
+ unsigned numSymbols; // number of symbol addresses
+ SmallVector<unsigned, 4> symbolPosInBuffer;
+ SmallVector<Value *, 4> Symbols;
+
+ private:
+ unsigned curpos;
+ raw_ostream &O;
+ NVPTXAsmPrinter &AP;
+
+ public:
+ AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
+ : O(_O), AP(_AP) {
+ buffer = new unsigned char[_size];
+ size = _size;
+ curpos = 0;
+ numSymbols = 0;
+ }
+ ~AggBuffer() { delete[] buffer; }
+ unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
+ assert((curpos + Num) <= size);
+ assert((curpos + Bytes) <= size);
+ for (int i = 0; i < Num; ++i) {
+ buffer[curpos] = Ptr[i];
+ curpos++;
+ }
+ for (int i = Num; i < Bytes; ++i) {
+ buffer[curpos] = 0;
+ curpos++;
+ }
+ return curpos;
+ }
+ unsigned addZeros(int Num) {
+ assert((curpos + Num) <= size);
+ for (int i = 0; i < Num; ++i) {
+ buffer[curpos] = 0;
+ curpos++;
+ }
+ return curpos;
+ }
+ void addSymbol(Value *GVar) {
+ symbolPosInBuffer.push_back(curpos);
+ Symbols.push_back(GVar);
+ numSymbols++;
+ }
+ void print() {
+ if (numSymbols == 0) {
+ // print out in bytes
+ for (unsigned i = 0; i < size; i++) {
+ if (i)
+ O << ", ";
+ O << (unsigned int) buffer[i];
+ }
+ } else {
+ // print out in 4-bytes or 8-bytes
+ unsigned int pos = 0;
+ unsigned int nSym = 0;
+ unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
+ unsigned int nBytes = 4;
+ if (AP.nvptxSubtarget.is64Bit())
+ nBytes = 8;
+ for (pos = 0; pos < size; pos += nBytes) {
+ if (pos)
+ O << ", ";
+ if (pos == nextSymbolPos) {
+ Value *v = Symbols[nSym];
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ MCSymbol *Name = AP.Mang->getSymbol(GVar);
+ O << *Name;
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
+ O << *nvptx::LowerConstant(Cexpr, AP);
+ } else
+ llvm_unreachable("symbol type unknown");
+ nSym++;
+ if (nSym >= numSymbols)
+ nextSymbolPos = size + 1;
+ else
+ nextSymbolPos = symbolPosInBuffer[nSym];
+ } else if (nBytes == 4)
+ O << *(unsigned int *)(buffer + pos);
+ else
+ O << *(unsigned long long *)(buffer + pos);
+ }
+ }
+ }
+ };
+
+ friend class AggBuffer;
+
+ virtual void emitSrcInText(StringRef filename, unsigned line);
+
+private:
+ virtual const char *getPassName() const { return "NVPTX Assembly Printer"; }
+
+ const Function *F;
+ std::string CurrentFnName;
+
+ void EmitFunctionEntryLabel();
+ void EmitFunctionBodyStart();
+ void EmitFunctionBodyEnd();
+
+ void EmitInstruction(const MachineInstr *);
+
+ void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}
+
+ void printGlobalVariable(const GlobalVariable *GVar);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+ void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+ void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
+ raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+ void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
+ // definition autogenerated.
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
+ void printParamName(int paramIndex, raw_ostream &O);
+ void printParamName(Function::const_arg_iterator I, int paramIndex,
+ raw_ostream &O);
+ void emitHeader(Module &M, raw_ostream &O);
+ void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
+ void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
+ void emitFunctionExternParamList(const MachineFunction &MF);
+ void emitFunctionParamList(const Function *, raw_ostream &O);
+ void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
+ void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
+ void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize);
+ bool isImageType(const Type *Ty);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &);
+ void printReturnValStr(const Function *, raw_ostream &O);
+ void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
+
+protected:
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+private:
+ std::string CurrentBankselLabelInBasicBlock;
+
+ // This is specific per MachineFunction.
+ const MachineRegisterInfo *MRI;
+ // The contents are specific for each
+ // MachineFunction. But the size of the
+ // array is not.
+ std::map<unsigned, unsigned> *VRidGlobal2LocalMap;
+ // cache the subtarget here.
+ const NVPTXSubtarget &nvptxSubtarget;
+ // Build the map between type name and ID based on module's type
+ // symbol table.
+ std::map<const Type *, std::string> TypeNameMap;
+
+ // List of variables demoted to a function scope.
+ std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
+
+ // To record filename to ID mapping
+ std::map<std::string, unsigned> filenameMap;
+ void recordAndEmitFilenames(Module &);
+
+ void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
+ void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
+ std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
+ void printScalarConstant(Constant *CPV, raw_ostream &O);
+ void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
+ void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+ void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
+
+ void printOperandProper(const MachineOperand &MO);
+
+ void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
+ void emitDeclarations(Module &, raw_ostream &O);
+ void emitDeclaration(const Function *, raw_ostream &O);
+
+ static const char *getRegisterName(unsigned RegNo);
+ void emitDemotedVars(const Function *, raw_ostream &);
+
+ LineReader *reader;
+ LineReader *getReader(std::string);
+public:
+ NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ CurrentBankselLabelInBasicBlock = "";
+ VRidGlobal2LocalMap = NULL;
+ reader = NULL;
+ }
+
+ ~NVPTXAsmPrinter() {
+ if (!reader)
+ delete reader;
+ }
+
+ bool ignoreLoc(const MachineInstr &);
+
+ virtual void getVirtualRegisterName(unsigned, bool, raw_ostream &);
+
+ DebugLoc prevDebugLoc;
+ void emitLineNumberAsDotLoc(const MachineInstr &);
+};
+} // end of namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
new file mode 100644
index 000000000000..6533da5102b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -0,0 +1,78 @@
+//=======- NVPTXFrameLowering.cpp - NVPTX Frame Information ---*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXFrameLowering.h"
+#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
+
+void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+ if (MF.getFrameInfo()->hasStackObjects()) {
+ MachineBasicBlock &MBB = MF.front();
+ // Insert "mov.u32 %SP, %Depot"
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ // This instruction really occurs before first instruction
+ // in the BB, so giving it no debug location.
+ DebugLoc dl = DebugLoc();
+
+ if (tm.getSubtargetImpl()->hasGenericLdSt()) {
+ // mov %SPL, %depot;
+ // cvta.local %SP, %SPL;
+ if (is64bit) {
+ MachineInstr *MI = BuildMI(
+ MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+ NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
+ } else {
+ MachineInstr *MI = BuildMI(
+ MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+ NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
+ }
+ } else {
+ // mov %SP, %depot;
+ if (is64bit)
+ BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+ NVPTX::VRFrame).addReg(NVPTX::VRDepot);
+ else
+ BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+ NVPTX::VRFrame).addReg(NVPTX::VRDepot);
+ }
+ }
+}
+
+void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void NVPTXFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN,
+ // ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
new file mode 100644
index 000000000000..819f1dd3f4be
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -0,0 +1,42 @@
+//===--- NVPTXFrameLowering.h - Define frame lowering for NVPTX -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_FRAMELOWERING_H
+#define NVPTX_FRAMELOWERING_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class NVPTXTargetMachine;
+
+class NVPTXFrameLowering : public TargetFrameLowering {
+ NVPTXTargetMachine &tm;
+ bool is64bit;
+
+public:
+ explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
+ is64bit(_is64bit) {}
+
+ virtual bool hasFP(const MachineFunction &MF) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
new file mode 100644
index 000000000000..e862988c85d1
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -0,0 +1,1733 @@
+//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the NVPTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXISelDAGToDAG.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "nvptx-isel"
+
+using namespace llvm;
+
+static cl::opt<bool> UseFMADInstruction(
+ "nvptx-mad-enable", cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
+ cl::init(false));
+
+static cl::opt<int>
+FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
+ " 1: do it 2: do it aggressively"),
+ cl::init(2));
+
+static cl::opt<int> UsePrecDivF32(
+ "nvptx-prec-divf32", cl::ZeroOrMore,
+ cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
+ " IEEE Compliant F32 div.rnd if avaiable."),
+ cl::init(2));
+
+/// createNVPTXISelDag - This pass converts a legalized DAG into a
+/// NVPTX-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel) {
+ return new NVPTXDAGToDAGISel(TM, OptLevel);
+}
+
+NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel),
+ Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
+ // Always do fma.f32 fpcontract if the target supports the instruction.
+ // Always do fma.f64 fpcontract if the target supports the instruction.
+ // Do mad.f32 is nvptx-mad-enable is specified and the target does not
+ // support fma.f32.
+
+ doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
+ doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
+ doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
+ doFMAF32AGG =
+ (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
+ doFMAF64AGG =
+ (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
+
+ allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
+
+ UseF32FTZ = false;
+
+ doMulWide = (OptLevel > 0);
+
+ // Decide how to translate f32 div
+ do_DIVF32_PREC = UsePrecDivF32;
+ // sm less than sm_20 does not support div.rnd. Use div.full.
+ if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
+ do_DIVF32_PREC = 1;
+
+}
+
+/// Select - Select instructions not customized! Used for
+/// expanded, promoted and normal instructions.
+SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ SDNode *ResNode = NULL;
+ switch (N->getOpcode()) {
+ case ISD::LOAD:
+ ResNode = SelectLoad(N);
+ break;
+ case ISD::STORE:
+ ResNode = SelectStore(N);
+ break;
+ case NVPTXISD::LoadV2:
+ case NVPTXISD::LoadV4:
+ ResNode = SelectLoadVector(N);
+ break;
+ case NVPTXISD::LDGV2:
+ case NVPTXISD::LDGV4:
+ case NVPTXISD::LDUV2:
+ case NVPTXISD::LDUV4:
+ ResNode = SelectLDGLDUVector(N);
+ break;
+ case NVPTXISD::StoreV2:
+ case NVPTXISD::StoreV4:
+ ResNode = SelectStoreVector(N);
+ break;
+ default:
+ break;
+ }
+ if (ResNode)
+ return ResNode;
+ return SelectCode(N);
+}
+
+static unsigned int getCodeAddrSpace(MemSDNode *N,
+ const NVPTXSubtarget &Subtarget) {
+ const Value *Src = N->getSrcValue();
+ if (!Src)
+ return NVPTX::PTXLdStInstCode::LOCAL;
+
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
+ switch (PT->getAddressSpace()) {
+ case llvm::ADDRESS_SPACE_LOCAL:
+ return NVPTX::PTXLdStInstCode::LOCAL;
+ case llvm::ADDRESS_SPACE_GLOBAL:
+ return NVPTX::PTXLdStInstCode::GLOBAL;
+ case llvm::ADDRESS_SPACE_SHARED:
+ return NVPTX::PTXLdStInstCode::SHARED;
+ case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
+ return NVPTX::PTXLdStInstCode::CONSTANT;
+ case llvm::ADDRESS_SPACE_GENERIC:
+ return NVPTX::PTXLdStInstCode::GENERIC;
+ case llvm::ADDRESS_SPACE_PARAM:
+ return NVPTX::PTXLdStInstCode::PARAM;
+ case llvm::ADDRESS_SPACE_CONST:
+ // If the arch supports generic address space, translate it to GLOBAL
+ // for correctness.
+ // If the arch does not support generic address space, then the arch
+ // does not really support ADDRESS_SPACE_CONST, translate it to
+ // to CONSTANT for better performance.
+ if (Subtarget.hasGenericLdSt())
+ return NVPTX::PTXLdStInstCode::GLOBAL;
+ else
+ return NVPTX::PTXLdStInstCode::CONSTANT;
+ default:
+ break;
+ }
+ }
+ return NVPTX::PTXLdStInstCode::LOCAL;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT LoadedVT = LD->getMemoryVT();
+ SDNode *NVPTXLD = NULL;
+
+ // do not support pre/post inc/dec
+ if (LD->isIndexed())
+ return NULL;
+
+ if (!LoadedVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
+
+ // Volatile Setting
+ // - .volatile is only availalble for .global and .shared
+ bool isVolatile = LD->isVolatile();
+ if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ isVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = LoadedVT.getSimpleVT();
+ unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+ if (SimpleVT.isVector()) {
+ unsigned num = SimpleVT.getVectorNumElements();
+ if (num == 2)
+ vecType = NVPTX::PTXLdStInstCode::V2;
+ else if (num == 4)
+ vecType = NVPTX::PTXLdStInstCode::V4;
+ else
+ return NULL;
+ }
+
+ // Type Setting: fromType + fromTypeWidth
+ //
+ // Sign : ISD::SEXTLOAD
+ // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
+ // type is integer
+ // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned fromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int fromType;
+ if ((LD->getExtensionType() == ISD::SEXTLOAD))
+ fromType = NVPTX::PTXLdStInstCode::Signed;
+ else if (ScalarVT.isFloatingPoint())
+ fromType = NVPTX::PTXLdStInstCode::Float;
+ else
+ fromType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ // Create the machine instruction DAG
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Addr;
+ SDValue Offset, Base;
+ unsigned Opcode;
+ MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
+
+ if (SelectDirectAddr(N1, Addr)) {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_avar;
+ break;
+ default:
+ return NULL;
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Addr, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_asi;
+ break;
+ default:
+ return NULL;
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_ari_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_ari;
+ break;
+ default:
+ return NULL;
+ }
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_areg_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_areg;
+ break;
+ default:
+ return NULL;
+ }
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), N1, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
+ }
+
+ if (NVPTXLD != NULL) {
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ }
+
+ return NVPTXLD;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT LoadedVT = MemSD->getMemoryVT();
+
+ if (!LoadedVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+ // Volatile Setting
+ // - .volatile is only availalble for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = LoadedVT.getSimpleVT();
+
+ // Type Setting: fromType + fromTypeWidth
+ //
+ // Sign : ISD::SEXTLOAD
+ // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
+ // type is integer
+ // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned FromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int FromType;
+ // The last operand holds the original LoadSDNode::getExtensionType() value
+ unsigned ExtensionType = cast<ConstantSDNode>(
+ N->getOperand(N->getNumOperands() - 1))->getZExtValue();
+ if (ExtensionType == ISD::SEXTLOAD)
+ FromType = NVPTX::PTXLdStInstCode::Signed;
+ else if (ScalarVT.isFloatingPoint())
+ FromType = NVPTX::PTXLdStInstCode::Float;
+ else
+ FromType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::LoadV2:
+ VecType = NVPTX::PTXLdStInstCode::V2;
+ break;
+ case NVPTXISD::LoadV4:
+ VecType = NVPTX::PTXLdStInstCode::V4;
+ break;
+ default:
+ return NULL;
+ }
+
+ EVT EltVT = N->getValueType(0);
+
+ if (SelectDirectAddr(Op1, Addr)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_avar;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_avar;
+ break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Addr, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_asi;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_asi;
+ break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Base, Offset, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_ari_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_ari_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_ari;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_ari;
+ break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Base, Offset, Chain };
+
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_areg_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_areg_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_areg;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_areg;
+ break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ }
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+
+ EVT RetVT = N->getValueType(0);
+
+ // Select opcode
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
+ break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
+ break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ EVT StoreVT = ST->getMemoryVT();
+ SDNode *NVPTXST = NULL;
+
+ // do not support pre/post inc/dec
+ if (ST->isIndexed())
+ return NULL;
+
+ if (!StoreVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
+
+ // Volatile Setting
+ // - .volatile is only availalble for .global and .shared
+ bool isVolatile = ST->isVolatile();
+ if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ isVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = StoreVT.getSimpleVT();
+ unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+ if (SimpleVT.isVector()) {
+ unsigned num = SimpleVT.getVectorNumElements();
+ if (num == 2)
+ vecType = NVPTX::PTXLdStInstCode::V2;
+ else if (num == 4)
+ vecType = NVPTX::PTXLdStInstCode::V4;
+ else
+ return NULL;
+ }
+
+ // Type Setting: toType + toTypeWidth
+ // - for integer type, always use 'u'
+ //
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned toTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int toType;
+ if (ScalarVT.isFloatingPoint())
+ toType = NVPTX::PTXLdStInstCode::Float;
+ else
+ toType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ // Create the machine instruction DAG
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue Addr;
+ SDValue Offset, Base;
+ unsigned Opcode;
+ MVT::SimpleValueType SourceVT =
+ N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
+
+ if (SelectDirectAddr(N2, Addr)) {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_avar;
+ break;
+ default:
+ return NULL;
+ }
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Addr, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_asi;
+ break;
+ default:
+ return NULL;
+ }
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_ari_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_ari;
+ break;
+ default:
+ return NULL;
+ }
+ }
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_areg_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_areg;
+ break;
+ default:
+ return NULL;
+ }
+ }
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), N2, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
+ }
+
+ if (NVPTXST != NULL) {
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ }
+
+ return NVPTXST;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *ST;
+ EVT EltVT = Op1.getValueType();
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT StoreVT = MemSD->getMemoryVT();
+
+ // Address Space Setting
+ unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+ if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
+ report_fatal_error("Cannot store to pointer that points to constant "
+ "memory space");
+ }
+
+ // Volatile Setting
+ // - .volatile is only availalble for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Type Setting: toType + toTypeWidth
+ // - for integer type, always use 'u'
+ assert(StoreVT.isSimple() && "Store value is not simple");
+ MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
+ unsigned ToTypeWidth = ScalarVT.getSizeInBits();
+ unsigned ToType;
+ if (ScalarVT.isFloatingPoint())
+ ToType = NVPTX::PTXLdStInstCode::Float;
+ else
+ ToType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ SmallVector<SDValue, 12> StOps;
+ SDValue N2;
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::StoreV2:
+ VecType = NVPTX::PTXLdStInstCode::V2;
+ StOps.push_back(N->getOperand(1));
+ StOps.push_back(N->getOperand(2));
+ N2 = N->getOperand(3);
+ break;
+ case NVPTXISD::StoreV4:
+ VecType = NVPTX::PTXLdStInstCode::V4;
+ StOps.push_back(N->getOperand(1));
+ StOps.push_back(N->getOperand(2));
+ StOps.push_back(N->getOperand(3));
+ StOps.push_back(N->getOperand(4));
+ N2 = N->getOperand(5);
+ break;
+ default:
+ return NULL;
+ }
+
+ StOps.push_back(getI32Imm(IsVolatile));
+ StOps.push_back(getI32Imm(CodeAddrSpace));
+ StOps.push_back(getI32Imm(VecType));
+ StOps.push_back(getI32Imm(ToType));
+ StOps.push_back(getI32Imm(ToTypeWidth));
+
+ if (SelectDirectAddr(N2, Addr)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_avar;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_avar;
+ break;
+ }
+ break;
+ }
+ StOps.push_back(Addr);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_asi;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_asi;
+ break;
+ }
+ break;
+ }
+ StOps.push_back(Base);
+ StOps.push_back(Offset);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_ari_64;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_ari_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_ari;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_ari;
+ break;
+ }
+ break;
+ }
+ }
+ StOps.push_back(Base);
+ StOps.push_back(Offset);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_areg_64;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_areg_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_areg;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_areg;
+ break;
+ }
+ break;
+ }
+ }
+ StOps.push_back(N2);
+ }
+
+ StOps.push_back(Chain);
+
+ ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return ST;
+}
+
+// SelectDirectAddr - Match a direct address for DAG.
+// A direct address could be a globaladdress or externalsymbol.
+bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
+ // Return true if TGA or ES.
+ if (N.getOpcode() == ISD::TargetGlobalAddress ||
+ N.getOpcode() == ISD::TargetExternalSymbol) {
+ Address = N;
+ return true;
+ }
+ if (N.getOpcode() == NVPTXISD::Wrapper) {
+ Address = N.getOperand(0);
+ return true;
+ }
+ if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
+ if (IID == Intrinsic::nvvm_ptr_gen_to_param)
+ if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
+ return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
+ }
+ return false;
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
+ SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ SDValue base = Addr.getOperand(0);
+ if (SelectDirectAddr(base, Base)) {
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri_imp(
+ SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
+ Offset = CurDAG->getTargetConstant(0, mvt);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
+ return false;
+ }
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
+ else
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ return true;
+ }
+ }
+ return false;
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
+}
+
+bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
+ unsigned int spN) const {
+ const Value *Src = NULL;
+ // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
+ // the classof() for MemSDNode does not include MemIntrinsicSDNode
+ // (See SelectionDAGNodes.h). So we need to check for both.
+ if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
+ Src = mN->getSrcValue();
+ } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
+ Src = mN->getSrcValue();
+ }
+ if (!Src)
+ return false;
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return (PT->getAddressSpace() == spN);
+ return false;
+}
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
+bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default:
+ return true;
+ case 'm': // memory
+ if (SelectDirectAddr(Op, Op0)) {
+ OutOps.push_back(Op0);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
+ }
+ if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+ break;
+ }
+ return true;
+}
+
+// Return true if N is a undef or a constant.
+// If N was undef, return a (i8imm 0) in Retval
+// If N was imm, convert it to i8imm and return in Retval
+// Note: The convert to i8imm is required, otherwise the
+// pattern matcher inserts a bunch of IMOVi8rr to convert
+// the imm to i8imm, and this causes instruction selection
+// to fail.
+bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
+ if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
+ return false;
+
+ if (N.getOpcode() == ISD::UNDEF)
+ Retval = CurDAG->getTargetConstant(0, MVT::i8);
+ else {
+ ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
+ unsigned retval = cn->getZExtValue();
+ Retval = CurDAG->getTargetConstant(retval, MVT::i8);
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
new file mode 100644
index 000000000000..70e8e464297d
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -0,0 +1,106 @@
+//===-- NVPTXISelDAGToDAG.h - A dag to dag inst selector for NVPTX --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the NVPTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "nvptx-isel"
+
+#include "NVPTX.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+namespace {
+
+class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
+
+ // If true, generate corresponding FPCONTRACT. This is
+ // language dependent (i.e. CUDA and OpenCL works differently).
+ bool doFMADF32;
+ bool doFMAF64;
+ bool doFMAF32;
+ bool doFMAF64AGG;
+ bool doFMAF32AGG;
+ bool allowFMA;
+
+ // 0: use div.approx
+ // 1: use div.full
+ // 2: For sm_20 and later, ieee-compliant div.rnd.f32 can be generated;
+ // Otherwise, use div.full
+ int do_DIVF32_PREC;
+
+ // If true, add .ftz to f32 instructions.
+ // This is only meaningful for sm_20 and later, as the default
+ // is not ftz.
+ // For sm earlier than sm_20, f32 denorms are always ftz by the
+ // hardware.
+ // We always add the .ftz modifier regardless of the sm value
+ // when Use32FTZ is true.
+ bool UseF32FTZ;
+
+ // If true, generate mul.wide from sext and mul
+ bool doMulWide;
+
+public:
+ explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
+ CodeGenOpt::Level OptLevel);
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "NVPTX DAG->DAG Pattern Instruction Selection";
+ }
+
+ const NVPTXSubtarget &Subtarget;
+
+ virtual bool SelectInlineAsmMemoryOperand(
+ const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps);
+private:
+// Include the pieces autogenerated from the target description.
+#include "NVPTXGenDAGISel.inc"
+
+ SDNode *Select(SDNode *N);
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectLoadVector(SDNode *N);
+ SDNode *SelectLDGLDUVector(SDNode *N);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectStoreVector(SDNode *N);
+
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ // Match direct address complex pattern.
+ bool SelectDirectAddr(SDValue N, SDValue &Address);
+
+ bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset, MVT mvt);
+ bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+ bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+
+ bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset, MVT mvt);
+ bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+ bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+
+ bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
+
+ bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
+
+};
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
new file mode 100644
index 000000000000..6e01a5a82071
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -0,0 +1,1693 @@
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that NVPTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXISelLowering.h"
+#include "NVPTX.h"
+#include "NVPTXTargetMachine.h"
+#include "NVPTXTargetObjectFile.h"
+#include "NVPTXUtilities.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "nvptx-lower"
+
+using namespace llvm;
+
+static unsigned int uniqueCallSite = 0;
+
+static cl::opt<bool> sched4reg(
+ "nvptx-sched4reg",
+ cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
+
+static bool IsPTXVectorType(MVT VT) {
+ switch (VT.SimpleTy) {
+ default:
+ return false;
+ case MVT::v2i8:
+ case MVT::v4i8:
+ case MVT::v2i16:
+ case MVT::v4i16:
+ case MVT::v2i32:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return true;
+ }
+}
+
+// NVPTXTargetLowering Constructor.
+NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
+ : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+
+ // always lower memset, memcpy, and memmove intrinsics to load/store
+ // instructions, rather
+ // then generating calls to memset, mempcy or memmove.
+ MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
+ MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
+ MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+
+ // Jump is Expensive. Don't create extra control flow for 'and', 'or'
+ // condition branches.
+ setJumpIsExpensive(true);
+
+ // By default, use the Source scheduling
+ if (sched4reg)
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Source);
+
+ addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
+ addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
+ addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
+ addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
+ addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
+ addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
+ addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
+
+ // Operations not directly supported by NVPTX.
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (nvptxSubtarget.hasROT64()) {
+ setOperationAction(ISD::ROTL, MVT::i64, Legal);
+ setOperationAction(ISD::ROTR, MVT::i64, Legal);
+ } else {
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+ }
+ if (nvptxSubtarget.hasROT32()) {
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTR, MVT::i32, Legal);
+ } else {
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ }
+
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i16, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ // Indirect branch is not supported.
+ // This also disables Jump Table creation.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+
+ // We want to legalize constant related memmove and memcopy
+ // intrinsics.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
+ // Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PTX does not support load / store predicate registers
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+ // This is legal in NVPTX
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+
+ // TRAP can be lowered to PTX trap
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Register custom handling for vector loads/stores
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
+ ++i) {
+ MVT VT = (MVT::SimpleValueType) i;
+ if (IsPTXVectorType(VT)) {
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
+ }
+ }
+
+ // Now deduce the information based on the above mentioned
+ // actions
+ computeRegisterProperties();
+}
+
+const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ return 0;
+ case NVPTXISD::CALL:
+ return "NVPTXISD::CALL";
+ case NVPTXISD::RET_FLAG:
+ return "NVPTXISD::RET_FLAG";
+ case NVPTXISD::Wrapper:
+ return "NVPTXISD::Wrapper";
+ case NVPTXISD::NVBuiltin:
+ return "NVPTXISD::NVBuiltin";
+ case NVPTXISD::DeclareParam:
+ return "NVPTXISD::DeclareParam";
+ case NVPTXISD::DeclareScalarParam:
+ return "NVPTXISD::DeclareScalarParam";
+ case NVPTXISD::DeclareRet:
+ return "NVPTXISD::DeclareRet";
+ case NVPTXISD::DeclareRetParam:
+ return "NVPTXISD::DeclareRetParam";
+ case NVPTXISD::PrintCall:
+ return "NVPTXISD::PrintCall";
+ case NVPTXISD::LoadParam:
+ return "NVPTXISD::LoadParam";
+ case NVPTXISD::StoreParam:
+ return "NVPTXISD::StoreParam";
+ case NVPTXISD::StoreParamS32:
+ return "NVPTXISD::StoreParamS32";
+ case NVPTXISD::StoreParamU32:
+ return "NVPTXISD::StoreParamU32";
+ case NVPTXISD::MoveToParam:
+ return "NVPTXISD::MoveToParam";
+ case NVPTXISD::CallArgBegin:
+ return "NVPTXISD::CallArgBegin";
+ case NVPTXISD::CallArg:
+ return "NVPTXISD::CallArg";
+ case NVPTXISD::LastCallArg:
+ return "NVPTXISD::LastCallArg";
+ case NVPTXISD::CallArgEnd:
+ return "NVPTXISD::CallArgEnd";
+ case NVPTXISD::CallVoid:
+ return "NVPTXISD::CallVoid";
+ case NVPTXISD::CallVal:
+ return "NVPTXISD::CallVal";
+ case NVPTXISD::CallSymbol:
+ return "NVPTXISD::CallSymbol";
+ case NVPTXISD::Prototype:
+ return "NVPTXISD::Prototype";
+ case NVPTXISD::MoveParam:
+ return "NVPTXISD::MoveParam";
+ case NVPTXISD::MoveRetval:
+ return "NVPTXISD::MoveRetval";
+ case NVPTXISD::MoveToRetval:
+ return "NVPTXISD::MoveToRetval";
+ case NVPTXISD::StoreRetval:
+ return "NVPTXISD::StoreRetval";
+ case NVPTXISD::PseudoUseParam:
+ return "NVPTXISD::PseudoUseParam";
+ case NVPTXISD::RETURN:
+ return "NVPTXISD::RETURN";
+ case NVPTXISD::CallSeqBegin:
+ return "NVPTXISD::CallSeqBegin";
+ case NVPTXISD::CallSeqEnd:
+ return "NVPTXISD::CallSeqEnd";
+ case NVPTXISD::LoadV2:
+ return "NVPTXISD::LoadV2";
+ case NVPTXISD::LoadV4:
+ return "NVPTXISD::LoadV4";
+ case NVPTXISD::LDGV2:
+ return "NVPTXISD::LDGV2";
+ case NVPTXISD::LDGV4:
+ return "NVPTXISD::LDGV4";
+ case NVPTXISD::LDUV2:
+ return "NVPTXISD::LDUV2";
+ case NVPTXISD::LDUV4:
+ return "NVPTXISD::LDUV4";
+ case NVPTXISD::StoreV2:
+ return "NVPTXISD::StoreV2";
+ case NVPTXISD::StoreV4:
+ return "NVPTXISD::StoreV4";
+ }
+}
+
+bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
+ return VT == MVT::i1;
+}
+
+SDValue
+NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
+}
+
+std::string NVPTXTargetLowering::getPrototype(
+ Type *retTy, const ArgListTy &Args,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ std::stringstream O;
+ O << "prototype_" << uniqueCallSite << " : .callprototype ";
+
+ if (retTy->getTypeID() == Type::VoidTyID)
+ O << "()";
+ else {
+ O << "(";
+ if (isABI) {
+ if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
+ unsigned size = 0;
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
+ size = ITy->getBitWidth();
+ if (size < 32)
+ size = 32;
+ } else {
+ assert(retTy->isFloatingPointTy() &&
+ "Floating point type expected here");
+ size = retTy->getPrimitiveSizeInBits();
+ }
+
+ O << ".param .b" << size << " _";
+ } else if (isa<PointerType>(retTy))
+ O << ".param .b" << getPointerTy().getSizeInBits() << " _";
+ else {
+ if ((retTy->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(retTy)) {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, retTy, vtparts);
+ unsigned totalsz = 0;
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+ for (unsigned j = 0, je = elems; j != je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ totalsz += sz / 8;
+ }
+ }
+ O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
+ } else {
+ assert(false && "Unknown return type");
+ }
+ }
+ } else {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, retTy, vtparts);
+ unsigned idx = 0;
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j = 0, je = elems; j != je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
+ O << ".reg .b" << sz << " _";
+ if (j < je - 1)
+ O << ", ";
+ ++idx;
+ }
+ if (i < e - 1)
+ O << ", ";
+ }
+ }
+ O << ") ";
+ }
+ O << "_ (";
+
+ bool first = true;
+ MVT thePointerTy = getPointerTy();
+
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ const Type *Ty = Args[i].Ty;
+ if (!first) {
+ O << ", ";
+ }
+ first = false;
+
+ if (Outs[i].Flags.isByVal() == false) {
+ unsigned sz = 0;
+ if (isa<IntegerType>(Ty)) {
+ sz = cast<IntegerType>(Ty)->getBitWidth();
+ if (sz < 32)
+ sz = 32;
+ } else if (isa<PointerType>(Ty))
+ sz = thePointerTy.getSizeInBits();
+ else
+ sz = Ty->getPrimitiveSizeInBits();
+ if (isABI)
+ O << ".param .b" << sz << " ";
+ else
+ O << ".reg .b" << sz << " ";
+ O << "_";
+ continue;
+ }
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ assert(PTy && "Param with byval attribute should be a pointer type");
+ Type *ETy = PTy->getElementType();
+
+ if (isABI) {
+ unsigned align = Outs[i].Flags.getByValAlign();
+ unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
+ O << ".param .align " << align << " .b8 ";
+ O << "_";
+ O << "[" << sz << "]";
+ continue;
+ } else {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, ETy, vtparts);
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j = 0, je = elems; j != je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
+ O << ".reg .b" << sz << " ";
+ O << "_";
+ if (j < je - 1)
+ O << ", ";
+ }
+ if (i < e - 1)
+ O << ", ";
+ }
+ continue;
+ }
+ }
+ O << ");";
+ return O.str();
+}
+
+SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ ArgListTy &Args = CLI.Args;
+ Type *retTy = CLI.RetTy;
+ ImmutableCallSite *CS = CLI.CS;
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ SDValue tempChain = Chain;
+ Chain =
+ DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true));
+ SDValue InFlag = Chain.getValue(1);
+
+ assert((Outs.size() == Args.size()) &&
+ "Unexpected number of arguments to function call");
+ unsigned paramCount = 0;
+ // Declare the .params or .reg need to pass values
+ // to the function
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT VT = Outs[i].VT;
+
+ if (Outs[i].Flags.isByVal() == false) {
+ // Plain scalar
+ // for ABI, declare .param .b<size> .param<n>;
+ // for nonABI, declare .reg .b<size> .param<n>;
+ unsigned isReg = 1;
+ if (isABI)
+ isReg = 0;
+ unsigned sz = VT.getSizeInBits();
+ if (VT.isInteger() && (sz < 32))
+ sz = 32;
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(isReg, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(0, MVT::i32), OutVals[i],
+ InFlag };
+
+ unsigned opcode = NVPTXISD::StoreParam;
+ if (isReg)
+ opcode = NVPTXISD::MoveToParam;
+ else {
+ if (Outs[i].Flags.isZExt())
+ opcode = NVPTXISD::StoreParamU32;
+ else if (Outs[i].Flags.isSExt())
+ opcode = NVPTXISD::StoreParamS32;
+ }
+ Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);
+
+ InFlag = Chain.getValue(1);
+ ++paramCount;
+ continue;
+ }
+ // struct or vector
+ SmallVector<EVT, 16> vtparts;
+ const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
+ assert(PTy && "Type of a byval parameter should be pointer");
+ ComputeValueVTs(*this, PTy->getElementType(), vtparts);
+
+ if (isABI) {
+ // declare .param .align 16 .b8 .param<n>[<size>];
+ unsigned sz = Outs[i].Flags.getByValSize();
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ // The ByValAlign in the Outs[i].Flags is alway set at this point, so we
+ // don't need to
+ // worry about natural alignment or not. See TargetLowering::LowerCallTo()
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
+ DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
+ InFlag
+ };
+ Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ unsigned curOffset = 0;
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[j];
+ if (vtparts[j].isVector()) {
+ elems = vtparts[j].getVectorNumElements();
+ elemtype = vtparts[j].getVectorElementType();
+ }
+ for (unsigned k = 0, ke = elems; k != ke; ++k) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ SDValue srcAddr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getConstant(curOffset, getPointerTy()));
+ SDValue theVal =
+ DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false, 0);
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(curOffset, MVT::i32),
+ theVal, InFlag };
+ Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
+ CopyParamOps, 5);
+ InFlag = Chain.getValue(1);
+ curOffset += sz / 8;
+ }
+ }
+ ++paramCount;
+ continue;
+ }
+ // Non-abi, struct or vector
+ // Declare a bunch or .reg .b<size> .param<n>
+ unsigned curOffset = 0;
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[j];
+ if (vtparts[j].isVector()) {
+ elems = vtparts[j].getVectorNumElements();
+ elemtype = vtparts[j].getVectorElementType();
+ }
+ for (unsigned k = 0, ke = elems; k != ke; ++k) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(1, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ SDValue srcAddr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getConstant(curOffset, getPointerTy()));
+ SDValue theVal =
+ DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(),
+ false, false, false, 0);
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(0, MVT::i32), theVal,
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
+ CopyParamOps, 5);
+ InFlag = Chain.getValue(1);
+ ++paramCount;
+ }
+ }
+ }
+
+ GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
+ unsigned retAlignment = 0;
+
+ // Handle Result
+ unsigned retCount = 0;
+ if (Ins.size() > 0) {
+ SmallVector<EVT, 16> resvtparts;
+ ComputeValueVTs(*this, retTy, resvtparts);
+
+ // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
+ // individual .reg .b<size> func_retval<0..> for non ABI
+ unsigned resultsz = 0;
+ for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = resvtparts[i];
+ if (resvtparts[i].isVector()) {
+ elems = resvtparts[i].getVectorNumElements();
+ elemtype = resvtparts[i].getVectorElementType();
+ }
+ for (unsigned j = 0, je = elems; j != je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (isABI == false) {
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
+ } else {
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ }
+ if (isABI == false) {
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(retCount, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
+ ++retCount;
+ }
+ resultsz += sz;
+ }
+ }
+ if (isABI) {
+ if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
+ retTy->isPointerTy()) {
+ // Scalar needs to be at least 32bit wide
+ if (resultsz < 32)
+ resultsz = 32;
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(resultsz, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
+ } else {
+ if (Func) { // direct call
+ if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
+ retAlignment = getDataLayout()->getABITypeAlignment(retTy);
+ } else { // indirect call
+ const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
+ if (!llvm::getAlign(*CallI, 0, retAlignment))
+ retAlignment = getDataLayout()->getABITypeAlignment(retTy);
+ }
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain,
+ DAG.getConstant(retAlignment, MVT::i32),
+ DAG.getConstant(resultsz / 8, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
+ }
+ }
+ }
+
+ if (!Func) {
+ // This is indirect function call case : PTX requires a prototype of the
+ // form
+ // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
+ // to be emitted, and the label has to used as the last arg of call
+ // instruction.
+ // The prototype is embedded in a string and put as the operand for an
+ // INLINEASM SDNode.
+ SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
+ const char *asmstr = nvTM->getManagedStrPool()
+ ->getManagedString(proto_string.c_str())->c_str();
+ SDValue InlineAsmOps[] = {
+ Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()),
+ DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag
+ };
+ Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
+ InFlag = Chain.getValue(1);
+ }
+ // Op to just print "call"
+ SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue PrintCallOps[] = {
+ Chain,
+ DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32),
+ InFlag
+ };
+ Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
+ dl, PrintCallVTs, PrintCallOps, 3);
+ InFlag = Chain.getValue(1);
+
+ // Ops to print out the function name
+ SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallVoidOps[] = { Chain, Callee, InFlag };
+ Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
+ InFlag = Chain.getValue(1);
+
+ // Ops to print out the param list
+ SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallArgBeginOps[] = { Chain, InFlag };
+ Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
+ CallArgBeginOps, 2);
+ InFlag = Chain.getValue(1);
+
+ for (unsigned i = 0, e = paramCount; i != e; ++i) {
+ unsigned opcode;
+ if (i == (e - 1))
+ opcode = NVPTXISD::LastCallArg;
+ else
+ opcode = NVPTXISD::CallArg;
+ SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(i, MVT::i32), InFlag };
+ Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
+ InFlag = Chain.getValue(1);
+ }
+ SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
+ InFlag };
+ Chain =
+ DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3);
+ InFlag = Chain.getValue(1);
+
+ if (!Func) {
+ SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Generate loads from param memory/moves from registers for result
+ if (Ins.size() > 0) {
+ if (isABI) {
+ unsigned resoffset = 0;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ unsigned sz = Ins[i].VT.getSizeInBits();
+ if (Ins[i].VT.isInteger() && (sz < 8))
+ sz = 8;
+ EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
+ SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(resoffset, MVT::i32), InFlag };
+ SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
+ LoadRetOps, array_lengthof(LoadRetOps));
+ Chain = retval.getValue(1);
+ InFlag = retval.getValue(2);
+ InVals.push_back(retval);
+ resoffset += sz / 8;
+ }
+ } else {
+ SmallVector<EVT, 16> resvtparts;
+ ComputeValueVTs(*this, retTy, resvtparts);
+
+ assert(Ins.size() == resvtparts.size() &&
+ "Unexpected number of return values in non-ABI case");
+ unsigned paramNum = 0;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(EVT(Ins[i].VT) == resvtparts[i] &&
+ "Unexpected EVT type in non-ABI case");
+ unsigned numelems = 1;
+ EVT elemtype = Ins[i].VT;
+ if (Ins[i].VT.isVector()) {
+ numelems = Ins[i].VT.getVectorNumElements();
+ elemtype = Ins[i].VT.getVectorElementType();
+ }
+ std::vector<SDValue> tempRetVals;
+ for (unsigned j = 0; j < numelems; ++j) {
+ EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
+ SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(paramNum, MVT::i32),
+ InFlag };
+ SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
+ MoveRetOps, array_lengthof(MoveRetOps));
+ Chain = retval.getValue(1);
+ InFlag = retval.getValue(2);
+ tempRetVals.push_back(retval);
+ ++paramNum;
+ }
+ if (Ins[i].VT.isVector())
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
+ &tempRetVals[0], tempRetVals.size()));
+ else
+ InVals.push_back(tempRetVals[0]);
+ }
+ }
+ }
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
+ DAG.getIntPtrConstant(uniqueCallSite + 1, true),
+ InFlag);
+ uniqueCallSite++;
+
+ // set isTailCall to false for now, until we figure out how to express
+ // tail call optimization in PTX
+ isTailCall = false;
+ return Chain;
+}
+
+// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
+// (see LegalizeDAG.cpp). This is slow and uses local memory.
+// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
+SDValue
+NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumOperands = Node->getNumOperands();
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ SDValue SubOp = Node->getOperand(i);
+ EVT VVT = SubOp.getNode()->getValueType(0);
+ EVT EltVT = VVT.getVectorElementType();
+ unsigned NumSubElem = VVT.getVectorNumElements();
+ for (unsigned j = 0; j < NumSubElem; ++j) {
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
+ DAG.getIntPtrConstant(j)));
+ }
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0],
+ Ops.size());
+}
+
+SDValue
+NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::RETURNADDR:
+ return SDValue();
+ case ISD::FRAMEADDR:
+ return SDValue();
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return Op;
+ case ISD::BUILD_VECTOR:
+ case ISD::EXTRACT_SUBVECTOR:
+ return Op;
+ case ISD::CONCAT_VECTORS:
+ return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::STORE:
+ return LowerSTORE(Op, DAG);
+ case ISD::LOAD:
+ return LowerLOAD(Op, DAG);
+ default:
+ llvm_unreachable("Custom lowering not defined for operation");
+ }
+}
+
+SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() == MVT::i1)
+ return LowerLOADi1(Op, DAG);
+ else
+ return SDValue();
+}
+
+// v = ld i1* addr
+// =>
+// v1 = ld i8* addr
+// v = trunc v1 to i1
+SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ DebugLoc dl = Node->getDebugLoc();
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
+ assert(Node->getValueType(0) == MVT::i1 &&
+ "Custom lowering for i1 load only");
+ SDValue newLD =
+ DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
+ // The legalizer (the caller) is expecting two values from the legalized
+ // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
+ // in LegalizeDAG.cpp which also uses MergeValues.
+ SDValue Ops[] = { result, LD->getChain() };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ EVT ValVT = Op.getOperand(1).getValueType();
+ if (ValVT == MVT::i1)
+ return LowerSTOREi1(Op, DAG);
+ else if (ValVT.isVector())
+ return LowerSTOREVector(Op, DAG);
+ else
+ return SDValue();
+}
+
+SDValue
+NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *N = Op.getNode();
+ SDValue Val = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT ValVT = Val.getValueType();
+
+ if (ValVT.isVector()) {
+ // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+ // legal. We can (and should) split that into 2 stores of <2 x double> here
+ // but I'm leaving that as a TODO for now.
+ if (!ValVT.isSimple())
+ return SDValue();
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i8:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v2f64:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ // This is a "native" vector type
+ break;
+ }
+
+ unsigned Opcode = 0;
+ EVT EltVT = ValVT.getVectorElementType();
+ unsigned NumElts = ValVT.getVectorNumElements();
+
+ // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // stored type to i16 and propogate the "real" type as the memory type.
+ bool NeedExt = false;
+ if (EltVT.getSizeInBits() < 16)
+ NeedExt = true;
+
+ switch (NumElts) {
+ default:
+ return SDValue();
+ case 2:
+ Opcode = NVPTXISD::StoreV2;
+ break;
+ case 4: {
+ Opcode = NVPTXISD::StoreV4;
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> Ops;
+
+ // First is the chain
+ Ops.push_back(N->getOperand(0));
+
+ // Then the split values
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
+ DAG.getIntPtrConstant(i));
+ if (NeedExt)
+ // ANY_EXTEND is correct here since the store will only look at the
+ // lower-order bits anyway.
+ ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
+ Ops.push_back(ExtVal);
+ }
+
+ // Then any remaining arguments
+ for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+
+ SDValue NewSt = DAG.getMemIntrinsicNode(
+ Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(),
+ MemSD->getMemoryVT(), MemSD->getMemOperand());
+
+ //return DCI.CombineTo(N, NewSt, true);
+ return NewSt;
+ }
+
+ return SDValue();
+}
+
+// st i1 v, addr
+// =>
+// v1 = zxt v to i8
+// st i8, addr
+SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Tmp1 = ST->getChain();
+ SDValue Tmp2 = ST->getBasePtr();
+ SDValue Tmp3 = ST->getValue();
+ assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
+ SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ return Result;
+}
+
+SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
+ int idx, EVT v) const {
+ std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
+ std::stringstream suffix;
+ suffix << idx;
+ *name += suffix.str();
+ return DAG.getTargetExternalSymbol(name->c_str(), v);
+}
+
+SDValue
+NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
+ return getExtSymb(DAG, ".PARAM", idx, v);
+}
+
+SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
+ return getExtSymb(DAG, ".HLPPARAM", idx);
+}
+
+// Check to see if the kernel argument is image*_t or sampler_t
+
+bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
+ static const char *const specialTypes[] = { "struct._image2d_t",
+ "struct._image3d_t",
+ "struct._sampler_t" };
+
+ const Type *Ty = arg->getType();
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+
+ if (!PTy)
+ return false;
+
+ if (!context)
+ return false;
+
+ const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
+ const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
+
+ for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
+ if (TypeName == specialTypes[i])
+ return true;
+
+ return false;
+}
+
+SDValue NVPTXTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const DataLayout *TD = getDataLayout();
+
+ const Function *F = MF.getFunction();
+ const AttributeSet &PAL = F->getAttributes();
+
+ SDValue Root = DAG.getRoot();
+ std::vector<SDValue> OutChains;
+
+ bool isKernel = llvm::isKernelFunction(*F);
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ std::vector<Type *> argTypes;
+ std::vector<const Argument *> theArgs;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ theArgs.push_back(I);
+ argTypes.push_back(I->getType());
+ }
+ //assert(argTypes.size() == Ins.size() &&
+ // "Ins types and function types did not match");
+
+ int idx = 0;
+ for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) {
+ Type *Ty = argTypes[i];
+ EVT ObjectVT = getValueType(Ty);
+ //assert(ObjectVT == Ins[i].VT &&
+ // "Ins type did not match function type");
+
+ // If the kernel argument is image*_t or sampler_t, convert it to
+ // a i32 constant holding the parameter position. This can later
+ // matched in the AsmPrinter to output the correct mangled name.
+ if (isImageOrSamplerVal(
+ theArgs[i],
+ (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
+ : 0))) {
+ assert(isKernel && "Only kernels can have image/sampler params");
+ InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
+ continue;
+ }
+
+ if (theArgs[i]->use_empty()) {
+ // argument is dead
+ if (ObjectVT.isVector()) {
+ EVT EltVT = ObjectVT.getVectorElementType();
+ unsigned NumElts = ObjectVT.getVectorNumElements();
+ for (unsigned vi = 0; vi < NumElts; ++vi) {
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT));
+ }
+ } else {
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+ }
+ continue;
+ }
+
+ // In the following cases, assign a node order of "idx+1"
+ // to newly created nodes. The SDNOdes for params have to
+ // appear in the same order as their order of appearance
+ // in the original function. "idx+1" holds that order.
+ if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+ if (ObjectVT.isVector()) {
+ unsigned NumElts = ObjectVT.getVectorNumElements();
+ EVT EltVT = ObjectVT.getVectorElementType();
+ unsigned Offset = 0;
+ for (unsigned vi = 0; vi < NumElts; ++vi) {
+ SDValue A = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue B = DAG.getIntPtrConstant(Offset);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ //getParamSymbol(DAG, idx, EltVT),
+ //DAG.getConstant(Offset, getPointerTy()));
+ A, B);
+ Value *SrcValue = Constant::getNullValue(PointerType::get(
+ EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
+ SDValue Ld = DAG.getLoad(
+ EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false,
+ false,
+ TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
+ Offset += EltVT.getStoreSizeInBits() / 8;
+ InVals.push_back(Ld);
+ }
+ continue;
+ }
+
+ // A plain scalar.
+ if (isABI || isKernel) {
+ // If ABI, load from the param symbol
+ SDValue Arg = getParamSymbol(DAG, idx);
+ // Conjure up a value that we can get the address space from.
+ // FIXME: Using a constant here is a hack.
+ Value *srcValue = Constant::getNullValue(
+ PointerType::get(ObjectVT.getTypeForEVT(F->getContext()),
+ llvm::ADDRESS_SPACE_PARAM));
+ SDValue p = DAG.getLoad(
+ ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false,
+ false,
+ TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ if (p.getNode())
+ DAG.AssignOrdering(p.getNode(), idx + 1);
+ InVals.push_back(p);
+ } else {
+ // If no ABI, just move the param symbol
+ SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
+ SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
+ if (p.getNode())
+ DAG.AssignOrdering(p.getNode(), idx + 1);
+ InVals.push_back(p);
+ }
+ continue;
+ }
+
+ // Param has ByVal attribute
+ if (isABI || isKernel) {
+ // Return MoveParam(param symbol).
+ // Ideally, the param symbol can be returned directly,
+ // but when SDNode builder decides to use it in a CopyToReg(),
+ // machine instruction fails because TargetExternalSymbol
+ // (not lowered) is target dependent, and CopyToReg assumes
+ // the source is lowered.
+ SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
+ if (p.getNode())
+ DAG.AssignOrdering(p.getNode(), idx + 1);
+ if (isKernel)
+ InVals.push_back(p);
+ else {
+ SDValue p2 = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
+ DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
+ InVals.push_back(p2);
+ }
+ } else {
+ // Have to move a set of param symbols to registers and
+ // store them locally and return the local pointer in InVals
+ const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
+ assert(elemPtrType && "Byval parameter should be a pointer type");
+ Type *elemType = elemPtrType->getElementType();
+ // Compute the constituent parts
+ SmallVector<EVT, 16> vtparts;
+ SmallVector<uint64_t, 16> offsets;
+ ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
+ unsigned totalsize = 0;
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j)
+ totalsize += vtparts[j].getStoreSizeInBits();
+ SDValue localcopy = DAG.getFrameIndex(
+ MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false),
+ getPointerTy());
+ unsigned sizesofar = 0;
+ std::vector<SDValue> theChains;
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
+ unsigned numElems = 1;
+ if (vtparts[j].isVector())
+ numElems = vtparts[j].getVectorNumElements();
+ for (unsigned k = 0, ke = numElems; k != ke; ++k) {
+ EVT tmpvt = vtparts[j];
+ if (tmpvt.isVector())
+ tmpvt = tmpvt.getVectorElementType();
+ SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
+ getParamSymbol(DAG, idx, tmpvt));
+ SDValue addr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
+ DAG.getConstant(sizesofar, getPointerTy()));
+ theChains.push_back(DAG.getStore(
+ Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0));
+ sizesofar += tmpvt.getStoreSizeInBits() / 8;
+ ++idx;
+ }
+ }
+ --idx;
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
+ theChains.size());
+ InVals.push_back(localcopy);
+ }
+ }
+
+ // Clang will check explicit VarArg and issue error if any. However, Clang
+ // will let code with
+ // implicit var arg like f() pass.
+ // We treat this case as if the arg list is empty.
+ //if (F.isVarArg()) {
+ // assert(0 && "VarArg not supported yet!");
+ //}
+
+ if (!OutChains.empty())
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
+ OutChains.size()));
+
+ return Chain;
+}
+
+SDValue NVPTXTargetLowering::LowerReturn(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+ SelectionDAG &DAG) const {
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ unsigned sizesofar = 0;
+ unsigned idx = 0;
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ SDValue theVal = OutVals[i];
+ EVT theValType = theVal.getValueType();
+ unsigned numElems = 1;
+ if (theValType.isVector())
+ numElems = theValType.getVectorNumElements();
+ for (unsigned j = 0, je = numElems; j != je; ++j) {
+ SDValue tmpval = theVal;
+ if (theValType.isVector())
+ tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ theValType.getVectorElementType(), tmpval,
+ DAG.getIntPtrConstant(j));
+ Chain = DAG.getNode(
+ isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl,
+ MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
+ tmpval);
+ if (theValType.isVector())
+ sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8;
+ else
+ sizesofar += theValType.getStoreSizeInBits() / 8;
+ ++idx;
+ }
+ }
+
+ return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+void NVPTXTargetLowering::LowerAsmOperandForConstraint(
+ SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ if (Constraint.length() > 1)
+ return;
+ else
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+// NVPTX suuport vector of legal types of any length in Intrinsics because the
+// NVPTX specific type legalizer
+// will legalize them to the PTX supported length.
+bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
+ if (isTypeLegal(VT))
+ return true;
+ if (VT.isVector()) {
+ MVT eVT = VT.getVectorElementType();
+ if (isTypeLegal(eVT))
+ return true;
+ }
+ return false;
+}
+
+// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
+// TgtMemIntrinsic
+// because we need the information that is only available in the "Value" type
+// of destination
+// pointer. In particular, the address space information.
+bool NVPTXTargetLowering::getTgtMemIntrinsic(
+ IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ default:
+ return false;
+
+ case Intrinsic::nvvm_atomic_load_add_f32:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::f32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = true;
+ Info.align = 0;
+ return true;
+
+ case Intrinsic::nvvm_atomic_load_inc_32:
+ case Intrinsic::nvvm_atomic_load_dec_32:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = true;
+ Info.align = 0;
+ return true;
+
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
+ Info.memVT = MVT::i32;
+ else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
+ Info.memVT = getPointerTy();
+ else
+ Info.memVT = MVT::f32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 0;
+ return true;
+
+ }
+ return false;
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+/// Used to guide target specific optimizations, like loop strength reduction
+/// (LoopStrengthReduce.cpp) and memory optimization for address mode
+/// (CodeGenPrepare.cpp)
+bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+
+ // AddrMode - This represents an addressing mode of:
+ // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
+ //
+ // The legal address modes are
+ // - [avar]
+ // - [areg]
+ // - [areg+immoff]
+ // - [immAddr]
+
+ if (AM.BaseGV) {
+ if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
+ return false;
+ return true;
+ }
+
+ switch (AM.Scale) {
+ case 0: // "r", "r+i" or "i" is allowed
+ break;
+ case 1:
+ if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
+ return false;
+ // Otherwise we have r+i.
+ break;
+ default:
+ // No scale > 1 is allowed
+ return false;
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// NVPTX Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+NVPTXTargetLowering::ConstraintType
+NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ break;
+ case 'r':
+ case 'h':
+ case 'c':
+ case 'l':
+ case 'f':
+ case 'd':
+ case '0':
+ case 'N':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'c':
+ return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
+ case 'h':
+ return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
+ case 'r':
+ return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
+ case 'l':
+ case 'N':
+ return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
+ case 'f':
+ return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
+ case 'd':
+ return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
+ return 4;
+}
+
+/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
+static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT ResVT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ assert(ResVT.isVector() && "Vector load must have vector type");
+
+ // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+ // legal. We can (and should) split that into 2 loads of <2 x double> here
+ // but I'm leaving that as a TODO for now.
+ assert(ResVT.isSimple() && "Can only handle simple types");
+ switch (ResVT.getSimpleVT().SimpleTy) {
+ default:
+ return;
+ case MVT::v2i8:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v2f64:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ // This is a "native" vector type
+ break;
+ }
+
+ EVT EltVT = ResVT.getVectorElementType();
+ unsigned NumElts = ResVT.getVectorNumElements();
+
+ // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // loaded type to i16 and propogate the "real" type as the memory type.
+ bool NeedTrunc = false;
+ if (EltVT.getSizeInBits() < 16) {
+ EltVT = MVT::i16;
+ NeedTrunc = true;
+ }
+
+ unsigned Opcode = 0;
+ SDVTList LdResVTs;
+
+ switch (NumElts) {
+ default:
+ return;
+ case 2:
+ Opcode = NVPTXISD::LoadV2;
+ LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+ break;
+ case 4: {
+ Opcode = NVPTXISD::LoadV4;
+ EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+ LdResVTs = DAG.getVTList(ListVTs, 5);
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> OtherOps;
+
+ // Copy regular operands
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OtherOps.push_back(N->getOperand(i));
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+
+ // The select routine does not have access to the LoadSDNode instance, so
+ // pass along the extension information
+ OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
+
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+ OtherOps.size(), LD->getMemoryVT(),
+ LD->getMemOperand());
+
+ SmallVector<SDValue, 4> ScalarRes;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
+
+ SDValue LoadChain = NewLD.getValue(NumElts);
+
+ SDValue BuildVec =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+ Results.push_back(BuildVec);
+ Results.push_back(LoadChain);
+}
+
+static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Intrin = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Get the intrinsic ID
+ unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
+ switch (IntrinNo) {
+ default:
+ return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p: {
+ EVT ResVT = N->getValueType(0);
+
+ if (ResVT.isVector()) {
+ // Vector LDG/LDU
+
+ unsigned NumElts = ResVT.getVectorNumElements();
+ EVT EltVT = ResVT.getVectorElementType();
+
+ // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // loaded type to i16 and propogate the "real" type as the memory type.
+ bool NeedTrunc = false;
+ if (EltVT.getSizeInBits() < 16) {
+ EltVT = MVT::i16;
+ NeedTrunc = true;
+ }
+
+ unsigned Opcode = 0;
+ SDVTList LdResVTs;
+
+ switch (NumElts) {
+ default:
+ return;
+ case 2:
+ switch (IntrinNo) {
+ default:
+ return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ Opcode = NVPTXISD::LDGV2;
+ break;
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+ Opcode = NVPTXISD::LDUV2;
+ break;
+ }
+ LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+ break;
+ case 4: {
+ switch (IntrinNo) {
+ default:
+ return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ Opcode = NVPTXISD::LDGV4;
+ break;
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+ Opcode = NVPTXISD::LDUV4;
+ break;
+ }
+ EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+ LdResVTs = DAG.getVTList(ListVTs, 5);
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> OtherOps;
+
+ // Copy regular operands
+
+ OtherOps.push_back(Chain); // Chain
+ // Skip operand 1 (intrinsic ID)
+ // Others
+ for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
+ OtherOps.push_back(N->getOperand(i));
+
+ MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+ SDValue NewLD = DAG.getMemIntrinsicNode(
+ Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
+ MemSD->getMemoryVT(), MemSD->getMemOperand());
+
+ SmallVector<SDValue, 4> ScalarRes;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res =
+ DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
+
+ SDValue LoadChain = NewLD.getValue(NumElts);
+
+ SDValue BuildVec =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+ Results.push_back(BuildVec);
+ Results.push_back(LoadChain);
+ } else {
+ // i8 LDG/LDU
+ assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
+ "Custom handling of non-i8 ldu/ldg?");
+
+ // Just copy all operands as-is
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+
+ // Force output to i16
+ SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
+
+ MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+ // We make sure the memory type is i8, which will be used during isel
+ // to select the proper instruction.
+ SDValue NewLD =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
+ Ops.size(), MVT::i8, MemSD->getMemOperand());
+
+ Results.push_back(NewLD.getValue(0));
+ Results.push_back(NewLD.getValue(1));
+ }
+ }
+ }
+}
+
+void NVPTXTargetLowering::ReplaceNodeResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default:
+ report_fatal_error("Unhandled custom legalization");
+ case ISD::LOAD:
+ ReplaceLoadVector(N, DAG, Results);
+ return;
+ case ISD::INTRINSIC_W_CHAIN:
+ ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
+ return;
+ }
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
new file mode 100644
index 000000000000..3cd49d38af76
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -0,0 +1,164 @@
+//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that NVPTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXISELLOWERING_H
+#define NVPTXISELLOWERING_H
+
+#include "NVPTX.h"
+#include "NVPTXSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+namespace NVPTXISD {
+enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ Wrapper,
+ CALL,
+ RET_FLAG,
+ LOAD_PARAM,
+ NVBuiltin,
+ DeclareParam,
+ DeclareScalarParam,
+ DeclareRetParam,
+ DeclareRet,
+ DeclareScalarRet,
+ LoadParam,
+ StoreParam,
+ StoreParamS32, // to sext and store a <32bit value, not used currently
+ StoreParamU32, // to zext and store a <32bit value, not used currently
+ MoveToParam,
+ PrintCall,
+ PrintCallUni,
+ CallArgBegin,
+ CallArg,
+ LastCallArg,
+ CallArgEnd,
+ CallVoid,
+ CallVal,
+ CallSymbol,
+ Prototype,
+ MoveParam,
+ MoveRetval,
+ MoveToRetval,
+ StoreRetval,
+ PseudoUseParam,
+ RETURN,
+ CallSeqBegin,
+ CallSeqEnd,
+ Dummy,
+
+ LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LoadV4,
+ LDGV2, // LDG.v2
+ LDGV4, // LDG.v4
+ LDUV2, // LDU.v2
+ LDUV4, // LDU.v4
+ StoreV2,
+ StoreV4
+};
+}
+
+//===--------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===--------------------------------------------------------------------===//
+class NVPTXTargetLowering : public TargetLowering {
+public:
+ explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
+ SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ bool isTypeSupportedInIntrinsic(MVT VT) const;
+
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ unsigned Intrinsic) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type
+ /// Used to guide target specific optimizations, like loop strength
+ /// reduction (LoopStrengthReduce.cpp) and memory optimization for
+ /// address mode (CodeGenPrepare.cpp)
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+ /// getFunctionAlignment - Return the Log2 alignment of this function.
+ virtual unsigned getFunctionAlignment(const Function *F) const;
+
+ virtual EVT getSetCCResultType(EVT VT) const {
+ if (VT.isVector())
+ return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ return MVT::i1;
+ }
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ virtual SDValue LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
+
+ std::string getPrototype(Type *, const ArgListTy &,
+ const SmallVectorImpl<ISD::OutputArg> &,
+ unsigned retAlignment) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+ SelectionDAG &DAG) const;
+
+ virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ NVPTXTargetMachine *nvTM;
+
+ // PTX always uses 32-bit shift amounts
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ virtual bool shouldSplitVectorElementType(EVT VT) const;
+
+private:
+ const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
+
+ SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
+ EVT = MVT::i32) const;
+ SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
+ SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
+
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+};
+} // namespace llvm
+
+#endif // NVPTXISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td
new file mode 100644
index 000000000000..f11f1b8f96fc
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -0,0 +1,43 @@
+//===- NVPTXInstrFormats.td - NVPTX Instruction Formats-------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe NVPTX instructions format
+//
+//===----------------------------------------------------------------------===//
+
+// Vector instruction type enum
+class VecInstTypeEnum<bits<4> val> {
+ bits<4> Value=val;
+}
+def VecNOP : VecInstTypeEnum<0>;
+
+// Generic NVPTX Format
+
+class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<14> Inst;
+
+ let Namespace = "NVPTX";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+
+ // TSFlagFields
+ bits<4> VecInstType = VecNOP.Value;
+ bit IsSimpleMove = 0;
+ bit IsLoad = 0;
+ bit IsStore = 0;
+
+ let TSFlags{3-0} = VecInstType;
+ let TSFlags{4-4} = IsSimpleMove;
+ let TSFlags{5-5} = IsLoad;
+ let TSFlags{6-6} = IsStore;
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
new file mode 100644
index 000000000000..33a63c26f4e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -0,0 +1,273 @@
+//===- NVPTXInstrInfo.cpp - NVPTX Instruction Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXInstrInfo.h"
+#include "NVPTXTargetMachine.h"
+#define GET_INSTRINFO_CTOR
+#include "NVPTXGenInstrInfo.inc"
+#include "llvm/IR/Function.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include <cstdio>
+
+using namespace llvm;
+
+// FIXME: Add the subtarget support on this constructor.
+NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
+ : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {}
+
+void NVPTXInstrInfo::copyPhysReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg, bool KillSrc) const {
+ if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
+ NVPTX::Int32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
+ NVPTX::Int8RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
+ NVPTX::Int1RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
+ NVPTX::Float32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
+ NVPTX::Int16RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
+ NVPTX::Int64RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
+ NVPTX::Float64RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else {
+ llvm_unreachable("Don't know how to copy a register");
+ }
+}
+
+bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DestReg) const {
+ // Look for the appropriate part of TSFlags
+ bool isMove = false;
+
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift;
+ isMove = (TSFlags == 1);
+
+ if (isMove) {
+ MachineOperand dest = MI.getOperand(0);
+ MachineOperand src = MI.getOperand(1);
+ assert(dest.isReg() && "dest of a movrr is not a reg");
+ assert(src.isReg() && "src of a movrr is not a reg");
+
+ SrcReg = src.getReg();
+ DestReg = dest.getReg();
+ return true;
+ }
+
+ return false;
+}
+
+bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case NVPTX::INT_PTX_SREG_NTID_X:
+ case NVPTX::INT_PTX_SREG_NTID_Y:
+ case NVPTX::INT_PTX_SREG_NTID_Z:
+ case NVPTX::INT_PTX_SREG_TID_X:
+ case NVPTX::INT_PTX_SREG_TID_Y:
+ case NVPTX::INT_PTX_SREG_TID_Z:
+ case NVPTX::INT_PTX_SREG_CTAID_X:
+ case NVPTX::INT_PTX_SREG_CTAID_Y:
+ case NVPTX::INT_PTX_SREG_CTAID_Z:
+ case NVPTX::INT_PTX_SREG_NCTAID_X:
+ case NVPTX::INT_PTX_SREG_NCTAID_Y:
+ case NVPTX::INT_PTX_SREG_NCTAID_Z:
+ case NVPTX::INT_PTX_SREG_WARPSIZE:
+ return true;
+ }
+}
+
+bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
+ unsigned &AddrSpace) const {
+ bool isLoad = false;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift;
+ isLoad = (TSFlags == 1);
+ if (isLoad)
+ AddrSpace = getLdStCodeAddrSpace(MI);
+ return isLoad;
+}
+
+bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
+ unsigned &AddrSpace) const {
+ bool isStore = false;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift;
+ isStore = (TSFlags == 1);
+ if (isStore)
+ AddrSpace = getLdStCodeAddrSpace(MI);
+ return isStore;
+}
+
+bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
+ unsigned addrspace = 0;
+ if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
+ return false;
+ if (isLoadInstr(*MI, addrspace))
+ if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
+ return false;
+ if (isStoreInstr(*MI, addrspace))
+ if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
+ return false;
+ return true;
+}
+
+/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+/// implemented for a target). Upon success, this returns false and returns
+/// with the following information in various cases:
+///
+/// 1. If this block ends with no branches (it just falls through to its succ)
+/// just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+/// the destination block.
+/// 3. If this block ends with an conditional branch and it falls through to
+/// an successor block, it sets TBB to be the branch destination block and a
+/// list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+/// 4. If this block ends with an conditional branch and an unconditional
+/// block, it returns the 'true' destination in TBB, the 'false' destination
+/// in FBB, and a list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool NVPTXInstrInfo::AnalyzeBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == NVPTX::GOTO) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == NVPTX::CBranch) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it.
+ if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
+ LastInst->getOpcode() == NVPTX::GOTO) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two NVPTX:GOTOs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == NVPTX::GOTO &&
+ LastInst->getOpcode() == NVPTX::GOTO) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin())
+ return 1;
+ --I;
+ if (I->getOpcode() != NVPTX::CBranch)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned NVPTXInstrInfo::InsertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "NVPTX branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg())
+ .addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
+ return 2;
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
new file mode 100644
index 000000000000..b1972e9b7254
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -0,0 +1,78 @@
+//===- NVPTXInstrInfo.h - NVPTX Instruction Information----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the niversity of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXINSTRUCTIONINFO_H
+#define NVPTXINSTRUCTIONINFO_H
+
+#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "NVPTXGenInstrInfo.inc"
+
+namespace llvm {
+
+class NVPTXInstrInfo : public NVPTXGenInstrInfo {
+ NVPTXTargetMachine &TM;
+ const NVPTXRegisterInfo RegInfo;
+public:
+ explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
+
+ virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
+ /* The following virtual functions are used in register allocation.
+ * They are not implemented because the existing interface and the logic
+ * at the caller side do not work for the elementized vector load and store.
+ *
+ * virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ * int &FrameIndex) const;
+ * virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ * int &FrameIndex) const;
+ * virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ * MachineBasicBlock::iterator MBBI,
+ * unsigned SrcReg, bool isKill, int FrameIndex,
+ * const TargetRegisterClass *RC) const;
+ * virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ * MachineBasicBlock::iterator MBBI,
+ * unsigned DestReg, int FrameIndex,
+ * const TargetRegisterClass *RC) const;
+ */
+
+ virtual void copyPhysReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg, bool KillSrc) const;
+ virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DestReg) const;
+ bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
+ bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
+ bool isReadSpecialReg(MachineInstr &MI) const;
+
+ virtual bool CanTailMerge(const MachineInstr *MI) const;
+ // Branch analysis.
+ virtual bool AnalyzeBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
+ unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
+ return MI.getOperand(2).getImm();
+ }
+
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
new file mode 100644
index 000000000000..f43abe283b58
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -0,0 +1,2877 @@
+//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "NVPTXInstrFormats.td"
+
+// A NOP instruction
+def NOP : NVPTXInst<(outs), (ins), "", []>;
+
+// List of vector specific properties
+def isVecLD : VecInstTypeEnum<1>;
+def isVecST : VecInstTypeEnum<2>;
+def isVecBuild : VecInstTypeEnum<3>;
+def isVecShuffle : VecInstTypeEnum<4>;
+def isVecExtract : VecInstTypeEnum<5>;
+def isVecInsert : VecInstTypeEnum<6>;
+def isVecDest : VecInstTypeEnum<7>;
+def isVecOther : VecInstTypeEnum<15>;
+
+//===----------------------------------------------------------------------===//
+// NVPTX Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+def brtarget : Operand<OtherVT>;
+
+//===----------------------------------------------------------------------===//
+// NVPTX Instruction Predicate Definitions
+//===----------------------------------------------------------------------===//
+
+
+def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
+def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
+def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
+def useAtomRedG32forGen32 :
+ Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
+def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
+def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
+def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
+def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
+def useAtomRedG64forGen64 :
+ Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
+def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
+def hasVote : Predicate<"Subtarget.hasVote()">;
+def hasDouble : Predicate<"Subtarget.hasDouble()">;
+def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
+def hasLDG : Predicate<"Subtarget.hasLDG()">;
+def hasLDU : Predicate<"Subtarget.hasLDU()">;
+def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
+
+def doF32FTZ : Predicate<"UseF32FTZ">;
+
+def doFMAF32 : Predicate<"doFMAF32">;
+def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
+def doFMAF32AGG : Predicate<"doFMAF32AGG">;
+def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
+def doFMAF64 : Predicate<"doFMAF64">;
+def doFMAF64AGG : Predicate<"doFMAF64AGG">;
+def doFMADF32 : Predicate<"doFMADF32">;
+def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
+
+def doMulWide : Predicate<"doMulWide">;
+
+def allowFMA : Predicate<"allowFMA">;
+def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
+
+def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
+def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
+
+def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
+
+def true : Predicate<"1">;
+
+//===----------------------------------------------------------------------===//
+// Special Handling for 8-bit Operands and Operations
+//
+// PTX supports 8-bit signed and unsigned types, but does not support 8-bit
+// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
+// 8-bit registers.
+//
+// PTX ld, st and cvt instructions permit source and destination data operands
+// to be wider than the instruction-type size, so that narrow values may be
+// loaded, stored, and converted using regular-width registers.
+//
+// So in PTX generation, we
+// - always use 16-bit registers in place in 8-bit registers.
+// (8-bit variables should stay as 8-bit as they represent memory layout.)
+// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
+// before operation
+// . div
+// . rem
+// . neg (sign)
+// . set, setp
+// . shr
+//
+// We are patching the operations by inserting the cvt instructions in the
+// asm strings of the affected instructions.
+//
+// Since vector operations, except for ld/st, are eventually elementized. We
+// do not need to special-hand the vector 8-bit operations.
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Generate string block like
+// {
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// cvt.s16.s8 %temp1, %a;
+// cvt.s16.s8 %temp2, %b;
+// opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp1;\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp2;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
+}
+
+// Generate string block like
+// {
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// cvt.s16.s8 %temp1, %a;
+// mov.b16 %temp2, %b;
+// cvt.s16.s8 %temp2, %temp2;
+// opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp1;\n\t",
+ !strconcat(".reg .",
+ !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
+ !strconcat("mov.b16 \t%temp2, $b;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
+}
+
+// Generate string block like
+// {
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// mov.b16 %temp1, %b;
+// cvt.s16.s8 %temp1, %temp1;
+// cvt.s16.s8 %temp2, %a;
+// opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp1;\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp2;\n\t",
+ !strconcat("mov.b16 \t%temp1, $a;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Some Common Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+multiclass I3<string OpcStr, SDNode OpNode> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
+}
+
+multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
+}
+
+multiclass I3_noi8<string OpcStr, SDNode OpNode> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+}
+
+multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+}
+
+multiclass F3<string OpcStr, SDNode OpNode> {
+ def f64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, Float64Regs:$b))]>,
+ Requires<[allowFMA]>;
+ def f64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA]>;
+ def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[allowFMA_ftz]>;
+ def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA_ftz]>;
+ def f32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[allowFMA]>;
+ def f32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA]>;
+}
+
+multiclass F3_rn<string OpcStr, SDNode OpNode> {
+ def f64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, Float64Regs:$b))]>;
+ def f64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, fpimm:$b))]>;
+ def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ def f32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>;
+}
+
+multiclass F2<string OpcStr, SDNode OpNode> {
+ def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
+ !strconcat(OpcStr, ".f64 \t$dst, $a;"),
+ [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
+ def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
+ Requires<[doF32FTZ]>;
+ def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
+ !strconcat(OpcStr, ".f32 \t$dst, $a;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// NVPTX Instructions.
+//===----------------------------------------------------------------------===//
+
+//-----------------------------------
+// Integer Arithmetic
+//-----------------------------------
+
+multiclass ADD_SUB_i1<SDNode OpNode> {
+ def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
+ def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
+}
+
+defm ADD_i1 : ADD_SUB_i1<add>;
+defm SUB_i1 : ADD_SUB_i1<sub>;
+
+
+defm ADD : I3<"add.s", add>;
+defm SUB : I3<"sub.s", sub>;
+
+defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
+defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
+
+defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
+defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
+
+//mul.wide PTX instruction
+def SInt32Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isSignedIntN(32))
+ return true;
+ return false;
+}]>;
+
+def UInt32Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isIntN(32))
+ return true;
+ return false;
+}]>;
+
+def SInt16Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isSignedIntN(16))
+ return true;
+ return false;
+}]>;
+
+def UInt16Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isIntN(16))
+ return true;
+ return false;
+}]>;
+
+def Int5Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ // Check if 0 <= v < 32
+ // Only then the result from (x << v) will be i32
+ if (v.sge(0) && v.slt(32))
+ return true;
+ return false;
+}]>;
+
+def Int4Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ // Check if 0 <= v < 16
+ // Only then the result from (x << v) will be i16
+ if (v.sge(0) && v.slt(16))
+ return true;
+ return false;
+}]>;
+
+def SHL2MUL32 : SDNodeXForm<imm, [{
+ const APInt &v = N->getAPIntValue();
+ APInt temp(32, 1);
+ return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
+}]>;
+
+def SHL2MUL16 : SDNodeXForm<imm, [{
+ const APInt &v = N->getAPIntValue();
+ APInt temp(16, 1);
+ return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
+}]>;
+
+def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, i64imm:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+
+def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, i64imm:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+
+def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, i32imm:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+
+def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, i32imm:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+
+def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
+ (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
+ Requires<[doMulWide]>;
+def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
+ (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
+ (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
+ Requires<[doMulWide]>;
+def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
+ (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
+ (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
+ (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
+ (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
+def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
+ (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
+ (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
+def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
+ (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
+ (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
+def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
+ (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
+ Requires<[doMulWide]>;
+
+defm MULT : I3<"mul.lo.s", mul>;
+
+defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
+defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
+def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.s16 temp1; \n\t",
+ !strconcat(".reg \t.s16 temp2; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
+ !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", "")))))))),
+ [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
+def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.s16 temp1; \n\t",
+ !strconcat(".reg \t.s16 temp2; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
+ !strconcat("mov.b16 \ttemp2, $b; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
+ !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
+def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.u16 temp1; \n\t",
+ !strconcat(".reg \t.u16 temp2; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
+ !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", "")))))))),
+ [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
+def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.u16 temp1; \n\t",
+ !strconcat(".reg \t.u16 temp2; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
+ !strconcat("mov.b16 \ttemp2, $b; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
+ !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
+
+
+defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
+defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
+
+defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
+// The ri version will not be selected as DAGCombiner::visitSREM will lower it.
+defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
+// The ri version will not be selected as DAGCombiner::visitUREM will lower it.
+
+def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
+ Int8Regs:$c))]>;
+def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
+ imm:$c))]>;
+def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
+ Int8Regs:$c))]>;
+def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
+ imm:$c))]>;
+
+def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add
+ (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
+def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add
+ (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
+def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add
+ (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
+def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
+ imm:$c))]>;
+
+def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
+def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
+def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
+def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, imm:$b), imm:$c))]>;
+
+def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
+def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
+def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
+def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, imm:$b), imm:$c))]>;
+
+
+def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
+ !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
+ "neg.s16 \t$dst, $dst;"),
+ [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
+def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "neg.s16 \t$dst, $src;",
+ [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
+def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ "neg.s32 \t$dst, $src;",
+ [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
+def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "neg.s64 \t$dst, $src;",
+ [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
+
+//-----------------------------------
+// Floating Point Arithmetic
+//-----------------------------------
+
+// Constant 1.0f
+def FloatConst1 : PatLeaf<(fpimm), [{
+ if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
+ return false;
+ float f = (float)N->getValueAPF().convertToFloat();
+ return (f==1.0f);
+}]>;
+// Constand (double)1.0
+def DoubleConst1 : PatLeaf<(fpimm), [{
+ if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
+ return false;
+ double d = (double)N->getValueAPF().convertToDouble();
+ return (d==1.0);
+}]>;
+
+defm FADD : F3<"add", fadd>;
+defm FSUB : F3<"sub", fsub>;
+defm FMUL : F3<"mul", fmul>;
+
+defm FADD_rn : F3_rn<"add", fadd>;
+defm FSUB_rn : F3_rn<"sub", fsub>;
+defm FMUL_rn : F3_rn<"mul", fmul>;
+
+defm FABS : F2<"abs", fabs>;
+defm FNEG : F2<"neg", fneg>;
+defm FSQRT : F2<"sqrt.rn", fsqrt>;
+
+//
+// F64 division
+//
+def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
+ (ins f64imm:$a, Float64Regs:$b),
+ "rcp.rn.f64 \t$dst, $b;",
+ [(set Float64Regs:$dst,
+ (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
+def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ "div.rn.f64 \t$dst, $a, $b;",
+ [(set Float64Regs:$dst,
+ (fdiv Float64Regs:$a, Float64Regs:$b))]>;
+def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ "div.rn.f64 \t$dst, $a, $b;",
+ [(set Float64Regs:$dst,
+ (fdiv Float64Regs:$a, fpimm:$b))]>;
+
+//
+// F32 Approximate reciprocal
+//
+def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX]>;
+//
+// F32 Approximate division
+//
+def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.approx.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.approx.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX]>;
+//
+// F32 Semi-accurate reciprocal
+//
+// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
+//
+def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
+//
+// F32 Semi-accurate division
+//
+def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.full.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.full.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.full.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
+def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.full.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
+//
+// F32 Accurate reciprocal
+//
+def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.rn.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20, doF32FTZ]>;
+def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.rn.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20]>;
+//
+// F32 Accurate division
+//
+def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.rn.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[doF32FTZ, reqPTX20]>;
+def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.rn.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ, reqPTX20]>;
+def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.rn.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20]>;
+def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.rn.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[reqPTX20]>;
+
+
+multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
+ def rrr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, Float32Regs:$b),
+ Float32Regs:$c))]>, Requires<[Pred]>;
+ // This is to WAR a weird bug in Tablegen that does not automatically
+ // generate the following permutated rule rrr2 from the above rrr.
+ // So we explicitly add it here. This happens to FMA32 only.
+ // See the comments at FMAD32 and FMA32 for more information.
+ def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd Float32Regs:$c,
+ (fmul Float32Regs:$a, Float32Regs:$b)))]>,
+ Requires<[Pred]>;
+ def rri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
+ Requires<[Pred]>;
+ def rir : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
+ Requires<[Pred]>;
+ def rii : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
+ Requires<[Pred]>;
+}
+
+multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
+ def rrr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd
+ (fmul Float64Regs:$a, Float64Regs:$b),
+ Float64Regs:$c))]>, Requires<[Pred]>;
+ def rri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
+ Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
+ def rir : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd
+ (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
+ Requires<[Pred]>;
+ def rii : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd
+ (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
+ Requires<[Pred]>;
+}
+
+// Due to a unknown reason (most likely a bug in tablegen), tablegen does not
+// automatically generate the rrr2 rule from
+// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
+// If we reverse the order of the following two lines, then rrr2 rule will be
+// generated for FMA32, but not for rrr.
+// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
+defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
+defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
+defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
+defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
+defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
+
+// b*c-a => fmad(b, c, -a)
+multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
+ (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
+ Requires<[Pred]>;
+}
+
+// a-b*c => fmad(-b,c, a)
+// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
+// b*c-a => fmad(b, c, -a)
+// - legal because b*c-a <=> b*c+(-a)
+multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
+ (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
+ Requires<[Pred]>;
+ def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
+ (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
+ Requires<[Pred]>;
+}
+
+// a-b*c => fmad(-b,c, a)
+// b*c-a => fmad(b, c, -a)
+multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
+ (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
+ Requires<[Pred]>;
+
+ def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
+ (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
+ Requires<[Pred]>;
+}
+
+defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
+defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
+defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
+defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
+defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
+
+def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+ "sin.approx.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
+def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+ "cos.approx.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
+
+//-----------------------------------
+// Logical Arithmetic
+//-----------------------------------
+
+multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
+ def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
+ def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
+ !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
+ def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
+ def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
+ def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+}
+
+defm OR : LOG_FORMAT<"or", or>;
+defm AND : LOG_FORMAT<"and", and>;
+defm XOR : LOG_FORMAT<"xor", xor>;
+
+def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
+ "not.pred \t$dst, $src;",
+ [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
+def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
+ "not.b16 \t$dst, $src;",
+ [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
+def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "not.b16 \t$dst, $src;",
+ [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
+def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ "not.b32 \t$dst, $src;",
+ [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
+def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "not.b64 \t$dst, $src;",
+ [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
+
+// For shifts, the second src operand must be 32-bit value
+multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int32Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
+ (i32 imm:$b)))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int32Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ (i32 imm:$b)))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ Int32Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ (i32 imm:$b)))]>;
+}
+
+defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
+
+// For shifts, the second src operand must be 32-bit value
+// Need to add cvt for the 8-bits.
+multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int32Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
+ (i32 imm:$b)))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int32Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ (i32 imm:$b)))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
+ !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ Int32Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
+ !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ (i32 imm:$b)))]>;
+}
+
+defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
+defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
+
+// 32bit
+def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %lhs;\n\t",
+ !strconcat(".reg .b32 %rhs;\n\t",
+ !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
+ !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
+ !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))),
+ []>;
+
+def SUB_FRM_32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
+}]>;
+
+def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
+ (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
+def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
+ (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
+
+def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %lhs;\n\t",
+ !strconcat(".reg .b32 %rhs;\n\t",
+ !strconcat(".reg .b32 %amt2;\n\t",
+ !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
+ !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
+
+def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %lhs;\n\t",
+ !strconcat(".reg .b32 %rhs;\n\t",
+ !strconcat(".reg .b32 %amt2;\n\t",
+ !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
+ !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
+
+// 64bit
+def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
+ i32imm:$amt1, i32imm:$amt2),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b64 %lhs;\n\t",
+ !strconcat(".reg .b64 %rhs;\n\t",
+ !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
+ !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
+ !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))),
+ []>;
+
+def SUB_FRM_64 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
+}]>;
+
+def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
+ (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
+def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
+ (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
+
+def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b64 %lhs;\n\t",
+ !strconcat(".reg .b64 %rhs;\n\t",
+ !strconcat(".reg .u32 %amt2;\n\t",
+ !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
+ !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
+
+def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b64 %lhs;\n\t",
+ !strconcat(".reg .b64 %rhs;\n\t",
+ !strconcat(".reg .u32 %amt2;\n\t",
+ !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
+ !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
+
+
+//-----------------------------------
+// Data Movement (Load / Store, Move)
+//-----------------------------------
+
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
+ [SDNPWantRoot]>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
+ [SDNPWantRoot]>;
+
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops Int32Regs, i32imm);
+}
+def MEMri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops Int64Regs, i64imm);
+}
+
+def imem : Operand<iPTR> {
+ let PrintMethod = "printOperand";
+}
+
+def imemAny : Operand<iPTRAny> {
+ let PrintMethod = "printOperand";
+}
+
+def LdStCode : Operand<i32> {
+ let PrintMethod = "printLdStCode";
+}
+
+def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
+
+def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
+ "mov.u32 \t$dst, $a;",
+ [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
+
+def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
+ "mov.u64 \t$dst, $a;",
+ [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
+
+// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
+let IsSimpleMove=1 in {
+def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
+ "mov.pred \t$dst, $sss;", []>;
+def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
+ "mov.u16 \t$dst, $sss;", []>;
+def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
+ "mov.u16 \t$dst, $sss;", []>;
+def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
+ "mov.u32 \t$dst, $sss;", []>;
+def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
+ "mov.u64 \t$dst, $sss;", []>;
+
+def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+ "mov.f32 \t$dst, $src;", []>;
+def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
+ "mov.f64 \t$dst, $src;", []>;
+}
+def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
+ "mov.pred \t$dst, $src;",
+ [(set Int1Regs:$dst, imm:$src)]>;
+def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
+ "mov.u16 \t$dst, $src;",
+ [(set Int8Regs:$dst, imm:$src)]>;
+def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
+ "mov.u16 \t$dst, $src;",
+ [(set Int16Regs:$dst, imm:$src)]>;
+def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
+ "mov.u32 \t$dst, $src;",
+ [(set Int32Regs:$dst, imm:$src)]>;
+def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
+ "mov.u64 \t$dst, $src;",
+ [(set Int64Regs:$dst, imm:$src)]>;
+
+def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
+ "mov.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, fpimm:$src)]>;
+def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
+ "mov.f64 \t$dst, $src;",
+ [(set Float64Regs:$dst, fpimm:$src)]>;
+
+def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
+
+//---- Copy Frame Index ----
+def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
+ "add.u32 \t$dst, ${addr:add};",
+ [(set Int32Regs:$dst, ADDRri:$addr)]>;
+def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
+ "add.u64 \t$dst, ${addr:add};",
+ [(set Int64Regs:$dst, ADDRri64:$addr)]>;
+
+//-----------------------------------
+// Comparison and Selection
+//-----------------------------------
+
+// Generate string block like
+// {
+// .reg .pred p;
+// setp.gt.s16 p, %a, %b;
+// selp.s16 %dst, -1, 0, p;
+// }
+// when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
+class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
+ string b> {
+ string t1 = "{{\n\t.reg .pred p;\n\t";
+ string t2 = !strconcat(t1 , OpcStr);
+ string t3 = !strconcat(t2 , sz1);
+ string t4 = !strconcat(t3 , " \tp, ");
+ string t5 = !strconcat(t4 , a);
+ string t6 = !strconcat(t5 , ", ");
+ string t7 = !strconcat(t6 , b);
+ string t8 = !strconcat(t7 , ";\n\tselp.s");
+ string t9 = !strconcat(t8 , sz2);
+ string t10 = !strconcat(t9, " \t");
+ string t11 = !strconcat(t10, d);
+ string s = !strconcat(t11, ", -1, 0, p;\n\t}}");
+}
+
+// Generate string block like
+// {
+// .reg .pred p;
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// cvt.s16.s8 %temp1, %a;
+// cvt s16.s8 %temp1, %b;
+// setp.gt.s16 p, %temp1, %temp2;
+// selp.s16 %dst, -1, 0, p;
+// }
+// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
+class Set_Stri8<string OpcStr, string d, string a, string b, string type,
+ string cvt> {
+ string t1 = "{{\n\t.reg .pred p;\n\t";
+ string t2 = !strconcat(t1, ".reg .");
+ string t3 = !strconcat(t2, type);
+ string t4 = !strconcat(t3, " %temp1;\n\t");
+ string t5 = !strconcat(t4, ".reg .");
+ string t6 = !strconcat(t5, type);
+ string t7 = !strconcat(t6, " %temp2;\n\t");
+ string t8 = !strconcat(t7, cvt);
+ string t9 = !strconcat(t8, " \t%temp1, ");
+ string t10 = !strconcat(t9, a);
+ string t11 = !strconcat(t10, ";\n\t");
+ string t12 = !strconcat(t11, cvt);
+ string t13 = !strconcat(t12, " \t%temp2, ");
+ string t14 = !strconcat(t13, b);
+ string t15 = !strconcat(t14, ";\n\t");
+ string t16 = !strconcat(t15, OpcStr);
+ string t17 = !strconcat(t16, "16");
+ string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
+ string t19 = !strconcat(t18, "selp.s16 \t");
+ string t20 = !strconcat(t19, d);
+ string s = !strconcat(t20, ", -1, 0, p;\n\t}}");
+}
+
+multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
+ string TypeStr, string CVTStr> {
+ def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
+ []>;
+ def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
+ Int16Regs:$b),
+ Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
+ []>;
+ def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
+ []>;
+ def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
+ Int64Regs:$b),
+ Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
+ []>;
+
+ def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
+ def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
+ Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
+ def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
+ def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
+ def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
+ def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
+ def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
+ def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
+
+ def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
+ [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
+ [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
+ def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
+ Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
+ [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
+ def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
+ Int16Regs:$b),
+ !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
+ def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
+ def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
+ !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
+ def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
+ !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
+ def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
+ Int64Regs:$b),
+ !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
+ def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
+ !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
+}
+
+multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
+ def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
+ Float32Regs:$b),
+ Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
+ []>, Requires<[doF32FTZ]>;
+ def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
+ Float32Regs:$b),
+ Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
+ []>;
+ def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
+ Float64Regs:$b),
+ Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
+ []>;
+ def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
+ Float64Regs:$b),
+ Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
+ []>;
+
+ def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a
+ , Float32Regs:$b),
+ !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>
+ , Requires<[doF32FTZ]>;
+ def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
+ def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
+ !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
+ def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
+ def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
+ def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
+ !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
+
+ def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
+ def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
+ def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
+ def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
+ def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
+ def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
+ def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins f64imm:$a, Float64Regs:$b),
+ !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
+}
+
+defm ISetSGT
+: ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
+defm ISetUGT
+: ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
+defm ISetSLT
+: ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
+defm ISetULT
+: ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
+defm ISetSGE
+: ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
+defm ISetUGE
+: ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
+defm ISetSLE
+: ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
+defm ISetULE
+: ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
+defm ISetSEQ
+: ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
+defm ISetUEQ
+: ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
+defm ISetSNE
+: ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
+defm ISetUNE
+: ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;
+
+def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
+def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
+def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .pred temp;\n\t",
+ !strconcat("xor.pred \ttemp, $a, $b;\n\t",
+ !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
+ [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
+def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .pred temp;\n\t",
+ !strconcat("xor.pred \ttemp, $a, $b;\n\t",
+ !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
+ [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
+
+// Compare 2 i1's and produce a u32
+def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .pred temp;\n\t",
+ !strconcat("xor.pred \ttemp, $a, $b;\n\t",
+ !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))),
+ [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
+def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .pred temp;\n\t",
+ !strconcat("xor.pred \ttemp, $a, $b;\n\t",
+ !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))),
+ [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
+
+defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>;
+defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>;
+defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>;
+defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>;
+defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>;
+defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>;
+
+defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
+defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>;
+defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>;
+defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>;
+defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>;
+defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>;
+
+defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>;
+defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>;
+
+def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
+ (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
+ (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
+def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
+def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
+def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst),
+ (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
+def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst),
+ (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
+
+def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
+def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
+def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst),
+ (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
+def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst),
+ (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
+
+def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
+ "selp.b32 \t$dst, $a, $b, $p;",
+ [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
+def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
+ "selp.b32 \t$dst, $a, $b, $p;",
+ [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
+def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst),
+ (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
+ "selp.b32 \t$dst, $a, $b, $p;",
+ [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
+def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst),
+ (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
+ "selp.b32 \t$dst, $a, $b, $p;",
+ [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
+
+def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
+ "selp.b64 \t$dst, $a, $b, $p;",
+ [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
+def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
+ "selp.b64 \t$dst, $a, $b, $p;",
+ [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
+def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst),
+ (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
+ "selp.b64 \t$dst, $a, $b, $p;",
+ [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
+def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst),
+ (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
+ "selp.b64 \t$dst, $a, $b, $p;",
+ [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
+
+def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
+ "selp.f32 \t$dst, $a, $b, $p;",
+ [(set Float32Regs:$dst,
+ (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
+def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
+ "selp.f32 \t$dst, $a, $b, $p;",
+ [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
+def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
+ "selp.f32 \t$dst, $a, $b, $p;",
+ [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
+def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
+ "selp.f32 \t$dst, $a, $b, $p;",
+ [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
+
+def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
+ "selp.f64 \t$dst, $a, $b, $p;",
+ [(set Float64Regs:$dst,
+ (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
+def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
+ "selp.f64 \t$dst, $a, $b, $p;",
+ [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
+def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst),
+ (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
+ "selp.f64 \t$dst, $a, $b, $p;",
+ [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
+def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst),
+ (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
+ "selp.f64 \t $dst, $a, $b, $p;",
+ [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
+
+//def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
+// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisInt<2>]>;
+def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
+ SDTCisInt<1>, SDTCisInt<2>]>;
+def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
+def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
+def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
+def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
+def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
+def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
+def SDTCallValProfile : SDTypeProfile<1, 0, []>;
+def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
+def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>;
+def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
+def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
+
+def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
+ SDTDeclareScalarParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
+ SDTDeclareParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
+ [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
+def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def MoveToParam : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
+ []>;
+def MoveRetval : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
+ [SDNPHasChain, SDNPSideEffect]>;
+def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
+ [SDNPHasChain, SDNPSideEffect]>;
+def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
+ SDTPseudoUseParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
+ !strconcat(!strconcat("ld.param", opstr),
+ "\t$dst, [retval0+$b];"),
+ [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
+
+class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
+ !strconcat(!strconcat("mov", opstr),
+ "\t$dst, retval$b;"),
+ [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
+
+class StoreParamInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[param$a+$b], $val;"),
+ [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
+
+class MoveToParamInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("mov", opstr),
+ "\tparam$a, $val;"),
+ [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
+
+class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[func_retval0+$a], $val;"),
+ [(StoreRetval (i32 imm:$a), regclass:$val)]>;
+
+class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
+ !strconcat(!strconcat("mov", opstr),
+ "\tfunc_retval$num, $val;"),
+ [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
+
+class MoveRetvalInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs), (ins regclass:$val),
+ !strconcat(!strconcat("mov", opstr),
+ "\tfunc_retval0, $val;"),
+ [(MoveRetval regclass:$val)]>;
+
+def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
+"call (retval0), ",
+ [(PrintCall (i32 1))]>;
+def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1), ",
+ [(PrintCall (i32 2))]>;
+def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1, retval2), ",
+ [(PrintCall (i32 3))]>;
+def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1, retval2, retval3), ",
+ [(PrintCall (i32 4))]>;
+def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1, retval2, retval3, retval4), ",
+ [(PrintCall (i32 5))]>;
+def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1, retval2, retval3, retval4, retval5), ",
+ [(PrintCall (i32 6))]>;
+def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
+ [(PrintCall (i32 7))]>;
+def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
+!strconcat("call (retval0, retval1, retval2, retval3, retval4",
+ ", retval5, retval6, retval7), "),
+ [(PrintCall (i32 8))]>;
+
+def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ",
+ [(PrintCall (i32 0))]>;
+
+def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
+"call.uni (retval0), ",
+ [(PrintCallUni (i32 1))]>;
+def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1), ",
+ [(PrintCallUni (i32 2))]>;
+def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1, retval2), ",
+ [(PrintCallUni (i32 3))]>;
+def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1, retval2, retval3), ",
+ [(PrintCallUni (i32 4))]>;
+def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1, retval2, retval3, retval4), ",
+ [(PrintCallUni (i32 5))]>;
+def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
+ [(PrintCallUni (i32 6))]>;
+def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
+ [(PrintCallUni (i32 7))]>;
+def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
+!strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
+ ", retval5, retval6, retval7), "),
+ [(PrintCallUni (i32 8))]>;
+
+def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
+ [(PrintCallUni (i32 0))]>;
+
+def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
+def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
+def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
+def LoadParamMemI8 : LoadParamMemInst<Int8Regs, ".b8">;
+
+//def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
+// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
+// "cvt.u16.u32\t$dst, temp_param_reg;"),
+// [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
+//def LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
+// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
+// "cvt.u16.u32\t$dst, temp_param_reg;"),
+// [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
+
+def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
+def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
+
+def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">;
+def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">;
+def LoadParamRegI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
+ "cvt.u16.u32\t$dst, retval$b;",
+ [(set Int16Regs:$dst,
+ (LoadParam (i32 0), (i32 imm:$b)))]>;
+def LoadParamRegI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
+ "cvt.u16.u32\t$dst, retval$b;",
+ [(set Int8Regs:$dst,
+ (LoadParam (i32 0), (i32 imm:$b)))]>;
+
+def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">;
+def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">;
+
+def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
+def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">;
+
+def StoreParamI16 : NVPTXInst<(outs),
+ (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
+ "st.param.b16\t[param$a+$b], $val;",
+ [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+
+def StoreParamI8 : NVPTXInst<(outs),
+ (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+ "st.param.b8\t[param$a+$b], $val;",
+ [(StoreParam
+ (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+
+def StoreParamS32I16 : NVPTXInst<(outs),
+ (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
+ "st.param.b32\t[param$a+$b], temp_param_reg;"),
+ [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+def StoreParamU32I16 : NVPTXInst<(outs),
+ (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
+ "st.param.b32\t[param$a+$b], temp_param_reg;"),
+ [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+
+def StoreParamU32I8 : NVPTXInst<(outs),
+ (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
+ "st.param.b32\t[param$a+$b], temp_param_reg;"),
+ [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+def StoreParamS32I8 : NVPTXInst<(outs),
+ (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
+ "st.param.b32\t[param$a+$b], temp_param_reg;"),
+ [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+
+def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
+def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
+
+def MoveToParamI64 : MoveToParamInst<Int64Regs, ".b64">;
+def MoveToParamI32 : MoveToParamInst<Int32Regs, ".b32">;
+def MoveToParamF64 : MoveToParamInst<Float64Regs, ".f64">;
+def MoveToParamF32 : MoveToParamInst<Float32Regs, ".f32">;
+def MoveToParamI16 : NVPTXInst<(outs),
+ (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
+ "mov.b32\tparam$a, temp_param_reg;"),
+ [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+def MoveToParamI8 : NVPTXInst<(outs),
+ (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
+ "mov.b32\tparam$a, temp_param_reg;"),
+ [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+
+def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
+def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
+def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
+def StoreRetvalI8 : StoreRetvalInst<Int8Regs, ".b8">;
+
+//def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
+// !strconcat("\{\n\t",
+// !strconcat(".reg .b32 temp_retval_reg;\n\t",
+// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
+// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
+// [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
+//def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
+// !strconcat("\{\n\t",
+// !strconcat(".reg .b32 temp_retval_reg;\n\t",
+// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
+// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
+// [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
+
+def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
+def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
+
+def MoveRetvalI64 : MoveRetvalInst<Int64Regs, ".b64">;
+def MoveRetvalI32 : MoveRetvalInst<Int32Regs, ".b32">;
+def MoveRetvalI16 : MoveRetvalInst<Int16Regs, ".b16">;
+def MoveRetvalI8 : MoveRetvalInst<Int8Regs, ".b8">;
+def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">;
+def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">;
+
+def MoveToRetvalI64 : MoveToRetvalInst<Int64Regs, ".b64">;
+def MoveToRetvalI32 : MoveToRetvalInst<Int32Regs, ".b32">;
+def MoveToRetvalF64 : MoveToRetvalInst<Float64Regs, ".f64">;
+def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">;
+def MoveToRetvalI16 : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
+ "cvt.u32.u16\tfunc_retval$num, $val;",
+ [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
+def MoveToRetvalI8 : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
+ "cvt.u32.u16\tfunc_retval$num, $val;",
+ [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
+
+def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
+def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
+def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
+def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
+
+class CallArgInst<NVPTXRegClass regclass> :
+ NVPTXInst<(outs), (ins regclass:$a), "$a, ",
+ [(CallArg (i32 0), regclass:$a)]>;
+
+class LastCallArgInst<NVPTXRegClass regclass> :
+ NVPTXInst<(outs), (ins regclass:$a), "$a",
+ [(LastCallArg (i32 0), regclass:$a)]>;
+
+def CallArgI64 : CallArgInst<Int64Regs>;
+def CallArgI32 : CallArgInst<Int32Regs>;
+def CallArgI16 : CallArgInst<Int16Regs>;
+def CallArgI8 : CallArgInst<Int8Regs>;
+
+def CallArgF64 : CallArgInst<Float64Regs>;
+def CallArgF32 : CallArgInst<Float32Regs>;
+
+def LastCallArgI64 : LastCallArgInst<Int64Regs>;
+def LastCallArgI32 : LastCallArgInst<Int32Regs>;
+def LastCallArgI16 : LastCallArgInst<Int16Regs>;
+def LastCallArgI8 : LastCallArgInst<Int8Regs>;
+
+def LastCallArgF64 : LastCallArgInst<Float64Regs>;
+def LastCallArgF32 : LastCallArgInst<Float32Regs>;
+
+def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
+ [(CallArg (i32 0), (i32 imm:$a))]>;
+def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
+ [(LastCallArg (i32 0), (i32 imm:$a))]>;
+
+def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
+ [(CallArg (i32 1), (i32 imm:$a))]>;
+def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
+ [(LastCallArg (i32 1), (i32 imm:$a))]>;
+
+def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
+ "$addr, ",
+ [(CallVoid (Wrapper tglobaladdr:$addr))]>;
+def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
+ "$addr, ",
+ [(CallVoid Int32Regs:$addr)]>;
+def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
+ "$addr, ",
+ [(CallVoid Int64Regs:$addr)]>;
+def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val),
+ ", prototype_$val;",
+ [(Prototype (i32 imm:$val))]>;
+
+def DeclareRetMemInst : NVPTXInst<(outs),
+ (ins i32imm:$align, i32imm:$size, i32imm:$num),
+ ".param .align $align .b8 retval$num[$size];",
+ [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
+def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
+ ".param .b$size retval$num;",
+ [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
+def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
+ ".reg .b$size retval$num;",
+ [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
+
+def DeclareParamInst : NVPTXInst<(outs),
+ (ins i32imm:$align, i32imm:$a, i32imm:$size),
+ ".param .align $align .b8 param$a[$size];",
+ [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
+def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
+ ".param .b$size param$a;",
+ [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
+def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
+ ".reg .b$size param$a;",
+ [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
+
+class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
+ NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
+ !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"),
+ [(set regclass:$dst, (MoveParam regclass:$src))]>;
+
+def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
+def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
+def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "cvt.u16.u32\t$dst, $src;",
+ [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
+def MoveParamI8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
+ "cvt.u16.u32\t$dst, $src;",
+ [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
+def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
+def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
+
+class PseudoUseParamInst<NVPTXRegClass regclass> :
+ NVPTXInst<(outs), (ins regclass:$src),
+ "// Pseudo use of $src",
+ [(PseudoUseParam regclass:$src)]>;
+
+def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
+def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
+def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
+def PseudoUseParamI8 : PseudoUseParamInst<Int8Regs>;
+def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
+def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
+
+
+//
+// Load / Store Handling
+//
+multiclass LD<NVPTXRegClass regclass> {
+ def _avar : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr),
+!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t$dst, [$addr];"), []>;
+ def _areg : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr),
+!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t$dst, [$addr];"), []>;
+ def _areg_64 : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+ " \t$dst, [$addr];"), []>;
+ def _ari : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t$dst, [$addr+$offset];"), []>;
+ def _ari_64 : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+ " \t$dst, [$addr+$offset];"), []>;
+ def _asi : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t$dst, [$addr+$offset];"), []>;
+}
+
+let mayLoad=1, neverHasSideEffects=1 in {
+defm LD_i8 : LD<Int8Regs>;
+defm LD_i16 : LD<Int16Regs>;
+defm LD_i32 : LD<Int32Regs>;
+defm LD_i64 : LD<Int64Regs>;
+defm LD_f32 : LD<Float32Regs>;
+defm LD_f64 : LD<Float64Regs>;
+}
+
+multiclass ST<NVPTXRegClass regclass> {
+ def _avar : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
+!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
+ " \t[$addr], $src;"), []>;
+ def _areg : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
+!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
+ " \t[$addr], $src;"), []>;
+ def _areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+ "\t[$addr], $src;"), []>;
+ def _ari : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
+!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
+ " \t[$addr+$offset], $src;"), []>;
+ def _ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+ "\t[$addr+$offset], $src;"), []>;
+ def _asi : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
+!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
+ " \t[$addr+$offset], $src;"), []>;
+}
+
+let mayStore=1, neverHasSideEffects=1 in {
+defm ST_i8 : ST<Int8Regs>;
+defm ST_i16 : ST<Int16Regs>;
+defm ST_i32 : ST<Int32Regs>;
+defm ST_i64 : ST<Int64Regs>;
+defm ST_f32 : ST<Float32Regs>;
+defm ST_f64 : ST<Float64Regs>;
+}
+
+// The following is used only in and after vector elementizations.
+// Vector elementization happens at the machine instruction level, so the
+// following instruction
+// never appears in the DAG.
+multiclass LD_VEC<NVPTXRegClass regclass> {
+ def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
+ def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
+ def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
+ def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
+ def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
+ def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
+ def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
+ def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
+ def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
+ def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
+ []>;
+ def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
+ []>;
+ def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
+ []>;
+}
+let mayLoad=1, neverHasSideEffects=1 in {
+defm LDV_i8 : LD_VEC<Int8Regs>;
+defm LDV_i16 : LD_VEC<Int16Regs>;
+defm LDV_i32 : LD_VEC<Int32Regs>;
+defm LDV_i64 : LD_VEC<Int64Regs>;
+defm LDV_f32 : LD_VEC<Float32Regs>;
+defm LDV_f64 : LD_VEC<Float64Regs>;
+}
+
+multiclass ST_VEC<NVPTXRegClass regclass> {
+ def _v2_avar : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
+ def _v2_areg : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
+ def _v2_areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
+ def _v2_ari : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
+ i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
+ def _v2_ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
+ i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
+ def _v2_asi : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
+ i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
+ def _v4_avar : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
+ def _v4_areg : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
+ def _v4_areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
+ def _v4_ari : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
+ []>;
+ def _v4_ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
+ []>;
+ def _v4_asi : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
+ []>;
+}
+let mayStore=1, neverHasSideEffects=1 in {
+defm STV_i8 : ST_VEC<Int8Regs>;
+defm STV_i16 : ST_VEC<Int16Regs>;
+defm STV_i32 : ST_VEC<Int32Regs>;
+defm STV_i64 : ST_VEC<Int64Regs>;
+defm STV_f32 : ST_VEC<Float32Regs>;
+defm STV_f64 : ST_VEC<Float64Regs>;
+}
+
+
+//---- Conversion ----
+
+multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
+// FIXME: need to add f16 support
+// def CVTf16i8 :
+// NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
+// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
+// [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
+// def CVTf16i16 :
+// NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
+// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
+// [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
+// def CVTf16i32 :
+// NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
+// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
+// [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
+// def CVTf16i64:
+// NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
+// !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
+// [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
+
+ def CVTf32i1 :
+ NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
+ "selp.f32 \t$d, 1.0, 0.0, $a;",
+ [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
+ def CVTf32i8 :
+ NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
+ [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
+ def CVTf32i16 :
+ NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
+ [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
+ def CVTf32i32 :
+ NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
+ [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
+ def CVTf32i64:
+ NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
+ [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
+
+ def CVTf64i1 :
+ NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
+ "selp.f64 \t$d, 1.0, 0.0, $a;",
+ [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
+ def CVTf64i8 :
+ NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
+ [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
+ def CVTf64i16 :
+ NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
+ [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
+ def CVTf64i32 :
+ NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
+ [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
+ def CVTf64i64:
+ NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
+ [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
+}
+
+defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
+defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
+
+multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
+// FIXME: need to add f16 support
+// def CVTi8f16:
+// NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
+// !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
+// [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
+ def CVTi8f32_ftz:
+ NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
+ [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
+ def CVTi8f32:
+ NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
+ [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
+ def CVTi8f64:
+ NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
+ [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
+
+// FIXME: need to add f16 support
+// def CVTi16f16:
+// NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
+// !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
+// [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
+ def CVTi16f32_ftz:
+ NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
+ [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
+ def CVTi16f32:
+ NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
+ [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
+ def CVTi16f64:
+ NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
+ [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;
+
+// FIXME: need to add f16 support
+// def CVTi32f16: def CVTi32f16:
+// NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
+// !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
+// [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
+ def CVTi32f32_ftz:
+ NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
+ [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
+ def CVTi32f32:
+ NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
+ [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
+ def CVTi32f64:
+ NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
+ [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;
+
+// FIXME: need to add f16 support
+// def CVTi64f16:
+// NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
+// !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
+// [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
+ def CVTi64f32_ftz:
+ NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
+ [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
+ def CVTi64f32:
+ NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
+ [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
+ def CVTi64f64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
+ [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
+}
+
+defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
+defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
+
+multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
+ def ext1to8:
+ NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
+ "selp.u16 \t$d, 1, 0, $a;",
+ [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to16:
+ NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
+ "selp.u16 \t$d, 1, 0, $a;",
+ [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to32:
+ NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
+ "selp.u32 \t$d, 1, 0, $a;",
+ [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
+ "selp.u64 \t$d, 1, 0, $a;",
+ [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
+}
+
+multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
+ def ext1to8:
+ NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
+ "selp.s16 \t$d, -1, 0, $a;",
+ [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to16:
+ NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
+ "selp.s16 \t$d, -1, 0, $a;",
+ [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to32:
+ NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
+ "selp.s32 \t$d, -1, 0, $a;",
+ [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
+ "selp.s64 \t$d, -1, 0, $a;",
+ [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
+}
+
+multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
+ // All Int8Regs are emiited as 16bit registers in ptx.
+ // And there is no selp.u8 in ptx.
+ def ext8to16:
+ NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
+ !strconcat(OpStr, "8 \t$d, $a;")))),
+ [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
+ def ext8to32:
+ NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
+ !strconcat(OpStr, "8 \t$d, $a;")))),
+ [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
+ def ext8to64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
+ !strconcat(OpStr, "8 \t$d, $a;")))),
+ [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
+ def ext16to32:
+ NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
+ !strconcat(OpStr, "16 \t$d, $a;")))),
+ [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
+ def ext16to64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
+ !strconcat(OpStr, "16 \t$d, $a;")))),
+ [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
+ def ext32to64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
+ !strconcat(OpStr, "32 \t$d, $a;")))),
+ [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
+}
+
+defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
+defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
+defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
+
+defm Sint_extend : INT_EXTEND <"s", sext>;
+defm Zint_extend : INT_EXTEND <"u", zext>;
+defm Aint_extend : INT_EXTEND <"u", anyext>;
+
+class TRUNC_to1_asm<string sz> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg ",
+ !strconcat(sz,
+ !strconcat(" temp;\n\t",
+ !strconcat("and",
+ !strconcat(sz,
+ !strconcat("\t temp, $a, 1;\n\t",
+ !strconcat("setp",
+ !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
+}
+
+def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "cvt.u32.u64 \t$d, $a;",
+ [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
+def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
+ "cvt.u16.u64 \t$d, $a;",
+ [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
+def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
+ "cvt.u8.u64 \t$d, $a;",
+ [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
+def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
+ "cvt.u16.u32 \t$d, $a;",
+ [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
+def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
+ "cvt.u8.u32 \t$d, $a;",
+ [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
+def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
+ "cvt.u8.u16 \t$d, $a;",
+ [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
+def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ TRUNC_to1_asm<".b64">.s,
+ [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
+def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ TRUNC_to1_asm<".b32">.s,
+ [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
+def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
+ TRUNC_to1_asm<".b16">.s,
+ [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
+def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
+ TRUNC_to1_asm<".b16">.s,
+ [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
+
+// Select instructions
+def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
+ (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
+def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
+ (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
+ (TRUNC_32to1 Int32Regs:$pred))>;
+def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
+ (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
+ (TRUNC_32to1 Int32Regs:$pred))>;
+def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
+ (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
+ (TRUNC_32to1 Int32Regs:$pred))>;
+def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
+ (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
+ (TRUNC_32to1 Int32Regs:$pred))>;
+def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
+ (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
+ (TRUNC_32to1 Int32Regs:$pred))>;
+
+class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
+ NVPTXRegClass regclassOut> :
+ NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
+ !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
+ [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
+
+def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
+def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
+def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
+def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
+
+// pack a set of smaller int registers to a larger int register
+def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
+ (ins Int8Regs:$s1, Int8Regs:$s2,
+ Int8Regs:$s3, Int8Regs:$s4),
+ !strconcat("{{\n\t.reg .b8\t%t<4>;",
+ !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
+ !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
+ !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
+ !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
+ "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
+ []>;
+def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
+ (ins Int16Regs:$s1, Int16Regs:$s2,
+ Int16Regs:$s3, Int16Regs:$s4),
+ "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
+ []>;
+def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
+ (ins Int8Regs:$s1, Int8Regs:$s2),
+ !strconcat("{{\n\t.reg .b8\t%t<2>;",
+ !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
+ !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
+ "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
+ []>;
+def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
+ (ins Int16Regs:$s1, Int16Regs:$s2),
+ "mov.b32\t$d, {{$s1, $s2}};",
+ []>;
+def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
+ (ins Int32Regs:$s1, Int32Regs:$s2),
+ "mov.b64\t$d, {{$s1, $s2}};",
+ []>;
+def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
+ (ins Float32Regs:$s1, Float32Regs:$s2),
+ "mov.b64\t$d, {{$s1, $s2}};",
+ []>;
+
+// unpack a larger int register to a set of smaller int registers
+def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
+ Int8Regs:$d3, Int8Regs:$d4),
+ (ins Int32Regs:$s),
+ !strconcat("{{\n\t.reg .b8\t%t<4>;",
+ !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
+ !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
+ !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
+ !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
+ "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
+ []>;
+def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
+ Int16Regs:$d3, Int16Regs:$d4),
+ (ins Int64Regs:$s),
+ "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
+ []>;
+def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
+ (ins Int16Regs:$s),
+ !strconcat("{{\n\t.reg .b8\t%t<2>;",
+ !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
+ !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
+ "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
+ []>;
+def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
+ (ins Int32Regs:$s),
+ "mov.b32\t{{$d1, $d2}}, $s;",
+ []>;
+def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
+ (ins Int64Regs:$s),
+ "mov.b64\t{{$d1, $d2}}, $s;",
+ []>;
+def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
+ (ins Float64Regs:$s),
+ "mov.b64\t{{$d1, $d2}}, $s;",
+ []>;
+
+def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
+ "cvt.rn.ftz.f32.f64 \t$d, $a;",
+ [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
+
+def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
+ "cvt.rn.f32.f64 \t$d, $a;",
+ [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
+
+def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
+ "cvt.ftz.f64.f32 \t$d, $a;",
+ [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
+
+def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
+ "cvt.f64.f32 \t$d, $a;",
+ [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
+
+def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+//-----------------------------------
+// Control-flow
+//-----------------------------------
+
+let isTerminator=1 in {
+ let isReturn=1, isBarrier=1 in
+ def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
+
+ let isBranch=1 in
+ def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
+ "@$a bra \t$target;",
+ [(brcond Int1Regs:$a, bb:$target)]>;
+ let isBranch=1 in
+ def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
+ "@!$a bra \t$target;",
+ []>;
+
+ let isBranch=1, isBarrier=1 in
+ def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
+ "bra.uni \t$target;",
+ [(br bb:$target)]>;
+}
+
+def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
+ (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
+
+// SelectionDAGBuilder::visitSWitchCase() will invert the condition of a
+// conditional branch if
+// the target block is the next block so that the code can fall through to the
+// target block.
+// The invertion is done by 'xor condition, 1', which will be translated to
+// (setne condition, -1).
+// Since ptx supports '@!pred bra target', we should use it.
+def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
+ (CBranchOther Int1Regs:$a, bb:$target)>;
+
+// Call
+def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPSideEffect]>;
+
+def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def calltarget : Operand<i32>;
+let isCall=1 in {
+ def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
+ "call \t$dst, (1);", []>;
+}
+
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : NVPTXInst<outs, ins, asmstr, pattern>;
+
+// @TODO: We use some tricks here to emit curly braces. Can we clean this up
+// a bit without TableGen modifications?
+def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
+ "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
+ [(callseq_start timm:$amt)]>;
+def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "\n\t//{{\n\t}}// Callseq End $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+
+// trap instruction
+
+def trapinst : NVPTXInst<(outs), (ins),
+ "trap;",
+ [(trap)]>;
+
+include "NVPTXIntrinsics.td"
+
+
+//-----------------------------------
+// Notes
+//-----------------------------------
+// BSWAP is currently expanded. The following is a more efficient
+// - for < sm_20, use vector scalar mov, as tesla support native 16-bit register
+// - for sm_20, use pmpt (use vector scalar mov to get the pack and
+// unpack). sm_20 supports native 32-bit register, but not native 16-bit
+// register.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
new file mode 100644
index 000000000000..49e2568dfa2c
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -0,0 +1,1736 @@
+//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def immFloat0 : PatLeaf<(fpimm), [{
+ float f = (float)N->getValueAPF().convertToFloat();
+ return (f==0.0f);
+}]>;
+
+def immFloat1 : PatLeaf<(fpimm), [{
+ float f = (float)N->getValueAPF().convertToFloat();
+ return (f==1.0f);
+}]>;
+
+def immDouble0 : PatLeaf<(fpimm), [{
+ double d = (double)N->getValueAPF().convertToDouble();
+ return (d==0.0);
+}]>;
+
+def immDouble1 : PatLeaf<(fpimm), [{
+ double d = (double)N->getValueAPF().convertToDouble();
+ return (d==1.0);
+}]>;
+
+
+
+//-----------------------------------
+// Synchronization Functions
+//-----------------------------------
+def INT_CUDA_SYNCTHREADS : NVPTXInst<(outs), (ins),
+ "bar.sync \t0;",
+ [(int_cuda_syncthreads)]>;
+def INT_BARRIER0 : NVPTXInst<(outs), (ins),
+ "bar.sync \t0;",
+ [(int_nvvm_barrier0)]>;
+def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg .pred \t%p1; \n\t",
+ !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
+ !strconcat("}}", ""))))),
+ [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
+def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg .pred \t%p1; \n\t",
+ !strconcat(".reg .pred \t%p2; \n\t",
+ !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
+ !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
+ !strconcat("}}", ""))))))),
+ [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
+def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg .pred \t%p1; \n\t",
+ !strconcat(".reg .pred \t%p2; \n\t",
+ !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
+ !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
+ !strconcat("}}", ""))))))),
+ [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
+
+
+//-----------------------------------
+// Explicit Memory Fence Functions
+//-----------------------------------
+class MEMBAR<string StrOp, Intrinsic IntOP> :
+ NVPTXInst<(outs), (ins),
+ StrOp, [(IntOP)]>;
+
+def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
+def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
+def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
+
+
+//-----------------------------------
+// Math Functions
+//-----------------------------------
+
+// Map min(1.0, max(0.0, x)) to sat(x)
+multiclass SAT<NVPTXRegClass regclass, Operand fimm, Intrinsic IntMinOp,
+ Intrinsic IntMaxOp, PatLeaf f0, PatLeaf f1, string OpStr> {
+
+ // fmin(1.0, fmax(0.0, x)) => sat(x)
+ def SAT11 : NVPTXInst<(outs regclass:$dst),
+ (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
+ OpStr,
+ [(set regclass:$dst, (IntMinOp f1:$srcf0 ,
+ (IntMaxOp f0:$srcf1, regclass:$src)))]>;
+
+ // fmin(1.0, fmax(x, 0.0)) => sat(x)
+ def SAT12 : NVPTXInst<(outs regclass:$dst),
+ (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
+ OpStr,
+ [(set regclass:$dst, (IntMinOp f1:$srcf0 ,
+ (IntMaxOp regclass:$src, f0:$srcf1)))]>;
+
+ // fmin(fmax(0.0, x), 1.0) => sat(x)
+ def SAT13 : NVPTXInst<(outs regclass:$dst),
+ (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
+ OpStr,
+ [(set regclass:$dst, (IntMinOp
+ (IntMaxOp f0:$srcf0, regclass:$src), f1:$srcf1))]>;
+
+ // fmin(fmax(x, 0.0), 1.0) => sat(x)
+ def SAT14 : NVPTXInst<(outs regclass:$dst),
+ (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
+ OpStr,
+ [(set regclass:$dst, (IntMinOp
+ (IntMaxOp regclass:$src, f0:$srcf0), f1:$srcf1))]>;
+
+}
+// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x
+// is NaN
+// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
+// Same story for fmax, fmin.
+
+defm SAT_fmin_fmax_f : SAT<Float32Regs, f32imm, int_nvvm_fmin_f,
+ int_nvvm_fmax_f, immFloat0, immFloat1,
+ "cvt.sat.f32.f32 \t$dst, $src; \n">;
+defm SAT_fmin_fmax_d : SAT<Float64Regs, f64imm, int_nvvm_fmin_d,
+ int_nvvm_fmax_d, immDouble0, immDouble1,
+ "cvt.sat.f64.f64 \t$dst, $src; \n">;
+
+
+// We need a full string for OpcStr here because we need to deal with case like
+// INT_PTX_RECIP.
+class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
+ NVPTXRegClass src_regclass, Intrinsic IntOP>
+ : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
+ OpcStr,
+ [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
+
+// We need a full string for OpcStr here because we need to deal with the case
+// like INT_PTX_NATIVE_POWR_F.
+class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
+ NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
+ : NVPTXInst<(outs t_regclass:$dst),
+ (ins s0_regclass:$src0, s1_regclass:$src1),
+ OpcStr,
+ [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
+
+class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
+ NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
+ NVPTXRegClass s2_regclass, Intrinsic IntOP>
+ : NVPTXInst<(outs t_regclass:$dst),
+ (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
+ OpcStr,
+ [(set t_regclass:$dst,
+ (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
+
+//
+// MISC
+//
+
+def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
+ int_nvvm_clz_i>;
+def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
+ int_nvvm_clz_ll>;
+
+def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
+ int_nvvm_popc_i>;
+def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
+ int_nvvm_popc_ll>;
+
+def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
+ Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
+
+//
+// Min Max
+//
+
+def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_min_i>;
+def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_min_ui>;
+
+def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_min_ll>;
+def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_min_ull>;
+
+def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_max_i>;
+def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_max_ui>;
+
+def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_max_ll>;
+def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_max_ull>;
+
+def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
+ Float32Regs, Float32Regs, int_nvvm_fmin_f>;
+def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
+
+def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
+ Float32Regs, Float32Regs, int_nvvm_fmax_f>;
+def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
+
+def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
+ Float64Regs, Float64Regs, int_nvvm_fmin_d>;
+def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
+ Float64Regs, Float64Regs, int_nvvm_fmax_d>;
+
+//
+// Multiplication
+//
+
+def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
+def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
+
+def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
+def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
+
+def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
+def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
+def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
+def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
+def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
+def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
+def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
+def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
+
+def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
+def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
+def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
+def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
+
+def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
+ Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
+def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
+ Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
+
+//
+// Div
+//
+
+def INT_NVVM_DIV_APPROX_FTZ_F
+ : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
+ Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
+def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
+
+def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
+def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
+def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
+def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
+def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
+def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
+def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
+def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
+
+def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
+def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
+def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
+def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
+
+//
+// Brev
+//
+
+def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
+ int_nvvm_brev32>;
+def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
+ int_nvvm_brev64>;
+
+//
+// Sad
+//
+
+def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
+ Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
+def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
+ Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
+
+//
+// Floor Ceil
+//
+
+def INT_NVVM_FLOOR_FTZ_F : F_MATH_1<"cvt.rmi.ftz.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_floor_ftz_f>;
+def INT_NVVM_FLOOR_F : F_MATH_1<"cvt.rmi.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_floor_f>;
+def INT_NVVM_FLOOR_D : F_MATH_1<"cvt.rmi.f64.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_floor_d>;
+
+def INT_NVVM_CEIL_FTZ_F : F_MATH_1<"cvt.rpi.ftz.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_ceil_ftz_f>;
+def INT_NVVM_CEIL_F : F_MATH_1<"cvt.rpi.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_ceil_f>;
+def INT_NVVM_CEIL_D : F_MATH_1<"cvt.rpi.f64.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_ceil_d>;
+
+//
+// Abs
+//
+
+def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
+ int_nvvm_abs_i>;
+def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
+ int_nvvm_abs_ll>;
+
+def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_fabs_ftz_f>;
+def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_fabs_f>;
+
+def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_fabs_d>;
+
+//
+// Round
+//
+
+def INT_NVVM_ROUND_FTZ_F : F_MATH_1<"cvt.rni.ftz.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_round_ftz_f>;
+def INT_NVVM_ROUND_F : F_MATH_1<"cvt.rni.f32.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_round_f>;
+
+def INT_NVVM_ROUND_D : F_MATH_1<"cvt.rni.f64.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_round_d>;
+
+//
+// Trunc
+//
+
+def INT_NVVM_TRUNC_FTZ_F : F_MATH_1<"cvt.rzi.ftz.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_trunc_ftz_f>;
+def INT_NVVM_TRUNC_F : F_MATH_1<"cvt.rzi.f32.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_trunc_f>;
+
+def INT_NVVM_TRUNC_D : F_MATH_1<"cvt.rzi.f64.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_trunc_d>;
+
+//
+// Saturate
+//
+
+def INT_NVVM_SATURATE_FTZ_F : F_MATH_1<"cvt.sat.ftz.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_saturate_ftz_f>;
+def INT_NVVM_SATURATE_F : F_MATH_1<"cvt.sat.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_saturate_f>;
+
+def INT_NVVM_SATURATE_D : F_MATH_1<"cvt.sat.f64.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_saturate_d>;
+
+//
+// Exp2 Log2
+//
+
+def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
+def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
+def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
+
+def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
+def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
+def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
+
+//
+// Sin Cos
+//
+
+def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
+def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
+
+def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
+def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
+
+//
+// Fma
+//
+
+def INT_NVVM_FMA_RN_FTZ_F
+ : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
+def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
+ Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
+def INT_NVVM_FMA_RZ_FTZ_F
+ : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
+def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
+ Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
+def INT_NVVM_FMA_RM_FTZ_F
+ : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
+def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
+ Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
+def INT_NVVM_FMA_RP_FTZ_F
+ : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
+def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
+ Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
+
+def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
+ Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
+def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
+ Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
+def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
+ Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
+def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
+ Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
+
+//
+// Rcp
+//
+
+def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
+def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
+def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
+def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
+def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
+def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
+def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
+def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
+
+def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_rcp_rn_d>;
+def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_rcp_rz_d>;
+def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_rcp_rm_d>;
+def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_rcp_rp_d>;
+
+def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
+
+//
+// Sqrt
+//
+
+def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
+def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_sqrt_rn_f>;
+def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
+def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_sqrt_rz_f>;
+def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
+def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_sqrt_rm_f>;
+def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
+def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_sqrt_rp_f>;
+def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
+def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
+
+def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_sqrt_rn_d>;
+def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_sqrt_rz_d>;
+def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_sqrt_rm_d>;
+def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_sqrt_rp_d>;
+
+//
+// Rsqrt
+//
+
+def INT_NVVM_RSQRT_APPROX_FTZ_F
+ : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
+ int_nvvm_rsqrt_approx_ftz_f>;
+def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
+def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
+
+//
+// Add
+//
+
+def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
+def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
+def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
+def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
+def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
+def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
+def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
+def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
+
+def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
+def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
+def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
+def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
+
+//
+// Convert
+//
+
+def INT_NVVM_D2F_RN_FTZ : F_MATH_1<"cvt.rn.ftz.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rn_ftz>;
+def INT_NVVM_D2F_RN : F_MATH_1<"cvt.rn.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rn>;
+def INT_NVVM_D2F_RZ_FTZ : F_MATH_1<"cvt.rz.ftz.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rz_ftz>;
+def INT_NVVM_D2F_RZ : F_MATH_1<"cvt.rz.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rz>;
+def INT_NVVM_D2F_RM_FTZ : F_MATH_1<"cvt.rm.ftz.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rm_ftz>;
+def INT_NVVM_D2F_RM : F_MATH_1<"cvt.rm.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rm>;
+def INT_NVVM_D2F_RP_FTZ : F_MATH_1<"cvt.rp.ftz.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rp_ftz>;
+def INT_NVVM_D2F_RP : F_MATH_1<"cvt.rp.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rp>;
+
+def INT_NVVM_D2I_RN : F_MATH_1<"cvt.rni.s32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2i_rn>;
+def INT_NVVM_D2I_RZ : F_MATH_1<"cvt.rzi.s32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2i_rz>;
+def INT_NVVM_D2I_RM : F_MATH_1<"cvt.rmi.s32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2i_rm>;
+def INT_NVVM_D2I_RP : F_MATH_1<"cvt.rpi.s32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2i_rp>;
+
+def INT_NVVM_D2UI_RN : F_MATH_1<"cvt.rni.u32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2ui_rn>;
+def INT_NVVM_D2UI_RZ : F_MATH_1<"cvt.rzi.u32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2ui_rz>;
+def INT_NVVM_D2UI_RM : F_MATH_1<"cvt.rmi.u32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2ui_rm>;
+def INT_NVVM_D2UI_RP : F_MATH_1<"cvt.rpi.u32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2ui_rp>;
+
+def INT_NVVM_I2D_RN : F_MATH_1<"cvt.rn.f64.s32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_i2d_rn>;
+def INT_NVVM_I2D_RZ : F_MATH_1<"cvt.rz.f64.s32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_i2d_rz>;
+def INT_NVVM_I2D_RM : F_MATH_1<"cvt.rm.f64.s32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_i2d_rm>;
+def INT_NVVM_I2D_RP : F_MATH_1<"cvt.rp.f64.s32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_i2d_rp>;
+
+def INT_NVVM_UI2D_RN : F_MATH_1<"cvt.rn.f64.u32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_ui2d_rn>;
+def INT_NVVM_UI2D_RZ : F_MATH_1<"cvt.rz.f64.u32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_ui2d_rz>;
+def INT_NVVM_UI2D_RM : F_MATH_1<"cvt.rm.f64.u32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_ui2d_rm>;
+def INT_NVVM_UI2D_RP : F_MATH_1<"cvt.rp.f64.u32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_ui2d_rp>;
+
+def INT_NVVM_F2I_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2i_rn_ftz>;
+def INT_NVVM_F2I_RN : F_MATH_1<"cvt.rni.s32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2i_rn>;
+def INT_NVVM_F2I_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2i_rz_ftz>;
+def INT_NVVM_F2I_RZ : F_MATH_1<"cvt.rzi.s32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2i_rz>;
+def INT_NVVM_F2I_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2i_rm_ftz>;
+def INT_NVVM_F2I_RM : F_MATH_1<"cvt.rmi.s32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2i_rm>;
+def INT_NVVM_F2I_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2i_rp_ftz>;
+def INT_NVVM_F2I_RP : F_MATH_1<"cvt.rpi.s32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2i_rp>;
+
+def INT_NVVM_F2UI_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2ui_rn_ftz>;
+def INT_NVVM_F2UI_RN : F_MATH_1<"cvt.rni.u32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2ui_rn>;
+def INT_NVVM_F2UI_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2ui_rz_ftz>;
+def INT_NVVM_F2UI_RZ : F_MATH_1<"cvt.rzi.u32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2ui_rz>;
+def INT_NVVM_F2UI_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2ui_rm_ftz>;
+def INT_NVVM_F2UI_RM : F_MATH_1<"cvt.rmi.u32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2ui_rm>;
+def INT_NVVM_F2UI_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2ui_rp_ftz>;
+def INT_NVVM_F2UI_RP : F_MATH_1<"cvt.rpi.u32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2ui_rp>;
+
+def INT_NVVM_I2F_RN : F_MATH_1<"cvt.rn.f32.s32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_i2f_rn>;
+def INT_NVVM_I2F_RZ : F_MATH_1<"cvt.rz.f32.s32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_i2f_rz>;
+def INT_NVVM_I2F_RM : F_MATH_1<"cvt.rm.f32.s32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_i2f_rm>;
+def INT_NVVM_I2F_RP : F_MATH_1<"cvt.rp.f32.s32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_i2f_rp>;
+
+def INT_NVVM_UI2F_RN : F_MATH_1<"cvt.rn.f32.u32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_ui2f_rn>;
+def INT_NVVM_UI2F_RZ : F_MATH_1<"cvt.rz.f32.u32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_ui2f_rz>;
+def INT_NVVM_UI2F_RM : F_MATH_1<"cvt.rm.f32.u32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_ui2f_rm>;
+def INT_NVVM_UI2F_RP : F_MATH_1<"cvt.rp.f32.u32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_ui2f_rp>;
+
+def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
+ Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
+
+def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t",
+ !strconcat(".reg .b32 %temp; \n\t",
+ !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
+ "}}"))),
+ Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
+def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
+ !strconcat(".reg .b32 %temp; \n\t",
+ !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
+ "}}"))),
+ Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
+
+def INT_NVVM_F2LL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ll_rn_ftz>;
+def INT_NVVM_F2LL_RN : F_MATH_1<"cvt.rni.s64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ll_rn>;
+def INT_NVVM_F2LL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ll_rz_ftz>;
+def INT_NVVM_F2LL_RZ : F_MATH_1<"cvt.rzi.s64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ll_rz>;
+def INT_NVVM_F2LL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ll_rm_ftz>;
+def INT_NVVM_F2LL_RM : F_MATH_1<"cvt.rmi.s64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ll_rm>;
+def INT_NVVM_F2LL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ll_rp_ftz>;
+def INT_NVVM_F2LL_RP : F_MATH_1<"cvt.rpi.s64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ll_rp>;
+
+def INT_NVVM_F2ULL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ull_rn_ftz>;
+def INT_NVVM_F2ULL_RN : F_MATH_1<"cvt.rni.u64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ull_rn>;
+def INT_NVVM_F2ULL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ull_rz_ftz>;
+def INT_NVVM_F2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ull_rz>;
+def INT_NVVM_F2ULL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ull_rm_ftz>;
+def INT_NVVM_F2ULL_RM : F_MATH_1<"cvt.rmi.u64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ull_rm>;
+def INT_NVVM_F2ULL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ull_rp_ftz>;
+def INT_NVVM_F2ULL_RP : F_MATH_1<"cvt.rpi.u64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ull_rp>;
+
+def INT_NVVM_D2LL_RN : F_MATH_1<"cvt.rni.s64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ll_rn>;
+def INT_NVVM_D2LL_RZ : F_MATH_1<"cvt.rzi.s64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ll_rz>;
+def INT_NVVM_D2LL_RM : F_MATH_1<"cvt.rmi.s64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ll_rm>;
+def INT_NVVM_D2LL_RP : F_MATH_1<"cvt.rpi.s64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ll_rp>;
+
+def INT_NVVM_D2ULL_RN : F_MATH_1<"cvt.rni.u64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ull_rn>;
+def INT_NVVM_D2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ull_rz>;
+def INT_NVVM_D2ULL_RM : F_MATH_1<"cvt.rmi.u64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ull_rm>;
+def INT_NVVM_D2ULL_RP : F_MATH_1<"cvt.rpi.u64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ull_rp>;
+
+def INT_NVVM_LL2F_RN : F_MATH_1<"cvt.rn.f32.s64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ll2f_rn>;
+def INT_NVVM_LL2F_RZ : F_MATH_1<"cvt.rz.f32.s64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ll2f_rz>;
+def INT_NVVM_LL2F_RM : F_MATH_1<"cvt.rm.f32.s64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ll2f_rm>;
+def INT_NVVM_LL2F_RP : F_MATH_1<"cvt.rp.f32.s64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ll2f_rp>;
+def INT_NVVM_ULL2F_RN : F_MATH_1<"cvt.rn.f32.u64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ull2f_rn>;
+def INT_NVVM_ULL2F_RZ : F_MATH_1<"cvt.rz.f32.u64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ull2f_rz>;
+def INT_NVVM_ULL2F_RM : F_MATH_1<"cvt.rm.f32.u64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ull2f_rm>;
+def INT_NVVM_ULL2F_RP : F_MATH_1<"cvt.rp.f32.u64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ull2f_rp>;
+
+def INT_NVVM_LL2D_RN : F_MATH_1<"cvt.rn.f64.s64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ll2d_rn>;
+def INT_NVVM_LL2D_RZ : F_MATH_1<"cvt.rz.f64.s64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ll2d_rz>;
+def INT_NVVM_LL2D_RM : F_MATH_1<"cvt.rm.f64.s64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ll2d_rm>;
+def INT_NVVM_LL2D_RP : F_MATH_1<"cvt.rp.f64.s64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ll2d_rp>;
+def INT_NVVM_ULL2D_RN : F_MATH_1<"cvt.rn.f64.u64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ull2d_rn>;
+def INT_NVVM_ULL2D_RZ : F_MATH_1<"cvt.rz.f64.u64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ull2d_rz>;
+def INT_NVVM_ULL2D_RM : F_MATH_1<"cvt.rm.f64.u64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ull2d_rm>;
+def INT_NVVM_ULL2D_RP : F_MATH_1<"cvt.rp.f64.u64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ull2d_rp>;
+
+def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
+ !strconcat(".reg .b16 %temp;\n\t",
+ !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
+ !strconcat("mov.b16 \t$dst, %temp;\n",
+ "}}")))),
+ Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
+def INT_NVVM_F2H_RN : F_MATH_1<!strconcat("{{\n\t",
+ !strconcat(".reg .b16 %temp;\n\t",
+ !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
+ !strconcat("mov.b16 \t$dst, %temp;\n",
+ "}}")))),
+ Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
+
+def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
+ !strconcat(".reg .b16 %temp;\n\t",
+ !strconcat("mov.b16 \t%temp, $src0;\n\t",
+ !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
+ "}}")))),
+ Float32Regs, Int16Regs, int_nvvm_h2f>;
+
+//
+// Bitcast
+//
+
+def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_bitcast_f2i>;
+def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_bitcast_i2f>;
+
+def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_bitcast_ll2d>;
+def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_bitcast_d2ll>;
+
+//-----------------------------------
+// Atomic Functions
+//-----------------------------------
+
+class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
+ : PatFrag<ops, frag, [{
+ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
+}]>;
+class ATOMIC_SHARED_CHK <dag ops, dag frag>
+ : PatFrag<ops, frag, [{
+ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
+}]>;
+class ATOMIC_GENERIC_CHK <dag ops, dag frag>
+ : PatFrag<ops, frag, [{
+ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
+}]>;
+
+multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+ string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
+ Operand IMMType, SDNode IMM, Predicate Pred> {
+ def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b;", ""))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ Requires<[Pred]>;
+ def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b;", ""))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
+ Requires<[Pred]>;
+}
+multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
+ string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
+ defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, IMM, Pred>;
+ defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, IMM, Pred>;
+}
+
+// has 2 operands, neg the second one
+multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+ string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
+ Operand IMMType, Predicate Pred> {
+ def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.s",
+ !strconcat(TypeStr,
+ !strconcat(" temp; \n\t",
+ !strconcat("neg.s",
+ !strconcat(TypeStr,
+ !strconcat(" \ttemp, $b; \n\t",
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(".u",
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], temp; \n\t",
+ !strconcat("}}", "")))))))))))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ Requires<[Pred]>;
+}
+multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
+ string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
+ Predicate Pred> {
+ defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, Pred> ;
+ defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, Pred> ;
+}
+
+// has 3 operands
+multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+ string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
+ Operand IMMType, Predicate Pred> {
+ def reg : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, regclass:$b, regclass:$c),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+ [(set regclass:$dst,
+ (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
+ Requires<[Pred]>;
+ def imm1 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, IMMType:$b, regclass:$c),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
+ Requires<[Pred]>;
+ def imm2 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, regclass:$b, IMMType:$c),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
+ Requires<[Pred]>;
+ def imm3 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
+ Requires<[Pred]>;
+}
+multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
+ string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
+ defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, Pred>;
+ defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, Pred>;
+}
+
+// atom_add
+
+def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_32 node:$a, node:$b)>;
+def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_32 node:$a, node:$b)>;
+def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_32 node:$a, node:$b)>;
+def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_64 node:$a, node:$b)>;
+def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_64 node:$a, node:$b)>;
+def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_64 node:$a, node:$b)>;
+def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
+ atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
+ atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
+ atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+ ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
+ atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
+ atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
+ atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+ ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;
+
+defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
+ atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
+defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
+ atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
+defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
+ atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
+
+// atom_sub
+
+def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_32 node:$a, node:$b)>;
+def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_32 node:$a, node:$b)>;
+def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_32 node:$a, node:$b)>;
+def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_64 node:$a, node:$b)>;
+def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_64 node:$a, node:$b)>;
+def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_64 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
+ atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
+ atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
+ atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
+ ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
+ atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
+ atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
+ atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
+ ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
+
+// atom_swap
+
+def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_swap_32 node:$a, node:$b)>;
+def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_swap_32 node:$a, node:$b)>;
+def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_swap_32 node:$a, node:$b)>;
+def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_swap_64 node:$a, node:$b)>;
+def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_swap_64 node:$a, node:$b)>;
+def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_swap_64 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
+ atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
+ atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
+ atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+ ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
+ atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
+ atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
+ atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+ ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
+
+// atom_max
+
+def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
+ , (atomic_load_max_32 node:$a, node:$b)>;
+def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_max_32 node:$a, node:$b)>;
+def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_max_32 node:$a, node:$b)>;
+def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_32 node:$a, node:$b)>;
+def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_32 node:$a, node:$b)>;
+def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
+ ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
+ ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
+ atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+ ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+ ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
+ ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
+ atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+ ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// atom_min
+
+def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_32 node:$a, node:$b)>;
+def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_32 node:$a, node:$b)>;
+def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_32 node:$a, node:$b)>;
+def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_32 node:$a, node:$b)>;
+def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_32 node:$a, node:$b)>;
+def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
+ ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
+ ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
+ atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+ ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+ ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
+ ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
+ atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+ ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// atom_inc atom_dec
+
+def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
+def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
+def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
+def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
+def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
+def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
+ atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
+ atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
+ atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+ ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
+ atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
+ atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
+ atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+ ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// atom_and
+
+def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_32 node:$a, node:$b)>;
+def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_32 node:$a, node:$b)>;
+def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
+ atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
+ atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
+ atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+ ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// atom_or
+
+def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_32 node:$a, node:$b)>;
+def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_32 node:$a, node:$b)>;
+def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
+ atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
+ atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+ ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
+ atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
+
+// atom_xor
+
+def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_32 node:$a, node:$b)>;
+def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_32 node:$a, node:$b)>;
+def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
+ atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
+ atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
+ atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+ ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// atom_cas
+
+def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
+
+defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
+ atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
+ atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
+ atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
+ ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
+ atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
+ atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
+ atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
+ ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
+
+
+//-----------------------------------
+// Read Special Registers
+//-----------------------------------
+class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
+ NVPTXInst<(outs regclassOut:$dst), (ins),
+ OpStr,
+ [(set regclassOut:$dst, (IntOp))]>;
+
+def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
+ int_nvvm_read_ptx_sreg_tid_x>;
+def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
+ int_nvvm_read_ptx_sreg_tid_y>;
+def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
+ int_nvvm_read_ptx_sreg_tid_z>;
+
+def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ntid_x>;
+def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ntid_y>;
+def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ntid_z>;
+
+def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ctaid_x>;
+def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ctaid_y>;
+def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ctaid_z>;
+
+def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
+ int_nvvm_read_ptx_sreg_nctaid_x>;
+def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
+ int_nvvm_read_ptx_sreg_nctaid_y>;
+def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
+ int_nvvm_read_ptx_sreg_nctaid_z>;
+
+def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
+ int_nvvm_read_ptx_sreg_warpsize>;
+
+
+//-----------------------------------
+// Support for ldu on sm_20 or later
+//-----------------------------------
+
+// Scalar
+// @TODO: Revisit this, Changed imemAny to imem
+multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDU]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
+}
+
+defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int8Regs,
+int_nvvm_ldu_global_i>;
+defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
+int_nvvm_ldu_global_i>;
+defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
+int_nvvm_ldu_global_i>;
+defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
+int_nvvm_ldu_global_i>;
+defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs,
+int_nvvm_ldu_global_f>;
+defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs,
+int_nvvm_ldu_global_f>;
+defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
+int_nvvm_ldu_global_p>;
+defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
+int_nvvm_ldu_global_p>;
+
+// vector
+
+// Elementized vector ldu
+multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int32Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int64Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+}
+
+multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins Int32Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins Int64Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+}
+
+defm INT_PTX_LDU_G_v2i8_ELE
+ : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int8Regs>;
+defm INT_PTX_LDU_G_v2i16_ELE
+ : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDU_G_v2i32_ELE
+ : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDU_G_v2f32_ELE
+ : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
+defm INT_PTX_LDU_G_v2i64_ELE
+ : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
+defm INT_PTX_LDU_G_v2f64_ELE
+ : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
+defm INT_PTX_LDU_G_v4i8_ELE
+ : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int8Regs>;
+defm INT_PTX_LDU_G_v4i16_ELE
+ : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
+ Int16Regs>;
+defm INT_PTX_LDU_G_v4i32_ELE
+ : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
+ Int32Regs>;
+defm INT_PTX_LDU_G_v4f32_ELE
+ : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
+ Float32Regs>;
+
+
+//-----------------------------------
+// Support for ldg on sm_35 or later
+//-----------------------------------
+
+def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ return M->getMemoryVT() == MVT::i8;
+}]>;
+
+multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDG]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+}
+
+multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDG]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+}
+
+defm INT_PTX_LDG_GLOBAL_i8
+ : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
+defm INT_PTX_LDG_GLOBAL_i16
+ : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i32
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i64
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_f32
+ : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_f64
+ : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_p32
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
+defm INT_PTX_LDG_GLOBAL_p64
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
+
+// vector
+
+// Elementized vector ldg
+multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+}
+
+multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+}
+
+// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
+defm INT_PTX_LDG_G_v2i8_ELE
+ : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i16_ELE
+ : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i32_ELE
+ : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v2f32_ELE
+ : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
+defm INT_PTX_LDG_G_v2i64_ELE
+ : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
+defm INT_PTX_LDG_G_v2f64_ELE
+ : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
+defm INT_PTX_LDG_G_v4i8_ELE
+ : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i16_ELE
+ : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i32_ELE
+ : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v4f32_ELE
+ : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
+
+
+multiclass NG_TO_G<string Str, Intrinsic Intrin> {
+ def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
+ [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
+ Requires<[hasGenericLdSt]>;
+ def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
+ [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
+ Requires<[hasGenericLdSt]>;
+
+// @TODO: Are these actually needed? I believe global addresses will be copied
+// to register values anyway.
+ /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
+ !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
+ [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasGenericLdSt]>;
+ def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
+ !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
+ [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasGenericLdSt]>;*/
+
+ def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
+ def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
+
+// @TODO: Are these actually needed? I believe global addresses will be copied
+// to register values anyway.
+ /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
+ def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
+}
+
+multiclass G_TO_NG<string Str, Intrinsic Intrin> {
+ def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")),
+ [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
+ Requires<[hasGenericLdSt]>;
+ def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")),
+ [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
+ Requires<[hasGenericLdSt]>;
+ def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
+ def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
+}
+
+defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
+defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
+defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
+
+defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
+defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
+defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
+
+def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>;
+def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>;
+
+
+
+// @TODO: Revisit this. There is a type
+// contradiction between iPTRAny and iPTR for the def.
+/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen
+ (Wrapper tglobaladdr:$src)))]>;
+def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen
+ (Wrapper tglobaladdr:$src)))]>;*/
+
+
+def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>;
+def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>;
+
+
+// nvvm.ptr.gen.to.param
+def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
+ (ins Int32Regs:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result,
+ (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
+def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
+ (ins Int64Regs:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result,
+ (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
+
+
+// nvvm.move intrinsicc
+def nvvm_move_i8 : NVPTXInst<(outs Int8Regs:$r), (ins Int8Regs:$s),
+ "mov.b16 \t$r, $s;",
+ [(set Int8Regs:$r,
+ (int_nvvm_move_i8 Int8Regs:$s))]>;
+def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
+ "mov.b16 \t$r, $s;",
+ [(set Int16Regs:$r,
+ (int_nvvm_move_i16 Int16Regs:$s))]>;
+def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
+ "mov.b32 \t$r, $s;",
+ [(set Int32Regs:$r,
+ (int_nvvm_move_i32 Int32Regs:$s))]>;
+def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
+ "mov.b64 \t$r, $s;",
+ [(set Int64Regs:$r,
+ (int_nvvm_move_i64 Int64Regs:$s))]>;
+def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
+ "mov.f32 \t$r, $s;",
+ [(set Float32Regs:$r,
+ (int_nvvm_move_float Float32Regs:$s))]>;
+def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
+ "mov.f64 \t$r, $s;",
+ [(set Float64Regs:$r,
+ (int_nvvm_move_double Float64Regs:$s))]>;
+def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
+ "mov.u32 \t$r, $s;",
+ [(set Int32Regs:$r,
+ (int_nvvm_move_ptr Int32Regs:$s))]>;
+def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
+ "mov.u64 \t$r, $s;",
+ [(set Int64Regs:$r,
+ (int_nvvm_move_ptr Int64Regs:$s))]>;
+
+// @TODO: Are these actually needed, or will we always just see symbols
+// copied to registers first?
+/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
+ "mov.u32 \t$r, $s;",
+ [(set Int32Regs:$r,
+ (int_nvvm_move_ptr texternalsym:$s))]>;
+def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
+ "mov.u64 \t$r, $s;",
+ [(set Int64Regs:$r,
+ (int_nvvm_move_ptr texternalsym:$s))]>;*/
+
+
+// MoveParam %r1, param
+// ptr_local_to_gen %r2, %r1
+// ptr_gen_to_local %r3, %r2
+// ->
+// mov %r1, param
+
+// @TODO: Revisit this. There is a type
+// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
+// instructions are not currently defined. However, we can use the ptr
+// variants and the asm printer will do the right thing.
+def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
+ (MoveParam texternalsym:$src)))),
+ (nvvm_move_ptr64 texternalsym:$src)>;
+def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
+ (MoveParam texternalsym:$src)))),
+ (nvvm_move_ptr32 texternalsym:$src)>;
+
+
+//-----------------------------------
+// Compiler Error Warn
+// - Just ignore them in codegen
+//-----------------------------------
+
+def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
+ "// llvm.nvvm.compiler.warn()",
+ [(int_nvvm_compiler_warn Int32Regs:$a)]>;
+def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
+ "// llvm.nvvm.compiler.warn()",
+ [(int_nvvm_compiler_warn Int64Regs:$a)]>;
+def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
+ "// llvm.nvvm.compiler.error()",
+ [(int_nvvm_compiler_error Int32Regs:$a)]>;
+def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
+ "// llvm.nvvm.compiler.error()",
+ [(int_nvvm_compiler_error Int64Regs:$a)]>;
+
+
+
+//===-- Old PTX Back-end Intrinsics ---------------------------------------===//
+
+// These intrinsics are handled to retain compatibility with the old backend.
+
+// PTX Special Purpose Register Accessor Intrinsics
+
+class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+ : NVPTXInst<(outs Int64Regs:$d), (ins),
+ !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
+ [(set Int64Regs:$d, (intop))]>;
+
+class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+ : NVPTXInst<(outs Int32Regs:$d), (ins),
+ !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
+ [(set Int32Regs:$d, (intop))]>;
+
+// TODO Add read vector-version of special registers
+
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
+ int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
+ int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
+ int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
+ int_ptx_read_tid_w>;
+
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
+ int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
+ int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
+ int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
+ int_ptx_read_ntid_w>;
+
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
+ int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
+ int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
+ int_ptx_read_nwarpid>;
+
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
+ int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
+ int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
+ int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
+ int_ptx_read_ctaid_w>;
+
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
+ int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
+ int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
+ int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
+ int_ptx_read_nctaid_w>;
+
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
+ int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
+ int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
+ int_ptx_read_gridid>;
+
+def PTX_READ_LANEMASK_EQ
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
+def PTX_READ_LANEMASK_LE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
+def PTX_READ_LANEMASK_LT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
+def PTX_READ_LANEMASK_GE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
+def PTX_READ_LANEMASK_GT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
+
+def PTX_READ_CLOCK
+ : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
+def PTX_READ_CLOCK64
+ : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
+
+def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
+def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
+def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
+def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
+
+// PTX Parallel Synchronization and Communication Intrinsics
+
+def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
+ [(int_ptx_bar_sync imm:$i)]>;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
new file mode 100644
index 000000000000..7c257b4c6a89
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -0,0 +1,205 @@
+//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Lower aggregate copies, memset, memcpy, memmov intrinsics into loops when
+// the size is large or is not a compile-time constant.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXLowerAggrCopies.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/InstIterator.h"
+
+using namespace llvm;
+
+namespace llvm { FunctionPass *createLowerAggrCopies(); }
+
+char NVPTXLowerAggrCopies::ID = 0;
+
+// Lower MemTransferInst or load-store pair to loop
+static void convertTransferToLoop(
+ Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
+ //unsigned numLoads,
+ bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
+ Type *indType = len->getType();
+
+ BasicBlock *origBB = splitAt->getParent();
+ BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
+ BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+
+ origBB->getTerminator()->setSuccessor(0, loopBB);
+ IRBuilder<> builder(origBB, origBB->getTerminator());
+
+ // srcAddr and dstAddr are expected to be pointer types,
+ // so no check is made here.
+ unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
+ unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+
+ // Cast pointers to (char *)
+ srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
+ dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));
+
+ IRBuilder<> loop(loopBB);
+ // The loop index (ind) is a phi node.
+ PHINode *ind = loop.CreatePHI(indType, 0);
+ // Incoming value for ind is 0
+ ind->addIncoming(ConstantInt::get(indType, 0), origBB);
+
+ // load from srcAddr+ind
+ Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
+ // store at dstAddr+ind
+ loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
+
+ // The value for ind coming from backedge is (ind + 1)
+ Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
+ ind->addIncoming(newind, loopBB);
+
+ loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+}
+
+// Lower MemSetInst to loop
+static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
+ Value *len, Value *val, LLVMContext &Context,
+ Function &F) {
+ BasicBlock *origBB = splitAt->getParent();
+ BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
+ BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+
+ origBB->getTerminator()->setSuccessor(0, loopBB);
+ IRBuilder<> builder(origBB, origBB->getTerminator());
+
+ unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+
+ // Cast pointer to the type of value getting stored
+ dstAddr =
+ builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
+
+ IRBuilder<> loop(loopBB);
+ PHINode *ind = loop.CreatePHI(len->getType(), 0);
+ ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
+
+ loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);
+
+ Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
+ ind->addIncoming(newind, loopBB);
+
+ loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+}
+
+bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
+ SmallVector<LoadInst *, 4> aggrLoads;
+ SmallVector<MemTransferInst *, 4> aggrMemcpys;
+ SmallVector<MemSetInst *, 4> aggrMemsets;
+
+ DataLayout *TD = &getAnalysis<DataLayout>();
+ LLVMContext &Context = F.getParent()->getContext();
+
+ //
+ // Collect all the aggrLoads, aggrMemcpys and addrMemsets.
+ //
+ //const BasicBlock *firstBB = &F.front(); // first BB in F
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ //BasicBlock *bb = BI;
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+ ++II) {
+ if (LoadInst *load = dyn_cast<LoadInst>(II)) {
+
+ if (load->hasOneUse() == false)
+ continue;
+
+ if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ continue;
+
+ User *use = *(load->use_begin());
+ if (StoreInst *store = dyn_cast<StoreInst>(use)) {
+ if (store->getOperand(0) != load) //getValueOperand
+ continue;
+ aggrLoads.push_back(load);
+ }
+ } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
+ Value *len = intr->getLength();
+ // If the number of elements being copied is greater
+ // than MaxAggrCopySize, lower it to a loop
+ if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
+ if (len_int->getZExtValue() >= MaxAggrCopySize) {
+ aggrMemcpys.push_back(intr);
+ }
+ } else {
+ // turn variable length memcpy/memmov into loop
+ aggrMemcpys.push_back(intr);
+ }
+ } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
+ Value *len = memsetintr->getLength();
+ if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
+ if (len_int->getZExtValue() >= MaxAggrCopySize) {
+ aggrMemsets.push_back(memsetintr);
+ }
+ } else {
+ // turn variable length memset into loop
+ aggrMemsets.push_back(memsetintr);
+ }
+ }
+ }
+ }
+ if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
+ (aggrMemsets.size() == 0))
+ return false;
+
+ //
+ // Do the transformation of an aggr load/copy/set to a loop
+ //
+ for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
+ LoadInst *load = aggrLoads[i];
+ StoreInst *store = dyn_cast<StoreInst>(*load->use_begin());
+ Value *srcAddr = load->getOperand(0);
+ Value *dstAddr = store->getOperand(1);
+ unsigned numLoads = TD->getTypeStoreSize(load->getType());
+ Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
+
+ convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
+ store->isVolatile(), Context, F);
+
+ store->eraseFromParent();
+ load->eraseFromParent();
+ }
+
+ for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
+ MemTransferInst *cpy = aggrMemcpys[i];
+ Value *len = cpy->getLength();
+ // llvm 2.7 version of memcpy does not have volatile
+ // operand yet. So always making it non-volatile
+ // optimistically, so that we don't see unnecessary
+ // st.volatile in ptx
+ convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
+ false, Context, F);
+ cpy->eraseFromParent();
+ }
+
+ for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
+ MemSetInst *memsetinst = aggrMemsets[i];
+ Value *len = memsetinst->getLength();
+ Value *val = memsetinst->getValue();
+ convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
+ F);
+ memsetinst->eraseFromParent();
+ }
+
+ return true;
+}
+
+FunctionPass *llvm::createLowerAggrCopies() {
+ return new NVPTXLowerAggrCopies();
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
new file mode 100644
index 000000000000..286e753fa92b
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -0,0 +1,47 @@
+//===-- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the NVIDIA specific lowering of
+// aggregate copies
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_LOWER_AGGR_COPIES_H
+#define NVPTX_LOWER_AGGR_COPIES_H
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+// actual analysis class, which is a functionpass
+struct NVPTXLowerAggrCopies : public FunctionPass {
+ static char ID;
+
+ NVPTXLowerAggrCopies() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DataLayout>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ static const unsigned MaxAggrCopySize = 128;
+
+ virtual const char *getPassName() const {
+ return "Lower aggregate copies/intrinsics into loops";
+ }
+};
+
+extern FunctionPass *createLowerAggrCopies();
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXNumRegisters.h b/contrib/llvm/lib/Target/NVPTX/NVPTXNumRegisters.h
new file mode 100644
index 000000000000..a95c16b1e67e
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXNumRegisters.h
@@ -0,0 +1,16 @@
+
+//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_NUM_REGISTERS_H
+#define NVPTX_NUM_REGISTERS_H
+
+namespace llvm { const unsigned NVPTXNumRegisters = 396; }
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
new file mode 100644
index 000000000000..282465359b07
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -0,0 +1,131 @@
+//===- NVPTXRegisterInfo.cpp - NVPTX Register Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "nvptx-reg-info"
+
+#include "NVPTXRegisterInfo.h"
+#include "NVPTX.h"
+#include "NVPTXSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
+ if (RC == &NVPTX::Float32RegsRegClass) {
+ return ".f32";
+ }
+ if (RC == &NVPTX::Float64RegsRegClass) {
+ return ".f64";
+ } else if (RC == &NVPTX::Int64RegsRegClass) {
+ return ".s64";
+ } else if (RC == &NVPTX::Int32RegsRegClass) {
+ return ".s32";
+ } else if (RC == &NVPTX::Int16RegsRegClass) {
+ return ".s16";
+ }
+ // Int8Regs become 16-bit registers in PTX
+ else if (RC == &NVPTX::Int8RegsRegClass) {
+ return ".s16";
+ } else if (RC == &NVPTX::Int1RegsRegClass) {
+ return ".pred";
+ } else if (RC == &NVPTX::SpecialRegsRegClass) {
+ return "!Special!";
+ } else {
+ return "INTERNAL";
+ }
+ return "";
+}
+
+std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
+ if (RC == &NVPTX::Float32RegsRegClass) {
+ return "%f";
+ }
+ if (RC == &NVPTX::Float64RegsRegClass) {
+ return "%fd";
+ } else if (RC == &NVPTX::Int64RegsRegClass) {
+ return "%rd";
+ } else if (RC == &NVPTX::Int32RegsRegClass) {
+ return "%r";
+ } else if (RC == &NVPTX::Int16RegsRegClass) {
+ return "%rs";
+ } else if (RC == &NVPTX::Int8RegsRegClass) {
+ return "%rc";
+ } else if (RC == &NVPTX::Int1RegsRegClass) {
+ return "%p";
+ } else if (RC == &NVPTX::SpecialRegsRegClass) {
+ return "!Special!";
+ } else {
+ return "INTERNAL";
+ }
+ return "";
+}
+}
+
+NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
+ const NVPTXSubtarget &st)
+ : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
+
+#define GET_REGINFO_TARGET_DESC
+#include "NVPTXGenRegisterInfo.inc"
+
+/// NVPTX Callee Saved Registers
+const uint16_t *
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const uint16_t CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+// NVPTX Callee Saved Reg Classes
+const TargetRegisterClass *const *
+NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 };
+ return CalleeSavedRegClasses;
+}
+
+BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved;
+}
+
+void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ MachineInstr &MI = *II;
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(FIOperandNum + 1).getImm();
+
+ // Using I0 as the frame pointer
+ MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+}
+
+int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return 0;
+}
+
+unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return NVPTX::VRFrame;
+}
+
+unsigned NVPTXRegisterInfo::getRARegister() const { return 0; }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
new file mode 100644
index 000000000000..d40682066142
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -0,0 +1,79 @@
+//===- NVPTXRegisterInfo.h - NVPTX Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXREGISTERINFO_H
+#define NVPTXREGISTERINFO_H
+
+#include "ManagedStringPool.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "NVPTXGenRegisterInfo.inc"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <sstream>
+
+namespace llvm {
+
+// Forward Declarations.
+class TargetInstrInfo;
+class NVPTXSubtarget;
+
+class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
+private:
+ bool Is64Bit;
+ // Hold Strings that can be free'd all together with NVPTXRegisterInfo
+ ManagedStringPool ManagedStrPool;
+
+public:
+ NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st);
+
+ //------------------------------------------------------
+ // Pure virtual functions from TargetRegisterInfo
+ //------------------------------------------------------
+
+ // NVPTX callee saved registers
+ virtual const uint16_t *
+ getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ // NVPTX callee saved register classes
+ virtual const TargetRegisterClass *const *
+ getCalleeSavedRegClasses(const MachineFunction *MF) const;
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+
+ virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const;
+ virtual unsigned getRARegister() const;
+
+ ManagedStringPool *getStrPool() const {
+ return const_cast<ManagedStringPool *>(&ManagedStrPool);
+ }
+
+ const char *getName(unsigned RegNo) const {
+ std::stringstream O;
+ O << "reg" << RegNo;
+ return getStrPool()->getManagedString(O.str().c_str())->c_str();
+ }
+
+};
+
+std::string getNVPTXRegClassName(const TargetRegisterClass *RC);
+std::string getNVPTXRegClassStr(const TargetRegisterClass *RC);
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
new file mode 100644
index 000000000000..8d100d631683
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -0,0 +1,64 @@
+//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class NVPTXReg<string n> : Register<n> {
+ let Namespace = "NVPTX";
+}
+
+class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
+ : RegisterClass <"NVPTX", regTypes, alignment, regList>;
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+// Special Registers used as stack pointer
+def VRFrame : NVPTXReg<"%SP">;
+def VRFrameLocal : NVPTXReg<"%SPL">;
+
+// Special Registers used as the stack
+def VRDepot : NVPTXReg<"%Depot">;
+
+foreach i = 0-395 in {
+ def P#i : NVPTXReg<"%p"#i>; // Predicate
+ def RC#i : NVPTXReg<"%rc"#i>; // 8-bit
+ def RS#i : NVPTXReg<"%rs"#i>; // 16-bit
+ def R#i : NVPTXReg<"%r"#i>; // 32-bit
+ def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
+ def F#i : NVPTXReg<"%f"#i>; // 32-bit float
+ def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
+
+ // Arguments
+ def ia#i : NVPTXReg<"%ia"#i>;
+ def la#i : NVPTXReg<"%la"#i>;
+ def fa#i : NVPTXReg<"%fa"#i>;
+ def da#i : NVPTXReg<"%da"#i>;
+}
+
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 395))>;
+def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%u", 0, 395))>;
+def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 395))>;
+def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 395))>;
+def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 395))>;
+def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 395))>;
+def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 395))>;
+def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 395))>;
+def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 395))>;
+def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 395))>;
+def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;
+
+// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
new file mode 100644
index 000000000000..e166be5a68e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
@@ -0,0 +1,47 @@
+//===- NVPTXSection.h - NVPTX-specific section representation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTXSection class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_NVPTXSECTION_H
+#define LLVM_NVPTXSECTION_H
+
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCSection.h"
+#include <vector>
+
+namespace llvm {
+/// NVPTXSection - Represents a section in PTX
+/// PTX does not have sections. We create this class in order to use
+/// the ASMPrint interface.
+///
+class NVPTXSection : public MCSection {
+
+public:
+ NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
+ ~NVPTXSection() {}
+
+ /// Override this as NVPTX has its own way of printing switching
+ /// to a section.
+ virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {}
+
+ /// Base address of PTX sections is zero.
+ virtual bool isBaseAddressKnownZero() const { return true; }
+ virtual bool UseCodeAlign() const { return false; }
+ virtual bool isVirtualSection() const { return false; }
+ virtual std::string getLabelBeginName() const { return ""; }
+ virtual std::string getLabelEndName() const { return ""; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
new file mode 100644
index 000000000000..83dfe120899a
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -0,0 +1,73 @@
+//===- NVPTXSplitBBatBar.cpp - Split BB at Barrier --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Split basic blocks so that a basic block that contains a barrier instruction
+// only contains the barrier instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXSplitBBatBar.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/InstIterator.h"
+
+using namespace llvm;
+
+namespace llvm { FunctionPass *createSplitBBatBarPass(); }
+
+char NVPTXSplitBBatBar::ID = 0;
+
+bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
+
+ SmallVector<Instruction *, 4> SplitPoints;
+ bool changed = false;
+
+ // Collect all the split points in SplitPoints
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ BasicBlock::iterator IB = BI->begin();
+ BasicBlock::iterator II = IB;
+ BasicBlock::iterator IE = BI->end();
+
+ // Skit the first intruction. No splitting is needed at this
+ // point even if this is a bar.
+ while (II != IE) {
+ if (IntrinsicInst *inst = dyn_cast<IntrinsicInst>(II)) {
+ Intrinsic::ID id = inst->getIntrinsicID();
+ // If this is a barrier, split at this instruction
+ // and the next instruction.
+ if (llvm::isBarrierIntrinsic(id)) {
+ if (II != IB)
+ SplitPoints.push_back(II);
+ II++;
+ if ((II != IE) && (!II->isTerminator())) {
+ SplitPoints.push_back(II);
+ II++;
+ }
+ continue;
+ }
+ }
+ II++;
+ }
+ }
+
+ for (unsigned i = 0; i != SplitPoints.size(); i++) {
+ changed = true;
+ Instruction *inst = SplitPoints[i];
+ inst->getParent()->splitBasicBlock(inst, "bar_split");
+ }
+
+ return changed;
+}
+
+// This interface will most likely not be necessary, because this pass will
+// not be invoked by the driver, but will be used as a prerequisite to
+// another pass.
+FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h
new file mode 100644
index 000000000000..bdafba9075a0
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h
@@ -0,0 +1,41 @@
+//===-- llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the NVIDIA specific declarations
+// for splitting basic blocks at barrier instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_SPLIT_BB_AT_BAR_H
+#define NVPTX_SPLIT_BB_AT_BAR_H
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+// actual analysis class, which is a functionpass
+struct NVPTXSplitBBatBar : public FunctionPass {
+ static char ID;
+
+ NVPTXSplitBBatBar() : FunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ }
+ virtual bool runOnFunction(Function &F);
+
+ virtual const char *getPassName() const {
+ return "Split basic blocks at barrier";
+ }
+};
+
+extern FunctionPass *createSplitBBatBarPass();
+}
+
+#endif //NVPTX_SPLIT_BB_AT_BAR_H
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
new file mode 100644
index 000000000000..2dcd73dcff9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -0,0 +1,61 @@
+//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the NVPTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXSubtarget.h"
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "NVPTXGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+// Select Driver Interface
+#include "llvm/Support/CommandLine.h"
+namespace {
+cl::opt<NVPTX::DrvInterface> DriverInterface(
+ cl::desc("Choose driver interface:"),
+ cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
+ clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
+ clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd),
+ cl::init(NVPTX::NVCL));
+}
+
+NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
+ SmVersion(20) {
+
+ drvInterface = DriverInterface;
+
+ // Provide the default CPU if none
+ std::string defCPU = "sm_20";
+
+ ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS);
+
+ // Get the TargetName from the FS if available
+ if (FS.empty() && CPU.empty())
+ TargetName = defCPU;
+ else if (!CPU.empty())
+ TargetName = CPU;
+ else
+ llvm_unreachable("we are not using FeatureStr");
+
+ // We default to PTX 3.1, but we cannot just default to it in the initializer
+ // since the attribute parser checks if the given option is >= the default.
+ // So if we set ptx31 as the default, the ptx30 attribute would never match.
+ // Instead, we use 0 as the default and manually set 31 if the default is
+ // used.
+ if (PTXVersion == 0) {
+ PTXVersion = 31;
+ }
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
new file mode 100644
index 000000000000..670077daaa69
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -0,0 +1,96 @@
+//=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXSUBTARGET_H
+#define NVPTXSUBTARGET_H
+
+#include "NVPTX.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "NVPTXGenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+
+class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
+
+ std::string TargetName;
+ NVPTX::DrvInterface drvInterface;
+ bool Is64Bit;
+
+ // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
+ unsigned PTXVersion;
+
+ // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
+ unsigned int SmVersion;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ NVPTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ bool hasBrkPt() const { return SmVersion >= 11; }
+ bool hasAtomRedG32() const { return SmVersion >= 11; }
+ bool hasAtomRedS32() const { return SmVersion >= 12; }
+ bool hasAtomRedG64() const { return SmVersion >= 12; }
+ bool hasAtomRedS64() const { return SmVersion >= 20; }
+ bool hasAtomRedGen32() const { return SmVersion >= 20; }
+ bool hasAtomRedGen64() const { return SmVersion >= 20; }
+ bool hasAtomAddF32() const { return SmVersion >= 20; }
+ bool hasVote() const { return SmVersion >= 12; }
+ bool hasDouble() const { return SmVersion >= 13; }
+ bool reqPTX20() const { return SmVersion >= 20; }
+ bool hasF32FTZ() const { return SmVersion >= 20; }
+ bool hasFMAF32() const { return SmVersion >= 20; }
+ bool hasFMAF64() const { return SmVersion >= 13; }
+ bool hasLDG() const { return SmVersion >= 32; }
+ bool hasLDU() const { return SmVersion >= 20; }
+ bool hasGenericLdSt() const { return SmVersion >= 20; }
+ inline bool hasHWROT32() const { return false; }
+ inline bool hasSWROT32() const { return true; }
+ inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
+ inline bool hasROT64() const { return SmVersion >= 20; }
+
+ bool is64Bit() const { return Is64Bit; }
+
+ unsigned int getSmVersion() const { return SmVersion; }
+ NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
+ std::string getTargetName() const { return TargetName; }
+
+ unsigned getPTXVersion() const { return PTXVersion; }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ std::string getDataLayout() const {
+ const char *p;
+ if (is64Bit())
+ p = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
+ "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
+ "n16:32:64";
+ else
+ p = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
+ "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
+ "n16:32:64";
+
+ return std::string(p);
+ }
+
+};
+
+} // End llvm namespace
+
+#endif // NVPTXSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
new file mode 100644
index 000000000000..67ca6b58e5a6
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -0,0 +1,121 @@
+//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the NVPTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXTargetMachine.h"
+#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "NVPTX.h"
+#include "NVPTXAllocaHoisting.h"
+#include "NVPTXLowerAggrCopies.h"
+#include "NVPTXSplitBBatBar.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVVMReflectPass(PassRegistry&);
+}
+
+extern "C" void LLVMInitializeNVPTXTarget() {
+ // Register the target.
+ RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
+ RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
+
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
+
+ // FIXME: This pass is really intended to be invoked during IR optimization,
+ // but it's very NVPTX-specific.
+ initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
+}
+
+NVPTXTargetMachine::NVPTXTargetMachine(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(
+ *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {}
+
+void NVPTXTargetMachine32::anchor() {}
+
+NVPTXTargetMachine32::NVPTXTargetMachine32(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+
+void NVPTXTargetMachine64::anchor() {}
+
+NVPTXTargetMachine64::NVPTXTargetMachine64(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+
+namespace llvm {
+class NVPTXPassConfig : public TargetPassConfig {
+public:
+ NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ NVPTXTargetMachine &getNVPTXTargetMachine() const {
+ return getTM<NVPTXTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+};
+}
+
+TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
+ NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
+ return PassConfig;
+}
+
+bool NVPTXPassConfig::addInstSelector() {
+ addPass(createLowerAggrCopies());
+ addPass(createSplitBBatBarPass());
+ addPass(createAllocaHoisting());
+ addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
+ return false;
+}
+
+bool NVPTXPassConfig::addPreRegAlloc() { return false; }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
new file mode 100644
index 000000000000..5fbcf735b48f
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -0,0 +1,118 @@
+//===-- NVPTXTargetMachine.h - Define TargetMachine for NVPTX ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_TARGETMACHINE_H
+#define NVPTX_TARGETMACHINE_H
+
+#include "ManagedStringPool.h"
+#include "NVPTXFrameLowering.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXInstrInfo.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+/// NVPTXTargetMachine
+///
+class NVPTXTargetMachine : public LLVMTargetMachine {
+ NVPTXSubtarget Subtarget;
+ const DataLayout DL; // Calculates type size & alignment
+ NVPTXInstrInfo InstrInfo;
+ NVPTXTargetLowering TLInfo;
+ TargetSelectionDAGInfo TSInfo;
+
+ // NVPTX does not have any call stack frame, but need a NVPTX specific
+ // FrameLowering class because TargetFrameLowering is abstract.
+ NVPTXFrameLowering FrameLowering;
+
+ // Hold Strings that can be free'd all together with NVPTXTargetMachine
+ ManagedStringPool ManagedStrPool;
+
+ //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+ // bool DisableVerify, MCContext *&OutCtx);
+
+public:
+ NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+ virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const NVPTXRegisterInfo *getRegisterInfo() const {
+ return &(InstrInfo.getRegisterInfo());
+ }
+
+ virtual NVPTXTargetLowering *getTargetLowering() const {
+ return const_cast<NVPTXTargetLowering *>(&TLInfo);
+ }
+
+ virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ //virtual bool addInstSelector(PassManagerBase &PM,
+ // CodeGenOpt::Level OptLevel);
+
+ //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+
+ ManagedStringPool *getManagedStrPool() const {
+ return const_cast<ManagedStringPool *>(&ManagedStrPool);
+ }
+
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ // Emission of machine code through JITCodeEmitter is not supported.
+ virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
+ bool = true) {
+ return true;
+ }
+
+ // Emission of machine code through MCJIT is not supported.
+ virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
+ bool = true) {
+ return true;
+ }
+
+}; // NVPTXTargetMachine.
+
+class NVPTXTargetMachine32 : public NVPTXTargetMachine {
+ virtual void anchor();
+public:
+ NVPTXTargetMachine32(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+class NVPTXTargetMachine64 : public NVPTXTargetMachine {
+ virtual void anchor();
+public:
+ NVPTXTargetMachine64(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
new file mode 100644
index 000000000000..6ab0e08ad091
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -0,0 +1,102 @@
+//===-- NVPTXTargetObjectFile.h - NVPTX Object Info -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+
+#include "NVPTXSection.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <string>
+
+namespace llvm {
+class GlobalVariable;
+class Module;
+
+class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
+
+public:
+ NVPTXTargetObjectFile() {}
+ ~NVPTXTargetObjectFile() {
+ delete TextSection;
+ delete DataSection;
+ delete BSSSection;
+ delete ReadOnlySection;
+
+ delete StaticCtorSection;
+ delete StaticDtorSection;
+ delete LSDASection;
+ delete EHFrameSection;
+ delete DwarfAbbrevSection;
+ delete DwarfInfoSection;
+ delete DwarfLineSection;
+ delete DwarfFrameSection;
+ delete DwarfPubTypesSection;
+ delete DwarfDebugInlineSection;
+ delete DwarfStrSection;
+ delete DwarfLocSection;
+ delete DwarfARangesSection;
+ delete DwarfRangesSection;
+ delete DwarfMacroInfoSection;
+ }
+
+ virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
+ TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
+ DataSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
+ BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
+ ReadOnlySection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());
+
+ StaticCtorSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ StaticDtorSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ LSDASection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ EHFrameSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfAbbrevSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfInfoSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfLineSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfFrameSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfDebugInlineSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfStrSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfLocSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfARangesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfRangesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ }
+
+ virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
+ return ReadOnlySection;
+ }
+
+ virtual const MCSection *
+ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ return DataSection;
+ }
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
new file mode 100644
index 000000000000..6786eb02240c
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -0,0 +1,504 @@
+//===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains miscellaneous utility functions
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXUtilities.h"
+#include "NVPTX.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include <algorithm>
+#include <cstring>
+#include <map>
+#include <string>
+#include <vector>
+//#include <iostream>
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/InstIterator.h"
+
+using namespace llvm;
+
+typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
+typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
+typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
+
+ManagedStatic<per_module_annot_t> annotationCache;
+
+static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
+ assert(md && "Invalid mdnode for annotation");
+ assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
+ // start index = 1, to skip the global variable key
+ // increment = 2, to skip the value for each property-value pairs
+ for (unsigned i = 1, e = md->getNumOperands(); i != e; i += 2) {
+ // property
+ const MDString *prop = dyn_cast<MDString>(md->getOperand(i));
+ assert(prop && "Annotation property not a string");
+
+ // value
+ ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
+ assert(Val && "Value operand not a constant int");
+
+ std::string keyname = prop->getString().str();
+ if (retval.find(keyname) != retval.end())
+ retval[keyname].push_back(Val->getZExtValue());
+ else {
+ std::vector<unsigned> tmp;
+ tmp.push_back(Val->getZExtValue());
+ retval[keyname] = tmp;
+ }
+ }
+}
+
+static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
+ NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
+ if (!NMD)
+ return;
+ key_val_pair_t tmp;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *elem = NMD->getOperand(i);
+
+ Value *entity = elem->getOperand(0);
+ // entity may be null due to DCE
+ if (!entity)
+ continue;
+ if (entity != gv)
+ continue;
+
+ // accumulate annotations for entity in tmp
+ cacheAnnotationFromMD(elem, tmp);
+ }
+
+ if (tmp.empty()) // no annotations for this gv
+ return;
+
+ if ((*annotationCache).find(m) != (*annotationCache).end())
+ (*annotationCache)[m][gv] = tmp;
+ else {
+ global_val_annot_t tmp1;
+ tmp1[gv] = tmp;
+ (*annotationCache)[m] = tmp1;
+ }
+}
+
+bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
+ unsigned &retval) {
+ const Module *m = gv->getParent();
+ if ((*annotationCache).find(m) == (*annotationCache).end())
+ cacheAnnotationFromMD(m, gv);
+ else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
+ cacheAnnotationFromMD(m, gv);
+ if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
+ return false;
+ retval = (*annotationCache)[m][gv][prop][0];
+ return true;
+}
+
+bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
+ std::vector<unsigned> &retval) {
+ const Module *m = gv->getParent();
+ if ((*annotationCache).find(m) == (*annotationCache).end())
+ cacheAnnotationFromMD(m, gv);
+ else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
+ cacheAnnotationFromMD(m, gv);
+ if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
+ return false;
+ retval = (*annotationCache)[m][gv][prop];
+ return true;
+}
+
+bool llvm::isTexture(const llvm::Value &val) {
+ if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+ unsigned annot;
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
+ annot)) {
+ assert((annot == 1) && "Unexpected annotation on a texture symbol");
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::isSurface(const llvm::Value &val) {
+ if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+ unsigned annot;
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
+ annot)) {
+ assert((annot == 1) && "Unexpected annotation on a surface symbol");
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::isSampler(const llvm::Value &val) {
+ if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+ unsigned annot;
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+ annot)) {
+ assert((annot == 1) && "Unexpected annotation on a sampler symbol");
+ return true;
+ }
+ }
+ if (const Argument *arg = dyn_cast<Argument>(&val)) {
+ const Function *func = arg->getParent();
+ std::vector<unsigned> annot;
+ if (llvm::findAllNVVMAnnotation(
+ func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+ annot)) {
+ if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::isImageReadOnly(const llvm::Value &val) {
+ if (const Argument *arg = dyn_cast<Argument>(&val)) {
+ const Function *func = arg->getParent();
+ std::vector<unsigned> annot;
+ if (llvm::findAllNVVMAnnotation(func,
+ llvm::PropertyAnnotationNames[
+ llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
+ annot)) {
+ if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::isImageWriteOnly(const llvm::Value &val) {
+ if (const Argument *arg = dyn_cast<Argument>(&val)) {
+ const Function *func = arg->getParent();
+ std::vector<unsigned> annot;
+ if (llvm::findAllNVVMAnnotation(func,
+ llvm::PropertyAnnotationNames[
+ llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
+ annot)) {
+ if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::isImage(const llvm::Value &val) {
+ return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val);
+}
+
+std::string llvm::getTextureName(const llvm::Value &val) {
+ assert(val.hasName() && "Found texture variable with no name");
+ return val.getName();
+}
+
+std::string llvm::getSurfaceName(const llvm::Value &val) {
+ assert(val.hasName() && "Found surface variable with no name");
+ return val.getName();
+}
+
+std::string llvm::getSamplerName(const llvm::Value &val) {
+ assert(val.hasName() && "Found sampler variable with no name");
+ return val.getName();
+}
+
+bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x));
+}
+
+bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y));
+}
+
+bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z));
+}
+
+bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x));
+}
+
+bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y));
+}
+
+bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z));
+}
+
+bool llvm::getMinCTASm(const Function &F, unsigned &x) {
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x));
+}
+
+bool llvm::isKernelFunction(const Function &F) {
+ unsigned x = 0;
+ bool retval = llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
+ if (retval == false) {
+ // There is no NVVM metadata, check the calling convention
+ if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
+ return true;
+ else
+ return false;
+ }
+ return (x == 1);
+}
+
+bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
+ std::vector<unsigned> Vs;
+ bool retval = llvm::findAllNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
+ if (retval == false)
+ return false;
+ for (int i = 0, e = Vs.size(); i < e; i++) {
+ unsigned v = Vs[i];
+ if ((v >> 16) == index) {
+ align = v & 0xFFFF;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
+ if (MDNode *alignNode = I.getMetadata("callalign")) {
+ for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+ unsigned v = CI->getZExtValue();
+ if ((v >> 16) == index) {
+ align = v & 0xFFFF;
+ return true;
+ }
+ if ((v >> 16) > index) {
+ return false;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
+ if ((id == Intrinsic::nvvm_barrier0) ||
+ (id == Intrinsic::nvvm_barrier0_popc) ||
+ (id == Intrinsic::nvvm_barrier0_and) ||
+ (id == Intrinsic::nvvm_barrier0_or) ||
+ (id == Intrinsic::cuda_syncthreads))
+ return true;
+ return false;
+}
+
+// Interface for checking all memory space transfer related intrinsics
+bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
+ if (id == Intrinsic::nvvm_ptr_local_to_gen ||
+ id == Intrinsic::nvvm_ptr_shared_to_gen ||
+ id == Intrinsic::nvvm_ptr_global_to_gen ||
+ id == Intrinsic::nvvm_ptr_constant_to_gen ||
+ id == Intrinsic::nvvm_ptr_gen_to_global ||
+ id == Intrinsic::nvvm_ptr_gen_to_shared ||
+ id == Intrinsic::nvvm_ptr_gen_to_local ||
+ id == Intrinsic::nvvm_ptr_gen_to_constant ||
+ id == Intrinsic::nvvm_ptr_gen_to_param) {
+ return true;
+ }
+
+ return false;
+}
+
+// consider several special intrinsics in striping pointer casts, and
+// provide an option to ignore GEP indicies for find out the base address only
+// which could be used in simple alias disambigurate.
+const Value *
+llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
+ V = V->stripPointerCasts();
+ while (true) {
+ if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
+ if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
+ V = IS->getArgOperand(0)->stripPointerCasts();
+ continue;
+ }
+ } else if (ignore_GEP_indices)
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand()->stripPointerCasts();
+ continue;
+ }
+ break;
+ }
+ return V;
+}
+
+// consider several special intrinsics in striping pointer casts, and
+// - ignore GEP indicies for find out the base address only, and
+// - tracking PHINode
+// which could be used in simple alias disambigurate.
+const Value *
+llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
+ if (processed.find(V) != processed.end())
+ return NULL;
+ processed.insert(V);
+
+ const Value *V2 = V->stripPointerCasts();
+ if (V2 != V && processed.find(V2) != processed.end())
+ return NULL;
+ processed.insert(V2);
+
+ V = V2;
+
+ while (true) {
+ if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
+ if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
+ V = IS->getArgOperand(0)->stripPointerCasts();
+ continue;
+ }
+ } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand()->stripPointerCasts();
+ continue;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (V != V2 && processed.find(V) != processed.end())
+ return NULL;
+ processed.insert(PN);
+ const Value *common = 0;
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ const Value *pv = PN->getIncomingValue(i);
+ const Value *base = skipPointerTransfer(pv, processed);
+ if (base) {
+ if (common == 0)
+ common = base;
+ else if (common != base)
+ return PN;
+ }
+ }
+ if (common == 0)
+ return PN;
+ V = common;
+ }
+ break;
+ }
+ return V;
+}
+
+// The following are some useful utilities for debuggung
+
+BasicBlock *llvm::getParentBlock(Value *v) {
+ if (BasicBlock *B = dyn_cast<BasicBlock>(v))
+ return B;
+
+ if (Instruction *I = dyn_cast<Instruction>(v))
+ return I->getParent();
+
+ return 0;
+}
+
+Function *llvm::getParentFunction(Value *v) {
+ if (Function *F = dyn_cast<Function>(v))
+ return F;
+
+ if (Instruction *I = dyn_cast<Instruction>(v))
+ return I->getParent()->getParent();
+
+ if (BasicBlock *B = dyn_cast<BasicBlock>(v))
+ return B->getParent();
+
+ return 0;
+}
+
+// Dump a block by name
+void llvm::dumpBlock(Value *v, char *blockName) {
+ Function *F = getParentFunction(v);
+ if (F == 0)
+ return;
+
+ for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
+ BasicBlock *B = it;
+ if (strcmp(B->getName().data(), blockName) == 0) {
+ B->dump();
+ return;
+ }
+ }
+}
+
+// Find an instruction by name
+Instruction *llvm::getInst(Value *base, char *instName) {
+ Function *F = getParentFunction(base);
+ if (F == 0)
+ return 0;
+
+ for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
+ Instruction *I = &*it;
+ if (strcmp(I->getName().data(), instName) == 0) {
+ return I;
+ }
+ }
+
+ return 0;
+}
+
+// Dump an instruction by nane
+void llvm::dumpInst(Value *base, char *instName) {
+ Instruction *I = getInst(base, instName);
+ if (I)
+ I->dump();
+}
+
+// Dump an instruction and all dependent instructions
+void llvm::dumpInstRec(Value *v, std::set<Instruction *> *visited) {
+ if (Instruction *I = dyn_cast<Instruction>(v)) {
+
+ if (visited->find(I) != visited->end())
+ return;
+
+ visited->insert(I);
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ dumpInstRec(I->getOperand(i), visited);
+
+ I->dump();
+ }
+}
+
+// Dump an instruction and all dependent instructions
+void llvm::dumpInstRec(Value *v) {
+ std::set<Instruction *> visited;
+
+ //BasicBlock *B = getParentBlock(v);
+
+ dumpInstRec(v, &visited);
+}
+
+// Dump the parent for Instruction, block or function
+void llvm::dumpParent(Value *v) {
+ if (Instruction *I = dyn_cast<Instruction>(v)) {
+ I->getParent()->dump();
+ return;
+ }
+
+ if (BasicBlock *B = dyn_cast<BasicBlock>(v)) {
+ B->getParent()->dump();
+ return;
+ }
+
+ if (Function *F = dyn_cast<Function>(v)) {
+ F->getParent()->dump();
+ return;
+ }
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
new file mode 100644
index 000000000000..a208004297d0
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -0,0 +1,92 @@
+//===-- NVPTXUtilities - Utilities -----------------------------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the NVVM specific utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXUTILITIES_H
+#define NVPTXUTILITIES_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include <cstdarg>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
+#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
+
+bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);
+bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,
+ std::vector<unsigned> &);
+
+bool isTexture(const llvm::Value &);
+bool isSurface(const llvm::Value &);
+bool isSampler(const llvm::Value &);
+bool isImage(const llvm::Value &);
+bool isImageReadOnly(const llvm::Value &);
+bool isImageWriteOnly(const llvm::Value &);
+
+std::string getTextureName(const llvm::Value &);
+std::string getSurfaceName(const llvm::Value &);
+std::string getSamplerName(const llvm::Value &);
+
+bool getMaxNTIDx(const llvm::Function &, unsigned &);
+bool getMaxNTIDy(const llvm::Function &, unsigned &);
+bool getMaxNTIDz(const llvm::Function &, unsigned &);
+
+bool getReqNTIDx(const llvm::Function &, unsigned &);
+bool getReqNTIDy(const llvm::Function &, unsigned &);
+bool getReqNTIDz(const llvm::Function &, unsigned &);
+
+bool getMinCTASm(const llvm::Function &, unsigned &);
+bool isKernelFunction(const llvm::Function &);
+
+bool getAlign(const llvm::Function &, unsigned index, unsigned &);
+bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
+
+bool isBarrierIntrinsic(llvm::Intrinsic::ID);
+
+/// make_vector - Helper function which is useful for building temporary vectors
+/// to pass into type construction of CallInst ctors. This turns a null
+/// terminated list of pointers (or other value types) into a real live vector.
+///
+template <typename T> inline std::vector<T> make_vector(T A, ...) {
+ va_list Args;
+ va_start(Args, A);
+ std::vector<T> Result;
+ Result.push_back(A);
+ while (T Val = va_arg(Args, T))
+ Result.push_back(Val);
+ va_end(Args);
+ return Result;
+}
+
+bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
+const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
+const Value *
+skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
+BasicBlock *getParentBlock(Value *v);
+Function *getParentFunction(Value *v);
+void dumpBlock(Value *v, char *blockName);
+Instruction *getInst(Value *base, char *instName);
+void dumpInst(Value *base, char *instName);
+void dumpInstRec(Value *v, std::set<Instruction *> *visited);
+void dumpInstRec(Value *v);
+void dumpParent(Value *v);
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
new file mode 100644
index 000000000000..775df19be162
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
@@ -0,0 +1,1481 @@
+//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//-----------------------------------
+// Vector Specific
+//-----------------------------------
+
+//
+// All vector instructions derive from NVPTXVecInst
+//
+
+class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ NVPTXInst sInst=NOP>
+ : NVPTXInst<outs, ins, asmstr, pattern> {
+ NVPTXInst scalarInst=sInst;
+}
+
+let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
+// Extract v2i16
+def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
+ (ins V2I16Regs:$src, i8imm:$c),
+ "mov.u16 \t$dst, $src${c:vecelem};",
+ [(set Int16Regs:$dst, (vector_extract
+ (v2i16 V2I16Regs:$src), imm:$c))],
+ IMOV16rr>;
+
+// Extract v4i16
+def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
+ (ins V4I16Regs:$src, i8imm:$c),
+ "mov.u16 \t$dst, $src${c:vecelem};",
+ [(set Int16Regs:$dst, (vector_extract
+ (v4i16 V4I16Regs:$src), imm:$c))],
+ IMOV16rr>;
+
+// Extract v2i8
+def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
+ (ins V2I8Regs:$src, i8imm:$c),
+ "mov.u16 \t$dst, $src${c:vecelem};",
+ [(set Int8Regs:$dst, (vector_extract
+ (v2i8 V2I8Regs:$src), imm:$c))],
+ IMOV8rr>;
+
+// Extract v4i8
+def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
+ (ins V4I8Regs:$src, i8imm:$c),
+ "mov.u16 \t$dst, $src${c:vecelem};",
+ [(set Int8Regs:$dst, (vector_extract
+ (v4i8 V4I8Regs:$src), imm:$c))],
+ IMOV8rr>;
+
+// Extract v2i32
+def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
+ (ins V2I32Regs:$src, i8imm:$c),
+ "mov.u32 \t$dst, $src${c:vecelem};",
+ [(set Int32Regs:$dst, (vector_extract
+ (v2i32 V2I32Regs:$src), imm:$c))],
+ IMOV32rr>;
+
+// Extract v2f32
+def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
+ (ins V2F32Regs:$src, i8imm:$c),
+ "mov.f32 \t$dst, $src${c:vecelem};",
+ [(set Float32Regs:$dst, (vector_extract
+ (v2f32 V2F32Regs:$src), imm:$c))],
+ FMOV32rr>;
+
+// Extract v2i64
+def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
+ (ins V2I64Regs:$src, i8imm:$c),
+ "mov.u64 \t$dst, $src${c:vecelem};",
+ [(set Int64Regs:$dst, (vector_extract
+ (v2i64 V2I64Regs:$src), imm:$c))],
+ IMOV64rr>;
+
+// Extract v2f64
+def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
+ (ins V2F64Regs:$src, i8imm:$c),
+ "mov.f64 \t$dst, $src${c:vecelem};",
+ [(set Float64Regs:$dst, (vector_extract
+ (v2f64 V2F64Regs:$src), imm:$c))],
+ FMOV64rr>;
+
+// Extract v4i32
+def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
+ (ins V4I32Regs:$src, i8imm:$c),
+ "mov.u32 \t$dst, $src${c:vecelem};",
+ [(set Int32Regs:$dst, (vector_extract
+ (v4i32 V4I32Regs:$src), imm:$c))],
+ IMOV32rr>;
+
+// Extract v4f32
+def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
+ (ins V4F32Regs:$src, i8imm:$c),
+ "mov.f32 \t$dst, $src${c:vecelem};",
+ [(set Float32Regs:$dst, (vector_extract
+ (v4f32 V4F32Regs:$src), imm:$c))],
+ FMOV32rr>;
+}
+
+let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in {
+// Insert v2i8
+def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst),
+ (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c),
+ "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
+ [(set V2I8Regs:$dst,
+ (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))],
+ IMOV8rr>;
+
+// Insert v4i8
+def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
+ (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c),
+ "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
+ [(set V4I8Regs:$dst,
+ (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))],
+ IMOV8rr>;
+
+// Insert v2i16
+def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
+ (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c),
+ "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
+ [(set V2I16Regs:$dst,
+ (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))],
+ IMOV16rr>;
+
+// Insert v4i16
+def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
+ (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c),
+ "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
+ [(set V4I16Regs:$dst,
+ (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))],
+ IMOV16rr>;
+
+// Insert v2i32
+def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
+ (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c),
+ "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
+ [(set V2I32Regs:$dst,
+ (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))],
+ IMOV32rr>;
+
+// Insert v2f32
+def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
+ (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c),
+ "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
+ [(set V2F32Regs:$dst,
+ (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))],
+ FMOV32rr>;
+
+// Insert v2i64
+def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
+ (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c),
+ "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u64 \t$dst${c:vecelem}, $val;",
+ [(set V2I64Regs:$dst,
+ (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))],
+ IMOV64rr>;
+
+// Insert v2f64
+def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
+ (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c),
+ "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.f64 \t$dst${c:vecelem}, $val;",
+ [(set V2F64Regs:$dst,
+ (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))],
+ FMOV64rr>;
+
+// Insert v4i32
+def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
+ (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c),
+ "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
+ [(set V4I32Regs:$dst,
+ (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))],
+ IMOV32rr>;
+
+// Insert v4f32
+def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
+ (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c),
+ "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
+ [(set V4F32Regs:$dst,
+ (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))],
+ FMOV32rr>;
+}
+
+class BinOpAsmString<string c> {
+ string s = c;
+}
+
+class V4AsmStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(!strconcat(
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"),
+ opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"),
+ opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>;
+
+class V2AsmStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>;
+
+class V4MADStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(!strconcat(
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"),
+ opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"),
+ opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>;
+
+class V2MADStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>;
+
+class V4UnaryStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(!strconcat(
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1;\n\t"),
+ opcode), " \t${dst}_2, ${a}_2;\n\t"),
+ opcode), " \t${dst}_3, ${a}_3;")>;
+
+class V2UnaryStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1;")>;
+
+class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass,
+ NVPTXInst sInst=NOP> :
+ NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b),
+ asmstr.s,
+ [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))],
+ sInst>;
+
+class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1,
+ NVPTXRegClass regclass2, NVPTXInst sInst=NOP> :
+ NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b),
+ asmstr.s,
+ [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))],
+ sInst>;
+
+class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass,
+ NVPTXInst sInst=NOP> :
+ NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a),
+ asmstr.s,
+ [(set regclass:$dst, (OpNode regclass:$a))], sInst>;
+
+multiclass IntBinVOp<string asmstr, SDNode OpNode,
+ NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, NVPTXInst
+ i16op=NOP, NVPTXInst i8op=NOP> {
+ def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs,
+ i64op>;
+ def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs,
+ i32op>;
+ def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs,
+ i32op>;
+ def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs,
+ i16op>;
+ def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs,
+ i16op>;
+ def V4I8 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs,
+ i8op>;
+ def V2I8 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs,
+ i8op>;
+}
+
+multiclass FloatBinVOp<string asmstr, SDNode OpNode,
+ NVPTXInst f64=NOP, NVPTXInst f32=NOP,
+ NVPTXInst f32_ftz=NOP> {
+ def V2F64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode,
+ V2F64Regs, f64>;
+ def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
+ V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
+ def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
+ V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
+ def V4F32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode,
+ V4F32Regs, f32>;
+ def V2F32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode,
+ V2F32Regs, f32>;
+}
+
+multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
+ NVPTXInst i64op=NOP, NVPTXInst i32op=NOP,
+ NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> {
+ def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode,
+ V2I64Regs, i64op>;
+ def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode,
+ V4I32Regs, i32op>;
+ def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode,
+ V2I32Regs, i32op>;
+ def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
+ V4I16Regs, i16op>;
+ def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
+ V2I16Regs, i16op>;
+ def V4I8 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
+ V4I8Regs, i8op>;
+ def V2I8 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
+ V2I8Regs, i8op>;
+}
+
+
+// Integer Arithmetic
+let VecInstType=isVecOther.Value in {
+defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
+defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;
+
+def AddCCV4I32 : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs,
+ ADDCCi32rr>;
+def AddCCV2I32 : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs,
+ ADDCCi32rr>;
+def SubCCV4I32 : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs,
+ SUBCCi32rr>;
+def SubCCV2I32 : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs,
+ SUBCCi32rr>;
+def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs,
+ ADDCCCi32rr>;
+def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs,
+ ADDCCCi32rr>;
+def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs,
+ SUBCCCi32rr>;
+def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs,
+ SUBCCCi32rr>;
+
+def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs,
+ SHLi64rr>;
+def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs,
+ SHLi32rr>;
+def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs,
+ SHLi32rr>;
+def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs,
+ SHLi16rr>;
+def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs,
+ SHLi16rr>;
+def ShiftLV2I8 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs,
+ SHLi8rr>;
+def ShiftLV4I8 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs,
+ SHLi8rr>;
+}
+
+// cvt to v*i32, helpers for shift
+class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr,
+ NVPTXInst sInst=NOP> :
+ NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>;
+
+class VecCVTStrHelper<string op, string dest, string src> {
+ string s=!strconcat(op, !strconcat("\t",
+ !strconcat(dest, !strconcat(", ", !strconcat(src, ";")))));
+}
+
+class Vec2CVTStr<string op> {
+ string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
+ !strconcat("\n\t", VecCVTStrHelper<op, "${d}_1", "${s}_1">.s));
+}
+
+class Vec4CVTStr<string op> {
+ string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
+ !strconcat("\n\t",
+ !strconcat(VecCVTStrHelper<op, "${d}_1", "${s}_1">.s,
+ !strconcat("\n\t",
+ !strconcat(VecCVTStrHelper<op, "${d}_2", "${s}_2">.s,
+ !strconcat("\n\t", VecCVTStrHelper<op, "${d}_3", "${s}_3">.s))))));
+}
+
+let VecInstType=isVecOther.Value in {
+def CVTv2i8tov2i32 : CVTtoVeci32<V2I8Regs, V2I32Regs,
+ Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
+def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs,
+ Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
+def CVTv4i8tov4i32 : CVTtoVeci32<V4I8Regs, V4I32Regs,
+ Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
+def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs,
+ Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
+def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs,
+ Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>;
+}
+
+def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
+ (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
+def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
+ (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
+def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
+ (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
+
+def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
+ (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
+def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
+ (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
+
+let VecInstType=isVecOther.Value in {
+def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs,
+ SRAi64rr>;
+def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs,
+ SRAi32rr>;
+def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs,
+ SRAi32rr>;
+def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs,
+ SRAi16rr>;
+def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs,
+ SRAi16rr>;
+def ShiftRAV2I8 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs,
+ SRAi8rr>;
+def ShiftRAV4I8 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs, V4I32Regs,
+ SRAi8rr>;
+
+def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs,
+ SRLi64rr>;
+def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs,
+ SRLi32rr>;
+def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs,
+ SRLi32rr>;
+def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs,
+ SRLi16rr>;
+def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs,
+ SRLi16rr>;
+def ShiftRLV2I8 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs,
+ SRLi8rr>;
+def ShiftRLV4I8 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs,
+ SRLi8rr>;
+
+defm VMult : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr,
+ MULTi8rr>;
+defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr,
+ MULTHSi16rr,
+ MULTHSi8rr>;
+defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr,
+ MULTHUi16rr,
+ MULTHUi8rr>;
+defm VSDiv : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr,
+ SDIVi8rr>;
+defm VUDiv : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr,
+ UDIVi8rr>;
+defm VSRem : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr,
+ SREMi8rr>;
+defm VURem : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr,
+ UREMi8rr>;
+}
+
+def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
+ (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
+def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
+ (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
+def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
+ (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
+
+def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
+ (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
+def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
+ (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
+
+def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
+ (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
+def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
+ (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
+def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
+ (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
+
+def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
+ (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
+def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
+ (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
+
+multiclass VMAD<string asmstr, NVPTXRegClass regclassv4,
+ NVPTXRegClass regclassv2,
+ SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP,
+ Predicate Pred> {
+ def V4 : NVPTXVecInst<(outs regclassv4:$dst),
+ (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
+ V4MADStr<asmstr>.s,
+ [(set regclassv4:$dst,
+ (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))],
+ sop>,
+ Requires<[Pred]>;
+ def V2 : NVPTXVecInst<(outs regclassv2:$dst),
+ (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
+ V2MADStr<asmstr>.s,
+ [(set regclassv2:$dst,
+ (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))],
+ sop>,
+ Requires<[Pred]>;
+}
+
+multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
+ Predicate Pred> {
+ def V2 : NVPTXVecInst<(outs regclass:$dst),
+ (ins regclass:$a, regclass:$b, regclass:$c),
+ V2MADStr<asmstr>.s,
+ [(set regclass:$dst, (add
+ (mul regclass:$a, regclass:$b), regclass:$c))], sop>,
+ Requires<[Pred]>;
+}
+multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
+ Predicate Pred> {
+ def V2 : NVPTXVecInst<(outs regclass:$dst),
+ (ins regclass:$a, regclass:$b, regclass:$c),
+ V2MADStr<asmstr>.s,
+ [(set regclass:$dst, (fadd
+ (fmul regclass:$a, regclass:$b), regclass:$c))], sop>,
+ Requires<[Pred]>;
+}
+
+let VecInstType=isVecOther.Value in {
+defm I8MAD : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>;
+defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr,
+ true>;
+defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr,
+ true>;
+defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;
+
+defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;
+
+defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
+defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
+defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>;
+
+defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
+ FMAD32_ftzrrr, doFMADF32_ftz>;
+defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
+ FMA32_ftzrrr, doFMAF32_ftz>;
+defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr,
+ doFMADF32>;
+defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr,
+ doFMAF32>;
+defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
+}
+
+let VecInstType=isVecOther.Value in {
+def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs,
+ FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
+def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs,
+ FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
+def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs,
+ FDIV32rr_prec>, Requires<[reqPTX20]>;
+def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs,
+ FDIV32rr_prec>, Requires<[reqPTX20]>;
+def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs,
+ FDIV32rr_ftz>, Requires<[doF32FTZ]>;
+def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs,
+ FDIV32rr_ftz>, Requires<[doF32FTZ]>;
+def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>;
+def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>;
+def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>;
+}
+
+def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>;
+
+let VecInstType=isVecOther.Value in {
+def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs,
+ FNEGf32_ftz>, Requires<[doF32FTZ]>;
+def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs,
+ FNEGf32_ftz>, Requires<[doF32FTZ]>;
+def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>;
+def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>;
+def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>;
+
+// Logical Arithmetic
+defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
+defm VOr : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>;
+defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>;
+
+defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
+}
+
+
+multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)),
+ (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>,
+ Requires<[Pred]>;
+
+ def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c),
+ (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>,
+ Requires<[Pred]>;
+}
+
+defm V2FMAF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>;
+defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
+defm V2FMAF32ext : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
+defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;
+
+multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)),
+ (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>,
+ Requires<[Pred]>;
+
+ def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c),
+ (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>,
+ Requires<[Pred]>;
+}
+
+defm V4FMAF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>;
+defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
+defm V4FMAF32ext : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
+defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;
+
+multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)),
+ (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>,
+ Requires<[Pred]>;
+
+ def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c),
+ (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>,
+ Requires<[Pred]>;
+}
+
+defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>;
+
+class VecModStr<string vecsize, string elem, string extra, string l="">
+{
+ string t1 = !strconcat("${c", elem);
+ string t2 = !strconcat(t1, ":vecv");
+ string t3 = !strconcat(t2, vecsize);
+ string t4 = !strconcat(t3, extra);
+ string t5 = !strconcat(t4, l);
+ string s = !strconcat(t5, "}");
+}
+class ShuffleOneLine<string vecsize, string elem, string type>
+{
+ string t1 = VecModStr<vecsize, elem, "comm", "1">.s;
+ string t2 = !strconcat(t1, "mov.");
+ string t3 = !strconcat(t2, type);
+ string t4 = !strconcat(t3, " \t${dst}_");
+ string t5 = !strconcat(t4, elem);
+ string t6 = !strconcat(t5, ", $src1");
+ string t7 = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s);
+ string t8 = !strconcat(t7, ";\n\t");
+ string t9 = !strconcat(t8, VecModStr<vecsize, elem, "comm", "2">.s);
+ string t10 = !strconcat(t9, "mov.");
+ string t11 = !strconcat(t10, type);
+ string t12 = !strconcat(t11, " \t${dst}_");
+ string t13 = !strconcat(t12, elem);
+ string t14 = !strconcat(t13, ", $src2");
+ string t15 = !strconcat(t14, VecModStr<vecsize, elem, "pos">.s);
+ string s = !strconcat(t15, ";");
+}
+class ShuffleAsmStr2<string type>
+{
+ string t1 = ShuffleOneLine<"2", "0", type>.s;
+ string t2 = !strconcat(t1, "\n\t");
+ string s = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s);
+}
+class ShuffleAsmStr4<string type>
+{
+ string t1 = ShuffleOneLine<"4", "0", type>.s;
+ string t2 = !strconcat(t1, "\n\t");
+ string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s);
+ string t4 = !strconcat(t3, "\n\t");
+ string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s);
+ string t6 = !strconcat(t5, "\n\t");
+ string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
+}
+
+let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in {
+def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst),
+ (ins V4F32Regs:$src1, V4F32Regs:$src2,
+ i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
+ ShuffleAsmStr4<"f32">.s),
+ [], FMOV32rr>;
+
+def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst),
+ (ins V4I32Regs:$src1, V4I32Regs:$src2,
+ i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
+ ShuffleAsmStr4<"u32">.s),
+ [], IMOV32rr>;
+
+def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst),
+ (ins V4I16Regs:$src1, V4I16Regs:$src2,
+ i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
+ ShuffleAsmStr4<"u16">.s),
+ [], IMOV16rr>;
+
+def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst),
+ (ins V4I8Regs:$src1, V4I8Regs:$src2,
+ i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
+ ShuffleAsmStr4<"u16">.s),
+ [], IMOV8rr>;
+
+def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst),
+ (ins V2F32Regs:$src1, V2F32Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"f32">.s),
+ [], FMOV32rr>;
+
+def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst),
+ (ins V2I32Regs:$src1, V2I32Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"u32">.s),
+ [], IMOV32rr>;
+
+def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst),
+ (ins V2I8Regs:$src1, V2I8Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"u16">.s),
+ [], IMOV8rr>;
+
+def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst),
+ (ins V2I16Regs:$src1, V2I16Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"u16">.s),
+ [], IMOV16rr>;
+
+def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst),
+ (ins V2F64Regs:$src1, V2F64Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"f64">.s),
+ [], FMOV64rr>;
+
+def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst),
+ (ins V2I64Regs:$src1, V2I64Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"u64">.s),
+ [], IMOV64rr>;
+}
+
+def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32);
+}]>;
+def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32);
+}]>;
+def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32);
+}]>;
+def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32);
+}]>;
+
+// The spurious call is here to silence a compiler warning about N being
+// unused.
+def vec_shuf : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs),
+ [{ N->getGluedNode(); return true; }]>;
+
+def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)),
+ (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)),
+ (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
+ (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
+
+def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)),
+ (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)),
+ (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)),
+ (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
+ (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
+
+def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)),
+ (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)),
+ (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
+ (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
+
+def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)),
+ (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)),
+ (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
+ (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
+
+def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)),
+ (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
+ NVPTXInst si>
+ : NVPTXVecInst<(outs vclass:$dst),
+ (ins sclass:$a1, sclass:$a2),
+ !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"),
+ [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))],
+ si>;
+class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
+ NVPTXInst si>
+ : NVPTXVecInst<(outs vclass:$dst),
+ (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
+ !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"),
+ [(set vclass:$dst,
+ (build_vector sclass:$a1, sclass:$a2,
+ sclass:$a3, sclass:$a4))], si>;
+
+let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in {
+def Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs,
+ FMOV32rr>;
+def Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs,
+ FMOV64rr>;
+
+def Build_Vector2_i32 : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs,
+ IMOV32rr>;
+def Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs,
+ IMOV64rr>;
+def Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs,
+ IMOV16rr>;
+def Build_Vector2_i8 : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs,
+ IMOV8rr>;
+
+def Build_Vector4_f32 : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs,
+ FMOV32rr>;
+
+def Build_Vector4_i32 : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs,
+ IMOV32rr>;
+def Build_Vector4_i16 : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs,
+ IMOV16rr>;
+def Build_Vector4_i8 : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs,
+ IMOV8rr>;
+}
+
+class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
+ : NVPTXVecInst<(outs vclass:$dst), (ins vclass:$src),
+ !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
+ [], sop>;
+
+let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
+ VecInstType=isVecOther.Value in {
+def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
+def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;
+
+def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
+def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;
+
+def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
+def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;
+
+def V4i8Mov : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
+def V2i8Mov : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;
+
+def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
+def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
+}
+
+// extract subvector patterns
+def extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;
+
+def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
+ (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0),
+ (V4f32Extract V4F32Regs:$src, 1))>;
+def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
+ (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2),
+ (V4f32Extract V4F32Regs:$src, 3))>;
+def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
+ (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
+ (V4i32Extract V4I32Regs:$src, 1))>;
+def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
+ (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
+ (V4i32Extract V4I32Regs:$src, 3))>;
+def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
+ (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
+ (V4i16Extract V4I16Regs:$src, 1))>;
+def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
+ (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
+ (V4i16Extract V4I16Regs:$src, 3))>;
+def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
+ (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
+ (V4i8Extract V4I8Regs:$src, 1))>;
+def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
+ (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
+ (V4i8Extract V4I8Regs:$src, 3))>;
+
+// Select instructions
+class Select_OneLine<string type, string pos> {
+ string t1 = !strconcat("selp.", type);
+ string t2 = !strconcat(t1, " \t${dst}_");
+ string t3 = !strconcat(t2, pos);
+ string t4 = !strconcat(t3, ", ${src1}_");
+ string t5 = !strconcat(t4, pos);
+ string t6 = !strconcat(t5, ", ${src2}_");
+ string t7 = !strconcat(t6, pos);
+ string s = !strconcat(t7, ", $p;");
+}
+
+class Select_Str2<string type> {
+ string t1 = Select_OneLine<type, "0">.s;
+ string t2 = !strconcat(t1, "\n\t");
+ string s = !strconcat(t2, Select_OneLine<type, "1">.s);
+}
+
+class Select_Str4<string type> {
+ string t1 = Select_OneLine<type, "0">.s;
+ string t2 = !strconcat(t1, "\n\t");
+ string t3 = !strconcat(t2, Select_OneLine<type, "1">.s);
+ string t4 = !strconcat(t3, "\n\t");
+ string t5 = !strconcat(t4, Select_OneLine<type, "2">.s);
+ string t6 = !strconcat(t5, "\n\t");
+ string s = !strconcat(t6, Select_OneLine<type, "3">.s);
+
+}
+
+class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
+ : NVPTXVecInst<(outs vclass:$dst),
+ (ins vclass:$src1, vclass:$src2, Int1Regs:$p),
+ asmstr,
+ [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1,
+ vclass:$src2))],
+ sop>;
+
+let VecInstType=isVecOther.Value in {
+def V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
+def V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
+def V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
+def V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
+def V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
+def V4I8_Select : Vec_Select<V4I8Regs, Select_Str4<"b16">.s, SELECTi8rr>;
+def V2I8_Select : Vec_Select<V2I8Regs, Select_Str2<"b16">.s, SELECTi8rr>;
+
+def V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
+def V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
+def V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
+}
+
+// Comparison instructions
+
+// setcc convenience fragments.
+def vsetoeq : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOEQ)>;
+def vsetogt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOGT)>;
+def vsetoge : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOGE)>;
+def vsetolt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOLT)>;
+def vsetole : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOLE)>;
+def vsetone : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETONE)>;
+def vseto : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETO)>;
+def vsetuo : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUO)>;
+def vsetueq : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUEQ)>;
+def vsetugt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUGT)>;
+def vsetuge : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUGE)>;
+def vsetult : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETULT)>;
+def vsetule : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETULE)>;
+def vsetune : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUNE)>;
+def vseteq : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETEQ)>;
+def vsetgt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETGT)>;
+def vsetge : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETGE)>;
+def vsetlt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETLT)>;
+def vsetle : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETLE)>;
+def vsetne : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETNE)>;
+
+class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
+ NVPTXInst sop>
+ : NVPTXVecInst<(outs outrclass:$dst),
+ (ins inrclass:$a, inrclass:$b),
+ "Unsupported",
+ [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
+ sop>;
+
+multiclass Vec_Compare_All<PatFrag op,
+ NVPTXInst inst8,
+ NVPTXInst inst16,
+ NVPTXInst inst32,
+ NVPTXInst inst64>
+{
+ def V2I8 : Vec_Compare<op, V2I8Regs, V2I8Regs, inst8>;
+ def V4I8 : Vec_Compare<op, V4I8Regs, V4I8Regs, inst8>;
+ def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
+ def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
+ def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
+ def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
+ def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
+}
+
+let VecInstType=isVecOther.Value in {
+ defm VecSGT : Vec_Compare_All<vsetgt, ISetSGTi8rr_toi8, ISetSGTi16rr_toi16,
+ ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
+ defm VecUGT : Vec_Compare_All<vsetugt, ISetUGTi8rr_toi8, ISetUGTi16rr_toi16,
+ ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
+ defm VecSLT : Vec_Compare_All<vsetlt, ISetSLTi8rr_toi8, ISetSLTi16rr_toi16,
+ ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
+ defm VecULT : Vec_Compare_All<vsetult, ISetULTi8rr_toi8, ISetULTi16rr_toi16,
+ ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
+ defm VecSGE : Vec_Compare_All<vsetge, ISetSGEi8rr_toi8, ISetSGEi16rr_toi16,
+ ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
+ defm VecUGE : Vec_Compare_All<vsetuge, ISetUGEi8rr_toi8, ISetUGEi16rr_toi16,
+ ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
+ defm VecSLE : Vec_Compare_All<vsetle, ISetSLEi8rr_toi8, ISetSLEi16rr_toi16,
+ ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
+ defm VecULE : Vec_Compare_All<vsetule, ISetULEi8rr_toi8, ISetULEi16rr_toi16,
+ ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
+ defm VecSEQ : Vec_Compare_All<vseteq, ISetSEQi8rr_toi8, ISetSEQi16rr_toi16,
+ ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
+ defm VecUEQ : Vec_Compare_All<vsetueq, ISetUEQi8rr_toi8, ISetUEQi16rr_toi16,
+ ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
+ defm VecSNE : Vec_Compare_All<vsetne, ISetSNEi8rr_toi8, ISetSNEi16rr_toi16,
+ ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
+ defm VecUNE : Vec_Compare_All<vsetune, ISetUNEi8rr_toi8, ISetUNEi16rr_toi16,
+ ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
+}
+
+multiclass FVec_Compare_All<PatFrag op,
+ NVPTXInst instf32,
+ NVPTXInst instf64>
+{
+ def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
+ def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
+ def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
+}
+
+let VecInstType=isVecOther.Value in {
+ defm FVecGT : FVec_Compare_All<vsetogt, FSetGTf32rr_toi32,
+ FSetGTf64rr_toi64>;
+ defm FVecLT : FVec_Compare_All<vsetolt, FSetLTf32rr_toi32,
+ FSetLTf64rr_toi64>;
+ defm FVecGE : FVec_Compare_All<vsetoge, FSetGEf32rr_toi32,
+ FSetGEf64rr_toi64>;
+ defm FVecLE : FVec_Compare_All<vsetole, FSetLEf32rr_toi32,
+ FSetLEf64rr_toi64>;
+ defm FVecEQ : FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32,
+ FSetEQf64rr_toi64>;
+ defm FVecNE : FVec_Compare_All<vsetone, FSetNEf32rr_toi32,
+ FSetNEf64rr_toi64>;
+
+ defm FVecUGT : FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32,
+ FSetUGTf64rr_toi64>;
+ defm FVecULT : FVec_Compare_All<vsetult, FSetULTf32rr_toi32,
+ FSetULTf64rr_toi64>;
+ defm FVecUGE : FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32,
+ FSetUGEf64rr_toi64>;
+ defm FVecULE : FVec_Compare_All<vsetule, FSetULEf32rr_toi32,
+ FSetULEf64rr_toi64>;
+ defm FVecUEQ : FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32,
+ FSetUEQf64rr_toi64>;
+ defm FVecUNE : FVec_Compare_All<vsetune, FSetUNEf32rr_toi32,
+ FSetUNEf64rr_toi64>;
+
+ defm FVecNUM : FVec_Compare_All<vseto, FSetNUMf32rr_toi32,
+ FSetNUMf64rr_toi64>;
+ defm FVecNAN : FVec_Compare_All<vsetuo, FSetNANf32rr_toi32,
+ FSetNANf64rr_toi64>;
+}
+
+class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
+ (ins i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("ld.param", opstr),
+ "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"), []>;
+
+class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs regclass:$d1, regclass:$d2),
+ (ins i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("ld.param", opstr),
+ "\t{{$d1, $d2}}, [retval0+$b];"), []>;
+
+
+class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs),
+ (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
+ i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"), []>;
+
+class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs),
+ (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[param$a+$b], {{$s1, $s2}};"), []>;
+
+class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs),
+ (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
+ i32imm:$a),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"), []>;
+
+class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs),
+ (ins regclass:$s1, regclass:$s2, i32imm:$a),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[func_retval+$a], {{$s1, $s2}};"), []>;
+
+def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
+def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
+def LoadParamScalar4I8 : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;
+
+def LoadParamScalar2I64 : LoadParamScalar2Inst<Int32Regs, ".v2.b64">;
+def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
+def LoadParamScalar2I16 : LoadParamScalar2Inst<Int32Regs, ".v2.b16">;
+def LoadParamScalar2I8 : LoadParamScalar2Inst<Int32Regs, ".v2.b8">;
+
+def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
+def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
+def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
+
+def StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
+def StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
+def StoreParamScalar4I8 : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;
+
+def StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
+def StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
+def StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
+def StoreParamScalar2I8 : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;
+
+def StoreParamScalar4F32 : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
+def StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
+def StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;
+
+def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
+def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
+def StoreRetvalScalar4I8 : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;
+
+def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
+def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
+def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
+def StoreRetvalScalar2I8 : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;
+
+def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
+def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
+def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;
+
+class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>:
+ NVPTXVecInst<(outs regclass:$dst), (ins i32imm:$a, i32imm:$b),
+ "loadparam : $dst <- [$a, $b]",
+ [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
+ sop>;
+
+class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
+ : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
+ "storeparam : [$a, $b] <- $val",
+ [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)], sop>;
+
+class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
+ NVPTXInst sop=NOP>
+ : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a),
+ "storeretval : retval[$a] <- $val",
+ [(StoreRetval (i32 imm:$a), regclass:$val)], sop>;
+
+let VecInstType=isVecLD.Value in {
+def LoadParamV4I32 : LoadParamVecInst<V4I32Regs, ".v4.b32",
+ LoadParamScalar4I32>;
+def LoadParamV4I16 : LoadParamVecInst<V4I16Regs, ".v4.b16",
+ LoadParamScalar4I16>;
+def LoadParamV4I8 : LoadParamVecInst<V4I8Regs, ".v4.b8",
+ LoadParamScalar4I8>;
+
+def LoadParamV2I64 : LoadParamVecInst<V2I64Regs, ".v2.b64",
+ LoadParamScalar2I64>;
+def LoadParamV2I32 : LoadParamVecInst<V2I32Regs, ".v2.b32",
+ LoadParamScalar2I32>;
+def LoadParamV2I16 : LoadParamVecInst<V2I16Regs, ".v2.b16",
+ LoadParamScalar2I16>;
+def LoadParamV2I8 : LoadParamVecInst<V2I8Regs, ".v2.b8",
+ LoadParamScalar2I8>;
+
+def LoadParamV4F32 : LoadParamVecInst<V4F32Regs, ".v4.f32",
+ LoadParamScalar4F32>;
+def LoadParamV2F32 : LoadParamVecInst<V2F32Regs, ".v2.f32",
+ LoadParamScalar2F32>;
+def LoadParamV2F64 : LoadParamVecInst<V2F64Regs, ".v2.f64",
+ LoadParamScalar2F64>;
+}
+
+let VecInstType=isVecST.Value in {
+def StoreParamV4I32 : StoreParamVecInst<V4I32Regs, ".v4.b32",
+ StoreParamScalar4I32>;
+def StoreParamV4I16 : StoreParamVecInst<V4I16Regs, ".v4.b16",
+ StoreParamScalar4I16>;
+def StoreParamV4I8 : StoreParamVecInst<V4I8Regs, ".v4.b8",
+ StoreParamScalar4I8>;
+
+def StoreParamV2I64 : StoreParamVecInst<V2I64Regs, ".v2.b64",
+ StoreParamScalar2I64>;
+def StoreParamV2I32 : StoreParamVecInst<V2I32Regs, ".v2.b32",
+ StoreParamScalar2I32>;
+def StoreParamV2I16 : StoreParamVecInst<V2I16Regs, ".v2.b16",
+ StoreParamScalar2I16>;
+def StoreParamV2I8 : StoreParamVecInst<V2I8Regs, ".v2.b8",
+ StoreParamScalar2I8>;
+
+def StoreParamV4F32 : StoreParamVecInst<V4F32Regs, ".v4.f32",
+ StoreParamScalar4F32>;
+def StoreParamV2F32 : StoreParamVecInst<V2F32Regs, ".v2.f32",
+ StoreParamScalar2F32>;
+def StoreParamV2F64 : StoreParamVecInst<V2F64Regs, ".v2.f64",
+ StoreParamScalar2F64>;
+
+def StoreRetvalV4I32 : StoreRetvalVecInst<V4I32Regs, ".v4.b32",
+ StoreRetvalScalar4I32>;
+def StoreRetvalV4I16 : StoreRetvalVecInst<V4I16Regs, ".v4.b16",
+ StoreRetvalScalar4I16>;
+def StoreRetvalV4I8 : StoreRetvalVecInst<V4I8Regs, ".v4.b8",
+ StoreRetvalScalar4I8>;
+
+def StoreRetvalV2I64 : StoreRetvalVecInst<V2I64Regs, ".v2.b64",
+ StoreRetvalScalar2I64>;
+def StoreRetvalV2I32 : StoreRetvalVecInst<V2I32Regs, ".v2.b32",
+ StoreRetvalScalar2I32>;
+def StoreRetvalV2I16 : StoreRetvalVecInst<V2I16Regs, ".v2.b16",
+ StoreRetvalScalar2I16>;
+def StoreRetvalV2I8 : StoreRetvalVecInst<V2I8Regs, ".v2.b8",
+ StoreRetvalScalar2I8>;
+
+def StoreRetvalV4F32 : StoreRetvalVecInst<V4F32Regs, ".v4.f32",
+ StoreRetvalScalar4F32>;
+def StoreRetvalV2F32 : StoreRetvalVecInst<V2F32Regs, ".v2.f32",
+ StoreRetvalScalar2F32>;
+def StoreRetvalV2F64 : StoreRetvalVecInst<V2F64Regs, ".v2.f64",
+ StoreRetvalScalar2F64>;
+
+}
+
+
+// Int vector to int scalar bit convert
+// v4i8 -> i32
+def : Pat<(i32 (bitconvert V4I8Regs:$s)),
+ (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
+ (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3))>;
+// v4i16 -> i64
+def : Pat<(i64 (bitconvert V4I16Regs:$s)),
+ (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
+ (V4i16Extract V4I16Regs:$s,1),
+ (V4i16Extract V4I16Regs:$s,2),
+ (V4i16Extract V4I16Regs:$s,3))>;
+// v2i8 -> i16
+def : Pat<(i16 (bitconvert V2I8Regs:$s)),
+ (V2I8toI16 (V2i8Extract V2I8Regs:$s,0), (V2i8Extract V2I8Regs:$s,1))>;
+// v2i16 -> i32
+def : Pat<(i32 (bitconvert V2I16Regs:$s)),
+ (V2I16toI32 (V2i16Extract V2I16Regs:$s,0),
+ (V2i16Extract V2I16Regs:$s,1))>;
+// v2i32 -> i64
+def : Pat<(i64 (bitconvert V2I32Regs:$s)),
+ (V2I32toI64 (V2i32Extract V2I32Regs:$s,0),
+ (V2i32Extract V2I32Regs:$s,1))>;
+
+// Int scalar to int vector bit convert
+let VecInstType=isVecDest.Value in {
+// i32 -> v4i8
+def VecI32toV4I8 : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
+ "Error!",
+ [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
+ I32toV4I8>;
+// i64 -> v4i16
+def VecI64toV4I16 : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
+ "Error!",
+ [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
+ I64toV4I16>;
+// i16 -> v2i8
+def VecI16toV2I8 : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
+ "Error!",
+ [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
+ I16toV2I8>;
+// i32 -> v2i16
+def VecI32toV2I16 : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s),
+ "Error!",
+ [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
+ I32toV2I16>;
+// i64 -> v2i32
+def VecI64toV2I32 : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
+ "Error!",
+ [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
+ I64toV2I32>;
+}
+
+// Int vector to int vector bit convert
+// v4i8 -> v2i16
+def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
+ (VecI32toV2I16
+ (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
+ (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
+// v4i16 -> v2i32
+def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
+ (VecI64toV2I32
+ (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
+ (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
+// v2i16 -> v4i8
+def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
+ (VecI32toV4I8
+ (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
+// v2i32 -> v4i16
+def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
+ (VecI64toV4I16
+ (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
+// v2i64 -> v4i32
+def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
+ (Build_Vector4_i32
+ (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
+ (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
+ (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
+ (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;
+// v4i32 -> v2i64
+def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
+ (Build_Vector2_i64
+ (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), (V4i32Extract V4I32Regs:$s,1)),
+ (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), (V4i32Extract V4I32Regs:$s,3)))>;
+
+// Fp scalar to fp vector convert
+// f64 -> v2f32
+let VecInstType=isVecDest.Value in {
+def VecF64toV2F32 : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s),
+ "Error!",
+ [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
+ F64toV2F32>;
+}
+
+// Fp vector to fp scalar convert
+// v2f32 -> f64
+def : Pat<(f64 (bitconvert V2F32Regs:$s)),
+ (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1))>;
+
+// Fp scalar to int vector convert
+// f32 -> v4i8
+def : Pat<(v4i8 (bitconvert Float32Regs:$s)),
+ (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>;
+// f32 -> v2i16
+def : Pat<(v2i16 (bitconvert Float32Regs:$s)),
+ (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>;
+// f64 -> v4i16
+def : Pat<(v4i16 (bitconvert Float64Regs:$s)),
+ (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>;
+// f64 -> v2i32
+def : Pat<(v2i32 (bitconvert Float64Regs:$s)),
+ (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>;
+
+// Int vector to fp scalar convert
+// v4i8 -> f32
+def : Pat<(f32 (bitconvert V4I8Regs:$s)),
+ (BITCONVERT_32_I2F
+ (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
+ (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
+// v4i16 -> f64
+def : Pat<(f64 (bitconvert V4I16Regs:$s)),
+ (BITCONVERT_64_I2F
+ (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
+ (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
+// v2i16 -> f32
+def : Pat<(f32 (bitconvert V2I16Regs:$s)),
+ (BITCONVERT_32_I2F
+ (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
+// v2i32 -> f64
+def : Pat<(f64 (bitconvert V2I32Regs:$s)),
+ (BITCONVERT_64_I2F
+ (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
+
+// Int scalar to fp vector convert
+// i64 -> v2f32
+def : Pat<(v2f32 (bitconvert Int64Regs:$s)),
+ (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>;
+
+// Fp vector to int scalar convert
+// v2f32 -> i64
+def : Pat<(i64 (bitconvert V2F32Regs:$s)),
+ (BITCONVERT_64_F2I
+ (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1)))>;
+
+// Int vector to fp vector convert
+// v2i64 -> v4f32
+def : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
+ (Build_Vector4_f32
+ (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
+ (V2i64Extract V2I64Regs:$s, 0)), 0)),
+ (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
+ (V2i64Extract V2I64Regs:$s, 0)), 1)),
+ (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
+ (V2i64Extract V2I64Regs:$s, 1)), 0)),
+ (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
+ (V2i64Extract V2I64Regs:$s, 1)), 1)))>;
+// v2i64 -> v2f64
+def : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
+ (Build_Vector2_f64
+ (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)),
+ (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>;
+// v2i32 -> v2f32
+def : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
+ (Build_Vector2_f32
+ (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)),
+ (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>;
+// v4i32 -> v2f64
+def : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
+ (Build_Vector2_f64
+ (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,0),
+ (V4i32Extract V4I32Regs:$s,1))),
+ (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,2),
+ (V4i32Extract V4I32Regs:$s,3))))>;
+// v4i32 -> v4f32
+def : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
+ (Build_Vector4_f32
+ (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,0)),
+ (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)),
+ (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)),
+ (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>;
+// v4i16 -> v2f32
+def : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
+ (VecF64toV2F32 (BITCONVERT_64_I2F
+ (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
+ (V4i16Extract V4I16Regs:$s,1),
+ (V4i16Extract V4I16Regs:$s,2),
+ (V4i16Extract V4I16Regs:$s,3))))>;
+
+// Fp vector to int vector convert
+// v2i64 <- v4f32
+def : Pat<(v2i64 (bitconvert V4F32Regs:$s)),
+ (Build_Vector2_i64
+ (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,0),
+ (V4f32Extract V4F32Regs:$s,1))),
+ (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,2),
+ (V4f32Extract V4F32Regs:$s,3))))>;
+// v2i64 <- v2f64
+def : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
+ (Build_Vector2_i64
+ (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)),
+ (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>;
+// v2i32 <- v2f32
+def : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
+ (Build_Vector2_i32
+ (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)),
+ (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>;
+// v4i32 <- v2f64
+def : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
+ (Build_Vector4_i32
+ (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
+ (V2f64Extract V2F64Regs:$s, 0)), 0)),
+ (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
+ (V2f64Extract V2F64Regs:$s, 0)), 1)),
+ (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
+ (V2f64Extract V2F64Regs:$s, 1)), 0)),
+ (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
+ (V2f64Extract V2F64Regs:$s, 1)), 1)))>;
+// v4i32 <- v4f32
+def : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
+ (Build_Vector4_i32
+ (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)),
+ (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)),
+ (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,2)),
+ (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>;
+// v4i16 <- v2f32
+def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
+ (VecI64toV4I16 (BITCONVERT_64_F2I
+ (V2F32toF64 (V2f32Extract V2F32Regs:$s,0),
+ (V2f32Extract V2F32Regs:$s,1))))>;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp
new file mode 100644
index 000000000000..5f074b33a2d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp
@@ -0,0 +1,90 @@
+//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the functions that can be used in CodeGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXutil.h"
+#include "NVPTX.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+bool isParamLoad(const MachineInstr *MI) {
+ if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
+ (MI->getOpcode() != NVPTX::LD_i64_avar))
+ return false;
+ if (MI->getOperand(2).isImm() == false)
+ return false;
+ if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
+ return false;
+ return true;
+}
+
+#define DATA_MASK 0x7f
+#define DIGIT_WIDTH 7
+#define MORE_BYTES 0x80
+
+static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
+ char *a;
+ char *end = space + splen;
+
+ a = space;
+ do {
+ unsigned char uc;
+
+ if (a >= end)
+ return 1;
+ uc = val & DATA_MASK;
+ val >>= DIGIT_WIDTH;
+ if (val != 0)
+ uc |= MORE_BYTES;
+ *a = uc;
+ a++;
+ } while (val);
+ *nbytes = a - space;
+ return 0;
+}
+
+#undef DATA_MASK
+#undef DIGIT_WIDTH
+#undef MORE_BYTES
+
+uint64_t encode_leb128(const char *str) {
+ union {
+ uint64_t x;
+ char a[8];
+ } temp64;
+
+ temp64.x = 0;
+
+ for (unsigned i = 0, e = strlen(str); i != e; ++i)
+ temp64.a[i] = str[e - 1 - i];
+
+ char encoded[16];
+ int nbytes;
+
+ int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
+
+ (void) retval;
+ assert(retval == 0 && "Encoding to leb128 failed");
+
+ assert(nbytes <= 8 &&
+ "Cannot support register names with leb128 encoding > 8 bytes");
+
+ temp64.x = 0;
+ for (int i = 0; i < nbytes; ++i)
+ temp64.a[i] = encoded[i];
+
+ return temp64.x;
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h b/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h
new file mode 100644
index 000000000000..d1d117159486
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h
@@ -0,0 +1,25 @@
+//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the functions that can be used in CodeGen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_NVPTX_UTIL_H
+#define LLVM_TARGET_NVPTX_UTIL_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+namespace llvm {
+bool isParamLoad(const MachineInstr *);
+uint64_t encode_leb128(const char *str);
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
new file mode 100644
index 000000000000..0ad62ce39b0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -0,0 +1,177 @@
+//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces occurences of __nvvm_reflect("string") with an
+// integer based on -nvvm-reflect-list string=<int> option given to this pass.
+// If an undefined string value is seen in a call to __nvvm_reflect("string"),
+// a default value of 0 will be used.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
+
+using namespace llvm;
+
+namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
+
+namespace {
+class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass {
+private:
+ StringMap<int> VarMap;
+ typedef DenseMap<std::string, int>::iterator VarMapIter;
+ Function *ReflectFunction;
+
+public:
+ static char ID;
+ NVVMReflect() : ModulePass(ID) {
+ VarMap.clear();
+ ReflectFunction = 0;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
+ virtual bool runOnModule(Module &);
+
+ void setVarMap();
+};
+}
+
+static cl::opt<bool>
+NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
+ cl::desc("NVVM reflection, enabled by default"));
+
+char NVVMReflect::ID = 0;
+INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
+ "Replace occurences of __nvvm_reflect() calls with 0/1", false,
+ false)
+
+static cl::list<std::string>
+ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"),
+ cl::desc("A list of string=num assignments"),
+ cl::ValueRequired);
+
+/// The command line can look as follows :
+/// -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2
+/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
+/// ReflectList vector. First, each of ReflectList[i] is 'split'
+/// using "," as the delimiter. Then each of this part is split
+/// using "=" as the delimiter.
+void NVVMReflect::setVarMap() {
+ for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
+ DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
+ SmallVector<StringRef, 4> NameValList;
+ StringRef(ReflectList[i]).split(NameValList, ",");
+ for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
+ SmallVector<StringRef, 2> NameValPair;
+ NameValList[j].split(NameValPair, "=");
+ assert(NameValPair.size() == 2 && "name=val expected");
+ std::stringstream ValStream(NameValPair[1]);
+ int Val;
+ ValStream >> Val;
+ assert((!(ValStream.fail())) && "integer value expected");
+ VarMap[NameValPair[0]] = Val;
+ }
+ }
+}
+
+bool NVVMReflect::runOnModule(Module &M) {
+ if (!NVVMReflectEnabled)
+ return false;
+
+ setVarMap();
+
+ ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);
+
+ // If reflect function is not used, then there will be
+ // no entry in the module.
+ if (ReflectFunction == 0)
+ return false;
+
+ // Validate _reflect function
+ assert(ReflectFunction->isDeclaration() &&
+ "_reflect function should not have a body");
+ assert(ReflectFunction->getReturnType()->isIntegerTy() &&
+ "_reflect's return type should be integer");
+
+ std::vector<Instruction *> ToRemove;
+
+ // Go through the uses of ReflectFunction in this Function.
+ // Each of them should a CallInst with a ConstantArray argument.
+ // First validate that. If the c-string corresponding to the
+ // ConstantArray can be found successfully, see if it can be
+ // found in VarMap. If so, replace the uses of CallInst with the
+ // value found in VarMap. If not, replace the use with value 0.
+ for (Value::use_iterator I = ReflectFunction->use_begin(),
+ E = ReflectFunction->use_end();
+ I != E; ++I) {
+ assert(isa<CallInst>(*I) && "Only a call instruction can use _reflect");
+ CallInst *Reflect = cast<CallInst>(*I);
+
+ assert((Reflect->getNumOperands() == 2) &&
+ "Only one operand expect for _reflect function");
+ // In cuda, we will have an extra constant-to-generic conversion of
+ // the string.
+ const Value *conv = Reflect->getArgOperand(0);
+ assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion");
+ const CallInst *ConvCall = cast<CallInst>(conv);
+ const Value *str = ConvCall->getArgOperand(0);
+ assert(isa<ConstantExpr>(str) &&
+ "Format of _reflect function not recognized");
+ const ConstantExpr *GEP = cast<ConstantExpr>(str);
+
+ const Value *Sym = GEP->getOperand(0);
+ assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
+
+ const Constant *SymStr = cast<Constant>(Sym);
+
+ assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
+ "Format of _reflect function not recognized");
+
+ assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
+ "Format of _reflect function not recognized");
+
+ std::string ReflectArg =
+ cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();
+
+ ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
+ DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
+
+ int ReflectVal = 0; // The default value is 0
+ if (VarMap.find(ReflectArg) != VarMap.end()) {
+ ReflectVal = VarMap[ReflectArg];
+ }
+ Reflect->replaceAllUsesWith(
+ ConstantInt::get(Reflect->getType(), ReflectVal));
+ ToRemove.push_back(Reflect);
+ }
+ if (ToRemove.size() == 0)
+ return false;
+
+ for (unsigned i = 0, e = ToRemove.size(); i != e; ++i)
+ ToRemove[i]->eraseFromParent();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/contrib/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
new file mode 100644
index 000000000000..cc7d4dc5ece7
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- NVPTXTargetInfo.cpp - NVPTX Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheNVPTXTarget32;
+Target llvm::TheNVPTXTarget64;
+
+extern "C" void LLVMInitializeNVPTXTargetInfo() {
+ RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
+ "NVIDIA PTX 32-bit");
+ RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
+ "NVIDIA PTX 64-bit");
+}
diff --git a/contrib/llvm/lib/Target/NVPTX/cl_common_defines.h b/contrib/llvm/lib/Target/NVPTX/cl_common_defines.h
new file mode 100644
index 000000000000..45cc0b8b67f2
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/cl_common_defines.h
@@ -0,0 +1,122 @@
+#ifndef __CL_COMMON_DEFINES_H__
+#define __CL_COMMON_DEFINES_H__
+// This file includes defines that are common to both kernel code and
+// the NVPTX back-end.
+
+//
+// Common defines for Image intrinsics
+// Channel order
+enum {
+ CLK_R = 0x10B0,
+ CLK_A = 0x10B1,
+ CLK_RG = 0x10B2,
+ CLK_RA = 0x10B3,
+ CLK_RGB = 0x10B4,
+ CLK_RGBA = 0x10B5,
+ CLK_BGRA = 0x10B6,
+ CLK_ARGB = 0x10B7,
+
+#if (__NV_CL_C_VERSION == __NV_CL_C_VERSION_1_0)
+ CLK_xRGB = 0x10B7,
+#endif
+
+ CLK_INTENSITY = 0x10B8,
+ CLK_LUMINANCE = 0x10B9
+
+#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
+ ,
+ CLK_Rx = 0x10BA,
+ CLK_RGx = 0x10BB,
+ CLK_RGBx = 0x10BC
+#endif
+};
+
+typedef enum clk_channel_type {
+ // valid formats for float return types
+ CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
+ CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
+ CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
+ CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
+ CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
+ CLK_FLOAT = 0x10DE, // four channel RGBA float
+
+#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
+ CLK_UNORM_SHORT_565 = 0x10D4,
+ CLK_UNORM_SHORT_555 = 0x10D5,
+ CLK_UNORM_INT_101010 = 0x10D6,
+#endif
+
+ // valid only for integer return types
+ CLK_SIGNED_INT8 = 0x10D7,
+ CLK_SIGNED_INT16 = 0x10D8,
+ CLK_SIGNED_INT32 = 0x10D9,
+ CLK_UNSIGNED_INT8 = 0x10DA,
+ CLK_UNSIGNED_INT16 = 0x10DB,
+ CLK_UNSIGNED_INT32 = 0x10DC,
+
+ // CI SPI for CPU
+ __CLK_UNORM_INT8888, // four channel ARGB unorm8
+ __CLK_UNORM_INT8888R, // four channel BGRA unorm8
+
+ __CLK_VALID_IMAGE_TYPE_COUNT,
+ __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
+ __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
+ // represent any image type
+ __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1
+} clk_channel_type;
+
+typedef enum clk_sampler_type {
+ __CLK_ADDRESS_BASE = 0,
+ CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
+
+#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
+ CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
+#endif
+ __CLK_ADDRESS_MASK =
+ CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE |
+ CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
+ __CLK_ADDRESS_BITS = 3, // number of bits required to
+ // represent address info
+
+ __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
+ CLK_NORMALIZED_COORDS_FALSE = 0,
+ CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
+ __CLK_NORMALIZED_MASK =
+ CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE,
+ __CLK_NORMALIZED_BITS = 1, // number of bits required to
+ // represent normalization
+
+ __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS,
+ CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
+ CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
+ CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
+ __CLK_FILTER_MASK =
+ CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC,
+ __CLK_FILTER_BITS = 2, // number of bits required to
+ // represent address info
+
+ __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
+ CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
+ CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
+ CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
+ __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC,
+ __CLK_MIP_BITS = 2,
+
+ __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
+ __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
+ __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
+
+ __CLK_ANISOTROPIC_RATIO_BITS = 5,
+ __CLK_ANISOTROPIC_RATIO_MASK =
+ (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1)
+} clk_sampler_type;
+
+// Memory synchronization
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
+#endif // __CL_COMMON_DEFINES_H__
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
new file mode 100644
index 000000000000..bacc108c62b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -0,0 +1,294 @@
+//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PPCInstPrinter.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#include "PPCGenAsmWriter.inc"
+
+void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ // Check for slwi/srwi mnemonics.
+ if (MI->getOpcode() == PPC::RLWINM) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char MB = MI->getOperand(3).getImm();
+ unsigned char ME = MI->getOperand(4).getImm();
+ bool useSubstituteMnemonic = false;
+ if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+ O << "\tslwi "; useSubstituteMnemonic = true;
+ }
+ if (SH <= 31 && MB == (32-SH) && ME == 31) {
+ O << "\tsrwi "; useSubstituteMnemonic = true;
+ SH = 32-SH;
+ }
+ if (useSubstituteMnemonic) {
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
+
+ if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ O << "\tmr ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ if (MI->getOpcode() == PPC::RLDICR) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char ME = MI->getOperand(3).getImm();
+ // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+ if (63-SH == ME) {
+ O << "\tsldi ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
+
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+
+void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ unsigned Code = MI->getOperand(OpNo).getImm();
+
+ if (StringRef(Modifier) == "cc") {
+ switch ((PPC::Predicate)Code) {
+ case PPC::PRED_LT: O << "lt"; return;
+ case PPC::PRED_LE: O << "le"; return;
+ case PPC::PRED_EQ: O << "eq"; return;
+ case PPC::PRED_GE: O << "ge"; return;
+ case PPC::PRED_GT: O << "gt"; return;
+ case PPC::PRED_NE: O << "ne"; return;
+ case PPC::PRED_UN: O << "un"; return;
+ case PPC::PRED_NU: O << "nu"; return;
+ }
+ }
+
+ assert(StringRef(Modifier) == "reg" &&
+ "Need to specify 'cc' or 'reg' as predicate op modifier!");
+ printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int Value = MI->getOperand(OpNo).getImm();
+ Value = SignExtend32<5>(Value);
+ O << (int)Value;
+}
+
+void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 31 && "Invalid u5imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 63 && "Invalid u6imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ O << (short)(MI->getOperand(OpNo).getImm()*4);
+ else
+ printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (!MI->getOperand(OpNo).isImm())
+ return printOperand(MI, OpNo, O);
+
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ O << "$+";
+ printAbsAddrOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (int)MI->getOperand(OpNo).getImm()*4;
+}
+
+
+void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CCReg = MI->getOperand(OpNo).getReg();
+ unsigned RegNo;
+ switch (CCReg) {
+ default: llvm_unreachable("Unknown CR register");
+ case PPC::CR0: RegNo = 0; break;
+ case PPC::CR1: RegNo = 1; break;
+ case PPC::CR2: RegNo = 2; break;
+ case PPC::CR3: RegNo = 3; break;
+ case PPC::CR4: RegNo = 4; break;
+ case PPC::CR5: RegNo = 5; break;
+ case PPC::CR6: RegNo = 6; break;
+ case PPC::CR7: RegNo = 7; break;
+ }
+ O << (0x80 >> RegNo);
+}
+
+void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printSymbolLo(MI, OpNo, O);
+ O << '(';
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ printS16X4ImmOperand(MI, OpNo, O);
+ else
+ printSymbolLo(MI, OpNo, O);
+ O << '(';
+
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+
+void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ if (MI->getOperand(OpNo).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo+1, O);
+}
+
+
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used by for linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ const char *RegName = getRegisterName(Op.getReg());
+ // The linux and AIX assembler does not take register prefixes.
+ if (!isDarwinSyntax())
+ RegName = stripRegisterPrefix(RegName);
+
+ O << RegName;
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+}
+
+void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ return printS16ImmOperand(MI, OpNo, O);
+
+ // FIXME: This is a terrible hack because we can't encode lo16() as an operand
+ // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
+ if (MI->getOperand(OpNo).isExpr() &&
+ isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+ O << "lo16(";
+ printOperand(MI, OpNo, O);
+ O << ')';
+ } else {
+ printOperand(MI, OpNo, O);
+ }
+}
+
+void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ return printS16ImmOperand(MI, OpNo, O);
+
+ // FIXME: This is a terrible hack because we can't encode lo16() as an operand
+ // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
+ if (MI->getOperand(OpNo).isExpr() &&
+ isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+ O << "ha16(";
+ printOperand(MI, OpNo, O);
+ O << ')';
+ } else {
+ printOperand(MI, OpNo, O);
+ }
+}
+
+
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
new file mode 100644
index 000000000000..8f1e211c3e96
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -0,0 +1,69 @@
+//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCINSTPRINTER_H
+#define PPCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PPCInstPrinter : public MCInstPrinter {
+ // 0 -> AIX, 1 -> Darwin.
+ unsigned SyntaxVariant;
+public:
+ PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, unsigned syntaxVariant)
+ : MCInstPrinter(MAI, MII, MRI), SyntaxVariant(syntaxVariant) {}
+
+ bool isDarwinSyntax() const {
+ return SyntaxVariant == 1;
+ }
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier = 0);
+
+
+ void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ // FIXME: Remove
+ void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
new file mode 100644
index 000000000000..ec2657403e0c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -0,0 +1,195 @@
+//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ case FK_Data_8:
+ case PPC::fixup_ppc_tlsreg:
+ case PPC::fixup_ppc_nofixup:
+ return Value;
+ case PPC::fixup_ppc_brcond14:
+ return Value & 0xfffc;
+ case PPC::fixup_ppc_br24:
+ return Value & 0x3fffffc;
+#if 0
+ case PPC::fixup_ppc_hi16:
+ return (Value >> 16) & 0xffff;
+#endif
+ case PPC::fixup_ppc_ha16:
+ return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
+ case PPC::fixup_ppc_lo16:
+ return Value & 0xffff;
+ case PPC::fixup_ppc_lo16_ds:
+ return Value & 0xfffc;
+ }
+}
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ llvm_unreachable("Relocation emission for MachO/PPC unimplemented!");
+ }
+};
+
+class PPCAsmBackend : public MCAsmBackend {
+const Target &TheTarget;
+public:
+ PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {}
+
+ unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_lo16", 16, 16, 0 },
+ { "fixup_ppc_ha16", 16, 16, 0 },
+ { "fixup_ppc_lo16_ds", 16, 14, 0 },
+ { "fixup_ppc_tlsreg", 0, 0, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != 4; ++i)
+ Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const {
+ // FIXME.
+ return false;
+ }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME.
+ llvm_unreachable("relaxInstruction() unimplemented");
+ }
+
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ // FIXME.
+ llvm_unreachable("relaxInstruction() unimplemented");
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // FIXME: Zero fill for now. That's not right, but at least will get the
+ // section size right.
+ for (uint64_t i = 0; i != Count; ++i)
+ OW->Write8(0);
+ return true;
+ }
+
+ unsigned getPointerSize() const {
+ StringRef Name = TheTarget.getName();
+ if (Name == "ppc64") return 8;
+ assert(Name == "ppc32" && "Unknown target name!");
+ return 4;
+ }
+};
+} // end anonymous namespace
+
+
+// FIXME: This should be in a separate file.
+namespace {
+ class DarwinPPCAsmBackend : public PPCAsmBackend {
+ public:
+ DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ bool is64 = getPointerSize() == 8;
+ return createMachObjectWriter(new PPCMachObjectWriter(
+ /*Is64Bit=*/is64,
+ (is64 ? object::mach::CTM_PowerPC64 :
+ object::mach::CTM_PowerPC),
+ object::mach::CSPPC_ALL),
+ OS, /*IsLittleEndian=*/false);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+ };
+
+ class ELFPPCAsmBackend : public PPCAsmBackend {
+ uint8_t OSABI;
+ public:
+ ELFPPCAsmBackend(const Target &T, uint8_t OSABI) :
+ PPCAsmBackend(T), OSABI(OSABI) { }
+
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ bool is64 = getPointerSize() == 8;
+ return createPPCELFObjectWriter(OS, is64, OSABI);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+ };
+
+} // end anonymous namespace
+
+
+
+
+MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+ if (Triple(TT).isOSDarwin())
+ return new DarwinPPCAsmBackend(T);
+
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFPPCAsmBackend(T, OSABI);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
new file mode 100644
index 000000000000..84e4175e635b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -0,0 +1,277 @@
+//===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class PPCELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI);
+
+ virtual ~PPCELFObjectWriter();
+ protected:
+ virtual unsigned getRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
+
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs);
+ };
+
+ class PPCELFRelocationEntry : public ELFRelocationEntry {
+ public:
+ PPCELFRelocationEntry(const ELFRelocationEntry &RE);
+ bool operator<(const PPCELFRelocationEntry &RE) const {
+ return (RE.r_offset < r_offset ||
+ (RE.r_offset == r_offset && RE.Type > Type));
+ }
+ };
+}
+
+PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE)
+ : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol,
+ RE.r_addend, *RE.Fixup) {}
+
+PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI,
+ Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
+ /*HasRelocationAddend*/ true) {}
+
+PPCELFObjectWriter::~PPCELFObjectWriter() {
+}
+
+unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const
+{
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ // determine the type of the relocation
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case PPC::fixup_ppc_br24:
+ Type = ELF::R_PPC_REL24;
+ break;
+ case FK_Data_4:
+ case FK_PCRel_4:
+ Type = ELF::R_PPC_REL32;
+ break;
+ case FK_Data_8:
+ case FK_PCRel_8:
+ Type = ELF::R_PPC64_REL64;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case PPC::fixup_ppc_br24:
+ Type = ELF::R_PPC_ADDR24;
+ break;
+ case PPC::fixup_ppc_brcond14:
+ Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_
+ break;
+ case PPC::fixup_ppc_ha16:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
+ Type = ELF::R_PPC_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
+ Type = ELF::R_PPC64_DTPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_ADDR16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_HA:
+ Type = ELF::R_PPC64_TOC16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA:
+ Type = ELF::R_PPC64_GOT_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HA;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_lo16:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
+ Type = ELF::R_PPC_TPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_ADDR16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ Type = ELF::R_PPC64_TOC16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ Type = ELF::R_PPC64_TOC16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSGD16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_lo16_ds:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_ADDR16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ Type = ELF::R_PPC64_TOC16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ Type = ELF::R_PPC64_TOC16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO:
+ Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_tlsreg:
+ Type = ELF::R_PPC64_TLS;
+ break;
+ case PPC::fixup_ppc_nofixup:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TLSGD:
+ Type = ELF::R_PPC64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TLSLD:
+ Type = ELF::R_PPC64_TLSLD;
+ break;
+ }
+ break;
+ case FK_Data_8:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TOC:
+ Type = ELF::R_PPC64_TOC;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_ADDR64;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ Type = ELF::R_PPC_ADDR32;
+ break;
+ case FK_Data_2:
+ Type = ELF::R_PPC_ADDR16;
+ break;
+ }
+ }
+ return Type;
+}
+
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ return getRelocTypeInner(Target, Fixup, IsPCRel);
+}
+
+const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ assert(Target.getSymA() && "SymA cannot be 0");
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol();
+
+ unsigned RelocType = getRelocTypeInner(Target, Fixup, IsPCRel);
+
+ // The .odp creation emits a relocation against the symbol ".TOC." which
+ // create a R_PPC64_TOC relocation. However the relocation symbol name
+ // in final object creation should be NULL, since the symbol does not
+ // really exist, it is just the reference to TOC base for the current
+ // object file.
+ bool EmitThisSym = RelocType != ELF::R_PPC64_TOC;
+
+ if (EmitThisSym && !Symbol.isTemporary())
+ return &Symbol;
+ return NULL;
+}
+
+void PPCELFObjectWriter::
+adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
+ switch ((unsigned)Fixup.getKind()) {
+ case PPC::fixup_ppc_ha16:
+ case PPC::fixup_ppc_lo16:
+ case PPC::fixup_ppc_lo16_ds:
+ RelocOffset += 2;
+ break;
+ default:
+ break;
+ }
+}
+
+// The standard sorter only sorts on the r_offset field, but PowerPC can
+// have multiple relocations at the same offset. Sort secondarily on the
+// relocation type to avoid nondeterminism.
+void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+
+ // Copy to a temporary vector of relocation entries having a different
+ // sort function.
+ std::vector<PPCELFRelocationEntry> TmpRelocs;
+
+ for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin();
+ R != Relocs.end(); ++R) {
+ TmpRelocs.push_back(PPCELFRelocationEntry(*R));
+ }
+
+ // Sort in place by ascending r_offset and descending r_type.
+ array_pod_sort(TmpRelocs.begin(), TmpRelocs.end());
+
+ // Copy back to the original vector.
+ unsigned I = 0;
+ for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin();
+ R != TmpRelocs.end(); ++R, ++I) {
+ Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type,
+ R->Symbol, R->r_addend, *R->Fixup);
+ }
+}
+
+
+MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
new file mode 100644
index 000000000000..86c44f57a5e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -0,0 +1,54 @@
+//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PPC_PPCFIXUPKINDS_H
+#define LLVM_PPC_PPCFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+#undef PPC
+
+namespace llvm {
+namespace PPC {
+enum Fixups {
+ // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b'
+ // and 'bl'.
+ fixup_ppc_br24 = FirstTargetFixupKind,
+
+ /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
+ /// branches.
+ fixup_ppc_brcond14,
+
+ /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs
+ /// like 'li'.
+ fixup_ppc_lo16,
+
+ /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs
+ /// like 'lis'.
+ fixup_ppc_ha16,
+
+ /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with
+ /// implied 2 zero bits for instrs like 'std'.
+ fixup_ppc_lo16_ds,
+
+ /// fixup_ppc_tlsreg - Insert thread-pointer register number.
+ fixup_ppc_tlsreg,
+
+ /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
+ /// to __tls_get_addr for the TLS general and local dynamic models.
+ fixup_ppc_nofixup,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
new file mode 100644
index 000000000000..a25d7fe64f3a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -0,0 +1,70 @@
+//===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MCAsmInfoDarwin properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCAsmInfo.h"
+using namespace llvm;
+
+void PPCMCAsmInfoDarwin::anchor() { }
+
+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+ IsLittleEndian = false;
+
+ PCSymbol = ".";
+ CommentString = ";";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
+
+ AssemblerDialect = 1; // New-Style mnemonics.
+ SupportsDebugInformation= true; // Debug information.
+}
+
+void PPCLinuxMCAsmInfo::anchor() { }
+
+PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+ IsLittleEndian = false;
+
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+
+ // Uses '.section' before '.bss' directive
+ UsesELFSectionDirectiveForBSS = true;
+
+ // Debug Information
+ SupportsDebugInformation = true;
+
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
+ AssemblerDialect = 0; // Old-Style mnemonics.
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
new file mode 100644
index 000000000000..7b4ed9f14eb6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -0,0 +1,35 @@
+//===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCAsmInfoDarwin class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETASMINFO_H
+#define PPCTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+ public:
+ explicit PPCMCAsmInfoDarwin(bool is64Bit);
+ };
+
+ class PPCLinuxMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
+ explicit PPCLinuxMCAsmInfo(bool is64Bit);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
new file mode 100644
index 000000000000..2223cd623cb5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -0,0 +1,239 @@
+//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+
+ const MCSubtargetInfo &STI;
+ const MCContext &CTX;
+ Triple TT;
+
+public:
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) {
+ }
+
+ ~PPCMCCodeEmitter() {}
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
+
+ // BL8_NOP etc. all have a size of 8 because of the following 'nop'.
+ unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
+ Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD)
+ Size = 8;
+
+ // Output the constant in big endian byte order.
+ int ShiftValue = (Size * 8) - 8;
+ for (unsigned i = 0; i != Size; ++i) {
+ OS << (char)(Bits >> ShiftValue);
+ Bits <<= 8;
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+ }
+
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new PPCMCCodeEmitter(MCII, STI, Ctx);
+}
+
+unsigned PPCMCCodeEmitter::
+getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_br24));
+
+ // For special TLS calls, add another fixup for the symbol. Apparently
+ // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently
+ // similar that TblGen will not generate a separate case for the latter
+ // two, so this is the only way to get the extra fixup generated.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) {
+ const MCOperand &MO2 = MI.getOperand(OpNo+1);
+ Fixups.push_back(MCFixup::Create(0, MO2.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ }
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_brcond14));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_ha16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16_ds));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the TLS register, which simply provides a relocation
+ // hint to the linker that this statement is part of a relocation sequence.
+ // Return the thread-pointer register's encoding.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_tlsreg));
+ return CTX.getRegisterInfo().getEncodingValue(PPC::X13);
+}
+
+unsigned PPCMCCodeEmitter::
+get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTCRF ||
+ MI.getOpcode() == PPC::MFOCRF ||
+ MI.getOpcode() == PPC::MTCRF8) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+ return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+}
+
+
+unsigned PPCMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+
+#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
new file mode 100644
index 000000000000..2209f936ec33
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -0,0 +1,160 @@
+//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "InstPrinter/PPCInstPrinter.h"
+#include "PPCMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PPCGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PPCGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPPCMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPPCMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = (TheTriple.getArch() == Triple::ppc64);
+ unsigned Flavour = isPPC64 ? 0 : 1;
+ unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR;
+
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitPPCMCRegisterInfo(X, RA, Flavour, Flavour);
+ return X;
+}
+
+static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPPCMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
+
+ MCAsmInfo *MAI;
+ if (TheTriple.isOSDarwin())
+ MAI = new PPCMCAsmInfoDarwin(isPPC64);
+ else
+ MAI = new PPCLinuxMCAsmInfo(isPPC64);
+
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(isPPC64? PPC::X1 : PPC::R1, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ if (RM == Reloc::Default) {
+ Triple T(TT);
+ if (T.isOSDarwin())
+ RM = Reloc::DynamicNoPIC;
+ else
+ RM = Reloc::Static;
+ }
+ if (CM == CodeModel::Default) {
+ Triple T(TT);
+ if (!T.isOSDarwin() && T.getArch() == Triple::ppc64)
+ CM = CodeModel::Medium;
+ }
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ if (Triple(TT).isOSDarwin())
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new PPCInstPrinter(MAI, MII, MRI, SyntaxVariant);
+}
+
+extern "C" void LLVMInitializePowerPCTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
+ RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
+ createPPCMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
+ createPPCMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
new file mode 100644
index 000000000000..38a7420d972d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -0,0 +1,68 @@
+//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCMCTARGETDESC_H
+#define PPCMCTARGETDESC_H
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+class raw_ostream;
+
+extern Target ThePPC32Target;
+extern Target ThePPC64Target;
+
+MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU);
+
+/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint8_t OSABI);
+} // End llvm namespace
+
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
+// Defines symbolic names for PowerPC registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "PPCGenRegisterInfo.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PPCGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
new file mode 100644
index 000000000000..d84eb9c6aa03
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -0,0 +1,31 @@
+//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCPredicates.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+using namespace llvm;
+
+PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ case PPC::PRED_EQ: return PPC::PRED_NE;
+ case PPC::PRED_NE: return PPC::PRED_EQ;
+ case PPC::PRED_LT: return PPC::PRED_GE;
+ case PPC::PRED_GE: return PPC::PRED_LT;
+ case PPC::PRED_GT: return PPC::PRED_LE;
+ case PPC::PRED_LE: return PPC::PRED_GT;
+ case PPC::PRED_NU: return PPC::PRED_UN;
+ case PPC::PRED_UN: return PPC::PRED_NU;
+ }
+ llvm_unreachable("Unknown PPC branch opcode!");
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
new file mode 100644
index 000000000000..ad2b01812816
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -0,0 +1,43 @@
+//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
+namespace llvm {
+namespace PPC {
+ /// Predicate - These are "(BI << 5) | BO" for various predicates.
+ enum Predicate {
+ PRED_LT = (0 << 5) | 12,
+ PRED_LE = (1 << 5) | 4,
+ PRED_EQ = (2 << 5) | 12,
+ PRED_GE = (0 << 5) | 4,
+ PRED_GT = (1 << 5) | 12,
+ PRED_NE = (2 << 5) | 4,
+ PRED_UN = (3 << 5) | 12,
+ PRED_NU = (3 << 5) | 4
+ };
+
+ /// Invert the specified predicate. != -> ==, < -> >=.
+ Predicate InvertPredicate(Predicate Opcode);
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
new file mode 100644
index 000000000000..446b6854fb5b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -0,0 +1,90 @@
+//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PowerPC back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_H
+#define LLVM_TARGET_POWERPC_H
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include <string>
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+ class PPCTargetMachine;
+ class FunctionPass;
+ class ImmutablePass;
+ class JITCodeEmitter;
+ class MachineInstr;
+ class AsmPrinter;
+ class MCInst;
+
+ FunctionPass *createPPCCTRLoops();
+ FunctionPass *createPPCBranchSelectionPass();
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &MCE);
+ void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin);
+
+ /// \brief Creates an PPC-specific Target Transformation Info pass.
+ ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
+
+ namespace PPCII {
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // PPC Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB = 1,
+
+ /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
+ /// the function's picbase, e.g. lo16(symbol-picbase).
+ MO_PIC_FLAG = 2,
+
+ /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
+ /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
+ MO_NLP_FLAG = 4,
+
+ /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
+ /// symbol with hidden visibility. This causes a different kind of
+ /// non-lazy-pointer to be generated.
+ MO_NLP_HIDDEN_FLAG = 8,
+
+ /// The next are not flags but distinct values.
+ MO_ACCESS_MASK = 0xf0,
+
+ /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
+ MO_LO16 = 1 << 4,
+ MO_HA16 = 2 << 4,
+
+ MO_TPREL16_HA = 3 << 4,
+ MO_TPREL16_LO = 4 << 4,
+
+ /// These values identify relocations on immediates folded
+ /// into memory operations.
+ MO_DTPREL16_LO = 5 << 4,
+ MO_TLSLD16_LO = 6 << 4,
+ MO_TOC16_LO = 7 << 4
+ };
+ } // end namespace PPCII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
new file mode 100644
index 000000000000..389216278ee4
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -0,0 +1,240 @@
+//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC Subtarget features.
+//
+
+//===----------------------------------------------------------------------===//
+// CPU Directives //
+//===----------------------------------------------------------------------===//
+
+def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">;
+def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
+def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
+ "PPC::DIR_E500mc", "">;
+def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
+ "PPC::DIR_E5500", "">;
+def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
+def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
+def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
+def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
+def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
+def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
+
+def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
+ "Enable 64-bit instructions">;
+def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
+ "Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
+ "Enable Altivec instructions">;
+def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
+ "Enable the MFOCRF instruction">;
+def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
+ "Enable the fsqrt instruction">;
+def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
+ "Enable the fre instruction">;
+def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
+ "Enable the fres instruction">;
+def FeatureFRSQRTE : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
+ "Enable the frsqrte instruction">;
+def FeatureFRSQRTES : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
+ "Enable the frsqrtes instruction">;
+def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
+ "Assume higher precision reciprocal estimates">;
+def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
+ "Enable the stfiwx instruction">;
+def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
+ "Enable the lfiwax instruction">;
+def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
+ "Enable the fri[mnpz] instructions">;
+def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
+ "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">;
+def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
+ "Enable the isel instruction">;
+def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
+ "Enable the popcnt[dw] instructions">;
+def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
+ "Enable the ldbrx instruction">;
+def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
+ "Enable Book E instructions">;
+def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
+ "Enable QPX instructions">;
+
+// Note: Future features to add when support is extended to more
+// recent ISA levels:
+//
+// CMPB p6, p6x, p7 cmpb
+// DFP p6, p6x, p7 decimal floating-point instructions
+// POPCNTB p5 through p7 popcntb and related instructions
+// VSX p7 vector-scalar instruction set
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PPCRegisterInfo.td"
+include "PPCSchedule.td"
+include "PPCInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC processors supported.
+//
+
+def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureBookE]>;
+def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureBookE]>;
+def : Processor<"601", G3Itineraries, [Directive601]>;
+def : Processor<"602", G3Itineraries, [Directive602]>;
+def : Processor<"603", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603e", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603ev", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604e", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"620", G3Itineraries, [Directive620,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"750", G4Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g3", G3Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : ProcessorModel<"970", G5Model,
+ [Directive970, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"g5", G5Model,
+ [Directive970, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureFRES, FeatureFRSQRTE,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"e500mc", PPCE500mcModel,
+ [DirectiveE500mc, FeatureMFOCRF,
+ FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
+def : ProcessorModel<"e5500", PPCE5500Model,
+ [DirectiveE5500, FeatureMFOCRF, Feature64Bit,
+ FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
+def : ProcessorModel<"a2", PPCA2Model,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */]>;
+def : ProcessorModel<"a2q", PPCA2Model,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */, FeatureQPX]>;
+def : ProcessorModel<"pwr3", G5Model,
+ [DirectivePwr3, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr4", G5Model,
+ [DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRES, FeatureFRSQRTE,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5", G5Model,
+ [DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, FeatureFPRND, Feature64Bit]>;
+def : ProcessorModel<"pwr6", G5Model,
+ [DirectivePwr6, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"pwr6x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit]>;
+def : ProcessorModel<"pwr7", G5Model,
+ [DirectivePwr7, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"ppc", G3Itineraries, [Directive32]>;
+def : ProcessorModel<"ppc64", G5Model,
+ [Directive64, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
+ FeatureFRSQRTE, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PPCCallingConv.td"
+
+def PPCInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+}
+
+def PPCAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def PPC : Target {
+ // Information about the instructions.
+ let InstructionSet = PPCInstrInfo;
+
+ let AssemblyWriters = [PPCAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
new file mode 100644
index 000000000000..96a9f0a39006
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -0,0 +1,1126 @@
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PowerPC assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "PPC.h"
+#include "InstPrinter/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+namespace {
+ class PPCAsmPrinter : public AsmPrinter {
+ protected:
+ MapVector<MCSymbol*, MCSymbol*> TOC;
+ const PPCSubtarget &Subtarget;
+ uint64_t TOCLabelID;
+ public:
+ explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
+
+ virtual const char *getPassName() const {
+ return "PowerPC Assembly Printer";
+ }
+
+ MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
+ };
+
+ /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+ class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : PPCAsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Linux PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitFunctionEntryLabel();
+
+ void EmitFunctionBodyEnd();
+ };
+
+ /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+ /// OS X
+ class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : PPCAsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Darwin PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M);
+ void EmitStartOfAsmFile(Module &M);
+
+ void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
+ };
+} // end of anonymous namespace
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used by for linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+ // Linux assembler (Others?) does not take register mnemonics.
+ // FIXME - What about special registers used in mfspr/mtspr?
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName;
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ // FIXME: PIC relocation model
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_BlockAddress:
+ O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ return;
+ case MachineOperand::MO_ExternalSymbol: {
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() == Reloc::Static) {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ }
+
+ MCSymbol *NLPSym =
+ OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
+ MO.getSymbolName()+"$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
+
+ O << *NLPSym;
+ return;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *SymToPrint;
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (TM.getRelocationModel() != Reloc::Static &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) {
+ if (!GV->hasHiddenVisibility()) {
+ SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>()
+ .getGVStubEntry(SymToPrint);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage()) {
+ SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().
+ getHiddenGVStubEntry(SymToPrint);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else {
+ SymToPrint = Mang->getSymbol(GV);
+ }
+ } else {
+ SymToPrint = Mang->getSymbol(GV);
+ }
+
+ O << *SymToPrint;
+
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'c': // Don't print "$" before a global var name or constant.
+ break; // PPC never has a prefix.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+// At the moment, all inline asm memory operands are a single register.
+// In any case, the output of this routine should always be just one
+// assembler operand.
+
+bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'y': // A memory reference for an X-form instruction
+ {
+ const char *RegName = "r0";
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName << ", ";
+ printOperand(MI, OpNo, O);
+ return false;
+ }
+ }
+ }
+
+ assert(MI->getOperand(OpNo).isReg());
+ O << "0(";
+ printOperand(MI, OpNo, O);
+ O << ")";
+ return false;
+}
+
+
+/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
+/// exists for it. If not, create one. Then return a symbol that references
+/// the TOC entry.
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+
+ MCSymbol *&TOCEntry = TOC[Sym];
+
+ // To avoid name clash check if the name already exists.
+ while (TOCEntry == 0) {
+ if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ "C" + Twine(TOCLabelID++)) == 0) {
+ TOCEntry = GetTempSymbol("C", TOCLabelID);
+ }
+ }
+
+ return TOCEntry;
+}
+
+
+/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
+/// the current output stream.
+///
+void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+
+ // Lower multi-instruction pseudo operations.
+ switch (MI->getOpcode()) {
+ default: break;
+ case TargetOpcode::DBG_VALUE: {
+ if (!isVerbose() || !OutStreamer.hasRawTextSupport()) return;
+
+ SmallString<32> Str;
+ raw_svector_ostream O(Str);
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+ OutStreamer.EmitRawText(O.str());
+ return;
+ }
+
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8: {
+ // Transform %LR = MovePCtoLR
+ // Into this, where the label is the PIC base:
+ // bl L1$pb
+ // L1$pb:
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+
+ // Emit the 'bl'.
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+ return;
+ }
+ case PPC::LDtocJTI:
+ case PPC::LDtocCPT:
+ case PPC::LDtoc: {
+ // Transform %X3 = LDtoc <ga:@min1>, %X2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD, and the global address operand to be a
+ // reference to the TOC entry we will synthesize later.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+
+ // Map symbol -> label of TOC entry
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ MCSymbol *MOSymbol = 0;
+ if (MO.isGlobal())
+ MOSymbol = Mang->getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
+ case PPC::ADDIStocHA: {
+ // Transform %Xd = ADDIStocHA %X2, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to ADDIS8. If the global address is external,
+ // has common linkage, is a function address, or is a jump table
+ // address, then generate a TOC entry and reference that. Otherwise
+ // reference the symbol directly.
+ TmpInst.setOpcode(PPC::ADDIS8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+ "Invalid operand for ADDIStocHA!");
+ MCSymbol *MOSymbol = 0;
+ bool IsExternal = false;
+ bool IsFunction = false;
+ bool IsCommon = false;
+ bool IsAvailExt = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ IsExternal = GVar && !GVar->hasInitializer();
+ IsCommon = GVar && RealGValue->hasCommonLinkage();
+ IsFunction = !GVar;
+ IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage();
+ } else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+
+ if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI())
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_HA,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::LDtocL: {
+ // Transform %Xd = LDtocL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD. If the global address is external, has
+ // common linkage, or is a jump table address, then reference the
+ // associated TOC entry. Otherwise reference the symbol directly.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+ "Invalid operand for LDtocL!");
+ MCSymbol *MOSymbol = 0;
+
+ if (MO.isJTI())
+ MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+
+ if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+ RealGValue->hasAvailableExternallyLinkage())
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDItocL: {
+ // Transform %Xd = ADDItocL %Xs, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to ADDI8. If the global address is external, then
+ // generate a TOC entry and reference that. Otherwise reference the
+ // symbol directly.
+ TmpInst.setOpcode(PPC::ADDI8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
+ MCSymbol *MOSymbol = 0;
+ bool IsExternal = false;
+ bool IsFunction = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ IsExternal = GVar && !GVar->hasInitializer();
+ IsFunction = !GVar;
+ } else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+
+ if (IsFunction || IsExternal)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDISgotTprelHA: {
+ // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTprel));
+ return;
+ }
+ case PPC::LDgotTprelL: {
+ // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDIStlsgdHA: {
+ // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::ADDItlsgdL: {
+ // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::GETtlsADDR: {
+ // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd)
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDIStlsldHA: {
+ // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::ADDItlsldL: {
+ // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::GETtlsldADDR: {
+ // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld)
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDISdtprelHA: {
+ // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X3)
+ .addExpr(SymDtprel));
+ return;
+ }
+ case PPC::ADDIdtprelL: {
+ // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@dtprel@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
+ return;
+ }
+ case PPC::MFCRpseud:
+ case PPC::MFCR8pseud:
+ // Transform: %R3 = MFCRpseud %CR7
+ // Into: %R3 = MFCR ;; cr7
+ OutStreamer.AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(1).getReg()));
+ OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR)
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ case PPC::SYNC:
+ // In Book E sync is called msync, handle this special case here...
+ if (Subtarget.isBookE()) {
+ OutStreamer.EmitRawText(StringRef("\tmsync"));
+ return;
+ }
+ }
+
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
+ if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label.
+ return AsmPrinter::EmitFunctionEntryLabel();
+
+ // Emit an official procedure descriptor.
+ const MCSection *Current = OutStreamer.getCurrentSection();
+ const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(Section);
+ OutStreamer.EmitLabel(CurrentFnSym);
+ OutStreamer.EmitValueToAlignment(8);
+ MCSymbol *Symbol1 =
+ OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
+ // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
+ // entry point.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
+ 8 /*size*/);
+ MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ // Generates a R_PPC64_TOC relocation for TOC base insertion.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
+ MCSymbolRefExpr::VK_PPC_TOC, OutContext),
+ 8/*size*/);
+ // Emit a null environment pointer.
+ OutStreamer.EmitIntValue(0, 8 /* size */);
+ OutStreamer.SwitchSection(Current);
+
+ MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
+ ".L." + Twine(CurrentFnSym->getName()));
+ OutStreamer.EmitLabel(RealFnSym);
+ CurrentFnSymForSize = RealFnSym;
+}
+
+
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+ const DataLayout *TD = TM.getDataLayout();
+
+ bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+ if (isPPC64 && !TOC.empty()) {
+ const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(Section);
+
+ for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
+ E = TOC.end(); I != E; ++I) {
+ OutStreamer.EmitLabel(I->second);
+ MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
+ OutStreamer.EmitTCEntry(*S);
+ }
+ }
+
+ MachineModuleInfoELF &MMIELF =
+ MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// EmitFunctionBodyEnd - Print the traceback table before the .size
+/// directive.
+///
+void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
+ // Only the 64-bit target requires a traceback table. For now,
+ // we only emit the word of zeroes that GDB requires to find
+ // the end of the function, and zeroes for the eight-byte
+ // mandatory fields.
+ // FIXME: We should fill in the eight-byte mandatory fields as described in
+ // the PPC64 ELF ABI (this is a low-priority item because GDB does not
+ // currently make use of these fields).
+ if (Subtarget.isPPC64()) {
+ OutStreamer.EmitIntValue(0, 4/*size*/);
+ OutStreamer.EmitIntValue(0, 8/*size*/);
+ }
+}
+
+void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ static const char *const CPUDirectives[] = {
+ "",
+ "ppc",
+ "ppc440",
+ "ppc601",
+ "ppc602",
+ "ppc603",
+ "ppc7400",
+ "ppc750",
+ "ppc970",
+ "ppcA2",
+ "ppce500mc",
+ "ppce5500",
+ "power3",
+ "power4",
+ "power5",
+ "power5x",
+ "power6",
+ "power6x",
+ "power7",
+ "ppc64"
+ };
+
+ unsigned Directive = Subtarget.getDarwinDirective();
+ if (Subtarget.hasMFOCRF() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
+ Directive = PPC::DIR_64;
+ assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+
+ // FIXME: This is a total hack, finish mc'izing the PPC backend.
+ if (OutStreamer.hasRawTextSupport()) {
+ assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) &&
+ "CPUDirectives[] might not be up-to-date!");
+ OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+ }
+
+ // Prime text sections so they are adjacent. This reduces the likelihood a
+ // large data or debug section causes a branch to exceed 16M limit.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ OutStreamer.SwitchSection(
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText()));
+ } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+ OutStreamer.SwitchSection(
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText()));
+ }
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
+ // Remove $stub suffix, add $lazy_ptr.
+ StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
+ return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr");
+}
+
+static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
+ // Add $tmp suffix to $stub, yielding $stub$tmp.
+ return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp");
+}
+
+void PPCDarwinAsmPrinter::
+EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+
+ // .lazy_symbol_pointer
+ const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
+
+ // Output stubs for dynamically-linked functions
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ const MCSection *StubSection =
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(StubSection);
+ EmitAlignment(4);
+
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+ MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
+
+ OutStreamer.EmitLabel(Stub);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
+
+ // mflr r0
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
+ // bcl 20, 31, AnonSymbol
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon));
+ OutStreamer.EmitLabel(AnonSymbol);
+ // mflr r11
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ // addis r11, r11, ha16(LazyPtr - AnonSymbol)
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext),
+ Anon, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::R11)
+ .addReg(PPC::R11)
+ .addExpr(Sub));
+ // mtlr r0
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+
+ // ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(Sub).addExpr(Sub)
+ .addReg(PPC::R11));
+ // mtctr r12
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
+ OutStreamer.SwitchSection(LSPSection);
+ OutStreamer.EmitLabel(LazyPtr);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
+ }
+ OutStreamer.AddBlankLine();
+ return;
+ }
+
+ const MCSection *StubSection =
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+
+ OutStreamer.SwitchSection(StubSection);
+ EmitAlignment(4);
+ OutStreamer.EmitLabel(Stub);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ // lis r11, ha16(LazyPtr)
+ const MCExpr *LazyPtrHa16 =
+ MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
+ .addReg(PPC::R11)
+ .addExpr(LazyPtrHa16));
+
+ const MCExpr *LazyPtrLo16 =
+ MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16,
+ OutContext);
+ // ldu r12, lo16(LazyPtr)(r11)
+ // lwzu r12, lo16(LazyPtr)(r11)
+ OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
+ .addReg(PPC::R11));
+
+ // mtctr r12
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
+ OutStreamer.SwitchSection(LSPSection);
+ OutStreamer.EmitLabel(LazyPtr);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
+ }
+
+ OutStreamer.AddBlankLine();
+}
+
+
+bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+
+ // Darwin/PPC always uses mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty())
+ EmitFunctionStubs(Stubs);
+
+ if (MAI->doesSupportExceptionHandling() && MMI) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
+ E = Personalities.end(); I != E; ++I) {
+ if (*I) {
+ MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMIMacho.getGVStubEntry(NLPSym);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true);
+ }
+ }
+ }
+
+ // Output stubs for dynamically-linked functions.
+ Stubs = MMIMacho.GetGVStubList();
+
+ // Output macho stubs for external and common global variables.
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs.
+ // However, sometimes the types are local to the file. So we need to
+ // fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never generates
+ // code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// Darwin assembler can deal with.
+///
+static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
+ MCStreamer &Streamer) {
+ const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
+
+ if (Subtarget->isDarwin())
+ return new PPCDarwinAsmPrinter(tm, Streamer);
+ return new PPCLinuxAsmPrinter(tm, Streamer);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
new file mode 100644
index 000000000000..bd1c37868110
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -0,0 +1,195 @@
+//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 16 bits of displacement to reach their
+// target basic block. It does this in two passes; a calculation of basic block
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
+// pass should be run last, just before the assembly printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-branch-select"
+#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace llvm {
+ void initializePPCBSelPass(PassRegistry&);
+}
+
+namespace {
+ struct PPCBSel : public MachineFunctionPass {
+ static char ID;
+ PPCBSel() : MachineFunctionPass(ID) {
+ initializePPCBSelPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "PowerPC Branch Selector";
+ }
+ };
+ char PPCBSel::ID = 0;
+}
+
+INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
+ false, false)
+
+/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createPPCBranchSelectionPass() {
+ return new PPCBSel();
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+ // If the entire function is smaller than the displacement of a branch field,
+ // we know we don't need to shrink any branches in this function. This is a
+ // common case.
+ if (FuncSize < (1 << 15)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+8
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(2).getMBB();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt<16>(BranchSize)) {
+ MBBStartOffset += 4;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ if (I->getOpcode() == PPC::BCC) {
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+ } else if (I->getOpcode() == PPC::BDNZ) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDNZ8) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDZ) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDZ8) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
+ } else {
+ llvm_unreachable("Unhandled branch type!");
+ }
+
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+ // Remember that this instruction is 8-bytes, increase the size of the
+ // block by 4, remember to iterate.
+ BlockSizes[MBB.getNumber()] += 4;
+ MBBStartOffset += 8;
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
new file mode 100644
index 000000000000..81a54d7015b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -0,0 +1,775 @@
+//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the PPC branch instructions
+// that decrement and test the count register (CTR) (bdnz and friends).
+// This pass is based on the HexagonHardwareLoops pass.
+//
+// The pattern that defines the induction variable can changed depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for CTR loops:
+// - Countable loops (w/ ind. var for a trip count)
+// - Assumes loops are normalized by IndVarSimplify
+// - Try inner-most loops first
+// - No nested CTR loops.
+// - No function calls in loops.
+//
+// Note: As with unconverted loops, PPCBranchSelector must be run after this
+// pass in order to convert long-displacement jumps into jump pairs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ctrloops"
+#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
+
+namespace llvm {
+ void initializePPCCTRLoopsPass(PassRegistry&);
+}
+
+namespace {
+ class CountValue;
+ struct PPCCTRLoops : public MachineFunctionPass {
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ PPCCTRLoops() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "PPC CTR Loops"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// getCanonicalInductionVariable - Check to see if the loop has a canonical
+ /// induction variable.
+ /// Should be defined in MachineLoop. Based upon version in class Loop.
+ void getCanonicalInductionVariable(MachineLoop *L,
+ SmallVector<MachineInstr *, 4> &IVars,
+ SmallVector<MachineInstr *, 4> &IOps) const;
+
+ /// getTripCount - Return a loop-invariant LLVM register indicating the
+ /// number of times the loop will be executed. If the trip-count cannot
+ /// be determined, this return null.
+ CountValue *getTripCount(MachineLoop *L,
+ SmallVector<MachineInstr *, 2> &OldInsts) const;
+
+ /// isInductionOperation - Return true if the instruction matches the
+ /// pattern for an opertion that defines an induction variable.
+ bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+
+ /// isInvalidOperation - Return true if the instruction is not valid within
+ /// a CTR loop.
+ bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+ /// containsInavlidInstruction - Return true if the loop contains an
+ /// instruction that inhibits using the CTR loop.
+ bool containsInvalidInstruction(MachineLoop *L) const;
+
+ /// converToCTRLoop - Given a loop, check if we can convert it to a
+ /// CTR loop. If so, then perform the conversion and return true.
+ bool convertToCTRLoop(MachineLoop *L);
+
+ /// isDead - Return true if the instruction is now dead.
+ bool isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr *, 1> &DeadPhis) const;
+
+ /// removeIfDead - Remove the instruction if it is now dead.
+ void removeIfDead(MachineInstr *MI);
+ };
+
+ char PPCCTRLoops::ID = 0;
+
+
+ // CountValue class - Abstraction for a trip count of a loop. A
+ // smaller vesrsion of the MachineOperand class without the concerns
+ // of changing the operand representation.
+ class CountValue {
+ public:
+ enum CountValueType {
+ CV_Register,
+ CV_Immediate
+ };
+ private:
+ CountValueType Kind;
+ union Values {
+ unsigned RegNum;
+ int64_t ImmVal;
+ Values(unsigned r) : RegNum(r) {}
+ Values(int64_t i) : ImmVal(i) {}
+ } Contents;
+ bool isNegative;
+
+ public:
+ CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
+ isNegative(neg) {}
+ explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
+ isNegative(i < 0) {}
+ CountValueType getType() const { return Kind; }
+ bool isReg() const { return Kind == CV_Register; }
+ bool isImm() const { return Kind == CV_Immediate; }
+ bool isNeg() const { return isNegative; }
+
+ unsigned getReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.RegNum;
+ }
+ void setReg(unsigned Val) {
+ Contents.RegNum = Val;
+ }
+ int64_t getImm() const {
+ assert(isImm() && "Wrong CountValue accessor");
+ if (isNegative) {
+ return -Contents.ImmVal;
+ }
+ return Contents.ImmVal;
+ }
+ void setImm(int64_t Val) {
+ Contents.ImmVal = Val;
+ }
+
+ void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+ if (isReg()) { OS << PrintReg(getReg()); }
+ if (isImm()) { OS << getImm(); }
+ }
+ };
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+
+/// isCompareEquals - Returns true if the instruction is a compare equals
+/// instruction with an immediate operand.
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
+ bool &Int64Cmp) {
+ if (MI->getOpcode() == PPC::CMPWI) {
+ SignedCmp = true;
+ Int64Cmp = false;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
+ Int64Cmp = true;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLWI) {
+ SignedCmp = false;
+ Int64Cmp = false;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLDI) {
+ SignedCmp = false;
+ Int64Cmp = true;
+ return true;
+ }
+
+ return false;
+}
+
+
+/// createPPCCTRLoops - Factory for creating
+/// the CTR loop phase.
+FunctionPass *llvm::createPPCCTRLoops() {
+ return new PPCCTRLoops();
+}
+
+
+bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********* PPC CTR Loops *********\n");
+
+ bool Changed = false;
+
+ // get the loop information
+ MLI = &getAnalysis<MachineLoopInfo>();
+ // get the register information
+ MRI = &MF.getRegInfo();
+ // the target specific instructio info.
+ TII = MF.getTarget().getInstrInfo();
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I) {
+ MachineLoop *L = *I;
+ if (!L->getParentLoop()) {
+ Changed |= convertToCTRLoop(L);
+ }
+ }
+
+ return Changed;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable. We check for a simple recurrence pattern - an
+/// integer recurrence that decrements by one each time through the loop and
+/// ends at zero. If so, return the phi node that corresponds to it.
+///
+/// Based upon the similar code in LoopInfo except this code is specific to
+/// the machine.
+/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+///
+void
+PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
+ SmallVector<MachineInstr *, 4> &IVars,
+ SmallVector<MachineInstr *, 4> &IOps) const {
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+ assert(PI != TopMBB->pred_end() &&
+ "Loop must have more than one incoming edge!");
+ MachineBasicBlock *Backedge = *PI++;
+ if (PI == TopMBB->pred_end()) return; // dead loop
+ MachineBasicBlock *Incoming = *PI++;
+ if (PI != TopMBB->pred_end()) return; // multiple backedges?
+
+ // make sure there is one incoming and one backedge and determine which
+ // is which.
+ if (L->contains(Incoming)) {
+ if (L->contains(Backedge))
+ return;
+ std::swap(Incoming, Backedge);
+ } else if (!L->contains(Backedge))
+ return;
+
+ // Loop over all of the PHI nodes, looking for a canonical induction variable:
+ // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
+ // - The recurrence comes from the backedge.
+ // - the definition is an induction operatio.n
+ for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *MPhi = &*I;
+ unsigned DefReg = MPhi->getOperand(0).getReg();
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ // Check each operand for the value from the backedge.
+ MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
+ if (L->contains(MBB)) { // operands comes from the backedge
+ // Check if the definition is an induction operation.
+ MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
+ if (isInductionOperation(DI, DefReg)) {
+ IOps.push_back(DI);
+ IVars.push_back(MPhi);
+ }
+ }
+ }
+ }
+ return;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the
+/// number of times the loop will be executed. The trip count can
+/// be either a register or a constant value. If the trip-count
+/// cannot be determined, this returns null.
+///
+/// We find the trip count from the phi instruction that defines the
+/// induction variable. We follow the links to the CMP instruction
+/// to get the trip count.
+///
+/// Based upon getTripCount in LoopInfo.
+///
+CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
+ SmallVector<MachineInstr *, 2> &OldInsts) const {
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate a CTR loop if the loop has more than one exit.
+ if (LastMBB == 0)
+ return 0;
+
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+ if (LastI->getOpcode() != PPC::BCC)
+ return 0;
+
+ // We need to make sure that this compare is defining the condition
+ // register actually used by the terminating branch.
+
+ unsigned PredReg = LastI->getOperand(1).getReg();
+ DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);
+
+ unsigned PredCond = LastI->getOperand(0).getImm();
+ if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
+ return 0;
+
+ // Check that the loop has a induction variable.
+ SmallVector<MachineInstr *, 4> IVars, IOps;
+ getCanonicalInductionVariable(L, IVars, IOps);
+ for (unsigned i = 0; i < IVars.size(); ++i) {
+ MachineInstr *IOp = IOps[i];
+ MachineInstr *IV_Inst = IVars[i];
+
+ // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
+ // if Imm is 0, get the count from the PHI opnd
+ // if Imm is -M, than M is the count
+ // Otherwise, Imm is the count
+ MachineOperand *IV_Opnd;
+ const MachineOperand *InitialValue;
+ if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+ InitialValue = &IV_Inst->getOperand(1);
+ IV_Opnd = &IV_Inst->getOperand(3);
+ } else {
+ InitialValue = &IV_Inst->getOperand(3);
+ IV_Opnd = &IV_Inst->getOperand(1);
+ }
+
+ DEBUG(dbgs() << "Considering:\n");
+ DEBUG(dbgs() << " induction operation: " << *IOp);
+ DEBUG(dbgs() << " induction variable: " << *IV_Inst);
+ DEBUG(dbgs() << " initial value: " << *InitialValue << "\n");
+
+ // Look for the cmp instruction to determine if we
+ // can get a useful trip count. The trip count can
+ // be either a register or an immediate. The location
+ // of the value depends upon the type (reg or imm).
+ for (MachineRegisterInfo::reg_iterator
+ RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
+ RI != RE; ++RI) {
+ IV_Opnd = &RI.getOperand();
+ bool SignedCmp, Int64Cmp;
+ MachineInstr *MI = IV_Opnd->getParent();
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
+ MI->getOperand(0).getReg() == PredReg) {
+
+ OldInsts.push_back(MI);
+ OldInsts.push_back(IOp);
+
+ DEBUG(dbgs() << " compare: " << *MI);
+
+ const MachineOperand &MO = MI->getOperand(2);
+ assert(MO.isImm() && "IV Cmp Operand should be an immediate");
+
+ int64_t ImmVal;
+ if (SignedCmp)
+ ImmVal = (short) MO.getImm();
+ else
+ ImmVal = MO.getImm();
+
+ const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+ assert(L->contains(IV_DefInstr->getParent()) &&
+ "IV definition should occurs in loop");
+ int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
+
+ assert(InitialValue->isReg() && "Expecting register for init value");
+ unsigned InitialValueReg = InitialValue->getReg();
+
+ MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+
+ // Here we need to look for an immediate load (an li or lis/ori pair).
+ if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
+ DefInstr->getOpcode() == PPC::ORI)) {
+ int64_t start = DefInstr->getOperand(2).getImm();
+ MachineInstr *DefInstr2 =
+ MRI->getVRegDef(DefInstr->getOperand(1).getReg());
+ if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
+ DefInstr2->getOpcode() == PPC::LIS)) {
+ DEBUG(dbgs() << " initial constant: " << *DefInstr);
+ DEBUG(dbgs() << " initial constant: " << *DefInstr2);
+
+ start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
+
+ int64_t count = ImmVal - start;
+ if ((count % iv_value) != 0) {
+ return 0;
+ }
+
+ OldInsts.push_back(DefInstr);
+ OldInsts.push_back(DefInstr2);
+
+ // count/iv_value, the trip count, should be positive here. If it
+ // is negative, that indicates that the counter will wrap.
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
+ }
+ } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
+ DefInstr->getOpcode() == PPC::LI)) {
+ DEBUG(dbgs() << " initial constant: " << *DefInstr);
+
+ int64_t count = ImmVal -
+ int64_t(short(DefInstr->getOperand(1).getImm()));
+ if ((count % iv_value) != 0) {
+ return 0;
+ }
+
+ OldInsts.push_back(DefInstr);
+
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
+ } else if (iv_value == 1 || iv_value == -1) {
+ // We can't determine a constant starting value.
+ if (ImmVal == 0) {
+ return new CountValue(InitialValueReg, iv_value > 0);
+ }
+ // FIXME: handle non-zero end value.
+ }
+ // FIXME: handle non-unit increments (we might not want to introduce
+ // division but we can handle some 2^n cases with shifts).
+
+ }
+ }
+ }
+ return 0;
+}
+
+/// isInductionOperation - return true if the operation is matches the
+/// pattern that defines an induction variable:
+/// addi iv, c
+///
+bool
+PPCCTRLoops::isInductionOperation(const MachineInstr *MI,
+ unsigned IVReg) const {
+ return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) &&
+ MI->getOperand(1).isReg() && // could be a frame index instead
+ MI->getOperand(1).getReg() == IVReg);
+}
+
+/// isInvalidOperation - Return true if the operation is invalid within
+/// CTR loop.
+bool
+PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+ // call is not allowed because the callee may use a CTR loop
+ if (MI->getDesc().isCall()) {
+ return true;
+ }
+ // check if the instruction defines a CTR loop register
+ // (this will also catch nested CTR loops)
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// containsInvalidInstruction - Return true if the loop contains
+/// an instruction that inhibits the use of the CTR loop function.
+///
+bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const {
+ const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+ const MachineInstr *MI = &*MII;
+ if (isInvalidLoopOperation(MI)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// isDead returns true if the instruction is dead
+/// (this was essentially copied from DeadMachineInstructionElim::isDead, but
+/// with special cases for inline asm, physical registers and instructions with
+/// side effects removed)
+bool PPCCTRLoops::isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr *, 1> &DeadPhis) const {
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (!MRI->use_nodbg_empty(Reg)) {
+ // This instruction has users, but if the only user is the phi node for
+ // the parent block, and the only use of that phi node is this
+ // instruction, then this instruction is dead: both it (and the phi
+ // node) can be removed.
+ MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
+ if (llvm::next(I) == MRI->use_end() &&
+ I.getOperand().getParent()->isPHI()) {
+ MachineInstr *OnePhi = I.getOperand().getParent();
+
+ for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
+ const MachineOperand &OPO = OnePhi->getOperand(j);
+ if (OPO.isReg() && OPO.isDef()) {
+ unsigned OPReg = OPO.getReg();
+
+ MachineRegisterInfo::use_iterator nextJ;
+ for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg),
+ E = MRI->use_end(); J!=E; J=nextJ) {
+ nextJ = llvm::next(J);
+ MachineOperand& Use = J.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+
+ if (MI != UseMI) {
+ // The phi node has a user that is not MI, bail...
+ return false;
+ }
+ }
+ }
+ }
+
+ DeadPhis.push_back(OnePhi);
+ } else {
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+void PPCCTRLoops::removeIfDead(MachineInstr *MI) {
+ // This procedure was essentially copied from DeadMachineInstructionElim
+
+ SmallVector<MachineInstr *, 1> DeadPhis;
+ if (isDead(MI, DeadPhis)) {
+ DEBUG(dbgs() << "CTR looping will remove: " << *MI);
+
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I!=E; I=nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand& Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI==MI)
+ continue;
+ if (Use.isDebug()) // this might also be a instr -> phi -> instr case
+ // which can also be removed.
+ UseMI->getOperand(0).setReg(0U);
+ }
+ }
+
+ MI->eraseFromParent();
+ for (unsigned i = 0; i < DeadPhis.size(); ++i) {
+ DeadPhis[i]->eraseFromParent();
+ }
+ }
+}
+
+/// converToCTRLoop - check if the loop is a candidate for
+/// converting to a CTR loop. If so, then perform the
+/// transformation.
+///
+/// This function works on innermost loops first. A loop can
+/// be converted if it is a counting loop; either a register
+/// value or an immediate.
+///
+/// The code makes several assumptions about the representation
+/// of the loop in llvm.
+bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
+ bool Changed = false;
+ // Process nested loops first.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Changed |= convertToCTRLoop(*I);
+ }
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (Changed) {
+ return Changed;
+ }
+
+ SmallVector<MachineInstr *, 2> OldInsts;
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getTripCount(L, OldInsts);
+ if (TripCount == 0) {
+ DEBUG(dbgs() << "failed to get trip count!\n");
+ return false;
+ }
+
+ if (TripCount->isImm()) {
+ DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+
+ // FIXME: We currently can't form 64-bit constants
+ // (including 32-bit unsigned constants)
+ if (!isInt<32>(TripCount->getImm()))
+ return false;
+ }
+
+ // Does the loop contain any invalid instructions?
+ if (containsInvalidInstruction(L)) {
+ return false;
+ }
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ // No preheader means there's not place for the loop instr.
+ if (Preheader == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+ DebugLoc dl;
+ if (InsertPos != Preheader->end())
+ dl = InsertPos->getDebugLoc();
+
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate CTR loop if the loop has more than one exit.
+ if (LastMBB == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+
+ // Determine the loop start.
+ MachineBasicBlock *LoopStart = L->getTopBlock();
+ if (L->getLoopLatch() != LastMBB) {
+ // When the exit and latch are not the same, use the latch block as the
+ // start.
+ // The loop start address is used only after the 1st iteration, and the loop
+ // latch may contains instrs. that need to be executed after the 1st iter.
+ LoopStart = L->getLoopLatch();
+ // Make sure the latch is a successor of the exit, otherwise it won't work.
+ if (!LastMBB->isSuccessor(LoopStart)) {
+ return false;
+ }
+ }
+
+ // Convert the loop to a CTR loop
+ DEBUG(dbgs() << "Change to CTR loop at "; L->dump());
+
+ MachineFunction *MF = LastMBB->getParent();
+ const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>();
+ bool isPPC64 = Subtarget.isPPC64();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
+
+ unsigned CountReg;
+ if (TripCount->isReg()) {
+ // Create a copy of the loop count register.
+ const TargetRegisterClass *SrcRC =
+ MF->getRegInfo().getRegClass(TripCount->getReg());
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
+ (unsigned) PPC::EXTSW_32_64 :
+ (unsigned) TargetOpcode::COPY;
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
+ if (TripCount->isNeg()) {
+ unsigned CountReg1 = CountReg;
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
+ CountReg).addReg(CountReg1);
+ }
+ } else {
+ assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
+ // Put the trip count in a register for transfer into the count register.
+
+ int64_t CountImm = TripCount->getImm();
+ if (TripCount->isNeg())
+ CountImm = -CountImm;
+
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ if (abs64(CountImm) > 0x7FFF) {
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
+ CountReg).addImm((CountImm >> 16) & 0xFFFF);
+ unsigned CountReg1 = CountReg;
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
+ CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF);
+ } else {
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(isPPC64 ? PPC::LI8 : PPC::LI),
+ CountReg).addImm(CountImm);
+ }
+ }
+
+ // Add the mtctr instruction to the beginning of the loop.
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg,
+ TripCount->isImm() ? RegState::Kill : 0);
+
+ // Make sure the loop start always has a reference in the CFG. We need to
+ // create a BlockAddress operand to get this mechanism to work both the
+ // MachineBasicBlock and BasicBlock objects need the flag set.
+ LoopStart->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object
+ BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+ // Replace the loop branch with a bdnz instruction.
+ dl = LastI->getDebugLoc();
+ const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ if (MBB != Preheader)
+ MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR);
+ }
+
+ // The loop ends with either:
+ // - a conditional branch followed by an unconditional branch, or
+ // - a conditional branch to the loop start.
+ assert(LastI->getOpcode() == PPC::BCC &&
+ "loop end must start with a BCC instruction");
+ // Either the BCC branches to the beginning of the loop, or it
+ // branches out of the loop and there is an unconditional branch
+ // to the start of the loop.
+ MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB();
+ BuildMI(*LastMBB, LastI, dl,
+ TII->get((BranchTarget == LoopStart) ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget);
+
+ // Conditional branch; just delete it.
+ DEBUG(dbgs() << "Removing old branch: " << *LastI);
+ LastMBB->erase(LastI);
+
+ delete TripCount;
+
+ // The induction operation (add) and the comparison (cmpwi) may now be
+ // unneeded. If these are unneeded, then remove them.
+ for (unsigned i = 0; i < OldInsts.size(); ++i)
+ removeIfDead(OldInsts[i]);
+
+ ++NumCTRLoops;
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
new file mode 100644
index 000000000000..c8a29a3d2cfe
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -0,0 +1,144 @@
+//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PowerPC 32- and 64-bit
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for PowerPC
+def RetCC_PPC : CallingConv<[
+ // On PPC64, integer return values are always promoted to i64
+ CCIfType<[i32], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
+
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
+
+ // Vector types are always returned in V2.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC System V Release 4 32-bit ABI
+//===----------------------------------------------------------------------===//
+
+def CC_PPC32_SVR4_Common : CallingConv<[
+ // The ABI requires i64 to be passed in two adjacent registers with the first
+ // register having an odd register number.
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
+
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+
+ // Make sure the i64 words from a long double are either both passed in
+ // registers or both passed on the stack.
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>,
+
+ // FP values are passed in F1 - F8.
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // Split arguments have an alignment of 8 bytes on the stack.
+ CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // Floats are stored in double precision format, thus they have the same
+ // alignment and size as doubles.
+ CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>>
+]>;
+
+// This calling convention puts vector arguments always on the stack. It is used
+// to assign vector arguments which belong to the variable portion of the
+// parameter list of a variable argument function.
+def CC_PPC32_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC32_SVR4_Common>
+]>;
+
+// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
+// put vector arguments in vector registers before putting them on the stack.
+def CC_PPC32_SVR4 : CallingConv<[
+ // The first 12 Vector arguments are passed in AltiVec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
+
+ CCDelegateTo<CC_PPC32_SVR4_Common>
+]>;
+
+// Helper "calling convention" to handle aggregate by value arguments.
+// Aggregate by value arguments are always placed in the local variable space
+// of the caller. This calling convention is only used to assign those stack
+// offsets in the callers stack frame.
+//
+// Still, the address of the aggregate copy in the callers stack frame is passed
+// in a GPR (or in the parameter list area if all GPRs are allocated) from the
+// caller to the callee. The location for the address argument is assigned by
+// the CC_PPC32_SVR4 calling convention.
+//
+// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
+// not passed by value.
+
+def CC_PPC32_SVR4_ByVal : CallingConv<[
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCCustom<"CC_PPC32_SVR4_Custom_Dummy">
+]>;
+
+def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>;
+def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>;
+
+def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>;
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
new file mode 100644
index 000000000000..64787185138b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -0,0 +1,271 @@
+//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
+// JIT-compile bitcode to native PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+ class PPCCodeEmitter : public MachineFunctionPass {
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+ /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
+ /// its address in the function into this pointer.
+ void *MovePCtoLROffset;
+ public:
+
+ PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+
+ MachineRelocation GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const;
+
+ /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
+
+ /// runOnMachineFunction - emits the given MachineFunction to memory
+ ///
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// emitBasicBlock - emits the given MachineBasicBlock to memory
+ ///
+ void emitBasicBlock(MachineBasicBlock &MBB);
+ };
+}
+
+char PPCCodeEmitter::ID = 0;
+
+/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
+/// to the specified MCE object.
+FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new PPCCodeEmitter(TM, JCE);
+}
+
+bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
+ MF.getTarget().getRelocationModel() != Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+ do {
+ MovePCtoLROffset = 0;
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ emitBasicBlock(*BB);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
+ const MachineInstr &MI = *I;
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+ switch (MI.getOpcode()) {
+ default:
+ MCE.emitWordBE(getBinaryCodeForInstr(MI));
+ break;
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ break; // pseudo opcode, no side effects
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8:
+ assert(TM.getRelocationModel() == Reloc::PIC_);
+ MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
+ MCE.emitWordBE(0x48000005); // bl 1
+ break;
+ }
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+ }
+}
+
+unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+ return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
+}
+
+MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const {
+ // If in PIC mode, we need to encode the negated address of the
+ // 'movepctolr' into the unrelocated field. After relocation, we'll have
+ // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
+ // field, we get &gv. This doesn't happen for branch relocations, which are
+ // always implicitly pc relative.
+ intptr_t Cst = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ Cst = -(intptr_t)MovePCtoLROffset - 4;
+ }
+
+ if (MO.isGlobal())
+ return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
+ const_cast<GlobalValue *>(MO.getGlobal()),
+ Cst, isa<Function>(MO.getGlobal()));
+ if (MO.isSymbol())
+ return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ RelocID, MO.getSymbolName(), Cst);
+ if (MO.isCPI())
+ return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+
+ if (MO.isMBB())
+ return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ RelocID, MO.getMBB());
+
+ assert(MO.isJTI());
+ return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+}
+
+unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return RegBits;
+}
+
+unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits;
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
+ return RegBits;
+}
+
+
+unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("TLS not supported on the old JIT.");
+ return 0;
+}
+
+
+unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+
+ if (MO.isReg()) {
+ // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+#include "PPCGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
new file mode 100644
index 000000000000..3244b904ee64
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -0,0 +1,1329 @@
+//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PPC implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCFrameLowering.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+// FIXME This disables some code that aligns the stack to a boundary bigger than
+// the default (16 bytes on Darwin) when there is a stack local of greater
+// alignment. This does not currently work, because the delta between old and
+// new stack pointers is added to offsets that reference incoming parameters
+// after the prolog is generated, and the code that does that doesn't handle a
+// variable delta. You don't want to do that anyway; a better approach is to
+// reserve another register that retains to the incoming stack pointer, and
+// reference parameters relative to that.
+#define ALIGN_STACK 0
+
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const uint16_t VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function. Remove all of the VRSAVE related code from the function.
+/// FIXME: The removal of the code results in a compile failure at -O0 when the
+/// function contains a function call, as the GPR containing original VRSAVE
+/// contents is spilled and reloaded around the call. Without the prolog code,
+/// the spill instruction refers to an undefined register. This code needs
+/// to account for all uses of that GPR.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+ MachineBasicBlock *Entry = MI->getParent();
+ MachineFunction *MF = Entry->getParent();
+
+ // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
+ MachineBasicBlock::iterator MBBI = MI;
+ ++MBBI;
+ assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+ MBBI->eraseFromParent();
+
+ bool RemovedAllMTVRSAVEs = true;
+ // See if we can find and remove the MTVRSAVE instruction from all of the
+ // epilog blocks.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().isReturn()) {
+ bool FoundIt = false;
+ for (MBBI = I->end(); MBBI != I->begin(); ) {
+ --MBBI;
+ if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+ MBBI->eraseFromParent(); // remove it.
+ FoundIt = true;
+ break;
+ }
+ }
+ RemovedAllMTVRSAVEs &= FoundIt;
+ }
+ }
+
+ // If we found and removed all MTVRSAVE instructions, remove the read of
+ // VRSAVE as well.
+ if (RemovedAllMTVRSAVEs) {
+ MBBI = MI;
+ assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+ --MBBI;
+ assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+ MBBI->eraseFromParent();
+ }
+
+ // Finally, nuke the UPDATE_VRSAVE.
+ MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector. Based on the vector registers that have been used,
+// transform this into the appropriate ORI instruction.
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned UsedRegMask = 0;
+ for (unsigned i = 0; i != 32; ++i)
+ if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ UsedRegMask |= 1 << (31-i);
+
+ // Live in and live out values already must be in the mask, so don't bother
+ // marking them.
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned RegNo = TRI->getEncodingValue(I->first);
+ if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+
+ // Live out registers appear as use operands on return instructions.
+ for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
+ UsedRegMask != 0 && BI != BE; ++BI) {
+ const MachineBasicBlock &MBB = *BI;
+ if (MBB.empty() || !MBB.back().isReturn())
+ continue;
+ const MachineInstr &Ret = MBB.back();
+ for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Ret.getOperand(I);
+ if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
+ continue;
+ unsigned RegNo = TRI->getEncodingValue(MO.getReg());
+ UsedRegMask &= ~(1 << (31-RegNo));
+ }
+ }
+
+ // If no registers are used, turn this into a copy.
+ if (UsedRegMask == 0) {
+ // Remove all VRSAVE code.
+ RemoveVRSaveCode(MI);
+ return;
+ }
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask);
+ } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+ } else {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(UsedRegMask & 0xFFFF);
+ }
+
+ // Remove the old UPDATE_VRSAVE instruction.
+ MI->eraseFromParent();
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isCRSpilled();
+}
+
+static bool spillsVRSAVE(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isVRSAVESpilled();
+}
+
+static bool hasSpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasSpills();
+}
+
+static bool hasNonRISpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasNonRISpills();
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF,
+ bool UseEstimate) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize =
+ UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1; //
+
+ // If we are a leaf function, and use up to 224 bytes of stack space,
+ // don't have a frame pointer, calls, or dynamic alloca then we do not need
+ // to adjust the stack pointer (we fit in the Red Zone). For 64-bit
+ // SVR4, we also require a stack frame if we need to spill the CR,
+ // since this spill area is addressed relative to the stack pointer.
+ // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
+ // stackless code if all local vars are reg-allocated.
+ bool DisableRedZone = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
+ if (!DisableRedZone &&
+ (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
+ !Subtarget.isSVR4ABI() || // allocated locals.
+ FrameSize == 0) &&
+ FrameSize <= 224 && // Fits in red zone.
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !(Subtarget.isPPC64() && // No 64-bit SVR4 CRsave.
+ Subtarget.isSVR4ABI()
+ && spillsCR(MF)) &&
+ (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
+ // No need for frame
+ if (UpdateMF)
+ MFI->setStackSize(0);
+ return 0;
+ }
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // Maximum call frame needs to be at least big enough for linkage and 8 args.
+ unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(),
+ Subtarget.isDarwinABI());
+ maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ if (UpdateMF)
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ if (UpdateMF)
+ MFI->setStackSize(FrameSize);
+
+ return FrameSize;
+}
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register.
+bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // FIXME: This is pretty much broken by design: hasFP() might be called really
+ // early, before the stack layout was calculated and thus hasFP() might return
+ // true or false here depending on the time of call.
+ return (MFI->getStackSize()) && needsFP(MF);
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Naked functions have no stack frame pushed, so we don't have a frame
+ // pointer.
+ if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
+ return false;
+
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
+ bool is31 = needsFP(MF);
+ unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
+ unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI)
+ for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ --MBBI;
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ switch (MO.getReg()) {
+ case PPC::FP:
+ MO.setReg(FPReg);
+ break;
+ case PPC::FP8:
+ MO.setReg(FP8Reg);
+ break;
+ }
+ }
+ }
+}
+
+void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl;
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ // Prepare for frame info.
+ MCSymbol *FrameLabel = 0;
+
+ // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
+ // process it.
+ if (!Subtarget.isSVR4ABI())
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
+ }
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ unsigned FrameSize = determineFrameLayout(MF);
+ int NegFrameSize = -FrameSize;
+
+ if (MFI->isFrameAddressTaken())
+ replaceFPWithRealFP(MF);
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ // Check if the link register (LR) must be saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF);
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X31)
+ .addImm(FPOffset/4)
+ .addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X0)
+ .addImm(LROffset / 4)
+ .addReg(PPC::X1);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
+
+ if (HasFP)
+ // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative
+ // offsets of R1 is not allowed.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R31)
+ .addImm(FPOffset)
+ .addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R0)
+ .addImm(LROffset)
+ .addReg(PPC::R1);
+ }
+
+ // Skip if a leaf routine.
+ if (!FrameSize) return;
+
+ // Get stack alignments.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Adjust stack pointer: r1 += NegFrameSize.
+ // If there is a preferred stack alignment, align R1 now
+ if (!isPPC64) {
+ // PPC32.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R1)
+ .addImm(0)
+ .addImm(32 - Log2_32(MaxAlign))
+ .addImm(31);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
+ .addReg(PPC::R1, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
+ .addReg(PPC::R1)
+ .addImm(NegFrameSize)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
+ .addReg(PPC::R1, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ }
+ } else { // PPC64.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
+ .addReg(PPC::X1)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
+ .addReg(PPC::X0)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(PPC::X1, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
+ .addReg(PPC::X1)
+ .addImm(NegFrameSize / 4)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(PPC::X1, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ }
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Add the "machine moves" for the instructions we generated above, but in
+ // reverse order.
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Show update of SP.
+ if (NegFrameSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabel, SP, SP));
+ }
+
+ if (HasFP) {
+ MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
+ MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ if (MustSaveLR) {
+ MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+ MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
+ Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
+ }
+ }
+
+ MCSymbol *ReadyLabel = 0;
+
+ // If there is a frame pointer, copy R1 into R31
+ if (HasFP) {
+ if (!isPPC64) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
+ .addReg(PPC::R1)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
+ .addReg(PPC::X1)
+ .addReg(PPC::X1);
+ }
+
+ if (needsFrameMoves) {
+ ReadyLabel = MMI.getContext().CreateTempSymbol();
+
+ // Mark effective beginning of when frame pointer is ready.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
+ (isPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ }
+
+ if (needsFrameMoves) {
+ MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+
+ // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
+ // subregisters of CR2. We just need to emit a move of CR2.
+ if (PPC::CRBITRCRegClass.contains(Reg))
+ continue;
+
+ // For SVR4, don't emit a move for the CR spill slot if we haven't
+ // spilled CRs.
+ if (Subtarget.isSVR4ABI()
+ && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
+ && !spillsCR(MF))
+ continue;
+
+ // For 64-bit SVR4 when we have spilled CRs, the spill location
+ // is SP+8, not a frame-relative slot.
+ if (Subtarget.isSVR4ABI()
+ && Subtarget.isPPC64()
+ && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ MachineLocation CSDst(PPC::X1, 8);
+ MachineLocation CSSrc(PPC::CR2);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ continue;
+ }
+
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+ }
+}
+
+void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no terminator");
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl;
+
+ assert((RetOpcode == PPC::BLR ||
+ RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8) &&
+ "Can only insert epilog into returning blocks");
+
+ // Get alignment info so we know how to restore r1
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned TargetAlign = getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Get the number of bytes allocated from the FrameInfo.
+ int FrameSize = MFI->getStackSize();
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ // Check if the link register (LR) has been saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF);
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be positive");
+ if (MaxTCRetDelta>0)
+ FrameSize += (StackAdj +Delta);
+ else
+ FrameSize += StackAdj;
+ }
+
+ if (FrameSize) {
+ // The loaded (or persistent) stack pointer value is offset by the 'stwu'
+ // on entry to the function. Add this offset back now.
+ if (!isPPC64) {
+ // If this function contained a fastcc call and GuaranteedTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid the frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R31).addImm(FrameSize);
+ } else if(FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
+ .addReg(PPC::R1)
+ .addReg(PPC::R31)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(FrameSize) &&
+ (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
+ .addImm(0).addReg(PPC::R1);
+ }
+ } else {
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid the frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X31).addImm(FrameSize);
+ } else if(FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
+ .addReg(PPC::X1)
+ .addReg(PPC::X31)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
+ .addImm(0).addReg(PPC::X1);
+ }
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(LROffset/4).addReg(PPC::X1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
+ .addImm(FPOffset/4).addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
+ .addImm(LROffset).addReg(PPC::R1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
+ .addImm(FPOffset).addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
+ }
+
+ // Callee pop calling convention. Pop parameter/linkage area. Used for tail
+ // call optimization
+ if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+ unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
+
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CallerAllocatedAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CallerAllocatedAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(FPReg)
+ .addReg(TmpReg);
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+void
+PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Save and clear the LR state.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned LR = RegInfo->getRARegister();
+ FI->setMustSaveLR(MustSaveLR(MF, LR));
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.setPhysRegUnused(LR);
+
+ // Save R31 if necessary
+ int FPSI = FI->getFramePointerSaveIndex();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI && needsFP(MF)) {
+ // Find out what the fix offset of the frame pointer save area.
+ int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+ // Reserve stack space to move the linkage area to in case of a tail call.
+ int TCSPDelta = 0;
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
+ }
+
+ // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
+ // function uses CR 2, 3, or 4.
+ if (!isPPC64 && !isDarwinABI &&
+ (MRI.isPhysRegUsed(PPC::CR2) ||
+ MRI.isPhysRegUsed(PPC::CR3) ||
+ MRI.isPhysRegUsed(PPC::CR4))) {
+ int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ FI->setCRSpillFrameIndex(FrameIdx);
+ }
+}
+
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ addScavengingSpillSlot(MF, RS);
+ return;
+ }
+
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty() && !needsFP(MF)) {
+ addScavengingSpillSlot(MF, RS);
+ return;
+ }
+
+ unsigned MinGPR = PPC::R31;
+ unsigned MinG8R = PPC::X31;
+ unsigned MinFPR = PPC::F31;
+ unsigned MinVR = PPC::V31;
+
+ bool HasGPSaveArea = false;
+ bool HasG8SaveArea = false;
+ bool HasFPSaveArea = false;
+ bool HasVRSAVESaveArea = false;
+ bool HasVRSaveArea = false;
+
+ SmallVector<CalleeSavedInfo, 18> GPRegs;
+ SmallVector<CalleeSavedInfo, 18> G8Regs;
+ SmallVector<CalleeSavedInfo, 18> FPRegs;
+ SmallVector<CalleeSavedInfo, 18> VRegs;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (PPC::GPRCRegClass.contains(Reg)) {
+ HasGPSaveArea = true;
+
+ GPRegs.push_back(CSI[i]);
+
+ if (Reg < MinGPR) {
+ MinGPR = Reg;
+ }
+ } else if (PPC::G8RCRegClass.contains(Reg)) {
+ HasG8SaveArea = true;
+
+ G8Regs.push_back(CSI[i]);
+
+ if (Reg < MinG8R) {
+ MinG8R = Reg;
+ }
+ } else if (PPC::F8RCRegClass.contains(Reg)) {
+ HasFPSaveArea = true;
+
+ FPRegs.push_back(CSI[i]);
+
+ if (Reg < MinFPR) {
+ MinFPR = Reg;
+ }
+ } else if (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)) {
+ ; // do nothing, as we already know whether CRs are spilled
+ } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
+ HasVRSAVESaveArea = true;
+ } else if (PPC::VRRCRegClass.contains(Reg)) {
+ HasVRSaveArea = true;
+
+ VRegs.push_back(CSI[i]);
+
+ if (Reg < MinVR) {
+ MinVR = Reg;
+ }
+ } else {
+ llvm_unreachable("Unknown RegisterClass!");
+ }
+ }
+
+ PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ int64_t LowerBound = 0;
+
+ // Take into account stack space reserved for tail calls.
+ int TCSPDelta = 0;
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ LowerBound = TCSPDelta;
+ }
+
+ // The Floating-point register save area is right below the back chain word
+ // of the previous stack frame.
+ if (HasFPSaveArea) {
+ for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
+ int FI = FPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
+ }
+
+ // Check whether the frame pointer register is allocated. If so, make sure it
+ // is spilled to the correct offset.
+ if (needsFP(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getFramePointerSaveIndex();
+ assert(FI && "No Frame Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // General register save area starts right below the Floating-point
+ // register save area.
+ if (HasGPSaveArea || HasG8SaveArea) {
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
+ int FI = GPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
+ int FI = G8Regs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ unsigned MinReg =
+ std::min<unsigned>(TRI->getEncodingValue(MinGPR),
+ TRI->getEncodingValue(MinG8R));
+
+ if (Subtarget.isPPC64()) {
+ LowerBound -= (31 - MinReg + 1) * 8;
+ } else {
+ LowerBound -= (31 - MinReg + 1) * 4;
+ }
+ }
+
+ // For 32-bit only, the CR save area is below the general register
+ // save area. For 64-bit SVR4, the CR save area is addressed relative
+ // to the stack pointer and hence does not need an adjustment here.
+ // Only CR2 (the first nonvolatile spilled) has an associated frame
+ // index so that we have a single uniform save area.
+ if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
+ // Adjust the frame index of the CR spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
+ // Leave Darwin logic as-is.
+ || (!Subtarget.isSVR4ABI() &&
+ (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)))) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The CR save area is always 4 bytes long.
+ }
+
+ if (HasVRSAVESaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the VRSAVE register class?
+ // Adjust the frame index of the VRSAVE spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::VRSAVERCRegClass.contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
+ }
+
+ if (HasVRSaveArea) {
+ // Insert alignment padding, we need 16-byte alignment.
+ LowerBound = (LowerBound - 15) & ~(15);
+
+ for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
+ int FI = VRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ addScavengingSpillSlot(MF, RS);
+}
+
+void
+PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+
+ // We need to have a scavenger spill slot for spills if the frame size is
+ // large. In case there is no free register for large-offset addressing,
+ // this slot is used for the necessary emergency spill. Also, we need the
+ // slot for dynamic stack allocations.
+
+ // The scavenger might be invoked if the frame offset does not fit into
+ // the 16-bit immediate. We don't know the complete frame size here
+ // because we've not yet computed callee-saved register spills or the
+ // needed alignment padding.
+ unsigned StackSize = determineFrameLayout(MF, false, true);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
+ hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ // These kinds of spills might need two registers.
+ if (spillsCR(MF) || spillsVRSAVE(MF))
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ }
+}
+
+bool
+PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ // Currently, this function only handles SVR4 32- and 64-bit ABIs.
+ // Return false otherwise to maintain pre-existing behavior.
+ if (!Subtarget.isSVR4ABI())
+ return false;
+
+ MachineFunction *MF = MBB.getParent();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ DebugLoc DL;
+ bool CRSpilled = false;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ // CR2 through CR4 are the nonvolatile CR fields.
+ bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
+
+ if (CRSpilled && IsCRField)
+ continue;
+
+ // Add the callee-saved register as live-in; it's killed at the spill.
+ MBB.addLiveIn(Reg);
+
+ // Insert the spill to the stack frame.
+ if (IsCRField) {
+ CRSpilled = true;
+ // The first time we see a CR field, store the whole CR into the
+ // save slot via GPR12 (available in the prolog for 32- and 64-bit).
+ if (Subtarget.isPPC64()) {
+ // 64-bit: SP+8
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12));
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8))
+ .addReg(PPC::X12,
+ getKillRegState(true))
+ .addImm(8)
+ .addReg(PPC::X1));
+ } else {
+ // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
+ // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12));
+ MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
+ .addReg(PPC::R12,
+ getKillRegState(true)),
+ CSI[i].getFrameIdx()));
+ }
+
+ // Record that we spill the CR in this function.
+ PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+ }
+ return true;
+}
+
+static void
+restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
+
+ MachineFunction *MF = MBB.getParent();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ DebugLoc DL;
+ unsigned RestoreOp, MoveReg;
+
+ if (isPPC64) {
+ // 64-bit: SP+8
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12)
+ .addImm(8)
+ .addReg(PPC::X1));
+ RestoreOp = PPC::MTCRF8;
+ MoveReg = PPC::X12;
+ } else {
+ // 32-bit: FP-relative
+ MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
+ PPC::R12),
+ CSI[CSIIndex].getFrameIdx()));
+ RestoreOp = PPC::MTCRF;
+ MoveReg = PPC::R12;
+ }
+
+ if (CR2Spilled)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
+ .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
+
+ if (CR3Spilled)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
+ .addReg(MoveReg, getKillRegState(!CR4Spilled)));
+
+ if (CR4Spilled)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
+ .addReg(MoveReg, getKillRegState(true)));
+}
+
+void PPCFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+bool
+PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ // Currently, this function only handles SVR4 32- and 64-bit ABIs.
+ // Return false otherwise to maintain pre-existing behavior.
+ if (!Subtarget.isSVR4ABI())
+ return false;
+
+ MachineFunction *MF = MBB.getParent();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ bool CR2Spilled = false;
+ bool CR3Spilled = false;
+ bool CR4Spilled = false;
+ unsigned CSIIndex = 0;
+
+ // Initialize insertion-point logic; we will be restoring in reverse
+ // order of spill.
+ MachineBasicBlock::iterator I = MI, BeforeI = I;
+ bool AtStart = I == MBB.begin();
+
+ if (!AtStart)
+ --BeforeI;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (Reg == PPC::CR2) {
+ CR2Spilled = true;
+ // The spill slot is associated only with CR2, which is the
+ // first nonvolatile spilled. Save it here.
+ CSIIndex = i;
+ continue;
+ } else if (Reg == PPC::CR3) {
+ CR3Spilled = true;
+ continue;
+ } else if (Reg == PPC::CR4) {
+ CR4Spilled = true;
+ continue;
+ } else {
+ // When we first encounter a non-CR register after seeing at
+ // least one CR register, restore all spilled CRs together.
+ if ((CR2Spilled || CR3Spilled || CR4Spilled)
+ && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
+ MBB, I, CSI, CSIIndex);
+ CR2Spilled = CR3Spilled = CR4Spilled = false;
+ }
+
+ // Default behavior for non-CR saves.
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ }
+
+ // Insert in reverse order.
+ if (AtStart)
+ I = MBB.begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+
+ // If we haven't yet spilled the CRs, do so now.
+ if (CR2Spilled || CR3Spilled || CR4Spilled)
+ restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
+ MBB, I, CSI, CSIIndex);
+
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
new file mode 100644
index 000000000000..6f5f9368c6c6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -0,0 +1,293 @@
+//===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_FRAMEINFO_H
+#define POWERPC_FRAMEINFO_H
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class PPCSubtarget;
+
+class PPCFrameLowering: public TargetFrameLowering {
+ const PPCSubtarget &Subtarget;
+
+public:
+ PPCFrameLowering(const PPCSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0),
+ Subtarget(sti) {
+ }
+
+ unsigned determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF = true,
+ bool UseEstimate = false) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool needsFP(const MachineFunction &MF) const;
+ void replaceFPWithRealFP(MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// getReturnSaveOffset - Return the previous frame offset to save the
+ /// return address.
+ static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI)
+ return isPPC64 ? 16 : 8;
+ // SVR4 ABI:
+ return isPPC64 ? 16 : 4;
+ }
+
+ /// getFramePointerSaveOffset - Return the previous frame offset to save the
+ /// frame pointer.
+ static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI:
+ // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+ // for saving the frame pointer (if needed.) While the published ABI has
+ // not used this slot since at least MacOSX 10.2, there is older code
+ // around that does use it, and that needs to continue to work.
+ if (isDarwinABI)
+ return isPPC64 ? -8U : -4U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return isPPC64 ? -8U : -4U;
+ }
+
+ /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+ ///
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI || isPPC64)
+ return 6 * (isPPC64 ? 8 : 4);
+
+ // SVR4 ABI:
+ return 8;
+ }
+
+ /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI
+ /// argument area.
+ static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI / 64-bit SVR4 ABI:
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if its varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ if (isDarwinABI || isPPC64)
+ return 8 * (isPPC64 ? 8 : 4);
+
+ // 32-bit SVR4 ABI:
+ // There is no default stack allocated for the 8 first GPR arguments.
+ return 0;
+ }
+
+ /// getMinCallFrameSize - Return the minimum size a call frame can be using
+ /// the PowerPC ABI.
+ static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
+ // The call frame needs to be at least big enough for linkage and 8 args.
+ return getLinkageSize(isPPC64, isDarwinABI) +
+ getMinCallArgumentsSize(isPPC64, isDarwinABI);
+ }
+
+ // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return 0;
+ }
+
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset. We map each of the nonvolatile CR fields
+ // to the slot for CR2, which is the first of the nonvolatile CR
+ // fields to be assigned, so that we only allocate one save slot.
+ // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
+ {PPC::CR2, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
+ };
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
+ };
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
new file mode 100644
index 000000000000..4bf1e3396429
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -0,0 +1,245 @@
+//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "PPCHazardRecognizers.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Scoreboard Hazard Recognizer
+void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ // This is a PPC pseudo-instruction.
+ return;
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+ScheduleHazardRecognizer::HazardType
+PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void PPCScoreboardHazardRecognizer::AdvanceCycle() {
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void PPCScoreboardHazardRecognizer::Reset() {
+ ScoreboardHazardRecognizer::Reset();
+}
+
+//===----------------------------------------------------------------------===//
+// PowerPC 970 Hazard Recognizer
+//
+// This models the dispatch group formation of the PPC970 processor. Dispatch
+// groups are bundles of up to five instructions that can contain various mixes
+// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one
+// branch instruction per-cycle.
+//
+// There are a number of restrictions to dispatch group formation: some
+// instructions can only be issued in the first slot of a dispatch group, & some
+// instructions fill an entire dispatch group. Additionally, only branches can
+// issue in the 5th (last) slot.
+//
+// Finally, there are a number of "structural" hazards on the PPC970. These
+// conditions cause large performance penalties due to misprediction, recovery,
+// and replay logic that has to happen. These cases include setting a CTR and
+// branching through it in the same dispatch group, and storing to an address,
+// then loading from the same address within a dispatch group. To avoid these
+// conditions, we insert no-op instructions when appropriate.
+//
+// FIXME: This is missing some significant cases:
+// 1. Modeling of microcoded instructions.
+// 2. Handling of serialized operations.
+// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
+//
+
+PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
+ : TII(tii) {
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::EndDispatchGroup() {
+ DEBUG(errs() << "=== Start of dispatch group\n");
+ NumIssued = 0;
+
+ // Structural hazard info.
+ HasCTRSet = false;
+ NumStores = 0;
+}
+
+
+PPCII::PPC970_Unit
+PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,
+ bool &isCracked,
+ bool &isLoad, bool &isStore) {
+ const MCInstrDesc &MCID = TII.get(Opcode);
+
+ isLoad = MCID.mayLoad();
+ isStore = MCID.mayStore();
+
+ uint64_t TSFlags = MCID.TSFlags;
+
+ isFirst = TSFlags & PPCII::PPC970_First;
+ isSingle = TSFlags & PPCII::PPC970_Single;
+ isCracked = TSFlags & PPCII::PPC970_Cracked;
+ return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
+}
+
+/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
+/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
+bool PPCHazardRecognizer970::
+isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const {
+ for (unsigned i = 0, e = NumStores; i != e; ++i) {
+ // Handle exact and commuted addresses.
+ if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
+ return true;
+
+ // Okay, we don't have an exact match, if this is an indexed offset, see if
+ // we have overlap (which happens during fp->int conversion for example).
+ if (StoreValue[i] == LoadValue) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ if (StoreOffset[i] < LoadOffset) {
+ if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
+ } else {
+ if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getHazardType - We return hazard for any non-branch instruction that would
+/// terminate the dispatch group. We turn NoopHazard for any
+/// instructions that wouldn't terminate the dispatch group that would cause a
+/// pipeline flush.
+ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
+getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
+
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return NoHazard;
+
+ unsigned Opcode = MI->getOpcode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
+
+ // We can only issue a PPC970_First/PPC970_Single instruction (such as
+ // crand/mtspr/etc) if this is the first cycle of the dispatch group.
+ if (NumIssued != 0 && (isFirst || isSingle))
+ return Hazard;
+
+ // If this instruction is cracked into two ops by the decoder, we know that
+ // it is not a branch and that it cannot issue if 3 other instructions are
+ // already in the dispatch group.
+ if (isCracked && NumIssued > 2)
+ return Hazard;
+
+ switch (InstrType) {
+ default: llvm_unreachable("Unknown instruction type!");
+ case PPCII::PPC970_FXU:
+ case PPCII::PPC970_LSU:
+ case PPCII::PPC970_FPU:
+ case PPCII::PPC970_VALU:
+ case PPCII::PPC970_VPERM:
+ // We can only issue a branch as the last instruction in a group.
+ if (NumIssued == 4) return Hazard;
+ break;
+ case PPCII::PPC970_CRU:
+ // We can only issue a CR instruction in the first two slots.
+ if (NumIssued >= 2) return Hazard;
+ break;
+ case PPCII::PPC970_BRU:
+ break;
+ }
+
+ // Do not allow MTCTR and BCTRL to be in the same dispatch group.
+ if (HasCTRSet && Opcode == PPC::BCTRL)
+ return NoopHazard;
+
+ // If this is a load following a store, make sure it's not to the same or
+ // overlapping address.
+ if (isLoad && NumStores && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ if (isLoadOfStoredAddress(MO->getSize(),
+ MO->getOffset(), MO->getValue()))
+ return NoopHazard;
+ }
+
+ return NoHazard;
+}
+
+void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return;
+
+ unsigned Opcode = MI->getOpcode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return;
+
+ // Update structural hazard information.
+ if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
+
+ // Track the address stored to.
+ if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ StoreSize[NumStores] = MO->getSize();
+ StoreOffset[NumStores] = MO->getOffset();
+ StoreValue[NumStores] = MO->getValue();
+ ++NumStores;
+ }
+
+ if (InstrType == PPCII::PPC970_BRU || isSingle)
+ NumIssued = 4; // Terminate a d-group.
+ ++NumIssued;
+
+ // If this instruction is cracked into two ops by the decoder, remember that
+ // we issued two pieces.
+ if (isCracked)
+ ++NumIssued;
+
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::AdvanceCycle() {
+ assert(NumIssued < 5 && "Illegal dispatch group!");
+ ++NumIssued;
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::Reset() {
+ EndDispatchGroup();
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
new file mode 100644
index 000000000000..55b45d01b20e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -0,0 +1,91 @@
+//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCHAZRECS_H
+#define PPCHAZRECS_H
+
+#include "PPCInstrInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based
+/// hazard recognizer for generic PPC processors.
+class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ScheduleDAG *DAG;
+public:
+ PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData,
+ const ScheduleDAG *DAG_) :
+ ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {}
+
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void Reset();
+};
+
+/// PPCHazardRecognizer970 - This class defines a finite state automata that
+/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
+/// promotes good dispatch group formation and implements noop insertion to
+/// avoid structural hazards that cause significant performance penalties (e.g.
+/// setting the CTR register then branching through it within a dispatch group),
+/// or storing then loading from the same address within a dispatch group.
+class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
+ const TargetInstrInfo &TII;
+
+ unsigned NumIssued; // Number of insts issued, including advanced cycles.
+
+ // Various things that can cause a structural hazard.
+
+ // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
+ bool HasCTRSet;
+
+ // StoredPtr - Keep track of the address of any store. If we see a load from
+ // the same address (or one that aliases it), disallow the store. We can have
+ // up to four stores in one dispatch group, hence we track up to 4.
+ //
+ // This is null if we haven't seen a store yet. We keep track of both
+ // operands of the store here, since we support [r+r] and [r+i] addressing.
+ const Value *StoreValue[4];
+ int64_t StoreOffset[4];
+ uint64_t StoreSize[4];
+ unsigned NumStores;
+
+public:
+ PPCHazardRecognizer970(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void Reset();
+
+private:
+ /// EndDispatchGroup - Called when we are finishing a new dispatch group.
+ ///
+ void EndDispatchGroup();
+
+ /// GetInstrType - Classify the specified powerpc opcode according to its
+ /// pipeline.
+ PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,bool &isCracked,
+ bool &isLoad, bool &isStore);
+
+ bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
new file mode 100644
index 000000000000..95efc11b53c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -0,0 +1,1565 @@
+//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for PowerPC,
+// converting from a legalized dag to a PPC dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-codegen"
+#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace llvm {
+ void initializePPCDAGToDAGISelPass(PassRegistry&);
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// PPCDAGToDAGISel - PPC specific code to select PPC machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class PPCDAGToDAGISel : public SelectionDAGISel {
+ const PPCTargetMachine &TM;
+ const PPCTargetLowering &PPCLowering;
+ const PPCSubtarget &PPCSubTarget;
+ unsigned GlobalBaseReg;
+ public:
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
+ : SelectionDAGISel(tm), TM(tm),
+ PPCLowering(*TM.getTargetLowering()),
+ PPCSubTarget(*TM.getSubtargetImpl()) {
+ initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnMachineFunction(MF);
+
+ if (!PPCSubTarget.isSVR4ABI())
+ InsertVRSaveCode(MF);
+
+ return true;
+ }
+
+ virtual void PostprocessISelDAG();
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
+ }
+
+ /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
+ /// with any number of 0s on either side. The 1s are allowed to wrap from
+ /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
+ /// 0x0F0F0000 is not, since all 1s are not contiguous.
+ static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
+
+
+ /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
+ /// rotate and mask opcode and mask operation.
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
+ unsigned &SH, unsigned &MB, unsigned &ME);
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ SDNode *getGlobalBaseReg();
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl);
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ bool SelectAddrImm(SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
+ /// immediate field. Note that the operand at this point is already the
+ /// result of a prior SelectAddressRegImm call.
+ bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
+ if (N.getOpcode() == ISD::TargetConstant ||
+ N.getOpcode() == ISD::TargetGlobalAddress) {
+ Out = N;
+ return true;
+ }
+
+ return false;
+ }
+
+ /// SelectAddrIdx - Given the specified addressed, check to see if it can be
+ /// represented as an indexed [r+r] operation. Returns false if it can
+ /// be represented by [r+imm], which are preferred.
+ bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrIdxOnly - Given the specified addressed, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrImmShift - Returns true if the address N can be represented by
+ /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable
+ /// for use by STD and friends.
+ bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
+ }
+
+ // Select an address into a single register.
+ bool SelectAddr(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions. It is always correct to compute the value into
+ /// a register. The case of adding a (possibly relocatable) constant to a
+ /// register can be improved, but it is wrong to substitute Reg+Reg for
+ /// Reg in an asm, because the load or store opcode would have to change.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ OutOps.push_back(Op);
+ return false;
+ }
+
+ void InsertVRSaveCode(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PowerPC DAG->DAG Pattern Instruction Selection";
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "PPCGenDAGISel.inc"
+
+private:
+ SDNode *SelectSETCC(SDNode *N);
+ };
+}
+
+/// InsertVRSaveCode - Once the entire function has been instruction selected,
+/// all virtual registers are created and all machine instructions are built,
+/// check to see if we need to save/restore VRSAVE. If so, do it.
+void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
+ // Check to see if this function uses vector registers, which means we have to
+ // save and restore the VRSAVE register and update it with the regs we use.
+ //
+ // In this case, there will be virtual registers of vector type created
+ // by the scheduler. Detect them now.
+ bool HasVectorVReg = false;
+ for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
+ HasVectorVReg = true;
+ break;
+ }
+ }
+ if (!HasVectorVReg) return; // nothing to do.
+
+ // If we have a vector register, we want to emit code into the entry and exit
+ // blocks to save and restore the VRSAVE register. We do this here (instead
+ // of marking all vector instructions as clobbering VRSAVE) for two reasons:
+ //
+ // 1. This (trivially) reduces the load on the register allocator, by not
+ // having to represent the live range of the VRSAVE register.
+ // 2. This (more significantly) allows us to create a temporary virtual
+ // register to hold the saved VRSAVE value, allowing this temporary to be
+ // register allocated, instead of forcing it to be spilled to the stack.
+
+ // Create two vregs - one to hold the VRSAVE register that is live-in to the
+ // function and one for the value after having bits or'd into it.
+ unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock &EntryBB = *Fn.begin();
+ DebugLoc dl;
+ // Emit the following code into the entry block:
+ // InVRSAVE = MFVRSAVE
+ // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
+ // MTVRSAVE UpdatedVRSAVE
+ MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
+ UpdatedVRSAVE).addReg(InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
+
+ // Find all return blocks, outputting a restore in each epilog.
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ if (!BB->empty() && BB->back().isReturn()) {
+ IP = BB->end(); --IP;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = IP;
+ while (I2 != BB->begin() && (--I2)->isTerminator())
+ IP = I2;
+
+ // Emit: MTVRSAVE InVRSave
+ BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
+ }
+ }
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// base address to use for accessing globals into a register.
+///
+SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
+ if (!GlobalBaseReg) {
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc dl;
+
+ if (PPCLowering.getPointerTy() == MVT::i32) {
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ } else {
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
+ }
+ }
+ return CurDAG->getRegister(GlobalBaseReg,
+ PPCLowering.getPointerTy()).getNode();
+}
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
+/// operand. If so Imm will receive the 64-bit value.
+static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if a constant operand.
+// If so Imm will receive the 32 bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has a immediate integer right operand.
+// If so Imm will receive the 32 bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc
+ && isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = CountLeadingZeros_32(Val);
+ // look for the first zero bit after the run of ones
+ ME = CountLeadingZeros_32((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = CountLeadingZeros_32(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+ bool isShiftMask, unsigned &SH,
+ unsigned &MB, unsigned &ME) {
+ // Don't even go down this path for i64, since different logic will be
+ // necessary for rldicl/rldicr/rldimi.
+ if (N->getValueType(0) != MVT::i32)
+ return false;
+
+ unsigned Shift = 32;
+ unsigned Indeterminant = ~0; // bit mask marking indeterminant results
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() != 2 ||
+ !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
+ return false;
+
+ if (Opcode == ISD::SHL) {
+ // apply shift left to mask if it comes first
+ if (isShiftMask) Mask = Mask << Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu << Shift);
+ } else if (Opcode == ISD::SRL) {
+ // apply shift right to mask if it comes first
+ if (isShiftMask) Mask = Mask >> Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu >> Shift);
+ // adjust for the left rotate
+ Shift = 32 - Shift;
+ } else if (Opcode == ISD::ROTL) {
+ Indeterminant = 0;
+ } else {
+ return false;
+ }
+
+ // if the mask doesn't intersect any Indeterminant bits
+ if (Mask && !(Mask & Indeterminant)) {
+ SH = Shift & 31;
+ // make sure the mask is still a mask (wrap arounds may not be)
+ return isRunOfOnes(Mask, MB, ME);
+ }
+ return false;
+}
+
+/// SelectBitfieldInsert - turn an or of two masked values into
+/// the rotate left word immediate then mask insert (rlwimi) instruction.
+SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt LKZ, LKO, RKZ, RKO;
+ CurDAG->ComputeMaskedBits(Op0, LKZ, LKO);
+ CurDAG->ComputeMaskedBits(Op1, RKZ, RKO);
+
+ unsigned TargetMask = LKZ.getZExtValue();
+ unsigned InsertMask = RKZ.getZExtValue();
+
+ if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+ unsigned Op0Opc = Op0.getOpcode();
+ unsigned Op1Opc = Op1.getOpcode();
+ unsigned Value, SH = 0;
+ TargetMask = ~TargetMask;
+ InsertMask = ~InsertMask;
+
+ // If the LHS has a foldable shift and the RHS does not, then swap it to the
+ // RHS so that we can fold the shift into the insert.
+ if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+ if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+ Op0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+ } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+ if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+
+ unsigned MB, ME;
+ if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
+ SDValue Tmp1, Tmp2;
+
+ if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0);
+ SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+ }
+ if (Op1Opc == ISD::AND) {
+ unsigned SHOpc = Op1.getOperand(0).getOpcode();
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0).getOperand(0);
+ SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+ } else {
+ Op1 = Op1.getOperand(0);
+ }
+ }
+
+ SH &= 31;
+ SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
+ getI32Imm(ME) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+ return 0;
+}
+
+/// SelectCC - Select a comparison of the specified values with the specified
+/// condition code, returning the CR# of the expression.
+SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, DebugLoc dl) {
+ // Always select the LHS.
+ unsigned Opc;
+
+ if (LHS.getValueType() == MVT::i32) {
+ unsigned Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt32Immediate(RHS, Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>((int)Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpw cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmplwi cr0,r0,0x5678
+ // beq cr0,L6
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
+ getI32Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ }
+ Opc = PPC::CMPLW;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLW;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm((int)SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPW;
+ }
+ } else if (LHS.getValueType() == MVT::i64) {
+ uint64_t Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt64Immediate(RHS.getNode(), Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpd cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmpldi cr0,r0,0x5678
+ // beq cr0,L6
+ if (isUInt<32>(Imm)) {
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
+ getI64Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ }
+ }
+ Opc = PPC::CMPLD;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLD;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI64Imm(SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPD;
+ }
+ } else if (LHS.getValueType() == MVT::f32) {
+ Opc = PPC::FCMPUS;
+ } else {
+ assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
+ Opc = PPC::FCMPUD;
+ }
+ return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+}
+
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETUEQ:
+ case ISD::SETONE:
+ case ISD::SETOLE:
+ case ISD::SETOGE:
+ llvm_unreachable("Should be lowered by legalize!");
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETUNE:
+ case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETOLT:
+ case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETULE:
+ case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETOGT:
+ case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETUGE:
+ case ISD::SETGE: return PPC::PRED_GE;
+ case ISD::SETO: return PPC::PRED_NU;
+ case ISD::SETUO: return PPC::PRED_UN;
+ // These two are invalid for floating point. Assume we have int.
+ case ISD::SETULT: return PPC::PRED_LT;
+ case ISD::SETUGT: return PPC::PRED_GT;
+ }
+}
+
+/// getCRIdxForSetCC - Return the index of the condition register field
+/// associated with the SetCC condition, and whether or not the field is
+/// treated as inverted. That is, lt = 0; ge = 0 inverted.
+///
+/// If this returns with Other != -1, then the returned comparison is an or of
+/// two simpler comparisons. In this case, Invert is guaranteed to be false.
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
+ Invert = false;
+ Other = -1;
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOLT:
+ case ISD::SETLT: return 0; // Bit #0 = SETOLT
+ case ISD::SETOGT:
+ case ISD::SETGT: return 1; // Bit #1 = SETOGT
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
+ case ISD::SETUO: return 3; // Bit #3 = SETUO
+ case ISD::SETUGE:
+ case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
+ case ISD::SETULE:
+ case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
+ case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
+ case ISD::SETUEQ:
+ case ISD::SETOGE:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ llvm_unreachable("Invalid branch code: should be expanded by legalize");
+ // These are invalid for floating point. Assume integer.
+ case ISD::SETULT: return 0;
+ case ISD::SETUGT: return 1;
+ }
+}
+
+// getVCmpInst: return the vector compare instruction for the specified
+// vector type and condition code. Since this is for altivec specific code,
+// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
+static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETUEQ:
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPEQUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPEQUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPEQUW;
+ // v4f32 != v4f32 could be translate to unordered not equal
+ else if (VecVT == MVT::v4f32)
+ return PPC::VCMPEQFP;
+ break;
+ case ISD::SETLT:
+ case ISD::SETGT:
+ case ISD::SETLE:
+ case ISD::SETGE:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTSB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTSH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTSW;
+ else if (VecVT == MVT::v4f32)
+ return PPC::VCMPGTFP;
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULE:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTUW;
+ break;
+ case ISD::SETOEQ:
+ if (VecVT == MVT::v4f32)
+ return PPC::VCMPEQFP;
+ break;
+ case ISD::SETOLT:
+ case ISD::SETOGT:
+ case ISD::SETOLE:
+ if (VecVT == MVT::v4f32)
+ return PPC::VCMPGTFP;
+ break;
+ case ISD::SETOGE:
+ if (VecVT == MVT::v4f32)
+ return PPC::VCMPGEFP;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid integer vector compare condition");
+}
+
+// getVCmpEQInst: return the equal compare instruction for the specified vector
+// type. Since this is for altivec specific code, only support the altivec
+// types (v16i8, v8i16, v4i32, and v4f32).
+static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
+ switch (VecVT) {
+ case MVT::v16i8:
+ return PPC::VCMPEQUB;
+ case MVT::v8i16:
+ return PPC::VCMPEQUH;
+ case MVT::v4i32:
+ return PPC::VCMPEQUW;
+ case MVT::v4f32:
+ return PPC::VCMPEQFP;
+ default:
+ llvm_unreachable("Invalid integer vector compare condition");
+ }
+}
+
+
+SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned Imm;
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ // We can codegen setcc op, imm very efficiently compared to a brcond.
+ // Check for those cases here.
+ // setcc op, 0
+ if (Imm == 0) {
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ: {
+ Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
+ SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETNE: {
+ if (isPPC64) break;
+ SDValue AD =
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
+ AD.getValue(1));
+ }
+ case ISD::SETLT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue T =
+ SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
+ T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
+ SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ }
+ } else if (Imm == ~0U) { // setcc op, -1
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ:
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(1)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(CurDAG->getMachineNode(PPC::LI, dl,
+ MVT::i32,
+ getI32Imm(0)), 0),
+ Op.getValue(1));
+ case ISD::SETNE: {
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
+ SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
+ Op, SDValue(AD, 1));
+ }
+ case ISD::SETLT: {
+ SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
+ getI32Imm(1)), 0);
+ SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
+ Op), 0);
+ SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
+ getI32Imm(1));
+ }
+ }
+ }
+ }
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Altivec Vector compare instructions do not set any CR register by default and
+ // vector compare operations return the same type as the operands.
+ if (LHS.getValueType().isVector()) {
+ EVT VecVT = LHS.getValueType();
+ MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
+ unsigned int VCmpInst = getVCmpInst(VT, CC);
+
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ case ISD::SETUEQ:
+ return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
+ case ISD::SETNE:
+ case ISD::SETONE:
+ case ISD::SETUNE: {
+ SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+ return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp);
+ }
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ case ISD::SETUGE: {
+ // Small optimization: Altivec provides a 'Vector Compare Greater Than
+ // or Equal To' instruction (vcmpgefp), so in this case there is no
+ // need for extra logic for the equal compare.
+ if (VecVT.getSimpleVT().isFloatingPoint()) {
+ return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
+ } else {
+ SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+ unsigned int VCmpEQInst = getVCmpEQInst(VT);
+ SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
+ return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ);
+ }
+ }
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ case ISD::SETULE: {
+ SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
+ unsigned int VCmpEQInst = getVCmpEQInst(VT);
+ SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
+ return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ);
+ }
+ default:
+ llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
+ }
+ }
+
+ bool Inv;
+ int OtherCondIdx;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
+ SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
+ SDValue IntCR;
+
+ // Force the ccreg into CR7.
+ SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
+
+ SDValue InFlag(0, 0); // Null incoming flag value.
+ CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
+ InFlag).getValue(1);
+
+ if (PPCSubTarget.hasMFOCRF() && OtherCondIdx == -1)
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
+ CCReg), 0);
+ else
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ CR7Reg, CCReg), 0);
+
+ SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
+ getI32Imm(31), getI32Imm(31) };
+ if (OtherCondIdx == -1 && !Inv)
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+
+ // Get the specified bit.
+ SDValue Tmp =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+ if (Inv) {
+ assert(OtherCondIdx == -1 && "Can't have split plus negation");
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
+ }
+
+ // Otherwise, we have to turn an operation like SETONE -> SETOLT | SETOGT.
+ // We already got the bit for the first part of the comparison (e.g. SETULE).
+
+ // Get the other bit of the comparison.
+ Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
+ SDValue OtherCond =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+
+ return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
+}
+
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case ISD::Constant: {
+ if (N->getValueType(0) == MVT::i64) {
+ // Get 64 bit value.
+ int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt<32>(Imm)) {
+ Shift = CountTrailingZeros_64(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt<32>(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ SDNode *Result;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ // Simple value.
+ if (isInt<16>(Imm)) {
+ // Just the Lo bits.
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
+ } else if (Lo) {
+ // Handle the Hi bits.
+ unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
+ // And Lo bits.
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ } else {
+ // Just the Hi bits.
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32-bits were not zero.
+ if (Imm) {
+ Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
+ SDValue(Result, 0),
+ getI32Imm(Shift),
+ getI32Imm(63 - Shift));
+ }
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi));
+ }
+ if ((Lo = Remainder & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ }
+
+ return Result;
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ return SelectSETCC(N);
+ case PPCISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI,
+ getSmallIPtrImm(0));
+ return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
+ getSmallIPtrImm(0));
+ }
+
+ case PPCISD::MFCR: {
+ SDValue InFlag = N->getOperand(1);
+ // Use MFOCRF if supported.
+ if (PPCSubTarget.hasMFOCRF())
+ return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ else
+ return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ }
+
+ case ISD::SDIV: {
+ // FIXME: since this depends on the setting of the carry flag from the srawi
+ // we should really be making notes about that for the scheduler.
+ // FIXME: It sure would be nice if we could cheaply recognize the
+ // srl/add/sra pattern the dag combiner will generate for this as
+ // sra/addze rather than having to handle sdiv ourselves. oh well.
+ unsigned Imm;
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ SDValue N0 = N->getOperand(0);
+ if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, getI32Imm(Log2_32(Imm)));
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1));
+ } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, getI32Imm(Log2_32(-Imm)));
+ SDValue PT =
+ SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1)),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ case ISD::LOAD: {
+ // Handle preincrement loads.
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT LoadedVT = LD->getMemoryVT();
+
+ // Normal loads are handled by code generated from the .td file.
+ if (LD->getAddressingMode() != ISD::PRE_INC)
+ break;
+
+ SDValue Offset = LD->getOffset();
+ if (Offset.getOpcode() == ISD::TargetConstant ||
+ Offset.getOpcode() == ISD::TargetGlobalAddress) {
+
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDU; break;
+ case MVT::f32: Opcode = PPC::LFSU; break;
+ case MVT::i32: Opcode = PPC::LWZU; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDU; break;
+ case MVT::i32: Opcode = PPC::LWZU8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
+ } else {
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDUX; break;
+ case MVT::f32: Opcode = PPC::LFSUX; break;
+ case MVT::i32: Opcode = PPC::LWZUX; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
+ "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDUX; break;
+ case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Base, Offset, Chain };
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
+ }
+ }
+
+ case ISD::AND: {
+ unsigned Imm, Imm2, SH, MB, ME;
+ uint64_t Imm64;
+
+ // If this is an and of a value rotated between 0 and 31 bits and then and'd
+ // with a mask, emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // If this is just a masked value where the input is not handled above, and
+ // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDValue Val = N->getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // If this is a 64-bit zero-extension mask, emit rldicl.
+ if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
+ isMask_64(Imm64)) {
+ SDValue Val = N->getOperand(0);
+ MB = 64 - CountTrailingOnes_64(Imm64);
+ SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) };
+ return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3);
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return NULL;
+ }
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ unsigned MB, ME;
+ Imm = ~(Imm^Imm2);
+ if (isRunOfOnes(Imm, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::OR:
+ if (N->getValueType(0) == MVT::i32)
+ if (SDNode *I = SelectBitfieldInsert(N))
+ return I;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SHL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SRL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SELECT_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
+ if (!isPPC64)
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ N->getOperand(0), getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDValue(Tmp, 0), N->getOperand(0),
+ SDValue(Tmp, 1));
+ }
+
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ unsigned BROpc = getPredicateForSetCC(CC);
+
+ unsigned SelectCCOp;
+ if (N->getValueType(0) == MVT::i32)
+ SelectCCOp = PPC::SELECT_CC_I4;
+ else if (N->getValueType(0) == MVT::i64)
+ SelectCCOp = PPC::SELECT_CC_I8;
+ else if (N->getValueType(0) == MVT::f32)
+ SelectCCOp = PPC::SELECT_CC_F4;
+ else if (N->getValueType(0) == MVT::f64)
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else
+ SelectCCOp = PPC::SELECT_CC_VRRC;
+
+ SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
+ getI32Imm(BROpc) };
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
+ }
+ case PPCISD::COND_BRANCH: {
+ // Op #0 is the Chain.
+ // Op #1 is the PPC::PRED_* number.
+ // Op #2 is the CR#
+ // Op #3 is the Dest MBB
+ // Op #4 is the Flag.
+ // Prevent PPC::PRED_* from being selected into LI.
+ SDValue Pred =
+ getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
+ N->getOperand(0), N->getOperand(4) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
+ }
+ case ISD::BR_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
+ SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+ N->getOperand(4), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
+ }
+ case ISD::BRIND: {
+ // FIXME: Should custom lower this.
+ SDValue Chain = N->getOperand(0);
+ SDValue Target = N->getOperand(1);
+ unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
+ unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
+ Chain), 0);
+ return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
+ }
+ case PPCISD::TOC_ENTRY: {
+ assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
+
+ // For medium and large code model, we generate two instructions as
+ // described below. Otherwise we allow SelectCodeCommon to handle this,
+ // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+ CodeModel::Model CModel = TM.getCodeModel();
+ if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
+ break;
+
+ // The first source operand is a TargetGlobalAddress or a
+ // TargetJumpTable. If it is an externally defined symbol, a symbol
+ // with common linkage, a function address, or a jump table address,
+ // or if we are generating code for large code model, we generate:
+ // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
+ SDValue GA = N->getOperand(0);
+ SDValue TOCbase = N->getOperand(1);
+ SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
+ TOCbase, GA);
+
+ if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
+ return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+ SDValue(Tmp, 0));
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+ const GlobalValue *GValue = G->getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ assert((GVar || isa<Function>(RealGValue)) &&
+ "Unexpected global value subclass!");
+
+ // An external variable is one without an initializer. For these,
+ // for variables with common linkage, and for Functions, generate
+ // the LDtocL form.
+ if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+ RealGValue->hasAvailableExternallyLinkage())
+ return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+ SDValue(Tmp, 0));
+ }
+
+ return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
+ SDValue(Tmp, 0), GA);
+ }
+ case PPCISD::VADD_SPLAT: {
+ // This expands into one of three sequences, depending on whether
+ // the first operand is odd or even, positive or negative.
+ assert(isa<ConstantSDNode>(N->getOperand(0)) &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ "Invalid operand on VADD_SPLAT!");
+
+ int Elt = N->getConstantOperandVal(0);
+ int EltSize = N->getConstantOperandVal(1);
+ unsigned Opc1, Opc2, Opc3;
+ EVT VT;
+
+ if (EltSize == 1) {
+ Opc1 = PPC::VSPLTISB;
+ Opc2 = PPC::VADDUBM;
+ Opc3 = PPC::VSUBUBM;
+ VT = MVT::v16i8;
+ } else if (EltSize == 2) {
+ Opc1 = PPC::VSPLTISH;
+ Opc2 = PPC::VADDUHM;
+ Opc3 = PPC::VSUBUHM;
+ VT = MVT::v8i16;
+ } else {
+ assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
+ Opc1 = PPC::VSPLTISW;
+ Opc2 = PPC::VADDUWM;
+ Opc3 = PPC::VSUBUWM;
+ VT = MVT::v4i32;
+ }
+
+ if ((Elt & 1) == 0) {
+ // Elt is even, in the range [-32,-18] + [16,30].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp = VSPLTIS[BHW] elt
+ // VADDU[BHW]M tmp, tmp
+ // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
+ SDValue EltVal = getI32Imm(Elt >> 1);
+ SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ SDValue TmpVal = SDValue(Tmp, 0);
+ return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+ } else if (Elt > 0) {
+ // Elt is odd and positive, in the range [17,31].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt-16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VSUBU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt - 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+
+ } else {
+ // Elt is odd and negative, in the range [-31,-17].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt+16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VADDU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt + 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+ }
+ }
+ }
+
+ return SelectCode(N);
+}
+
+/// PostProcessISelDAG - Perform some late peephole optimizations
+/// on the DAG representation.
+void PPCDAGToDAGISel::PostprocessISelDAG() {
+
+ // Skip peepholes at -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // These optimizations are currently supported only for 64-bit SVR4.
+ if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
+ return;
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = --Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ unsigned FirstOp;
+ unsigned StorageOpcode = N->getMachineOpcode();
+
+ switch (StorageOpcode) {
+ default: continue;
+
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LD:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWA:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ FirstOp = 0;
+ break;
+
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ FirstOp = 1;
+ break;
+ }
+
+ // If this is a load or store with a zero offset, we may be able to
+ // fold an add-immediate into the memory operation.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
+ N->getConstantOperandVal(FirstOp) != 0)
+ continue;
+
+ SDValue Base = N->getOperand(FirstOp + 1);
+ if (!Base.isMachineOpcode())
+ continue;
+
+ unsigned Flags = 0;
+ bool ReplaceFlags = true;
+
+ // When the feeding operation is an add-immediate of some sort,
+ // determine whether we need to add relocation information to the
+ // target flags on the immediate operand when we fold it into the
+ // load instruction.
+ //
+ // For something like ADDItocL, the relocation information is
+ // inferred from the opcode; when we process it in the AsmPrinter,
+ // we add the necessary relocation there. A load, though, can receive
+ // relocation from various flavors of ADDIxxx, so we need to carry
+ // the relocation information in the target flags.
+ switch (Base.getMachineOpcode()) {
+ default: continue;
+
+ case PPC::ADDI8:
+ case PPC::ADDI:
+ // In some cases (such as TLS) the relocation information
+ // is already in place on the operand, so copying the operand
+ // is sufficient.
+ ReplaceFlags = false;
+ // For these cases, the immediate may not be divisible by 4, in
+ // which case the fold is illegal for DS-form instructions. (The
+ // other cases provide aligned addresses and are always safe.)
+ if ((StorageOpcode == PPC::LWA ||
+ StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) &&
+ (!isa<ConstantSDNode>(Base.getOperand(1)) ||
+ Base.getConstantOperandVal(1) % 4 != 0))
+ continue;
+ break;
+ case PPC::ADDIdtprelL:
+ Flags = PPCII::MO_DTPREL16_LO;
+ break;
+ case PPC::ADDItlsldL:
+ Flags = PPCII::MO_TLSLD16_LO;
+ break;
+ case PPC::ADDItocL:
+ Flags = PPCII::MO_TOC16_LO;
+ break;
+ }
+
+ // We found an opportunity. Reverse the operands from the add
+ // immediate and substitute them into the load or store. If
+ // needed, update the target flags for the immediate operand to
+ // reflect the necessary relocation information.
+ DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ DEBUG(Base->dump(CurDAG));
+ DEBUG(dbgs() << "\nN: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ SDValue ImmOpnd = Base.getOperand(1);
+
+ // If the relocation information isn't already present on the
+ // immediate operand, add it now.
+ if (ReplaceFlags) {
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ } else if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
+ const Constant *C = CP->getConstVal();
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment(),
+ 0, Flags);
+ }
+ }
+
+ if (FirstOp == 1) // Store
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ Base.getOperand(0), N->getOperand(3));
+ else // Load
+ (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+ N->getOperand(2));
+
+ // The add-immediate may now be dead, in which case remove it.
+ if (Base.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(Base.getNode());
+ }
+}
+
+
+/// createPPCISelDag - This pass converts a legalized DAG into a
+/// PowerPC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
+ return new PPCDAGToDAGISel(TM);
+}
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0,
+ false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
new file mode 100644
index 000000000000..16fc8a0e3726
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -0,0 +1,7554 @@
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPerfectShuffle.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
+cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
+
+static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
+cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
+static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->isDarwin())
+ return new TargetLoweringObjectFileMachO();
+
+ return new TargetLoweringObjectFileELF();
+}
+
+PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
+ : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
+ const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ PPCRegInfo = TM.getRegisterInfo();
+
+ setPow2DivIsCheap();
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
+ // arguments are at least 4/8 bytes aligned.
+ bool isPPC64 = Subtarget->isPPC64();
+ setMinStackArgumentAlignment(isPPC64 ? 8:4);
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
+ addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PowerPC has pre-inc load and store's.
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+
+ // This is used in the ppcf128->int sequence. Note it has different semantics
+ // from FP_ROUND: that rounds to nearest, this rounds to zero.
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+
+ // We do not currently implement these libm ops for PowerPC.
+ setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
+
+ // PowerPC has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Legal);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Legal);
+
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+
+ // If we're enabling GP optimizations, use hardware square root
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ if (Subtarget->hasFPRND()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+
+ // frin does not implement "ties to even." Thus, this is safe only in
+ // fast-math mode.
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+
+ // These need to set FE_INEXACT, and use a custom inserter.
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ }
+ }
+
+ // PowerPC does not have BSWAP, CTPOP or CTTZ
+ setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+
+ if (Subtarget->hasPOPCNTD()) {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
+ } else {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ }
+
+ // PowerPC does not have ROTR
+ setOperationAction(ISD::ROTR, MVT::i32 , Expand);
+ setOperationAction(ISD::ROTR, MVT::i64 , Expand);
+
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // PowerPC wants to turn select_cc of FP into fsel when possible.
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // PowerPC wants to optimize integer setcc a bit
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+ // PowerPC does not have BRCOND which requires SetCC
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
+ // SjLj exception handling but a light-weight setjmp/longjmp replacement to
+ // support continuation, user-level threading, and etc.. As a result, no
+ // other SjLj exception interfaces are implemented and please don't build
+ // your own exception handling based on them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ // TRAP is legal.
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ if (Subtarget->isSVR4ABI()) {
+ if (isPPC64) {
+ // VAARG always uses double-word chunks, so promote anything smaller.
+ setOperationAction(ISD::VAARG, MVT::i1, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i8, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i16, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i32, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ } else {
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ }
+ } else
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Comparisons that require checking two conditions.
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+
+ if (Subtarget->has64BitSupport()) {
+ // They also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ // This is just the low 32 bits of a (signed) fp->i64 conversion.
+ // We cannot do this with Promote because i64 is not a legal type.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+
+ if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ } else {
+ // PowerPC does not have FP_TO_UINT on 32-bit implementations.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
+
+ // With the instructions enabled under FPCVT, we can do everything.
+ if (PPCSubTarget.hasFPCVT()) {
+ if (Subtarget->has64BitSupport()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ }
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ }
+
+ if (Subtarget->use64BitRegs()) {
+ // 64-bit PowerPC implementations can support i64 types directly
+ addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // 64-bit PowerPC wants to expand i128 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ } else {
+ // 32-bit PowerPC wants to expand i64 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ }
+
+ if (Subtarget->hasAltivec()) {
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD , VT, Legal);
+ setOperationAction(ISD::SUB , VT, Legal);
+
+ // We promote all shuffles to v16i8.
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
+ AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
+
+ // We promote all non-typed operations to v4i32.
+ setOperationAction(ISD::AND , VT, Promote);
+ AddPromotedToType (ISD::AND , VT, MVT::v4i32);
+ setOperationAction(ISD::OR , VT, Promote);
+ AddPromotedToType (ISD::OR , VT, MVT::v4i32);
+ setOperationAction(ISD::XOR , VT, Promote);
+ AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
+ setOperationAction(ISD::LOAD , VT, Promote);
+ AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
+
+ // No other operations are legal.
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+
+ for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
+ MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
+ setTruncStoreAction(VT, InnerVT, Expand);
+ }
+ setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, Expand);
+ }
+
+ // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
+ // with merges, splats, etc.
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::AND , MVT::v4i32, Legal);
+ setOperationAction(ISD::OR , MVT::v4i32, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i32, Legal);
+ setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+
+ addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
+
+ setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ }
+
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+ // Altivec does not contain unordered floating-point compare instructions
+ setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
+ }
+
+ if (Subtarget->has64BitSupport()) {
+ setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+ }
+
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ if (isPPC64) {
+ setStackPointerRegisterToSaveRestore(PPC::X1);
+ setExceptionPointerRegister(PPC::X3);
+ setExceptionSelectorRegister(PPC::X4);
+ } else {
+ setStackPointerRegisterToSaveRestore(PPC::R1);
+ setExceptionPointerRegister(PPC::R3);
+ setExceptionSelectorRegister(PPC::R4);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::BR_CC);
+ setTargetDAGCombine(ISD::BSWAP);
+
+ // Use reciprocal estimates.
+ if (TM.Options.UnsafeFPMath) {
+ setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::FSQRT);
+ }
+
+ // Darwin long double math library functions have $LDBL128 appended.
+ if (Subtarget->isDarwin()) {
+ setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
+ setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
+ setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
+ setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
+ setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
+ setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
+ setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
+ setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
+ setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
+ setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
+ }
+
+ setMinFunctionAlignment(2);
+ if (PPCSubTarget.isDarwin())
+ setPrefFunctionAlignment(4);
+
+ if (isPPC64 && Subtarget->isJITCodeModel())
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ setSupportJumpTables(false);
+
+ setInsertFencesForAtomic(true);
+
+ setSchedulingPreference(Sched::Hybrid);
+
+ computeRegisterProperties();
+
+ // The Freescale cores does better with aggressive inlining of memcpy and
+ // friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
+ if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+ MaxStoresPerMemset = 32;
+ MaxStoresPerMemsetOptSize = 16;
+ MaxStoresPerMemcpy = 32;
+ MaxStoresPerMemcpyOptSize = 8;
+ MaxStoresPerMemmove = 32;
+ MaxStoresPerMemmoveOptSize = 8;
+
+ setPrefFunctionAlignment(4);
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.
+unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
+ const TargetMachine &TM = getTargetMachine();
+ // Darwin passes everything on 4 byte boundary.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ return 4;
+
+ // 16byte and wider vectors are passed on 16byte boundary.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ if (VTy->getBitWidth() >= 128)
+ return 16;
+
+ // The rest is 8 on PPC64 and 4 on PPC32 boundary.
+ if (PPCSubTarget.isPPC64())
+ return 8;
+
+ return 4;
+}
+
+const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
+ case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::FRE: return "PPCISD::FRE";
+ case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
+ case PPCISD::STFIWX: return "PPCISD::STFIWX";
+ case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
+ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::Hi: return "PPCISD::Hi";
+ case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
+ case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
+ case PPCISD::LOAD: return "PPCISD::LOAD";
+ case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
+ case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRL: return "PPCISD::SRL";
+ case PPCISD::SRA: return "PPCISD::SRA";
+ case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::CALL: return "PPCISD::CALL";
+ case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
+ case PPCISD::MTCTR: return "PPCISD::MTCTR";
+ case PPCISD::BCTRL: return "PPCISD::BCTRL";
+ case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
+ case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::VCMP: return "PPCISD::VCMP";
+ case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::LBRX: return "PPCISD::LBRX";
+ case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LARX: return "PPCISD::LARX";
+ case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::MFFS: return "PPCISD::MFFS";
+ case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ case PPCISD::CR6SET: return "PPCISD::CR6SET";
+ case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
+ case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
+ case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
+ case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
+ case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
+ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
+ case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
+ case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
+ case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
+ case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
+ }
+}
+
+EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+//===----------------------------------------------------------------------===//
+// Node matching predicates, for use by the tblgen matching code.
+//===----------------------------------------------------------------------===//
+
+/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isZero();
+ }
+ return false;
+}
+
+/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
+/// true if Op is undef or if it matches the specified value.
+static bool isConstantOrUndef(int Op, int Val) {
+ return Op < 0 || Op == Val;
+}
+
+/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUHUM instruction.
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ return false;
+ }
+ return true;
+}
+
+/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUWUM instruction.
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ return false;
+ }
+ return true;
+}
+
+/// isVMerge - Common function, used to match vmrg* shuffles.
+///
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned LHSStart, unsigned RHSStart) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+ assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
+ "Unsupported merge size!");
+
+ for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
+ for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
+ if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
+ LHSStart+j+i*UnitSize) ||
+ !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
+ RHSStart+j+i*UnitSize))
+ return false;
+ }
+ return true;
+}
+
+/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+}
+
+/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+}
+
+
+/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+/// amount, otherwise return -1.
+int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 16) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+ ShiftAmt -= i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ }
+ return ShiftAmt;
+}
+
+/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of a single element that is suitable for input to
+/// VSPLTB/VSPLTH/VSPLTW.
+bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ (EltSize == 1 || EltSize == 2 || EltSize == 4));
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the value doesn't reference the second vector.
+ unsigned ElementBase = N->getMaskElt(0);
+
+ // FIXME: Handle UNDEF elements too!
+ if (ElementBase >= 16)
+ return false;
+
+ // Check that the indices are consecutive, in the case of a multi-byte element
+ // splatted with a v16i8 mask.
+ for (unsigned i = 1; i != EltSize; ++i)
+ if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
+ return false;
+
+ for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+ if (N->getMaskElt(i) < 0) continue;
+ for (unsigned j = 0; j != EltSize; ++j)
+ if (N->getMaskElt(i+j) != N->getMaskElt(j))
+ return false;
+ }
+ return true;
+}
+
+/// isAllNegativeZeroVector - Returns true if all elements of build_vector
+/// are -0.0.
+bool PPC::isAllNegativeZeroVector(SDNode *N) {
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+
+ APInt APVal, APUndef;
+ unsigned BitSize;
+ bool HasAnyUndefs;
+
+ if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ return CFP->getValueAPF().isNegZero();
+
+ return false;
+}
+
+/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ assert(isSplatShuffleMask(SVOp, EltSize));
+ return SVOp->getMaskElt(0) / EltSize;
+}
+
+/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
+/// by using a vspltis[bhw] instruction of the specified element size, return
+/// the constant being splatted. The ByteSize field indicates the number of
+/// bytes of each element [124] -> [bhw].
+SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+ SDValue OpVal(0, 0);
+
+ // If ByteSize of the splat is bigger than the element size of the
+ // build_vector, then we have a case where we are checking for a splat where
+ // multiple elements of the buildvector are folded together into a single
+ // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
+ unsigned EltSize = 16/N->getNumOperands();
+ if (EltSize < ByteSize) {
+ unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
+ SDValue UniquedVals[4];
+ assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
+
+ // See if all of the elements in the buildvector agree across.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ // If the element isn't a constant, bail fully out.
+ if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
+
+
+ if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+ UniquedVals[i&(Multiple-1)] = N->getOperand(i);
+ else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
+ return SDValue(); // no match.
+ }
+
+ // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
+ // either constant or undef values that are identical for each chunk. See
+ // if these chunks can form into a larger vspltis*.
+
+ // Check to see if all of the leading entries are either 0 or -1. If
+ // neither, then this won't fit into the immediate field.
+ bool LeadingZero = true;
+ bool LeadingOnes = true;
+ for (unsigned i = 0; i != Multiple-1; ++i) {
+ if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
+
+ LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
+ LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
+ }
+ // Finally, check the least significant entry.
+ if (LeadingZero) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
+ int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
+ if (Val < 16)
+ return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
+ }
+ if (LeadingOnes) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
+ int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
+ if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
+ return DAG.getTargetConstant(Val, MVT::i32);
+ }
+
+ return SDValue();
+ }
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return SDValue();
+ }
+
+ if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
+
+ unsigned ValSizeInBytes = EltSize;
+ uint64_t Value = 0;
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ Value = CN->getZExtValue();
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
+ Value = FloatToBits(CN->getValueAPF().convertToFloat());
+ }
+
+ // If the splat value is larger than the element value, then we can never do
+ // this splat. The only case that we could fit the replicated bits into our
+ // immediate field for would be zero, and we prefer to use vxor for it.
+ if (ValSizeInBytes < ByteSize) return SDValue();
+
+ // If the element value is larger than the splat value, cut it in half and
+ // check to see if the two halves are equal. Continue doing this until we
+ // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
+ while (ValSizeInBytes > ByteSize) {
+ ValSizeInBytes >>= 1;
+
+ // If the top half equals the bottom half, we're still ok.
+ if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
+ (Value & ((1 << (8*ValSizeInBytes))-1)))
+ return SDValue();
+ }
+
+ // Properly sign extend the value.
+ int MaskVal = SignExtend32(Value, ByteSize * 8);
+
+ // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
+ if (MaskVal == 0) return SDValue();
+
+ // Finally, if this value fits in a 5 bit sext field, return it
+ if (SignExtend32<5>(MaskVal) == MaskVal)
+ return DAG.getTargetConstant(MaskVal, MVT::i32);
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing Mode Selection
+//===----------------------------------------------------------------------===//
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified addressed, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ short imm = 0;
+ if (N.getOpcode() == ISD::ADD) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i
+ if (N.getOperand(1).getOpcode() == PPCISD::Lo)
+ return false; // r+i
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ } else if (N.getOpcode() == ISD::OR) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i can fold it if we can.
+
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are provably
+ // disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ APInt RHSKnownZero, RHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ LHSKnownZero, LHSKnownOne);
+
+ if (LHSKnownZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N.getOperand(1),
+ RHSKnownZero, RHSKnownOne);
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if (~(LHSKnownZero | RHSKnownZero) == 0) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 16-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from parent load or store, not from address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
+
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+
+ // If this address fits entirely in a 16-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ CN->getValueType(0));
+ return true;
+ }
+
+ // Handle 32-bit sext immediates with LIS + addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
+
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
+ return true;
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
+/// represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ // Check to see if we can easily represent this as an [r+r] address. This
+ // will fail if it thinks that the address is more profitably represented as
+ // reg+imm, e.g. where imm = 0.
+ if (SelectAddressRegReg(N, Base, Index, DAG))
+ return true;
+
+ // If the operand is an addition, always emit this as [r+r], since this is
+ // better (for code size, and execution, as the memop does the add for free)
+ // than emitting an explicit add.
+ if (N.getOpcode() == ISD::ADD) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ // Otherwise, do it the hard way, using R0 as the base register.
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ N.getValueType());
+ Index = N;
+ return true;
+}
+
+/// SelectAddressRegImmShift - Returns true if the address N can be
+/// represented by a base register plus a signed 14-bit displacement
+/// [r+imm*4]. Suitable for use by STD and friends.
+bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from the parent load or store, not the address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address. Verify low two bits are clear.
+ if ((CN->getZExtValue() & 3) == 0) {
+ // If this address fits entirely in a 14-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ CN->getValueType(0));
+ return true;
+ }
+
+ // Fold the low-part of 32-bit absolute addresses into addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
+ Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
+ return true;
+ }
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (DisablePPCPreinc) return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ unsigned Alignment;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ Alignment = LD->getAlignment();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
+ isLoad = false;
+ } else
+ return false;
+
+ // PowerPC doesn't have preinc load/store instructions for vectors.
+ if (VT.isVector())
+ return false;
+
+ if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
+
+ // Common code will reject creating a pre-inc form if the base pointer
+ // is a frame index, or if N is a store and the base pointer is either
+ // the same as or a predecessor of the value being stored. Check for
+ // those situations here, and try with swapped Base/Offset instead.
+ bool Swap = false;
+
+ if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
+ Swap = true;
+ else if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
+ Swap = true;
+ }
+
+ if (Swap)
+ std::swap(Base, Offset);
+
+ AM = ISD::PRE_INC;
+ return true;
+ }
+
+ // LDU/STU use reg+imm*4, others use reg+imm.
+ if (VT != MVT::i64) {
+ // reg + imm
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+ return false;
+ } else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
+ // reg + imm * 4.
+ if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+ return false;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
+ // sext i32 to i64 when addr mode is r+i.
+ if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
+ LD->getExtensionType() == ISD::SEXTLOAD &&
+ isa<ConstantSDNode>(Offset))
+ return false;
+ }
+
+ AM = ISD::PRE_INC;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// GetLabelAccessInfo - Return true if we should reference labels using a
+/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
+static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
+ unsigned &LoOpFlags, const GlobalValue *GV = 0) {
+ HiOpFlags = PPCII::MO_HA16;
+ LoOpFlags = PPCII::MO_LO16;
+
+ // Don't use the pic base if not in PIC relocation model. Or if we are on a
+ // non-darwin platform. We don't support PIC on other platforms yet.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
+ TM.getSubtarget<PPCSubtarget>().isDarwin();
+ if (isPIC) {
+ HiOpFlags |= PPCII::MO_PIC_FLAG;
+ LoOpFlags |= PPCII::MO_PIC_FLAG;
+ }
+
+ // If this is a reference to a global value that requires a non-lazy-ptr, make
+ // sure that instruction lowering adds it.
+ if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+ HiOpFlags |= PPCII::MO_NLP_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_FLAG;
+
+ if (GV->hasHiddenVisibility()) {
+ HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ }
+ }
+
+ return isPIC;
+}
+
+static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
+ SelectionDAG &DAG) {
+ EVT PtrVT = HiPart.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ DebugLoc DL = HiPart.getDebugLoc();
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
+
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ if (isPIC)
+ Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
+
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+}
+
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
+ return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue CPIHi =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
+ SDValue CPILo =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
+ return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
+ return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
+ SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
+ return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy();
+ bool is64bit = PPCSubTarget.isPPC64();
+
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+
+ if (Model == TLSModel::LocalExec) {
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_HA);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_LO);
+ SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ }
+
+ if (!is64bit)
+ llvm_unreachable("only local-exec is currently supported for ppc32");
+
+ if (Model == TLSModel::InitialExec) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
+ PtrVT, TGA, TPOffsetHi);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ }
+
+ if (Model == TLSModel::GeneralDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLS_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+ }
+
+ if (Model == TLSModel::LocalDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLSLD_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
+ Chain, ParmReg, TGA);
+ return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
+ }
+
+ llvm_unreachable("Unknown TLS model!");
+}
+
+SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ DebugLoc DL = GSDN->getDebugLoc();
+ const GlobalValue *GV = GSDN->getGlobal();
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+
+ SDValue GAHi =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
+ SDValue GALo =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
+
+ SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
+
+ // If the global reference is actually to a non-lazy-pointer, we have to do an
+ // extra load to get the address of the global.
+ if (MOHiFlag & PPCII::MO_NLP_FLAG)
+ Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
+ false, false, false, 0);
+ return Ptr;
+}
+
+SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we're comparing for equality to zero, expose the fact that this is
+ // implented as a ctlz/srl pair on ppc, so that the dag combiner can
+ // fold the new nodes.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ // Leave comparisons against 0 and -1 alone for now, since they're usually
+ // optimized. FIXME: revisit this when we can custom lower all setcc
+ // optimizations.
+ if (C->isAllOnesValue() || C->isNullValue())
+ return SDValue();
+ }
+
+ // If we have an integer seteq/setne, turn it into a compare against zero
+ // by xor'ing the rhs with the lhs, which is faster than setting a
+ // condition register, reading it back out, and masking the correct bit. The
+ // normal approach here uses sub to do this instead of xor. Using xor exposes
+ // the result to other bit-twiddling opportunities.
+ EVT LHSVT = Op.getOperand(0).getValueType();
+ if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ EVT VT = Op.getValueType();
+ SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
+ }
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
+
+ // gpr_index
+ SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ VAListPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = GprIndex.getValue(1);
+
+ if (VT == MVT::i64) {
+ // Check if GprIndex is even
+ SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
+ DAG.getConstant(0, MVT::i32), ISD::SETNE);
+ SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ // Align GprIndex to be even if it isn't
+ GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
+ GprIndex);
+ }
+
+ // fpr index is 1 byte after gpr
+ SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(1, MVT::i32));
+
+ // fpr
+ SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ FprPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = FprIndex.getValue(1);
+
+ SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(8, MVT::i32));
+
+ SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(4, MVT::i32));
+
+ // areas
+ SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ InChain = OverflowArea.getValue(1);
+
+ SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ InChain = RegSaveArea.getValue(1);
+
+ // select overflow_area if index > 8
+ SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(8, MVT::i32), ISD::SETLT);
+
+ // adjustment constant gpr_index * 4/8
+ SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ // OurReg = RegSaveArea + RegConstant
+ SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
+ RegConstant);
+
+ // Floating types are 32 bytes into RegSaveArea
+ if (VT.isFloatingPoint())
+ OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
+ DAG.getConstant(32, MVT::i32));
+
+ // increase {f,g}pr_index by 1 (or 2 if VT is i64)
+ SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+ MVT::i32));
+
+ InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
+ VT.isInteger() ? VAListPtr : FprPtr,
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+
+ // determine if we should load from reg_save_area or overflow_area
+ SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
+
+ // increase overflow_area by 4/8 if gpr/fpr > 8
+ SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
+ OverflowAreaPlusN);
+
+ InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
+ OverflowAreaPtr,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+ Type *IntPtrTy =
+ DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
+ *DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp; Args.push_back(Entry);
+
+ // TrampSize == (isPPC64 ? 48 : 40);
+ Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr; Args.push_back(Entry);
+ Entry.Node = Nest; Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ TargetLowering::CallLoweringInfo CLI(Chain,
+ Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0,
+ CallingConv::C,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ return CallResult.second;
+}
+
+SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+ }
+
+ // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
+ // We suppose the given va_list is already allocated.
+ //
+ // typedef struct {
+ // char gpr; /* index into the array of 8 GPRs
+ // * stored in the register save area
+ // * gpr=0 corresponds to r3,
+ // * gpr=1 to r4, etc.
+ // */
+ // char fpr; /* index into the array of 8 FPRs
+ // * stored in the register save area
+ // * fpr=0 corresponds to f1,
+ // * fpr=1 to f2, etc.
+ // */
+ // char *overflow_arg_area;
+ // /* location on stack that holds
+ // * the next overflow argument
+ // */
+ // char *reg_save_area;
+ // /* where r3:r10 and f1:f8 (if saved)
+ // * are stored
+ // */
+ // } va_list[1];
+
+
+ SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
+ SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
+
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
+ PtrVT);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ PtrVT);
+
+ uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
+ SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
+
+ uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
+ SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
+
+ uint64_t FPROffset = 1;
+ SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Store first byte : number of int regs
+ SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
+ Op.getOperand(1),
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+ uint64_t nextOffset = FPROffset;
+ SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
+ ConstFPROffset);
+
+ // Store second byte : number of float regs
+ SDValue secondStore =
+ DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
+ MachinePointerInfo(SV, nextOffset), MVT::i8,
+ false, false, 0);
+ nextOffset += StackOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
+
+ // Store second word : arguments given on stack
+ SDValue thirdStore =
+ DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+ nextOffset += FrameOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
+
+ // Store third word : arguments given in registers
+ return DAG.getStore(thirdStore, dl, FR, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+
+}
+
+#include "PPCGenCallingConv.inc"
+
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return true;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const uint16_t ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+ // Skip one register if the first unallocated register has an even register
+ // number and there are still argument registers available which have not been
+ // allocated yet. RegNum is actually an index into ArgRegs, which means we
+ // need to skip a register if RegNum is odd.
+ if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an odd register number and does not actually
+ // allocate a register for the current argument.
+ return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const uint16_t ArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+ // If there is only one Floating-point register left we need to put both f64
+ // values of a split ppc_fp128 value on the stack.
+ if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the two f64
+ // values a ppc_fp128 value is split into are both passed in registers or both
+ // passed on the stack and does not actually allocate a register for the
+ // current argument.
+ return false;
+}
+
+/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// on Darwin.
+static const uint16_t *GetFPR() {
+ static const uint16_t FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
+ };
+
+ return FPR;
+}
+
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack.
+static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ if (Flags.isByVal())
+ ArgSize = Flags.getByValSize();
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ return ArgSize;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ if (PPCSubTarget.isSVR4ABI()) {
+ if (PPCSubTarget.isPPC64())
+ return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ else
+ return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ } else {
+ return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ }
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_32SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // 32-bit SVR4 ABI Stack Frame Layout:
+ // +-----------------------------------+
+ // +--> | Back chain |
+ // | +-----------------------------------+
+ // | | Floating-point register save area |
+ // | +-----------------------------------+
+ // | | General register save area |
+ // | +-----------------------------------+
+ // | | CR save word |
+ // | +-----------------------------------+
+ // | | VRSAVE save word |
+ // | +-----------------------------------+
+ // | | Alignment padding |
+ // | +-----------------------------------+
+ // | | Vector register save area |
+ // | +-----------------------------------+
+ // | | Local variable space |
+ // | +-----------------------------------+
+ // | | Parameter list area |
+ // | +-----------------------------------+
+ // | | LR save word |
+ // | +-----------------------------------+
+ // SP--> +--- | Back chain |
+ // +-----------------------------------+
+ //
+ // Specifications:
+ // System V Application Binary Interface PowerPC Processor Supplement
+ // AltiVec Technology Programming Interface Manual
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = 4;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ const TargetRegisterClass *RC;
+ EVT ValVT = VA.getValVT();
+
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("ValVT not supported by formal arguments Lowering");
+ case MVT::i32:
+ RC = &PPC::GPRCRegClass;
+ break;
+ case MVT::f32:
+ RC = &PPC::F4RCRegClass;
+ break;
+ case MVT::f64:
+ RC = &PPC::F8RCRegClass;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ RC = &PPC::VRRCRegClass;
+ break;
+ }
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
+
+ InVals.push_back(ArgValue);
+ } else {
+ // Argument stored in memory.
+ assert(VA.isMemLoc());
+
+ unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ isImmutable);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
+ false, false, false, 0));
+ }
+ }
+
+ // Assign locations to all of the incoming aggregate by value arguments.
+ // Aggregates passed by value are stored in the local variable space of the
+ // caller's stack frame, right above the parameter list area.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(false, false));
+
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+
+ FI->setMinReservedArea(MinReservedArea);
+
+ SmallVector<SDValue, 8> MemOps;
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ static const uint16_t GPArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
+
+ static const uint16_t FPArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+ const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+
+ FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
+ NumGPArgRegs));
+ FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
+ NumFPArgRegs));
+
+ // Make room for NumGPArgRegs and NumFPArgRegs.
+ int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
+ NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
+
+ FuncInfo->setVarArgsStackOffset(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ CCInfo.getNextStackOffset(), true));
+
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // The fixed integer arguments of a variadic function are stored to the
+ // VarArgsFrameIndex on the stack so that they may be loaded by deferencing
+ // the result of va_next.
+ for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
+ // is set.
+ // The double arguments are stored to the VarArgsFrameIndex
+ // on the stack.
+ for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
+// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+// value to MVT::i64 and then truncate to the correct register size.
+SDValue
+PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
+ SelectionDAG &DAG, SDValue ArgVal,
+ DebugLoc dl) const {
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+}
+
+// Set the size that is at least reserved in caller of this function. Tail
+// call optimized functions' reserved stack space needs to be aligned so that
+// taking the difference between two stack areas will result in an aligned
+// stack.
+void
+PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
+ unsigned nAltivecParamsAtEnd,
+ unsigned MinReservedArea,
+ bool isPPC64) const {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+ unsigned TargetAlign
+ = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+ FI->setMinReservedArea(MinReservedArea);
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_64SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = 8;
+
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
+ // Area that is at least reserved in caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const uint16_t GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const uint16_t *FPR = GetFPR();
+
+ static const uint16_t VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR);
+ const unsigned Num_FPR_Regs = 13;
+ const unsigned Num_VR_Regs = array_lengthof(VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ SmallVector<SDValue, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
+
+ unsigned CurArgOffset = ArgOffset;
+
+ // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(ObjectVT,
+ Flags,
+ PtrByteSize);
+ } else
+ nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
+ Flags,
+ PtrByteSize);
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Empty aggregate parameters do not take up registers. Examples:
+ // struct { } a;
+ // union { } b;
+ // int c[0];
+ // etc. However, we have to provide a place-holder in InVals, so
+ // pretend we have an 8-byte item at the current address for that
+ // purpose.
+ if (!ObjSize) {
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ continue;
+ }
+ // All aggregates smaller than 8 bytes must be passed right-justified.
+ if (ObjSize < PtrByteSize)
+ CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+
+ if (ObjSize < 8) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store;
+
+ if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
+ EVT ObjType = (ObjSize == 1 ? MVT::i8 :
+ (ObjSize == 2 ? MVT::i16 : MVT::i32));
+ Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, CurArgOffset),
+ ObjType, false, false, 0);
+ } else {
+ // For sizes that don't fit a truncating store (3, 5, 6, 7),
+ // store the whole register as-is to the parameter save area
+ // slot. The address of the parameter was already calculated
+ // above (InVals.push_back(FIN)) to be the right-justified
+ // offset within the slot. For this store, we need a new
+ // frame index that points at the beginning of the slot.
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, ArgOffset),
+ false, false, 0);
+ }
+
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+ // Whether we copied from a register or not, advance the offset
+ // into the parameter save area by a full doubleword.
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgOffset will be the address of the beginning
+ // of the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, ArgOffset),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - j;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32)
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 8 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs) {
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+
+ ArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+ }
+ ++VR_idx;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized functions' reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrByteSize, Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing the
+ // result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_Darwin(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ // Area that is at least reserved in caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const uint16_t GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const uint16_t GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const uint16_t *FPR = GetFPR();
+
+ static const uint16_t VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
+ const unsigned Num_FPR_Regs = 13;
+ const unsigned Num_VR_Regs = array_lengthof( VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ // In 32-bit non-varargs functions, the stack space for vectors is after the
+ // stack space for non-vectors. We do not use this space unless we have
+ // too many vectors to fit in registers, something that only occurs in
+ // constructed examples:), but we have to walk the arglist to figure
+ // that out...for the pathological case, compute VecArgOffset as the
+ // start of the vector parameter area. Computing VecArgOffset is the
+ // entire point of the following loop.
+ unsigned VecArgOffset = ArgOffset;
+ if (!isVarArg && !isPPC64) {
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
+ ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of regs.
+ unsigned ObjSize = Flags.getByValSize();
+ unsigned ArgSize =
+ ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ VecArgOffset += ArgSize;
+ continue;
+ }
+
+ switch(ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ case MVT::f32:
+ VecArgOffset += 4;
+ break;
+ case MVT::i64: // PPC64
+ case MVT::f64:
+ // FIXME: We are guaranteed to be !isPPC64 at this point.
+ // Does MVT::i64 apply?
+ VecArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Nothing to do, we're only looking at Nonvector args here.
+ break;
+ }
+ }
+ }
+ // We've found where the vector parameter area in memory is. Skip the
+ // first 12 parameters; these don't use that memory.
+ VecArgOffset = ((VecArgOffset+15)/16)*16;
+ VecArgOffset += 12*16;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ SmallVector<SDValue, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
+ // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
+ // When passing anonymous aggregates, this is currently not true.
+ // See LowerFormalArguments_64SVR4 for a fix.
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ unsigned CurArgOffset = ArgOffset;
+
+ // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg || isPPC64) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(ObjectVT,
+ Flags,
+ PtrByteSize);
+ } else nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
+ Flags,
+ PtrByteSize);
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Objects of size 1 and 2 are right justified, everything else is
+ // left justified. This means the memory address is adjusted forwards.
+ if (ObjSize==1 || ObjSize==2) {
+ CurArgOffset = CurArgOffset + (4 - ObjSize);
+ }
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ if (ObjSize==1 || ObjSize==2) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
+ SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg,
+ CurArgOffset),
+ ObjType, false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+
+ ArgOffset += PtrByteSize;
+
+ continue;
+ }
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgOffset will be the address of the beginning
+ // of the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, ArgOffset),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ if (!isPPC64) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += PtrByteSize;
+ break;
+ }
+ // FALLTHROUGH
+ case MVT::i64: // PPC64
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32)
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 4 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs) {
+ ++GPR_idx;
+ if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ }
+
+ // All FP arguments reserve stack space in the Darwin ABI.
+ ArgOffset += isPPC64 ? 8 : ObjSize;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+ }
+ ++VR_idx;
+ } else {
+ if (!isVarArg && !isPPC64) {
+ // Vectors go after all the nonvectors.
+ CurArgOffset = VecArgOffset;
+ VecArgOffset += 16;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ }
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined above
+ // that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized functions' reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing the
+ // result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg;
+
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
+/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
+static unsigned
+CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
+ bool isPPC64,
+ bool isVarArg,
+ unsigned CC,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ unsigned &nAltivecParamsAtEnd) {
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned NumOps = Outs.size();
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if its varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes,
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+
+ // Tail call needs the stack to be aligned.
+ if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().
+ getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ }
+
+ return NumBytes;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+/// adjusted to accommodate the arguments for the tailcall.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
+ unsigned ParamSize) {
+
+ if (!isTailCall) return 0;
+
+ PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+ unsigned CallerMinReservedArea = FI->getMinReservedArea();
+ int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
+ // Remember only if the new adjustement is bigger.
+ if (SPDiff < FI->getTailCallSPDelta())
+ FI->setTailCallSPDelta(SPDiff);
+
+ return SPDiff;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ if (!getTargetMachine().Options.GuaranteedTailCallOpt)
+ return false;
+
+ // Variable argument functions are not supported.
+ if (isVarArg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ // Functions containing by val parameters are not supported.
+ for (unsigned i = 0; i != Ins.size(); i++) {
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Flags.isByVal()) return false;
+ }
+
+ // Non PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
+
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+
+ return false;
+}
+
+/// isCallCompatibleAddress - Return the immediate to use if the specified
+/// 32-bit value is representable in the immediate field of a BxA instruction.
+static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ SignExtend32<26>(Addr) != Addr)
+ return 0; // Top 6 bits have to be sext of immediate.
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2,
+ DAG.getTargetLoweringInfo().getPointerTy()).getNode();
+}
+
+namespace {
+
+struct TailCallArgumentInfo {
+ SDValue Arg;
+ SDValue FrameIdxOp;
+ int FrameIdx;
+
+ TailCallArgumentInfo() : FrameIdx(0) {}
+};
+
+}
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+ SDValue Chain,
+ const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
+ SmallVector<SDValue, 8> &MemOpChains,
+ DebugLoc dl) {
+ for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+ SDValue Arg = TailCallArgs[i].Arg;
+ SDValue FIN = TailCallArgs[i].FrameIdxOp;
+ int FI = TailCallArgs[i].FrameIdx;
+ // Store relative to framepointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+}
+
+/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
+/// the appropriate stack slot for the tail call optimized function call.
+static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+ MachineFunction &MF,
+ SDValue Chain,
+ SDValue OldRetAddr,
+ SDValue OldFP,
+ int SPDiff,
+ bool isPPC64,
+ bool isDarwinABI,
+ DebugLoc dl) {
+ if (SPDiff) {
+ // Calculate the new stack slot for the return address.
+ int SlotSize = isPPC64 ? 8 : 4;
+ int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
+ isDarwinABI);
+ int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ NewRetAddrLoc, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+ Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(NewRetAddr),
+ false, false, 0);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ int NewFPLoc =
+ SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
+ true);
+ SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+ Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
+ MachinePointerInfo::getFixedStack(NewFPIdx),
+ false, false, 0);
+ }
+ }
+ return Chain;
+}
+
+/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
+/// the position of the argument.
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+ SDValue Arg, int SPDiff, unsigned ArgOffset,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ int Offset = ArgOffset + SPDiff;
+ uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue FIN = DAG.getFrameIndex(FI, VT);
+ TailCallArgumentInfo Info;
+ Info.Arg = Arg;
+ Info.FrameIdxOp = FIN;
+ Info.FrameIdx = FI;
+ TailCallArguments.push_back(Info);
+}
+
+/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
+/// stack slot. Returns the chain as result and the loaded frame pointers in
+/// LROpOut/FPOpout. Used when tail calling.
+SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ DebugLoc dl) const {
+ if (SPDiff) {
+ // Load the LR and FP stack slot for later adjusting.
+ EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ LROpOut = getReturnAddrFrameIndex(DAG);
+ LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
+ false, false, false, 0);
+ Chain = SDValue(LROpOut.getNode(), 1);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ FPOpOut = getFramePointerFrameIndex(DAG);
+ FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
+ false, false, false, 0);
+ Chain = SDValue(FPOpOut.getNode(), 1);
+ }
+ }
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ false, false, MachinePointerInfo(0),
+ MachinePointerInfo(0));
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
+/// tail calls.
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
+ SDValue Arg, SDValue PtrOff, int SPDiff,
+ unsigned ArgOffset, bool isPPC64, bool isTailCall,
+ bool isVector, SmallVector<SDValue, 8> &MemOpChains,
+ SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
+ DebugLoc dl) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ if (!isTailCall) {
+ if (isVector) {
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ }
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0));
+ // Calculate and remember argument location.
+ } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+ TailCallArguments);
+}
+
+static
+void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
+ DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+ SDValue LROp, SDValue FPOp, bool isDarwinABI,
+ SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+ // might overwrite each other in case of tail call optimization.
+ SmallVector<SDValue, 8> MemOpChains2;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+ MemOpChains2, dl);
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+ isPPC64, isDarwinABI, dl);
+
+ // Emit callseq_end just before tailcall node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+}
+
+static
+unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
+ SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
+ SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+ const PPCSubtarget &PPCSubTarget) {
+
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
+
+ unsigned CallOpc = PPCISD::CALL;
+
+ bool needIndirectCall = true;
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
+ // Use indirect calls for ALL functions calls in JIT mode, since the
+ // far-call stubs may be outside relocation limits for a BL instruction.
+ if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+ unsigned OpFlags = 0;
+ if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (PPCSubTarget.getTargetTriple().isMacOSX() &&
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (G->getGlobal()->isDeclaration() ||
+ G->getGlobal()->isWeakForLinker())) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_DARWIN_STUB;
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType(),
+ 0, OpFlags);
+ needIndirectCall = false;
+ }
+ }
+
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ unsigned char OpFlags = 0;
+
+ if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (PPCSubTarget.getTargetTriple().isMacOSX() &&
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
+ OpFlags);
+ needIndirectCall = false;
+ }
+
+ if (needIndirectCall) {
+ // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
+ // to do the call, we can't use PPCISD::CALL.
+ SDValue MTCTROps[] = {Chain, Callee, InFlag};
+
+ if (isSVR4ABI && isPPC64) {
+ // Function pointers in the 64-bit SVR4 ABI do not point to the function
+ // entry point, but to the function descriptor (the function entry point
+ // address is part of the function descriptor though).
+ // The function descriptor is a three doubleword structure with the
+ // following fields: function entry point, TOC base address and
+ // environment pointer.
+ // Thus for a call through a function pointer, the following actions need
+ // to be performed:
+ // 1. Save the TOC of the caller in the TOC save area of its stack
+ // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
+ // 2. Load the address of the function entry point from the function
+ // descriptor.
+ // 3. Load the TOC of the callee from the function descriptor into r2.
+ // 4. Load the environment pointer from the function descriptor into
+ // r11.
+ // 5. Branch to the function entry point address.
+ // 6. On return of the callee, the TOC of the caller needs to be
+ // restored (this is done in FinishCall()).
+ //
+ // All those operations are flagged together to ensure that no other
+ // operations can be scheduled in between. E.g. without flagging the
+ // operations together, a TOC access in the caller could be scheduled
+ // between the load of the callee TOC and the branch to the callee, which
+ // results in the TOC access going through the TOC of the callee instead
+ // of going through the TOC of the caller, which leads to incorrect code.
+
+ // Load the address of the function entry point from the function
+ // descriptor.
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
+ SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
+ InFlag.getNode() ? 3 : 2);
+ Chain = LoadFuncPtr.getValue(1);
+ InFlag = LoadFuncPtr.getValue(2);
+
+ // Load environment pointer into r11.
+ // Offset of the environment pointer within the function descriptor.
+ SDValue PtrOff = DAG.getIntPtrConstant(16);
+
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
+ SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
+ InFlag);
+ Chain = LoadEnvPtr.getValue(1);
+ InFlag = LoadEnvPtr.getValue(2);
+
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
+ InFlag);
+ Chain = EnvVal.getValue(0);
+ InFlag = EnvVal.getValue(1);
+
+ // Load TOC of the callee into r2. We are using a target-specific load
+ // with r2 hard coded, because the result of a target-independent load
+ // would never go directly into r2, since r2 is a reserved register (which
+ // prevents the register allocator from allocating it), resulting in an
+ // additional register being allocated and an unnecessary move instruction
+ // being generated.
+ VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
+ Callee, InFlag);
+ Chain = LoadTOCPtr.getValue(0);
+ InFlag = LoadTOCPtr.getValue(1);
+
+ MTCTROps[0] = Chain;
+ MTCTROps[1] = LoadFuncPtr;
+ MTCTROps[2] = InFlag;
+ }
+
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
+ 2 + (InFlag.getNode() != 0));
+ InFlag = Chain.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Glue);
+ Ops.push_back(Chain);
+ CallOpc = PPCISD::BCTRL;
+ Callee.setNode(0);
+ // Add use of X11 (holding environment pointer)
+ if (isSVR4ABI && isPPC64)
+ Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
+ }
+
+ // If this is a direct call, pass the chain and the callee.
+ if (Callee.getNode()) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ return CallOpc;
+}
+
+static
+bool isLocalCall(const SDValue &Callee)
+{
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return !G->getGlobal()->isDeclaration() &&
+ !G->getGlobal()->isWeakForLinker();
+ return false;
+}
+
+SDValue
+PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VA.getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
+ bool isTailCall, bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) const {
+ std::vector<EVT> NodeTys;
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
+ isTailCall, RegsToPass, Ops, NodeTys,
+ PPCSubTarget);
+
+ // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
+ if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
+
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
+ int BytesCalleePops =
+ (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ // Emit tail call.
+ if (isTailCall) {
+ assert(((Callee.getOpcode() == ISD::Register &&
+ cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
+ Callee.getOpcode() == ISD::TargetExternalSymbol ||
+ Callee.getOpcode() == ISD::TargetGlobalAddress ||
+ isa<ConstantSDNode>(Callee)) &&
+ "Expecting an global address, external symbol, absolute value or register");
+
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
+ }
+
+ // Add a NOP immediately after the branch instruction when using the 64-bit
+ // SVR4 ABI. At link time, if caller and callee are in a different module and
+ // thus have a different TOC, the call will be replaced with a call to a stub
+ // function which saves the current TOC, loads the TOC of the callee and
+ // branches to the callee. The NOP will be replaced with a load instruction
+ // which restores the TOC of the caller from the TOC save slot of the current
+ // stack frame. If caller and callee belong to the same module (and have the
+ // same TOC), the NOP will remain unchanged.
+
+ bool needsTOCRestore = false;
+ if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
+ if (CallOpc == PPCISD::BCTRL) {
+ // This is a call through a function pointer.
+ // Restore the caller TOC from the save area into R2.
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ // We are using a target-specific load with r2 hard coded, because the
+ // result of a target-independent load would never go directly into r2,
+ // since r2 is a reserved register (which prevents the register allocator
+ // from allocating it), resulting in an additional register being
+ // allocated and an unnecessary move instruction being generated.
+ needsTOCRestore = true;
+ } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
+ // Otherwise insert NOP for non-local calls.
+ CallOpc = PPCISD::CALL_NOP;
+ }
+ }
+
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ if (needsTOCRestore) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(BytesCalleePops, true),
+ InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
+ if (isTailCall)
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
+ Ins, DAG);
+
+ if (PPCSubTarget.isSVR4ABI()) {
+ if (PPCSubTarget.isPPC64())
+ return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+ else
+ return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+ }
+
+ return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
+ // of the 32-bit SVR4 ABI stack frame layout.
+
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Fast) && "Unknown calling convention!");
+
+ unsigned PtrByteSize = 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamicalloc
+ // and restoring the callers stack pointer in this functions epilog. This is
+ // done because by tail calling the called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, parameter list area and the part of the local variable space which
+ // contains copies of aggregates which are passed by value.
+
+ // Assign locations to all of the outgoing arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+
+ if (isVarArg) {
+ // Handle fixed and variable vector arguments differently.
+ // Fixed vector arguments go into registers as long as registers are
+ // available. Variable vector arguments always go into memory.
+ unsigned NumArgs = Outs.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ bool Result;
+
+ if (Outs[i].IsFixed) {
+ Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
+ } else {
+ Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
+ }
+
+ if (Result) {
+#ifndef NDEBUG
+ errs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+ } else {
+ // All arguments are treated the same.
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
+ }
+
+ // Assign locations to all of the outgoing aggregate by value arguments.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
+
+ // Size of the linkage area, parameter list area and the part of the local
+ // space variable where copies of aggregates which are passed by value are
+ // stored.
+ unsigned NumBytes = CCByValInfo.getNextStackOffset();
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so it can be moved somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ bool seenFloatArg = false;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, j = 0, e = ArgLocs.size();
+ i != e;
+ ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Argument is an aggregate which is passed by value, thus we need to
+ // create a copy of it in the local variable space of the current stack
+ // frame (which is the stack frame of the caller) and pass the address of
+ // this copy to the callee.
+ assert((j < ByValArgLocs.size()) && "Index out of bounds!");
+ CCValAssign &ByValVA = ByValArgLocs[j++];
+ assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
+
+ // Memory reserved in the local variable space of the callers stack frame.
+ unsigned LocMemOffset = ByValVA.getLocMemOffset();
+
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ // Create a copy of the argument in the local area of the current
+ // stack frame.
+ SDValue MemcpyCall =
+ CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+
+ // Pass the address of the aggregate copy on the stack either in a
+ // physical register or in the parameter list area of the current stack
+ // frame to the callee.
+ Arg = PtrOff;
+ }
+
+ if (VA.isRegLoc()) {
+ seenFloatArg |= VA.getLocVT().isFloatingPoint();
+ // Put argument in a physical register.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Put argument in the parameter list area of the current stack frame.
+ assert(VA.isMemLoc());
+ unsigned LocMemOffset = VA.getLocMemOffset();
+
+ if (!isTailCall) {
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ } else {
+ // Calculate and remember argument location.
+ CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
+ TailCallArguments);
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Set CR bit 6 to true if this is a vararg call with floating args passed in
+ // registers.
+ if (isVarArg) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, InFlag };
+
+ Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
+ dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
+
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
+ false, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+// Copy an argument into memory, being careful to do this outside the
+// call sequence for the call to which the argument belongs.
+SDValue
+PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+ SDValue CallSeqStart,
+ ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG,
+ DebugLoc dl) const {
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // The MEMCPY must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ return NewCallSeqStart;
+}
+
+SDValue
+PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned PtrByteSize = 8;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamicalloc
+ // and restoring the callers stack pointer in this functions epilog. This is
+ // done because by tail calling the called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with at least 48 bytes, which
+ // is reserved space for [SP][CR][LR][3 x unused].
+ // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
+ // of this call.
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
+ Outs, OutVals, nAltivecParamsAtEnd);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so it can be move somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const uint16_t GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const uint16_t *FPR = GetFPR();
+
+ static const uint16_t VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // Promote integers to 64-bit values.
+ if (Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
+ if (Flags.isByVal()) {
+ // Note: Size includes alignment padding, so
+ // struct x { short a; char b; }
+ // will have Size = 4. With #pragma pack(1), it will have Size = 3.
+ // These are the proper values we need for right-justifying the
+ // aggregate in a parameter register.
+ unsigned Size = Flags.getByValSize();
+
+ // An empty aggregate parameter takes up no storage and no
+ // registers.
+ if (Size == 0)
+ continue;
+
+ // All aggregates smaller than 8 bytes must be passed right-justified.
+ if (Size==1 || Size==2 || Size==4) {
+ EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+ }
+
+ if (GPR_idx == NumGPRs && Size < 8) {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+
+ // FIXME: The above statement is likely due to a misunderstanding of the
+ // documents. All arguments must be copied into the parameter area BY
+ // THE CALLEE in the event that the callee takes the address of any
+ // formal argument. That has not yet been implemented. However, it is
+ // reasonable to use the stack area as a staging area for the register
+ // load.
+
+ // Skip this for small aggregates, as we will use the same slot for a
+ // right-justified copy, below.
+ if (Size >= 8)
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // When a register is available, pass a small aggregate right-justified.
+ if (Size < 8 && GPR_idx != NumGPRs) {
+ // The easiest way to get this right-justified in a register
+ // is to copy the structure into the rightmost portion of a
+ // local variable slot, then load the whole slot into the
+ // register.
+ // FIXME: The memcpy seems to produce pretty awful code for
+ // small aggregates, particularly for packed ones.
+ // FIXME: It would be preferable to use the slot in the
+ // parameter save area instead of a new local variable.
+ SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // Load the slot into the register.
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ // Done with this argument.
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+
+ // For aggregates larger than PtrByteSize, copy the pieces of the
+ // object that fit into registers from the parameter save area.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ // A single float or an aggregate containing only a single float
+ // must be passed right-justified in the stack doubleword, and
+ // in the GPR, if one is available.
+ SDValue StoreOff;
+ if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ } else
+ StoreOff = PtrOff;
+
+ SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ } else if (GPR_idx != NumGPRs)
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ ++GPR_idx;
+ } else {
+ // Single-precision floating-point values are mapped to the
+ // second (rightmost) word of the stack doubleword.
+ if (Arg.getValueType() == MVT::f32) {
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ }
+
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in
+ // V registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the ... We do it for all
+ // arguments, seems to work.
+ while (ArgOffset % 16 !=0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ break;
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Check if this is an indirect call (MTCTR/BCTRL).
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ if (!isTailCall &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG)) {
+ // Load r2 into a virtual register and store it to the TOC save area.
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+ // TOC save area offset.
+ SDValue PtrOff = DAG.getIntPtrConstant(40);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
+ false, false, 0);
+ // R12 must contain the address of an indirect callee. This does not
+ // mean the MTCTR instruction must use R12; it's easier to model this
+ // as an extra parameter, so do that.
+ RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamicalloc
+ // and restoring the callers stack pointer in this functions epilog. This is
+ // done because by tail calling the called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
+ Outs, OutVals,
+ nAltivecParamsAtEnd);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so it can be move somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const uint16_t GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const uint16_t GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const uint16_t *FPR = GetFPR();
+
+ static const uint16_t VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // On PPC64, promote integers to 64-bit values.
+ if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
+ if (Flags.isByVal()) {
+ unsigned Size = Flags.getByValSize();
+ // Very small objects are passed right-justified. Everything else is
+ // passed left-justified.
+ if (Size==1 || Size==2) {
+ EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ ArgOffset += PtrByteSize;
+ } else {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
+ // copy the pieces of the object that fit into registers from the
+ // parameter save area.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ } else {
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+ // GPRs.
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+ !isPPC64) // PPC64 has 64-bit GPR's obviously :)
+ ++GPR_idx;
+ }
+ } else
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ if (isPPC64)
+ ArgOffset += 8;
+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in
+ // V registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the ... We do it for all
+ // arguments, seems to work.
+ while (ArgOffset % 16 !=0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else if (nAltivecParamsAtEnd==0) {
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ break;
+ }
+ }
+ // If all Altivec parameters fit in registers, as they usually do,
+ // they get stack space following the non-Altivec parameters. We
+ // don't track this here because nobody below needs it.
+ // If there are more Altivec parameters than fit in registers emit
+ // the stores here.
+ if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ unsigned j = 0;
+ // Offset is aligned; skip 1st 12 params which go in V registers.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ ArgOffset += 12*16;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ EVT ArgType = Outs[i].VT;
+ if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+ ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+ if (++j > NumVRs) {
+ SDValue PtrOff;
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // On Darwin, R12 must contain the address of an indirect callee. This does
+ // not mean the MTCTR instruction must use R12; it's easier to model this as
+ // an extra parameter, so do that.
+ if (!isTailCall &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG))
+ RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
+ PPC::R12), Callee));
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+bool
+PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_PPC);
+}
+
+SDValue
+PPCTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = OutVals[i];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
+}
+
+SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // When we pop the dynamic allocation we need to restore the SP link.
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the corect type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Construct the stack pointer operand.
+ bool isPPC64 = Subtarget.isPPC64();
+ unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
+ SDValue StackPtr = DAG.getRegister(SP, PtrVT);
+
+ // Get the operands for the STACKRESTORE.
+ SDValue Chain = Op.getOperand(0);
+ SDValue SaveSP = Op.getOperand(1);
+
+ // Load the old link SP.
+ SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Restore the stack pointer.
+ Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
+
+ // Store the old link SP.
+ return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+
+
+SDValue
+PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int RASI = FI->getReturnAddrSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!RASI) {
+ // Find out what the fix offset of the frame pointer save area.
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for frame pointer save area.
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
+ // Save the result.
+ FI->setReturnAddrSaveIndex(RASI);
+ }
+ return DAG.getFrameIndex(RASI, PtrVT);
+}
+
+SDValue
+PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI) {
+ // Find out what the fix offset of the frame pointer save area.
+ int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
+ isDarwinABI);
+
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+ return DAG.getFrameIndex(FPSI, PtrVT);
+}
+
+SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the corect type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Negate the size.
+ SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
+ DAG.getConstant(0, PtrVT), Size);
+ // Construct a node for the frame pointer save index.
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNALLOC node.
+ SDValue Ops[3] = { Chain, NegSize, FPSIdx };
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
+/// possible.
+SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ // Not FP? Not a fsel.
+ if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
+ !Op.getOperand(2).getValueType().isFloatingPoint())
+ return Op;
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ // Cannot handle SETEQ/SETNE.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+
+ EVT ResVT = Op.getValueType();
+ EVT CmpVT = Op.getOperand(0).getValueType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If the RHS of the comparison is a 0.0, we don't need to do the
+ // subtraction at all.
+ if (isFloatingPointZero(RHS))
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
+ }
+
+ SDValue Cmp;
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ }
+ return Op;
+}
+
+// FIXME: Split this code up when LegalizeDAGTypes lands.
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
+ (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
+ break;
+ case MVT::i64:
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
+ break;
+ }
+
+ // Convert the FP value to an int value through memory.
+ bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
+ int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
+
+ // Emit a store to the stack slot.
+ SDValue Chain;
+ if (i32Stack) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
+ SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+ Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ MVT::i32, MMO);
+ } else
+ Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MPI, false, false, 0);
+
+ // Result is a load from the stack slot. If loading 4 bytes, make sure to
+ // add in a bias.
+ if (Op.getValueType() == MVT::i32 && !i32Stack) {
+ FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
+ DAG.getConstant(4, FIPtr.getValueType()));
+ MPI = MachinePointerInfo();
+ }
+
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
+ false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Don't handle ppc_fp128 here; let it be lowered to a libcall.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDU : PPCISD::FCFID);
+ MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT::f32 : MVT::f64;
+
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDValue SINT = Op.getOperand(0);
+ // When converting to single-precision, we actually need to convert
+ // to double-precision first and then round to single-precision.
+ // To avoid double-rounding effects during that operation, we have
+ // to prepare the input operand. Bits that might be truncated when
+ // converting to double-precision are replaced by a bit that won't
+ // be lost at this stage, but is below the single-precision rounding
+ // position.
+ //
+ // However, if -enable-unsafe-fp-math is in effect, accept double
+ // rounding to avoid the extra overhead.
+ if (Op.getValueType() == MVT::f32 &&
+ !PPCSubTarget.hasFPCVT() &&
+ !DAG.getTarget().Options.UnsafeFPMath) {
+
+ // Twiddle input to make sure the low 11 bits are zero. (If this
+ // is the case, we are guaranteed the value will fit into the 53 bit
+ // mantissa of an IEEE double-precision value without rounding.)
+ // If any of those low 11 bits were not zero originally, make sure
+ // bit 12 (value 2048) is set instead, so that the final rounding
+ // to single-precision gets the correct result.
+ SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ SINT, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Round, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
+ Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ Round, DAG.getConstant(-2048, MVT::i64));
+
+ // However, we cannot use that value unconditionally: if the magnitude
+ // of the input value is small, the bit-twiddling we did above might
+ // end up visibly changing the output. Fortunately, in that case, we
+ // don't need to twiddle bits since the original input will convert
+ // exactly to double-precision floating-point already. Therefore,
+ // construct a conditional to use the original value if the top 11
+ // bits are all sign-bit copies, and use the rounded value computed
+ // above otherwise.
+ SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
+ SINT, DAG.getConstant(53, MVT::i32));
+ Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Cond, DAG.getConstant(1, MVT::i64));
+ Cond = DAG.getSetCC(dl, MVT::i32,
+ Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
+
+ SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
+ }
+
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled INT_TO_FP type in custom expander!");
+ // Since we only generate this in 64-bit mode, we can take advantage of
+ // 64-bit registers. In particular, sign extend the input value into the
+ // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
+ // then lfd it and fcfid it.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SDValue Ld;
+ if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, 4, 4);
+ SDValue Ops[] = { Store, FIdx };
+ Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, 2, MVT::i32, MMO);
+ } else {
+ assert(PPCSubTarget.isPPC64() &&
+ "i32->FP without LFIWAX supported only on PPC64");
+
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ // Load the value as a double.
+ Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, false, 0);
+ }
+
+ // FCFID it and return it.
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+}
+
+SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ /*
+ The rounding mode is in bits 30:31 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to 0
+ 10 Round to +inf
+ 11 Round to -inf
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT VT = Op.getValueType();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue MFFSreg, InFlag;
+
+ // Save FP Control Word to register
+ EVT NodeTys[] = {
+ MVT::f64, // return register
+ MVT::Glue // unused in this context
+ };
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
+ StackSlot, MachinePointerInfo(), false, false,0);
+
+ // Load FP Control Word from low 32 bits of stack slot.
+ SDValue Four = DAG.getConstant(4, PtrVT);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::XOR, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SHL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
+ SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
+ SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRA!");
+
+ // Expand into a bunch of logical ops, followed by a select_cc.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
+ SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
+ SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
+ Tmp4, Tmp6, ISD::SETLE);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering.
+//
+
+/// BuildSplatI - Build a canonical splati of Val with an element size of
+/// SplatSize. Cast the result to VT.
+static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
+ SelectionDAG &DAG, DebugLoc dl) {
+ assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+ static const EVT VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+
+ EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+
+ // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
+ if (Val == -1)
+ SplatSize = 1;
+
+ EVT CanonicalVT = VTys[SplatSize-1];
+
+ // Build a canonical splat for this value.
+ SDValue Elt = DAG.getConstant(Val, MVT::i32);
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
+ &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
+}
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl,
+ EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
+ SDValue Op2, SelectionDAG &DAG,
+ DebugLoc dl, EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount. The result has the specified value type.
+static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
+ EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ // Force LHS/RHS to be the right type.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
+
+ int Ops[16];
+ for (unsigned i = 0; i != 16; ++i)
+ Ops[i] = i + Amt;
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it. If we CAN select this case, and if it
+// selects to a single instruction, return Op. Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+
+ // Check if this is a splat of a constant value.
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, true) || SplatBitSize > 32)
+ return SDValue();
+
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ unsigned SplatUndef = APSplatUndef.getZExtValue();
+ unsigned SplatSize = SplatBitSize / 8;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDValue Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
+ }
+ return Op;
+ }
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+ int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
+ (32-SplatBitSize));
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+
+
+ // Two instruction sequences.
+
+ // If this value is in the range [-32,30] and is even, use:
+ // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+ // If this value is in the range [17,31] and is odd, use:
+ // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+ // If this value is in the range [-31,-17] and is odd, use:
+ // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+ // Note the last two are three-instruction sequences.
+ if (SextVal >= -32 && SextVal <= 31) {
+ // To avoid having these optimizations undone by constant folding,
+ // we convert to a pseudo that will be expanded later into one of
+ // the above forms.
+ SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
+ EVT VT = Op.getValueType();
+ int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
+ SDValue EltSize = DAG.getConstant(Size, MVT::i32);
+ return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
+ }
+
+ // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
+ // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
+ // for fneg/fabs.
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+ // Make -1 and vspltisw -1:
+ SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG, dl);
+
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // Check to see if this is a wide variety of vsplti*, binop self cases.
+ static const signed char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
+
+ for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+ // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+ // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
+ int i = SplatCsts[idx];
+
+ // Figure out what shift amount will be used by altivec if shifted by i in
+ // this splat size.
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + sra self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
+ }
+ }
+
+ return SDValue();
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+
+ int ShufIdxs[16];
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown i32 permute!");
+ case OP_VMRGHW:
+ ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
+ ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+ ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
+ ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+ break;
+ case OP_VMRGLW:
+ ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+ ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+ ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+ ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+ break;
+ case OP_VSPLTISW0:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+0;
+ break;
+ case OP_VSPLTISW1:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+4;
+ break;
+ case OP_VSPLTISW2:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+8;
+ break;
+ case OP_VSPLTISW3:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+12;
+ break;
+ case OP_VSLDOI4:
+ return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI8:
+ return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI12:
+ return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
+ }
+ EVT VT = OpLHS.getValueType();
+ OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
+ OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
+/// is a shuffle we can handle in a single instruction, return it. Otherwise,
+/// return the code it can be lowered into. Worst case, it can always be
+/// lowered into a vperm.
+SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ EVT VT = Op.getValueType();
+
+ // Cases that are handled by instructions that take permute immediates
+ // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+ // selected by the instruction selector.
+ if (V2.getOpcode() == ISD::UNDEF) {
+ if (PPC::isSplatShuffleMask(SVOp, 1) ||
+ PPC::isSplatShuffleMask(SVOp, 2) ||
+ PPC::isSplatShuffleMask(SVOp, 4) ||
+ PPC::isVPKUWUMShuffleMask(SVOp, true) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, true) ||
+ PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ return Op;
+ }
+ }
+
+ // Altivec has a variety of "shuffle immediates" that take two vector inputs
+ // and produce a fixed permutation. If any of these match, do not lower to
+ // VPERM.
+ if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, false) ||
+ PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ return Op;
+
+ // Check to see if this is a shuffle of 4-byte values. If so, we can use our
+ // perfect shuffle table to emit an optimal matching sequence.
+ ArrayRef<int> PermMask = SVOp->getMask();
+
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask[i*4+j] < 0)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource = PermMask[i*4+j];
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;
+ break;
+ }
+
+ if (EltNo == 8) {
+ EltNo = ByteSource/4;
+ } else if (EltNo != ByteSource/4) {
+ isFourElementShuffle = false;
+ break;
+ }
+ }
+ PFIndexes[i] = EltNo;
+ }
+
+ // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+ // perfect shuffle vector to determine if it is cost effective to do this as
+ // discrete instructions, or whether we should use a vperm.
+ if (isFourElementShuffle) {
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ // Determining when to avoid vperm is tricky. Many things affect the cost
+ // of vperm, particularly how many times the perm mask needs to be computed.
+ // For example, if the perm mask can be hoisted out of a loop or is already
+ // used (perhaps because there are multiple permutes with the same shuffle
+ // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
+ // the loop requires an extra register.
+ //
+ // As a compromise, we only emit discrete instructions if the shuffle can be
+ // generated in 3 or fewer operations. When we have loop information
+ // available, if this block is within a loop, we should avoid using vperm
+ // for 3-operation perms and use a constant pool load instead.
+ if (Cost < 3)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+ // vector that will get spilled to the constant pool.
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
+ // that it is in input element units, not in bytes. Convert now.
+ EVT EltVT = V1.getValueType().getVectorElementType();
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
+
+ for (unsigned j = 0; j != BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
+ }
+
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+}
+
+/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
+/// altivec comparison. If it is, return true and fill in Opc/isDot with
+/// information about the intrinsic.
+static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
+ bool &isDot) {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ CompareOpc = -1;
+ isDot = false;
+ switch (IntrinsicID) {
+ default: return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ }
+ return true;
+}
+
+/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
+/// lower, do it, otherwise return null.
+SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ // If this is a lowered altivec predicate compare, CompareOpc is set to the
+ // opcode number of the comparison.
+ DebugLoc dl = Op.getDebugLoc();
+ int CompareOpc;
+ bool isDot;
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ return SDValue(); // Don't custom lower most intrinsics.
+
+ // If this is a non-dot comparison, make the VCMP node and we are done.
+ if (!isDot) {
+ SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(CompareOpc, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
+ }
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ Op.getOperand(2), // LHS
+ Op.getOperand(3), // RHS
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ CompNode.getValue(1));
+
+ // Unpack the result based on how the target uses it.
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = false;
+ break;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = true;
+ break;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2; InvertBit = false;
+ break;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2; InvertBit = true;
+ break;
+ }
+
+ // Shift the bit into the low position.
+ Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
+ DAG.getConstant(8-(3-BitNo), MVT::i32));
+ // Isolate the bit.
+ Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+
+ // If we are supposed to, toggle the bit.
+ if (InvertBit)
+ Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+ return Flags;
+}
+
+SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create a stack slot that is 16-byte aligned.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ // Store the input value into Value#0 of the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Op.getOperand(0), FIdx, MachinePointerInfo(),
+ false, false, 0);
+ // Load it out.
+ return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ if (Op.getValueType() == MVT::v4i32) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
+ SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
+
+ SDValue RHSSwap = // = vrlw RHS, 16
+ BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
+
+ // Shrinkify inputs to v8i16.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
+ RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
+
+ // Low parts multiplied together, generating 32-bit results (we ignore the
+ // top parts).
+ SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
+ LHS, RHS, DAG, dl, MVT::v4i32);
+
+ SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
+ LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
+ // Shift the high parts up 16 bits.
+ HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
+ Neg16, DAG, dl);
+ return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
+ } else if (Op.getValueType() == MVT::v8i16) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
+
+ return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
+ LHS, RHS, Zero, DAG, dl);
+ } else if (Op.getValueType() == MVT::v16i8) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ // Multiply the even 8-bit parts, producing 16-bit sums.
+ SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
+
+ // Multiply the odd 8-bit parts, producing 16-bit sums.
+ SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
+
+ // Merge the results together.
+ int Ops[16];
+ for (unsigned i = 0; i != 8; ++i) {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ } else {
+ llvm_unreachable("Unknown mul to lower!");
+ }
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG, PPCSubTarget);
+
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG, PPCSubTarget);
+
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
+ Op.getDebugLoc());
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+
+ // Lower 64-bit shifts.
+ case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
+ case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
+ case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+
+ // Frame & Return address.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+}
+
+void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ const TargetMachine &TM = getTargetMachine();
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::VAARG: {
+ if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ || TM.getSubtarget<PPCSubtarget>().isPPC64())
+ return;
+
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i64) {
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+
+ Results.push_back(NewNode);
+ Results.push_back(NewNode.getValue(1));
+ }
+ return;
+ }
+ case ISD::FP_ROUND_INREG: {
+ assert(N->getValueType(0) == MVT::ppcf128);
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(1));
+
+ // Add the two halves of the long double in round-to-zero mode.
+ SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
+
+ // We know the low half is about to be thrown away, so just use something
+ // convenient.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+ FPreg, FPreg));
+ return;
+ }
+ case ISD::FP_TO_SINT:
+ Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
+ return;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ bool is64bit, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned TmpReg = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister(
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // add r0, dest, incr
+ // st[wd]cx. r0, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool is8bit, // operation
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ // In 64 bit mode we have to use 64 bits for addresses, even though the
+ // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
+ // registers without caring whether they're 32 or 64, but here we're
+ // doing actual arithmetic on the addresses.
+ bool is64bit = PPCSubTarget.isPPC64();
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw incr2, incr, shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // loopMBB:
+ // lwarx tmpDest, ptr
+ // add tmp, tmpDest, incr2
+ // andc tmp2, tmpDest, mask
+ // and tmp3, tmp, mask
+ // or tmp4, tmp3, tmp2
+ // stwcx. tmp4, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ // srw dest, tmpDest, shift
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
+ .addReg(incr).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
+ .addReg(Incr2Reg).addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
+ .addReg(TmpReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
+ .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
+ .addReg(ShiftReg);
+ return BB;
+}
+
+llvm::MachineBasicBlock*
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // SjLjSetup mainMBB
+ // bl mainMBB
+ // v_restore = 1
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+ // first slot of the buffer and stack address in the third. Following the
+ // X86 target code, we'll store the jump address in the second slot. We also
+ // need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ // Prepare IP either in reg.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg();
+
+ if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
+ MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+ thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB = BuildMI(mainMBB, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (PPCSubTarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ unsigned BufReg = MI->getOperand(0).getReg();
+
+ // Reload FP (the jumped-to function may not have had a
+ // frame pointer, and if so, then its r31 will be restored
+ // as necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // FIXME: When we also support base pointers, that register must also be
+ // restored here.
+
+ // Reload TOC
+ if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // To "insert" these instructions we actually have to insert their
+ // control-flow patterns.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineFunction *F = BB->getParent();
+
+ if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)) {
+ unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
+ PPC::ISEL8 : PPC::ISEL;
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned SubIdx;
+ bool SwapOps;
+ switch (SelectPred) {
+ default: llvm_unreachable("invalid predicate for isel");
+ case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+ case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+ case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+ case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+ case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+ case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+ case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+ case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+ }
+
+ BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(SwapOps? 3 : 2).getReg())
+ .addReg(MI->getOperand(SwapOps? 2 : 3).getReg())
+ .addReg(MI->getOperand(1).getReg(), 0, SubIdx);
+ } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
+
+ // The incoming instruction knows the destination vreg to set, the
+ // condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
+ BB = EmitAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
+ BB = EmitAtomicBinary(MI, BB, true, 0);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // l[wd]arx dest, ptr
+ // cmp[wd] dest, oldval
+ // bne- midMBB
+ // loop2MBB:
+ // st[wd]cx. newval, ptr
+ // bne- loopMBB
+ // b exitBB
+ // midMBB:
+ // st[wd]cx. dest, ptr
+ // exitBB:
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+ .addReg(oldval).addReg(dest);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(newval).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(dest).addReg(ptrA).addReg(ptrB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
+ // We must use 64-bit registers for addresses when targeting 64-bit,
+ // since we're actually doing arithmetic on them. Other registers
+ // can be 32-bit.
+ bool is64bit = PPCSubTarget.isPPC64();
+ bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loop1MBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw newval2, newval, shift
+ // slw oldval2, oldval,shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // and newval3, newval2, mask
+ // and oldval3, oldval2, mask
+ // loop1MBB:
+ // lwarx tmpDest, ptr
+ // and tmp, tmpDest, mask
+ // cmpw tmp, oldval3
+ // bne- midMBB
+ // loop2MBB:
+ // andc tmp2, tmpDest, mask
+ // or tmp4, tmp2, newval3
+ // stwcx. tmp4, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // stwcx. tmpDest, ptr
+ // exitBB:
+ // srw dest, tmpDest, shift
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
+ .addReg(newval).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
+ .addReg(oldval).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
+ .addReg(NewVal2Reg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
+ .addReg(OldVal2Reg).addReg(MaskReg);
+
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
+ .addReg(TmpReg).addReg(OldVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
+ .addReg(Tmp2Reg).addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
+ .addReg(ShiftReg);
+ } else if (MI->getOpcode() == PPC::FADDrtz) {
+ // This pseudo performs an FADD with rounding mode temporarily forced
+ // to round-to-zero. We emit this via custom inserter since the FPSCR
+ // is not modeled at the SelectionDAG level.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src1 = MI->getOperand(1).getReg();
+ unsigned Src2 = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
+
+ // Set rounding mode to round-to-zero.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
+
+ // Perform addition.
+ BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+
+ // Restore FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::FRINDrint ||
+ MI->getOpcode() == PPC::FRINSrint) {
+ bool isf32 = MI->getOpcode() == PPC::FRINSrint;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+
+ // Perform the rounding.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
+ .addReg(Src);
+
+ // Compare the results.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
+ .addReg(Dest).addReg(Src);
+
+ // If the results were not equal, then set the FPSCR XX bit.
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
+
+ BB->addSuccessor(midMBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+
+ // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
+ // the FI bit here because that will not automatically set XX also,
+ // and XX is what libm interprets as the FE_INEXACT flag.
+ BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+
+ BB->addSuccessor(exitMBB);
+
+ BB = exitMBB;
+ } else {
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
+ (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal, we need to find the zero of the function:
+ // F(X) = A X - 1 [which has a zero at X = 1/A]
+ // =>
+ // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+ // does not require additional intermediate precision]
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits.
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue FPOne =
+ DAG.getConstantFP(1.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPOne, FPOne, FPOne, FPOne);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
+ DCI.AddToWorklist(Est.getNode());
+
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal sqrt, we need to find the zero of the function:
+ // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+ // =>
+ // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+ // As a result, we precompute A/2 prior to the iteration loop.
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits.
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue FPThreeHalves =
+ DAG.getConstantFP(1.5, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPThreeHalves, FPThreeHalves,
+ FPThreeHalves, FPThreeHalves);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
+ DCI.AddToWorklist(Est.getNode());
+
+ // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ const TargetMachine &TM = getTargetMachine();
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case PPCISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue()) // 0 << V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue()) // 0 >>u V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRA:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue() || // 0 >>s V -> 0.
+ C->isAllOnesValue()) // -1 >>s V -> -1.
+ return N->getOperand(0);
+ }
+ break;
+ case ISD::FDIV: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+ N->getValueType(0), RV);
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+ N->getValueType(0), RV,
+ N->getOperand(1).getOperand(1));
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ }
+
+ SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+
+ }
+ break;
+ case ISD::FSQRT: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
+ // reciprocal sqrt.
+ SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAGCombineFastRecip(RV, DCI);
+ if (RV.getNode() != 0)
+ return RV;
+ }
+
+ }
+ break;
+ case ISD::SINT_TO_FP:
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
+ // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
+ // We allow the src/dst to be either f32/f64, but the intermediate
+ // type must be i64.
+ if (N->getOperand(0).getValueType() == MVT::i64 &&
+ N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+
+ Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ if (N->getValueType(0) == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
+ DAG.getIntPtrConstant(0));
+ DCI.AddToWorklist(Val.getNode());
+ }
+ return Val;
+ } else if (N->getOperand(0).getValueType() == MVT::i32) {
+ // If the intermediate type is i32, we can avoid the load/store here
+ // too.
+ }
+ }
+ }
+ break;
+ case ISD::STORE:
+ // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
+ if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
+ !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
+ N->getOperand(1).getValueType() == MVT::i32 &&
+ N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(1).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+ Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+
+ Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ DCI.AddToWorklist(Val.getNode());
+ return Val;
+ }
+
+ // Turn STORE (BSWAP) -> sthbrx/stwbrx.
+ if (cast<StoreSDNode>(N)->isUnindexed() &&
+ N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ N->getOperand(1).getNode()->hasOneUse() &&
+ (N->getOperand(1).getValueType() == MVT::i32 ||
+ N->getOperand(1).getValueType() == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getOperand(1).getValueType() == MVT::i64))) {
+ SDValue BSwapOp = N->getOperand(1).getOperand(0);
+ // Do an any-extend to 32-bits if this is a half-word input.
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+
+ SDValue Ops[] = {
+ N->getOperand(0), BSwapOp, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+ return
+ DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
+ Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ }
+ break;
+ case ISD::BSWAP:
+ // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getValueType(0) == MVT::i64))) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+ // Create the byte-swapping load.
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDValue BSLoad =
+ DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
+ DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+ MVT::i64 : MVT::i32, MVT::Other),
+ Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
+
+ // If this is an i16 load, insert the truncate.
+ SDValue ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away, we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+
+ break;
+ case PPCISD::VCMP: {
+ // If a VCMPo node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMPo computes both a CR6 and
+ // a normal output).
+ //
+ if (!N->getOperand(0).hasOneUse() &&
+ !N->getOperand(1).hasOneUse() &&
+ !N->getOperand(2).hasOneUse()) {
+
+ // Scan all of the users of the LHS, looking for VCMPo's that match.
+ SDNode *VCMPoNode = 0;
+
+ SDNode *LHSN = N->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
+ UI != E; ++UI)
+ if (UI->getOpcode() == PPCISD::VCMPo &&
+ UI->getOperand(1) == N->getOperand(1) &&
+ UI->getOperand(2) == N->getOperand(2) &&
+ UI->getOperand(0) == N->getOperand(0)) {
+ VCMPoNode = *UI;
+ break;
+ }
+
+ // If there is no VCMPo node, or if the flag value has a single use, don't
+ // transform this.
+ if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ break;
+
+ // Look at the (necessarily single) use of the flag value. If it has a
+ // chain, this transformation is more complex. Note that multiple things
+ // could use the value result, which we should ignore.
+ SDNode *FlagUser = 0;
+ for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ FlagUser == 0; ++UI) {
+ assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
+ FlagUser = User;
+ break;
+ }
+ }
+ }
+
+ // If the user is a MFCR instruction, we know this is safe. Otherwise we
+ // give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFCR)
+ return SDValue(VCMPoNode, 0);
+ }
+ break;
+ }
+ case ISD::BR_CC: {
+ // If this is a branch on an altivec predicate comparison, lower this so
+ // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // lowering is done pre-legalize, because the legalizer lowers the predicate
+ // compare down to code that is difficult to reassemble.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+ int CompareOpc;
+ bool isDot;
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ assert(isDot && "Can't compare against a vector result!");
+
+ // If this is a comparison against something other than 0/1, then we know
+ // that the condition is never/always true.
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (Val != 0 && Val != 1) {
+ if (CC == ISD::SETEQ) // Cond never true, remove branch.
+ return N->getOperand(0);
+ // Always !=, turn it into an unconditional branch.
+ return DAG.getNode(ISD::BR, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
+ bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ LHS.getOperand(2), // LHS of compare
+ LHS.getOperand(3), // RHS of compare
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Unpack the result based on how the target uses it.
+ PPC::Predicate CompOpc;
+ switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Branch on the value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 1: // Branch on the inverted value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
+ break;
+ case 2: // Branch on the value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ case 3: // Branch on the inverted value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
+ break;
+ }
+
+ return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
+ DAG.getConstant(CompOpc, MVT::i32),
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ N->getOperand(4), CompNode.getValue(1));
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case PPCISD::LBRX: {
+ // lhbrx is known to have the top bits cleared out.
+ if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
+ KnownZero = 0xFFFF0000;
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+ default: break;
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ KnownZero = ~1U; // All bits but the low one are known to be zero.
+ break;
+ }
+ }
+ }
+}
+
+
+/// getConstraintType - Given a constraint, return the type of
+/// constraint it is for this target.
+PPCTargetLowering::ConstraintType
+PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ case 'Z':
+ // FIXME: While Z does indicate a memory constraint, it specifically
+ // indicates an r+r address (used in conjunction with the 'y' modifier
+ // in the replacement string). Currently, we're forcing the base
+ // register to be r0 in the asm printer (which is interpreted as zero)
+ // and forming the complete address in the second register. This is
+ // suboptimal.
+ return C_Memory;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+PPCTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'b':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ case 'd':
+ if (type->isDoubleTy())
+ weight = CW_Register;
+ break;
+ case 'v':
+ if (type->isVectorTy())
+ weight = CW_Register;
+ break;
+ case 'y':
+ weight = CW_Register;
+ break;
+ case 'Z':
+ weight = CW_Memory;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
+ case 'r': // R0-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RCRegClass);
+ return std::make_pair(0U, &PPC::GPRCRegClass);
+ case 'f':
+ if (VT == MVT::f32 || VT == MVT::i32)
+ return std::make_pair(0U, &PPC::F4RCRegClass);
+ if (VT == MVT::f64 || VT == MVT::i64)
+ return std::make_pair(0U, &PPC::F8RCRegClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, &PPC::VRRCRegClass);
+ case 'y': // crrc
+ return std::make_pair(0U, &PPC::CRRCRegClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0,0);
+
+ // Only support length 1 constraints.
+ if (Constraint.length() > 1) return;
+
+ char Letter = Constraint[0];
+ switch (Letter) {
+ default: break;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P': {
+ ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
+ if (!CST) return; // Must be an immediate to match.
+ unsigned Value = CST->getZExtValue();
+ switch (Letter) {
+ default: llvm_unreachable("Unknown constraint letter!");
+ case 'I': // "I" is a signed 16-bit constant.
+ if ((short)Value == (int)Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
+ case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
+ if ((short)Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
+ if ((Value >> 16) == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'M': // "M" is a constant that is greater than 31.
+ if (Value > 31)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'N': // "N" is a positive constant that is an exact power of two.
+ if ((int)Value > 0 && isPowerOf2_32(Value))
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'O': // "O" is the constant zero.
+ if (Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
+ if ((short)-Value == (int)-Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ }
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // Handle standard constraint letters.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // FIXME: PPC does not allow r+i addressing modes for vectors!
+
+ // PPC allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // PPC only support r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default:
+ // No other scales are supported.
+ return false;
+ }
+
+ return true;
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
+ // PPC allows a sign-extended 16-bit immediate field.
+ return (V > -(1 << 16) && V < (1 << 16)-1);
+}
+
+bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
+ return false;
+}
+
+SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ // Make sure the function does not optimize away the store of the RA to
+ // the stack.
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setLRStoreRequired();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+
+ DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
+ isPPC64? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Just load the return address off the stack.
+ SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ // Naked functions never have a frame pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until during PEI.
+ unsigned FrameReg;
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
+ PtrVT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
+ FrameAddr, MachinePointerInfo(), false, false,
+ false, 0);
+ return FrameAddr;
+}
+
+bool
+PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The PowerPC target isn't yet aware of offsets.
+ return false;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero that means it's safe to destination
+/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+/// means there isn't a need to check it against alignment requirement,
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ if (this->PPCSubTarget.isPPC64()) {
+ return MVT::i64;
+ } else {
+ return MVT::i32;
+ }
+}
+
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ bool *Fast) const {
+ if (DisablePPCUnaligned)
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+ // Although accessing unaligned addresses is not as efficient as accessing
+ // aligned addresses, it is generally more efficient than manual expansion,
+ // and generally only traps for software emulation when crossing page
+ // boundaries.
+
+ if (!VT.isSimple())
+ return false;
+
+ if (VT.getSimpleVT().isVector())
+ return false;
+
+ if (VT == MVT::ppcf128)
+ return false;
+
+ if (Fast)
+ *Fast = true;
+
+ return true;
+}
+
+/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+/// is expanded to mul + add.
+bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v4f32:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
+ if (DisableILPPref)
+ return TargetLowering::getSchedulingPreference(N);
+
+ return Sched::ILP;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
new file mode 100644
index 000000000000..7157b70d8622
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -0,0 +1,632 @@
+//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PPC uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace PPCISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// and f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+ /// unsigned integers and single-precision outputs.
+ FCFIDU, FCFIDS, FCFIDUS,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ, FCTIWZ,
+
+ /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
+ /// unsigned integers.
+ FCTIDUZ, FCTIWUZ,
+
+ /// Reciprocal estimate instructions (unary FP ops).
+ FRE, FRSQRTE,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP, VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi, Lo,
+
+ TOC_ENTRY,
+
+ /// The following three target-specific nodes are used for calls through
+ /// function pointers in the 64-bit SVR4 ABI.
+
+ /// Restore the TOC from the TOC save area of the current stack frame.
+ /// This is basically a hard coded load instruction which additionally
+ /// takes/produces a flag.
+ TOC_RESTORE,
+
+ /// Like a regular LOAD but additionally taking/producing a flag.
+ LOAD,
+
+ /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is
+ /// a hard coded load instruction.
+ LOAD_TOC,
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
+ /// shift amounts. These nodes are generated by the multi-precision shift
+ /// code.
+ SRL, SRA, SHL,
+
+ /// CALL - A direct function call.
+ /// CALL_NOP is a call with the special NOP which follows 64-bit
+ /// SVR4 calls.
+ CALL, CALL_NOP,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
+ /// instructions. This copies the bits corresponding to the specified
+ /// CRREG into the resultant GPR. Bits corresponding to other CR regs
+ /// are undefined.
+ MFCR,
+
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+ /// towards zero. Used only as part of the long double-to-int
+ /// conversion sequence.
+ FADDRTZ,
+
+ /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+ MFFS,
+
+ /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
+ /// reserve indexed. This is used to implement atomic operations.
+ LARX,
+
+ /// STCX = This corresponds to PPC stcx. instrcution: store conditional
+ /// indexed. This is used to implement atomic operations.
+ STCX,
+
+ /// TC_RETURN - A tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
+ CR6SET,
+ CR6UNSET,
+
+ /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
+ /// TLS model, produces an ADDIS8 instruction that adds the GOT
+ /// base to sym@got@tprel@ha.
+ ADDIS_GOT_TPREL_HA,
+
+ /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
+ /// TLS model, produces a LD instruction with base register G8RReg
+ /// and offset sym@got@tprel@l. This completes the addition that
+ /// finds the offset of "sym" relative to the thread pointer.
+ LD_GOT_TPREL_L,
+
+ /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+ /// model, produces an ADD instruction that adds the contents of
+ /// G8RReg to the thread pointer. Symbol contains a relocation
+ /// sym@tls which is to be replaced by the thread pointer and
+ /// identifies to the linker that the instruction is part of a
+ /// TLS sequence.
+ ADD_TLS,
+
+ /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym@got@tlsgd@ha.
+ ADDIS_TLSGD_HA,
+
+ /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@tlsgd@l.
+ ADDI_TLSGD_L,
+
+ /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym@tlsgd).
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym@got@tlsld@ha.
+ ADDIS_TLSLD_HA,
+
+ /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@tlsld@l.
+ ADDI_TLSLD_L,
+
+ /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym@tlsld).
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
+ /// local-dynamic TLS model, produces an ADDIS8 instruction
+ /// that adds X3 to sym@dtprel@ha. The Chain operand is needed
+ /// to tie this in place following a copy to %X3 from the result
+ /// of a GET_TLSLD_ADDR.
+ ADDIS_DTPREL_HA,
+
+ /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@dtprel@l.
+ ADDI_DTPREL_L,
+
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
+ /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
+ /// or i32.
+ LBRX,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+ /// load which sign-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWAX,
+
+ /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+ /// load which zero-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWZX,
+
+ /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
+ /// produces an ADDIS8 instruction that adds the TOC base register to
+ /// sym@toc@ha.
+ ADDIS_TOC_HA,
+
+ /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
+ /// produces a LD instruction with base register G8RReg and offset
+ /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ LD_TOC_L,
+
+ /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
+ /// an ADDI8 instruction that adds G8RReg to sym@toc@l.
+ /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ ADDI_TOC_L
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace PPC {
+ /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUHUM instruction.
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUWUM instruction.
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+
+ /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element that is suitable for input to
+ /// VSPLTB/VSPLTH/VSPLTW.
+ bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
+
+ /// isAllNegativeZeroVector - Returns true if all elements of build_vector
+ /// are -0.0.
+ bool isAllNegativeZeroVector(SDNode *N);
+
+ /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+ /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+
+ /// get_VSPLTI_elt - If this is a build_vector of constants which can be
+ /// formed by using a vspltis[bhw] instruction of the specified element
+ /// size, return the constant being splatted. The ByteSize field indicates
+ /// the number of bytes of each element [124] -> [bhw].
+ SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ }
+
+ class PPCTargetLowering : public TargetLowering {
+ const PPCSubtarget &PPCSubTarget;
+ const PPCRegisterInfo *PPCRegInfo;
+
+ public:
+ explicit PPCTargetLowering(PPCTargetMachine &TM);
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegReg - Given the specified addressed, check to see if it
+ /// can be represented as an indexed [r+r] operation. Returns false if it
+ /// can be more efficiently represented with [r+imm].
+ bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImm - Returns true if the address N can be represented
+ /// by a base register plus a signed 16-bit displacement [r+imm], and if it
+ /// is not better represented as reg+reg.
+ bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImmShift - Returns true if the address N can be
+ /// represented by a base register plus a signed 14-bit displacement
+ /// [r+imm*4]. Suitable for use by STD and friends.
+ bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+ Sched::Preference getSchedulingPreference(SDNode *N) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB, bool is64Bit,
+ unsigned BinOpcode) const;
+ MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool is8bit, unsigned Opcode) const;
+
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. This is the actual
+ /// alignment, not its logarithm.
+ unsigned getByValTypeAlignment(Type *Ty) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode for load / store of the
+ /// given type.
+ virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
+
+ /// isLegalAddressImmediate - Return true if the GlobalValue can be used as
+ /// the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. If DstAlign is zero that means it's safe to destination
+ /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+ /// means there isn't a need to check it against alignment requirement,
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
+ virtual EVT
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
+ /// Is unaligned memory access allowed for the given type, and is it fast
+ /// relative to software emulation.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT VT) const;
+
+ private:
+ SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
+ SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
+
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ DebugLoc dl) const;
+
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall,
+ bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue
+ extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
+ SDValue ArgVal, DebugLoc dl) const;
+
+ void
+ setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
+ unsigned nAltivecParamsAtEnd,
+ unsigned MinReservedArea, bool isPPC64) const;
+
+ SDValue
+ LowerFormalArguments_Darwin(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerFormalArguments_64SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerFormalArguments_32SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue
+ createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+ SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG, DebugLoc dl) const;
+
+ SDValue
+ LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
+ };
+}
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
new file mode 100644
index 000000000000..fa5b65f0ba2d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -0,0 +1,968 @@
+//===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC 64-bit instructions. These patterns are used
+// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands.
+//
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def symbolHi64 : Operand<i64> {
+ let PrintMethod = "printSymbolHi";
+ let EncoderMethod = "getHA16Encoding";
+}
+def symbolLo64 : Operand<i64> {
+ let PrintMethod = "printSymbolLo";
+ let EncoderMethod = "getLO16Encoding";
+}
+def tocentry : Operand<iPTR> {
+ let MIOperandInfo = (ops i64imm:$imm);
+}
+def tlsreg : Operand<i64> {
+ let EncoderMethod = "getTLSRegEncoding";
+}
+def tlsgd : Operand<i64> {}
+
+//===----------------------------------------------------------------------===//
+// 64-bit transformation functions.
+//
+
+def SHL64 : SDNodeXForm<imm, [{
+ // Transformation function: 63 - imm
+ return getI32Imm(63 - N->getZExtValue());
+}]>;
+
+def SRL64 : SDNodeXForm<imm, [{
+ // Transformation function: 64 - imm
+ return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def HI32_48 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 32));
+}]>;
+
+def HI48_64 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 48));
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calls.
+//
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+}
+
+let Defs = [LR8] in
+ def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [CTR8], Uses = [CTR8] in {
+ def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
+ }
+}
+
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", BrB, []>; // See Pat patterns below.
+
+ def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ "bla $func", BrB, [(PPCcall (i64 imm:$func))]>;
+ }
+ let Uses = [RM], isCodeGenOnly = 1 in {
+ def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func),
+ "bl $func\n\tnop", BrB, []>;
+
+ def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, tlsgd:$sym),
+ "bl $func($sym)\n\tnop", BrB, []>;
+
+ def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, tlsgd:$sym),
+ "bl $func($sym)\n\tnop", BrB, []>;
+
+ def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ (outs), (ins aaddr:$func),
+ "bla $func\n\tnop", BrB,
+ [(PPCcall_nop (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", BrB, [(PPCbctrl)]>,
+ Requires<[In64BitMode]>;
+ }
+}
+
+
+// Calls
+def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
+ (BL8 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)),
+ (BL8_NOP tglobaladdr:$dst)>;
+
+def : Pat<(PPCcall (i64 texternalsym:$dst)),
+ (BL8 texternalsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
+ (BL8_NOP texternalsym:$dst)>;
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64",
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
+
+ def ATOMIC_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64",
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr", LdStLDARX,
+ [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
+
+let Defs = [CR0] in
+def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdcx. $rS, $dst", LdStSTDCX,
+ [(PPCstcx i64:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi8 :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset),
+ "#TC_RETURNd8 $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset),
+ "#TC_RETURNa8 $func $offset",
+ [(PPCtc_return (i64 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
+ "#TC_RETURNr8 $dst $offset",
+ []>;
+
+let isCodeGenOnly = 1 in {
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+}
+
+def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi8 texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
+ (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+
+
+// 64-bit CR instructions
+def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+let isCodeGenOnly = 1 in
+def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
+ "#MFCR8pseud", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// 64-bit SPR manipulation instrs.
+
+let Uses = [CTR8] in {
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Pattern = [(set i64:$rT, readcyclecounter)] in
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
+ "mfspr $rT, 268", SprMFTB>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+// Note that encoding mftb using mfspr is now the preferred form,
+// and has been since at least ISA v2.03. The mftb instruction has
+// now been phased out. Using mfspr, however, is known not to work on
+// the POWER3.
+
+let Defs = [X1], Uses = [X1] in
+def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8",
+ [(set i64:$result,
+ (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
+
+let Defs = [LR8] in {
+def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR8] in {
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed point instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
+ "li $rD, $imm", IntSimple,
+ [(set i64:$rD, immSExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
+ "lis $rD, $imm", IntSimple,
+ [(set i64:$rD, imm16ShiftedSExt:$imm)]>;
+}
+
+// Logical ops.
+def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nand $rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
+def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "and $rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
+def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "andc $rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
+def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
+def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nor $rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
+def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "orc $rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
+def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "eqv $rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
+def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "xor $rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
+
+// Logical ops with immediate.
+def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntSimple,
+ [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
+def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntSimple,
+ [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntSimple,
+ [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
+def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntSimple,
+ [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
+
+def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "add $rT, $rA, $rB", IntSimple,
+ [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
+// initial-exec thread-local storage model.
+let isCodeGenOnly = 1 in
+def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB@tls", IntSimple,
+ [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
+
+let Defs = [CARRY] in {
+def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
+ PPC970_DGroup_Cracked;
+def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>;
+}
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm),
+ "addi $rD, $rA, $imm", IntSimple,
+ [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm),
+ "addis $rD, $rA, $imm", IntSimple,
+ [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
+
+let Defs = [CARRY] in {
+def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>;
+def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
+def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "neg $rT, $rA", IntSimple,
+ [(set i64:$rT, (ineg i64:$rA))]>;
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
+def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, -1))]>;
+def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, 0))]>;
+def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
+def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set i64:$rT, (sube -1, i64:$rA))]>;
+def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set i64:$rT, (sube 0, i64:$rA))]>;
+}
+
+
+def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhd $rT, $rA, $rB", IntMulHW,
+ [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
+def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhdu $rT, $rA, $rB", IntMulHWU,
+ [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
+
+def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPLD : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm),
+ "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
+def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
+
+def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "sld $rA, $rS, $rB", IntRotateD,
+ [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
+def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srd $rA, $rS, $rB", IntRotateD,
+ [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
+let Defs = [CARRY] in {
+def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srad $rA, $rS, $rB", IntRotateD,
+ [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
+}
+
+def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsb $rA, $rS", IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
+def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsh $rA, $rS", IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
+
+def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsw $rA, $rS", IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
+def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntSimple,
+ [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
+
+let Defs = [CARRY] in {
+def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
+ "sradi $rA, $rS, $SH", IntRotateDI,
+ [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+}
+def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
+ "cntlzd $rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctlz i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS),
+ "popcntd $rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
+
+// popcntw also does a population count on the high 32 bits (storing the
+// results in the high 32-bits of the output). We'll ignore that here (which is
+// safe because we never separately use the high part of the 64-bit registers).
+def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS),
+ "popcntw $rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
+
+def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divd $rT, $rA, $rB", IntDivD,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divdu $rT, $rA, $rB", IntDivD,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulld $rT, $rA, $rB", IntMulHD,
+ [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+
+
+let isCommutable = 1 in {
+def RLDIMI : MDForm_1<30, 3,
+ (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldimi $rA, $rS, $SH, $MB", IntRotateDI,
+ []>, isPPC64, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+
+// Rotate instructions.
+def RLDCL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE),
+ "rldcl $rA, $rS, $rB, $MBE", IntRotateD,
+ []>, isPPC64;
+def RLDICL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
+ []>, isPPC64;
+def RLDICR : MDForm_1<30, 1,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicr $rA, $rS, $SH, $MBE", IntRotateDI,
+ []>, isPPC64;
+
+def RLWINM8 : MForm_2<21,
+ (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+
+def ISEL8 : AForm_4<31, 15,
+ (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond),
+ "isel $rT, $rA, $rB, $cond", IntGeneral,
+ []>;
+} // End FXU Operations.
+
+
+//===----------------------------------------------------------------------===//
+// Load/Store instructions.
+//
+
+
+// Sign extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set i64:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
+ "lwa $rD, $src", LdStLWA,
+ [(set i64:$rD,
+ (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set i64:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
+ "lwax $rD, $src", LdStLHA,
+ [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+
+// Update forms.
+let mayLoad = 1 in {
+def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memri:$addr),
+ "lhau $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+// NO LWAU!
+
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lwaux $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
+}
+}
+
+// Zero extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStLoad,
+ [(set i64:$rD, (zextloadi8 iaddr:$src))]>;
+def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStLoad,
+ [(set i64:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStLoad,
+ [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+
+def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStLoad,
+ [(set i64:$rD, (zextloadi8 xaddr:$src))]>;
+def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStLoad,
+ [(set i64:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStLoad,
+ [(set i64:$rD, (zextloadi32 xaddr:$src))]>;
+
+
+// Update forms.
+let mayLoad = 1 in {
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+
+// Full 8-byte loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
+ "ld $rD, $src", LdStLD,
+ [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+// The following three definitions are selected for small code model only.
+// Otherwise, we need to create two instructions to form a 32-bit offset,
+// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
+def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "#LDtoc",
+ [(set i64:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "#LDtocJTI",
+ [(set i64:$rD,
+ (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
+def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "#LDtocCPT",
+ [(set i64:$rD,
+ (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
+
+let hasSideEffects = 1, isCodeGenOnly = 1 in {
+let RST = 2, DS = 2 in
+def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
+ "ld 2, 8($reg)", LdStLD,
+ [(PPCload_toc i64:$reg)]>, isPPC64;
+
+let RST = 2, DS = 10, RA = 1 in
+def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
+ "ld 2, 40(1)", LdStLD,
+ [(PPCtoc_restore)]>, isPPC64;
+}
+def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
+ "ldx $rD, $src", LdStLD,
+ [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src),
+ "ldbrx $rD, $src", LdStLoad,
+ [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
+
+let mayLoad = 1 in
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
+ "ldu $rD, $addr", LdStLDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
+ NoEncode<"$ea_result">;
+
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "ldux $rD, $addr", LdStLDU,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
+}
+
+def : Pat<(PPCload ixaddr:$src),
+ (LD ixaddr:$src)>;
+def : Pat<(PPCload xaddr:$src),
+ (LDX xaddr:$src)>;
+
+// Support for medium and large code model.
+def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+ "#ADDIStocHA",
+ [(set i64:$rD,
+ (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
+ isPPC64;
+def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg),
+ "#LDtocL",
+ [(set i64:$rD,
+ (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+ "#ADDItocL",
+ [(set i64:$rD,
+ (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
+
+// Support for thread-local storage.
+def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDISgotTprelHA",
+ [(set i64:$rD,
+ (PPCaddisGotTprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg),
+ "#LDgotTprelL",
+ [(set i64:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
+ isPPC64;
+def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
+ (ADD8TLS $in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDIStlsgdHA",
+ [(set i64:$rD,
+ (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDItlsgdL",
+ [(set i64:$rD,
+ (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+ "#GETtlsADDR",
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDIStlsldHA",
+ [(set i64:$rD,
+ (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDItlsldL",
+ [(set i64:$rD,
+ (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+ "#GETtlsldADDR",
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDISdtprelHA",
+ [(set i64:$rD,
+ (PPCaddisDtprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDIdtprelL",
+ [(set i64:$rD,
+ (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+
+let PPC970_Unit = 2 in {
+// Truncating stores.
+def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
+ "stb $rS, $src", LdStStore,
+ [(truncstorei8 i64:$rS, iaddr:$src)]>;
+def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
+ "sth $rS, $src", LdStStore,
+ [(truncstorei16 i64:$rS, iaddr:$src)]>;
+def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
+ "stw $rS, $src", LdStStore,
+ [(truncstorei32 i64:$rS, iaddr:$src)]>;
+def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStStore,
+ [(truncstorei8 i64:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStStore,
+ [(truncstorei16 i64:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStStore,
+ [(truncstorei32 i64:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+// Normal 8-byte stores.
+def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
+ "std $rS, $dst", LdStSTD,
+ [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
+def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdx $rS, $dst", LdStSTD,
+ [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdbrx $rS, $dst", LdStStore,
+ [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+// Stores with Update (pre-inc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst),
+ "stdu $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked, isPPC64;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STDU $rS, iaddroff:$ptroff, $ptrreg)>;
+
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STDUX $rS, $ptrreg, $ptroff)>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating point instructions.
+//
+
+
+let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
+def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfid $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctidz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+
+def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfidu $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB),
+ "fcfids $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB),
+ "fcfidus $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiduz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwuz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Patterns
+//
+
+// Extensions and truncates to/from 32-bit regs.
+def : Pat<(i64 (zext i32:$in)),
+ (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
+ 0, 32)>;
+def : Pat<(i64 (anyext i32:$in)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>;
+def : Pat<(i32 (trunc i64:$in)),
+ (EXTRACT_SUBREG $in, sub_32)>;
+
+// Extending loads with i64 targets.
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ8 iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX8 xaddr:$src)>;
+def : Pat<(extloadi32 iaddr:$src),
+ (LWZ8 iaddr:$src)>;
+def : Pat<(extloadi32 xaddr:$src),
+ (LWZX8 xaddr:$src)>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
+// amounts.
+def : Pat<(sra i64:$rS, i32:$rB),
+ (SRAD $rS, $rB)>;
+def : Pat<(srl i64:$rS, i32:$rB),
+ (SRD $rS, $rB)>;
+def : Pat<(shl i64:$rS, i32:$rB),
+ (SLD $rS, $rB)>;
+
+// SHL/SRL
+def : Pat<(shl i64:$in, (i32 imm:$imm)),
+ (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>;
+
+// ROTL
+def : Pat<(rotl i64:$in, i32:$sh),
+ (RLDCL $in, $sh, 0)>;
+def : Pat<(rotl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, imm:$imm, 0)>;
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in),
+ (ADDIS8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in),
+ (ADDI8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 $in, tglobaladdr:$g)>;
+def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 $in, tconstpool:$g)>;
+def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 $in, tjumptable:$g)>;
+def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS8 $in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+ (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+ (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+ (STDX $rS, xoaddr:$dst)>;
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
new file mode 100644
index 000000000000..a5ba4c8aebef
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -0,0 +1,828 @@
+//===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Altivec extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Altivec transformation functions and pattern fragments.
+//
+
+// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be
+// of that type.
+def vnot_ppc : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
+
+def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+
+
+def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+
+
+def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+
+
+def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+}]>;
+def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, false) != -1;
+}], VSLDOI_get_imm>;
+
+
+/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
+/// vector_shuffle(X,undef,mask) by the dag combiner.
+def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+}]>;
+def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, true) != -1;
+}], VSLDOI_unary_get_imm>;
+
+
+// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
+def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+}]>;
+def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
+}], VSPLTB_get_imm>;
+def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+}]>;
+def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
+}], VSPLTH_get_imm>;
+def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+}]>;
+def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
+}], VSPLTW_get_imm>;
+
+
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0;
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0;
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
+}], VSPLTISW_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
+class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
+
+// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
+
+// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
+// input types and an output type.
+class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set OutTy:$vD,
+ (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
+
+// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
+class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
+
+// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
+
+// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
+// input types and an output type.
+class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
+
+// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
+class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set v4f32:$vD, (IntID v4f32:$vB))]>;
+
+// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vB))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
+let Predicates = [HasAltivec] in {
+
+let isCodeGenOnly = 1 in {
+def DSS : DSS_Form<822, (outs),
+ (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dss $STRM", LdStLoad /*FIXME*/, []>;
+def DSSALL : DSS_Form<822, (outs),
+ (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dssall", LdStLoad /*FIXME*/, []>;
+def DST : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+def DSTT : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+def DSTST : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+def DSTSTT : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+
+def DST64 : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+def DSTT64 : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+def DSTST64 : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+def DSTSTT64 : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+}
+
+def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
+ "mfvscr $vD", LdStStore,
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
+def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
+ "mtvscr $vB", LdStLoad,
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+
+let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
+ "lvebx $vD, $src", LdStLoad,
+ [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
+ "lvehx $vD, $src", LdStLoad,
+ [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
+ "lvewx $vD, $src", LdStLoad,
+ [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
+ "lvx $vD, $src", LdStLoad,
+ [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
+ "lvxl $vD, $src", LdStLoad,
+ [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+}
+
+def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsl $vD, $src", LdStLoad,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsr $vD, $src", LdStLoad,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+
+let PPC970_Unit = 2 in { // Stores.
+def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvebx $rS, $dst", LdStStore,
+ [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
+def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvehx $rS, $dst", LdStStore,
+ [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
+def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvewx $rS, $dst", LdStStore,
+ [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
+def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvx $rS, $dst", LdStStore,
+ [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
+def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvxl $rS, $dst", LdStStore,
+ [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
+}
+
+let PPC970_Unit = 5 in { // VALU Operations.
+// VA-Form instructions. 3-input AltiVec ops.
+def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vmaddfp $vD, $vA, $vC, $vB", VecFP,
+ [(set v4f32:$vD,
+ (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
+
+// FIXME: The fma+fneg pattern won't match because fneg is not legal.
+def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
+ [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
+ (fneg v4f32:$vB))))]>;
+
+def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+ v8i16>;
+def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
+
+def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
+ v4i32, v4i32, v16i8>;
+def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
+
+// Shuffles.
+def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
+ "vsldoi $vD, $vA, $vB, $SH", VecFP,
+ [(set v16i8:$vD,
+ (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
+
+// VX-Form instructions. AltiVec arithmetic ops.
+def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddfp $vD, $vA, $vB", VecFP,
+ [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
+
+def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddubm $vD, $vA, $vB", VecGeneral,
+ [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
+def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduhm $vD, $vA, $vB", VecGeneral,
+ [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
+def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduwm $vD, $vA, $vB", VecGeneral,
+ [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
+
+def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
+def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
+def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
+def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>;
+def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>;
+def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
+def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
+
+
+def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vand $vD, $vA, $vB", VecFP,
+ [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
+def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vandc $vD, $vA, $vB", VecFP,
+ [(set v4i32:$vD, (and v4i32:$vA,
+ (vnot_ppc v4i32:$vB)))]>;
+
+def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfsx $vD, $vB, $UIMM", VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
+def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfux $vD, $vB, $UIMM", VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
+def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctsxs $vD, $vB, $UIMM", VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
+def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctuxs $vD, $vB, $UIMM", VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
+
+// Defines with the UIM field set to 0 for floating-point
+// to integer (fp_to_sint/fp_to_uint) conversions and integer
+// to floating-point (sint_to_fp/uint_to_fp) conversions.
+let VA = 0 in {
+def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB),
+ "vcfsx $vD, $vB, 0", VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
+def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB),
+ "vctuxs $vD, $vB, 0", VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
+def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB),
+ "vcfux $vD, $vB, 0", VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
+def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB),
+ "vctsxs $vD, $vB, 0", VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
+}
+def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>;
+def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>;
+def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>;
+def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>;
+def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>;
+def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>;
+
+def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>;
+def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>;
+def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>;
+def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>;
+def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>;
+def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>;
+def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>;
+def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>;
+def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>;
+def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>;
+def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>;
+def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
+def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
+def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
+
+def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghb $vD, $vA, $vB", VecFP,
+ [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghh $vD, $vA, $vB", VecFP,
+ [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghw $vD, $vA, $vB", VecFP,
+ [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglb $vD, $vA, $vB", VecFP,
+ [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglh $vD, $vA, $vB", VecFP,
+ [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglw $vD, $vA, $vB", VecFP,
+ [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
+ v4i32, v16i8, v4i32>;
+def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
+ v4i32, v8i16, v4i32>;
+def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+ v4i32, v8i16, v4i32>;
+def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
+ v4i32, v16i8, v4i32>;
+def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
+ v4i32, v8i16, v4i32>;
+def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+ v4i32, v8i16, v4i32>;
+
+def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
+ v8i16, v16i8>;
+def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh,
+ v4i32, v8i16>;
+def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub,
+ v8i16, v16i8>;
+def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh,
+ v4i32, v8i16>;
+def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb,
+ v8i16, v16i8>;
+def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh,
+ v4i32, v8i16>;
+def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
+ v8i16, v16i8>;
+def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
+ v4i32, v8i16>;
+
+def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+
+def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
+
+def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubfp $vD, $vA, $vB", VecGeneral,
+ [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
+def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsububm $vD, $vA, $vB", VecGeneral,
+ [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
+def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuhm $vD, $vA, $vB", VecGeneral,
+ [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
+def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuwm $vD, $vA, $vB", VecGeneral,
+ [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
+
+def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
+def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
+def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
+def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>;
+def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
+def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
+
+def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+
+def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+ v4i32, v16i8, v4i32>;
+def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+ v4i32, v8i16, v4i32>;
+def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+ v4i32, v16i8, v4i32>;
+
+def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vnor $vD, $vA, $vB", VecFP,
+ [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
+ v4i32:$vB)))]>;
+def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vor $vD, $vA, $vB", VecFP,
+ [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
+def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vxor $vD, $vA, $vB", VecFP,
+ [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
+
+def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
+def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>;
+def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>;
+
+def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >;
+def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>;
+
+def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
+def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
+def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
+
+def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltb $vD, $vB, $UIMM", VecPerm,
+ [(set v16i8:$vD,
+ (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vsplth $vD, $vB, $UIMM", VecPerm,
+ [(set v16i8:$vD,
+ (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltw $vD, $vB, $UIMM", VecPerm,
+ [(set v16i8:$vD,
+ (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+
+def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>;
+def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
+
+def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>;
+def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>;
+def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>;
+def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>;
+def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
+def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
+
+
+def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisb $vD, $SIMM", VecPerm,
+ [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
+def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltish $vD, $SIMM", VecPerm,
+ [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
+def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisw $vD, $SIMM", VecPerm,
+ [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
+
+// Vector Pack.
+def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
+ v8i16, v4i32>;
+def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+ v16i8, v8i16>;
+def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+ v16i8, v8i16>;
+def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+ v16i8, v4i32>;
+def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+ v8i16, v4i32>;
+def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuhum $vD, $vA, $vB", VecFP,
+ [(set v16i8:$vD,
+ (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+ v16i8, v8i16>;
+def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuwum $vD, $vA, $vB", VecFP,
+ [(set v16i8:$vD,
+ (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+ v8i16, v4i32>;
+
+// Vector Unpack.
+def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
+ v4i32, v8i16>;
+def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb,
+ v8i16, v16i8>;
+def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh,
+ v4i32, v8i16>;
+def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx,
+ v4i32, v8i16>;
+def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb,
+ v8i16, v16i8>;
+def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
+ v4i32, v8i16>;
+
+
+// Altivec Comparisons.
+
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
+ let Defs = [CR6];
+ let RC = 1;
+}
+
+// f32 element comparisons.0
+def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
+def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+
+let isCodeGenOnly = 1 in
+def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
+ "vxor $vD, $vD, $vD", VecFP,
+ [(set v4i32:$vD, (v4i32 immAllZerosV))]>;
+let IMM=-1 in {
+def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins),
+ "vspltisw $vD, -1", VecFP,
+ [(set v4i32:$vD, (v4i32 immAllOnesV))]>;
+}
+} // VALU Operations.
+
+//===----------------------------------------------------------------------===//
+// Additional Altivec Patterns
+//
+
+// DS* intrinsics
+def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
+def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
+
+// * 32-bit
+def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, $rA, $rB)>;
+
+// * 64-bit
+def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
+
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store v4i32:$rS, xoaddr:$dst),
+ (STVX $rS, xoaddr:$dst)>;
+
+// Bit conversions.
+def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+// Shuffles.
+
+// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
+def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef),
+ (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm $in))>;
+def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
+ (VPKUWUM $vA, $vA)>;
+def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
+ (VPKUHUM $vA, $vA)>;
+
+// Match vmrg*(x,x)
+def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
+ (VMRGLB $vA, $vA)>;
+def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef),
+ (VMRGLH $vA, $vA)>;
+def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
+ (VMRGLW $vA, $vA)>;
+def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef),
+ (VMRGHB $vA, $vA)>;
+def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
+ (VMRGHH $vA, $vA)>;
+def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
+ (VMRGHW $vA, $vA)>;
+
+// Logical Operations
+def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
+
+def : Pat<(vnot_ppc (or v4i32:$A, v4i32:$B)),
+ (VNOR $A, $B)>;
+def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
+ (VANDC $A, $B)>;
+
+def : Pat<(fmul v4f32:$vA, v4f32:$vB),
+ (VMADDFP $vA, $vB,
+ (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>;
+
+// Fused multiply add and multiply sub for packed float. These are represented
+// separately from the real instructions above, for operations that must have
+// the additional precision, such as Newton-Rhapson (used by divide, sqrt)
+def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
+
+def : Pat<(PPCvperm v16i8:$vA, v16i8:$vB, v16i8:$vC),
+ (VPERM $vA, $vB, $vC)>;
+
+def : Pat<(PPCfre v4f32:$A), (VREFP $A)>;
+def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>;
+
+// Vector shifts
+def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSLB $vA, $vB))>;
+def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSLH $vA, $vB))>;
+def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSLW $vA, $vB))>;
+
+def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRB $vA, $vB))>;
+def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRH $vA, $vB))>;
+def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRW $vA, $vB))>;
+
+def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRAB $vA, $vB))>;
+def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRAH $vA, $vB))>;
+def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRAW $vA, $vB))>;
+
+// Float to integer and integer to float conversions
+def : Pat<(v4i32 (fp_to_sint v4f32:$vA)),
+ (VCTSXS_0 $vA)>;
+def : Pat<(v4i32 (fp_to_uint v4f32:$vA)),
+ (VCTUXS_0 $vA)>;
+def : Pat<(v4f32 (sint_to_fp v4i32:$vA)),
+ (VCFSX_0 $vA)>;
+def : Pat<(v4f32 (uint_to_fp v4i32:$vA)),
+ (VCFUX_0 $vA)>;
+
+// Floating-point rounding
+def : Pat<(v4f32 (ffloor v4f32:$vA)),
+ (VRFIM $vA)>;
+def : Pat<(v4f32 (fceil v4f32:$vA)),
+ (VRFIP $vA)>;
+def : Pat<(v4f32 (ftrunc v4f32:$vA)),
+ (VRFIZ $vA)>;
+def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
+ (VRFIN $vA)>;
+
+} // end HasAltivec
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
new file mode 100644
index 000000000000..b424d1101416
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -0,0 +1,43 @@
+//===-- PPCInstrBuilder.h - Aides for building PPC insts --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRBUILDER_H
+#define POWERPC_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+static inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
new file mode 100644
index 000000000000..400b7e367bfe
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -0,0 +1,1003 @@
+//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// PowerPC instruction formats
+
+class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+}
+
+class PPC970_DGroup_First { bits<1> PPC970_First = 1; }
+class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; }
+class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; }
+class PPC970_MicroCode;
+
+class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; }
+class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; }
+class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; }
+class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; }
+class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; }
+class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
+class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
+class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+
+// Two joined instructions; used to emit two adjacent instructions as one.
+// The itinerary from the first instruction is used for scheduling and
+// classification.
+class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : Instruction {
+ field bits<64> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode1;
+ let Inst{32-37} = opcode2;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+}
+
+// 1.7.1 I-Form
+class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.2 B-Form
+class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+ bits<14> BD;
+
+ bits<5> BI;
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+
+ let Inst{6-10} = BIBO{4-0};
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
+ string asmstr>
+ : BForm<opcode, aa, lk, OOL, IOL, asmstr> {
+ let BIBO{4-0} = bo;
+ let BIBO{6-5} = 0;
+ let CR = 0;
+}
+
+class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.4 D-Form
+class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = Addr{20-16}; // Base Reg
+ let Inst{16-31} = Addr{15-0}; // Displacement
+}
+
+class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> C;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+
+class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
+
+class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = 0;
+ let Inst{16-31} = B;
+}
+
+class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> B;
+ bits<5> A;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
+class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+
+ let Inst{38-42} = A;
+ let Inst{43-47} = Addr{20-16}; // Base Reg
+ let Inst{48-63} = Addr{15-0}; // Displacement
+}
+
+// This is used to emit BL8+NOP.
+class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : IForm_and_DForm_1<opcode1, aa, lk, opcode2,
+ OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
+class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<16> I;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = I;
+}
+
+class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class DForm_6<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin>;
+
+class DForm_6_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_6<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+
+// 1.7.5 DS-Form
+class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<19> DS_RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = DS_RA{18-14}; // Register #
+ let Inst{16-29} = DS_RA{13-0}; // Displacement.
+ let Inst{30-31} = xo;
+}
+
+class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<14> DS;
+ bits<5> RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = RA;
+ let Inst{16-29} = DS;
+ let Inst{30-31} = xo;
+}
+
+// 1.7.6 X-Form
+class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// This is the same as XForm_base_r3xo, but the first two operands are swapped
+// when code is emitted.
+class XForm_base_r3xo_swapped
+ <bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RST;
+ bits<5> B;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let B = 0;
+ let Pattern = pattern;
+}
+
+class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 31;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+}
+
+class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of
+// numbers presumably relates to some document, but I haven't found it.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+ bits<5> FM;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FM;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// DCB_Form - Form X instruction, used for dcb* instructions.
+class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = immfield;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+
+// DSS_Form - Form X instruction, used for altivec dss* instructions.
+class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<1> T;
+ bits<2> STRM;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6} = T;
+ let Inst{7-8} = 0;
+ let Inst{9-10} = STRM;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.7 XL-Form
+class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+ bits<5> CRA;
+ bits<5> CRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRA;
+ let Inst{16-20} = CRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRD;
+ let Inst{16-20} = CRD;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo;
+ let Inst{31} = lk;
+}
+
+class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+
+ let BO = BIBO{2-6};
+ let BI{0-1} = BIBO{0-1};
+ let BI{2-4} = CR;
+ let BH = 0;
+}
+
+
+class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
+class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<3> BFA;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-13} = BFA;
+ let Inst{14-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.8 XFX-Form
+class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> SPR;
+
+ let Inst{6-10} = RT;
+ let Inst{11} = SPR{4};
+ let Inst{12} = SPR{3};
+ let Inst{13} = SPR{2};
+ let Inst{14} = SPR{1};
+ let Inst{15} = SPR{0};
+ let Inst{16} = SPR{9};
+ let Inst{17} = SPR{8};
+ let Inst{18} = SPR{7};
+ let Inst{19} = SPR{6};
+ let Inst{20} = SPR{5};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FXM;
+ bits<5> rS;
+
+ let Inst{6-10} = rS;
+ let Inst{11} = 0;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> ST;
+ bits<8> FXM;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 1;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>;
+
+class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+// XFL-Form - MTFSF
+// This is probably 1.7.9, but I don't have the reference that uses this
+// numbering scheme...
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FM;
+ bits<5> rT;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6} = 0;
+ let Inst{7-14} = FM;
+ let Inst{15} = 0;
+ let Inst{16-20} = rT;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// 1.7.10 XS-Form - SRADI.
+class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RS;
+ bits<6> SH;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = A;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+// 1.7.11 XO-Form
+class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21} = oe;
+ let Inst{22-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XOForm_3<bits<6> opcode, bits<9> xo, bit oe,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XOForm_1<opcode, xo, oe, OOL, IOL, asmstr, itin, pattern> {
+ let RB = 0;
+}
+
+// 1.7.12 A-Form
+class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRC;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-25} = FRC;
+ let Inst{26-30} = xo;
+ let Inst{31} = RC;
+}
+
+class AForm_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRC = 0;
+}
+
+class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+ bits<5> COND;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = COND;
+ let Inst{26-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.13 M-Form
+class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<5> MB;
+ bits<5> ME;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = MB;
+ let Inst{26-30} = ME;
+ let Inst{31} = RC;
+}
+
+class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// 1.7.14 MD-Form
+class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<6> SH;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+
+
+// E-1 VA-Form
+
+// VAForm_1 - DACB ordering.
+class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VC;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+// VAForm_1a - DABC ordering.
+class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<5> VC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<4> SH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = 0;
+ let Inst{22-25} = SH;
+ let Inst{26-31} = xo;
+}
+
+// E-2 VX-Form
+class VXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_setzero<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : VXForm_1<xo, OOL, IOL, asmstr, itin, pattern> {
+ let VA = VD;
+ let VB = VD;
+}
+
+
+class VXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr.
+class VXForm_4<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr.
+class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+// E-4 VXR-Form
+class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+//===----------------------------------------------------------------------===//
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : I<0, OOL, IOL, asmstr, NoItinerary> {
+ let isCodeGenOnly = 1;
+ let PPC64 = 0;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
new file mode 100644
index 000000000000..69c54ed084be
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -0,0 +1,731 @@
+//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCHazardRecognizers.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PPCGenInstrInfo.inc"
+
+using namespace llvm;
+
+static cl::
+opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
+ cl::desc("Disable analysis for CTR loops"));
+
+PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
+ if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
+ Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new PPCScoreboardHazardRecognizer(II, DAG);
+ }
+
+ return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
+}
+
+/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
+/// to use for this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+
+ // Most subtargets use a PPC970 recognizer.
+ if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
+ Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ assert(TII && "No InstrInfo?");
+
+ return new PPCHazardRecognizer970(*TII);
+ }
+
+ return new PPCScoreboardHazardRecognizer(II, DAG);
+}
+
+// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: return false;
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64:
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SubIdx = PPC::sub_32;
+ return true;
+ }
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ // Note: This list must be kept consistent with LoadRegFromStackSlot.
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::LD:
+ case PPC::LWZ:
+ case PPC::LFS:
+ case PPC::LFD:
+ case PPC::RESTORE_CR:
+ case PPC::LVX:
+ case PPC::RESTORE_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset which defaults to 0).
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ // Note: This list must be kept consistent with StoreRegToStackSlot.
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::STD:
+ case PPC::STW:
+ case PPC::STFS:
+ case PPC::STFD:
+ case PPC::SPILL_CR:
+ case PPC::STVX:
+ case PPC::SPILL_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset which defaults to 0).
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+// commuteInstruction - We can commute rlwimi instructions, but only if the
+// rotate amt is zero. We also have to munge the immediates a bit.
+MachineInstr *
+PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+
+ // Normal instructions can be commuted the obvious way.
+ if (MI->getOpcode() != PPC::RLWIMI)
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+
+ // Cannot commute if it has a non-zero rotate count.
+ if (MI->getOperand(3).getImm() != 0)
+ return 0;
+
+ // If we have a zero rotate count, we have:
+ // M = mask(MB,ME)
+ // Op0 = (Op1 & ~M) | (Op2 & M)
+ // Change this to:
+ // M = mask((ME+1)&31, (MB-1)&31)
+ // Op0 = (Op2 & ~M) | (Op1 & M)
+
+ // Swap op1/op2
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+ // If machine instrs are no longer in two-address forms, update
+ // destination register as well.
+ if (Reg0 == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ // Masks.
+ unsigned MB = MI->getOperand(4).getImm();
+ unsigned ME = MI->getOperand(5).getImm();
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill))
+ .addImm((ME+1) & 31)
+ .addImm((MB-1) & 31);
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+
+ // Swap the mask around.
+ MI->getOperand(4).setImm((ME+1) & 31);
+ MI->getOperand(5).setImm((MB-1) & 31);
+ return MI;
+}
+
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(PPC::NOP));
+}
+
+
+// Branch analysis.
+// Note: If the condition register is set to CTR or CTR8 then this is a
+// BDNZ (imm == 1) or BDZ (imm == 0) branch.
+bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == PPC::B) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCC) {
+ if (!LastInst->getOperand(2).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BDNZ8 ||
+ LastInst->getOpcode() == PPC::BDNZ) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(1));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BDZ8 ||
+ LastInst->getOpcode() == PPC::BDZ) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ return false;
+ }
+
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with PPC::B and PPC:BCC, handle it.
+ if (SecondLastInst->getOpcode() == PPC::BCC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(2).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 ||
+ SecondLastInst->getOpcode() == PPC::BDNZ) &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(1));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if ((SecondLastInst->getOpcode() == PPC::BDZ8 ||
+ SecondLastInst->getOpcode() == PPC::BDZ) &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two PPC:Bs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == PPC::B &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
+ I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
+ I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "PPC branch conditions have two components!");
+
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
+ else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ BuildMI(&MBB, DL, get(Cond[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ BuildMI(&MBB, DL, get(Cond[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
+ return 2;
+}
+
+void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR;
+ else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR8;
+ else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::FMR;
+ else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::MCRF;
+ else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::VOR;
+ else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::CROR;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ const MCInstrDesc &MCID = get(Opc);
+ if (MCID.getNumOperands() == 3)
+ BuildMI(MBB, I, DL, MCID, DestReg)
+ .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+// This function returns true if a CR spill is necessary and false otherwise.
+bool
+PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional store instructions are added here,
+ // update isStoreToStackSlot.
+
+ DebugLoc DL;
+ if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
+ // FIXME: We use CRi here because there is no mtcrf on a bit. Since the
+ // backend currently only uses CR1EQ as an individual bit, this should
+ // not cause any bug. If we need other uses of CR bits, the following
+ // code may be invalid.
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
+ &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
+
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(TM.getSubtargetImpl()->isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ SpillsVRS = true;
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ return false;
+}
+
+void
+PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
+ FuncInfo->setSpillsCR();
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+bool
+PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional load instructions are added here,
+ // update isLoadFromStackSlot.
+
+ if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
+ FrameIdx));
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
+ FrameIdx));
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg),
+ FrameIdx));
+ return true;
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
+
+ unsigned Reg = 0;
+ if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
+ DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT ||
+ DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT ||
+ DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT ||
+ DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT ||
+ DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT ||
+ DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT ||
+ DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT ||
+ DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
+ &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
+
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(TM.getSubtargetImpl()->isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_VRSAVE),
+ DestReg),
+ FrameIdx));
+ SpillsVRS = true;
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ return false;
+}
+
+void
+PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
+ FuncInfo->setSpillsCR();
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+MachineInstr*
+PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE));
+ addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+bool PPCInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
+ if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
+ Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
+ else
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ return false;
+}
+
+/// GetInstSize - Return the number of bytes of code the specified
+/// instruction may be. This returns the maximum number of bytes.
+///
+unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case PPC::INLINEASM: { // Inline Asm: Variable size.
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ case PPC::PROLOG_LABEL:
+ case PPC::EH_LABEL:
+ case PPC::GC_LABEL:
+ case PPC::DBG_VALUE:
+ return 0;
+ case PPC::BL8_NOP:
+ case PPC::BLA8_NOP:
+ return 8;
+ default:
+ return 4; // PowerPC instructions are all 4 bytes
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
new file mode 100644
index 000000000000..635e3480b06d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -0,0 +1,157 @@
+//===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRUCTIONINFO_H
+#define POWERPC_INSTRUCTIONINFO_H
+
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PPCGenInstrInfo.inc"
+
+namespace llvm {
+
+/// PPCII - This namespace holds all of the PowerPC target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// PPC.td and PPCInstrFormats.td.
+namespace PPCII {
+enum {
+ // PPC970 Instruction Flags. These flags describe the characteristics of the
+ // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
+ // raw machine instructions.
+
+ /// PPC970_First - This instruction starts a new dispatch group, so it will
+ /// always be the first one in the group.
+ PPC970_First = 0x1,
+
+ /// PPC970_Single - This instruction starts a new dispatch group and
+ /// terminates it, so it will be the sole instruction in the group.
+ PPC970_Single = 0x2,
+
+ /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
+ /// two dispatch pipes to be available to issue.
+ PPC970_Cracked = 0x4,
+
+ /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
+ /// an instruction is issued to.
+ PPC970_Shift = 3,
+ PPC970_Mask = 0x07 << PPC970_Shift
+};
+enum PPC970_Unit {
+ /// These are the various PPC970 execution unit pipelines. Each instruction
+ /// is one of these.
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
+ PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
+ PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
+ PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
+ PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
+ PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
+ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
+ PPC970_BRU = 7 << PPC970_Shift // Branch Unit
+};
+} // end namespace PPCII
+
+
+class PPCInstrInfo : public PPCGenInstrInfo {
+ PPCTargetMachine &TM;
+ const PPCRegisterInfo RI;
+
+ bool StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
+ bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
+public:
+ explicit PPCInstrInfo(PPCTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ // commuteInstruction - We can commute rlwimi instructions, but only if the
+ // rotate amt is zero. We also have to munge the immediates a bit.
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// GetInstSize - Return the number of bytes of code the specified
+ /// instruction may be. This returns the maximum number of bytes.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
new file mode 100644
index 000000000000..ab907622beeb
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -0,0 +1,1717 @@
+//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subset of the 32-bit PowerPC instruction set, as used
+// by the PowerPC instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+include "PPCInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific type constraints.
+//
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
+def SDT_PPCvcmp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
+]>;
+
+def SDT_PPCcondbr : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClbrx : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+def SDT_PPCstbrx : SDTypeProfile<0, 3, [
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClarx : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstcx : SDTypeProfile<0, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific DAG Nodes.
+//
+
+def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
+def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
+
+def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
+def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
+def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;
+def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;
+def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
+def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
+def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
+ [SDNPHasChain, SDNPMayStore]>;
+def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+// Extract FPSCR (not modeled at the DAG level).
+def PPCmffs : SDNode<"PPCISD::MFFS",
+ SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+
+// Perform FADD in round-to-zero mode.
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
+
+
+def PPCfsel : SDNode<"PPCISD::FSEL",
+ // Type constraint for fsel.
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
+
+def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
+def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
+def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+
+def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
+def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
+ [SDNPMayLoad]>;
+def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
+def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
+def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
+def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
+ [SDNPHasChain]>;
+def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
+
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+
+// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
+// amounts. These nodes are generated by the multi-precision shift code.
+def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
+def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
+def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPInGlue, SDNPOutGlue]>;
+def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPInGlue, SDNPOutGlue]>;
+def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
+def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
+
+def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to set/unset CR bit 6 for SVR4 vararg calls
+def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// Instructions to support atomic operations
+def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support medium and large code model
+def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
+def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
+
+
+// Instructions to support dynamic alloca.
+def SDTDynOp : SDTypeProfile<1, 2, []>;
+def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+ // Transformation function: 31 - imm
+ return getI32Imm(31 - N->getZExtValue());
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+ // Transformation function: 32 - imm
+ return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: get the low 16 bits.
+ return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ signed int Val = N->getZExtValue();
+ return getI32Imm((Val - (signed short)Val) >> 16);
+}]>;
+def MB : SDNodeXForm<imm, [{
+ // Transformation function: get the start bit of a mask
+ unsigned mb = 0, me;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(mb);
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+ // Transformation function: get the end bit of a mask
+ unsigned mb, me = 0;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(me);
+}]>;
+def maskimm32 : PatLeaf<(imm), [{
+ // maskImm predicate - True if immediate is a run of ones.
+ unsigned mb, me;
+ if (N->getValueType(0) == MVT::i32)
+ return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ else
+ return false;
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field. Used by instructions like 'addi'.
+ if (N->getValueType(0) == MVT::i32)
+ return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+ else
+ return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field. Used by instructions like 'ori'.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16-bits are zero. There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+ // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'xoris'.
+ return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+ // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'addis'. Identical to
+ // imm16ShiftedZExt in 32-bit mode.
+ if (N->getZExtValue() & 0xFFFF) return false;
+ if (N->getValueType(0) == MVT::i32)
+ return true;
+ // For 64-bit, make sure it is sext right.
+ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
+}], HI16>;
+
+// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// restricted memrix (offset/4) constants are alignment sensitive. If these
+// offsets are hidden behind TOC entries than the values of the lower-order
+// bits cannot be checked directly. As a result, we need to also incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+ (ops node:$val, node:$base, node:$offset),
+ (pre_store node:$val, node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+class isPPC64 { bit PPC64 = 1; }
+class isDOT {
+ list<Register> Defs = [CR0];
+ bit RC = 1;
+}
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+def s5imm : Operand<i32> {
+ let PrintMethod = "printS5ImmOperand";
+}
+def u5imm : Operand<i32> {
+ let PrintMethod = "printU5ImmOperand";
+}
+def u6imm : Operand<i32> {
+ let PrintMethod = "printU6ImmOperand";
+}
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def directbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+}
+def condbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getCondBrEncoding";
+}
+def calltarget : Operand<iPTR> {
+ let EncoderMethod = "getDirectBrEncoding";
+}
+def aaddr : Operand<iPTR> {
+ let PrintMethod = "printAbsAddrOperand";
+}
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+ let EncoderMethod = "getHA16Encoding";
+}
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+ let EncoderMethod = "getLO16Encoding";
+}
+def crbitm: Operand<i8> {
+ let PrintMethod = "printcrbitm";
+ let EncoderMethod = "get_crbitm_encoding";
+}
+// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def ptr_rc_nor0 : PointerLikeRegClass<1>;
+
+def dispRI : Operand<iPTR>;
+def dispRIX : Operand<iPTR>;
+
+def memri : Operand<iPTR> {
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRIEncoding";
+}
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg);
+}
+def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
+ let PrintMethod = "printMemRegImmShifted";
+ let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRIXEncoding";
+}
+
+// A single-register address. This is used with the SjLj
+// pseudo-instructions.
+def memr : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc:$ptrreg);
+}
+
+// PowerPC Predicate operand.
+def pred : Operand<OtherVT> {
+ let PrintMethod = "printPredicateOperand";
+ let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg);
+}
+
+// Define PowerPC specific addressing mode.
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+
+// The address in a single register. This is used with the SjLj
+// pseudo-instructions.
+def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
+def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
+def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
+ "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+let Defs = [R1], Uses = [R1] in
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC",
+ [(set i32:$result,
+ (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
+ PPC970_Single = 1 in {
+ // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
+ // because either operand might become the first operand in an isel, and
+ // that operand cannot be r0.
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond,
+ GPRC_NOR0:$T, GPRC_NOR0:$F,
+ i32imm:$BROPC), "#SELECT_CC_I4",
+ []>;
+ def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond,
+ G8RC_NOX0:$T, G8RC_NOX0:$F,
+ i32imm:$BROPC), "#SELECT_CC_I8",
+ []>;
+ def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
+ i32imm:$BROPC), "#SELECT_CC_F4",
+ []>;
+ def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
+ i32imm:$BROPC), "#SELECT_CC_F8",
+ []>;
+ def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
+ i32imm:$BROPC), "#SELECT_CC_VRRC",
+ []>;
+}
+
+// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
+// scavenge a register for it.
+let mayStore = 1 in
+def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F),
+ "#SPILL_CR", []>;
+
+// RESTORE_CR - Indicate that we're restoring the CR register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
+ "#RESTORE_CR", []>;
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
+ [(retflag)]>;
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+}
+
+let Defs = [LR] in
+ def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let isBarrier = 1 in {
+ def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
+ "b $dst", BrB,
+ [(br bb:$dst)]>;
+ }
+
+ // BCC represents an arbitrary conditional branch on a predicate.
+ // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ let isCodeGenOnly = 1 in
+ def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc} ${cond:reg}, $dst"
+ /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+
+ let Defs = [CTR], Uses = [CTR] in {
+ def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
+ }
+}
+
+// The unconditional BCL used by the SjLj setjmp code.
+let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
+ let Defs = [LR], Uses = [RM] in {
+ def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bcl 20, 31, $dst">;
+ }
+}
+
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ "bla $func", BrB, [(PPCcall (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", BrB, [(PPCbctrl)]>,
+ Requires<[In32BitMode]>;
+ }
+}
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset),
+ "#TC_RETURNd $dst $offset",
+ []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset),
+ "#TC_RETURNa $func $offset",
+ [(PPCtc_return (i32 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
+ "#TC_RETURNr $dst $offset",
+ []>;
+
+
+let isCodeGenOnly = 1 in {
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In32BitMode]>;
+
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
+
+// DCB* instructions.
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
+ "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst),
+ "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst),
+ "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst),
+ "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst),
+ "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst),
+ "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst),
+ "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
+ "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+
+def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
+ (DCBT xoaddr:$dst)>;
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8",
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8",
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8",
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8",
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8",
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8",
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16",
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16",
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16",
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16",
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16",
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16",
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32",
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32",
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32",
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32",
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32",
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32",
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8",
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
+
+ def ATOMIC_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8",
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16",
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32",
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
+ "lwarx $rD, $src", LdStLWARX,
+ [(set i32:$rD, (PPClarx xoaddr:$src))]>;
+
+let Defs = [CR0] in
+def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwcx. $rS, $dst", LdStSTWCX,
+ [(PPCstcx i32:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Load Instructions.
+//
+
+// Unindexed (r+i) Loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStLoad,
+ [(set i32:$rD, (zextloadi8 iaddr:$src))]>;
+def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set i32:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStLoad,
+ [(set i32:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStLoad,
+ [(set i32:$rD, (load iaddr:$src))]>;
+
+def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
+ "lfs $rD, $src", LdStLFD,
+ [(set f32:$rD, (load iaddr:$src))]>;
+def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
+ "lfd $rD, $src", LdStLFD,
+ [(set f64:$rD, (load iaddr:$src))]>;
+
+
+// Unindexed (r+i) Loads with Update (preinc).
+let mayLoad = 1 in {
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lhau $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lfsu $rD, $addr", LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lfdu $rD, $addr", LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+
+// Indexed (r+r) Loads with Update (preinc).
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", LdStLoadUpd,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lfsux $rD, $addr", LdStLFDU,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lfdux $rD, $addr", LdStLFDU,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+// Indexed (r+r) Loads.
+//
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStLoad,
+ [(set i32:$rD, (zextloadi8 xaddr:$src))]>;
+def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set i32:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStLoad,
+ [(set i32:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStLoad,
+ [(set i32:$rD, (load xaddr:$src))]>;
+
+
+def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
+ "lhbrx $rD, $src", LdStLoad,
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
+def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
+ "lwbrx $rD, $src", LdStLoad,
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
+
+def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
+ "lfsx $frD, $src", LdStLFD,
+ [(set f32:$frD, (load xaddr:$src))]>;
+def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
+ "lfdx $frD, $src", LdStLFD,
+ [(set f64:$frD, (load xaddr:$src))]>;
+
+def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwax $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
+def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwzx $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// PPC32 Store Instructions.
+//
+
+// Unindexed (r+i) Stores.
+let PPC970_Unit = 2 in {
+def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
+ "stb $rS, $src", LdStStore,
+ [(truncstorei8 i32:$rS, iaddr:$src)]>;
+def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
+ "sth $rS, $src", LdStStore,
+ [(truncstorei16 i32:$rS, iaddr:$src)]>;
+def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
+ "stw $rS, $src", LdStStore,
+ [(store i32:$rS, iaddr:$src)]>;
+def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
+ "stfs $rS, $dst", LdStSTFD,
+ [(store f32:$rS, iaddr:$dst)]>;
+def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
+ "stfd $rS, $dst", LdStSTFD,
+ [(store f64:$rS, iaddr:$dst)]>;
+}
+
+// Unindexed (r+i) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst),
+ "stfsu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst),
+ "stfdu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFSU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFDU $rS, iaddroff:$ptroff, $ptrreg)>;
+
+// Indexed (r+r) Stores.
+let PPC970_Unit = 2 in {
+def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStStore,
+ [(truncstorei8 i32:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStStore,
+ [(truncstorei16 i32:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStStore,
+ [(store i32:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+
+def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthbrx $rS, $dst", LdStStore,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
+ PPC970_DGroup_Cracked;
+def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwbrx $rS, $dst", LdStStore,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
+ PPC970_DGroup_Cracked;
+
+def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfiwx $frS, $dst", LdStSTFD,
+ [(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
+
+def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
+ "stfsx $frS, $dst", LdStSTFD,
+ [(store f32:$frS, xaddr:$dst)]>;
+def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfdx $frS, $dst", LdStSTFD,
+ [(store f64:$frS, xaddr:$dst)]>;
+}
+
+// Indexed (r+r) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst),
+ "stfsux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst),
+ "stfdux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFDUX $rS, $ptrreg, $ptroff)>;
+
+def SYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "sync", LdStSync,
+ [(int_ppc_sync)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Arithmetic Instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm),
+ "addi $rD, $rA, $imm", IntSimple,
+ [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
+def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>,
+ PPC970_DGroup_Cracked;
+def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic. $rD, $rA, $imm", IntGeneral,
+ []>;
+}
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm),
+ "addis $rD, $rA, $imm", IntSimple,
+ [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
+let isCodeGenOnly = 1 in
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym),
+ "la $rD, $sym($rA)", IntGeneral,
+ [(set i32:$rD, (add i32:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "mulli $rD, $rA, $imm", IntMulLI,
+ [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
+def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
+ "li $rD, $imm", IntSimple,
+ [(set i32:$rD, immSExt16:$imm)]>;
+ def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
+ "lis $rD, $imm", IntSimple,
+ [(set i32:$rD, imm16ShiftedSExt:$imm)]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntSimple,
+ [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
+def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntSimple,
+ [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntSimple,
+ [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
+def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntSimple,
+ [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
+ []>;
+def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
+ "cmpwi $crD, $rA, $imm", IntCompare>;
+def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "cmplwi $dst, $src1, $src2", IntCompare>;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nand $rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
+def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "and $rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
+def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "andc $rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
+def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
+def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nor $rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
+def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "orc $rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
+def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "eqv $rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
+def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "xor $rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
+def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "slw $rA, $rS, $rB", IntGeneral,
+ [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
+def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "srw $rA, $rS, $rB", IntGeneral,
+ [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
+let Defs = [CARRY] in {
+def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "sraw $rA, $rS, $rB", IntShift,
+ [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+let Defs = [CARRY] in {
+def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
+ "srawi $rA, $rS, $SH", IntShift,
+ [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
+}
+def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
+ "cntlzw $rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctlz i32:$rS))]>;
+def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsb $rA, $rS", IntSimple,
+ [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
+def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsh $rA, $rS", IntSimple,
+ [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
+
+def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmpw $crD, $rA, $rB", IntCompare>;
+def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmplw $crD, $rA, $rB", IntCompare>;
+}
+let PPC970_Unit = 3 in { // FPU Operations.
+//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
+// "fcmpo $crD, $fA, $fB", FPCompare>;
+def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+
+let Uses = [RM] in {
+ def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+
+ def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
+ "frsp $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fround f64:$frB))]>;
+
+ // The frin -> nearbyint mapping is valid only in fast-math mode.
+ def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frin $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fnearbyint f64:$frB))]>;
+ def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frin $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fnearbyint f32:$frB))]>;
+
+ // These pseudos expand to rint but also set FE_INEXACT when the result does
+ // not equal the argument.
+ let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
+ def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB),
+ "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
+ def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB),
+ "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
+ }
+
+ def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frip $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fceil f64:$frB))]>;
+ def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frip $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fceil f32:$frB))]>;
+ def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB),
+ "friz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (ftrunc f64:$frB))]>;
+ def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB),
+ "friz $frD, $frB", FPGeneral,
+ [(set f32:$frD, (ftrunc f32:$frB))]>;
+ def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frim $frD, $frB", FPGeneral,
+ [(set f64:$frD, (ffloor f64:$frB))]>;
+ def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frim $frD, $frB", FPGeneral,
+ [(set f32:$frD, (ffloor f32:$frB))]>;
+
+ def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fsqrt $frD, $frB", FPSqrt,
+ [(set f64:$frD, (fsqrt f64:$frB))]>;
+ def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fsqrts $frD, $frB", FPSqrt,
+ [(set f32:$frD, (fsqrt f32:$frB))]>;
+ }
+}
+
+/// Note that FMR is defined as pseudo-ops on the PPC970 because they are
+/// often coalesced away and we don't want the dispatch group builder to think
+/// that they will fill slots (which could cause the load of a LSU reject to
+/// sneak into a d-group with a store).
+def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set f32:$frD, f32:$frB)
+ PPC970_Unit_Pseudo;
+
+let PPC970_Unit = 3 in { // FPU Operations.
+// These are artificially split into two different forms, for 4/8 byte FP.
+def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fabs f32:$frB))]>;
+def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fabs f64:$frB))]>;
+def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
+def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
+def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fneg f32:$frB))]>;
+def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fneg f64:$frB))]>;
+
+// Reciprocal estimates.
+def FRE : XForm_26<63, 24, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fre $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfre f64:$frB))]>;
+def FRES : XForm_26<59, 24, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fres $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfre f32:$frB))]>;
+def FRSQRTE : XForm_26<63, 26, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frsqrte $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
+def FRSQRTES : XForm_26<59, 26, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frsqrtes $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
+}
+
+// XL-Form instructions. condition register logical ops.
+//
+def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA),
+ "mcrf $BF, $BFA", BrMCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "creqv $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "cror $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+let isCodeGenOnly = 1 in {
+def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
+ "creqv $dst, $dst, $dst", BrCR,
+ []>;
+
+def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
+ "crxor $dst, $dst, $dst", BrCR,
+ []>;
+
+let Defs = [CR1EQ], CRD = 6 in {
+def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
+ "creqv 6, 6, 6", BrCR,
+ [(PPCcr6set)]>;
+
+def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
+ "crxor 6, 6, 6", BrCR,
+ [(PPCcr6unset)]>;
+}
+}
+
+// XFX-Form instructions. Instructions that deal with SPRs.
+//
+let Uses = [CTR] in {
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [LR] in {
+def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR] in {
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like
+// a GPR on the PPC970. As such, copies in and out have the same performance
+// characteristics as an OR instruction.
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+let isCodeGenOnly = 1 in {
+ def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
+ (outs VRSAVERC:$reg), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT),
+ (ins VRSAVERC:$reg),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
+// so we'll need to scavenge a register for it.
+let mayStore = 1 in
+def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+ "#SPILL_VRSAVE", []>;
+
+// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+ "#RESTORE_VRSAVE", []>;
+
+def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters;
+// declaring that here gives the local register allocator problems with this:
+// vreg = MCRF CR0
+// MFCR <kill of whatever preg got assigned to vreg>
+// while not declaring it breaks DeadMachineInstructionElimination.
+// As it turns out, in all cases where we currently use this,
+// we're only interested in one subregister of it. Represent this in the
+// instruction to keep the register allocator from becoming confused.
+//
+// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
+let isCodeGenOnly = 1 in
+def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "#MFCRpseud", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "mfocrf $rT, $FXM", SprMFCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+// Pseudo instruction to perform FADD in round-to-zero mode.
+let usesCustomInserter = 1, Uses = [RM] in {
+ def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "",
+ [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+}
+
+// The above pseudo gets expanded to make use of the following instructions
+// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
+let Uses = [RM], Defs = [RM] in {
+ def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+ "mtfsb0 $FM", IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+ "mtfsb1 $FM", IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT),
+ "mtfsf $FM, $rT", IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+let Uses = [RM] in {
+ def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
+ "mffs $rT", IntMFFS,
+ [(set f64:$rT, (PPCmffs))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// XO-Form instructions. Arithmetic instructions that can set overflow bit
+//
+def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "add $rT, $rA, $rB", IntSimple,
+ [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
+let Defs = [CARRY] in {
+def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
+ PPC970_DGroup_Cracked;
+}
+def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divw $rT, $rA, $rB", IntDivW,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divwu $rT, $rA, $rB", IntDivW,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhw $rT, $rA, $rB", IntMulHW,
+ [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
+def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhwu $rT, $rA, $rB", IntMulHWU,
+ [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
+def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mullw $rT, $rA, $rB", IntMulHW,
+ [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
+def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
+let Defs = [CARRY] in {
+def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "neg $rT, $rA", IntSimple,
+ [(set i32:$rT, (ineg i32:$rA))]>;
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
+def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, -1))]>;
+def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, 0))]>;
+def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
+def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set i32:$rT, (sube -1, i32:$rA))]>;
+def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set i32:$rT, (sube 0, i32:$rA))]>;
+}
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3 in { // FPU Operations.
+let Uses = [RM] in {
+ def FMADD : AForm_1<63, 29,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
+ def FMADDS : AForm_1<59, 29,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
+ def FMSUB : AForm_1<63, 28,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set f64:$FRT,
+ (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
+ def FMSUBS : AForm_1<59, 28,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set f32:$FRT,
+ (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
+ def FNMADD : AForm_1<63, 31,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set f64:$FRT,
+ (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
+ def FNMADDS : AForm_1<59, 31,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set f32:$FRT,
+ (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
+ def FNMSUB : AForm_1<63, 30,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ (fneg f64:$FRB))))]>;
+ def FNMSUBS : AForm_1<59, 30,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ (fneg f32:$FRB))))]>;
+}
+// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
+// having 4 of these, force the comparison to always be an 8-byte double (code
+// should use an FMRSD if the input comparison value really wants to be a float)
+// and 4/8 byte forms for the result and operand type..
+def FSELD : AForm_1<63, 23,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
+def FSELS : AForm_1<63, 23,
+ (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
+let Uses = [RM] in {
+ def FADD : AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPAddSub,
+ [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
+ def FADDS : AForm_2<59, 21,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fadds $FRT, $FRA, $FRB", FPGeneral,
+ [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
+ def FDIV : AForm_2<63, 18,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fdiv $FRT, $FRA, $FRB", FPDivD,
+ [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
+ def FDIVS : AForm_2<59, 18,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fdivs $FRT, $FRA, $FRB", FPDivS,
+ [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
+ def FMUL : AForm_3<63, 25,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
+ "fmul $FRT, $FRA, $FRC", FPFused,
+ [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
+ def FMULS : AForm_3<59, 25,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
+ "fmuls $FRT, $FRA, $FRC", FPGeneral,
+ [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
+ def FSUB : AForm_2<63, 20,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fsub $FRT, $FRA, $FRB", FPAddSub,
+ [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
+ def FSUBS : AForm_2<59, 20,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fsubs $FRT, $FRA, $FRB", FPGeneral,
+ [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
+ }
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+ def ISEL : AForm_4<31, 15,
+ (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond),
+ "isel $rT, $rA, $rB, $cond", IntGeneral,
+ []>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// M-Form instructions. rotate and mask instructions.
+//
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+def RLWIMI : MForm_2<20,
+ (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
+ []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+def RLWINM : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+def RLWINMo : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>, isDOT, PPC970_DGroup_Cracked;
+def RLWNM : MForm_2<23,
+ (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
+ []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Patterns
+//
+
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+ (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
+// Implement the 'not' operation with the NOR instruction.
+def NOT : Pat<(not i32:$in),
+ (NOR $in, $in)>;
+
+// ADD an arbitrary immediate.
+def : Pat<(add i32:$in, imm:$imm),
+ (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+// OR an arbitrary immediate.
+def : Pat<(or i32:$in, imm:$imm),
+ (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// XOR an arbitrary immediate.
+def : Pat<(xor i32:$in, imm:$imm),
+ (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// SUBFIC
+def : Pat<(sub immSExt16:$imm, i32:$in),
+ (SUBFIC $in, imm:$imm)>;
+
+// SHL/SRL
+def : Pat<(shl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>;
+
+// ROTL
+def : Pat<(rotl i32:$in, i32:$sh),
+ (RLWNM $in, $sh, 0, 31)>;
+def : Pat<(rotl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, 31)>;
+
+// RLWNM
+def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
+ (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+
+// Calls
+def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
+ (BL tglobaladdr:$dst)>;
+def : Pat<(PPCcall (i32 texternalsym:$dst)),
+ (BL texternalsym:$dst)>;
+
+
+def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
+ (TCRETURNri CTRRC:$dst, imm:$imm)>;
+
+
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in),
+ (ADDIS $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in),
+ (ADDI $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS $in, tglobaladdr:$g)>;
+def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS $in, tconstpool:$g)>;
+def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS $in, tjumptable:$g)>;
+def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS $in, tblockaddress:$g)>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
+def : Pat<(sra i32:$rS, i32:$rB),
+ (SRAW $rS, $rB)>;
+def : Pat<(srl i32:$rS, i32:$rB),
+ (SRW $rS, $rB)>;
+def : Pat<(shl i32:$rS, i32:$rB),
+ (SLW $rS, $rB)>;
+
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX xaddr:$src)>;
+def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 xaddr:$src)),
+ (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+
+def : Pat<(f64 (fextend f32:$src)),
+ (COPY_TO_REGCLASS $src, F8RC)>;
+
+// Memory barriers
+def : Pat<(membarrier (i32 imm /*ll*/),
+ (i32 imm /*ls*/),
+ (i32 imm /*sl*/),
+ (i32 imm /*ss*/),
+ (i32 imm /*device*/)),
+ (SYNC)>;
+
+def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
+
+// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+
+include "PPCInstrAltivec.td"
+include "PPCInstr64Bit.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
new file mode 100644
index 000000000000..cfcd7490ed0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -0,0 +1,471 @@
+//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the 32-bit PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "PPCJITInfo.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+#define BUILD_ADDIS(RD,RS,IMM16) \
+ ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_ORI(RD,RS,UIMM16) \
+ ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_ORIS(RD,RS,UIMM16) \
+ ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_RLDICR(RD,RS,SH,ME) \
+ ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
+ (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
+#define BUILD_MTSPR(RS,SPR) \
+ ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
+#define BUILD_BCCTRx(BO,BI,LINK) \
+ ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
+#define BUILD_B(TARGET, LINK) \
+ ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
+
+// Pseudo-ops
+#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
+#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
+#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
+#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
+
+static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
+ intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
+ unsigned *AtI = (unsigned*)(intptr_t)At;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target
+ } else if (!is64Bit) {
+ AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_MTCTR(12); // mtctr r12
+ AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
+ } else {
+ AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
+ AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
+ AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[5] = BUILD_MTCTR(12); // mtctr r12
+ AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
+ }
+}
+
+extern "C" void PPC32CompilationCallback();
+extern "C" void PPC64CompilationCallback();
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ !(defined(__ppc64__) || defined(__FreeBSD__))
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because we the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC32CompilationCallback\n"
+"_PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // PowerPC32 ABI linkage - 24 bytes
+ // parameters - 32 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 32 bytes
+ "mflr r0\n"
+ "stw r0, 8(r1)\n"
+ "stwu r1, -208(r1)\n"
+ // Save all int arg registers
+ "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
+ "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
+ "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
+ "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
+ "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
+ "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
+ "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
+ "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
+ "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
+ "stfd f1, 72(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr r3, r0\n"
+ "lwz r2, 208(r1)\n" // stub's frame
+ "lwz r4, 8(r2)\n" // stub's lr
+ "li r5, 0\n" // 0 == 32 bit
+ "bl _LLVMPPCCompilationCallback\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
+ "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
+ "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
+ "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
+ "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
+ "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
+ "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
+ "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
+ "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
+ "lfd f1, 72(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz r1, 208(r1)\n"
+ "lwz r2, 8(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+
+#elif defined(__PPC__) && !defined(__ppc64__)
+// Linux & FreeBSD / PPC 32 support
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because we the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC32CompilationCallback\n"
+"PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // 8 double registers - 64 bytes
+ // 8 int registers - 32 bytes
+ "mflr 0\n"
+ "stw 0, 4(1)\n"
+ "stwu 1, -104(1)\n"
+ // Save all int arg registers
+ "stw 10, 100(1)\n" "stw 9, 96(1)\n"
+ "stw 8, 92(1)\n" "stw 7, 88(1)\n"
+ "stw 6, 84(1)\n" "stw 5, 80(1)\n"
+ "stw 4, 76(1)\n" "stw 3, 72(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 8, 64(1)\n"
+ "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
+ "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
+ "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
+ "stfd 1, 8(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr 3, 0\n"
+ "lwz 5, 104(1)\n" // stub's frame
+ "lwz 4, 4(5)\n" // stub's lr
+ "li 5, 0\n" // 0 == 32 bit
+ "bl LLVMPPCCompilationCallback\n"
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
+ "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
+ "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
+ "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
+ // Restore all FP arg registers
+ "lfd 8, 64(1)\n"
+ "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
+ "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
+ "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
+ "lfd 1, 8(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz 1, 104(1)\n"
+ "lwz 0, 4(1)\n"
+ "mtlr 0\n"
+ "bctr\n"
+ );
+#else
+void PPC32CompilationCallback() {
+ llvm_unreachable("This is not a power pc, you can't execute this!");
+}
+#endif
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ defined(__ppc64__)
+#ifdef __ELF__
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC64CompilationCallback\n"
+ ".section \".opd\",\"aw\",@progbits\n"
+ ".align 3\n"
+"PPC64CompilationCallback:\n"
+ ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
+ ".size PPC64CompilationCallback,24\n"
+ ".previous\n"
+ ".align 4\n"
+ ".type PPC64CompilationCallback,@function\n"
+".L.PPC64CompilationCallback:\n"
+#else
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC64CompilationCallback\n"
+"_PPC64CompilationCallback:\n"
+#endif
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // Set up a proper stack frame
+ // Layout
+ // PowerPC64 ABI linkage - 48 bytes
+ // parameters - 64 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 64 bytes
+ "mflr 0\n"
+ "std 0, 16(1)\n"
+ "stdu 1, -280(1)\n"
+ // Save all int arg registers
+ "std 10, 272(1)\n" "std 9, 264(1)\n"
+ "std 8, 256(1)\n" "std 7, 248(1)\n"
+ "std 6, 240(1)\n" "std 5, 232(1)\n"
+ "std 4, 224(1)\n" "std 3, 216(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 13, 208(1)\n" "stfd 12, 200(1)\n"
+ "stfd 11, 192(1)\n" "stfd 10, 184(1)\n"
+ "stfd 9, 176(1)\n" "stfd 8, 168(1)\n"
+ "stfd 7, 160(1)\n" "stfd 6, 152(1)\n"
+ "stfd 5, 144(1)\n" "stfd 4, 136(1)\n"
+ "stfd 3, 128(1)\n" "stfd 2, 120(1)\n"
+ "stfd 1, 112(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 1.
+ "mr 3, 0\n" // return address (still in r0)
+ "ld 5, 280(1)\n" // stub's frame
+ "ld 4, 16(5)\n" // stub's lr
+ "li 5, 1\n" // 1 == 64 bit
+#ifdef __ELF__
+ "bl LLVMPPCCompilationCallback\n"
+ "nop\n"
+#else
+ "bl _LLVMPPCCompilationCallback\n"
+#endif
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "ld 10, 272(1)\n" "ld 9, 264(1)\n"
+ "ld 8, 256(1)\n" "ld 7, 248(1)\n"
+ "ld 6, 240(1)\n" "ld 5, 232(1)\n"
+ "ld 4, 224(1)\n" "ld 3, 216(1)\n"
+ // Restore all FP arg registers
+ "lfd 13, 208(1)\n" "lfd 12, 200(1)\n"
+ "lfd 11, 192(1)\n" "lfd 10, 184(1)\n"
+ "lfd 9, 176(1)\n" "lfd 8, 168(1)\n"
+ "lfd 7, 160(1)\n" "lfd 6, 152(1)\n"
+ "lfd 5, 144(1)\n" "lfd 4, 136(1)\n"
+ "lfd 3, 128(1)\n" "lfd 2, 120(1)\n"
+ "lfd 1, 112(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "ld 1, 280(1)\n"
+ "ld 0, 16(1)\n"
+ "mtlr 0\n"
+ // XXX: any special TOC handling in the ELF case for JIT?
+ "bctr\n"
+ );
+#else
+void PPC64CompilationCallback() {
+ llvm_unreachable("This is not a power pc, you can't execute this!");
+}
+#endif
+
+extern "C" {
+LLVM_LIBRARY_VISIBILITY void *
+LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
+ // Adjust the pointer to the address of the call instruction in the stub
+ // emitted by emitFunctionStub, rather than the instruction after it.
+ unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
+ unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
+
+ void *Target = JITCompilerFunction(StubCallAddr);
+
+ // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
+ // it to branch directly to the destination. If so, rewrite it so it does not
+ // need to go through the stub anymore.
+ unsigned OrigCallInst = *OrigCallAddr;
+ if ((OrigCallInst >> 26) == 18) { // Direct call.
+ intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ // Clear the original target out.
+ OrigCallInst &= (63 << 26) | 3;
+ // Fill in the new target.
+ OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
+ // Replace the call.
+ *OrigCallAddr = OrigCallInst;
+ }
+ }
+
+ // Assert that we are coming from a stub that was created with our
+ // emitFunctionStub.
+ if ((*StubCallAddr >> 26) == 18)
+ StubCallAddr -= 3;
+ else {
+ assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
+ StubCallAddr -= is64Bit ? 9 : 6;
+ }
+
+ // Rewrite the stub with an unconditional branch to the target, for any users
+ // who took the address of the stub.
+ EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
+
+ // Put the address of the target function to call and the address to return to
+ // after calling the target function in a place that is easy to get on the
+ // stack after we restore all regs.
+ return Target;
+}
+}
+
+
+
+TargetJITInfo::LazyResolverFn
+PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
+ JITCompilerFunction = Fn;
+ return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
+}
+
+TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
+ // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
+ // instructions to save the caller's address if this is a lazy-compilation
+ // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
+ // into a register and jump through it.
+ StubLayout Result = {10*4, 4};
+ return Result;
+}
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+defined(__APPLE__)
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+#endif
+
+void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
+ Fn != (void*)(intptr_t)PPC64CompilationCallback) {
+ void *Addr = (void*)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 7*4);
+ return Addr;
+ }
+
+ void *Addr = (void*)JCE.getCurrentPCValue();
+ if (is64Bit) {
+ JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
+ } else if (TM.getSubtargetImpl()->isDarwinABI()){
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
+ } else {
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
+ }
+ intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 10*4);
+ return Addr;
+}
+
+
+void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((PPC::RelocationType)MR->getRelocationType()) {
+ default: llvm_unreachable("Unknown relocation type!");
+ case PPC::reloc_pcrel_bx:
+ // PC-relative relocation for b and bl instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
+ break;
+ case PPC::reloc_pcrel_bcx:
+ // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
+ // bcx instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
+ break;
+ case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
+ case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
+ ResultPtr += MR->getConstantVal();
+
+ // If this is a high-part access, get the high-part.
+ if (MR->getRelocationType() == PPC::reloc_absolute_high) {
+ // If the low part will have a carry (really a borrow) from the low
+ // 16-bits into the high 16, add a bit to borrow from.
+ if (((int)ResultPtr << 16) < 0)
+ ResultPtr += 1 << 16;
+ ResultPtr >>= 16;
+ }
+
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
+ unsigned HighBits = *RelocPos & ~65535;
+ *RelocPos = LowBits | HighBits; // Slam into low 16-bits
+ break;
+ }
+ case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
+ ResultPtr += MR->getConstantVal();
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
+ unsigned HighBits = *RelocPos & 0xFFFF0003;
+ *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
+ break;
+ }
+ }
+ }
+}
+
+void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Old, 7*4);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h
new file mode 100644
index 000000000000..46d4a08eb687
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h
@@ -0,0 +1,49 @@
+//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_JITINFO_H
+#define POWERPC_JITINFO_H
+
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class PPCTargetMachine;
+
+ class PPCJITInfo : public TargetJITInfo {
+ protected:
+ PPCTargetMachine &TM;
+ bool is64Bit;
+ public:
+ PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
+ useGOT = 0;
+ is64Bit = tmIs64Bit;
+ }
+
+ virtual StubLayout getStubLayout();
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
new file mode 100644
index 000000000000..9b0df3e86a75
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -0,0 +1,194 @@
+//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PPC MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
+ return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+ MCContext &Ctx = AP.OutContext;
+
+ SmallString<128> Name;
+ if (!MO.isGlobal()) {
+ assert(MO.isSymbol() && "Isn't a symbol reference");
+ Name += AP.MAI->getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB ||
+ (MO.getTargetFlags() & PPCII::MO_NLP_FLAG))
+ isImplicitlyPrivate = true;
+
+ AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ }
+
+ // If the target flags on the operand changes the name of the symbol, do that
+ // before we return the symbol.
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI(AP).getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ }
+ return Sym;
+ }
+
+ // If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
+ // then add the suffix.
+ if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ MachineModuleInfoMachO &MachO = getMachOMMI(AP);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ?
+ MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym);
+
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ AsmPrinter &Printer, bool isDarwin) {
+ MCContext &Ctx = Printer.OutContext;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK;
+
+ switch (access) {
+ case PPCII::MO_HA16: RefKind = isDarwin ?
+ MCSymbolRefExpr::VK_PPC_DARWIN_HA16 :
+ MCSymbolRefExpr::VK_PPC_GAS_HA16;
+ break;
+ case PPCII::MO_LO16: RefKind = isDarwin ?
+ MCSymbolRefExpr::VK_PPC_DARWIN_LO16 :
+ MCSymbolRefExpr::VK_PPC_GAS_LO16;
+ break;
+ case PPCII::MO_TPREL16_HA: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA;
+ break;
+ case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
+ break;
+ case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO;
+ break;
+ case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO;
+ break;
+ case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO;
+ break;
+ }
+
+ // FIXME: This isn't right, but we don't have a good way to express this in
+ // the MC Level, see below.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG)
+ RefKind = MCSymbolRefExpr::VK_None;
+
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+
+ // Subtract off the PIC base if required.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+
+ const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+ Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx);
+ // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16,
+ // since it is not a symbol!
+ }
+
+ return MCOperand::CreateExpr(Expr);
+}
+
+void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP,
+ isDarwin);
+ break;
+ case MachineOperand::MO_RegisterMask:
+ continue;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..6a0aec842be7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -0,0 +1,15 @@
+//===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void PPCFunctionInfo::anchor() { }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
new file mode 100644
index 000000000000..ee18eadf6e5f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -0,0 +1,162 @@
+//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHINE_FUNCTION_INFO_H
+#define PPC_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// PPCFunctionInfo - This class is derived from MachineFunction private
+/// PowerPC target-specific information for each MachineFunction.
+class PPCFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// FramePointerSaveIndex - Frame index of where the old frame pointer is
+ /// stored. Also used as an anchor for instructions that need to be altered
+ /// when using frame pointers (dyna_add, dyna_sub.)
+ int FramePointerSaveIndex;
+
+ /// ReturnAddrSaveIndex - Frame index of where the return address is stored.
+ ///
+ int ReturnAddrSaveIndex;
+
+ /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
+ /// function. This is only valid after the initial scan of the function by
+ /// PEI.
+ bool MustSaveLR;
+
+ /// Does this function have any stack spills.
+ bool HasSpills;
+
+ /// Does this function spill using instructions with only r+r (not r+i)
+ /// forms.
+ bool HasNonRISpills;
+
+ /// SpillsCR - Indicates whether CR is spilled in the current function.
+ bool SpillsCR;
+
+ /// Indicates whether VRSAVE is spilled in the current function.
+ bool SpillsVRSAVE;
+
+ /// LRStoreRequired - The bool indicates whether there is some explicit use of
+ /// the LR/LR8 stack slot that is not obvious from scanning the code. This
+ /// requires that the code generator produce a store of LR to the stack on
+ /// entry, even though LR may otherwise apparently not be used.
+ bool LRStoreRequired;
+
+ /// MinReservedArea - This is the frame size that is at least reserved in a
+ /// potential caller (parameter+linkage area).
+ unsigned MinReservedArea;
+
+ /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
+ /// amount the stack pointer is adjusted to make the frame bigger for tail
+ /// calls. Used for creating an area before the register spill area.
+ int TailCallSPDelta;
+
+ /// HasFastCall - Does this function contain a fast call. Used to determine
+ /// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
+ bool HasFastCall;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// VarArgsStackOffset - StackOffset for start of stack
+ /// arguments.
+ int VarArgsStackOffset;
+ /// VarArgsNumGPR - Index of the first unused integer
+ /// register for parameter passing.
+ unsigned VarArgsNumGPR;
+ /// VarArgsNumFPR - Index of the first unused double
+ /// register for parameter passing.
+ unsigned VarArgsNumFPR;
+
+ /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
+ int CRSpillFrameIndex;
+
+public:
+ explicit PPCFunctionInfo(MachineFunction &MF)
+ : FramePointerSaveIndex(0),
+ ReturnAddrSaveIndex(0),
+ HasSpills(false),
+ HasNonRISpills(false),
+ SpillsCR(false),
+ SpillsVRSAVE(false),
+ LRStoreRequired(false),
+ MinReservedArea(0),
+ TailCallSPDelta(0),
+ HasFastCall(false),
+ VarArgsFrameIndex(0),
+ VarArgsStackOffset(0),
+ VarArgsNumGPR(0),
+ VarArgsNumFPR(0),
+ CRSpillFrameIndex(0) {}
+
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
+ int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
+ void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
+
+ unsigned getMinReservedArea() const { return MinReservedArea; }
+ void setMinReservedArea(unsigned size) { MinReservedArea = size; }
+
+ int getTailCallSPDelta() const { return TailCallSPDelta; }
+ void setTailCallSPDelta(int size) { TailCallSPDelta = size; }
+
+ /// MustSaveLR - This is set when the prolog/epilog inserter does its initial
+ /// scan of the function. It is true if the LR/LR8 register is ever explicitly
+ /// defined/clobbered in the machine function (e.g. by calls and movpctolr,
+ /// which is used in PIC generation), or if the LR stack slot is explicitly
+ /// referenced by builtin_return_address.
+ void setMustSaveLR(bool U) { MustSaveLR = U; }
+ bool mustSaveLR() const { return MustSaveLR; }
+
+ void setHasSpills() { HasSpills = true; }
+ bool hasSpills() const { return HasSpills; }
+
+ void setHasNonRISpills() { HasNonRISpills = true; }
+ bool hasNonRISpills() const { return HasNonRISpills; }
+
+ void setSpillsCR() { SpillsCR = true; }
+ bool isCRSpilled() const { return SpillsCR; }
+
+ void setSpillsVRSAVE() { SpillsVRSAVE = true; }
+ bool isVRSAVESpilled() const { return SpillsVRSAVE; }
+
+ void setLRStoreRequired() { LRStoreRequired = true; }
+ bool isLRStoreRequired() const { return LRStoreRequired; }
+
+ void setHasFastCall() { HasFastCall = true; }
+ bool hasFastCall() const { return HasFastCall;}
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ int getVarArgsStackOffset() const { return VarArgsStackOffset; }
+ void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; }
+
+ unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; }
+ void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; }
+
+ unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
+ void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+
+ int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
+ void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
+};
+
+} // end of namespace llvm
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
new file mode 100644
index 000000000000..17b836d1ed97
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle without using vperm.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 292 entries have cost 1
+// 1384 entries have cost 2
+// 3061 entries have cost 3
+// 1733 entries have cost 4
+// 60 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS
+ 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS
+ 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0>
+ 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0>
+ 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS
+ 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5>
+ 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0>
+ 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS
+ 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1>
+ 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7>
+ 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7>
+ 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1>
+ 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS
+ 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0>
+ 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2>
+ 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0>
+ 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5>
+ 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1>
+ 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7>
+ 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1>
+ 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2702272651U, // <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0>
+ 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3>
+ 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0>
+ 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6>
+ 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0>
+ 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7>
+ 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7>
+ 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0>
+ 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2>
+ 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5>
+ 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2>
+ 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5>
+ 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0>
+ 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS
+ 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3>
+ 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3>
+ 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5>
+ 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5>
+ 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7>
+ 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7>
+ 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0>
+ 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 <0,0,0,0>, <6,2,7,3>
+ 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5>
+ 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 <7,0,0,6>, RHS
+ 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1>
+ 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6>
+ 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0>
+ 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0>
+ 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS
+ 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3>
+ 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3>
+ 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6>
+ 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0>
+ 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7>
+ 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7>
+ 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS
+ 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u>
+ 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS
+ 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0>
+ 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1>
+ 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0>
+ 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5>
+ 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5>
+ 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7>
+ 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0>
+ 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, <0,0,1,1>
+ 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3>
+ 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS
+ 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5>
+ 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1>
+ 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1>
+ 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0>
+ 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS
+ 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2>
+ 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS
+ 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2>
+ 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7>
+ 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0>
+ 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1>
+ 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0>
+ 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3>
+ 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6>
+ 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0>
+ 2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0>
+ 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1>
+ 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2>
+ 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS
+ 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1>
+ 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0>
+ 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4>
+ 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS
+ 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2659160910U, // <0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1>
+ 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4>
+ 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1>
+ 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1>
+ 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0>
+ 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5>
+ 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1>
+ 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1>
+ 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1>
+ 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1>
+ 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS
+ 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7>
+ 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3>
+ 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0>
+ 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS
+ 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7>
+ 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6>
+ 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1>
+ 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1>
+ 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0>
+ 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1>
+ 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0>
+ 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1>
+ 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6>
+ 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5>
+ 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0>
+ 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7>
+ 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2>
+ 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS
+ 1146331956U, // <0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS
+ 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7>
+ 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0>
+ 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1>
+ 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2>
+ 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS
+ 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5>
+ 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7>
+ 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1>
+ 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3>
+ 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0>
+ 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1>
+ 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2>
+ 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1>
+ 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5>
+ 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6>
+ 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7>
+ 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 <7,0,2,2>, <7,0,2,2>
+ 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3>
+ 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1>
+ 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2>
+ 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2>
+ 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0>
+ 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5>
+ 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6>
+ 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7>
+ 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0>
+ 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0>
+ 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0>
+ 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2>
+ 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1>
+ 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1>
+ 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6>
+ 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7>
+ 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0>
+ 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1>
+ 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0>
+ 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2>
+ 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2>
+ 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4>
+ 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5>
+ 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0>
+ 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7>
+ 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS
+ 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0>
+ 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, <2,6,3,7>
+ 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7>
+ 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS
+ 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0>
+ 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7>
+ 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2>
+ 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7>
+ 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2>
+ 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1>
+ 3377398590U, // <0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2>
+ 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS
+ 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5>
+ 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2>
+ 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1>
+ 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1>
+ 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0>
+ 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2>
+ 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0>
+ 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3>
+ 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5>
+ 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0>
+ 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7>
+ 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7>
+ 2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2>
+ 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5>
+ 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7>
+ 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7>
+ 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3>
+ 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0>
+ 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2>
+ 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0>
+ 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6>
+ 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5>
+ 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3>
+ 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7>
+ 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0>
+ 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2>
+ 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1>
+ 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3>
+ 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3>
+ 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6>
+ 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0>
+ 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0>
+ 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7>
+ 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2>
+ 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2>
+ 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1>
+ 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2>
+ 2217363868U, // <0,3,4,3>: Cost 3 vmrghw <0,4,1,5>, <3,3,3,3>
+ 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6>
+ 2758986242U, // <0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6>
+ 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4>
+ 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7>
+ 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2>
+ 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6>
+ 3405916003U, // <0,3,5,1>: Cost 4 vmrglw <u,4,0,5>, <2,5,3,1>
+ 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2>
+ 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3>
+ 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4>
+ 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5>
+ 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7>
+ 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0>
+ 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3>
+ 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6>
+ 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7>
+ 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5>
+ 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6>
+ 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0>
+ 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0>
+ 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 <u,2,0,3>, <7,1,2,u>
+ 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3>
+ 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3>
+ 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4>
+ 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7>
+ 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6>
+ 3375409082U, // <0,3,7,7>: Cost 4 vmrglw <3,3,0,7>, <2,6,3,7>
+ 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3>
+ 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6>
+ 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 <u,6,7,0>, <3,u,6,7>
+ 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7>
+ 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0>
+ 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS
+ 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3>
+ 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0>
+ 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5>
+ 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7>
+ 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1>
+ 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3>
+ 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1>
+ 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1>
+ 67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4>
+ 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5>
+ 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2>
+ 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4>
+ 3709218102U, // <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS
+ 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7>
+ 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4>
+ 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2>
+ 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4>
+ 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4>
+ 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3>
+ 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6>
+ 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4>
+ 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4>
+ 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4>
+ 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0>
+ 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5>
+ 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4>
+ 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4>
+ 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5>
+ 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5>
+ 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS
+ 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0>
+ 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2>
+ 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5>
+ 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS
+ 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6>
+ 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5>
+ 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7>
+ 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5>
+ 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3>
+ 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4>
+ 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS
+ 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS
+ 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6>
+ 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4>
+ 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4>
+ 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0>
+ 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4>
+ 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3>
+ 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4>
+ 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS
+ 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6>
+ 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0>
+ 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2220076002U, // <0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0>
+ 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u>
+ 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u>
+ 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS
+ 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0>
+ 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3>
+ 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0>
+ 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5>
+ 2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5>
+ 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1>
+ 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0>
+ 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS
+ 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1>
+ 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2>
+ 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1>
+ 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2>
+ 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5>
+ 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0>
+ 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2>
+ 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2>
+ 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1>
+ 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5>
+ 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3>
+ 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7>
+ 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1>
+ 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5>
+ 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2>
+ 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5>
+ 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5>
+ 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1>
+ 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5>
+ 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0>
+ 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6>
+ 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5>
+ 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5>
+ 2728856466U, // <0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1>
+ 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4>
+ 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4>
+ 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3>
+ 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6>
+ 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5>
+ 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7>
+ 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0>
+ 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1>
+ 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3>
+ 3376054482U, // <0,5,5,3>: Cost 4 vmrglw <3,4,0,5>, <0,2,5,3>
+ 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5>
+ 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5>
+ 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0>
+ 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0>
+ 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 <u,7,0,5>, <5,u,7,0>
+ 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS
+ 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7>
+ 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3>
+ 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4>
+ 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS
+ 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7>
+ 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0>
+ 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0>
+ 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2>
+ 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0>
+ 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3>
+ 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1>
+ 2728858936U, // <0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5>
+ 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0>
+ 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6>
+ 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0>
+ 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5>
+ 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS
+ 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2>
+ 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u>
+ 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0>
+ 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0>
+ 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0>
+ 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1>
+ 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3>
+ 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4>
+ 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5>
+ 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0>
+ 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6>
+ 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2>
+ 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5>
+ 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1>
+ 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,2,0,3>
+ 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3>
+ 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3>
+ 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0>
+ 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS
+ 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0>
+ 3777390522U, // <0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7>
+ 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3>
+ 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3>
+ 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2>
+ 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0>
+ 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6>
+ 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5>
+ 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6>
+ 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6>
+ 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0>
+ 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1>
+ 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3>
+ 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5>
+ 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4>
+ 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS
+ 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6>
+ 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6>
+ 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1>
+ 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0>
+ 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3>
+ 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS
+ 3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5>
+ 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0>
+ 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0>
+ 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1>
+ 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3>
+ 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5>
+ 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS
+ 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6>
+ 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6>
+ 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7>
+ 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7>
+ 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1>
+ 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2>
+ 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2>
+ 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0>
+ 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5>
+ 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2>
+ 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1>
+ 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0>
+ 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5>
+ 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 1221963063U, // <0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 <6,0,7,0>, <0,0,0,0>
+ 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7>
+ 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3>
+ 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5>
+ 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6>
+ 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0>
+ 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7>
+ 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1>
+ 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0>
+ 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3>
+ 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5>
+ 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1>
+ 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3>
+ 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2>
+ 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2>
+ 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0>
+ 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3>
+ 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0>
+ 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7>
+ 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2>
+ 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0>
+ 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0>
+ 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3>
+ 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3>
+ 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7>
+ 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6>
+ 3803941469U, // <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7>
+ 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7>
+ 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7>
+ 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7>
+ 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2>
+ 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0>
+ 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3>
+ 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7>
+ 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6>
+ 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7>
+ 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7>
+ 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0>
+ 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1>
+ 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3>
+ 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2>
+ 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS
+ 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5>
+ 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7>
+ 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7>
+ 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7>
+ 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7>
+ 3780055420U, // <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3>
+ 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7>
+ 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 <u,2,3,0>, <7,6,3,7>
+ 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7>
+ 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0>
+ 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6>
+ 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0>
+ 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0>
+ 2785826342U, // <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0>
+ 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0>
+ 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3>
+ 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3>
+ 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6>
+ 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6>
+ 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7>
+ 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7>
+ 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7>
+ 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u>
+ 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3>
+ 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u>
+ 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS
+ 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS
+ 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0>
+ 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS
+ 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7>
+ 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS
+ 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, <u,0,1,2>
+ 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, <u,3,0,1>
+ 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1142175952U, // <0,u,1,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS
+ 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS
+ 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2>
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS
+ 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7>
+ 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,0,1>
+ 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u>
+ 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,3,0>
+ 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,4,5>
+ 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u>
+ 2300972360U, // <0,u,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, <u,0,1,2>
+ 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, <u,2,3,3>
+ 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, <u,3,0,1>
+ 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, <u,6,3,7>
+ 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS
+ 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0>
+ 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 <3,0,u,5>, <3,0,u,5>
+ 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS
+ 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u>
+ 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, <u,6,0,7>
+ 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3>
+ 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,6,3,7>
+ 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS
+ 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6>
+ 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u>
+ 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u>
+ 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,0,1>
+ 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 <u,7,1,0>, <u,7,1,0>
+ 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u>
+ 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,4,5>
+ 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,5,6>
+ 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u>
+ 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS
+ 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS
+ 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS
+ 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0>
+ 1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1>
+ 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0>
+ 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0>
+ 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1>
+ 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0>
+ 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7>
+ 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0>
+ 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1>
+ 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0>
+ 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS
+ 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0>
+ 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS
+ 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1>
+ 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2>
+ 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0>
+ 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2>
+ 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1>
+ 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5>
+ 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7>
+ 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7>
+ 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2>
+ 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1>
+ 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3>
+ 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1>
+ 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6>
+ 3779406377U, // <1,0,3,5>: Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0>
+ 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7>
+ 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7>
+ 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0>
+ 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 <u,4,1,0>, <4,0,5,1>
+ 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5>
+ 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5>
+ 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4>
+ 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5>
+ 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7>
+ 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 <u,7,1,0>, <4,7,5,0>
+ 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0>
+ 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1>
+ 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 <u,5,1,0>, <5,2,1,3>
+ 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4>
+ 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4>
+ 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5>
+ 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0>
+ 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0>
+ 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u>
+ 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0>
+ 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS
+ 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6>
+ 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0>
+ 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 <u,4,1,0>, <6,5,7,1>
+ 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6>
+ 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1>
+ 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1>
+ 2726237176U, // <1,0,7,0>: Cost 3 vsldoi8 <7,0,1,0>, <7,0,1,0>
+ 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1>
+ 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 <u,0,1,0>, <7,2,u,0>
+ 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 <u,3,1,0>, <7,3,0,1>
+ 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6>
+ 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0>
+ 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0>
+ 3799979628U, // <1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7>
+ 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0>
+ 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0>
+ 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1>
+ 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 <u,3,1,0>, <u,3,1,0>
+ 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1>
+ 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 <u,7,1,0>, <u,7,1,0>
+ 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1>
+ 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS
+ 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2>
+ 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3>
+ 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5>
+ 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5>
+ 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7>
+ 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7>
+ 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1>
+ 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS
+ 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2>
+ 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3>
+ 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5>
+ 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7>
+ 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1>
+ 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS
+ 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1>
+ 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3>
+ 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1>
+ 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS
+ 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5>
+ 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7>
+ 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7>
+ 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2>
+ 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1>
+ 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2>
+ 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3>
+ 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6>
+ 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5>
+ 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 <u,u,1,1>, <3,6,0,7>
+ 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7>
+ 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2>
+ 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS
+ 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5>
+ 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2>
+ 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4>
+ 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7>
+ 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7>
+ 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2760762479U, // <1,1,5,0>: Cost 3 vsldoi12 <1,5,0,1>, <1,5,0,1>
+ 2284470282U, // <1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1>
+ 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2>
+ 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3>
+ 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4>
+ 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 <u,u,1,1>, <5,6,7,0>
+ 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7>
+ 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS
+ 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7>
+ 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 <u,u,1,1>, <6,2,7,3>
+ 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7>
+ 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS
+ 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5>
+ 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6>
+ 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0>
+ 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0>
+ 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 <u,u,1,1>, <7,0,1,2>
+ 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1>
+ 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2>
+ 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3>
+ 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 <u,u,1,1>, <7,4,5,6>
+ 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5>
+ 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6>
+ 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7>
+ 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 <u,u,1,1>, <7,u,1,2>
+ 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS
+ 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, <u,3,0,1>
+ 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, <u,6,3,7>
+ 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 <u,7,1,1>, <u,7,1,1>
+ 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS
+ 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0>
+ 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2>
+ 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5>
+ 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0>
+ 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2>
+ 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2>
+ 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1>
+ 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2>
+ 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5>
+ 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6>
+ 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7>
+ 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7>
+ 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS
+ 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2>
+ 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2>
+ 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS
+ 3356860763U, // <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5>
+ 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7>
+ 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7>
+ 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544340276U, // <1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3>
+ 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2>
+ 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5>
+ 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2>
+ 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4>
+ 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7>
+ 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2>
+ 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5>
+ 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2>
+ 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6>
+ 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5>
+ 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0>
+ 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0>
+ 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2>
+ 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2>
+ 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3>
+ 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS
+ 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7>
+ 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 <7,0,1,2>, <6,6,6,6>
+ 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2>
+ 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2>
+ 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1>
+ 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2>
+ 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6>
+ 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1>
+ 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1>
+ 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1>
+ 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2>
+ 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2769389708U, // <1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1>
+ 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3>
+ 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3>
+ 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1>
+ 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5>
+ 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7>
+ 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7>
+ 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0>
+ 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1>
+ 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3>
+ 2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3>
+ 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4>
+ 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5>
+ 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7>
+ 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7>
+ 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u>
+ 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2>
+ 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3>
+ 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2>
+ 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1>
+ 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6>
+ 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5>
+ 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7>
+ 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2>
+ 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1>
+ 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, <u,0,3,1>
+ 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0>
+ 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3>
+ 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4>
+ 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3>
+ 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7>
+ 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7>
+ 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1>
+ 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1>
+ 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1>
+ 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3>
+ 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3>
+ 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4>
+ 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, <u,5,3,6>
+ 3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7>
+ 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0>
+ 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1>
+ 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5>
+ 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3>
+ 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4>
+ 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5>
+ 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, <u,5,3,6>
+ 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7>
+ 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u>
+ 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2>
+ 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7>
+ 3703866916U, // <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6>
+ 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3>
+ 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4>
+ 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5>
+ 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>, <u,1,3,6>
+ 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1>
+ 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7>
+ 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7>
+ 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3>
+ 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4>
+ 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1>
+ 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7>
+ 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7>
+ 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1>
+ 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0>
+ 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2630141478U, // <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u>
+ 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3>
+ 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4>
+ 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7>
+ 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4>
+ 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4>
+ 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5>
+ 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1>
+ 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7>
+ 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4>
+ 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1>
+ 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1>
+ 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4>
+ 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0>
+ 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4>
+ 2334870736U, // <1,4,1,4>: Cost 3 vmrglw <u,u,1,1>, <4,4,4,4>
+ 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7>
+ 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0>
+ 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1>
+ 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2>
+ 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1>
+ 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4>
+ 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 <0,3,1,4>, <2,6,3,7>
+ 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7>
+ 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4>
+ 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0>
+ 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0>
+ 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3>
+ 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6>
+ 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5>
+ 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0>
+ 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7>
+ 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4>
+ 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0>
+ 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1>
+ 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1>
+ 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3>
+ 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4>
+ 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6>
+ 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 <u,7,1,4>, <4,7,5,4>
+ 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS
+ 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1>
+ 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1>
+ 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3>
+ 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4>
+ 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5>
+ 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2>
+ 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4>
+ 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7>
+ 3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3>
+ 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 <u,3,1,4>, <6,3,4,5>
+ 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4>
+ 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6>
+ 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1>
+ 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4>
+ 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1>
+ 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 <u,0,1,4>, <7,2,u,0>
+ 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 <u,3,1,4>, <7,3,4,5>
+ 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 <u,7,1,4>, <7,4,5,6>
+ 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5>
+ 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0>
+ 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7>
+ 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4>
+ 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS
+ 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1>
+ 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 <u,3,1,4>, <u,3,1,4>
+ 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>, <u,4,5,6>
+ 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS
+ 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 <u,7,1,4>, <u,7,1,4>
+ 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0>
+ 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5>
+ 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0>
+ 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5>
+ 2781777524U, // <1,5,0,6>: Cost 3 vsldoi12 <5,0,6,1>, <5,0,6,1>
+ 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0>
+ 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1>
+ 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1>
+ 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0>
+ 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0>
+ 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5>
+ 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1>
+ 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7>
+ 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7>
+ 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0>
+ 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0>
+ 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5>
+ 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2>
+ 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1>
+ 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2>
+ 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5>
+ 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7>
+ 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7>
+ 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1>
+ 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2>
+ 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1>
+ 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2>
+ 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3>
+ 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6>
+ 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5>
+ 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6>
+ 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 <u,4,1,5>, <3,7,0,1>
+ 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2>
+ 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS
+ 2687126498U, // <1,5,4,1>: Cost 3 vsldoi8 <0,4,1,5>, <4,1,5,0>
+ 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5>
+ 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4>
+ 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4>
+ 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4>
+ 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS
+ 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5>
+ 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5>
+ 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3>
+ 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS
+ 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5>
+ 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6>
+ 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7>
+ 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u>
+ 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1>
+ 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7>
+ 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 <u,4,1,5>, <6,2,7,3>
+ 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4>
+ 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6>
+ 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5>
+ 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 <u,4,1,5>, <6,6,6,6>
+ 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0>
+ 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0>
+ 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 <u,4,1,5>, <7,0,1,2>
+ 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1>
+ 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 <u,4,1,5>, <7,2,0,3>
+ 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 <u,4,1,5>, <7,3,0,1>
+ 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 <u,4,1,5>, <7,4,5,6>
+ 3389418714U, // <1,5,7,5>: Cost 4 vmrglw <5,6,1,7>, <4,4,5,5>
+ 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6>
+ 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 <u,4,1,5>, <7,7,7,7>
+ 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 <u,4,1,5>, <7,u,1,2>
+ 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>, <u,0,1,2>
+ 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>, <u,2,3,3>
+ 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>, <u,3,0,1>
+ 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 <u,4,1,5>, <u,4,1,5>
+ 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>, <u,6,3,7>
+ 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0>
+ 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0>
+ 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6>
+ 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1>
+ 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5>
+ 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6>
+ 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6>
+ 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS
+ 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1>
+ 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3>
+ 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7>
+ 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS
+ 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6>
+ 2334872376U, // <1,6,1,6>: Cost 3 vmrglw <u,u,1,1>, <6,6,6,6>
+ 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS
+ 3759547961U, // <1,6,2,1>: Cost 4 vsldoi8 <0,2,1,6>, <2,1,6,0>
+ 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3>
+ 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1>
+ 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS
+ 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5>
+ 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7>
+ 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2>
+ 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7>
+ 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3>
+ 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3>
+ 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6>
+ 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 <u,5,1,6>, <3,5,6,7>
+ 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6>
+ 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS
+ 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5>
+ 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4>
+ 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6>
+ 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS
+ 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5>
+ 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS
+ 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7>
+ 3405989597U, // <1,6,5,2>: Cost 4 vmrglw <u,4,1,5>, <2,3,6,2>
+ 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3>
+ 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS
+ 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5>
+ 2332250936U, // <1,6,5,6>: Cost 3 vmrglw <u,4,1,5>, <6,6,6,6>
+ 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1>
+ 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6>
+ 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7>
+ 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3>
+ 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS
+ 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6>
+ 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6>
+ 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1>
+ 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1>
+ 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3>
+ 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0>
+ 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5>
+ 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5>
+ 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7>
+ 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1>
+ 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1>
+ 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1>
+ 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>, <u,3,0,1>
+ 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5>
+ 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>, <u,6,3,7>
+ 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1>
+ 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2>
+ 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3>
+ 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1>
+ 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0>
+ 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7>
+ 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7>
+ 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1>
+ 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1>
+ 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1>
+ 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3>
+ 2334872058U, // <1,7,1,3>: Cost 3 vmrglw <u,u,1,1>, <6,2,7,3>
+ 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS
+ 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1>
+ 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7>
+ 2334872386U, // <1,7,1,7>: Cost 3 vmrglw <u,u,1,1>, <6,6,7,7>
+ 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7>
+ 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3>
+ 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2>
+ 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS
+ 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5>
+ 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2>
+ 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7>
+ 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1>
+ 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1>
+ 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1>
+ 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3>
+ 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6>
+ 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1>
+ 3780127361U, // <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7>
+ 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0>
+ 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1>
+ 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1>
+ 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7>
+ 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3>
+ 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3>
+ 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4>
+ 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7>
+ 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6>
+ 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS
+ 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u>
+ 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7>
+ 2332250618U, // <1,7,5,3>: Cost 3 vmrglw <u,4,1,5>, <6,2,7,3>
+ 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS
+ 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5>
+ 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7>
+ 2332250946U, // <1,7,5,7>: Cost 3 vmrglw <u,4,1,5>, <6,6,7,7>
+ 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7>
+ 2332921954U, // <1,7,6,0>: Cost 3 vmrglw <u,5,1,6>, <5,6,7,0>
+ 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0>
+ 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7>
+ 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6>
+ 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS
+ 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7>
+ 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6>
+ 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0>
+ 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0>
+ 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1>
+ 3365528426U, // <1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1>
+ 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1>
+ 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3>
+ 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5>
+ 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7>
+ 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7>
+ 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7>
+ 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1>
+ 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u>
+ 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3>
+ 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS
+ 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u>
+ 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7>
+ 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u>
+ 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u>
+ 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u>
+ 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 <u,0,5,1>, <u,0,5,1>
+ 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u>
+ 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS
+ 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2687149326U, // <1,u,1,6>: Cost 3 vsldoi8 <0,4,1,u>, <1,6,u,7>
+ 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS
+ 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>, <u,0,1,2>
+ 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>, <u,4,5,6>
+ 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7>
+ 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u>
+ 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0>
+ 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4>
+ 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0>
+ 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1>
+ 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2>
+ 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4>
+ 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u>
+ 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>, <u,6,1,7>
+ 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3>
+ 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6>
+ 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u>
+ 2333590225U, // <1,u,7,1>: Cost 3 vmrglw <u,6,1,7>, <0,u,u,1>
+ 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <u,7,2,3>
+ 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <u,7,4,5>
+ 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5>
+ 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <u,7,6,7>
+ 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u>
+ 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS
+ 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0>
+ 2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1>
+ 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1>
+ 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5>
+ 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5>
+ 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2>
+ 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0>
+ 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1>
+ 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS
+ 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 <u,2,0,1>, <3,0,u,2>
+ 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5>
+ 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1>
+ 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7>
+ 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7>
+ 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0>
+ 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2>
+ 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0>
+ 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5>
+ 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5>
+ 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7>
+ 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7>
+ 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3>
+ 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3>
+ 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4>
+ 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, <5,6,7,0>
+ 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3>
+ 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0>
+ 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5>
+ 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5>
+ 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5>
+ 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5>
+ 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 <u,1,2,0>, RHS
+ 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2>
+ 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5>
+ 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5>
+ 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0>
+ 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5>
+ 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3>
+ 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5>
+ 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5>
+ 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7>
+ 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7>
+ 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0>
+ 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6>
+ 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0>
+ 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5>
+ 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0>
+ 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7>
+ 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0>
+ 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0>
+ 2231189606U, // <2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0>
+ 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0>
+ 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 <u,1,2,0>, <7,4,5,6>
+ 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5>
+ 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0>
+ 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 <u,1,2,0>, <7,7,7,7>
+ 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u>
+ 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0>
+ 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u>
+ 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0>
+ 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2>
+ 2330290974U, // <2,1,0,2>: Cost 3 vmrglw <u,1,2,0>, <3,u,1,2>
+ 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0>
+ 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS
+ 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4>
+ 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2>
+ 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7>
+ 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2>
+ 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1>
+ 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1>
+ 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0>
+ 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3>
+ 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS
+ 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, <1,1,5,2>
+ 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7>
+ 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1>
+ 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1>
+ 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2>
+ 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1>
+ 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2>
+ 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2>
+ 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS
+ 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5>
+ 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7>
+ 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0>
+ 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS
+ 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS
+ 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4>
+ 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5>
+ 2330323742U, // <2,1,4,2>: Cost 3 vmrglw <u,1,2,4>, <3,u,1,2>
+ 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2>
+ 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS
+ 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2>
+ 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7>
+ 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5>
+ 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1>
+ 3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6>
+ 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6>
+ 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS
+ 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5>
+ 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7>
+ 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0>
+ 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS
+ 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6>
+ 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1>
+ 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7>
+ 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2>
+ 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS
+ 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5>
+ 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6>
+ 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1>
+ 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS
+ 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2>
+ 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1>
+ 2330351234U, // <2,1,7,2>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2>
+ 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6>
+ 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5>
+ 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7>
+ 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3>
+ 2330351234U, // <2,1,7,u>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0>
+ 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2>
+ 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1>
+ 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5>
+ 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6>
+ 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7>
+ 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0>
+ 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1>
+ 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1>
+ 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0>
+ 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5>
+ 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2>
+ 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7>
+ 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7>
+ 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1>
+ 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS
+ 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS
+ 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7>
+ 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2>
+ 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS
+ 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2>
+ 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2>
+ 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135053414U, // <2,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6>
+ 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5>
+ 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6>
+ 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3>
+ 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS
+ 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3>
+ 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5>
+ 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS
+ 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4>
+ 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7>
+ 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7>
+ 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2>
+ 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1>
+ 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2>
+ 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6>
+ 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5>
+ 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0>
+ 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7>
+ 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1>
+ 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1>
+ 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7>
+ 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7>
+ 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5>
+ 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6>
+ 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2>
+ 1157056442U, // <2,2,6,u>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1>
+ 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1>
+ 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2>
+ 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6>
+ 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5>
+ 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7>
+ 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7>
+ 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS
+ 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u>
+ 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0>
+ 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1>
+ 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7>
+ 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0>
+ 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5>
+ 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6>
+ 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7>
+ 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3>
+ 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1>
+ 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3>
+ 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u>
+ 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS
+ 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4>
+ 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2>
+ 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2>
+ 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS
+ 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4>
+ 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3>
+ 3763580721U, // <2,3,5,3>: Cost 4 vsldoi8 LHS, <5,3,0,1>
+ 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7>
+ 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2>
+ 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3>
+ 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6>
+ 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1>
+ 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3>
+ 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3>
+ 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1>
+ 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7>
+ 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,0>
+ 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0>
+ 2697805926U, // <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS
+ 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2>
+ 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 <u,1,2,4>, <0,3,1,u>
+ 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5>
+ 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1>
+ 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1>
+ 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2>
+ 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0>
+ 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0>
+ 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4>
+ 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4>
+ 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7>
+ 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4>
+ 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1>
+ 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4>
+ 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4>
+ 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3>
+ 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4>
+ 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7>
+ 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7>
+ 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS
+ 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1>
+ 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3>
+ 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3>
+ 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7>
+ 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3>
+ 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS
+ 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4>
+ 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4>
+ 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3>
+ 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4>
+ 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7>
+ 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5>
+ 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS
+ 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5>
+ 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5>
+ 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 <u,2,4,5>, <3,0,u,2>
+ 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS
+ 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS
+ 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0>
+ 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1>
+ 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3>
+ 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6>
+ 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7>
+ 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4>
+ 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7>
+ 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4>
+ 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 <u,1,2,4>, <7,0,1,2>
+ 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, <4,7,1,2>
+ 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4>
+ 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4>
+ 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4>
+ 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0>
+ 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4>
+ 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0>
+ 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0>
+ 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS
+ 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u>
+ 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u>
+ 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u>
+ 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0>
+ 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2>
+ 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0>
+ 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5>
+ 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1>
+ 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1>
+ 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0>
+ 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1>
+ 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1>
+ 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0>
+ 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3>
+ 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5>
+ 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5>
+ 3366879607U, // <2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6>
+ 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0>
+ 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5>
+ 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS
+ 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1>
+ 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2>
+ 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1>
+ 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5>
+ 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5>
+ 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6>
+ 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7>
+ 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u>
+ 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5>
+ 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS
+ 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0>
+ 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0>
+ 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3>
+ 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4>
+ 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6>
+ 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7>
+ 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS
+ 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0>
+ 3704669830U, // <2,5,5,2>: Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5>
+ 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3>
+ 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS
+ 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6>
+ 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0>
+ 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS
+ 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1>
+ 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3>
+ 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4>
+ 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5>
+ 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5>
+ 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0>
+ 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2>
+ 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2>
+ 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2>
+ 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5>
+ 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3>
+ 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2>
+ 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6>
+ 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5>
+ 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6>
+ 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3>
+ 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u>
+ 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5>
+ 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0>
+ 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6>
+ 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0>
+ 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5>
+ 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5>
+ 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1>
+ 2330291510U, // <2,6,0,7>: Cost 3 vmrglw <u,1,2,0>, RHS
+ 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS
+ 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6>
+ 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0>
+ 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2>
+ 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS
+ 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6>
+ 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6>
+ 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS
+ 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1>
+ 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2>
+ 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6>
+ 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS
+ 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6>
+ 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7>
+ 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS
+ 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7>
+ 2287184230U, // <2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS
+ 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3>
+ 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS
+ 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4>
+ 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6>
+ 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4>
+ 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS
+ 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2>
+ 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2>
+ 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6>
+ 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3>
+ 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5>
+ 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6>
+ 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6>
+ 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6>
+ 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS
+ 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1>
+ 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2>
+ 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0>
+ 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS
+ 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5>
+ 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6>
+ 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1>
+ 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2>
+ 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2>
+ 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0>
+ 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5>
+ 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5>
+ 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2>
+ 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS
+ 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7>
+ 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS
+ 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 <u,2,7,0>, LHS
+ 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2>
+ 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2>
+ 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3>
+ 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 <u,2,7,0>, RHS
+ 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6>
+ 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0>
+ 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 <u,2,7,0>, <7,0,u,2>
+ 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2>
+ 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS
+ 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1>
+ 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0>
+ 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS
+ 3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1>
+ 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7>
+ 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7>
+ 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS
+ 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1>
+ 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2>
+ 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3>
+ 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS
+ 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6>
+ 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7>
+ 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7>
+ 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7>
+ 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1>
+ 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7>
+ 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS
+ 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5>
+ 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3>
+ 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS
+ 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2>
+ 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2>
+ 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3>
+ 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS
+ 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4>
+ 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0>
+ 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, <7,5,0,1>
+ 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0>
+ 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2>
+ 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS
+ 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5>
+ 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7>
+ 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7>
+ 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2>
+ 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2>
+ 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3>
+ 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6>
+ 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5>
+ 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6>
+ 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7>
+ 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1>
+ 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7>
+ 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2>
+ 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3>
+ 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS
+ 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5>
+ 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7>
+ 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7>
+ 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1>
+ 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2>
+ 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7>
+ 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS
+ 2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u>
+ 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0>
+ 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS
+ 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS
+ 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2>
+ 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1>
+ 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135053468U, // <2,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6>
+ 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5>
+ 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS
+ 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4>
+ 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2>
+ 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2>
+ 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS
+ 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5>
+ 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>, <u,0,1,2>
+ 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <u,6,3,7>
+ 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>, <u,4,5,6>
+ 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2727040090U, // <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u>
+ 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u>
+ 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u>
+ 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0>
+ 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS
+ 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS
+ 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4>
+ 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS
+ 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS
+ 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0>
+ 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3>
+ 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1>
+ 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5>
+ 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6>
+ 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7>
+ 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1>
+ 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS
+ 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1>
+ 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0>
+ 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS
+ 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 <u,3,0,1>, <5,5,5,5>
+ 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7>
+ 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, <7,3,0,1>
+ 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0>
+ 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2>
+ 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2>
+ 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1>
+ 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1>
+ 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6>
+ 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7>
+ 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0>
+ 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2>
+ 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2>
+ 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3>
+ 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0>
+ 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6>
+ 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6>
+ 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7>
+ 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0>
+ 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4>
+ 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5>
+ 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7>
+ 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7>
+ 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2>
+ 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5>
+ 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0>
+ 3403486842U, // <3,0,5,3>: Cost 4 vmrglw <u,0,3,5>, <7,u,0,3>
+ 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6>
+ 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 <u,2,3,0>, <5,5,5,5>
+ 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7>
+ 3873817044U, // <3,0,5,7>: Cost 4 vsldoi12 LHS, <0,5,7,0>
+ 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5>
+ 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7>
+ 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 <u,2,3,0>, <6,2,7,3>
+ 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7>
+ 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5>
+ 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7>
+ 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 <u,2,3,0>, <6,6,6,6>
+ 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0>
+ 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0>
+ 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1>
+ 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0>
+ 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3>
+ 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 <u,2,3,0>, <7,4,5,6>
+ 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0>
+ 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3>
+ 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 <u,2,3,0>, <7,7,7,7>
+ 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u>
+ 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS
+ 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1>
+ 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, <u,3,0,1>
+ 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS
+ 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, <u,6,3,7>
+ 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u>
+ 604816083U, // <3,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0>
+ 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2>
+ 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2>
+ 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2>
+ 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS
+ 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5>
+ 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7>
+ 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7>
+ 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS
+ 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1>
+ 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1>
+ 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3>
+ 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5>
+ 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1>
+ 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1>
+ 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1>
+ 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2>
+ 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0>
+ 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS
+ 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3>
+ 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0>
+ 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0>
+ 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3>
+ 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1>
+ 2293229718U, // <3,1,3,2>: Cost 3 vmrglw <1,u,3,3>, <3,0,1,2>
+ 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3>
+ 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, RHS
+ 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5>
+ 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7>
+ 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7>
+ 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS
+ 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4>
+ 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1>
+ 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2>
+ 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6>
+ 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS
+ 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6>
+ 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6>
+ 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7>
+ 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS
+ 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1>
+ 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1>
+ 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2>
+ 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0>
+ 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5>
+ 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5>
+ 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 <u,0,3,1>, <5,6,7,0>
+ 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1>
+ 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1>
+ 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1>
+ 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0>
+ 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7>
+ 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5>
+ 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7>
+ 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0>
+ 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7>
+ 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS
+ 2297905162U, // <3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1>
+ 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2>
+ 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3>
+ 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS
+ 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5>
+ 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7>
+ 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u>
+ 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u>
+ 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2>
+ 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0>
+ 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS
+ 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3>
+ 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0>
+ 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0>
+ 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0>
+ 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1>
+ 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2>
+ 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4>
+ 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7>
+ 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7>
+ 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS
+ 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1>
+ 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2>
+ 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1>
+ 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS
+ 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0>
+ 3830687289U, // <3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0>
+ 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2>
+ 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1>
+ 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS
+ 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2>
+ 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS
+ 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5>
+ 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7>
+ 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7>
+ 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3>
+ 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1>
+ 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1>
+ 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2>
+ 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS
+ 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5>
+ 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5>
+ 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7>
+ 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0>
+ 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1>
+ 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS
+ 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1>
+ 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2>
+ 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4>
+ 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7>
+ 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4>
+ 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS
+ 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1>
+ 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, <2,2,2,2>
+ 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6>
+ 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS
+ 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6>
+ 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7>
+ 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7>
+ 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6>
+ 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS
+ 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1>
+ 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6>
+ 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS
+ 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5>
+ 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7>
+ 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2>
+ 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, <2,7,0,1>
+ 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1>
+ 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2>
+ 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS
+ 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5>
+ 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6>
+ 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7>
+ 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1>
+ 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1>
+ 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3>
+ 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5>
+ 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7>
+ 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0>
+ 1679001731U, // <3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1>
+ 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2>
+ 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0>
+ 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3>
+ 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1>
+ 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5>
+ 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6>
+ 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7>
+ 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2>
+ 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0>
+ 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1>
+ 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3>
+ 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3>
+ 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0>
+ 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0>
+ 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1>
+ 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7>
+ 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3>
+ 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS
+ 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3>
+ 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3>
+ 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0>
+ 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS
+ 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u>
+ 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3>
+ 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3>
+ 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3>
+ 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS
+ 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1>
+ 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3>
+ 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5>
+ 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7>
+ 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7>
+ 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS
+ 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0>
+ 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4>
+ 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4>
+ 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3>
+ 1215513498U, // <3,3,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6>
+ 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6>
+ 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7>
+ 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6>
+ 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1>
+ 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1>
+ 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3>
+ 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3>
+ 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5>
+ 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5>
+ 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7>
+ 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7>
+ 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7>
+ 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7>
+ 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3>
+ 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7>
+ 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7>
+ 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7>
+ 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7>
+ 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6>
+ 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3>
+ 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3>
+ 2631458918U, // <3,3,7,0>: Cost 3 vsldoi4 <2,3,3,7>, LHS
+ 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1>
+ 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7>
+ 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3>
+ 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS
+ 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5>
+ 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6>
+ 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2>
+ 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 <u,2,3,3>, <u,2,3,3>
+ 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6>
+ 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3>
+ 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS
+ 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0>
+ 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0>
+ 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0>
+ 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5>
+ 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7>
+ 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7>
+ 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS
+ 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1>
+ 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4>
+ 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4>
+ 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS
+ 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0>
+ 3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7>
+ 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1>
+ 1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4>
+ 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0>
+ 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3>
+ 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2>
+ 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1>
+ 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4>
+ 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS
+ 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7>
+ 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4>
+ 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1>
+ 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2>
+ 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1>
+ 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3>
+ 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1>
+ 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7>
+ 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS
+ 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4>
+ 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4>
+ 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3>
+ 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4>
+ 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6>
+ 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7>
+ 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS
+ 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 <2,3,4,5>, <1,1,1,1>
+ 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5>
+ 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2>
+ 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS
+ 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5>
+ 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5>
+ 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS
+ 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7>
+ 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 <u,2,3,4>, <6,2,7,3>
+ 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7>
+ 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7>
+ 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 <u,2,3,4>, <6,6,6,6>
+ 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4>
+ 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 <u,2,3,4>, <7,0,1,2>
+ 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1>
+ 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4>
+ 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3>
+ 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 <u,2,3,4>, <7,4,5,6>
+ 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5>
+ 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6>
+ 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 <u,2,3,4>, <7,7,7,7>
+ 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u>
+ 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS
+ 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 1557800657U, // <3,4,u,2>: Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u>
+ 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, <u,3,0,1>
+ 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS
+ 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u>
+ 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS
+ 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2>
+ 2331029019U, // <3,5,0,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2>
+ 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1>
+ 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1>
+ 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6>
+ 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7>
+ 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1>
+ 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1>
+ 2333027218U, // <3,5,1,1>: Cost 3 vmrglw <u,5,3,1>, <4,0,5,1>
+ 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5>
+ 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5>
+ 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5>
+ 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0>
+ 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5>
+ 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0>
+ 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1>
+ 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4>
+ 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5>
+ 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2>
+ 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5>
+ 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5>
+ 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3>
+ 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3>
+ 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3>
+ 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5>
+ 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS
+ 2331052946U, // <3,5,3,1>: Cost 3 vmrglw <u,2,3,3>, <4,0,5,1>
+ 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3>
+ 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3>
+ 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5>
+ 2331053274U, // <3,5,3,5>: Cost 3 vmrglw <u,2,3,3>, <4,4,5,5>
+ 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6>
+ 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7>
+ 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u>
+ 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS
+ 2331061138U, // <3,5,4,1>: Cost 3 vmrglw <u,2,3,4>, <4,0,5,1>
+ 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3>
+ 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6>
+ 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 <u,5,3,5>, <4,4,5,5>
+ 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6>
+ 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6>
+ 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7>
+ 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6>
+ 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1>
+ 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1>
+ 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5>
+ 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3>
+ 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5>
+ 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078862U, // <3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6>
+ 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7>
+ 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1>
+ 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7>
+ 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6>
+ 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4>
+ 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5>
+ 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7>
+ 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6>
+ 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0>
+ 1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0>
+ 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS
+ 2333739922U, // <3,5,7,1>: Cost 3 vmrglw <u,6,3,7>, <4,0,5,1>
+ 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7>
+ 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3>
+ 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS
+ 2333740250U, // <3,5,7,5>: Cost 3 vmrglw <u,6,3,7>, <4,4,5,5>
+ 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6>
+ 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7>
+ 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u>
+ 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1>
+ 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2>
+ 2331029019U, // <3,5,u,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4>
+ 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4>
+ 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6>
+ 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0>
+ 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0>
+ 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0>
+ 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1>
+ 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3>
+ 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5>
+ 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0>
+ 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1>
+ 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1>
+ 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1>
+ 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6>
+ 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6>
+ 3873821072U, // <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5>
+ 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6>
+ 3403453240U, // <3,6,1,6>: Cost 4 vmrglw <u,0,3,1>, <6,6,6,6>
+ 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS
+ 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6>
+ 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2>
+ 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6>
+ 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS
+ 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6>
+ 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3>
+ 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3>
+ 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3>
+ 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2>
+ 3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1>
+ 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3>
+ 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3>
+ 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6>
+ 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6>
+ 2331054904U, // <3,6,3,6>: Cost 3 vmrglw <u,2,3,3>, <6,6,6,6>
+ 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2>
+ 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4>
+ 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5>
+ 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5>
+ 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6>
+ 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5>
+ 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 3873821356U, // <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1>
+ 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1>
+ 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0>
+ 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0>
+ 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5>
+ 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5>
+ 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7>
+ 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1>
+ 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1>
+ 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2>
+ 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3>
+ 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5>
+ 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6>
+ 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7>
+ 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7>
+ 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1>
+ 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1>
+ 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3>
+ 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3>
+ 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5>
+ 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5>
+ 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6>
+ 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1>
+ 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1>
+ 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3>
+ 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5>
+ 2699892890U, // <3,6,u,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0>
+ 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6>
+ 2331030010U, // <3,7,0,3>: Cost 3 vmrglw <u,2,3,0>, <6,2,7,3>
+ 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 <2,6,3,7>, <0,4,1,5>
+ 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6>
+ 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0>
+ 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1>
+ 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1>
+ 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1>
+ 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0>
+ 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7>
+ 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5>
+ 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1>
+ 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7>
+ 3403453250U, // <3,7,1,7>: Cost 4 vmrglw <u,0,3,1>, <6,6,7,7>
+ 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0>
+ 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1>
+ 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7>
+ 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2>
+ 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7>
+ 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5>
+ 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7>
+ 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7>
+ 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3>
+ 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7>
+ 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, <3,0,1,2>
+ 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1>
+ 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3>
+ 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3>
+ 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6>
+ 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6>
+ 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7>
+ 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1>
+ 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2>
+ 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS
+ 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3>
+ 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7>
+ 2331062778U, // <3,7,4,3>: Cost 3 vmrglw <u,2,3,4>, <6,2,7,3>
+ 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS
+ 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4>
+ 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5>
+ 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2>
+ 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0>
+ 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4>
+ 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1>
+ 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6>
+ 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5>
+ 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7>
+ 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0>
+ 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7>
+ 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7>
+ 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7>
+ 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7>
+ 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0>
+ 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7>
+ 3862910472U, // <3,7,6,5>: Cost 4 vsldoi12 <6,2,7,3>, <7,6,5,6>
+ 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, <6,6,6,6>
+ 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0>
+ 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7>
+ 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS
+ 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1>
+ 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7>
+ 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3>
+ 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS
+ 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5>
+ 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7>
+ 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, <u,0,1,2>
+ 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, <u,2,3,0>
+ 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, <u,3,0,1>
+ 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, <u,4,5,6>
+ 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 <u,6,3,7>, <u,6,3,7>
+ 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, <u,0,1,2>
+ 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2>
+ 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, <u,0,4,1>
+ 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6>
+ 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, <u,0,u,2>
+ 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS
+ 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, <u,1,3,1>
+ 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS
+ 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, <u,1,5,0>
+ 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, <u,1,6,7>
+ 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u>
+ 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, <u,2,1,3>
+ 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u>
+ 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, <u,2,3,3>
+ 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS
+ 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u>
+ 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u>
+ 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, <u,2,7,3>
+ 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, <u,2,u,3>
+ 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, <u,3,0,1>
+ 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, <u,3,2,2>
+ 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS
+ 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, <u,3,4,5>
+ 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7>
+ 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS
+ 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0>
+ 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS
+ 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2>
+ 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679005759U, // <3,u,4,5>: Cost 2 vsldoi12 LHS, <u,4,5,6>
+ 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6>
+ 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1679005786U, // <3,u,4,u>: Cost 2 vsldoi12 LHS, <u,4,u,6>
+ 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS
+ 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1>
+ 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5>
+ 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, <u,5,3,6>
+ 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS
+ 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS
+ 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, <u,6,1,7>
+ 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6>
+ 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS
+ 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, <u,6,7,0>
+ 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, <u,6,u,7>
+ 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, <u,7,0,1>
+ 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1>
+ 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7>
+ 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, <u,7,4,5>
+ 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5>
+ 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6>
+ 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, <u,u,0,1>
+ 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, <u,u,1,2>
+ 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS
+ 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, <u,u,4,5>
+ 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, <u,u,5,6>
+ 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0>
+ 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0>
+ 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3>
+ 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4>
+ 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0>
+ 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7>
+ 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7>
+ 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS
+ 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4>
+ 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1>
+ 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS
+ 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4>
+ 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1>
+ 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1>
+ 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS
+ 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0>
+ 2698585801U, // <4,0,2,3>: Cost 3 vsldoi8 <2,3,4,0>, <2,3,4,0>
+ 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4>
+ 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 <u,7,4,0>, <2,5,3,1>
+ 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7>
+ 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7>
+ 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0>
+ 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS
+ 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3>
+ 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4>
+ 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS
+ 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0>
+ 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7>
+ 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0>
+ 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1>
+ 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3>
+ 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4>
+ 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5>
+ 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7>
+ 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4>
+ 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5>
+ 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5>
+ 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5>
+ 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1>
+ 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3>
+ 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0>
+ 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7>
+ 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1>
+ 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6>
+ 2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0>
+ 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0>
+ 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3>
+ 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0>
+ 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5>
+ 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0>
+ 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7>
+ 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7>
+ 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u>
+ 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u>
+ 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS
+ 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 <u,4,1,0>, LHS
+ 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS
+ 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4>
+ 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0>
+ 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5>
+ 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5>
+ 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 <u,4,1,0>, <6,2,7,3>
+ 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 <u,4,1,0>, <7,0,1,2>
+ 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1>
+ 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1>
+ 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4>
+ 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4>
+ 3361057810U, // <4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3>
+ 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4>
+ 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5>
+ 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7>
+ 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7>
+ 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4>
+ 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS
+ 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4>
+ 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2>
+ 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4>
+ 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS
+ 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5>
+ 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7>
+ 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2>
+ 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4>
+ 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1>
+ 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4>
+ 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4>
+ 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3>
+ 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6>
+ 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5>
+ 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7>
+ 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1>
+ 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1>
+ 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 <u,4,1,4>, LHS
+ 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1>
+ 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4>
+ 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4>
+ 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 <u,4,1,4>, <4,4,4,4>
+ 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7>
+ 3867116636U, // <4,1,4,7>: Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0>
+ 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5>
+ 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2>
+ 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS
+ 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5>
+ 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 <u,4,1,5>, <7,0,1,2>
+ 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS
+ 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, <6,0,1,2>
+ 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7>
+ 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4>
+ 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5>
+ 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7>
+ 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5>
+ 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6>
+ 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1>
+ 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7>
+ 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1>
+ 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1>
+ 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2>
+ 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1>
+ 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6>
+ 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5>
+ 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6>
+ 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7>
+ 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1>
+ 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u>
+ 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4>
+ 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS
+ 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5>
+ 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 <u,4,1,u>, <7,0,1,2>
+ 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS
+ 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS
+ 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS
+ 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2>
+ 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5>
+ 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1>
+ 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7>
+ 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1>
+ 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS
+ 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5>
+ 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2>
+ 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5>
+ 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0>
+ 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4>
+ 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7>
+ 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1>
+ 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4>
+ 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2>
+ 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3>
+ 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0>
+ 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5>
+ 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7>
+ 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, <7,4,2,2>
+ 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3>
+ 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1>
+ 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1>
+ 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <2,3,2,2>
+ 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4>
+ 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0>
+ 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4>
+ 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7>
+ 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4>
+ 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1>
+ 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1>
+ 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5>
+ 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2>
+ 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4>
+ 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5>
+ 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7>
+ 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 <u,4,2,4>, <7,0,u,2>
+ 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS
+ 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5>
+ 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS
+ 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS
+ 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4>
+ 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7>
+ 2759018426U, // <4,2,6,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,3,7>
+ 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7>
+ 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4>
+ 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7>
+ 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2>
+ 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7>
+ 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 <u,2,3,4>, <2,7,0,1>
+ 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2>
+ 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2>
+ 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4>
+ 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5>
+ 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7>
+ 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1>
+ 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1>
+ 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u>
+ 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5>
+ 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4>
+ 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2>
+ 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2>
+ 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3>
+ 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3>
+ 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4>
+ 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 <7,4,3,0>, <5,6,7,4>
+ 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7>
+ 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7>
+ 2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2>
+ 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0>
+ 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1>
+ 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3>
+ 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4>
+ 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5>
+ 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6>
+ 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7>
+ 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3>
+ 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4>
+ 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2>
+ 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4>
+ 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3>
+ 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u>
+ 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3>
+ 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3>
+ 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4>
+ 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2>
+ 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3>
+ 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3>
+ 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3>
+ 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4>
+ 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5>
+ 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7>
+ 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7>
+ 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3>
+ 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1>
+ 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2>
+ 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 <2,4,3,4>, <2,4,3,4>
+ 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3>
+ 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5>
+ 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6>
+ 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7>
+ 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7>
+ 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1>
+ 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1>
+ 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5>
+ 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5>
+ 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7>
+ 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7>
+ 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4>
+ 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3>
+ 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6>
+ 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5>
+ 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6>
+ 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4>
+ 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0>
+ 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1>
+ 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3>
+ 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3>
+ 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4>
+ 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 <u,4,3,7>, <5,6,7,4>
+ 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6>
+ 3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7>
+ 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3>
+ 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2>
+ 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u>
+ 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6>
+ 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7>
+ 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4>
+ 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1>
+ 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5>
+ 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2>
+ 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7>
+ 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0>
+ 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1>
+ 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3>
+ 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3>
+ 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4>
+ 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS
+ 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7>
+ 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7>
+ 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3>
+ 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS
+ 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4>
+ 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, <4,2,5,3>
+ 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4>
+ 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4>
+ 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7>
+ 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3>
+ 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4>
+ 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2>
+ 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4>
+ 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4>
+ 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4>
+ 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4>
+ 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7>
+ 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 1570373734U, // <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS
+ 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1>
+ 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2>
+ 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4>
+ 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS
+ 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS
+ 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3>
+ 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4>
+ 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS
+ 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4>
+ 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5>
+ 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5>
+ 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 94817833U, // <4,4,5,u>: Cost 1 vmrghw RHS, RHS
+ 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1>
+ 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5>
+ 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3>
+ 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4>
+ 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4>
+ 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2>
+ 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4>
+ 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2>
+ 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1>
+ 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3>
+ 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3>
+ 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4>
+ 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7>
+ 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7>
+ 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u>
+ 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS
+ 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS
+ 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0>
+ 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5>
+ 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0>
+ 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5>
+ 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5>
+ 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5>
+ 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0>
+ 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0>
+ 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5>
+ 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0>
+ 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3>
+ 3761087567U, // <4,5,1,4>: Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5>
+ 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5>
+ 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7>
+ 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5>
+ 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS
+ 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5>
+ 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5>
+ 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5>
+ 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5>
+ 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6>
+ 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7>
+ 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5>
+ 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5>
+ 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2>
+ 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4>
+ 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4>
+ 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3>
+ 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0>
+ 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5>
+ 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4>
+ 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5>
+ 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2>
+ 2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS
+ 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1>
+ 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3>
+ 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4>
+ 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5>
+ 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7>
+ 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4>
+ 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS
+ 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1>
+ 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5>
+ 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3>
+ 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5>
+ 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5>
+ 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7>
+ 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0>
+ 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS
+ 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6>
+ 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6>
+ 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6>
+ 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS
+ 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3>
+ 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2>
+ 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4>
+ 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4>
+ 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5>
+ 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0>
+ 3317239812U, // <4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5>
+ 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, <7,6,7,4>
+ 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7>
+ 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2>
+ 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS
+ 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, <u,2,3,0>
+ 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u>
+ 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS
+ 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0>
+ 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS
+ 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3>
+ 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4>
+ 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5>
+ 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1>
+ 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6>
+ 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0>
+ 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1>
+ 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4>
+ 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3>
+ 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4>
+ 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6>
+ 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6>
+ 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS
+ 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, <2,1,3,5>
+ 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2>
+ 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5>
+ 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3>
+ 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4>
+ 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7>
+ 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3>
+ 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3>
+ 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4>
+ 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2>
+ 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4>
+ 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4>
+ 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6>
+ 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6>
+ 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6>
+ 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1>
+ 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1>
+ 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3>
+ 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4>
+ 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4>
+ 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5>
+ 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6>
+ 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1168569144U, // <4,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2>
+ 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1>
+ 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3>
+ 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5>
+ 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4>
+ 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1>
+ 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6>
+ 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7>
+ 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7>
+ 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1>
+ 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1>
+ 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3>
+ 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4>
+ 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5>
+ 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2>
+ 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6>
+ 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2>
+ 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3>
+ 3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4>
+ 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5>
+ 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6>
+ 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0>
+ 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7>
+ 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0>
+ 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1>
+ 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3>
+ 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4>
+ 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5>
+ 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7>
+ 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7>
+ 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3>
+ 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4>
+ 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2>
+ 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4>
+ 2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3>
+ 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4>
+ 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7>
+ 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2>
+ 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4>
+ 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2>
+ 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4>
+ 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3>
+ 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7>
+ 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4>
+ 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7>
+ 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, <3,6,4,7>
+ 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7>
+ 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7>
+ 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1>
+ 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3>
+ 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3>
+ 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3>
+ 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4>
+ 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4>
+ 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7>
+ 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1>
+ 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3>
+ 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5>
+ 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2>
+ 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1>
+ 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4>
+ 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3>
+ 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7>
+ 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4>
+ 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7>
+ 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4>
+ 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4>
+ 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2>
+ 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4>
+ 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, <7,2,6,3>
+ 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3>
+ 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6>
+ 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5>
+ 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7>
+ 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7>
+ 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2>
+ 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2>
+ 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2244294126U, // <4,7,u,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u>
+ 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u>
+ 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u>
+ 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS
+ 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u>
+ 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4>
+ 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u>
+ 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 1685280612U, // <4,u,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS
+ 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u>
+ 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u>
+ 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, <u,2,4,3>
+ 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7>
+ 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <u,2,7,3>
+ 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u>
+ 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,0,1>
+ 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 <u,3,1,4>, <u,3,1,4>
+ 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u>
+ 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,3,4>
+ 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,4,5>
+ 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u>
+ 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,u,1>
+ 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS
+ 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3>
+ 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS
+ 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS
+ 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7>
+ 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS
+ 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, <u,0,1,2>
+ 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, <u,2,3,0>
+ 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, <u,4,5,6>
+ 94820506U, // <4,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS
+ 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS
+ 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6>
+ 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6>
+ 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6>
+ 1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS
+ 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6>
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,0,1>
+ 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u>
+ 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,4,5>
+ 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u>
+ 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS
+ 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS
+ 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0>
+ 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1>
+ 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5>
+ 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3>
+ 2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1>
+ 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5>
+ 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7>
+ 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7>
+ 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1>
+ 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS
+ 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1>
+ 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0>
+ 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS
+ 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1>
+ 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7>
+ 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7>
+ 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0>
+ 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2>
+ 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1>
+ 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3>
+ 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7>
+ 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 <u,7,5,0>, <2,7,0,1>
+ 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2>
+ 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0>
+ 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4>
+ 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3>
+ 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0>
+ 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5>
+ 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4>
+ 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7>
+ 2707950400U, // <5,0,3,u>: Cost 3 vsldoi8 <3,u,5,0>, <3,u,5,0>
+ 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS
+ 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5>
+ 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0>
+ 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5>
+ 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7>
+ 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0>
+ 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3>
+ 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0>
+ 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5>
+ 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5>
+ 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7>
+ 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7>
+ 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0>
+ 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3>
+ 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3>
+ 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5>
+ 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6>
+ 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5>
+ 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2>
+ 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1>
+ 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 <u,4,5,0>, <7,2,u,4>
+ 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 <4,3,5,0>, <7,3,0,1>
+ 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0>
+ 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0>
+ 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4>
+ 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1>
+ 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0>
+ 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS
+ 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5>
+ 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1>
+ 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, <u,6,3,7>
+ 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 <u,7,5,0>, <u,7,5,0>
+ 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0>
+ 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0>
+ 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5>
+ 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5>
+ 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7>
+ 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0>
+ 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS
+ 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1>
+ 2311276694U, // <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2>
+ 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3>
+ 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5>
+ 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5>
+ 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6>
+ 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7>
+ 2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5>
+ 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS
+ 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3>
+ 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2>
+ 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0>
+ 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5>
+ 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3>
+ 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7>
+ 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0>
+ 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0>
+ 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2>
+ 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1>
+ 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2>
+ 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3>
+ 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6>
+ 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5>
+ 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7>
+ 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1>
+ 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2>
+ 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1>
+ 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5>
+ 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5>
+ 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0>
+ 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4>
+ 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1>
+ 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1>
+ 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1>
+ 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1>
+ 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1>
+ 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, <5,2,1,3>
+ 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0>
+ 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5>
+ 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5>
+ 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7>
+ 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0>
+ 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1>
+ 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 <u,5,1,6>, LHS
+ 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1>
+ 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2>
+ 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3>
+ 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4>
+ 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7>
+ 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6>
+ 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1>
+ 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u>
+ 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2>
+ 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1>
+ 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2>
+ 3787674851U, // <5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1>
+ 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6>
+ 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5>
+ 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6>
+ 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7>
+ 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2>
+ 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 <u,0,5,1>, <u,0,5,1>
+ 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5>
+ 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0>
+ 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5>
+ 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, <u,6,3,7>
+ 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, <u,7,0,1>
+ 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0>
+ 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1>
+ 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5>
+ 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5>
+ 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1>
+ 3408232554U, // <5,2,0,7>: Cost 4 vmrglw <u,7,5,0>, <0,1,2,7>
+ 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS
+ 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2>
+ 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2>
+ 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS
+ 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5>
+ 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6>
+ 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7>
+ 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS
+ 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2>
+ 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2>
+ 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3>
+ 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5>
+ 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3>
+ 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6>
+ 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7>
+ 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3>
+ 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1>
+ 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1>
+ 2644625102U, // <5,2,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5>
+ 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5>
+ 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6>
+ 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5>
+ 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5>
+ 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5>
+ 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2>
+ 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5>
+ 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5>
+ 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5>
+ 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2>
+ 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2>
+ 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5>
+ 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1>
+ 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2>
+ 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2>
+ 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5>
+ 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5>
+ 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7>
+ 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3>
+ 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS
+ 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6>
+ 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2>
+ 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS
+ 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7>
+ 2248951738U, // <5,2,6,6>: Cost 3 vmrghw <5,6,7,0>, <2,6,3,7>
+ 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7>
+ 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2>
+ 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3>
+ 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2>
+ 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5>
+ 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5>
+ 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7>
+ 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7>
+ 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1>
+ 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2>
+ 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5>
+ 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5>
+ 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7>
+ 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5>
+ 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0>
+ 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2>
+ 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0>
+ 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2>
+ 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1>
+ 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5>
+ 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7>
+ 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7>
+ 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2>
+ 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0>
+ 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1>
+ 2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3>
+ 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3>
+ 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4>
+ 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1>
+ 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7>
+ 2311276474U, // <5,3,1,7>: Cost 3 vmrglw <4,u,5,1>, <2,6,3,7>
+ 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3>
+ 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5>
+ 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2>
+ 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4>
+ 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6>
+ 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1>
+ 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3>
+ 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3>
+ 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4>
+ 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1>
+ 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5>
+ 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2>
+ 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3>
+ 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5>
+ 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5>
+ 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6>
+ 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7>
+ 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5>
+ 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5>
+ 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0>
+ 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3>
+ 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5>
+ 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5>
+ 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,5,6>
+ 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5>
+ 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7>
+ 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6>
+ 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0>
+ 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5>
+ 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3>
+ 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3>
+ 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4>
+ 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5>
+ 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7>
+ 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7>
+ 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u>
+ 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1>
+ 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6>
+ 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6>
+ 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS
+ 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7>
+ 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7>
+ 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0>
+ 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7>
+ 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7>
+ 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3>
+ 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5>
+ 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5>
+ 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6>
+ 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7>
+ 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 <3,7,u,5>, <3,7,u,5>
+ 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, LHS
+ 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2>
+ 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u>
+ 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u>
+ 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS
+ 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6>
+ 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7>
+ 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2>
+ 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4>
+ 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5>
+ 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4>
+ 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5>
+ 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5>
+ 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6>
+ 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, <u,7,4,7>
+ 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0>
+ 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4>
+ 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4>
+ 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4>
+ 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4>
+ 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0>
+ 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7>
+ 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7>
+ 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u>
+ 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS
+ 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5>
+ 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2>
+ 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4>
+ 2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3>
+ 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7>
+ 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7>
+ 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1>
+ 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2>
+ 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1>
+ 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4>
+ 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3>
+ 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4>
+ 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5>
+ 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7>
+ 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7>
+ 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4>
+ 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4>
+ 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4>
+ 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3>
+ 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4>
+ 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4>
+ 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5>
+ 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7>
+ 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4>
+ 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5>
+ 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS
+ 2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5>
+ 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3>
+ 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2>
+ 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS
+ 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS
+ 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7>
+ 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2248952722U, // <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1>
+ 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2>
+ 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6>
+ 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4>
+ 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6>
+ 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5>
+ 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2>
+ 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1>
+ 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 <u,4,5,4>, <7,2,u,4>
+ 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5>
+ 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6>
+ 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5>
+ 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6>
+ 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5>
+ 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4>
+ 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS
+ 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u>
+ 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5>
+ 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS
+ 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5>
+ 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 <u,7,5,4>, <u,7,5,4>
+ 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0>
+ 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2>
+ 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3>
+ 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <5,0,4,1>
+ 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5>
+ 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7>
+ 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0>
+ 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0>
+ 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5>
+ 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5>
+ 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5>
+ 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6>
+ 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7>
+ 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0>
+ 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3>
+ 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2>
+ 2765737726U, // <5,5,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4>
+ 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3>
+ 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3>
+ 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7>
+ 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7>
+ 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4>
+ 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2>
+ 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5>
+ 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3>
+ 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3>
+ 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6>
+ 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5>
+ 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6>
+ 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7>
+ 2713963294U, // <5,5,3,u>: Cost 3 vsldoi8 <4,u,5,5>, <3,u,1,2>
+ 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1>
+ 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5>
+ 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5>
+ 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3>
+ 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5>
+ 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS
+ 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6>
+ 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7>
+ 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5>
+ 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1>
+ 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3>
+ 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3>
+ 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS
+ 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6>
+ 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7>
+ 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS
+ 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1>
+ 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1>
+ 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3>
+ 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6>
+ 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5>
+ 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7>
+ 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1>
+ 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2>
+ 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1>
+ 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5>
+ 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3>
+ 2713965926U, // <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6>
+ 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5>
+ 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6>
+ 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7>
+ 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2>
+ 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, <u,2,3,0>
+ 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, <u,3,0,1>
+ 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,u,5>: Cost 1 vspltisw1 RHS
+ 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, <u,7,0,1>
+ 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS
+ 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0>
+ 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2>
+ 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4>
+ 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5>
+ 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6>
+ 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7>
+ 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2>
+ 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1>
+ 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0>
+ 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4>
+ 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6>
+ 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6>
+ 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6>
+ 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 3773113789U, // <5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2>
+ 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1>
+ 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2>
+ 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1>
+ 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6>
+ 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6>
+ 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7>
+ 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6>
+ 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2>
+ 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5>
+ 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2>
+ 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3>
+ 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6>
+ 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7>
+ 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1>
+ 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6>
+ 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS
+ 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3>
+ 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5>
+ 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6>
+ 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6>
+ 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6>
+ 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1>
+ 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1>
+ 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3>
+ 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4>
+ 2717290420U, // <5,6,5,4>: Cost 3 vsldoi8 <5,4,5,6>, <5,4,5,6>
+ 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6>
+ 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1>
+ 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS
+ 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1>
+ 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3>
+ 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3>
+ 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS
+ 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6>
+ 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6>
+ 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7>
+ 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0>
+ 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 3718711395U, // <5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5>
+ 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3>
+ 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5>
+ 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5>
+ 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0>
+ 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2>
+ 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0>
+ 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1>
+ 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0>
+ 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3>
+ 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4>
+ 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1>
+ 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7>
+ 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7>
+ 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7>
+ 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0>
+ 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5>
+ 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2>
+ 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4>
+ 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7>
+ 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7>
+ 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7>
+ 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2>
+ 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5>
+ 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, <3,2,3,4>
+ 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3>
+ 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5>
+ 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5>
+ 3780422309U, // <5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7>
+ 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4>
+ 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5>
+ 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5>
+ 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0>
+ 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5>
+ 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7>
+ 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5>
+ 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7>
+ 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7>
+ 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0>
+ 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5>
+ 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3>
+ 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3>
+ 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4>
+ 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5>
+ 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7>
+ 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7>
+ 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u>
+ 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2>
+ 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1>
+ 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6>
+ 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6>
+ 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7>
+ 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6>
+ 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7>
+ 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2>
+ 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS
+ 3724739827U, // <5,7,7,1>: Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7>
+ 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5>
+ 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3>
+ 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5>
+ 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5>
+ 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7>
+ 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7>
+ 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5>
+ 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0>
+ 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3>
+ 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4>
+ 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u>
+ 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7>
+ 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0>
+ 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 2754238189U, // <5,u,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <u,0,4,1>
+ 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5>
+ 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS
+ 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u>
+ 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, <u,1,5,0>
+ 2694743284U, // <5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u>
+ 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2>
+ 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,3,3>
+ 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u>
+ 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, <u,2,5,3>
+ 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7>
+ 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,u,3>
+ 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <u,3,0,1>
+ 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <u,3,1,1>
+ 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u>
+ 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <u,3,5,5>
+ 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6>
+ 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u>
+ 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u>
+ 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 <u,4,1,5>, <u,4,1,5>
+ 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u>
+ 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <u,4,3,5>
+ 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u>
+ 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u>
+ 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, <u,5,2,3>
+ 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS
+ 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS
+ 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS
+ 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6>
+ 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS
+ 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571357492U, // <5,u,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2>
+ 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS
+ 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,0,0>
+ 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1>
+ 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6>
+ 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3>
+ 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1>
+ 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0>
+ 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6>
+ 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7>
+ 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1>
+ 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS
+ 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS
+ 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6>
+ 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS
+ 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1>
+ 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1>
+ 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2>
+ 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0>
+ 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6>
+ 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6>
+ 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5>
+ 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6>
+ 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2>
+ 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2>
+ 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0>
+ 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4>
+ 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0>
+ 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4>
+ 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6>
+ 3724857382U, // <6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3>
+ 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7>
+ 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6>
+ 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4>
+ 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0>
+ 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5>
+ 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6>
+ 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6>
+ 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5>
+ 2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0>
+ 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2>
+ 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7>
+ 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5>
+ 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0>
+ 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3>
+ 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0>
+ 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5>
+ 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6>
+ 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1>
+ 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7>
+ 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0>
+ 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3>
+ 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5>
+ 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5>
+ 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6>
+ 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0>
+ 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1>
+ 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1235664896U, // <6,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2>
+ 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7>
+ 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS
+ 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0>
+ 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u>
+ 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS
+ 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS
+ 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u>
+ 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0>
+ 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, <u,0,1,2>
+ 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6>
+ 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5>
+ 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5>
+ 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6>
+ 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7>
+ 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1>
+ 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6>
+ 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3>
+ 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5>
+ 3373146450U, // <6,1,1,5>: Cost 4 vmrglw <2,u,6,1>, <0,4,1,5>
+ 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6>
+ 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, <1,6,1,7>
+ 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 <u,6,1,2>, LHS
+ 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1>
+ 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0>
+ 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0>
+ 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 <u,6,1,2>, RHS
+ 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3>
+ 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7>
+ 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0>
+ 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1>
+ 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2>
+ 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1>
+ 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3>
+ 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3>
+ 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6>
+ 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5>
+ 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7>
+ 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1>
+ 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2>
+ 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1>
+ 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5>
+ 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5>
+ 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4>
+ 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5>
+ 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6>
+ 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6>
+ 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4>
+ 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6>
+ 2714668660U, // <6,1,5,0>: Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1>
+ 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6>
+ 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6>
+ 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4>
+ 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6>
+ 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6>
+ 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0>
+ 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7>
+ 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1>
+ 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 <u,6,1,6>, LHS
+ 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1>
+ 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0>
+ 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7>
+ 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 <u,6,1,6>, RHS
+ 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5>
+ 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6>
+ 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1>
+ 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0>
+ 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS
+ 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS
+ 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS
+ 1235673098U, // <6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0>
+ 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS
+ 1235673426U, // <6,1,u,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0>
+ 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS
+ 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6>
+ 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS
+ 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1>
+ 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1>
+ 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0>
+ 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0>
+ 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1>
+ 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2>
+ 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS
+ 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2>
+ 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0>
+ 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7>
+ 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS
+ 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2>
+ 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2>
+ 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3>
+ 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS
+ 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6>
+ 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7>
+ 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0>
+ 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3>
+ 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1>
+ 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 <1,4,5,6>, <2,3,1,4>
+ 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6>
+ 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS
+ 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5>
+ 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6>
+ 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7>
+ 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4>
+ 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6>
+ 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS
+ 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1>
+ 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5>
+ 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS
+ 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6>
+ 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5>
+ 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0>
+ 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6>
+ 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS
+ 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2>
+ 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6>
+ 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, <2,5,3,6>
+ 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS
+ 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2>
+ 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6>
+ 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2>
+ 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6>
+ 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS
+ 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6>
+ 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2>
+ 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS
+ 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6>
+ 2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7>
+ 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2>
+ 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS
+ 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS
+ 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS
+ 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS
+ 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS
+ 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6>
+ 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS
+ 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0>
+ 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6>
+ 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6>
+ 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6>
+ 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0>
+ 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6>
+ 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7>
+ 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1>
+ 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3>
+ 3373147762U, // <6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3>
+ 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5>
+ 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5>
+ 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6>
+ 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7>
+ 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2>
+ 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1>
+ 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3>
+ 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3>
+ 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6>
+ 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3>
+ 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7>
+ 3368511418U, // <6,3,2,7>: Cost 4 vmrglw <2,1,6,2>, <2,6,3,7>
+ 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2>
+ 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS
+ 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6>
+ 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3>
+ 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3>
+ 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5>
+ 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6>
+ 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6>
+ 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7>
+ 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5>
+ 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS
+ 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2>
+ 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6>
+ 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6>
+ 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS
+ 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6>
+ 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6>
+ 3368527802U, // <6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7>
+ 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6>
+ 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0>
+ 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7>
+ 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3>
+ 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3>
+ 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6>
+ 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6>
+ 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6>
+ 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7>
+ 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3>
+ 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2>
+ 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1>
+ 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6>
+ 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3>
+ 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6>
+ 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6>
+ 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6>
+ 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7>
+ 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2>
+ 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1>
+ 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7>
+ 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS
+ 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0>
+ 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1>
+ 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, <2,6,3,u>
+ 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS
+ 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6>
+ 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0>
+ 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 <4,5,6,4>, <0,0,0,0>
+ 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6>
+ 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1>
+ 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5>
+ 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS
+ 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0>
+ 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7>
+ 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1>
+ 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1>
+ 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3>
+ 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0>
+ 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2>
+ 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1>
+ 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4>
+ 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1>
+ 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4>
+ 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2>
+ 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6>
+ 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4>
+ 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2>
+ 3326037448U, // <6,4,2,7>: Cost 4 vmrghw <6,2,7,3>, <4,7,5,0>
+ 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2>
+ 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1>
+ 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4>
+ 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3>
+ 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6>
+ 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7>
+ 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6>
+ 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1>
+ 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1>
+ 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3>
+ 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4>
+ 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4>
+ 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6>
+ 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6>
+ 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7>
+ 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6>
+ 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS
+ 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5>
+ 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7>
+ 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6>
+ 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS
+ 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5>
+ 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2>
+ 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1>
+ 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1>
+ 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 <4,5,6,4>, <6,2,4,3>
+ 3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5>
+ 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4>
+ 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6>
+ 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1>
+ 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS
+ 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4>
+ 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7>
+ 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7>
+ 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0>
+ 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS
+ 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4>
+ 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7>
+ 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u>
+ 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0>
+ 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6>
+ 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6>
+ 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5>
+ 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5>
+ 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6>
+ 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7>
+ 2702090909U, // <6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1>
+ 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1>
+ 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0>
+ 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4>
+ 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1>
+ 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6>
+ 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1>
+ 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4>
+ 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5>
+ 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS
+ 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1>
+ 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2>
+ 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1>
+ 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2>
+ 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5>
+ 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7>
+ 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2>
+ 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5>
+ 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2>
+ 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5>
+ 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3>
+ 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3>
+ 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, <3,4,5,6>
+ 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5>
+ 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1>
+ 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS
+ 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1>
+ 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3>
+ 3719260674U, // <6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6>
+ 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4>
+ 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6>
+ 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4>
+ 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS
+ 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5>
+ 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3>
+ 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3>
+ 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5>
+ 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5>
+ 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6>
+ 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7>
+ 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5>
+ 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1>
+ 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1>
+ 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2>
+ 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4>
+ 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5>
+ 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5>
+ 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7>
+ 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6>
+ 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1>
+ 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS
+ 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS
+ 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS
+ 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS
+ 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0>
+ 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3>
+ 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1>
+ 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5>
+ 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0>
+ 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1>
+ 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1>
+ 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1>
+ 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0>
+ 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3>
+ 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5>
+ 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5>
+ 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6>
+ 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1>
+ 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1>
+ 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1>
+ 2252296817U, // <6,6,2,4>: Cost 3 vmrghw <6,2,7,3>, <6,4,2,5>
+ 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3>
+ 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2>
+ 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3>
+ 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2>
+ 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1>
+ 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2>
+ 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3>
+ 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5>
+ 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5>
+ 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6>
+ 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1>
+ 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1>
+ 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3>
+ 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3>
+ 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4>
+ 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6>
+ 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS
+ 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1>
+ 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3>
+ 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3>
+ 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS
+ 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5>
+ 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0>
+ 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1583808614U, // <6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3>
+ 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3>
+ 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5>
+ 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS
+ 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS
+ 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS
+ 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0>
+ 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1>
+ 2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2>
+ 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4>
+ 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5>
+ 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS
+ 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS
+ 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS
+ 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0>
+ 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, <7,0,1,2>
+ 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7>
+ 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7>
+ 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7>
+ 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3>
+ 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1>
+ 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564579638U, // <6,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6>
+ 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7>
+ 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3>
+ 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7>
+ 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3>
+ 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1>
+ 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7>
+ 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5>
+ 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7>
+ 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u>
+ 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638324156U, // <6,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0>
+ 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252298496U, // <6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, <u,7,0,1>
+ 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u>
+ 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS
+ 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS
+ 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS
+ 1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS
+ 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7>
+ 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS
+ 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS
+ 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u>
+ 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS
+ 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS
+ 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS
+ 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS
+ 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS
+ 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1>
+ 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7>
+ 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3>
+ 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1>
+ 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0>
+ 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7>
+ 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1>
+ 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS
+ 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1>
+ 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1>
+ 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS
+ 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5>
+ 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1>
+ 2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0>
+ 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3>
+ 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6>
+ 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2>
+ 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1>
+ 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6>
+ 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3>
+ 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7>
+ 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3>
+ 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6>
+ 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2>
+ 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0>
+ 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0>
+ 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3>
+ 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6>
+ 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7>
+ 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0>
+ 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0>
+ 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0>
+ 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1>
+ 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5>
+ 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7>
+ 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4>
+ 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5>
+ 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4>
+ 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4>
+ 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5>
+ 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1>
+ 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS
+ 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 <5,6,7,0>, <5,2,1,3>
+ 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0>
+ 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5>
+ 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5>
+ 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0>
+ 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0>
+ 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0>
+ 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0>
+ 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7>
+ 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3>
+ 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0>
+ 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7>
+ 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7>
+ 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6>
+ 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1>
+ 2779169333U, // <7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7>
+ 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2>
+ 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3>
+ 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6>
+ 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6>
+ 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7>
+ 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7>
+ 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0>
+ 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS
+ 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1>
+ 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u>
+ 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS
+ 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u>
+ 2718718208U, // <7,0,u,7>: Cost 3 vsldoi8 <5,6,7,0>, <u,7,0,1>
+ 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1>
+ 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1>
+ 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1>
+ 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0>
+ 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 <u,7,1,0>, RHS
+ 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5>
+ 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6>
+ 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 <u,7,1,0>, <7,0,1,2>
+ 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1>
+ 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1>
+ 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2>
+ 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1>
+ 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5>
+ 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5>
+ 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1>
+ 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1>
+ 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1>
+ 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3>
+ 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3>
+ 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0>
+ 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5>
+ 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3>
+ 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2>
+ 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0>
+ 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0>
+ 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1>
+ 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1>
+ 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2>
+ 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0>
+ 3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0>
+ 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5>
+ 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7>
+ 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7>
+ 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u>
+ 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1>
+ 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5>
+ 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5>
+ 3852911672U, // <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0>
+ 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 <u,7,1,4>, RHS
+ 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6>
+ 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5>
+ 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 <u,7,1,4>, <7,4,5,6>
+ 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5>
+ 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1>
+ 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6>
+ 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6>
+ 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0>
+ 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5>
+ 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5>
+ 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1>
+ 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1>
+ 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1>
+ 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1>
+ 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7>
+ 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7>
+ 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7>
+ 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7>
+ 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7>
+ 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7>
+ 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0>
+ 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 <1,6,u,7>, <1,6,u,7>
+ 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 <u,7,1,7>, LHS
+ 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1>
+ 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2>
+ 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3>
+ 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 <u,7,1,7>, RHS
+ 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5>
+ 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6>
+ 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7>
+ 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u>
+ 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1>
+ 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1>
+ 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0>
+ 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5>
+ 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7>
+ 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u>
+ 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0>
+ 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0>
+ 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS
+ 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2>
+ 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1>
+ 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS
+ 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1>
+ 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1>
+ 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1>
+ 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1>
+ 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1>
+ 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0>
+ 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1>
+ 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5>
+ 3840747062U, // <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6>
+ 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, <2,1,6,0>
+ 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0>
+ 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1>
+ 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1>
+ 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1>
+ 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3>
+ 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5>
+ 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2>
+ 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6>
+ 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2>
+ 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3>
+ 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1>
+ 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1>
+ 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6>
+ 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5>
+ 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5>
+ 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3>
+ 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7>
+ 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1>
+ 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS
+ 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5>
+ 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5>
+ 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS
+ 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6>
+ 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5>
+ 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7>
+ 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1>
+ 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 RHS, <2,5,1,2>
+ 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2>
+ 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6>
+ 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5>
+ 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5>
+ 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2>
+ 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6>
+ 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6>
+ 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS
+ 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2>
+ 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7>
+ 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7>
+ 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS
+ 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7>
+ 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6>
+ 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7>
+ 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7>
+ 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1>
+ 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6>
+ 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2>
+ 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5>
+ 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5>
+ 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7>
+ 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7>
+ 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1>
+ 2779170884U, // <7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1>
+ 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7>
+ 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5>
+ 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5>
+ 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,u,6,1>
+ 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0>
+ 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7>
+ 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0>
+ 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2>
+ 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0>
+ 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1>
+ 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1>
+ 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5>
+ 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0>
+ 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7>
+ 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2>
+ 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0>
+ 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1>
+ 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3>
+ 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0>
+ 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0>
+ 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0>
+ 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1>
+ 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1>
+ 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3>
+ 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3>
+ 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2>
+ 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2>
+ 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0>
+ 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3>
+ 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3>
+ 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3>
+ 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3>
+ 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0>
+ 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0>
+ 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1>
+ 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3>
+ 1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4>
+ 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3>
+ 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3>
+ 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7>
+ 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1>
+ 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2>
+ 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4>
+ 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5>
+ 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4>
+ 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6>
+ 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2>
+ 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7>
+ 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6>
+ 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1>
+ 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1>
+ 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1>
+ 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3>
+ 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5>
+ 2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5>
+ 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3>
+ 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6>
+ 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3>
+ 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7>
+ 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7>
+ 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3>
+ 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7>
+ 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7>
+ 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7>
+ 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2>
+ 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,6,7,7>
+ 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3>
+ 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1>
+ 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1>
+ 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6>
+ 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2>
+ 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5>
+ 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5>
+ 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7>
+ 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7>
+ 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1>
+ 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7>
+ 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2>
+ 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 <u,2,7,3>, <u,2,7,3>
+ 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7>
+ 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6>
+ 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3>
+ 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3>
+ 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2>
+ 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0>
+ 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1>
+ 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1>
+ 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4>
+ 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0>
+ 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1>
+ 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1>
+ 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1>
+ 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3>
+ 3852913616U, // <7,4,1,3>: Cost 4 vsldoi12 RHS, <4,1,3,0>
+ 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2>
+ 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0>
+ 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7>
+ 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7>
+ 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0>
+ 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3>
+ 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3>
+ 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2>
+ 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1>
+ 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3>
+ 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2>
+ 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7>
+ 3847457864U, // <7,4,2,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,2,7,3>
+ 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3>
+ 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2>
+ 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2>
+ 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3>
+ 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3>
+ 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6>
+ 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0>
+ 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4>
+ 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4>
+ 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0>
+ 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1>
+ 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1>
+ 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2>
+ 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3>
+ 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5>
+ 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4>
+ 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7>
+ 1707420917U, // <7,4,4,u>: Cost 2 vsldoi12 RHS, <4,4,u,5>
+ 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS
+ 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1>
+ 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7>
+ 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5>
+ 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS
+ 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0>
+ 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2>
+ 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7>
+ 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7>
+ 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3>
+ 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4>
+ 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4>
+ 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6>
+ 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6>
+ 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1>
+ 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7>
+ 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2>
+ 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2>
+ 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3>
+ 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6>
+ 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1>
+ 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7>
+ 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4>
+ 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS
+ 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7>
+ 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u>
+ 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 <6,7,4,u>, RHS
+ 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1>
+ 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, <u,7,0,1>
+ 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS
+ 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS
+ 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2>
+ 3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3>
+ 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4>
+ 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1>
+ 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5>
+ 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6>
+ 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2>
+ 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1>
+ 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1>
+ 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1>
+ 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3>
+ 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5>
+ 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5>
+ 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0>
+ 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5>
+ 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5>
+ 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1>
+ 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1>
+ 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3>
+ 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2>
+ 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4>
+ 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5>
+ 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3>
+ 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3>
+ 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3>
+ 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1>
+ 2319403922U, // <7,5,3,1>: Cost 3 vmrglw <6,2,7,3>, <4,0,5,1>
+ 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3>
+ 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5>
+ 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5>
+ 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6>
+ 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4>
+ 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u>
+ 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS
+ 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1>
+ 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3>
+ 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4>
+ 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4>
+ 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6>
+ 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6>
+ 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6>
+ 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6>
+ 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1>
+ 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1>
+ 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2>
+ 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3>
+ 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5>
+ 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6>
+ 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7>
+ 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1>
+ 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7>
+ 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2>
+ 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4>
+ 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5>
+ 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7>
+ 1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0>
+ 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0>
+ 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1>
+ 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2>
+ 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2>
+ 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7>
+ 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5>
+ 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6>
+ 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7>
+ 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1>
+ 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2>
+ 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4>
+ 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4>
+ 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3>
+ 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0>
+ 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0>
+ 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0>
+ 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2>
+ 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3>
+ 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1>
+ 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1>
+ 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6>
+ 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1>
+ 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1>
+ 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3>
+ 3833672072U, // <7,6,1,3>: Cost 5 vsldoi12 <1,3,6,7>, <6,1,3,6>
+ 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5>
+ 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5>
+ 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6>
+ 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1>
+ 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1>
+ 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS
+ 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3>
+ 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3>
+ 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2>
+ 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS
+ 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3>
+ 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2>
+ 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3>
+ 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3>
+ 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS
+ 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6>
+ 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2>
+ 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3>
+ 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5>
+ 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5>
+ 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6>
+ 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 3852915291U, // <7,6,4,0>: Cost 4 vsldoi12 RHS, <6,4,0,1>
+ 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1>
+ 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3>
+ 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4>
+ 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4>
+ 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6>
+ 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6>
+ 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1>
+ 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4>
+ 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6>
+ 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6>
+ 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5>
+ 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5>
+ 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6>
+ 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1>
+ 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1>
+ 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS
+ 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1>
+ 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3>
+ 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6>
+ 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS
+ 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6>
+ 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7>
+ 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7>
+ 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1>
+ 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1>
+ 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2>
+ 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0>
+ 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5>
+ 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5>
+ 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2>
+ 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS
+ 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1>
+ 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1>
+ 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2>
+ 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2>
+ 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0>
+ 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5>
+ 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, <6,u,5,6>
+ 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3>
+ 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1>
+ 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2>
+ 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1>
+ 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3>
+ 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1>
+ 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0>
+ 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0>
+ 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7>
+ 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2>
+ 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1>
+ 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1>
+ 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3>
+ 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3>
+ 3852915813U, // <7,7,1,4>: Cost 4 vsldoi12 RHS, <7,1,4,1>
+ 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0>
+ 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1>
+ 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7>
+ 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3>
+ 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3>
+ 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3>
+ 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2>
+ 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0>
+ 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3>
+ 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3>
+ 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3>
+ 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3>
+ 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3>
+ 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1>
+ 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1>
+ 3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6>
+ 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5>
+ 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5>
+ 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7>
+ 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7>
+ 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5>
+ 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2>
+ 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7>
+ 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3>
+ 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6>
+ 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4>
+ 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7>
+ 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6>
+ 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1>
+ 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0>
+ 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3>
+ 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3>
+ 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5>
+ 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5>
+ 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7>
+ 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7>
+ 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7>
+ 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS
+ 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7>
+ 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7>
+ 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3>
+ 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS
+ 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7>
+ 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7>
+ 2779174420U, // <7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0>
+ 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7>
+ 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS
+ 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1>
+ 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3>
+ 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3>
+ 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS
+ 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5>
+ 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7>
+ 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS
+ 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2>
+ 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3>
+ 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6>
+ 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 <u,6,7,7>, <u,6,7,7>
+ 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS
+ 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, <u,0,1,2>
+ 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2>
+ 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, <u,0,4,1>
+ 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS
+ 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6>
+ 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, <u,0,u,2>
+ 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS
+ 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1>
+ 1584581942U, // <7,u,1,4>: Cost 2 vsldoi4 <6,7,u,1>, RHS
+ 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, <u,1,5,0>
+ 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1>
+ 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, <u,1,7,1>
+ 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, <u,2,0,1>
+ 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, <u,2,1,3>
+ 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, <u,2,3,0>
+ 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, <u,2,4,3>
+ 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, <u,2,5,3>
+ 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <u,2,6,3>
+ 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, <u,2,7,3>
+ 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, <u,2,u,0>
+ 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, <u,3,0,1>
+ 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, <u,3,1,1>
+ 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <u,3,2,6>
+ 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, <u,3,4,5>
+ 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, <u,3,5,5>
+ 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u>
+ 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, <u,3,u,1>
+ 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, <u,4,0,1>
+ 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS
+ 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2>
+ 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, <u,4,5,6>
+ 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6>
+ 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, <u,4,u,6>
+ 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS
+ 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 RHS, <u,5,1,0>
+ 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7>
+ 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5>
+ 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS
+ 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, <u,5,7,1>
+ 631691436U, // <7,u,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, <u,6,0,1>
+ 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, <u,6,1,7>
+ 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u>
+ 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, <u,6,3,7>
+ 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, <u,6,4,5>
+ 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, <u,6,5,7>
+ 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u>
+ 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, <u,6,7,7>
+ 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, <u,6,u,7>
+ 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, <u,7,0,1>
+ 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, <u,7,2,6>
+ 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, <u,7,4,5>
+ 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, <u,7,6,2>
+ 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS
+ 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, <u,u,0,1>
+ 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, <u,u,1,2>
+ 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, <u,u,3,0>
+ 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, <u,u,4,5>
+ 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, <u,u,5,6>
+ 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS
+ 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS
+ 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0>
+ 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, <u,2,0,3>
+ 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS
+ 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0>
+ 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, <u,6,0,7>
+ 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS
+ 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS
+ 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2>
+ 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS
+ 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1>
+ 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1>
+ 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2>
+ 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0>
+ 1190174822U, // <u,0,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2>
+ 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1>
+ 2263916882U, // <u,0,2,4>: Cost 3 vmrghw <u,2,3,0>, <0,4,1,5>
+ 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7>
+ 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0>
+ 1190175389U, // <u,0,2,u>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0>
+ 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3>
+ 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2664386658U, // <u,0,3,5>: Cost 3 vsldoi4 <7,u,0,3>, <5,6,7,0>
+ 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3>
+ 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0>
+ 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4>
+ 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1618505014U, // <u,0,4,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4>
+ 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5>
+ 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5>
+ 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0>
+ 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5>
+ 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0>
+ 1192886374U, // <u,0,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3>
+ 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0>
+ 2266628434U, // <u,0,6,4>: Cost 3 vmrghw <u,6,3,7>, <0,4,1,5>
+ 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6>
+ 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1>
+ 1192886941U, // <u,0,6,u>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0>
+ 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7>
+ 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS
+ 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0>
+ 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS
+ 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, <u,3,0,1>
+ 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS
+ 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u>
+ 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1>
+ 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS
+ 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, <u,0,1,2>
+ 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0>
+ 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS
+ 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5>
+ 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1>
+ 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0>
+ 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1>
+ 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1>
+ 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS
+ 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2>
+ 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS
+ 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1>
+ 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, <u,6,1,7>
+ 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS
+ 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS
+ 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS
+ 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7>
+ 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1>
+ 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1>
+ 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2>
+ 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4>
+ 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS
+ 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1>
+ 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4>
+ 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5>
+ 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2>
+ 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS
+ 1210728786U, // <u,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0>
+ 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 <u,u,1,5>, <7,0,1,2>
+ 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS
+ 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6>
+ 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2>
+ 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2>
+ 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS
+ 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6>
+ 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1>
+ 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1>
+ 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS
+ 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS
+ 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u>
+ 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS
+ 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS
+ 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0>
+ 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS
+ 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2>
+ 1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5>
+ 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1>
+ 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2>
+ 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1>
+ 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS
+ 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6>
+ 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2697569869U, // <u,2,2,1>: Cost 3 vsldoi8 <2,1,u,2>, <2,1,u,2>
+ 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS
+ 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7>
+ 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, <u,6,2,7>
+ 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS
+ 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, <4,0,u,2>
+ 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2>
+ 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4>
+ 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5>
+ 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS
+ 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5>
+ 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS
+ 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5>
+ 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS
+ 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2>
+ 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2>
+ 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS
+ 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5>
+ 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2>
+ 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2>
+ 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS
+ 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS
+ 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0>
+ 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7>
+ 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 1611891508U, // <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7>
+ 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0>
+ 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5>
+ 2685634453U, // <u,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,u,6>
+ 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7>
+ 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1>
+ 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1>
+ 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3>
+ 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS
+ 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS
+ 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS
+ 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4>
+ 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2>
+ 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3>
+ 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS
+ 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4>
+ 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7>
+ 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5>
+ 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7>
+ 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS
+ 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3>
+ 2634770973U, // <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6>
+ 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS
+ 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3>
+ 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1>
+ 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7>
+ 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS
+ 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6>
+ 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, <u,0,1,2>
+ 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,3>
+ 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS
+ 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, <u,4,5,6>
+ 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0>
+ 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5>
+ 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0>
+ 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1189464978U, // <u,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1>
+ 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4>
+ 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1>
+ 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1>
+ 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 2263919506U, // <u,4,2,0>: Cost 3 vmrghw <u,2,3,0>, <4,0,5,1>
+ 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2>
+ 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1>
+ 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4>
+ 1190178102U, // <u,4,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7>
+ 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4>
+ 1190178345U, // <u,4,2,u>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2>
+ 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4>
+ 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3>
+ 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3>
+ 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS
+ 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4>
+ 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4>
+ 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, <u,2,4,3>
+ 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS
+ 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS
+ 2658718662U, // <u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4>
+ 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, <u,6,4,7>
+ 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS
+ 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS
+ 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5>
+ 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5>
+ 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2>
+ 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS
+ 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2>
+ 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2266631058U, // <u,4,6,0>: Cost 3 vmrghw <u,6,3,7>, <4,0,5,1>
+ 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3>
+ 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4>
+ 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4>
+ 1192889654U, // <u,4,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6>
+ 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1>
+ 1192889897U, // <u,4,6,u>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS
+ 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4>
+ 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4>
+ 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7>
+ 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4>
+ 2309556892U, // <u,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235814097U, // <u,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 1561116774U, // <u,4,u,0>: Cost 2 vsldoi4 <2,u,4,u>, LHS
+ 1618540334U, // <u,4,u,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1561118822U, // <u,4,u,2>: Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u>
+ 2692282300U, // <u,4,u,3>: Cost 3 vsldoi8 <1,2,u,4>, <u,3,0,1>
+ 229035318U, // <u,4,u,4>: Cost 1 vspltisw0 RHS
+ 120368438U, // <u,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 604859945U, // <u,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2309565084U, // <u,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 604859963U, // <u,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2690293760U, // <u,5,0,0>: Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0>
+ 1616552038U, // <u,5,0,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2640840434U, // <u,5,0,2>: Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5>
+ 2640841536U, // <u,5,0,3>: Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0>
+ 1613381970U, // <u,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2316135642U, // <u,5,0,5>: Cost 3 vmrglw <5,6,u,0>, <4,4,5,5>
+ 2289592834U, // <u,5,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,5,6>
+ 2664732324U, // <u,5,0,7>: Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0>
+ 1616552661U, // <u,5,0,u>: Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5>
+ 1573077094U, // <u,5,1,0>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 1237536282U, // <u,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2690294678U, // <u,5,1,2>: Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0>
+ 2646821014U, // <u,5,1,3>: Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2>
+ 1573080602U, // <u,5,1,4>: Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1>
+ 1189466116U, // <u,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1189466210U, // <u,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2646823930U, // <u,5,1,7>: Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2>
+ 1573082926U, // <u,5,1,u>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 2640855142U, // <u,5,2,0>: Cost 3 vsldoi4 <3,u,5,2>, LHS
+ 2697594448U, // <u,5,2,1>: Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5>
+ 2690295400U, // <u,5,2,2>: Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2>
+ 1625179890U, // <u,5,2,3>: Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5>
+ 2699585347U, // <u,5,2,4>: Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5>
+ 2781171471U, // <u,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2690295738U, // <u,5,2,6>: Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7>
+ 3775318070U, // <u,5,2,7>: Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5>
+ 1628498055U, // <u,5,2,u>: Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5>
+ 2287627234U, // <u,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1257016210U, // <u,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2646836942U, // <u,5,3,2>: Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5>
+ 2287625131U, // <u,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287627238U, // <u,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1257016538U, // <u,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1209240066U, // <u,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287625459U, // <u,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1209240068U, // <u,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 2640871526U, // <u,5,4,0>: Cost 3 vsldoi4 <3,u,5,4>, LHS
+ 2316168082U, // <u,5,4,1>: Cost 3 vmrglw <5,6,u,4>, <4,0,5,1>
+ 2640873202U, // <u,5,4,2>: Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5>
+ 2640874308U, // <u,5,4,3>: Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4>
+ 1637788917U, // <u,5,4,4>: Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5>
+ 1616555318U, // <u,5,4,5>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 2287638591U, // <u,5,4,6>: Cost 3 vmrglw <0,u,u,4>, <u,4,5,6>
+ 2664765096U, // <u,5,4,7>: Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4>
+ 1616555561U, // <u,5,4,u>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 1573109862U, // <u,5,5,0>: Cost 2 vsldoi4 <4,u,5,5>, LHS
+ 2646852404U, // <u,5,5,1>: Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1>
+ 2646853224U, // <u,5,5,2>: Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2>
+ 2287646618U, // <u,5,5,3>: Cost 3 vmrglw <0,u,u,5>, <u,2,5,3>
+ 1573113374U, // <u,5,5,4>: Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5>
+ 296144182U, // <u,5,5,5>: Cost 1 vspltisw1 RHS
+ 1192448098U, // <u,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2287646946U, // <u,5,5,7>: Cost 3 vmrglw <0,u,u,5>, <u,6,5,7>
+ 296144182U, // <u,5,5,u>: Cost 1 vspltisw1 RHS
+ 1567146086U, // <u,5,6,0>: Cost 2 vsldoi4 <3,u,5,6>, LHS
+ 2628945300U, // <u,5,6,1>: Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6>
+ 2634917997U, // <u,5,6,2>: Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6>
+ 1567148870U, // <u,5,6,3>: Cost 2 vsldoi4 <3,u,5,6>, <3,u,5,6>
+ 1567149366U, // <u,5,6,4>: Cost 2 vsldoi4 <3,u,5,6>, RHS
+ 2781171799U, // <u,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 1228950018U, // <u,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 2628952166U, // <u,5,7,0>: Cost 3 vsldoi4 <1,u,5,7>, LHS
+ 1235815314U, // <u,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309556734U, // <u,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309555115U, // <u,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2628955446U, // <u,5,7,4>: Cost 3 vsldoi4 <1,u,5,7>, RHS
+ 1235815642U, // <u,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235814914U, // <u,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309555443U, // <u,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235814916U, // <u,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 1567162470U, // <u,5,u,0>: Cost 2 vsldoi4 <3,u,5,u>, LHS
+ 1616557870U, // <u,5,u,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2690299781U, // <u,5,u,2>: Cost 3 vsldoi8 <0,u,u,5>, <u,2,3,0>
+ 1567165256U, // <u,5,u,3>: Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u>
+ 1567165750U, // <u,5,u,4>: Cost 2 vsldoi4 <3,u,5,u>, RHS
+ 296144182U, // <u,5,u,5>: Cost 1 vspltisw1 RHS
+ 1209281026U, // <u,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2705563648U, // <u,6,0,0>: Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0>
+ 1631821926U, // <u,6,0,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 2262462970U, // <u,6,0,2>: Cost 3 vmrghw <u,0,1,2>, <6,2,7,3>
+ 2646886941U, // <u,6,0,3>: Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6>
+ 2705563986U, // <u,6,0,4>: Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5>
+ 2316062652U, // <u,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316137272U, // <u,6,0,6>: Cost 3 vmrglw <5,6,u,0>, <6,6,6,6>
+ 1215851830U, // <u,6,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 1215851831U, // <u,6,0,u>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 2634948710U, // <u,6,1,0>: Cost 3 vsldoi4 <2,u,6,1>, LHS
+ 2705564468U, // <u,6,1,1>: Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1>
+ 1189466618U, // <u,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2263208498U, // <u,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2693620843U, // <u,6,1,4>: Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6>
+ 2652868860U, // <u,6,1,5>: Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1>
+ 1189466936U, // <u,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1213869366U, // <u,6,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 1213869367U, // <u,6,1,u>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 2658844774U, // <u,6,2,0>: Cost 3 vsldoi4 <6,u,6,2>, LHS
+ 3771344465U, // <u,6,2,1>: Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6>
+ 1178554874U, // <u,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2698929907U, // <u,6,2,3>: Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6>
+ 2699593540U, // <u,6,2,4>: Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6>
+ 2700257173U, // <u,6,2,5>: Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6>
+ 2705565626U, // <u,6,2,6>: Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7>
+ 1226485046U, // <u,6,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 1226485047U, // <u,6,2,u>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 2705565846U, // <u,6,3,0>: Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2>
+ 2330756585U, // <u,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2330756829U, // <u,6,3,2>: Cost 3 vmrglw LHS, <2,3,6,2>
+ 2282981734U, // <u,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 1631824413U, // <u,6,3,4>: Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6>
+ 2652885246U, // <u,6,3,5>: Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3>
+ 1257018168U, // <u,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135499062U, // <u,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135499063U, // <u,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 2646917222U, // <u,6,4,0>: Cost 3 vsldoi4 <4,u,6,4>, LHS
+ 2217365931U, // <u,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2790167156U, // <u,6,4,2>: Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u>
+ 2646919709U, // <u,6,4,3>: Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6>
+ 2711538934U, // <u,6,4,4>: Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6>
+ 1631825206U, // <u,6,4,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 2316170040U, // <u,6,4,6>: Cost 3 vmrglw <5,6,u,4>, <6,6,6,6>
+ 1215884598U, // <u,6,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 1215884599U, // <u,6,4,u>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 2634981478U, // <u,6,5,0>: Cost 3 vsldoi4 <2,u,6,5>, LHS
+ 2266190247U, // <u,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1192448506U, // <u,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2266190386U, // <u,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2634984758U, // <u,6,5,4>: Cost 3 vsldoi4 <2,u,6,5>, RHS
+ 2652901632U, // <u,6,5,5>: Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5>
+ 1192448824U, // <u,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1213902134U, // <u,6,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1213902135U, // <u,6,5,u>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1583808614U, // <u,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <u,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2718839290U, // <u,6,6,2>: Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3>
+ 2670823965U, // <u,6,6,3>: Cost 3 vsldoi4 <u,u,6,6>, <3,4,u,6>
+ 1583811894U, // <u,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2724147961U, // <u,6,6,5>: Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6>
+ 363253046U, // <u,6,6,6>: Cost 1 vspltisw2 RHS
+ 1229172022U, // <u,6,6,7>: Cost 2 vmrglw <3,4,u,6>, RHS
+ 363253046U, // <u,6,6,u>: Cost 1 vspltisw2 RHS
+ 499458150U, // <u,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1573200692U, // <u,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1573201512U, // <u,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573202070U, // <u,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499461673U, // <u,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1573203972U, // <u,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1235817272U, // <u,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 162073910U, // <u,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 162073911U, // <u,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 499466342U, // <u,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631827758U, // <u,6,u,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 1573209704U, // <u,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573210262U, // <u,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499469866U, // <u,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631828122U, // <u,6,u,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 363253046U, // <u,6,u,6>: Cost 1 vspltisw2 RHS
+ 135540022U, // <u,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135540023U, // <u,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 1638465536U, // <u,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564723814U, // <u,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712207533U, // <u,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712207612U, // <u,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638465874U, // <u,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1579192580U, // <u,7,0,5>: Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0>
+ 2712207862U, // <u,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2316137282U, // <u,7,0,7>: Cost 3 vmrglw <5,6,u,0>, <6,6,7,7>
+ 564724381U, // <u,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 1189467130U, // <u,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 1638466356U, // <u,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638466454U, // <u,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 2311500282U, // <u,7,1,3>: Cost 3 vmrglw <4,u,u,1>, <6,2,7,3>
+ 1189467494U, // <u,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2712208495U, // <u,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2694956302U, // <u,7,1,6>: Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7>
+ 1189467756U, // <u,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1638466940U, // <u,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712208829U, // <u,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712208927U, // <u,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638467176U, // <u,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638467238U, // <u,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712209165U, // <u,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712209256U, // <u,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1627187175U, // <u,7,2,6>: Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7>
+ 2324116290U, // <u,7,2,7>: Cost 3 vmrglw <7,0,u,2>, <6,6,7,7>
+ 1628514441U, // <u,7,2,u>: Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7>
+ 1638467734U, // <u,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712209638U, // <u,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2700929387U, // <u,7,3,2>: Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u>
+ 1638467996U, // <u,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638468098U, // <u,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712210002U, // <u,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 1585189856U, // <u,7,3,6>: Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3>
+ 1257018178U, // <u,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1638468382U, // <u,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638468498U, // <u,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712210378U, // <u,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712210485U, // <u,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712210564U, // <u,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638468816U, // <u,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564727112U, // <u,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712210809U, // <u,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2712210888U, // <u,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,0>
+ 564727337U, // <u,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 1192449018U, // <u,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2714201743U, // <u,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712211198U, // <u,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2311533050U, // <u,7,5,3>: Cost 3 vmrglw <4,u,u,5>, <6,2,7,3>
+ 1192449382U, // <u,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 1638469636U, // <u,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638469730U, // <u,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 1192449644U, // <u,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1638469892U, // <u,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712211745U, // <u,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712211879U, // <u,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638470138U, // <u,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712212018U, // <u,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712212109U, // <u,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712212203U, // <u,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638470456U, // <u,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638470478U, // <u,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638470559U, // <u,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,0,1>
+ 1235816546U, // <u,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309558371U, // <u,7,7,1>: Cost 3 vmrglw RHS, <5,6,7,1>
+ 2641045434U, // <u,7,7,2>: Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7>
+ 1235816954U, // <u,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1235816550U, // <u,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309558375U, // <u,7,7,5>: Cost 3 vmrglw RHS, <5,6,7,5>
+ 1585222628U, // <u,7,7,6>: Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7>
+ 430361910U, // <u,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <u,7,7,u>: Cost 1 vspltisw3 RHS
+ 1638471379U, // <u,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564729646U, // <u,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638471557U, // <u,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638471612U, // <u,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638471743U, // <u,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564730010U, // <u,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638471888U, // <u,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 430361910U, // <u,7,u,7>: Cost 1 vspltisw3 RHS
+ 564730213U, // <u,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 202162278U, // <u,u,0,0>: Cost 1 vspltisw0 LHS
+ 538189985U, // <u,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685673645U, // <u,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 1215848604U, // <u,u,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 1611931986U, // <u,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 1579266317U, // <u,u,0,5>: Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0>
+ 2289592861U, // <u,u,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,u,6>
+ 1215851848U, // <u,u,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 538190493U, // <u,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1549411025U, // <u,u,1,0>: Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1>
+ 115726126U, // <u,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 604862254U, // <u,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 1213866140U, // <u,u,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1549413686U, // <u,u,1,4>: Cost 2 vsldoi4 <0,u,u,1>, RHS
+ 115726490U, // <u,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1585247207U, // <u,u,1,6>: Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1>
+ 1213869384U, // <u,u,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 604862308U, // <u,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 1567334502U, // <u,u,2,0>: Cost 2 vsldoi4 <3,u,u,2>, LHS
+ 1190180654U, // <u,u,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 336380006U, // <u,u,2,2>: Cost 1 vspltisw2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1567337782U, // <u,u,2,4>: Cost 2 vsldoi4 <3,u,u,2>, RHS
+ 1190181018U, // <u,u,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 1611933626U, // <u,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1226485064U, // <u,u,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 475685587U, // <u,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1209239278U, // <u,u,3,1>: Cost 2 vmrglw LHS, <2,3,u,1>
+ 1209239765U, // <u,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135495836U, // <u,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 475688246U, // <u,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1209239282U, // <u,u,3,5>: Cost 2 vmrglw LHS, <2,3,u,5>
+ 1209240093U, // <u,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135499080U, // <u,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135495841U, // <u,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1555406950U, // <u,u,4,0>: Cost 2 vsldoi4 <1,u,u,4>, LHS
+ 1555408301U, // <u,u,4,1>: Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4>
+ 2289625301U, // <u,u,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,u,2>
+ 1215881372U, // <u,u,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 229035318U, // <u,u,4,4>: Cost 1 vspltisw0 RHS
+ 538193206U, // <u,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2289625629U, // <u,u,4,6>: Cost 3 vmrglw <1,2,u,4>, <3,4,u,6>
+ 1215884616U, // <u,u,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 538193449U, // <u,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1549443797U, // <u,u,5,0>: Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5>
+ 118708014U, // <u,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1561389191U, // <u,u,5,2>: Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5>
+ 1213898908U, // <u,u,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 1549446454U, // <u,u,5,4>: Cost 2 vsldoi4 <0,u,u,5>, RHS
+ 118708378U, // <u,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 604862618U, // <u,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 1213902152U, // <u,u,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 604862636U, // <u,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 1567367270U, // <u,u,6,0>: Cost 2 vsldoi4 <3,u,u,6>, LHS
+ 1192892206U, // <u,u,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1638478330U, // <u,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1679046864U, // <u,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 1567370550U, // <u,u,6,4>: Cost 2 vsldoi4 <3,u,u,6>, RHS
+ 1192892570U, // <u,u,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 363253046U, // <u,u,6,6>: Cost 1 vspltisw2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 499605606U, // <u,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1235812425U, // <u,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1561405577U, // <u,u,7,2>: Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7>
+ 162070684U, // <u,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 499609147U, // <u,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1235812753U, // <u,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235814941U, // <u,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 162073928U, // <u,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 162070689U, // <u,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 475726552U, // <u,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 538195758U, // <u,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 604862821U, // <u,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 475729206U, // <u,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 538196122U, // <u,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 604862861U, // <u,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
new file mode 100644
index 000000000000..1d61a3a8eac2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -0,0 +1,618 @@
+//===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "PPCRegisterInfo.h"
+#include "PPC.h"
+#include "PPCFrameLowering.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "PPCGenRegisterInfo.inc"
+
+using namespace llvm;
+
+PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
+ ST.isPPC64() ? 0 : 1,
+ ST.isPPC64() ? 0 : 1),
+ Subtarget(ST), TII(tii) {
+ ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
+ ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
+ ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
+ ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX;
+ ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX;
+ ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
+ ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+
+ // 64-bit
+ ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
+ ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
+ ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
+ ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *
+PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ if (Kind == 1) {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RC_NOX0RegClass;
+ return &PPC::GPRC_NOR0RegClass;
+ }
+
+ if (Subtarget.isPPC64())
+ return &PPC::G8RCRegClass;
+ return &PPC::GPRCRegClass;
+}
+
+const uint16_t*
+PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
+ CSR_Darwin32_SaveList;
+
+ return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
+}
+
+const uint32_t*
+PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? CSR_Darwin64_RegMask :
+ CSR_Darwin32_RegMask;
+
+ return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
+}
+
+const uint32_t*
+PPCRegisterInfo::getNoPreservedMask() const {
+ // The naming here is inverted: The CSR_NoRegs_Altivec has the
+ // Altivec registers masked so that they're not saved and restored around
+ // instructions with this preserved mask.
+
+ if (!Subtarget.hasAltivec())
+ return CSR_NoRegs_Altivec_RegMask;
+
+ if (Subtarget.isDarwin())
+ return CSR_NoRegs_Darwin_RegMask;
+ return CSR_NoRegs_RegMask;
+}
+
+BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+
+ // The ZERO register is not really a register, but the representation of r0
+ // when used in instructions that treat r0 as the constant 0.
+ Reserved.set(PPC::ZERO);
+ Reserved.set(PPC::ZERO8);
+
+ // The FP register is also not really a register, but is the representation
+ // of the frame pointer register used by ISD::FRAMEADDR.
+ Reserved.set(PPC::FP);
+ Reserved.set(PPC::FP8);
+
+ Reserved.set(PPC::R1);
+ Reserved.set(PPC::LR);
+ Reserved.set(PPC::LR8);
+ Reserved.set(PPC::RM);
+
+ // The SVR4 ABI reserves r2 and r13
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::R2); // System-reserved register
+ Reserved.set(PPC::R13); // Small Data Area pointer register
+ }
+
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ if (Subtarget.isPPC64()) {
+ Reserved.set(PPC::R13);
+
+ Reserved.set(PPC::X1);
+ Reserved.set(PPC::X13);
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::X31);
+
+ // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::X2);
+ }
+ }
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::R31);
+
+ return Reserved;
+}
+
+unsigned
+PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const unsigned DefaultSafety = 1;
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case PPC::G8RC_NOX0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
+ case PPC::G8RCRegClassID:
+ case PPC::GPRCRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 32 - FP - DefaultSafety;
+ }
+ case PPC::F8RCRegClassID:
+ case PPC::F4RCRegClassID:
+ case PPC::VRRCRegClassID:
+ return 32 - DefaultSafety;
+ case PPC::CRRCRegClassID:
+ return 8 - DefaultSafety;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+/// lowerDynamicAlloc - Generate the code for allocating an object in the
+/// current frame. The sequence of code with be in the general form
+///
+/// addi R0, SP, \#frameSize ; get the address of the previous frame
+/// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size
+/// addi Rnew, SP, \#maxCalFrameSize ; get the top of the allocation
+///
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = Subtarget.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Get the maximum call stack size.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ // Get the total frame size.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ if (MaxAlign > TargetAlign)
+ report_fatal_error("Dynamic alloca with large aligns not supported");
+
+ // Determine the previous frame's address. If FrameSize can't be
+ // represented as 16 bits or we need special alignment, then we load the
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
+ // Fortunately, a frame greater than 32K is rare.
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
+ } else if (LP64) {
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
+ .addImm(0)
+ .addReg(PPC::R1);
+ }
+
+ // Grow the stack and update the stack pointer link, then determine the
+ // address of new allocated space.
+ if (LP64) {
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
+
+/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of
+/// reserving a whole register (R0), we scrounge for one here. This generates
+/// code like this:
+///
+/// mfcr rA ; Move the conditional register into GPR rA.
+/// rlwinm rA, rA, SB, 0, 31 ; Shift the bits left so they are in CR0's slot.
+/// stw rA, FI ; Store rA to the frame.
+///
+void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ // We need to store the CR in the low 4-bits of the saved value. First, issue
+ // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFCR8pseud : PPC::MFCRpseud), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are in
+ // CR0's slot.
+ if (SrcReg != PPC::CR0) {
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ // rlwinm rA, rA, ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg) * 4)
+ .addImm(0)
+ .addImm(31);
+ }
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CR does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ unsigned ShiftBits = getEncodingValue(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0)
+ .addImm(31);
+ }
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(Reg, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_VRSAVE does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
+ .addReg(Reg, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+bool
+PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
+
+ // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
+ // ABI, return true to prevent allocating an additional frame slot.
+ // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
+ // is arbitrary and will be subsequently ignored. For 32-bit, we have
+ // previously created the stack slot if needed, so return its FrameIdx.
+ if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
+ if (Subtarget.isPPC64())
+ FrameIdx = 0;
+ else {
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ FrameIdx = FI->getCRSpillFrameIndex();
+ }
+ return true;
+ }
+ return false;
+}
+
+void
+PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Take into account whether it's an add or mem instruction
+ unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
+ if (MI.isInlineAsm())
+ OffsetOperandNo = FIOperandNum-1;
+
+ // Get the frame index.
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+
+ // Get the frame pointer save index. Users of this index are primarily
+ // DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+ // Get the instruction opcode.
+ unsigned OpC = MI.getOpcode();
+
+ // Special case for dynamic alloca.
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
+ lowerDynamicAlloc(II);
+ return;
+ }
+
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::SPILL_VRSAVE) {
+ lowerVRSAVESpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_VRSAVE) {
+ lowerVRSAVERestore(II, FrameIndex);
+ return;
+ }
+
+ // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+
+ bool is64Bit = Subtarget.isPPC64();
+ MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ?
+ (is64Bit ? PPC::X31 : PPC::R31) :
+ (is64Bit ? PPC::X1 : PPC::R1),
+ false);
+
+ // Figure out if the offset in the instruction is shifted right two bits. This
+ // is true for instructions like "STD", which the machine implicitly adds two
+ // low zeros to.
+ bool isIXAddr = false;
+ switch (OpC) {
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ isIXAddr = true;
+ break;
+ }
+
+ // If the instruction is not present in ImmToIdxMap, then it has no immediate
+ // form (and must be r+r).
+ bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC);
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+ if (!isIXAddr)
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+ else
+ Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ // Naked functions have stack size 0, although getStackSize may not reflect that
+ // because we didn't call all the pieces that compute it for naked functions.
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked))
+ Offset += MFI->getStackSize();
+
+ // If we can, encode the offset directly into the instruction. If this is a
+ // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
+ // this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits
+ // clear can be encoded. This is extremely uncommon, because normally you
+ // only "std" to a stack slot that is at least 4-byte aligned, but it can
+ // happen in invalid code.
+ if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
+ (!noImmForm &&
+ isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
+ if (isIXAddr)
+ Offset >>= 2; // The actual encoded value has the low two bits zero.
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // The offset doesn't fit into a single register, scavenge one to build the
+ // offset in.
+
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
+ unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC),
+ SReg = MF.getRegInfo().createVirtualRegister(RC);
+
+ // Insert a set of rA with the full offset value before the ld, st, or add
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
+ .addImm(Offset >> 16);
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
+ .addReg(SRegHi, RegState::Kill)
+ .addImm(Offset);
+
+ // Convert into indexed form of the instruction:
+ //
+ // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
+ // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
+ unsigned OperandBase;
+
+ if (noImmForm)
+ OperandBase = 1;
+ else if (OpC != TargetOpcode::INLINEASM) {
+ assert(ImmToIdxMap.count(OpC) &&
+ "No indexed form of load or store available!");
+ unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ MI.setDesc(TII.get(NewOpcode));
+ OperandBase = 1;
+ } else {
+ OperandBase = OffsetOperandNo;
+ }
+
+ unsigned StackReg = MI.getOperand(FIOperandNum).getReg();
+ MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
+ MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
+}
+
+unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!Subtarget.isPPC64())
+ return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
+ else
+ return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
+}
+
+unsigned PPCRegisterInfo::getEHExceptionRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3;
+}
+
+unsigned PPCRegisterInfo::getEHHandlerRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
new file mode 100644
index 000000000000..7e6683eeb2ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -0,0 +1,90 @@
+//===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_REGISTERINFO_H
+#define POWERPC32_REGISTERINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "PPC.h"
+
+#define GET_REGINFO_HEADER
+#include "PPCGenRegisterInfo.inc"
+
+namespace llvm {
+class PPCSubtarget;
+class TargetInstrInfo;
+class Type;
+
+class PPCRegisterInfo : public PPCGenRegisterInfo {
+ DenseMap<unsigned, unsigned> ImmToIdxMap;
+ const PPCSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+public:
+ PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ /// Code Generation virtual methods...
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
+ const uint32_t *getNoPreservedMask() const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// We require the register scavenger.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+ }
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
new file mode 100644
index 000000000000..57a25f5143fa
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -0,0 +1,232 @@
+//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "PPC" in {
+def sub_lt : SubRegIndex;
+def sub_gt : SubRegIndex;
+def sub_eq : SubRegIndex;
+def sub_un : SubRegIndex;
+def sub_32 : SubRegIndex;
+}
+
+
+class PPCReg<string n> : Register<n> {
+ let Namespace = "PPC";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 32-bit general-purpose registers
+class GPR<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+}
+
+// GP8 - One of the 32 64-bit general-purpose registers
+class GP8<GPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_32];
+}
+
+// SPR - One of the 32-bit special-purpose registers
+class SPR<bits<10> num, string n> : PPCReg<n> {
+ let HWEncoding{9-0} = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+}
+
+// CR - One of the 8 4-bit condition registers
+class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{2-0} = num;
+ let SubRegs = subregs;
+}
+
+// CRBIT - One of the 32 1-bit condition register fields
+class CRBIT<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+}
+
+// General-purpose registers
+foreach Index = 0-31 in {
+ def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
+}
+
+// 64-bit General-purpose registers
+foreach Index = 0-31 in {
+ def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>,
+ DwarfRegNum<[Index, -2]>;
+}
+
+// Floating-point registers
+foreach Index = 0-31 in {
+ def F#Index : FPR<Index, "f"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
+// Vector registers
+foreach Index = 0-31 in {
+ def V#Index : VR<Index, "v"#Index>,
+ DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+}
+
+// The reprsentation of r0 when treated as the constant 0.
+def ZERO : GPR<0, "0">;
+def ZERO8 : GP8<ZERO, "0">;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8 : GP8<FP, "**FRAME POINTER**">;
+
+// Condition register bits
+def CR0LT : CRBIT< 0, "0">;
+def CR0GT : CRBIT< 1, "1">;
+def CR0EQ : CRBIT< 2, "2">;
+def CR0UN : CRBIT< 3, "3">;
+def CR1LT : CRBIT< 4, "4">;
+def CR1GT : CRBIT< 5, "5">;
+def CR1EQ : CRBIT< 6, "6">;
+def CR1UN : CRBIT< 7, "7">;
+def CR2LT : CRBIT< 8, "8">;
+def CR2GT : CRBIT< 9, "9">;
+def CR2EQ : CRBIT<10, "10">;
+def CR2UN : CRBIT<11, "11">;
+def CR3LT : CRBIT<12, "12">;
+def CR3GT : CRBIT<13, "13">;
+def CR3EQ : CRBIT<14, "14">;
+def CR3UN : CRBIT<15, "15">;
+def CR4LT : CRBIT<16, "16">;
+def CR4GT : CRBIT<17, "17">;
+def CR4EQ : CRBIT<18, "18">;
+def CR4UN : CRBIT<19, "19">;
+def CR5LT : CRBIT<20, "20">;
+def CR5GT : CRBIT<21, "21">;
+def CR5EQ : CRBIT<22, "22">;
+def CR5UN : CRBIT<23, "23">;
+def CR6LT : CRBIT<24, "24">;
+def CR6GT : CRBIT<25, "25">;
+def CR6EQ : CRBIT<26, "26">;
+def CR6UN : CRBIT<27, "27">;
+def CR7LT : CRBIT<28, "28">;
+def CR7GT : CRBIT<29, "29">;
+def CR7EQ : CRBIT<30, "30">;
+def CR7UN : CRBIT<31, "31">;
+
+// Condition registers
+let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
+def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68, 68]>;
+def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69, 69]>;
+def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70, 70]>;
+def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71, 71]>;
+def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72, 72]>;
+def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73, 73]>;
+def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
+def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
+}
+
+// Link register
+def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
+//let Aliases = [LR] in
+def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]>;
+
+// Count register
+def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>;
+def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
+
+// VRsave register
+def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[109]>;
+
+// Carry bit. In the architecture this is really bit 0 of the XER register
+// (which really is SPR register 1); this is the only bit interesting to a
+// compiler.
+def CARRY: SPR<1, "ca">;
+
+// FP rounding mode: bits 30 and 31 of the FP status and control register
+// This is not allocated as a normal register; it appears only in
+// Uses and Defs. The ABI says it needs to be preserved by a function,
+// but this is not achieved by saving and restoring it as with
+// most registers, it has to be done in code; to make this work all the
+// return and call instructions are described as Uses of RM, so instructions
+// that do nothing but change RM will not get deleted.
+// Also, in the architecture it is not really a SPR; 512 is arbitrary.
+def RM: SPR<512, "**ROUNDING MODE**">;
+
+/// Register classes
+// Allocate volatiles first
+// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
+def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
+ (sequence "R%u", 30, 13),
+ R31, R0, R1, FP)>;
+
+def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
+ (sequence "X%u", 30, 14),
+ X31, X13, X0, X1, FP8)>;
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
+
+// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
+// ABI the size of the Floating-point register save area is determined by the
+// allocated non-volatile register with the lowest register number, as FP
+// register N is spilled to offset 8 * (32 - N) below the back chain word of the
+// previous stack frame. By allocating non-volatiles in reverse order we make
+// sure that the Floating-point register save area is always as small as
+// possible because there aren't any unused spill slots.
+def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
+ (sequence "F%u", 31, 14))>;
+def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
+
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+ (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
+ V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+ V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
+
+def CRBITRC : RegisterClass<"PPC", [i32], 32,
+ (add CR0LT, CR0GT, CR0EQ, CR0UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR2LT, CR2GT, CR2EQ, CR2UN,
+ CR3LT, CR3GT, CR3EQ, CR3UN,
+ CR4LT, CR4GT, CR4EQ, CR4UN,
+ CR5LT, CR5GT, CR5EQ, CR5UN,
+ CR6LT, CR6GT, CR6EQ, CR6UN,
+ CR7LT, CR7GT, CR7EQ, CR7UN)>
+{
+ let CopyCost = -1;
+}
+
+def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
+ CR7, CR2, CR3, CR4)>;
+
+// The CTR registers are not allocatable because they're used by the
+// decrement-and-branch instructions, and thus need to stay live across
+// multiple basic blocks.
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)> {
+ let isAllocatable = 0;
+}
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
+ let isAllocatable = 0;
+}
+
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
+def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
+ let CopyCost = -1;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h
new file mode 100644
index 000000000000..0b392f99b6d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h
@@ -0,0 +1,56 @@
+//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCRELOCATIONS_H
+#define PPCRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+// Hack to rid us of a PPC pre-processor symbol which is erroneously
+// defined in a PowerPC header file (bug in Linux/PPC)
+#ifdef PPC
+#undef PPC
+#endif
+
+namespace llvm {
+ namespace PPC {
+ enum RelocationType {
+ // reloc_vanilla - A standard relocation, where the address of the
+ // relocated object completely overwrites the address of the relocation.
+ reloc_vanilla,
+
+ // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
+ reloc_pcrel_bx,
+
+ // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
+ // and other bcx instructions.
+ reloc_pcrel_bcx,
+
+ // reloc_absolute_high - Absolute relocation, for the loadhi instruction
+ // (which is really addis). Add the high 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_high,
+
+ // reloc_absolute_low - Absolute relocation, for the la instruction (which
+ // is really an addi). Add the low 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_low,
+
+ // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
+ // instruction which have two implicit zero bits.
+ reloc_absolute_low_ix
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
new file mode 100644
index 000000000000..660c0c3b6359
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -0,0 +1,519 @@
+//===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across PowerPC chips sets
+//
+def BPU : FuncUnit; // Branch unit
+def SLU : FuncUnit; // Store/load unit
+def SRU : FuncUnit; // special register unit
+def IU1 : FuncUnit; // integer unit 1 (simple)
+def IU2 : FuncUnit; // integer unit 2 (complex)
+def FPU1 : FuncUnit; // floating point unit 1
+def FPU2 : FuncUnit; // floating point unit 2
+def VPU : FuncUnit; // vector permutation unit
+def VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def VFPU : FuncUnit; // vector floating point unit
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for PowerPC
+//
+def IntSimple : InstrItinClass;
+def IntGeneral : InstrItinClass;
+def IntCompare : InstrItinClass;
+def IntDivD : InstrItinClass;
+def IntDivW : InstrItinClass;
+def IntMFFS : InstrItinClass;
+def IntMFVSCR : InstrItinClass;
+def IntMTFSB0 : InstrItinClass;
+def IntMTSRD : InstrItinClass;
+def IntMulHD : InstrItinClass;
+def IntMulHW : InstrItinClass;
+def IntMulHWU : InstrItinClass;
+def IntMulLI : InstrItinClass;
+def IntRFID : InstrItinClass;
+def IntRotateD : InstrItinClass;
+def IntRotateDI : InstrItinClass;
+def IntRotate : InstrItinClass;
+def IntShift : InstrItinClass;
+def IntTrapD : InstrItinClass;
+def IntTrapW : InstrItinClass;
+def BrB : InstrItinClass;
+def BrCR : InstrItinClass;
+def BrMCR : InstrItinClass;
+def BrMCRX : InstrItinClass;
+def LdStDCBA : InstrItinClass;
+def LdStDCBF : InstrItinClass;
+def LdStDCBI : InstrItinClass;
+def LdStLoad : InstrItinClass;
+def LdStLoadUpd : InstrItinClass;
+def LdStStore : InstrItinClass;
+def LdStStoreUpd : InstrItinClass;
+def LdStDSS : InstrItinClass;
+def LdStICBI : InstrItinClass;
+def LdStLD : InstrItinClass;
+def LdStLDU : InstrItinClass;
+def LdStLDARX : InstrItinClass;
+def LdStLFD : InstrItinClass;
+def LdStLFDU : InstrItinClass;
+def LdStLHA : InstrItinClass;
+def LdStLHAU : InstrItinClass;
+def LdStLMW : InstrItinClass;
+def LdStLVecX : InstrItinClass;
+def LdStLWA : InstrItinClass;
+def LdStLWARX : InstrItinClass;
+def LdStSLBIA : InstrItinClass;
+def LdStSLBIE : InstrItinClass;
+def LdStSTD : InstrItinClass;
+def LdStSTDCX : InstrItinClass;
+def LdStSTDU : InstrItinClass;
+def LdStSTFD : InstrItinClass;
+def LdStSTFDU : InstrItinClass;
+def LdStSTVEBX : InstrItinClass;
+def LdStSTWCX : InstrItinClass;
+def LdStSync : InstrItinClass;
+def SprISYNC : InstrItinClass;
+def SprMFSR : InstrItinClass;
+def SprMTMSR : InstrItinClass;
+def SprMTSR : InstrItinClass;
+def SprTLBSYNC : InstrItinClass;
+def SprMFCR : InstrItinClass;
+def SprMFMSR : InstrItinClass;
+def SprMFSPR : InstrItinClass;
+def SprMFTB : InstrItinClass;
+def SprMTSPR : InstrItinClass;
+def SprMTSRIN : InstrItinClass;
+def SprRFI : InstrItinClass;
+def SprSC : InstrItinClass;
+def FPGeneral : InstrItinClass;
+def FPAddSub : InstrItinClass;
+def FPCompare : InstrItinClass;
+def FPDivD : InstrItinClass;
+def FPDivS : InstrItinClass;
+def FPFused : InstrItinClass;
+def FPRes : InstrItinClass;
+def FPSqrt : InstrItinClass;
+def VecGeneral : InstrItinClass;
+def VecFP : InstrItinClass;
+def VecFPCompare : InstrItinClass;
+def VecComplex : InstrItinClass;
+def VecPerm : InstrItinClass;
+def VecFPRound : InstrItinClass;
+def VecVSL : InstrItinClass;
+def VecVSR : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "PPCScheduleG3.td"
+include "PPCSchedule440.td"
+include "PPCScheduleG4.td"
+include "PPCScheduleG4Plus.td"
+include "PPCScheduleG5.td"
+include "PPCScheduleA2.td"
+include "PPCScheduleE500mc.td"
+include "PPCScheduleE5500.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction to itinerary class map - When add new opcodes to the supported
+// set, refer to the following table to determine which itinerary class the
+// opcode belongs.
+//
+// opcode itinerary class
+// ====== ===============
+// add IntSimple
+// addc IntGeneral
+// adde IntGeneral
+// addi IntSimple
+// addic IntGeneral
+// addic. IntGeneral
+// addis IntSimple
+// addme IntGeneral
+// addze IntGeneral
+// and IntSimple
+// andc IntSimple
+// andi. IntGeneral
+// andis. IntGeneral
+// b BrB
+// bc BrB
+// bcctr BrB
+// bclr BrB
+// cmp IntCompare
+// cmpi IntCompare
+// cmpl IntCompare
+// cmpli IntCompare
+// cntlzd IntRotateD
+// cntlzw IntGeneral
+// crand BrCR
+// crandc BrCR
+// creqv BrCR
+// crnand BrCR
+// crnor BrCR
+// cror BrCR
+// crorc BrCR
+// crxor BrCR
+// dcba LdStDCBA
+// dcbf LdStDCBF
+// dcbi LdStDCBI
+// dcbst LdStDCBF
+// dcbt LdStLoad
+// dcbtst LdStLoad
+// dcbz LdStDCBF
+// divd IntDivD
+// divdu IntDivD
+// divw IntDivW
+// divwu IntDivW
+// dss LdStDSS
+// dst LdStDSS
+// dstst LdStDSS
+// eciwx LdStLoad
+// ecowx LdStLoad
+// eieio LdStLoad
+// eqv IntSimple
+// extsb IntSimple
+// extsh IntSimple
+// extsw IntSimple
+// fabs FPGeneral
+// fadd FPAddSub
+// fadds FPGeneral
+// fcfid FPGeneral
+// fcmpo FPCompare
+// fcmpu FPCompare
+// fctid FPGeneral
+// fctidz FPGeneral
+// fctiw FPGeneral
+// fctiwz FPGeneral
+// fdiv FPDivD
+// fdivs FPDivS
+// fmadd FPFused
+// fmadds FPGeneral
+// fmr FPGeneral
+// fmsub FPFused
+// fmsubs FPGeneral
+// fmul FPFused
+// fmuls FPGeneral
+// fnabs FPGeneral
+// fneg FPGeneral
+// fnmadd FPFused
+// fnmadds FPGeneral
+// fnmsub FPFused
+// fnmsubs FPGeneral
+// fres FPRes
+// frsp FPGeneral
+// frsqrte FPGeneral
+// fsel FPGeneral
+// fsqrt FPSqrt
+// fsqrts FPSqrt
+// fsub FPAddSub
+// fsubs FPGeneral
+// icbi LdStICBI
+// isync SprISYNC
+// lbz LdStLoad
+// lbzu LdStLoadUpd
+// lbzux LdStLoadUpd
+// lbzx LdStLoad
+// ld LdStLD
+// ldarx LdStLDARX
+// ldu LdStLDU
+// ldux LdStLDU
+// ldx LdStLD
+// lfd LdStLFD
+// lfdu LdStLFDU
+// lfdux LdStLFDU
+// lfdx LdStLFD
+// lfs LdStLFD
+// lfsu LdStLFDU
+// lfsux LdStLFDU
+// lfsx LdStLFD
+// lha LdStLHA
+// lhau LdStLHAU
+// lhaux LdStLHAU
+// lhax LdStLHA
+// lhbrx LdStLoad
+// lhz LdStLoad
+// lhzu LdStLoadUpd
+// lhzux LdStLoadUpd
+// lhzx LdStLoad
+// lmw LdStLMW
+// lswi LdStLMW
+// lswx LdStLMW
+// lvebx LdStLVecX
+// lvehx LdStLVecX
+// lvewx LdStLVecX
+// lvsl LdStLVecX
+// lvsr LdStLVecX
+// lvx LdStLVecX
+// lvxl LdStLVecX
+// lwa LdStLWA
+// lwarx LdStLWARX
+// lwaux LdStLHAU
+// lwax LdStLHA
+// lwbrx LdStLoad
+// lwz LdStLoad
+// lwzu LdStLoadUpd
+// lwzux LdStLoadUpd
+// lwzx LdStLoad
+// mcrf BrMCR
+// mcrfs FPGeneral
+// mcrxr BrMCRX
+// mfcr SprMFCR
+// mffs IntMFFS
+// mfmsr SprMFMSR
+// mfspr SprMFSPR
+// mfsr SprMFSR
+// mfsrin SprMFSR
+// mftb SprMFTB
+// mfvscr IntMFVSCR
+// mtcrf BrMCRX
+// mtfsb0 IntMTFSB0
+// mtfsb1 IntMTFSB0
+// mtfsf IntMTFSB0
+// mtfsfi IntMTFSB0
+// mtmsr SprMTMSR
+// mtmsrd LdStLD
+// mtspr SprMTSPR
+// mtsr SprMTSR
+// mtsrd IntMTSRD
+// mtsrdin IntMTSRD
+// mtsrin SprMTSRIN
+// mtvscr IntMFVSCR
+// mulhd IntMulHD
+// mulhdu IntMulHD
+// mulhw IntMulHW
+// mulhwu IntMulHWU
+// mulld IntMulHD
+// mulli IntMulLI
+// mullw IntMulHW
+// nand IntSimple
+// neg IntSimple
+// nor IntSimple
+// or IntSimple
+// orc IntSimple
+// ori IntSimple
+// oris IntSimple
+// rfi SprRFI
+// rfid IntRFID
+// rldcl IntRotateD
+// rldcr IntRotateD
+// rldic IntRotateDI
+// rldicl IntRotateDI
+// rldicr IntRotateDI
+// rldimi IntRotateDI
+// rlwimi IntRotate
+// rlwinm IntGeneral
+// rlwnm IntGeneral
+// sc SprSC
+// slbia LdStSLBIA
+// slbie LdStSLBIE
+// sld IntRotateD
+// slw IntGeneral
+// srad IntRotateD
+// sradi IntRotateDI
+// sraw IntShift
+// srawi IntShift
+// srd IntRotateD
+// srw IntGeneral
+// stb LdStStore
+// stbu LdStStoreUpd
+// stbux LdStStoreUpd
+// stbx LdStStore
+// std LdStSTD
+// stdcx. LdStSTDCX
+// stdu LdStSTDU
+// stdux LdStSTDU
+// stdx LdStSTD
+// stfd LdStSTFD
+// stfdu LdStSTFDU
+// stfdux LdStSTFDU
+// stfdx LdStSTFD
+// stfiwx LdStSTFD
+// stfs LdStSTFD
+// stfsu LdStSTFDU
+// stfsux LdStSTFDU
+// stfsx LdStSTFD
+// sth LdStStore
+// sthbrx LdStStore
+// sthu LdStStoreUpd
+// sthux LdStStoreUpd
+// sthx LdStStore
+// stmw LdStLMW
+// stswi LdStLMW
+// stswx LdStLMW
+// stvebx LdStSTVEBX
+// stvehx LdStSTVEBX
+// stvewx LdStSTVEBX
+// stvx LdStSTVEBX
+// stvxl LdStSTVEBX
+// stw LdStStore
+// stwbrx LdStStore
+// stwcx. LdStSTWCX
+// stwu LdStStoreUpd
+// stwux LdStStoreUpd
+// stwx LdStStore
+// subf IntGeneral
+// subfc IntGeneral
+// subfe IntGeneral
+// subfic IntGeneral
+// subfme IntGeneral
+// subfze IntGeneral
+// sync LdStSync
+// td IntTrapD
+// tdi IntTrapD
+// tlbia LdStSLBIA
+// tlbie LdStDCBF
+// tlbsync SprTLBSYNC
+// tw IntTrapW
+// twi IntTrapW
+// vaddcuw VecGeneral
+// vaddfp VecFP
+// vaddsbs VecGeneral
+// vaddshs VecGeneral
+// vaddsws VecGeneral
+// vaddubm VecGeneral
+// vaddubs VecGeneral
+// vadduhm VecGeneral
+// vadduhs VecGeneral
+// vadduwm VecGeneral
+// vadduws VecGeneral
+// vand VecGeneral
+// vandc VecGeneral
+// vavgsb VecGeneral
+// vavgsh VecGeneral
+// vavgsw VecGeneral
+// vavgub VecGeneral
+// vavguh VecGeneral
+// vavguw VecGeneral
+// vcfsx VecFP
+// vcfux VecFP
+// vcmpbfp VecFPCompare
+// vcmpeqfp VecFPCompare
+// vcmpequb VecGeneral
+// vcmpequh VecGeneral
+// vcmpequw VecGeneral
+// vcmpgefp VecFPCompare
+// vcmpgtfp VecFPCompare
+// vcmpgtsb VecGeneral
+// vcmpgtsh VecGeneral
+// vcmpgtsw VecGeneral
+// vcmpgtub VecGeneral
+// vcmpgtuh VecGeneral
+// vcmpgtuw VecGeneral
+// vctsxs VecFP
+// vctuxs VecFP
+// vexptefp VecFP
+// vlogefp VecFP
+// vmaddfp VecFP
+// vmaxfp VecFPCompare
+// vmaxsb VecGeneral
+// vmaxsh VecGeneral
+// vmaxsw VecGeneral
+// vmaxub VecGeneral
+// vmaxuh VecGeneral
+// vmaxuw VecGeneral
+// vmhaddshs VecComplex
+// vmhraddshs VecComplex
+// vminfp VecFPCompare
+// vminsb VecGeneral
+// vminsh VecGeneral
+// vminsw VecGeneral
+// vminub VecGeneral
+// vminuh VecGeneral
+// vminuw VecGeneral
+// vmladduhm VecComplex
+// vmrghb VecPerm
+// vmrghh VecPerm
+// vmrghw VecPerm
+// vmrglb VecPerm
+// vmrglh VecPerm
+// vmrglw VecPerm
+// vmsubfp VecFP
+// vmsummbm VecComplex
+// vmsumshm VecComplex
+// vmsumshs VecComplex
+// vmsumubm VecComplex
+// vmsumuhm VecComplex
+// vmsumuhs VecComplex
+// vmulesb VecComplex
+// vmulesh VecComplex
+// vmuleub VecComplex
+// vmuleuh VecComplex
+// vmulosb VecComplex
+// vmulosh VecComplex
+// vmuloub VecComplex
+// vmulouh VecComplex
+// vnor VecGeneral
+// vor VecGeneral
+// vperm VecPerm
+// vpkpx VecPerm
+// vpkshss VecPerm
+// vpkshus VecPerm
+// vpkswss VecPerm
+// vpkswus VecPerm
+// vpkuhum VecPerm
+// vpkuhus VecPerm
+// vpkuwum VecPerm
+// vpkuwus VecPerm
+// vrefp VecFPRound
+// vrfim VecFPRound
+// vrfin VecFPRound
+// vrfip VecFPRound
+// vrfiz VecFPRound
+// vrlb VecGeneral
+// vrlh VecGeneral
+// vrlw VecGeneral
+// vrsqrtefp VecFP
+// vsel VecGeneral
+// vsl VecVSL
+// vslb VecGeneral
+// vsldoi VecPerm
+// vslh VecGeneral
+// vslo VecPerm
+// vslw VecGeneral
+// vspltb VecPerm
+// vsplth VecPerm
+// vspltisb VecPerm
+// vspltish VecPerm
+// vspltisw VecPerm
+// vspltw VecPerm
+// vsr VecVSR
+// vsrab VecGeneral
+// vsrah VecGeneral
+// vsraw VecGeneral
+// vsrb VecGeneral
+// vsrh VecGeneral
+// vsro VecPerm
+// vsrw VecGeneral
+// vsubcuw VecGeneral
+// vsubfp VecFP
+// vsubsbs VecGeneral
+// vsubshs VecGeneral
+// vsubsws VecGeneral
+// vsububm VecGeneral
+// vsububs VecGeneral
+// vsubuhm VecGeneral
+// vsubuhs VecGeneral
+// vsubuwm VecGeneral
+// vsubuws VecGeneral
+// vsum2sws VecComplex
+// vsum4sbs VecComplex
+// vsum4shs VecComplex
+// vsum4ubs VecComplex
+// vsumsws VecComplex
+// vupkhpx VecPerm
+// vupkhsb VecPerm
+// vupkhsh VecPerm
+// vupklpx VecPerm
+// vupklsb VecPerm
+// vupklsh VecPerm
+// vxor VecGeneral
+// xor IntSimple
+// xori IntSimple
+// xoris IntSimple
+//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
new file mode 100644
index 000000000000..37b6eac10cfe
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -0,0 +1,663 @@
+//===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// PowerPC 440x6 Embedded Processor Core User's Manual.
+// IBM (as updated in) 2010.
+
+// The basic PPC 440 does not include a floating-point unit; the pipeline
+// timings here are constructed to match the FP2 unit shipped with the
+// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers.
+// References:
+// S. Chatterjee, et al. Design and exploitation of a high-performance
+// SIMD floating-point unit for Blue Gene/L.
+// IBM J. Res. & Dev. 49 (2/3) March/May 2005.
+// also:
+// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution:
+// Blue Gene/P Application Development.
+// IBM (as updated in) 2009.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC 440/450 chip sets
+//
+def IFTH1 : FuncUnit; // Fetch unit 1
+def IFTH2 : FuncUnit; // Fetch unit 2
+def PDCD1 : FuncUnit; // Decode unit 1
+def PDCD2 : FuncUnit; // Decode unit 2
+def DISS1 : FuncUnit; // Issue unit 1
+def DISS2 : FuncUnit; // Issue unit 2
+def LRACC : FuncUnit; // Register access and dispatch for
+ // the simple integer (J-pipe) and
+ // load/store (L-pipe) pipelines
+def IRACC : FuncUnit; // Register access and dispatch for
+ // the complex integer (I-pipe) pipeline
+def FRACC : FuncUnit; // Register access and dispatch for
+ // the floating-point execution (F-pipe) pipeline
+def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
+def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
+def IWB : FuncUnit; // Write-back unit for the I pipeline
+def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
+def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
+def JWB : FuncUnit; // Write-back unit for the J pipeline
+def AGEN : FuncUnit; // Address generation for the L pipeline
+def CRD : FuncUnit; // D-cache access for the L pipeline
+def LWB : FuncUnit; // Write-back unit for the L pipeline
+def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
+def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
+def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
+def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
+def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
+def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
+def FWB : FuncUnit; // Write-back unit for the F pipeline
+
+def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
+ // to make sure that no lwarx/stwcx.
+ // instructions are issued while another
+ // lwarx/stwcx. is in the L pipe.
+
+def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+// Notes:
+// Instructions are held in the FRACC, LRACC and IRACC pipeline
+// stages until their source operands become ready. Exceptions:
+// - Store instructions will hold in the AGEN stage
+// - The integer multiply-accumulate instruction will hold in
+// the IEXE1 stage
+//
+// For most I-pipe operations, the result is available at the end of
+// the IEXE1 stage. Operations such as multiply and divide must
+// continue to execute in IEXE2 and IWB. Divide resides in IWB for
+// 33 cycles (multiply also calculates its result in IWB). For all
+// J-pipe instructions, the result is available
+// at the end of the JEXE1 stage. Loads have a 3-cycle latency
+// (data is not available until after the LWB stage).
+//
+// The L1 cache hit latency is four cycles for floating point loads
+// and three cycles for integer loads.
+//
+// The stwcx. instruction requires both the LRACC and the IRACC
+// dispatch stages. It must be issued from DISS0.
+//
+// All lwarx/stwcx. instructions hold in LRACC if another
+// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB.
+//
+// msync (a.k.a. sync) and mbar will hold in LWB until all load/store
+// resources are empty. AGEN and CRD are held empty until the msync/mbar
+// commits.
+//
+// Most floating-point instructions, computational and move,
+// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that
+// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above,
+// loads take 4 cycles (for L1 hit).
+
+//
+// This file defines the itinerary class data for the PPC 440 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPC440Itineraries : ProcessorItineraries<
+ [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC,
+ IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB,
+ FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold],
+ [GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<33, [IWB]>],
+ [40, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [9, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHA , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<3, [AGEN], 1>,
+ InstrStage<2, [CRD], 1>,
+ InstrStage<1, [LWB]>]>,
+ InstrItinData<SprISYNC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC], 0>,
+ InstrStage<1, [LRACC], 0>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [FEXE1], 0>,
+ InstrStage<1, [AGEN], 0>,
+ InstrStage<1, [JEXE1], 0>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [FEXE2], 0>,
+ InstrStage<1, [CRD], 0>,
+ InstrStage<1, [JEXE2], 0>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<6, [FEXE3], 0>,
+ InstrStage<6, [LWB], 0>,
+ InstrStage<6, [JWB], 0>,
+ InstrStage<6, [IWB]>]>,
+ InstrItinData<SprMFSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [9, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprSC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<25, [FWB]>],
+ [35, 4, 4],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<13, [FWB]>],
+ [23, 4, 4],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4],
+ [FPR_Bypass, FPR_Bypass]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
new file mode 100644
index 000000000000..ae084aa0e8c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -0,0 +1,766 @@
+//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// A2 Processor User's Manual.
+// IBM (as updated in) 2010.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC A2 chip sets
+//
+def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1
+def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2
+def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3
+def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4
+def IU4_0 : FuncUnit; // Instruction buffer slot 1
+def IU4_1 : FuncUnit; // Instruction buffer slot 2
+def IU4_2 : FuncUnit; // Instruction buffer slot 3
+def IU4_3 : FuncUnit; // Instruction buffer slot 4
+def IU4_4 : FuncUnit; // Instruction buffer slot 5
+def IU4_5 : FuncUnit; // Instruction buffer slot 6
+def IU4_6 : FuncUnit; // Instruction buffer slot 7
+def IU4_7 : FuncUnit; // Instruction buffer slot 8
+def IU5 : FuncUnit; // Dependency resolution
+def IU6 : FuncUnit; // Instruction issue
+def RF0 : FuncUnit;
+def XRF1 : FuncUnit;
+def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline
+def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline
+def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline
+def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline
+def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline
+def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline
+def FRF1 : FuncUnit;
+def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline
+def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline
+def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline
+def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline
+def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline
+def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline
+
+def CR_Bypass : Bypass; // The bypass for condition regs.
+//def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+//def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+//
+// This file defines the itinerary class data for the PPC A2 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPCA2Itineraries : ProcessorItineraries<
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3,
+ IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7,
+ IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6,
+ FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6],
+ [CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntGeneral , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>],
+ [53, 7, 7],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateDI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 11],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 11],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 11],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStStore , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStICBI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<LdStLFD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHA , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [26, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDCX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [26, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [26, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>,
+ InstrItinData<SprISYNC , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
+ InstrItinData<SprMFSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [GPR_Bypass, NoBypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
+ InstrItinData<SprMFCR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7],
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [GPR_Bypass, NoBypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprRFI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprSC , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7, 7],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7, 7],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [13, 7, 7],
+ [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>,
+ InstrStage<71, [FEX1], 0>,
+ InstrStage<71, [FEX2], 0>,
+ InstrStage<71, [FEX3], 0>,
+ InstrStage<71, [FEX4], 0>,
+ InstrStage<71, [FEX5], 0>,
+ InstrStage<71, [FEX6]>],
+ [86, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>,
+ InstrStage<58, [FEX1], 0>,
+ InstrStage<58, [FEX2], 0>,
+ InstrStage<58, [FEX3], 0>,
+ InstrStage<58, [FEX4], 0>,
+ InstrStage<58, [FEX5], 0>,
+ InstrStage<58, [FEX6]>],
+ [73, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPSqrt , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>,
+ InstrStage<68, [FEX1], 0>,
+ InstrStage<68, [FEX2], 0>,
+ InstrStage<68, [FEX3], 0>,
+ InstrStage<68, [FEX4], 0>,
+ InstrStage<68, [FEX5], 0>,
+ InstrStage<68, [FEX6]>],
+ [86, 7], // FIXME: should be [86, 7] for double
+ // and [82, 7] for single. Likewise,
+ // the FEX? cycle count should be 68
+ // for double and 64 for single.
+ [NoBypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7, 7, 7],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7],
+ [FPR_Bypass, FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// A2 machine model for scheduling and other instruction cost heuristics.
+
+def PPCA2Model : SchedMachineModel {
+ let IssueWidth = 1; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 6;
+
+ let Itineraries = PPCA2Itineraries;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
new file mode 100644
index 000000000000..9bb779a0e62b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -0,0 +1,265 @@
+//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e500mc 32-bit
+// Power processor.
+//
+// All information is derived from the "e500mc Core Reference Manual",
+// Freescale Document Number E500MCRM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e500mc core:
+//
+// * Decode & Dispatch
+// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
+def DIS0 : FuncUnit; // Dispatch stage - insn 1
+def DIS1 : FuncUnit; // Dispatch stage - insn 2
+
+// * Execute
+// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+// Some instructions can only execute in SFX0 but not SFX1.
+// The CFX has a bypass path, allowing non-divide instructions to execute
+// while a divide instruction is executed.
+def SFX0 : FuncUnit; // Simple unit 0
+def SFX1 : FuncUnit; // Simple unit 1
+def BU : FuncUnit; // Branch unit
+def CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def CFX_0 : FuncUnit; // CFX pipeline
+def LSU_0 : FuncUnit; // LSU pipeline
+def FPU_0 : FuncUnit; // FPU pipeline
+
+def PPCE500mcItineraries : ProcessorItineraries<
+ [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
+ [CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 1, 1], // Latency = 1 or 2
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<14, [CFX_DivBypass]>],
+ [17, 1, 1], // Latency=4..35, Repeat= 4..35
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<8, [FPU_0]>],
+ [11], // Latency = 8
+ [FPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<8, [FPU_0]>],
+ [11, 1, 1], // Latency = 8
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0]>],
+ [5, 1], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [4, 1], // Latency = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [4, 1, 1], // Latency = 1
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [4, 1], // Latency = 1
+ [CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [CR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 1], // Latency = r+3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<3, [LSU_0]>],
+ [6, 1, 1], // Latency = 3, Repeat rate = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>]>,
+ InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [7, 1],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0, SFX1]>],
+ [5, 1], // Latency = 2, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0]>],
+ [5, 1],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0], 0>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<5, [SFX0]>],
+ [8, 1],
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [CR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0]>],
+ [4, 1],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [FPU_0]>],
+ [13, 1, 1], // Latency = 10, Repeat rate = 4
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<68, [FPU_0]>],
+ [71, 1, 1], // Latency = 68, Repeat rate = 68
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<38, [FPU_0]>],
+ [41, 1, 1], // Latency = 38, Repeat rate = 38
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [FPU_0]>],
+ [13, 1, 1, 1], // Latency = 10, Repeat rate = 4
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<38, [FPU_0]>],
+ [41, 1], // Latency = 38, Repeat rate = 38
+ [FPR_Bypass, FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e500mc machine model for scheduling and other instruction cost heuristics.
+
+def PPCE500mcModel : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 5; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPCE500mcItineraries;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
new file mode 100644
index 000000000000..d7e11acd9fc7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -0,0 +1,309 @@
+//===-- PPCScheduleE500mc.td - e5500 Scheduling Defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e5500 64-bit
+// Power processor.
+//
+// All information is derived from the "e5500 Core Reference Manual",
+// Freescale Document Number e5500RM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e5500 core
+// (These are the same as for the e500mc)
+//
+// * Decode & Dispatch
+// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
+// def DIS0 : FuncUnit;
+// def DIS1 : FuncUnit;
+
+// * Execute
+// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+// The CFX has a bypass path, allowing non-divide instructions to execute
+// while a divide instruction is being executed.
+// def SFX0 : FuncUnit; // Simple unit 0
+// def SFX1 : FuncUnit; // Simple unit 1
+// def BU : FuncUnit; // Branch unit
+// def CFX_DivBypass
+// : FuncUnit; // CFX divide bypass path
+// def CFX_0 : FuncUnit; // CFX pipeline stage 0
+
+def CFX_1 : FuncUnit; // CFX pipeline stage 1
+
+// def LSU_0 : FuncUnit; // LSU pipeline
+// def FPU_0 : FuncUnit; // FPU pipeline
+
+
+def PPCE5500Itineraries : ProcessorItineraries<
+ [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1,
+ LSU_0, FPU_0],
+ [CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [6, 2, 2], // Latency = 1 or 2
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<26, [CFX_DivBypass]>],
+ [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<16, [CFX_DivBypass]>],
+ [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<7, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 7
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<2, [CFX_1]>],
+ [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<1, [CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<1, [CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<2, [CFX_1]>],
+ [8, 2, 2], // Latency = 4 or 5, Repeat = 2
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0, SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0, SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0]>],
+ [6, 2], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [5, 2], // Latency = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [5, 2, 2], // Latency = 1
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [5, 2], // Latency = 1
+ [CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [5, 2, 2], // Latency = 1
+ [CR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<3, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [LSU_0]>],
+ [8, 2], // Latency = r+3, Repeat rate = r+3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<3, [LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [CFX_0]>],
+ [6, 2], // Latency = 2, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0], 0>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<5, [CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [CFX_0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<31, [FPU_0]>],
+ [39, 2, 2], // Latency = 35, Repeat rate = 31
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<16, [FPU_0]>],
+ [24, 2, 2], // Latency = 20, Repeat rate = 16
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2, 2], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [FPU_0]>],
+ [12, 2], // Latency = 8, Repeat rate = 2
+ [FPR_Bypass, FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e5500 machine model for scheduling and other instruction cost heuristics.
+
+def PPCE5500Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPCE5500Itineraries;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
new file mode 100644
index 000000000000..72a0a392631a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -0,0 +1,71 @@
+//===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G3 (750) processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def G3Itineraries : ProcessorItineraries<
+ [IU1, IU2, FPU1, BPU, SRU, SLU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
new file mode 100644
index 000000000000..fc9120dfa290
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -0,0 +1,81 @@
+//===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4 (7400) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4Itineraries : ProcessorItineraries<
+ [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
new file mode 100644
index 000000000000..a4e82ce23e6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -0,0 +1,88 @@
+//===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4+ (7450) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def IU3 : FuncUnit; // integer unit 3 (7450 simple)
+def IU4 : FuncUnit; // integer unit 4 (7450 simple)
+
+def G4PlusItineraries : ProcessorItineraries<
+ [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
+ InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecVSR , [InstrStage<2, [VPU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
new file mode 100644
index 000000000000..c64998d52a0c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -0,0 +1,109 @@
+//===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G5 (970) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G5Itineraries : ProcessorItineraries<
+ [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntSimple , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
+ InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
+ InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
+ InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<4, [BPU]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
+ InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
+ InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>,
+ InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<3, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e5500 machine model for scheduling and other instruction cost heuristics.
+
+def G5Model : SchedMachineModel {
+ let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle.
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ let Itineraries = G5Itineraries;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..d4258b4a0eb1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "powerpc-selectiondag-info"
+#include "PPCTargetMachine.h"
+using namespace llvm;
+
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
new file mode 100644
index 000000000000..341b69cdfb5f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCCSELECTIONDAGINFO_H
+#define POWERPCCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class PPCTargetMachine;
+
+class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
+ ~PPCSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
new file mode 100644
index 000000000000..a8f2b3f47d1b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -0,0 +1,159 @@
+//===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCSubtarget.h"
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cstdlib>
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PPCGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : PPCGenSubtargetInfo(TT, CPU, FS)
+ , StackAlignment(16)
+ , DarwinDirective(PPC::DIR_NONE)
+ , HasMFOCRF(false)
+ , Has64BitSupport(false)
+ , Use64BitRegs(false)
+ , IsPPC64(is64Bit)
+ , HasAltivec(false)
+ , HasQPX(false)
+ , HasFSQRT(false)
+ , HasFRE(false)
+ , HasFRES(false)
+ , HasFRSQRTE(false)
+ , HasFRSQRTES(false)
+ , HasRecipPrec(false)
+ , HasSTFIWX(false)
+ , HasLFIWAX(false)
+ , HasFPRND(false)
+ , HasFPCVT(false)
+ , HasISEL(false)
+ , HasPOPCNTD(false)
+ , HasLDBRX(false)
+ , IsBookE(false)
+ , HasLazyResolverStubs(false)
+ , IsJITCodeModel(false)
+ , TargetTriple(TT) {
+
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+#if (defined(__APPLE__) || defined(__linux__)) && \
+ (defined(__ppc__) || defined(__powerpc__))
+ if (CPUName == "generic")
+ CPUName = sys::getHostCPUName();
+#endif
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // Make sure 64-bit features are available when CPUname is generic
+ std::string FullFS = FS;
+
+ // If we are generating code for ppc64, verify that options make sense.
+ if (is64Bit) {
+ Has64BitSupport = true;
+ // Silently force 64-bit register use on ppc64.
+ Use64BitRegs = true;
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
+ }
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FullFS);
+
+ // If the user requested use of 64-bit regs, but the cpu selected doesn't
+ // support it, ignore.
+ if (use64BitRegs() && !has64BitSupport())
+ Use64BitRegs = false;
+
+ // Set up darwin-specific properties.
+ if (isDarwin())
+ HasLazyResolverStubs = true;
+
+ // QPX requires a 32-byte aligned stack. Note that we need to do this if
+ // we're compiling for a BG/Q system regardless of whether or not QPX
+ // is enabled because external functions will assume this alignment.
+ if (hasQPX() || isBGQ())
+ StackAlignment = 32;
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void PPCSubtarget::SetJITMode() {
+ // JIT mode doesn't want lazy resolver stubs, it knows exactly where
+ // everything is. This matters for PPC64, which codegens in PIC mode without
+ // stubs.
+ HasLazyResolverStubs = false;
+
+ // Calls to external functions need to use indirect calls
+ IsJITCodeModel = true;
+}
+
+
+/// hasLazyResolverStub - Return true if accesses to the specified global have
+/// to go through a dyld lazy resolution stub. This means that an extra load
+/// is required to get the address of the global.
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // We never have stubs if HasLazyResolverStubs=false or if in static mode.
+ if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
+ return false;
+ // If symbol visibility is hidden, the extra load is not needed if
+ // the symbol is definitely defined in the current translation unit.
+ bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
+ return false;
+ return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ GV->hasCommonLinkage() || isDecl;
+}
+
+bool PPCSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ // FIXME: It would be best to use TargetSubtargetInfo::ANTIDEP_ALL here,
+ // but we can't because we can't reassign the cr registers. There is a
+ // dependence between the cr register and the RLWINM instruction used
+ // to extract its value which the anti-dependency breaker can't currently
+ // see. Maybe we should make a late-expanded pseudo to encode this dependency.
+ // (the relevant code is in PPCDAGToDAGISel::SelectSETCC)
+
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+
+ CriticalPathRCs.clear();
+
+ if (isPPC64())
+ CriticalPathRCs.push_back(&PPC::G8RCRegClass);
+ else
+ CriticalPathRCs.push_back(&PPC::GPRCRegClass);
+
+ CriticalPathRCs.push_back(&PPC::F8RCRegClass);
+ CriticalPathRCs.push_back(&PPC::VRRCRegClass);
+
+ return OptLevel >= CodeGenOpt::Default;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
new file mode 100644
index 000000000000..65b4d211fc6a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -0,0 +1,200 @@
+//===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSUBTARGET_H
+#define POWERPCSUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PPCGenSubtargetInfo.inc"
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+class StringRef;
+
+namespace PPC {
+ // -m directive values.
+ enum {
+ DIR_NONE,
+ DIR_32,
+ DIR_440,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_A2,
+ DIR_E500mc,
+ DIR_E5500,
+ DIR_PWR3,
+ DIR_PWR4,
+ DIR_PWR5,
+ DIR_PWR5X,
+ DIR_PWR6,
+ DIR_PWR6X,
+ DIR_PWR7,
+ DIR_64
+ };
+}
+
+class GlobalValue;
+class TargetMachine;
+
+class PPCSubtarget : public PPCGenSubtargetInfo {
+protected:
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which cpu directive was used.
+ unsigned DarwinDirective;
+
+ /// Used by the ISel to turn in optimizations for POWER4-derived architectures
+ bool HasMFOCRF;
+ bool Has64BitSupport;
+ bool Use64BitRegs;
+ bool IsPPC64;
+ bool HasAltivec;
+ bool HasQPX;
+ bool HasFSQRT;
+ bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
+ bool HasRecipPrec;
+ bool HasSTFIWX;
+ bool HasLFIWAX;
+ bool HasFPRND;
+ bool HasFPCVT;
+ bool HasISEL;
+ bool HasPOPCNTD;
+ bool HasLDBRX;
+ bool IsBookE;
+ bool HasLazyResolverStubs;
+ bool IsJITCodeModel;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+ /// getDarwinDirective - Returns the -m directive specified for the cpu.
+ ///
+ unsigned getDarwinDirective() const { return DarwinDirective; }
+
+ /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getDataLayoutString - Return the pointer size and type alignment
+ /// properties of this subtarget.
+ const char *getDataLayoutString() const {
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (isPPC64() && isSVR4ABI()) {
+ if (TargetTriple.getOS() == llvm::Triple::FreeBSD)
+ return "E-p:64:64-f64:64:64-i64:64:64-f128:64:64-v128:128:128-n32:64";
+ else
+ return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64";
+ }
+
+ return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
+ : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
+ }
+
+ /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
+ ///
+ bool isPPC64() const { return IsPPC64; }
+
+ /// has64BitSupport - Return true if the selected CPU supports 64-bit
+ /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
+ bool has64BitSupport() const { return Has64BitSupport; }
+
+ /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
+ /// registers in 32-bit mode when possible. This can only true if
+ /// has64BitSupport() returns true.
+ bool use64BitRegs() const { return Use64BitRegs; }
+
+ /// hasLazyResolverStub - Return true if accesses to the specified global have
+ /// to go through a dyld lazy resolution stub. This means that an extra load
+ /// is required to get the address of the global.
+ bool hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ // isJITCodeModel - True if we're generating code for the JIT
+ bool isJITCodeModel() const { return IsJITCodeModel; }
+
+ // Specific obvious features.
+ bool hasFSQRT() const { return HasFSQRT; }
+ bool hasFRE() const { return HasFRE; }
+ bool hasFRES() const { return HasFRES; }
+ bool hasFRSQRTE() const { return HasFRSQRTE; }
+ bool hasFRSQRTES() const { return HasFRSQRTES; }
+ bool hasRecipPrec() const { return HasRecipPrec; }
+ bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasLFIWAX() const { return HasLFIWAX; }
+ bool hasFPRND() const { return HasFPRND; }
+ bool hasFPCVT() const { return HasFPCVT; }
+ bool hasAltivec() const { return HasAltivec; }
+ bool hasQPX() const { return HasQPX; }
+ bool hasMFOCRF() const { return HasMFOCRF; }
+ bool hasISEL() const { return HasISEL; }
+ bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasLDBRX() const { return HasLDBRX; }
+ bool isBookE() const { return IsBookE; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ /// isDarwin - True if this is any darwin platform.
+ bool isDarwin() const { return TargetTriple.isMacOSX(); }
+ /// isBGP - True if this is a BG/P platform.
+ bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
+ /// isBGQ - True if this is a BG/Q platform.
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
+
+ bool isDarwinABI() const { return isDarwin(); }
+ bool isSVR4ABI() const { return !isDarwin(); }
+
+ /// enablePostRAScheduler - True at 'More' optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
new file mode 100644
index 000000000000..fe851c1b6fb8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -0,0 +1,137 @@
+//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetMachine.h"
+#include "PPC.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::
+opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
+ cl::desc("Disable CTR loops for PPC"));
+
+extern "C" void LLVMInitializePowerPCTarget() {
+ // Register the targets
+ RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
+ RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
+}
+
+PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, is64Bit),
+ DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
+ FrameLowering(Subtarget), JITInfo(*this, is64Bit),
+ TLInfo(*this), TSInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+
+ // The binutils for the BG/P are too old for CFI.
+ if (Subtarget.isBGP())
+ setMCUseCFI(false);
+}
+
+void PPC32TargetMachine::anchor() { }
+
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+}
+
+void PPC64TargetMachine::anchor() { }
+
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// PPC Code Generator Pass Configuration Options.
+class PPCPassConfig : public TargetPassConfig {
+public:
+ PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ PPCTargetMachine &getPPCTargetMachine() const {
+ return getTM<PPCTargetMachine>();
+ }
+
+ virtual bool addPreRegAlloc();
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new PPCPassConfig(this, PM);
+}
+
+bool PPCPassConfig::addPreRegAlloc() {
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCCTRLoops());
+
+ return false;
+}
+
+bool PPCPassConfig::addInstSelector() {
+ // Install an instruction selector.
+ addPass(createPPCISelDag(getPPCTargetMachine()));
+ return false;
+}
+
+bool PPCPassConfig::addPreEmitPass() {
+ // Must run branch selection immediately preceding the asm printer.
+ addPass(createPPCBranchSelectionPass());
+ return false;
+}
+
+bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE) {
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE));
+
+ return false;
+}
+
+void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our PPC pass. This
+ // allows the PPC pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createPPCTargetTransformInfoPass(this));
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
new file mode 100644
index 000000000000..606ccb314126
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -0,0 +1,100 @@
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_TARGETMACHINE_H
+#define PPC_TARGETMACHINE_H
+
+#include "PPCFrameLowering.h"
+#include "PPCISelLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCJITInfo.h"
+#include "PPCSelectionDAGInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
+///
+class PPCTargetMachine : public LLVMTargetMachine {
+ PPCSubtarget Subtarget;
+ const DataLayout DL; // Calculates type size & alignment
+ PPCInstrInfo InstrInfo;
+ PPCFrameLowering FrameLowering;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
+ InstrItineraryData InstrItins;
+
+public:
+ PPCTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64Bit);
+
+ virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const PPCFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const PPCTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const PPCRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+ virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE);
+
+ /// \brief Register PPC analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+};
+
+/// PPC32TargetMachine - PowerPC 32-bit target machine.
+///
+class PPC32TargetMachine : public PPCTargetMachine {
+ virtual void anchor();
+public:
+ PPC32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+/// PPC64TargetMachine - PowerPC 64-bit target machine.
+///
+class PPC64TargetMachine : public PPCTargetMachine {
+ virtual void anchor();
+public:
+ PPC64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
new file mode 100644
index 000000000000..2504ba70c25a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -0,0 +1,240 @@
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// PPC target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppctti"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't havve a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializePPCTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class PPCTTI : public ImmutablePass, public TargetTransformInfo {
+ const PPCTargetMachine *TM;
+ const PPCSubtarget *ST;
+ const PPCTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ PPCTTI(const PPCTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializePPCTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
+ "PPC Target Transform Info", true, true, false)
+char PPCTTI::ID = 0;
+
+ImmutablePass *
+llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
+ return new PPCTTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// PPC cost model.
+//
+//===----------------------------------------------------------------------===//
+
+PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ if (ST->hasPOPCNTD() && TyWidth <= 64)
+ return PSK_FastHardware;
+ return PSK_Software;
+}
+
+unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasAltivec())
+ return 0;
+ return 32;
+}
+
+unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAltivec()) return 128;
+ return 0;
+ }
+
+ if (ST->isPPC64())
+ return 64;
+ return 32;
+
+}
+
+unsigned PPCTTI::getMaximumUnrollFactor() const {
+ unsigned Directive = ST->getDarwinDirective();
+ // The 440 has no SIMD support, but floating-point instructions
+ // have a 5-cycle latency, so unroll by 5x for latency hiding.
+ if (Directive == PPC::DIR_440)
+ return 5;
+
+ // The A2 has no SIMD support, but floating-point instructions
+ // have a 6-cycle latency, so unroll by 6x for latency hiding.
+ if (Directive == PPC::DIR_A2)
+ return 6;
+
+ // FIXME: For lack of any better information, do no harm...
+ if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
+ return 1;
+
+ // For most things, modern systems have two execution units (and
+ // out-of-order execution).
+ return 2;
+}
+
+unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ // Fallback to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
+unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Estimated cost of a load-hit-store delay. This was obtained
+ // experimentally as a minimum needed to prevent unprofitable
+ // vectorization for the paq8p benchmark. It may need to be
+ // raised further if other unprofitable cases remain.
+ unsigned LHSPenalty = 12;
+
+ // Vector element insert/extract with Altivec is very expensive,
+ // because they require store and reload with the attendant
+ // processor stall for load-hit-store. Until VSX is available,
+ // these need to be estimated as very costly.
+ if (ISD == ISD::EXTRACT_VECTOR_ELT ||
+ ISD == ISD::INSERT_VECTOR_ELT)
+ return LHSPenalty +
+ TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ // Each load/store unit costs 1.
+ unsigned Cost = LT.first * 1;
+
+ // PPC in general does not support unaligned loads and stores. They'll need
+ // to be decomposed based on the alignment factor.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (SrcBytes && Alignment && Alignment < SrcBytes)
+ Cost *= (SrcBytes/Alignment);
+
+ return Cost;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
new file mode 100644
index 000000000000..fa44331b8af6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::ThePPC32Target, llvm::ThePPC64Target;
+
+extern "C" void LLVMInitializePowerPCTargetInfo() {
+ RegisterTarget<Triple::ppc, /*HasJIT=*/true>
+ X(ThePPC32Target, "ppc32", "PowerPC 32");
+
+ RegisterTarget<Triple::ppc64, /*HasJIT=*/true>
+ Y(ThePPC64Target, "ppc64", "PowerPC 64");
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.h b/contrib/llvm/lib/Target/R600/AMDGPU.h
new file mode 100644
index 000000000000..0b01433cc926
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPU.h
@@ -0,0 +1,51 @@
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_H
+#define AMDGPU_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class FunctionPass;
+class AMDGPUTargetMachine;
+
+// R600 Passes
+FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
+
+// SI Passes
+FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createSIInsertWaits(TargetMachine &tm);
+
+// Passes common to R600 and SI
+Pass *createAMDGPUStructurizeCFGPass();
+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm);
+
+} // End namespace llvm
+
+namespace ShaderType {
+ enum Type {
+ PIXEL = 0,
+ VERTEX = 1,
+ GEOMETRY = 2,
+ COMPUTE = 3
+ };
+}
+
+#endif // AMDGPU_H
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.td b/contrib/llvm/lib/Target/R600/AMDGPU.td
new file mode 100644
index 000000000000..1a26c77d6bb2
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPU.td
@@ -0,0 +1,41 @@
+//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+// Include AMDIL TD files
+include "AMDILBase.td"
+
+
+def AMDGPUInstrInfo : InstrInfo {
+ let guessInstructionProperties = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+def AMDGPUAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
+def AMDGPU : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AMDGPUInstrInfo;
+ let AssemblyWriters = [AMDGPUAsmWriter];
+}
+
+// Include AMDGPU TD files
+include "R600Schedule.td"
+include "SISchedule.td"
+include "Processors.td"
+include "AMDGPUInstrInfo.td"
+include "AMDGPUIntrinsics.td"
+include "AMDGPURegisterInfo.td"
+include "AMDGPUInstructions.td"
+include "AMDGPUCallingConv.td"
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
new file mode 100644
index 000000000000..f6001445f4b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -0,0 +1,145 @@
+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
+/// code. When passed an MCAsmStreamer it prints assembly and when passed
+/// an MCObjectStreamer it outputs binary code.
+//
+//===----------------------------------------------------------------------===//
+//
+
+
+#include "AMDGPUAsmPrinter.h"
+#include "AMDGPU.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+
+static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
+ MCStreamer &Streamer) {
+ return new AMDGPUAsmPrinter(tm, Streamer);
+}
+
+extern "C" void LLVMInitializeR600AsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
+}
+
+/// We need to override this function so we can avoid
+/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ if (STM.dumpCode()) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ MF.dump();
+#endif
+ }
+ SetupMachineFunction(MF);
+ if (OutStreamer.hasRawTextSupport()) {
+ OutStreamer.EmitRawText("@" + MF.getName() + ":");
+ }
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ EmitProgramInfo(MF);
+ }
+ EmitFunctionBody();
+ return false;
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
+ unsigned MaxSGPR = 0;
+ unsigned MaxVGPR = 0;
+ bool VCCUsed = false;
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+
+ unsigned numOperands = MI.getNumOperands();
+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+ MachineOperand & MO = MI.getOperand(op_idx);
+ unsigned maxUsed;
+ unsigned width = 0;
+ bool isSGPR = false;
+ unsigned reg;
+ unsigned hwReg;
+ if (!MO.isReg()) {
+ continue;
+ }
+ reg = MO.getReg();
+ if (reg == AMDGPU::VCC) {
+ VCCUsed = true;
+ continue;
+ }
+ switch (reg) {
+ default: break;
+ case AMDGPU::EXEC:
+ case AMDGPU::M0:
+ continue;
+ }
+
+ if (AMDGPU::SReg_32RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 1;
+ } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 1;
+ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 2;
+ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 2;
+ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 4;
+ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 4;
+ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 8;
+ } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 8;
+ } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 16;
+ } else {
+ assert(!"Unknown register class");
+ }
+ hwReg = RI->getEncodingValue(reg) & 0xff;
+ maxUsed = hwReg + width - 1;
+ if (isSGPR) {
+ MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
+ } else {
+ MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
+ }
+ }
+ }
+ }
+ if (VCCUsed) {
+ MaxSGPR += 2;
+ }
+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+ OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
+ OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
+ OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
new file mode 100644
index 000000000000..3812282b1798
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -0,0 +1,44 @@
+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_ASMPRINTER_H
+#define AMDGPU_ASMPRINTER_H
+
+#include "llvm/CodeGen/AsmPrinter.h"
+
+namespace llvm {
+
+class AMDGPUAsmPrinter : public AsmPrinter {
+
+public:
+ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "AMDGPU Assembly Printer";
+ }
+
+ /// \brief Emit register usage information so that the GPU driver
+ /// can correctly setup the GPU state.
+ void EmitProgramInfo(MachineFunction &MF);
+
+ /// Implemented in AMDGPUMCInstLower.cpp
+ virtual void EmitInstruction(const MachineInstr *MI);
+};
+
+} // End anonymous llvm
+
+#endif //AMDGPU_ASMPRINTER_H
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td
new file mode 100644
index 000000000000..45ae37ef0c7f
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td
@@ -0,0 +1,42 @@
+//===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMD Radeon GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+// Inversion of CCIfInReg
+class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
+
+// Calling convention for SI
+def CC_SI : CallingConv<[
+
+ CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
+ ]>>>,
+
+ CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR12, SGPR15 ]
+ >>>,
+
+ CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
+ ]>>>
+
+]>;
+
+def CC_AMDGPU : CallingConv<[
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()"#
+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo<CC_SI>>
+]>;
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUConvertToISA.cpp b/contrib/llvm/lib/Target/R600/AMDGPUConvertToISA.cpp
new file mode 100644
index 000000000000..50297d1f60c8
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUConvertToISA.cpp
@@ -0,0 +1,62 @@
+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass lowers AMDIL machine instructions to the appropriate
+/// hardware instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUConvertToISAPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ TargetMachine &TM;
+
+public:
+ AMDGPUConvertToISAPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUConvertToISAPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
+ return new AMDGPUConvertToISAPass(tm);
+}
+
+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
+ }
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp
new file mode 100644
index 000000000000..815d6f71c3be
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -0,0 +1,122 @@
+//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on a AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
+
+AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
+
+unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
+
+ // XXX: Hardcoding to 1 for now.
+ //
+ // I think the StackWidth should stored as metadata associated with the
+ // MachineFunction. This metadata can either be added by a frontend, or
+ // calculated by a R600 specific LLVM IR pass.
+ //
+ // The StackWidth determines how stack objects are laid out in memory.
+ // For a vector stack variable, like: int4 stack[2], the data will be stored
+ // in the following ways depending on the StackWidth.
+ //
+ // StackWidth = 1:
+ //
+ // T0.X = stack[0].x
+ // T1.X = stack[0].y
+ // T2.X = stack[0].z
+ // T3.X = stack[0].w
+ // T4.X = stack[1].x
+ // T5.X = stack[1].y
+ // T6.X = stack[1].z
+ // T7.X = stack[1].w
+ //
+ // StackWidth = 2:
+ //
+ // T0.X = stack[0].x
+ // T0.Y = stack[0].y
+ // T1.X = stack[0].z
+ // T1.Y = stack[0].w
+ // T2.X = stack[1].x
+ // T2.Y = stack[1].y
+ // T3.X = stack[1].z
+ // T3.Y = stack[1].w
+ //
+ // StackWidth = 4:
+ // T0.X = stack[0].x
+ // T0.Y = stack[0].y
+ // T0.Z = stack[0].z
+ // T0.W = stack[0].w
+ // T1.X = stack[1].x
+ // T1.Y = stack[1].y
+ // T1.Z = stack[1].z
+ // T1.W = stack[1].w
+ return 1;
+}
+
+/// \returns The number of registers allocated for \p FI.
+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Offset = 0;
+ int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
+
+ for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
+ const AllocaInst *Alloca = MFI->getObjectAllocation(i);
+ unsigned ArrayElements;
+ const Type *AllocaType = Alloca->getAllocatedType();
+ const Type *ElementType;
+
+ if (AllocaType->isArrayTy()) {
+ ArrayElements = AllocaType->getArrayNumElements();
+ ElementType = AllocaType->getArrayElementType();
+ } else {
+ ArrayElements = 1;
+ ElementType = AllocaType;
+ }
+
+ unsigned VectorElements;
+ if (ElementType->isVectorTy()) {
+ VectorElements = ElementType->getVectorNumElements();
+ } else {
+ VectorElements = 1;
+ }
+
+ Offset += (VectorElements / getStackWidth(MF)) * ArrayElements;
+ }
+ return Offset;
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 0;
+ return 0;
+}
+void
+AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+void
+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
+
+bool
+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h
new file mode 100644
index 000000000000..cf5742ee0952
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h
@@ -0,0 +1,44 @@
+//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface to describe a layout of a stack frame on a AMDIL target
+/// machine.
+//
+//===----------------------------------------------------------------------===//
+#ifndef AMDILFRAME_LOWERING_H
+#define AMDILFRAME_LOWERING_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+/// \brief Information about the stack frame layout on the AMDGPU targets.
+///
+/// It holds the direction of the stack growth, the known stack alignment on
+/// entry to each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+class AMDGPUFrameLowering : public TargetFrameLowering {
+public:
+ AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+ unsigned TransAl = 1);
+ virtual ~AMDGPUFrameLowering();
+
+ /// \returns The number of 32-bit sub-registers that are used when storing
+ /// values to the stack.
+ virtual unsigned getStackWidth(const MachineFunction &MF) const;
+ virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+};
+} // namespace llvm
+#endif // AMDILFRAME_LOWERING_H
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
new file mode 100644
index 000000000000..a266df535d56
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -0,0 +1,414 @@
+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This is the parent TargetLowering class for hardware code gen
+/// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+using namespace llvm;
+
+#include "AMDGPUGenCallingConv.inc"
+
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
+ TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+
+ // Initialize target lowering borrowed from AMDIL
+ InitAMDILLowering();
+
+ // We need to custom lower some of the intrinsics
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Library functions. These default to Expand, but we have instructions
+ // for them.
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
+ setOperationAction(ISD::FPOW, MVT::f32, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+
+ // Lower floating point store/load to integer store/load to reduce the number
+ // of patterns in tablegen.
+ setOperationAction(ISD::STORE, MVT::f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
+
+ setOperationAction(ISD::STORE, MVT::v4f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
+
+ setOperationAction(ISD::LOAD, MVT::f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
+
+ setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+}
+
+//===---------------------------------------------------------------------===//
+// TargetLowering Callbacks
+//===---------------------------------------------------------------------===//
+
+void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const {
+
+ State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
+}
+
+SDValue AMDGPUTargetLowering::LowerReturn(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const {
+ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
+}
+
+//===---------------------------------------------------------------------===//
+// Target specific lowering
+//===---------------------------------------------------------------------===//
+
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+ const {
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ assert(0 && "Custom lowering code for this"
+ "instruction is not implemented yet!");
+ break;
+ // AMDIL DAG lowering
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::SREM: return LowerSREM(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ // AMDGPU DAG lowering
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ }
+ return Op;
+}
+
+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ switch (IntrinsicID) {
+ default: return Op;
+ case AMDGPUIntrinsic::AMDIL_abs:
+ return LowerIntrinsicIABS(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_exp:
+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_lrp:
+ return LowerIntrinsicLRP(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_fraction:
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_max:
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imax:
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umax:
+ return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_min:
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imin:
+ return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umin:
+ return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_round_nearest:
+ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
+ }
+}
+
+///IABS(a) = SMAX(sub(0, a), a)
+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ Op.getOperand(1));
+
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
+}
+
+/// Linear Interpolation
+/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(1.0f, MVT::f32),
+ Op.getOperand(1));
+ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
+ Op.getOperand(3));
+ return DAG.getNode(ISD::FADD, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
+ OneSubAC);
+}
+
+/// \brief Generate Min/Max node
+SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+
+ if (VT != MVT::f32 ||
+ !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+ return SDValue();
+ }
+
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ switch (CCOpcode) {
+ case ISD::SETOEQ:
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ case ISD::SETUEQ:
+ case ISD::SETEQ:
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2:
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2:
+ case ISD::SETUO:
+ case ISD::SETO:
+ assert(0 && "Operation should already be optimised !");
+ case ISD::SETULE:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETOLT:
+ case ISD::SETLE:
+ case ISD::SETLT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
+ else
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
+ }
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGE:
+ case ISD::SETOGE:
+ case ISD::SETUGT:
+ case ISD::SETOGT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
+ else
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
+ }
+ case ISD::SETCC_INVALID:
+ assert(0 && "Invalid setcc condcode !");
+ }
+ return Op;
+}
+
+
+
+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue Num = Op.getOperand(0);
+ SDValue Den = Op.getOperand(1);
+
+ SmallVector<SDValue, 8> Results;
+
+ // RCP = URECIP(Den) = 2^32 / Den + e
+ // e is rounding error.
+ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
+
+ // RCP_LO = umulo(RCP, Den) */
+ SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
+
+ // RCP_HI = mulhu (RCP, Den) */
+ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
+
+ // NEG_RCP_LO = -RCP_LO
+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ RCP_LO);
+
+ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ NEG_RCP_LO, RCP_LO,
+ ISD::SETEQ);
+ // Calculate the rounding error from the URECIP instruction
+ // E = mulhu(ABS_RCP_LO, RCP)
+ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
+
+ // RCP_A_E = RCP + E
+ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
+
+ // RCP_S_E = RCP - E
+ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
+
+ // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ RCP_A_E, RCP_S_E,
+ ISD::SETEQ);
+ // Quotient = mulhu(Tmp0, Num)
+ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
+
+ // Num_S_Remainder = Quotient * Den
+ SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
+
+ // Remainder = Num - Num_S_Remainder
+ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
+
+ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
+ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
+ DAG.getConstant(0, VT),
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
+ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
+ Remainder_GE_Zero);
+
+ // Calculate Division result:
+
+ // Quotient_A_One = Quotient + 1
+ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Quotient_S_One = Quotient - 1
+ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Quotient, Quotient_A_One, ISD::SETEQ);
+
+ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Quotient_S_One, Div, ISD::SETEQ);
+
+ // Calculate Rem result:
+
+ // Remainder_S_Den = Remainder - Den
+ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
+
+ // Remainder_A_Den = Remainder + Den
+ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
+
+ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Remainder, Remainder_S_Den, ISD::SETEQ);
+
+ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Remainder_A_Den, Rem, ISD::SETEQ);
+ SDValue Ops[2];
+ Ops[0] = Div;
+ Ops[1] = Rem;
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->isExactlyValue(1.0);
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isAllOnesValue();
+ }
+ return false;
+}
+
+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->getValueAPF().isZero();
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isNullValue();
+ }
+ return false;
+}
+
+SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned VirtualRegister;
+ if (!MRI.isLiveIn(Reg)) {
+ VirtualRegister = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(Reg, VirtualRegister);
+ } else {
+ VirtualRegister = MRI.getLiveInVirtReg(Reg);
+ }
+ return DAG.getRegister(VirtualRegister, VT);
+}
+
+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
+
+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ // AMDIL DAG nodes
+ NODE_NAME_CASE(CALL);
+ NODE_NAME_CASE(UMUL);
+ NODE_NAME_CASE(DIV_INF);
+ NODE_NAME_CASE(RET_FLAG);
+ NODE_NAME_CASE(BRANCH_COND);
+
+ // AMDGPU DAG nodes
+ NODE_NAME_CASE(DWORDADDR)
+ NODE_NAME_CASE(FRACT)
+ NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(SMAX)
+ NODE_NAME_CASE(UMAX)
+ NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(SMIN)
+ NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(URECIP)
+ NODE_NAME_CASE(EXPORT)
+ NODE_NAME_CASE(CONST_ADDRESS)
+ NODE_NAME_CASE(REGISTER_LOAD)
+ NODE_NAME_CASE(REGISTER_STORE)
+ }
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
new file mode 100644
index 000000000000..f31b6466bd46
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
@@ -0,0 +1,140 @@
+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition of the TargetLowering class that is common
+/// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUISELLOWERING_H
+#define AMDGPUISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class MachineRegisterInfo;
+
+class AMDGPUTargetLowering : public TargetLowering {
+private:
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+
+protected:
+
+ /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
+ /// MachineFunction.
+ ///
+ /// \returns a RegisterSDNode representing Reg.
+ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const;
+
+ bool isHWTrueValue(SDValue Op) const;
+ bool isHWFalseValue(SDValue Op) const;
+
+ void AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const;
+
+public:
+ AMDGPUTargetLowering(TargetMachine &TM);
+
+ virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const;
+ virtual SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ CLI.Callee.dump();
+ llvm_unreachable("Undefined function");
+ }
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
+ virtual const char* getTargetNodeName(unsigned Opcode) const;
+
+ virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const {
+ return N;
+ }
+
+// Functions defined in AMDILISelLowering.cpp
+public:
+
+ /// \brief Determine which of the bits specified in \p Mask are known to be
+ /// either zero or one and return them in the \p KnownZero and \p KnownOne
+ /// bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const;
+
+ /// We want to mark f32/f64 floating point values as legal.
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ /// We don't want to shrink f64/f32 constants.
+ bool ShouldShrinkFPConstant(EVT VT) const;
+
+private:
+ void InitAMDILLowering();
+ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+};
+
+namespace AMDGPUISD {
+
+enum {
+ // AMDIL ISD Opcodes
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CALL, // Function call based on a single integer
+ UMUL, // 32bit unsigned multiplication
+ DIV_INF, // Divide with infinity returned on zero divisor
+ RET_FLAG,
+ BRANCH_COND,
+ // End AMDIL ISD Opcodes
+ BITALIGN,
+ DWORDADDR,
+ FRACT,
+ FMAX,
+ SMAX,
+ UMAX,
+ FMIN,
+ SMIN,
+ UMIN,
+ URECIP,
+ EXPORT,
+ CONST_ADDRESS,
+ REGISTER_LOAD,
+ REGISTER_STORE,
+ LAST_AMDGPU_ISD_NUMBER
+};
+
+
+} // End namespace AMDGPUISD
+
+} // End namespace llvm
+
+#endif // AMDGPUISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/contrib/llvm/lib/Target/R600/AMDGPUIndirectAddressing.cpp
new file mode 100644
index 000000000000..ed6c8ec55dd2
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUIndirectAddressing.cpp
@@ -0,0 +1,343 @@
+//===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Instructions can use indirect addressing to index the register file as if it
+/// were memory. This pass lowers RegisterLoad and RegisterStore instructions
+/// to either a COPY or a MOV that uses indirect addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const AMDGPUInstrInfo *TII;
+
+ bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
+
+public:
+ AMDGPUIndirectAddressingPass(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
+ { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "R600 Handle indirect addressing"; }
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUIndirectAddressingPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
+ return new AMDGPUIndirectAddressingPass(tm);
+}
+
+bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ int IndirectBegin = TII->getIndirectIndexBegin(MF);
+ int IndirectEnd = TII->getIndirectIndexEnd(MF);
+
+ if (IndirectBegin == -1) {
+ // No indirect addressing, we can skip this pass
+ assert(IndirectEnd == -1);
+ return false;
+ }
+
+ // The map keeps track of the indirect address that is represented by
+ // each virtual register. The key is the register and the value is the
+ // indirect address it uses.
+ std::map<unsigned, unsigned> RegisterAddressMap;
+
+ // First pass - Lower all of the RegisterStore instructions and track which
+ // registers are live.
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ // This map keeps track of the current live indirect registers.
+ // The key is the address and the value is the register
+ std::map<unsigned, unsigned> LiveAddressRegisterMap;
+ MachineBasicBlock &MBB = *BB;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+
+ if (!TII->isRegisterStore(MI)) {
+ continue;
+ }
+
+ // Lower RegisterStore
+
+ unsigned RegIndex = MI.getOperand(2).getImm();
+ unsigned Channel = MI.getOperand(3).getImm();
+ unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+ const TargetRegisterClass *IndirectStoreRegClass =
+ TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
+
+ if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+ // Direct register access.
+ unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
+ .addOperand(MI.getOperand(0));
+
+ RegisterAddressMap[DstReg] = Address;
+ LiveAddressRegisterMap[Address] = DstReg;
+ } else {
+ // Indirect register access.
+ MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
+ MI.getOperand(0).getReg(), // Value
+ Address,
+ MI.getOperand(1).getReg()); // Offset
+ for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+ unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+ unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+ MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
+ RegisterAddressMap[DstReg] = Addr;
+ LiveAddressRegisterMap[Addr] = DstReg;
+ }
+ }
+ MI.eraseFromParent();
+ }
+
+ // Update the live-ins of the succesor blocks
+ for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
+ SuccEnd = MBB.succ_end();
+ SuccEnd != Succ; ++Succ) {
+ std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
+ for (Key = LiveAddressRegisterMap.begin(),
+ KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
+ (*Succ)->addLiveIn(Key->second);
+ }
+ }
+ }
+
+ // Second pass - Lower the RegisterLoad instructions
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ // Key is the address and the value is the register
+ std::map<unsigned, unsigned> LiveAddressRegisterMap;
+ MachineBasicBlock &MBB = *BB;
+
+ MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
+ while (LI != MBB.livein_end()) {
+ std::vector<unsigned> PhiRegisters;
+
+ // Make sure this live in is used for indirect addressing
+ if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
+ ++LI;
+ continue;
+ }
+
+ unsigned Address = RegisterAddressMap[*LI];
+ LiveAddressRegisterMap[Address] = *LI;
+ PhiRegisters.push_back(*LI);
+
+ // Check if there are other live in registers which map to the same
+ // indirect address.
+ for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
+ LE = MBB.livein_end();
+ LJ != LE; ++LJ) {
+ unsigned Reg = *LJ;
+ if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
+ continue;
+ }
+
+ if (RegisterAddressMap[Reg] == Address) {
+ PhiRegisters.push_back(Reg);
+ }
+ }
+
+ if (PhiRegisters.size() == 1) {
+ // We don't need to insert a Phi instruction, so we can just add the
+ // registers to the live list for the block.
+ LiveAddressRegisterMap[Address] = *LI;
+ MBB.removeLiveIn(*LI);
+ } else {
+ // We need to insert a PHI, because we have the same address being
+ // written in multiple predecessor blocks.
+ const TargetRegisterClass *PhiDstClass =
+ TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
+ unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
+ MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
+ MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::PHI), PhiDstReg);
+
+ for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
+ RE = PhiRegisters.end();
+ RI != RE; ++RI) {
+ unsigned Reg = *RI;
+ MachineInstr *DefInst = MRI.getVRegDef(Reg);
+ assert(DefInst);
+ MachineBasicBlock *RegBlock = DefInst->getParent();
+ Phi.addReg(Reg);
+ Phi.addMBB(RegBlock);
+ MBB.removeLiveIn(Reg);
+ }
+ RegisterAddressMap[PhiDstReg] = Address;
+ LiveAddressRegisterMap[Address] = PhiDstReg;
+ }
+ LI = MBB.livein_begin();
+ }
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+
+ if (!TII->isRegisterLoad(MI)) {
+ if (MI.getOpcode() == AMDGPU::PHI) {
+ continue;
+ }
+ // Check for indirect register defs
+ for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
+ OpIdx < NumOperands; ++OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isReg() && MO.isDef() &&
+ RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
+ unsigned Reg = MO.getReg();
+ unsigned LiveAddress = RegisterAddressMap[Reg];
+ // Chain the live-ins
+ if (LiveAddressRegisterMap.find(LiveAddress) !=
+ RegisterAddressMap.end()) {
+ MI.addOperand(MachineOperand::CreateReg(
+ LiveAddressRegisterMap[LiveAddress],
+ false, // isDef
+ true, // isImp
+ true)); // isKill
+ }
+ LiveAddressRegisterMap[LiveAddress] = Reg;
+ }
+ }
+ continue;
+ }
+
+ const TargetRegisterClass *SuperIndirectRegClass =
+ TII->getSuperIndirectRegClass();
+ const TargetRegisterClass *IndirectLoadRegClass =
+ TII->getIndirectAddrLoadRegClass();
+ unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
+
+ unsigned RegIndex = MI.getOperand(2).getImm();
+ unsigned Channel = MI.getOperand(3).getImm();
+ unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+
+ if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+ // Direct register access
+ unsigned Reg = LiveAddressRegisterMap[Address];
+ unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
+
+ if (regHasExplicitDef(MRI, Reg)) {
+ // If the register we are reading from has an explicit def, then that
+ // means it was written via a direct register access (i.e. COPY
+ // or other instruction that doesn't use indirect addressing). In
+ // this case we know where the value has been stored, so we can just
+ // issue a copy.
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(Reg);
+ } else {
+ // If the register we are reading has an implicit def, then that
+ // means it was written by an indirect register access (i.e. An
+ // instruction that uses indirect addressing.
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(AddrReg)
+ .addReg(Reg, RegState::Implicit);
+ }
+ } else {
+ // Indirect register access
+
+ // Note on REQ_SEQUENCE instructons: You can't actually use the register
+ // it defines unless you have an instruction that takes the defined
+ // register class as an operand.
+
+ MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDGPU::REG_SEQUENCE),
+ IndirectReg);
+ for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+ unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+ if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
+ continue;
+ }
+ unsigned Reg = LiveAddressRegisterMap[Addr];
+
+ // We only need to use REG_SEQUENCE for explicit defs, since the
+ // register coalescer won't do anything with the implicit defs.
+ if (!regHasExplicitDef(MRI, Reg)) {
+ continue;
+ }
+
+ // Insert a REQ_SEQUENCE instruction to force the register allocator
+ // to allocate the virtual register to the correct physical register.
+ Sequence.addReg(LiveAddressRegisterMap[Addr]);
+ Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
+ }
+ MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
+ MI.getOperand(0).getReg(), // Value
+ Address,
+ MI.getOperand(1).getReg()); // Offset
+
+
+
+ Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
+ Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
+
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
+
+bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+
+ if (!DefInstr) {
+ return false;
+ }
+
+ if (DefInstr->getOpcode() == AMDGPU::PHI) {
+ bool Explicit = false;
+ for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(),
+ E = DefInstr->operands_end();
+ I != E; ++I) {
+ const MachineOperand &MO = *I;
+ if (!MO.isReg() || MO.isDef()) {
+ continue;
+ }
+
+ Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg());
+ }
+ return Explicit;
+ }
+
+ return DefInstr->getOperand(0).isReg() &&
+ DefInstr->getOperand(0).getReg() == Reg;
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
new file mode 100644
index 000000000000..30f736c84c25
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -0,0 +1,267 @@
+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implementation of the TargetInstrInfo class that is common to all
+/// AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRMAP_INFO
+#include "AMDGPUGenInstrInfo.inc"
+
+using namespace llvm;
+
+AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
+ : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
+
+const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+// TODO: Implement this function
+ return false;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+
+MachineInstr *
+AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+// TODO: Implement this function
+ return NULL;
+}
+bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const {
+ while (iter != MBB.end()) {
+ switch (iter->getOpcode()) {
+ default:
+ break;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32:
+ case AMDGPU::BRANCH:
+ return true;
+ };
+ ++iter;
+ }
+ return false;
+}
+
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator tmp = MBB->end();
+ if (!MBB->size()) {
+ return MBB->end();
+ }
+ while (--tmp) {
+ if (tmp->getOpcode() == AMDGPU::ENDLOOP
+ || tmp->getOpcode() == AMDGPU::ENDIF
+ || tmp->getOpcode() == AMDGPU::ELSE) {
+ if (tmp == MBB->begin()) {
+ return tmp;
+ } else {
+ continue;
+ }
+ } else {
+ return ++tmp;
+ }
+ }
+ return MBB->end();
+}
+
+void
+AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(!"Not Implemented");
+}
+
+void
+AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(!"Not Implemented");
+}
+
+MachineInstr *
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+MachineInstr*
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // TODO: Implement this function
+ return 0;
+}
+bool
+AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad,
+ bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ // TODO: Implement this function
+ return false;
+}
+
+unsigned
+AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ // TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ assert(Offset2 > Offset1
+ && "Second offset should be larger than first offset!");
+ // If we have less than 16 loads in a row, and the offsets are within 16,
+ // then schedule together.
+ // TODO: Make the loads schedule near if it fits in a cacheline
+ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+}
+
+bool
+AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // TODO: Implement this function
+ return true;
+}
+void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ // TODO: Implement this function
+}
+
+bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2)
+ const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
+ // TODO: Implement this function
+ return MI->getDesc().isPredicable();
+}
+
+bool
+AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // TODO: Implement this function
+ return true;
+}
+
+bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
+}
+
+bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
+}
+
+
+void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AMDGPURegisterInfo & RI = getRegisterInfo();
+
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ MachineOperand &MO = MI.getOperand(i);
+ // Convert dst regclass to one that is supported by the ISA
+ if (MO.isReg() && MO.isDef()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
+ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
+
+ assert(newRegClass);
+
+ MRI.setRegClass(MO.getReg(), newRegClass);
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
new file mode 100644
index 000000000000..3909e4e105ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
@@ -0,0 +1,206 @@
+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Contains the definition of a TargetInstrInfo class that is common
+/// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTRUCTIONINFO_H
+#define AMDGPUINSTRUCTIONINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <map>
+
+#define GET_INSTRINFO_HEADER
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
+#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
+#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
+#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+
+class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
+private:
+ const AMDGPURegisterInfo RI;
+ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const;
+protected:
+ TargetMachine &TM;
+public:
+ explicit AMDGPUInstrInfo(TargetMachine &tm);
+
+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DstReg, unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+ MachineInstr *
+ convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const = 0;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+protected:
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const;
+public:
+ bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const;
+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode *> &NewNodes) const;
+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool isPredicated(const MachineInstr *MI) const;
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ bool isPredicable(MachineInstr *MI) const;
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ // Helper functions that check the opcode for status information
+ bool isLoadInst(llvm::MachineInstr *MI) const;
+ bool isExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isZExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isAExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isStoreInst(llvm::MachineInstr *MI) const;
+ bool isTruncStoreInst(llvm::MachineInstr *MI) const;
+ bool isRegisterStore(const MachineInstr &MI) const;
+ bool isRegisterLoad(const MachineInstr &MI) const;
+
+//===---------------------------------------------------------------------===//
+// Pure virtual funtions to be implemented by sub-classes.
+//===---------------------------------------------------------------------===//
+
+ virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const = 0;
+ virtual unsigned getIEQOpcode() const = 0;
+ virtual bool isMov(unsigned opcode) const = 0;
+
+ /// \returns the smallest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
+
+ /// \returns the largest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
+
+ /// \brief Calculate the "Indirect Address" for the given \p RegIndex and
+ /// \p Channel
+ ///
+ /// We model indirect addressing using a virtual address space that can be
+ /// accesed with loads and stores. The "Indirect Address" is the memory
+ /// address in this virtual address space that maps to the given \p RegIndex
+ /// and \p Channel.
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const = 0;
+
+ /// \returns The register class to be used for storing values to an
+ /// "Indirect Address" .
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const = 0;
+
+ /// \returns The register class to be used for loading values from
+ /// an "Indirect Address" .
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
+
+ /// \brief Build instruction(s) for an indirect register write.
+ ///
+ /// \returns The instruction that performs the indirect register write
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const = 0;
+
+ /// \brief Build instruction(s) for an indirect register read.
+ ///
+ /// \returns The instruction that performs the indirect register read
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const = 0;
+
+ /// \returns the register class whose sub registers are the set of all
+ /// possible registers that can be used for indirect addressing.
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
+
+
+ /// \brief Convert the AMDIL MachineInstr to a supported ISA
+ /// MachineInstr
+ virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const;
+
+};
+
+} // End llvm namespace
+
+#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
+#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
+
+#endif // AMDGPUINSTRINFO_H
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td
new file mode 100644
index 000000000000..b66ae879dc20
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td
@@ -0,0 +1,82 @@
+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DAG node defintions for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Profiles
+//===----------------------------------------------------------------------===//
+
+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Nodes
+//
+
+// out = ((a << 32) | b) >> c)
+//
+// Can be used to optimize rtol:
+// rotl(a, b) = bitalign(a, a, 32 - b)
+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
+
+// This argument to this node is a dword address.
+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+
+// out = a - floor(a)
+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
+
+// out = max(a, b) a and b are floats
+def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are signed ints
+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are unsigned ints
+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are floats
+def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a snd b are signed ints
+def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are unsigned ints
+def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// urecip - This operation is a helper for integer division, it returns the
+// result of 1 / a as a fractional unsigned integer.
+// out = (2^32 / a) + e
+// e is rounding error
+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
+
+def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
+
+def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
+ SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayStore]>;
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
new file mode 100644
index 000000000000..e740348717c7
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
@@ -0,0 +1,266 @@
+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction defs that are common to all hw codegen
+// targets.
+//
+//===----------------------------------------------------------------------===//
+
+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
+ field bit isRegisterLoad = 0;
+ field bit isRegisterStore = 0;
+
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = NullALU;
+
+ let TSFlags{63} = isRegisterLoad;
+ let TSFlags{62} = isRegisterStore;
+}
+
+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
+ : AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<32> Inst = 0xffffffff;
+
+}
+
+def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
+
+def COND_EQ : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOEQ: case ISD::SETUEQ:
+ case ISD::SETEQ: return true;}}}]
+>;
+
+def COND_NE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETONE: case ISD::SETUNE:
+ case ISD::SETNE: return true;}}}]
+>;
+def COND_GT : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOGT: case ISD::SETUGT:
+ case ISD::SETGT: return true;}}}]
+>;
+
+def COND_GE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOGE: case ISD::SETUGE:
+ case ISD::SETGE: return true;}}}]
+>;
+
+def COND_LT : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOLT: case ISD::SETULT:
+ case ISD::SETLT: return true;}}}]
+>;
+
+def COND_LE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOLE: case ISD::SETULE:
+ case ISD::SETLE: return true;}}}]
+>;
+
+def COND_NULL : PatLeaf <
+ (cond),
+ [{return false;}]
+>;
+
+//===----------------------------------------------------------------------===//
+// Load/Store Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+class Constants {
+int TWO_PI = 0x40c90fdb;
+int PI = 0x40490fdb;
+int TWO_PI_INV = 0x3e22f983;
+}
+def CONST : Constants;
+
+def FP_ZERO : PatLeaf <
+ (fpimm),
+ [{return N->getValueAPF().isZero();}]
+>;
+
+def FP_ONE : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(1.0);}]
+>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+let usesCustomInserter = 1 in {
+
+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "CLAMP $dst, $src0",
+ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+>;
+
+class FABS <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FABS $dst, $src0",
+ [(set rc:$dst, (fabs rc:$src0))]
+>;
+
+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FNEG $dst, $src0",
+ [(set rc:$dst, (fneg rc:$src0))]
+>;
+
+} // usesCustomInserter = 1
+
+multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
+ ComplexPattern addrPat> {
+ def RegisterLoad : AMDGPUShaderInst <
+ (outs dstClass:$dst),
+ (ins addrClass:$addr, i32imm:$chan),
+ "RegisterLoad $dst, $addr",
+ [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
+ (i32 timm:$chan)))]
+ > {
+ let isRegisterLoad = 1;
+ }
+
+ def RegisterStore : AMDGPUShaderInst <
+ (outs),
+ (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
+ "RegisterStore $val, $addr",
+ [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))]
+ > {
+ let isRegisterStore = 1;
+ }
+}
+
+} // End isCodeGenOnly = 1, isPseudo = 1
+
+/* Generic helper patterns for intrinsics */
+/* -------------------------------------- */
+
+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
+ RegisterClass rc> : Pat <
+ (fpow rc:$src0, rc:$src1),
+ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
+>;
+
+/* Other helper patterns */
+/* --------------------- */
+
+/* Extract element pattern */
+class Extract_Element <ValueType sub_type, ValueType vec_type,
+ RegisterClass vec_class, int sub_idx,
+ SubRegIndex sub_reg>: Pat<
+ (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
+ (EXTRACT_SUBREG vec_class:$src, sub_reg)
+>;
+
+/* Insert element pattern */
+class Insert_Element <ValueType elem_type, ValueType vec_type,
+ RegisterClass elem_class, RegisterClass vec_class,
+ int sub_idx, SubRegIndex sub_reg> : Pat <
+
+ (vec_type (vector_insert (vec_type vec_class:$vec),
+ (elem_type elem_class:$elem), sub_idx)),
+ (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
+>;
+
+// Vector Build pattern
+class Vector1_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$src))),
+ (vecType elemClass:$src)
+>;
+
+class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))),
+ (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
+>;
+
+class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
+ (elemType elemClass:$z), (elemType elemClass:$w))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1),
+ elemClass:$z, sub2), elemClass:$w, sub3)
+>;
+
+class Vector8_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
+ (elemType elemClass:$sub2), (elemType elemClass:$sub3),
+ (elemType elemClass:$sub4), (elemType elemClass:$sub5),
+ (elemType elemClass:$sub6), (elemType elemClass:$sub7))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
+ elemClass:$sub2, sub2), elemClass:$sub3, sub3),
+ elemClass:$sub4, sub4), elemClass:$sub5, sub5),
+ elemClass:$sub6, sub6), elemClass:$sub7, sub7)
+>;
+
+class Vector16_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
+ (elemType elemClass:$sub2), (elemType elemClass:$sub3),
+ (elemType elemClass:$sub4), (elemType elemClass:$sub5),
+ (elemType elemClass:$sub6), (elemType elemClass:$sub7),
+ (elemType elemClass:$sub8), (elemType elemClass:$sub9),
+ (elemType elemClass:$sub10), (elemType elemClass:$sub11),
+ (elemType elemClass:$sub12), (elemType elemClass:$sub13),
+ (elemType elemClass:$sub14), (elemType elemClass:$sub15))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
+ elemClass:$sub2, sub2), elemClass:$sub3, sub3),
+ elemClass:$sub4, sub4), elemClass:$sub5, sub5),
+ elemClass:$sub6, sub6), elemClass:$sub7, sub7),
+ elemClass:$sub8, sub8), elemClass:$sub9, sub9),
+ elemClass:$sub10, sub10), elemClass:$sub11, sub11),
+ elemClass:$sub12, sub12), elemClass:$sub13, sub13),
+ elemClass:$sub14, sub14), elemClass:$sub15, sub15)
+>;
+
+// bitconvert pattern
+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
+ (dt (bitconvert (st rc:$src0))),
+ (dt rc:$src0)
+>;
+
+class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
+ (vt (AMDGPUdwordaddr (vt rc:$addr))),
+ (vt rc:$addr)
+>;
+
+include "R600Instructions.td"
+
+include "SIInstrInfo.td"
+
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUIntrinsics.td b/contrib/llvm/lib/Target/R600/AMDGPUIntrinsics.td
new file mode 100644
index 000000000000..eecb25b04f79
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUIntrinsics.td
@@ -0,0 +1,60 @@
+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines intrinsics that are used by all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "AMDGPU", isTarget = 1 in {
+
+ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
+ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
+ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+}
+
+let TargetPrefix = "TGSI", isTarget = 1 in {
+
+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
+}
+
+include "SIIntrinsics.td"
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
new file mode 100644
index 000000000000..1dc1c657dfe5
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -0,0 +1,83 @@
+//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUMCInstLower.h"
+#include "AMDGPUAsmPrinter.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx):
+ Ctx(ctx)
+{ }
+
+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_FPImmediate: {
+ const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
+ assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
+ "Only floating point immediates are supported at the moment.");
+ MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_Register:
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ }
+ OutMI.addOperand(MCOp);
+ }
+}
+
+void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ AMDGPUMCInstLower MCInstLowering(OutContext);
+
+ if (MI->isBundle()) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator I = MI;
+ ++I;
+ while (I != MBB->end() && I->isInsideBundle()) {
+ MCInst MCBundleInst;
+ const MachineInstr *BundledInst = I;
+ MCInstLowering.lower(BundledInst, MCBundleInst);
+ OutStreamer.EmitInstruction(MCBundleInst);
+ ++I;
+ }
+ } else {
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h
new file mode 100644
index 000000000000..d7d538e92599
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h
@@ -0,0 +1,34 @@
+//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_MCINSTLOWER_H
+#define AMDGPU_MCINSTLOWER_H
+
+namespace llvm {
+
+class MCInst;
+class MCContext;
+class MachineInstr;
+
+class AMDGPUMCInstLower {
+
+ MCContext &Ctx;
+
+public:
+ AMDGPUMCInstLower(MCContext &ctx);
+
+ /// \brief Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+};
+
+} // End namespace llvm
+
+#endif //AMDGPU_MCINSTLOWER_H
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
new file mode 100644
index 000000000000..0223ec8e4f3f
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -0,0 +1,22 @@
+#include "AMDGPUMachineFunction.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+
+const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType";
+
+AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
+ MachineFunctionInfo() {
+ AttributeSet Set = MF.getFunction()->getAttributes();
+ Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
+ ShaderTypeAttribute);
+
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ if (Str.getAsInteger(0, ShaderType))
+ llvm_unreachable("Can't parse shader type!");
+ }
+}
+
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.h b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.h
new file mode 100644
index 000000000000..21c8c51dae45
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.h
@@ -0,0 +1,29 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMACHINEFUNCTION_H
+#define AMDGPUMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class AMDGPUMachineFunction : public MachineFunctionInfo {
+private:
+ static const char *ShaderTypeAttribute;
+public:
+ AMDGPUMachineFunction(const MachineFunction &MF);
+ unsigned ShaderType;
+};
+
+}
+#endif // AMDGPUMACHINEFUNCTION_H
diff --git a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp
new file mode 100644
index 000000000000..fe994d2d05a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -0,0 +1,75 @@
+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPUGenRegisterInfo(0),
+ TM(tm),
+ TII(tii)
+ { }
+
+//===----------------------------------------------------------------------===//
+// Function handling callbacks - Functions are a seldom used feature of GPUS, so
+// they are not supported at this time.
+//===----------------------------------------------------------------------===//
+
+const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
+
+const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ return &CalleeSavedReg;
+}
+
+void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(!"Subroutines not supported yet");
+}
+
+unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ assert(!"Subroutines not supported yet");
+ return 0;
+}
+
+unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
+
+ switch(IndirectIndex) {
+ case 0: return AMDGPU::sub0;
+ case 1: return AMDGPU::sub1;
+ case 2: return AMDGPU::sub2;
+ case 3: return AMDGPU::sub3;
+ case 4: return AMDGPU::sub4;
+ case 5: return AMDGPU::sub5;
+ case 6: return AMDGPU::sub6;
+ case 7: return AMDGPU::sub7;
+ case 8: return AMDGPU::sub8;
+ case 9: return AMDGPU::sub9;
+ case 10: return AMDGPU::sub10;
+ case 11: return AMDGPU::sub11;
+ case 12: return AMDGPU::sub12;
+ case 13: return AMDGPU::sub13;
+ case 14: return AMDGPU::sub14;
+ case 15: return AMDGPU::sub15;
+ default: llvm_unreachable("indirect index out of range");
+ }
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "AMDGPUGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h
new file mode 100644
index 000000000000..1fc88e7455b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h
@@ -0,0 +1,66 @@
+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
+/// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUREGISTERINFO_H
+#define AMDGPUREGISTERINFO_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
+ TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ static const uint16_t CalleeSavedReg;
+
+ AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ assert(!"Unimplemented"); return BitVector();
+ }
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns The ISA reg class that is equivalent to \p RC.
+ virtual const TargetRegisterClass * getISARegClass(
+ const TargetRegisterClass * RC) const {
+ assert(!"Unimplemented"); return NULL;
+ }
+
+ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
+ assert(!"Unimplemented"); return NULL;
+ }
+
+ const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ unsigned getIndirectSubReg(unsigned IndirectIndex) const;
+
+};
+
+} // End namespace llvm
+
+#endif // AMDIDSAREGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.td b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.td
new file mode 100644
index 000000000000..b5aca0347fb0
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.td
@@ -0,0 +1,25 @@
+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tablegen register definitions common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AMDGPU" in {
+
+foreach Index = 0-15 in {
+ def sub#Index : SubRegIndex;
+}
+
+def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
+
+}
+
+include "R600RegisterInfo.td"
+include "SIRegisterInfo.td"
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/contrib/llvm/lib/Target/R600/AMDGPUStructurizeCFG.cpp
new file mode 100644
index 000000000000..dea43b874c6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUStructurizeCFG.cpp
@@ -0,0 +1,896 @@
+//===-- AMDGPUStructurizeCFG.cpp - ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The pass implemented in this file transforms the programs control flow
+/// graph into a form that's suitable for code generation on hardware that
+/// implements control flow by execution masking. This currently includes all
+/// AMD GPUs but may as well be useful for other types of hardware.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/PatternMatch.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+namespace {
+
+// Definition of the complex types used in this pass.
+
+typedef std::pair<BasicBlock *, Value *> BBValuePair;
+
+typedef SmallVector<RegionNode*, 8> RNVector;
+typedef SmallVector<BasicBlock*, 8> BBVector;
+typedef SmallVector<BranchInst*, 8> BranchVector;
+typedef SmallVector<BBValuePair, 2> BBValueVector;
+
+typedef SmallPtrSet<BasicBlock *, 8> BBSet;
+
+typedef MapVector<PHINode *, BBValueVector> PhiMap;
+typedef MapVector<BasicBlock *, BBVector> BB2BBVecMap;
+
+typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap;
+typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
+typedef DenseMap<BasicBlock *, Value *> BBPredicates;
+typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
+typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap;
+
+// The name for newly created blocks.
+
+static const char *FlowBlockName = "Flow";
+
+/// @brief Find the nearest common dominator for multiple BasicBlocks
+///
+/// Helper class for AMDGPUStructurizeCFG
+/// TODO: Maybe move into common code
+class NearestCommonDominator {
+
+ DominatorTree *DT;
+
+ DTN2UnsignedMap IndexMap;
+
+ BasicBlock *Result;
+ unsigned ResultIndex;
+ bool ExplicitMentioned;
+
+public:
+ /// \brief Start a new query
+ NearestCommonDominator(DominatorTree *DomTree) {
+ DT = DomTree;
+ Result = 0;
+ }
+
+ /// \brief Add BB to the resulting dominator
+ void addBlock(BasicBlock *BB, bool Remember = true) {
+
+ DomTreeNode *Node = DT->getNode(BB);
+
+ if (Result == 0) {
+ unsigned Numbering = 0;
+ for (;Node;Node = Node->getIDom())
+ IndexMap[Node] = ++Numbering;
+ Result = BB;
+ ResultIndex = 1;
+ ExplicitMentioned = Remember;
+ return;
+ }
+
+ for (;Node;Node = Node->getIDom())
+ if (IndexMap.count(Node))
+ break;
+ else
+ IndexMap[Node] = 0;
+
+ assert(Node && "Dominator tree invalid!");
+
+ unsigned Numbering = IndexMap[Node];
+ if (Numbering > ResultIndex) {
+ Result = Node->getBlock();
+ ResultIndex = Numbering;
+ ExplicitMentioned = Remember && (Result == BB);
+ } else if (Numbering == ResultIndex) {
+ ExplicitMentioned |= Remember;
+ }
+ }
+
+ /// \brief Is "Result" one of the BBs added with "Remember" = True?
+ bool wasResultExplicitMentioned() {
+ return ExplicitMentioned;
+ }
+
+ /// \brief Get the query result
+ BasicBlock *getResult() {
+ return Result;
+ }
+};
+
+/// @brief Transforms the control flow graph on one single entry/exit region
+/// at a time.
+///
+/// After the transform all "If"/"Then"/"Else" style control flow looks like
+/// this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 |
+/// | /
+/// |/
+/// 3
+/// || Where:
+/// | | 1 = "If" block, calculates the condition
+/// 4 | 2 = "Then" subregion, runs if the condition is true
+/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
+/// |/ 4 = "Else" optional subregion, runs if the condition is false
+/// 5 5 = "End" block, also rejoins the control flow
+/// \endverbatim
+///
+/// Control flow is expressed as a branch where the true exit goes into the
+/// "Then"/"Else" region, while the false exit skips the region
+/// The condition for the optional "Else" region is expressed as a PHI node.
+/// The incomming values of the PHI node are true for the "If" edge and false
+/// for the "Then" edge.
+///
+/// Additionally to that even complicated loops look like this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 ^ Where:
+/// | / 1 = "Entry" block
+/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
+/// 3 3 = "Flow" block, with back edge to entry block
+/// |
+/// \endverbatim
+///
+/// The back edge of the "Flow" block is always on the false side of the branch
+/// while the true side continues the general flow. So the loop condition
+/// consist of a network of PHI nodes where the true incoming values expresses
+/// breaks and the false values expresses continue states.
+class AMDGPUStructurizeCFG : public RegionPass {
+
+ static char ID;
+
+ Type *Boolean;
+ ConstantInt *BoolTrue;
+ ConstantInt *BoolFalse;
+ UndefValue *BoolUndef;
+
+ Function *Func;
+ Region *ParentRegion;
+
+ DominatorTree *DT;
+
+ RNVector Order;
+ BBSet Visited;
+
+ BBPhiMap DeletedPhis;
+ BB2BBVecMap AddedPhis;
+
+ PredMap Predicates;
+ BranchVector Conditions;
+
+ BB2BBMap Loops;
+ PredMap LoopPreds;
+ BranchVector LoopConds;
+
+ RegionNode *PrevNode;
+
+ void orderNodes();
+
+ void analyzeLoops(RegionNode *N);
+
+ Value *invert(Value *Condition);
+
+ Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
+
+ void gatherPredicates(RegionNode *N);
+
+ void collectInfos();
+
+ void insertConditions(bool Loops);
+
+ void delPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void addPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void setPhiValues();
+
+ void killTerminator(BasicBlock *BB);
+
+ void changeExit(RegionNode *Node, BasicBlock *NewExit,
+ bool IncludeDominator);
+
+ BasicBlock *getNextFlow(BasicBlock *Dominator);
+
+ BasicBlock *needPrefix(bool NeedEmpty);
+
+ BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
+
+ void setPrevNode(BasicBlock *BB);
+
+ bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
+
+ bool isPredictableTrue(RegionNode *Node);
+
+ void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
+
+ void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
+
+ void createFlow();
+
+ void rebuildSSA();
+
+public:
+ AMDGPUStructurizeCFG():
+ RegionPass(ID) {
+
+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ using Pass::doInitialization;
+ virtual bool doInitialization(Region *R, RGPassManager &RGM);
+
+ virtual bool runOnRegion(Region *R, RGPassManager &RGM);
+
+ virtual const char *getPassName() const {
+ return "AMDGPU simplify control flow";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ RegionPass::getAnalysisUsage(AU);
+ }
+
+};
+
+} // end anonymous namespace
+
+char AMDGPUStructurizeCFG::ID = 0;
+
+/// \brief Initialize the types and constants used in the pass
+bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
+ LLVMContext &Context = R->getEntry()->getContext();
+
+ Boolean = Type::getInt1Ty(Context);
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+
+ return false;
+}
+
+/// \brief Build up the general order of nodes
+void AMDGPUStructurizeCFG::orderNodes() {
+ scc_iterator<Region *> I = scc_begin(ParentRegion),
+ E = scc_end(ParentRegion);
+ for (Order.clear(); I != E; ++I) {
+ std::vector<RegionNode *> &Nodes = *I;
+ Order.append(Nodes.begin(), Nodes.end());
+ }
+}
+
+/// \brief Determine the end of the loops
+void AMDGPUStructurizeCFG::analyzeLoops(RegionNode *N) {
+
+ if (N->isSubRegion()) {
+ // Test for exit as back edge
+ BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
+ if (Visited.count(Exit))
+ Loops[Exit] = N->getEntry();
+
+ } else {
+ // Test for sucessors as back edge
+ BasicBlock *BB = N->getNodeAs<BasicBlock>();
+ BranchInst *Term = cast<BranchInst>(BB->getTerminator());
+
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+
+ if (Visited.count(Succ))
+ Loops[Succ] = BB;
+ }
+ }
+}
+
+/// \brief Invert the given condition
+Value *AMDGPUStructurizeCFG::invert(Value *Condition) {
+
+ // First: Check if it's a constant
+ if (Condition == BoolTrue)
+ return BoolFalse;
+
+ if (Condition == BoolFalse)
+ return BoolTrue;
+
+ if (Condition == BoolUndef)
+ return BoolUndef;
+
+ // Second: If the condition is already inverted, return the original value
+ if (match(Condition, m_Not(m_Value(Condition))))
+ return Condition;
+
+ // Third: Check all the users for an invert
+ BasicBlock *Parent = cast<Instruction>(Condition)->getParent();
+ for (Value::use_iterator I = Condition->use_begin(),
+ E = Condition->use_end(); I != E; ++I) {
+
+ Instruction *User = dyn_cast<Instruction>(*I);
+ if (!User || User->getParent() != Parent)
+ continue;
+
+ if (match(*I, m_Not(m_Specific(Condition))))
+ return *I;
+ }
+
+ // Last option: Create a new instruction
+ return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
+}
+
+/// \brief Build the condition for one edge
+Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
+ bool Invert) {
+ Value *Cond = Invert ? BoolFalse : BoolTrue;
+ if (Term->isConditional()) {
+ Cond = Term->getCondition();
+
+ if (Idx != Invert)
+ Cond = invert(Cond);
+ }
+ return Cond;
+}
+
+/// \brief Analyze the predecessors of each block and build up predicates
+void AMDGPUStructurizeCFG::gatherPredicates(RegionNode *N) {
+
+ RegionInfo *RI = ParentRegion->getRegionInfo();
+ BasicBlock *BB = N->getEntry();
+ BBPredicates &Pred = Predicates[BB];
+ BBPredicates &LPred = LoopPreds[BB];
+
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+
+ // Ignore it if it's a branch from outside into our region entry
+ if (!ParentRegion->contains(*PI))
+ continue;
+
+ Region *R = RI->getRegionFor(*PI);
+ if (R == ParentRegion) {
+
+ // It's a top level block in our region
+ BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+ if (Succ != BB)
+ continue;
+
+ if (Visited.count(*PI)) {
+ // Normal forward edge
+ if (Term->isConditional()) {
+ // Try to treat it like an ELSE block
+ BasicBlock *Other = Term->getSuccessor(!i);
+ if (Visited.count(Other) && !Loops.count(Other) &&
+ !Pred.count(Other) && !Pred.count(*PI)) {
+
+ Pred[Other] = BoolFalse;
+ Pred[*PI] = BoolTrue;
+ continue;
+ }
+ }
+ Pred[*PI] = buildCondition(Term, i, false);
+
+ } else {
+ // Back edge
+ LPred[*PI] = buildCondition(Term, i, true);
+ }
+ }
+
+ } else {
+
+ // It's an exit from a sub region
+ while(R->getParent() != ParentRegion)
+ R = R->getParent();
+
+ // Edge from inside a subregion to its entry, ignore it
+ if (R == N)
+ continue;
+
+ BasicBlock *Entry = R->getEntry();
+ if (Visited.count(Entry))
+ Pred[Entry] = BoolTrue;
+ else
+ LPred[Entry] = BoolFalse;
+ }
+ }
+}
+
+/// \brief Collect various loop and predicate infos
+void AMDGPUStructurizeCFG::collectInfos() {
+
+ // Reset predicate
+ Predicates.clear();
+
+ // and loop infos
+ Loops.clear();
+ LoopPreds.clear();
+
+ // Reset the visited nodes
+ Visited.clear();
+
+ for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
+ OI != OE; ++OI) {
+
+ // Analyze all the conditions leading to a node
+ gatherPredicates(*OI);
+
+ // Remember that we've seen this node
+ Visited.insert((*OI)->getEntry());
+
+ // Find the last back edges
+ analyzeLoops(*OI);
+ }
+}
+
+/// \brief Insert the missing branch conditions
+void AMDGPUStructurizeCFG::insertConditions(bool Loops) {
+ BranchVector &Conds = Loops ? LoopConds : Conditions;
+ Value *Default = Loops ? BoolTrue : BoolFalse;
+ SSAUpdater PhiInserter;
+
+ for (BranchVector::iterator I = Conds.begin(),
+ E = Conds.end(); I != E; ++I) {
+
+ BranchInst *Term = *I;
+ assert(Term->isConditional());
+
+ BasicBlock *Parent = Term->getParent();
+ BasicBlock *SuccTrue = Term->getSuccessor(0);
+ BasicBlock *SuccFalse = Term->getSuccessor(1);
+
+ PhiInserter.Initialize(Boolean, "");
+ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
+ PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
+
+ BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
+
+ NearestCommonDominator Dominator(DT);
+ Dominator.addBlock(Parent, false);
+
+ Value *ParentValue = 0;
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ if (PI->first == Parent) {
+ ParentValue = PI->second;
+ break;
+ }
+ PhiInserter.AddAvailableValue(PI->first, PI->second);
+ Dominator.addBlock(PI->first);
+ }
+
+ if (ParentValue) {
+ Term->setCondition(ParentValue);
+ } else {
+ if (!Dominator.wasResultExplicitMentioned())
+ PhiInserter.AddAvailableValue(Dominator.getResult(), Default);
+
+ Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
+ }
+ }
+}
+
+/// \brief Remove all PHI values coming from "From" into "To" and remember
+/// them in DeletedPhis
+void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
+ PhiMap &Map = DeletedPhis[To];
+ for (BasicBlock::iterator I = To->begin(), E = To->end();
+ I != E && isa<PHINode>(*I);) {
+
+ PHINode &Phi = cast<PHINode>(*I++);
+ while (Phi.getBasicBlockIndex(From) != -1) {
+ Value *Deleted = Phi.removeIncomingValue(From, false);
+ Map[&Phi].push_back(std::make_pair(From, Deleted));
+ }
+ }
+}
+
+/// \brief Add a dummy PHI value as soon as we knew the new predecessor
+void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
+ for (BasicBlock::iterator I = To->begin(), E = To->end();
+ I != E && isa<PHINode>(*I);) {
+
+ PHINode &Phi = cast<PHINode>(*I++);
+ Value *Undef = UndefValue::get(Phi.getType());
+ Phi.addIncoming(Undef, From);
+ }
+ AddedPhis[To].push_back(From);
+}
+
+/// \brief Add the real PHI value as soon as everything is set up
+void AMDGPUStructurizeCFG::setPhiValues() {
+
+ SSAUpdater Updater;
+ for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end();
+ AI != AE; ++AI) {
+
+ BasicBlock *To = AI->first;
+ BBVector &From = AI->second;
+
+ if (!DeletedPhis.count(To))
+ continue;
+
+ PhiMap &Map = DeletedPhis[To];
+ for (PhiMap::iterator PI = Map.begin(), PE = Map.end();
+ PI != PE; ++PI) {
+
+ PHINode *Phi = PI->first;
+ Value *Undef = UndefValue::get(Phi->getType());
+ Updater.Initialize(Phi->getType(), "");
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(To, Undef);
+
+ NearestCommonDominator Dominator(DT);
+ Dominator.addBlock(To, false);
+ for (BBValueVector::iterator VI = PI->second.begin(),
+ VE = PI->second.end(); VI != VE; ++VI) {
+
+ Updater.AddAvailableValue(VI->first, VI->second);
+ Dominator.addBlock(VI->first);
+ }
+
+ if (!Dominator.wasResultExplicitMentioned())
+ Updater.AddAvailableValue(Dominator.getResult(), Undef);
+
+ for (BBVector::iterator FI = From.begin(), FE = From.end();
+ FI != FE; ++FI) {
+
+ int Idx = Phi->getBasicBlockIndex(*FI);
+ assert(Idx != -1);
+ Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI));
+ }
+ }
+
+ DeletedPhis.erase(To);
+ }
+ assert(DeletedPhis.empty());
+}
+
+/// \brief Remove phi values from all successors and then remove the terminator.
+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (!Term)
+ return;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+
+ delPhiValues(BB, *SI);
+ }
+
+ Term->eraseFromParent();
+}
+
+/// \brief Let node exit(s) point to NewExit
+void AMDGPUStructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
+ bool IncludeDominator) {
+
+ if (Node->isSubRegion()) {
+ Region *SubRegion = Node->getNodeAs<Region>();
+ BasicBlock *OldExit = SubRegion->getExit();
+ BasicBlock *Dominator = 0;
+
+ // Find all the edges from the sub region to the exit
+ for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
+ I != E;) {
+
+ BasicBlock *BB = *I++;
+ if (!SubRegion->contains(BB))
+ continue;
+
+ // Modify the edges to point to the new exit
+ delPhiValues(BB, OldExit);
+ BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
+ addPhiValues(BB, NewExit);
+
+ // Find the new dominator (if requested)
+ if (IncludeDominator) {
+ if (!Dominator)
+ Dominator = BB;
+ else
+ Dominator = DT->findNearestCommonDominator(Dominator, BB);
+ }
+ }
+
+ // Change the dominator (if requested)
+ if (Dominator)
+ DT->changeImmediateDominator(NewExit, Dominator);
+
+ // Update the region info
+ SubRegion->replaceExit(NewExit);
+
+ } else {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+ killTerminator(BB);
+ BranchInst::Create(NewExit, BB);
+ addPhiValues(BB, NewExit);
+ if (IncludeDominator)
+ DT->changeImmediateDominator(NewExit, BB);
+ }
+}
+
+/// \brief Create a new flow node and update dominator tree and region info
+BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Dominator) {
+ LLVMContext &Context = Func->getContext();
+ BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
+ Order.back()->getEntry();
+ BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
+ Func, Insert);
+ DT->addNewBlock(Flow, Dominator);
+ ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
+ return Flow;
+}
+
+/// \brief Create a new or reuse the previous node as flow node
+BasicBlock *AMDGPUStructurizeCFG::needPrefix(bool NeedEmpty) {
+
+ BasicBlock *Entry = PrevNode->getEntry();
+
+ if (!PrevNode->isSubRegion()) {
+ killTerminator(Entry);
+ if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
+ return Entry;
+
+ }
+
+ // create a new flow node
+ BasicBlock *Flow = getNextFlow(Entry);
+
+ // and wire it up
+ changeExit(PrevNode, Flow, true);
+ PrevNode = ParentRegion->getBBNode(Flow);
+ return Flow;
+}
+
+/// \brief Returns the region exit if possible, otherwise just a new flow node
+BasicBlock *AMDGPUStructurizeCFG::needPostfix(BasicBlock *Flow,
+ bool ExitUseAllowed) {
+
+ if (Order.empty() && ExitUseAllowed) {
+ BasicBlock *Exit = ParentRegion->getExit();
+ DT->changeImmediateDominator(Exit, Flow);
+ addPhiValues(Flow, Exit);
+ return Exit;
+ }
+ return getNextFlow(Flow);
+}
+
+/// \brief Set the previous node
+void AMDGPUStructurizeCFG::setPrevNode(BasicBlock *BB) {
+ PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0;
+}
+
+/// \brief Does BB dominate all the predicates of Node ?
+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
+ BBPredicates &Preds = Predicates[Node->getEntry()];
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ if (!DT->dominates(BB, PI->first))
+ return false;
+ }
+ return true;
+}
+
+/// \brief Can we predict that this node will always be called?
+bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Node) {
+
+ BBPredicates &Preds = Predicates[Node->getEntry()];
+ bool Dominated = false;
+
+ // Regionentry is always true
+ if (PrevNode == 0)
+ return true;
+
+ for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+
+ if (I->second != BoolTrue)
+ return false;
+
+ if (!Dominated && DT->dominates(I->first, PrevNode->getEntry()))
+ Dominated = true;
+ }
+
+ // TODO: The dominator check is too strict
+ return Dominated;
+}
+
+/// Take one node from the order vector and wire it up
+void AMDGPUStructurizeCFG::wireFlow(bool ExitUseAllowed,
+ BasicBlock *LoopEnd) {
+
+ RegionNode *Node = Order.pop_back_val();
+ Visited.insert(Node->getEntry());
+
+ if (isPredictableTrue(Node)) {
+ // Just a linear flow
+ if (PrevNode) {
+ changeExit(PrevNode, Node->getEntry(), true);
+ }
+ PrevNode = Node;
+
+ } else {
+ // Insert extra prefix node (or reuse last one)
+ BasicBlock *Flow = needPrefix(false);
+
+ // Insert extra postfix node (or use exit instead)
+ BasicBlock *Entry = Node->getEntry();
+ BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
+
+ // let it point to entry and next block
+ Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
+ addPhiValues(Flow, Entry);
+ DT->changeImmediateDominator(Entry, Flow);
+
+ PrevNode = Node;
+ while (!Order.empty() && !Visited.count(LoopEnd) &&
+ dominatesPredicates(Entry, Order.back())) {
+ handleLoops(false, LoopEnd);
+ }
+
+ changeExit(PrevNode, Next, false);
+ setPrevNode(Next);
+ }
+}
+
+void AMDGPUStructurizeCFG::handleLoops(bool ExitUseAllowed,
+ BasicBlock *LoopEnd) {
+ RegionNode *Node = Order.back();
+ BasicBlock *LoopStart = Node->getEntry();
+
+ if (!Loops.count(LoopStart)) {
+ wireFlow(ExitUseAllowed, LoopEnd);
+ return;
+ }
+
+ if (!isPredictableTrue(Node))
+ LoopStart = needPrefix(true);
+
+ LoopEnd = Loops[Node->getEntry()];
+ wireFlow(false, LoopEnd);
+ while (!Visited.count(LoopEnd)) {
+ handleLoops(false, LoopEnd);
+ }
+
+ // Create an extra loop end node
+ LoopEnd = needPrefix(false);
+ BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
+ LoopConds.push_back(BranchInst::Create(Next, LoopStart,
+ BoolUndef, LoopEnd));
+ addPhiValues(LoopEnd, LoopStart);
+ setPrevNode(Next);
+}
+
+/// After this function control flow looks like it should be, but
+/// branches and PHI nodes only have undefined conditions.
+void AMDGPUStructurizeCFG::createFlow() {
+
+ BasicBlock *Exit = ParentRegion->getExit();
+ bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
+
+ DeletedPhis.clear();
+ AddedPhis.clear();
+ Conditions.clear();
+ LoopConds.clear();
+
+ PrevNode = 0;
+ Visited.clear();
+
+ while (!Order.empty()) {
+ handleLoops(EntryDominatesExit, 0);
+ }
+
+ if (PrevNode)
+ changeExit(PrevNode, Exit, EntryDominatesExit);
+ else
+ assert(EntryDominatesExit);
+}
+
+/// Handle a rare case where the disintegrated nodes instructions
+/// no longer dominate all their uses. Not sure if this is really nessasary
+void AMDGPUStructurizeCFG::rebuildSSA() {
+ SSAUpdater Updater;
+ for (Region::block_iterator I = ParentRegion->block_begin(),
+ E = ParentRegion->block_end();
+ I != E; ++I) {
+
+ BasicBlock *BB = *I;
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+
+ bool Initialized = false;
+ for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
+
+ Next = I->getNext();
+
+ Instruction *User = cast<Instruction>(I->getUser());
+ if (User->getParent() == BB) {
+ continue;
+
+ } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(*I) == BB)
+ continue;
+ }
+
+ if (DT->dominates(II, User))
+ continue;
+
+ if (!Initialized) {
+ Value *Undef = UndefValue::get(II->getType());
+ Updater.Initialize(II->getType(), "");
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(BB, II);
+ Initialized = true;
+ }
+ Updater.RewriteUseAfterInsertions(*I);
+ }
+ }
+ }
+}
+
+/// \brief Run the transformation for each region found
+bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
+ if (R->isTopLevelRegion())
+ return false;
+
+ Func = R->getEntry()->getParent();
+ ParentRegion = R;
+
+ DT = &getAnalysis<DominatorTree>();
+
+ orderNodes();
+ collectInfos();
+ createFlow();
+ insertConditions(false);
+ insertConditions(true);
+ setPhiValues();
+ rebuildSSA();
+
+ // Cleanup
+ Order.clear();
+ Visited.clear();
+ DeletedPhis.clear();
+ AddedPhis.clear();
+ Predicates.clear();
+ Conditions.clear();
+ Loops.clear();
+ LoopPreds.clear();
+ LoopConds.clear();
+
+ return true;
+}
+
+/// \brief Create the pass
+Pass *llvm::createAMDGPUStructurizeCFGPass() {
+ return new AMDGPUStructurizeCFG();
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
new file mode 100644
index 000000000000..0f356a1c3f11
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -0,0 +1,87 @@
+//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AMDGPUGenSubtargetInfo.inc"
+
+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
+ AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
+ InstrItins = getInstrItineraryForCPU(CPU);
+
+ memset(CapsOverride, 0, sizeof(*CapsOverride)
+ * AMDGPUDeviceInfo::MaxNumberCapabilities);
+ // Default card
+ StringRef GPU = CPU;
+ Is64bit = false;
+ DefaultSize[0] = 64;
+ DefaultSize[1] = 1;
+ DefaultSize[2] = 1;
+ ParseSubtargetFeatures(GPU, FS);
+ DevName = GPU;
+ Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
+}
+
+AMDGPUSubtarget::~AMDGPUSubtarget() {
+ delete Device;
+}
+
+bool
+AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
+ assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
+ "Caps index is out of bounds!");
+ return CapsOverride[caps];
+}
+bool
+AMDGPUSubtarget::is64bit() const {
+ return Is64bit;
+}
+bool
+AMDGPUSubtarget::isTargetELF() const {
+ return false;
+}
+size_t
+AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
+ if (dim > 3) {
+ return 1;
+ } else {
+ return DefaultSize[dim];
+ }
+}
+
+std::string
+AMDGPUSubtarget::getDataLayout() const {
+ if (!Device) {
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
+ }
+ return Device->getDataLayout();
+}
+
+std::string
+AMDGPUSubtarget::getDeviceName() const {
+ return DevName;
+}
+const AMDGPUDevice *
+AMDGPUSubtarget::device() const {
+ return Device;
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
new file mode 100644
index 000000000000..1973fc6d544c
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
@@ -0,0 +1,65 @@
+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUSUBTARGET_H
+#define AMDGPUSUBTARGET_H
+#include "AMDILDevice.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define MAX_CB_SIZE (1 << 16)
+
+namespace llvm {
+
+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+private:
+ bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
+ const AMDGPUDevice *Device;
+ size_t DefaultSize[3];
+ std::string DevName;
+ bool Is64bit;
+ bool Is32on64bit;
+ bool DumpCode;
+ bool R600ALUInst;
+
+ InstrItineraryData InstrItins;
+
+public:
+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+ virtual ~AMDGPUSubtarget();
+
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool isOverride(AMDGPUDeviceInfo::Caps) const;
+ bool is64bit() const;
+
+ // Helper functions to simplify if statements
+ bool isTargetELF() const;
+ const AMDGPUDevice* device() const;
+ std::string getDataLayout() const;
+ std::string getDeviceName() const;
+ virtual size_t getDefaultSize(uint32_t dim) const;
+ bool dumpCode() const { return DumpCode; }
+ bool r600ALUEncoding() const { return R600ALUInst; }
+
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
new file mode 100644
index 000000000000..e7ea876e2abb
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -0,0 +1,164 @@
+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The AMDGPU target machine contains all of the hardware specific
+/// information needed to emit code for R600 and SI GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+#include <llvm/CodeGen/Passes.h>
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeR600Target() {
+ // Register the target
+ RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
+}
+
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new R600SchedStrategy());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("r600", "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel
+)
+:
+ LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ Subtarget(TT, CPU, FS),
+ Layout(Subtarget.getDataLayout()),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ Subtarget.device()->getStackAlignment(), 0),
+ IntrinsicInfo(this),
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ // TLInfo uses InstrInfo so it must be initialized after.
+ if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ InstrInfo = new R600InstrInfo(*this);
+ TLInfo = new R600TargetLowering(*this);
+ } else {
+ InstrInfo = new SIInstrInfo(*this);
+ TLInfo = new SITargetLowering(*this);
+ }
+}
+
+AMDGPUTargetMachine::~AMDGPUTargetMachine() {
+}
+
+namespace {
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ enablePass(&MachineSchedulerID);
+ MachineSchedRegistry::setDefault(createR600MachineScheduler);
+ }
+ }
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // End of anonymous namespace
+
+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AMDGPUPassConfig(this, PM);
+}
+
+bool
+AMDGPUPassConfig::addPreISel() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createAMDGPUStructurizeCFGPass());
+ addPass(createSIAnnotateControlFlowPass());
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addInstSelector() {
+ addPass(createAMDGPUPeepholeOpt(*TM));
+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ // This callbacks this pass uses are not implemented yet on SI.
+ addPass(createAMDGPUIndirectAddressingPass(*TM));
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreRegAlloc() {
+ addPass(createAMDGPUConvertToISAPass(*TM));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPostRegAlloc() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createSIInsertWaits(*TM));
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreSched2() {
+
+ addPass(&IfConverterID);
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreEmitPass() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createAMDGPUCFGPreparationPass(*TM));
+ addPass(createAMDGPUCFGStructurizerPass(*TM));
+ addPass(createR600EmitClauseMarkers(*TM));
+ addPass(createR600ExpandSpecialInstrsPass(*TM));
+ addPass(createR600ControlFlowFinalizer(*TM));
+ addPass(&FinalizeMachineBundlesID);
+ } else {
+ addPass(createSILowerControlFlowPass(*TM));
+ }
+
+ return false;
+}
+
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h
new file mode 100644
index 000000000000..2afe7873a90c
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h
@@ -0,0 +1,70 @@
+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The AMDGPU TargetMachine interface definition for hw codgen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_TARGET_MACHINE_H
+#define AMDGPU_TARGET_MACHINE_H
+
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILIntrinsicInfo.h"
+#include "R600ISelLowering.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
+
+namespace llvm {
+
+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
+
+class AMDGPUTargetMachine : public LLVMTargetMachine {
+
+ AMDGPUSubtarget Subtarget;
+ const DataLayout Layout;
+ AMDGPUFrameLowering FrameLowering;
+ AMDGPUIntrinsicInfo IntrinsicInfo;
+ const AMDGPUInstrInfo * InstrInfo;
+ AMDGPUTargetLowering * TLInfo;
+ const InstrItineraryData* InstrItins;
+
+public:
+ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~AMDGPUTargetMachine();
+ virtual const AMDGPUFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
+ return &IntrinsicInfo;
+ }
+ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
+ virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
+ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+ virtual AMDGPUTargetLowering * getTargetLowering() const {
+ return TLInfo;
+ }
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+ virtual const DataLayout* getDataLayout() const { return &Layout; }
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPU_TARGET_MACHINE_H
diff --git a/contrib/llvm/lib/Target/R600/AMDIL.h b/contrib/llvm/lib/Target/R600/AMDIL.h
new file mode 100644
index 000000000000..39ab664d1018
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDIL.h
@@ -0,0 +1,121 @@
+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// This file contains the entry points for global functions defined in the LLVM
+/// AMDGPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H
+#define AMDIL_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+
+// The next two values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+#define OCL_DEVICE_RV710 0x0001
+#define OCL_DEVICE_RV730 0x0002
+#define OCL_DEVICE_RV770 0x0004
+#define OCL_DEVICE_CEDAR 0x0008
+#define OCL_DEVICE_REDWOOD 0x0010
+#define OCL_DEVICE_JUNIPER 0x0020
+#define OCL_DEVICE_CYPRESS 0x0040
+#define OCL_DEVICE_CAICOS 0x0080
+#define OCL_DEVICE_TURKS 0x0100
+#define OCL_DEVICE_BARTS 0x0200
+#define OCL_DEVICE_CAYMAN 0x0400
+#define OCL_DEVICE_ALL 0x3FFF
+
+/// The number of function ID's that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+namespace llvm {
+class AMDGPUInstrPrinter;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+// Instruction selection passes.
+FunctionPass*
+ createAMDGPUISelDag(TargetMachine &TM);
+FunctionPass*
+ createAMDGPUPeepholeOpt(TargetMachine &TM);
+
+// Pre emit passes.
+FunctionPass*
+ createAMDGPUCFGPreparationPass(TargetMachine &TM);
+FunctionPass*
+ createAMDGPUCFGStructurizerPass(TargetMachine &TM);
+
+extern Target TheAMDGPUTarget;
+} // end namespace llvm;
+
+// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm {
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a seperate piece of memory that is unique from other
+/// memory locations.
+namespace AMDGPUAS {
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, ///< Address space for private memory.
+ GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
+ CONSTANT_ADDRESS = 2, ///< Address space for constant memory
+ LOCAL_ADDRESS = 3, ///< Address space for local memory.
+ REGION_ADDRESS = 4, ///< Address space for region memory.
+ ADDRESS_NONE = 5, ///< Address space for unknown memory.
+ PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
+ PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
+ CONSTANT_BUFFER_0 = 8,
+ CONSTANT_BUFFER_1 = 9,
+ CONSTANT_BUFFER_2 = 10,
+ CONSTANT_BUFFER_3 = 11,
+ CONSTANT_BUFFER_4 = 12,
+ CONSTANT_BUFFER_5 = 13,
+ CONSTANT_BUFFER_6 = 14,
+ CONSTANT_BUFFER_7 = 15,
+ CONSTANT_BUFFER_8 = 16,
+ CONSTANT_BUFFER_9 = 17,
+ CONSTANT_BUFFER_10 = 18,
+ CONSTANT_BUFFER_11 = 19,
+ CONSTANT_BUFFER_12 = 20,
+ CONSTANT_BUFFER_13 = 21,
+ CONSTANT_BUFFER_14 = 22,
+ CONSTANT_BUFFER_15 = 23,
+ LAST_ADDRESS = 24
+};
+
+} // namespace AMDGPUAS
+
+} // end namespace llvm
+#endif // AMDIL_H
diff --git a/contrib/llvm/lib/Target/R600/AMDIL7XXDevice.cpp b/contrib/llvm/lib/Target/R600/AMDIL7XXDevice.cpp
new file mode 100644
index 000000000000..ea6ac34f570c
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDIL7XXDevice.cpp
@@ -0,0 +1,115 @@
+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevice.h"
+
+using namespace llvm;
+
+AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
+ setCaps();
+ std::string name = mSTM->getDeviceName();
+ if (name == "rv710") {
+ DeviceFlag = OCL_DEVICE_RV710;
+ } else if (name == "rv730") {
+ DeviceFlag = OCL_DEVICE_RV730;
+ } else {
+ DeviceFlag = OCL_DEVICE_RV770;
+ }
+}
+
+AMDGPU7XXDevice::~AMDGPU7XXDevice() {
+}
+
+void AMDGPU7XXDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+}
+
+size_t AMDGPU7XXDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_700;
+ }
+ return 0;
+}
+
+size_t AMDGPU7XXDevice::getWavefrontSize() const {
+ return AMDGPUDevice::HalfWavefrontSize;
+}
+
+uint32_t AMDGPU7XXDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD4XXX;
+}
+
+uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
+ switch (DeviceID) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case GLOBAL_ID:
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ case ARENA_UAV_ID:
+ break;
+ case LDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ }
+ break;
+ case SCRATCH_ID:
+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ }
+ break;
+ case GDS_ID:
+ assert(0 && "GDS UAV ID is not supported on this chip");
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ }
+ break;
+ };
+
+ return 0;
+}
+
+uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
+ return 1;
+}
+
+AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
+ setCaps();
+}
+
+AMDGPU770Device::~AMDGPU770Device() {
+}
+
+void AMDGPU770Device::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ }
+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
+ mHWBits.reset(AMDGPUDeviceInfo::LongOps);
+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+}
+
+size_t AMDGPU770Device::getWavefrontSize() const {
+ return AMDGPUDevice::WavefrontSize;
+}
+
+AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
+}
+
+AMDGPU710Device::~AMDGPU710Device() {
+}
+
+size_t AMDGPU710Device::getWavefrontSize() const {
+ return AMDGPUDevice::QuarterWavefrontSize;
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDIL7XXDevice.h b/contrib/llvm/lib/Target/R600/AMDIL7XXDevice.h
new file mode 100644
index 000000000000..1cf4ca415a4c
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDIL7XXDevice.h
@@ -0,0 +1,72 @@
+//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDIL7XXDEVICEIMPL_H
+#define AMDIL7XXDEVICEIMPL_H
+#include "AMDILDevice.h"
+
+namespace llvm {
+class AMDGPUSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
+///
+/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
+/// support the minimal features that are required to be considered OpenCL 1.0
+/// compliant and nothing more.
+class AMDGPU7XXDevice : public AMDGPUDevice {
+public:
+ AMDGPU7XXDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU7XXDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
+ virtual uint32_t getMaxNumUAVs() const;
+
+protected:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPU770Device class represents the RV770 chip and it's
+/// derivative cards.
+///
+/// The difference between this device and the base class is this device device
+/// adds support for double precision and has a larger wavefront size.
+class AMDGPU770Device : public AMDGPU7XXDevice {
+public:
+ AMDGPU770Device(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU770Device();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPU710Device class derives from the 7XX base class.
+///
+/// This class is a smaller derivative, so we need to overload some of the
+/// functions in order to correctly specify this information.
+class AMDGPU710Device : public AMDGPU7XXDevice {
+public:
+ AMDGPU710Device(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU710Device();
+ virtual size_t getWavefrontSize() const;
+};
+
+} // namespace llvm
+#endif // AMDILDEVICEIMPL_H
diff --git a/contrib/llvm/lib/Target/R600/AMDILBase.td b/contrib/llvm/lib/Target/R600/AMDILBase.td
new file mode 100644
index 000000000000..c12cedcf7fd5
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILBase.td
@@ -0,0 +1,85 @@
+//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===//
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
+ "true",
+ "Enable 64bit double precision operations">;
+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
+ "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
+ "true",
+ "Enable byte addressable stores">;
+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
+ "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
+ "true",
+ "Enable duplicate barrier detection(HD5XXX or later).">;
+def FeatureImages : SubtargetFeature<"images",
+ "CapsOverride[AMDGPUDeviceInfo::Images]",
+ "true",
+ "Enable image functions">;
+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
+ "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
+ "true",
+ "Generate multiple UAV code(HD5XXX family or later)">;
+def FeatureMacroDB : SubtargetFeature<"macrodb",
+ "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
+ "true",
+ "Use internal macrodb, instead of macrodb in driver">;
+def FeatureNoAlias : SubtargetFeature<"noalias",
+ "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
+ "true",
+ "assert that all kernel argument pointers are not aliased">;
+def FeatureNoInline : SubtargetFeature<"no-inline",
+ "CapsOverride[AMDGPUDeviceInfo::NoInline]",
+ "true",
+ "specify whether to not inline functions">;
+
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+ "Is64bit",
+ "false",
+ "Specify if 64bit addressing should be used.">;
+
+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+ "Is32on64bit",
+ "false",
+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+def FeatureDebug : SubtargetFeature<"debug",
+ "CapsOverride[AMDGPUDeviceInfo::Debug]",
+ "true",
+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+ "DumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
+ "R600ALUInst",
+ "false",
+ "Older version of ALU instructions encoding.">;
+
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+
+include "AMDILRegisterInfo.td"
+include "AMDILInstrInfo.td"
+
diff --git a/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp b/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
new file mode 100644
index 000000000000..b0cd0f9756a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -0,0 +1,3051 @@
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+
+#define DEBUGME 0
+#define DEBUG_TYPE "structcfg"
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDIL.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+ "pattern matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
+ "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct {
+#define SHOWNEWINSTR(i) \
+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
+ iterEnd = LoopInfo.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->print(OS, 0);
+ }
+}
+
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
+ size_t sz = Src.size();
+ for (size_t i = 0; i < sz/2; ++i) {
+ NodeT *t = Src[i];
+ Src[i] = Src[sz - i - 1];
+ Src[sz - i - 1] = t;
+ }
+}
+
+} //end namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+template<class PassT>
+struct CFGStructTraits {
+};
+
+template <class InstrT>
+class BlockInformation {
+public:
+ bool isRetired;
+ int sccNum;
+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+ //Instructions defining the corresponding successor.
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+template <class BlockT, class InstrT, class RegiT>
+class LandInformation {
+public:
+ BlockT *landBlk;
+ std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
+ //WHILELOOP(thisloop) init before entering
+ //thisloop.
+ std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
+ //WHILELOOP(thisloop) init after entering
+ //thisloop.
+ std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
+ //land block, branch cond on this reg.
+ std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
+ //endif" after ENDLOOP(thisloop) break
+ //outerLoopOf(thisLoop).
+ std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
+ //endif" after ENDLOOP(thisloop) continue on
+ //outerLoopOf(thisLoop).
+ LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+template<class PassT>
+class CFGStructurizer {
+public:
+ typedef enum {
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ } PathToKind;
+
+public:
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
+
+ typedef GraphTraits<FuncT *> FuncGTraits;
+ //typedef FuncGTraits::nodes_iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
+
+ typedef typename FuncGTraits::NodeType BlockT;
+ typedef GraphTraits<BlockT *> BlockGTraits;
+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
+ //typedef BlockGTraits::succ_iterator InstructionIterator;
+ typedef typename BlockT::iterator InstrIterator;
+
+ typedef CFGStructTraits<PassT> CFGTraits;
+ typedef BlockInformation<InstrT> BlockInfo;
+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
+
+ typedef int RegiT;
+ typedef typename PassT::LoopType LoopT;
+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+ //landing info for loop break
+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+
+public:
+ CFGStructurizer();
+ ~CFGStructurizer();
+
+ /// Perform the CFG structurization
+ bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+
+ /// Perform the CFG preparation
+ bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+
+private:
+ void reversePredicateSetter(typename BlockT::iterator);
+ void orderBlocks();
+ void printOrderedBlocks(llvm::raw_ostream &OS);
+ int patternMatch(BlockT *CurBlock);
+ int patternMatchGroup(BlockT *CurBlock);
+
+ int serialPatternMatch(BlockT *CurBlock);
+ int ifPatternMatch(BlockT *CurBlock);
+ int switchPatternMatch(BlockT *CurBlock);
+ int loopendPatternMatch(BlockT *CurBlock);
+ int loopPatternMatch(BlockT *CurBlock);
+
+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ //int loopWithoutBreak(BlockT *);
+
+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+ BlockT *ContBlock, LoopT *contLoop);
+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT **LandBlockPtr);
+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT *LandBlock,
+ bool Detail = false);
+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+ BlockT *TrueBlock, BlockT *FalseBlock,
+ BlockT *LandBlock);
+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+ BlockT *ExitLandBlock, RegiT SetReg);
+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+ RegiT SetReg);
+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+ std::set<BlockT*> &ExitBlockSet,
+ BlockT *ExitLandBlk);
+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+ BlockTSmallerVector &ExitingBlocks,
+ BlockTSmallerVector &ExitBlocks);
+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+ void removeUnconditionalBranch(BlockT *SrcBlock);
+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+ void removeSuccessor(BlockT *SrcBlock);
+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+ InstrIterator InsertPos);
+
+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
+ int getSCCNum(BlockT *srcBlk);
+
+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+ bool isRetiredBlock(BlockT *SrcBlock);
+ bool isActiveLoophead(BlockT *CurBlock);
+ bool needMigrateBlock(BlockT *Block);
+
+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+ BlockTSmallerVector &exitBlocks,
+ std::set<BlockT*> &ExitBlockSet);
+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+ BlockT *getLoopLandBlock(LoopT *LoopRep);
+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+ bool hasBackEdge(BlockT *curBlock);
+ unsigned getLoopDepth (LoopT *LoopRep);
+ int countActiveBlock(
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+
+private:
+ DomTreeT *domTree;
+ PostDomTreeT *postDomTree;
+ LoopInfoT *loopInfo;
+ PassT *passRep;
+ FuncT *funcRep;
+
+ BlockInfoMap blockInfoMap;
+ LoopLandInfoMap loopLandInfoMap;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
+ const AMDGPURegisterInfo *TRI;
+
+}; //template class CFGStructurizer
+
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
+}
+
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
+ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+ E = blockInfoMap.end(); I != E; ++I) {
+ delete I->second;
+ }
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
+ const AMDGPURegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ bool changed = false;
+
+ //FIXME: if not reducible flow graph, make it so ???
+
+ if (DEBUGME) {
+ errs() << "AMDGPUCFGStructurizer::prepare\n";
+ }
+
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
+
+ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
+ iterEnd = loopInfo->end();
+ iter != iterEnd; ++iter) {
+ LoopT* loopRep = (*iter);
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (exitingBlks.size() == 0) {
+ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
+ if (dummyExitBlk != NULL)
+ retBlks.push_back(dummyExitBlk);
+ }
+ }
+
+ // Remove unconditional branch instr.
+ // Add dummy exit block iff there are multiple returns.
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
+ iterBlk != iterEndBlk;
+ ++iterBlk) {
+ BlockT *curBlk = *iterBlk;
+ removeUnconditionalBranch(curBlk);
+ removeRedundantConditionalBranch(curBlk);
+ if (CFGTraits::isReturnBlock(curBlk)) {
+ retBlks.push_back(curBlk);
+ }
+ assert(curBlk->succ_size() <= 2);
+ } //for
+
+ if (retBlks.size() >= 2) {
+ addDummyExitBlock(retBlks);
+ changed = true;
+ }
+
+ return changed;
+} //CFGStructurizer::prepare
+
+template<class PassT>
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
+ const AMDGPURegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ //Assume reducible CFG...
+ if (DEBUGME) {
+ errs() << "AMDGPUCFGStructurizer::run\n";
+ func.viewCFG();
+ }
+
+ domTree = CFGTraits::getDominatorTree(pass);
+ if (DEBUGME) {
+ domTree->print(errs(), (const llvm::Module*)0);
+ }
+
+ postDomTree = CFGTraits::getPostDominatorTree(pass);
+ if (DEBUGME) {
+ postDomTree->print(errs());
+ }
+
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+#ifdef STRESSTEST
+ //Use the worse block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+ int numIter = 0;
+ bool finish = false;
+ BlockT *curBlk;
+ bool makeProgress = false;
+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+ orderedBlks.end());
+
+ do {
+ ++numIter;
+ if (DEBUGME) {
+ errs() << "numIter = " << numIter
+ << ", numRemaintedBlk = " << numRemainedBlk << "\n";
+ }
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin();
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlkEnd = orderedBlks.end();
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ sccBeginIter = iterBlk;
+ BlockT *sccBeginBlk = NULL;
+ int sccNumBlk = 0; // The number of active blocks, init to a
+ // maximum possible number.
+ int sccNumIter; // Number of iteration in this SCC.
+
+ while (iterBlk != iterBlkEnd) {
+ curBlk = *iterBlk;
+
+ if (sccBeginBlk == NULL) {
+ sccBeginIter = iterBlk;
+ sccBeginBlk = curBlk;
+ sccNumIter = 0;
+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+ if (DEBUGME) {
+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+ errs() << "\n";
+ }
+ }
+
+ if (!isRetiredBlock(curBlk)) {
+ patternMatch(curBlk);
+ }
+
+ ++iterBlk;
+
+ bool contNextScc = true;
+ if (iterBlk == iterBlkEnd
+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+ // Just finish one scc.
+ ++sccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
+ if (DEBUGME) {
+ errs() << "Can't reduce SCC " << getSCCNum(curBlk)
+ << ", sccNumIter = " << sccNumIter;
+ errs() << "doesn't make any progress\n";
+ }
+ contNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
+ sccNumBlk = sccRemainedNumBlk;
+ iterBlk = sccBeginIter;
+ contNextScc = false;
+ if (DEBUGME) {
+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
+ << "sccNumIter = " << sccNumIter << "\n";
+ func.viewCFG();
+ }
+ } else {
+ // Finish the current scc.
+ contNextScc = true;
+ }
+ } else {
+ // Continue on next component in the current scc.
+ contNextScc = false;
+ }
+
+ if (contNextScc) {
+ sccBeginBlk = NULL;
+ }
+ } //while, "one iteration" over the function.
+
+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+ if (entryBlk->succ_size() == 0) {
+ finish = true;
+ if (DEBUGME) {
+ errs() << "Reduce to one block\n";
+ }
+ } else {
+ int newnumRemainedBlk
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ // consider cloned blocks ??
+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+ makeProgress = true;
+ numRemainedBlk = newnumRemainedBlk;
+ } else {
+ makeProgress = false;
+ if (DEBUGME) {
+ errs() << "No progress\n";
+ }
+ }
+ }
+ } while (!finish && makeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+ // Detach retired Block, release memory.
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ if ((*iterMap).second && (*iterMap).second->isRetired) {
+ assert(((*iterMap).first)->getNumber() != -1);
+ if (DEBUGME) {
+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+ }
+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*iterMap).second;
+ }
+ blockInfoMap.clear();
+
+ // clear loopLandInfoMap
+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ delete (*iterMap).second;
+ }
+ loopLandInfoMap.clear();
+
+ if (DEBUGME) {
+ func.viewCFG();
+ }
+
+ if (!finish) {
+ assert(!"IRREDUCIBL_CF");
+ }
+
+ return true;
+} //CFGStructurizer::run
+
+/// Print the ordered Blocks.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
+ size_t i = 0;
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
+ iterBlk != iterBlkEnd;
+ ++iterBlk, ++i) {
+ os << "BB" << (*iterBlk)->getNumber();
+ os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+ if (i != 0 && i % 10 == 0) {
+ os << "\n";
+ } else {
+ os << " ";
+ }
+ }
+} //printOrderedBlocks
+
+/// Compute the reversed DFS post order of Blocks
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
+ int sccNum = 0;
+ BlockT *bb;
+ for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
+ sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
+ std::vector<BlockT *> &sccNext = *sccIter;
+ for (typename std::vector<BlockT *>::const_iterator
+ blockIter = sccNext.begin(), blockEnd = sccNext.end();
+ blockIter != blockEnd; ++blockIter) {
+ bb = *blockIter;
+ orderedBlks.push_back(bb);
+ recordSccnum(bb, sccNum);
+ }
+ }
+
+ //walk through all the block in func to check for unreachable
+ for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
+ blockEnd1 = FuncGTraits::nodes_end(funcRep);
+ blockIter1 != blockEnd1; ++blockIter1) {
+ BlockT *bb = &(*blockIter1);
+ sccNum = getSCCNum(bb);
+ if (sccNum == INVALIDSCCNUM) {
+ errs() << "unreachable block BB" << bb->getNumber() << "\n";
+ }
+ }
+} //orderBlocks
+
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
+ int numMatch = 0;
+ int curMatch;
+
+ if (DEBUGME) {
+ errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+ }
+
+ while ((curMatch = patternMatchGroup(curBlk)) > 0) {
+ numMatch += curMatch;
+ }
+
+ if (DEBUGME) {
+ errs() << "End patternMatch BB" << curBlk->getNumber()
+ << ", numMatch = " << numMatch << "\n";
+ }
+
+ return numMatch;
+} //patternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
+ int numMatch = 0;
+ numMatch += serialPatternMatch(curBlk);
+ numMatch += ifPatternMatch(curBlk);
+ numMatch += loopendPatternMatch(curBlk);
+ numMatch += loopPatternMatch(curBlk);
+ return numMatch;
+}//patternMatchGroup
+
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 1) {
+ return 0;
+ }
+
+ BlockT *childBlk = *curBlk->succ_begin();
+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
+ return 0;
+ }
+
+ mergeSerialBlock(curBlk, childBlk);
+ ++numSerialPatternMatch;
+ return 1;
+} //serialPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
+ //two edges
+ if (curBlk->succ_size() != 2) {
+ return 0;
+ }
+
+ if (hasBackEdge(curBlk)) {
+ return 0;
+ }
+
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
+ if (branchInstr == NULL) {
+ return 0;
+ }
+
+ assert(CFGTraits::isCondBranch(branchInstr));
+
+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
+ BlockT *landBlk;
+ int cloned = 0;
+
+ // TODO: Simplify
+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
+ landBlk = *trueBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
+ landBlk = NULL;
+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
+ landBlk = falseBlk;
+ falseBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && *falseBlk->succ_begin() == trueBlk) {
+ landBlk = trueBlk;
+ trueBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
+ landBlk = *falseBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
+ landBlk = *trueBlk->succ_begin();
+ } else {
+ return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+ }
+
+ // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
+ // new BB created for landBlk==NULL may introduce new challenge to the
+ // reduction process.
+ if (landBlk != NULL &&
+ ((trueBlk && trueBlk->pred_size() > 1)
+ || (falseBlk && falseBlk->pred_size() > 1))) {
+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
+ }
+
+ if (trueBlk && trueBlk->pred_size() > 1) {
+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
+ ++cloned;
+ }
+
+ if (falseBlk && falseBlk->pred_size() > 1) {
+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
+ ++cloned;
+ }
+
+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += cloned;
+
+ return 1 + cloned;
+} //ifPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
+ return 0;
+} //switchPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ typename std::vector<LoopT *> nestedLoops;
+ while (loopRep) {
+ nestedLoops.push_back(loopRep);
+ loopRep = loopRep->getParentLoop();
+ }
+
+ if (nestedLoops.size() == 0) {
+ return 0;
+ }
+
+ // Process nested loop outside->inside, so "continue" to a outside loop won't
+ // be mistaken as "break" of the current loop.
+ int num = 0;
+ for (typename std::vector<LoopT *>::reverse_iterator
+ iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
+ iter != iterEnd; ++iter) {
+ loopRep = *iter;
+
+ if (getLoopLandBlock(loopRep) != NULL) {
+ continue;
+ }
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
+
+ if (numBreak == -1) {
+ break;
+ }
+
+ int numCont = loopcontPatternMatch(loopRep, loopHeader);
+ num += numBreak + numCont;
+ }
+
+ return num;
+} //loopendPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 0) {
+ return 0;
+ }
+
+ int numLoop = 0;
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+ if (loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ mergeLooplandBlock(curBlk, loopLand);
+ ++numLoop;
+ }
+ }
+ loopRep = loopRep->getParentLoop();
+ }
+
+ numLoopPatternMatch += numLoop;
+
+ return numLoop;
+} //loopPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+ }
+
+ if (exitingBlks.size() == 0) {
+ setLoopLandBlock(loopRep);
+ return 0;
+ }
+
+ // Compute the corresponding exitBlks and exit block set.
+ BlockTSmallerVector exitBlks;
+ std::set<BlockT *> exitBlkSet;
+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *exitingBlk = *iter;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ exitBlks.push_back(exitBlk);
+ exitBlkSet.insert(exitBlk); //non-duplicate insert
+ }
+
+ assert(exitBlkSet.size() > 0);
+ assert(exitBlks.size() == exitingBlks.size());
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ }
+
+ // Find exitLandBlk.
+ BlockT *exitLandBlk = NULL;
+ int numCloned = 0;
+ int numSerial = 0;
+
+ if (exitBlkSet.size() == 1) {
+ exitLandBlk = *exitBlkSet.begin();
+ } else {
+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+ if (exitLandBlk == NULL) {
+ return -1;
+ }
+
+ bool allInPath = true;
+ bool allNotInPath = true;
+ for (typename std::set<BlockT*>::const_iterator
+ iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+
+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+ if (DEBUGME) {
+ errs() << "BB" << exitBlk->getNumber()
+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+ << pathKind << "\n";
+ }
+
+ allInPath = allInPath && (pathKind == SinglePath_InPath);
+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+ if (!allInPath && !allNotInPath) {
+ if (DEBUGME) {
+ errs() << "singlePath check fail\n";
+ }
+ return -1;
+ }
+ } // check all exit blocks
+
+ if (allNotInPath) {
+
+ // TODO: Simplify, maybe separate function?
+ LoopT *parentLoopRep = loopRep->getParentLoop();
+ BlockT *parentLoopHeader = NULL;
+ if (parentLoopRep)
+ parentLoopHeader = parentLoopRep->getHeader();
+
+ if (exitLandBlk == parentLoopHeader &&
+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
+ if (DEBUGME) {
+ errs() << "relocateLoopcontBlock success\n";
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "loop exit fail\n";
+ }
+ return -1;
+ }
+ }
+
+ // Handle side entry to exit path.
+ exitBlks.clear();
+ exitBlkSet.clear();
+ for (typename BlockTSmallerVector::iterator iterExiting =
+ exitingBlks.begin(),
+ iterExitingEnd = exitingBlks.end();
+ iterExiting != iterExitingEnd; ++iterExiting) {
+ BlockT *exitingBlk = *iterExiting;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ BlockT *newExitBlk = exitBlk;
+
+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+ ++numCloned;
+ }
+
+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+ exitBlks.push_back(newExitBlk);
+ exitBlkSet.insert(newExitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ numSerial += serialPatternMatch(exitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ if (exitBlk->pred_size() > 1) {
+ if (exitBlk != exitLandBlk) {
+ return -1;
+ }
+ } else {
+ if (exitBlk != exitLandBlk &&
+ (exitBlk->succ_size() != 1 ||
+ *exitBlk->succ_begin() != exitLandBlk)) {
+ return -1;
+ }
+ }
+ }
+ } // else
+
+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+ // Fold break into the breaking block. Leverage across level breaks.
+ assert(exitingBlks.size() == exitBlks.size());
+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
+ BlockT *exitBlk = *iterExit;
+ BlockT *exitingBlk = *iterExiting;
+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+ }
+
+ int numBreak = static_cast<int>(exitingBlks.size());
+ numLoopbreakPatternMatch += numBreak;
+ numClonedBlock += numCloned;
+ return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ int numCont = 0;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
+ for (typename InvBlockGTraits::ChildIteratorType iter =
+ InvBlockGTraits::child_begin(loopHeader),
+ iterEnd = InvBlockGTraits::child_end(loopHeader);
+ iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ if (loopRep->contains(curBlk)) {
+ handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
+ loopHeader, loopRep);
+ contBlk.push_back(curBlk);
+ ++numCont;
+ }
+ }
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
+ iter = contBlk.begin(), iterEnd = contBlk.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->removeSuccessor(loopHeader);
+ }
+
+ numLoopcontPatternMatch += numCont;
+
+ return numCont;
+} //loopcontPatternMatch
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+ BlockT *src2Blk) {
+ // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
+ // same loop with LoopLandInfo without explicitly keeping track of
+ // loopContBlks and loopBreakBlks, this is a method to get the information.
+ //
+ if (src1Blk->succ_size() == 0) {
+ LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+ if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+ if (theEntry != NULL) {
+ if (DEBUGME) {
+ errs() << "isLoopContBreakBlock yes src1 = BB"
+ << src1Blk->getNumber()
+ << " src2 = BB" << src2Blk->getNumber() << "\n";
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+} //isSameloopDetachedContbreak
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+ if (num == 0) {
+ if (DEBUGME) {
+ errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+ }
+ num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+ }
+ return num;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = 0;
+ BlockT *downBlk;
+
+ //trueBlk could be the common post dominator
+ downBlk = trueBlk;
+
+ if (DEBUGME) {
+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+ << " true = BB" << trueBlk->getNumber()
+ << ", numSucc=" << trueBlk->succ_size()
+ << " false = BB" << falseBlk->getNumber() << "\n";
+ }
+
+ while (downBlk) {
+ if (DEBUGME) {
+ errs() << "check down = BB" << downBlk->getNumber();
+ }
+
+ if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
+ if (DEBUGME) {
+ errs() << " working\n";
+ }
+
+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+
+ numClonedBlock += num;
+ num += serialPatternMatch(*headBlk->succ_begin());
+ num += serialPatternMatch(*(++headBlk->succ_begin()));
+ num += ifPatternMatch(headBlk);
+ assert(num > 0);
+
+ break;
+ }
+ if (DEBUGME) {
+ errs() << " not working\n";
+ }
+ downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
+ } // walk down the postDomTree
+
+ return num;
+} //handleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk,
+ bool detail) {
+ errs() << "head = BB" << headBlk->getNumber()
+ << " size = " << headBlk->size();
+ if (detail) {
+ errs() << "\n";
+ headBlk->print(errs());
+ errs() << "\n";
+ }
+
+ if (trueBlk) {
+ errs() << ", true = BB" << trueBlk->getNumber() << " size = "
+ << trueBlk->size() << " numPred = " << trueBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ trueBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (falseBlk) {
+ errs() << ", false = BB" << falseBlk->getNumber() << " size = "
+ << falseBlk->size() << " numPred = " << falseBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ falseBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (landBlk) {
+ errs() << ", land = BB" << landBlk->getNumber() << " size = "
+ << landBlk->size() << " numPred = " << landBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ landBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+
+ errs() << "\n";
+} //showImproveSimpleJumpintoIf
+
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk) {
+ bool migrateTrue = false;
+ bool migrateFalse = false;
+
+ BlockT *landBlk = *plandBlk;
+
+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+ if (trueBlk == falseBlk) {
+ return 0;
+ }
+
+ migrateTrue = needMigrateBlock(trueBlk);
+ migrateFalse = needMigrateBlock(falseBlk);
+
+ if (!migrateTrue && !migrateFalse) {
+ return 0;
+ }
+
+ // If we need to migrate either trueBlk and falseBlk, migrate the rest that
+ // have more than one predecessors. without doing this, its predecessor
+ // rather than headBlk will have undefined value in initReg.
+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+ migrateTrue = true;
+ }
+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+ migrateFalse = true;
+ }
+
+ if (DEBUGME) {
+ errs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ // {initReg = 0; org falseBlk branch }
+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ // => org landBlk
+ // if landBlk->pred_size() > 2, put the about if-else inside
+ // if (initReg !=2) {...}
+ //
+ // add initReg = initVal to headBlk
+
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ unsigned initReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ if (!migrateTrue || !migrateFalse) {
+ int initVal = migrateTrue ? 0 : 1;
+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ }
+
+ int numNewBlk = 0;
+
+ if (landBlk == NULL) {
+ landBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(landBlk); //insert to function
+
+ if (trueBlk) {
+ trueBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ if (falseBlk) {
+ falseBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ numNewBlk ++;
+ }
+
+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+ //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
+
+ if (landBlkHasOtherPred) {
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+ unsigned cmpResReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+
+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+ initReg, immReg);
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+ AMDGPU::IF_PREDICATE_SET, passRep,
+ cmpResReg, DebugLoc());
+ }
+
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
+ passRep, initReg, DebugLoc());
+
+ if (migrateTrue) {
+ migrateInstruction(trueBlk, landBlk, insertPos);
+ // need to uncondionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 1).
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ }
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
+
+ if (migrateFalse) {
+ migrateInstruction(falseBlk, landBlk, insertPos);
+ // need to uncondionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 0)
+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ }
+
+ if (landBlkHasOtherPred) {
+ // add endif
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
+
+ // put initReg = 2 to other predecessors of landBlk
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+ ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (curBlk != trueBlk && curBlk != falseBlk) {
+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+ }
+ } //for
+ }
+ if (DEBUGME) {
+ errs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+
+ // update landBlk
+ *plandBlk = landBlk;
+
+ return numNewBlk;
+} //improveSimpleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+ LoopT *exitingLoop,
+ BlockT *exitBlk,
+ LoopT *exitLoop,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+ << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+ }
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT initReg = INVALIDREGNUM;
+ if (exitingLoop != exitLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopBreakInitReg(exitLoop, initReg);
+ while (exitingLoop != exitLoop && exitingLoop) {
+ addLoopBreakOnReg(exitingLoop, initReg);
+ exitingLoop = exitingLoop->getParentLoop();
+ }
+ assert(exitingLoop == exitLoop);
+ }
+
+ mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
+
+} //handleLoopbreak
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+ LoopT *contingLoop,
+ BlockT *contBlk,
+ LoopT *contLoop) {
+ if (DEBUGME) {
+ errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+ << " header = BB" << contBlk->getNumber() << "\n";
+
+ errs() << "Trying to continue loop-depth = "
+ << getLoopDepth(contLoop)
+ << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+ }
+
+ RegiT initReg = INVALIDREGNUM;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (contingLoop != contLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopContInitReg(contLoop, initReg);
+ while (contingLoop && contingLoop->getParentLoop() != contLoop) {
+ addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
+ contingLoop = contingLoop->getParentLoop();
+ }
+ assert(contingLoop && contingLoop->getParentLoop() == contLoop);
+ addLoopContOnReg(contingLoop, initReg);
+ }
+
+ settleLoopcontBlock(contingBlk, contBlk, initReg);
+} //handleLoopcontBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "serialPattern BB" << dstBlk->getNumber()
+ << " <= BB" << srcBlk->getNumber() << "\n";
+ }
+ dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
+
+ dstBlk->removeSuccessor(srcBlk);
+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
+
+ removeSuccessor(srcBlk);
+ retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "ifPattern BB" << curBlk->getNumber();
+ errs() << "{ ";
+ if (trueBlk) {
+ errs() << "BB" << trueBlk->getNumber();
+ }
+ errs() << " } else ";
+ errs() << "{ ";
+ if (falseBlk) {
+ errs() << "BB" << falseBlk->getNumber();
+ }
+ errs() << " }\n ";
+ errs() << "landBlock: ";
+ if (landBlk == NULL) {
+ errs() << "NULL";
+ } else {
+ errs() << "BB" << landBlk->getNumber();
+ }
+ errs() << "\n";
+ }
+
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc branchDL = branchInstr->getDebugLoc();
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(curBlk, branchInstr);
+ CFGTraits::insertCondBranchBefore(branchInstrPos,
+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
+ passRep,
+ branchDL);
+
+ if (trueBlk) {
+ curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
+ curBlk->removeSuccessor(trueBlk);
+ if (landBlk && trueBlk->succ_size()!=0) {
+ trueBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, trueBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
+
+ if (falseBlk) {
+ curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
+ falseBlk->end());
+ curBlk->removeSuccessor(falseBlk);
+ if (landBlk && falseBlk->succ_size() != 0) {
+ falseBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, falseBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
+
+ branchInstr->eraseFromParent();
+
+ if (landBlk && trueBlk && falseBlk) {
+ curBlk->addSuccessor(landBlk);
+ }
+
+} //mergeIfthenelseBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+ LoopLandInfo *loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+
+ if (DEBUGME) {
+ errs() << "loopPattern header = BB" << dstBlk->getNumber()
+ << " land = BB" << landBlk->getNumber() << "\n";
+ }
+
+ // Loop contInitRegs are init at the beginning of the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->contInitRegs.begin(),
+ iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* we last inserterd the DebugLoc in the
+ * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
+ * search for the DebugLoc in the that statement.
+ * if not found, we have to insert the empty/default DebugLoc */
+ InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+ DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
+ // Loop breakInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakInitRegs.begin(),
+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+ // Loop endbranchInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->endbranchInitRegs.begin(),
+ iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
+ * search for the DebugLoc in the continue statement.
+ * if not found, we have to insert the empty/default DebugLoc */
+ InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+ DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
+ // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
+ // loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakOnRegs.begin(),
+ iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
+ *iter);
+ }
+
+ // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
+ // loop.
+ for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
+ iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
+ passRep, *iter);
+ }
+
+ dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+ for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+ iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
+ }
+
+ removeSuccessor(landBlk);
+ retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
+ while (I--) {
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
+ return;
+ case OPCODE_IS_NOT_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
+ return;
+ case OPCODE_IS_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
+ return;
+ case OPCODE_IS_NOT_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
+ return;
+ default:
+ assert(0 && "PRED_X Opcode invalid!");
+ }
+ }
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+ << " exit = BB" << exitBlk->getNumber()
+ << " land = BB" << exitLandBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+
+ // transform exitingBlk to
+ // if ( ) {
+ // exitBlk (if exitBlk != exitLandBlk)
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+ //break_logical
+
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstrPos);
+ }
+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
+ } else {
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstr);
+ }
+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+ exitBlk->end());
+ }
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
+ } //if_logical
+
+ //now branchInst can be erase safely
+ branchInstr->eraseFromParent();
+
+ //now take care of successors, retire blocks
+ exitingBlk->removeSuccessor(exitBlk);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitBlk->removeSuccessor(exitLandBlk);
+ retireBlock(exitingBlk, exitBlk);
+ }
+
+} //mergeLoopbreakBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+ BlockT *contBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "settleLoopcontBlock conting = BB"
+ << contingBlk->getNumber()
+ << ", cont = BB" << contBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+ if (branchInstr) {
+ assert(CFGTraits::isCondBranch(branchInstr));
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(contingBlk, branchInstr);
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ // transform contingBlk to
+ // if () {
+ // move instr after branchInstr
+ // continue
+ // or
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(contingBlk) - loopHeader}
+
+ bool useContinueLogical =
+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
+
+ if (useContinueLogical == false) {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
+ }
+
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
+ } else {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+ }
+
+ branchInstr->eraseFromParent();
+ } else {
+ // if we've arrived here then we've already erased the branch instruction
+ // travel back up the basic block to see the last reference of our debug location
+ // we've just inserted that reference here so it should be representative
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ }
+ } //else
+
+} //settleLoopcontBlock
+
+// BBs in exitBlkSet are determined as in break-path for loopRep,
+// before we can put code for BBs as inside loop-body for loopRep
+// check whether those BBs are determined as cont-BB for parentLoopRep
+// earlier.
+// If so, generate a new BB newBlk
+// (1) set newBlk common successor of BBs in exitBlkSet
+// (2) change the continue-instr in BBs in exitBlkSet to break-instr
+// (3) generate continue-instr in newBlk
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+ LoopT *loopRep,
+ std::set<BlockT *> &exitBlkSet,
+ BlockT *exitLandBlk) {
+ std::set<BlockT *> endBlkSet;
+
+
+
+ for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
+
+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
+ return NULL;
+
+ endBlkSet.insert(endBlk);
+ }
+
+ BlockT *newBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newBlk); //insert to function
+ CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
+ SHOWNEWBLK(newBlk, "New continue block: ");
+
+ for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
+ iterEnd = endBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *endBlk = *iter;
+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
+ if (contInstr) {
+ contInstr->eraseFromParent();
+ }
+ endBlk->addSuccessor(newBlk);
+ if (DEBUGME) {
+ errs() << "Add new continue Block to BB"
+ << endBlk->getNumber() << " successors\n";
+ }
+ }
+
+ return newBlk;
+} //relocateLoopcontBlock
+
+
+// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
+// LoopLandBlock. This BB branch on the loop endBranchInit register to the
+// pathes corresponding to the loop exiting branches.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT endBranchReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(endBranchReg >= 0);
+
+ // reg = 0 before entering the loop
+ addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+ assert(numBlks >=2 && numBlks == exitBlks.size());
+
+ BlockT *preExitingBlk = exitingBlks[0];
+ BlockT *preExitBlk = exitBlks[0];
+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(preBranchBlk); //insert to function
+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+ BlockT *newLandBlk = preBranchBlk;
+
+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+ newLandBlk);
+ preExitingBlk->removeSuccessor(preExitBlk);
+ preExitingBlk->addSuccessor(newLandBlk);
+
+ //it is redundant to add reg = 0 to exitingBlks[0]
+
+ // For 1..n th exiting path (the last iteration handles two pathes) create the
+ // branch to the previous path and the current path.
+ for (uint32_t i = 1; i < numBlks; ++i) {
+ BlockT *curExitingBlk = exitingBlks[i];
+ BlockT *curExitBlk = exitBlks[i];
+ BlockT *curBranchBlk;
+
+ if (i == numBlks - 1) {
+ curBranchBlk = curExitBlk;
+ } else {
+ curBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(curBranchBlk); //insert to function
+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+ }
+
+ // Add reg = i to exitingBlks[i].
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
+ endBranchReg, i);
+
+ // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
+ // (exitingBlks[i], newLandBlk).
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+ newLandBlk);
+ curExitingBlk->removeSuccessor(curExitBlk);
+ curExitingBlk->addSuccessor(newLandBlk);
+
+ // add to preBranchBlk the branch instruction:
+ // if (endBranchReg == preVal)
+ // preExitBlk
+ // else
+ // curBranchBlk
+ //
+ // preValReg = i - 1
+
+ DebugLoc DL;
+ RegiT preValReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+
+ preBranchBlk->insert(preBranchBlk->begin(),
+ tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
+ i - 1));
+
+ // condResReg = (endBranchReg == preValReg)
+ RegiT condResReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
+
+ BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
+ .addMBB(preExitBlk).addReg(condResReg);
+
+ preBranchBlk->addSuccessor(preExitBlk);
+ preBranchBlk->addSuccessor(curBranchBlk);
+
+ // Update preExitingBlk, preExitBlk, preBranchBlk.
+ preExitingBlk = curExitingBlk;
+ preExitBlk = curExitBlk;
+ preBranchBlk = curBranchBlk;
+
+ } //end for 1 .. n blocks
+
+ return newLandBlk;
+} //addLoopEndbranchBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return Not_SinglePath;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return SinglePath_NotInPath;
+ }
+
+ return Not_SinglePath;
+} //singlePathTo
+
+// If there is a single path from srcBlk to dstBlk, return the last block before
+// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
+// last block in the path Otherwise, return NULL
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return srcBlk;
+ }
+
+ if (srcBlk->succ_size() == 0) {
+ return srcBlk;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ BlockT *preBlk = srcBlk;
+
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == NULL) {
+ return preBlk;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return NULL;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return srcBlk;
+ }
+
+ return NULL;
+
+} //singlePathEnd
+
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+ BlockT *dstBlk) {
+ int cloned = 0;
+ assert(preBlk->isSuccessor(srcBlk));
+ while (srcBlk && srcBlk != dstBlk) {
+ assert(srcBlk->succ_size() == 1);
+ if (srcBlk->pred_size() > 1) {
+ srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
+ ++cloned;
+ }
+
+ preBlk = srcBlk;
+ srcBlk = *srcBlk->succ_begin();
+ }
+
+ return cloned;
+} //cloneOnSideEntryTo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+ BlockT *predBlk) {
+ assert(predBlk->isSuccessor(curBlk) &&
+ "succBlk is not a prececessor of curBlk");
+
+ BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
+ CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+ //srcBlk, oldBlk, newBlk
+
+ predBlk->removeSuccessor(curBlk);
+ predBlk->addSuccessor(cloneBlk);
+
+ // add all successor to cloneBlk
+ CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+ numClonedInstr += curBlk->size();
+
+ if (DEBUGME) {
+ errs() << "Cloned block: " << "BB"
+ << curBlk->getNumber() << "size " << curBlk->size() << "\n";
+ }
+
+ SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+ return cloneBlk;
+} //cloneBlockForPredecessor
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+ BlockT *exitingBlk) {
+ BlockT *exitBlk = NULL;
+
+ for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
+ iterSuccEnd = exitingBlk->succ_end();
+ iterSucc != iterSuccEnd; ++iterSucc) {
+ BlockT *curBlk = *iterSucc;
+ if (!loopRep->contains(curBlk)) {
+ assert(exitBlk == NULL);
+ exitBlk = curBlk;
+ }
+ }
+
+ assert(exitBlk != NULL);
+
+ return exitBlk;
+} //exitingBlock2ExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+ BlockT *dstBlk,
+ InstrIterator insertPos) {
+ InstrIterator spliceEnd;
+ //look for the input branchinstr, not the AMDGPU branchinstr
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ if (branchInstr == NULL) {
+ if (DEBUGME) {
+ errs() << "migrateInstruction don't see branch instr\n" ;
+ }
+ spliceEnd = srcBlk->end();
+ } else {
+ if (DEBUGME) {
+ errs() << "migrateInstruction see branch instr\n" ;
+ branchInstr->dump();
+ }
+ spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+ }
+ if (DEBUGME) {
+ errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+
+ //splice insert before insertPos
+ dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+ if (DEBUGME) {
+ errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit change
+// B1:
+// uncond_br LoopHeader
+//
+// to
+// B1:
+// cond_br 1 LoopHeader dummyExit
+// and return the newly added dummy exit block
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
+ BlockT *loopHeader;
+ BlockT *loopLatch;
+ loopHeader = LoopRep->getHeader();
+ loopLatch = LoopRep->getLoopLatch();
+ BlockT *dummyExitBlk = NULL;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (loopHeader!=NULL && loopLatch!=NULL) {
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+ if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
+ dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+ if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos(loopLatch, branchInstr);
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+ InstrT *newInstr =
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
+ MachineInstrBuilder MIB(*funcRep, newInstr);
+ MIB.addMBB(loopHeader);
+ MIB.addReg(immReg, false);
+
+ SHOWNEWINSTR(newInstr);
+
+ branchInstr->eraseFromParent();
+ loopLatch->addSuccessor(dummyExitBlk);
+ }
+ }
+
+ return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
+ InstrT *branchInstr;
+
+ // I saw two unconditional branch in one basic block in example
+ // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
+ && CFGTraits::isUncondBranch(branchInstr)) {
+ if (DEBUGME) {
+ errs() << "Removing unconditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ }
+} //removeUnconditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
+ if (srcBlk->succ_size() == 2) {
+ BlockT *blk1 = *srcBlk->succ_begin();
+ BlockT *blk2 = *(++srcBlk->succ_begin());
+
+ if (blk1 == blk2) {
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+ if (DEBUGME) {
+ errs() << "Removing unneeded conditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ SHOWNEWBLK(blk1, "Removing redundant successor");
+ srcBlk->removeSuccessor(blk1);
+ }
+ }
+} //removeRedundantConditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+ DEFAULT_VEC_SLOTS> &retBlks) {
+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
+ retBlks.begin(),
+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
+ if (curInstr) {
+ curInstr->eraseFromParent();
+ }
+ curBlk->addSuccessor(dummyExitBlk);
+ if (DEBUGME) {
+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+ << " successors\n";
+ }
+ } //for
+
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
+ while (srcBlk->succ_size()) {
+ srcBlk->removeSuccessor(*srcBlk->succ_begin());
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->sccNum = sccNum;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+ }
+
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->isRetired = true;
+ assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+ && "can't retire block yet");
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return (srcBlkInfo && srcBlkInfo->isRetired);
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+
+ if(loopLand == NULL)
+ return true;
+
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ return true;
+ }
+
+ loopRep = loopRep->getParentLoop();
+ }
+
+ return false;
+} //isActiveLoophead
+
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
+ const unsigned blockSizeThreshold = 30;
+ const unsigned cloneInstrThreshold = 100;
+
+ bool multiplePreds = blk && (blk->pred_size() > 1);
+
+ if(!multiplePreds)
+ return false;
+
+ unsigned blkSize = blk->size();
+ return ((blkSize > blockSizeThreshold)
+ && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
+} //needMigrateBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+ BlockTSmallerVector &exitBlks,
+ std::set<BlockT *> &exitBlkSet) {
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
+
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end();
+ predIter != predIterEnd; ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
+ inpathBlks.push_back(curBlk);
+ }
+ } //for
+
+ //if landBlk has predecessors that are not in the given loop,
+ //create a new block
+ BlockT *newLandBlk = landBlk;
+ if (inpathBlks.size() != landBlk->pred_size()) {
+ newLandBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newLandBlk); //insert to function
+ newLandBlk->addSuccessor(landBlk);
+ for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
+ inpathBlks.begin(),
+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
+ //srcBlk, oldBlk, newBlk
+ curBlk->removeSuccessor(landBlk);
+ curBlk->addSuccessor(newLandBlk);
+ }
+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
+ if (exitBlks[i] == landBlk) {
+ exitBlks[i] = newLandBlk;
+ }
+ }
+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+ }
+
+ setLoopLandBlock(loopRep, newLandBlk);
+
+ return newLandBlk;
+} // recordLoopbreakLand
+
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ assert(theEntry->landBlk == NULL);
+
+ if (blk == NULL) {
+ blk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(blk); //insert to function
+ SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
+ }
+
+ theEntry->landBlk = blk;
+
+ if (DEBUGME) {
+ errs() << "setLoopLandBlock loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " landing-block = BB" << blk->getNumber() << "\n";
+ }
+} // setLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+
+ theEntry->breakOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->breakInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+ RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->endbranchInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopEndbranchInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopEndbranchInitReg
+
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry;
+} // getLoopLandInfo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry ? theEntry->landBlk : NULL;
+} // getLoopLandBlock
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ if (loopRep == NULL)
+ return false;
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ return curBlk->isSuccessor(loopHeader);
+
+} //hasBackEdge
+
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
+ return loopRep ? loopRep->getLoopDepth() : 0;
+} //getLoopDepth
+
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock
+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
+ int count = 0;
+ while (iterStart != iterEnd) {
+ if (!isRetiredBlock(*iterStart)) {
+ ++count;
+ }
+ ++iterStart;
+ }
+
+ return count;
+} //countActiveBlock
+
+// This is work around solution for findNearestCommonDominator not avaiable to
+// post dom a proper fix should go to Dominators.h.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
+
+ if (postDomTree->dominates(blk1, blk2)) {
+ return blk1;
+ }
+ if (postDomTree->dominates(blk2, blk1)) {
+ return blk2;
+ }
+
+ DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+ DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+ // Handle newly cloned node.
+ if (node1 == NULL && blk1->succ_size() == 1) {
+ return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+ }
+ if (node2 == NULL && blk2->succ_size() == 1) {
+ return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+ }
+
+ if (node1 == NULL || node2 == NULL) {
+ return NULL;
+ }
+
+ node1 = node1->getIDom();
+ while (node1) {
+ if (postDomTree->dominates(node1, node2)) {
+ return node1->getBlock();
+ }
+ node1 = node1->getIDom();
+ }
+
+ return NULL;
+}
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom
+(typename std::set<BlockT *> &blks) {
+ BlockT *commonDom;
+ typename std::set<BlockT *>::const_iterator iter = blks.begin();
+ typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+ for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
+ BlockT *curBlk = *iter;
+ if (curBlk != commonDom) {
+ commonDom = findNearestCommonPostDom(curBlk, commonDom);
+ }
+ }
+
+ if (DEBUGME) {
+ errs() << "Common post dominator for exit blocks is ";
+ if (commonDom) {
+ errs() << "BB" << commonDom->getNumber() << "\n";
+ } else {
+ errs() << "NULL\n";
+ }
+ }
+
+ return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvm
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDGPU
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGStructurizer : public MachineFunctionPass {
+public:
+ typedef MachineInstr InstructionType;
+ typedef MachineFunction FunctionType;
+ typedef MachineBasicBlock BlockType;
+ typedef MachineLoopInfo LoopinfoType;
+ typedef MachineDominatorTree DominatortreeType;
+ typedef MachinePostDominatorTree PostDominatortreeType;
+ typedef MachineDomTreeNode DomTreeNodeType;
+ typedef MachineLoop LoopType;
+
+protected:
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const AMDGPURegisterInfo *TRI;
+
+public:
+ AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
+ const TargetInstrInfo *getTargetInstrInfo() const;
+
+private:
+
+};
+
+} //end of namespace llvm
+AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
+: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
+ TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
+}
+
+const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
+ return TII;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
+public:
+ static char ID;
+
+public:
+ AMDGPUCFGPrepare(TargetMachine &tm);
+
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+};
+
+char AMDGPUCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
+ : AMDGPUCFGStructurizer(ID, tm ) {
+}
+const char *AMDGPUCFGPrepare::getPassName() const {
+ return "AMD IL Control Flow Graph Preparation Pass";
+}
+
+void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
+public:
+ static char ID;
+
+public:
+ AMDGPUCFGPerform(TargetMachine &tm);
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+};
+
+char AMDGPUCFGPerform::ID = 0;
+} //end of namespace llvm
+
+ AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
+: AMDGPUCFGStructurizer(ID, tm) {
+}
+
+const char *AMDGPUCFGPerform::getPassName() const {
+ return "AMD IL Control Flow Graph structurizer Pass";
+}
+
+void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDGPUCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+// this class is tailor to the AMDGPU backend
+template<>
+struct CFGStructTraits<AMDGPUCFGStructurizer> {
+ typedef int RegiT;
+
+ static int getBranchNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static int getBranchZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static int getContinueNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getContinueZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+ return instr->getOperand(0).getMBB();
+ }
+
+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+ instr->getOperand(0).setMBB(blk);
+ }
+
+ static MachineBasicBlock *
+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(blk->succ_size() == 2);
+ MachineBasicBlock *trueBranch = getTrueBranch(instr);
+ MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+ MachineBasicBlock::succ_iterator iterNext = iter;
+ ++iterNext;
+
+ return (*iter == trueBranch) ? *iterNext : *iter;
+ }
+
+ static bool isCondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32:
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static bool isUncondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::JUMP:
+ case AMDGPU::BRANCH:
+ return true;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+ //get DebugLoc from the first MachineBasicBlock instruction with debug info
+ DebugLoc DL;
+ for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getDebugLoc().isUnknown() == false) {
+ DL = instr->getDebugLoc();
+ }
+ }
+ return DL;
+ }
+
+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ MachineInstr *instr = &*iter;
+ if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
+ return instr;
+ }
+ return NULL;
+ }
+
+ // The correct naming for this is getPossibleLoopendBlockBranchInstr.
+ //
+ // BB with backward-edge could have move instructions after the branch
+ // instruction. Such move instruction "belong to" the loop backward-edge.
+ //
+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+ const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
+ blk->getParent()->getTarget().getInstrInfo());
+
+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+ iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+ // FIXME: Simplify
+ MachineInstr *instr = &*iter;
+ if (instr) {
+ if (isCondBranch(instr) || isUncondBranch(instr)) {
+ return instr;
+ } else if (!TII->isMov(instr->getOpcode())) {
+ break;
+ }
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::RETURN) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::CONTINUE) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static bool isReturnBlock(MachineBasicBlock *blk) {
+ MachineInstr *instr = getReturnInstr(blk);
+ bool isReturn = (blk->succ_size() == 0);
+ if (instr) {
+ assert(isReturn);
+ } else if (isReturn) {
+ if (DEBUGME) {
+ errs() << "BB" << blk->getNumber()
+ <<" is return block without RETURN instr\n";
+ }
+ }
+
+ return isReturn;
+ }
+
+ static MachineBasicBlock::iterator
+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(instr->getParent() == blk && "instruction doesn't belong to block");
+ MachineBasicBlock::iterator iter = blk->begin();
+ MachineBasicBlock::iterator iterEnd = blk->end();
+ while (&(*iter) != instr && iter != iterEnd) {
+ ++iter;
+ }
+
+ assert(iter != iterEnd);
+ return iter;
+ }//getInstrPos
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrBefore
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ MachineBasicBlock::iterator res;
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrEnd
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr = blk->getParent()
+ ->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->push_back(newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ } //insertInstrEnd
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DebugLoc());
+
+ blk->insert(instrPos, newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ DebugLoc DL) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->insert(instrPos, newInstr);
+ MachineInstrBuilder MIB(*MF, newInstr);
+ MIB.addReg(oldInstr->getOperand(1).getReg(), false);
+
+ SHOWNEWINSTR(newInstr);
+ //erase later oldInstr->eraseFromParent();
+ } //insertCondBranchBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator insertPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum,
+ DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineFunction *MF = blk->getParent();
+
+ MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ //insert before
+ blk->insert(insertPos, newInstr);
+ MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchBefore
+
+ static void insertCondBranchEnd(MachineBasicBlock *blk,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr =
+ MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+
+ blk->push_back(newInstr);
+ MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchEnd
+
+
+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ blk->insert(instrPos, newInstr);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertAssignInstrBefore
+
+ static void insertAssignInstrBefore(MachineBasicBlock *blk,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ } //insertInstrBefore
+
+ static void insertCompareInstrBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator instrPos,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT dstReg, RegiT src1Reg,
+ RegiT src2Reg) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr =
+ MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
+
+ MachineInstrBuilder MIB(*MF, newInstr);
+ MIB.addReg(dstReg, RegState::Define); //set target
+ MIB.addReg(src1Reg); //set src value
+ MIB.addReg(src2Reg); //set src value
+
+ blk->insert(instrPos, newInstr);
+ SHOWNEWINSTR(newInstr);
+
+ } //insertCompareInstrBefore
+
+ static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+ MachineBasicBlock *srcBlk) {
+ for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
+ iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
+ }
+ } //cloneSuccessorList
+
+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+ MachineFunction *func = srcBlk->getParent();
+ MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
+ func->push_back(newBlk); //insert to function
+ for (MachineBasicBlock::iterator iter = srcBlk->begin(),
+ iterEnd = srcBlk->end();
+ iter != iterEnd; ++iter) {
+ MachineInstr *instr = func->CloneMachineInstr(iter);
+ newBlk->push_back(instr);
+ }
+ return newBlk;
+ }
+
+ //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
+ //the AMDGPU instruction is not recognized as terminator fix this and retire
+ //this routine
+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+ MachineBasicBlock *oldBlk,
+ MachineBasicBlock *newBlk) {
+ MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+ if (branchInstr && isCondBranch(branchInstr) &&
+ getTrueBranch(branchInstr) == oldBlk) {
+ setTrueBranch(branchInstr, newBlk);
+ }
+ }
+
+ static void wrapup(MachineBasicBlock *entryBlk) {
+ assert((!entryBlk->getParent()->getJumpTableInfo()
+ || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+ && "found a jump table");
+
+ //collect continue right before endloop
+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
+ MachineBasicBlock::iterator pre = entryBlk->begin();
+ MachineBasicBlock::iterator iterEnd = entryBlk->end();
+ MachineBasicBlock::iterator iter = pre;
+ while (iter != iterEnd) {
+ if (pre->getOpcode() == AMDGPU::CONTINUE
+ && iter->getOpcode() == AMDGPU::ENDLOOP) {
+ contInstr.push_back(pre);
+ }
+ pre = iter;
+ ++iter;
+ } //end while
+
+ //delete continue right before endloop
+ for (unsigned i = 0; i < contInstr.size(); ++i) {
+ contInstr[i]->eraseFromParent();
+ }
+
+ // TODO to fix up jump table so later phase won't be confused. if
+ // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
+ // there isn't such an interface yet. alternatively, replace all the other
+ // blocks in the jump table with the entryBlk //}
+
+ } //wrapup
+
+ static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineDominatorTree>();
+ }
+
+ static MachinePostDominatorTree*
+ getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachinePostDominatorTree>();
+ }
+
+ static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineLoopInfo>();
+ }
+}; // template class CFGStructTraits
+} //end of namespace llvm
+
+// createAMDGPUCFGPreparationPass- Returns a pass
+FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
+ ) {
+ return new AMDGPUCFGPrepare(tm );
+}
+
+bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
+ *this,
+ TRI);
+}
+
+// createAMDGPUCFGStructurizerPass- Returns a pass
+FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
+ ) {
+ return new AMDGPUCFGPerform(tm );
+}
+
+bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
+ *this,
+ TRI);
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDILDevice.cpp b/contrib/llvm/lib/Target/R600/AMDILDevice.cpp
new file mode 100644
index 000000000000..db8e01ea4043
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILDevice.cpp
@@ -0,0 +1,132 @@
+//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevice.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+// Default implementation for all of the classes.
+AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) {
+ mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
+ mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
+ setCaps();
+ DeviceFlag = OCL_DEVICE_ALL;
+}
+
+AMDGPUDevice::~AMDGPUDevice() {
+ mHWBits.clear();
+ mSWBits.clear();
+}
+
+size_t AMDGPUDevice::getMaxGDSSize() const {
+ return 0;
+}
+
+uint32_t
+AMDGPUDevice::getDeviceFlag() const {
+ return DeviceFlag;
+}
+
+size_t AMDGPUDevice::getMaxNumCBs() const {
+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
+ return HW_MAX_NUM_CB;
+ }
+
+ return 0;
+}
+
+size_t AMDGPUDevice::getMaxCBSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
+ return MAX_CB_SIZE;
+ }
+
+ return 0;
+}
+
+size_t AMDGPUDevice::getMaxScratchSize() const {
+ return 65536;
+}
+
+uint32_t AMDGPUDevice::getStackAlignment() const {
+ return 16;
+}
+
+void AMDGPUDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::HalfOps);
+ mSWBits.set(AMDGPUDeviceInfo::ByteOps);
+ mSWBits.set(AMDGPUDeviceInfo::ShortOps);
+ mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
+ mSWBits.set(AMDGPUDeviceInfo::NoInline);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
+ mSWBits.set(AMDGPUDeviceInfo::MacroDB);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
+ }
+ mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
+}
+
+AMDGPUDeviceInfo::ExecutionMode
+AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const {
+ if (mHWBits[Caps]) {
+ assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDGPUDeviceInfo::Hardware;
+ }
+
+ if (mSWBits[Caps]) {
+ assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDGPUDeviceInfo::Software;
+ }
+
+ return AMDGPUDeviceInfo::Unsupported;
+
+}
+
+bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
+}
+
+bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
+}
+
+bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
+}
+
+std::string
+AMDGPUDevice::getDataLayout() const {
+ std::string DataLayout = std::string(
+ "e"
+ "-p:32:32:32"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128"
+ "-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n32:64"
+ );
+
+ if (usesHardware(AMDGPUDeviceInfo::DoubleOps)) {
+ DataLayout.append("-f64:64:64");
+ }
+
+ return DataLayout;
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDILDevice.h b/contrib/llvm/lib/Target/R600/AMDILDevice.h
new file mode 100644
index 000000000000..97df98cafb2a
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILDevice.h
@@ -0,0 +1,117 @@
+//===---- AMDILDevice.h - Define Device Data for AMDGPU -----*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+//
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDILDEVICEIMPL_H
+#define AMDILDEVICEIMPL_H
+#include "AMDIL.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+ class MCStreamer;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device
+//===----------------------------------------------------------------------===//
+class AMDGPUDevice {
+public:
+ AMDGPUDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUDevice();
+
+ // Enum values for the various memory types.
+ enum {
+ RAW_UAV_ID = 0,
+ ARENA_UAV_ID = 1,
+ LDS_ID = 2,
+ GDS_ID = 3,
+ SCRATCH_ID = 4,
+ CONSTANT_ID = 5,
+ GLOBAL_ID = 6,
+ MAX_IDS = 7
+ } IO_TYPE_IDS;
+
+ /// \returns The max LDS size that the hardware supports. Size is in
+ /// bytes.
+ virtual size_t getMaxLDSSize() const = 0;
+
+ /// \returns The max GDS size that the hardware supports if the GDS is
+ /// supported by the hardware. Size is in bytes.
+ virtual size_t getMaxGDSSize() const;
+
+ /// \returns The max number of hardware constant address spaces that
+ /// are supported by this device.
+ virtual size_t getMaxNumCBs() const;
+
+ /// \returns The max number of bytes a single hardware constant buffer
+ /// can support. Size is in bytes.
+ virtual size_t getMaxCBSize() const;
+
+ /// \returns The max number of bytes allowed by the hardware scratch
+ /// buffer. Size is in bytes.
+ virtual size_t getMaxScratchSize() const;
+
+ /// \brief Get the flag that corresponds to the device.
+ virtual uint32_t getDeviceFlag() const;
+
+ /// \returns The number of work-items that exist in a single hardware
+ /// wavefront.
+ virtual size_t getWavefrontSize() const = 0;
+
+ /// \brief Get the generational name of this specific device.
+ virtual uint32_t getGeneration() const = 0;
+
+ /// \brief Get the stack alignment of this specific device.
+ virtual uint32_t getStackAlignment() const;
+
+ /// \brief Get the resource ID for this specific device.
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+ /// \brief Get the max number of UAV's for this device.
+ virtual uint32_t getMaxNumUAVs() const = 0;
+
+
+ // API utilizing more detailed capabilities of each family of
+ // cards. If a capability is supported, then either usesHardware or
+ // usesSoftware returned true. If usesHardware returned true, then
+ // usesSoftware must return false for the same capability. Hardware
+ // execution means that the feature is done natively by the hardware
+ // and is not emulated by the softare. Software execution means
+ // that the feature could be done in the hardware, but there is
+ // software that emulates it with possibly using the hardware for
+ // support since the hardware does not fully comply with OpenCL
+ // specs.
+
+ bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
+ bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
+ bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
+ virtual std::string getDataLayout() const;
+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
+ static const unsigned int WavefrontSize = 64;
+ static const unsigned int HalfWavefrontSize = 32;
+ static const unsigned int QuarterWavefrontSize = 16;
+protected:
+ virtual void setCaps();
+ BitVector mHWBits;
+ llvm::BitVector mSWBits;
+ AMDGPUSubtarget *mSTM;
+ uint32_t DeviceFlag;
+private:
+ AMDGPUDeviceInfo::ExecutionMode
+ getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
+};
+
+} // namespace llvm
+#endif // AMDILDEVICEIMPL_H
diff --git a/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp b/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp
new file mode 100644
index 000000000000..9605fbe63340
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp
@@ -0,0 +1,94 @@
+//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Function that creates DeviceInfo from a device name and other information.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+namespace llvm {
+namespace AMDGPUDeviceInfo {
+
+AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
+ AMDGPUSubtarget *ptr,
+ bool is64bit, bool is64on32bit) {
+ if (deviceName.c_str()[2] == '7') {
+ switch (deviceName.c_str()[3]) {
+ case '1':
+ return new AMDGPU710Device(ptr);
+ case '7':
+ return new AMDGPU770Device(ptr);
+ default:
+ return new AMDGPU7XXDevice(ptr);
+ }
+ } else if (deviceName == "cypress") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCypressDevice(ptr);
+ } else if (deviceName == "juniper") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUEvergreenDevice(ptr);
+ } else if (deviceName == "redwood") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPURedwoodDevice(ptr);
+ } else if (deviceName == "cedar") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCedarDevice(ptr);
+ } else if (deviceName == "barts" || deviceName == "turks") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUNIDevice(ptr);
+ } else if (deviceName == "cayman") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCaymanDevice(ptr);
+ } else if (deviceName == "caicos") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUNIDevice(ptr);
+ } else if (deviceName == "SI") {
+ return new AMDGPUSIDevice(ptr);
+ } else {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPU7XXDevice(ptr);
+ }
+}
+} // End namespace AMDGPUDeviceInfo
+} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.h b/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.h
new file mode 100644
index 000000000000..4b2c3a53c79f
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.h
@@ -0,0 +1,88 @@
+//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#ifndef AMDILDEVICEINFO_H
+#define AMDILDEVICEINFO_H
+
+
+#include <string>
+
+namespace llvm {
+ class AMDGPUDevice;
+ class AMDGPUSubtarget;
+ namespace AMDGPUDeviceInfo {
+ /// Each Capabilities can be executed using a hardware instruction,
+ /// emulated with a sequence of software instructions, or not
+ /// supported at all.
+ enum ExecutionMode {
+ Unsupported = 0, ///< Unsupported feature on the card(Default value)
+ /// This is the execution mode that is set if the feature is emulated in
+ /// software.
+ Software,
+ /// This execution mode is set if the feature exists natively in hardware
+ Hardware
+ };
+
+ enum Caps {
+ HalfOps = 0x1, ///< Half float is supported or not.
+ DoubleOps = 0x2, ///< Double is supported or not.
+ ByteOps = 0x3, ///< Byte(char) is support or not.
+ ShortOps = 0x4, ///< Short is supported or not.
+ LongOps = 0x5, ///< Long is supported or not.
+ Images = 0x6, ///< Images are supported or not.
+ ByteStores = 0x7, ///< ByteStores available(!HD4XXX).
+ ConstantMem = 0x8, ///< Constant/CB memory.
+ LocalMem = 0x9, ///< Local/LDS memory.
+ PrivateMem = 0xA, ///< Scratch/Private/Stack memory.
+ RegionMem = 0xB, ///< OCL GDS Memory Extension.
+ FMA = 0xC, ///< Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, ///< Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, ///< Use for UAV per Pointer 0-7.
+ Reserved0 = 0xF, ///< ReservedFlag
+ NoAlias = 0x10, ///< Cached loads.
+ Signed24BitOps = 0x11, ///< Peephole Optimization.
+ /// Debug mode implies that no hardware features or optimizations
+ /// are performned and that all memory access go through a single
+ /// uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+ Debug = 0x12,
+ CachedMem = 0x13, ///< Cached mem is available or not.
+ BarrierDetect = 0x14, ///< Detect duplicate barriers.
+ Reserved1 = 0x15, ///< Reserved flag
+ ByteLDSOps = 0x16, ///< Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, ///< Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, ///< Flag to specify if Tmr register is supported.
+ NoInline = 0x19, ///< Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, ///< Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, ///< Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, ///< Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, ///< Flag to specify that private memory uses uav's.
+ /// If more capabilities are required, then
+ /// this number needs to be increased.
+ /// All capabilities must come before this
+ /// number.
+ MaxNumberCapabilities = 0x20
+ };
+ /// These have to be in order with the older generations
+ /// having the lower number enumerations.
+ enum Generation {
+ HD4XXX = 0, ///< 7XX based devices.
+ HD5XXX, ///< Evergreen based devices.
+ HD6XXX, ///< NI/Evergreen+ based devices.
+ HD7XXX, ///< Southern Islands based devices.
+ HDTEST, ///< Experimental feature testing device.
+ HDNUMGEN
+ };
+
+
+ AMDGPUDevice*
+ getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
+ bool is64bit = false, bool is64on32bit = false);
+ } // namespace AMDILDeviceInfo
+} // namespace llvm
+#endif // AMDILDEVICEINFO_H
diff --git a/contrib/llvm/lib/Target/R600/AMDILDevices.h b/contrib/llvm/lib/Target/R600/AMDILDevices.h
new file mode 100644
index 000000000000..636fa6d35947
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILDevices.h
@@ -0,0 +1,19 @@
+//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#ifndef AMDIL_DEVICES_H
+#define AMDIL_DEVICES_H
+// Include all of the device specific header files
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSIDevice.h"
+
+#endif // AMDIL_DEVICES_H
diff --git a/contrib/llvm/lib/Target/R600/AMDILEvergreenDevice.cpp b/contrib/llvm/lib/Target/R600/AMDILEvergreenDevice.cpp
new file mode 100644
index 000000000000..c5213a041005
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILEvergreenDevice.cpp
@@ -0,0 +1,169 @@
+//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
+: AMDGPUDevice(ST) {
+ setCaps();
+ std::string name = ST->getDeviceName();
+ if (name == "cedar") {
+ DeviceFlag = OCL_DEVICE_CEDAR;
+ } else if (name == "redwood") {
+ DeviceFlag = OCL_DEVICE_REDWOOD;
+ } else if (name == "cypress") {
+ DeviceFlag = OCL_DEVICE_CYPRESS;
+ } else {
+ DeviceFlag = OCL_DEVICE_JUNIPER;
+ }
+}
+
+AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
+}
+
+size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
+ return 12;
+}
+
+uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
+ switch(id) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ return GLOBAL_RETURN_RAW_UAV_ID;
+ case GLOBAL_ID:
+ case ARENA_UAV_ID:
+ return DEFAULT_ARENA_UAV_ID;
+ case LDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case GDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case SCRATCH_ID:
+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ };
+ return 0;
+}
+
+size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
+ return AMDGPUDevice::WavefrontSize;
+}
+
+uint32_t AMDGPUEvergreenDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD5XXX;
+}
+
+void AMDGPUEvergreenDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
+ mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
+ mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
+ mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
+ mHWBits.set(AMDGPUDeviceInfo::ByteStores);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+ mSWBits.set(AMDGPUDeviceInfo::RegionMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::LocalMem);
+ mHWBits.set(AMDGPUDeviceInfo::RegionMem);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::Images);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
+ mHWBits.set(AMDGPUDeviceInfo::NoAlias);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::CachedMem);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
+ mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
+ mHWBits.set(AMDGPUDeviceInfo::LongOps);
+ mSWBits.reset(AMDGPUDeviceInfo::LongOps);
+ mHWBits.set(AMDGPUDeviceInfo::TmrReg);
+}
+
+AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCypressDevice::~AMDGPUCypressDevice() {
+}
+
+void AMDGPUCypressDevice::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ mHWBits.set(AMDGPUDeviceInfo::FMA);
+ }
+}
+
+
+AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCedarDevice::~AMDGPUCedarDevice() {
+}
+
+void AMDGPUCedarDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+}
+
+size_t AMDGPUCedarDevice::getWavefrontSize() const {
+ return AMDGPUDevice::QuarterWavefrontSize;
+}
+
+AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPURedwoodDevice::~AMDGPURedwoodDevice() {
+}
+
+void AMDGPURedwoodDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+}
+
+size_t AMDGPURedwoodDevice::getWavefrontSize() const {
+ return AMDGPUDevice::HalfWavefrontSize;
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDILEvergreenDevice.h b/contrib/llvm/lib/Target/R600/AMDILEvergreenDevice.h
new file mode 100644
index 000000000000..ea90f774a856
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILEvergreenDevice.h
@@ -0,0 +1,93 @@
+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDILEVERGREENDEVICE_H
+#define AMDILEVERGREENDEVICE_H
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevice.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+/// \brief The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
+/// series of cards.
+///
+/// This class contains information required to differentiate
+/// the Evergreen device from the generic AMDGPUDevice. This device represents
+/// that capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDGPUEvergreenDevice : public AMDGPUDevice {
+public:
+ AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUEvergreenDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getMaxGDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getMaxNumUAVs() const;
+ virtual uint32_t getResourceID(uint32_t) const;
+protected:
+ virtual void setCaps();
+};
+
+/// The AMDGPUCypressDevice is similiar to the AMDGPUEvergreenDevice, except it has
+/// support for double precision operations. This device is used to represent
+/// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+/// and HD59XX cards.
+class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUCypressDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUCypressDevice();
+private:
+ virtual void setCaps();
+};
+
+
+/// \brief The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
+/// devices.
+///
+/// This class differs from the base AMDGPUEvergreenDevice in that the
+/// device is a ~quarter of the 'Juniper'. These are commercially known as the
+/// HD54XX and HD53XX series of cards.
+class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUCedarDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUCedarDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPURedwoodDevice is the class the represents all of the 'Redwood' based
+/// devices.
+///
+/// This class differs from the base class, in that these devices are
+/// considered about half of a 'Juniper' device. These are commercially known as
+/// the HD55XX and HD56XX series of cards.
+class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPURedwoodDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+} // namespace llvm
+#endif // AMDILEVERGREENDEVICE_H
diff --git a/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp b/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp
new file mode 100644
index 000000000000..fa8f62de9c0a
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -0,0 +1,643 @@
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
+#include "AMDGPURegisterInfo.h"
+#include "AMDILDevices.h"
+#include "R600InstrInfo.h"
+#include "SIISelLowering.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include <list>
+#include <queue>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const AMDGPUSubtarget &Subtarget;
+public:
+ AMDGPUDAGToDAGISel(TargetMachine &TM);
+ virtual ~AMDGPUDAGToDAGISel();
+
+ SDNode *Select(SDNode *N);
+ virtual const char *getPassName() const;
+ virtual void PostprocessISelDAG();
+
+private:
+ inline SDValue getSmallIPtrImm(unsigned Imm);
+ bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+
+ // Complex pattern selectors
+ bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
+ bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+ static bool checkType(const Value *ptr, unsigned int addrspace);
+ static const Value *getBasePointerValue(const Value *V);
+
+ static bool isGlobalStore(const StoreSDNode *N);
+ static bool isPrivateStore(const StoreSDNode *N);
+ static bool isLocalStore(const StoreSDNode *N);
+ static bool isRegionStore(const StoreSDNode *N);
+
+ static bool isCPLoad(const LoadSDNode *N);
+ static bool isConstantLoad(const LoadSDNode *N, int cbID);
+ static bool isGlobalLoad(const LoadSDNode *N);
+ static bool isParamLoad(const LoadSDNode *N);
+ static bool isPrivateLoad(const LoadSDNode *N);
+ static bool isLocalLoad(const LoadSDNode *N);
+ static bool isRegionLoad(const LoadSDNode *N);
+
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue &BaseReg, SDValue& Offset);
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+/// \brief This pass converts a legalized DAG into a AMDGPU-specific
+// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
+ ) {
+ return new AMDGPUDAGToDAGISel(TM);
+}
+
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM
+ )
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
+}
+
+AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
+}
+
+SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRParam(
+ SDValue Addr, SDValue& R1, SDValue& R2) {
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+ return SelectADDRParam(Addr, R1, R2);
+}
+
+
+bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ return true;
+}
+
+SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+ switch (Opc) {
+ default: break;
+ case ISD::BUILD_VECTOR: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+ // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+ // that adds a 128 bits reg copy when going through TwoAddressInstructions
+ // pass. We want to avoid 128 bits copies as much as possible because they
+ // can't be bundled by our scheduler.
+ SDValue RegSeqArgs[9] = {
+ CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+ bool IsRegSeq = true;
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ IsRegSeq = false;
+ break;
+ }
+ RegSeqArgs[2 * i + 1] = N->getOperand(i);
+ }
+ if (!IsRegSeq)
+ break;
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
+ RegSeqArgs, 2 * N->getNumOperands() + 1);
+ }
+ case ISD::ConstantFP:
+ case ISD::Constant: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ // XXX: Custom immediate lowering not implemented yet. Instead we use
+ // pseudo instructions defined in SIInstructions.td
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+
+ uint64_t ImmValue = 0;
+ unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+
+ if (N->getOpcode() == ISD::ConstantFP) {
+ // XXX: 64-bit Immediates not supported yet
+ assert(N->getValueType(0) != MVT::f64);
+
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
+ APFloat Value = C->getValueAPF();
+ float FloatValue = Value.convertToFloat();
+ if (FloatValue == 0.0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (FloatValue == 0.5) {
+ ImmReg = AMDGPU::HALF;
+ } else if (FloatValue == 1.0) {
+ ImmReg = AMDGPU::ONE;
+ } else {
+ ImmValue = Value.bitcastToAPInt().getZExtValue();
+ }
+ } else {
+ // XXX: 64-bit Immediates not supported yet
+ assert(N->getValueType(0) != MVT::i64);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ if (C->getZExtValue() == 0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (C->getZExtValue() == 1) {
+ ImmReg = AMDGPU::ONE_INT;
+ } else {
+ ImmValue = C->getZExtValue();
+ }
+ }
+
+ for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
+ Use != SDNode::use_end(); Use = Next) {
+ Next = llvm::next(Use);
+ std::vector<SDValue> Ops;
+ for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
+ Ops.push_back(Use->getOperand(i));
+ }
+
+ if (!Use->isMachineOpcode()) {
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ // We can only use literal constants (e.g. AMDGPU::ZERO,
+ // AMDGPU::ONE, etc) in machine opcodes.
+ continue;
+ }
+ } else {
+ if (!TII->isALUInstr(Use->getMachineOpcode()) ||
+ (TII->get(Use->getMachineOpcode()).TSFlags &
+ R600_InstFlag::VECTOR)) {
+ continue;
+ }
+
+ int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
+ assert(ImmIdx != -1);
+
+ // subtract one from ImmIdx, because the DST operand is usually index
+ // 0 for MachineInstrs, but we have no DST in the Ops vector.
+ ImmIdx--;
+
+ // Check that we aren't already using an immediate.
+ // XXX: It's possible for an instruction to have more than one
+ // immediate operand, but this is not supported yet.
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
+ assert(C);
+
+ if (C->getZExtValue() != 0) {
+ // This instruction is already using an immediate.
+ continue;
+ }
+
+ // Set the immediate value
+ Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
+ }
+ }
+ // Set the immediate register
+ Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
+
+ CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
+ }
+ break;
+ }
+ }
+ SDNode *Result = SelectCode(N);
+
+ // Fold operands of selected node
+
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+ if (Result && Result->isMachineOpcode() &&
+ !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
+ && TII->isALUInstr(Result->getMachineOpcode())) {
+ // Fold FNEG/FABS/CONST_ADDRESS
+ // TODO: Isel can generate multiple MachineInst, we need to recursively
+ // parse Result
+ bool IsModified = false;
+ do {
+ std::vector<SDValue> Ops;
+ for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+ I != E; ++I)
+ Ops.push_back(*I);
+ IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
+ if (IsModified) {
+ Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
+ }
+ } while (IsModified);
+
+ // If node has a single use which is CLAMP_R600, folds it
+ if (Result->hasOneUse() && Result->isMachineOpcode()) {
+ SDNode *PotentialClamp = *Result->use_begin();
+ if (PotentialClamp->isMachineOpcode() &&
+ PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
+ unsigned ClampIdx =
+ TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
+ std::vector<SDValue> Ops;
+ unsigned NumOp = Result->getNumOperands();
+ for (unsigned i = 0; i < NumOp; ++i) {
+ Ops.push_back(Result->getOperand(i));
+ }
+ Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ Result = CurDAG->SelectNodeTo(PotentialClamp,
+ Result->getMachineOpcode(), PotentialClamp->getVTList(),
+ Ops.data(), NumOp);
+ }
+ }
+ }
+ }
+
+ return Result;
+}
+
+bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
+ const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2)
+ };
+ int SelIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2_NEG)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS),
+ -1
+ };
+
+ for (unsigned i = 0; i < 3; i++) {
+ if (OperandIdx[i] < 0)
+ return false;
+ SDValue Operand = Ops[OperandIdx[i] - 1];
+ switch (Operand.getOpcode()) {
+ case AMDGPUISD::CONST_ADDRESS: {
+ SDValue CstOffset;
+ if (Operand.getValueType().isVector() ||
+ !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
+ break;
+
+ // Gather others constants values
+ std::vector<unsigned> Consts;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = OperandIdx[j];
+ if (SrcIdx < 0)
+ break;
+ if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
+ }
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts))
+ break;
+
+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Ops[SelIdx[i] - 1] = CstOffset;
+ return true;
+ }
+ case ISD::FNEG:
+ if (NegIdx[i] < 0)
+ break;
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ return true;
+ case ISD::FABS:
+ if (AbsIdx[i] < 0)
+ break;
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ return true;
+ case ISD::BITCAST:
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ return true;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
+ if (!ptr) {
+ return false;
+ }
+ Type *ptrType = ptr->getType();
+ return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
+}
+
+const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) {
+ if (!V) {
+ return NULL;
+ }
+ const Value *ret = NULL;
+ ValueMap<const Value *, bool> ValueBitMap;
+ std::queue<const Value *, std::list<const Value *> > ValueQueue;
+ ValueQueue.push(V);
+ while (!ValueQueue.empty()) {
+ V = ValueQueue.front();
+ if (ValueBitMap.find(V) == ValueBitMap.end()) {
+ ValueBitMap[V] = true;
+ if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
+ ret = V;
+ break;
+ } else if (dyn_cast<GlobalVariable>(V)) {
+ ret = V;
+ break;
+ } else if (dyn_cast<Constant>(V)) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
+ if (CE) {
+ ValueQueue.push(CE->getOperand(0));
+ }
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ ret = AI;
+ break;
+ } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ uint32_t numOps = I->getNumOperands();
+ for (uint32_t x = 0; x < numOps; ++x) {
+ ValueQueue.push(I->getOperand(x));
+ }
+ } else {
+ assert(!"Found a Value that we didn't know how to handle!");
+ }
+ }
+ ValueQueue.pop();
+ }
+ return ret;
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+ return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
+}
+
+bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
+ if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
+ return true;
+ }
+ MachineMemOperand *MMO = N->getMemOperand();
+ const Value *V = MMO->getValue();
+ const Value *BV = getBasePointerValue(V);
+ if (MMO
+ && MMO->getValue()
+ && ((V && dyn_cast<GlobalValue>(V))
+ || (BV && dyn_cast<GlobalValue>(
+ getBasePointerValue(MMO->getValue()))))) {
+ return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
+ } else {
+ return false;
+ }
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
+ MachineMemOperand *MMO = N->getMemOperand();
+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ if (MMO) {
+ const Value *V = MMO->getValue();
+ const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
+ if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ // Check to make sure we are not a constant pool load or a constant load
+ // that is marked as a private load
+ if (isCPLoad(N) || isConstantLoad(N, -1)) {
+ return false;
+ }
+ }
+ if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
+ return true;
+ }
+ return false;
+}
+
+const char *AMDGPUDAGToDAGISel::getPassName() const {
+ return "AMDGPU DAG->DAG Pattern Instruction Selection";
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP
+
+///==== AMDGPU Functions ====///
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue& IntPtr) {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue& BaseReg, SDValue &Offset) {
+ if (!dyn_cast<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, true);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode * IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD
+ && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ CurDAG->getEntryNode().getDebugLoc(),
+ AMDGPU::ZERO, MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *C;
+
+ if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+ Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ } else {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+
+ return true;
+}
+
+void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
+
+ // Go over all selected nodes and try to fold them a bit more
+ const AMDGPUTargetLowering& Lowering = ((const AMDGPUTargetLowering&)TLI);
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+
+ MachineSDNode *Node = dyn_cast<MachineSDNode>(I);
+ if (!Node)
+ continue;
+
+ SDNode *ResNode = Lowering.PostISelFolding(Node, *CurDAG);
+ if (ResNode != Node)
+ ReplaceUses(Node, ResNode);
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/R600/AMDILISelLowering.cpp b/contrib/llvm/lib/Target/R600/AMDILISelLowering.cpp
new file mode 100644
index 000000000000..922cac12b98e
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILISelLowering.cpp
@@ -0,0 +1,647 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetLowering functions borrowed from AMDIL.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+void AMDGPUTargetLowering::InitAMDILLowering() {
+ int types[] = {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::f32,
+ (int)MVT::f64,
+ (int)MVT::i64,
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+
+ int IntTypes[] = {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::i64
+ };
+
+ int FloatTypes[] = {
+ (int)MVT::f32,
+ (int)MVT::f64
+ };
+
+ int VectorTypes[] = {
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+ size_t NumTypes = sizeof(types) / sizeof(*types);
+ size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+ size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+ size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+ const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
+ // These are the current register classes that are
+ // supported
+
+ for (unsigned int x = 0; x < NumTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+ // We cannot sextinreg, expand to shifts
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::BRCOND, VT, Custom);
+ setOperationAction(ISD::BR_JT, VT, Expand);
+ setOperationAction(ISD::BRIND, VT, Expand);
+ // TODO: Implement custom UREM/SREM routines
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ if (VT != MVT::i64 && VT != MVT::v2i64) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ }
+ }
+ for (unsigned int x = 0; x < NumFloatTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+ // IL does not have these operations for floating point types
+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+ setOperationAction(ISD::SETOLT, VT, Expand);
+ setOperationAction(ISD::SETOGE, VT, Expand);
+ setOperationAction(ISD::SETOGT, VT, Expand);
+ setOperationAction(ISD::SETOLE, VT, Expand);
+ setOperationAction(ISD::SETULT, VT, Expand);
+ setOperationAction(ISD::SETUGE, VT, Expand);
+ setOperationAction(ISD::SETUGT, VT, Expand);
+ setOperationAction(ISD::SETULE, VT, Expand);
+ }
+
+ for (unsigned int x = 0; x < NumIntTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+ // GPU also does not have divrem function for signed or unsigned
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ // GPU doesn't have a rotl, rotr, or byteswap instruction
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ // GPU doesn't have any counting operators
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ // setOperationAction(ISD::VSETCC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+
+ }
+ if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+ setOperationAction(ISD::Constant , MVT::i64 , Legal);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ }
+ if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
+ // we support loading/storing v2f64 but not operations on the type
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
+ // We want to expand vector conversions into their scalar
+ // counterparts.
+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ }
+ // TODO: Fix the UDIV24 algorithm so it works for these
+ // types correctly. This needs vector comparisons
+ // for this to work correctly.
+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+
+ // Use the default implementation.
+ setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
+ setOperationAction(ISD::Constant , MVT::i32 , Legal);
+
+ setSchedulingPreference(Sched::RegPressure);
+ setPow2DivIsCheap(false);
+ setSelectIsExpensive(true);
+ setJumpIsExpensive(true);
+
+ MaxStoresPerMemcpy = 4096;
+ MaxStoresPerMemmove = 4096;
+ MaxStoresPerMemset = 4096;
+
+}
+
+bool
+AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const {
+ return false;
+}
+
+// The backend supports 32 and 64 bit floating point immediates
+bool
+AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool
+AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return false;
+ } else {
+ return true;
+ }
+}
+
+
+// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
+// be zero. Op is expected to be a target specific node. Used by DAG
+// combiner.
+
+void
+AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt KnownZero2;
+ APInt KnownOne2;
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::SELECT_CC:
+ DAG.ComputeMaskedBits(
+ Op.getOperand(1),
+ KnownZero,
+ KnownOne,
+ Depth + 1
+ );
+ DAG.ComputeMaskedBits(
+ Op.getOperand(0),
+ KnownZero2,
+ KnownOne2
+ );
+ assert((KnownZero & KnownOne) == 0
+ && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0
+ && "Bits known to be one AND zero?");
+ // Only known if known in both the LHS and RHS
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerSDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerSREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerSREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Data = Op.getOperand(0);
+ VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+ DebugLoc DL = Op.getDebugLoc();
+ EVT DVT = Data.getValueType();
+ EVT BVT = BaseType->getVT();
+ unsigned baseBits = BVT.getScalarType().getSizeInBits();
+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+ unsigned shiftBits = srcBits - baseBits;
+ if (srcBits < 32) {
+ // If the op is less than 32 bits, then it needs to extend to 32bits
+ // so it can properly keep the upper bits valid.
+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+ shiftBits = 32 - baseBits;
+ DVT = IVT;
+ }
+ SDValue Shift = DAG.getConstant(shiftBits, DVT);
+ // Shift left by 'Shift' bits.
+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+ // Signed shift Right by 'Shift' bits.
+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+ if (srcBits < 32) {
+ // Once the sign extension is done, the op needs to be converted to
+ // its original type.
+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+ }
+ return Data;
+}
+EVT
+AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
+ int iSize = (size * numEle);
+ int vEle = (iSize >> ((size == 64) ? 6 : 5));
+ if (!vEle) {
+ vEle = 1;
+ }
+ if (size == 64) {
+ if (vEle == 1) {
+ return EVT(MVT::i64);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i64, vEle));
+ }
+ } else {
+ if (vEle == 1) {
+ return EVT(MVT::i32);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i32, vEle));
+ }
+ }
+}
+
+SDValue
+AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+ SDValue Result;
+ Result = DAG.getNode(
+ AMDGPUISD::BRANCH_COND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Chain, Jump, Cond);
+ return Result;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // int ib, (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+ DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+ SDValue cv;
+ if (INTTY == MVT::i32) {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ } else {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ }
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSDIV32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSelectCC(DL,
+ r0, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSelectCC(DL,
+ r1, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSREM32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+ SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDILInstrInfo.td b/contrib/llvm/lib/Target/R600/AMDILInstrInfo.td
new file mode 100644
index 000000000000..110f1476513b
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILInstrInfo.td
@@ -0,0 +1,207 @@
+//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file describes the AMDIL instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+// AMDIL Instruction Predicate Definitions
+// Predicate that is set to true if the hardware supports double precision
+// divide
+def HasHWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports double, but not double
+// precision divide in hardware
+def HasSWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware support 24bit signed
+// math ops. Otherwise a software expansion to 32bit math ops is used instead.
+def HasHWSign24Bit : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
+
+// Predicate that is set to true if 64bit operations are supported or not
+def HasHW64Bit : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
+def HasSW64Bit : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
+
+// Predicate that is set to true if the timer register is supported
+def HasTmrRegister : Predicate<"Subtarget.device()"
+ "->isSupported(AMDGPUDeviceInfo::TmrReg)">;
+// Predicate that is true if we are at least evergreen series
+def HasDeviceIDInst : Predicate<"Subtarget.device()"
+ "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
+
+// Predicate that is true if we have region address space.
+def hasRegionAS : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
+
+// Predicate that is false if we don't have region address space.
+def noRegionAS : Predicate<"!Subtarget.device()"
+ "->isSupported(AMDGPUDeviceInfo::RegionMem)">;
+
+
+// Predicate that is set to true if 64bit Mul is supported in the IL or not
+def HasHW64Mul : Predicate<"Subtarget.calVersion()"
+ ">= CAL_VERSION_SC_139"
+ "&& Subtarget.device()"
+ "->getGeneration() >="
+ "AMDGPUDeviceInfo::HD5XXX">;
+def HasSW64Mul : Predicate<"Subtarget.calVersion()"
+ "< CAL_VERSION_SC_139">;
+// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not
+def HasHW64DivMod : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
+def HasSW64DivMod : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
+
+// Predicate that is set to true if 64bit pointer are used.
+def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
+def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+def brtarget : Operand<OtherVT>;
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Type Profiles
+//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Generic Profile Types
+//===----------------------------------------------------------------------===//
+
+def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+ ]>;
+def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
+ ]>;
+def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
+ SDTCisEltOfVec<1, 0>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control Profile Types
+//===----------------------------------------------------------------------===//
+// Branch instruction where second and third are basic blocks
+def SDTIL_BRCond : SDTypeProfile<0, 2, [
+ SDTCisVT<0, OtherVT>
+ ]>;
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Nodes
+//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Flow Control DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Call/Return DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+//===--------------------------------------------------------------------===//
+// Instructions
+//===--------------------------------------------------------------------===//
+// Floating point math functions
+def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Integer functions
+//===----------------------------------------------------------------------===//
+def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+
+//===--------------------------------------------------------------------===//
+// Custom Pattern DAG Nodes
+//===--------------------------------------------------------------------===//
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Load pattern fragments
+//===----------------------------------------------------------------------===//
+// Global address space loads
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Constant address space loads
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Complex addressing mode patterns
+//===----------------------------------------------------------------------===//
+def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
+def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
+def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
+def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// Instruction format classes
+//===----------------------------------------------------------------------===//
+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDGPU";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ let Itinerary = NullALU;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// Multiclass Instruction formats
+//===--------------------------------------------------------------------===//
+// Multiclass that handles branch instructions
+multiclass BranchConditional<SDNode Op> {
+ def _i32 : ILFormat<(outs),
+ (ins brtarget:$target, GPRI32:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, GPRI32:$src0)]>;
+ def _f32 : ILFormat<(outs),
+ (ins brtarget:$target, GPRF32:$src0),
+ "; f32 Pseudo branch instruction",
+ [(Op bb:$target, GPRF32:$src0)]>;
+}
+
+// Only scalar types should generate flow control
+multiclass BranchInstr<string name> {
+ def _i32 : ILFormat<(outs), (ins GPRI32:$src),
+ !strconcat(name, " $src"), []>;
+ def _f32 : ILFormat<(outs), (ins GPRF32:$src),
+ !strconcat(name, " $src"), []>;
+}
+// Only scalar types should generate flow control
+multiclass BranchInstr2<string name> {
+ def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+ def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+}
+
+//===--------------------------------------------------------------------===//
+// Intrinsics support
+//===--------------------------------------------------------------------===//
+include "AMDILIntrinsics.td"
diff --git a/contrib/llvm/lib/Target/R600/AMDILIntrinsicInfo.cpp b/contrib/llvm/lib/Target/R600/AMDILIntrinsicInfo.cpp
new file mode 100644
index 000000000000..4ddb057d80a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILIntrinsicInfo.cpp
@@ -0,0 +1,79 @@
+//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Implementation of the IntrinsicInfo class.
+//
+//===-----------------------------------------------------------------------===//
+
+#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDIL.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
+ : TargetIntrinsicInfo() {
+}
+
+std::string
+AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
+ unsigned int numTys) const {
+ static const char* const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ if (IntrID < Intrinsic::num_intrinsics) {
+ return 0;
+ }
+ assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
+ && "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned int
+AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
+#define GET_FUNCTION_RECOGNIZER
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ AMDGPUIntrinsic::ID IntrinsicID
+ = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
+ IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
+
+ if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
+ return IntrinsicID;
+ }
+ return 0;
+}
+
+bool
+AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
+ // Overload Table
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+Function*
+AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const {
+ llvm_unreachable("Not implemented");
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDILIntrinsicInfo.h b/contrib/llvm/lib/Target/R600/AMDILIntrinsicInfo.h
new file mode 100644
index 000000000000..35559e23fceb
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILIntrinsicInfo.h
@@ -0,0 +1,49 @@
+//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
+//
+//===-----------------------------------------------------------------------===//
+#ifndef AMDIL_INTRINSICS_H
+#define AMDIL_INTRINSICS_H
+
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+class TargetMachine;
+
+namespace AMDGPUIntrinsic {
+enum ID {
+ last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_AMDGPU_intrinsics
+};
+
+} // end namespace AMDGPUIntrinsic
+
+class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
+public:
+ AMDGPUIntrinsicInfo(TargetMachine *tm);
+ std::string getName(unsigned int IntrId, Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ unsigned int lookupName(const char *Name, unsigned int Len) const;
+ bool isOverloaded(unsigned int IID) const;
+ Function *getDeclaration(Module *M, unsigned int ID,
+ Type **Tys = 0,
+ unsigned int numTys = 0) const;
+};
+
+} // end namespace llvm
+
+#endif // AMDIL_INTRINSICS_H
+
diff --git a/contrib/llvm/lib/Target/R600/AMDILIntrinsics.td b/contrib/llvm/lib/Target/R600/AMDILIntrinsics.td
new file mode 100644
index 000000000000..6ec3559af24c
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILIntrinsics.td
@@ -0,0 +1,232 @@
+//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines all of the amdil-specific intrinsics
+//
+//===---------------------------------------------------------------===//
+//===--------------------------------------------------------------------===//
+// Intrinsic classes
+// Generic versions of the above classes but for Target specific intrinsics
+// instead of SDNode patterns.
+//===--------------------------------------------------------------------===//
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ class VoidIntLong :
+ Intrinsic<[llvm_i64_ty], [], []>;
+ class VoidIntInt :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class VoidIntBool :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class UnaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class ConvertIntFTOI :
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
+ class ConvertIntITOF :
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+ class UnaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty], []>;
+ class BinaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+ class TernaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class TernaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class QuaternaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ class UnaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
+
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
+
+ def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
+ UnaryIntInt;
+ def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
+ UnaryIntInt;
+ def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
+ TernaryIntInt;
+ def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
+ TernaryIntInt;
+ def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
+ QuaternaryIntInt;
+ def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
+ TernaryIntInt;
+ def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
+ BinaryIntInt;
+ def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
+ BinaryIntInt;
+ def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
+ BinaryIntInt;
+ def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
+ BinaryIntInt;
+ def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
+ BinaryIntFloat;
+ def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
+ BinaryIntInt;
+ def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
+ BinaryIntInt;
+ def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
+ BinaryIntFloat;
+ def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
+ TernaryIntInt;
+ def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
+ UnaryIntFloat;
+ def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
+ TernaryIntFloat;
+ def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
+ UnaryIntFloat;
+ def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
+ UnaryIntFloat;
+ def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
+ UnaryIntFloat;
+ def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
+ UnaryIntFloat;
+ def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
+ UnaryIntFloat;
+ def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
+ UnaryIntFloat;
+ def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
+ UnaryIntFloat;
+ def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
+ UnaryIntFloat;
+ def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
+ UnaryIntFloat;
+ def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
+ UnaryIntFloat;
+ def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
+ def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
+ def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
+ def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
+ UnaryIntFloat;
+ def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
+ UnaryIntFloat;
+ def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
+ UnaryIntFloat;
+ def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
+ UnaryIntFloat;
+ def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
+ UnaryIntFloat;
+ def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
+ TernaryIntFloat;
+ def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
+ UnaryIntFloat;
+ def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
+ UnaryIntFloat;
+ def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
+ TernaryIntFloat;
+ def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i32_ty], []>;
+
+ def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
+ def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+ def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+ def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
+ ConvertIntITOF;
+ def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
+ def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
+ ConvertIntITOF;
+ def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty, llvm_float_ty], []>;
+ def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty], []>;
+ def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+ def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDILNIDevice.cpp b/contrib/llvm/lib/Target/R600/AMDILNIDevice.cpp
new file mode 100644
index 000000000000..47c3f7f209d6
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILNIDevice.cpp
@@ -0,0 +1,65 @@
+//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILNIDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ std::string name = ST->getDeviceName();
+ if (name == "caicos") {
+ DeviceFlag = OCL_DEVICE_CAICOS;
+ } else if (name == "turks") {
+ DeviceFlag = OCL_DEVICE_TURKS;
+ } else if (name == "cayman") {
+ DeviceFlag = OCL_DEVICE_CAYMAN;
+ } else {
+ DeviceFlag = OCL_DEVICE_BARTS;
+ }
+}
+AMDGPUNIDevice::~AMDGPUNIDevice() {
+}
+
+size_t
+AMDGPUNIDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDGPUNIDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD6XXX;
+}
+
+
+AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
+ : AMDGPUNIDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCaymanDevice::~AMDGPUCaymanDevice() {
+}
+
+void
+AMDGPUCaymanDevice::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ mHWBits.set(AMDGPUDeviceInfo::FMA);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
+ mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
+}
+
diff --git a/contrib/llvm/lib/Target/R600/AMDILNIDevice.h b/contrib/llvm/lib/Target/R600/AMDILNIDevice.h
new file mode 100644
index 000000000000..24a640845eab
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILNIDevice.h
@@ -0,0 +1,57 @@
+//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef AMDILNIDEVICE_H
+#define AMDILNIDEVICE_H
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+
+namespace llvm {
+
+class AMDGPUSubtarget;
+//===---------------------------------------------------------------------===//
+// NI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+/// \brief The AMDGPUNIDevice is the base class for all Northern Island series of
+/// cards.
+///
+/// It is very similiar to the AMDGPUEvergreenDevice, with the major
+/// exception being differences in wavefront size and hardware capabilities. The
+/// NI devices are all 64 wide wavefronts and also add support for signed 24 bit
+/// integer operations
+class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUNIDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUNIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+};
+
+/// Just as the AMDGPUCypressDevice is the double capable version of the
+/// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version
+/// of the AMDGPUNIDevice. The other major difference is that the Cayman Device
+/// has 4 wide ALU's, whereas the rest of the NI family is a 5 wide.
+class AMDGPUCaymanDevice: public AMDGPUNIDevice {
+public:
+ AMDGPUCaymanDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUCaymanDevice();
+private:
+ virtual void setCaps();
+};
+
+static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
+} // namespace llvm
+#endif // AMDILNIDEVICE_H
diff --git a/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp
new file mode 100644
index 000000000000..3a28038666f7
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp
@@ -0,0 +1,1215 @@
+//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "PeepholeOpt"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILDevices.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <sstream>
+
+#if 0
+STATISTIC(PointerAssignments, "Number of dynamic pointer "
+ "assigments discovered");
+STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
+#endif
+
+using namespace llvm;
+// The Peephole optimization pass is used to do simple last minute optimizations
+// that are required for correct code or to remove redundant functions
+namespace {
+
+class OpaqueType;
+
+class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
+public:
+ TargetMachine &TM;
+ static char ID;
+ AMDGPUPeepholeOpt(TargetMachine &tm);
+ ~AMDGPUPeepholeOpt();
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+protected:
+private:
+ // Function to initiate all of the instruction level optimizations.
+ bool instLevelOptimizations(BasicBlock::iterator *inst);
+ // Quick check to see if we need to dump all of the pointers into the
+ // arena. If this is correct, then we set all pointers to exist in arena. This
+ // is a workaround for aliasing of pointers in a struct/union.
+ bool dumpAllIntoArena(Function &F);
+ // Because I don't want to invalidate any pointers while in the
+ // safeNestedForEachFunction. I push atomic conversions to a vector and handle
+ // it later. This function does the conversions if required.
+ void doAtomicConversionIfNeeded(Function &F);
+ // Because __amdil_is_constant cannot be properly evaluated if
+ // optimizations are disabled, the call's are placed in a vector
+ // and evaluated after the __amdil_image* functions are evaluated
+ // which should allow the __amdil_is_constant function to be
+ // evaluated correctly.
+ void doIsConstCallConversionIfNeeded();
+ bool mChanged;
+ bool mDebug;
+ bool mConvertAtomics;
+ CodeGenOpt::Level optLevel;
+ // Run a series of tests to see if we can optimize a CALL instruction.
+ bool optimizeCallInst(BasicBlock::iterator *bbb);
+ // A peephole optimization to optimize bit extract sequences.
+ bool optimizeBitExtract(Instruction *inst);
+ // A peephole optimization to optimize bit insert sequences.
+ bool optimizeBitInsert(Instruction *inst);
+ bool setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift);
+ // Expand the bit field insert instruction on versions of OpenCL that
+ // don't support it.
+ bool expandBFI(CallInst *CI);
+ // Expand the bit field mask instruction on version of OpenCL that
+ // don't support it.
+ bool expandBFM(CallInst *CI);
+ // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
+ // this case we need to expand them. These functions check for 24bit functions
+ // and then expand.
+ bool isSigned24BitOps(CallInst *CI);
+ void expandSigned24BitOps(CallInst *CI);
+ // One optimization that can occur is that if the required workgroup size is
+ // specified then the result of get_local_size is known at compile time and
+ // can be returned accordingly.
+ bool isRWGLocalOpt(CallInst *CI);
+ // On northern island cards, the division is slightly less accurate than on
+ // previous generations, so we need to utilize a more accurate division. So we
+ // can translate the accurate divide to a normal divide on all other cards.
+ bool convertAccurateDivide(CallInst *CI);
+ void expandAccurateDivide(CallInst *CI);
+ // If the alignment is set incorrectly, it can produce really inefficient
+ // code. This checks for this scenario and fixes it if possible.
+ bool correctMisalignedMemOp(Instruction *inst);
+
+ // If we are in no opt mode, then we need to make sure that
+ // local samplers are properly propagated as constant propagation
+ // doesn't occur and we need to know the value of kernel defined
+ // samplers at compile time.
+ bool propagateSamplerInst(CallInst *CI);
+
+ // Helper functions
+
+ // Group of functions that recursively calculate the size of a structure based
+ // on it's sub-types.
+ size_t getTypeSize(Type * const T, bool dereferencePtr = false);
+ size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
+ size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
+ size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
+ size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
+ size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
+ size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
+ size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
+
+ LLVMContext *mCTX;
+ Function *mF;
+ const AMDGPUSubtarget *mSTM;
+ SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
+ SmallVector<CallInst *, 16> isConstVec;
+}; // class AMDGPUPeepholeOpt
+ char AMDGPUPeepholeOpt::ID = 0;
+
+// A template function that has two levels of looping before calling the
+// function with a pointer to the current iterator.
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F) {
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end();
+ sf != sl; ) {
+ if (!F(&sf)) {
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
+
+} // anonymous namespace
+
+namespace llvm {
+ FunctionPass *
+ createAMDGPUPeepholeOpt(TargetMachine &tm) {
+ return new AMDGPUPeepholeOpt(tm);
+ }
+} // llvm namespace
+
+AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
+ : FunctionPass(ID), TM(tm) {
+ mDebug = DEBUGME;
+ optLevel = TM.getOptLevel();
+
+}
+
+AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() {
+}
+
+const char *
+AMDGPUPeepholeOpt::getPassName() const {
+ return "AMDGPU PeepHole Optimization Pass";
+}
+
+bool
+containsPointerType(Type *Ty) {
+ if (!Ty) {
+ return false;
+ }
+ switch(Ty->getTypeID()) {
+ default:
+ return false;
+ case Type::StructTyID: {
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ for (StructType::element_iterator stb = ST->element_begin(),
+ ste = ST->element_end(); stb != ste; ++stb) {
+ if (!containsPointerType(*stb)) {
+ continue;
+ }
+ return true;
+ }
+ break;
+ }
+ case Type::VectorTyID:
+ case Type::ArrayTyID:
+ return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
+ case Type::PointerTyID:
+ return true;
+ };
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) {
+ bool dumpAll = false;
+ for (Function::const_arg_iterator cab = F.arg_begin(),
+ cae = F.arg_end(); cab != cae; ++cab) {
+ const Argument *arg = cab;
+ const PointerType *PT = dyn_cast<PointerType>(arg->getType());
+ if (!PT) {
+ continue;
+ }
+ Type *DereferencedType = PT->getElementType();
+ if (!dyn_cast<StructType>(DereferencedType)
+ ) {
+ continue;
+ }
+ if (!containsPointerType(DereferencedType)) {
+ continue;
+ }
+ // FIXME: Because a pointer inside of a struct/union may be aliased to
+ // another pointer we need to take the conservative approach and place all
+ // pointers into the arena until more advanced detection is implemented.
+ dumpAll = true;
+ }
+ return dumpAll;
+}
+void
+AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() {
+ if (isConstVec.empty()) {
+ return;
+ }
+ for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
+ CallInst *CI = isConstVec[x];
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ CI->eraseFromParent();
+ }
+ isConstVec.clear();
+}
+void
+AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) {
+ // Don't do anything if we don't have any atomic operations.
+ if (atomicFuncs.empty()) {
+ return;
+ }
+ // Change the function name for the atomic if it is required
+ uint32_t size = atomicFuncs.size();
+ for (uint32_t x = 0; x < size; ++x) {
+ atomicFuncs[x].first->setOperand(
+ atomicFuncs[x].first->getNumOperands()-1,
+ atomicFuncs[x].second);
+
+ }
+ mChanged = true;
+ if (mConvertAtomics) {
+ return;
+ }
+}
+
+bool
+AMDGPUPeepholeOpt::runOnFunction(Function &MF) {
+ mChanged = false;
+ mF = &MF;
+ mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
+ if (mDebug) {
+ MF.dump();
+ }
+ mCTX = &MF.getType()->getContext();
+ mConvertAtomics = true;
+ safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+ std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
+ this));
+
+ doAtomicConversionIfNeeded(MF);
+ doIsConstCallConversionIfNeeded();
+
+ if (mDebug) {
+ MF.dump();
+ }
+ return mChanged;
+}
+
+bool
+AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) {
+ Instruction *inst = (*bbb);
+ CallInst *CI = dyn_cast<CallInst>(inst);
+ if (!CI) {
+ return false;
+ }
+ if (isSigned24BitOps(CI)) {
+ expandSigned24BitOps(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (propagateSamplerInst(CI)) {
+ return false;
+ }
+ if (expandBFI(CI) || expandBFM(CI)) {
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (convertAccurateDivide(CI)) {
+ expandAccurateDivide(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+
+ StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
+ if (calleeName.startswith("__amdil_is_constant")) {
+ // If we do not have optimizations, then this
+ // cannot be properly evaluated, so we add the
+ // call instruction to a vector and process
+ // them at the end of processing after the
+ // samplers have been correctly handled.
+ if (optLevel == CodeGenOpt::None) {
+ isConstVec.push_back(CI);
+ return false;
+ } else {
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ }
+
+ if (calleeName.equals("__amdil_is_asic_id_i32")) {
+ ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = CV;
+ if (Val) {
+ Val = ConstantInt::get(aType,
+ mSTM->device()->getDeviceFlag() & CV->getZExtValue());
+ } else {
+ Val = ConstantInt::get(aType, 0);
+ }
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
+ if (!F) {
+ return false;
+ }
+ if (F->getName().startswith("__atom") && !CI->getNumUses()
+ && F->getName().find("_xchg") == StringRef::npos) {
+ std::string buffer(F->getName().str() + "_noret");
+ F = dyn_cast<Function>(
+ F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
+ atomicFuncs.push_back(std::make_pair(CI, F));
+ }
+
+ if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
+ && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
+ return false;
+ }
+ if (!mConvertAtomics) {
+ return false;
+ }
+ StringRef name = F->getName();
+ if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
+ mConvertAtomics = false;
+ }
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift) {
+ if (!base) {
+ if (mDebug) {
+ dbgs() << "Null pointer passed into function.\n";
+ }
+ return false;
+ }
+ bool andOp = false;
+ if (base->getOpcode() == Instruction::Shl) {
+ shift = dyn_cast<Constant>(base->getOperand(1));
+ } else if (base->getOpcode() == Instruction::And) {
+ mask = dyn_cast<Constant>(base->getOperand(1));
+ andOp = true;
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
+ }
+ // If the base is neither a Shl or a And, we don't fit any of the patterns above.
+ return false;
+ }
+ src = dyn_cast<Instruction>(base->getOperand(0));
+ if (!src) {
+ if (mDebug) {
+ dbgs() << "Failed setup since the base operand is not an instruction!\n";
+ }
+ return false;
+ }
+ // If we find an 'and' operation, then we don't need to
+ // find the next operation as we already know the
+ // bits that are valid at this point.
+ if (andOp) {
+ return true;
+ }
+ if (src->getOpcode() == Instruction::Shl && !shift) {
+ shift = dyn_cast<Constant>(src->getOperand(1));
+ src = dyn_cast<Instruction>(src->getOperand(0));
+ } else if (src->getOpcode() == Instruction::And && !mask) {
+ mask = dyn_cast<Constant>(src->getOperand(1));
+ }
+ if (!mask && !shift) {
+ if (mDebug) {
+ dbgs() << "Failed setup since both mask and shift are NULL!\n";
+ }
+ // Did not find a constant mask or a shift.
+ return false;
+ }
+ return true;
+}
+bool
+AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) {
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::Or) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+ // We want to do an optimization on a sequence of ops that in the end equals a
+ // single ISA instruction.
+ // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
+ // Some simplified versions of this pattern are as follows:
+ // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
+ // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
+ // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
+ // (A & B) | (D << F) when (1 << F) >= B
+ // (A << C) | (D & E) when (1 << C) >= E
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
+ // The HD4XXX hardware doesn't support the ubit_insert instruction.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = 1;
+ // This optimization only works on 32bit integers.
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+ // We currently cannot support more than 4 elements in a intrinsic and we
+ // cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ // TODO: Handle vectors.
+ if (isVector) {
+ if (mDebug) {
+ dbgs() << "!!! Vectors are not supported yet!\n";
+ }
+ return false;
+ }
+ Instruction *LHSSrc = NULL, *RHSSrc = NULL;
+ Constant *LHSMask = NULL, *RHSMask = NULL;
+ Constant *LHSShift = NULL, *RHSShift = NULL;
+ Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
+ Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
+ if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (LHS) { LHS->dump(); }
+ if (LHSSrc) { LHSSrc->dump(); }
+ if (LHSMask) { LHSMask->dump(); }
+ if (LHSShift) { LHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (RHS) { RHS->dump(); }
+ if (RHSSrc) { RHSSrc->dump(); }
+ if (RHSMask) { RHSMask->dump(); }
+ if (RHSShift) { RHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
+ dbgs() << "Op: "; inst->dump();
+ dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ }
+ Constant *offset = NULL;
+ Constant *width = NULL;
+ uint32_t lhsMaskVal = 0, rhsMaskVal = 0;
+ uint32_t lhsShiftVal = 0, rhsShiftVal = 0;
+ uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
+ uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
+ lhsMaskVal = (LHSMask
+ ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
+ rhsMaskVal = (RHSMask
+ ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
+ lhsShiftVal = (LHSShift
+ ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
+ rhsShiftVal = (RHSShift
+ ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
+ lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
+ rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
+ lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
+ rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
+ // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks).
+ if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
+ return false;
+ }
+ if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
+ offset = ConstantInt::get(aType, lhsMaskOffset, false);
+ width = ConstantInt::get(aType, lhsMaskWidth, false);
+ RHSSrc = RHS;
+ if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
+ return false;
+ }
+ if (!LHSShift) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", LHS);
+ } else if (lhsShiftVal != lhsMaskOffset) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", LHS);
+ }
+ if (mDebug) {
+ dbgs() << "Optimizing LHS!\n";
+ }
+ } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
+ offset = ConstantInt::get(aType, rhsMaskOffset, false);
+ width = ConstantInt::get(aType, rhsMaskWidth, false);
+ LHSSrc = RHSSrc;
+ RHSSrc = LHS;
+ if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
+ return false;
+ }
+ if (!RHSShift) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", RHS);
+ } else if (rhsShiftVal != rhsMaskOffset) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", RHS);
+ }
+ if (mDebug) {
+ dbgs() << "Optimizing RHS!\n";
+ }
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed constraint 3!\n";
+ }
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ }
+ if (!offset || !width) {
+ if (mDebug) {
+ dbgs() << "Either width or offset are NULL, failed detection!\n";
+ }
+ return false;
+ }
+ // Lets create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "__amdil_ubit_insert";
+ if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[4] = {
+ width,
+ offset,
+ LHSSrc,
+ RHSSrc
+ };
+ CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
+ if (mDebug) {
+ dbgs() << "Old Inst: ";
+ inst->dump();
+ dbgs() << "New Inst: ";
+ CI->dump();
+ dbgs() << "\n\n";
+ }
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) {
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::And) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+ // We want to do some simple optimizations on Shift right/And patterns. The
+ // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
+ // value smaller than 32 and C is a mask. If C is a constant value, then the
+ // following transformation can occur. For signed integers, it turns into the
+ // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned
+ // integers, it turns into the function call dst =
+ // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
+ // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
+ // Evergreen hardware.
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
+ // This does not work on HD4XXX hardware.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+
+ // XXX Support vector types
+ if (isVector) {
+ return false;
+ }
+ int numEle = 1;
+ // This only works on 32bit integers
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+ // We currently cannot support more than 4 elements in a intrinsic and we
+ // cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
+ // If the first operand is not a shift instruction, then we can return as it
+ // doesn't match this pattern.
+ if (!ShiftInst || !ShiftInst->isShift()) {
+ return false;
+ }
+ // If we are a shift left, then we need don't match this pattern.
+ if (ShiftInst->getOpcode() == Instruction::Shl) {
+ return false;
+ }
+ bool isSigned = ShiftInst->isArithmeticShift();
+ Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
+ Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
+ // Lets make sure that the shift value and the and mask are constant integers.
+ if (!AndMask || !ShrVal) {
+ return false;
+ }
+ Constant *newMaskConst;
+ Constant *shiftValConst;
+ if (isVector) {
+ // Handle the vector case
+ std::vector<Constant *> maskVals;
+ std::vector<Constant *> shiftVals;
+ ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
+ ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
+ Type *scalarType = AndMaskVec->getType()->getScalarType();
+ assert(AndMaskVec->getNumOperands() ==
+ ShrValVec->getNumOperands() && "cannot have a "
+ "combination where the number of elements to a "
+ "shift and an and are different!");
+ for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
+ ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
+ ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
+ if (!AndCI || !ShiftIC) {
+ return false;
+ }
+ uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
+ // If the mask or shiftval is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+ // If the mask val is greater than the the number of original bits left
+ // then this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
+ shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
+ }
+ newMaskConst = ConstantVector::get(maskVals);
+ shiftValConst = ConstantVector::get(shiftVals);
+ } else {
+ // Handle the scalar case
+ uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
+ // This must be a mask value where all lower bits are set to 1 and then any
+ // bit higher is set to 0.
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ // Count the number of bits set in the mask, this is the width of the
+ // resulting bit set that is extracted from the source value.
+ uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
+ // If the mask or shift val is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+ // If the mask val is greater than the the number of original bits left then
+ // this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
+ shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
+ }
+ // Lets create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "llvm.AMDGPU.bit.extract.u32";
+ if (isVector) {
+ name += ".v" + itostr(numEle) + "i32";
+ } else {
+ name += ".";
+ }
+ // Lets create the function.
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[3] = {
+ ShiftInst->getOperand(0),
+ shiftValConst,
+ newMaskConst
+ };
+ // Lets create the Call with the operands
+ CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+ CI->setDoesNotAccessMemory();
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::expandBFI(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfi")) {
+ return false;
+ }
+ Type* type = CI->getOperand(0)->getType();
+ Constant *negOneConst = NULL;
+ if (type->isVectorTy()) {
+ std::vector<Constant *> negOneVals;
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ negOneVals.push_back(negOneConst);
+ }
+ negOneConst = ConstantVector::get(negOneVals);
+ } else {
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ }
+ // __amdil_bfi => (A & B) | (~A & C)
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ CI->getOperand(1), "bfi_and", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
+ "bfi_not", CI);
+ rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
+ "bfi_and", CI);
+ lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::expandBFM(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfm")) {
+ return false;
+ }
+ // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
+ Constant *newMaskConst = NULL;
+ Constant *newShiftConst = NULL;
+ Type* type = CI->getOperand(0)->getType();
+ if (type->isVectorTy()) {
+ std::vector<Constant*> newMaskVals, newShiftVals;
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ newMaskVals.push_back(newMaskConst);
+ newShiftVals.push_back(newShiftConst);
+ }
+ newMaskConst = ConstantVector::get(newMaskVals);
+ newShiftConst = ConstantVector::get(newShiftVals);
+ } else {
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ }
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
+ lhs, "bfm_shl", CI);
+ lhs = BinaryOperator::Create(Instruction::Sub, lhs,
+ newShiftConst, "bfm_sub", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(1),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) {
+ Instruction *inst = (*bbb);
+ if (optimizeCallInst(bbb)) {
+ return true;
+ }
+ if (optimizeBitExtract(inst)) {
+ return false;
+ }
+ if (optimizeBitInsert(inst)) {
+ return false;
+ }
+ if (correctMisalignedMemOp(inst)) {
+ return false;
+ }
+ return false;
+}
+bool
+AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) {
+ LoadInst *linst = dyn_cast<LoadInst>(inst);
+ StoreInst *sinst = dyn_cast<StoreInst>(inst);
+ unsigned alignment;
+ Type* Ty = inst->getType();
+ if (linst) {
+ alignment = linst->getAlignment();
+ Ty = inst->getType();
+ } else if (sinst) {
+ alignment = sinst->getAlignment();
+ Ty = sinst->getValueOperand()->getType();
+ } else {
+ return false;
+ }
+ unsigned size = getTypeSize(Ty);
+ if (size == alignment || size < alignment) {
+ return false;
+ }
+ if (!Ty->isStructTy()) {
+ return false;
+ }
+ if (alignment < 4) {
+ if (linst) {
+ linst->setAlignment(0);
+ return true;
+ } else if (sinst) {
+ sinst->setAlignment(0);
+ return true;
+ }
+ }
+ return false;
+}
+bool
+AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ std::string namePrefix = LHS->getName().substr(0, 14);
+ if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
+ && namePrefix != "__amdil__imul24_high") {
+ return false;
+ }
+ if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
+ return false;
+ }
+ return true;
+}
+
+void
+AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) {
+ assert(isSigned24BitOps(CI) && "Must be a "
+ "signed 24 bit operation to call this function!");
+ Value *LHS = CI->getOperand(CI->getNumOperands()-1);
+ // On 7XX and 8XX we do not have signed 24bit, so we need to
+ // expand it to the following:
+ // imul24 turns into 32bit imul
+ // imad24 turns into 32bit imad
+ // imul24_high turns into 32bit imulhigh
+ if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
+ Type *aType = CI->getOperand(0)->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ callTypes.push_back(CI->getOperand(2)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imad";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[3] = {
+ CI->getOperand(0),
+ CI->getOperand(1),
+ CI->getOperand(2)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
+ BinaryOperator *mulOp =
+ BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
+ CI->getOperand(1), "imul24", CI);
+ CI->replaceAllUsesWith(mulOp);
+ } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
+ Type *aType = CI->getOperand(0)->getType();
+
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imul_high";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[2] = {
+ CI->getOperand(0),
+ CI->getOperand(1)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ }
+}
+
+bool
+AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) {
+ return (CI != NULL
+ && CI->getOperand(CI->getNumOperands() - 1)->getName()
+ == "__amdil_get_local_size_int");
+}
+
+bool
+AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
+ && (mSTM->getDeviceName() == "cayman")) {
+ return false;
+ }
+ return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
+ == "__amdil_improved_div";
+}
+
+void
+AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) {
+ assert(convertAccurateDivide(CI)
+ && "expanding accurate divide can only happen if it is expandable!");
+ BinaryOperator *divOp =
+ BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
+ CI->getOperand(1), "fdiv32", CI);
+ CI->replaceAllUsesWith(divOp);
+}
+
+bool
+AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) {
+ if (optLevel != CodeGenOpt::None) {
+ return false;
+ }
+
+ if (!CI) {
+ return false;
+ }
+
+ unsigned funcNameIdx = 0;
+ funcNameIdx = CI->getNumOperands() - 1;
+ StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
+ if (calleeName != "__amdil_image2d_read_norm"
+ && calleeName != "__amdil_image2d_read_unnorm"
+ && calleeName != "__amdil_image3d_read_norm"
+ && calleeName != "__amdil_image3d_read_unnorm") {
+ return false;
+ }
+
+ unsigned samplerIdx = 2;
+ samplerIdx = 1;
+ Value *sampler = CI->getOperand(samplerIdx);
+ LoadInst *lInst = dyn_cast<LoadInst>(sampler);
+ if (!lInst) {
+ return false;
+ }
+
+ if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return false;
+ }
+
+ GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
+ // If we are loading from what is not a global value, then we
+ // fail and return.
+ if (!gv) {
+ return false;
+ }
+
+ // If we don't have an initializer or we have an initializer and
+ // the initializer is not a 32bit integer, we fail.
+ if (!gv->hasInitializer()
+ || !gv->getInitializer()->getType()->isIntegerTy(32)) {
+ return false;
+ }
+
+ // Now that we have the global variable initializer, lets replace
+ // all uses of the load instruction with the samplerVal and
+ // reparse the __amdil_is_constant() function.
+ Constant *samplerVal = gv->getInitializer();
+ lInst->replaceAllUsesWith(samplerVal);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::doInitialization(Module &M) {
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::doFinalization(Module &M) {
+ return false;
+}
+
+void
+AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
+ size_t size = 0;
+ if (!T) {
+ return size;
+ }
+ switch (T->getTypeID()) {
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ assert(0 && "These types are not supported by this backend");
+ default:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ size = T->getPrimitiveSizeInBits() >> 3;
+ break;
+ case Type::PointerTyID:
+ size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
+ break;
+ case Type::IntegerTyID:
+ size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
+ break;
+ case Type::StructTyID:
+ size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
+ break;
+ case Type::ArrayTyID:
+ size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
+ break;
+ case Type::FunctionTyID:
+ size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
+ break;
+ case Type::VectorTyID:
+ size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
+ break;
+ };
+ return size;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
+ bool dereferencePtr) {
+ size_t size = 0;
+ if (!ST) {
+ return size;
+ }
+ Type *curType;
+ StructType::element_iterator eib;
+ StructType::element_iterator eie;
+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
+ curType = *eib;
+ size += getTypeSize(curType, dereferencePtr);
+ }
+ return size;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
+ bool dereferencePtr) {
+ return IT ? (IT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
+ bool dereferencePtr) {
+ assert(0 && "Should not be able to calculate the size of an function type");
+ return 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
+ bool dereferencePtr) {
+ return (size_t)(AT ? (getTypeSize(AT->getElementType(),
+ dereferencePtr) * AT->getNumElements())
+ : 0);
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
+ bool dereferencePtr) {
+ return VT ? (VT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
+ bool dereferencePtr) {
+ if (!PT) {
+ return 0;
+ }
+ Type *CT = PT->getElementType();
+ if (CT->getTypeID() == Type::StructTyID &&
+ PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ return getTypeSize(dyn_cast<StructType>(CT));
+ } else if (dereferencePtr) {
+ size_t size = 0;
+ for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
+ size += getTypeSize(PT->getContainedType(x), dereferencePtr);
+ }
+ return size;
+ } else {
+ return 4;
+ }
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
+ bool dereferencePtr) {
+ //assert(0 && "Should not be able to calculate the size of an opaque type");
+ return 4;
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDILRegisterInfo.td b/contrib/llvm/lib/Target/R600/AMDILRegisterInfo.td
new file mode 100644
index 000000000000..b9d033432e8c
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILRegisterInfo.td
@@ -0,0 +1,107 @@
+//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Declarations that describe the AMDIL register file
+//
+//===----------------------------------------------------------------------===//
+
+class AMDILReg<bits<16> num, string n> : Register<n> {
+ field bits<16> Value;
+ let Value = num;
+ let Namespace = "AMDGPU";
+}
+
+// We will start with 8 registers for each class before expanding to more
+// Since the swizzle is added based on the register class, we can leave it
+// off here and just specify different registers for different register classes
+def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
+def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
+def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
+def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
+def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
+def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
+def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
+def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
+def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
+def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
+def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
+def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
+def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
+def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
+def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
+def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
+def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
+def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
+
+// All registers between 1000 and 1024 are reserved and cannot be used
+// unless commented in this section
+// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
+// r1020 is used to hold the frame index for local arrays
+// r1019 is used to hold the dynamic stack allocation pointer
+// r1018 is used as a temporary register for handwritten code
+// r1017 is used as a temporary register for handwritten code
+// r1016 is used as a temporary register for load/store code
+// r1015 is used as a temporary register for data segment offset
+// r1014 is used as a temporary register for store code
+// r1013 is used as the section data pointer register
+// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
+// r1009 is used as the frame pointer register
+// r999 is used as the mem register.
+// r998 is used as the return address register.
+//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
+//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
+//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
+//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
+//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
+//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
+def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
+def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
+def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
+def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
+def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
+def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
+def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
+def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
+def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
+def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
+def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
+def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
+def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
+def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
+def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
+def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
+def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
+def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
+def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
+def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
+def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
+def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
+def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
diff --git a/contrib/llvm/lib/Target/R600/AMDILSIDevice.cpp b/contrib/llvm/lib/Target/R600/AMDILSIDevice.cpp
new file mode 100644
index 000000000000..0d1de3d11eb4
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILSIDevice.cpp
@@ -0,0 +1,48 @@
+//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILSIDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+
+using namespace llvm;
+
+AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+}
+AMDGPUSIDevice::~AMDGPUSIDevice() {
+}
+
+size_t
+AMDGPUSIDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDGPUSIDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD7XXX;
+}
+
+std::string
+AMDGPUSIDevice::getDataLayout() const {
+ return std::string(
+ "e"
+ "-p:64:64:64"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128"
+ "-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
+ "-v2048:2048:2048"
+ "-n32:64"
+ );
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDILSIDevice.h b/contrib/llvm/lib/Target/R600/AMDILSIDevice.h
new file mode 100644
index 000000000000..5b2cb2502211
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDILSIDevice.h
@@ -0,0 +1,39 @@
+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef AMDILSIDEVICE_H
+#define AMDILSIDEVICE_H
+#include "AMDILEvergreenDevice.h"
+
+namespace llvm {
+class AMDGPUSubtarget;
+//===---------------------------------------------------------------------===//
+// SI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+/// \brief The AMDGPUSIDevice is the base class for all Southern Island series
+/// of cards.
+class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUSIDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUSIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual std::string getDataLayout() const;
+};
+
+} // namespace llvm
+#endif // AMDILSIDEVICE_H
diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
new file mode 100644
index 000000000000..10547a598805
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -0,0 +1,172 @@
+//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+
+using namespace llvm;
+
+void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
+ printInstruction(MI, OS);
+
+ printAnnotation(OS, Annot);
+}
+
+void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ switch (Op.getReg()) {
+ // This is the default predicate state, so we don't need to print it.
+ case AMDGPU::PRED_SEL_OFF: break;
+ default: O << getRegisterName(Op.getReg()); break;
+ }
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ O << Op.getFPImm();
+ } else if (Op.isExpr()) {
+ const MCExpr *Exp = Op.getExpr();
+ Exp->print(O);
+ } else {
+ assert(!"unknown operand type in printOperand");
+ }
+}
+
+void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+
+ if (Imm == 2) {
+ O << "P0";
+ } else if (Imm == 1) {
+ O << "P20";
+ } else if (Imm == 0) {
+ O << "P10";
+ } else {
+ assert(!"Invalid interpolation parameter slot");
+ }
+}
+
+void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo + 1, O);
+}
+
+void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, StringRef Asm) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert(Op.isImm());
+ if (Op.getImm() == 1) {
+ O << Asm;
+ }
+}
+
+void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "|");
+}
+
+void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "_SAT");
+}
+
+void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ union Literal {
+ float f;
+ int32_t i;
+ } L;
+
+ L.i = MI->getOperand(OpNo).getImm();
+ O << L.i << "(" << L.f << ")";
+}
+
+void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, " *");
+}
+
+void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "-");
+}
+
+void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ switch (MI->getOperand(OpNo).getImm()) {
+ default: break;
+ case 1:
+ O << " * 2.0";
+ break;
+ case 2:
+ O << " * 4.0";
+ break;
+ case 3:
+ O << " / 2.0";
+ break;
+ }
+}
+
+void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "+");
+}
+
+void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "ExecMask,");
+}
+
+void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "Pred,");
+}
+
+void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.getImm() == 0) {
+ O << " (MASKED)";
+ }
+}
+
+void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const char * chans = "XYZW";
+ int sel = MI->getOperand(OpNo).getImm();
+
+ int chan = sel & 3;
+ sel >>= 2;
+
+ if (sel >= 512) {
+ sel -= 512;
+ int cb = sel >> 12;
+ sel &= 4095;
+ O << cb << "[" << sel << "]";
+ } else if (sel >= 448) {
+ sel -= 448;
+ O << sel;
+ } else if (sel >= 0){
+ O << sel;
+ }
+
+ if (sel >= 0)
+ O << "." << chans[chan];
+}
+
+#include "AMDGPUGenAsmWriter.inc"
diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
new file mode 100644
index 000000000000..767a7082cc2c
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -0,0 +1,54 @@
+//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTPRINTER_H
+#define AMDGPUINSTPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class AMDGPUInstPrinter : public MCInstPrinter {
+public:
+ AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ //Autogenerated by tblgen
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm);
+ void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUINSTRPRINTER_H
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
new file mode 100644
index 000000000000..98fca432670d
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -0,0 +1,90 @@
+//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUMCObjectWriter : public MCObjectWriter {
+public:
+ AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ //XXX: Implement if necessary.
+ }
+ virtual void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ assert(!"Not implemented");
+ }
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+
+};
+
+class AMDGPUAsmBackend : public MCAsmBackend {
+public:
+ AMDGPUAsmBackend(const Target &T)
+ : MCAsmBackend() {}
+
+ virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
+ virtual unsigned getNumFixupKinds() const { return 0; };
+ virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+ virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ return false;
+ }
+ virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ assert(!"Not implemented");
+ }
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
+ virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ return true;
+ }
+};
+
+} //End anonymous namespace
+
+void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
+ Asm.writeSectionData(I, Layout);
+ }
+}
+
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU) {
+ return new AMDGPUAsmBackend(T);
+}
+
+AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
+ raw_ostream &OS) const {
+ return new AMDGPUMCObjectWriter(OS);
+}
+
+void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+
+ uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
+ assert(Fixup.getKind() == FK_PCRel_4);
+ *Dst = (Value - 4) / 4;
+}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
new file mode 100644
index 000000000000..b7cdd7c8cde9
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -0,0 +1,83 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCAsmInfo.h"
+
+using namespace llvm;
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
+ HasSingleParameterDotFile = false;
+ WeakDefDirective = 0;
+ //===------------------------------------------------------------------===//
+ HasSubsectionsViaSymbols = true;
+ HasMachoZeroFillDirective = false;
+ HasMachoTBSSDirective = false;
+ HasStaticCtorDtorReferenceInStaticMode = false;
+ LinkerRequiresNonEmptyDwarfLines = true;
+ MaxInstLength = 16;
+ PCSymbol = "$";
+ SeparatorString = "\n";
+ CommentColumn = 40;
+ CommentString = ";";
+ LabelSuffix = ":";
+ GlobalPrefix = "@";
+ PrivateGlobalPrefix = ";.";
+ LinkerPrivateGlobalPrefix = "!";
+ InlineAsmStart = ";#ASMSTART";
+ InlineAsmEnd = ";#ASMEND";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+ AllowPeriodsInName = false;
+
+ //===--- Data Emission Directives -------------------------------------===//
+ ZeroDirective = ".zero";
+ AsciiDirective = ".ascii\t";
+ AscizDirective = ".asciz\t";
+ Data8bitsDirective = ".byte\t";
+ Data16bitsDirective = ".short\t";
+ Data32bitsDirective = ".long\t";
+ Data64bitsDirective = ".quad\t";
+ GPRel32Directive = 0;
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+ HasMicrosoftFastStdCallMangling = false;
+
+ //===--- Alignment Information ----------------------------------------===//
+ AlignDirective = ".align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+
+ //===--- Global Variable Emission Directives --------------------------===//
+ GlobalDirective = ".global";
+ ExternDirective = ".extern";
+ HasSetDirective = false;
+ HasAggressiveSymbolFolding = true;
+ COMMDirectiveAlignmentIsInBytes = false;
+ HasDotTypeDotSizeDirective = false;
+ HasNoDeadStrip = true;
+ HasSymbolResolver = false;
+ WeakRefDirective = ".weakref\t";
+ LinkOnceDirective = 0;
+ //===--- Dwarf Emission Directives -----------------------------------===//
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+ DwarfSectionOffsetDirective = ".offset";
+
+}
+
+const char*
+AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const {
+ return 0;
+}
+
+const MCSection*
+AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
new file mode 100644
index 000000000000..3ad0fa6824ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMCASMINFO_H
+#define AMDGPUMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+namespace llvm {
+
+class Target;
+class StringRef;
+
+class AMDGPUMCAsmInfo : public MCAsmInfo {
+public:
+ explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
+ const char* getDataASDirective(unsigned int Size, unsigned int AS) const;
+ const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
+};
+} // namespace llvm
+#endif // AMDGPUMCASMINFO_H
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
new file mode 100644
index 000000000000..cd3a7ce65aa5
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -0,0 +1,40 @@
+//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUCODEEMITTER_H
+#define AMDGPUCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCInst;
+class MCOperand;
+
+class AMDGPUMCCodeEmitter : public MCCodeEmitter {
+public:
+
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUCODEEMITTER_H
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
new file mode 100644
index 000000000000..072ee49b6311
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -0,0 +1,113 @@
+//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This file provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "AMDGPUMCAsmInfo.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AMDGPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createAMDGPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAMDGPUMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAMDGPUMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo * X = new MCSubtargetInfo();
+ InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new AMDGPUInstPrinter(MAI, MII, MRI);
+}
+
+static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
+ return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
+ } else {
+ return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
+ }
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ return createPureStreamer(Ctx, MAB, _OS, _Emitter);
+}
+
+extern "C" void LLVMInitializeR600TargetMC() {
+
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
+
+ TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
+
+ TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
+
+ TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
+
+ TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
+
+ TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
+
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
+
+ TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
+
+ TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
+}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
new file mode 100644
index 000000000000..363a4af3f3a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -0,0 +1,55 @@
+//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef AMDGPUMCTARGETDESC_H
+#define AMDGPUMCTARGETDESC_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+
+extern Target TheAMDGPUTarget;
+
+MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+} // End llvm namespace
+
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#endif // AMDGPUMCTARGETDESC_H
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
new file mode 100644
index 000000000000..927bcbd8305c
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -0,0 +1,585 @@
+//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This code emitter outputs bytecode that is understood by the r600g driver
+/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
+/// but it still needs to be run through a finalizer in order to be executed
+/// by the GPU.
+///
+/// [1] http://www.mesa3d.org/
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600Defines.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include <stdio.h>
+
+#define SRC_BYTE_COUNT 11
+#define DST_BYTE_COUNT 5
+
+using namespace llvm;
+
+namespace {
+
+class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
+ R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+
+public:
+
+ R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ const MCSubtargetInfo &sti, MCContext &ctx)
+ : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+
+ /// \brief Encode the instruction and write it to the OS.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// \returns the encoding for an MCOperand.
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+private:
+
+ void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const;
+ void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
+ void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
+ raw_ostream &OS) const;
+ void EmitDst(const MCInst &MI, raw_ostream &OS) const;
+ void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
+
+ void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
+
+ void EmitByte(unsigned int byte, raw_ostream &OS) const;
+
+ void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
+
+ void Emit(uint32_t value, raw_ostream &OS) const;
+ void Emit(uint64_t value, raw_ostream &OS) const;
+
+ unsigned getHWRegChan(unsigned reg) const;
+ unsigned getHWReg(unsigned regNo) const;
+
+ bool isFCOp(unsigned opcode) const;
+ bool isTexOp(unsigned opcode) const;
+ bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
+
+};
+
+} // End anonymous namespace
+
+enum RegElement {
+ ELEMENT_X = 0,
+ ELEMENT_Y,
+ ELEMENT_Z,
+ ELEMENT_W
+};
+
+enum InstrTypes {
+ INSTR_ALU = 0,
+ INSTR_TEX,
+ INSTR_FC,
+ INSTR_NATIVE,
+ INSTR_VTX,
+ INSTR_EXPORT,
+ INSTR_CFALU
+};
+
+enum FCInstr {
+ FC_IF_PREDICATE = 0,
+ FC_ELSE,
+ FC_ENDIF,
+ FC_BGNLOOP,
+ FC_ENDLOOP,
+ FC_BREAK_PREDICATE,
+ FC_CONTINUE
+};
+
+enum TextureTypes {
+ TEXTURE_1D = 1,
+ TEXTURE_2D,
+ TEXTURE_3D,
+ TEXTURE_CUBE,
+ TEXTURE_RECT,
+ TEXTURE_SHADOW1D,
+ TEXTURE_SHADOW2D,
+ TEXTURE_SHADOWRECT,
+ TEXTURE_1D_ARRAY,
+ TEXTURE_2D_ARRAY,
+ TEXTURE_SHADOW1D_ARRAY,
+ TEXTURE_SHADOW2D_ARRAY
+};
+
+MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (isFCOp(MI.getOpcode())){
+ EmitFCInstr(MI, OS);
+ } else if (MI.getOpcode() == AMDGPU::RETURN ||
+ MI.getOpcode() == AMDGPU::BUNDLE ||
+ MI.getOpcode() == AMDGPU::KILL) {
+ return;
+ } else {
+ switch(MI.getOpcode()) {
+ case AMDGPU::STACK_SIZE: {
+ EmitByte(MI.getOperand(0).getImm(), OS);
+ break;
+ }
+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
+ uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(inst, OS);
+ break;
+ }
+ case AMDGPU::CONSTANT_LOAD_eg:
+ case AMDGPU::VTX_READ_PARAM_8_eg:
+ case AMDGPU::VTX_READ_PARAM_16_eg:
+ case AMDGPU::VTX_READ_PARAM_32_eg:
+ case AMDGPU::VTX_READ_PARAM_128_eg:
+ case AMDGPU::VTX_READ_GLOBAL_8_eg:
+ case AMDGPU::VTX_READ_GLOBAL_32_eg:
+ case AMDGPU::VTX_READ_GLOBAL_128_eg:
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF : {
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+ uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
+
+ EmitByte(INSTR_VTX, OS);
+ Emit(InstWord01, OS);
+ Emit(InstWord2, OS);
+ break;
+ }
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V: {
+ unsigned Opcode = MI.getOpcode();
+ bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
+ unsigned OpOffset = HasOffsets ? 3 : 0;
+ int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
+ int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
+
+ uint32_t SrcSelect[4] = {0, 1, 2, 3};
+ uint32_t Offsets[3] = {0, 0, 0};
+ uint64_t CoordType[4] = {1, 1, 1, 1};
+
+ if (HasOffsets)
+ for (unsigned i = 0; i < 3; i++) {
+ int SignedOffset = MI.getOperand(i + 2).getImm();
+ Offsets[i] = (SignedOffset & 0x1F);
+ }
+
+
+ if (TextureType == TEXTURE_RECT ||
+ TextureType == TEXTURE_SHADOWRECT) {
+ CoordType[ELEMENT_X] = 0;
+ CoordType[ELEMENT_Y] = 0;
+ }
+
+ if (TextureType == TEXTURE_1D_ARRAY ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) {
+ if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
+ Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
+ CoordType[ELEMENT_Y] = 0;
+ } else {
+ CoordType[ELEMENT_Z] = 0;
+ SrcSelect[ELEMENT_Z] = ELEMENT_Y;
+ }
+ } else if (TextureType == TEXTURE_2D_ARRAY ||
+ TextureType == TEXTURE_SHADOW2D_ARRAY) {
+ CoordType[ELEMENT_Z] = 0;
+ }
+
+
+ if ((TextureType == TEXTURE_SHADOW1D ||
+ TextureType == TEXTURE_SHADOW2D ||
+ TextureType == TEXTURE_SHADOWRECT ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_L &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+ SrcSelect[ELEMENT_W] = ELEMENT_Z;
+ }
+
+ uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
+ CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
+ CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
+ uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
+ SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
+ SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
+ Offsets[2] << 10;
+
+ EmitByte(INSTR_TEX, OS);
+ Emit(Word01, OS);
+ Emit(Word2, OS);
+ break;
+ }
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportSwz:
+ case AMDGPU::EG_ExportBuf:
+ case AMDGPU::R600_ExportBuf: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_EXPORT, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ case AMDGPU::CF_ALU:
+ case AMDGPU::CF_ALU_PUSH_BEFORE: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_CFALU, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ case AMDGPU::CF_TC:
+ case AMDGPU::CF_VC:
+ case AMDGPU::CF_CALL_FS:
+ return;
+ case AMDGPU::WHILE_LOOP:
+ case AMDGPU::END_LOOP:
+ case AMDGPU::LOOP_BREAK:
+ case AMDGPU::CF_CONTINUE:
+ case AMDGPU::CF_JUMP:
+ case AMDGPU::CF_ELSE:
+ case AMDGPU::POP: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ default:
+ EmitALUInstr(MI, Fixups, OS);
+ break;
+ }
+ }
+}
+
+void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+
+ // Emit instruction type
+ EmitByte(INSTR_ALU, OS);
+
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+
+ //older alu have different encoding for instructions with one or two src
+ //parameters.
+ if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
+ !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
+ uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
+ InstWord01 &= ~(0x3FFULL << 39);
+ InstWord01 |= ISAOpCode << 1;
+ }
+
+ unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
+ MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
+
+ EmitByte(SrcNum, OS);
+
+ const unsigned SrcOps[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL}
+ };
+
+ for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
+ unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
+ unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
+ EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
+ }
+
+ Emit(InstWord01, OS);
+ return;
+}
+
+void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
+ raw_ostream &OS) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ union {
+ float f;
+ uint32_t i;
+ } Value;
+ Value.i = 0;
+ // Emit the source select (2 bytes). For GPRs, this is the register index.
+ // For other potential instruction operands, (e.g. constant registers) the
+ // value of the source select is defined in the r600isa docs.
+ if (MO.isReg()) {
+ unsigned reg = MO.getReg();
+ EmitTwoBytes(getHWReg(reg), OS);
+ if (reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ Value.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ Value.i = ImmOp.getImm();
+ }
+ }
+ } else {
+ // XXX: Handle other operand types.
+ EmitTwoBytes(0, OS);
+ }
+
+ // Emit the source channel (1 byte)
+ if (MO.isReg()) {
+ EmitByte(getHWRegChan(MO.getReg()), OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit isNegated (1 byte)
+ if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
+ && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
+ (MO.isReg() &&
+ (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // Emit isAbsolute (1 byte)
+ if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit relative addressing mode (1 byte)
+ EmitByte(0, OS);
+
+ // Emit kc_bank, This will be adjusted later by r600_asm
+ EmitByte(0, OS);
+
+ // Emit the literal value, if applicable (4 bytes).
+ Emit(Value.i, OS);
+
+}
+
+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
+ unsigned SelOpIdx, raw_ostream &OS) const {
+ const MCOperand &RegMO = MI.getOperand(RegOpIdx);
+ const MCOperand &SelMO = MI.getOperand(SelOpIdx);
+
+ union {
+ float f;
+ uint32_t i;
+ } InlineConstant;
+ InlineConstant.i = 0;
+ // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
+ // and select is 0 (GPR index is encoded in the instr encoding. For constants
+ // type is 1 and select is the original const select passed from the driver.
+ unsigned Reg = RegMO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ EmitByte(1, OS);
+ uint32_t Sel = SelMO.getImm();
+ Emit(Sel, OS);
+ } else {
+ EmitByte(0, OS);
+ Emit((uint32_t)0, OS);
+ }
+
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ InlineConstant.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ InlineConstant.i = ImmOp.getImm();
+ }
+ }
+
+ // Emit the literal value, if applicable (4 bytes).
+ Emit(InlineConstant.i, OS);
+}
+
+void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
+
+ // Emit instruction type
+ EmitByte(INSTR_FC, OS);
+
+ // Emit SRC
+ unsigned NumOperands = MI.getNumOperands();
+ if (NumOperands > 0) {
+ assert(NumOperands == 1);
+ EmitSrc(MI, 0, OS);
+ } else {
+ EmitNullBytes(SRC_BYTE_COUNT, OS);
+ }
+
+ // Emit FC Instruction
+ enum FCInstr instr;
+ switch (MI.getOpcode()) {
+ case AMDGPU::PREDICATED_BREAK:
+ instr = FC_BREAK_PREDICATE;
+ break;
+ case AMDGPU::CONTINUE:
+ instr = FC_CONTINUE;
+ break;
+ case AMDGPU::IF_PREDICATE_SET:
+ instr = FC_IF_PREDICATE;
+ break;
+ case AMDGPU::ELSE:
+ instr = FC_ELSE;
+ break;
+ case AMDGPU::ENDIF:
+ instr = FC_ENDIF;
+ break;
+ case AMDGPU::ENDLOOP:
+ instr = FC_ENDLOOP;
+ break;
+ case AMDGPU::WHILELOOP:
+ instr = FC_BGNLOOP;
+ break;
+ default:
+ abort();
+ break;
+ }
+ EmitByte(instr, OS);
+}
+
+void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
+ raw_ostream &OS) const {
+
+ for (unsigned int i = 0; i < ByteCount; i++) {
+ EmitByte(0, OS);
+ }
+}
+
+void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
+ OS.write((uint8_t) Byte & 0xff);
+}
+
+void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
+ raw_ostream &OS) const {
+ OS.write((uint8_t) (Bytes & 0xff));
+ OS.write((uint8_t) ((Bytes >> 8) & 0xff));
+}
+
+void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
+ for (unsigned i = 0; i < 4; i++) {
+ OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
+ }
+}
+
+void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
+ for (unsigned i = 0; i < 8; i++) {
+ EmitByte((Value >> (8 * i)) & 0xff, OS);
+ }
+}
+
+unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
+ return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT;
+}
+
+unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
+ return MRI.getEncodingValue(RegNo) & HW_REG_MASK;
+}
+
+uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ if (MO.isReg()) {
+ if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) {
+ return MRI.getEncodingValue(MO.getReg());
+ } else {
+ return getHWReg(MO.getReg());
+ }
+ } else if (MO.isImm()) {
+ return MO.getImm();
+ } else {
+ assert(0);
+ return 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Encoding helper functions
+//===----------------------------------------------------------------------===//
+
+bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::PREDICATED_BREAK:
+ case AMDGPU::CONTINUE:
+ case AMDGPU::IF_PREDICATE_SET:
+ case AMDGPU::ELSE:
+ case AMDGPU::ENDIF:
+ case AMDGPU::ENDLOOP:
+ case AMDGPU::WHILELOOP:
+ return true;
+ }
+}
+
+bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ return true;
+ }
+}
+
+bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
+ unsigned Flag) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+ unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
+ if (FlagIndex == 0) {
+ return false;
+ }
+ assert(MI.getOperand(FlagIndex).isImm());
+ return !!((MI.getOperand(FlagIndex).getImm() >>
+ (NUM_MO_FLAGS * Operand)) & Flag);
+}
+
+#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
new file mode 100644
index 000000000000..5af83209a0d5
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -0,0 +1,201 @@
+//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The SI code emitter produces machine code that can be executed
+/// directly on the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief Helper type used in encoding
+typedef union {
+ int32_t I;
+ float F;
+} IntFloatUnion;
+
+class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
+ SIMCCodeEmitter(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+
+ /// \brief Can this operand also contain immediate values?
+ bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
+
+ /// \brief Encode an fp or int literal
+ uint32_t getLitEncoding(const MCOperand &MO) const;
+
+public:
+ SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ const MCSubtargetInfo &sti, MCContext &ctx)
+ : MCII(mcii), MRI(mri) { }
+
+ ~SIMCCodeEmitter() { }
+
+ /// \breif Encode the instruction and write it to the OS.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// \returns the encoding for an MCOperand.
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // End anonymous namespace
+
+MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
+ unsigned OpNo) const {
+
+ unsigned RegClass = Desc.OpInfo[OpNo].RegClass;
+ return (AMDGPU::SSrc_32RegClassID == RegClass) ||
+ (AMDGPU::SSrc_64RegClassID == RegClass) ||
+ (AMDGPU::VSrc_32RegClassID == RegClass) ||
+ (AMDGPU::VSrc_64RegClassID == RegClass);
+}
+
+uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
+
+ IntFloatUnion Imm;
+ if (MO.isImm())
+ Imm.I = MO.getImm();
+ else if (MO.isFPImm())
+ Imm.F = MO.getFPImm();
+ else
+ return ~0;
+
+ if (Imm.I >= 0 && Imm.I <= 64)
+ return 128 + Imm.I;
+
+ if (Imm.I >= -16 && Imm.I <= -1)
+ return 192 + abs(Imm.I);
+
+ if (Imm.F == 0.5f)
+ return 240;
+
+ if (Imm.F == -0.5f)
+ return 241;
+
+ if (Imm.F == 1.0f)
+ return 242;
+
+ if (Imm.F == -1.0f)
+ return 243;
+
+ if (Imm.F == 2.0f)
+ return 244;
+
+ if (Imm.F == -2.0f)
+ return 245;
+
+ if (Imm.F == 4.0f)
+ return 246;
+
+ if (Imm.F == -4.0f)
+ return 247;
+
+ return 255;
+}
+
+void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ unsigned bytes = Desc.getSize();
+
+ for (unsigned i = 0; i < bytes; i++) {
+ OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
+ }
+
+ if (bytes > 4)
+ return;
+
+ // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+ for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
+
+ // Check if this operand should be encoded as [SV]Src
+ if (!isSrcOperand(Desc, i))
+ continue;
+
+ // Is this operand a literal immediate?
+ const MCOperand &Op = MI.getOperand(i);
+ if (getLitEncoding(Op) != 255)
+ continue;
+
+ // Yes! Encode it
+ IntFloatUnion Imm;
+ if (Op.isImm())
+ Imm.I = Op.getImm();
+ else
+ Imm.F = Op.getFPImm();
+
+ for (unsigned j = 0; j < 4; j++) {
+ OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff));
+ }
+
+ // Only one literal value allowed
+ break;
+ }
+}
+
+uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg())
+ return MRI.getEncodingValue(MO.getReg());
+
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FK_PCRel_4);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+
+ // Figure out the operand number, needed for isSrcOperand check
+ unsigned OpNo = 0;
+ for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
+ if (&MO == &MI.getOperand(OpNo))
+ break;
+ }
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ if (isSrcOperand(Desc, OpNo)) {
+ uint32_t Enc = getLitEncoding(MO);
+ if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
+ return Enc;
+
+ } else if (MO.isImm())
+ return MO.getImm();
+
+ llvm_unreachable("Encoding of this operand type is not supported yet.");
+ return 0;
+}
+
diff --git a/contrib/llvm/lib/Target/R600/Processors.td b/contrib/llvm/lib/Target/R600/Processors.td
new file mode 100644
index 000000000000..868810c613b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/Processors.td
@@ -0,0 +1,30 @@
+//===-- Processors.td - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AMDIL processors supported.
+//
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
+: Processor<Name, itin, Features>;
+def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>;
+def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
+def : Proc<"rv710", R600_EG_Itin, []>;
+def : Proc<"rv730", R600_EG_Itin, []>;
+def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
+def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
+
diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
new file mode 100644
index 000000000000..3a6c7eac730f
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -0,0 +1,268 @@
+//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass compute turns all control flow pseudo instructions into native one
+/// computing their address on the fly ; it also sets STACK_SIZE info.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "r600cf"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600ControlFlowFinalizer : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+ unsigned MaxFetchInst;
+
+ bool isFetch(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF:
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ MachineBasicBlock::iterator
+ MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned CfAddress) const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isFetch(I))
+ break;
+ AluInstCount ++;
+ if (AluInstCount > MaxFetchInst)
+ break;
+ }
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+ TII->get(AMDGPU::CF_TC))
+ .addImm(CfAddress) // ADDR
+ .addImm(AluInstCount); // COUNT
+ return I;
+ }
+ void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
+ MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
+ }
+ void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
+ const {
+ for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
+ It != E; ++It) {
+ MachineInstr *MI = *It;
+ CounterPropagateAddr(MI, Addr);
+ }
+ }
+
+public:
+ R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+ const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
+ MaxFetchInst = 8;
+ else
+ MaxFetchInst = 16;
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ unsigned MaxStack = 0;
+ unsigned CurrentStack = 0;
+ for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
+ ++MB) {
+ MachineBasicBlock &MBB = *MB;
+ unsigned CfCount = 0;
+ std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
+ std::vector<MachineInstr * > IfThenElseStack;
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ if (MFI->ShaderType == 1) {
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::CF_CALL_FS));
+ CfCount++;
+ }
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E;) {
+ if (isFetch(I)) {
+ DEBUG(dbgs() << CfCount << ":"; I->dump(););
+ I = MakeFetchClause(MBB, I, 0);
+ CfCount++;
+ continue;
+ }
+
+ MachineBasicBlock::iterator MI = I;
+ I++;
+ switch (MI->getOpcode()) {
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ case AMDGPU::CF_ALU:
+ case AMDGPU::EG_ExportBuf:
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportBuf:
+ case AMDGPU::R600_ExportSwz:
+ DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+ CfCount++;
+ break;
+ case AMDGPU::WHILELOOP: {
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::WHILE_LOOP))
+ .addImm(2);
+ std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
+ std::set<MachineInstr *>());
+ Pair.second.insert(MIb);
+ LoopStack.push_back(Pair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDLOOP: {
+ CurrentStack--;
+ std::pair<unsigned, std::set<MachineInstr *> > Pair =
+ LoopStack.back();
+ LoopStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+ .addImm(Pair.first + 1);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::IF_PREDICATE_SET: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_JUMP))
+ .addImm(0)
+ .addImm(0);
+ IfThenElseStack.push_back(MIb);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ELSE: {
+ MachineInstr * JumpInst = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(JumpInst, CfCount);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_ELSE))
+ .addImm(0)
+ .addImm(1);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ IfThenElseStack.push_back(MIb);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDIF: {
+ CurrentStack--;
+ MachineInstr *IfOrElseInst = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(IfOrElseInst, CfCount + 1);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::POP))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::PREDICATED_BREAK: {
+ CurrentStack--;
+ CfCount += 3;
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
+ .addImm(CfCount)
+ .addImm(1);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::LOOP_BREAK))
+ .addImm(0);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+ .addImm(CfCount)
+ .addImm(1);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::CONTINUE: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_CONTINUE))
+ .addImm(0);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::STACK_SIZE))
+ .addImm(MaxStack);
+ }
+
+ return false;
+ }
+
+ const char *getPassName() const {
+ return "R600 Control Flow Finalizer Pass";
+ }
+};
+
+char R600ControlFlowFinalizer::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
+ return new R600ControlFlowFinalizer(TM);
+}
+
diff --git a/contrib/llvm/lib/Target/R600/R600Defines.h b/contrib/llvm/lib/Target/R600/R600Defines.h
new file mode 100644
index 000000000000..16cfcf59eb3d
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600Defines.h
@@ -0,0 +1,97 @@
+//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef R600DEFINES_H_
+#define R600DEFINES_H_
+
+#include "llvm/MC/MCRegisterInfo.h"
+
+// Operand Flags
+#define MO_FLAG_CLAMP (1 << 0)
+#define MO_FLAG_NEG (1 << 1)
+#define MO_FLAG_ABS (1 << 2)
+#define MO_FLAG_MASK (1 << 3)
+#define MO_FLAG_PUSH (1 << 4)
+#define MO_FLAG_NOT_LAST (1 << 5)
+#define MO_FLAG_LAST (1 << 6)
+#define NUM_MO_FLAGS 7
+
+/// \brief Helper for getting the operand index for the instruction flags
+/// operand.
+#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
+
+namespace R600_InstFlag {
+ enum TIF {
+ TRANS_ONLY = (1 << 0),
+ TEX = (1 << 1),
+ REDUCTION = (1 << 2),
+ FC = (1 << 3),
+ TRIG = (1 << 4),
+ OP3 = (1 << 5),
+ VECTOR = (1 << 6),
+ //FlagOperand bits 7, 8
+ NATIVE_OPERANDS = (1 << 9),
+ OP1 = (1 << 10),
+ OP2 = (1 << 11)
+ };
+}
+
+#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
+
+/// \brief Defines for extracting register infomation from register encoding
+#define HW_REG_MASK 0x1ff
+#define HW_CHAN_SHIFT 9
+
+#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
+#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
+
+namespace R600Operands {
+ enum Ops {
+ DST,
+ UPDATE_EXEC_MASK,
+ UPDATE_PREDICATE,
+ WRITE,
+ OMOD,
+ DST_REL,
+ CLAMP,
+ SRC0,
+ SRC0_NEG,
+ SRC0_REL,
+ SRC0_ABS,
+ SRC0_SEL,
+ SRC1,
+ SRC1_NEG,
+ SRC1_REL,
+ SRC1_ABS,
+ SRC1_SEL,
+ SRC2,
+ SRC2_NEG,
+ SRC2_REL,
+ SRC2_SEL,
+ LAST,
+ PRED_SEL,
+ IMM,
+ COUNT
+ };
+
+ const static int ALUOpTable[3][R600Operands::COUNT] = {
+// W C S S S S S S S S S S S
+// R O D L S R R R R S R R R R S R R R L P
+// D U I M R A R C C C C R C C C C R C C C A R I
+// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
+// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
+ {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
+ {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
+ {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
+ };
+
+}
+
+#endif // R600DEFINES_H_
diff --git a/contrib/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp b/contrib/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp
new file mode 100644
index 000000000000..3fdc678b9ef1
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -0,0 +1,255 @@
+//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold
+/// 128 Alu instructions ; these instructions can access up to 4 prefetched
+/// 4 lines of 16 registers from constant buffers. Such ALU clauses are
+/// initiated by CF_ALU instructions.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600EmitClauseMarkersPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ unsigned OccupiedDwords(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return 4;
+ case AMDGPU::KILL:
+ return 0;
+ default:
+ break;
+ }
+
+ if(TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()))
+ return 4;
+
+ unsigned NumLiteral = 0;
+ for (MachineInstr::mop_iterator It = MI->operands_begin(),
+ E = MI->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ ++NumLiteral;
+ }
+ return 1 + NumLiteral;
+ }
+
+ bool isALU(const MachineInstr *MI) const {
+ if (TII->isALUInstr(MI->getOpcode()))
+ return true;
+ if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()))
+ return true;
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::COPY:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // Register Idx, then Const value
+ std::vector<std::pair<unsigned, unsigned> > ExtractConstRead(MachineInstr *MI)
+ const {
+ const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+ std::vector<std::pair<unsigned, unsigned> > Result;
+
+ if (!TII->isALUInstr(MI->getOpcode()))
+ return Result;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Const = MI->getOperand(
+ TII->getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Result.push_back(std::pair<unsigned, unsigned>(SrcIdx, Const));
+ }
+ }
+ return Result;
+ }
+
+ std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
+ // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
+ // (See also R600ISelLowering.cpp)
+ // ConstIndex value is in [0, 4095];
+ return std::pair<unsigned, unsigned>(
+ ((Sel >> 2) - 512) >> 12, // KC_BANK
+ // Line Number of ConstIndex
+ // A line contains 16 constant registers however KCX bank can lock
+ // two line at the same time ; thus we want to get an even line number.
+ // Line number can be retrieved with (>>4), using (>>5) <<1 generates
+ // an even number.
+ ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
+ }
+
+ bool SubstituteKCacheBank(MachineInstr *MI,
+ std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const {
+ std::vector<std::pair<unsigned, unsigned> > UsedKCache;
+ std::vector<std::pair<unsigned, unsigned> > Consts = ExtractConstRead(MI);
+ assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned Sel = Consts[i].second;
+ unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
+ unsigned KCacheIndex = Index * 4 + Chan;
+ const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
+ if (CachedConsts.empty()) {
+ CachedConsts.push_back(BankLine);
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts[0] == BankLine) {
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts.size() == 1) {
+ CachedConsts.push_back(BankLine);
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts[1] == BankLine) {
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+ continue;
+ }
+ return false;
+ }
+
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ switch(UsedKCache[i].first) {
+ case 0:
+ MI->getOperand(Consts[i].first).setReg(
+ AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[i].second));
+ break;
+ case 1:
+ MI->getOperand(Consts[i].first).setReg(
+ AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[i].second));
+ break;
+ default:
+ llvm_unreachable("Wrong Cache Line");
+ }
+ }
+ return true;
+ }
+
+ MachineBasicBlock::iterator
+ MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
+ bool PushBeforeModifier = false;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isALU(I))
+ break;
+ if (AluInstCount > TII->getMaxAlusPerClause())
+ break;
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH)
+ PushBeforeModifier = true;
+ AluInstCount ++;
+ continue;
+ }
+ if (I->getOpcode() == AMDGPU::KILLGT) {
+ I++;
+ break;
+ }
+ if (TII->isALUInstr(I->getOpcode()) &&
+ !SubstituteKCacheBank(I, KCacheBanks))
+ break;
+ AluInstCount += OccupiedDwords(I);
+ }
+ unsigned Opcode = PushBeforeModifier ?
+ AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
+ .addImm(0) // ADDR
+ .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
+ .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
+ .addImm(KCacheBanks.empty()?0:2) // KM0
+ .addImm((KCacheBanks.size() < 2)?0:2) // KM1
+ .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
+ .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
+ .addImm(AluInstCount); // COUNT
+ return I;
+ }
+
+public:
+ R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ if (I->getOpcode() == AMDGPU::CF_ALU)
+ continue; // BB was already parsed
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
+ if (isALU(I))
+ I = MakeALUClause(MBB, I);
+ else
+ ++I;
+ }
+ }
+ return false;
+ }
+
+ const char *getPassName() const {
+ return "R600 Emit Clause Markers Pass";
+ }
+};
+
+char R600EmitClauseMarkersPass::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) {
+ return new R600EmitClauseMarkersPass(TM);
+}
+
diff --git a/contrib/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/contrib/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp
new file mode 100644
index 000000000000..f8c900f72776
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -0,0 +1,297 @@
+//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Vector, Reduction, and Cube instructions need to fill the entire instruction
+/// group to work correctly. This pass expands these individual instructions
+/// into several instructions that will completely fill the instruction group.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ bool ExpandInputPerspective(MachineInstr& MI);
+ bool ExpandInputConstant(MachineInstr& MI);
+
+public:
+ R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "R600 Expand special instructions pass";
+ }
+};
+
+} // End anonymous namespace
+
+char R600ExpandSpecialInstrsPass::ID = 0;
+
+FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
+ return new R600ExpandSpecialInstrsPass(TM);
+}
+
+bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
+
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ while (I != MBB.end()) {
+ MachineInstr &MI = *I;
+ I = llvm::next(I);
+
+ switch (MI.getOpcode()) {
+ default: break;
+ // Expand PRED_X to one of the PRED_SET instructions.
+ case AMDGPU::PRED_X: {
+ uint64_t Flags = MI.getOperand(3).getImm();
+ // The native opcode used by PRED_X is stored as an immediate in the
+ // third operand.
+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
+ MI.getOperand(2).getImm(), // opcode
+ MI.getOperand(0).getReg(), // dst
+ MI.getOperand(1).getReg(), // src0
+ AMDGPU::ZERO); // src1
+ TII->addFlag(PredSet, 0, MO_FLAG_MASK);
+ if (Flags & MO_FLAG_PUSH) {
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
+ } else {
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1);
+ }
+ MI.eraseFromParent();
+ continue;
+ }
+ case AMDGPU::BREAK: {
+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
+ AMDGPU::PRED_SETE_INT,
+ AMDGPU::PREDICATE_BIT,
+ AMDGPU::ZERO,
+ AMDGPU::ZERO);
+ TII->addFlag(PredSet, 0, MO_FLAG_MASK);
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDGPU::PREDICATED_BREAK))
+ .addReg(AMDGPU::PREDICATE_BIT);
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_PAIR_XY: {
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg;
+
+ if (Chan < 2)
+ DstReg = MI.getOperand(Chan).getReg();
+ else
+ DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W;
+
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY,
+ DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
+
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan >= 2)
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_PAIR_ZW: {
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg;
+
+ if (Chan < 2)
+ DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y;
+ else
+ DstReg = MI.getOperand(Chan-2).getReg();
+
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW,
+ DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
+
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan < 2)
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_VEC_LOAD: {
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(1).getImm());
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0,
+ TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg);
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+ }
+
+ bool IsReduction = TII->isReductionOp(MI.getOpcode());
+ bool IsVector = TII->isVector(MI);
+ bool IsCube = TII->isCubeOp(MI.getOpcode());
+ if (!IsReduction && !IsVector && !IsCube) {
+ continue;
+ }
+
+ // Expand the instruction
+ //
+ // Reduction instructions:
+ // T0_X = DP4 T1_XYZW, T2_XYZW
+ // becomes:
+ // TO_X = DP4 T1_X, T2_X
+ // TO_Y (write masked) = DP4 T1_Y, T2_Y
+ // TO_Z (write masked) = DP4 T1_Z, T2_Z
+ // TO_W (write masked) = DP4 T1_W, T2_W
+ //
+ // Vector instructions:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // becomes:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // T0_Y (write masked) = MULLO_INT T1_X, T2_X
+ // T0_Z (write masked) = MULLO_INT T1_X, T2_X
+ // T0_W (write masked) = MULLO_INT T1_X, T2_X
+ //
+ // Cube instructions:
+ // T0_XYZW = CUBE T1_XYZW
+ // becomes:
+ // TO_X = CUBE T1_Z, T1_Y
+ // T0_Y = CUBE T1_Z, T1_X
+ // T0_Z = CUBE T1_X, T1_Z
+ // T0_W = CUBE T1_Y, T1_Z
+ for (unsigned Chan = 0; Chan < 4; Chan++) {
+ unsigned DstReg = MI.getOperand(
+ TII->getOperandIdx(MI, R600Operands::DST)).getReg();
+ unsigned Src0 = MI.getOperand(
+ TII->getOperandIdx(MI, R600Operands::SRC0)).getReg();
+ unsigned Src1 = 0;
+
+ // Determine the correct source registers
+ if (!IsCube) {
+ int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1);
+ if (Src1Idx != -1) {
+ Src1 = MI.getOperand(Src1Idx).getReg();
+ }
+ }
+ if (IsReduction) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex);
+ Src1 = TRI.getSubReg(Src1, SubRegIndex);
+ } else if (IsCube) {
+ static const int CubeSrcSwz[] = {2, 2, 0, 1};
+ unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
+ unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
+ Src1 = TRI.getSubReg(Src0, SubRegIndex1);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex0);
+ }
+
+ // Determine the correct destination registers;
+ bool Mask = false;
+ bool NotLast = true;
+ if (IsCube) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ DstReg = TRI.getSubReg(DstReg, SubRegIndex);
+ } else {
+ // Mask the write if the original instruction does not write to
+ // the current Channel.
+ Mask = (Chan != TRI.getHWRegChan(DstReg));
+ unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
+ DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+ }
+
+ // Set the IsLast bit
+ NotLast = (Chan != 3 );
+
+ // Add the new instruction
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AMDGPU::CUBE_r600_pseudo:
+ Opcode = AMDGPU::CUBE_r600_real;
+ break;
+ case AMDGPU::CUBE_eg_pseudo:
+ Opcode = AMDGPU::CUBE_eg_real;
+ break;
+ case AMDGPU::DOT4_r600_pseudo:
+ Opcode = AMDGPU::DOT4_r600_real;
+ break;
+ case AMDGPU::DOT4_eg_pseudo:
+ Opcode = AMDGPU::DOT4_eg_real;
+ break;
+ default:
+ break;
+ }
+
+ MachineInstr *NewMI =
+ TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
+
+ if (Chan != 0)
+ NewMI->bundleWithPred();
+ if (Mask) {
+ TII->addFlag(NewMI, 0, MO_FLAG_MASK);
+ }
+ if (NotLast) {
+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
+ }
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
new file mode 100644
index 000000000000..53e6e51dd2b1
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
@@ -0,0 +1,1106 @@
+//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Custom DAG lowering for R600
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600ISelLowering.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
+ addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
+ computeRegisterProperties();
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Expand);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::ADD, MVT::v4i32, Expand);
+ setOperationAction(ISD::AND, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
+ setOperationAction(ISD::UREM, MVT::v4i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+
+ setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+
+ // Legalize loads and stores to the private address space.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
+ setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::SELECT_CC);
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setSchedulingPreference(Sched::VLIW);
+}
+
+MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const {
+ MachineFunction * MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock::iterator I = *MI;
+
+ switch (MI->getOpcode()) {
+ default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDGPU::CLAMP_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
+ break;
+ }
+
+ case AMDGPU::FABS_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_ABS);
+ break;
+ }
+
+ case AMDGPU::FNEG_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_NEG);
+ break;
+ }
+
+ case AMDGPU::MASK_WRITE: {
+ unsigned maskedRegister = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
+ MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
+ TII->addFlag(defInstr, 0, MO_FLAG_MASK);
+ break;
+ }
+
+ case AMDGPU::MOV_IMM_F32:
+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
+ MI->getOperand(1).getFPImm()->getValueAPF()
+ .bitcastToAPInt().getZExtValue());
+ break;
+ case AMDGPU::MOV_IMM_I32:
+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+ break;
+ case AMDGPU::CONST_COPY: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
+ MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
+ TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
+ MI->getOperand(1).getImm());
+ break;
+ }
+
+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
+ unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(EOP); // Set End of program bit
+ break;
+ }
+
+ case AMDGPU::TXD: {
+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addReg(T0, RegState::Implicit)
+ .addReg(T1, RegState::Implicit);
+ break;
+ }
+
+ case AMDGPU::TXD_SHADOW: {
+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addReg(T0, RegState::Implicit)
+ .addReg(T1, RegState::Implicit);
+ break;
+ }
+
+ case AMDGPU::BRANCH:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0));
+ break;
+
+ case AMDGPU::BRANCH_COND_f32: {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
+ AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_NOT_ZERO)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+
+ case AMDGPU::BRANCH_COND_i32: {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
+ AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_NOT_ZERO_INT)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportSwz: {
+ // Instruction is left unmodified if its not the last one of its type
+ bool isLastInstructionOfItsType = true;
+ unsigned InstExportType = MI->getOperand(1).getImm();
+ for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
+ EndBlock = BB->end(); NextExportInst != EndBlock;
+ NextExportInst = llvm::next(NextExportInst)) {
+ if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
+ NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+ unsigned CurrentInstExportType = NextExportInst->getOperand(1)
+ .getImm();
+ if (CurrentInstExportType == InstExportType) {
+ isLastInstructionOfItsType = false;
+ break;
+ }
+ }
+ }
+ bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
+ if (!EOP && !isLastInstructionOfItsType)
+ return BB;
+ unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addImm(CfInst)
+ .addImm(EOP);
+ break;
+ }
+ case AMDGPU::RETURN: {
+ // RETURN instructions must have the live-out registers as implicit uses,
+ // otherwise they appear dead.
+ R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ MachineInstrBuilder MIB(*MF, MI);
+ for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
+ MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
+ return BB;
+ }
+ }
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+using namespace llvm::Intrinsic;
+using namespace llvm::AMDGPUIntrinsic;
+
+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::ROTL: return LowerROTL(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
+ case ISD::INTRINSIC_VOID: {
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::AMDGPU_store_output: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ MFI->LiveOuts.push_back(Reg);
+ return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
+ }
+ case AMDGPUIntrinsic::R600_store_swizzle: {
+ const SDValue Args[8] = {
+ Chain,
+ Op.getOperand(2), // Export Value
+ Op.getOperand(3), // ArrayBase
+ Op.getOperand(4), // Type
+ DAG.getConstant(0, MVT::i32), // SWZ_X
+ DAG.getConstant(1, MVT::i32), // SWZ_Y
+ DAG.getConstant(2, MVT::i32), // SWZ_Z
+ DAG.getConstant(3, MVT::i32) // SWZ_W
+ };
+ return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
+ Args, 8);
+ }
+
+ // default for switch(IntrinsicID)
+ default: break;
+ }
+ // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ switch(IntrinsicID) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case AMDGPUIntrinsic::R600_load_input: {
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
+ }
+
+ case AMDGPUIntrinsic::R600_interp_input: {
+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
+ MachineSDNode *interp;
+ if (ijb < 0) {
+ interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
+ MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
+ return DAG.getTargetExtractSubreg(
+ TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
+ DL, MVT::f32, SDValue(interp, 0));
+ }
+
+ if (slot % 4 < 2)
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
+ else
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
+
+ return SDValue(interp, slot % 2);
+ }
+
+ case r600_read_ngroups_x:
+ return LowerImplicitParameter(DAG, VT, DL, 0);
+ case r600_read_ngroups_y:
+ return LowerImplicitParameter(DAG, VT, DL, 1);
+ case r600_read_ngroups_z:
+ return LowerImplicitParameter(DAG, VT, DL, 2);
+ case r600_read_global_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 3);
+ case r600_read_global_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 4);
+ case r600_read_global_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 5);
+ case r600_read_local_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 6);
+ case r600_read_local_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 7);
+ case r600_read_local_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 8);
+
+ case r600_read_tgid_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_X, VT);
+ case r600_read_tgid_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Y, VT);
+ case r600_read_tgid_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Z, VT);
+ case r600_read_tidig_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_X, VT);
+ case r600_read_tidig_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Y, VT);
+ case r600_read_tidig_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Z, VT);
+ }
+ // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
+ break;
+ }
+ } // end switch(Op.getOpcode())
+ return SDValue();
+}
+
+void R600TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default: return;
+ case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ return;
+ case ISD::LOAD: {
+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+ // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
+ // function
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+ return;
+ }
+ case ISD::STORE:
+ SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ return;
+ }
+}
+
+SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
+ return DAG.getNode(
+ ISD::SETCC,
+ Op.getDebugLoc(),
+ MVT::i1,
+ Op, DAG.getConstantFP(0.0f, MVT::f32),
+ DAG.getCondCode(ISD::SETNE)
+ );
+}
+
+SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ DebugLoc DL,
+ unsigned DwordOffset) const {
+ unsigned ByteOffset = DwordOffset * 4;
+ PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::PARAM_I_ADDRESS);
+
+ // We shouldn't be using an offset wider than 16-bits for implicit parameters.
+ assert(isInt<16>(ByteOffset));
+
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
+ DAG.getConstant(ByteOffset, MVT::i32), // PTR
+ MachinePointerInfo(ConstantPointerNull::get(PtrType)),
+ false, false, false, 0);
+}
+
+SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
+ assert(FIN);
+
+ unsigned FrameIndex = FIN->getIndex();
+ unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
+}
+
+SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
+ Op.getOperand(0),
+ Op.getOperand(0),
+ DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(32, MVT::i32),
+ Op.getOperand(1)));
+}
+
+bool R600TargetLowering::isZero(SDValue Op) const {
+ if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
+ return Cst->isNullValue();
+ } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
+ return CstFP->isZero();
+ } else {
+ return false;
+ }
+}
+
+SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ SDValue Temp;
+
+ // LHS and RHS are guaranteed to be the same value type
+ EVT CompareVT = LHS.getValueType();
+
+ // Check if we can lower this to a native operation.
+
+ // Try to lower to a SET* instruction:
+ //
+ // SET* can match the following patterns:
+ //
+ // select_cc f32, f32, -1, 0, cc_any
+ // select_cc f32, f32, 1.0f, 0.0f, cc_any
+ // select_cc i32, i32, -1, 0, cc_any
+ //
+
+ // Move hardware True/False values to the correct operand.
+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ std::swap(False, True);
+ CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
+ }
+
+ if (isHWTrueValue(True) && isHWFalseValue(False) &&
+ (CompareVT == VT || VT == MVT::i32)) {
+ // This can be matched by a SET* instruction.
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+
+ // Try to lower to a CND* instruction:
+ //
+ // CND* can match the following patterns:
+ //
+ // select_cc f32, 0.0, f32, f32, cc_any
+ // select_cc f32, 0.0, i32, i32, cc_any
+ // select_cc i32, 0, f32, f32, cc_any
+ // select_cc i32, 0, i32, i32, cc_any
+ //
+ if (isZero(LHS) || isZero(RHS)) {
+ SDValue Cond = (isZero(LHS) ? RHS : LHS);
+ SDValue Zero = (isZero(LHS) ? LHS : RHS);
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ if (CompareVT != VT) {
+ // Bitcast True / False to the correct types. This will end up being
+ // a nop, but it allows us to define only a single pattern in the
+ // .TD files for each CND* instruction rather than having to have
+ // one pattern for integer True/False and one for fp True/False
+ True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
+ False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
+ }
+ if (isZero(LHS)) {
+ CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
+ }
+
+ switch (CCOpcode) {
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETOLT:
+ case ISD::SETLE:
+ case ISD::SETLT:
+ CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+ Temp = True;
+ True = False;
+ False = Temp;
+ break;
+ default:
+ break;
+ }
+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+ Cond, Zero,
+ True, False,
+ DAG.getCondCode(CCOpcode));
+ return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
+ }
+
+
+ // Possible Min/Max pattern
+ SDValue MinMax = LowerMinMax(Op, DAG);
+ if (MinMax.getNode()) {
+ return MinMax;
+ }
+
+ // If we make it this for it means we have no native instructions to handle
+ // this SELECT_CC, so we must lower it.
+ SDValue HWTrue, HWFalse;
+
+ if (CompareVT == MVT::f32) {
+ HWTrue = DAG.getConstantFP(1.0f, CompareVT);
+ HWFalse = DAG.getConstantFP(0.0f, CompareVT);
+ } else if (CompareVT == MVT::i32) {
+ HWTrue = DAG.getConstant(-1, CompareVT);
+ HWFalse = DAG.getConstant(0, CompareVT);
+ }
+ else {
+ assert(!"Unhandled value type in LowerSELECT_CC");
+ }
+
+ // Lower this unsupported SELECT_CC into a combination of two supported
+ // SELECT_CC operations.
+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
+
+ return DAG.getNode(ISD::SELECT_CC, DL, VT,
+ Cond, HWFalse,
+ True, False,
+ DAG.getCondCode(ISD::SETNE));
+}
+
+SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ return DAG.getNode(ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32),
+ Op.getOperand(1),
+ Op.getOperand(2),
+ DAG.getCondCode(ISD::SETNE));
+}
+
+/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
+/// convert these pointers to a register index. Each register holds
+/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
+/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
+/// for indirect addressing.
+SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
+ unsigned StackWidth,
+ SelectionDAG &DAG) const {
+ unsigned SRLPad;
+ switch(StackWidth) {
+ case 1:
+ SRLPad = 2;
+ break;
+ case 2:
+ SRLPad = 3;
+ break;
+ case 4:
+ SRLPad = 4;
+ break;
+ default: llvm_unreachable("Invalid stack width");
+ }
+
+ return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(SRLPad, MVT::i32));
+}
+
+void R600TargetLowering::getStackAddress(unsigned StackWidth,
+ unsigned ElemIdx,
+ unsigned &Channel,
+ unsigned &PtrIncr) const {
+ switch (StackWidth) {
+ default:
+ case 1:
+ Channel = 0;
+ if (ElemIdx > 0) {
+ PtrIncr = 1;
+ } else {
+ PtrIncr = 0;
+ }
+ break;
+ case 2:
+ Channel = ElemIdx % 2;
+ if (ElemIdx == 2) {
+ PtrIncr = 1;
+ } else {
+ PtrIncr = 0;
+ }
+ break;
+ case 4:
+ Channel = ElemIdx;
+ PtrIncr = 0;
+ break;
+ }
+}
+
+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Ptr = Op.getOperand(2);
+
+ if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
+ // Convert pointer from byte address to dword address.
+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
+ DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(2, MVT::i32)));
+
+ if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
+ assert(!"Truncated and indexed stores not supported yet");
+ } else {
+ Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
+ }
+ return Chain;
+ }
+
+ EVT ValueVT = Value.getValueType();
+
+ if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ // Lowering for indirect addressing
+
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+ getTargetMachine().getFrameLowering());
+ unsigned StackWidth = TFL->getStackWidth(MF);
+
+ Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+ if (ValueVT.isVector()) {
+ unsigned NumElemVT = ValueVT.getVectorNumElements();
+ EVT ElemVT = ValueVT.getVectorElementType();
+ SDValue Stores[4];
+
+ assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+ "vector width in load");
+
+ for (unsigned i = 0; i < NumElemVT; ++i) {
+ unsigned Channel, PtrIncr;
+ getStackAddress(StackWidth, i, Channel, PtrIncr);
+ Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(PtrIncr, MVT::i32));
+ SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
+ Value, DAG.getConstant(i, MVT::i32));
+
+ Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Elem, Ptr,
+ DAG.getTargetConstant(Channel, MVT::i32));
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
+ } else {
+ if (ValueVT == MVT::i8) {
+ Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
+ }
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
+ DAG.getTargetConstant(0, MVT::i32)); // Channel
+ }
+
+ return Chain;
+}
+
+// return (512 + (kc_bank << 12)
+static int
+ConstantAddressBlock(unsigned AddressSpace) {
+ switch (AddressSpace) {
+ case AMDGPUAS::CONSTANT_BUFFER_0:
+ return 512;
+ case AMDGPUAS::CONSTANT_BUFFER_1:
+ return 512 + 4096;
+ case AMDGPUAS::CONSTANT_BUFFER_2:
+ return 512 + 4096 * 2;
+ case AMDGPUAS::CONSTANT_BUFFER_3:
+ return 512 + 4096 * 3;
+ case AMDGPUAS::CONSTANT_BUFFER_4:
+ return 512 + 4096 * 4;
+ case AMDGPUAS::CONSTANT_BUFFER_5:
+ return 512 + 4096 * 5;
+ case AMDGPUAS::CONSTANT_BUFFER_6:
+ return 512 + 4096 * 6;
+ case AMDGPUAS::CONSTANT_BUFFER_7:
+ return 512 + 4096 * 7;
+ case AMDGPUAS::CONSTANT_BUFFER_8:
+ return 512 + 4096 * 8;
+ case AMDGPUAS::CONSTANT_BUFFER_9:
+ return 512 + 4096 * 9;
+ case AMDGPUAS::CONSTANT_BUFFER_10:
+ return 512 + 4096 * 10;
+ case AMDGPUAS::CONSTANT_BUFFER_11:
+ return 512 + 4096 * 11;
+ case AMDGPUAS::CONSTANT_BUFFER_12:
+ return 512 + 4096 * 12;
+ case AMDGPUAS::CONSTANT_BUFFER_13:
+ return 512 + 4096 * 13;
+ case AMDGPUAS::CONSTANT_BUFFER_14:
+ return 512 + 4096 * 14;
+ case AMDGPUAS::CONSTANT_BUFFER_15:
+ return 512 + 4096 * 15;
+ default:
+ return -1;
+ }
+}
+
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+
+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+ if (ConstantBlock > -1) {
+ SDValue Result;
+ if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
+ dyn_cast<Constant>(LoadNode->getSrcValue()) ||
+ dyn_cast<ConstantSDNode>(Ptr)) {
+ SDValue Slots[4];
+ for (unsigned i = 0; i < 4; i++) {
+ // We want Const position encoded with the following formula :
+ // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+ // const_index is Ptr computed by llvm using an alignment of 16.
+ // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
+ // then div by 4 at the ISel step
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
+ } else {
+ // non constant ptr cant be folded, keeps it as a v4f32 load
+ Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
+ DAG.getConstant(LoadNode->getAddressSpace() -
+ AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
+ );
+ }
+
+ if (!VT.isVector()) {
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue MergedValues[2] = {
+ Result,
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+ }
+
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ // Lowering for indirect addressing
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+ getTargetMachine().getFrameLowering());
+ unsigned StackWidth = TFL->getStackWidth(MF);
+
+ Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+ if (VT.isVector()) {
+ unsigned NumElemVT = VT.getVectorNumElements();
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Loads[4];
+
+ assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+ "vector width in load");
+
+ for (unsigned i = 0; i < NumElemVT; ++i) {
+ unsigned Channel, PtrIncr;
+ getStackAddress(StackWidth, i, Channel, PtrIncr);
+ Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(PtrIncr, MVT::i32));
+ Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
+ Chain, Ptr,
+ DAG.getTargetConstant(Channel, MVT::i32),
+ Op.getOperand(2));
+ }
+ for (unsigned i = NumElemVT; i < 4; ++i) {
+ Loads[i] = DAG.getUNDEF(ElemVT);
+ }
+ EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
+ LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
+ } else {
+ LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, MVT::i32), // Channel
+ Op.getOperand(2));
+ }
+
+ SDValue Ops[2];
+ Ops[0] = LoweredLoad;
+ Ops[1] = Chain;
+
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+/// XXX Only kernel functions are supported, so we can assume for now that
+/// every function is a kernel function, but in the future we should use
+/// separate calling conventions for kernel and non-kernel functions.
+SDValue R600TargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ unsigned ParamOffsetBytes = 36;
+ Function::const_arg_iterator FuncArg =
+ DAG.getMachineFunction().getFunction()->arg_begin();
+ for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
+ EVT VT = Ins[i].VT;
+ Type *ArgType = FuncArg->getType();
+ unsigned ArgSizeInBits = ArgType->isPointerTy() ?
+ 32 : ArgType->getPrimitiveSizeInBits();
+ unsigned ArgBytes = ArgSizeInBits >> 3;
+ EVT ArgVT;
+ if (ArgSizeInBits < VT.getSizeInBits()) {
+ assert(!ArgType->isFloatTy() &&
+ "Extending floating point arguments not supported yet");
+ ArgVT = MVT::getIntegerVT(ArgSizeInBits);
+ } else {
+ ArgVT = VT;
+ }
+ PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::PARAM_I_ADDRESS);
+ SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
+ DAG.getConstant(ParamOffsetBytes, MVT::i32),
+ MachinePointerInfo(UndefValue::get(PtrTy)),
+ ArgVT, false, false, ArgBytes);
+ InVals.push_back(Arg);
+ ParamOffsetBytes += ArgBytes;
+ }
+ return Chain;
+}
+
+EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
+ case ISD::FP_ROUND: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
+ Arg.getOperand(0));
+ }
+ break;
+ }
+
+ // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
+ // (i32 select_cc f32, f32, -1, 0 cc)
+ //
+ // Mesa's GLSL frontend generates the above pattern a lot and we can lower
+ // this to one of the SET*_DX10 instructions.
+ case ISD::FP_TO_SINT: {
+ SDValue FNeg = N->getOperand(0);
+ if (FNeg.getOpcode() != ISD::FNEG) {
+ return SDValue();
+ }
+ SDValue SelectCC = FNeg.getOperand(0);
+ if (SelectCC.getOpcode() != ISD::SELECT_CC ||
+ SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
+ SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
+ !isHWTrueValue(SelectCC.getOperand(2)) ||
+ !isHWFalseValue(SelectCC.getOperand(3))) {
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
+ SelectCC.getOperand(0), // LHS
+ SelectCC.getOperand(1), // RHS
+ DAG.getConstant(-1, MVT::i32), // True
+ DAG.getConstant(0, MVT::i32), // Flase
+ SelectCC.getOperand(4)); // CC
+
+ break;
+ }
+ // Extract_vec (Build_vector) generated by custom lowering
+ // also needs to be customly combined
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return Arg->getOperand(Element);
+ }
+ }
+ if (Arg.getOpcode() == ISD::BITCAST &&
+ Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
+ Arg->getOperand(0).getOperand(Element));
+ }
+ }
+ }
+
+ case ISD::SELECT_CC: {
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
+ // selectcc x, y, a, b, inv(cc)
+ //
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
+ // selectcc x, y, a, b, cc
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::SELECT_CC) {
+ return SDValue();
+ }
+
+ SDValue RHS = N->getOperand(1);
+ SDValue True = N->getOperand(2);
+ SDValue False = N->getOperand(3);
+ ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ if (LHS.getOperand(2).getNode() != True.getNode() ||
+ LHS.getOperand(3).getNode() != False.getNode() ||
+ RHS.getNode() != False.getNode()) {
+ return SDValue();
+ }
+
+ switch (NCC) {
+ default: return SDValue();
+ case ISD::SETNE: return LHS;
+ case ISD::SETEQ: {
+ ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
+ LHSCC = ISD::getSetCCInverse(LHSCC,
+ LHS.getOperand(0).getValueType().isInteger());
+ return DAG.getSelectCC(N->getDebugLoc(),
+ LHS.getOperand(0),
+ LHS.getOperand(1),
+ LHS.getOperand(2),
+ LHS.getOperand(3),
+ LHSCC);
+ }
+ }
+ }
+ case AMDGPUISD::EXPORT: {
+ SDValue Arg = N->getOperand(1);
+ if (Arg.getOpcode() != ISD::BUILD_VECTOR)
+ break;
+ SDValue NewBldVec[4] = {
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32)
+ };
+ SDValue NewArgs[8] = {
+ N->getOperand(0), // Chain
+ SDValue(),
+ N->getOperand(2), // ArrayBase
+ N->getOperand(3), // Type
+ N->getOperand(4), // SWZ_X
+ N->getOperand(5), // SWZ_Y
+ N->getOperand(6), // SWZ_Z
+ N->getOperand(7) // SWZ_W
+ };
+ for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
+ if (C->isZero()) {
+ NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
+ } else if (C->isExactlyValue(1.0)) {
+ NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_0
+ } else {
+ NewBldVec[i] = Arg.getOperand(i);
+ }
+ } else {
+ NewBldVec[i] = Arg.getOperand(i);
+ }
+ }
+ DebugLoc DL = N->getDebugLoc();
+ NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
+ }
+ }
+ return SDValue();
+}
diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.h b/contrib/llvm/lib/Target/R600/R600ISelLowering.h
new file mode 100644
index 000000000000..2c09acb9af30
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600ISelLowering.h
@@ -0,0 +1,74 @@
+//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600ISELLOWERING_H
+#define R600ISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+
+namespace llvm {
+
+class R600InstrInfo;
+
+class R600TargetLowering : public AMDGPUTargetLowering {
+public:
+ R600TargetLowering(TargetMachine &TM);
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock * BB) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ void ReplaceNodeResults(SDNode * N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+ virtual SDValue LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
+private:
+ const R600InstrInfo * TII;
+
+ /// Each OpenCL kernel has nine implicit parameters that are stored in the
+ /// first nine dwords of a Vertex Buffer. These implicit parameters are
+ /// lowered to load instructions which retreive the values from the Vertex
+ /// Buffer.
+ SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ DebugLoc DL, unsigned DwordOffset) const;
+
+ void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineRegisterInfo & MRI, unsigned dword_offset) const;
+
+ /// \brief Lower ROTL opcode to BITALIGN
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
+ SelectionDAG &DAG) const;
+ void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
+ unsigned &Channel, unsigned &PtrIncr) const;
+ bool isZero(SDValue Op) const;
+};
+
+} // End namespace llvm;
+
+#endif // R600ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
new file mode 100644
index 000000000000..b232188a2641
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
@@ -0,0 +1,841 @@
+//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600InstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AMDGPUGenDFAPacketizer.inc"
+
+using namespace llvm;
+
+R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this)
+ { }
+
+const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
+}
+
+bool R600InstrInfo::isVector(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
+}
+
+void
+R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
+ && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
+ for (unsigned I = 0; I < 4; I++) {
+ unsigned SubRegIndex = RI.getSubRegFromChannel(I);
+ buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ RI.getSubReg(DestReg, SubRegIndex),
+ RI.getSubReg(SrcReg, SubRegIndex))
+ .addReg(DestReg,
+ RegState::Define | RegState::Implicit);
+ }
+ } else {
+
+ // We can't copy vec4 registers
+ assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
+ && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
+
+ MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ DestReg, SrcReg);
+ NewMI->getOperand(getOperandIdx(*NewMI, R600Operands::SRC0))
+ .setIsKill(KillSrc);
+ }
+}
+
+MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
+ unsigned DstReg, int64_t Imm) const {
+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
+ MachineInstrBuilder MIB(*MF, MI);
+ MIB.addReg(DstReg, RegState::Define);
+ MIB.addReg(AMDGPU::ALU_LITERAL_X);
+ MIB.addImm(Imm);
+ MIB.addReg(0); // PREDICATE_BIT
+
+ return MI;
+}
+
+unsigned R600InstrInfo::getIEQOpcode() const {
+ return AMDGPU::SETE_INT;
+}
+
+bool R600InstrInfo::isMov(unsigned Opcode) const {
+
+
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::MOV:
+ case AMDGPU::MOV_IMM_F32:
+ case AMDGPU::MOV_IMM_I32:
+ return true;
+ }
+}
+
+// Some instructions act as place holders to emulate operations that the GPU
+// hardware does automatically. This function can be used to check if
+// an opcode falls into this category.
+bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return false;
+ case AMDGPU::RETURN:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::DOT4_r600_pseudo:
+ case AMDGPU::DOT4_eg_pseudo:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::CUBE_r600_pseudo:
+ case AMDGPU::CUBE_r600_real:
+ case AMDGPU::CUBE_eg_pseudo:
+ case AMDGPU::CUBE_eg_real:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return ((TargetFlags & R600_InstFlag::OP1) |
+ (TargetFlags & R600_InstFlag::OP2) |
+ (TargetFlags & R600_InstFlag::OP3));
+}
+
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
+ const {
+ assert (Consts.size() <= 12 && "Too many operands in instructions group");
+ unsigned Pair1 = 0, Pair2 = 0;
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned ReadConstHalf = Consts[i] & 2;
+ unsigned ReadConstIndex = Consts[i] & (~3);
+ unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+ if (!Pair1) {
+ Pair1 = ReadHalfConst;
+ continue;
+ }
+ if (Pair1 == ReadHalfConst)
+ continue;
+ if (!Pair2) {
+ Pair2 = ReadHalfConst;
+ continue;
+ }
+ if (Pair2 != ReadHalfConst)
+ return false;
+ }
+ return true;
+}
+
+bool
+R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+ std::vector<unsigned> Consts;
+ for (unsigned i = 0, n = MIs.size(); i < n; i++) {
+ const MachineInstr *MI = MIs[i];
+
+ const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+
+ if (!isALUInstr(MI->getOpcode()))
+ continue;
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Const = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Consts.push_back(Const);
+ }
+ }
+ }
+ return fitsConstReadLimitations(Consts);
+}
+
+DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
+}
+
+static bool
+isPredicateSetter(unsigned Opcode) {
+ switch (Opcode) {
+ case AMDGPU::PRED_X:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static MachineInstr *
+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ while (I != MBB.begin()) {
+ --I;
+ MachineInstr *MI = I;
+ if (isPredicateSetter(MI->getOpcode()))
+ return MI;
+ }
+
+ return NULL;
+}
+
+static
+bool isJump(unsigned Opcode) {
+ return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
+}
+
+bool
+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Most of the following comes from the ARM implementation of AnalyzeBranch
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
+ return false;
+ }
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() ||
+ !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
+ if (LastOpc == AMDGPU::JUMP) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastOpc == AMDGPU::JUMP_COND) {
+ MachineInstr *predSet = I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
+ MachineInstr *predSet = --I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ FBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
+ const MachineInstr *MI = op.getParent();
+
+ switch (MI->getDesc().OpInfo->RegClass) {
+ default: // FIXME: fallthrough??
+ case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
+ case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
+ };
+}
+
+unsigned
+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
+ return 1;
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate !");
+ addFlag(PredSet, 0, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ return 1;
+ }
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate !");
+ addFlag(PredSet, 0, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
+ return 2;
+ }
+}
+
+unsigned
+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+
+ // Note : we leave PRED* instructions there.
+ // They may be needed when predicating instructions.
+
+ MachineBasicBlock::iterator I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 0;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ default:
+ return 0;
+ case AMDGPU::JUMP_COND: {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ I->eraseFromParent();
+ break;
+ }
+ case AMDGPU::JUMP:
+ I->eraseFromParent();
+ break;
+ }
+ I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 1;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ // FIXME: only one case??
+ default:
+ return 1;
+ case AMDGPU::JUMP_COND: {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ I->eraseFromParent();
+ break;
+ }
+ case AMDGPU::JUMP:
+ I->eraseFromParent();
+ break;
+ }
+ return 2;
+}
+
+bool
+R600InstrInfo::isPredicated(const MachineInstr *MI) const {
+ int idx = MI->findFirstPredOperandIdx();
+ if (idx < 0)
+ return false;
+
+ unsigned Reg = MI->getOperand(idx).getReg();
+ switch (Reg) {
+ default: return false;
+ case AMDGPU::PRED_SEL_ONE:
+ case AMDGPU::PRED_SEL_ZERO:
+ case AMDGPU::PREDICATE_BIT:
+ return true;
+ }
+}
+
+bool
+R600InstrInfo::isPredicable(MachineInstr *MI) const {
+ // XXX: KILL* instructions can be predicated, but they must be the last
+ // instruction in a clause, so this means any instructions after them cannot
+ // be predicated. Until we have proper support for instruction clauses in the
+ // backend, we will mark KILL* instructions as unpredicable.
+
+ if (MI->getOpcode() == AMDGPU::KILLGT) {
+ return false;
+ } else if (isVector(*MI)) {
+ return false;
+ } else {
+ return AMDGPUInstrInfo::isPredicable(MI);
+ }
+}
+
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const{
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ const BranchProbability &Probability)
+ const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ return false;
+}
+
+
+bool
+R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ MachineOperand &MO = Cond[1];
+ switch (MO.getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ MO.setImm(OPCODE_IS_NOT_ZERO_INT);
+ break;
+ case OPCODE_IS_NOT_ZERO_INT:
+ MO.setImm(OPCODE_IS_ZERO_INT);
+ break;
+ case OPCODE_IS_ZERO:
+ MO.setImm(OPCODE_IS_NOT_ZERO);
+ break;
+ case OPCODE_IS_NOT_ZERO:
+ MO.setImm(OPCODE_IS_ZERO);
+ break;
+ default:
+ return true;
+ }
+
+ MachineOperand &MO2 = Cond[2];
+ switch (MO2.getReg()) {
+ case AMDGPU::PRED_SEL_ZERO:
+ MO2.setReg(AMDGPU::PRED_SEL_ONE);
+ break;
+ case AMDGPU::PRED_SEL_ONE:
+ MO2.setReg(AMDGPU::PRED_SEL_ZERO);
+ break;
+ default:
+ return true;
+ }
+ return false;
+}
+
+bool
+R600InstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ return isPredicateSetter(MI->getOpcode());
+}
+
+
+bool
+R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ return false;
+}
+
+
+bool
+R600InstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setReg(Pred[2].getReg());
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
+ return false;
+}
+
+unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (PredCost)
+ *PredCost = 2;
+ return 2;
+}
+
+int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = 0;
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ if (MRI.livein_empty()) {
+ return 0;
+ }
+
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ LE = MRI.livein_end();
+ LI != LE; ++LI) {
+ Offset = std::max(Offset,
+ GET_REG_INDEX(RI.getEncodingValue(LI->first)));
+ }
+
+ return Offset + 1;
+}
+
+int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ int Offset = 0;
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Variable sized objects are not supported
+ assert(!MFI->hasVarSizedObjects());
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+ return getIndirectIndexBegin(MF) + Offset;
+}
+
+std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+ const MachineFunction &MF) const {
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
+ std::vector<unsigned> Regs;
+
+ unsigned StackWidth = TFL->getStackWidth(MF);
+ int End = getIndirectIndexEnd(MF);
+
+ if (End == -1) {
+ return Regs;
+ }
+
+ for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
+ unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
+ Regs.push_back(SuperReg);
+ for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
+ Regs.push_back(Reg);
+ }
+ }
+ return Regs;
+}
+
+unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ // XXX: Remove when we support a stack width > 2
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const {
+ return &AMDGPU::R600_TReg32RegClass;
+}
+
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
+ return &AMDGPU::TRegMemRegClass;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const {
+ unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+ AMDGPU::AR_X, OffsetReg);
+ setImmOperand(MOVA, R600Operands::WRITE, 0);
+
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ AddrReg, ValueReg)
+ .addReg(AMDGPU::AR_X, RegState::Implicit);
+ setImmOperand(Mov, R600Operands::DST_REL, 1);
+ return Mov;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const {
+ unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+ AMDGPU::AR_X,
+ OffsetReg);
+ setImmOperand(MOVA, R600Operands::WRITE, 0);
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ ValueReg,
+ AddrReg)
+ .addReg(AMDGPU::AR_X, RegState::Implicit);
+ setImmOperand(Mov, R600Operands::SRC0_REL, 1);
+
+ return Mov;
+}
+
+const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
+ return &AMDGPU::IndirectRegRegClass;
+}
+
+unsigned R600InstrInfo::getMaxAlusPerClause() const {
+ return 115;
+}
+
+MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opcode,
+ unsigned DstReg,
+ unsigned Src0Reg,
+ unsigned Src1Reg) const {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
+ DstReg); // $dst
+
+ if (Src1Reg) {
+ MIB.addImm(0) // $update_exec_mask
+ .addImm(0); // $update_predicate
+ }
+ MIB.addImm(1) // $write
+ .addImm(0) // $omod
+ .addImm(0) // $dst_rel
+ .addImm(0) // $dst_clamp
+ .addReg(Src0Reg) // $src0
+ .addImm(0) // $src0_neg
+ .addImm(0) // $src0_rel
+ .addImm(0) // $src0_abs
+ .addImm(-1); // $src0_sel
+
+ if (Src1Reg) {
+ MIB.addReg(Src1Reg) // $src1
+ .addImm(0) // $src1_neg
+ .addImm(0) // $src1_rel
+ .addImm(0) // $src1_abs
+ .addImm(-1); // $src1_sel
+ }
+
+ //XXX: The r600g finalizer expects this to be 1, once we've moved the
+ //scheduling to the backend, we can change the default to 0.
+ MIB.addImm(1) // $last
+ .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
+ .addImm(0); // $literal
+
+ return MIB;
+}
+
+MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const {
+ MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
+ AMDGPU::ALU_LITERAL_X);
+ setImmOperand(MovImm, R600Operands::IMM, Imm);
+ return MovImm;
+}
+
+int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
+ R600Operands::Ops Op) const {
+ return getOperandIdx(MI.getOpcode(), Op);
+}
+
+int R600InstrInfo::getOperandIdx(unsigned Opcode,
+ R600Operands::Ops Op) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+ unsigned OpTableIdx;
+
+ if (!HAS_NATIVE_OPERANDS(TargetFlags)) {
+ switch (Op) {
+ case R600Operands::DST: return 0;
+ case R600Operands::SRC0: return 1;
+ case R600Operands::SRC1: return 2;
+ case R600Operands::SRC2: return 3;
+ default:
+ assert(!"Unknown operand type for instruction");
+ return -1;
+ }
+ }
+
+ if (TargetFlags & R600_InstFlag::OP1) {
+ OpTableIdx = 0;
+ } else if (TargetFlags & R600_InstFlag::OP2) {
+ OpTableIdx = 1;
+ } else {
+ assert((TargetFlags & R600_InstFlag::OP3) && "OP1, OP2, or OP3 not defined "
+ "for this instruction");
+ OpTableIdx = 2;
+ }
+
+ return R600Operands::ALUOpTable[OpTableIdx][Op];
+}
+
+void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
+ int64_t Imm) const {
+ int Idx = getOperandIdx(*MI, Op);
+ assert(Idx != -1 && "Operand not supported for this instruction.");
+ assert(MI->getOperand(Idx).isImm());
+ MI->getOperand(Idx).setImm(Imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction flag getters/setters
+//===----------------------------------------------------------------------===//
+
+bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
+ return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
+}
+
+MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ int FlagIndex = 0;
+ if (Flag != 0) {
+ // If we pass something other than the default value of Flag to this
+ // function, it means we are want to set a flag on an instruction
+ // that uses native encoding.
+ assert(HAS_NATIVE_OPERANDS(TargetFlags));
+ bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
+ switch (Flag) {
+ case MO_FLAG_CLAMP:
+ FlagIndex = getOperandIdx(*MI, R600Operands::CLAMP);
+ break;
+ case MO_FLAG_MASK:
+ FlagIndex = getOperandIdx(*MI, R600Operands::WRITE);
+ break;
+ case MO_FLAG_NOT_LAST:
+ case MO_FLAG_LAST:
+ FlagIndex = getOperandIdx(*MI, R600Operands::LAST);
+ break;
+ case MO_FLAG_NEG:
+ switch (SrcIdx) {
+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_NEG); break;
+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_NEG); break;
+ case 2: FlagIndex = getOperandIdx(*MI, R600Operands::SRC2_NEG); break;
+ }
+ break;
+
+ case MO_FLAG_ABS:
+ assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
+ "instructions.");
+ (void)IsOP3;
+ switch (SrcIdx) {
+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_ABS); break;
+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_ABS); break;
+ }
+ break;
+
+ default:
+ FlagIndex = -1;
+ break;
+ }
+ assert(FlagIndex != -1 && "Flag not supported for this instruction");
+ } else {
+ FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
+ assert(FlagIndex != 0 &&
+ "Instruction flags not supported for this instruction");
+ }
+
+ MachineOperand &FlagOp = MI->getOperand(FlagIndex);
+ assert(FlagOp.isImm());
+ return FlagOp;
+}
+
+void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ if (Flag == 0) {
+ return;
+ }
+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
+ if (Flag == MO_FLAG_NOT_LAST) {
+ clearFlag(MI, Operand, MO_FLAG_LAST);
+ } else if (Flag == MO_FLAG_MASK) {
+ clearFlag(MI, Operand, Flag);
+ } else {
+ FlagOp.setImm(1);
+ }
+ } else {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand);
+ FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
+ }
+}
+
+void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
+ FlagOp.setImm(0);
+ } else {
+ MachineOperand &FlagOp = getFlagOp(MI);
+ unsigned InstFlags = FlagOp.getImm();
+ InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
+ FlagOp.setImm(InstFlags);
+ }
+}
diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.h b/contrib/llvm/lib/Target/R600/R600InstrInfo.h
new file mode 100644
index 000000000000..dbae90013d22
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.h
@@ -0,0 +1,204 @@
+//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for R600InstrInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600INSTRUCTIONINFO_H_
+#define R600INSTRUCTIONINFO_H_
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDIL.h"
+#include "R600Defines.h"
+#include "R600RegisterInfo.h"
+#include <map>
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class DFAPacketizer;
+ class ScheduleDAG;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ class R600InstrInfo : public AMDGPUInstrInfo {
+ private:
+ const R600RegisterInfo RI;
+
+ int getBranchInstr(const MachineOperand &op) const;
+
+ public:
+ explicit R600InstrInfo(AMDGPUTargetMachine &tm);
+
+ const R600RegisterInfo &getRegisterInfo() const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ bool isTrig(const MachineInstr &MI) const;
+ bool isPlaceHolderOpcode(unsigned opcode) const;
+ bool isReductionOp(unsigned opcode) const;
+ bool isCubeOp(unsigned opcode) const;
+
+ /// \returns true if this \p Opcode represents an ALU instruction.
+ bool isALUInstr(unsigned Opcode) const;
+
+ bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
+ bool canBundle(const std::vector<MachineInstr *> &) const;
+
+ /// \breif Vector instructions are instructions that must fill all
+ /// instruction slots within an instruction group.
+ bool isVector(const MachineInstr &MI) const;
+
+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const;
+
+ virtual unsigned getIEQOpcode() const;
+ virtual bool isMov(unsigned Opcode) const;
+
+ DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
+
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ bool isPredicated(const MachineInstr *MI) const;
+
+ bool isPredicable(MachineInstr *MI) const;
+
+ bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ const BranchProbability &Probability) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const ;
+
+ bool
+ isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ unsigned int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
+
+ virtual int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const { return 1;}
+
+ /// \returns a list of all the registers that may be accesed using indirect
+ /// addressing.
+ std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
+
+ unsigned getMaxAlusPerClause() const;
+
+ ///buildDefaultInstruction - This function returns a MachineInstr with
+ /// all the instruction modifiers initialized to their default values.
+ /// You can use this function to avoid manually specifying each instruction
+ /// modifier operand when building a new instruction.
+ ///
+ /// \returns a MachineInstr with all the instruction modifiers initialized
+ /// to their default values.
+ MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opcode,
+ unsigned DstReg,
+ unsigned Src0Reg,
+ unsigned Src1Reg = 0) const;
+
+ MachineInstr *buildMovImm(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const;
+
+ /// \brief Get the index of Op in the MachineInstr.
+ ///
+ /// \returns -1 if the Instruction does not contain the specified \p Op.
+ int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
+
+ /// \brief Get the index of \p Op for the given Opcode.
+ ///
+ /// \returns -1 if the Instruction does not contain the specified \p Op.
+ int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
+
+ /// \brief Helper function for setting instruction flag values.
+ void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;
+
+ /// \returns true if this instruction has an operand for storing target flags.
+ bool hasFlagOperand(const MachineInstr &MI) const;
+
+ ///\brief Add one of the MO_FLAG* flags to the specified \p Operand.
+ void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+
+ ///\brief Determine if the specified \p Flag is set on this \p Operand.
+ bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
+
+ /// \param SrcIdx The register source to set the flag on (e.g src0, src1, src2)
+ /// \param Flag The flag being set.
+ ///
+ /// \returns the operand containing the flags for this instruction.
+ MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0,
+ unsigned Flag = 0) const;
+
+ /// \brief Clear the specified flag on the instruction.
+ void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+};
+
+} // End llvm namespace
+
+#endif // R600INSTRINFO_H_
diff --git a/contrib/llvm/lib/Target/R600/R600Instructions.td b/contrib/llvm/lib/Target/R600/R600Instructions.td
new file mode 100644
index 000000000000..663b41a66d6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600Instructions.td
@@ -0,0 +1,2267 @@
+//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Tablegen instruction definitions
+//
+//===----------------------------------------------------------------------===//
+
+include "R600Intrinsics.td"
+
+class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin>
+ : AMDGPUInst <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+ bit Trig = 0;
+ bit Op3 = 0;
+ bit isVector = 0;
+ bits<2> FlagOperandIdx = 0;
+ bit Op1 = 0;
+ bit Op2 = 0;
+ bit HasNativeOperands = 0;
+
+ bits<11> op_code = inst;
+ //let Inst = inst;
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ let TSFlags{4} = Trig;
+ let TSFlags{5} = Op3;
+
+ // Vector instructions are instructions that must fill all slots in an
+ // instruction group
+ let TSFlags{6} = isVector;
+ let TSFlags{8-7} = FlagOperandIdx;
+ let TSFlags{9} = HasNativeOperands;
+ let TSFlags{10} = Op1;
+ let TSFlags{11} = Op2;
+}
+
+class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst <outs, ins, asm, pattern> {
+ field bits<64> Inst;
+
+ let Namespace = "AMDGPU";
+}
+
+def MEMxi : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
+ let PrintMethod = "printMemOperand";
+}
+
+def MEMrr : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
+}
+
+// Operands for non-registers
+
+class InstFlag<string PM = "printOperand", int Default = 0>
+ : OperandWithDefaultOps <i32, (ops (i32 Default))> {
+ let PrintMethod = PM;
+}
+
+// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
+ let PrintMethod = "printSel";
+}
+
+def LITERAL : InstFlag<"printLiteral">;
+
+def WRITE : InstFlag <"printWrite", 1>;
+def OMOD : InstFlag <"printOMOD">;
+def REL : InstFlag <"printRel">;
+def CLAMP : InstFlag <"printClamp">;
+def NEG : InstFlag <"printNeg">;
+def ABS : InstFlag <"printAbs">;
+def UEM : InstFlag <"printUpdateExecMask">;
+def UP : InstFlag <"printUpdatePred">;
+
+// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
+// Once we start using the packetizer in this backend we should have this
+// default to 0.
+def LAST : InstFlag<"printLast", 1>;
+
+def FRAMEri : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
+}
+
+def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
+def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
+def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
+
+class R600ALU_Word0 {
+ field bits<32> Word0;
+
+ bits<11> src0;
+ bits<1> src0_neg;
+ bits<1> src0_rel;
+ bits<11> src1;
+ bits<1> src1_rel;
+ bits<1> src1_neg;
+ bits<3> index_mode = 0;
+ bits<2> pred_sel;
+ bits<1> last;
+
+ bits<9> src0_sel = src0{8-0};
+ bits<2> src0_chan = src0{10-9};
+ bits<9> src1_sel = src1{8-0};
+ bits<2> src1_chan = src1{10-9};
+
+ let Word0{8-0} = src0_sel;
+ let Word0{9} = src0_rel;
+ let Word0{11-10} = src0_chan;
+ let Word0{12} = src0_neg;
+ let Word0{21-13} = src1_sel;
+ let Word0{22} = src1_rel;
+ let Word0{24-23} = src1_chan;
+ let Word0{25} = src1_neg;
+ let Word0{28-26} = index_mode;
+ let Word0{30-29} = pred_sel;
+ let Word0{31} = last;
+}
+
+class R600ALU_Word1 {
+ field bits<32> Word1;
+
+ bits<11> dst;
+ bits<3> bank_swizzle = 0;
+ bits<1> dst_rel;
+ bits<1> clamp;
+
+ bits<7> dst_sel = dst{6-0};
+ bits<2> dst_chan = dst{10-9};
+
+ let Word1{20-18} = bank_swizzle;
+ let Word1{27-21} = dst_sel;
+ let Word1{28} = dst_rel;
+ let Word1{30-29} = dst_chan;
+ let Word1{31} = clamp;
+}
+
+class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{
+
+ bits<1> src0_abs;
+ bits<1> src1_abs;
+ bits<1> update_exec_mask;
+ bits<1> update_pred;
+ bits<1> write;
+ bits<2> omod;
+
+ let Word1{0} = src0_abs;
+ let Word1{1} = src1_abs;
+ let Word1{2} = update_exec_mask;
+ let Word1{3} = update_pred;
+ let Word1{4} = write;
+ let Word1{6-5} = omod;
+ let Word1{17-7} = alu_inst;
+}
+
+class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
+
+ bits<11> src2;
+ bits<1> src2_rel;
+ bits<1> src2_neg;
+
+ bits<9> src2_sel = src2{8-0};
+ bits<2> src2_chan = src2{10-9};
+
+ let Word1{8-0} = src2_sel;
+ let Word1{9} = src2_rel;
+ let Word1{11-10} = src2_chan;
+ let Word1{12} = src2_neg;
+ let Word1{17-13} = alu_inst;
+}
+
+class VTX_WORD0 {
+ field bits<32> Word0;
+ bits<7> SRC_GPR;
+ bits<5> VC_INST;
+ bits<2> FETCH_TYPE;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> BUFFER_ID;
+ bits<1> SRC_REL;
+ bits<2> SRC_SEL_X;
+ bits<6> MEGA_FETCH_COUNT;
+
+ let Word0{4-0} = VC_INST;
+ let Word0{6-5} = FETCH_TYPE;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = BUFFER_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{25-24} = SRC_SEL_X;
+ let Word0{31-26} = MEGA_FETCH_COUNT;
+}
+
+class VTX_WORD1_GPR {
+ field bits<32> Word1;
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<1> USE_CONST_FIELDS;
+ bits<6> DATA_FORMAT;
+ bits<2> NUM_FORMAT_ALL;
+ bits<1> FORMAT_COMP_ALL;
+ bits<1> SRF_MODE_ALL;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{8} = 0; // Reserved
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{21} = USE_CONST_FIELDS;
+ let Word1{27-22} = DATA_FORMAT;
+ let Word1{29-28} = NUM_FORMAT_ALL;
+ let Word1{30} = FORMAT_COMP_ALL;
+ let Word1{31} = SRF_MODE_ALL;
+}
+
+class TEX_WORD0 {
+ field bits<32> Word0;
+
+ bits<5> TEX_INST;
+ bits<2> INST_MOD;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> RESOURCE_ID;
+ bits<7> SRC_GPR;
+ bits<1> SRC_REL;
+ bits<1> ALT_CONST;
+ bits<2> RESOURCE_INDEX_MODE;
+ bits<2> SAMPLER_INDEX_MODE;
+
+ let Word0{4-0} = TEX_INST;
+ let Word0{6-5} = INST_MOD;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = RESOURCE_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{24} = ALT_CONST;
+ let Word0{26-25} = RESOURCE_INDEX_MODE;
+ let Word0{28-27} = SAMPLER_INDEX_MODE;
+}
+
+class TEX_WORD1 {
+ field bits<32> Word1;
+
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<7> LOD_BIAS;
+ bits<1> COORD_TYPE_X;
+ bits<1> COORD_TYPE_Y;
+ bits<1> COORD_TYPE_Z;
+ bits<1> COORD_TYPE_W;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{27-21} = LOD_BIAS;
+ let Word1{28} = COORD_TYPE_X;
+ let Word1{29} = COORD_TYPE_Y;
+ let Word1{30} = COORD_TYPE_Z;
+ let Word1{31} = COORD_TYPE_W;
+}
+
+class TEX_WORD2 {
+ field bits<32> Word2;
+
+ bits<5> OFFSET_X;
+ bits<5> OFFSET_Y;
+ bits<5> OFFSET_Z;
+ bits<5> SAMPLER_ID;
+ bits<3> SRC_SEL_X;
+ bits<3> SRC_SEL_Y;
+ bits<3> SRC_SEL_Z;
+ bits<3> SRC_SEL_W;
+
+ let Word2{4-0} = OFFSET_X;
+ let Word2{9-5} = OFFSET_Y;
+ let Word2{14-10} = OFFSET_Z;
+ let Word2{19-15} = SAMPLER_ID;
+ let Word2{22-20} = SRC_SEL_X;
+ let Word2{25-23} = SRC_SEL_Y;
+ let Word2{28-26} = SRC_SEL_Z;
+ let Word2{31-29} = SRC_SEL_W;
+}
+
+/*
+XXX: R600 subtarget uses a slightly different encoding than the other
+subtargets. We currently handle this in R600MCCodeEmitter, but we may
+want to use these instruction classes in the future.
+
+class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
+
+ bits<1> fog_merge;
+ bits<10> alu_inst;
+
+ let Inst{37} = fog_merge;
+ let Inst{39-38} = omod;
+ let Inst{49-40} = alu_inst;
+}
+
+class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
+
+ bits<11> alu_inst;
+
+ let Inst{38-37} = omod;
+ let Inst{49-39} = alu_inst;
+}
+*/
+
+def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
+ (ops PRED_SEL_OFF)>;
+
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+
+// Class for instructions with only one source register.
+// If you add new ins to this instruction, make sure they are listed before
+// $literal, because the backend currently assumes that the last operand is
+// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in
+// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
+// and R600InstrInfo::getOperandIdx().
+class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <0,
+ (outs R600_Reg32:$dst),
+ (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(" ", opName,
+ "$clamp $dst$write$dst_rel$omod, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <inst> {
+
+ let src1 = 0;
+ let src1_rel = 0;
+ let src1_neg = 0;
+ let src1_abs = 0;
+ let update_exec_mask = 0;
+ let update_pred = 0;
+ let HasNativeOperands = 1;
+ let Op1 = 1;
+ let DisableEncoding = "$literal";
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itin = AnyALU> :
+ R600_1OP <inst, opName,
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0))]
+>;
+
+// If you add our change the operands for R600_2OP instructions, you must
+// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
+// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
+class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
+ OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(" ", opName,
+ "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <inst> {
+
+ let HasNativeOperands = 1;
+ let Op2 = 1;
+ let DisableEncoding = "$literal";
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itim = AnyALU> :
+ R600_2OP <inst, opName,
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
+ R600_Reg32:$src1))]
+>;
+
+// If you add our change the operands for R600_3OP instructions, you must
+// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
+// R600InstrInfo::buildDefaultInstruction(), and
+// R600InstrInfo::getOperandIdx().
+class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <0,
+ (outs R600_Reg32:$dst),
+ (ins REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(" ", opName, "$clamp $dst$dst_rel, "
+ "$src0_neg$src0$src0_rel, "
+ "$src1_neg$src1$src1_rel, "
+ "$src2_neg$src2$src2_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP3<inst>{
+
+ let HasNativeOperands = 1;
+ let DisableEncoding = "$literal";
+ let Op3 = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin = VecALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ ins,
+ asm,
+ pattern,
+ itin>;
+
+class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg128:$DST_GPR),
+ (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
+ !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
+ pattern,
+ itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+ let TEX_INST = inst{4-0};
+ let SRC_REL = 0;
+ let DST_REL = 0;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let LOD_BIAS = 0;
+
+ let INST_MOD = 0;
+ let FETCH_WHOLE_QUAD = 0;
+ let ALT_CONST = 0;
+ let SAMPLER_INDEX_MODE = 0;
+
+ let COORD_TYPE_X = 0;
+ let COORD_TYPE_Y = 0;
+ let COORD_TYPE_Z = 0;
+ let COORD_TYPE_W = 0;
+ }
+
+} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
+
+def TEX_SHADOW : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
+ }]
+>;
+
+def TEX_RECT : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 5;
+ }]
+>;
+
+def TEX_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 9 || TType == 10 || TType == 15 || TType == 16;
+ }]
+>;
+
+def TEX_SHADOW_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 11 || TType == 12 || TType == 17;
+ }]
+>;
+
+class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
+ dag ins, string asm, list<dag> pattern> :
+ InstR600ISA <outs, ins, asm, pattern> {
+ bits<7> RW_GPR;
+ bits<7> INDEX_GPR;
+
+ bits<2> RIM;
+ bits<2> TYPE;
+ bits<1> RW_REL;
+ bits<2> ELEM_SIZE;
+
+ bits<12> ARRAY_SIZE;
+ bits<4> COMP_MASK;
+ bits<4> BURST_COUNT;
+ bits<1> VPM;
+ bits<1> eop;
+ bits<1> MARK;
+ bits<1> BARRIER;
+
+ // CF_ALLOC_EXPORT_WORD0_RAT
+ let Inst{3-0} = rat_id;
+ let Inst{9-4} = rat_inst;
+ let Inst{10} = 0; // Reserved
+ let Inst{12-11} = RIM;
+ let Inst{14-13} = TYPE;
+ let Inst{21-15} = RW_GPR;
+ let Inst{22} = RW_REL;
+ let Inst{29-23} = INDEX_GPR;
+ let Inst{31-30} = ELEM_SIZE;
+
+ // CF_ALLOC_EXPORT_WORD1_BUF
+ let Inst{43-32} = ARRAY_SIZE;
+ let Inst{47-44} = COMP_MASK;
+ let Inst{51-48} = BURST_COUNT;
+ let Inst{52} = VPM;
+ let Inst{53} = eop;
+ let Inst{61-54} = cf_inst;
+ let Inst{62} = MARK;
+ let Inst{63} = BARRIER;
+}
+
+class LoadParamFrag <PatFrag load_type> : PatFrag <
+ (ops node:$ptr), (load_type node:$ptr),
+ [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
+>;
+
+def load_param : LoadParamFrag<load>;
+def load_param_zexti8 : LoadParamFrag<zextloadi8>;
+def load_param_zexti16 : LoadParamFrag<zextloadi16>;
+
+def isR600 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
+def isR700 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->getDeviceFlag()"
+ ">= OCL_DEVICE_RV710">;
+def isEG : Predicate<
+ "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
+ "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
+ "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
+
+def isCayman : Predicate<"Subtarget.device()"
+ "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
+def isEGorCayman : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
+ "|| Subtarget.device()->getGeneration() =="
+ "AMDGPUDeviceInfo::HD6XXX">;
+
+def isR600toCayman : Predicate<
+ "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
+
+//===----------------------------------------------------------------------===//
+// R600 SDNodes
+//===----------------------------------------------------------------------===//
+
+def INTERP_PAIR_XY : AMDGPUShaderInst <
+ (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1",
+ []>;
+
+def INTERP_PAIR_ZW : AMDGPUShaderInst <
+ (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
+ []>;
+
+def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
+ SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
+ [SDNPVariadic]
+>;
+
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+
+def INTERP_VEC_LOAD : AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins i32imm:$src0),
+ "INTERP_LOAD $src0 : $dst",
+ []>;
+
+def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
+ let bank_swizzle = 5;
+}
+
+def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
+ let bank_swizzle = 5;
+}
+
+def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
+
+//===----------------------------------------------------------------------===//
+// Export Instructions
+//===----------------------------------------------------------------------===//
+
+def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
+
+def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+class ExportWord0 {
+ field bits<32> Word0;
+
+ bits<13> arraybase;
+ bits<2> type;
+ bits<7> gpr;
+ bits<2> elem_size;
+
+ let Word0{12-0} = arraybase;
+ let Word0{14-13} = type;
+ let Word0{21-15} = gpr;
+ let Word0{22} = 0; // RW_REL
+ let Word0{29-23} = 0; // INDEX_GPR
+ let Word0{31-30} = elem_size;
+}
+
+class ExportSwzWord1 {
+ field bits<32> Word1;
+
+ bits<3> sw_x;
+ bits<3> sw_y;
+ bits<3> sw_z;
+ bits<3> sw_w;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{2-0} = sw_x;
+ let Word1{5-3} = sw_y;
+ let Word1{8-6} = sw_z;
+ let Word1{11-9} = sw_w;
+}
+
+class ExportBufWord1 {
+ field bits<32> Word1;
+
+ bits<12> arraySize;
+ bits<4> compMask;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{11-0} = arraySize;
+ let Word1{15-12} = compMask;
+}
+
+multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
+ def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
+ 0, 61, 0, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
+ 0, 61, 7, 0, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_dummy (i32 imm:$type)),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_dummy 1),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
+ (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
+ imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
+ >;
+
+}
+
+multiclass SteamOutputExportPattern<Instruction ExportInst,
+ bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
+// Stream0
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf0inst, 0)>;
+// Stream1
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf1inst, 0)>;
+// Stream2
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf2inst, 0)>;
+// Stream3
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf3inst, 0)>;
+}
+
+let usesCustomInserter = 1 in {
+
+class ExportSwzInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
+ i32imm:$eop),
+ !strconcat("EXPORT", " $gpr"),
+ []>, ExportWord0, ExportSwzWord1 {
+ let elem_size = 3;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+} // End usesCustomInserter = 1
+
+class ExportBufInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
+ !strconcat("EXPORT", " $gpr"),
+ []>, ExportWord0, ExportBufWord1 {
+ let elem_size = 0;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions
+//===----------------------------------------------------------------------===//
+
+class CF_ALU_WORD0 {
+ field bits<32> Word0;
+
+ bits<22> ADDR;
+ bits<4> KCACHE_BANK0;
+ bits<4> KCACHE_BANK1;
+ bits<2> KCACHE_MODE0;
+
+ let Word0{21-0} = ADDR;
+ let Word0{25-22} = KCACHE_BANK0;
+ let Word0{29-26} = KCACHE_BANK1;
+ let Word0{31-30} = KCACHE_MODE0;
+}
+
+class CF_ALU_WORD1 {
+ field bits<32> Word1;
+
+ bits<2> KCACHE_MODE1;
+ bits<8> KCACHE_ADDR0;
+ bits<8> KCACHE_ADDR1;
+ bits<7> COUNT;
+ bits<1> ALT_CONST;
+ bits<4> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
+ bits<1> BARRIER;
+
+ let Word1{1-0} = KCACHE_MODE1;
+ let Word1{9-2} = KCACHE_ADDR0;
+ let Word1{17-10} = KCACHE_ADDR1;
+ let Word1{24-18} = COUNT;
+ let Word1{25} = ALT_CONST;
+ let Word1{29-26} = CF_INST;
+ let Word1{30} = WHOLE_QUAD_MODE;
+ let Word1{31} = BARRIER;
+}
+
+class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
+(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1,
+i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
+!strconcat(OpName, " $COUNT, @$ADDR, "
+"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]"
+", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"),
+[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let ALT_CONST = 0;
+ let WHOLE_QUAD_MODE = 0;
+ let BARRIER = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class CF_WORD0 {
+ field bits<32> Word0;
+
+ bits<24> ADDR;
+ bits<3> JUMPTABLE_SEL;
+
+ let Word0{23-0} = ADDR;
+ let Word0{26-24} = JUMPTABLE_SEL;
+}
+
+class CF_WORD1 {
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<6> COUNT;
+ bits<1> VALID_PIXEL_MODE;
+ bits<8> CF_INST;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{15-10} = COUNT;
+ let Word1{20} = VALID_PIXEL_MODE;
+ let Word1{29-22} = CF_INST;
+ let Word1{31} = BARRIER;
+}
+
+class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let BARRIER = 1;
+ let JUMPTABLE_SEL = 0;
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+"TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+
+def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+"VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+
+def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let COUNT = 0;
+ let POP_COUNT = 0;
+}
+
+def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_ALU : ALU_CLAUSE<8, "ALU">;
+def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
+
+def STACK_SIZE : AMDGPUInst <(outs),
+(ins i32imm:$num), "nstack $num", [] > {
+ field bits<8> Inst;
+ bits<8> num;
+ let Inst = num;
+}
+
+let Predicates = [isR600toCayman] in {
+
+//===----------------------------------------------------------------------===//
+// Common Instructions R600, R700, Evergreen, Cayman
+//===----------------------------------------------------------------------===//
+
+def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
+// Non-IEEE MUL: 0 * anything = 0
+def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
+def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
+def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
+def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
+
+// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
+// so some of the instruction names don't match the asm string.
+// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
+def SETE : R600_2OP <
+ 0x08, "SETE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_EQ))]
+>;
+
+def SGT : R600_2OP <
+ 0x09, "SETGT",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_GT))]
+>;
+
+def SGE : R600_2OP <
+ 0xA, "SETGE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_GE))]
+>;
+
+def SNE : R600_2OP <
+ 0xB, "SETNE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_NE))]
+>;
+
+def SETE_DX10 : R600_2OP <
+ 0xC, "SETE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_EQ))]
+>;
+
+def SETGT_DX10 : R600_2OP <
+ 0xD, "SETGT_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GT))]
+>;
+
+def SETGE_DX10 : R600_2OP <
+ 0xE, "SETGE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GE))]
+>;
+
+def SETNE_DX10 : R600_2OP <
+ 0xF, "SETNE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_NE))]
+>;
+
+def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
+def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
+def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
+def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
+def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
+
+def MOV : R600_1OP <0x19, "MOV", []>;
+
+let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
+
+class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
+ (outs R600_Reg32:$dst),
+ (ins immType:$imm),
+ "",
+ []
+>;
+
+} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
+
+def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
+def : Pat <
+ (imm:$val),
+ (MOV_IMM_I32 imm:$val)
+>;
+
+def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
+def : Pat <
+ (fpimm:$val),
+ (MOV_IMM_F32 fpimm:$val)
+>;
+
+def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
+def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
+def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
+def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
+
+let hasSideEffects = 1 in {
+
+def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
+
+} // end hasSideEffects
+
+def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
+def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
+def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
+def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
+def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
+def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
+def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
+def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
+def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
+def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
+
+def SETE_INT : R600_2OP <
+ 0x3A, "SETE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
+>;
+
+def SETGT_INT : R600_2OP <
+ 0x3B, "SETGT_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
+>;
+
+def SETGE_INT : R600_2OP <
+ 0x3C, "SETGE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
+>;
+
+def SETNE_INT : R600_2OP <
+ 0x3D, "SETNE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
+>;
+
+def SETGT_UINT : R600_2OP <
+ 0x3E, "SETGT_UINT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
+>;
+
+def SETGE_UINT : R600_2OP <
+ 0x3F, "SETGE_UINT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
+>;
+
+def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
+def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGE_INT", []>;
+def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
+def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
+
+def CNDE_INT : R600_3OP <
+ 0x1C, "CNDE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_EQ))]
+>;
+
+def CNDGE_INT : R600_3OP <
+ 0x1E, "CNDGE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_GE))]
+>;
+
+def CNDGT_INT : R600_3OP <
+ 0x1D, "CNDGT_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_GT))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Texture instructions
+//===----------------------------------------------------------------------===//
+
+def TEX_LD : R600_TEX <
+ 0x03, "TEX_LD",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR,
+ imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
+ imm:$SAMPLER_ID, imm:$textureTarget))]
+> {
+let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z,"
+ "$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
+let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
+ i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
+ i32imm:$textureTarget);
+}
+
+def TEX_GET_TEXTURE_RESINFO : R600_TEX <
+ 0x04, "TEX_GET_TEXTURE_RESINFO",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_GET_GRADIENTS_H : R600_TEX <
+ 0x07, "TEX_GET_GRADIENTS_H",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_GET_GRADIENTS_V : R600_TEX <
+ 0x08, "TEX_GET_GRADIENTS_V",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SET_GRADIENTS_H : R600_TEX <
+ 0x0B, "TEX_SET_GRADIENTS_H",
+ []
+>;
+
+def TEX_SET_GRADIENTS_V : R600_TEX <
+ 0x0C, "TEX_SET_GRADIENTS_V",
+ []
+>;
+
+def TEX_SAMPLE : R600_TEX <
+ 0x10, "TEX_SAMPLE",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C : R600_TEX <
+ 0x18, "TEX_SAMPLE_C",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_L : R600_TEX <
+ 0x11, "TEX_SAMPLE_L",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C_L : R600_TEX <
+ 0x19, "TEX_SAMPLE_C_L",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_LB : R600_TEX <
+ 0x12, "TEX_SAMPLE_LB",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C_LB : R600_TEX <
+ 0x1A, "TEX_SAMPLE_C_LB",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_G : R600_TEX <
+ 0x14, "TEX_SAMPLE_G",
+ []
+>;
+
+def TEX_SAMPLE_C_G : R600_TEX <
+ 0x1C, "TEX_SAMPLE_C_G",
+ []
+>;
+
+//===----------------------------------------------------------------------===//
+// Helper classes for common instructions
+//===----------------------------------------------------------------------===//
+
+class MUL_LIT_Common <bits<5> inst> : R600_3OP <
+ inst, "MUL_LIT",
+ []
+>;
+
+class MULADD_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD",
+ []
+>;
+
+class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD_IEEE",
+ [(set (f32 R600_Reg32:$dst),
+ (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
+>;
+
+class CNDE_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_EQ))]
+>;
+
+class CNDGT_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDGT",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_GT))]
+>;
+
+class CNDGE_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDGE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_GE))]
+>;
+
+multiclass DOT4_Common <bits<11> inst> {
+
+ def _pseudo : R600_REDUCTION <inst,
+ (ins R600_Reg128:$src0, R600_Reg128:$src1),
+ "DOT4 $dst $src0, $src1",
+ [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
+ >;
+
+ def _real : R600_2OP <inst, "DOT4", []>;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+multiclass CUBE_Common <bits<11> inst> {
+
+ def _pseudo : InstR600 <
+ inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src),
+ "CUBE $dst $src",
+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+ VecALU
+ > {
+ let isPseudo = 1;
+ }
+
+ def _real : R600_2OP <inst, "CUBE", []>;
+}
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
+
+class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "EXP_IEEE", fexp2
+>;
+
+class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "FLT_TO_INT", fp_to_sint
+>;
+
+class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "INT_TO_FLT", sint_to_fp
+>;
+
+class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "FLT_TO_UINT", fp_to_uint
+>;
+
+class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "UINT_TO_FLT", uint_to_fp
+>;
+
+class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
+ inst, "LOG_CLAMPED", []
+>;
+
+class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "LOG_IEEE", flog2
+>;
+
+class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
+class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
+class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
+class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI_INT", mulhs
+>;
+class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI", mulhu
+>;
+class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULLO_INT", mul
+>;
+class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>;
+
+class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIP_CLAMPED", []
+>;
+
+class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))]
+>;
+
+class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIP_UINT", AMDGPUurecip
+>;
+
+class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
+>;
+
+class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIPSQRT_IEEE", []
+>;
+
+class SIN_Common <bits<11> inst> : R600_1OP <
+ inst, "SIN", []>{
+ let Trig = 1;
+}
+
+class COS_Common <bits<11> inst> : R600_1OP <
+ inst, "COS", []> {
+ let Trig = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper patterns for complex intrinsics
+//===----------------------------------------------------------------------===//
+
+multiclass DIV_Common <InstR600 recip_ieee> {
+def : Pat<
+ (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
+ (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+>;
+
+def : Pat<
+ (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
+ (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+>;
+}
+
+class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
+ (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
+ (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
+>;
+
+//===----------------------------------------------------------------------===//
+// R600 / R700 Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR600] in {
+
+ def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
+ def MULADD_r600 : MULADD_Common<0x10>;
+ def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
+ def CNDE_r600 : CNDE_Common<0x18>;
+ def CNDGT_r600 : CNDGT_Common<0x19>;
+ def CNDGE_r600 : CNDGE_Common<0x1A>;
+ defm DOT4_r600 : DOT4_Common<0x50>;
+ defm CUBE_r600 : CUBE_Common<0x52>;
+ def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
+ def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
+ def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
+ def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
+ def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
+ def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
+ def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
+ def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
+ def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
+ def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
+ def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
+ def SIN_r600 : SIN_Common<0x6E>;
+ def COS_r600 : COS_Common<0x6F>;
+ def ASHR_r600 : ASHR_Common<0x70>;
+ def LSHR_r600 : LSHR_Common<0x71>;
+ def LSHL_r600 : LSHL_Common<0x72>;
+ def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
+ def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
+ def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
+ def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
+ def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
+
+ defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
+ def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>;
+ def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
+
+ def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
+
+ def R600_ExportSwz : ExportSwzInst {
+ let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : ExportPattern<R600_ExportSwz, 39>;
+
+ def R600_ExportBuf : ExportBufInst {
+ let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
+}
+
+// Helper pattern for normalizing inputs to triginomic instructions for R700+
+// cards.
+class COS_PAT <InstR600 trig> : Pat<
+ (fcos R600_Reg32:$src),
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+>;
+
+class SIN_PAT <InstR600 trig> : Pat<
+ (fsin R600_Reg32:$src),
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+>;
+
+//===----------------------------------------------------------------------===//
+// R700 Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR700] in {
+ def SIN_r700 : SIN_Common<0x6E>;
+ def COS_r700 : COS_Common<0x6F>;
+
+ // R700 normalizes inputs to SIN/COS the same as EG
+ def : SIN_PAT <SIN_r700>;
+ def : COS_PAT <COS_r700>;
+}
+
+//===----------------------------------------------------------------------===//
+// Evergreen Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEG] in {
+
+def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
+defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
+
+def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
+def MULHI_INT_eg : MULHI_INT_Common<0x90>;
+def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
+def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
+def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_eg : SIN_Common<0x8D>;
+def COS_eg : COS_Common<0x8E>;
+
+def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>;
+def : SIN_PAT <SIN_eg>;
+def : COS_PAT <COS_eg>;
+def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
+} // End Predicates = [isEG]
+
+//===----------------------------------------------------------------------===//
+// Evergreen / Cayman Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEGorCayman] in {
+
+ // BFE_UINT - bit_extract, an optimization for mask and shift
+ // Src0 = Input
+ // Src1 = Offset
+ // Src2 = Width
+ //
+ // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
+ //
+ // Example Usage:
+ // (Offset, Width)
+ //
+ // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
+ // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
+ // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
+ // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
+ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
+ [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
+ R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
+
+ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
+ [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
+
+ def MULADD_eg : MULADD_Common<0x14>;
+ def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
+ def ASHR_eg : ASHR_Common<0x15>;
+ def LSHR_eg : LSHR_Common<0x16>;
+ def LSHL_eg : LSHL_Common<0x17>;
+ def CNDE_eg : CNDE_Common<0x19>;
+ def CNDGT_eg : CNDGT_Common<0x1A>;
+ def CNDGE_eg : CNDGE_Common<0x1B>;
+ def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
+ def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
+ defm DOT4_eg : DOT4_Common<0xBE>;
+ defm CUBE_eg : CUBE_Common<0xC0>;
+
+let hasSideEffects = 1 in {
+ def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
+}
+
+ def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
+
+ def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
+ let Pattern = [];
+ }
+
+ def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
+
+ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
+ let Pattern = [];
+ }
+
+ def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
+
+ // TRUNC is used for the FLT_TO_INT instructions to work around a
+ // perceived problem where the rounding modes are applied differently
+ // depending on the instruction and the slot they are in.
+ // See:
+ // https://bugs.freedesktop.org/show_bug.cgi?id=50232
+ // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
+ //
+ // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
+ // which do not need to be truncated since the fp values are 0.0f or 1.0f.
+ // We should look into handling these cases separately.
+ def : Pat<(fp_to_sint R600_Reg32:$src0),
+ (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;
+
+ def : Pat<(fp_to_uint R600_Reg32:$src0),
+ (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
+
+ def EG_ExportSwz : ExportSwzInst {
+ let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : ExportPattern<EG_ExportSwz, 83>;
+
+ def EG_ExportBuf : ExportBufInst {
+ let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
+
+//===----------------------------------------------------------------------===//
+// Memory read/write instructions
+//===----------------------------------------------------------------------===//
+let usesCustomInserter = 1 in {
+
+class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
+ list<dag> pattern>
+ : EG_CF_RAT <0x57, 0x2, 0, (outs), ins,
+ !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
+ let RIM = 0;
+ // XXX: Have a separate instruction for non-indexed writes.
+ let TYPE = 1;
+ let RW_REL = 0;
+ let ELEM_SIZE = 0;
+
+ let ARRAY_SIZE = 0;
+ let COMP_MASK = comp_mask;
+ let BURST_COUNT = 0;
+ let VPM = 0;
+ let MARK = 0;
+ let BARRIER = 1;
+}
+
+} // End usesCustomInserter = 1
+
+// 32-bit store
+def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ 0x1, "RAT_WRITE_CACHELESS_32_eg",
+ [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
+>;
+
+//128-bit store
+def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ 0xf, "RAT_WRITE_CACHELESS_128",
+ [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
+>;
+
+class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ // Static fields
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = buffer_id;
+ let SRC_REL = 0;
+ // XXX: We can infer this field based on the SRC_GPR. This would allow us
+ // to store vertex addresses in any channel, not just X.
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
+ // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
+ // however, based on my testing if USE_CONST_FIELDS is set, then all
+ // these fields need to be set to 0.
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 1;
+ let FORMAT_COMP_ALL = 0;
+ let SRF_MODE_ALL = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+ // LLVM can only encode 64-bit instructions, so these fields are manually
+ // encoded in R600CodeEmitter
+ //
+ // bits<16> OFFSET;
+ // bits<2> ENDIAN_SWAP = 0;
+ // bits<1> CONST_BUF_NO_STRIDE = 0;
+ // bits<1> MEGA_FETCH = 0;
+ // bits<1> ALT_CONST = 0;
+ // bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+ // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+ // is done in R600CodeEmitter
+ //
+ // Inst{79-64} = OFFSET;
+ // Inst{81-80} = ENDIAN_SWAP;
+ // Inst{82} = CONST_BUF_NO_STRIDE;
+ // Inst{83} = MEGA_FETCH;
+ // Inst{84} = ALT_CONST;
+ // Inst{86-85} = BUFFER_INDEX_MODE;
+ // Inst{95-86} = 0; Reserved
+
+ // VTX_WORD3 (Padding)
+ //
+ // Inst{127-96} = 0;
+}
+
+class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 1;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 1; // FMT_8
+}
+
+class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+ let MEGA_FETCH_COUNT = 2;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 5; // FMT_16
+
+}
+
+class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 4;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 0xD; // COLOR_32
+
+ // This is not really necessary, but there were some GPU hangs that appeared
+ // to be caused by ALU instructions in the next instruction group that wrote
+ // to the $ptr registers of the VTX_READ.
+ // e.g.
+ // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
+ // %T2_X<def> = MOV %ZERO
+ //Adding this constraint prevents this from happening.
+ let Constraints = "$ptr.ptr = $dst";
+}
+
+class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
+
+ // XXX: Need to force VTX_READ_128 instructions to write to the same register
+ // that holds its buffer address to avoid potential hangs. We can't use
+ // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
+ // registers are different sizes.
+}
+
+//===----------------------------------------------------------------------===//
+// VTX Read from parameter memory space
+//===----------------------------------------------------------------------===//
+
+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+ [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// VTX Read from global memory space
+//===----------------------------------------------------------------------===//
+
+// 8-bit reads
+def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
+>;
+
+// 32-bit reads
+def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
+// 128-bit reads
+def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
+ [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Constant Loads
+// XXX: We are currently storing all constants in the global address space.
+//===----------------------------------------------------------------------===//
+
+def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
+>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Regist loads and stores - for indirect addressing
+//===----------------------------------------------------------------------===//
+
+defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+
+let Predicates = [isCayman] in {
+
+let isVector = 1 in {
+
+def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
+
+def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
+def MULHI_INT_cm : MULHI_INT_Common<0x90>;
+def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
+def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_cm : SIN_Common<0x8D>;
+def COS_cm : COS_Common<0x8E>;
+} // End isVector = 1
+
+def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>;
+def : SIN_PAT <SIN_cm>;
+def : COS_PAT <COS_cm>;
+
+defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
+
+// RECIP_UINT emulation for Cayman
+def : Pat <
+ (AMDGPUurecip R600_Reg32:$src0),
+ (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
+ (MOV_IMM_I32 0x4f800000)))
+>;
+
+
+def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
+
+} // End isCayman
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+
+
+def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src),
+ "IF_PREDICATE_SET $src", []>;
+
+def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),
+ "PREDICATED_BREAK $src", []>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+let isPseudo = 1 in {
+
+def PRED_X : InstR600 <
+ 0, (outs R600_Predicate_Bit:$dst),
+ (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
+ "", [], NullALU> {
+ let FlagOperandIdx = 3;
+}
+
+let isTerminator = 1, isBranch = 1 in {
+def JUMP_COND : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target, R600_Predicate_Bit:$p),
+ "JUMP $target ($p)",
+ [], AnyALU
+ >;
+
+def JUMP : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target),
+ "JUMP $target",
+ [], AnyALU
+ >
+{
+ let isPredicable = 1;
+ let isBarrier = 1;
+}
+
+} // End isTerminator = 1, isBranch = 1
+
+let usesCustomInserter = 1 in {
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
+
+def MASK_WRITE : AMDGPUShaderInst <
+ (outs),
+ (ins R600_Reg32:$src),
+ "MASK_WRITE $src",
+ []
+>;
+
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
+
+
+def TXD: AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+>;
+
+def TXD_SHADOW: AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+>;
+
+} // End isPseudo = 1
+} // End usesCustomInserter = 1
+
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
+//===---------------------------------------------------------------------===//
+// Return instruction
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
+ usesCustomInserter = 1 in {
+ def RETURN : ILFormat<(outs), (ins variable_ops),
+ "RETURN", [(IL_retflag)]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+def CONST_COPY : Instruction {
+ let OutOperandList = (outs R600_Reg32:$dst);
+ let InOperandList = (ins i32imm:$src);
+ let Pattern =
+ [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+ let AsmString = "CONST_COPY";
+ let neverHasSideEffects = 1;
+ let isAsCheapAsAMove = 1;
+ let Itinerary = NullALU;
+}
+} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
+def TEX_VTX_CONSTBUF :
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let SRC_REL = 0;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 2;
+ let FORMAT_COMP_ALL = 1;
+ let SRF_MODE_ALL = 1;
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 35;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-86} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+def TEX_VTX_TEXBUF:
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
+ [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
+VTX_WORD1_GPR, VTX_WORD0 {
+
+let VC_INST = 0;
+let FETCH_TYPE = 2;
+let FETCH_WHOLE_QUAD = 0;
+let SRC_REL = 0;
+let SRC_SEL_X = 0;
+let DST_REL = 0;
+let USE_CONST_FIELDS = 1;
+let NUM_FORMAT_ALL = 0;
+let FORMAT_COMP_ALL = 0;
+let SRF_MODE_ALL = 1;
+let MEGA_FETCH_COUNT = 16;
+let DST_SEL_X = 0;
+let DST_SEL_Y = 1;
+let DST_SEL_Z = 2;
+let DST_SEL_W = 3;
+let DATA_FORMAT = 0;
+
+let Inst{31-0} = Word0;
+let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-86} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+
+
+//===--------------------------------------------------------------------===//
+// Instructions support
+//===--------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
+// Custom Inserter for Branches and returns, this eventually will be a
+// seperate pass
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
+ def BRANCH : ILFormat<(outs), (ins brtarget:$target),
+ "; Pseudo unconditional branch instruction",
+ [(br bb:$target)]>;
+ defm BRANCH_COND : BranchConditional<IL_brcond>;
+}
+
+//===---------------------------------------------------------------------===//
+// Flow and Program control Instructions
+//===---------------------------------------------------------------------===//
+let isTerminator=1 in {
+ def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
+ !strconcat("SWITCH", " $src"), []>;
+ def CASE : ILFormat< (outs), (ins GPRI32:$src),
+ !strconcat("CASE", " $src"), []>;
+ def BREAK : ILFormat< (outs), (ins),
+ "BREAK", []>;
+ def CONTINUE : ILFormat< (outs), (ins),
+ "CONTINUE", []>;
+ def DEFAULT : ILFormat< (outs), (ins),
+ "DEFAULT", []>;
+ def ELSE : ILFormat< (outs), (ins),
+ "ELSE", []>;
+ def ENDSWITCH : ILFormat< (outs), (ins),
+ "ENDSWITCH", []>;
+ def ENDMAIN : ILFormat< (outs), (ins),
+ "ENDMAIN", []>;
+ def END : ILFormat< (outs), (ins),
+ "END", []>;
+ def ENDFUNC : ILFormat< (outs), (ins),
+ "ENDFUNC", []>;
+ def ENDIF : ILFormat< (outs), (ins),
+ "ENDIF", []>;
+ def WHILELOOP : ILFormat< (outs), (ins),
+ "WHILE", []>;
+ def ENDLOOP : ILFormat< (outs), (ins),
+ "ENDLOOP", []>;
+ def FUNC : ILFormat< (outs), (ins),
+ "FUNC", []>;
+ def RETDYN : ILFormat< (outs), (ins),
+ "RET_DYN", []>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
+ defm IFC : BranchInstr2<"IFC">;
+ defm BREAKC : BranchInstr2<"BREAKC">;
+ defm CONTINUEC : BranchInstr2<"CONTINUEC">;
+}
+
+//===----------------------------------------------------------------------===//
+// ISel Patterns
+//===----------------------------------------------------------------------===//
+
+// CND*_INT Pattterns for f32 True / False values
+
+class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
+ (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1),
+ R600_Reg32:$src2, cc),
+ (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+>;
+
+def : CND_INT_f32 <CNDE_INT, SETEQ>;
+def : CND_INT_f32 <CNDGT_INT, SETGT>;
+def : CND_INT_f32 <CNDGE_INT, SETGE>;
+
+//CNDGE_INT extra pattern
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
+ (i32 R600_Reg32:$src2), COND_GT),
+ (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+>;
+
+// KIL Patterns
+def KILP : Pat <
+ (int_AMDGPU_kilp),
+ (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
+>;
+
+def KIL : Pat <
+ (int_AMDGPU_kill R600_Reg32:$src0),
+ (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
+>;
+
+// SGT Reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
+ (SGT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SGE Reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
+ (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
+ (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
+ (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_INT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
+ (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_INT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
+ (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_UINT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
+ (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_UINT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
+ (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// The next two patterns are special cases for handling 'true if ordered' and
+// 'true if unordered' conditionals. The assumption here is that the behavior of
+// SETE and SNE conforms to the Direct3D 10 rules for floating point values
+// described here:
+// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
+// We assume that SETE returns false when one of the operands is NAN and
+// SNE returns true when on of the operands is NAN
+
+//SETE - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
+ (SETE R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+//SETE_DX10 - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
+ (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+//SNE - 'true if unordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
+ (SNE R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+//SETNE_DX10 - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
+ (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
+
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
+
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
+
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
+
+def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
+def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
+
+// bitconvert patterns
+
+def : BitConvert <i32, f32, R600_Reg32>;
+def : BitConvert <f32, i32, R600_Reg32>;
+def : BitConvert <v4f32, v4i32, R600_Reg128>;
+def : BitConvert <v4i32, v4f32, R600_Reg128>;
+
+// DWORDADDR pattern
+def : DwordAddrPat <i32, R600_Reg32>;
+
+} // End isR600toCayman Predicate
diff --git a/contrib/llvm/lib/Target/R600/R600Intrinsics.td b/contrib/llvm/lib/Target/R600/R600Intrinsics.td
new file mode 100644
index 000000000000..dc8980aef146
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600Intrinsics.td
@@ -0,0 +1,31 @@
+//===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "R600", isTarget = 1 in {
+ def int_R600_load_input :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_interp_input :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_load_texbuf :
+ Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_store_swizzle :
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_R600_store_stream_output :
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_R600_store_pixel_depth :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_pixel_stencil :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_dummy :
+ Intrinsic<[], [llvm_i32_ty], []>;
+}
diff --git a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..018b40363363
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -0,0 +1,18 @@
+//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
+ : AMDGPUMachineFunction(MF) { }
+
+
diff --git a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h
new file mode 100644
index 000000000000..99c1f91b09b1
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h
@@ -0,0 +1,32 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINEFUNCTIONINFO_H
+#define R600MACHINEFUNCTIONINFO_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "AMDGPUMachineFunction.h"
+#include <vector>
+
+namespace llvm {
+
+class R600MachineFunctionInfo : public AMDGPUMachineFunction {
+public:
+ R600MachineFunctionInfo(const MachineFunction &MF);
+ SmallVector<unsigned, 4> LiveOuts;
+ std::vector<unsigned> IndirectRegs;
+};
+
+} // End llvm namespace
+
+#endif //R600MACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp b/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp
new file mode 100644
index 000000000000..a777142a9e70
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp
@@ -0,0 +1,427 @@
+//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "R600MachineScheduler.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+
+using namespace llvm;
+
+void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
+
+ DAG = dag;
+ TII = static_cast<const R600InstrInfo*>(DAG->TII);
+ TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
+ MRI = &DAG->MRI;
+ Available[IDAlu]->clear();
+ Available[IDFetch]->clear();
+ Available[IDOther]->clear();
+ CurInstKind = IDOther;
+ CurEmitted = 0;
+ OccupedSlotsMask = 15;
+ InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
+
+
+ const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
+ InstKindLimit[IDFetch] = 7; // 8 minus 1 for security
+ } else {
+ InstKindLimit[IDFetch] = 15; // 16 minus 1 for security
+ }
+}
+
+void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst)
+{
+ if (QSrc->empty())
+ return;
+ for (ReadyQueue::iterator I = QSrc->begin(),
+ E = QSrc->end(); I != E; ++I) {
+ (*I)->NodeQueueId &= ~QSrc->getID();
+ QDst->push(*I);
+ }
+ QSrc->clear();
+}
+
+SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
+ SUnit *SU = 0;
+ IsTopNode = true;
+ NextInstKind = IDOther;
+
+ // check if we might want to switch current clause type
+ bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
+ (CurEmitted > InstKindLimit[CurInstKind]) ||
+ (Available[CurInstKind]->empty());
+ bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
+ (!Available[IDFetch]->empty() || !Available[IDOther]->empty());
+
+ if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
+ (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
+ // try to pick ALU
+ SU = pickAlu();
+ if (SU) {
+ if (CurEmitted > InstKindLimit[IDAlu])
+ CurEmitted = 0;
+ NextInstKind = IDAlu;
+ }
+ }
+
+ if (!SU) {
+ // try to pick FETCH
+ SU = pickOther(IDFetch);
+ if (SU)
+ NextInstKind = IDFetch;
+ }
+
+ // try to pick other
+ if (!SU) {
+ SU = pickOther(IDOther);
+ if (SU)
+ NextInstKind = IDOther;
+ }
+
+ DEBUG(
+ if (SU) {
+ dbgs() << "picked node: ";
+ SU->dump(DAG);
+ } else {
+ dbgs() << "NO NODE ";
+ for (int i = 0; i < IDLast; ++i) {
+ Available[i]->dump();
+ Pending[i]->dump();
+ }
+ for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
+ const SUnit &S = DAG->SUnits[i];
+ if (!S.isScheduled)
+ S.dump(DAG);
+ }
+ }
+ );
+
+ return SU;
+}
+
+void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+
+ DEBUG(dbgs() << "scheduled: ");
+ DEBUG(SU->dump(DAG));
+
+ if (NextInstKind != CurInstKind) {
+ DEBUG(dbgs() << "Instruction Type Switch\n");
+ if (NextInstKind != IDAlu)
+ OccupedSlotsMask = 15;
+ CurEmitted = 0;
+ CurInstKind = NextInstKind;
+ }
+
+ if (CurInstKind == IDAlu) {
+ switch (getAluKind(SU)) {
+ case AluT_XYZW:
+ CurEmitted += 4;
+ break;
+ case AluDiscarded:
+ break;
+ default: {
+ ++CurEmitted;
+ for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
+ E = SU->getInstr()->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ ++CurEmitted;
+ }
+ }
+ }
+ } else {
+ ++CurEmitted;
+ }
+
+
+ DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
+
+ if (CurInstKind != IDFetch) {
+ MoveUnits(Pending[IDFetch], Available[IDFetch]);
+ }
+ MoveUnits(Pending[IDOther], Available[IDOther]);
+}
+
+void R600SchedStrategy::releaseTopNode(SUnit *SU) {
+ int IK = getInstKind(SU);
+
+ DEBUG(dbgs() << IK << " <= ");
+ DEBUG(SU->dump(DAG));
+
+ Pending[IK]->push(SU);
+}
+
+void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
+}
+
+bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
+ const TargetRegisterClass *RC) const {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->contains(Reg);
+ } else {
+ return MRI->getRegClass(Reg) == RC;
+ }
+}
+
+R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
+ MachineInstr *MI = SU->getInstr();
+
+ switch (MI->getOpcode()) {
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ return AluT_XYZW;
+ case AMDGPU::COPY:
+ if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
+ // %vregX = COPY Tn_X is likely to be discarded in favor of an
+ // assignement of Tn_X to %vregX, don't considers it in scheduling
+ return AluDiscarded;
+ }
+ else if (MI->getOperand(1).isUndef()) {
+ // MI will become a KILL, don't considers it in scheduling
+ return AluDiscarded;
+ }
+ default:
+ break;
+ }
+
+ // Does the instruction take a whole IG ?
+ if(TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()))
+ return AluT_XYZW;
+
+ // Is the result already assigned to a channel ?
+ unsigned DestSubReg = MI->getOperand(0).getSubReg();
+ switch (DestSubReg) {
+ case AMDGPU::sub0:
+ return AluT_X;
+ case AMDGPU::sub1:
+ return AluT_Y;
+ case AMDGPU::sub2:
+ return AluT_Z;
+ case AMDGPU::sub3:
+ return AluT_W;
+ default:
+ break;
+ }
+
+ // Is the result already member of a X/Y/Z/W class ?
+ unsigned DestReg = MI->getOperand(0).getReg();
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
+ regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
+ return AluT_X;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
+ return AluT_Y;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
+ return AluT_Z;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
+ return AluT_W;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
+ return AluT_XYZW;
+
+ return AluAny;
+
+}
+
+int R600SchedStrategy::getInstKind(SUnit* SU) {
+ int Opcode = SU->getInstr()->getOpcode();
+
+ if (TII->isALUInstr(Opcode)) {
+ return IDAlu;
+ }
+
+ switch (Opcode) {
+ case AMDGPU::COPY:
+ case AMDGPU::CONST_COPY:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return IDAlu;
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF:
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return IDFetch;
+ default:
+ DEBUG(
+ dbgs() << "other inst: ";
+ SU->dump(DAG);
+ );
+ return IDOther;
+ }
+}
+
+SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
+ if (Q.empty())
+ return NULL;
+ for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
+ It != E; ++It) {
+ SUnit *SU = *It;
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ if (TII->canBundle(InstructionsGroupCandidate)) {
+ InstructionsGroupCandidate.pop_back();
+ Q.erase(It);
+ return SU;
+ } else {
+ InstructionsGroupCandidate.pop_back();
+ }
+ }
+ return NULL;
+}
+
+void R600SchedStrategy::LoadAlu() {
+ ReadyQueue *QSrc = Pending[IDAlu];
+ for (ReadyQueue::iterator I = QSrc->begin(),
+ E = QSrc->end(); I != E; ++I) {
+ (*I)->NodeQueueId &= ~QSrc->getID();
+ AluKind AK = getAluKind(*I);
+ AvailableAlus[AK].insert(*I);
+ }
+ QSrc->clear();
+}
+
+void R600SchedStrategy::PrepareNextSlot() {
+ DEBUG(dbgs() << "New Slot\n");
+ assert (OccupedSlotsMask && "Slot wasn't filled");
+ OccupedSlotsMask = 0;
+ InstructionsGroupCandidate.clear();
+ LoadAlu();
+}
+
+void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
+ unsigned DestReg = MI->getOperand(0).getReg();
+ // PressureRegister crashes if an operand is def and used in the same inst
+ // and we try to constraint its regclass
+ for (MachineInstr::mop_iterator It = MI->operands_begin(),
+ E = MI->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && !MO.isDef() &&
+ MO.getReg() == MI->getOperand(0).getReg())
+ return;
+ }
+ // Constrains the regclass of DestReg to assign it to Slot
+ switch (Slot) {
+ case 0:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
+ break;
+ case 1:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
+ break;
+ case 2:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
+ break;
+ case 3:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
+ break;
+ }
+}
+
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
+ static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
+ SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
+ SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
+ if (!UnslotedSU) {
+ return SlotedSU;
+ } else if (!SlotedSU) {
+ AssignSlot(UnslotedSU->getInstr(), Slot);
+ return UnslotedSU;
+ } else {
+ //Determine which one to pick (the lesser one)
+ if (CompareSUnit()(SlotedSU, UnslotedSU)) {
+ AvailableAlus[AluAny].insert(UnslotedSU);
+ return SlotedSU;
+ } else {
+ AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
+ AssignSlot(UnslotedSU->getInstr(), Slot);
+ return UnslotedSU;
+ }
+ }
+}
+
+bool R600SchedStrategy::isAvailablesAluEmpty() const {
+ return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
+ AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
+ AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
+ AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
+}
+
+SUnit* R600SchedStrategy::pickAlu() {
+ while (!isAvailablesAluEmpty()) {
+ if (!OccupedSlotsMask) {
+ // Flush physical reg copies (RA will discard them)
+ if (!AvailableAlus[AluDiscarded].empty()) {
+ OccupedSlotsMask = 15;
+ return PopInst(AvailableAlus[AluDiscarded]);
+ }
+ // If there is a T_XYZW alu available, use it
+ if (!AvailableAlus[AluT_XYZW].empty()) {
+ OccupedSlotsMask = 15;
+ return PopInst(AvailableAlus[AluT_XYZW]);
+ }
+ }
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ bool isOccupied = OccupedSlotsMask & (1 << Chan);
+ if (!isOccupied) {
+ SUnit *SU = AttemptFillSlot(Chan);
+ if (SU) {
+ OccupedSlotsMask |= (1 << Chan);
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ return SU;
+ }
+ }
+ }
+ PrepareNextSlot();
+ }
+ return NULL;
+}
+
+SUnit* R600SchedStrategy::pickOther(int QID) {
+ SUnit *SU = 0;
+ ReadyQueue *AQ = Available[QID];
+
+ if (AQ->empty()) {
+ MoveUnits(Pending[QID], AQ);
+ }
+ if (!AQ->empty()) {
+ SU = *AQ->begin();
+ AQ->remove(AQ->begin());
+ }
+ return SU;
+}
+
diff --git a/contrib/llvm/lib/Target/R600/R600MachineScheduler.h b/contrib/llvm/lib/Target/R600/R600MachineScheduler.h
new file mode 100644
index 000000000000..3d0367fd8ebf
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600MachineScheduler.h
@@ -0,0 +1,120 @@
+//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINESCHEDULER_H_
+#define R600MACHINESCHEDULER_H_
+
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+class CompareSUnit {
+public:
+ bool operator()(const SUnit *S1, const SUnit *S2) {
+ return S1->getDepth() > S2->getDepth();
+ }
+};
+
+class R600SchedStrategy : public MachineSchedStrategy {
+
+ const ScheduleDAGMI *DAG;
+ const R600InstrInfo *TII;
+ const R600RegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ enum InstQueue {
+ QAlu = 1,
+ QFetch = 2,
+ QOther = 4
+ };
+
+ enum InstKind {
+ IDAlu,
+ IDFetch,
+ IDOther,
+ IDLast
+ };
+
+ enum AluKind {
+ AluAny,
+ AluT_X,
+ AluT_Y,
+ AluT_Z,
+ AluT_W,
+ AluT_XYZW,
+ AluDiscarded, // LLVM Instructions that are going to be eliminated
+ AluLast
+ };
+
+ ReadyQueue *Available[IDLast], *Pending[IDLast];
+ std::multiset<SUnit *, CompareSUnit> AvailableAlus[AluLast];
+
+ InstKind CurInstKind;
+ int CurEmitted;
+ InstKind NextInstKind;
+
+ int InstKindLimit[IDLast];
+
+ int OccupedSlotsMask;
+
+public:
+ R600SchedStrategy() :
+ DAG(0), TII(0), TRI(0), MRI(0) {
+ Available[IDAlu] = new ReadyQueue(QAlu, "AAlu");
+ Available[IDFetch] = new ReadyQueue(QFetch, "AFetch");
+ Available[IDOther] = new ReadyQueue(QOther, "AOther");
+ Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu");
+ Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch");
+ Pending[IDOther] = new ReadyQueue(QOther<<4, "POther");
+ }
+
+ virtual ~R600SchedStrategy() {
+ for (unsigned I = 0; I < IDLast; ++I) {
+ delete Available[I];
+ delete Pending[I];
+ }
+ }
+
+ virtual void initialize(ScheduleDAGMI *dag);
+ virtual SUnit *pickNode(bool &IsTopNode);
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+ virtual void releaseTopNode(SUnit *SU);
+ virtual void releaseBottomNode(SUnit *SU);
+
+private:
+ std::vector<MachineInstr *> InstructionsGroupCandidate;
+
+ int getInstKind(SUnit *SU);
+ bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
+ AluKind getAluKind(SUnit *SU) const;
+ void LoadAlu();
+ bool isAvailablesAluEmpty() const;
+ SUnit *AttemptFillSlot (unsigned Slot);
+ void PrepareNextSlot();
+ SUnit *PopInst(std::multiset<SUnit *, CompareSUnit> &Q);
+
+ void AssignSlot(MachineInstr *MI, unsigned Slot);
+ SUnit* pickAlu();
+ SUnit* pickOther(int QID);
+ void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
+};
+
+} // namespace llvm
+
+#endif /* R600MACHINESCHEDULER_H_ */
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp b/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp
new file mode 100644
index 000000000000..bbd7995d7d51
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp
@@ -0,0 +1,99 @@
+//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600RegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+
+ Reserved.set(AMDGPU::ZERO);
+ Reserved.set(AMDGPU::HALF);
+ Reserved.set(AMDGPU::ONE);
+ Reserved.set(AMDGPU::ONE_INT);
+ Reserved.set(AMDGPU::NEG_HALF);
+ Reserved.set(AMDGPU::NEG_ONE);
+ Reserved.set(AMDGPU::PV_X);
+ Reserved.set(AMDGPU::ALU_LITERAL_X);
+ Reserved.set(AMDGPU::ALU_CONST);
+ Reserved.set(AMDGPU::PREDICATE_BIT);
+ Reserved.set(AMDGPU::PRED_SEL_OFF);
+ Reserved.set(AMDGPU::PRED_SEL_ZERO);
+ Reserved.set(AMDGPU::PRED_SEL_ONE);
+
+ for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
+ E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(),
+ E = AMDGPU::TRegMemRegClass.end();
+ I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII);
+ std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
+ for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
+ E = IndirectRegs.end();
+ I != E; ++I) {
+ Reserved.set(*I);
+ }
+ return Reserved;
+}
+
+const TargetRegisterClass *
+R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
+ switch (rc->getID()) {
+ case AMDGPU::GPRF32RegClassID:
+ case AMDGPU::GPRI32RegClassID:
+ return &AMDGPU::R600_Reg32RegClass;
+ default: return rc;
+ }
+}
+
+unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
+ return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
+}
+
+const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const {
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
+ }
+}
+
+unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
+ switch (Channel) {
+ default: assert(!"Invalid channel index"); return 0;
+ case 0: return AMDGPU::sub0;
+ case 1: return AMDGPU::sub1;
+ case 2: return AMDGPU::sub2;
+ case 3: return AMDGPU::sub3;
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.h b/contrib/llvm/lib/Target/R600/R600RegisterInfo.h
new file mode 100644
index 000000000000..f9ca918f246b
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600RegisterInfo.h
@@ -0,0 +1,55 @@
+//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for R600RegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600REGISTERINFO_H_
+#define R600REGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+namespace llvm {
+
+class R600TargetMachine;
+class TargetInstrInfo;
+
+struct R600RegisterInfo : public AMDGPURegisterInfo {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns the R600 reg class that is equivalent to \p RC.
+ virtual const TargetRegisterClass *getISARegClass(
+ const TargetRegisterClass *RC) const;
+
+ /// \brief get the HW encoding for a register's channel.
+ unsigned getHWRegChan(unsigned reg) const;
+
+ /// \brief get the register class of the specified type to use in the
+ /// CFGStructurizer
+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+
+ /// \returns the sub reg enum value for the given \p Channel
+ /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
+ unsigned getSubRegFromChannel(unsigned Channel) const;
+
+};
+
+} // End namespace llvm
+
+#endif // AMDIDSAREGISTERINFO_H_
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.td b/contrib/llvm/lib/Target/R600/R600RegisterInfo.td
new file mode 100644
index 000000000000..03f49761ea40
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600RegisterInfo.td
@@ -0,0 +1,209 @@
+
+class R600Reg <string name, bits<16> encoding> : Register<name> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+class R600RegWithChan <string name, bits<9> sel, string chan> :
+ Register <name> {
+
+ field bits<2> chan_encoding = !if(!eq(chan, "X"), 0,
+ !if(!eq(chan, "Y"), 1,
+ !if(!eq(chan, "Z"), 2,
+ !if(!eq(chan, "W"), 3, 0))));
+ let HWEncoding{8-0} = sel;
+ let HWEncoding{10-9} = chan_encoding;
+ let Namespace = "AMDGPU";
+}
+
+class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
+ RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1, sub2, sub3];
+ let HWEncoding = encoding;
+}
+
+foreach Index = 0-127 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
+
+ // Indirect addressing offset registers
+ def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
+ Index, Chan>;
+ def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
+ Chan>;
+ }
+ // 128-bit Temporary Registers
+ def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
+ [!cast<Register>("T"#Index#"_X"),
+ !cast<Register>("T"#Index#"_Y"),
+ !cast<Register>("T"#Index#"_Z"),
+ !cast<Register>("T"#Index#"_W")],
+ Index>;
+}
+
+// KCACHE_BANK0
+foreach Index = 159-128 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def KC0_#Index#_#Chan : R600RegWithChan <"KC0["#Index#"-128]."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def KC0_#Index#_XYZW : R600Reg_128 <"KC0["#Index#"-128].XYZW",
+ [!cast<Register>("KC0_"#Index#"_X"),
+ !cast<Register>("KC0_"#Index#"_Y"),
+ !cast<Register>("KC0_"#Index#"_Z"),
+ !cast<Register>("KC0_"#Index#"_W")],
+ Index>;
+}
+
+// KCACHE_BANK1
+foreach Index = 191-160 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def KC1_#Index#_#Chan : R600RegWithChan <"KC1["#Index#"-160]."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def KC1_#Index#_XYZW : R600Reg_128 <"KC1["#Index#"-160].XYZW",
+ [!cast<Register>("KC1_"#Index#"_X"),
+ !cast<Register>("KC1_"#Index#"_Y"),
+ !cast<Register>("KC1_"#Index#"_Z"),
+ !cast<Register>("KC1_"#Index#"_W")],
+ Index>;
+}
+
+
+// Array Base Register holding input in FS
+foreach Index = 448-480 in {
+ def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
+}
+
+
+// Special Registers
+
+def ZERO : R600Reg<"0.0", 248>;
+def ONE : R600Reg<"1.0", 249>;
+def NEG_ONE : R600Reg<"-1.0", 249>;
+def ONE_INT : R600Reg<"1", 250>;
+def HALF : R600Reg<"0.5", 252>;
+def NEG_HALF : R600Reg<"-0.5", 252>;
+def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
+def PV_X : R600Reg<"pv.x", 254>;
+def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
+def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
+def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
+def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
+def AR_X : R600Reg<"AR.x", 0>;
+
+def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "ArrayBase%u", 448, 480))>;
+// special registers for ALU src operands
+// const buffer reference, SRCx_SEL contains index
+def ALU_CONST : R600Reg<"CBuf", 0>;
+// interpolation param reference, SRCx_SEL contains index
+def ALU_PARAM : R600Reg<"Param", 0>;
+
+let isAllocatable = 0 in {
+
+// XXX: Only use the X channel, until we support wider stack widths
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
+
+} // End isAllocatable = 0
+
+def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_X", 128, 159))>;
+
+def R600_KC0_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_Y", 128, 159))>;
+
+def R600_KC0_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_Z", 128, 159))>;
+
+def R600_KC0_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_W", 128, 159))>;
+
+def R600_KC0 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_KC0_X, R600_KC0_Y,
+ R600_KC0_Z, R600_KC0_W)>;
+
+def R600_KC1_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_X", 160, 191))>;
+
+def R600_KC1_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_Y", 160, 191))>;
+
+def R600_KC1_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_Z", 160, 191))>;
+
+def R600_KC1_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_W", 160, 191))>;
+
+def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_KC1_X, R600_KC1_Y,
+ R600_KC1_Z, R600_KC1_W)>;
+
+def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_X", 0, 127), AR_X)>;
+
+def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_Y", 0, 127))>;
+
+def R600_TReg32_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_Z", 0, 127))>;
+
+def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_W", 0, 127))>;
+
+def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_TReg32_X, R600_TReg32_Y,
+ R600_TReg32_Z, R600_TReg32_W)>;
+
+def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ R600_TReg32,
+ R600_ArrayBase,
+ R600_Addr,
+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
+ ALU_CONST, ALU_PARAM
+ )>;
+
+def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
+ PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
+
+def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
+ PREDICATE_BIT)>;
+
+def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+ (add (sequence "T%u_XYZW", 0, 127))> {
+ let CopyCost = -1;
+}
+
+//===----------------------------------------------------------------------===//
+// Register classes for indirect addressing
+//===----------------------------------------------------------------------===//
+
+// Super register for all the Indirect Registers. This register class is used
+// by the REG_SEQUENCE instruction to specify the registers to use for direct
+// reads / writes which may be written / read by an indirect address.
+class IndirectSuper<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices =
+ [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
+}
+
+def IndirectSuperReg : IndirectSuper<"Indirect",
+ [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
+ TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
+ TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
+>;
+
+def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
+
+// This register class defines the registers that are the storage units for
+// the "Indirect Addressing" pseudo memory space.
+// XXX: Only use the X channel, until we support wider stack widths
+def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "TRegMem%u_X", 0, 16))
+>;
diff --git a/contrib/llvm/lib/Target/R600/R600Schedule.td b/contrib/llvm/lib/Target/R600/R600Schedule.td
new file mode 100644
index 000000000000..7ede181c51dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600Schedule.td
@@ -0,0 +1,36 @@
+//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction
+// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS
+// slot has been removed.
+//
+//===----------------------------------------------------------------------===//
+
+
+def ALU_X : FuncUnit;
+def ALU_Y : FuncUnit;
+def ALU_Z : FuncUnit;
+def ALU_W : FuncUnit;
+def TRANS : FuncUnit;
+
+def AnyALU : InstrItinClass;
+def VecALU : InstrItinClass;
+def TransALU : InstrItinClass;
+
+def R600_EG_Itin : ProcessorItineraries <
+ [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
+ [],
+ [
+ InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
+ InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>,
+ InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
+ InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
+ ]
+>;
diff --git a/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
new file mode 100644
index 000000000000..2477e2a9dcc3
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -0,0 +1,329 @@
+//===-- SIAnnotateControlFlow.cpp - ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Annotates the control flow with hardware specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+namespace {
+
+// Complex types used in this pass
+typedef std::pair<BasicBlock *, Value *> StackEntry;
+typedef SmallVector<StackEntry, 16> StackVector;
+
+// Intrinsic names the control flow is annotated with
+static const char *IfIntrinsic = "llvm.SI.if";
+static const char *ElseIntrinsic = "llvm.SI.else";
+static const char *BreakIntrinsic = "llvm.SI.break";
+static const char *IfBreakIntrinsic = "llvm.SI.if.break";
+static const char *ElseBreakIntrinsic = "llvm.SI.else.break";
+static const char *LoopIntrinsic = "llvm.SI.loop";
+static const char *EndCfIntrinsic = "llvm.SI.end.cf";
+
+class SIAnnotateControlFlow : public FunctionPass {
+
+ static char ID;
+
+ Type *Boolean;
+ Type *Void;
+ Type *Int64;
+ Type *ReturnStruct;
+
+ ConstantInt *BoolTrue;
+ ConstantInt *BoolFalse;
+ UndefValue *BoolUndef;
+ Constant *Int64Zero;
+
+ Constant *If;
+ Constant *Else;
+ Constant *Break;
+ Constant *IfBreak;
+ Constant *ElseBreak;
+ Constant *Loop;
+ Constant *EndCf;
+
+ DominatorTree *DT;
+ StackVector Stack;
+ SSAUpdater PhiInserter;
+
+ bool isTopOfStack(BasicBlock *BB);
+
+ Value *popSaved();
+
+ void push(BasicBlock *BB, Value *Saved);
+
+ bool isElse(PHINode *Phi);
+
+ void eraseIfUnused(PHINode *Phi);
+
+ void openIf(BranchInst *Term);
+
+ void insertElse(BranchInst *Term);
+
+ void handleLoopCondition(Value *Cond);
+
+ void handleLoop(BranchInst *Term);
+
+ void closeControlFlow(BasicBlock *BB);
+
+public:
+ SIAnnotateControlFlow():
+ FunctionPass(ID) { }
+
+ virtual bool doInitialization(Module &M);
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual const char *getPassName() const {
+ return "SI annotate control flow";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+};
+
+} // end anonymous namespace
+
+char SIAnnotateControlFlow::ID = 0;
+
+/// \brief Initialize all the types and constants used in the pass
+bool SIAnnotateControlFlow::doInitialization(Module &M) {
+ LLVMContext &Context = M.getContext();
+
+ Void = Type::getVoidTy(Context);
+ Boolean = Type::getInt1Ty(Context);
+ Int64 = Type::getInt64Ty(Context);
+ ReturnStruct = StructType::get(Boolean, Int64, (Type *)0);
+
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+ Int64Zero = ConstantInt::get(Int64, 0);
+
+ If = M.getOrInsertFunction(
+ IfIntrinsic, ReturnStruct, Boolean, (Type *)0);
+
+ Else = M.getOrInsertFunction(
+ ElseIntrinsic, ReturnStruct, Int64, (Type *)0);
+
+ Break = M.getOrInsertFunction(
+ BreakIntrinsic, Int64, Int64, (Type *)0);
+
+ IfBreak = M.getOrInsertFunction(
+ IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)0);
+
+ ElseBreak = M.getOrInsertFunction(
+ ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)0);
+
+ Loop = M.getOrInsertFunction(
+ LoopIntrinsic, Boolean, Int64, (Type *)0);
+
+ EndCf = M.getOrInsertFunction(
+ EndCfIntrinsic, Void, Int64, (Type *)0);
+
+ return false;
+}
+
+/// \brief Is BB the last block saved on the stack ?
+bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
+ return !Stack.empty() && Stack.back().first == BB;
+}
+
+/// \brief Pop the last saved value from the control flow stack
+Value *SIAnnotateControlFlow::popSaved() {
+ return Stack.pop_back_val().second;
+}
+
+/// \brief Push a BB and saved value to the control flow stack
+void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) {
+ Stack.push_back(std::make_pair(BB, Saved));
+}
+
+/// \brief Can the condition represented by this PHI node treated like
+/// an "Else" block?
+bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
+ BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock();
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+ if (Phi->getIncomingBlock(i) == IDom) {
+
+ if (Phi->getIncomingValue(i) != BoolTrue)
+ return false;
+
+ } else {
+ if (Phi->getIncomingValue(i) != BoolFalse)
+ return false;
+
+ }
+ }
+ return true;
+}
+
+// \brief Erase "Phi" if it is not used any more
+void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
+ if (!Phi->hasNUsesOrMore(1))
+ Phi->eraseFromParent();
+}
+
+/// \brief Open a new "If" block
+void SIAnnotateControlFlow::openIf(BranchInst *Term) {
+ Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+}
+
+/// \brief Close the last "If" block and open a new "Else" block
+void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
+ Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+}
+
+/// \brief Recursively handle the condition leading to a loop
+void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) {
+ if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
+
+ // Handle all non constant incoming values first
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = Phi->getIncomingValue(i);
+ if (isa<ConstantInt>(Incoming))
+ continue;
+
+ Phi->setIncomingValue(i, BoolFalse);
+ handleLoopCondition(Incoming);
+ }
+
+ BasicBlock *Parent = Phi->getParent();
+ BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
+
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+
+ Value *Incoming = Phi->getIncomingValue(i);
+ if (Incoming != BoolTrue)
+ continue;
+
+ BasicBlock *From = Phi->getIncomingBlock(i);
+ if (From == IDom) {
+ CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
+ if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
+ Value *Args[] = {
+ OldEnd->getArgOperand(0),
+ PhiInserter.GetValueAtEndOfBlock(Parent)
+ };
+ Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
+ PhiInserter.AddAvailableValue(Parent, Ret);
+ continue;
+ }
+ }
+
+ TerminatorInst *Insert = From->getTerminator();
+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(From);
+ Value *Ret = CallInst::Create(Break, Arg, "", Insert);
+ PhiInserter.AddAvailableValue(From, Ret);
+ }
+ eraseIfUnused(Phi);
+
+ } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
+ BasicBlock *Parent = Inst->getParent();
+ TerminatorInst *Insert = Parent->getTerminator();
+ Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) };
+ Value *Ret = CallInst::Create(IfBreak, Args, "", Insert);
+ PhiInserter.AddAvailableValue(Parent, Ret);
+
+ } else {
+ assert(0 && "Unhandled loop condition!");
+ }
+}
+
+/// \brief Handle a back edge (loop)
+void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
+ BasicBlock *Target = Term->getSuccessor(1);
+ PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
+
+ PhiInserter.Initialize(Int64, "");
+ PhiInserter.AddAvailableValue(Target, Broken);
+
+ Value *Cond = Term->getCondition();
+ Term->setCondition(BoolTrue);
+ handleLoopCondition(Cond);
+
+ BasicBlock *BB = Term->getParent();
+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB);
+ for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
+ PI != PE; ++PI) {
+
+ Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI);
+ }
+
+ Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
+ push(Term->getSuccessor(0), Arg);
+}
+
+/// \brief Close the last opened control flow
+void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
+ CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
+}
+
+/// \brief Annotate the control flow with intrinsics so the backend can
+/// recognize if/then/else and loops.
+bool SIAnnotateControlFlow::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTree>();
+
+ for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
+ E = df_end(&F.getEntryBlock()); I != E; ++I) {
+
+ BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator());
+
+ if (!Term || Term->isUnconditional()) {
+ if (isTopOfStack(*I))
+ closeControlFlow(*I);
+ continue;
+ }
+
+ if (I.nodeVisited(Term->getSuccessor(1))) {
+ if (isTopOfStack(*I))
+ closeControlFlow(*I);
+ handleLoop(Term);
+ continue;
+ }
+
+ if (isTopOfStack(*I)) {
+ PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
+ if (Phi && Phi->getParent() == *I && isElse(Phi)) {
+ insertElse(Term);
+ eraseIfUnused(Phi);
+ continue;
+ }
+ closeControlFlow(*I);
+ }
+ openIf(Term);
+ }
+
+ assert(Stack.empty());
+ return true;
+}
+
+/// \brief Create the annotation pass
+FunctionPass *llvm::createSIAnnotateControlFlowPass() {
+ return new SIAnnotateControlFlow();
+}
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
new file mode 100644
index 000000000000..6f0c30761506
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -0,0 +1,670 @@
+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Custom DAG lowering for SI
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIISelLowering.h"
+#include "AMDIL.h"
+#include "AMDGPU.h"
+#include "AMDILIntrinsicInfo.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+SITargetLowering::SITargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())),
+ TRI(TM.getRegisterInfo()) {
+
+ addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
+
+ addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
+ addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
+
+ addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
+
+ addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass);
+
+ addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
+
+ addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+
+ addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
+ addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
+
+ addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
+ addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
+
+ computeRegisterProperties();
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::ADD, MVT::i32, Legal);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setTargetDAGCombine(ISD::SELECT_CC);
+
+ setTargetDAGCombine(ISD::SETCC);
+
+ setSchedulingPreference(Sched::RegPressure);
+}
+
+SDValue SITargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ FunctionType *FType = MF.getFunction()->getFunctionType();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ assert(CallConv == CallingConv::C);
+
+ SmallVector<ISD::InputArg, 16> Splits;
+ uint32_t Skipped = 0;
+
+ for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
+ const ISD::InputArg &Arg = Ins[i];
+
+ // First check if it's a PS input addr
+ if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
+
+ assert((PSInputNum <= 15) && "Too many PS inputs!");
+
+ if (!Arg.Used) {
+ // We can savely skip PS inputs
+ Skipped |= 1 << i;
+ ++PSInputNum;
+ continue;
+ }
+
+ Info->PSInputAddr |= 1 << PSInputNum++;
+ }
+
+ // Second split vertices into their elements
+ if (Arg.VT.isVector()) {
+ ISD::InputArg NewArg = Arg;
+ NewArg.Flags.setSplit();
+ NewArg.VT = Arg.VT.getVectorElementType();
+
+ // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
+ // three or five element vertex only needs three or five registers,
+ // NOT four or eigth.
+ Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ for (unsigned j = 0; j != NumElements; ++j) {
+ Splits.push_back(NewArg);
+ NewArg.PartOffset += NewArg.VT.getStoreSize();
+ }
+
+ } else {
+ Splits.push_back(Arg);
+ }
+ }
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // At least one interpolation mode must be enabled or else the GPU will hang.
+ if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
+ Info->PSInputAddr |= 1;
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ }
+
+ AnalyzeFormalArguments(CCInfo, Splits);
+
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+
+ if (Skipped & (1 << i)) {
+ InVals.push_back(SDValue());
+ continue;
+ }
+
+ CCValAssign &VA = ArgLocs[ArgIdx++];
+ assert(VA.isRegLoc() && "Parameter must be in a register!");
+
+ unsigned Reg = VA.getLocReg();
+ MVT VT = VA.getLocVT();
+
+ if (VT == MVT::i64) {
+ // For now assume it is a pointer
+ Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
+ &AMDGPU::SReg_64RegClass);
+ Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
+ InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ continue;
+ }
+
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+
+ Reg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+
+ const ISD::InputArg &Arg = Ins[i];
+ if (Arg.VT.isVector()) {
+
+ // Build a vector from the registers
+ Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ SmallVector<SDValue, 4> Regs;
+ Regs.push_back(Val);
+ for (unsigned j = 1; j != NumElements; ++j) {
+ Reg = ArgLocs[ArgIdx++].getLocReg();
+ Reg = MF.addLiveIn(Reg, RC);
+ Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ }
+
+ // Fill up the missing vector elements
+ NumElements = Arg.VT.getVectorNumElements() - NumElements;
+ for (unsigned j = 0; j != NumElements; ++j)
+ Regs.push_back(DAG.getUNDEF(VT));
+
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
+ Regs.data(), Regs.size()));
+ continue;
+ }
+
+ InVals.push_back(Val);
+ }
+ return Chain;
+}
+
+MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const {
+
+ switch (MI->getOpcode()) {
+ default:
+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDGPU::BRANCH: return BB;
+ }
+ return BB;
+}
+
+EVT SITargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+}
+
+MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
+ return MVT::i32;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ }
+ return SDValue();
+}
+
+/// \brief Helper function for LowerBRCOND
+static SDNode *findUser(SDValue Value, unsigned Opcode) {
+
+ SDNode *Parent = Value.getNode();
+ for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
+ I != E; ++I) {
+
+ if (I.getUse().get() != Value)
+ continue;
+
+ if (I->getOpcode() == Opcode)
+ return *I;
+ }
+ return 0;
+}
+
+/// This transforms the control flow intrinsics to get the branch destination as
+/// last parameter, also switches branch target with BR if the need arise
+SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
+ SelectionDAG &DAG) const {
+
+ DebugLoc DL = BRCOND.getDebugLoc();
+
+ SDNode *Intr = BRCOND.getOperand(1).getNode();
+ SDValue Target = BRCOND.getOperand(2);
+ SDNode *BR = 0;
+
+ if (Intr->getOpcode() == ISD::SETCC) {
+ // As long as we negate the condition everything is fine
+ SDNode *SetCC = Intr;
+ assert(SetCC->getConstantOperandVal(1) == 1);
+ assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
+ ISD::SETNE);
+ Intr = SetCC->getOperand(0).getNode();
+
+ } else {
+ // Get the target from BR if we don't negate the condition
+ BR = findUser(BRCOND, ISD::BR);
+ Target = BR->getOperand(1);
+ }
+
+ assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+
+ // Build the result and
+ SmallVector<EVT, 4> Res;
+ for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
+ Res.push_back(Intr->getValueType(i));
+
+ // operands of the new intrinsic call
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(BRCOND.getOperand(0));
+ for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
+ Ops.push_back(Intr->getOperand(i));
+ Ops.push_back(Target);
+
+ // build the new intrinsic call
+ SDNode *Result = DAG.getNode(
+ Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
+ DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();
+
+ if (BR) {
+ // Give the branch instruction our target
+ SDValue Ops[] = {
+ BR->getOperand(0),
+ BRCOND.getOperand(2)
+ };
+ DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
+ }
+
+ SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
+
+ // Copy the intrinsic results to registers
+ for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
+ SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
+ if (!CopyToReg)
+ continue;
+
+ Chain = DAG.getCopyToReg(
+ Chain, DL,
+ CopyToReg->getOperand(1),
+ SDValue(Result, i - 1),
+ SDValue());
+
+ DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
+ }
+
+ // Remove the old intrinsic from the chain
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(Intr, Intr->getNumValues() - 1),
+ Intr->getOperand(0));
+
+ return Chain;
+}
+
+SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Possible Min/Max pattern
+ SDValue MinMax = LowerMinMax(Op, DAG);
+ if (MinMax.getNode()) {
+ return MinMax;
+ }
+
+ SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::SELECT_CC: {
+ N->dump();
+ ConstantSDNode *True, *False;
+ // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
+ if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ && True->isAllOnesValue()
+ && False->isNullValue()
+ && VT == MVT::i1) {
+ return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(4));
+
+ }
+ break;
+ }
+ case ISD::SETCC: {
+ SDValue Arg0 = N->getOperand(0);
+ SDValue Arg1 = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+ ConstantSDNode * C = NULL;
+ ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
+
+ // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
+ if (VT == MVT::i1
+ && Arg0.getOpcode() == ISD::SIGN_EXTEND
+ && Arg0.getOperand(0).getValueType() == MVT::i1
+ && (C = dyn_cast<ConstantSDNode>(Arg1))
+ && C->isNullValue()
+ && CCOp == ISD::SETNE) {
+ return SimplifySetCC(VT, Arg0.getOperand(0),
+ DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+/// \brief Test if RegClass is one of the VSrc classes
+static bool isVSrc(unsigned RegClass) {
+ return AMDGPU::VSrc_32RegClassID == RegClass ||
+ AMDGPU::VSrc_64RegClassID == RegClass;
+}
+
+/// \brief Test if RegClass is one of the SSrc classes
+static bool isSSrc(unsigned RegClass) {
+ return AMDGPU::SSrc_32RegClassID == RegClass ||
+ AMDGPU::SSrc_64RegClassID == RegClass;
+}
+
+/// \brief Analyze the possible immediate value Op
+///
+/// Returns -1 if it isn't an immediate, 0 if it's and inline immediate
+/// and the immediate value if it's a literal immediate
+int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
+
+ union {
+ int32_t I;
+ float F;
+ } Imm;
+
+ if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N))
+ Imm.I = Node->getSExtValue();
+ else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
+ Imm.F = Node->getValueAPF().convertToFloat();
+ else
+ return -1; // It isn't an immediate
+
+ if ((Imm.I >= -16 && Imm.I <= 64) ||
+ Imm.F == 0.5f || Imm.F == -0.5f ||
+ Imm.F == 1.0f || Imm.F == -1.0f ||
+ Imm.F == 2.0f || Imm.F == -2.0f ||
+ Imm.F == 4.0f || Imm.F == -4.0f)
+ return 0; // It's an inline immediate
+
+ return Imm.I; // It's a literal immediate
+}
+
+/// \brief Try to fold an immediate directly into an instruction
+bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
+ bool &ScalarSlotUsed) const {
+
+ MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
+ if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode()))
+ return false;
+
+ const SDValue &Op = Mov->getOperand(0);
+ int32_t Value = analyzeImmediate(Op.getNode());
+ if (Value == -1) {
+ // Not an immediate at all
+ return false;
+
+ } else if (Value == 0) {
+ // Inline immediates can always be fold
+ Operand = Op;
+ return true;
+
+ } else if (Value == Immediate) {
+ // Already fold literal immediate
+ Operand = Op;
+ return true;
+
+ } else if (!ScalarSlotUsed && !Immediate) {
+ // Fold this literal immediate
+ ScalarSlotUsed = true;
+ Immediate = Value;
+ Operand = Op;
+ return true;
+
+ }
+
+ return false;
+}
+
+/// \brief Does "Op" fit into register class "RegClass" ?
+bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op,
+ unsigned RegClass) const {
+
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ SDNode *Node = Op.getNode();
+
+ const TargetRegisterClass *OpClass;
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) {
+ const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode());
+ int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
+ if (OpClassID == -1)
+ OpClass = getRegClassFor(Op.getSimpleValueType());
+ else
+ OpClass = TRI->getRegClass(OpClassID);
+
+ } else if (Node->getOpcode() == ISD::CopyFromReg) {
+ RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode());
+ OpClass = MRI.getRegClass(Reg->getReg());
+
+ } else
+ return false;
+
+ return TRI->getRegClass(RegClass)->hasSubClassEq(OpClass);
+}
+
+/// \brief Make sure that we don't exeed the number of allowed scalars
+void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
+ unsigned RegClass,
+ bool &ScalarSlotUsed) const {
+
+ // First map the operands register class to a destination class
+ if (RegClass == AMDGPU::VSrc_32RegClassID)
+ RegClass = AMDGPU::VReg_32RegClassID;
+ else if (RegClass == AMDGPU::VSrc_64RegClassID)
+ RegClass = AMDGPU::VReg_64RegClassID;
+ else
+ return;
+
+ // Nothing todo if they fit naturaly
+ if (fitsRegClass(DAG, Operand, RegClass))
+ return;
+
+ // If the scalar slot isn't used yet use it now
+ if (!ScalarSlotUsed) {
+ ScalarSlotUsed = true;
+ return;
+ }
+
+ // This is a conservative aproach, it is possible that we can't determine
+ // the correct register class and copy too often, but better save than sorry.
+ SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
+ SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DebugLoc(),
+ Operand.getValueType(), Operand, RC);
+ Operand = SDValue(Node, 0);
+}
+
+SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
+
+ // Original encoding (either e32 or e64)
+ int Opcode = Node->getMachineOpcode();
+ const MCInstrDesc *Desc = &TII->get(Opcode);
+
+ unsigned NumDefs = Desc->getNumDefs();
+ unsigned NumOps = Desc->getNumOperands();
+
+ // Commuted opcode if available
+ int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
+ const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
+
+ assert(!DescRev || DescRev->getNumDefs() == NumDefs);
+ assert(!DescRev || DescRev->getNumOperands() == NumOps);
+
+ // e64 version if available, -1 otherwise
+ int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
+ const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
+
+ assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
+ assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4));
+
+ int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
+ bool HaveVSrc = false, HaveSSrc = false;
+
+ // First figure out what we alread have in this instruction
+ for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
+ i != e && Op < NumOps; ++i, ++Op) {
+
+ unsigned RegClass = Desc->OpInfo[Op].RegClass;
+ if (isVSrc(RegClass))
+ HaveVSrc = true;
+ else if (isSSrc(RegClass))
+ HaveSSrc = true;
+ else
+ continue;
+
+ int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
+ if (Imm != -1 && Imm != 0) {
+ // Literal immediate
+ Immediate = Imm;
+ }
+ }
+
+ // If we neither have VSrc nor SSrc it makes no sense to continue
+ if (!HaveVSrc && !HaveSSrc)
+ return Node;
+
+ // No scalar allowed when we have both VSrc and SSrc
+ bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
+
+ // Second go over the operands and try to fold them
+ std::vector<SDValue> Ops;
+ bool Promote2e64 = false;
+ for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
+ i != e && Op < NumOps; ++i, ++Op) {
+
+ const SDValue &Operand = Node->getOperand(i);
+ Ops.push_back(Operand);
+
+ // Already folded immediate ?
+ if (isa<ConstantSDNode>(Operand.getNode()) ||
+ isa<ConstantFPSDNode>(Operand.getNode()))
+ continue;
+
+ // Is this a VSrc or SSrc operand ?
+ unsigned RegClass = Desc->OpInfo[Op].RegClass;
+ if (isVSrc(RegClass) || isSSrc(RegClass)) {
+ // Try to fold the immediates
+ if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
+ // Folding didn't worked, make sure we don't hit the SReg limit
+ ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
+ }
+ continue;
+ }
+
+ if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
+
+ unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
+ assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
+
+ // Test if it makes sense to swap operands
+ if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
+ (!fitsRegClass(DAG, Ops[1], RegClass) &&
+ fitsRegClass(DAG, Ops[1], OtherRegClass))) {
+
+ // Swap commutable operands
+ SDValue Tmp = Ops[1];
+ Ops[1] = Ops[0];
+ Ops[0] = Tmp;
+
+ Desc = DescRev;
+ DescRev = 0;
+ continue;
+ }
+ }
+
+ if (DescE64 && !Immediate) {
+
+ // Test if it makes sense to switch to e64 encoding
+ unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
+ if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
+ continue;
+
+ int32_t TmpImm = -1;
+ if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) ||
+ (!fitsRegClass(DAG, Ops[i], RegClass) &&
+ fitsRegClass(DAG, Ops[1], OtherRegClass))) {
+
+ // Switch to e64 encoding
+ Immediate = -1;
+ Promote2e64 = true;
+ Desc = DescE64;
+ DescE64 = 0;
+ }
+ }
+ }
+
+ if (Promote2e64) {
+ // Add the modifier flags while promoting
+ for (unsigned i = 0; i < 4; ++i)
+ Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
+ }
+
+ // Add optional chain and glue
+ for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
+ Ops.push_back(Node->getOperand(i));
+
+ // Create a complete new instruction
+ return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(),
+ Node->getVTList(), Ops.data(), Ops.size());
+}
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.h b/contrib/llvm/lib/Target/R600/SIISelLowering.h
new file mode 100644
index 000000000000..5ad2f40f0f3a
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIISelLowering.h
@@ -0,0 +1,58 @@
+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SIISELLOWERING_H
+#define SIISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+#include "SIInstrInfo.h"
+
+namespace llvm {
+
+class SITargetLowering : public AMDGPUTargetLowering {
+ const SIInstrInfo * TII;
+ const TargetRegisterInfo * TRI;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+
+ bool foldImm(SDValue &Operand, int32_t &Immediate,
+ bool &ScalarSlotUsed) const;
+ bool fitsRegClass(SelectionDAG &DAG, SDValue &Op, unsigned RegClass) const;
+ void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
+ unsigned RegClass, bool &ScalarSlotUsed) const;
+
+public:
+ SITargetLowering(TargetMachine &tm);
+
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
+ MachineBasicBlock * BB) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
+ virtual MVT getScalarShiftAmountTy(EVT VT) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+
+ int32_t analyzeImmediate(const SDNode *N) const;
+};
+
+} // End namespace llvm
+
+#endif //SIISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp b/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
new file mode 100644
index 000000000000..98bd3dbb6646
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
@@ -0,0 +1,358 @@
+//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Insert wait instructions for memory reads and writes.
+///
+/// Memory reads and writes are issued asynchronously, so we need to insert
+/// S_WAITCNT instructions when we want to access any of their results or
+/// overwrite any register that's used asynchronously.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief One variable for each of the hardware counters
+typedef union {
+ struct {
+ unsigned VM;
+ unsigned EXP;
+ unsigned LGKM;
+ } Named;
+ unsigned Array[3];
+
+} Counters;
+
+typedef Counters RegCounters[512];
+typedef std::pair<unsigned, unsigned> RegInterval;
+
+class SIInsertWaits : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo &TRI;
+ const MachineRegisterInfo *MRI;
+
+ /// \brief Constant hardware limits
+ static const Counters WaitCounts;
+
+ /// \brief Constant zero value
+ static const Counters ZeroCounts;
+
+ /// \brief Counter values we have already waited on.
+ Counters WaitedOn;
+
+ /// \brief Counter values for last instruction issued.
+ Counters LastIssued;
+
+ /// \brief Registers used by async instructions.
+ RegCounters UsedRegs;
+
+ /// \brief Registers defined by async instructions.
+ RegCounters DefinedRegs;
+
+ /// \brief Different export instruction types seen since last wait.
+ unsigned ExpInstrTypesSeen;
+
+ /// \brief Get increment/decrement amount for this instruction.
+ Counters getHwCounts(MachineInstr &MI);
+
+ /// \brief Is operand relevant for async execution?
+ bool isOpRelevant(MachineOperand &Op);
+
+ /// \brief Get register interval an operand affects.
+ RegInterval getRegInterval(MachineOperand &Op);
+
+ /// \brief Handle instructions async components
+ void pushInstruction(MachineInstr &MI);
+
+ /// \brief Insert the actual wait instruction
+ bool insertWait(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const Counters &Counts);
+
+ /// \brief Do we need def2def checks?
+ bool unorderedDefines(MachineInstr &MI);
+
+ /// \brief Resolve all operand dependencies to counter requirements
+ Counters handleOperands(MachineInstr &MI);
+
+public:
+ SIInsertWaits(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII(static_cast<const SIInstrInfo*>(tm.getInstrInfo())),
+ TRI(TII->getRegisterInfo()) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "SI insert wait instructions";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SIInsertWaits::ID = 0;
+
+const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
+const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
+
+FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
+ return new SIInsertWaits(tm);
+}
+
+Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
+
+ uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
+ Counters Result;
+
+ Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
+
+ // Only consider stores or EXP for EXP_CNT
+ Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
+ (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
+
+ // LGKM may uses larger values
+ if (TSFlags & SIInstrFlags::LGKM_CNT) {
+
+ MachineOperand &Op = MI.getOperand(0);
+ assert(Op.isReg() && "First LGKM operand must be a register!");
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
+ Result.Named.LGKM = Size > 4 ? 2 : 1;
+
+ } else {
+ Result.Named.LGKM = 0;
+ }
+
+ return Result;
+}
+
+bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
+
+ // Constants are always irrelevant
+ if (!Op.isReg())
+ return false;
+
+ // Defines are always relevant
+ if (Op.isDef())
+ return true;
+
+ // For exports all registers are relevant
+ MachineInstr &MI = *Op.getParent();
+ if (MI.getOpcode() == AMDGPU::EXP)
+ return true;
+
+ // For stores the stored value is also relevant
+ if (!MI.getDesc().mayStore())
+ return false;
+
+ for (MachineInstr::mop_iterator I = MI.operands_begin(),
+ E = MI.operands_end(); I != E; ++I) {
+
+ if (I->isReg() && I->isUse())
+ return Op.isIdenticalTo(*I);
+ }
+
+ return false;
+}
+
+RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
+
+ if (!Op.isReg())
+ return std::make_pair(0, 0);
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
+
+ assert(Size >= 4);
+
+ RegInterval Result;
+ Result.first = TRI.getEncodingValue(Reg);
+ Result.second = Result.first + Size / 4;
+
+ return Result;
+}
+
+void SIInsertWaits::pushInstruction(MachineInstr &MI) {
+
+ // Get the hardware counter increments and sum them up
+ Counters Increment = getHwCounts(MI);
+ unsigned Sum = 0;
+
+ for (unsigned i = 0; i < 3; ++i) {
+ LastIssued.Array[i] += Increment.Array[i];
+ Sum += Increment.Array[i];
+ }
+
+ // If we don't increase anything then that's it
+ if (Sum == 0)
+ return;
+
+ // Remember which export instructions we have seen
+ if (Increment.Named.EXP) {
+ ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+
+ MachineOperand &Op = MI.getOperand(i);
+ if (!isOpRelevant(Op))
+ continue;
+
+ RegInterval Interval = getRegInterval(Op);
+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
+
+ // Remember which registers we define
+ if (Op.isDef())
+ DefinedRegs[j] = LastIssued;
+
+ // and which one we are using
+ if (Op.isUse())
+ UsedRegs[j] = LastIssued;
+ }
+ }
+}
+
+bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const Counters &Required) {
+
+ // End of program? No need to wait on anything
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+ return false;
+
+ // Figure out if the async instructions execute in order
+ bool Ordered[3];
+
+ // VM_CNT is always ordered
+ Ordered[0] = true;
+
+ // EXP_CNT is unordered if we have both EXP & VM-writes
+ Ordered[1] = ExpInstrTypesSeen == 3;
+
+ // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
+ Ordered[2] = false;
+
+ // The values we are going to put into the S_WAITCNT instruction
+ Counters Counts = WaitCounts;
+
+ // Do we really need to wait?
+ bool NeedWait = false;
+
+ for (unsigned i = 0; i < 3; ++i) {
+
+ if (Required.Array[i] <= WaitedOn.Array[i])
+ continue;
+
+ NeedWait = true;
+
+ if (Ordered[i]) {
+ unsigned Value = LastIssued.Array[i] - Required.Array[i];
+
+ // adjust the value to the real hardware posibilities
+ Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
+
+ } else
+ Counts.Array[i] = 0;
+
+ // Remember on what we have waited on
+ WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
+ }
+
+ if (!NeedWait)
+ return false;
+
+ // Reset EXP_CNT instruction types
+ if (Counts.Named.EXP == 0)
+ ExpInstrTypesSeen = 0;
+
+ // Build the wait instruction
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm((Counts.Named.VM & 0xF) |
+ ((Counts.Named.EXP & 0x7) << 4) |
+ ((Counts.Named.LGKM & 0x7) << 8));
+
+ return true;
+}
+
+/// \brief helper function for handleOperands
+static void increaseCounters(Counters &Dst, const Counters &Src) {
+
+ for (unsigned i = 0; i < 3; ++i)
+ Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
+}
+
+Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
+
+ Counters Result = ZeroCounts;
+
+ // For each register affected by this
+ // instruction increase the result sequence
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+
+ MachineOperand &Op = MI.getOperand(i);
+ RegInterval Interval = getRegInterval(Op);
+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
+
+ if (Op.isDef()) {
+ increaseCounters(Result, UsedRegs[j]);
+ increaseCounters(Result, DefinedRegs[j]);
+ }
+
+ if (Op.isUse())
+ increaseCounters(Result, DefinedRegs[j]);
+ }
+ }
+
+ return Result;
+}
+
+bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
+
+ bool Changes = false;
+
+ MRI = &MF.getRegInfo();
+
+ WaitedOn = ZeroCounts;
+ LastIssued = ZeroCounts;
+
+ memset(&UsedRegs, 0, sizeof(UsedRegs));
+ memset(&DefinedRegs, 0, sizeof(DefinedRegs));
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+
+ Changes |= insertWait(MBB, I, handleOperands(*I));
+ pushInstruction(*I);
+ }
+
+ // Wait for everything at the end of the MBB
+ Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+ }
+
+ return Changes;
+}
diff --git a/contrib/llvm/lib/Target/R600/SIInstrFormats.td b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
new file mode 100644
index 000000000000..3891ddb2dbe2
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
@@ -0,0 +1,426 @@
+//===-- SIInstrFormats.td - SI Instruction Encodings ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction format definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<1> VM_CNT = 0;
+ field bits<1> EXP_CNT = 0;
+ field bits<1> LGKM_CNT = 0;
+
+ let TSFlags{0} = VM_CNT;
+ let TSFlags{1} = EXP_CNT;
+ let TSFlags{2} = LGKM_CNT;
+}
+
+class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ field bits<32> Inst;
+ let Size = 4;
+}
+
+class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+ let Size = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Scalar operations
+//===----------------------------------------------------------------------===//
+
+class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = op;
+ let Inst{22-16} = SDST;
+ let Inst{31-23} = 0x17d; //encoding;
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = SDST;
+ let Inst{29-23} = op;
+ let Inst{31-30} = 0x2; // encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17e;
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins , asm, pattern> {
+
+ bits <7> SDST;
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = SDST;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
+ (outs),
+ ins,
+ asm,
+ pattern > {
+
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f; // encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
+ list<dag> pattern> : Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<7> SBASE;
+ bits<8> OFFSET;
+
+ let Inst{7-0} = OFFSET;
+ let Inst{8} = imm;
+ let Inst{14-9} = SBASE{6-1};
+ let Inst{21-15} = SDST;
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+
+ let LGKM_CNT = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector ALU operations
+//===----------------------------------------------------------------------===//
+
+let Uses = [EXEC] in {
+
+class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = op;
+ let Inst{24-17} = VDST;
+ let Inst{31-25} = 0x3f; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = VDST;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<3> ABS;
+ bits<1> CLAMP;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{10-8} = ABS;
+ let Inst{11} = CLAMP;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<7> SDST;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{14-8} = SDST;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
+ Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
+
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e;
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<8> VSRC;
+ bits<2> ATTRCHAN;
+ bits<6> ATTR;
+
+ let Inst{7-0} = VSRC;
+ let Inst{9-8} = ATTRCHAN;
+ let Inst{15-10} = ATTR;
+ let Inst{17-16} = op;
+ let Inst{25-18} = VDST;
+ let Inst{31-26} = 0x32; // encoding
+
+ let neverHasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+} // End Uses = [EXEC]
+
+//===----------------------------------------------------------------------===//
+// Vector I/O operations
+//===----------------------------------------------------------------------===//
+
+let Uses = [EXEC] in {
+
+class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<1> LDS;
+ bits<8> VADDR;
+ bits<7> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{16} = LDS;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC{6-2};
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+
+ let neverHasSideEffects = 1;
+}
+
+class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<4> DFMT;
+ bits<3> NFMT;
+ bits<8> VADDR;
+ bits<7> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{18-16} = op;
+ let Inst{22-19} = DFMT;
+ let Inst{25-23} = NFMT;
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC{6-2};
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+
+ let neverHasSideEffects = 1;
+}
+
+class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<4> DMASK;
+ bits<1> UNORM;
+ bits<1> GLC;
+ bits<1> DA;
+ bits<1> R128;
+ bits<1> TFE;
+ bits<1> LWE;
+ bits<1> SLC;
+ bits<8> VADDR;
+ bits<7> SRSRC;
+ bits<7> SSAMP;
+
+ let Inst{11-8} = DMASK;
+ let Inst{12} = UNORM;
+ let Inst{13} = GLC;
+ let Inst{14} = DA;
+ let Inst{15} = R128;
+ let Inst{16} = TFE;
+ let Inst{17} = LWE;
+ let Inst{24-18} = op;
+ let Inst{25} = SLC;
+ let Inst{31-26} = 0x3c;
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC{6-2};
+ let Inst{57-53} = SSAMP{6-2};
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+}
+
+def EXP : Enc64<
+ (outs),
+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+ [] > {
+
+ bits<4> EN;
+ bits<6> TGT;
+ bits<1> COMPR;
+ bits<1> DONE;
+ bits<1> VM;
+ bits<8> VSRC0;
+ bits<8> VSRC1;
+ bits<8> VSRC2;
+ bits<8> VSRC3;
+
+ let Inst{3-0} = EN;
+ let Inst{9-4} = TGT;
+ let Inst{10} = COMPR;
+ let Inst{11} = DONE;
+ let Inst{12} = VM;
+ let Inst{31-26} = 0x3e;
+ let Inst{39-32} = VSRC0;
+ let Inst{47-40} = VSRC1;
+ let Inst{55-48} = VSRC2;
+ let Inst{63-56} = VSRC3;
+
+ let EXP_CNT = 1;
+}
+
+} // End Uses = [EXEC]
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
new file mode 100644
index 000000000000..0bfcef562f04
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -0,0 +1,264 @@
+//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIInstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include <stdio.h>
+
+using namespace llvm;
+
+SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this)
+ { }
+
+const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+void
+SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+
+ // If we are trying to copy to or from SCC, there is a bug somewhere else in
+ // the backend. While it may be theoretically possible to do this, it should
+ // never be necessary.
+ assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
+
+ const int16_t Sub0_15[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
+ };
+
+ const int16_t Sub0_7[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
+ };
+
+ const int16_t Sub0_3[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
+ };
+
+ const int16_t Sub0_1[] = {
+ AMDGPU::sub0, AMDGPU::sub1, 0
+ };
+
+ unsigned Opcode;
+ const int16_t *SubIndices;
+
+ if (AMDGPU::M0 == DestReg) {
+ // Check if M0 isn't already set to this value
+ for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
+ I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
+
+ if (!I->definesRegister(AMDGPU::M0))
+ continue;
+
+ unsigned Opc = I->getOpcode();
+ if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
+ break;
+
+ if (!I->readsRegister(SrcReg))
+ break;
+
+ // The copy isn't necessary
+ return;
+ }
+ }
+
+ if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_15;
+
+ } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_64RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_1;
+
+ } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_15;
+
+ } else {
+ llvm_unreachable("Can't copy register!");
+ }
+
+ while (unsigned SubIdx = *SubIndices++) {
+ MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
+ get(Opcode), RI.getSubReg(DestReg, SubIdx));
+
+ Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
+
+ if (*SubIndices)
+ Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
+ }
+}
+
+unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+
+ int NewOpc;
+
+ // Try to map original to commuted opcode
+ if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
+ return NewOpc;
+
+ // Try to map commuted to original opcode
+ if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
+ return NewOpc;
+
+ return Opcode;
+}
+
+MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+
+ if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() ||
+ !MI->getOperand(2).isReg())
+ return 0;
+
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+
+ if (MI)
+ MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+
+ return MI;
+}
+
+MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const {
+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32), DebugLoc());
+ MachineInstrBuilder MIB(*MF, MI);
+ MIB.addReg(DstReg, RegState::Define);
+ MIB.addImm(Imm);
+
+ return MI;
+
+}
+
+bool SIInstrInfo::isMov(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ return true;
+ }
+}
+
+bool
+SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ return RC != &AMDGPU::EXECRegRegClass;
+}
+
+//===----------------------------------------------------------------------===//
+// Indirect addressing callbacks
+//===----------------------------------------------------------------------===//
+
+unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+
+int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ llvm_unreachable("Unimplemented");
+}
+
+int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
+ llvm_unreachable("Unimplemented");
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address, unsigned OffsetReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectRead(
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address, unsigned OffsetReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
+ llvm_unreachable("Unimplemented");
+}
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.h b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
new file mode 100644
index 000000000000..d4e60e508634
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
@@ -0,0 +1,97 @@
+//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for SIInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIINSTRINFO_H
+#define SIINSTRINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+namespace llvm {
+
+class SIInstrInfo : public AMDGPUInstrInfo {
+private:
+ const SIRegisterInfo RI;
+
+public:
+ explicit SIInstrInfo(AMDGPUTargetMachine &tm);
+
+ const SIRegisterInfo &getRegisterInfo() const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ unsigned commuteOpcode(unsigned Opcode) const;
+
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI,
+ bool NewMI=false) const;
+
+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const;
+
+ virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
+ virtual bool isMov(unsigned Opcode) const;
+
+ virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
+ };
+
+namespace AMDGPU {
+
+ int getVOPe64(uint16_t Opcode);
+ int getCommuteRev(uint16_t Opcode);
+ int getCommuteOrig(uint16_t Opcode);
+
+} // End namespace AMDGPU
+
+} // End namespace llvm
+
+namespace SIInstrFlags {
+ enum Flags {
+ // First 4 bits are the instruction encoding
+ VM_CNT = 1 << 0,
+ EXP_CNT = 1 << 1,
+ LGKM_CNT = 1 << 2
+ };
+}
+
+#endif //SIINSTRINFO_H
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.td b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
new file mode 100644
index 000000000000..617f0b871c25
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
@@ -0,0 +1,356 @@
+//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI DAG Nodes
+//===----------------------------------------------------------------------===//
+
+// SMRD takes a 64bit memory address and can only add an 32bit offset
+def SIadd64bit32bit : SDNode<"ISD::ADD",
+ SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]>
+>;
+
+// Transformation function, extract the lower 32bit of a 64bit immediate
+def LO32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
+}]>;
+
+// Transformation function, extract the upper 32bit of a 64bit immediate
+def HI32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
+}]>;
+
+def IMM8bitDWORD : ImmLeaf <
+ i32, [{
+ return (Imm & ~0x3FC) == 0;
+ }], SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ N->getZExtValue() >> 2, MVT::i32);
+ }]>
+>;
+
+def IMM12bit : ImmLeaf <
+ i16,
+ [{return isUInt<12>(Imm);}]
+>;
+
+class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
+ return ((const SITargetLowering &)TLI).analyzeImmediate(N) == 0;
+}]>;
+
+//===----------------------------------------------------------------------===//
+// SI assembler operands
+//===----------------------------------------------------------------------===//
+
+def SIOperand {
+ int ZERO = 0x80;
+ int VCC = 0x6A;
+}
+
+include "SIInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction multiclass helpers.
+//
+// Instructions with _32 take 32-bit operands.
+// Instructions with _64 take 64-bit operands.
+//
+// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
+// encoding is the standard encoding, but instruction that make use of
+// any of the instruction modifiers must use the 64-bit encoding.
+//
+// Instructions with _e32 use the 32-bit encoding.
+// Instructions with _e64 use the 64-bit encoding.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Scalar classes
+//===----------------------------------------------------------------------===//
+
+class SOP1_32 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
+ op, (outs SReg_32:$dst), (ins SSrc_32:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
+ op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC <
+ op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK <
+ op, (outs SReg_32:$dst), (ins i16imm:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
+ op, (outs SReg_64:$dst), (ins i16imm:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
+ RegisterClass dstClass> {
+ def _IMM : SMRD <
+ op, 1, (outs dstClass:$dst),
+ (ins baseClass:$sbase, i32imm:$offset),
+ asm#" $dst, $sbase, $offset", []
+ >;
+
+ def _SGPR : SMRD <
+ op, 0, (outs dstClass:$dst),
+ (ins baseClass:$sbase, SReg_32:$soff),
+ asm#" $dst, $sbase, $soff", []
+ >;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector ALU classes
+//===----------------------------------------------------------------------===//
+
+class VOP <string opName> {
+ string OpName = opName;
+}
+
+class VOP2_REV <string revOp, bit isOrig> {
+ string RevOp = revOp;
+ bit IsOrig = isOrig;
+}
+
+multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
+ string opName, list<dag> pattern> {
+
+ def _e32 : VOP1 <
+ op, (outs drc:$dst), (ins src:$src0),
+ opName#"_e32 $dst, $src0", pattern
+ >, VOP <opName>;
+
+ def _e64 : VOP3 <
+ {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs drc:$dst),
+ (ins src:$src0,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName> {
+ let SRC1 = SIOperand.ZERO;
+ let SRC2 = SIOperand.ZERO;
+ }
+}
+
+multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+
+multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+
+multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern, string revOp> {
+ def _e32 : VOP2 <
+ op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
+ opName#"_e32 $dst, $src0, $src1", pattern
+ >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _e64 : VOP3 <
+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs vrc:$dst),
+ (ins arc:$src0, arc:$src1,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ let SRC2 = SIOperand.ZERO;
+ }
+}
+
+multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName>
+ : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
+
+multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName>
+ : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
+
+multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName> {
+
+ def _e32 : VOP2 <
+ op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
+ opName#"_e32 $dst, $src0, $src1", pattern
+ >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _e64 : VOP3b <
+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ let SRC2 = SIOperand.ZERO;
+ /* the VOP2 variant puts the carry out into VCC, the VOP3 variant
+ can write it into any SGPR. We currently don't use the carry out,
+ so for now hardcode it to VCC as well */
+ let SDST = SIOperand.VCC;
+ }
+}
+
+multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
+ string opName, ValueType vt, PatLeaf cond> {
+
+ def _e32 : VOPC <
+ op, (ins arc:$src0, vrc:$src1),
+ opName#"_e32 $dst, $src0, $src1", []
+ >, VOP <opName>;
+
+ def _e64 : VOP3 <
+ {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs SReg_64:$dst),
+ (ins arc:$src0, arc:$src1,
+ InstFlag:$abs, InstFlag:$clamp,
+ InstFlag:$omod, InstFlag:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg",
+ !if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
+ [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
+ )
+ >, VOP <opName> {
+ let SRC2 = SIOperand.ZERO;
+ }
+}
+
+multiclass VOPC_32 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond>;
+
+multiclass VOPC_64 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
+
+class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
+ op, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
+ i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+ opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+>, VOP <opName>;
+
+class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
+ op, (outs VReg_64:$dst),
+ (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2,
+ i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+ opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+>, VOP <opName>;
+
+//===----------------------------------------------------------------------===//
+// Vector I/O classes
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+ op,
+ (outs),
+ (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
+ SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
+ []> {
+ let mayStore = 1;
+ let mayLoad = 0;
+}
+
+class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
+ op,
+ (outs regClass:$dst),
+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc,
+ i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, "
+ #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+ op,
+ (outs regClass:$dst),
+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc,
+ i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
+ op,
+ (outs VReg_128:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
+ SReg_256:$srsrc, SReg_128:$ssamp),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector instruction mappings
+//===----------------------------------------------------------------------===//
+
+// Maps an opcode in e32 form to its e64 equivalent
+def getVOPe64 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["Size"];
+ let KeyCol = ["4"];
+ let ValueCols = [["8"]];
+}
+
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
+// Maps an commuted opcode to its original version
+def getCommuteOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+include "SIInstructions.td"
diff --git a/contrib/llvm/lib/Target/R600/SIInstructions.td b/contrib/llvm/lib/Target/R600/SIInstructions.td
new file mode 100644
index 000000000000..4f734f91245a
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIInstructions.td
@@ -0,0 +1,1607 @@
+//===-- SIInstructions.td - SI Instruction Defintions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file was originally auto-generated from a GPU register header file and
+// all the instruction definitions were originally commented out. Instructions
+// that are not yet supported remain commented out.
+//===----------------------------------------------------------------------===//
+
+class InterpSlots {
+int P0 = 2;
+int P10 = 0;
+int P20 = 1;
+}
+def INTERP : InterpSlots;
+
+def InterpSlot : Operand<i32> {
+ let PrintMethod = "printInterpSlot";
+}
+
+def isSI : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
+
+let Predicates = [isSI] in {
+
+let neverHasSideEffects = 1 in {
+
+let isMoveImm = 1 in {
+def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
+def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
+def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
+def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
+} // End isMoveImm = 1
+
+def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
+def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
+def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
+def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
+def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
+} // End neverHasSideEffects = 1
+
+////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
+////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
+////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
+////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
+////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
+////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
+////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
+////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
+//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
+//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
+def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
+//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
+//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
+//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
+////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
+////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
+////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
+////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
+def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
+def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
+def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
+def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
+
+let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in {
+
+def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
+def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
+def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
+def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
+def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
+def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
+def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
+
+} // End hasSideEffects = 1
+
+def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
+def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
+def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
+def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
+def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
+def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
+//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
+def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
+def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
+def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
+def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
+
+/*
+This instruction is disabled for now until we can figure out how to teach
+the instruction selector to correctly use the S_CMP* vs V_CMP*
+instructions.
+
+When this instruction is enabled the code generator sometimes produces this
+invalid sequence:
+
+SCC = S_CMPK_EQ_I32 SGPR0, imm
+VCC = COPY SCC
+VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
+
+def S_CMPK_EQ_I32 : SOPK <
+ 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
+ "S_CMPK_EQ_I32",
+ [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
+>;
+*/
+
+let isCompare = 1 in {
+def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
+def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
+def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
+def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
+def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
+def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
+def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
+def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
+def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
+def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
+def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+} // End isCompare = 1
+
+def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
+def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
+def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
+def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
+def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
+//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
+//def EXP : EXP_ <0x00000000, "EXP", []>;
+
+let isCompare = 1 in {
+
+defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">;
+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_LT>;
+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_EQ>;
+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_LE>;
+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_GT>;
+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", f32, COND_NE>;
+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_GE>;
+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32">;
+defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32">;
+defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">;
+defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">;
+defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">;
+defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">;
+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_NE>;
+defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
+defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">;
+defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">;
+defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">;
+defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">;
+defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">;
+defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">;
+defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">;
+defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">;
+defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">;
+defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">;
+defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">;
+defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">;
+defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">;
+defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">;
+defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">;
+defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64">;
+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64">;
+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64">;
+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64">;
+defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">;
+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64">;
+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">;
+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">;
+defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">;
+defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">;
+defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">;
+defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">;
+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64">;
+defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
+defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">;
+defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">;
+defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">;
+defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">;
+defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">;
+defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">;
+defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">;
+defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">;
+defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">;
+defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">;
+defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">;
+defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">;
+defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">;
+defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">;
+defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">;
+defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">;
+defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">;
+defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32">;
+defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32">;
+defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32">;
+defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32">;
+defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32">;
+defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32">;
+defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32">;
+defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32">;
+defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32">;
+defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32">;
+defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32">;
+defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">;
+defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">;
+defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">;
+defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">;
+defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">;
+defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">;
+defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">;
+defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">;
+defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">;
+defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">;
+defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">;
+defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">;
+defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">;
+defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">;
+defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">;
+defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
+defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">;
+defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">;
+defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">;
+defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64">;
+defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64">;
+defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64">;
+defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64">;
+defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64">;
+defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64">;
+defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64">;
+defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64">;
+defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64">;
+defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64">;
+defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64">;
+defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64">;
+defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64">;
+defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64">;
+defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64">;
+defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64">;
+defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64">;
+defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64">;
+defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64">;
+defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64">;
+defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64">;
+defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64">;
+defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64">;
+defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64">;
+defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64">;
+defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64">;
+defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64">;
+defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64">;
+defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">;
+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_LT>;
+defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>;
+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_LE>;
+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_GT>;
+defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_GE>;
+defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">;
+defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">;
+defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">;
+defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">;
+defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">;
+defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">;
+defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">;
+defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64">;
+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64">;
+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64">;
+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64">;
+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64">;
+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64">;
+defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">;
+defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">;
+defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">;
+defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">;
+defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">;
+defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">;
+defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">;
+defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32">;
+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32">;
+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32">;
+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32">;
+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32">;
+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32">;
+defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">;
+defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">;
+defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">;
+defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">;
+defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">;
+defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">;
+defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">;
+defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64">;
+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64">;
+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64">;
+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64">;
+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64">;
+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64">;
+defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">;
+defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">;
+defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">;
+defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">;
+defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">;
+defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">;
+defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">;
+defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">;
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+} // End isCompare = 1
+
+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
+def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
+//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>;
+//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
+//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
+//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
+def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
+def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
+def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
+//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
+//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
+//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
+//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
+//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
+//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
+//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
+//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
+//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
+//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
+//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
+//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
+//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
+//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
+def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
+//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
+//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
+//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
+
+let mayLoad = 1 in {
+
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
+
+defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+>;
+
+defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
+ 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
+ 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
+ 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
+ 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+>;
+
+} // mayLoad = 1
+
+//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
+//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
+//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
+//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
+def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">;
+//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
+def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, "IMAGE_SAMPLE_D">;
+//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
+def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">;
+def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">;
+//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
+//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
+def IMAGE_SAMPLE_C : MIMG_Load_Helper <0x00000028, "IMAGE_SAMPLE_C">;
+//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
+//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
+//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
+def IMAGE_SAMPLE_C_L : MIMG_Load_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
+def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
+//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
+//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
+//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
+//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
+//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
+//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
+//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
+//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
+//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
+//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
+//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
+//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
+//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
+//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
+//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
+//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
+//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
+//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
+//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
+//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
+//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
+//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
+//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
+//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
+//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
+//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
+//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
+//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
+//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
+//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
+//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
+//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
+//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
+//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
+//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
+//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
+//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
+//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
+//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
+//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
+//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
+//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
+//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
+//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
+//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
+//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
+//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
+//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
+//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
+//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
+//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
+//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
+
+
+let neverHasSideEffects = 1, isMoveImm = 1 in {
+defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
+} // End neverHasSideEffects = 1, isMoveImm = 1
+
+defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
+//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
+//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
+defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
+ [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
+>;
+//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
+//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
+ [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
+>;
+defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
+////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
+//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
+//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
+//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
+//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
+//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
+//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
+//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
+//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
+//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
+defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
+ [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))]
+>;
+defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
+defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
+ [(set VReg_32:$dst, (fceil VSrc_32:$src0))]
+>;
+defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
+ [(set VReg_32:$dst, (frint VSrc_32:$src0))]
+>;
+defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
+ [(set VReg_32:$dst, (ffloor VSrc_32:$src0))]
+>;
+defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
+ [(set VReg_32:$dst, (fexp2 VSrc_32:$src0))]
+>;
+defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
+defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
+ [(set VReg_32:$dst, (flog2 VSrc_32:$src0))]
+>;
+defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
+defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
+defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
+ [(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))]
+>;
+defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
+defm V_RSQ_LEGACY_F32 : VOP1_32 <
+ 0x0000002d, "V_RSQ_LEGACY_F32",
+ [(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))]
+>;
+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
+defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
+defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
+defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>;
+defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>;
+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
+defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
+defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
+defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
+defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
+defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
+//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
+defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
+defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
+//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
+defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
+//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
+defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
+defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
+defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
+
+def V_INTERP_P1_F32 : VINTRP <
+ 0x00000000,
+ (outs VReg_32:$dst),
+ (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]",
+ []> {
+ let DisableEncoding = "$m0";
+}
+
+def V_INTERP_P2_F32 : VINTRP <
+ 0x00000001,
+ (outs VReg_32:$dst),
+ (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
+ []> {
+
+ let Constraints = "$src0 = $dst";
+ let DisableEncoding = "$src0,$m0";
+
+}
+
+def V_INTERP_MOV_F32 : VINTRP <
+ 0x00000002,
+ (outs VReg_32:$dst),
+ (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]",
+ []> {
+ let DisableEncoding = "$m0";
+}
+
+//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
+
+let isTerminator = 1 in {
+
+def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
+ [(IL_retflag)]> {
+ let SIMM16 = 0;
+ let isBarrier = 1;
+ let hasCtrlDep = 1;
+}
+
+let isBranch = 1 in {
+def S_BRANCH : SOPP <
+ 0x00000002, (ins brtarget:$target), "S_BRANCH $target",
+ [(br bb:$target)]> {
+ let isBarrier = 1;
+}
+
+let DisableEncoding = "$scc" in {
+def S_CBRANCH_SCC0 : SOPP <
+ 0x00000004, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC0 $target", []
+>;
+def S_CBRANCH_SCC1 : SOPP <
+ 0x00000005, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC1 $target",
+ []
+>;
+} // End DisableEncoding = "$scc"
+
+def S_CBRANCH_VCCZ : SOPP <
+ 0x00000006, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCZ $target",
+ []
+>;
+def S_CBRANCH_VCCNZ : SOPP <
+ 0x00000007, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCNZ $target",
+ []
+>;
+
+let DisableEncoding = "$exec" in {
+def S_CBRANCH_EXECZ : SOPP <
+ 0x00000008, (ins brtarget:$target, EXECReg:$exec),
+ "S_CBRANCH_EXECZ $target",
+ []
+>;
+def S_CBRANCH_EXECNZ : SOPP <
+ 0x00000009, (ins brtarget:$target, EXECReg:$exec),
+ "S_CBRANCH_EXECNZ $target",
+ []
+>;
+} // End DisableEncoding = "$exec"
+
+
+} // End isBranch = 1
+} // End isTerminator = 1
+
+//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>;
+let hasSideEffects = 1 in {
+def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
+ []
+>;
+} // End hasSideEffects
+//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
+//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
+//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
+//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
+//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
+//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
+//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
+//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+
+def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
+ "V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]",
+ []
+>{
+ let DisableEncoding = "$vcc";
+}
+
+def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
+ InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
+ "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
+ [(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2),
+ VSrc_32:$src1, VSrc_32:$src0))]
+>;
+
+//f32 pattern for V_CNDMASK_B32_e64
+def : Pat <
+ (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)),
+ (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2)
+>;
+
+defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
+defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
+
+let isCommutable = 1 in {
+defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
+ [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
+ [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
+>;
+defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
+} // End isCommutable = 1
+
+defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
+
+let isCommutable = 1 in {
+
+defm V_MUL_LEGACY_F32 : VOP2_32 <
+ 0x00000007, "V_MUL_LEGACY_F32",
+ [(set VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
+ [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+} // End isCommutable = 1
+
+//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>;
+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
+//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+
+let isCommutable = 1 in {
+
+defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
+ [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
+ [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
+defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
+ [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
+
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
+ [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
+
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
+ [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
+
+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
+ [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
+>;
+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
+ [(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))]
+>;
+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
+ [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+} // End isCommutable = 1
+
+defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
+defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
+defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
+defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
+//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
+//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
+//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
+
+let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
+ [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
+>;
+
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
+ [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
+>;
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
+
+let Uses = [VCC] in { // Carry-out comes from VCC
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
+} // End Uses = [VCC]
+} // End isCommutable = 1, Defs = [VCC]
+
+defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
+ [(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))]
+>;
+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+
+let neverHasSideEffects = 1 in {
+
+def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
+def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
+//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>;
+//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>;
+
+} // End neverHasSideEffects
+def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
+def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
+def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
+def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
+def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
+def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
+def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
+def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
+def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
+def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
+def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
+def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
+////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
+////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
+////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
+////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
+////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
+////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
+////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
+////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
+////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
+def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
+def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
+def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>;
+def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>;
+def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>;
+def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
+def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
+def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
+def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
+def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+
+let isCommutable = 1 in {
+
+def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
+def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
+def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+
+} // isCommutable = 1
+
+def : Pat <
+ (mul VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def : Pat <
+ (mulhu VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def : Pat <
+ (mulhs VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
+def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
+def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
+def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
+def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
+def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
+def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
+
+def S_CSELECT_B32 : SOP2 <
+ 0x0000000a, (outs SReg_32:$dst),
+ (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
+ [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
+ SReg_32:$src0, SReg_32:$src1))]
+>;
+
+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
+
+// f32 pattern for S_CSELECT_B32
+def : Pat <
+ (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
+ (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
+>;
+
+def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
+
+def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
+ [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))]
+>;
+
+def : Pat <
+ (i1 (and SSrc_64:$src0, SSrc_64:$src1)),
+ (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1)
+>;
+
+def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
+def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
+def : Pat <
+ (i1 (or SSrc_64:$src0, SSrc_64:$src1)),
+ (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1)
+>;
+def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
+def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
+def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
+def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
+def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
+def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
+def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
+def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
+def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
+def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
+def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
+def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
+def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
+def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
+def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
+def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
+def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
+def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
+def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
+def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
+def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+def LOAD_CONST : AMDGPUShaderInst <
+ (outs GPRF32:$dst),
+ (ins i32imm:$src),
+ "LOAD_CONST $dst, $src",
+ [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+>;
+
+// SI Psuedo instructions. These are used by the CFG structurizer pass
+// and should be lowered to ISA instructions prior to codegen.
+
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
+ Uses = [EXEC], Defs = [EXEC] in {
+
+let isBranch = 1, isTerminator = 1 in {
+
+def SI_IF : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$vcc, brtarget:$target),
+ "SI_IF $dst, $vcc, $target",
+ [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))]
+>;
+
+def SI_ELSE : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src, brtarget:$target),
+ "SI_ELSE $dst, $src, $target",
+ [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> {
+
+ let Constraints = "$src = $dst";
+}
+
+def SI_LOOP : InstSI <
+ (outs),
+ (ins SReg_64:$saved, brtarget:$target),
+ "SI_LOOP $saved, $target",
+ [(int_SI_loop SReg_64:$saved, bb:$target)]
+>;
+
+} // end isBranch = 1, isTerminator = 1
+
+def SI_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src),
+ "SI_ELSE $dst, $src",
+ [(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
+>;
+
+def SI_IF_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$vcc, SReg_64:$src),
+ "SI_IF_BREAK $dst, $vcc, $src",
+ [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))]
+>;
+
+def SI_ELSE_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src0, SReg_64:$src1),
+ "SI_ELSE_BREAK $dst, $src0, $src1",
+ [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
+>;
+
+def SI_END_CF : InstSI <
+ (outs),
+ (ins SReg_64:$saved),
+ "SI_END_CF $saved",
+ [(int_SI_end_cf SReg_64:$saved)]
+>;
+
+def SI_KILL : InstSI <
+ (outs),
+ (ins VReg_32:$src),
+ "SI_KIL $src",
+ [(int_AMDGPU_kill VReg_32:$src)]
+>;
+
+} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
+ // Uses = [EXEC], Defs = [EXEC]
+
+let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+
+def SI_INDIRECT_SRC : InstSI <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ []
+>;
+
+class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
+ (outs rc:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
+ "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ []
+> {
+ let Constraints = "$src = $dst";
+}
+
+def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
+def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
+def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
+def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
+
+} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
+
+} // end IsCodeGenOnly, isPseudo
+
+def : Pat<
+ (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
+ (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0))
+>;
+
+def : Pat <
+ (int_AMDGPU_kilp),
+ (SI_KILL (V_MOV_B32_e32 0xbf800000))
+>;
+
+/* int_SI_vs_load_input */
+def : Pat<
+ (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
+ VReg_32:$buf_idx_vgpr),
+ (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
+ VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
+ 0, 0, 0)
+>;
+
+/* int_SI_export */
+def : Pat <
+ (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
+ VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
+>;
+
+
+/* int_SI_sample for simple 1D texture lookup */
+def : Pat <
+ (int_SI_sample imm:$writemask, VReg_32:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler, imm),
+ (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, imm),
+ (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
+ (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
+ (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleShadowPattern<Intrinsic name, MIMG opcode,
+ RegisterClass addr_class, ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
+ (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
+ RegisterClass addr_class, ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
+ (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+/* int_SI_sample* for texture lookups consuming more address parameters */
+multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> {
+ def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
+
+ def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
+
+ def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
+}
+
+defm : SamplePatterns<VReg_64, v2i32>;
+defm : SamplePatterns<VReg_128, v4i32>;
+defm : SamplePatterns<VReg_256, v8i32>;
+defm : SamplePatterns<VReg_512, v16i32>;
+
+/********** ============================================ **********/
+/********** Extraction, Insertion, Building and Casting **********/
+/********** ============================================ **********/
+
+foreach Index = 0-2 in {
+ def Extract_Element_v2i32_#Index : Extract_Element <
+ i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2i32_#Index : Insert_Element <
+ i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v2f32_#Index : Extract_Element <
+ f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2f32_#Index : Insert_Element <
+ f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-3 in {
+ def Extract_Element_v4i32_#Index : Extract_Element <
+ i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4i32_#Index : Insert_Element <
+ i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v4f32_#Index : Extract_Element <
+ f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4f32_#Index : Insert_Element <
+ f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-7 in {
+ def Extract_Element_v8i32_#Index : Extract_Element <
+ i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8i32_#Index : Insert_Element <
+ i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v8f32_#Index : Extract_Element <
+ f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8f32_#Index : Insert_Element <
+ f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-15 in {
+ def Extract_Element_v16i32_#Index : Extract_Element <
+ i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16i32_#Index : Insert_Element <
+ i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v16f32_#Index : Extract_Element <
+ f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16f32_#Index : Insert_Element <
+ f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
+def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
+def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
+def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
+def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
+def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
+def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
+def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
+
+def : BitConvert <i32, f32, SReg_32>;
+def : BitConvert <i32, f32, VReg_32>;
+
+def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <f32, i32, VReg_32>;
+
+/********** =================== **********/
+/********** Src & Dst modifiers **********/
+/********** =================== **********/
+
+def : Pat <
+ (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+>;
+
+def : Pat <
+ (fabs VReg_32:$src),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+>;
+
+def : Pat <
+ (fneg VReg_32:$src),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
+>;
+
+/********** ================== **********/
+/********** Immediate Patterns **********/
+/********** ================== **********/
+
+def : Pat <
+ (i32 imm:$imm),
+ (V_MOV_B32_e32 imm:$imm)
+>;
+
+def : Pat <
+ (f32 fpimm:$imm),
+ (V_MOV_B32_e32 fpimm:$imm)
+>;
+
+def : Pat <
+ (i1 imm:$imm),
+ (S_MOV_B64 imm:$imm)
+>;
+
+def : Pat <
+ (i64 InlineImm<i64>:$imm),
+ (S_MOV_B64 InlineImm<i64>:$imm)
+>;
+
+// i64 immediates aren't supported in hardware, split it into two 32bit values
+def : Pat <
+ (i64 imm:$imm),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (S_MOV_B32 (i32 (LO32 imm:$imm))), sub0),
+ (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1)
+>;
+
+/********** ===================== **********/
+/********** Interpolation Paterns **********/
+/********** ===================== **********/
+
+def : Pat <
+ (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
+ (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
+>;
+
+def : Pat <
+ (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
+ (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
+ imm:$attr_chan, imm:$attr, M0Reg:$params),
+ (EXTRACT_SUBREG VReg_64:$ij, sub1),
+ imm:$attr_chan, imm:$attr, M0Reg:$params)
+>;
+
+/********** ================== **********/
+/********** Intrinsic Patterns **********/
+/********** ================== **********/
+
+/* llvm.AMDGPU.pow */
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
+
+def : Pat <
+ (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
+ (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1))
+>;
+
+def : Pat<
+ (fdiv VSrc_32:$src0, VSrc_32:$src1),
+ (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1))
+>;
+
+def : Pat <
+ (fcos VSrc_32:$src0),
+ (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+>;
+
+def : Pat <
+ (fsin VSrc_32:$src0),
+ (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+>;
+
+def : Pat <
+ (int_AMDGPU_cube VReg_128:$src),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub0),
+ (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub1),
+ (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub2),
+ (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub3)
+>;
+
+def : Pat <
+ (i32 (sext (i1 SReg_64:$src0))),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
+>;
+
+// 1. Offset as 8bit DWORD immediate
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
+>;
+
+// 2. Offset loaded in an 32bit SGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, imm:$offset),
+ (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
+>;
+
+// 3. Offset in an 32Bit VGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
+ (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
+>;
+
+/********** ================== **********/
+/********** VOP3 Patterns **********/
+/********** ================== **********/
+
+def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
+ (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
+ 0, 0, 0, 0)>;
+
+/********** ================== **********/
+/********** SMRD Patterns **********/
+/********** ================== **********/
+
+multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+ // 1. Offset as 8bit DWORD immediate
+ def : Pat <
+ (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)),
+ (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset))
+ >;
+
+ // 2. Offset loaded in an 32bit SGPR
+ def : Pat <
+ (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
+ (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset)))
+ >;
+
+ // 3. No offset at all
+ def : Pat <
+ (constant_load SReg_64:$sbase),
+ (vt (Instr_IMM SReg_64:$sbase, 0))
+ >;
+}
+
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+
+/********** ====================== **********/
+/********** Indirect adressing **********/
+/********** ====================== **********/
+
+multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
+ SI_INDIRECT_DST IndDst> {
+ // 1. Extract with offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
+ >;
+
+ // 2. Extract without offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
+ >;
+
+ // 3. Insert with offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
+ >;
+
+ // 4. Insert without offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
+ >;
+}
+
+defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
+
+/********** =============== **********/
+/********** Conditions **********/
+/********** =============== **********/
+
+def : Pat<
+ (i1 (setcc f32:$src0, f32:$src1, SETO)),
+ (V_CMP_O_F32_e64 f32:$src0, f32:$src1)
+>;
+
+def : Pat<
+ (i1 (setcc f32:$src0, f32:$src1, SETUO)),
+ (V_CMP_U_F32_e64 f32:$src0, f32:$src1)
+>;
+
+} // End isSI predicate
diff --git a/contrib/llvm/lib/Target/R600/SIIntrinsics.td b/contrib/llvm/lib/Target/R600/SIIntrinsics.td
new file mode 100644
index 000000000000..0af378edfe2e
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIIntrinsics.td
@@ -0,0 +1,42 @@
+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+
+let TargetPrefix = "SI", isTarget = 1 in {
+
+ def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
+
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_SI_sample : Sample;
+ def int_SI_sampleb : Sample;
+ def int_SI_samplel : Sample;
+
+ /* Interpolation Intrinsics */
+
+ def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>;
+
+ /* Control flow Intrinsics */
+
+ def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
+ def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>;
+ def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
+ def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>;
+ def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>;
+ def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>;
+ def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>;
+}
diff --git a/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
new file mode 100644
index 000000000000..2b60eb9fb375
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
@@ -0,0 +1,501 @@
+//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass lowers the pseudo control flow instructions to real
+/// machine instructions.
+///
+/// All control flow is handled using predicated instructions and
+/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
+/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
+/// by writting to the 64-bit EXEC register (each bit corresponds to a
+/// single vector ALU). Typically, for predicates, a vector ALU will write
+/// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
+/// Vector ALU) and then the ScalarALU will AND the VCC register with the
+/// EXEC to update the predicates.
+///
+/// For example:
+/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
+/// %SGPR0 = SI_IF %VCC
+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
+/// %SGPR0 = SI_ELSE %SGPR0
+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
+/// SI_END_CF %SGPR0
+///
+/// becomes:
+///
+/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
+/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
+/// S_CBRANCH_EXECZ label0 // This instruction is an optional
+/// // optimization which allows us to
+/// // branch if all the bits of
+/// // EXEC are zero.
+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
+///
+/// label0:
+/// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block
+/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
+/// S_BRANCH_EXECZ label1 // Use our branch optimization
+/// // instruction again.
+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block
+/// label1:
+/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerControlFlowPass : public MachineFunctionPass {
+
+private:
+ static const unsigned SkipThreshold = 12;
+
+ static char ID;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
+
+ void Skip(MachineInstr &From, MachineOperand &To);
+ void SkipIfDead(MachineInstr &MI);
+
+ void If(MachineInstr &MI);
+ void Else(MachineInstr &MI);
+ void Break(MachineInstr &MI);
+ void IfBreak(MachineInstr &MI);
+ void ElseBreak(MachineInstr &MI);
+ void Loop(MachineInstr &MI);
+ void EndCf(MachineInstr &MI);
+
+ void Kill(MachineInstr &MI);
+ void Branch(MachineInstr &MI);
+
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void IndirectSrc(MachineInstr &MI);
+ void IndirectDst(MachineInstr &MI);
+
+public:
+ SILowerControlFlowPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TRI(tm.getRegisterInfo()),
+ TII(tm.getInstrInfo()) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "SI Lower control flow instructions";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SILowerControlFlowPass::ID = 0;
+
+FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
+ return new SILowerControlFlowPass(tm);
+}
+
+bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+
+ unsigned NumInstr = 0;
+
+ for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
+ MBB = *MBB->succ_begin()) {
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ NumInstr < SkipThreshold && I != E; ++I) {
+
+ if (I->isBundle() || !I->isBundled())
+ if (++NumInstr >= SkipThreshold)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+
+ if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
+ return;
+
+ DebugLoc DL = From.getDebugLoc();
+ BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addOperand(To)
+ .addReg(AMDGPU::EXEC);
+}
+
+void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+ return;
+
+ MachineBasicBlock::iterator Insert = &MI;
+ ++Insert;
+
+ // If the exec mask is non-zero, skip the next two instructions
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(3)
+ .addReg(AMDGPU::EXEC);
+
+ // Exec mask is zero: Export to NULL target...
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0);
+
+ // ... and terminate wavefront
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+}
+
+void SILowerControlFlowPass::If(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned Vcc = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
+ .addReg(Vcc);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Reg);
+
+ Skip(MI, MI.getOperand(2));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Else(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+ TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
+ .addReg(Src); // Saved EXEC
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Dst);
+
+ Skip(MI, MI.getOperand(2));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Break(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IfBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vcc = MI.getOperand(1).getReg();
+ unsigned Src = MI.getOperand(2).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(Vcc)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Saved = MI.getOperand(1).getReg();
+ unsigned Src = MI.getOperand(2).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(Saved)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Loop(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Src = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Src);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addOperand(MI.getOperand(1))
+ .addReg(AMDGPU::EXEC);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+ TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Reg);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Branch(MachineInstr &MI) {
+ MachineBasicBlock *Next = MI.getParent()->getNextNode();
+ MachineBasicBlock *Target = MI.getOperand(0).getMBB();
+ if (Target == Next)
+ MI.eraseFromParent();
+ else
+ assert(0);
+}
+
+void SILowerControlFlowPass::Kill(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Kill is only allowed in pixel shaders
+ assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+ ShaderType::PIXEL);
+
+ // Clear this pixel from the exec mask if the operand is negative
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+ .addImm(0)
+ .addOperand(MI.getOperand(0));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I = MI;
+
+ unsigned Save = MI.getOperand(1).getReg();
+ unsigned Idx = MI.getOperand(3).getReg();
+
+ if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ MBB.insert(I, MovRel);
+ MI.eraseFromParent();
+ return;
+ }
+
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
+
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
+
+ // Read the next variant into VCC (lower 32 bits) <- also loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+ .addReg(Idx);
+
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC);
+
+ // Compare the just read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
+
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+
+ // Do the actual move
+ MBB.insert(I, MovRel);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
+
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vec = MI.getOperand(2).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Vec, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+ unsigned Val = MI.getOperand(5).getReg();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
+ .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(Val)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Dst, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
+
+ bool HaveKill = false;
+ bool NeedWQM = false;
+ unsigned Depth = 0;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::SI_IF:
+ ++Depth;
+ If(MI);
+ break;
+
+ case AMDGPU::SI_ELSE:
+ Else(MI);
+ break;
+
+ case AMDGPU::SI_BREAK:
+ Break(MI);
+ break;
+
+ case AMDGPU::SI_IF_BREAK:
+ IfBreak(MI);
+ break;
+
+ case AMDGPU::SI_ELSE_BREAK:
+ ElseBreak(MI);
+ break;
+
+ case AMDGPU::SI_LOOP:
+ ++Depth;
+ Loop(MI);
+ break;
+
+ case AMDGPU::SI_END_CF:
+ if (--Depth == 0 && HaveKill) {
+ SkipIfDead(MI);
+ HaveKill = false;
+ }
+ EndCf(MI);
+ break;
+
+ case AMDGPU::SI_KILL:
+ if (Depth == 0)
+ SkipIfDead(MI);
+ else
+ HaveKill = true;
+ Kill(MI);
+ break;
+
+ case AMDGPU::S_BRANCH:
+ Branch(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_SRC:
+ IndirectSrc(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_DST_V2:
+ case AMDGPU::SI_INDIRECT_DST_V4:
+ case AMDGPU::SI_INDIRECT_DST_V8:
+ case AMDGPU::SI_INDIRECT_DST_V16:
+ IndirectDst(MI);
+ break;
+
+ case AMDGPU::V_INTERP_P1_F32:
+ case AMDGPU::V_INTERP_P2_F32:
+ case AMDGPU::V_INTERP_MOV_F32:
+ NeedWQM = true;
+ break;
+
+ }
+ }
+ }
+
+ if (NeedWQM) {
+ MachineBasicBlock &MBB = MF.front();
+ BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
+ AMDGPU::EXEC).addReg(AMDGPU::EXEC);
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..ee0e30755f01
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -0,0 +1,18 @@
+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
+ : AMDGPUMachineFunction(MF),
+ PSInputAddr(0) { }
diff --git a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h
new file mode 100644
index 000000000000..6da9f7f9a14d
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h
@@ -0,0 +1,33 @@
+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIMACHINEFUNCTIONINFO_H_
+#define SIMACHINEFUNCTIONINFO_H_
+
+#include "AMDGPUMachineFunction.h"
+
+namespace llvm {
+
+/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
+/// tells the hardware which interpolation parameters to load.
+class SIMachineFunctionInfo : public AMDGPUMachineFunction {
+public:
+ SIMachineFunctionInfo(const MachineFunction &MF);
+ unsigned PSInputAddr;
+};
+
+} // End namespace llvm
+
+
+#endif //_SIMACHINEFUNCTIONINFO_H_
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
new file mode 100644
index 000000000000..99278ae8dceb
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
@@ -0,0 +1,53 @@
+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIRegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved;
+}
+
+unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ return RC->getNumRegs();
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
+ switch (rc->getID()) {
+ case AMDGPU::GPRF32RegClassID:
+ return &AMDGPU::VReg_32RegClass;
+ default: return rc;
+ }
+}
+
+const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const {
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::VReg_32RegClass;
+ }
+}
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.h b/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
new file mode 100644
index 000000000000..caec22841345
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
@@ -0,0 +1,50 @@
+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for SIRegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIREGISTERINFO_H_
+#define SIREGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct SIRegisterInfo : public AMDGPURegisterInfo {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns the SI register class that is equivalent to \p RC.
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass *RC) const;
+
+ /// \brief get the register class of the specified type to use in the
+ /// CFGStructurizer
+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+};
+
+} // End namespace llvm
+
+#endif // SIREGISTERINFO_H_
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
new file mode 100644
index 000000000000..4f14931a9c48
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
@@ -0,0 +1,182 @@
+//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the SI registers
+//===----------------------------------------------------------------------===//
+
+class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+// Special Registers
+def VCC : SIReg<"VCC", 106>;
+def EXEC : SIReg<"EXEC", 126>;
+def SCC : SIReg<"SCC", 253>;
+def M0 : SIReg <"M0", 124>;
+
+// SGPR registers
+foreach Index = 0-101 in {
+ def SGPR#Index : SIReg <"SGPR"#Index, Index>;
+}
+
+// VGPR registers
+foreach Index = 0-255 in {
+ def VGPR#Index : SIReg <"VGPR"#Index, Index> {
+ let HWEncoding{8} = 1;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Groupings using register classes and tuples
+//===----------------------------------------------------------------------===//
+
+// SGPR 32-bit registers
+def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "SGPR%u", 0, 101))>;
+
+// SGPR 64-bit registers
+def SGPR_64 : RegisterTuples<[sub0, sub1],
+ [(add (decimate (trunc SGPR_32, 101), 2)),
+ (add (decimate (shl SGPR_32, 1), 2))]>;
+
+// SGPR 128-bit registers
+def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+ [(add (decimate (trunc SGPR_32, 99), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4))]>;
+
+// SGPR 256-bit registers
+def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add (decimate (trunc SGPR_32, 95), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4))]>;
+
+// SGPR 512-bit registers
+def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (decimate (trunc SGPR_32, 87), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4)),
+ (add (decimate (shl SGPR_32, 8), 4)),
+ (add (decimate (shl SGPR_32, 9), 4)),
+ (add (decimate (shl SGPR_32, 10), 4)),
+ (add (decimate (shl SGPR_32, 11), 4)),
+ (add (decimate (shl SGPR_32, 12), 4)),
+ (add (decimate (shl SGPR_32, 13), 4)),
+ (add (decimate (shl SGPR_32, 14), 4)),
+ (add (decimate (shl SGPR_32, 15), 4))]>;
+
+// VGPR 32-bit registers
+def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "VGPR%u", 0, 255))>;
+
+// VGPR 64-bit registers
+def VGPR_64 : RegisterTuples<[sub0, sub1],
+ [(add (trunc VGPR_32, 255)),
+ (add (shl VGPR_32, 1))]>;
+
+// VGPR 128-bit registers
+def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+ [(add (trunc VGPR_32, 253)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3))]>;
+
+// VGPR 256-bit registers
+def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add (trunc VGPR_32, 249)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7))]>;
+
+// VGPR 512-bit registers
+def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (trunc VGPR_32, 241)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7)),
+ (add (shl VGPR_32, 8)),
+ (add (shl VGPR_32, 9)),
+ (add (shl VGPR_32, 10)),
+ (add (shl VGPR_32, 11)),
+ (add (shl VGPR_32, 12)),
+ (add (shl VGPR_32, 13)),
+ (add (shl VGPR_32, 14)),
+ (add (shl VGPR_32, 15))]>;
+
+//===----------------------------------------------------------------------===//
+// Register classes used as source and destination
+//===----------------------------------------------------------------------===//
+
+// Special register classes for predicates and the M0 register
+def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
+def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
+def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
+def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
+
+// Register class for all scalar registers (SGPRs + Special Registers)
+def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add SGPR_32, M0Reg)
+>;
+
+def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
+ (add SGPR_64, VCCReg, EXECReg)
+>;
+
+def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>;
+
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
+
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
+
+// Register class for all vector registers (VGPRs + Interploation Registers)
+def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
+
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
+
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+
+//===----------------------------------------------------------------------===//
+// [SV]Src_* register classes, can have either an immediate or an register
+//===----------------------------------------------------------------------===//
+
+def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
+
+def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>;
+
+def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
+
+def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+
diff --git a/contrib/llvm/lib/Target/R600/SISchedule.td b/contrib/llvm/lib/Target/R600/SISchedule.td
new file mode 100644
index 000000000000..28b65b825855
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SISchedule.td
@@ -0,0 +1,15 @@
+//===-- SISchedule.td - SI Scheduling definitons -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: This is just a place holder for now.
+//
+//===----------------------------------------------------------------------===//
+
+
+def SI_Itin : ProcessorItineraries <[], [], []>;
diff --git a/contrib/llvm/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp b/contrib/llvm/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
new file mode 100644
index 000000000000..46b1f18c6263
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+/// \brief The target for the AMDGPU backend
+Target llvm::TheAMDGPUTarget;
+
+/// \brief Extern function to initialize the targets for the AMDGPU backend
+extern "C" void LLVMInitializeR600TargetInfo() {
+ RegisterTarget<Triple::r600, false>
+ R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
+}
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
new file mode 100644
index 000000000000..6123773d5f4b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -0,0 +1,320 @@
+//===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a simple local pass that attempts to fill delay slots with useful
+// instructions. If no instructions can be moved into the delay slot, then a
+// NOP is placed.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+#include "Sparc.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+static cl::opt<bool> DisableDelaySlotFiller(
+ "disable-sparc-delay-filler",
+ cl::init(false),
+ cl::desc("Disable the Sparc delay slot filler."),
+ cl::Hidden);
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "SPARC Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate);
+
+ void insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegUses);
+
+ void insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+ unsigned Reg);
+
+ bool delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad, bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
+
+ MachineBasicBlock::iterator
+ findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot);
+
+ bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize);
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Sparc MachineFunctions
+///
+FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
+ return new Filler(tm);
+}
+
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// We assume there is only one delay slot per delayed instruction.
+///
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->hasDelaySlot()) {
+ MachineBasicBlock::iterator D = MBB.end();
+ MachineBasicBlock::iterator J = I;
+
+ if (!DisableDelaySlotFiller)
+ D = findDelayInstr(MBB, I);
+
+ ++FilledSlots;
+ Changed = true;
+
+ if (D == MBB.end())
+ BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
+ else
+ MBB.splice(++J, &MBB, D);
+ unsigned structSize = 0;
+ if (needsUnimp(I, structSize)) {
+ MachineBasicBlock::iterator J = I;
+ ++J; //skip the delay filler.
+ BuildMI(MBB, ++J, I->getDebugLoc(),
+ TII->get(SP::UNIMP)).addImm(structSize);
+ }
+ }
+ return Changed;
+}
+
+MachineBasicBlock::iterator
+Filler::findDelayInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator slot)
+{
+ SmallSet<unsigned, 32> RegDefs;
+ SmallSet<unsigned, 32> RegUses;
+ bool sawLoad = false;
+ bool sawStore = false;
+
+ MachineBasicBlock::iterator I = slot;
+
+ if (slot->getOpcode() == SP::RET)
+ return MBB.end();
+
+ if (slot->getOpcode() == SP::RETL) {
+ --I;
+ if (I->getOpcode() != SP::RESTORErr)
+ return MBB.end();
+ //change retl to ret
+ slot->setDesc(TII->get(SP::RET));
+ return I;
+ }
+
+ //Call's delay filler can def some of call's uses.
+ if (slot->isCall())
+ insertCallUses(slot, RegUses);
+ else
+ insertDefsUses(slot, RegDefs, RegUses);
+
+ bool done = false;
+
+ while (!done) {
+ done = (I == MBB.begin());
+
+ if (!done)
+ --I;
+
+ // skip debug value
+ if (I->isDebugValue())
+ continue;
+
+
+ if (I->hasUnmodeledSideEffects()
+ || I->isInlineAsm()
+ || I->isLabel()
+ || I->hasDelaySlot()
+ || isDelayFiller(MBB, I))
+ break;
+
+ if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
+ insertDefsUses(I, RegDefs, RegUses);
+ continue;
+ }
+
+ return I;
+ }
+ return MBB.end();
+}
+
+bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad,
+ bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses)
+{
+
+ if (candidate->isImplicitDef() || candidate->isKill())
+ return true;
+
+ if (candidate->mayLoad()) {
+ sawLoad = true;
+ if (sawStore)
+ return true;
+ }
+
+ if (candidate->mayStore()) {
+ if (sawStore)
+ return true;
+ sawStore = true;
+ if (sawLoad)
+ return true;
+ }
+
+ for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
+ const MachineOperand &MO = candidate->getOperand(i);
+ if (!MO.isReg())
+ continue; // skip
+
+ unsigned Reg = MO.getReg();
+
+ if (MO.isDef()) {
+ //check whether Reg is defined or used before delay slot.
+ if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
+ return true;
+ }
+ if (MO.isUse()) {
+ //check whether Reg is defined before delay slot.
+ if (IsRegInSet(RegDefs, Reg))
+ return true;
+ }
+ }
+ return false;
+}
+
+
+void Filler::insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegUses)
+{
+
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown opcode.");
+ case SP::CALL: break;
+ case SP::JMPLrr:
+ case SP::JMPLri:
+ assert(MI->getNumOperands() >= 2);
+ const MachineOperand &Reg = MI->getOperand(0);
+ assert(Reg.isReg() && "JMPL first operand is not a register.");
+ assert(Reg.isUse() && "JMPL first operand is not a use.");
+ RegUses.insert(Reg.getReg());
+
+ const MachineOperand &RegOrImm = MI->getOperand(1);
+ if (RegOrImm.isImm())
+ break;
+ assert(RegOrImm.isReg() && "JMPLrr second operand is not a register.");
+ assert(RegOrImm.isUse() && "JMPLrr second operand is not a use.");
+ RegUses.insert(RegOrImm.getReg());
+ break;
+ }
+}
+
+//Insert Defs and Uses of MI into the sets RegDefs and RegUses.
+void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses)
+{
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (MO.isDef())
+ RegDefs.insert(Reg);
+ if (MO.isUse())
+ RegUses.insert(Reg);
+
+ }
+}
+
+//returns true if the Reg or its alias is in the RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
+{
+ // Check Reg and all aliased Registers.
+ for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true);
+ AI.isValid(); ++AI)
+ if (RegSet.count(*AI))
+ return true;
+ return false;
+}
+
+// return true if the candidate is a delay filler.
+bool Filler::isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate)
+{
+ if (candidate == MBB.begin())
+ return false;
+ if (candidate->getOpcode() == SP::UNIMP)
+ return true;
+ --candidate;
+ return candidate->hasDelaySlot();
+}
+
+bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
+{
+ if (!I->isCall())
+ return false;
+
+ unsigned structSizeOpNum = 0;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unknown call opcode.");
+ case SP::CALL: structSizeOpNum = 1; break;
+ case SP::JMPLrr:
+ case SP::JMPLri: structSizeOpNum = 2; break;
+ }
+
+ const MachineOperand &MO = I->getOperand(structSizeOpNum);
+ if (!MO.isImm())
+ return false;
+ StructSize = MO.getImm();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/FPMover.cpp b/contrib/llvm/lib/Target/Sparc/FPMover.cpp
new file mode 100644
index 000000000000..1325b98cf0ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/FPMover.cpp
@@ -0,0 +1,141 @@
+//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "fpmover"
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumFpDs , "Number of instructions translated");
+STATISTIC(NoopFpDs, "Number of noop instructions removed");
+
+namespace {
+ struct FPMover : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+
+ static char ID;
+ explicit FPMover(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Sparc Double-FP Move Fixer";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+ };
+ char FPMover::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcFPMoverPass - Returns a pass that turns FpMOVD
+/// instructions into FMOVS instructions
+///
+FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
+ return new FPMover(tm);
+}
+
+/// getDoubleRegPair - Given a DFP register, return the even and odd FP
+/// registers that correspond to it.
+static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
+ unsigned &OddReg) {
+ static const uint16_t EvenHalvesOfPairs[] = {
+ SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
+ SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
+ };
+ static const uint16_t OddHalvesOfPairs[] = {
+ SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
+ SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
+ };
+ static const uint16_t DoubleRegsInOrder[] = {
+ SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
+ SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
+ };
+ for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i)
+ if (DoubleRegsInOrder[i] == DoubleReg) {
+ EvenReg = EvenHalvesOfPairs[i];
+ OddReg = OddHalvesOfPairs[i];
+ return;
+ }
+ llvm_unreachable("Can't find reg");
+}
+
+/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB.
+///
+bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ MachineInstr *MI = I++;
+ DebugLoc dl = MI->getDebugLoc();
+ if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD ||
+ MI->getOpcode() == SP::FpNEGD) {
+ Changed = true;
+ unsigned DestDReg = MI->getOperand(0).getReg();
+ unsigned SrcDReg = MI->getOperand(1).getReg();
+ if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) {
+ MBB.erase(MI); // Eliminate the noop copy.
+ ++NoopFpDs;
+ continue;
+ }
+
+ unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0;
+ getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg);
+ getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg);
+
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (MI->getOpcode() == SP::FpMOVD)
+ MI->setDesc(TII->get(SP::FMOVS));
+ else if (MI->getOpcode() == SP::FpNEGD)
+ MI->setDesc(TII->get(SP::FNEGS));
+ else if (MI->getOpcode() == SP::FpABSD)
+ MI->setDesc(TII->get(SP::FABSS));
+ else
+ llvm_unreachable("Unknown opcode!");
+
+ MI->getOperand(0).setReg(EvenDestReg);
+ MI->getOperand(1).setReg(EvenSrcReg);
+ DEBUG(errs() << "FPMover: the modified instr is: " << *MI);
+ // Insert copy for the other half of the double.
+ if (DestDReg != SrcDReg) {
+ MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg)
+ .addReg(OddSrcReg);
+ DEBUG(errs() << "FPMover: the inserted instr is: " << *MI);
+ }
+ ++NumFpDs;
+ }
+ }
+ return Changed;
+}
+
+bool FPMover::runOnMachineFunction(MachineFunction &F) {
+ // If the target has V9 instructions, the fp-mover pseudos will never be
+ // emitted. Avoid a scan of the instructions to improve compile time.
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ return false;
+
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
new file mode 100644
index 000000000000..3d4bfdcd5e6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -0,0 +1,44 @@
+//===-- SparcMCAsmInfo.cpp - Sparc asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SparcMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+void SparcELFMCAsmInfo::anchor() { }
+
+SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::sparcv9) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // .xword is only supported by V9.
+ ZeroDirective = "\t.skip\t";
+ CommentString = "!";
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+
+ WeakRefDirective = "\t.weak\t";
+
+ PrivateGlobalPrefix = ".L";
+}
+
+
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
new file mode 100644
index 000000000000..f0e1354c212b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -0,0 +1,31 @@
+//===-- SparcMCAsmInfo.h - Sparc asm properties ----------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SparcMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETASMINFO_H
+#define SPARCTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class StringRef;
+ class Target;
+
+ class SparcELFMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
+ explicit SparcELFMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
new file mode 100644
index 000000000000..7fdb0c39285a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -0,0 +1,81 @@
+//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCTargetDesc.h"
+#include "SparcMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SparcGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SparcGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSparcMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSparcMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSparcMCRegisterInfo(X, SP::I7);
+ return X;
+}
+
+static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSparcMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+extern "C" void LLVMInitializeSparcTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
+ RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
+ createSparcMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
+ createSparcMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheSparcTarget, createSparcMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
+ createSparcMCSubtargetInfo);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
new file mode 100644
index 000000000000..cba775adb1a8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -0,0 +1,39 @@
+//===-- SparcMCTargetDesc.h - Sparc Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCMCTARGETDESC_H
+#define SPARCMCTARGETDESC_H
+
+namespace llvm {
+class Target;
+
+extern Target TheSparcTarget;
+extern Target TheSparcV9Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for Sparc registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SparcGenRegisterInfo.inc"
+
+// Defines symbolic names for the Sparc instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SparcGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.h b/contrib/llvm/lib/Target/Sparc/Sparc.h
new file mode 100644
index 000000000000..ce6ae17b6ca2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.h
@@ -0,0 +1,108 @@
+//===-- Sparc.h - Top-level interface for Sparc representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Sparc back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_SPARC_H
+#define TARGET_SPARC_H
+
+#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class SparcTargetMachine;
+ class formatted_raw_ostream;
+
+ FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
+ FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
+
+} // end namespace llvm;
+
+namespace llvm {
+ // Enums corresponding to Sparc condition codes, both icc's and fcc's. These
+ // values must be kept in sync with the ones in the .td file.
+ namespace SPCC {
+ enum CondCodes {
+ //ICC_A = 8 , // Always
+ //ICC_N = 0 , // Never
+ ICC_NE = 9 , // Not Equal
+ ICC_E = 1 , // Equal
+ ICC_G = 10 , // Greater
+ ICC_LE = 2 , // Less or Equal
+ ICC_GE = 11 , // Greater or Equal
+ ICC_L = 3 , // Less
+ ICC_GU = 12 , // Greater Unsigned
+ ICC_LEU = 4 , // Less or Equal Unsigned
+ ICC_CC = 13 , // Carry Clear/Great or Equal Unsigned
+ ICC_CS = 5 , // Carry Set/Less Unsigned
+ ICC_POS = 14 , // Positive
+ ICC_NEG = 6 , // Negative
+ ICC_VC = 15 , // Overflow Clear
+ ICC_VS = 7 , // Overflow Set
+
+ //FCC_A = 8+16, // Always
+ //FCC_N = 0+16, // Never
+ FCC_U = 7+16, // Unordered
+ FCC_G = 6+16, // Greater
+ FCC_UG = 5+16, // Unordered or Greater
+ FCC_L = 4+16, // Less
+ FCC_UL = 3+16, // Unordered or Less
+ FCC_LG = 2+16, // Less or Greater
+ FCC_NE = 1+16, // Not Equal
+ FCC_E = 9+16, // Equal
+ FCC_UE = 10+16, // Unordered or Equal
+ FCC_GE = 11+16, // Greater or Equal
+ FCC_UGE = 12+16, // Unordered or Greater or Equal
+ FCC_LE = 13+16, // Less or Equal
+ FCC_ULE = 14+16, // Unordered or Less or Equal
+ FCC_O = 15+16 // Ordered
+ };
+ }
+
+ inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
+ switch (CC) {
+ case SPCC::ICC_NE: return "ne";
+ case SPCC::ICC_E: return "e";
+ case SPCC::ICC_G: return "g";
+ case SPCC::ICC_LE: return "le";
+ case SPCC::ICC_GE: return "ge";
+ case SPCC::ICC_L: return "l";
+ case SPCC::ICC_GU: return "gu";
+ case SPCC::ICC_LEU: return "leu";
+ case SPCC::ICC_CC: return "cc";
+ case SPCC::ICC_CS: return "cs";
+ case SPCC::ICC_POS: return "pos";
+ case SPCC::ICC_NEG: return "neg";
+ case SPCC::ICC_VC: return "vc";
+ case SPCC::ICC_VS: return "vs";
+ case SPCC::FCC_U: return "u";
+ case SPCC::FCC_G: return "g";
+ case SPCC::FCC_UG: return "ug";
+ case SPCC::FCC_L: return "l";
+ case SPCC::FCC_UL: return "ul";
+ case SPCC::FCC_LG: return "lg";
+ case SPCC::FCC_NE: return "ne";
+ case SPCC::FCC_E: return "e";
+ case SPCC::FCC_UE: return "ue";
+ case SPCC::FCC_GE: return "ge";
+ case SPCC::FCC_UGE: return "uge";
+ case SPCC::FCC_LE: return "le";
+ case SPCC::FCC_ULE: return "ule";
+ case SPCC::FCC_O: return "o";
+ }
+ llvm_unreachable("Invalid cond code");
+ }
+} // end namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm/lib/Target/Sparc/Sparc.td
new file mode 100644
index 000000000000..611f8e8129f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.td
@@ -0,0 +1,72 @@
+//===-- Sparc.td - Describe the Sparc Target Machine -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// SPARC Subtarget features.
+//
+
+def FeatureV9
+ : SubtargetFeature<"v9", "IsV9", "true",
+ "Enable SPARC-V9 instructions">;
+def FeatureV8Deprecated
+ : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
+ "Enable deprecated V8 instructions in V9 mode">;
+def FeatureVIS
+ : SubtargetFeature<"vis", "IsVIS", "true",
+ "Enable UltraSPARC Visual Instruction Set extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SparcRegisterInfo.td"
+include "SparcCallingConv.td"
+include "SparcInstrInfo.td"
+
+def SparcInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// SPARC processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"v8", []>;
+def : Proc<"supersparc", []>;
+def : Proc<"sparclite", []>;
+def : Proc<"f934", []>;
+def : Proc<"hypersparc", []>;
+def : Proc<"sparclite86x", []>;
+def : Proc<"sparclet", []>;
+def : Proc<"tsc701", []>;
+def : Proc<"v9", [FeatureV9]>;
+def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Sparc : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = SparcInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
new file mode 100644
index 000000000000..e14b3cbf161d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -0,0 +1,261 @@
+//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format SPARC assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Sparc.h"
+#include "SparcInstrInfo.h"
+#include "SparcTargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+namespace {
+ class SparcAsmPrinter : public AsmPrinter {
+ public:
+ explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Sparc Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+
+ virtual void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);// autogen'd.
+ static const char *getRegisterName(unsigned RegNo);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS);
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
+
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ };
+} // end of anonymous namespace
+
+#include "SparcGenAsmWriter.inc"
+
+void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand (opNum);
+ bool CloseParen = false;
+ if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
+ O << "%hi(";
+ CloseParen = true;
+ } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
+ !MO.isReg() && !MO.isImm()) {
+ O << "%lo(";
+ CloseParen = true;
+ }
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << "%" << StringRef(getRegisterName(MO.getReg())).lower();
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+ if (CloseParen) O << ")";
+}
+
+void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, O);
+
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+ }
+
+ if (MI->getOperand(opNum+1).isReg() &&
+ MI->getOperand(opNum+1).getReg() == SP::G0)
+ return; // don't print "+%g0"
+ if (MI->getOperand(opNum+1).isImm() &&
+ MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print "+0"
+
+ O << "+";
+ if (MI->getOperand(opNum+1).isGlobal() ||
+ MI->getOperand(opNum+1).isCPI()) {
+ O << "%lo(";
+ printOperand(MI, opNum+1, O);
+ O << ")";
+ } else {
+ printOperand(MI, opNum+1, O);
+ }
+}
+
+bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
+ raw_ostream &O) {
+ std::string operand = "";
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ default: llvm_unreachable("Operand is not a register");
+ case MachineOperand::MO_Register:
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Operand is not a physical register ");
+ assert(MO.getReg() != SP::O7 &&
+ "%o7 is assigned as destination for getpcx!");
+ operand = "%" + StringRef(getRegisterName(MO.getReg())).lower();
+ break;
+ }
+
+ unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber();
+ unsigned bbNum = MI->getParent()->getNumber();
+
+ O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n";
+ O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
+
+ O << "\t sethi\t"
+ << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
+ << ")), " << operand << '\n' ;
+
+ O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
+ O << "\tor\t" << operand
+ << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
+ << ")), " << operand << '\n';
+ O << "\tadd\t" << operand << ", %o7, " << operand << '\n';
+
+ return true;
+}
+
+void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << SPARCCondCodeToString((SPCC::CondCodes)CC);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
+
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+///
+/// This overrides AsmPrinter's implementation to handle delay slots.
+bool SparcAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->isTerminator())
+ ; // Noop
+ return I == Pred->end() || !I->isBarrier();
+}
+
+MachineLocation SparcAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ assert(MI->getNumOperands() == 4 && "Invalid number of operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSparcAsmPrinter() {
+ RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
+ RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
new file mode 100644
index 000000000000..b38ac616dcf4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -0,0 +1,56 @@
+//===-- SparcCallingConv.td - Calling Conventions Sparc ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Sparc architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Sparc 32-bit C return-value convention.
+def RetCC_Sparc32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+// Sparc 64-bit C return-value convention.
+def RetCC_Sparc64 : CallingConv<[
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+// Sparc 32-bit C Calling convention.
+def CC_Sparc32 : CallingConv<[
+ //Custom assign SRet to [sp+64].
+ CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
+ // i32 f32 arguments get passed in integer registers if there is space.
+ CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ // f64 arguments are split and passed through registers or through stack.
+ CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
+
+// Sparc 64-bit C Calling convention.
+def CC_Sparc64 : CallingConv<[
+ // All integers are promoted to i64 by the caller.
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ // Integer arguments get passed in integer registers if there is space.
+ CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ // FIXME: Floating point arguments.
+
+ // Alternatively, they are assigned to the stack in 8-byte aligned units.
+ CCAssignToStack<8, 8>
+]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
new file mode 100644
index 000000000000..a0dae6e9480c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -0,0 +1,96 @@
+//===-- SparcFrameLowering.cpp - Sparc Frame Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcFrameLowering.h"
+#include "SparcInstrInfo.h"
+#include "SparcMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Emit the correct save instruction based on the number of bytes in
+ // the frame. Minimum stack frame size according to V8 ABI is:
+ // 16 words for register window spill
+ // 1 word for address of returned aggregate-value
+ // + 6 words for passing parameters on the stack
+ // ----------
+ // 23 words * 4 bytes per word = 92 bytes
+ NumBytes += 92;
+
+ // Round up to next doubleword boundary -- a double-word boundary
+ // is required by the ABI.
+ NumBytes = (NumBytes + 7) & ~7;
+ NumBytes = -NumBytes;
+
+ if (NumBytes >= -4096) {
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+ .addReg(SP::O6).addImm(NumBytes);
+ } else {
+ // Emit this the hard way. This clobbers G1 which we always know is
+ // available here.
+ unsigned OffHi = (unsigned)NumBytes >> 10U;
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+ .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+ }
+}
+
+void SparcFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ DebugLoc dl = MI.getDebugLoc();
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (Size)
+ BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ MBB.erase(I);
+}
+
+
+void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ assert(MBBI->getOpcode() == SP::RETL &&
+ "Can only put epilog before 'retl' instruction!");
+ BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+ .addReg(SP::G0);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
new file mode 100644
index 000000000000..464233e7da35
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -0,0 +1,44 @@
+//===-- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_FRAMEINFO_H
+#define SPARC_FRAMEINFO_H
+
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class SparcSubtarget;
+
+class SparcFrameLowering : public TargetFrameLowering {
+public:
+ explicit SparcFrameLowering(const SparcSubtarget &/*sti*/)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
new file mode 100644
index 000000000000..5fa545d30160
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -0,0 +1,211 @@
+//===-- SparcISelDAGToDAG.cpp - A dag to dag inst selector for Sparc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SPARC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// SparcDAGToDAGISel - SPARC specific code to select SPARC machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class SparcDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const SparcSubtarget &Subtarget;
+ SparcTargetMachine& TM;
+public:
+ explicit SparcDAGToDAGISel(SparcTargetMachine &tm)
+ : SelectionDAGISel(tm),
+ Subtarget(tm.getSubtarget<SparcSubtarget>()),
+ TM(tm) {
+ }
+
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ virtual const char *getPassName() const {
+ return "SPARC DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "SparcGenDAGISel.inc"
+
+private:
+ SDNode* getGlobalBaseReg();
+};
+} // end anonymous namespace
+
+SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (isInt<13>(CN->getSExtValue())) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(1);
+ Offset = Addr.getOperand(0).getOperand(0);
+ return true;
+ }
+ if (Addr.getOperand(1).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1).getOperand(0);
+ return true;
+ }
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
+ Addr.getOperand(1).getOpcode() == SPISD::Lo)
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+ R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+ return true;
+}
+
+SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case SPISD::GLOBAL_BASE_REG:
+ return getGlobalBaseReg();
+
+ case ISD::SDIV:
+ case ISD::UDIV: {
+ // FIXME: should use a custom expander to expose the SRA to the dag.
+ SDValue DivLHS = N->getOperand(0);
+ SDValue DivRHS = N->getOperand(1);
+
+ // Set the Y register to the high-part.
+ SDValue TopPart;
+ if (N->getOpcode() == ISD::SDIV) {
+ TopPart = SDValue(CurDAG->getMachineNode(SP::SRAri, dl, MVT::i32, DivLHS,
+ CurDAG->getTargetConstant(31, MVT::i32)), 0);
+ } else {
+ TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
+ }
+ TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Glue, TopPart,
+ CurDAG->getRegister(SP::G0, MVT::i32)), 0);
+
+ // FIXME: Handle div by immediate.
+ unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
+ return CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS,
+ TopPart);
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ // FIXME: Handle mul by immediate.
+ SDValue MulLHS = N->getOperand(0);
+ SDValue MulRHS = N->getOperand(1);
+ unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
+ SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
+ MulLHS, MulRHS);
+ // The high part is in the Y register.
+ return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
+bool
+SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectADDRrr(Op, Op0, Op1))
+ SelectADDRri(Op, Op0, Op1);
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
+/// createSparcISelDag - This pass converts a legalized DAG into a
+/// SPARC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSparcISelDag(SparcTargetMachine &TM) {
+ return new SparcDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
new file mode 100644
index 000000000000..325f13424b42
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -0,0 +1,1360 @@
+//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcISelLowering.h"
+#include "SparcMachineFunctionInfo.h"
+#include "SparcTargetMachine.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ assert (ArgFlags.isSRet());
+
+ //Assign SRet argument
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ 0,
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ static const uint16_t RegList[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ //Try to get first reg
+ if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ } else {
+ //Assign whole thing in stack
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8,4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ //Try to get second reg
+ if (unsigned Reg = State.AllocateReg(RegList, 6))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4,4),
+ LocVT, LocInfo));
+ return true;
+}
+
+#include "SparcGenCallingConv.inc"
+
+SDValue
+SparcTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ // Analize return values.
+ CCInfo.AnalyzeReturn(Outs, Subtarget->is64Bit() ?
+ RetCC_Sparc64 : RetCC_Sparc32);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ // Make room for the return address offset.
+ RetOps.push_back(SDValue());
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ unsigned RetAddrOffset = 8; //Call Inst + Delay Slot
+ // If the function returns a struct, copy the SRetReturnReg to I0
+ if (MF.getFunction()->hasStructRetAttr()) {
+ SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+ unsigned Reg = SFI->getSRetReturnReg();
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy()));
+ RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
+ }
+
+ RetOps[0] = Chain; // Update chain.
+ RetOps[1] = DAG.getConstant(RetAddrOffset, MVT::i32);
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
+}
+
+SDValue SparcTargetLowering::
+LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (Subtarget->is64Bit())
+ return LowerFormalArguments_64(Chain, CallConv, IsVarArg, Ins,
+ DL, DAG, InVals);
+ return LowerFormalArguments_32(Chain, CallConv, IsVarArg, Ins,
+ DL, DAG, InVals);
+}
+
+/// LowerFormalArguments32 - V8 uses a very simple ABI, where all values are
+/// passed in either one or two GPRs, including FP values. TODO: we should
+/// pass FP values in FP registers for fastcc functions.
+SDValue SparcTargetLowering::
+LowerFormalArguments_32(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
+
+ const unsigned StackOffset = 92;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ if (i == 0 && Ins[i].Flags.isSRet()) {
+ //Get SRet from [%fp+64]
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ if (VA.isRegLoc()) {
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::f64);
+ unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
+ SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
+
+ assert(i+1 < e);
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ SDValue LoVal;
+ if (NextVA.isMemLoc()) {
+ int FrameIdx = MF.getFrameInfo()->
+ CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ } else {
+ unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
+ &SP::IntRegsRegClass);
+ LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
+ }
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ InVals.push_back(WholeValue);
+ continue;
+ }
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
+ SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ if (VA.getLocVT() == MVT::f32)
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Arg);
+ else if (VA.getLocVT() != MVT::i32) {
+ Arg = DAG.getNode(ISD::AssertSext, dl, MVT::i32, Arg,
+ DAG.getValueType(VA.getLocVT()));
+ Arg = DAG.getNode(ISD::TRUNCATE, dl, VA.getLocVT(), Arg);
+ }
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ unsigned Offset = VA.getLocMemOffset()+StackOffset;
+
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::f64);
+ //If it is double-word aligned, just load.
+ if (Offset % 8 == 0) {
+ int FI = MF.getFrameInfo()->CreateFixedObject(8,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false,false, false, 0);
+ InVals.push_back(Load);
+ continue;
+ }
+
+ int FI = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset+4,
+ true);
+ SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy());
+
+ SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2,
+ MachinePointerInfo(),
+ false, false, false, 0);
+
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ InVals.push_back(WholeValue);
+ continue;
+ }
+
+ int FI = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Load ;
+ if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
+ Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ } else {
+ ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
+ // Sparc is big endian, so add an offset based on the ObjectVT.
+ unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8);
+ FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
+ DAG.getConstant(Offset, MVT::i32));
+ Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
+ MachinePointerInfo(),
+ VA.getValVT(), false, false,0);
+ Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
+ }
+ InVals.push_back(Load);
+ }
+
+ if (MF.getFunction()->hasStructRetAttr()) {
+ //Copy the SRet Argument to SRetReturnReg
+ SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+ unsigned Reg = SFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass);
+ SFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ // Store remaining ArgRegs to the stack if this is a varargs function.
+ if (isVarArg) {
+ static const uint16_t ArgRegs[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
+ const uint16_t *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+ unsigned ArgOffset = CCInfo.getNextStackOffset();
+ if (NumAllocated == 6)
+ ArgOffset += StackOffset;
+ else {
+ assert(!ArgOffset);
+ ArgOffset = 68+4*NumAllocated;
+ }
+
+ // Remember the vararg offset for the va_start implementation.
+ FuncInfo->setVarArgsFrameOffset(ArgOffset);
+
+ std::vector<SDValue> OutChains;
+
+ for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
+ SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
+
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+
+ OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0));
+ ArgOffset += 4;
+ }
+
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+ }
+
+ return Chain;
+}
+
+// Lower formal arguments for the 64 bit ABI.
+SDValue SparcTargetLowering::
+LowerFormalArguments_64(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Analyze arguments according to CC_Sparc64.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ // This argument is passed in a register.
+ // All integer register arguments are promoted by the caller to i64.
+
+ // Create a virtual register for the promoted live-in value.
+ unsigned VReg = MF.addLiveIn(VA.getLocReg(),
+ getRegClassFor(VA.getLocVT()));
+ SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
+
+ // The caller promoted the argument, so insert an Assert?ext SDNode so we
+ // won't promote the value again in this function.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate the register down to the argument type.
+ if (VA.isExtInLoc())
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
+
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ // The registers are exhausted. This argument was passed on the stack.
+ assert(VA.isMemLoc());
+ }
+ return Chain;
+}
+
+SDValue
+SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
+ // Sparc target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+ // Keep stack frames 8-byte aligned.
+ ArgsSize = (ArgsSize+7) & ~7;
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+ //Create local copies for byval args.
+ SmallVector<SDValue, 8> ByValArgs;
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ SDValue Arg = OutVals[i];
+ unsigned Size = Flags.getByValSize();
+ unsigned Align = Flags.getByValAlign();
+
+ int FI = MFI->CreateStackObject(Size, Align, false);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+
+ Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
+ false, //isVolatile,
+ (Size <= 32), //AlwaysInline if size <= 32
+ MachinePointerInfo(), MachinePointerInfo());
+ ByValArgs.push_back(FIPtr);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ const unsigned StackOffset = 92;
+ bool hasStructRetAttr = false;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+
+ //Use local copy if it is a byval arg.
+ if (Flags.isByVal())
+ Arg = ByValArgs[byvalArgIdx++];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (Flags.isSRet()) {
+ assert(VA.needsCustom());
+ // store SRet argument in %sp+64
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(64);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ hasStructRetAttr = true;
+ continue;
+ }
+
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::f64);
+
+ if (VA.isMemLoc()) {
+ unsigned Offset = VA.getLocMemOffset() + StackOffset;
+ //if it is double-word aligned, just store.
+ if (Offset % 8 == 0) {
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ continue;
+ }
+ }
+
+ SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Arg, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+ // Sparc is big-endian, so the high part comes first.
+ SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+ MachinePointerInfo(), false, false, false, 0);
+ // Increment the pointer to the other half.
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(4));
+ // Load the low part.
+ SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+ MachinePointerInfo(), false, false, false, 0);
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
+ assert(i+1 != e);
+ CCValAssign &NextVA = ArgLocs[++i];
+ if (NextVA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
+ } else {
+ //Store the low part in stack.
+ unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ } else {
+ unsigned Offset = VA.getLocMemOffset() + StackOffset;
+ // Store the high part.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ // Store the low part.
+ PtrOff = DAG.getIntPtrConstant(Offset+4);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ continue;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ if (VA.getLocVT() != MVT::f32) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ // Create a store off the stack pointer for this argument.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+
+
+ // Emit all stores, make sure the occur before any copies into physregs.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ // Remap I0->I7 -> O0->O7.
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ // Returns a chain & a flag for retval copy to use
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ if (hasStructRetAttr)
+ Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
+ }
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ unsigned Reg = RVLocs[i].getLocReg();
+
+ // Remap I0->I7 -> O0->O7.
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+unsigned
+SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
+{
+ const Function *CalleeFn = 0;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ CalleeFn = dyn_cast<Function>(G->getGlobal());
+ } else if (ExternalSymbolSDNode *E =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const Function *Fn = DAG.getMachineFunction().getFunction();
+ const Module *M = Fn->getParent();
+ CalleeFn = M->getFunction(E->getSymbol());
+ }
+
+ if (!CalleeFn)
+ return 0;
+
+ assert(CalleeFn->hasStructRetAttr() &&
+ "Callee does not have the StructRet attribute.");
+
+ PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
+ Type *ElementTy = Ty->getElementType();
+ return getDataLayout()->getTypeAllocSize(ElementTy);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+/// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC
+/// condition.
+static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ISD::SETEQ: return SPCC::ICC_E;
+ case ISD::SETNE: return SPCC::ICC_NE;
+ case ISD::SETLT: return SPCC::ICC_L;
+ case ISD::SETGT: return SPCC::ICC_G;
+ case ISD::SETLE: return SPCC::ICC_LE;
+ case ISD::SETGE: return SPCC::ICC_GE;
+ case ISD::SETULT: return SPCC::ICC_CS;
+ case ISD::SETULE: return SPCC::ICC_LEU;
+ case ISD::SETUGT: return SPCC::ICC_GU;
+ case ISD::SETUGE: return SPCC::ICC_CC;
+ }
+}
+
+/// FPCondCCodeToFCC - Convert a DAG floatingp oint condition code to a SPARC
+/// FCC condition.
+static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return SPCC::FCC_E;
+ case ISD::SETNE:
+ case ISD::SETUNE: return SPCC::FCC_NE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return SPCC::FCC_L;
+ case ISD::SETGT:
+ case ISD::SETOGT: return SPCC::FCC_G;
+ case ISD::SETLE:
+ case ISD::SETOLE: return SPCC::FCC_LE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return SPCC::FCC_GE;
+ case ISD::SETULT: return SPCC::FCC_UL;
+ case ISD::SETULE: return SPCC::FCC_ULE;
+ case ISD::SETUGT: return SPCC::FCC_UG;
+ case ISD::SETUGE: return SPCC::FCC_UGE;
+ case ISD::SETUO: return SPCC::FCC_U;
+ case ISD::SETO: return SPCC::FCC_O;
+ case ISD::SETONE: return SPCC::FCC_LG;
+ case ISD::SETUEQ: return SPCC::FCC_UE;
+ }
+}
+
+SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ Subtarget = &TM.getSubtarget<SparcSubtarget>();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
+ addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
+ addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
+
+ // Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Sparc doesn't have i1 sign extending load
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into LO/HI parts.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+
+ // Sparc doesn't have sext_inreg, replace them with shl/sra
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Sparc has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // Custom expand fp<->sint
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+ // Expand fp<->uint
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ // Sparc has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+
+ // Sparc doesn't have BRCOND either, it has BR_CC.
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ }
+
+ // FIXME: There are instructions available for ATOMIC_FENCE
+ // on SparcV8 and later.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // FIXME: Sparc provides these multiplies, but we don't have them yet.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ // VAARG needs to be lowered to not do unaligned accesses for doubles.
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+
+ // No debug info support yet.
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ setStackPointerRegisterToSaveRestore(SP::O6);
+
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+
+ setMinFunctionAlignment(2);
+
+ computeRegisterProperties();
+}
+
+const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPISD::CMPICC: return "SPISD::CMPICC";
+ case SPISD::CMPFCC: return "SPISD::CMPFCC";
+ case SPISD::BRICC: return "SPISD::BRICC";
+ case SPISD::BRXCC: return "SPISD::BRXCC";
+ case SPISD::BRFCC: return "SPISD::BRFCC";
+ case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+ case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
+ case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
+ case SPISD::Hi: return "SPISD::Hi";
+ case SPISD::Lo: return "SPISD::Lo";
+ case SPISD::FTOI: return "SPISD::FTOI";
+ case SPISD::ITOF: return "SPISD::ITOF";
+ case SPISD::CALL: return "SPISD::CALL";
+ case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
+ case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
+ case SPISD::FLUSHW: return "SPISD::FLUSHW";
+ }
+}
+
+/// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
+/// be zero. Op is expected to be a target specific node. Used by DAG
+/// combiner.
+void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt KnownZero2, KnownOne2;
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case SPISD::SELECT_ICC:
+ case SPISD::SELECT_XCC:
+ case SPISD::SELECT_FCC:
+ DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ }
+}
+
+// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so
+// set LHS/RHS and SPCC to the LHS/RHS of the setcc and SPCC to the condition.
+static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
+ ISD::CondCode CC, unsigned &SPCC) {
+ if (isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->isNullValue() &&
+ CC == ISD::SETNE &&
+ (((LHS.getOpcode() == SPISD::SELECT_ICC ||
+ LHS.getOpcode() == SPISD::SELECT_XCC) &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
+ (LHS.getOpcode() == SPISD::SELECT_FCC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
+ isa<ConstantSDNode>(LHS.getOperand(0)) &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
+ SDValue CMPCC = LHS.getOperand(3);
+ SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+ LHS = CMPCC.getOperand(0);
+ RHS = CMPCC.getOperand(1);
+ }
+}
+
+SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ getPointerTy());
+ SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ GlobalBase, RelAddr);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, MachinePointerInfo(), false, false, false, 0);
+}
+
+SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const Constant *C = N->getConstVal();
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ getPointerTy());
+ SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ GlobalBase, RelAddr);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, MachinePointerInfo(), false, false, false, 0);
+}
+
+static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Convert the fp value to integer in an FP register.
+ assert(Op.getValueType() == MVT::i32);
+ Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+}
+
+static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getOperand(0).getValueType() == MVT::i32);
+ SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
+ // Convert the int value to FP in an FP register.
+ return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
+}
+
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a br_cc of a "setcc", and if the setcc got lowered into
+ // an CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ // Get the condition flag.
+ SDValue CompareFlag;
+ if (LHS.getValueType().isInteger()) {
+ EVT VTs[] = { LHS.getValueType(), MVT::Glue };
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ // 32-bit compares use the icc flags, 64-bit uses the xcc flags.
+ Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ Opc = SPISD::BRFCC;
+ }
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a select_cc of a "setcc", and if the setcc got lowered into
+ // an CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ SDValue CompareFlag;
+ if (LHS.getValueType().isInteger()) {
+ // subcc returns a value
+ EVT VTs[] = { LHS.getValueType(), MVT::Glue };
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ Opc = LHS.getValueType() == MVT::i32 ?
+ SPISD::SELECT_ICC : SPISD::SELECT_XCC;
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ Opc = SPISD::SELECT_FCC;
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ }
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Offset =
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getRegister(SP::I6, MVT::i32),
+ DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
+ MVT::i32));
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+ MachinePointerInfo(SV), false, false, false, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
+ DAG.getConstant(VT.getSizeInBits()/8,
+ MVT::i32));
+ // Store the incremented VAList to the legalized pointer
+ InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
+ VAListPtr, MachinePointerInfo(SV), false, false, 0);
+ // Load the actual argument out of the pointer VAList, unless this is an
+ // f64 load.
+ if (VT != MVT::f64)
+ return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Otherwise, load it as i64, then do a bitconvert.
+ SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Bit-Convert the value to f64.
+ SDValue Ops[2] = {
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, V),
+ V.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0); // Legalize the chain.
+ SDValue Size = Op.getOperand(1); // Legalize the size.
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = SP::O6;
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+ SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value
+ Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
+
+ // The resultant pointer is actually 16 words from the bottom of the stack,
+ // to provide a register spill area.
+ SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
+ DAG.getConstant(96, MVT::i32));
+ SDValue Ops[2] = { NewVal, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+
+static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = DAG.getNode(SPISD::FLUSHW,
+ dl, MVT::Other, DAG.getEntryNode());
+ return Chain;
+}
+
+static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned FrameReg = SP::I6;
+
+ uint64_t depth = Op.getConstantOperandVal(0);
+
+ SDValue FrameAddr;
+ if (depth == 0)
+ FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ else {
+ // flush first to make sure the windowed registers' values are in stack
+ SDValue Chain = getFLUSHW(Op, DAG);
+ FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+
+ for (uint64_t i = 0; i != depth; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD,
+ dl, MVT::i32,
+ FrameAddr, DAG.getIntPtrConstant(56));
+ FrameAddr = DAG.getLoad(MVT::i32, dl,
+ Chain,
+ Ptr,
+ MachinePointerInfo(), false, false, false, 0);
+ }
+ }
+ return FrameAddr;
+}
+
+static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned RetReg = SP::I7;
+
+ uint64_t depth = Op.getConstantOperandVal(0);
+
+ SDValue RetAddr;
+ if (depth == 0)
+ RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
+ else {
+ // flush first to make sure the windowed registers' values are in stack
+ SDValue Chain = getFLUSHW(Op, DAG);
+ RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT);
+
+ for (uint64_t i = 0; i != depth; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD,
+ dl, MVT::i32,
+ RetAddr,
+ DAG.getIntPtrConstant((i == depth-1)?60:56));
+ RetAddr = DAG.getLoad(MVT::i32, dl,
+ Chain,
+ Ptr,
+ MachinePointerInfo(), false, false, false, 0);
+ }
+ }
+ return RetAddr;
+}
+
+SDValue SparcTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Sparc.");
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ }
+}
+
+MachineBasicBlock *
+SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ unsigned BROpcode;
+ unsigned CC;
+ DebugLoc dl = MI->getDebugLoc();
+ // Figure out the conditional branch opcode to use for this select_cc.
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown SELECT_CC!");
+ case SP::SELECT_CC_Int_ICC:
+ case SP::SELECT_CC_FP_ICC:
+ case SP::SELECT_CC_DFP_ICC:
+ BROpcode = SP::BCOND;
+ break;
+ case SP::SELECT_CC_Int_FCC:
+ case SP::SELECT_CC_FP_FCC:
+ case SP::SELECT_CC_DFP_FCC:
+ BROpcode = SP::FBCOND;
+ break;
+ }
+
+ CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // [f]bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Sparc Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SparcTargetLowering::ConstraintType
+SparcTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ }
+ }
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, &SP::IntRegsRegClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool
+SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Sparc target isn't yet aware of offsets.
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
new file mode 100644
index 000000000000..aa2ef711a080
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -0,0 +1,113 @@
+//===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_ISELLOWERING_H
+#define SPARC_ISELLOWERING_H
+
+#include "Sparc.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ class SparcSubtarget;
+
+ namespace SPISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CMPICC, // Compare two GPR operands, set icc+xcc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRXCC, // Branch to dest on xcc condition (64-bit only).
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_XCC, // Select between two values using the current XCC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ GLOBAL_BASE_REG, // Global base reg for PIC
+ FLUSHW // FLUSH register windows to stack
+ };
+ }
+
+ class SparcTargetLowering : public TargetLowering {
+ const SparcSubtarget *Subtarget;
+ public:
+ SparcTargetLowering(TargetMachine &TM);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerFormalArguments_32(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerFormalArguments_64(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
+ };
+} // end namespace llvm
+
+#endif // SPARC_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
new file mode 100644
index 000000000000..ca1153b3fe8f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -0,0 +1,285 @@
+//===-- SparcInstr64Bit.td - 64-bit instructions for Sparc Target ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction definitions and patterns needed for 64-bit
+// code generation on SPARC v9.
+//
+// Some SPARC v9 instructions are defined in SparcInstrInfo.td because they can
+// also be used in 32-bit code running on a SPARC v9 CPU.
+//
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+// The same integer registers are used for i32 and i64 values.
+// When registers hold i32 values, the high bits are don't care.
+// This give us free trunc and anyext.
+def : Pat<(i64 (anyext i32:$val)), (COPY_TO_REGCLASS $val, I64Regs)>;
+def : Pat<(i32 (trunc i64:$val)), (COPY_TO_REGCLASS $val, IntRegs)>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Shift Instructions.
+//===----------------------------------------------------------------------===//
+//
+// The 32-bit shift instructions are still available. The left shift srl
+// instructions shift all 64 bits, but it only accepts a 5-bit shift amount.
+//
+// The srl instructions only shift the low 32 bits and clear the high 32 bits.
+// Finally, sra shifts the low 32 bits and sign-extends to 64 bits.
+
+let Predicates = [Is64Bit] in {
+
+def : Pat<(i64 (zext i32:$val)), (SRLri $val, 0)>;
+def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>;
+
+defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>;
+defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>;
+defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Immediates.
+//===----------------------------------------------------------------------===//
+//
+// All 32-bit immediates can be materialized with sethi+or, but 64-bit
+// immediates may require more code. There may be a point where it is
+// preferable to use a constant pool load instead, depending on the
+// microarchitecture.
+
+// The %g0 register is constant 0.
+// This is useful for stx %g0, [...], for example.
+def : Pat<(i64 0), (i64 G0)>, Requires<[Is64Bit]>;
+
+// Single-instruction patterns.
+
+// The ALU instructions want their simm13 operands as i32 immediates.
+def as_i32imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
+}]>;
+def : Pat<(i64 simm13:$val), (ORri (i64 G0), (as_i32imm $val))>;
+def : Pat<(i64 SETHIimm:$val), (SETHIi (HI22 $val))>;
+
+// Double-instruction patterns.
+
+// All unsigned i32 immediates can be handled by sethi+or.
+def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
+def : Pat<(i64 uimm32:$val), (ORri (SETHIi (HI22 $val)), (LO10 $val))>,
+ Requires<[Is64Bit]>;
+
+// All negative i33 immediates can be handled by sethi+xor.
+def nimm33 : PatLeaf<(imm), [{
+ int64_t Imm = N->getSExtValue();
+ return Imm < 0 && isInt<33>(Imm);
+}]>;
+// Bits 10-31 inverted. Same as assembler's %hix.
+def HIX22 : SDNodeXForm<imm, [{
+ uint64_t Val = (~N->getZExtValue() >> 10) & ((1u << 22) - 1);
+ return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+// Bits 0-9 with ones in bits 10-31. Same as assembler's %lox.
+def LOX10 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(~N->getZExtValue() & 0x3ff), MVT::i32);
+}]>;
+def : Pat<(i64 nimm33:$val), (XORri (SETHIi (HIX22 $val)), (LOX10 $val))>,
+ Requires<[Is64Bit]>;
+
+// More possible patterns:
+//
+// (sllx sethi, n)
+// (sllx simm13, n)
+//
+// 3 instrs:
+//
+// (xor (sllx sethi), simm13)
+// (sllx (xor sethi, simm13))
+//
+// 4 instrs:
+//
+// (or sethi, (sllx sethi))
+// (xnor sethi, (sllx sethi))
+//
+// 5 instrs:
+//
+// (or (sllx sethi), (or sethi, simm13))
+// (xnor (sllx sethi), (or sethi, simm13))
+// (or (sllx sethi), (sllx sethi))
+// (xnor (sllx sethi), (sllx sethi))
+//
+// Worst case is 6 instrs:
+//
+// (or (sllx (or sethi, simmm13)), (or sethi, simm13))
+
+// Bits 42-63, same as assembler's %hh.
+def HH22 : SDNodeXForm<imm, [{
+ uint64_t Val = (N->getZExtValue() >> 42) & ((1u << 22) - 1);
+ return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+// Bits 32-41, same as assembler's %hm.
+def HM10 : SDNodeXForm<imm, [{
+ uint64_t Val = (N->getZExtValue() >> 32) & ((1u << 10) - 1);
+ return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+def : Pat<(i64 imm:$val),
+ (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i64 32)),
+ (ORri (SETHIi (HI22 $val)), (LO10 $val)))>,
+ Requires<[Is64Bit]>;
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Integer Arithmetic and Logic.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+// Register-register instructions.
+
+def : Pat<(and i64:$a, i64:$b), (ANDrr $a, $b)>;
+def : Pat<(or i64:$a, i64:$b), (ORrr $a, $b)>;
+def : Pat<(xor i64:$a, i64:$b), (XORrr $a, $b)>;
+
+def : Pat<(and i64:$a, (not i64:$b)), (ANDNrr $a, $b)>;
+def : Pat<(or i64:$a, (not i64:$b)), (ORNrr $a, $b)>;
+def : Pat<(xor i64:$a, (not i64:$b)), (XNORrr $a, $b)>;
+
+def : Pat<(add i64:$a, i64:$b), (ADDrr $a, $b)>;
+def : Pat<(sub i64:$a, i64:$b), (SUBrr $a, $b)>;
+
+// Add/sub with carry were renamed to addc/subc in SPARC v9.
+def : Pat<(adde i64:$a, i64:$b), (ADDXrr $a, $b)>;
+def : Pat<(sube i64:$a, i64:$b), (SUBXrr $a, $b)>;
+
+def : Pat<(addc i64:$a, i64:$b), (ADDCCrr $a, $b)>;
+def : Pat<(subc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
+
+def : Pat<(SPcmpicc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
+
+// Register-immediate instructions.
+
+def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>;
+def : Pat<(or i64:$a, (i64 simm13:$b)), (ORri $a, (as_i32imm $b))>;
+def : Pat<(xor i64:$a, (i64 simm13:$b)), (XORri $a, (as_i32imm $b))>;
+
+def : Pat<(add i64:$a, (i64 simm13:$b)), (ADDri $a, (as_i32imm $b))>;
+def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>;
+
+def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (SUBCCri $a, (as_i32imm $b))>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Loads and Stores.
+//===----------------------------------------------------------------------===//
+//
+// All the 32-bit loads and stores are available. The extending loads are sign
+// or zero-extending to 64 bits. The LDrr and LDri instructions load 32 bits
+// zero-extended to i64. Their mnemonic is lduw in SPARC v9 (Load Unsigned
+// Word).
+//
+// SPARC v9 adds 64-bit loads as well as a sign-extending ldsw i32 loads.
+
+let Predicates = [Is64Bit] in {
+
+// 64-bit loads.
+def LDXrr : F3_1<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMrr:$addr),
+ "ldx [$addr], $dst",
+ [(set i64:$dst, (load ADDRrr:$addr))]>;
+def LDXri : F3_2<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMri:$addr),
+ "ldx [$addr], $dst",
+ [(set i64:$dst, (load ADDRri:$addr))]>;
+
+// Extending loads to i64.
+def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>;
+def : Pat<(i64 (sextloadi8 ADDRrr:$addr)), (LDSBrr ADDRrr:$addr)>;
+def : Pat<(i64 (sextloadi8 ADDRri:$addr)), (LDSBri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>;
+def : Pat<(i64 (sextloadi16 ADDRrr:$addr)), (LDSHrr ADDRrr:$addr)>;
+def : Pat<(i64 (sextloadi16 ADDRri:$addr)), (LDSHri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>;
+
+// Sign-extending load of i32 into i64 is a new SPARC v9 instruction.
+def LDSWrr : F3_1<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMrr:$addr),
+ "ldsw [$addr], $dst",
+ [(set i64:$dst, (sextloadi32 ADDRrr:$addr))]>;
+def LDSWri : F3_2<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMri:$addr),
+ "ldsw [$addr], $dst",
+ [(set i64:$dst, (sextloadi32 ADDRri:$addr))]>;
+
+// 64-bit stores.
+def STXrr : F3_1<3, 0b001110,
+ (outs), (ins MEMrr:$addr, I64Regs:$src),
+ "stx $src, [$addr]",
+ [(store i64:$src, ADDRrr:$addr)]>;
+def STXri : F3_2<3, 0b001110,
+ (outs), (ins MEMri:$addr, I64Regs:$src),
+ "stx $src, [$addr]",
+ [(store i64:$src, ADDRri:$addr)]>;
+
+// Truncating stores from i64 are identical to the i32 stores.
+def : Pat<(truncstorei8 i64:$src, ADDRrr:$addr), (STBrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei8 i64:$src, ADDRri:$addr), (STBri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRrr:$addr), (STHrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRri:$addr), (STHri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRrr:$addr), (STrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRri:$addr), (STri ADDRri:$addr, $src)>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Conditionals.
+//===----------------------------------------------------------------------===//
+//
+// Flag-setting instructions like subcc and addcc set both icc and xcc flags.
+// The icc flags correspond to the 32-bit result, and the xcc are for the
+// full 64-bit result.
+//
+// We reuse CMPICC SDNodes for compares, but use new BRXCC branch nodes for
+// 64-bit compares. See LowerBR_CC.
+
+let Predicates = [Is64Bit] in {
+
+let Uses = [ICC] in
+def BPXCC : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "bp$cc %xcc, $dst",
+ [(SPbrxcc bb:$dst, imm:$cc)]>;
+
+// Conditional moves on %xcc.
+let Uses = [ICC], Constraints = "$f = $rd" in {
+def MOVXCCrr : Pseudo<(outs IntRegs:$rd),
+ (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond),
+ "mov$cond %xcc, $rs2, $rd",
+ [(set i32:$rd,
+ (SPselectxcc i32:$rs2, i32:$f, imm:$cond))]>;
+def MOVXCCri : Pseudo<(outs IntRegs:$rd),
+ (ins i32imm:$i, IntRegs:$f, CCOp:$cond),
+ "mov$cond %xcc, $i, $rd",
+ [(set i32:$rd,
+ (SPselecticc simm11:$i, i32:$f, imm:$cond))]>;
+} // Uses, Constraints
+
+def : Pat<(SPselectxcc i64:$t, i64:$f, imm:$cond),
+ (MOVXCCrr $t, $f, imm:$cond)>;
+def : Pat<(SPselectxcc (i64 simm11:$t), i64:$f, imm:$cond),
+ (MOVXCCri (as_i32imm $t), $f, imm:$cond)>;
+
+} // Predicates = [Is64Bit]
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
new file mode 100644
index 000000000000..f1018569153c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -0,0 +1,151 @@
+//===-- SparcInstrFormats.td - Sparc Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SP";
+
+ bits<2> op;
+ let Inst{31-30} = op; // Top two bits are the 'op' field
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #2 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+// Format 2 instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<3> op2;
+ bits<22> imm22;
+ let op = 0; // op = 0
+ let Inst{24-22} = op2;
+ let Inst{21-0} = imm22;
+}
+
+// Specific F2 classes: SparcV8 manual, page 44
+//
+class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : F2<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+
+ let op2 = op2Val;
+
+ let Inst{29-25} = rd;
+}
+
+class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : F2<outs, ins, asmstr, pattern> {
+ bits<4> cond;
+ bit annul = 0; // currently unused
+
+ let cond = condVal;
+ let op2 = op2Val;
+
+ let Inst{29} = annul;
+ let Inst{28-25} = cond;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #3 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+ bits<6> op3;
+ bits<5> rs1;
+ let op{1} = 1; // Op = 2 or 3
+ let Inst{29-25} = rd;
+ let Inst{24-19} = op3;
+ let Inst{18-14} = rs1;
+}
+
+// Specific F3 classes: SparcV8 manual, page 44
+//
+class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<8> asi = 0; // asi not currently used
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 0; // i field = 0
+ let Inst{12-5} = asi; // address space identifier
+ let Inst{4-0} = rs2;
+}
+
+class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<13> simm13;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 1; // i field = 1
+ let Inst{12-0} = simm13;
+}
+
+// floating-point
+class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
+
+// Shift by register rs2.
+class F3_Sr<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bit x = xVal; // 1 for 64-bit shifts.
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 0; // i field = 0
+ let Inst{12} = x; // extended registers.
+ let Inst{4-0} = rs2;
+}
+
+// Shift by immediate.
+class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bit x = xVal; // 1 for 64-bit shifts.
+ bits<6> shcnt; // shcnt32 / shcnt64.
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 1; // i field = 1
+ let Inst{12} = x; // extended registers.
+ let Inst{5-0} = shcnt;
+}
+
+// Define rr and ri shift instructions with patterns.
+multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode,
+ ValueType VT, RegisterClass RC> {
+ def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, RC:$rs2),
+ !strconcat(OpcStr, " $rs, $rs2, $rd"),
+ [(set VT:$rd, (OpNode VT:$rs, VT:$rs2))]>;
+ def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, unknown:$shcnt),
+ !strconcat(OpcStr, " $rs, $shcnt, $rd"),
+ [(set VT:$rd, (OpNode VT:$rs, (VT imm:$shcnt)))]>;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
new file mode 100644
index 000000000000..39d7329f2663
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -0,0 +1,357 @@
+//===-- SparcInstrInfo.cpp - Sparc Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcInstrInfo.h"
+#include "Sparc.h"
+#include "SparcMachineFunctionInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SparcGenInstrInfo.inc"
+
+using namespace llvm;
+
+SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
+ : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::LDri ||
+ MI->getOpcode() == SP::LDFri ||
+ MI->getOpcode() == SP::LDDFri) {
+ if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::STri ||
+ MI->getOpcode() == SP::STFri ||
+ MI->getOpcode() == SP::STDFri) {
+ if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
+ MI->getOperand(1).getImm() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ }
+ return 0;
+}
+
+static bool IsIntegerCC(unsigned CC)
+{
+ return (CC <= SPCC::ICC_VC);
+}
+
+
+static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
+{
+ switch(CC) {
+ case SPCC::ICC_NE: return SPCC::ICC_E;
+ case SPCC::ICC_E: return SPCC::ICC_NE;
+ case SPCC::ICC_G: return SPCC::ICC_LE;
+ case SPCC::ICC_LE: return SPCC::ICC_G;
+ case SPCC::ICC_GE: return SPCC::ICC_L;
+ case SPCC::ICC_L: return SPCC::ICC_GE;
+ case SPCC::ICC_GU: return SPCC::ICC_LEU;
+ case SPCC::ICC_LEU: return SPCC::ICC_GU;
+ case SPCC::ICC_CC: return SPCC::ICC_CS;
+ case SPCC::ICC_CS: return SPCC::ICC_CC;
+ case SPCC::ICC_POS: return SPCC::ICC_NEG;
+ case SPCC::ICC_NEG: return SPCC::ICC_POS;
+ case SPCC::ICC_VC: return SPCC::ICC_VS;
+ case SPCC::ICC_VS: return SPCC::ICC_VC;
+
+ case SPCC::FCC_U: return SPCC::FCC_O;
+ case SPCC::FCC_O: return SPCC::FCC_U;
+ case SPCC::FCC_G: return SPCC::FCC_LE;
+ case SPCC::FCC_LE: return SPCC::FCC_G;
+ case SPCC::FCC_UG: return SPCC::FCC_ULE;
+ case SPCC::FCC_ULE: return SPCC::FCC_UG;
+ case SPCC::FCC_L: return SPCC::FCC_GE;
+ case SPCC::FCC_GE: return SPCC::FCC_L;
+ case SPCC::FCC_UL: return SPCC::FCC_UGE;
+ case SPCC::FCC_UGE: return SPCC::FCC_UL;
+ case SPCC::FCC_LG: return SPCC::FCC_UE;
+ case SPCC::FCC_UE: return SPCC::FCC_LG;
+ case SPCC::FCC_NE: return SPCC::FCC_E;
+ case SPCC::FCC_E: return SPCC::FCC_NE;
+ }
+ llvm_unreachable("Invalid cond code");
+}
+
+MachineInstr *
+SparcInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc dl) const {
+ MachineInstrBuilder MIB = BuildMI(MF, dl, get(SP::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+
+bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+ //When we see a non-terminator, we are done
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ //Terminator is not a branch
+ if (!I->isBranch())
+ return true;
+
+ //Handle Unconditional branches
+ if (I->getOpcode() == SP::BA) {
+ UnCondBrIter = I;
+
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = 0;
+
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ UnCondBrIter = MBB.end();
+ continue;
+ }
+
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ unsigned Opcode = I->getOpcode();
+ if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
+ return true; //Unknown Opcode
+
+ SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();
+
+ if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+
+ //Transform the code
+ //
+ // brCC L1
+ // ba L2
+ // L1:
+ // ..
+ // L2:
+ //
+ // into
+ //
+ // brnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
+ .addMBB(TargetBB);
+
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+ //FIXME: Handle subsequent conditional branches
+ //For now, we can't handle multiple conditional branches
+ return true;
+ }
+ return false;
+}
+
+unsigned
+SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "Sparc branch conditions should have one component!");
+
+ if (Cond.empty()) {
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
+ return 1;
+ }
+
+ //Conditional branch
+ unsigned CC = Cond[0].getImm();
+
+ if (IsIntegerCC(CC))
+ BuildMI(&MBB, DL, get(SP::BCOND)).addMBB(TBB).addImm(CC);
+ else
+ BuildMI(&MBB, DL, get(SP::FBCOND)).addMBB(TBB).addImm(CC);
+ if (!FBB)
+ return 1;
+
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(FBB);
+ return 2;
+}
+
+unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+ if (I->getOpcode() != SP::BA
+ && I->getOpcode() != SP::BCOND
+ && I->getOpcode() != SP::FBCOND)
+ break; // Not a branch
+
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+ return Count;
+}
+
+void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+void SparcInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+ if (RC == &SP::IntRegsRegClass)
+ BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == &SP::FPRegsRegClass)
+ BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == &SP::DFPRegsRegClass)
+ BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else
+ llvm_unreachable("Can't store this register to stack slot");
+}
+
+void SparcInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == &SP::IntRegsRegClass)
+ BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == &SP::FPRegsRegClass)
+ BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == &SP::DFPRegsRegClass)
+ BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else
+ llvm_unreachable("Can't load this register from stack slot");
+}
+
+unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const
+{
+ SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>();
+ unsigned GlobalBaseReg = SparcFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+
+
+ DebugLoc dl;
+
+ BuildMI(FirstMBB, MBBI, dl, get(SP::GETPCX), GlobalBaseReg);
+ SparcFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
new file mode 100644
index 000000000000..204f69855c23
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -0,0 +1,107 @@
+//===-- SparcInstrInfo.h - Sparc Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCINSTRUCTIONINFO_H
+#define SPARCINSTRUCTIONINFO_H
+
+#include "SparcRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SparcGenInstrInfo.inc"
+
+namespace llvm {
+
+/// SPII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SPII {
+ enum {
+ Pseudo = (1<<0),
+ Load = (1<<1),
+ Store = (1<<2),
+ DelaySlot = (1<<3)
+ };
+}
+
+class SparcInstrInfo : public SparcGenInstrInfo {
+ const SparcRegisterInfo RI;
+ const SparcSubtarget& Subtarget;
+public:
+ explicit SparcInstrInfo(SparcSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// emitFrameIndexDebugValue - Emit a target-dependent form of
+ /// DBG_VALUE encoding the address of a frame index.
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc dl) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const ;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
new file mode 100644
index 000000000000..5ff439583c5c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -0,0 +1,822 @@
+//===-- SparcInstrInfo.td - Target Description for Sparc Target -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Sparc instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "SparcInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+// True when generating 32-bit code.
+def Is32Bit : Predicate<"!Subtarget.is64Bit()">;
+
+// True when generating 64-bit code. This also implies HasV9.
+def Is64Bit : Predicate<"Subtarget.is64Bit()">;
+
+// HasV9 - This predicate is true when the target processor supports V9
+// instructions. Note that the machine may be running in 32-bit mode.
+def HasV9 : Predicate<"Subtarget.isV9()">;
+
+// HasNoV9 - This predicate is true when the target doesn't have V9
+// instructions. Use of this is just a hack for the isel not having proper
+// costs for V8 instructions that are more expensive than their V9 ones.
+def HasNoV9 : Predicate<"!Subtarget.isV9()">;
+
+// HasVIS - This is true when the target processor has VIS extensions.
+def HasVIS : Predicate<"Subtarget.isVIS()">;
+
+// UseDeprecatedInsts - This predicate is true when the target processor is a
+// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
+// to use when appropriate. In either of these cases, the instruction selector
+// will pick deprecated instructions.
+def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def simm11 : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>;
+
+def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
+
+def LO10 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
+ MVT::i32);
+}]>;
+
+def HI22 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 10, MVT::i32);
+}]>;
+
+def SETHIimm : PatLeaf<(imm), [{
+ return (((unsigned)N->getZExtValue() >> 10) << 10) ==
+ (unsigned)N->getZExtValue();
+}], HI22>;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>;
+
+// Address operands
+def MEMrr : Operand<iPTR> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops ptr_rc, ptr_rc);
+}
+def MEMri : Operand<iPTR> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops ptr_rc, i32imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+ def CCOp : Operand<i32>;
+
+def SDTSPcmpfcc :
+SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
+def SDTSPbrcc :
+SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+def SDTSPselectcc :
+SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>;
+def SDTSPFTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDTSPITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrxcc : SDNode<"SPISD::BRXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+
+def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
+def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
+
+def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
+def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
+
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+def call : SDNode<"SPISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
+ [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
+
+def getPCX : Operand<i32> {
+ let PrintMethod = "printGetPCX";
+}
+
+//===----------------------------------------------------------------------===//
+// SPARC Flag Conditions
+//===----------------------------------------------------------------------===//
+
+// Note that these values must be kept in sync with the CCOp::CondCode enum
+// values.
+class ICC_VAL<int N> : PatLeaf<(i32 N)>;
+def ICC_NE : ICC_VAL< 9>; // Not Equal
+def ICC_E : ICC_VAL< 1>; // Equal
+def ICC_G : ICC_VAL<10>; // Greater
+def ICC_LE : ICC_VAL< 2>; // Less or Equal
+def ICC_GE : ICC_VAL<11>; // Greater or Equal
+def ICC_L : ICC_VAL< 3>; // Less
+def ICC_GU : ICC_VAL<12>; // Greater Unsigned
+def ICC_LEU : ICC_VAL< 4>; // Less or Equal Unsigned
+def ICC_CC : ICC_VAL<13>; // Carry Clear/Great or Equal Unsigned
+def ICC_CS : ICC_VAL< 5>; // Carry Set/Less Unsigned
+def ICC_POS : ICC_VAL<14>; // Positive
+def ICC_NEG : ICC_VAL< 6>; // Negative
+def ICC_VC : ICC_VAL<15>; // Overflow Clear
+def ICC_VS : ICC_VAL< 7>; // Overflow Set
+
+class FCC_VAL<int N> : PatLeaf<(i32 N)>;
+def FCC_U : FCC_VAL<23>; // Unordered
+def FCC_G : FCC_VAL<22>; // Greater
+def FCC_UG : FCC_VAL<21>; // Unordered or Greater
+def FCC_L : FCC_VAL<20>; // Less
+def FCC_UL : FCC_VAL<19>; // Unordered or Less
+def FCC_LG : FCC_VAL<18>; // Less or Greater
+def FCC_NE : FCC_VAL<17>; // Not Equal
+def FCC_E : FCC_VAL<25>; // Equal
+def FCC_UE : FCC_VAL<24>; // Unordered or Equal
+def FCC_GE : FCC_VAL<25>; // Greater or Equal
+def FCC_UGE : FCC_VAL<26>; // Unordered or Greater or Equal
+def FCC_LE : FCC_VAL<27>; // Less or Equal
+def FCC_ULE : FCC_VAL<28>; // Unordered or Less or Equal
+def FCC_O : FCC_VAL<29>; // Ordered
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+/// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
+multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set i32:$dst, (OpNode i32:$b, (i32 simm13:$c)))]>;
+}
+
+/// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
+/// pattern.
+multiclass F3_12np<string OpcStr, bits<6> Op3Val> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern>;
+
+// GETPCX for PIC
+let Defs = [O7] in {
+ def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >;
+}
+
+let Defs = [O6], Uses = [O6] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let hasSideEffects = 1, mayStore = 1 in {
+ let rd = 0, rs1 = 0, rs2 = 0 in
+ def FLUSHW : F3_1<0b10, 0b101011, (outs), (ins),
+ "flushw",
+ [(flushw)]>, Requires<[HasV9]>;
+ let rd = 0, rs1 = 1, simm13 = 3 in
+ def TA3 : F3_2<0b10, 0b111010, (outs), (ins),
+ "ta 3",
+ [(flushw)]>;
+}
+
+def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
+ "unimp $val", []>;
+
+// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the
+// fpmover pass.
+let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
+ def FpMOVD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpMOVD $src, $dst", []>;
+ def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpNEGD $src, $dst",
+ [(set f64:$dst, (fneg f64:$src))]>;
+ def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpABSD $src, $dst",
+ [(set f64:$dst, (fabs f64:$src))]>;
+}
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence. This has to handle all
+// permutations of selection between i32/f32/f64 on ICC and FCC.
+ // Expanded after instruction selection.
+let Uses = [ICC], usesCustomInserter = 1 in {
+ def SELECT_CC_Int_ICC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_ICC PSEUDO!",
+ [(set i32:$dst, (SPselecticc i32:$T, i32:$F, imm:$Cond))]>;
+ def SELECT_CC_FP_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_ICC PSEUDO!",
+ [(set f32:$dst, (SPselecticc f32:$T, f32:$F, imm:$Cond))]>;
+
+ def SELECT_CC_DFP_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_ICC PSEUDO!",
+ [(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>;
+}
+
+let usesCustomInserter = 1, Uses = [FCC] in {
+
+ def SELECT_CC_Int_FCC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_FCC PSEUDO!",
+ [(set i32:$dst, (SPselectfcc i32:$T, i32:$F, imm:$Cond))]>;
+
+ def SELECT_CC_FP_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_FCC PSEUDO!",
+ [(set f32:$dst, (SPselectfcc f32:$T, f32:$F, imm:$Cond))]>;
+ def SELECT_CC_DFP_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_FCC PSEUDO!",
+ [(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>;
+}
+
+
+// Section A.3 - Synthetic Instructions, p. 85
+// special cases of JMPL:
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
+ let rd = O7.Num, rs1 = G0.Num in
+ def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %o7+$val", [(retflag simm13:$val)]>;
+
+ let rd = I7.Num, rs1 = G0.Num in
+ def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %i7+$val", []>;
+}
+
+// Section B.1 - Load Integer Instructions, p. 90
+def LDSBrr : F3_1<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsb [$addr], $dst",
+ [(set i32:$dst, (sextloadi8 ADDRrr:$addr))]>;
+def LDSBri : F3_2<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsb [$addr], $dst",
+ [(set i32:$dst, (sextloadi8 ADDRri:$addr))]>;
+def LDSHrr : F3_1<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsh [$addr], $dst",
+ [(set i32:$dst, (sextloadi16 ADDRrr:$addr))]>;
+def LDSHri : F3_2<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsh [$addr], $dst",
+ [(set i32:$dst, (sextloadi16 ADDRri:$addr))]>;
+def LDUBrr : F3_1<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldub [$addr], $dst",
+ [(set i32:$dst, (zextloadi8 ADDRrr:$addr))]>;
+def LDUBri : F3_2<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldub [$addr], $dst",
+ [(set i32:$dst, (zextloadi8 ADDRri:$addr))]>;
+def LDUHrr : F3_1<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "lduh [$addr], $dst",
+ [(set i32:$dst, (zextloadi16 ADDRrr:$addr))]>;
+def LDUHri : F3_2<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "lduh [$addr], $dst",
+ [(set i32:$dst, (zextloadi16 ADDRri:$addr))]>;
+def LDrr : F3_1<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set i32:$dst, (load ADDRrr:$addr))]>;
+def LDri : F3_2<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set i32:$dst, (load ADDRri:$addr))]>;
+
+// Section B.2 - Load Floating-point Instructions, p. 92
+def LDFrr : F3_1<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set f32:$dst, (load ADDRrr:$addr))]>;
+def LDFri : F3_2<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set f32:$dst, (load ADDRri:$addr))]>;
+def LDDFrr : F3_1<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMrr:$addr),
+ "ldd [$addr], $dst",
+ [(set f64:$dst, (load ADDRrr:$addr))]>;
+def LDDFri : F3_2<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMri:$addr),
+ "ldd [$addr], $dst",
+ [(set f64:$dst, (load ADDRri:$addr))]>;
+
+// Section B.4 - Store Integer Instructions, p. 95
+def STBrr : F3_1<3, 0b000101,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 i32:$src, ADDRrr:$addr)]>;
+def STBri : F3_2<3, 0b000101,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 i32:$src, ADDRri:$addr)]>;
+def STHrr : F3_1<3, 0b000110,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 i32:$src, ADDRrr:$addr)]>;
+def STHri : F3_2<3, 0b000110,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 i32:$src, ADDRri:$addr)]>;
+def STrr : F3_1<3, 0b000100,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store i32:$src, ADDRrr:$addr)]>;
+def STri : F3_2<3, 0b000100,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store i32:$src, ADDRri:$addr)]>;
+
+// Section B.5 - Store Floating-point Instructions, p. 97
+def STFrr : F3_1<3, 0b100100,
+ (outs), (ins MEMrr:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store f32:$src, ADDRrr:$addr)]>;
+def STFri : F3_2<3, 0b100100,
+ (outs), (ins MEMri:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store f32:$src, ADDRri:$addr)]>;
+def STDFrr : F3_1<3, 0b100111,
+ (outs), (ins MEMrr:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store f64:$src, ADDRrr:$addr)]>;
+def STDFri : F3_2<3, 0b100111,
+ (outs), (ins MEMri:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store f64:$src, ADDRri:$addr)]>;
+
+// Section B.9 - SETHI Instruction, p. 104
+def SETHIi: F2_1<0b100,
+ (outs IntRegs:$dst), (ins i32imm:$src),
+ "sethi $src, $dst",
+ [(set i32:$dst, SETHIimm:$src)]>;
+
+// Section B.10 - NOP Instruction, p. 105
+// (It's a special case of SETHI)
+let rd = 0, imm22 = 0 in
+ def NOP : F2_1<0b100, (outs), (ins), "nop", []>;
+
+// Section B.11 - Logical Instructions, p. 106
+defm AND : F3_12<"and", 0b000001, and>;
+
+def ANDNrr : F3_1<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "andn $b, $c, $dst",
+ [(set i32:$dst, (and i32:$b, (not i32:$c)))]>;
+def ANDNri : F3_2<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "andn $b, $c, $dst", []>;
+
+defm OR : F3_12<"or", 0b000010, or>;
+
+def ORNrr : F3_1<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "orn $b, $c, $dst",
+ [(set i32:$dst, (or i32:$b, (not i32:$c)))]>;
+def ORNri : F3_2<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "orn $b, $c, $dst", []>;
+defm XOR : F3_12<"xor", 0b000011, xor>;
+
+def XNORrr : F3_1<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "xnor $b, $c, $dst",
+ [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>;
+def XNORri : F3_2<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "xnor $b, $c, $dst", []>;
+
+// Section B.12 - Shift Instructions, p. 107
+defm SLL : F3_12<"sll", 0b100101, shl>;
+defm SRL : F3_12<"srl", 0b100110, srl>;
+defm SRA : F3_12<"sra", 0b100111, sra>;
+
+// Section B.13 - Add Instructions, p. 108
+defm ADD : F3_12<"add", 0b000000, add>;
+
+// "LEA" forms of add (patterns to make tblgen happy)
+def LEA_ADDri : F3_2<2, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set i32:$dst, ADDRri:$addr)]>;
+
+let Defs = [ICC] in
+ defm ADDCC : F3_12<"addcc", 0b010000, addc>;
+
+let Uses = [ICC] in
+ defm ADDX : F3_12<"addx", 0b001000, adde>;
+
+// Section B.15 - Subtract Instructions, p. 110
+defm SUB : F3_12 <"sub" , 0b000100, sub>;
+let Uses = [ICC] in
+ defm SUBX : F3_12 <"subx" , 0b001100, sube>;
+
+let Defs = [ICC] in
+ defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>;
+
+let Uses = [ICC], Defs = [ICC] in
+ def SUBXCCrr: F3_1<2, 0b011100,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "subxcc $b, $c, $dst", []>;
+
+
+// Section B.18 - Multiply Instructions, p. 113
+let Defs = [Y] in {
+ defm UMUL : F3_12np<"umul", 0b001010>;
+ defm SMUL : F3_12 <"smul", 0b001011, mul>;
+}
+
+// Section B.19 - Divide Instructions, p. 115
+let Defs = [Y] in {
+ defm UDIV : F3_12np<"udiv", 0b001110>;
+ defm SDIV : F3_12np<"sdiv", 0b001111>;
+}
+
+// Section B.20 - SAVE and RESTORE, p. 117
+defm SAVE : F3_12np<"save" , 0b111100>;
+defm RESTORE : F3_12np<"restore", 0b111101>;
+
+// Section B.21 - Branch on Integer Condition Codes Instructions, p. 119
+
+// conditional branch class:
+class BranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b010, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+let isBarrier = 1 in
+ def BA : BranchSP<0b1000, (ins brtarget:$dst),
+ "ba $dst",
+ [(br bb:$dst)]>;
+
+// FIXME: the encoding for the JIT should look at the condition field.
+let Uses = [ICC] in
+ def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "b$cc $dst",
+ [(SPbricc bb:$dst, imm:$cc)]>;
+
+
+// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
+
+// floating-point conditional branch class:
+class FPBranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b110, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+// FIXME: the encoding for the JIT should look at the condition field.
+let Uses = [FCC] in
+ def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "fb$cc $dst",
+ [(SPbrfcc bb:$dst, imm:$cc)]>;
+
+
+// Section B.24 - Call and Link Instruction, p. 125
+// This is the only Format 1 instruction
+let Uses = [O6],
+ hasDelaySlot = 1, isCall = 1,
+ Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
+ D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+ ICC, FCC, Y] in {
+ def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
+ }
+
+ // indirect calls
+ def JMPLrr : F3_1<2, 0b111000,
+ (outs), (ins MEMrr:$ptr, variable_ops),
+ "call $ptr",
+ [(call ADDRrr:$ptr)]>;
+ def JMPLri : F3_2<2, 0b111000,
+ (outs), (ins MEMri:$ptr, variable_ops),
+ "call $ptr",
+ [(call ADDRri:$ptr)]>;
+}
+
+// Section B.28 - Read State Register Instructions
+let Uses = [Y] in
+ def RDY : F3_1<2, 0b101000,
+ (outs IntRegs:$dst), (ins),
+ "rd %y, $dst", []>;
+
+// Section B.29 - Write State Register Instructions
+let Defs = [Y] in {
+ def WRYrr : F3_1<2, 0b110000,
+ (outs), (ins IntRegs:$b, IntRegs:$c),
+ "wr $b, $c, %y", []>;
+ def WRYri : F3_2<2, 0b110000,
+ (outs), (ins IntRegs:$b, i32imm:$c),
+ "wr $b, $c, %y", []>;
+}
+// Convert Integer to Floating-point Instructions, p. 141
+def FITOS : F3_3<2, 0b110100, 0b011000100,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fitos $src, $dst",
+ [(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
+def FITOD : F3_3<2, 0b110100, 0b011001000,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fitod $src, $dst",
+ [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
+
+// Convert Floating-point to Integer Instructions, p. 142
+def FSTOI : F3_3<2, 0b110100, 0b011010001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fstoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi FPRegs:$src))]>;
+def FDTOI : F3_3<2, 0b110100, 0b011010010,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
+
+// Convert between Floating-point Formats Instructions, p. 143
+def FSTOD : F3_3<2, 0b110100, 0b011001001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fstod $src, $dst",
+ [(set f64:$dst, (fextend f32:$src))]>;
+def FDTOS : F3_3<2, 0b110100, 0b011000110,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtos $src, $dst",
+ [(set f32:$dst, (fround f64:$src))]>;
+
+// Floating-point Move Instructions, p. 144
+def FMOVS : F3_3<2, 0b110100, 0b000000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fmovs $src, $dst", []>;
+def FNEGS : F3_3<2, 0b110100, 0b000000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fnegs $src, $dst",
+ [(set f32:$dst, (fneg f32:$src))]>;
+def FABSS : F3_3<2, 0b110100, 0b000001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fabss $src, $dst",
+ [(set f32:$dst, (fabs f32:$src))]>;
+
+
+// Floating-point Square Root Instructions, p.145
+def FSQRTS : F3_3<2, 0b110100, 0b000101001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fsqrts $src, $dst",
+ [(set f32:$dst, (fsqrt f32:$src))]>;
+def FSQRTD : F3_3<2, 0b110100, 0b000101010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fsqrtd $src, $dst",
+ [(set f64:$dst, (fsqrt f64:$src))]>;
+
+
+
+// Floating-point Add and Subtract Instructions, p. 146
+def FADDS : F3_3<2, 0b110100, 0b001000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fadds $src1, $src2, $dst",
+ [(set f32:$dst, (fadd f32:$src1, f32:$src2))]>;
+def FADDD : F3_3<2, 0b110100, 0b001000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "faddd $src1, $src2, $dst",
+ [(set f64:$dst, (fadd f64:$src1, f64:$src2))]>;
+def FSUBS : F3_3<2, 0b110100, 0b001000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsubs $src1, $src2, $dst",
+ [(set f32:$dst, (fsub f32:$src1, f32:$src2))]>;
+def FSUBD : F3_3<2, 0b110100, 0b001000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fsubd $src1, $src2, $dst",
+ [(set f64:$dst, (fsub f64:$src1, f64:$src2))]>;
+
+// Floating-point Multiply and Divide Instructions, p. 147
+def FMULS : F3_3<2, 0b110100, 0b001001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fmuls $src1, $src2, $dst",
+ [(set f32:$dst, (fmul f32:$src1, f32:$src2))]>;
+def FMULD : F3_3<2, 0b110100, 0b001001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fmuld $src1, $src2, $dst",
+ [(set f64:$dst, (fmul f64:$src1, f64:$src2))]>;
+def FSMULD : F3_3<2, 0b110100, 0b001101001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsmuld $src1, $src2, $dst",
+ [(set f64:$dst, (fmul (fextend f32:$src1),
+ (fextend f32:$src2)))]>;
+def FDIVS : F3_3<2, 0b110100, 0b001001101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fdivs $src1, $src2, $dst",
+ [(set f32:$dst, (fdiv f32:$src1, f32:$src2))]>;
+def FDIVD : F3_3<2, 0b110100, 0b001001110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fdivd $src1, $src2, $dst",
+ [(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>;
+
+// Floating-point Compare Instructions, p. 148
+// Note: the 2nd template arg is different for these guys.
+// Note 2: the result of a FCMP is not available until the 2nd cycle
+// after the instr is retired, but there is no interlock. This behavior
+// is modelled with a forced noop after the instruction.
+let Defs = [FCC] in {
+ def FCMPS : F3_3<2, 0b110101, 0b001010001,
+ (outs), (ins FPRegs:$src1, FPRegs:$src2),
+ "fcmps $src1, $src2\n\tnop",
+ [(SPcmpfcc f32:$src1, f32:$src2)]>;
+ def FCMPD : F3_3<2, 0b110101, 0b001010010,
+ (outs), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fcmpd $src1, $src2\n\tnop",
+ [(SPcmpfcc f64:$src1, f64:$src2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// V9 Instructions
+//===----------------------------------------------------------------------===//
+
+// V9 Conditional Moves.
+let Predicates = [HasV9], Constraints = "$T = $dst" in {
+ // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
+ // FIXME: Add instruction encodings for the JIT some day.
+ let Uses = [ICC] in {
+ def MOVICCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set i32:$dst, (SPselecticc i32:$F, i32:$T, imm:$cc))]>;
+ def MOVICCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set i32:$dst, (SPselecticc simm11:$F, i32:$T, imm:$cc))]>;
+ }
+
+ let Uses = [FCC] in {
+ def MOVFCCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set i32:$dst, (SPselectfcc i32:$F, i32:$T, imm:$cc))]>;
+ def MOVFCCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set i32:$dst, (SPselectfcc simm11:$F, i32:$T, imm:$cc))]>;
+ }
+
+ let Uses = [ICC] in {
+ def FMOVS_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %icc, $F, $dst",
+ [(set f32:$dst,
+ (SPselecticc f32:$F, f32:$T, imm:$cc))]>;
+ def FMOVD_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %icc, $F, $dst",
+ [(set f64:$dst, (SPselecticc f64:$F, f64:$T, imm:$cc))]>;
+ }
+
+ let Uses = [FCC] in {
+ def FMOVS_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %fcc0, $F, $dst",
+ [(set f32:$dst, (SPselectfcc f32:$F, f32:$T, imm:$cc))]>;
+ def FMOVD_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %fcc0, $F, $dst",
+ [(set f64:$dst, (SPselectfcc f64:$F, f64:$T, imm:$cc))]>;
+ }
+
+}
+
+// Floating-Point Move Instructions, p. 164 of the V9 manual.
+let Predicates = [HasV9] in {
+ def FMOVD : F3_3<2, 0b110100, 0b000000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fmovd $src, $dst", []>;
+ def FNEGD : F3_3<2, 0b110100, 0b000000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fnegd $src, $dst",
+ [(set f64:$dst, (fneg f64:$src))]>;
+ def FABSD : F3_3<2, 0b110100, 0b000001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fabsd $src, $dst",
+ [(set f64:$dst, (fabs f64:$src))]>;
+}
+
+// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear
+// the top 32-bits before using it. To do this clearing, we use a SLLri X,0.
+def POPCrr : F3_1<2, 0b101110,
+ (outs IntRegs:$dst), (ins IntRegs:$src),
+ "popc $src, $dst", []>, Requires<[HasV9]>;
+def : Pat<(ctpop i32:$src),
+ (POPCrr (SLLri $src, 0))>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Small immediates.
+def : Pat<(i32 simm13:$val),
+ (ORri (i32 G0), imm:$val)>;
+// Arbitrary immediates.
+def : Pat<(i32 imm:$val),
+ (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
+
+// subc
+def : Pat<(subc i32:$b, i32:$c),
+ (SUBCCrr $b, $c)>;
+def : Pat<(subc i32:$b, simm13:$val),
+ (SUBCCri $b, imm:$val)>;
+
+// Global addresses, constant pool entries
+def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>;
+def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>;
+
+// Add reg, lo. This is used when taking the addr of a global/constpool entry.
+def : Pat<(add i32:$r, (SPlo tglobaladdr:$in)),
+ (ADDri $r, tglobaladdr:$in)>;
+def : Pat<(add i32:$r, (SPlo tconstpool:$in)),
+ (ADDri $r, tconstpool:$in)>;
+
+// Calls:
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+
+// Map integer extload's to zextloads.
+def : Pat<(i32 (extloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi8 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi16 ADDRrr:$src)), (LDUHrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
+
+// zextload bool -> zextload byte
+def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+
+include "SparcInstr64Bit.td"
diff --git a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..e7442826e78b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- SparcMachineFunctionInfo.cpp - Sparc Machine Function Info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void SparcMachineFunctionInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
new file mode 100644
index 000000000000..90c27a4459a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -0,0 +1,48 @@
+//===- SparcMachineFunctionInfo.h - Sparc Machine Function Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Sparc specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SPARCMACHINEFUNCTIONINFO_H
+#define SPARCMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ class SparcMachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+ private:
+ unsigned GlobalBaseReg;
+
+ /// VarArgsFrameOffset - Frame offset to start of varargs area.
+ int VarArgsFrameOffset;
+
+ /// SRetReturnReg - Holds the virtual register into which the sret
+ /// argument is passed.
+ unsigned SRetReturnReg;
+ public:
+ SparcMachineFunctionInfo()
+ : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+ explicit SparcMachineFunctionInfo(MachineFunction &MF)
+ : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+ void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
new file mode 100644
index 000000000000..db9b30eb4330
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -0,0 +1,110 @@
+//===-- SparcRegisterInfo.cpp - SPARC Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPARC implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcRegisterInfo.h"
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SparcGenRegisterInfo.inc"
+
+using namespace llvm;
+
+SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
+ const TargetInstrInfo &tii)
+ : SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) {
+}
+
+const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const uint16_t CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ // FIXME: G1 reserved for now for large imm generation by frame code.
+ Reserved.set(SP::G1);
+ Reserved.set(SP::G2);
+ Reserved.set(SP::G3);
+ Reserved.set(SP::G4);
+ Reserved.set(SP::O6);
+ Reserved.set(SP::I6);
+ Reserved.set(SP::I7);
+ Reserved.set(SP::G0);
+ Reserved.set(SP::G5);
+ Reserved.set(SP::G6);
+ Reserved.set(SP::G7);
+ return Reserved;
+}
+
+const TargetRegisterClass*
+SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
+}
+
+void
+SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+
+ // Addressable stack objects are accessed using neg. offsets from %fp
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(FIOperandNum + 1).getImm();
+
+ // Replace frame index with a frame pointer reference.
+ if (Offset >= -4096 && Offset <= 4095) {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::I6, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ } else {
+ // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
+ // scavenge a register here instead of reserving G1 all of the time.
+ unsigned OffHi = (unsigned)Offset >> 10U;
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(SP::I6);
+ // Insert: G1+%lo(offset) into the user.
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ }
+}
+
+unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return SP::I6;
+}
+
+unsigned SparcRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+}
+
+unsigned SparcRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
new file mode 100644
index 000000000000..f91df5398953
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -0,0 +1,59 @@
+//===-- SparcRegisterInfo.h - Sparc Register Information Impl ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCREGISTERINFO_H
+#define SPARCREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SparcGenRegisterInfo.inc"
+
+namespace llvm {
+
+class SparcSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct SparcRegisterInfo : public SparcGenRegisterInfo {
+ SparcSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
new file mode 100644
index 000000000000..497e7c5d5612
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -0,0 +1,170 @@
+//===-- SparcRegisterInfo.td - Sparc Register defs ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Sparc register file
+//===----------------------------------------------------------------------===//
+
+class SparcReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "SP";
+}
+
+class SparcCtrlReg<string n>: Register<n> {
+ let Namespace = "SP";
+}
+
+let Namespace = "SP" in {
+def sub_even : SubRegIndex;
+def sub_odd : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rf - 32-bit floating-point registers
+class Rf<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rd - Slots in the FP register file for 64-bit floating-point values.
+class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+ let Num = num;
+ let SubRegs = subregs;
+ let SubRegIndices = [sub_even, sub_odd];
+ let CoveredBySubRegs = 1;
+}
+
+// Control Registers
+def ICC : SparcCtrlReg<"ICC">; // This represents icc and xcc in 64-bit code.
+def FCC : SparcCtrlReg<"FCC">;
+
+// Y register
+def Y : SparcCtrlReg<"Y">;
+
+// Integer registers
+def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
+def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
+def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>;
+def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>;
+def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>;
+def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>;
+def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>;
+def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>;
+def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>;
+def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>;
+def O2 : Ri<10, "O2">, DwarfRegNum<[10]>;
+def O3 : Ri<11, "O3">, DwarfRegNum<[11]>;
+def O4 : Ri<12, "O4">, DwarfRegNum<[12]>;
+def O5 : Ri<13, "O5">, DwarfRegNum<[13]>;
+def O6 : Ri<14, "SP">, DwarfRegNum<[14]>;
+def O7 : Ri<15, "O7">, DwarfRegNum<[15]>;
+def L0 : Ri<16, "L0">, DwarfRegNum<[16]>;
+def L1 : Ri<17, "L1">, DwarfRegNum<[17]>;
+def L2 : Ri<18, "L2">, DwarfRegNum<[18]>;
+def L3 : Ri<19, "L3">, DwarfRegNum<[19]>;
+def L4 : Ri<20, "L4">, DwarfRegNum<[20]>;
+def L5 : Ri<21, "L5">, DwarfRegNum<[21]>;
+def L6 : Ri<22, "L6">, DwarfRegNum<[22]>;
+def L7 : Ri<23, "L7">, DwarfRegNum<[23]>;
+def I0 : Ri<24, "I0">, DwarfRegNum<[24]>;
+def I1 : Ri<25, "I1">, DwarfRegNum<[25]>;
+def I2 : Ri<26, "I2">, DwarfRegNum<[26]>;
+def I3 : Ri<27, "I3">, DwarfRegNum<[27]>;
+def I4 : Ri<28, "I4">, DwarfRegNum<[28]>;
+def I5 : Ri<29, "I5">, DwarfRegNum<[29]>;
+def I6 : Ri<30, "FP">, DwarfRegNum<[30]>;
+def I7 : Ri<31, "I7">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
+def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
+def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>;
+def F3 : Rf< 3, "F3">, DwarfRegNum<[35]>;
+def F4 : Rf< 4, "F4">, DwarfRegNum<[36]>;
+def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>;
+def F6 : Rf< 6, "F6">, DwarfRegNum<[38]>;
+def F7 : Rf< 7, "F7">, DwarfRegNum<[39]>;
+def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>;
+def F9 : Rf< 9, "F9">, DwarfRegNum<[41]>;
+def F10 : Rf<10, "F10">, DwarfRegNum<[42]>;
+def F11 : Rf<11, "F11">, DwarfRegNum<[43]>;
+def F12 : Rf<12, "F12">, DwarfRegNum<[44]>;
+def F13 : Rf<13, "F13">, DwarfRegNum<[45]>;
+def F14 : Rf<14, "F14">, DwarfRegNum<[46]>;
+def F15 : Rf<15, "F15">, DwarfRegNum<[47]>;
+def F16 : Rf<16, "F16">, DwarfRegNum<[48]>;
+def F17 : Rf<17, "F17">, DwarfRegNum<[49]>;
+def F18 : Rf<18, "F18">, DwarfRegNum<[50]>;
+def F19 : Rf<19, "F19">, DwarfRegNum<[51]>;
+def F20 : Rf<20, "F20">, DwarfRegNum<[52]>;
+def F21 : Rf<21, "F21">, DwarfRegNum<[53]>;
+def F22 : Rf<22, "F22">, DwarfRegNum<[54]>;
+def F23 : Rf<23, "F23">, DwarfRegNum<[55]>;
+def F24 : Rf<24, "F24">, DwarfRegNum<[56]>;
+def F25 : Rf<25, "F25">, DwarfRegNum<[57]>;
+def F26 : Rf<26, "F26">, DwarfRegNum<[58]>;
+def F27 : Rf<27, "F27">, DwarfRegNum<[59]>;
+def F28 : Rf<28, "F28">, DwarfRegNum<[60]>;
+def F29 : Rf<29, "F29">, DwarfRegNum<[61]>;
+def F30 : Rf<30, "F30">, DwarfRegNum<[62]>;
+def F31 : Rf<31, "F31">, DwarfRegNum<[63]>;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[72]>;
+def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[73]>;
+def D2 : Rd< 4, "F4", [F4, F5]>, DwarfRegNum<[74]>;
+def D3 : Rd< 6, "F6", [F6, F7]>, DwarfRegNum<[75]>;
+def D4 : Rd< 8, "F8", [F8, F9]>, DwarfRegNum<[76]>;
+def D5 : Rd<10, "F10", [F10, F11]>, DwarfRegNum<[77]>;
+def D6 : Rd<12, "F12", [F12, F13]>, DwarfRegNum<[78]>;
+def D7 : Rd<14, "F14", [F14, F15]>, DwarfRegNum<[79]>;
+def D8 : Rd<16, "F16", [F16, F17]>, DwarfRegNum<[80]>;
+def D9 : Rd<18, "F18", [F18, F19]>, DwarfRegNum<[81]>;
+def D10 : Rd<20, "F20", [F20, F21]>, DwarfRegNum<[82]>;
+def D11 : Rd<22, "F22", [F22, F23]>, DwarfRegNum<[83]>;
+def D12 : Rd<24, "F24", [F24, F25]>, DwarfRegNum<[84]>;
+def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[85]>;
+def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[86]>;
+def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+// This register class should not be used to hold i64 values, use the I64Regs
+// register class for that. The i64 type is included here to allow i64 patterns
+// using the integer instructions.
+def IntRegs : RegisterClass<"SP", [i32, i64], 32,
+ (add L0, L1, L2, L3, L4, L5, L6,
+ L7, I0, I1, I2, I3, I4, I5,
+ O0, O1, O2, O3, O4, O5, O7,
+ G1,
+ // Non-allocatable regs:
+ G2, G3, G4, // FIXME: OK for use only in
+ // applications, not libraries.
+ O6, // stack ptr
+ I6, // frame ptr
+ I7, // return address
+ G0, // constant zero
+ G5, G6, G7 // reserved for kernel
+ )>;
+
+// Register class for 64-bit mode, with a 64-bit spill slot size.
+// These are the same as the 32-bit registers, so TableGen will consider this
+// to be a sub-class of IntRegs. That works out because requiring a 64-bit
+// spill slot is a stricter constraint than only requiring a 32-bit spill slot.
+def I64Regs : RegisterClass<"SP", [i64], 64, (add IntRegs)>;
+
+// Floating point register classes.
+def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
+
+def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..190c575a52de
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SparcSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparc-selectiondag-info"
+#include "SparcTargetMachine.h"
+using namespace llvm;
+
+SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
new file mode 100644
index 000000000000..dcd42037253d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Sparc subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCSELECTIONDAGINFO_H
+#define SPARCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SparcTargetMachine;
+
+class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM);
+ ~SparcSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
new file mode 100644
index 000000000000..e5b2aeb1bb85
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -0,0 +1,46 @@
+//===-- SparcSubtarget.cpp - SPARC Subtarget Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPARC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcSubtarget.h"
+#include "Sparc.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SparcGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+void SparcSubtarget::anchor() { }
+
+SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit) :
+ SparcGenSubtargetInfo(TT, CPU, FS),
+ IsV9(false),
+ V8DeprecatedInsts(false),
+ IsVIS(false),
+ Is64Bit(is64Bit) {
+
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+ if (is64Bit)
+ CPUName = "v9";
+ else
+ CPUName = "v8";
+ }
+ IsV9 = CPUName == "v9";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
new file mode 100644
index 000000000000..a81931b34aa2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -0,0 +1,59 @@
+//===-- SparcSubtarget.h - Define Subtarget for the SPARC -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPARC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_SUBTARGET_H
+#define SPARC_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SparcGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class SparcSubtarget : public SparcGenSubtargetInfo {
+ virtual void anchor();
+ bool IsV9;
+ bool V8DeprecatedInsts;
+ bool IsVIS;
+ bool Is64Bit;
+
+public:
+ SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64bit);
+
+ bool isV9() const { return IsV9; }
+ bool isVIS() const { return IsVIS; }
+ bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool is64Bit() const { return Is64Bit; }
+ std::string getDataLayout() const {
+ const char *p;
+ if (is64Bit()) {
+ p = "E-p:64:64:64-i64:64:64-f64:64:64-f128:128:128-n32:64";
+ } else {
+ p = "E-p:32:32:32-i64:64:64-f64:64:64-f128:64:64-n32";
+ }
+ return std::string(p);
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
new file mode 100644
index 000000000000..60bceb708fbc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -0,0 +1,98 @@
+//===-- SparcTargetMachine.cpp - Define TargetMachine for Sparc -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetMachine.h"
+#include "Sparc.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeSparcTarget() {
+ // Register the target.
+ RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget);
+ RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
+}
+
+/// SparcTargetMachine ctor - Create an ILP32 architecture model
+///
+SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool is64bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, is64bit),
+ DL(Subtarget.getDataLayout()),
+ InstrInfo(Subtarget),
+ TLInfo(*this), TSInfo(*this),
+ FrameLowering(Subtarget) {
+}
+
+namespace {
+/// Sparc Code Generator Pass Configuration Options.
+class SparcPassConfig : public TargetPassConfig {
+public:
+ SparcPassConfig(SparcTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ SparcTargetMachine &getSparcTargetMachine() const {
+ return getTM<SparcTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new SparcPassConfig(this, PM);
+}
+
+bool SparcPassConfig::addInstSelector() {
+ addPass(createSparcISelDag(getSparcTargetMachine()));
+ return false;
+}
+
+/// addPreEmitPass - This pass may be implemented by targets that want to run
+/// passes immediately before machine code is emitted. This should return
+/// true if -print-machineinstrs should print out the code after the passes.
+bool SparcPassConfig::addPreEmitPass(){
+ addPass(createSparcFPMoverPass(getSparcTargetMachine()));
+ addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
+ return true;
+}
+
+void SparcV8TargetMachine::anchor() { }
+
+SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
+ StringRef TT, StringRef CPU,
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+}
+
+void SparcV9TargetMachine::anchor() { }
+
+SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
+ StringRef TT, StringRef CPU,
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
new file mode 100644
index 000000000000..081075de2dc8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -0,0 +1,87 @@
+//===-- SparcTargetMachine.h - Define TargetMachine for Sparc ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Sparc specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETMACHINE_H
+#define SPARCTARGETMACHINE_H
+
+#include "SparcFrameLowering.h"
+#include "SparcISelLowering.h"
+#include "SparcInstrInfo.h"
+#include "SparcSelectionDAGInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class SparcTargetMachine : public LLVMTargetMachine {
+ SparcSubtarget Subtarget;
+ const DataLayout DL; // Calculates type size & alignment
+ SparcInstrInfo InstrInfo;
+ SparcTargetLowering TLInfo;
+ SparcSelectionDAGInfo TSInfo;
+ SparcFrameLowering FrameLowering;
+public:
+ SparcTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64bit);
+
+ virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const SparcSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const SparcRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const SparcTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+/// SparcV8TargetMachine - Sparc 32-bit target machine
+///
+class SparcV8TargetMachine : public SparcTargetMachine {
+ virtual void anchor();
+public:
+ SparcV8TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+/// SparcV9TargetMachine - Sparc 64-bit target machine
+///
+class SparcV9TargetMachine : public SparcTargetMachine {
+ virtual void anchor();
+public:
+ SparcV9TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
new file mode 100644
index 000000000000..bb714632349a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- SparcTargetInfo.cpp - Sparc Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheSparcTarget;
+Target llvm::TheSparcV9Target;
+
+extern "C" void LLVMInitializeSparcTargetInfo() {
+ RegisterTarget<Triple::sparc> X(TheSparcTarget, "sparc", "Sparc");
+ RegisterTarget<Triple::sparcv9> Y(TheSparcV9Target, "sparcv9", "Sparc V9");
+}
diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp
new file mode 100644
index 000000000000..9a78ebc3facb
--- /dev/null
+++ b/contrib/llvm/lib/Target/Target.cpp
@@ -0,0 +1,116 @@
+//===-- Target.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMTarget.a, which implements target information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Target.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include <cstring>
+
+using namespace llvm;
+
+void llvm::initializeTarget(PassRegistry &Registry) {
+ initializeDataLayoutPass(Registry);
+ initializeTargetLibraryInfoPass(Registry);
+}
+
+void LLVMInitializeTarget(LLVMPassRegistryRef R) {
+ initializeTarget(*unwrap(R));
+}
+
+LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
+ return wrap(new DataLayout(StringRep));
+}
+
+void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
+ unwrap(PM)->add(new DataLayout(*unwrap(TD)));
+}
+
+void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI,
+ LLVMPassManagerRef PM) {
+ unwrap(PM)->add(new TargetLibraryInfo(*unwrap(TLI)));
+}
+
+char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) {
+ std::string StringRep = unwrap(TD)->getStringRepresentation();
+ return strdup(StringRep.c_str());
+}
+
+LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) {
+ return unwrap(TD)->isLittleEndian() ? LLVMLittleEndian : LLVMBigEndian;
+}
+
+unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
+ return unwrap(TD)->getPointerSize(0);
+}
+
+unsigned LLVMPointerSizeForAS(LLVMTargetDataRef TD, unsigned AS) {
+ return unwrap(TD)->getPointerSize(AS);
+}
+
+LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
+ return wrap(unwrap(TD)->getIntPtrType(getGlobalContext()));
+}
+
+LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) {
+ return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), AS));
+}
+
+unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
+}
+
+unsigned long long LLVMStoreSizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeStoreSize(unwrap(Ty));
+}
+
+unsigned long long LLVMABISizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeAllocSize(unwrap(Ty));
+}
+
+unsigned LLVMABIAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getABITypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMCallFrameAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getCallFrameTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getPrefTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD,
+ LLVMValueRef GlobalVar) {
+ return unwrap(TD)->getPreferredAlignment(unwrap<GlobalVariable>(GlobalVar));
+}
+
+unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned long long Offset) {
+ StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset);
+}
+
+unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned Element) {
+ StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element);
+}
+
+void LLVMDisposeTargetData(LLVMTargetDataRef TD) {
+ delete unwrap(TD);
+}
diff --git a/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp b/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
new file mode 100644
index 000000000000..64bd56f6e7df
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
@@ -0,0 +1,30 @@
+//===-- TargetIntrinsicInfo.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetIntrinsicInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Function.h"
+using namespace llvm;
+
+TargetIntrinsicInfo::TargetIntrinsicInfo() {
+}
+
+TargetIntrinsicInfo::~TargetIntrinsicInfo() {
+}
+
+unsigned TargetIntrinsicInfo::getIntrinsicID(Function *F) const {
+ const ValueName *ValName = F->getValueName();
+ if (!ValName)
+ return 0;
+ return lookupName(ValName->getKeyData(), ValName->getKeyLength());
+}
diff --git a/contrib/llvm/lib/Target/TargetJITInfo.cpp b/contrib/llvm/lib/Target/TargetJITInfo.cpp
new file mode 100644
index 000000000000..aafedf8749b1
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetJITInfo.cpp
@@ -0,0 +1,14 @@
+//===- Target/TargetJITInfo.h - Target Information for JIT ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetJITInfo.h"
+
+using namespace llvm;
+
+void TargetJITInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/TargetLibraryInfo.cpp b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
new file mode 100644
index 000000000000..ee88ce77c09f
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
@@ -0,0 +1,645 @@
+//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetLibraryInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+
+// Register the default implementation.
+INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo",
+ "Target Library Information", false, true)
+char TargetLibraryInfo::ID = 0;
+
+void TargetLibraryInfo::anchor() { }
+
+const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
+ {
+ "_IO_getc",
+ "_IO_putc",
+ "_ZdaPv",
+ "_ZdlPv",
+ "_Znaj",
+ "_ZnajRKSt9nothrow_t",
+ "_Znam",
+ "_ZnamRKSt9nothrow_t",
+ "_Znwj",
+ "_ZnwjRKSt9nothrow_t",
+ "_Znwm",
+ "_ZnwmRKSt9nothrow_t",
+ "__cxa_atexit",
+ "__cxa_guard_abort",
+ "__cxa_guard_acquire",
+ "__cxa_guard_release",
+ "__isoc99_scanf",
+ "__isoc99_sscanf",
+ "__memcpy_chk",
+ "__strdup",
+ "__strndup",
+ "__strtok_r",
+ "abs",
+ "access",
+ "acos",
+ "acosf",
+ "acosh",
+ "acoshf",
+ "acoshl",
+ "acosl",
+ "asin",
+ "asinf",
+ "asinh",
+ "asinhf",
+ "asinhl",
+ "asinl",
+ "atan",
+ "atan2",
+ "atan2f",
+ "atan2l",
+ "atanf",
+ "atanh",
+ "atanhf",
+ "atanhl",
+ "atanl",
+ "atof",
+ "atoi",
+ "atol",
+ "atoll",
+ "bcmp",
+ "bcopy",
+ "bzero",
+ "calloc",
+ "cbrt",
+ "cbrtf",
+ "cbrtl",
+ "ceil",
+ "ceilf",
+ "ceill",
+ "chmod",
+ "chown",
+ "clearerr",
+ "closedir",
+ "copysign",
+ "copysignf",
+ "copysignl",
+ "cos",
+ "cosf",
+ "cosh",
+ "coshf",
+ "coshl",
+ "cosl",
+ "ctermid",
+ "exp",
+ "exp10",
+ "exp10f",
+ "exp10l",
+ "exp2",
+ "exp2f",
+ "exp2l",
+ "expf",
+ "expl",
+ "expm1",
+ "expm1f",
+ "expm1l",
+ "fabs",
+ "fabsf",
+ "fabsl",
+ "fclose",
+ "fdopen",
+ "feof",
+ "ferror",
+ "fflush",
+ "ffs",
+ "ffsl",
+ "ffsll",
+ "fgetc",
+ "fgetpos",
+ "fgets",
+ "fileno",
+ "fiprintf",
+ "flockfile",
+ "floor",
+ "floorf",
+ "floorl",
+ "fmod",
+ "fmodf",
+ "fmodl",
+ "fopen",
+ "fopen64",
+ "fprintf",
+ "fputc",
+ "fputs",
+ "fread",
+ "free",
+ "frexp",
+ "frexpf",
+ "frexpl",
+ "fscanf",
+ "fseek",
+ "fseeko",
+ "fseeko64",
+ "fsetpos",
+ "fstat",
+ "fstat64",
+ "fstatvfs",
+ "fstatvfs64",
+ "ftell",
+ "ftello",
+ "ftello64",
+ "ftrylockfile",
+ "funlockfile",
+ "fwrite",
+ "getc",
+ "getc_unlocked",
+ "getchar",
+ "getenv",
+ "getitimer",
+ "getlogin_r",
+ "getpwnam",
+ "gets",
+ "htonl",
+ "htons",
+ "iprintf",
+ "isascii",
+ "isdigit",
+ "labs",
+ "lchown",
+ "llabs",
+ "log",
+ "log10",
+ "log10f",
+ "log10l",
+ "log1p",
+ "log1pf",
+ "log1pl",
+ "log2",
+ "log2f",
+ "log2l",
+ "logb",
+ "logbf",
+ "logbl",
+ "logf",
+ "logl",
+ "lstat",
+ "lstat64",
+ "malloc",
+ "memalign",
+ "memccpy",
+ "memchr",
+ "memcmp",
+ "memcpy",
+ "memmove",
+ "memrchr",
+ "memset",
+ "memset_pattern16",
+ "mkdir",
+ "mktime",
+ "modf",
+ "modff",
+ "modfl",
+ "nearbyint",
+ "nearbyintf",
+ "nearbyintl",
+ "ntohl",
+ "ntohs",
+ "open",
+ "open64",
+ "opendir",
+ "pclose",
+ "perror",
+ "popen",
+ "posix_memalign",
+ "pow",
+ "powf",
+ "powl",
+ "pread",
+ "printf",
+ "putc",
+ "putchar",
+ "puts",
+ "pwrite",
+ "qsort",
+ "read",
+ "readlink",
+ "realloc",
+ "reallocf",
+ "realpath",
+ "remove",
+ "rename",
+ "rewind",
+ "rint",
+ "rintf",
+ "rintl",
+ "rmdir",
+ "round",
+ "roundf",
+ "roundl",
+ "scanf",
+ "setbuf",
+ "setitimer",
+ "setvbuf",
+ "sin",
+ "sinf",
+ "sinh",
+ "sinhf",
+ "sinhl",
+ "sinl",
+ "siprintf",
+ "snprintf",
+ "sprintf",
+ "sqrt",
+ "sqrtf",
+ "sqrtl",
+ "sscanf",
+ "stat",
+ "stat64",
+ "statvfs",
+ "statvfs64",
+ "stpcpy",
+ "stpncpy",
+ "strcasecmp",
+ "strcat",
+ "strchr",
+ "strcmp",
+ "strcoll",
+ "strcpy",
+ "strcspn",
+ "strdup",
+ "strlen",
+ "strncasecmp",
+ "strncat",
+ "strncmp",
+ "strncpy",
+ "strndup",
+ "strnlen",
+ "strpbrk",
+ "strrchr",
+ "strspn",
+ "strstr",
+ "strtod",
+ "strtof",
+ "strtok",
+ "strtok_r",
+ "strtol",
+ "strtold",
+ "strtoll",
+ "strtoul",
+ "strtoull",
+ "strxfrm",
+ "system",
+ "tan",
+ "tanf",
+ "tanh",
+ "tanhf",
+ "tanhl",
+ "tanl",
+ "times",
+ "tmpfile",
+ "tmpfile64",
+ "toascii",
+ "trunc",
+ "truncf",
+ "truncl",
+ "uname",
+ "ungetc",
+ "unlink",
+ "unsetenv",
+ "utime",
+ "utimes",
+ "valloc",
+ "vfprintf",
+ "vfscanf",
+ "vprintf",
+ "vscanf",
+ "vsnprintf",
+ "vsprintf",
+ "vsscanf",
+ "write"
+ };
+
+/// initialize - Initialize the set of available library functions based on the
+/// specified target triple. This should be carefully written so that a missing
+/// target triple gets a sane set of defaults.
+static void initialize(TargetLibraryInfo &TLI, const Triple &T,
+ const char **StandardNames) {
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+
+#ifndef NDEBUG
+ // Verify that the StandardNames array is in alphabetical order.
+ for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
+ if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
+ llvm_unreachable("TargetLibraryInfo function names must be sorted");
+ }
+#endif // !NDEBUG
+
+ // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later.
+ if (T.isMacOSX()) {
+ if (T.isMacOSXVersionLT(10, 5))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else if (T.getOS() == Triple::IOS) {
+ if (T.isOSVersionLT(3, 0))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else {
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ }
+
+ if (T.isMacOSX() && T.getArch() == Triple::x86 &&
+ !T.isMacOSXVersionLT(10, 7)) {
+ // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
+ // we don't care about) have two versions; on recent OSX, the one we want
+ // has a $UNIX2003 suffix. The two implementations are identical except
+ // for the return value in some edge cases. However, we don't want to
+ // generate code that depends on the old symbols.
+ TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003");
+ TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003");
+ }
+
+ // iprintf and friends are only available on XCore and TCE.
+ if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+ TLI.setUnavailable(LibFunc::iprintf);
+ TLI.setUnavailable(LibFunc::siprintf);
+ TLI.setUnavailable(LibFunc::fiprintf);
+ }
+
+ if (T.getOS() == Triple::Win32) {
+ // Win32 does not support long double
+ TLI.setUnavailable(LibFunc::acosl);
+ TLI.setUnavailable(LibFunc::asinl);
+ TLI.setUnavailable(LibFunc::atanl);
+ TLI.setUnavailable(LibFunc::atan2l);
+ TLI.setUnavailable(LibFunc::ceill);
+ TLI.setUnavailable(LibFunc::copysignl);
+ TLI.setUnavailable(LibFunc::cosl);
+ TLI.setUnavailable(LibFunc::coshl);
+ TLI.setUnavailable(LibFunc::expl);
+ TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf
+ TLI.setUnavailable(LibFunc::fabsl);
+ TLI.setUnavailable(LibFunc::floorl);
+ TLI.setUnavailable(LibFunc::fmodl);
+ TLI.setUnavailable(LibFunc::frexpl);
+ TLI.setUnavailable(LibFunc::logl);
+ TLI.setUnavailable(LibFunc::modfl);
+ TLI.setUnavailable(LibFunc::powl);
+ TLI.setUnavailable(LibFunc::sinl);
+ TLI.setUnavailable(LibFunc::sinhl);
+ TLI.setUnavailable(LibFunc::sqrtl);
+ TLI.setUnavailable(LibFunc::tanl);
+ TLI.setUnavailable(LibFunc::tanhl);
+
+ // Win32 only has C89 math
+ TLI.setUnavailable(LibFunc::acosh);
+ TLI.setUnavailable(LibFunc::acoshf);
+ TLI.setUnavailable(LibFunc::acoshl);
+ TLI.setUnavailable(LibFunc::asinh);
+ TLI.setUnavailable(LibFunc::asinhf);
+ TLI.setUnavailable(LibFunc::asinhl);
+ TLI.setUnavailable(LibFunc::atanh);
+ TLI.setUnavailable(LibFunc::atanhf);
+ TLI.setUnavailable(LibFunc::atanhl);
+ TLI.setUnavailable(LibFunc::cbrt);
+ TLI.setUnavailable(LibFunc::cbrtf);
+ TLI.setUnavailable(LibFunc::cbrtl);
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ TLI.setUnavailable(LibFunc::exp10l);
+ TLI.setUnavailable(LibFunc::exp2);
+ TLI.setUnavailable(LibFunc::exp2f);
+ TLI.setUnavailable(LibFunc::exp2l);
+ TLI.setUnavailable(LibFunc::expm1);
+ TLI.setUnavailable(LibFunc::expm1f);
+ TLI.setUnavailable(LibFunc::expm1l);
+ TLI.setUnavailable(LibFunc::log2);
+ TLI.setUnavailable(LibFunc::log2f);
+ TLI.setUnavailable(LibFunc::log2l);
+ TLI.setUnavailable(LibFunc::log1p);
+ TLI.setUnavailable(LibFunc::log1pf);
+ TLI.setUnavailable(LibFunc::log1pl);
+ TLI.setUnavailable(LibFunc::logb);
+ TLI.setUnavailable(LibFunc::logbf);
+ TLI.setUnavailable(LibFunc::logbl);
+ TLI.setUnavailable(LibFunc::nearbyint);
+ TLI.setUnavailable(LibFunc::nearbyintf);
+ TLI.setUnavailable(LibFunc::nearbyintl);
+ TLI.setUnavailable(LibFunc::rint);
+ TLI.setUnavailable(LibFunc::rintf);
+ TLI.setUnavailable(LibFunc::rintl);
+ TLI.setUnavailable(LibFunc::round);
+ TLI.setUnavailable(LibFunc::roundf);
+ TLI.setUnavailable(LibFunc::roundl);
+ TLI.setUnavailable(LibFunc::trunc);
+ TLI.setUnavailable(LibFunc::truncf);
+ TLI.setUnavailable(LibFunc::truncl);
+
+ // Win32 provides some C99 math with mangled names
+ TLI.setAvailableWithName(LibFunc::copysign, "_copysign");
+
+ if (T.getArch() == Triple::x86) {
+ // Win32 on x86 implements single-precision math functions as macros
+ TLI.setUnavailable(LibFunc::acosf);
+ TLI.setUnavailable(LibFunc::asinf);
+ TLI.setUnavailable(LibFunc::atanf);
+ TLI.setUnavailable(LibFunc::atan2f);
+ TLI.setUnavailable(LibFunc::ceilf);
+ TLI.setUnavailable(LibFunc::copysignf);
+ TLI.setUnavailable(LibFunc::cosf);
+ TLI.setUnavailable(LibFunc::coshf);
+ TLI.setUnavailable(LibFunc::expf);
+ TLI.setUnavailable(LibFunc::floorf);
+ TLI.setUnavailable(LibFunc::fmodf);
+ TLI.setUnavailable(LibFunc::logf);
+ TLI.setUnavailable(LibFunc::powf);
+ TLI.setUnavailable(LibFunc::sinf);
+ TLI.setUnavailable(LibFunc::sinhf);
+ TLI.setUnavailable(LibFunc::sqrtf);
+ TLI.setUnavailable(LibFunc::tanf);
+ TLI.setUnavailable(LibFunc::tanhf);
+ }
+
+ // Win32 does *not* provide provide these functions, but they are
+ // generally available on POSIX-compliant systems:
+ TLI.setUnavailable(LibFunc::access);
+ TLI.setUnavailable(LibFunc::bcmp);
+ TLI.setUnavailable(LibFunc::bcopy);
+ TLI.setUnavailable(LibFunc::bzero);
+ TLI.setUnavailable(LibFunc::chmod);
+ TLI.setUnavailable(LibFunc::chown);
+ TLI.setUnavailable(LibFunc::closedir);
+ TLI.setUnavailable(LibFunc::ctermid);
+ TLI.setUnavailable(LibFunc::fdopen);
+ TLI.setUnavailable(LibFunc::ffs);
+ TLI.setUnavailable(LibFunc::fileno);
+ TLI.setUnavailable(LibFunc::flockfile);
+ TLI.setUnavailable(LibFunc::fseeko);
+ TLI.setUnavailable(LibFunc::fstat);
+ TLI.setUnavailable(LibFunc::fstatvfs);
+ TLI.setUnavailable(LibFunc::ftello);
+ TLI.setUnavailable(LibFunc::ftrylockfile);
+ TLI.setUnavailable(LibFunc::funlockfile);
+ TLI.setUnavailable(LibFunc::getc_unlocked);
+ TLI.setUnavailable(LibFunc::getitimer);
+ TLI.setUnavailable(LibFunc::getlogin_r);
+ TLI.setUnavailable(LibFunc::getpwnam);
+ TLI.setUnavailable(LibFunc::htonl);
+ TLI.setUnavailable(LibFunc::htons);
+ TLI.setUnavailable(LibFunc::lchown);
+ TLI.setUnavailable(LibFunc::lstat);
+ TLI.setUnavailable(LibFunc::memccpy);
+ TLI.setUnavailable(LibFunc::mkdir);
+ TLI.setUnavailable(LibFunc::ntohl);
+ TLI.setUnavailable(LibFunc::ntohs);
+ TLI.setUnavailable(LibFunc::open);
+ TLI.setUnavailable(LibFunc::opendir);
+ TLI.setUnavailable(LibFunc::pclose);
+ TLI.setUnavailable(LibFunc::popen);
+ TLI.setUnavailable(LibFunc::pread);
+ TLI.setUnavailable(LibFunc::pwrite);
+ TLI.setUnavailable(LibFunc::read);
+ TLI.setUnavailable(LibFunc::readlink);
+ TLI.setUnavailable(LibFunc::realpath);
+ TLI.setUnavailable(LibFunc::rmdir);
+ TLI.setUnavailable(LibFunc::setitimer);
+ TLI.setUnavailable(LibFunc::stat);
+ TLI.setUnavailable(LibFunc::statvfs);
+ TLI.setUnavailable(LibFunc::stpcpy);
+ TLI.setUnavailable(LibFunc::stpncpy);
+ TLI.setUnavailable(LibFunc::strcasecmp);
+ TLI.setUnavailable(LibFunc::strncasecmp);
+ TLI.setUnavailable(LibFunc::times);
+ TLI.setUnavailable(LibFunc::uname);
+ TLI.setUnavailable(LibFunc::unlink);
+ TLI.setUnavailable(LibFunc::unsetenv);
+ TLI.setUnavailable(LibFunc::utime);
+ TLI.setUnavailable(LibFunc::utimes);
+ TLI.setUnavailable(LibFunc::write);
+
+ // Win32 does *not* provide provide these functions, but they are
+ // specified by C99:
+ TLI.setUnavailable(LibFunc::atoll);
+ TLI.setUnavailable(LibFunc::frexpf);
+ TLI.setUnavailable(LibFunc::llabs);
+ }
+
+ // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
+ // Linux (GLIBC):
+ // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html
+ switch (T.getOS()) {
+ case Triple::Darwin:
+ case Triple::MacOSX:
+ case Triple::IOS:
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsl);
+ }
+
+ // ffsll is available on at least FreeBSD and Linux (GLIBC):
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html
+ switch (T.getOS()) {
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsll);
+ }
+
+ // The following functions are available on at least Linux:
+ if (T.getOS() != Triple::Linux) {
+ TLI.setUnavailable(LibFunc::dunder_strdup);
+ TLI.setUnavailable(LibFunc::dunder_strtok_r);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf);
+ TLI.setUnavailable(LibFunc::under_IO_getc);
+ TLI.setUnavailable(LibFunc::under_IO_putc);
+ TLI.setUnavailable(LibFunc::memalign);
+ TLI.setUnavailable(LibFunc::fopen64);
+ TLI.setUnavailable(LibFunc::fseeko64);
+ TLI.setUnavailable(LibFunc::fstat64);
+ TLI.setUnavailable(LibFunc::fstatvfs64);
+ TLI.setUnavailable(LibFunc::ftello64);
+ TLI.setUnavailable(LibFunc::lstat64);
+ TLI.setUnavailable(LibFunc::open64);
+ TLI.setUnavailable(LibFunc::stat64);
+ TLI.setUnavailable(LibFunc::statvfs64);
+ TLI.setUnavailable(LibFunc::tmpfile64);
+ }
+}
+
+
+TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, Triple(), StandardNames);
+}
+
+TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, T, StandardNames);
+}
+
+TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
+ : ImmutablePass(ID) {
+ memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+ CustomNames = TLI.CustomNames;
+}
+
+namespace {
+struct StringComparator {
+ /// Compare two strings and return true if LHS is lexicographically less than
+ /// RHS. Requires that RHS doesn't contain any zero bytes.
+ bool operator()(const char *LHS, StringRef RHS) const {
+ // Compare prefixes with strncmp. If prefixes match we know that LHS is
+ // greater or equal to RHS as RHS can't contain any '\0'.
+ return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ }
+
+ // Provided for compatibility with MSVC's debug mode.
+ bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; }
+ bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; }
+ bool operator()(const char *LHS, const char *RHS) const {
+ return std::strcmp(LHS, RHS) < 0;
+ }
+};
+}
+
+bool TargetLibraryInfo::getLibFunc(StringRef funcName,
+ LibFunc::Func &F) const {
+ const char **Start = &StandardNames[0];
+ const char **End = &StandardNames[LibFunc::NumLibFuncs];
+
+ // Filter out empty names and names containing null bytes, those can't be in
+ // our table.
+ if (funcName.empty() || funcName.find('\0') != StringRef::npos)
+ return false;
+
+ // Check for \01 prefix that is used to mangle __asm declarations and
+ // strip it if present.
+ if (funcName.front() == '\01')
+ funcName = funcName.substr(1);
+ const char **I = std::lower_bound(Start, End, funcName, StringComparator());
+ if (I != End && *I == funcName) {
+ F = (LibFunc::Func)(I - Start);
+ return true;
+ }
+ return false;
+}
+
+/// disableAllFunctions - This disables all builtins, which is used for options
+/// like -fno-builtin.
+void TargetLibraryInfo::disableAllFunctions() {
+ memset(AvailableArray, 0, sizeof(AvailableArray));
+}
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
new file mode 100644
index 000000000000..f5121e34f77f
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -0,0 +1,319 @@
+//===-- llvm/Target/TargetLoweringObjectFile.cpp - Object File Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Code
+//===----------------------------------------------------------------------===//
+
+/// Initialize - this method must be called before any actual lowering is
+/// done. This specifies the current context for codegen, and gives the
+/// lowering implementations a chance to set up their default sections.
+void TargetLoweringObjectFile::Initialize(MCContext &ctx,
+ const TargetMachine &TM) {
+ Ctx = &ctx;
+ InitMCObjectFileInfo(TM.getTargetTriple(),
+ TM.getRelocationModel(), TM.getCodeModel(), *Ctx);
+}
+
+TargetLoweringObjectFile::~TargetLoweringObjectFile() {
+}
+
+static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) {
+ const Constant *C = GV->getInitializer();
+
+ // Must have zero initializer.
+ if (!C->isNullValue())
+ return false;
+
+ // Leave constant zeros in readonly constant sections, so they can be shared.
+ if (GV->isConstant())
+ return false;
+
+ // If the global has an explicit section specified, don't put it in BSS.
+ if (!GV->getSection().empty())
+ return false;
+
+ // If -nozero-initialized-in-bss is specified, don't ever use BSS.
+ if (NoZerosInBSS)
+ return false;
+
+ // Otherwise, put it in BSS!
+ return true;
+}
+
+/// IsNullTerminatedString - Return true if the specified constant (which is
+/// known to have a type that is an array of 1/2/4 byte elements) ends with a
+/// nul value and contains no other nuls in it. Note that this is more general
+/// than ConstantDataSequential::isString because we allow 2 & 4 byte strings.
+static bool IsNullTerminatedString(const Constant *C) {
+ // First check: is we have constant array terminated with zero
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ unsigned NumElts = CDS->getNumElements();
+ assert(NumElts != 0 && "Can't have an empty CDS");
+
+ if (CDS->getElementAsInteger(NumElts-1) != 0)
+ return false; // Not null terminated.
+
+ // Verify that the null doesn't occur anywhere else in the string.
+ for (unsigned i = 0; i != NumElts-1; ++i)
+ if (CDS->getElementAsInteger(i) == 0)
+ return false;
+ return true;
+ }
+
+ // Another possibility: [1 x i8] zeroinitializer
+ if (isa<ConstantAggregateZero>(C))
+ return cast<ArrayType>(C->getType())->getNumElements() == 1;
+
+ return false;
+}
+
+MCSymbol *TargetLoweringObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
+void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+}
+
+
+/// getKindForGlobal - This is a top-level target-independent classifier for
+/// a global variable. Given an global variable and information from TM, it
+/// classifies the global in a variety of ways that make various target
+/// implementations simpler. The target implementation is free to ignore this
+/// extra info of course.
+SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
+ const TargetMachine &TM){
+ assert(!GV->isDeclaration() && !GV->hasAvailableExternallyLinkage() &&
+ "Can only be used for global definitions");
+
+ Reloc::Model ReloModel = TM.getRelocationModel();
+
+ // Early exit - functions should be always in text sections.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (GVar == 0)
+ return SectionKind::getText();
+
+ // Handle thread-local data first.
+ if (GVar->isThreadLocal()) {
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS))
+ return SectionKind::getThreadBSS();
+ return SectionKind::getThreadData();
+ }
+
+ // Variables with common linkage always get classified as common.
+ if (GVar->hasCommonLinkage())
+ return SectionKind::getCommon();
+
+ // Variable can be easily put to BSS section.
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS)) {
+ if (GVar->hasLocalLinkage())
+ return SectionKind::getBSSLocal();
+ else if (GVar->hasExternalLinkage())
+ return SectionKind::getBSSExtern();
+ return SectionKind::getBSS();
+ }
+
+ const Constant *C = GVar->getInitializer();
+
+ // If the global is marked constant, we can put it into a mergable section,
+ // a mergable string section, or general .data if it contains relocations.
+ if (GVar->isConstant()) {
+ // If the initializer for the global contains something that requires a
+ // relocation, then we may have to drop this into a writable data section
+ // even though it is marked const.
+ switch (C->getRelocationInfo()) {
+ case Constant::NoRelocation:
+ // If the global is required to have a unique address, it can't be put
+ // into a mergable section: just drop it into the general read-only
+ // section instead.
+ if (!GVar->hasUnnamedAddr())
+ return SectionKind::getReadOnly();
+
+ // If initializer is a null-terminated string, put it in a "cstring"
+ // section of the right width.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ if (IntegerType *ITy =
+ dyn_cast<IntegerType>(ATy->getElementType())) {
+ if ((ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16 ||
+ ITy->getBitWidth() == 32) &&
+ IsNullTerminatedString(C)) {
+ if (ITy->getBitWidth() == 8)
+ return SectionKind::getMergeable1ByteCString();
+ if (ITy->getBitWidth() == 16)
+ return SectionKind::getMergeable2ByteCString();
+
+ assert(ITy->getBitWidth() == 32 && "Unknown width");
+ return SectionKind::getMergeable4ByteCString();
+ }
+ }
+ }
+
+ // Otherwise, just drop it into a mergable constant section. If we have
+ // a section for this size, use it, otherwise use the arbitrary sized
+ // mergable section.
+ switch (TM.getDataLayout()->getTypeAllocSize(C->getType())) {
+ case 4: return SectionKind::getMergeableConst4();
+ case 8: return SectionKind::getMergeableConst8();
+ case 16: return SectionKind::getMergeableConst16();
+ default: return SectionKind::getMergeableConst();
+ }
+
+ case Constant::LocalRelocation:
+ // In static relocation model, the linker will resolve all addresses, so
+ // the relocation entries will actually be constants by the time the app
+ // starts up. However, we can't put this into a mergable section, because
+ // the linker doesn't take relocations into consideration when it tries to
+ // merge entries in the section.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getReadOnly();
+
+ // Otherwise, the dynamic linker needs to fix it up, put it in the
+ // writable data.rel.local section.
+ return SectionKind::getReadOnlyWithRelLocal();
+
+ case Constant::GlobalRelocations:
+ // In static relocation model, the linker will resolve all addresses, so
+ // the relocation entries will actually be constants by the time the app
+ // starts up. However, we can't put this into a mergable section, because
+ // the linker doesn't take relocations into consideration when it tries to
+ // merge entries in the section.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getReadOnly();
+
+ // Otherwise, the dynamic linker needs to fix it up, put it in the
+ // writable data.rel section.
+ return SectionKind::getReadOnlyWithRel();
+ }
+ }
+
+ // Okay, this isn't a constant. If the initializer for the global is going
+ // to require a runtime relocation by the dynamic linker, put it into a more
+ // specific section to improve startup time of the app. This coalesces these
+ // globals together onto fewer pages, improving the locality of the dynamic
+ // linker.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getDataNoRel();
+
+ switch (C->getRelocationInfo()) {
+ case Constant::NoRelocation:
+ return SectionKind::getDataNoRel();
+ case Constant::LocalRelocation:
+ return SectionKind::getDataRelLocal();
+ case Constant::GlobalRelocations:
+ return SectionKind::getDataRel();
+ }
+ llvm_unreachable("Invalid relocation");
+}
+
+/// SectionForGlobal - This method computes the appropriate section to emit
+/// the specified global variable or function definition. This should not
+/// be passed external (or available externally) globals.
+const MCSection *TargetLoweringObjectFile::
+SectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang,
+ const TargetMachine &TM) const {
+ // Select section name.
+ if (GV->hasSection())
+ return getExplicitSectionGlobal(GV, Kind, Mang, TM);
+
+
+ // Use default section depending on the 'type' of global
+ return SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
+
+
+// Lame default implementation. Calculate the section name for global.
+const MCSection *
+TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const{
+ assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+ if (Kind.isText())
+ return getTextSection();
+
+ if (Kind.isBSS() && BSSSection != 0)
+ return BSSSection;
+
+ if (Kind.isReadOnly() && ReadOnlySection != 0)
+ return ReadOnlySection;
+
+ return getDataSection();
+}
+
+/// getSectionForConstant - Given a mergable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *
+TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isReadOnly() && ReadOnlySection != 0)
+ return ReadOnlySection;
+
+ return DataSection;
+}
+
+/// getTTypeGlobalReference - Return an MCExpr to use for a
+/// reference to the specified global variable from exception
+/// handling information.
+const MCExpr *TargetLoweringObjectFile::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ const MCSymbolRefExpr *Ref =
+ MCSymbolRefExpr::Create(Mang->getSymbol(GV), getContext());
+
+ return getTTypeReference(Ref, Encoding, Streamer);
+}
+
+const MCExpr *TargetLoweringObjectFile::
+getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ switch (Encoding & 0x70) {
+ default:
+ report_fatal_error("We do not support this DWARF encoding yet!");
+ case dwarf::DW_EH_PE_absptr:
+ // Do nothing special
+ return Sym;
+ case dwarf::DW_EH_PE_pcrel: {
+ // Emit a label to the streamer for the current position. This gives us
+ // .-foo addressing.
+ MCSymbol *PCSym = getContext().CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+ return MCBinaryExpr::CreateSub(Sym, PC, getContext());
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
new file mode 100644
index 000000000000..e7282519d597
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -0,0 +1,190 @@
+//===-- TargetMachine.cpp - General Target Information ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// Command-line options that tend to be useful on more than one back-end.
+//
+
+namespace llvm {
+ bool HasDivModLibcall;
+ bool AsmVerbosityDefault(false);
+}
+
+static cl::opt<bool>
+DataSections("fdata-sections",
+ cl::desc("Emit data into separate sections"),
+ cl::init(false));
+static cl::opt<bool>
+FunctionSections("ffunction-sections",
+ cl::desc("Emit functions into separate sections"),
+ cl::init(false));
+
+//---------------------------------------------------------------------------
+// TargetMachine Class
+//
+
+TargetMachine::TargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options)
+ : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
+ CodeGenInfo(0), AsmInfo(0),
+ MCRelaxAll(false),
+ MCNoExecStack(false),
+ MCSaveTempLabels(false),
+ MCUseLoc(true),
+ MCUseCFI(true),
+ MCUseDwarfDirectory(false),
+ Options(Options) {
+}
+
+TargetMachine::~TargetMachine() {
+ delete CodeGenInfo;
+ delete AsmInfo;
+}
+
+/// \brief Reset the target options based on the function's attributes.
+void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
+ const Function *F = MF->getFunction();
+ TargetOptions &TO = MF->getTarget().Options;
+
+#define RESET_OPTION(X, Y) \
+ do { \
+ if (F->hasFnAttribute(Y)) \
+ TO.X = \
+ (F->getAttributes(). \
+ getAttribute(AttributeSet::FunctionIndex, \
+ Y).getValueAsString() == "true"); \
+ } while (0)
+
+ RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
+ RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf");
+ RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad");
+ RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
+ RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
+ RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+ RESET_OPTION(UseSoftFloat, "use-soft-float");
+ RESET_OPTION(DisableTailCalls, "disable-tail-calls");
+}
+
+/// getRelocationModel - Returns the code generation relocation model. The
+/// choices are static, PIC, and dynamic-no-pic, and target default.
+Reloc::Model TargetMachine::getRelocationModel() const {
+ if (!CodeGenInfo)
+ return Reloc::Default;
+ return CodeGenInfo->getRelocationModel();
+}
+
+/// getCodeModel - Returns the code model. The choices are small, kernel,
+/// medium, large, and target default.
+CodeModel::Model TargetMachine::getCodeModel() const {
+ if (!CodeGenInfo)
+ return CodeModel::Default;
+ return CodeGenInfo->getCodeModel();
+}
+
+/// Get the IR-specified TLS model for Var.
+static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) {
+ switch (Var->getThreadLocalMode()) {
+ case GlobalVariable::NotThreadLocal:
+ llvm_unreachable("getSelectedTLSModel for non-TLS variable");
+ break;
+ case GlobalVariable::GeneralDynamicTLSModel:
+ return TLSModel::GeneralDynamic;
+ case GlobalVariable::LocalDynamicTLSModel:
+ return TLSModel::LocalDynamic;
+ case GlobalVariable::InitialExecTLSModel:
+ return TLSModel::InitialExec;
+ case GlobalVariable::LocalExecTLSModel:
+ return TLSModel::LocalExec;
+ }
+ llvm_unreachable("invalid TLS model");
+}
+
+TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+ const GlobalVariable *Var = cast<GlobalVariable>(GV);
+
+ bool isLocal = Var->hasLocalLinkage();
+ bool isDeclaration = Var->isDeclaration();
+ bool isPIC = getRelocationModel() == Reloc::PIC_;
+ bool isPIE = Options.PositionIndependentExecutable;
+ // FIXME: what should we do for protected and internal visibility?
+ // For variables, is internal different from hidden?
+ bool isHidden = Var->hasHiddenVisibility();
+
+ TLSModel::Model Model;
+ if (isPIC && !isPIE) {
+ if (isLocal || isHidden)
+ Model = TLSModel::LocalDynamic;
+ else
+ Model = TLSModel::GeneralDynamic;
+ } else {
+ if (!isDeclaration || isHidden)
+ Model = TLSModel::LocalExec;
+ else
+ Model = TLSModel::InitialExec;
+ }
+
+ // If the user specified a more specific model, use that.
+ TLSModel::Model SelectedModel = getSelectedTLSModel(Var);
+ if (SelectedModel > Model)
+ return SelectedModel;
+
+ return Model;
+}
+
+/// getOptLevel - Returns the optimization level: None, Less,
+/// Default, or Aggressive.
+CodeGenOpt::Level TargetMachine::getOptLevel() const {
+ if (!CodeGenInfo)
+ return CodeGenOpt::Default;
+ return CodeGenInfo->getOptLevel();
+}
+
+bool TargetMachine::getAsmVerbosityDefault() {
+ return AsmVerbosityDefault;
+}
+
+void TargetMachine::setAsmVerbosityDefault(bool V) {
+ AsmVerbosityDefault = V;
+}
+
+bool TargetMachine::getFunctionSections() {
+ return FunctionSections;
+}
+
+bool TargetMachine::getDataSections() {
+ return DataSections;
+}
+
+void TargetMachine::setFunctionSections(bool V) {
+ FunctionSections = V;
+}
+
+void TargetMachine::setDataSections(bool V) {
+ DataSections = V;
+}
diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp
new file mode 100644
index 000000000000..79f74bd66127
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetMachineC.cpp
@@ -0,0 +1,197 @@
+//===-- TargetMachine.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM-C part of TargetMachine.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/TargetMachine.h"
+#include "llvm-c/Core.h"
+#include "llvm-c/Target.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+using namespace llvm;
+
+
+
+LLVMTargetRef LLVMGetFirstTarget() {
+ const Target* target = &*TargetRegistry::begin();
+ return wrap(target);
+}
+LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) {
+ return wrap(unwrap(T)->getNext());
+}
+
+const char * LLVMGetTargetName(LLVMTargetRef T) {
+ return unwrap(T)->getName();
+}
+
+const char * LLVMGetTargetDescription(LLVMTargetRef T) {
+ return unwrap(T)->getShortDescription();
+}
+
+LLVMBool LLVMTargetHasJIT(LLVMTargetRef T) {
+ return unwrap(T)->hasJIT();
+}
+
+LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T) {
+ return unwrap(T)->hasTargetMachine();
+}
+
+LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) {
+ return unwrap(T)->hasMCAsmBackend();
+}
+
+LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple,
+ char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
+ LLVMCodeModel CodeModel) {
+ Reloc::Model RM;
+ switch (Reloc){
+ case LLVMRelocStatic:
+ RM = Reloc::Static;
+ break;
+ case LLVMRelocPIC:
+ RM = Reloc::PIC_;
+ break;
+ case LLVMRelocDynamicNoPic:
+ RM = Reloc::DynamicNoPIC;
+ break;
+ default:
+ RM = Reloc::Default;
+ break;
+ }
+
+ CodeModel::Model CM;
+ switch (CodeModel) {
+ case LLVMCodeModelJITDefault:
+ CM = CodeModel::JITDefault;
+ break;
+ case LLVMCodeModelSmall:
+ CM = CodeModel::Small;
+ break;
+ case LLVMCodeModelKernel:
+ CM = CodeModel::Kernel;
+ break;
+ case LLVMCodeModelMedium:
+ CM = CodeModel::Medium;
+ break;
+ case LLVMCodeModelLarge:
+ CM = CodeModel::Large;
+ break;
+ default:
+ CM = CodeModel::Default;
+ break;
+ }
+ CodeGenOpt::Level OL;
+
+ switch (Level) {
+ case LLVMCodeGenLevelNone:
+ OL = CodeGenOpt::None;
+ break;
+ case LLVMCodeGenLevelLess:
+ OL = CodeGenOpt::Less;
+ break;
+ case LLVMCodeGenLevelAggressive:
+ OL = CodeGenOpt::Aggressive;
+ break;
+ default:
+ OL = CodeGenOpt::Default;
+ break;
+ }
+
+ TargetOptions opt;
+ return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM,
+ CM, OL));
+}
+
+
+void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) {
+ delete unwrap(T);
+}
+
+LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) {
+ const Target* target = &(unwrap(T)->getTarget());
+ return wrap(target);
+}
+
+char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) {
+ std::string StringRep = unwrap(T)->getTargetTriple();
+ return strdup(StringRep.c_str());
+}
+
+char* LLVMGetTargetMachineCPU(LLVMTargetMachineRef T) {
+ std::string StringRep = unwrap(T)->getTargetCPU();
+ return strdup(StringRep.c_str());
+}
+
+char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) {
+ std::string StringRep = unwrap(T)->getTargetFeatureString();
+ return strdup(StringRep.c_str());
+}
+
+LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
+ return wrap(unwrap(T)->getDataLayout());
+}
+
+LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
+ char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+ TargetMachine* TM = unwrap(T);
+ Module* Mod = unwrap(M);
+
+ PassManager pass;
+
+ std::string error;
+
+ const DataLayout* td = TM->getDataLayout();
+
+ if (!td) {
+ error = "No DataLayout in TargetMachine";
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+ pass.add(new DataLayout(*td));
+
+ TargetMachine::CodeGenFileType ft;
+ switch (codegen) {
+ case LLVMAssemblyFile:
+ ft = TargetMachine::CGFT_AssemblyFile;
+ break;
+ default:
+ ft = TargetMachine::CGFT_ObjectFile;
+ break;
+ }
+ raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary);
+ formatted_raw_ostream destf(dest);
+ if (!error.empty()) {
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+
+ if (TM->addPassesToEmitFile(pass, destf, ft)) {
+ error = "TargetMachine can't emit a file of this type";
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+
+ pass.run(*Mod);
+
+ destf.flush();
+ dest.flush();
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
new file mode 100644
index 000000000000..af0cef62d552
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
@@ -0,0 +1,37 @@
+//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// TargetSubtargetInfo Class
+//
+TargetSubtargetInfo::TargetSubtargetInfo() {}
+
+TargetSubtargetInfo::~TargetSubtargetInfo() {}
+
+bool TargetSubtargetInfo::enableMachineScheduler() const {
+ return false;
+}
+
+bool TargetSubtargetInfo::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = ANTIDEP_NONE;
+ CriticalPathRCs.clear();
+ return false;
+}
+
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
new file mode 100644
index 000000000000..e4623228b397
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -0,0 +1,2221 @@
+//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+struct X86Operand;
+
+class X86AsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+ ParseInstructionInfo *InstInfo;
+private:
+ MCAsmParser &getParser() const { return Parser; }
+
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ bool Error(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
+ bool MatchingInlineAsm = false) {
+ if (MatchingInlineAsm) return true;
+ return Parser.Error(L, Msg, Ranges);
+ }
+
+ X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
+ Error(Loc, Msg);
+ return 0;
+ }
+
+ X86Operand *ParseOperand();
+ X86Operand *ParseATTOperand();
+ X86Operand *ParseIntelOperand();
+ X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
+ X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind);
+ X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
+ SMLoc StartLoc);
+ X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp,
+ unsigned Size);
+ X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
+ SMLoc &IdentStart);
+ X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+
+ X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End,
+ SMLoc SizeDirLoc, unsigned Size);
+
+ bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
+ SmallString<64> &Err);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
+
+ bool processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
+ /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
+ bool isSrcOp(X86Operand &Op);
+
+ /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
+ /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
+ bool isDstOp(X86Operand &Op);
+
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+ void SwitchMode() {
+ unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
+ setAvailableFeatures(FB);
+ }
+
+ /// @name Auto-generated Matcher Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "X86GenAsmMatcher.inc"
+
+ /// }
+
+public:
+ X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+
+ bool isParsingIntelSyntax() {
+ return getParser().getAssemblerDialect();
+ }
+};
+} // end anonymous namespace
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+static bool isImmSExti16i8Value(uint64_t Value) {
+ return (( Value <= 0x000000000000007FULL)||
+ (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
+
+static bool isImmSExti32i8Value(uint64_t Value) {
+ return (( Value <= 0x000000000000007FULL)||
+ (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
+
+static bool isImmZExtu32u8Value(uint64_t Value) {
+ return (Value <= 0x00000000000000FFULL);
+}
+
+static bool isImmSExti64i8Value(uint64_t Value) {
+ return (( Value <= 0x000000000000007FULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
+
+static bool isImmSExti64i32Value(uint64_t Value) {
+ return (( Value <= 0x000000007FFFFFFFULL)||
+ (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
+namespace {
+
+/// X86Operand - Instances of this class represent a parsed X86 machine
+/// instruction.
+struct X86Operand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Register,
+ Immediate,
+ Memory
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+ SMLoc OffsetOfLoc;
+ bool AddressOf;
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNo;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ unsigned Size;
+ };
+
+ union {
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
+ };
+
+ X86Operand(KindTy K, SMLoc Start, SMLoc End)
+ : Kind(K), StartLoc(Start), EndLoc(End) {}
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+ /// getLocRange - Get the range between the first and last token of this
+ /// operand.
+ SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
+ /// getOffsetOfLoc - Get the location of the offset operator.
+ SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
+
+ virtual void print(raw_ostream &OS) const {}
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+ void setTokenValue(StringRef Value) {
+ assert(Kind == Token && "Invalid access!");
+ Tok.Data = Value.data();
+ Tok.Length = Value.size();
+ }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNo;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getMemDisp() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Disp;
+ }
+ unsigned getMemSegReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.SegReg;
+ }
+ unsigned getMemBaseReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.BaseReg;
+ }
+ unsigned getMemIndexReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.IndexReg;
+ }
+ unsigned getMemScale() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Scale;
+ }
+
+ bool isToken() const {return Kind == Token; }
+
+ bool isImm() const { return Kind == Immediate; }
+
+ bool isImmSExti16i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ return isImmSExti16i8Value(CE->getValue());
+ }
+ bool isImmSExti32i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ return isImmSExti32i8Value(CE->getValue());
+ }
+ bool isImmZExtu32u8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ return isImmZExtu32u8Value(CE->getValue());
+ }
+ bool isImmSExti64i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ return isImmSExti64i8Value(CE->getValue());
+ }
+ bool isImmSExti64i32() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ return isImmSExti64i32Value(CE->getValue());
+ }
+
+ bool isOffsetOf() const {
+ return OffsetOfLoc.getPointer();
+ }
+
+ bool needAddressOf() const {
+ return AddressOf;
+ }
+
+ bool isMem() const { return Kind == Memory; }
+ bool isMem8() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 8);
+ }
+ bool isMem16() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 16);
+ }
+ bool isMem32() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32);
+ }
+ bool isMem64() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64);
+ }
+ bool isMem80() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 80);
+ }
+ bool isMem128() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 128);
+ }
+ bool isMem256() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 256);
+ }
+
+ bool isMemVX32() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
+ getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
+ }
+ bool isMemVY32() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
+ getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
+ }
+ bool isMemVX64() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
+ getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
+ }
+ bool isMemVY64() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
+ getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
+ }
+
+ bool isAbsMem() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1;
+ }
+
+ bool isReg() const { return Kind == Register; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addMem8Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem16Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem32Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem64Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem80Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem128Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem256Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMemVX32Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMemVY32Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMemVX64Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMemVY64Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 5) && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
+ Inst.addOperand(MCOperand::CreateImm(getMemScale()));
+ Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
+ addExpr(Inst, getMemDisp());
+ Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
+ }
+
+ void addAbsMemOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 1) && "Invalid number of operands!");
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ }
+
+ static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
+ SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
+ X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
+ Res->Tok.Data = Str.data();
+ Res->Tok.Length = Str.size();
+ return Res;
+ }
+
+ static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
+ bool AddressOf = false,
+ SMLoc OffsetOfLoc = SMLoc()) {
+ X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
+ Res->Reg.RegNo = RegNo;
+ Res->AddressOf = AddressOf;
+ Res->OffsetOfLoc = OffsetOfLoc;
+ return Res;
+ }
+
+ static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
+ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
+ Res->Imm.Val = Val;
+ return Res;
+ }
+
+ /// Create an absolute memory operand.
+ static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
+ unsigned Size = 0) {
+ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ Res->Mem.SegReg = 0;
+ Res->Mem.Disp = Disp;
+ Res->Mem.BaseReg = 0;
+ Res->Mem.IndexReg = 0;
+ Res->Mem.Scale = 1;
+ Res->Mem.Size = Size;
+ Res->AddressOf = false;
+ return Res;
+ }
+
+ /// Create a generalized memory operand.
+ static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
+ unsigned Size = 0) {
+ // We should never just have a displacement, that should be parsed as an
+ // absolute memory operand.
+ assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
+
+ // The scale should always be one of {1,2,4,8}.
+ assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
+ "Invalid scale!");
+ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ Res->Mem.SegReg = SegReg;
+ Res->Mem.Disp = Disp;
+ Res->Mem.BaseReg = BaseReg;
+ Res->Mem.IndexReg = IndexReg;
+ Res->Mem.Scale = Scale;
+ Res->Mem.Size = Size;
+ Res->AddressOf = false;
+ return Res;
+ }
+};
+
+} // end anonymous namespace.
+
+bool X86AsmParser::isSrcOp(X86Operand &Op) {
+ unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
+
+ return (Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
+}
+
+bool X86AsmParser::isDstOp(X86Operand &Op) {
+ unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
+
+ return Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
+}
+
+bool X86AsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ RegNo = 0;
+ const AsmToken &PercentTok = Parser.getTok();
+ StartLoc = PercentTok.getLoc();
+
+ // If we encounter a %, ignore it. This code handles registers with and
+ // without the prefix, unprefixed registers can occur in cfi directives.
+ if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
+ Parser.Lex(); // Eat percent token.
+
+ const AsmToken &Tok = Parser.getTok();
+ EndLoc = Tok.getEndLoc();
+
+ if (Tok.isNot(AsmToken::Identifier)) {
+ if (isParsingIntelSyntax()) return true;
+ return Error(StartLoc, "invalid register name",
+ SMRange(StartLoc, EndLoc));
+ }
+
+ RegNo = MatchRegisterName(Tok.getString());
+
+ // If the match failed, try the register name as lowercase.
+ if (RegNo == 0)
+ RegNo = MatchRegisterName(Tok.getString().lower());
+
+ if (!is64BitMode()) {
+ // FIXME: This should be done using Requires<In32BitMode> and
+ // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
+ // checked.
+ // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
+ // REX prefix.
+ if (RegNo == X86::RIZ ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
+ X86II::isX86_64NonExtLowByteReg(RegNo) ||
+ X86II::isX86_64ExtendedReg(RegNo))
+ return Error(StartLoc, "register %"
+ + Tok.getString() + " is only available in 64-bit mode",
+ SMRange(StartLoc, EndLoc));
+ }
+
+ // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
+ if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
+ RegNo = X86::ST0;
+ Parser.Lex(); // Eat 'st'
+
+ // Check to see if we have '(4)' after %st.
+ if (getLexer().isNot(AsmToken::LParen))
+ return false;
+ // Lex the paren.
+ getParser().Lex();
+
+ const AsmToken &IntTok = Parser.getTok();
+ if (IntTok.isNot(AsmToken::Integer))
+ return Error(IntTok.getLoc(), "expected stack index");
+ switch (IntTok.getIntVal()) {
+ case 0: RegNo = X86::ST0; break;
+ case 1: RegNo = X86::ST1; break;
+ case 2: RegNo = X86::ST2; break;
+ case 3: RegNo = X86::ST3; break;
+ case 4: RegNo = X86::ST4; break;
+ case 5: RegNo = X86::ST5; break;
+ case 6: RegNo = X86::ST6; break;
+ case 7: RegNo = X86::ST7; break;
+ default: return Error(IntTok.getLoc(), "invalid stack index");
+ }
+
+ if (getParser().Lex().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "expected ')'");
+
+ EndLoc = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat ')'
+ return false;
+ }
+
+ EndLoc = Parser.getTok().getEndLoc();
+
+ // If this is "db[0-7]", match it as an alias
+ // for dr[0-7].
+ if (RegNo == 0 && Tok.getString().size() == 3 &&
+ Tok.getString().startswith("db")) {
+ switch (Tok.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != 0) {
+ EndLoc = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat it.
+ return false;
+ }
+ }
+
+ if (RegNo == 0) {
+ if (isParsingIntelSyntax()) return true;
+ return Error(StartLoc, "invalid register name",
+ SMRange(StartLoc, EndLoc));
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ return false;
+}
+
+X86Operand *X86AsmParser::ParseOperand() {
+ if (isParsingIntelSyntax())
+ return ParseIntelOperand();
+ return ParseATTOperand();
+}
+
+/// getIntelMemOperandSize - Return intel memory operand size.
+static unsigned getIntelMemOperandSize(StringRef OpStr) {
+ unsigned Size = StringSwitch<unsigned>(OpStr)
+ .Cases("BYTE", "byte", 8)
+ .Cases("WORD", "word", 16)
+ .Cases("DWORD", "dword", 32)
+ .Cases("QWORD", "qword", 64)
+ .Cases("XWORD", "xword", 80)
+ .Cases("XMMWORD", "xmmword", 128)
+ .Cases("YMMWORD", "ymmword", 256)
+ .Default(0);
+ return Size;
+}
+
+enum IntelBracExprState {
+ IBES_START,
+ IBES_LBRAC,
+ IBES_RBRAC,
+ IBES_REGISTER,
+ IBES_REGISTER_STAR,
+ IBES_REGISTER_STAR_INTEGER,
+ IBES_INTEGER,
+ IBES_INTEGER_STAR,
+ IBES_INDEX_REGISTER,
+ IBES_IDENTIFIER,
+ IBES_DISP_EXPR,
+ IBES_MINUS,
+ IBES_ERROR
+};
+
+class IntelBracExprStateMachine {
+ IntelBracExprState State;
+ unsigned BaseReg, IndexReg, Scale;
+ int64_t Disp;
+
+ unsigned TmpReg;
+ int64_t TmpInteger;
+
+ bool isPlus;
+
+public:
+ IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
+ State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp),
+ TmpReg(0), TmpInteger(0), isPlus(true) {}
+
+ unsigned getBaseReg() { return BaseReg; }
+ unsigned getIndexReg() { return IndexReg; }
+ unsigned getScale() { return Scale; }
+ int64_t getDisp() { return Disp; }
+ bool isValidEndState() { return State == IBES_RBRAC; }
+
+ void onPlus() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_INTEGER:
+ State = IBES_START;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_START;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_START;
+ break;
+ }
+ isPlus = true;
+ }
+ void onMinus() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_MINUS;
+ break;
+ case IBES_INTEGER:
+ State = IBES_START;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_START;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_START;
+ break;
+ }
+ isPlus = false;
+ }
+ void onRegister(unsigned Reg) {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_REGISTER;
+ TmpReg = Reg;
+ break;
+ case IBES_INTEGER_STAR:
+ assert (!IndexReg && "IndexReg already set!");
+ State = IBES_INDEX_REGISTER;
+ IndexReg = Reg;
+ Scale = TmpInteger;
+ break;
+ }
+ }
+ void onDispExpr() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_DISP_EXPR;
+ break;
+ }
+ }
+ void onInteger(int64_t TmpInt) {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_INTEGER;
+ TmpInteger = TmpInt;
+ break;
+ case IBES_MINUS:
+ State = IBES_INTEGER;
+ TmpInteger = TmpInt;
+ break;
+ case IBES_REGISTER_STAR:
+ assert (!IndexReg && "IndexReg already set!");
+ State = IBES_INDEX_REGISTER;
+ IndexReg = TmpReg;
+ Scale = TmpInt;
+ break;
+ }
+ }
+ void onStar() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_INTEGER:
+ State = IBES_INTEGER_STAR;
+ break;
+ case IBES_REGISTER:
+ State = IBES_REGISTER_STAR;
+ break;
+ }
+ }
+ void onLBrac() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_RBRAC:
+ State = IBES_START;
+ isPlus = true;
+ break;
+ }
+ }
+ void onRBrac() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_DISP_EXPR:
+ State = IBES_RBRAC;
+ break;
+ case IBES_INTEGER:
+ State = IBES_RBRAC;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_RBRAC;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_RBRAC;
+ break;
+ }
+ }
+};
+
+X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start,
+ SMLoc End, SMLoc SizeDirLoc,
+ unsigned Size) {
+ bool NeedSizeDir = false;
+ bool IsVarDecl = false;
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other then an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ unsigned tLength, tSize, tType;
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
+ tSize, tType, IsVarDecl);
+ if (!Size) {
+ Size = tType * 8; // Size is in terms of bits in this context.
+ NeedSizeDir = Size > 0;
+ }
+ }
+
+ // If this is not a VarDecl then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create register
+ // operand to ensure proper matching. Just pick a GPR based on the size of
+ // a pointer.
+ if (!IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
+ }
+
+ if (NeedSizeDir)
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
+ /*Len*/0, Size));
+
+ // When parsing inline assembly we set the base register to a non-zero value
+ // as we don't know the actual value at this time. This is necessary to
+ // get the matching correct in some cases.
+ return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
+ /*Scale*/1, Start, End, Size);
+}
+
+X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
+ uint64_t ImmDisp,
+ unsigned Size) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
+
+ // Eat '['
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(Start, "Expected '[' token!");
+ Parser.Lex();
+
+ unsigned TmpReg = 0;
+
+ // Try to handle '[' 'Symbol' ']'
+ if (getLexer().is(AsmToken::Identifier)) {
+ if (ParseRegister(TmpReg, Start, End)) {
+ const MCExpr *Disp;
+ SMLoc IdentStart = Tok.getLoc();
+ if (getParser().parseExpression(Disp, End))
+ return 0;
+
+ if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
+ return Err;
+
+ if (getLexer().isNot(AsmToken::RBrac))
+ return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
+
+ // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'.
+ if (ImmDisp)
+ return ErrorOperand(Start, "Unsupported immediate displacement!");
+
+ // Adjust the EndLoc due to the ']'.
+ End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
+ Parser.Lex();
+ if (!isParsingInlineAsm())
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+
+ // We want the size directive before the '['.
+ SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1);
+ return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size);
+ }
+ }
+
+ // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an
+ // immediate displacement before the bracketed expression.
+ bool Done = false;
+ IntelBracExprStateMachine SM(Parser, ImmDisp);
+
+ // If we parsed a register, then the end loc has already been set and
+ // the identifier has already been lexed. We also need to update the
+ // state.
+ if (TmpReg)
+ SM.onRegister(TmpReg);
+
+ const MCExpr *Disp = 0;
+ while (!Done) {
+ bool UpdateLocLex = true;
+
+ // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
+ // identifier. Don't try an parse it as a register.
+ if (Tok.getString().startswith("."))
+ break;
+
+ switch (getLexer().getKind()) {
+ default: {
+ if (SM.isValidEndState()) {
+ Done = true;
+ break;
+ }
+ return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+ }
+ case AsmToken::Identifier: {
+ // This could be a register or a displacement expression.
+ if(!ParseRegister(TmpReg, Start, End)) {
+ SM.onRegister(TmpReg);
+ UpdateLocLex = false;
+ break;
+ } else if (!getParser().parseExpression(Disp, End)) {
+ SM.onDispExpr();
+ UpdateLocLex = false;
+ break;
+ }
+ return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+ }
+ case AsmToken::Integer: {
+ int64_t Val = Tok.getIntVal();
+ SM.onInteger(Val);
+ break;
+ }
+ case AsmToken::Plus: SM.onPlus(); break;
+ case AsmToken::Minus: SM.onMinus(); break;
+ case AsmToken::Star: SM.onStar(); break;
+ case AsmToken::LBrac: SM.onLBrac(); break;
+ case AsmToken::RBrac: SM.onRBrac(); break;
+ }
+ if (!Done && UpdateLocLex) {
+ End = Tok.getLoc();
+ Parser.Lex(); // Consume the token.
+ }
+ }
+
+ if (!Disp)
+ Disp = MCConstantExpr::Create(SM.getDisp(), getContext());
+
+ // Parse the dot operator (e.g., [ebx].foo.bar).
+ if (Tok.getString().startswith(".")) {
+ SmallString<64> Err;
+ const MCExpr *NewDisp;
+ if (ParseIntelDotOperator(Disp, &NewDisp, Err))
+ return ErrorOperand(Tok.getLoc(), Err);
+
+ End = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat the field.
+ Disp = NewDisp;
+ }
+
+ int BaseReg = SM.getBaseReg();
+ int IndexReg = SM.getIndexReg();
+
+ // handle [-42]
+ if (!BaseReg && !IndexReg) {
+ if (!SegReg)
+ return X86Operand::CreateMem(Disp, Start, End);
+ else
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+ }
+
+ int Scale = SM.getScale();
+ return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
+ Start, End, Size);
+}
+
+// Inline assembly may use variable names with namespace alias qualifiers.
+X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
+ SMLoc &IdentStart) {
+ // We should only see Foo::Bar if we're parsing inline assembly.
+ if (!isParsingInlineAsm())
+ return 0;
+
+ // If we don't see a ':' then there can't be a qualifier.
+ if (getLexer().isNot(AsmToken::Colon))
+ return 0;
+
+
+ bool Done = false;
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc IdentEnd = Tok.getEndLoc();
+ while (!Done) {
+ switch (getLexer().getKind()) {
+ default:
+ Done = true;
+ break;
+ case AsmToken::Colon:
+ getLexer().Lex(); // Consume ':'.
+ if (getLexer().isNot(AsmToken::Colon))
+ return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
+ getLexer().Lex(); // Consume second ':'.
+ if (getLexer().isNot(AsmToken::Identifier))
+ return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
+ break;
+ case AsmToken::Identifier:
+ IdentEnd = Tok.getEndLoc();
+ getLexer().Lex(); // Consume the identifier.
+ break;
+ }
+ }
+ size_t Len = IdentEnd.getPointer() - IdentStart.getPointer();
+ StringRef Identifier(IdentStart.getPointer(), Len);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
+ return 0;
+}
+
+/// ParseIntelMemOperand - Parse intel style memory operand.
+X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
+ uint64_t ImmDisp,
+ SMLoc Start) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc End;
+
+ unsigned Size = getIntelMemOperandSize(Tok.getString());
+ if (Size) {
+ Parser.Lex();
+ assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
+ "Unexpected token!");
+ Parser.Lex();
+ }
+
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+ if (getLexer().is(AsmToken::Integer)) {
+ const AsmToken &IntTok = Parser.getTok();
+ if (isParsingInlineAsm())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
+ IntTok.getLoc()));
+ uint64_t ImmDisp = IntTok.getIntVal();
+ Parser.Lex(); // Eat the integer.
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(Start, "Expected '[' token!");
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+ }
+
+ if (getLexer().is(AsmToken::LBrac))
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+
+ if (!ParseRegister(SegReg, Start, End)) {
+ // Handel SegReg : [ ... ]
+ if (getLexer().isNot(AsmToken::Colon))
+ return ErrorOperand(Start, "Expected ':' token!");
+ Parser.Lex(); // Eat :
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(Start, "Expected '[' token!");
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+ }
+
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ SMLoc IdentStart = Tok.getLoc();
+ if (getParser().parseExpression(Disp, End))
+ return 0;
+
+ if (!isParsingInlineAsm())
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+
+ if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
+ return Err;
+
+ return CreateMemForInlineAsm(Disp, Start, End, Start, Size);
+}
+
+/// Parse the '.' operator.
+bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
+ const MCExpr **NewDisp,
+ SmallString<64> &Err) {
+ AsmToken Tok = *&Parser.getTok();
+ uint64_t OrigDispVal, DotDispVal;
+
+ // FIXME: Handle non-constant expressions.
+ if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
+ OrigDispVal = OrigDisp->getValue();
+ } else {
+ Err = "Non-constant offsets are not supported!";
+ return true;
+ }
+
+ // Drop the '.'.
+ StringRef DotDispStr = Tok.getString().drop_front(1);
+
+ // .Imm gets lexed as a real.
+ if (Tok.is(AsmToken::Real)) {
+ APInt DotDisp;
+ DotDispStr.getAsInteger(10, DotDisp);
+ DotDispVal = DotDisp.getZExtValue();
+ } else if (Tok.is(AsmToken::Identifier)) {
+ // We should only see an identifier when parsing the original inline asm.
+ // The front-end should rewrite this in terms of immediates.
+ assert (isParsingInlineAsm() && "Unexpected field name!");
+
+ unsigned DotDisp;
+ std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
+ if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
+ DotDisp)) {
+ Err = "Unable to lookup field reference!";
+ return true;
+ }
+ DotDispVal = DotDisp;
+ } else {
+ Err = "Unexpected token type!";
+ return true;
+ }
+
+ if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
+ SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
+ unsigned Len = DotDispStr.size();
+ unsigned Val = OrigDispVal + DotDispVal;
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
+ Val));
+ }
+
+ *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
+ return false;
+}
+
+/// Parse the 'offset' operator. This operator is used to specify the
+/// location rather then the content of a variable.
+X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
+ SMLoc OffsetOfLoc = Start;
+ Parser.Lex(); // Eat offset.
+ Start = Parser.getTok().getLoc();
+ assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
+
+ SMLoc End;
+ const MCExpr *Val;
+ if (getParser().parseExpression(Val, End))
+ return ErrorOperand(Start, "Unable to parse expression!");
+
+ // Don't emit the offset operator.
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
+
+ // The offset operator will have an 'r' constraint, thus we need to create
+ // register operand to ensure proper matching. Just pick a GPR based on
+ // the size of a pointer.
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
+ OffsetOfLoc);
+}
+
+enum IntelOperatorKind {
+ IOK_LENGTH,
+ IOK_SIZE,
+ IOK_TYPE
+};
+
+/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
+/// returns the number of elements in an array. It returns the value 1 for
+/// non-array variables. The SIZE operator returns the size of a C or C++
+/// variable. A variable's size is the product of its LENGTH and TYPE. The
+/// TYPE operator returns the size of a C or C++ type or variable. If the
+/// variable is an array, TYPE returns the size of a single element.
+X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
+ SMLoc TypeLoc = Start;
+ Parser.Lex(); // Eat offset.
+ Start = Parser.getTok().getLoc();
+ assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
+
+ SMLoc End;
+ const MCExpr *Val;
+ if (getParser().parseExpression(Val, End))
+ return 0;
+
+ unsigned Length = 0, Size = 0, Type = 0;
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other then an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ bool IsVarDecl;
+ if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
+ Size, Type, IsVarDecl))
+ return ErrorOperand(Start, "Unable to lookup expr!");
+ }
+ unsigned CVal;
+ switch(OpKind) {
+ default: llvm_unreachable("Unexpected operand kind!");
+ case IOK_LENGTH: CVal = Length; break;
+ case IOK_SIZE: CVal = Size; break;
+ case IOK_TYPE: CVal = Type; break;
+ }
+
+ // Rewrite the type operator and the C or C++ type or variable in terms of an
+ // immediate. E.g. TYPE foo -> $$4
+ unsigned Len = End.getPointer() - TypeLoc.getPointer();
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
+
+ const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
+ return X86Operand::CreateImm(Imm, Start, End);
+}
+
+X86Operand *X86AsmParser::ParseIntelOperand() {
+ SMLoc Start = Parser.getTok().getLoc(), End;
+ StringRef AsmTokStr = Parser.getTok().getString();
+
+ // Offset, length, type and size operators.
+ if (isParsingInlineAsm()) {
+ if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
+ return ParseIntelOffsetOfOperator(Start);
+ if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
+ return ParseIntelOperator(Start, IOK_LENGTH);
+ if (AsmTokStr == "size" || AsmTokStr == "SIZE")
+ return ParseIntelOperator(Start, IOK_SIZE);
+ if (AsmTokStr == "type" || AsmTokStr == "TYPE")
+ return ParseIntelOperator(Start, IOK_TYPE);
+ }
+
+ // Immediate.
+ if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
+ getLexer().is(AsmToken::Minus)) {
+ const MCExpr *Val;
+ bool isInteger = getLexer().is(AsmToken::Integer);
+ if (!getParser().parseExpression(Val, End)) {
+ if (isParsingInlineAsm())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
+ // Immediate.
+ if (getLexer().isNot(AsmToken::LBrac))
+ return X86Operand::CreateImm(Val, Start, End);
+
+ // Only positive immediates are valid.
+ if (!isInteger) {
+ Error(Parser.getTok().getLoc(), "expected a positive immediate "
+ "displacement before bracketed expr.");
+ return 0;
+ }
+
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+ if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
+ return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
+ }
+ }
+
+ // Register.
+ unsigned RegNo = 0;
+ if (!ParseRegister(RegNo, Start, End)) {
+ // If this is a segment register followed by a ':', then this is the start
+ // of a memory reference, otherwise this is a normal register reference.
+ if (getLexer().isNot(AsmToken::Colon))
+ return X86Operand::CreateReg(RegNo, Start, End);
+
+ getParser().Lex(); // Eat the colon.
+ return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
+ }
+
+ // Memory operand.
+ return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
+}
+
+X86Operand *X86AsmParser::ParseATTOperand() {
+ switch (getLexer().getKind()) {
+ default:
+ // Parse a memory operand with no segment register.
+ return ParseMemOperand(0, Parser.getTok().getLoc());
+ case AsmToken::Percent: {
+ // Read the register.
+ unsigned RegNo;
+ SMLoc Start, End;
+ if (ParseRegister(RegNo, Start, End)) return 0;
+ if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
+ Error(Start, "%eiz and %riz can only be used as index registers",
+ SMRange(Start, End));
+ return 0;
+ }
+
+ // If this is a segment register followed by a ':', then this is the start
+ // of a memory reference, otherwise this is a normal register reference.
+ if (getLexer().isNot(AsmToken::Colon))
+ return X86Operand::CreateReg(RegNo, Start, End);
+
+ getParser().Lex(); // Eat the colon.
+ return ParseMemOperand(RegNo, Start);
+ }
+ case AsmToken::Dollar: {
+ // $42 -> immediate.
+ SMLoc Start = Parser.getTok().getLoc(), End;
+ Parser.Lex();
+ const MCExpr *Val;
+ if (getParser().parseExpression(Val, End))
+ return 0;
+ return X86Operand::CreateImm(Val, Start, End);
+ }
+ }
+}
+
+/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
+/// has already been parsed if present.
+X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
+
+ // We have to disambiguate a parenthesized expression "(4+5)" from the start
+ // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
+ // only way to do this without lookahead is to eat the '(' and see what is
+ // after it.
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ if (getLexer().isNot(AsmToken::LParen)) {
+ SMLoc ExprEnd;
+ if (getParser().parseExpression(Disp, ExprEnd)) return 0;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg == 0)
+ return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
+ }
+
+ // Eat the '('.
+ Parser.Lex();
+ } else {
+ // Okay, we have a '('. We don't know if this is an expression or not, but
+ // so we have to eat the ( to see beyond it.
+ SMLoc LParenLoc = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the '('.
+
+ if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
+ // Nothing to do here, fall into the code below with the '(' part of the
+ // memory operand consumed.
+ } else {
+ SMLoc ExprEnd;
+
+ // It must be an parenthesized expression, parse it now.
+ if (getParser().parseParenExpression(Disp, ExprEnd))
+ return 0;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg == 0)
+ return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
+ }
+
+ // Eat the '('.
+ Parser.Lex();
+ }
+ }
+
+ // If we reached here, then we just ate the ( of the memory operand. Process
+ // the rest of the memory operand.
+ unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+ SMLoc IndexLoc;
+
+ if (getLexer().is(AsmToken::Percent)) {
+ SMLoc StartLoc, EndLoc;
+ if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
+ Error(StartLoc, "eiz and riz can only be used as index registers",
+ SMRange(StartLoc, EndLoc));
+ return 0;
+ }
+ }
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+ IndexLoc = Parser.getTok().getLoc();
+
+ // Following the comma we should have either an index register, or a scale
+ // value. We don't support the later form, but we want to parse it
+ // correctly.
+ //
+ // Not that even though it would be completely consistent to support syntax
+ // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
+ if (getLexer().is(AsmToken::Percent)) {
+ SMLoc L;
+ if (ParseRegister(IndexReg, L, L)) return 0;
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ // Parse the scale amount:
+ // ::= ',' [scale-expression]
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(),
+ "expected comma in scale expression");
+ return 0;
+ }
+ Parser.Lex(); // Eat the comma.
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ int64_t ScaleVal;
+ if (getParser().parseAbsoluteExpression(ScaleVal)){
+ Error(Loc, "expected scale expression");
+ return 0;
+ }
+
+ // Validate the scale amount.
+ if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
+ Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
+ return 0;
+ }
+ Scale = (unsigned)ScaleVal;
+ }
+ }
+ } else if (getLexer().isNot(AsmToken::RParen)) {
+ // A scale amount without an index is ignored.
+ // index.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ int64_t Value;
+ if (getParser().parseAbsoluteExpression(Value))
+ return 0;
+
+ if (Value != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
+ }
+ }
+
+ // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
+ if (getLexer().isNot(AsmToken::RParen)) {
+ Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
+ return 0;
+ }
+ SMLoc MemEnd = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat the ')'.
+
+ // If we have both a base register and an index register make sure they are
+ // both 64-bit or 32-bit registers.
+ // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
+ if (BaseReg != 0 && IndexReg != 0) {
+ if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
+ (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
+ IndexReg != X86::RIZ) {
+ Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
+ return 0;
+ }
+ if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
+ (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
+ IndexReg != X86::EIZ){
+ Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
+ return 0;
+ }
+ }
+
+ return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
+ MemStart, MemEnd);
+}
+
+bool X86AsmParser::
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ InstInfo = &Info;
+ StringRef PatchedName = Name;
+
+ // FIXME: Hack to recognize setneb as setne.
+ if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
+ PatchedName != "setb" && PatchedName != "setnb")
+ PatchedName = PatchedName.substr(0, Name.size()-1);
+
+ // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
+ const MCExpr *ExtraImmOp = 0;
+ if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
+ (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
+ PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ bool IsVCMP = PatchedName[0] == 'v';
+ unsigned SSECCIdx = IsVCMP ? 4 : 3;
+ unsigned SSEComparisonCode = StringSwitch<unsigned>(
+ PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
+ .Case("eq", 0x00)
+ .Case("lt", 0x01)
+ .Case("le", 0x02)
+ .Case("unord", 0x03)
+ .Case("neq", 0x04)
+ .Case("nlt", 0x05)
+ .Case("nle", 0x06)
+ .Case("ord", 0x07)
+ /* AVX only from here */
+ .Case("eq_uq", 0x08)
+ .Case("nge", 0x09)
+ .Case("ngt", 0x0A)
+ .Case("false", 0x0B)
+ .Case("neq_oq", 0x0C)
+ .Case("ge", 0x0D)
+ .Case("gt", 0x0E)
+ .Case("true", 0x0F)
+ .Case("eq_os", 0x10)
+ .Case("lt_oq", 0x11)
+ .Case("le_oq", 0x12)
+ .Case("unord_s", 0x13)
+ .Case("neq_us", 0x14)
+ .Case("nlt_uq", 0x15)
+ .Case("nle_uq", 0x16)
+ .Case("ord_s", 0x17)
+ .Case("eq_us", 0x18)
+ .Case("nge_uq", 0x19)
+ .Case("ngt_uq", 0x1A)
+ .Case("false_os", 0x1B)
+ .Case("neq_os", 0x1C)
+ .Case("ge_oq", 0x1D)
+ .Case("gt_oq", 0x1E)
+ .Case("true_us", 0x1F)
+ .Default(~0U);
+ if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
+ ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
+ getParser().getContext());
+ if (PatchedName.endswith("ss")) {
+ PatchedName = IsVCMP ? "vcmpss" : "cmpss";
+ } else if (PatchedName.endswith("sd")) {
+ PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
+ } else if (PatchedName.endswith("ps")) {
+ PatchedName = IsVCMP ? "vcmpps" : "cmpps";
+ } else {
+ assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
+ PatchedName = IsVCMP ? "vcmppd" : "cmppd";
+ }
+ }
+ }
+
+ Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
+
+ if (ExtraImmOp && !isParsingIntelSyntax())
+ Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
+ // Determine whether this is an instruction prefix.
+ bool isPrefix =
+ Name == "lock" || Name == "rep" ||
+ Name == "repe" || Name == "repz" ||
+ Name == "repne" || Name == "repnz" ||
+ Name == "rex64" || Name == "data16";
+
+
+ // This does the actual operand parsing. Don't parse any more if we have a
+ // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
+ // just want to parse the "lock" as the first instruction and the "incl" as
+ // the next one.
+ if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
+
+ // Parse '*' modifier.
+ if (getLexer().is(AsmToken::Star)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(X86Operand::CreateToken("*", Loc));
+ Parser.Lex(); // Eat the star.
+ }
+
+ // Read the first operand.
+ if (X86Operand *Op = ParseOperand())
+ Operands.push_back(Op);
+ else {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (X86Operand *Op = ParseOperand())
+ Operands.push_back(Op);
+ else {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex(); // Consume the EndOfStatement
+ else if (isPrefix && getLexer().is(AsmToken::Slash))
+ Parser.Lex(); // Consume the prefix separator Slash
+
+ if (ExtraImmOp && isParsingIntelSyntax())
+ Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
+ // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
+ // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
+ // documented form in various unofficial manuals, so a lot of code uses it.
+ if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.back();
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+ // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
+ if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+ // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
+ if (Name.startswith("ins") && Operands.size() == 3 &&
+ (Name == "insb" || Name == "insw" || Name == "insl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+ // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
+ if (Name.startswith("outs") && Operands.size() == 3 &&
+ (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+ // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
+ if (Name.startswith("movs") && Operands.size() == 3 &&
+ (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
+ (is64BitMode() && Name == "movsq"))) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+ // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
+ if (Name.startswith("lods") && Operands.size() == 3 &&
+ (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
+ Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isSrcOp(*Op1) && Op2->isReg()) {
+ const char *ins;
+ unsigned reg = Op2->getReg();
+ bool isLods = Name == "lods";
+ if (reg == X86::AL && (isLods || Name == "lodsb"))
+ ins = "lodsb";
+ else if (reg == X86::AX && (isLods || Name == "lodsw"))
+ ins = "lodsw";
+ else if (reg == X86::EAX && (isLods || Name == "lodsl"))
+ ins = "lodsl";
+ else if (reg == X86::RAX && (isLods || Name == "lodsq"))
+ ins = "lodsq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+ // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
+ if (Name.startswith("stos") && Operands.size() == 3 &&
+ (Name == "stos" || Name == "stosb" || Name == "stosw" ||
+ Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isDstOp(*Op2) && Op1->isReg()) {
+ const char *ins;
+ unsigned reg = Op1->getReg();
+ bool isStos = Name == "stos";
+ if (reg == X86::AL && (isStos || Name == "stosb"))
+ ins = "stosb";
+ else if (reg == X86::AX && (isStos || Name == "stosw"))
+ ins = "stosw";
+ else if (reg == X86::EAX && (isStos || Name == "stosl"))
+ ins = "stosl";
+ else if (reg == X86::RAX && (isStos || Name == "stosq"))
+ ins = "stosq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+
+ // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
+ // "shift <op>".
+ if ((Name.startswith("shr") || Name.startswith("sar") ||
+ Name.startswith("shl") || Name.startswith("sal") ||
+ Name.startswith("rcl") || Name.startswith("rcr") ||
+ Name.startswith("rol") || Name.startswith("ror")) &&
+ Operands.size() == 3) {
+ if (isParsingIntelSyntax()) {
+ // Intel syntax
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[2];
+ Operands.pop_back();
+ }
+ } else {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ }
+ }
+ }
+
+ // Transforms "int $3" into "int3" as a size optimization. We can't write an
+ // instalias with an immediate operand yet.
+ if (Name == "int" && Operands.size() == 2) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
+ }
+ }
+
+ return false;
+}
+
+static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
+ bool isCmp) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ if (!isCmp)
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+}
+
+static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
+}
+
+static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
+}
+
+static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
+}
+
+bool X86AsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
+ case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
+ case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
+ case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
+ case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
+ case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
+ case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
+ case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
+ case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
+ case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
+ case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
+ case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
+ case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
+ case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
+ case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
+ case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
+ case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
+ case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
+ case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
+ case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
+ case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
+ case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
+ case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
+ case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
+ }
+}
+
+static const char *getSubtargetFeatureName(unsigned Val);
+bool X86AsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ assert(!Operands.empty() && "Unexpect empty operand list!");
+ X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
+ assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
+
+ // First, handle aliases that expand to multiple instructions.
+ // FIXME: This should be replaced with a real .td file alias mechanism.
+ // Also, MatchInstructionImpl should actually *do* the EmitInstruction
+ // call.
+ if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
+ Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
+ Op->getToken() == "finit" || Op->getToken() == "fsave" ||
+ Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
+ MCInst Inst;
+ Inst.setOpcode(X86::WAIT);
+ Inst.setLoc(IDLoc);
+ if (!MatchingInlineAsm)
+ Out.EmitInstruction(Inst);
+
+ const char *Repl =
+ StringSwitch<const char*>(Op->getToken())
+ .Case("finit", "fninit")
+ .Case("fsave", "fnsave")
+ .Case("fstcw", "fnstcw")
+ .Case("fstcww", "fnstcw")
+ .Case("fstenv", "fnstenv")
+ .Case("fstsw", "fnstsw")
+ .Case("fstsww", "fnstsw")
+ .Case("fclex", "fnclex")
+ .Default(0);
+ assert(Repl && "Unknown wait-prefixed instruction");
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
+ }
+
+ bool WasOriginallyInvalidOperand = false;
+ MCInst Inst;
+
+ // First, try a direct match.
+ switch (MatchInstructionImpl(Operands, Inst,
+ ErrorInfo, MatchingInlineAsm,
+ isParsingIntelSyntax())) {
+ default: break;
+ case Match_Success:
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other.
+ if (!MatchingInlineAsm)
+ while (processInstruction(Inst, Operands))
+ ;
+
+ Inst.setLoc(IDLoc);
+ if (!MatchingInlineAsm)
+ Out.EmitInstruction(Inst);
+ Opcode = Inst.getOpcode();
+ return false;
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+ // Special case the error message for the very common case where only
+ // a single subtarget feature is missing.
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
+ }
+ case Match_InvalidOperand:
+ WasOriginallyInvalidOperand = true;
+ break;
+ case Match_MnemonicFail:
+ break;
+ }
+
+ // FIXME: Ideally, we would only attempt suffix matches for things which are
+ // valid prefixes, and we could just infer the right unambiguous
+ // type. However, that requires substantially more matcher support than the
+ // following hack.
+
+ // Change the operand to point to a temporary token.
+ StringRef Base = Op->getToken();
+ SmallString<16> Tmp;
+ Tmp += Base;
+ Tmp += ' ';
+ Op->setTokenValue(Tmp.str());
+
+ // If this instruction starts with an 'f', then it is a floating point stack
+ // instruction. These come in up to three forms for 32-bit, 64-bit, and
+ // 80-bit floating point, which use the suffixes s,l,t respectively.
+ //
+ // Otherwise, we assume that this may be an integer instruction, which comes
+ // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
+ const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
+
+ // Check for the various suffix matches.
+ Tmp[Base.size()] = Suffixes[0];
+ unsigned ErrorInfoIgnore;
+ unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
+ unsigned Match1, Match2, Match3, Match4;
+
+ Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match1 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
+ Tmp[Base.size()] = Suffixes[1];
+ Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match2 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
+ Tmp[Base.size()] = Suffixes[2];
+ Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match3 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
+ Tmp[Base.size()] = Suffixes[3];
+ Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match4 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
+
+ // Restore the old token.
+ Op->setTokenValue(Base);
+
+ // If exactly one matched, then we treat that as a successful match (and the
+ // instruction will already have been filled in correctly, since the failing
+ // matches won't have modified it).
+ unsigned NumSuccessfulMatches =
+ (Match1 == Match_Success) + (Match2 == Match_Success) +
+ (Match3 == Match_Success) + (Match4 == Match_Success);
+ if (NumSuccessfulMatches == 1) {
+ Inst.setLoc(IDLoc);
+ if (!MatchingInlineAsm)
+ Out.EmitInstruction(Inst);
+ Opcode = Inst.getOpcode();
+ return false;
+ }
+
+ // Otherwise, the match failed, try to produce a decent error message.
+
+ // If we had multiple suffix matches, then identify this as an ambiguous
+ // match.
+ if (NumSuccessfulMatches > 1) {
+ char MatchChars[4];
+ unsigned NumMatches = 0;
+ if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
+ if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
+ if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
+ if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
+
+ SmallString<126> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ambiguous instructions require an explicit suffix (could be ";
+ for (unsigned i = 0; i != NumMatches; ++i) {
+ if (i != 0)
+ OS << ", ";
+ if (i + 1 == NumMatches)
+ OS << "or ";
+ OS << "'" << Base << MatchChars[i] << "'";
+ }
+ OS << ")";
+ Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
+ return true;
+ }
+
+ // Okay, we know that none of the variants matched successfully.
+
+ // If all of the instructions reported an invalid mnemonic, then the original
+ // mnemonic was invalid.
+ if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
+ (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
+ if (!WasOriginallyInvalidOperand) {
+ ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
+ Op->getLocRange();
+ return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
+ Ranges, MatchingInlineAsm);
+ }
+
+ // Recover location info for the operand if we know which was the problem.
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction",
+ EmptyRanges, MatchingInlineAsm);
+
+ X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
+ if (Operand->getStartLoc().isValid()) {
+ SMRange OperandRange = Operand->getLocRange();
+ return Error(Operand->getStartLoc(), "invalid operand for instruction",
+ OperandRange, MatchingInlineAsm);
+ }
+ }
+
+ return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ MatchingInlineAsm);
+ }
+
+ // If one instruction matched with a missing feature, report this as a
+ // missing feature.
+ if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
+ (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
+ if (ErrorInfoMissingFeature & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
+ }
+
+ // If one instruction matched with an invalid operand, report this as an
+ // operand failure.
+ if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
+ (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
+ Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ MatchingInlineAsm);
+ return true;
+ }
+
+ // If all of these were an outright failure, report it in a useless way.
+ Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
+ EmptyRanges, MatchingInlineAsm);
+ return true;
+}
+
+
+bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ else if (IDVal.startswith(".code"))
+ return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
+ else if (IDVal.startswith(".att_syntax")) {
+ getParser().setAssemblerDialect(0);
+ return false;
+ } else if (IDVal.startswith(".intel_syntax")) {
+ getParser().setAssemblerDialect(1);
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if(Parser.getTok().getString() == "noprefix") {
+ // FIXME : Handle noprefix
+ Parser.Lex();
+ } else
+ return true;
+ }
+ return false;
+ }
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// ParseDirectiveCode
+/// ::= .code32 | .code64
+bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
+ if (IDVal == ".code32") {
+ Parser.Lex();
+ if (is64BitMode()) {
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ }
+ } else if (IDVal == ".code64") {
+ Parser.Lex();
+ if (!is64BitMode()) {
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
+ }
+ } else {
+ return Error(L, "unexpected directive " + IDVal);
+ }
+
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmParser() {
+ RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
+ RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#define GET_SUBTARGET_FEATURE_NAME
+#include "X86GenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
new file mode 100644
index 000000000000..ca6f80ce3e58
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -0,0 +1,814 @@
+//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains code to translate the data produced by the decoder into
+// MCInsts.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86Disassembler.h"
+#include "X86DisassemblerDecoder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
+
+using namespace llvm;
+using namespace llvm::X86Disassembler;
+
+void x86DisassemblerDebug(const char *file,
+ unsigned line,
+ const char *s) {
+ dbgs() << file << ":" << line << ": " << s;
+}
+
+const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) {
+ const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
+ return MII->getName(Opcode);
+}
+
+#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
+
+namespace llvm {
+
+// Fill-ins to make the compiler happy. These constants are never actually
+// assigned; they are just filler to make an automatically-generated switch
+// statement work.
+namespace X86 {
+ enum {
+ BX_SI = 500,
+ BX_DI = 501,
+ BP_SI = 502,
+ BP_DI = 503,
+ sib = 504,
+ sib64 = 505
+ };
+}
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+}
+
+static bool translateInstruction(MCInst &target,
+ InternalInstruction &source,
+ const MCDisassembler *Dis);
+
+X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
+ DisassemblerMode mode,
+ const MCInstrInfo *MII)
+ : MCDisassembler(STI), MII(MII), fMode(mode) {}
+
+X86GenericDisassembler::~X86GenericDisassembler() {
+ delete MII;
+}
+
+/// regionReader - a callback function that wraps the readByte method from
+/// MemoryObject.
+///
+/// @param arg - The generic callback parameter. In this case, this should
+/// be a pointer to a MemoryObject.
+/// @param byte - A pointer to the byte to be read.
+/// @param address - The address to be read.
+static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
+ const MemoryObject* region = static_cast<const MemoryObject*>(arg);
+ return region->readByte(address, byte);
+}
+
+/// logger - a callback function that wraps the operator<< method from
+/// raw_ostream.
+///
+/// @param arg - The generic callback parameter. This should be a pointe
+/// to a raw_ostream.
+/// @param log - A string to be logged. logger() adds a newline.
+static void logger(void* arg, const char* log) {
+ if (!arg)
+ return;
+
+ raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
+ vStream << log << "\n";
+}
+
+//
+// Public interface for the disassembler
+//
+
+MCDisassembler::DecodeStatus
+X86GenericDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
+ CommentStream = &cStream;
+
+ InternalInstruction internalInstr;
+
+ dlog_t loggerFn = logger;
+ if (&vStream == &nulls())
+ loggerFn = 0; // Disable logging completely if it's going to nulls().
+
+ int ret = decodeInstruction(&internalInstr,
+ regionReader,
+ (const void*)&region,
+ loggerFn,
+ (void*)&vStream,
+ (const void*)MII,
+ address,
+ fMode);
+
+ if (ret) {
+ size = internalInstr.readerCursor - address;
+ return Fail;
+ }
+ else {
+ size = internalInstr.length;
+ return (!translateInstruction(instr, internalInstr, this)) ?
+ Success : Fail;
+ }
+}
+
+//
+// Private code that translates from struct InternalInstructions to MCInsts.
+//
+
+/// translateRegister - Translates an internal register to the appropriate LLVM
+/// register, and appends it as an operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param reg - The Reg to append.
+static void translateRegister(MCInst &mcInst, Reg reg) {
+#define ENTRY(x) X86::x,
+ uint8_t llvmRegnums[] = {
+ ALL_REGS
+ 0
+ };
+#undef ENTRY
+
+ uint8_t llvmRegnum = llvmRegnums[reg];
+ mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
+}
+
+/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
+/// immediate Value in the MCInst.
+///
+/// @param Value - The immediate Value, has had any PC adjustment made by
+/// the caller.
+/// @param isBranch - If the instruction is a branch instruction
+/// @param Address - The starting address of the instruction
+/// @param Offset - The byte offset to this immediate in the instruction
+/// @param Width - The byte width of this immediate in the instruction
+///
+/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
+/// called then that function is called to get any symbolic information for the
+/// immediate in the instruction using the Address, Offset and Width. If that
+/// returns non-zero then the symbolic information it returns is used to create
+/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
+/// returns zero and isBranch is true then a symbol look up for immediate Value
+/// is done and if a symbol is found an MCExpr is created with that, else
+/// an MCExpr with the immediate Value is created. This function returns true
+/// if it adds an operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Width, MCInst &MI,
+ const MCDisassembler *Dis) {
+ LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
+ struct LLVMOpInfo1 SymbolicOp;
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ SymbolicOp.Value = Value;
+ void *DisInfo = Dis->getDisInfoBlock();
+
+ if (!getOpInfo ||
+ !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) {
+ // Clear SymbolicOp.Value from above and also all other fields.
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (!SymbolLookUp)
+ return false;
+ uint64_t ReferenceType;
+ if (isBranch)
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ else
+ ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+ const char *ReferenceName;
+ const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
+ &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
+ }
+ // For branches always create an MCExpr so it gets printed as hex address.
+ else if (isBranch) {
+ SymbolicOp.Value = Value;
+ }
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
+ if (!Name && !isBranch)
+ return false;
+ }
+
+ MCContext *Ctx = Dis->getMCContext();
+ const MCExpr *Add = NULL;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Sub = NULL;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Off = NULL;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
+ else
+ LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
+ else
+ Expr = LHS;
+ } else if (Add) {
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off != 0)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, *Ctx);
+ }
+
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+
+ return true;
+}
+
+/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
+/// referenced by a load instruction with the base register that is the rip.
+/// These can often be addresses in a literal pool. The Address of the
+/// instruction and its immediate Value are used to determine the address
+/// being referenced in the literal pool entry. The SymbolLookUp call back will
+/// return a pointer to a literal 'C' string if the referenced address is an
+/// address into a section with 'C' string literals.
+static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
+ const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (SymbolLookUp) {
+ void *DisInfo = Dis->getDisInfoBlock();
+ uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
+ const char *ReferenceName;
+ (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+ (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
+ }
+}
+
+/// translateImmediate - Appends an immediate operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param immediate - The immediate value to append.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+static void translateImmediate(MCInst &mcInst, uint64_t immediate,
+ const OperandSpecifier &operand,
+ InternalInstruction &insn,
+ const MCDisassembler *Dis) {
+ // Sign-extend the immediate if necessary.
+
+ OperandType type = (OperandType)operand.type;
+
+ bool isBranch = false;
+ uint64_t pcrel = 0;
+ if (type == TYPE_RELv) {
+ isBranch = true;
+ pcrel = insn.startLocation +
+ insn.immediateOffset + insn.immediateSize;
+ switch (insn.displacementSize) {
+ default:
+ break;
+ case 1:
+ type = TYPE_MOFFS8;
+ break;
+ case 2:
+ type = TYPE_MOFFS16;
+ break;
+ case 4:
+ type = TYPE_MOFFS32;
+ break;
+ case 8:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
+ // By default sign-extend all X86 immediates based on their encoding.
+ else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
+ type == TYPE_IMM64) {
+ uint32_t Opcode = mcInst.getOpcode();
+ switch (operand.encoding) {
+ default:
+ break;
+ case ENCODING_IB:
+ // Special case those X86 instructions that use the imm8 as a set of
+ // bits, bit count, etc. and are not sign-extend.
+ if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
+ Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
+ Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
+ Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
+ Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
+ Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
+ Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
+ Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
+ Opcode != X86::VINSERTPSrr)
+ type = TYPE_MOFFS8;
+ break;
+ case ENCODING_IW:
+ type = TYPE_MOFFS16;
+ break;
+ case ENCODING_ID:
+ type = TYPE_MOFFS32;
+ break;
+ case ENCODING_IO:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
+
+ switch (type) {
+ case TYPE_XMM32:
+ case TYPE_XMM64:
+ case TYPE_XMM128:
+ mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
+ return;
+ case TYPE_XMM256:
+ mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
+ return;
+ case TYPE_REL8:
+ isBranch = true;
+ pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+ // fall through to sign extend the immediate if needed.
+ case TYPE_MOFFS8:
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
+ break;
+ case TYPE_MOFFS16:
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
+ case TYPE_REL32:
+ case TYPE_REL64:
+ isBranch = true;
+ pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+ // fall through to sign extend the immediate if needed.
+ case TYPE_MOFFS32:
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
+ break;
+ case TYPE_MOFFS64:
+ default:
+ // operand is 64 bits wide. Do nothing.
+ break;
+ }
+
+ if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
+ insn.immediateOffset, insn.immediateSize,
+ mcInst, Dis))
+ mcInst.addOperand(MCOperand::CreateImm(immediate));
+}
+
+/// translateRMRegister - Translates a register stored in the R/M field of the
+/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The internal instruction to extract the R/M field
+/// from.
+/// @return - 0 on success; -1 otherwise
+static bool translateRMRegister(MCInst &mcInst,
+ InternalInstruction &insn) {
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ debug("A R/M register operand may not have a SIB byte");
+ return true;
+ }
+
+ switch (insn.eaBase) {
+ default:
+ debug("Unexpected EA base register");
+ return true;
+ case EA_BASE_NONE:
+ debug("EA_BASE_NONE for ModR/M base");
+ return true;
+#define ENTRY(x) case EA_BASE_##x:
+ ALL_EA_BASES
+#undef ENTRY
+ debug("A R/M register operand may not have a base; "
+ "the operand must be a register.");
+ return true;
+#define ENTRY(x) \
+ case EA_REG_##x: \
+ mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
+ ALL_REGS
+#undef ENTRY
+ }
+
+ return false;
+}
+
+/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
+/// fields of an internal instruction (and possibly its SIB byte) to a memory
+/// operand in LLVM's format, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields
+/// from.
+/// @return - 0 on success; nonzero otherwise
+static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
+ const MCDisassembler *Dis) {
+ // Addresses in an MCInst are represented as five operands:
+ // 1. basereg (register) The R/M base, or (if there is a SIB) the
+ // SIB base
+ // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
+ // scale amount
+ // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
+ // the index (which is multiplied by the
+ // scale amount)
+ // 4. displacement (immediate) 0, or the displacement if there is one
+ // 5. segmentreg (register) x86_registerNONE for now, but could be set
+ // if we have segment overrides
+
+ MCOperand baseReg;
+ MCOperand scaleAmount;
+ MCOperand indexReg;
+ MCOperand displacement;
+ MCOperand segmentReg;
+ uint64_t pcrel = 0;
+
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ if (insn.sibBase != SIB_BASE_NONE) {
+ switch (insn.sibBase) {
+ default:
+ debug("Unexpected sibBase");
+ return true;
+#define ENTRY(x) \
+ case SIB_BASE_##x: \
+ baseReg = MCOperand::CreateReg(X86::x); break;
+ ALL_SIB_BASES
+#undef ENTRY
+ }
+ } else {
+ baseReg = MCOperand::CreateReg(0);
+ }
+
+ // Check whether we are handling VSIB addressing mode for GATHER.
+ // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
+ // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
+ // I don't see a way to get the correct IndexReg in readSIB:
+ // We can tell whether it is VSIB or SIB after instruction ID is decoded,
+ // but instruction ID may not be decoded yet when calling readSIB.
+ uint32_t Opcode = mcInst.getOpcode();
+ bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
+ Opcode == X86::VGATHERDPDYrm ||
+ Opcode == X86::VGATHERQPDrm ||
+ Opcode == X86::VGATHERDPSrm ||
+ Opcode == X86::VGATHERQPSrm ||
+ Opcode == X86::VPGATHERDQrm ||
+ Opcode == X86::VPGATHERDQYrm ||
+ Opcode == X86::VPGATHERQQrm ||
+ Opcode == X86::VPGATHERDDrm ||
+ Opcode == X86::VPGATHERQDrm);
+ bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
+ Opcode == X86::VGATHERDPSYrm ||
+ Opcode == X86::VGATHERQPSYrm ||
+ Opcode == X86::VPGATHERQQYrm ||
+ Opcode == X86::VPGATHERDDYrm ||
+ Opcode == X86::VPGATHERQDYrm);
+ if (IndexIs128 || IndexIs256) {
+ unsigned IndexOffset = insn.sibIndex -
+ (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
+ SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
+ insn.sibIndex = (SIBIndex)(IndexBase +
+ (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
+ }
+
+ if (insn.sibIndex != SIB_INDEX_NONE) {
+ switch (insn.sibIndex) {
+ default:
+ debug("Unexpected sibIndex");
+ return true;
+#define ENTRY(x) \
+ case SIB_INDEX_##x: \
+ indexReg = MCOperand::CreateReg(X86::x); break;
+ EA_BASES_32BIT
+ EA_BASES_64BIT
+ REGS_XMM
+ REGS_YMM
+#undef ENTRY
+ }
+ } else {
+ indexReg = MCOperand::CreateReg(0);
+ }
+
+ scaleAmount = MCOperand::CreateImm(insn.sibScale);
+ } else {
+ switch (insn.eaBase) {
+ case EA_BASE_NONE:
+ if (insn.eaDisplacement == EA_DISP_NONE) {
+ debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
+ return true;
+ }
+ if (insn.mode == MODE_64BIT){
+ pcrel = insn.startLocation +
+ insn.displacementOffset + insn.displacementSize;
+ tryAddingPcLoadReferenceComment(insn.startLocation +
+ insn.displacementOffset,
+ insn.displacement + pcrel, Dis);
+ baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
+ }
+ else
+ baseReg = MCOperand::CreateReg(0);
+
+ indexReg = MCOperand::CreateReg(0);
+ break;
+ case EA_BASE_BX_SI:
+ baseReg = MCOperand::CreateReg(X86::BX);
+ indexReg = MCOperand::CreateReg(X86::SI);
+ break;
+ case EA_BASE_BX_DI:
+ baseReg = MCOperand::CreateReg(X86::BX);
+ indexReg = MCOperand::CreateReg(X86::DI);
+ break;
+ case EA_BASE_BP_SI:
+ baseReg = MCOperand::CreateReg(X86::BP);
+ indexReg = MCOperand::CreateReg(X86::SI);
+ break;
+ case EA_BASE_BP_DI:
+ baseReg = MCOperand::CreateReg(X86::BP);
+ indexReg = MCOperand::CreateReg(X86::DI);
+ break;
+ default:
+ indexReg = MCOperand::CreateReg(0);
+ switch (insn.eaBase) {
+ default:
+ debug("Unexpected eaBase");
+ return true;
+ // Here, we will use the fill-ins defined above. However,
+ // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
+ // sib and sib64 were handled in the top-level if, so they're only
+ // placeholders to keep the compiler happy.
+#define ENTRY(x) \
+ case EA_BASE_##x: \
+ baseReg = MCOperand::CreateReg(X86::x); break;
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) case EA_REG_##x:
+ ALL_REGS
+#undef ENTRY
+ debug("A R/M memory operand may not be a register; "
+ "the base field must be a base.");
+ return true;
+ }
+ }
+
+ scaleAmount = MCOperand::CreateImm(1);
+ }
+
+ displacement = MCOperand::CreateImm(insn.displacement);
+
+ static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
+ 0, // SEG_OVERRIDE_NONE
+ X86::CS,
+ X86::SS,
+ X86::DS,
+ X86::ES,
+ X86::FS,
+ X86::GS
+ };
+
+ segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
+
+ mcInst.addOperand(baseReg);
+ mcInst.addOperand(scaleAmount);
+ mcInst.addOperand(indexReg);
+ if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
+ insn.startLocation, insn.displacementOffset,
+ insn.displacementSize, mcInst, Dis))
+ mcInst.addOperand(displacement);
+ mcInst.addOperand(segmentReg);
+ return false;
+}
+
+/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
+/// byte of an instruction to LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields
+/// from.
+/// @return - 0 on success; nonzero otherwise
+static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn, const MCDisassembler *Dis) {
+ switch (operand.type) {
+ default:
+ debug("Unexpected type for a R/M operand");
+ return true;
+ case TYPE_R8:
+ case TYPE_R16:
+ case TYPE_R32:
+ case TYPE_R64:
+ case TYPE_Rv:
+ case TYPE_MM:
+ case TYPE_MM32:
+ case TYPE_MM64:
+ case TYPE_XMM:
+ case TYPE_XMM32:
+ case TYPE_XMM64:
+ case TYPE_XMM128:
+ case TYPE_XMM256:
+ case TYPE_DEBUGREG:
+ case TYPE_CONTROLREG:
+ return translateRMRegister(mcInst, insn);
+ case TYPE_M:
+ case TYPE_M8:
+ case TYPE_M16:
+ case TYPE_M32:
+ case TYPE_M64:
+ case TYPE_M128:
+ case TYPE_M256:
+ case TYPE_M512:
+ case TYPE_Mv:
+ case TYPE_M32FP:
+ case TYPE_M64FP:
+ case TYPE_M80FP:
+ case TYPE_M16INT:
+ case TYPE_M32INT:
+ case TYPE_M64INT:
+ case TYPE_M1616:
+ case TYPE_M1632:
+ case TYPE_M1664:
+ case TYPE_LEA:
+ return translateRMMemory(mcInst, insn, Dis);
+ }
+}
+
+/// translateFPRegister - Translates a stack position on the FPU stack to its
+/// LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param stackPos - The stack position to translate.
+/// @return - 0 on success; nonzero otherwise.
+static bool translateFPRegister(MCInst &mcInst,
+ uint8_t stackPos) {
+ if (stackPos >= 8) {
+ debug("Invalid FP stack position");
+ return true;
+ }
+
+ mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
+
+ return false;
+}
+
+/// translateOperand - Translates an operand stored in an internal instruction
+/// to LLVM's format and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+/// @return - false on success; true otherwise.
+static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn,
+ const MCDisassembler *Dis) {
+ switch (operand.encoding) {
+ default:
+ debug("Unhandled operand encoding during translation");
+ return true;
+ case ENCODING_REG:
+ translateRegister(mcInst, insn.reg);
+ return false;
+ case ENCODING_RM:
+ return translateRM(mcInst, operand, insn, Dis);
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ debug("Translation of code offsets isn't supported.");
+ return true;
+ case ENCODING_IB:
+ case ENCODING_IW:
+ case ENCODING_ID:
+ case ENCODING_IO:
+ case ENCODING_Iv:
+ case ENCODING_Ia:
+ translateImmediate(mcInst,
+ insn.immediates[insn.numImmediatesTranslated++],
+ operand,
+ insn,
+ Dis);
+ return false;
+ case ENCODING_RB:
+ case ENCODING_RW:
+ case ENCODING_RD:
+ case ENCODING_RO:
+ translateRegister(mcInst, insn.opcodeRegister);
+ return false;
+ case ENCODING_I:
+ return translateFPRegister(mcInst, insn.opcodeModifier);
+ case ENCODING_Rv:
+ translateRegister(mcInst, insn.opcodeRegister);
+ return false;
+ case ENCODING_VVVV:
+ translateRegister(mcInst, insn.vvvv);
+ return false;
+ case ENCODING_DUP:
+ return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
+ insn, Dis);
+ }
+}
+
+/// translateInstruction - Translates an internal instruction and all its
+/// operands to an MCInst.
+///
+/// @param mcInst - The MCInst to populate with the instruction's data.
+/// @param insn - The internal instruction.
+/// @return - false on success; true otherwise.
+static bool translateInstruction(MCInst &mcInst,
+ InternalInstruction &insn,
+ const MCDisassembler *Dis) {
+ if (!insn.spec) {
+ debug("Instruction has no specification");
+ return true;
+ }
+
+ mcInst.setOpcode(insn.instructionID);
+
+ int index;
+
+ insn.numImmediatesTranslated = 0;
+
+ for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+ if (insn.operands[index].encoding != ENCODING_NONE) {
+ if (translateOperand(mcInst, insn.operands[index], insn, Dis)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static MCDisassembler *createX86_32Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT,
+ T.createMCInstrInfo());
+}
+
+static MCDisassembler *createX86_64Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT,
+ T.createMCInstrInfo());
+}
+
+extern "C" void LLVMInitializeX86Disassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+ createX86_32Disassembler);
+ TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
+ createX86_64Disassembler);
+}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
new file mode 100644
index 000000000000..b92427a7e91a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -0,0 +1,131 @@
+//===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
+// 64-bit X86 instruction sets. The main decode sequence for an assembly
+// instruction in this disassembler is:
+//
+// 1. Read the prefix bytes and determine the attributes of the instruction.
+// These attributes, recorded in enum attributeBits
+// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
+// provides a mapping from bitmasks to contexts, which are represented by
+// enum InstructionContext (ibid.).
+//
+// 2. Read the opcode, and determine what kind of opcode it is. The
+// disassembler distinguishes four kinds of opcodes, which are enumerated in
+// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
+// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
+// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
+//
+// 3. Depending on the opcode type, look in one of four ClassDecision structures
+// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
+// OpcodeDecision (ibid.) to look the opcode in. Look up the opcode, to get
+// a ModRMDecision (ibid.).
+//
+// 4. Some instructions, such as escape opcodes or extended opcodes, or even
+// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
+// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
+// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
+// ModR/M byte is required and how to interpret it.
+//
+// 5. After resolving the ModRMDecision, the disassembler has a unique ID
+// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
+// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
+// meanings of its operands.
+//
+// 6. For each operand, its encoding is an entry from OperandEncoding
+// (X86DisassemblerDecoderCommon.h) and its type is an entry from
+// OperandType (ibid.). The encoding indicates how to read it from the
+// instruction; the type indicates how to interpret the value once it has
+// been read. For example, a register operand could be stored in the R/M
+// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
+// the main opcode. This is orthogonal from its meaning (an GPR or an XMM
+// register, for instance). Given this information, the operands can be
+// extracted and interpreted.
+//
+// 7. As the last step, the disassembler translates the instruction information
+// and operands into a format understandable by the client - in this case, an
+// MCInst for use by the MC infrastructure.
+//
+// The disassembler is broken broadly into two parts: the table emitter that
+// emits the instruction decode tables discussed above during compilation, and
+// the disassembler itself. The table emitter is documented in more detail in
+// utils/TableGen/X86DisassemblerEmitter.h.
+//
+// X86Disassembler.h contains the public interface for the disassembler,
+// adhering to the MCDisassembler interface.
+// X86Disassembler.cpp contains the code responsible for step 7, and for
+// invoking the decoder to execute steps 1-6.
+// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
+// table emitter and the disassembler.
+// X86DisassemblerDecoder.h contains the public interface of the decoder,
+// factored out into C for possible use by other projects.
+// X86DisassemblerDecoder.c contains the source code of the decoder, which is
+// responsible for steps 1-6.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86DISASSEMBLER_H
+#define X86DISASSEMBLER_H
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ uint16_t operands;
+
+#define INSTRUCTION_IDS \
+ uint16_t instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+#include "llvm/MC/MCDisassembler.h"
+
+namespace llvm {
+
+class MCInst;
+class MCInstrInfo;
+class MCSubtargetInfo;
+class MemoryObject;
+class raw_ostream;
+
+namespace X86Disassembler {
+
+/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
+/// All each platform class should have to do is subclass the constructor, and
+/// provide a different disassemblerMode value.
+class X86GenericDisassembler : public MCDisassembler {
+ const MCInstrInfo *MII;
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ /// @param mode - The X86 architecture mode to decode for.
+ X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode,
+ const MCInstrInfo *MII);
+private:
+ ~X86GenericDisassembler();
+public:
+
+ /// getInstruction - See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+private:
+ DisassemblerMode fMode;
+};
+
+} // namespace X86Disassembler
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
new file mode 100644
index 000000000000..e40edba6d689
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -0,0 +1,1676 @@
+/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the implementation of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include <stdarg.h> /* for va_*() */
+#include <stdio.h> /* for vsnprintf() */
+#include <stdlib.h> /* for exit() */
+#include <string.h> /* for memset() */
+
+#include "X86DisassemblerDecoder.h"
+
+#include "X86GenDisassemblerTables.inc"
+
+#define TRUE 1
+#define FALSE 0
+
+typedef int8_t bool;
+
+#ifndef NDEBUG
+#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
+#else
+#define debug(s) do { } while (0)
+#endif
+
+
+/*
+ * contextForAttrs - Client for the instruction context table. Takes a set of
+ * attributes and returns the appropriate decode context.
+ *
+ * @param attrMask - Attributes, from the enumeration attributeBits.
+ * @return - The InstructionContext to use when looking up an
+ * an instruction with these attributes.
+ */
+static InstructionContext contextForAttrs(uint8_t attrMask) {
+ return CONTEXTS_SYM[attrMask];
+}
+
+/*
+ * modRMRequired - Reads the appropriate instruction table to determine whether
+ * the ModR/M byte is required to decode a particular instruction.
+ *
+ * @param type - The opcode type (i.e., how many bytes it has).
+ * @param insnContext - The context for the instruction, as returned by
+ * contextForAttrs.
+ * @param opcode - The last byte of the instruction's opcode, not counting
+ * ModR/M extensions and escapes.
+ * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
+ */
+static int modRMRequired(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode) {
+ const struct ContextDecision* decision = 0;
+
+ switch (type) {
+ case ONEBYTE:
+ decision = &ONEBYTE_SYM;
+ break;
+ case TWOBYTE:
+ decision = &TWOBYTE_SYM;
+ break;
+ case THREEBYTE_38:
+ decision = &THREEBYTE38_SYM;
+ break;
+ case THREEBYTE_3A:
+ decision = &THREEBYTE3A_SYM;
+ break;
+ case THREEBYTE_A6:
+ decision = &THREEBYTEA6_SYM;
+ break;
+ case THREEBYTE_A7:
+ decision = &THREEBYTEA7_SYM;
+ break;
+ }
+
+ return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
+ modrm_type != MODRM_ONEENTRY;
+}
+
+/*
+ * decode - Reads the appropriate instruction table to obtain the unique ID of
+ * an instruction.
+ *
+ * @param type - See modRMRequired().
+ * @param insnContext - See modRMRequired().
+ * @param opcode - See modRMRequired().
+ * @param modRM - The ModR/M byte if required, or any value if not.
+ * @return - The UID of the instruction, or 0 on failure.
+ */
+static InstrUID decode(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode,
+ uint8_t modRM) {
+ const struct ModRMDecision* dec = 0;
+
+ switch (type) {
+ case ONEBYTE:
+ dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case TWOBYTE:
+ dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_38:
+ dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_3A:
+ dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A6:
+ dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A7:
+ dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ }
+
+ switch (dec->modrm_type) {
+ default:
+ debug("Corrupt table! Unknown modrm_type");
+ return 0;
+ case MODRM_ONEENTRY:
+ return modRMTable[dec->instructionIDs];
+ case MODRM_SPLITRM:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs+1];
+ return modRMTable[dec->instructionIDs];
+ case MODRM_SPLITREG:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
+ case MODRM_SPLITMISC:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
+ case MODRM_FULL:
+ return modRMTable[dec->instructionIDs+modRM];
+ }
+}
+
+/*
+ * specifierForUID - Given a UID, returns the name and operand specification for
+ * that instruction.
+ *
+ * @param uid - The unique ID for the instruction. This should be returned by
+ * decode(); specifierForUID will not check bounds.
+ * @return - A pointer to the specification for that instruction.
+ */
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
+ return &INSTRUCTIONS_SYM[uid];
+}
+
+/*
+ * consumeByte - Uses the reader function provided by the user to consume one
+ * byte from the instruction's memory and advance the cursor.
+ *
+ * @param insn - The instruction with the reader function to use. The cursor
+ * for this instruction is advanced.
+ * @param byte - A pointer to a pre-allocated memory buffer to be populated
+ * with the data read.
+ * @return - 0 if the read was successful; nonzero otherwise.
+ */
+static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
+ int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
+
+ if (!ret)
+ ++(insn->readerCursor);
+
+ return ret;
+}
+
+/*
+ * lookAtByte - Like consumeByte, but does not advance the cursor.
+ *
+ * @param insn - See consumeByte().
+ * @param byte - See consumeByte().
+ * @return - See consumeByte().
+ */
+static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
+ return insn->reader(insn->readerArg, byte, insn->readerCursor);
+}
+
+static void unconsumeByte(struct InternalInstruction* insn) {
+ insn->readerCursor--;
+}
+
+#define CONSUME_FUNC(name, type) \
+ static int name(struct InternalInstruction* insn, type* ptr) { \
+ type combined = 0; \
+ unsigned offset; \
+ for (offset = 0; offset < sizeof(type); ++offset) { \
+ uint8_t byte; \
+ int ret = insn->reader(insn->readerArg, \
+ &byte, \
+ insn->readerCursor + offset); \
+ if (ret) \
+ return ret; \
+ combined = combined | ((uint64_t)byte << (offset * 8)); \
+ } \
+ *ptr = combined; \
+ insn->readerCursor += sizeof(type); \
+ return 0; \
+ }
+
+/*
+ * consume* - Use the reader function provided by the user to consume data
+ * values of various sizes from the instruction's memory and advance the
+ * cursor appropriately. These readers perform endian conversion.
+ *
+ * @param insn - See consumeByte().
+ * @param ptr - A pointer to a pre-allocated memory of appropriate size to
+ * be populated with the data read.
+ * @return - See consumeByte().
+ */
+CONSUME_FUNC(consumeInt8, int8_t)
+CONSUME_FUNC(consumeInt16, int16_t)
+CONSUME_FUNC(consumeInt32, int32_t)
+CONSUME_FUNC(consumeUInt16, uint16_t)
+CONSUME_FUNC(consumeUInt32, uint32_t)
+CONSUME_FUNC(consumeUInt64, uint64_t)
+
+/*
+ * dbgprintf - Uses the logging function provided by the user to log a single
+ * message, typically without a carriage-return.
+ *
+ * @param insn - The instruction containing the logging function.
+ * @param format - See printf().
+ * @param ... - See printf().
+ */
+static void dbgprintf(struct InternalInstruction* insn,
+ const char* format,
+ ...) {
+ char buffer[256];
+ va_list ap;
+
+ if (!insn->dlog)
+ return;
+
+ va_start(ap, format);
+ (void)vsnprintf(buffer, sizeof(buffer), format, ap);
+ va_end(ap);
+
+ insn->dlog(insn->dlogArg, buffer);
+
+ return;
+}
+
+/*
+ * setPrefixPresent - Marks that a particular prefix is present at a particular
+ * location.
+ *
+ * @param insn - The instruction to be marked as having the prefix.
+ * @param prefix - The prefix that is present.
+ * @param location - The location where the prefix is located (in the address
+ * space of the instruction's reader).
+ */
+static void setPrefixPresent(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
+{
+ insn->prefixPresent[prefix] = 1;
+ insn->prefixLocations[prefix] = location;
+}
+
+/*
+ * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
+ * present at a given location.
+ *
+ * @param insn - The instruction to be queried.
+ * @param prefix - The prefix.
+ * @param location - The location to query.
+ * @return - Whether the prefix is at that location.
+ */
+static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
+{
+ if (insn->prefixPresent[prefix] == 1 &&
+ insn->prefixLocations[prefix] == location)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+/*
+ * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
+ * instruction as having them. Also sets the instruction's default operand,
+ * address, and other relevant data sizes to report operands correctly.
+ *
+ * @param insn - The instruction whose prefixes are to be read.
+ * @return - 0 if the instruction could be read until the end of the prefix
+ * bytes, and no prefixes conflicted; nonzero otherwise.
+ */
+static int readPrefixes(struct InternalInstruction* insn) {
+ BOOL isPrefix = TRUE;
+ BOOL prefixGroups[4] = { FALSE };
+ uint64_t prefixLocation;
+ uint8_t byte = 0;
+
+ BOOL hasAdSize = FALSE;
+ BOOL hasOpSize = FALSE;
+
+ dbgprintf(insn, "readPrefixes()");
+
+ while (isPrefix) {
+ prefixLocation = insn->readerCursor;
+
+ if (consumeByte(insn, &byte))
+ return -1;
+
+ /*
+ * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
+ * break and let it be disassembled as a normal "instruction".
+ */
+ if (insn->readerCursor - 1 == insn->startLocation
+ && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
+ uint8_t nextByte;
+ if (byte == 0xf0)
+ break;
+ if (lookAtByte(insn, &nextByte))
+ return -1;
+ if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
+ if (consumeByte(insn, &nextByte))
+ return -1;
+ if (lookAtByte(insn, &nextByte))
+ return -1;
+ unconsumeByte(insn);
+ }
+ if (nextByte != 0x0f && nextByte != 0x90)
+ break;
+ }
+
+ switch (byte) {
+ case 0xf0: /* LOCK */
+ case 0xf2: /* REPNE/REPNZ */
+ case 0xf3: /* REP or REPE/REPZ */
+ if (prefixGroups[0])
+ dbgprintf(insn, "Redundant Group 1 prefix");
+ prefixGroups[0] = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x2e: /* CS segment override -OR- Branch not taken */
+ case 0x36: /* SS segment override -OR- Branch taken */
+ case 0x3e: /* DS segment override */
+ case 0x26: /* ES segment override */
+ case 0x64: /* FS segment override */
+ case 0x65: /* GS segment override */
+ switch (byte) {
+ case 0x2e:
+ insn->segmentOverride = SEG_OVERRIDE_CS;
+ break;
+ case 0x36:
+ insn->segmentOverride = SEG_OVERRIDE_SS;
+ break;
+ case 0x3e:
+ insn->segmentOverride = SEG_OVERRIDE_DS;
+ break;
+ case 0x26:
+ insn->segmentOverride = SEG_OVERRIDE_ES;
+ break;
+ case 0x64:
+ insn->segmentOverride = SEG_OVERRIDE_FS;
+ break;
+ case 0x65:
+ insn->segmentOverride = SEG_OVERRIDE_GS;
+ break;
+ default:
+ debug("Unhandled override");
+ return -1;
+ }
+ if (prefixGroups[1])
+ dbgprintf(insn, "Redundant Group 2 prefix");
+ prefixGroups[1] = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x66: /* Operand-size override */
+ if (prefixGroups[2])
+ dbgprintf(insn, "Redundant Group 3 prefix");
+ prefixGroups[2] = TRUE;
+ hasOpSize = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x67: /* Address-size override */
+ if (prefixGroups[3])
+ dbgprintf(insn, "Redundant Group 4 prefix");
+ prefixGroups[3] = TRUE;
+ hasAdSize = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ default: /* Not a prefix byte */
+ isPrefix = FALSE;
+ break;
+ }
+
+ if (isPrefix)
+ dbgprintf(insn, "Found prefix 0x%hhx", byte);
+ }
+
+ insn->vexSize = 0;
+
+ if (byte == 0xc4) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
+ insn->vexSize = 3;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vexSize == 3) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+ consumeByte(insn, &insn->vexPrefix[2]);
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+ }
+
+ switch (ppFromVEX3of3(insn->vexPrefix[2]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
+ }
+ }
+ else if (byte == 0xc5) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
+ insn->vexSize = 2;
+ }
+ else {
+ unconsumeByte(insn);
+ }
+
+ if (insn->vexSize == 2) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+ }
+
+ switch (ppFromVEX2of2(insn->vexPrefix[1]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
+ }
+ }
+ else {
+ if (insn->mode == MODE_64BIT) {
+ if ((byte & 0xf0) == 0x40) {
+ uint8_t opcodeByte;
+
+ if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+ dbgprintf(insn, "Redundant REX prefix");
+ return -1;
+ }
+
+ insn->rexPrefix = byte;
+ insn->necessaryPrefixLocation = insn->readerCursor - 2;
+
+ dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ }
+
+ if (insn->mode == MODE_16BIT) {
+ insn->registerSize = (hasOpSize ? 4 : 2);
+ insn->addressSize = (hasAdSize ? 4 : 2);
+ insn->displacementSize = (hasAdSize ? 4 : 2);
+ insn->immediateSize = (hasOpSize ? 4 : 2);
+ } else if (insn->mode == MODE_32BIT) {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 2 : 4);
+ insn->displacementSize = (hasAdSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else if (insn->mode == MODE_64BIT) {
+ if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
+ insn->registerSize = 8;
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = 4;
+ insn->immediateSize = 4;
+ } else if (insn->rexPrefix) {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
+ * extended or escape opcodes).
+ *
+ * @param insn - The instruction whose opcode is to be read.
+ * @return - 0 if the opcode could be read successfully; nonzero otherwise.
+ */
+static int readOpcode(struct InternalInstruction* insn) {
+ /* Determine the length of the primary opcode */
+
+ uint8_t current;
+
+ dbgprintf(insn, "readOpcode()");
+
+ insn->opcodeType = ONEBYTE;
+
+ if (insn->vexSize == 3)
+ {
+ switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
+ {
+ default:
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
+ return -1;
+ case 0:
+ break;
+ case VEX_LOB_0F:
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F38:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x38;
+ insn->opcodeType = THREEBYTE_38;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F3A:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x3a;
+ insn->opcodeType = THREEBYTE_3A;
+ return consumeByte(insn, &insn->opcode);
+ }
+ }
+ else if (insn->vexSize == 2)
+ {
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ }
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ if (current == 0x0f) {
+ dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
+
+ insn->twoByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ if (current == 0x38) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_38;
+ } else if (current == 0x3a) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_3A;
+ } else if (current == 0xa6) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A6;
+ } else if (current == 0xa7) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A7;
+ } else {
+ dbgprintf(insn, "Didn't find a three-byte escape prefix");
+
+ insn->opcodeType = TWOBYTE;
+ }
+ }
+
+ /*
+ * At this point we have consumed the full opcode.
+ * Anything we consume from here on must be unconsumed.
+ */
+
+ insn->opcode = current;
+
+ return 0;
+}
+
+static int readModRM(struct InternalInstruction* insn);
+
+/*
+ * getIDWithAttrMask - Determines the ID of an instruction, consuming
+ * the ModR/M byte as appropriate for extended and escape opcodes,
+ * and using a supplied attribute mask.
+ *
+ * @param instructionID - A pointer whose target is filled in with the ID of the
+ * instruction.
+ * @param insn - The instruction whose ID is to be determined.
+ * @param attrMask - The attribute mask to search.
+ * @return - 0 if the ModR/M could be read when needed or was not
+ * needed; nonzero otherwise.
+ */
+static int getIDWithAttrMask(uint16_t* instructionID,
+ struct InternalInstruction* insn,
+ uint8_t attrMask) {
+ BOOL hasModRMExtension;
+
+ uint8_t instructionClass;
+
+ instructionClass = contextForAttrs(attrMask);
+
+ hasModRMExtension = modRMRequired(insn->opcodeType,
+ instructionClass,
+ insn->opcode);
+
+ if (hasModRMExtension) {
+ if (readModRM(insn))
+ return -1;
+
+ *instructionID = decode(insn->opcodeType,
+ instructionClass,
+ insn->opcode,
+ insn->modRM);
+ } else {
+ *instructionID = decode(insn->opcodeType,
+ instructionClass,
+ insn->opcode,
+ 0);
+ }
+
+ return 0;
+}
+
+/*
+ * is16BitEquivalent - Determines whether two instruction names refer to
+ * equivalent instructions but one is 16-bit whereas the other is not.
+ *
+ * @param orig - The instruction that is not 16-bit
+ * @param equiv - The instruction that is 16-bit
+ */
+static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
+ off_t i;
+
+ for (i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
+ return TRUE;
+ if (orig[i] == '\0' || equiv[i] == '\0')
+ return FALSE;
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
+ continue;
+ if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
+ continue;
+ if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
+ continue;
+ return FALSE;
+ }
+ }
+}
+
+/*
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as
+ * appropriate for extended and escape opcodes. Determines the attributes and
+ * context for the instruction before doing so.
+ *
+ * @param insn - The instruction whose ID is to be determined.
+ * @return - 0 if the ModR/M could be read when needed or was not needed;
+ * nonzero otherwise.
+ */
+static int getID(struct InternalInstruction* insn, const void *miiArg) {
+ uint8_t attrMask;
+ uint16_t instructionID;
+
+ dbgprintf(insn, "getID()");
+
+ attrMask = ATTR_NONE;
+
+ if (insn->mode == MODE_64BIT)
+ attrMask |= ATTR_64BIT;
+
+ if (insn->vexSize) {
+ attrMask |= ATTR_VEX;
+
+ if (insn->vexSize == 3) {
+ switch (ppFromVEX3of3(insn->vexPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ }
+ else if (insn->vexSize == 2) {
+ switch (ppFromVEX2of2(insn->vexPrefix[1])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX2of2(insn->vexPrefix[1]))
+ attrMask |= ATTR_VEXL;
+ }
+ else {
+ return -1;
+ }
+ }
+ else {
+ if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_ADSIZE;
+ else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XS;
+ else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XD;
+ }
+
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
+ if (getIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ /* The following clauses compensate for limitations of the tables. */
+
+ if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
+ !(attrMask & ATTR_OPSIZE)) {
+ /*
+ * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
+ * has precedence since there are no L-bit with W-bit entries in the tables.
+ * So if the L-bit isn't significant we should use the W-bit instead.
+ * We only need to do this if the instruction doesn't specify OpSize since
+ * there is a VEX_L_W_OPSIZE table.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithWBit;
+ const struct InstructionSpecifier *specWithWBit;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithWBit,
+ insn,
+ (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithWBit = specifierForUID(instructionIDWithWBit);
+
+ if (instructionID != instructionIDWithWBit) {
+ insn->instructionID = instructionIDWithWBit;
+ insn->spec = specWithWBit;
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
+ /*
+ * The instruction tables make no distinction between instructions that
+ * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
+ * particular spot (i.e., many MMX operations). In general we're
+ * conservative, but in the specific case where OpSize is present but not
+ * in the right place we check if there's a 16-bit operation.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithOpsize;
+ const char *specName, *specWithOpSizeName;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithOpsize,
+ insn,
+ attrMask | ATTR_OPSIZE)) {
+ /*
+ * ModRM required with OpSize but not present; give up and return version
+ * without OpSize set
+ */
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specName = x86DisassemblerGetInstrName(instructionID, miiArg);
+ specWithOpSizeName =
+ x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
+
+ if (is16BitEquivalent(specName, specWithOpSizeName)) {
+ insn->instructionID = instructionIDWithOpsize;
+ insn->spec = specifierForUID(instructionIDWithOpsize);
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
+ insn->rexPrefix & 0x01) {
+ /*
+ * NOOP shouldn't decode as NOOP if REX.b is set. Instead
+ * it should decode as XCHG %r8, %eax.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithNewOpcode;
+ const struct InstructionSpecifier *specWithNewOpcode;
+
+ spec = specifierForUID(instructionID);
+
+ /* Borrow opcode from one of the other XCHGar opcodes */
+ insn->opcode = 0x91;
+
+ if (getIDWithAttrMask(&instructionIDWithNewOpcode,
+ insn,
+ attrMask)) {
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
+
+ /* Change back */
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionIDWithNewOpcode;
+ insn->spec = specWithNewOpcode;
+
+ return 0;
+ }
+
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(insn->instructionID);
+
+ return 0;
+}
+
+/*
+ * readSIB - Consumes the SIB byte to determine addressing information for an
+ * instruction.
+ *
+ * @param insn - The instruction whose SIB byte is to be read.
+ * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
+ */
+static int readSIB(struct InternalInstruction* insn) {
+ SIBIndex sibIndexBase = 0;
+ SIBBase sibBaseBase = 0;
+ uint8_t index, base;
+
+ dbgprintf(insn, "readSIB()");
+
+ if (insn->consumedSIB)
+ return 0;
+
+ insn->consumedSIB = TRUE;
+
+ switch (insn->addressSize) {
+ case 2:
+ dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
+ return -1;
+ break;
+ case 4:
+ sibIndexBase = SIB_INDEX_EAX;
+ sibBaseBase = SIB_BASE_EAX;
+ break;
+ case 8:
+ sibIndexBase = SIB_INDEX_RAX;
+ sibBaseBase = SIB_BASE_RAX;
+ break;
+ }
+
+ if (consumeByte(insn, &insn->sib))
+ return -1;
+
+ index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+
+ switch (index) {
+ case 0x4:
+ insn->sibIndex = SIB_INDEX_NONE;
+ break;
+ default:
+ insn->sibIndex = (SIBIndex)(sibIndexBase + index);
+ if (insn->sibIndex == SIB_INDEX_sib ||
+ insn->sibIndex == SIB_INDEX_sib64)
+ insn->sibIndex = SIB_INDEX_NONE;
+ break;
+ }
+
+ switch (scaleFromSIB(insn->sib)) {
+ case 0:
+ insn->sibScale = 1;
+ break;
+ case 1:
+ insn->sibScale = 2;
+ break;
+ case 2:
+ insn->sibScale = 4;
+ break;
+ case 3:
+ insn->sibScale = 8;
+ break;
+ }
+
+ base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
+
+ switch (base) {
+ case 0x5:
+ switch (modFromModRM(insn->modRM)) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = SIB_BASE_NONE;
+ break;
+ case 0x1:
+ insn->eaDisplacement = EA_DISP_8;
+ insn->sibBase = (insn->addressSize == 4 ?
+ SIB_BASE_EBP : SIB_BASE_RBP);
+ break;
+ case 0x2:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = (insn->addressSize == 4 ?
+ SIB_BASE_EBP : SIB_BASE_RBP);
+ break;
+ case 0x3:
+ debug("Cannot have Mod = 0b11 and a SIB byte");
+ return -1;
+ }
+ break;
+ default:
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readDisplacement - Consumes the displacement of an instruction.
+ *
+ * @param insn - The instruction whose displacement is to be read.
+ * @return - 0 if the displacement byte was successfully read; nonzero
+ * otherwise.
+ */
+static int readDisplacement(struct InternalInstruction* insn) {
+ int8_t d8;
+ int16_t d16;
+ int32_t d32;
+
+ dbgprintf(insn, "readDisplacement()");
+
+ if (insn->consumedDisplacement)
+ return 0;
+
+ insn->consumedDisplacement = TRUE;
+ insn->displacementOffset = insn->readerCursor - insn->startLocation;
+
+ switch (insn->eaDisplacement) {
+ case EA_DISP_NONE:
+ insn->consumedDisplacement = FALSE;
+ break;
+ case EA_DISP_8:
+ if (consumeInt8(insn, &d8))
+ return -1;
+ insn->displacement = d8;
+ break;
+ case EA_DISP_16:
+ if (consumeInt16(insn, &d16))
+ return -1;
+ insn->displacement = d16;
+ break;
+ case EA_DISP_32:
+ if (consumeInt32(insn, &d32))
+ return -1;
+ insn->displacement = d32;
+ break;
+ }
+
+ insn->consumedDisplacement = TRUE;
+ return 0;
+}
+
+/*
+ * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
+ * displacement) for an instruction and interprets it.
+ *
+ * @param insn - The instruction whose addressing information is to be read.
+ * @return - 0 if the information was successfully read; nonzero otherwise.
+ */
+static int readModRM(struct InternalInstruction* insn) {
+ uint8_t mod, rm, reg;
+
+ dbgprintf(insn, "readModRM()");
+
+ if (insn->consumedModRM)
+ return 0;
+
+ if (consumeByte(insn, &insn->modRM))
+ return -1;
+ insn->consumedModRM = TRUE;
+
+ mod = modFromModRM(insn->modRM);
+ rm = rmFromModRM(insn->modRM);
+ reg = regFromModRM(insn->modRM);
+
+ /*
+ * This goes by insn->registerSize to pick the correct register, which messes
+ * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
+ * fixupReg().
+ */
+ switch (insn->registerSize) {
+ case 2:
+ insn->regBase = MODRM_REG_AX;
+ insn->eaRegBase = EA_REG_AX;
+ break;
+ case 4:
+ insn->regBase = MODRM_REG_EAX;
+ insn->eaRegBase = EA_REG_EAX;
+ break;
+ case 8:
+ insn->regBase = MODRM_REG_RAX;
+ insn->eaRegBase = EA_REG_RAX;
+ break;
+ }
+
+ reg |= rFromREX(insn->rexPrefix) << 3;
+ rm |= bFromREX(insn->rexPrefix) << 3;
+
+ insn->reg = (Reg)(insn->regBase + reg);
+
+ switch (insn->addressSize) {
+ case 2:
+ insn->eaBaseBase = EA_BASE_BX_SI;
+
+ switch (mod) {
+ case 0x0:
+ if (rm == 0x6) {
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ } else {
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_NONE;
+ }
+ break;
+ case 0x1:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_8;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x2:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x3:
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 4:
+ case 8:
+ insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
+
+ switch (mod) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
+ switch (rm) {
+ case 0x4:
+ case 0xc: /* in case REXW.b is set */
+ insn->eaBase = (insn->addressSize == 4 ?
+ EA_BASE_sib : EA_BASE_sib64);
+ readSIB(insn);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x5:
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_32;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ break;
+ }
+ break;
+ case 0x1:
+ case 0x2:
+ insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
+ switch (rm) {
+ case 0x4:
+ case 0xc: /* in case REXW.b is set */
+ insn->eaBase = EA_BASE_sib;
+ readSIB(insn);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 0x3:
+ insn->eaDisplacement = EA_DISP_NONE;
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ break;
+ }
+ break;
+ } /* switch (insn->addressSize) */
+
+ return 0;
+}
+
+#define GENERIC_FIXUP_FUNC(name, base, prefix) \
+ static uint8_t name(struct InternalInstruction *insn, \
+ OperandType type, \
+ uint8_t index, \
+ uint8_t *valid) { \
+ *valid = 1; \
+ switch (type) { \
+ default: \
+ debug("Unhandled register type"); \
+ *valid = 0; \
+ return 0; \
+ case TYPE_Rv: \
+ return base + index; \
+ case TYPE_R8: \
+ if (insn->rexPrefix && \
+ index >= 4 && index <= 7) { \
+ return prefix##_SPL + (index - 4); \
+ } else { \
+ return prefix##_AL + index; \
+ } \
+ case TYPE_R16: \
+ return prefix##_AX + index; \
+ case TYPE_R32: \
+ return prefix##_EAX + index; \
+ case TYPE_R64: \
+ return prefix##_RAX + index; \
+ case TYPE_XMM256: \
+ return prefix##_YMM0 + index; \
+ case TYPE_XMM128: \
+ case TYPE_XMM64: \
+ case TYPE_XMM32: \
+ case TYPE_XMM: \
+ return prefix##_XMM0 + index; \
+ case TYPE_MM64: \
+ case TYPE_MM32: \
+ case TYPE_MM: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_MM0 + index; \
+ case TYPE_SEGMENTREG: \
+ if (index > 5) \
+ *valid = 0; \
+ return prefix##_ES + index; \
+ case TYPE_DEBUGREG: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_DR0 + index; \
+ case TYPE_CONTROLREG: \
+ if (index > 8) \
+ *valid = 0; \
+ return prefix##_CR0 + index; \
+ } \
+ }
+
+/*
+ * fixup*Value - Consults an operand type to determine the meaning of the
+ * reg or R/M field. If the operand is an XMM operand, for example, an
+ * operand would be XMM0 instead of AX, which readModRM() would otherwise
+ * misinterpret it as.
+ *
+ * @param insn - The instruction containing the operand.
+ * @param type - The operand type.
+ * @param index - The existing value of the field as reported by readModRM().
+ * @param valid - The address of a uint8_t. The target is set to 1 if the
+ * field is valid for the register class; 0 if not.
+ * @return - The proper value.
+ */
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
+GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
+
+/*
+ * fixupReg - Consults an operand specifier to determine which of the
+ * fixup*Value functions to use in correcting readModRM()'ss interpretation.
+ *
+ * @param insn - See fixup*Value().
+ * @param op - The operand specifier.
+ * @return - 0 if fixup was successful; -1 if the register returned was
+ * invalid for its class.
+ */
+static int fixupReg(struct InternalInstruction *insn,
+ const struct OperandSpecifier *op) {
+ uint8_t valid;
+
+ dbgprintf(insn, "fixupReg()");
+
+ switch ((OperandEncoding)op->encoding) {
+ default:
+ debug("Expected a REG or R/M encoding in fixupReg");
+ return -1;
+ case ENCODING_VVVV:
+ insn->vvvv = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->vvvv,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
+ case ENCODING_REG:
+ insn->reg = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->reg - insn->regBase,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
+ case ENCODING_RM:
+ if (insn->eaBase >= insn->eaRegBase) {
+ insn->eaBase = (EABase)fixupRMValue(insn,
+ (OperandType)op->type,
+ insn->eaBase - insn->eaRegBase,
+ &valid);
+ if (!valid)
+ return -1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcodeModifier - Reads an operand from the opcode field of an
+ * instruction. Handles AddRegFrm instructions.
+ *
+ * @param insn - The instruction whose opcode field is to be read.
+ * @param inModRM - Indicates that the opcode field is to be read from the
+ * ModR/M extension; useful for escape opcodes
+ * @return - 0 on success; nonzero otherwise.
+ */
+static int readOpcodeModifier(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readOpcodeModifier()");
+
+ if (insn->consumedOpcodeModifier)
+ return 0;
+
+ insn->consumedOpcodeModifier = TRUE;
+
+ switch (insn->spec->modifierType) {
+ default:
+ debug("Unknown modifier type.");
+ return -1;
+ case MODIFIER_NONE:
+ debug("No modifier but an operand expects one.");
+ return -1;
+ case MODIFIER_OPCODE:
+ insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
+ return 0;
+ case MODIFIER_MODRM:
+ insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
+ return 0;
+ }
+}
+
+/*
+ * readOpcodeRegister - Reads an operand from the opcode field of an
+ * instruction and interprets it appropriately given the operand width.
+ * Handles AddRegFrm instructions.
+ *
+ * @param insn - See readOpcodeModifier().
+ * @param size - The width (in bytes) of the register being specified.
+ * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
+ * RAX.
+ * @return - 0 on success; nonzero otherwise.
+ */
+static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
+ dbgprintf(insn, "readOpcodeRegister()");
+
+ if (readOpcodeModifier(insn))
+ return -1;
+
+ if (size == 0)
+ size = insn->registerSize;
+
+ switch (size) {
+ case 1:
+ insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ if (insn->rexPrefix &&
+ insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
+ insn->opcodeRegister < MODRM_REG_AL + 0x8) {
+ insn->opcodeRegister = (Reg)(MODRM_REG_SPL
+ + (insn->opcodeRegister - MODRM_REG_AL - 4));
+ }
+
+ break;
+ case 2:
+ insn->opcodeRegister = (Reg)(MODRM_REG_AX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ case 4:
+ insn->opcodeRegister = (Reg)(MODRM_REG_EAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ case 8:
+ insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readImmediate - Consumes an immediate operand from an instruction, given the
+ * desired operand size.
+ *
+ * @param insn - The instruction whose operand is to be read.
+ * @param size - The width (in bytes) of the operand.
+ * @return - 0 if the immediate was successfully consumed; nonzero
+ * otherwise.
+ */
+static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
+ uint8_t imm8;
+ uint16_t imm16;
+ uint32_t imm32;
+ uint64_t imm64;
+
+ dbgprintf(insn, "readImmediate()");
+
+ if (insn->numImmediatesConsumed == 2) {
+ debug("Already consumed two immediates");
+ return -1;
+ }
+
+ if (size == 0)
+ size = insn->immediateSize;
+ else
+ insn->immediateSize = size;
+ insn->immediateOffset = insn->readerCursor - insn->startLocation;
+
+ switch (size) {
+ case 1:
+ if (consumeByte(insn, &imm8))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm8;
+ break;
+ case 2:
+ if (consumeUInt16(insn, &imm16))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm16;
+ break;
+ case 4:
+ if (consumeUInt32(insn, &imm32))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm32;
+ break;
+ case 8:
+ if (consumeUInt64(insn, &imm64))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm64;
+ break;
+ }
+
+ insn->numImmediatesConsumed++;
+
+ return 0;
+}
+
+/*
+ * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
+ *
+ * @param insn - The instruction whose operand is to be read.
+ * @return - 0 if the vvvv was successfully consumed; nonzero
+ * otherwise.
+ */
+static int readVVVV(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readVVVV()");
+
+ if (insn->vexSize == 3)
+ insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
+ else if (insn->vexSize == 2)
+ insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
+ else
+ return -1;
+
+ if (insn->mode != MODE_64BIT)
+ insn->vvvv &= 0x7;
+
+ return 0;
+}
+
+/*
+ * readOperands - Consults the specifier for an instruction and consumes all
+ * operands for that instruction, interpreting them as it goes.
+ *
+ * @param insn - The instruction whose operands are to be read and interpreted.
+ * @return - 0 if all operands could be read; nonzero otherwise.
+ */
+static int readOperands(struct InternalInstruction* insn) {
+ int index;
+ int hasVVVV, needVVVV;
+ int sawRegImm = 0;
+
+ dbgprintf(insn, "readOperands()");
+
+ /* If non-zero vvvv specified, need to make sure one of the operands
+ uses it. */
+ hasVVVV = !readVVVV(insn);
+ needVVVV = hasVVVV && (insn->vvvv != 0);
+
+ for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+ switch (x86OperandSets[insn->spec->operands][index].encoding) {
+ case ENCODING_NONE:
+ break;
+ case ENCODING_REG:
+ case ENCODING_RM:
+ if (readModRM(insn))
+ return -1;
+ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
+ return -1;
+ break;
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ dbgprintf(insn, "We currently don't hande code-offset encodings");
+ return -1;
+ case ENCODING_IB:
+ if (sawRegImm) {
+ /* Saw a register immediate so don't read again and instead split the
+ previous immediate. FIXME: This is a hack. */
+ insn->immediates[insn->numImmediatesConsumed] =
+ insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
+ ++insn->numImmediatesConsumed;
+ break;
+ }
+ if (readImmediate(insn, 1))
+ return -1;
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 7)
+ return -1;
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 31)
+ return -1;
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
+ x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
+ sawRegImm = 1;
+ break;
+ case ENCODING_IW:
+ if (readImmediate(insn, 2))
+ return -1;
+ break;
+ case ENCODING_ID:
+ if (readImmediate(insn, 4))
+ return -1;
+ break;
+ case ENCODING_IO:
+ if (readImmediate(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Iv:
+ if (readImmediate(insn, insn->immediateSize))
+ return -1;
+ break;
+ case ENCODING_Ia:
+ if (readImmediate(insn, insn->addressSize))
+ return -1;
+ break;
+ case ENCODING_RB:
+ if (readOpcodeRegister(insn, 1))
+ return -1;
+ break;
+ case ENCODING_RW:
+ if (readOpcodeRegister(insn, 2))
+ return -1;
+ break;
+ case ENCODING_RD:
+ if (readOpcodeRegister(insn, 4))
+ return -1;
+ break;
+ case ENCODING_RO:
+ if (readOpcodeRegister(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Rv:
+ if (readOpcodeRegister(insn, 0))
+ return -1;
+ break;
+ case ENCODING_I:
+ if (readOpcodeModifier(insn))
+ return -1;
+ break;
+ case ENCODING_VVVV:
+ needVVVV = 0; /* Mark that we have found a VVVV operand. */
+ if (!hasVVVV)
+ return -1;
+ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
+ return -1;
+ break;
+ case ENCODING_DUP:
+ break;
+ default:
+ dbgprintf(insn, "Encountered an operand with an unknown encoding.");
+ return -1;
+ }
+ }
+
+ /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
+ if (needVVVV) return -1;
+
+ return 0;
+}
+
+/*
+ * decodeInstruction - Reads and interprets a full instruction provided by the
+ * user.
+ *
+ * @param insn - A pointer to the instruction to be populated. Must be
+ * pre-allocated.
+ * @param reader - The function to be used to read the instruction's bytes.
+ * @param readerArg - A generic argument to be passed to the reader to store
+ * any internal state.
+ * @param logger - If non-NULL, the function to be used to write log messages
+ * and warnings.
+ * @param loggerArg - A generic argument to be passed to the logger to store
+ * any internal state.
+ * @param startLoc - The address (in the reader's address space) of the first
+ * byte in the instruction.
+ * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
+ * decode the instruction in.
+ * @return - 0 if the instruction's memory could be read; nonzero if
+ * not.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+ byteReader_t reader,
+ const void* readerArg,
+ dlog_t logger,
+ void* loggerArg,
+ const void* miiArg,
+ uint64_t startLoc,
+ DisassemblerMode mode) {
+ memset(insn, 0, sizeof(struct InternalInstruction));
+
+ insn->reader = reader;
+ insn->readerArg = readerArg;
+ insn->dlog = logger;
+ insn->dlogArg = loggerArg;
+ insn->startLocation = startLoc;
+ insn->readerCursor = startLoc;
+ insn->mode = mode;
+ insn->numImmediatesConsumed = 0;
+
+ if (readPrefixes(insn) ||
+ readOpcode(insn) ||
+ getID(insn, miiArg) ||
+ insn->instructionID == 0 ||
+ readOperands(insn))
+ return -1;
+
+ insn->operands = &x86OperandSets[insn->spec->operands][0];
+
+ insn->length = insn->readerCursor - insn->startLocation;
+
+ dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
+ startLoc, insn->readerCursor, insn->length);
+
+ if (insn->length > 15)
+ dbgprintf(insn, "Instruction exceeds 15-byte limit");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
new file mode 100644
index 000000000000..407ead3cafa9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -0,0 +1,588 @@
+/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the public interface of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#ifndef X86DISASSEMBLERDECODER_H
+#define X86DISASSEMBLERDECODER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ uint16_t operands;
+
+#define INSTRUCTION_IDS \
+ uint16_t instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+/*
+ * Accessor functions for various fields of an Intel instruction
+ */
+#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
+#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
+#define rmFromModRM(modRM) ((modRM) & 0x7)
+#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
+#define indexFromSIB(sib) (((sib) & 0x38) >> 3)
+#define baseFromSIB(sib) ((sib) & 0x7)
+#define wFromREX(rex) (((rex) & 0x8) >> 3)
+#define rFromREX(rex) (((rex) & 0x4) >> 2)
+#define xFromREX(rex) (((rex) & 0x2) >> 1)
+#define bFromREX(rex) ((rex) & 0x1)
+
+#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
+#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
+#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
+#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
+#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
+#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX3of3(vex) ((vex) & 0x3)
+
+#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
+#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX2of2(vex) ((vex) & 0x3)
+
+/*
+ * These enums represent Intel registers for use by the decoder.
+ */
+
+#define REGS_8BIT \
+ ENTRY(AL) \
+ ENTRY(CL) \
+ ENTRY(DL) \
+ ENTRY(BL) \
+ ENTRY(AH) \
+ ENTRY(CH) \
+ ENTRY(DH) \
+ ENTRY(BH) \
+ ENTRY(R8B) \
+ ENTRY(R9B) \
+ ENTRY(R10B) \
+ ENTRY(R11B) \
+ ENTRY(R12B) \
+ ENTRY(R13B) \
+ ENTRY(R14B) \
+ ENTRY(R15B) \
+ ENTRY(SPL) \
+ ENTRY(BPL) \
+ ENTRY(SIL) \
+ ENTRY(DIL)
+
+#define EA_BASES_16BIT \
+ ENTRY(BX_SI) \
+ ENTRY(BX_DI) \
+ ENTRY(BP_SI) \
+ ENTRY(BP_DI) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(BP) \
+ ENTRY(BX) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+#define REGS_16BIT \
+ ENTRY(AX) \
+ ENTRY(CX) \
+ ENTRY(DX) \
+ ENTRY(BX) \
+ ENTRY(SP) \
+ ENTRY(BP) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+#define EA_BASES_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(sib) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+#define REGS_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(ESP) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+#define EA_BASES_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(sib64) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+#define REGS_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(RSP) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+#define REGS_MMX \
+ ENTRY(MM0) \
+ ENTRY(MM1) \
+ ENTRY(MM2) \
+ ENTRY(MM3) \
+ ENTRY(MM4) \
+ ENTRY(MM5) \
+ ENTRY(MM6) \
+ ENTRY(MM7)
+
+#define REGS_XMM \
+ ENTRY(XMM0) \
+ ENTRY(XMM1) \
+ ENTRY(XMM2) \
+ ENTRY(XMM3) \
+ ENTRY(XMM4) \
+ ENTRY(XMM5) \
+ ENTRY(XMM6) \
+ ENTRY(XMM7) \
+ ENTRY(XMM8) \
+ ENTRY(XMM9) \
+ ENTRY(XMM10) \
+ ENTRY(XMM11) \
+ ENTRY(XMM12) \
+ ENTRY(XMM13) \
+ ENTRY(XMM14) \
+ ENTRY(XMM15)
+
+#define REGS_YMM \
+ ENTRY(YMM0) \
+ ENTRY(YMM1) \
+ ENTRY(YMM2) \
+ ENTRY(YMM3) \
+ ENTRY(YMM4) \
+ ENTRY(YMM5) \
+ ENTRY(YMM6) \
+ ENTRY(YMM7) \
+ ENTRY(YMM8) \
+ ENTRY(YMM9) \
+ ENTRY(YMM10) \
+ ENTRY(YMM11) \
+ ENTRY(YMM12) \
+ ENTRY(YMM13) \
+ ENTRY(YMM14) \
+ ENTRY(YMM15)
+
+#define REGS_SEGMENT \
+ ENTRY(ES) \
+ ENTRY(CS) \
+ ENTRY(SS) \
+ ENTRY(DS) \
+ ENTRY(FS) \
+ ENTRY(GS)
+
+#define REGS_DEBUG \
+ ENTRY(DR0) \
+ ENTRY(DR1) \
+ ENTRY(DR2) \
+ ENTRY(DR3) \
+ ENTRY(DR4) \
+ ENTRY(DR5) \
+ ENTRY(DR6) \
+ ENTRY(DR7)
+
+#define REGS_CONTROL \
+ ENTRY(CR0) \
+ ENTRY(CR1) \
+ ENTRY(CR2) \
+ ENTRY(CR3) \
+ ENTRY(CR4) \
+ ENTRY(CR5) \
+ ENTRY(CR6) \
+ ENTRY(CR7) \
+ ENTRY(CR8)
+
+#define ALL_EA_BASES \
+ EA_BASES_16BIT \
+ EA_BASES_32BIT \
+ EA_BASES_64BIT
+
+#define ALL_SIB_BASES \
+ REGS_32BIT \
+ REGS_64BIT
+
+#define ALL_REGS \
+ REGS_8BIT \
+ REGS_16BIT \
+ REGS_32BIT \
+ REGS_64BIT \
+ REGS_MMX \
+ REGS_XMM \
+ REGS_YMM \
+ REGS_SEGMENT \
+ REGS_DEBUG \
+ REGS_CONTROL \
+ ENTRY(RIP)
+
+/*
+ * EABase - All possible values of the base field for effective-address
+ * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
+ * distinguish between bases (EA_BASE_*) and registers that just happen to be
+ * referred to when Mod == 0b11 (EA_REG_*).
+ */
+typedef enum {
+ EA_BASE_NONE,
+#define ENTRY(x) EA_BASE_##x,
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) EA_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ EA_max
+} EABase;
+
+/*
+ * SIBIndex - All possible values of the SIB index field.
+ * Borrows entries from ALL_EA_BASES with the special case that
+ * sib is synonymous with NONE.
+ * Vector SIB: index can be XMM or YMM.
+ */
+typedef enum {
+ SIB_INDEX_NONE,
+#define ENTRY(x) SIB_INDEX_##x,
+ ALL_EA_BASES
+ REGS_XMM
+ REGS_YMM
+#undef ENTRY
+ SIB_INDEX_max
+} SIBIndex;
+
+/*
+ * SIBBase - All possible values of the SIB base field.
+ */
+typedef enum {
+ SIB_BASE_NONE,
+#define ENTRY(x) SIB_BASE_##x,
+ ALL_SIB_BASES
+#undef ENTRY
+ SIB_BASE_max
+} SIBBase;
+
+/*
+ * EADisplacement - Possible displacement types for effective-address
+ * computations.
+ */
+typedef enum {
+ EA_DISP_NONE,
+ EA_DISP_8,
+ EA_DISP_16,
+ EA_DISP_32
+} EADisplacement;
+
+/*
+ * Reg - All possible values of the reg field in the ModR/M byte.
+ */
+typedef enum {
+#define ENTRY(x) MODRM_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ MODRM_REG_max
+} Reg;
+
+/*
+ * SegmentOverride - All possible segment overrides.
+ */
+typedef enum {
+ SEG_OVERRIDE_NONE,
+ SEG_OVERRIDE_CS,
+ SEG_OVERRIDE_SS,
+ SEG_OVERRIDE_DS,
+ SEG_OVERRIDE_ES,
+ SEG_OVERRIDE_FS,
+ SEG_OVERRIDE_GS,
+ SEG_OVERRIDE_max
+} SegmentOverride;
+
+/*
+ * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
+ */
+
+typedef enum {
+ VEX_LOB_0F = 0x1,
+ VEX_LOB_0F38 = 0x2,
+ VEX_LOB_0F3A = 0x3
+} VEXLeadingOpcodeByte;
+
+/*
+ * VEXPrefixCode - Possible values for the VEX.pp field
+ */
+
+typedef enum {
+ VEX_PREFIX_NONE = 0x0,
+ VEX_PREFIX_66 = 0x1,
+ VEX_PREFIX_F3 = 0x2,
+ VEX_PREFIX_F2 = 0x3
+} VEXPrefixCode;
+
+typedef uint8_t BOOL;
+
+/*
+ * byteReader_t - Type for the byte reader that the consumer must provide to
+ * the decoder. Reads a single byte from the instruction's address space.
+ * @param arg - A baton that the consumer can associate with any internal
+ * state that it needs.
+ * @param byte - A pointer to a single byte in memory that should be set to
+ * contain the value at address.
+ * @param address - The address in the instruction's address space that should
+ * be read from.
+ * @return - -1 if the byte cannot be read for any reason; 0 otherwise.
+ */
+typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address);
+
+/*
+ * dlog_t - Type for the logging function that the consumer can provide to
+ * get debugging output from the decoder.
+ * @param arg - A baton that the consumer can associate with any internal
+ * state that it needs.
+ * @param log - A string that contains the message. Will be reused after
+ * the logger returns.
+ */
+typedef void (*dlog_t)(void* arg, const char *log);
+
+/*
+ * The x86 internal instruction, which is produced by the decoder.
+ */
+struct InternalInstruction {
+ /* Reader interface (C) */
+ byteReader_t reader;
+ /* Opaque value passed to the reader */
+ const void* readerArg;
+ /* The address of the next byte to read via the reader */
+ uint64_t readerCursor;
+
+ /* Logger interface (C) */
+ dlog_t dlog;
+ /* Opaque value passed to the logger */
+ void* dlogArg;
+
+ /* General instruction information */
+
+ /* The mode to disassemble for (64-bit, protected, real) */
+ DisassemblerMode mode;
+ /* The start of the instruction, usable with the reader */
+ uint64_t startLocation;
+ /* The length of the instruction, in bytes */
+ size_t length;
+
+ /* Prefix state */
+
+ /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
+ uint8_t prefixPresent[0x100];
+ /* contains the location (for use with the reader) of the prefix byte */
+ uint64_t prefixLocations[0x100];
+ /* The value of the VEX prefix, if present */
+ uint8_t vexPrefix[3];
+ /* The length of the VEX prefix (0 if not present) */
+ uint8_t vexSize;
+ /* The value of the REX prefix, if present */
+ uint8_t rexPrefix;
+ /* The location where a mandatory prefix would have to be (i.e., right before
+ the opcode, or right before the REX prefix if one is present) */
+ uint64_t necessaryPrefixLocation;
+ /* The segment override type */
+ SegmentOverride segmentOverride;
+
+ /* Sizes of various critical pieces of data, in bytes */
+ uint8_t registerSize;
+ uint8_t addressSize;
+ uint8_t displacementSize;
+ uint8_t immediateSize;
+
+ /* Offsets from the start of the instruction to the pieces of data, which is
+ needed to find relocation entries for adding symbolic operands */
+ uint8_t displacementOffset;
+ uint8_t immediateOffset;
+
+ /* opcode state */
+
+ /* The value of the two-byte escape prefix (usually 0x0f) */
+ uint8_t twoByteEscape;
+ /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
+ uint8_t threeByteEscape;
+ /* The last byte of the opcode, not counting any ModR/M extension */
+ uint8_t opcode;
+ /* The ModR/M byte of the instruction, if it is an opcode extension */
+ uint8_t modRMExtension;
+
+ /* decode state */
+
+ /* The type of opcode, used for indexing into the array of decode tables */
+ OpcodeType opcodeType;
+ /* The instruction ID, extracted from the decode table */
+ uint16_t instructionID;
+ /* The specifier for the instruction, from the instruction info table */
+ const struct InstructionSpecifier *spec;
+
+ /* state for additional bytes, consumed during operand decode. Pattern:
+ consumed___ indicates that the byte was already consumed and does not
+ need to be consumed again */
+
+ /* The VEX.vvvv field, which contains a third register operand for some AVX
+ instructions */
+ Reg vvvv;
+
+ /* The ModR/M byte, which contains most register operands and some portion of
+ all memory operands */
+ BOOL consumedModRM;
+ uint8_t modRM;
+
+ /* The SIB byte, used for more complex 32- or 64-bit memory operands */
+ BOOL consumedSIB;
+ uint8_t sib;
+
+ /* The displacement, used for memory operands */
+ BOOL consumedDisplacement;
+ int32_t displacement;
+
+ /* Immediates. There can be two in some cases */
+ uint8_t numImmediatesConsumed;
+ uint8_t numImmediatesTranslated;
+ uint64_t immediates[2];
+
+ /* A register or immediate operand encoded into the opcode */
+ BOOL consumedOpcodeModifier;
+ uint8_t opcodeModifier;
+ Reg opcodeRegister;
+
+ /* Portions of the ModR/M byte */
+
+ /* These fields determine the allowable values for the ModR/M fields, which
+ depend on operand and address widths */
+ EABase eaBaseBase;
+ EABase eaRegBase;
+ Reg regBase;
+
+ /* The Mod and R/M fields can encode a base for an effective address, or a
+ register. These are separated into two fields here */
+ EABase eaBase;
+ EADisplacement eaDisplacement;
+ /* The reg field always encodes a register */
+ Reg reg;
+
+ /* SIB state */
+ SIBIndex sibIndex;
+ uint8_t sibScale;
+ SIBBase sibBase;
+
+ const struct OperandSpecifier *operands;
+};
+
+/* decodeInstruction - Decode one instruction and store the decoding results in
+ * a buffer provided by the consumer.
+ * @param insn - The buffer to store the instruction in. Allocated by the
+ * consumer.
+ * @param reader - The byteReader_t for the bytes to be read.
+ * @param readerArg - An argument to pass to the reader for storing context
+ * specific to the consumer. May be NULL.
+ * @param logger - The dlog_t to be used in printing status messages from the
+ * disassembler. May be NULL.
+ * @param loggerArg - An argument to pass to the logger for storing context
+ * specific to the logger. May be NULL.
+ * @param startLoc - The address (in the reader's address space) of the first
+ * byte in the instruction.
+ * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
+ * @return - Nonzero if there was an error during decode, 0 otherwise.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+ byteReader_t reader,
+ const void* readerArg,
+ dlog_t logger,
+ void* loggerArg,
+ const void* miiArg,
+ uint64_t startLoc,
+ DisassemblerMode mode);
+
+/* x86DisassemblerDebug - C-accessible function for printing a message to
+ * debugs()
+ * @param file - The name of the file printing the debug message.
+ * @param line - The line number that printed the debug message.
+ * @param s - The message to print.
+ */
+
+void x86DisassemblerDebug(const char *file,
+ unsigned line,
+ const char *s);
+
+const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
new file mode 100644
index 000000000000..23dfe4b5b5f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -0,0 +1,398 @@
+/*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains common definitions used by both the disassembler and the table
+ * generator.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+/*
+ * This header file provides those definitions that need to be shared between
+ * the decoder and the table generator in a C-friendly manner.
+ */
+
+#ifndef X86DISASSEMBLERDECODERCOMMON_H
+#define X86DISASSEMBLERDECODERCOMMON_H
+
+#include "llvm/Support/DataTypes.h"
+
+#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
+#define CONTEXTS_SYM x86DisassemblerContexts
+#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes
+#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
+#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
+#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
+#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes
+#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes
+
+#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
+#define CONTEXTS_STR "x86DisassemblerContexts"
+#define ONEBYTE_STR "x86DisassemblerOneByteOpcodes"
+#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"
+#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"
+#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
+#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes"
+#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes"
+
+/*
+ * Attributes of an instruction that must be known before the opcode can be
+ * processed correctly. Most of these indicate the presence of particular
+ * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
+ */
+#define ATTRIBUTE_BITS \
+ ENUM_ENTRY(ATTR_NONE, 0x00) \
+ ENUM_ENTRY(ATTR_64BIT, 0x01) \
+ ENUM_ENTRY(ATTR_XS, 0x02) \
+ ENUM_ENTRY(ATTR_XD, 0x04) \
+ ENUM_ENTRY(ATTR_REXW, 0x08) \
+ ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
+ ENUM_ENTRY(ATTR_ADSIZE, 0x20) \
+ ENUM_ENTRY(ATTR_VEX, 0x40) \
+ ENUM_ENTRY(ATTR_VEXL, 0x80)
+
+#define ENUM_ENTRY(n, v) n = v,
+enum attributeBits {
+ ATTRIBUTE_BITS
+ ATTR_max
+};
+#undef ENUM_ENTRY
+
+/*
+ * Combinations of the above attributes that are relevant to instruction
+ * decode. Although other combinations are possible, they can be reduced to
+ * these without affecting the ultimately decoded instruction.
+ */
+
+/* Class name Rank Rationale for rank assignment */
+#define INSTRUCTION_CONTEXTS \
+ ENUM_ENTRY(IC, 0, "says nothing about the instruction") \
+ ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \
+ "64-bit mode but no more") \
+ ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_ADSIZE, 3, "requires an ADSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
+ "change width; overrides IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_ADSIZE, 3, "Just as meaningful as IC_ADSIZE") \
+ ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
+ "secondary") \
+ ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
+ ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
+ "opcode") \
+ ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
+ "IC_64BIT_REXW_XS") \
+ ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
+ "else because this changes most " \
+ "operands' meaning") \
+ ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \
+ ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
+ ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
+ ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
+ ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
+ ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
+ ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
+ ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
+ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
+ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
+ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize")
+
+
+#define ENUM_ENTRY(n, r, d) n,
+typedef enum {
+ INSTRUCTION_CONTEXTS
+ IC_max
+} InstructionContext;
+#undef ENUM_ENTRY
+
+/*
+ * Opcode types, which determine which decode table to use, both in the Intel
+ * manual and also for the decoder.
+ */
+typedef enum {
+ ONEBYTE = 0,
+ TWOBYTE = 1,
+ THREEBYTE_38 = 2,
+ THREEBYTE_3A = 3,
+ THREEBYTE_A6 = 4,
+ THREEBYTE_A7 = 5
+} OpcodeType;
+
+/*
+ * The following structs are used for the hierarchical decode table. After
+ * determining the instruction's class (i.e., which IC_* constant applies to
+ * it), the decoder reads the opcode. Some instructions require specific
+ * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
+ *
+ * If a ModR/M byte is not required, "required" is left unset, and the values
+ * for each instructionID are identical.
+ */
+
+typedef uint16_t InstrUID;
+
+/*
+ * ModRMDecisionType - describes the type of ModR/M decision, allowing the
+ * consumer to determine the number of entries in it.
+ *
+ * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
+ * instruction is the same.
+ * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
+ * corresponds to one instruction; otherwise, it corresponds to
+ * a different instruction.
+ * MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte
+ * divided by 8 is used to select instruction; otherwise, each
+ * value of the ModR/M byte could correspond to a different
+ * instruction.
+ * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This
+ corresponds to instructions that use reg field as opcode
+ * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
+ * to a different instruction.
+ */
+
+#define MODRMTYPES \
+ ENUM_ENTRY(MODRM_ONEENTRY) \
+ ENUM_ENTRY(MODRM_SPLITRM) \
+ ENUM_ENTRY(MODRM_SPLITMISC) \
+ ENUM_ENTRY(MODRM_SPLITREG) \
+ ENUM_ENTRY(MODRM_FULL)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+ MODRMTYPES
+ MODRM_max
+} ModRMDecisionType;
+#undef ENUM_ENTRY
+
+/*
+ * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
+ * instruction each possible value of the ModR/M byte corresponds to. Once
+ * this information is known, we have narrowed down to a single instruction.
+ */
+struct ModRMDecision {
+ uint8_t modrm_type;
+
+ /* The macro below must be defined wherever this file is included. */
+ INSTRUCTION_IDS
+};
+
+/*
+ * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
+ * given a particular opcode.
+ */
+struct OpcodeDecision {
+ struct ModRMDecision modRMDecisions[256];
+};
+
+/*
+ * ContextDecision - Specifies which opcode->instruction tables to look at given
+ * a particular context (set of attributes). Since there are many possible
+ * contexts, the decoder first uses CONTEXTS_SYM to determine which context
+ * applies given a specific set of attributes. Hence there are only IC_max
+ * entries in this table, rather than 2^(ATTR_max).
+ */
+struct ContextDecision {
+ struct OpcodeDecision opcodeDecisions[IC_max];
+};
+
+/*
+ * Physical encodings of instruction operands.
+ */
+
+#define ENCODINGS \
+ ENUM_ENTRY(ENCODING_NONE, "") \
+ ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
+ ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
+ ENUM_ENTRY(ENCODING_CW, "2-byte") \
+ ENUM_ENTRY(ENCODING_CD, "4-byte") \
+ ENUM_ENTRY(ENCODING_CP, "6-byte") \
+ ENUM_ENTRY(ENCODING_CO, "8-byte") \
+ ENUM_ENTRY(ENCODING_CT, "10-byte") \
+ ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
+ ENUM_ENTRY(ENCODING_IW, "2-byte") \
+ ENUM_ENTRY(ENCODING_ID, "4-byte") \
+ ENUM_ENTRY(ENCODING_IO, "8-byte") \
+ ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \
+ "the opcode byte") \
+ ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \
+ ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \
+ ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \
+ ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \
+ "opcode byte") \
+ \
+ ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \
+ ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \
+ ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \
+ "opcode byte") \
+ ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
+ "in type")
+
+#define ENUM_ENTRY(n, d) n,
+ typedef enum {
+ ENCODINGS
+ ENCODING_max
+ } OperandEncoding;
+#undef ENUM_ENTRY
+
+/*
+ * Semantic interpretations of instruction operands.
+ */
+
+#define TYPES \
+ ENUM_ENTRY(TYPE_NONE, "") \
+ ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
+ ENUM_ENTRY(TYPE_REL16, "2-byte") \
+ ENUM_ENTRY(TYPE_REL32, "4-byte") \
+ ENUM_ENTRY(TYPE_REL64, "8-byte") \
+ ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
+ ENUM_ENTRY(TYPE_R16, "2-byte") \
+ ENUM_ENTRY(TYPE_R32, "4-byte") \
+ ENUM_ENTRY(TYPE_R64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
+ ENUM_ENTRY(TYPE_IMM16, "2-byte") \
+ ENUM_ENTRY(TYPE_IMM32, "4-byte") \
+ ENUM_ENTRY(TYPE_IMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
+ ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
+ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
+ ENUM_ENTRY(TYPE_RM16, "2-byte") \
+ ENUM_ENTRY(TYPE_RM32, "4-byte") \
+ ENUM_ENTRY(TYPE_RM64, "8-byte") \
+ ENUM_ENTRY(TYPE_M, "Memory operand") \
+ ENUM_ENTRY(TYPE_M8, "1-byte") \
+ ENUM_ENTRY(TYPE_M16, "2-byte") \
+ ENUM_ENTRY(TYPE_M32, "4-byte") \
+ ENUM_ENTRY(TYPE_M64, "8-byte") \
+ ENUM_ENTRY(TYPE_LEA, "Effective address") \
+ ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
+ ENUM_ENTRY(TYPE_M256, "256-byte (AVX)") \
+ ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \
+ ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \
+ ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \
+ ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \
+ ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
+ "base)") \
+ ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
+ ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
+ ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
+ ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \
+ "2 = SS, 3 = DS, 4 = FS, 5 = GS") \
+ ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \
+ ENUM_ENTRY(TYPE_M64FP, "64-bit") \
+ ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
+ ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \
+ "floating-point instructions") \
+ ENUM_ENTRY(TYPE_M32INT, "4-byte") \
+ ENUM_ENTRY(TYPE_M64INT, "8-byte") \
+ ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
+ ENUM_ENTRY(TYPE_MM, "MMX register operand") \
+ ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \
+ ENUM_ENTRY(TYPE_MM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM, "XMM register operand") \
+ ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
+ ENUM_ENTRY(TYPE_XMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM128, "16-byte") \
+ ENUM_ENTRY(TYPE_XMM256, "32-byte") \
+ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
+ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
+ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
+ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
+ \
+ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
+ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
+ ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
+ ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
+ ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
+ ENUM_ENTRY(TYPE_DUP1, "operand 1") \
+ ENUM_ENTRY(TYPE_DUP2, "operand 2") \
+ ENUM_ENTRY(TYPE_DUP3, "operand 3") \
+ ENUM_ENTRY(TYPE_DUP4, "operand 4") \
+ ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
+
+#define ENUM_ENTRY(n, d) n,
+typedef enum {
+ TYPES
+ TYPE_max
+} OperandType;
+#undef ENUM_ENTRY
+
+/*
+ * OperandSpecifier - The specification for how to extract and interpret one
+ * operand.
+ */
+struct OperandSpecifier {
+ uint8_t encoding;
+ uint8_t type;
+};
+
+/*
+ * Indicates where the opcode modifier (if any) is to be found. Extended
+ * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
+ */
+
+#define MODIFIER_TYPES \
+ ENUM_ENTRY(MODIFIER_NONE) \
+ ENUM_ENTRY(MODIFIER_OPCODE) \
+ ENUM_ENTRY(MODIFIER_MODRM)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+ MODIFIER_TYPES
+ MODIFIER_max
+} ModifierType;
+#undef ENUM_ENTRY
+
+#define X86_MAX_OPERANDS 5
+
+/*
+ * The specification for how to extract and interpret a full instruction and
+ * its operands.
+ */
+struct InstructionSpecifier {
+ uint8_t modifierType;
+ uint8_t modifierBase;
+
+ /* The macro below must be defined wherever this file is included. */
+ INSTRUCTION_SPECIFIER_FIELDS
+};
+
+/*
+ * Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
+ * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
+ * respectively.
+ */
+typedef enum {
+ MODE_16BIT,
+ MODE_32BIT,
+ MODE_64BIT
+} DisassemblerMode;
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
new file mode 100644
index 000000000000..e357710b20eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -0,0 +1,218 @@
+//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86ATTInstPrinter.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86InstComments.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include <map>
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "X86GenAsmWriter.inc"
+
+void X86ATTInstPrinter::printRegName(raw_ostream &OS,
+ unsigned RegNo) const {
+ OS << markup("<reg:")
+ << '%' << getRegisterName(RegNo)
+ << markup(">");
+}
+
+void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::LOCK)
+ OS << "\tlock\n";
+
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, OS))
+ printInstruction(MI, OS);
+
+ // Next always print the annotation.
+ printAnnotation(OS, Annot);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+}
+
+void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm() & 0xf;
+ switch (Imm) {
+ default: llvm_unreachable("Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ }
+}
+
+void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
+ switch (Imm) {
+ default: llvm_unreachable("Invalid avxcc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ case 0x10: O << "eq_os"; break;
+ case 0x11: O << "lt_oq"; break;
+ case 0x12: O << "le_oq"; break;
+ case 0x13: O << "unord_s"; break;
+ case 0x14: O << "neq_us"; break;
+ case 0x15: O << "nlt_uq"; break;
+ case 0x16: O << "nle_uq"; break;
+ case 0x17: O << "ord_s"; break;
+ case 0x18: O << "eq_us"; break;
+ case 0x19: O << "nge_uq"; break;
+ case 0x1a: O << "ngt_uq"; break;
+ case 0x1b: O << "false_os"; break;
+ case 0x1c: O << "neq_os"; break;
+ case 0x1d: O << "ge_oq"; break;
+ case 0x1e: O << "gt_oq"; break;
+ case 0x1f: O << "true_us"; break;
+ }
+}
+
+/// printPCRelImm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value (e.g. for jumps and calls). These
+/// print slightly differently than normal immediates. For example, a $ is not
+/// emitted.
+void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << formatImm(Op.getImm());
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
+ }
+}
+
+void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ } else if (Op.isImm()) {
+ // Print X86 immediates as signed values.
+ O << markup("<imm:")
+ << '$' << formatImm((int64_t)Op.getImm())
+ << markup(">");
+
+ if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
+ *CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Op.getImm());
+
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << markup("<imm:")
+ << '$' << *Op.getExpr()
+ << markup(">");
+ }
+}
+
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ O << markup("<mem:");
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
+
+ if (DispSpec.isImm()) {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+ O << formatImm(DispVal);
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ O << *DispSpec.getExpr();
+ }
+
+ if (IndexReg.getReg() || BaseReg.getReg()) {
+ O << '(';
+ if (BaseReg.getReg())
+ printOperand(MI, Op, O);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2, O);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1) {
+ O << ','
+ << markup("<imm:")
+ << ScaleVal // never printed in hex.
+ << markup(">");
+ }
+ }
+ O << ')';
+ }
+
+ O << markup(">");
+}
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
new file mode 100644
index 000000000000..8e09183dccc9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -0,0 +1,87 @@
+//==- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to AT&T style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ATT_INST_PRINTER_H
+#define X86_ATT_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class X86ATTInstPrinter : public MCInstPrinter {
+public:
+ X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
+
+ // Autogenerated by tblgen, returns true if we successfully printed an
+ // alias.
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+ void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
new file mode 100644
index 000000000000..0f6eeb19bccd
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -0,0 +1,511 @@
+//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "Utils/X86ShuffleDecode.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Top Level Entrypoint
+//===----------------------------------------------------------------------===//
+
+/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
+/// newline terminated strings to the specified string if desired. This
+/// information is shown in disassembly dumps when verbose assembly is enabled.
+void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned)) {
+ // If this is a shuffle operation, the switch should fill in this state.
+ SmallVector<int, 8> ShuffleMask;
+ const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
+
+ switch (MI->getOpcode()) {
+ case X86::INSERTPSrr:
+ case X86::VINSERTPSrr:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ break;
+
+ case X86::MOVLHPSrr:
+ case X86::VMOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
+
+ case X86::MOVHLPSrr:
+ case X86::VMOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
+
+ case X86::PALIGNR128rr:
+ case X86::VPALIGNR128rr:
+ Src1Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PALIGNR128rm:
+ case X86::VPALIGNR128rm:
+ Src2Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePALIGNRMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPALIGNR256rr:
+ Src1Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPALIGNR256rm:
+ Src2Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePALIGNRMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSHUFDri:
+ case X86::VPSHUFDri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFDmi:
+ case X86::VPSHUFDmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFMask(MVT::v4i32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFDYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFDYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFMask(MVT::v8i32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+
+ case X86::PSHUFHWri:
+ case X86::VPSHUFHWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFHWmi:
+ case X86::VPSHUFHWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFHWMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFHWYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFHWYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFHWMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::PSHUFLWri:
+ case X86::VPSHUFLWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFLWmi:
+ case X86::VPSHUFLWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFLWMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFLWYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFLWYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFLWMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PUNPCKHBWrr:
+ case X86::VPUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHBWrm:
+ case X86::VPUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
+ break;
+ case X86::PUNPCKHWDrr:
+ case X86::VPUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHWDrm:
+ case X86::VPUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
+ break;
+ case X86::PUNPCKHDQrr:
+ case X86::VPUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHDQrm:
+ case X86::VPUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
+ break;
+ case X86::PUNPCKHQDQrr:
+ case X86::VPUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
+ case X86::VPUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);
+ break;
+
+ case X86::PUNPCKLBWrr:
+ case X86::VPUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLBWrm:
+ case X86::VPUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
+ break;
+ case X86::PUNPCKLWDrr:
+ case X86::VPUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLWDrm:
+ case X86::VPUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
+ break;
+ case X86::PUNPCKLDQrr:
+ case X86::VPUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLDQrm:
+ case X86::VPUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
+ break;
+ case X86::PUNPCKLQDQrr:
+ case X86::VPUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
+ case X86::VPUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);
+ break;
+
+ case X86::SHUFPDrri:
+ case X86::VSHUFPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPDrmi:
+ case X86::VSHUFPDrmi:
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPDYrmi:
+ DecodeSHUFPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::SHUFPSrri:
+ case X86::VSHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPSrmi:
+ case X86::VSHUFPSrmi:
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPSYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPSYrmi:
+ DecodeSHUFPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::UNPCKLPDrr:
+ case X86::VUNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPDrm:
+ case X86::VUNPCKLPDrm:
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDYrm:
+ DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKLPSrr:
+ case X86::VUNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPSrm:
+ case X86::VUNPCKLPSrm:
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSYrm:
+ DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPDrr:
+ case X86::VUNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPDrm:
+ case X86::VUNPCKHPDrm:
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPDYrm:
+ DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPSrr:
+ case X86::VUNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPSrm:
+ case X86::VUNPCKHPSrm:
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPSYrm:
+ DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPSri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSmi:
+ DecodePSHUFMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPSYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSYmi:
+ DecodePSHUFMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPDri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDmi:
+ DecodePSHUFMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPDYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDYmi:
+ DecodePSHUFMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERM2F128rr:
+ case X86::VPERM2I128rr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPERM2F128rm:
+ case X86::VPERM2I128rm:
+ // For instruction comments purpose, assume the 256-bit vector is v4i64.
+ DecodeVPERM2X128Mask(MVT::v4i64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMQYri:
+ case X86::VPERMPDYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMQYmi:
+ case X86::VPERMPDYmi:
+ DecodeVPERMMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ }
+
+
+ // If this was a shuffle operation, print the shuffle mask.
+ if (!ShuffleMask.empty()) {
+ if (DestName == 0) DestName = Src1Name;
+ OS << (DestName ? DestName : "mem") << " = ";
+
+ // If the two sources are the same, canonicalize the input elements to be
+ // from the first src so that we get larger element spans.
+ if (Src1Name == Src2Name) {
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+ ShuffleMask[i] >= (int)e) // From second mask.
+ ShuffleMask[i] -= e;
+ }
+ }
+
+ // The shuffle mask specifies which elements of the src1/src2 fill in the
+ // destination, with a few sentinel values. Loop through and print them
+ // out.
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ OS << ',';
+ if (ShuffleMask[i] == SM_SentinelZero) {
+ OS << "zero";
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
+ const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+ OS << (SrcName ? SrcName : "mem") << '[';
+ bool IsFirst = true;
+ while (i != e &&
+ (int)ShuffleMask[i] >= 0 &&
+ (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
+ if (!IsFirst)
+ OS << ',';
+ else
+ IsFirst = false;
+ OS << ShuffleMask[i] % ShuffleMask.size();
+ ++i;
+ }
+ OS << ']';
+ --i; // For loop increments element #.
+ }
+ //MI->print(OS, 0);
+ OS << "\n";
+ }
+
+}
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
new file mode 100644
index 000000000000..13fdf9af8c98
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
@@ -0,0 +1,25 @@
+//=- X86InstComments.h - Generate verbose-asm comments for instrs -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INST_COMMENTS_H
+#define X86_INST_COMMENTS_H
+
+namespace llvm {
+ class MCInst;
+ class raw_ostream;
+ void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned));
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
new file mode 100644
index 000000000000..141f4a4dd856
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -0,0 +1,208 @@
+//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as Intel-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelInstPrinter.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86InstComments.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cctype>
+using namespace llvm;
+
+#include "X86GenAsmWriter1.inc"
+
+void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::LOCK)
+ OS << "\tlock\n";
+
+ printInstruction(MI, OS);
+
+ // Next always print the annotation.
+ printAnnotation(OS, Annot);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+}
+
+void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm() & 0xf;
+ switch (Imm) {
+ default: llvm_unreachable("Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ }
+}
+
+void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
+ switch (Imm) {
+ default: llvm_unreachable("Invalid avxcc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ case 0x10: O << "eq_os"; break;
+ case 0x11: O << "lt_oq"; break;
+ case 0x12: O << "le_oq"; break;
+ case 0x13: O << "unord_s"; break;
+ case 0x14: O << "neq_us"; break;
+ case 0x15: O << "nlt_uq"; break;
+ case 0x16: O << "nle_uq"; break;
+ case 0x17: O << "ord_s"; break;
+ case 0x18: O << "eq_us"; break;
+ case 0x19: O << "nge_uq"; break;
+ case 0x1a: O << "ngt_uq"; break;
+ case 0x1b: O << "false_os"; break;
+ case 0x1c: O << "neq_os"; break;
+ case 0x1d: O << "ge_oq"; break;
+ case 0x1e: O << "gt_oq"; break;
+ case 0x1f: O << "true_us"; break;
+ }
+}
+
+/// printPCRelImm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.
+void X86IntelInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
+ }
+}
+
+static void PrintRegName(raw_ostream &O, StringRef RegName) {
+ for (unsigned i = 0, e = RegName.size(); i != e; ++i)
+ O << (char)toupper(RegName[i]);
+}
+
+void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ PrintRegName(O, getRegisterName(Op.getReg()));
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
+
+ O << '[';
+
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOperand(MI, Op, O);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << '*';
+ printOperand(MI, Op+2, O);
+ NeedPlus = true;
+ }
+
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ O << *DispSpec.getExpr();
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << DispVal;
+ }
+ }
+
+ O << ']';
+}
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
new file mode 100644
index 000000000000..bb769eb52e4f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -0,0 +1,96 @@
+//= X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to Intel style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INTEL_INST_PRINTER_H
+#define X86_INTEL_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class X86IntelInstPrinter : public MCInstPrinter {
+public:
+ X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "OPAQUE PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "BYTE PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "WORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
new file mode 100644
index 000000000000..598ddee56d21
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -0,0 +1,476 @@
+//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Option to allow disabling arithmetic relaxation to workaround PR9807, which
+// is useful when running bitwise comparison experiments on Darwin. We should be
+// able to remove this once PR9807 is resolved.
+static cl::opt<bool>
+MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
+ cl::desc("Disable relaxation of arithmetic instruction for X86"));
+
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_PCRel_1:
+ case FK_SecRel_1:
+ case FK_Data_1: return 0;
+ case FK_PCRel_2:
+ case FK_SecRel_2:
+ case FK_Data_2: return 1;
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case X86::reloc_global_offset_table:
+ case FK_SecRel_4:
+ case FK_Data_4: return 2;
+ case FK_PCRel_8:
+ case FK_SecRel_8:
+ case FK_Data_8: return 3;
+ }
+}
+
+namespace {
+
+class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
+ bool HasRelocationAddend, bool foobar)
+ : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
+};
+
+class X86AsmBackend : public MCAsmBackend {
+ StringRef CPU;
+public:
+ X86AsmBackend(const Target &T, StringRef _CPU)
+ : MCAsmBackend(), CPU(_CPU) {}
+
+ unsigned getNumFixupKinds() const {
+ return X86::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
+ { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel},
+ { "reloc_signed_4byte", 0, 4 * 8, 0},
+ { "reloc_global_offset_table", 0, 4 * 8, 0}
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
+
+ assert(Fixup.getOffset() + Size <= DataSize &&
+ "Invalid fixup offset!");
+
+ // Check that uppper bits are either all zeros or all ones.
+ // Specifically ignore overflow/underflow as long as the leakage is
+ // limited to the lower bits. This is to remain compatible with
+ // other assemblers.
+ assert(isIntN(Size * 8 + 1, Value) &&
+ "Value does not fit in the Fixup field");
+
+ for (unsigned i = 0; i != Size; ++i)
+ Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const;
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+};
+} // end anonymous namespace
+
+static unsigned getRelaxedOpcodeBranch(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ case X86::JAE_1: return X86::JAE_4;
+ case X86::JA_1: return X86::JA_4;
+ case X86::JBE_1: return X86::JBE_4;
+ case X86::JB_1: return X86::JB_4;
+ case X86::JE_1: return X86::JE_4;
+ case X86::JGE_1: return X86::JGE_4;
+ case X86::JG_1: return X86::JG_4;
+ case X86::JLE_1: return X86::JLE_4;
+ case X86::JL_1: return X86::JL_4;
+ case X86::JMP_1: return X86::JMP_4;
+ case X86::JNE_1: return X86::JNE_4;
+ case X86::JNO_1: return X86::JNO_4;
+ case X86::JNP_1: return X86::JNP_4;
+ case X86::JNS_1: return X86::JNS_4;
+ case X86::JO_1: return X86::JO_4;
+ case X86::JP_1: return X86::JP_4;
+ case X86::JS_1: return X86::JS_4;
+ }
+}
+
+static unsigned getRelaxedOpcodeArith(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ // IMUL
+ case X86::IMUL16rri8: return X86::IMUL16rri;
+ case X86::IMUL16rmi8: return X86::IMUL16rmi;
+ case X86::IMUL32rri8: return X86::IMUL32rri;
+ case X86::IMUL32rmi8: return X86::IMUL32rmi;
+ case X86::IMUL64rri8: return X86::IMUL64rri32;
+ case X86::IMUL64rmi8: return X86::IMUL64rmi32;
+
+ // AND
+ case X86::AND16ri8: return X86::AND16ri;
+ case X86::AND16mi8: return X86::AND16mi;
+ case X86::AND32ri8: return X86::AND32ri;
+ case X86::AND32mi8: return X86::AND32mi;
+ case X86::AND64ri8: return X86::AND64ri32;
+ case X86::AND64mi8: return X86::AND64mi32;
+
+ // OR
+ case X86::OR16ri8: return X86::OR16ri;
+ case X86::OR16mi8: return X86::OR16mi;
+ case X86::OR32ri8: return X86::OR32ri;
+ case X86::OR32mi8: return X86::OR32mi;
+ case X86::OR64ri8: return X86::OR64ri32;
+ case X86::OR64mi8: return X86::OR64mi32;
+
+ // XOR
+ case X86::XOR16ri8: return X86::XOR16ri;
+ case X86::XOR16mi8: return X86::XOR16mi;
+ case X86::XOR32ri8: return X86::XOR32ri;
+ case X86::XOR32mi8: return X86::XOR32mi;
+ case X86::XOR64ri8: return X86::XOR64ri32;
+ case X86::XOR64mi8: return X86::XOR64mi32;
+
+ // ADD
+ case X86::ADD16ri8: return X86::ADD16ri;
+ case X86::ADD16mi8: return X86::ADD16mi;
+ case X86::ADD32ri8: return X86::ADD32ri;
+ case X86::ADD32mi8: return X86::ADD32mi;
+ case X86::ADD64ri8: return X86::ADD64ri32;
+ case X86::ADD64mi8: return X86::ADD64mi32;
+
+ // SUB
+ case X86::SUB16ri8: return X86::SUB16ri;
+ case X86::SUB16mi8: return X86::SUB16mi;
+ case X86::SUB32ri8: return X86::SUB32ri;
+ case X86::SUB32mi8: return X86::SUB32mi;
+ case X86::SUB64ri8: return X86::SUB64ri32;
+ case X86::SUB64mi8: return X86::SUB64mi32;
+
+ // CMP
+ case X86::CMP16ri8: return X86::CMP16ri;
+ case X86::CMP16mi8: return X86::CMP16mi;
+ case X86::CMP32ri8: return X86::CMP32ri;
+ case X86::CMP32mi8: return X86::CMP32mi;
+ case X86::CMP64ri8: return X86::CMP64ri32;
+ case X86::CMP64mi8: return X86::CMP64mi32;
+
+ // PUSH
+ case X86::PUSHi8: return X86::PUSHi32;
+ case X86::PUSHi16: return X86::PUSHi32;
+ case X86::PUSH64i8: return X86::PUSH64i32;
+ case X86::PUSH64i16: return X86::PUSH64i32;
+ }
+}
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+ unsigned R = getRelaxedOpcodeArith(Op);
+ if (R != Op)
+ return R;
+ return getRelaxedOpcodeBranch(Op);
+}
+
+bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
+ // Branches can always be relaxed.
+ if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
+
+ if (MCDisableArithRelaxation)
+ return false;
+
+ // Check if this instruction is ever relaxable.
+ if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
+
+
+ // Check if it has an expression and is not RIP relative.
+ bool hasExp = false;
+ bool hasRIP = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+ const MCOperand &Op = Inst.getOperand(i);
+ if (Op.isExpr())
+ hasExp = true;
+
+ if (Op.isReg() && Op.getReg() == X86::RIP)
+ hasRIP = true;
+ }
+
+ // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
+ // how we do relaxations?
+ return hasExp && !hasRIP;
+}
+
+bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
+// FIXME: Can tblgen help at all here to verify there aren't other instructions
+// we can relax?
+void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
+}
+
+/// \brief Write a sequence of optimal nops to the output, covering \p Count
+/// bytes.
+/// \return - true on success, false on failure
+bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ static const uint8_t Nops[10][10] = {
+ // nop
+ {0x90},
+ // xchg %ax,%ax
+ {0x66, 0x90},
+ // nopl (%[re]ax)
+ {0x0f, 0x1f, 0x00},
+ // nopl 0(%[re]ax)
+ {0x0f, 0x1f, 0x40, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw 0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw %cs:0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ };
+
+ // This CPU doesnt support long nops. If needed add more.
+ // FIXME: Can we get this from the subtarget somehow?
+ if (CPU == "generic" || CPU == "i386" || CPU == "i486" || CPU == "i586" ||
+ CPU == "pentium" || CPU == "pentium-mmx" || CPU == "geode") {
+ for (uint64_t i = 0; i < Count; ++i)
+ OW->Write8(0x90);
+ return true;
+ }
+
+ // 15 is the longest single nop instruction. Emit as many 15-byte nops as
+ // needed, then emit a nop of the remaining length.
+ do {
+ const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15);
+ const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
+ for (uint8_t i = 0; i < Prefixes; i++)
+ OW->Write8(0x66);
+ const uint8_t Rest = ThisNopLength - Prefixes;
+ for (uint8_t i = 0; i < Rest; i++)
+ OW->Write8(Nops[Rest - 1][i]);
+ Count -= ThisNopLength;
+ } while (Count != 0);
+
+ return true;
+}
+
+/* *** */
+
+namespace {
+class ELFX86AsmBackend : public X86AsmBackend {
+public:
+ uint8_t OSABI;
+ ELFX86AsmBackend(const Target &T, uint8_t _OSABI, StringRef CPU)
+ : X86AsmBackend(T, CPU), OSABI(_OSABI) {
+ HasReliableSymbolDifference = true;
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section);
+ return ES.getFlags() & ELF::SHF_MERGE;
+ }
+};
+
+class ELFX86_32AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : ELFX86AsmBackend(T, OSABI, CPU) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386);
+ }
+};
+
+class ELFX86_64AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : ELFX86AsmBackend(T, OSABI, CPU) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64);
+ }
+};
+
+class WindowsX86AsmBackend : public X86AsmBackend {
+ bool Is64Bit;
+
+public:
+ WindowsX86AsmBackend(const Target &T, bool is64Bit, StringRef CPU)
+ : X86AsmBackend(T, CPU)
+ , Is64Bit(is64Bit) {
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86WinCOFFObjectWriter(OS, Is64Bit);
+ }
+};
+
+class DarwinX86AsmBackend : public X86AsmBackend {
+public:
+ DarwinX86AsmBackend(const Target &T, StringRef CPU)
+ : X86AsmBackend(T, CPU) { }
+};
+
+class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
+public:
+ DarwinX86_32AsmBackend(const Target &T, StringRef CPU)
+ : DarwinX86AsmBackend(T, CPU) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_i386,
+ object::mach::CSX86_ALL);
+ }
+};
+
+class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
+public:
+ DarwinX86_64AsmBackend(const Target &T, StringRef CPU)
+ : DarwinX86AsmBackend(T, CPU) {
+ HasReliableSymbolDifference = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
+ object::mach::CTM_x86_64,
+ object::mach::CSX86_ALL);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ // Temporary labels in the string literals sections require symbols. The
+ // issue is that the x86_64 relocation format does not allow symbol +
+ // offset, and so the linker does not have enough information to resolve the
+ // access to the appropriate atom unless an external relocation is used. For
+ // non-cstring sections, we expect the compiler to use a non-temporary label
+ // for anything that could have an addend pointing outside the symbol.
+ //
+ // See <rdar://problem/4765733>.
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+ return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS;
+ }
+
+ virtual bool isSectionAtomizable(const MCSection &Section) const {
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+ // Fixed sized data sections are uniqued, they cannot be diced into atoms.
+ switch (SMO.getType()) {
+ default:
+ return true;
+
+ case MCSectionMachO::S_4BYTE_LITERALS:
+ case MCSectionMachO::S_8BYTE_LITERALS:
+ case MCSectionMachO::S_16BYTE_LITERALS:
+ case MCSectionMachO::S_LITERAL_POINTERS:
+ case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS:
+ case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS:
+ case MCSectionMachO::S_INTERPOSING:
+ return false;
+ }
+ }
+};
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return new DarwinX86_32AsmBackend(T, CPU);
+
+ if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
+ return new WindowsX86AsmBackend(T, false, CPU);
+
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFX86_32AsmBackend(T, OSABI, CPU);
+}
+
+MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return new DarwinX86_64AsmBackend(T, CPU);
+
+ if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
+ return new WindowsX86AsmBackend(T, true, CPU);
+
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFX86_64AsmBackend(T, OSABI, CPU);
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
new file mode 100644
index 000000000000..36695600707e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -0,0 +1,621 @@
+//===-- X86BaseInfo.h - Top level definitions for X86 -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the X86 target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86BASEINFO_H
+#define X86BASEINFO_H
+
+#include "X86MCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+namespace X86 {
+ // Enums for memory operand decoding. Each memory operand is represented with
+ // a 5 operand sequence in the form:
+ // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
+ // These enums help decode this.
+ enum {
+ AddrBaseReg = 0,
+ AddrScaleAmt = 1,
+ AddrIndexReg = 2,
+ AddrDisp = 3,
+
+ /// AddrSegmentReg - The operand # of the segment in the memory operand.
+ AddrSegmentReg = 4,
+
+ /// AddrNumOperands - Total number of operands in a memory reference.
+ AddrNumOperands = 5
+ };
+} // end namespace X86;
+
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // X86 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
+ /// relocation of:
+ /// SYMBOL_LABEL + [. - PICBASELABEL]
+ MO_GOT_ABSOLUTE_ADDRESS,
+
+ /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
+ /// immediate should get the value of the symbol minus the PIC base label:
+ /// SYMBOL_LABEL - PICBASELABEL
+ MO_PIC_BASE_OFFSET,
+
+ /// MO_GOT - On a symbol operand this indicates that the immediate is the
+ /// offset to the GOT entry for the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOT
+ MO_GOT,
+
+ /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTOFF
+ MO_GOTOFF,
+
+ /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
+ /// offset to the GOT entry for the symbol name from the current code
+ /// location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTPCREL
+ MO_GOTPCREL,
+
+ /// MO_PLT - On a symbol operand this indicates that the immediate is
+ /// offset to the PLT entry of symbol name from the current code location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @PLT
+ MO_PLT,
+
+ /// MO_TLSGD - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS index structure that contains
+ /// the module number and variable offset for the symbol. Used in the
+ /// general dynamic TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSGD
+ MO_TLSGD,
+
+ /// MO_TLSLD - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS index for the module that
+ /// contains the symbol. When this index is passed to a call to
+ /// __tls_get_addr, the function will return the base address of the TLS
+ /// block for the symbol. Used in the x86-64 local dynamic TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSLD
+ MO_TLSLD,
+
+ /// MO_TLSLDM - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS index for the module that
+ /// contains the symbol. When this index is passed to a call to
+ /// ___tls_get_addr, the function will return the base address of the TLS
+ /// block for the symbol. Used in the IA32 local dynamic TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSLDM
+ MO_TLSLDM,
+
+ /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the thread-pointer offset for the
+ /// symbol. Used in the x86-64 initial exec TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @GOTTPOFF
+ MO_GOTTPOFF,
+
+ /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
+ /// the absolute address of the GOT entry with the negative thread-pointer
+ /// offset for the symbol. Used in the non-PIC IA32 initial exec TLS access
+ /// model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @INDNTPOFF
+ MO_INDNTPOFF,
+
+ /// MO_TPOFF - On a symbol operand this indicates that the immediate is
+ /// the thread-pointer offset for the symbol. Used in the x86-64 local
+ /// exec TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TPOFF
+ MO_TPOFF,
+
+ /// MO_DTPOFF - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS offset of the symbol. Used
+ /// in the local dynamic TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @DTPOFF
+ MO_DTPOFF,
+
+ /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
+ /// the negative thread-pointer offset for the symbol. Used in the IA32
+ /// local exec TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @NTPOFF
+ MO_NTPOFF,
+
+ /// MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the negative thread-pointer offset for
+ /// the symbol. Used in the PIC IA32 initial exec TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @GOTNTPOFF
+ MO_GOTNTPOFF,
+
+ /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "__imp_FOO" symbol. This is used for
+ /// dllimport linkage on windows.
+ MO_DLLIMPORT,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB,
+
+ /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
+ /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY,
+
+ /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
+ /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
+ /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY_PIC_BASE,
+
+ /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
+ /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
+ /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
+ /// stub.
+ MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
+
+ /// MO_TLVP - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// This is the TLS offset for the Darwin TLS mechanism.
+ MO_TLVP,
+
+ /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
+ /// is some TLS offset from the picbase.
+ ///
+ /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
+ MO_TLVP_PIC_BASE,
+
+ /// MO_SECREL - On a symbol operand this indicates that the immediate is
+ /// the offset from beginning of section.
+ ///
+ /// This is the TLS offset for the COFF/Windows TLS mechanism.
+ MO_SECREL
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for X86
+ // instructions.
+ //
+
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// Raw - This form is for instructions that don't have any operands, so
+ /// they are just a fixed opcode value, like 'leave'.
+ RawFrm = 1,
+
+ /// AddRegFrm - This form is used for instructions like 'push r32' that have
+ /// their one register operand added to their opcode.
+ AddRegFrm = 2,
+
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ ///
+ MRMDestReg = 3,
+
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ ///
+ MRMDestMem = 4,
+
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ ///
+ MRMSrcReg = 5,
+
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ ///
+ MRMSrcMem = 6,
+
+ /// MRM[0-7][rm] - These forms are used to represent instructions that use
+ /// a Mod/RM byte, and use the middle field to hold extended opcode
+ /// information. In the intel manual these are represented as /0, /1, ...
+ ///
+
+ // First, instructions that operate on a register r/m operand...
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
+
+ // Next, instructions that operate on a memory r/m operand...
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
+
+ // MRMInitReg - This form is used for instructions whose source and
+ // destinations are the same register.
+ MRMInitReg = 32,
+
+ //// MRM_XX - A mod/rm byte of exactly 0xXX.
+ MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
+ MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
+ MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
+ MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50,
+ MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54,
+ MRM_DD = 55, MRM_DE = 56, MRM_DF = 57,
+
+ /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+ /// immediates, the first of which is a 16-bit immediate (specified by
+ /// the imm encoding) and the second is a 8-bit fixed value.
+ RawFrmImm8 = 43,
+
+ /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+ /// immediates, the first of which is a 16 or 32-bit immediate (specified by
+ /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
+ /// manual, this operand is described as pntr16:32 and pntr16:16
+ RawFrmImm16 = 44,
+
+ FormMask = 63,
+
+ //===------------------------------------------------------------------===//
+ // Actual flags...
+
+ // OpSize - Set if this instruction requires an operand size prefix (0x66),
+ // which most often indicates that the instruction operates on 16 bit data
+ // instead of 32 bit data.
+ OpSize = 1 << 6,
+
+ // AsSize - Set if this instruction requires an operand size prefix (0x67),
+ // which most often indicates that the instruction address 16 bit address
+ // instead of 32 bit address (or 32 bit address in 64 bit mode).
+ AdSize = 1 << 7,
+
+ //===------------------------------------------------------------------===//
+ // Op0Mask - There are several prefix bytes that are used to form two byte
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+ // used to obtain the setting of this field. If no bits in this field is
+ // set, there is no prefix byte for obtaining a multibyte opcode.
+ //
+ Op0Shift = 8,
+ Op0Mask = 0x1F << Op0Shift,
+
+ // TB - TwoByte - Set if this instruction has a two byte opcode, which
+ // starts with a 0x0F byte before the real opcode.
+ TB = 1 << Op0Shift,
+
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
+ // D8-DF - These escape opcodes are used by the floating point unit. These
+ // values must remain sequential.
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
+
+ // XS, XD - These prefix codes are for single and double precision scalar
+ // floating point operations performed in the SSE registers.
+ XD = 11 << Op0Shift, XS = 12 << Op0Shift,
+
+ // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
+ T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+ A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
+
+ // T8XD - Prefix before and after 0x0F. Combination of T8 and XD.
+ T8XD = 17 << Op0Shift,
+
+ // T8XS - Prefix before and after 0x0F. Combination of T8 and XS.
+ T8XS = 18 << Op0Shift,
+
+ // TAXD - Prefix before and after 0x0F. Combination of TA and XD.
+ TAXD = 19 << Op0Shift,
+
+ // XOP8 - Prefix to include use of imm byte.
+ XOP8 = 20 << Op0Shift,
+
+ // XOP9 - Prefix to exclude use of imm byte.
+ XOP9 = 21 << Op0Shift,
+
+ //===------------------------------------------------------------------===//
+ // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ // They are used to specify GPRs and SSE registers, 64-bit operand size,
+ // etc. We only cares about REX.W and REX.R bits and only the former is
+ // statically determined.
+ //
+ REXShift = Op0Shift + 5,
+ REX_W = 1 << REXShift,
+
+ //===------------------------------------------------------------------===//
+ // This three-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = REXShift + 1,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm8PCRel = 2 << ImmShift,
+ Imm16 = 3 << ImmShift,
+ Imm16PCRel = 4 << ImmShift,
+ Imm32 = 5 << ImmShift,
+ Imm32PCRel = 6 << ImmShift,
+ Imm64 = 7 << ImmShift,
+
+ //===------------------------------------------------------------------===//
+ // FP Instruction Classification... Zero is non-fp instruction.
+
+ // FPTypeMask - Mask for all of the FP types...
+ FPTypeShift = ImmShift + 3,
+ FPTypeMask = 7 << FPTypeShift,
+
+ // NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+
+ // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
+ ZeroArgFP = 1 << FPTypeShift,
+
+ // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+ OneArgFP = 2 << FPTypeShift,
+
+ // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a
+ // result back to ST(0). For example, fcos, fsqrt, etc.
+ //
+ OneArgFPRW = 3 << FPTypeShift,
+
+ // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+ // explicit argument, storing the result to either ST(0) or the implicit
+ // argument. For example: fadd, fsub, fmul, etc...
+ TwoArgFP = 4 << FPTypeShift,
+
+ // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+ // explicit argument, but have no destination. Example: fucom, fucomi, ...
+ CompareFP = 5 << FPTypeShift,
+
+ // CondMovFP - "2 operand" floating point conditional move instructions.
+ CondMovFP = 6 << FPTypeShift,
+
+ // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+ SpecialFP = 7 << FPTypeShift,
+
+ // Lock prefix
+ LOCKShift = FPTypeShift + 3,
+ LOCK = 1 << LOCKShift,
+
+ // Segment override prefixes. Currently we just need ability to address
+ // stuff in gs and fs segments.
+ SegOvrShift = LOCKShift + 1,
+ SegOvrMask = 3 << SegOvrShift,
+ FS = 1 << SegOvrShift,
+ GS = 2 << SegOvrShift,
+
+ // Execution domain for SSE instructions in bits 23, 24.
+ // 0 in bits 23-24 means normal, non-SSE instruction.
+ SSEDomainShift = SegOvrShift + 2,
+
+ OpcodeShift = SSEDomainShift + 2,
+
+ //===------------------------------------------------------------------===//
+ /// VEX - The opcode prefix used by AVX instructions
+ VEXShift = OpcodeShift + 8,
+ VEX = 1U << 0,
+
+ /// VEX_W - Has a opcode specific functionality, but is used in the same
+ /// way as REX_W is for regular SSE instructions.
+ VEX_W = 1U << 1,
+
+ /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
+ /// address instructions in SSE are represented as 3 address ones in AVX
+ /// and the additional register is encoded in VEX_VVVV prefix.
+ VEX_4V = 1U << 2,
+
+ /// VEX_4VOp3 - Similar to VEX_4V, but used on instructions that encode
+ /// operand 3 with VEX.vvvv.
+ VEX_4VOp3 = 1U << 3,
+
+ /// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
+ /// must be encoded in the i8 immediate field. This usually happens in
+ /// instructions with 4 operands.
+ VEX_I8IMM = 1U << 4,
+
+ /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+ /// instruction uses 256-bit wide registers. This is usually auto detected
+ /// if a VR256 register is used, but some AVX instructions also have this
+ /// field marked when using a f256 memory references.
+ VEX_L = 1U << 5,
+
+ // VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
+ // prefix. Usually used for scalar instructions. Needed by disassembler.
+ VEX_LIG = 1U << 6,
+
+ /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+ /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
+ /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+ /// storing a classifier in the imm8 field. To simplify our implementation,
+ /// we handle this by storeing the classifier in the opcode field and using
+ /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+ Has3DNow0F0FOpcode = 1U << 7,
+
+ /// MemOp4 - Used to indicate swapping of operand 3 and 4 to be encoded in
+ /// ModRM or I8IMM. This is used for FMA4 and XOP instructions.
+ MemOp4 = 1U << 8,
+
+ /// XOP - Opcode prefix used by XOP instructions.
+ XOP = 1U << 9
+
+ };
+
+ // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
+ // specified machine instruction.
+ //
+ inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+ return TSFlags >> X86II::OpcodeShift;
+ }
+
+ inline bool hasImm(uint64_t TSFlags) {
+ return (TSFlags & X86II::ImmMask) != 0;
+ }
+
+ /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
+ /// of the specified instruction.
+ inline unsigned getSizeOfImm(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: llvm_unreachable("Unknown immediate size");
+ case X86II::Imm8:
+ case X86II::Imm8PCRel: return 1;
+ case X86II::Imm16:
+ case X86II::Imm16PCRel: return 2;
+ case X86II::Imm32:
+ case X86II::Imm32PCRel: return 4;
+ case X86II::Imm64: return 8;
+ }
+ }
+
+ /// isImmPCRel - Return true if the immediate of the specified instruction's
+ /// TSFlags indicates that it is pc relative.
+ inline unsigned isImmPCRel(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: llvm_unreachable("Unknown immediate size");
+ case X86II::Imm8PCRel:
+ case X86II::Imm16PCRel:
+ case X86II::Imm32PCRel:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm16:
+ case X86II::Imm32:
+ case X86II::Imm64:
+ return false;
+ }
+ }
+
+ /// getMemoryOperandNo - The function returns the MCInst operand # for the
+ /// first field of the memory operand. If the instruction doesn't have a
+ /// memory operand, this returns -1.
+ ///
+ /// Note that this ignores tied operands. If there is a tied register which
+ /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
+ /// counted as one operand.
+ ///
+ inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ // FIXME: Remove this form.
+ return -1;
+ default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::Pseudo:
+ case X86II::RawFrm:
+ case X86II::AddRegFrm:
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ case X86II::RawFrmImm8:
+ case X86II::RawFrmImm16:
+ return -1;
+ case X86II::MRMDestMem:
+ return 0;
+ case X86II::MRMSrcMem: {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+ unsigned FirstMemOp = 1;
+ if (HasVEX_4V)
+ ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+ if (HasMemOp4)
+ ++FirstMemOp;// Skip the register source (which is encoded in I8IMM).
+
+ // FIXME: Maybe lea should have its own form? This is a horrible hack.
+ //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ return FirstMemOp;
+ }
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ return -1;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ unsigned FirstMemOp = 0;
+ if (HasVEX_4V)
+ ++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV).
+ return FirstMemOp;
+ }
+ case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
+ case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8:
+ case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
+ case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
+ case X86II::MRM_DE: case X86II::MRM_DF:
+ return -1;
+ }
+ }
+
+ /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
+ /// higher) register? e.g. r8, xmm8, xmm13, etc.
+ inline bool isX86_64ExtendedReg(unsigned RegNo) {
+ switch (RegNo) {
+ default: break;
+ case X86::R8: case X86::R9: case X86::R10: case X86::R11:
+ case X86::R12: case X86::R13: case X86::R14: case X86::R15:
+ case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
+ case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
+ case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
+ case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
+ case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
+ case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
+ case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
+ return true;
+ }
+ return false;
+ }
+
+ inline bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL ||
+ reg == X86::SIL || reg == X86::DIL);
+ }
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
new file mode 100644
index 000000000000..de80dd835e99
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -0,0 +1,227 @@
+//===-- X86ELFObjectWriter.cpp - X86 ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine);
+
+ virtual ~X86ELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ };
+}
+
+X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
+ uint16_t EMachine)
+ : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine,
+ // Only i386 uses Rel instead of RelA.
+ /*HasRelocationAddend*/ EMachine != ELF::EM_386) {}
+
+X86ELFObjectWriter::~X86ELFObjectWriter()
+{}
+
+unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ // determine the type of the relocation
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ unsigned Type;
+ if (getEMachine() == ELF::EM_X86_64) {
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
+ case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
+ case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
+
+ case FK_PCRel_8:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC64;
+ break;
+ case X86::reloc_signed_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_riprel_4byte:
+ case FK_PCRel_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_X86_64_PLT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_X86_64_GOTTPOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_X86_64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_TLSLD:
+ Type = ELF::R_X86_64_TLSLD;
+ break;
+ }
+ break;
+ case FK_PCRel_2:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC16;
+ break;
+ case FK_PCRel_1:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC8;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_8: Type = ELF::R_X86_64_64; break;
+ case X86::reloc_signed_4byte:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_32S;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_X86_64_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_X86_64_TPOFF32;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_X86_64_DTPOFF32;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ Type = ELF::R_X86_64_32;
+ break;
+ case FK_Data_2: Type = ELF::R_X86_64_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_X86_64_8; break;
+ }
+ }
+ } else if (getEMachine() == ELF::EM_386) {
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
+ break;
+
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_386_PLT32;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
+ break;
+
+ // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode
+ // instead?
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_32;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_386_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ Type = ELF::R_386_GOTOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_386_TLS_GD;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_386_TLS_LE_32;
+ break;
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ Type = ELF::R_386_TLS_IE;
+ break;
+ case MCSymbolRefExpr::VK_NTPOFF:
+ Type = ELF::R_386_TLS_LE;
+ break;
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ Type = ELF::R_386_TLS_GOTIE;
+ break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ Type = ELF::R_386_TLS_LDM;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_386_TLS_LDO_32;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_386_TLS_IE_32;
+ break;
+ }
+ break;
+ case FK_Data_2: Type = ELF::R_386_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_386_8; break;
+ }
+ }
+ } else
+ llvm_unreachable("Unsupported ELF machine type.");
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS,
+ bool IsELF64,
+ uint8_t OSABI,
+ uint16_t EMachine) {
+ MCELFObjectTargetWriter *MOTW =
+ new X86ELFObjectWriter(IsELF64, OSABI, EMachine);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
new file mode 100644
index 000000000000..f2e34cbe0d65
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -0,0 +1,33 @@
+//===-- X86FixupKinds.h - X86 Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_X86_X86FIXUPKINDS_H
+#define LLVM_X86_X86FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace X86 {
+enum Fixups {
+ reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative
+ reloc_riprel_4byte_movq_load, // 32-bit rip-relative in movq
+ reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4
+ // this will be sign extended at
+ // runtime.
+ reloc_global_offset_table, // 32-bit, relative to the start
+ // of the instruction. Used only
+ // for _GLOBAL_OFFSET_TABLE_.
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
new file mode 100644
index 000000000000..7815ae98c9bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -0,0 +1,158 @@
+//===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the X86MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+enum AsmWriterFlavorTy {
+ // Note: This numbering has to match the GCC assembler dialects for inline
+ // asm alternatives to work right.
+ ATT = 0, Intel = 1
+};
+
+static cl::opt<AsmWriterFlavorTy>
+AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
+ cl::desc("Choose style of code to emit from X86 backend:"),
+ cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
+ clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
+ clEnumValEnd));
+
+static cl::opt<bool>
+MarkedJTDataRegions("mark-data-regions", cl::init(false),
+ cl::desc("Mark code section jump table data regions."),
+ cl::Hidden);
+
+void X86MCAsmInfoDarwin::anchor() { }
+
+X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ if (is64Bit)
+ PointerSize = CalleeSaveStackSlotSize = 8;
+
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // we can't emit a 64-bit unit
+
+ // Use ## as a comment string so that .s files generated by llvm can go
+ // through the GCC preprocessor without causing an error. This is needed
+ // because "clang foo.s" runs the C preprocessor, which is usually reserved
+ // for .S files on other systems. Perhaps this is because the file system
+ // wasn't always case preserving or something.
+ CommentString = "##";
+ PCSymbol = ".";
+
+ SupportsDebugInformation = true;
+ DwarfUsesInlineInfoSection = true;
+ UseDataRegionDirectives = MarkedJTDataRegions;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
+ : X86MCAsmInfoDarwin(Triple) {
+}
+
+void X86ELFMCAsmInfo::anchor() { }
+
+X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ bool isX32 = T.getEnvironment() == Triple::GNUX32;
+
+ // For ELF, x86-64 pointer size depends on the ABI.
+ // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64
+ // with the x32 ABI, pointer size remains the default 4.
+ PointerSize = (is64Bit && !isX32) ? 8 : 4;
+
+ // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
+ CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
+
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Debug Information
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ // OpenBSD and Bitrig have buggy support for .quad in 32-bit mode, just split
+ // into two .words.
+ if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) &&
+ T.getArch() == Triple::x86)
+ Data64bitsDirective = 0;
+}
+
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::Create(4, Context);
+ return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
+const MCSection *X86ELFMCAsmInfo::
+getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
+}
+
+void X86MCAsmInfoMicrosoft::anchor() { }
+
+X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64) {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
+
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+}
+
+void X86MCAsmInfoGNUCOFF::anchor() { }
+
+X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64) {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
+
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
new file mode 100644
index 000000000000..b6b70fd3e855
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -0,0 +1,58 @@
+//===-- X86MCAsmInfo.h - X86 asm properties --------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the X86MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETASMINFO_H
+#define X86TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+ class Triple;
+
+ class X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+ public:
+ explicit X86MCAsmInfoDarwin(const Triple &Triple);
+ };
+
+ struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
+ explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
+ virtual const MCExpr *
+ getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const;
+ };
+
+ class X86ELFMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
+ explicit X86ELFMCAsmInfo(const Triple &Triple);
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
+ };
+
+ class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ virtual void anchor();
+ public:
+ explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
+ };
+
+ class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
+ virtual void anchor();
+ public:
+ explicit X86MCAsmInfoGNUCOFF(const Triple &Triple);
+ };
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
new file mode 100644
index 000000000000..776cee1e35cc
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -0,0 +1,1233 @@
+//===-- X86MCCodeEmitter.cpp - Convert X86 code to machine code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class X86MCCodeEmitter : public MCCodeEmitter {
+ X86MCCodeEmitter(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+public:
+ X86MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti), Ctx(ctx) {
+ }
+
+ ~X86MCCodeEmitter() {}
+
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+
+ bool is32BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) == 0;
+ }
+
+ unsigned GetX86RegNum(const MCOperand &MO) const {
+ return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()) & 0x7;
+ }
+
+ // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+ // 0-7 and the difference between the 2 groups is given by the REX prefix.
+ // In the VEX prefix, registers are seen sequencially from 0-15 and encoded
+ // in 1's complement form, example:
+ //
+ // ModRM field => XMM9 => 1
+ // VEX.VVVV => XMM9 => ~9
+ //
+ // See table 4-35 of Intel AVX Programming Reference for details.
+ unsigned char getVEXRegisterEncoding(const MCInst &MI,
+ unsigned OpNum) const {
+ unsigned SrcReg = MI.getOperand(OpNum).getReg();
+ unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+ if (X86II::isX86_64ExtendedReg(SrcReg))
+ SrcRegNum |= 8;
+
+ // The registers represented through VEX_VVVV should
+ // be encoded in 1's complement form.
+ return (~SrcRegNum) & 0xf;
+ }
+
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EmitImmediate(const MCOperand &Disp, SMLoc Loc,
+ unsigned ImmSize, MCFixupKind FixupKind,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int ImmOffset = 0) const;
+
+ inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+ }
+
+ void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
+ unsigned &CurByte, raw_ostream &OS) const {
+ EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS);
+ }
+
+ void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
+ unsigned &CurByte, raw_ostream &OS) const {
+ // SIB byte is in the same format as the ModRMByte.
+ EmitByte(ModRMByte(SS, Index, Base), CurByte, OS);
+ }
+
+
+ void EmitMemModRMByte(const MCInst &MI, unsigned Op,
+ unsigned RegOpcodeField,
+ uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const MCInstrDesc &Desc,
+ raw_ostream &OS) const;
+
+ void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ raw_ostream &OS) const;
+
+ void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const MCInstrDesc &Desc,
+ raw_ostream &OS) const;
+};
+
+} // end anonymous namespace
+
+
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new X86MCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// isDisp8 - Return true if this signed displacement fits in a 8-bit
+/// sign-extended field.
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
+
+/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
+/// in an instruction with the specified TSFlags.
+static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
+ unsigned Size = X86II::getSizeOfImm(TSFlags);
+ bool isPCRel = X86II::isImmPCRel(TSFlags);
+
+ return MCFixup::getKindForSize(Size, isPCRel);
+}
+
+/// Is32BitMemOperand - Return true if the specified instruction has
+/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// Is64BitMemOperand - Return true if the specified instruction has
+/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
+#ifndef NDEBUG
+static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+#endif
+
+/// Is16BitMemOperand - Return true if the specified instruction has
+/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is16BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// StartsWithGlobalOffsetTable - Check if this expression starts with
+/// _GLOBAL_OFFSET_TABLE_ and if it is of the form
+/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
+/// i386 as _GLOBAL_OFFSET_TABLE_ is magical. We check only simple case that
+/// are know to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
+/// of a binary expression.
+enum GlobalOffsetTableExprKind {
+ GOT_None,
+ GOT_Normal,
+ GOT_SymDiff
+};
+static GlobalOffsetTableExprKind
+StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ const MCExpr *RHS = 0;
+ if (Expr->getKind() == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
+ Expr = BE->getLHS();
+ RHS = BE->getRHS();
+ }
+
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ return GOT_None;
+
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const MCSymbol &S = Ref->getSymbol();
+ if (S.getName() != "_GLOBAL_OFFSET_TABLE_")
+ return GOT_None;
+ if (RHS && RHS->getKind() == MCExpr::SymbolRef)
+ return GOT_SymDiff;
+ return GOT_Normal;
+}
+
+void X86MCCodeEmitter::
+EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
+ MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
+ const MCExpr *Expr = NULL;
+ if (DispOp.isImm()) {
+ // If this is a simple integer displacement that doesn't require a
+ // relocation, emit it now.
+ if (FixupKind != FK_PCRel_1 &&
+ FixupKind != FK_PCRel_2 &&
+ FixupKind != FK_PCRel_4) {
+ EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
+ return;
+ }
+ Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+ } else {
+ Expr = DispOp.getExpr();
+ }
+
+ // If we have an immoffset, add it to the expression.
+ if ((FixupKind == FK_Data_4 ||
+ FixupKind == FK_Data_8 ||
+ FixupKind == MCFixupKind(X86::reloc_signed_4byte))) {
+ GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr);
+ if (Kind != GOT_None) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ if (Kind == GOT_Normal)
+ ImmOffset = CurByte;
+ } else if (Expr->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ if (Ref->getKind() == MCSymbolRefExpr::VK_SECREL) {
+ FixupKind = MCFixupKind(FK_SecRel_4);
+ }
+ }
+ }
+
+ // If the fixup is pc-relative, we need to bias the value to be relative to
+ // the start of the field, not the end of the field.
+ if (FixupKind == FK_PCRel_4 ||
+ FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
+ FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
+ ImmOffset -= 4;
+ if (FixupKind == FK_PCRel_2)
+ ImmOffset -= 2;
+ if (FixupKind == FK_PCRel_1)
+ ImmOffset -= 1;
+
+ if (ImmOffset)
+ Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx),
+ Ctx);
+
+ // Emit a symbolic constant as a fixup and 4 zeros.
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind, Loc));
+ EmitConstant(0, Size, CurByte, OS);
+}
+
+void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
+ unsigned RegOpcodeField,
+ uint64_t TSFlags, unsigned &CurByte,
+ raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const{
+ const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp);
+ const MCOperand &Base = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+ unsigned BaseReg = Base.getReg();
+
+ // Handle %rip relative addressing.
+ if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
+ assert(is64BitMode() && "Rip-relative addressing requires 64-bit mode");
+ assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
+ EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
+
+ unsigned FixupKind = X86::reloc_riprel_4byte;
+
+ // movq loads are handled with a special relocation form which allows the
+ // linker to eliminate some loads for GOT references which end up in the
+ // same linkage unit.
+ if (MI.getOpcode() == X86::MOV64rm)
+ FixupKind = X86::reloc_riprel_4byte_movq_load;
+
+ // rip-relative addressing is actually relative to the *next* instruction.
+ // Since an immediate can follow the mod/rm byte for an instruction, this
+ // means that we need to bias the immediate field of the instruction with
+ // the size of the immediate field. If we have this case, add it into the
+ // expression to emit.
+ int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
+
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind),
+ CurByte, OS, Fixups, -ImmSize);
+ return;
+ }
+
+ unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;
+
+ // Determine whether a SIB byte is needed.
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
+ // 2-7) and absolute references.
+
+ if (// The SIB byte must be used if there is an index register.
+ IndexReg.getReg() == 0 &&
+ // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+ // encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ BaseRegNo != N86::ESP &&
+ // If there is no base register and we're in 64-bit mode, we need a SIB
+ // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
+ (!is64BitMode() || BaseReg != 0)) {
+
+ if (BaseReg == 0) { // [disp32] in X86-32 mode
+ EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
+ EmitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, CurByte, OS, Fixups);
+ return;
+ }
+
+ // If the base is not EBP/ESP and there is no displacement, use simple
+ // indirect register encoding, this handles addresses like [EAX]. The
+ // encoding for [EBP] with no displacement means [disp32] so we handle it
+ // by emitting a displacement of 0 below.
+ if (Disp.isImm() && Disp.getImm() == 0 && BaseRegNo != N86::EBP) {
+ EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ return;
+ }
+
+ // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
+ if (Disp.isImm() && isDisp8(Disp.getImm())) {
+ EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
+ EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
+ return;
+ }
+
+ // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
+ EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
+ return;
+ }
+
+ // We need a SIB byte, so start by outputting the ModR/M byte first
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=5, to JUST get the index, scale, and displacement.
+ EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp32 = true;
+ } else if (!Disp.isImm()) {
+ // Emit the normal disp32 encoding.
+ EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp32 = true;
+ } else if (Disp.getImm() == 0 &&
+ // Base reg can't be anything that ends up with '5' as the base
+ // reg, it is the magic [*] nomenclature that indicates no base.
+ BaseRegNo != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ } else if (isDisp8(Disp.getImm())) {
+ // Emit the disp8 encoding.
+ EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding.
+ EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ }
+
+ // Calculate what the SS field value should be...
+ static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
+ unsigned SS = SSTable[Scale.getImm()];
+
+ if (BaseReg == 0) {
+ // Handle the SIB byte for the case where there is no base, see Intel
+ // Manual 2A, table 2-7. The displacement has already been output.
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = GetX86RegNum(IndexReg);
+ else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
+ IndexRegNo = 4;
+ EmitSIBByte(SS, IndexRegNo, 5, CurByte, OS);
+ } else {
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = GetX86RegNum(IndexReg);
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8)
+ EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
+ else if (ForceDisp32 || Disp.getImm() != 0)
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
+ CurByte, OS, Fixups);
+}
+
+/// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
+/// called VEX.
+void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const MCInstrDesc &Desc,
+ raw_ostream &OS) const {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+
+ // VEX_R: opcode externsion equivalent to REX.R in
+ // 1's complement (inverted) form
+ //
+ // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX_R=1 (64 bit mode only)
+ //
+ unsigned char VEX_R = 0x1;
+
+ // VEX_X: equivalent to REX.X, only used when a
+ // register is used for index in SIB Byte.
+ //
+ // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.X=1 (64-bit mode only)
+ unsigned char VEX_X = 0x1;
+
+ // VEX_B:
+ //
+ // 1: Same as REX_B=0 (ignored in 32-bit mode)
+ // 0: Same as REX_B=1 (64 bit mode only)
+ //
+ unsigned char VEX_B = 0x1;
+
+ // VEX_W: opcode specific (use like REX.W, or used for
+ // opcode extension, or ignored, depending on the opcode byte)
+ unsigned char VEX_W = 0;
+
+ // XOP: Use XOP prefix byte 0x8f instead of VEX.
+ unsigned char XOP = 0;
+
+ // VEX_5M (VEX m-mmmmm field):
+ //
+ // 0b00000: Reserved for future use
+ // 0b00001: implied 0F leading opcode
+ // 0b00010: implied 0F 38 leading opcode bytes
+ // 0b00011: implied 0F 3A leading opcode bytes
+ // 0b00100-0b11111: Reserved for future use
+ // 0b01000: XOP map select - 08h instructions with imm byte
+ // 0b10001: XOP map select - 09h instructions with no imm byte
+ unsigned char VEX_5M = 0x1;
+
+ // VEX_4V (VEX vvvv field): a register specifier
+ // (in 1's complement form) or 1111 if unused.
+ unsigned char VEX_4V = 0xf;
+
+ // VEX_L (Vector Length):
+ //
+ // 0: scalar or 128-bit vector
+ // 1: 256-bit vector
+ //
+ unsigned char VEX_L = 0;
+
+ // VEX_PP: opcode extension providing equivalent
+ // functionality of a SIMD prefix
+ //
+ // 0b00: None
+ // 0b01: 66
+ // 0b10: F3
+ // 0b11: F2
+ //
+ unsigned char VEX_PP = 0;
+
+ // Encode the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ VEX_PP = 0x01;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
+ VEX_W = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
+ XOP = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
+ VEX_L = 1;
+
+ switch (TSFlags & X86II::Op0Mask) {
+ default: llvm_unreachable("Invalid prefix!");
+ case X86II::T8: // 0F 38
+ VEX_5M = 0x2;
+ break;
+ case X86II::TA: // 0F 3A
+ VEX_5M = 0x3;
+ break;
+ case X86II::T8XS: // F3 0F 38
+ VEX_PP = 0x2;
+ VEX_5M = 0x2;
+ break;
+ case X86II::T8XD: // F2 0F 38
+ VEX_PP = 0x3;
+ VEX_5M = 0x2;
+ break;
+ case X86II::TAXD: // F2 0F 3A
+ VEX_PP = 0x3;
+ VEX_5M = 0x3;
+ break;
+ case X86II::XS: // F3 0F
+ VEX_PP = 0x2;
+ break;
+ case X86II::XD: // F2 0F
+ VEX_PP = 0x3;
+ break;
+ case X86II::XOP8:
+ VEX_5M = 0x8;
+ break;
+ case X86II::XOP9:
+ VEX_5M = 0x9;
+ break;
+ case X86II::A6: // Bypass: Not used by VEX
+ case X86II::A7: // Bypass: Not used by VEX
+ case X86II::TB: // Bypass: Not used by VEX
+ case 0:
+ break; // No prefix!
+ }
+
+
+ // Classify VEX_B, VEX_4V, VEX_R, VEX_X
+ unsigned NumOps = Desc.getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
+ ++CurOp;
+ else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+ assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+ // Special case for GATHER with 2 TIED_TO operands
+ // Skip the first 2 operands: dst, mask_wb
+ CurOp += 2;
+ }
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!");
+ case X86II::MRMDestMem: {
+ // MRMDestMem instructions forms:
+ // MemAddr, src1(ModR/M)
+ // MemAddr, src1(VEX_4V), src2(ModR/M)
+ // MemAddr, src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ CurOp = X86::AddrNumOperands;
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ break;
+ }
+ case X86II::MRMSrcMem:
+ // MRMSrcMem instructions forms:
+ // src1(ModR/M), MemAddr
+ // src1(ModR/M), src2(VEX_4V), MemAddr
+ // src1(ModR/M), MemAddr, imm8
+ // src1(ModR/M), MemAddr, src2(VEX_I8IMM)
+ //
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp++).getReg()))
+ VEX_R = 0x0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ if (HasVEX_4VOp3)
+ // Instruction format for 4VOp3:
+ // src1(ModR/M), MemAddr, src3(VEX_4V)
+ // CurOp points to start of the MemoryOperand,
+ // it skips TIED_TO operands if exist, then increments past src1.
+ // CurOp + X86::AddrNumOperands will point to src3.
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
+ break;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ // MRM[0-9]m instructions forms:
+ // MemAddr
+ // src1(VEX_4V), MemAddr
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+ break;
+ }
+ case X86II::MRMSrcReg:
+ // MRMSrcReg instructions forms:
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M), src1(ModR/M)
+ // dst(ModR/M), src1(ModR/M), imm8
+ //
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ CurOp++;
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ break;
+ case X86II::MRMDestReg:
+ // MRMDestReg instructions forms:
+ // dst(ModR/M), src(ModR/M)
+ // dst(ModR/M), src(ModR/M), imm8
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+ break;
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ // MRM0r-MRM7r instructions forms:
+ // dst(VEX_4V), src(ModR/M), imm8
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_B = 0x0;
+ break;
+ default: // RawFrm
+ break;
+ }
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
+ // VEX opcode prefix can have 2 or 3 bytes
+ //
+ // 3 bytes:
+ // +-----+ +--------------+ +-------------------+
+ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+ // 2 bytes:
+ // +-----+ +-------------------+
+ // | C5h | | R | vvvv | L | pp |
+ // +-----+ +-------------------+
+ //
+ unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+ if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
+ EmitByte(0xC5, CurByte, OS);
+ EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
+ return;
+ }
+
+ // 3 byte VEX prefix
+ EmitByte(XOP ? 0x8F : 0xC4, CurByte, OS);
+ EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
+ EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+}
+
+/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
+static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
+ const MCInstrDesc &Desc) {
+ unsigned REX = 0;
+ if (TSFlags & X86II::REX_W)
+ REX |= 1 << 3; // set REX.W
+
+ if (MI.getNumOperands() == 0) return REX;
+
+ unsigned NumOps = MI.getNumOperands();
+ // FIXME: MCInst should explicitize the two-addrness.
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue;
+ // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
+ // that returns non-zero.
+ REX |= 0x40; // REX fixed encoding prefix
+ break;
+ }
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!");
+ case X86II::MRMSrcReg:
+ if (MI.getOperand(0).isReg() &&
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 2; // set REX.R
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << 0; // set REX.B
+ }
+ break;
+ case X86II::MRMSrcMem: {
+ if (MI.getOperand(0).isReg() &&
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 2; // set REX.R
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1)
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && MI.getOperand(e).isReg() &&
+ X86II::isX86_64ExtendedReg(MI.getOperand(e).getReg()))
+ REX |= 1 << 2; // set REX.R
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1)
+ Bit++;
+ }
+ }
+ break;
+ }
+ default:
+ if (MI.getOperand(0).isReg() &&
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 0; // set REX.B
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << 2; // set REX.R
+ }
+ break;
+ }
+ return REX;
+}
+
+/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
+void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
+ unsigned &CurByte, int MemOperand,
+ const MCInst &MI,
+ raw_ostream &OS) const {
+ switch (TSFlags & X86II::SegOvrMask) {
+ default: llvm_unreachable("Invalid segment!");
+ case 0:
+ // No segment override, check for explicit one on memory operand.
+ if (MemOperand != -1) { // If the instruction has a memory operand.
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+ default: llvm_unreachable("Unknown segment register!");
+ case 0: break;
+ case X86::CS: EmitByte(0x2E, CurByte, OS); break;
+ case X86::SS: EmitByte(0x36, CurByte, OS); break;
+ case X86::DS: EmitByte(0x3E, CurByte, OS); break;
+ case X86::ES: EmitByte(0x26, CurByte, OS); break;
+ case X86::FS: EmitByte(0x64, CurByte, OS); break;
+ case X86::GS: EmitByte(0x65, CurByte, OS); break;
+ }
+ }
+ break;
+ case X86II::FS:
+ EmitByte(0x64, CurByte, OS);
+ break;
+ case X86II::GS:
+ EmitByte(0x65, CurByte, OS);
+ break;
+ }
+}
+
+/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode.
+///
+/// MemOperand is the operand # of the start of a memory operand if present. If
+/// Not present, it is -1.
+void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const MCInstrDesc &Desc,
+ raw_ostream &OS) const {
+
+ // Emit the lock opcode prefix as needed.
+ if (TSFlags & X86II::LOCK)
+ EmitByte(0xF0, CurByte, OS);
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
+ // Emit the repeat opcode prefix as needed.
+ if ((TSFlags & X86II::Op0Mask) == X86II::REP)
+ EmitByte(0xF3, CurByte, OS);
+
+ // Emit the address size opcode prefix as needed.
+ bool need_address_override;
+ if (TSFlags & X86II::AdSize) {
+ need_address_override = true;
+ } else if (MemOperand == -1) {
+ need_address_override = false;
+ } else if (is64BitMode()) {
+ assert(!Is16BitMemOperand(MI, MemOperand));
+ need_address_override = Is32BitMemOperand(MI, MemOperand);
+ } else if (is32BitMode()) {
+ assert(!Is64BitMemOperand(MI, MemOperand));
+ need_address_override = Is16BitMemOperand(MI, MemOperand);
+ } else {
+ need_address_override = false;
+ }
+
+ if (need_address_override)
+ EmitByte(0x67, CurByte, OS);
+
+ // Emit the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ EmitByte(0x66, CurByte, OS);
+
+ bool Need0FPrefix = false;
+ switch (TSFlags & X86II::Op0Mask) {
+ default: llvm_unreachable("Invalid prefix!");
+ case 0: break; // No prefix!
+ case X86II::REP: break; // already handled.
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
+ Need0FPrefix = true;
+ break;
+ case X86II::T8XS: // F3 0F 38
+ EmitByte(0xF3, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::T8XD: // F2 0F 38
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::TAXD: // F2 0F 3A
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::XS: // F3 0F
+ EmitByte(0xF3, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: EmitByte(0xD8, CurByte, OS); break;
+ case X86II::D9: EmitByte(0xD9, CurByte, OS); break;
+ case X86II::DA: EmitByte(0xDA, CurByte, OS); break;
+ case X86II::DB: EmitByte(0xDB, CurByte, OS); break;
+ case X86II::DC: EmitByte(0xDC, CurByte, OS); break;
+ case X86II::DD: EmitByte(0xDD, CurByte, OS); break;
+ case X86II::DE: EmitByte(0xDE, CurByte, OS); break;
+ case X86II::DF: EmitByte(0xDF, CurByte, OS); break;
+ }
+
+ // Handle REX prefix.
+ // FIXME: Can this come before F2 etc to simplify emission?
+ if (is64BitMode()) {
+ if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc))
+ EmitByte(0x40 | REX, CurByte, OS);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ EmitByte(0x0F, CurByte, OS);
+
+ // FIXME: Pull this up into previous switch if REX can be moved earlier.
+ switch (TSFlags & X86II::Op0Mask) {
+ case X86II::T8XS: // F3 0F 38
+ case X86II::T8XD: // F2 0F 38
+ case X86II::T8: // 0F 38
+ EmitByte(0x38, CurByte, OS);
+ break;
+ case X86II::TAXD: // F2 0F 3A
+ case X86II::TA: // 0F 3A
+ EmitByte(0x3A, CurByte, OS);
+ break;
+ case X86II::A6: // 0F A6
+ EmitByte(0xA6, CurByte, OS);
+ break;
+ case X86II::A7: // 0F A7
+ EmitByte(0xA7, CurByte, OS);
+ break;
+ }
+}
+
+void X86MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Pseudo instructions don't get encoded.
+ if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return;
+
+ // If this is a two-address instruction, skip one of the register operands.
+ // FIXME: This should be handled during MCInst lowering.
+ unsigned NumOps = Desc.getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
+ ++CurOp;
+ else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+ assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+ // Special case for GATHER with 2 TIED_TO operands
+ // Skip the first 2 operands: dst, mask_wb
+ CurOp += 2;
+ }
+
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Is this instruction encoded using the AVX VEX prefix?
+ bool HasVEXPrefix = (TSFlags >> X86II::VEXShift) & X86II::VEX;
+
+ // It uses the VEX.VVVV field?
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+ const unsigned MemOp4_I8IMMOperand = 2;
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
+ if (MemoryOperand != -1) MemoryOperand += CurOp;
+
+ if (!HasVEXPrefix)
+ EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+ else
+ EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+
+ unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ BaseOpcode = 0x0F; // Weird 3DNow! encoding.
+
+ unsigned SrcRegNum = 0;
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ llvm_unreachable("FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
+ default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n";
+ llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!");
+ case X86II::Pseudo:
+ llvm_unreachable("Pseudo instruction shouldn't be emitted");
+ case X86II::RawFrm:
+ EmitByte(BaseOpcode, CurByte, OS);
+ break;
+ case X86II::RawFrmImm8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, CurByte,
+ OS, Fixups);
+ break;
+ case X86II::RawFrmImm16:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, CurByte,
+ OS, Fixups);
+ break;
+
+ case X86II::AddRegFrm:
+ EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
+ break;
+
+ case X86II::MRMDestReg:
+ EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
+ EmitRegModRMByte(MI.getOperand(CurOp),
+ GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
+ break;
+
+ case X86II::MRMDestMem:
+ EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + X86::AddrNumOperands;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
+ EmitMemModRMByte(MI, CurOp,
+ GetX86RegNum(MI.getOperand(SrcRegNum)),
+ TSFlags, CurByte, OS, Fixups);
+ CurOp = SrcRegNum + 1;
+ break;
+
+ case X86II::MRMSrcReg:
+ EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
+ if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
+ ++SrcRegNum;
+
+ EmitRegModRMByte(MI.getOperand(SrcRegNum),
+ GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+
+ // 2 operands skipped with HasMemOp4, compensate accordingly
+ CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
+ if (HasVEX_4VOp3)
+ ++CurOp;
+ break;
+
+ case X86II::MRMSrcMem: {
+ int AddrOperands = X86::AddrNumOperands;
+ unsigned FirstMemOp = CurOp+1;
+ if (HasVEX_4V) {
+ ++AddrOperands;
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ }
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ ++FirstMemOp;
+
+ EmitByte(BaseOpcode, CurByte, OS);
+
+ EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
+ TSFlags, CurByte, OS, Fixups);
+ CurOp += AddrOperands + 1;
+ if (HasVEX_4VOp3)
+ ++CurOp;
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ ++CurOp;
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitRegModRMByte(MI.getOperand(CurOp++),
+ (TSFlags & X86II::FormMask)-X86II::MRM0r,
+ CurByte, OS);
+ break;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ ++CurOp;
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
+ TSFlags, CurByte, OS, Fixups);
+ CurOp += X86::AddrNumOperands;
+ break;
+ case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
+ case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
+ case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
+ case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
+ case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
+ case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0:
+ case X86II::MRM_F8: case X86II::MRM_F9:
+ EmitByte(BaseOpcode, CurByte, OS);
+
+ unsigned char MRM;
+ switch (TSFlags & X86II::FormMask) {
+ default: llvm_unreachable("Invalid Form");
+ case X86II::MRM_C1: MRM = 0xC1; break;
+ case X86II::MRM_C2: MRM = 0xC2; break;
+ case X86II::MRM_C3: MRM = 0xC3; break;
+ case X86II::MRM_C4: MRM = 0xC4; break;
+ case X86II::MRM_C8: MRM = 0xC8; break;
+ case X86II::MRM_C9: MRM = 0xC9; break;
+ case X86II::MRM_D0: MRM = 0xD0; break;
+ case X86II::MRM_D1: MRM = 0xD1; break;
+ case X86II::MRM_D4: MRM = 0xD4; break;
+ case X86II::MRM_D5: MRM = 0xD5; break;
+ case X86II::MRM_D6: MRM = 0xD6; break;
+ case X86II::MRM_D8: MRM = 0xD8; break;
+ case X86II::MRM_D9: MRM = 0xD9; break;
+ case X86II::MRM_DA: MRM = 0xDA; break;
+ case X86II::MRM_DB: MRM = 0xDB; break;
+ case X86II::MRM_DC: MRM = 0xDC; break;
+ case X86II::MRM_DD: MRM = 0xDD; break;
+ case X86II::MRM_DE: MRM = 0xDE; break;
+ case X86II::MRM_DF: MRM = 0xDF; break;
+ case X86II::MRM_E8: MRM = 0xE8; break;
+ case X86II::MRM_F0: MRM = 0xF0; break;
+ case X86II::MRM_F8: MRM = 0xF8; break;
+ case X86II::MRM_F9: MRM = 0xF9; break;
+ }
+ EmitByte(MRM, CurByte, OS);
+ break;
+ }
+
+ // If there is a remaining operand, it must be a trailing immediate. Emit it
+ // according to the right size for the instruction. Some instructions
+ // (SSE4a extrq and insertq) have two trailing immediates.
+ while (CurOp != NumOps && NumOps - CurOp <= 2) {
+ // The last source register of a 4 operand instruction in AVX is encoded
+ // in bits[7:4] of a immediate byte.
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
+ const MCOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
+ : CurOp);
+ ++CurOp;
+ unsigned RegNum = GetX86RegNum(MO) << 4;
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
+ RegNum |= 1 << 7;
+ // If there is an additional 5th operand it must be an immediate, which
+ // is encoded in bits[3:0]
+ if (CurOp != NumOps) {
+ const MCOperand &MIMM = MI.getOperand(CurOp++);
+ if (MIMM.isImm()) {
+ unsigned Val = MIMM.getImm();
+ assert(Val < 16 && "Immediate operand value out of range");
+ RegNum |= Val;
+ }
+ }
+ EmitImmediate(MCOperand::CreateImm(RegNum), MI.getLoc(), 1, FK_Data_1,
+ CurByte, OS, Fixups);
+ } else {
+ unsigned FixupKind;
+ // FIXME: Is there a better way to know that we need a signed relocation?
+ if (MI.getOpcode() == X86::ADD64ri32 ||
+ MI.getOpcode() == X86::MOV64ri32 ||
+ MI.getOpcode() == X86::MOV64mi32 ||
+ MI.getOpcode() == X86::PUSH64i32)
+ FixupKind = X86::reloc_signed_4byte;
+ else
+ FixupKind = getImmFixupKind(TSFlags);
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
+ X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
+ CurByte, OS, Fixups);
+ }
+ }
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+
+#ifndef NDEBUG
+ // FIXME: Verify.
+ if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) {
+ errs() << "Cannot encode all operands of: ";
+ MI.dump();
+ errs() << '\n';
+ abort();
+ }
+#endif
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
new file mode 100644
index 000000000000..5e84530cd729
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -0,0 +1,442 @@
+//===-- X86MCTargetDesc.cpp - X86 Target Descriptions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCTargetDesc.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
+#include "X86MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_REGINFO_MC_DESC
+#include "X86GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "X86GenSubtargetInfo.inc"
+
+#if _MSC_VER
+#include <intrin.h>
+#endif
+
+using namespace llvm;
+
+
+std::string X86_MC::ParseX86Triple(StringRef TT) {
+ Triple TheTriple(TT);
+ std::string FS;
+ if (TheTriple.getArch() == Triple::x86_64)
+ FS = "+64bit-mode";
+ else
+ FS = "-64bit-mode";
+ return FS;
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #else
+ return true;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #else
+ return true;
+ #endif
+#else
+ return true;
+#endif
+}
+
+/// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
+/// 4 values in the specified arguments. If we can't run cpuid on the host,
+/// return true.
+bool X86_MC::GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc desn't know cpuid would clobber ebx/rbx. Preseve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value),
+ "c" (subleaf));
+ return false;
+ #elif defined(_MSC_VER)
+ // __cpuidex was added in MSVC++ 9.0 SP1
+ #if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
+ int registers[4];
+ __cpuidex(registers, value, subleaf);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #else
+ return true;
+ #endif
+ #else
+ return true;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value),
+ "c" (subleaf));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ mov ecx,subleaf
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #else
+ return true;
+ #endif
+#else
+ return true;
+#endif
+}
+
+void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
+ unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
+unsigned X86_MC::getDwarfRegFlavour(StringRef TT, bool isEH) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::x86_64)
+ return DWARFFlavour::X86_64;
+
+ if (TheTriple.isOSDarwin())
+ return isEH ? DWARFFlavour::X86_32_DarwinEH : DWARFFlavour::X86_32_Generic;
+ if (TheTriple.getOS() == Triple::MinGW32 ||
+ TheTriple.getOS() == Triple::Cygwin)
+ // Unsupported by now, just quick fallback
+ return DWARFFlavour::X86_32_Generic;
+ return DWARFFlavour::X86_32_Generic;
+}
+
+void X86_MC::InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI) {
+ // FIXME: TableGen these.
+ for (unsigned Reg = X86::NoRegister+1; Reg < X86::NUM_TARGET_REGS; ++Reg) {
+ unsigned SEH = MRI->getEncodingValue(Reg);
+ MRI->mapLLVMRegToSEHReg(Reg, SEH);
+ }
+}
+
+MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = X86_MC::ParseX86Triple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS);
+ return X;
+}
+
+static MCInstrInfo *createX86MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitX86MCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
+ Triple TheTriple(TT);
+ unsigned RA = (TheTriple.getArch() == Triple::x86_64)
+ ? X86::RIP // Should have dwarf #16.
+ : X86::EIP; // Should have dwarf #8.
+
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitX86MCRegisterInfo(X, RA,
+ X86_MC::getDwarfRegFlavour(TT, false),
+ X86_MC::getDwarfRegFlavour(TT, true),
+ RA);
+ X86_MC::InitLLVM2SEHRegisterMapping(X);
+ return X;
+}
+
+static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ bool is64Bit = TheTriple.getArch() == Triple::x86_64;
+
+ MCAsmInfo *MAI;
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ if (is64Bit)
+ MAI = new X86_64MCAsmInfoDarwin(TheTriple);
+ else
+ MAI = new X86MCAsmInfoDarwin(TheTriple);
+ } else if (TheTriple.getEnvironment() == Triple::ELF) {
+ // Force the use of an ELF container.
+ MAI = new X86ELFMCAsmInfo(TheTriple);
+ } else if (TheTriple.getOS() == Triple::Win32) {
+ MAI = new X86MCAsmInfoMicrosoft(TheTriple);
+ } else if (TheTriple.getOS() == Triple::MinGW32 || TheTriple.getOS() == Triple::Cygwin) {
+ MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
+ } else {
+ // The default is ELF.
+ MAI = new X86ELFMCAsmInfo(TheTriple);
+ }
+
+ // Initialize initial frame state.
+ // Calculate amount of bytes used for return address storing
+ int stackGrowth = is64Bit ? -8 : -4;
+
+ // Initial state of the frame pointer is esp+stackGrowth.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(is64Bit ? X86::RSP : X86::ESP, stackGrowth);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ // Add return address to move list
+ MachineLocation CSDst(is64Bit ? X86::RSP : X86::ESP, stackGrowth);
+ MachineLocation CSSrc(is64Bit ? X86::RIP : X86::EIP);
+ MAI->addInitialFrameState(0, CSDst, CSSrc);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ Triple T(TT);
+ bool is64Bit = T.getArch() == Triple::x86_64;
+
+ if (RM == Reloc::Default) {
+ // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
+ // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
+ // use static relocation model by default.
+ if (T.isOSDarwin()) {
+ if (is64Bit)
+ RM = Reloc::PIC_;
+ else
+ RM = Reloc::DynamicNoPIC;
+ } else if (T.isOSWindows() && is64Bit)
+ RM = Reloc::PIC_;
+ else
+ RM = Reloc::Static;
+ }
+
+ // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
+ // is defined as a model for code which may be used in static or dynamic
+ // executables but not necessarily a shared library. On X86-32 we just
+ // compile in -static mode, in x86-64 we use PIC.
+ if (RM == Reloc::DynamicNoPIC) {
+ if (is64Bit)
+ RM = Reloc::PIC_;
+ else if (!T.isOSDarwin())
+ RM = Reloc::Static;
+ }
+
+ // If we are on Darwin, disallow static relocation model in X86-64 mode, since
+ // the Mach-O file format doesn't support it.
+ if (RM == Reloc::Static && T.isOSDarwin() && is64Bit)
+ RM = Reloc::PIC_;
+
+ // For static codegen, if we're not already set, use Small codegen.
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+ else if (CM == CodeModel::JITDefault)
+ // 64-bit JIT places everything in the same buffer except external funcs.
+ CM = is64Bit ? CodeModel::Large : CodeModel::Small;
+
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
+
+ if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
+ return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll);
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new X86ATTInstPrinter(MAI, MII, MRI);
+ if (SyntaxVariant == 1)
+ return new X86IntelInstPrinter(MAI, MII, MRI);
+ return 0;
+}
+
+static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
+ return new MCInstrAnalysis(Info);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86TargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
+ RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
+
+ // Register the MC codegen info.
+ RegisterMCCodeGenInfoFn C(TheX86_32Target, createX86MCCodeGenInfo);
+ RegisterMCCodeGenInfoFn D(TheX86_64Target, createX86MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
+ X86_MC::createX86MCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
+ X86_MC::createX86MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheX86_32Target,
+ createX86MCInstrAnalysis);
+ TargetRegistry::RegisterMCInstrAnalysis(TheX86_64Target,
+ createX86MCInstrAnalysis);
+
+ // Register the code emitter.
+ TargetRegistry::RegisterMCCodeEmitter(TheX86_32Target,
+ createX86MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheX86_64Target,
+ createX86MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheX86_32Target,
+ createX86_32AsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheX86_64Target,
+ createX86_64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheX86_32Target,
+ createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheX86_64Target,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,
+ createX86MCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,
+ createX86MCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
new file mode 100644
index 000000000000..981aa1a2b911
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -0,0 +1,114 @@
+//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MCTARGETDESC_H
+#define X86MCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+class raw_ostream;
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+/// DWARFFlavour - Flavour of dwarf regnumbers
+///
+namespace DWARFFlavour {
+ enum {
+ X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
+ };
+}
+
+/// N86 namespace - Native X86 register numbers
+///
+namespace N86 {
+ enum {
+ EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
+ };
+}
+
+namespace X86_MC {
+ std::string ParseX86Triple(StringRef TT);
+
+ /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+ /// the specified arguments. If we can't run cpuid on the host, return true.
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
+ /// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
+ /// the 4 values in the specified arguments. If we can't run cpuid on the
+ /// host, return true.
+ bool GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
+
+ void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
+
+ unsigned getDwarfRegFlavour(StringRef TT, bool isEH);
+
+ void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
+
+ /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU);
+
+/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+/// createX86ELFObjectWriter - Construct an X86 ELF object writer.
+MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS,
+ bool IsELF64,
+ uint8_t OSABI,
+ uint16_t EMachine);
+/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer.
+MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
+} // End llvm namespace
+
+
+// Defines symbolic names for X86 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+
+// Defines symbolic names for the X86 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "X86GenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
new file mode 100644
index 000000000000..64f005c469bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -0,0 +1,586 @@
+//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+class X86MachObjectWriter : public MCMachObjectTargetWriter {
+ bool RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordTLVPRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void RecordX86Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+ void RecordX86_64Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+public:
+ X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ if (Writer->is64Bit())
+ RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ else
+ RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ }
+};
+}
+
+static bool isFixupKindRIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+}
+
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1: return 0;
+ case FK_PCRel_2:
+ case FK_Data_2: return 1;
+ case FK_PCRel_4:
+ // FIXME: Remove these!!!
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case FK_Data_4: return 2;
+ case FK_Data_8: return 3;
+ }
+}
+
+void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+ // See <reloc.h>.
+ uint32_t FixupOffset =
+ Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ uint32_t FixupAddress =
+ Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+ int64_t Value = 0;
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ Value = Target.getConstant();
+
+ if (IsPCRel) {
+ // Compensate for the relocation offset, Darwin x86_64 relocations only have
+ // the addend and appear to have attempted to define it to be the actual
+ // expression addend without the PCrel bias. However, instructions with data
+ // following the relocation are not accommodated for (see comment below
+ // regarding SIGNED{1,2,4}), so it isn't exactly that either.
+ Value += 1LL << Log2Size;
+ }
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ Type = macho::RIT_X86_64_Unsigned;
+ Index = 0;
+
+ // FIXME: I believe this is broken, I don't think the linker can understand
+ // it. I think it would require a local relocation, but I'm not sure if that
+ // would work either. The official way to get an absolute PCrel relocation
+ // is to use an absolute symbol (which we don't support yet).
+ if (IsPCRel) {
+ IsExtern = 1;
+ Type = macho::RIT_X86_64_Branch;
+ }
+ } else if (Target.getSymB()) { // A - B + constant
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData &A_SD = Asm.getSymbolData(*A);
+ const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+
+ const MCSymbol *B = &Target.getSymB()->getSymbol();
+ MCSymbolData &B_SD = Asm.getSymbolData(*B);
+ const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+
+ // Neither symbol can be modified.
+ if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
+ Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported relocation of modified symbol");
+
+ // We don't support PCrel relocations of differences. Darwin 'as' doesn't
+ // implement most of these correctly.
+ if (IsPCRel)
+ report_fatal_error("unsupported pc-relative relocation of difference");
+
+ // The support for the situation where one or both of the symbols would
+ // require a local relocation is handled just like if the symbols were
+ // external. This is certainly used in the case of debug sections where the
+ // section has only temporary symbols and thus the symbols don't have base
+ // symbols. This is encoded using the section ordinal and non-extern
+ // relocation entries.
+
+ // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
+ // single SIGNED relocation); reject it for now. Except the case where both
+ // symbols don't have a base, equal but both NULL.
+ if (A_Base == B_Base && A_Base)
+ report_fatal_error("unsupported relocation with identical base");
+
+ Value += Writer->getSymbolAddress(&A_SD, Layout) -
+ (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
+ Value -= Writer->getSymbolAddress(&B_SD, Layout) -
+ (B_Base == NULL ? 0 : Writer->getSymbolAddress(B_Base, Layout));
+
+ if (A_Base) {
+ Index = A_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Unsigned;
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+
+ if (B_Base) {
+ Index = B_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Subtractor;
+ } else {
+ const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData *Base = Asm.getAtom(&SD);
+
+ // Relocations inside debug sections always use local relocations when
+ // possible. This seems to be done because the debugger doesn't fully
+ // understand x86_64 relocation entries, and expects to find values that
+ // have already been fixed up.
+ if (Symbol->isInSection()) {
+ const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
+ Fragment->getParent()->getSection());
+ if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
+ Base = 0;
+ }
+
+ // x86_64 almost always uses external relocations, except when there is no
+ // symbol to use as a base address (a local symbol with no preceding
+ // non-local symbol).
+ if (Base) {
+ Index = Base->getIndex();
+ IsExtern = 1;
+
+ // Add the local offset, if needed.
+ if (Base != &SD)
+ Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+ } else if (Symbol->isInSection() && !Symbol->isVariable()) {
+ // The index is the section ordinal (1-based).
+ Index = SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ Value += Writer->getSymbolAddress(&SD, Layout);
+
+ if (IsPCRel)
+ Value -= FixupAddress + (1 << Log2Size);
+ } else if (Symbol->isVariable()) {
+ const MCExpr *Value = Symbol->getVariableValue();
+ int64_t Res;
+ bool isAbs = Value->EvaluateAsAbsolute(Res, Layout,
+ Writer->getSectionAddressMap());
+ if (isAbs) {
+ FixedValue = Res;
+ return;
+ } else {
+ report_fatal_error("unsupported relocation of variable '" +
+ Symbol->getName() + "'");
+ }
+ } else {
+ report_fatal_error("unsupported relocation of undefined symbol '" +
+ Symbol->getName() + "'");
+ }
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
+ if (IsPCRel) {
+ if (IsRIPRel) {
+ if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
+ // rewrite the movq to an leaq at link time if the symbol ends up in
+ // the same linkage unit.
+ if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
+ Type = macho::RIT_X86_64_GOTLoad;
+ else
+ Type = macho::RIT_X86_64_GOT;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ Type = macho::RIT_X86_64_TLV;
+ } else if (Modifier != MCSymbolRefExpr::VK_None) {
+ report_fatal_error("unsupported symbol modifier in relocation");
+ } else {
+ Type = macho::RIT_X86_64_Signed;
+
+ // The Darwin x86_64 relocation format has a problem where it cannot
+ // encode an address (L<foo> + <constant>) which is outside the atom
+ // containing L<foo>. Generally, this shouldn't occur but it does
+ // happen when we have a RIPrel instruction with data following the
+ // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
+ // adjustment Darwin x86_64 uses, the offset is still negative and the
+ // linker has no way to recognize this.
+ //
+ // To work around this, Darwin uses several special relocation types
+ // to indicate the offsets. However, the specification or
+ // implementation of these seems to also be incomplete; they should
+ // adjust the addend as well based on the actual encoded instruction
+ // (the additional bias), but instead appear to just look at the final
+ // offset.
+ switch (-(Target.getConstant() + (1LL << Log2Size))) {
+ case 1: Type = macho::RIT_X86_64_Signed1; break;
+ case 2: Type = macho::RIT_X86_64_Signed2; break;
+ case 4: Type = macho::RIT_X86_64_Signed4; break;
+ }
+ }
+ } else {
+ if (Modifier != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported symbol modifier in branch "
+ "relocation");
+
+ Type = macho::RIT_X86_64_Branch;
+ }
+ } else {
+ if (Modifier == MCSymbolRefExpr::VK_GOT) {
+ Type = macho::RIT_X86_64_GOT;
+ } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
+ // case all we do is set the PCrel bit in the relocation entry; this is
+ // used with exception handling, for example. The source is required to
+ // include any necessary offset directly.
+ Type = macho::RIT_X86_64_GOT;
+ IsPCRel = 1;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ report_fatal_error("TLVP symbol modifier should have been rip-rel");
+ } else if (Modifier != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported symbol modifier in relocation");
+ else
+ Type = macho::RIT_X86_64_Unsigned;
+ }
+ }
+
+ // x86_64 always writes custom values into the fixups.
+ FixedValue = Value;
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ //
+ // Note that there is no longer any semantic difference between these two
+ // relocation types from the linkers point of view, this is done solely for
+ // pedantic compatibility with 'as'.
+ Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
+ (unsigned)macho::RIT_Generic_LocalDifference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ // If the offset is too large to fit in a scattered relocation,
+ // we're hosed. It's an unfortunate limitation of the MachO format.
+ if (FixupOffset > 0xffffff) {
+ char Buffer[32];
+ format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ Twine("Section too large, can't encode "
+ "r_address (") + Buffer +
+ ") into 24 bits of scattered "
+ "relocation entry.");
+ llvm_unreachable("fatal error returned?!");
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ } else {
+ // If the offset is more than 24-bits, it won't fit in a scattered
+ // relocation offset field, so we fall back to using a non-scattered
+ // relocation. This is a bit risky, as if the offset reaches out of
+ // the block and the linker is doing scattered loading on this
+ // symbol, things can go badly.
+ //
+ // Required for 'as' compatibility.
+ if (FixupOffset > 0xffffff)
+ return false;
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ return true;
+}
+
+void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
+ !is64Bit() &&
+ "Should only be called with a 32-bit TLVP relocation!");
+
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+ uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = 0;
+
+ // Get the symbol data.
+ MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ unsigned Index = SD_A->getIndex();
+
+ // We're only going to have a second symbol in pic mode and it'll be a
+ // subtraction from the picbase. For 32-bit pic the addend is the difference
+ // between the picbase and the next address. For 32-bit static the addend is
+ // zero.
+ if (Target.getSymB()) {
+ // If this is a subtraction then we're pcrel.
+ uint32_t FixupAddress =
+ Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+ MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
+ IsPCRel = 1;
+ FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) +
+ Target.getConstant());
+ FixedValue += 1ULL << Log2Size;
+ } else {
+ FixedValue = 0;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = Value;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (1 << 27) | // Extern
+ (macho::RIT_Generic_TLV << 28)); // Type
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+ // If this is a 32-bit TLVP reloc it's handled a bit differently.
+ if (Target.getSymA() &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
+ RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB()) {
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ return;
+ }
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // If this is an internal relocation with an offset, it also needs a scattered
+ // relocation entry.
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel)
+ Offset += 1 << Log2Size;
+ // Try to record the scattered relocation if needed. Fall back to non
+ // scattered if necessary (see comments in RecordScatteredRelocation()
+ // for details).
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) &&
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue))
+ return;
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: Currently, these are never generated (see code below). I cannot
+ // find a case where they are actually emitted.
+ Type = macho::RIT_Vanilla;
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ Type = macho::RIT_Vanilla;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..bc272efcc9ce
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -0,0 +1,65 @@
+//===-- X86WinCOFFObjectWriter.cpp - X86 Win COFF Writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace llvm {
+ class MCObjectWriter;
+}
+
+namespace {
+ class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+ const bool Is64Bit;
+
+ public:
+ X86WinCOFFObjectWriter(bool Is64Bit_);
+ ~X86WinCOFFObjectWriter();
+
+ virtual unsigned getRelocType(unsigned FixupKind) const;
+ };
+}
+
+X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_)
+ : MCWinCOFFObjectTargetWriter(Is64Bit_ ? COFF::IMAGE_FILE_MACHINE_AMD64 :
+ COFF::IMAGE_FILE_MACHINE_I386),
+ Is64Bit(Is64Bit_) {}
+
+X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {}
+
+unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const {
+ switch (FixupKind) {
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32;
+ case FK_Data_4:
+ case X86::reloc_signed_4byte:
+ return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32;
+ case FK_Data_8:
+ if (Is64Bit)
+ return COFF::IMAGE_REL_AMD64_ADDR64;
+ llvm_unreachable("unsupported relocation type");
+ case FK_SecRel_4:
+ return Is64Bit ? COFF::IMAGE_REL_AMD64_SECREL : COFF::IMAGE_REL_I386_SECREL;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+}
+
+MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS,
+ bool Is64Bit) {
+ MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit);
+ return createWinCOFFObjectWriter(MOTW, OS);
+}
diff --git a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
new file mode 100644
index 000000000000..815d23588f11
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- X86TargetInfo.cpp - X86 Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheX86_32Target, llvm::TheX86_64Target;
+
+extern "C" void LLVMInitializeX86TargetInfo() {
+ RegisterTarget<Triple::x86, /*HasJIT=*/true>
+ X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");
+
+ RegisterTarget<Triple::x86_64, /*HasJIT=*/true>
+ Y(TheX86_64Target, "x86-64", "64-bit X86: EM64T and AMD64");
+}
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
new file mode 100644
index 000000000000..bbd490411f2d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -0,0 +1,217 @@
+//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ShuffleDecode.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ // Defaults the copying the dest value.
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+
+ // Decode the immediate.
+ unsigned ZMask = Imm & 15;
+ unsigned CountD = (Imm >> 4) & 3;
+ unsigned CountS = (Imm >> 6) & 3;
+
+ // CountS selects which input element to use.
+ unsigned InVal = 4+CountS;
+ // CountD specifies which element of destination to update.
+ ShuffleMask[CountD] = InVal;
+ // ZMask zaps values, potentially overriding the CountD elt.
+ if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+ if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+ if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts+i);
+
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(i);
+
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(NElts+i);
+}
+
+void DecodePALIGNRMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Base = i + Offset;
+ // if i+offset is out of this lane then we actually need the other source
+ if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
+ ShuffleMask.push_back(Base + l);
+ }
+ }
+}
+
+/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
+/// VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ unsigned NewImm = Imm;
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + l);
+ NewImm /= NumLaneElts;
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
+ }
+}
+
+void DecodePSHUFHWMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ unsigned NewImm = Imm;
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ ShuffleMask.push_back(l + i);
+ }
+ for (unsigned i = 4, e = 8; i != e; ++i) {
+ ShuffleMask.push_back(l + 4 + (NewImm & 3));
+ NewImm >>= 2;
+ }
+ }
+}
+
+void DecodePSHUFLWMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ unsigned NewImm = Imm;
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ ShuffleMask.push_back(l + (NewImm & 3));
+ NewImm >>= 2;
+ }
+ for (unsigned i = 4, e = 8; i != e; ++i) {
+ ShuffleMask.push_back(l + i);
+ }
+ }
+}
+
+/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
+/// the type of the vector allowing it to handle different datatypes and vector
+/// widths.
+void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ unsigned NewImm = Imm;
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ // each half of a lane comes from different source
+ for (unsigned s = 0; s != NumElts*2; s += NumElts) {
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
+ NewImm /= NumLaneElts;
+ }
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
+ }
+}
+
+/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// and punpckh*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2
+ }
+ }
+}
+
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// and punpckl*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2
+ }
+ }
+}
+
+void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ if (Imm & 0x88)
+ return; // Not a shuffle
+
+ unsigned HalfSize = VT.getVectorNumElements()/2;
+
+ for (unsigned l = 0; l != 2; ++l) {
+ unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize;
+ for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+ }
+}
+
+/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
+/// No VT provided since it only works on 256-bit, 4 element vectors.
+void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back((Imm >> (2*i)) & 3);
+ }
+}
+
+} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
new file mode 100644
index 000000000000..017ab325ec51
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -0,0 +1,71 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+enum {
+ SM_SentinelZero = -1
+};
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSHUFLWMask(MVT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
+/// the type of the vector allowing it to handle different datatypes and vector
+/// widths.
+void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// and punpckh*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// and punpckl*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
+
+void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
+/// No VT provided since it only works on 256-bit, 4 element vectors.
+void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+} // llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
new file mode 100644
index 000000000000..1f9919f15955
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -0,0 +1,75 @@
+//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the x86
+// target library, as used by the LLVM JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_X86_H
+#define TARGET_X86_H
+
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class FunctionPass;
+class JITCodeEmitter;
+class X86TargetMachine;
+
+/// createX86ISelDag - This pass converts a legalized DAG into a
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *createX86ISelDag(X86TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+/// createGlobalBaseRegPass - This pass initializes a global base
+/// register for PIC on x86-32.
+FunctionPass* createGlobalBaseRegPass();
+
+/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses
+/// to local-dynamic TLS variables so that the TLS base address for the module
+/// is only fetched once per execution path through the function.
+FunctionPass *createCleanupLocalDynamicTLSPass();
+
+/// createX86FloatingPointStackifierPass - This function returns a pass which
+/// converts floating point register references and pseudo instructions into
+/// floating point stack references and physical instructions.
+///
+FunctionPass *createX86FloatingPointStackifierPass();
+
+/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
+/// before each call to avoid transition penalty between functions encoded with
+/// AVX and SSE.
+FunctionPass *createX86IssueVZeroUpperPass();
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified MCE object.
+FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+/// createX86EmitCodeToMemory - Returns a pass that converts a register
+/// allocated function into raw machine code in a dynamically
+/// allocated chunk of memory.
+///
+FunctionPass *createEmitX86CodeToMemory();
+
+/// \brief Creates an X86-specific Target Transformation Info pass.
+ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM);
+
+/// createX86PadShortFunctions - Return a pass that pads short functions
+/// with NOOPs. This will prevent a stall when returning on the Atom.
+FunctionPass *createX86PadShortFunctions();
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
new file mode 100644
index 000000000000..1dcc344e7f0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -0,0 +1,343 @@
+//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the Intel i386 architecture, referred
+// to here as the "X86" architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget state
+//
+
+def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
+ "64-bit mode (x86_64)">;
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget features
+//===----------------------------------------------------------------------===//
+
+def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
+ "Enable conditional move instructions">;
+
+def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
+ "Support POPCNT instruction">;
+
+
+def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
+ "Enable MMX instructions">;
+def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
+ "Enable SSE instructions",
+ // SSE codegen depends on cmovs, and all
+ // SSE1+ processors support them.
+ [FeatureMMX, FeatureCMOV]>;
+def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
+ "Enable SSE2 instructions",
+ [FeatureSSE1]>;
+def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
+ "Enable SSE3 instructions",
+ [FeatureSSE2]>;
+def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
+ "Enable SSSE3 instructions",
+ [FeatureSSE3]>;
+def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
+ "Enable SSE 4.1 instructions",
+ [FeatureSSSE3]>;
+def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
+ "Enable SSE 4.2 instructions",
+ [FeatureSSE41]>;
+def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
+ "Enable 3DNow! instructions",
+ [FeatureMMX]>;
+def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
+ "Enable 3DNow! Athlon instructions",
+ [Feature3DNow]>;
+// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
+// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
+// without disabling 64-bit mode.
+def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
+ "Support 64-bit instructions",
+ [FeatureCMOV]>;
+def FeatureCMPXCHG16B : SubtargetFeature<"cmpxchg16b", "HasCmpxchg16b", "true",
+ "64-bit with cmpxchg16b",
+ [Feature64Bit]>;
+def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
+ "Bit testing of memory is slow">;
+def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
+ "IsUAMemFast", "true",
+ "Fast unaligned memory access">;
+def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
+ "Support SSE 4a instructions",
+ [FeatureSSE3]>;
+
+def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
+ "Enable AVX instructions",
+ [FeatureSSE42]>;
+def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
+ "Enable AVX2 instructions",
+ [FeatureAVX]>;
+def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
+ "Enable packed carry-less multiplication instructions",
+ [FeatureSSE2]>;
+def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
+ "Enable three-operand fused multiple-add",
+ [FeatureAVX]>;
+def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
+ "Enable four-operand fused multiple-add",
+ [FeatureAVX, FeatureSSE4A]>;
+def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
+ "Enable XOP instructions",
+ [FeatureFMA4]>;
+def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
+ "HasVectorUAMem", "true",
+ "Allow unaligned memory operands on vector/SIMD instructions">;
+def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
+ "Enable AES instructions",
+ [FeatureSSE2]>;
+def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
+ "Support MOVBE instruction">;
+def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true",
+ "Support RDRAND instruction">;
+def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
+ "Support 16-bit floating point conversion instructions">;
+def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
+ "Support FS/GS Base instructions">;
+def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
+ "Support LZCNT instruction">;
+def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
+ "Support BMI instructions">;
+def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
+ "Support BMI2 instructions">;
+def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
+ "Support RTM instructions">;
+def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true",
+ "Support HLE">;
+def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
+ "Support ADX instructions">;
+def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
+ "Support PRFCHW instructions">;
+def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
+ "Support RDSEED instruction">;
+def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+ "Use LEA for adjusting the stack pointer">;
+def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
+ "HasSlowDivide", "true",
+ "Use small divide for positive values less than 256">;
+def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
+ "PadShortFunctions", "true",
+ "Pad short functions">;
+def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
+ "CallRegIndirect", "true",
+ "Call register indirect">;
+
+//===----------------------------------------------------------------------===//
+// X86 processors supported.
+//===----------------------------------------------------------------------===//
+
+include "X86Schedule.td"
+
+def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
+ "Intel Atom processors">;
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : ProcessorModel<Name, GenericModel, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"i386", []>;
+def : Proc<"i486", []>;
+def : Proc<"i586", []>;
+def : Proc<"pentium", []>;
+def : Proc<"pentium-mmx", [FeatureMMX]>;
+def : Proc<"i686", []>;
+def : Proc<"pentiumpro", [FeatureCMOV]>;
+def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
+def : Proc<"pentium3", [FeatureSSE1]>;
+def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
+def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"pentium4", [FeatureSSE2]>;
+def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
+// Intel Core Duo.
+def : ProcessorModel<"yonah", SandyBridgeModel,
+ [FeatureSSE3, FeatureSlowBTMem]>;
+
+// NetBurst.
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Intel Core 2 Solo/Duo.
+def : ProcessorModel<"core2", SandyBridgeModel,
+ [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+def : ProcessorModel<"penryn", SandyBridgeModel,
+ [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Atom.
+def : ProcessorModel<"atom", AtomModel,
+ [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
+ FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
+ FeatureSlowDivide,
+ FeatureCallRegIndirect,
+ FeaturePadShortFunctions]>;
+
+// "Arrandale" along with corei3 and corei5
+def : ProcessorModel<"corei7", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT, FeatureAES]>;
+
+def : ProcessorModel<"nehalem", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT]>;
+// Westmere is a similar machine to nehalem with some additional features.
+// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
+def : ProcessorModel<"westmere", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT, FeatureAES,
+ FeaturePCLMUL]>;
+// Sandy Bridge
+// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
+// rather than a superset.
+def : ProcessorModel<"corei7-avx", SandyBridgeModel,
+ [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
+// Ivy Bridge
+def : ProcessorModel<"core-avx-i", SandyBridgeModel,
+ [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase]>;
+
+// Haswell
+def : ProcessorModel<"core-avx2", HaswellModel,
+ [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
+ FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
+ FeatureHLE]>;
+
+def : Proc<"k6", [FeatureMMX]>;
+def : Proc<"k6-2", [Feature3DNow]>;
+def : Proc<"k6-3", [Feature3DNow]>;
+def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"amdfam10", [FeatureSSE4A,
+ Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
+ FeaturePOPCNT, FeatureSlowBTMem]>;
+// Bobcat
+def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureLZCNT, FeaturePOPCNT]>;
+// Bulldozer
+def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
+ FeatureAES, FeaturePCLMUL,
+ FeatureLZCNT, FeaturePOPCNT]>;
+// Enhanced Bulldozer
+def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
+ FeatureAES, FeaturePCLMUL,
+ FeatureF16C, FeatureLZCNT,
+ FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
+def : Proc<"geode", [Feature3DNowA]>;
+
+def : Proc<"winchip-c6", [FeatureMMX]>;
+def : Proc<"winchip2", [Feature3DNow]>;
+def : Proc<"c3", [Feature3DNow]>;
+def : Proc<"c3-2", [FeatureSSE1]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "X86RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "X86InstrInfo.td"
+
+def X86InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "X86CallingConv.td"
+
+
+//===----------------------------------------------------------------------===//
+// Assembly Parser
+//===----------------------------------------------------------------------===//
+
+def ATTAsmParser : AsmParser {
+ string AsmParserClassName = "AsmParser";
+}
+
+def ATTAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+
+ // Discard comments in assembly strings.
+ string CommentDelimiter = "#";
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "%";
+}
+
+def IntelAsmParserVariant : AsmParserVariant {
+ int Variant = 1;
+
+ // Discard comments in assembly strings.
+ string CommentDelimiter = ";";
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "";
+}
+
+//===----------------------------------------------------------------------===//
+// Assembly Printers
+//===----------------------------------------------------------------------===//
+
+// The X86 target supports two different syntaxes for emitting machine code.
+// This is controlled by the -x86-asm-syntax={att|intel}
+def ATTAsmWriter : AsmWriter {
+ string AsmWriterClassName = "ATTInstPrinter";
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+def IntelAsmWriter : AsmWriter {
+ string AsmWriterClassName = "IntelInstPrinter";
+ int Variant = 1;
+ bit isMCAsmWriter = 1;
+}
+
+def X86 : Target {
+ // Information about the instructions...
+ let InstructionSet = X86InstrInfo;
+ let AssemblyParsers = [ATTAsmParser];
+ let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
+ let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
new file mode 100644
index 000000000000..6b228b0b0329
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -0,0 +1,755 @@
+//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to X86 machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86AsmPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "X86.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Primitive Helper Functions.
+//===----------------------------------------------------------------------===//
+
+/// runOnMachineFunction - Emit the function body.
+///
+bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
+ bool Intrn = MF.getFunction()->hasInternalLinkage();
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer.EndCOFFSymbolDef();
+ }
+
+ // Have common code print out the function header with linkage info etc.
+ EmitFunctionHeader();
+
+ // Emit the rest of the function body.
+ EmitFunctionBody();
+
+ // We didn't modify anything.
+ return false;
+}
+
+/// printSymbolOperand - Print a raw symbol reference operand. This handles
+/// jump tables, constant pools, global address and external symbols, all of
+/// which print to a label with various suffixes for relocation types etc.
+void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
+ raw_ostream &O) {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown symbol type!");
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ printOffset(MO.getOffset(), O);
+ break;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+
+ MCSymbol *GVSym;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
+ GVSym = GetSymbolWithGlobalValueBase(GV, "$stub");
+ else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ GVSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ else
+ GVSym = Mang->getSymbol(GV);
+
+ // Handle dllimport linkage.
+ if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
+ GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
+
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (GVSym->getName()[0] != '$')
+ O << *GVSym;
+ else
+ O << '(' << *GVSym << ')';
+ printOffset(MO.getOffset(), O);
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ const MCSymbol *SymToPrint;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ SmallString<128> TempNameStr;
+ TempNameStr += StringRef(MO.getSymbolName());
+ TempNameStr += StringRef("$stub");
+
+ MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end());
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(OutContext.GetOrCreateSymbol(TempNameStr.str()),
+ true);
+ }
+ SymToPrint = StubSym.getPointer();
+ } else {
+ SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (SymToPrint->getName()[0] != '$')
+ O << *SymToPrint;
+ else
+ O << '(' << *SymToPrint << '(';
+ break;
+ }
+ }
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ break;
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ // These affect the name of the symbol, not any suffix.
+ break;
+ case X86II::MO_GOT_ABSOLUTE_ADDRESS:
+ O << " + [.-" << *MF->getPICBaseSymbol() << ']';
+ break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ O << '-' << *MF->getPICBaseSymbol();
+ break;
+ case X86II::MO_TLSGD: O << "@TLSGD"; break;
+ case X86II::MO_TLSLD: O << "@TLSLD"; break;
+ case X86II::MO_TLSLDM: O << "@TLSLDM"; break;
+ case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: O << "@TPOFF"; break;
+ case X86II::MO_DTPOFF: O << "@DTPOFF"; break;
+ case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
+ case X86II::MO_GOTNTPOFF: O << "@GOTNTPOFF"; break;
+ case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
+ case X86II::MO_GOT: O << "@GOT"; break;
+ case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
+ case X86II::MO_PLT: O << "@PLT"; break;
+ case X86II::MO_TLVP: O << "@TLVP"; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
+ break;
+ case X86II::MO_SECREL: O << "@SECREL32"; break;
+ }
+}
+
+/// printPCRelImm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
+void X86AsmPrinter::printPCRelImm(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("Unknown pcrel immediate operand");
+ case MachineOperand::MO_Register:
+ // pc-relativeness was handled when computing the value in the reg.
+ printOperand(MI, OpNo, O);
+ return;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ printSymbolOperand(MO, O);
+ return;
+ }
+}
+
+
+void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier,
+ unsigned AsmVariant) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type!");
+ case MachineOperand::MO_Register: {
+ // FIXME: Enumerating AsmVariant, so we can remove magic number.
+ if (AsmVariant == 0) O << '%';
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ MVT::SimpleValueType VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << X86ATTInstPrinter::getRegisterName(Reg);
+ return;
+ }
+
+ case MachineOperand::MO_Immediate:
+ if (AsmVariant == 0) O << '$';
+ O << MO.getImm();
+ return;
+
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol: {
+ if (AsmVariant == 0) O << '$';
+ printSymbolOperand(MO, O);
+ break;
+ }
+ }
+}
+
+void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ // If we really don't want to print out (rip), don't.
+ bool HasBaseReg = BaseReg.getReg() != 0;
+ if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
+ BaseReg.getReg() == X86::RIP)
+ HasBaseReg = false;
+
+ // HasParenPart - True if we will print out the () part of the mem ref.
+ bool HasParenPart = IndexReg.getReg() || HasBaseReg;
+
+ if (DispSpec.isImm()) {
+ int DispVal = DispSpec.getImm();
+ if (DispVal || !HasParenPart)
+ O << DispVal;
+ } else {
+ assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
+ DispSpec.isJTI() || DispSpec.isSymbol());
+ printSymbolOperand(MI->getOperand(Op+3), O);
+ }
+
+ if (Modifier && strcmp(Modifier, "H") == 0)
+ O << "+8";
+
+ if (HasParenPart) {
+ assert(IndexReg.getReg() != X86::ESP &&
+ "X86 doesn't allow scaling by ESP");
+
+ O << '(';
+ if (HasBaseReg)
+ printOperand(MI, Op, O, Modifier);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2, O, Modifier);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
+
+void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier) {
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ const MachineOperand &Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4, O, Modifier);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op, O, Modifier);
+}
+
+void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier,
+ unsigned AsmVariant){
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+ const MachineOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O, Modifier, AsmVariant);
+ O << ':';
+ }
+
+ O << '[';
+
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOperand(MI, Op, O, Modifier, AsmVariant);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << '*';
+ printOperand(MI, Op+2, O, Modifier, AsmVariant);
+ NeedPlus = true;
+ }
+
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ printOperand(MI, Op+3, O, Modifier, AsmVariant);
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << DispVal;
+ }
+ }
+ O << ']';
+}
+
+bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
+ raw_ostream &O) {
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ case 'q': // Print DImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i64);
+ break;
+ }
+
+ O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'a': // This is an address. Currently only 'i' and 'r' are expected.
+ if (MO.isImm()) {
+ O << MO.getImm();
+ return false;
+ }
+ if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
+ printSymbolOperand(MO, O);
+ if (Subtarget->isPICStyleRIPRel())
+ O << "(%rip)";
+ return false;
+ }
+ if (MO.isReg()) {
+ O << '(';
+ printOperand(MI, OpNo, O);
+ O << ')';
+ return false;
+ }
+ return true;
+
+ case 'c': // Don't print "$" before a global var name or constant.
+ if (MO.isImm())
+ O << MO.getImm();
+ else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol())
+ printSymbolOperand(MO, O);
+ else
+ printOperand(MI, OpNo, O);
+ return false;
+
+ case 'A': // Print '*' before a register (it must be a register)
+ if (MO.isReg()) {
+ O << '*';
+ printOperand(MI, OpNo, O);
+ return false;
+ }
+ return true;
+
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ if (MO.isReg())
+ return printAsmMRegister(MO, ExtraCode[0], O);
+ printOperand(MI, OpNo, O);
+ return false;
+
+ case 'P': // This is the operand of a call, treat specially.
+ printPCRelImm(MI, OpNo, O);
+ return false;
+
+ case 'n': // Negate the immediate or print a '-' before the operand.
+ // Note: this is a temporary solution. It should be handled target
+ // independently as part of the 'MC' work.
+ if (MO.isImm()) {
+ O << -MO.getImm();
+ return false;
+ }
+ O << '-';
+ }
+ }
+
+ printOperand(MI, OpNo, O, /*Modifier*/ 0, AsmVariant);
+ return false;
+}
+
+bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (AsmVariant) {
+ printIntelMemReference(MI, OpNo, O);
+ return false;
+ }
+
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print SImode register
+ // These only apply to registers, ignore on mem.
+ break;
+ case 'H':
+ printMemReference(MI, OpNo, O, "H");
+ return false;
+ case 'P': // Don't print @PLT, but do print as memory.
+ printMemReference(MI, OpNo, O, "no-rip");
+ return false;
+ }
+ }
+ printMemReference(MI, OpNo, O);
+ return false;
+}
+
+void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget->isTargetEnvMacho())
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+
+void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetEnvMacho()) {
+ // All darwin targets use mach-o.
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output stubs for dynamically-linked functions.
+ MachineModuleInfoMachO::SymbolListTy Stubs;
+
+ Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ OutContext.getMachOSection("__IMPORT", "__jump_table",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 5, SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(),
+ MCSA_IndirectSymbol);
+ // hlt; hlt; hlt; hlt; hlt hlt = 0xf4.
+ const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
+ OutStreamer.EmitBytes(StringRef(HltInsts, 5));
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Output stubs for external and common global variables.
+ Stubs = MMIMacho.GetGVStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ OutContext.getMachOSection("__IMPORT", "__pointers",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$non_lazy_ptr:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),
+ MCSA_IndirectSymbol);
+ // .long 0
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs. However, sometimes the types are local to the file. So
+ // we need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext), 4/*size*/);
+ }
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$non_lazy_ptr:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext), 4/*size*/);
+ }
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ }
+
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
+ MMI->usesVAFloatArgument()) {
+ StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
+ MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
+ OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+ }
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+
+ // Emit type information for external functions
+ typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
+ for (externals_iterator I = COFFMMI.externals_begin(),
+ E = COFFMMI.externals_end();
+ I != E; ++I) {
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer.EndCOFFSymbolDef();
+ }
+
+ // Necessary for dllexport support
+ std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+
+ const TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedFns.push_back(Mang->getSymbol(I));
+
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedGlobals.push_back(Mang->getSymbol(I));
+
+ // Output linker support code for dllexported globals on windows.
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+ OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
+ SmallString<128> name;
+ for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedGlobals[i]->getName();
+ if (Subtarget->isTargetWindows())
+ name += ",DATA";
+ else
+ name += ",data";
+ OutStreamer.EmitBytes(name);
+ }
+
+ for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedFns[i]->getName();
+ OutStreamer.EmitBytes(name);
+ }
+ }
+ }
+
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const DataLayout *TD = TM.getDataLayout();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize());
+ }
+ Stubs.clear();
+ }
+ }
+}
+
+MachineLocation
+X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+
+ if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &O) {
+ // Only the target-dependent form of DBG_VALUE should get here.
+ // Referencing the offset and metadata as NOps-2 and NOps-1 is
+ // probably portable to other targets; frame pointer location is not.
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==7);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ if (V.getContext().isSubprogram())
+ O << DISubprogram(V.getContext()).getDisplayName() << ":";
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ O << '[';
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ printOperand(MI, 0, O);
+ else
+ O << "undef";
+ O << '+'; printOperand(MI, 3, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmPrinter() {
+ RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
+ RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
new file mode 100644
index 000000000000..bc7496bad144
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -0,0 +1,79 @@
+//===-- X86AsmPrinter.h - X86 implementation of AsmPrinter ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ASMPRINTER_H
+#define X86ASMPRINTER_H
+
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCStreamer;
+
+class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
+ const X86Subtarget *Subtarget;
+ public:
+ explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ }
+
+ virtual const char *getPassName() const LLVM_OVERRIDE {
+ return "X86 Assembly / Object Emitter";
+ }
+
+ const X86Subtarget &getSubtarget() const { return *Subtarget; }
+
+ virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE;
+
+ virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE;
+
+ virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE;
+
+ void printSymbolOperand(const MachineOperand &MO, raw_ostream &O);
+
+ // These methods are used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0, unsigned AsmVariant = 0);
+ void printPCRelImm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) LLVM_OVERRIDE;
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) LLVM_OVERRIDE;
+
+ void printMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O,
+ const char *Modifier=NULL);
+ void printLeaMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O,
+ const char *Modifier=NULL);
+
+ void printIntelMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier=NULL,
+ unsigned AsmVariant = 1);
+
+ virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE;
+
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ virtual MachineLocation
+ getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp
new file mode 100644
index 000000000000..6a6125bb6b2c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -0,0 +1,19 @@
+//===-- X86COFFMachineModuleInfo.cpp - X86 COFF MMI Impl ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86COFFMachineModuleInfo.h"
+using namespace llvm;
+
+
+X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
+}
diff --git a/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h
new file mode 100644
index 000000000000..0dfeb42f1a4d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -0,0 +1,46 @@
+//===-- X86coffmachinemoduleinfo.h - X86 COFF MMI Impl ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86COFF_MACHINEMODULEINFO_H
+#define X86COFF_MACHINEMODULEINFO_H
+
+#include "X86MachineFunctionInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+namespace llvm {
+ class X86MachineFunctionInfo;
+ class DataLayout;
+
+/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
+/// for X86 COFF targets.
+class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
+ DenseSet<MCSymbol const *> Externals;
+public:
+ X86COFFMachineModuleInfo(const MachineModuleInfo &) {}
+ virtual ~X86COFFMachineModuleInfo();
+
+ void addExternalFunction(MCSymbol* Symbol) {
+ Externals.insert(Symbol);
+ }
+
+ typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
+ externals_iterator externals_begin() const { return Externals.begin(); }
+ externals_iterator externals_end() const { return Externals.end(); }
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
new file mode 100644
index 000000000000..9eafbd55a5ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -0,0 +1,537 @@
+//===-- X86CallingConv.td - Calling Conventions X86 32/64 --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the X86-32 and X86-64
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<X86Subtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Return-value conventions common to all X86 CC's.
+def RetCC_X86Common : CallingConv<[
+ // Scalar values are returned in AX first, then DX. For i8, the ABI
+ // requires the values to be in AL and AH, however this code uses AL and DL
+ // instead. This is because using AH for the second register conflicts with
+ // the way LLVM does multiple return values -- a return of {i16,i8} would end
+ // up in AX and AH, which overlap. Front-ends wishing to conform to the ABI
+ // for functions that return two i8 values are currently expected to pack the
+ // values into an i16 (which uses AX, and thus AL:AH).
+ //
+ // For code that doesn't care about the ABI, we allow returning more than two
+ // integer values in registers.
+ CCIfType<[i8] , CCAssignToReg<[AL, DL, CL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+ CCIfType<[i64], CCAssignToReg<[RAX, RDX, RCX]>>,
+
+ // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
+ // can only be used by ABI non-compliant code. If the target doesn't have XMM
+ // registers, it won't have vector types.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // 256-bit vectors are returned in YMM0 and XMM1, when they fit. YMM2 and YMM3
+ // can only be used by ABI non-compliant code. This vector type is only
+ // supported while using the AVX target feature.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // MMX vector types are always returned in MM0. If the target doesn't have
+ // MM0, it doesn't support these vector types.
+ CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
+
+ // Long double types are always returned in ST0 (even with SSE).
+ CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
+]>;
+
+// X86-32 C return-value convention.
+def RetCC_X86_32_C : CallingConv<[
+ // The X86-32 calling convention returns FP values in ST0, unless marked
+ // with "inreg" (used here to distinguish one kind of reg from another,
+ // weirdly; this is really the sse-regparm calling convention) in which
+ // case they use XMM0, otherwise it is the same as the common X86 calling
+ // conv.
+ CCIfInReg<CCIfSubtarget<"hasSSE2()",
+ CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+ CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-32 FastCC return-value convention.
+def RetCC_X86_32_Fast : CallingConv<[
+ // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
+ // SSE2.
+ // This can happen when a float, 2 x float, or 3 x float vector is split by
+ // target lowering, and is returned in 1-3 sse regs.
+ CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+
+ // For integers, ECX can be used as an extra return register
+ CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+
+ // Otherwise, it is the same as the common X86 calling convention.
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// Intel_OCL_BI return-value convention.
+def RetCC_Intel_OCL_BI : CallingConv<[
+ // Vector types are returned in XMM0,XMM1,XMMM2 and XMM3.
+ CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // 256-bit FP vectors
+ // No more than 4 registers
+ CCIfType<[v8f32, v4f64, v8i32, v4i64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // i32, i64 in the standard way
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-32 HiPE return-value convention.
+def RetCC_X86_32_HiPE : CallingConv<[
+ // Promote all types to i32
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Return: HP, P, VAL1, VAL2
+ CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
+]>;
+
+// X86-64 C return-value convention.
+def RetCC_X86_64_C : CallingConv<[
+ // The X86-64 calling convention always returns FP values in XMM0.
+ CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
+
+ // MMX vector types are always returned in XMM0.
+ CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-Win64 C return-value convention.
+def RetCC_X86_Win64_C : CallingConv<[
+ // The X86-Win64 calling convention always returns __m64 values in RAX.
+ CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+
+ // Otherwise, everything is the same as 'normal' X86-64 C CC.
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+// X86-64 HiPE return-value convention.
+def RetCC_X86_64_HiPE : CallingConv<[
+ // Promote all types to i64
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Return: HP, P, VAL1, VAL2
+ CCIfType<[i64], CCAssignToReg<[R15, RBP, RAX, RDX]>>
+]>;
+
+// This is the root return-value convention for the X86-32 backend.
+def RetCC_X86_32 : CallingConv<[
+ // If FastCC, use RetCC_X86_32_Fast.
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // If HiPE, use RetCC_X86_32_HiPE.
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
+
+ // Otherwise, use RetCC_X86_32_C.
+ CCDelegateTo<RetCC_X86_32_C>
+]>;
+
+// This is the root return-value convention for the X86-64 backend.
+def RetCC_X86_64 : CallingConv<[
+ // HiPE uses RetCC_X86_64_HiPE
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>,
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+// This is the return-value convention used for the entire X86 backend.
+def RetCC_X86 : CallingConv<[
+
+ // Check if this is the Intel OpenCL built-ins calling convention
+ CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>,
+
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
+ CCDelegateTo<RetCC_X86_32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+def CC_X86_64_C : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // The first 6 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
+
+ // The first 8 MMX vector arguments are passed in XMM registers on Darwin.
+ CCIfType<[x86mmx],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCIfSubtarget<"hasSSE2()",
+ CCPromoteToType<v2i64>>>>,
+
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
+
+ // The first 8 256-bit vector arguments are passed in YMM registers, unless
+ // this is a vararg function.
+ // FIXME: This isn't precisely correct; the x86-64 ABI document says that
+ // fixed arguments to vararg functions are supposed to be passed in
+ // registers. Actually modeling that would be a lot of work, though.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
+ YMM4, YMM5, YMM6, YMM7]>>>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Long doubles get stack slots whose size and alignment depends on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>
+]>;
+
+// Calling convention used on Win64
+def CC_X86_Win64_C : CallingConv<[
+ // FIXME: Handle byval stuff.
+ // FIXME: Handle varargs.
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // 128 bit vectors are passed by pointer
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+
+
+ // 256 bit vectors are passed by pointer
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
+
+ // The first 4 MMX vector arguments are passed in GPRs.
+ CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+ // Do not pass the sret argument in RCX, the Win64 thiscall calling
+ // convention requires "this" to be passed in RCX.
+ CCIfCC<"CallingConv::X86_ThisCall",
+ CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8 , R9 ],
+ [XMM1, XMM2, XMM3]>>>>,
+
+ CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The first 4 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
+ [RCX , RDX , R8 , R9 ]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Long doubles get stack slots whose size and alignment depends on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>
+]>;
+
+def CC_X86_64_GHC : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+ CCIfType<[i64],
+ CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
+
+ // Pass in STG registers: F1, F2, F3, F4, D1, D2
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()",
+ CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
+]>;
+
+def CC_X86_64_HiPE : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2, ARG3
+ CCIfType<[i64], CCAssignToReg<[R15, RBP, RSI, RDX, RCX, R8]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86 C Calling Convention
+//===----------------------------------------------------------------------===//
+
+/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
+/// values are spilled on the stack, and the first 4 vector values go in XMM
+/// regs.
+def CC_X86_32_Common : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // The first 3 float or double arguments, if marked 'inreg' and if the call
+ // is not a vararg call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasSSE2()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
+
+ // The first 3 __m64 vector arguments are passed in mmx registers if the
+ // call is not a vararg call.
+ CCIfNotVarArg<CCIfType<[x86mmx],
+ CCAssignToReg<[MM0, MM1, MM2]>>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Doubles get 8-byte slots that are 4-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+
+ // Long doubles get slots whose size depends on the subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 4>>,
+
+ // The first 4 SSE vector arguments are passed in XMM registers.
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+
+ // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
+
+ // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
+ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
+ // passed in the parameter area.
+ CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>;
+
+def CC_X86_32_C : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in ECX.
+ CCIfNest<CCAssignToReg<[ECX]>>,
+
+ // The first 3 integer arguments, if marked 'inreg' and if the call is not
+ // a vararg call, are passed in integer registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_FastCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfInReg<CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_ThisCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass sret arguments indirectly through stack.
+ CCIfSRet<CCAssignToStack<4, 4>>,
+
+ // The first integer argument is passed in ECX
+ CCIfType<[i32], CCAssignToReg<[ECX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_FastCC : CallingConv<[
+ // Handles byval parameters. Note that we can't rely on the delegation
+ // to CC_X86_32_Common for this because that happens after code that
+ // puts arguments in registers.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // The first 3 float or double arguments, if the call is not a vararg
+ // call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasSSE2()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+
+ // Doubles get 8-byte slots that are 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_GHC : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1
+ CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
+]>;
+
+def CC_X86_32_HiPE : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2
+ CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX, ECX]>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>
+]>;
+
+// X86-64 Intel OpenCL built-ins calling convention.
+def CC_Intel_OCL_BI : CallingConv<[
+
+ CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>,
+ CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>,
+
+ CCIfType<[i32], CCIfSubtarget<"is64Bit()", CCAssignToReg<[EDI, ESI, EDX, ECX]>>>,
+ CCIfType<[i64], CCIfSubtarget<"is64Bit()", CCAssignToReg<[RDI, RSI, RDX, RCX]>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // The SSE vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v8f32, v4f64, v8i32, v4i64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
+
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64_C>>,
+ CCDelegateTo<CC_X86_32_C>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Root Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// This is the root argument convention for the X86-32 backend.
+def CC_X86_32 : CallingConv<[
+ CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+ CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
+
+ // Otherwise, drop to normal X86-32 CC
+ CCDelegateTo<CC_X86_32_C>
+]>;
+
+// This is the root argument convention for the X86-64 backend.
+def CC_X86_64 : CallingConv<[
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_64_HiPE>>,
+
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<CC_X86_64_C>
+]>;
+
+// This is the argument convention used for the entire X86 backend.
+def CC_X86 : CallingConv<[
+ CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
+ CCDelegateTo<CC_X86_32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved Registers.
+//===----------------------------------------------------------------------===//
+
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
+def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>;
+
+def CSR_32EHRet : CalleeSavedRegs<(add EAX, EDX, CSR_32)>;
+def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
+
+def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
+ (sequence "XMM%u", 6, 15))>;
+
+def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10,
+ R11, R12, R13, R14, R15, RBP,
+ (sequence "XMM%u", 0, 15))>;
+
+// Standard C + YMM6-15
+def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
+ R13, R14, R15,
+ (sequence "YMM%u", 6, 15))>;
+
+//Standard C + XMM 8-15
+def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64,
+ (sequence "XMM%u", 8, 15))>;
+
+//Standard C + YMM 8-15
+def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64,
+ (sequence "YMM%u", 8, 15))>;
diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
new file mode 100644
index 000000000000..2518e02e2a40
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
@@ -0,0 +1,1499 @@
+//===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the X86 machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-emitter"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+ template<class CodeEmitter>
+ class Emitter : public MachineFunctionPass {
+ const X86InstrInfo *II;
+ const DataLayout *TD;
+ X86TargetMachine &TM;
+ CodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+ intptr_t PICBaseOffset;
+ bool Is64BitMode;
+ bool IsPIC;
+ public:
+ static char ID;
+ explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(false),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ Emitter(X86TargetMachine &tm, CodeEmitter &mce,
+ const X86InstrInfo &ii, const DataLayout &td, bool is64)
+ : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Machine Code Emitter";
+ }
+
+ void emitOpcodePrefix(uint64_t TSFlags, int MemOperand,
+ const MachineInstr &MI,
+ const MCInstrDesc *Desc) const;
+
+ void emitVEXOpcodePrefix(uint64_t TSFlags, int MemOperand,
+ const MachineInstr &MI,
+ const MCInstrDesc *Desc) const;
+
+ void emitSegmentOverridePrefix(uint64_t TSFlags,
+ int MemOperand,
+ const MachineInstr &MI) const;
+
+ void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ intptr_t Disp = 0, intptr_t PCAdj = 0,
+ bool Indirect = false);
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
+ intptr_t PCAdj = 0);
+ void emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj = 0);
+
+ void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
+ intptr_t Adj = 0, bool IsPCRel = true);
+
+ void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
+ void emitRegModRMByte(unsigned RegOpcodeField);
+ void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
+ void emitConstant(uint64_t Val, unsigned Size);
+
+ void emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op, unsigned RegOpcodeField,
+ intptr_t PCAdj = 0);
+
+ unsigned getX86RegNum(unsigned RegNo) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ return TRI->getEncodingValue(RegNo) & 0x7;
+ }
+
+ unsigned char getVEXRegisterEncoding(const MachineInstr &MI,
+ unsigned OpNum) const;
+ };
+
+template<class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+} // end anonymous namespace.
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified JITCodeEmitter object.
+FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+template<class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+
+ II = TM.getInstrInfo();
+ TD = TM.getDataLayout();
+ Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+
+ do {
+ DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n");
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MCInstrDesc &Desc = I->getDesc();
+ emitInstruction(*I, &Desc);
+ // MOVPC32r is basically a call plus a pop instruction.
+ if (Desc.getOpcode() == X86::MOVPC32r)
+ emitInstruction(*I, &II->get(X86::POP32r));
+ ++NumEmitted; // Keep track of the # of mi's emitted
+ }
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
+static unsigned determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
+ const MCInstrDesc &Desc = MI.getDesc();
+
+ // Pseudo instructions do not need REX prefix byte.
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc.TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ unsigned NumOps = Desc.getNumOperands();
+ if (NumOps) {
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (X86II::isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
+
+/// emitPCRelativeBlockAddress - This method keeps track of the information
+/// necessary to resolve the address of this block later and emits a dummy
+/// value.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
+ // Remember where this reference was and where it is to so we can
+ // deal with it later.
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ X86::reloc_pcrel_word, MBB));
+ MCE.emitWordLE(0);
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream assuming
+/// this is part of a "take the address of a global" instruction.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV,
+ unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */,
+ bool Indirect /* = false */) {
+ intptr_t RelocCST = Disp;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV),
+ RelocCST, false)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), RelocCST, false);
+ MCE.addRelocation(MR);
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
+ unsigned Reloc) {
+ intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0;
+
+ // X86 never needs stubs because instruction selection will always pick
+ // an instruction sequence that is large enough to hold any address
+ // to a symbol.
+ // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall)
+ bool NeedStub = false;
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, RelocCST,
+ 0, NeedStub));
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+/// emitConstPoolAddress - Arrange for the address of an constant pool
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
+ unsigned RegOpcodeFld){
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) {
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
+ unsigned Index,
+ unsigned Base) {
+ // SIB byte is in the same format as the ModRMByte...
+ MCE.emitByte(ModRMByte(SS, Index, Base));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
+ // Output the constant in little endian byte order...
+ for (unsigned i = 0; i != Size; ++i) {
+ MCE.emitByte(Val & 255);
+ Val >>= 8;
+ }
+}
+
+/// isDisp8 - Return true if this signed displacement fits in a 8-bit
+/// sign-extended field.
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
+
+static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
+ const TargetMachine &TM) {
+ // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
+ // mechanism as 32-bit mode.
+ if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ !TM.getSubtarget<X86Subtarget>().isTargetDarwin())
+ return false;
+
+ // Return true if this is a reference to a stub containing the address of the
+ // global, not the global itself.
+ return isGlobalStubReference(GVOp.getTargetFlags());
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
+ int DispVal,
+ intptr_t Adj /* = 0 */,
+ bool IsPCRel /* = true */) {
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (!RelocOp) {
+ emitConstant(DispVal, 4);
+ return;
+ }
+
+ // Otherwise, this is something that requires a relocation. Emit it as such
+ // now.
+ unsigned RelocType = Is64BitMode ?
+ (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (RelocOp->isGlobal()) {
+ // In 64-bit static small code model, we could potentially emit absolute.
+ // But it's probably not beneficial. If the MCE supports using RIP directly
+ // do it, otherwise fallback to absolute (this is determined by IsPCRel).
+ // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
+ // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
+ bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
+ emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
+ Adj, Indirect);
+ } else if (RelocOp->isSymbol()) {
+ emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
+ } else if (RelocOp->isCPI()) {
+ emitConstPoolAddress(RelocOp->getIndex(), RelocType,
+ RelocOp->getOffset(), Adj);
+ } else {
+ assert(RelocOp->isJTI() && "Unexpected machine operand!");
+ emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op,unsigned RegOpcodeField,
+ intptr_t PCAdj) {
+ const MachineOperand &Op3 = MI.getOperand(Op+3);
+ int DispVal = 0;
+ const MachineOperand *DispForReloc = 0;
+
+ // Figure out what sort of displacement we have to handle here.
+ if (Op3.isGlobal()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isSymbol()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isCPI()) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
+ DispVal += Op3.getOffset();
+ }
+ } else if (Op3.isJTI()) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
+ }
+ } else {
+ DispVal = Op3.getImm();
+ }
+
+ const MachineOperand &Base = MI.getOperand(Op);
+ const MachineOperand &Scale = MI.getOperand(Op+1);
+ const MachineOperand &IndexReg = MI.getOperand(Op+2);
+
+ unsigned BaseReg = Base.getReg();
+
+ // Handle %rip relative addressing.
+ if (BaseReg == X86::RIP ||
+ (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode
+ assert(IndexReg.getReg() == 0 && Is64BitMode &&
+ "Invalid rip-relative address");
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
+ return;
+ }
+
+ // Indicate that the displacement will use an pcrel or absolute reference
+ // by default. MCEs able to resolve addresses on-the-fly use pcrel by default
+ // while others, unless explicit asked to use RIP, use absolute references.
+ bool IsPCRel = MCE.earlyResolveAddresses() ? true : false;
+
+ // Is a SIB byte needed?
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
+ // 2-7) and absolute references.
+ unsigned BaseRegNo = -1U;
+ if (BaseReg != 0 && BaseReg != X86::RIP)
+ BaseRegNo = getX86RegNum(BaseReg);
+
+ if (// The SIB byte must be used if there is an index register.
+ IndexReg.getReg() == 0 &&
+ // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+ // encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ BaseRegNo != N86::ESP &&
+ // If there is no base register and we're in 64-bit mode, we need a SIB
+ // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
+ (!Is64BitMode || BaseReg != 0)) {
+ if (BaseReg == 0 || // [disp32] in X86-32 mode
+ BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
+ return;
+ }
+
+ // If the base is not EBP/ESP and there is no displacement, use simple
+ // indirect register encoding, this handles addresses like [EAX]. The
+ // encoding for [EBP] with no displacement means [disp32] so we handle it
+ // by emitting a displacement of 0 below.
+ if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
+ return;
+ }
+
+ // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
+ if (!DispForReloc && isDisp8(DispVal)) {
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
+ emitConstant(DispVal, 1);
+ return;
+ }
+
+ // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
+ return;
+ }
+
+ // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first.
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=4, to JUST get the index, scale, and displacement.
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispForReloc) {
+ // Emit the normal disp32 encoding.
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispVal == 0 && BaseRegNo != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ } else if (isDisp8(DispVal)) {
+ // Emit the disp8 encoding...
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding...
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ }
+
+ // Calculate what the SS field value should be...
+ static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
+ unsigned SS = SSTable[Scale.getImm()];
+
+ if (BaseReg == 0) {
+ // Handle the SIB byte for the case where there is no base, see Intel
+ // Manual 2A, table 2-7. The displacement has already been output.
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
+ IndexRegNo = 4;
+ emitSIBByte(SS, IndexRegNo, 5);
+ } else {
+ unsigned BaseRegNo = getX86RegNum(BaseReg);
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ emitSIBByte(SS, IndexRegNo, BaseRegNo);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8) {
+ emitConstant(DispVal, 1);
+ } else if (DispVal != 0 || ForceDisp32) {
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
+ }
+}
+
+static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II,
+ unsigned Opcode) {
+ const MCInstrDesc *Desc = &II->get(Opcode);
+ MI.setDesc(*Desc);
+ return Desc;
+}
+
+/// Is16BitMemOperand - Return true if the specified instruction has
+/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is16BitMemOperand(const MachineInstr &MI, unsigned Op) {
+ const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// Is32BitMemOperand - Return true if the specified instruction has
+/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MachineInstr &MI, unsigned Op) {
+ const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// Is64BitMemOperand - Return true if the specified instruction has
+/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
+#ifndef NDEBUG
+static bool Is64BitMemOperand(const MachineInstr &MI, unsigned Op) {
+ const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+#endif
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitOpcodePrefix(uint64_t TSFlags,
+ int MemOperand,
+ const MachineInstr &MI,
+ const MCInstrDesc *Desc) const {
+ // Emit the lock opcode prefix as needed.
+ if (Desc->TSFlags & X86II::LOCK)
+ MCE.emitByte(0xF0);
+
+ // Emit segment override opcode prefix as needed.
+ emitSegmentOverridePrefix(TSFlags, MemOperand, MI);
+
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP)
+ MCE.emitByte(0xF3);
+
+ // Emit the address size opcode prefix as needed.
+ bool need_address_override;
+ if (TSFlags & X86II::AdSize) {
+ need_address_override = true;
+ } else if (MemOperand == -1) {
+ need_address_override = false;
+ } else if (Is64BitMode) {
+ assert(!Is16BitMemOperand(MI, MemOperand));
+ need_address_override = Is32BitMemOperand(MI, MemOperand);
+ } else {
+ assert(!Is64BitMemOperand(MI, MemOperand));
+ need_address_override = Is16BitMemOperand(MI, MemOperand);
+ }
+
+ if (need_address_override)
+ MCE.emitByte(0x67);
+
+ // Emit the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ MCE.emitByte(0x66);
+
+ bool Need0FPrefix = false;
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
+ Need0FPrefix = true;
+ break;
+ case X86II::REP: break; // already handled.
+ case X86II::T8XS: // F3 0F 38
+ case X86II::XS: // F3 0F
+ MCE.emitByte(0xF3);
+ Need0FPrefix = true;
+ break;
+ case X86II::T8XD: // F2 0F 38
+ case X86II::TAXD: // F2 0F 3A
+ case X86II::XD: // F2 0F
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+ case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+ MCE.emitByte(0xD8+
+ (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
+ >> X86II::Op0Shift));
+ break; // Two-byte opcode prefix
+ default: llvm_unreachable("Invalid prefix!");
+ case 0: break; // No prefix!
+ }
+
+ // Handle REX prefix.
+ if (Is64BitMode) {
+ if (unsigned REX = determineREX(MI))
+ MCE.emitByte(0x40 | REX);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ MCE.emitByte(0x0F);
+
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::T8XD: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
+ case X86II::T8: // 0F 38
+ MCE.emitByte(0x38);
+ break;
+ case X86II::TAXD: // F2 0F 38
+ case X86II::TA: // 0F 3A
+ MCE.emitByte(0x3A);
+ break;
+ case X86II::A6: // 0F A6
+ MCE.emitByte(0xA6);
+ break;
+ case X86II::A7: // 0F A7
+ MCE.emitByte(0xA7);
+ break;
+ }
+}
+
+// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+// 0-7 and the difference between the 2 groups is given by the REX prefix.
+// In the VEX prefix, registers are seen sequencially from 0-15 and encoded
+// in 1's complement form, example:
+//
+// ModRM field => XMM9 => 1
+// VEX.VVVV => XMM9 => ~9
+//
+// See table 4-35 of Intel AVX Programming Reference for details.
+template<class CodeEmitter>
+unsigned char
+Emitter<CodeEmitter>::getVEXRegisterEncoding(const MachineInstr &MI,
+ unsigned OpNum) const {
+ unsigned SrcReg = MI.getOperand(OpNum).getReg();
+ unsigned SrcRegNum = getX86RegNum(MI.getOperand(OpNum).getReg());
+ if (X86II::isX86_64ExtendedReg(SrcReg))
+ SrcRegNum |= 8;
+
+ // The registers represented through VEX_VVVV should
+ // be encoded in 1's complement form.
+ return (~SrcRegNum) & 0xf;
+}
+
+/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitSegmentOverridePrefix(uint64_t TSFlags,
+ int MemOperand,
+ const MachineInstr &MI) const {
+ switch (TSFlags & X86II::SegOvrMask) {
+ default: llvm_unreachable("Invalid segment!");
+ case 0:
+ // No segment override, check for explicit one on memory operand.
+ if (MemOperand != -1) { // If the instruction has a memory operand.
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+ default: llvm_unreachable("Unknown segment register!");
+ case 0: break;
+ case X86::CS: MCE.emitByte(0x2E); break;
+ case X86::SS: MCE.emitByte(0x36); break;
+ case X86::DS: MCE.emitByte(0x3E); break;
+ case X86::ES: MCE.emitByte(0x26); break;
+ case X86::FS: MCE.emitByte(0x64); break;
+ case X86::GS: MCE.emitByte(0x65); break;
+ }
+ }
+ break;
+ case X86II::FS:
+ MCE.emitByte(0x64);
+ break;
+ case X86II::GS:
+ MCE.emitByte(0x65);
+ break;
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
+ int MemOperand,
+ const MachineInstr &MI,
+ const MCInstrDesc *Desc) const {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+
+ // VEX_R: opcode externsion equivalent to REX.R in
+ // 1's complement (inverted) form
+ //
+ // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX_R=1 (64 bit mode only)
+ //
+ unsigned char VEX_R = 0x1;
+
+ // VEX_X: equivalent to REX.X, only used when a
+ // register is used for index in SIB Byte.
+ //
+ // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.X=1 (64-bit mode only)
+ unsigned char VEX_X = 0x1;
+
+ // VEX_B:
+ //
+ // 1: Same as REX_B=0 (ignored in 32-bit mode)
+ // 0: Same as REX_B=1 (64 bit mode only)
+ //
+ unsigned char VEX_B = 0x1;
+
+ // VEX_W: opcode specific (use like REX.W, or used for
+ // opcode extension, or ignored, depending on the opcode byte)
+ unsigned char VEX_W = 0;
+
+ // XOP: Use XOP prefix byte 0x8f instead of VEX.
+ unsigned char XOP = 0;
+
+ // VEX_5M (VEX m-mmmmm field):
+ //
+ // 0b00000: Reserved for future use
+ // 0b00001: implied 0F leading opcode
+ // 0b00010: implied 0F 38 leading opcode bytes
+ // 0b00011: implied 0F 3A leading opcode bytes
+ // 0b00100-0b11111: Reserved for future use
+ // 0b01000: XOP map select - 08h instructions with imm byte
+ // 0b10001: XOP map select - 09h instructions with no imm byte
+ unsigned char VEX_5M = 0x1;
+
+ // VEX_4V (VEX vvvv field): a register specifier
+ // (in 1's complement form) or 1111 if unused.
+ unsigned char VEX_4V = 0xf;
+
+ // VEX_L (Vector Length):
+ //
+ // 0: scalar or 128-bit vector
+ // 1: 256-bit vector
+ //
+ unsigned char VEX_L = 0;
+
+ // VEX_PP: opcode extension providing equivalent
+ // functionality of a SIMD prefix
+ //
+ // 0b00: None
+ // 0b01: 66
+ // 0b10: F3
+ // 0b11: F2
+ //
+ unsigned char VEX_PP = 0;
+
+ // Encode the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ VEX_PP = 0x01;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
+ VEX_W = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
+ XOP = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
+ VEX_L = 1;
+
+ switch (TSFlags & X86II::Op0Mask) {
+ default: llvm_unreachable("Invalid prefix!");
+ case X86II::T8: // 0F 38
+ VEX_5M = 0x2;
+ break;
+ case X86II::TA: // 0F 3A
+ VEX_5M = 0x3;
+ break;
+ case X86II::T8XS: // F3 0F 38
+ VEX_PP = 0x2;
+ VEX_5M = 0x2;
+ break;
+ case X86II::T8XD: // F2 0F 38
+ VEX_PP = 0x3;
+ VEX_5M = 0x2;
+ break;
+ case X86II::TAXD: // F2 0F 3A
+ VEX_PP = 0x3;
+ VEX_5M = 0x3;
+ break;
+ case X86II::XS: // F3 0F
+ VEX_PP = 0x2;
+ break;
+ case X86II::XD: // F2 0F
+ VEX_PP = 0x3;
+ break;
+ case X86II::XOP8:
+ VEX_5M = 0x8;
+ break;
+ case X86II::XOP9:
+ VEX_5M = 0x9;
+ break;
+ case X86II::A6: // Bypass: Not used by VEX
+ case X86II::A7: // Bypass: Not used by VEX
+ case X86II::TB: // Bypass: Not used by VEX
+ case 0:
+ break; // No prefix!
+ }
+
+
+ // Classify VEX_B, VEX_4V, VEX_R, VEX_X
+ unsigned NumOps = Desc->getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
+ ++CurOp;
+ else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+ assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+ // Special case for GATHER with 2 TIED_TO operands
+ // Skip the first 2 operands: dst, mask_wb
+ CurOp += 2;
+ }
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ // Duplicate register.
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ break;
+ case X86II::MRMDestMem: {
+ // MRMDestMem instructions forms:
+ // MemAddr, src1(ModR/M)
+ // MemAddr, src1(VEX_4V), src2(ModR/M)
+ // MemAddr, src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ CurOp = X86::AddrNumOperands;
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ const MachineOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ break;
+ }
+ case X86II::MRMSrcMem:
+ // MRMSrcMem instructions forms:
+ // src1(ModR/M), MemAddr
+ // src1(ModR/M), src2(VEX_4V), MemAddr
+ // src1(ModR/M), MemAddr, imm8
+ // src1(ModR/M), MemAddr, src2(VEX_I8IMM)
+ //
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_R = 0x0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, 1);
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
+ break;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ // MRM[0-9]m instructions forms:
+ // MemAddr
+ // src1(VEX_4V), MemAddr
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+ break;
+ }
+ case X86II::MRMSrcReg:
+ // MRMSrcReg instructions forms:
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M), src1(ModR/M)
+ // dst(ModR/M), src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ CurOp++;
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ break;
+ case X86II::MRMDestReg:
+ // MRMDestReg instructions forms:
+ // dst(ModR/M), src(ModR/M)
+ // dst(ModR/M), src(ModR/M), imm8
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+ break;
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ // MRM0r-MRM7r instructions forms:
+ // dst(VEX_4V), src(ModR/M), imm8
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_B = 0x0;
+ break;
+ default: // RawFrm
+ break;
+ }
+
+ // Emit segment override opcode prefix as needed.
+ emitSegmentOverridePrefix(TSFlags, MemOperand, MI);
+
+ // VEX opcode prefix can have 2 or 3 bytes
+ //
+ // 3 bytes:
+ // +-----+ +--------------+ +-------------------+
+ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+ // 2 bytes:
+ // +-----+ +-------------------+
+ // | C5h | | R | vvvv | L | pp |
+ // +-----+ +-------------------+
+ //
+ unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+ if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
+ MCE.emitByte(0xC5);
+ MCE.emitByte(LastByte | (VEX_R << 7));
+ return;
+ }
+
+ // 3 byte VEX prefix
+ MCE.emitByte(XOP ? 0x8F : 0xC4);
+ MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M);
+ MCE.emitByte(LastByte | (VEX_W << 7));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
+ const MCInstrDesc *Desc) {
+ DEBUG(dbgs() << MI);
+
+ // If this is a pseudo instruction, lower it.
+ switch (Desc->getOpcode()) {
+ case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break;
+ case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break;
+ case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break;
+ case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break;
+ case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break;
+ case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break;
+ case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break;
+ case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break;
+ case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break;
+ case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break;
+ case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break;
+ case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break;
+ case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break;
+ case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break;
+ case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break;
+ case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break;
+ case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break;
+ }
+
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ unsigned Opcode = Desc->Opcode;
+
+ // If this is a two-address instruction, skip one of the register operands.
+ unsigned NumOps = Desc->getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
+ ++CurOp;
+ else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+ assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+ // Special case for GATHER with 2 TIED_TO operands
+ // Skip the first 2 operands: dst, mask_wb
+ CurOp += 2;
+ }
+
+ uint64_t TSFlags = Desc->TSFlags;
+
+ // Is this instruction encoded using the AVX VEX prefix?
+ bool HasVEXPrefix = (TSFlags >> X86II::VEXShift) & X86II::VEX;
+ // It uses the VEX.VVVV field?
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+ const unsigned MemOp4_I8IMMOperand = 2;
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
+ if (MemoryOperand != -1) MemoryOperand += CurOp;
+
+ if (!HasVEXPrefix)
+ emitOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
+ else
+ emitVEXOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
+
+ unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags);
+ switch (TSFlags & X86II::FormMask) {
+ default:
+ llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
+ case X86II::Pseudo:
+ // Remember the current PC offset, this is the PIC relocation
+ // base address.
+ switch (Opcode) {
+ default:
+ llvm_unreachable("pseudo instructions should be removed before code"
+ " emission");
+ // Do nothing for Int_MemBarrier - it's just a comment. Add a debug
+ // to make it slightly easier to see.
+ case X86::Int_MemBarrier:
+ DEBUG(dbgs() << "#MEMBARRIER\n");
+ break;
+
+ case TargetOpcode::INLINEASM:
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0])
+ report_fatal_error("JIT does not support inline asm!");
+ break;
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::GC_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ break;
+ case X86::MOVPC32r: {
+ // This emits the "call" portion of this pseudo instruction.
+ MCE.emitByte(BaseOpcode);
+ emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags));
+ // Remember PIC base.
+ PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset();
+ X86JITInfo *JTI = TM.getJITInfo();
+ JTI->setPICBase(MCE.getCurrentPCValue());
+ break;
+ }
+ }
+ CurOp = NumOps;
+ break;
+ case X86II::RawFrm: {
+ MCE.emitByte(BaseOpcode);
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+
+ DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
+ DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");
+ DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");
+ DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");
+ DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");
+
+ if (MO.isMBB()) {
+ emitPCRelativeBlockAddress(MO.getMBB());
+ break;
+ }
+
+ if (MO.isGlobal()) {
+ emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
+ MO.getOffset(), 0);
+ break;
+ }
+
+ if (MO.isSymbol()) {
+ emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
+ break;
+ }
+
+ // FIXME: Only used by hackish MCCodeEmitter, remove when dead.
+ if (MO.isJTI()) {
+ emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word);
+ break;
+ }
+
+ assert(MO.isImm() && "Unknown RawFrm operand!");
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
+ // Fix up immediate operand for pc relative calls.
+ intptr_t Imm = (intptr_t)MO.getImm();
+ Imm = Imm - MCE.getCurrentPCValue() - 4;
+ emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags));
+ } else
+ emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+
+ case X86II::AddRegFrm: {
+ MCE.emitByte(BaseOpcode +
+ getX86RegNum(MI.getOperand(CurOp++).getReg()));
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri64i32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ // This should not occur on Darwin for relocatable objects.
+ if (Opcode == X86::MOV64ri)
+ rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRMDestReg: {
+ MCE.emitByte(BaseOpcode);
+
+ unsigned SrcRegNum = CurOp+1;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
+ CurOp = SrcRegNum + 1;
+ break;
+ }
+ case X86II::MRMDestMem: {
+ MCE.emitByte(BaseOpcode);
+
+ unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+ emitMemModRMByte(MI, CurOp,
+ getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
+ CurOp = SrcRegNum + 1;
+ break;
+ }
+
+ case X86II::MRMSrcReg: {
+ MCE.emitByte(BaseOpcode);
+
+ unsigned SrcRegNum = CurOp+1;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
+ if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
+ ++SrcRegNum;
+
+ emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ // 2 operands skipped with HasMemOp4, compensate accordingly
+ CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
+ if (HasVEX_4VOp3)
+ ++CurOp;
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ int AddrOperands = X86::AddrNumOperands;
+ unsigned FirstMemOp = CurOp+1;
+ if (HasVEX_4V) {
+ ++AddrOperands;
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ }
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ ++FirstMemOp;
+
+ MCE.emitByte(BaseOpcode);
+
+ intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
+ X86II::getSizeOfImm(Desc->TSFlags) : 0;
+ emitMemModRMByte(MI, FirstMemOp,
+ getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
+ CurOp += AddrOperands + 1;
+ if (HasVEX_4VOp3)
+ ++CurOp;
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r: {
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ ++CurOp;
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
+ (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ ++CurOp;
+ intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
+ (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
+ X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
+ PCAdj);
+ CurOp += X86::AddrNumOperands;
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO.isImm()) {
+ emitConstant(MO.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64mi32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO, TM);
+ emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
+ Indirect);
+ } else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), rt);
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), rt);
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRMInitReg:
+ MCE.emitByte(BaseOpcode);
+ // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ ++CurOp;
+ break;
+
+ case X86II::MRM_C1:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC1);
+ break;
+ case X86II::MRM_C8:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC8);
+ break;
+ case X86II::MRM_C9:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC9);
+ break;
+ case X86II::MRM_E8:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xE8);
+ break;
+ case X86II::MRM_F0:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xF0);
+ break;
+ }
+
+ while (CurOp != NumOps && NumOps - CurOp <= 2) {
+ // The last source register of a 4 operand instruction in AVX is encoded
+ // in bits[7:4] of a immediate byte.
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
+ const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
+ : CurOp);
+ ++CurOp;
+ unsigned RegNum = getX86RegNum(MO.getReg()) << 4;
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
+ RegNum |= 1 << 7;
+ // If there is an additional 5th operand it must be an immediate, which
+ // is encoded in bits[3:0]
+ if (CurOp != NumOps) {
+ const MachineOperand &MIMM = MI.getOperand(CurOp++);
+ if (MIMM.isImm()) {
+ unsigned Val = MIMM.getImm();
+ assert(Val < 16 && "Immediate operand value out of range");
+ RegNum |= Val;
+ }
+ }
+ emitConstant(RegNum, 1);
+ } else {
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ }
+ }
+
+ if (!MI.isVariadic() && CurOp != NumOps) {
+#ifndef NDEBUG
+ dbgs() << "Cannot encode all operands of: " << MI << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
new file mode 100644
index 000000000000..cadec682a435
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -0,0 +1,2311 @@
+//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-specific support for the FastISel class. Much
+// of the target-specific code is generated by tablegen in the file
+// X86GenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86ISelLowering.h"
+#include "X86InstrBuilder.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+
+class X86FastISel : public FastISel {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// RegInfo - X86 register info.
+ ///
+ const X86RegisterInfo *RegInfo;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf64;
+ bool X86ScalarSSEf32;
+
+public:
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
+ RegInfo = static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
+ }
+
+ virtual bool TargetSelectInstruction(const Instruction *I);
+
+ /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+ /// vreg is being provided by the specified load instruction. If possible,
+ /// try to fold the load as an operand to the instruction, returning true if
+ /// possible.
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
+
+ virtual bool FastLowerArguments();
+
+#include "X86GenFastISel.inc"
+
+private:
+ bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
+
+ bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
+
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
+
+ bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+
+ bool X86SelectAddress(const Value *V, X86AddressMode &AM);
+ bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
+
+ bool X86SelectLoad(const Instruction *I);
+
+ bool X86SelectStore(const Instruction *I);
+
+ bool X86SelectRet(const Instruction *I);
+
+ bool X86SelectCmp(const Instruction *I);
+
+ bool X86SelectZExt(const Instruction *I);
+
+ bool X86SelectBranch(const Instruction *I);
+
+ bool X86SelectShift(const Instruction *I);
+
+ bool X86SelectSelect(const Instruction *I);
+
+ bool X86SelectTrunc(const Instruction *I);
+
+ bool X86SelectFPExt(const Instruction *I);
+ bool X86SelectFPTrunc(const Instruction *I);
+
+ bool X86VisitIntrinsicCall(const IntrinsicInst &I);
+ bool X86SelectCall(const Instruction *I);
+
+ bool DoSelectCall(const Instruction *I, const char *MemIntName);
+
+ const X86InstrInfo *getInstrInfo() const {
+ return getTargetMachine()->getInstrInfo();
+ }
+ const X86TargetMachine *getTargetMachine() const {
+ return static_cast<const X86TargetMachine *>(&TM);
+ }
+
+ unsigned TargetMaterializeConstant(const Constant *C);
+
+ unsigned TargetMaterializeAlloca(const AllocaInst *C);
+
+ unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
+ return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
+ (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
+ }
+
+ bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
+
+ bool IsMemcpySmall(uint64_t Len);
+
+ bool TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len);
+};
+
+} // end anonymous namespace.
+
+bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
+ EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ if (evt == MVT::Other || !evt.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ VT = evt.getSimpleVT();
+ // For now, require SSE/SSE2 for performing floating-point operations,
+ // since x87 requires additional work.
+ if (VT == MVT::f64 && !X86ScalarSSEf64)
+ return false;
+ if (VT == MVT::f32 && !X86ScalarSSEf32)
+ return false;
+ // Similarly, no f80 support yet.
+ if (VT == MVT::f80)
+ return false;
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
+}
+
+#include "X86GenCallingConv.inc"
+
+/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
+/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
+/// Return true and the result register by reference if it is possible.
+bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
+ unsigned &ResultReg) {
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = &X86::GR8RegClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = &X86::GR16RegClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = &X86::GR32RegClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = &X86::GR64RegClass;
+ break;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), ResultReg), AM);
+ return true;
+}
+
+/// X86FastEmitStore - Emit a machine instruction to store a value Val of
+/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
+/// and a displacement offset, or a GlobalAddress,
+/// i.e. V. Return true if it is possible.
+bool
+X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
+ // Get opcode and regclass of the output for the given store instruction.
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f80: // No f80 support yet.
+ default: return false;
+ case MVT::i1: {
+ // Mask out all but lowest bit.
+ unsigned AndResult = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
+ Val = AndResult;
+ }
+ // FALLTHROUGH, handling i1 as i8.
+ case MVT::i8: Opc = X86::MOV8mr; break;
+ case MVT::i16: Opc = X86::MOV16mr; break;
+ case MVT::i32: Opc = X86::MOV32mr; break;
+ case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::f32:
+ Opc = X86ScalarSSEf32 ?
+ (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
+ break;
+ case MVT::f64:
+ Opc = X86ScalarSSEf64 ?
+ (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
+ break;
+ case MVT::v4f32:
+ Opc = X86::MOVAPSmr;
+ break;
+ case MVT::v2f64:
+ Opc = X86::MOVAPDmr;
+ break;
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ Opc = X86::MOVDQAmr;
+ break;
+ }
+
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM).addReg(Val);
+ return true;
+}
+
+bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
+ const X86AddressMode &AM) {
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Val))
+ Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
+
+ // If this is a store of a simple constant, fold the constant into the store.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ unsigned Opc = 0;
+ bool Signed = true;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
+ case MVT::i8: Opc = X86::MOV8mi; break;
+ case MVT::i16: Opc = X86::MOV16mi; break;
+ case MVT::i32: Opc = X86::MOV32mi; break;
+ case MVT::i64:
+ // Must be a 32-bit sign extended value.
+ if (isInt<32>(CI->getSExtValue()))
+ Opc = X86::MOV64mi32;
+ break;
+ }
+
+ if (Opc) {
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM)
+ .addImm(Signed ? (uint64_t) CI->getSExtValue() :
+ CI->getZExtValue());
+ return true;
+ }
+ }
+
+ unsigned ValReg = getRegForValue(Val);
+ if (ValReg == 0)
+ return false;
+
+ return X86FastEmitStore(VT, ValReg, AM);
+}
+
+/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
+/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
+/// ISD::SIGN_EXTEND).
+bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
+ unsigned Src, EVT SrcVT,
+ unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+ if (RR == 0)
+ return false;
+
+ ResultReg = RR;
+ return true;
+}
+
+/// X86SelectAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::Alloca: {
+ // Do static allocas.
+ const AllocaInst *A = cast<AllocaInst>(V);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(A);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = SI->second;
+ return true;
+ }
+ break;
+ }
+
+ case Instruction::Add: {
+ // Adds of constants are common and easy enough.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
+ // They have to fit in the 32-bit signed displacement field though.
+ if (isInt<32>(Disp)) {
+ AM.Disp = (uint32_t)Disp;
+ return X86SelectAddress(U->getOperand(0), AM);
+ }
+ }
+ break;
+ }
+
+ case Instruction::GetElementPtr: {
+ X86AddressMode SavedAM = AM;
+
+ // Pattern-match simple GEPs.
+ uint64_t Disp = (int32_t)AM.Disp;
+ unsigned IndexReg = AM.IndexReg;
+ unsigned Scale = AM.Scale;
+ gep_type_iterator GTI = gep_type_begin(U);
+ // Iterate through the indices, folding what we can. Constants can be
+ // folded, and one dynamic index can be handled, if the scale is supported.
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
+ continue;
+ }
+
+ // A array/variable index is always of the form i*S where S is the
+ // constant scale size. See if we can push the scale into immediates.
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ break;
+ }
+ // Unsupported.
+ goto unsupported_gep;
+ }
+ }
+ // Check for displacement overflow.
+ if (!isInt<32>(Disp))
+ break;
+ // Ok, the GEP indices were covered by constant-offset and scaled-index
+ // addressing. Update the address state and move on to examining the base.
+ AM.IndexReg = IndexReg;
+ AM.Scale = Scale;
+ AM.Disp = (uint32_t)Disp;
+ if (X86SelectAddress(U->getOperand(0), AM))
+ return true;
+
+ // If we couldn't merge the gep value into this addr mode, revert back to
+ // our address and just match the value instead of completely failing.
+ AM = SavedAM;
+ break;
+ unsupported_gep:
+ // Ok, the GEP indices weren't all covered.
+ break;
+ }
+ }
+
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // Can't handle TLS yet.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
+ // it works...).
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (const GlobalVariable *GVar =
+ dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
+
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
+ }
+
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
+ } else {
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = &X86::GR64RegClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = &X86::GR32RegClass;
+ }
+
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+/// X86SelectCallAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectCallAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+ }
+
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // RIP-relative addresses can't have additional register operands.
+ if (Subtarget->isPICStyleRIPRel() &&
+ (AM.Base.Reg != 0 || AM.IndexReg != 0))
+ return false;
+
+ // Can't handle DLLImport.
+ if (GV->hasDLLImportLinkage())
+ return false;
+
+ // Can't handle TLS.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Okay, we've committed to selecting this global. Set up the basic address.
+ AM.GV = GV;
+
+ // No ABI requires an extra load for anything other than DLLImport, which
+ // we rejected above. Return a direct reference to the global.
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
+ } else if (Subtarget->isPICStyleGOT()) {
+ AM.GVOpFlags = X86II::MO_GOTOFF;
+ }
+
+ return true;
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+
+/// X86SelectStore - Select and emit code to implement store instructions.
+bool X86FastISel::X86SelectStore(const Instruction *I) {
+ // Atomic stores need special handling.
+ const StoreInst *S = cast<StoreInst>(I);
+
+ if (S->isAtomic())
+ return false;
+
+ unsigned SABIAlignment =
+ TD.getABITypeAlignment(S->getValueOperand()->getType());
+ if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment)
+ return false;
+
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(1), AM))
+ return false;
+
+ return X86FastEmitStore(VT, I->getOperand(0), AM);
+}
+
+/// X86SelectRet - Select and emit code to implement ret instructions.
+bool X86FastISel::X86SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+ const X86MachineFunctionInfo *X86MFInfo =
+ FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (CC != CallingConv::C &&
+ CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ // Don't handle popping bytes on return for now.
+ if (X86MFInfo->getBytesToPopOnReturn() != 0)
+ return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. Fastisel doesn't know how to do that.
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ if (F.isVarArg())
+ return false;
+
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
+ I->getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+
+ // The calling-convention tables for x87 returns don't tell
+ // the whole story.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+
+ unsigned SrcReg = Reg + VA.getValNo();
+ EVT SrcVT = TLI.getValueType(RV->getType());
+ EVT DstVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (SrcVT != DstVT) {
+ if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
+ return false;
+
+ if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
+ return false;
+
+ assert(DstVT == MVT::i32 && "X86 should always ext to i32");
+
+ if (SrcVT == MVT::i1) {
+ if (Outs[0].Flags.isSExt())
+ return false;
+ SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+ }
+ unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
+ SrcReg, /*TODO: Kill=*/false);
+ }
+
+ // Make the copy.
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into %rax. We also do the same with %eax for Win32.
+ if (F.hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
+ unsigned Reg = X86MFInfo->getSRetReturnReg();
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments()!");
+ unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ RetReg).addReg(Reg);
+ RetRegs.push_back(RetReg);
+ }
+
+ // Now emit the RET.
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
+ return true;
+}
+
+/// X86SelectLoad - Select and emit code to implement load instructions.
+///
+bool X86FastISel::X86SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(0), AM))
+ return false;
+
+ unsigned ResultReg = 0;
+ if (X86FastEmitLoad(VT, AM, ResultReg)) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ return false;
+}
+
+static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
+ bool HasAVX = Subtarget->hasAVX();
+ bool X86ScalarSSEf32 = Subtarget->hasSSE1();
+ bool X86ScalarSSEf64 = Subtarget->hasSSE2();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8: return X86::CMP8rr;
+ case MVT::i16: return X86::CMP16rr;
+ case MVT::i32: return X86::CMP32rr;
+ case MVT::i64: return X86::CMP64rr;
+ case MVT::f32:
+ return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
+ case MVT::f64:
+ return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
+ }
+}
+
+/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
+/// of the comparison, return an opcode that works for the compare (e.g.
+/// CMP32ri) otherwise return 0.
+static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ // Otherwise, we can't fold the immediate into this comparison.
+ default: return 0;
+ case MVT::i8: return X86::CMP8ri;
+ case MVT::i16: return X86::CMP16ri;
+ case MVT::i32: return X86::CMP32ri;
+ case MVT::i64:
+ // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
+ // field.
+ if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ return X86::CMP64ri32;
+ return 0;
+ }
+}
+
+bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
+ EVT VT) {
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (Op0Reg == 0) return false;
+
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Op1))
+ Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
+
+ // We have two options: compare with register or immediate. If the RHS of
+ // the compare is an immediate that we can fold into this compare, use
+ // CMPri, otherwise use CMPrr.
+ if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
+ .addReg(Op0Reg)
+ .addImm(Op1C->getSExtValue());
+ return true;
+ }
+ }
+
+ unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
+ if (CompareOpc == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(Op1);
+ if (Op1Reg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
+ .addReg(Op0Reg)
+ .addReg(Op1Reg);
+
+ return true;
+}
+
+bool X86FastISel::X86SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ unsigned ResultReg = createResultReg(&X86::GR8RegClass);
+ unsigned SetCCOpc;
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ switch (CI->getPredicate()) {
+ case CmpInst::FCMP_OEQ: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
+ unsigned EReg = createResultReg(&X86::GR8RegClass);
+ unsigned NPReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::SETNPr), NPReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_UNE: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
+ unsigned NEReg = createResultReg(&X86::GR8RegClass);
+ unsigned PReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
+ .addReg(PReg).addReg(NEReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
+ default:
+ return false;
+ }
+
+ const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of Op0/Op1.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectZExt(const Instruction *I) {
+ // Handle zero-extension from i1 to i8, which is common.
+ if (!I->getOperand(0)->getType()->isIntegerTy(1))
+ return false;
+
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ unsigned ResultReg = getRegForValue(I->getOperand(0));
+ if (ResultReg == 0)
+ return false;
+
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ if (ResultReg == 0)
+ return false;
+
+ if (DstVT != MVT::i8) {
+ ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+ ResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
+ }
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+
+bool X86FastISel::X86SelectBranch(const Instruction *I) {
+ // Unconditional branches are selected by tablegen-generated code.
+ // Handle a conditional branch.
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Fold the common case of a conditional branch with a comparison
+ // in the same block (values defined on other blocks may not have
+ // initialized registers).
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
+ EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
+
+ switch (Predicate) {
+ case CmpInst::FCMP_OEQ:
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::FCMP_UNE;
+ // FALL THROUGH
+ case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
+ default:
+ return false;
+ }
+
+ const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
+ .addMBB(TrueMBB);
+
+ if (Predicate == CmpInst::FCMP_UNE) {
+ // X86 requires a second branch to handle UNE (and OEQ,
+ // which is mapped to UNE above).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
+ .addMBB(TrueMBB);
+ }
+
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
+ // typically happen for _Bool and C++ bools.
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
+ unsigned TestOpc = 0;
+ switch (SourceVT.SimpleTy) {
+ default: break;
+ case MVT::i8: TestOpc = X86::TEST8ri; break;
+ case MVT::i16: TestOpc = X86::TEST16ri; break;
+ case MVT::i32: TestOpc = X86::TEST32ri; break;
+ case MVT::i64: TestOpc = X86::TEST64ri32; break;
+ }
+ if (TestOpc) {
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ if (OpReg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
+ .addReg(OpReg).addImm(1);
+
+ unsigned JmpOpc = X86::JNE_4;
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ JmpOpc = X86::JE_4;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ }
+ }
+
+ // Otherwise do a clumsy setcc and re-test it.
+ // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
+ // in an explicit cast, so make sure to handle that correctly.
+ unsigned OpReg = getRegForValue(BI->getCondition());
+ if (OpReg == 0) return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
+ .addReg(OpReg).addImm(1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+}
+
+bool X86FastISel::X86SelectShift(const Instruction *I) {
+ unsigned CReg = 0, OpReg = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (I->getType()->isIntegerTy(8)) {
+ CReg = X86::CL;
+ RC = &X86::GR8RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(16)) {
+ CReg = X86::CX;
+ RC = &X86::GR16RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR16rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR16rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL16rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(32)) {
+ CReg = X86::ECX;
+ RC = &X86::GR32RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR32rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR32rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL32rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(64)) {
+ CReg = X86::RCX;
+ RC = &X86::GR64RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR64rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR64rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL64rCL; break;
+ default: return false;
+ }
+ } else {
+ return false;
+ }
+
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CReg).addReg(Op1Reg);
+
+ // The shift instruction uses X86::CL. If we defined a super-register
+ // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
+ if (CReg != X86::CL)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::KILL), X86::CL)
+ .addReg(CReg, RegState::Kill);
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
+ .addReg(Op0Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ // We only use cmov here, if we don't have a cmov instruction bail.
+ if (!Subtarget->hasCMov()) return false;
+
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (VT == MVT::i16) {
+ Opc = X86::CMOVE16rr;
+ RC = &X86::GR16RegClass;
+ } else if (VT == MVT::i32) {
+ Opc = X86::CMOVE32rr;
+ RC = &X86::GR32RegClass;
+ } else if (VT == MVT::i64) {
+ Opc = X86::CMOVE64rr;
+ RC = &X86::GR64RegClass;
+ } else {
+ return false;
+ }
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
+ .addReg(Op0Reg).addReg(Op0Reg);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectFPExt(const Instruction *I) {
+ // fpext from float to double.
+ if (X86ScalarSSEf64 &&
+ I->getType()->isDoubleTy()) {
+ const Value *V = I->getOperand(0);
+ if (V->getType()->isFloatTy()) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(&X86::FR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSS2SDrr), ResultReg)
+ .addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
+ if (X86ScalarSSEf64) {
+ if (I->getType()->isFloatTy()) {
+ const Value *V = I->getOperand(0);
+ if (V->getType()->isDoubleTy()) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(&X86::FR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSD2SSrr), ResultReg)
+ .addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectTrunc(const Instruction *I) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ // This code only handles truncation to byte.
+ if (DstVT != MVT::i8 && DstVT != MVT::i1)
+ return false;
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (SrcVT == MVT::i8) {
+ // Truncate from i8 to i1; no code needed.
+ UpdateValueMap(I, InputReg);
+ return true;
+ }
+
+ if (!Subtarget->is64Bit()) {
+ // If we're on x86-32; we can't extract an i8 from a general register.
+ // First issue a copy to GR16_ABCD or GR32_ABCD.
+ const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ?
+ (const TargetRegisterClass*)&X86::GR16_ABCDRegClass :
+ (const TargetRegisterClass*)&X86::GR32_ABCDRegClass;
+ unsigned CopyReg = createResultReg(CopyRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(InputReg);
+ InputReg = CopyReg;
+ }
+
+ // Issue an extract_subreg.
+ unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
+ InputReg, /*Kill=*/true,
+ X86::sub_8bit);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::IsMemcpySmall(uint64_t Len) {
+ return Len <= (Subtarget->is64Bit() ? 32 : 16);
+}
+
+bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len) {
+
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!IsMemcpySmall(Len))
+ return false;
+
+ bool i64Legal = Subtarget->is64Bit();
+
+ // We don't care about alignment here since we just emit integer accesses.
+ while (Len) {
+ MVT VT;
+ if (Len >= 8 && i64Legal)
+ VT = MVT::i64;
+ else if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ VT = MVT::i8;
+ }
+
+ unsigned Reg;
+ bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
+ RV &= X86FastEmitStore(VT, Reg, DestAM);
+ assert(RV && "Failed to emit load or store??");
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ DestAM.Disp += Size;
+ SrcAM.Disp += Size;
+ }
+
+ return true;
+}
+
+bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memcpy: {
+ const MemCpyInst &MCI = cast<MemCpyInst>(I);
+ // Don't handle volatile or variable length memcpys.
+ if (MCI.isVolatile())
+ return false;
+
+ if (isa<ConstantInt>(MCI.getLength())) {
+ // Small memcpy's are common enough that we want to do them
+ // without a call if possible.
+ uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+ if (IsMemcpySmall(Len)) {
+ X86AddressMode DestAM, SrcAM;
+ if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ return false;
+ TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ return true;
+ }
+ }
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memcpy");
+ }
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+
+ if (MSI.isVolatile())
+ return false;
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memset");
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code to store the stack guard onto the stack.
+ EVT PtrTy = TLI.getPointerTy();
+
+ const Value *Op1 = I.getArgOperand(0); // The guard's value.
+ const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ // Grab the frame index.
+ X86AddressMode AM;
+ if (!X86SelectAddress(Slot, AM)) return false;
+ if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
+ return true;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
+ X86AddressMode AM;
+ assert(DI->getAddress() && "Null address should be checked earlier!");
+ if (!X86SelectAddress(DI->getAddress(), AM))
+ return false;
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ // FIXME may need to add RegState::Debug to any registers produced,
+ // although ESP/EBP should be the only ones at the moment.
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
+ addImm(0).addMetadata(DI->getVariable());
+ return true;
+ }
+ case Intrinsic::trap: {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
+ return true;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
+ // FIXME: Should fold immediates.
+
+ // Replace "add with overflow" intrinsics with an "add" instruction followed
+ // by a seto/setc instruction.
+ const Function *Callee = I.getCalledFunction();
+ Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ const Value *Op1 = I.getArgOperand(0);
+ const Value *Op2 = I.getArgOperand(1);
+ unsigned Reg1 = getRegForValue(Op1);
+ unsigned Reg2 = getRegForValue(Op2);
+
+ if (Reg1 == 0 || Reg2 == 0)
+ // FIXME: Handle values *not* in registers.
+ return false;
+
+ unsigned OpC = 0;
+ if (VT == MVT::i32)
+ OpC = X86::ADD32rr;
+ else if (VT == MVT::i64)
+ OpC = X86::ADD64rr;
+ else
+ return false;
+
+ // The call to CreateRegs builds two sequential registers, to store the
+ // both the returned values.
+ unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
+ .addReg(Reg1).addReg(Reg2);
+
+ unsigned Opc = X86::SETBr;
+ if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
+ Opc = X86::SETOr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
+
+ UpdateValueMap(&I, ResultReg, 2);
+ return true;
+ }
+ }
+}
+
+bool X86FastISel::FastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C)
+ return false;
+
+ if (!Subtarget->is64Bit())
+ return false;
+
+ // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (Idx > 6)
+ return false;
+
+ if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+ return false;
+
+ Type *ArgTy = I->getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ case MVT::i64:
+ break;
+ default:
+ return false;
+ }
+ }
+
+ static const uint16_t GPR32ArgRegs[] = {
+ X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
+ };
+ static const uint16_t GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
+ };
+
+ Idx = 0;
+ const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (I->use_empty())
+ continue;
+ bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
+ const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
+ unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(I, ResultReg);
+ }
+ return true;
+}
+
+bool X86FastISel::X86SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm yet.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // Handle intrinsic calls.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
+ return X86VisitIntrinsicCall(*II);
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (cast<CallInst>(I)->isTailCall())
+ return false;
+
+ return DoSelectCall(I, 0);
+}
+
+static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
+ const ImmutableCallSite &CS) {
+ if (Subtarget.is64Bit())
+ return 0;
+ if (Subtarget.isTargetWindows())
+ return 0;
+ CallingConv::ID CC = CS.getCallingConv();
+ if (CC == CallingConv::Fast || CC == CallingConv::GHC)
+ return 0;
+ if (!CS.paramHasAttr(1, Attribute::StructRet))
+ return 0;
+ if (CS.paramHasAttr(1, Attribute::InReg))
+ return 0;
+ return 4;
+}
+
+// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
+bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Handle only C and fastcc calling conventions for now.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+ if (CC != CallingConv::C && CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. Fastisel doesn't know how to do that.
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
+ return false;
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ bool isVarArg = FTy->isVarArg();
+
+ // Don't know how to handle Win64 varargs yet. Nothing special needed for
+ // x86-32. Special handling for x86-64 is implemented.
+ if (isVarArg && Subtarget->isTargetWin64())
+ return false;
+
+ // Fast-isel doesn't know about callee-pop yet.
+ if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
+ TM.Options.GuaranteedTailCallOpt))
+ return false;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI);
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ *FuncInfo.MF, FTy->isVarArg(),
+ Outs, FTy->getContext());
+ if (!CanLowerReturn)
+ return false;
+
+ // Materialize callee address in a register. FIXME: GV address can be
+ // handled with a CALLpcrel32 instead.
+ X86AddressMode CalleeAM;
+ if (!X86SelectCallAddress(Callee, CalleeAM))
+ return false;
+ unsigned CalleeOp = 0;
+ const GlobalValue *GV = 0;
+ if (CalleeAM.GV != 0) {
+ GV = CalleeAM.GV;
+ } else if (CalleeAM.Base.Reg != 0) {
+ CalleeOp = CalleeAM.Base.Reg;
+ } else
+ return false;
+
+ // Deal with call operands first.
+ SmallVector<const Value *, 8> ArgVals;
+ SmallVector<unsigned, 8> Args;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ unsigned arg_size = CS.arg_size();
+ Args.reserve(arg_size);
+ ArgVals.reserve(arg_size);
+ ArgVTs.reserve(arg_size);
+ ArgFlags.reserve(arg_size);
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ // If we're lowering a mem intrinsic instead of a regular call, skip the
+ // last two arguments, which should not passed to the underlying functions.
+ if (MemIntName && e-i <= 2)
+ break;
+ Value *ArgVal = *i;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
+ PointerType *Ty = cast<PointerType>(ArgVal->getType());
+ Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
+ unsigned FrameAlign = CS.getParamAlignment(AttrInd);
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByVal();
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(FrameAlign);
+ if (!IsMemcpySmall(FrameSize))
+ return false;
+ }
+
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg))
+ Flags.setInReg();
+ if (CS.paramHasAttr(AttrInd, Attribute::Nest))
+ Flags.setNest();
+
+ // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
+ // instruction. This is safe because it is common to all fastisel supported
+ // calling conventions on x86.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
+ if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
+ CI->getBitWidth() == 16) {
+ if (Flags.isSExt())
+ ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
+ else
+ ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
+ }
+ }
+
+ unsigned ArgReg;
+
+ // Passing bools around ends up doing a trunc to i1 and passing it.
+ // Codegen this as an argument + "and 1".
+ if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
+ cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
+ ArgVal->hasOneUse()) {
+ ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
+ ArgReg = getRegForValue(ArgVal);
+ if (ArgReg == 0) return false;
+
+ MVT ArgVT;
+ if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
+
+ ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
+ ArgVal->hasOneUse(), 1);
+ } else {
+ ArgReg = getRegForValue(ArgVal);
+ }
+
+ if (ArgReg == 0) return false;
+
+ Type *ArgTy = ArgVal->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ if (ArgVT == MVT::x86mmx)
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(ArgReg);
+ ArgVals.push_back(ArgVal);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
+ I->getParent()->getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64())
+ CCInfo.AllocateStack(32, 8);
+
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
+ .addImm(NumBytes);
+
+ // Process argument: walk the register/memloc assignments, inserting
+ // copies / loads.
+ SmallVector<unsigned, 4> RegArgs;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = Args[VA.getValNo()];
+ EVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+ assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
+ ISD::BITCAST, Arg, /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::VExt:
+ // VExt has not been implemented, so this should be impossible to reach
+ // for now. However, fallback to Selection DAG isel once implemented.
+ return false;
+ case CCValAssign::Indirect:
+ // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
+ // support this.
+ return false;
+ }
+
+ if (VA.isRegLoc()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg()).addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ X86AddressMode AM;
+ AM.Base.Reg = RegInfo->getStackRegister();
+ AM.Disp = LocMemOffset;
+ const Value *ArgVal = ArgVals[VA.getValNo()];
+ ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
+
+ if (Flags.isByVal()) {
+ X86AddressMode SrcAM;
+ SrcAM.Base.Reg = Arg;
+ bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
+ assert(Res && "memcpy length already checked!"); (void)Res;
+ } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
+ // If this is a really simple value, emit this with the Value* version
+ // of X86FastEmitStore. If it isn't simple, we don't want to do this,
+ // as it can cause us to reevaluate the argument.
+ if (!X86FastEmitStore(ArgVT, ArgVal, AM))
+ return false;
+ } else {
+ if (!X86FastEmitStore(ArgVT, Arg, AM))
+ return false;
+ }
+ }
+ }
+
+ // ELF / PIC requires GOT in the EBX register before function calls via PLT
+ // GOT pointer.
+ if (Subtarget->isPICStyleGOT()) {
+ unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ X86::EBX).addReg(Base);
+ }
+
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+ // Count the number of XMM registers allocated.
+ static const uint16_t XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
+ X86::AL).addImm(NumXMMRegs);
+ }
+
+ // Issue the call.
+ MachineInstrBuilder MIB;
+ if (CalleeOp) {
+ // Register-indirect call.
+ unsigned CallOpc;
+ if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64r;
+ else
+ CallOpc = X86::CALL32r;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
+ .addReg(CalleeOp);
+
+ } else {
+ // Direct call.
+ assert(GV && "Not a direct call");
+ unsigned CallOpc;
+ if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64pcrel32;
+ else
+ CallOpc = X86::CALLpcrel32;
+
+ // See if we need any target-specific flags on the GV operand.
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+ // external symbols most go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
+ if (MemIntName)
+ MIB.addExternalSymbol(MemIntName, OpFlags);
+ else
+ MIB.addGlobalAddress(GV, 0, OpFlags);
+ }
+
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+
+ // Add an implicit use GOT pointer in EBX.
+ if (Subtarget->isPICStyleGOT())
+ MIB.addReg(X86::EBX, RegState::Implicit);
+
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+ MIB.addReg(X86::AL, RegState::Implicit);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i], RegState::Implicit);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(NumBytesCallee);
+
+ // Build info for return calling conv lowering code.
+ // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
+ SmallVector<ISD::InputArg, 32> Ins;
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, I->getType(), RetTys);
+ for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
+ EVT VT = RetTys[i];
+ MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
+ for (unsigned j = 0; j != NumRegs; ++j) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.Used = !CS.getInstruction()->use_empty();
+ if (CS.paramHasAttr(0, Attribute::SExt))
+ MyFlags.Flags.setSExt();
+ if (CS.paramHasAttr(0, Attribute::ZExt))
+ MyFlags.Flags.setZExt();
+ if (CS.paramHasAttr(0, Attribute::InReg))
+ MyFlags.Flags.setInReg();
+ Ins.push_back(MyFlags);
+ }
+ }
+
+ // Now handle call return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
+ I->getParent()->getContext());
+ unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ EVT CopyVT = RVLocs[i].getValVT();
+ unsigned CopyReg = ResultReg + i;
+
+ // If this is a call to a function that returns an fp value on the x87 fp
+ // stack, but where we prefer to use the value in xmm registers, copy it
+ // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+ if ((RVLocs[i].getLocReg() == X86::ST0 ||
+ RVLocs[i].getLocReg() == X86::ST1)) {
+ if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
+ CopyVT = MVT::f80;
+ CopyReg = createResultReg(&X86::RFP80RegClass);
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
+ CopyReg);
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(RVLocs[i].getLocReg());
+ UsedRegs.push_back(RVLocs[i].getLocReg());
+ }
+
+ if (CopyVT != RVLocs[i].getValVT()) {
+ // Round the F80 the right size, which also moves to the appropriate xmm
+ // register. This is accomplished by storing the F80 value in memory and
+ // then loading it back. Ewww...
+ EVT ResVT = RVLocs[i].getValVT();
+ unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
+ unsigned MemSize = ResVT.getSizeInBits()/8;
+ int FI = MFI.CreateStackObject(MemSize, MemSize, false);
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc)), FI)
+ .addReg(CopyReg);
+ Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg + i), FI);
+ }
+ }
+
+ if (RVLocs.size())
+ UpdateValueMap(I, ResultReg, RVLocs.size());
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+
+bool
+X86FastISel::TargetSelectInstruction(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ return X86SelectLoad(I);
+ case Instruction::Store:
+ return X86SelectStore(I);
+ case Instruction::Ret:
+ return X86SelectRet(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return X86SelectCmp(I);
+ case Instruction::ZExt:
+ return X86SelectZExt(I);
+ case Instruction::Br:
+ return X86SelectBranch(I);
+ case Instruction::Call:
+ return X86SelectCall(I);
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ return X86SelectShift(I);
+ case Instruction::Select:
+ return X86SelectSelect(I);
+ case Instruction::Trunc:
+ return X86SelectTrunc(I);
+ case Instruction::FPExt:
+ return X86SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return X86SelectFPTrunc(I);
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return X86SelectZExt(I);
+ if (DstVT.bitsLT(SrcVT))
+ return X86SelectTrunc(I);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
+ MVT VT;
+ if (!isTypeLegal(C->getType(), VT))
+ return 0;
+
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return 0;
+
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = &X86::GR8RegClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = &X86::GR16RegClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = &X86::GR32RegClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = &X86::GR64RegClass;
+ break;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return 0;
+ }
+
+ // Materialize addresses with LEA instructions.
+ if (isa<GlobalValue>(C)) {
+ X86AddressMode AM;
+ if (X86SelectAddress(C, AM)) {
+ // If the expression is just a basereg, then we're done, otherwise we need
+ // to emit an LEA.
+ if (AM.BaseType == X86AddressMode::RegBase &&
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
+ return AM.Base.Reg;
+
+ Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+ }
+ return 0;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ unsigned char OpFlag = 0;
+ if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOTOFF;
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ TM.getCodeModel() == CodeModel::Small) {
+ PICBase = X86::RIP;
+ }
+
+ // Create the load from the constant pool.
+ unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
+ unsigned ResultReg = createResultReg(RC);
+ addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg),
+ MCPOffset, PICBase, OpFlag);
+
+ return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
+ // Fail on dynamic allocas. At this point, getRegForValue has already
+ // checked its CSE maps, so if we're here trying to handle a dynamic
+ // alloca, we're not going to succeed. X86SelectAddress has a
+ // check for dynamic allocas, because it's called directly from
+ // various places, but TargetMaterializeAlloca also needs a check
+ // in order to avoid recursion between getRegForValue,
+ // X86SelectAddrss, and TargetMaterializeAlloca.
+ if (!FuncInfo.StaticAllocaMap.count(C))
+ return 0;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(C, AM))
+ return 0;
+ unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
+ MVT VT;
+ if (!isTypeLegal(CF->getType(), VT))
+ return 0;
+
+ // Get opcode and regclass for the given zero.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return 0;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = X86::FsFLD0SS;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = X86::FsFLD0SD;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return 0;
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
+ return ResultReg;
+}
+
+
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// possible.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ X86AddressMode AM;
+ if (!X86SelectAddress(LI->getOperand(0), AM))
+ return false;
+
+ const X86InstrInfo &XII = (const X86InstrInfo&)TII;
+
+ unsigned Size = TD.getTypeAllocSize(LI->getType());
+ unsigned Alignment = LI->getAlignment();
+
+ SmallVector<MachineOperand, 8> AddrOps;
+ AM.getFullAddress(AddrOps);
+
+ MachineInstr *Result =
+ XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+ if (Result == 0) return false;
+
+ FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
+ MI->eraseFromParent();
+ return true;
+}
+
+
+namespace llvm {
+ FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
+ return new X86FastISel(funcInfo, libInfo);
+ }
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
new file mode 100644
index 000000000000..0585b43a4640
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -0,0 +1,1768 @@
+//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which converts floating point instructions from
+// pseudo registers into register stack instructions. This pass uses live
+// variable information to indicate where the FPn registers are used and their
+// lifetimes.
+//
+// The x87 hardware tracks liveness of the stack registers, so it is necessary
+// to implement exact liveness tracking between basic blocks. The CFG edges are
+// partitioned into bundles where the same FP registers must be live in
+// identical stack positions. Instructions are inserted at the end of each basic
+// block to rearrange the live registers to match the outgoing bundle.
+//
+// This approach avoids splitting critical edges at the potential cost of more
+// live register shuffling instructions when critical edges are present.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFXCH, "Number of fxch instructions inserted");
+STATISTIC(NumFP , "Number of floating point instructions");
+
+namespace {
+ struct FPS : public MachineFunctionPass {
+ static char ID;
+ FPS() : MachineFunctionPass(ID) {
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ // This is really only to keep valgrind quiet.
+ // The logic in isLive() is too much for it.
+ memset(Stack, 0, sizeof(Stack));
+ memset(RegMap, 0, sizeof(RegMap));
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<EdgeBundles>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const { return "X86 FP Stackifier"; }
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+
+ // Two CFG edges are related if they leave the same block, or enter the same
+ // block. The transitive closure of an edge under this relation is a
+ // LiveBundle. It represents a set of CFG edges where the live FP stack
+ // registers must be allocated identically in the x87 stack.
+ //
+ // A LiveBundle is usually all the edges leaving a block, or all the edges
+ // entering a block, but it can contain more edges if critical edges are
+ // present.
+ //
+ // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
+ // but the exact mapping of FP registers to stack slots is fixed later.
+ struct LiveBundle {
+ // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
+ unsigned Mask;
+
+ // Number of pre-assigned live registers in FixStack. This is 0 when the
+ // stack order has not yet been fixed.
+ unsigned FixCount;
+
+ // Assigned stack order for live-in registers.
+ // FixStack[i] == getStackEntry(i) for all i < FixCount.
+ unsigned char FixStack[8];
+
+ LiveBundle() : Mask(0), FixCount(0) {}
+
+ // Have the live registers been assigned a stack order yet?
+ bool isFixed() const { return !Mask || FixCount; }
+ };
+
+ // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
+ // with no live FP registers.
+ SmallVector<LiveBundle, 8> LiveBundles;
+
+ // The edge bundle analysis provides indices into the LiveBundles vector.
+ EdgeBundles *Bundles;
+
+ // Return a bitmask of FP registers in block's live-in list.
+ static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ unsigned Mask = 0;
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I - X86::FP0;
+ if (Reg < 8)
+ Mask |= 1 << Reg;
+ }
+ return Mask;
+ }
+
+ // Partition all the CFG edges into LiveBundles.
+ void bundleCFG(MachineFunction &MF);
+
+ MachineBasicBlock *MBB; // Current basic block
+
+ // The hardware keeps track of how many FP registers are live, so we have
+ // to model that exactly. Usually, each live register corresponds to an
+ // FP<n> register, but when dealing with calls, returns, and inline
+ // assembly, it is sometimes necessary to have live scratch registers.
+ unsigned Stack[8]; // FP<n> Registers in each stack slot...
+ unsigned StackTop; // The current top of the FP stack.
+
+ enum {
+ NumFPRegs = 16 // Including scratch pseudo-registers.
+ };
+
+ // For each live FP<n> register, point to its Stack[] entry.
+ // The first entries correspond to FP0-FP6, the rest are scratch registers
+ // used when we need slightly different live registers than what the
+ // register allocator thinks.
+ unsigned RegMap[NumFPRegs];
+
+ // Pending fixed registers - Inline assembly needs FP registers to appear
+ // in fixed stack slot positions. This is handled by copying FP registers
+ // to ST registers before the instruction, and copying back after the
+ // instruction.
+ //
+ // This is modeled with pending ST registers. NumPendingSTs is the number
+ // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP
+ // register that holds the ST value. The ST registers are not moved into
+ // place until immediately before the instruction that needs them.
+ //
+ // It can happen that we need an ST register to be live when no FP register
+ // holds the value:
+ //
+ // %ST0 = COPY %FP4<kill>
+ //
+ // When that happens, we allocate a scratch FP register to hold the ST
+ // value. That means every register in PendingST must be live.
+
+ unsigned NumPendingSTs;
+ unsigned char PendingST[8];
+
+ // Set up our stack model to match the incoming registers to MBB.
+ void setupBlockStack();
+
+ // Shuffle live registers to match the expectations of successor blocks.
+ void finishBlockStack();
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dumpStack() const {
+ dbgs() << "Stack contents:";
+ for (unsigned i = 0; i != StackTop; ++i) {
+ dbgs() << " FP" << Stack[i];
+ assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
+ }
+ for (unsigned i = 0; i != NumPendingSTs; ++i)
+ dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
+ dbgs() << "\n";
+ }
+#endif
+
+ /// getSlot - Return the stack slot number a particular register number is
+ /// in.
+ unsigned getSlot(unsigned RegNo) const {
+ assert(RegNo < NumFPRegs && "Regno out of range!");
+ return RegMap[RegNo];
+ }
+
+ /// isLive - Is RegNo currently live in the stack?
+ bool isLive(unsigned RegNo) const {
+ unsigned Slot = getSlot(RegNo);
+ return Slot < StackTop && Stack[Slot] == RegNo;
+ }
+
+ /// getScratchReg - Return an FP register that is not currently in use.
+ unsigned getScratchReg() const {
+ for (int i = NumFPRegs - 1; i >= 8; --i)
+ if (!isLive(i))
+ return i;
+ llvm_unreachable("Ran out of scratch FP registers");
+ }
+
+ /// isScratchReg - Returns trus if RegNo is a scratch FP register.
+ static bool isScratchReg(unsigned RegNo) {
+ return RegNo > 8 && RegNo < NumFPRegs;
+ }
+
+ /// getStackEntry - Return the X86::FP<n> register in register ST(i).
+ unsigned getStackEntry(unsigned STi) const {
+ if (STi >= StackTop)
+ report_fatal_error("Access past stack top!");
+ return Stack[StackTop-1-STi];
+ }
+
+ /// getSTReg - Return the X86::ST(i) register which contains the specified
+ /// FP<RegNo> register.
+ unsigned getSTReg(unsigned RegNo) const {
+ return StackTop - 1 - getSlot(RegNo) + X86::ST0;
+ }
+
+ // pushReg - Push the specified FP<n> register onto the stack.
+ void pushReg(unsigned Reg) {
+ assert(Reg < NumFPRegs && "Register number out of range!");
+ if (StackTop >= 8)
+ report_fatal_error("Stack overflow!");
+ Stack[StackTop] = Reg;
+ RegMap[Reg] = StackTop++;
+ }
+
+ bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
+ void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
+ if (isAtTop(RegNo)) return;
+
+ unsigned STReg = getSTReg(RegNo);
+ unsigned RegOnTop = getStackEntry(0);
+
+ // Swap the slots the regs are in.
+ std::swap(RegMap[RegNo], RegMap[RegOnTop]);
+
+ // Swap stack slot contents.
+ if (RegMap[RegOnTop] >= StackTop)
+ report_fatal_error("Access past stack top!");
+ std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
+
+ // Emit an fxch to update the runtime processors version of the state.
+ BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
+ ++NumFXCH;
+ }
+
+ void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
+ unsigned STReg = getSTReg(RegNo);
+ pushReg(AsReg); // New register on top of stack
+
+ BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
+ }
+
+ /// duplicatePendingSTBeforeKill - The instruction at I is about to kill
+ /// RegNo. If any PendingST registers still need the RegNo value, duplicate
+ /// them to new scratch registers.
+ void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) {
+ for (unsigned i = 0; i != NumPendingSTs; ++i) {
+ if (PendingST[i] != RegNo)
+ continue;
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Duplicating pending ST" << i
+ << " in FP" << RegNo << " to FP" << SR << '\n');
+ duplicateToTop(RegNo, SR, I);
+ PendingST[i] = SR;
+ }
+ }
+
+ /// popStackAfter - Pop the current value off of the top of the FP stack
+ /// after the specified instruction.
+ void popStackAfter(MachineBasicBlock::iterator &I);
+
+ /// freeStackSlotAfter - Free the specified register from the register
+ /// stack, so that it is no longer in a register. If the register is
+ /// currently at the top of the stack, we just pop the current instruction,
+ /// otherwise we store the current top-of-stack into the specified slot,
+ /// then pop the top of stack.
+ void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
+
+ /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
+ /// instruction.
+ MachineBasicBlock::iterator
+ freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
+
+ /// Adjust the live registers to be the set in Mask.
+ void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
+
+ /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
+ /// st(0), FP reg FixStack[1] is st(1) etc.
+ void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
+ MachineBasicBlock::iterator I);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ void handleZeroArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFPRW(MachineBasicBlock::iterator &I);
+ void handleTwoArgFP(MachineBasicBlock::iterator &I);
+ void handleCompareFP(MachineBasicBlock::iterator &I);
+ void handleCondMovFP(MachineBasicBlock::iterator &I);
+ void handleSpecialFP(MachineBasicBlock::iterator &I);
+
+ // Check if a COPY instruction is using FP registers.
+ static bool isFPCopy(MachineInstr *MI) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ return X86::RFP80RegClass.contains(DstReg) ||
+ X86::RFP80RegClass.contains(SrcReg);
+ }
+ };
+ char FPS::ID = 0;
+}
+
+FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
+
+/// getFPReg - Return the X86::FPx register number for the specified operand.
+/// For example, this returns 3 for X86::FP3.
+static unsigned getFPReg(const MachineOperand &MO) {
+ assert(MO.isReg() && "Expected an FP register!");
+ unsigned Reg = MO.getReg();
+ assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
+ return Reg - X86::FP0;
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
+/// register references into FP stack references.
+///
+bool FPS::runOnMachineFunction(MachineFunction &MF) {
+ // We only need to run this pass if there are any FP registers used in this
+ // function. If it is all integer, there is nothing for us to do!
+ bool FPIsUsed = false;
+
+ assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+ for (unsigned i = 0; i <= 6; ++i)
+ if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+ FPIsUsed = true;
+ break;
+ }
+
+ // Early exit.
+ if (!FPIsUsed) return false;
+
+ Bundles = &getAnalysis<EdgeBundles>();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Prepare cross-MBB liveness.
+ bundleCFG(MF);
+
+ StackTop = 0;
+
+ // Process the function in depth first order so that we process at least one
+ // of the predecessors for every reachable block in the function.
+ SmallPtrSet<MachineBasicBlock*, 8> Processed;
+ MachineBasicBlock *Entry = MF.begin();
+
+ bool Changed = false;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
+ I != E; ++I)
+ Changed |= processBasicBlock(MF, **I);
+
+ // Process any unreachable blocks in arbitrary order now.
+ if (MF.size() != Processed.size())
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ if (Processed.insert(BB))
+ Changed |= processBasicBlock(MF, *BB);
+
+ LiveBundles.clear();
+
+ return Changed;
+}
+
+/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
+/// live-out sets for the FP registers. Consistent means that the set of
+/// registers live-out from a block is identical to the live-in set of all
+/// successors. This is not enforced by the normal live-in lists since
+/// registers may be implicitly defined, or not used by all successors.
+void FPS::bundleCFG(MachineFunction &MF) {
+ assert(LiveBundles.empty() && "Stale data in LiveBundles");
+ LiveBundles.resize(Bundles->getNumBundles());
+
+ // Gather the actual live-in masks for all MBBs.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ const unsigned Mask = calcLiveInMask(MBB);
+ if (!Mask)
+ continue;
+ // Update MBB ingoing bundle mask.
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
+ }
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// transforming FP instructions into their stack form.
+///
+bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+ NumPendingSTs = 0;
+
+ setupBlockStack();
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ uint64_t Flags = MI->getDesc().TSFlags;
+
+ unsigned FPInstClass = Flags & X86II::FPTypeMask;
+ if (MI->isInlineAsm())
+ FPInstClass = X86II::SpecialFP;
+
+ if (MI->isCopy() && isFPCopy(MI))
+ FPInstClass = X86II::SpecialFP;
+
+ if (MI->isImplicitDef() &&
+ X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
+ FPInstClass = X86II::SpecialFP;
+
+ if (FPInstClass == X86II::NotFP)
+ continue; // Efficiently ignore non-fp insts!
+
+ MachineInstr *PrevMI = 0;
+ if (I != BB.begin())
+ PrevMI = prior(I);
+
+ ++NumFP; // Keep track of # of pseudo instrs
+ DEBUG(dbgs() << "\nFPInst:\t" << *MI);
+
+ // Get dead variables list now because the MI pointer may be deleted as part
+ // of processing!
+ SmallVector<unsigned, 8> DeadRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadRegs.push_back(MO.getReg());
+ }
+
+ switch (FPInstClass) {
+ case X86II::ZeroArgFP: handleZeroArgFP(I); break;
+ case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
+ case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
+ case X86II::TwoArgFP: handleTwoArgFP(I); break;
+ case X86II::CompareFP: handleCompareFP(I); break;
+ case X86II::CondMovFP: handleCondMovFP(I); break;
+ case X86II::SpecialFP: handleSpecialFP(I); break;
+ default: llvm_unreachable("Unknown FP Type!");
+ }
+
+ // Check to see if any of the values defined by this instruction are dead
+ // after definition. If so, pop them.
+ for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
+ unsigned Reg = DeadRegs[i];
+ if (Reg >= X86::FP0 && Reg <= X86::FP6) {
+ DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
+ freeStackSlotAfter(I, Reg-X86::FP0);
+ }
+ }
+
+ // Print out all of the instructions expanded to if -debug
+ DEBUG(
+ MachineBasicBlock::iterator PrevI(PrevMI);
+ if (I == PrevI) {
+ dbgs() << "Just deleted pseudo instruction\n";
+ } else {
+ MachineBasicBlock::iterator Start = I;
+ // Rewind to first instruction newly inserted.
+ while (Start != BB.begin() && prior(Start) != PrevI) --Start;
+ dbgs() << "Inserted instructions:\n\t";
+ Start->print(dbgs(), &MF.getTarget());
+ while (++Start != llvm::next(I)) {}
+ }
+ dumpStack();
+ );
+ (void)PrevMI;
+
+ Changed = true;
+ }
+
+ finishBlockStack();
+
+ return Changed;
+}
+
+/// setupBlockStack - Use the live bundles to set up our model of the stack
+/// to match predecessors' live out stack.
+void FPS::setupBlockStack() {
+ DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+ StackTop = 0;
+ // Get the live-in bundle for MBB.
+ const LiveBundle &Bundle =
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "Block has no FP live-ins.\n");
+ return;
+ }
+
+ // Depth-first iteration should ensure that we always have an assigned stack.
+ assert(Bundle.isFixed() && "Reached block before any predecessors");
+
+ // Push the fixed live-in registers.
+ for (unsigned i = Bundle.FixCount; i > 0; --i) {
+ MBB->addLiveIn(X86::ST0+i-1);
+ DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
+ << unsigned(Bundle.FixStack[i-1]) << '\n');
+ pushReg(Bundle.FixStack[i-1]);
+ }
+
+ // Kill off unwanted live-ins. This can happen with a critical edge.
+ // FIXME: We could keep these live registers around as zombies. They may need
+ // to be revived at the end of a short block. It might save a few instrs.
+ adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
+ DEBUG(MBB->dump());
+}
+
+/// finishBlockStack - Revive live-outs that are implicitly defined out of
+/// MBB. Shuffle live registers to match the expected fixed stack of any
+/// predecessors, and ensure that all predecessors are expecting the same
+/// stack.
+void FPS::finishBlockStack() {
+ // The RET handling below takes care of return blocks for us.
+ if (MBB->succ_empty())
+ return;
+
+ DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+
+ // Get MBB's live-out bundle.
+ unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
+ LiveBundle &Bundle = LiveBundles[BundleIdx];
+
+ // We may need to kill and define some registers to match successors.
+ // FIXME: This can probably be combined with the shuffle below.
+ MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
+ adjustLiveRegs(Bundle.Mask, Term);
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "No live-outs.\n");
+ return;
+ }
+
+ // Has the stack order been fixed yet?
+ DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
+ if (Bundle.isFixed()) {
+ DEBUG(dbgs() << "Shuffling stack to match.\n");
+ shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
+ } else {
+ // Not fixed yet, we get to choose.
+ DEBUG(dbgs() << "Fixing stack order now.\n");
+ Bundle.FixCount = StackTop;
+ for (unsigned i = 0; i < StackTop; ++i)
+ Bundle.FixStack[i] = getStackEntry(i);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Efficient Lookup Table Support
+//===----------------------------------------------------------------------===//
+
+namespace {
+ struct TableEntry {
+ uint16_t from;
+ uint16_t to;
+ bool operator<(const TableEntry &TE) const { return from < TE.from; }
+ friend bool operator<(const TableEntry &TE, unsigned V) {
+ return TE.from < V;
+ }
+ friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
+ const TableEntry &TE) {
+ return V < TE.from;
+ }
+ };
+}
+
+#ifndef NDEBUG
+static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ if (!(Table[i] < Table[i+1])) return false;
+ return true;
+}
+#endif
+
+static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
+ const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
+ if (I != Table+N && I->from == Opcode)
+ return I->to;
+ return -1;
+}
+
+#ifdef NDEBUG
+#define ASSERT_SORTED(TABLE)
+#else
+#define ASSERT_SORTED(TABLE) \
+ { static bool TABLE##Checked = false; \
+ if (!TABLE##Checked) { \
+ assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
+ "All lookup tables must be sorted for efficient access!"); \
+ TABLE##Checked = true; \
+ } \
+ }
+#endif
+
+//===----------------------------------------------------------------------===//
+// Register File -> Register Stack Mapping Methods
+//===----------------------------------------------------------------------===//
+
+// OpcodeTable - Sorted map of register instructions to their stack version.
+// The first element is an register file pseudo instruction, the second is the
+// concrete X86 instruction which uses the register stack.
+//
+static const TableEntry OpcodeTable[] = {
+ { X86::ABS_Fp32 , X86::ABS_F },
+ { X86::ABS_Fp64 , X86::ABS_F },
+ { X86::ABS_Fp80 , X86::ABS_F },
+ { X86::ADD_Fp32m , X86::ADD_F32m },
+ { X86::ADD_Fp64m , X86::ADD_F64m },
+ { X86::ADD_Fp64m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m64 , X86::ADD_F64m },
+ { X86::ADD_FpI16m32 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m64 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m80 , X86::ADD_FI16m },
+ { X86::ADD_FpI32m32 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m64 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m80 , X86::ADD_FI32m },
+ { X86::CHS_Fp32 , X86::CHS_F },
+ { X86::CHS_Fp64 , X86::CHS_F },
+ { X86::CHS_Fp80 , X86::CHS_F },
+ { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
+ { X86::CMOVB_Fp32 , X86::CMOVB_F },
+ { X86::CMOVB_Fp64 , X86::CMOVB_F },
+ { X86::CMOVB_Fp80 , X86::CMOVB_F },
+ { X86::CMOVE_Fp32 , X86::CMOVE_F },
+ { X86::CMOVE_Fp64 , X86::CMOVE_F },
+ { X86::CMOVE_Fp80 , X86::CMOVE_F },
+ { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
+ { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
+ { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
+ { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
+ { X86::CMOVP_Fp32 , X86::CMOVP_F },
+ { X86::CMOVP_Fp64 , X86::CMOVP_F },
+ { X86::CMOVP_Fp80 , X86::CMOVP_F },
+ { X86::COS_Fp32 , X86::COS_F },
+ { X86::COS_Fp64 , X86::COS_F },
+ { X86::COS_Fp80 , X86::COS_F },
+ { X86::DIVR_Fp32m , X86::DIVR_F32m },
+ { X86::DIVR_Fp64m , X86::DIVR_F64m },
+ { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
+ { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
+ { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
+ { X86::DIV_Fp32m , X86::DIV_F32m },
+ { X86::DIV_Fp64m , X86::DIV_F64m },
+ { X86::DIV_Fp64m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m64 , X86::DIV_F64m },
+ { X86::DIV_FpI16m32 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m64 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m80 , X86::DIV_FI16m },
+ { X86::DIV_FpI32m32 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m64 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m80 , X86::DIV_FI32m },
+ { X86::ILD_Fp16m32 , X86::ILD_F16m },
+ { X86::ILD_Fp16m64 , X86::ILD_F16m },
+ { X86::ILD_Fp16m80 , X86::ILD_F16m },
+ { X86::ILD_Fp32m32 , X86::ILD_F32m },
+ { X86::ILD_Fp32m64 , X86::ILD_F32m },
+ { X86::ILD_Fp32m80 , X86::ILD_F32m },
+ { X86::ILD_Fp64m32 , X86::ILD_F64m },
+ { X86::ILD_Fp64m64 , X86::ILD_F64m },
+ { X86::ILD_Fp64m80 , X86::ILD_F64m },
+ { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
+ { X86::IST_Fp16m32 , X86::IST_F16m },
+ { X86::IST_Fp16m64 , X86::IST_F16m },
+ { X86::IST_Fp16m80 , X86::IST_F16m },
+ { X86::IST_Fp32m32 , X86::IST_F32m },
+ { X86::IST_Fp32m64 , X86::IST_F32m },
+ { X86::IST_Fp32m80 , X86::IST_F32m },
+ { X86::IST_Fp64m32 , X86::IST_FP64m },
+ { X86::IST_Fp64m64 , X86::IST_FP64m },
+ { X86::IST_Fp64m80 , X86::IST_FP64m },
+ { X86::LD_Fp032 , X86::LD_F0 },
+ { X86::LD_Fp064 , X86::LD_F0 },
+ { X86::LD_Fp080 , X86::LD_F0 },
+ { X86::LD_Fp132 , X86::LD_F1 },
+ { X86::LD_Fp164 , X86::LD_F1 },
+ { X86::LD_Fp180 , X86::LD_F1 },
+ { X86::LD_Fp32m , X86::LD_F32m },
+ { X86::LD_Fp32m64 , X86::LD_F32m },
+ { X86::LD_Fp32m80 , X86::LD_F32m },
+ { X86::LD_Fp64m , X86::LD_F64m },
+ { X86::LD_Fp64m80 , X86::LD_F64m },
+ { X86::LD_Fp80m , X86::LD_F80m },
+ { X86::MUL_Fp32m , X86::MUL_F32m },
+ { X86::MUL_Fp64m , X86::MUL_F64m },
+ { X86::MUL_Fp64m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m64 , X86::MUL_F64m },
+ { X86::MUL_FpI16m32 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m64 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m80 , X86::MUL_FI16m },
+ { X86::MUL_FpI32m32 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m64 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m80 , X86::MUL_FI32m },
+ { X86::SIN_Fp32 , X86::SIN_F },
+ { X86::SIN_Fp64 , X86::SIN_F },
+ { X86::SIN_Fp80 , X86::SIN_F },
+ { X86::SQRT_Fp32 , X86::SQRT_F },
+ { X86::SQRT_Fp64 , X86::SQRT_F },
+ { X86::SQRT_Fp80 , X86::SQRT_F },
+ { X86::ST_Fp32m , X86::ST_F32m },
+ { X86::ST_Fp64m , X86::ST_F64m },
+ { X86::ST_Fp64m32 , X86::ST_F32m },
+ { X86::ST_Fp80m32 , X86::ST_F32m },
+ { X86::ST_Fp80m64 , X86::ST_F64m },
+ { X86::ST_FpP80m , X86::ST_FP80m },
+ { X86::SUBR_Fp32m , X86::SUBR_F32m },
+ { X86::SUBR_Fp64m , X86::SUBR_F64m },
+ { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
+ { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
+ { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
+ { X86::SUB_Fp32m , X86::SUB_F32m },
+ { X86::SUB_Fp64m , X86::SUB_F64m },
+ { X86::SUB_Fp64m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m64 , X86::SUB_F64m },
+ { X86::SUB_FpI16m32 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m64 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m80 , X86::SUB_FI16m },
+ { X86::SUB_FpI32m32 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m64 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m80 , X86::SUB_FI32m },
+ { X86::TST_Fp32 , X86::TST_F },
+ { X86::TST_Fp64 , X86::TST_F },
+ { X86::TST_Fp80 , X86::TST_F },
+ { X86::UCOM_FpIr32 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr64 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr80 , X86::UCOM_FIr },
+ { X86::UCOM_Fpr32 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr64 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr80 , X86::UCOM_Fr },
+};
+
+static unsigned getConcreteOpcode(unsigned Opcode) {
+ ASSERT_SORTED(OpcodeTable);
+ int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
+ assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
+ return Opc;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Methods
+//===----------------------------------------------------------------------===//
+
+// PopTable - Sorted map of instructions to their popping version. The first
+// element is an instruction, the second is the version which pops.
+//
+static const TableEntry PopTable[] = {
+ { X86::ADD_FrST0 , X86::ADD_FPrST0 },
+
+ { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
+ { X86::DIV_FrST0 , X86::DIV_FPrST0 },
+
+ { X86::IST_F16m , X86::IST_FP16m },
+ { X86::IST_F32m , X86::IST_FP32m },
+
+ { X86::MUL_FrST0 , X86::MUL_FPrST0 },
+
+ { X86::ST_F32m , X86::ST_FP32m },
+ { X86::ST_F64m , X86::ST_FP64m },
+ { X86::ST_Frr , X86::ST_FPrr },
+
+ { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
+ { X86::SUB_FrST0 , X86::SUB_FPrST0 },
+
+ { X86::UCOM_FIr , X86::UCOM_FIPr },
+
+ { X86::UCOM_FPr , X86::UCOM_FPPr },
+ { X86::UCOM_Fr , X86::UCOM_FPr },
+};
+
+/// popStackAfter - Pop the current value off of the top of the FP stack after
+/// the specified instruction. This attempts to be sneaky and combine the pop
+/// into the instruction itself if possible. The iterator is left pointing to
+/// the last instruction, be it a new pop instruction inserted, or the old
+/// instruction if it was modified in place.
+///
+void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
+ MachineInstr* MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ ASSERT_SORTED(PopTable);
+ if (StackTop == 0)
+ report_fatal_error("Cannot pop empty stack!");
+ RegMap[Stack[--StackTop]] = ~0; // Update state
+
+ // Check to see if there is a popping version of this instruction...
+ int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
+ if (Opcode != -1) {
+ I->setDesc(TII->get(Opcode));
+ if (Opcode == X86::UCOM_FPPr)
+ I->RemoveOperand(0);
+ } else { // Insert an explicit pop
+ I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
+ }
+}
+
+/// freeStackSlotAfter - Free the specified register from the register stack, so
+/// that it is no longer in a register. If the register is currently at the top
+/// of the stack, we just pop the current instruction, otherwise we store the
+/// current top-of-stack into the specified slot, then pop the top of stack.
+void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
+ if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
+ popStackAfter(I);
+ return;
+ }
+
+ // Otherwise, store the top of stack into the dead slot, killing the operand
+ // without having to add in an explicit xchg then pop.
+ //
+ I = freeStackSlotBefore(++I, FPRegNo);
+}
+
+/// freeStackSlotBefore - Free the specified register without trying any
+/// folding.
+MachineBasicBlock::iterator
+FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
+ unsigned STReg = getSTReg(FPRegNo);
+ unsigned OldSlot = getSlot(FPRegNo);
+ unsigned TopReg = Stack[StackTop-1];
+ Stack[OldSlot] = TopReg;
+ RegMap[TopReg] = OldSlot;
+ RegMap[FPRegNo] = ~0;
+ Stack[--StackTop] = ~0;
+ return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
+}
+
+/// adjustLiveRegs - Kill and revive registers such that exactly the FP
+/// registers with a bit in Mask are live.
+void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
+ unsigned Defs = Mask;
+ unsigned Kills = 0;
+ for (unsigned i = 0; i < StackTop; ++i) {
+ unsigned RegNo = Stack[i];
+ if (!(Defs & (1 << RegNo)))
+ // This register is live, but we don't want it.
+ Kills |= (1 << RegNo);
+ else
+ // We don't need to imp-def this live register.
+ Defs &= ~(1 << RegNo);
+ }
+ assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
+
+ // Produce implicit-defs for free by using killed registers.
+ while (Kills && Defs) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
+ std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
+ std::swap(RegMap[KReg], RegMap[DReg]);
+ Kills &= ~(1 << KReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Kill registers by popping.
+ if (Kills && I != MBB->begin()) {
+ MachineBasicBlock::iterator I2 = llvm::prior(I);
+ while (StackTop) {
+ unsigned KReg = getStackEntry(0);
+ if (!(Kills & (1 << KReg)))
+ break;
+ DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
+ popStackAfter(I2);
+ Kills &= ~(1 << KReg);
+ }
+ }
+
+ // Manually kill the rest.
+ while (Kills) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
+ freeStackSlotBefore(I, KReg);
+ Kills &= ~(1 << KReg);
+ }
+
+ // Load zeros for all the imp-defs.
+ while(Defs) {
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
+ BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
+ pushReg(DReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Now we should have the correct registers live.
+ DEBUG(dumpStack());
+ assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
+}
+
+/// shuffleStackTop - emit fxch instructions before I to shuffle the top
+/// FixCount entries into the order given by FixStack.
+/// FIXME: Is there a better algorithm than insertion sort?
+void FPS::shuffleStackTop(const unsigned char *FixStack,
+ unsigned FixCount,
+ MachineBasicBlock::iterator I) {
+ // Move items into place, starting from the desired stack bottom.
+ while (FixCount--) {
+ // Old register at position FixCount.
+ unsigned OldReg = getStackEntry(FixCount);
+ // Desired register at position FixCount.
+ unsigned Reg = FixStack[FixCount];
+ if (Reg == OldReg)
+ continue;
+ // (Reg st0) (OldReg st0) = (Reg OldReg st0)
+ moveToTop(Reg, I);
+ if (FixCount > 0)
+ moveToTop(OldReg, I);
+ }
+ DEBUG(dumpStack());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction transformation implementation
+//===----------------------------------------------------------------------===//
+
+/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
+///
+void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned DestReg = getFPReg(MI->getOperand(0));
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0); // Remove the explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // Result gets pushed on the stack.
+ pushReg(DestReg);
+}
+
+/// handleOneArgFP - fst <mem>, ST(0)
+///
+void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
+ "Can only handle fst* & ftst instructions!");
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+ if (KillsSrc)
+ duplicatePendingSTBeforeKill(Reg, I);
+
+ // FISTP64m is strange because there isn't a non-popping versions.
+ // If we have one _and_ we don't want to pop the operand, duplicate the value
+ // on the stack instead of moving it. This ensure that popping the value is
+ // always ok.
+ // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
+ //
+ if (!KillsSrc &&
+ (MI->getOpcode() == X86::IST_Fp64m32 ||
+ MI->getOpcode() == X86::ISTT_Fp16m32 ||
+ MI->getOpcode() == X86::ISTT_Fp32m32 ||
+ MI->getOpcode() == X86::ISTT_Fp64m32 ||
+ MI->getOpcode() == X86::IST_Fp64m64 ||
+ MI->getOpcode() == X86::ISTT_Fp16m64 ||
+ MI->getOpcode() == X86::ISTT_Fp32m64 ||
+ MI->getOpcode() == X86::ISTT_Fp64m64 ||
+ MI->getOpcode() == X86::IST_Fp64m80 ||
+ MI->getOpcode() == X86::ISTT_Fp16m80 ||
+ MI->getOpcode() == X86::ISTT_Fp32m80 ||
+ MI->getOpcode() == X86::ISTT_Fp64m80 ||
+ MI->getOpcode() == X86::ST_FpP80m)) {
+ duplicateToTop(Reg, getScratchReg(), I);
+ } else {
+ moveToTop(Reg, I); // Move to the top of the stack...
+ }
+
+ // Convert from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ if (MI->getOpcode() == X86::IST_FP64m ||
+ MI->getOpcode() == X86::ISTT_FP16m ||
+ MI->getOpcode() == X86::ISTT_FP32m ||
+ MI->getOpcode() == X86::ISTT_FP64m ||
+ MI->getOpcode() == X86::ST_FP80m) {
+ if (StackTop == 0)
+ report_fatal_error("Stack empty??");
+ --StackTop;
+ } else if (KillsSrc) { // Last use of operand?
+ popStackAfter(I);
+ }
+}
+
+
+/// handleOneArgFPRW: Handle instructions that read from the top of stack and
+/// replace the value with a newly computed value. These instructions may have
+/// non-fp operands after their FP operands.
+///
+/// Examples:
+/// R1 = fchs R2
+/// R1 = fadd R2, [mem]
+///
+void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+#ifndef NDEBUG
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
+#endif
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+ if (KillsSrc) {
+ duplicatePendingSTBeforeKill(Reg, I);
+ // If this is the last use of the source register, just make sure it's on
+ // the top of the stack.
+ moveToTop(Reg, I);
+ if (StackTop == 0)
+ report_fatal_error("Stack cannot be empty!");
+ --StackTop;
+ pushReg(getFPReg(MI->getOperand(0)));
+ } else {
+ // If this is not the last use of the source register, _copy_ it to the top
+ // of the stack.
+ duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
+ }
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(1); // Drop the source operand.
+ MI->RemoveOperand(0); // Drop the destination operand.
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define tables of various ways to map pseudo instructions
+//
+
+// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
+static const TableEntry ForwardST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r },
+ { X86::ADD_Fp64 , X86::ADD_FST0r },
+ { X86::ADD_Fp80 , X86::ADD_FST0r },
+ { X86::DIV_Fp32 , X86::DIV_FST0r },
+ { X86::DIV_Fp64 , X86::DIV_FST0r },
+ { X86::DIV_Fp80 , X86::DIV_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r },
+ { X86::MUL_Fp64 , X86::MUL_FST0r },
+ { X86::MUL_Fp80 , X86::MUL_FST0r },
+ { X86::SUB_Fp32 , X86::SUB_FST0r },
+ { X86::SUB_Fp64 , X86::SUB_FST0r },
+ { X86::SUB_Fp80 , X86::SUB_FST0r },
+};
+
+// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
+static const TableEntry ReverseST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FST0r },
+ { X86::DIV_Fp64 , X86::DIVR_FST0r },
+ { X86::DIV_Fp80 , X86::DIVR_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FST0r },
+ { X86::SUB_Fp64 , X86::SUBR_FST0r },
+ { X86::SUB_Fp80 , X86::SUBR_FST0r },
+};
+
+// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
+static const TableEntry ForwardSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp64 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp80 , X86::DIVR_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp64 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp80 , X86::SUBR_FrST0 },
+};
+
+// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
+static const TableEntry ReverseSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 },
+ { X86::ADD_Fp64 , X86::ADD_FrST0 },
+ { X86::ADD_Fp80 , X86::ADD_FrST0 },
+ { X86::DIV_Fp32 , X86::DIV_FrST0 },
+ { X86::DIV_Fp64 , X86::DIV_FrST0 },
+ { X86::DIV_Fp80 , X86::DIV_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 },
+ { X86::MUL_Fp64 , X86::MUL_FrST0 },
+ { X86::MUL_Fp80 , X86::MUL_FrST0 },
+ { X86::SUB_Fp32 , X86::SUB_FrST0 },
+ { X86::SUB_Fp64 , X86::SUB_FrST0 },
+ { X86::SUB_Fp80 , X86::SUB_FrST0 },
+};
+
+
+/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
+/// instructions which need to be simplified and possibly transformed.
+///
+/// Result: ST(0) = fsub ST(0), ST(i)
+/// ST(i) = fsub ST(0), ST(i)
+/// ST(0) = fsubr ST(0), ST(i)
+/// ST(i) = fsubr ST(0), ST(i)
+///
+void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
+ unsigned Dest = getFPReg(MI->getOperand(0));
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned TOS = getStackEntry(0);
+
+ // One of our operands must be on the top of the stack. If neither is yet, we
+ // need to move one.
+ if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
+ // We can choose to move either operand to the top of the stack. If one of
+ // the operands is killed by this instruction, we want that one so that we
+ // can update right on top of the old version.
+ if (KillsOp0) {
+ moveToTop(Op0, I); // Move dead operand to TOS.
+ TOS = Op0;
+ } else if (KillsOp1) {
+ moveToTop(Op1, I);
+ TOS = Op1;
+ } else {
+ // All of the operands are live after this instruction executes, so we
+ // cannot update on top of any operand. Because of this, we must
+ // duplicate one of the stack elements to the top. It doesn't matter
+ // which one we pick.
+ //
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+ } else if (!KillsOp0 && !KillsOp1) {
+ // If we DO have one of our operands at the top of the stack, but we don't
+ // have a dead operand, we must duplicate one of the operands to a new slot
+ // on the stack.
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+
+ // Now we know that one of our operands is on the top of the stack, and at
+ // least one of our operands is killed by this instruction.
+ assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
+ "Stack conditions not set up right!");
+
+ // We decide which form to use based on what is on the top of the stack, and
+ // which operand is killed by this instruction.
+ const TableEntry *InstTable;
+ bool isForward = TOS == Op0;
+ bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
+ if (updateST0) {
+ if (isForward)
+ InstTable = ForwardST0Table;
+ else
+ InstTable = ReverseST0Table;
+ } else {
+ if (isForward)
+ InstTable = ForwardSTiTable;
+ else
+ InstTable = ReverseSTiTable;
+ }
+
+ int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
+ MI->getOpcode());
+ assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
+
+ // NotTOS - The register which is not on the top of stack...
+ unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
+
+ // Replace the old instruction with a new instruction
+ MBB->remove(I++);
+ I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
+
+ // If both operands are killed, pop one off of the stack in addition to
+ // overwriting the other one.
+ if (KillsOp0 && KillsOp1 && Op0 != Op1) {
+ assert(!updateST0 && "Should have updated other operand!");
+ popStackAfter(I); // Pop the top of stack
+ }
+
+ // Update stack information so that we know the destination register is now on
+ // the stack.
+ unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
+ assert(UpdatedSlot < StackTop && Dest < 7);
+ Stack[UpdatedSlot] = Dest;
+ RegMap[Dest] = UpdatedSlot;
+ MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
+}
+
+/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
+/// register arguments and no explicit destinations.
+///
+void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // Make sure the first operand is on the top of stack, the other one can be
+ // anywhere.
+ moveToTop(Op0, I);
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->RemoveOperand(1);
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If any of the operands are killed by this instruction, free them.
+ if (KillsOp0) freeStackSlotAfter(I, Op0);
+ if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
+}
+
+/// handleCondMovFP - Handle two address conditional move instructions. These
+/// instructions move a st(i) register to st(0) iff a condition is true. These
+/// instructions require that the first operand is at the top of the stack, but
+/// otherwise don't modify the stack at all.
+void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+ unsigned Op1 = getFPReg(MI->getOperand(2));
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // The first operand *must* be on the top of the stack.
+ moveToTop(Op0, I);
+
+ // Change the second operand to the stack register that the operand is in.
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0);
+ MI->RemoveOperand(1);
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If we kill the second operand, make sure to pop it from the stack.
+ if (Op0 != Op1 && KillsOp1) {
+ // Get this value off of the register stack.
+ freeStackSlotAfter(I, Op1);
+ }
+}
+
+
+/// handleSpecialFP - Handle special instructions which behave unlike other
+/// floating point instructions. This is primarily intended for use by pseudo
+/// instructions.
+///
+void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown SpecialFP instruction!");
+ case TargetOpcode::COPY: {
+ // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
+ const MachineOperand &MO1 = MI->getOperand(1);
+ const MachineOperand &MO0 = MI->getOperand(0);
+ unsigned DstST = MO0.getReg() - X86::ST0;
+ unsigned SrcST = MO1.getReg() - X86::ST0;
+ bool KillsSrc = MI->killsRegister(MO1.getReg());
+
+ // ST = COPY FP. Set up a pending ST register.
+ if (DstST < 8) {
+ unsigned SrcFP = getFPReg(MO1);
+ assert(isLive(SrcFP) && "Cannot copy dead register");
+ assert(!MO0.isDead() && "Cannot copy to dead ST register");
+
+ // Unallocated STs are marked as the nonexistent FP255.
+ while (NumPendingSTs <= DstST)
+ PendingST[NumPendingSTs++] = NumFPRegs;
+
+ // STi could still be live from a previous inline asm.
+ if (isScratchReg(PendingST[DstST])) {
+ DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST])
+ << '\n');
+ freeStackSlotBefore(MI, PendingST[DstST]);
+ }
+
+ // When the source is killed, allocate a scratch FP register.
+ if (KillsSrc) {
+ duplicatePendingSTBeforeKill(SrcFP, I);
+ unsigned Slot = getSlot(SrcFP);
+ unsigned SR = getScratchReg();
+ PendingST[DstST] = SR;
+ Stack[Slot] = SR;
+ RegMap[SR] = Slot;
+ } else
+ PendingST[DstST] = SrcFP;
+ break;
+ }
+
+ // FP = COPY ST. Extract fixed stack value.
+ // Any instruction defining ST registers must have assigned them to a
+ // scratch register.
+ if (SrcST < 8) {
+ unsigned DstFP = getFPReg(MO0);
+ assert(!isLive(DstFP) && "Cannot copy ST to live FP register");
+ assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register");
+ unsigned SrcFP = PendingST[SrcST];
+ assert(isScratchReg(SrcFP) && "Expected ST in a scratch register");
+ assert(isLive(SrcFP) && "Scratch holding ST is dead");
+
+ // DstFP steals the stack slot from SrcFP.
+ unsigned Slot = getSlot(SrcFP);
+ Stack[Slot] = DstFP;
+ RegMap[DstFP] = Slot;
+
+ // Always treat the ST as killed.
+ PendingST[SrcST] = NumFPRegs;
+ while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+ --NumPendingSTs;
+ break;
+ }
+
+ // FP <- FP copy.
+ unsigned DstFP = getFPReg(MO0);
+ unsigned SrcFP = getFPReg(MO1);
+ assert(isLive(SrcFP) && "Cannot copy dead register");
+ if (KillsSrc) {
+ // If the input operand is killed, we can just change the owner of the
+ // incoming stack slot into the result.
+ unsigned Slot = getSlot(SrcFP);
+ Stack[Slot] = DstFP;
+ RegMap[DstFP] = Slot;
+ } else {
+ // For COPY we just duplicate the specified value to a new stack slot.
+ // This could be made better, but would require substantial changes.
+ duplicateToTop(SrcFP, DstFP, I);
+ }
+ break;
+ }
+
+ case TargetOpcode::IMPLICIT_DEF: {
+ // All FP registers must be explicitly defined, so load a 0 instead.
+ unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
+ DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(Reg);
+ break;
+ }
+
+ case X86::FpPOP_RETVAL: {
+ // The FpPOP_RETVAL instruction is used after calls that return a value on
+ // the floating point stack. We cannot model this with ST defs since CALL
+ // instructions have fixed clobber lists. This instruction is interpreted
+ // to mean that there is one more live register on the stack than we
+ // thought.
+ //
+ // This means that StackTop does not match the hardware stack between a
+ // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions
+ // between CALL and FpPOP_RETVAL as long as they don't overflow the
+ // hardware stack.
+ unsigned DstFP = getFPReg(MI->getOperand(0));
+
+ // Move existing stack elements up to reflect reality.
+ assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL");
+ if (StackTop) {
+ std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
+ for (unsigned i = 0; i != NumFPRegs; ++i)
+ ++RegMap[i];
+ }
+ ++StackTop;
+
+ // DstFP is the new bottom of the stack.
+ Stack[0] = DstFP;
+ RegMap[DstFP] = 0;
+
+ // DstFP will be killed by processBasicBlock if this was a dead def.
+ break;
+ }
+
+ case TargetOpcode::INLINEASM: {
+ // The inline asm MachineInstr currently only *uses* FP registers for the
+ // 'f' constraint. These should be turned into the current ST(x) register
+ // in the machine instr.
+ //
+ // There are special rules for x87 inline assembly. The compiler must know
+ // exactly how many registers are popped and pushed implicitly by the asm.
+ // Otherwise it is not possible to restore the stack state after the inline
+ // asm.
+ //
+ // There are 3 kinds of input operands:
+ //
+ // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
+ // popped input operand must be in a fixed stack slot, and it is either
+ // tied to an output operand, or in the clobber list. The MI has ST use
+ // and def operands for these inputs.
+ //
+ // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
+ // preserved by the inline asm. The fixed stack slots must be STn-STm
+ // following the popped inputs. A fixed input operand cannot be tied to
+ // an output or appear in the clobber list. The MI has ST use operands
+ // and no defs for these inputs.
+ //
+ // 3. Preserved inputs. These inputs use the "f" constraint which is
+ // represented as an FP register. The inline asm won't change these
+ // stack slots.
+ //
+ // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
+ // registers do not count as output operands. The inline asm changes the
+ // stack as if it popped all the popped inputs and then pushed all the
+ // output operands.
+
+ // Scan the assembly for ST registers used, defined and clobbered. We can
+ // only tell clobbers from defs by looking at the asm descriptor.
+ unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
+ unsigned NumOps = 0;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
+ i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
+ unsigned Flags = MI->getOperand(i).getImm();
+ NumOps = InlineAsm::getNumOperandRegisters(Flags);
+ if (NumOps != 1)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i + 1);
+ if (!MO.isReg())
+ continue;
+ unsigned STReg = MO.getReg() - X86::ST0;
+ if (STReg >= 8)
+ continue;
+
+ switch (InlineAsm::getKind(Flags)) {
+ case InlineAsm::Kind_RegUse:
+ STUses |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ STDefs |= (1u << STReg);
+ if (MO.isDead())
+ STDeadDefs |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_Clobber:
+ STClobbers |= (1u << STReg);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (STUses && !isMask_32(STUses))
+ MI->emitError("fixed input regs must be last on the x87 stack");
+ unsigned NumSTUses = CountTrailingOnes_32(STUses);
+
+ // Defs must be contiguous from the stack top. ST0-STn.
+ if (STDefs && !isMask_32(STDefs)) {
+ MI->emitError("output regs must be last on the x87 stack");
+ STDefs = NextPowerOf2(STDefs) - 1;
+ }
+ unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
+
+ // So must the clobbered stack slots. ST0-STm, m >= n.
+ if (STClobbers && !isMask_32(STDefs | STClobbers))
+ MI->emitError("clobbers must be last on the x87 stack");
+
+ // Popped inputs are the ones that are also clobbered or defined.
+ unsigned STPopped = STUses & (STDefs | STClobbers);
+ if (STPopped && !isMask_32(STPopped))
+ MI->emitError("implicitly popped regs must be last on the x87 stack");
+ unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
+
+ DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
+ << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
+
+ // Scan the instruction for FP uses corresponding to "f" constraints.
+ // Collect FP registers to kill afer the instruction.
+ // Always kill all the scratch regs.
+ unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
+ unsigned FPUsed = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ if (!Op.isUse())
+ MI->emitError("illegal \"f\" output constraint");
+ unsigned FPReg = getFPReg(Op);
+ FPUsed |= 1U << FPReg;
+
+ // If we kill this operand, make sure to pop it from the stack after the
+ // asm. We just remember it for now, and pop them all off at the end in
+ // a batch.
+ if (Op.isKill())
+ FPKills |= 1U << FPReg;
+ }
+
+ // The popped inputs will be killed by the instruction, so duplicate them
+ // if the FP register needs to be live after the instruction, or if it is
+ // used in the instruction itself. We effectively treat the popped inputs
+ // as early clobbers.
+ for (unsigned i = 0; i < NumSTPopped; ++i) {
+ if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
+ continue;
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
+ PendingST[i] = SR;
+ }
+
+ // Make sure we have a unique live register for every fixed use. Some of
+ // them could be undef uses, and we need to emit LD_F0 instructions.
+ for (unsigned i = 0; i < NumSTUses; ++i) {
+ if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
+ // Check for shared assignments.
+ for (unsigned j = 0; j < i; ++j) {
+ if (PendingST[j] != PendingST[i])
+ continue;
+ // STi and STj are inn the same register, create a copy.
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i])
+ << " to avoid collision with ST" << j << '\n');
+ PendingST[i] = SR;
+ }
+ continue;
+ }
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(SR);
+ PendingST[i] = SR;
+ if (NumPendingSTs == i)
+ ++NumPendingSTs;
+ }
+ assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned");
+
+ // Now we can rearrange the live registers to match what was requested.
+ shuffleStackTop(PendingST, NumPendingSTs, I);
+ DEBUG({dbgs() << "Before asm: "; dumpStack();});
+
+ // With the stack layout fixed, rewrite the FP registers.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ unsigned FPReg = getFPReg(Op);
+ Op.setReg(getSTReg(FPReg));
+ }
+
+ // Simulate the inline asm popping its inputs and pushing its outputs.
+ StackTop -= NumSTPopped;
+
+ // Hold the fixed output registers in scratch FP registers. They will be
+ // transferred to real FP registers by copies.
+ NumPendingSTs = 0;
+ for (unsigned i = 0; i < NumSTDefs; ++i) {
+ unsigned SR = getScratchReg();
+ pushReg(SR);
+ FPKills &= ~(1u << SR);
+ }
+ for (unsigned i = 0; i < NumSTDefs; ++i)
+ PendingST[NumPendingSTs++] = getStackEntry(i);
+ DEBUG({dbgs() << "After asm: "; dumpStack();});
+
+ // If any of the ST defs were dead, pop them immediately. Our caller only
+ // handles dead FP defs.
+ MachineBasicBlock::iterator InsertPt = MI;
+ for (unsigned i = 0; STDefs & (1u << i); ++i) {
+ if (!(STDeadDefs & (1u << i)))
+ continue;
+ freeStackSlotAfter(InsertPt, PendingST[i]);
+ PendingST[i] = NumFPRegs;
+ }
+ while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+ --NumPendingSTs;
+
+ // If this asm kills any FP registers (is the last use of them) we must
+ // explicitly emit pop instructions for them. Do this now after the asm has
+ // executed so that the ST(x) numbers are not off (which would happen if we
+ // did this inline with operand rewriting).
+ //
+ // Note: this might be a non-optimal pop sequence. We might be able to do
+ // better by trying to pop in stack order or something.
+ while (FPKills) {
+ unsigned FPReg = CountTrailingZeros_32(FPKills);
+ if (isLive(FPReg))
+ freeStackSlotAfter(InsertPt, FPReg);
+ FPKills &= ~(1U << FPReg);
+ }
+ // Don't delete the inline asm!
+ return;
+ }
+
+ case X86::WIN_FTOL_32:
+ case X86::WIN_FTOL_64: {
+ // Push the operand into ST0.
+ MachineOperand &Op = MI->getOperand(0);
+ assert(Op.isUse() && Op.isReg() &&
+ Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
+ unsigned FPReg = getFPReg(Op);
+ if (Op.isKill())
+ moveToTop(FPReg, I);
+ else
+ duplicateToTop(FPReg, FPReg, I);
+
+ // Emit the call. This will pop the operand.
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
+ .addExternalSymbol("_ftol2")
+ .addReg(X86::ST0, RegState::ImplicitKill)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::EDX, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ --StackTop;
+
+ break;
+ }
+
+ case X86::RET:
+ case X86::RETI:
+ // If RET has an FP register use operand, pass the first one in ST(0) and
+ // the second one in ST(1).
+
+ // Find the register operands.
+ unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
+ unsigned LiveMask = 0;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ // FP Register uses must be kills unless there are two uses of the same
+ // register, in which case only one will be a kill.
+ assert(Op.isUse() &&
+ (Op.isKill() || // Marked kill.
+ getFPReg(Op) == FirstFPRegOp || // Second instance.
+ MI->killsRegister(Op.getReg())) && // Later use is marked kill.
+ "Ret only defs operands, and values aren't live beyond it");
+
+ if (FirstFPRegOp == ~0U)
+ FirstFPRegOp = getFPReg(Op);
+ else {
+ assert(SecondFPRegOp == ~0U && "More than two fp operands!");
+ SecondFPRegOp = getFPReg(Op);
+ }
+ LiveMask |= (1 << getFPReg(Op));
+
+ // Remove the operand so that later passes don't see it.
+ MI->RemoveOperand(i);
+ --i, --e;
+ }
+
+ // We may have been carrying spurious live-ins, so make sure only the returned
+ // registers are left live.
+ adjustLiveRegs(LiveMask, MI);
+ if (!LiveMask) return; // Quick check to see if any are possible.
+
+ // There are only four possibilities here:
+ // 1) we are returning a single FP value. In this case, it has to be in
+ // ST(0) already, so just declare success by removing the value from the
+ // FP Stack.
+ if (SecondFPRegOp == ~0U) {
+ // Assert that the top of stack contains the right FP register.
+ assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
+ "Top of stack not the right register for RET!");
+
+ // Ok, everything is good, mark the value as not being on the stack
+ // anymore so that our assertion about the stack being empty at end of
+ // block doesn't fire.
+ StackTop = 0;
+ return;
+ }
+
+ // Otherwise, we are returning two values:
+ // 2) If returning the same value for both, we only have one thing in the FP
+ // stack. Consider: RET FP1, FP1
+ if (StackTop == 1) {
+ assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
+ "Stack misconfiguration for RET!");
+
+ // Duplicate the TOS so that we return it twice. Just pick some other FPx
+ // register to hold it.
+ unsigned NewReg = getScratchReg();
+ duplicateToTop(FirstFPRegOp, NewReg, MI);
+ FirstFPRegOp = NewReg;
+ }
+
+ /// Okay we know we have two different FPx operands now:
+ assert(StackTop == 2 && "Must have two values live!");
+
+ /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
+ /// in ST(1). In this case, emit an fxch.
+ if (getStackEntry(0) == SecondFPRegOp) {
+ assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
+ moveToTop(FirstFPRegOp, MI);
+ }
+
+ /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
+ /// ST(1). Just remove both from our understanding of the stack and return.
+ assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
+ assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
+ StackTop = 0;
+ return;
+ }
+
+ I = MBB->erase(I); // Remove the pseudo instruction
+
+ // We want to leave I pointing to the previous instruction, but what if we
+ // just erased the first instruction?
+ if (I == MBB->begin()) {
+ DEBUG(dbgs() << "Inserting dummy KILL\n");
+ I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
+ } else
+ --I;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
new file mode 100644
index 000000000000..54cbd40274a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -0,0 +1,1837 @@
+//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86FrameLowering.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+// FIXME: completely move here.
+extern cl::opt<bool> ForceStackAlign;
+
+bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineModuleInfo &MMI = MF.getMMI();
+ const TargetRegisterInfo *RegInfo = TM.getRegisterInfo();
+
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ RegInfo->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken() || MF.hasMSInlineAsm() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ MMI.callsUnwindInit() || MMI.callsEHReturn());
+}
+
+static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
+ if (IsLP64) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
+ if (IsLP64) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
+static unsigned getLEArOpcode(unsigned IsLP64) {
+ return IsLP64 ? X86::LEA64r : X86::LEA32r;
+}
+
+/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
+/// when it reaches the "return" instruction. We can then pop a stack object
+/// to this register without worry about clobbering it.
+static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetRegisterInfo &TRI,
+ bool Is64Bit) {
+ const MachineFunction *MF = MBB.getParent();
+ const Function *F = MF->getFunction();
+ if (!F || MF->getMMI().callsEHReturn())
+ return 0;
+
+ static const uint16_t CallerSavedRegs32Bit[] = {
+ X86::EAX, X86::EDX, X86::ECX, 0
+ };
+
+ static const uint16_t CallerSavedRegs64Bit[] = {
+ X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
+ X86::R8, X86::R9, X86::R10, X86::R11, 0
+ };
+
+ unsigned Opc = MBBI->getOpcode();
+ switch (Opc) {
+ default: return 0;
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ SmallSet<uint16_t, 8> Uses;
+ for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MBBI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
+ }
+
+ const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+ for (; *CS; ++CS)
+ if (!Uses.count(*CS))
+ return *CS;
+ }
+ }
+
+ return 0;
+}
+
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes,
+ bool Is64Bit, bool IsLP64, bool UseLEA,
+ const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ unsigned Opc;
+ if (UseLEA)
+ Opc = getLEArOpcode(IsLP64);
+ else
+ Opc = isSub
+ ? getSUBriOpcode(IsLP64, Offset)
+ : getADDriOpcode(IsLP64, Offset);
+
+ uint64_t Chunk = (1LL << 31) - 1;
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ if (ThisVal == (Is64Bit ? 8 : 4)) {
+ // Use push / pop instead.
+ unsigned Reg = isSub
+ ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ if (Reg) {
+ Opc = isSub
+ ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
+ Offset -= ThisVal;
+ continue;
+ }
+ }
+
+ MachineInstr *MI = NULL;
+
+ if (UseLEA) {
+ MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
+ StackPtr, false, isSub ? -ThisVal : ThisVal);
+ } else {
+ MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
+
+ Offset -= ThisVal;
+ }
+}
+
+/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ if (MBBI == MBB.begin()) return;
+
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
+ Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ }
+}
+
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
+static
+void mergeSPUpdatesDown(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ // FIXME: THIS ISN'T RUN!!!
+ return;
+
+ if (MBBI == MBB.end()) return;
+
+ MachineBasicBlock::iterator NI = llvm::next(MBBI);
+ if (NI == MBB.end()) return;
+
+ unsigned Opc = NI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ }
+}
+
+/// mergeSPUpdates - Checks the instruction before/after the passed
+/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and the
+/// stack adjustment is returned as a positive value for ADD/LEA and a negative for
+/// SUB.
+static int mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr,
+ bool doMergeWithPrevious) {
+ if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
+ (!doMergeWithPrevious && MBBI == MBB.end()))
+ return 0;
+
+ MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
+ unsigned Opc = PI->getOpcode();
+ int Offset = 0;
+
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
+ Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
+ PI->getOperand(0).getReg() == StackPtr){
+ Offset += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ Offset -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ }
+
+ return Offset;
+}
+
+static bool isEAXLiveIn(MachineFunction &MF) {
+ for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+ unsigned Reg = II->first;
+
+ if (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL)
+ return true;
+ }
+
+ return false;
+}
+
+void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
+ MCSymbol *Label,
+ unsigned FramePtr) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.empty()) return;
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ bool HasFP = hasFP(MF);
+
+ // Calculate amount of bytes used for return address storing.
+ int stackGrowth = -RegInfo->getSlotSize();
+
+ // FIXME: This is dirty hack. The code itself is pretty mess right now.
+ // It should be rewritten from scratch and generalized sometimes.
+
+ // Determine maximum offset (minimum due to stack growth).
+ int64_t MaxOffset = 0;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I)
+ MaxOffset = std::min(MaxOffset,
+ MFI->getObjectOffset(I->getFrameIdx()));
+
+ // Calculate offsets.
+ int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+ Offset = MaxOffset - Offset + saveAreaOffset;
+
+ // Don't output a new machine move if we're re-saving the frame
+ // pointer. This happens when the PrologEpilogInserter has inserted an extra
+ // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
+ // generates one when frame pointers are used. If we generate a "machine
+ // move" for this extra "PUSH", the linker will lose track of the fact that
+ // the frame pointer should have the value of the first "PUSH" when it's
+ // trying to unwind.
+ //
+ // FIXME: This looks inelegant. It's possibly correct, but it's covering up
+ // another bug. I.e., one where we generate a prolog like this:
+ //
+ // pushl %ebp
+ // movl %esp, %ebp
+ // pushl %ebp
+ // pushl %esi
+ // ...
+ //
+ // The immediate re-push of EBP is unnecessary. At the least, it's an
+ // optimization bug. EBP can be used as a scratch register in certain
+ // cases, but probably not when we have a frame pointer.
+ if (HasFP && FramePtr == Reg)
+ continue;
+
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+}
+
+/// getCompactUnwindRegNum - Get the compact unwind number for a given
+/// register. The number corresponds to the enum lists in
+/// compact_unwind_encoding.h.
+static int getCompactUnwindRegNum(const uint16_t *CURegs, unsigned Reg) {
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
+ if (*CURegs == Reg)
+ return Idx;
+
+ return -1;
+}
+
+// Number of registers that can be saved in a compact unwind encoding.
+#define CU_NUM_SAVED_REGS 6
+
+/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
+/// used with frameless stacks. It is passed the number of registers to be saved
+/// and an array of the registers saved.
+static uint32_t
+encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ unsigned RegCount, bool Is64Bit) {
+ // The saved registers are numbered from 1 to 6. In order to encode the order
+ // in which they were saved, we re-number them according to their place in the
+ // register order. The re-numbering is relative to the last re-numbered
+ // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ static const uint16_t CU32BitRegs[] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const uint16_t CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+
+ for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
+ int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
+ if (CUReg == -1) return ~0U;
+ SavedRegs[i] = CUReg;
+ }
+
+ // Reverse the list.
+ std::swap(SavedRegs[0], SavedRegs[5]);
+ std::swap(SavedRegs[1], SavedRegs[4]);
+ std::swap(SavedRegs[2], SavedRegs[3]);
+
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
+ unsigned Countless = 0;
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
+ if (SavedRegs[j] < SavedRegs[i])
+ ++Countless;
+
+ RenumRegs[i] = SavedRegs[i] - Countless - 1;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (RegCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+ + 6 * RenumRegs[3] + 2 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
+ + 3 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[5];
+ break;
+ }
+
+ assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+ "Invalid compact register encoding!");
+ return permutationEncoding;
+}
+
+/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
+/// compact encoding with a frame pointer.
+static uint32_t
+encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ bool Is64Bit) {
+ static const uint16_t CU32BitRegs[] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const uint16_t CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+
+ // Encode the registers in the order they were saved, 3-bits per register. The
+ // registers are numbered from 1 to CU_NUM_SAVED_REGS.
+ uint32_t RegEnc = 0;
+ for (int I = CU_NUM_SAVED_REGS - 1, Idx = 0; I != -1; --I) {
+ unsigned Reg = SavedRegs[I];
+ if (Reg == 0) continue;
+
+ int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
+ if (CURegNum == -1) return ~0U;
+
+ // Encode the 3-bit register number in order, skipping over 3-bits for each
+ // register.
+ RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
+ }
+
+ assert((RegEnc & 0x3FFFF) == RegEnc && "Invalid compact register encoding!");
+ return RegEnc;
+}
+
+uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ bool Is64Bit = STI.is64Bit();
+ bool HasFP = hasFP(MF);
+
+ unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 };
+ unsigned SavedRegIdx = 0;
+
+ unsigned OffsetSize = (Is64Bit ? 8 : 4);
+
+ unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r);
+ unsigned PushInstrSize = 1;
+ unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
+ unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
+
+ unsigned StackDivide = (Is64Bit ? 8 : 4);
+
+ unsigned InstrOffset = 0;
+ unsigned StackAdjust = 0;
+ unsigned StackSize = 0;
+
+ MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
+ bool ExpectEnd = false;
+ for (MachineBasicBlock::iterator
+ MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opc = MI.getOpcode();
+ if (Opc == X86::PROLOG_LABEL) continue;
+ if (!MI.getFlag(MachineInstr::FrameSetup)) break;
+
+ // We don't exect any more prolog instructions.
+ if (ExpectEnd) return 0;
+
+ if (Opc == PushInstr) {
+ // If there are too many saved registers, we cannot use compact encoding.
+ if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0;
+
+ SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
+ StackAdjust += OffsetSize;
+ InstrOffset += PushInstrSize;
+ } else if (Opc == MoveInstr) {
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ if (DstReg != FramePtr || SrcReg != StackPtr)
+ return 0;
+
+ StackAdjust = 0;
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ SavedRegIdx = 0;
+ InstrOffset += MoveInstrSize;
+ } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
+ if (StackSize)
+ // We already have a stack size.
+ return 0;
+
+ if (!MI.getOperand(0).isReg() ||
+ MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
+ MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm())
+ // We need this to be a stack adjustment pointer. Something like:
+ //
+ // %RSP<def> = SUB64ri8 %RSP, 48
+ return 0;
+
+ StackSize = MI.getOperand(2).getImm() / StackDivide;
+ SubtractInstrIdx += InstrOffset;
+ ExpectEnd = true;
+ }
+ }
+
+ // Encode that we are using EBP/RBP as the frame pointer.
+ uint32_t CompactUnwindEncoding = 0;
+ StackAdjust /= StackDivide;
+ if (HasFP) {
+ if ((StackAdjust & 0xFF) != StackAdjust)
+ // Offset was too big for compact encoding.
+ return 0;
+
+ // Get the encoding of the saved registers when we have a frame pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
+ if (RegEnc == ~0U) return 0;
+
+ CompactUnwindEncoding |= 0x01000000;
+ CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
+ CompactUnwindEncoding |= RegEnc & 0x7FFF;
+ } else {
+ ++StackAdjust;
+ uint32_t TotalStackSize = StackAdjust + StackSize;
+ if ((TotalStackSize & 0xFF) == TotalStackSize) {
+ // Frameless stack with a small stack size.
+ CompactUnwindEncoding |= 0x02000000;
+
+ // Encode the stack size.
+ CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
+ } else {
+ if ((StackAdjust & 0x7) != StackAdjust)
+ // The extra stack adjustments are too big for us to handle.
+ return 0;
+
+ // Frameless stack with an offset too large for us to encode compactly.
+ CompactUnwindEncoding |= 0x03000000;
+
+ // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
+ // instruction.
+ CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
+
+ // Encode any extra stack stack adjustments (done via push instructions).
+ CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
+ }
+
+ // Encode the number of registers saved.
+ CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
+
+ // Get the encoding of the saved registers when we don't have a frame
+ // pointer.
+ uint32_t RegEnc =
+ encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
+ Is64Bit);
+ if (RegEnc == ~0U) return 0;
+
+ // Encode the register encoding.
+ CompactUnwindEncoding |= RegEnc & 0x3FF;
+ }
+
+ return CompactUnwindEncoding;
+}
+
+/// usesTheStack - This function checks if any of the users of EFLAGS
+/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
+/// to use the stack, and if we don't adjust the stack we clobber the first
+/// frame index.
+/// See X86InstrInfo::copyPhysReg.
+static bool usesTheStack(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS),
+ re = MRI.reg_end(); ri != re; ++ri)
+ if (ri->isCopy())
+ return true;
+
+ return false;
+}
+
+/// emitPrologue - Push callee-saved registers onto the stack, which
+/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
+/// space for local variables. Also emit labels used by the exception handler to
+/// generate the exception handling frames.
+void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *Fn = MF.getFunction();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ Fn->needsUnwindTableEntry();
+ uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ bool HasFP = hasFP(MF);
+ bool Is64Bit = STI.is64Bit();
+ bool IsLP64 = STI.isTarget64BitLP64();
+ bool IsWin64 = STI.isTargetWin64();
+ bool UseLEA = STI.useLeaForSP();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+ unsigned BasePtr = RegInfo->getBaseRegister();
+ DebugLoc DL;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum SlotSize.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+
+ // Add RETADDR move area to callee saved frame size.
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ X86FI->setCalleeSavedFrameSize(
+ X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
+
+ // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
+ // function, and use up to 128 bytes of stack space, don't have a frame
+ // pointer, calls, or dynamic alloca then we do not need to adjust the
+ // stack pointer (we fit in the Red Zone). We also check that we don't
+ // push and pop from the stack.
+ if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoRedZone) &&
+ !RegInfo->needsStackRealignment(MF) &&
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64 && // Win64 has no Red Zone
+ !usesTheStack(MF) && // Don't push and pop.
+ !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
+ uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+ if (HasFP) MinSize += SlotSize;
+ StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
+ MFI->setStackSize(StackSize);
+ }
+
+ // Insert stack pointer adjustment for later moving of return addr. Only
+ // applies to tail call optimized functions where the callee argument stack
+ // size is bigger than the callers.
+ if (TailCallReturnAddrDelta < 0) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(-TailCallReturnAddrDelta)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
+ // Mapping for machine moves:
+ //
+ // DST: VirtualFP AND
+ // SRC: VirtualFP => DW_CFA_def_cfa_offset
+ // ELSE => DW_CFA_def_cfa
+ //
+ // SRC: VirtualFP AND
+ // DST: Register => DW_CFA_def_cfa_register
+ //
+ // ELSE
+ // OFFSET < 0 => DW_CFA_offset_extended_sf
+ // REG < 64 => DW_CFA_offset + Reg
+ // ELSE => DW_CFA_offset_extended
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ uint64_t NumBytes = 0;
+ int stackGrowth = -SlotSize;
+
+ if (HasFP) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF)) {
+ // Callee-saved registers are pushed on stack before the stack
+ // is realigned.
+ FrameSize -= X86FI->getCalleeSavedFrameSize();
+ NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+ } else {
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+ }
+
+ // Get the offset of the stack slot for the EBP register, which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save EBP/RBP into the appropriate stack slot.
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(FramePtr, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (needsFrameMoves) {
+ // Mark the place where EBP/RBP was saved.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(FrameLabel);
+
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+
+ // Change the rule for the FramePtr to be an "offset" rule.
+ MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Update EBP with the new base value.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(FrameLabel);
+
+ // Define the current CFA to use the EBP/RBP register.
+ MachineLocation FPDst(FramePtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(FramePtr);
+ } else {
+ NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+ }
+
+ // Skip the callee-saved push instructions.
+ bool PushedRegs = false;
+ int StackOffset = 2 * stackGrowth;
+
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == X86::PUSH32r ||
+ MBBI->getOpcode() == X86::PUSH64r)) {
+ PushedRegs = true;
+ MBBI->setFlag(MachineInstr::FrameSetup);
+ ++MBBI;
+
+ if (!HasFP && needsFrameMoves) {
+ // Mark callee-saved push instruction.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ // Define the current CFA rule to use the provided offset.
+ unsigned Ptr = StackSize ? MachineLocation::VirtualFP : StackPtr;
+ MachineLocation SPDst(Ptr);
+ MachineLocation SPSrc(Ptr, StackOffset);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ StackOffset += stackGrowth;
+ }
+ }
+
+ // Realign stack after we pushed callee-saved registers (so that we'll be
+ // able to calculate their offsets from the frame pointer).
+
+ // NOTE: We push the registers before realigning the stack, so
+ // vector callee-saved (xmm) registers may be saved w/o proper
+ // alignment in this way. However, currently these regs are saved in
+ // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
+ // this shouldn't be a problem.
+ if (RegInfo->needsStackRealignment(MF)) {
+ assert(HasFP && "There should be a frame pointer if stack is realigned.");
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
+ .addReg(StackPtr)
+ .addImm(-MaxAlign)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+
+ // If there is an SUB32ri of ESP immediately before this instruction, merge
+ // the two. This can be the case when tail call elimination is enabled and
+ // the callee has more arguments then the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+ // Adjust stack pointer: ESP -= numbytes.
+
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
+ // stack and adjust the stack pointer in one go. The 64-bit version of
+ // __chkstk is only responsible for probing the stack. The 64-bit prologue is
+ // responsible for adjusting the stack pointer. Touching the stack at 4K
+ // increments is necessary to ensure that the guard pages used by the OS
+ // virtual memory manager are allocated in correct sequence.
+ if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
+ const char *StackProbeSymbol;
+ bool isSPUpdateNeeded = false;
+
+ if (Is64Bit) {
+ if (STI.isTargetCygMing())
+ StackProbeSymbol = "___chkstk";
+ else {
+ StackProbeSymbol = "__chkstk";
+ isSPUpdateNeeded = true;
+ }
+ } else if (STI.isTargetCygMing())
+ StackProbeSymbol = "_alloca";
+ else
+ StackProbeSymbol = "_chkstk";
+
+ // Check whether EAX is livein for this function.
+ bool isEAXAlive = isEAXLiveIn(MF);
+
+ if (isEAXAlive) {
+ // Sanity check that EAX is not livein for this function.
+ // It should not be, so throw an assert.
+ assert(!Is64Bit && "EAX is livein in x64 case!");
+
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ if (Is64Bit) {
+ // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+ // Function prologue is responsible for adjusting the stack pointer.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
+ // We'll also use 4 already allocated bytes for EAX.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // MSVC x64's __chkstk needs to adjust %rsp.
+ // FIXME: %rax preserves the offset and should be available.
+ if (isSPUpdateNeeded)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
+ UseLEA, TII, *RegInfo);
+
+ if (isEAXAlive) {
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MI->setFlag(MachineInstr::FrameSetup);
+ MBB.insert(MBBI, MI);
+ }
+ } else if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
+ UseLEA, TII, *RegInfo);
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF)) {
+ // Update the frame pointer with the current stack pointer.
+ unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
+ // Mark end of stack pointer adjustment.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(Label);
+
+ if (!HasFP && NumBytes) {
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize + stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ }
+ }
+
+ // Emit DWARF info specifying the offsets of the callee-saved registers.
+ if (PushedRegs)
+ emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
+ }
+
+ // Darwin 10.7 and greater has support for compact unwind encoding.
+ if (STI.getTargetTriple().isMacOSX() &&
+ !STI.getTargetTriple().isMacOSXVersionLT(10, 7))
+ MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF));
+}
+
+void X86FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no instructions");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+ bool Is64Bit = STI.is64Bit();
+ bool IsLP64 = STI.isTarget64BitLP64();
+ bool UseLEA = STI.useLeaForSP();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ switch (RetOpcode) {
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64:
+ break; // These are ok
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else
+ MaxAlign = MaxAlign ? MaxAlign : 4;
+ }
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF)) {
+ // Callee-saved registers were pushed on stack before the stack
+ // was realigned.
+ FrameSize -= CSSize;
+ NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+ } else {
+ NumBytes = FrameSize - CSSize;
+ }
+
+ // Pop EBP.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ } else {
+ NumBytes = StackSize - CSSize;
+ }
+
+ // Skip the callee-saved pop instructions.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+
+ if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
+ !PI->isTerminator())
+ break;
+
+ --MBBI;
+ }
+ MachineBasicBlock::iterator FirstCSPop = MBBI;
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately before this
+ // instruction, merge the two instructions.
+ if (NumBytes || MFI->hasVarSizedObjects())
+ mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ // If dynamic alloca is used, then reset esp to point to the last callee-saved
+ // slot before popping them off! Same applies for the case, when stack was
+ // realigned.
+ if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
+ if (RegInfo->needsStackRealignment(MF))
+ MBBI = FirstCSPop;
+ if (CSSize != 0) {
+ unsigned Opc = getLEArOpcode(IsLP64);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
+ } else {
+ unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes) {
+ // Adjust stack pointer back: ESP += numbytes.
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA,
+ TII, *RegInfo);
+ }
+
+ // We're returning from function via eh_return.
+ if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(DestAddr.getReg());
+ } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+ RetOpcode == X86::TCRETURNmi ||
+ RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+ RetOpcode == X86::TCRETURNmi64) {
+ bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+ int Offset = 0;
+ assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+
+ // Incoporate the retaddr area.
+ Offset = StackAdj-MaxTCDelta;
+ assert(Offset >= 0 && "Offset should never be negative");
+
+ if (Offset) {
+ // Check for possible merge with preceding ADD instruction.
+ Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64,
+ UseLEA, TII, *RegInfo);
+ }
+
+ // Jump to label or value in register.
+ if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+ ? X86::TAILJMPm : X86::TAILJMPm64));
+ for (unsigned i = 0; i != 5; ++i)
+ MIB.addOperand(MBBI->getOperand(i));
+ } else if (RetOpcode == X86::TCRETURNri64) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ NewMI->copyImplicitOps(MF, MBBI);
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+ (X86FI->getTCReturnAddrDelta() < 0)) {
+ // Add the return addr area delta back since we are not tail calling.
+ int delta = -1*X86FI->getTCReturnAddrDelta();
+ MBBI = MBB.getLastNonDebugInstr();
+
+ // Check for possible merge with preceding ADD instruction.
+ delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII,
+ *RegInfo);
+ }
+}
+
+int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (RegInfo->hasBasePointer(MF)) {
+ assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
+ if (FI < 0) {
+ // Skip the saved EBP.
+ return Offset + RegInfo->getSlotSize();
+ } else {
+ assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
+ return Offset + StackSize;
+ }
+ } else if (RegInfo->needsStackRealignment(MF)) {
+ if (FI < 0) {
+ // Skip the saved EBP.
+ return Offset + RegInfo->getSlotSize();
+ } else {
+ assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
+ return Offset + StackSize;
+ }
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP.
+ Offset += RegInfo->getSlotSize();
+
+ // Skip the RETADDR move area
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ Offset -= TailCallReturnAddrDelta;
+ }
+
+ return Offset;
+}
+
+int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ // We can't calculate offset from frame pointer if the stack is realigned,
+ // so enforce usage of stack/base pointer. The base pointer is used when we
+ // have dynamic allocas in addition to dynamic realignment.
+ if (RegInfo->hasBasePointer(MF))
+ FrameReg = RegInfo->getBaseRegister();
+ else if (RegInfo->needsStackRealignment(MF))
+ FrameReg = RegInfo->getStackRegister();
+ else
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+}
+
+bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+
+ unsigned SlotSize = STI.is64Bit() ? 8 : 4;
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned CalleeFrameSize = 0;
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
+ // Push GPRs. It increases frame size.
+ unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg == FPReg)
+ // X86RegisterInfo::emitPrologue will handle spilling of frame register.
+ continue;
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+
+ // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
+ // It can be done by spilling XMMs to stack frame.
+ // Note that only Win64 ABI might spill XMMs.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ RC, TRI);
+ }
+
+ return true;
+}
+
+bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Reload XMMs from stack frame.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+
+ // POP GPRs.
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
+ if (Reg == FPReg)
+ // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
+ continue;
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+ }
+ return true;
+}
+
+void
+X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned SlotSize = RegInfo->getSlotSize();
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+ if (TailCallReturnAddrDelta < 0) {
+ // create RETURNADDR area
+ // arg
+ // arg
+ // RETADDR
+ // { ...
+ // RETADDR area
+ // ...
+ // }
+ // [EBP]
+ MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1U*SlotSize)+TailCallReturnAddrDelta, true);
+ }
+
+ if (hasFP(MF)) {
+ assert((TailCallReturnAddrDelta <= 0) &&
+ "The Delta should always be zero or negative");
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the EBP register that must be saved.
+ int FrameIdx = MFI->CreateFixedObject(SlotSize,
+ -(int)SlotSize +
+ TFI.getOffsetOfLocalArea() +
+ TailCallReturnAddrDelta,
+ true);
+ assert(FrameIdx == MFI->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
+ (void)FrameIdx;
+ }
+
+ // Spill the BasePtr if it's used.
+ if (RegInfo->hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+}
+
+static bool
+HasNestArgument(const MachineFunction *MF) {
+ const Function *F = MF->getFunction();
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; I++) {
+ if (I->hasNestAttr())
+ return true;
+ }
+ return false;
+}
+
+/// GetScratchRegister - Get a temp register for performing work in the
+/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
+/// and the properties of the function either one or two registers will be
+/// needed. Set primary to true for the first register, false for the second.
+static unsigned
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+
+ // Erlang stuff.
+ if (CallingConvention == CallingConv::HiPE) {
+ if (Is64Bit)
+ return Primary ? X86::R14 : X86::R13;
+ else
+ return Primary ? X86::EBX : X86::EDI;
+ }
+
+ if (Is64Bit)
+ return Primary ? X86::R11 : X86::R12;
+
+ bool IsNested = HasNestArgument(&MF);
+
+ if (CallingConvention == CallingConv::X86_FastCall ||
+ CallingConvention == CallingConv::Fast) {
+ if (IsNested)
+ report_fatal_error("Segmented stacks does not support fastcall with "
+ "nested function.");
+ return Primary ? X86::EAX : X86::ECX;
+ }
+ if (IsNested)
+ return Primary ? X86::EDX : X86::EAX;
+ return Primary ? X86::ECX : X86::EAX;
+}
+
+// The stack limit in the TCB is set to this many bytes above the actual stack
+// limit.
+static const uint64_t kSplitStackAvailable = 256;
+
+void
+X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+ MachineBasicBlock &prologueMBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ uint64_t StackSize;
+ bool Is64Bit = STI.is64Bit();
+ unsigned TlsReg, TlsOffset;
+ DebugLoc DL;
+
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+ assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+ "Scratch register is live-in");
+
+ if (MF.getFunction()->isVarArg())
+ report_fatal_error("Segmented stacks do not support vararg functions.");
+ if (!STI.isTargetLinux() && !STI.isTargetDarwin() &&
+ !STI.isTargetWin32() && !STI.isTargetFreeBSD())
+ report_fatal_error("Segmented stacks not supported on this platform.");
+
+ MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool IsNested = false;
+
+ // We need to know if the function has a nest argument only in 64 bit mode.
+ if (Is64Bit)
+ IsNested = HasNestArgument(&MF);
+
+ // The MOV R10, RAX needs to be in a different block, since the RET we emit in
+ // allocMBB needs to be last (terminating) instruction.
+
+ for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
+ e = prologueMBB.livein_end(); i != e; i++) {
+ allocMBB->addLiveIn(*i);
+ checkMBB->addLiveIn(*i);
+ }
+
+ if (IsNested)
+ allocMBB->addLiveIn(X86::R10);
+
+ MF.push_front(allocMBB);
+ MF.push_front(checkMBB);
+
+ // Eventually StackSize will be calculated by a link-time pass; which will
+ // also decide whether checking code needs to be injected into this particular
+ // prologue.
+ StackSize = MFI->getStackSize();
+
+ // When the frame size is less than 256 we just compare the stack
+ // boundary directly to the value of the stack pointer, per gcc.
+ bool CompareStackPointer = StackSize < kSplitStackAvailable;
+
+ // Read the limit off the current stacklet off the stack_guard location.
+ if (Is64Bit) {
+ if (STI.isTargetLinux()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x70;
+ } else if (STI.isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
+ } else if (STI.isTargetFreeBSD()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x18;
+ } else {
+ report_fatal_error("Segmented stacks not supported on this platform.");
+ }
+
+ if (CompareStackPointer)
+ ScratchReg = X86::RSP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+ .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
+
+ BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
+ .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else {
+ if (STI.isTargetLinux()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x30;
+ } else if (STI.isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x48 + 90*4;
+ } else if (STI.isTargetWin32()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x14; // pvArbitrary, reserved for application use
+ } else if (STI.isTargetFreeBSD()) {
+ report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
+ } else {
+ report_fatal_error("Segmented stacks not supported on this platform.");
+ }
+
+ if (CompareStackPointer)
+ ScratchReg = X86::ESP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+ .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
+
+ if (STI.isTargetLinux() || STI.isTargetWin32()) {
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else if (STI.isTargetDarwin()) {
+
+ // TlsOffset doesn't fit into a mod r/m byte so we need an extra register
+ unsigned ScratchReg2;
+ bool SaveScratch2;
+ if (CompareStackPointer) {
+ // The primary scratch register is available for holding the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+ SaveScratch2 = false;
+ } else {
+ // Need to use a second register to hold the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+
+ // Unfortunately, with fastcc the second scratch register may hold an arg
+ SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
+ }
+
+ // If Scratch2 is live-in then it needs to be saved
+ assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
+ "Scratch register is live-in and not saved");
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
+ .addReg(ScratchReg2, RegState::Kill);
+
+ BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
+ .addImm(TlsOffset);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
+ .addReg(ScratchReg)
+ .addReg(ScratchReg2).addImm(1).addReg(0)
+ .addImm(0)
+ .addReg(TlsReg);
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
+ }
+ }
+
+ // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
+ // It jumps to normal execution of the function body.
+ BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB);
+
+ // On 32 bit we first push the arguments size and then the frame size. On 64
+ // bit, we pass the stack frame size in r10 and the argument size in r11.
+ if (Is64Bit) {
+ // Functions with nested arguments use R10, so it needs to be saved across
+ // the call to _morestack
+
+ if (IsNested)
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
+
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
+ .addImm(StackSize);
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
+ .addImm(X86FI->getArgumentStackSize());
+ MF.getRegInfo().setPhysRegUsed(X86::R10);
+ MF.getRegInfo().setPhysRegUsed(X86::R11);
+ } else {
+ BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+ .addImm(X86FI->getArgumentStackSize());
+ BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+ .addImm(StackSize);
+ }
+
+ // __morestack is in libgcc
+ if (Is64Bit)
+ BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack");
+ else
+ BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("__morestack");
+
+ if (IsNested)
+ BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
+ else
+ BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
+
+ allocMBB->addSuccessor(&prologueMBB);
+
+ checkMBB->addSuccessor(allocMBB);
+ checkMBB->addSuccessor(&prologueMBB);
+
+#ifdef XDEBUG
+ MF.verify();
+#endif
+}
+
+/// Erlang programs may need a special prologue to handle the stack size they
+/// might need at runtime. That is because Erlang/OTP does not implement a C
+/// stack but uses a custom implementation of hybrid stack/heap architecture.
+/// (for more information see Eric Stenman's Ph.D. thesis:
+/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
+///
+/// CheckStack:
+/// temp0 = sp - MaxStack
+/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+/// OldStart:
+/// ...
+/// IncStack:
+/// call inc_stack # doubles the stack space
+/// temp0 = sp - MaxStack
+/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize();
+ const bool Is64Bit = STI.is64Bit();
+ DebugLoc DL;
+ // HiPE-specific values
+ const unsigned HipeLeafWords = 24;
+ const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
+ const unsigned Guaranteed = HipeLeafWords * SlotSize;
+ unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
+ MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
+ unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
+
+ assert(STI.isTargetLinux() &&
+ "HiPE prologue is only supported on Linux operating systems.");
+
+ // Compute the largest caller's frame that is needed to fit the callees'
+ // frames. This 'MaxStack' is computed from:
+ //
+ // a) the fixed frame size, which is the space needed for all spilled temps,
+ // b) outgoing on-stack parameter areas, and
+ // c) the minimum stack space this function needs to make available for the
+ // functions it calls (a tunable ABI property).
+ if (MFI->hasCalls()) {
+ unsigned MoreStackForCalls = 0;
+
+ for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
+ MBBI != MBBE; ++MBBI)
+ for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
+ MI != ME; ++MI) {
+ if (!MI->isCall())
+ continue;
+
+ // Get callee operand.
+ const MachineOperand &MO = MI->getOperand(0);
+
+ // Only take account of global function calls (no closures etc.).
+ if (!MO.isGlobal())
+ continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (!F)
+ continue;
+
+ // Do not update 'MaxStack' for primitive and built-in functions
+ // (encoded with names either starting with "erlang."/"bif_" or not
+ // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
+ // "_", such as the BIF "suspend_0") as they are executed on another
+ // stack.
+ if (F->getName().find("erlang.") != StringRef::npos ||
+ F->getName().find("bif_") != StringRef::npos ||
+ F->getName().find_first_of("._") == StringRef::npos)
+ continue;
+
+ unsigned CalleeStkArity =
+ F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
+ if (HipeLeafWords - 1 > CalleeStkArity)
+ MoreStackForCalls = std::max(MoreStackForCalls,
+ (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
+ }
+ MaxStack += MoreStackForCalls;
+ }
+
+ // If the stack frame needed is larger than the guaranteed then runtime checks
+ // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
+ if (MaxStack > Guaranteed) {
+ MachineBasicBlock &prologueMBB = MF.front();
+ MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
+
+ for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
+ E = prologueMBB.livein_end(); I != E; I++) {
+ stackCheckMBB->addLiveIn(*I);
+ incStackMBB->addLiveIn(*I);
+ }
+
+ MF.push_front(incStackMBB);
+ MF.push_front(stackCheckMBB);
+
+ unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
+ unsigned LEAop, CMPop, CALLop;
+ if (Is64Bit) {
+ SPReg = X86::RSP;
+ PReg = X86::RBP;
+ LEAop = X86::LEA64r;
+ CMPop = X86::CMP64rm;
+ CALLop = X86::CALL64pcrel32;
+ SPLimitOffset = 0x90;
+ } else {
+ SPReg = X86::ESP;
+ PReg = X86::EBP;
+ LEAop = X86::LEA32r;
+ CMPop = X86::CMP32rm;
+ CALLop = X86::CALLpcrel32;
+ SPLimitOffset = 0x4c;
+ }
+
+ ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+ assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+ "HiPE prologue scratch register is live-in");
+
+ // Create new MBB for StackCheck:
+ addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
+ SPReg, false, -MaxStack);
+ // SPLimitOffset is in a fixed heap location (pointed by BP).
+ addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
+ .addReg(ScratchReg), PReg, false, SPLimitOffset);
+ BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB);
+
+ // Create new MBB for IncStack:
+ BuildMI(incStackMBB, DL, TII.get(CALLop)).
+ addExternalSymbol("inc_stack_0");
+ addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
+ SPReg, false, -MaxStack);
+ addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
+ .addReg(ScratchReg), PReg, false, SPLimitOffset);
+ BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB);
+
+ stackCheckMBB->addSuccessor(&prologueMBB, 99);
+ stackCheckMBB->addSuccessor(incStackMBB, 1);
+ incStackMBB->addSuccessor(&prologueMBB, 99);
+ incStackMBB->addSuccessor(incStackMBB, 1);
+ }
+#ifdef XDEBUG
+ MF.verify();
+#endif
+}
+
+void X86FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ const X86RegisterInfo &RegInfo = *TM.getRegisterInfo();
+ unsigned StackPtr = RegInfo.getStackRegister();
+ bool reseveCallFrame = hasReservedCallFrame(MF);
+ int Opcode = I->getOpcode();
+ bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ bool IsLP64 = STI.isTarget64BitLP64();
+ DebugLoc DL = I->getDebugLoc();
+ uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+ if (!reseveCallFrame) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackup instruction into a 'sub ESP, <amt>' and the
+ // adjcallstackdown instruction into 'add ESP, <amt>'
+ // TODO: consider using push / pop instead of sub + store / add
+ if (Amount == 0)
+ return;
+
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+ MachineInstr *New = 0;
+ if (Opcode == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
+ } else {
+ assert(Opcode == TII.getCallFrameDestroyOpcode());
+
+ // Factor out the amount the callee already popped.
+ Amount -= CalleeAmt;
+
+ if (Amount) {
+ unsigned Opc = getADDriOpcode(IsLP64, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ }
+ }
+
+ if (New) {
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction.
+ MBB.insert(I, New);
+ }
+
+ return;
+ }
+
+ if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(CalleeAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // We are not tracking the stack pointer adjustment by the callee, so make
+ // sure we restore the stack pointer immediately after the call, there may
+ // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+ MachineBasicBlock::iterator B = MBB.begin();
+ while (I != B && !llvm::prior(I)->isCall())
+ --I;
+ MBB.insert(I, New);
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
new file mode 100644
index 000000000000..3f08b9a2e8d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -0,0 +1,76 @@
+//===-- X86TargetFrameLowering.h - Define frame lowering for X86 -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements X86-specific bits of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_FRAMELOWERING_H
+#define X86_FRAMELOWERING_H
+
+#include "X86Subtarget.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MCSymbol;
+ class X86TargetMachine;
+
+class X86FrameLowering : public TargetFrameLowering {
+ const X86TargetMachine &TM;
+ const X86Subtarget &STI;
+public:
+ explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
+ : TargetFrameLowering(StackGrowsDown,
+ sti.getStackAlignment(),
+ (sti.is64Bit() ? -8 : -4)),
+ TM(tm), STI(sti) {
+ }
+
+ void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
+ unsigned FramePtr) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void adjustForSegmentedStacks(MachineFunction &MF) const;
+
+ void adjustForHiPEPrologue(MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const;
+ uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..6041669f8182
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -0,0 +1,2728 @@
+//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DAG pattern matching instruction selector for X86,
+// converting from a legalized dag to a X86 dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
+//===----------------------------------------------------------------------===//
+// Pattern Matcher Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
+ /// SDValue's instead of register numbers for the leaves of the matched
+ /// tree.
+ struct X86ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ // This is really a union, discriminated by BaseType!
+ SDValue Base_Reg;
+ int Base_FrameIndex;
+
+ unsigned Scale;
+ SDValue IndexReg;
+ int32_t Disp;
+ SDValue Segment;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+ unsigned char SymbolFlags; // X86II::MO_*
+
+ X86ISelAddressMode()
+ : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
+ Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
+ SymbolFlags(X86II::MO_NO_FLAG) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
+ }
+
+ bool hasBaseOrIndexReg() const {
+ return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
+ }
+
+ /// isRIPRelative - Return true if this addressing mode is already RIP
+ /// relative.
+ bool isRIPRelative() const {
+ if (BaseType != RegBase) return false;
+ if (RegisterSDNode *RegNode =
+ dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
+ return RegNode->getReg() == X86::RIP;
+ return false;
+ }
+
+ void setBaseReg(SDValue Reg) {
+ BaseType = RegBase;
+ Base_Reg = Reg;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() {
+ dbgs() << "X86ISelAddressMode " << this << '\n';
+ dbgs() << "Base_Reg ";
+ if (Base_Reg.getNode() != 0)
+ Base_Reg.getNode()->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
+ << " Scale" << Scale << '\n'
+ << "IndexReg ";
+ if (IndexReg.getNode() != 0)
+ IndexReg.getNode()->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " Disp " << Disp << '\n'
+ << "GV ";
+ if (GV)
+ GV->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " CP ";
+ if (CP)
+ CP->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << '\n'
+ << "ES ";
+ if (ES)
+ dbgs() << ES;
+ else
+ dbgs() << "nul";
+ dbgs() << " JT" << JT << " Align" << Align << '\n';
+ }
+#endif
+ };
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// ISel - X86 specific code to select X86 machine instructions for
+ /// SelectionDAG operations.
+ ///
+ class X86DAGToDAGISel : public SelectionDAGISel {
+ /// X86Lowering - This object fully describes how to lower LLVM code to an
+ /// X86-specific SelectionDAG.
+ const X86TargetLowering &X86Lowering;
+
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// OptForSize - If true, selector should try to optimize for code size
+ /// instead of performance.
+ bool OptForSize;
+
+ public:
+ explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel),
+ X86Lowering(*tm.getTargetLowering()),
+ Subtarget(&tm.getSubtarget<X86Subtarget>()),
+ OptForSize(false) {}
+
+ virtual const char *getPassName() const {
+ return "X86 DAG->DAG Instruction Selection";
+ }
+
+ virtual void EmitFunctionEntryCode();
+
+ virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
+
+ virtual void PreprocessISelDAG();
+
+ inline bool immSext8(SDNode *N) const {
+ return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
+ }
+
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ inline bool i64immSExt32(SDNode *N) const {
+ uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
+ return (int64_t)v == (int32_t)v;
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "X86GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ SDNode *SelectGather(SDNode *N, unsigned Opc);
+ SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
+
+ bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
+ bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth);
+ bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
+ bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectLEAAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment,
+ SDValue &NodeWithChain);
+
+ bool TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
+
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) :
+ AM.Base_Reg;
+ Scale = getI8Imm(AM.Scale);
+ Index = AM.IndexReg;
+ // These are 32-bit even in 64-bit mode since RIP relative offset
+ // is 32-bit.
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
+ MVT::i32, AM.Disp,
+ AM.SymbolFlags);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
+ AM.Align, AM.Disp, AM.SymbolFlags);
+ else if (AM.ES) {
+ assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
+ } else if (AM.JT != -1) {
+ assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
+ } else if (AM.BlockAddr)
+ Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
+ AM.SymbolFlags);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
+
+ if (AM.Segment.getNode())
+ Segment = AM.Segment;
+ else
+ Segment = CurDAG->getRegister(0, MVT::i32);
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDValue getI8Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getGlobalBaseReg - Return an SDNode that returns the value of
+ /// the global base register. Output instructions required to
+ /// initialize the global base register, if necessary.
+ ///
+ SDNode *getGlobalBaseReg();
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const X86TargetMachine &getTargetMachine() const {
+ return static_cast<const X86TargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const X86InstrInfo *getInstrInfo() const {
+ return getTargetMachine().getInstrInfo();
+ }
+ };
+}
+
+
+bool
+X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ if (!N.hasOneUse())
+ return false;
+
+ if (N.getOpcode() != ISD::LOAD)
+ return true;
+
+ // If N is a load, do additional profitability checks.
+ if (U == Root) {
+ switch (U->getOpcode()) {
+ default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::AND:
+ case X86ISD::XOR:
+ case X86ISD::OR:
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ SDValue Op1 = U->getOperand(1);
+
+ // If the other operand is a 8-bit immediate we should fold the immediate
+ // instead. This reduces code size.
+ // e.g.
+ // movl 4(%esp), %eax
+ // addl $4, %eax
+ // vs.
+ // movl $4, %eax
+ // addl 4(%esp), %eax
+ // The former is 2 bytes shorter. In case where the increment is 1, then
+ // the saving can be 4 bytes (by using incl %eax).
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
+ if (Imm->getAPIntValue().isSignedIntN(8))
+ return false;
+
+ // If the other operand is a TLS address, we should fold it instead.
+ // This produces
+ // movl %gs:0, %eax
+ // leal i@NTPOFF(%eax), %eax
+ // instead of
+ // movl $i@NTPOFF, %eax
+ // addl %gs:0, %eax
+ // if the block also has an access to a second TLS address this will save
+ // a load.
+ // FIXME: This is probably also true for non TLS addresses.
+ if (Op1.getOpcode() == X86ISD::Wrapper) {
+ SDValue Val = Op1.getOperand(0);
+ if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
+/// load's chain operand and move load below the call's chain operand.
+static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
+ SDValue Call, SDValue OrigChain) {
+ SmallVector<SDValue, 8> Ops;
+ SDValue Chain = OrigChain.getOperand(0);
+ if (Chain.getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else {
+ assert(Chain.getOpcode() == ISD::TokenFactor &&
+ "Unexpected chain operand");
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
+ if (Chain.getOperand(i).getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else
+ Ops.push_back(Chain.getOperand(i));
+ SDValue NewChain =
+ CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ Ops.clear();
+ Ops.push_back(NewChain);
+ }
+ for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
+ Ops.push_back(OrigChain.getOperand(i));
+ CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
+ Load.getOperand(1), Load.getOperand(2));
+
+ unsigned NumOps = Call.getNode()->getNumOperands();
+ Ops.clear();
+ Ops.push_back(SDValue(Load.getNode(), 1));
+ for (unsigned i = 1, e = NumOps; i != e; ++i)
+ Ops.push_back(Call.getOperand(i));
+ CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
+}
+
+/// isCalleeLoad - Return true if call address is a load and it can be
+/// moved below CALLSEQ_START and the chains leading up to the call.
+/// Return the CALLSEQ_START by reference as a second output.
+/// In the case of a tail call, there isn't a callseq node between the call
+/// chain and the load.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
+ // The transformation is somewhat dangerous if the call's chain was glued to
+ // the call. After MoveBelowOrigChain the load is moved between the call and
+ // the chain, this can create a cycle if the load is not folded. So it is
+ // *really* important that we are sure the load will be folded.
+ if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
+ return false;
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
+ if (!LD ||
+ LD->isVolatile() ||
+ LD->getAddressingMode() != ISD::UNINDEXED ||
+ LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ // Now let's find the callseq_start.
+ while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
+ if (!Chain.hasOneUse())
+ return false;
+ Chain = Chain.getOperand(0);
+ }
+
+ if (!Chain.getNumOperands())
+ return false;
+ // Since we are not checking for AA here, conservatively abort if the chain
+ // writes to memory. It's not safe to move the callee (a load) across a store.
+ if (isa<MemSDNode>(Chain.getNode()) &&
+ cast<MemSDNode>(Chain.getNode())->writeMem())
+ return false;
+ if (Chain.getOperand(0).getNode() == Callee.getNode())
+ return true;
+ if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
+ Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
+ Callee.getValue(1).hasOneUse())
+ return true;
+ return false;
+}
+
+void X86DAGToDAGISel::PreprocessISelDAG() {
+ // OptForSize is used in pattern predicates that isel is matching.
+ OptForSize = MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (OptLevel != CodeGenOpt::None &&
+ // Only does this when target favors doesn't favor register indirect
+ // call.
+ ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
+ (N->getOpcode() == X86ISD::TC_RETURN &&
+ // Only does this if load can be folded into TC_RETURN.
+ (Subtarget->is64Bit() ||
+ getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
+ /// Also try moving call address load from outside callseq_start to just
+ /// before the call to allow it to be folded.
+ ///
+ /// [Load chain]
+ /// ^
+ /// |
+ /// [Load]
+ /// ^ ^
+ /// | |
+ /// / \--
+ /// / |
+ ///[CALLSEQ_START] |
+ /// ^ |
+ /// | |
+ /// [LOAD/C2Reg] |
+ /// | |
+ /// \ /
+ /// \ /
+ /// [CALL]
+ bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
+ SDValue Chain = N->getOperand(0);
+ SDValue Load = N->getOperand(1);
+ if (!isCalleeLoad(Load, Chain, HasCallSeq))
+ continue;
+ MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
+ ++NumLoadMoved;
+ continue;
+ }
+
+ // Lower fpround and fpextend nodes that target the FP stack to be store and
+ // load to the stack. This is a gross hack. We would like to simply mark
+ // these as being illegal, but when we do that, legalize produces these when
+ // it expands calls, then expands these in the same legalize pass. We would
+ // like dag combine to be able to hack on these between the call expansion
+ // and the node legalization. As such this pass basically does "really
+ // late" legalization of these inline with the X86 isel pass.
+ // FIXME: This should only happen when not compiled with -O0.
+ if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+ continue;
+
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DstVT = N->getValueType(0);
+
+ // If any of the sources are vectors, no fp stack involved.
+ if (SrcVT.isVector() || DstVT.isVector())
+ continue;
+
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
+ bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+ bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+ if (SrcIsSSE && DstIsSSE)
+ continue;
+
+ if (!SrcIsSSE && !DstIsSSE) {
+ // If this is an FPStack extension, it is a noop.
+ if (N->getOpcode() == ISD::FP_EXTEND)
+ continue;
+ // If this is a value-preserving FPStack truncation, it is a noop.
+ if (N->getConstantOperandVal(1))
+ continue;
+ }
+
+ // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+ // FPStack has extload and truncstore. SSE can fold direct loads into other
+ // operations. Based on this, decide what we want to do.
+ EVT MemVT;
+ if (N->getOpcode() == ISD::FP_ROUND)
+ MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+ else
+ MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+ SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ // FIXME: optimize the case where the src/dest is a load or store?
+ SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
+ N->getOperand(0),
+ MemTmp, MachinePointerInfo(), MemVT,
+ false, false, 0);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+ // extload we created. This will cause general havok on the dag because
+ // anything below the conversion could be folded into other existing nodes.
+ // To avoid invalidating 'I', back it up to the convert node.
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+
+ // Now that we did that, the node is dead. Increment the iterator to the
+ // next node to process, then delete N.
+ ++I;
+ CurDAG->DeleteNode(N);
+ }
+}
+
+
+/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
+/// the main function.
+void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
+ MachineFrameInfo *MFI) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (Subtarget->isTargetCygMing()) {
+ unsigned CallOp =
+ Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ BuildMI(BB, DebugLoc(),
+ TII->get(CallOp)).addExternalSymbol("__main");
+ }
+}
+
+void X86DAGToDAGISel::EmitFunctionEntryCode() {
+ // If this is main, emit special code for main.
+ if (const Function *Fn = MF->getFunction())
+ if (Fn->hasExternalLinkage() && Fn->getName() == "main")
+ EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
+}
+
+static bool isDispSafeForFrameIndex(int64_t Val) {
+ // On 64-bit platforms, we can run into an issue where a frame index
+ // includes a displacement that, when added to the explicit displacement,
+ // will overflow the displacement field. Assuming that the frame index
+ // displacement fits into a 31-bit integer (which is only slightly more
+ // aggressive than the current fundamental assumption that it fits into
+ // a 32-bit integer), a 31-bit disp should always be safe.
+ return isInt<31>(Val);
+}
+
+bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
+ X86ISelAddressMode &AM) {
+ int64_t Val = AM.Disp + Offset;
+ CodeModel::Model M = TM.getCodeModel();
+ if (Subtarget->is64Bit()) {
+ if (!X86::isOffsetSuitableForCodeModel(Val, M,
+ AM.hasSymbolicDisplacement()))
+ return true;
+ // In addition to the checks required for a register base, check that
+ // we do not try to use an unsafe Disp with a frame index.
+ if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
+ !isDispSafeForFrameIndex(Val))
+ return true;
+ }
+ AM.Disp = Val;
+ return false;
+
+}
+
+bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+ SDValue Address = N->getOperand(1);
+
+ // load gs:0 -> GS segment register.
+ // load fs:0 -> FS segment register.
+ //
+ // This optimization is valid because the GNU TLS model defines that
+ // gs:0 (or fs:0 on X86-64) contains its own address.
+ // For more information see http://people.redhat.com/drepper/tls.pdf
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+ if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
+ Subtarget->isTargetLinux())
+ switch (N->getPointerInfo().getAddrSpace()) {
+ case 256:
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ return false;
+ case 257:
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ return false;
+ }
+
+ return true;
+}
+
+/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
+/// into an addressing mode. These wrap things that will resolve down into a
+/// symbol reference. If no match is possible, this returns true, otherwise it
+/// returns false.
+bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
+ // If the addressing mode already has a symbol as the displacement, we can
+ // never match another symbol.
+ if (AM.hasSymbolicDisplacement())
+ return true;
+
+ SDValue N0 = N.getOperand(0);
+ CodeModel::Model M = TM.getCodeModel();
+
+ // Handle X86-64 rip-relative addresses. We check this before checking direct
+ // folding because RIP is preferable to non-RIP accesses.
+ if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
+ // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
+ // they cannot be folded into immediate fields.
+ // FIXME: This can be improved for kernel and other models?
+ (M == CodeModel::Small || M == CodeModel::Kernel)) {
+ // Base and index reg must be 0 in order to use %rip as base.
+ if (AM.hasBaseOrIndexReg())
+ return true;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.GV = G->getGlobal();
+ AM.SymbolFlags = G->getTargetFlags();
+ if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.SymbolFlags = CP->getTargetFlags();
+ if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ AM.SymbolFlags = J->getTargetFlags();
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.BlockAddr = BA->getBlockAddress();
+ AM.SymbolFlags = BA->getTargetFlags();
+ if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else
+ llvm_unreachable("Unhandled symbol reference node.");
+
+ if (N.getOpcode() == X86ISD::WrapperRIP)
+ AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
+ return false;
+ }
+
+ // Handle the case when globals fit in our immediate field: This is true for
+ // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
+ // mode, this only applies to a non-RIP-relative computation.
+ if (!Subtarget->is64Bit() ||
+ M == CodeModel::Small || M == CodeModel::Kernel) {
+ assert(N.getOpcode() != X86ISD::WrapperRIP &&
+ "RIP-relative addressing already handled");
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ AM.GV = G->getGlobal();
+ AM.Disp += G->getOffset();
+ AM.SymbolFlags = G->getTargetFlags();
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ AM.SymbolFlags = CP->getTargetFlags();
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ AM.SymbolFlags = J->getTargetFlags();
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
+ AM.BlockAddr = BA->getBlockAddress();
+ AM.Disp += BA->getOffset();
+ AM.SymbolFlags = BA->getTargetFlags();
+ } else
+ llvm_unreachable("Unhandled symbol reference node.");
+ return false;
+ }
+
+ return true;
+}
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
+bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
+ if (MatchAddressRecursively(N, AM, 0))
+ return true;
+
+ // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
+ // a smaller encoding and avoids a scaled-index.
+ if (AM.Scale == 2 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0) {
+ AM.Base_Reg = AM.IndexReg;
+ AM.Scale = 1;
+ }
+
+ // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
+ // because it has a smaller encoding.
+ // TODO: Which other code models can use this?
+ if (TM.getCodeModel() == CodeModel::Small &&
+ Subtarget->is64Bit() &&
+ AM.Scale == 1 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0 &&
+ AM.SymbolFlags == X86II::MO_NO_FLAG &&
+ AM.hasSymbolicDisplacement())
+ AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+
+ return false;
+}
+
+// Insert a node into the DAG at least before the Pos node's position. This
+// will reposition the node as needed, and will assign it a node ID that is <=
+// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
+// IDs! The selection DAG must no longer depend on their uniqueness when this
+// is used.
+static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
+ if (N.getNode()->getNodeId() == -1 ||
+ N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
+ DAG.RepositionNode(Pos.getNode(), N.getNode());
+ N.getNode()->setNodeId(Pos.getNode()->getNodeId());
+ }
+}
+
+// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
+// allows us to convert the shift and and into an h-register extract and
+// a scaled index. Returns false if the simplification is performed.
+static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM) {
+ if (Shift.getOpcode() != ISD::SRL ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)) ||
+ !Shift.hasOneUse())
+ return true;
+
+ int ScaleLog = 8 - Shift.getConstantOperandVal(1);
+ if (ScaleLog <= 0 || ScaleLog >= 4 ||
+ Mask != (0xffu << ScaleLog))
+ return true;
+
+ EVT VT = N.getValueType();
+ DebugLoc DL = N.getDebugLoc();
+ SDValue Eight = DAG.getConstant(8, MVT::i8);
+ SDValue NewMask = DAG.getConstant(0xff, VT);
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
+ SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, Eight);
+ InsertDAGNode(DAG, N, Srl);
+ InsertDAGNode(DAG, N, NewMask);
+ InsertDAGNode(DAG, N, And);
+ InsertDAGNode(DAG, N, ShlCount);
+ InsertDAGNode(DAG, N, Shl);
+ DAG.ReplaceAllUsesWith(N, Shl);
+ AM.IndexReg = And;
+ AM.Scale = (1 << ScaleLog);
+ return false;
+}
+
+// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
+// allows us to fold the shift into this addressing mode. Returns false if the
+// transform succeeded.
+static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM) {
+ if (Shift.getOpcode() != ISD::SHL ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)))
+ return true;
+
+ // Not likely to be profitable if either the AND or SHIFT node has more
+ // than one use (unless all uses are for address computation). Besides,
+ // isel mechanism requires their node ids to be reused.
+ if (!N.hasOneUse() || !Shift.hasOneUse())
+ return true;
+
+ // Verify that the shift amount is something we can fold.
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+ if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
+ return true;
+
+ EVT VT = N.getValueType();
+ DebugLoc DL = N.getDebugLoc();
+ SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
+ SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, NewMask);
+ InsertDAGNode(DAG, N, NewAnd);
+ InsertDAGNode(DAG, N, NewShift);
+ DAG.ReplaceAllUsesWith(N, NewShift);
+
+ AM.Scale = 1 << ShiftAmt;
+ AM.IndexReg = NewAnd;
+ return false;
+}
+
+// Implement some heroics to detect shifts of masked values where the mask can
+// be replaced by extending the shift and undoing that in the addressing mode
+// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
+// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
+// the addressing mode. This results in code such as:
+//
+// int f(short *y, int *lookup_table) {
+// ...
+// return *y + lookup_table[*y >> 11];
+// }
+//
+// Turning into:
+// movzwl (%rdi), %eax
+// movl %eax, %ecx
+// shrl $11, %ecx
+// addl (%rsi,%rcx,4), %eax
+//
+// Instead of:
+// movzwl (%rdi), %eax
+// movl %eax, %ecx
+// shrl $9, %ecx
+// andl $124, %rcx
+// addl (%rsi,%rcx), %eax
+//
+// Note that this function assumes the mask is provided as a mask *after* the
+// value is shifted. The input chain may or may not match that, but computing
+// such a mask is trivial.
+static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM) {
+ if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)))
+ return true;
+
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+ unsigned MaskLZ = CountLeadingZeros_64(Mask);
+ unsigned MaskTZ = CountTrailingZeros_64(Mask);
+
+ // The amount of shift we're trying to fit into the addressing mode is taken
+ // from the trailing zeros of the mask.
+ unsigned AMShiftAmt = MaskTZ;
+
+ // There is nothing we can do here unless the mask is removing some bits.
+ // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
+ if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
+
+ // We also need to ensure that mask is a continuous run of bits.
+ if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
+
+ // Scale the leading zero count down based on the actual size of the value.
+ // Also scale it down based on the size of the shift.
+ MaskLZ -= (64 - X.getValueSizeInBits()) + ShiftAmt;
+
+ // The final check is to ensure that any masked out high bits of X are
+ // already known to be zero. Otherwise, the mask has a semantic impact
+ // other than masking out a couple of low bits. Unfortunately, because of
+ // the mask, zero extensions will be removed from operands in some cases.
+ // This code works extra hard to look through extensions because we can
+ // replace them with zero extensions cheaply if necessary.
+ bool ReplacingAnyExtend = false;
+ if (X.getOpcode() == ISD::ANY_EXTEND) {
+ unsigned ExtendBits =
+ X.getValueSizeInBits() - X.getOperand(0).getValueSizeInBits();
+ // Assume that we'll replace the any-extend with a zero-extend, and
+ // narrow the search to the extended value.
+ X = X.getOperand(0);
+ MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
+ ReplacingAnyExtend = true;
+ }
+ APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(),
+ MaskLZ);
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
+ if (MaskedHighBits != KnownZero) return true;
+
+ // We've identified a pattern that can be transformed into a single shift
+ // and an addressing mode. Make it so.
+ EVT VT = N.getValueType();
+ if (ReplacingAnyExtend) {
+ assert(X.getValueType() != VT);
+ // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
+ SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, X.getDebugLoc(), VT, X);
+ InsertDAGNode(DAG, N, NewX);
+ X = NewX;
+ }
+ DebugLoc DL = N.getDebugLoc();
+ SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
+ SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
+ SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, NewSRLAmt);
+ InsertDAGNode(DAG, N, NewSRL);
+ InsertDAGNode(DAG, N, NewSHLAmt);
+ InsertDAGNode(DAG, N, NewSHL);
+ DAG.ReplaceAllUsesWith(N, NewSHL);
+
+ AM.Scale = 1 << AMShiftAmt;
+ AM.IndexReg = NewSRL;
+ return false;
+}
+
+bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth) {
+ DebugLoc dl = N.getDebugLoc();
+ DEBUG({
+ dbgs() << "MatchAddress: ";
+ AM.dump();
+ });
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM);
+
+ // If this is already a %rip relative address, we can only merge immediates
+ // into it. Instead of handling this in every case, we handle it here.
+ // RIP relative addressing: %rip + 32-bit displacement!
+ if (AM.isRIPRelative()) {
+ // FIXME: JumpTable and ExternalSymbol address currently don't like
+ // displacements. It isn't very important, but this should be fixed for
+ // consistency.
+ if (!AM.ES && AM.JT != -1) return true;
+
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
+ if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
+ return false;
+ return true;
+ }
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (!FoldOffsetIntoAddress(Val, AM))
+ return false;
+ break;
+ }
+
+ case X86ISD::Wrapper:
+ case X86ISD::WrapperRIP:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::LOAD:
+ if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
+ AM.BaseType = X86ISelAddressMode::FrameIndexBase;
+ AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SHL:
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
+ break;
+
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
+ unsigned Val = CN->getZExtValue();
+ // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
+ // that the base operand remains free for further matching. If
+ // the base doesn't end up getting used, a post-processing step
+ // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
+ if (Val == 1 || Val == 2 || Val == 3) {
+ AM.Scale = 1 << Val;
+ SDValue ShVal = N.getNode()->getOperand(0);
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (CurDAG->isBaseWithConstantOffset(ShVal)) {
+ AM.IndexReg = ShVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
+ uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
+ if (!FoldOffsetIntoAddress(Disp, AM))
+ return false;
+ }
+
+ AM.IndexReg = ShVal;
+ return false;
+ }
+ }
+ break;
+
+ case ISD::SRL: {
+ // Scale must not be used already.
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+
+ SDValue And = N.getOperand(0);
+ if (And.getOpcode() != ISD::AND) break;
+ SDValue X = And.getOperand(0);
+
+ // We only handle up to 64-bit values here as those are what matter for
+ // addressing mode optimizations.
+ if (X.getValueSizeInBits() > 64) break;
+
+ // The mask used for the transform is expected to be post-shift, but we
+ // found the shift first so just apply the shift to the mask before passing
+ // it down.
+ if (!isa<ConstantSDNode>(N.getOperand(1)) ||
+ !isa<ConstantSDNode>(And.getOperand(1)))
+ break;
+ uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
+
+ // Try to fold the mask and shift into the scale, and return false if we
+ // succeed.
+ if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
+ return false;
+ break;
+ }
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ // A mul_lohi where we need the low part can be folded as a plain multiply.
+ if (N.getResNo() != 0) break;
+ // FALL THROUGH
+ case ISD::MUL:
+ case X86ISD::MUL_IMM:
+ // X*[3,5,9] -> X+X*[2,4,8]
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0) {
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
+ if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
+ CN->getZExtValue() == 9) {
+ AM.Scale = unsigned(CN->getZExtValue())-1;
+
+ SDValue MulVal = N.getNode()->getOperand(0);
+ SDValue Reg;
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
+ isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
+ Reg = MulVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
+ uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
+ if (FoldOffsetIntoAddress(Disp, AM))
+ Reg = N.getNode()->getOperand(0);
+ } else {
+ Reg = N.getNode()->getOperand(0);
+ }
+
+ AM.IndexReg = AM.Base_Reg = Reg;
+ return false;
+ }
+ }
+ break;
+
+ case ISD::SUB: {
+ // Given A-B, if A can be completely folded into the address and
+ // the index field with the index field unused, use -B as the index.
+ // This is a win if a has multiple parts that can be folded into
+ // the address. Also, this saves a mov if the base register has
+ // other uses, since it avoids a two-address sub instruction, however
+ // it costs an additional mov if the index register has other uses.
+
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ // Test if the LHS of the sub can be folded.
+ X86ISelAddressMode Backup = AM;
+ if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
+ AM = Backup;
+ break;
+ }
+ // Test if the index field is free for use.
+ if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
+ AM = Backup;
+ break;
+ }
+
+ int Cost = 0;
+ SDValue RHS = Handle.getValue().getNode()->getOperand(1);
+ // If the RHS involves a register with multiple uses, this
+ // transformation incurs an extra mov, due to the neg instruction
+ // clobbering its operand.
+ if (!RHS.getNode()->hasOneUse() ||
+ RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
+ RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
+ RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
+ (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
+ ++Cost;
+ // If the base is a register with multiple uses, this
+ // transformation may save a mov.
+ if ((AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() &&
+ !AM.Base_Reg.getNode()->hasOneUse()) ||
+ AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ --Cost;
+ // If the folded LHS was interesting, this transformation saves
+ // address arithmetic.
+ if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
+ ((AM.Disp != 0) && (Backup.Disp == 0)) +
+ (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
+ --Cost;
+ // If it doesn't look like it may be an overall win, don't do it.
+ if (Cost >= 0) {
+ AM = Backup;
+ break;
+ }
+
+ // Ok, the transformation is legal and appears profitable. Go for it.
+ SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+ AM.IndexReg = Neg;
+ AM.Scale = 1;
+
+ // Insert the new nodes into the topological ordering.
+ InsertDAGNode(*CurDAG, N, Zero);
+ InsertDAGNode(*CurDAG, N, Neg);
+ return false;
+ }
+
+ case ISD::ADD: {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ X86ISelAddressMode Backup = AM;
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // Try again after commuting the operands.
+ if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
+ !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ !AM.Base_Reg.getNode() &&
+ !AM.IndexReg.getNode()) {
+ N = Handle.getValue();
+ AM.Base_Reg = N.getOperand(0);
+ AM.IndexReg = N.getOperand(1);
+ AM.Scale = 1;
+ return false;
+ }
+ N = Handle.getValue();
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ X86ISelAddressMode Backup = AM;
+ ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
+
+ // Start with the LHS as an addr mode.
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
+ return false;
+ AM = Backup;
+ }
+ break;
+
+ case ISD::AND: {
+ // Perform some heroic transforms on an and of a constant-count shift
+ // with a constant to enable use of the scaled offset field.
+
+ // Scale must not be used already.
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+
+ SDValue Shift = N.getOperand(0);
+ if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
+ SDValue X = Shift.getOperand(0);
+
+ // We only handle up to 64-bit values here as those are what matter for
+ // addressing mode optimizations.
+ if (X.getValueSizeInBits() > 64) break;
+
+ if (!isa<ConstantSDNode>(N.getOperand(1)))
+ break;
+ uint64_t Mask = N.getConstantOperandVal(1);
+
+ // Try to fold the mask and shift into an extract and scale.
+ if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
+
+ // Try to fold the mask and shift directly into the scale.
+ if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
+
+ // Try to swap the mask and shift to place shifts which can be done as
+ // a scale on the outside of the mask.
+ if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
+ break;
+ }
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
+ // If so, check to see if the scale index register is set.
+ if (AM.IndexReg.getNode() == 0) {
+ AM.IndexReg = N;
+ AM.Scale = 1;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = X86ISelAddressMode::RegBase;
+ AM.Base_Reg = N;
+ return false;
+}
+
+/// SelectAddr - returns true if it is able pattern match an addressing mode.
+/// It returns the operands which make up the maximal addressing mode it can
+/// match by reference.
+///
+/// Parent is the parent node of the addr operand that is being matched. It
+/// is always a load, store, atomic node, or null. It is only null when
+/// checking memory operands for inline asm nodes.
+bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ X86ISelAddressMode AM;
+
+ if (Parent &&
+ // This list of opcodes are all the nodes that have an "addr:$ptr" operand
+ // that are not a MemSDNode, and thus don't have proper addrspace info.
+ Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
+ Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
+ Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
+ Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
+ Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
+ unsigned AddrSpace =
+ cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+ // AddrSpace 256 -> GS, 257 -> FS.
+ if (AddrSpace == 256)
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ if (AddrSpace == 257)
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ }
+
+ if (MatchAddress(N, AM))
+ return false;
+
+ EVT VT = N.getValueType();
+ if (AM.BaseType == X86ISelAddressMode::RegBase) {
+ if (!AM.Base_Reg.getNode())
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.getNode())
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
+/// match a load whose top elements are either undef or zeros. The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
+///
+/// We also return:
+/// PatternChainNode: this is the matched node that has a chain input and
+/// output.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
+ SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment,
+ SDValue &PatternNodeWithChain) {
+ if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ PatternNodeWithChain = N.getOperand(0);
+ if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
+ PatternNodeWithChain.hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ return true;
+ }
+ }
+
+ // Also handle the case where we explicitly require zeros in the top
+ // elements. This is a vector shuffle from the zero vector.
+ if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
+ // Check to see if the top elements are all zeros (or bitcast of zeros).
+ N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(0).getNode()->hasOneUse() &&
+ ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
+ N.getOperand(0).getOperand(0).hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ // Okay, this is a zero extending load. Fold it.
+ LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ PatternNodeWithChain = SDValue(LD, 0);
+ return true;
+ }
+ return false;
+}
+
+
+/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
+/// mode it matches can be cost effectively emitted as an LEA instruction.
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ X86ISelAddressMode AM;
+
+ // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
+ // segments.
+ SDValue Copy = AM.Segment;
+ SDValue T = CurDAG->getRegister(0, MVT::i32);
+ AM.Segment = T;
+ if (MatchAddress(N, AM))
+ return false;
+ assert (T == AM.Segment);
+ AM.Segment = Copy;
+
+ EVT VT = N.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == X86ISelAddressMode::RegBase)
+ if (AM.Base_Reg.getNode())
+ Complexity = 1;
+ else
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.getNode())
+ Complexity++;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
+ // a simple shift.
+ if (AM.Scale > 1)
+ Complexity++;
+
+ // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
+ // to a LEA. This is determined with some expermentation but is by no means
+ // optimal (especially for code size consideration). LEA is nice because of
+ // its three-address nature. Tweak the cost function again when we can run
+ // convertToThreeAddress() at register allocation time.
+ if (AM.hasSymbolicDisplacement()) {
+ // For X86-64, we should always use lea to materialize RIP relative
+ // addresses.
+ if (Subtarget->is64Bit())
+ Complexity = 4;
+ else
+ Complexity += 2;
+ }
+
+ if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
+ Complexity++;
+
+ // If it isn't worth using an LEA, reject it.
+ if (Complexity <= 2)
+ return false;
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+
+ X86ISelAddressMode AM;
+ AM.GV = GA->getGlobal();
+ AM.Disp += GA->getOffset();
+ AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
+ AM.SymbolFlags = GA->getTargetFlags();
+
+ if (N.getValueType() == MVT::i32) {
+ AM.Scale = 1;
+ AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
+ } else {
+ AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
+ }
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+
+bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ if (!ISD::isNON_EXTLoad(N.getNode()) ||
+ !IsProfitableToFold(N, P, P) ||
+ !IsLegalToFold(N, P, P, OptLevel))
+ return false;
+
+ return SelectAddr(N.getNode(),
+ N.getOperand(1), Base, Scale, Index, Disp, Segment);
+}
+
+/// getGlobalBaseReg - Return an SDNode that returns the value of
+/// the global base register. Output instructions required to
+/// initialize the global base register, if necessary.
+///
+SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = Node->getOperand(2);
+ SDValue In2H = Node->getOperand(3);
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return NULL;
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ return ResNode;
+}
+
+/// Atomic opcode table
+///
+enum AtomicOpc {
+ ADD,
+ SUB,
+ INC,
+ DEC,
+ OR,
+ AND,
+ XOR,
+ AtomicOpcEnd
+};
+
+enum AtomicSz {
+ ConstantI8,
+ I8,
+ SextConstantI16,
+ ConstantI16,
+ I16,
+ SextConstantI32,
+ ConstantI32,
+ I32,
+ SextConstantI64,
+ ConstantI64,
+ I64,
+ AtomicSzEnd
+};
+
+static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+ {
+ X86::LOCK_ADD8mi,
+ X86::LOCK_ADD8mr,
+ X86::LOCK_ADD16mi8,
+ X86::LOCK_ADD16mi,
+ X86::LOCK_ADD16mr,
+ X86::LOCK_ADD32mi8,
+ X86::LOCK_ADD32mi,
+ X86::LOCK_ADD32mr,
+ X86::LOCK_ADD64mi8,
+ X86::LOCK_ADD64mi32,
+ X86::LOCK_ADD64mr,
+ },
+ {
+ X86::LOCK_SUB8mi,
+ X86::LOCK_SUB8mr,
+ X86::LOCK_SUB16mi8,
+ X86::LOCK_SUB16mi,
+ X86::LOCK_SUB16mr,
+ X86::LOCK_SUB32mi8,
+ X86::LOCK_SUB32mi,
+ X86::LOCK_SUB32mr,
+ X86::LOCK_SUB64mi8,
+ X86::LOCK_SUB64mi32,
+ X86::LOCK_SUB64mr,
+ },
+ {
+ 0,
+ X86::LOCK_INC8m,
+ 0,
+ 0,
+ X86::LOCK_INC16m,
+ 0,
+ 0,
+ X86::LOCK_INC32m,
+ 0,
+ 0,
+ X86::LOCK_INC64m,
+ },
+ {
+ 0,
+ X86::LOCK_DEC8m,
+ 0,
+ 0,
+ X86::LOCK_DEC16m,
+ 0,
+ 0,
+ X86::LOCK_DEC32m,
+ 0,
+ 0,
+ X86::LOCK_DEC64m,
+ },
+ {
+ X86::LOCK_OR8mi,
+ X86::LOCK_OR8mr,
+ X86::LOCK_OR16mi8,
+ X86::LOCK_OR16mi,
+ X86::LOCK_OR16mr,
+ X86::LOCK_OR32mi8,
+ X86::LOCK_OR32mi,
+ X86::LOCK_OR32mr,
+ X86::LOCK_OR64mi8,
+ X86::LOCK_OR64mi32,
+ X86::LOCK_OR64mr,
+ },
+ {
+ X86::LOCK_AND8mi,
+ X86::LOCK_AND8mr,
+ X86::LOCK_AND16mi8,
+ X86::LOCK_AND16mi,
+ X86::LOCK_AND16mr,
+ X86::LOCK_AND32mi8,
+ X86::LOCK_AND32mi,
+ X86::LOCK_AND32mr,
+ X86::LOCK_AND64mi8,
+ X86::LOCK_AND64mi32,
+ X86::LOCK_AND64mr,
+ },
+ {
+ X86::LOCK_XOR8mi,
+ X86::LOCK_XOR8mr,
+ X86::LOCK_XOR16mi8,
+ X86::LOCK_XOR16mi,
+ X86::LOCK_XOR16mr,
+ X86::LOCK_XOR32mi8,
+ X86::LOCK_XOR32mi,
+ X86::LOCK_XOR32mr,
+ X86::LOCK_XOR64mi8,
+ X86::LOCK_XOR64mi32,
+ X86::LOCK_XOR64mr,
+ }
+};
+
+// Return the target constant operand for atomic-load-op and do simple
+// translations, such as from atomic-load-add to lock-sub. The return value is
+// one of the following 3 cases:
+// + target-constant, the operand could be supported as a target constant.
+// + empty, the operand is not needed any more with the new op selected.
+// + non-empty, otherwise.
+static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
+ DebugLoc dl,
+ enum AtomicOpc &Op, EVT NVT,
+ SDValue Val) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
+ int64_t CNVal = CN->getSExtValue();
+ // Quit if not 32-bit imm.
+ if ((int32_t)CNVal != CNVal)
+ return Val;
+ // For atomic-load-add, we could do some optimizations.
+ if (Op == ADD) {
+ // Translate to INC/DEC if ADD by 1 or -1.
+ if ((CNVal == 1) || (CNVal == -1)) {
+ Op = (CNVal == 1) ? INC : DEC;
+ // No more constant operand after being translated into INC/DEC.
+ return SDValue();
+ }
+ // Translate to SUB if ADD by negative value.
+ if (CNVal < 0) {
+ Op = SUB;
+ CNVal = -CNVal;
+ }
+ }
+ return CurDAG->getTargetConstant(CNVal, NVT);
+ }
+
+ // If the value operand is single-used, try to optimize it.
+ if (Op == ADD && Val.hasOneUse()) {
+ // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
+ if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
+ Op = SUB;
+ return Val.getOperand(1);
+ }
+ // A special case for i16, which needs truncating as, in most cases, it's
+ // promoted to i32. We will translate
+ // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x))
+ if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
+ Val.getOperand(0).getOpcode() == ISD::SUB &&
+ X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
+ Op = SUB;
+ Val = Val.getOperand(0);
+ return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
+ Val.getOperand(1));
+ }
+ }
+
+ return Val;
+}
+
+SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Optimize common patterns for __sync_or_and_fetch and similar arith
+ // operations where the result is not used. This allows us to use the "lock"
+ // version of the arithmetic instruction.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ // Which index into the table.
+ enum AtomicOpc Op;
+ switch (Node->getOpcode()) {
+ default:
+ return 0;
+ case ISD::ATOMIC_LOAD_OR:
+ Op = OR;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Op = AND;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Op = XOR;
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ Op = ADD;
+ break;
+ }
+
+ Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
+ bool isUnOp = !Val.getNode();
+ bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
+
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isCN)
+ Opc = AtomicOpcTbl[Op][ConstantI8];
+ else
+ Opc = AtomicOpcTbl[Op][I8];
+ break;
+ case MVT::i16:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI16];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI16];
+ } else
+ Opc = AtomicOpcTbl[Op][I16];
+ break;
+ case MVT::i32:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI32];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI32];
+ } else
+ Opc = AtomicOpcTbl[Op][I32];
+ break;
+ case MVT::i64:
+ Opc = AtomicOpcTbl[Op][I64];
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI64];
+ else if (i64immSExt32(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][ConstantI64];
+ }
+ break;
+ }
+
+ assert(Opc != 0 && "Invalid arith lock transform!");
+
+ SDValue Ret;
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ if (isUnOp) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+ Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ } else {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ }
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+}
+
+/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
+/// any uses which require the SF or OF bits to be accurate.
+static bool HasNoSignedComparisonUses(SDNode *N) {
+ // Examine each user of the node.
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end(); UI != UE; ++UI) {
+ // Only examine CopyToReg uses.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
+ X86::EFLAGS)
+ return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(),
+ FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1) continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode()) return false;
+ // Examine the opcode of the user.
+ switch (FlagUI->getMachineOpcode()) {
+ // These comparisons don't treat the most significant bit specially.
+ case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
+ case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
+ case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
+ case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
+ case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
+ case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
+ case X86::CMOVA16rr: case X86::CMOVA16rm:
+ case X86::CMOVA32rr: case X86::CMOVA32rm:
+ case X86::CMOVA64rr: case X86::CMOVA64rm:
+ case X86::CMOVAE16rr: case X86::CMOVAE16rm:
+ case X86::CMOVAE32rr: case X86::CMOVAE32rm:
+ case X86::CMOVAE64rr: case X86::CMOVAE64rm:
+ case X86::CMOVB16rr: case X86::CMOVB16rm:
+ case X86::CMOVB32rr: case X86::CMOVB32rm:
+ case X86::CMOVB64rr: case X86::CMOVB64rm:
+ case X86::CMOVBE16rr: case X86::CMOVBE16rm:
+ case X86::CMOVBE32rr: case X86::CMOVBE32rm:
+ case X86::CMOVBE64rr: case X86::CMOVBE64rm:
+ case X86::CMOVE16rr: case X86::CMOVE16rm:
+ case X86::CMOVE32rr: case X86::CMOVE32rm:
+ case X86::CMOVE64rr: case X86::CMOVE64rm:
+ case X86::CMOVNE16rr: case X86::CMOVNE16rm:
+ case X86::CMOVNE32rr: case X86::CMOVNE32rm:
+ case X86::CMOVNE64rr: case X86::CMOVNE64rm:
+ case X86::CMOVNP16rr: case X86::CMOVNP16rm:
+ case X86::CMOVNP32rr: case X86::CMOVNP32rm:
+ case X86::CMOVNP64rr: case X86::CMOVNP64rm:
+ case X86::CMOVP16rr: case X86::CMOVP16rm:
+ case X86::CMOVP32rr: case X86::CMOVP32rm:
+ case X86::CMOVP64rr: case X86::CMOVP64rm:
+ continue;
+ // Anything else: assume conservatively.
+ default: return false;
+ }
+ }
+ }
+ return true;
+}
+
+/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
+/// is suitable for doing the {load; increment or decrement; store} to modify
+/// transformation.
+static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
+ SDValue StoredVal, SelectionDAG *CurDAG,
+ LoadSDNode* &LoadNode, SDValue &InputChain) {
+
+ // is the value stored the result of a DEC or INC?
+ if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
+
+ // is the stored value result 0 of the load?
+ if (StoredVal.getResNo() != 0) return false;
+
+ // are there other uses of the loaded value than the inc or dec?
+ if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
+
+ // is the store non-extending and non-indexed?
+ if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
+ return false;
+
+ SDValue Load = StoredVal->getOperand(0);
+ // Is the stored value a non-extending and non-indexed load?
+ if (!ISD::isNormalLoad(Load.getNode())) return false;
+
+ // Return LoadNode by reference.
+ LoadNode = cast<LoadSDNode>(Load);
+ // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
+ EVT LdVT = LoadNode->getMemoryVT();
+ if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
+ LdVT != MVT::i8)
+ return false;
+
+ // Is store the only read of the loaded value?
+ if (!Load.hasOneUse())
+ return false;
+
+ // Is the address of the store the same as the load?
+ if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
+ LoadNode->getOffset() != StoreNode->getOffset())
+ return false;
+
+ // Check if the chain is produced by the load or is a TokenFactor with
+ // the load output chain as an operand. Return InputChain by reference.
+ SDValue Chain = StoreNode->getChain();
+
+ bool ChainCheck = false;
+ if (Chain == Load.getValue(1)) {
+ ChainCheck = true;
+ InputChain = LoadNode->getChain();
+ } else if (Chain.getOpcode() == ISD::TokenFactor) {
+ SmallVector<SDValue, 4> ChainOps;
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
+ SDValue Op = Chain.getOperand(i);
+ if (Op == Load.getValue(1)) {
+ ChainCheck = true;
+ continue;
+ }
+
+ // Make sure using Op as part of the chain would not cause a cycle here.
+ // In theory, we could check whether the chain node is a predecessor of
+ // the load. But that can be very expensive. Instead visit the uses and
+ // make sure they all have smaller node id than the load.
+ int LoadId = LoadNode->getNodeId();
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = UI->use_end(); UI != UE; ++UI) {
+ if (UI.getUse().getResNo() != 0)
+ continue;
+ if (UI->getNodeId() > LoadId)
+ return false;
+ }
+
+ ChainOps.push_back(Op);
+ }
+
+ if (ChainCheck)
+ // Make a new TokenFactor with all the other input chains except
+ // for the load.
+ InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(),
+ MVT::Other, &ChainOps[0], ChainOps.size());
+ }
+ if (!ChainCheck)
+ return false;
+
+ return true;
+}
+
+/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory
+/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC.
+static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
+ if (Opc == X86ISD::DEC) {
+ if (LdVT == MVT::i64) return X86::DEC64m;
+ if (LdVT == MVT::i32) return X86::DEC32m;
+ if (LdVT == MVT::i16) return X86::DEC16m;
+ if (LdVT == MVT::i8) return X86::DEC8m;
+ } else {
+ assert(Opc == X86ISD::INC && "unrecognized opcode");
+ if (LdVT == MVT::i64) return X86::INC64m;
+ if (LdVT == MVT::i32) return X86::INC32m;
+ if (LdVT == MVT::i16) return X86::INC16m;
+ if (LdVT == MVT::i8) return X86::INC8m;
+ }
+ llvm_unreachable("unrecognized size for LdVT");
+}
+
+/// SelectGather - Customized ISel for GATHER operations.
+///
+SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
+ // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
+ SDValue Chain = Node->getOperand(0);
+ SDValue VSrc = Node->getOperand(2);
+ SDValue Base = Node->getOperand(3);
+ SDValue VIdx = Node->getOperand(4);
+ SDValue VMask = Node->getOperand(5);
+ ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
+ if (!Scale)
+ return 0;
+
+ SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
+ MVT::Other);
+
+ // Memory Operands: Base, Scale, Index, Disp, Segment
+ SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Segment = CurDAG->getRegister(0, MVT::i32);
+ const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
+ Disp, Segment, VMask, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ VTs, Ops, array_lengthof(Ops));
+ // Node has 2 outputs: VDst and MVT::Other.
+ // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
+ // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
+ // of ResNode.
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
+ return ResNode;
+}
+
+SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
+ EVT NVT = Node->getValueType(0);
+ unsigned Opc, MOpc;
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
+
+ if (Node->isMachineOpcode()) {
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_avx2_gather_d_pd:
+ case Intrinsic::x86_avx2_gather_d_pd_256:
+ case Intrinsic::x86_avx2_gather_q_pd:
+ case Intrinsic::x86_avx2_gather_q_pd_256:
+ case Intrinsic::x86_avx2_gather_d_ps:
+ case Intrinsic::x86_avx2_gather_d_ps_256:
+ case Intrinsic::x86_avx2_gather_q_ps:
+ case Intrinsic::x86_avx2_gather_q_ps_256:
+ case Intrinsic::x86_avx2_gather_d_q:
+ case Intrinsic::x86_avx2_gather_d_q_256:
+ case Intrinsic::x86_avx2_gather_q_q:
+ case Intrinsic::x86_avx2_gather_q_q_256:
+ case Intrinsic::x86_avx2_gather_d_d:
+ case Intrinsic::x86_avx2_gather_d_d_256:
+ case Intrinsic::x86_avx2_gather_q_d:
+ case Intrinsic::x86_avx2_gather_q_d_256: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
+ case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
+ case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
+ case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
+ case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
+ case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
+ case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
+ case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
+ case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
+ }
+ SDNode *RetVal = SelectGather(Node, Opc);
+ if (RetVal)
+ // We already called ReplaceUses inside SelectGather.
+ return NULL;
+ break;
+ }
+ }
+ break;
+ }
+ case X86ISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+
+ case X86ISD::ATOMOR64_DAG:
+ case X86ISD::ATOMXOR64_DAG:
+ case X86ISD::ATOMADD64_DAG:
+ case X86ISD::ATOMSUB64_DAG:
+ case X86ISD::ATOMNAND64_DAG:
+ case X86ISD::ATOMAND64_DAG:
+ case X86ISD::ATOMMAX64_DAG:
+ case X86ISD::ATOMMIN64_DAG:
+ case X86ISD::ATOMUMAX64_DAG:
+ case X86ISD::ATOMUMIN64_DAG:
+ case X86ISD::ATOMSWAP64_DAG: {
+ unsigned Opc;
+ switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
+ case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
+ case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
+ case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
+ case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
+ case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
+ case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
+ case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
+ case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
+ case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
+ case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
+ case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
+ }
+ SDNode *RetVal = SelectAtomic64(Node, Opc);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_ADD: {
+ SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ // For operations of the form (x << C1) op C2, check if we can use a smaller
+ // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+ break;
+
+ // i8 is unshrinkable, i16 should be promoted to i32.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ break;
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!Cst || !ShlCst)
+ break;
+
+ int64_t Val = Cst->getSExtValue();
+ uint64_t ShlVal = ShlCst->getZExtValue();
+
+ // Make sure that we don't change the operation by removing bits.
+ // This only matters for OR and XOR, AND is unaffected.
+ uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
+ if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
+ break;
+
+ unsigned ShlOp, Op;
+ EVT CstVT = NVT;
+
+ // Check the minimum bitwidth for the new constant.
+ // TODO: AND32ri is the same as AND64ri32 with zext imm.
+ // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
+ // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+ if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
+ CstVT = MVT::i8;
+ else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
+ CstVT = MVT::i32;
+
+ // Bail if there is no smaller encoding.
+ if (NVT == CstVT)
+ break;
+
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i32:
+ assert(CstVT == MVT::i8);
+ ShlOp = X86::SHL32ri;
+
+ switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
+ case ISD::AND: Op = X86::AND32ri8; break;
+ case ISD::OR: Op = X86::OR32ri8; break;
+ case ISD::XOR: Op = X86::XOR32ri8; break;
+ }
+ break;
+ case MVT::i64:
+ assert(CstVT == MVT::i8 || CstVT == MVT::i32);
+ ShlOp = X86::SHL64ri;
+
+ switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
+ case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
+ case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
+ case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
+ }
+ break;
+ }
+
+ // Emit the smaller op and the shift.
+ SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
+ SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+ return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
+ getI8Imm(ShlVal));
+ }
+ case X86ISD::UMUL: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ unsigned LoReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
+ case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
+ case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
+ case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+ SDValue Ops[] = {N1, InFlag};
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+ ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
+ return NULL;
+ }
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SMUL_LOHI;
+ bool hasBMI2 = Subtarget->hasBMI2();
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
+ case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
+ case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
+ MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
+ case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
+ MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
+ }
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
+ case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
+ case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
+ }
+ }
+
+ unsigned SrcReg, LoReg, HiReg;
+ switch (Opc) {
+ default: llvm_unreachable("Unknown MUL opcode!");
+ case X86::IMUL8r:
+ case X86::MUL8r:
+ SrcReg = LoReg = X86::AL; HiReg = X86::AH;
+ break;
+ case X86::IMUL16r:
+ case X86::MUL16r:
+ SrcReg = LoReg = X86::AX; HiReg = X86::DX;
+ break;
+ case X86::IMUL32r:
+ case X86::MUL32r:
+ SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
+ break;
+ case X86::IMUL64r:
+ case X86::MUL64r:
+ SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
+ break;
+ case X86::MULX32rr:
+ SrcReg = X86::EDX; LoReg = HiReg = 0;
+ break;
+ case X86::MULX64rr:
+ SrcReg = X86::RDX; LoReg = HiReg = 0;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commmutative.
+ if (!foldedLoad) {
+ foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (foldedLoad)
+ std::swap(N0, N1);
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
+ N0, SDValue()).getValue(1);
+ SDValue ResHi, ResLo;
+
+ if (foldedLoad) {
+ SDValue Chain;
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ Chain = SDValue(CNode, 2);
+ InFlag = SDValue(CNode, 3);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ Chain = SDValue(CNode, 0);
+ InFlag = SDValue(CNode, 1);
+ }
+
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), Chain);
+ } else {
+ SDValue Ops[] = { N1, InFlag };
+ if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ InFlag = SDValue(CNode, 2);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 0);
+ }
+ }
+
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ // Get the low part if needed. Don't use getCopyFromReg for aliasing
+ // registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX down 8 bits.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
+ // Copy the low half of the result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ if (ResLo.getNode() == 0) {
+ assert(LoReg && "Register for low half is not defined!");
+ ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
+ InFlag);
+ InFlag = ResLo.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 0), ResLo);
+ DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ // Copy the high half of the result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ if (ResHi.getNode() == 0) {
+ assert(HiReg && "Register for high half is not defined!");
+ ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
+ InFlag);
+ InFlag = ResHi.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 1), ResHi);
+ DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+
+ return NULL;
+ }
+
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SDIVREM;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
+ case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
+ case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
+ }
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
+ case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+ case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+ }
+ }
+
+ unsigned LoReg, HiReg, ClrReg;
+ unsigned ClrOpcode, SExtOpcode;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8:
+ LoReg = X86::AL; ClrReg = HiReg = X86::AH;
+ ClrOpcode = 0;
+ SExtOpcode = X86::CBW;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX; HiReg = X86::DX;
+ ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;
+ SExtOpcode = X86::CWD;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
+ ClrOpcode = X86::MOV32r0;
+ SExtOpcode = X86::CDQ;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
+ ClrOpcode = X86::MOV64r0;
+ SExtOpcode = X86::CQO;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool signBitIsZero = CurDAG->SignBitIsZero(N0);
+
+ SDValue InFlag;
+ if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
+ // Special case for div8, just use a move with zero extension to AX to
+ // clear the upper 8 bits (AH).
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
+ MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ Chain = Move.getValue(1);
+ ReplaceUses(N0.getValue(1), Chain);
+ } else {
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
+ Chain = CurDAG->getEntryNode();
+ }
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
+ InFlag = Chain.getValue(1);
+ } else {
+ InFlag =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ LoReg, N0, SDValue()).getValue(1);
+ if (isSigned && !signBitIsZero) {
+ // Sign extend the low part into the high part.
+ InFlag =
+ SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
+ } else {
+ // Zero out the high part, effectively zero extending the input.
+ SDValue ClrNode =
+ SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
+ ClrNode, InFlag).getValue(1);
+ }
+ }
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
+ }
+
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+
+ // If we also need AL (the quotient), get it by extracting a subreg from
+ // Result. The fast register allocator does not like multiple CopyFromReg
+ // nodes using aliasing registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX right by 8 bits instead of using AH.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
+ // Copy the division (low) result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ // Copy the remainder (high) result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ return NULL;
+ }
+
+ case X86ISD::CMP:
+ case X86ISD::SUB: {
+ // Sometimes a SUB is used to perform comparison.
+ if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
+ // This node is not a CMP.
+ break;
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
+ // use a smaller encoding.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ HasNoSignedComparisonUses(Node))
+ // Look past the truncate if CMP is the only use of it.
+ N0 = N0.getOperand(0);
+ if ((N0.getNode()->getOpcode() == ISD::AND ||
+ (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
+ N0.getNode()->hasOneUse() &&
+ N0.getValueType() != MVT::i8 &&
+ X86::isZeroNode(N1)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
+ if (!C) break;
+
+ // For example, convert "testl %eax, $8" to "testb %al, $8"
+ if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
+ (!(C->getZExtValue() & 0x80) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // On x86-32, only the ABCD registers have 8-bit subregisters.
+ if (!Subtarget->is64Bit()) {
+ const TargetRegisterClass *TRC;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+ }
+
+ // Extract the l-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb.
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
+ }
+
+ // For example, "testl %eax, $2048" to "testb %ah, $8".
+ if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ // Shift the immediate right by 8 bits.
+ SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
+ MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Put the value in an ABCD register.
+ const TargetRegisterClass *TRC;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+
+ // Extract the h-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
+ // target GR8_NOREX registers, so make sure the register class is
+ // forced.
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
+ MVT::i32, Subreg, ShiftedImm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
+ }
+
+ // For example, "testl %eax, $32776" to "testw %ax, $32776".
+ if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
+ N0.getValueType() != MVT::i16 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 16-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
+ MVT::i16, Reg);
+
+ // Emit a testw.
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
+ }
+
+ // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
+ if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
+ N0.getValueType() == MVT::i64 &&
+ (!(C->getZExtValue() & 0x80000000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 32-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
+ MVT::i32, Reg);
+
+ // Emit a testl.
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
+ }
+ }
+ break;
+ }
+ case ISD::STORE: {
+ // Change a chain of {load; incr or dec; store} of the same value into
+ // a simple increment or decrement through memory of that value, if the
+ // uses of the modified value and its address are suitable.
+ // The DEC64m tablegen pattern is currently not able to match the case where
+ // the EFLAGS on the original DEC are used. (This also applies to
+ // {INC,DEC}X{64,32,16,8}.)
+ // We'll need to improve tablegen to allow flags to be transferred from a
+ // node in the pattern to the result node. probably with a new keyword
+ // for example, we have this
+ // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ // (implicit EFLAGS)]>;
+ // but maybe need something like this
+ // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ // (transferrable EFLAGS)]>;
+
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
+ SDValue StoredVal = StoreNode->getOperand(1);
+ unsigned Opc = StoredVal->getOpcode();
+
+ LoadSDNode *LoadNode = 0;
+ SDValue InputChain;
+ if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
+ LoadNode, InputChain))
+ break;
+
+ SDValue Base, Scale, Index, Disp, Segment;
+ if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
+ Base, Scale, Index, Disp, Segment))
+ break;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
+ MemOp[0] = StoreNode->getMemOperand();
+ MemOp[1] = LoadNode->getMemOperand();
+ const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
+ EVT LdVT = LoadNode->getMemoryVT();
+ unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
+ MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
+ Node->getDebugLoc(),
+ MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+ Result->setMemRefs(MemOp, MemOp + 2);
+
+ ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
+ ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
+
+ return Result;
+ }
+ }
+
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << '\n');
+
+ return ResNode;
+}
+
+bool X86DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1, Op2, Op3, Op4;
+ switch (ConstraintCode) {
+ case 'o': // offsetable ??
+ case 'v': // not offsetable ??
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ OutOps.push_back(Op2);
+ OutOps.push_back(Op3);
+ OutOps.push_back(Op4);
+ return false;
+}
+
+/// createX86ISelDag - This pass converts a legalized DAG into a
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new X86DAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
new file mode 100644
index 000000000000..69341869aa3e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -0,0 +1,18512 @@
+//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86ISelLowering.h"
+#include "Utils/X86ShuffleDecode.h"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86TargetMachine.h"
+#include "X86TargetObjectFile.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/VariadicFunction.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+#include <bitset>
+#include <cctype>
+using namespace llvm;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+// Forward declarations.
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2);
+
+/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 instruction or a
+/// simple subregister reference. Idx is an index in the 128 bits we
+/// want. It need not be aligned to a 128-bit bounday. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, DebugLoc dl) {
+ EVT VT = Vec.getValueType();
+ assert(VT.is256BitVector() && "Unexpected vector size!");
+ EVT ElVT = VT.getVectorElementType();
+ unsigned Factor = VT.getSizeInBits()/128;
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
+ VT.getVectorNumElements()/Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(ResultVT);
+
+ // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
+ // we can match to VEXTRACTF128.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ // If the input is a buildvector just emit a smaller one.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+ Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
+ SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+ VecIdx);
+
+ return Result;
+}
+
+/// Generate a DAG to put 128-bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128 instruction or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit bounday. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
+ unsigned IdxVal, SelectionDAG &DAG,
+ DebugLoc dl) {
+ // Inserting UNDEF is Result
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return Result;
+
+ EVT VT = Vec.getValueType();
+ assert(VT.is128BitVector() && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant 128 bits.
+ unsigned ElemsPerChunk = 128/ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
+}
+
+/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
+/// instructions. This is used because creating CONCAT_VECTOR nodes of
+/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
+/// large BUILD_VECTORS.
+static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
+}
+
+static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ bool is64Bit = Subtarget->is64Bit();
+
+ if (Subtarget->isTargetEnvMacho()) {
+ if (is64Bit)
+ return new X86_64MachoTargetObjectFile();
+ return new TargetLoweringObjectFileMachO();
+ }
+
+ if (Subtarget->isTargetLinux())
+ return new X86LinuxTargetObjectFile();
+ if (Subtarget->isTargetELF())
+ return new TargetLoweringObjectFileELF();
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ return new TargetLoweringObjectFileCOFF();
+ llvm_unreachable("unknown subtarget type");
+}
+
+X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
+
+ RegInfo = TM.getRegisterInfo();
+ TD = getDataLayout();
+
+ // Set up the TargetLowering object.
+ static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
+
+ // X86 is weird, it always uses i8 for shift amounts and setcc results.
+ setBooleanContents(ZeroOrOneBooleanContent);
+ // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ // For 64-bit since we have so many registers use the ILP scheduler, for
+ // 32-bit code use the register pressure specific scheduling.
+ // For Atom, always use ILP scheduling.
+ if (Subtarget->isAtom())
+ setSchedulingPreference(Sched::ILP);
+ else if (Subtarget->is64Bit())
+ setSchedulingPreference(Sched::ILP);
+ else
+ setSchedulingPreference(Sched::RegPressure);
+ setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
+
+ // Bypass expensive divides on Atom when compiling with O2
+ if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
+ addBypassSlowDiv(32, 8);
+ if (Subtarget->is64Bit())
+ addBypassSlowDiv(64, 16);
+ }
+
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
+ // Setup Windows compiler runtime calls.
+ setLibcallName(RTLIB::SDIV_I64, "_alldiv");
+ setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::SREM_I64, "_allrem");
+ setLibcallName(RTLIB::UREM_I64, "_aullrem");
+ setLibcallName(RTLIB::MUL_I64, "_allmul");
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
+
+ // The _ftol2 runtime function has an unusual calling conv, which
+ // is modeled by a special pseudo-instruction.
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, 0);
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, 0);
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, 0);
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, 0);
+ }
+
+ if (Subtarget->isTargetDarwin()) {
+ // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(false);
+ setUseUnderscoreLongJmp(false);
+ } else if (Subtarget->isTargetMingw()) {
+ // MS runtime is weird: it exports _setjmp, but longjmp!
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(false);
+ } else {
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+ }
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, &X86::GR8RegClass);
+ addRegisterClass(MVT::i16, &X86::GR16RegClass);
+ addRegisterClass(MVT::i32, &X86::GR32RegClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, &X86::GR64RegClass);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // We don't accept any truncstore of integer registers.
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ // SETOEQ and SETUNE require checking two conditions.
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
+
+ // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
+ // operation.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
+ } else if (!TM.Options.UseSoftFloat) {
+ // We have an algorithm for SSE2->double, and we turn this into a
+ // 64-bit FILD followed by conditional FADD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
+ // We have an algorithm for SSE2, and we turn this into a 64-bit
+ // FILD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ }
+
+ // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
+
+ if (!TM.Options.UseSoftFloat) {
+ // SSE has no i16 to fp conversion, only i32
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ }
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
+ }
+
+ // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
+ // are Legal, f80 is custom lowered.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
+
+ // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
+
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ }
+
+ // Handle FP_TO_UINT by promoting the destination to a larger signed
+ // conversion.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ } else if (!TM.Options.UseSoftFloat) {
+ // Since AVX is a superset of SSE3, only check for SSE here.
+ if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
+ // Expand FP_TO_UINT into a select.
+ // FIXME: We would like to use a Custom expander here eventually to do
+ // the optimal thing for SSE vs. the default expansion in the legalizer.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
+ else
+ // With SSE3 we can use fisttpll to convert to a signed i64; without
+ // SSE, we're stuck with a fistpll.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
+ }
+
+ if (isTargetFTOL()) {
+ // Use the _ftol2 runtime function, which has a pseudo-instruction
+ // to handle its weird calling convention.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
+ }
+
+ // TODO: when we have SSE, these could be more efficient, by using movd/movq.
+ if (!X86ScalarSSEf64) {
+ setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
+ // Without SSE, i64->f64 goes through memory.
+ setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
+ }
+ }
+
+ // Scalar integer divide and remainder are lowered to use operations that
+ // produce two results, to match the available instructions. This exposes
+ // the two-result form to trivial CSE, which is able to combine x/y and x%y
+ // into a single instruction.
+ //
+ // Scalar integer multiply-high is also lowered to use two-result
+ // operations, to match the available instructions. However, plain multiply
+ // (low) operations are left as Legal, as there are single-result
+ // instructions for this in x86. Using the two-result multiply instructions
+ // when both high and low results are needed must be arranged by dagcombine.
+ for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+ // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
+ setOperationAction(ISD::ADDC, VT, Custom);
+ setOperationAction(ISD::ADDE, VT, Custom);
+ setOperationAction(ISD::SUBC, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Custom);
+ }
+
+ setOperationAction(ISD::BR_JT , MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND , MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC , MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f80, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i8, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i16, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i64, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f64 , Expand);
+ setOperationAction(ISD::FREM , MVT::f80 , Expand);
+ setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+
+ // Promote the i8 variants and force them on up to i32 which has a shorter
+ // encoding.
+ setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
+ if (Subtarget->hasBMI()) {
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ } else {
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasLZCNT()) {
+ // When promoting the i8 variants, force them to i32 for a shorter
+ // encoding.
+ setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ } else {
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
+ }
+ }
+
+ if (Subtarget->hasPOPCNT()) {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ }
+
+ setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
+ setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+
+ // These should be promoted to a larger select which is supported.
+ setOperationAction(ISD::SELECT , MVT::i1 , Promote);
+ // X86 wants to expand cmov itself.
+ setOperationAction(ISD::SELECT , MVT::i8 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f64 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f80 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i8 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i16 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f80 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SELECT , MVT::i64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i64 , Custom);
+ }
+ setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
+ // SjLj exception handling but a light-weight setjmp/longjmp replacement to
+ // support continuation, user-level threading, and etc.. As a result, no
+ // other SjLj exception interfaces are implemented and please don't build
+ // your own exception handling based on them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+
+ // Darwin ABI issue.
+ setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
+ setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
+ setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
+ }
+ // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
+ setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasSSE1())
+ setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
+
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
+
+ // On X86 and X86-64, atomic operations are lowered to locked instructions.
+ // Locked instructions, in turn, have implicit fence semantics (all memory
+ // operations are flushed before issuing the locked instruction, and they
+ // are not buffered), so we can fold away the common pattern of
+ // fence-atomic-fence.
+ setShouldFoldAtomicFences(true);
+
+ // Expand certain atomics
+ for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
+ }
+
+ if (!Subtarget->is64Bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
+ }
+
+ if (Subtarget->hasCmpxchg16b()) {
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
+ }
+
+ // FIXME - use subtarget debug flags
+ if (!Subtarget->isTargetDarwin() &&
+ !Subtarget->isTargetELF() &&
+ !Subtarget->isTargetCygMing()) {
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ if (Subtarget->is64Bit()) {
+ setExceptionPointerRegister(X86::RAX);
+ setExceptionSelectorRegister(X86::RDX);
+ } else {
+ setExceptionPointerRegister(X86::EAX);
+ setExceptionSelectorRegister(X86::EDX);
+ }
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
+
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Custom);
+ else if (TM.Options.EnableSegmentedStacks)
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Expand);
+
+ if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
+ // f32 and f64 use SSE.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, &X86::FR32RegClass);
+ addRegisterClass(MVT::f64, &X86::FR64RegClass);
+
+ // Use ANDPD to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f64, Custom);
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f64, Custom);
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ // Use ANDPD and ORPD to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // Lower this to FGETSIGNx86 plus an AND.
+ setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
+ setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+
+ // Expand FP immediates into loads from the stack, except for the special
+ // cases we handle.
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
+ // Use SSE for f32, x87 for f64.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, &X86::FR32RegClass);
+ addRegisterClass(MVT::f64, &X86::RFP64RegClass);
+
+ // Use ANDPS to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+
+ // Use ANDPS and ORPS to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+
+ // Special cases we handle for FP constants.
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+
+ if (!TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ }
+ } else if (!TM.Options.UseSoftFloat) {
+ // f32 and f64 in x87.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f64, &X86::RFP64RegClass);
+ addRegisterClass(MVT::f32, &X86::RFP32RegClass);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ setOperationAction(ISD::UNDEF, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ if (!TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ }
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ }
+
+ // We don't support FMA.
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ // Long double always uses X87.
+ if (!TM.Options.UseSoftFloat) {
+ addRegisterClass(MVT::f80, &X86::RFP80RegClass);
+ setOperationAction(ISD::UNDEF, MVT::f80, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
+ {
+ APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
+ addLegalFPImmediate(TmpFlt); // FLD0
+ TmpFlt.changeSign();
+ addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+
+ bool ignored;
+ APFloat TmpFlt2(+1.0);
+ TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &ignored);
+ addLegalFPImmediate(TmpFlt2); // FLD1
+ TmpFlt2.changeSign();
+ addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
+ }
+
+ if (!TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f80, Expand);
+ setOperationAction(ISD::FCOS , MVT::f80, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
+ }
+
+ setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f80, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
+ setOperationAction(ISD::FRINT, MVT::f80, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
+ setOperationAction(ISD::FMA, MVT::f80, Expand);
+ }
+
+ // Always use a library call for pow.
+ setOperationAction(ISD::FPOW , MVT::f32 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f64 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f80 , Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f80, Expand);
+
+ // First set operation action for all vector types to either promote
+ // (for widening) or expand (for scalarization). Then we will selectively
+ // turn on ones that can be effectively codegen'd.
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ setOperationAction(ISD::ADD , VT, Expand);
+ setOperationAction(ISD::SUB , VT, Expand);
+ setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSUB, VT, Expand);
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::FMUL, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::LOAD, VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FMA, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::SHL, VT, Expand);
+ setOperationAction(ISD::SRA, VT, Expand);
+ setOperationAction(ISD::SRL, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
+ setOperationAction(ISD::TRUNCATE, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction(VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, Expand);
+ }
+
+ // FIXME: In order to prevent SSE instructions being expanded to MMX ones
+ // with -msoft-float, disable use of MMX as well.
+ if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
+ addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
+ // No operations on x86mmx supported, everything uses intrinsics.
+ }
+
+ // MMX-sized vectors (other than x86mmx) are expected to be expanded
+ // into smaller operations.
+ setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
+ setOperationAction(ISD::AND, MVT::v8i8, Expand);
+ setOperationAction(ISD::AND, MVT::v4i16, Expand);
+ setOperationAction(ISD::AND, MVT::v2i32, Expand);
+ setOperationAction(ISD::AND, MVT::v1i64, Expand);
+ setOperationAction(ISD::OR, MVT::v8i8, Expand);
+ setOperationAction(ISD::OR, MVT::v4i16, Expand);
+ setOperationAction(ISD::OR, MVT::v2i32, Expand);
+ setOperationAction(ISD::OR, MVT::v1i64, Expand);
+ setOperationAction(ISD::XOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::XOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
+
+ if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
+ addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v4f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ }
+
+ if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
+ addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
+
+ // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
+ // registers cannot be used even for integer operations.
+ addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
+ addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
+ addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
+ addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
+
+ setOperationAction(ISD::ADD, MVT::v16i8, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v4i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v4i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+ for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
+ continue;
+ // Do not attempt to custom lower non-128-bit vectors
+ if (!VT.is128BitVector())
+ continue;
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ }
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ }
+
+ // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
+ for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+
+ // Do not attempt to promote non-128-bit vectors
+ if (!VT.is128BitVector())
+ continue;
+
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v2i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v2i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
+ }
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom lower v2i64 and v2f64 selects.
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
+ setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ // As there is no 64-bit GPR available, we need build a special custom
+ // sequence to convert from v2i32 to v2f32.
+ if (!Subtarget->is64Bit())
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
+
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
+ }
+
+ if (Subtarget->hasSSE41()) {
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+
+ // FIXME: Do we need to handle scalar-to-vector here?
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+
+ // i8 and i16 vectors are custom , because the source register and source
+ // source memory operand types are not the same width. f32 vectors are
+ // custom since the immediate controlling the insert encodes additional
+ // information.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ // FIXME: these should be Legal but thats only for the case where
+ // the index is constant. For now custom expand to deal with that.
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ }
+ }
+
+ if (Subtarget->hasSSE2()) {
+ setOperationAction(ISD::SRL, MVT::v8i16, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v8i16, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v8i16, Custom);
+ setOperationAction(ISD::SRA, MVT::v16i8, Custom);
+
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
+ }
+
+ if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
+ addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
+ addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
+ addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
+ addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
+ addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
+ addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
+
+ setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
+
+ setOperationAction(ISD::FADD, MVT::v8f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v8f32, Custom);
+
+ setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::SRL, MVT::v16i16, Custom);
+ setOperationAction(ISD::SRL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v16i16, Custom);
+ setOperationAction(ISD::SHL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v16i16, Custom);
+ setOperationAction(ISD::SRA, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SDIV, MVT::v16i16, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
+
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
+
+ if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
+ setOperationAction(ISD::FMA, MVT::v8f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::f32, Legal);
+ setOperationAction(ISD::FMA, MVT::f64, Legal);
+ }
+
+ if (Subtarget->hasInt256()) {
+ setOperationAction(ISD::ADD, MVT::v4i64, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v16i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v32i8, Legal);
+
+ setOperationAction(ISD::SUB, MVT::v4i64, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v16i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v32i8, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, Legal);
+ setOperationAction(ISD::MUL, MVT::v16i16, Legal);
+ // Don't lower v32i8 because there is no 128-bit byte mul
+
+ setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
+
+ setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
+ } else {
+ setOperationAction(ISD::ADD, MVT::v4i64, Custom);
+ setOperationAction(ISD::ADD, MVT::v8i32, Custom);
+ setOperationAction(ISD::ADD, MVT::v16i16, Custom);
+ setOperationAction(ISD::ADD, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SUB, MVT::v4i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v8i32, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i16, Custom);
+ setOperationAction(ISD::SUB, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i16, Custom);
+ // Don't lower v32i8 because there is no 128-bit byte mul
+ }
+
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
+
+ // Custom lower several nodes for 256-bit types.
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+
+ // Extract subvector is special because the value type
+ // (result) is 128-bit but the source is 256-bit wide.
+ if (VT.is128BitVector())
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+
+ // Do not attempt to custom lower other non-256-bit vectors
+ if (!VT.is256BitVector())
+ continue;
+
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ }
+
+ // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
+ for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+
+ // Do not attempt to promote non-256-bit vectors
+ if (!VT.is256BitVector())
+ continue;
+
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v4i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v4i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
+ }
+ }
+
+ // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
+ // of this type with custom code.
+ for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
+ VT != MVT::LAST_VECTOR_VALUETYPE; VT++) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
+ Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
+ // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
+ // handle type legalization for these operations here.
+ //
+ // FIXME: We really should do custom legalization for addition and
+ // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
+ // than generic legalization for 64-bit multiplication-with-overflow, though.
+ for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+ // Add/Sub/Mul with overflow operations are custom lowered.
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::SADDO, VT, Custom);
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::SSUBO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
+ setOperationAction(ISD::SMULO, VT, Custom);
+ setOperationAction(ISD::UMULO, VT, Custom);
+ }
+
+ // There are no 8-bit 3-address imul/mul instructions
+ setOperationAction(ISD::SMULO, MVT::i8, Expand);
+ setOperationAction(ISD::UMULO, MVT::i8, Expand);
+
+ if (!Subtarget->is64Bit()) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+ }
+
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->isTargetDarwin()) {
+ // For MacOSX, we don't want to the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret to avoid memory
+ // traffic.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FMA);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+ setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::SETCC);
+ if (Subtarget->is64Bit())
+ setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::XOR);
+
+ computeRegisterProperties();
+
+ // On Darwin, -Os means optimize for size without hurting performance,
+ // do not reduce the limit.
+ MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ setPrefLoopAlignment(4); // 2^4 bytes.
+
+ // Predictable cmov don't hurt on atom because it's in-order.
+ PredictableSelectIsExpensive = !Subtarget->isAtom();
+
+ setPrefFunctionAlignment(4); // 2^4 bytes.
+}
+
+EVT X86TargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return MVT::i8;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
+ if (MaxAlign == 16)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->getBitWidth() == 128)
+ MaxAlign = 16;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == 16)
+ break;
+ }
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. For X86, aggregates
+/// that contain SSE vectors are placed at 16-byte boundaries while the rest
+/// are at 4-byte boundaries.
+unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
+ if (Subtarget->is64Bit()) {
+ // Max of 8 and alignment of type.
+ unsigned TyAlign = TD->getABITypeAlignment(Ty);
+ if (TyAlign > 8)
+ return TyAlign;
+ return 8;
+ }
+
+ unsigned Align = 4;
+ if (Subtarget->hasSSE1())
+ getMaxByValAlign(Ty, Align);
+ return Align;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero that means it's safe to destination
+/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+/// means there isn't a need to check it against alignment requirement,
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+EVT
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+ if ((!IsMemset || ZeroMemset) &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) {
+ if (Size >= 16 &&
+ (Subtarget->isUnalignedMemAccessFast() ||
+ ((DstAlign == 0 || DstAlign >= 16) &&
+ (SrcAlign == 0 || SrcAlign >= 16)))) {
+ if (Size >= 32) {
+ if (Subtarget->hasInt256())
+ return MVT::v8i32;
+ if (Subtarget->hasFp256())
+ return MVT::v8f32;
+ }
+ if (Subtarget->hasSSE2())
+ return MVT::v4i32;
+ if (Subtarget->hasSSE1())
+ return MVT::v4f32;
+ } else if (!MemcpyStrSrc && Size >= 8 &&
+ !Subtarget->is64Bit() &&
+ Subtarget->hasSSE2()) {
+ // Do not use f64 to lower memcpy if source is string constant. It's
+ // better to use i32 to avoid the loads.
+ return MVT::f64;
+ }
+ }
+ if (Subtarget->is64Bit() && Size >= 8)
+ return MVT::i64;
+ return MVT::i32;
+}
+
+bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
+ if (VT == MVT::f32)
+ return X86ScalarSSEf32;
+ else if (VT == MVT::f64)
+ return X86ScalarSSEf64;
+ return true;
+}
+
+bool
+X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ if (Fast)
+ *Fast = Subtarget->isUnalignedMemAccessFast();
+ return true;
+}
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned X86TargetLowering::getJumpTableEncoding() const {
+ // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
+ // symbol.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ return MachineJumpTableInfo::EK_Custom32;
+
+ // Otherwise, use the normal jump table encoding heuristics.
+ return TargetLowering::getJumpTableEncoding();
+}
+
+const MCExpr *
+X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid,MCContext &Ctx) const{
+ assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT());
+ // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
+ // entries.
+ return MCSymbolRefExpr::Create(MBB->getSymbol(),
+ MCSymbolRefExpr::VK_GOTOFF, Ctx);
+}
+
+/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
+/// jumptable.
+SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (!Subtarget->is64Bit())
+ // This doesn't have DebugLoc associated with it, but is not really the
+ // same as a Register.
+ return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy());
+ return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *X86TargetLowering::
+getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
+ MCContext &Ctx) const {
+ // X86-64 uses RIP relative addressing based on the jump table label.
+ if (Subtarget->isPICStyleRIPRel())
+ return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
+
+ // Otherwise, the reference is relative to the PIC base.
+ return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+}
+
+// FIXME: Why this routine is here? Move to RegInfo!
+std::pair<const TargetRegisterClass*, uint8_t>
+X86TargetLowering::findRepresentativeClass(MVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+ RRC = Subtarget->is64Bit() ?
+ (const TargetRegisterClass*)&X86::GR64RegClass :
+ (const TargetRegisterClass*)&X86::GR32RegClass;
+ break;
+ case MVT::x86mmx:
+ RRC = &X86::VR64RegClass;
+ break;
+ case MVT::f32: case MVT::f64:
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
+ case MVT::v4f64:
+ RRC = &X86::VR128RegClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
+ unsigned &Offset) const {
+ if (!Subtarget->isTargetLinux())
+ return false;
+
+ if (Subtarget->is64Bit()) {
+ // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
+ Offset = 0x28;
+ if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
+ AddressSpace = 256;
+ else
+ AddressSpace = 257;
+ } else {
+ // %gs:0x14 on i386
+ Offset = 0x14;
+ AddressSpace = 256;
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "X86GenCallingConv.inc"
+
+bool
+X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_X86);
+}
+
+SDValue
+X86TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ SDValue Flag;
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Operand #1 = Bytes To Pop
+ RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
+ MVT::i16));
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue ValToCopy = OutVals[i];
+ EVT ValVT = ValToCopy.getValueType();
+
+ // Promote values to the appropriate types
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ else if (VA.getLocInfo() == CCValAssign::AExt)
+ ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ else if (VA.getLocInfo() == CCValAssign::BCvt)
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values,
+ // or SSE or MMX vectors.
+ if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
+ VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
+ (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+ // Likewise we can't return F64 values with SSE1 only. gcc does so, but
+ // llvm-gcc has never done it right and no one has noticed, so this
+ // should be OK for now.
+ if (ValVT == MVT::f64 &&
+ (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
+ report_fatal_error("SSE2 register return with SSE2 disabled");
+
+ // Returns in ST0/ST1 are handled specially: these are pushed as operands to
+ // the RET instruction and handled by the FP Stackifier.
+ if (VA.getLocReg() == X86::ST0 ||
+ VA.getLocReg() == X86::ST1) {
+ // If this is a copy from an xmm register to ST(0), use an FPExtend to
+ // change the value to the FP stack register class.
+ if (isScalarFPTypeInSSEReg(VA.getValVT()))
+ ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
+ RetOps.push_back(ValToCopy);
+ // Don't emit a copytoreg.
+ continue;
+ }
+
+ // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
+ // which is returned in RAX / RDX.
+ if (Subtarget->is64Bit()) {
+ if (ValVT == MVT::x86mmx) {
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ ValToCopy);
+ // If we don't have SSE2 available, convert to v4f32 so the generated
+ // register is legal.
+ if (!Subtarget->hasSSE2())
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
+ }
+ }
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // Win32 requires us to put the sret argument to %eax as well.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into %rax/%eax.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments().");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ unsigned RetValReg
+ = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
+ X86::RAX : X86::EAX;
+ Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
+ Flag = Chain.getValue(1);
+
+ // RAX/EAX now acts like a return value.
+ RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(X86ISD::RET_FLAG, dl,
+ MVT::Other, &RetOps[0], RetOps.size());
+}
+
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDValue TCChain = Chain;
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() != ISD::FP_EXTEND)
+ return false;
+
+ bool HasRet = false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != X86ISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
+}
+
+MVT
+X86TargetLowering::getTypeForExtArgOrReturn(MVT VT,
+ ISD::NodeType ExtendKind) const {
+ MVT ReturnMVT;
+ // TODO: Is this also valid on 32-bit?
+ if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
+ ReturnMVT = MVT::i8;
+ else
+ ReturnMVT = MVT::i32;
+
+ MVT MinVT = getRegisterType(ReturnMVT);
+ return VT.bitsLT(MinVT) ? MinVT : VT;
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ bool Is64Bit = Subtarget->is64Bit();
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ EVT CopyVT = VA.getValVT();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values
+ if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+
+ SDValue Val;
+
+ // If this is a call to a function that returns an fp value on the floating
+ // point stack, we must guarantee the value is popped from the stack, so
+ // a CopyFromReg is not good enough - the copy instruction may be eliminated
+ // if the return value is not used. We use the FpPOP_RETVAL instruction
+ // instead.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
+ SDValue Ops[] = { Chain, InFlag };
+ Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
+ MVT::Other, MVT::Glue, Ops, 2), 1);
+ Val = Chain.getValue(0);
+
+ // Round the f80 to the right size, which also moves it to the appropriate
+ // xmm register.
+ if (CopyVT != VA.getValVT())
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ CopyVT, InFlag).getValue(1);
+ Val = Chain.getValue(0);
+ }
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// C & StdCall & Fast Calling Convention implementation
+//===----------------------------------------------------------------------===//
+// StdCall calling convention seems to be standard for many Windows' API
+// routines and around. It differs from C calling convention just a little:
+// callee should clean up the stack, not caller. Symbols should be also
+// decorated in some fancy way :) It doesn't support any vector arguments.
+// For info on fast calling convention see Fast Calling Convention (tail call)
+// implementation LowerX86_32FastCCCallTo.
+
+/// CallIsStructReturn - Determines whether a call uses struct return
+/// semantics.
+enum StructReturnType {
+ NotStructReturn,
+ RegStructReturn,
+ StackStructReturn
+};
+static StructReturnType
+callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ if (Outs.empty())
+ return NotStructReturn;
+
+ const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
+ if (!Flags.isSRet())
+ return NotStructReturn;
+ if (Flags.isInReg())
+ return RegStructReturn;
+ return StackStructReturn;
+}
+
+/// ArgsAreStructReturn - Determines whether a function uses struct
+/// return semantics.
+static StructReturnType
+argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+ if (Ins.empty())
+ return NotStructReturn;
+
+ const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
+ if (!Flags.isSRet())
+ return NotStructReturn;
+ if (Flags.isInReg())
+ return RegStructReturn;
+ return StackStructReturn;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile*/false, /*AlwaysInline=*/true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+/// IsTailCallConvention - Return true if the calling convention is one that
+/// supports tail call optimization.
+static bool IsTailCallConvention(CallingConv::ID CC) {
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
+ CC == CallingConv::HiPE);
+}
+
+bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
+ return false;
+
+ CallSite CS(CI);
+ CallingConv::ID CalleeCC = CS.getCallingConv();
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ return true;
+}
+
+/// FuncIsMadeTailCallSafe - Return true if the function is being made into
+/// a tailcall target by changing its ABI.
+static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
+ bool GuaranteedTailCallOpt) {
+ return GuaranteedTailCallOpt && IsTailCallConvention(CC);
+}
+
+SDValue
+X86TargetLowering::LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned i) const {
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv,
+ getTargetMachine().Options.GuaranteedTailCallOpt);
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+ EVT ValVT;
+
+ // If value is passed by pointer we have address passed instead of the value
+ // itself.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ValVT = VA.getLocVT();
+ else
+ ValVT = VA.getValVT();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can be
+ // changed with more analysis.
+ // In case of tail call optimization mark all arguments mutable. Since they
+ // could be overwritten by lowering of arguments in case of a tail call.
+ if (Flags.isByVal()) {
+ unsigned Bytes = Flags.getByValSize();
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
+ return DAG.getFrameIndex(FI, getPointerTy());
+ } else {
+ int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ return DAG.getLoad(ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ }
+}
+
+SDValue
+X86TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Function* Fn = MF.getFunction();
+ if (Fn->hasExternalLinkage() &&
+ Subtarget->isTargetCygMing() &&
+ Fn->getName() == "main")
+ FuncInfo->setForceFramePointer(true);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWindows = Subtarget->isTargetWindows();
+ bool IsWin64 = Subtarget->isTargetWin64();
+
+ assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ "Var args not supported with calling convention fastcc, ghc or hipe");
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
+
+ unsigned LastVal = ~0U;
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
+ // places.
+ assert(VA.getValNo() != LastVal &&
+ "Don't support value assigned to multiple locs yet");
+ (void)LastVal;
+ LastVal = VA.getValNo();
+
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC;
+ if (RegVT == MVT::i32)
+ RC = &X86::GR32RegClass;
+ else if (Is64Bit && RegVT == MVT::i64)
+ RC = &X86::GR64RegClass;
+ else if (RegVT == MVT::f32)
+ RC = &X86::FR32RegClass;
+ else if (RegVT == MVT::f64)
+ RC = &X86::FR64RegClass;
+ else if (RegVT.is256BitVector())
+ RC = &X86::VR256RegClass;
+ else if (RegVT.is128BitVector())
+ RC = &X86::VR128RegClass;
+ else if (RegVT == MVT::x86mmx)
+ RC = &X86::VR64RegClass;
+ else
+ llvm_unreachable("Unknown argument type!");
+
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::BCvt)
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+
+ if (VA.isExtInLoc()) {
+ // Handle MMX values passed in XMM regs.
+ if (RegVT.isVector())
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
+ else
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+ } else {
+ assert(VA.isMemLoc());
+ ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
+ }
+
+ // If value is passed via pointer - do a load.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
+ MachinePointerInfo(), false, false, false, 0);
+
+ InVals.push_back(ArgValue);
+ }
+
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // Win32 requires us to put the sret argument to %eax as well.
+ // Save the argument into a virtual register so that we can access it
+ // from the return points.
+ if (MF.getFunction()->hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ MVT PtrTy = getPointerTy();
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ unsigned StackSize = CCInfo.getNextStackOffset();
+ // Align stack specially for tail calls.
+ if (FuncIsMadeTailCallSafe(CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt))
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
+ }
+ if (Is64Bit) {
+ unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
+
+ // FIXME: We should really autogenerate these arrays
+ static const uint16_t GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ static const uint16_t GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const uint16_t XMMArgRegs64Bit[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ const uint16_t *GPR64ArgRegs;
+ unsigned NumXMMRegs = 0;
+
+ if (IsWin64) {
+ // The XMM registers which might contain var arg parameters are shadowed
+ // in their paired GPR. So we only need to save the GPR to their home
+ // slots.
+ TotalNumIntRegs = 4;
+ GPR64ArgRegs = GPR64ArgRegsWin64;
+ } else {
+ TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
+ GPR64ArgRegs = GPR64ArgRegs64Bit;
+
+ NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit,
+ TotalNumXMMRegs);
+ }
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
+ TotalNumIntRegs);
+
+ bool NoImplicitFloatOps = Fn->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+ "SSE register cannot be used when SSE is disabled!");
+ assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
+ NoImplicitFloatOps) &&
+ "SSE register cannot be used when SSE is disabled!");
+ if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
+ !Subtarget->hasSSE1())
+ // Kernel mode asks for SSE to be disabled, so don't push them
+ // on the stack.
+ TotalNumXMMRegs = 0;
+
+ if (IsWin64) {
+ const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+ // Fixup to set vararg frame on shadow area (4 x i64).
+ if (NumIntRegs < 4)
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ } else {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so
+ // they may be loaded by deferencing the result of va_next.
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+ false));
+ }
+
+ // Store the integer parameter registers.
+ SmallVector<SDValue, 8> MemOps;
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
+ unsigned Offset = FuncInfo->getVarArgsGPOffset();
+ for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(Offset));
+ unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
+ &X86::GR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(
+ FuncInfo->getRegSaveFrameIndex(), Offset),
+ false, false, 0);
+ MemOps.push_back(Store);
+ Offset += 8;
+ }
+
+ if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
+ // Now store the XMM (fp + vector) parameter registers.
+ SmallVector<SDValue, 11> SaveXMMOps;
+ SaveXMMOps.push_back(Chain);
+
+ unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
+ SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
+ SaveXMMOps.push_back(ALVal);
+
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getRegSaveFrameIndex()));
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getVarArgsFPOffset()));
+
+ for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
+ unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
+ &X86::VR128RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
+ SaveXMMOps.push_back(Val);
+ }
+ MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
+ MVT::Other,
+ &SaveXMMOps[0], SaveXMMOps.size()));
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+ }
+
+ // Some CCs need callee pop.
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ MF.getTarget().Options.GuaranteedTailCallOpt)) {
+ FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
+ } else {
+ FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
+ // If this is an sret function, the return should pop the hidden pointer.
+ if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
+ argsAreStructReturn(Ins) == StackStructReturn)
+ FuncInfo->setBytesToPopOnReturn(4);
+ }
+
+ if (!Is64Bit) {
+ // RegSaveFrameIndex is X86-64 only.
+ FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
+ if (CallConv == CallingConv::X86_FastCall ||
+ CallConv == CallingConv::X86_ThisCall)
+ // fastcc functions can't have varargs.
+ FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
+ }
+
+ FuncInfo->setArgumentStackSize(StackSize);
+
+ return Chain;
+}
+
+SDValue
+X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal())
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+}
+
+/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
+/// optimization is performed and it is required.
+SDValue
+X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
+ SDValue &OutRetAddr, SDValue Chain,
+ bool IsTailCall, bool Is64Bit,
+ int FPDiff, DebugLoc dl) const {
+ // Adjust the Return address stack slot.
+ EVT VT = getPointerTy();
+ OutRetAddr = getReturnAddressFrameIndex(DAG);
+
+ // Load the "old" Return address.
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
+ false, false, false, 0);
+ return SDValue(OutRetAddr.getNode(), 1);
+}
+
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDValue
+EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
+ SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT,
+ unsigned SlotSize, int FPDiff, DebugLoc dl) {
+ // Store the return address to the appropriate stack slot.
+ if (!FPDiff) return Chain;
+ // Calculate the new stack slot for the return address.
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
+ Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(NewReturnAddrFI),
+ false, false, 0);
+ return Chain;
+}
+
+SDValue
+X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool &isTailCall = CLI.IsTailCall;
+ bool isVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
+ bool IsWindows = Subtarget->isTargetWindows();
+ StructReturnType SR = callIsStructReturn(Outs);
+ bool IsSibcall = false;
+
+ if (MF.getTarget().Options.DisableTailCalls)
+ isTailCall = false;
+
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, SR != NotStructReturn,
+ MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
+ Outs, OutVals, Ins, DAG);
+
+ // Sibcalls are automatically detected tailcalls which do not require
+ // ABI changes.
+ if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
+ IsSibcall = true;
+
+ if (isTailCall)
+ ++NumTailCalls;
+ }
+
+ assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ "Var args not supported with calling convention fastcc, ghc or hipe");
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ if (IsSibcall)
+ // This is a sibcall. The memory operands are available in caller's
+ // own caller's stack.
+ NumBytes = 0;
+ else if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ IsTailCallConvention(CallConv))
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+
+ int FPDiff = 0;
+ if (isTailCall && !IsSibcall) {
+ // Lower arguments at fp - stackoffset + fpdiff.
+ X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
+
+ FPDiff = NumBytesCallerPushed - NumBytes;
+
+ // Set the delta of movement of the returnaddr stackslot.
+ // But only set if delta is greater than previous delta.
+ if (FPDiff < X86Info->getTCReturnAddrDelta())
+ X86Info->setTCReturnAddrDelta(FPDiff);
+ }
+
+ if (!IsSibcall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue RetAddrFrIdx;
+ // Load return address for tail calls.
+ if (isTailCall && FPDiff)
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
+ Is64Bit, FPDiff, dl);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization arguments are handle later.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ if (RegVT.is128BitVector()) {
+ // Special case: passing MMX values in XMM registers.
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+ Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+ } else
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
+ break;
+ case CCValAssign::Indirect: {
+ // Store the argument.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ Arg = SpillSlot;
+ break;
+ }
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (isVarArg && IsWin64) {
+ // Win64 ABI requires argument XMM reg to be copied to the corresponding
+ // shadow reg if callee is a varargs function.
+ unsigned ShadowReg = 0;
+ switch (VA.getLocReg()) {
+ case X86::XMM0: ShadowReg = X86::RCX; break;
+ case X86::XMM1: ShadowReg = X86::RDX; break;
+ case X86::XMM2: ShadowReg = X86::R8; break;
+ case X86::XMM3: ShadowReg = X86::R9; break;
+ }
+ if (ShadowReg)
+ RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+ }
+ } else if (!IsSibcall && (!isTailCall || isByVal)) {
+ assert(VA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
+ getPointerTy());
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ if (Subtarget->isPICStyleGOT()) {
+ // ELF / PIC requires GOT in the EBX register before function calls via PLT
+ // GOT pointer.
+ if (!isTailCall) {
+ RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
+ DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy())));
+ } else {
+ // If we are tail calling and generating PIC/GOT style code load the
+ // address of the callee into ECX. The value in ecx is used as target of
+ // the tail jump. This is done to circumvent the ebx/callee-saved problem
+ // for tail calls on PIC/GOT architectures. Normally we would just put the
+ // address of GOT into ebx and then call target@PLT. But for tail calls
+ // ebx would be restored (since ebx is callee saved) before jumping to the
+ // target@PLT.
+
+ // Note: The actual moving to ECX is done further down.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ !G->getGlobal()->hasProtectedVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee, DAG);
+ }
+ }
+
+ if (Is64Bit && isVarArg && !IsWin64) {
+ // From AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+ // the declaration) %al is used as hidden argument to specify the number
+ // of SSE registers used. The contents of %al do not need to match exactly
+ // the number of registers, but must be an ubound on the number of SSE
+ // registers used and is in the range 0 - 8 inclusive.
+
+ // Count the number of XMM registers allocated.
+ static const uint16_t XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ assert((Subtarget->hasSSE1() || !NumXMMRegs)
+ && "SSE registers cannot be used when SSE is disabled");
+
+ RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
+ DAG.getConstant(NumXMMRegs, MVT::i8)));
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
+
+ SmallVector<SDValue, 8> MemOpChains2;
+ SDValue FIN;
+ int FI = 0;
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc())
+ continue;
+ assert(VA.isMemLoc());
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal()) {
+ // Copy relative to framepointer.
+ SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl,
+ RegInfo->getStackRegister(),
+ getPointerTy());
+ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+ ArgChain,
+ Flags, DAG, dl));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(
+ DAG.getStore(ArgChain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+ }
+
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
+ getPointerTy(), RegInfo->getSlotSize(),
+ FPDiff, dl);
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+ // In the 64-bit large code model, we have to make all calls
+ // through a register, since the call instruction's 32-bit
+ // pc-relative offset may not be large enough to hold the whole
+ // address.
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // If the callee is a GlobalAddress node (quite common, every direct call
+ // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
+ // it.
+
+ // We should use extra load for direct calls to dllimported functions in
+ // non-JIT mode.
+ const GlobalValue *GV = G->getGlobal();
+ if (!GV->hasDLLImportLinkage()) {
+ unsigned char OpFlags = 0;
+ bool ExtraLoad = false;
+ unsigned WrapperKind = ISD::DELETED_NODE;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+ // external symbols most go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ isa<Function>(GV) &&
+ cast<Function>(GV)->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NonLazyBind)) {
+ // If the function is marked as non-lazy, generate an indirect call
+ // which loads from the GOT directly. This avoids runtime overhead
+ // at the cost of eager binding (and one extra byte of encoding).
+ OpFlags = X86II::MO_GOTPCREL;
+ WrapperKind = X86ISD::WrapperRIP;
+ ExtraLoad = true;
+ }
+
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
+ G->getOffset(), OpFlags);
+
+ // Add a wrapper if needed.
+ if (WrapperKind != ISD::DELETED_NODE)
+ Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
+ // Add extra indirection if needed.
+ if (ExtraLoad)
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, false, 0);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+ // external symbols should go through the PLT.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ OpFlags);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+
+ if (!IsSibcall && isTailCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ if (isTailCall) {
+ // We used to do:
+ //// If this is the first return lowered for this function, add the regs
+ //// to the liveout set for the function.
+ // This isn't right, although it's probably harmless on x86; liveouts
+ // should be computed from returns not tail calls. Consider a void
+ // function making a tail call to a function returning int.
+ return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush;
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ getTargetMachine().Options.GuaranteedTailCallOpt))
+ NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
+ SR == StackStructReturn)
+ // If this is a call to a struct-return function, the callee
+ // pops the hidden struct pointer, so we have to push it back.
+ // This is common for Darwin/X86, Linux & Mingw32 targets.
+ // For MSVC Win32 targets, the caller pops the hidden struct pointer.
+ NumBytesForCalleeToPush = 4;
+ else
+ NumBytesForCalleeToPush = 0; // Callee pops nothing.
+
+ // Returns a flag for retval copy to use.
+ if (!IsSibcall) {
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush,
+ true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+// Like std call, callee cleans arguments, convention except that ECX is
+// reserved for storing the tail called function address. Only 2 registers are
+// free for argument passing (inreg). Tail call optimization is performed
+// provided:
+// * tailcallopt is enabled
+// * caller/callee are fastcc
+// On X86_64 architecture with GOT-style position independent code only local
+// (within module) calls are supported at the moment.
+// To keep the stack aligned according to platform abi the function
+// GetAlignedArgumentStackSize ensures that argument delta is always multiples
+// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
+// If a tail called function callee has more arguments than the caller the
+// caller needs to make sure that there is room to move the RETADDR to. This is
+// achieved by reserving an area the size of the argument delta right after the
+// original REtADDR, but before the saved framepointer or the spilled registers
+// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
+// stack layout:
+// arg1
+// arg2
+// RETADDR
+// [ new RETADDR
+// move area ]
+// (possible EBP)
+// ESI
+// EDI
+// local1 ..
+
+/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned
+/// for a 16 byte align requirement.
+unsigned
+X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG& DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ uint64_t AlignMask = StackAlignment - 1;
+ int64_t Offset = StackSize;
+ unsigned SlotSize = RegInfo->getSlotSize();
+ if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
+ // Number smaller than 12 so just add the difference.
+ Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+ } else {
+ // Mask out lower bits, add stackalignment once plus the 12 bytes.
+ Offset = ((~AlignMask) & Offset) + StackAlignment +
+ (StackAlignment-SlotSize);
+ }
+ return Offset;
+}
+
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const X86InstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ unsigned Opcode = Def->getOpcode();
+ if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
+ Def->getOperand(1).isFI()) {
+ FI = Def->getOperand(1).getIndex();
+ Bytes = Flags.getByValSize();
+ } else
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
+ FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
+ FI = FINode->getIndex();
+ Bytes = Flags.getByValSize();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ Type *RetTy,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG &DAG) const {
+ if (!IsTailCallConvention(CalleeCC) &&
+ CalleeCC != CallingConv::C)
+ return false;
+
+ // If -tailcallopt is specified, make fastcc functions tail-callable.
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+
+ // If the function return type is x86_fp80 and the callee return type is not,
+ // then the FP_EXTEND of the call result is not a nop. It's not safe to
+ // perform a tailcall optimization here.
+ if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
+ return false;
+
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
+ if (IsTailCallConvention(CalleeCC) && CCMatch)
+ return true;
+ return false;
+ }
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
+ // emit a special epilogue.
+ if (RegInfo->needsStackRealignment(MF))
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // An stdcall caller is expected to clean up its arguments; the callee
+ // isn't going to do that.
+ if (!CCMatch && CallerCC == CallingConv::X86_StdCall)
+ return false;
+
+ // Do not sibcall optimize vararg calls unless all arguments are passed via
+ // registers.
+ if (isVarArg && !Outs.empty()) {
+
+ // Optimizing for varargs on Win64 is unlikely to be safe without
+ // additional testing.
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
+ // If the call result is in ST0 / ST1, it needs to be popped off the x87
+ // stack. Therefore, if it's not used by the call it is not safe to optimize
+ // this into a sibcall.
+ bool Unused = false;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ if (!Ins[i].Used) {
+ Unused = true;
+ break;
+ }
+ }
+ if (Unused) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+ }
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
+ return false;
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const X86InstrInfo *TII =
+ ((const X86TargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+
+ // If the tailcall address may be in a register, then make sure it's
+ // possible to register allocate for it. In 32-bit, the call address can
+ // only target EAX, EDX, or ECX since the tail call must be scheduled after
+ // callee-saved registers are restored. These happen to be the same
+ // registers used to pass 'inreg' arguments so watch out for those.
+ if (!Subtarget->is64Bit() &&
+ ((!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) ||
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)) {
+ unsigned NumInRegs = 0;
+ // In PIC we need an extra register to formulate the address computation
+ // for the callee.
+ unsigned MaxInRegs =
+ (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (!VA.isRegLoc())
+ continue;
+ unsigned Reg = VA.getLocReg();
+ switch (Reg) {
+ default: break;
+ case X86::EAX: case X86::EDX: case X86::ECX:
+ if (++NumInRegs == MaxInRegs)
+ return false;
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+FastISel *
+X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return X86::createFastISel(funcInfo, libInfo);
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+static bool MayFoldLoad(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+}
+
+static bool MayFoldIntoStore(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
+}
+
+static bool isTargetShuffle(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFP:
+ case X86ISD::PALIGNR:
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
+ case X86ISD::VPERMI:
+ return true;
+ }
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
+ return DAG.getNode(Opc, dl, VT, V1);
+ }
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, unsigned TargetMask,
+ SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERMI:
+ return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
+ }
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, unsigned TargetMask,
+ SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PALIGNR:
+ case X86ISD::SHUFP:
+ case X86ISD::VPERM2X128:
+ return DAG.getNode(Opc, dl, VT, V1, V2,
+ DAG.getConstant(TargetMask, MVT::i8));
+ }
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ return DAG.getNode(Opc, dl, VT, V1, V2);
+ }
+}
+
+SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ unsigned SlotSize = RegInfo->getSlotSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ false);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement) {
+ // Offset should fit into 32 bit immediate field.
+ if (!isInt<32>(Offset))
+ return false;
+
+ // If we don't have a symbolic displacement - we don't have any extra
+ // restrictions.
+ if (!hasSymbolicDisplacement)
+ return true;
+
+ // FIXME: Some tweaks might be needed for medium code model.
+ if (M != CodeModel::Small && M != CodeModel::Kernel)
+ return false;
+
+ // For small code model we assume that latest object is 16MB before end of 31
+ // bits boundary. We may also accept pretty large negative constants knowing
+ // that all objects are in the positive half of address space.
+ if (M == CodeModel::Small && Offset < 16*1024*1024)
+ return true;
+
+ // For kernel code model we know that all object resist in the negative half
+ // of 32bits address space. We may not accept negative offsets, since they may
+ // be just off and we may accept pretty large positive ones.
+ if (M == CodeModel::Kernel && Offset > 0)
+ return true;
+
+ return false;
+}
+
+/// isCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86::isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool TailCallOpt) {
+ if (IsVarArg)
+ return false;
+
+ switch (CallingConv) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ return !is64Bit;
+ case CallingConv::X86_FastCall:
+ return !is64Bit;
+ case CallingConv::X86_ThisCall:
+ return !is64Bit;
+ case CallingConv::Fast:
+ return TailCallOpt;
+ case CallingConv::GHC:
+ return TailCallOpt;
+ case CallingConv::HiPE:
+ return TailCallOpt;
+ }
+}
+
+/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
+/// specific condition code, returning the condition code and the LHS/RHS of the
+/// comparison to make.
+static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
+ SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
+ if (!isFP) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+ // X > -1 -> X == 0, jump !sign.
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_NS;
+ }
+ if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ // X < 0 -> X == 0, jump on sign.
+ return X86::COND_S;
+ }
+ if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
+ // X < 1 -> X <= 0
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_LE;
+ }
+ }
+
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETGT: return X86::COND_G;
+ case ISD::SETGE: return X86::COND_GE;
+ case ISD::SETLT: return X86::COND_L;
+ case ISD::SETLE: return X86::COND_LE;
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETULT: return X86::COND_B;
+ case ISD::SETUGT: return X86::COND_A;
+ case ISD::SETULE: return X86::COND_BE;
+ case ISD::SETUGE: return X86::COND_AE;
+ }
+ }
+
+ // First determine if it is required or is profitable to flip the operands.
+
+ // If LHS is a foldable load, but RHS is not, flip the condition.
+ if (ISD::isNON_EXTLoad(LHS.getNode()) &&
+ !ISD::isNON_EXTLoad(RHS.getNode())) {
+ SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
+ std::swap(LHS, RHS);
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ std::swap(LHS, RHS);
+ break;
+ }
+
+ // On a floating point condition, the flags are set as follows:
+ // ZF PF CF op
+ // 0 | 0 | 0 | X > Y
+ // 0 | 0 | 1 | X < Y
+ // 1 | 0 | 0 | X == Y
+ // 1 | 1 | 1 | unordered
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Condcode should be pre-legalized away");
+ case ISD::SETUEQ:
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETOLT: // flipped
+ case ISD::SETOGT:
+ case ISD::SETGT: return X86::COND_A;
+ case ISD::SETOLE: // flipped
+ case ISD::SETOGE:
+ case ISD::SETGE: return X86::COND_AE;
+ case ISD::SETUGT: // flipped
+ case ISD::SETULT:
+ case ISD::SETLT: return X86::COND_B;
+ case ISD::SETUGE: // flipped
+ case ISD::SETULE:
+ case ISD::SETLE: return X86::COND_BE;
+ case ISD::SETONE:
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETUO: return X86::COND_P;
+ case ISD::SETO: return X86::COND_NP;
+ case ISD::SETOEQ:
+ case ISD::SETUNE: return X86::COND_INVALID;
+ }
+}
+
+/// hasFPCMov - is there a floating point cmov for the specific X86 condition
+/// code. Current x86 isa includes the following FP cmov instructions:
+/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
+static bool hasFPCMov(unsigned X86CC) {
+ switch (X86CC) {
+ default:
+ return false;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_E:
+ case X86::COND_P:
+ case X86::COND_A:
+ case X86::COND_AE:
+ case X86::COND_NE:
+ case X86::COND_NP:
+ return true;
+ }
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
+ if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
+ return true;
+ }
+ return false;
+}
+
+/// isUndefOrInRange - Return true if Val is undef or if its value falls within
+/// the specified range (L, H].
+static bool isUndefOrInRange(int Val, int Low, int Hi) {
+ return (Val < 0) || (Val >= Low && Val < Hi);
+}
+
+/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
+/// specified value.
+static bool isUndefOrEqual(int Val, int CmpVal) {
+ return (Val < 0 || Val == CmpVal);
+}
+
+/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
+/// from position Pos and ending in Pos+Size, falls within the specified
+/// sequential range (L, L+Pos]. or is undef.
+static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
+ unsigned Pos, unsigned Size, int Low) {
+ for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (!isUndefOrEqual(Mask[i], Low))
+ return false;
+ return true;
+}
+
+/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
+/// the second operand.
+static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
+ if (VT == MVT::v4f32 || VT == MVT::v4i32 )
+ return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return (Mask[0] < 2 && Mask[1] < 2);
+ return false;
+}
+
+/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFHW.
+static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+ if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
+ return false;
+
+ // Lower quadword copied in order or undef.
+ if (!isSequentialOrUndefInRange(Mask, 0, 4, 0))
+ return false;
+
+ // Upper quadword shuffled.
+ for (unsigned i = 4; i != 8; ++i)
+ if (!isUndefOrInRange(Mask[i], 4, 8))
+ return false;
+
+ if (VT == MVT::v16i16) {
+ // Lower quadword copied in order or undef.
+ if (!isSequentialOrUndefInRange(Mask, 8, 4, 8))
+ return false;
+
+ // Upper quadword shuffled.
+ for (unsigned i = 12; i != 16; ++i)
+ if (!isUndefOrInRange(Mask[i], 12, 16))
+ return false;
+ }
+
+ return true;
+}
+
+/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFLW.
+static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+ if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
+ return false;
+
+ // Upper quadword copied in order.
+ if (!isSequentialOrUndefInRange(Mask, 4, 4, 4))
+ return false;
+
+ // Lower quadword shuffled.
+ for (unsigned i = 0; i != 4; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, 4))
+ return false;
+
+ if (VT == MVT::v16i16) {
+ // Upper quadword copied in order.
+ if (!isSequentialOrUndefInRange(Mask, 12, 4, 12))
+ return false;
+
+ // Lower quadword shuffled.
+ for (unsigned i = 8; i != 12; ++i)
+ if (!isUndefOrInRange(Mask[i], 8, 12))
+ return false;
+ }
+
+ return true;
+}
+
+/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PALIGNR.
+static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
+ (VT.is256BitVector() && !Subtarget->hasInt256()))
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ // Do not handle 64-bit element shuffles with palignr.
+ if (NumLaneElts == 2)
+ return false;
+
+ for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
+ unsigned i;
+ for (i = 0; i != NumLaneElts; ++i) {
+ if (Mask[i+l] >= 0)
+ break;
+ }
+
+ // Lane is all undef, go to next lane
+ if (i == NumLaneElts)
+ continue;
+
+ int Start = Mask[i+l];
+
+ // Make sure its in this lane in one of the sources
+ if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
+ !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
+ return false;
+
+ // If not lane 0, then we must match lane 0
+ if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
+ return false;
+
+ // Correct second source to be contiguous with first source
+ if (Start >= (int)NumElts)
+ Start -= NumElts - NumLaneElts;
+
+ // Make sure we're shifting in the right direction.
+ if (Start <= (int)(i+l))
+ return false;
+
+ Start -= i;
+
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != NumLaneElts; ++i) {
+ int Idx = Mask[i+l];
+
+ // Make sure its in this lane
+ if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
+ !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
+ return false;
+
+ // If not lane 0, then we must match lane 0
+ if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
+ return false;
+
+ if (Idx >= (int)NumElts)
+ Idx -= NumElts - NumLaneElts;
+
+ if (!isUndefOrEqual(Idx, Start+i))
+ return false;
+
+ }
+ }
+
+ return true;
+}
+
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
+ unsigned NumElems) {
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElems)
+ Mask[i] = idx + NumElems;
+ else
+ Mask[i] = idx - NumElems;
+ }
+}
+
+/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 128/256-bit
+/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
+/// reverse of what x86 shuffles want.
+static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
+ bool Commuted = false) {
+ if (!HasFp256 && VT.is256BitVector())
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElems = NumElems/NumLanes;
+
+ if (NumLaneElems != 2 && NumLaneElems != 4)
+ return false;
+
+ // VSHUFPSY divides the resulting vector into 4 chunks.
+ // The sources are also splitted into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ //
+ // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
+ // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y9
+ //
+ // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
+ // Y3..Y0, Y3..Y0, X3..X0, X3..X0
+ //
+ // VSHUFPDY divides the resulting vector into 4 chunks.
+ // The sources are also splitted into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ //
+ // SRC1 => X3 X2 X1 X0
+ // SRC2 => Y3 Y2 Y1 Y0
+ //
+ // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
+ //
+ unsigned HalfLaneElems = NumLaneElems/2;
+ for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
+ for (unsigned i = 0; i != NumLaneElems; ++i) {
+ int Idx = Mask[i+l];
+ unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0);
+ if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems))
+ return false;
+ // For VSHUFPSY, the mask of the second half must be the same as the
+ // first but with the appropriate offsets. This works in the same way as
+ // VPERMILPS works with masks.
+ if (NumElems != 8 || l == 0 || Mask[i] < 0)
+ continue;
+ if (!isUndefOrEqual(Idx, Mask[i]+l))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 4)
+ return false;
+
+ // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
+ return isUndefOrEqual(Mask[0], 6) &&
+ isUndefOrEqual(Mask[1], 7) &&
+ isUndefOrEqual(Mask[2], 2) &&
+ isUndefOrEqual(Mask[3], 3);
+}
+
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 4)
+ return false;
+
+ return isUndefOrEqual(Mask[0], 2) &&
+ isUndefOrEqual(Mask[1], 3) &&
+ isUndefOrEqual(Mask[2], 2) &&
+ isUndefOrEqual(Mask[3], 3);
+}
+
+/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
+static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i], i + NumElems))
+ return false;
+
+ for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
+ return false;
+
+ return true;
+}
+
+/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
+static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
+ return false;
+
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i + e], i + NumElems))
+ return false;
+
+ return true;
+}
+
+//
+// Some special combinations that can be optimized.
+//
+static
+SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+ DebugLoc dl = SVOp->getDebugLoc();
+
+ if (VT != MVT::v8i32 && VT != MVT::v8f32)
+ return SDValue();
+
+ ArrayRef<int> Mask = SVOp->getMask();
+
+ // These are the special masks that may be optimized.
+ static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
+ static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15};
+ bool MatchEvenMask = true;
+ bool MatchOddMask = true;
+ for (int i=0; i<8; ++i) {
+ if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
+ MatchEvenMask = false;
+ if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
+ MatchOddMask = false;
+ }
+
+ if (!MatchEvenMask && !MatchOddMask)
+ return SDValue();
+
+ SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
+
+ SDValue Op0 = SVOp->getOperand(0);
+ SDValue Op1 = SVOp->getOperand(1);
+
+ if (MatchEvenMask) {
+ // Shift the second operand right to 32 bits.
+ static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
+ Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask);
+ } else {
+ // Shift the first operand left to 32 bits.
+ static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
+ Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask);
+ }
+ static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
+ return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask);
+}
+
+/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKL.
+static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
+ bool HasInt256, bool V2IsSplat = false) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ return false;
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = l*NumLaneElts, j = l*NumLaneElts;
+ i != (l+1)*NumLaneElts;
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKH.
+static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
+ bool HasInt256, bool V2IsSplat = false) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ return false;
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = l*NumLaneElts, j = (l*NumLaneElts)+NumLaneElts/2;
+ i != (l+1)*NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j+NumElts))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+/// <0, 0, 1, 1>
+static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+ unsigned NumElts = VT.getVectorNumElements();
+ bool Is256BitVec = VT.is256BitVector();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ return false;
+
+ // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
+ // FIXME: Need a better way to get rid of this, there's no latency difference
+ // between UNPCKLPD and MOVDDUP, the later should always be checked first and
+ // the former later. We should also remove the "_undef" special mask.
+ if (NumElts == 4 && Is256BitVec)
+ return false;
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = l*NumLaneElts, j = l*NumLaneElts;
+ i != (l+1)*NumLaneElts;
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+/// <2, 2, 3, 3>
+static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ return false;
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = l*NumLaneElts, j = (l*NumLaneElts)+NumLaneElts/2;
+ i != (l+1)*NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSS,
+/// MOVSD, and MOVD, i.e. setting the lowest element.
+static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
+ if (VT.getVectorElementType().getSizeInBits() < 32)
+ return false;
+ if (!VT.is128BitVector())
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ if (!isUndefOrEqual(Mask[0], NumElts))
+ return false;
+
+ for (unsigned i = 1; i != NumElts; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
+ return false;
+
+ return true;
+}
+
+/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
+/// as permutations between 128-bit chunks or halves. As an example: this
+/// shuffle bellow:
+/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
+/// The first half comes from the second half of V1 and the second half from the
+/// the second half of V2.
+static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256 || !VT.is256BitVector())
+ return false;
+
+ // The shuffle result is divided into half A and half B. In total the two
+ // sources have 4 halves, namely: C, D, E, F. The final values of A and
+ // B must come from C, D, E or F.
+ unsigned HalfSize = VT.getVectorNumElements()/2;
+ bool MatchA = false, MatchB = false;
+
+ // Check if A comes from one of C, D, E, F.
+ for (unsigned Half = 0; Half != 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
+ MatchA = true;
+ break;
+ }
+ }
+
+ // Check if B comes from one of C, D, E, F.
+ for (unsigned Half = 0; Half != 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
+ MatchB = true;
+ break;
+ }
+ }
+
+ return MatchA && MatchB;
+}
+
+/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
+static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+
+ unsigned HalfSize = VT.getVectorNumElements()/2;
+
+ unsigned FstHalf = 0, SndHalf = 0;
+ for (unsigned i = 0; i < HalfSize; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ FstHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+ for (unsigned i = HalfSize; i < HalfSize*2; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ SndHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+
+ return (FstHalf | (SndHalf << 4));
+}
+
+/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
+/// Note that VPERMIL mask matching is different depending whether theunderlying
+/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
+/// to the same elements of the low, but to the higher half of the source.
+/// In VPERMILPD the two lanes could be shuffled independently of each other
+/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
+static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ // Only match 256-bit with 32/64-bit types
+ if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8))
+ return false;
+
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned LaneSize = NumElts/NumLanes;
+ for (unsigned l = 0; l != NumElts; l += LaneSize) {
+ for (unsigned i = 0; i != LaneSize; ++i) {
+ if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
+ return false;
+ if (NumElts != 8 || l == 0)
+ continue;
+ // VPERMILPS handling
+ if (Mask[i] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i+l], Mask[i]+l))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// isCommutedMOVLMask - Returns true if the shuffle mask is except the reverse
+/// of what x86 movss want. X86 movs requires the lowest element to be lowest
+/// element of vector 2 and the other elements to come from vector 1 in order.
+static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
+ bool V2IsSplat = false, bool V2IsUndef = false) {
+ if (!VT.is128BitVector())
+ return false;
+
+ unsigned NumOps = VT.getVectorNumElements();
+ if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
+ return false;
+
+ if (!isUndefOrEqual(Mask[0], 0))
+ return false;
+
+ for (unsigned i = 1; i != NumOps; ++i)
+ if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
+ (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
+ (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
+ return false;
+
+ return true;
+}
+
+/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
+static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3())
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if ((VT.is128BitVector() && NumElems != 4) ||
+ (VT.is256BitVector() && NumElems != 8))
+ return false;
+
+ // "i+1" is the value the indexed mask element must have
+ for (unsigned i = 0; i != NumElems; i += 2)
+ if (!isUndefOrEqual(Mask[i], i+1) ||
+ !isUndefOrEqual(Mask[i+1], i+1))
+ return false;
+
+ return true;
+}
+
+/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
+static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3())
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if ((VT.is128BitVector() && NumElems != 4) ||
+ (VT.is256BitVector() && NumElems != 8))
+ return false;
+
+ // "i" is the value the indexed mask element must have
+ for (unsigned i = 0; i != NumElems; i += 2)
+ if (!isUndefOrEqual(Mask[i], i) ||
+ !isUndefOrEqual(Mask[i+1], i))
+ return false;
+
+ return true;
+}
+
+/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// version of MOVDDUP.
+static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256 || !VT.is256BitVector())
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts != 4)
+ return false;
+
+ for (unsigned i = 0; i != NumElts/2; ++i)
+ if (!isUndefOrEqual(Mask[i], 0))
+ return false;
+ for (unsigned i = NumElts/2; i != NumElts; ++i)
+ if (!isUndefOrEqual(Mask[i], NumElts/2))
+ return false;
+ return true;
+}
+
+/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 128-bit
+/// version of MOVDDUP.
+static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+
+ unsigned e = VT.getVectorNumElements() / 2;
+ for (unsigned i = 0; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
+ return false;
+ for (unsigned i = 0; i != e; ++i)
+ if (!isUndefOrEqual(Mask[e+i], i))
+ return false;
+ return true;
+}
+
+/// isVEXTRACTF128Index - Return true if the specified
+/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+/// suitable for input to VEXTRACTF128.
+bool X86::isVEXTRACTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
+/// operand specifies a subvector insert that is suitable for input to
+/// VINSERTF128.
+bool X86::isVINSERTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
+/// Handles 128-bit and 256-bit.
+static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
+ MVT VT = N->getValueType(0).getSimpleVT();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for PSHUF/SHUFP");
+
+ // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
+ // independently on 128-bit lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ assert((NumLaneElts == 2 || NumLaneElts == 4) &&
+ "Only supports 2 or 4 elements per lane");
+
+ unsigned Shift = (NumLaneElts == 4) ? 1 : 0;
+ unsigned Mask = 0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt < 0) continue;
+ Elt &= NumLaneElts - 1;
+ unsigned ShAmt = (i << Shift) % 8;
+ Mask |= Elt << ShAmt;
+ }
+
+ return Mask;
+}
+
+/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
+static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
+ MVT VT = N->getValueType(0).getSimpleVT();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
+ "Unsupported vector type for PSHUFHW");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ unsigned Mask = 0;
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ // 8 nodes per lane, but we only care about the last 4.
+ for (unsigned i = 0; i < 4; ++i) {
+ int Elt = N->getMaskElt(l+i+4);
+ if (Elt < 0) continue;
+ Elt &= 0x3; // only 2-bits.
+ Mask |= Elt << (i * 2);
+ }
+ }
+
+ return Mask;
+}
+
+/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
+static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
+ MVT VT = N->getValueType(0).getSimpleVT();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
+ "Unsupported vector type for PSHUFHW");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ unsigned Mask = 0;
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ // 8 nodes per lane, but we only care about the first 4.
+ for (unsigned i = 0; i < 4; ++i) {
+ int Elt = N->getMaskElt(l+i);
+ if (Elt < 0) continue;
+ Elt &= 0x3; // only 2-bits
+ Mask |= Elt << (i * 2);
+ }
+ }
+
+ return Mask;
+}
+
+/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
+static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ int Val = 0;
+ unsigned i;
+ for (i = 0; i != NumElts; ++i) {
+ Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ break;
+ }
+ if (Val >= (int)NumElts)
+ Val -= NumElts - NumLaneElts;
+
+ assert(Val - i > 0 && "PALIGNR imm should be positive");
+ return (Val - i) * EltSize;
+}
+
+/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
+/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
+/// instructions.
+unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ llvm_unreachable("Illegal extract subvector for VEXTRACTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ MVT VecVT = N->getOperand(0).getValueType().getSimpleVT();
+ MVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+ return Index / NumElemsPerChunk;
+}
+
+/// getInsertVINSERTF128Immediate - Return the appropriate immediate
+/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
+/// instructions.
+unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ llvm_unreachable("Illegal insert subvector for VINSERTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ MVT VecVT = N->getValueType(0).getSimpleVT();
+ MVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+ return Index / NumElemsPerChunk;
+}
+
+/// getShuffleCLImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
+/// Handles 256-bit.
+static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
+ MVT VT = N->getValueType(0).getSimpleVT();
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert((VT.is256BitVector() && NumElts == 4) &&
+ "Unsupported vector type for VPERMQ/VPERMPD");
+
+ unsigned Mask = 0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt < 0)
+ continue;
+ Mask |= Elt << (i*2);
+ }
+
+ return Mask;
+}
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt))
+ return CN->isNullValue();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
+ return CFP->getValueAPF().isPosZero();
+ return false;
+}
+
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
+/// their permute mask.
+static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int Idx = SVOp->getMaskElt(i);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElems)
+ Idx += NumElems;
+ else
+ Idx -= NumElems;
+ }
+ MaskVec.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1),
+ SVOp->getOperand(0), &MaskVec[0]);
+}
+
+/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
+/// match movhlps. The lower half elements should come from upper half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order).
+static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+ if (VT.getVectorNumElements() != 4)
+ return false;
+ for (unsigned i = 0, e = 2; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i], i+2))
+ return false;
+ for (unsigned i = 2; i != 4; ++i)
+ if (!isUndefOrEqual(Mask[i], i+4))
+ return false;
+ return true;
+}
+
+/// isScalarLoadToVector - Returns true if the node is a scalar load that
+/// is promoted to a vector. It also returns the LoadSDNode by reference if
+/// required.
+static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
+ if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+ N = N->getOperand(0).getNode();
+ if (!ISD::isNON_EXTLoad(N))
+ return false;
+ if (LD)
+ *LD = cast<LoadSDNode>(N);
+ return true;
+}
+
+// Test whether the given value is a vector value which will be legalized
+// into a load.
+static bool WillBeConstantPoolLoad(SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ // Check for any non-constant elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ switch (N->getOperand(i).getNode()->getOpcode()) {
+ case ISD::UNDEF:
+ case ISD::ConstantFP:
+ case ISD::Constant:
+ break;
+ default:
+ return false;
+ }
+
+ // Vectors of all-zeros and all-ones are materialized with special
+ // instructions rather than being loaded.
+ return !ISD::isBuildVectorAllZeros(N) &&
+ !ISD::isBuildVectorAllOnes(N);
+}
+
+/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
+/// match movlp{s|d}. The lower half elements should come from lower half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order). And since V1 will become the source of the
+/// MOVLP, it must be either a vector load or a scalar load to vector.
+static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
+ ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+
+ if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
+ return false;
+ // Is V2 is a vector load, don't do this transformation. We will try to use
+ // load folding shufps op.
+ if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2))
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
+ return false;
+ for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i], i+NumElems))
+ return false;
+ return true;
+}
+
+/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
+/// all the same.
+static bool isSplatVector(SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ SDValue SplatValue = N->getOperand(0);
+ for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i) != SplatValue)
+ return false;
+ return true;
+}
+
+/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
+/// to an zero vector.
+/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
+static bool isZeroShuffle(ShuffleVectorSDNode *N) {
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx >= (int)NumElems) {
+ unsigned Opc = V2.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
+ return false;
+ } else if (Idx >= 0) {
+ unsigned Opc = V1.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V1.getOperand(Idx)))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+///
+static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ // Always build SSE zero vectors as <4 x i32> bitcasted
+ // to their dest type. This ensures they get CSE'd.
+ SDValue Vec;
+ if (VT.is128BitVector()) { // SSE
+ if (Subtarget->hasSSE2()) { // SSE2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
+ } else if (VT.is256BitVector()) { // AVX
+ if (Subtarget->hasInt256()) { // AVX2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else {
+ // 256-bit logic and arithmetic instructions in AVX are all
+ // floating-point, no support for integer ops. Emit fp zeroed vectors.
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ }
+ } else
+ llvm_unreachable("Unexpected vector type");
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
+/// no AVX2 supprt, use two <4 x i32> inserted in a <8 x i32> appropriately.
+/// Then bitcast to their original type, ensuring they get CSE'd.
+static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDValue Vec;
+ if (VT.is256BitVector()) {
+ if (HasInt256) { // AVX2
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else { // AVX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
+ }
+ } else if (VT.is128BitVector()) {
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else
+ llvm_unreachable("Unexpected vector type");
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+}
+
+/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
+/// that point to V2 points to its first element.
+static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) {
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] > (int)NumElems) {
+ Mask[i] = NumElems;
+ }
+ }
+}
+
+/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
+/// operation of specified width.
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i)
+ Mask.push_back(i);
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
+static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
+ Mask.push_back(i);
+ Mask.push_back(i + NumElems);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
+static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
+ Mask.push_back(i + Half);
+ Mask.push_back(i + NumElems + Half);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+// PromoteSplati8i16 - All i16 and i8 vector types can't be used directly by
+// a generic shuffle instruction because the target has no such instructions.
+// Generate shuffles which repeat i16 and i8 several times until they can be
+// represented by v4f32 and then be manipulated by target suported shuffles.
+static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
+ EVT VT = V.getValueType();
+ int NumElems = VT.getVectorNumElements();
+ DebugLoc dl = V.getDebugLoc();
+
+ while (NumElems > 4) {
+ if (EltNo < NumElems/2) {
+ V = getUnpackl(DAG, dl, VT, V, V);
+ } else {
+ V = getUnpackh(DAG, dl, VT, V, V);
+ EltNo -= NumElems/2;
+ }
+ NumElems >>= 1;
+ }
+ return V;
+}
+
+/// getLegalSplat - Generate a legal splat with supported x86 shuffles
+static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
+ EVT VT = V.getValueType();
+ DebugLoc dl = V.getDebugLoc();
+
+ if (VT.is128BitVector()) {
+ V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+ V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
+ &SplatMask[0]);
+ } else if (VT.is256BitVector()) {
+ // To use VPERMILPS to splat scalars, the second half of indicies must
+ // refer to the higher part, which is a duplication of the lower one,
+ // because VPERMILPS can only handle in-lane permutations.
+ int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
+ EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
+
+ V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
+ V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
+ &SplatMask[0]);
+ } else
+ llvm_unreachable("Vector size not supported");
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, V);
+}
+
+/// PromoteSplat - Splat is promoted to target supported vector shuffles.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
+ EVT SrcVT = SV->getValueType(0);
+ SDValue V1 = SV->getOperand(0);
+ DebugLoc dl = SV->getDebugLoc();
+
+ int EltNo = SV->getSplatIndex();
+ int NumElems = SrcVT.getVectorNumElements();
+ bool Is256BitVec = SrcVT.is256BitVector();
+
+ assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
+ "Unknown how to promote splat for type");
+
+ // Extract the 128-bit part containing the splat element and update
+ // the splat element index when it refers to the higher register.
+ if (Is256BitVec) {
+ V1 = Extract128BitVector(V1, EltNo, DAG, dl);
+ if (EltNo >= NumElems/2)
+ EltNo -= NumElems/2;
+ }
+
+ // All i16 and i8 vector types can't be used directly by a generic shuffle
+ // instruction because the target has no such instruction. Generate shuffles
+ // which repeat i16 and i8 several times until they fit in i32, and then can
+ // be manipulated by target suported shuffles.
+ EVT EltVT = SrcVT.getVectorElementType();
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ V1 = PromoteSplati8i16(V1, DAG, EltNo);
+
+ // Recreate the 256-bit vector and place the same 128-bit vector
+ // into the low and high part. This is necessary because we want
+ // to use VPERM* to shuffle the vectors
+ if (Is256BitVec) {
+ V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
+ }
+
+ return getLegalSplat(DAG, V1, EltNo);
+}
+
+/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
+/// vector of zero or undef vector. This produces a shuffle where the low
+/// element of V2 is swizzled into the zero/undef vector, landing at element
+/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
+static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
+ bool IsZero,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ EVT VT = V2.getValueType();
+ SDValue V1 = IsZero
+ ? getZeroVector(VT, Subtarget, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 16> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(i == Idx ? NumElems : i);
+ return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
+}
+
+/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
+/// target specific opcode. Returns true if the Mask could be calculated.
+/// Sets IsUnary to true if only uses one source.
+static bool getTargetShuffleMask(SDNode *N, MVT VT,
+ SmallVectorImpl<int> &Mask, bool &IsUnary) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue ImmN;
+
+ IsUnary = false;
+ switch(N->getOpcode()) {
+ case X86ISD::SHUFP:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
+ case X86ISD::UNPCKH:
+ DecodeUNPCKHMask(VT, Mask);
+ break;
+ case X86ISD::UNPCKL:
+ DecodeUNPCKLMask(VT, Mask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, Mask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, Mask);
+ break;
+ case X86ISD::PALIGNR:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
+ case X86ISD::PSHUFD:
+ case X86ISD::VPERMILP:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::VPERMI:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+ // The index 0 always comes from the first element of the second source,
+ // this is why MOVSS and MOVSD are used in the first place. The other
+ // elements come from the other positions of the first source vector
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i) {
+ Mask.push_back(i);
+ }
+ break;
+ }
+ case X86ISD::VPERM2X128:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ if (Mask.empty()) return false;
+ break;
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ // Not yet implemented
+ return false;
+ default: llvm_unreachable("unknown target shuffle node");
+ }
+
+ return true;
+}
+
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
+ unsigned Depth) {
+ if (Depth == 6)
+ return SDValue(); // Limit search depth.
+
+ SDValue V = SDValue(N, 0);
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+
+ // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
+ if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
+ int Elt = SV->getMaskElt(Index);
+
+ if (Elt < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
+ : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
+ }
+
+ // Recurse into target specific vector shuffles to find scalars.
+ if (isTargetShuffle(Opcode)) {
+ MVT ShufVT = V.getValueType().getSimpleVT();
+ unsigned NumElems = ShufVT.getVectorNumElements();
+ SmallVector<int, 16> ShuffleMask;
+ bool IsUnary;
+
+ if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
+ return SDValue();
+
+ int Elt = ShuffleMask[Index];
+ if (Elt < 0)
+ return DAG.getUNDEF(ShufVT.getVectorElementType());
+
+ SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
+ : N->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
+ Depth+1);
+ }
+
+ // Actual nodes that may contain scalar elements
+ if (Opcode == ISD::BITCAST) {
+ V = V.getOperand(0);
+ EVT SrcVT = V.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
+ return SDValue();
+ }
+
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? V.getOperand(0)
+ : DAG.getUNDEF(VT.getVectorElementType());
+
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Index);
+
+ return SDValue();
+}
+
+/// getNumOfConsecutiveZeros - Return the number of elements of a vector
+/// shuffle operation which come from a consecutively from a zero. The
+/// search can start in two different directions, from left or right.
+static
+unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems,
+ bool ZerosFromLeft, SelectionDAG &DAG) {
+ unsigned i;
+ for (i = 0; i != NumElems; ++i) {
+ unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
+ SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
+ if (!(Elt.getNode() &&
+ (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
+ break;
+ }
+
+ return i;
+}
+
+/// isShuffleMaskConsecutive - Check if the shuffle mask indicies [MaskI, MaskE)
+/// correspond consecutively to elements from one of the vector operands,
+/// starting from its index OpIdx. Also tell OpNum which source vector operand.
+static
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
+ unsigned MaskI, unsigned MaskE, unsigned OpIdx,
+ unsigned NumElems, unsigned &OpNum) {
+ bool SeenV1 = false;
+ bool SeenV2 = false;
+
+ for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
+ int Idx = SVOp->getMaskElt(i);
+ // Ignore undef indicies
+ if (Idx < 0)
+ continue;
+
+ if (Idx < (int)NumElems)
+ SeenV1 = true;
+ else
+ SeenV2 = true;
+
+ // Only accept consecutive elements from the same vector
+ if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
+ return false;
+ }
+
+ OpNum = SeenV1 ? 0 : 1;
+ return true;
+}
+
+/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ false /* check zeros from right */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // V1 = {X, A, B, C} 0
+ // \ \ \ /
+ // vector_shuffle V1, V2 <1, 2, 3, X>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ 0, // Mask Start Index
+ NumElems-NumZeros, // Mask End Index(exclusive)
+ NumZeros, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = false;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ true /* check zeros from left */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // 0 { A, B, X, X } = V2
+ // / \ / /
+ // vector_shuffle V1, V2 <X, X, 4, 5>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ NumZeros, // Mask Start Index
+ NumElems, // Mask End Index(exclusive)
+ 0, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = true;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ // Although the logic below support any bitwidth size, there are no
+ // shift instructions which handle more than 128-bit vectors.
+ if (!SVOp->getValueType(0).is128BitVector())
+ return false;
+
+ if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
+ isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
+ return true;
+
+ return false;
+}
+
+/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
+///
+static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const X86Subtarget* Subtarget,
+ const TargetLowering &TLI) {
+ if (NumNonZero > 8)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
+ if (ThisIsNonZero && First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+
+ if ((i & 1) != 0) {
+ SDValue ThisElt(0, 0), LastElt(0, 0);
+ bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
+ if (LastIsNonZero) {
+ LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::i16, Op.getOperand(i-1));
+ }
+ if (ThisIsNonZero) {
+ ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
+ ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
+ ThisElt, DAG.getConstant(8, MVT::i8));
+ if (LastIsNonZero)
+ ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
+ } else
+ ThisElt = LastElt;
+
+ if (ThisElt.getNode())
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
+ DAG.getIntPtrConstant(i/2));
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
+}
+
+/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
+///
+static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const X86Subtarget* Subtarget,
+ const TargetLowering &TLI) {
+ if (NumNonZero > 4)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 8; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ MVT::v8i16, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i));
+ }
+ }
+
+ return V;
+}
+
+/// getVShift - Return a vector logical shift node.
+///
+static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
+ unsigned NumBits, SelectionDAG &DAG,
+ const TargetLowering &TLI, DebugLoc dl) {
+ assert(VT.is128BitVector() && "Unknown type for VShift");
+ EVT ShVT = MVT::v2i64;
+ unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
+ SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(Opc, dl, ShVT, SrcOp,
+ DAG.getConstant(NumBits,
+ TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
+}
+
+SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) const {
+
+ // Check if the scalar load can be widened into a vector load. And if
+ // the address is "base + cst" see if the cst can be "absorbed" into
+ // the shuffle mask.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+ SDValue Ptr = LD->getBasePtr();
+ if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+ return SDValue();
+ EVT PVT = LD->getValueType(0);
+ if (PVT != MVT::i32 && PVT != MVT::f32)
+ return SDValue();
+
+ int FI = -1;
+ int64_t Offset = 0;
+ if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FI = FINode->getIndex();
+ Offset = 0;
+ } else if (DAG.isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ Offset = Ptr.getConstantOperandVal(1);
+ Ptr = Ptr.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ // FIXME: 256-bit vector instructions don't require a strict alignment,
+ // improve this code to support it better.
+ unsigned RequiredAlign = VT.getSizeInBits()/8;
+ SDValue Chain = LD->getChain();
+ // Make sure the stack object alignment is at least 16 or 32.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change the alignment. FIXME: It's possible to compute
+ // the exact stack offset and reference FI + adjust offset instead.
+ // If someone *really* cares about this. That's the way to implement it.
+ return SDValue();
+ } else {
+ MFI->setObjectAlignment(FI, RequiredAlign);
+ }
+ }
+
+ // (Offset % 16 or 32) must be multiple of 4. Then address is then
+ // Ptr + (Offset & ~15).
+ if (Offset < 0)
+ return SDValue();
+ if ((Offset % RequiredAlign) & 3)
+ return SDValue();
+ int64_t StartOffset = Offset & ~(RequiredAlign-1);
+ if (StartOffset)
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+ Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+
+ int EltNo = (Offset - StartOffset) >> 2;
+ unsigned NumElems = VT.getVectorNumElements();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
+ SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(StartOffset),
+ false, false, false, 0);
+
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumElems; ++i)
+ Mask.push_back(EltNo);
+
+ return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
+ }
+
+ return SDValue();
+}
+
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
+/// load which has the same value as a build_vector whose operands are 'elts'.
+///
+/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
+///
+/// FIXME: we'd also like to handle the case where the last elements are zero
+/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
+/// There's even a handy isZeroNode for that purpose.
+static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
+ DebugLoc &DL, SelectionDAG &DAG) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = Elts.size();
+
+ LoadSDNode *LDBase = NULL;
+ unsigned LastLoadedElt = -1U;
+
+ // For each element in the initializer, see if we've found a load or an undef.
+ // If we don't find an initial load element, or later load elements are
+ // non-consecutive, bail out.
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Elts[i];
+
+ if (!Elt.getNode() ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
+ return SDValue();
+ if (!LDBase) {
+ if (Elt.getNode()->getOpcode() == ISD::UNDEF)
+ return SDValue();
+ LDBase = cast<LoadSDNode>(Elt.getNode());
+ LastLoadedElt = i;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ LoadSDNode *LD = cast<LoadSDNode>(Elt);
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
+ return SDValue();
+ LastLoadedElt = i;
+ }
+
+ // If we have found an entire vector of loads and undefs, then return a large
+ // load of the entire vector width starting at the base pointer. If we found
+ // consecutive loads for the low half, generate a vzext_load node.
+ if (LastLoadedElt == NumElems - 1) {
+ if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(),
+ LDBase->isInvariant(), 0);
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(),
+ LDBase->isInvariant(), LDBase->getAlignment());
+ }
+ if (NumElems == 4 && LastLoadedElt == 1 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
+ SDValue ResNode =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64,
+ LDBase->getPointerInfo(),
+ LDBase->getAlignment(),
+ false/*isVolatile*/, true/*ReadMem*/,
+ false/*WriteMem*/);
+
+ // Make sure the newly-created LOAD is in the same position as LDBase in
+ // terms of dependency. We create a TokenFactor for LDBase and ResNode, and
+ // update uses of LDBase's output chain to use the TokenFactor.
+ if (LDBase->hasAnyUseOfValue(1)) {
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
+ DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
+ SDValue(ResNode.getNode(), 1));
+ }
+
+ return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
+ }
+ return SDValue();
+}
+
+/// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
+/// to generate a splat value for the following cases:
+/// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
+/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
+/// a scalar load, or a constant.
+/// The VBROADCAST node is returned when a pattern is found,
+/// or SDValue() otherwise.
+SDValue
+X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
+ if (!Subtarget->hasFp256())
+ return SDValue();
+
+ MVT VT = Op.getValueType().getSimpleVT();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for broadcast.");
+
+ SDValue Ld;
+ bool ConstSplatVal;
+
+ switch (Op.getOpcode()) {
+ default:
+ // Unknown pattern found.
+ return SDValue();
+
+ case ISD::BUILD_VECTOR: {
+ // The BUILD_VECTOR node must be a splat.
+ if (!isSplatVector(Op.getNode()))
+ return SDValue();
+
+ Ld = Op.getOperand(0);
+ ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
+ Ld.getOpcode() == ISD::ConstantFP);
+
+ // The suspected load node has several users. Make sure that all
+ // of its users are from the BUILD_VECTOR node.
+ // Constants may have multiple users.
+ if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
+ return SDValue();
+ break;
+ }
+
+ case ISD::VECTOR_SHUFFLE: {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+
+ // Shuffles must have a splat mask where the first element is
+ // broadcasted.
+ if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
+ return SDValue();
+
+ SDValue Sc = Op.getOperand(0);
+ if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
+ Sc.getOpcode() != ISD::BUILD_VECTOR) {
+
+ if (!Subtarget->hasInt256())
+ return SDValue();
+
+ // Use the register form of the broadcast instruction available on AVX2.
+ if (VT.is256BitVector())
+ Sc = Extract128BitVector(Sc, 0, DAG, dl);
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
+ }
+
+ Ld = Sc.getOperand(0);
+ ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
+ Ld.getOpcode() == ISD::ConstantFP);
+
+ // The scalar_to_vector node and the suspected
+ // load node must have exactly one user.
+ // Constants may have multiple users.
+ if (!ConstSplatVal && (!Sc.hasOneUse() || !Ld.hasOneUse()))
+ return SDValue();
+ break;
+ }
+ }
+
+ bool Is256 = VT.is256BitVector();
+
+ // Handle the broadcasting a single constant scalar from the constant pool
+ // into a vector. On Sandybridge it is still better to load a constant vector
+ // from the constant pool and not to broadcast it from a scalar.
+ if (ConstSplatVal && Subtarget->hasInt256()) {
+ EVT CVT = Ld.getValueType();
+ assert(!CVT.isVector() && "Must not broadcast a vector type");
+ unsigned ScalarSize = CVT.getSizeInBits();
+
+ if (ScalarSize == 32 || (Is256 && ScalarSize == 64)) {
+ const Constant *C = 0;
+ if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
+ C = CI->getConstantIntValue();
+ else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
+ C = CF->getConstantFPValue();
+
+ assert(C && "Invalid constant type");
+
+ SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+ }
+ }
+
+ bool IsLoad = ISD::isNormalLoad(Ld.getNode());
+ unsigned ScalarSize = Ld.getValueType().getSizeInBits();
+
+ // Handle AVX2 in-register broadcasts.
+ if (!IsLoad && Subtarget->hasInt256() &&
+ (ScalarSize == 32 || (Is256 && ScalarSize == 64)))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+
+ // The scalar source must be a normal load.
+ if (!IsLoad)
+ return SDValue();
+
+ if (ScalarSize == 32 || (Is256 && ScalarSize == 64))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+
+ // The integer check is needed for the 64-bit into 128-bit so it doesn't match
+ // double since there is no vbroadcastsd xmm
+ if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
+ if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+ }
+
+ // Unsupported broadcast.
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // Skip if insert_vec_elt is not supported.
+ if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
+ return SDValue();
+
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned NumElems = Op.getNumOperands();
+
+ SDValue VecIn1;
+ SDValue VecIn2;
+ SmallVector<unsigned, 4> InsertIndices;
+ SmallVector<int, 8> Mask(NumElems, -1);
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ unsigned Opc = Op.getOperand(i).getOpcode();
+
+ if (Opc == ISD::UNDEF)
+ continue;
+
+ if (Opc != ISD::EXTRACT_VECTOR_ELT) {
+ // Quit if more than 1 elements need inserting.
+ if (InsertIndices.size() > 1)
+ return SDValue();
+
+ InsertIndices.push_back(i);
+ continue;
+ }
+
+ SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
+ SDValue ExtIdx = Op.getOperand(i).getOperand(1);
+
+ // Quit if extracted from vector of different type.
+ if (ExtractedFromVec.getValueType() != VT)
+ return SDValue();
+
+ // Quit if non-constant index.
+ if (!isa<ConstantSDNode>(ExtIdx))
+ return SDValue();
+
+ if (VecIn1.getNode() == 0)
+ VecIn1 = ExtractedFromVec;
+ else if (VecIn1 != ExtractedFromVec) {
+ if (VecIn2.getNode() == 0)
+ VecIn2 = ExtractedFromVec;
+ else if (VecIn2 != ExtractedFromVec)
+ // Quit if more than 2 vectors to shuffle
+ return SDValue();
+ }
+
+ unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
+
+ if (ExtractedFromVec == VecIn1)
+ Mask[i] = Idx;
+ else if (ExtractedFromVec == VecIn2)
+ Mask[i] = Idx + NumElems;
+ }
+
+ if (VecIn1.getNode() == 0)
+ return SDValue();
+
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
+ for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
+ unsigned Idx = InsertIndices[i];
+ NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
+ DAG.getIntPtrConstant(Idx));
+ }
+
+ return NV;
+}
+
+SDValue
+X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT ExtVT = VT.getVectorElementType();
+ unsigned NumElems = Op.getNumOperands();
+
+ // Vectors containing all zeros can be matched by pxor and xorps later
+ if (ISD::isBuildVectorAllZeros(Op.getNode())) {
+ // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
+ // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
+ if (VT == MVT::v4i32 || VT == MVT::v8i32)
+ return Op;
+
+ return getZeroVector(VT, Subtarget, DAG, dl);
+ }
+
+ // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
+ // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
+ // vpcmpeqd on 256-bit vectors.
+ if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
+ if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
+ return Op;
+
+ return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
+ }
+
+ SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ if (Broadcast.getNode())
+ return Broadcast;
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+
+ unsigned NumZero = 0;
+ unsigned NumNonZero = 0;
+ unsigned NonZeros = 0;
+ bool IsAllConstants = true;
+ SmallSet<SDValue, 8> Values;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Values.insert(Elt);
+ if (Elt.getOpcode() != ISD::Constant &&
+ Elt.getOpcode() != ISD::ConstantFP)
+ IsAllConstants = false;
+ if (X86::isZeroNode(Elt))
+ NumZero++;
+ else {
+ NonZeros |= (1 << i);
+ NumNonZero++;
+ }
+ }
+
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ if (NumNonZero == 0)
+ return DAG.getUNDEF(VT);
+
+ // Special case for single non-zero, non-undef, element.
+ if (NumNonZero == 1) {
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+
+ // If this is an insertion of an i64 value on x86-32, and if the top bits of
+ // the value are obviously zero, truncate the value to i32 and do the
+ // insertion that way. Only do this if the value is non-constant or if the
+ // value is a constant being inserted into element 0. It is cheaper to do
+ // a constant pool load than it is to do a movd + shuffle.
+ if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
+ (!IsAllConstants || Idx == 0)) {
+ if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+ // Handle SSE only.
+ assert(VT == MVT::v2i64 && "Expected an SSE value type!");
+ EVT VecVT = MVT::v4i32;
+ unsigned VecElts = 4;
+
+ // Truncate the value (which may itself be a constant) to i32, and
+ // convert it to a vector with movd (S2V+shuffle to zero extend).
+ Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+
+ // Now we have our 32-bit value zero extended in the low element of
+ // a vector. If Idx != 0, swizzle it into place.
+ if (Idx != 0) {
+ SmallVector<int, 4> Mask;
+ Mask.push_back(Idx);
+ for (unsigned i = 1; i != VecElts; ++i)
+ Mask.push_back(i);
+ Item = DAG.getVectorShuffle(VecVT, dl, Item, DAG.getUNDEF(VecVT),
+ &Mask[0]);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
+ }
+ }
+
+ // If we have a constant or non-constant insertion into the low element of
+ // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
+ // the rest of the elements. This will be matched as movd/movq/movss/movsd
+ // depending on what the source datatype is.
+ if (Idx == 0) {
+ if (NumZero == 0)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+
+ if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
+ (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
+ if (VT.is256BitVector()) {
+ SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
+ Item, DAG.getIntPtrConstant(0));
+ }
+ assert(VT.is128BitVector() && "Expected an SSE value type!");
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+ }
+
+ if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
+ Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
+ if (VT.is256BitVector()) {
+ SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
+ Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
+ } else {
+ assert(VT.is128BitVector() && "Expected an SSE value type!");
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
+ }
+ }
+
+ // Is it a vector logical left shift?
+ if (NumElems == 2 && Idx == 1 &&
+ X86::isZeroNode(Op.getOperand(0)) &&
+ !X86::isZeroNode(Op.getOperand(1))) {
+ unsigned NumBits = VT.getSizeInBits();
+ return getVShift(true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ VT, Op.getOperand(1)),
+ NumBits/2, DAG, *this, dl);
+ }
+
+ if (IsAllConstants) // Otherwise, it's better to do a constpool load.
+ return SDValue();
+
+ // Otherwise, if this is a vector with i32 or f32 elements, and the element
+ // is a non-constant being inserted into an element other than the low one,
+ // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
+ // movd/movss) to move this into the low element, then shuffle it into
+ // place.
+ if (EVTBits == 32) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+
+ // Turn it into a shuffle of zero and zero-extended scalar to vector.
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ MaskVec.push_back(i == Idx ? 0 : 1);
+ return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
+ }
+ }
+
+ // Splat is obviously ok. Let legalizer expand it to a shuffle.
+ if (Values.size() == 1) {
+ if (EVTBits == 32) {
+ // Instead of a shuffle like this:
+ // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+ // Check if it's possible to issue this instead.
+ // shuffle (vload ptr)), undef, <1, 1, 1, 1>
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+ if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+ return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+ }
+ return SDValue();
+ }
+
+ // A vector full of immediates; various special cases are already
+ // handled, so this is best done with a single constant-pool load.
+ if (IsAllConstants)
+ return SDValue();
+
+ // For AVX-length vectors, build the individual 128-bit pieces and use
+ // shuffles to put them in place.
+ if (VT.is256BitVector()) {
+ SmallVector<SDValue, 32> V;
+ for (unsigned i = 0; i != NumElems; ++i)
+ V.push_back(Op.getOperand(i));
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+ // Build both the lower and upper subvector.
+ SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+ SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+ NumElems/2);
+
+ // Recreate the wider vector with the lower and upper part.
+ return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
+ }
+
+ // Let legalizer expand 2-wide build_vectors.
+ if (EVTBits == 64) {
+ if (NumNonZero == 1) {
+ // One half is zero or undef.
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
+ Op.getOperand(Idx));
+ return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
+ }
+ return SDValue();
+ }
+
+ // If element VT is < 32 bits, convert it to inserts into a zero vector.
+ if (EVTBits == 8 && NumElems == 16) {
+ SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ Subtarget, *this);
+ if (V.getNode()) return V;
+ }
+
+ if (EVTBits == 16 && NumElems == 8) {
+ SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ Subtarget, *this);
+ if (V.getNode()) return V;
+ }
+
+ // If element VT is == 32 bits, turn it into a number of shuffles.
+ SmallVector<SDValue, 8> V(NumElems);
+ if (NumElems == 4 && NumZero > 0) {
+ for (unsigned i = 0; i < 4; ++i) {
+ bool isZero = !(NonZeros & (1 << i));
+ if (isZero)
+ V[i] = getZeroVector(VT, Subtarget, DAG, dl);
+ else
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ }
+
+ for (unsigned i = 0; i < 2; ++i) {
+ switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
+ default: break;
+ case 0:
+ V[i] = V[i*2]; // Must be a zero vector.
+ break;
+ case 1:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
+ break;
+ case 2:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ case 3:
+ V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ }
+ }
+
+ bool Reverse1 = (NonZeros & 0x3) == 2;
+ bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
+ int MaskVec[] = {
+ Reverse1 ? 1 : 0,
+ Reverse1 ? 0 : 1,
+ static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
+ static_cast<int>(Reverse2 ? NumElems : NumElems+1)
+ };
+ return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
+ }
+
+ if (Values.size() > 1 && VT.is128BitVector()) {
+ // Check for a build vector of consecutive loads.
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = Op.getOperand(i);
+
+ // Check for elements which are consecutive loads.
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ if (LD.getNode())
+ return LD;
+
+ // Check for a build vector from mostly shuffle plus few inserting.
+ SDValue Sh = buildFromShuffleMostly(Op, DAG);
+ if (Sh.getNode())
+ return Sh;
+
+ // For SSE 4.1, use insertps to put the high elements into the low element.
+ if (getSubtarget()->hasSSE41()) {
+ SDValue Result;
+ if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
+ Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
+ else
+ Result = DAG.getUNDEF(VT);
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
+ Op.getOperand(i), DAG.getIntPtrConstant(i));
+ }
+ return Result;
+ }
+
+ // Otherwise, expand into a number of unpckl*, start by extending each of
+ // our (non-undef) elements to the full vector width with the element in the
+ // bottom slot of the vector (which generates no code for SSE).
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ else
+ V[i] = DAG.getUNDEF(VT);
+ }
+
+ // Next, we iteratively mix elements, e.g. for v4f32:
+ // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
+ // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
+ // Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
+ unsigned EltStride = NumElems >> 1;
+ while (EltStride != 0) {
+ for (unsigned i = 0; i < EltStride; ++i) {
+ // If V[i+EltStride] is undef and this is the first round of mixing,
+ // then it is safe to just drop this shuffle: V[i] is already in the
+ // right place, the one element (since it's the first round) being
+ // inserted as undef can be dropped. This isn't safe for successive
+ // rounds because they will permute elements within both vectors.
+ if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
+ EltStride == NumElems/2)
+ continue;
+
+ V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
+ }
+ EltStride >>= 1;
+ }
+ return V[0];
+ }
+ return SDValue();
+}
+
+// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
+// to create 256-bit vectors from two other 128-bit ones.
+static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ MVT ResVT = Op.getValueType().getSimpleVT();
+
+ assert(ResVT.is256BitVector() && "Value type must be 256-bit wide");
+
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ unsigned NumElems = ResVT.getVectorNumElements();
+
+ return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
+}
+
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getNumOperands() == 2);
+
+ // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors
+ // from two other 128-bit ones.
+ return LowerAVXCONCAT_VECTORS(Op, DAG);
+}
+
+// Try to lower a shuffle node into a simple blend instruction.
+static SDValue
+LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
+ return SDValue();
+ if (!Subtarget->hasInt256() && VT == MVT::v16i16)
+ return SDValue();
+
+ // Check the mask for BLEND and build the value.
+ unsigned MaskValue = 0;
+ // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
+ unsigned NumLanes = (NumElems-1)/8 + 1;
+ unsigned NumElemsInLane = NumElems / NumLanes;
+
+ // Blend for v16i16 should be symetric for the both lanes.
+ for (unsigned i = 0; i < NumElemsInLane; ++i) {
+
+ int SndLaneEltIdx = (NumLanes == 2) ?
+ SVOp->getMaskElt(i + NumElemsInLane) : -1;
+ int EltIdx = SVOp->getMaskElt(i);
+
+ if ((EltIdx < 0 || EltIdx == (int)i) &&
+ (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
+ continue;
+
+ if (((unsigned)EltIdx == (i + NumElems)) &&
+ (SndLaneEltIdx < 0 ||
+ (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
+ MaskValue |= (1<<i);
+ else
+ return SDValue();
+ }
+
+ // Convert i32 vectors to floating point if it is not AVX2.
+ // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
+ MVT BlendVT = VT;
+ if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
+ BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
+ NumElems);
+ V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2);
+ }
+
+ SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
+ DAG.getConstant(MaskValue, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
+}
+
+// v8i16 shuffles - Prefer shuffles in the following order:
+// 1. [all] pshuflw, pshufhw, optional move
+// 2. [ssse3] 1 x pshufb
+// 3. [ssse3] 2 x pshufb + 1 x por
+// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
+static SDValue
+LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 8> MaskVals;
+
+ // Determine if more than 1 of the words in each of the low and high quadwords
+ // of the result come from the same quadword of one of the two inputs. Undef
+ // mask values count as coming from any quadword, for better codegen.
+ unsigned LoQuad[] = { 0, 0, 0, 0 };
+ unsigned HiQuad[] = { 0, 0, 0, 0 };
+ std::bitset<4> InputQuads;
+ for (unsigned i = 0; i < 8; ++i) {
+ unsigned *Quad = i < 4 ? LoQuad : HiQuad;
+ int EltIdx = SVOp->getMaskElt(i);
+ MaskVals.push_back(EltIdx);
+ if (EltIdx < 0) {
+ ++Quad[0];
+ ++Quad[1];
+ ++Quad[2];
+ ++Quad[3];
+ continue;
+ }
+ ++Quad[EltIdx / 4];
+ InputQuads.set(EltIdx / 4);
+ }
+
+ int BestLoQuad = -1;
+ unsigned MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (LoQuad[i] > MaxQuad) {
+ BestLoQuad = i;
+ MaxQuad = LoQuad[i];
+ }
+ }
+
+ int BestHiQuad = -1;
+ MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (HiQuad[i] > MaxQuad) {
+ BestHiQuad = i;
+ MaxQuad = HiQuad[i];
+ }
+ }
+
+ // For SSSE3, If all 8 words of the result come from only 1 quadword of each
+ // of the two input vectors, shuffle them into one input vector so only a
+ // single pshufb instruction is necessary. If There are more than 2 input
+ // quads, disable the next transformation since it does not help SSSE3.
+ bool V1Used = InputQuads[0] || InputQuads[1];
+ bool V2Used = InputQuads[2] || InputQuads[3];
+ if (Subtarget->hasSSSE3()) {
+ if (InputQuads.count() == 2 && V1Used && V2Used) {
+ BestLoQuad = InputQuads[0] ? 0 : 1;
+ BestHiQuad = InputQuads[2] ? 2 : 3;
+ }
+ if (InputQuads.count() > 2) {
+ BestLoQuad = -1;
+ BestHiQuad = -1;
+ }
+ }
+
+ // If BestLoQuad or BestHiQuad are set, shuffle the quads together and update
+ // the shuffle mask. If a quad is scored as -1, that means that it contains
+ // words from all 4 input quadwords.
+ SDValue NewV;
+ if (BestLoQuad >= 0 || BestHiQuad >= 0) {
+ int MaskV[] = {
+ BestLoQuad < 0 ? 0 : BestLoQuad,
+ BestHiQuad < 0 ? 1 : BestHiQuad
+ };
+ NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
+ NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
+
+ // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
+ // source words for the shuffle, to aid later transformations.
+ bool AllWordsInNewV = true;
+ bool InOrder[2] = { true, true };
+ for (unsigned i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx != (int)i)
+ InOrder[i/4] = false;
+ if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
+ continue;
+ AllWordsInNewV = false;
+ break;
+ }
+
+ bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
+ if (AllWordsInNewV) {
+ for (int i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0)
+ continue;
+ idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
+ if ((idx != i) && idx < 4)
+ pshufhw = false;
+ if ((idx != i) && idx > 3)
+ pshuflw = false;
+ }
+ V1 = NewV;
+ V2Used = false;
+ BestLoQuad = 0;
+ BestHiQuad = 1;
+ }
+
+ // If we've eliminated the use of V2, and the new mask is a pshuflw or
+ // pshufhw, that's as cheap as it gets. Return the new shuffle.
+ if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
+ unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
+ unsigned TargetMask = 0;
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
+ TargetMask = pshufhw ? getShufflePSHUFHWImmediate(SVOp):
+ getShufflePSHUFLWImmediate(SVOp);
+ V1 = NewV.getOperand(0);
+ return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
+ }
+ }
+
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
+ // If we have SSSE3, and all words of the result are from 1 input vector,
+ // case 2 is generated, otherwise case 3 is generated. If no SSSE3
+ // is present, fall back to case 4.
+ if (Subtarget->hasSSSE3()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If we have elements from both input vectors, set the high bit of the
+ // shuffle mask element to zero out elements that come from V2 in the V1
+ // mask, and elements that come from V1 in the V2 mask, so that the two
+ // results can be OR'd together.
+ bool TwoInputs = V1Used && V2Used;
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx;
+ int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1;
+ pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
+ }
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ int Idx0 = (EltIdx < 16) ? 0x80 : EltIdx - 16;
+ int Idx1 = (EltIdx < 16) ? 0x80 : EltIdx - 15;
+ pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
+ }
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ }
+
+ // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
+ // and update MaskVals with new element order.
+ std::bitset<8> InOrder;
+ if (BestLoQuad >= 0) {
+ int MaskV[] = { -1, -1, -1, -1, 4, 5, 6, 7 };
+ for (int i = 0; i != 4; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ InOrder.set(i);
+ } else if ((idx / 4) == BestLoQuad) {
+ MaskV[i] = idx & 3;
+ InOrder.set(i);
+ }
+ }
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
+ NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ getShufflePSHUFLWImmediate(SVOp), DAG);
+ }
+ }
+
+ // If BestHi >= 0, generate a pshufhw to put the high elements in order,
+ // and update MaskVals with the new element order.
+ if (BestHiQuad >= 0) {
+ int MaskV[] = { 0, 1, 2, 3, -1, -1, -1, -1 };
+ for (unsigned i = 4; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ InOrder.set(i);
+ } else if ((idx / 4) == BestHiQuad) {
+ MaskV[i] = (idx & 3) + 4;
+ InOrder.set(i);
+ }
+ }
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
+ NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ getShufflePSHUFHWImmediate(SVOp), DAG);
+ }
+ }
+
+ // In case BestHi & BestLo were both -1, which means each quadword has a word
+ // from each of the four input quadwords, calculate the InOrder bitvector now
+ // before falling through to the insert/extract cleanup.
+ if (BestLoQuad == -1 && BestHiQuad == -1) {
+ NewV = V1;
+ for (int i = 0; i != 8; ++i)
+ if (MaskVals[i] < 0 || MaskVals[i] == i)
+ InOrder.set(i);
+ }
+
+ // The other elements are put in the right place using pextrw and pinsrw.
+ for (unsigned i = 0; i != 8; ++i) {
+ if (InOrder[i])
+ continue;
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ SDValue ExtOp = (EltIdx < 8) ?
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
+ DAG.getIntPtrConstant(EltIdx)) :
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
+ DAG.getIntPtrConstant(EltIdx - 8));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
+ DAG.getIntPtrConstant(i));
+ }
+ return NewV;
+}
+
+// v16i8 shuffles - Prefer shuffles in the following order:
+// 1. [ssse3] 1 x pshufb
+// 2. [ssse3] 2 x pshufb + 1 x por
+// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
+static
+SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG,
+ const X86TargetLowering &TLI) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ ArrayRef<int> MaskVals = SVOp->getMask();
+
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
+ // If we have SSSE3, case 1 is generated when all result bytes come from
+ // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
+ // present, fall back to case 3.
+
+ // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
+ if (TLI.getSubtarget()->hasSSSE3()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If all result elements are from one input vector, then only translate
+ // undef mask values to 0x80 (zero out result) in the pshufb mask.
+ //
+ // Otherwise, we have elements from both input vectors, and must zero out
+ // elements that come from V2 in the first mask, and V1 in the second mask
+ // so that we can OR them together.
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0 || EltIdx >= 16)
+ EltIdx = 0x80;
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+
+ // As PSHUFB will zero elements with negative indices, it's safe to ignore
+ // the 2nd operand if it's undefined or zero.
+ if (V2.getOpcode() == ISD::UNDEF ||
+ ISD::isBuildVectorAllZeros(V2.getNode()))
+ return V1;
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ EltIdx = (EltIdx < 16) ? 0x80 : EltIdx - 16;
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ }
+
+ // No SSSE3 - Calculate in place words and then fix all out of place words
+ // With 0-16 extracts & inserts. Worst case is 16 bytes out of order from
+ // the 16 different words that comprise the two doublequadword input vectors.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
+ SDValue NewV = V1;
+ for (int i = 0; i != 8; ++i) {
+ int Elt0 = MaskVals[i*2];
+ int Elt1 = MaskVals[i*2+1];
+
+ // This word of the result is all undef, skip it.
+ if (Elt0 < 0 && Elt1 < 0)
+ continue;
+
+ // This word of the result is already in the correct place, skip it.
+ if ((Elt0 == i*2) && (Elt1 == i*2+1))
+ continue;
+
+ SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
+ SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
+ SDValue InsElt;
+
+ // If Elt0 and Elt1 are defined, are consecutive, and can be load
+ // using a single extract together, load it and store it.
+ if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ continue;
+ }
+
+ // If Elt1 is defined, extract it from the appropriate source. If the
+ // source byte is not also odd, shift the extracted word left 8 bits
+ // otherwise clear the bottom 8 bits if we need to do an or.
+ if (Elt1 >= 0) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ if ((Elt1 & 1) == 0)
+ InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt.getValueType())));
+ else if (Elt0 >= 0)
+ InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
+ DAG.getConstant(0xFF00, MVT::i16));
+ }
+ // If Elt0 is defined, extract it from the appropriate source. If the
+ // source byte is not also even, shift the extracted word right 8 bits. If
+ // Elt1 was also defined, OR the extracted values together before
+ // inserting them in the result.
+ if (Elt0 >= 0) {
+ SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+ Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
+ if ((Elt0 & 1) != 0)
+ InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt0.getValueType())));
+ else if (Elt1 >= 0)
+ InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
+ DAG.getConstant(0x00FF, MVT::i16));
+ InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
+ : InsElt0;
+ }
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
+}
+
+// v32i8 shuffles - Translate to VPSHUFB if possible.
+static
+SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 32> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
+
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ // VPSHUFB may be generated if
+ // (1) one of input vector is undefined or zeroinitializer.
+ // The mask value 0x80 puts 0 in the corresponding slot of the vector.
+ // And (2) the mask indexes don't cross the 128-bit lane.
+ if (VT != MVT::v32i8 || !Subtarget->hasInt256() ||
+ (!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
+ return SDValue();
+
+ if (V1IsAllZero && !V2IsAllZero) {
+ CommuteVectorShuffleMask(MaskVals, 32);
+ V1 = V2;
+ }
+ SmallVector<SDValue, 32> pshufbMask;
+ for (unsigned i = 0; i != 32; i++) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0 || EltIdx >= 32)
+ EltIdx = 0x80;
+ else {
+ if ((EltIdx >= 16 && i < 16) || (EltIdx < 16 && i >= 16))
+ // Cross lane is not allowed.
+ return SDValue();
+ EltIdx &= 0xf;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v32i8, &pshufbMask[0], 32));
+}
+
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements point to elements in
+/// the right sequence. e.g.
+/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
+static
+SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+ DebugLoc dl = SVOp->getDebugLoc();
+ unsigned NumElems = VT.getVectorNumElements();
+ MVT NewVT;
+ unsigned Scale;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected!");
+ case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; Scale = 4; break;
+ case MVT::v16i16: NewVT = MVT::v8i32; Scale = 2; break;
+ case MVT::v32i8: NewVT = MVT::v8i32; Scale = 4; break;
+ }
+
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i != NumElems; i += Scale) {
+ int StartIdx = -1;
+ for (unsigned j = 0; j != Scale; ++j) {
+ int EltIdx = SVOp->getMaskElt(i+j);
+ if (EltIdx < 0)
+ continue;
+ if (StartIdx < 0)
+ StartIdx = (EltIdx / Scale);
+ if (EltIdx != (int)(StartIdx*Scale + j))
+ return SDValue();
+ }
+ MaskVec.push_back(StartIdx);
+ }
+
+ SDValue V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(0));
+ SDValue V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(1));
+ return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
+}
+
+/// getVZextMovL - Return a zero-extending vector move low node.
+///
+static SDValue getVZextMovL(MVT VT, EVT OpVT,
+ SDValue SrcOp, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget, DebugLoc dl) {
+ if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ LoadSDNode *LD = NULL;
+ if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
+ LD = dyn_cast<LoadSDNode>(SrcOp);
+ if (!LD) {
+ // movssrr and movsdrr do not clear top bits. Try to use movd, movq
+ // instead.
+ MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+ if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
+ SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
+ SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
+ // PR2108
+ OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ OpVT,
+ SrcOp.getOperand(0)
+ .getOperand(0))));
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl,
+ OpVT, SrcOp)));
+}
+
+/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles
+/// which could not be matched by any known target speficic shuffle
+static SDValue
+LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+
+ SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumLaneElems = NumElems / 2;
+
+ DebugLoc dl = SVOp->getDebugLoc();
+ MVT EltVT = VT.getVectorElementType();
+ MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
+ SDValue Output[2];
+
+ SmallVector<int, 16> Mask;
+ for (unsigned l = 0; l < 2; ++l) {
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with UseBuildVector set.
+ bool UseBuildVector = false;
+ int InputUsed[2] = { -1, -1 }; // Not yet discovered.
+ unsigned LaneStart = l * NumLaneElems;
+ for (unsigned i = 0; i != NumLaneElems; ++i) {
+ // The mask element. This indexes into the input.
+ int Idx = SVOp->getMaskElt(i+LaneStart);
+ if (Idx < 0) {
+ // the mask element does not index into any input vector.
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // The input vector this mask element indexes into.
+ int Input = Idx / NumLaneElems;
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NumLaneElems;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input)
+ // This input vector is already an operand.
+ break;
+ if (InputUsed[OpNo] < 0) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ UseBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Mask.push_back(Idx + OpNo * NumLaneElems);
+ }
+
+ if (UseBuildVector) {
+ SmallVector<SDValue, 16> SVOps;
+ for (unsigned i = 0; i != NumLaneElems; ++i) {
+ // The mask element. This indexes into the input.
+ int Idx = SVOp->getMaskElt(i+LaneStart);
+ if (Idx < 0) {
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // The input vector this mask element indexes into.
+ int Input = Idx / NumElems;
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NumElems;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ SVOp->getOperand(Input),
+ DAG.getIntPtrConstant(Idx)));
+ }
+
+ // Construct the output using a BUILD_VECTOR.
+ Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &SVOps[0],
+ SVOps.size());
+ } else if (InputUsed[0] < 0) {
+ // No input vectors were used! The result is undefined.
+ Output[l] = DAG.getUNDEF(NVT);
+ } else {
+ SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2),
+ (InputUsed[0] % 2) * NumLaneElems,
+ DAG, dl);
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) :
+ Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2),
+ (InputUsed[1] % 2) * NumLaneElems, DAG, dl);
+ // At least one input vector was used. Create a new shuffle vector.
+ Output[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
+ }
+
+ Mask.clear();
+ }
+
+ // Concatenate the result back
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Output[0], Output[1]);
+}
+
+/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
+/// 4 elements, and match them with several different shuffle types.
+static SDValue
+LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+
+ assert(VT.is128BitVector() && "Unsupported vector size");
+
+ std::pair<int, int> Locs[4];
+ int Mask1[] = { -1, -1, -1, -1 };
+ SmallVector<int, 8> PermMask(SVOp->getMask().begin(), SVOp->getMask().end());
+
+ unsigned NumHi = 0;
+ unsigned NumLo = 0;
+ for (unsigned i = 0; i != 4; ++i) {
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else {
+ assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
+ if (Idx < 4) {
+ Locs[i] = std::make_pair(0, NumLo);
+ Mask1[NumLo] = Idx;
+ NumLo++;
+ } else {
+ Locs[i] = std::make_pair(1, NumHi);
+ if (2+NumHi < 4)
+ Mask1[2+NumHi] = Idx;
+ NumHi++;
+ }
+ }
+ }
+
+ if (NumLo <= 2 && NumHi <= 2) {
+ // If no more than two elements come from either vector. This can be
+ // implemented with two shuffles. First shuffle gather the elements.
+ // The second shuffle, which takes the first shuffle as both of its
+ // vector operands, put the elements into the right order.
+ V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ int Mask2[] = { -1, -1, -1, -1 };
+
+ for (unsigned i = 0; i != 4; ++i)
+ if (Locs[i].first != -1) {
+ unsigned Idx = (i < 2) ? 0 : 4;
+ Idx += Locs[i].first * 2 + Locs[i].second;
+ Mask2[i] = Idx;
+ }
+
+ return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
+ }
+
+ if (NumLo == 3 || NumHi == 3) {
+ // Otherwise, we must have three elements from one vector, call it X, and
+ // one element from the other, call it Y. First, use a shufps to build an
+ // intermediate vector with the one element from Y and the element from X
+ // that will be in the same half in the final destination (the indexes don't
+ // matter). Then, use a shufps to build the final vector, taking the half
+ // containing the element from Y from the intermediate, and the other half
+ // from X.
+ if (NumHi == 3) {
+ // Normalize it so the 3 elements come from V1.
+ CommuteVectorShuffleMask(PermMask, 4);
+ std::swap(V1, V2);
+ }
+
+ // Find the element from V2.
+ unsigned HiIndex;
+ for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
+ int Val = PermMask[HiIndex];
+ if (Val < 0)
+ continue;
+ if (Val >= 4)
+ break;
+ }
+
+ Mask1[0] = PermMask[HiIndex];
+ Mask1[1] = -1;
+ Mask1[2] = PermMask[HiIndex^1];
+ Mask1[3] = -1;
+ V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ if (HiIndex >= 2) {
+ Mask1[0] = PermMask[0];
+ Mask1[1] = PermMask[1];
+ Mask1[2] = HiIndex & 1 ? 6 : 4;
+ Mask1[3] = HiIndex & 1 ? 4 : 6;
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+ }
+
+ Mask1[0] = HiIndex & 1 ? 2 : 0;
+ Mask1[1] = HiIndex & 1 ? 0 : 2;
+ Mask1[2] = PermMask[2];
+ Mask1[3] = PermMask[3];
+ if (Mask1[2] >= 0)
+ Mask1[2] += 4;
+ if (Mask1[3] >= 0)
+ Mask1[3] += 4;
+ return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
+ }
+
+ // Break it into (shuffle shuffle_hi, shuffle_lo).
+ int LoMask[] = { -1, -1, -1, -1 };
+ int HiMask[] = { -1, -1, -1, -1 };
+
+ int *MaskPtr = LoMask;
+ unsigned MaskIdx = 0;
+ unsigned LoIdx = 0;
+ unsigned HiIdx = 2;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (i == 2) {
+ MaskPtr = HiMask;
+ MaskIdx = 1;
+ LoIdx = 0;
+ HiIdx = 2;
+ }
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else if (Idx < 4) {
+ Locs[i] = std::make_pair(MaskIdx, LoIdx);
+ MaskPtr[LoIdx] = Idx;
+ LoIdx++;
+ } else {
+ Locs[i] = std::make_pair(MaskIdx, HiIdx);
+ MaskPtr[HiIdx] = Idx;
+ HiIdx++;
+ }
+ }
+
+ SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
+ SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
+ int MaskOps[] = { -1, -1, -1, -1 };
+ for (unsigned i = 0; i != 4; ++i)
+ if (Locs[i].first != -1)
+ MaskOps[i] = Locs[i].first * 4 + Locs[i].second;
+ return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
+}
+
+static bool MayFoldVectorLoad(SDValue V) {
+ while (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR &&
+ V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF)
+ // BUILD_VECTOR (load), undef
+ V = V.getOperand(0);
+
+ return MayFoldLoad(V);
+}
+
+static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ // Canonizalize to v2f64.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+ V1, DAG));
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+ bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+ if (HasSSE2 && VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+ // v4f32 or v4i32: canonizalized to v4f32 (which is legal for SSE1)
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V2), DAG));
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ "unsupported shuffle type");
+
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ // v4i32 or v4f32
+ return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
+ // operand of these instructions is only memory, so check if there's a
+ // potencial load folding here, otherwise use SHUFPS or MOVSD to match the
+ // same masks.
+ bool CanFoldLoad = false;
+
+ // Trivial case, when V2 comes from a load.
+ if (MayFoldVectorLoad(V2))
+ CanFoldLoad = true;
+
+ // When V1 is a load, it can be folded later into a store in isel, example:
+ // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
+ // turns into:
+ // (MOVLPSmr addr:$src1, VR128:$src2)
+ // So, recognize this potential and also use MOVLPS or MOVLPD
+ else if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+ CanFoldLoad = true;
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ if (CanFoldLoad) {
+ if (HasSSE2 && NumElems == 2)
+ return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
+
+ if (NumElems == 4)
+ // If we don't care about the second element, proceed to use movss.
+ if (SVOp->getMaskElt(1) != -1)
+ return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+ }
+
+ // movl and movlp will both match v2i64, but v2i64 is never matched by
+ // movl earlier because we make it strict to avoid messing with the movlp load
+ // folding logic (see the code above getMOVLP call). Match it here then,
+ // this is horrible, but will stay like this until we move all shuffle
+ // matching to x86 specific nodes. Note that for the 1st condition all
+ // types are matched with movsd.
+ if (HasSSE2) {
+ // FIXME: isMOVLMask should be checked and matched before getMOVLP,
+ // as to remove this logic from here, as much as possible
+ if (NumElems == 2 || !isMOVLMask(SVOp->getMask(), VT))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+ }
+
+ assert(VT != MVT::v4i32 && "unsupported shuffle type");
+
+ // Invert the operand order and use SHUFPS to match it.
+ return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V2, V1,
+ getShuffleSHUFImmediate(SVOp), DAG);
+}
+
+// Reduce a vector shuffle to zext.
+SDValue
+X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+ // PMOVZX is only available from SSE41.
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ // Only AVX2 support 256-bit vector integer extending.
+ if (!Subtarget->hasInt256() && VT.is256BitVector())
+ return SDValue();
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Extending is an unary operation and the element type of the source vector
+ // won't be equal to or larger than i64.
+ if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() ||
+ VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4.
+ unsigned Shift = 1; // Start from 2, i.e. 1 << 1.
+ while ((1U << Shift) < NumElems) {
+ if (SVOp->getMaskElt(1U << Shift) == 1)
+ break;
+ Shift += 1;
+ // The maximal ratio is 8, i.e. from i8 to i64.
+ if (Shift > 3)
+ return SDValue();
+ }
+
+ // Check the shuffle mask.
+ unsigned Mask = (1U << Shift) - 1;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int EltIdx = SVOp->getMaskElt(i);
+ if ((i & Mask) != 0 && EltIdx != -1)
+ return SDValue();
+ if ((i & Mask) == 0 && (unsigned)EltIdx != (i >> Shift))
+ return SDValue();
+ }
+
+ LLVMContext *Context = DAG.getContext();
+ unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
+ EVT NeVT = EVT::getIntegerVT(*Context, NBits);
+ EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);
+
+ if (!isTypeLegal(NVT))
+ return SDValue();
+
+ // Simplify the operand as it's prepared to be fed into shuffle.
+ unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
+ if (V1.getOpcode() == ISD::BITCAST &&
+ V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ V1.getOperand(0)
+ .getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
+ SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
+ ConstantSDNode *CIdx =
+ dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1));
+ // If it's foldable, i.e. normal load with single use, we will let code
+ // selection to fold it. Otherwise, we will short the conversion sequence.
+ if (CIdx && CIdx->getZExtValue() == 0 &&
+ (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
+ if (V.getValueSizeInBits() > V1.getValueSizeInBits()) {
+ // The "ext_vec_elt" node is wider than the result node.
+ // In this case we should extract subvector from V.
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
+ unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits();
+ EVT FullVT = V.getValueType();
+ EVT SubVecVT = EVT::getVectorVT(*Context,
+ FullVT.getVectorElementType(),
+ FullVT.getVectorNumElements()/Ratio);
+ V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
+ DAG.getIntPtrConstant(0));
+ }
+ V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
+}
+
+SDValue
+X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ MVT VT = Op.getValueType().getSimpleVT();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+
+ if (isZeroShuffle(SVOp))
+ return getZeroVector(VT, Subtarget, DAG, dl);
+
+ // Handle splat operations
+ if (SVOp->isSplat()) {
+ // Use vbroadcast whenever the splat comes from a foldable load
+ SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ if (Broadcast.getNode())
+ return Broadcast;
+ }
+
+ // Check integer expanding shuffles.
+ SDValue NewOp = LowerVectorIntExtend(Op, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+
+ // If the shuffle can be profitably rewritten as a narrower shuffle, then
+ // do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
+ VT == MVT::v16i16 || VT == MVT::v32i8) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
+ if (NewOp.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
+ } else if ((VT == MVT::v4i32 ||
+ (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ // FIXME: Figure out a cleaner way to do this.
+ // Try to make use of movq to zero out the top part.
+ if (ISD::isBuildVectorAllZeros(V2.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
+ if (NewOp.getNode()) {
+ MVT NewVT = NewOp.getValueType().getSimpleVT();
+ if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
+ NewVT, true, false))
+ return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
+ DAG, Subtarget, dl);
+ }
+ } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
+ if (NewOp.getNode()) {
+ MVT NewVT = NewOp.getValueType().getSimpleVT();
+ if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
+ return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
+ DAG, Subtarget, dl);
+ }
+ }
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ MVT VT = Op.getValueType().getSimpleVT();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned NumElems = VT.getVectorNumElements();
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsSplat = false;
+ bool V2IsSplat = false;
+ bool HasSSE2 = Subtarget->hasSSE2();
+ bool HasFp256 = Subtarget->hasFp256();
+ bool HasInt256 = Subtarget->hasInt256();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+
+ assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
+
+ if (V1IsUndef && V2IsUndef)
+ return DAG.getUNDEF(VT);
+
+ assert(!V1IsUndef && "Op 1 of shuffle should not be undef");
+
+ // Vector shuffle lowering takes 3 steps:
+ //
+ // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled.
+ // 2) Matching of shuffles with known shuffle masks to x86 target specific
+ // shuffle nodes.
+ // 3) Rewriting of unmatched masks into new generic shuffle operations,
+ // so the shuffle can be broken into other shuffles and the legalizer can
+ // try the lowering again.
+ //
+ // The general idea is that no vector_shuffle operation should be left to
+ // be matched during isel, all of them must be converted to a target specific
+ // node here.
+
+ // Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled. The actual code
+ // doesn't include all of those, work in progress...
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+
+ SmallVector<int, 8> M(SVOp->getMask().begin(), SVOp->getMask().end());
+
+ // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
+ // unpckh_undef). Only use pshufd if speed is more important than size.
+ if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasInt256))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
+ if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasInt256))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
+
+ if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() &&
+ V2IsUndef && MayFoldVectorLoad(V1))
+ return getMOVDDup(Op, dl, V1, DAG);
+
+ if (isMOVHLPS_v_undef_Mask(M, VT))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ // Use to match splats
+ if (HasSSE2 && isUNPCKHMask(M, VT, HasInt256) && V2IsUndef &&
+ (VT == MVT::v2f64 || VT == MVT::v2i64))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
+
+ if (isPSHUFDMask(M, VT)) {
+ // The actual implementation will match the mask in the if above and then
+ // during isel it can match several different instructions, not only pshufd
+ // as its name says, sad but true, emulate the behavior for now...
+ if (isMOVDDUPMask(M, VT) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+ unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
+
+ if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+
+ if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask,
+ DAG);
+
+ return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
+ TargetMask, DAG);
+ }
+
+ // Check if this can be converted into a logical shift.
+ bool isLeft = false;
+ unsigned ShAmt = 0;
+ SDValue ShVal;
+ bool isShift = HasSSE2 && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ if (isShift && ShVal.hasOneUse()) {
+ // If the shifted value has multiple uses, it may be cheaper to use
+ // v_set0 + movlhps or movhlps, etc.
+ MVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ if (isMOVLMask(M, VT)) {
+ if (ISD::isBuildVectorAllZeros(V1.getNode()))
+ return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
+ if (!isMOVLPMask(M, VT)) {
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+
+ if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+ }
+ }
+
+ // FIXME: fold these into legal mask.
+ if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasInt256))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+
+ if (isMOVHLPSMask(M, VT))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ if (V2IsUndef && isMOVSHDUPMask(M, VT, Subtarget))
+ return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+
+ if (V2IsUndef && isMOVSLDUPMask(M, VT, Subtarget))
+ return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+
+ if (isMOVLPMask(M, VT))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
+
+ if (ShouldXformToMOVHLPS(M, VT) ||
+ ShouldXformToMOVLP(V1.getNode(), V2.getNode(), M, VT))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ if (isShift) {
+ // No better options. Use a vshldq / vsrldq.
+ MVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ bool Commuted = false;
+ // FIXME: This should also accept a bitcast of a splat? Be careful, not
+ // 1,1,1,1 -> v8i16 though.
+ V1IsSplat = isSplatVector(V1.getNode());
+ V2IsSplat = isSplatVector(V2.getNode());
+
+ // Canonicalize the splat or undef, if present, to be on the RHS.
+ if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
+ CommuteVectorShuffleMask(M, NumElems);
+ std::swap(V1, V2);
+ std::swap(V1IsSplat, V2IsSplat);
+ Commuted = true;
+ }
+
+ if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
+ // Shuffling low element of v1 into undef, just return v1.
+ if (V2IsUndef)
+ return V1;
+ // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
+ // the instruction selector will not match, so get a canonical MOVL with
+ // swapped operands to undo the commute.
+ return getMOVL(DAG, dl, VT, V2, V1);
+ }
+
+ if (isUNPCKLMask(M, VT, HasInt256))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
+
+ if (isUNPCKHMask(M, VT, HasInt256))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
+
+ if (V2IsSplat) {
+ // Normalize mask so all entries that point to V2 points to its first
+ // element then try to match unpck{h|l} again. If match, return a
+ // new vector_shuffle with the corrected mask.p
+ SmallVector<int, 8> NewMask(M.begin(), M.end());
+ NormalizeMask(NewMask, NumElems);
+ if (isUNPCKLMask(NewMask, VT, HasInt256, true))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
+ if (isUNPCKHMask(NewMask, VT, HasInt256, true))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
+ }
+
+ if (Commuted) {
+ // Commute is back and try unpck* again.
+ // FIXME: this seems wrong.
+ CommuteVectorShuffleMask(M, NumElems);
+ std::swap(V1, V2);
+ std::swap(V1IsSplat, V2IsSplat);
+ Commuted = false;
+
+ if (isUNPCKLMask(M, VT, HasInt256))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
+
+ if (isUNPCKHMask(M, VT, HasInt256))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
+ }
+
+ // Normalize the node to match x86 shuffle ops if needed
+ if (!V2IsUndef && (isSHUFPMask(M, VT, HasFp256, /* Commuted */ true)))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ // The checks below are all present in isShuffleMaskLegal, but they are
+ // inlined here right now to enable us to directly emit target specific
+ // nodes, and remove one by one until they don't return Op anymore.
+
+ if (isPALIGNRMask(M, VT, Subtarget))
+ return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
+ getShufflePALIGNRImmediate(SVOp),
+ DAG);
+
+ if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
+ SVOp->getSplatIndex() == 0 && V2IsUndef) {
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
+ }
+
+ if (isPSHUFHWMask(M, VT, HasInt256))
+ return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
+ getShufflePSHUFHWImmediate(SVOp),
+ DAG);
+
+ if (isPSHUFLWMask(M, VT, HasInt256))
+ return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
+ getShufflePSHUFLWImmediate(SVOp),
+ DAG);
+
+ if (isSHUFPMask(M, VT, HasFp256))
+ return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
+ getShuffleSHUFImmediate(SVOp), DAG);
+
+ if (isUNPCKL_v_undef_Mask(M, VT, HasInt256))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
+ if (isUNPCKH_v_undef_Mask(M, VT, HasInt256))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
+
+ //===--------------------------------------------------------------------===//
+ // Generate target specific nodes for 128 or 256-bit shuffles only
+ // supported in the AVX instruction set.
+ //
+
+ // Handle VMOVDDUPY permutations
+ if (V2IsUndef && isMOVDDUPYMask(M, VT, HasFp256))
+ return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
+
+ // Handle VPERMILPS/D* permutations
+ if (isVPERMILPMask(M, VT, HasFp256)) {
+ if (HasInt256 && VT == MVT::v8i32)
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
+ getShuffleSHUFImmediate(SVOp), DAG);
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
+ getShuffleSHUFImmediate(SVOp), DAG);
+ }
+
+ // Handle VPERM2F128/VPERM2I128 permutations
+ if (isVPERM2X128Mask(M, VT, HasFp256))
+ return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
+ V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
+
+ SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(SVOp, Subtarget, DAG);
+ if (BlendOp.getNode())
+ return BlendOp;
+
+ if (V2IsUndef && HasInt256 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
+ SmallVector<SDValue, 8> permclMask;
+ for (unsigned i = 0; i != 8; ++i) {
+ permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32));
+ }
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32,
+ &permclMask[0], 8);
+ // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
+ return DAG.getNode(X86ISD::VPERMV, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
+ }
+
+ if (V2IsUndef && HasInt256 && (VT == MVT::v4i64 || VT == MVT::v4f64))
+ return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1,
+ getShuffleCLImmediate(SVOp), DAG);
+
+ //===--------------------------------------------------------------------===//
+ // Since no target specific shuffle was selected for this generic one,
+ // lower it into other known shuffles. FIXME: this isn't true yet, but
+ // this is the plan.
+ //
+
+ // Handle v8i16 specifically since SSE can do byte extraction and insertion.
+ if (VT == MVT::v8i16) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, Subtarget, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ if (VT == MVT::v16i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ if (VT == MVT::v32i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, Subtarget, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ // Handle all 128-bit wide vectors with 4 elements, and match them with
+ // several different shuffle types.
+ if (NumElems == 4 && VT.is128BitVector())
+ return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
+
+ // Handle general 256-bit shuffles
+ if (VT.is256BitVector())
+ return LowerVECTOR_SHUFFLE_256(SVOp, DAG);
+
+ return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector())
+ return SDValue();
+
+ if (VT.getSizeInBits() == 8) {
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ }
+
+ if (VT.getSizeInBits() == 16) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ // If Idx is 0, it's cheaper to do a move instead of a pextrw.
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1)));
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ }
+
+ if (VT == MVT::f32) {
+ // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
+ // the result back to FR32 register. It's only worth matching if the
+ // result has a single use which is a store or a bitcast to i32. And in
+ // the case of a store, it's not worth it if the index is a constant 0,
+ // because a MOVSSmr can be used instead, which is smaller and faster.
+ if (!Op.hasOneUse())
+ return SDValue();
+ SDNode *User = *Op.getNode()->use_begin();
+ if ((User->getOpcode() != ISD::STORE ||
+ (isa<ConstantSDNode>(Op.getOperand(1)) &&
+ cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
+ (User->getOpcode() != ISD::BITCAST ||
+ User->getValueType(0) != MVT::i32))
+ return SDValue();
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
+ }
+
+ if (VT == MVT::i32 || VT == MVT::i64) {
+ // ExtractPS/pextrq works with constant index.
+ if (isa<ConstantSDNode>(Op.getOperand(1)))
+ return Op;
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ MVT VecVT = Vec.getValueType().getSimpleVT();
+
+ // If this is a 256-bit vector result, first extract the 128-bit vector and
+ // then extract the element from the 128-bit vector.
+ if (VecVT.is256BitVector()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ unsigned NumElems = VecVT.getVectorNumElements();
+ SDValue Idx = Op.getOperand(1);
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Get the 128-bit vector.
+ Vec = Extract128BitVector(Vec, IdxVal, DAG, dl);
+
+ if (IdxVal >= NumElems/2)
+ IdxVal -= NumElems/2;
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+ DAG.getConstant(IdxVal, MVT::i32));
+ }
+
+ assert(VecVT.is128BitVector() && "Unexpected vector length");
+
+ if (Subtarget->hasSSE41()) {
+ SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Res.getNode())
+ return Res;
+ }
+
+ MVT VT = Op.getValueType().getSimpleVT();
+ DebugLoc dl = Op.getDebugLoc();
+ // TODO: handle v16i8.
+ if (VT.getSizeInBits() == 16) {
+ SDValue Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32, Vec),
+ Op.getOperand(1)));
+ // Transform it so it match pextrw which produces a 32-bit result.
+ MVT EltVT = MVT::i32;
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ }
+
+ if (VT.getSizeInBits() == 32) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // SHUFPS the element to the lowest double word, then movss.
+ int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
+ MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ }
+
+ if (VT.getSizeInBits() == 64) {
+ // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
+ // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
+ // to match extract_elt for f64.
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // UNPCKHPD the element to the lowest double word, then movsd.
+ // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
+ // to a f64mem, the whole operation is folded into a single MOVHPDmr.
+ int Mask[2] = { 1, -1 };
+ MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ if (!VT.is128BitVector())
+ return SDValue();
+
+ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
+ isa<ConstantSDNode>(N2)) {
+ unsigned Opc;
+ if (VT == MVT::v8i16)
+ Opc = X86ISD::PINSRW;
+ else if (VT == MVT::v16i8)
+ Opc = X86ISD::PINSRB;
+ else
+ Opc = X86ISD::PINSRB;
+
+ // Transform it so it match pinsr{b,w} which expects a GR32 as its second
+ // argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ }
+
+ if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into these
+ // bits. For example (insert (extract, 3), 2) could be matched by putting
+ // the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // combine either bitwise AND or insert of float 0.0 to set these bits.
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+ // Create this as a scalar to vector..
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
+ }
+
+ if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
+ // PINSR* works with constant index.
+ return Op;
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ // If this is a 256-bit vector result, first extract the 128-bit vector,
+ // insert the element into the extracted half and then place it back.
+ if (VT.is256BitVector()) {
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+
+ // Get the desired 128-bit vector half.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
+ SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl);
+
+ // Insert the element into the desired half.
+ bool Upper = IdxVal >= NumElems/2;
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
+ DAG.getConstant(Upper ? IdxVal-NumElems/2 : IdxVal, MVT::i32));
+
+ // Insert the changed part back to the 256-bit vector
+ return Insert128BitVector(N0, V, IdxVal, DAG, dl);
+ }
+
+ if (Subtarget->hasSSE41())
+ return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+
+ if (EltVT == MVT::i8)
+ return SDValue();
+
+ if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
+ // Transform it so it match pinsrw which expects a 16-bit value in a GR32
+ // as its second argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
+ }
+ return SDValue();
+}
+
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ MVT OpVT = Op.getValueType().getSimpleVT();
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (!OpVT.is128BitVector()) {
+ // Insert into a 128-bit vector.
+ EVT VT128 = EVT::getVectorVT(*Context,
+ OpVT.getVectorElementType(),
+ OpVT.getVectorNumElements() / 2);
+
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
+
+ // Insert the 128-bit vector.
+ return Insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
+ }
+
+ if (OpVT == MVT::v1i64 &&
+ Op.getOperand(0).getValueType() == MVT::i64)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
+
+ SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
+ assert(OpVT.is128BitVector() && "Expected an SSE type!");
+ return DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
+}
+
+// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
+// a simple subregister reference or explicit instructions to grab
+// upper bits of a vector.
+static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ if (Subtarget->hasFp256()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue Idx = Op.getNode()->getOperand(1);
+
+ if (Op.getNode()->getValueType(0).is128BitVector() &&
+ Vec.getNode()->getValueType(0).is256BitVector() &&
+ isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ return Extract128BitVector(Vec, IdxVal, DAG, dl);
+ }
+ }
+ return SDValue();
+}
+
+// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
+// simple superregister reference or explicit instructions to insert
+// the upper bits of a vector.
+static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ if (Subtarget->hasFp256()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue SubVec = Op.getNode()->getOperand(1);
+ SDValue Idx = Op.getNode()->getOperand(2);
+
+ if (Op.getNode()->getValueType(0).is256BitVector() &&
+ SubVec.getNode()->getValueType(0).is128BitVector() &&
+ isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
+ }
+ }
+ return SDValue();
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
+// one of the above mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing mode. These wrapped nodes will be selected
+// into MOV32ri.
+SDValue
+X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
+ SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+ CP->getAlignment(),
+ CP->getOffset(), OpFlag);
+ DebugLoc DL = CP->getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (OpFlag) {
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+ OpFlag);
+ DebugLoc DL = JT->getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (OpFlag)
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel)) {
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF())
+ OpFlag = X86II::MO_GOTPCREL;
+ WrapperKind = X86ISD::WrapperRIP;
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOT;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+ } else if (Subtarget->isPICStyleStubNoDynamic()) {
+ OpFlag = X86II::MO_DARWIN_NONLAZY;
+ }
+
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
+
+ DebugLoc DL = Op.getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->is64Bit()) {
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+ }
+
+ // For symbols that require a load from a stub to get the address, emit the
+ // load.
+ if (isGlobalStubReference(OpFlag))
+ Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ // Create the TargetBlockAddressAddress node.
+ unsigned char OpFlags =
+ Subtarget->ClassifyBlockAddressReference();
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(), Offset,
+ OpFlags);
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset, SelectionDAG &DAG) const {
+ // Create the TargetGlobalAddress node, folding in the constant
+ // offset if it is legal.
+ unsigned char OpFlags =
+ Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ SDValue Result;
+ if (OpFlags == X86II::MO_NO_FLAG &&
+ X86::isOffsetSuitableForCodeModel(Offset, M)) {
+ // A direct static reference to a global.
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+ Offset = 0;
+ } else {
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
+
+ // For globals that require a load from a stub to get the address, emit the
+ // load.
+ if (isGlobalStubReference(OpFlags))
+ Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
+}
+
+static SDValue
+GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
+ SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
+ unsigned char OperandFlags, bool LocalDynamic = false) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(),
+ OperandFlags);
+
+ X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
+ : X86ISD::TLSADDR;
+
+ if (InFlag) {
+ SDValue Ops[] = { Chain, TGA, *InFlag };
+ Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3);
+ } else {
+ SDValue Ops[] = { Chain, TGA };
+ Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2);
+ }
+
+ // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
+ MFI->setAdjustsStack(true);
+
+ SDValue Flag = Chain.getValue(1);
+ return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
+static SDValue
+LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT) {
+ SDValue InFlag;
+ DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), PtrVT), InFlag);
+ InFlag = Chain.getValue(1);
+
+ return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
+static SDValue
+LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
+ X86::RAX, X86II::MO_TLSGD);
+}
+
+static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool is64Bit) {
+ DebugLoc dl = GA->getDebugLoc();
+
+ // Get the start address of the TLS block for this module.
+ X86MachineFunctionInfo* MFI = DAG.getMachineFunction()
+ .getInfo<X86MachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+ SDValue Base;
+ if (is64Bit) {
+ Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX,
+ X86II::MO_TLSLD, /*LocalDynamic=*/true);
+ } else {
+ SDValue InFlag;
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT), InFlag);
+ InFlag = Chain.getValue(1);
+ Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
+ X86II::MO_TLSLDM, /*LocalDynamic=*/true);
+ }
+
+ // Note: the CleanupLocalDynamicTLSPass will remove redundant computations
+ // of Base.
+
+ // Build x@dtpoff.
+ unsigned char OperandFlags = X86II::MO_DTPOFF;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(), OperandFlags);
+ SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
+
+ // Add x@dtpoff with the base.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
+static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT, TLSModel::Model model,
+ bool is64Bit, bool isPIC) {
+ DebugLoc dl = GA->getDebugLoc();
+
+ // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+ Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
+ is64Bit ? 257 : 256));
+
+ SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr),
+ false, false, false, 0);
+
+ unsigned char OperandFlags = 0;
+ // Most TLS accesses are not RIP relative, even on x86-64. One exception is
+ // initialexec.
+ unsigned WrapperKind = X86ISD::Wrapper;
+ if (model == TLSModel::LocalExec) {
+ OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
+ } else if (model == TLSModel::InitialExec) {
+ if (is64Bit) {
+ OperandFlags = X86II::MO_GOTTPOFF;
+ WrapperKind = X86ISD::WrapperRIP;
+ } else {
+ OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF;
+ }
+ } else {
+ llvm_unreachable("Unexpected model");
+ }
+
+ // emit "addl x@ntpoff,%eax" (local exec)
+ // or "addl x@indntpoff,%eax" (initial exec)
+ // or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(), OperandFlags);
+ SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
+
+ if (model == TLSModel::InitialExec) {
+ if (isPIC && !is64Bit) {
+ Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT),
+ Offset);
+ }
+
+ Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getGOT(), false, false, false,
+ 0);
+ }
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GA->getGlobal();
+
+ if (Subtarget->isTargetELF()) {
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ if (Subtarget->is64Bit())
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+ case TLSModel::LocalDynamic:
+ return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(),
+ Subtarget->is64Bit());
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
+ Subtarget->is64Bit(),
+ getTargetMachine().getRelocationModel() == Reloc::PIC_);
+ }
+ llvm_unreachable("Unknown TLS model.");
+ }
+
+ if (Subtarget->isTargetDarwin()) {
+ // Darwin only has one model of TLS. Lower to that.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
+ X86ISD::WrapperRIP : X86ISD::Wrapper;
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
+ !Subtarget->is64Bit();
+ if (PIC32)
+ OpFlag = X86II::MO_TLVP_PIC_BASE;
+ else
+ OpFlag = X86II::MO_TLVP;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
+ GA->getValueType(0),
+ GA->getOffset(), OpFlag);
+ SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC32, the address is actually $g + Offset.
+ if (PIC32)
+ Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Offset);
+
+ // Lowering the machine isd will make sure everything is in the right
+ // location.
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Args[] = { Chain, Offset };
+ Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
+
+ // TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true);
+
+ // And our return value (tls address) is in the standard call return value
+ // location.
+ unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
+ Chain.getValue(1));
+ }
+
+ if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) {
+ // Just use the implicit TLS architecture
+ // Need to generate someting similar to:
+ // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
+ // ; from TEB
+ // mov ecx, dword [rel _tls_index]: Load index (from C runtime)
+ // mov rcx, qword [rdx+rcx*8]
+ // mov eax, .tls$:tlsvar
+ // [rax+rcx] contains the address
+ // Windows 64bit: gs:0x58
+ // Windows 32bit: fs:__tls_array
+
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue Chain = DAG.getEntryNode();
+
+ // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
+ // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
+ // use its literal value of 0x2C.
+ Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
+ ? Type::getInt8PtrTy(*DAG.getContext(),
+ 256)
+ : Type::getInt32PtrTy(*DAG.getContext(),
+ 257));
+
+ SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) :
+ (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) :
+ DAG.getExternalSymbol("_tls_array", getPointerTy()));
+
+ SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
+ MachinePointerInfo(Ptr),
+ false, false, false, 0);
+
+ // Load the _tls_index variable
+ SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy());
+ if (Subtarget->is64Bit())
+ IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain,
+ IDX, MachinePointerInfo(), MVT::i32,
+ false, false, 0);
+ else
+ IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
+ false, false, false, 0);
+
+ SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
+ getPointerTy());
+ IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
+
+ SDValue res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX);
+ res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Get the offset of start of .tls section
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(), X86II::MO_SECREL);
+ SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), TGA);
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, getPointerTy(), res, Offset);
+ }
+
+ llvm_unreachable("TLS not implemented for this target.");
+}
+
+/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values
+/// and take a 2 x i32 value to shift plus a shift amount.
+SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, MVT::i8))
+ : DAG.getConstant(0, VT);
+
+ SDValue Tmp2, Tmp3;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
+ }
+
+ SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
+ DAG.getConstant(VTBits, MVT::i8));
+ SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ AndNode, DAG.getConstant(0, MVT::i8));
+
+ SDValue Hi, Lo;
+ SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
+ SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
+
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ } else {
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ }
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+
+ if (SrcVT.isVector())
+ return SDValue();
+
+ assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
+ "Unknown SINT_TO_FP to lower!");
+
+ // These are really Legal; return the operand so the caller accepts it as
+ // Legal.
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
+ return Op;
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ Subtarget->is64Bit()) {
+ return Op;
+ }
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Size = SrcVT.getSizeInBits()/8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+}
+
+SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
+ SDValue StackSlot,
+ SelectionDAG &DAG) const {
+ // Build the FILD
+ DebugLoc DL = Op.getDebugLoc();
+ SDVTList Tys;
+ bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
+ if (useSSE)
+ Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
+ else
+ Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+
+ unsigned ByteSize = SrcVT.getSizeInBits()/8;
+
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
+ MachineMemOperand *MMO;
+ if (FI) {
+ int SSFI = FI->getIndex();
+ MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
+ } else {
+ MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
+ StackSlot = StackSlot.getOperand(1);
+ }
+ SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
+ SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
+ X86ISD::FILD, DL,
+ Tys, Ops, array_lengthof(Ops),
+ SrcVT, MMO);
+
+ if (useSSE) {
+ Chain = Result.getValue(1);
+ SDValue InFlag = Result.getValue(2);
+
+ // FIXME: Currently the FST is flagged to the FILD_FLAG. This
+ // shouldn't be necessary except that RFP cannot be live across
+ // multiple blocks. When stackifier is fixed, they can be uncoupled.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ Tys = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = {
+ Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
+ };
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, SSFISize, SSFISize);
+
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
+ Ops, array_lengthof(Ops),
+ Op.getValueType(), MMO);
+ Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, false, 0);
+ }
+
+ return Result;
+}
+
+// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
+ SelectionDAG &DAG) const {
+ // This algorithm is not obvious. Here it is what we're trying to output:
+ /*
+ movq %rax, %xmm0
+ punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
+ subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
+ #ifdef __SSE3__
+ haddpd %xmm0, %xmm0
+ #else
+ pshufd $0x4e, %xmm0, %xmm1
+ addpd %xmm1, %xmm0
+ #endif
+ */
+
+ DebugLoc dl = Op.getDebugLoc();
+ LLVMContext *Context = DAG.getContext();
+
+ // Build some magic constants.
+ const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
+ Constant *C0 = ConstantDataVector::get(*Context, CV0);
+ SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
+
+ SmallVector<Constant*,2> CV1;
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4330000000000000ULL))));
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4530000000000000ULL))));
+ Constant *C1 = ConstantVector::get(CV1);
+ SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
+
+ // Load the 64-bit value into an XMM register.
+ SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ Op.getOperand(0));
+ SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 16);
+ SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, XR1),
+ CLod0);
+
+ SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 16);
+ SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck1);
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+ SDValue Result;
+
+ if (Subtarget->hasSSE3()) {
+ // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
+ Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
+ } else {
+ SDValue S2F = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Sub);
+ SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32,
+ S2F, 0x4E, DAG);
+ Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Shuffle),
+ Sub);
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
+ DAG.getIntPtrConstant(0));
+}
+
+// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // FP constant to bias correct the final result.
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+
+ // Load the 32-bit value into an XMM register.
+ SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ Op.getOperand(0));
+
+ // Zero out the upper parts of the register.
+ Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
+
+ Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
+ DAG.getIntPtrConstant(0));
+
+ // Or the load with the bias.
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Load)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Bias)));
+ Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
+ DAG.getIntPtrConstant(0));
+
+ // Subtract the bias.
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
+
+ // Handle final rounding.
+ EVT DestVT = Op.getValueType();
+
+ if (DestVT.bitsLT(MVT::f64))
+ return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ if (DestVT.bitsGT(MVT::f64))
+ return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+
+ // Handle final rounding.
+ return Sub;
+}
+
+SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ EVT SVT = N0.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 ||
+ SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
+ "Custom UINT_TO_FP is not supported!");
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ SVT.getVectorNumElements());
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
+}
+
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getValueType().isVector())
+ return lowerUINT_TO_FP_vec(Op, DAG);
+
+ // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
+ // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
+ // the optimization here.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
+
+ EVT SrcVT = N0.getValueType();
+ EVT DstVT = Op.getValueType();
+ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
+ return LowerUINT_TO_FP_i64(Op, DAG);
+ if (SrcVT == MVT::i32 && X86ScalarSSEf64)
+ return LowerUINT_TO_FP_i32(Op, DAG);
+ if (Subtarget->is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
+ return SDValue();
+
+ // Make a 64-bit buffer, and use it to build an FILD.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
+ if (SrcVT == MVT::i32) {
+ SDValue WordOff = DAG.getConstant(4, getPointerTy());
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+ getPointerTy(), StackSlot, WordOff);
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+ OffsetSlot, MachinePointerInfo(),
+ false, false, 0);
+ SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ return Fild;
+ }
+
+ assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
+ // For i64 source, we need to add the appropriate power of 2 if the input
+ // was negative. This is the same as the optimization in
+ // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
+ // we must be careful to do the computation in x87 extended precision, not
+ // in SSE. (The generic code can't know it's OK to do this, or how to.)
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, 8, 8);
+
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
+ SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
+ MVT::i64, MMO);
+
+ APInt FF(32, 0x5F800000ULL);
+
+ // Check whether the sign bit is set.
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
+ Op.getOperand(0), DAG.getConstant(0, MVT::i64),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
+
+ // Load the value out, extending it from f32 to f80.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ FudgePtr, MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, 4);
+ // Extend everything to 80 bits to force it to be done on x87.
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
+ return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
+}
+
+std::pair<SDValue,SDValue>
+X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsReplace) const {
+ DebugLoc DL = Op.getDebugLoc();
+
+ EVT DstTy = Op.getValueType();
+
+ if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
+ assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
+ DstTy = MVT::i64;
+ }
+
+ assert(DstTy.getSimpleVT() <= MVT::i64 &&
+ DstTy.getSimpleVT() >= MVT::i16 &&
+ "Unknown FP_TO_INT to lower!");
+
+ // These are really Legal.
+ if (DstTy == MVT::i32 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+ if (Subtarget->is64Bit() &&
+ DstTy == MVT::i64 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+
+ // We lower FP->int64 either into FISTP64 followed by a load from a temporary
+ // stack slot, or into the FTOL runtime function.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned MemSize = DstTy.getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ unsigned Opc;
+ if (!IsSigned && isIntegerTypeFTOL(DstTy))
+ Opc = X86ISD::WIN_FTOL;
+ else
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
+
+ SDValue Chain = DAG.getEntryNode();
+ SDValue Value = Op.getOperand(0);
+ EVT TheVT = Op.getOperand(0).getValueType();
+ // FIXME This causes a redundant load/store if the SSE-class value is already
+ // in memory, such as if it is on the callstack.
+ if (isScalarFPTypeInSSEReg(TheVT)) {
+ assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
+ Chain = DAG.getStore(Chain, DL, Value, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
+ SDValue Ops[] = {
+ Chain, StackSlot, DAG.getValueType(TheVT)
+ };
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, MemSize, MemSize);
+ Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
+ DstTy, MMO);
+ Chain = Value.getValue(1);
+ SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
+ StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ }
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, MemSize, MemSize);
+
+ if (Opc != X86ISD::WIN_FTOL) {
+ // Build the FP_TO_INT*_IN_MEM
+ SDValue Ops[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ Ops, 3, DstTy, MMO);
+ return std::make_pair(FIST, StackSlot);
+ } else {
+ SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue),
+ Chain, Value);
+ SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
+ MVT::i32, ftol.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
+ MVT::i32, eax.getValue(2));
+ SDValue Ops[] = { eax, edx };
+ SDValue pair = IsReplace
+ ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, 2)
+ : DAG.getMergeValues(Ops, 2, DL);
+ return std::make_pair(pair, SDValue());
+ }
+}
+
+static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ MVT VT = Op->getValueType(0).getSimpleVT();
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getValueType().getSimpleVT();
+ DebugLoc dl = Op->getDebugLoc();
+
+ // Optimize vectors in AVX mode:
+ //
+ // v8i16 -> v8i32
+ // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
+ // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
+ // Concat upper and lower parts.
+ //
+ // v4i32 -> v4i64
+ // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
+ // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
+ // Concat upper and lower parts.
+ //
+
+ if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
+ ((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
+ return SDValue();
+
+ if (Subtarget->hasInt256())
+ return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In);
+
+ SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
+ SDValue Undef = DAG.getUNDEF(InVT);
+ bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
+ SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
+ SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
+
+ MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+}
+
+SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Subtarget->hasFp256()) {
+ SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
+ if (Res.getNode())
+ return Res;
+ }
+
+ return SDValue();
+}
+SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ MVT VT = Op.getValueType().getSimpleVT();
+ SDValue In = Op.getOperand(0);
+ MVT SVT = In.getValueType().getSimpleVT();
+
+ if (Subtarget->hasFp256()) {
+ SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
+ if (Res.getNode())
+ return Res;
+ }
+
+ if (!VT.is256BitVector() || !SVT.is128BitVector() ||
+ VT.getVectorNumElements() != SVT.getVectorNumElements())
+ return SDValue();
+
+ assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
+
+ // AVX2 has better support of integer extending.
+ if (Subtarget->hasInt256())
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+ SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
+ static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
+ SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
+ DAG.getVectorShuffle(MVT::v8i16, DL, In,
+ DAG.getUNDEF(MVT::v8i16),
+ &Mask[0]));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
+}
+
+SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ MVT VT = Op.getValueType().getSimpleVT();
+ SDValue In = Op.getOperand(0);
+ MVT SVT = In.getValueType().getSimpleVT();
+
+ if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
+ // On AVX2, v4i64 -> v4i32 becomes VPERMD.
+ if (Subtarget->hasInt256()) {
+ static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In);
+ In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
+ ShufMask);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS.
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(0));
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(2));
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+
+ // The PSHUFD mask:
+ static const int ShufMask1[] = {0, 2, 0, 0};
+ SDValue Undef = DAG.getUNDEF(VT);
+ OpLo = DAG.getVectorShuffle(VT, DL, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, DL, OpHi, Undef, ShufMask1);
+
+ // The MOVLHPS mask:
+ static const int ShufMask2[] = {0, 1, 4, 5};
+ return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
+ }
+
+ if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
+ // On AVX2, v8i32 -> v8i16 becomed PSHUFB.
+ if (Subtarget->hasInt256()) {
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
+
+ SmallVector<SDValue,32> pshufbMask;
+ for (unsigned i = 0; i < 2; ++i) {
+ pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
+ for (unsigned j = 0; j < 8; ++j)
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ }
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8,
+ &pshufbMask[0], 32);
+ In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In);
+
+ static const int ShufMask[] = {0, 2, -1, -1};
+ In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
+ &ShufMask[0]);
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::BITCAST, DL, VT, In);
+ }
+
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(0));
+
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(4));
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi);
+
+ // The PSHUFB mask:
+ static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+
+ SDValue Undef = DAG.getUNDEF(MVT::v16i8);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1);
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+
+ // The MOVLHPS Mask:
+ static const int ShufMask2[] = {0, 1, 4, 5};
+ SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res);
+ }
+
+ // Handle truncation of V256 to V128 using shuffles.
+ if (!VT.is128BitVector() || !SVT.is256BitVector())
+ return SDValue();
+
+ assert(VT.getVectorNumElements() != SVT.getVectorNumElements() &&
+ "Invalid op");
+ assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
+
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ NumElems * 2);
+
+ SmallVector<int, 16> MaskVec(NumElems * 2, -1);
+ // Prepare truncation shuffle mask
+ for (unsigned i = 0; i != NumElems; ++i)
+ MaskVec[i] = i * 2;
+ SDValue V = DAG.getVectorShuffle(NVT, DL,
+ DAG.getNode(ISD::BITCAST, DL, NVT, In),
+ DAG.getUNDEF(NVT), &MaskVec[0]);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
+ DAG.getIntPtrConstant(0));
+}
+
+SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ MVT VT = Op.getValueType().getSimpleVT();
+ if (VT.isVector()) {
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), VT,
+ DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(),
+ MVT::v8i32, Op.getOperand(0)));
+ return SDValue();
+ }
+
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
+ /*IsSigned=*/ true, /*IsReplace=*/ false);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
+ if (FIST.getNode() == 0) return Op;
+
+ if (StackSlot.getNode())
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // The node is the result.
+ return FIST;
+}
+
+SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
+ /*IsSigned=*/ false, /*IsReplace=*/ false);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ assert(FIST.getNode() && "Unexpected failure");
+
+ if (StackSlot.getNode())
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // The node is the result.
+ return FIST;
+}
+
+static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ MVT VT = Op.getValueType().getSimpleVT();
+ SDValue In = Op.getOperand(0);
+ MVT SVT = In.getValueType().getSimpleVT();
+
+ assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
+
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
+ In, DAG.getUNDEF(SVT)));
+}
+
+SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT;
+ unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ if (VT.isVector()) {
+ EltVT = VT.getVectorElementType();
+ NumElts = VT.getVectorNumElements();
+ }
+ Constant *C;
+ if (EltVT == MVT::f64)
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, ~(1ULL << 63))));
+ else
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, ~(1U << 31))));
+ C = ConstantVector::getSplat(NumElts, C);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ if (VT.isVector()) {
+ MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::AND, dl, ANDVT,
+ DAG.getNode(ISD::BITCAST, dl, ANDVT,
+ Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask)));
+ }
+ return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
+}
+
+SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT;
+ unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ if (VT.isVector()) {
+ EltVT = VT.getVectorElementType();
+ NumElts = VT.getVectorNumElements();
+ }
+ Constant *C;
+ if (EltVT == MVT::f64)
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 1ULL << 63)));
+ else
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, 1U << 31)));
+ C = ConstantVector::getSplat(NumElts, C);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ if (VT.isVector()) {
+ MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::XOR, dl, XORVT,
+ DAG.getNode(ISD::BITCAST, dl, XORVT,
+ Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
+ }
+
+ return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+}
+
+SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT SrcVT = Op1.getValueType().getSimpleVT();
+
+ // If second operand is smaller, extend it first.
+ if (SrcVT.bitsLT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
+ SrcVT = VT;
+ }
+ // And if it is bigger, shrink it first.
+ if (SrcVT.bitsGT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
+ SrcVT = VT;
+ }
+
+ // At this point the operands and the result should have the same
+ // type, and that won't be f80 since that is not custom lowered.
+
+ // First get the sign bit of second operand.
+ SmallVector<Constant*,4> CV;
+ if (SrcVT == MVT::f64) {
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
+ } else {
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 16);
+ SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
+
+ // Shift sign bit right or left if the two operands have different types.
+ if (SrcVT.bitsGT(VT)) {
+ // Op0 is MVT::f32, Op1 is MVT::f64.
+ SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
+ SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
+ DAG.getConstant(32, MVT::i32));
+ SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
+ SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Clear first operand sign bit.
+ CV.clear();
+ if (VT == MVT::f64) {
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
+ } else {
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ }
+ C = ConstantVector::get(CV);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 16);
+ SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
+}
+
+static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType().getSimpleVT();
+
+ // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
+ SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
+ DAG.getConstant(1, VT));
+ return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
+}
+
+// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
+//
+SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
+
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ if (!Op->hasOneUse())
+ return SDValue();
+
+ SDNode *N = Op.getNode();
+ DebugLoc DL = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Opnds;
+ DenseMap<SDValue, unsigned> VecInMap;
+ EVT VT = MVT::Other;
+
+ // Recognize a special case where a vector is casted into wide integer to
+ // test all 0s.
+ Opnds.push_back(N->getOperand(0));
+ Opnds.push_back(N->getOperand(1));
+
+ for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
+ SmallVector<SDValue, 8>::const_iterator I = Opnds.begin() + Slot;
+ // BFS traverse all OR'd operands.
+ if (I->getOpcode() == ISD::OR) {
+ Opnds.push_back(I->getOperand(0));
+ Opnds.push_back(I->getOperand(1));
+ // Re-evaluate the number of nodes to be traversed.
+ e += 2; // 2 more nodes (LHS and RHS) are pushed.
+ continue;
+ }
+
+ // Quit if a non-EXTRACT_VECTOR_ELT
+ if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ // Quit if without a constant index.
+ SDValue Idx = I->getOperand(1);
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ SDValue ExtractedFromVec = I->getOperand(0);
+ DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
+ if (M == VecInMap.end()) {
+ VT = ExtractedFromVec.getValueType();
+ // Quit if not 128/256-bit vector.
+ if (!VT.is128BitVector() && !VT.is256BitVector())
+ return SDValue();
+ // Quit if not the same type.
+ if (VecInMap.begin() != VecInMap.end() &&
+ VT != VecInMap.begin()->first.getValueType())
+ return SDValue();
+ M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
+ }
+ M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
+ }
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Not extracted from 128-/256-bit vector.");
+
+ unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
+ SmallVector<SDValue, 8> VecIns;
+
+ for (DenseMap<SDValue, unsigned>::const_iterator
+ I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
+ // Quit if not all elements are used.
+ if (I->second != FullMask)
+ return SDValue();
+ VecIns.push_back(I->first);
+ }
+
+ EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+
+ // Cast all vectors into TestVT for PTEST.
+ for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
+ VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]);
+
+ // If more than one full vectors are evaluated, OR them first before PTEST.
+ for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
+ // Each iteration will OR 2 nodes and append the result until there is only
+ // 1 node left, i.e. the final OR'd value of all vectors.
+ SDValue LHS = VecIns[Slot];
+ SDValue RHS = VecIns[Slot + 1];
+ VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
+ }
+
+ return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
+ VecIns.back(), VecIns.back());
+}
+
+/// Emit nodes that will be selected as "test Op0,Op0", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CF and OF aren't always set the way we want. Determine which
+ // of these we need.
+ bool NeedCF = false;
+ bool NeedOF = false;
+ switch (X86CC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ NeedCF = true;
+ break;
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ case X86::COND_O: case X86::COND_NO:
+ NeedOF = true;
+ break;
+ }
+
+ // See if we can use the EFLAGS value from the operand instead of
+ // doing a separate TEST. TEST always sets OF and CF to 0, so unless
+ // we prove that the arithmetic won't overflow, we can't use OF or CF.
+ if (Op.getResNo() != 0 || NeedOF || NeedCF)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ unsigned Opcode = 0;
+ unsigned NumOperands = 0;
+
+ // Truncate operations may prevent the merge of the SETCC instruction
+ // and the arithmetic intruction before it. Attempt to truncate the operands
+ // of the arithmetic instruction and use a reduced bit-width instruction.
+ bool NeedTruncation = false;
+ SDValue ArithOp = Op;
+ if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
+ SDValue Arith = Op->getOperand(0);
+ // Both the trunc and the arithmetic op need to have one user each.
+ if (Arith->hasOneUse())
+ switch (Arith.getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ NeedTruncation = true;
+ ArithOp = Arith;
+ }
+ }
+ }
+
+ // NOTICE: In the code below we use ArithOp to hold the arithmetic operation
+ // which may be the result of a CAST. We use the variable 'Op', which is the
+ // non-casted variable when we check for possible users.
+ switch (ArithOp.getOpcode()) {
+ case ISD::ADD:
+ // Due to an isel shortcoming, be conservative if this add is likely to be
+ // selected as part of a load-modify-store instruction. When the root node
+ // in a match is a store, isel doesn't know how to remap non-chain non-flag
+ // uses of other nodes in the match, such as the ADD in this case. This
+ // leads to the ADD being left around and reselected, with the result being
+ // two adds in the output. Alas, even if none our users are stores, that
+ // doesn't prove we're O.K. Ergo, if we have any parents that aren't
+ // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
+ // climbing the DAG back to the root, and it doesn't seem to be worth the
+ // effort.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg &&
+ UI->getOpcode() != ISD::SETCC &&
+ UI->getOpcode() != ISD::STORE)
+ goto default_case;
+
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
+ // An add of one will be selected as an INC.
+ if (C->getAPIntValue() == 1) {
+ Opcode = X86ISD::INC;
+ NumOperands = 1;
+ break;
+ }
+
+ // An add of negative one (subtract of one) will be selected as a DEC.
+ if (C->getAPIntValue().isAllOnesValue()) {
+ Opcode = X86ISD::DEC;
+ NumOperands = 1;
+ break;
+ }
+ }
+
+ // Otherwise use a regular EFLAGS-setting add.
+ Opcode = X86ISD::ADD;
+ NumOperands = 2;
+ break;
+ case ISD::AND: {
+ // If the primary and result isn't used, don't bother using X86ISD::AND,
+ // because a TEST instruction will be better.
+ bool NonFlagUse = false;
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+ // Look pass truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
+ }
+
+ if (User->getOpcode() != ISD::BRCOND &&
+ User->getOpcode() != ISD::SETCC &&
+ !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) {
+ NonFlagUse = true;
+ break;
+ }
+ }
+
+ if (!NonFlagUse)
+ break;
+ }
+ // FALL THROUGH
+ case ISD::SUB:
+ case ISD::OR:
+ case ISD::XOR:
+ // Due to the ISEL shortcoming noted above, be conservative if this op is
+ // likely to be selected as part of a load-modify-store instruction.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+
+ // Otherwise use a regular EFLAGS-setting instruction.
+ switch (ArithOp.getOpcode()) {
+ default: llvm_unreachable("unexpected operator!");
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::XOR: Opcode = X86ISD::XOR; break;
+ case ISD::AND: Opcode = X86ISD::AND; break;
+ case ISD::OR: {
+ if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
+ SDValue EFLAGS = LowerVectorAllZeroTest(Op, DAG);
+ if (EFLAGS.getNode())
+ return EFLAGS;
+ }
+ Opcode = X86ISD::OR;
+ break;
+ }
+ }
+
+ NumOperands = 2;
+ break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ return SDValue(Op.getNode(), 1);
+ default:
+ default_case:
+ break;
+ }
+
+ // If we found that truncation is beneficial, perform the truncation and
+ // update 'Op'.
+ if (NeedTruncation) {
+ EVT VT = Op.getValueType();
+ SDValue WideVal = Op->getOperand(0);
+ EVT WideVT = WideVal.getValueType();
+ unsigned ConvertedOp = 0;
+ // Use a target machine opcode to prevent further DAGCombine
+ // optimizations that may separate the arithmetic operations
+ // from the setcc node.
+ switch (WideVal.getOpcode()) {
+ default: break;
+ case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
+ case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
+ case ISD::AND: ConvertedOp = X86ISD::AND; break;
+ case ISD::OR: ConvertedOp = X86ISD::OR; break;
+ case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
+ }
+
+ if (ConvertedOp) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
+ SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
+ SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
+ Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
+ }
+ }
+ }
+
+ if (Opcode == 0)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ DAG.ReplaceAllUsesWith(Op, New);
+ return SDValue(New.getNode(), 1);
+}
+
+/// Emit nodes that will be selected as "cmp Op0,Op1", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG) const {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
+ if (C->getAPIntValue() == 0)
+ return EmitTest(Op0, X86CC, DAG);
+
+ DebugLoc dl = Op0.getDebugLoc();
+ if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
+ Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
+ // Use SUB instead of CMP to enable CSE between SUB and CMP.
+ SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
+ SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
+ Op0, Op1);
+ return SDValue(Sub.getNode(), 1);
+ }
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+}
+
+/// Convert a comparison if required by the subtarget.
+SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
+ SelectionDAG &DAG) const {
+ // If the subtarget does not support the FUCOMI instruction, floating-point
+ // comparisons have to be converted.
+ if (Subtarget->hasCMov() ||
+ Cmp.getOpcode() != X86ISD::CMP ||
+ !Cmp.getOperand(0).getValueType().isFloatingPoint() ||
+ !Cmp.getOperand(1).getValueType().isFloatingPoint())
+ return Cmp;
+
+ // The instruction selector will select an FUCOM instruction instead of
+ // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
+ // build an SDNode sequence that transfers the result from FPSW into EFLAGS:
+ // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8))))
+ DebugLoc dl = Cmp.getDebugLoc();
+ SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
+ SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
+ SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
+ DAG.getConstant(8, MVT::i8));
+ SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
+ return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
+}
+
+static bool isAllOnes(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isAllOnesValue();
+}
+
+/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
+/// if it's possible.
+SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ SDValue Op0 = And.getOperand(0);
+ SDValue Op1 = And.getOperand(1);
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::TRUNCATE)
+ Op1 = Op1.getOperand(0);
+
+ SDValue LHS, RHS;
+ if (Op1.getOpcode() == ISD::SHL)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
+ if (And00C->getZExtValue() == 1) {
+ // If we looked past a truncate, check that it's only truncating away
+ // known zeros.
+ unsigned BitWidth = Op0.getValueSizeInBits();
+ unsigned AndBitWidth = And.getValueSizeInBits();
+ if (BitWidth > AndBitWidth) {
+ APInt Zeros, Ones;
+ DAG.ComputeMaskedBits(Op0, Zeros, Ones);
+ if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
+ return SDValue();
+ }
+ LHS = Op1;
+ RHS = Op0.getOperand(1);
+ }
+ } else if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
+ uint64_t AndRHSVal = AndRHS->getZExtValue();
+ SDValue AndLHS = Op0;
+
+ if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
+ LHS = AndLHS.getOperand(0);
+ RHS = AndLHS.getOperand(1);
+ }
+
+ // Use BT if the immediate can't be encoded in a TEST instruction.
+ if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
+ LHS = AndLHS;
+ RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), LHS.getValueType());
+ }
+ }
+
+ if (LHS.getNode()) {
+ // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip
+ // the condition code later.
+ bool Invert = false;
+ if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) {
+ Invert = true;
+ LHS = LHS.getOperand(0);
+ }
+
+ // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ // Also promote i16 to i32 for performance / code size reason.
+ if (LHS.getValueType() == MVT::i8 ||
+ LHS.getValueType() == MVT::i16)
+ LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (LHS.getValueType() != RHS.getValueType())
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+ X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ // Flip the condition if the LHS was a not instruction
+ if (Invert)
+ Cond = X86::GetOppositeBranchCondition(Cond);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, MVT::i8), BT);
+ }
+
+ return SDValue();
+}
+
+// Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128
+// ones, and then concatenate the result back.
+static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
+
+ assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
+ "Unsupported value type for operation");
+
+ unsigned NumElems = VT.getVectorNumElements();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC = Op.getOperand(2);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
+
+ // Extract the RHS vectors
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl);
+ SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
+
+ // Issue the operation on the smaller types and concatenate the result back
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
+}
+
+static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDValue Cond;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ MVT VT = Op.getValueType().getSimpleVT();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (isFP) {
+#ifndef NDEBUG
+ MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT();
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+#endif
+
+ unsigned SSECC;
+ bool Swap = false;
+
+ // SSE Condition code mapping:
+ // 0 - EQ
+ // 1 - LT
+ // 2 - LE
+ // 3 - UNORD
+ // 4 - NEQ
+ // 5 - NLT
+ // 6 - NLE
+ // 7 - ORD
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Unexpected SETCC condition");
+ case ISD::SETOEQ:
+ case ISD::SETEQ: SSECC = 0; break;
+ case ISD::SETOGT:
+ case ISD::SETGT: Swap = true; // Fallthrough
+ case ISD::SETLT:
+ case ISD::SETOLT: SSECC = 1; break;
+ case ISD::SETOGE:
+ case ISD::SETGE: Swap = true; // Fallthrough
+ case ISD::SETLE:
+ case ISD::SETOLE: SSECC = 2; break;
+ case ISD::SETUO: SSECC = 3; break;
+ case ISD::SETUNE:
+ case ISD::SETNE: SSECC = 4; break;
+ case ISD::SETULE: Swap = true; // Fallthrough
+ case ISD::SETUGE: SSECC = 5; break;
+ case ISD::SETULT: Swap = true; // Fallthrough
+ case ISD::SETUGT: SSECC = 6; break;
+ case ISD::SETO: SSECC = 7; break;
+ case ISD::SETUEQ:
+ case ISD::SETONE: SSECC = 8; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // In the two special cases we can't handle, emit two comparisons.
+ if (SSECC == 8) {
+ unsigned CC0, CC1;
+ unsigned CombineOpc;
+ if (SetCCOpcode == ISD::SETUEQ) {
+ CC0 = 3; CC1 = 0; CombineOpc = ISD::OR;
+ } else {
+ assert(SetCCOpcode == ISD::SETONE);
+ CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
+ }
+
+ SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC0, MVT::i8));
+ SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC1, MVT::i8));
+ return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
+ }
+ // Handle all other FP comparisons here.
+ return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(SSECC, MVT::i8));
+ }
+
+ // Break 256-bit integer vector compare into smaller ones.
+ if (VT.is256BitVector() && !Subtarget->hasInt256())
+ return Lower256IntVSETCC(Op, DAG);
+
+ // We are handling one of the integer comparisons here. Since SSE only has
+ // GT and EQ comparisons for integer, swapping operands and multiple
+ // operations may be required for some comparisons.
+ unsigned Opc;
+ bool Swap = false, Invert = false, FlipSigns = false;
+
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Unexpected SETCC condition");
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
+ case ISD::SETGE: Swap = true;
+ case ISD::SETLE: Opc = X86ISD::PCMPGT; Invert = true; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break;
+ case ISD::SETUGE: Swap = true;
+ case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // Check that the operation in question is available (most are plain SSE2,
+ // but PCMPGTQ and PCMPEQQ have different requirements).
+ if (VT == MVT::v2i64) {
+ if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
+ return SDValue();
+ if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
+ // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
+ // pcmpeqd + pshufd + pand.
+ assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
+
+ // First cast everything to the right type,
+ Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+
+ // Do the compare.
+ SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
+
+ // Make sure the lower and upper halves are both all-ones.
+ const int Mask[] = { 1, 0, 3, 2 };
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
+ Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, MVT::v4i32);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
+
+ // Since SSE has no unsigned integer comparisons, we need to flip the sign
+ // bits of the inputs before performing those operations.
+ if (FlipSigns) {
+ EVT EltVT = VT.getVectorElementType();
+ SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
+ EltVT);
+ std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
+ SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
+ SignBits.size());
+ Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
+ Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+ }
+
+ SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+
+ // If the logical-not of the result is required, perform that now.
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
+
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ MVT VT = Op.getValueType().getSimpleVT();
+
+ if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
+
+ assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Optimize to BT if possible.
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op1)->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
+ }
+
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
+ }
+
+ bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint();
+ unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+ if (X86CC == X86::COND_INVALID)
+ return SDValue();
+
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+}
+
+// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
+static bool isX86LogicalCmp(SDValue Op) {
+ unsigned Opc = Op.getNode()->getOpcode();
+ if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
+ Opc == X86ISD::SAHF)
+ return true;
+ if (Op.getResNo() == 1 &&
+ (Opc == X86ISD::ADD ||
+ Opc == X86ISD::SUB ||
+ Opc == X86ISD::ADC ||
+ Opc == X86ISD::SBB ||
+ Opc == X86ISD::SMUL ||
+ Opc == X86ISD::UMUL ||
+ Opc == X86ISD::INC ||
+ Opc == X86ISD::DEC ||
+ Opc == X86ISD::OR ||
+ Opc == X86ISD::XOR ||
+ Opc == X86ISD::AND))
+ return true;
+
+ if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+ return true;
+
+ return false;
+}
+
+static bool isZero(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isNullValue();
+}
+
+static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
+ if (V.getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ SDValue VOp0 = V.getOperand(0);
+ unsigned InBits = VOp0.getValueSizeInBits();
+ unsigned Bits = V.getValueSizeInBits();
+ return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
+}
+
+SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ bool addTest = true;
+ SDValue Cond = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC) {
+ SDValue NewCond = LowerSETCC(Cond, DAG);
+ if (NewCond.getNode())
+ Cond = NewCond;
+ }
+
+ // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
+ // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
+ // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
+ // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
+ if (Cond.getOpcode() == X86ISD::SETCC &&
+ Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
+ isZero(Cond.getOperand(1).getOperand(1))) {
+ SDValue Cmp = Cond.getOperand(1);
+
+ unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+
+ if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
+ (CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
+ SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ // Apply further optimizations for special cases
+ // (select (x != 0), -1, 0) -> neg & sbb
+ // (select (x == 0), 0, -1) -> neg & sbb
+ if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y))
+ if (YC->isNullValue() &&
+ (isAllOnes(Op1) == (CondCode == X86::COND_NE))) {
+ SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
+ DAG.getConstant(0, CmpOp0.getValueType()),
+ CmpOp0);
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8),
+ SDValue(Neg.getNode(), 1));
+ return Res;
+ }
+
+ Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
+
+ SDValue Res = // Res = 0 or -1.
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
+
+ if (isAllOnes(Op1) != (CondCode == X86::COND_E))
+ Res = DAG.getNOT(DL, Res, Res.getValueType());
+
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
+ if (N2C == 0 || !N2C->isNullValue())
+ Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
+ return Res;
+ }
+ }
+
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
+ // If condition flag is set by a X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ unsigned CondOpcode = Cond.getOpcode();
+ if (CondOpcode == X86ISD::SETCC ||
+ CondOpcode == X86ISD::SETCC_CARRY) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ MVT VT = Op.getValueType().getSimpleVT();
+
+ bool IllegalFPCMov = false;
+ if (VT.isFloatingPoint() && !VT.isVector() &&
+ !isScalarFPTypeInSSEReg(VT)) // FPStack?
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
+
+ if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
+ Opc == X86ISD::BT) { // FIXME
+ Cond = Cmp;
+ addTest = false;
+ }
+ } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
+ CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
+ ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
+ Cond.getOperand(0).getValueType() != MVT::i8)) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ unsigned X86Opcode;
+ unsigned X86Cond;
+ SDVTList VTs;
+ switch (CondOpcode) {
+ case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
+ case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
+ case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
+ case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
+ case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
+ case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
+ default: llvm_unreachable("unexpected overflowing operator");
+ }
+ if (CondOpcode == ISD::UMULO)
+ VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
+ MVT::i32);
+ else
+ VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
+
+ SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
+
+ if (CondOpcode == ISD::UMULO)
+ Cond = X86Op.getValue(2);
+ else
+ Cond = X86Op.getValue(1);
+
+ CC = DAG.getConstant(X86Cond, MVT::i8);
+ addTest = false;
+ }
+
+ if (addTest) {
+ // Look pass the truncate if the high bits are known zero.
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+
+ // a < b ? -1 : 0 -> RES = ~setcc_carry
+ // a < b ? 0 : -1 -> RES = setcc_carry
+ // a >= b ? -1 : 0 -> RES = setcc_carry
+ // a >= b ? 0 : -1 -> RES = ~setcc_carry
+ if (Cond.getOpcode() == X86ISD::SUB) {
+ Cond = ConvertCmpIfNecessary(Cond, DAG);
+ unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
+ (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cond);
+ if (isAllOnes(Op1) != (CondCode == X86::COND_B))
+ return DAG.getNOT(DL, Res, Res.getValueType());
+ return Res;
+ }
+ }
+
+ // X86 doesn't have an i8 cmov. If both operands are the result of a truncate
+ // widen the cmov and push the truncate through. This avoids introducing a new
+ // branch during isel and doesn't add any extensions.
+ if (Op.getValueType() == MVT::i8 &&
+ Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
+ SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
+ if (T1.getValueType() == T2.getValueType() &&
+ // Blacklist CopyFromReg to avoid partial register stalls.
+ T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){
+ SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
+ SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond);
+ return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
+ }
+ }
+
+ // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
+ // condition is true.
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SDValue Ops[] = { Op2, Op1, CC, Cond };
+ return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
+}
+
+SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ MVT VT = Op->getValueType(0).getSimpleVT();
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getValueType().getSimpleVT();
+ DebugLoc dl = Op->getDebugLoc();
+
+ if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
+ (VT != MVT::v8i32 || InVT != MVT::v8i16))
+ return SDValue();
+
+ if (Subtarget->hasInt256())
+ return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In);
+
+ // Optimize vectors in AVX mode
+ // Sign extend v8i16 to v8i32 and
+ // v4i32 to v4i64
+ //
+ // Divide input vector into two parts
+ // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
+ // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
+ // concat the vectors to original VT
+
+ unsigned NumElems = InVT.getVectorNumElements();
+ SDValue Undef = DAG.getUNDEF(InVT);
+
+ SmallVector<int,8> ShufMask1(NumElems, -1);
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ ShufMask1[i] = i;
+
+ SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]);
+
+ SmallVector<int,8> ShufMask2(NumElems, -1);
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ ShufMask2[i] = i + NumElems/2;
+
+ SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]);
+
+ MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
+ OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+}
+
+// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
+// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
+// from the AND / OR.
+static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
+ Opc = Op.getOpcode();
+ if (Opc != ISD::OR && Opc != ISD::AND)
+ return false;
+ return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse() &&
+ Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(1).hasOneUse());
+}
+
+// isXor1OfSetCC - Return true if node is an ISD::XOR of a X86ISD::SETCC and
+// 1 and that the SETCC node has a single use.
+static bool isXor1OfSetCC(SDValue Op) {
+ if (Op.getOpcode() != ISD::XOR)
+ return false;
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (N1C && N1C->getAPIntValue() == 1) {
+ return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse();
+ }
+ return false;
+}
+
+SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ bool addTest = true;
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC;
+ bool Inverted = false;
+
+ if (Cond.getOpcode() == ISD::SETCC) {
+ // Check for setcc([su]{add,sub,mul}o == 0).
+ if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
+ isa<ConstantSDNode>(Cond.getOperand(1)) &&
+ cast<ConstantSDNode>(Cond.getOperand(1))->isNullValue() &&
+ Cond.getOperand(0).getResNo() == 1 &&
+ (Cond.getOperand(0).getOpcode() == ISD::SADDO ||
+ Cond.getOperand(0).getOpcode() == ISD::UADDO ||
+ Cond.getOperand(0).getOpcode() == ISD::SSUBO ||
+ Cond.getOperand(0).getOpcode() == ISD::USUBO ||
+ Cond.getOperand(0).getOpcode() == ISD::SMULO ||
+ Cond.getOperand(0).getOpcode() == ISD::UMULO)) {
+ Inverted = true;
+ Cond = Cond.getOperand(0);
+ } else {
+ SDValue NewCond = LowerSETCC(Cond, DAG);
+ if (NewCond.getNode())
+ Cond = NewCond;
+ }
+ }
+#if 0
+ // FIXME: LowerXALUO doesn't handle these!!
+ else if (Cond.getOpcode() == X86ISD::ADD ||
+ Cond.getOpcode() == X86ISD::SUB ||
+ Cond.getOpcode() == X86ISD::SMUL ||
+ Cond.getOpcode() == X86ISD::UMUL)
+ Cond = LowerXALUO(Cond, DAG);
+#endif
+
+ // Look pass (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
+ // If condition flag is set by a X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ unsigned CondOpcode = Cond.getOpcode();
+ if (CondOpcode == X86ISD::SETCC ||
+ CondOpcode == X86ISD::SETCC_CARRY) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ // FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
+ if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
+ Cond = Cmp;
+ addTest = false;
+ } else {
+ switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
+ default: break;
+ case X86::COND_O:
+ case X86::COND_B:
+ // These can only come from an arithmetic instruction with overflow,
+ // e.g. SADDO, UADDO.
+ Cond = Cond.getNode()->getOperand(1);
+ addTest = false;
+ break;
+ }
+ }
+ }
+ CondOpcode = Cond.getOpcode();
+ if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
+ CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
+ ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
+ Cond.getOperand(0).getValueType() != MVT::i8)) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ unsigned X86Opcode;
+ unsigned X86Cond;
+ SDVTList VTs;
+ switch (CondOpcode) {
+ case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
+ case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
+ case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
+ case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
+ case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
+ case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
+ default: llvm_unreachable("unexpected overflowing operator");
+ }
+ if (Inverted)
+ X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
+ if (CondOpcode == ISD::UMULO)
+ VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
+ MVT::i32);
+ else
+ VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
+
+ SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
+
+ if (CondOpcode == ISD::UMULO)
+ Cond = X86Op.getValue(2);
+ else
+ Cond = X86Op.getValue(1);
+
+ CC = DAG.getConstant(X86Cond, MVT::i8);
+ addTest = false;
+ } else {
+ unsigned CondOpc;
+ if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
+ SDValue Cmp = Cond.getOperand(0).getOperand(1);
+ if (CondOpc == ISD::OR) {
+ // Also, recognize the pattern generated by an FCMP_UNE. We can emit
+ // two branches instead of an explicit OR instruction with a
+ // separate test.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp)) {
+ CC = Cond.getOperand(0).getOperand(0);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = Cond.getOperand(1).getOperand(0);
+ Cond = Cmp;
+ addTest = false;
+ }
+ } else { // ISD::AND
+ // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp) &&
+ Op.getNode()->hasOneUse()) {
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ SDNode *User = *Op.getNode()->use_begin();
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_OEQ.
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ assert(NewBR == User);
+ (void)NewBR;
+ Dest = FalseBB;
+
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+ }
+ } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
+ // Recognize for xorb (setcc), 1 patterns. The xor inverts the condition.
+ // It should be transformed during dag combiner except when the condition
+ // is set by a arithmetics with overflow node.
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cond.getOperand(0).getOperand(1);
+ addTest = false;
+ } else if (Cond.getOpcode() == ISD::SETCC &&
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETOEQ) {
+ // For FCMP_OEQ, we can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Op.getNode()->hasOneUse()) {
+ SDNode *User = *Op.getNode()->use_begin();
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_OEQ.
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ assert(NewBR == User);
+ (void)NewBR;
+ Dest = FalseBB;
+
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ Cond.getOperand(0), Cond.getOperand(1));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = DAG.getConstant(X86::COND_P, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+ } else if (Cond.getOpcode() == ISD::SETCC &&
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
+ // For FCMP_UNE, we can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Op.getNode()->hasOneUse()) {
+ SDNode *User = *Op.getNode()->use_begin();
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_UNE.
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ assert(NewBR == User);
+ (void)NewBR;
+
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ Cond.getOperand(0), Cond.getOperand(1));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = DAG.getConstant(X86::COND_NP, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ Dest = FalseBB;
+ }
+ }
+ }
+ }
+
+ if (addTest) {
+ // Look pass the truncate if the high bits are known zero.
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+ Cond = ConvertCmpIfNecessary(Cond, DAG);
+ return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cond);
+}
+
+// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
+// Calls to _alloca is needed to probe the stack when allocating more than 4k
+// bytes in one go. Touching the stack at 4K increments is necessary to ensure
+// that the guard pages used by the OS virtual memory manager are allocated in
+// correct sequence.
+SDValue
+X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
+ getTargetMachine().Options.EnableSegmentedStacks) &&
+ "This should be used only on Windows targets or when segmented stacks "
+ "are being used");
+ assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ // FIXME: Ensure alignment here
+
+ bool Is64Bit = Subtarget->is64Bit();
+ EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
+
+ if (getTargetMachine().Options.EnableSegmentedStacks) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (Is64Bit) {
+ // The 64 bit implementation of segmented stacks needs to clobber both r10
+ // r11. This makes it impossible to use it along with nested parameters.
+ const Function *F = MF.getFunction();
+
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (I->hasNestAttr())
+ report_fatal_error("Cannot use segmented stacks with functions that "
+ "have nested arguments.");
+ }
+
+ const TargetRegisterClass *AddrRegClass =
+ getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+ unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
+ Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
+ SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
+ DAG.getRegister(Vreg, SPTy));
+ SDValue Ops1[2] = { Value, Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+ } else {
+ SDValue Flag;
+ unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
+ Flag = Chain.getValue(1);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
+ Flag = Chain.getValue(1);
+
+ Chain = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
+ SPTy).getValue(1);
+
+ SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+ }
+}
+
+SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ DebugLoc DL = Op.getDebugLoc();
+
+ if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+ }
+
+ // __va_list_tag:
+ // gp_offset (0 - 6 * 8)
+ // fp_offset (48 - 48 + 8 * 16)
+ // overflow_arg_area (point to parameters coming in memory).
+ // reg_save_area
+ SmallVector<SDValue, 8> MemOps;
+ SDValue FIN = Op.getOperand(1);
+ // Store gp_offset
+ SDValue Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
+ MVT::i32),
+ FIN, MachinePointerInfo(SV), false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store fp_offset
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
+ MVT::i32),
+ FIN, MachinePointerInfo(SV, 4), false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to overflow_arg_area
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
+ MachinePointerInfo(SV, 8),
+ false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to reg_save_area.
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(8));
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
+ MachinePointerInfo(SV, 16), false, false, 0);
+ MemOps.push_back(Store);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ &MemOps[0], MemOps.size());
+}
+
+SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->is64Bit() &&
+ "LowerVAARG only handles 64-bit va_arg!");
+ assert((Subtarget->isTargetLinux() ||
+ Subtarget->isTargetDarwin()) &&
+ "Unhandled target in LowerVAARG");
+ assert(Op.getNode()->getNumOperands() == 4);
+ SDValue Chain = Op.getOperand(0);
+ SDValue SrcPtr = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ unsigned Align = Op.getConstantOperandVal(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT ArgVT = Op.getNode()->getValueType(0);
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
+ uint8_t ArgMode;
+
+ // Decide which area this value should be read from.
+ // TODO: Implement the AMD64 ABI in its entirety. This simple
+ // selection mechanism works only for the basic types.
+ if (ArgVT == MVT::f80) {
+ llvm_unreachable("va_arg for f80 not yet implemented");
+ } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+ ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
+ } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
+ } else {
+ llvm_unreachable("Unhandled argument type in LowerVAARG");
+ }
+
+ if (ArgMode == 2) {
+ // Sanity Check: Make sure using fp_offset makes sense.
+ assert(!getTargetMachine().Options.UseSoftFloat &&
+ !(DAG.getMachineFunction()
+ .getFunction()->getAttributes()
+ .hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) &&
+ Subtarget->hasSSE1());
+ }
+
+ // Insert VAARG_64 node into the DAG
+ // VAARG_64 returns two values: Variable Argument Address, Chain
+ SmallVector<SDValue, 11> InstOps;
+ InstOps.push_back(Chain);
+ InstOps.push_back(SrcPtr);
+ InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+ InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+ InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+ SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+ SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+ VTs, &InstOps[0], InstOps.size(),
+ MVT::i64,
+ MachinePointerInfo(SV),
+ /*Align=*/0,
+ /*Volatile=*/false,
+ /*ReadMem=*/true,
+ /*WriteMem=*/true);
+ Chain = VAARG.getValue(1);
+
+ // Load the next argument and return it
+ return DAG.getLoad(ArgVT, dl,
+ Chain,
+ VAARG,
+ MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
+ SDValue Chain = Op.getOperand(0);
+ SDValue DstPtr = Op.getOperand(1);
+ SDValue SrcPtr = Op.getOperand(2);
+ const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ DebugLoc DL = Op.getDebugLoc();
+
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
+ DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
+ false,
+ MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+}
+
+// getTargetVShiftNode - Handle vector element shifts where the shift amount
+// may or may not be a constant. Takes immediate version of shift as input.
+static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue SrcOp, SDValue ShAmt,
+ SelectionDAG &DAG) {
+ assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
+
+ if (isa<ConstantSDNode>(ShAmt)) {
+ // Constant may be a TargetConstant. Use a regular constant.
+ uint32_t ShiftAmt = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ switch (Opc) {
+ default: llvm_unreachable("Unknown target vector shift node");
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI:
+ return DAG.getNode(Opc, dl, VT, SrcOp,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ // Change opcode to non-immediate version
+ switch (Opc) {
+ default: llvm_unreachable("Unknown target vector shift node");
+ case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
+ case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
+ case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
+ }
+
+ // Need to build a vector containing shift amount
+ // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
+ SDValue ShOps[4];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
+
+ // The return type has to be a 128-bit type with the same element
+ // type as the input type.
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
+
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt);
+ return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+}
+
+static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ // Comparison intrinsics.
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ case Intrinsic::x86_sse2_comilt_sd:
+ case Intrinsic::x86_sse2_comile_sd:
+ case Intrinsic::x86_sse2_comigt_sd:
+ case Intrinsic::x86_sse2_comige_sd:
+ case Intrinsic::x86_sse2_comineq_sd:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ case Intrinsic::x86_sse2_ucomineq_sd: {
+ unsigned Opc;
+ ISD::CondCode CC;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse2_comilt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse2_comile_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse2_comigt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse2_comige_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse2_comineq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETNE;
+ break;
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_ucomineq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETNE;
+ break;
+ }
+
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
+ assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
+ SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+
+ // Arithmetic intrinsics.
+ case Intrinsic::x86_sse2_pmulu_dq:
+ case Intrinsic::x86_avx2_pmulu_dq:
+ return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ // SSE2/AVX2 sub with unsigned saturation intrinsics
+ case Intrinsic::x86_sse2_psubus_b:
+ case Intrinsic::x86_sse2_psubus_w:
+ case Intrinsic::x86_avx2_psubus_b:
+ case Intrinsic::x86_avx2_psubus_w:
+ return DAG.getNode(X86ISD::SUBUS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ // SSE3/AVX horizontal add/sub intrinsics
+ case Intrinsic::x86_sse3_hadd_ps:
+ case Intrinsic::x86_sse3_hadd_pd:
+ case Intrinsic::x86_avx_hadd_ps_256:
+ case Intrinsic::x86_avx_hadd_pd_256:
+ case Intrinsic::x86_sse3_hsub_ps:
+ case Intrinsic::x86_sse3_hsub_pd:
+ case Intrinsic::x86_avx_hsub_ps_256:
+ case Intrinsic::x86_avx_hsub_pd_256:
+ case Intrinsic::x86_ssse3_phadd_w_128:
+ case Intrinsic::x86_ssse3_phadd_d_128:
+ case Intrinsic::x86_avx2_phadd_w:
+ case Intrinsic::x86_avx2_phadd_d:
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ case Intrinsic::x86_avx2_phsub_w:
+ case Intrinsic::x86_avx2_phsub_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse3_hadd_ps:
+ case Intrinsic::x86_sse3_hadd_pd:
+ case Intrinsic::x86_avx_hadd_ps_256:
+ case Intrinsic::x86_avx_hadd_pd_256:
+ Opcode = X86ISD::FHADD;
+ break;
+ case Intrinsic::x86_sse3_hsub_ps:
+ case Intrinsic::x86_sse3_hsub_pd:
+ case Intrinsic::x86_avx_hsub_ps_256:
+ case Intrinsic::x86_avx_hsub_pd_256:
+ Opcode = X86ISD::FHSUB;
+ break;
+ case Intrinsic::x86_ssse3_phadd_w_128:
+ case Intrinsic::x86_ssse3_phadd_d_128:
+ case Intrinsic::x86_avx2_phadd_w:
+ case Intrinsic::x86_avx2_phadd_d:
+ Opcode = X86ISD::HADD;
+ break;
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ case Intrinsic::x86_avx2_phsub_w:
+ case Intrinsic::x86_avx2_phsub_d:
+ Opcode = X86ISD::HSUB;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE2/SSE41/AVX2 integer max/min intrinsics.
+ case Intrinsic::x86_sse2_pmaxu_b:
+ case Intrinsic::x86_sse41_pmaxuw:
+ case Intrinsic::x86_sse41_pmaxud:
+ case Intrinsic::x86_avx2_pmaxu_b:
+ case Intrinsic::x86_avx2_pmaxu_w:
+ case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_sse2_pminu_b:
+ case Intrinsic::x86_sse41_pminuw:
+ case Intrinsic::x86_sse41_pminud:
+ case Intrinsic::x86_avx2_pminu_b:
+ case Intrinsic::x86_avx2_pminu_w:
+ case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_sse41_pmaxsb:
+ case Intrinsic::x86_sse2_pmaxs_w:
+ case Intrinsic::x86_sse41_pmaxsd:
+ case Intrinsic::x86_avx2_pmaxs_b:
+ case Intrinsic::x86_avx2_pmaxs_w:
+ case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_sse41_pminsb:
+ case Intrinsic::x86_sse2_pmins_w:
+ case Intrinsic::x86_sse41_pminsd:
+ case Intrinsic::x86_avx2_pmins_b:
+ case Intrinsic::x86_avx2_pmins_w:
+ case Intrinsic::x86_avx2_pmins_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_pmaxu_b:
+ case Intrinsic::x86_sse41_pmaxuw:
+ case Intrinsic::x86_sse41_pmaxud:
+ case Intrinsic::x86_avx2_pmaxu_b:
+ case Intrinsic::x86_avx2_pmaxu_w:
+ case Intrinsic::x86_avx2_pmaxu_d:
+ Opcode = X86ISD::UMAX;
+ break;
+ case Intrinsic::x86_sse2_pminu_b:
+ case Intrinsic::x86_sse41_pminuw:
+ case Intrinsic::x86_sse41_pminud:
+ case Intrinsic::x86_avx2_pminu_b:
+ case Intrinsic::x86_avx2_pminu_w:
+ case Intrinsic::x86_avx2_pminu_d:
+ Opcode = X86ISD::UMIN;
+ break;
+ case Intrinsic::x86_sse41_pmaxsb:
+ case Intrinsic::x86_sse2_pmaxs_w:
+ case Intrinsic::x86_sse41_pmaxsd:
+ case Intrinsic::x86_avx2_pmaxs_b:
+ case Intrinsic::x86_avx2_pmaxs_w:
+ case Intrinsic::x86_avx2_pmaxs_d:
+ Opcode = X86ISD::SMAX;
+ break;
+ case Intrinsic::x86_sse41_pminsb:
+ case Intrinsic::x86_sse2_pmins_w:
+ case Intrinsic::x86_sse41_pminsd:
+ case Intrinsic::x86_avx2_pmins_b:
+ case Intrinsic::x86_avx2_pmins_w:
+ case Intrinsic::x86_avx2_pmins_d:
+ Opcode = X86ISD::SMIN;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE/SSE2/AVX floating point max/min intrinsics.
+ case Intrinsic::x86_sse_max_ps:
+ case Intrinsic::x86_sse2_max_pd:
+ case Intrinsic::x86_avx_max_ps_256:
+ case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse2_min_pd:
+ case Intrinsic::x86_avx_min_ps_256:
+ case Intrinsic::x86_avx_min_pd_256: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse_max_ps:
+ case Intrinsic::x86_sse2_max_pd:
+ case Intrinsic::x86_avx_max_ps_256:
+ case Intrinsic::x86_avx_max_pd_256:
+ Opcode = X86ISD::FMAX;
+ break;
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse2_min_pd:
+ case Intrinsic::x86_avx_min_ps_256:
+ case Intrinsic::x86_avx_min_pd_256:
+ Opcode = X86ISD::FMIN;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // AVX2 variable shift intrinsics
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ Opcode = ISD::SHL;
+ break;
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ Opcode = ISD::SRL;
+ break;
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ Opcode = ISD::SRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ case Intrinsic::x86_ssse3_pshuf_b_128:
+ case Intrinsic::x86_avx2_pshuf_b:
+ return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::x86_ssse3_psign_b_128:
+ case Intrinsic::x86_ssse3_psign_w_128:
+ case Intrinsic::x86_ssse3_psign_d_128:
+ case Intrinsic::x86_avx2_psign_b:
+ case Intrinsic::x86_avx2_psign_w:
+ case Intrinsic::x86_avx2_psign_d:
+ return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::x86_sse41_insertps:
+ return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::x86_avx2_permd:
+ case Intrinsic::x86_avx2_permps:
+ // Operands intentionally swapped. Mask is last operand to intrinsic,
+ // but second operand for node/intruction.
+ return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(1));
+
+ case Intrinsic::x86_sse_sqrt_ps:
+ case Intrinsic::x86_sse2_sqrt_pd:
+ case Intrinsic::x86_avx_sqrt_ps_256:
+ case Intrinsic::x86_avx_sqrt_pd_256:
+ return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1));
+
+ // ptest and testp intrinsics. The intrinsic these come from are designed to
+ // return an integer value, not just an instruction so lower it to the ptest
+ // or testp pattern and a setcc for the result.
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestz_256:
+ case Intrinsic::x86_avx_ptestc_256:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256: {
+ bool IsTestPacked = false;
+ unsigned X86CC;
+ switch (IntNo) {
+ default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_avx_ptestz_256:
+ // ZF = 1
+ X86CC = X86::COND_E;
+ break;
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_avx_ptestc_256:
+ // CF = 1
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ // ZF and CF = 0
+ X86CC = X86::COND_A;
+ break;
+ }
+
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
+ SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
+ SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+
+ // SSE/AVX shift intrinsics
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ Opcode = X86ISD::VSHL;
+ break;
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ Opcode = X86ISD::VSRL;
+ break;
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ Opcode = X86ISD::VSRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE/AVX immediate shift intrinsics
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ Opcode = X86ISD::VSHLI;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ Opcode = X86ISD::VSRLI;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ Opcode = X86ISD::VSRAI;
+ break;
+ }
+ return getTargetVShiftNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
+ }
+
+ case Intrinsic::x86_sse42_pcmpistria128:
+ case Intrinsic::x86_sse42_pcmpestria128:
+ case Intrinsic::x86_sse42_pcmpistric128:
+ case Intrinsic::x86_sse42_pcmpestric128:
+ case Intrinsic::x86_sse42_pcmpistrio128:
+ case Intrinsic::x86_sse42_pcmpestrio128:
+ case Intrinsic::x86_sse42_pcmpistris128:
+ case Intrinsic::x86_sse42_pcmpestris128:
+ case Intrinsic::x86_sse42_pcmpistriz128:
+ case Intrinsic::x86_sse42_pcmpestriz128: {
+ unsigned Opcode;
+ unsigned X86CC;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse42_pcmpistria128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_A;
+ break;
+ case Intrinsic::x86_sse42_pcmpestria128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_A;
+ break;
+ case Intrinsic::x86_sse42_pcmpistric128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_sse42_pcmpestric128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_sse42_pcmpistrio128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_O;
+ break;
+ case Intrinsic::x86_sse42_pcmpestrio128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_O;
+ break;
+ case Intrinsic::x86_sse42_pcmpistris128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_S;
+ break;
+ case Intrinsic::x86_sse42_pcmpestris128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_S;
+ break;
+ case Intrinsic::x86_sse42_pcmpistriz128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_E;
+ break;
+ case Intrinsic::x86_sse42_pcmpestriz128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_E;
+ break;
+ }
+ SmallVector<SDValue, 5> NewOps;
+ NewOps.append(Op->op_begin()+1, Op->op_end());
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8),
+ SDValue(PCMP.getNode(), 1));
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+
+ case Intrinsic::x86_sse42_pcmpistri128:
+ case Intrinsic::x86_sse42_pcmpestri128: {
+ unsigned Opcode;
+ if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
+ Opcode = X86ISD::PCMPISTRI;
+ else
+ Opcode = X86ISD::PCMPESTRI;
+
+ SmallVector<SDValue, 5> NewOps;
+ NewOps.append(Op->op_begin()+1, Op->op_end());
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+ }
+ case Intrinsic::x86_fma_vfmadd_ps:
+ case Intrinsic::x86_fma_vfmadd_pd:
+ case Intrinsic::x86_fma_vfmsub_ps:
+ case Intrinsic::x86_fma_vfmsub_pd:
+ case Intrinsic::x86_fma_vfnmadd_ps:
+ case Intrinsic::x86_fma_vfnmadd_pd:
+ case Intrinsic::x86_fma_vfnmsub_ps:
+ case Intrinsic::x86_fma_vfnmsub_pd:
+ case Intrinsic::x86_fma_vfmaddsub_ps:
+ case Intrinsic::x86_fma_vfmaddsub_pd:
+ case Intrinsic::x86_fma_vfmsubadd_ps:
+ case Intrinsic::x86_fma_vfmsubadd_pd:
+ case Intrinsic::x86_fma_vfmadd_ps_256:
+ case Intrinsic::x86_fma_vfmadd_pd_256:
+ case Intrinsic::x86_fma_vfmsub_ps_256:
+ case Intrinsic::x86_fma_vfmsub_pd_256:
+ case Intrinsic::x86_fma_vfnmadd_ps_256:
+ case Intrinsic::x86_fma_vfnmadd_pd_256:
+ case Intrinsic::x86_fma_vfnmsub_ps_256:
+ case Intrinsic::x86_fma_vfnmsub_pd_256:
+ case Intrinsic::x86_fma_vfmaddsub_ps_256:
+ case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ case Intrinsic::x86_fma_vfmsubadd_ps_256:
+ case Intrinsic::x86_fma_vfmsubadd_pd_256: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_fma_vfmadd_ps:
+ case Intrinsic::x86_fma_vfmadd_pd:
+ case Intrinsic::x86_fma_vfmadd_ps_256:
+ case Intrinsic::x86_fma_vfmadd_pd_256:
+ Opc = X86ISD::FMADD;
+ break;
+ case Intrinsic::x86_fma_vfmsub_ps:
+ case Intrinsic::x86_fma_vfmsub_pd:
+ case Intrinsic::x86_fma_vfmsub_ps_256:
+ case Intrinsic::x86_fma_vfmsub_pd_256:
+ Opc = X86ISD::FMSUB;
+ break;
+ case Intrinsic::x86_fma_vfnmadd_ps:
+ case Intrinsic::x86_fma_vfnmadd_pd:
+ case Intrinsic::x86_fma_vfnmadd_ps_256:
+ case Intrinsic::x86_fma_vfnmadd_pd_256:
+ Opc = X86ISD::FNMADD;
+ break;
+ case Intrinsic::x86_fma_vfnmsub_ps:
+ case Intrinsic::x86_fma_vfnmsub_pd:
+ case Intrinsic::x86_fma_vfnmsub_ps_256:
+ case Intrinsic::x86_fma_vfnmsub_pd_256:
+ Opc = X86ISD::FNMSUB;
+ break;
+ case Intrinsic::x86_fma_vfmaddsub_ps:
+ case Intrinsic::x86_fma_vfmaddsub_pd:
+ case Intrinsic::x86_fma_vfmaddsub_ps_256:
+ case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ Opc = X86ISD::FMADDSUB;
+ break;
+ case Intrinsic::x86_fma_vfmsubadd_ps:
+ case Intrinsic::x86_fma_vfmsubadd_pd:
+ case Intrinsic::x86_fma_vfmsubadd_ps_256:
+ case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ Opc = X86ISD::FMSUBADD;
+ break;
+ }
+
+ return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ }
+ }
+}
+
+static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+
+ // RDRAND/RDSEED intrinsics.
+ case Intrinsic::x86_rdrand_16:
+ case Intrinsic::x86_rdrand_32:
+ case Intrinsic::x86_rdrand_64:
+ case Intrinsic::x86_rdseed_16:
+ case Intrinsic::x86_rdseed_32:
+ case Intrinsic::x86_rdseed_64: {
+ unsigned Opcode = (IntNo == Intrinsic::x86_rdseed_16 ||
+ IntNo == Intrinsic::x86_rdseed_32 ||
+ IntNo == Intrinsic::x86_rdseed_64) ? X86ISD::RDSEED :
+ X86ISD::RDRAND;
+ // Emit the node with the right value type.
+ SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
+ SDValue Result = DAG.getNode(Opcode, dl, VTs, Op.getOperand(0));
+
+ // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
+ // Otherwise return the value from Rand, which is always 0, casted to i32.
+ SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
+ DAG.getConstant(1, Op->getValueType(1)),
+ DAG.getConstant(X86::COND_B, MVT::i32),
+ SDValue(Result.getNode(), 1) };
+ SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
+ DAG.getVTList(Op->getValueType(1), MVT::Glue),
+ Ops, 4);
+
+ // Return { result, isValid, chain }.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
+ SDValue(Result.getNode(), 2));
+ }
+
+ // XTEST intrinsics.
+ case Intrinsic::x86_xtest: {
+ SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
+ SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0));
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_NE, MVT::i8),
+ InTrans);
+ SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
+ Ret, SDValue(InTrans.getNode(), 1));
+ }
+ }
+}
+
+SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, PtrVT,
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
+SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
+ SelectionDAG &DAG) const {
+ return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize());
+}
+
+SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+ getPointerTy());
+ unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
+
+ SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
+ DAG.getIntPtrConstant(RegInfo->getSlotSize()));
+ StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
+
+ return DAG.getNode(X86ISD::EH_RETURN, dl,
+ MVT::Other,
+ Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
+}
+
+SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
+ return Op.getOperand(0);
+}
+
+SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Root = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+
+ if (Subtarget->is64Bit()) {
+ SDValue OutChains[6];
+
+ // Large code-model.
+ const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
+ const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
+
+ const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
+ const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
+
+ const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
+
+ // Load the pointer to the nested function into R11.
+ unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
+ SDValue Addr = Trmp;
+ OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(2, MVT::i64));
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 2),
+ false, false, 2);
+
+ // Load the 'nest' parameter value into R10.
+ // R10 is specified in X86CallingConv.td
+ OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(10, MVT::i64));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr, 10),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(12, MVT::i64));
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12),
+ false, false, 2);
+
+ // Jump to the nested function.
+ OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(20, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr, 20),
+ false, false, 0);
+
+ unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(22, MVT::i64));
+ OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
+ MachinePointerInfo(TrmpAddr, 22),
+ false, false, 0);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6);
+ } else {
+ const Function *Func =
+ cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+ CallingConv::ID CC = Func->getCallingConv();
+ unsigned NestReg;
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::X86_StdCall: {
+ // Pass 'nest' parameter in ECX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::ECX;
+
+ // Check that ECX wasn't needed by an 'inreg' parameter.
+ FunctionType *FTy = Func->getFunctionType();
+ const AttributeSet &Attrs = Func->getAttributes();
+
+ if (!Attrs.isEmpty() && !Func->isVarArg()) {
+ unsigned InRegCount = 0;
+ unsigned Idx = 1;
+
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I, ++Idx)
+ if (Attrs.hasAttribute(Idx, Attribute::InReg))
+ // FIXME: should only count parameters that are lowered to integers.
+ InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
+
+ if (InRegCount > 2) {
+ report_fatal_error("Nest register in use - reduce number of inreg"
+ " parameters!");
+ }
+ }
+ break;
+ }
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::Fast:
+ // Pass 'nest' parameter in EAX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::EAX;
+ break;
+ }
+
+ SDValue OutChains[4];
+ SDValue Addr, Disp;
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(10, MVT::i32));
+ Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
+
+ // This is storing the opcode for MOV32ri.
+ const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
+ const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
+ OutChains[0] = DAG.getStore(Root, dl,
+ DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
+ Trmp, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(1, MVT::i32));
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 1),
+ false, false, 1);
+
+ const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(5, MVT::i32));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
+ MachinePointerInfo(TrmpAddr, 5),
+ false, false, 1);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(6, MVT::i32));
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
+ MachinePointerInfo(TrmpAddr, 6),
+ false, false, 1);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4);
+ }
+}
+
+SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ /*
+ The rounding mode is in bits 11:10 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Save FP Control Word to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, 2, 2);
+
+ SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
+ SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
+ DAG.getVTList(MVT::Other),
+ Ops, 2, MVT::i16, MMO);
+
+ // Load FP Control Word from stack slot
+ SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
+ MachinePointerInfo(), false, false, false, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ CWD, DAG.getConstant(0x800, MVT::i16)),
+ DAG.getConstant(11, MVT::i8));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ CWD, DAG.getConstant(0x400, MVT::i16)),
+ DAG.getConstant(9, MVT::i8));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ DAG.getNode(ISD::ADD, DL, MVT::i16,
+ DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
+ DAG.getConstant(1, MVT::i16)),
+ DAG.getConstant(3, MVT::i16));
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
+}
+
+static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ // Zero extend to i32 since there is not an i8 bsr.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+ // If src is zero (i.e. bsr sets ZF), returns NumBits.
+ SDValue Ops[] = {
+ Op,
+ DAG.getConstant(NumBits+NumBits-1, OpVT),
+ DAG.getConstant(X86::COND_E, MVT::i8),
+ Op.getValue(1)
+ };
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
+
+ // Finally xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ // Zero extend to i32 since there is not an i8 bsr.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsr (scan bits in reverse).
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+ // And xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = Op.getOperand(0);
+
+ // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
+
+ // If src is zero (i.e. bsf sets ZF), returns NumBits.
+ SDValue Ops[] = {
+ Op,
+ DAG.getConstant(NumBits, VT),
+ DAG.getConstant(X86::COND_E, MVT::i8),
+ Op.getValue(1)
+ };
+ return DAG.getNode(X86ISD::CMOV, dl, VT, Ops, array_lengthof(Ops));
+}
+
+// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit
+// ones, and then concatenate the result back.
+static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ assert(VT.is256BitVector() && VT.isInteger() &&
+ "Unsupported value type for operation");
+
+ unsigned NumElems = VT.getVectorNumElements();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
+
+ // Extract the RHS vectors
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl);
+ SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
+}
+
+static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getValueType().is256BitVector() &&
+ Op.getValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntArith(Op, DAG);
+}
+
+static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getValueType().is256BitVector() &&
+ Op.getValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntArith(Op, DAG);
+}
+
+static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ // Decompose 256-bit ops into smaller 128-bit ops.
+ if (VT.is256BitVector() && !Subtarget->hasInt256())
+ return Lower256IntArith(Op, DAG);
+
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
+ if (VT == MVT::v4i32) {
+ assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() &&
+ "Should not custom lower when pmuldq is available!");
+
+ // Extract the odd parts.
+ const int UnpackMask[] = { 1, -1, 3, -1 };
+ SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
+ SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
+
+ // Multiply the even parts.
+ SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B);
+ // Now multiply odd parts.
+ SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds);
+
+ Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens);
+ Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds);
+
+ // Merge the two vectors back together with a shuffle. This expands into 2
+ // shuffles.
+ const int ShufMask[] = { 0, 4, 2, 6 };
+ return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
+ }
+
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
+ "Only know how to lower V2I64/V4I64 multiply");
+
+ // Ahi = psrlqi(a, 32);
+ // Bhi = psrlqi(b, 32);
+ //
+ // AloBlo = pmuludq(a, b);
+ // AloBhi = pmuludq(a, Bhi);
+ // AhiBlo = pmuludq(Ahi, b);
+
+ // AloBhi = psllqi(AloBhi, 32);
+ // AhiBlo = psllqi(AhiBlo, 32);
+ // return AloBlo + AloBhi + AhiBlo;
+
+ SDValue ShAmt = DAG.getConstant(32, MVT::i32);
+
+ SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
+ SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, ShAmt);
+
+ // Bit cast to 32-bit vectors for MULUDQ
+ EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : MVT::v8i32;
+ A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
+ B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
+ Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
+ Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi);
+
+ SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
+ SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
+ SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
+
+ AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, ShAmt);
+ AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, ShAmt);
+
+ SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
+ return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
+}
+
+SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltTy = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Lower sdiv X, pow2-const.
+ BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
+ if (!C)
+ return SDValue();
+
+ APInt SplatValue, SplatUndef;
+ unsigned MinSplatBits;
+ bool HasAnyUndefs;
+ if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs))
+ return SDValue();
+
+ if ((SplatValue != 0) &&
+ (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
+ unsigned lg2 = SplatValue.countTrailingZeros();
+ // Splat the sign bit.
+ SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32);
+ SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG);
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32);
+ SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG);
+ SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
+ SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32);
+ SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG);
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (SplatValue.isNonNegative())
+ return SRA;
+
+ SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts);
+ return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA);
+ }
+ return SDValue();
+}
+
+static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ // Optimize shl/srl/sra with constant shift amount.
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+ uint64_t ShiftAmt = C->getZExtValue();
+
+ if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ (Subtarget->hasInt256() &&
+ (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ if (Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ if (Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
+ return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+
+ if (VT == MVT::v16i8) {
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
+ SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
+ // Zero out the rightmost bits.
+ SmallVector<SDValue, 16> V(16,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt),
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
+ // Zero out the leftmost bits.
+ SmallVector<SDValue, 16> V(16,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ if (ShiftAmt == 7) {
+ // R s>> 7 === R s< 0
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
+
+ // R s>> a === ((R u>> a) ^ m) - m
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
+ MVT::i8));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
+ }
+ llvm_unreachable("Unknown shift opcode.");
+ }
+
+ if (Subtarget->hasInt256() && VT == MVT::v32i8) {
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
+ SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
+ // Zero out the rightmost bits.
+ SmallVector<SDValue, 32> V(32,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt),
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
+ // Zero out the leftmost bits.
+ SmallVector<SDValue, 32> V(32,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ if (ShiftAmt == 7) {
+ // R s>> 7 === R s< 0
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
+
+ // R s>> a === ((R u>> a) ^ m) - m
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
+ MVT::i8));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
+ }
+ llvm_unreachable("Unknown shift opcode.");
+ }
+ }
+ }
+
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
+ uint64_t ShiftAmt = 0;
+ for (unsigned i = 0; i != Ratio; ++i) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
+ }
+ // Check remaining shift amounts.
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ uint64_t ShAmt = 0;
+ for (unsigned j = 0; j != Ratio; ++j) {
+ ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+ }
+ if (ShAmt != ShiftAmt)
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ (Subtarget->hasInt256() &&
+ ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ SDValue BaseShAmt;
+ EVT EltVT = VT.getVectorElementType();
+
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i, j;
+ for (i = 0; i != NumElts; ++i) {
+ if (Amt.getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ break;
+ }
+ for (j = i; j != NumElts; ++j) {
+ SDValue Arg = Amt.getOperand(j);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != Amt.getOperand(i))
+ break;
+ }
+ if (i != NumElts && j == NumElts)
+ BaseShAmt = Amt.getOperand(i);
+ } else {
+ if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ Amt = Amt.getOperand(0);
+ if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(Amt)->isSplat()) {
+ SDValue InVec = Amt.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx =
+ cast<ShuffleVectorSDNode>(Amt)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ if (BaseShAmt.getNode()) {
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRA:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ }
+ }
+ }
+
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ std::vector<SDValue> Vals(Ratio);
+ for (unsigned i = 0; i != Ratio; ++i)
+ Vals[i] = Amt.getOperand(i);
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ for (unsigned j = 0; j != Ratio; ++j)
+ if (Vals[j] != Amt.getOperand(i + j))
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+ SDValue V;
+
+ if (!Subtarget->hasSSE2())
+ return SDValue();
+
+ V = LowerScalarImmediateShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ V = LowerScalarVariableShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ // AVX2 has VPSLLV/VPSRAV/VPSRLV.
+ if (Subtarget->hasInt256()) {
+ if (Op.getOpcode() == ISD::SRL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SHL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
+ return Op;
+ }
+
+ // Lower SHL with variable shift amount.
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
+
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ return DAG.getNode(ISD::MUL, dl, VT, Op, R);
+ }
+ if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
+ assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
+
+ // a = a << 5;
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
+ Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
+
+ // Turn 'a' into a mask suitable for VSELECT
+ SDValue VSelM = DAG.getConstant(0x80, VT);
+ SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+
+ SDValue CM1 = DAG.getConstant(0x0f, VT);
+ SDValue CM2 = DAG.getConstant(0x3f, VT);
+
+ // r = VSELECT(r, psllw(r & (char16)15, 4), a);
+ SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
+ M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
+ DAG.getConstant(4, MVT::i32), DAG);
+ M = DAG.getNode(ISD::BITCAST, dl, VT, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
+
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+ OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+
+ // r = VSELECT(r, psllw(r & (char16)63, 2), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
+ M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
+ DAG.getConstant(2, MVT::i32), DAG);
+ M = DAG.getNode(ISD::BITCAST, dl, VT, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
+
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+ OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+
+ // return VSELECT(r, r+r, a);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
+ DAG.getNode(ISD::ADD, dl, VT, R, R), R);
+ return R;
+ }
+
+ // Decompose 256-bit shifts into smaller 128-bit shifts.
+ if (VT.is256BitVector()) {
+ unsigned NumElems = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ // Extract the two vectors
+ SDValue V1 = Extract128BitVector(R, 0, DAG, dl);
+ SDValue V2 = Extract128BitVector(R, NumElems/2, DAG, dl);
+
+ // Recreate the shift amount vectors
+ SDValue Amt1, Amt2;
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+ // Constant shift amount
+ SmallVector<SDValue, 4> Amt1Csts;
+ SmallVector<SDValue, 4> Amt2Csts;
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ Amt1Csts.push_back(Amt->getOperand(i));
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
+ Amt2Csts.push_back(Amt->getOperand(i));
+
+ Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt1Csts[0], NumElems/2);
+ Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt2Csts[0], NumElems/2);
+ } else {
+ // Variable shift amount
+ Amt1 = Extract128BitVector(Amt, 0, DAG, dl);
+ Amt2 = Extract128BitVector(Amt, NumElems/2, DAG, dl);
+ }
+
+ // Issue new vector shifts for the smaller types
+ V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
+ V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
+
+ // Concatenate the result back
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+ // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+ // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
+ // looks for this combo and may remove the "setcc" instruction if the "setcc"
+ // has only one use.
+ SDNode *N = Op.getNode();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned BaseOp = 0;
+ unsigned Cond = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown ovf instruction!");
+ case ISD::SADDO:
+ // A subtract of one will be selected as a INC. Note that INC doesn't
+ // set CF, so we can't do this for UADDO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
+ BaseOp = X86ISD::INC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SSUBO:
+ // A subtract of one will be selected as a DEC. Note that DEC doesn't
+ // set CF, so we can't do this for USUBO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
+ BaseOp = X86ISD::DEC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_O;
+ break;
+ case ISD::USUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SMULO:
+ BaseOp = X86ISD::SMUL;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
+ MVT::i32);
+ SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(X86::COND_O, MVT::i32),
+ SDValue(Sum.getNode(), 2));
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
+ }
+ }
+
+ // Also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
+ DAG.getConstant(Cond, MVT::i32),
+ SDValue(Sum.getNode(), 1));
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
+}
+
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT VT = Op.getValueType();
+
+ if (!Subtarget->hasSSE2() || !VT.isVector())
+ return SDValue();
+
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v8i32:
+ case MVT::v16i16:
+ if (!Subtarget->hasFp256())
+ return SDValue();
+ if (!Subtarget->hasInt256()) {
+ // needs to be split
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ unsigned ExtraNumElems = ExtraVT.getVectorNumElements();
+ ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
+ ExtraNumElems/2);
+ SDValue Extra = DAG.getValueType(ExtraVT);
+
+ LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
+ LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);
+ }
+ // fall through
+ case MVT::v4i32:
+ case MVT::v8i16: {
+ // (sext (vzext x)) -> (vsext x)
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op00 = Op0.getOperand(0);
+ SDValue Tmp1;
+ // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+ Tmp1 = LowerVectorIntExtend(Op00, DAG);
+ if (Tmp1.getNode()) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+
+ // If the above didn't work, then just use Shift-Left + Shift-Right.
+ Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
+ }
+ }
+}
+
+static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
+ // There isn't any reason to disable it if the target processor supports it.
+ if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ if (!isDev)
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+
+ unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+ if (!Op1 && !Op2 && !Op3 && Op4)
+ return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+ if (Op1 && !Op2 && !Op3 && !Op4)
+ return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+ // (MFENCE)>;
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+}
+
+static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+ SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+ // The only fence that needs an instruction is a sequentially-consistent
+ // cross-thread fence.
+ if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
+ // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+ // no-sse2). There isn't any reason to disable it if the target processor
+ // supports it.
+ if (Subtarget->hasSSE2() || Subtarget->is64Bit())
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ EVT T = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned Reg = 0;
+ unsigned size = 0;
+ switch(T.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid value type!");
+ case MVT::i8: Reg = X86::AL; size = 1; break;
+ case MVT::i16: Reg = X86::AX; size = 2; break;
+ case MVT::i32: Reg = X86::EAX; size = 4; break;
+ case MVT::i64:
+ assert(Subtarget->is64Bit() && "Node not type legal!");
+ Reg = X86::RAX; size = 8;
+ break;
+ }
+ SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
+ Op.getOperand(2), SDValue());
+ SDValue Ops[] = { cpIn.getValue(0),
+ Op.getOperand(1),
+ Op.getOperand(3),
+ DAG.getTargetConstant(size, MVT::i8),
+ cpIn.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
+ Ops, 5, T, MMO);
+ SDValue cpOut =
+ DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
+ return cpOut;
+}
+
+static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(Subtarget->is64Bit() && "Result not type legalized?");
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TheChain = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
+ SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
+ rax.getValue(2));
+ SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
+ DAG.getConstant(32, MVT::i8));
+ SDValue Ops[] = {
+ DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
+ rdx.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ EVT DstVT = Op.getValueType();
+ assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ Subtarget->hasMMX() && "Unexpected custom BITCAST");
+ assert((DstVT == MVT::i64 ||
+ (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
+ "Unexpected custom BITCAST");
+ // i64 <=> MMX conversions are Legal.
+ if (SrcVT==MVT::i64 && DstVT.isVector())
+ return Op;
+ if (DstVT==MVT::i64 && SrcVT.isVector())
+ return Op;
+ // MMX <=> MMX conversions are Legal.
+ if (SrcVT.isVector() && DstVT.isVector())
+ return Op;
+ // All other conversions need to be expanded.
+ return SDValue();
+}
+
+static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ EVT T = Node->getValueType(0);
+ SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
+ DAG.getConstant(0, T), Node->getOperand(2));
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), negOp,
+ cast<AtomicSDNode>(Node)->getSrcValue(),
+ cast<AtomicSDNode>(Node)->getAlignment(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+}
+
+static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
+
+ // Convert seq_cst store -> xchg
+ // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
+ // FIXME: On 32-bit, store -> fist or movq would be more efficient
+ // (The only way to get a 16-byte store is cmpxchg16b)
+ // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
+ if (cast<AtomicSDNode>(Node)->getOrdering() == SequentiallyConsistent ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ return Swap.getValue(1);
+ }
+ // Other atomic stores have a simple pattern.
+ return Op;
+}
+
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Invalid code");
+ case ISD::ADDC: Opc = X86ISD::ADD; break;
+ case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
+ case ISD::SUBC: Opc = X86ISD::SUB; break;
+ case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
+
+SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
+
+ // For MacOSX, we want to call an alternative entry point: __sincos_stret,
+ // which returns the values in two XMM registers.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Only optimize x86_64 for now. i386 is a bit messy. For f32,
+ // the small struct {f32, f32} is returned in (eax, edx). For f64,
+ // the results are returned via SRet in memory.
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
+ false, false, false, false, 0,
+ CallingConv::C, /*isTaillCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.first;
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, Subtarget, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
+ case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG);
+ case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
+ case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
+ case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::FABS: return LowerFABS(Op, DAG);
+ case ISD::FNEG: return LowerFNEG(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: return LowerShift(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO: return LowerXALUO(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::ADD: return LowerADD(Op, DAG);
+ case ISD::SUB: return LowerSUB(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
+ }
+}
+
+static void ReplaceATOMIC_LOAD(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
+
+ // Convert wide load -> cmpxchg8b/cmpxchg16b
+ // FIXME: On 32-bit, load -> fild or movq would be more efficient
+ // (The only way to get a 16-byte load is cmpxchg16b)
+ // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
+ Node->getOperand(0),
+ Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+}
+
+static void
+ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG, unsigned NewOp) {
+ DebugLoc dl = Node->getDebugLoc();
+ assert (Node->getValueType(0) == MVT::i64 &&
+ "Only know how to expand i64 atomics");
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0));
+ SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1));
+ SDValue Ops[] = { Chain, In1, In2L, In2H };
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
+ cast<MemSDNode>(Node)->getMemOperand());
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
+}
+
+/// ReplaceNodeResults - Replace a node with an illegal result type
+/// with a new node built out of custom code.
+void X86TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = N->getDebugLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ // We don't want to expand or promote these.
+ return;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
+ return;
+
+ std::pair<SDValue,SDValue> Vals =
+ FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.getNode() != 0) {
+ EVT VT = N->getValueType(0);
+ // Return a load from the stack slot.
+ if (StackSlot.getNode() != 0)
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+ MachinePointerInfo(),
+ false, false, false, 0));
+ else
+ Results.push_back(FIST);
+ }
+ return;
+ }
+ case ISD::UINT_TO_FP: {
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ if (N->getOperand(0).getValueType() != MVT::v2i32 ||
+ N->getValueType(0) != MVT::v2f32)
+ return;
+ SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
+ N->getOperand(0));
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+ SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias));
+ Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or);
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
+ Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
+ return;
+ }
+ case ISD::FP_ROUND: {
+ if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
+ return;
+ SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+ Results.push_back(V);
+ return;
+ }
+ case ISD::READCYCLECOUNTER: {
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TheChain = N->getOperand(0);
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
+ rd.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
+ eax.getValue(2));
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { eax, edx };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
+ Results.push_back(edx.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_CMP_SWAP: {
+ EVT T = N->getValueType(0);
+ assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
+ bool Regs64bit = T == MVT::i128;
+ EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
+ SDValue cpInL, cpInH;
+ cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
+ DAG.getConstant(0, HalfT));
+ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
+ DAG.getConstant(1, HalfT));
+ cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
+ Regs64bit ? X86::RAX : X86::EAX,
+ cpInL, SDValue());
+ cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
+ Regs64bit ? X86::RDX : X86::EDX,
+ cpInH, cpInL.getValue(1));
+ SDValue swapInL, swapInH;
+ swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
+ DAG.getConstant(0, HalfT));
+ swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
+ DAG.getConstant(1, HalfT));
+ swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl,
+ Regs64bit ? X86::RBX : X86::EBX,
+ swapInL, cpInH.getValue(1));
+ swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl,
+ Regs64bit ? X86::RCX : X86::ECX,
+ swapInH, swapInL.getValue(1));
+ SDValue Ops[] = { swapInH.getValue(0),
+ N->getOperand(1),
+ swapInH.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
+ X86ISD::LCMPXCHG8_DAG;
+ SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys,
+ Ops, 3, T, MMO);
+ SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
+ Regs64bit ? X86::RAX : X86::EAX,
+ HalfT, Result.getValue(1));
+ SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
+ Regs64bit ? X86::RDX : X86::EDX,
+ HalfT, cpOutL.getValue(2));
+ SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF, 2));
+ Results.push_back(cpOutH.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_SWAP: {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case ISD::ATOMIC_LOAD_ADD:
+ Opc = X86ISD::ATOMADD64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Opc = X86ISD::ATOMAND64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ Opc = X86ISD::ATOMNAND64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ Opc = X86ISD::ATOMOR64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ Opc = X86ISD::ATOMSUB64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Opc = X86ISD::ATOMXOR64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_MAX:
+ Opc = X86ISD::ATOMMAX64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_MIN:
+ Opc = X86ISD::ATOMMIN64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_UMAX:
+ Opc = X86ISD::ATOMUMAX64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_UMIN:
+ Opc = X86ISD::ATOMUMIN64_DAG;
+ break;
+ case ISD::ATOMIC_SWAP:
+ Opc = X86ISD::ATOMSWAP64_DAG;
+ break;
+ }
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
+ return;
+ }
+ case ISD::ATOMIC_LOAD:
+ ReplaceATOMIC_LOAD(N, Results, DAG);
+ }
+}
+
+const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case X86ISD::BSF: return "X86ISD::BSF";
+ case X86ISD::BSR: return "X86ISD::BSR";
+ case X86ISD::SHLD: return "X86ISD::SHLD";
+ case X86ISD::SHRD: return "X86ISD::SHRD";
+ case X86ISD::FAND: return "X86ISD::FAND";
+ case X86ISD::FOR: return "X86ISD::FOR";
+ case X86ISD::FXOR: return "X86ISD::FXOR";
+ case X86ISD::FSRL: return "X86ISD::FSRL";
+ case X86ISD::FILD: return "X86ISD::FILD";
+ case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
+ case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
+ case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
+ case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
+ case X86ISD::FLD: return "X86ISD::FLD";
+ case X86ISD::FST: return "X86ISD::FST";
+ case X86ISD::CALL: return "X86ISD::CALL";
+ case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
+ case X86ISD::BT: return "X86ISD::BT";
+ case X86ISD::CMP: return "X86ISD::CMP";
+ case X86ISD::COMI: return "X86ISD::COMI";
+ case X86ISD::UCOMI: return "X86ISD::UCOMI";
+ case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
+ case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
+ case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
+ case X86ISD::CMOV: return "X86ISD::CMOV";
+ case X86ISD::BRCOND: return "X86ISD::BRCOND";
+ case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+ case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
+ case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
+ case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
+ case X86ISD::Wrapper: return "X86ISD::Wrapper";
+ case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
+ case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
+ case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
+ case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
+ case X86ISD::PINSRB: return "X86ISD::PINSRB";
+ case X86ISD::PINSRW: return "X86ISD::PINSRW";
+ case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+ case X86ISD::ANDNP: return "X86ISD::ANDNP";
+ case X86ISD::PSIGN: return "X86ISD::PSIGN";
+ case X86ISD::BLENDV: return "X86ISD::BLENDV";
+ case X86ISD::BLENDI: return "X86ISD::BLENDI";
+ case X86ISD::SUBUS: return "X86ISD::SUBUS";
+ case X86ISD::HADD: return "X86ISD::HADD";
+ case X86ISD::HSUB: return "X86ISD::HSUB";
+ case X86ISD::FHADD: return "X86ISD::FHADD";
+ case X86ISD::FHSUB: return "X86ISD::FHSUB";
+ case X86ISD::UMAX: return "X86ISD::UMAX";
+ case X86ISD::UMIN: return "X86ISD::UMIN";
+ case X86ISD::SMAX: return "X86ISD::SMAX";
+ case X86ISD::SMIN: return "X86ISD::SMIN";
+ case X86ISD::FMAX: return "X86ISD::FMAX";
+ case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FMAXC: return "X86ISD::FMAXC";
+ case X86ISD::FMINC: return "X86ISD::FMINC";
+ case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
+ case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
+ case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
+ case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP";
+ case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP";
+ case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
+ case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
+ case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
+ case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r";
+ case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
+ case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
+ case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
+ case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
+ case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
+ case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
+ case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
+ case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
+ case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
+ case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VZEXT: return "X86ISD::VZEXT";
+ case X86ISD::VSEXT: return "X86ISD::VSEXT";
+ case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
+ case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
+ case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
+ case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
+ case X86ISD::VSHL: return "X86ISD::VSHL";
+ case X86ISD::VSRL: return "X86ISD::VSRL";
+ case X86ISD::VSRA: return "X86ISD::VSRA";
+ case X86ISD::VSHLI: return "X86ISD::VSHLI";
+ case X86ISD::VSRLI: return "X86ISD::VSRLI";
+ case X86ISD::VSRAI: return "X86ISD::VSRAI";
+ case X86ISD::CMPP: return "X86ISD::CMPP";
+ case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
+ case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
+ case X86ISD::ADD: return "X86ISD::ADD";
+ case X86ISD::SUB: return "X86ISD::SUB";
+ case X86ISD::ADC: return "X86ISD::ADC";
+ case X86ISD::SBB: return "X86ISD::SBB";
+ case X86ISD::SMUL: return "X86ISD::SMUL";
+ case X86ISD::UMUL: return "X86ISD::UMUL";
+ case X86ISD::INC: return "X86ISD::INC";
+ case X86ISD::DEC: return "X86ISD::DEC";
+ case X86ISD::OR: return "X86ISD::OR";
+ case X86ISD::XOR: return "X86ISD::XOR";
+ case X86ISD::AND: return "X86ISD::AND";
+ case X86ISD::BLSI: return "X86ISD::BLSI";
+ case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
+ case X86ISD::BLSR: return "X86ISD::BLSR";
+ case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
+ case X86ISD::PTEST: return "X86ISD::PTEST";
+ case X86ISD::TESTP: return "X86ISD::TESTP";
+ case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
+ case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
+ case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
+ case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
+ case X86ISD::SHUFP: return "X86ISD::SHUFP";
+ case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
+ case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
+ case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
+ case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
+ case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
+ case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
+ case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
+ case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
+ case X86ISD::MOVSD: return "X86ISD::MOVSD";
+ case X86ISD::MOVSS: return "X86ISD::MOVSS";
+ case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
+ case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
+ case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
+ case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
+ case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
+ case X86ISD::VPERMV: return "X86ISD::VPERMV";
+ case X86ISD::VPERMI: return "X86ISD::VPERMI";
+ case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
+ case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
+ case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
+ case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
+ case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
+ case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
+ case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
+ case X86ISD::SAHF: return "X86ISD::SAHF";
+ case X86ISD::RDRAND: return "X86ISD::RDRAND";
+ case X86ISD::RDSEED: return "X86ISD::RDSEED";
+ case X86ISD::FMADD: return "X86ISD::FMADD";
+ case X86ISD::FMSUB: return "X86ISD::FMSUB";
+ case X86ISD::FNMADD: return "X86ISD::FNMADD";
+ case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
+ case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
+ case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
+ case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
+ case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
+ case X86ISD::XTEST: return "X86ISD::XTEST";
+ }
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // X86 supports extremely general addressing modes.
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ Reloc::Model R = getTargetMachine().getRelocationModel();
+
+ // X86 allows a sign-extended 32-bit immediate field as a displacement.
+ if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
+ return false;
+
+ if (AM.BaseGV) {
+ unsigned GVFlags =
+ Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
+
+ // If a reference to this global requires an extra load, we can't fold it.
+ if (isGlobalStubReference(GVFlags))
+ return false;
+
+ // If BaseGV requires a register for the PIC base, we cannot also have a
+ // BaseReg specified.
+ if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
+ return false;
+
+ // If lower 4G is not available, then we must use rip-relative addressing.
+ if ((M != CodeModel::Small || R != Reloc::Static) &&
+ Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+ return false;
+ }
+
+ switch (AM.Scale) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ // These scales always work.
+ break;
+ case 3:
+ case 5:
+ case 9:
+ // These scales are formed with basereg+scalereg. Only accept if there is
+ // no basereg yet.
+ if (AM.HasBaseReg)
+ return false;
+ break;
+ default: // Other stuff never works.
+ return false;
+ }
+
+ return true;
+}
+
+bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ return NumBits1 > NumBits2;
+}
+
+bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return isInt<32>(Imm);
+}
+
+bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ // Can also use sub to handle negated immediates.
+ return isInt<32>(Imm);
+}
+
+bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ return NumBits1 > NumBits2;
+}
+
+bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ EVT VT1 = Val.getValueType();
+ if (isZExtFree(VT1, VT2))
+ return true;
+
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // X86 has 8, 16, and 32-bit zero-extending loads.
+ return true;
+ }
+
+ return false;
+}
+
+bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
+ // i16 instructions are longer (0x66 prefix) and potentially slower.
+ return !(VT1 == MVT::i32 && VT2 == MVT::i16);
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ // Very little shuffling can be done for 64-bit vectors right now.
+ if (VT.getSizeInBits() == 64)
+ return false;
+
+ // FIXME: pshufb, blends, shifts.
+ return (VT.getVectorNumElements() == 2 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isMOVLMask(M, VT) ||
+ isSHUFPMask(M, VT, Subtarget->hasFp256()) ||
+ isPSHUFDMask(M, VT) ||
+ isPSHUFHWMask(M, VT, Subtarget->hasInt256()) ||
+ isPSHUFLWMask(M, VT, Subtarget->hasInt256()) ||
+ isPALIGNRMask(M, VT, Subtarget) ||
+ isUNPCKLMask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKHMask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasInt256()));
+}
+
+bool
+X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+ // FIXME: This collection of masks seems suspect.
+ if (NumElts == 2)
+ return true;
+ if (NumElts == 4 && VT.is128BitVector()) {
+ return (isMOVLMask(Mask, VT) ||
+ isCommutedMOVLMask(Mask, VT, true) ||
+ isSHUFPMask(Mask, VT, Subtarget->hasFp256()) ||
+ isSHUFPMask(Mask, VT, Subtarget->hasFp256(), /* Commuted */ true));
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+/// Utility function to emit xbegin specifying the start of an RTM region.
+static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc DL = MI->getDebugLoc();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // For the v = xbegin(), we generate
+ //
+ // thisMBB:
+ // xbegin sinkMBB
+ //
+ // mainMBB:
+ // eax = -1
+ //
+ // sinkMBB:
+ // v = eax
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineFunction *MF = MBB->getParent();
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ // xbegin sinkMBB
+ // # fallthrough to mainMBB
+ // # abortion to sinkMBB
+ BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(sinkMBB);
+ thisMBB->addSuccessor(mainMBB);
+ thisMBB->addSuccessor(sinkMBB);
+
+ // mainMBB:
+ // EAX = -1
+ BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), X86::EAX).addImm(-1);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // EAX is live into the sinkMBB
+ sinkMBB->addLiveIn(X86::EAX);
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
+ .addReg(X86::EAX);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+// Get CMPXCHG opcode for the specified data type.
+static unsigned getCmpXChgOpcode(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::LCMPXCHG8;
+ case MVT::i16: return X86::LCMPXCHG16;
+ case MVT::i32: return X86::LCMPXCHG32;
+ case MVT::i64: return X86::LCMPXCHG64;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid operand size!");
+}
+
+// Get LOAD opcode for the specified data type.
+static unsigned getLoadOpcode(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::MOV8rm;
+ case MVT::i16: return X86::MOV16rm;
+ case MVT::i32: return X86::MOV32rm;
+ case MVT::i64: return X86::MOV64rm;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid operand size!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction.
+static unsigned getNonAtomicOpcode(unsigned Opc) {
+ switch (Opc) {
+ case X86::ATOMAND8: return X86::AND8rr;
+ case X86::ATOMAND16: return X86::AND16rr;
+ case X86::ATOMAND32: return X86::AND32rr;
+ case X86::ATOMAND64: return X86::AND64rr;
+ case X86::ATOMOR8: return X86::OR8rr;
+ case X86::ATOMOR16: return X86::OR16rr;
+ case X86::ATOMOR32: return X86::OR32rr;
+ case X86::ATOMOR64: return X86::OR64rr;
+ case X86::ATOMXOR8: return X86::XOR8rr;
+ case X86::ATOMXOR16: return X86::XOR16rr;
+ case X86::ATOMXOR32: return X86::XOR32rr;
+ case X86::ATOMXOR64: return X86::XOR64rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction with
+// extra opcode.
+static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
+ unsigned &ExtraOpc) {
+ switch (Opc) {
+ case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
+ case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
+ case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
+ case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
+ case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr;
+ case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
+ case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
+ case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
+ case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr;
+ case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
+ case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
+ case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
+ case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr;
+ case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
+ case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
+ case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
+ case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr;
+ case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
+ case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
+ case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction for
+// 64-bit data type on 32-bit target.
+static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
+ switch (Opc) {
+ case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
+ case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
+ case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
+ case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
+ case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
+ case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
+ case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr;
+ case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr;
+ case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr;
+ case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction for
+// 64-bit data type on 32-bit target with extra opcode.
+static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
+ unsigned &HiOpc,
+ unsigned &ExtraOpc) {
+ switch (Opc) {
+ case X86::ATOMNAND6432:
+ ExtraOpc = X86::NOT32r;
+ HiOpc = X86::AND32rr;
+ return X86::AND32rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get pseudo CMOV opcode from the specified data type.
+static unsigned getPseudoCMOVOpc(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::CMOV_GR8;
+ case MVT::i16: return X86::CMOV_GR16;
+ case MVT::i32: return X86::CMOV_GR32;
+ default:
+ break;
+ }
+ llvm_unreachable("Unknown CMOV opcode!");
+}
+
+// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
+// They will be translated into a spin-loop or compare-exchange loop from
+//
+// ...
+// dst = atomic-fetch-op MI.addr, MI.val
+// ...
+//
+// to
+//
+// ...
+// t1 = LOAD MI.addr
+// loop:
+// t4 = phi(t1, t3 / loop)
+// t2 = OP MI.val, t4
+// EAX = t4
+// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
+// t3 = EAX
+// JNE loop
+// sink:
+// dst = t3
+// ...
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+ "Unexpected number of operands");
+
+ assert(MI->hasOneMemOperand() &&
+ "Expected atomic-load-op to have one memoperand");
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg, SrcReg;
+ unsigned MemOpndSlot;
+
+ unsigned CurOp = 0;
+
+ DstReg = MI->getOperand(CurOp++).getReg();
+ MemOpndSlot = CurOp;
+ CurOp += X86::AddrNumOperands;
+ SrcReg = MI->getOperand(CurOp++).getReg();
+
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ MVT::SimpleValueType VT = *RC->vt_begin();
+ unsigned t1 = MRI.createVirtualRegister(RC);
+ unsigned t2 = MRI.createVirtualRegister(RC);
+ unsigned t3 = MRI.createVirtualRegister(RC);
+ unsigned t4 = MRI.createVirtualRegister(RC);
+ unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);
+
+ unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
+ unsigned LOADOpc = getLoadOpcode(VT);
+
+ // For the atomic load-arith operator, we generate
+ //
+ // thisMBB:
+ // t1 = LOAD [MI.addr]
+ // mainMBB:
+ // t4 = phi(t1 / thisMBB, t3 / mainMBB)
+ // t1 = OP MI.val, EAX
+ // EAX = t4
+ // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+ // t3 = EAX
+ // JNE mainMBB
+ // sinkMBB:
+ // dst = t3
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
+ unsigned flags = (*MMOI)->getFlags();
+ flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
+ (*MMOI)->getSize(),
+ (*MMOI)->getBaseAlignment(),
+ (*MMOI)->getTBAAInfo(),
+ (*MMOI)->getRanges());
+ MIB.addMemOperand(MMO);
+ }
+
+ thisMBB->addSuccessor(mainMBB);
+
+ // mainMBB:
+ MachineBasicBlock *origMainMBB = mainMBB;
+
+ // Add a PHI.
+ MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+ case X86::ATOMAND8:
+ case X86::ATOMAND16:
+ case X86::ATOMAND32:
+ case X86::ATOMAND64:
+ case X86::ATOMOR8:
+ case X86::ATOMOR16:
+ case X86::ATOMOR32:
+ case X86::ATOMOR64:
+ case X86::ATOMXOR8:
+ case X86::ATOMXOR16:
+ case X86::ATOMXOR32:
+ case X86::ATOMXOR64: {
+ unsigned ARITHOpc = getNonAtomicOpcode(Opc);
+ BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
+ .addReg(t4);
+ break;
+ }
+ case X86::ATOMNAND8:
+ case X86::ATOMNAND16:
+ case X86::ATOMNAND32:
+ case X86::ATOMNAND64: {
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ unsigned NOTOpc;
+ unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
+ BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
+ .addReg(t4);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
+ break;
+ }
+ case X86::ATOMMAX8:
+ case X86::ATOMMAX16:
+ case X86::ATOMMAX32:
+ case X86::ATOMMAX64:
+ case X86::ATOMMIN8:
+ case X86::ATOMMIN16:
+ case X86::ATOMMIN32:
+ case X86::ATOMMIN64:
+ case X86::ATOMUMAX8:
+ case X86::ATOMUMAX16:
+ case X86::ATOMUMAX32:
+ case X86::ATOMUMAX64:
+ case X86::ATOMUMIN8:
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMIN32:
+ case X86::ATOMUMIN64: {
+ unsigned CMPOpc;
+ unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
+
+ BuildMI(mainMBB, DL, TII->get(CMPOpc))
+ .addReg(SrcReg)
+ .addReg(t4);
+
+ if (Subtarget->hasCMov()) {
+ if (VT != MVT::i8) {
+ // Native support
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
+ .addReg(SrcReg)
+ .addReg(t4);
+ } else {
+ // Promote i8 to i32 to use CMOV32
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterClass *RC32 =
+ TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
+ unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
+ unsigned AccReg32 = MRI.createVirtualRegister(RC32);
+ unsigned Tmp = MRI.createVirtualRegister(RC32);
+
+ unsigned Undef = MRI.createVirtualRegister(RC32);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
+
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32)
+ .addReg(Undef)
+ .addReg(SrcReg)
+ .addImm(X86::sub_8bit);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
+ .addReg(Undef)
+ .addReg(t4)
+ .addImm(X86::sub_8bit);
+
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
+ .addReg(SrcReg32)
+ .addReg(AccReg32);
+
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
+ .addReg(Tmp, 0, X86::sub_8bit);
+ }
+ } else {
+ // Use pseudo select and lower them.
+ assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
+ "Invalid atomic-load-op transformation!");
+ unsigned SelOpc = getPseudoCMOVOpc(VT);
+ X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
+ assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
+ MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
+ .addReg(SrcReg).addReg(t4)
+ .addImm(CC);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ // Replace the original PHI node as mainMBB is changed after CMOV
+ // lowering.
+ BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4)
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
+ Phi->eraseFromParent();
+ }
+ break;
+ }
+ }
+
+ // Copy PhyReg back from virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
+ .addReg(t4);
+
+ MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ MIB.addReg(t2);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Copy PhyReg back to virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
+ .addReg(PhyReg);
+
+ BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
+
+ mainMBB->addSuccessor(origMainMBB);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(t3);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
+// instructions. They will be translated into a spin-loop or compare-exchange
+// loop from
+//
+// ...
+// dst = atomic-fetch-op MI.addr, MI.val
+// ...
+//
+// to
+//
+// ...
+// t1L = LOAD [MI.addr + 0]
+// t1H = LOAD [MI.addr + 4]
+// loop:
+// t4L = phi(t1L, t3L / loop)
+// t4H = phi(t1H, t3H / loop)
+// t2L = OP MI.val.lo, t4L
+// t2H = OP MI.val.hi, t4H
+// EAX = t4L
+// EDX = t4H
+// EBX = t2L
+// ECX = t2H
+// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+// t3L = EAX
+// t3H = EDX
+// JNE loop
+// sink:
+// dstL = t3L
+// dstH = t3H
+// ...
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
+ "Unexpected number of operands");
+
+ assert(MI->hasOneMemOperand() &&
+ "Expected atomic-load-op32 to have one memoperand");
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstLoReg, DstHiReg;
+ unsigned SrcLoReg, SrcHiReg;
+ unsigned MemOpndSlot;
+
+ unsigned CurOp = 0;
+
+ DstLoReg = MI->getOperand(CurOp++).getReg();
+ DstHiReg = MI->getOperand(CurOp++).getReg();
+ MemOpndSlot = CurOp;
+ CurOp += X86::AddrNumOperands;
+ SrcLoReg = MI->getOperand(CurOp++).getReg();
+ SrcHiReg = MI->getOperand(CurOp++).getReg();
+
+ const TargetRegisterClass *RC = &X86::GR32RegClass;
+ const TargetRegisterClass *RC8 = &X86::GR8RegClass;
+
+ unsigned t1L = MRI.createVirtualRegister(RC);
+ unsigned t1H = MRI.createVirtualRegister(RC);
+ unsigned t2L = MRI.createVirtualRegister(RC);
+ unsigned t2H = MRI.createVirtualRegister(RC);
+ unsigned t3L = MRI.createVirtualRegister(RC);
+ unsigned t3H = MRI.createVirtualRegister(RC);
+ unsigned t4L = MRI.createVirtualRegister(RC);
+ unsigned t4H = MRI.createVirtualRegister(RC);
+
+ unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
+ unsigned LOADOpc = X86::MOV32rm;
+
+ // For the atomic load-arith operator, we generate
+ //
+ // thisMBB:
+ // t1L = LOAD [MI.addr + 0]
+ // t1H = LOAD [MI.addr + 4]
+ // mainMBB:
+ // t4L = phi(t1L / thisMBB, t3L / mainMBB)
+ // t4H = phi(t1H / thisMBB, t3H / mainMBB)
+ // t2L = OP MI.val.lo, t4L
+ // t2H = OP MI.val.hi, t4H
+ // EBX = t2L
+ // ECX = t2H
+ // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+ // t3L = EAX
+ // t3H = EDX
+ // JNE loop
+ // sinkMBB:
+ // dstL = t3L
+ // dstH = t3H
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ // Lo
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
+ unsigned flags = (*MMOI)->getFlags();
+ flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
+ (*MMOI)->getSize(),
+ (*MMOI)->getBaseAlignment(),
+ (*MMOI)->getTBAAInfo(),
+ (*MMOI)->getRanges());
+ MIB.addMemOperand(MMO);
+ };
+ MachineInstr *LowMI = MIB;
+
+ // Hi
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp) {
+ MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
+ } else {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ }
+ MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());
+
+ thisMBB->addSuccessor(mainMBB);
+
+ // mainMBB:
+ MachineBasicBlock *origMainMBB = mainMBB;
+
+ // Add PHIs.
+ MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
+ .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
+ MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
+ .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
+ case X86::ATOMAND6432:
+ case X86::ATOMOR6432:
+ case X86::ATOMXOR6432:
+ case X86::ATOMADD6432:
+ case X86::ATOMSUB6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
+ .addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
+ .addReg(SrcHiReg);
+ break;
+ }
+ case X86::ATOMNAND6432: {
+ unsigned HiOpc, NOTOpc;
+ unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
+ unsigned TmpL = MRI.createVirtualRegister(RC);
+ unsigned TmpH = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
+ .addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
+ .addReg(t4H);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
+ break;
+ }
+ case X86::ATOMMAX6432:
+ case X86::ATOMMIN6432:
+ case X86::ATOMUMAX6432:
+ case X86::ATOMUMIN6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ unsigned cL = MRI.createVirtualRegister(RC8);
+ unsigned cH = MRI.createVirtualRegister(RC8);
+ unsigned cL32 = MRI.createVirtualRegister(RC);
+ unsigned cH32 = MRI.createVirtualRegister(RC);
+ unsigned cc = MRI.createVirtualRegister(RC);
+ // cl := cmp src_lo, lo
+ BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
+ .addReg(SrcLoReg).addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
+ BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
+ // ch := cmp src_hi, hi
+ BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
+ .addReg(SrcHiReg).addReg(t4H);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
+ BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
+ // cc := if (src_hi == hi) ? cl : ch;
+ if (Subtarget->hasCMov()) {
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
+ .addReg(cH32).addReg(cL32);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
+ .addReg(cH32).addReg(cL32)
+ .addImm(X86::COND_E);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
+ if (Subtarget->hasCMov()) {
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
+ .addReg(SrcLoReg).addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
+ .addReg(SrcHiReg).addReg(t4H);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
+ .addReg(SrcLoReg).addReg(t4L)
+ .addImm(X86::COND_NE);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the
+ // 2nd CMOV lowering.
+ mainMBB->addLiveIn(X86::EFLAGS);
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
+ .addReg(SrcHiReg).addReg(t4H)
+ .addImm(X86::COND_NE);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ // Replace the original PHI node as mainMBB is changed after CMOV
+ // lowering.
+ BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L)
+ .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
+ BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H)
+ .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
+ PhiL->eraseFromParent();
+ PhiH->eraseFromParent();
+ }
+ break;
+ }
+ case X86::ATOMSWAP6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
+ break;
+ }
+ }
+
+ // Copy EDX:EAX back from HiReg:LoReg
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
+ // Copy ECX:EBX from t1H:t1L
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);
+
+ MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Copy EDX:EAX back to t3H:t3L
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);
+
+ BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
+
+ mainMBB->addSuccessor(origMainMBB);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstLoReg)
+ .addReg(t3L);
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstHiReg)
+ .addReg(t3H);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
+// or XMM0_V32I8 in AVX all of this code can be replaced with that
+// in the .td file.
+static MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB,
+ const TargetInstrInfo *TII) {
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::PCMPISTRM128REG: Opc = X86::PCMPISTRM128rr; break;
+ case X86::VPCMPISTRM128REG: Opc = X86::VPCMPISTRM128rr; break;
+ case X86::PCMPISTRM128MEM: Opc = X86::PCMPISTRM128rm; break;
+ case X86::VPCMPISTRM128MEM: Opc = X86::VPCMPISTRM128rm; break;
+ case X86::PCMPESTRM128REG: Opc = X86::PCMPESTRM128rr; break;
+ case X86::VPCMPESTRM128REG: Opc = X86::VPCMPESTRM128rr; break;
+ case X86::PCMPESTRM128MEM: Opc = X86::PCMPESTRM128rm; break;
+ case X86::VPCMPESTRM128MEM: Opc = X86::VPCMPESTRM128rm; break;
+ }
+
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
+
+ unsigned NumArgs = MI->getNumOperands();
+ for (unsigned i = 1; i < NumArgs; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!(Op.isReg() && Op.isImplicit()))
+ MIB.addOperand(Op);
+ }
+ if (MI->hasOneMemOperand())
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ BuildMI(*BB, MI, dl,
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
+ .addReg(X86::XMM0);
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+// FIXME: Custom handling because TableGen doesn't support multiple implicit
+// defs in an instruction pattern
+static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
+ const TargetInstrInfo *TII) {
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::PCMPISTRIREG: Opc = X86::PCMPISTRIrr; break;
+ case X86::VPCMPISTRIREG: Opc = X86::VPCMPISTRIrr; break;
+ case X86::PCMPISTRIMEM: Opc = X86::PCMPISTRIrm; break;
+ case X86::VPCMPISTRIMEM: Opc = X86::VPCMPISTRIrm; break;
+ case X86::PCMPESTRIREG: Opc = X86::PCMPESTRIrr; break;
+ case X86::VPCMPESTRIREG: Opc = X86::VPCMPESTRIrr; break;
+ case X86::PCMPESTRIMEM: Opc = X86::PCMPESTRIrm; break;
+ case X86::VPCMPESTRIMEM: Opc = X86::VPCMPESTRIrm; break;
+ }
+
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
+
+ unsigned NumArgs = MI->getNumOperands(); // remove the results
+ for (unsigned i = 1; i < NumArgs; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!(Op.isReg() && Op.isImplicit()))
+ MIB.addOperand(Op);
+ }
+ if (MI->hasOneMemOperand())
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ BuildMI(*BB, MI, dl,
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
+ .addReg(X86::ECX);
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+static MachineBasicBlock * EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
+ const TargetInstrInfo *TII,
+ const X86Subtarget* Subtarget) {
+ DebugLoc dl = MI->getDebugLoc();
+
+ // Address into RAX/EAX, other two args into ECX, EDX.
+ unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ unsigned ValOps = X86::AddrNumOperands;
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(ValOps).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
+ .addReg(MI->getOperand(ValOps+1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit va_arg instruction on X86-64.
+
+ // Operands to this pseudo-instruction:
+ // 0 ) Output : destination address (reg)
+ // 1-5) Input : va_list address (addr, i64mem)
+ // 6 ) ArgSize : Size (in bytes) of vararg type
+ // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+ // 8 ) Align : Alignment of type
+ // 9 ) EFLAGS (implicit-def)
+
+ assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+ assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ MachineOperand &Base = MI->getOperand(1);
+ MachineOperand &Scale = MI->getOperand(2);
+ MachineOperand &Index = MI->getOperand(3);
+ MachineOperand &Disp = MI->getOperand(4);
+ MachineOperand &Segment = MI->getOperand(5);
+ unsigned ArgSize = MI->getOperand(6).getImm();
+ unsigned ArgMode = MI->getOperand(7).getImm();
+ unsigned Align = MI->getOperand(8).getImm();
+
+ // Memory Reference
+ assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ // Machine Information
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+ const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // struct va_list {
+ // i32 gp_offset
+ // i32 fp_offset
+ // i64 overflow_area (address)
+ // i64 reg_save_area (address)
+ // }
+ // sizeof(va_list) = 24
+ // alignment(va_list) = 8
+
+ unsigned TotalNumIntRegs = 6;
+ unsigned TotalNumXMMRegs = 8;
+ bool UseGPOffset = (ArgMode == 1);
+ bool UseFPOffset = (ArgMode == 2);
+ unsigned MaxOffset = TotalNumIntRegs * 8 +
+ (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+
+ /* Align ArgSize to a multiple of 8 */
+ unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+ bool NeedsAlign = (Align > 8);
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *overflowMBB;
+ MachineBasicBlock *offsetMBB;
+ MachineBasicBlock *endMBB;
+
+ unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
+ unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
+ unsigned OffsetReg = 0;
+
+ if (!UseGPOffset && !UseFPOffset) {
+ // If we only pull from the overflow region, we don't create a branch.
+ // We don't need to alter control flow.
+ OffsetDestReg = 0; // unused
+ OverflowDestReg = DestReg;
+
+ offsetMBB = NULL;
+ overflowMBB = thisMBB;
+ endMBB = thisMBB;
+ } else {
+ // First emit code to check if gp_offset (or fp_offset) is below the bound.
+ // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+ // If not, pull from overflow_area. (branch to overflowMBB)
+ //
+ // thisMBB
+ // | .
+ // | .
+ // offsetMBB overflowMBB
+ // | .
+ // | .
+ // endMBB
+
+ // Registers for the PHI in endMBB
+ OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+ OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *MF = MBB->getParent();
+ overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ // Insert the new basic blocks
+ MF->insert(MBBIter, offsetMBB);
+ MF->insert(MBBIter, overflowMBB);
+ MF->insert(MBBIter, endMBB);
+
+ // Transfer the remainder of MBB and its successor edges to endMBB.
+ endMBB->splice(endMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Make offsetMBB and overflowMBB successors of thisMBB
+ thisMBB->addSuccessor(offsetMBB);
+ thisMBB->addSuccessor(overflowMBB);
+
+ // endMBB is a successor of both offsetMBB and overflowMBB
+ offsetMBB->addSuccessor(endMBB);
+ overflowMBB->addSuccessor(endMBB);
+
+ // Load the offset value into a register
+ OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Check if there is enough room left to pull this argument.
+ BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+ .addReg(OffsetReg)
+ .addImm(MaxOffset + 8 - ArgSizeA8);
+
+ // Branch to "overflowMBB" if offset >= max
+ // Fall through to "offsetMBB" otherwise
+ BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+ .addMBB(overflowMBB);
+ }
+
+ // In offsetMBB, emit code to use the reg_save_area.
+ if (offsetMBB) {
+ assert(OffsetReg != 0);
+
+ // Read the reg_save_area address.
+ unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 16)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Zero-extend the offset
+ unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addImm(X86::sub_32bit);
+
+ // Add the offset to the reg_save_area to get the final address.
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ .addReg(OffsetReg64)
+ .addReg(RegSaveReg);
+
+ // Compute the offset for the next argument
+ unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+ .addReg(OffsetReg)
+ .addImm(UseFPOffset ? 16 : 8);
+
+ // Store it back into the va_list.
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .addReg(NextOffsetReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Jump to endMBB
+ BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+ .addMBB(endMBB);
+ }
+
+ //
+ // Emit code to use overflow area
+ //
+
+ // Load the overflow_area address into a register.
+ unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we need to align it, do so. Otherwise, just copy the address
+ // to OverflowDestReg.
+ if (NeedsAlign) {
+ // Align the overflow address
+ assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+ unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+ // aligned_addr = (addr + (align-1)) & ~(align-1)
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+ .addReg(OverflowAddrReg)
+ .addImm(Align-1);
+
+ BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+ .addReg(TmpReg)
+ .addImm(~(uint64_t)(Align-1));
+ } else {
+ BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+ .addReg(OverflowAddrReg);
+ }
+
+ // Compute the next overflow address after this argument.
+ // (the overflow address should be kept 8-byte aligned)
+ unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+ .addReg(OverflowDestReg)
+ .addImm(ArgSizeA8);
+
+ // Store the new overflow address.
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .addReg(NextAddrReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we branched, emit the PHI to the front of endMBB.
+ if (offsetMBB) {
+ BuildMI(*endMBB, endMBB->begin(), DL,
+ TII->get(X86::PHI), DestReg)
+ .addReg(OffsetDestReg).addMBB(offsetMBB)
+ .addReg(OverflowDestReg).addMBB(overflowMBB);
+ }
+
+ // Erase the pseudo instruction
+ MI->eraseFromParent();
+
+ return endMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit code to save XMM registers to the stack. The ABI says that the
+ // number of registers to save is given in %al, so it's theoretically
+ // possible to do an indirect jump trick to avoid saving all of them,
+ // however this code takes a simpler approach and just executes all
+ // of the stores if %al is non-zero. It's less code, and it's probably
+ // easier on the hardware branch predictor, and stores aren't all that
+ // expensive anyway.
+
+ // Create the new basic blocks. One block contains all the XMM stores,
+ // and one block is the final destination regardless of whether any
+ // stores were performed.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *F = MBB->getParent();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+ MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, XMMSaveMBB);
+ F->insert(MBBIter, EndMBB);
+
+ // Transfer the remainder of MBB and its successor edges to EndMBB.
+ EndMBB->splice(EndMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // The original block will now fall through to the XMM save block.
+ MBB->addSuccessor(XMMSaveMBB);
+ // The XMMSaveMBB will fall through to the end block.
+ XMMSaveMBB->addSuccessor(EndMBB);
+
+ // Now add the instructions.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned CountReg = MI->getOperand(0).getReg();
+ int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
+ int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
+
+ if (!Subtarget->isTargetWin64()) {
+ // If %al is 0, branch around the XMM save block.
+ BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
+ BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ }
+
+ unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ // In the XMM save block, save all the XMM argument registers.
+ for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
+ MachineMemOperand *MMO =
+ F->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+ MachineMemOperand::MOStore,
+ /*Size=*/16, /*Align=*/16);
+ BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
+ .addFrameIndex(RegSaveFrameIndex)
+ .addImm(/*Scale=*/1)
+ .addReg(/*IndexReg=*/0)
+ .addImm(/*Disp=*/Offset)
+ .addReg(/*Segment=*/0)
+ .addReg(MI->getOperand(i).getReg())
+ .addMemOperand(MMO);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+
+ return EndMBB;
+}
+
+// The EFLAGS operand of SelectItr might be missing a kill marker
+// because there were multiple uses of EFLAGS, and ISel didn't know
+// which to mark. Figure out whether SelectItr should have had a
+// kill marker, and set it if it should. Returns the correct kill
+// marker value.
+static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
+ MachineBasicBlock* BB,
+ const TargetRegisterInfo* TRI) {
+ // Scan forward through BB for a use/def of EFLAGS.
+ MachineBasicBlock::iterator miI(llvm::next(SelectItr));
+ for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
+ const MachineInstr& mi = *miI;
+ if (mi.readsRegister(X86::EFLAGS))
+ return false;
+ if (mi.definesRegister(X86::EFLAGS))
+ break; // Should have kill-flag - update below.
+ }
+
+ // If we hit the end of the block, check whether EFLAGS is live into a
+ // successor.
+ if (miI == BB->end()) {
+ for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
+ sEnd = BB->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock* succ = *sItr;
+ if (succ->isLiveIn(X86::EFLAGS))
+ return false;
+ }
+ }
+
+ // We found a def, or hit the end of the basic block and EFLAGS wasn't live
+ // out. SelectMI should have a kill flag on EFLAGS.
+ SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
+ return true;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // If the EFLAGS register isn't dead in the terminator, then claim that it's
+ // live into the sink and copy blocks.
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ if (!MI->killsRegister(X86::EFLAGS) &&
+ !checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
+ copy0MBB->addLiveIn(X86::EFLAGS);
+ sinkMBB->addLiveIn(X86::EFLAGS);
+ }
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // Create the conditional branch instruction.
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ copy0MBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
+ bool Is64Bit) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+
+ assert(getTargetMachine().Options.EnableSegmentedStacks);
+
+ unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
+ unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
+
+ // BB:
+ // ... [Till the alloca]
+ // If stacklet is not large enough, jump to mallocMBB
+ //
+ // bumpMBB:
+ // Allocate by subtracting from RSP
+ // Jump to continueMBB
+ //
+ // mallocMBB:
+ // Allocate by call to runtime
+ //
+ // continueMBB:
+ // ...
+ // [rest of original BB]
+ //
+
+ MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *AddrRegClass =
+ getRegClassFor(Is64Bit ? MVT::i64:MVT::i32);
+
+ unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+ bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+ tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
+ SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
+ sizeVReg = MI->getOperand(1).getReg(),
+ physSPReg = Is64Bit ? X86::RSP : X86::ESP;
+
+ MachineFunction::iterator MBBIter = BB;
+ ++MBBIter;
+
+ MF->insert(MBBIter, bumpMBB);
+ MF->insert(MBBIter, mallocMBB);
+ MF->insert(MBBIter, continueMBB);
+
+ continueMBB->splice(continueMBB->begin(), BB, llvm::next
+ (MachineBasicBlock::iterator(MI)), BB->end());
+ continueMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add code to the main basic block to check if the stack limit has been hit,
+ // and if so, jump to mallocMBB otherwise to bumpMBB.
+ BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
+ BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
+ .addReg(tmpSPVReg).addReg(sizeVReg);
+ BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
+ .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
+ .addReg(SPLimitVReg);
+ BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
+
+ // bumpMBB simply decreases the stack pointer, since we know the current
+ // stacklet has enough space.
+ BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
+ .addReg(SPLimitVReg);
+ BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
+ .addReg(SPLimitVReg);
+ BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
+
+ // Calls into a routine in libgcc to allocate more space from the heap.
+ const uint32_t *RegMask =
+ getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ if (Is64Bit) {
+ BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
+ .addReg(sizeVReg);
+ BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::RDI, RegState::Implicit)
+ .addReg(X86::RAX, RegState::ImplicitDefine);
+ } else {
+ BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
+ .addImm(12);
+ BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
+ BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::EAX, RegState::ImplicitDefine);
+ }
+
+ if (!Is64Bit)
+ BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
+ .addImm(16);
+
+ BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
+ .addReg(Is64Bit ? X86::RAX : X86::EAX);
+ BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
+
+ // Set up the CFG correctly.
+ BB->addSuccessor(bumpMBB);
+ BB->addSuccessor(mallocMBB);
+ mallocMBB->addSuccessor(continueMBB);
+ bumpMBB->addSuccessor(continueMBB);
+
+ // Take care of the PHI nodes.
+ BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(mallocPtrVReg).addMBB(mallocMBB)
+ .addReg(bumpSPPtrVReg).addMBB(bumpMBB);
+
+ // Delete the original pseudo instruction.
+ MI->eraseFromParent();
+
+ // And we're done.
+ return continueMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(!Subtarget->isTargetEnvMacho());
+
+ // The lowering is pretty easy: we're just emitting the call to _alloca. The
+ // non-trivial part is impdef of ESP.
+
+ if (Subtarget->isTargetWin64()) {
+ if (Subtarget->isTargetCygMing()) {
+ // ___chkstk(Mingw64):
+ // Clobbers R10, R11, RAX and EFLAGS.
+ // Updates RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("___chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::RSP, RegState::Implicit)
+ .addReg(X86::RAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::RSP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ } else {
+ // __chkstk(MSVCRT): does not update stack pointer.
+ // Clobbers R10, R11 and EFLAGS.
+ // FIXME: RAX(allocated size) might be reused and not killed.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("__chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ // RAX has the offset to subtracted from RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
+ .addReg(X86::RSP)
+ .addReg(X86::RAX);
+ }
+ } else {
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+
+ BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(X86::EAX, RegState::Implicit)
+ .addReg(X86::ESP, RegState::Implicit)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ // This is pretty easy. We're taking the value that we received from
+ // our load from the relocation, sticking it in either RDI (x86-64)
+ // or EAX and doing an indirect call. The return value will then
+ // be in the normal return register.
+ const X86InstrInfo *TII
+ = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *F = BB->getParent();
+
+ assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
+ assert(MI->getOperand(3).isGlobal() && "This should be a global");
+
+ // Get a register mask for the lowered call.
+ // FIXME: The 32-bit calls have non-standard calling conventions. Use a
+ // proper register mask.
+ const uint32_t *RegMask =
+ getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ if (Subtarget->is64Bit()) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV64rm), X86::RDI)
+ .addReg(X86::RIP)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ addDirectMem(MIB, X86::RDI);
+ MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
+ } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(0)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(TII->getGlobalBaseReg(F))
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg;
+ unsigned MemOpndSlot = 0;
+
+ unsigned CurOp = 0;
+
+ DstReg = MI->getOperand(CurOp++).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MemOpndSlot = CurOp;
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // buf[LabelOffset] = restoreMBB
+ // SjLjSetup restoreMBB
+ //
+ // mainMBB:
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+ // restoreMBB:
+ // v_restore = 1
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+ MF->push_back(restoreMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ unsigned PtrStoreOpc = 0;
+ unsigned LabelReg = 0;
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ Reloc::Model RM = getTargetMachine().getRelocationModel();
+ bool UseImmLabel = (getTargetMachine().getCodeModel() == CodeModel::Small) &&
+ (RM == Reloc::Static || RM == Reloc::DynamicNoPIC);
+
+ // Prepare IP either in reg or imm.
+ if (!UseImmLabel) {
+ PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ LabelReg = MRI.createVirtualRegister(PtrRC);
+ if (Subtarget->is64Bit()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB)
+ .addReg(0);
+ } else {
+ const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
+ .addReg(XII->getGlobalBaseReg(MF))
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference())
+ .addReg(0);
+ }
+ } else
+ PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
+ // Store IP
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(MemOpndSlot + i), LabelOffset);
+ else
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ }
+ if (!UseImmLabel)
+ MIB.addReg(LabelReg);
+ else
+ MIB.addMBB(restoreMBB);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
+ .addMBB(restoreMBB);
+ MIB.addRegMask(RegInfo->getNoPreservedMask());
+ thisMBB->addSuccessor(mainMBB);
+ thisMBB->addSuccessor(restoreMBB);
+
+ // mainMBB:
+ // EAX = 0
+ BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(restoreMBB);
+
+ // restoreMBB:
+ BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
+ BuildMI(restoreMBB, DL, TII->get(X86::JMP_4)).addMBB(sinkMBB);
+ restoreMBB->addSuccessor(sinkMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as GPR.
+ unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
+ unsigned SP = RegInfo->getStackRegister();
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+
+ unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
+ unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
+
+ // Reload FP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Reload IP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(i), LabelOffset);
+ else
+ MIB.addOperand(MI->getOperand(i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Reload SP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(i), SPOffset);
+ else
+ MIB.addOperand(MI->getOperand(i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Jump
+ BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unexpected instr type to insert");
+ case X86::TAILJMPd64:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64:
+ llvm_unreachable("TAILJMP64 would not be touched here.");
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ return BB;
+ case X86::WIN_ALLOCA:
+ return EmitLoweredWinAlloca(MI, BB);
+ case X86::SEG_ALLOCA_32:
+ return EmitLoweredSegAlloca(MI, BB, false);
+ case X86::SEG_ALLOCA_64:
+ return EmitLoweredSegAlloca(MI, BB, true);
+ case X86::TLSCall_32:
+ case X86::TLSCall_64:
+ return EmitLoweredTLSCall(MI, BB);
+ case X86::CMOV_GR8:
+ case X86::CMOV_FR32:
+ case X86::CMOV_FR64:
+ case X86::CMOV_V4F32:
+ case X86::CMOV_V2F64:
+ case X86::CMOV_V2I64:
+ case X86::CMOV_V8F32:
+ case X86::CMOV_V4F64:
+ case X86::CMOV_V4I64:
+ case X86::CMOV_GR16:
+ case X86::CMOV_GR32:
+ case X86::CMOV_RFP32:
+ case X86::CMOV_RFP64:
+ case X86::CMOV_RFP80:
+ return EmitLoweredSelect(MI, BB);
+
+ case X86::FP32_TO_INT16_IN_MEM:
+ case X86::FP32_TO_INT32_IN_MEM:
+ case X86::FP32_TO_INT64_IN_MEM:
+ case X86::FP64_TO_INT16_IN_MEM:
+ case X86::FP64_TO_INT32_IN_MEM:
+ case X86::FP64_TO_INT64_IN_MEM:
+ case X86::FP80_TO_INT16_IN_MEM:
+ case X86::FP80_TO_INT32_IN_MEM:
+ case X86::FP80_TO_INT64_IN_MEM: {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Change the floating point control register to use "round towards zero"
+ // mode when truncating to an integer value.
+ MachineFunction *F = BB->getParent();
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FNSTCW16m)), CWFrameIdx);
+
+ // Load the old value of the high byte of the control word...
+ unsigned OldCW =
+ F->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
+ CWFrameIdx);
+
+ // Set the high part to be round to zero...
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
+ .addImm(0xC7F);
+
+ // Reload the modified control word now...
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ // Restore the memory image of control word to original value
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
+ .addReg(OldCW);
+
+ // Get the X86 opcode to use.
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
+ case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
+ case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
+ case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
+ case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
+ case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
+ case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
+ case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
+ case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
+ }
+
+ X86AddressMode AM;
+ MachineOperand &Op = MI->getOperand(0);
+ if (Op.isReg()) {
+ AM.BaseType = X86AddressMode::RegBase;
+ AM.Base.Reg = Op.getReg();
+ } else {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = Op.getIndex();
+ }
+ Op = MI->getOperand(1);
+ if (Op.isImm())
+ AM.Scale = Op.getImm();
+ Op = MI->getOperand(2);
+ if (Op.isImm())
+ AM.IndexReg = Op.getImm();
+ Op = MI->getOperand(3);
+ if (Op.isGlobal()) {
+ AM.GV = Op.getGlobal();
+ } else {
+ AM.Disp = Op.getImm();
+ }
+ addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
+ .addReg(MI->getOperand(X86::AddrNumOperands).getReg());
+
+ // Reload the original control word now.
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+ // String/text processing lowering.
+ case X86::PCMPISTRM128REG:
+ case X86::VPCMPISTRM128REG:
+ case X86::PCMPISTRM128MEM:
+ case X86::VPCMPISTRM128MEM:
+ case X86::PCMPESTRM128REG:
+ case X86::VPCMPESTRM128REG:
+ case X86::PCMPESTRM128MEM:
+ case X86::VPCMPESTRM128MEM:
+ assert(Subtarget->hasSSE42() &&
+ "Target must have SSE4.2 or AVX features enabled");
+ return EmitPCMPSTRM(MI, BB, getTargetMachine().getInstrInfo());
+
+ // String/text processing lowering.
+ case X86::PCMPISTRIREG:
+ case X86::VPCMPISTRIREG:
+ case X86::PCMPISTRIMEM:
+ case X86::VPCMPISTRIMEM:
+ case X86::PCMPESTRIREG:
+ case X86::VPCMPESTRIREG:
+ case X86::PCMPESTRIMEM:
+ case X86::VPCMPESTRIMEM:
+ assert(Subtarget->hasSSE42() &&
+ "Target must have SSE4.2 or AVX features enabled");
+ return EmitPCMPSTRI(MI, BB, getTargetMachine().getInstrInfo());
+
+ // Thread synchronization.
+ case X86::MONITOR:
+ return EmitMonitor(MI, BB, getTargetMachine().getInstrInfo(), Subtarget);
+
+ // xbegin
+ case X86::XBEGIN:
+ return EmitXBegin(MI, BB, getTargetMachine().getInstrInfo());
+
+ // Atomic Lowering.
+ case X86::ATOMAND8:
+ case X86::ATOMAND16:
+ case X86::ATOMAND32:
+ case X86::ATOMAND64:
+ // Fall through
+ case X86::ATOMOR8:
+ case X86::ATOMOR16:
+ case X86::ATOMOR32:
+ case X86::ATOMOR64:
+ // Fall through
+ case X86::ATOMXOR16:
+ case X86::ATOMXOR8:
+ case X86::ATOMXOR32:
+ case X86::ATOMXOR64:
+ // Fall through
+ case X86::ATOMNAND8:
+ case X86::ATOMNAND16:
+ case X86::ATOMNAND32:
+ case X86::ATOMNAND64:
+ // Fall through
+ case X86::ATOMMAX8:
+ case X86::ATOMMAX16:
+ case X86::ATOMMAX32:
+ case X86::ATOMMAX64:
+ // Fall through
+ case X86::ATOMMIN8:
+ case X86::ATOMMIN16:
+ case X86::ATOMMIN32:
+ case X86::ATOMMIN64:
+ // Fall through
+ case X86::ATOMUMAX8:
+ case X86::ATOMUMAX16:
+ case X86::ATOMUMAX32:
+ case X86::ATOMUMAX64:
+ // Fall through
+ case X86::ATOMUMIN8:
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMIN32:
+ case X86::ATOMUMIN64:
+ return EmitAtomicLoadArith(MI, BB);
+
+ // This group does 64-bit operations on a 32-bit host.
+ case X86::ATOMAND6432:
+ case X86::ATOMOR6432:
+ case X86::ATOMXOR6432:
+ case X86::ATOMNAND6432:
+ case X86::ATOMADD6432:
+ case X86::ATOMSUB6432:
+ case X86::ATOMMAX6432:
+ case X86::ATOMMIN6432:
+ case X86::ATOMUMAX6432:
+ case X86::ATOMUMIN6432:
+ case X86::ATOMSWAP6432:
+ return EmitAtomicLoadArith6432(MI, BB);
+
+ case X86::VASTART_SAVE_XMM_REGS:
+ return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+ case X86::VAARG_64:
+ return EmitVAARG64WithCustomInserter(MI, BB);
+
+ case X86::EH_SjLj_SetJmp32:
+ case X86::EH_SjLj_SetJmp64:
+ return emitEHSjLjSetJmp(MI, BB);
+
+ case X86::EH_SjLj_LongJmp32:
+ case X86::EH_SjLj_LongJmp64:
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+ assert((Opc >= ISD::BUILTIN_OP_END ||
+ Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN ||
+ Opc == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+ switch (Opc) {
+ default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::ADC:
+ case X86ISD::SBB:
+ case X86ISD::SMUL:
+ case X86ISD::UMUL:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ // These nodes' second result is a boolean.
+ if (Op.getResNo() == 0)
+ break;
+ // Fallthrough
+ case X86ISD::SETCC:
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned NumLoBits = 0;
+ switch (IntId) {
+ default: break;
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_mmx_pmovmskb:
+ case Intrinsic::x86_sse2_pmovmskb_128:
+ case Intrinsic::x86_avx2_pmovmskb: {
+ // High bits of movmskp{s|d}, pmovmskb are known zero.
+ switch (IntId) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
+ case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
+ case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
+ case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break;
+ case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break;
+ case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
+ case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break;
+ }
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
+ break;
+ }
+ }
+ break;
+ }
+ }
+}
+
+unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+ if (Op.getOpcode() == X86ISD::SETCC_CARRY)
+ return Op.getValueType().getScalarType().getSizeInBits();
+
+ // Fallback case.
+ return 1;
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool X86TargetLowering::isGAPlusOffset(SDNode *N,
+ const GlobalValue* &GA,
+ int64_t &Offset) const {
+ if (N->getOpcode() == X86ISD::Wrapper) {
+ if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
+ GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
+ Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
+ return true;
+ }
+ }
+ return TargetLowering::isGAPlusOffset(N, GA, Offset);
+}
+
+/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
+/// same as extracting the high 128-bit part of 256-bit vector and then
+/// inserting the result into the low part of a new 256-bit vector
+static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
+ SVOp->getMaskElt(j) >= 0)
+ return false;
+
+ return true;
+}
+
+/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
+/// same as extracting the low 128-bit part of 256-bit vector and then
+/// inserting the result into the high part of a new 256-bit vector
+static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
+ SVOp->getMaskElt(j) >= 0)
+ return false;
+
+ return true;
+}
+
+/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
+static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget* Subtarget) {
+ DebugLoc dl = N->getDebugLoc();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ EVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
+ V2.getOpcode() == ISD::CONCAT_VECTORS) {
+ //
+ // 0,0,0,...
+ // |
+ // V UNDEF BUILD_VECTOR UNDEF
+ // \ / \ /
+ // CONCAT_VECTOR CONCAT_VECTOR
+ // \ /
+ // \ /
+ // RESULT: V + zero extended
+ //
+ if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
+ V2.getOperand(1).getOpcode() != ISD::UNDEF ||
+ V1.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()))
+ return SDValue();
+
+ // To match the shuffle mask, the first half of the mask should
+ // be exactly the first vector, and all the rest a splat with the
+ // first element of the second one.
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
+ !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
+ return SDValue();
+
+ // If V1 is coming from a vector load then just fold to a VZEXT_LOAD.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(V1.getOperand(0))) {
+ if (Ld->hasNUsesOfValue(1, 0)) {
+ SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
+ SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
+ SDValue ResNode =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
+ Ld->getMemoryVT(),
+ Ld->getPointerInfo(),
+ Ld->getAlignment(),
+ false/*isVolatile*/, true/*ReadMem*/,
+ false/*WriteMem*/);
+
+ // Make sure the newly-created LOAD is in the same position as Ld in
+ // terms of dependency. We create a TokenFactor for Ld and ResNode,
+ // and update uses of Ld's output chain to use the TokenFactor.
+ if (Ld->hasAnyUseOfValue(1)) {
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ SDValue(Ld, 1), SDValue(ResNode.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
+ DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
+ SDValue(ResNode.getNode(), 1));
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
+ }
+ }
+
+ // Emit a zeroed vector and insert the desired subvector on its
+ // first half.
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), 0, DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Combine some shuffles into subvector extracts and inserts:
+ //
+
+ // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ if (isShuffleHigh128VectorInsertLow(SVOp)) {
+ SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl);
+ SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ if (isShuffleLow128VectorInsertHigh(SVOp)) {
+ SDValue V = Extract128BitVector(V1, 0, DAG, dl);
+ SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ return SDValue();
+}
+
+/// PerformShuffleCombine - Performs several different shuffle combines.
+static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Don't create instructions with illegal types after legalize types has run.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
+ return SDValue();
+
+ // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
+ if (Subtarget->hasFp256() && VT.is256BitVector() &&
+ N->getOpcode() == ISD::VECTOR_SHUFFLE)
+ return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
+
+ // Only handle 128 wide vector from here on.
+ if (!VT.is128BitVector())
+ return SDValue();
+
+ // Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
+ // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
+ // consecutive, non-overlapping, and in the right order.
+ SmallVector<SDValue, 16> Elts;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+ Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
+
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
+}
+
+/// PerformTruncateCombine - Converts truncate operation to
+/// a sequence of vector shuffle operations.
+/// It is possible when we truncate 256-bit vector to 128-bit vector
+static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ return SDValue();
+}
+
+/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
+/// specific shuffle of a load can be folded into a single element load.
+/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
+/// shuffles have been customed lowered so we need to handle those here.
+static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue InVec = N->getOperand(0);
+ SDValue EltNo = N->getOperand(1);
+
+ if (!isa<ConstantSDNode>(EltNo))
+ return SDValue();
+
+ EVT VT = InVec.getValueType();
+
+ bool HasShuffleIntoBitcast = false;
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
+ return SDValue();
+ InVec = InVec.getOperand(0);
+ HasShuffleIntoBitcast = true;
+ }
+
+ if (!isTargetShuffle(InVec.getOpcode()))
+ return SDValue();
+
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ SmallVector<int, 16> ShuffleMask;
+ bool UnaryShuffle;
+ if (!getTargetShuffleMask(InVec.getNode(), VT.getSimpleVT(), ShuffleMask,
+ UnaryShuffle))
+ return SDValue();
+
+ // Select the input vector, guarding against out of range extract vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
+ SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
+ : InVec.getOperand(1);
+
+ // If inputs to shuffle are the same for both ops, then allow 2 uses
+ unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
+
+ if (LdNode.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
+ return SDValue();
+
+ AllowedUses = 1; // only allow 1 load use if we have a bitcast
+ LdNode = LdNode.getOperand(0);
+ }
+
+ if (!ISD::isNormalLoad(LdNode.getNode()))
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
+
+ if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
+ return SDValue();
+
+ if (HasShuffleIntoBitcast) {
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment is valid.
+ unsigned Align = LN0->getAlignment();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NewAlign = TLI.getDataLayout()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+ return SDValue();
+ }
+
+ // All checks match so transform back to vector_shuffle so that DAG combiner
+ // can finish the job
+ DebugLoc dl = N->getDebugLoc();
+
+ // Create shuffle node taking into account the case that its a unary shuffle
+ SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1);
+ Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl,
+ InVec.getOperand(0), Shuffle,
+ &ShuffleMask[0]);
+ Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
+ EltNo);
+}
+
+/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
+/// generation and convert it from being a bunch of shuffles and extracts
+/// to a simple store and scalar loads to extract the elements.
+static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
+ if (NewOp.getNode())
+ return NewOp;
+
+ SDValue InputVector = N->getOperand(0);
+ // Detect whether we are trying to convert from mmx to i32 and the bitcast
+ // from mmx to v2i32 has a single usage.
+ if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
+ InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx &&
+ InputVector.hasOneUse() && N->getValueType(0) == MVT::i32)
+ return DAG.getNode(X86ISD::MMX_MOVD2W, InputVector.getDebugLoc(),
+ N->getValueType(0),
+ InputVector.getNode()->getOperand(0));
+
+ // Only operate on vectors of 4 elements, where the alternative shuffling
+ // gets to be more expensive.
+ if (InputVector.getValueType() != MVT::v4i32)
+ return SDValue();
+
+ // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
+ // single use which is a sign-extend or zero-extend, and all elements are
+ // used.
+ SmallVector<SDNode *, 4> Uses;
+ unsigned ExtractedElements = 0;
+ for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
+ UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI.getUse().getResNo() != InputVector.getResNo())
+ return SDValue();
+
+ SDNode *Extract = *UI;
+ if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ if (Extract->getValueType(0) != MVT::i32)
+ return SDValue();
+ if (!Extract->hasOneUse())
+ return SDValue();
+ if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
+ Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ if (!isa<ConstantSDNode>(Extract->getOperand(1)))
+ return SDValue();
+
+ // Record which element was extracted.
+ ExtractedElements |=
+ 1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
+
+ Uses.push_back(Extract);
+ }
+
+ // If not all the elements were used, this may not be worthwhile.
+ if (ExtractedElements != 15)
+ return SDValue();
+
+ // Ok, we've now decided to do the transformation.
+ DebugLoc dl = InputVector.getDebugLoc();
+
+ // Store the value to a temporary stack slot.
+ SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Replace each use (extract) with a load of the appropriate element.
+ for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
+ UE = Uses.end(); UI != UE; ++UI) {
+ SDNode *Extract = *UI;
+
+ // cOMpute the element's address.
+ SDValue Idx = Extract->getOperand(1);
+ unsigned EltSize =
+ InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
+ uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
+
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ StackPtr, OffsetVal);
+
+ // Load the scalar.
+ SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
+ ScalarAddr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Replace the exact with the load.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
+ }
+
+ // The replacement was made in place; don't return anything.
+ return SDValue();
+}
+
+/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match.
+static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ if (!VT.isVector())
+ return 0;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::v32i8:
+ case MVT::v16i16:
+ case MVT::v8i32:
+ if (!Subtarget->hasAVX2())
+ return 0;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ if (!Subtarget->hasSSE2())
+ return 0;
+ }
+
+ // SSE2 has only a small subset of the operations.
+ bool hasUnsigned = Subtarget->hasSSE41() ||
+ (Subtarget->hasSSE2() && VT == MVT::v16i8);
+ bool hasSigned = Subtarget->hasSSE41() ||
+ (Subtarget->hasSSE2() && VT == MVT::v8i16);
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Check for x CC y ? x : y.
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return hasUnsigned ? X86ISD::UMIN : 0;
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ return hasUnsigned ? X86ISD::UMAX : 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ return hasSigned ? X86ISD::SMIN : 0;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ return hasSigned ? X86ISD::SMAX : 0;
+ }
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return hasUnsigned ? X86ISD::UMAX : 0;
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ return hasUnsigned ? X86ISD::UMIN : 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ return hasSigned ? X86ISD::SMAX : 0;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ return hasSigned ? X86ISD::SMIN : 0;
+ }
+ }
+
+ return 0;
+}
+
+/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
+/// nodes.
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Cond = N->getOperand(0);
+ // Get the LHS/RHS of the select.
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ EVT VT = LHS.getValueType();
+
+ // If we have SSE[12] support, try to form min/max nodes. SSE min/max
+ // instructions match the semantics of the common C idiom x<y?x:y but not
+ // x<=y?x:y, because of how they handle negative zero (which can be
+ // ignored in unsafe-math mode).
+ if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
+ VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ (Subtarget->hasSSE2() ||
+ (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ unsigned Opcode = 0;
+ // Check for x CC y ? x : y.
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ // Converting this to a min would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETOLE:
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETULE:
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETOGE:
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGT:
+ // Converting this to a max would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGE:
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETOGE:
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGT:
+ // Converting this to a min would handle NaNs incorrectly.
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ break;
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGE:
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETULT:
+ // Converting this to a max would handle NaNs incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETOLE:
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETULE:
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ }
+
+ if (Opcode)
+ return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
+ }
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
+ // Don't do this for crazy integer types.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
+ // If this is efficiently invertible, canonicalize the LHSC/RHSC values
+ // so that TrueC (the true value) is larger than FalseC.
+ bool NeedsCondInvert = false;
+
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
+ // Efficiently invertible.
+ (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
+ (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
+ isa<ConstantSDNode>(Cond.getOperand(1))))) {
+ NeedsCondInvert = true;
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
+ if (FalseC->getAPIntValue() == 0 &&
+ TrueC->getAPIntValue().isPowerOf2()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ return Cond;
+ }
+ }
+ }
+ }
+
+ // Canonicalize max and min:
+ // (x > y) ? x : y -> (x >= y) ? x : y
+ // (x < y) ? x : y -> (x <= y) ? x : y
+ // This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates
+ // the need for an extra compare
+ // against zero. e.g.
+ // (x - y) > 0 : (x - y) ? 0 -> (x - y) >= 0 : (x - y) ? 0
+ // subl %esi, %edi
+ // testl %edi, %edi
+ // movl $0, %eax
+ // cmovgl %edi, %eax
+ // =>
+ // xorl %eax, %eax
+ // subl %esi, $edi
+ // cmovsl %eax, %edi
+ if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
+ DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGT: {
+ ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
+ Cond = DAG.getSetCC(Cond.getDebugLoc(), Cond.getValueType(),
+ Cond.getOperand(0), Cond.getOperand(1), NewCC);
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS);
+ }
+ }
+ }
+
+ // Match VSELECTs into subs with unsigned saturation.
+ if (!DCI.isBeforeLegalize() &&
+ N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
+ ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Check if one of the arms of the VSELECT is a zero vector. If it's on the
+ // left side invert the predicate to simplify logic below.
+ SDValue Other;
+ if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
+ Other = RHS;
+ CC = ISD::getSetCCInverse(CC, true);
+ } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
+ Other = LHS;
+ }
+
+ if (Other.getNode() && Other->getNumOperands() == 2 &&
+ DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
+ SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
+ SDValue CondRHS = Cond->getOperand(1);
+
+ // Look for a general sub with unsigned saturation first.
+ // x >= y ? x-y : 0 --> subus x, y
+ // x > y ? x-y : 0 --> subus x, y
+ if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
+ Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+
+ // If the RHS is a constant we have to reverse the const canonicalization.
+ // x > C-1 ? x+-C : 0 --> subus x, C
+ if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
+ isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
+ APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+ if (CondRHS.getConstantOperandVal(0) == -A-1)
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
+ DAG.getConstant(-A, VT));
+ }
+
+ // Another special case: If C was a sign bit, the sub has been
+ // canonicalized into a xor.
+ // FIXME: Would it be better to use ComputeMaskedBits to determine whether
+ // it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> subus x, C
+ if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+ isSplatVector(OpRHS.getNode())) {
+ APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+ if (A.isSignBit())
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+
+ // Try to match a min/max vector operation.
+ if (!DCI.isBeforeLegalize() &&
+ N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC)
+ if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
+ return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
+
+ // If we know that this node is legal then we know that it is going to be
+ // matched by one of the SSE/AVX BLEND instructions. These instructions only
+ // depend on the highest bit in each word. Try to use SimplifyDemandedBits
+ // to simplify previous instructions.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
+ !DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) {
+ unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
+
+ // Don't optimize vector selects that map to mask-registers.
+ if (BitWidth == 1)
+ return SDValue();
+
+ assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
+ DCI.isBeforeLegalizeOps());
+ if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
+ TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
+ return SDValue();
+}
+
+// Check whether a boolean test is testing a boolean value generated by
+// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
+// code.
+//
+// Simplify the following patterns:
+// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
+// to (Op EFLAGS Cond)
+//
+// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
+// to (Op EFLAGS !Cond)
+//
+// where Op could be BRCOND or CMOV.
+//
+static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
+ // Quit if not CMP and SUB with its value result used.
+ if (Cmp.getOpcode() != X86ISD::CMP &&
+ (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
+ return SDValue();
+
+ // Quit if not used as a boolean value.
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ // Check CMP operands. One of them should be 0 or 1 and the other should be
+ // an SetCC or extended from it.
+ SDValue Op1 = Cmp.getOperand(0);
+ SDValue Op2 = Cmp.getOperand(1);
+
+ SDValue SetCC;
+ const ConstantSDNode* C = 0;
+ bool needOppositeCond = (CC == X86::COND_E);
+
+ if ((C = dyn_cast<ConstantSDNode>(Op1)))
+ SetCC = Op2;
+ else if ((C = dyn_cast<ConstantSDNode>(Op2)))
+ SetCC = Op1;
+ else // Quit if all operands are not constants.
+ return SDValue();
+
+ if (C->getZExtValue() == 1)
+ needOppositeCond = !needOppositeCond;
+ else if (C->getZExtValue() != 0)
+ // Quit if the constant is neither 0 or 1.
+ return SDValue();
+
+ // Skip 'zext' or 'trunc' node.
+ if (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
+ SetCC.getOpcode() == ISD::TRUNCATE)
+ SetCC = SetCC.getOperand(0);
+
+ switch (SetCC.getOpcode()) {
+ case X86ISD::SETCC:
+ // Set the condition code or opposite one if necessary.
+ CC = X86::CondCode(SetCC.getConstantOperandVal(0));
+ if (needOppositeCond)
+ CC = X86::GetOppositeBranchCondition(CC);
+ return SetCC.getOperand(1);
+ case X86ISD::CMOV: {
+ // Check whether false/true value has canonical one, i.e. 0 or 1.
+ ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
+ ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
+ // Quit if true value is not a constant.
+ if (!TVal)
+ return SDValue();
+ // Quit if false value is not a constant.
+ if (!FVal) {
+ SDValue Op = SetCC.getOperand(0);
+ // Skip 'zext' or 'trunc' node.
+ if (Op.getOpcode() == ISD::ZERO_EXTEND ||
+ Op.getOpcode() == ISD::TRUNCATE)
+ Op = Op.getOperand(0);
+ // A special case for rdrand/rdseed, where 0 is set if false cond is
+ // found.
+ if ((Op.getOpcode() != X86ISD::RDRAND &&
+ Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
+ return SDValue();
+ }
+ // Quit if false value is not the constant 0 or 1.
+ bool FValIsFalse = true;
+ if (FVal && FVal->getZExtValue() != 0) {
+ if (FVal->getZExtValue() != 1)
+ return SDValue();
+ // If FVal is 1, opposite cond is needed.
+ needOppositeCond = !needOppositeCond;
+ FValIsFalse = false;
+ }
+ // Quit if TVal is not the constant opposite of FVal.
+ if (FValIsFalse && TVal->getZExtValue() != 1)
+ return SDValue();
+ if (!FValIsFalse && TVal->getZExtValue() != 0)
+ return SDValue();
+ CC = X86::CondCode(SetCC.getConstantOperandVal(2));
+ if (needOppositeCond)
+ CC = X86::GetOppositeBranchCondition(CC);
+ return SetCC.getOperand(3);
+ }
+ }
+
+ return SDValue();
+}
+
+/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
+static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the flag operand isn't dead, don't touch this CMOV.
+ if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
+ return SDValue();
+
+ SDValue FalseOp = N->getOperand(0);
+ SDValue TrueOp = N->getOperand(1);
+ X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
+ SDValue Cond = N->getOperand(3);
+
+ if (CC == X86::COND_E || CC == X86::COND_NE) {
+ switch (Cond.getOpcode()) {
+ default: break;
+ case X86ISD::BSR:
+ case X86ISD::BSF:
+ // If operand of BSR / BSF are proven never zero, then ZF cannot be set.
+ if (DAG.isKnownNeverZero(Cond.getOperand(0)))
+ return (CC == X86::COND_E) ? FalseOp : TrueOp;
+ }
+ }
+
+ SDValue Flags;
+
+ Flags = checkBoolTestSetCCCombine(Cond, CC);
+ if (Flags.getNode() &&
+ // Extra check as FCMOV only supports a subset of X86 cond.
+ (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
+ SDValue Ops[] = { FalseOp, TrueOp,
+ DAG.getConstant(CC, MVT::i8), Flags };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
+ Ops, array_lengthof(Ops));
+ }
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations. Note that the operands are ordered the opposite of SELECT
+ // operands.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
+ // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
+ // larger than FalseC (the false value).
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueC, FalseC);
+ std::swap(TrueOp, FalseOp);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
+ // This is efficient for any integer data type (including i8/i16) and
+ // shift amount.
+ if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient
+ // for any integer data type, including i8/i16.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+ }
+ }
+ }
+
+ // Handle these cases:
+ // (select (x != c), e, c) -> select (x != c), e, x),
+ // (select (x == c), c, e) -> select (x == c), x, e)
+ // where the c is an integer constant, and the "select" is the combination
+ // of CMOV and CMP.
+ //
+ // The rationale for this change is that the conditional-move from a constant
+ // needs two instructions, however, conditional-move from a register needs
+ // only one instruction.
+ //
+ // CAVEAT: By replacing a constant with a symbolic value, it may obscure
+ // some instruction-combining opportunities. This opt needs to be
+ // postponed as late as possible.
+ //
+ if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
+ // the DCI.xxxx conditions are provided to postpone the optimization as
+ // late as possible.
+
+ ConstantSDNode *CmpAgainst = 0;
+ if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
+ (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
+ !isa<ConstantSDNode>(Cond.getOperand(0))) {
+
+ if (CC == X86::COND_NE &&
+ CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueOp, FalseOp);
+ }
+
+ if (CC == X86::COND_E &&
+ CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
+ SDValue Ops[] = { FalseOp, Cond.getOperand(0),
+ DAG.getConstant(CC, MVT::i8), Cond };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops,
+ array_lengthof(Ops));
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformMulCombine - Optimize a single multiply with constant into two
+/// in order to implement it with two cheaper instructions, e.g.
+/// LEA + SHL, LEA + LEA.
+static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+ uint64_t MulAmt = C->getZExtValue();
+ if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
+ return SDValue();
+
+ uint64_t MulAmt1 = 0;
+ uint64_t MulAmt2 = 0;
+ if ((MulAmt % 9) == 0) {
+ MulAmt1 = 9;
+ MulAmt2 = MulAmt / 9;
+ } else if ((MulAmt % 5) == 0) {
+ MulAmt1 = 5;
+ MulAmt2 = MulAmt / 5;
+ } else if ((MulAmt % 3) == 0) {
+ MulAmt1 = 3;
+ MulAmt2 = MulAmt / 3;
+ }
+ if (MulAmt2 &&
+ (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+ DebugLoc DL = N->getDebugLoc();
+
+ if (isPowerOf2_64(MulAmt2) &&
+ !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+ // If second multiplifer is pow2, issue it first. We want the multiply by
+ // 3, 5, or 9 to be folded into the addressing mode unless the lone use
+ // is an add.
+ std::swap(MulAmt1, MulAmt2);
+
+ SDValue NewMul;
+ if (isPowerOf2_64(MulAmt1))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(MulAmt1, VT));
+
+ if (isPowerOf2_64(MulAmt2))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
+ DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
+ DAG.getConstant(MulAmt2, VT));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, NewMul, false);
+ }
+ return SDValue();
+}
+
+static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
+ // since the result of setcc_c is all zero's or all ones.
+ if (VT.isInteger() && !VT.isVector() &&
+ N1C && N0.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
+ ((N00.getOpcode() == ISD::ANY_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND) &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt ShAmt = N1C->getAPIntValue();
+ Mask = Mask.shl(ShAmt);
+ if (Mask != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ N00, DAG.getConstant(Mask, VT));
+ }
+ }
+
+ // Hardware support for vector shifts is sparse which makes us scalarize the
+ // vector operations in many cases. Also, on sandybridge ADD is faster than
+ // shl.
+ // (shl V, 1) -> add V,V
+ if (isSplatVector(N1.getNode())) {
+ assert(N0.getValueType().isVector() && "Invalid vector shift type");
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(0));
+ // We shift all of the values by one. In many cases we do not have
+ // hardware support for this operation. This is better expressed as an ADD
+ // of two values.
+ if (N1C && (1 == N1C->getZExtValue())) {
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N0);
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
+/// when possible.
+static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (N->getOpcode() == ISD::SHL) {
+ SDValue V = PerformSHLCombine(N, DAG);
+ if (V.getNode()) return V;
+ }
+
+ return SDValue();
+}
+
+// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
+// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
+// and friends. Likewise for OR -> CMPNEQSS.
+static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ unsigned opcode;
+
+ // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
+ // we're requiring SSE2 for both.
+ if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CMP0 = N0->getOperand(1);
+ SDValue CMP1 = N1->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // The SETCCs should both refer to the same CMP.
+ if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
+ return SDValue();
+
+ SDValue CMP00 = CMP0->getOperand(0);
+ SDValue CMP01 = CMP0->getOperand(1);
+ EVT VT = CMP00.getValueType();
+
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ bool ExpectingFlags = false;
+ // Check for any users that want flags:
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ !ExpectingFlags && UI != UE; ++UI)
+ switch (UI->getOpcode()) {
+ default:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ case ISD::SELECT:
+ ExpectingFlags = true;
+ break;
+ case ISD::CopyToReg:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ break;
+ }
+
+ if (!ExpectingFlags) {
+ enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
+ enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
+
+ if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
+ X86::CondCode tmp = cc0;
+ cc0 = cc1;
+ cc1 = tmp;
+ }
+
+ if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
+ (cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
+ bool is64BitFP = (CMP00.getValueType() == MVT::f64);
+ X86ISD::NodeType NTOperator = is64BitFP ?
+ X86ISD::FSETCCsd : X86ISD::FSETCCss;
+ // FIXME: need symbolic constants for these magic numbers.
+ // See X86ATTInstPrinter.cpp:printSSECC().
+ unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
+ SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+ DAG.getConstant(x86cc, MVT::i8));
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
+ OnesOrZeroesF);
+ SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
+ DAG.getConstant(1, MVT::i32));
+ SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
+ return OneBitOfTruth;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+/// CanFoldXORWithAllOnes - Test whether the XOR operand is a AllOnes vector
+/// so it can be folded inside ANDNP.
+static bool CanFoldXORWithAllOnes(const SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // Match direct AllOnes for 128 and 256-bit vectors
+ if (ISD::isBuildVectorAllOnes(N))
+ return true;
+
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ // Sometimes the operand may come from a insert_subvector building a 256-bit
+ // allones vector
+ if (VT.is256BitVector() &&
+ N->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+
+ if (V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V1.getOperand(0).getOpcode() == ISD::UNDEF &&
+ ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) &&
+ ISD::isBuildVectorAllOnes(V2.getNode()))
+ return true;
+ }
+
+ return false;
+}
+
+// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
+// register. In most cases we actually compare or select YMM-sized registers
+// and mixing the two types creates horrible code. This method optimizes
+// some of the transition sequences.
+static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.is256BitVector())
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::ANY_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
+
+ SDValue Narrow = N->getOperand(0);
+ EVT NarrowVT = Narrow->getValueType(0);
+ if (!NarrowVT.is128BitVector())
+ return SDValue();
+
+ if (Narrow->getOpcode() != ISD::XOR &&
+ Narrow->getOpcode() != ISD::AND &&
+ Narrow->getOpcode() != ISD::OR)
+ return SDValue();
+
+ SDValue N0 = Narrow->getOperand(0);
+ SDValue N1 = Narrow->getOperand(1);
+ DebugLoc DL = Narrow->getDebugLoc();
+
+ // The Left side has to be a trunc.
+ if (N0.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ // The type of the truncated inputs.
+ EVT WideVT = N0->getOperand(0)->getValueType(0);
+ if (WideVT != VT)
+ return SDValue();
+
+ // The right side has to be a 'trunc' or a constant vector.
+ bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
+ bool RHSConst = (isSplatVector(N1.getNode()) &&
+ isa<ConstantSDNode>(N1->getOperand(0)));
+ if (!RHSTrunc && !RHSConst)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
+ return SDValue();
+
+ // Set N0 and N1 to hold the inputs to the new wide operation.
+ N0 = N0->getOperand(0);
+ if (RHSConst) {
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
+ N1->getOperand(0));
+ SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size());
+ } else if (RHSTrunc) {
+ N1 = N1->getOperand(0);
+ }
+
+ // Generate the wide operation.
+ SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1);
+ unsigned Opcode = N->getOpcode();
+ switch (Opcode) {
+ case ISD::ANY_EXTEND:
+ return Op;
+ case ISD::ZERO_EXTEND: {
+ unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(InBits);
+ Mask = Mask.zext(VT.getScalarType().getSizeInBits());
+ return DAG.getNode(ISD::AND, DL, VT,
+ Op, DAG.getConstant(Mask, VT));
+ }
+ case ISD::SIGN_EXTEND:
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
+ Op, DAG.getValueType(NarrowVT));
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+
+static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
+ // Create BLSI, and BLSR instructions
+ // BLSI is X & (-X)
+ // BLSR is X & (X-1)
+ if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for neg
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
+ isZero(N0.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
+
+ // Check RHS for neg
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
+ isZero(N1.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+
+ // Check LHS for X-1
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+ isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+
+ // Check RHS for X-1
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+ isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+
+ return SDValue();
+ }
+
+ // Want to form ANDNP nodes:
+ // 1) In the hopes of then easily combining them with OR and AND nodes
+ // to form PBLEND/PSIGN.
+ // 2) To match ANDN packed intrinsics
+ if (VT != MVT::v2i64 && VT != MVT::v4i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for vnot
+ if (N0.getOpcode() == ISD::XOR &&
+ //ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ CanFoldXORWithAllOnes(N0.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
+
+ // Check RHS for vnot
+ if (N1.getOpcode() == ISD::XOR &&
+ //ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ CanFoldXORWithAllOnes(N1.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+}
+
+static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // look for psign/blend
+ if (VT == MVT::v2i64 || VT == MVT::v4i64) {
+ if (!Subtarget->hasSSSE3() ||
+ (VT == MVT::v4i64 && !Subtarget->hasInt256()))
+ return SDValue();
+
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::ANDNP)
+ std::swap(N0, N1);
+ // or (and (m, y), (pandn m, x))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+ // Check to see if the mask appeared in both the AND and ANDNP and
+ if (!Y.getNode())
+ return SDValue();
+
+ // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
+ // Look through mask bitcast.
+ if (Mask.getOpcode() == ISD::BITCAST)
+ Mask = Mask.getOperand(0);
+ if (X.getOpcode() == ISD::BITCAST)
+ X = X.getOperand(0);
+ if (Y.getOpcode() == ISD::BITCAST)
+ Y = Y.getOperand(0);
+
+ EVT MaskVT = Mask.getValueType();
+
+ // Validate that the Mask operand is a vector sra node.
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ unsigned SraAmt = ~0;
+ if (Mask.getOpcode() == ISD::SRA) {
+ SDValue Amt = Mask.getOperand(1);
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
+ SraAmt = C->getZExtValue();
+ }
+ } else if (Mask.getOpcode() == X86ISD::VSRAI) {
+ SDValue SraC = Mask.getOperand(1);
+ SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ }
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Now we know we at least have a plendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
+ assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
+ "Unsupported VT for PSIGN");
+ Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+ }
+ // PBLENDVB only available on SSE 4.1
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
+
+ X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
+ Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+ }
+ }
+
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ SDValue ShAmt0 = N0.getOperand(1);
+ if (ShAmt0.getValueType() != MVT::i8)
+ return SDValue();
+ SDValue ShAmt1 = N1.getOperand(1);
+ if (ShAmt1.getValueType() != MVT::i8)
+ return SDValue();
+ if (ShAmt0.getOpcode() == ISD::TRUNCATE)
+ ShAmt0 = ShAmt0.getOperand(0);
+ if (ShAmt1.getOpcode() == ISD::TRUNCATE)
+ ShAmt1 = ShAmt1.getOperand(0);
+
+ DebugLoc DL = N->getDebugLoc();
+ unsigned Opc = X86ISD::SHLD;
+ SDValue Op0 = N0.getOperand(0);
+ SDValue Op1 = N1.getOperand(0);
+ if (ShAmt0.getOpcode() == ISD::SUB) {
+ Opc = X86ISD::SHRD;
+ std::swap(Op0, Op1);
+ std::swap(ShAmt0, ShAmt1);
+ }
+
+ unsigned Bits = VT.getSizeInBits();
+ if (ShAmt1.getOpcode() == ISD::SUB) {
+ SDValue Sum = ShAmt1.getOperand(0);
+ if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+ SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
+ if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
+ ShAmt1Op1 = ShAmt1Op1.getOperand(0);
+ if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
+ return DAG.getNode(Opc, DL, VT,
+ Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+ } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+ ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+ if (ShAmt0C &&
+ ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
+ return DAG.getNode(Opc, DL, VT,
+ N0.getOperand(0), N1.getOperand(0),
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+
+ return SDValue();
+}
+
+// Generate NEG and CMOV for integer abs.
+static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // Since X86 does not have CMOV for 8-bit integer, we don't convert
+ // 8-bit integer abs to NEG and CMOV.
+ if (VT.isInteger() && VT.getSizeInBits() == 8)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
+ // and change it to SUB and CMOV.
+ if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
+ N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1) == N1 &&
+ N1.getOpcode() == ISD::SRA &&
+ N1.getOperand(0) == N0.getOperand(0))
+ if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
+ if (Y1C->getAPIntValue() == VT.getSizeInBits()-1) {
+ // Generate SUB & CMOV.
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
+ DAG.getConstant(0, VT), N0.getOperand(0));
+
+ SDValue Ops[] = { N0.getOperand(0), Neg,
+ DAG.getConstant(X86::COND_GE, MVT::i8),
+ SDValue(Neg.getNode(), 1) };
+ return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue),
+ Ops, array_lengthof(Ops));
+ }
+ return SDValue();
+}
+
+// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes
+static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (Subtarget->hasCMov()) {
+ SDValue RV = performIntegerAbsCombine(N, DAG);
+ if (RV.getNode())
+ return RV;
+ }
+
+ // Try forming BMI if it is available.
+ if (!Subtarget->hasBMI())
+ return SDValue();
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ assert(Subtarget->hasBMI() && "Creating BLSMSK requires BMI instructions");
+
+ // Create BLSMSK instructions by finding X ^ (X-1)
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+ isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSMSK, DL, VT, N1);
+
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+ isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSMSK, DL, VT, N0);
+
+ return SDValue();
+}
+
+/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
+static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ EVT RegVT = Ld->getValueType(0);
+ EVT MemVT = Ld->getMemoryVT();
+ DebugLoc dl = Ld->getDebugLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned RegSz = RegVT.getSizeInBits();
+
+ // On Sandybridge unaligned 256bit loads are inefficient.
+ ISD::LoadExtType Ext = Ld->getExtensionType();
+ unsigned Alignment = Ld->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
+ if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
+ !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
+ unsigned NumElems = RegVT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ NumElems/2);
+ SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ std::min(16U, Alignment));
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Load1.getValue(1),
+ Load2.getValue(1));
+
+ SDValue NewVec = DAG.getUNDEF(RegVT);
+ NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl);
+ NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl);
+ return DCI.CombineTo(N, NewVec, TF, true);
+ }
+
+ // If this is a vector EXT Load then attempt to optimize it using a
+ // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the
+ // expansion is still better than scalar code.
+ // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll
+ // emit a shuffle and a arithmetic shift.
+ // TODO: It is possible to support ZExt by zeroing the undef values
+ // during the shuffle phase or after the shuffle.
+ if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() &&
+ (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) {
+ assert(MemVT != RegVT && "Cannot extend to the same type");
+ assert(MemVT.isVector() && "Must load a vector from memory");
+
+ unsigned NumElems = RegVT.getVectorNumElements();
+ unsigned MemSz = MemVT.getSizeInBits();
+ assert(RegSz > MemSz && "Register size must be greater than the mem size");
+
+ if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256())
+ return SDValue();
+
+ // All sizes must be a power of two.
+ if (!isPowerOf2_32(RegSz * MemSz * NumElems))
+ return SDValue();
+
+ // Attempt to load the original value using scalar loads.
+ // Find the largest scalar type that divides the total loaded size.
+ MVT SclrLoadTy = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
+ SclrLoadTy = Tp;
+ }
+ }
+
+ // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
+ if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
+ (64 <= MemSz))
+ SclrLoadTy = MVT::f64;
+
+ // Calculate the number of scalar loads that we need to perform
+ // in order to load our vector from memory.
+ unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
+ if (Ext == ISD::SEXTLOAD && NumLoads > 1)
+ return SDValue();
+
+ unsigned loadRegZize = RegSz;
+ if (Ext == ISD::SEXTLOAD && RegSz == 256)
+ loadRegZize /= 2;
+
+ // Represent our vector as a sequence of elements which are the
+ // largest scalar that we can load.
+ EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
+ loadRegZize/SclrLoadTy.getSizeInBits());
+
+ // Represent the data using the same element type that is stored in
+ // memory. In practice, we ''widen'' MemVT.
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ loadRegZize/MemVT.getScalarType().getSizeInBits());
+
+ assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
+ "Invalid vector type");
+
+ // We can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT))
+ return SDValue();
+
+ SmallVector<SDValue, 8> Chains;
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
+
+ for (unsigned i = 0; i < NumLoads; ++i) {
+ // Perform a single load.
+ SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
+ Ptr, Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment());
+ Chains.push_back(ScalarLoad.getValue(1));
+ // Create the first element type using SCALAR_TO_VECTOR in order to avoid
+ // another round of DAGCombining.
+ if (i == 0)
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
+ else
+ Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
+ ScalarLoad, DAG.getIntPtrConstant(i));
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
+ Chains.size());
+
+ // Bitcast the loaded value to a vector of the original element type, in
+ // the size of the target vector type.
+ SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
+ unsigned SizeRatio = RegSz/MemSz;
+
+ if (Ext == ISD::SEXTLOAD) {
+ // If we have SSE4.1 we can directly emit a VSEXT node.
+ if (Subtarget->hasSSE41()) {
+ SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+ return DCI.CombineTo(N, Sext, TF, true);
+ }
+
+ // Otherwise we'll shuffle the small elements in the high bits of the
+ // larger type and perform an arithmetic shift. If the shift is not legal
+ // it's better to scalarize.
+ if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT))
+ return SDValue();
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i*SizeRatio + SizeRatio-1] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
+
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+
+ // Build the arithmetic shift.
+ unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
+ MemVT.getVectorElementType().getSizeInBits();
+ Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
+ DAG.getConstant(Amt, RegVT));
+
+ return DCI.CombineTo(N, Shuff, TF, true);
+ }
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i*SizeRatio] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
+
+ // Bitcast to the requested type.
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+ // Replace the original load with the new sequence
+ // and return the new chain.
+ return DCI.CombineTo(N, Shuff, TF, true);
+ }
+
+ return SDValue();
+}
+
+/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
+static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT VT = St->getValue().getValueType();
+ EVT StVT = St->getMemoryVT();
+ DebugLoc dl = St->getDebugLoc();
+ SDValue StoredVal = St->getOperand(1);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // If we are saving a concatenation of two XMM registers, perform two stores.
+ // On Sandy Bridge, 256-bit memory operations are executed by two
+ // 128-bit ports. However, on Haswell it is better to issue a single 256-bit
+ // memory operation.
+ unsigned Alignment = St->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
+ if (VT.is256BitVector() && !Subtarget->hasInt256() &&
+ StVT == VT && !IsAligned) {
+ unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
+ SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
+
+ SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
+ SDValue Ptr0 = St->getBasePtr();
+ SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
+
+ SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), Alignment);
+ SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(16U, Alignment));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
+ }
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store.
+ // First, pack all of the elements in one place. Next, store to memory
+ // in fewer chunks.
+ if (St->isTruncatingStore() && VT.isVector()) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromSz) % ToSz) return SDValue();
+
+ unsigned SizeRatio = FromSz / ToSz;
+
+ assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StVT.getScalarType(), NumElems*SizeRatio);
+
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT))
+ return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
+ StoreType = Tp;
+ }
+
+ // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
+ if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 &&
+ (64 <= NumElems * ToSz))
+ StoreType = MVT::f64;
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue Ptr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(i));
+ SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ Chains.push_back(Ch);
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
+ Chains.size());
+ }
+
+ // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
+ // the FP state in cases where an emms may be missing.
+ // A preferable solution to the general problem is to figure out the right
+ // places to insert EMMS. This qualifies as a quick hack.
+
+ // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
+ if (VT.getSizeInBits() != 64)
+ return SDValue();
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool NoImplicitFloatOps = F->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
+ && Subtarget->hasSSE2();
+ if ((VT.isVector() ||
+ (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
+ isa<LoadSDNode>(St->getValue()) &&
+ !cast<LoadSDNode>(St->getValue())->isVolatile() &&
+ St->getChain().hasOneUse() && !St->isVolatile()) {
+ SDNode* LdVal = St->getValue().getNode();
+ LoadSDNode *Ld = 0;
+ int TokenFactorIndex = -1;
+ SmallVector<SDValue, 8> Ops;
+ SDNode* ChainVal = St->getChain().getNode();
+ // Must be a store of a load. We currently handle two cases: the load
+ // is a direct child, and it's under an intervening TokenFactor. It is
+ // possible to dig deeper under nested TokenFactors.
+ if (ChainVal == LdVal)
+ Ld = cast<LoadSDNode>(St->getChain());
+ else if (St->getValue().hasOneUse() &&
+ ChainVal->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ if (ChainVal->getOperand(i).getNode() == LdVal) {
+ TokenFactorIndex = i;
+ Ld = cast<LoadSDNode>(St->getValue());
+ } else
+ Ops.push_back(ChainVal->getOperand(i));
+ }
+ }
+
+ if (!Ld || !ISD::isNormalLoad(Ld))
+ return SDValue();
+
+ // If this is not the MMX case, i.e. we are just turning i64 load/store
+ // into f64 load/store, avoid the transformation if there are multiple
+ // uses of the loaded value.
+ if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
+ return SDValue();
+
+ DebugLoc LdDL = Ld->getDebugLoc();
+ DebugLoc StDL = N->getDebugLoc();
+ // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+ // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
+ // pair instead.
+ if (Subtarget->is64Bit() || F64IsLegal) {
+ EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Ld->getAlignment());
+ SDValue NewChain = NewLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(NewChain);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+ return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
+ St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(),
+ St->getAlignment());
+ }
+
+ // Otherwise, lower to two pairs of 32-bit loads / stores.
+ SDValue LoAddr = Ld->getBasePtr();
+ SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
+ Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment());
+ SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
+ Ld->getPointerInfo().getWithOffset(4),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(),
+ MinAlign(Ld->getAlignment(), 4));
+
+ SDValue NewChain = LoLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(LoLd);
+ Ops.push_back(HiLd);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+
+ LoAddr = St->getBasePtr();
+ HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
+ St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(),
+ St->getAlignment());
+ SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
+ St->getPointerInfo().getWithOffset(4),
+ St->isVolatile(),
+ St->isNonTemporal(),
+ MinAlign(St->getAlignment(), 4));
+ return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
+ }
+ return SDValue();
+}
+
+/// isHorizontalBinOp - Return 'true' if this vector operation is "horizontal"
+/// and return the operands for the horizontal operation in LHS and RHS. A
+/// horizontal operation performs the binary operation on successive elements
+/// of its first operand, then on successive elements of its second operand,
+/// returning the resulting values in a vector. For example, if
+/// A = < float a0, float a1, float a2, float a3 >
+/// and
+/// B = < float b0, float b1, float b2, float b3 >
+/// then the result of doing a horizontal operation on A and B is
+/// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
+/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
+/// A horizontal-op B, for some already available A and B, and if so then LHS is
+/// set to A, RHS to B, and the routine returns 'true'.
+/// Note that the binary operation should have the property that if one of the
+/// operands is UNDEF then the result is UNDEF.
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
+ // Look for the following pattern: if
+ // A = < float a0, float a1, float a2, float a3 >
+ // B = < float b0, float b1, float b2, float b3 >
+ // and
+ // LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
+ // RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
+ // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
+ // which is A horizontal-op B.
+
+ // At least one of the operands should be a vector shuffle.
+ if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
+ RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
+ return false;
+
+ EVT VT = LHS.getValueType();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for horizontal add/sub");
+
+ // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
+ // operate independently on 128-bit lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+ assert((NumLaneElts % 2 == 0) &&
+ "Vector type should have an even number of elements in each lane");
+ unsigned HalfLaneElts = NumLaneElts/2;
+
+ // View LHS in the form
+ // LHS = VECTOR_SHUFFLE A, B, LMask
+ // If LHS is not a shuffle then pretend it is the shuffle
+ // LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
+ // NOTE: in what follows a default initialized SDValue represents an UNDEF of
+ // type VT.
+ SDValue A, B;
+ SmallVector<int, 16> LMask(NumElts);
+ if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
+ A = LHS.getOperand(0);
+ if (LHS.getOperand(1).getOpcode() != ISD::UNDEF)
+ B = LHS.getOperand(1);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
+ std::copy(Mask.begin(), Mask.end(), LMask.begin());
+ } else {
+ if (LHS.getOpcode() != ISD::UNDEF)
+ A = LHS;
+ for (unsigned i = 0; i != NumElts; ++i)
+ LMask[i] = i;
+ }
+
+ // Likewise, view RHS in the form
+ // RHS = VECTOR_SHUFFLE C, D, RMask
+ SDValue C, D;
+ SmallVector<int, 16> RMask(NumElts);
+ if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
+ C = RHS.getOperand(0);
+ if (RHS.getOperand(1).getOpcode() != ISD::UNDEF)
+ D = RHS.getOperand(1);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
+ std::copy(Mask.begin(), Mask.end(), RMask.begin());
+ } else {
+ if (RHS.getOpcode() != ISD::UNDEF)
+ C = RHS;
+ for (unsigned i = 0; i != NumElts; ++i)
+ RMask[i] = i;
+ }
+
+ // Check that the shuffles are both shuffling the same vectors.
+ if (!(A == C && B == D) && !(A == D && B == C))
+ return false;
+
+ // If everything is UNDEF then bail out: it would be better to fold to UNDEF.
+ if (!A.getNode() && !B.getNode())
+ return false;
+
+ // If A and B occur in reverse order in RHS, then "swap" them (which means
+ // rewriting the mask).
+ if (A != C)
+ CommuteVectorShuffleMask(RMask, NumElts);
+
+ // At this point LHS and RHS are equivalent to
+ // LHS = VECTOR_SHUFFLE A, B, LMask
+ // RHS = VECTOR_SHUFFLE A, B, RMask
+ // Check that the masks correspond to performing a horizontal operation.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int LIdx = LMask[i], RIdx = RMask[i];
+
+ // Ignore any UNDEF components.
+ if (LIdx < 0 || RIdx < 0 ||
+ (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
+ (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
+ continue;
+
+ // Check that successive elements are being operated on. If not, this is
+ // not a horizontal operation.
+ unsigned Src = (i/HalfLaneElts) % 2; // each lane is split between srcs
+ unsigned LaneStart = (i/NumLaneElts) * NumLaneElts;
+ int Index = 2*(i%HalfLaneElts) + NumElts*Src + LaneStart;
+ if (!(LIdx == Index && RIdx == Index + 1) &&
+ !(IsCommutative && LIdx == Index + 1 && RIdx == Index))
+ return false;
+ }
+
+ LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
+ RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
+ return true;
+}
+
+/// PerformFADDCombine - Do target-specific dag combines on floating point adds.
+static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Try to synthesize horizontal adds from adds of shuffles.
+ if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ isHorizontalBinOp(LHS, RHS, true))
+ return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
+ return SDValue();
+}
+
+/// PerformFSUBCombine - Do target-specific dag combines on floating point subs.
+static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Try to synthesize horizontal subs from subs of shuffles.
+ if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ isHorizontalBinOp(LHS, RHS, false))
+ return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
+ return SDValue();
+}
+
+/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
+/// X86ISD::FXOR nodes.
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
+ // F[X]OR(0.0, x) -> x
+ // F[X]OR(x, 0.0) -> x
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ return SDValue();
+}
+
+/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and
+/// X86ISD::FMAX nodes.
+static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
+
+ // Only perform optimizations if UnsafeMath is used.
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
+ // into FMINC and FMAXC, which are Commutative operations.
+ unsigned NewOp = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unknown opcode");
+ case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
+ case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
+ }
+
+ return DAG.getNode(NewOp, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1));
+}
+
+/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+ // FAND(0.0, x) -> 0.0
+ // FAND(x, 0.0) -> 0.0
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ return SDValue();
+}
+
+static SDValue PerformBTCombine(SDNode *N,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // BT ignores high bits in the bit index operand.
+ SDValue Op1 = N->getOperand(1);
+ if (Op1.hasOneUse()) {
+ unsigned BitWidth = Op1.getValueSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
+ TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return SDValue();
+}
+
+static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ if (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ EVT VT = N->getValueType(0), OpVT = Op.getValueType();
+ if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
+ VT.getVectorElementType().getSizeInBits() ==
+ OpVT.getVectorElementType().getSizeInBits()) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+ }
+ return SDValue();
+}
+
+static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
+ DebugLoc dl = N->getDebugLoc();
+
+ // The SIGN_EXTEND_INREG to v4i64 is expensive operation on the
+ // both SSE and AVX2 since there is no sign-extended shift right
+ // operation on a vector with 64-bit elements.
+ //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) ->
+ // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))
+ if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)) {
+ SDValue N00 = N0.getOperand(0);
+
+ // EXTLOAD has a better solution on AVX2,
+ // it may be replaced with X86ISD::VSEXT node.
+ if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
+ if (!ISD::isNormalLoad(N00.getNode()))
+ return SDValue();
+
+ if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
+ SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
+ N00, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
+ }
+ }
+ return SDValue();
+}
+
+static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (!Subtarget->hasFp256())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() && VT.getSizeInBits() == 256) {
+ SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ EVT ScalarVT = VT.getScalarType();
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
+ (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
+ return SDValue();
+
+ SDValue A = N->getOperand(0);
+ SDValue B = N->getOperand(1);
+ SDValue C = N->getOperand(2);
+
+ bool NegA = (A.getOpcode() == ISD::FNEG);
+ bool NegB = (B.getOpcode() == ISD::FNEG);
+ bool NegC = (C.getOpcode() == ISD::FNEG);
+
+ // Negative multiplication when NegA xor NegB
+ bool NegMul = (NegA != NegB);
+ if (NegA)
+ A = A.getOperand(0);
+ if (NegB)
+ B = B.getOperand(0);
+ if (NegC)
+ C = C.getOperand(0);
+
+ unsigned Opcode;
+ if (!NegMul)
+ Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
+ else
+ Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
+
+ return DAG.getNode(Opcode, dl, VT, A, B, C);
+}
+
+static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
+ // (and (i32 x86isd::setcc_carry), 1)
+ // This eliminates the zext. This transformation is necessary because
+ // ISD::SETCC is always legalized to i8.
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() == ISD::AND &&
+ N0.hasOneUse() &&
+ N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 1)
+ return SDValue();
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
+ N00.getOperand(0), N00.getOperand(1)),
+ DAG.getConstant(1, VT));
+ }
+ }
+
+ if (VT.is256BitVector()) {
+ SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+ }
+
+ return SDValue();
+}
+
+// Optimize x == -y --> x+y == 0
+// x != -y --> x+y != 0
+static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
+ if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
+ SDValue addV = DAG.getNode(ISD::ADD, N->getDebugLoc(),
+ LHS.getValueType(), RHS, LHS.getOperand(1));
+ return DAG.getSetCC(N->getDebugLoc(), N->getValueType(0),
+ addV, DAG.getConstant(0, addV.getValueType()), CC);
+ }
+ if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
+ if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
+ SDValue addV = DAG.getNode(ISD::ADD, N->getDebugLoc(),
+ RHS.getValueType(), LHS, RHS.getOperand(1));
+ return DAG.getSetCC(N->getDebugLoc(), N->getValueType(0),
+ addV, DAG.getConstant(0, addV.getValueType()), CC);
+ }
+ return SDValue();
+}
+
+// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
+// as "sbb reg,reg", since it can be extended without zext and produces
+// an all-ones bit which is more useful than 0/1 in some cases.
+static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
+ return DAG.getNode(ISD::AND, DL, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+ DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS),
+ DAG.getConstant(1, MVT::i8));
+}
+
+// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
+ SDValue EFLAGS = N->getOperand(1);
+
+ if (CC == X86::COND_A) {
+ // Try to convert COND_A into COND_B in an attempt to facilitate
+ // materializing "setb reg".
+ //
+ // Do not flip "e > c", where "c" is a constant, because Cmp instruction
+ // cannot take an immediate as its first operand.
+ //
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(),
+ EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
+ return MaterializeSETB(DL, NewEFLAGS, DAG);
+ }
+ }
+
+ // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
+ // a zext and produces an all-ones bit which is more useful than 0/1 in some
+ // cases.
+ if (CC == X86::COND_B)
+ return MaterializeSETB(DL, EFLAGS, DAG);
+
+ SDValue Flags;
+
+ Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
+ }
+
+ return SDValue();
+}
+
+// Optimize branch condition evaluation.
+//
+static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+ SDValue Dest = N->getOperand(1);
+ SDValue EFLAGS = N->getOperand(3);
+ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
+
+ SDValue Flags;
+
+ Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
+ Flags);
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+ const X86TargetLowering *XTLI) {
+ SDValue Op0 = N->getOperand(0);
+ EVT InVT = Op0->getValueType(0);
+
+ // SINT_TO_FP(v4i8) -> SINT_TO_FP(SEXT(v4i8 to v4i32))
+ if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
+ SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
+ return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
+ }
+
+ // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
+ // a 32-bit target where SSE doesn't support i64->FP operations.
+ if (Op0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
+ EVT VT = Ld->getValueType(0);
+ if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
+ ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
+ !XTLI->getSubtarget()->is64Bit() &&
+ !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
+ Ld->getChain(), Op0, DAG);
+ DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
+ return FILDChain;
+ }
+ }
+ return SDValue();
+}
+
+// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
+static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
+ X86TargetLowering::DAGCombinerInfo &DCI) {
+ // If the LHS and RHS of the ADC node are zero, then it can't overflow and
+ // the result is either zero or one (depending on the input carry bit).
+ // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
+ if (X86::isZeroNode(N->getOperand(0)) &&
+ X86::isZeroNode(N->getOperand(1)) &&
+ // We don't have a good way to replace an EFLAGS use, so only do this when
+ // dead right now.
+ SDValue(N, 1).use_empty()) {
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
+ SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B,MVT::i8),
+ N->getOperand(2)),
+ DAG.getConstant(1, VT));
+ return DCI.CombineTo(N, Res1, CarryOut);
+ }
+
+ return SDValue();
+}
+
+// fold (add Y, (sete X, 0)) -> adc 0, Y
+// (add Y, (setne X, 0)) -> sbb -1, Y
+// (sub (sete X, 0), Y) -> sbb 0, Y
+// (sub (setne X, 0), Y) -> adc -1, Y
+static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // Look through ZExts.
+ SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
+ if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
+ return SDValue();
+
+ SDValue SetCC = Ext.getOperand(0);
+ if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
+ return SDValue();
+
+ X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ SDValue Cmp = SetCC.getOperand(1);
+ if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
+ !X86::isZeroNode(Cmp.getOperand(1)) ||
+ !Cmp.getOperand(0).getValueType().isInteger())
+ return SDValue();
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
+ DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
+ if (CC == X86::COND_NE)
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
+}
+
+/// PerformADDCombine - Do target-specific dag combines on integer adds.
+static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Try to synthesize horizontal adds from adds of shuffles.
+ if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ isHorizontalBinOp(Op0, Op1, true))
+ return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
+
+ return OptimizeConditionalInDecrement(N, DAG);
+}
+
+static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // X86 can't encode an immediate LHS of a sub. See if we can push the
+ // negation into a preceding instruction.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
+ // If the RHS of the sub is a XOR with one use and a constant, invert the
+ // immediate. Then add one to the LHS of the sub so we can turn
+ // X-Y -> X+~Y+1, saving one register.
+ if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
+ isa<ConstantSDNode>(Op1.getOperand(1))) {
+ APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
+ EVT VT = Op0.getValueType();
+ SDValue NewXor = DAG.getNode(ISD::XOR, Op1.getDebugLoc(), VT,
+ Op1.getOperand(0),
+ DAG.getConstant(~XorC, VT));
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, NewXor,
+ DAG.getConstant(C->getAPIntValue()+1, VT));
+ }
+ }
+
+ // Try to synthesize horizontal adds from adds of shuffles.
+ EVT VT = N->getValueType(0);
+ if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ isHorizontalBinOp(Op0, Op1, true))
+ return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
+
+ return OptimizeConditionalInDecrement(N, DAG);
+}
+
+/// performVZEXTCombine - Performs build vector combines
+static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ // (vzext (bitcast (vzext (x)) -> (vzext x)
+ SDValue In = N->getOperand(0);
+ while (In.getOpcode() == ISD::BITCAST)
+ In = In.getOperand(0);
+
+ if (In.getOpcode() != X86ISD::VZEXT)
+ return SDValue();
+
+ return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0),
+ In.getOperand(0));
+}
+
+SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
+ case ISD::VSELECT:
+ case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
+ case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
+ case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
+ case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
+ case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DAG, DCI, Subtarget);
+ case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
+ case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
+ case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
+ case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
+ case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
+ case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
+ case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
+ case X86ISD::FXOR:
+ case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FMIN:
+ case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG);
+ case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
+ case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
+ case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
+ case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
+ case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
+ case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
+ case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
+ case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
+ case X86ISD::SHUFP: // Handle all target specific shuffles
+ case X86ISD::PALIGNR:
+ case X86ISD::UNPCKH:
+ case X86ISD::UNPCKL:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
+ case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
+ }
+
+ return SDValue();
+}
+
+/// isTypeDesirableForOp - Return true if the target has native support for
+/// the specified value type and it is 'desirable' to use the type for the
+/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+/// instruction encodings are longer and some i16 instructions are slow.
+bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
+ if (!isTypeLegal(VT))
+ return false;
+ if (VT != MVT::i16)
+ return true;
+
+ switch (Opc) {
+ default:
+ return true;
+ case ISD::LOAD:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SUB:
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return false;
+ }
+}
+
+/// IsDesirableToPromoteOp - This method query the target whether it is
+/// beneficial for dag combiner to promote the specified node. If true, it
+/// should return the desired promotion type by reference.
+bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
+ EVT VT = Op.getValueType();
+ if (VT != MVT::i16)
+ return false;
+
+ bool Promote = false;
+ bool Commute = false;
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ // If the non-extending load has a single use and it's not live out, then it
+ // might be folded.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&&
+ Op.hasOneUse()*/) {
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ // The only case where we'd want to promote LOAD (rather then it being
+ // promoted as an operand is when it's only use is liveout.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ }
+ }
+ Promote = true;
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Promote = true;
+ break;
+ case ISD::SHL:
+ case ISD::SRL: {
+ SDValue N0 = Op.getOperand(0);
+ // Look out for (store (shl (load), x)).
+ if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
+ return false;
+ Promote = true;
+ break;
+ }
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ Commute = true;
+ // fallthrough
+ case ISD::SUB: {
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ if (!Commute && MayFoldLoad(N1))
+ return false;
+ // Avoid disabling potential load folding opportunities.
+ if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op)))
+ return false;
+ if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op)))
+ return false;
+ Promote = true;
+ }
+ }
+
+ PVT = MVT::i32;
+ return Promote;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+namespace {
+ // Helper to match a string separated by whitespace.
+ bool matchAsmImpl(StringRef s, ArrayRef<const StringRef *> args) {
+ s = s.substr(s.find_first_not_of(" \t")); // Skip leading whitespace.
+
+ for (unsigned i = 0, e = args.size(); i != e; ++i) {
+ StringRef piece(*args[i]);
+ if (!s.startswith(piece)) // Check if the piece matches.
+ return false;
+
+ s = s.substr(piece.size());
+ StringRef::size_type pos = s.find_first_not_of(" \t");
+ if (pos == 0) // We matched a prefix.
+ return false;
+
+ s = s.substr(pos);
+ }
+
+ return s.empty();
+ }
+ const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
+}
+
+bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+
+ std::string AsmStr = IA->getAsmString();
+
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+
+ // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical
+ // ops instead of emitting the bswap asm. For now, we don't support 486 or
+ // lower so don't worry about this.
+ // bswap $0
+ if (matchAsm(AsmPieces[0], "bswap", "$0") ||
+ matchAsm(AsmPieces[0], "bswapl", "$0") ||
+ matchAsm(AsmPieces[0], "bswapq", "$0") ||
+ matchAsm(AsmPieces[0], "bswap", "${0:q}") ||
+ matchAsm(AsmPieces[0], "bswapl", "${0:q}") ||
+ matchAsm(AsmPieces[0], "bswapq", "${0:q}")) {
+ // No need to check constraints, nothing other than the equivalent of
+ // "=r,0" would be valid here.
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+
+ // rorw $$8, ${0:w} --> llvm.bswap.i16
+ if (CI->getType()->isIntegerTy(16) &&
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
+ (matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") ||
+ matchAsm(AsmPieces[0], "rolw", "$$8,", "${0:w}"))) {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ array_pod_sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}")
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ break;
+ case 3:
+ if (CI->getType()->isIntegerTy(32) &&
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
+ matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") &&
+ matchAsm(AsmPieces[1], "rorl", "$$16,", "$0") &&
+ matchAsm(AsmPieces[2], "rorw", "$$8,", "${0:w}")) {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ array_pod_sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}")
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+
+ if (CI->getType()->isIntegerTy(64)) {
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+ if (Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ if (matchAsm(AsmPieces[0], "bswap", "%eax") &&
+ matchAsm(AsmPieces[1], "bswap", "%edx") &&
+ matchAsm(AsmPieces[2], "xchgl", "%eax,", "%edx"))
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ break;
+ }
+ return false;
+}
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+X86TargetLowering::ConstraintType
+X86TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'f':
+ case 't':
+ case 'u':
+ case 'y':
+ case 'x':
+ case 'Y':
+ case 'l':
+ return C_RegisterClass;
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ return C_Register;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'G':
+ case 'C':
+ case 'e':
+ case 'Z':
+ return C_Other;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ X86TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'f':
+ case 't':
+ case 'u':
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'y':
+ if (type->isX86_MMXTy() && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
+ case 'x':
+ case 'Y':
+ if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
+ ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasFp256()))
+ weight = CW_Register;
+ break;
+ case 'I':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+ if (C->getZExtValue() <= 31)
+ weight = CW_Constant;
+ }
+ break;
+ case 'J':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 63)
+ weight = CW_Constant;
+ }
+ break;
+ case 'K':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
+ weight = CW_Constant;
+ }
+ break;
+ case 'L':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
+ weight = CW_Constant;
+ }
+ break;
+ case 'M':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 3)
+ weight = CW_Constant;
+ }
+ break;
+ case 'N':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xff)
+ weight = CW_Constant;
+ }
+ break;
+ case 'G':
+ case 'C':
+ if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ weight = CW_Constant;
+ }
+ break;
+ case 'e':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80000000LL) &&
+ (C->getSExtValue() <= 0x7fffffffLL))
+ weight = CW_Constant;
+ }
+ break;
+ case 'Z':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xffffffff)
+ weight = CW_Constant;
+ }
+ break;
+ }
+ return weight;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *X86TargetLowering::
+LowerXConstraint(EVT ConstraintVT) const {
+ // FP X constraints get lowered to SSE1/2 registers if available, otherwise
+ // 'f' like normal targets.
+ if (ConstraintVT.isFloatingPoint()) {
+ if (Subtarget->hasSSE2())
+ return "Y";
+ if (Subtarget->hasSSE1())
+ return "x";
+ }
+
+ return TargetLowering::LowerXConstraint(ConstraintVT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only support length 1 constraints for now.
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'I':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 31) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'J':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 63) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'K':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (isInt<8>(C->getSExtValue())) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'N':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 255) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'e': {
+ // 32-bit signed value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getSExtValue())) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
+ break;
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ }
+ return;
+ }
+ case 'Z': {
+ // 32-bit unsigned value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getZExtValue())) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ return;
+ }
+ case 'i': {
+ // Literal immediates are always ok.
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
+ break;
+ }
+
+ // In any sort of PIC mode addresses need to be computed at runtime by
+ // adding in a register or some sort of table lookup. These can't
+ // be used as immediates.
+ if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+ return;
+
+ // If we are in non-pic codegen mode, we allow the address of a global (with
+ // an optional displacement) to be used with 'i'.
+ GlobalAddressSDNode *GA = 0;
+ int64_t Offset = 0;
+
+ // Match either (GA), (GA+C), (GA+C1+C2), etc.
+ while (1) {
+ if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
+ Offset += GA->getOffset();
+ break;
+ } else if (Op.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ } else if (Op.getOpcode() == ISD::SUB) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += -C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ }
+
+ // Otherwise, this isn't something we can handle, reject it.
+ return;
+ }
+
+ const GlobalValue *GV = GA->getGlobal();
+ // If we require an extra load to get this address, as in PIC mode, we
+ // can't accept it.
+ if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
+ getTargetMachine())))
+ return;
+
+ Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ GA->getValueType(0), Offset);
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ // First, see if this is a constraint that directly corresponds to an LLVM
+ // register class.
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ // TODO: Slight differences here in allocation order and leaving
+ // RIP in the class. Do they matter any more here than they do
+ // in the normal allocation?
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+ if (Subtarget->is64Bit()) {
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, &X86::GR32RegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16RegClass);
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, &X86::GR8RegClass);
+ if (VT == MVT::i64 || VT == MVT::f64)
+ return std::make_pair(0U, &X86::GR64RegClass);
+ break;
+ }
+ // 32-bit fallthrough
+ case 'Q': // Q_REGS
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, &X86::GR32_ABCDRegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16_ABCDRegClass);
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
+ if (VT == MVT::i64)
+ return std::make_pair(0U, &X86::GR64_ABCDRegClass);
+ break;
+ case 'r': // GENERAL_REGS
+ case 'l': // INDEX_REGS
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, &X86::GR8RegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16RegClass);
+ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, &X86::GR32RegClass);
+ return std::make_pair(0U, &X86::GR64RegClass);
+ case 'R': // LEGACY_REGS
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, &X86::GR8_NOREXRegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16_NOREXRegClass);
+ if (VT == MVT::i32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, &X86::GR32_NOREXRegClass);
+ return std::make_pair(0U, &X86::GR64_NOREXRegClass);
+ case 'f': // FP Stack registers.
+ // If SSE is enabled for this VT, use f80 to ensure the isel moves the
+ // value to the correct fpstack register class.
+ if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, &X86::RFP32RegClass);
+ if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, &X86::RFP64RegClass);
+ return std::make_pair(0U, &X86::RFP80RegClass);
+ case 'y': // MMX_REGS if MMX allowed.
+ if (!Subtarget->hasMMX()) break;
+ return std::make_pair(0U, &X86::VR64RegClass);
+ case 'Y': // SSE_REGS if SSE2 allowed
+ if (!Subtarget->hasSSE2()) break;
+ // FALL THROUGH.
+ case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
+ if (!Subtarget->hasSSE1()) break;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ // Scalar SSE types.
+ case MVT::f32:
+ case MVT::i32:
+ return std::make_pair(0U, &X86::FR32RegClass);
+ case MVT::f64:
+ case MVT::i64:
+ return std::make_pair(0U, &X86::FR64RegClass);
+ // Vector types.
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return std::make_pair(0U, &X86::VR128RegClass);
+ // AVX types.
+ case MVT::v32i8:
+ case MVT::v16i16:
+ case MVT::v8i32:
+ case MVT::v4i64:
+ case MVT::v8f32:
+ case MVT::v4f64:
+ return std::make_pair(0U, &X86::VR256RegClass);
+ }
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ std::pair<unsigned, const TargetRegisterClass*> Res;
+ Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // Not found as a standard register?
+ if (Res.second == 0) {
+ // Map st(0) -> st(7) -> ST0
+ if (Constraint.size() == 7 && Constraint[0] == '{' &&
+ tolower(Constraint[1]) == 's' &&
+ tolower(Constraint[2]) == 't' &&
+ Constraint[3] == '(' &&
+ (Constraint[4] >= '0' && Constraint[4] <= '7') &&
+ Constraint[5] == ')' &&
+ Constraint[6] == '}') {
+
+ Res.first = X86::ST0+Constraint[4]-'0';
+ Res.second = &X86::RFP80RegClass;
+ return Res;
+ }
+
+ // GCC allows "st(0)" to be called just plain "st".
+ if (StringRef("{st}").equals_lower(Constraint)) {
+ Res.first = X86::ST0;
+ Res.second = &X86::RFP80RegClass;
+ return Res;
+ }
+
+ // flags -> EFLAGS
+ if (StringRef("{flags}").equals_lower(Constraint)) {
+ Res.first = X86::EFLAGS;
+ Res.second = &X86::CCRRegClass;
+ return Res;
+ }
+
+ // 'A' means EAX + EDX.
+ if (Constraint == "A") {
+ Res.first = X86::EAX;
+ Res.second = &X86::GR32_ADRegClass;
+ return Res;
+ }
+ return Res;
+ }
+
+ // Otherwise, check to see if this is a register class of the wrong value
+ // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
+ // turn into {ax},{dx}.
+ if (Res.second->hasType(VT))
+ return Res; // Correct type already, nothing to do.
+
+ // All of the single-register GCC register classes map their values onto
+ // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
+ // really want an 8-bit or 32-bit register, map to the appropriate register
+ // class and return the appropriate register.
+ if (Res.second == &X86::GR16RegClass) {
+ if (VT == MVT::i8 || VT == MVT::i1) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::AL; break;
+ case X86::DX: DestReg = X86::DL; break;
+ case X86::CX: DestReg = X86::CL; break;
+ case X86::BX: DestReg = X86::BL; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = &X86::GR8RegClass;
+ }
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::EAX; break;
+ case X86::DX: DestReg = X86::EDX; break;
+ case X86::CX: DestReg = X86::ECX; break;
+ case X86::BX: DestReg = X86::EBX; break;
+ case X86::SI: DestReg = X86::ESI; break;
+ case X86::DI: DestReg = X86::EDI; break;
+ case X86::BP: DestReg = X86::EBP; break;
+ case X86::SP: DestReg = X86::ESP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = &X86::GR32RegClass;
+ }
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::RAX; break;
+ case X86::DX: DestReg = X86::RDX; break;
+ case X86::CX: DestReg = X86::RCX; break;
+ case X86::BX: DestReg = X86::RBX; break;
+ case X86::SI: DestReg = X86::RSI; break;
+ case X86::DI: DestReg = X86::RDI; break;
+ case X86::BP: DestReg = X86::RBP; break;
+ case X86::SP: DestReg = X86::RSP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = &X86::GR64RegClass;
+ }
+ }
+ } else if (Res.second == &X86::FR32RegClass ||
+ Res.second == &X86::FR64RegClass ||
+ Res.second == &X86::VR128RegClass) {
+ // Handle references to XMM physical registers that got mapped into the
+ // wrong class. This can happen with constraints like {xmm0} where the
+ // target independent register mapper will just pick the first match it can
+ // find, ignoring the required type.
+
+ if (VT == MVT::f32 || VT == MVT::i32)
+ Res.second = &X86::FR32RegClass;
+ else if (VT == MVT::f64 || VT == MVT::i64)
+ Res.second = &X86::FR64RegClass;
+ else if (X86::VR128RegClass.hasType(VT))
+ Res.second = &X86::VR128RegClass;
+ else if (X86::VR256RegClass.hasType(VT))
+ Res.second = &X86::VR256RegClass;
+ }
+
+ return Res;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
new file mode 100644
index 000000000000..5725f7aea581
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -0,0 +1,950 @@
+//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ISELLOWERING_H
+#define X86ISELLOWERING_H
+
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+
+namespace llvm {
+ namespace X86ISD {
+ // X86 Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// BSF - Bit scan forward.
+ /// BSR - Bit scan reverse.
+ BSF,
+ BSR,
+
+ /// SHLD, SHRD - Double shift instructions. These correspond to
+ /// X86::SHLDxx and X86::SHRDxx instructions.
+ SHLD,
+ SHRD,
+
+ /// FAND - Bitwise logical AND of floating point values. This corresponds
+ /// to X86::ANDPS or X86::ANDPD.
+ FAND,
+
+ /// FOR - Bitwise logical OR of floating point values. This corresponds
+ /// to X86::ORPS or X86::ORPD.
+ FOR,
+
+ /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// to X86::XORPS or X86::XORPD.
+ FXOR,
+
+ /// FSRL - Bitwise logical right shift of floating point values. These
+ /// corresponds to X86::PSRLDQ.
+ FSRL,
+
+ /// CALL - These operations represent an abstract X86 call
+ /// instruction, which includes a bunch of information. In particular the
+ /// operands of these node are:
+ ///
+ /// #0 - The incoming token chain
+ /// #1 - The callee
+ /// #2 - The number of arg bytes the caller pushes on the stack.
+ /// #3 - The number of arg bytes the callee pops off the stack.
+ /// #4 - The value to pass in AL/AX/EAX (optional)
+ /// #5 - The value to pass in DL/DX/EDX (optional)
+ ///
+ /// The result values of these nodes are:
+ ///
+ /// #0 - The outgoing token chain
+ /// #1 - The first register result value (optional)
+ /// #2 - The second register result value (optional)
+ ///
+ CALL,
+
+ /// RDTSC_DAG - This operation implements the lowering for
+ /// readcyclecounter
+ RDTSC_DAG,
+
+ /// X86 compare and logical compare instructions.
+ CMP, COMI, UCOMI,
+
+ /// X86 bit-test instructions.
+ BT,
+
+ /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
+ /// operand, usually produced by a CMP instruction.
+ SETCC,
+
+ // Same as SETCC except it's materialized with a sbb and the value is all
+ // one's or all zero's.
+ SETCC_CARRY, // R = carry_bit ? ~0 : 0
+
+ /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
+ /// Operands are two FP values to compare; result is a mask of
+ /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
+ FSETCCss, FSETCCsd,
+
+ /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
+ /// result in an integer GPR. Needs masking for scalar result.
+ FGETSIGNx86,
+
+ /// X86 conditional moves. Operand 0 and operand 1 are the two values
+ /// to select from. Operand 2 is the condition code, and operand 3 is the
+ /// flag operand produced by a CMP or TEST instruction. It also writes a
+ /// flag result.
+ CMOV,
+
+ /// X86 conditional branches. Operand 0 is the chain operand, operand 1
+ /// is the block to branch if condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// or TEST instruction.
+ BRCOND,
+
+ /// Return with a flag operand. Operand 0 is the chain operand, operand
+ /// 1 is the number of bytes of stack to pop.
+ RET_FLAG,
+
+ /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ REP_STOS,
+
+ /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ REP_MOVS,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// Wrapper - A wrapper node for TargetConstantPool,
+ /// TargetExternalSymbol, and TargetGlobalAddress.
+ Wrapper,
+
+ /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+ /// relative displacements.
+ WrapperRIP,
+
+ /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+ /// to an MMX vector. If you think this is too close to the previous
+ /// mnemonic, so do I; blame Intel.
+ MOVDQ2Q,
+
+ /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX
+ /// vector to a GPR.
+ MMX_MOVD2W,
+
+ /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRB.
+ PEXTRB,
+
+ /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRW.
+ PEXTRW,
+
+ /// INSERTPS - Insert any element of a 4 x float vector into any element
+ /// of a destination 4 x floatvector.
+ INSERTPS,
+
+ /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRB.
+ PINSRB,
+
+ /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRW.
+ PINSRW, MMX_PINSRW,
+
+ /// PSHUFB - Shuffle 16 8-bit values within a vector.
+ PSHUFB,
+
+ /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+ ANDNP,
+
+ /// PSIGN - Copy integer sign.
+ PSIGN,
+
+ /// BLENDV - Blend where the selector is a register.
+ BLENDV,
+
+ /// BLENDI - Blend where the selector is an immediate.
+ BLENDI,
+
+ // SUBUS - Integer sub with unsigned saturation.
+ SUBUS,
+
+ /// HADD - Integer horizontal add.
+ HADD,
+
+ /// HSUB - Integer horizontal sub.
+ HSUB,
+
+ /// FHADD - Floating point horizontal add.
+ FHADD,
+
+ /// FHSUB - Floating point horizontal sub.
+ FHSUB,
+
+ /// UMAX, UMIN - Unsigned integer max and min.
+ UMAX, UMIN,
+
+ /// SMAX, SMIN - Signed integer max and min.
+ SMAX, SMIN,
+
+ /// FMAX, FMIN - Floating point max and min.
+ ///
+ FMAX, FMIN,
+
+ /// FMAXC, FMINC - Commutative FMIN and FMAX.
+ FMAXC, FMINC,
+
+ /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
+ /// approximation. Note that these typically require refinement
+ /// in order to obtain suitable precision.
+ FRSQRT, FRCP,
+
+ // TLSADDR - Thread Local Storage.
+ TLSADDR,
+
+ // TLSBASEADDR - Thread Local Storage. A call to get the start address
+ // of the TLS block for the current module.
+ TLSBASEADDR,
+
+ // TLSCALL - Thread Local Storage. When calling to an OS provided
+ // thunk at the address from an earlier relocation.
+ TLSCALL,
+
+ // EH_RETURN - Exception Handling helpers.
+ EH_RETURN,
+
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
+ /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for
+ /// the list of operands.
+ TC_RETURN,
+
+ // VZEXT_MOVL - Vector move low and zero extend.
+ VZEXT_MOVL,
+
+ // VSEXT_MOVL - Vector move low and sign extend.
+ VSEXT_MOVL,
+
+ // VZEXT - Vector integer zero-extend.
+ VZEXT,
+
+ // VSEXT - Vector integer signed-extend.
+ VSEXT,
+
+ // VFPEXT - Vector FP extend.
+ VFPEXT,
+
+ // VFPROUND - Vector FP round.
+ VFPROUND,
+
+ // VSHL, VSRL - 128-bit vector logical left / right shift
+ VSHLDQ, VSRLDQ,
+
+ // VSHL, VSRL, VSRA - Vector shift elements
+ VSHL, VSRL, VSRA,
+
+ // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
+ VSHLI, VSRLI, VSRAI,
+
+ // CMPP - Vector packed double/float comparison.
+ CMPP,
+
+ // PCMP* - Vector integer comparisons.
+ PCMPEQ, PCMPGT,
+
+ // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+ ADD, SUB, ADC, SBB, SMUL,
+ INC, DEC, OR, XOR, AND,
+
+ BLSI, // BLSI - Extract lowest set isolated bit
+ BLSMSK, // BLSMSK - Get mask up to lowest set bit
+ BLSR, // BLSR - Reset lowest set bit
+
+ UMUL, // LOW, HI, FLAGS = umul LHS, RHS
+
+ // MUL_IMM - X86 specific multiply by immediate.
+ MUL_IMM,
+
+ // PTEST - Vector bitwise comparisons
+ PTEST,
+
+ // TESTP - Vector packed fp sign bitwise comparisons
+ TESTP,
+
+ // Several flavors of instructions with vector shuffle behaviors.
+ PALIGNR,
+ PSHUFD,
+ PSHUFHW,
+ PSHUFLW,
+ SHUFP,
+ MOVDDUP,
+ MOVSHDUP,
+ MOVSLDUP,
+ MOVLHPS,
+ MOVLHPD,
+ MOVHLPS,
+ MOVLPS,
+ MOVLPD,
+ MOVSD,
+ MOVSS,
+ UNPCKL,
+ UNPCKH,
+ VPERMILP,
+ VPERMV,
+ VPERMI,
+ VPERM2X128,
+ VBROADCAST,
+
+ // PMULUDQ - Vector multiply packed unsigned doubleword integers
+ PMULUDQ,
+
+ // FMA nodes
+ FMADD,
+ FNMADD,
+ FMSUB,
+ FNMSUB,
+ FMADDSUB,
+ FMSUBADD,
+
+ // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
+ // according to %al. An operator is needed so that this can be expanded
+ // with control flow.
+ VASTART_SAVE_XMM_REGS,
+
+ // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+ WIN_ALLOCA,
+
+ // SEG_ALLOCA - For allocating variable amounts of stack space when using
+ // segmented stacks. Check if the current stacklet has enough space, and
+ // falls back to heap allocation if not.
+ SEG_ALLOCA,
+
+ // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui.
+ WIN_FTOL,
+
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE,
+
+ // FNSTSW16r - Store FP status word into i16 register.
+ FNSTSW16r,
+
+ // SAHF - Store contents of %ah into %eflags.
+ SAHF,
+
+ // RDRAND - Get a random integer and indicate whether it is valid in CF.
+ RDRAND,
+
+ // RDSEED - Get a NIST SP800-90B & C compliant random integer and
+ // indicate whether it is valid in CF.
+ RDSEED,
+
+ // PCMP*STRI
+ PCMPISTRI,
+ PCMPESTRI,
+
+ // XTEST - Test if in transactional execution.
+ XTEST,
+
+ // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+ // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+ // Atomic 64-bit binary operations.
+ ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMMAX64_DAG,
+ ATOMMIN64_DAG,
+ ATOMUMAX64_DAG,
+ ATOMUMIN64_DAG,
+ ATOMSWAP64_DAG,
+
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
+ LCMPXCHG_DAG,
+ LCMPXCHG8_DAG,
+ LCMPXCHG16_DAG,
+
+ // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD,
+
+ // FNSTCW16m - Store FP control world into i16 memory.
+ FNSTCW16m,
+
+ /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// integer destination in memory and a FP reg source. This corresponds
+ /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+ /// has two inputs (token chain and address) and two outputs (int value
+ /// and token chain).
+ FP_TO_INT16_IN_MEM,
+ FP_TO_INT32_IN_MEM,
+ FP_TO_INT64_IN_MEM,
+
+ /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// integer source in memory and FP reg result. This corresponds to the
+ /// X86::FILD*m instructions. It has three inputs (token chain, address,
+ /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+ /// also produces a flag).
+ FILD,
+ FILD_FLAG,
+
+ /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+ /// operand, ptr to load from, and a ValueType node indicating the type
+ /// to load to.
+ FLD,
+
+ /// FST - This instruction implements a truncating store to FP stack
+ /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+ /// chain operand, value to store, address, and a ValueType to store it
+ /// as.
+ FST,
+
+ /// VAARG_64 - This instruction grabs the address of the next argument
+ /// from a va_list. (reads and modifies the va_list in memory)
+ VAARG_64
+
+ // WARNING: Do not add anything in the end unless you want the node to
+ // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
+ // thought as target memory ops!
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace X86 {
+ /// isVEXTRACTF128Index - Return true if the specified
+ /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+ /// suitable for input to VEXTRACTF128.
+ bool isVEXTRACTF128Index(SDNode *N);
+
+ /// isVINSERTF128Index - Return true if the specified
+ /// INSERT_SUBVECTOR operand specifies a subvector insert that is
+ /// suitable for input to VINSERTF128.
+ bool isVINSERTF128Index(SDNode *N);
+
+ /// getExtractVEXTRACTF128Immediate - Return the appropriate
+ /// immediate to extract the specified EXTRACT_SUBVECTOR index
+ /// with VEXTRACTF128 instructions.
+ unsigned getExtractVEXTRACTF128Immediate(SDNode *N);
+
+ /// getInsertVINSERTF128Immediate - Return the appropriate
+ /// immediate to insert at the specified INSERT_SUBVECTOR index
+ /// with VINSERTF128 instructions.
+ unsigned getInsertVINSERTF128Immediate(SDNode *N);
+
+ /// isZeroNode - Returns true if Elt is a constant zero or a floating point
+ /// constant +0.0.
+ bool isZeroNode(SDValue Elt);
+
+ /// isOffsetSuitableForCodeModel - Returns true of the given offset can be
+ /// fit into displacement field of the instruction.
+ bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement = true);
+
+
+ /// isCalleePop - Determines whether the callee is required to pop its
+ /// own arguments. Callee pop is necessary to support tail calls.
+ bool isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool TailCallOpt);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // X86TargetLowering - X86 Implementation of the TargetLowering interface
+ class X86TargetLowering : public TargetLowering {
+ public:
+ explicit X86TargetLowering(X86TargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding() const;
+
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
+ virtual const MCExpr *
+ LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB, unsigned uid,
+ MCContext &Ctx) const;
+
+ /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
+ /// jumptable.
+ virtual SDValue getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const;
+ virtual const MCExpr *
+ getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI, MCContext &Ctx) const;
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. For X86, aggregates
+ /// that contains are placed at 16-byte boundaries while the rest are at
+ /// 4-byte boundaries.
+ virtual unsigned getByValTypeAlignment(Type *Ty) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. If DstAlign is zero that means it's safe to destination
+ /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+ /// means there isn't a need to check it against alignment requirement,
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
+ virtual EVT
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
+ /// isSafeMemOpType - Returns true if it's safe to use load / store of the
+ /// specified type to expand memcpy / memset inline. This is mostly true
+ /// for all types except for some special cases. For example, on X86
+ /// targets without SSE2 f64 load / store are done with fldl / fstpl which
+ /// also does type conversion. Note the specified type doesn't have to be
+ /// legal as the hook is used before type legalization.
+ virtual bool isSafeMemOpType(MVT VT) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// unaligned memory accesses. of the specified type. Returns whether it
+ /// is "fast" by reference in the second argument.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// isTypeDesirableForOp - Return true if the target has native support for
+ /// the specified value type and it is 'desirable' to use the type for the
+ /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+ /// instruction encodings are longer and some i16 instructions are slow.
+ virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const;
+
+ /// isTypeDesirable - Return true if the target has native support for the
+ /// specified value type and it is 'desirable' to use the type. e.g. On x86
+ /// i16 is legal, but undesirable since i16 instruction encodings are longer
+ /// and some i16 instructions are slow.
+ virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
+ // operation that are sign bits.
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const;
+
+ virtual bool
+ isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
+
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ virtual ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ virtual const char *LowerXConstraint(EVT ConstraintVT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraint of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// getRegForInlineAsmConstraint - Given a physical register constraint
+ /// (e.g. {edx}), return the register number and the register class for the
+ /// register. This should only be used for C_Register constraints. On
+ /// error, this returns a register number of 0.
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// add immediate, that is the target has add instructions which can
+ /// add a register and the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalAddImmediate(int64_t Imm) const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ /// isZExtFree - Return true if any actual instruction that defines a
+ /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
+ /// register. This does not necessarily include registers defined in
+ /// unknown ways, such as incoming arguments, or copies from unknown
+ /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
+ /// does not necessarily apply to truncate instructions. e.g. on x86-64,
+ /// all instructions that define 32-bit values implicit zero-extend the
+ /// result out to 64 bits.
+ virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
+ /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+ /// from i32 to i8 but not from i32 to i16.
+ virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ /// isShuffleMaskLegal - Targets can use this to indicate that they only
+ /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+ /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
+ /// values are assumed to be legal.
+ virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const;
+
+ /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
+ /// used by Targets can use this to indicate if there is a suitable
+ /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
+ /// pool entry.
+ virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const;
+
+ /// ShouldShrinkFPConstant - If true, then instruction selection should
+ /// seek to shrink the FP constant of the specified type to a smaller type
+ /// in order to save space and / or reduce runtime.
+ virtual bool ShouldShrinkFPConstant(EVT VT) const {
+ // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
+ // expensive than a straight movsd. On the other hand, it's important to
+ // shrink long double fp constant since fldt is very slow.
+ return !X86ScalarSSEf64 || VT == MVT::f80;
+ }
+
+ const X86Subtarget* getSubtarget() const {
+ return Subtarget;
+ }
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
+ return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
+ (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
+ }
+
+ /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
+ /// for fptoui.
+ bool isTargetFTOL() const {
+ return Subtarget->isTargetWindows() && !Subtarget->is64Bit();
+ }
+
+ /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
+ /// used for fptoui to the given type.
+ bool isIntegerTypeFTOL(EVT VT) const {
+ return isTargetFTOL() && VT == MVT::i64;
+ }
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const;
+
+ /// getStackCookieLocation - Return true if the target stores stack
+ /// protector cookies at a fixed offset in some non-standard address
+ /// space, and populates the address space and offset as
+ /// appropriate.
+ virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+
+ SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
+ SelectionDAG &DAG) const;
+
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(MVT VT) const;
+
+ private:
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+ const X86RegisterInfo *RegInfo;
+ const DataLayout *TD;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf32;
+ bool X86ScalarSSEf64;
+
+ /// LegalFPImmediates - A list of legal fp immediates.
+ std::vector<APFloat> LegalFPImmediates;
+
+ /// addLegalFPImmediate - Indicate that this x86 target can instruction
+ /// select the specified FP immediate natively.
+ void addLegalFPImmediate(const APFloat& Imm) {
+ LegalFPImmediates.push_back(Imm);
+ }
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned i) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+
+ // Call lowering helpers.
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ Type *RetTy,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+ bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
+ SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
+ SDValue Chain, bool IsTailCall, bool Is64Bit,
+ int FPDiff, DebugLoc dl) const;
+
+ unsigned GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG &DAG) const;
+
+ std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool isSigned,
+ bool isReplace) const;
+
+ SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToBT(SDValue And, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+
+ // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
+ SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
+ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
+ SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
+
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+ virtual MVT
+ getTypeForExtArgOrReturn(MVT VT, ISD::NodeType ExtendKind) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
+ /// Utility function to emit atomic-load-arith operations (and, or, xor,
+ /// nand, max, min, umax, umin). It takes the corresponding instruction to
+ /// expand, the associated machine basic block, and the associated X86
+ /// opcodes for reg/reg.
+ MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ /// Utility function to emit atomic-load-arith operations (and, or, xor,
+ /// nand, add, sub, swap) for 64-bit operands on 32-bit target.
+ MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ // Utility function to emit the low-level va_arg code for X86-64.
+ MachineBasicBlock *EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ /// Utility function to emit the xmm reg save portion of va_start.
+ MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool Is64Bit) const;
+
+ MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ /// Emit nodes that will be selected as "test Op0,Op0", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
+
+ /// Emit nodes that will be selected as "cmp Op0,Op1", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG) const;
+
+ /// Convert a comparison if required by the subtarget.
+ SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
+ };
+
+ namespace X86 {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
+ }
+}
+
+#endif // X86ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
new file mode 100644
index 000000000000..ba1aede3c1a0
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -0,0 +1,103 @@
+//===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the 3DNow! instruction set, which extends MMX to support
+// floating point and also adds a few more random instructions for good measure.
+//
+//===----------------------------------------------------------------------===//
+
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
+ : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
+}
+
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src1 = $dst";
+}
+
+class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+}
+
+multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
+}
+
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+}
+
+multiclass I3DNow_conv_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src1), Mn, []>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src1), Mn, []>;
+}
+
+multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn))
+ (bitconvert (load_mmx addr:$src))))]>;
+}
+
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
+
+
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
+ [(int_x86_mmx_femms)]>;
+
+def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
+ "prefetch\t$addr",
+ [(prefetch addr:$addr, (i32 0), imm, (i32 1))]>;
+
+def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
+ [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))]>, TB,
+ Requires<[HasPrefetchW]>;
+
+// "3DNowA" instructions
+defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
+defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
new file mode 100644
index 000000000000..225e9720da0c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -0,0 +1,1361 @@
+//===-- X86InstrArithmetic.td - Integer Arithmetic Instrs --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the integer arithmetic instructions in the X86
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LEA - Load Effective Address
+let SchedRW = [WriteLEA] in {
+let neverHasSideEffects = 1 in
+def LEA16r : I<0x8D, MRMSrcMem,
+ (outs GR16:$dst), (ins i32mem:$src),
+ "lea{w}\t{$src|$dst}, {$dst|$src}", [], IIC_LEA_16>, OpSize;
+let isReMaterializable = 1 in
+def LEA32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins i32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)], IIC_LEA>,
+ Requires<[In32BitMode]>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins lea64_32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea64_32addr:$src)], IIC_LEA>,
+ Requires<[In64BitMode]>;
+
+let isReMaterializable = 1 in
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
+ "lea{q}\t{$src|$dst}, {$dst|$src}",
+ [(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Fixed-Register Multiplication and Division Instructions.
+//
+
+// SchedModel info for instruction that loads one value and gets the second
+// (and possibly third) value from a register.
+// This is used for instructions that put the memory operands before other
+// uses.
+class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Register reads (implicit or explicit).
+ ReadAfterLd, ReadAfterLd]>;
+
+// Extra precision multiplication
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
+// AL,AH = AL*GR8
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, GR8:$src)),
+ (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
+let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
+def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
+ "mul{w}\t$src",
+ [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
+def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
+ "mul{l}\t$src",
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
+ IIC_MUL32_REG>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
+ "mul{q}\t$src",
+ [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
+ IIC_MUL64>, Sched<[WriteIMul]>;
+// AL,AH = AL*[mem8]
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
+ "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, (loadi8 addr:$src))),
+ (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
+ "mul{w}\t$src",
+ [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
+ "mul{l}\t$src",
+ [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
+def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
+ "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>;
+}
+
+let neverHasSideEffects = 1 in {
+// AL,AH = AL*GR8
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
+ IIC_IMUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [],
+ IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [],
+ IIC_IMUL32_RR>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
+ IIC_IMUL64_RR>, Sched<[WriteIMul]>;
+
+let mayLoad = 1 in {
+// AL,AH = AL*[mem8]
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
+ "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
+ "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize,
+ SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
+ "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
+def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
+ "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>;
+}
+} // neverHasSideEffects
+
+
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+
+let isCommutable = 1, SchedRW = [WriteIMul] in {
+// X = IMUL Y, Z --> X = IMUL Z, Y
+// Register-Register Signed Integer Multiply
+def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, GR16:$src2))], IIC_IMUL16_RR>,
+ TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, GR32:$src2))], IIC_IMUL32_RR>,
+ TB;
+def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>,
+ TB;
+} // isCommutable, SchedRW
+
+// Register-Memory Signed Integer Multiply
+let SchedRW = [WriteIMulLd, ReadAfterLd] in {
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, (load addr:$src2)))],
+ IIC_IMUL16_RM>,
+ TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, (load addr:$src2)))],
+ IIC_IMUL32_RM>,
+ TB;
+def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, (load addr:$src2)))],
+ IIC_IMUL64_RM>,
+ TB;
+} // SchedRW
+} // Constraints = "$src1 = $dst"
+
+} // Defs = [EFLAGS]
+
+// Surprisingly enough, these are not two address instructions!
+let Defs = [EFLAGS] in {
+let SchedRW = [WriteIMul] in {
+// Register-Integer Signed Integer Multiply
+def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))],
+ IIC_IMUL16_RRI>, OpSize;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, i16immSExt8:$src2))],
+ IIC_IMUL16_RRI>,
+ OpSize;
+def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, imm:$src2))],
+ IIC_IMUL32_RRI>;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, i32immSExt8:$src2))],
+ IIC_IMUL32_RRI>;
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))],
+ IIC_IMUL64_RRI>;
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))],
+ IIC_IMUL64_RRI>;
+} // SchedRW
+
+// Memory-Integer Signed Integer Multiply
+let SchedRW = [WriteIMulLd] in {
+def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
+ (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))],
+ IIC_IMUL16_RMI>,
+ OpSize;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
+ (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i16immSExt8:$src2))], IIC_IMUL16_RMI>,
+ OpSize;
+def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
+ (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))],
+ IIC_IMUL32_RMI>;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
+ (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i32immSExt8:$src2))],
+ IIC_IMUL32_RMI>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt32:$src2))],
+ IIC_IMUL64_RMI>;
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
+ (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt8:$src2))],
+ IIC_IMUL64_RMI>;
+} // SchedRW
+} // Defs = [EFLAGS]
+
+
+
+
+// unsigned division/remainder
+let hasSideEffects = 1 in { // so that we don't speculatively execute
+let SchedRW = [WriteIDiv] in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "div{b}\t$src", [], IIC_DIV8_REG>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "div{w}\t$src", [], IIC_DIV16>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "div{l}\t$src", [], IIC_DIV32>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
+ "div{q}\t$src", [], IIC_DIV64>;
+} // SchedRW
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "div{b}\t$src", [], IIC_DIV8_MEM>,
+ SchedLoadReg<WriteIDivLd>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "div{w}\t$src", [], IIC_DIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
+ "div{l}\t$src", [], IIC_DIV32>,
+ SchedLoadReg<WriteIDivLd>;
+// RDX:RAX/[mem64] = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
+ "div{q}\t$src", [], IIC_DIV64>,
+ SchedLoadReg<WriteIDivLd>;
+}
+
+// Signed division/remainder.
+let SchedRW = [WriteIDiv] in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "idiv{b}\t$src", [], IIC_IDIV8>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "idiv{l}\t$src", [], IIC_IDIV32>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
+ "idiv{q}\t$src", [], IIC_IDIV64>;
+} // SchedRW
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "idiv{b}\t$src", [], IIC_IDIV8>,
+ SchedLoadReg<WriteIDivLd>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
+ "idiv{l}\t$src", [], IIC_IDIV32>,
+ SchedLoadReg<WriteIDivLd>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
+def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
+ "idiv{q}\t$src", [], IIC_IDIV64>,
+ SchedLoadReg<WriteIDivLd>;
+}
+} // hasSideEffects = 0
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions.
+//
+
+// unary instructions
+let CodeSize = 2 in {
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src1)),
+ (implicit EFLAGS)], IIC_UNARY_REG>;
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src1)),
+ (implicit EFLAGS)], IIC_UNARY_REG>, OpSize;
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src1)),
+ (implicit EFLAGS)], IIC_UNARY_REG>;
+def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
+ [(set GR64:$dst, (ineg GR64:$src1)),
+ (implicit EFLAGS)], IIC_UNARY_REG>;
+} // Constraints = "$src1 = $dst", SchedRW
+
+// Read-modify-write negate.
+let SchedRW = [WriteALULd, WriteRMW] in {
+def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+ "neg{b}\t$dst",
+ [(store (ineg (loadi8 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
+def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+ "neg{w}\t$dst",
+ [(store (ineg (loadi16 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize;
+def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+ "neg{l}\t$dst",
+ [(store (ineg (loadi32 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
+def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
+ [(store (ineg (loadi64 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
+} // SchedRW
+} // Defs = [EFLAGS]
+
+
+// Note: NOT does not set EFLAGS!
+
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+// Match xor -1 to not. Favors these over a move imm + xor to save code size.
+let AddedComplexity = 15 in {
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src1))], IIC_UNARY_REG>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src1))], IIC_UNARY_REG>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src1))], IIC_UNARY_REG>;
+def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
+ [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>;
+}
+} // Constraints = "$src1 = $dst", SchedRW
+
+let SchedRW = [WriteALULd, WriteRMW] in {
+def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+ "not{b}\t$dst",
+ [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
+def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+ "not{w}\t$dst",
+ [(store (not (loadi16 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>,
+ OpSize;
+def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+ "not{l}\t$dst",
+ [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
+def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
+ [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
+} // SchedRW
+} // CodeSize
+
+// TODO: inc/dec is slow for P4, but fast for Pentium-M.
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+let CodeSize = 2 in
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "inc{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))],
+ IIC_UNARY_REG>;
+
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
+ IIC_UNARY_REG>,
+ Requires<[In32BitMode]>;
+def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))],
+ IIC_UNARY_REG>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
+
+
+// In 64-bit mode, single byte INC and DEC cannot be encoded.
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))],
+ IIC_UNARY_REG>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
+ IIC_UNARY_REG>,
+ Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
+ IIC_UNARY_REG>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
+ IIC_UNARY_REG>,
+ Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
+
+} // Constraints = "$src1 = $dst", SchedRW
+
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
+ def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
+ def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
+ OpSize, Requires<[In32BitMode]>;
+ def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
+ Requires<[In32BitMode]>;
+ def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
+
+// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
+// how to unfold them.
+// FIXME: What is this for??
+def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
+ Requires<[In64BitMode]>;
+def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
+ Requires<[In64BitMode]>;
+} // CodeSize = 2, SchedRW
+
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+let CodeSize = 2 in
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "dec{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))],
+ IIC_UNARY_REG>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
+ IIC_UNARY_REG>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
+ IIC_UNARY_REG>,
+ Requires<[In32BitMode]>;
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
+ IIC_UNARY_REG>;
+} // CodeSize = 2
+} // Constraints = "$src1 = $dst", SchedRW
+
+
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
+ def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
+ def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
+ OpSize, Requires<[In32BitMode]>;
+ def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
+ Requires<[In32BitMode]>;
+ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
+} // CodeSize = 2, SchedRW
+} // Defs = [EFLAGS]
+
+
+/// X86TypeInfo - This is a bunch of information that describes relevant X86
+/// information about value types. For example, it can tell you what the
+/// register class and preferred load to use.
+class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
+ PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
+ Operand immoperand, SDPatternOperator immoperator,
+ Operand imm8operand, SDPatternOperator imm8operator,
+ bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
+ /// VT - This is the value type itself.
+ ValueType VT = vt;
+
+ /// InstrSuffix - This is the suffix used on instructions with this type. For
+ /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
+ string InstrSuffix = instrsuffix;
+
+ /// RegClass - This is the register class associated with this type. For
+ /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
+ RegisterClass RegClass = regclass;
+
+ /// LoadNode - This is the load node associated with this type. For
+ /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
+ PatFrag LoadNode = loadnode;
+
+ /// MemOperand - This is the memory operand associated with this type. For
+ /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
+ X86MemOperand MemOperand = memoperand;
+
+ /// ImmEncoding - This is the encoding of an immediate of this type. For
+ /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
+ /// since the immediate fields of i64 instructions is a 32-bit sign extended
+ /// value.
+ ImmType ImmEncoding = immkind;
+
+ /// ImmOperand - This is the operand kind of an immediate of this type. For
+ /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
+ /// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign
+ /// extended value.
+ Operand ImmOperand = immoperand;
+
+ /// ImmOperator - This is the operator that should be used to match an
+ /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
+ SDPatternOperator ImmOperator = immoperator;
+
+ /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
+ /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
+ /// only used for instructions that have a sign-extended imm8 field form.
+ Operand Imm8Operand = imm8operand;
+
+ /// Imm8Operator - This is the operator that should be used to match an 8-bit
+ /// sign extended immediate of this kind in a pattern (e.g. imm16immSExt8).
+ SDPatternOperator Imm8Operator = imm8operator;
+
+ /// HasOddOpcode - This bit is true if the instruction should have an odd (as
+ /// opposed to even) opcode. Operations on i8 are usually even, operations on
+ /// other datatypes are odd.
+ bit HasOddOpcode = hasOddOpcode;
+
+ /// HasOpSizePrefix - This bit is set to true if the instruction should have
+ /// the 0x66 operand size prefix. This is set for i16 types.
+ bit HasOpSizePrefix = hasOpSizePrefix;
+
+ /// HasREX_WPrefix - This bit is set to true if the instruction should have
+ /// the 0x40 REX prefix. This is set for i64 types.
+ bit HasREX_WPrefix = hasREX_WPrefix;
+}
+
+def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
+
+
+def Xi8 : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
+ Imm8 , i8imm , imm, i8imm , invalid_node,
+ 0, 0, 0>;
+def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
+ Imm16, i16imm, imm, i16i8imm, i16immSExt8,
+ 1, 1, 0>;
+def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
+ Imm32, i32imm, imm, i32i8imm, i32immSExt8,
+ 1, 0, 0>;
+def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
+ Imm32, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
+ 1, 0, 1>;
+
+/// ITy - This instruction base class takes the type info for the instruction.
+/// Using this, it:
+/// 1. Concatenates together the instruction mnemonic with the appropriate
+/// suffix letter, a tab, and the arguments.
+/// 2. Infers whether the instruction should have a 0x66 prefix byte.
+/// 3. Infers whether the instruction should have a 0x40 REX_W prefix.
+/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+/// or 1 (for i16,i32,i64 operations).
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+ string mnemonic, string args, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_NONMEM>
+ : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
+ opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
+ f, outs, ins,
+ !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern,
+ itin> {
+
+ // Infer instruction prefixes from type info.
+ let hasOpSizePrefix = typeinfo.HasOpSizePrefix;
+ let hasREX_WPrefix = typeinfo.HasREX_WPrefix;
+}
+
+// BinOpRR - Instructions like "add reg, reg, reg".
+class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern, InstrItinClass itin,
+ Format f = MRMDestReg>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
+ Sched<[WriteALU]>;
+
+// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
+// just a regclass (no eflags) as a result.
+class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ IIC_BIN_NONMEM>;
+
+// BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has
+// just a EFLAGS as a result.
+class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f = MRMDestReg>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ IIC_BIN_NONMEM, f>;
+
+// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result.
+class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ IIC_BIN_NONMEM>;
+
+// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result, and has EFLAGS as input.
+class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
+ EFLAGS))], IIC_BIN_NONMEM>;
+
+// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
+class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : ITy<opcode, MRMSrcReg, typeinfo,
+ (outs typeinfo.RegClass:$dst),
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
+ // The disassembler should know about this, but not the asmparser.
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+}
+
+// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding).
+class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : ITy<opcode, MRMSrcReg, typeinfo, (outs),
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
+ // The disassembler should know about this, but not the asmparser.
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+}
+
+// BinOpRM - Instructions like "add reg, reg, [mem]".
+class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern>
+ : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
+
+// BinOpRM_R - Instructions like "add reg, reg, [mem]".
+class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_F - Instructions like "cmp reg, [mem]".
+class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RF - Instructions like "add reg, reg, [mem]".
+class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
+class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
+ EFLAGS))]>;
+
+// BinOpRI - Instructions like "add reg, reg, imm".
+class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpRI_R - Instructions like "add reg, reg, imm".
+class BinOpRI_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_F - Instructions like "cmp reg, imm".
+class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RF - Instructions like "add reg, reg, imm".
+class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
+class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
+ EFLAGS))]>;
+
+// BinOpRI8 - Instructions like "add reg, reg, imm8".
+class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpRI8_R - Instructions like "add reg, reg, imm8".
+class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_F - Instructions like "cmp reg, imm8".
+class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RF - Instructions like "add reg, reg, imm8".
+class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8".
+class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
+ EFLAGS))]>;
+
+// BinOpMR - Instructions like "add [mem], reg".
+class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ list<dag> pattern>
+ : ITy<opcode, MRMDestMem, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]>;
+
+// BinOpMR_RMW - Instructions like "add [mem], reg".
+class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
+class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_F - Instructions like "cmp [mem], reg".
+class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(set EFLAGS, (opnode (load addr:$dst), typeinfo.RegClass:$src))]>;
+
+// BinOpMI - Instructions like "add [mem], imm".
+class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern, bits<8> opcode = 0x80>
+ : ITy<opcode, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpMI_RMW - Instructions like "add [mem], imm".
+class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
+class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_F - Instructions like "cmp [mem], imm".
+class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src))],
+ opcode>;
+
+// BinOpMI8 - Instructions like "add [mem], imm8".
+class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern>
+ : ITy<0x82, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpMI8_RMW - Instructions like "add [mem], imm8".
+class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8".
+class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_F - Instructions like "cmp [mem], imm8".
+class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src))]>;
+
+// BinOpAI - Instructions like "add %eax, %eax, imm".
+class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Register areg, string operands>
+ : ITy<opcode, RawFrm, typeinfo,
+ (outs), (ins typeinfo.ImmOperand:$src),
+ mnemonic, operands, []>, Sched<[WriteALU]> {
+ let ImmT = typeinfo.ImmEncoding;
+ let Uses = [areg];
+ let Defs = [areg];
+ let hasSideEffects = 0;
+}
+
+/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (...".
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnodeflag, SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+ def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+ def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ } // isCommutable
+
+ def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
+ def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
+ def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+
+ def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order specific, we want the mi8 forms to be listed
+ // first so that they are slightly preferred to the mi forms.
+ def NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
+}
+
+/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (node LHS, RHS, EFLAGS))" like ADC and
+/// SBB.
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode, bit CommutableRR,
+ bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+ def NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+ def NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order specific, we want the mi8 forms to be listed
+ // first so that they are slightly preferred to the mi forms.
+ def NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
+}
+
+/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
+/// defined with "(set EFLAGS, (...". It would be really nice to find a way
+/// to factor this with the other ArithBinOp_*.
+///
+multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
+ def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
+ def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+
+ def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order specific, we want the mi8 forms to be listed
+ // first so that they are slightly preferred to the mi forms.
+ def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
+}
+
+
+defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
+ X86and_flag, and, 1, 0>;
+defm OR : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
+ X86or_flag, or, 1, 0>;
+defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
+ X86xor_flag, xor, 1, 0>;
+defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+ X86add_flag, add, 1, 1>;
+let isCompare = 1 in {
+defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
+ X86sub_flag, sub, 0, 0>;
+}
+
+// Arithmetic.
+let Uses = [EFLAGS] in {
+ defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+ 1, 0>;
+ defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+ 0, 0>;
+}
+
+let isCompare = 1 in {
+defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Semantically, test instructions are similar like AND, except they don't
+// generate a result. From an encoding perspective, they are very different:
+// they don't have all the usual imm8 and REV forms, and are encoded into a
+// different space.
+def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86cmp (and_su node:$lhs, node:$rhs), 0)>;
+
+let isCompare = 1, Defs = [EFLAGS] in {
+ let isCommutable = 1 in {
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>;
+ } // isCommutable
+
+ def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>;
+ def TEST16rm : BinOpRM_F<0x84, "test", Xi16, X86testpat>;
+ def TEST32rm : BinOpRM_F<0x84, "test", Xi32, X86testpat>;
+ def TEST64rm : BinOpRM_F<0x84, "test", Xi64, X86testpat>;
+
+ def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
+ def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
+ def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>;
+ def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;
+
+ def TEST8mi : BinOpMI_F<"test", Xi8 , X86testpat, MRM0m, 0xF6>;
+ def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
+ def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
+ def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
+
+ def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+
+ // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
+ // register class is constrained to GR8_NOREX.
+ let isPseudo = 1 in
+ def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
+ "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
+}
+
+//===----------------------------------------------------------------------===//
+// ANDN Instruction
+//
+multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
+ PatFrag ld_frag> {
+ def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))],
+ IIC_BIN_NONMEM>, Sched<[WriteALU]>;
+ def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS,
+ (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8, VEX_4V;
+ defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W;
+}
+
+let Predicates = [HasBMI] in {
+ def : Pat<(and (not GR32:$src1), GR32:$src2),
+ (ANDN32rr GR32:$src1, GR32:$src2)>;
+ def : Pat<(and (not GR64:$src1), GR64:$src2),
+ (ANDN64rr GR64:$src1, GR64:$src2)>;
+ def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
+ (ANDN32rm GR32:$src1, addr:$src2)>;
+ def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
+ (ANDN64rm GR64:$src1, addr:$src2)>;
+}
+
+//===----------------------------------------------------------------------===//
+// MULX Instruction
+//
+multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in {
+ let isCommutable = 1 in
+ def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
+ !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+ [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMul]>;
+
+ let mayLoad = 1 in
+ def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
+ !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+ [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMulLd]>;
+}
+}
+
+let Predicates = [HasBMI2] in {
+ let Uses = [EDX] in
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem>;
+ let Uses = [RDX] in
+ defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W;
+}
+
+//===----------------------------------------------------------------------===//
+// ADCX Instruction
+//
+let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
+ def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "adcx{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8, OpSize;
+
+ def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "adcx{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
+
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
+ def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "adcx{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8, OpSize;
+
+ def ADCX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "adcx{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ADOX Instruction
+//
+let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
+ def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "adox{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8XS;
+
+ def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "adox{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
+
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
+ def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "adox{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8XS;
+
+ def ADOX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "adox{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+ }
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
new file mode 100644
index 000000000000..aaef4a466ded
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -0,0 +1,183 @@
+//===-- X86InstrBuilder.h - Functions to aid building x86 insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle X86'isms in a clean way.
+//
+// The BuildMem function may be used with the BuildMI function to add entire
+// memory references in a single, typed, function call. X86 memory references
+// can be very complex expressions (described in the README), so wrapping them
+// up behind an easier to use interface makes sense. Descriptions of the
+// functions are included below.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Scale, Index, Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRBUILDER_H
+#define X86INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+
+namespace llvm {
+
+/// X86AddressMode - This struct holds a generalized full x86 address mode.
+/// The base register can be a frame index, which will eventually be replaced
+/// with BP or SP and Disp being offsetted accordingly. The displacement may
+/// also include the offset of a global value.
+struct X86AddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned Scale;
+ unsigned IndexReg;
+ int Disp;
+ const GlobalValue *GV;
+ unsigned GVOpFlags;
+
+ X86AddressMode()
+ : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
+ Base.Reg = 0;
+ }
+
+
+ void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
+ assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+
+ if (BaseType == X86AddressMode::RegBase)
+ MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+ false, false, false, 0, false));
+ else {
+ assert(BaseType == X86AddressMode::FrameIndexBase);
+ MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
+ }
+
+ MO.push_back(MachineOperand::CreateImm(Scale));
+ MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
+ false, false, false, 0, false));
+
+ if (GV)
+ MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
+ else
+ MO.push_back(MachineOperand::CreateImm(Disp));
+
+ MO.push_back(MachineOperand::CreateReg(0, false, false,
+ false, false, false, 0, false));
+ }
+};
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no scale, index or displacement. An example is: DWORD PTR [EAX].
+///
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
+ // Because memory references are always represented with five
+ // values, this adds: Reg, 1, NoReg, 0, NoReg to the instruction.
+ return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0);
+}
+
+
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
+ return MIB.addImm(1).addReg(0).addImm(Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no scale or index, but with a
+/// displacement. An example is: DWORD PTR [EAX + 4].
+///
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
+ return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, bool isKill1,
+ unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1)
+ .addReg(Reg2, getKillRegState(isKill2)).addImm(0).addReg(0);
+}
+
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+ assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
+
+ if (AM.BaseType == X86AddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else {
+ assert(AM.BaseType == X86AddressMode::FrameIndexBase);
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ }
+
+ MIB.addImm(AM.Scale).addReg(AM.IndexReg);
+ if (AM.GV)
+ MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
+ else
+ MIB.addImm(AM.Disp);
+
+ return MIB.addReg(0);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Flags = 0;
+ if (MCID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (MCID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ return addOffset(MIB.addFrameIndex(FI), Offset)
+ .addMemOperand(MMO);
+}
+
+/// addConstantPoolReference - This function is used to add a reference to the
+/// base of a constant value spilled to the per-function constant pool. The
+/// reference uses the abstract ConstantPoolIndex which is retained until
+/// either machine code emission or assembly output. In PIC mode on x86-32,
+/// the GlobalBaseReg parameter can be used to make this a
+/// GlobalBaseReg-relative reference.
+///
+static inline const MachineInstrBuilder &
+addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
+ unsigned GlobalBaseReg, unsigned char OpFlags) {
+ //FIXME: factor this
+ return MIB.addReg(GlobalBaseReg).addImm(1).addReg(0)
+ .addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
new file mode 100644
index 000000000000..a967a4da5cf7
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -0,0 +1,111 @@
+//===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 conditional move and set on condition
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+
+// SetCC instructions.
+multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ isCommutable = 1, SchedRW = [WriteALU] in {
+ def NAME#16rr
+ : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))],
+ IIC_CMOV16_RR>,TB,OpSize;
+ def NAME#32rr
+ : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))],
+ IIC_CMOV32_RR>, TB;
+ def NAME#64rr
+ :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst,
+ (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))],
+ IIC_CMOV32_RR>, TB;
+ }
+
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ SchedRW = [WriteALULd, ReadAfterLd] in {
+ def NAME#16rm
+ : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ CondNode, EFLAGS))], IIC_CMOV16_RM>,
+ TB, OpSize;
+ def NAME#32rm
+ : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ CondNode, EFLAGS))], IIC_CMOV32_RM>, TB;
+ def NAME#64rm
+ :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ CondNode, EFLAGS))], IIC_CMOV32_RM>, TB;
+ } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // end multiclass
+
+
+// Conditional Moves.
+defm CMOVO : CMOV<0x40, "cmovo" , X86_COND_O>;
+defm CMOVNO : CMOV<0x41, "cmovno", X86_COND_NO>;
+defm CMOVB : CMOV<0x42, "cmovb" , X86_COND_B>;
+defm CMOVAE : CMOV<0x43, "cmovae", X86_COND_AE>;
+defm CMOVE : CMOV<0x44, "cmove" , X86_COND_E>;
+defm CMOVNE : CMOV<0x45, "cmovne", X86_COND_NE>;
+defm CMOVBE : CMOV<0x46, "cmovbe", X86_COND_BE>;
+defm CMOVA : CMOV<0x47, "cmova" , X86_COND_A>;
+defm CMOVS : CMOV<0x48, "cmovs" , X86_COND_S>;
+defm CMOVNS : CMOV<0x49, "cmovns", X86_COND_NS>;
+defm CMOVP : CMOV<0x4A, "cmovp" , X86_COND_P>;
+defm CMOVNP : CMOV<0x4B, "cmovnp", X86_COND_NP>;
+defm CMOVL : CMOV<0x4C, "cmovl" , X86_COND_L>;
+defm CMOVGE : CMOV<0x4D, "cmovge", X86_COND_GE>;
+defm CMOVLE : CMOV<0x4E, "cmovle", X86_COND_LE>;
+defm CMOVG : CMOV<0x4F, "cmovg" , X86_COND_G>;
+
+
+// SetCC instructions.
+multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
+ let Uses = [EFLAGS] in {
+ def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(set GR8:$dst, (X86setcc OpNode, EFLAGS))],
+ IIC_SET_R>, TB, Sched<[WriteALU]>;
+ def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(store (X86setcc OpNode, EFLAGS), addr:$dst)],
+ IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>;
+ } // Uses = [EFLAGS]
+}
+
+defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
+defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
+defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
+defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
+defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
+defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
+defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
+defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
+defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set
+defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed
+defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
+defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
+defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
+defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
+defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
+defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
new file mode 100644
index 000000000000..d9ff0c63c55f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -0,0 +1,1839 @@
+//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various pseudo instructions used by the compiler,
+// as well as Pat patterns used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pattern Matching Support
+
+def GetLo32XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 32 bits.
+ return getI32Imm((unsigned)N->getZExtValue());
+}]>;
+
+def GetLo8XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 8 bits.
+ return getI8Imm((uint8_t)N->getZExtValue());
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Random Pseudo Instructions.
+
+// PIC base construction. This expands to code that looks like this:
+// call $next_inst
+// popl %destreg"
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+ def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
+ "", []>;
+
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In32BitMode]>;
+}
+
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In64BitMode]>;
+}
+
+
+
+// x86-64 va_start lowering magic.
+let usesCustomInserter = 1 in {
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+ (outs),
+ (ins GR8:$al,
+ i64imm:$regsavefi, i64imm:$offset,
+ variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+ [(X86vastart_save_xmm_regs GR8:$al,
+ imm:$regsavefi,
+ imm:$offset)]>;
+
+// The VAARG_64 pseudo-instruction takes the address of the va_list,
+// and places the address of the next argument into a register.
+let Defs = [EFLAGS] in
+def VAARG_64 : I<0, Pseudo,
+ (outs GR64:$dst),
+ (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+ "#VAARG_64 $dst, $ap, $size, $mode, $align",
+ [(set GR64:$dst,
+ (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+ (implicit EFLAGS)]>;
+
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets. These calls are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having separate instruction are extra unmodelled effects
+// (compared to ordinary calls) like stack pointer change.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+ def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
+ "# dynamic stack allocation",
+ [(X86WinAlloca)]>;
+
+// When using segmented stacks these are lowered into instructions which first
+// check if the current stacklet has enough free memory. If it does, memory is
+// allocated by bumping the stack pointer. Otherwise memory is allocated from
+// the heap.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
+ "# variable sized alloca for segmented stacks",
+ [(set GR32:$dst,
+ (X86SegAlloca GR32:$size))]>,
+ Requires<[In32BitMode]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
+def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
+ "# variable sized alloca for segmented stacks",
+ [(set GR64:$dst,
+ (X86SegAlloca GR64:$size))]>,
+ Requires<[In64BitMode]>;
+}
+
+// The MSVC runtime contains an _ftol2 routine for converting floating-point
+// to integer values. It has a strange calling convention: the input is
+// popped from the x87 stack, and the return value is given in EDX:EAX. No
+// other registers (aside from flags) are touched.
+// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
+// variant is unnecessary.
+
+let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in {
+ def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
+ "# win32 fptoui",
+ [(X86WinFTOL RFP32:$src)]>,
+ Requires<[In32BitMode]>;
+
+ def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
+ "# win32 fptoui",
+ [(X86WinFTOL RFP64:$src)]>,
+ Requires<[In32BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let SchedRW = [WriteSystem] in {
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
+
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
+
+}
+
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in {
+ def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(X86eh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+ def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(X86eh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+ }
+}
+} // SchedRW
+
+let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
+ def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions used by segmented stacks.
+//
+
+// This is lowered into a RET instruction by MCInstLower. We need
+// this so that we don't have to have a MachineBasicBlock which ends
+// with a RET and also has successors.
+let isPseudo = 1 in {
+def MORESTACK_RET: I<0, Pseudo, (outs), (ins),
+ "", []>;
+
+// This instruction is lowered to a RET followed by a MOV. The two
+// instructions are not generated on a higher level since then the
+// verifier sees a MachineBasicBlock ending with a non-terminator.
+def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
+ "", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: Set encoding to pseudo.
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in {
+def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
+ [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "",
+ [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize,
+ Sched<[WriteZero]>;
+
+// FIXME: Set encoding to pseudo.
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
+}
+
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let Defs = [EFLAGS], isCodeGenOnly=1,
+ AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
+
+// Materialize i64 constant where top 32-bits are zero. This could theoretically
+// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
+// that would make it more difficult to rematerialize.
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
+ "", [(set GR64:$dst, i64immZExt32:$src)],
+ IIC_ALU_NONMEM>, Sched<[WriteALU]>;
+
+// Use sbb to materialize carry bit.
+let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
+// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
+// However, Pat<> can't replicate the destination reg into the inputs of the
+// result.
+def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+} // isCodeGenOnly
+
+
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" on the hope that the and
+// will be eliminated and that the sbb can be extended up to a wider type. When
+// this happens, it is great. However, if we are left with an 8-bit sbb and an
+// and, we might as well just match it as a setb.
+def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
+ (SETBr)>;
+
+// (add OP, SETB) -> (adc OP, 0)
+def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
+ (ADC64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETB) -> (sbb OP, 0)
+def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETCC_CARRY) -> (adc OP, 0)
+def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC64ri8 GR64:$op, 0)>;
+
+//===----------------------------------------------------------------------===//
+// String Pseudo Instructions
+//
+let SchedRW = [WriteMicrocoded] in {
+let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
+def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+ Requires<[In32BitMode]>;
+def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+ Requires<[In32BitMode]>;
+def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+ Requires<[In32BitMode]>;
+}
+
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
+def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+ Requires<[In64BitMode]>;
+def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+}
+
+// FIXME: Should use "(X86rep_stos AL)" as the pattern.
+let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
+ let Uses = [AL,ECX,EDI] in
+ def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+ Requires<[In32BitMode]>;
+ let Uses = [AX,ECX,EDI] in
+ def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+ Requires<[In32BitMode]>;
+ let Uses = [EAX,ECX,EDI] in
+ def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+ Requires<[In32BitMode]>;
+}
+
+let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
+ let Uses = [AL,RCX,RDI] in
+ def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+ let Uses = [AX,RCX,RDI] in
+ def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+ Requires<[In64BitMode]>;
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+}
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//
+
+// ELF TLS Support
+// All calls clobber the non-callee saved registers. ESP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_addr32",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[In32BitMode]>;
+def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_base_addr32",
+ [(X86tlsbaseaddr tls32baseaddr:$sym)]>,
+ Requires<[In32BitMode]>;
+}
+
+// All calls clobber the non-callee saved registers. RSP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLS_addr64",
+ [(X86tlsaddr tls64addr:$sym)]>,
+ Requires<[In64BitMode]>;
+def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLS_base_addr64",
+ [(X86tlsbaseaddr tls64baseaddr:$sym)]>,
+ Requires<[In64BitMode]>;
+}
+
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack, on return the
+// address of the variable is in %eax. %ecx is trashed during the function
+// call. All other registers are preserved.
+let Defs = [EAX, ECX, EFLAGS],
+ Uses = [ESP],
+ usesCustomInserter = 1 in
+def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLSCall_32",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// For x86_64, the address of the thunk is passed in %rdi, on return
+// the address of the variable is in %rax. All other registers are preserved.
+let Defs = [RAX, EFLAGS],
+ Uses = [RSP, RDI],
+ usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLSCall_64",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions
+
+// X86 doesn't have 8-bit conditional moves. Use a customInserter to
+// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
+// however that requires promoting the operands, and can induce additional
+// i8 register pressure.
+let usesCustomInserter = 1, Uses = [EFLAGS] in {
+def CMOV_GR8 : I<0, Pseudo,
+ (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+ "#CMOV_GR8 PSEUDO!",
+ [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+ imm:$cond, EFLAGS))]>;
+
+let Predicates = [NoCMov] in {
+def CMOV_GR32 : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
+ "#CMOV_GR32* PSEUDO!",
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_GR16 : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
+ "#CMOV_GR16* PSEUDO!",
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
+} // Predicates = [NoCMov]
+
+// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
+// SSE1.
+let Predicates = [FPStackf32] in
+def CMOV_RFP32 : I<0, Pseudo,
+ (outs RFP32:$dst),
+ (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+ "#CMOV_RFP32 PSEUDO!",
+ [(set RFP32:$dst,
+ (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+ EFLAGS))]>;
+// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
+// SSE2.
+let Predicates = [FPStackf64] in
+def CMOV_RFP64 : I<0, Pseudo,
+ (outs RFP64:$dst),
+ (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+ "#CMOV_RFP64 PSEUDO!",
+ [(set RFP64:$dst,
+ (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP80 : I<0, Pseudo,
+ (outs RFP80:$dst),
+ (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+ "#CMOV_RFP80 PSEUDO!",
+ [(set RFP80:$dst,
+ (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+ EFLAGS))]>;
+} // UsesCustomInserter = 1, Uses = [EFLAGS]
+
+
+//===----------------------------------------------------------------------===//
+// Atomic Instruction Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo atomic instructions
+
+multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
+ let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
+ let Defs = [EFLAGS, AL] in
+ def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
+ (ins i8mem:$ptr, GR8:$val),
+ !strconcat(mnemonic, "8 PSEUDO!"), []>;
+ let Defs = [EFLAGS, AX] in
+ def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
+ (ins i16mem:$ptr, GR16:$val),
+ !strconcat(mnemonic, "16 PSEUDO!"), []>;
+ let Defs = [EFLAGS, EAX] in
+ def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
+ (ins i32mem:$ptr, GR32:$val),
+ !strconcat(mnemonic, "32 PSEUDO!"), []>;
+ let Defs = [EFLAGS, RAX] in
+ def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
+ (ins i64mem:$ptr, GR64:$val),
+ !strconcat(mnemonic, "64 PSEUDO!"), []>;
+ }
+}
+
+multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> {
+ def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val),
+ (!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>;
+ def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val),
+ (!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>;
+ def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val),
+ (!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>;
+ def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val),
+ (!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>;
+}
+
+// Atomic exchange, and, or, xor
+defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">;
+defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">;
+defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">;
+defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">;
+defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">;
+defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">;
+defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">;
+defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">;
+
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
+
+multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
+ let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
+ mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
+ def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ !strconcat(mnemonic, "6432 PSEUDO!"), []>;
+}
+
+defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
+defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">;
+defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">;
+defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">;
+defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">;
+defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">;
+defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">;
+defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">;
+defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">;
+defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">;
+defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
+
+//===----------------------------------------------------------------------===//
+// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// FIXME: Use normal instructions and add lock prefix dynamically.
+
+// Memory barriers
+
+// TODO: Get this to fold the constant into the instruction.
+let isCodeGenOnly = 1, Defs = [EFLAGS] in
+def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
+ "or{l}\t{$zero, $dst|$dst, $zero}",
+ [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK,
+ Sched<[WriteALULd, WriteRMW]>;
+
+let hasSideEffects = 1 in
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+ "#MEMBARRIER",
+ [(X86MemBarrier)]>, Sched<[WriteLoad]>;
+
+// RegOpc corresponds to the mr version of the instruction
+// ImmOpc corresponds to the mi version of the instruction
+// ImmOpc8 corresponds to the mi8 version of the instruction
+// ImmMod corresponds to the instruction format of the mi and mi8 versions
+multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
+ Format ImmMod, string mnemonic> {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
+
+def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
+ MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ !strconcat(mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_NONMEM>, LOCK;
+def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_NONMEM>, OpSize, LOCK;
+def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ !strconcat(mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_NONMEM>, LOCK;
+def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ !strconcat(mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_NONMEM>, LOCK;
+
+def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
+ ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
+ !strconcat(mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, OpSize, LOCK;
+
+def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
+ !strconcat(mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
+ !strconcat(mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, OpSize, LOCK;
+def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+ !strconcat(mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+ !strconcat(mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+}
+
+}
+
+defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
+defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
+defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
+defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
+defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
+
+// Optimized codegen when the non-memory output is not used.
+multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
+ string mnemonic> {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
+
+def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
+ !strconcat(mnemonic, "{b}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
+ !strconcat(mnemonic, "{w}\t$dst"),
+ [], IIC_UNARY_MEM>, OpSize, LOCK;
+def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
+ !strconcat(mnemonic, "{l}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
+ !strconcat(mnemonic, "{q}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+}
+}
+
+defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">;
+defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">;
+
+// Atomic compare and swap.
+multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
+ SDPatternOperator frag, X86MemOperand x86memop,
+ InstrItinClass itin> {
+let isCodeGenOnly = 1 in {
+ def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
+ !strconcat(mnemonic, "\t$ptr"),
+ [(frag addr:$ptr)], itin>, TB, LOCK;
+}
+}
+
+multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
+ string mnemonic, SDPatternOperator frag,
+ InstrItinClass itin8, InstrItinClass itin> {
+let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
+ let Defs = [AL, EFLAGS], Uses = [AL] in
+ def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
+ !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
+ let Defs = [AX, EFLAGS], Uses = [AX] in
+ def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
+ !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
+ let Defs = [EAX, EFLAGS], Uses = [EAX] in
+ def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
+ !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
+ let Defs = [RAX, EFLAGS], Uses = [RAX] in
+ def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
+ !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
+}
+}
+
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+ SchedRW = [WriteALULd, WriteRMW] in {
+defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
+ X86cas8, i64mem,
+ IIC_CMPX_LOCK_8B>;
+}
+
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
+ Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
+defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
+ X86cas16, i128mem,
+ IIC_CMPX_LOCK_16B>, REX_W;
+}
+
+defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
+ X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;
+
+// Atomic exchange and add
+multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
+ string frag,
+ InstrItinClass itin8, InstrItinClass itin> {
+ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
+ def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+ itin8>;
+ def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+ itin>, OpSize;
+ def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ itin>;
+ def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
+ itin>;
+ }
+}
+
+defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
+ IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
+ TB, LOCK;
+
+def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
+def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
+def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
+def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
+
+def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_8 addr:$dst, GR8 :$src)]>;
+def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_16 addr:$dst, GR16:$src)]>;
+def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_32 addr:$dst, GR32:$src)]>;
+def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_64 addr:$dst, GR64:$src)]>;
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions.
+//===----------------------------------------------------------------------===//
+
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let Uses = [EFLAGS], usesCustomInserter = 1 in {
+ def CMOV_FR32 : I<0, Pseudo,
+ (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
+ "#CMOV_FR32 PSEUDO!",
+ [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_FR64 : I<0, Pseudo,
+ (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
+ "#CMOV_FR64 PSEUDO!",
+ [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_V4F32 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V4F32 PSEUDO!",
+ [(set VR128:$dst,
+ (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2F64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2F64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2I64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2I64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V8F32 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V8F32 PSEUDO!",
+ [(set VR256:$dst,
+ (v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V4F64 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V4F64 PSEUDO!",
+ [(set VR256:$dst,
+ (v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V4I64 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V4I64 PSEUDO!",
+ [(set VR256:$dst,
+ (v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DAG Pattern Matching Rules
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+ (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+ (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+ (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+ (ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
+ (ADD32ri GR32:$src1, tblockaddress:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tblockaddress:$src)>;
+
+
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
+// code model mode, should use 'movabs'. FIXME: This is really a hack, the
+// 'movabs' predicate should handle this sort of thing.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
+
+// In static codegen with small code model, we can get the address of a label
+// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri64i32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
+
+// In kernel code model, we can get the address of a label
+// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
+
+// If we have small model and -static mode, it is safe to store global addresses
+// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
+// for MOV64mi32 should handle this sort of thing.
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tconstpool:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tjumptable:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, texternalsym:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tblockaddress:$src)>,
+ Requires<[NearData, IsStatic]>;
+
+
+
+// Calls
+
+// tls has some funny stuff here...
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+ (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+
+
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
+// can never use callee-saved registers. That is the purpose of the GR64_TC
+// register classes.
+//
+// The only volatile register that is never used by the calling convention is
+// %r11. This happens when calling a vararg function with 6 arguments.
+//
+// Match an X86tcret that uses less than 7 volatile registers.
+def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
+ (X86tcret node:$ptr, node:$off), [{
+ // X86tcret args: (*chain, ptr, imm, regs..., glue)
+ unsigned NumRegs = 0;
+ for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
+ if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)
+ return false;
+ return true;
+}]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+ (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+// FIXME: This is disabled for 32-bit PIC mode because the global base
+// register which is part of the address mode may be assigned a
+// callee-saved register.
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi addr:$dst, imm:$off)>,
+ Requires<[In32BitMode, IsNotPIC]>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+ (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+// Don't fold loads into X86tcret requiring more than 6 regs.
+// There wouldn't be enough scratch registers for base+index.
+def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
+ (TCRETURNmi64 addr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+ (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+// Normal calls, with various flavors of addresses.
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+ (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
+def : Pat<(X86cmp GR8:$src1, 0),
+ (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(X86cmp GR16:$src1, 0),
+ (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86cmp GR32:$src1, 0),
+ (TEST32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(X86cmp GR64:$src1, 0),
+ (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with folded loads with operands swapped and conditions
+// inverted.
+multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
+ Instruction Inst64> {
+ let Predicates = [HasCMov] in {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+ (Inst16 GR16:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+ (Inst32 GR32:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+ (Inst64 GR64:$src2, addr:$src1)>;
+ }
+}
+
+defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
+defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
+defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
+defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
+defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
+defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
+defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
+defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
+defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
+defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
+defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
+defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
+defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
+defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
+defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
+defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload bool -> extload byte
+// When extloading from 16-bit and smaller memory locations into 64-bit
+// registers, use zero-extending loads so that the entire 64-bit register is
+// defined, avoiding partial-register updates.
+
+def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+// For other extloads, use subregs, since the high contents of the register are
+// defined after an extload.
+def : Pat<(extloadi64i32 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
+ sub_32bit)>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
+ (MOVZX32rr8 GR8 :$src), sub_16bit)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
+
+// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
+def : Pat<(i32 (anyext GR16:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+
+// Any instruction that defines a 32-bit result leaves the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. And x86's cmov doesn't do anything if the
+// condition is false. But any other 32-bit operation will zero-extend
+// up to 64 bits.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != X86ISD::CMOV;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+//===----------------------------------------------------------------------===//
+// Pattern match OR as ADD
+//===----------------------------------------------------------------------===//
+
+// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
+// 3-addressified into an LEA instruction to avoid copies. However, we also
+// want to finally emit these instructions as an or at the end of the code
+// generator to make the generated code easier to read. To do this, we select
+// into "disjoint bits" pseudo ops.
+
+// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
+
+
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
+// Try this before the selecting to OR.
+let AddedComplexity = 5, SchedRW = [WriteALU] in {
+
+let isConvertibleToThreeAddress = 1,
+ Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
+let isCommutable = 1 in {
+def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "", // orw/addw REG, REG
+ [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
+def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "", // orl/addl REG, REG
+ [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
+def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "", // orq/addq REG, REG
+ [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
+} // isCommutable
+
+// NOTE: These are order specific, we want the ri8 forms to be listed
+// first so that they are slightly preferred to the ri forms.
+
+def ADD16ri8_DB : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "", // orw/addw REG, imm8
+ [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
+def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "", // orw/addw REG, imm
+ [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
+
+def ADD32ri8_DB : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "", // orl/addl REG, imm8
+ [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
+def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "", // orl/addl REG, imm
+ [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
+
+
+def ADD64ri8_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "", // orq/addq REG, imm8
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt8:$src2))]>;
+def ADD64ri32_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "", // orq/addq REG, imm
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt32:$src2))]>;
+}
+} // AddedComplexity, SchedRW
+
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
+def : Pat<(add GR16:$src1, 128),
+ (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+ (SUB16mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR32:$src1, 128),
+ (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+ (SUB32mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR64:$src1, 128),
+ (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+ (SUB64mi8 addr:$dst, -128)>;
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
+ (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// To avoid needing to materialize an immediate in a register, use a 32-bit and
+// with implicit zero-extension instead of a 64-bit and if the immediate has at
+// least 32 bits of leading zeros. If in addition the last 32 bits can be
+// represented with a sign extension of a 8 bit constant, use that.
+
+def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri8
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo8XForm imm:$imm))),
+ sub_32bit)>;
+
+def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo32XForm imm:$imm))),
+ sub_32bit)>;
+
+
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
+ (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+
+// r & (2^32-1) ==> movz
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
+ Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (EXTRACT_SUBREG (MOVZX32rr8 (i8
+ (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
+ Requires<[In64BitMode]>;
+
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR32:$src, i16),
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR16:$src, i8),
+ (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
+ (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+def : Pat<(sext_inreg GR64:$src, i16),
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR64:$src, i8),
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
+ Requires<[In64BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (EXTRACT_SUBREG (MOVSX32rr8
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
+ Requires<[In64BitMode]>;
+
+// sext, sext_load, zext, zext_load
+def: Pat<(i16 (sext GR8:$src)),
+ (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(sextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
+def: Pat<(i16 (zext GR8:$src)),
+ (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(zextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
+
+// trunc patterns
+def : Pat<(i16 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
+def : Pat<(i16 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+
+// h-register tricks
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32rr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+
+// h-register tricks.
+// For now, be conservative on x86-64 and use an h-register extract only if the
+// value is immediately zero-extended or stored, which are somewhat common
+// cases. This uses a bunch of code to prevent a register requiring a REX prefix
+// from being allocated in the same instruction as the h register, as there's
+// currently no way to describe this requirement to the register allocator.
+
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+
+
+// (shl x, 1) ==> (add x, x)
+// Note that if x is undef (immediate or otherwise), we could theoretically
+// end up with the two uses of x getting different values, producing a result
+// where the least significant bit is not 0. However, the probability of this
+// happening is considered low enough that this is officially not a
+// "real problem".
+def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+
+// Helper imms that check if a mask doesn't change significant shift bits.
+def immShift32 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 5; }]>;
+def immShift64 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 6; }]>;
+
+// (shl x (and y, 31)) ==> (shl x, y)
+def : Pat<(shl GR8:$src1, (and CL, immShift32)),
+ (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL, immShift32)),
+ (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL, immShift32)),
+ (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
+ (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
+ (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
+ (SHL32mCL addr:$dst)>;
+
+def : Pat<(srl GR8:$src1, (and CL, immShift32)),
+ (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL, immShift32)),
+ (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL, immShift32)),
+ (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
+ (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
+ (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
+ (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL, immShift32)),
+ (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL, immShift32)),
+ (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL, immShift32)),
+ (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
+ (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
+ (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
+ (SAR32mCL addr:$dst)>;
+
+// (shl x (and y, 63)) ==> (shl x, y)
+def : Pat<(shl GR64:$src1, (and CL, immShift64)),
+ (SHL64rCL GR64:$src1)>;
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHL64mCL addr:$dst)>;
+
+def : Pat<(srl GR64:$src1, (and CL, immShift64)),
+ (SHR64rCL GR64:$src1)>;
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHR64mCL addr:$dst)>;
+
+def : Pat<(sra GR64:$src1, (and CL, immShift64)),
+ (SAR64rCL GR64:$src1)>;
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SAR64mCL addr:$dst)>;
+
+
+// (anyext (setcc_carry)) -> (setcc_carry)
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+
+
+
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// add reg, reg
+def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
+
+// add reg, mem
+def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+
+// add reg, imm
+def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
+def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(add GR16:$src1, i16immSExt8:$src2),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(add GR32:$src1, i32immSExt8:$src2),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// sub reg, reg
+def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
+
+// sub reg, mem
+def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+
+// sub reg, imm
+def : Pat<(sub GR8:$src1, imm:$src2),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, imm:$src2),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(sub GR32:$src1, imm:$src2),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
+ (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// sub 0, reg
+def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>;
+def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
+def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
+def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
+
+// mul reg, reg
+def : Pat<(mul GR16:$src1, GR16:$src2),
+ (IMUL16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(mul GR32:$src1, GR32:$src2),
+ (IMUL32rr GR32:$src1, GR32:$src2)>;
+
+// mul reg, mem
+def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
+ (IMUL16rm GR16:$src1, addr:$src2)>;
+def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
+ (IMUL32rm GR32:$src1, addr:$src2)>;
+
+// mul reg, imm
+def : Pat<(mul GR16:$src1, imm:$src2),
+ (IMUL16rri GR16:$src1, imm:$src2)>;
+def : Pat<(mul GR32:$src1, imm:$src2),
+ (IMUL32rri GR32:$src1, imm:$src2)>;
+def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
+ (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
+ (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// reg = mul mem, imm
+def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
+ (IMUL16rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
+ (IMUL32rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
+ (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
+ (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
+
+// Patterns for nodes that do not produce flags, for instructions that do.
+
+// addition
+def : Pat<(add GR64:$src1, GR64:$src2),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt8:$src2),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt32:$src2),
+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+
+// subtraction
+def : Pat<(sub GR64:$src1, GR64:$src2),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Multiply
+def : Pat<(mul GR64:$src1, GR64:$src2),
+ (IMUL64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
+ (IMUL64rm GR64:$src1, addr:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
+ (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
+ (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
+ (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
+ (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
+
+// Increment reg.
+def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+
+// Decrement reg.
+def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+// or reg/reg.
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
+
+// or reg/mem
+def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
+ (OR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
+ (OR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
+ (OR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
+ (OR64rm GR64:$src1, addr:$src2)>;
+
+// or reg/imm
+def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, i16immSExt8:$src2),
+ (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(or GR32:$src1, i32immSExt8:$src2),
+ (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt8:$src2),
+ (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt32:$src2),
+ (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// xor reg/reg
+def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
+
+// xor reg/mem
+def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
+ (XOR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
+ (XOR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
+ (XOR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
+ (XOR64rm GR64:$src1, addr:$src2)>;
+
+// xor reg/imm
+def : Pat<(xor GR8:$src1, imm:$src2),
+ (XOR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, imm:$src2),
+ (XOR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(xor GR32:$src1, imm:$src2),
+ (XOR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
+ (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
+ (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
+ (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
+ (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// and reg/reg
+def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
+
+// and reg/mem
+def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
+ (AND8rm GR8:$src1, addr:$src2)>;
+def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
+ (AND16rm GR16:$src1, addr:$src2)>;
+def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
+ (AND32rm GR32:$src1, addr:$src2)>;
+def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
+ (AND64rm GR64:$src1, addr:$src2)>;
+
+// and reg/imm
+def : Pat<(and GR8:$src1, imm:$src2),
+ (AND8ri GR8:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, imm:$src2),
+ (AND16ri GR16:$src1, imm:$src2)>;
+def : Pat<(and GR32:$src1, imm:$src2),
+ (AND32ri GR32:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, i16immSExt8:$src2),
+ (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(and GR32:$src1, i32immSExt8:$src2),
+ (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt8:$src2),
+ (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt32:$src2),
+ (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Bit scan instruction patterns to match explicit zero-undef behavior.
+def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
+def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
+def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
+def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
new file mode 100644
index 000000000000..0e696513d47c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -0,0 +1,287 @@
+//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 jump, return, call, and related instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Return instructions.
+//
+// The X86retflag return instructions are variadic because we may add ST0 and
+// ST1 arguments when returning values on the x87 stack.
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
+ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret",
+ [(X86retflag 0)], IIC_RET>;
+ def RETW : I <0xC3, RawFrm, (outs), (ins),
+ "ret{w}",
+ [], IIC_RET>, OpSize;
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "ret\t$amt",
+ [(X86retflag timm:$amt)], IIC_RET_IMM>;
+ def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
+ "ret{w}\t$amt",
+ [], IIC_RET_IMM>, OpSize;
+ def LRETL : I <0xCB, RawFrm, (outs), (ins),
+ "{l}ret{l|f}", [], IIC_RET>;
+ def LRETW : I <0xCB, RawFrm, (outs), (ins),
+ "{l}ret{w|f}", [], IIC_RET>, OpSize;
+ def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
+ "{l}ret{q|f}", [], IIC_RET>;
+ def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "{l}ret{l|f}\t$amt", [], IIC_RET>;
+ def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize;
+}
+
+// Unconditional branches.
+let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
+ def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
+ def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+ "jmp\t$dst", [], IIC_JMP_REL>;
+ // FIXME : Intel syntax for JMP64pcrel32 such that it is not ambiguious
+ // with JMP_1.
+ def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmpq\t$dst", [], IIC_JMP_REL>;
+}
+
+// Conditional Branches.
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
+ multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+ def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [],
+ IIC_Jcc>;
+ def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
+ [(X86brcond bb:$dst, Cond, EFLAGS)], IIC_Jcc>, TB;
+ }
+}
+
+defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
+defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
+defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
+defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
+defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
+defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
+defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
+defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
+defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
+defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
+defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
+defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
+defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
+defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
+defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
+defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
+
+// jcx/jecx/jrcx instructions.
+let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
+ // These are the 32-bit versions of this instruction for the asmparser. In
+ // 32-bit mode, the address size prefix is jcxz and the unprefixed version is
+ // jecxz.
+ let Uses = [CX] in
+ def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jcxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In32BitMode]>;
+ let Uses = [ECX] in
+ def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", [], IIC_JCXZ>, Requires<[In32BitMode]>;
+
+ // J*CXZ instruction: 64-bit versions of this instruction for the asmparser.
+ // In 64-bit mode, the address size prefix is jecxz and the unprefixed version
+ // is jrcxz.
+ let Uses = [ECX] in
+ def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In64BitMode]>;
+ let Uses = [RCX] in
+ def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jrcxz\t$dst", [], IIC_JCXZ>, Requires<[In64BitMode]>;
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
+ [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>,
+ Sched<[WriteJump]>;
+ def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
+ [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>,
+ Requires<[In32BitMode]>, Sched<[WriteJumpLd]>;
+
+ def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
+ [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>,
+ Sched<[WriteJump]>;
+ def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
+ [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>,
+ Requires<[In64BitMode]>, Sched<[WriteJumpLd]>;
+
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "ljmp{w}\t{$seg, $off|$off, $seg}", [],
+ IIC_JMP_FAR_PTR>, OpSize, Sched<[WriteJump]>;
+ def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "ljmp{l}\t{$seg, $off|$off, $seg}", [],
+ IIC_JMP_FAR_PTR>, Sched<[WriteJump]>;
+ def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+ "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
+ Sched<[WriteJump]>;
+
+ def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+ "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize,
+ Sched<[WriteJumpLd]>;
+ def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+ "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
+ Sched<[WriteJumpLd]>;
+}
+
+
+// Loop instructions
+let SchedRW = [WriteJump] in {
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>;
+}
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Uses = [ESP] in {
+ def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i32imm_pcrel:$dst),
+ "call{l}\t$dst", [], IIC_CALL_RI>,
+ Requires<[In32BitMode]>, Sched<[WriteJump]>;
+ def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
+ "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>,
+ Requires<[In32BitMode]>, Sched<[WriteJump]>;
+ def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))],
+ IIC_CALL_MEM>,
+ Requires<[In32BitMode,FavorMemIndirectCall]>,
+ Sched<[WriteJumpLd]>;
+
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "lcall{w}\t{$seg, $off|$off, $seg}", [],
+ IIC_CALL_FAR_PTR>, OpSize, Sched<[WriteJump]>;
+ def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "lcall{l}\t{$seg, $off|$off, $seg}", [],
+ IIC_CALL_FAR_PTR>, Sched<[WriteJump]>;
+
+ def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
+ "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize,
+ Sched<[WriteJumpLd]>;
+ def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
+ "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>,
+ Sched<[WriteJumpLd]>;
+
+ // callw for 16 bit code for the assembler.
+ let isAsmParserOnly = 1 in
+ def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+ (outs), (ins i16imm_pcrel:$dst),
+ "callw\t$dst", []>, OpSize;
+ }
+
+
+// Tail call stuff.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ let Uses = [ESP] in {
+ def TCRETURNdi : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset), []>;
+ def TCRETURNri : PseudoI<(outs),
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi : PseudoI<(outs),
+ (ins i32mem_TC:$dst, i32imm:$offset), []>;
+
+ // FIXME: The should be pseudo instructions that are lowered when going to
+ // mcinst.
+ def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i32imm_pcrel:$dst),
+ "jmp\t$dst # TAILCALL",
+ [], IIC_JMP_REL>;
+ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
+ "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead.
+ let mayLoad = 1 in
+ def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst),
+ "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+
+// RSP is marked as a use to prevent stack-pointer assignments that appear
+// immediately before calls from potentially appearing dead. Uses for argument
+// registers are added manually.
+let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
+ // NOTE: this pattern doesn't match "X86call imm", because we do not know
+ // that the offset between an arbitrary immediate and the call will fit in
+ // the 32-bit pcrel field that we have.
+ def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst),
+ "call{q}\t$dst", [], IIC_CALL_RI>,
+ Requires<[In64BitMode]>;
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
+ "call{q}\t{*}$dst", [(X86call GR64:$dst)],
+ IIC_CALL_RI>,
+ Requires<[In64BitMode]>;
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
+ "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))],
+ IIC_CALL_MEM>,
+ Requires<[In64BitMode,FavorMemIndirectCall]>;
+
+ def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+ "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
+}
+
+let isCall = 1, isCodeGenOnly = 1 in
+ // __chkstk(MSVC): clobber R10, R11 and EFLAGS.
+ // ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP.
+ let Defs = [RAX, R10, R11, RSP, EFLAGS],
+ Uses = [RSP] in {
+ def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst),
+ "call{q}\t$dst", [], IIC_CALL_RI>,
+ Requires<[IsWin64]>, Sched<[WriteJump]>;
+ }
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1,
+ SchedRW = [WriteJump] in {
+ def TCRETURNdi64 : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset),
+ []>;
+ def TCRETURNri64 : PseudoI<(outs),
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi64 : PseudoI<(outs),
+ (ins i64mem_TC:$dst, i32imm:$offset), []>;
+
+ def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst),
+ "jmp\t$dst # TAILCALL", [], IIC_JMP_REL>;
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
+ "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
+
+ let mayLoad = 1 in
+ def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
+ "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
new file mode 100644
index 000000000000..6dc7175357b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -0,0 +1,186 @@
+//===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the sign and zero extension operations.
+//
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+ let Defs = [AX], Uses = [AL] in
+ def CBW : I<0x98, RawFrm, (outs), (ins),
+ "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
+ let Defs = [EAX], Uses = [AX] in
+ def CWDE : I<0x98, RawFrm, (outs), (ins),
+ "{cwtl|cwde}", []>; // EAX = signext(AX)
+
+ let Defs = [AX,DX], Uses = [AX] in
+ def CWD : I<0x99, RawFrm, (outs), (ins),
+ "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+ let Defs = [EAX,EDX], Uses = [EAX] in
+ def CDQ : I<0x99, RawFrm, (outs), (ins),
+ "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+
+
+ let Defs = [RAX], Uses = [EAX] in
+ def CDQE : RI<0x98, RawFrm, (outs), (ins),
+ "{cltq|cdqe}", []>; // RAX = signext(EAX)
+
+ let Defs = [RAX,RDX], Uses = [RAX] in
+ def CQO : RI<0x99, RawFrm, (outs), (ins),
+ "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+}
+
+
+
+// Sign/Zero extenders
+let neverHasSideEffects = 1 in {
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
+ TB, OpSize, Sched<[WriteALU]>;
+let mayLoad = 1 in
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
+ TB, OpSize, Sched<[WriteALULd]>;
+} // neverHasSideEffects = 1
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALULd]>;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
+ TB, Sched<[WriteALULd]>;
+
+let neverHasSideEffects = 1 in {
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
+ TB, OpSize, Sched<[WriteALU]>;
+let mayLoad = 1 in
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
+ TB, OpSize, Sched<[WriteALULd]>;
+} // neverHasSideEffects = 1
+def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
+def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALULd]>;
+def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
+def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>,
+ TB, Sched<[WriteALULd]>;
+
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [], IIC_MOVZX>, TB, Sched<[WriteALU]>;
+let mayLoad = 1 in
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
+}
+
+// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
+// operand, which makes it a rare instruction with an 8-bit register
+// operand that can never access an h register. If support for h registers
+// were generalized, this would require a special register class.
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>,
+ TB, Sched<[WriteALULd]>;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>,
+ TB, Sched<[WriteALULd]>;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>,
+ Sched<[WriteALU]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>,
+ Sched<[WriteALULd]>;
+
+// movzbq and movzwq encodings for the disassembler
+def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALU]>;
+def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALULd]>;
+def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALU]>;
+def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALULd]>;
+
+// FIXME: These should be Pat patterns.
+let isCodeGenOnly = 1 in {
+
+// Use movzbl instead of movzbq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
+ TB, Sched<[WriteALULd]>;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
+ IIC_MOVZX>, TB, Sched<[WriteALULd]>;
+
+// There's no movzlq instruction, but movl can be used for this purpose, using
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension, however this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
+ "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>,
+ Sched<[WriteALU]>;
+def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
+ IIC_MOVZX>, Sched<[WriteALULd]>;
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
new file mode 100644
index 000000000000..7759a8a2dabb
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -0,0 +1,368 @@
+//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes FMA (Fused Multiply-Add) instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FMA3 - Intel 3 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
+ PatFrag MemFrag128, PatFrag MemFrag256,
+ ValueType OpVT128, ValueType OpVT256,
+ SDPatternOperator Op = null_frag> {
+ let isCommutable = 1 in
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (OpVT128 (Op VR128:$src2,
+ VR128:$src1, VR128:$src3)))]>;
+
+ let mayLoad = 1 in
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
+ (MemFrag128 addr:$src3))))]>;
+
+ let isCommutable = 1 in
+ def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
+ VR256:$src3)))]>, VEX_L;
+
+ let mayLoad = 1 in
+ def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst,
+ (OpVT256 (Op VR256:$src2, VR256:$src1,
+ (MemFrag256 addr:$src3))))]>, VEX_L;
+}
+} // Constraints = "$src1 = $dst"
+
+multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, string PackTy,
+ PatFrag MemFrag128, PatFrag MemFrag256,
+ SDNode Op, ValueType OpTy128, ValueType OpTy256> {
+ defm r213 : fma3p_rm<opc213,
+ !strconcat(OpcodeStr, "213", PackTy),
+ MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
+let neverHasSideEffects = 1 in {
+ defm r132 : fma3p_rm<opc132,
+ !strconcat(OpcodeStr, "132", PackTy),
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
+ defm r231 : fma3p_rm<opc231,
+ !strconcat(OpcodeStr, "231", PackTy),
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
+} // neverHasSideEffects = 1
+}
+
+// Fused Multiply-Add
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32,
+ memopv8f32, X86Fmadd, v4f32, v8f32>;
+ defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
+ memopv8f32, X86Fmsub, v4f32, v8f32>;
+ defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
+ memopv4f32, memopv8f32, X86Fmaddsub,
+ v4f32, v8f32>;
+ defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
+ memopv4f32, memopv8f32, X86Fmsubadd,
+ v4f32, v8f32>;
+}
+
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64,
+ memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
+ defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64,
+ memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
+ defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
+ memopv2f64, memopv4f64, X86Fmaddsub,
+ v2f64, v4f64>, VEX_W;
+ defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
+ memopv2f64, memopv4f64, X86Fmsubadd,
+ v2f64, v4f64>, VEX_W;
+}
+
+// Fused Negative Multiply-Add
+let ExeDomain = SSEPackedSingle in {
+ defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32,
+ memopv8f32, X86Fnmadd, v4f32, v8f32>;
+ defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32,
+ memopv8f32, X86Fnmsub, v4f32, v8f32>;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64,
+ memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
+ defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
+ memopv2f64, memopv4f64, X86Fnmsub, v2f64,
+ v4f64>, VEX_W;
+}
+
+let Constraints = "$src1 = $dst" in {
+multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
+ RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
+ SDPatternOperator OpNode = null_frag> {
+ let isCommutable = 1 in
+ def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+ let mayLoad = 1 in
+ def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1,
+ (mem_frag addr:$src3))))]>;
+}
+
+multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
+ ComplexPattern mem_cpat, Intrinsic IntId,
+ RegisterClass RC> {
+ let isCommutable = 1 in
+ def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1,
+ VR128:$src3))]>;
+ def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>;
+}
+} // Constraints = "$src1 = $dst"
+
+multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpStr, string PackTy, Intrinsic Int,
+ SDNode OpNode, RegisterClass RC, ValueType OpVT,
+ X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
+ ComplexPattern mem_cpat> {
+let neverHasSideEffects = 1 in {
+ defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
+ x86memop, RC, OpVT, mem_frag>;
+ defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
+ x86memop, RC, OpVT, mem_frag>;
+}
+
+defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
+ x86memop, RC, OpVT, mem_frag, OpNode>,
+ fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
+ memop, mem_cpat, Int, RC>;
+}
+
+multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpStr, Intrinsic IntF32, Intrinsic IntF64,
+ SDNode OpNode> {
+ defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", IntF32, OpNode,
+ FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>;
+ defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", IntF64, OpNode,
+ FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
+}
+
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
+ int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
+defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
+ int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;
+
+defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
+ int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
+defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
+ int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
+
+
+//===----------------------------------------------------------------------===//
+// FMA4 - AMD 4 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+
+multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
+ PatFrag mem_frag> {
+ let isCommutable = 1 in
+ def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, MemOp4;
+ def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
+ (mem_frag addr:$src3)))]>, VEX_W, MemOp4;
+ def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;
+// For disassembler
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+ def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+}
+
+multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
+ ComplexPattern mem_cpat, Intrinsic Int> {
+ let isCommutable = 1 in
+ def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2,
+ mem_cpat:$src3))]>, VEX_W, MemOp4;
+ def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, memop:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+}
+
+multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT128, ValueType OpVT256,
+ PatFrag ld_frag128, PatFrag ld_frag256> {
+ let isCommutable = 1 in
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
+ VEX_W, MemOp4;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
+ (ld_frag128 addr:$src3)))]>, VEX_W, MemOp4;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
+ let isCommutable = 1 in
+ def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
+ VEX_W, MemOp4, VEX_L;
+ def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
+ (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4, VEX_L;
+ def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst, (OpNode VR256:$src1,
+ (ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L;
+// For disassembler
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
+ def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+ def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
+ VEX_L;
+} // isCodeGenOnly = 1
+}
+
+defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
+ fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfmadd_ss>;
+defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
+ fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmadd_sd>;
+defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
+ fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfmsub_ss>;
+defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
+ fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmsub_sd>;
+defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
+ X86Fnmadd, loadf32>,
+ fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmadd_ss>;
+defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
+ X86Fnmadd, loadf64>,
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmadd_sd>;
+defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
+ X86Fnmsub, loadf32>,
+ fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmsub_ss>;
+defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
+ X86Fnmsub, loadf64>,
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmsub_sd>;
+
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+}
+
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
new file mode 100644
index 000000000000..2224a08d59f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -0,0 +1,692 @@
+//===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 x87 FPU instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPStack specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
+ SDTCisVT<1, f80>]>;
+def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
+def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+
+def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
+ [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
+def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+ SDNPMemOperand]>;
+
+//===----------------------------------------------------------------------===//
+// FPStack pattern fragments
+//===----------------------------------------------------------------------===//
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+def fpimm1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+1.0);
+}]>;
+
+def fpimmneg1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-1.0);
+}]>;
+
+// Some 'special' instructions
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
+ def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
+ [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
+ [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
+ [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
+ def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
+ [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
+ [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
+ [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
+ def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
+ [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
+ [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
+ [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
+}
+
+// All FP Stack operations are represented with four instructions here. The
+// first three instructions, generated by the instruction selector, use "RFP32"
+// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
+// 64-bit or 80-bit floating point values. These sizes apply to the values,
+// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
+// copied to each other without losing information. These instructions are all
+// pseudo instructions and use the "_Fp" suffix.
+// In some cases there are additional variants with a mixture of different
+// register sizes.
+// The second instruction is defined with FPI, which is the actual instruction
+// emitted by the assembler. These use "RST" registers, although frequently
+// the actual register(s) used are implicit. These are always 80 bits.
+// The FP stackifier pass converts one to the other after register allocation
+// occurs.
+//
+// Note that the FpI instruction should have instruction selection info (e.g.
+// a pattern) and the FPI instruction should have emission info (e.g. opcode
+// encoding and asm printing info).
+
+// Pseudo Instruction for FP stack return values.
+def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>;
+
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
+// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
+// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
+// f80 instructions cannot use SSE and use neither of these.
+class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
+class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
+
+// Factoring for arithmetic.
+multiclass FPBinary_rr<SDNode OpNode> {
+// Register op register -> register
+// These are separated out because they have no reversed form.
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
+ [(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
+}
+// The FopST0 series are not included here because of the irregularities
+// in where the 'r' goes in assembly output.
+// These instructions cannot address 80-bit memory.
+multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
+// ST(0) = ST(0) + [mem]
+def _Fp32m : FpIf32<(outs RFP32:$dst),
+ (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst,
+ (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
+def _Fp64m : FpIf64<(outs RFP64:$dst),
+ (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
+def _Fp64m32: FpIf64<(outs RFP64:$dst),
+ (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
+def _Fp80m32: FpI_<(outs RFP80:$dst),
+ (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
+def _Fp80m64: FpI_<(outs RFP80:$dst),
+ (ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
+def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
+ !strconcat("f", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
+}
+def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
+ !strconcat("f", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
+}
+// ST(0) = ST(0) + [memint]
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
+ !strconcat("fi", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
+}
+def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
+ !strconcat("fi", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
+}
+}
+
+let Defs = [FPSW] in {
+defm ADD : FPBinary_rr<fadd>;
+defm SUB : FPBinary_rr<fsub>;
+defm MUL : FPBinary_rr<fmul>;
+defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary<fadd, MRM0m, "add">;
+defm SUB : FPBinary<fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<fsub ,MRM5m, "subr">;
+defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm DIV : FPBinary<fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
+}
+
+class FPST0rInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, D8;
+class FPrST0Inst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DC;
+class FPrST0PInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DE;
+
+// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
+// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
+// we have to put some 'r's in and take them out of weird places.
+def ADD_FST0r : FPST0rInst <0xC0, "fadd\t$op">;
+def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, ST(0)}">;
+def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp\t$op">;
+def SUBR_FST0r : FPST0rInst <0xE8, "fsubr\t$op">;
+def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, ST(0)}">;
+def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p\t$op">;
+def SUB_FST0r : FPST0rInst <0xE0, "fsub\t$op">;
+def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, ST(0)}">;
+def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">;
+def MUL_FST0r : FPST0rInst <0xC8, "fmul\t$op">;
+def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, ST(0)}">;
+def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp\t$op">;
+def DIVR_FST0r : FPST0rInst <0xF8, "fdivr\t$op">;
+def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, ST(0)}">;
+def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p\t$op">;
+def DIV_FST0r : FPST0rInst <0xF0, "fdiv\t$op">;
+def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, ST(0)}">;
+def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
+
+def COM_FST0r : FPST0rInst <0xD0, "fcom\t$op">;
+def COMP_FST0r : FPST0rInst <0xD8, "fcomp\t$op">;
+
+// Unary operations.
+multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src))]>;
+def _F : FPI<opcode, RawFrm, (outs), (ins), asmstring>, D9;
+}
+
+let Defs = [FPSW] in {
+defm CHS : FPUnary<fneg, 0xE0, "fchs">;
+defm ABS : FPUnary<fabs, 0xE1, "fabs">;
+defm SQRT: FPUnary<fsqrt,0xFA, "fsqrt">;
+defm SIN : FPUnary<fsin, 0xFE, "fsin">;
+defm COS : FPUnary<fcos, 0xFF, "fcos">;
+
+let neverHasSideEffects = 1 in {
+def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
+def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
+def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
+}
+def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+} // Defs = [FPSW]
+
+// Versions of FP instructions that take a single memory operand. Added for the
+// disassembler; remove as they are included with patterns elsewhere.
+def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
+def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
+
+def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
+def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">;
+
+def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
+def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
+
+def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">;
+def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
+
+def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">;
+def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">;
+def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">;
+
+def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
+def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
+
+def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">;
+def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">;
+
+// Floating point cmovs.
+class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
+class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;
+
+multiclass FPCMov<PatLeaf cc> {
+ def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
+ CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
+ cc, EFLAGS))]>;
+ def _Fp64 : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
+ CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ cc, EFLAGS))]>;
+ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
+ CondMovFP,
+ [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
+ cc, EFLAGS))]>,
+ Requires<[HasCMov]>;
+}
+
+let Defs = [FPSW] in {
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
+defm CMOVB : FPCMov<X86_COND_B>;
+defm CMOVBE : FPCMov<X86_COND_BE>;
+defm CMOVE : FPCMov<X86_COND_E>;
+defm CMOVP : FPCMov<X86_COND_P>;
+defm CMOVNB : FPCMov<X86_COND_AE>;
+defm CMOVNBE: FPCMov<X86_COND_A>;
+defm CMOVNE : FPCMov<X86_COND_NE>;
+defm CMOVNP : FPCMov<X86_COND_NP>;
+} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
+
+let Predicates = [HasCMov] in {
+// These are not factored because there's no clean way to pass DA/DB.
+def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovb\t{$op, %st(0)|ST(0), $op}">, DA;
+def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovbe\t{$op, %st(0)|ST(0), $op}">, DA;
+def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmove\t{$op, %st(0)|ST(0), $op}">, DA;
+def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovu\t {$op, %st(0)|ST(0), $op}">, DA;
+def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnb\t{$op, %st(0)|ST(0), $op}">, DB;
+def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnbe\t{$op, %st(0)|ST(0), $op}">, DB;
+def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovne\t{$op, %st(0)|ST(0), $op}">, DB;
+def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnu\t{$op, %st(0)|ST(0), $op}">, DB;
+} // Predicates = [HasCMov]
+
+// Floating point loads & stores.
+let canFoldAsLoad = 1 in {
+def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (loadf32 addr:$src))]>;
+let isReMaterializable = 1 in
+ def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (loadf64 addr:$src))]>;
+def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (loadf80 addr:$src))]>;
+}
+def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
+def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
+def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
+def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
+
+def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
+ [(store RFP32:$src, addr:$op)]>;
+def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
+ [(truncstoref32 RFP64:$src, addr:$op)]>;
+def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
+ [(store RFP64:$src, addr:$op)]>;
+def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref32 RFP80:$src, addr:$op)]>;
+def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref64 RFP80:$src, addr:$op)]>;
+// FST does not support 80-bit memory target; FSTP must be used.
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
+def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
+def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
+ [(store RFP80:$src, addr:$op)]>;
+let mayStore = 1, neverHasSideEffects = 1 in {
+def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+
+let mayLoad = 1, SchedRW = [WriteLoad] in {
+def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src",
+ IIC_FLD>;
+def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src",
+ IIC_FLD>;
+def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src",
+ IIC_FLD80>;
+def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src",
+ IIC_FILD>;
+def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src",
+ IIC_FILD>;
+def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src",
+ IIC_FILD>;
+}
+let mayStore = 1, SchedRW = [WriteStore] in {
+def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst",
+ IIC_FST>;
+def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst",
+ IIC_FST>;
+def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst",
+ IIC_FST>;
+def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst",
+ IIC_FST>;
+def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst",
+ IIC_FST80>;
+def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst",
+ IIC_FIST>;
+def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst",
+ IIC_FIST>;
+def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst",
+ IIC_FIST>;
+def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst",
+ IIC_FIST>;
+def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst",
+ IIC_FIST>;
+}
+
+// FISTTP requires SSE3 even though it's a FPStack op.
+let Predicates = [HasSSE3] in {
+def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
+def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP32:$src, addr:$op)]>;
+def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP32:$src, addr:$op)]>;
+def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP64:$src, addr:$op)]>;
+def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP64:$src, addr:$op)]>;
+def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP64:$src, addr:$op)]>;
+def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP80:$src, addr:$op)]>;
+def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP80:$src, addr:$op)]>;
+def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
+} // Predicates = [HasSSE3]
+
+let mayStore = 1, SchedRW = [WriteStore] in {
+def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst",
+ IIC_FST>;
+def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst",
+ IIC_FST>;
+def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
+ "fisttp{ll}\t$dst", IIC_FST>;
+}
+
+// FP Stack manipulation instructions.
+let SchedRW = [WriteMove] in {
+def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op",
+ IIC_FLD>, D9;
+def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op",
+ IIC_FST>, DD;
+def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op",
+ IIC_FST>, DD;
+def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op",
+ IIC_FXCH>, D9;
+}
+
+// Floating point constant loads.
+let isReMaterializable = 1 in {
+def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm0)]>;
+def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm1)]>;
+def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm0)]>;
+def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm1)]>;
+def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm0)]>;
+def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm1)]>;
+}
+
+let SchedRW = [WriteZero] in {
+def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz", IIC_FLDZ>, D9;
+def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1", IIC_FIST>, D9;
+}
+
+// Floating point compares.
+let SchedRW = [WriteFAdd] in {
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
+def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
+} // SchedRW
+} // Defs = [FPSW]
+
+let SchedRW = [WriteFAdd] in {
+// CC = ST(0) cmp ST(i)
+let Defs = [EFLAGS, FPSW] in {
+def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
+def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
+def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
+}
+
+let Defs = [FPSW], Uses = [ST0] in {
+def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucom\t$reg", IIC_FUCOM>, DD;
+def UCOM_FPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucomp\t$reg", IIC_FUCOM>, DD;
+def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
+ (outs), (ins),
+ "fucompp", IIC_FUCOM>, DA;
+}
+
+let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
+def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucomi\t$reg", IIC_FUCOMI>, DB;
+def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucompi\t$reg", IIC_FUCOMI>, DF;
+}
+
+let Defs = [EFLAGS, FPSW] in {
+def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
+ "fcomi\t$reg", IIC_FCOMI>, DB;
+def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
+ "fcompi\t$reg", IIC_FCOMI>, DF;
+}
+} // SchedRW
+
+// Floating point flag ops.
+let SchedRW = [WriteALU] in {
+let Defs = [AX], Uses = [FPSW] in
+def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags
+ (outs), (ins), "fnstsw %ax",
+ [(set AX, (X86fp_stsw FPSW))], IIC_FNSTSW>, DF;
+
+def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world
+ (outs), (ins i16mem:$dst), "fnstcw\t$dst",
+ [(X86fp_cwd_get16 addr:$dst)], IIC_FNSTCW>;
+} // SchedRW
+let mayLoad = 1 in
+def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16]
+ (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>,
+ Sched<[WriteLoad]>;
+
+// FPU control instructions
+let SchedRW = [WriteMicrocoded] in {
+let Defs = [FPSW] in
+def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", [], IIC_FNINIT>, DB;
+def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
+ "ffree\t$reg", IIC_FFREE>, DD;
+// Clear exceptions
+
+let Defs = [FPSW] in
+def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", [], IIC_FNCLEX>, DB;
+} // SchedRW
+
+// Operandless floating-point instructions for the disassembler.
+let SchedRW = [WriteMicrocoded] in {
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>;
+
+def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", [], IIC_FNOP>, D9;
+def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", [], IIC_FXAM>, D9;
+def FLDL2T : I<0xE9, RawFrm, (outs), (ins), "fldl2t", [], IIC_FLDL>, D9;
+def FLDL2E : I<0xEA, RawFrm, (outs), (ins), "fldl2e", [], IIC_FLDL>, D9;
+def FLDPI : I<0xEB, RawFrm, (outs), (ins), "fldpi", [], IIC_FLDL>, D9;
+def FLDLG2 : I<0xEC, RawFrm, (outs), (ins), "fldlg2", [], IIC_FLDL>, D9;
+def FLDLN2 : I<0xED, RawFrm, (outs), (ins), "fldln2", [], IIC_FLDL>, D9;
+def F2XM1 : I<0xF0, RawFrm, (outs), (ins), "f2xm1", [], IIC_F2XM1>, D9;
+def FYL2X : I<0xF1, RawFrm, (outs), (ins), "fyl2x", [], IIC_FYL2X>, D9;
+def FPTAN : I<0xF2, RawFrm, (outs), (ins), "fptan", [], IIC_FPTAN>, D9;
+def FPATAN : I<0xF3, RawFrm, (outs), (ins), "fpatan", [], IIC_FPATAN>, D9;
+def FXTRACT : I<0xF4, RawFrm, (outs), (ins), "fxtract", [], IIC_FXTRACT>, D9;
+def FPREM1 : I<0xF5, RawFrm, (outs), (ins), "fprem1", [], IIC_FPREM1>, D9;
+def FDECSTP : I<0xF6, RawFrm, (outs), (ins), "fdecstp", [], IIC_FPSTP>, D9;
+def FINCSTP : I<0xF7, RawFrm, (outs), (ins), "fincstp", [], IIC_FPSTP>, D9;
+def FPREM : I<0xF8, RawFrm, (outs), (ins), "fprem", [], IIC_FPREM>, D9;
+def FYL2XP1 : I<0xF9, RawFrm, (outs), (ins), "fyl2xp1", [], IIC_FYL2XP1>, D9;
+def FSINCOS : I<0xFB, RawFrm, (outs), (ins), "fsincos", [], IIC_FSINCOS>, D9;
+def FRNDINT : I<0xFC, RawFrm, (outs), (ins), "frndint", [], IIC_FRNDINT>, D9;
+def FSCALE : I<0xFD, RawFrm, (outs), (ins), "fscale", [], IIC_FSCALE>, D9;
+def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", [], IIC_FCOMPP>, DE;
+
+def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsave\t$dst", [], IIC_FXSAVE>, TB;
+def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsaveq\t$dst", [], IIC_FXSAVE>, TB, REX_W,
+ Requires<[In64BitMode]>;
+def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstor\t$src", [], IIC_FXRSTOR>, TB;
+def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstorq\t$src", [], IIC_FXRSTOR>, TB, REX_W,
+ Requires<[In64BitMode]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 / f80 values.
+def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
+def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
+
+// Required for CALL which return f32 / f64 / f80 values.
+def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
+ RFP64:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
+ RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
+ RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
+ RFP80:$src)>;
+
+// Floating point constant -0.0 and -1.0
+def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
+def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
+def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
+def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
+def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
+def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
+
+// Used to conv. i64 to f64 since there isn't a SSE version.
+def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
+
+// FP extensions map onto simple pseudo-value conversions if they are to/from
+// the FP stack.
+def : Pat<(f64 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
+ Requires<[FPStackf64]>;
+
+// FP truncations map onto simple pseudo-value conversions if they are to/from
+// the FP stack. We have validated that only value-preserving truncations make
+// it through isel.
+def : Pat<(f32 (fround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f32 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f64 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
+ Requires<[FPStackf64]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
new file mode 100644
index 000000000000..0ef9491eb7fc
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -0,0 +1,658 @@
+//===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>; def RawFrm : Format<1>;
+def AddRegFrm : Format<2>; def MRMDestReg : Format<3>;
+def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>;
+def MRMSrcMem : Format<6>;
+def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>;
+def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>;
+def MRM6r : Format<22>; def MRM7r : Format<23>;
+def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
+def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
+def MRM6m : Format<30>; def MRM7m : Format<31>;
+def MRMInitReg : Format<32>;
+def MRM_C1 : Format<33>;
+def MRM_C2 : Format<34>;
+def MRM_C3 : Format<35>;
+def MRM_C4 : Format<36>;
+def MRM_C8 : Format<37>;
+def MRM_C9 : Format<38>;
+def MRM_E8 : Format<39>;
+def MRM_F0 : Format<40>;
+def MRM_F8 : Format<41>;
+def MRM_F9 : Format<42>;
+def RawFrmImm8 : Format<43>;
+def RawFrmImm16 : Format<44>;
+def MRM_D0 : Format<45>;
+def MRM_D1 : Format<46>;
+def MRM_D4 : Format<47>;
+def MRM_D5 : Format<48>;
+def MRM_D6 : Format<49>;
+def MRM_D8 : Format<50>;
+def MRM_D9 : Format<51>;
+def MRM_DA : Format<52>;
+def MRM_DB : Format<53>;
+def MRM_DC : Format<54>;
+def MRM_DD : Format<55>;
+def MRM_DE : Format<56>;
+def MRM_DF : Format<57>;
+
+// ImmType - This specifies the immediate type used by an instruction. This is
+// part of the ad-hoc solution used to emit machine instruction encodings by our
+// machine code emitter.
+class ImmType<bits<3> val> {
+ bits<3> Value = val;
+}
+def NoImm : ImmType<0>;
+def Imm8 : ImmType<1>;
+def Imm8PCRel : ImmType<2>;
+def Imm16 : ImmType<3>;
+def Imm16PCRel : ImmType<4>;
+def Imm32 : ImmType<5>;
+def Imm32PCRel : ImmType<6>;
+def Imm64 : ImmType<7>;
+
+// FPFormat - This specifies what form this FP instruction has. This is used by
+// the Floating-Point stackifier pass.
+class FPFormat<bits<3> val> {
+ bits<3> Value = val;
+}
+def NotFP : FPFormat<0>;
+def ZeroArgFP : FPFormat<1>;
+def OneArgFP : FPFormat<2>;
+def OneArgFPRW : FPFormat<3>;
+def TwoArgFP : FPFormat<4>;
+def CompareFP : FPFormat<5>;
+def CondMovFP : FPFormat<6>;
+def SpecialFP : FPFormat<7>;
+
+// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
+// Keep in sync with tables in X86InstrInfo.cpp.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def SSEPackedSingle : Domain<1>;
+def SSEPackedDouble : Domain<2>;
+def SSEPackedInt : Domain<3>;
+
+// Prefix byte classes which are used to indicate to the ad-hoc machine code
+// emitter that various prefix bytes are required.
+class OpSize { bit hasOpSizePrefix = 1; }
+class AdSize { bit hasAdSizePrefix = 1; }
+class REX_W { bit hasREX_WPrefix = 1; }
+class LOCK { bit hasLockPrefix = 1; }
+class SegFS { bits<2> SegOvrBits = 1; }
+class SegGS { bits<2> SegOvrBits = 2; }
+class TB { bits<5> Prefix = 1; }
+class REP { bits<5> Prefix = 2; }
+class D8 { bits<5> Prefix = 3; }
+class D9 { bits<5> Prefix = 4; }
+class DA { bits<5> Prefix = 5; }
+class DB { bits<5> Prefix = 6; }
+class DC { bits<5> Prefix = 7; }
+class DD { bits<5> Prefix = 8; }
+class DE { bits<5> Prefix = 9; }
+class DF { bits<5> Prefix = 10; }
+class XD { bits<5> Prefix = 11; }
+class XS { bits<5> Prefix = 12; }
+class T8 { bits<5> Prefix = 13; }
+class TA { bits<5> Prefix = 14; }
+class A6 { bits<5> Prefix = 15; }
+class A7 { bits<5> Prefix = 16; }
+class T8XD { bits<5> Prefix = 17; }
+class T8XS { bits<5> Prefix = 18; }
+class TAXD { bits<5> Prefix = 19; }
+class XOP8 { bits<5> Prefix = 20; }
+class XOP9 { bits<5> Prefix = 21; }
+class VEX { bit hasVEXPrefix = 1; }
+class VEX_W { bit hasVEX_WPrefix = 1; }
+class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_4VOp3 : VEX { bit hasVEX_4VOp3Prefix = 1; }
+class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
+class VEX_L { bit hasVEX_L = 1; }
+class VEX_LIG { bit ignoresVEX_L = 1; }
+class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
+class MemOp4 { bit hasMemOp4Prefix = 1; }
+class XOP { bit hasXOP_Prefix = 1; }
+class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
+ string AsmStr,
+ InstrItinClass itin,
+ Domain d = GenericDomain>
+ : Instruction {
+ let Namespace = "X86";
+
+ bits<8> Opcode = opcod;
+ Format Form = f;
+ bits<6> FormBits = Form.Value;
+ ImmType ImmT = i;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ string AsmString = AsmStr;
+
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+ let Itinerary = itin;
+
+ //
+ // Attributes specific to X86 instructions...
+ //
+ bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
+ bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
+
+ bits<5> Prefix = 0; // Which prefix byte does this inst have?
+ bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
+ FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
+ bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
+ bits<2> SegOvrBits = 0; // Segment override prefix.
+ Domain ExeDomain = d;
+ bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
+ bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
+ bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_4VOp3Prefix = 0; // Does this inst require the VEX.VVVV field to
+ // encode the third operand?
+ bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
+ // to be encoded in a immediate field?
+ bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
+ bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
+ bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
+ bit hasMemOp4Prefix = 0; // Same bit as VEX_W, but used for swapping operands
+ bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix?
+
+ // TSFlags layout should be kept in sync with X86InstrInfo.h.
+ let TSFlags{5-0} = FormBits;
+ let TSFlags{6} = hasOpSizePrefix;
+ let TSFlags{7} = hasAdSizePrefix;
+ let TSFlags{12-8} = Prefix;
+ let TSFlags{13} = hasREX_WPrefix;
+ let TSFlags{16-14} = ImmT.Value;
+ let TSFlags{19-17} = FPForm.Value;
+ let TSFlags{20} = hasLockPrefix;
+ let TSFlags{22-21} = SegOvrBits;
+ let TSFlags{24-23} = ExeDomain.Value;
+ let TSFlags{32-25} = Opcode;
+ let TSFlags{33} = hasVEXPrefix;
+ let TSFlags{34} = hasVEX_WPrefix;
+ let TSFlags{35} = hasVEX_4VPrefix;
+ let TSFlags{36} = hasVEX_4VOp3Prefix;
+ let TSFlags{37} = hasVEX_i8ImmReg;
+ let TSFlags{38} = hasVEX_L;
+ let TSFlags{39} = ignoresVEX_L;
+ let TSFlags{40} = has3DNow0F0FOpcode;
+ let TSFlags{41} = hasMemOp4Prefix;
+ let TSFlags{42} = hasXOP_Prefix;
+}
+
+class PseudoI<dag oops, dag iops, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, oops, iops, "", NoItinerary> {
+ let Pattern = pattern;
+}
+
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
+ Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, itin, d> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
+ Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, itin, d> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : X86Inst<o, f, Imm16, outs, ins, asm, itin> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : X86Inst<o, f, Imm32, outs, ins, asm, itin> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+// FPStack Instruction Templates:
+// FPI - Floating Point Instruction template.
+class FPI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, [], itin> {}
+
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
+class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
+ InstrItinClass itin = NoItinerary>
+ : X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> {
+ let FPForm = fp;
+ let Pattern = pattern;
+}
+
+// Templates for instructions that use a 16- or 32-bit segmented address as
+// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
+//
+// Iseg16 - 16-bit segment selector, 16-bit offset
+// Iseg32 - 16-bit segment selector, 32-bit offset
+
+class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : X86Inst<o, f, Imm16, outs, ins, asm, itin> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : X86Inst<o, f, Imm32, outs, ins, asm, itin> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+def __xs : XS;
+
+// SI - SSE 1 & 2 scalar instructions
+class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// SIi8 - SSE 1 & 2 scalar instructions
+class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PI - SSE 1 & 2 packed instructions
+class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin, Domain d>
+ : I<o, F, outs, ins, asm, pattern, itin, d> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// MMXPI - SSE 1 & 2 packed instructions with MMX operands
+class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin, Domain d>
+ : I<o, F, outs, ins, asm, pattern, itin, d> {
+ let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]);
+}
+
+// PIi8 - SSE 1 & 2 packed instructions with immediate
+class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, d> {
+ let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
+}
+
+// SSE1 Instruction Templates:
+//
+// SSI - SSE1 instructions with XS prefix.
+// PSI - SSE1 instructions with TB prefix.
+// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+// VSSI - SSE1 instructions with XS prefix in AVX form.
+// VPSI - SSE1 instructions with TB prefix in AVX form.
+
+class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
+class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
+ Requires<[UseSSE1]>;
+class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
+ Requires<[UseSSE1]>;
+class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
+ Requires<[HasAVX]>;
+class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB,
+ Requires<[HasAVX]>;
+
+// SSE2 Instruction Templates:
+//
+// SDI - SSE2 instructions with XD prefix.
+// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
+// S2SI - SSE2 instructions with XS prefix.
+// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
+// PDI - SSE2 instructions with TB and OpSize prefixes.
+// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+// VSDI - SSE2 instructions with XD prefix in AVX form.
+// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form.
+// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
+// MMX operands.
+// MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
+// MMX operands.
+
+class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
+class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
+class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>;
+class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
+class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
+ Requires<[UseSSE2]>;
+class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
+ Requires<[UseSSE2]>;
+class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
+ Requires<[HasAVX]>;
+class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
+ Requires<[HasAVX]>;
+class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
+ OpSize, Requires<[HasAVX]>;
+class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
+class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
+
+// SSE3 Instruction Templates:
+//
+// S3I - SSE3 instructions with TB and OpSize prefixes.
+// S3SI - SSE3 instructions with XS prefix.
+// S3DI - SSE3 instructions with XD prefix.
+
+class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
+ Requires<[UseSSE3]>;
+class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
+ Requires<[UseSSE3]>;
+class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
+ Requires<[UseSSE3]>;
+
+
+// SSSE3 Instruction Templates:
+//
+// SS38I - SSSE3 instructions with T8 prefix.
+// SS3AI - SSSE3 instructions with TA prefix.
+// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
+// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
+//
+// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
+// uses the MMX registers. The 64-bit versions are grouped with the MMX
+// classes. They need to be enabled even if AVX is enabled.
+
+class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
+ Requires<[UseSSSE3]>;
+class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ Requires<[UseSSSE3]>;
+class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
+class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ Requires<[HasSSSE3]>;
+
+// SSE4.1 Instruction Templates:
+//
+// SS48I - SSE 4.1 instructions with T8 prefix.
+// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
+//
+class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
+ Requires<[UseSSE41]>;
+class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ Requires<[UseSSE41]>;
+
+// SSE4.2 Instruction Templates:
+//
+// SS428I - SSE 4.2 instructions with T8 prefix.
+class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
+ Requires<[UseSSE42]>;
+
+// SS42FI - SSE 4.2 instructions with T8XD prefix.
+// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
+class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
+
+// SS42AI = SSE 4.2 instructions with TA prefix
+class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ Requires<[UseSSE42]>;
+
+// AVX Instruction Templates:
+// Instructions introduced in AVX (no SSE equivalent forms)
+//
+// AVX8I - AVX instructions with T8 and OpSize prefix.
+// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
+class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
+ Requires<[HasAVX]>;
+class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
+ Requires<[HasAVX]>;
+
+// AVX2 Instruction Templates:
+// Instructions introduced in AVX2 (no SSE equivalent forms)
+//
+// AVX28I - AVX2 instructions with T8 and OpSize prefix.
+// AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8.
+class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
+ Requires<[HasAVX2]>;
+class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
+ Requires<[HasAVX2]>;
+
+// AES Instruction Templates:
+//
+// AES8I
+// These use the same encoding as the SSE4.2 T8 and TA encodings.
+class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
+ Requires<[HasAES]>;
+
+class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ Requires<[HasAES]>;
+
+// PCLMUL Instruction Templates
+class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ OpSize, Requires<[HasPCLMUL]>;
+
+class AVXPCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>;
+
+// FMA3 Instruction Templates
+class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, T8,
+ OpSize, VEX_4V, FMASC, Requires<[HasFMA]>;
+
+// FMA4 Instruction Templates
+class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>;
+
+// XOP 2, 3 and 4 Operand Instruction Template
+class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
+ XOP, XOP9, Requires<[HasXOP]>;
+
+// XOP 2, 3 and 4 Operand Instruction Templates with imm byte
+class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
+ XOP, XOP8, Requires<[HasXOP]>;
+
+// XOP 5 operand instruction (VEX encoding!)
+class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
+
+// X86-64 Instruction templates...
+//
+
+class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, REX_W;
+class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W;
+class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W;
+
+class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : SSI<o, F, outs, ins, asm, pattern, itin>, REX_W;
+class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : SDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
+class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : PDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
+class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : VPDI<o, F, outs, ins, asm, pattern, itin>, VEX_W;
+
+// MMX Instruction templates
+//
+
+// MMXI - MMX instructions with TB prefix.
+// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
+// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
+// MMXID - MMX instructions with XD prefix.
+// MMXIS - MMX instructions with XS prefix.
+class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
+class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>;
+class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>;
+class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>;
+class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
+class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>;
+class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
new file mode 100644
index 000000000000..2a72fb6f7b2a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -0,0 +1,432 @@
+//===-- X86InstrFragmentsSIMD.td - x86 SIMD ISA ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides pattern fragments useful for SIMD instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
+
+//===----------------------------------------------------------------------===//
+// SSE specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
+ SDTCisFP<0>, SDTCisInt<2> ]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+ SDTCisFP<1>, SDTCisVT<3, i8>]>;
+
+def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>;
+def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>;
+def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>;
+def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>;
+
+def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
+def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+
+// Commutative and Associative FMIN and FMAX.
+def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
+def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
+def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
+def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
+def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
+def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
+def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
+def X86pshufb : SDNode<"X86ISD::PSHUFB",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86andnp : SDNode<"X86ISD::ANDNP",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psign : SDNode<"X86ISD::PSIGN",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pextrb : SDNode<"X86ISD::PEXTRB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pextrw : SDNode<"X86ISD::PEXTRW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pinsrb : SDNode<"X86ISD::PINSRB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86pinsrw : SDNode<"X86ISD::PINSRW",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86insrtps : SDNode<"X86ISD::INSERTPS",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+
+def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisOpSmallerThanOp<1, 0> ]>>;
+
+def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
+ SDTypeProfile<1, 1,
+ [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
+
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vzext : SDNode<"X86ISD::VZEXT",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>]>>;
+
+def X86vsext : SDNode<"X86ISD::VSEXT",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>]>>;
+
+def X86vfpext : SDNode<"X86ISD::VFPEXT",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>]>>;
+def X86vfpround: SDNode<"X86ISD::VFPROUND",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>]>>;
+
+def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
+def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
+def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
+def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
+
+def X86vshl : SDNode<"X86ISD::VSHL",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>>;
+def X86vsrl : SDNode<"X86ISD::VSRL",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>>;
+def X86vsra : SDNode<"X86ISD::VSRA",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>>;
+
+def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
+def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
+def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
+
+def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVec<1>,
+ SDTCisSameAs<2, 1>]>;
+def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
+def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+
+def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisSameAs<1,2>]>>;
+
+// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
+// translated into one of the target nodes below during lowering.
+// Note: this is a work in progress...
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+
+def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+
+def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+
+def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+
+def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
+
+def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
+def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
+def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
+
+def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
+
+def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
+def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
+def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
+
+def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;
+def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
+def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
+
+def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
+def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+
+def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
+def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
+
+def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
+def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
+def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
+
+def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
+
+def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+
+def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
+def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
+def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
+def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
+def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
+def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
+def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+
+def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
+ SDTCisVT<4, i8>]>;
+def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, v16i8>, SDTCisVT<3, i32>,
+ SDTCisVT<4, v16i8>, SDTCisVT<5, i32>,
+ SDTCisVT<6, i8>]>;
+
+def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
+def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
+
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
+def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
+
+def ssmem : Operand<v4f32> {
+ let PrintMethod = "printf32mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+def sdmem : Operand<v2f64> {
+ let PrintMethod = "printf64mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+//===----------------------------------------------------------------------===//
+// SSE pattern fragments
+//===----------------------------------------------------------------------===//
+
+// 128-bit load pattern fragments
+// NOTE: all 128-bit integer vector loads are promoted to v2i64
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+
+// 256-bit load pattern fragments
+// NOTE: all 256-bit integer vector loads are promoted to v4i64
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+
+// 128-/256-bit extload pattern fragments
+def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
+def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
+
+// Like 'store', but always requires 128-bit vector alignment.
+def alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'store', but always requires 256-bit vector alignment.
+def alignedstore256 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 32;
+}]>;
+
+// Like 'load', but always requires 128-bit vector alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'X86vzload', but always requires 128-bit vector alignment.
+def alignedX86vzload : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{
+ return cast<MemSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'load', but always requires 256-bit vector alignment.
+def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 32;
+}]>;
+
+def alignedloadfsf32 : PatFrag<(ops node:$ptr),
+ (f32 (alignedload node:$ptr))>;
+def alignedloadfsf64 : PatFrag<(ops node:$ptr),
+ (f64 (alignedload node:$ptr))>;
+
+// 128-bit aligned load pattern fragments
+// NOTE: all 128-bit integer vector loads are promoted to v2i64
+def alignedloadv4f32 : PatFrag<(ops node:$ptr),
+ (v4f32 (alignedload node:$ptr))>;
+def alignedloadv2f64 : PatFrag<(ops node:$ptr),
+ (v2f64 (alignedload node:$ptr))>;
+def alignedloadv2i64 : PatFrag<(ops node:$ptr),
+ (v2i64 (alignedload node:$ptr))>;
+
+// 256-bit aligned load pattern fragments
+// NOTE: all 256-bit integer vector loads are promoted to v4i64
+def alignedloadv8f32 : PatFrag<(ops node:$ptr),
+ (v8f32 (alignedload256 node:$ptr))>;
+def alignedloadv4f64 : PatFrag<(ops node:$ptr),
+ (v4f64 (alignedload256 node:$ptr))>;
+def alignedloadv4i64 : PatFrag<(ops node:$ptr),
+ (v4i64 (alignedload256 node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
+// memory operands in most SSE instructions, which are required to
+// be naturally aligned on some targets but not on others. If the subtarget
+// allows unaligned accesses, match any load, though this may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later implement such a feature.
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
+def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+
+// 128-bit memop pattern fragments
+// NOTE: all 128-bit integer vector loads are promoted to v2i64
+def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
+def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
+def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+
+// 256-bit memop pattern fragments
+// NOTE: all 256-bit integer vector loads are promoted to v4i64
+def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
+def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+
+// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
+// 16-byte boundary.
+// FIXME: 8 byte alignment for mmx reads is not required
+def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
+def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;
+
+// MOVNT Support
+// Like 'store', but requires the non-temporal bit to be set
+def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal();
+ return false;
+}]>;
+
+def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() && !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED &&
+ ST->getAlignment() >= 16;
+ return false;
+}]>;
+
+def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() &&
+ ST->getAlignment() < 16;
+ return false;
+}]>;
+
+// 128-bit bitconvert pattern fragments
+def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
+def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
+def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
+def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+
+// 256-bit bitconvert pattern fragments
+def bc_v32i8 : PatFrag<(ops node:$in), (v32i8 (bitconvert node:$in))>;
+def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
+def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
+
+def vzmovl_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
+def vzmovl_v4i32 : PatFrag<(ops node:$src),
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+
+def vzload_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzload node:$src)))>;
+
+
+def fp32imm0 : PatLeaf<(f32 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+// BYTE_imm - Transform bit immediates into byte immediates.
+def BYTE_imm : SDNodeXForm<imm, [{
+ // Transformation function: imm >> 3
+ return getI32Imm(N->getZExtValue() >> 3);
+}]>;
+
+// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
+// to VEXTRACTF128 imm.
+def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
+ return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
+}]>;
+
+// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
+// VINSERTF128 imm.
+def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
+ return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
+}]>;
+
+def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
+ (extract_subvector node:$bigvec,
+ node:$index), [{
+ return X86::isVEXTRACTF128Index(N);
+}], EXTRACT_get_vextractf128_imm>;
+
+def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
+ node:$index),
+ (insert_subvector node:$bigvec, node:$smallvec,
+ node:$index), [{
+ return X86::isVINSERTF128Index(N);
+}], INSERT_get_vinsertf128_imm>;
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
new file mode 100644
index 000000000000..7ba542c87520
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -0,0 +1,4898 @@
+//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrInfo.h"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include <limits>
+
+#define GET_INSTRINFO_CTOR
+#include "X86GenInstrInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+NoFusing("disable-spill-fusing",
+ cl::desc("Disable fusing of spill code into instructions"));
+static cl::opt<bool>
+PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
+static cl::opt<bool>
+ReMatPICStubLoad("remat-pic-stub-load",
+ cl::desc("Re-materialize load from stub in PIC mode"),
+ cl::init(false), cl::Hidden);
+
+enum {
+ // Select which memory operand is being unfolded.
+ // (stored in bits 0 - 3)
+ TB_INDEX_0 = 0,
+ TB_INDEX_1 = 1,
+ TB_INDEX_2 = 2,
+ TB_INDEX_3 = 3,
+ TB_INDEX_MASK = 0xf,
+
+ // Do not insert the reverse map (MemOp -> RegOp) into the table.
+ // This may be needed because there is a many -> one mapping.
+ TB_NO_REVERSE = 1 << 4,
+
+ // Do not insert the forward map (RegOp -> MemOp) into the table.
+ // This is needed for Native Client, which prohibits branch
+ // instructions from using a memory operand.
+ TB_NO_FORWARD = 1 << 5,
+
+ TB_FOLDED_LOAD = 1 << 6,
+ TB_FOLDED_STORE = 1 << 7,
+
+ // Minimum alignment required for load/store.
+ // Used for RegOp->MemOp conversion.
+ // (stored in bits 8 - 15)
+ TB_ALIGN_SHIFT = 8,
+ TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
+ TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
+ TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
+ TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT
+};
+
+struct X86OpTblEntry {
+ uint16_t RegOp;
+ uint16_t MemOp;
+ uint16_t Flags;
+};
+
+X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
+ : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::ADJCALLSTACKDOWN64
+ : X86::ADJCALLSTACKDOWN32),
+ (tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::ADJCALLSTACKUP64
+ : X86::ADJCALLSTACKUP32)),
+ TM(tm), RI(tm, *this) {
+
+ static const X86OpTblEntry OpTbl2Addr[] = {
+ { X86::ADC32ri, X86::ADC32mi, 0 },
+ { X86::ADC32ri8, X86::ADC32mi8, 0 },
+ { X86::ADC32rr, X86::ADC32mr, 0 },
+ { X86::ADC64ri32, X86::ADC64mi32, 0 },
+ { X86::ADC64ri8, X86::ADC64mi8, 0 },
+ { X86::ADC64rr, X86::ADC64mr, 0 },
+ { X86::ADD16ri, X86::ADD16mi, 0 },
+ { X86::ADD16ri8, X86::ADD16mi8, 0 },
+ { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
+ { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
+ { X86::ADD16rr, X86::ADD16mr, 0 },
+ { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
+ { X86::ADD32ri, X86::ADD32mi, 0 },
+ { X86::ADD32ri8, X86::ADD32mi8, 0 },
+ { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
+ { X86::ADD32rr, X86::ADD32mr, 0 },
+ { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
+ { X86::ADD64ri32, X86::ADD64mi32, 0 },
+ { X86::ADD64ri8, X86::ADD64mi8, 0 },
+ { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
+ { X86::ADD64rr, X86::ADD64mr, 0 },
+ { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
+ { X86::ADD8ri, X86::ADD8mi, 0 },
+ { X86::ADD8rr, X86::ADD8mr, 0 },
+ { X86::AND16ri, X86::AND16mi, 0 },
+ { X86::AND16ri8, X86::AND16mi8, 0 },
+ { X86::AND16rr, X86::AND16mr, 0 },
+ { X86::AND32ri, X86::AND32mi, 0 },
+ { X86::AND32ri8, X86::AND32mi8, 0 },
+ { X86::AND32rr, X86::AND32mr, 0 },
+ { X86::AND64ri32, X86::AND64mi32, 0 },
+ { X86::AND64ri8, X86::AND64mi8, 0 },
+ { X86::AND64rr, X86::AND64mr, 0 },
+ { X86::AND8ri, X86::AND8mi, 0 },
+ { X86::AND8rr, X86::AND8mr, 0 },
+ { X86::DEC16r, X86::DEC16m, 0 },
+ { X86::DEC32r, X86::DEC32m, 0 },
+ { X86::DEC64_16r, X86::DEC64_16m, 0 },
+ { X86::DEC64_32r, X86::DEC64_32m, 0 },
+ { X86::DEC64r, X86::DEC64m, 0 },
+ { X86::DEC8r, X86::DEC8m, 0 },
+ { X86::INC16r, X86::INC16m, 0 },
+ { X86::INC32r, X86::INC32m, 0 },
+ { X86::INC64_16r, X86::INC64_16m, 0 },
+ { X86::INC64_32r, X86::INC64_32m, 0 },
+ { X86::INC64r, X86::INC64m, 0 },
+ { X86::INC8r, X86::INC8m, 0 },
+ { X86::NEG16r, X86::NEG16m, 0 },
+ { X86::NEG32r, X86::NEG32m, 0 },
+ { X86::NEG64r, X86::NEG64m, 0 },
+ { X86::NEG8r, X86::NEG8m, 0 },
+ { X86::NOT16r, X86::NOT16m, 0 },
+ { X86::NOT32r, X86::NOT32m, 0 },
+ { X86::NOT64r, X86::NOT64m, 0 },
+ { X86::NOT8r, X86::NOT8m, 0 },
+ { X86::OR16ri, X86::OR16mi, 0 },
+ { X86::OR16ri8, X86::OR16mi8, 0 },
+ { X86::OR16rr, X86::OR16mr, 0 },
+ { X86::OR32ri, X86::OR32mi, 0 },
+ { X86::OR32ri8, X86::OR32mi8, 0 },
+ { X86::OR32rr, X86::OR32mr, 0 },
+ { X86::OR64ri32, X86::OR64mi32, 0 },
+ { X86::OR64ri8, X86::OR64mi8, 0 },
+ { X86::OR64rr, X86::OR64mr, 0 },
+ { X86::OR8ri, X86::OR8mi, 0 },
+ { X86::OR8rr, X86::OR8mr, 0 },
+ { X86::ROL16r1, X86::ROL16m1, 0 },
+ { X86::ROL16rCL, X86::ROL16mCL, 0 },
+ { X86::ROL16ri, X86::ROL16mi, 0 },
+ { X86::ROL32r1, X86::ROL32m1, 0 },
+ { X86::ROL32rCL, X86::ROL32mCL, 0 },
+ { X86::ROL32ri, X86::ROL32mi, 0 },
+ { X86::ROL64r1, X86::ROL64m1, 0 },
+ { X86::ROL64rCL, X86::ROL64mCL, 0 },
+ { X86::ROL64ri, X86::ROL64mi, 0 },
+ { X86::ROL8r1, X86::ROL8m1, 0 },
+ { X86::ROL8rCL, X86::ROL8mCL, 0 },
+ { X86::ROL8ri, X86::ROL8mi, 0 },
+ { X86::ROR16r1, X86::ROR16m1, 0 },
+ { X86::ROR16rCL, X86::ROR16mCL, 0 },
+ { X86::ROR16ri, X86::ROR16mi, 0 },
+ { X86::ROR32r1, X86::ROR32m1, 0 },
+ { X86::ROR32rCL, X86::ROR32mCL, 0 },
+ { X86::ROR32ri, X86::ROR32mi, 0 },
+ { X86::ROR64r1, X86::ROR64m1, 0 },
+ { X86::ROR64rCL, X86::ROR64mCL, 0 },
+ { X86::ROR64ri, X86::ROR64mi, 0 },
+ { X86::ROR8r1, X86::ROR8m1, 0 },
+ { X86::ROR8rCL, X86::ROR8mCL, 0 },
+ { X86::ROR8ri, X86::ROR8mi, 0 },
+ { X86::SAR16r1, X86::SAR16m1, 0 },
+ { X86::SAR16rCL, X86::SAR16mCL, 0 },
+ { X86::SAR16ri, X86::SAR16mi, 0 },
+ { X86::SAR32r1, X86::SAR32m1, 0 },
+ { X86::SAR32rCL, X86::SAR32mCL, 0 },
+ { X86::SAR32ri, X86::SAR32mi, 0 },
+ { X86::SAR64r1, X86::SAR64m1, 0 },
+ { X86::SAR64rCL, X86::SAR64mCL, 0 },
+ { X86::SAR64ri, X86::SAR64mi, 0 },
+ { X86::SAR8r1, X86::SAR8m1, 0 },
+ { X86::SAR8rCL, X86::SAR8mCL, 0 },
+ { X86::SAR8ri, X86::SAR8mi, 0 },
+ { X86::SBB32ri, X86::SBB32mi, 0 },
+ { X86::SBB32ri8, X86::SBB32mi8, 0 },
+ { X86::SBB32rr, X86::SBB32mr, 0 },
+ { X86::SBB64ri32, X86::SBB64mi32, 0 },
+ { X86::SBB64ri8, X86::SBB64mi8, 0 },
+ { X86::SBB64rr, X86::SBB64mr, 0 },
+ { X86::SHL16rCL, X86::SHL16mCL, 0 },
+ { X86::SHL16ri, X86::SHL16mi, 0 },
+ { X86::SHL32rCL, X86::SHL32mCL, 0 },
+ { X86::SHL32ri, X86::SHL32mi, 0 },
+ { X86::SHL64rCL, X86::SHL64mCL, 0 },
+ { X86::SHL64ri, X86::SHL64mi, 0 },
+ { X86::SHL8rCL, X86::SHL8mCL, 0 },
+ { X86::SHL8ri, X86::SHL8mi, 0 },
+ { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 },
+ { X86::SHLD16rri8, X86::SHLD16mri8, 0 },
+ { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 },
+ { X86::SHLD32rri8, X86::SHLD32mri8, 0 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 },
+ { X86::SHLD64rri8, X86::SHLD64mri8, 0 },
+ { X86::SHR16r1, X86::SHR16m1, 0 },
+ { X86::SHR16rCL, X86::SHR16mCL, 0 },
+ { X86::SHR16ri, X86::SHR16mi, 0 },
+ { X86::SHR32r1, X86::SHR32m1, 0 },
+ { X86::SHR32rCL, X86::SHR32mCL, 0 },
+ { X86::SHR32ri, X86::SHR32mi, 0 },
+ { X86::SHR64r1, X86::SHR64m1, 0 },
+ { X86::SHR64rCL, X86::SHR64mCL, 0 },
+ { X86::SHR64ri, X86::SHR64mi, 0 },
+ { X86::SHR8r1, X86::SHR8m1, 0 },
+ { X86::SHR8rCL, X86::SHR8mCL, 0 },
+ { X86::SHR8ri, X86::SHR8mi, 0 },
+ { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 },
+ { X86::SHRD16rri8, X86::SHRD16mri8, 0 },
+ { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 },
+ { X86::SHRD32rri8, X86::SHRD32mri8, 0 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 },
+ { X86::SHRD64rri8, X86::SHRD64mri8, 0 },
+ { X86::SUB16ri, X86::SUB16mi, 0 },
+ { X86::SUB16ri8, X86::SUB16mi8, 0 },
+ { X86::SUB16rr, X86::SUB16mr, 0 },
+ { X86::SUB32ri, X86::SUB32mi, 0 },
+ { X86::SUB32ri8, X86::SUB32mi8, 0 },
+ { X86::SUB32rr, X86::SUB32mr, 0 },
+ { X86::SUB64ri32, X86::SUB64mi32, 0 },
+ { X86::SUB64ri8, X86::SUB64mi8, 0 },
+ { X86::SUB64rr, X86::SUB64mr, 0 },
+ { X86::SUB8ri, X86::SUB8mi, 0 },
+ { X86::SUB8rr, X86::SUB8mr, 0 },
+ { X86::XOR16ri, X86::XOR16mi, 0 },
+ { X86::XOR16ri8, X86::XOR16mi8, 0 },
+ { X86::XOR16rr, X86::XOR16mr, 0 },
+ { X86::XOR32ri, X86::XOR32mi, 0 },
+ { X86::XOR32ri8, X86::XOR32mi8, 0 },
+ { X86::XOR32rr, X86::XOR32mr, 0 },
+ { X86::XOR64ri32, X86::XOR64mi32, 0 },
+ { X86::XOR64ri8, X86::XOR64mi8, 0 },
+ { X86::XOR64rr, X86::XOR64mr, 0 },
+ { X86::XOR8ri, X86::XOR8mi, 0 },
+ { X86::XOR8rr, X86::XOR8mr, 0 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
+ unsigned RegOp = OpTbl2Addr[i].RegOp;
+ unsigned MemOp = OpTbl2Addr[i].MemOp;
+ unsigned Flags = OpTbl2Addr[i].Flags;
+ AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 0, folded load and store, no alignment requirement.
+ Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
+ }
+
+ static const X86OpTblEntry OpTbl0[] = {
+ { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
+ { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
+ { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
+ { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
+ { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
+ { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
+ { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
+ { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
+ { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD },
+ { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD },
+ { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD },
+ { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD },
+ { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD },
+ { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD },
+ { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD },
+ { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD },
+ { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD },
+ { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
+ { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
+ { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
+ { X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
+ { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
+ { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
+ { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD },
+ { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD },
+ { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD },
+ { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD },
+ { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD },
+ { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD },
+ { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
+ { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
+ { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
+ { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
+ { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
+ { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
+ { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
+ { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
+ { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
+ { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE },
+ { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE },
+ { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE },
+ { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD },
+ { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD },
+ { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD },
+ { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
+ { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
+ { X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
+ { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
+ { X86::SETBr, X86::SETBm, TB_FOLDED_STORE },
+ { X86::SETEr, X86::SETEm, TB_FOLDED_STORE },
+ { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE },
+ { X86::SETGr, X86::SETGm, TB_FOLDED_STORE },
+ { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE },
+ { X86::SETLr, X86::SETLm, TB_FOLDED_STORE },
+ { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE },
+ { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE },
+ { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE },
+ { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE },
+ { X86::SETOr, X86::SETOm, TB_FOLDED_STORE },
+ { X86::SETPr, X86::SETPm, TB_FOLDED_STORE },
+ { X86::SETSr, X86::SETSm, TB_FOLDED_STORE },
+ { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
+ { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
+ { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
+ { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
+ { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
+ { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
+ // AVX 128-bit versions of foldable instructions
+ { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
+ { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE },
+ { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
+ { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE },
+ { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE },
+ { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
+ // AVX 256-bit foldable instructions
+ { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
+ unsigned RegOp = OpTbl0[i].RegOp;
+ unsigned MemOp = OpTbl0[i].MemOp;
+ unsigned Flags = OpTbl0[i].Flags;
+ AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
+ RegOp, MemOp, TB_INDEX_0 | Flags);
+ }
+
+ static const X86OpTblEntry OpTbl1[] = {
+ { X86::CMP16rr, X86::CMP16rm, 0 },
+ { X86::CMP32rr, X86::CMP32rm, 0 },
+ { X86::CMP64rr, X86::CMP64rm, 0 },
+ { X86::CMP8rr, X86::CMP8rm, 0 },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm, TB_NO_REVERSE },
+ { X86::FsMOVAPSrr, X86::MOVSSrm, TB_NO_REVERSE },
+ { X86::IMUL16rri, X86::IMUL16rmi, 0 },
+ { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
+ { X86::IMUL32rri, X86::IMUL32rmi, 0 },
+ { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
+ { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
+ { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
+ { X86::MOV16rr, X86::MOV16rm, 0 },
+ { X86::MOV32rr, X86::MOV32rm, 0 },
+ { X86::MOV64rr, X86::MOV64rm, 0 },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+ { X86::MOV8rr, X86::MOV8rm, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
+ { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+ { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
+ { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
+ { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
+ { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
+ { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
+ { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
+ { X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
+ { X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
+ { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
+ { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
+ { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
+ { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int, TB_ALIGN_16 },
+ { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
+ { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
+ { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
+ { X86::SQRTSDr, X86::SQRTSDm, 0 },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
+ { X86::SQRTSSr, X86::SQRTSSm, 0 },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
+ { X86::TEST16rr, X86::TEST16rm, 0 },
+ { X86::TEST32rr, X86::TEST32rm, 0 },
+ { X86::TEST64rr, X86::TEST64rm, 0 },
+ { X86::TEST8rr, X86::TEST8rm, 0 },
+ // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
+ { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
+ { X86::UCOMISSrr, X86::UCOMISSrm, 0 },
+ // AVX 128-bit versions of foldable instructions
+ { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
+ { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
+ { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
+ { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
+ { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
+ { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 },
+ { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
+ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
+ { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
+ { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
+ { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
+ { X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
+ { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
+ { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
+ { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
+ { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
+ { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
+ { X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 },
+ { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
+ { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
+ { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
+ { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 },
+ { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
+ { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
+ { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
+ { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
+ { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
+ { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::VPABSBrr128, X86::VPABSBrm128, 0 },
+ { X86::VPABSDrr128, X86::VPABSDrm128, 0 },
+ { X86::VPABSWrr128, X86::VPABSWrm128, 0 },
+ { X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
+ { X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
+ { X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
+ { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
+ { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
+ { X86::VRCPPSr, X86::VRCPPSm, 0 },
+ { X86::VRCPPSr_Int, X86::VRCPPSm_Int, 0 },
+ { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
+ { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, 0 },
+ { X86::VSQRTPDr, X86::VSQRTPDm, 0 },
+ { X86::VSQRTPSr, X86::VSQRTPSm, 0 },
+ { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
+ { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
+ { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
+
+ // AVX 256-bit foldable instructions
+ { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
+ { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
+ { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
+
+ // AVX2 foldable instructions
+ { X86::VPABSBrr256, X86::VPABSBrm256, 0 },
+ { X86::VPABSDrr256, X86::VPABSDrm256, 0 },
+ { X86::VPABSWrr256, X86::VPABSWrm256, 0 },
+ { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
+ { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
+ { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
+ { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
+
+ // BMI/BMI2/LZCNT/POPCNT foldable instructions
+ { X86::BEXTR32rr, X86::BEXTR32rm, 0 },
+ { X86::BEXTR64rr, X86::BEXTR64rm, 0 },
+ { X86::BLSI32rr, X86::BLSI32rm, 0 },
+ { X86::BLSI64rr, X86::BLSI64rm, 0 },
+ { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
+ { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
+ { X86::BLSR32rr, X86::BLSR32rm, 0 },
+ { X86::BLSR64rr, X86::BLSR64rm, 0 },
+ { X86::BZHI32rr, X86::BZHI32rm, 0 },
+ { X86::BZHI64rr, X86::BZHI64rm, 0 },
+ { X86::LZCNT16rr, X86::LZCNT16rm, 0 },
+ { X86::LZCNT32rr, X86::LZCNT32rm, 0 },
+ { X86::LZCNT64rr, X86::LZCNT64rm, 0 },
+ { X86::POPCNT16rr, X86::POPCNT16rm, 0 },
+ { X86::POPCNT32rr, X86::POPCNT32rm, 0 },
+ { X86::POPCNT64rr, X86::POPCNT64rm, 0 },
+ { X86::RORX32ri, X86::RORX32mi, 0 },
+ { X86::RORX64ri, X86::RORX64mi, 0 },
+ { X86::SARX32rr, X86::SARX32rm, 0 },
+ { X86::SARX64rr, X86::SARX64rm, 0 },
+ { X86::SHRX32rr, X86::SHRX32rm, 0 },
+ { X86::SHRX64rr, X86::SHRX64rm, 0 },
+ { X86::SHLX32rr, X86::SHLX32rm, 0 },
+ { X86::SHLX64rr, X86::SHLX64rm, 0 },
+ { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
+ { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
+ { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
+ unsigned RegOp = OpTbl1[i].RegOp;
+ unsigned MemOp = OpTbl1[i].MemOp;
+ unsigned Flags = OpTbl1[i].Flags;
+ AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 1, folded load
+ Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
+ }
+
+ static const X86OpTblEntry OpTbl2[] = {
+ { X86::ADC32rr, X86::ADC32rm, 0 },
+ { X86::ADC64rr, X86::ADC64rm, 0 },
+ { X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
+ { X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
+ { X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
+ { X86::ADD8rr, X86::ADD8rm, 0 },
+ { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
+ { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
+ { X86::ADDSDrr, X86::ADDSDrm, 0 },
+ { X86::ADDSSrr, X86::ADDSSrm, 0 },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
+ { X86::AND16rr, X86::AND16rm, 0 },
+ { X86::AND32rr, X86::AND32rm, 0 },
+ { X86::AND64rr, X86::AND64rm, 0 },
+ { X86::AND8rr, X86::AND8rm, 0 },
+ { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 },
+ { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
+ { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
+ { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
+ { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 },
+ { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
+ { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
+ { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
+ { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
+ { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
+ { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
+ { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
+ { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
+ { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+ { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
+ { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
+ { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
+ { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
+ { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
+ { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
+ { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
+ { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
+ { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
+ { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
+ { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
+ { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
+ { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
+ { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
+ { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
+ { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
+ { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
+ { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+ { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 },
+ { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
+ { X86::CMPSDrr, X86::CMPSDrm, 0 },
+ { X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
+ { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
+ { X86::DIVSDrr, X86::DIVSDrm, 0 },
+ { X86::DIVSSrr, X86::DIVSSrm, 0 },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm, TB_ALIGN_16 },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm, TB_ALIGN_16 },
+ { X86::FsANDPDrr, X86::FsANDPDrm, TB_ALIGN_16 },
+ { X86::FsANDPSrr, X86::FsANDPSrm, TB_ALIGN_16 },
+ { X86::FsORPDrr, X86::FsORPDrm, TB_ALIGN_16 },
+ { X86::FsORPSrr, X86::FsORPSrm, TB_ALIGN_16 },
+ { X86::FsXORPDrr, X86::FsXORPDrm, TB_ALIGN_16 },
+ { X86::FsXORPSrr, X86::FsXORPSrm, TB_ALIGN_16 },
+ { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
+ { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
+ { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
+ { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 },
+ { X86::IMUL16rr, X86::IMUL16rm, 0 },
+ { X86::IMUL32rr, X86::IMUL32rm, 0 },
+ { X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+ { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
+ { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
+ { X86::MAXSDrr, X86::MAXSDrm, 0 },
+ { X86::MAXSSrr, X86::MAXSSrm, 0 },
+ { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
+ { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
+ { X86::MINSDrr, X86::MINSDrm, 0 },
+ { X86::MINSSrr, X86::MINSSrm, 0 },
+ { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
+ { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
+ { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
+ { X86::MULSDrr, X86::MULSDrm, 0 },
+ { X86::MULSSrr, X86::MULSSrm, 0 },
+ { X86::OR16rr, X86::OR16rm, 0 },
+ { X86::OR32rr, X86::OR32rm, 0 },
+ { X86::OR64rr, X86::OR64rm, 0 },
+ { X86::OR8rr, X86::OR8rm, 0 },
+ { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 },
+ { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
+ { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
+ { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
+ { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
+ { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
+ { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
+ { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
+ { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 },
+ { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 },
+ { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
+ { X86::PALIGNR128rr, X86::PALIGNR128rm, TB_ALIGN_16 },
+ { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
+ { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
+ { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
+ { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
+ { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
+ { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
+ { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
+ { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
+ { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
+ { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
+ { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
+ { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
+ { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
+ { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
+ { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
+ { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
+ { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
+ { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
+ { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
+ { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
+ { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
+ { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
+ { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
+ { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
+ { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
+ { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
+ { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
+ { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
+ { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
+ { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
+ { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
+ { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
+ { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
+ { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 },
+ { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
+ { X86::PORrr, X86::PORrm, TB_ALIGN_16 },
+ { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
+ { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
+ { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 },
+ { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 },
+ { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 },
+ { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
+ { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
+ { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
+ { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 },
+ { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 },
+ { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 },
+ { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 },
+ { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 },
+ { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 },
+ { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 },
+ { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 },
+ { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 },
+ { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
+ { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
+ { X86::SBB32rr, X86::SBB32rm, 0 },
+ { X86::SBB64rr, X86::SBB64rm, 0 },
+ { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
+ { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 },
+ { X86::SUB16rr, X86::SUB16rm, 0 },
+ { X86::SUB32rr, X86::SUB32rm, 0 },
+ { X86::SUB64rr, X86::SUB64rm, 0 },
+ { X86::SUB8rr, X86::SUB8rm, 0 },
+ { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
+ { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
+ { X86::SUBSDrr, X86::SUBSDrm, 0 },
+ { X86::SUBSSrr, X86::SUBSSrm, 0 },
+ // FIXME: TEST*rr -> swapped operand of TEST*mr.
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 },
+ { X86::XOR16rr, X86::XOR16rm, 0 },
+ { X86::XOR32rr, X86::XOR32rm, 0 },
+ { X86::XOR64rr, X86::XOR64rm, 0 },
+ { X86::XOR8rr, X86::XOR8rm, 0 },
+ { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
+ { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
+ // AVX 128-bit versions of foldable instructions
+ { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
+ { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
+ { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
+ { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
+ { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
+ { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 },
+ { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 },
+ { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 },
+ { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
+ { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
+ { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
+ { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
+ { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
+ { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
+ { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
+ { X86::VADDPDrr, X86::VADDPDrm, 0 },
+ { X86::VADDPSrr, X86::VADDPSrm, 0 },
+ { X86::VADDSDrr, X86::VADDSDrm, 0 },
+ { X86::VADDSSrr, X86::VADDSSrm, 0 },
+ { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
+ { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
+ { X86::VANDNPDrr, X86::VANDNPDrm, 0 },
+ { X86::VANDNPSrr, X86::VANDNPSrm, 0 },
+ { X86::VANDPDrr, X86::VANDPDrm, 0 },
+ { X86::VANDPSrr, X86::VANDPSrm, 0 },
+ { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 },
+ { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 },
+ { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 },
+ { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 },
+ { X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
+ { X86::VCMPPSrri, X86::VCMPPSrmi, 0 },
+ { X86::VCMPSDrr, X86::VCMPSDrm, 0 },
+ { X86::VCMPSSrr, X86::VCMPSSrm, 0 },
+ { X86::VDIVPDrr, X86::VDIVPDrm, 0 },
+ { X86::VDIVPSrr, X86::VDIVPSrm, 0 },
+ { X86::VDIVSDrr, X86::VDIVSDrm, 0 },
+ { X86::VDIVSSrr, X86::VDIVSSrm, 0 },
+ { X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 },
+ { X86::VFsANDNPSrr, X86::VFsANDNPSrm, TB_ALIGN_16 },
+ { X86::VFsANDPDrr, X86::VFsANDPDrm, TB_ALIGN_16 },
+ { X86::VFsANDPSrr, X86::VFsANDPSrm, TB_ALIGN_16 },
+ { X86::VFsORPDrr, X86::VFsORPDrm, TB_ALIGN_16 },
+ { X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 },
+ { X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 },
+ { X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 },
+ { X86::VHADDPDrr, X86::VHADDPDrm, 0 },
+ { X86::VHADDPSrr, X86::VHADDPSrm, 0 },
+ { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
+ { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 },
+ { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
+ { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
+ { X86::VMAXPDrr, X86::VMAXPDrm, 0 },
+ { X86::VMAXPSrr, X86::VMAXPSrm, 0 },
+ { X86::VMAXSDrr, X86::VMAXSDrm, 0 },
+ { X86::VMAXSSrr, X86::VMAXSSrm, 0 },
+ { X86::VMINPDrr, X86::VMINPDrm, 0 },
+ { X86::VMINPSrr, X86::VMINPSrm, 0 },
+ { X86::VMINSDrr, X86::VMINSDrm, 0 },
+ { X86::VMINSSrr, X86::VMINSSrm, 0 },
+ { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
+ { X86::VMULPDrr, X86::VMULPDrm, 0 },
+ { X86::VMULPSrr, X86::VMULPSrm, 0 },
+ { X86::VMULSDrr, X86::VMULSDrm, 0 },
+ { X86::VMULSSrr, X86::VMULSSrm, 0 },
+ { X86::VORPDrr, X86::VORPDrm, 0 },
+ { X86::VORPSrr, X86::VORPSrm, 0 },
+ { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
+ { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 },
+ { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 },
+ { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 },
+ { X86::VPADDBrr, X86::VPADDBrm, 0 },
+ { X86::VPADDDrr, X86::VPADDDrm, 0 },
+ { X86::VPADDQrr, X86::VPADDQrm, 0 },
+ { X86::VPADDSBrr, X86::VPADDSBrm, 0 },
+ { X86::VPADDSWrr, X86::VPADDSWrm, 0 },
+ { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 },
+ { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 },
+ { X86::VPADDWrr, X86::VPADDWrm, 0 },
+ { X86::VPALIGNR128rr, X86::VPALIGNR128rm, 0 },
+ { X86::VPANDNrr, X86::VPANDNrm, 0 },
+ { X86::VPANDrr, X86::VPANDrm, 0 },
+ { X86::VPAVGBrr, X86::VPAVGBrm, 0 },
+ { X86::VPAVGWrr, X86::VPAVGWrm, 0 },
+ { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
+ { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 },
+ { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 },
+ { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 },
+ { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 },
+ { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 },
+ { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 },
+ { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 },
+ { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 },
+ { X86::VPHADDDrr, X86::VPHADDDrm, 0 },
+ { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 },
+ { X86::VPHADDWrr, X86::VPHADDWrm, 0 },
+ { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 },
+ { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 },
+ { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 },
+ { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 },
+ { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 },
+ { X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
+ { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 },
+ { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
+ { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
+ { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
+ { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
+ { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
+ { X86::VPMINSBrr, X86::VPMINSBrm, 0 },
+ { X86::VPMINSDrr, X86::VPMINSDrm, 0 },
+ { X86::VPMINUDrr, X86::VPMINUDrm, 0 },
+ { X86::VPMINUWrr, X86::VPMINUWrm, 0 },
+ { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
+ { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
+ { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
+ { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
+ { X86::VPMULDQrr, X86::VPMULDQrm, 0 },
+ { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, 0 },
+ { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
+ { X86::VPMULHWrr, X86::VPMULHWrm, 0 },
+ { X86::VPMULLDrr, X86::VPMULLDrm, 0 },
+ { X86::VPMULLWrr, X86::VPMULLWrm, 0 },
+ { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 },
+ { X86::VPORrr, X86::VPORrm, 0 },
+ { X86::VPSADBWrr, X86::VPSADBWrm, 0 },
+ { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 },
+ { X86::VPSIGNBrr, X86::VPSIGNBrm, 0 },
+ { X86::VPSIGNWrr, X86::VPSIGNWrm, 0 },
+ { X86::VPSIGNDrr, X86::VPSIGNDrm, 0 },
+ { X86::VPSLLDrr, X86::VPSLLDrm, 0 },
+ { X86::VPSLLQrr, X86::VPSLLQrm, 0 },
+ { X86::VPSLLWrr, X86::VPSLLWrm, 0 },
+ { X86::VPSRADrr, X86::VPSRADrm, 0 },
+ { X86::VPSRAWrr, X86::VPSRAWrm, 0 },
+ { X86::VPSRLDrr, X86::VPSRLDrm, 0 },
+ { X86::VPSRLQrr, X86::VPSRLQrm, 0 },
+ { X86::VPSRLWrr, X86::VPSRLWrm, 0 },
+ { X86::VPSUBBrr, X86::VPSUBBrm, 0 },
+ { X86::VPSUBDrr, X86::VPSUBDrm, 0 },
+ { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 },
+ { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 },
+ { X86::VPSUBWrr, X86::VPSUBWrm, 0 },
+ { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 },
+ { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 },
+ { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 },
+ { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 },
+ { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 },
+ { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 },
+ { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
+ { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
+ { X86::VPXORrr, X86::VPXORrm, 0 },
+ { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
+ { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
+ { X86::VSUBPDrr, X86::VSUBPDrm, 0 },
+ { X86::VSUBPSrr, X86::VSUBPSrm, 0 },
+ { X86::VSUBSDrr, X86::VSUBSDrm, 0 },
+ { X86::VSUBSSrr, X86::VSUBSSrm, 0 },
+ { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
+ { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
+ { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
+ { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 },
+ { X86::VXORPDrr, X86::VXORPDrm, 0 },
+ { X86::VXORPSrr, X86::VXORPSrm, 0 },
+ // AVX 256-bit foldable instructions
+ { X86::VADDPDYrr, X86::VADDPDYrm, 0 },
+ { X86::VADDPSYrr, X86::VADDPSYrm, 0 },
+ { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 },
+ { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 },
+ { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 },
+ { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 },
+ { X86::VANDPDYrr, X86::VANDPDYrm, 0 },
+ { X86::VANDPSYrr, X86::VANDPSYrm, 0 },
+ { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 },
+ { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 },
+ { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 },
+ { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 },
+ { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 },
+ { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
+ { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 },
+ { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
+ { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 },
+ { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 },
+ { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
+ { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
+ { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
+ { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
+ { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
+ { X86::VMINPDYrr, X86::VMINPDYrm, 0 },
+ { X86::VMINPSYrr, X86::VMINPSYrm, 0 },
+ { X86::VMULPDYrr, X86::VMULPDYrm, 0 },
+ { X86::VMULPSYrr, X86::VMULPSYrm, 0 },
+ { X86::VORPDYrr, X86::VORPDYrm, 0 },
+ { X86::VORPSYrr, X86::VORPSYrm, 0 },
+ { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 },
+ { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 },
+ { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 },
+ { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 },
+ { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 },
+ { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 },
+ { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
+ { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 },
+ { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 },
+ { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 },
+ { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 },
+ { X86::VXORPDYrr, X86::VXORPDYrm, 0 },
+ { X86::VXORPSYrr, X86::VXORPSYrm, 0 },
+ // AVX2 foldable instructions
+ { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 },
+ { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 },
+ { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 },
+ { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 },
+ { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 },
+ { X86::VPADDBYrr, X86::VPADDBYrm, 0 },
+ { X86::VPADDDYrr, X86::VPADDDYrm, 0 },
+ { X86::VPADDQYrr, X86::VPADDQYrm, 0 },
+ { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 },
+ { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 },
+ { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 },
+ { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 },
+ { X86::VPADDWYrr, X86::VPADDWYrm, 0 },
+ { X86::VPALIGNR256rr, X86::VPALIGNR256rm, 0 },
+ { X86::VPANDNYrr, X86::VPANDNYrm, 0 },
+ { X86::VPANDYrr, X86::VPANDYrm, 0 },
+ { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 },
+ { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 },
+ { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 },
+ { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
+ { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
+ { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
+ { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
+ { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 },
+ { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 },
+ { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 },
+ { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 },
+ { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 },
+ { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 },
+ { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 },
+ { X86::VPERMDYrr, X86::VPERMDYrm, 0 },
+ { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
+ { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 },
+ { X86::VPERMQYri, X86::VPERMQYmi, 0 },
+ { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 },
+ { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 },
+ { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 },
+ { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
+ { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 },
+ { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
+ { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 },
+ { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
+ { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
+ { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
+ { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
+ { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
+ { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
+ { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
+ { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
+ { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
+ { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
+ { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
+ { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
+ { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
+ { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, 0 },
+ { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 },
+ { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 },
+ { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 },
+ { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 },
+ { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 },
+ { X86::VPORYrr, X86::VPORYrm, 0 },
+ { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 },
+ { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 },
+ { X86::VPSIGNBYrr, X86::VPSIGNBYrm, 0 },
+ { X86::VPSIGNWYrr, X86::VPSIGNWYrm, 0 },
+ { X86::VPSIGNDYrr, X86::VPSIGNDYrm, 0 },
+ { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 },
+ { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 },
+ { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 },
+ { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 },
+ { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 },
+ { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 },
+ { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 },
+ { X86::VPSRADYrr, X86::VPSRADYrm, 0 },
+ { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
+ { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
+ { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
+ { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
+ { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
+ { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
+ { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 },
+ { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 },
+ { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 },
+ { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 },
+ { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 },
+ { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 },
+ { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 },
+ { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 },
+ { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 },
+ { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 },
+ { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 },
+ { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 },
+ { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 },
+ { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 },
+ { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 },
+ { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 },
+ { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 },
+ { X86::VPXORYrr, X86::VPXORYrm, 0 },
+ // FIXME: add AVX 256-bit foldable instructions
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
+
+ // BMI/BMI2 foldable instructions
+ { X86::ANDN32rr, X86::ANDN32rm, 0 },
+ { X86::ANDN64rr, X86::ANDN64rm, 0 },
+ { X86::MULX32rr, X86::MULX32rm, 0 },
+ { X86::MULX64rr, X86::MULX64rm, 0 },
+ { X86::PDEP32rr, X86::PDEP32rm, 0 },
+ { X86::PDEP64rr, X86::PDEP64rm, 0 },
+ { X86::PEXT32rr, X86::PEXT32rm, 0 },
+ { X86::PEXT64rr, X86::PEXT64rm, 0 },
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
+ unsigned RegOp = OpTbl2[i].RegOp;
+ unsigned MemOp = OpTbl2[i].MemOp;
+ unsigned Flags = OpTbl2[i].Flags;
+ AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 2, folded load
+ Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
+ }
+
+ static const X86OpTblEntry OpTbl3[] = {
+ // FMA foldable instructions
+ { X86::VFMADDSSr231r, X86::VFMADDSSr231m, 0 },
+ { X86::VFMADDSDr231r, X86::VFMADDSDr231m, 0 },
+ { X86::VFMADDSSr132r, X86::VFMADDSSr132m, 0 },
+ { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
+ { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 },
+ { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 },
+ { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 },
+ { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 },
+
+ { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
+ { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
+ { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_16 },
+ { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_16 },
+ { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_16 },
+ { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_16 },
+ { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
+
+ { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
+ { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
+ { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, 0 },
+ { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
+ { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 },
+ { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 },
+ { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 },
+ { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 },
+
+ { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
+
+ { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
+ { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
+ { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, 0 },
+ { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
+ { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 },
+ { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 },
+ { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 },
+ { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 },
+
+ { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
+
+ { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
+ { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
+ { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, 0 },
+ { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
+ { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 },
+ { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 },
+ { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 },
+ { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 },
+
+ { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
+
+ { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 },
+
+ { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4rm, 0 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0 },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, 0 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, 0 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0 },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, 0 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
+ unsigned RegOp = OpTbl3[i].RegOp;
+ unsigned MemOp = OpTbl3[i].MemOp;
+ unsigned Flags = OpTbl3[i].Flags;
+ AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 3, folded load
+ Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
+ }
+
+}
+
+void
+X86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags) {
+ if ((Flags & TB_NO_FORWARD) == 0) {
+ assert(!R2MTable.count(RegOp) && "Duplicate entry!");
+ R2MTable[RegOp] = std::make_pair(MemOp, Flags);
+ }
+ if ((Flags & TB_NO_REVERSE) == 0) {
+ assert(!M2RTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ M2RTable[MemOp] = std::make_pair(RegOp, Flags);
+ }
+}
+
+bool
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ if (!TM.getSubtarget<X86Subtarget>().is64Bit())
+ // It's not always legal to reference the low 8-bit of the larger
+ // register in 32-bit mode.
+ return false;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32: {
+ if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+ // Be conservative.
+ return false;
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ SubIdx = X86::sub_8bit;
+ break;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ SubIdx = X86::sub_16bit;
+ break;
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32:
+ SubIdx = X86::sub_32bit;
+ break;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isFrameOperand - Return true and the FrameIndex if the specified
+/// operand and follow operands form a reference to the stack frame.
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const {
+ if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
+ MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
+ MI->getOperand(Op+1).getImm() == 1 &&
+ MI->getOperand(Op+2).getReg() == 0 &&
+ MI->getOperand(Op+3).getImm() == 0) {
+ FrameIndex = MI->getOperand(Op).getIndex();
+ return true;
+ }
+ return false;
+}
+
+static bool isFrameLoadOpcode(int Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ return true;
+ }
+}
+
+static bool isFrameStoreOpcode(int Opcode) {
+ switch (Opcode) {
+ default: break;
+ case X86::MOV8mr:
+ case X86::MOV16mr:
+ case X86::MOV32mr:
+ case X86::MOV64mr:
+ case X86::ST_FpP64m:
+ case X86::MOVSSmr:
+ case X86::MOVSDmr:
+ case X86::MOVAPSmr:
+ case X86::MOVAPDmr:
+ case X86::MOVDQAmr:
+ case X86::VMOVSSmr:
+ case X86::VMOVSDmr:
+ case X86::VMOVAPSmr:
+ case X86::VMOVAPDmr:
+ case X86::VMOVDQAmr:
+ case X86::VMOVAPSYmr:
+ case X86::VMOVAPDYmr:
+ case X86::VMOVDQAYmr:
+ case X86::MMX_MOVD64mr:
+ case X86::MMX_MOVQ64mr:
+ case X86::MMX_MOVNTQmr:
+ return true;
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode()))
+ if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
+ return MI->getOperand(0).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ const MachineMemOperand *Dummy;
+ return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ }
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode()))
+ if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
+ isFrameOperand(MI, 0, FrameIndex))
+ return MI->getOperand(X86::AddrNumOperands).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ const MachineMemOperand *Dummy;
+ return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ }
+ return 0;
+}
+
+/// regIsPICBase - Return true if register is PIC base (i.e.g defined by
+/// X86::MOVPC32r.
+static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+ // Don't waste compile time scanning use-def chains of physregs.
+ if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
+ return false;
+ bool isPICBase = false;
+ for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+ E = MRI.def_end(); I != E; ++I) {
+ MachineInstr *DefMI = I.getOperand().getParent();
+ if (DefMI->getOpcode() != X86::MOVPC32r)
+ return false;
+ assert(!isPICBase && "More than one PIC base?");
+ isPICBase = true;
+ }
+ return isPICBase;
+}
+
+bool
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
+ // Loads from constant pools are trivially rematerializable.
+ if (MI->getOperand(1).isReg() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ MI->isInvariantLoad(AA)) {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0 || BaseReg == X86::RIP)
+ return true;
+ // Allow re-materialization of PIC load.
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ return false;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
+
+ case X86::LEA32r:
+ case X86::LEA64r: {
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ !MI->getOperand(4).isReg()) {
+ // lea fi#, lea GV, etc. are all rematerializable.
+ if (!MI->getOperand(1).isReg())
+ return true;
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of lea PICBase + x.
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
+ }
+
+ // All other instructions marked M_REMATERIALIZABLE are always trivially
+ // rematerializable.
+ return true;
+}
+
+/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that
+/// would clobber the EFLAGS condition register. Note the result may be
+/// conservative. If it cannot definitely determine the safety after visiting
+/// a few instructions in each direction it assumes it's not safe.
+static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator E = MBB.end();
+
+ // For compile time consideration, if we are not able to determine the
+ // safety after visiting 4 instructions in each direction, we will assume
+ // it's not safe.
+ MachineBasicBlock::iterator Iter = I;
+ for (unsigned i = 0; Iter != E && i < 4; ++i) {
+ bool SeenDef = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ SeenDef = true;
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() == X86::EFLAGS) {
+ if (MO.isUse())
+ return false;
+ SeenDef = true;
+ }
+ }
+
+ if (SeenDef)
+ // This instruction defines EFLAGS, no need to look any further.
+ return true;
+ ++Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != E && Iter->isDebugValue())
+ ++Iter;
+ }
+
+ // It is safe to clobber EFLAGS at the end of a block of no successor has it
+ // live in.
+ if (Iter == E) {
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(X86::EFLAGS))
+ return false;
+ return true;
+ }
+
+ MachineBasicBlock::iterator B = MBB.begin();
+ Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
+ // If we make it to the beginning of the block, it's safe to clobber
+ // EFLAGS iff EFLAGS is not live-in.
+ if (Iter == B)
+ return !MBB.isLiveIn(X86::EFLAGS);
+
+ --Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != B && Iter->isDebugValue())
+ --Iter;
+
+ bool SawKill = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ // A register mask may clobber EFLAGS, but we should still look for a
+ // live EFLAGS def.
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ SawKill = true;
+ if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
+ if (MO.isDef()) return MO.isDead();
+ if (MO.isKill()) SawKill = true;
+ }
+ }
+
+ if (SawKill)
+ // This instruction kills EFLAGS and doesn't redefine it, so
+ // there's no need to look further.
+ return true;
+ }
+
+ // Conservative answer.
+ return false;
+}
+
+void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ DebugLoc DL = Orig->getDebugLoc();
+
+ // MOV32r0 etc. are implemented with xor which clobbers condition code.
+ // Re-materialize them as movri instructions to avoid side effects.
+ bool Clone = true;
+ unsigned Opc = Orig->getOpcode();
+ switch (Opc) {
+ default: break;
+ case X86::MOV8r0:
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
+ if (!isSafeToClobberEFLAGS(MBB, I)) {
+ switch (Opc) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
+ case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
+ }
+ Clone = false;
+ }
+ break;
+ }
+ }
+
+ if (Clone) {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MBB.insert(I, MI);
+ } else {
+ BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
+ }
+
+ MachineInstr *NewMI = prior(I);
+ NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+}
+
+/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
+/// is not marked dead.
+static bool hasLiveCondCodeDef(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ MO.getReg() == X86::EFLAGS && !MO.isDead()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
+/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
+/// to a 32-bit superregister and then truncating back down to a 16-bit
+/// subregister.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::LEA64_32r : X86::LEA32r;
+ MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // well be shifting and then extracting the lower 16-bits.
+ // This has the potential to cause partial register stall. e.g.
+ // movw (%rbp,%rcx,2), %dx
+ // leal -65(%rdx), %esi
+ // But testing has shown this *does* help performance in 64-bit mode (at
+ // least on modern x86 machines).
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg, RegState::Define, X86::sub_16bit)
+ .addReg(Src, getKillRegState(isKill));
+
+ MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+ get(Opc), leaOutReg);
+ switch (MIOpc) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::SHL16ri: {
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ MIB.addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ addRegOffset(MIB, leaInReg, true, 1);
+ break;
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ addRegOffset(MIB, leaInReg, true, -1);
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ unsigned leaInReg2 = 0;
+ MachineInstr *InsMI2 = 0;
+ if (Src == Src2) {
+ // ADD16rr %reg1028<kill>, %reg1028
+ // just a single insert_subreg.
+ addRegReg(MIB, leaInReg, true, leaInReg, false);
+ } else {
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // well be shifting and then extracting the lower 16-bits.
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2);
+ InsMI2 =
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
+ .addReg(Src2, getKillRegState(isKill2));
+ addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ }
+ if (LV && isKill2 && InsMI2)
+ LV->replaceKillInstruction(Src2, MI, InsMI2);
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = MIB;
+ MachineInstr *ExtMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
+
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+
+ return ExtMI;
+}
+
+/// convertToThreeAddress - This method must be implemented by targets that
+/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+/// may be able to convert a two-address instruction into a true
+/// three-address instruction on demand. This allows the X86 target (for
+/// example) to convert ADD and SHL instructions into LEA instructions if they
+/// would require register copies due to two-addressness.
+///
+/// This method returns a null pointer if the transformation cannot be
+/// performed, otherwise it returns the new instruction.
+///
+MachineInstr *
+X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ // All instructions input are two-addr instructions. Get the known operands.
+ const MachineOperand &Dest = MI->getOperand(0);
+ const MachineOperand &Src = MI->getOperand(1);
+
+ MachineInstr *NewMI = NULL;
+ // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
+ // we have better subtarget support, enable the 16-bit LEA generation here.
+ // 16-bit LEA is also slow on Core2.
+ bool DisableLEA16 = true;
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ unsigned MIOpc = MI->getOpcode();
+ switch (MIOpc) {
+ case X86::SHUFPSrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned M = MI->getOperand(3).getImm();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addOperand(Dest).addOperand(Src).addImm(M);
+ break;
+ }
+ case X86::SHUFPDrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned M = MI->getOperand(3).getImm();
+
+ // Convert to PSHUFD mask.
+ M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addOperand(Dest).addOperand(Src).addImm(M);
+ break;
+ }
+ case X86::SHL64ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+ // the flags produced by a shift yet, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(),
+ &X86::GR64_NOSPRegClass))
+ return 0;
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addOperand(Dest)
+ .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
+ break;
+ }
+ case X86::SHL32ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+ // the flags produced by a shift yet, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ // LEA can't handle ESP.
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(),
+ &X86::GR32_NOSPRegClass))
+ return 0;
+
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest)
+ .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
+ break;
+ }
+ case X86::SHL16ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+ // the flags produced by a shift yet, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest)
+ .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
+ break;
+ }
+ default: {
+ // The following opcodes also sets the condition code register(s). Only
+ // convert them to equivalent lea if the condition code register def's
+ // are dead!
+ if (hasLiveCondCodeDef(MI))
+ return 0;
+
+ switch (MIOpc) {
+ default: return 0;
+ case X86::INC64r:
+ case X86::INC32r:
+ case X86::INC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ const TargetRegisterClass *RC = MIOpc == X86::INC64r ?
+ (const TargetRegisterClass*)&X86::GR64_NOSPRegClass :
+ (const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(), RC))
+ return 0;
+
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest).addOperand(Src), 1);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src), 1);
+ break;
+ case X86::DEC64r:
+ case X86::DEC32r:
+ case X86::DEC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ const TargetRegisterClass *RC = MIOpc == X86::DEC64r ?
+ (const TargetRegisterClass*)&X86::GR64_NOSPRegClass :
+ (const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(), RC))
+ return 0;
+
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest).addOperand(Src), -1);
+ break;
+ }
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src), -1);
+ break;
+ case X86::ADD64rr:
+ case X86::ADD64rr_DB:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
+ Opc = X86::LEA64r;
+ RC = &X86::GR64_NOSPRegClass;
+ } else {
+ Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ RC = &X86::GR32_NOSPRegClass;
+ }
+
+
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src2) &&
+ !MF.getRegInfo().constrainRegClass(Src2, RC))
+ return 0;
+
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest),
+ Src.getReg(), Src.isKill(), Src2, isKill2);
+
+ // Preserve undefness of the operands.
+ bool isUndef = MI->getOperand(1).isUndef();
+ bool isUndef2 = MI->getOperand(2).isUndef();
+ NewMI->getOperand(1).setIsUndef(isUndef);
+ NewMI->getOperand(3).setIsUndef(isUndef2);
+
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest),
+ Src.getReg(), Src.isKill(), Src2, isKill2);
+
+ // Preserve undefness of the operands.
+ bool isUndef = MI->getOperand(1).isUndef();
+ bool isUndef2 = MI->getOperand(2).isUndef();
+ NewMI->getOperand(1).setIsUndef(isUndef);
+ NewMI->getOperand(3).setIsUndef(isUndef2);
+
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8_DB:
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
+ break;
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32ri_DB:
+ case X86::ADD32ri8_DB: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
+ break;
+ }
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
+ break;
+ }
+ }
+ }
+
+ if (!NewMI) return 0;
+
+ if (LV) { // Update live variables
+ if (Src.isKill())
+ LV->replaceKillInstruction(Src.getReg(), MI, NewMI);
+ if (Dest.isDead())
+ LV->replaceKillInstruction(Dest.getReg(), MI, NewMI);
+ }
+
+ MFI->insert(MBBI, NewMI); // Insert the new inst
+ return NewMI;
+}
+
+/// commuteInstruction - We have a few instructions that must be hacked on to
+/// commute them.
+///
+MachineInstr *
+X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ switch (MI->getOpcode()) {
+ case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
+ case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
+ case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
+ case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
+ case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
+ case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
+ unsigned Opc;
+ unsigned Size;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
+ case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
+ case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
+ case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
+ case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
+ case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
+ }
+ unsigned Amt = MI->getOperand(3).getImm();
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ MI->getOperand(3).setImm(Size-Amt);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
+ case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
+ case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
+ case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
+ case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
+ case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
+ case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr:
+ case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
+ case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
+ case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
+ case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
+ case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
+ case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
+ case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
+ case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
+ case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
+ case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
+ case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
+ case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
+ case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
+ case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
+ case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
+ case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
+ case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
+ case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
+ case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
+ case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
+ case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
+ case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
+ case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
+ case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
+ case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
+ case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
+ case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
+ case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
+ case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
+ case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
+ case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
+ case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
+ case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
+ case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
+ case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
+ case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
+ case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
+ case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
+ case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
+ case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
+ case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
+ case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
+ case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
+ case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
+ case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
+ case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
+ case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
+ case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
+ case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
+ case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
+ case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
+ case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
+ case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
+ case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
+ case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
+ case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
+ case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
+ }
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ // Fallthrough intended.
+ }
+ default:
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
+}
+
+static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
+ switch (BrOpc) {
+ default: return X86::COND_INVALID;
+ case X86::JE_4: return X86::COND_E;
+ case X86::JNE_4: return X86::COND_NE;
+ case X86::JL_4: return X86::COND_L;
+ case X86::JLE_4: return X86::COND_LE;
+ case X86::JG_4: return X86::COND_G;
+ case X86::JGE_4: return X86::COND_GE;
+ case X86::JB_4: return X86::COND_B;
+ case X86::JBE_4: return X86::COND_BE;
+ case X86::JA_4: return X86::COND_A;
+ case X86::JAE_4: return X86::COND_AE;
+ case X86::JS_4: return X86::COND_S;
+ case X86::JNS_4: return X86::COND_NS;
+ case X86::JP_4: return X86::COND_P;
+ case X86::JNP_4: return X86::COND_NP;
+ case X86::JO_4: return X86::COND_O;
+ case X86::JNO_4: return X86::COND_NO;
+ }
+}
+
+/// getCondFromSETOpc - return condition code of a SET opcode.
+static X86::CondCode getCondFromSETOpc(unsigned Opc) {
+ switch (Opc) {
+ default: return X86::COND_INVALID;
+ case X86::SETAr: case X86::SETAm: return X86::COND_A;
+ case X86::SETAEr: case X86::SETAEm: return X86::COND_AE;
+ case X86::SETBr: case X86::SETBm: return X86::COND_B;
+ case X86::SETBEr: case X86::SETBEm: return X86::COND_BE;
+ case X86::SETEr: case X86::SETEm: return X86::COND_E;
+ case X86::SETGr: case X86::SETGm: return X86::COND_G;
+ case X86::SETGEr: case X86::SETGEm: return X86::COND_GE;
+ case X86::SETLr: case X86::SETLm: return X86::COND_L;
+ case X86::SETLEr: case X86::SETLEm: return X86::COND_LE;
+ case X86::SETNEr: case X86::SETNEm: return X86::COND_NE;
+ case X86::SETNOr: case X86::SETNOm: return X86::COND_NO;
+ case X86::SETNPr: case X86::SETNPm: return X86::COND_NP;
+ case X86::SETNSr: case X86::SETNSm: return X86::COND_NS;
+ case X86::SETOr: case X86::SETOm: return X86::COND_O;
+ case X86::SETPr: case X86::SETPm: return X86::COND_P;
+ case X86::SETSr: case X86::SETSm: return X86::COND_S;
+ }
+}
+
+/// getCondFromCmovOpc - return condition code of a CMov opcode.
+X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
+ switch (Opc) {
+ default: return X86::COND_INVALID;
+ case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
+ case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr:
+ return X86::COND_A;
+ case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm:
+ case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr:
+ return X86::COND_AE;
+ case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm:
+ case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr:
+ return X86::COND_B;
+ case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm:
+ case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr:
+ return X86::COND_BE;
+ case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm:
+ case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr:
+ return X86::COND_E;
+ case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm:
+ case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr:
+ return X86::COND_G;
+ case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm:
+ case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr:
+ return X86::COND_GE;
+ case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm:
+ case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr:
+ return X86::COND_L;
+ case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm:
+ case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr:
+ return X86::COND_LE;
+ case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm:
+ case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr:
+ return X86::COND_NE;
+ case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm:
+ case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr:
+ return X86::COND_NO;
+ case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm:
+ case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr:
+ return X86::COND_NP;
+ case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm:
+ case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr:
+ return X86::COND_NS;
+ case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm:
+ case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr:
+ return X86::COND_O;
+ case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm:
+ case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr:
+ return X86::COND_P;
+ case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm:
+ case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr:
+ return X86::COND_S;
+ }
+}
+
+unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case X86::COND_E: return X86::JE_4;
+ case X86::COND_NE: return X86::JNE_4;
+ case X86::COND_L: return X86::JL_4;
+ case X86::COND_LE: return X86::JLE_4;
+ case X86::COND_G: return X86::JG_4;
+ case X86::COND_GE: return X86::JGE_4;
+ case X86::COND_B: return X86::JB_4;
+ case X86::COND_BE: return X86::JBE_4;
+ case X86::COND_A: return X86::JA_4;
+ case X86::COND_AE: return X86::JAE_4;
+ case X86::COND_S: return X86::JS_4;
+ case X86::COND_NS: return X86::JNS_4;
+ case X86::COND_P: return X86::JP_4;
+ case X86::COND_NP: return X86::JNP_4;
+ case X86::COND_O: return X86::JO_4;
+ case X86::COND_NO: return X86::JNO_4;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified condition,
+/// e.g. turning COND_E to COND_NE.
+X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case X86::COND_E: return X86::COND_NE;
+ case X86::COND_NE: return X86::COND_E;
+ case X86::COND_L: return X86::COND_GE;
+ case X86::COND_LE: return X86::COND_G;
+ case X86::COND_G: return X86::COND_LE;
+ case X86::COND_GE: return X86::COND_L;
+ case X86::COND_B: return X86::COND_AE;
+ case X86::COND_BE: return X86::COND_A;
+ case X86::COND_A: return X86::COND_BE;
+ case X86::COND_AE: return X86::COND_B;
+ case X86::COND_S: return X86::COND_NS;
+ case X86::COND_NS: return X86::COND_S;
+ case X86::COND_P: return X86::COND_NP;
+ case X86::COND_NP: return X86::COND_P;
+ case X86::COND_O: return X86::COND_NO;
+ case X86::COND_NO: return X86::COND_O;
+ }
+}
+
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+static X86::CondCode getSwappedCondition(X86::CondCode CC) {
+ switch (CC) {
+ default: return X86::COND_INVALID;
+ case X86::COND_E: return X86::COND_E;
+ case X86::COND_NE: return X86::COND_NE;
+ case X86::COND_L: return X86::COND_G;
+ case X86::COND_LE: return X86::COND_GE;
+ case X86::COND_G: return X86::COND_L;
+ case X86::COND_GE: return X86::COND_LE;
+ case X86::COND_B: return X86::COND_A;
+ case X86::COND_BE: return X86::COND_AE;
+ case X86::COND_A: return X86::COND_B;
+ case X86::COND_AE: return X86::COND_BE;
+ }
+}
+
+/// getSETFromCond - Return a set opcode for the given condition and
+/// whether it has memory operand.
+static unsigned getSETFromCond(X86::CondCode CC,
+ bool HasMemoryOperand) {
+ static const uint16_t Opc[16][2] = {
+ { X86::SETAr, X86::SETAm },
+ { X86::SETAEr, X86::SETAEm },
+ { X86::SETBr, X86::SETBm },
+ { X86::SETBEr, X86::SETBEm },
+ { X86::SETEr, X86::SETEm },
+ { X86::SETGr, X86::SETGm },
+ { X86::SETGEr, X86::SETGEm },
+ { X86::SETLr, X86::SETLm },
+ { X86::SETLEr, X86::SETLEm },
+ { X86::SETNEr, X86::SETNEm },
+ { X86::SETNOr, X86::SETNOm },
+ { X86::SETNPr, X86::SETNPm },
+ { X86::SETNSr, X86::SETNSm },
+ { X86::SETOr, X86::SETOm },
+ { X86::SETPr, X86::SETPm },
+ { X86::SETSr, X86::SETSm }
+ };
+
+ assert(CC < 16 && "Can only handle standard cond codes");
+ return Opc[CC][HasMemoryOperand ? 1 : 0];
+}
+
+/// getCMovFromCond - Return a cmov opcode for the given condition,
+/// register size in bytes, and operand type.
+static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
+ bool HasMemoryOperand) {
+ static const uint16_t Opc[32][3] = {
+ { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
+ { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
+ { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
+ { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
+ { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr },
+ { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr },
+ { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
+ { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr },
+ { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
+ { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
+ { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
+ { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
+ { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
+ { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr },
+ { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr },
+ { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr },
+ { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm },
+ { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm },
+ { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm },
+ { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm },
+ { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm },
+ { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm },
+ { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm },
+ { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm },
+ { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm },
+ { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm },
+ { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm },
+ { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm },
+ { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm },
+ { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm },
+ { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm },
+ { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm }
+ };
+
+ assert(CC < 16 && "Can only handle standard cond codes");
+ unsigned Idx = HasMemoryOperand ? 16+CC : CC;
+ switch(RegBytes) {
+ default: llvm_unreachable("Illegal register size!");
+ case 2: return Opc[Idx][0];
+ case 4: return Opc[Idx][1];
+ case 8: return Opc[Idx][2];
+ }
+}
+
+bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+ if (!MI->isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator instruction, we're
+ // done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled by this
+ // analysis.
+ if (!I->isBranch())
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == X86::JMP_4) {
+ UnCondBrIter = I;
+
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ UnCondBrIter = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode());
+ if (BranchCode == X86::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+ // If we can modify the code and it ends in something like:
+ //
+ // jCC L1
+ // jmp L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Then we can change this to:
+ //
+ // jnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Which is a bit more efficient.
+ // We conditionally jump to the fall-through block.
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ unsigned JNCC = GetCondBranchFromCond(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB());
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
+ .addMBB(TargetBB);
+
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ // Restart the analysis.
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination and their condition
+ // opcodes fit one of the special multi-branch idioms.
+ assert(Cond.size() == 1);
+ assert(TBB);
+
+ // Only handle the case where all conditional branches branch to the same
+ // destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ // If the conditions are the same, we can leave them alone.
+ X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ // If they differ, see if they fit one of the known patterns. Theoretically,
+ // we could handle more patterns here, but we shouldn't expect to see them
+ // if instruction selection has done a reasonable job.
+ if ((OldBranchCode == X86::COND_NP &&
+ BranchCode == X86::COND_E) ||
+ (OldBranchCode == X86::COND_E &&
+ BranchCode == X86::COND_NP))
+ BranchCode = X86::COND_NP_OR_E;
+ else if ((OldBranchCode == X86::COND_P &&
+ BranchCode == X86::COND_NE) ||
+ (OldBranchCode == X86::COND_NE &&
+ BranchCode == X86::COND_P))
+ BranchCode = X86::COND_NE_OR_P;
+ else
+ return true;
+
+ // Update the MachineOperand.
+ Cond[0].setImm(BranchCode);
+ }
+
+ return false;
+}
+
+unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != X86::JMP_4 &&
+ getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "X86 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
+ switch (CC) {
+ case X86::COND_NP_OR_E:
+ // Synthesize NP_OR_E with two branches.
+ BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
+ ++Count;
+ break;
+ case X86::COND_NE_OR_P:
+ // Synthesize NE_OR_P with two branches.
+ BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
+ ++Count;
+ break;
+ default: {
+ unsigned Opc = GetCondBranchFromCond(CC);
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+ ++Count;
+ }
+ }
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+bool X86InstrInfo::
+canInsertSelect(const MachineBasicBlock &MBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ // Not all subtargets have cmov instructions.
+ if (!TM.getSubtarget<X86Subtarget>().hasCMov())
+ return false;
+ if (Cond.size() != 1)
+ return false;
+ // We cannot do the composite conditions, at least not in SSA form.
+ if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+ // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
+ if (X86::GR16RegClass.hasSubClassEq(RC) ||
+ X86::GR32RegClass.hasSubClassEq(RC) ||
+ X86::GR64RegClass.hasSubClassEq(RC)) {
+ // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
+ // Bridge. Probably Ivy Bridge as well.
+ CondCycles = 2;
+ TrueCycles = 2;
+ FalseCycles = 2;
+ return true;
+ }
+
+ // Can't do vectors.
+ return false;
+}
+
+void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ assert(Cond.size() == 1 && "Invalid Cond array");
+ unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
+ MRI.getRegClass(DstReg)->getSize(),
+ false/*HasMemoryOperand*/);
+ BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
+}
+
+/// isHReg - Test if the given register is a physical h register.
+static bool isHReg(unsigned Reg) {
+ return X86::GR8_ABCD_HRegClass.contains(Reg);
+}
+
+// Try and copy between VR128/VR64 and GR64 registers.
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
+ bool HasAVX) {
+ // SrcReg(VR128) -> DestReg(GR64)
+ // SrcReg(VR64) -> DestReg(GR64)
+ // SrcReg(GR64) -> DestReg(VR128)
+ // SrcReg(GR64) -> DestReg(VR64)
+
+ if (X86::GR64RegClass.contains(DestReg)) {
+ if (X86::VR128RegClass.contains(SrcReg))
+ // Copy from a VR128 register to a GR64 register.
+ return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
+ if (X86::VR64RegClass.contains(SrcReg))
+ // Copy from a VR64 register to a GR64 register.
+ return X86::MOVSDto64rr;
+ } else if (X86::GR64RegClass.contains(SrcReg)) {
+ // Copy from a GR64 register to a VR128 register.
+ if (X86::VR128RegClass.contains(DestReg))
+ return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
+ // Copy from a GR64 register to a VR64 register.
+ if (X86::VR64RegClass.contains(DestReg))
+ return X86::MOV64toSDrr;
+ }
+
+ // SrcReg(FR32) -> DestReg(GR32)
+ // SrcReg(GR32) -> DestReg(FR32)
+
+ if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg))
+ // Copy from a FR32 register to a GR32 register.
+ return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
+
+ if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
+ // Copy from a GR32 register to a FR32 register.
+ return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
+
+ return 0;
+}
+
+void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // First deal with the normal symmetric copies.
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ unsigned Opc;
+ if (X86::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV64rr;
+ else if (X86::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV32rr;
+ else if (X86::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV16rr;
+ else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if ((isHReg(DestReg) || isHReg(SrcReg)) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit()) {
+ Opc = X86::MOV8rr_NOREX;
+ // Both operands must be encodable without an REX prefix.
+ assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
+ "8-bit H register can not be copied outside GR8_NOREX");
+ } else
+ Opc = X86::MOV8rr;
+ } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+ Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
+ else if (X86::VR256RegClass.contains(DestReg, SrcReg))
+ Opc = X86::VMOVAPSYrr;
+ else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MMX_MOVQ64rr;
+ else
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX);
+
+ if (Opc) {
+ BuildMI(MBB, MI, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ // Moving EFLAGS to / from another register requires a push and a pop.
+ // Notice that we have to adjust the stack if we don't want to clobber the
+ // first frame index. See X86FrameLowering.cpp - colobbersTheStack.
+ if (SrcReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(DestReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHF64));
+ BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
+ return;
+ }
+ if (X86::GR32RegClass.contains(DestReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHF32));
+ BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
+ return;
+ }
+ }
+ if (DestReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH64r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::POPF64));
+ return;
+ }
+ if (X86::GR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH32r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::POPF32));
+ return;
+ }
+ }
+
+ DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+ << " to " << RI.getName(DestReg) << '\n');
+ llvm_unreachable("Cannot emit physreg copy instruction");
+}
+
+static unsigned getLoadStoreRegOpcode(unsigned Reg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM,
+ bool load) {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ switch (RC->getSize()) {
+ default:
+ llvm_unreachable("Unknown spill size");
+ case 1:
+ assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case 2:
+ assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case 4:
+ if (X86::GR32RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ if (X86::FR32RegClass.hasSubClassEq(RC))
+ return load ?
+ (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
+ (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
+ if (X86::RFP32RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+ llvm_unreachable("Unknown 4-byte regclass");
+ case 8:
+ if (X86::GR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ if (X86::FR64RegClass.hasSubClassEq(RC))
+ return load ?
+ (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
+ (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
+ if (X86::VR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
+ if (X86::RFP64RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+ llvm_unreachable("Unknown 8-byte regclass");
+ case 10:
+ assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
+ return load ? X86::LD_Fp80m : X86::ST_FpP80m;
+ case 16: {
+ assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
+ // If stack is realigned we can use aligned stores.
+ if (isStackAligned)
+ return load ?
+ (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) :
+ (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
+ else
+ return load ?
+ (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) :
+ (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
+ }
+ case 32:
+ assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
+ // If stack is realigned we can use aligned stores.
+ if (isStackAligned)
+ return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
+ else
+ return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
+ }
+}
+
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ TargetMachine &TM) {
+ return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
+}
+
+
+static unsigned getLoadRegOpcode(unsigned DestReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM) {
+ return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
+}
+
+void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
+ "Stack slot too small for store");
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = MMOBegin != MMOEnd &&
+ (*MMOBegin)->getAlignment() >= Alignment;
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ MIB.addReg(SrcReg, getKillRegState(isKill));
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ NewMIs.push_back(MIB);
+}
+
+
+void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ const MachineFunction &MF = *MBB.getParent();
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = MMOBegin != MMOEnd &&
+ (*MMOBegin)->getAlignment() >= Alignment;
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ NewMIs.push_back(MIB);
+}
+
+bool X86InstrInfo::
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ // A SUB can be used to perform comparison.
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = MI->getOperand(2).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(2).getImm();
+ return true;
+ case X86::CMP64rr:
+ case X86::CMP32rr:
+ case X86::CMP16rr:
+ case X86::CMP8rr:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::TEST8rr:
+ case X86::TEST16rr:
+ case X86::TEST32rr:
+ case X86::TEST64rr:
+ SrcReg = MI->getOperand(0).getReg();
+ if (MI->getOperand(1).getReg() != SrcReg) return false;
+ // Compare against zero.
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ }
+ return false;
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// This function can be extended later on.
+/// SrcReg, SrcRegs: register operands for FlagI.
+/// ImmValue: immediate for FlagI if it takes an immediate.
+inline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if (((FlagI->getOpcode() == X86::CMP64rr &&
+ OI->getOpcode() == X86::SUB64rr) ||
+ (FlagI->getOpcode() == X86::CMP32rr &&
+ OI->getOpcode() == X86::SUB32rr)||
+ (FlagI->getOpcode() == X86::CMP16rr &&
+ OI->getOpcode() == X86::SUB16rr)||
+ (FlagI->getOpcode() == X86::CMP8rr &&
+ OI->getOpcode() == X86::SUB8rr)) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
+
+ if (((FlagI->getOpcode() == X86::CMP64ri32 &&
+ OI->getOpcode() == X86::SUB64ri32) ||
+ (FlagI->getOpcode() == X86::CMP64ri8 &&
+ OI->getOpcode() == X86::SUB64ri8) ||
+ (FlagI->getOpcode() == X86::CMP32ri &&
+ OI->getOpcode() == X86::SUB32ri) ||
+ (FlagI->getOpcode() == X86::CMP32ri8 &&
+ OI->getOpcode() == X86::SUB32ri8) ||
+ (FlagI->getOpcode() == X86::CMP16ri &&
+ OI->getOpcode() == X86::SUB16ri) ||
+ (FlagI->getOpcode() == X86::CMP16ri8 &&
+ OI->getOpcode() == X86::SUB16ri8) ||
+ (FlagI->getOpcode() == X86::CMP8ri &&
+ OI->getOpcode() == X86::SUB8ri)) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
+
+/// isDefConvertible - check whether the definition can be converted
+/// to remove a comparison against zero.
+inline static bool isDefConvertible(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri:
+ case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8:
+ case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
+ case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
+ case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
+ case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
+ case X86::DEC64_32r: case X86::DEC64_16r:
+ case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
+ case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
+ case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
+ case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
+ case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
+ case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
+ case X86::INC64_32r: case X86::INC64_16r:
+ case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
+ case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
+ case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
+ case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
+ case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
+ case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
+ case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8:
+ case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
+ case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
+ case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
+ case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri:
+ case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8:
+ case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
+ case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
+ case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
+ case X86::ANDN32rr: case X86::ANDN32rm:
+ case X86::ANDN64rr: case X86::ANDN64rm:
+ return true;
+ }
+}
+
+/// optimizeCompareInstr - Check if there exists an earlier instruction that
+/// operates on the same source operands and sets flags in the same way as
+/// Compare; remove Compare if possible.
+bool X86InstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Check whether we can replace SUB with CMP.
+ unsigned NewOpcode = 0;
+ switch (CmpInstr->getOpcode()) {
+ default: break;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr: {
+ if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
+ return false;
+ // There is no use of the destination register, we can replace SUB with CMP.
+ switch (CmpInstr->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
+ case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
+ case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
+ case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
+ case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
+ case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
+ case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
+ case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
+ case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
+ case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
+ case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
+ case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
+ case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
+ case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
+ case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
+ }
+ CmpInstr->setDesc(get(NewOpcode));
+ CmpInstr->RemoveOperand(0);
+ // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
+ if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
+ NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
+ return false;
+ }
+ }
+
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
+
+ // CmpInstr is the first instruction of the BB.
+ MachineBasicBlock::iterator I = CmpInstr, Def = MI;
+
+ // If we are comparing against zero, check whether we can use MI to update
+ // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
+ bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
+ if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() ||
+ !isDefConvertible(MI)))
+ return false;
+
+ // We are searching for an earlier instruction that can make CmpInstr
+ // redundant and that instruction will be saved in Sub.
+ MachineInstr *Sub = NULL;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ // We iterate backward, starting from the instruction before CmpInstr and
+ // stop when reaching the definition of a source register or done with the BB.
+ // RI points to the instruction before CmpInstr.
+ // If the definition is in this basic block, RE points to the definition;
+ // otherwise, RE is the rend of the basic block.
+ MachineBasicBlock::reverse_iterator
+ RI = MachineBasicBlock::reverse_iterator(I),
+ RE = CmpInstr->getParent() == MI->getParent() ?
+ MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ :
+ CmpInstr->getParent()->rend();
+ MachineInstr *Movr0Inst = 0;
+ for (; RI != RE; ++RI) {
+ MachineInstr *Instr = &*RI;
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (!IsCmpZero &&
+ isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) {
+ Sub = Instr;
+ break;
+ }
+
+ if (Instr->modifiesRegister(X86::EFLAGS, TRI) ||
+ Instr->readsRegister(X86::EFLAGS, TRI)) {
+ // This instruction modifies or uses EFLAGS.
+
+ // MOV32r0 etc. are implemented with xor which clobbers condition code.
+ // They are safe to move up, if the definition to EFLAGS is dead and
+ // earlier instructions do not read or write EFLAGS.
+ if (!Movr0Inst && (Instr->getOpcode() == X86::MOV8r0 ||
+ Instr->getOpcode() == X86::MOV16r0 ||
+ Instr->getOpcode() == X86::MOV32r0 ||
+ Instr->getOpcode() == X86::MOV64r0) &&
+ Instr->registerDefIsDead(X86::EFLAGS, TRI)) {
+ Movr0Inst = Instr;
+ continue;
+ }
+
+ // We can't remove CmpInstr.
+ return false;
+ }
+ }
+
+ // Return false if no candidates exist.
+ if (!IsCmpZero && !Sub)
+ return false;
+
+ bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg);
+
+ // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
+ // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
+ // If we are done with the basic block, we need to check whether EFLAGS is
+ // live-out.
+ bool IsSafe = false;
+ SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
+ MachineBasicBlock::iterator E = CmpInstr->getParent()->end();
+ for (++I; I != E; ++I) {
+ const MachineInstr &Instr = *I;
+ bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
+ bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
+ // We should check the usage if this instruction uses and updates EFLAGS.
+ if (!UseEFLAGS && ModifyEFLAGS) {
+ // It is safe to remove CmpInstr if EFLAGS is updated again.
+ IsSafe = true;
+ break;
+ }
+ if (!UseEFLAGS && !ModifyEFLAGS)
+ continue;
+
+ // EFLAGS is used by this instruction.
+ X86::CondCode OldCC;
+ bool OpcIsSET = false;
+ if (IsCmpZero || IsSwapped) {
+ // We decode the condition code from opcode.
+ if (Instr.isBranch())
+ OldCC = getCondFromBranchOpc(Instr.getOpcode());
+ else {
+ OldCC = getCondFromSETOpc(Instr.getOpcode());
+ if (OldCC != X86::COND_INVALID)
+ OpcIsSET = true;
+ else
+ OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
+ }
+ if (OldCC == X86::COND_INVALID) return false;
+ }
+ if (IsCmpZero) {
+ switch (OldCC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ case X86::COND_O: case X86::COND_NO:
+ // CF and OF are used, we can't perform this optimization.
+ return false;
+ }
+ } else if (IsSwapped) {
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
+ // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ // We swap the condition code and synthesize the new opcode.
+ X86::CondCode NewCC = getSwappedCondition(OldCC);
+ if (NewCC == X86::COND_INVALID) return false;
+
+ // Synthesize the new opcode.
+ bool HasMemoryOperand = Instr.hasOneMemOperand();
+ unsigned NewOpc;
+ if (Instr.isBranch())
+ NewOpc = GetCondBranchFromCond(NewCC);
+ else if(OpcIsSET)
+ NewOpc = getSETFromCond(NewCC, HasMemoryOperand);
+ else {
+ unsigned DstReg = Instr.getOperand(0).getReg();
+ NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(),
+ HasMemoryOperand);
+ }
+
+ // Push the MachineInstr to OpsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // instructions will be modified.
+ OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
+ }
+ if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
+ // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
+ IsSafe = true;
+ break;
+ }
+ }
+
+ // If EFLAGS is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if ((IsCmpZero || IsSwapped) && !IsSafe) {
+ MachineBasicBlock *MBB = CmpInstr->getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(X86::EFLAGS))
+ return false;
+ }
+
+ // The instruction to be updated is either Sub or MI.
+ Sub = IsCmpZero ? MI : Sub;
+ // Move Movr0Inst to the place right before Sub.
+ if (Movr0Inst) {
+ Sub->getParent()->remove(Movr0Inst);
+ Sub->getParent()->insert(MachineBasicBlock::iterator(Sub), Movr0Inst);
+ }
+
+ // Make sure Sub instruction defines EFLAGS and mark the def live.
+ unsigned LastOperand = Sub->getNumOperands() - 1;
+ assert(Sub->getNumOperands() >= 2 &&
+ Sub->getOperand(LastOperand).isReg() &&
+ Sub->getOperand(LastOperand).getReg() == X86::EFLAGS &&
+ "EFLAGS should be the last operand of SUB, ADD, OR, XOR, AND");
+ Sub->getOperand(LastOperand).setIsDef(true);
+ Sub->getOperand(LastOperand).setIsDead(false);
+ CmpInstr->eraseFromParent();
+
+ // Modify the condition code of instructions in OpsToUpdate.
+ for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++)
+ OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second));
+ return true;
+}
+
+/// optimizeLoadInstr - Try to remove the load by folding it to a register
+/// operand at the use. We fold the load instructions if load defines a virtual
+/// register, the virtual register is used once in the same BB, and the
+/// instructions in-between do not load or store, and have no side effects.
+MachineInstr* X86InstrInfo::
+optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
+ if (FoldAsLoadDefReg == 0)
+ return 0;
+ // To be conservative, if there exists another load, clear the load candidate.
+ if (MI->mayLoad()) {
+ FoldAsLoadDefReg = 0;
+ return 0;
+ }
+
+ // Check whether we can move DefMI here.
+ DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
+ assert(DefMI);
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(this, 0, SawStore))
+ return 0;
+
+ // We try to commute MI if possible.
+ unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
+ for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
+ // Collect information about virtual register operands of MI.
+ unsigned SrcOperandId = 0;
+ bool FoundSrcOperand = false;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != FoldAsLoadDefReg)
+ continue;
+ // Do not fold if we have a subreg use or a def or multiple uses.
+ if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
+ return 0;
+
+ SrcOperandId = i;
+ FoundSrcOperand = true;
+ }
+ if (!FoundSrcOperand) return 0;
+
+ // Check whether we can fold the def into SrcOperandId.
+ SmallVector<unsigned, 8> Ops;
+ Ops.push_back(SrcOperandId);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ if (FoldMI) {
+ FoldAsLoadDefReg = 0;
+ return FoldMI;
+ }
+
+ if (Idx == 1) {
+ // MI was changed but it didn't help, commute it back!
+ commuteInstruction(MI, false);
+ return 0;
+ }
+
+ // Check whether we can commute MI and enable folding.
+ if (MI->isCommutable()) {
+ MachineInstr *NewMI = commuteInstruction(MI, false);
+ // Unable to commute.
+ if (!NewMI) return 0;
+ if (NewMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ return 0;
+ }
+ }
+ }
+ return 0;
+}
+
+/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
+/// instruction with two undef reads of the register being defined. This is
+/// used for mapping:
+/// %xmm4 = V_SET0
+/// to:
+/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
+///
+static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
+ const MCInstrDesc &Desc) {
+ assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
+ unsigned Reg = MIB->getOperand(0).getReg();
+ MIB->setDesc(Desc);
+
+ // MachineInstr::addOperand() will insert explicit operands before any
+ // implicit operands.
+ MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+ // But we don't trust that.
+ assert(MIB->getOperand(1).getReg() == Reg &&
+ MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
+ return true;
+}
+
+bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ switch (MI->getOpcode()) {
+ case X86::SETB_C8r:
+ return Expand2AddrUndef(MIB, get(X86::SBB8rr));
+ case X86::SETB_C16r:
+ return Expand2AddrUndef(MIB, get(X86::SBB16rr));
+ case X86::SETB_C32r:
+ return Expand2AddrUndef(MIB, get(X86::SBB32rr));
+ case X86::SETB_C64r:
+ return Expand2AddrUndef(MIB, get(X86::SBB64rr));
+ case X86::V_SET0:
+ case X86::FsFLD0SS:
+ case X86::FsFLD0SD:
+ return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
+ case X86::AVX_SET0:
+ assert(HasAVX && "AVX not supported");
+ return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
+ case X86::V_SETALLONES:
+ return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
+ case X86::AVX2_SETALLONES:
+ return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
+ case X86::TEST8ri_NOREX:
+ MI->setDesc(get(X86::TEST8ri));
+ return true;
+ }
+ return false;
+}
+
+MachineInstr*
+X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ X86AddressMode AM;
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = FrameIx;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE));
+ addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI,
+ const TargetInstrInfo &TII) {
+ // Create the base instruction with the memory operand as the first part.
+ // Omit the implicit operands, something BuildMI can't do.
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, NewMI);
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+
+ // Loop over the rest of the ri operands, converting them over.
+ unsigned NumOps = MI->getDesc().getNumOperands()-2;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i+2);
+ MIB.addOperand(MO);
+ }
+ for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ MIB.addOperand(MO);
+ }
+ return MIB;
+}
+
+static MachineInstr *FuseInst(MachineFunction &MF,
+ unsigned Opcode, unsigned OpNo,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI, const TargetInstrInfo &TII) {
+ // Omit the implicit operands, something BuildMI can't do.
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, NewMI);
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (i == OpNo) {
+ assert(MO.isReg() && "Expected to fold into reg operand!");
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ } else {
+ MIB.addOperand(MO);
+ }
+ }
+ return MIB;
+}
+
+static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
+
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ return MIB.addImm(0);
+}
+
+MachineInstr*
+X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI, unsigned i,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Align) const {
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect();
+ bool isTwoAddrFold = false;
+
+ // Atom favors register form of call. So, we do not fold loads into calls
+ // when X86Subtarget is Atom.
+ if (isCallRegIndirect &&
+ (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) {
+ return NULL;
+ }
+
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOpcode() == X86::ADD32ri &&
+ MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return NULL;
+
+ MachineInstr *NewMI = NULL;
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different than folding it other places. It requires
+ // replacing the *two* registers with the memory location.
+ if (isTwoAddr && NumOps >= 2 && i < 2 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ isTwoAddrFold = true;
+ } else if (i == 0) { // If operand 0
+ unsigned Opc = 0;
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV64r0: Opc = X86::MOV64mi32; break;
+ case X86::MOV32r0: Opc = X86::MOV32mi; break;
+ case X86::MOV16r0: Opc = X86::MOV16mi; break;
+ case X86::MOV8r0: Opc = X86::MOV8mi; break;
+ }
+ if (Opc)
+ NewMI = MakeM0Inst(*this, Opc, MOs, MI);
+ if (NewMI)
+ return NewMI;
+
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (i == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (i == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ } else if (i == 3) {
+ OpcodeTablePtr = &RegOp2MemOpTable3;
+ }
+
+ // If table selected...
+ if (OpcodeTablePtr) {
+ // Find the Opcode to fuse
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ OpcodeTablePtr->find(MI->getOpcode());
+ if (I != OpcodeTablePtr->end()) {
+ unsigned Opcode = I->second.first;
+ unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
+ if (Align < MinAlign)
+ return NULL;
+ bool NarrowToMOV32rm = false;
+ if (Size) {
+ unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize();
+ if (Size < RCSize) {
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+ return NULL;
+ // If this is a 64-bit load, but the spill slot is 32, then we can do
+ // a 32-bit load which is implicitly zero-extended. This likely is due
+ // to liveintervalanalysis remat'ing a load from stack slot.
+ if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
+ return NULL;
+ Opcode = X86::MOV32rm;
+ NarrowToMOV32rm = true;
+ }
+ }
+
+ if (isTwoAddrFold)
+ NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
+ else
+ NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
+
+ if (NarrowToMOV32rm) {
+ // If this is the special case where we use a MOV32rm to load a 32-bit
+ // value and zero-extend the top bits. Change the destination register
+ // to a 32-bit one.
+ unsigned DstReg = NewMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
+ X86::sub_32bit));
+ else
+ NewMI->getOperand(0).setSubReg(X86::sub_32bit);
+ }
+ return NewMI;
+ }
+ }
+
+ // No fusion
+ if (PrintFailedFusing && !MI->isCopy())
+ dbgs() << "We failed to fuse operand " << i << " in " << *MI;
+ return NULL;
+}
+
+/// hasPartialRegUpdate - Return true for all instructions that only update
+/// the first 32 or 64-bits of the destination register and leave the rest
+/// unmodified. This can be used to avoid folding loads if the instructions
+/// only update part of the destination register, and the non-updated part is
+/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
+/// instructions breaks the partial register dependency and it can improve
+/// performance. e.g.:
+///
+/// movss (%rdi), %xmm0
+/// cvtss2sd %xmm0, %xmm0
+///
+/// Instead of
+/// cvtss2sd (%rdi), %xmm0
+///
+/// FIXME: This should be turned into a TSFlags.
+///
+static bool hasPartialRegUpdate(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::CVTSI2SSrr:
+ case X86::CVTSI2SS64rr:
+ case X86::CVTSI2SDrr:
+ case X86::CVTSI2SD64rr:
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSDr_Int:
+ case X86::ROUNDSSr:
+ case X86::ROUNDSSr_Int:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ // AVX encoded versions
+ case X86::VCVTSD2SSrr:
+ case X86::Int_VCVTSD2SSrr:
+ case X86::VCVTSS2SDrr:
+ case X86::Int_VCVTSS2SDrr:
+ case X86::VRCPSSr:
+ case X86::VROUNDSDr:
+ case X86::VROUNDSDr_Int:
+ case X86::VROUNDSSr:
+ case X86::VROUNDSSr_Int:
+ case X86::VRSQRTSSr:
+ case X86::VSQRTSSr:
+ return true;
+ }
+
+ return false;
+}
+
+/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle
+/// instructions we would like before a partial register update.
+unsigned X86InstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
+
+ // If MI is marked as reading Reg, the partial register update is wanted.
+ const MachineOperand &MO = MI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (MO.readsReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else {
+ if (MI->readsRegister(Reg, TRI))
+ return 0;
+ }
+
+ // If any of the preceding 16 instructions are reading Reg, insert a
+ // dependency breaking instruction. The magic number is based on a few
+ // Nehalem experiments.
+ return 16;
+}
+
+void X86InstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (X86::VR128RegClass.contains(Reg)) {
+ // These instructions are all floating point domain, so xorps is the best
+ // choice.
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
+ .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+ } else if (X86::VR256RegClass.contains(Reg)) {
+ // Use vxorps to clear the full ymm register.
+ // It wants to read and write the xmm sub-register.
+ unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
+ .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
+ .addReg(Reg, RegState::ImplicitDefine);
+ } else
+ return;
+ MI->addRegisterKilled(Reg, TRI, true);
+}
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ // Unless optimizing for size, don't fold to avoid partial
+ // register update stalls
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Size = MFI->getObjectSize(FrameIndex);
+ unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ unsigned RCSize = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
+ }
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Size < RCSize)
+ return NULL;
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ SmallVector<MachineOperand,4> MOs;
+ MOs.push_back(MachineOperand::CreateFI(FrameIndex));
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
+}
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ // Unless optimizing for size, don't fold to avoid partial
+ // register update stalls
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
+
+ // Determine the alignment of the load.
+ unsigned Alignment = 0;
+ if (LoadMI->hasOneMemOperand())
+ Alignment = (*LoadMI->memoperands_begin())->getAlignment();
+ else
+ switch (LoadMI->getOpcode()) {
+ case X86::AVX2_SETALLONES:
+ case X86::AVX_SET0:
+ Alignment = 32;
+ break;
+ case X86::V_SET0:
+ case X86::V_SETALLONES:
+ Alignment = 16;
+ break;
+ case X86::FsFLD0SD:
+ Alignment = 8;
+ break;
+ case X86::FsFLD0SS:
+ Alignment = 4;
+ break;
+ default:
+ return 0;
+ }
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
+ }
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ // Make sure the subregisters match.
+ // Otherwise we risk changing the size of the load.
+ if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
+ return NULL;
+
+ SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
+ switch (LoadMI->getOpcode()) {
+ case X86::V_SET0:
+ case X86::V_SETALLONES:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX_SET0:
+ case X86::FsFLD0SD:
+ case X86::FsFLD0SS: {
+ // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Create a constant-pool entry and operands to load from it.
+
+ // Medium and large mode can't fold loads this way.
+ if (TM.getCodeModel() != CodeModel::Small &&
+ TM.getCodeModel() != CodeModel::Kernel)
+ return NULL;
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ PICBase = X86::RIP;
+ else
+ // FIXME: PICBase = getGlobalBaseReg(&MF);
+ // This doesn't work for several reasons.
+ // 1. GlobalBaseReg may have been spilled.
+ // 2. It may not be live at MI.
+ return NULL;
+ }
+
+ // Create a constant-pool entry.
+ MachineConstantPool &MCP = *MF.getConstantPool();
+ Type *Ty;
+ unsigned Opc = LoadMI->getOpcode();
+ if (Opc == X86::FsFLD0SS)
+ Ty = Type::getFloatTy(MF.getFunction()->getContext());
+ else if (Opc == X86::FsFLD0SD)
+ Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0)
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
+ else
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
+
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES);
+ const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
+ Constant::getNullValue(Ty);
+ unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
+
+ // Create operands to load from the constant pool entry.
+ MOs.push_back(MachineOperand::CreateReg(PICBase, false));
+ MOs.push_back(MachineOperand::CreateImm(1));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ break;
+ }
+ default: {
+ if ((LoadMI->getOpcode() == X86::MOVSSrm ||
+ LoadMI->getOpcode() == X86::VMOVSSrm) &&
+ MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
+ > 4)
+ // These instructions only load 32 bits, we can't fold them if the
+ // destination register is wider than 32 bits (4 bytes).
+ return NULL;
+ if ((LoadMI->getOpcode() == X86::MOVSDrm ||
+ LoadMI->getOpcode() == X86::VMOVSDrm) &&
+ MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
+ > 8)
+ // These instructions only load 64 bits, we can't fold them if the
+ // destination register is wider than 64 bits (8 bytes).
+ return NULL;
+
+ // Folding a normal load. Just copy the load's address operands.
+ unsigned NumOps = LoadMI->getDesc().getNumOperands();
+ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
+ MOs.push_back(LoadMI->getOperand(i));
+ break;
+ }
+ }
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
+}
+
+
+bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ // Check switch flag
+ if (NoFusing) return 0;
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case X86::TEST8rr:
+ case X86::TEST16rr:
+ case X86::TEST32rr:
+ case X86::TEST64rr:
+ return true;
+ case X86::ADD32ri:
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return false;
+ break;
+ }
+ }
+
+ if (Ops.size() != 1)
+ return false;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different than folding it other places. It requires
+ // replacing the *two* registers with the memory location.
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ } else if (OpNum == 0) { // If operand 0
+ switch (Opc) {
+ case X86::MOV8r0:
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: return true;
+ default: break;
+ }
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (OpNum == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (OpNum == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ } else if (OpNum == 3) {
+ OpcodeTablePtr = &RegOp2MemOpTable3;
+ }
+
+ if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
+ return true;
+ return TargetInstrInfo::canFoldMemoryOperand(MI, Ops);
+}
+
+bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(MI->getOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & TB_INDEX_MASK;
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
+ if (UnfoldLoad && !FoldedLoad)
+ return false;
+ UnfoldLoad &= FoldedLoad;
+ if (UnfoldStore && !FoldedStore)
+ return false;
+ UnfoldStore &= FoldedStore;
+
+ const MCInstrDesc &MCID = get(Opc);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
+ if (!MI->hasOneMemOperand() &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
+ // conservatively assume the address is unaligned. That's bad for
+ // performance.
+ return false;
+ SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
+ SmallVector<MachineOperand,2> BeforeOps;
+ SmallVector<MachineOperand,2> AfterOps;
+ SmallVector<MachineOperand,4> ImpOps;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (i >= Index && i < Index + X86::AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (Op.isReg() && Op.isImplicit())
+ ImpOps.push_back(Op);
+ else if (i < Index)
+ BeforeOps.push_back(Op);
+ else if (i > Index)
+ AfterOps.push_back(Op);
+ }
+
+ // Emit the load instruction.
+ if (UnfoldLoad) {
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
+ if (UnfoldStore) {
+ // Address operands cannot be marked isKill.
+ for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
+ MachineOperand &MO = NewMIs[0]->getOperand(i);
+ if (MO.isReg())
+ MO.setIsKill(false);
+ }
+ }
+ }
+
+ // Emit the data processing instruction.
+ MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, DataMI);
+
+ if (FoldedStore)
+ MIB.addReg(Reg, RegState::Define);
+ for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
+ MIB.addOperand(BeforeOps[i]);
+ if (FoldedLoad)
+ MIB.addReg(Reg);
+ for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
+ MIB.addOperand(AfterOps[i]);
+ for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
+ MachineOperand &MO = ImpOps[i];
+ MIB.addReg(MO.getReg(),
+ getDefRegState(MO.isDef()) |
+ RegState::Implicit |
+ getKillRegState(MO.isKill()) |
+ getDeadRegState(MO.isDead()) |
+ getUndefRegState(MO.isUndef()));
+ }
+ // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri: {
+ MachineOperand &MO0 = DataMI->getOperand(0);
+ MachineOperand &MO1 = DataMI->getOperand(1);
+ if (MO1.getImm() == 0) {
+ unsigned NewOpc;
+ switch (DataMI->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::CMP64ri8:
+ case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri8:
+ case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri8:
+ case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
+ case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
+ }
+ DataMI->setDesc(get(NewOpc));
+ MO1.ChangeToRegister(MO0.getReg(), false);
+ }
+ }
+ }
+ NewMIs.push_back(DataMI);
+
+ // Emit the store instruction.
+ if (UnfoldStore) {
+ const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
+ }
+
+ return true;
+}
+
+bool
+X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ if (!N->isMachineOpcode())
+ return false;
+
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(N->getMachineOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & TB_INDEX_MASK;
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
+ const MCInstrDesc &MCID = get(Opc);
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
+ unsigned NumDefs = MCID.NumDefs;
+ std::vector<SDValue> AddrOps;
+ std::vector<SDValue> BeforeOps;
+ std::vector<SDValue> AfterOps;
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumOps = N->getNumOperands();
+ for (unsigned i = 0; i != NumOps-1; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (i < Index-NumDefs)
+ BeforeOps.push_back(Op);
+ else if (i > Index-NumDefs)
+ AfterOps.push_back(Op);
+ }
+ SDValue Chain = N->getOperand(NumOps-1);
+ AddrOps.push_back(Chain);
+
+ // Emit the load instruction.
+ SDNode *Load = 0;
+ if (FoldedLoad) {
+ EVT VT = *RC->vt_begin();
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned load.
+ return false;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (*MMOs.first) &&
+ (*MMOs.first)->getAlignment() >= Alignment;
+ Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+ VT, MVT::Other, &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Load);
+
+ // Preserve memory reference information.
+ cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ }
+
+ // Emit the data processing instruction.
+ std::vector<EVT> VTs;
+ const TargetRegisterClass *DstRC = 0;
+ if (MCID.getNumDefs() > 0) {
+ DstRC = getRegClass(MCID, 0, &RI, MF);
+ VTs.push_back(*DstRC->vt_begin());
+ }
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
+ VTs.push_back(VT);
+ }
+ if (Load)
+ BeforeOps.push_back(SDValue(Load, 0));
+ std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
+ SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
+ BeforeOps.size());
+ NewNodes.push_back(NewNode);
+
+ // Emit the store instruction.
+ if (FoldedStore) {
+ AddrOps.pop_back();
+ AddrOps.push_back(SDValue(NewNode, 0));
+ AddrOps.push_back(Chain);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned store.
+ return false;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (*MMOs.first) &&
+ (*MMOs.first)->getAlignment() >= Alignment;
+ SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
+ isAligned, TM),
+ dl, MVT::Other,
+ &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Store);
+
+ // Preserve memory reference information.
+ cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ }
+
+ return true;
+}
+
+unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(Opc);
+ if (I == MemOp2RegOpTable.end())
+ return 0;
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
+ if (UnfoldLoad && !FoldedLoad)
+ return 0;
+ if (UnfoldStore && !FoldedStore)
+ return 0;
+ if (LoadRegIndex)
+ *LoadRegIndex = I->second.second & TB_INDEX_MASK;
+ return I->second.first;
+}
+
+bool
+X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2) const {
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+ unsigned Opc1 = Load1->getMachineOpcode();
+ unsigned Opc2 = Load2->getMachineOpcode();
+ switch (Opc1) {
+ default: return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ // AVX load instructions
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ break;
+ }
+ switch (Opc2) {
+ default: return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ // AVX load instructions
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ break;
+ }
+
+ // Check if chain operands and base addresses match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(5) != Load2->getOperand(5))
+ return false;
+ // Segment operands should match as well.
+ if (Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+ // Scale should be 1, Index should be Reg0.
+ if (Load1->getOperand(1) == Load2->getOperand(1) &&
+ Load1->getOperand(2) == Load2->getOperand(2)) {
+ if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
+ return false;
+
+ // Now let's examine the displacements.
+ if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
+ isa<ConstantSDNode>(Load2->getOperand(3))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
+ return true;
+ }
+ }
+ return false;
+}
+
+bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ assert(Offset2 > Offset1);
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ unsigned Opc1 = Load1->getMachineOpcode();
+ unsigned Opc2 = Load2->getMachineOpcode();
+ if (Opc1 != Opc2)
+ return false; // FIXME: overly conservative?
+
+ switch (Opc1) {
+ default: break;
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ return false;
+ }
+
+ EVT VT = Load1->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ // XMM registers. In 64-bit mode we can be a bit more aggressive since we
+ // have 16 of them to play with.
+ if (TM.getSubtargetImpl()->is64Bit()) {
+ if (NumLoads >= 3)
+ return false;
+ } else if (NumLoads) {
+ return false;
+ }
+ break;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ if (NumLoads)
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+
+bool X86InstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid X86 branch condition!");
+ X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
+ if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
+ return true;
+ Cond[0].setImm(GetOppositeBranchCondition(CC));
+ return false;
+}
+
+bool X86InstrInfo::
+isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // FIXME: Return false for x87 stack register classes for now. We can't
+ // allow any loads of these registers before FpGet_ST0_80.
+ return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
+ RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// the global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
+///
+unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Create the register. The code to initialize it is inserted
+ // later, by the CGBR pass (below).
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ GlobalBaseReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ X86FI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
+
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const uint16_t ReplaceableInstrs[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
+ { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
+ { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
+ { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
+ { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
+ { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
+ { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
+ { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
+ { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
+ { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
+ { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
+ { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+ // AVX 128-bit support
+ { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
+ { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
+ { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
+ { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
+ { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
+ { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
+ { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
+ { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
+ { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
+ { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
+ { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
+ { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
+ { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
+ // AVX 256-bit support
+ { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
+ { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
+ { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
+ { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
+ { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
+};
+
+static const uint16_t ReplaceableInstrsAVX2[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
+ { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
+ { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
+ { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
+ { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
+ { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
+ { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+ { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
+ { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
+};
+
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
+
+static const uint16_t *lookup(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ if (ReplaceableInstrs[i][domain-1] == opcode)
+ return ReplaceableInstrs[i];
+ return 0;
+}
+
+static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
+ if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
+ return ReplaceableInstrsAVX2[i];
+ return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
+ uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
+ uint16_t validDomains = 0;
+ if (domain && lookup(MI->getOpcode(), domain))
+ validDomains = 0xe;
+ else if (domain && lookupAVX2(MI->getOpcode(), domain))
+ validDomains = hasAVX2 ? 0xe : 0x6;
+ return std::make_pair(domain, validDomains);
+}
+
+void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
+ assert(Domain>0 && Domain<4 && "Invalid execution domain");
+ uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ assert(dom && "Not an SSE instruction");
+ const uint16_t *table = lookup(MI->getOpcode(), dom);
+ if (!table) { // try the other table
+ assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
+ "256-bit vector operations only available in AVX2");
+ table = lookupAVX2(MI->getOpcode(), dom);
+ }
+ assert(table && "Cannot change domain");
+ MI->setDesc(get(table[Domain-1]));
+}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(X86::NOOP);
+}
+
+bool X86InstrInfo::isHighLatencyDef(int opc) const {
+ switch (opc) {
+ default: return false;
+ case X86::DIVSDrm:
+ case X86::DIVSDrm_Int:
+ case X86::DIVSDrr:
+ case X86::DIVSDrr_Int:
+ case X86::DIVSSrm:
+ case X86::DIVSSrm_Int:
+ case X86::DIVSSrr:
+ case X86::DIVSSrr_Int:
+ case X86::SQRTPDm:
+ case X86::SQRTPDr:
+ case X86::SQRTPSm:
+ case X86::SQRTPSr:
+ case X86::SQRTSDm:
+ case X86::SQRTSDm_Int:
+ case X86::SQRTSDr:
+ case X86::SQRTSDr_Int:
+ case X86::SQRTSSm:
+ case X86::SQRTSSm_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ // AVX instructions with high latency
+ case X86::VDIVSDrm:
+ case X86::VDIVSDrm_Int:
+ case X86::VDIVSDrr:
+ case X86::VDIVSDrr_Int:
+ case X86::VDIVSSrm:
+ case X86::VDIVSSrm_Int:
+ case X86::VDIVSSrr:
+ case X86::VDIVSSrr_Int:
+ case X86::VSQRTPDm:
+ case X86::VSQRTPDr:
+ case X86::VSQRTPSm:
+ case X86::VSQRTPSr:
+ case X86::VSQRTSDm:
+ case X86::VSQRTSDm_Int:
+ case X86::VSQRTSDr:
+ case X86::VSQRTSSm:
+ case X86::VSQRTSSm_Int:
+ case X86::VSQRTSSr:
+ return true;
+ }
+}
+
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ return isHighLatencyDef(DefMI->getOpcode());
+}
+
+namespace {
+ /// CGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for x86-32.
+ struct CGBR : public MachineFunctionPass {
+ static char ID;
+ CGBR() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+
+ assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ // Only emit a global base reg in PIC mode.
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+
+ // If we didn't need a GlobalBaseReg, don't insert code.
+ if (GlobalBaseReg == 0)
+ return false;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ unsigned PC;
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
+ PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ else
+ PC = GlobalBaseReg;
+
+ // Operand of MovePCtoStack is completely ignored by asm printer. It's
+ // only used in JIT code emission as displacement to pc.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+ // If we're using vanilla 'GOT' PIC style, we should use relative addressing
+ // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_GOT_ABSOLUTE_ADDRESS);
+ }
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char CGBR::ID = 0;
+FunctionPass*
+llvm::createGlobalBaseRegPass() { return new CGBR(); }
+
+namespace {
+ struct LDTLSCleanup : public MachineFunctionPass {
+ static char ID;
+ LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ X86MachineFunctionInfo* MFI = MF.getInfo<X86MachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there isn't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+ }
+
+ // Visit the dominator subtree rooted at Node in pre-order.
+ // If TLSBaseAddrReg is non-null, then use that to replace any
+ // TLS_base_addr instructions. Otherwise, create the register
+ // when the first such instruction is seen, and then use it
+ // as we encounter more instructions.
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ switch (I->getOpcode()) {
+ case X86::TLS_base_addr32:
+ case X86::TLS_base_addr64:
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+ I != E; ++I) {
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+ }
+
+ return Changed;
+ }
+
+ // Replace the TLS_base_addr instruction I with a copy from
+ // TLSBaseAddrReg, returning the new instruction.
+ MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF->getTarget());
+ const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ is64Bit ? X86::RAX : X86::EAX)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_base_addr instruction.
+ I->eraseFromParent();
+
+ return Copy;
+ }
+
+ // Create a virtal register in *TLSBaseAddrReg, and populate it by
+ // inserting a copy instruction after I. Returns the new instruction.
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF->getTarget());
+ const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
+ ? &X86::GR64RegClass
+ : &X86::GR32RegClass);
+
+ // Insert a copy from RAX/EAX to TLSBaseAddrReg.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ *TLSBaseAddrReg)
+ .addReg(is64Bit ? X86::RAX : X86::EAX);
+
+ return Copy;
+ }
+
+ virtual const char *getPassName() const {
+ return "Local Dynamic TLS Access Clean-up";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char LDTLSCleanup::ID = 0;
+FunctionPass*
+llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
new file mode 100644
index 000000000000..260f054d69cb
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -0,0 +1,419 @@
+//===-- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRUCTIONINFO_H
+#define X86INSTRUCTIONINFO_H
+
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "X86GenInstrInfo.inc"
+
+namespace llvm {
+ class X86RegisterInfo;
+ class X86TargetMachine;
+
+namespace X86 {
+ // X86 specific condition code. These correspond to X86_*_COND in
+ // X86InstrInfo.td. They must be kept in synch.
+ enum CondCode {
+ COND_A = 0,
+ COND_AE = 1,
+ COND_B = 2,
+ COND_BE = 3,
+ COND_E = 4,
+ COND_G = 5,
+ COND_GE = 6,
+ COND_L = 7,
+ COND_LE = 8,
+ COND_NE = 9,
+ COND_NO = 10,
+ COND_NP = 11,
+ COND_NS = 12,
+ COND_O = 13,
+ COND_P = 14,
+ COND_S = 15,
+
+ // Artificial condition codes. These are used by AnalyzeBranch
+ // to indicate a block terminated with two conditional branches to
+ // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
+ // which can't be represented on x86 with a single condition. These
+ // are never used in MachineInstrs.
+ COND_NE_OR_P,
+ COND_NP_OR_E,
+
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ // Turn CMov opcode into condition code.
+ CondCode getCondFromCMovOpc(unsigned Opc);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(X86::CondCode CC);
+} // end namespace X86;
+
+
+/// isGlobalStubReference - Return true if the specified TargetFlag operand is
+/// a reference to a stub for a global, not the global itself.
+inline static bool isGlobalStubReference(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_DLLIMPORT: // dllimport stub.
+ case X86II::MO_GOTPCREL: // rip-relative GOT reference.
+ case X86II::MO_GOT: // normal GOT reference.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Hidden $non_lazy_ptr ref.
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isGlobalRelativeToPICBase - Return true if the specified global value
+/// reference is relative to a 32-bit PIC base (X86ISD::GlobalBaseReg). If this
+/// is true, the addressing mode has the PIC base register added in (e.g. EBX).
+inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_GOTOFF: // isPICStyleGOT: local global.
+ case X86II::MO_GOT: // isPICStyleGOT: other global.
+ case X86II::MO_PIC_BASE_OFFSET: // Darwin local global.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global.
+ case X86II::MO_TLVP: // ??? Pretty sure..
+ return true;
+ default:
+ return false;
+ }
+}
+
+inline static bool isScale(const MachineOperand &MO) {
+ return MO.isImm() &&
+ (MO.getImm() == 1 || MO.getImm() == 2 ||
+ MO.getImm() == 4 || MO.getImm() == 8);
+}
+
+inline static bool isLeaMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+4 <= MI->getNumOperands() &&
+ MI->getOperand(Op ).isReg() && isScale(MI->getOperand(Op+1)) &&
+ MI->getOperand(Op+2).isReg() &&
+ (MI->getOperand(Op+3).isImm() ||
+ MI->getOperand(Op+3).isGlobal() ||
+ MI->getOperand(Op+3).isCPI() ||
+ MI->getOperand(Op+3).isJTI());
+}
+
+inline static bool isMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+5 <= MI->getNumOperands() &&
+ MI->getOperand(Op+4).isReg() &&
+ isLeaMem(MI, Op);
+}
+
+class X86InstrInfo : public X86GenInstrInfo {
+ X86TargetMachine &TM;
+ const X86RegisterInfo RI;
+
+ /// RegOp2MemOpTable3Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
+ /// RegOp2MemOpTable2, RegOp2MemOpTable3 - Load / store folding opcode maps.
+ ///
+ typedef DenseMap<unsigned,
+ std::pair<unsigned, unsigned> > RegOp2MemOpTableType;
+ RegOp2MemOpTableType RegOp2MemOpTable2Addr;
+ RegOp2MemOpTableType RegOp2MemOpTable0;
+ RegOp2MemOpTableType RegOp2MemOpTable1;
+ RegOp2MemOpTableType RegOp2MemOpTable2;
+ RegOp2MemOpTableType RegOp2MemOpTable3;
+
+ /// MemOp2RegOpTable - Load / store unfolding opcode map.
+ ///
+ typedef DenseMap<unsigned,
+ std::pair<unsigned, unsigned> > MemOp2RegOpTableType;
+ MemOp2RegOpTableType MemOp2RegOpTable;
+
+ static void AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags);
+
+public:
+ explicit X86InstrInfo(X86TargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const X86RegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+ /// extension instruction. That is, it's like a copy where it's legal for the
+ /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+ /// true, then it's expected the pre-extension value is available as a subreg
+ /// of the result register. This also returns the sub-register index in
+ /// SubIdx.
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const;
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const;
+
+ /// convertToThreeAddress - This method must be implemented by targets that
+ /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+ /// may be able to convert a two-address instruction into a true
+ /// three-address instruction on demand. This allows the X86 target (for
+ /// example) to convert ADD and SHL instructions into LEA instructions if they
+ /// would require register copies due to two-addressness.
+ ///
+ /// This method returns a null pointer if the transformation cannot be
+ /// performed, otherwise it returns the new instruction.
+ ///
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ /// commuteInstruction - We have a few instructions that must be hacked on to
+ /// commute them.
+ ///
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ // Branch analysis.
+ virtual bool isUnpredicatedTerminator(const MachineInstr* MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual bool canInsertSelect(const MachineBasicBlock&,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned, unsigned, int&, int&, int&) const;
+ virtual void insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ /// foldMemoryOperand - If this target supports it, fold a load or store of
+ /// the specified stack slot into the specified machine instruction for the
+ /// specified operand(s). If this is possible, the target should perform the
+ /// folding and return true, otherwise it should return false. If it folds
+ /// the instruction, it is likely that the MachineInstruction the iterator
+ /// references has been changed.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ /// foldMemoryOperand - Same as the previous version except it allows folding
+ /// of any load and store from / to any address, not just from a specific
+ /// stack slot.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const;
+
+ /// canFoldMemoryOperand - Returns true if the specified load / store is
+ /// folding is possible.
+ virtual bool canFoldMemoryOperand(const MachineInstr*,
+ const SmallVectorImpl<unsigned> &) const;
+
+ /// unfoldMemoryOperand - Separate a single instruction which folded a load or
+ /// a store or a load and a store into two or more instruction. If this is
+ /// possible, returns true as well as the new instructions by reference.
+ virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const;
+
+ /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new
+ /// instruction after load / store are unfolded from an instruction of the
+ /// specified opcode. It returns zero if the specified unfolding is not
+ /// possible. If LoadRegIndex is non-null, it is filled in with the operand
+ /// index of the operand which will hold the register holding the loaded
+ /// value.
+ virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler
+ /// to determine if two loads are loading from the same base address. It
+ /// should only return true if the base pointers are the same and the
+ /// only differences between the two addresses are the offset. It also returns
+ /// the offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2) const;
+
+ /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+ /// be scheduled togther. On some targets if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ virtual void getNoopForMachoTarget(MCInst &NopInst) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine
+ /// instruction that defines the specified register class.
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ static bool isX86_64ExtendedReg(const MachineOperand &MO) {
+ if (!MO.isReg()) return false;
+ return X86II::isX86_64ExtendedReg(MO.getReg());
+ }
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// the global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+ std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const;
+
+ void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+
+ unsigned getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const;
+ void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const;
+
+ MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ unsigned OpNum,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Alignment) const;
+
+ bool isHighLatencyDef(int opc) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+
+ /// analyzeCompare - For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if having two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const;
+
+ /// optimizeCompareInstr - Check if there exists an earlier instruction that
+ /// operates on the same source operands and sets flags in the same way as
+ /// Compare; remove Compare if possible.
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const;
+
+ /// optimizeLoadInstr - Try to remove the load by folding it to a register
+ /// operand at the use. We fold the load instructions if and only if the
+ /// def and use are in the same BB. We only look at one load and see
+ /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register
+ /// defined by the load we are trying to fold. DefMI returns the machine
+ /// instruction that defines FoldAsLoadDefReg, and the function returns
+ /// the machine instruction generated due to folding.
+ virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
+ const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const;
+
+private:
+ MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ /// isFrameOperand - Return true and the FrameIndex if the specified
+ /// operand and follow operands form a reference to the stack frame.
+ bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
new file mode 100644
index 000000000000..ccc1aa2e35a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -0,0 +1,2197 @@
+//===-- X86InstrInfo.td - Main X86 Instruction Definition --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 specific DAG Nodes.
+//
+
+def SDTIntShiftDOp: SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisInt<3>]>;
+
+def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
+
+def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+
+def SDTX86Cmov : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
+
+// Unary and binary operator instructions that set EFLAGS as a side-effect.
+def SDTUnaryArithWithFlags : SDTypeProfile<2, 1,
+ [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+// RES1, RES2, FLAGS = op LHS, RHS
+def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+def SDTX86BrCond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86SetCC : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+def SDTX86SetCC_C : SDTypeProfile<1, 2,
+ [SDTCisInt<0>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>;
+
+def SDTX86rdrand : SDTypeProfile<2, 0, [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
+ SDTCisVT<2, i8>]>;
+def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
+def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
+
+def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+
+def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
+ SDTCisVT<1, iPTR>,
+ SDTCisVT<2, iPTR>]>;
+
+def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i8>,
+ SDTCisVT<4, i32>]>;
+
+def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
+
+def SDTX86Void : SDTypeProfile<0, 0, []>;
+
+def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
+def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+
+def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+
+def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
+
+def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
+ [SDNPHasChain,SDNPSideEffect]>;
+def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+
+
+def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
+def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
+def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
+def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
+
+def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
+def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
+
+def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
+def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
+ [SDNPHasChain]>;
+def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
+
+def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>;
+
+def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+
+def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def X86vastart_save_xmm_regs :
+ SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
+ SDT_X86VASTART_SAVE_XMM_REGS,
+ [SDNPHasChain, SDNPVariadic]>;
+def X86vaarg64 :
+ SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def X86callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
+def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
+def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad]>;
+
+def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
+def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
+def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
+
+def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
+ [SDNPHasChain]>;
+
+def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
+def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
+
+def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
+def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
+def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
+
+def X86blsi : SDNode<"X86ISD::BLSI", SDTIntUnaryOp>;
+def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
+def X86blsr : SDNode<"X86ISD::BLSR", SDTIntUnaryOp>;
+
+def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
+
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
+ [SDNPHasChain]>;
+
+def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86WinFTOL : SDNode<"X86ISD::WIN_FTOL", SDT_X86WIN_FTOL,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Operand Definitions.
+//
+
+// A version of ptr_rc which excludes SP, ESP, and RSP. This is used for
+// the index operand of an address, to conform to x86 encoding restrictions.
+def ptr_rc_nosp : PointerLikeRegClass<1>;
+
+// *mem - Operand definitions for the funky X86 addressing mode operands.
+//
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem"; let PredicateMethod = "isMem";
+}
+def X86Mem8AsmOperand : AsmOperandClass {
+ let Name = "Mem8"; let PredicateMethod = "isMem8";
+}
+def X86Mem16AsmOperand : AsmOperandClass {
+ let Name = "Mem16"; let PredicateMethod = "isMem16";
+}
+def X86Mem32AsmOperand : AsmOperandClass {
+ let Name = "Mem32"; let PredicateMethod = "isMem32";
+}
+def X86Mem64AsmOperand : AsmOperandClass {
+ let Name = "Mem64"; let PredicateMethod = "isMem64";
+}
+def X86Mem80AsmOperand : AsmOperandClass {
+ let Name = "Mem80"; let PredicateMethod = "isMem80";
+}
+def X86Mem128AsmOperand : AsmOperandClass {
+ let Name = "Mem128"; let PredicateMethod = "isMem128";
+}
+def X86Mem256AsmOperand : AsmOperandClass {
+ let Name = "Mem256"; let PredicateMethod = "isMem256";
+}
+
+// Gather mem operands
+def X86MemVX32Operand : AsmOperandClass {
+ let Name = "MemVX32"; let PredicateMethod = "isMemVX32";
+}
+def X86MemVY32Operand : AsmOperandClass {
+ let Name = "MemVY32"; let PredicateMethod = "isMemVY32";
+}
+def X86MemVX64Operand : AsmOperandClass {
+ let Name = "MemVX64"; let PredicateMethod = "isMemVX64";
+}
+def X86MemVY64Operand : AsmOperandClass {
+ let Name = "MemVY64"; let PredicateMethod = "isMemVY64";
+}
+
+def X86AbsMemAsmOperand : AsmOperandClass {
+ let Name = "AbsMem";
+ let SuperClasses = [X86MemAsmOperand];
+}
+class X86MemOperand<string printMethod> : Operand<iPTR> {
+ let PrintMethod = printMethod;
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+let OperandType = "OPERAND_MEMORY" in {
+def opaque32mem : X86MemOperand<"printopaquemem">;
+def opaque48mem : X86MemOperand<"printopaquemem">;
+def opaque80mem : X86MemOperand<"printopaquemem">;
+def opaque512mem : X86MemOperand<"printopaquemem">;
+
+def i8mem : X86MemOperand<"printi8mem"> {
+ let ParserMatchClass = X86Mem8AsmOperand; }
+def i16mem : X86MemOperand<"printi16mem"> {
+ let ParserMatchClass = X86Mem16AsmOperand; }
+def i32mem : X86MemOperand<"printi32mem"> {
+ let ParserMatchClass = X86Mem32AsmOperand; }
+def i64mem : X86MemOperand<"printi64mem"> {
+ let ParserMatchClass = X86Mem64AsmOperand; }
+def i128mem : X86MemOperand<"printi128mem"> {
+ let ParserMatchClass = X86Mem128AsmOperand; }
+def i256mem : X86MemOperand<"printi256mem"> {
+ let ParserMatchClass = X86Mem256AsmOperand; }
+def f32mem : X86MemOperand<"printf32mem"> {
+ let ParserMatchClass = X86Mem32AsmOperand; }
+def f64mem : X86MemOperand<"printf64mem"> {
+ let ParserMatchClass = X86Mem64AsmOperand; }
+def f80mem : X86MemOperand<"printf80mem"> {
+ let ParserMatchClass = X86Mem80AsmOperand; }
+def f128mem : X86MemOperand<"printf128mem"> {
+ let ParserMatchClass = X86Mem128AsmOperand; }
+def f256mem : X86MemOperand<"printf256mem">{
+ let ParserMatchClass = X86Mem256AsmOperand; }
+
+// Gather mem operands
+def vx32mem : X86MemOperand<"printi32mem">{
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
+ let ParserMatchClass = X86MemVX32Operand; }
+def vy32mem : X86MemOperand<"printi32mem">{
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
+ let ParserMatchClass = X86MemVY32Operand; }
+def vx64mem : X86MemOperand<"printi64mem">{
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
+ let ParserMatchClass = X86MemVX64Operand; }
+def vy64mem : X86MemOperand<"printi64mem">{
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
+ let ParserMatchClass = X86MemVY64Operand; }
+}
+
+// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
+// plain GR64, so that it doesn't potentially require a REX prefix.
+def i8mem_NOREX : Operand<i64> {
+ let PrintMethod = "printi8mem";
+ let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86Mem8AsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+// GPRs available for tailcall.
+// It represents GR32_TC, GR64_TC or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
+// Special i32mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved register are popped.
+def i32mem_TC : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall,
+ i32imm, i8imm);
+ let ParserMatchClass = X86Mem32AsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved register are popped.
+def i64mem_TC : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+ ptr_rc_tailcall, i32imm, i8imm);
+ let ParserMatchClass = X86Mem64AsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+let OperandType = "OPERAND_PCREL",
+ ParserMatchClass = X86AbsMemAsmOperand,
+ PrintMethod = "printPCRelImm" in {
+def i32imm_pcrel : Operand<i32>;
+def i16imm_pcrel : Operand<i16>;
+
+def offset8 : Operand<i64>;
+def offset16 : Operand<i64>;
+def offset32 : Operand<i64>;
+def offset64 : Operand<i64>;
+
+// Branch targets have OtherVT type and print as pc-relative values.
+def brtarget : Operand<OtherVT>;
+def brtarget8 : Operand<OtherVT>;
+
+}
+
+def SSECC : Operand<i8> {
+ let PrintMethod = "printSSECC";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def AVXCC : Operand<i8> {
+ let PrintMethod = "printAVXCC";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+class ImmSExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
+class ImmZExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
+// Sign-extended immediate classes. We don't need to define the full lattice
+// here because there is no instruction with an ambiguity between ImmSExti64i32
+// and ImmSExti32i8.
+//
+// The strange ranges come from the fact that the assembler always works with
+// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
+// (which will be a -1ULL), and "0xFF" (-1 in 16-bits).
+
+// [0, 0x7FFFFFFF] |
+// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i32";
+}
+
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti16i8";
+ let SuperClasses = [ImmSExti64i32AsmOperand];
+}
+
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti32i8";
+}
+
+// [0, 0x000000FF]
+def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass {
+ let Name = "ImmZExtu32u8";
+}
+
+
+// [0, 0x0000007F] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i8";
+ let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand,
+ ImmSExti64i32AsmOperand];
+}
+
+// A couple of more descriptive operand definitions.
+// 16-bits but only 8 bits are significant.
+def i16i8imm : Operand<i16> {
+ let ParserMatchClass = ImmSExti16i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+// 32-bits but only 8 bits are significant.
+def i32i8imm : Operand<i32> {
+ let ParserMatchClass = ImmSExti32i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+// 32-bits but only 8 bits are significant, and those 8 bits are unsigned.
+def u32u8imm : Operand<i32> {
+ let ParserMatchClass = ImmZExtu32u8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64-bits but only 32 bits are significant.
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64-bits but only 32 bits are significant, and those bits are treated as being
+// pc relative.
+def i64i32imm_pcrel : Operand<i64> {
+ let PrintMethod = "printPCRelImm";
+ let ParserMatchClass = X86AbsMemAsmOperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+// 64-bits but only 8 bits are significant.
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+// Memory operands that use 64-bit pointers in both ILP32 and LP64.
+def lea64mem : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+
+//===----------------------------------------------------------------------===//
+// X86 Complex Pattern Definitions.
+//
+
+// Define X86 specific addressing mode.
+def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
+def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex],
+ []>;
+// In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
+def lea64_32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or,
+ frameindex, X86WrapperRIP],
+ []>;
+
+def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+def tls32baseaddr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex,
+ X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Predicate Definitions.
+def HasCMov : Predicate<"Subtarget->hasCMov()">;
+def NoCMov : Predicate<"!Subtarget->hasCMov()">;
+
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def Has3DNow : Predicate<"Subtarget->has3DNow()">;
+def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
+def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
+
+def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
+def HasAES : Predicate<"Subtarget->hasAES()">;
+def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">;
+def HasFMA : Predicate<"Subtarget->hasFMA()">;
+def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def HasXOP : Predicate<"Subtarget->hasXOP()">;
+def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
+def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
+def HasF16C : Predicate<"Subtarget->hasF16C()">;
+def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
+def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
+def HasBMI : Predicate<"Subtarget->hasBMI()">;
+def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
+def HasRTM : Predicate<"Subtarget->hasRTM()">;
+def HasHLE : Predicate<"Subtarget->hasHLE()">;
+def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
+def HasADX : Predicate<"Subtarget->hasADX()">;
+def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
+def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">;
+def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
+def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
+def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
+ AssemblerPredicate<"!Mode64Bit", "32-bit mode">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">,
+ AssemblerPredicate<"Mode64Bit", "64-bit mode">;
+def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
+def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
+def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
+def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
+ "TM.getCodeModel() != CodeModel::Kernel">;
+def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
+ "TM.getCodeModel() == CodeModel::Kernel">;
+def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
+def OptForSize : Predicate<"OptForSize">;
+def OptForSpeed : Predicate<"!OptForSize">;
+def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
+def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+include "X86InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments.
+//
+
+// X86 specific condition code. These correspond to CondCode in
+// X86InstrInfo.h. They must be kept in synch.
+def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE
+def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC
+def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C
+def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA
+def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z
+def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE
+def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL
+def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE
+def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG
+def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ
+def X86_COND_NO : PatLeaf<(i8 10)>;
+def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
+def X86_COND_NS : PatLeaf<(i8 12)>;
+def X86_COND_O : PatLeaf<(i8 13)>;
+def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
+def X86_COND_S : PatLeaf<(i8 15)>;
+
+let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
+ def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+ def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+ def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+}
+
+def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+
+
+// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+// unsigned field.
+def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
+
+def i64immZExt32SExt8 : ImmLeaf<i64, [{
+ return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm;
+}]>;
+
+// Helper fragments for loads.
+// It's always safe to treat a anyext i16 load as a i32 load if the i16 is
+// known to be 32-bit aligned or better. Ditto for i8 to i16.
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+
+def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
+def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
+def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
+def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
+def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// An 'srl' node with a single use.
+def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// An 'trunc' node with a single use.
+def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
+ return N->hasOneUse();
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction list.
+//
+
+// Nop
+let neverHasSideEffects = 1, SchedRW = [WriteZero] in {
+ def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>;
+ def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero),
+ "nop{w}\t$zero", [], IIC_NOP>, TB, OpSize;
+ def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero),
+ "nop{l}\t$zero", [], IIC_NOP>, TB;
+}
+
+
+// Constructing a stack frame.
+def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
+ "enter\t$len, $lvl", [], IIC_ENTER>, Sched<[WriteMicrocoded]>;
+
+let SchedRW = [WriteALU] in {
+let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
+def LEAVE : I<0xC9, RawFrm,
+ (outs), (ins), "leave", [], IIC_LEAVE>,
+ Requires<[In32BitMode]>;
+
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64 : I<0xC9, RawFrm,
+ (outs), (ins), "leave", [], IIC_LEAVE>,
+ Requires<[In64BitMode]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
+def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
+ IIC_POP_REG16>, OpSize;
+def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
+ IIC_POP_REG>;
+def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
+ IIC_POP_REG>, OpSize;
+def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", [],
+ IIC_POP_MEM>, OpSize;
+def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
+ IIC_POP_REG>;
+def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [],
+ IIC_POP_MEM>;
+
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
+ Requires<[In32BitMode]>;
+} // mayLoad, SchedRW
+
+let mayStore = 1, SchedRW = [WriteStore] in {
+def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
+ IIC_PUSH_REG>, OpSize;
+def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
+ IIC_PUSH_REG>;
+def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
+ IIC_PUSH_REG>, OpSize;
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
+ IIC_PUSH_MEM>,
+ OpSize;
+def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
+ IIC_PUSH_REG>;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
+ IIC_PUSH_MEM>;
+
+def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
+ "push{l}\t$imm", [], IIC_PUSH_IMM>;
+def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize;
+def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+ "push{l}\t$imm", [], IIC_PUSH_IMM>;
+
+def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
+ OpSize;
+def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>,
+ Requires<[In32BitMode]>;
+
+} // mayStore, SchedRW
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
+def POP64r : I<0x58, AddRegFrm,
+ (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>;
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
+ IIC_POP_REG>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [],
+ IIC_POP_MEM>;
+} // mayLoad, SchedRW
+let mayStore = 1, SchedRW = [WriteStore] in {
+def PUSH64r : I<0x50, AddRegFrm,
+ (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
+ IIC_PUSH_REG>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
+ IIC_PUSH_MEM>;
+} // mayStore, SchedRW
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1,
+ SchedRW = [WriteStore] in {
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
+ "push{q}\t$imm", [], IIC_PUSH_IMM>;
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{q}\t$imm", [], IIC_PUSH_IMM>;
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
+ "push{q}\t$imm", [], IIC_PUSH_IMM>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>,
+ Requires<[In64BitMode]>, Sched<[WriteLoad]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
+ Requires<[In64BitMode]>, Sched<[WriteStore]>;
+
+let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
+ mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>,
+ Requires<[In32BitMode]>;
+}
+let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
+ mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>,
+ Requires<[In32BitMode]>;
+}
+
+let Constraints = "$src = $dst", SchedRW = [WriteALU] in {
+// GR32 = bswap GR32
+def BSWAP32r : I<0xC8, AddRegFrm,
+ (outs GR32:$dst), (ins GR32:$src),
+ "bswap{l}\t$dst",
+ [(set GR32:$dst, (bswap GR32:$src))], IIC_BSWAP>, TB;
+
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "bswap{q}\t$dst",
+ [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB;
+} // Constraints = "$src = $dst", SchedRW
+
+// Bit scan instructions.
+let Defs = [EFLAGS] in {
+def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
+ IIC_BSF>, TB, OpSize, Sched<[WriteShift]>;
+def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))],
+ IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>;
+def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB,
+ Sched<[WriteShift]>;
+def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))],
+ IIC_BSF>, TB, Sched<[WriteShiftLd]>;
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
+ IIC_BSF>, TB, Sched<[WriteShift]>;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))],
+ IIC_BSF>, TB, Sched<[WriteShiftLd]>;
+
+def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>,
+ TB, OpSize, Sched<[WriteShift]>;
+def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))],
+ IIC_BSR>, TB,
+ OpSize, Sched<[WriteShiftLd]>;
+def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB,
+ Sched<[WriteShift]>;
+def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))],
+ IIC_BSR>, TB, Sched<[WriteShiftLd]>;
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB,
+ Sched<[WriteShift]>;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))],
+ IIC_BSR>, TB, Sched<[WriteShiftLd]>;
+} // Defs = [EFLAGS]
+
+let SchedRW = [WriteMicrocoded] in {
+// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
+let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
+def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", [], IIC_MOVS>;
+def MOVSW : I<0xA5, RawFrm, (outs), (ins), "movsw", [], IIC_MOVS>, OpSize;
+def MOVSD : I<0xA5, RawFrm, (outs), (ins), "movs{l|d}", [], IIC_MOVS>;
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", [], IIC_MOVS>;
+}
+
+// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
+let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
+def STOSB : I<0xAA, RawFrm, (outs), (ins), "stosb", [], IIC_STOS>;
+let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
+def STOSW : I<0xAB, RawFrm, (outs), (ins), "stosw", [], IIC_STOS>, OpSize;
+let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
+def STOSD : I<0xAB, RawFrm, (outs), (ins), "stos{l|d}", [], IIC_STOS>;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", [], IIC_STOS>;
+
+def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scasb", [], IIC_SCAS>;
+def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scasw", [], IIC_SCAS>, OpSize;
+def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l|d}", [], IIC_SCAS>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", [], IIC_SCAS>;
+
+def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", [], IIC_CMPS>;
+def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", [], IIC_CMPS>, OpSize;
+def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", [], IIC_CMPS>;
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+let SchedRW = [WriteMove] in {
+let neverHasSideEffects = 1 in {
+def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
+def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize;
+def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, imm:$src)], IIC_MOV>;
+def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, imm:$src)], IIC_MOV>, OpSize;
+def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, imm:$src)], IIC_MOV>;
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "movabs{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)], IIC_MOV>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>;
+}
+} // SchedRW
+
+let SchedRW = [WriteStore] in {
+def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
+def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store (i16 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize;
+def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store (i32 imm:$src), addr:$dst)], IIC_MOV_MEM>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
+
+/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
+/// 32-bit offset from the PC. These are only valid in x86-32 mode.
+let SchedRW = [WriteALU] in {
+def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>,
+ Requires<[In32BitMode]>;
+def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
+ "mov{w}\t{$src, %ax|AL, $src}", [], IIC_MOV_MEM>, OpSize,
+ Requires<[In32BitMode]>;
+def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
+ "mov{l}\t{$src, %eax|EAX, $src}", [], IIC_MOV_MEM>,
+ Requires<[In32BitMode]>;
+def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{b}\t{%al, $dst|$dst, AL}", [], IIC_MOV_MEM>,
+ Requires<[In32BitMode]>;
+def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
+ "mov{w}\t{%ax, $dst|$dst, AL}", [], IIC_MOV_MEM>, OpSize,
+ Requires<[In32BitMode]>;
+def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
+ "mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>,
+ Requires<[In32BitMode]>;
+}
+
+// FIXME: These definitions are utterly broken
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{q}\t{$src, %rax|RAX, $src}", []>;
+def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
+ "mov{q}\t{$src, %rax|RAX, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, RAX}", []>;
+def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, RAX}", []>;
+*/
+
+
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
+def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
+def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize;
+def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
+def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
+def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>;
+def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (loadi16 addr:$src))], IIC_MOV_MEM>, OpSize;
+def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (loadi32 addr:$src))], IIC_MOV_MEM>;
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>;
+}
+
+let SchedRW = [WriteStore] in {
+def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store GR8:$src, addr:$dst)], IIC_MOV_MEM>;
+def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store GR16:$src, addr:$dst)], IIC_MOV_MEM>, OpSize;
+def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store GR32:$src, addr:$dst)], IIC_MOV_MEM>;
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
+
+// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
+// that they can be used for copying and storing h registers, which can't be
+// encoded when a REX prefix is present.
+let isCodeGenOnly = 1 in {
+let neverHasSideEffects = 1 in
+def MOV8rr_NOREX : I<0x88, MRMDestReg,
+ (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>,
+ Sched<[WriteMove]>;
+let mayStore = 1 in
+def MOV8mr_NOREX : I<0x88, MRMDestMem,
+ (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
+ IIC_MOV_MEM>, Sched<[WriteStore]>;
+let mayLoad = 1, neverHasSideEffects = 1,
+ canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
+ (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
+ IIC_MOV_MEM>, Sched<[WriteLoad]>;
+}
+
+
+// Condition code ops, incl. set if equal/not equal/...
+let SchedRW = [WriteALU] in {
+let Defs = [EFLAGS], Uses = [AH] in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
+ [(set EFLAGS, (X86sahf AH))], IIC_AHF>;
+let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
+def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
+ IIC_AHF>; // AH = flags
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Bit tests instructions: BT, BTS, BTR, BTC.
+
+let Defs = [EFLAGS] in {
+let SchedRW = [WriteALU] in {
+def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>,
+ OpSize, TB;
+def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))], IIC_BT_RR>, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB;
+} // SchedRW
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Make these instructions disassembly
+// only for now.
+
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
+ def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi16 addr:$src1), GR16:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, OpSize, TB, Requires<[FastBTMem]>;
+ def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi32 addr:$src1), GR32:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, TB, Requires<[FastBTMem]>;
+ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi64 addr:$src1), GR64:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, TB;
+}
+
+let SchedRW = [WriteALU] in {
+def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))],
+ IIC_BT_RI>, OpSize, TB;
+def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))],
+ IIC_BT_RI>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))],
+ IIC_BT_RI>, TB;
+} // SchedRW
+
+// Note that these instructions don't need FastBTMem because that
+// only applies when the other operand is in a register. When it's
+// an immediate, bt is still fast.
+let SchedRW = [WriteALU] in {
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
+ ], IIC_BT_MI>, OpSize, TB;
+def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
+ ], IIC_BT_MI>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+ i64immSExt8:$src2))], IIC_BT_MI>, TB;
+} // SchedRW
+
+let hasSideEffects = 0 in {
+let SchedRW = [WriteALU] in {
+def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
+ OpSize, TB;
+def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
+ OpSize, TB;
+def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
+let SchedRW = [WriteALU] in {
+def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
+ OpSize, TB;
+def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
+ OpSize, TB;
+def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
+
+let SchedRW = [WriteALU] in {
+def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
+ OpSize, TB;
+def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
+ OpSize, TB;
+def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
+let SchedRW = [WriteALU] in {
+def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
+ OpSize, TB;
+def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
+ OpSize, TB;
+def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
+
+let SchedRW = [WriteALU] in {
+def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
+ OpSize, TB;
+def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
+ OpSize, TB;
+def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
+let SchedRW = [WriteALU] in {
+def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
+ OpSize, TB;
+def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
+ OpSize, TB;
+def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
+} // hasSideEffects = 0
+} // Defs = [EFLAGS]
+
+
+//===----------------------------------------------------------------------===//
+// Atomic support
+//
+
+// Atomic swap. These are just normal xchg instructions. But since a memory
+// operand is referenced, the atomicity is ensured.
+multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
+ InstrItinClass itin> {
+ let Constraints = "$val = $dst", SchedRW = [WriteALULd, WriteRMW] in {
+ def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+ itin>;
+ def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+ itin>, OpSize;
+ def NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ itin>;
+ def NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
+ itin>;
+ }
+}
+
+defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>;
+
+// Swap between registers.
+let SchedRW = [WriteALU] in {
+let Constraints = "$val = $dst" in {
+def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
+ "xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
+def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
+ "xchg{w}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>, OpSize;
+def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
+ "xchg{l}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
+def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
+ "xchg{q}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
+}
+
+// Swap between EAX and other registers.
+def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
+ "xchg{w}\t{$src, %ax|AX, $src}", [], IIC_XCHG_REG>, OpSize;
+def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
+ "xchg{l}\t{$src, %eax|EAX, $src}", [], IIC_XCHG_REG>,
+ Requires<[In32BitMode]>;
+// Uses GR32_NOAX in 64-bit mode to prevent encoding using the 0x90 NOP encoding.
+// xchg %eax, %eax needs to clear upper 32-bits of RAX so is not a NOP.
+def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src),
+ "xchg{l}\t{$src, %eax|EAX, $src}", [], IIC_XCHG_REG>,
+ Requires<[In64BitMode]>;
+def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
+ "xchg{q}\t{$src, %rax|RAX, $src}", [], IIC_XCHG_REG>;
+} // SchedRW
+
+let SchedRW = [WriteALU] in {
+def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
+ "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB,
+ OpSize;
+def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
+def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB,
+ OpSize;
+def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
+def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
+
+}
+
+let SchedRW = [WriteALU] in {
+def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_REG8>, TB;
+def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_REG>, TB, OpSize;
+def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_REG>, TB;
+def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_REG>, TB;
+} // SchedRW
+
+let SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1 in {
+def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_MEM8>, TB;
+def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_MEM>, TB, OpSize;
+def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_MEM>, TB;
+def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_MEM>, TB;
+}
+
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
+def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
+ "cmpxchg8b\t$dst", [], IIC_CMPXCHG_8B>, TB;
+
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
+ "cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>,
+ TB, Requires<[HasCmpxchg16b]>;
+} // SchedRW
+
+
+// Lock instruction prefix
+def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>;
+
+// Rex64 instruction prefix
+def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>;
+
+// Data16 instruction prefix
+def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>;
+
+// Repeat string operation instruction prefixes
+// These uses the DF flag in the EFLAGS register to inc or dec ECX
+let Defs = [ECX], Uses = [ECX,EFLAGS] in {
+// Repeat (used with INS, OUTS, MOVS, LODS and STOS)
+def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>;
+// Repeat while not equal (used with CMPS and SCAS)
+def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
+}
+
+
+// String manipulation instructions
+let SchedRW = [WriteMicrocoded] in {
+def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", [], IIC_LODS>;
+def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", [], IIC_LODS>, OpSize;
+def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", [], IIC_LODS>;
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", [], IIC_LODS>;
+}
+
+let SchedRW = [WriteSystem] in {
+def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", [], IIC_OUTS>;
+def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", [], IIC_OUTS>, OpSize;
+def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", [], IIC_OUTS>;
+}
+
+// Flag instructions
+let SchedRW = [WriteALU] in {
+def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>;
+def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>;
+def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>;
+def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>;
+def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", [], IIC_CLD>;
+def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>;
+def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>;
+
+def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
+}
+
+// Table lookup instructions
+def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>,
+ Sched<[WriteLoad]>;
+
+let SchedRW = [WriteMicrocoded] in {
+// ASCII Adjust After Addition
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>,
+ Requires<[In32BitMode]>;
+
+// ASCII Adjust AX Before Division
+// sets AL, AH and EFLAGS and uses AL and AH
+def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
+ "aad\t$src", [], IIC_AAD>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AX After Multiply
+// sets AL, AH and EFLAGS and uses AL
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+ "aam\t$src", [], IIC_AAM>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AL After Subtraction - sets
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", [], IIC_AAS>,
+ Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Addition
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>,
+ Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Subtraction
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>,
+ Requires<[In32BitMode]>;
+} // SchedRW
+
+let SchedRW = [WriteSystem] in {
+// Check Array Index Against Bounds
+def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize,
+ Requires<[In32BitMode]>;
+def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>,
+ Requires<[In32BitMode]>;
+
+// Adjust RPL Field of Segment Selector
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>,
+ Requires<[In32BitMode]>;
+def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
+ Requires<[In32BitMode]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// MOVBE Instructions
+//
+let Predicates = [HasMOVBE] in {
+ let SchedRW = [WriteALULd] in {
+ def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "movbe{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>,
+ OpSize, T8;
+ def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movbe{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bswap (loadi32 addr:$src)))], IIC_MOVBE>,
+ T8;
+ def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "movbe{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>,
+ T8;
+ }
+ let SchedRW = [WriteStore] in {
+ def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "movbe{w}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>,
+ OpSize, T8;
+ def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movbe{l}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR32:$src), addr:$dst)], IIC_MOVBE>,
+ T8;
+ def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movbe{q}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>,
+ T8;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// RDRAND Instruction
+//
+let Predicates = [HasRDRAND], Defs = [EFLAGS] in {
+ def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins),
+ "rdrand{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86rdrand))]>, OpSize, TB;
+ def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins),
+ "rdrand{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86rdrand))]>, TB;
+ def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins),
+ "rdrand{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86rdrand))]>, TB;
+}
+
+//===----------------------------------------------------------------------===//
+// RDSEED Instruction
+//
+let Predicates = [HasRDSEED], Defs = [EFLAGS] in {
+ def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins),
+ "rdseed{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize, TB;
+ def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
+ "rdseed{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86rdseed))]>, TB;
+ def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins),
+ "rdseed{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86rdseed))]>, TB;
+}
+
+//===----------------------------------------------------------------------===//
+// LZCNT Instruction
+//
+let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
+ def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>, XS,
+ OpSize;
+ def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctlz (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize;
+
+ def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>, XS;
+ def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctlz (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctlz (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
+
+//===----------------------------------------------------------------------===//
+// BMI Instructions
+//
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "tzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>, XS,
+ OpSize;
+ def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "tzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (cttz (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize;
+
+ def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "tzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>, XS;
+ def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "tzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (cttz (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "tzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "tzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (cttz (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
+
+multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
+ RegisterClass RC, X86MemOperand x86memop, SDNode OpNode,
+ PatFrag ld_frag> {
+ def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
+ !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (OpNode RC:$src)), (implicit EFLAGS)]>, T8, VEX_4V;
+ def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (OpNode (ld_frag addr:$src))), (implicit EFLAGS)]>,
+ T8, VEX_4V;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem,
+ X86blsr, loadi32>;
+ defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem,
+ X86blsr, loadi64>, VEX_W;
+ defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem,
+ X86blsmsk, loadi32>;
+ defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem,
+ X86blsmsk, loadi64>, VEX_W;
+ defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem,
+ X86blsi, loadi32>;
+ defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem,
+ X86blsi, loadi64>, VEX_W;
+}
+
+multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int,
+ PatFrag ld_frag> {
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
+ T8, VEX_4VOp3;
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
+ (implicit EFLAGS)]>, T8, VEX_4VOp3;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm BEXTR32 : bmi_bextr_bzhi<0xF7, "bextr{l}", GR32, i32mem,
+ int_x86_bmi_bextr_32, loadi32>;
+ defm BEXTR64 : bmi_bextr_bzhi<0xF7, "bextr{q}", GR64, i64mem,
+ int_x86_bmi_bextr_64, loadi64>, VEX_W;
+}
+
+let Predicates = [HasBMI2], Defs = [EFLAGS] in {
+ defm BZHI32 : bmi_bextr_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
+ int_x86_bmi_bzhi_32, loadi32>;
+ defm BZHI64 : bmi_bextr_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
+ int_x86_bmi_bzhi_64, loadi64>, VEX_W;
+}
+
+multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int,
+ PatFrag ld_frag> {
+ def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))]>,
+ VEX_4V;
+ def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>, VEX_4V;
+}
+
+let Predicates = [HasBMI2] in {
+ defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
+ int_x86_bmi_pdep_32, loadi32>, T8XD;
+ defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
+ int_x86_bmi_pdep_64, loadi64>, T8XD, VEX_W;
+ defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
+ int_x86_bmi_pext_32, loadi32>, T8XS;
+ defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
+ int_x86_bmi_pext_64, loadi64>, T8XS, VEX_W;
+}
+
+//===----------------------------------------------------------------------===//
+// Subsystems.
+//===----------------------------------------------------------------------===//
+
+include "X86InstrArithmetic.td"
+include "X86InstrCMovSetCC.td"
+include "X86InstrExtension.td"
+include "X86InstrControl.td"
+include "X86InstrShiftRotate.td"
+
+// X87 Floating Point Stack.
+include "X86InstrFPStack.td"
+
+// SIMD support (SSE, MMX and AVX)
+include "X86InstrFragmentsSIMD.td"
+
+// FMA - Fused Multiply-Add support (requires FMA)
+include "X86InstrFMA.td"
+
+// XOP
+include "X86InstrXOP.td"
+
+// SSE, MMX and 3DNow! vector support.
+include "X86InstrSSE.td"
+include "X86InstrMMX.td"
+include "X86Instr3DNow.td"
+
+include "X86InstrVMX.td"
+include "X86InstrSVM.td"
+
+include "X86InstrTSX.td"
+
+// System instructions.
+include "X86InstrSystem.td"
+
+// Compiler Pseudo Instructions and Pat Patterns
+include "X86InstrCompiler.td"
+
+//===----------------------------------------------------------------------===//
+// Assembler Mnemonic Aliases
+//===----------------------------------------------------------------------===//
+
+def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"cbw", "cbtw">;
+def : MnemonicAlias<"cwde", "cwtl">;
+def : MnemonicAlias<"cwd", "cwtd">;
+def : MnemonicAlias<"cdq", "cltd">;
+def : MnemonicAlias<"cdqe", "cltq">;
+def : MnemonicAlias<"cqo", "cqto">;
+
+// lret maps to lretl, it is not ambiguous with lretq.
+def : MnemonicAlias<"lret", "lretl">;
+
+def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"loopz", "loope">;
+def : MnemonicAlias<"loopnz", "loopne">;
+
+def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd", "popfl">;
+
+// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
+// all modes. However: "push (addr)" and "push $42" should default to
+// pushl/pushq depending on the current mode. Similar for "pop %bx"
+def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl">;
+
+def : MnemonicAlias<"repe", "rep">;
+def : MnemonicAlias<"repz", "rep">;
+def : MnemonicAlias<"repnz", "repne">;
+
+def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"salb", "shlb">;
+def : MnemonicAlias<"salw", "shlw">;
+def : MnemonicAlias<"sall", "shll">;
+def : MnemonicAlias<"salq", "shlq">;
+
+def : MnemonicAlias<"smovb", "movsb">;
+def : MnemonicAlias<"smovw", "movsw">;
+def : MnemonicAlias<"smovl", "movsl">;
+def : MnemonicAlias<"smovq", "movsq">;
+
+def : MnemonicAlias<"ud2a", "ud2">;
+def : MnemonicAlias<"verrw", "verr">;
+
+// System instruction aliases.
+def : MnemonicAlias<"iret", "iretl">;
+def : MnemonicAlias<"sysret", "sysretl">;
+def : MnemonicAlias<"sysexit", "sysexitl">;
+
+def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>;
+
+
+// Floating point stack aliases.
+def : MnemonicAlias<"fcmovz", "fcmove">;
+def : MnemonicAlias<"fcmova", "fcmovnbe">;
+def : MnemonicAlias<"fcmovnae", "fcmovb">;
+def : MnemonicAlias<"fcmovna", "fcmovbe">;
+def : MnemonicAlias<"fcmovae", "fcmovnb">;
+def : MnemonicAlias<"fcomip", "fcompi">;
+def : MnemonicAlias<"fildq", "fildll">;
+def : MnemonicAlias<"fistpq", "fistpll">;
+def : MnemonicAlias<"fisttpq", "fisttpll">;
+def : MnemonicAlias<"fldcww", "fldcw">;
+def : MnemonicAlias<"fnstcww", "fnstcw">;
+def : MnemonicAlias<"fnstsww", "fnstsw">;
+def : MnemonicAlias<"fucomip", "fucompi">;
+def : MnemonicAlias<"fwait", "wait">;
+
+
+class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond>
+ : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix),
+ !strconcat(Prefix, NewCond, Suffix)>;
+
+/// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of
+/// MnemonicAlias's that canonicalize the condition code in a mnemonic, for
+/// example "setz" -> "sete".
+multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix> {
+ def C : CondCodeAlias<Prefix, Suffix, "c", "b">; // setc -> setb
+ def Z : CondCodeAlias<Prefix, Suffix, "z" , "e">; // setz -> sete
+ def NA : CondCodeAlias<Prefix, Suffix, "na", "be">; // setna -> setbe
+ def NB : CondCodeAlias<Prefix, Suffix, "nb", "ae">; // setnb -> setae
+ def NC : CondCodeAlias<Prefix, Suffix, "nc", "ae">; // setnc -> setae
+ def NG : CondCodeAlias<Prefix, Suffix, "ng", "le">; // setng -> setle
+ def NL : CondCodeAlias<Prefix, Suffix, "nl", "ge">; // setnl -> setge
+ def NZ : CondCodeAlias<Prefix, Suffix, "nz", "ne">; // setnz -> setne
+ def PE : CondCodeAlias<Prefix, Suffix, "pe", "p">; // setpe -> setp
+ def PO : CondCodeAlias<Prefix, Suffix, "po", "np">; // setpo -> setnp
+
+ def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b">; // setnae -> setb
+ def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a">; // setnbe -> seta
+ def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l">; // setnge -> setl
+ def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g">; // setnle -> setg
+}
+
+// Aliases for set<CC>
+defm : IntegerCondCodeMnemonicAlias<"set", "">;
+// Aliases for j<CC>
+defm : IntegerCondCodeMnemonicAlias<"j", "">;
+// Aliases for cmov<CC>{w,l,q}
+defm : IntegerCondCodeMnemonicAlias<"cmov", "w">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "l">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "q">;
+
+
+//===----------------------------------------------------------------------===//
+// Assembler Instruction Aliases
+//===----------------------------------------------------------------------===//
+
+// aad/aam default to base 10 if no operand is specified.
+def : InstAlias<"aad", (AAD8i8 10)>;
+def : InstAlias<"aam", (AAM8i8 10)>;
+
+// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
+def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>;
+
+// clr aliases.
+def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>;
+def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
+def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
+def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
+
+// div and idiv aliases for explicit A register.
+def : InstAlias<"divb $src, %al", (DIV8r GR8 :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16r GR16:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>;
+def : InstAlias<"divb $src, %al", (DIV8m i8mem :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16m i16mem:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8r GR8 :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16r GR16:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8m i8mem :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16m i16mem:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
+
+
+
+// Various unary fpstack operations default to operating on on ST1.
+// For example, "fxch" -> "fxch %st(1)"
+def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
+def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>;
+def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>;
+def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
+def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>;
+def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>;
+def : InstAlias<"fxch", (XCH_F ST1)>;
+def : InstAlias<"fcom", (COM_FST0r ST1)>;
+def : InstAlias<"fcomp", (COMP_FST0r ST1)>;
+def : InstAlias<"fcomi", (COM_FIr ST1)>;
+def : InstAlias<"fcompi", (COM_FIPr ST1)>;
+def : InstAlias<"fucom", (UCOM_Fr ST1)>;
+def : InstAlias<"fucomp", (UCOM_FPr ST1)>;
+def : InstAlias<"fucomi", (UCOM_FIr ST1)>;
+def : InstAlias<"fucompi", (UCOM_FIPr ST1)>;
+
+// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
+// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
+// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
+// gas.
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"),
+ (Inst RST:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"),
+ (Inst ST0), EmitAlias>;
+}
+
+defm : FpUnaryAlias<"fadd", ADD_FST0r>;
+defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
+defm : FpUnaryAlias<"fsub", SUB_FST0r>;
+defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>;
+defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
+defm : FpUnaryAlias<"fsubrp", SUB_FPrST0>;
+defm : FpUnaryAlias<"fmul", MUL_FST0r>;
+defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
+defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
+defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>;
+defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
+defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
+defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
+defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
+defm : FpUnaryAlias<"fcompi", COM_FIPr>;
+defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
+
+
+// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
+// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
+// solely because gas supports it.
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>;
+def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
+def : InstAlias<"fsubp %st(0), $op", (SUBR_FPrST0 RST:$op)>;
+def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
+def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
+def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
+
+// We accept "fnstsw %eax" even though it only writes %ax.
+def : InstAlias<"fnstsw %eax", (FNSTSW16r)>;
+def : InstAlias<"fnstsw %al" , (FNSTSW16r)>;
+def : InstAlias<"fnstsw" , (FNSTSW16r)>;
+
+// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
+// this is compatible with what GAS does.
+def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst)>;
+def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst)>;
+
+// "imul <imm>, B" is an alias for "imul <imm>, B, B".
+def : InstAlias<"imulw $imm, $r", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm)>;
+def : InstAlias<"imulw $imm, $r", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm)>;
+def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>;
+def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>;
+
+// inb %dx -> inb %al, %dx
+def : InstAlias<"inb %dx", (IN8rr)>;
+def : InstAlias<"inw %dx", (IN16rr)>;
+def : InstAlias<"inl %dx", (IN32rr)>;
+def : InstAlias<"inb $port", (IN8ri i8imm:$port)>;
+def : InstAlias<"inw $port", (IN16ri i8imm:$port)>;
+def : InstAlias<"inl $port", (IN32ri i8imm:$port)>;
+
+
+// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
+def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+
+// Force mov without a suffix with a segment and mem to prefer the 'l' form of
+// the move. All segment/mem forms are equivalent, this has the shortest
+// encoding.
+def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>;
+def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
+
+// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
+def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
+
+// Match 'movq GR64, MMX' as an alias for movd.
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
+
+// movsd with no operands (as opposed to the SSE scalar move of a double) is an
+// alias for movsl. (as in rep; movsd)
+def : InstAlias<"movsd", (MOVSD)>;
+
+// movsx aliases
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
+
+// movzx aliases
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
+// Note: No GR32->GR64 movzx form.
+
+// outb %dx -> outb %al, %dx
+def : InstAlias<"outb %dx", (OUT8rr)>;
+def : InstAlias<"outw %dx", (OUT16rr)>;
+def : InstAlias<"outl %dx", (OUT32rr)>;
+def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>;
+def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>;
+def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
+
+// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
+// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
+// errors, since its encoding is the most compact.
+def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
+
+// shld/shrd op,op -> shld op, op, CL
+def : InstAlias<"shldw $r2, $r1", (SHLD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shldl $r2, $r1", (SHLD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shldq $r2, $r1", (SHLD64rrCL GR64:$r1, GR64:$r2)>;
+def : InstAlias<"shrdw $r2, $r1", (SHRD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shrdl $r2, $r1", (SHRD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shrdq $r2, $r1", (SHRD64rrCL GR64:$r1, GR64:$r2)>;
+
+def : InstAlias<"shldw $reg, $mem", (SHLD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shldl $reg, $mem", (SHLD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shldq $reg, $mem", (SHLD64mrCL i64mem:$mem, GR64:$reg)>;
+def : InstAlias<"shrdw $reg, $mem", (SHRD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shrdl $reg, $mem", (SHRD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shrdq $reg, $mem", (SHRD64mrCL i64mem:$mem, GR64:$reg)>;
+
+/* FIXME: This is disabled because the asm matcher is currently incapable of
+ * matching a fixed immediate like $1.
+// "shl X, $1" is an alias for "shl X".
+multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> {
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>;
+}
+
+defm : ShiftRotateByOneAlias<"rcl", "RCL">;
+defm : ShiftRotateByOneAlias<"rcr", "RCR">;
+defm : ShiftRotateByOneAlias<"rol", "ROL">;
+defm : ShiftRotateByOneAlias<"ror", "ROR">;
+FIXME */
+
+// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
+def : InstAlias<"testb $val, $mem", (TEST8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"testw $val, $mem", (TEST16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"testl $val, $mem", (TEST32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"testq $val, $mem", (TEST64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms.
+def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, %eax" and "xchgX %eax, <reg>" as synonyms.
+def : InstAlias<"xchgw %ax, $src", (XCHG16ar GR16:$src)>;
+def : InstAlias<"xchgl %eax, $src", (XCHG32ar GR32:$src)>, Requires<[In32BitMode]>;
+def : InstAlias<"xchgl %eax, $src", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>;
+def : InstAlias<"xchgq %rax, $src", (XCHG64ar GR64:$src)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
new file mode 100644
index 000000000000..49721df7c118
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -0,0 +1,624 @@
+//===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 MMX instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+// All instructions that use MMX should be in this file, even if they also use
+// SSE.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Multiclasses
+//===----------------------------------------------------------------------===//
+
+let Sched = WriteVecALU in {
+def MMX_INTALU_ITINS : OpndItins<
+ IIC_MMX_ALU_RR, IIC_MMX_ALU_RM
+>;
+
+def MMX_INTALUQ_ITINS : OpndItins<
+ IIC_MMX_ALUQ_RR, IIC_MMX_ALUQ_RM
+>;
+
+def MMX_PHADDSUBW : OpndItins<
+ IIC_MMX_PHADDSUBW_RR, IIC_MMX_PHADDSUBW_RM
+>;
+
+def MMX_PHADDSUBD : OpndItins<
+ IIC_MMX_PHADDSUBD_RR, IIC_MMX_PHADDSUBD_RM
+>;
+}
+
+let Sched = WriteVecIMul in
+def MMX_PMUL_ITINS : OpndItins<
+ IIC_MMX_PMUL, IIC_MMX_PMUL
+>;
+
+let Sched = WriteVecALU in {
+def MMX_PSADBW_ITINS : OpndItins<
+ IIC_MMX_PSADBW, IIC_MMX_PSADBW
+>;
+
+def MMX_MISC_FUNC_ITINS : OpndItins<
+ IIC_MMX_MISC_FUNC_MEM, IIC_MMX_MISC_FUNC_REG
+>;
+}
+
+def MMX_SHIFT_ITINS : ShiftOpndItins<
+ IIC_MMX_SHIFT_RR, IIC_MMX_SHIFT_RM, IIC_MMX_SHIFT_RI
+>;
+
+let Sched = WriteShuffle in {
+def MMX_UNPCK_H_ITINS : OpndItins<
+ IIC_MMX_UNPCK_H_RR, IIC_MMX_UNPCK_H_RM
+>;
+
+def MMX_UNPCK_L_ITINS : OpndItins<
+ IIC_MMX_UNPCK_L, IIC_MMX_UNPCK_L
+>;
+
+def MMX_PCK_ITINS : OpndItins<
+ IIC_MMX_PCK_RR, IIC_MMX_PCK_RM
+>;
+
+def MMX_PSHUF_ITINS : OpndItins<
+ IIC_MMX_PSHUF, IIC_MMX_PSHUF
+>;
+} // Sched
+
+let Sched = WriteCvtF2I in {
+def MMX_CVT_PD_ITINS : OpndItins<
+ IIC_MMX_CVT_PD_RR, IIC_MMX_CVT_PD_RM
+>;
+
+def MMX_CVT_PS_ITINS : OpndItins<
+ IIC_MMX_CVT_PS_RR, IIC_MMX_CVT_PS_RM
+>;
+}
+
+let Constraints = "$src1 = $dst" in {
+ // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+ // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
+ multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ OpndItins itins, bit Commutable = 0> {
+ def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[itins.Sched]> {
+ let isCommutable = Commutable;
+ }
+ def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))],
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+
+ multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2, ShiftOpndItins itins> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[WriteVecShift]>;
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))],
+ itins.rm>, Sched<[WriteVecShiftLd, ReadAfterLd]>;
+ def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
+ (ins VR64:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>,
+ Sched<[WriteVecShift]>;
+ }
+}
+
+/// Unary MMX instructions requiring SSSE3.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, OpndItins itins> {
+ def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
+
+ def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64 (bitconvert (memopmmx addr:$src))))],
+ itins.rm>, Sched<[itins.Sched.Folded]>;
+}
+
+/// Binary MMX instructions requiring SSSE3.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, OpndItins itins> {
+ let isCommutable = 0 in
+ def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopmmx addr:$src2))))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+}
+
+/// PALIGN MMX instructions (require SSSE3).
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
+ def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+ def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+}
+
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, OpndItins itins, Domain d> {
+ def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>,
+ Sched<[itins.Sched]>;
+ def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>,
+ Sched<[itins.Sched.Folded]>;
+}
+
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
+ NoItinerary, d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
+ NoItinerary, d>;
+}
+
+//===----------------------------------------------------------------------===//
+// MMX EMMS Instruction
+//===----------------------------------------------------------------------===//
+
+def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
+ [(int_x86_mmx_emms)]>;
+
+//===----------------------------------------------------------------------===//
+// MMX Scalar Instructions
+//===----------------------------------------------------------------------===//
+
+// Data Transfer Instructions
+def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (scalar_to_vector GR32:$src)))],
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
+let canFoldAsLoad = 1 in
+def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))],
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
+let mayStore = 1 in
+def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>,
+ Sched<[WriteStore]>;
+
+// Low word of MMX to GPR.
+def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
+ [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
+def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (MMX_X86movd2w (x86mmx VR64:$src)))],
+ IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
+
+// These are 64 bit moves, but since the OS X assembler doesn't
+// recognize a register-register movq, we write them as
+// movd.
+let SchedRW = [WriteMove] in {
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
+ (outs GR64:$dst), (ins VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>;
+def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (bitconvert GR64:$src))], IIC_MMX_MOV_MM_RM>;
+let neverHasSideEffects = 1 in
+def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", [],
+ IIC_MMX_MOVQ_RR>;
+} // SchedRW
+
+let SchedRW = [WriteLoad] in {
+let canFoldAsLoad = 1 in
+def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (load_mmx addr:$src))],
+ IIC_MMX_MOVQ_RM>;
+def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (x86mmx VR64:$src), addr:$dst)],
+ IIC_MMX_MOVQ_RM>;
+} // SchedRW
+
+let SchedRW = [WriteMove] in {
+def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+ (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (bitconvert
+ (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))))],
+ IIC_MMX_MOVQ_RR>;
+
+def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64
+ (scalar_to_vector
+ (i64 (bitconvert (x86mmx VR64:$src))))))],
+ IIC_MMX_MOVQ_RR>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
+ [], IIC_MMX_MOVQ_RR>;
+
+def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+ (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
+ [], IIC_MMX_MOVQ_RR>;
+} // SchedRW
+
+def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movntq\t{$src, $dst|$dst, $src}",
+ [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)],
+ IIC_MMX_MOVQ_RM>, Sched<[WriteStore]>;
+
+let AddedComplexity = 15 in
+// movd to MMX register zero-extends
+def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))],
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
+let AddedComplexity = 20 in
+def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
+ (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (X86vzmovl (x86mmx
+ (scalar_to_vector (loadi32 addr:$src))))))],
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
+
+// Arithmetic Instructions
+defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
+ MMX_INTALU_ITINS>;
+defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w,
+ MMX_INTALU_ITINS>;
+defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d,
+ MMX_INTALU_ITINS>;
+// -- Addition
+defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q,
+ MMX_INTALUQ_ITINS, 1>;
+defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w,
+ MMX_INTALU_ITINS, 1>;
+
+defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w,
+ MMX_INTALU_ITINS, 1>;
+
+defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w,
+ MMX_PHADDSUBW>;
+defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d,
+ MMX_PHADDSUBD>;
+defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw,
+ MMX_PHADDSUBW>;
+
+
+// -- Subtraction
+defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b,
+ MMX_INTALU_ITINS>;
+defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q,
+ MMX_INTALUQ_ITINS, 1>;
+
+defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w,
+ MMX_INTALU_ITINS, 1>;
+
+defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w,
+ MMX_INTALU_ITINS, 1>;
+
+defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w,
+ MMX_PHADDSUBW>;
+defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d,
+ MMX_PHADDSUBD>;
+defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw,
+ MMX_PHADDSUBW>;
+
+// -- Multiplication
+defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w,
+ MMX_PMUL_ITINS, 1>;
+
+defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w,
+ MMX_PMUL_ITINS, 1>;
+defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w,
+ MMX_PMUL_ITINS, 1>;
+defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq,
+ MMX_PMUL_ITINS, 1>;
+let isCommutable = 1 in
+defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw, MMX_PMUL_ITINS>;
+
+// -- Miscellanea
+defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd,
+ MMX_PMUL_ITINS, 1>;
+
+defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw, MMX_PMUL_ITINS>;
+defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b,
+ MMX_MISC_FUNC_ITINS, 1>;
+defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w,
+ MMX_MISC_FUNC_ITINS, 1>;
+
+defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b,
+ MMX_MISC_FUNC_ITINS, 1>;
+defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w,
+ MMX_MISC_FUNC_ITINS, 1>;
+
+defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b,
+ MMX_MISC_FUNC_ITINS, 1>;
+defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w,
+ MMX_MISC_FUNC_ITINS, 1>;
+
+defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw,
+ MMX_PSADBW_ITINS, 1>;
+
+defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b,
+ MMX_MISC_FUNC_ITINS>;
+defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w,
+ MMX_MISC_FUNC_ITINS>;
+defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d,
+ MMX_MISC_FUNC_ITINS>;
+let Constraints = "$src1 = $dst" in
+ defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>;
+
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn,
+ MMX_INTALU_ITINS>;
+
+// Shift Instructions
+defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_mmx_psrl_w, int_x86_mmx_psrli_w,
+ MMX_SHIFT_ITINS>;
+defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_mmx_psrl_d, int_x86_mmx_psrli_d,
+ MMX_SHIFT_ITINS>;
+defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
+ MMX_SHIFT_ITINS>;
+
+defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
+ MMX_SHIFT_ITINS>;
+defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_mmx_psll_d, int_x86_mmx_pslli_d,
+ MMX_SHIFT_ITINS>;
+defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
+ MMX_SHIFT_ITINS>;
+
+defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
+ MMX_SHIFT_ITINS>;
+defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
+ MMX_SHIFT_ITINS>;
+
+// Comparison Instructions
+defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
+ MMX_INTALU_ITINS>;
+defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w,
+ MMX_INTALU_ITINS>;
+defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d,
+ MMX_INTALU_ITINS>;
+
+defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b,
+ MMX_INTALU_ITINS>;
+defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w,
+ MMX_INTALU_ITINS>;
+defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d,
+ MMX_INTALU_ITINS>;
+
+// -- Unpack Instructions
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
+ int_x86_mmx_punpckhbw,
+ MMX_UNPCK_H_ITINS>;
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
+ int_x86_mmx_punpckhwd,
+ MMX_UNPCK_H_ITINS>;
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
+ int_x86_mmx_punpckhdq,
+ MMX_UNPCK_H_ITINS>;
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
+ int_x86_mmx_punpcklbw,
+ MMX_UNPCK_L_ITINS>;
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
+ int_x86_mmx_punpcklwd,
+ MMX_UNPCK_L_ITINS>;
+defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
+ int_x86_mmx_punpckldq,
+ MMX_UNPCK_L_ITINS>;
+
+// -- Pack Instructions
+defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb,
+ MMX_PCK_ITINS>;
+defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw,
+ MMX_PCK_ITINS>;
+defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb,
+ MMX_PCK_ITINS>;
+
+// -- Shuffle Instructions
+defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b,
+ MMX_PSHUF_ITINS>;
+
+def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))],
+ IIC_MMX_PSHUF>, Sched<[WriteShuffle]>;
+def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
+ (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (int_x86_sse_pshuf_w (load_mmx addr:$src1),
+ imm:$src2))],
+ IIC_MMX_PSHUF>, Sched<[WriteShuffleLd]>;
+
+
+
+
+// -- Conversion Instructions
+defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+ f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+ MMX_CVT_PS_ITINS, SSEPackedSingle>, TB;
+defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+ f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+ f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+ MMX_CVT_PS_ITINS, SSEPackedSingle>, TB;
+defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+ f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+ i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+ defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+ int_x86_sse_cvtpi2ps,
+ i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+}
+
+// Extract / Insert
+def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
+ (iPTR imm:$src2)))],
+ IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
+let Constraints = "$src1 = $dst" in {
+ def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
+ (outs VR64:$dst),
+ (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ GR32:$src2, (iPTR imm:$src3)))],
+ IIC_MMX_PINSRW>, Sched<[WriteShuffle]>;
+
+ def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
+ (outs VR64:$dst),
+ (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3)))],
+ IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+
+// Low word of XMM to MMX.
+def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
+ (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
+ (x86mmx (MMX_MOVQ64rm addr:$src))>;
+
+// Misc.
+let SchedRW = [WriteShuffle] in {
+let Uses = [EDI] in
+def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)],
+ IIC_MMX_MASKMOV>;
+let Uses = [RDI] in
+def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)],
+ IIC_MMX_MASKMOV>;
+}
+
+// 64-bit bit convert.
+let Predicates = [HasSSE2] in {
+def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
+ (MMX_MOVFR642Qrr FR64:$src)>;
+}
+
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
new file mode 100644
index 000000000000..384238741b18
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -0,0 +1,8328 @@
+//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 SSE instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
+ InstrItinClass rr = arg_rr;
+ InstrItinClass rm = arg_rm;
+ // InstrSchedModel info.
+ X86FoldableSchedWrite Sched = WriteFAdd;
+}
+
+class SizeItins<OpndItins arg_s, OpndItins arg_d> {
+ OpndItins s = arg_s;
+ OpndItins d = arg_d;
+}
+
+
+class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
+ InstrItinClass arg_ri> {
+ InstrItinClass rr = arg_rr;
+ InstrItinClass rm = arg_rm;
+ InstrItinClass ri = arg_ri;
+}
+
+
+// scalar
+let Sched = WriteFAdd in {
+def SSE_ALU_F32S : OpndItins<
+ IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
+>;
+
+def SSE_ALU_F64S : OpndItins<
+ IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
+>;
+}
+
+def SSE_ALU_ITINS_S : SizeItins<
+ SSE_ALU_F32S, SSE_ALU_F64S
+>;
+
+let Sched = WriteFMul in {
+def SSE_MUL_F32S : OpndItins<
+ IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM
+>;
+
+def SSE_MUL_F64S : OpndItins<
+ IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
+>;
+}
+
+def SSE_MUL_ITINS_S : SizeItins<
+ SSE_MUL_F32S, SSE_MUL_F64S
+>;
+
+let Sched = WriteFDiv in {
+def SSE_DIV_F32S : OpndItins<
+ IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM
+>;
+
+def SSE_DIV_F64S : OpndItins<
+ IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
+>;
+}
+
+def SSE_DIV_ITINS_S : SizeItins<
+ SSE_DIV_F32S, SSE_DIV_F64S
+>;
+
+// parallel
+let Sched = WriteFAdd in {
+def SSE_ALU_F32P : OpndItins<
+ IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
+>;
+
+def SSE_ALU_F64P : OpndItins<
+ IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
+>;
+}
+
+def SSE_ALU_ITINS_P : SizeItins<
+ SSE_ALU_F32P, SSE_ALU_F64P
+>;
+
+let Sched = WriteFMul in {
+def SSE_MUL_F32P : OpndItins<
+ IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM
+>;
+
+def SSE_MUL_F64P : OpndItins<
+ IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
+>;
+}
+
+def SSE_MUL_ITINS_P : SizeItins<
+ SSE_MUL_F32P, SSE_MUL_F64P
+>;
+
+let Sched = WriteFDiv in {
+def SSE_DIV_F32P : OpndItins<
+ IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM
+>;
+
+def SSE_DIV_F64P : OpndItins<
+ IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
+>;
+}
+
+def SSE_DIV_ITINS_P : SizeItins<
+ SSE_DIV_F32P, SSE_DIV_F64P
+>;
+
+def SSE_BIT_ITINS_P : OpndItins<
+ IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
+>;
+
+let Sched = WriteVecALU in {
+def SSE_INTALU_ITINS_P : OpndItins<
+ IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
+>;
+
+def SSE_INTALUQ_ITINS_P : OpndItins<
+ IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
+>;
+}
+
+let Sched = WriteVecIMul in
+def SSE_INTMUL_ITINS_P : OpndItins<
+ IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
+>;
+
+def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
+ IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
+>;
+
+def SSE_MOVA_ITINS : OpndItins<
+ IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM
+>;
+
+def SSE_MOVU_ITINS : OpndItins<
+ IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
+>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 Instructions Classes
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
+multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, X86MemOperand x86memop,
+ OpndItins itins,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
+ }
+ def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
+multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ Operand memopr, ComplexPattern mem_cpat,
+ OpndItins itins,
+ bit Is2Addr = 1> {
+ def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
+ SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, mem_cpat:$src2))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+/// sse12_fp_packed - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, OpndItins itins, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
+ Sched<[itins.Sched]>;
+ let mayLoad = 1 in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
+ itins.rm, d>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
+ string OpcodeStr, X86MemOperand x86memop,
+ list<dag> pat_rr, list<dag> pat_rm,
+ bit Is2Addr = 1> {
+ let isCommutable = 1, hasSideEffects = 0 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rr, NoItinerary, d>,
+ Sched<[WriteVecLogic]>;
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rm, NoItinerary, d>,
+ Sched<[WriteVecLogicLd, ReadAfterLd]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-instruction patterns
+//===----------------------------------------------------------------------===//
+
+// A vector extract of the first f32/f64 position is a subregister copy
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
+
+// A 128-bit subvector extract from the first 256-bit vector position
+// is a subregister copy that needs no instruction.
+def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
+ (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
+def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))),
+ (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;
+
+def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))),
+ (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
+def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))),
+ (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;
+
+def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))),
+ (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
+def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))),
+ (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;
+
+// A 128-bit subvector insert to the first 256-bit vector position
+// is a subregister copy that needs no instruction.
+let AddedComplexity = 25 in { // to give priority over vinsertf128rm
+def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)),
+ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)),
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)),
+ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)),
+ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+}
+
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
+def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
+
+// Bitcasts between 128-bit vector types. Return the original type since
+// no instruction is needed for the conversion
+let Predicates = [HasSSE2] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+}
+
+// Bitcasts between 256-bit vector types. Return the original type since
+// no instruction is needed for the conversion
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
+}
+
+// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
+// This is expanded by ExpandPostRAPseudos.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, SchedRW = [WriteZero] in {
+ def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
+ def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
+}
+
+//===----------------------------------------------------------------------===//
+// AVX & SSE - Zero/One Vectors
+//===----------------------------------------------------------------------===//
+
+// Alias instruction that maps zero vector to pxor / xorp* for sse.
+// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
+// swizzled by ExecutionDepsFix to pxor.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, SchedRW = [WriteZero] in {
+def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+}
+
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+
+
+// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
+// and doesn't need it because on sandy bridge the register is set to zero
+// at the rename stage without using any execution unit, so SET0PSY
+// and SET0PDY can be used for vector int instructions without penalty
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
+def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>;
+}
+
+let Predicates = [HasAVX] in
+ def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
+ def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
+ def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
+ def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
+}
+
+// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
+// VPXOR instruction writes zero to its upper part, it's safe build zeros.
+let Predicates = [HasAVX1Only] in {
+def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
+
+def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
+
+def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
+
+def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
+}
+
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, SchedRW = [WriteZero] in {
+ def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+ let Predicates = [HasAVX2] in
+ def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8i32 immAllOnesV))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move FP Scalar Instructions
+//
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
+//===----------------------------------------------------------------------===//
+
+multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, string base_opc,
+ string asm_opr> {
+ def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, RC:$src2),
+ !strconcat(base_opc, asm_opr),
+ [(set VR128:$dst, (vt (OpNode VR128:$src1,
+ (scalar_to_vector RC:$src2))))],
+ IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
+
+ // For the disassembler
+ let isCodeGenOnly = 1, hasSideEffects = 0 in
+ def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, RC:$src2),
+ !strconcat(base_opc, asm_opr),
+ [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
+}
+
+multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, string OpcodeStr> {
+ // AVX
+ defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+ VEX_4V, VEX_LIG;
+
+ def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ VEX, VEX_LIG, Sched<[WriteStore]>;
+ // SSE1 & 2
+ let Constraints = "$src1 = $dst" in {
+ defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}">;
+ }
+
+ def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ Sched<[WriteStore]>;
+}
+
+// Loading from memory automatically zeroing upper bits.
+multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> {
+ def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
+ def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
+}
+
+defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
+defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+
+ let AddedComplexity = 20 in
+ defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+}
+
+// Patterns
+let Predicates = [HasAVX] in {
+ let AddedComplexity = 15 in {
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVS{S,D} to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+
+ // Move low f32 and clear high bits.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
+
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
+
+ // Represent the same patterns above but in the form they appear for
+ // 256-bit types
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
+ }
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0),
+ (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
+ sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0),
+ (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
+ sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>;
+
+ def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2i64 (V_SET0)),
+ (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>;
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>;
+
+ // Shuffle with VMOVSS
+ def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4i32 VR128:$src1),
+ (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4f32 VR128:$src1),
+ (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>;
+
+ // 256-bit variants
+ def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
+ def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
+
+ // Shuffle with VMOVSD
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+
+ // 256-bit variants
+ def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
+ def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
+
+
+ // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
+ // is during lowering, where it's not possible to recognize the fold cause
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+}
+
+let Predicates = [UseSSE1] in {
+ let AddedComplexity = 15 in {
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSS to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSSrm already zeros the high parts of the register.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;
+
+ // Shuffle with MOVSS
+ def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
+}
+
+let Predicates = [UseSSE2] in {
+ let AddedComplexity = 15 in {
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSD to the lower bits.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSDrm already zeros the high parts of the register.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>;
+
+ // Shuffle with MOVSD
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+
+ // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
+ // is during lowering, where it's not possible to recognize the fold cause
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d,
+ OpndItins itins,
+ bit IsReMaterializable = 1> {
+let neverHasSideEffects = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
+ Sched<[WriteMove]>;
+let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
+ Sched<[WriteLoad]>;
+}
+
+defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+ TB, VEX;
+defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ TB, OpSize, VEX;
+defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+ TB, VEX;
+defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ TB, OpSize, VEX;
+
+defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
+ "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+ TB, VEX, VEX_L;
+defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ TB, OpSize, VEX, VEX_L;
+defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
+ "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+ TB, VEX, VEX_L;
+defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ TB, OpSize, VEX, VEX_L;
+defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+ TB;
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ TB, OpSize;
+defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+ TB;
+defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ TB, OpSize;
+
+let SchedRW = [WriteStore] in {
+def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX;
+def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX;
+def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX;
+def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX;
+def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
+def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
+def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v8f32 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v4f64 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+} // SchedRW
+
+// For disassembler
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
+ def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
+ def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
+ def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX;
+ def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX;
+ def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
+ def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
+ def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movups\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
+ def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movupd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86vzmovl
+ (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzmovl
+ (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v8f32 (X86vzmovl
+ (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4f64 (X86vzmovl
+ (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+}
+
+
+def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
+ (VMOVUPDYmr addr:$dst, VR256:$src)>;
+
+let SchedRW = [WriteStore] in {
+def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>;
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>;
+} // SchedRW
+
+// For disassembler
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
+ def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
+ def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
+ def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>;
+ def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
+ (VMOVUPDmr addr:$dst, VR128:$src)>;
+}
+
+let Predicates = [UseSSE1] in
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+let Predicates = [UseSSE2] in
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
+ (MOVUPDmr addr:$dst, VR128:$src)>;
+
+// Use vmovaps/vmovups for AVX integer load/store.
+let Predicates = [HasAVX] in {
+ // 128-bit load/store
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+ // 256-bit load/store
+ def : Pat<(alignedloadv4i64 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv4i64 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v4i64 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v8i32 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v16i16 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v32i8 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+ // Special patterns for storing subvector extracts of lower 128-bits
+ // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
+ def : Pat<(alignedstore (v2f64 (extract_subvector
+ (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4f32 (extract_subvector
+ (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v2i64 (extract_subvector
+ (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4i32 (extract_subvector
+ (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v8i16 (extract_subvector
+ (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v16i8 (extract_subvector
+ (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+
+ def : Pat<(store (v2f64 (extract_subvector
+ (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v4f32 (extract_subvector
+ (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v2i64 (extract_subvector
+ (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v4i32 (extract_subvector
+ (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v8i16 (extract_subvector
+ (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v16i8 (extract_subvector
+ (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+}
+
+// Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
+let Predicates = [UseSSE1] in {
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded. FIXME: Set encoding to pseudo!
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
+def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
+def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
+}
+
+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded. FIXME: Set encoding to pseudo!
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
+let isCodeGenOnly = 1 in {
+ def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
+ IIC_SSE_MOVA_P_RM>, VEX;
+ def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
+ IIC_SSE_MOVA_P_RM>, VEX;
+}
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
+ IIC_SSE_MOVA_P_RM>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
+ IIC_SSE_MOVA_P_RM>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Low packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, string asm_opr,
+ InstrItinClass itin> {
+ def PSrm : PI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ !strconcat(base_opc, "s", asm_opr),
+ [(set VR128:$dst,
+ (psnode VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
+ itin, SSEPackedSingle>, TB,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+
+ def PDrm : PI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ !strconcat(base_opc, "d", asm_opr),
+ [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))))],
+ itin, SSEPackedDouble>, TB, OpSize,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+
+}
+
+multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, InstrItinClass itin> {
+ defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ itin>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $dst|$dst, $src2}",
+ itin>;
+}
+
+let AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
+ IIC_SSE_MOV_LH>;
+}
+
+let SchedRW = [WriteStore] in {
+def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>, VEX;
+def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>, VEX;
+def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>;
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>;
+} // SchedRW
+
+let Predicates = [HasAVX] in {
+ // Shuffle with VMOVLPS
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+
+ // Shuffle with VMOVLPD
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+}
+
+let Predicates = [UseSSE1] in {
+ // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+ def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
+ (iPTR 0))), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+ // Shuffle with MOVLPS
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+}
+
+let Predicates = [UseSSE2] in {
+ // Shuffle with MOVLPD
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Hi packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 20 in {
+ defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
+ IIC_SSE_MOV_LH>;
+}
+
+let SchedRW = [WriteStore] in {
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0 so the non-store version isn't too horrible.
+def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (bc_v2f64 (v4f32 VR128:$src))),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
+def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
+def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (bc_v2f64 (v4f32 VR128:$src))),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
+} // SchedRW
+
+let Predicates = [HasAVX] in {
+ // VMOVHPS patterns
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+
+ // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
+ // is during lowering, where it's not possible to recognize the load fold
+ // cause it has two uses through a bitcast. One use disappears at isel time
+ // and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVHPDrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE1] in {
+ // MOVHPS patterns
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE2] in {
+ // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
+ // is during lowering, where it's not possible to recognize the load fold
+ // cause it has two uses through a bitcast. One use disappears at isel time
+ // and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 20 in {
+ def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>,
+ VEX_4V, Sched<[WriteShuffle]>;
+ def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>,
+ VEX_4V, Sched<[WriteShuffle]>;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
+ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
+}
+
+let Predicates = [HasAVX] in {
+ // MOVLHPS patterns
+ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+ // MOVHLPS patterns
+ def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [UseSSE1] in {
+ // MOVLHPS patterns
+ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+ // MOVHLPS patterns
+ def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Conversion Instructions
+//===----------------------------------------------------------------------===//
+
+def SSE_CVT_PD : OpndItins<
+ IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
+>;
+
+let Sched = WriteCvtI2F in
+def SSE_CVT_PS : OpndItins<
+ IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
+>;
+
+let Sched = WriteCvtI2F in
+def SSE_CVT_Scalar : OpndItins<
+ IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
+>;
+
+let Sched = WriteCvtF2I in
+def SSE_CVT_SS2SI_32 : OpndItins<
+ IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
+>;
+
+let Sched = WriteCvtF2I in
+def SSE_CVT_SS2SI_64 : OpndItins<
+ IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
+>;
+
+let Sched = WriteCvtF2I in
+def SSE_CVT_SD2SI : OpndItins<
+ IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
+>;
+
+multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, OpndItins itins> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))],
+ itins.rr>, Sched<[itins.Sched]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
+ itins.rm>, Sched<[itins.Sched.Folded]>;
+}
+
+multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm, Domain d,
+ OpndItins itins> {
+let neverHasSideEffects = 1 in {
+ def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [], itins.rr, d>, Sched<[itins.Sched]>;
+ let mayLoad = 1 in
+ def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
+}
+}
+
+multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+let neverHasSideEffects = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2F]>;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2FLd, ReadAfterLd]>;
+} // neverHasSideEffects = 1
+}
+
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_32>,
+ XS, VEX, VEX_LIG;
+defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_64>,
+ XS, VEX, VEX_W, VEX_LIG;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>,
+ XD, VEX, VEX_LIG;
+defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>,
+ XD, VEX, VEX_W, VEX_LIG;
+
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
+
+// The assembler can recognize rr 64-bit instructions by seeing a rxx
+// register, but the same isn't true when only using memory operands,
+// provide other assembly "l" and "q" forms to address this explicitly
+// where appropriate to do so.
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">,
+ XS, VEX_4V, VEX_LIG;
+defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
+ XS, VEX_4V, VEX_W, VEX_LIG;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
+ XD, VEX_4V, VEX_LIG;
+defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
+ XD, VEX_4V, VEX_W, VEX_LIG;
+
+def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f32 (sint_to_fp GR32:$src)),
+ (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f32 (sint_to_fp GR64:$src)),
+ (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+ def : Pat<(f64 (sint_to_fp GR32:$src)),
+ (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f64 (sint_to_fp GR64:$src)),
+ (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+}
+
+defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_32>, XS;
+defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_64>, XS, REX_W;
+defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>, XD;
+defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>, XD, REX_W;
+defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XS;
+defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XS, REX_W;
+defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XD;
+defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XD, REX_W;
+
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
+
+def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
+ (CVTSI2SSrm FR64:$dst, i32mem:$src)>;
+def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
+ (CVTSI2SDrm FR64:$dst, i32mem:$src)>;
+
+// Conversion Instructions Intrinsics - Match intrinsics which expect MM
+// and/or XMM operand(s).
+
+multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
+ string asm, OpndItins itins> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
+}
+
+multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, OpndItins itins,
+ bit Is2Addr = 1> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
+ itins.rr>, Sched<[itins.Sched]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
+ int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
+ SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
+defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
+ SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
+
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+ sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;
+
+
+defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
+ SSE_CVT_Scalar, 0>, XS, VEX_4V;
+defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
+ SSE_CVT_Scalar, 0>, XS, VEX_4V,
+ VEX_W;
+defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
+ SSE_CVT_Scalar, 0>, XD, VEX_4V;
+defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
+ SSE_CVT_Scalar, 0>, XD,
+ VEX_4V, VEX_W;
+
+let Constraints = "$src1 = $dst" in {
+ defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32,
+ "cvtsi2ss{l}", SSE_CVT_Scalar>, XS;
+ defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64,
+ "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
+ defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32,
+ "cvtsi2sd{l}", SSE_CVT_Scalar>, XD;
+ defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64,
+ "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
+}
+
+/// SSE 1 Only
+
+// Aliases for intrinsics
+defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ ssmem, sse_load_f32, "cvttss2si",
+ SSE_CVT_SS2SI_32>, XS, VEX;
+defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+ "cvttss2si", SSE_CVT_SS2SI_64>,
+ XS, VEX, VEX_W;
+defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ sdmem, sse_load_f64, "cvttsd2si",
+ SSE_CVT_SD2SI>, XD, VEX;
+defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+ "cvttsd2si", SSE_CVT_SD2SI>,
+ XD, VEX, VEX_W;
+defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ ssmem, sse_load_f32, "cvttss2si",
+ SSE_CVT_SS2SI_32>, XS;
+defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+ "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W;
+defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ sdmem, sse_load_f64, "cvttsd2si",
+ SSE_CVT_SD2SI>, XD;
+defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+ "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
+
+defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ ssmem, sse_load_f32, "cvtss2si",
+ SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
+defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ ssmem, sse_load_f32, "cvtss2si",
+ SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
+
+defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ ssmem, sse_load_f32, "cvtss2si",
+ SSE_CVT_SS2SI_32>, XS;
+defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ ssmem, sse_load_f32, "cvtss2si",
+ SSE_CVT_SS2SI_64>, XS, REX_W;
+
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, VEX, Requires<[HasAVX]>;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, VEX, VEX_L, Requires<[HasAVX]>;
+
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, Requires<[UseSSE2]>;
+
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
+
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SI64rm GR64:$dst, sdmem:$src)>;
+
+/// SSE 2 Only
+
+// Convert scalar double to scalar single
+let neverHasSideEffects = 1 in {
+def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
+let mayLoad = 1 in
+def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [], IIC_SSE_CVT_Scalar_RM>,
+ XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+}
+
+def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+ Requires<[HasAVX]>;
+
+def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround FR64:$src))],
+ IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
+def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround (loadf64 addr:$src)))],
+ IIC_SSE_CVT_Scalar_RM>,
+ XD,
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
+
+def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
+def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
+ VR128:$src1, sse_load_f64:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+
+let Constraints = "$src1 = $dst" in {
+def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
+def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
+ VR128:$src1, sse_load_f64:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+}
+
+// Convert scalar single to scalar double
+// SSE2 instructions with XS prefix
+let neverHasSideEffects = 1 in {
+def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [], IIC_SSE_CVT_Scalar_RR>,
+ XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
+let mayLoad = 1 in
+def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [], IIC_SSE_CVT_Scalar_RM>,
+ XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+}
+
+def : Pat<(f64 (fextend FR32:$src)),
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;
+
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+ Requires<[HasAVX, OptForSpeed]>;
+
+def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (fextend FR32:$src))],
+ IIC_SSE_CVT_Scalar_RR>, XS,
+ Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
+def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (extloadf32 addr:$src))],
+ IIC_SSE_CVT_Scalar_RM>, XS,
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
+
+// extload f32 -> f64. This matches load+fextend because we have a hack in
+// the isel (PreprocessForFPConvert) that can introduce loads after dag
+// combine.
+// Since these loads aren't folded into the fextend, we have to match it
+// explicitly here.
+def : Pat<(fextend (loadf32 addr:$src)),
+ (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
+def : Pat<(extloadf32 addr:$src),
+ (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
+
+def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
+def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+}
+
+// Convert packed single/double fp to doubleword
+def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
+def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
+
+
+// Convert Packed Double FP to Packed DW Integers
+let Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ VEX, Sched<[WriteCvtF2I]>;
+
+// XMM only
+def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQrr VR128:$dst, VR128:$src)>;
+def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX,
+ Sched<[WriteCvtF2ILd]>;
+
+// YMM only
+def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtF2I]>;
+def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
+ VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
+def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
+}
+
+def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
+def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
+
+// Convert with truncation packed single/double fp to doubleword
+// SSE2 packed instructions with XS prefix
+def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
+def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
+ (memopv8f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
+ Sched<[WriteCvtF2ILd]>;
+
+def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
+def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (VCVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VCVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
+ (VCVTDQ2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
+ (VCVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (VCVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
+ (VCVTTPS2DQrm addr:$src)>;
+
+ def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
+ (VCVTDQ2PSYrr VR256:$src)>;
+ def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+ def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
+ (VCVTTPS2DQYrr VR256:$src)>;
+ def : Pat<(v8i32 (fp_to_sint (memopv8f32 addr:$src))),
+ (VCVTTPS2DQYrm addr:$src)>;
+}
+
+let Predicates = [UseSSE2] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (CVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ (CVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
+ (CVTDQ2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
+ (CVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (CVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
+ (CVTTPS2DQrm addr:$src)>;
+}
+
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
+
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+
+// XMM only
+def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQrr VR128:$dst, VR128:$src)>;
+def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memopv2f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+
+// YMM only
+def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
+def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
+ (VCVTTPD2DQYrr VR256:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
+ (VCVTTPD2DQYrm addr:$src)>;
+} // Predicates = [HasAVX]
+
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memopv2f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>,
+ Sched<[WriteCvtF2ILd]>;
+
+// Convert packed single to packed double
+let Predicates = [HasAVX] in {
+ // SSE2 instructions without OpSize prefix
+def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>;
+def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>;
+def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
+ IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>;
+def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
+}
+
+let Predicates = [UseSSE2] in {
+def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>;
+def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>;
+}
+
+// Convert Packed DW Integers to Packed Double FP
+let Predicates = [HasAVX] in {
+let neverHasSideEffects = 1, mayLoad = 1 in
+def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX, Sched<[WriteCvtI2FLd]>;
+def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
+ Sched<[WriteCvtI2F]>;
+def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtdq2_pd_256
+ (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2FLd]>;
+def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2F]>;
+}
+
+let neverHasSideEffects = 1, mayLoad = 1 in
+def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
+def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
+
+// AVX 256-bit register conversion intrinsics
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
+ (VCVTDQ2PDYrr VR128:$src)>;
+ def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VCVTDQ2PDYrm addr:$src)>;
+} // Predicates = [HasAVX]
+
+// Convert packed double to packed single
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;
+
+// XMM only
+def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSrr VR128:$dst, VR128:$src)>;
+def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
+
+// YMM only
+def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
+def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
+def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
+
+def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
+def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;
+
+
+// AVX 256-bit register conversion intrinsics
+// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
+// whenever possible to avoid declaring two versions of each one.
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+ // Match fround and fextend for 128/256-bit conversions
+ def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+ (VCVTPD2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+ (VCVTPD2PSXrm addr:$src)>;
+ def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
+ (VCVTPD2PSYrr VR256:$src)>;
+ def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
+ (VCVTPD2PSYrm addr:$src)>;
+
+ def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+ (VCVTPS2PDrr VR128:$src)>;
+ def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
+ (VCVTPS2PDYrr VR128:$src)>;
+ def : Pat<(v4f64 (extloadv4f32 addr:$src)),
+ (VCVTPS2PDYrm addr:$src)>;
+}
+
+let Predicates = [UseSSE2] in {
+ // Match fround and fextend for 128 conversions
+ def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+ (CVTPD2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+ (CVTPD2PSrm addr:$src)>;
+
+ def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+ (CVTPS2PDrr VR128:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
+multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+ Operand CC, SDNode OpNode, ValueType VT,
+ PatFrag ld_frag, string asm, string asm_alt,
+ OpndItins itins> {
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
+ itins.rr>, Sched<[itins.Sched]>;
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ [(set RC:$dst, (OpNode (VT RC:$src1),
+ (ld_frag addr:$src2), imm:$cc))],
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+ let neverHasSideEffects = 1 in {
+ def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
+ IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
+ IIC_SSE_ALU_F32S_RM>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
+defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32,
+ "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSE_ALU_F32S>,
+ XS, VEX_4V, VEX_LIG;
+defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64,
+ "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSE_ALU_F32S>, // same latency as 32 bit compare
+ XD, VEX_4V, VEX_LIG;
+
+let Constraints = "$src1 = $dst" in {
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32,
+ "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
+ XS;
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
+ "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
+ "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SSE_ALU_F32S>, // same latency as 32 bit compare
+ XD;
+}
+
+multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
+ Intrinsic Int, string asm, OpndItins itins> {
+ def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src, CC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ VR128:$src, imm:$cc))],
+ itins.rr>,
+ Sched<[itins.Sched]>;
+ def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ (load addr:$src), imm:$cc))],
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+// Aliases to match intrinsics which expect XMM operand(s).
+defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ SSE_ALU_F32S>,
+ XS, VEX_4V;
+defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ SSE_ALU_F32S>, // same latency as f32
+ XD, VEX_4V;
+let Constraints = "$src1 = $dst" in {
+ defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+ SSE_ALU_F32S>, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+ SSE_ALU_F32S>, // same latency as f32
+ XD;
+}
+
+
+// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
+multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
+ ValueType vt, X86MemOperand x86memop,
+ PatFrag ld_frag, string OpcodeStr, Domain d> {
+ def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
+ IIC_SSE_COMIS_RR, d>,
+ Sched<[WriteFAdd]>;
+ def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1),
+ (ld_frag addr:$src2)))],
+ IIC_SSE_COMIS_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
+}
+
+let Defs = [EFLAGS] in {
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, TB, OpSize, VEX,
+ VEX_LIG;
+ let Pattern = []<dag> in {
+ defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB, VEX,
+ VEX_LIG;
+ defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize, VEX,
+ VEX_LIG;
+ }
+
+ defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, TB, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+ load, "comiss", SSEPackedSingle>, TB, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+ load, "comisd", SSEPackedDouble>, TB, OpSize, VEX;
+ defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, TB;
+ defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ let Pattern = []<dag> in {
+ defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+ }
+
+ defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, TB;
+ defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+} // Defs = [EFLAGS]
+
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
+multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
+ Operand CC, Intrinsic Int, string asm,
+ string asm_alt, Domain d> {
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
+ IIC_SSE_CMPP_RR, d>,
+ Sched<[WriteFAdd]>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
+ IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+ let neverHasSideEffects = 1 in {
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
+ }
+}
+
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedSingle>, TB, VEX_4V, VEX_L;
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
+let Constraints = "$src1 = $dst" in {
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
+ "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SSEPackedSingle>, TB;
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
+ "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SSEPackedDouble>, TB, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
+ (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
+def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
+ (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
+def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [UseSSE1] in {
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [UseSSE2] in {
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Shuffle Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_shuffle - sse 1 & 2 shuffle instructions
+multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
+ ValueType vt, string asm, PatFrag mem_frag,
+ Domain d, bit IsConvertibleToThreeAddress = 0> {
+ def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+ let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+ def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffle]>;
+}
+
+defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L;
+defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
+
+let Constraints = "$src1 = $dst" in {
+ defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
+ TB;
+ defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>,
+ TB, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4i32 (X86Shufp VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+ def : Pat<(v2i64 (X86Shufp VR128:$src1,
+ (memopv2i64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+ // 256-bit patterns
+ def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v8i32 (X86Shufp VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+
+ def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v4i64 (X86Shufp VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+let Predicates = [UseSSE1] in {
+ def : Pat<(v4i32 (X86Shufp VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+}
+
+let Predicates = [UseSSE2] in {
+ // Generic SHUFPD patterns
+ def : Pat<(v2i64 (X86Shufp VR128:$src1,
+ (memopv2i64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
+multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
+ PatFrag mem_frag, RegisterClass RC,
+ X86MemOperand x86memop, string asm,
+ Domain d> {
+ def rr : PI<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1, RC:$src2)))],
+ IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>;
+ def rm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1,
+ (mem_frag addr:$src2))))],
+ IIC_SSE_UNPCK, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+
+defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V, VEX_L;
+defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
+defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V, VEX_L;
+defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
+
+let Constraints = "$src1 = $dst" in {
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+} // Constraints = "$src1 = $dst"
+
+let Predicates = [HasAVX1Only] in {
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+}
+
+let Predicates = [HasAVX] in {
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
+ // problem is during lowering, where it's not possible to recognize the load
+ // fold cause it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (VUNPCKLPDrr VR128:$src, VR128:$src)>;
+}
+
+let Predicates = [UseSSE2] in {
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
+ // problem is during lowering, where it's not possible to recognize the load
+ // fold cause it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Extract Floating-Point Sign mask
+//===----------------------------------------------------------------------===//
+
+/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave
+multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
+ Domain d> {
+ def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+ Sched<[WriteVecLogic]>;
+ def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
+ IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, TB, VEX;
+ defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, TB,
+ OpSize, VEX;
+ defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, TB,
+ VEX, VEX_L;
+ defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, TB,
+ OpSize, VEX, VEX_L;
+
+ def : Pat<(i32 (X86fgetsign FR32:$src)),
+ (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(i64 (X86fgetsign FR32:$src)),
+ (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(i32 (X86fgetsign FR64:$src)),
+ (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(i64 (X86fgetsign FR64:$src)),
+ (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ // Assembler Only
+ def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>;
+ def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedDouble>, TB,
+ OpSize, VEX, Sched<[WriteVecLogic]>;
+ def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>;
+ def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedDouble>, TB,
+ OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>;
+}
+
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+ SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+ SSEPackedDouble>, TB, OpSize;
+
+def : Pat<(i32 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ Requires<[UseSSE1]>;
+def : Pat<(i64 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ Requires<[UseSSE1]>;
+def : Pat<(i32 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ Requires<[UseSSE2]>;
+def : Pat<(i64 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ Requires<[UseSSE2]>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+/// PDI_binop_rm - Simple SSE2 binary operator.
+multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, OpndItins itins,
+ bit IsCommutable, bit Is2Addr> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)))))],
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+} // ExeDomain = SSEPackedInt
+
+multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
+ ValueType OpVT128, ValueType OpVT256,
+ OpndItins itins, bit IsCommutable = 0> {
+let Predicates = [HasAVX] in
+ defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
+ VR128, memopv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
+ memopv2i64, i128mem, itins, IsCommutable, 1>;
+
+let Predicates = [HasAVX2] in
+ defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
+ OpVT256, VR256, memopv4i64, i256mem, itins,
+ IsCommutable, 0>, VEX_4V, VEX_L;
+}
+
+// These are ordered here for pattern ordering requirements with the fp versions
+
+defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
+ SSE_BIT_ITINS_P, 0>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Logical Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
+///
+multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, OpndItins itins> {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>,
+ TB, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>,
+ TB, OpSize, VEX_4V;
+
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
+ f32, f128mem, memopfsf32, SSEPackedSingle, itins>,
+ TB;
+
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
+ f64, f128mem, memopfsf64, SSEPackedDouble, itins>,
+ TB, OpSize;
+ }
+}
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
+ SSE_BIT_ITINS_P>;
+defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
+ SSE_BIT_ITINS_P>;
+defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
+ SSE_BIT_ITINS_P>;
+
+let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef,
+ SSE_BIT_ITINS_P>;
+
+/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
+///
+multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f256mem,
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
+
+ defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f256mem,
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ TB, OpSize, VEX_4V, VEX_L;
+
+ // In AVX no need to add a pattern for 128-bit logical rr ps, because they
+ // are all promoted to v2i64, and the patterns are covered by the int
+ // version. This is needed in SSE only, because v2i64 isn't supported on
+ // SSE1, but only on SSE2.
+ defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem, [],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>,
+ TB, OpSize, VEX_4V;
+
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB;
+
+ defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB, OpSize;
+ }
+}
+
+defm AND : sse12_fp_packed_logical<0x54, "and", and>;
+defm OR : sse12_fp_packed_logical<0x56, "or", or>;
+defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
+let isCommutable = 0 in
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
+/// vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem.
+///
+
+/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
+/// classes below
+multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
+ OpNode, FR32, f32mem,
+ itins.s, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
+ OpNode, FR64, f64mem,
+ itins.d, Is2Addr>, XD;
+}
+
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SizeItins itins> {
+let Predicates = [HasAVX] in {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ VR128, v4f32, f128mem, memopv4f32,
+ SSEPackedSingle, itins.s, 0>, TB, VEX_4V;
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ VR128, v2f64, f128mem, memopv2f64,
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V;
+
+ defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, VR256, v8f32, f256mem, memopv8f32,
+ SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L;
+ defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, VR256, v4f64, f256mem, memopv4f64,
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
+ v4f32, f128mem, memopv4f32, SSEPackedSingle,
+ itins.s, 1>, TB;
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
+ v2f64, f128mem, memopv2f64, SSEPackedDouble,
+ itins.d, 1>, TB, OpSize;
+}
+}
+
+multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ SizeItins itins,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
+ itins.s, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
+ itins.d, Is2Addr>, XD;
+}
+
+// Binary Arithmetic instructions
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>;
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>;
+let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>;
+ defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>;
+ defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>;
+ defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>;
+}
+
+let isCodeGenOnly = 1 in {
+ defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
+ defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+}
+
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+
+let isCommutable = 0 in {
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
+ defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
+
+ let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
+ defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
+ defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
+ defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
+ }
+}
+
+let isCodeGenOnly = 1 in {
+ defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ let Constraints = "$src1 = $dst" in {
+ defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
+ defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
+ }
+}
+
+/// Unop Arithmetic
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// And, we have a special variant form for a full-vector intrinsic form.
+
+let Sched = WriteFSqrt in {
+def SSE_SQRTP : OpndItins<
+ IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
+>;
+
+def SSE_SQRTS : OpndItins<
+ IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
+>;
+}
+
+let Sched = WriteFRcp in {
+def SSE_RCPP : OpndItins<
+ IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
+>;
+
+def SSE_RCPS : OpndItins<
+ IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
+>;
+}
+
+/// sse1_fp_unop_s - SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
+ let mayLoad = 1 in {
+ def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1,f32mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
+}
+
+/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
+ let mayLoad = 1 in {
+ def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1,f32mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
+ let Constraints = "$src1 = $dst" in {
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rr>, Sched<[itins.Sched]>;
+ let mayLoad = 1, hasSideEffects = 0 in
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
+/// sse1_fp_unop_p - SSE1 unops in packed form.
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
+ itins.rr>, VEX, Sched<[itins.Sched]>;
+ def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+}
+
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
+}
+
+/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int, Intrinsic V8F32Int,
+ OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))],
+ itins.rr>, VEX, Sched<[itins.Sched]>;
+ def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V8F32Int VR256:$src))],
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+}
+
+ def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))],
+ itins.rr>, Sched<[itins.Sched]>;
+ def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+ itins.rm>, Sched<[itins.Sched.Folded]>;
+}
+
+/// sse2_fp_unop_s - SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
+ let mayLoad = 1 in {
+ def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1,f64mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
+ // See the comments in sse1_fp_unop_s for why this is OptForSize.
+ def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
+ Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
+}
+
+/// sse2_fp_unop_p - SSE2 unops in vector forms.
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
+ itins.rr>, VEX, Sched<[itins.Sched]>;
+ def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+}
+
+ def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
+}
+
+// Square root.
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
+ SSE_SQRTS>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
+ SSE_SQRTS>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
+ int_x86_avx_rsqrt_ps_256, SSE_SQRTP>;
+defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
+ int_x86_avx_rcp_ps_256, SSE_RCPP>;
+
+def : Pat<(f32 (fsqrt FR32:$src)),
+ (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (fsqrt (load addr:$src))),
+ (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+def : Pat<(f64 (fsqrt FR64:$src)),
+ (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
+def : Pat<(f64 (fsqrt (load addr:$src))),
+ (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+def : Pat<(f32 (X86frsqrt FR32:$src)),
+ (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (X86frsqrt (load addr:$src))),
+ (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+def : Pat<(f32 (X86frcp FR32:$src)),
+ (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (X86frcp (load addr:$src))),
+ (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
+ (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
+ def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
+ (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
+ (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR64)),
+ VR128)>;
+ def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
+ (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
+
+ def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
+ (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
+ def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
+ (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_sse_rcp_ss VR128:$src),
+ (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
+ def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
+ (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+}
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+let Predicates = [UseSSE1] in {
+ def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
+ (RSQRTSSr_Int VR128:$src, VR128:$src)>;
+ def : Pat<(int_x86_sse_rcp_ss VR128:$src),
+ (RCPSSr_Int VR128:$src, VR128:$src)>;
+}
+
+// There is no f64 version of the reciprocal approximation instructions.
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Non-temporal stores
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+let SchedRW = [WriteStore] in {
+def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2i64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4i64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVNT>;
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVNT>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVNT>;
+
+// There is no AVX form for instructions below this point
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti{l}\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)],
+ IIC_SSE_MOVNT>,
+ TB, Requires<[HasSSE2]>;
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movnti{q}\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)],
+ IIC_SSE_MOVNT>,
+ TB, Requires<[HasSSE2]>;
+} // SchedRW = [WriteStore]
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
+} // AddedComplexity
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Prefetch and memory fence
+//===----------------------------------------------------------------------===//
+
+// Prefetch intrinsic.
+let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
+def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+}
+
+// FIXME: How should these memory instructions be modeled?
+let SchedRW = [WriteLoad] in {
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
+ IIC_SSE_PREFETCH>, TB, Requires<[HasSSE2]>;
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", [], IIC_SSE_PAUSE>, REP;
+
+// Load, store, and memory fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
+ "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
+ TB, Requires<[HasSSE1]>;
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>,
+ TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
+ TB, Requires<[HasSSE2]>;
+} // SchedRW
+
+def : Pat<(X86SFence), (SFENCE)>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Load/Store XCSR register
+//===----------------------------------------------------------------------===//
+
+def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>;
+def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>;
+
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>, Sched<[WriteStore]>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
+def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
+ VEX;
+def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
+ VEX, VEX_L;
+def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
+ VEX;
+def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
+ VEX, VEX_L;
+}
+
+// For Disassembler
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
+def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>,
+ VEX;
+def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
+def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>,
+ VEX;
+def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
+}
+
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
+def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
+ VEX;
+def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
+ VEX, VEX_L;
+let Predicates = [HasAVX] in {
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
+ XS, VEX;
+ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
+ XS, VEX, VEX_L;
+}
+}
+
+let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
+def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
+ VEX;
+def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
+ VEX, VEX_L;
+let Predicates = [HasAVX] in {
+def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
+ XS, VEX;
+def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
+ XS, VEX, VEX_L;
+}
+}
+
+let SchedRW = [WriteMove] in {
+let neverHasSideEffects = 1 in
+def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
+
+def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
+
+// For Disassembler
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
+def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
+
+def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
+}
+} // SchedRW
+
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
+def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
+ IIC_SSE_MOVA_P_RM>;
+def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
+ IIC_SSE_MOVU_P_RM>,
+ XS, Requires<[UseSSE2]>;
+}
+
+let mayStore = 1, SchedRW = [WriteStore] in {
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
+ IIC_SSE_MOVA_P_MR>;
+def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/],
+ IIC_SSE_MOVU_P_MR>,
+ XS, Requires<[UseSSE2]>;
+}
+
+} // ExeDomain = SSEPackedInt
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
+ def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+ (VMOVDQUYmr addr:$dst, VR256:$src)>;
+}
+let Predicates = [UseSSE2] in
+def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
+ (MOVDQUmr addr:$dst, VR128:$src)>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Arithmetic Instructions
+//===---------------------------------------------------------------------===//
+
+let Sched = WriteVecIMul in
+def SSE_PMADD : OpndItins<
+ IIC_SSE_PMADD, IIC_SSE_PMADD
+>;
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0,
+ bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
+ Intrinsic IntId256, OpndItins itins,
+ bit IsCommutable = 0> {
+let Predicates = [HasAVX] in
+ defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
+ VR128, memopv2i64, i128mem, itins,
+ IsCommutable, 0>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64,
+ i128mem, itins, IsCommutable, 1>;
+
+let Predicates = [HasAVX2] in
+ defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
+ VR256, memopv4i64, i256mem, itins,
+ IsCommutable, 0>, VEX_4V, VEX_L;
+}
+
+multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, SDNode OpNode,
+ SDNode OpNode2, RegisterClass RC,
+ ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
+ ShiftOpndItins itins,
+ bit Is2Addr = 1> {
+ // src2 is always 128-bit
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
+ itins.rr>, Sched<[WriteVecShift]>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (DstVT (OpNode RC:$src1,
+ (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
+ Sched<[WriteVecShiftLd, ReadAfterLd]>;
+ def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
+ (ins RC:$src1, i32i8imm:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>,
+ Sched<[WriteVecShift]>;
+}
+
+/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
+multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType DstVT, ValueType SrcVT, RegisterClass RC,
+ PatFrag memop_frag, X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+ Sched<[itins.Sched]>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
+ (bitconvert (memop_frag addr:$src2)))))]>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+} // ExeDomain = SSEPackedInt
+
+defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
+ SSE_INTALUQ_ITINS_P, 1>;
+defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
+ SSE_INTMUL_ITINS_P, 1>;
+defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
+ SSE_INTALUQ_ITINS_P, 0>;
+defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+
+// Intrinsic forms
+defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
+ int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>;
+defm PSUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
+ int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>;
+defm PADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
+ int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>;
+defm PADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w,
+ int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>;
+defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
+ int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>;
+defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
+ int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;
+defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
+ int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>;
+defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
+ int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>;
+defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
+ int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
+defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
+ int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>;
+defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
+ int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
+defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
+ int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>;
+
+let Predicates = [HasAVX] in
+defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
+ memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ VEX_4V;
+let Predicates = [HasAVX2] in
+defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
+ VR256, memopv4i64, i256mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
+let Constraints = "$src1 = $dst" in
+defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
+ memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+ // 128-bit logical shifts.
+ def VPSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>,
+ VEX_4V;
+ def VPSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>,
+ VEX_4V;
+ // PSRADQri doesn't exist in SSE[1-3].
+}
+} // Predicates = [HasAVX]
+
+let Predicates = [HasAVX2] in {
+defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR256, v16i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
+ VR256, v8i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
+ VR256, v4i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+
+defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR256, v16i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
+ VR256, v8i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
+ VR256, v4i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+
+defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR256, v16i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
+ VR256, v8i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+ // 256-bit logical shifts.
+ def VPSLLDQYri : PDIi8<0x73, MRM7r,
+ (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR256:$dst,
+ (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
+ VEX_4V, VEX_L;
+ def VPSRLDQYri : PDIi8<0x73, MRM3r,
+ (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR256:$dst,
+ (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
+ VEX_4V, VEX_L;
+ // PSRADQYri doesn't exist in SSE[1-3].
+}
+} // Predicates = [HasAVX2]
+
+let Constraints = "$src1 = $dst" in {
+defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P>;
+
+defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P>;
+
+defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P>;
+
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+ // 128-bit logical shifts.
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>;
+ // PSRADQri doesn't exist in SSE[1-3].
+}
+} // Constraints = "$src1 = $dst"
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
+
+ // Shift up / down and insert zero's.
+ def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
+ (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+ def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
+ (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+}
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
+ (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
+ def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
+ (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
+}
+
+let Predicates = [UseSSE2] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
+
+ // Shift up / down and insert zero's.
+ def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
+ (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+ def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
+ (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Comparison Instructions
+//===---------------------------------------------------------------------===//
+
+defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 0>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Pack Instructions
+//===---------------------------------------------------------------------===//
+
+defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
+ int_x86_avx2_packsswb, SSE_INTALU_ITINS_P, 0>;
+defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
+ int_x86_avx2_packssdw, SSE_INTALU_ITINS_P, 0>;
+defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
+ int_x86_avx2_packuswb, SSE_INTALU_ITINS_P, 0>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Shuffle Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
+ SDNode OpNode> {
+let Predicates = [HasAVX] in {
+ def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
+ def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+ Sched<[WriteShuffleLd]>;
+}
+
+let Predicates = [HasAVX2] in {
+ def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
+ def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+ Sched<[WriteShuffleLd]>;
+}
+
+let Predicates = [UseSSE2] in {
+ def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
+ def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+ Sched<[WriteShuffleLd]>;
+}
+}
+} // ExeDomain = SSEPackedInt
+
+defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, TB, OpSize;
+defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS;
+defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+}
+
+let Predicates = [UseSSE2] in {
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Unpack Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
+ SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
+ IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
+ def rm : PDI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_frag (memopv2i64
+ addr:$src2))))],
+ IIC_SSE_UNPCK>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
+ SDNode OpNode, PatFrag bc_frag> {
+ def Yrr : PDI<opc, MRMSrcReg,
+ (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
+ def Yrm : PDI<opc, MRMSrcMem,
+ (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (OpNode VR256:$src1,
+ (bc_frag (memopv4i64 addr:$src2))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
+ bc_v16i8, 0>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
+ bc_v4i32, 0>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
+ bc_v2i64, 0>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
+ bc_v16i8, 0>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
+ bc_v4i32, 0>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
+ bc_v2i64, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
+ bc_v32i8>, VEX_4V, VEX_L;
+ defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
+ bc_v16i16>, VEX_4V, VEX_L;
+ defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
+ bc_v8i32>, VEX_4V, VEX_L;
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
+ bc_v4i64>, VEX_4V, VEX_L;
+
+ defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
+ bc_v32i8>, VEX_4V, VEX_L;
+ defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
+ bc_v16i16>, VEX_4V, VEX_L;
+ defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
+ bc_v8i32>, VEX_4V, VEX_L;
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
+ bc_v4i64>, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
+ bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
+ bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
+ bc_v4i32>;
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
+ bc_v2i64>;
+
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
+ bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
+ bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
+ bc_v4i32>;
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
+ bc_v2i64>;
+}
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+ def rri : Ii8<0xC4, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffle]>;
+ def rmi : Ii8<0xC4, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ i16mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+ imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+// Extract
+let Predicates = [HasAVX] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>, TB, OpSize, VEX,
+ Sched<[WriteShuffle]>;
+def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))], IIC_SSE_PEXTRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+
+// Insert
+let Predicates = [HasAVX] in {
+ defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V;
+ def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>;
+}
+
+let Constraints = "$src1 = $dst" in
+ defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[UseSSE2]>;
+
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Mask Creation
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
+
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ IIC_SSE_MOVMSK>, VEX;
+def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK>, VEX;
+
+let Predicates = [HasAVX2] in {
+def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX, VEX_L;
+def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+}
+
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ IIC_SSE_MOVMSK>;
+
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Conditional Store
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
+
+let Uses = [EDI] in
+def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
+ IIC_SSE_MASKMOV>, VEX;
+let Uses = [RDI] in
+def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
+ IIC_SSE_MASKMOV>, VEX;
+
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
+ IIC_SSE_MASKMOV>;
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
+ IIC_SSE_MASKMOV>;
+
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Doubleword
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// Move Int Doubleword to Packed Double Int
+//
+def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ VEX, Sched<[WriteMove]>;
+def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
+ IIC_SSE_MOVDQ>,
+ VEX, Sched<[WriteLoad]>;
+def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+
+def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ Sched<[WriteMove]>;
+def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))],
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))],
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
+
+//===---------------------------------------------------------------------===//
+// Move Int Doubleword to Single Scalar
+//
+def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+
+def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>,
+ VEX, Sched<[WriteLoad]>;
+def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
+
+def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+
+//===---------------------------------------------------------------------===//
+// Move Packed Doubleword Int to Packed Double Int
+//
+def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
+ Sched<[WriteMove]>;
+def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
+ VEX, Sched<[WriteLoad]>;
+def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+ Sched<[WriteMove]>;
+def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+
+//===---------------------------------------------------------------------===//
+// Move Packed Doubleword Int first element to Doubleword Int
+//
+let SchedRW = [WriteMove] in {
+def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "vmov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))],
+ IIC_SSE_MOVD_ToGP>,
+ TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
+
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))],
+ IIC_SSE_MOVD_ToGP>;
+} //SchedRW
+
+//===---------------------------------------------------------------------===//
+// Bitcast FR64 <-> GR64
+//
+let Predicates = [HasAVX] in
+def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ VEX, Sched<[WriteLoad]>;
+def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
+
+def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))],
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+
+//===---------------------------------------------------------------------===//
+// Move Scalar Single to Double Int
+//
+def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))],
+ IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
+def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
+def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))],
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
+def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+
+//===---------------------------------------------------------------------===//
+// Patterns and instructions to describe movd/movq to XMM register zero-extends
+//
+let SchedRW = [WriteMove] in {
+let AddedComplexity = 15 in {
+def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))],
+ IIC_SSE_MOVDQ>, VEX;
+def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))],
+ IIC_SSE_MOVDQ>,
+ VEX, VEX_W;
+}
+let AddedComplexity = 15 in {
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))],
+ IIC_SSE_MOVDQ>;
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))],
+ IIC_SSE_MOVDQ>;
+}
+} // SchedRW
+
+let AddedComplexity = 20, SchedRW = [WriteLoad] in {
+def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))],
+ IIC_SSE_MOVDQ>, VEX;
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))],
+ IIC_SSE_MOVDQ>;
+} // AddedComplexity, SchedRW
+
+let Predicates = [HasAVX] in {
+ // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
+ let AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ }
+ // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
+}
+
+let Predicates = [UseSSE2], AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+}
+
+// These are the correct encodings of the instructions so that we know how to
+// read correct assembly, even though we continue to emit the wrong ones for
+// compatibility with Darwin's buggy assembler.
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Quadword
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// Move Quadword Int to Packed Quadword Int
+//
+
+let SchedRW = [WriteLoad] in {
+def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ VEX, Requires<[HasAVX]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))],
+ IIC_SSE_MOVDQ>, XS,
+ Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
+} // SchedRW
+
+//===---------------------------------------------------------------------===//
+// Move Packed Quadword Int to Quadword Int
+//
+let SchedRW = [WriteStore] in {
+def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOVDQ>;
+} // SchedRW
+
+//===---------------------------------------------------------------------===//
+// Store / copy lower 64-bits of a XMM register.
+//
+def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX,
+ Sched<[WriteStore]>;
+def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+
+let AddedComplexity = 20 in
+def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))],
+ IIC_SSE_MOVDQ>,
+ XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>;
+
+let AddedComplexity = 20 in
+def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))],
+ IIC_SSE_MOVDQ>,
+ XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
+
+let Predicates = [HasAVX], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)),
+ (VMOVZQI2PQIrm addr:$src)>;
+}
+
+let Predicates = [UseSSE2], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (MOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4i64 (alignedX86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
+}
+
+//===---------------------------------------------------------------------===//
+// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
+// IA32 document. movq xmm1, xmm2 does clear the high bits.
+//
+let SchedRW = [WriteVecLogic] in {
+let AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
+ IIC_SSE_MOVQ_RR>,
+ XS, VEX, Requires<[HasAVX]>;
+let AddedComplexity = 15 in
+def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
+ IIC_SSE_MOVQ_RR>,
+ XS, Requires<[UseSSE2]>;
+} // SchedRW
+
+let SchedRW = [WriteVecLogicLd] in {
+let AddedComplexity = 20 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))],
+ IIC_SSE_MOVDQ>,
+ XS, VEX, Requires<[HasAVX]>;
+let AddedComplexity = 20 in {
+def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))],
+ IIC_SSE_MOVDQ>,
+ XS, Requires<[UseSSE2]>;
+}
+} // SchedRW
+
+let AddedComplexity = 20 in {
+ let Predicates = [HasAVX] in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVZPQILo2PQIrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (VMOVZPQILo2PQIrr VR128:$src)>;
+ }
+ let Predicates = [UseSSE2] in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>;
+ }
+}
+
+// Instructions to match in the assembler
+let SchedRW = [WriteMove] in {
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVDQ>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVDQ>, VEX, VEX_W;
+// Recognize "movd" with GR64 destination, but encode as a "movq"
+def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVDQ>, VEX, VEX_W;
+} // SchedRW
+
+// Instructions for the disassembler
+// xr = XMM register
+// xm = mem64
+
+let SchedRW = [WriteMove] in {
+let Predicates = [HasAVX] in
+def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
+def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
+} // SchedRW
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
+//===---------------------------------------------------------------------===//
+multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
+ ValueType vt, RegisterClass RC, PatFrag mem_frag,
+ X86MemOperand x86memop> {
+def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (vt (OpNode RC:$src)))],
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
+def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (OpNode (mem_frag addr:$src)))],
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
+ v4f32, VR128, memopv4f32, f128mem>, VEX;
+ defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
+ v4f32, VR128, memopv4f32, f128mem>, VEX;
+ defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
+ v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L;
+ defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
+ v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L;
+}
+defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
+ memopv4f32, f128mem>;
+defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
+ memopv4f32, f128mem>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (VMOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VMOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (VMOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VMOVSLDUPrm addr:$src)>;
+ def : Pat<(v8i32 (X86Movshdup VR256:$src)),
+ (VMOVSHDUPYrr VR256:$src)>;
+ def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VMOVSHDUPYrm addr:$src)>;
+ def : Pat<(v8i32 (X86Movsldup VR256:$src)),
+ (VMOVSLDUPYrr VR256:$src)>;
+ def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VMOVSLDUPYrm addr:$src)>;
+}
+
+let Predicates = [UseSSE3] in {
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSLDUPrm addr:$src)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Replicate Double FP - MOVDDUP
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_replicate_dfp<string OpcodeStr> {
+let neverHasSideEffects = 1 in
+def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (v2f64 (X86Movddup
+ (scalar_to_vector (loadf64 addr:$src)))))],
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
+}
+
+// FIXME: Merge with above classe when there're patterns for the ymm version
+multiclass sse3_replicate_dfp_y<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
+ Sched<[WriteShuffle]>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (v4f64 (X86Movddup
+ (scalar_to_vector (loadf64 addr:$src)))))]>,
+ Sched<[WriteShuffleLd]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
+}
+
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+
+ // 256-bit version
+ def : Pat<(X86Movddup (memopv4f64 addr:$src)),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (memopv4i64 addr:$src)),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4i64 VR256:$src)),
+ (VMOVDDUPYrr VR256:$src)>;
+}
+
+let Predicates = [UseSSE3] in {
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Unaligned Integer
+//===---------------------------------------------------------------------===//
+
+let SchedRW = [WriteLoad] in {
+let Predicates = [HasAVX] in {
+ def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
+ VEX, VEX_L;
+}
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "lddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
+ IIC_SSE_LDDQU>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Arithmetic
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, OpndItins itins,
+ bit Is2Addr = 1> {
+ def rr : I<0xD0, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def rm : I<0xD0, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+let Predicates = [HasAVX] in {
+ let ExeDomain = SSEPackedSingle in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V, VEX_L;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V, VEX_L;
+ }
+}
+let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
+ let ExeDomain = SSEPackedSingle in
+ defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
+ f128mem, SSE_ALU_F32P>, TB, XD;
+ let ExeDomain = SSEPackedDouble in
+ defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
+ f128mem, SSE_ALU_F64P>, TB, OpSize;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE3 Instructions
+//===---------------------------------------------------------------------===//
+
+// Horizontal ops
+multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
+ def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
+
+ def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
+}
+multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
+ def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
+
+ def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
+}
+
+let Predicates = [HasAVX] in {
+ let ExeDomain = SSEPackedSingle in {
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V, VEX_L;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V, VEX_L;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V, VEX_L;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V, VEX_L;
+ }
+}
+
+let Constraints = "$src1 = $dst" in {
+ let ExeDomain = SSEPackedSingle in {
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+ }
+}
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Absolute Instructions
+//===---------------------------------------------------------------------===//
+
+
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128> {
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>,
+ OpSize, Sched<[WriteVecALU]>;
+
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
+ OpSize, Sched<[WriteVecALULd]>;
+}
+
+/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId256> {
+ def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId256 VR256:$src))]>,
+ OpSize, Sched<[WriteVecALU]>;
+
+ def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (IntId256
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize,
+ Sched<[WriteVecALULd]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
+ int_x86_ssse3_pabs_b_128>, VEX;
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
+ int_x86_ssse3_pabs_w_128>, VEX;
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
+ int_x86_ssse3_pabs_d_128>, VEX;
+}
+
+let Predicates = [HasAVX2] in {
+ defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
+ int_x86_avx2_pabs_b>, VEX, VEX_L;
+ defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
+ int_x86_avx2_pabs_w>, VEX, VEX_L;
+ defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
+ int_x86_avx2_pabs_d>, VEX, VEX_L;
+}
+
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
+ int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
+ int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
+ int_x86_ssse3_pabs_d_128>;
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Binary Operator Instructions
+//===---------------------------------------------------------------------===//
+
+let Sched = WriteVecALU in {
+def SSE_PHADDSUBD : OpndItins<
+ IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
+>;
+def SSE_PHADDSUBSW : OpndItins<
+ IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM
+>;
+def SSE_PHADDSUBW : OpndItins<
+ IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
+>;
+}
+let Sched = WriteShuffle in
+def SSE_PSHUFB : OpndItins<
+ IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
+>;
+let Sched = WriteVecALU in
+def SSE_PSIGN : OpndItins<
+ IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
+>;
+let Sched = WriteVecIMul in
+def SSE_PMULHRSW : OpndItins<
+ IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
+>;
+
+/// SS3I_binop_rm - Simple SSSE3 bin op
+multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, OpndItins itins,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ OpSize, Sched<[itins.Sched]>;
+ def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, OpndItins itins,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize, Sched<[itins.Sched]>;
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId256> {
+ let isCommutable = 1 in
+ def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
+ OpSize;
+ def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (IntId256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
+}
+
+let ImmT = NoImm, Predicates = [HasAVX] in {
+let isCommutable = 0 in {
+ defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBD, 0>, VEX_4V;
+ defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBD, 0>, VEX_4V;
+ defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
+ memopv2i64, i128mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
+ memopv2i64, i128mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
+ memopv2i64, i128mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
+ memopv2i64, i128mem,
+ SSE_PSHUFB, 0>, VEX_4V;
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
+ int_x86_ssse3_phadd_sw_128,
+ SSE_PHADDSUBSW, 0>, VEX_4V;
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
+ int_x86_ssse3_phsub_sw_128,
+ SSE_PHADDSUBSW, 0>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw_128,
+ SSE_PMADD, 0>, VEX_4V;
+}
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
+ int_x86_ssse3_pmul_hr_sw_128,
+ SSE_PMULHRSW, 0>, VEX_4V;
+}
+
+let ImmT = NoImm, Predicates = [HasAVX2] in {
+let isCommutable = 0 in {
+ defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
+ int_x86_avx2_phadd_sw>, VEX_4V, VEX_L;
+ defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
+ int_x86_avx2_phsub_sw>, VEX_4V, VEX_L;
+ defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
+ int_x86_avx2_pmadd_ub_sw>, VEX_4V, VEX_L;
+}
+defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
+ int_x86_avx2_pmul_hr_sw>, VEX_4V, VEX_L;
+}
+
+// None of these have i8 immediate fields.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+let isCommutable = 0 in {
+ defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBW>;
+ defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBD>;
+ defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBW>;
+ defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBD>;
+ defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128,
+ memopv2i64, i128mem, SSE_PSIGN>;
+ defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PSIGN>;
+ defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128,
+ memopv2i64, i128mem, SSE_PSIGN>;
+ defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
+ memopv2i64, i128mem, SSE_PSHUFB>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
+ int_x86_ssse3_phadd_sw_128,
+ SSE_PHADDSUBSW>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
+ int_x86_ssse3_phsub_sw_128,
+ SSE_PHADDSUBSW>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw_128, SSE_PMADD>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw_128,
+ SSE_PMULHRSW>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Align Instruction Patterns
+//===---------------------------------------------------------------------===//
+
+multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
+ let neverHasSideEffects = 1 in {
+ def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
+ let mayLoad = 1 in
+ def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ }
+}
+
+multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
+ let neverHasSideEffects = 1 in {
+ def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize, Sched<[WriteShuffle]>;
+ let mayLoad = 1 in
+ def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ }
+}
+
+let Predicates = [HasAVX] in
+ defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V;
+let Predicates = [HasAVX2] in
+ defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
+let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
+ defm PALIGN : ssse3_palignr<"palignr">;
+
+let Predicates = [HasAVX2] in {
+def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+}
+
+let Predicates = [UseSSSE3] in {
+def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Thread synchronization
+//===---------------------------------------------------------------------===//
+
+let SchedRW = [WriteSystem] in {
+let usesCustomInserter = 1 in {
+def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
+ Requires<[HasSSE3]>;
+}
+
+let Uses = [EAX, ECX, EDX] in
+def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
+ TB, Requires<[HasSSE3]>;
+let Uses = [ECX, EAX] in
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
+ [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
+ TB, Requires<[HasSSE3]>;
+} // SchedRW
+
+def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
+def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
+
+def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
+ Requires<[In32BitMode]>;
+def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
+ Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Move with Sign/Zero Extend
+//===----------------------------------------------------------------------===//
+
+multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
+ OpSize;
+}
+
+multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId> {
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
+ VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
+ VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
+ VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
+ VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
+ VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
+ VEX;
+}
+
+let Predicates = [HasAVX2] in {
+defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
+ int_x86_avx2_pmovsxbw>, VEX, VEX_L;
+defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
+ int_x86_avx2_pmovsxwd>, VEX, VEX_L;
+defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
+ int_x86_avx2_pmovsxdq>, VEX, VEX_L;
+defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
+ int_x86_avx2_pmovzxbw>, VEX, VEX_L;
+defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
+ int_x86_avx2_pmovzxwd>, VEX, VEX_L;
+defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
+ int_x86_avx2_pmovzxdq>, VEX, VEX_L;
+}
+
+defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
+defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
+defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
+defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
+defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
+defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
+
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load.
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (VPMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (VPMOVSXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (VPMOVSXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (VPMOVSXDQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (VPMOVZXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (VPMOVZXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (VPMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [UseSSE41] in {
+ // Common patterns involving scalar load.
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (PMOVSXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (PMOVSXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (PMOVSXDQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
+ (PMOVZXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
+ (PMOVZXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
+ (PMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [HasAVX2] in {
+ let AddedComplexity = 15 in {
+ def : Pat<(v4i64 (X86vzmovly (v4i32 VR128:$src))),
+ (VPMOVZXDQYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vzmovly (v8i16 VR128:$src))),
+ (VPMOVZXWDYrr VR128:$src)>;
+ }
+
+ def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+}
+
+let Predicates = [UseSSE41] in {
+ def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+}
+
+
+multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ OpSize;
+}
+
+multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId> {
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
+ VEX;
+defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
+ VEX;
+defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
+ VEX;
+defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
+ VEX;
+}
+
+let Predicates = [HasAVX2] in {
+defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
+ int_x86_avx2_pmovsxbd>, VEX, VEX_L;
+defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
+ int_x86_avx2_pmovsxwq>, VEX, VEX_L;
+defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
+ int_x86_avx2_pmovzxbd>, VEX, VEX_L;
+defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
+ int_x86_avx2_pmovzxwq>, VEX, VEX_L;
+}
+
+defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
+defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
+defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
+defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
+
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (VPMOVSXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (VPMOVSXWQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (VPMOVZXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (VPMOVZXWQrm addr:$src)>;
+}
+
+let Predicates = [UseSSE41] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVSXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVSXWQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVZXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVZXWQrm addr:$src)>;
+}
+
+multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ // Expecting a i16 load any extended to i32 value.
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId (bitconvert
+ (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
+ OpSize;
+}
+
+multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId> {
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
+
+ // Expecting a i16 load any extended to i32 value.
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId (bitconvert
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
+ VEX;
+defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
+ VEX;
+}
+let Predicates = [HasAVX2] in {
+defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
+ int_x86_avx2_pmovsxbq>, VEX, VEX_L;
+defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
+ int_x86_avx2_pmovzxbq>, VEX, VEX_L;
+}
+defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
+
+ def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
+ (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
+ (VPMOVSXWDYrm addr:$src)>;
+ def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
+ (VPMOVSXDQYrm addr:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXBDYrm addr:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXBDYrm addr:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXWQYrm addr:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXWQYrm addr:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBQYrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVZXBQrm addr:$src)>;
+}
+
+let Predicates = [UseSSE41] in {
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
+
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXWQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (extloadi32i16 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXBWrm addr:$src)>;
+}
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>;
+
+ def : Pat<(v8i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>;
+
+ def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>;
+
+ def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))),
+ (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))),
+ (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))),
+ (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))),
+ (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))),
+ (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))),
+ (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>;
+
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVZXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
+ (VPMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVZXWQrm addr:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
+ (VPMOVZXDQrm addr:$src)>;
+
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXBWrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXWQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (extloadi32i16 addr:$src))))))),
+ (VPMOVSXBQrm addr:$src)>;
+}
+
+let Predicates = [UseSSE41] in {
+ def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>;
+
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXWQrm addr:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
+ (PMOVZXDQrm addr:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Extract Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
+multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
+ OpSize;
+ let neverHasSideEffects = 1, mayStore = 1 in
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+let Predicates = [HasAVX] in {
+ defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+ def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
+}
+
+defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
+
+
+/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
+multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1, mayStore = 1 in
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
+
+defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
+
+
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
+multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
+
+defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
+
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
+
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
+
+/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
+/// destination
+multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+let ExeDomain = SSEPackedSingle in {
+ let Predicates = [HasAVX] in {
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+ }
+ defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
+}
+
+// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasAVX]>;
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[UseSSE41]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Insert Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
+ imm:$src3))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+
+multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
+ imm:$src3)))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+
+multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
+let Constraints = "$src1 = $dst" in
+ defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
+
+// insertps has a few different modes, there's the first two here below which
+// are optimized inserts that won't zero arbitrary elements in the destination
+// vector. The next one matches the intrinsic and could zero arbitrary elements
+// in the target vector.
+multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
+ imm:$src3))]>, OpSize;
+}
+
+let ExeDomain = SSEPackedSingle in {
+ let Predicates = [HasAVX] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+ let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Round Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int> {
+let ExeDomain = SSEPackedSingle in {
+ // Intrinsic operation, reg.
+ // Vector intrinsic operation, reg
+ def PSr : SS4AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm : SS4AIi8<opcps, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
+ OpSize;
+} // ExeDomain = SSEPackedSingle
+
+let ExeDomain = SSEPackedDouble in {
+ // Vector intrinsic operation, reg
+ def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm : SS4AIi8<opcpd, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
+ OpSize;
+} // ExeDomain = SSEPackedDouble
+}
+
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int, bit Is2Addr = 1> {
+let ExeDomain = GenericDomain in {
+ // Operation, reg.
+ let hasSideEffects = 0 in
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Operation, reg.
+ let hasSideEffects = 0 in
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
+} // ExeDomain = GenericDomain
+}
+
+// FP round - roundss, roundps, roundsd, roundpd
+let Predicates = [HasAVX] in {
+ // Intrinsic form
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps,
+ int_x86_sse41_round_pd>, VEX;
+ defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
+ memopv8f32, memopv4f64,
+ int_x86_avx_round_ps_256,
+ int_x86_avx_round_pd_256>, VEX, VEX_L;
+ defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
+ int_x86_sse41_round_ss,
+ int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
+
+ def : Pat<(ffloor FR32:$src),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ def : Pat<(f32 (fnearbyint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ def : Pat<(f32 (frint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ def : Pat<(f64 (ftrunc FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
+ def : Pat<(v4f32 (ffloor VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v4f32 (fnearbyint VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0xC))>;
+ def : Pat<(v4f32 (fceil VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x2))>;
+ def : Pat<(v4f32 (frint VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x4))>;
+ def : Pat<(v4f32 (ftrunc VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x3))>;
+
+ def : Pat<(v2f64 (ffloor VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (fnearbyint VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0xC))>;
+ def : Pat<(v2f64 (fceil VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x2))>;
+ def : Pat<(v2f64 (frint VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x4))>;
+ def : Pat<(v2f64 (ftrunc VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x3))>;
+
+ def : Pat<(v8f32 (ffloor VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x1))>;
+ def : Pat<(v8f32 (fnearbyint VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0xC))>;
+ def : Pat<(v8f32 (fceil VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x2))>;
+ def : Pat<(v8f32 (frint VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x4))>;
+ def : Pat<(v8f32 (ftrunc VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x3))>;
+
+ def : Pat<(v4f64 (ffloor VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x1))>;
+ def : Pat<(v4f64 (fnearbyint VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0xC))>;
+ def : Pat<(v4f64 (fceil VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x2))>;
+ def : Pat<(v4f64 (frint VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x4))>;
+ def : Pat<(v4f64 (ftrunc VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x3))>;
+}
+
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+let Constraints = "$src1 = $dst" in
+defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+
+let Predicates = [UseSSE41] in {
+ def : Pat<(ffloor FR32:$src),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ def : Pat<(f32 (fnearbyint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ def : Pat<(f32 (frint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ def : Pat<(f64 (ftrunc FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
+ def : Pat<(v4f32 (ffloor VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v4f32 (fnearbyint VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0xC))>;
+ def : Pat<(v4f32 (fceil VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x2))>;
+ def : Pat<(v4f32 (frint VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x4))>;
+ def : Pat<(v4f32 (ftrunc VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x3))>;
+
+ def : Pat<(v2f64 (ffloor VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (fnearbyint VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0xC))>;
+ def : Pat<(v2f64 (fceil VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x2))>;
+ def : Pat<(v2f64 (frint VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x4))>;
+ def : Pat<(v2f64 (ftrunc VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x3))>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Bit Test
+//===----------------------------------------------------------------------===//
+
+// ptest instruction we'll lower to this in X86ISelLowering primarily from
+// the intel intrinsic that corresponds to this.
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
+def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
+ OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
+ OpSize, VEX;
+
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
+ OpSize, VEX, VEX_L;
+def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ OpSize, VEX, VEX_L;
+}
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
+ OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "ptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
+ OpSize;
+}
+
+// The bit test instructions below are AVX only
+multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
+ def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
+ def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
+let ExeDomain = SSEPackedSingle in {
+defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>,
+ VEX_L;
+}
+let ExeDomain = SSEPackedDouble in {
+defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>,
+ VEX_L;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Misc Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
+ def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
+ OpSize, XS;
+ def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, OpSize, XS;
+
+ def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
+ XS;
+ def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
+
+
+
+// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
+multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128> {
+ def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
+ def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
+ int_x86_sse41_phminposuw>, VEX;
+defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
+ int_x86_sse41_phminposuw>;
+
+/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+}
+
+/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
+multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId256> {
+ let isCommutable = 1 in
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, OpSize;
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (IntId256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
+}
+
+
+/// SS48I_binop_rm - Simple SSE41 binary operator.
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
+ int_x86_avx2_packusdw>, VEX_4V, VEX_L;
+ defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
+ int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
+ memopv2i64, i128mem>;
+ defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+}
+let Predicates = [HasAVX2] in {
+ defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
+ memopv2i64, i128mem>;
+}
+
+/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ OpSize;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ let isCommutable = 0 in {
+ let ExeDomain = SSEPackedSingle in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ VR128, memopv4f32, f128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv8f32,
+ f256mem, 0>, VEX_4V, VEX_L;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ VR128, memopv2f64, f128mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256,VR256, memopv4f64,
+ f256mem, 0>, VEX_4V, VEX_L;
+ }
+ defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
+ defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
+ }
+ let ExeDomain = SSEPackedSingle in
+ defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
+ VR128, memopv4f32, f128mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedDouble in
+ defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
+ VR128, memopv2f64, f128mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in
+ defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
+ VR256, memopv8f32, i256mem, 0>, VEX_4V, VEX_L;
+}
+
+let Predicates = [HasAVX2] in {
+ let isCommutable = 0 in {
+ defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
+ VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
+ VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ }
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in {
+ let ExeDomain = SSEPackedSingle in
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
+ VR128, memopv4f32, f128mem>;
+ let ExeDomain = SSEPackedDouble in
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
+ VR128, memopv2f64, f128mem>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
+ VR128, memopv2i64, i128mem>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv2i64, i128mem>;
+ }
+ let ExeDomain = SSEPackedSingle in
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
+ VR128, memopv4f32, f128mem>;
+ let ExeDomain = SSEPackedDouble in
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
+ VR128, memopv2f64, f128mem>;
+}
+
+/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
+multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, Intrinsic IntId> {
+ def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
+ NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+ def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
+ RC:$src3))],
+ NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+}
+
+let Predicates = [HasAVX] in {
+let ExeDomain = SSEPackedDouble in {
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
+ memopv2f64, int_x86_sse41_blendvpd>;
+defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
+ memopv4f64, int_x86_avx_blendv_pd_256>, VEX_L;
+} // ExeDomain = SSEPackedDouble
+let ExeDomain = SSEPackedSingle in {
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
+ memopv4f32, int_x86_sse41_blendvps>;
+defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
+ memopv8f32, int_x86_avx_blendv_ps_256>, VEX_L;
+} // ExeDomain = SSEPackedSingle
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv2i64, int_x86_sse41_pblendvb>;
+}
+
+let Predicates = [HasAVX2] in {
+defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
+ memopv4i64, int_x86_avx2_pblendvb>, VEX_L;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))),
+ (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
+ (v4f32 VR128:$src2))),
+ (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
+ (v2i64 VR128:$src2))),
+ (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
+ (v2f64 VR128:$src2))),
+ (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
+ (v8i32 VR256:$src2))),
+ (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
+ (v8f32 VR256:$src2))),
+ (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
+ (v4i64 VR256:$src2))),
+ (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
+ (v4f64 VR256:$src2))),
+ (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+
+ def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
+ (imm:$mask))),
+ (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+ def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
+ (imm:$mask))),
+ (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+
+ def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ (imm:$mask))),
+ (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (imm:$mask))),
+ (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (imm:$mask))),
+ (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
+}
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
+ (v32i8 VR256:$src2))),
+ (VPBLENDVBYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+ def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
+ (imm:$mask))),
+ (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+}
+
+/// SS41I_ternary_int - SSE 4.1 ternary operator
+let Uses = [XMM0], Constraints = "$src1 = $dst" in {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ X86MemOperand x86memop, Intrinsic IntId> {
+ def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+ OpSize;
+
+ def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src1,
+ (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
+ }
+}
+
+let ExeDomain = SSEPackedDouble in
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
+ int_x86_sse41_blendvpd>;
+let ExeDomain = SSEPackedSingle in
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
+ int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
+ int_x86_sse41_pblendvb>;
+
+// Aliases with the implicit xmm0 argument
+def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (BLENDVPDrr0 VR128:$dst, VR128:$src2)>;
+def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>;
+def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (BLENDVPSrr0 VR128:$dst, VR128:$src2)>;
+def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>;
+def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (PBLENDVBrr0 VR128:$dst, VR128:$src2)>;
+def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>;
+
+let Predicates = [UseSSE41] in {
+ def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))),
+ (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
+ (v4f32 VR128:$src2))),
+ (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
+ (v2i64 VR128:$src2))),
+ (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
+ (v2f64 VR128:$src2))),
+ (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+
+ def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ (imm:$mask))),
+ (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (imm:$mask))),
+ (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (imm:$mask))),
+ (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
+
+}
+
+let Predicates = [HasAVX] in
+def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
+ OpSize, VEX;
+let Predicates = [HasAVX2] in
+def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
+ OpSize, VEX, VEX_L;
+def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
+ OpSize;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS42I_binop_rm - Simple SSE 4.2 binary operator
+multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
+ OpSize;
+ def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+
+let Predicates = [HasAVX2] in
+ defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+
+let Constraints = "$src1 = $dst" in
+ defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
+ memopv2i64, i128mem>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - String/text Processing Instructions
+//===----------------------------------------------------------------------===//
+
+// Packed Compare Implicit Length Strings, Return Mask
+multiclass pseudo_pcmpistrm<string asm> {
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
+ imm:$src3))]>;
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
+}
+
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
+ defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
+}
+
+multiclass pcmpistrm_SS42AI<string asm> {
+ def rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ []>, OpSize;
+ let mayLoad = 1 in
+ def rm :SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ []>, OpSize;
+}
+
+let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
+ let Predicates = [HasAVX] in
+ defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
+ defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm"> ;
+}
+
+// Packed Compare Explicit Length Strings, Return Mask
+multiclass pseudo_pcmpestrm<string asm> {
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
+ (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>;
+}
+
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
+ defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
+}
+
+multiclass SS42AI_pcmpestrm<string asm> {
+ def rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ []>, OpSize;
+ let mayLoad = 1 in
+ def rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ []>, OpSize;
+}
+
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
+ let Predicates = [HasAVX] in
+ defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
+ defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">;
+}
+
+// Packed Compare Implicit Length Strings, Return Index
+multiclass pseudo_pcmpistri<string asm> {
+ def REG : PseudoI<(outs GR32:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ [(set GR32:$dst, EFLAGS,
+ (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>;
+ def MEM : PseudoI<(outs GR32:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ [(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
+}
+
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>;
+ defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>;
+}
+
+multiclass SS42AI_pcmpistri<string asm> {
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ []>, OpSize;
+ let mayLoad = 1 in
+ def rm : SS42AI<0x63, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ []>, OpSize;
+}
+
+let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
+ let Predicates = [HasAVX] in
+ defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
+ defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
+}
+
+// Packed Compare Explicit Length Strings, Return Index
+multiclass pseudo_pcmpestri<string asm> {
+ def REG : PseudoI<(outs GR32:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ [(set GR32:$dst, EFLAGS,
+ (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
+ def MEM : PseudoI<(outs GR32:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ [(set GR32:$dst, EFLAGS,
+ (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX,
+ imm:$src5))]>;
+}
+
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>;
+ defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>;
+}
+
+multiclass SS42AI_pcmpestri<string asm> {
+ def rr : SS42AI<0x61, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ []>, OpSize;
+ let mayLoad = 1 in
+ def rm : SS42AI<0x61, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ []>, OpSize;
+}
+
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
+ let Predicates = [HasAVX] in
+ defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
+ defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - CRC Instructions
+//===----------------------------------------------------------------------===//
+
+// No CRC instructions have AVX equivalents
+
+// crc intrinsic instruction
+// This set of instructions are only rm, the only difference is the size
+// of r and m.
+let Constraints = "$src1 = $dst" in {
+ def CRC32r32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i8mem:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_8 GR32:$src1,
+ (load addr:$src2)))]>;
+ def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR8:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
+ def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i16mem:$src2),
+ "crc32{w} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_16 GR32:$src1,
+ (load addr:$src2)))]>,
+ OpSize;
+ def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR16:$src2),
+ "crc32{w} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
+ OpSize;
+ def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "crc32{l} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_32 GR32:$src1,
+ (load addr:$src2)))]>;
+ def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "crc32{l} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
+ def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i8mem:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_8 GR64:$src1,
+ (load addr:$src2)))]>,
+ REX_W;
+ def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR8:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
+ REX_W;
+ def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "crc32{q} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_64 GR64:$src1,
+ (load addr:$src2)))]>,
+ REX_W;
+ def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "crc32{q} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
+ REX_W;
+}
+
+//===----------------------------------------------------------------------===//
+// AES-NI Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
+}
+
+// Perform One Round of an AES Encryption/Decryption Flow
+let Predicates = [HasAVX, HasAES] in {
+ defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
+ int_x86_aesni_aesenc, 0>, VEX_4V;
+ defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
+ int_x86_aesni_aesenclast, 0>, VEX_4V;
+ defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
+ int_x86_aesni_aesdec, 0>, VEX_4V;
+ defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
+ int_x86_aesni_aesdeclast, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
+ int_x86_aesni_aesenc>;
+ defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
+ int_x86_aesni_aesenclast>;
+ defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
+ int_x86_aesni_aesdec>;
+ defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
+ int_x86_aesni_aesdeclast>;
+}
+
+// Perform the AES InvMixColumn Transformation
+let Predicates = [HasAVX, HasAES] in {
+ def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc VR128:$src1))]>,
+ OpSize, VEX;
+ def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
+ OpSize, VEX;
+}
+def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1),
+ "aesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc VR128:$src1))]>,
+ OpSize;
+def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1),
+ "aesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
+ OpSize;
+
+// AES Round Key Generation Assist
+let Predicates = [HasAVX, HasAES] in {
+ def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize, VEX;
+ def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
+ OpSize, VEX;
+}
+def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize;
+def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
+ OpSize;
+
+//===----------------------------------------------------------------------===//
+// PCLMUL Instructions
+//===----------------------------------------------------------------------===//
+
+// AVX carry-less Multiplication instructions
+def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>;
+
+def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
+ (memopv2i64 addr:$src2), imm:$src3))]>;
+
+// Carry-less Multiplication instructions
+let Constraints = "$src1 = $dst" in {
+def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>;
+
+def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
+ (memopv2i64 addr:$src2), imm:$src3))]>;
+} // Constraints = "$src1 = $dst"
+
+
+multiclass pclmul_alias<string asm, int immop> {
+ def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
+
+ def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
+}
+defm : pclmul_alias<"hqhq", 0x11>;
+defm : pclmul_alias<"hqlq", 0x01>;
+defm : pclmul_alias<"lqhq", 0x10>;
+defm : pclmul_alias<"lqlq", 0x00>;
+
+//===----------------------------------------------------------------------===//
+// SSE4A Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasSSE4A] in {
+
+let Constraints = "$src = $dst" in {
+def EXTRQI : Ii8<0x78, MRM0r, (outs VR128:$dst),
+ (ins VR128:$src, i8imm:$len, i8imm:$idx),
+ "extrq\t{$idx, $len, $src|$src, $len, $idx}",
+ [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len,
+ imm:$idx))]>, TB, OpSize;
+def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src, VR128:$mask),
+ "extrq\t{$mask, $src|$src, $mask}",
+ [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
+ VR128:$mask))]>, TB, OpSize;
+
+def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx),
+ "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
+ [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src,
+ VR128:$src2, imm:$len, imm:$idx))]>, XD;
+def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src, VR128:$mask),
+ "insertq\t{$mask, $src|$src, $mask}",
+ [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
+ VR128:$mask))]>, XD;
+}
+
+def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
+ "movntss\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS;
+
+def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movntsd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD;
+}
+
+//===----------------------------------------------------------------------===//
+// AVX Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VBROADCAST - Load from memory and broadcast to all elements of the
+// destination operand
+//
+class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int addr:$src))]>, VEX;
+
+// AVX2 adds register forms
+class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ Intrinsic Int> :
+ AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int VR128:$src))]>, VEX;
+
+let ExeDomain = SSEPackedSingle in {
+ def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcast_ss>;
+ def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcast_ss_256>, VEX_L;
+}
+let ExeDomain = SSEPackedDouble in
+def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+ int_x86_avx_vbroadcast_sd_256>, VEX_L;
+def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
+ int_x86_avx_vbroadcastf128_pd_256>, VEX_L;
+
+let ExeDomain = SSEPackedSingle in {
+ def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
+ int_x86_avx2_vbroadcast_ss_ps>;
+ def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
+ int_x86_avx2_vbroadcast_ss_ps_256>, VEX_L;
+}
+let ExeDomain = SSEPackedDouble in
+def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
+ int_x86_avx2_vbroadcast_sd_pd_256>, VEX_L;
+
+let Predicates = [HasAVX2] in
+def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
+ int_x86_avx2_vbroadcasti128>, VEX_L;
+
+let Predicates = [HasAVX] in
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+ (VBROADCASTF128 addr:$src)>;
+
+
+//===----------------------------------------------------------------------===//
+// VINSERTF128 - Insert packed floating-point values
+//
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
+def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V, VEX_L;
+let mayLoad = 1 in
+def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V, VEX_L;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (iPTR imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (iPTR imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
+let Predicates = [HasAVX1Only] in {
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (iPTR imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (iPTR imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (iPTR imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (iPTR imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
+ (bc_v16i8 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
+ (bc_v8i16 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
+//===----------------------------------------------------------------------===//
+// VEXTRACTF128 - Extract packed floating-point values
+//
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
+def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX, VEX_L;
+let mayStore = 1 in
+def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX, VEX_L;
+}
+
+// AVX1 patterns
+let Predicates = [HasAVX] in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+def : Pat<(alignedstore (v4f32 (vextractf128_extract:$ext (v8f32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v2f64 (vextractf128_extract:$ext (v4f64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+}
+
+let Predicates = [HasAVX1Only] in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
+ (v8i16 (VEXTRACTF128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
+ (v16i8 (VEXTRACTF128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+}
+
+//===----------------------------------------------------------------------===//
+// VMASKMOV - Conditional SIMD Packed Loads and Stores
+//
+multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
+ Intrinsic IntLd, Intrinsic IntLd256,
+ Intrinsic IntSt, Intrinsic IntSt256> {
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
+ VEX_4V;
+ def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+ VEX_4V, VEX_L;
+ def mr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
+}
+
+let ExeDomain = SSEPackedSingle in
+defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
+ int_x86_avx_maskload_ps,
+ int_x86_avx_maskload_ps_256,
+ int_x86_avx_maskstore_ps,
+ int_x86_avx_maskstore_ps_256>;
+let ExeDomain = SSEPackedDouble in
+defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
+ int_x86_avx_maskload_pd,
+ int_x86_avx_maskload_pd_256,
+ int_x86_avx_maskstore_pd,
+ int_x86_avx_maskstore_pd_256>;
+
+//===----------------------------------------------------------------------===//
+// VPERMIL - Permute Single and Double Floating-Point Values
+//
+multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop_f,
+ X86MemOperand x86memop_i, PatFrag i_frag,
+ Intrinsic IntVar, ValueType vt> {
+ def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop_i:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1,
+ (bitconvert (i_frag addr:$src2))))]>, VEX_4V;
+
+ def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX;
+ def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop_f:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX;
+}
+
+let ExeDomain = SSEPackedSingle in {
+ defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
+ defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
+ defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPSYri VR256:$src1, imm:$imm)>;
+def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPDYri VR256:$src1, imm:$imm)>;
+def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPERMILPSYmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDYmi addr:$src1, imm:$imm)>;
+
+def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+ (VPERMILPDri VR128:$src1, imm:$imm)>;
+def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDmi addr:$src1, imm:$imm)>;
+}
+
+//===----------------------------------------------------------------------===//
+// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+//
+let ExeDomain = SSEPackedSingle in {
+def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2,
+ (i8 imm:$src3))))]>, VEX_4V, VEX_L;
+def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2),
+ (i8 imm:$src3)))]>, VEX_4V, VEX_L;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+let Predicates = [HasAVX1Only] in {
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+//===----------------------------------------------------------------------===//
+// VZERO - Zero YMM registers
+//
+let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
+ // Zero All YMM registers
+ def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+ [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>;
+
+ // Zero Upper bits of YMM registers
+ def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+ [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Half precision conversion instructions
+//===----------------------------------------------------------------------===//
+multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+ def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}",
+ [(set RC:$dst, (Int VR128:$src))]>,
+ T8, OpSize, VEX;
+ let neverHasSideEffects = 1, mayLoad = 1 in
+ def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+}
+
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+ def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+ (ins RC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
+ TA, OpSize, VEX;
+ let neverHasSideEffects = 1, mayStore = 1 in
+ def mr : Ii8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, RC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+}
+
+let Predicates = [HasAVX, HasF16C] in {
+ defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
+ defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
+ defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
+ defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
+}
+
+//===----------------------------------------------------------------------===//
+// AVX2 Instructions
+//===----------------------------------------------------------------------===//
+
+/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
+multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
+ let isCommutable = 1 in
+ def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ VEX_4V;
+ def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ VEX_4V;
+}
+
+let isCommutable = 0 in {
+defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
+ VR128, memopv2i64, i128mem>;
+defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
+ VR256, memopv4i64, i256mem>, VEX_L;
+}
+
+def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
+ imm:$mask)),
+ (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
+def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
+ imm:$mask)),
+ (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+
+//===----------------------------------------------------------------------===//
+// VPBROADCAST - Load from memory and broadcast to all elements of the
+// destination operand
+//
+multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ Intrinsic Int128, Intrinsic Int256> {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int128 VR128:$src))]>, VEX;
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (Int256 VR128:$src))]>, VEX, VEX_L;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (Int256 (scalar_to_vector (ld_frag addr:$src))))]>,
+ VEX, VEX_L;
+}
+
+defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
+ int_x86_avx2_pbroadcastb_128,
+ int_x86_avx2_pbroadcastb_256>;
+defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
+ int_x86_avx2_pbroadcastw_128,
+ int_x86_avx2_pbroadcastw_256>;
+defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
+ int_x86_avx2_pbroadcastd_128,
+ int_x86_avx2_pbroadcastd_256>;
+defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
+ int_x86_avx2_pbroadcastq_128,
+ int_x86_avx2_pbroadcastq_256>;
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
+ (VPBROADCASTBrm addr:$src)>;
+ def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
+ (VPBROADCASTBYrm addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWYrm addr:$src)>;
+ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDrm addr:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDYrm addr:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPBROADCASTQrm addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPBROADCASTQYrm addr:$src)>;
+
+ def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
+ (VPBROADCASTBrr VR128:$src)>;
+ def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
+ (VPBROADCASTBYrr VR128:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
+ (VPBROADCASTWrr VR128:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
+ (VPBROADCASTWYrr VR128:$src)>;
+ def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
+ (VPBROADCASTDrr VR128:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
+ (VPBROADCASTDYrr VR128:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
+ (VPBROADCASTQrr VR128:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
+ (VPBROADCASTQYrr VR128:$src)>;
+ def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
+ (VBROADCASTSSrr VR128:$src)>;
+ def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
+ (VBROADCASTSSYrr VR128:$src)>;
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
+ (VPBROADCASTQrr VR128:$src)>;
+ def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
+ (VBROADCASTSDYrr VR128:$src)>;
+
+ // Provide fallback in case the load node that is used in the patterns above
+ // is used by additional users, which prevents the pattern selection.
+ let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
+ (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
+ (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
+ def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
+ def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+ }
+}
+
+// AVX1 broadcast patterns
+let Predicates = [HasAVX1Only] in {
+def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSYrm addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VBROADCASTSDYrm addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSYrm addr:$src)>;
+def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VBROADCASTSDYrm addr:$src)>;
+def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+
+ // Provide fallback in case the load node that is used in the patterns above
+ // is used by additional users, which prevents the pattern selection.
+ let AddedComplexity = 20 in {
+ // 128bit broadcasts:
+ def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
+ def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
+ def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+ (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
+
+ def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
+ def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
+ def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// VPERM - Permute instructions
+//
+
+multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ ValueType OpVT> {
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
+ VEX_4V, VEX_L;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OpVT (X86VPermv VR256:$src1,
+ (bitconvert (mem_frag addr:$src2)))))]>,
+ VEX_4V, VEX_L;
+}
+
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>;
+let ExeDomain = SSEPackedSingle in
+defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>;
+
+multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ ValueType OpVT> {
+ def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
+ VEX, VEX_L;
+ def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OpVT (X86VPermi (mem_frag addr:$src1),
+ (i8 imm:$src2))))]>, VEX, VEX_L;
+}
+
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W;
+let ExeDomain = SSEPackedDouble in
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;
+
+//===----------------------------------------------------------------------===//
+// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
+//
+def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
+ (i8 imm:$src3))))]>, VEX_4V, VEX_L;
+def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
+ (i8 imm:$src3)))]>, VEX_4V, VEX_L;
+
+let Predicates = [HasAVX2] in {
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// VINSERTI128 - Insert packed integer values
+//
+let neverHasSideEffects = 1 in {
+def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V, VEX_L;
+let mayLoad = 1 in
+def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
+ "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V, VEX_L;
+}
+
+let Predicates = [HasAVX2] in {
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (iPTR imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (iPTR imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (iPTR imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (iPTR imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
+ (bc_v16i8 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
+ (bc_v8i16 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
+//===----------------------------------------------------------------------===//
+// VEXTRACTI128 - Extract packed integer values
+//
+def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
+ VEX, VEX_L;
+let neverHasSideEffects = 1, mayStore = 1 in
+def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX, VEX_L;
+
+let Predicates = [HasAVX2] in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
+ (v2i64 (VEXTRACTI128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
+ (v4i32 (VEXTRACTI128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
+ (v8i16 (VEXTRACTI128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
+ (v16i8 (VEXTRACTI128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+}
+
+//===----------------------------------------------------------------------===//
+// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
+//
+multiclass avx2_pmovmask<string OpcodeStr,
+ Intrinsic IntLd128, Intrinsic IntLd256,
+ Intrinsic IntSt128, Intrinsic IntSt256> {
+ def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
+ def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+ VEX_4V, VEX_L;
+ def mr : AVX28I<0x8e, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
+}
+
+defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
+ int_x86_avx2_maskload_d,
+ int_x86_avx2_maskload_d_256,
+ int_x86_avx2_maskstore_d,
+ int_x86_avx2_maskstore_d_256>;
+defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
+ int_x86_avx2_maskload_q,
+ int_x86_avx2_maskload_q_256,
+ int_x86_avx2_maskstore_q,
+ int_x86_avx2_maskstore_q_256>, VEX_W;
+
+
+//===----------------------------------------------------------------------===//
+// Variable Bit Shifts
+//
+multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType vt128, ValueType vt256> {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
+ VEX_4V;
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1,
+ (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>,
+ VEX_4V;
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
+ VEX_4V, VEX_L;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1,
+ (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>,
+ VEX_4V, VEX_L;
+}
+
+defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
+defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
+defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
+defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
+defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
+
+//===----------------------------------------------------------------------===//
+// VGATHER - GATHER Operations
+multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
+ X86MemOperand memop128, X86MemOperand memop256> {
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb),
+ (ins VR128:$src1, memop128:$src2, VR128:$mask),
+ !strconcat(OpcodeStr,
+ "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
+ []>, VEX_4VOp3;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb),
+ (ins RC256:$src1, memop256:$src2, RC256:$mask),
+ !strconcat(OpcodeStr,
+ "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
+ []>, VEX_4VOp3, VEX_L;
+}
+
+let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in {
+ defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
+ defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
+ defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
+ defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>;
+ defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", VR256, vx64mem, vx64mem>, VEX_W;
+ defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", VR256, vx64mem, vy64mem>, VEX_W;
+ defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", VR256, vx32mem, vy32mem>;
+ defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", VR128, vx32mem, vy32mem>;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSVM.td b/contrib/llvm/lib/Target/X86/X86InstrSVM.td
new file mode 100644
index 000000000000..757dcd0b5dcb
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrSVM.td
@@ -0,0 +1,62 @@
+//===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the AMD SVM instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SVM instructions
+
+// 0F 01 D9
+def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", []>, TB;
+
+// 0F 01 DC
+def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", []>, TB;
+
+// 0F 01 DD
+def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB;
+
+// 0F 01 DE
+let Uses = [EAX] in
+def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|EAX}", []>, TB;
+
+// 0F 01 D8
+let Uses = [EAX] in
+def VMRUN32 : I<0x01, MRM_D8, (outs), (ins),
+ "vmrun\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMRUN64 : I<0x01, MRM_D8, (outs), (ins),
+ "vmrun\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DA
+let Uses = [EAX] in
+def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins),
+ "vmload\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins),
+ "vmload\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DB
+let Uses = [EAX] in
+def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins),
+ "vmsave\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins),
+ "vmsave\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DF
+let Uses = [EAX, ECX] in
+def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins),
+ "invlpga\t{%ecx, %eax|EAX, ECX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX, ECX] in
+def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
+ "invlpga\t{%ecx, %rax|RAX, ECX}", []>, TB, Requires<[In64BitMode]>;
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
new file mode 100644
index 000000000000..5b6298b541bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -0,0 +1,978 @@
+//===-- X86InstrShiftRotate.td - Shift and Rotate Instrs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the shift and rotate instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: Someone needs to smear multipattern goodness all over this file.
+
+let Defs = [EFLAGS] in {
+
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+let Uses = [CL] in {
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (shl GR8:$src1, CL))], IIC_SR>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (shl GR16:$src1, CL))], IIC_SR>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (shl GR32:$src1, CL))], IIC_SR>;
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (shl GR64:$src1, CL))], IIC_SR>;
+} // Uses = [CL]
+
+def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "shl{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shl{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))], IIC_SR>,
+ OpSize;
+def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shl{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>;
+def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "shl{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
+
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
+let hasSideEffects = 0 in {
+def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
+ "shl{b}\t$dst", [], IIC_SR>;
+def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t$dst", [], IIC_SR>, OpSize;
+def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t$dst", [], IIC_SR>;
+def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t$dst", [], IIC_SR>;
+} // hasSideEffects = 0
+} // isConvertibleToThreeAddress = 1
+} // Constraints = "$src = $dst", SchedRW
+
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
+// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
+// using CL?
+let Uses = [CL] in {
+def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)], IIC_SR>;
+def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>,
+ OpSize;
+def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>;
+def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
+}
+def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shl{b}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shl{w}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
+ OpSize;
+def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shl{l}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shl{q}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+
+// Shift by 1
+def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t$dst",
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t$dst",
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
+ OpSize;
+def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t$dst",
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t$dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+} // SchedRW
+
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+let Uses = [CL] in {
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (srl GR8:$src1, CL))], IIC_SR>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (srl GR16:$src1, CL))], IIC_SR>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (srl GR32:$src1, CL))], IIC_SR>;
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (srl GR64:$src1, CL))], IIC_SR>;
+}
+
+def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "shr{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
+def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shr{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>, OpSize;
+def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shr{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shr{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))], IIC_SR>;
+
+// Shift right by 1
+def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
+ "shr{b}\t$dst",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))], IIC_SR>;
+def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t$dst",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))], IIC_SR>, OpSize;
+def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t$dst",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))], IIC_SR>;
+def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t$dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>;
+} // Constraints = "$src = $dst", SchedRW
+
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
+let Uses = [CL] in {
+def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)], IIC_SR>;
+def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>,
+ OpSize;
+def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>;
+def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
+}
+def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shr{b}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shr{w}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
+ OpSize;
+def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shr{l}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shr{q}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+
+// Shift by 1
+def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t$dst",
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t$dst",
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,OpSize;
+def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t$dst",
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t$dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+} // SchedRW
+
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+let Uses = [CL] in {
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (sra GR8:$src1, CL))],
+ IIC_SR>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (sra GR16:$src1, CL))],
+ IIC_SR>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (sra GR32:$src1, CL))],
+ IIC_SR>;
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (sra GR64:$src1, CL))],
+ IIC_SR>;
+}
+
+def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "sar{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
+def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "sar{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>,
+ OpSize;
+def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "sar{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
+def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "sar{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
+
+// Shift by 1
+def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t$dst",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))],
+ IIC_SR>;
+def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t$dst",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))],
+ IIC_SR>, OpSize;
+def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t$dst",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))],
+ IIC_SR>;
+def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t$dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))],
+ IIC_SR>;
+} // Constraints = "$src = $dst", SchedRW
+
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
+let Uses = [CL] in {
+def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
+def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)],
+ IIC_SR>, OpSize;
+def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
+}
+def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "sar{b}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "sar{w}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
+ OpSize;
+def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "sar{l}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "sar{q}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+
+// Shift by 1
+def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t$dst",
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t$dst",
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
+ OpSize;
+def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t$dst",
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t$dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Rotate instructions
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0 in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t$dst", [], IIC_SR>;
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+let Uses = [CL] in
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t$dst", [], IIC_SR>, OpSize;
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
+let Uses = [CL] in
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
+
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t$dst", [], IIC_SR>;
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+let Uses = [CL] in
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+
+
+def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t$dst", [], IIC_SR>;
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+let Uses = [CL] in
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+
+
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t$dst", [], IIC_SR>;
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+let Uses = [CL] in
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t$dst", [], IIC_SR>, OpSize;
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
+let Uses = [CL] in
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
+
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t$dst", [], IIC_SR>;
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+let Uses = [CL] in
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+
+def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t$dst", [], IIC_SR>;
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+let Uses = [CL] in
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+
+} // Constraints = "$src = $dst"
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
+def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t$dst", [], IIC_SR>;
+def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t$dst", [], IIC_SR>, OpSize;
+def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
+def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t$dst", [], IIC_SR>;
+def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t$dst", [], IIC_SR>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+
+def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t$dst", [], IIC_SR>;
+def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t$dst", [], IIC_SR>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
+def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t$dst", [], IIC_SR>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t$dst", [], IIC_SR>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+
+let Uses = [CL] in {
+def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
+def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+
+def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
+def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
+}
+} // SchedRW
+} // hasSideEffects = 0
+
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+// FIXME: provide shorter instructions when imm8 == 1
+let Uses = [CL] in {
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotl GR8:$src1, CL))], IIC_SR>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotl GR16:$src1, CL))], IIC_SR>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotl GR32:$src1, CL))], IIC_SR>;
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (rotl GR64:$src1, CL))], IIC_SR>;
+}
+
+def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "rol{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
+def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "rol{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>,
+ OpSize;
+def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "rol{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "rol{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
+
+// Rotate by 1
+def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t$dst",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))],
+ IIC_SR>;
+def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t$dst",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))],
+ IIC_SR>, OpSize;
+def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t$dst",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))],
+ IIC_SR>;
+def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t$dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))],
+ IIC_SR>;
+} // Constraints = "$src = $dst", SchedRW
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
+let Uses = [CL] in {
+def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
+def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)],
+ IIC_SR>, OpSize;
+def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
+def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t{%cl, $dst|$dst, %cl}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
+}
+def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
+ "rol{b}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>;
+def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
+ "rol{w}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>,
+ OpSize;
+def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
+ "rol{l}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>;
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
+ "rol{q}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>;
+
+// Rotate by 1
+def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t$dst",
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t$dst",
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
+ OpSize;
+def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t$dst",
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t$dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+} // SchedRW
+
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+let Uses = [CL] in {
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotr GR8:$src1, CL))], IIC_SR>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotr GR16:$src1, CL))], IIC_SR>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotr GR32:$src1, CL))], IIC_SR>;
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (rotr GR64:$src1, CL))], IIC_SR>;
+}
+
+def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "ror{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
+def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "ror{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>,
+ OpSize;
+def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "ror{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
+def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "ror{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
+
+// Rotate by 1
+def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t$dst",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))],
+ IIC_SR>;
+def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t$dst",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))],
+ IIC_SR>, OpSize;
+def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t$dst",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))],
+ IIC_SR>;
+def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t$dst",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))],
+ IIC_SR>;
+} // Constraints = "$src = $dst", SchedRW
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
+let Uses = [CL] in {
+def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
+def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)],
+ IIC_SR>, OpSize;
+def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
+}
+def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "ror{b}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "ror{w}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
+ OpSize;
+def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "ror{l}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "ror{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
+
+// Rotate by 1
+def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t$dst",
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t$dst",
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
+ OpSize;
+def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t$dst",
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t$dst",
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
+} // SchedRW
+
+
+//===----------------------------------------------------------------------===//
+// Double shift instructions (generalizations of rotate)
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+
+let Uses = [CL] in {
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))],
+ IIC_SHD16_REG_CL>,
+ TB, OpSize;
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))],
+ IIC_SHD16_REG_CL>,
+ TB, OpSize;
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))],
+ IIC_SHD32_REG_CL>, TB;
+def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))],
+ IIC_SHD32_REG_CL>, TB;
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))],
+ IIC_SHD64_REG_CL>,
+ TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))],
+ IIC_SHD64_REG_CL>,
+ TB;
+}
+
+let isCommutable = 1 in { // These instructions commute to each other.
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))], IIC_SHD16_REG_IM>,
+ TB, OpSize;
+def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))], IIC_SHD16_REG_IM>,
+ TB, OpSize;
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))], IIC_SHD32_REG_IM>,
+ TB;
+def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))], IIC_SHD32_REG_IM>,
+ TB;
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))], IIC_SHD64_REG_IM>,
+ TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))], IIC_SHD64_REG_IM>,
+ TB;
+}
+} // Constraints = "$src = $dst", SchedRW
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
+let Uses = [CL] in {
+def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)], IIC_SHD16_MEM_CL>, TB, OpSize;
+def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)], IIC_SHD16_MEM_CL>, TB, OpSize;
+
+def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)], IIC_SHD32_MEM_CL>, TB;
+def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)], IIC_SHD32_MEM_CL>, TB;
+
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)], IIC_SHD64_MEM_CL>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)], IIC_SHD64_MEM_CL>, TB;
+}
+
+def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD16_MEM_IM>,
+ TB, OpSize;
+def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD16_MEM_IM>,
+ TB, OpSize;
+
+def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD32_MEM_IM>,
+ TB;
+def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD32_MEM_IM>,
+ TB;
+
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD64_MEM_IM>,
+ TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD64_MEM_IM>,
+ TB;
+} // SchedRW
+
+} // Defs = [EFLAGS]
+
+def ROT32L2R_imm8 : SDNodeXForm<imm, [{
+ // Convert a ROTL shamt to a ROTR shamt on 32-bit integer.
+ return getI8Imm(32 - N->getZExtValue());
+}]>;
+
+def ROT64L2R_imm8 : SDNodeXForm<imm, [{
+ // Convert a ROTL shamt to a ROTR shamt on 64-bit integer.
+ return getI8Imm(64 - N->getZExtValue());
+}]>;
+
+multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in {
+ def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TAXD, VEX, Sched<[WriteShift]>;
+ let mayLoad = 1 in
+ def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, i8imm:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TAXD, VEX, Sched<[WriteShiftLd]>;
+}
+}
+
+multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in {
+ def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ VEX_4VOp3, Sched<[WriteShift]>;
+ let mayLoad = 1 in
+ def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ VEX_4VOp3,
+ Sched<[WriteShiftLd,
+ // x86memop:$src1
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC:$src1
+ ReadAfterLd]>;
+}
+}
+
+let Predicates = [HasBMI2] in {
+ defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>;
+ defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, VEX_W;
+ defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS;
+ defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8XS, VEX_W;
+ defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8XD;
+ defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W;
+ defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize;
+ defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W;
+
+ // Prefer RORX which is non-destructive and doesn't update EFLAGS.
+ let AddedComplexity = 10 in {
+ def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+ }
+
+ def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+
+ // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
+ // immedidate shift, i.e. the following code is considered better
+ //
+ // mov %edi, %esi
+ // shl $imm, %esi
+ // ... %edi, ...
+ //
+ // than
+ //
+ // movb $imm, %sil
+ // shlx %sil, %edi, %esi
+ // ... %edi, ...
+ //
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, GR8:$src2),
+ (SARX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, GR8:$src2),
+ (SARX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, GR8:$src2),
+ (SHRX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, GR8:$src2),
+ (SHRX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, GR8:$src2),
+ (SHLX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, GR8:$src2),
+ (SHLX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor
+ //
+ // mov (%ecx), %esi
+ // shl $imm, $esi
+ //
+ // over
+ //
+ // movb $imm %al
+ // shlx %al, (%ecx), %esi
+ //
+ // As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole
+ // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible.
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
new file mode 100644
index 000000000000..053417ccde63
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -0,0 +1,522 @@
+//===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instructions that are generally used in
+// privileged modes. These are not typically used by the compiler, but are
+// supported for the assembler and disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+let SchedRW = [WriteSystem] in {
+let Defs = [RAX, RDX] in
+ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>,
+ TB;
+
+let Defs = [RAX, RCX, RDX] in
+ def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+
+// CPU flow control instructions
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
+ def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB;
+}
+
+def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", [], IIC_HLT>;
+def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", [], IIC_RSM>, TB;
+
+// Interrupt and SysCall Instructions.
+let Uses = [EFLAGS] in
+ def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
+ [(int_x86_int (i8 3))], IIC_INT3>;
+} // SchedRW
+
+def : Pat<(debugtrap),
+ (INT3)>;
+
+// The long form of "int $3" turns into int3 as a size optimization.
+// FIXME: This doesn't work because InstAlias can't match immediate constants.
+//def : InstAlias<"int\t$3", (INT3)>;
+
+let SchedRW = [WriteSystem] in {
+
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+ [(int_x86_int imm:$trap)], IIC_INT>;
+
+
+def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", [], IIC_SYSCALL>, TB;
+def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", [], IIC_SYSCALL>, TB;
+def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", [], IIC_SYSCALL>, TB,
+ Requires<[In64BitMode]>;
+
+def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", [],
+ IIC_SYS_ENTER_EXIT>, TB;
+
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", [],
+ IIC_SYS_ENTER_EXIT>, TB;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", []>, TB,
+ Requires<[In64BitMode]>;
+
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize;
+def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>;
+def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
+ Requires<[In64BitMode]>;
+} // SchedRW
+
+
+//===----------------------------------------------------------------------===//
+// Input/Output Instructions.
+//
+let SchedRW = [WriteSystem] in {
+let Defs = [AL], Uses = [DX] in
+def IN8rr : I<0xEC, RawFrm, (outs), (ins),
+ "in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>;
+let Defs = [AX], Uses = [DX] in
+def IN16rr : I<0xED, RawFrm, (outs), (ins),
+ "in{w}\t{%dx, %ax|AX, DX}", [], IIC_IN_RR>, OpSize;
+let Defs = [EAX], Uses = [DX] in
+def IN32rr : I<0xED, RawFrm, (outs), (ins),
+ "in{l}\t{%dx, %eax|EAX, DX}", [], IIC_IN_RR>;
+
+let Defs = [AL] in
+def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
+ "in{b}\t{$port, %al|AL, $port}", [], IIC_IN_RI>;
+let Defs = [AX] in
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{w}\t{$port, %ax|AX, $port}", [], IIC_IN_RI>, OpSize;
+let Defs = [EAX] in
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{l}\t{$port, %eax|EAX, $port}", [], IIC_IN_RI>;
+
+let Uses = [DX, AL] in
+def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
+ "out{b}\t{%al, %dx|DX, AL}", [], IIC_OUT_RR>;
+let Uses = [DX, AX] in
+def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{w}\t{%ax, %dx|DX, AX}", [], IIC_OUT_RR>, OpSize;
+let Uses = [DX, EAX] in
+def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{l}\t{%eax, %dx|DX, EAX}", [], IIC_OUT_RR>;
+
+let Uses = [AL] in
+def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
+ "out{b}\t{%al, $port|$port, AL}", [], IIC_OUT_IR>;
+let Uses = [AX] in
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{w}\t{%ax, $port|$port, AX}", [], IIC_OUT_IR>, OpSize;
+let Uses = [EAX] in
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{l}\t{%eax, $port|$port, EAX}", [], IIC_OUT_IR>;
+
+def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>;
+def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize;
+def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Moves to and from debug registers
+
+let SchedRW = [WriteSystem] in {
+def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB;
+def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB;
+
+def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
+def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Moves to and from control registers
+
+let SchedRW = [WriteSystem] in {
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB;
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB;
+
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Segment override instruction prefixes
+
+def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
+def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
+def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
+def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
+def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
+def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Moves to and from segment registers.
+//
+
+let SchedRW = [WriteMove] in {
+def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>;
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>;
+
+def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>;
+
+def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>;
+
+def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Segmentation support instructions.
+
+let SchedRW = [WriteSystem] in {
+def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
+
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB, OpSize;
+def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB, OpSize;
+
+// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB;
+def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB;
+// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo.
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB;
+def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB;
+
+def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB, OpSize;
+def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB, OpSize;
+def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
+def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB;
+def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
+def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB;
+
+def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr",
+ [], IIC_INVLPG>, TB;
+
+def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+ "str{w}\t$dst", [], IIC_STR>, TB, OpSize;
+def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
+ "str{l}\t$dst", [], IIC_STR>, TB;
+def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
+ "str{q}\t$dst", [], IIC_STR>, TB;
+def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+ "str{w}\t$dst", [], IIC_STR>, TB;
+
+def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
+ "ltr{w}\t$src", [], IIC_LTR>, TB;
+def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
+ "ltr{w}\t$src", [], IIC_LTR>, TB;
+
+def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
+ "push{w}\t{%cs|CS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>,
+ OpSize;
+def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins),
+ "push{l}\t{%cs|CS}", [], IIC_PUSH_CS>, Requires<[In32BitMode]>;
+def PUSHSS16 : I<0x16, RawFrm, (outs), (ins),
+ "push{w}\t{%ss|SS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>,
+ OpSize;
+def PUSHSS32 : I<0x16, RawFrm, (outs), (ins),
+ "push{l}\t{%ss|SS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>;
+def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins),
+ "push{w}\t{%ds|DS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>,
+ OpSize;
+def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins),
+ "push{l}\t{%ds|DS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>;
+def PUSHES16 : I<0x06, RawFrm, (outs), (ins),
+ "push{w}\t{%es|ES}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>,
+ OpSize;
+def PUSHES32 : I<0x06, RawFrm, (outs), (ins),
+ "push{l}\t{%es|ES}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>;
+
+def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
+ "push{w}\t{%fs|FS}", [], IIC_PUSH_SR>, OpSize, TB;
+def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
+ "push{l}\t{%fs|FS}", [], IIC_PUSH_SR>, TB, Requires<[In32BitMode]>;
+def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
+ "push{w}\t{%gs|GS}", [], IIC_PUSH_SR>, OpSize, TB;
+def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
+ "push{l}\t{%gs|GS}", [], IIC_PUSH_SR>, TB, Requires<[In32BitMode]>;
+
+def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
+ "push{q}\t{%fs|FS}", [], IIC_PUSH_SR>, TB;
+def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
+ "push{q}\t{%gs|GS}", [], IIC_PUSH_SR>, TB;
+
+// No "pop cs" instruction.
+def POPSS16 : I<0x17, RawFrm, (outs), (ins),
+ "pop{w}\t{%ss|SS}", [], IIC_POP_SR_SS>,
+ OpSize, Requires<[In32BitMode]>;
+def POPSS32 : I<0x17, RawFrm, (outs), (ins),
+ "pop{l}\t{%ss|SS}", [], IIC_POP_SR_SS>,
+ Requires<[In32BitMode]>;
+
+def POPDS16 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{w}\t{%ds|DS}", [], IIC_POP_SR>,
+ OpSize, Requires<[In32BitMode]>;
+def POPDS32 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{l}\t{%ds|DS}", [], IIC_POP_SR>,
+ Requires<[In32BitMode]>;
+
+def POPES16 : I<0x07, RawFrm, (outs), (ins),
+ "pop{w}\t{%es|ES}", [], IIC_POP_SR>,
+ OpSize, Requires<[In32BitMode]>;
+def POPES32 : I<0x07, RawFrm, (outs), (ins),
+ "pop{l}\t{%es|ES}", [], IIC_POP_SR>,
+ Requires<[In32BitMode]>;
+
+def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{w}\t{%fs|FS}", [], IIC_POP_SR>, OpSize, TB;
+def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{l}\t{%fs|FS}", [], IIC_POP_SR>, TB, Requires<[In32BitMode]>;
+def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{q}\t{%fs|FS}", [], IIC_POP_SR>, TB;
+
+def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{w}\t{%gs|GS}", [], IIC_POP_SR>, OpSize, TB;
+def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{l}\t{%gs|GS}", [], IIC_POP_SR>, TB, Requires<[In32BitMode]>;
+def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{q}\t{%gs|GS}", [], IIC_POP_SR>, TB;
+
+
+def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lds{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize;
+def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lds{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>;
+
+def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lss{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize;
+def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lss{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
+def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lss{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
+
+def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "les{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize;
+def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "les{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>;
+
+def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lfs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize;
+def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lfs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
+def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lfs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
+
+def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lgs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize;
+def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lgs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
+
+def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lgs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
+
+
+def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
+ "verr\t$seg", [], IIC_VERR>, TB;
+def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
+ "verr\t$seg", [], IIC_VERR>, TB;
+def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
+ "verw\t$seg", [], IIC_VERW_MEM>, TB;
+def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
+ "verw\t$seg", [], IIC_VERW_REG>, TB;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Descriptor-table support instructions
+
+let SchedRW = [WriteSystem] in {
+def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
+def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdt\t$dst", [], IIC_SGDT>, TB;
+def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidt{w}\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>;
+def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidt\t$dst", []>, TB;
+def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
+ "sldt{w}\t$dst", [], IIC_SLDT>, TB, OpSize;
+def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{w}\t$dst", [], IIC_SLDT>, TB;
+def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
+ "sldt{l}\t$dst", [], IIC_SLDT>, TB;
+
+// LLDT is not interpreted specially in 64-bit mode because there is no sign
+// extension.
+def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
+ "sldt{q}\t$dst", [], IIC_SLDT>, TB;
+def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{q}\t$dst", [], IIC_SLDT>, TB;
+
+def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>;
+def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdt\t$src", [], IIC_LGDT>, TB;
+def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidt{w}\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>;
+def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidt\t$src", [], IIC_LIDT>, TB;
+def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
+ "lldt{w}\t$src", [], IIC_LLDT_REG>, TB;
+def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
+ "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Specialized register support
+let SchedRW = [WriteSystem] in {
+def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
+def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
+def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
+
+def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
+ "smsw{w}\t$dst", [], IIC_SMSW>, OpSize, TB;
+def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
+ "smsw{l}\t$dst", [], IIC_SMSW>, TB;
+// no m form encodable; use SMSW16m
+def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
+ "smsw{q}\t$dst", [], IIC_SMSW>, TB;
+
+// For memory operands, there is only a 16-bit form
+def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
+ "smsw{w}\t$dst", [], IIC_SMSW>, TB;
+
+def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
+ "lmsw{w}\t$src", [], IIC_LMSW_MEM>, TB;
+def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
+ "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
+
+def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Cache instructions
+let SchedRW = [WriteSystem] in {
+def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB;
+def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// XSAVE instructions
+let SchedRW = [WriteSystem] in {
+let Defs = [RDX, RAX], Uses = [RCX] in
+ def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
+
+let Uses = [RDX, RAX, RCX] in
+ def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+
+let Uses = [RDX, RAX] in {
+ def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
+ "xsave\t$dst", []>, TB;
+ def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
+ "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xrstor\t$dst", []>, TB;
+ def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
+ "xsaveopt\t$dst", []>, TB;
+ def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
+ "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+}
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// VIA PadLock crypto instructions
+let Defs = [RAX, RDI], Uses = [RDX, RDI] in
+ def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7;
+
+def : InstAlias<"xstorerng", (XSTORE)>;
+
+let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
+ def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7;
+ def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7;
+ def XCRYPTCTR : I<0xd8, RawFrm, (outs), (ins), "xcryptctr", []>, A7;
+ def XCRYPTCFB : I<0xe0, RawFrm, (outs), (ins), "xcryptcfb", []>, A7;
+ def XCRYPTOFB : I<0xe8, RawFrm, (outs), (ins), "xcryptofb", []>, A7;
+}
+
+let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
+ def XSHA1 : I<0xc8, RawFrm, (outs), (ins), "xsha1", []>, A6;
+ def XSHA256 : I<0xd0, RawFrm, (outs), (ins), "xsha256", []>, A6;
+}
+let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
+ def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6;
+
+//===----------------------------------------------------------------------===//
+// FS/GS Base Instructions
+let Predicates = [HasFSGSBase, In64BitMode] in {
+ def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
+ "rdfsbase{l}\t$dst",
+ [(set GR32:$dst, (int_x86_rdfsbase_32))]>, TB, XS;
+ def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
+ "rdfsbase{q}\t$dst",
+ [(set GR64:$dst, (int_x86_rdfsbase_64))]>, TB, XS;
+ def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
+ "rdgsbase{l}\t$dst",
+ [(set GR32:$dst, (int_x86_rdgsbase_32))]>, TB, XS;
+ def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
+ "rdgsbase{q}\t$dst",
+ [(set GR64:$dst, (int_x86_rdgsbase_64))]>, TB, XS;
+ def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src),
+ "wrfsbase{l}\t$src",
+ [(int_x86_wrfsbase_32 GR32:$src)]>, TB, XS;
+ def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src),
+ "wrfsbase{q}\t$src",
+ [(int_x86_wrfsbase_64 GR64:$src)]>, TB, XS;
+ def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src),
+ "wrgsbase{l}\t$src",
+ [(int_x86_wrgsbase_32 GR32:$src)]>, TB, XS;
+ def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src),
+ "wrgsbase{q}\t$src",
+ [(int_x86_wrgsbase_64 GR64:$src)]>, TB, XS;
+}
+
+//===----------------------------------------------------------------------===//
+// INVPCID Instruction
+def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In32BitMode]>;
+def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In64BitMode]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrTSX.td b/contrib/llvm/lib/Target/X86/X86InstrTSX.td
new file mode 100644
index 000000000000..363a190aa854
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrTSX.td
@@ -0,0 +1,39 @@
+//===-- X86InstrVMX.td - TSX Instruction Set Extension -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel TSX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TSX instructions
+
+def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+let usesCustomInserter = 1 in
+def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
+ "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
+ Requires<[HasRTM]>;
+
+let isBranch = 1, isTerminator = 1, Defs = [EAX] in
+def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst),
+ "xbegin\t$dst", []>, Requires<[HasRTM]>;
+
+def XEND : I<0x01, MRM_D5, (outs), (ins),
+ "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
+
+let Defs = [EFLAGS] in
+def XTEST : I<0x01, MRM_D6, (outs), (ins),
+ "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasTSX]>;
+
+def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
+ "xabort\t$imm",
+ [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
new file mode 100644
index 000000000000..6d3548f09398
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
@@ -0,0 +1,66 @@
+//===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel VMX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VMX instructions
+
+// 66 0F 38 80
+def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In32BitMode]>;
+def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In64BitMode]>;
+// 66 0F 38 81
+def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In32BitMode]>;
+def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In64BitMode]>;
+// 0F 01 C1
+def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
+def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmclear\t$vmcs", []>, OpSize, TB;
+// OF 01 D4
+def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB;
+// 0F 01 C2
+def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
+// 0F 01 C3
+def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
+def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmptrld\t$vmcs", []>, TB;
+def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
+ "vmptrst\t$vmcs", []>, TB;
+def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
+def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
+def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
+def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
+def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
+def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
+def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
+def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
+// 0F 01 C4
+def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
+def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
+ "vmxon\t$vmxon", []>, XS;
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
new file mode 100644
index 000000000000..2aa08fad7836
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
@@ -0,0 +1,311 @@
+//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes XOP (eXtended OPerations)
+//
+//===----------------------------------------------------------------------===//
+
+multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int VR128:$src))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, memopv2i64>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>;
+}
+
+// Scalar load 2 addr operand instructions
+multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ Operand memop, ComplexPattern mem_cpat> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int VR128:$src))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, VEX;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
+ ssmem, sse_load_f32>;
+ defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
+ sdmem, sse_load_f64>;
+}
+
+multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ PatFrag memop> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int VR128:$src))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
+ defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
+}
+
+multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ PatFrag memop> {
+ def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (Int VR256:$src))]>, VEX, VEX_L;
+ def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX, VEX_L;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256,
+ memopv8f32>;
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256,
+ memopv4f64>;
+}
+
+multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX_4VOp3;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>,
+ VEX_4V, VEX_W;
+ def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int (bitconvert (memopv2i64 addr:$src1)), VR128:$src2))]>,
+ VEX_4VOp3;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
+ defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
+ defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
+ defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
+ defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
+ defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
+ defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
+ defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
+ defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
+ defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
+ defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
+ defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
+}
+
+multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1, imm:$src2))]>, VEX;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int (bitconvert (memopv2i64 addr:$src1)), imm:$src2))]>, VEX;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
+ defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
+ defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
+}
+
+// Instruction where second source can be memory, but third must be register
+multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3))]>, VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
+ defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
+ defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
+ defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
+ defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
+ defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
+ defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
+ defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
+ defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
+ defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
+ defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
+ defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
+}
+
+// Instruction where second source can be memory, third must be imm8
+multiclass xop4opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ VEX_4V;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ imm:$src3))]>, VEX_4V;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPCOMB : xop4opimm<0xCC, "vpcomb", int_x86_xop_vpcomb>;
+ defm VPCOMW : xop4opimm<0xCD, "vpcomw", int_x86_xop_vpcomw>;
+ defm VPCOMD : xop4opimm<0xCE, "vpcomd", int_x86_xop_vpcomd>;
+ defm VPCOMQ : xop4opimm<0xCF, "vpcomq", int_x86_xop_vpcomq>;
+ defm VPCOMUB : xop4opimm<0xEC, "vpcomub", int_x86_xop_vpcomub>;
+ defm VPCOMUW : xop4opimm<0xED, "vpcomuw", int_x86_xop_vpcomuw>;
+ defm VPCOMUD : xop4opimm<0xEE, "vpcomud", int_x86_xop_vpcomud>;
+ defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", int_x86_xop_vpcomuq>;
+}
+
+// Instruction where either second or third source can be memory
+multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, VR128:$src3))]>,
+ VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))))]>,
+ VEX_4V, VEX_I8IMM, VEX_W, MemOp4;
+ def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3))]>,
+ VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
+ defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
+}
+
+multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst, (Int VR256:$src1, VR256:$src2, VR256:$src3))]>,
+ VEX_4V, VEX_I8IMM, VEX_L;
+ def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int VR256:$src1, VR256:$src2,
+ (bitconvert (memopv4i64 addr:$src3))))]>,
+ VEX_4V, VEX_I8IMM, VEX_W, MemOp4, VEX_L;
+ def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int VR256:$src1, (bitconvert (memopv4i64 addr:$src2)),
+ VR256:$src3))]>,
+ VEX_4V, VEX_I8IMM, VEX_L;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
+}
+
+multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
+ Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
+ def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>;
+ def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>,
+ VEX_W, MemOp4;
+ def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>;
+ def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>, VEX_L;
+ def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>,
+ VEX_W, MemOp4, VEX_L;
+ def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>,
+ VEX_L;
+}
+
+defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
+ int_x86_xop_vpermil2pd_256, memopv2f64, memopv4f64>;
+defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,
+ int_x86_xop_vpermil2ps_256, memopv4f32, memopv8f32>;
+
diff --git a/contrib/llvm/lib/Target/X86/X86JITInfo.cpp b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
new file mode 100644
index 000000000000..44d8cce05413
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
@@ -0,0 +1,581 @@
+//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86JITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Valgrind.h"
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+// Determine the platform we're running on
+#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#endif
+
+void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ unsigned char *OldByte = (unsigned char *)Old;
+ *OldByte++ = 0xE9; // Emit JMP opcode.
+ unsigned *OldWord = (unsigned *)OldByte;
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)OldWord;
+ *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
+
+ // X86 doesn't need to invalidate the processor cache, so just invalidate
+ // Valgrind's cache directly.
+ sys::ValgrindDiscardTranslations(Old, 5);
+}
+
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// For ELF targets, use a .size and .type directive, to let tools
+// know the extent of functions defined in assembler.
+#if defined(__ELF__)
+# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
+# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
+#else
+# define SIZE(sym)
+# define TYPE_FUNCTION(sym)
+#endif
+
+// Provide a convenient way for disabling usage of CFI directives.
+// This is needed for old/broken assemblers (for example, gas on
+// Darwin is pretty old and doesn't support these directives)
+#if defined(__APPLE__)
+# define CFI(x)
+#else
+// FIXME: Disable this until we really want to use it. Also, we will
+// need to add some workarounds for compilers, which support
+// only subset of these directives.
+# define CFI(x)
+#endif
+
+// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional
+// callee saved registers, for the fastcc calling convention.
+extern "C" {
+#if defined(X86_64_JIT)
+# ifndef _MSC_VER
+ // No need to save EAX/EDX for X86-64.
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ // Save RBP
+ "pushq %rbp\n"
+ CFI(".cfi_def_cfa_offset 16\n")
+ CFI(".cfi_offset %rbp, -16\n")
+ // Save RSP
+ "movq %rsp, %rbp\n"
+ CFI(".cfi_def_cfa_register %rbp\n")
+ // Save all int arg registers
+ "pushq %rdi\n"
+ CFI(".cfi_rel_offset %rdi, 0\n")
+ "pushq %rsi\n"
+ CFI(".cfi_rel_offset %rsi, 8\n")
+ "pushq %rdx\n"
+ CFI(".cfi_rel_offset %rdx, 16\n")
+ "pushq %rcx\n"
+ CFI(".cfi_rel_offset %rcx, 24\n")
+ "pushq %r8\n"
+ CFI(".cfi_rel_offset %r8, 32\n")
+ "pushq %r9\n"
+ CFI(".cfi_rel_offset %r9, 40\n")
+ // Align stack on 16-byte boundary. ESP might not be properly aligned
+ // (8 byte) if this is called from an indirect stub.
+ "andq $-16, %rsp\n"
+ // Save all XMM arg registers
+ "subq $128, %rsp\n"
+ "movaps %xmm0, (%rsp)\n"
+ "movaps %xmm1, 16(%rsp)\n"
+ "movaps %xmm2, 32(%rsp)\n"
+ "movaps %xmm3, 48(%rsp)\n"
+ "movaps %xmm4, 64(%rsp)\n"
+ "movaps %xmm5, 80(%rsp)\n"
+ "movaps %xmm6, 96(%rsp)\n"
+ "movaps %xmm7, 112(%rsp)\n"
+ // JIT callee
+#ifdef _WIN64
+ "subq $32, %rsp\n"
+ "movq %rbp, %rcx\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rdx\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
+ "addq $32, %rsp\n"
+#else
+ "movq %rbp, %rdi\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rsi\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
+#endif
+ // Restore all XMM arg registers
+ "movaps 112(%rsp), %xmm7\n"
+ "movaps 96(%rsp), %xmm6\n"
+ "movaps 80(%rsp), %xmm5\n"
+ "movaps 64(%rsp), %xmm4\n"
+ "movaps 48(%rsp), %xmm3\n"
+ "movaps 32(%rsp), %xmm2\n"
+ "movaps 16(%rsp), %xmm1\n"
+ "movaps (%rsp), %xmm0\n"
+ // Restore RSP
+ "movq %rbp, %rsp\n"
+ CFI(".cfi_def_cfa_register %rsp\n")
+ // Restore all int arg registers
+ "subq $48, %rsp\n"
+ CFI(".cfi_adjust_cfa_offset 48\n")
+ "popq %r9\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r9\n")
+ "popq %r8\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r8\n")
+ "popq %rcx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rcx\n")
+ "popq %rdx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdx\n")
+ "popq %rsi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rsi\n")
+ "popq %rdi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdi\n")
+ // Restore RBP
+ "popq %rbp\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rbp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+# else
+ // No inline assembler support on this platform. The routine is in external
+ // file.
+ void X86CompilationCallback();
+
+# endif
+#elif defined (X86_32_JIT)
+# ifndef _MSC_VER
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+# if defined(__APPLE__)
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+# endif
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register %esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+
+ // Same as X86CompilationCallback but also saves XMM argument registers.
+ void X86CompilationCallback_SSE(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback_SSE\n"
+ TYPE_FUNCTION(X86CompilationCallback_SSE)
+ ASMPREFIX "X86CompilationCallback_SSE:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ CFI(".cfi_restore %xmm3\n")
+ "movaps 32(%esp), %xmm2\n"
+ CFI(".cfi_restore %xmm2\n")
+ "movaps 16(%esp), %xmm1\n"
+ CFI(".cfi_restore %xmm1\n")
+ "movaps (%esp), %xmm0\n"
+ CFI(".cfi_restore %xmm0\n")
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback_SSE)
+ );
+# else
+ void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+
+ _declspec(naked) void X86CompilationCallback(void) {
+ __asm {
+ push ebp
+ mov ebp, esp
+ push eax
+ push edx
+ push ecx
+ and esp, -16
+ sub esp, 16
+ mov eax, dword ptr [ebp+4]
+ mov dword ptr [esp+4], eax
+ mov dword ptr [esp], ebp
+ call LLVMX86CompilationCallback2
+ mov esp, ebp
+ sub esp, 12
+ pop ecx
+ pop edx
+ pop eax
+ pop ebp
+ ret
+ }
+ }
+
+# endif // _MSC_VER
+
+#else // Not an i386 host
+ void X86CompilationCallback() {
+ llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!");
+ }
+#endif
+}
+
+/// This is the target-specific function invoked by the
+/// function stub when we did not know the real target of a call. This function
+/// must locate the start of the stub or call site and pass it into the JIT
+/// compiler function.
+extern "C" {
+LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr,
+ intptr_t RetAddr) {
+ intptr_t *RetAddrLoc = &StackPtr[1];
+ // We are reading raw stack data here. Tell MemorySanitizer that it is
+ // sufficiently initialized.
+ __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc));
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+
+ // It's a stub if there is an interrupt marker after the call.
+ bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE;
+
+ // The call instruction should have pushed the return value onto the stack...
+#if defined (X86_64_JIT)
+ RetAddr--; // Backtrack to the reference itself...
+#else
+ RetAddr -= 4; // Backtrack to the reference itself...
+#endif
+
+#if 0
+ DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr
+ << ": Resolving call to function: "
+ << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");
+#endif
+
+ // Sanity check to make sure this really is a call instruction.
+#if defined (X86_64_JIT)
+ assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!");
+ assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!");
+#else
+ assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
+#endif
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call.
+#if defined (X86_64_JIT)
+ assert(isStub &&
+ "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
+ " the call instruction varies too much.");
+#else
+ *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
+#endif
+
+ if (isStub) {
+ // If this is a stub, rewrite the call into an unconditional branch
+ // instruction so that two return addresses are not pushed onto the stack
+ // when the requested function finally gets called. This also makes the
+ // 0xCE byte (interrupt) dead, so the marker doesn't effect anything.
+#if defined (X86_64_JIT)
+ // If the target address is within 32-bit range of the stub, use a
+ // PC-relative branch instead of loading the actual address. (This is
+ // considerably shorter than the 64-bit immediate load already there.)
+ // We assume here intptr_t is 64 bits.
+ intptr_t diff = NewVal-RetAddr+7;
+ if (diff >= -2147483648LL && diff <= 2147483647LL) {
+ *(unsigned char*)(RetAddr-0xc) = 0xE9;
+ *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff;
+ } else {
+ *(intptr_t *)(RetAddr - 0xa) = NewVal;
+ ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6));
+ }
+ sys::ValgrindDiscardTranslations((void*)(RetAddr-0xc), 0xd);
+#else
+ ((unsigned char*)RetAddr)[-1] = 0xE9;
+ sys::ValgrindDiscardTranslations((void*)(RetAddr-1), 5);
+#endif
+ }
+
+ // Change the return address to reexecute the call instruction...
+#if defined (X86_64_JIT)
+ *RetAddrLoc -= 0xd;
+#else
+ *RetAddrLoc -= 5;
+#endif
+}
+}
+
+TargetJITInfo::LazyResolverFn
+X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ TsanIgnoreWritesBegin();
+ JITCompilerFunction = F;
+ TsanIgnoreWritesEnd();
+
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ if (Subtarget->hasSSE1())
+ return X86CompilationCallback_SSE;
+#endif
+
+ return X86CompilationCallback;
+}
+
+X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ useGOT = 0;
+ TLSOffset = 0;
+}
+
+void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE) {
+#if defined (X86_64_JIT)
+ const unsigned Alignment = 8;
+ uint8_t Buffer[8];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(intptr_t)ptr);
+ MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(((intptr_t)ptr) >> 32));
+#else
+ const unsigned Alignment = 4;
+ uint8_t Buffer[4];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)ptr);
+#endif
+ return JCE.allocIndirectGV(GV, Buffer, sizeof(Buffer), Alignment);
+}
+
+TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
+ // The 64-bit stub contains:
+ // movabs r10 <- 8-byte-target-address # 10 bytes
+ // call|jmp *r10 # 3 bytes
+ // The 32-bit stub contains a 5-byte call|jmp.
+ // If the stub is a call to the compilation callback, an extra byte is added
+ // to mark it as a stub.
+ StubLayout Result = {14, 4};
+ return Result;
+}
+
+void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE) {
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // complains about casting a function pointer to a normal pointer.
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
+ Target != (void*)(intptr_t)X86CompilationCallback_SSE);
+#else
+ bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback;
+#endif
+ JCE.emitAlignment(4);
+ void *Result = (void*)JCE.getCurrentPCValue();
+ if (NotCC) {
+#if defined (X86_64_JIT)
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // jmpq *r10
+ JCE.emitByte(2 | (4 << 3) | (3 << 6));
+#else
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
+#endif
+ return Result;
+ }
+
+#if defined (X86_64_JIT)
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // callq *r10
+ JCE.emitByte(2 | (2 << 3) | (3 << 6));
+#else
+ JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
+
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
+#endif
+
+ // This used to use 0xCD, but that value is used by JITMemoryManager to
+ // initialize the buffer with garbage, which means it may follow a
+ // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929.
+ JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
+ return Result;
+}
+
+/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+/// specific basic block.
+uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
+#if defined(X86_64_JIT)
+ return BB - Entry;
+#else
+ return BB - PICBase;
+#endif
+}
+
+template<typename T> static void addUnaligned(void *Pos, T Delta) {
+ T Value;
+ std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos),
+ sizeof(T));
+ Value += Delta;
+ std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value),
+ sizeof(T));
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((X86::RelocationType)MR->getRelocationType()) {
+ case X86::reloc_pcrel_word: {
+ // PC relative relocation, add the relocated value to the value already in
+ // memory, after we adjust it for where the PC is.
+ ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
+ break;
+ }
+ case X86::reloc_picrel_word: {
+ // PIC base relative relocation, add the relocated value to the value
+ // already in memory, after we adjust it for where the PIC base is.
+ ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
+ break;
+ }
+ case X86::reloc_absolute_word:
+ case X86::reloc_absolute_word_sext:
+ // Absolute relocation, just add the relocated value to the value already
+ // in memory.
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
+ break;
+ case X86::reloc_absolute_dword:
+ addUnaligned<intptr_t>(RelocPos, ResultPtr);
+ break;
+ }
+ }
+}
+
+char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
+#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER)
+ TLSOffset -= size;
+ return TLSOffset;
+#else
+ llvm_unreachable("Cannot allocate thread local storage on this arch!");
+#endif
+}
diff --git a/contrib/llvm/lib/Target/X86/X86JITInfo.h b/contrib/llvm/lib/Target/X86/X86JITInfo.h
new file mode 100644
index 000000000000..f916327378a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86JITInfo.h
@@ -0,0 +1,81 @@
+//===-- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86JITINFO_H
+#define X86JITINFO_H
+
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class X86TargetMachine;
+ class X86Subtarget;
+
+ class X86JITInfo : public TargetJITInfo {
+ X86TargetMachine &TM;
+ const X86Subtarget *Subtarget;
+ uintptr_t PICBase;
+ char* TLSOffset;
+ public:
+ explicit X86JITInfo(X86TargetMachine &tm);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on X86.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE);
+
+ /// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+ /// specific basic block.
+ virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// allocateThreadLocalMemory - Each target has its own way of
+ /// handling thread local variables. This method returns a value only
+ /// meaningful to the target.
+ virtual char* allocateThreadLocalMemory(size_t size);
+
+ /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute
+ /// PIC jumptable entry.
+ void setPICBase(uintptr_t Base) { PICBase = Base; }
+ uintptr_t getPICBase() const { return PICBase; }
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
new file mode 100644
index 000000000000..a8a9fd8accde
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -0,0 +1,794 @@
+//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower X86 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86AsmPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+namespace {
+
+/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
+class X86MCInstLower {
+ MCContext &Ctx;
+ Mangler *Mang;
+ const MachineFunction &MF;
+ const TargetMachine &TM;
+ const MCAsmInfo &MAI;
+ X86AsmPrinter &AsmPrinter;
+public:
+ X86MCInstLower(Mangler *mang, const MachineFunction &MF,
+ X86AsmPrinter &asmprinter);
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+private:
+ MachineModuleInfoMachO &getMachOMMI() const;
+};
+
+} // end anonymous namespace
+
+X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
+ X86AsmPrinter &asmprinter)
+: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
+ MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
+
+MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
+ return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
+/// operand to an MCSymbol.
+MCSymbol *X86MCInstLower::
+GetSymbolFromOperand(const MachineOperand &MO) const {
+ assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
+
+ SmallString<128> Name;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GV = MO.getGlobal();
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ isImplicitlyPrivate = true;
+
+ Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ } else if (MO.isSymbol()) {
+ Name += MAI.getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else if (MO.isMBB()) {
+ Name += MO.getMBB()->getSymbol()->getName();
+ }
+
+ // If the target flags on the operand changes the name of the symbol, do that
+ // before we return the symbol.
+ switch (MO.getTargetFlags()) {
+ default: break;
+ case X86II::MO_DLLIMPORT: {
+ // Handle dllimport linkage.
+ const char *Prefix = "__imp_";
+ Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix));
+ break;
+ }
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getHiddenGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_STUB: {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ }
+ return Sym;
+ }
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = 0;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ // These affect the name of the symbol, not any suffix.
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ break;
+
+ case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(),
+ Ctx),
+ Ctx);
+ break;
+ case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break;
+ case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
+ case X86II::MO_TLSLD: RefKind = MCSymbolRefExpr::VK_TLSLD; break;
+ case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break;
+ case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
+ case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
+ case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break;
+ case X86II::MO_DTPOFF: RefKind = MCSymbolRefExpr::VK_DTPOFF; break;
+ case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
+ case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break;
+ case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
+ case X86II::MO_GOT: RefKind = MCSymbolRefExpr::VK_GOT; break;
+ case X86II::MO_GOTOFF: RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
+ case X86II::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
+ Ctx);
+ if (MO.isJTI() && MAI.hasSetDirective()) {
+ // If .set directive is supported, use it to reduce the number of
+ // relocations the assembler will generate for differences between
+ // local labels. This is only safe when the symbols are in the same
+ // section so we are restricting it to jumptable references.
+ MCSymbol *Label = Ctx.CreateTempSymbol();
+ AsmPrinter.OutStreamer.EmitAssignment(Label, Expr);
+ Expr = MCSymbolRefExpr::Create(Label, Ctx);
+ }
+ break;
+ }
+
+ if (Expr == 0)
+ Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
+
+ if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+
+
+static void lower_subreg32(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg32.
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ if (Reg != 0)
+ MI->getOperand(OpNo).setReg(getX86SubSuperRegister(Reg, MVT::i32));
+}
+
+static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg64.
+ for (unsigned i = 0; i != 4; ++i) {
+ if (!MI->getOperand(OpNo+i).isReg()) continue;
+
+ unsigned Reg = MI->getOperand(OpNo+i).getReg();
+ // LEAs can use RIP-relative addressing, and RIP has no sub/super register.
+ if (Reg == 0 || Reg == X86::RIP) continue;
+
+ MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
+ }
+}
+
+/// LowerSubReg32_Op0 - Things like MOVZX16rr8 -> MOVZX32rr8.
+static void LowerSubReg32_Op0(MCInst &OutMI, unsigned NewOpc) {
+ OutMI.setOpcode(NewOpc);
+ lower_subreg32(&OutMI, 0);
+}
+/// LowerUnaryToTwoAddr - R = setb -> R = sbb R, R
+static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
+ OutMI.setOpcode(NewOpc);
+ OutMI.addOperand(OutMI.getOperand(0));
+ OutMI.addOperand(OutMI.getOperand(0));
+}
+
+/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
+/// a short fixed-register form.
+static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
+ unsigned ImmOp = Inst.getNumOperands() - 1;
+ assert(Inst.getOperand(0).isReg() &&
+ (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
+ ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
+ Inst.getNumOperands() == 2) && "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(0).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(ImmOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
+/// \brief Simplify things like MOV32rm to MOV32o32a.
+static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
+ unsigned Opcode) {
+ // Don't make these simplifications in 64-bit mode; other assemblers don't
+ // perform them because they make the code larger.
+ if (Printer.getSubtarget().is64Bit())
+ return;
+
+ bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
+ unsigned AddrBase = IsStore;
+ unsigned RegOp = IsStore ? 0 : 5;
+ unsigned AddrOp = AddrBase + 3;
+ assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
+ Inst.getOperand(AddrBase + 0).isReg() && // base
+ Inst.getOperand(AddrBase + 1).isImm() && // scale
+ Inst.getOperand(AddrBase + 2).isReg() && // index register
+ (Inst.getOperand(AddrOp).isExpr() || // address
+ Inst.getOperand(AddrOp).isImm())&&
+ Inst.getOperand(AddrBase + 4).isReg() && // segment
+ "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(RegOp).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // Check whether this is an absolute address.
+ // FIXME: We know TLVP symbol refs aren't, but there should be a better way
+ // to do this here.
+ bool Absolute = true;
+ if (Inst.getOperand(AddrOp).isExpr()) {
+ const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
+ Absolute = false;
+ }
+
+ if (Absolute &&
+ (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 2).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 4).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 1).getImm() != 1))
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(AddrOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
+void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO,
+ AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers.
+ continue;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+
+ // Handle a few special cases to eliminate operand modifiers.
+ReSimplify:
+ switch (OutMI.getOpcode()) {
+ case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
+ lower_lea64_32mem(&OutMI, 1);
+ // FALL THROUGH.
+ case X86::LEA64r:
+ case X86::LEA16r:
+ case X86::LEA32r:
+ // LEA should have a segment register, but it must be empty.
+ assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands &&
+ "Unexpected # of LEA operands");
+ assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
+ "LEA has segment specified!");
+ break;
+ case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break;
+ case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break;
+ case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
+ case X86::MOVZX64rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
+ case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
+ case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break;
+ case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break;
+ case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
+ case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
+
+ case X86::MOV16r0:
+ LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
+ LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
+ break;
+ case X86::MOV64r0:
+ LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0
+ LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
+ break;
+
+ // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
+ // if one of the registers is extended, but other isn't.
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+
+ // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
+ // inputs modeled as normal uses instead of implicit uses. As such, truncate
+ // off all but the first operand (the callee). FIXME: Change isel.
+ case X86::TAILJMPr64:
+ case X86::CALL64r:
+ case X86::CALL64pcrel32: {
+ unsigned Opcode = OutMI.getOpcode();
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::RET);
+ break;
+ }
+
+ // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
+ case X86::TAILJMPr:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: {
+ unsigned Opcode;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::TAILJMPr: Opcode = X86::JMP32r; break;
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
+ }
+
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
+ // this with an ugly goto in case the resultant OR uses EAX and needs the
+ // short form.
+ case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
+ case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
+ case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
+ case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
+ case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
+ case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
+ case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
+ case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
+ case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
+
+ // The assembler backend wants to see branches in their small form and relax
+ // them to their large form. The JIT can only handle the large form because
+ // it does not do relaxation. For now, translate the large form to the
+ // small one here.
+ case X86::JMP_4: OutMI.setOpcode(X86::JMP_1); break;
+ case X86::JO_4: OutMI.setOpcode(X86::JO_1); break;
+ case X86::JNO_4: OutMI.setOpcode(X86::JNO_1); break;
+ case X86::JB_4: OutMI.setOpcode(X86::JB_1); break;
+ case X86::JAE_4: OutMI.setOpcode(X86::JAE_1); break;
+ case X86::JE_4: OutMI.setOpcode(X86::JE_1); break;
+ case X86::JNE_4: OutMI.setOpcode(X86::JNE_1); break;
+ case X86::JBE_4: OutMI.setOpcode(X86::JBE_1); break;
+ case X86::JA_4: OutMI.setOpcode(X86::JA_1); break;
+ case X86::JS_4: OutMI.setOpcode(X86::JS_1); break;
+ case X86::JNS_4: OutMI.setOpcode(X86::JNS_1); break;
+ case X86::JP_4: OutMI.setOpcode(X86::JP_1); break;
+ case X86::JNP_4: OutMI.setOpcode(X86::JNP_1); break;
+ case X86::JL_4: OutMI.setOpcode(X86::JL_1); break;
+ case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
+ case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
+ case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;
+
+ // Atomic load and store require a separate pseudo-inst because Acquire
+ // implies mayStore and Release implies mayLoad; fix these to regular MOV
+ // instructions here
+ case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
+ case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
+ case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
+ case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
+ case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
+
+ // We don't currently select the correct instruction form for instructions
+ // which have a short %eax, etc. form. Handle this by custom lowering, for
+ // now.
+ //
+ // Note, we are currently not handling the following instructions:
+ // MOV64ao8, MOV64o8a
+ // XCHG16ar, XCHG32ar, XCHG64ar
+ case X86::MOV8mr_NOREX:
+ case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break;
+ case X86::MOV8rm_NOREX:
+ case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
+
+ case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
+ case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
+ case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break;
+ case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break;
+ case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break;
+ case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break;
+ case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break;
+ case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break;
+ case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break;
+ case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break;
+ case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break;
+ case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break;
+ case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break;
+ case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break;
+ case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break;
+ case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break;
+ case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break;
+ case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break;
+ case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break;
+ case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break;
+ case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break;
+ case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break;
+ case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break;
+ case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break;
+ case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break;
+ case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break;
+ case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break;
+ case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break;
+ case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break;
+ case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break;
+ case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break;
+ case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break;
+ case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break;
+ case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
+ case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
+ case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
+
+ case X86::MORESTACK_RET:
+ OutMI.setOpcode(X86::RET);
+ break;
+
+ case X86::MORESTACK_RET_RESTORE_R10:
+ OutMI.setOpcode(X86::MOV64rr);
+ OutMI.addOperand(MCOperand::CreateReg(X86::R10));
+ OutMI.addOperand(MCOperand::CreateReg(X86::RAX));
+
+ AsmPrinter.OutStreamer.EmitInstruction(MCInstBuilder(X86::RET));
+ break;
+ }
+}
+
+static void LowerTlsAddr(MCStreamer &OutStreamer,
+ X86MCInstLower &MCInstLowering,
+ const MachineInstr &MI) {
+
+ bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
+ MI.getOpcode() == X86::TLS_base_addr64;
+
+ bool needsPadding = MI.getOpcode() == X86::TLS_addr64;
+
+ MCContext &context = OutStreamer.getContext();
+
+ if (needsPadding)
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+
+ MCSymbolRefExpr::VariantKind SRVK;
+ switch (MI.getOpcode()) {
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ SRVK = MCSymbolRefExpr::VK_TLSGD;
+ break;
+ case X86::TLS_base_addr32:
+ SRVK = MCSymbolRefExpr::VK_TLSLDM;
+ break;
+ case X86::TLS_base_addr64:
+ SRVK = MCSymbolRefExpr::VK_TLSLD;
+ break;
+ default:
+ llvm_unreachable("unexpected opcode");
+ }
+
+ MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
+ const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context);
+
+ MCInst LEA;
+ if (is64Bits) {
+ LEA.setOpcode(X86::LEA64r);
+ LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest
+ LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(0)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) {
+ LEA.setOpcode(X86::LEA32r);
+ LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(0)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ } else {
+ LEA.setOpcode(X86::LEA32r);
+ LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::CreateReg(0)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ }
+ OutStreamer.EmitInstruction(LEA);
+
+ if (needsPadding) {
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::REX64_PREFIX));
+ }
+
+ StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
+ MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
+ const MCSymbolRefExpr *tlsRef =
+ MCSymbolRefExpr::Create(tlsGetAddr,
+ MCSymbolRefExpr::VK_PLT,
+ context);
+
+ OutStreamer.EmitInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
+ : X86::CALLpcrel32)
+ .addExpr(tlsRef));
+}
+
+void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ X86MCInstLower MCInstLowering(Mang, *MF, *this);
+ switch (MI->getOpcode()) {
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ std::string TmpStr;
+ raw_string_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+
+ // Emit nothing here but a comment if we can.
+ case X86::Int_MemBarrier:
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
+ return;
+
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ // Lower these as normal, but add some comments.
+ unsigned Reg = MI->getOperand(0).getReg();
+ OutStreamer.AddComment(StringRef("eh_return, addr: %") +
+ X86ATTInstPrinter::getRegisterName(Reg));
+ break;
+ }
+ case X86::TAILJMPr:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64:
+ // Lower these as normal, but add some comments.
+ OutStreamer.AddComment("TAILCALL");
+ break;
+
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ case X86::TLS_base_addr32:
+ case X86::TLS_base_addr64:
+ return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
+
+ case X86::MOVPC32r: {
+ // This is a pseudo op for a two instruction sequence with a label, which
+ // looks like:
+ // call "L1$pb"
+ // "L1$pb":
+ // popl %esi
+
+ // Emit the call.
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::CALLpcrel32)
+ .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+
+ // popl $reg
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::POP32r)
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
+
+ case X86::ADD32ri: {
+ // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
+ if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ break;
+
+ // Okay, we have something like:
+ // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
+
+ // For this, we want to print something like:
+ // MYGLOBAL + (. - PICBASE)
+ // However, we can't generate a ".", so just emit a new label here and refer
+ // to it.
+ MCSymbol *DotSym = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(DotSym);
+
+ // Now that we have emitted the label, lower the complex operand expression.
+ MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
+
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ const MCExpr *PICBase =
+ MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
+ DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
+
+ DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+ DotExpr, OutContext);
+
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::ADD32ri)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(DotExpr));
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..568dc222d9d1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- X86MachineFuctionInfo.cpp - X86 machine function info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void X86MachineFunctionInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
new file mode 100644
index 000000000000..78d20ce870f1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -0,0 +1,145 @@
+//===-- X86MachineFuctionInfo.h - X86 machine function info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares X86-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MACHINEFUNCTIONINFO_H
+#define X86MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// X86MachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private X86 target-specific information for each MachineFunction.
+class X86MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// ForceFramePointer - True if the function is required to use of frame
+ /// pointer for reasons other than it containing dynamic allocation or
+ /// that FP eliminatation is turned off. For example, Cygwin main function
+ /// contains stack pointer re-alignment code which requires FP.
+ bool ForceFramePointer;
+
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// BytesToPopOnReturn - Number of bytes function pops on return (in addition
+ /// to the space used by the return address).
+ /// Used on windows platform for stdcall & fastcall name decoration
+ unsigned BytesToPopOnReturn;
+
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+ /// TailCallReturnAddrDelta - The number of bytes by which return address
+ /// stack slot is moved as the result of tail call optimization.
+ int TailCallReturnAddrDelta;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// RegSaveFrameIndex - X86-64 vararg func register save area.
+ int RegSaveFrameIndex;
+ /// VarArgsGPOffset - X86-64 vararg func int reg offset.
+ unsigned VarArgsGPOffset;
+ /// VarArgsFPOffset - X86-64 vararg func fp reg offset.
+ unsigned VarArgsFPOffset;
+ /// ArgumentStackSize - The number of bytes on stack consumed by the arguments
+ /// being passed on the stack.
+ unsigned ArgumentStackSize;
+ /// NumLocalDynamics - Number of local-dynamic TLS accesses.
+ unsigned NumLocalDynamics;
+
+public:
+ X86MachineFunctionInfo() : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0),
+ VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0),
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0),
+ ArgumentStackSize(0),
+ NumLocalDynamics(0) {}
+
+ explicit X86MachineFunctionInfo(MachineFunction &MF)
+ : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0),
+ VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0),
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0),
+ ArgumentStackSize(0),
+ NumLocalDynamics(0) {}
+
+ bool getForceFramePointer() const { return ForceFramePointer;}
+ void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
+ void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+
+ int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
+ void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
+
+ int getRegSaveFrameIndex() const { return RegSaveFrameIndex; }
+ void setRegSaveFrameIndex(int Idx) { RegSaveFrameIndex = Idx; }
+
+ unsigned getVarArgsGPOffset() const { return VarArgsGPOffset; }
+ void setVarArgsGPOffset(unsigned Offset) { VarArgsGPOffset = Offset; }
+
+ unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; }
+ void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; }
+
+ unsigned getArgumentStackSize() const { return ArgumentStackSize; }
+ void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
+
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
new file mode 100644
index 000000000000..83e75ea994ca
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -0,0 +1,212 @@
+//===-------- X86PadShortFunction.cpp - pad short functions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which will pad short functions to prevent
+// a stall if a function returns before the return address is ready. This
+// is needed for some Intel Atom processors.
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+
+#define DEBUG_TYPE "x86-pad-short-functions"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumBBsPadded, "Number of basic blocks padded");
+
+namespace {
+ struct VisitedBBInfo {
+ // HasReturn - Whether the BB contains a return instruction
+ bool HasReturn;
+
+ // Cycles - Number of cycles until return if HasReturn is true, otherwise
+ // number of cycles until end of the BB
+ unsigned int Cycles;
+
+ VisitedBBInfo() : HasReturn(false), Cycles(0) {}
+ VisitedBBInfo(bool HasReturn, unsigned int Cycles)
+ : HasReturn(HasReturn), Cycles(Cycles) {}
+ };
+
+ struct PadShortFunc : public MachineFunctionPass {
+ static char ID;
+ PadShortFunc() : MachineFunctionPass(ID)
+ , Threshold(4), TM(0), TII(0) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Atom pad short functions";
+ }
+
+ private:
+ void findReturns(MachineBasicBlock *MBB,
+ unsigned int Cycles = 0);
+
+ bool cyclesUntilReturn(MachineBasicBlock *MBB,
+ unsigned int &Cycles);
+
+ void addPadding(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned int NOOPsToAdd);
+
+ const unsigned int Threshold;
+
+ // ReturnBBs - Maps basic blocks that return to the minimum number of
+ // cycles until the return, starting from the entry block.
+ DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs;
+
+ // VisitedBBs - Cache of previously visited BBs.
+ DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs;
+
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ };
+
+ char PadShortFunc::ID = 0;
+}
+
+FunctionPass *llvm::createX86PadShortFunctions() {
+ return new PadShortFunc();
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// NOOP instructions before early exits.
+bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
+ const AttributeSet &FnAttrs = MF.getFunction()->getAttributes();
+ if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize)) {
+ return false;
+ }
+
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+
+ // Search through basic blocks and mark the ones that have early returns
+ ReturnBBs.clear();
+ VisitedBBs.clear();
+ findReturns(MF.begin());
+
+ bool MadeChange = false;
+
+ MachineBasicBlock *MBB;
+ unsigned int Cycles = 0;
+
+ // Pad the identified basic blocks with NOOPs
+ for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
+ I != ReturnBBs.end(); ++I) {
+ MBB = I->first;
+ Cycles = I->second;
+
+ if (Cycles < Threshold) {
+ // BB ends in a return. Skip over any DBG_VALUE instructions
+ // trailing the terminator.
+ assert(MBB->size() > 0 &&
+ "Basic block should contain at least a RET but is empty");
+ MachineBasicBlock::iterator ReturnLoc = --MBB->end();
+
+ while (ReturnLoc->isDebugValue())
+ --ReturnLoc;
+ assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
+ "Basic block does not end with RET");
+
+ addPadding(MBB, ReturnLoc, Threshold - Cycles);
+ NumBBsPadded++;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// findReturn - Starting at MBB, follow control flow and add all
+/// basic blocks that contain a return to ReturnBBs.
+void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
+ // If this BB has a return, note how many cycles it takes to get there.
+ bool hasReturn = cyclesUntilReturn(MBB, Cycles);
+ if (Cycles >= Threshold)
+ return;
+
+ if (hasReturn) {
+ ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
+ return;
+ }
+
+ // Follow branches in BB and look for returns
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin();
+ I != MBB->succ_end(); ++I) {
+ if (*I == MBB)
+ continue;
+ findReturns(*I, Cycles);
+ }
+}
+
+/// cyclesUntilReturn - return true if the MBB has a return instruction,
+/// and return false otherwise.
+/// Cycles will be incremented by the number of cycles taken to reach the
+/// return or the end of the BB, whichever occurs first.
+bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
+ unsigned int &Cycles) {
+ // Return cached result if BB was previously visited
+ DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it
+ = VisitedBBs.find(MBB);
+ if (it != VisitedBBs.end()) {
+ VisitedBBInfo BBInfo = it->second;
+ Cycles += BBInfo.Cycles;
+ return BBInfo.HasReturn;
+ }
+
+ unsigned int CyclesToEnd = 0;
+
+ for (MachineBasicBlock::iterator MBBI = MBB->begin();
+ MBBI != MBB->end(); ++MBBI) {
+ MachineInstr *MI = MBBI;
+ // Mark basic blocks with a return instruction. Calls to other
+ // functions do not count because the called function will be padded,
+ // if necessary.
+ if (MI->isReturn() && !MI->isCall()) {
+ VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
+ Cycles += CyclesToEnd;
+ return true;
+ }
+
+ CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI);
+ }
+
+ VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
+ Cycles += CyclesToEnd;
+ return false;
+}
+
+/// addPadding - Add the given number of NOOP instructions to the function
+/// just prior to the return at MBBI
+void PadShortFunc::addPadding(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned int NOOPsToAdd) {
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ while (NOOPsToAdd-- > 0) {
+ BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP));
+ BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP));
+ }
+}
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
new file mode 100644
index 000000000000..16886e432d19
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -0,0 +1,691 @@
+//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+// This file is responsible for the frame pointer elimination optimization
+// on X86.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86RegisterInfo.h"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "X86GenRegisterInfo.inc"
+
+using namespace llvm;
+
+cl::opt<bool>
+ForceStackAlign("force-align-stack",
+ cl::desc("Force align the stack to the minimum alignment"
+ " needed for the function."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
+X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP),
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false),
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true),
+ (tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP)),
+ TM(tm), TII(tii) {
+ X86_MC::InitLLVM2SEHRegisterMapping(this);
+
+ // Cache some information.
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ Is64Bit = Subtarget->is64Bit();
+ IsWin64 = Subtarget->isTargetWin64();
+
+ if (Is64Bit) {
+ SlotSize = 8;
+ StackPtr = X86::RSP;
+ FramePtr = X86::RBP;
+ } else {
+ SlotSize = 4;
+ StackPtr = X86::ESP;
+ FramePtr = X86::EBP;
+ }
+ // Use a callee-saved register as the base pointer. These registers must
+ // not conflict with any ABI requirements. For example, in 32-bit mode PIC
+ // requires GOT in the EBX register before function calls via PLT GOT pointer.
+ BasePtr = Is64Bit ? X86::RBX : X86::ESI;
+}
+
+/// getCompactUnwindRegNum - This function maps the register to the number for
+/// compact unwind encoding. Return -1 if the register isn't valid.
+int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
+ switch (getLLVMRegNum(RegNum, isEH)) {
+ case X86::EBX: case X86::RBX: return 1;
+ case X86::ECX: case X86::R12: return 2;
+ case X86::EDX: case X86::R13: return 3;
+ case X86::EDI: case X86::R14: return 4;
+ case X86::ESI: case X86::R15: return 5;
+ case X86::EBP: case X86::RBP: return 6;
+ }
+
+ return -1;
+}
+
+bool
+X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ // Only enable when post-RA scheduling is enabled and this is needed.
+ return TM.getSubtargetImpl()->postRAScheduler();
+}
+
+int
+X86RegisterInfo::getSEHRegNum(unsigned i) const {
+ return getEncodingValue(i);
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
+ unsigned Idx) const {
+ // The sub_8bit sub-register index is more constrained in 32-bit mode.
+ // It behaves just like the sub_8bit_hi index.
+ if (!Is64Bit && Idx == X86::sub_8bit)
+ Idx = X86::sub_8bit_hi;
+
+ // Forward to TableGen's default version.
+ return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ // The sub_8bit sub-register index is more constrained in 32-bit mode.
+ if (!Is64Bit && SubIdx == X86::sub_8bit) {
+ A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
+ if (!A)
+ return 0;
+ }
+ return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
+}
+
+const TargetRegisterClass*
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+ // Don't allow super-classes of GR8_NOREX. This class is only used after
+ // extrating sub_8bit_hi sub-registers. The H sub-registers cannot be copied
+ // to the full GR8 register class in 64-bit mode, so we cannot allow the
+ // reigster class inflation.
+ //
+ // The GR8_NOREX class is always used in a way that won't be constrained to a
+ // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
+ // full GR8 class.
+ if (RC == &X86::GR8_NOREXRegClass)
+ return RC;
+
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
+ do {
+ switch (Super->getID()) {
+ case X86::GR8RegClassID:
+ case X86::GR16RegClassID:
+ case X86::GR32RegClassID:
+ case X86::GR64RegClassID:
+ case X86::FR32RegClassID:
+ case X86::FR64RegClassID:
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ case X86::VR128RegClassID:
+ case X86::VR256RegClassID:
+ // Don't return a super-class that would shrink the spill size.
+ // That can happen with the vector and float classes.
+ if (Super->getSize() == RC->getSize())
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
+ case 0: // Normal GPRs.
+ if (Subtarget.isTarget64BitLP64())
+ return &X86::GR64RegClass;
+ return &X86::GR32RegClass;
+ case 1: // Normal GPRs except the stack pointer (for encoding reasons).
+ if (Subtarget.isTarget64BitLP64())
+ return &X86::GR64_NOSPRegClass;
+ return &X86::GR32_NOSPRegClass;
+ case 2: // Available for tailcall (not callee-saved GPRs).
+ if (Subtarget.isTargetWin64())
+ return &X86::GR64_TCW64RegClass;
+ else if (Subtarget.is64Bit())
+ return &X86::GR64_TCRegClass;
+
+ const Function *F = MF.getFunction();
+ bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
+ if (hasHipeCC)
+ return &X86::GR32RegClass;
+ return &X86::GR32_TCRegClass;
+ }
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &X86::CCRRegClass) {
+ if (Is64Bit)
+ return &X86::GR64RegClass;
+ else
+ return &X86::GR32RegClass;
+ }
+ return RC;
+}
+
+unsigned
+X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case X86::GR32RegClassID:
+ return 4 - FPDiff;
+ case X86::GR64RegClassID:
+ return 12 - FPDiff;
+ case X86::VR128RegClassID:
+ return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4;
+ case X86::VR64RegClassID:
+ return 4;
+ }
+}
+
+const uint16_t *
+X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ switch (MF->getFunction()->getCallingConv()) {
+ case CallingConv::GHC:
+ case CallingConv::HiPE:
+ return CSR_NoRegs_SaveList;
+
+ case CallingConv::Intel_OCL_BI: {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ if (HasAVX && IsWin64)
+ return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
+ if (HasAVX && Is64Bit)
+ return CSR_64_Intel_OCL_BI_AVX_SaveList;
+ if (!HasAVX && !IsWin64 && Is64Bit)
+ return CSR_64_Intel_OCL_BI_SaveList;
+ break;
+ }
+
+ case CallingConv::Cold:
+ if (Is64Bit)
+ return CSR_MostRegs_64_SaveList;
+ break;
+
+ default:
+ break;
+ }
+
+ bool CallsEHReturn = MF->getMMI().callsEHReturn();
+ if (Is64Bit) {
+ if (IsWin64)
+ return CSR_Win64_SaveList;
+ if (CallsEHReturn)
+ return CSR_64EHRet_SaveList;
+ return CSR_64_SaveList;
+ }
+ if (CallsEHReturn)
+ return CSR_32EHRet_SaveList;
+ return CSR_32_SaveList;
+}
+
+const uint32_t*
+X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+
+ if (CC == CallingConv::Intel_OCL_BI) {
+ if (IsWin64 && HasAVX)
+ return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
+ if (Is64Bit && HasAVX)
+ return CSR_64_Intel_OCL_BI_AVX_RegMask;
+ if (!HasAVX && !IsWin64 && Is64Bit)
+ return CSR_64_Intel_OCL_BI_RegMask;
+ }
+ if (CC == CallingConv::GHC || CC == CallingConv::HiPE)
+ return CSR_NoRegs_RegMask;
+ if (!Is64Bit)
+ return CSR_32_RegMask;
+ if (CC == CallingConv::Cold)
+ return CSR_MostRegs_64_RegMask;
+ if (IsWin64)
+ return CSR_Win64_RegMask;
+ return CSR_64_RegMask;
+}
+
+const uint32_t*
+X86RegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
+BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Set the stack-pointer register and its aliases as reserved.
+ Reserved.set(X86::RSP);
+ for (MCSubRegIterator I(X86::RSP, this); I.isValid(); ++I)
+ Reserved.set(*I);
+
+ // Set the instruction pointer register and its aliases as reserved.
+ Reserved.set(X86::RIP);
+ for (MCSubRegIterator I(X86::RIP, this); I.isValid(); ++I)
+ Reserved.set(*I);
+
+ // Set the frame-pointer register and its aliases as reserved if needed.
+ if (TFI->hasFP(MF)) {
+ Reserved.set(X86::RBP);
+ for (MCSubRegIterator I(X86::RBP, this); I.isValid(); ++I)
+ Reserved.set(*I);
+ }
+
+ // Set the base-pointer register and its aliases as reserved if needed.
+ if (hasBasePointer(MF)) {
+ CallingConv::ID CC = MF.getFunction()->getCallingConv();
+ const uint32_t* RegMask = getCallPreservedMask(CC);
+ if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
+ report_fatal_error(
+ "Stack realignment in presence of dynamic allocas is not supported with"
+ "this calling convention.");
+
+ Reserved.set(getBaseRegister());
+ for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I)
+ Reserved.set(*I);
+ }
+
+ // Mark the segment registers as reserved.
+ Reserved.set(X86::CS);
+ Reserved.set(X86::SS);
+ Reserved.set(X86::DS);
+ Reserved.set(X86::ES);
+ Reserved.set(X86::FS);
+ Reserved.set(X86::GS);
+
+ // Mark the floating point stack registers as reserved.
+ Reserved.set(X86::ST0);
+ Reserved.set(X86::ST1);
+ Reserved.set(X86::ST2);
+ Reserved.set(X86::ST3);
+ Reserved.set(X86::ST4);
+ Reserved.set(X86::ST5);
+ Reserved.set(X86::ST6);
+ Reserved.set(X86::ST7);
+
+ // Reserve the registers that only exist in 64-bit mode.
+ if (!Is64Bit) {
+ // These 8-bit registers are part of the x86-64 extension even though their
+ // super-registers are old 32-bits.
+ Reserved.set(X86::SIL);
+ Reserved.set(X86::DIL);
+ Reserved.set(X86::BPL);
+ Reserved.set(X86::SPL);
+
+ for (unsigned n = 0; n != 8; ++n) {
+ // R8, R9, ...
+ static const uint16_t GPR64[] = {
+ X86::R8, X86::R9, X86::R10, X86::R11,
+ X86::R12, X86::R13, X86::R14, X86::R15
+ };
+ for (MCRegAliasIterator AI(GPR64[n], this, true); AI.isValid(); ++AI)
+ Reserved.set(*AI);
+
+ // XMM8, XMM9, ...
+ assert(X86::XMM15 == X86::XMM8+7);
+ for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
+ Reserved.set(*AI);
+ }
+ }
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ if (!EnableBasePointer)
+ return false;
+
+ // When we need stack realignment and there are dynamic allocas, we can't
+ // reference off of the stack pointer, so we reserve a base pointer.
+ //
+ // This is also true if the function contain MS-style inline assembly. We
+ // do this because if any stack changes occur in the inline assembly, e.g.,
+ // "pusha", then any C local variable or C argument references in the
+ // inline assembly will be wrong because the SP is not properly tracked.
+ if ((needsStackRealignment(MF) && MFI->hasVarSizedObjects()) ||
+ MF.hasMSInlineAsm())
+ return true;
+
+ return false;
+}
+
+bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ if (!MF.getTarget().Options.RealignStack)
+ return false;
+
+ // Stack realignment requires a frame pointer. If we already started
+ // register allocation with frame pointer elimination, it is too late now.
+ if (!MRI->canReserveReg(FramePtr))
+ return false;
+
+ // If a base pointer is necessary. Check that it isn't too late to reserve
+ // it.
+ if (MFI->hasVarSizedObjects())
+ return MRI->canReserveReg(BasePtr);
+ return true;
+}
+
+bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ // If we've requested that we force align the stack do so now.
+ if (ForceStackAlign)
+ return canRealignStack(MF);
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
+bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (Reg == FramePtr && TFI->hasFP(MF)) {
+ FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
+ return true;
+ }
+ return false;
+}
+
+void
+X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ unsigned BasePtr;
+
+ unsigned Opc = MI.getOpcode();
+ bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
+ if (hasBasePointer(MF))
+ BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister());
+ else if (needsStackRealignment(MF))
+ BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
+ else if (AfterFPPop)
+ BasePtr = StackPtr;
+ else
+ BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
+
+ // This must be part of a four operand memory reference. Replace the
+ // FrameIndex with base register with EBP. Add an offset to the offset.
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+
+ // Now add the frame object offset to the offset from EBP.
+ int FIOffset;
+ if (AfterFPPop) {
+ // Tail call jmp happens after FP is popped.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
+ } else
+ FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
+
+ if (MI.getOperand(FIOperandNum+3).isImm()) {
+ // Offset is a 32-bit integer.
+ int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
+ int Offset = FIOffset + Imm;
+ assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+ "Requesting 64-bit offset in 32-bit immediate!");
+ MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
+ } else {
+ // Offset is symbolic. This is extremely rare.
+ uint64_t Offset = FIOffset +
+ (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
+ MI.getOperand(FIOperandNum + 3).setOffset(Offset);
+ }
+}
+
+unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return TFI->hasFP(MF) ? FramePtr : StackPtr;
+}
+
+unsigned X86RegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+}
+
+unsigned X86RegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+}
+
+namespace llvm {
+unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
+ bool High) {
+ switch (VT) {
+ default: llvm_unreachable("Unexpected VT");
+ case MVT::i8:
+ if (High) {
+ switch (Reg) {
+ default: return getX86SubSuperRegister(Reg, MVT::i64);
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: llvm_unreachable("Unexpected register");
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case MVT::i16:
+ switch (Reg) {
+ default: llvm_unreachable("Unexpected register");
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case MVT::i32:
+ switch (Reg) {
+ default: llvm_unreachable("Unexpected register");
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case MVT::i64:
+ switch (Reg) {
+ default: llvm_unreachable("Unexpected register");
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+}
+}
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
new file mode 100644
index 000000000000..b9d7b8cf8b9a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -0,0 +1,143 @@
+//===-- X86RegisterInfo.h - X86 Register Information Impl -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86REGISTERINFO_H
+#define X86REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "X86GenRegisterInfo.inc"
+
+namespace llvm {
+ class Type;
+ class TargetInstrInfo;
+ class X86TargetMachine;
+
+class X86RegisterInfo : public X86GenRegisterInfo {
+public:
+ X86TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+private:
+ /// Is64Bit - Is the target 64-bits.
+ ///
+ bool Is64Bit;
+
+ /// IsWin64 - Is the target on of win64 flavours
+ ///
+ bool IsWin64;
+
+ /// SlotSize - Stack slot size in bytes.
+ ///
+ unsigned SlotSize;
+
+ /// StackPtr - X86 physical register used as stack ptr.
+ ///
+ unsigned StackPtr;
+
+ /// FramePtr - X86 physical register used as frame ptr.
+ ///
+ unsigned FramePtr;
+
+ /// BasePtr - X86 physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
+ /// variable size stack objects.
+ unsigned BasePtr;
+
+public:
+ X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
+
+ // FIXME: This should be tablegen'd like getDwarfRegNum is
+ int getSEHRegNum(unsigned i) const;
+
+ /// getCompactUnwindRegNum - This function maps the register to the number for
+ /// compact unwind encoding. Return -1 if the register isn't valid.
+ int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const;
+
+ /// Code Generation virtual methods...
+ ///
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ virtual const TargetRegisterClass *
+ getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const;
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
+ /// values.
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+ /// in the specified class to or from. Returns NULL if it is possible to copy
+ /// between a two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ /// getCalleeSavedRegs - Return a null-terminated list of all of the
+ /// callee-save registers on this target.
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ const uint32_t *getNoPreservedMask() const;
+
+ /// getReservedRegs - Returns a bitset indexed by physical register number
+ /// indicating if a register is a special register that has particular uses and
+ /// should be considered unavailable at all times, e.g. SP, RA. This is used by
+ /// register scavenger to determine what registers are free.
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasBasePointer(const MachineFunction &MF) const;
+
+ bool canRealignStack(const MachineFunction &MF) const;
+
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getStackRegister() const { return StackPtr; }
+ unsigned getBaseRegister() const { return BasePtr; }
+ // FIXME: Move to FrameInfok
+ unsigned getSlotSize() const { return SlotSize; }
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+// getX86SubSuperRegister - X86 utility function. It returns the sub or super
+// register of a specific X86 register.
+// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX
+unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
new file mode 100644
index 000000000000..be6282a643bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -0,0 +1,423 @@
+//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 Register file, defining the registers themselves,
+// aliases between the registers, and the register classes built out of the
+// registers.
+//
+//===----------------------------------------------------------------------===//
+
+class X86Reg<string n, bits<16> Enc, list<Register> subregs = []> : Register<n> {
+ let Namespace = "X86";
+ let HWEncoding = Enc;
+ let SubRegs = subregs;
+}
+
+// Subregister indices.
+let Namespace = "X86" in {
+ def sub_8bit : SubRegIndex;
+ def sub_8bit_hi : SubRegIndex;
+ def sub_16bit : SubRegIndex;
+ def sub_32bit : SubRegIndex;
+ def sub_xmm : SubRegIndex;
+}
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+
+// In the register alias definitions below, we define which registers alias
+// which others. We only specify which registers the small registers alias,
+// because the register file generator is smart enough to figure out that
+// AL aliases AX if we tell it that AX aliased AL (for example).
+
+// Dwarf numbering is different for 32-bit and 64-bit, and there are
+// variations by target as well. Currently the first entry is for X86-64,
+// second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux
+// and debug information on X86-32/Darwin)
+
+// 8-bit registers
+// Low registers
+def AL : X86Reg<"al", 0>;
+def DL : X86Reg<"dl", 2>;
+def CL : X86Reg<"cl", 1>;
+def BL : X86Reg<"bl", 3>;
+
+// High registers. On x86-64, these cannot be used in any instruction
+// with a REX prefix.
+def AH : X86Reg<"ah", 4>;
+def DH : X86Reg<"dh", 6>;
+def CH : X86Reg<"ch", 5>;
+def BH : X86Reg<"bh", 7>;
+
+// X86-64 only, requires REX.
+let CostPerUse = 1 in {
+def SIL : X86Reg<"sil", 6>;
+def DIL : X86Reg<"dil", 7>;
+def BPL : X86Reg<"bpl", 5>;
+def SPL : X86Reg<"spl", 4>;
+def R8B : X86Reg<"r8b", 8>;
+def R9B : X86Reg<"r9b", 9>;
+def R10B : X86Reg<"r10b", 10>;
+def R11B : X86Reg<"r11b", 11>;
+def R12B : X86Reg<"r12b", 12>;
+def R13B : X86Reg<"r13b", 13>;
+def R14B : X86Reg<"r14b", 14>;
+def R15B : X86Reg<"r15b", 15>;
+}
+
+// 16-bit registers
+let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
+def AX : X86Reg<"ax", 0, [AL,AH]>;
+def DX : X86Reg<"dx", 2, [DL,DH]>;
+def CX : X86Reg<"cx", 1, [CL,CH]>;
+def BX : X86Reg<"bx", 3, [BL,BH]>;
+}
+let SubRegIndices = [sub_8bit] in {
+def SI : X86Reg<"si", 6, [SIL]>;
+def DI : X86Reg<"di", 7, [DIL]>;
+def BP : X86Reg<"bp", 5, [BPL]>;
+def SP : X86Reg<"sp", 4, [SPL]>;
+}
+def IP : X86Reg<"ip", 0>;
+
+// X86-64 only, requires REX.
+let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
+def R8W : X86Reg<"r8w", 8, [R8B]>;
+def R9W : X86Reg<"r9w", 9, [R9B]>;
+def R10W : X86Reg<"r10w", 10, [R10B]>;
+def R11W : X86Reg<"r11w", 11, [R11B]>;
+def R12W : X86Reg<"r12w", 12, [R12B]>;
+def R13W : X86Reg<"r13w", 13, [R13B]>;
+def R14W : X86Reg<"r14w", 14, [R14B]>;
+def R15W : X86Reg<"r15w", 15, [R15B]>;
+}
+
+// 32-bit registers
+let SubRegIndices = [sub_16bit] in {
+def EAX : X86Reg<"eax", 0, [AX]>, DwarfRegNum<[-2, 0, 0]>;
+def EDX : X86Reg<"edx", 2, [DX]>, DwarfRegNum<[-2, 2, 2]>;
+def ECX : X86Reg<"ecx", 1, [CX]>, DwarfRegNum<[-2, 1, 1]>;
+def EBX : X86Reg<"ebx", 3, [BX]>, DwarfRegNum<[-2, 3, 3]>;
+def ESI : X86Reg<"esi", 6, [SI]>, DwarfRegNum<[-2, 6, 6]>;
+def EDI : X86Reg<"edi", 7, [DI]>, DwarfRegNum<[-2, 7, 7]>;
+def EBP : X86Reg<"ebp", 5, [BP]>, DwarfRegNum<[-2, 4, 5]>;
+def ESP : X86Reg<"esp", 4, [SP]>, DwarfRegNum<[-2, 5, 4]>;
+def EIP : X86Reg<"eip", 0, [IP]>, DwarfRegNum<[-2, 8, 8]>;
+
+// X86-64 only, requires REX
+let CostPerUse = 1 in {
+def R8D : X86Reg<"r8d", 8, [R8W]>;
+def R9D : X86Reg<"r9d", 9, [R9W]>;
+def R10D : X86Reg<"r10d", 10, [R10W]>;
+def R11D : X86Reg<"r11d", 11, [R11W]>;
+def R12D : X86Reg<"r12d", 12, [R12W]>;
+def R13D : X86Reg<"r13d", 13, [R13W]>;
+def R14D : X86Reg<"r14d", 14, [R14W]>;
+def R15D : X86Reg<"r15d", 15, [R15W]>;
+}}
+
+// 64-bit registers, X86-64 only
+let SubRegIndices = [sub_32bit] in {
+def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>;
+def RDX : X86Reg<"rdx", 2, [EDX]>, DwarfRegNum<[1, -2, -2]>;
+def RCX : X86Reg<"rcx", 1, [ECX]>, DwarfRegNum<[2, -2, -2]>;
+def RBX : X86Reg<"rbx", 3, [EBX]>, DwarfRegNum<[3, -2, -2]>;
+def RSI : X86Reg<"rsi", 6, [ESI]>, DwarfRegNum<[4, -2, -2]>;
+def RDI : X86Reg<"rdi", 7, [EDI]>, DwarfRegNum<[5, -2, -2]>;
+def RBP : X86Reg<"rbp", 5, [EBP]>, DwarfRegNum<[6, -2, -2]>;
+def RSP : X86Reg<"rsp", 4, [ESP]>, DwarfRegNum<[7, -2, -2]>;
+
+// These also require REX.
+let CostPerUse = 1 in {
+def R8 : X86Reg<"r8", 8, [R8D]>, DwarfRegNum<[ 8, -2, -2]>;
+def R9 : X86Reg<"r9", 9, [R9D]>, DwarfRegNum<[ 9, -2, -2]>;
+def R10 : X86Reg<"r10", 10, [R10D]>, DwarfRegNum<[10, -2, -2]>;
+def R11 : X86Reg<"r11", 11, [R11D]>, DwarfRegNum<[11, -2, -2]>;
+def R12 : X86Reg<"r12", 12, [R12D]>, DwarfRegNum<[12, -2, -2]>;
+def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>;
+def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>;
+def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>;
+def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>;
+}}
+
+// MMX Registers. These are actually aliased to ST0 .. ST7
+def MM0 : X86Reg<"mm0", 0>, DwarfRegNum<[41, 29, 29]>;
+def MM1 : X86Reg<"mm1", 1>, DwarfRegNum<[42, 30, 30]>;
+def MM2 : X86Reg<"mm2", 2>, DwarfRegNum<[43, 31, 31]>;
+def MM3 : X86Reg<"mm3", 3>, DwarfRegNum<[44, 32, 32]>;
+def MM4 : X86Reg<"mm4", 4>, DwarfRegNum<[45, 33, 33]>;
+def MM5 : X86Reg<"mm5", 5>, DwarfRegNum<[46, 34, 34]>;
+def MM6 : X86Reg<"mm6", 6>, DwarfRegNum<[47, 35, 35]>;
+def MM7 : X86Reg<"mm7", 7>, DwarfRegNum<[48, 36, 36]>;
+
+// Pseudo Floating Point registers
+def FP0 : X86Reg<"fp0", 0>;
+def FP1 : X86Reg<"fp1", 0>;
+def FP2 : X86Reg<"fp2", 0>;
+def FP3 : X86Reg<"fp3", 0>;
+def FP4 : X86Reg<"fp4", 0>;
+def FP5 : X86Reg<"fp5", 0>;
+def FP6 : X86Reg<"fp6", 0>;
+
+// XMM Registers, used by the various SSE instruction set extensions.
+def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>;
+def XMM1: X86Reg<"xmm1", 1>, DwarfRegNum<[18, 22, 22]>;
+def XMM2: X86Reg<"xmm2", 2>, DwarfRegNum<[19, 23, 23]>;
+def XMM3: X86Reg<"xmm3", 3>, DwarfRegNum<[20, 24, 24]>;
+def XMM4: X86Reg<"xmm4", 4>, DwarfRegNum<[21, 25, 25]>;
+def XMM5: X86Reg<"xmm5", 5>, DwarfRegNum<[22, 26, 26]>;
+def XMM6: X86Reg<"xmm6", 6>, DwarfRegNum<[23, 27, 27]>;
+def XMM7: X86Reg<"xmm7", 7>, DwarfRegNum<[24, 28, 28]>;
+
+// X86-64 only
+let CostPerUse = 1 in {
+def XMM8: X86Reg<"xmm8", 8>, DwarfRegNum<[25, -2, -2]>;
+def XMM9: X86Reg<"xmm9", 9>, DwarfRegNum<[26, -2, -2]>;
+def XMM10: X86Reg<"xmm10", 10>, DwarfRegNum<[27, -2, -2]>;
+def XMM11: X86Reg<"xmm11", 11>, DwarfRegNum<[28, -2, -2]>;
+def XMM12: X86Reg<"xmm12", 12>, DwarfRegNum<[29, -2, -2]>;
+def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
+def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
+def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
+} // CostPerUse
+
+// YMM Registers, used by AVX instructions
+let SubRegIndices = [sub_xmm] in {
+def YMM0: X86Reg<"ymm0", 0, [XMM0]>, DwarfRegAlias<XMM0>;
+def YMM1: X86Reg<"ymm1", 1, [XMM1]>, DwarfRegAlias<XMM1>;
+def YMM2: X86Reg<"ymm2", 2, [XMM2]>, DwarfRegAlias<XMM2>;
+def YMM3: X86Reg<"ymm3", 3, [XMM3]>, DwarfRegAlias<XMM3>;
+def YMM4: X86Reg<"ymm4", 4, [XMM4]>, DwarfRegAlias<XMM4>;
+def YMM5: X86Reg<"ymm5", 5, [XMM5]>, DwarfRegAlias<XMM5>;
+def YMM6: X86Reg<"ymm6", 6, [XMM6]>, DwarfRegAlias<XMM6>;
+def YMM7: X86Reg<"ymm7", 7, [XMM7]>, DwarfRegAlias<XMM7>;
+def YMM8: X86Reg<"ymm8", 8, [XMM8]>, DwarfRegAlias<XMM8>;
+def YMM9: X86Reg<"ymm9", 9, [XMM9]>, DwarfRegAlias<XMM9>;
+def YMM10: X86Reg<"ymm10", 10, [XMM10]>, DwarfRegAlias<XMM10>;
+def YMM11: X86Reg<"ymm11", 11, [XMM11]>, DwarfRegAlias<XMM11>;
+def YMM12: X86Reg<"ymm12", 12, [XMM12]>, DwarfRegAlias<XMM12>;
+def YMM13: X86Reg<"ymm13", 13, [XMM13]>, DwarfRegAlias<XMM13>;
+def YMM14: X86Reg<"ymm14", 14, [XMM14]>, DwarfRegAlias<XMM14>;
+def YMM15: X86Reg<"ymm15", 15, [XMM15]>, DwarfRegAlias<XMM15>;
+}
+
+class STRegister<string n, bits<16> Enc, list<Register> A> : X86Reg<n, Enc> {
+ let Aliases = A;
+}
+
+// Floating point stack registers. These don't map one-to-one to the FP
+// pseudo registers, but we still mark them as aliasing FP registers. That
+// way both kinds can be live without exceeding the stack depth. ST registers
+// are only live around inline assembly.
+def ST0 : STRegister<"st(0)", 0, []>, DwarfRegNum<[33, 12, 11]>;
+def ST1 : STRegister<"st(1)", 1, [FP6]>, DwarfRegNum<[34, 13, 12]>;
+def ST2 : STRegister<"st(2)", 2, [FP5]>, DwarfRegNum<[35, 14, 13]>;
+def ST3 : STRegister<"st(3)", 3, [FP4]>, DwarfRegNum<[36, 15, 14]>;
+def ST4 : STRegister<"st(4)", 4, [FP3]>, DwarfRegNum<[37, 16, 15]>;
+def ST5 : STRegister<"st(5)", 5, [FP2]>, DwarfRegNum<[38, 17, 16]>;
+def ST6 : STRegister<"st(6)", 6, [FP1]>, DwarfRegNum<[39, 18, 17]>;
+def ST7 : STRegister<"st(7)", 7, [FP0]>, DwarfRegNum<[40, 19, 18]>;
+
+// Floating-point status word
+def FPSW : X86Reg<"fpsw", 0>;
+
+// Status flags register
+def EFLAGS : X86Reg<"flags", 0>;
+
+// Segment registers
+def CS : X86Reg<"cs", 1>;
+def DS : X86Reg<"ds", 3>;
+def SS : X86Reg<"ss", 2>;
+def ES : X86Reg<"es", 0>;
+def FS : X86Reg<"fs", 4>;
+def GS : X86Reg<"gs", 5>;
+
+// Debug registers
+def DR0 : X86Reg<"dr0", 0>;
+def DR1 : X86Reg<"dr1", 1>;
+def DR2 : X86Reg<"dr2", 2>;
+def DR3 : X86Reg<"dr3", 3>;
+def DR4 : X86Reg<"dr4", 4>;
+def DR5 : X86Reg<"dr5", 5>;
+def DR6 : X86Reg<"dr6", 6>;
+def DR7 : X86Reg<"dr7", 7>;
+
+// Control registers
+def CR0 : X86Reg<"cr0", 0>;
+def CR1 : X86Reg<"cr1", 1>;
+def CR2 : X86Reg<"cr2", 2>;
+def CR3 : X86Reg<"cr3", 3>;
+def CR4 : X86Reg<"cr4", 4>;
+def CR5 : X86Reg<"cr5", 5>;
+def CR6 : X86Reg<"cr6", 6>;
+def CR7 : X86Reg<"cr7", 7>;
+def CR8 : X86Reg<"cr8", 8>;
+def CR9 : X86Reg<"cr9", 9>;
+def CR10 : X86Reg<"cr10", 10>;
+def CR11 : X86Reg<"cr11", 11>;
+def CR12 : X86Reg<"cr12", 12>;
+def CR13 : X86Reg<"cr13", 13>;
+def CR14 : X86Reg<"cr14", 14>;
+def CR15 : X86Reg<"cr15", 15>;
+
+// Pseudo index registers
+def EIZ : X86Reg<"eiz", 4>;
+def RIZ : X86Reg<"riz", 4>;
+
+
+//===----------------------------------------------------------------------===//
+// Register Class Definitions... now that we have all of the pieces, define the
+// top-level register classes. The order specified in the register list is
+// implicitly defined to be the register allocation order.
+//
+
+// List call-clobbered registers before callee-save registers. RBX, RBP, (and
+// R12, R13, R14, and R15 for X86-64) are callee-save registers.
+// In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and
+// R8B, ... R15B.
+// Allocate R12 and R13 last, as these require an extra byte when
+// encoded in x86_64 instructions.
+// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
+// 64-bit mode. The main complication is that they cannot be encoded in an
+// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
+// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
+// cannot be encoded.
+def GR8 : RegisterClass<"X86", [i8], 8,
+ (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
+ let AltOrders = [(sub GR8, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+
+def GR16 : RegisterClass<"X86", [i16], 16,
+ (add AX, CX, DX, SI, DI, BX, BP, SP,
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>;
+
+def GR32 : RegisterClass<"X86", [i32], 32,
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
+
+// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
+// RIP isn't really a register and it can't be used anywhere except in an
+// address, but it doesn't cause trouble.
+def GR64 : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
+
+// Segment registers for use by MOV instructions (and others) that have a
+// segment register as one operand. Always contain a 16-bit segment
+// descriptor.
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
+
+// Debug registers.
+def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>;
+
+// Control registers.
+def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
+
+// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
+// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
+// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
+// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
+// and GR64_ABCD are classes for registers that support 8-bit h-register
+// operations.
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>;
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>;
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>;
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
+def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
+ R8, R9, R11, RIP)>;
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
+ R8, R9, R11)>;
+
+// GR8_NOREX - GR8 registers which do not require a REX prefix.
+def GR8_NOREX : RegisterClass<"X86", [i8], 8,
+ (add AL, CL, DL, AH, CH, DH, BL, BH)> {
+ let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+// GR16_NOREX - GR16 registers which do not require a REX prefix.
+def GR16_NOREX : RegisterClass<"X86", [i16], 16,
+ (add AX, CX, DX, SI, DI, BX, BP, SP)>;
+// GR32_NOREX - GR32 registers which do not require a REX prefix.
+def GR32_NOREX : RegisterClass<"X86", [i32], 32,
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)>;
+// GR64_NOREX - GR64 registers which do not require a REX prefix.
+def GR64_NOREX : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>;
+
+// GR32_NOAX - GR32 registers except EAX. Used by AddRegFrm of XCHG32 in 64-bit
+// mode to prevent encoding using the 0x90 NOP encoding. xchg %eax, %eax needs
+// to clear upper 32-bits of RAX so is not a NOP.
+def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)>;
+
+// GR32_NOSP - GR32 registers except ESP.
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;
+
+// GR64_NOSP - GR64 registers except RSP (and RIP).
+def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)>;
+
+// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
+// ESP.
+def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
+ (and GR32_NOREX, GR32_NOSP)>;
+
+// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
+def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
+ (and GR64_NOREX, GR64_NOSP)>;
+
+// A class to support the 'A' assembler constraint: EAX then EDX.
+def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
+
+// Scalar SSE2 floating point registers.
+def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
+
+def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
+
+
+// FIXME: This sets up the floating point register files as though they are f64
+// values, though they really are f80 values. This will cause us to spill
+// values as 64-bit quantities instead of 80-bit quantities, which is much much
+// faster on common hardware. In reality, this should be controlled by a
+// command line option or something.
+
+def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
+def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
+def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
+
+// Floating point stack registers (these are not allocatable by the
+// register allocator - the floating point stackifier is responsible
+// for transforming FPn allocations to STn registers)
+def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
+ let isAllocatable = 0;
+}
+
+// Generic vector registers: VR64 and VR128.
+def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (add FR32)>;
+def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ 256, (sequence "YMM%u", 0, 15)>;
+
+// Status flags registers.
+def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
+def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86Relocations.h b/contrib/llvm/lib/Target/X86/X86Relocations.h
new file mode 100644
index 000000000000..03330566161d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Relocations.h
@@ -0,0 +1,52 @@
+//===-- X86Relocations.h - X86 Code Relocations -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86RELOCATIONS_H
+#define X86RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace X86 {
+ /// RelocationType - An enum for the x86 relocation codes. Note that
+ /// the terminology here doesn't follow x86 convention - word means
+ /// 32-bit and dword means 64-bit. The relocations will be treated
+ /// by JIT or ObjectCode emitters, this is transparent to the x86 code
+ /// emitter but JIT and ObjectCode will treat them differently
+ enum RelocationType {
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
+ reloc_picrel_word = 1,
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_word = 2,
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
new file mode 100644
index 000000000000..7de6791f2e48
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -0,0 +1,126 @@
+//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Haswell to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def HaswellModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and HW can decode 4
+ // instructions per cycle.
+ let IssueWidth = 4;
+ let MinLatency = 0; // 0 = Out-of-order execution.
+ let LoadLatency = 4;
+ let ILPWindow = 40;
+ let MispredictPenalty = 16;
+}
+
+let SchedModel = HaswellModel in {
+
+// Haswell can issue micro-ops to 8 different ports in one cycle.
+
+// Ports 0, 1, 5, 6 and 7 handle all computation.
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores. Port 7 can handle address calculations.
+def HWPort0 : ProcResource<1>;
+def HWPort1 : ProcResource<1>;
+def HWPort2 : ProcResource<1>;
+def HWPort3 : ProcResource<1>;
+def HWPort4 : ProcResource<1>;
+def HWPort5 : ProcResource<1>;
+def HWPort6 : ProcResource<1>;
+def HWPort7 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>;
+def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
+def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
+def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
+def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
+def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
+def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
+
+// Integer division issued on port 0.
+def HWDivider : ProcResource<1>;
+
+// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // Register variant is using a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> {
+ let Latency = !add(Lat, 4);
+ }
+}
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [HWPort4]>;
+
+def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
+def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 4; }
+def : WriteRes<WriteMove, [HWPort0156]>;
+def : WriteRes<WriteZero, []>;
+
+defm : HWWriteResPair<WriteALU, HWPort0156, 1>;
+defm : HWWriteResPair<WriteIMul, HWPort1, 3>;
+defm : HWWriteResPair<WriteShift, HWPort056, 1>;
+defm : HWWriteResPair<WriteJump, HWPort5, 1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [HWPort15]>;
+
+// This is quite rough, latency depends on the dividend.
+def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
+ let Latency = 29;
+ let ResourceCycles = [1, 1, 10];
+}
+
+// Scalar and vector floating point.
+defm : HWWriteResPair<WriteFAdd, HWPort1, 3>;
+defm : HWWriteResPair<WriteFMul, HWPort0, 5>;
+defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles.
+defm : HWWriteResPair<WriteFRcp, HWPort0, 5>;
+defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>;
+defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
+defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
+defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>;
+
+// Vector integer operations.
+defm : HWWriteResPair<WriteVecShift, HWPort05, 1>;
+defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>;
+defm : HWWriteResPair<WriteVecALU, HWPort15, 1>;
+defm : HWWriteResPair<WriteVecIMul, HWPort0, 5>;
+defm : HWWriteResPair<WriteShuffle, HWPort15, 1>;
+
+def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
+} // SchedModel
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
new file mode 100644
index 000000000000..74d5f1b6eba8
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -0,0 +1,122 @@
+//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Sandy Bridge to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def SandyBridgeModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and SB can decode 4
+ // instructions per cycle.
+ // FIXME: Identify instructions that aren't a single fused micro-op.
+ let IssueWidth = 4;
+ let MinLatency = 0; // 0 = Out-of-order execution.
+ let LoadLatency = 4;
+ let ILPWindow = 30;
+ let MispredictPenalty = 16;
+}
+
+let SchedModel = SandyBridgeModel in {
+
+// Sandy Bridge can issue micro-ops to 6 different ports in one cycle.
+
+// Ports 0, 1, and 5 handle all computation.
+def SBPort0 : ProcResource<1>;
+def SBPort1 : ProcResource<1>;
+def SBPort5 : ProcResource<1>;
+
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores.
+def SBPort23 : ProcResource<2>;
+
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+def SBPort4 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>;
+def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>;
+def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;
+
+// Integer division issued on port 0.
+def SBDivider : ProcResource<1>;
+
+// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // Register variant is using a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> {
+ let Latency = !add(Lat, 4);
+ }
+}
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [SBPort4]>;
+
+def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
+def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 4; }
+def : WriteRes<WriteMove, [SBPort015]>;
+def : WriteRes<WriteZero, []>;
+
+defm : SBWriteResPair<WriteALU, SBPort015, 1>;
+defm : SBWriteResPair<WriteIMul, SBPort1, 3>;
+defm : SBWriteResPair<WriteShift, SBPort05, 1>;
+defm : SBWriteResPair<WriteJump, SBPort5, 1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [SBPort15]>;
+
+// This is quite rough, latency depends on the dividend.
+def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
+ let Latency = 29;
+ let ResourceCycles = [1, 1, 10];
+}
+
+// Scalar and vector floating point.
+defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;
+defm : SBWriteResPair<WriteFMul, SBPort0, 5>;
+defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles.
+defm : SBWriteResPair<WriteFRcp, SBPort0, 5>;
+defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
+defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
+defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
+defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
+
+// Vector integer operations.
+defm : SBWriteResPair<WriteVecShift, SBPort05, 1>;
+defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
+defm : SBWriteResPair<WriteVecALU, SBPort15, 1>;
+defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>;
+defm : SBWriteResPair<WriteShuffle, SBPort15, 1>;
+
+def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
+} // SchedModel
diff --git a/contrib/llvm/lib/Target/X86/X86Schedule.td b/contrib/llvm/lib/Target/X86/X86Schedule.td
new file mode 100644
index 000000000000..9fbde88b7100
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Schedule.td
@@ -0,0 +1,573 @@
+//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// InstrSchedModel annotations for out-of-order CPUs.
+//
+// These annotations are independent of the itinerary classes defined below.
+
+// Instructions with folded loads need to read the memory operand immediately,
+// but other register operands don't have to be read until the load is ready.
+// These operands are marked with ReadAfterLd.
+def ReadAfterLd : SchedRead;
+
+// Instructions with both a load and a store folded are modeled as a folded
+// load + WriteRMW.
+def WriteRMW : SchedWrite;
+
+// Most instructions can fold loads, so almost every SchedWrite comes in two
+// variants: With and without a folded load.
+// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
+// with a folded load.
+class X86FoldableSchedWrite : SchedWrite {
+ // The SchedWrite to use when a load is folded into the instruction.
+ SchedWrite Folded;
+}
+
+// Multiclass that produces a linked pair of SchedWrites.
+multiclass X86SchedWritePair {
+ // Register-Memory operation.
+ def Ld : SchedWrite;
+ // Register-Register operation.
+ def NAME : X86FoldableSchedWrite {
+ let Folded = !cast<SchedWrite>(NAME#"Ld");
+ }
+}
+
+// Arithmetic.
+defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
+defm WriteIMul : X86SchedWritePair; // Integer multiplication.
+defm WriteIDiv : X86SchedWritePair; // Integer division.
+def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+
+// Integer shifts and rotates.
+defm WriteShift : X86SchedWritePair;
+
+// Loads, stores, and moves, not folded with other operations.
+def WriteLoad : SchedWrite;
+def WriteStore : SchedWrite;
+def WriteMove : SchedWrite;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def WriteZero : SchedWrite;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm WriteJump : X86SchedWritePair;
+
+// Floating point. This covers both scalar and vector operations.
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
+defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
+defm WriteFDiv : X86SchedWritePair; // Floating point division.
+defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
+defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
+defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
+
+// FMA Scheduling helper class.
+class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
+
+// Vector integer operations.
+defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
+defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
+
+// Vector bitwise operations.
+// These are often used on both floating point and integer vectors.
+defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
+defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends.
+
+// Conversion between integer and float.
+defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
+defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
+defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
+
+// Catch-all for expensive system instructions.
+def WriteSystem : SchedWrite;
+
+// Old microcoded instructions that nobody use.
+def WriteMicrocoded : SchedWrite;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for X86
+def IIC_ALU_MEM : InstrItinClass;
+def IIC_ALU_NONMEM : InstrItinClass;
+def IIC_LEA : InstrItinClass;
+def IIC_LEA_16 : InstrItinClass;
+def IIC_MUL8 : InstrItinClass;
+def IIC_MUL16_MEM : InstrItinClass;
+def IIC_MUL16_REG : InstrItinClass;
+def IIC_MUL32_MEM : InstrItinClass;
+def IIC_MUL32_REG : InstrItinClass;
+def IIC_MUL64 : InstrItinClass;
+// imul by al, ax, eax, tax
+def IIC_IMUL8 : InstrItinClass;
+def IIC_IMUL16_MEM : InstrItinClass;
+def IIC_IMUL16_REG : InstrItinClass;
+def IIC_IMUL32_MEM : InstrItinClass;
+def IIC_IMUL32_REG : InstrItinClass;
+def IIC_IMUL64 : InstrItinClass;
+// imul reg by reg|mem
+def IIC_IMUL16_RM : InstrItinClass;
+def IIC_IMUL16_RR : InstrItinClass;
+def IIC_IMUL32_RM : InstrItinClass;
+def IIC_IMUL32_RR : InstrItinClass;
+def IIC_IMUL64_RM : InstrItinClass;
+def IIC_IMUL64_RR : InstrItinClass;
+// imul reg = reg/mem * imm
+def IIC_IMUL16_RMI : InstrItinClass;
+def IIC_IMUL16_RRI : InstrItinClass;
+def IIC_IMUL32_RMI : InstrItinClass;
+def IIC_IMUL32_RRI : InstrItinClass;
+def IIC_IMUL64_RMI : InstrItinClass;
+def IIC_IMUL64_RRI : InstrItinClass;
+// div
+def IIC_DIV8_MEM : InstrItinClass;
+def IIC_DIV8_REG : InstrItinClass;
+def IIC_DIV16 : InstrItinClass;
+def IIC_DIV32 : InstrItinClass;
+def IIC_DIV64 : InstrItinClass;
+// idiv
+def IIC_IDIV8 : InstrItinClass;
+def IIC_IDIV16 : InstrItinClass;
+def IIC_IDIV32 : InstrItinClass;
+def IIC_IDIV64 : InstrItinClass;
+// neg/not/inc/dec
+def IIC_UNARY_REG : InstrItinClass;
+def IIC_UNARY_MEM : InstrItinClass;
+// add/sub/and/or/xor/adc/sbc/cmp/test
+def IIC_BIN_MEM : InstrItinClass;
+def IIC_BIN_NONMEM : InstrItinClass;
+// shift/rotate
+def IIC_SR : InstrItinClass;
+// shift double
+def IIC_SHD16_REG_IM : InstrItinClass;
+def IIC_SHD16_REG_CL : InstrItinClass;
+def IIC_SHD16_MEM_IM : InstrItinClass;
+def IIC_SHD16_MEM_CL : InstrItinClass;
+def IIC_SHD32_REG_IM : InstrItinClass;
+def IIC_SHD32_REG_CL : InstrItinClass;
+def IIC_SHD32_MEM_IM : InstrItinClass;
+def IIC_SHD32_MEM_CL : InstrItinClass;
+def IIC_SHD64_REG_IM : InstrItinClass;
+def IIC_SHD64_REG_CL : InstrItinClass;
+def IIC_SHD64_MEM_IM : InstrItinClass;
+def IIC_SHD64_MEM_CL : InstrItinClass;
+// cmov
+def IIC_CMOV16_RM : InstrItinClass;
+def IIC_CMOV16_RR : InstrItinClass;
+def IIC_CMOV32_RM : InstrItinClass;
+def IIC_CMOV32_RR : InstrItinClass;
+def IIC_CMOV64_RM : InstrItinClass;
+def IIC_CMOV64_RR : InstrItinClass;
+// set
+def IIC_SET_R : InstrItinClass;
+def IIC_SET_M : InstrItinClass;
+// jmp/jcc/jcxz
+def IIC_Jcc : InstrItinClass;
+def IIC_JCXZ : InstrItinClass;
+def IIC_JMP_REL : InstrItinClass;
+def IIC_JMP_REG : InstrItinClass;
+def IIC_JMP_MEM : InstrItinClass;
+def IIC_JMP_FAR_MEM : InstrItinClass;
+def IIC_JMP_FAR_PTR : InstrItinClass;
+// loop
+def IIC_LOOP : InstrItinClass;
+def IIC_LOOPE : InstrItinClass;
+def IIC_LOOPNE : InstrItinClass;
+// call
+def IIC_CALL_RI : InstrItinClass;
+def IIC_CALL_MEM : InstrItinClass;
+def IIC_CALL_FAR_MEM : InstrItinClass;
+def IIC_CALL_FAR_PTR : InstrItinClass;
+// ret
+def IIC_RET : InstrItinClass;
+def IIC_RET_IMM : InstrItinClass;
+//sign extension movs
+def IIC_MOVSX : InstrItinClass;
+def IIC_MOVSX_R16_R8 : InstrItinClass;
+def IIC_MOVSX_R16_M8 : InstrItinClass;
+def IIC_MOVSX_R16_R16 : InstrItinClass;
+def IIC_MOVSX_R32_R32 : InstrItinClass;
+//zero extension movs
+def IIC_MOVZX : InstrItinClass;
+def IIC_MOVZX_R16_R8 : InstrItinClass;
+def IIC_MOVZX_R16_M8 : InstrItinClass;
+
+def IIC_REP_MOVS : InstrItinClass;
+def IIC_REP_STOS : InstrItinClass;
+
+// SSE scalar/parallel binary operations
+def IIC_SSE_ALU_F32S_RR : InstrItinClass;
+def IIC_SSE_ALU_F32S_RM : InstrItinClass;
+def IIC_SSE_ALU_F64S_RR : InstrItinClass;
+def IIC_SSE_ALU_F64S_RM : InstrItinClass;
+def IIC_SSE_MUL_F32S_RR : InstrItinClass;
+def IIC_SSE_MUL_F32S_RM : InstrItinClass;
+def IIC_SSE_MUL_F64S_RR : InstrItinClass;
+def IIC_SSE_MUL_F64S_RM : InstrItinClass;
+def IIC_SSE_DIV_F32S_RR : InstrItinClass;
+def IIC_SSE_DIV_F32S_RM : InstrItinClass;
+def IIC_SSE_DIV_F64S_RR : InstrItinClass;
+def IIC_SSE_DIV_F64S_RM : InstrItinClass;
+def IIC_SSE_ALU_F32P_RR : InstrItinClass;
+def IIC_SSE_ALU_F32P_RM : InstrItinClass;
+def IIC_SSE_ALU_F64P_RR : InstrItinClass;
+def IIC_SSE_ALU_F64P_RM : InstrItinClass;
+def IIC_SSE_MUL_F32P_RR : InstrItinClass;
+def IIC_SSE_MUL_F32P_RM : InstrItinClass;
+def IIC_SSE_MUL_F64P_RR : InstrItinClass;
+def IIC_SSE_MUL_F64P_RM : InstrItinClass;
+def IIC_SSE_DIV_F32P_RR : InstrItinClass;
+def IIC_SSE_DIV_F32P_RM : InstrItinClass;
+def IIC_SSE_DIV_F64P_RR : InstrItinClass;
+def IIC_SSE_DIV_F64P_RM : InstrItinClass;
+
+def IIC_SSE_COMIS_RR : InstrItinClass;
+def IIC_SSE_COMIS_RM : InstrItinClass;
+
+def IIC_SSE_HADDSUB_RR : InstrItinClass;
+def IIC_SSE_HADDSUB_RM : InstrItinClass;
+
+def IIC_SSE_BIT_P_RR : InstrItinClass;
+def IIC_SSE_BIT_P_RM : InstrItinClass;
+
+def IIC_SSE_INTALU_P_RR : InstrItinClass;
+def IIC_SSE_INTALU_P_RM : InstrItinClass;
+def IIC_SSE_INTALUQ_P_RR : InstrItinClass;
+def IIC_SSE_INTALUQ_P_RM : InstrItinClass;
+
+def IIC_SSE_INTMUL_P_RR : InstrItinClass;
+def IIC_SSE_INTMUL_P_RM : InstrItinClass;
+
+def IIC_SSE_INTSH_P_RR : InstrItinClass;
+def IIC_SSE_INTSH_P_RM : InstrItinClass;
+def IIC_SSE_INTSH_P_RI : InstrItinClass;
+
+def IIC_SSE_CMPP_RR : InstrItinClass;
+def IIC_SSE_CMPP_RM : InstrItinClass;
+
+def IIC_SSE_SHUFP : InstrItinClass;
+def IIC_SSE_PSHUF : InstrItinClass;
+
+def IIC_SSE_UNPCK : InstrItinClass;
+
+def IIC_SSE_MOVMSK : InstrItinClass;
+def IIC_SSE_MASKMOV : InstrItinClass;
+
+def IIC_SSE_PEXTRW : InstrItinClass;
+def IIC_SSE_PINSRW : InstrItinClass;
+
+def IIC_SSE_PABS_RR : InstrItinClass;
+def IIC_SSE_PABS_RM : InstrItinClass;
+
+def IIC_SSE_SQRTP_RR : InstrItinClass;
+def IIC_SSE_SQRTP_RM : InstrItinClass;
+def IIC_SSE_SQRTS_RR : InstrItinClass;
+def IIC_SSE_SQRTS_RM : InstrItinClass;
+
+def IIC_SSE_RCPP_RR : InstrItinClass;
+def IIC_SSE_RCPP_RM : InstrItinClass;
+def IIC_SSE_RCPS_RR : InstrItinClass;
+def IIC_SSE_RCPS_RM : InstrItinClass;
+
+def IIC_SSE_MOV_S_RR : InstrItinClass;
+def IIC_SSE_MOV_S_RM : InstrItinClass;
+def IIC_SSE_MOV_S_MR : InstrItinClass;
+
+def IIC_SSE_MOVA_P_RR : InstrItinClass;
+def IIC_SSE_MOVA_P_RM : InstrItinClass;
+def IIC_SSE_MOVA_P_MR : InstrItinClass;
+
+def IIC_SSE_MOVU_P_RR : InstrItinClass;
+def IIC_SSE_MOVU_P_RM : InstrItinClass;
+def IIC_SSE_MOVU_P_MR : InstrItinClass;
+
+def IIC_SSE_MOVDQ : InstrItinClass;
+def IIC_SSE_MOVD_ToGP : InstrItinClass;
+def IIC_SSE_MOVQ_RR : InstrItinClass;
+
+def IIC_SSE_MOV_LH : InstrItinClass;
+
+def IIC_SSE_LDDQU : InstrItinClass;
+
+def IIC_SSE_MOVNT : InstrItinClass;
+
+def IIC_SSE_PHADDSUBD_RR : InstrItinClass;
+def IIC_SSE_PHADDSUBD_RM : InstrItinClass;
+def IIC_SSE_PHADDSUBSW_RR : InstrItinClass;
+def IIC_SSE_PHADDSUBSW_RM : InstrItinClass;
+def IIC_SSE_PHADDSUBW_RR : InstrItinClass;
+def IIC_SSE_PHADDSUBW_RM : InstrItinClass;
+def IIC_SSE_PSHUFB_RR : InstrItinClass;
+def IIC_SSE_PSHUFB_RM : InstrItinClass;
+def IIC_SSE_PSIGN_RR : InstrItinClass;
+def IIC_SSE_PSIGN_RM : InstrItinClass;
+
+def IIC_SSE_PMADD : InstrItinClass;
+def IIC_SSE_PMULHRSW : InstrItinClass;
+def IIC_SSE_PALIGNR : InstrItinClass;
+def IIC_SSE_MWAIT : InstrItinClass;
+def IIC_SSE_MONITOR : InstrItinClass;
+
+def IIC_SSE_PREFETCH : InstrItinClass;
+def IIC_SSE_PAUSE : InstrItinClass;
+def IIC_SSE_LFENCE : InstrItinClass;
+def IIC_SSE_MFENCE : InstrItinClass;
+def IIC_SSE_SFENCE : InstrItinClass;
+def IIC_SSE_LDMXCSR : InstrItinClass;
+def IIC_SSE_STMXCSR : InstrItinClass;
+
+def IIC_SSE_CVT_PD_RR : InstrItinClass;
+def IIC_SSE_CVT_PD_RM : InstrItinClass;
+def IIC_SSE_CVT_PS_RR : InstrItinClass;
+def IIC_SSE_CVT_PS_RM : InstrItinClass;
+def IIC_SSE_CVT_PI2PS_RR : InstrItinClass;
+def IIC_SSE_CVT_PI2PS_RM : InstrItinClass;
+def IIC_SSE_CVT_Scalar_RR : InstrItinClass;
+def IIC_SSE_CVT_Scalar_RM : InstrItinClass;
+def IIC_SSE_CVT_SS2SI32_RM : InstrItinClass;
+def IIC_SSE_CVT_SS2SI32_RR : InstrItinClass;
+def IIC_SSE_CVT_SS2SI64_RM : InstrItinClass;
+def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass;
+def IIC_SSE_CVT_SD2SI_RM : InstrItinClass;
+def IIC_SSE_CVT_SD2SI_RR : InstrItinClass;
+
+// MMX
+def IIC_MMX_MOV_MM_RM : InstrItinClass;
+def IIC_MMX_MOV_REG_MM : InstrItinClass;
+def IIC_MMX_MOVQ_RM : InstrItinClass;
+def IIC_MMX_MOVQ_RR : InstrItinClass;
+
+def IIC_MMX_ALU_RM : InstrItinClass;
+def IIC_MMX_ALU_RR : InstrItinClass;
+def IIC_MMX_ALUQ_RM : InstrItinClass;
+def IIC_MMX_ALUQ_RR : InstrItinClass;
+def IIC_MMX_PHADDSUBW_RM : InstrItinClass;
+def IIC_MMX_PHADDSUBW_RR : InstrItinClass;
+def IIC_MMX_PHADDSUBD_RM : InstrItinClass;
+def IIC_MMX_PHADDSUBD_RR : InstrItinClass;
+def IIC_MMX_PMUL : InstrItinClass;
+def IIC_MMX_MISC_FUNC_MEM : InstrItinClass;
+def IIC_MMX_MISC_FUNC_REG : InstrItinClass;
+def IIC_MMX_PSADBW : InstrItinClass;
+def IIC_MMX_SHIFT_RI : InstrItinClass;
+def IIC_MMX_SHIFT_RM : InstrItinClass;
+def IIC_MMX_SHIFT_RR : InstrItinClass;
+def IIC_MMX_UNPCK_H_RM : InstrItinClass;
+def IIC_MMX_UNPCK_H_RR : InstrItinClass;
+def IIC_MMX_UNPCK_L : InstrItinClass;
+def IIC_MMX_PCK_RM : InstrItinClass;
+def IIC_MMX_PCK_RR : InstrItinClass;
+def IIC_MMX_PSHUF : InstrItinClass;
+def IIC_MMX_PEXTR : InstrItinClass;
+def IIC_MMX_PINSRW : InstrItinClass;
+def IIC_MMX_MASKMOV : InstrItinClass;
+
+def IIC_MMX_CVT_PD_RR : InstrItinClass;
+def IIC_MMX_CVT_PD_RM : InstrItinClass;
+def IIC_MMX_CVT_PS_RR : InstrItinClass;
+def IIC_MMX_CVT_PS_RM : InstrItinClass;
+
+def IIC_CMPX_LOCK : InstrItinClass;
+def IIC_CMPX_LOCK_8 : InstrItinClass;
+def IIC_CMPX_LOCK_8B : InstrItinClass;
+def IIC_CMPX_LOCK_16B : InstrItinClass;
+
+def IIC_XADD_LOCK_MEM : InstrItinClass;
+def IIC_XADD_LOCK_MEM8 : InstrItinClass;
+
+def IIC_FILD : InstrItinClass;
+def IIC_FLD : InstrItinClass;
+def IIC_FLD80 : InstrItinClass;
+def IIC_FST : InstrItinClass;
+def IIC_FST80 : InstrItinClass;
+def IIC_FIST : InstrItinClass;
+def IIC_FLDZ : InstrItinClass;
+def IIC_FUCOM : InstrItinClass;
+def IIC_FUCOMI : InstrItinClass;
+def IIC_FCOMI : InstrItinClass;
+def IIC_FNSTSW : InstrItinClass;
+def IIC_FNSTCW : InstrItinClass;
+def IIC_FLDCW : InstrItinClass;
+def IIC_FNINIT : InstrItinClass;
+def IIC_FFREE : InstrItinClass;
+def IIC_FNCLEX : InstrItinClass;
+def IIC_WAIT : InstrItinClass;
+def IIC_FXAM : InstrItinClass;
+def IIC_FNOP : InstrItinClass;
+def IIC_FLDL : InstrItinClass;
+def IIC_F2XM1 : InstrItinClass;
+def IIC_FYL2X : InstrItinClass;
+def IIC_FPTAN : InstrItinClass;
+def IIC_FPATAN : InstrItinClass;
+def IIC_FXTRACT : InstrItinClass;
+def IIC_FPREM1 : InstrItinClass;
+def IIC_FPSTP : InstrItinClass;
+def IIC_FPREM : InstrItinClass;
+def IIC_FYL2XP1 : InstrItinClass;
+def IIC_FSINCOS : InstrItinClass;
+def IIC_FRNDINT : InstrItinClass;
+def IIC_FSCALE : InstrItinClass;
+def IIC_FCOMPP : InstrItinClass;
+def IIC_FXSAVE : InstrItinClass;
+def IIC_FXRSTOR : InstrItinClass;
+
+def IIC_FXCH : InstrItinClass;
+
+// System instructions
+def IIC_CPUID : InstrItinClass;
+def IIC_INT : InstrItinClass;
+def IIC_INT3 : InstrItinClass;
+def IIC_INVD : InstrItinClass;
+def IIC_INVLPG : InstrItinClass;
+def IIC_IRET : InstrItinClass;
+def IIC_HLT : InstrItinClass;
+def IIC_LXS : InstrItinClass;
+def IIC_LTR : InstrItinClass;
+def IIC_RDTSC : InstrItinClass;
+def IIC_RSM : InstrItinClass;
+def IIC_SIDT : InstrItinClass;
+def IIC_SGDT : InstrItinClass;
+def IIC_SLDT : InstrItinClass;
+def IIC_STR : InstrItinClass;
+def IIC_SWAPGS : InstrItinClass;
+def IIC_SYSCALL : InstrItinClass;
+def IIC_SYS_ENTER_EXIT : InstrItinClass;
+def IIC_IN_RR : InstrItinClass;
+def IIC_IN_RI : InstrItinClass;
+def IIC_OUT_RR : InstrItinClass;
+def IIC_OUT_IR : InstrItinClass;
+def IIC_INS : InstrItinClass;
+def IIC_MOV_REG_DR : InstrItinClass;
+def IIC_MOV_DR_REG : InstrItinClass;
+def IIC_MOV_REG_CR : InstrItinClass;
+def IIC_MOV_CR_REG : InstrItinClass;
+def IIC_MOV_REG_SR : InstrItinClass;
+def IIC_MOV_MEM_SR : InstrItinClass;
+def IIC_MOV_SR_REG : InstrItinClass;
+def IIC_MOV_SR_MEM : InstrItinClass;
+def IIC_LAR_RM : InstrItinClass;
+def IIC_LAR_RR : InstrItinClass;
+def IIC_LSL_RM : InstrItinClass;
+def IIC_LSL_RR : InstrItinClass;
+def IIC_LGDT : InstrItinClass;
+def IIC_LIDT : InstrItinClass;
+def IIC_LLDT_REG : InstrItinClass;
+def IIC_LLDT_MEM : InstrItinClass;
+def IIC_PUSH_CS : InstrItinClass;
+def IIC_PUSH_SR : InstrItinClass;
+def IIC_POP_SR : InstrItinClass;
+def IIC_POP_SR_SS : InstrItinClass;
+def IIC_VERR : InstrItinClass;
+def IIC_VERW_REG : InstrItinClass;
+def IIC_VERW_MEM : InstrItinClass;
+def IIC_WRMSR : InstrItinClass;
+def IIC_RDMSR : InstrItinClass;
+def IIC_RDPMC : InstrItinClass;
+def IIC_SMSW : InstrItinClass;
+def IIC_LMSW_REG : InstrItinClass;
+def IIC_LMSW_MEM : InstrItinClass;
+def IIC_ENTER : InstrItinClass;
+def IIC_LEAVE : InstrItinClass;
+def IIC_POP_MEM : InstrItinClass;
+def IIC_POP_REG16 : InstrItinClass;
+def IIC_POP_REG : InstrItinClass;
+def IIC_POP_F : InstrItinClass;
+def IIC_POP_FD : InstrItinClass;
+def IIC_POP_A : InstrItinClass;
+def IIC_PUSH_IMM : InstrItinClass;
+def IIC_PUSH_MEM : InstrItinClass;
+def IIC_PUSH_REG : InstrItinClass;
+def IIC_PUSH_F : InstrItinClass;
+def IIC_PUSH_A : InstrItinClass;
+def IIC_BSWAP : InstrItinClass;
+def IIC_BSF : InstrItinClass;
+def IIC_BSR : InstrItinClass;
+def IIC_MOVS : InstrItinClass;
+def IIC_STOS : InstrItinClass;
+def IIC_SCAS : InstrItinClass;
+def IIC_CMPS : InstrItinClass;
+def IIC_MOV : InstrItinClass;
+def IIC_MOV_MEM : InstrItinClass;
+def IIC_AHF : InstrItinClass;
+def IIC_BT_MI : InstrItinClass;
+def IIC_BT_MR : InstrItinClass;
+def IIC_BT_RI : InstrItinClass;
+def IIC_BT_RR : InstrItinClass;
+def IIC_BTX_MI : InstrItinClass;
+def IIC_BTX_MR : InstrItinClass;
+def IIC_BTX_RI : InstrItinClass;
+def IIC_BTX_RR : InstrItinClass;
+def IIC_XCHG_REG : InstrItinClass;
+def IIC_XCHG_MEM : InstrItinClass;
+def IIC_XADD_REG : InstrItinClass;
+def IIC_XADD_MEM : InstrItinClass;
+def IIC_CMPXCHG_MEM : InstrItinClass;
+def IIC_CMPXCHG_REG : InstrItinClass;
+def IIC_CMPXCHG_MEM8 : InstrItinClass;
+def IIC_CMPXCHG_REG8 : InstrItinClass;
+def IIC_CMPXCHG_8B : InstrItinClass;
+def IIC_CMPXCHG_16B : InstrItinClass;
+def IIC_LODS : InstrItinClass;
+def IIC_OUTS : InstrItinClass;
+def IIC_CLC : InstrItinClass;
+def IIC_CLD : InstrItinClass;
+def IIC_CLI : InstrItinClass;
+def IIC_CMC : InstrItinClass;
+def IIC_CLTS : InstrItinClass;
+def IIC_STC : InstrItinClass;
+def IIC_STI : InstrItinClass;
+def IIC_STD : InstrItinClass;
+def IIC_XLAT : InstrItinClass;
+def IIC_AAA : InstrItinClass;
+def IIC_AAD : InstrItinClass;
+def IIC_AAM : InstrItinClass;
+def IIC_AAS : InstrItinClass;
+def IIC_DAA : InstrItinClass;
+def IIC_DAS : InstrItinClass;
+def IIC_BOUND : InstrItinClass;
+def IIC_ARPL_REG : InstrItinClass;
+def IIC_ARPL_MEM : InstrItinClass;
+def IIC_MOVBE : InstrItinClass;
+
+def IIC_NOP : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+// IssueWidth is analagous to the number of decode units. Core and its
+// descendents, including Nehalem and SandyBridge have 4 decoders.
+// Resources beyond the decoder operate on micro-ops and are bufferred
+// so adjacent micro-ops don't directly compete.
+//
+// MinLatency=0 indicates that RAW dependencies can be decoded in the
+// same cycle.
+//
+// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
+// indicates high latency opcodes. Alternatively, InstrItinData
+// entries may be included here to define specific operand
+// latencies. Since these latencies are not used for pipeline hazards,
+// they do not need to be exact.
+//
+// ILPWindow=10 is an arbitrary threshold that approximates cycles of
+// latency hidden by instruction buffers. The actual value is not very
+// important but should be zero for inorder and nonzero for OOO processors.
+//
+// The GenericModel contains no instruciton itineraries.
+def GenericModel : SchedMachineModel {
+ let IssueWidth = 4;
+ let MinLatency = 0;
+ let LoadLatency = 4;
+ let HighLatency = 10;
+ let ILPWindow = 10;
+}
+
+include "X86ScheduleAtom.td"
+include "X86SchedSandyBridge.td"
+include "X86SchedHaswell.td"
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
new file mode 100644
index 000000000000..cce8f1b11436
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -0,0 +1,530 @@
+//===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Intel Atom (Bonnell)
+// processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from the "Intel 64 and IA32 Architectures
+// Optimization Reference Manual", Chapter 13, Section 4.
+// Functional Units
+// Port 0
+def Port0 : FuncUnit; // ALU: ALU0, shift/rotate, load/store
+ // SIMD/FP: SIMD ALU, Shuffle,SIMD/FP multiply, divide
+def Port1 : FuncUnit; // ALU: ALU1, bit processing, jump, and LEA
+ // SIMD/FP: SIMD ALU, FP Adder
+
+def AtomItineraries : ProcessorItineraries<
+ [ Port0, Port1 ],
+ [], [
+ // P0 only
+ // InstrItinData<class, [InstrStage<N, [P0]>] >,
+ // P0 or P1
+ // InstrItinData<class, [InstrStage<N, [P0, P1]>] >,
+ // P0 and P1
+ // InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >,
+ //
+ // Default is 1 cycle, port0 or port1
+ InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_LEA_16, [InstrStage<2, [Port0, Port1]>] >,
+ // mul
+ InstrItinData<IIC_MUL8, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL64, [InstrStage<12, [Port0, Port1]>] >,
+ // imul by al, ax, eax, rax
+ InstrItinData<IIC_IMUL8, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL64, [InstrStage<12, [Port0, Port1]>] >,
+ // imul reg by reg|mem
+ InstrItinData<IIC_IMUL16_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_RM, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL32_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL64_RM, [InstrStage<12, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL64_RR, [InstrStage<12, [Port0, Port1]>] >,
+ // imul reg = reg/mem * imm
+ InstrItinData<IIC_IMUL16_RRI, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_RRI, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL64_RRI, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_RMI, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_RMI, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL64_RMI, [InstrStage<14, [Port0, Port1]>] >,
+ // idiv
+ InstrItinData<IIC_IDIV8, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_IDIV16, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_IDIV32, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_IDIV64, [InstrStage<130, [Port0, Port1]>] >,
+ // div
+ InstrItinData<IIC_DIV8_REG, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV8_MEM, [InstrStage<68, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV16, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV32, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV64, [InstrStage<130, [Port0, Port1]>] >,
+ // neg/not/inc/dec
+ InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >,
+ // add/sub/and/or/xor/adc/sbc/cmp/test
+ InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >,
+ // shift/rotate
+ InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >,
+ // shift double
+ InstrItinData<IIC_SHD16_REG_IM, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD16_REG_CL, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_REG_CL, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_REG_IM, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_REG_CL, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<9, [Port0, Port1]>] >,
+ // cmov
+ InstrItinData<IIC_CMOV16_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CMOV16_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMOV32_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CMOV32_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMOV64_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CMOV64_RR, [InstrStage<1, [Port0, Port1]>] >,
+ // set
+ InstrItinData<IIC_SET_M, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SET_R, [InstrStage<1, [Port0, Port1]>] >,
+ // jcc
+ InstrItinData<IIC_Jcc, [InstrStage<1, [Port1]>] >,
+ // jcxz/jecxz/jrcxz
+ InstrItinData<IIC_JCXZ, [InstrStage<4, [Port0, Port1]>] >,
+ // jmp rel
+ InstrItinData<IIC_JMP_REL, [InstrStage<1, [Port1]>] >,
+ // jmp indirect
+ InstrItinData<IIC_JMP_REG, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_JMP_MEM, [InstrStage<2, [Port0, Port1]>] >,
+ // jmp far
+ InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<32, [Port0, Port1]>] >,
+ InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<31, [Port0, Port1]>] >,
+ // loop/loope/loopne
+ InstrItinData<IIC_LOOP, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_LOOPE, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_LOOPNE, [InstrStage<17, [Port0, Port1]>] >,
+ // call - all but reg/imm
+ InstrItinData<IIC_CALL_RI, [InstrStage<1, [Port0], 0>,
+ InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_CALL_MEM, [InstrStage<15, [Port0, Port1]>] >,
+ InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<40, [Port0, Port1]>] >,
+ InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<39, [Port0, Port1]>] >,
+ //ret
+ InstrItinData<IIC_RET, [InstrStage<79, [Port0, Port1]>] >,
+ InstrItinData<IIC_RET_IMM, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >,
+ //sign extension movs
+ InstrItinData<IIC_MOVSX,[InstrStage<1, [Port0] >] >,
+ InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVSX_R16_R16, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVSX_R32_R32, [InstrStage<1, [Port0, Port1]>] >,
+ //zero extension movs
+ InstrItinData<IIC_MOVZX,[InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >,
+ InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >,
+
+ // SSE binary operations
+ // arithmetic fp scalar
+ InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<34, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<34, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<62, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<10, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<9, [Port0, Port1]>] >,
+
+ // arithmetic fp parallel
+ InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<70, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<70, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<125, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<125, [Port0, Port1]>] >,
+
+ // bitwise parallel
+ InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [Port0]>] >,
+
+ // arithmetic int parallel
+ InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<3, [Port0, Port1]>] >,
+
+ // multiply int parallel
+ InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [Port0]>] >,
+
+ // shift parallel
+ InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_CMPP_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CMPP_RM, [InstrStage<7, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PSHUF, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_SQRTP_RR, [InstrStage<13, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_SQRTP_RM, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_SQRTS_RR, [InstrStage<11, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_SQRTS_RM, [InstrStage<12, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOVMSK, [InstrStage<3, [Port0]>] >,
+ InstrItinData<IIC_SSE_MASKMOV, [InstrStage<2, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<2, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_LDDQU, [InstrStage<3, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<3, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PAUSE, [InstrStage<17, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_STMXCSR, [InstrStage<15, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_PALIGNR, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MWAIT, [InstrStage<46, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MONITOR, [InstrStage<45, [Port0, Port1]>] >,
+
+ // conversions
+ // to/from PD ...
+ InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
+ // to/from PS except to/from PD and PS2PI
+ InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >,
+
+ // MMX MOVs
+ InstrItinData<IIC_MMX_MOV_MM_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_MOV_REG_MM, [InstrStage<3, [Port0]>] >,
+ InstrItinData<IIC_MMX_MOVQ_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
+ // other MMX
+ InstrItinData<IIC_MMX_ALU_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_ALU_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_ALUQ_RM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_ALUQ_RR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBW_RM, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBW_RR, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PMUL, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_MMX_MISC_FUNC_MEM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_MISC_FUNC_REG, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PSADBW, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RI, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_UNPCK_H_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_UNPCK_H_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_UNPCK_L, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_PCK_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_PCK_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PSHUF, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_PEXTR, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PINSRW, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_MASKMOV, [InstrStage<1, [Port0]>] >,
+ // conversions
+ // from/to PD
+ InstrItinData<IIC_MMX_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
+ // from/to PI
+ InstrItinData<IIC_MMX_CVT_PS_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_MMX_CVT_PS_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>]>,
+
+ InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<3, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_FILD, [InstrStage<5, [Port0], 0>, InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_FLD, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_FLD80, [InstrStage<4, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_FST, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_FST80, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_FIST, [InstrStage<6, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_FLDZ, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_FUCOM, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_FUCOMI, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_FCOMI, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_FNSTSW, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_FNSTCW, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_FLDCW, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_FNINIT, [InstrStage<63, [Port0, Port1]>] >,
+ InstrItinData<IIC_FFREE, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_FNCLEX, [InstrStage<25, [Port0, Port1]>] >,
+ InstrItinData<IIC_WAIT, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_FXAM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_FNOP, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_FLDL, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_F2XM1, [InstrStage<99, [Port0, Port1]>] >,
+ InstrItinData<IIC_FYL2X, [InstrStage<146, [Port0, Port1]>] >,
+ InstrItinData<IIC_FPTAN, [InstrStage<168, [Port0, Port1]>] >,
+ InstrItinData<IIC_FPATAN, [InstrStage<183, [Port0, Port1]>] >,
+ InstrItinData<IIC_FXTRACT, [InstrStage<25, [Port0, Port1]>] >,
+ InstrItinData<IIC_FPREM1, [InstrStage<71, [Port0, Port1]>] >,
+ InstrItinData<IIC_FPSTP, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_FPREM, [InstrStage<55, [Port0, Port1]>] >,
+ InstrItinData<IIC_FYL2XP1, [InstrStage<147, [Port0, Port1]>] >,
+ InstrItinData<IIC_FSINCOS, [InstrStage<174, [Port0, Port1]>] >,
+ InstrItinData<IIC_FRNDINT, [InstrStage<46, [Port0, Port1]>] >,
+ InstrItinData<IIC_FSCALE, [InstrStage<77, [Port0, Port1]>] >,
+ InstrItinData<IIC_FCOMPP, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_FXSAVE, [InstrStage<140, [Port0, Port1]>] >,
+ InstrItinData<IIC_FXRSTOR, [InstrStage<141, [Port0, Port1]>] >,
+ InstrItinData<IIC_FXCH, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >,
+
+ // System instructions
+ InstrItinData<IIC_CPUID, [InstrStage<121, [Port0, Port1]>] >,
+ InstrItinData<IIC_INT, [InstrStage<127, [Port0, Port1]>] >,
+ InstrItinData<IIC_INT3, [InstrStage<130, [Port0, Port1]>] >,
+ InstrItinData<IIC_INVD, [InstrStage<1003, [Port0, Port1]>] >,
+ InstrItinData<IIC_INVLPG, [InstrStage<71, [Port0, Port1]>] >,
+ InstrItinData<IIC_IRET, [InstrStage<109, [Port0, Port1]>] >,
+ InstrItinData<IIC_HLT, [InstrStage<121, [Port0, Port1]>] >,
+ InstrItinData<IIC_LXS, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_LTR, [InstrStage<83, [Port0, Port1]>] >,
+ InstrItinData<IIC_RDTSC, [InstrStage<30, [Port0, Port1]>] >,
+ InstrItinData<IIC_RSM, [InstrStage<741, [Port0, Port1]>] >,
+ InstrItinData<IIC_SIDT, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SGDT, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SLDT, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_STR, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SWAPGS, [InstrStage<22, [Port0, Port1]>] >,
+ InstrItinData<IIC_SYSCALL, [InstrStage<96, [Port0, Port1]>] >,
+ InstrItinData<IIC_SYS_ENTER_EXIT, [InstrStage<88, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_IN_RR, [InstrStage<94, [Port0, Port1]>] >,
+ InstrItinData<IIC_IN_RI, [InstrStage<92, [Port0, Port1]>] >,
+ InstrItinData<IIC_OUT_RR, [InstrStage<68, [Port0, Port1]>] >,
+ InstrItinData<IIC_OUT_IR, [InstrStage<72, [Port0, Port1]>] >,
+ InstrItinData<IIC_INS, [InstrStage<59, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_MOV_REG_DR, [InstrStage<88, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV_DR_REG, [InstrStage<123, [Port0, Port1]>] >,
+ // worst case for mov REG_CRx
+ InstrItinData<IIC_MOV_REG_CR, [InstrStage<12, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV_CR_REG, [InstrStage<136, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_MOV_REG_SR, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MOV_MEM_SR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV_SR_REG, [InstrStage<21, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV_SR_MEM, [InstrStage<26, [Port0, Port1]>] >,
+ // LAR
+ InstrItinData<IIC_LAR_RM, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_LAR_RR, [InstrStage<54, [Port0, Port1]>] >,
+ // LSL
+ InstrItinData<IIC_LSL_RM, [InstrStage<46, [Port0, Port1]>] >,
+ InstrItinData<IIC_LSL_RR, [InstrStage<49, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_LGDT, [InstrStage<44, [Port0, Port1]>] >,
+ InstrItinData<IIC_LIDT, [InstrStage<44, [Port0, Port1]>] >,
+ InstrItinData<IIC_LLDT_REG, [InstrStage<60, [Port0, Port1]>] >,
+ InstrItinData<IIC_LLDT_MEM, [InstrStage<64, [Port0, Port1]>] >,
+ // push control register, segment registers
+ InstrItinData<IIC_PUSH_CS, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_PUSH_SR, [InstrStage<2, [Port0, Port1]>] >,
+ // pop control register, segment registers
+ InstrItinData<IIC_POP_SR, [InstrStage<29, [Port0, Port1]>] >,
+ InstrItinData<IIC_POP_SR_SS, [InstrStage<48, [Port0, Port1]>] >,
+ // VERR, VERW
+ InstrItinData<IIC_VERR, [InstrStage<41, [Port0, Port1]>] >,
+ InstrItinData<IIC_VERW_REG, [InstrStage<51, [Port0, Port1]>] >,
+ InstrItinData<IIC_VERW_MEM, [InstrStage<50, [Port0, Port1]>] >,
+ // WRMSR, RDMSR
+ InstrItinData<IIC_WRMSR, [InstrStage<202, [Port0, Port1]>] >,
+ InstrItinData<IIC_RDMSR, [InstrStage<78, [Port0, Port1]>] >,
+ InstrItinData<IIC_RDPMC, [InstrStage<46, [Port0, Port1]>] >,
+ // SMSW, LMSW
+ InstrItinData<IIC_SMSW, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_LMSW_REG, [InstrStage<69, [Port0, Port1]>] >,
+ InstrItinData<IIC_LMSW_MEM, [InstrStage<67, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_ENTER, [InstrStage<32, [Port0, Port1]>] >,
+ InstrItinData<IIC_LEAVE, [InstrStage<2, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_POP_MEM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_POP_REG16, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_POP_REG, [InstrStage<1, [Port0], 0>,
+ InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_POP_F, [InstrStage<32, [Port0, Port1]>] >,
+ InstrItinData<IIC_POP_FD, [InstrStage<26, [Port0, Port1]>] >,
+ InstrItinData<IIC_POP_A, [InstrStage<9, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_PUSH_IMM, [InstrStage<1, [Port0], 0>,
+ InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_PUSH_MEM, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_PUSH_REG, [InstrStage<1, [Port0], 0>,
+ InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_PUSH_F, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_PUSH_A, [InstrStage<8, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_BSWAP, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_BSF, [InstrStage<16, [Port0, Port1]>] >,
+ InstrItinData<IIC_BSR, [InstrStage<16, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVS, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_STOS, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SCAS, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPS, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV_MEM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_AHF, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_BT_MI, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_BT_MR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_BT_RI, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_BT_RR, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_BTX_MI, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_BTX_MR, [InstrStage<11, [Port0, Port1]>] >,
+ InstrItinData<IIC_BTX_RI, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_BTX_RR, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_XCHG_REG, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_XCHG_MEM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_XADD_REG, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_XADD_MEM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_MEM, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_REG, [InstrStage<15, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_MEM8, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_REG8, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_8B, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_16B, [InstrStage<22, [Port0, Port1]>] >,
+ InstrItinData<IIC_LODS, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_OUTS, [InstrStage<74, [Port0, Port1]>] >,
+ InstrItinData<IIC_CLC, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CLD, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_CLI, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMC, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CLTS, [InstrStage<33, [Port0, Port1]>] >,
+ InstrItinData<IIC_STC, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_STI, [InstrStage<17, [Port0, Port1]>] >,
+ InstrItinData<IIC_STD, [InstrStage<21, [Port0, Port1]>] >,
+ InstrItinData<IIC_XLAT, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_AAA, [InstrStage<13, [Port0, Port1]>] >,
+ InstrItinData<IIC_AAD, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_AAM, [InstrStage<21, [Port0, Port1]>] >,
+ InstrItinData<IIC_AAS, [InstrStage<13, [Port0, Port1]>] >,
+ InstrItinData<IIC_DAA, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_DAS, [InstrStage<20, [Port0, Port1]>] >,
+ InstrItinData<IIC_BOUND, [InstrStage<11, [Port0, Port1]>] >,
+ InstrItinData<IIC_ARPL_REG, [InstrStage<24, [Port0, Port1]>] >,
+ InstrItinData<IIC_ARPL_MEM, [InstrStage<23, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVBE, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_NOP, [InstrStage<1, [Port0, Port1]>] >
+ ]>;
+
+// Atom machine model.
+def AtomModel : SchedMachineModel {
+ let IssueWidth = 2; // Allows 2 instructions per scheduling group.
+ let MinLatency = 1; // InstrStage cycles overrides MinLatency.
+ // OperandCycles may be used for expected latency.
+ let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles.
+ let HighLatency = 30;// Expected, may be overriden by OperandCycles.
+ let ILPWindow = 0; // Always try to hide expected latency.
+
+ let Itineraries = AtomItineraries;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..f934fdd85914
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -0,0 +1,270 @@
+//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-selectiondag-info"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
+using namespace llvm;
+
+X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
+ TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()),
+ TLI(*TM.getTargetLowering()) {
+}
+
+X86SelectionDAGInfo::~X86SelectionDAGInfo() {
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+ // If to a segment-relative address space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
+ // If not DWORD aligned or size is more than the threshold, call the library.
+ // The libc version is likely to be faster for these cases. It can use the
+ // address value and run time information about the CPU.
+ if ((Align & 3) != 0 ||
+ !ConstantSize ||
+ ConstantSize->getZExtValue() >
+ Subtarget->getMaxInlineSizeThreshold()) {
+ SDValue InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+ if (const char *bzeroEntry = V &&
+ V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ EVT IntPtr = TLI.getPointerTy();
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
+ DAG, dl);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(CLI);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDValue();
+ }
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ SDValue InFlag(0, 0);
+ EVT AVT;
+ SDValue Count;
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+ unsigned BytesLeft = 0;
+ bool TwoRepStos = false;
+ if (ValC) {
+ unsigned ValReg;
+ uint64_t Val = ValC->getZExtValue() & 255;
+
+ // If the value is a constant, then we can potentially use larger sets.
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ ValReg = X86::EAX;
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ ValReg = X86::AL;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ break;
+ }
+
+ if (AVT.bitsGT(MVT::i8)) {
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AVT = MVT::i8;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+
+ if (TwoRepStos) {
+ InFlag = Chain.getValue(1);
+ Count = Size;
+ EVT CVT = Count.getValueType();
+ SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+ X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+ } else if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT AddrVT = Dst.getValueType();
+ EVT SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
+ }
+
+ // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
+ return Chain;
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ /// If not DWORD aligned, it is more efficient to call the library. However
+ /// if calling the library is not allowed (AlwaysInline), then soldier on as
+ /// the code generated here is better than the long load-store sequence we
+ /// would otherwise get.
+ if (!AlwaysInline && (Align & 3) != 0)
+ return SDValue();
+
+ // If to a segment-relative address space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256 ||
+ SrcPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
+ // ESI might be used as a base pointer, in that case we can't simply overwrite
+ // the register. Fall back to generic code.
+ const X86RegisterInfo *TRI =
+ static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
+ if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
+ TRI->getBaseRegister() == X86::ESI)
+ return SDValue();
+
+ MVT AVT;
+ if (Align & 1)
+ AVT = MVT::i8;
+ else if (Align & 2)
+ AVT = MVT::i16;
+ else if (Align & 4)
+ // DWORD aligned
+ AVT = MVT::i32;
+ else
+ // QWORD aligned
+ AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ unsigned CountVal = SizeVal / UBytes;
+ SDValue Count = DAG.getIntPtrConstant(CountVal);
+ unsigned BytesLeft = SizeVal % UBytes;
+
+ SDValue InFlag(0, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ X86::ESI,
+ Src, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
+ array_lengthof(Ops));
+
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepMovs);
+ if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT DstVT = Dst.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT SizeVT = Size.getValueType();
+ Results.push_back(DAG.getMemcpy(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, AlwaysInline,
+ DstPtrInfo.getWithOffset(Offset),
+ SrcPtrInfo.getWithOffset(Offset)));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Results[0], Results.size());
+}
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
new file mode 100644
index 000000000000..d1d66fe76e94
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -0,0 +1,56 @@
+//===-- X86SelectionDAGInfo.h - X86 SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SELECTIONDAGINFO_H
+#define X86SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class X86TargetLowering;
+class X86TargetMachine;
+class X86Subtarget;
+
+class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ const X86TargetLowering &TLI;
+
+public:
+ explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
+ ~X86SelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
new file mode 100644
index 000000000000..14619b63927b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -0,0 +1,495 @@
+//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "subtarget"
+#include "X86Subtarget.h"
+#include "X86InstrInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "X86GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
+ if (isPICStyleGOT()) // 32-bit ELF targets.
+ return X86II::MO_GOTOFF;
+
+ if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Direct static reference to label.
+ return X86II::MO_NO_FLAG;
+}
+
+/// ClassifyGlobalReference - Classify a global variable reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
+ // DLLImport only exists on windows, it is implemented as a load from a
+ // DLLIMPORT stub.
+ if (GV->hasDLLImportLinkage())
+ return X86II::MO_DLLIMPORT;
+
+ // Determine whether this is a reference to a definition or a declaration.
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+ // load from stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
+
+ // X86-64 in PIC mode.
+ if (isPICStyleRIPRel()) {
+ // Large model never uses stubs.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return X86II::MO_NO_FLAG;
+
+ if (isTargetDarwin()) {
+ // If symbol visibility is hidden, the extra load is not needed if
+ // target is x86-64 or the symbol is definitely defined in the current
+ // translation unit.
+ if (GV->hasDefaultVisibility() &&
+ (isDecl || GV->isWeakForLinker()))
+ return X86II::MO_GOTPCREL;
+ } else if (!isTargetWin64()) {
+ assert(isTargetELF() && "Unknown rip-relative target");
+
+ // Extra load is needed for all externally visible.
+ if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
+ return X86II::MO_GOTPCREL;
+ }
+
+ return X86II::MO_NO_FLAG;
+ }
+
+ if (isPICStyleGOT()) { // 32-bit ELF targets.
+ // Extra load is needed for all externally visible.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return X86II::MO_GOTOFF;
+ return X86II::MO_GOT;
+ }
+
+ if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
+ // Determine whether we have a stub reference and/or whether the reference
+ // is relative to the PIC base or not.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage()) {
+ // Hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
+ }
+
+ // Otherwise, no stub.
+ return X86II::MO_PIC_BASE_OFFSET;
+ }
+
+ if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
+ // Determine whether we have a stub reference.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_NO_FLAG;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY;
+
+ // Otherwise, no stub.
+ return X86II::MO_NO_FLAG;
+ }
+
+ // Direct static reference to global.
+ return X86II::MO_NO_FLAG;
+}
+
+
+/// getBZeroEntry - This function returns the name of a function which has an
+/// interface like the non-standard bzero function, if such a function exists on
+/// the current subtarget and it is considered prefereable over memset with zero
+/// passed as the second argument. Otherwise it returns null.
+const char *X86Subtarget::getBZeroEntry() const {
+ // Darwin 10 has a __bzero entry point for this purpose.
+ if (getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 6))
+ return "__bzero";
+
+ return 0;
+}
+
+bool X86Subtarget::hasSinCos() const {
+ return getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 9) &&
+ is64Bit();
+}
+
+/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+/// to immediate address.
+bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
+ if (In64BitMode)
+ return false;
+ return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
+}
+
+void X86Subtarget::AutoDetectSubtargetFeatures() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ unsigned MaxLevel;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ if (X86_MC::GetCpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) ||
+ MaxLevel < 1)
+ return;
+
+ X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+
+ if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); }
+ if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); }
+ if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); }
+ if ((EDX >> 26) & 1) { X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); }
+ if (ECX & 0x1) { X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); }
+ if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);}
+ if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
+ if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
+ if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); }
+
+ bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
+ bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+
+ if ((ECX >> 1) & 0x1) {
+ HasPCLMUL = true;
+ ToggleFeature(X86::FeaturePCLMUL);
+ }
+ if ((ECX >> 12) & 0x1) {
+ HasFMA = true;
+ ToggleFeature(X86::FeatureFMA);
+ }
+ if (IsIntel && ((ECX >> 22) & 0x1)) {
+ HasMOVBE = true;
+ ToggleFeature(X86::FeatureMOVBE);
+ }
+ if ((ECX >> 23) & 0x1) {
+ HasPOPCNT = true;
+ ToggleFeature(X86::FeaturePOPCNT);
+ }
+ if ((ECX >> 25) & 0x1) {
+ HasAES = true;
+ ToggleFeature(X86::FeatureAES);
+ }
+ if ((ECX >> 29) & 0x1) {
+ HasF16C = true;
+ ToggleFeature(X86::FeatureF16C);
+ }
+ if (IsIntel && ((ECX >> 30) & 0x1)) {
+ HasRDRAND = true;
+ ToggleFeature(X86::FeatureRDRAND);
+ }
+
+ if ((ECX >> 13) & 0x1) {
+ HasCmpxchg16b = true;
+ ToggleFeature(X86::FeatureCMPXCHG16B);
+ }
+
+ if (IsIntel || IsAMD) {
+ // Determine if bit test memory instructions are slow.
+ unsigned Family = 0;
+ unsigned Model = 0;
+ X86_MC::DetectFamilyModel(EAX, Family, Model);
+ if (IsAMD || (Family == 6 && Model >= 13)) {
+ IsBTMemSlow = true;
+ ToggleFeature(X86::FeatureSlowBTMem);
+ }
+
+ // If it's an Intel chip since Nehalem and not an Atom chip, unaligned
+ // memory access is fast. We hard code model numbers here because they
+ // aren't strictly increasing for Intel chips it seems.
+ if (IsIntel &&
+ ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield,
+ // Jasper Froest
+ (Family == 6 && Model == 0x1A) || // Nehalem: Bloomfield, Nehalem-EP
+ (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX
+ (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale
+ (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP
+ (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
+ (Family == 6 && Model == 0x2A) || // SandyBridge
+ (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
+ (Family == 6 && Model == 0x3A))) {// IvyBridge
+ IsUAMemFast = true;
+ ToggleFeature(X86::FeatureFastUAMem);
+ }
+
+ // Set processor type. Currently only Atom is detected.
+ if (Family == 6 &&
+ (Model == 28 || Model == 38 || Model == 39
+ || Model == 53 || Model == 54)) {
+ X86ProcFamily = IntelAtom;
+
+ UseLeaForSP = true;
+ ToggleFeature(X86::FeatureLeaForSP);
+ }
+
+ unsigned MaxExtLevel;
+ X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+ if (MaxExtLevel >= 0x80000001) {
+ X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if ((EDX >> 29) & 0x1) {
+ HasX86_64 = true;
+ ToggleFeature(X86::Feature64Bit);
+ }
+ if ((ECX >> 5) & 0x1) {
+ HasLZCNT = true;
+ ToggleFeature(X86::FeatureLZCNT);
+ }
+ if (IsIntel && ((ECX >> 8) & 0x1)) {
+ HasPRFCHW = true;
+ ToggleFeature(X86::FeaturePRFCHW);
+ }
+ if (IsAMD) {
+ if ((ECX >> 6) & 0x1) {
+ HasSSE4A = true;
+ ToggleFeature(X86::FeatureSSE4A);
+ }
+ if ((ECX >> 11) & 0x1) {
+ HasXOP = true;
+ ToggleFeature(X86::FeatureXOP);
+ }
+ if ((ECX >> 16) & 0x1) {
+ HasFMA4 = true;
+ ToggleFeature(X86::FeatureFMA4);
+ }
+ }
+ }
+ }
+
+ if (MaxLevel >= 7) {
+ if (!X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX)) {
+ if (IsIntel && (EBX & 0x1)) {
+ HasFSGSBase = true;
+ ToggleFeature(X86::FeatureFSGSBase);
+ }
+ if ((EBX >> 3) & 0x1) {
+ HasBMI = true;
+ ToggleFeature(X86::FeatureBMI);
+ }
+ if ((EBX >> 4) & 0x1) {
+ HasHLE = true;
+ ToggleFeature(X86::FeatureHLE);
+ }
+ if (IsIntel && ((EBX >> 5) & 0x1)) {
+ X86SSELevel = AVX2;
+ ToggleFeature(X86::FeatureAVX2);
+ }
+ if (IsIntel && ((EBX >> 8) & 0x1)) {
+ HasBMI2 = true;
+ ToggleFeature(X86::FeatureBMI2);
+ }
+ if (IsIntel && ((EBX >> 11) & 0x1)) {
+ HasRTM = true;
+ ToggleFeature(X86::FeatureRTM);
+ }
+ if (IsIntel && ((EBX >> 19) & 0x1)) {
+ HasADX = true;
+ ToggleFeature(X86::FeatureADX);
+ }
+ if (IsIntel && ((EBX >> 18) & 0x1)) {
+ HasRDSEED = true;
+ ToggleFeature(X86::FeatureRDSEED);
+ }
+ }
+ }
+}
+
+void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+ !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ }
+}
+
+void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+ std::string CPUName = CPU;
+ if (!FS.empty() || !CPU.empty()) {
+ if (CPUName.empty()) {
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ // Make sure 64-bit features are available in 64-bit mode. (But make sure
+ // SSE2 can be turned off explicitly.)
+ std::string FullFS = FS;
+ if (In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+64bit,+sse2," + FullFS;
+ else
+ FullFS = "+64bit,+sse2";
+ }
+
+ // If feature string is not empty, parse features string.
+ ParseSubtargetFeatures(CPUName, FullFS);
+ } else {
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+ // Otherwise, use CPUID to auto-detect feature set.
+ AutoDetectSubtargetFeatures();
+
+ // Make sure 64-bit features are available in 64-bit mode.
+ if (In64BitMode) {
+ HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
+ HasCMov = true; ToggleFeature(X86::FeatureCMOV);
+
+ if (X86SSELevel < SSE2) {
+ X86SSELevel = SSE2;
+ ToggleFeature(X86::FeatureSSE1);
+ ToggleFeature(X86::FeatureSSE2);
+ }
+ }
+ }
+
+ // CPUName may have been set by the CPU detection code. Make sure the
+ // new MCSchedModel is used.
+ InitMCProcessorInfo(CPUName, FS);
+
+ if (X86ProcFamily == IntelAtom)
+ PostRAScheduler = true;
+
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // It's important to keep the MCSubtargetInfo feature bits in sync with
+ // target data structure which is shared with MC code emitter, etc.
+ if (In64BitMode)
+ ToggleFeature(X86::Mode64Bit);
+
+ DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
+ << ", 3DNowLevel " << X863DNowLevel
+ << ", 64bit " << HasX86_64 << "\n");
+ assert((!In64BitMode || HasX86_64) &&
+ "64-bit code requested on a subtarget that doesn't support it!");
+
+ // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both
+ // 32 and 64 bit) and for all 64-bit targets.
+ if (StackAlignOverride)
+ stackAlignment = StackAlignOverride;
+ else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
+ In64BitMode)
+ stackAlignment = 16;
+}
+
+void X86Subtarget::initializeEnvironment() {
+ X86SSELevel = NoMMXSSE;
+ X863DNowLevel = NoThreeDNow;
+ HasCMov = false;
+ HasX86_64 = false;
+ HasPOPCNT = false;
+ HasSSE4A = false;
+ HasAES = false;
+ HasPCLMUL = false;
+ HasFMA = false;
+ HasFMA4 = false;
+ HasXOP = false;
+ HasMOVBE = false;
+ HasRDRAND = false;
+ HasF16C = false;
+ HasFSGSBase = false;
+ HasLZCNT = false;
+ HasBMI = false;
+ HasBMI2 = false;
+ HasRTM = false;
+ HasHLE = false;
+ HasADX = false;
+ HasPRFCHW = false;
+ HasRDSEED = false;
+ IsBTMemSlow = false;
+ IsUAMemFast = false;
+ HasVectorUAMem = false;
+ HasCmpxchg16b = false;
+ UseLeaForSP = false;
+ HasSlowDivide = false;
+ PostRAScheduler = false;
+ PadShortFunctions = false;
+ CallRegIndirect = false;
+ stackAlignment = 4;
+ // FIXME: this is a known good value for Yonah. How about others?
+ MaxInlineSizeThreshold = 128;
+}
+
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit)
+ : X86GenSubtargetInfo(TT, CPU, FS)
+ , X86ProcFamily(Others)
+ , PICStyle(PICStyles::None)
+ , TargetTriple(TT)
+ , StackAlignOverride(StackAlignOverride)
+ , In64BitMode(is64Bit) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+}
+
+bool X86Subtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
new file mode 100644
index 000000000000..6fbdb1d5f00f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -0,0 +1,379 @@
+//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SUBTARGET_H
+#define X86SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "X86GenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+class TargetMachine;
+
+/// PICStyles - The X86 backend supports a number of different styles of PIC.
+///
+namespace PICStyles {
+enum Style {
+ StubPIC, // Used on i386-darwin in -fPIC mode.
+ StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.
+ GOT, // Used on many 32-bit unices in -fPIC mode.
+ RIPRel, // Used on X86-64 when not in -static mode.
+ None // Set when in -static mode (not PIC or DynamicNoPIC mode).
+};
+}
+
+class X86Subtarget : public X86GenSubtargetInfo {
+protected:
+ enum X86SSEEnum {
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2
+ };
+
+ enum X863DNowEnum {
+ NoThreeDNow, ThreeDNow, ThreeDNowA
+ };
+
+ enum X86ProcFamilyEnum {
+ Others, IntelAtom
+ };
+
+ /// X86ProcFamily - X86 processor family: Intel Atom, and others
+ X86ProcFamilyEnum X86ProcFamily;
+
+ /// PICStyle - Which PIC style to use
+ ///
+ PICStyles::Style PICStyle;
+
+ /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or
+ /// none supported.
+ X86SSEEnum X86SSELevel;
+
+ /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
+ ///
+ X863DNowEnum X863DNowLevel;
+
+ /// HasCMov - True if this processor has conditional move instructions
+ /// (generally pentium pro+).
+ bool HasCMov;
+
+ /// HasX86_64 - True if the processor supports X86-64 instructions.
+ ///
+ bool HasX86_64;
+
+ /// HasPOPCNT - True if the processor supports POPCNT.
+ bool HasPOPCNT;
+
+ /// HasSSE4A - True if the processor supports SSE4A instructions.
+ bool HasSSE4A;
+
+ /// HasAES - Target has AES instructions
+ bool HasAES;
+
+ /// HasPCLMUL - Target has carry-less multiplication
+ bool HasPCLMUL;
+
+ /// HasFMA - Target has 3-operand fused multiply-add
+ bool HasFMA;
+
+ /// HasFMA4 - Target has 4-operand fused multiply-add
+ bool HasFMA4;
+
+ /// HasXOP - Target has XOP instructions
+ bool HasXOP;
+
+ /// HasMOVBE - True if the processor has the MOVBE instruction.
+ bool HasMOVBE;
+
+ /// HasRDRAND - True if the processor has the RDRAND instruction.
+ bool HasRDRAND;
+
+ /// HasF16C - Processor has 16-bit floating point conversion instructions.
+ bool HasF16C;
+
+ /// HasFSGSBase - Processor has FS/GS base insturctions.
+ bool HasFSGSBase;
+
+ /// HasLZCNT - Processor has LZCNT instruction.
+ bool HasLZCNT;
+
+ /// HasBMI - Processor has BMI1 instructions.
+ bool HasBMI;
+
+ /// HasBMI2 - Processor has BMI2 instructions.
+ bool HasBMI2;
+
+ /// HasRTM - Processor has RTM instructions.
+ bool HasRTM;
+
+ /// HasHLE - Processor has HLE.
+ bool HasHLE;
+
+ /// HasADX - Processor has ADX instructions.
+ bool HasADX;
+
+ /// HasPRFCHW - Processor has PRFCHW instructions.
+ bool HasPRFCHW;
+
+ /// HasRDSEED - Processor has RDSEED instructions.
+ bool HasRDSEED;
+
+ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
+
+ /// IsUAMemFast - True if unaligned memory access is fast.
+ bool IsUAMemFast;
+
+ /// HasVectorUAMem - True if SIMD operations can have unaligned memory
+ /// operands. This may require setting a feature bit in the processor.
+ bool HasVectorUAMem;
+
+ /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
+ /// this is true for most x86-64 chips, but not the first AMD chips.
+ bool HasCmpxchg16b;
+
+ /// UseLeaForSP - True if the LEA instruction should be used for adjusting
+ /// the stack pointer. This is an optimization for Intel Atom processors.
+ bool UseLeaForSP;
+
+ /// HasSlowDivide - True if smaller divides are significantly faster than
+ /// full divides and should be used when possible.
+ bool HasSlowDivide;
+
+ /// PostRAScheduler - True if using post-register-allocation scheduler.
+ bool PostRAScheduler;
+
+ /// PadShortFunctions - True if the short functions should be padded to prevent
+ /// a stall when returning too early.
+ bool PadShortFunctions;
+
+ /// CallRegIndirect - True if the Calls with memory reference should be converted
+ /// to a register-based indirect call.
+ bool CallRegIndirect;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
+ ///
+ unsigned MaxInlineSizeThreshold;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+ /// Instruction itineraries for scheduling
+ InstrItineraryData InstrItins;
+
+private:
+ /// StackAlignOverride - Override the stack alignment.
+ unsigned StackAlignOverride;
+
+ /// In64BitMode - True if compiling for 64-bit, false for 32-bit.
+ bool In64BitMode;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit);
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
+ /// instruction.
+ void AutoDetectSubtargetFeatures();
+
+ /// \brief Reset the features for the X86 target.
+ virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
+ void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+public:
+ /// Is this x86_64? (disregarding specific ABI / programming model)
+ bool is64Bit() const {
+ return In64BitMode;
+ }
+
+ /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
+ bool isTarget64BitILP32() const {
+ return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32);
+ }
+
+ /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
+ bool isTarget64BitLP64() const {
+ return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32);
+ }
+
+ PICStyles::Style getPICStyle() const { return PICStyle; }
+ void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
+
+ bool hasCMov() const { return HasCMov; }
+ bool hasMMX() const { return X86SSELevel >= MMX; }
+ bool hasSSE1() const { return X86SSELevel >= SSE1; }
+ bool hasSSE2() const { return X86SSELevel >= SSE2; }
+ bool hasSSE3() const { return X86SSELevel >= SSE3; }
+ bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
+ bool hasSSE41() const { return X86SSELevel >= SSE41; }
+ bool hasSSE42() const { return X86SSELevel >= SSE42; }
+ bool hasAVX() const { return X86SSELevel >= AVX; }
+ bool hasAVX2() const { return X86SSELevel >= AVX2; }
+ bool hasFp256() const { return hasAVX(); }
+ bool hasInt256() const { return hasAVX2(); }
+ bool hasSSE4A() const { return HasSSE4A; }
+ bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
+ bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool hasPOPCNT() const { return HasPOPCNT; }
+ bool hasAES() const { return HasAES; }
+ bool hasPCLMUL() const { return HasPCLMUL; }
+ bool hasFMA() const { return HasFMA; }
+ // FIXME: Favor FMA when both are enabled. Is this the right thing to do?
+ bool hasFMA4() const { return HasFMA4 && !HasFMA; }
+ bool hasXOP() const { return HasXOP; }
+ bool hasMOVBE() const { return HasMOVBE; }
+ bool hasRDRAND() const { return HasRDRAND; }
+ bool hasF16C() const { return HasF16C; }
+ bool hasFSGSBase() const { return HasFSGSBase; }
+ bool hasLZCNT() const { return HasLZCNT; }
+ bool hasBMI() const { return HasBMI; }
+ bool hasBMI2() const { return HasBMI2; }
+ bool hasRTM() const { return HasRTM; }
+ bool hasHLE() const { return HasHLE; }
+ bool hasADX() const { return HasADX; }
+ bool hasPRFCHW() const { return HasPRFCHW; }
+ bool hasRDSEED() const { return HasRDSEED; }
+ bool isBTMemSlow() const { return IsBTMemSlow; }
+ bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
+ bool hasVectorUAMem() const { return HasVectorUAMem; }
+ bool hasCmpxchg16b() const { return HasCmpxchg16b; }
+ bool useLeaForSP() const { return UseLeaForSP; }
+ bool hasSlowDivide() const { return HasSlowDivide; }
+ bool padShortFunctions() const { return PadShortFunctions; }
+ bool callRegIndirect() const { return CallRegIndirect; }
+
+ bool isAtom() const { return X86ProcFamily == IntelAtom; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetFreeBSD() const {
+ return TargetTriple.getOS() == Triple::FreeBSD;
+ }
+ bool isTargetSolaris() const {
+ return TargetTriple.getOS() == Triple::Solaris;
+ }
+ bool isTargetELF() const {
+ return (TargetTriple.getEnvironment() == Triple::ELF ||
+ TargetTriple.isOSBinFormatELF());
+ }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+ bool isTargetNaCl() const {
+ return TargetTriple.getOS() == Triple::NaCl;
+ }
+ bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
+ bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
+ bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
+ bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
+ bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
+ bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
+ bool isTargetCOFF() const {
+ return (TargetTriple.getEnvironment() != Triple::ELF &&
+ TargetTriple.isOSBinFormatCOFF());
+ }
+ bool isTargetEnvMacho() const { return TargetTriple.isEnvironmentMachO(); }
+
+ bool isTargetWin64() const {
+ // FIXME: x86_64-cygwin has not been released yet.
+ return In64BitMode && TargetTriple.isOSWindows();
+ }
+
+ bool isTargetWin32() const {
+ // FIXME: Cygwin is included for isTargetWin64 -- should it be included
+ // here too?
+ return !In64BitMode && (isTargetMingw() || isTargetWindows());
+ }
+
+ bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
+ bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
+ bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
+
+ bool isPICStyleStubPIC() const {
+ return PICStyle == PICStyles::StubPIC;
+ }
+
+ bool isPICStyleStubNoDynamic() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC;
+ }
+ bool isPICStyleStubAny() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC ||
+ PICStyle == PICStyles::StubPIC; }
+
+ /// ClassifyGlobalReference - Classify a global variable reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+ const TargetMachine &TM)const;
+
+ /// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyBlockAddressReference() const;
+
+ /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+ /// to immediate address.
+ bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
+
+ /// This function returns the name of a function which has an interface
+ /// like the non-standard bzero function, if such a function exists on
+ /// the current subtarget and it is considered prefereable over
+ /// memset with zero passed as the second argument. Otherwise it
+ /// returns null.
+ const char *getBZeroEntry() const;
+
+ /// This function returns true if the target has sincos() routine in its
+ /// compiler runtime or math libraries.
+ bool hasSinCos() const;
+
+ /// enablePostRAScheduler - run for Atom optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ bool postRAScheduler() const { return PostRAScheduler; }
+
+ /// getInstrItins = Return the instruction itineraries based on the
+ /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
new file mode 100644
index 000000000000..8aa58a204260
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -0,0 +1,227 @@
+//===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetMachine.h"
+#include "X86.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeX86Target() {
+ // Register the target.
+ RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
+ RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
+}
+
+void X86_32TargetMachine::anchor() { }
+
+X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false),
+ DL(getSubtargetImpl()->isTargetDarwin() ?
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
+ "n8:16:32-S128" :
+ (getSubtargetImpl()->isTargetCygMing() ||
+ getSubtargetImpl()->isTargetWindows()) ?
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-"
+ "n8:16:32-S32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-"
+ "n8:16:32-S128"),
+ InstrInfo(*this),
+ TLInfo(*this),
+ TSInfo(*this),
+ JITInfo(*this) {
+}
+
+void X86_64TargetMachine::anchor() { }
+
+X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
+ // The x32 ABI dictates the ILP32 programming model for x64.
+ DL(getSubtargetImpl()->isTarget64BitILP32() ?
+ "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128" :
+ "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128"),
+ InstrInfo(*this),
+ TLInfo(*this),
+ TSInfo(*this),
+ JITInfo(*this) {
+}
+
+/// X86TargetMachine ctor - Create an X86 target.
+///
+X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit),
+ FrameLowering(*this, Subtarget),
+ InstrItins(Subtarget.getInstrItineraryData()){
+ // Determine the PICStyle based on the target selected.
+ if (getRelocationModel() == Reloc::Static) {
+ // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.is64Bit()) {
+ // PIC in 64 bit mode is always rip-rel.
+ Subtarget.setPICStyle(PICStyles::RIPRel);
+ } else if (Subtarget.isTargetCygMing()) {
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.isTargetDarwin()) {
+ if (getRelocationModel() == Reloc::PIC_)
+ Subtarget.setPICStyle(PICStyles::StubPIC);
+ else {
+ assert(getRelocationModel() == Reloc::DynamicNoPIC);
+ Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
+ }
+ } else if (Subtarget.isTargetELF()) {
+ Subtarget.setPICStyle(PICStyles::GOT);
+ }
+
+ // default to hard float ABI
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Hard;
+}
+
+//===----------------------------------------------------------------------===//
+// Command line options for x86
+//===----------------------------------------------------------------------===//
+static cl::opt<bool>
+UseVZeroUpper("x86-use-vzeroupper",
+ cl::desc("Minimize AVX to SSE transition penalty"),
+ cl::init(true));
+
+// Temporary option to control early if-conversion for x86 while adding machine
+// models.
+static cl::opt<bool>
+X86EarlyIfConv("x86-early-ifcvt",
+ cl::desc("Enable early if-conversion on X86"));
+
+//===----------------------------------------------------------------------===//
+// X86 Analysis Pass Setup
+//===----------------------------------------------------------------------===//
+
+void X86TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our X86 pass. This
+ // allows the X86 pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createX86TargetTransformInfoPass(this));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// X86 Code Generator Pass Configuration Options.
+class X86PassConfig : public TargetPassConfig {
+public:
+ X86PassConfig(X86TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ X86TargetMachine &getX86TargetMachine() const {
+ return getTM<X86TargetMachine>();
+ }
+
+ const X86Subtarget &getX86Subtarget() const {
+ return *getX86TargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addILPOpts();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new X86PassConfig(this, PM);
+}
+
+bool X86PassConfig::addInstSelector() {
+ // Install an instruction selector.
+ addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
+
+ // For ELF, cleanup any local-dynamic TLS accesses.
+ if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+ addPass(createCleanupLocalDynamicTLSPass());
+
+ // For 32-bit, prepend instructions to set the "global base reg" for PIC.
+ if (!getX86Subtarget().is64Bit())
+ addPass(createGlobalBaseRegPass());
+
+ return false;
+}
+
+bool X86PassConfig::addILPOpts() {
+ if (X86EarlyIfConv && getX86Subtarget().hasCMov()) {
+ addPass(&EarlyIfConverterID);
+ return true;
+ }
+ return false;
+}
+
+bool X86PassConfig::addPreRegAlloc() {
+ return false; // -print-machineinstr shouldn't print after this.
+}
+
+bool X86PassConfig::addPostRegAlloc() {
+ addPass(createX86FloatingPointStackifierPass());
+ return true; // -print-machineinstr should print after this.
+}
+
+bool X86PassConfig::addPreEmitPass() {
+ bool ShouldPrint = false;
+ if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) {
+ addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ ShouldPrint = true;
+ }
+
+ if (getX86Subtarget().hasAVX() && UseVZeroUpper) {
+ addPass(createX86IssueVZeroUpperPass());
+ ShouldPrint = true;
+ }
+
+ if (getOptLevel() != CodeGenOpt::None &&
+ getX86Subtarget().padShortFunctions()) {
+ addPass(createX86PadShortFunctions());
+ ShouldPrint = true;
+ }
+
+ return ShouldPrint;
+}
+
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE) {
+ PM.add(createX86JITCodeEmitterPass(*this, JCE));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
new file mode 100644
index 000000000000..174d3918318d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -0,0 +1,137 @@
+//===-- X86TargetMachine.h - Define TargetMachine for the X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETMACHINE_H
+#define X86TARGETMACHINE_H
+
+#include "X86.h"
+#include "X86FrameLowering.h"
+#include "X86ISelLowering.h"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86SelectionDAGInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class StringRef;
+
+class X86TargetMachine : public LLVMTargetMachine {
+ X86Subtarget Subtarget;
+ X86FrameLowering FrameLowering;
+ InstrItineraryData InstrItins;
+
+public:
+ X86TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool is64Bit);
+
+ virtual const X86InstrInfo *getInstrInfo() const {
+ llvm_unreachable("getInstrInfo not implemented");
+ }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ llvm_unreachable("getJITInfo not implemented");
+ }
+ virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ llvm_unreachable("getTargetLowering not implemented");
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ llvm_unreachable("getSelectionDAGInfo not implemented");
+ }
+ virtual const X86RegisterInfo *getRegisterInfo() const {
+ return &getInstrInfo()->getRegisterInfo();
+ }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ /// \brief Register X86 analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
+ // Set up the pass pipeline.
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ virtual bool addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE);
+};
+
+/// X86_32TargetMachine - X86 32-bit target machine.
+///
+class X86_32TargetMachine : public X86TargetMachine {
+ virtual void anchor();
+ const DataLayout DL; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86JITInfo JITInfo;
+public:
+ X86_32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
+};
+
+/// X86_64TargetMachine - X86 64-bit target machine.
+///
+class X86_64TargetMachine : public X86TargetMachine {
+ virtual void anchor();
+ const DataLayout DL; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86JITInfo JITInfo;
+public:
+ X86_64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
new file mode 100644
index 000000000000..871dacd6a1c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -0,0 +1,49 @@
+//===-- X86TargetObjectFile.cpp - X86 Object Info -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetObjectFile.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+using namespace dwarf;
+
+const MCExpr *X86_64MachoTargetObjectFile::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+
+ // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
+ // is an indirect pc-relative reference.
+ if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
+ const MCSymbol *Sym = Mang->getSymbol(GV);
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
+ const MCExpr *Four = MCConstantExpr::Create(4, getContext());
+ return MCBinaryExpr::CreateAdd(Res, Four, getContext());
+ }
+
+ return TargetLoweringObjectFileMachO::
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+MCSymbol *X86_64MachoTargetObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
+void
+X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
new file mode 100644
index 000000000000..9d26d389d4de
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -0,0 +1,43 @@
+//===-- X86TargetObjectFile.h - X86 Object Info -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
+#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ /// X86_64MachoTargetObjectFile - This TLOF implementation is used for Darwin
+ /// x86-64.
+ class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ public:
+ virtual const MCExpr *
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
+
+ // getCFIPersonalitySymbol - The symbol that gets passed to
+ // .cfi_personality.
+ virtual MCSymbol *
+ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const;
+ };
+
+ /// X86LinuxTargetObjectFile - This implementation is used for linux x86
+ /// and x86-64.
+ class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
new file mode 100644
index 000000000000..a98c6991192c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -0,0 +1,495 @@
+//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// X86 target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86tti"
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't havve a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeX86TTIPass(PassRegistry &);
+}
+
+namespace {
+
+class X86TTI : public ImmutablePass, public TargetTransformInfo {
+ const X86TargetMachine *TM;
+ const X86Subtarget *ST;
+ const X86TargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ X86TTI(const X86TargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializeX86TTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
+ "X86 Target Transform Info", true, true, false)
+char X86TTI::ID = 0;
+
+ImmutablePass *
+llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
+ return new X86TTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// X86 cost model.
+//
+//===----------------------------------------------------------------------===//
+
+X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ // TODO: Currently the __builtin_popcount() implementation using SSE3
+ // instructions is inefficient. Once the problem is fixed, we should
+ // call ST->hasSSE3() instead of ST->hasSSE4().
+ return ST->hasSSE41() ? PSK_FastHardware : PSK_Software;
+}
+
+unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasSSE1())
+ return 0;
+
+ if (ST->is64Bit())
+ return 16;
+ return 8;
+}
+
+unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAVX()) return 256;
+ if (ST->hasSSE1()) return 128;
+ return 0;
+ }
+
+ if (ST->is64Bit())
+ return 64;
+ return 32;
+
+}
+
+unsigned X86TTI::getMaximumUnrollFactor() const {
+ if (ST->isAtom())
+ return 1;
+
+ // Sandybridge and Haswell have multiple execution ports and pipelined
+ // vector units.
+ if (ST->hasAVX())
+ return 4;
+
+ return 2;
+}
+
+unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ static const CostTblEntry<MVT> AVX2CostTable[] = {
+ // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
+ // customize them to detect the cases where shift amount is a scalar one.
+ { ISD::SHL, MVT::v4i32, 1 },
+ { ISD::SRL, MVT::v4i32, 1 },
+ { ISD::SRA, MVT::v4i32, 1 },
+ { ISD::SHL, MVT::v8i32, 1 },
+ { ISD::SRL, MVT::v8i32, 1 },
+ { ISD::SRA, MVT::v8i32, 1 },
+ { ISD::SHL, MVT::v2i64, 1 },
+ { ISD::SRL, MVT::v2i64, 1 },
+ { ISD::SHL, MVT::v4i64, 1 },
+ { ISD::SRL, MVT::v4i64, 1 },
+
+ { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
+ { ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized.
+
+ { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized.
+
+ { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized.
+ { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
+ };
+
+ // Look for AVX2 lowering tricks.
+ if (ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX2CostTable[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT> SSE2UniformConstCostTable[] = {
+ // We don't correctly identify costs of casts because they are marked as
+ // custom.
+ // Constant splats are cheaper for the following instructions.
+ { ISD::SHL, MVT::v16i8, 1 }, // psllw.
+ { ISD::SHL, MVT::v8i16, 1 }, // psllw.
+ { ISD::SHL, MVT::v4i32, 1 }, // pslld
+ { ISD::SHL, MVT::v2i64, 1 }, // psllq.
+
+ { ISD::SRL, MVT::v16i8, 1 }, // psrlw.
+ { ISD::SRL, MVT::v8i16, 1 }, // psrlw.
+ { ISD::SRL, MVT::v4i32, 1 }, // psrld.
+ { ISD::SRL, MVT::v2i64, 1 }, // psrlq.
+
+ { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
+ { ISD::SRA, MVT::v8i16, 1 }, // psraw.
+ { ISD::SRA, MVT::v4i32, 1 }, // psrad.
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasSSE2()) {
+ int Idx = CostTableLookup<MVT>(SSE2UniformConstCostTable,
+ array_lengthof(SSE2UniformConstCostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * SSE2UniformConstCostTable[Idx].Cost;
+ }
+
+
+ static const CostTblEntry<MVT> SSE2CostTable[] = {
+ // We don't correctly identify costs of casts because they are marked as
+ // custom.
+ // For some cases, where the shift amount is a scalar we would be able
+ // to generate better code. Unfortunately, when this is the case the value
+ // (the splat) will get hoisted out of the loop, thereby making it invisible
+ // to ISel. The cost model must return worst case assumptions because it is
+ // used for vectorization and we don't want to make vectorized code worse
+ // than scalar code.
+ { ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
+ { ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
+ { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
+
+ { ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
+ { ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
+ { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
+
+ { ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
+ { ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
+ { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
+ };
+
+ if (ST->hasSSE2()) {
+ int Idx = CostTableLookup<MVT>(SSE2CostTable, array_lengthof(SSE2CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * SSE2CostTable[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT> AVX1CostTable[] = {
+ // We don't have to scalarize unsupported ops. We can issue two half-sized
+ // operations and we only need to extract the upper YMM half.
+ // Two ops + 1 extract + 1 insert = 4.
+ { ISD::MUL, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v8i32, 4 },
+ { ISD::ADD, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v4i64, 4 },
+ { ISD::ADD, MVT::v4i64, 4 },
+ // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
+ // are lowered as a series of long multiplies(3), shifts(4) and adds(2)
+ // Because we believe v4i64 to be a legal type, we must also include the
+ // split factor of two in the cost table. Therefore, the cost here is 18
+ // instead of 9.
+ { ISD::MUL, MVT::v4i64, 18 },
+ };
+
+ // Look for AVX1 lowering tricks.
+ if (ST->hasAVX() && !ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX1CostTable[Idx].Cost;
+ }
+
+ // Custom lowering of vectors.
+ static const CostTblEntry<MVT> CustomLowered[] = {
+ // A v2i64/v4i64 and multiply is custom lowered as a series of long
+ // multiplies(3), shifts(4) and adds(2).
+ { ISD::MUL, MVT::v2i64, 9 },
+ { ISD::MUL, MVT::v4i64, 9 },
+ };
+ int Idx = CostTableLookup<MVT>(CustomLowered, array_lengthof(CustomLowered),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * CustomLowered[Idx].Cost;
+
+ // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
+ // 2x pmuludq, 2x shuffle.
+ if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
+ !ST->hasSSE41())
+ return 6;
+
+ // Fallback to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
+unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ // We only estimate the cost of reverse shuffles.
+ if (Kind != SK_Reverse)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ unsigned Cost = 1;
+ if (LT.second.getSizeInBits() > 128)
+ Cost = 3; // Extract + insert + copy.
+
+ // Multiple by the number of parts.
+ return Cost * LT.first;
+}
+
+unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+ static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
+
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
+
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
+ };
+
+ if (ST->hasAVX()) {
+ int Idx = ConvertCostTableLookup<MVT>(AVXConversionTbl,
+ array_lengthof(AVXConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return AVXConversionTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+ MVT MTy = LT.second;
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ static const CostTblEntry<MVT> SSE42CostTbl[] = {
+ { ISD::SETCC, MVT::v2f64, 1 },
+ { ISD::SETCC, MVT::v4f32, 1 },
+ { ISD::SETCC, MVT::v2i64, 1 },
+ { ISD::SETCC, MVT::v4i32, 1 },
+ { ISD::SETCC, MVT::v8i16, 1 },
+ { ISD::SETCC, MVT::v16i8, 1 },
+ };
+
+ static const CostTblEntry<MVT> AVX1CostTbl[] = {
+ { ISD::SETCC, MVT::v4f64, 1 },
+ { ISD::SETCC, MVT::v8f32, 1 },
+ // AVX1 does not support 8-wide integer compare.
+ { ISD::SETCC, MVT::v4i64, 4 },
+ { ISD::SETCC, MVT::v8i32, 4 },
+ { ISD::SETCC, MVT::v16i16, 4 },
+ { ISD::SETCC, MVT::v32i8, 4 },
+ };
+
+ static const CostTblEntry<MVT> AVX2CostTbl[] = {
+ { ISD::SETCC, MVT::v4i64, 1 },
+ { ISD::SETCC, MVT::v8i32, 1 },
+ { ISD::SETCC, MVT::v16i16, 1 },
+ { ISD::SETCC, MVT::v32i8, 1 },
+ };
+
+ if (ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX2CostTbl[Idx].Cost;
+ }
+
+ if (ST->hasAVX()) {
+ int Idx = CostTableLookup<MVT>(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX1CostTbl[Idx].Cost;
+ }
+
+ if (ST->hasSSE42()) {
+ int Idx = CostTableLookup<MVT>(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * SSE42CostTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ if (Index != -1U) {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+
+ // This type is legalized to a scalar type.
+ if (!LT.second.isVector())
+ return 0;
+
+ // The type may be split. Normalize the index to the new type.
+ unsigned Width = LT.second.getVectorNumElements();
+ Index = Index % Width;
+
+ // Floating point scalars are already located in index #0.
+ if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+ return 0;
+ }
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ // Each load/store unit costs 1.
+ unsigned Cost = LT.first * 1;
+
+ // On Sandybridge 256bit load/stores are double pumped
+ // (but not on Haswell).
+ if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
+ Cost*=2;
+
+ return Cost;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
new file mode 100644
index 000000000000..0f77948c0eff
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -0,0 +1,293 @@
+//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts x86 AVX vzeroupper instructions
+// before calls to SSE encoded functions. This avoids transition latency
+// penalty when tranfering control between AVX encoded instructions and old
+// SSE encoding mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-vzeroupper"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
+
+namespace {
+ struct VZeroUpperInserter : public MachineFunctionPass {
+ static char ID;
+ VZeroUpperInserter() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ virtual const char *getPassName() const { return "X86 vzeroupper inserter";}
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+
+ // Any YMM register live-in to this function?
+ bool FnHasLiveInYmm;
+
+ // BBState - Contains the state of each MBB: unknown, clean, dirty
+ SmallVector<uint8_t, 8> BBState;
+
+ // BBSolved - Keep track of all MBB which had been already analyzed
+ // and there is no further processing required.
+ BitVector BBSolved;
+
+ // Machine Basic Blocks are classified according this pass:
+ //
+ // ST_UNKNOWN - The MBB state is unknown, meaning from the entry state
+ // until the MBB exit there isn't a instruction using YMM to change
+ // the state to dirty, or one of the incoming predecessors is unknown
+ // and there's not a dirty predecessor between them.
+ //
+ // ST_CLEAN - No YMM usage in the end of the MBB. A MBB could have
+ // instructions using YMM and be marked ST_CLEAN, as long as the state
+ // is cleaned by a vzeroupper before any call.
+ //
+ // ST_DIRTY - Any MBB ending with a YMM usage not cleaned up by a
+ // vzeroupper instruction.
+ //
+ // ST_INIT - Placeholder for an empty state set
+ //
+ enum {
+ ST_UNKNOWN = 0,
+ ST_CLEAN = 1,
+ ST_DIRTY = 2,
+ ST_INIT = 3
+ };
+
+ // computeState - Given two states, compute the resulting state, in
+ // the following way
+ //
+ // 1) One dirty state yields another dirty state
+ // 2) All states must be clean for the result to be clean
+ // 3) If none above and one unknown, the result state is also unknown
+ //
+ static unsigned computeState(unsigned PrevState, unsigned CurState) {
+ if (PrevState == ST_INIT)
+ return CurState;
+
+ if (PrevState == ST_DIRTY || CurState == ST_DIRTY)
+ return ST_DIRTY;
+
+ if (PrevState == ST_CLEAN && CurState == ST_CLEAN)
+ return ST_CLEAN;
+
+ return ST_UNKNOWN;
+ }
+
+ };
+ char VZeroUpperInserter::ID = 0;
+}
+
+FunctionPass *llvm::createX86IssueVZeroUpperPass() {
+ return new VZeroUpperInserter();
+}
+
+static bool isYmmReg(unsigned Reg) {
+ if (Reg >= X86::YMM0 && Reg <= X86::YMM15)
+ return true;
+
+ return false;
+}
+
+static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
+ for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
+ E = MRI.livein_end(); I != E; ++I)
+ if (isYmmReg(I->first))
+ return true;
+
+ return false;
+}
+
+static bool clobbersAllYmmRegs(const MachineOperand &MO) {
+ for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) {
+ if (!MO.clobbersPhysReg(reg))
+ return false;
+ }
+ return true;
+}
+
+static bool hasYmmReg(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
+ return true;
+ if (!MO.isReg())
+ continue;
+ if (MO.isDebug())
+ continue;
+ if (isYmmReg(MO.getReg()))
+ return true;
+ }
+ return false;
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// vzero upper instructions before function calls.
+bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool EverMadeChange = false;
+
+ // Fast check: if the function doesn't use any ymm registers, we don't need
+ // to insert any VZEROUPPER instructions. This is constant-time, so it is
+ // cheap in the common case of no ymm use.
+ bool YMMUsed = false;
+ const TargetRegisterClass *RC = &X86::VR256RegClass;
+ for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
+ i != e; i++) {
+ if (!MRI.reg_nodbg_empty(*i)) {
+ YMMUsed = true;
+ break;
+ }
+ }
+ if (!YMMUsed)
+ return EverMadeChange;
+
+ // Pre-compute the existence of any live-in YMM registers to this function
+ FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
+
+ assert(BBState.empty());
+ BBState.resize(MF.getNumBlockIDs(), 0);
+ BBSolved.resize(MF.getNumBlockIDs(), 0);
+
+ // Each BB state depends on all predecessors, loop over until everything
+ // converges. (Once we converge, we can implicitly mark everything that is
+ // still ST_UNKNOWN as ST_CLEAN.)
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ MadeChange |= processBasicBlock(MF, *I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+
+ BBState.clear();
+ BBSolved.clear();
+ return EverMadeChange;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// inserting vzero upper instructions before function calls.
+bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
+ MachineBasicBlock &BB) {
+ bool Changed = false;
+ unsigned BBNum = BB.getNumber();
+
+ // Don't process already solved BBs
+ if (BBSolved[BBNum])
+ return false; // No changes
+
+ // Check the state of all predecessors
+ unsigned EntryState = ST_INIT;
+ for (MachineBasicBlock::const_pred_iterator PI = BB.pred_begin(),
+ PE = BB.pred_end(); PI != PE; ++PI) {
+ EntryState = computeState(EntryState, BBState[(*PI)->getNumber()]);
+ if (EntryState == ST_DIRTY)
+ break;
+ }
+
+
+ // The entry MBB for the function may set the initial state to dirty if
+ // the function receives any YMM incoming arguments
+ if (&BB == MF.begin()) {
+ EntryState = ST_CLEAN;
+ if (FnHasLiveInYmm)
+ EntryState = ST_DIRTY;
+ }
+
+ // The current state is initialized according to the predecessors
+ unsigned CurState = EntryState;
+ bool BBHasCall = false;
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ DebugLoc dl = I->getDebugLoc();
+ bool isControlFlow = MI->isCall() || MI->isReturn();
+
+ // Shortcut: don't need to check regular instructions in dirty state.
+ if (!isControlFlow && CurState == ST_DIRTY)
+ continue;
+
+ if (hasYmmReg(MI)) {
+ // We found a ymm-using instruction; this could be an AVX instruction,
+ // or it could be control flow.
+ CurState = ST_DIRTY;
+ continue;
+ }
+
+ // Check for control-flow out of the current function (which might
+ // indirectly execute SSE instructions).
+ if (!isControlFlow)
+ continue;
+
+ BBHasCall = true;
+
+ // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
+ // registers. This instruction has zero latency. In addition, the processor
+ // changes back to Clean state, after which execution of Intel SSE
+ // instructions or Intel AVX instructions has no transition penalty. Add
+ // the VZEROUPPER instruction before any function call/return that might
+ // execute SSE code.
+ // FIXME: In some cases, we may want to move the VZEROUPPER into a
+ // predecessor block.
+ if (CurState == ST_DIRTY) {
+ // Only insert the VZEROUPPER in case the entry state isn't unknown.
+ // When unknown, only compute the information within the block to have
+ // it available in the exit if possible, but don't change the block.
+ if (EntryState != ST_UNKNOWN) {
+ BuildMI(BB, I, dl, TII->get(X86::VZEROUPPER));
+ ++NumVZU;
+ }
+
+ // After the inserted VZEROUPPER the state becomes clean again, but
+ // other YMM may appear before other subsequent calls or even before
+ // the end of the BB.
+ CurState = ST_CLEAN;
+ }
+ }
+
+ DEBUG(dbgs() << "MBB #" << BBNum
+ << ", current state: " << CurState << '\n');
+
+ // A BB can only be considered solved when we both have done all the
+ // necessary transformations, and have computed the exit state. This happens
+ // in two cases:
+ // 1) We know the entry state: this immediately implies the exit state and
+ // all the necessary transformations.
+ // 2) There are no calls, and and a non-call instruction marks this block:
+ // no transformations are necessary, and we know the exit state.
+ if (EntryState != ST_UNKNOWN || (!BBHasCall && CurState != ST_UNKNOWN))
+ BBSolved[BBNum] = true;
+
+ if (CurState != BBState[BBNum])
+ Changed = true;
+
+ BBState[BBNum] = CurState;
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
new file mode 100644
index 000000000000..7b99967c4f32
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -0,0 +1,800 @@
+//===- XCoreDisassembler.cpp - Disassembler for XCore -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file is part of the XCore Disassembler.
+///
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreRegisterInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+
+/// \brief A disassembler class for XCore.
+class XCoreDisassembler : public MCDisassembler {
+ const MCRegisterInfo *RegInfo;
+public:
+ XCoreDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) :
+ MCDisassembler(STI), RegInfo(Info) {}
+
+ /// \brief See MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+};
+}
+
+static bool readInstruction16(const MemoryObject &region,
+ uint64_t address,
+ uint64_t &size,
+ uint16_t &insn) {
+ uint8_t Bytes[4];
+
+ // We want to read exactly 2 Bytes of data.
+ if (region.readBytes(address, 2, Bytes, NULL) == -1) {
+ size = 0;
+ return false;
+ }
+ // Encoded as a little-endian 16-bit word in the stream.
+ insn = (Bytes[0] << 0) | (Bytes[1] << 8);
+ return true;
+}
+
+static bool readInstruction32(const MemoryObject &region,
+ uint64_t address,
+ uint64_t &size,
+ uint32_t &insn) {
+ uint8_t Bytes[4];
+
+ // We want to read exactly 4 Bytes of data.
+ if (region.readBytes(address, 4, Bytes, NULL) == -1) {
+ size = 0;
+ return false;
+ }
+ // Encoded as a little-endian 32-bit word in the stream.
+ insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
+ (Bytes[3] << 24);
+ return true;
+}
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const XCoreDisassembler *Dis = static_cast<const XCoreDisassembler*>(D);
+ return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
+}
+
+static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus Decode2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RImmInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeR2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSSrcDstBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLR2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode3RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode3RImmInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL3RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL6RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL5RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+#include "XCoreGenDisassemblerTables.inc"
+
+static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder)
+{
+ if (RegNo > 11)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, XCore::GRRegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder)
+{
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, XCore::RRegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val > 11)
+ return MCDisassembler::Fail;
+ static unsigned Values[] = {
+ 32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32
+ };
+ Inst.addOperand(MCOperand::CreateImm(Values[Val]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) {
+ unsigned Combined = fieldFromInstruction(Insn, 6, 5);
+ if (Combined < 27)
+ return MCDisassembler::Fail;
+ if (fieldFromInstruction(Insn, 5, 1)) {
+ if (Combined == 31)
+ return MCDisassembler::Fail;
+ Combined += 5;
+ }
+ Combined -= 27;
+ unsigned Op1High = Combined % 3;
+ unsigned Op2High = Combined / 3;
+ Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 2, 2);
+ Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 0, 2);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+Decode3OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2,
+ unsigned &Op3) {
+ unsigned Combined = fieldFromInstruction(Insn, 6, 5);
+ if (Combined >= 27)
+ return MCDisassembler::Fail;
+
+ unsigned Op1High = Combined % 3;
+ unsigned Op2High = (Combined / 3) % 3;
+ unsigned Op3High = Combined / 9;
+ Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 4, 2);
+ Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 2, 2);
+ Op3 = (Op3High << 2) | fieldFromInstruction(Insn, 0, 2);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try and decode as a 3R instruction.
+ unsigned Opcode = fieldFromInstruction(Insn, 11, 5);
+ switch (Opcode) {
+ case 0x0:
+ Inst.setOpcode(XCore::STW_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x1:
+ Inst.setOpcode(XCore::LDW_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x2:
+ Inst.setOpcode(XCore::ADD_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x3:
+ Inst.setOpcode(XCore::SUB_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x4:
+ Inst.setOpcode(XCore::SHL_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x5:
+ Inst.setOpcode(XCore::SHR_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x6:
+ Inst.setOpcode(XCore::EQ_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x7:
+ Inst.setOpcode(XCore::AND_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x8:
+ Inst.setOpcode(XCore::OR_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x9:
+ Inst.setOpcode(XCore::LDW_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x10:
+ Inst.setOpcode(XCore::LD16S_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x11:
+ Inst.setOpcode(XCore::LD8U_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x12:
+ Inst.setOpcode(XCore::ADD_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x13:
+ Inst.setOpcode(XCore::SUB_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x14:
+ Inst.setOpcode(XCore::SHL_2rus);
+ return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x15:
+ Inst.setOpcode(XCore::SHR_2rus);
+ return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x16:
+ Inst.setOpcode(XCore::EQ_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x17:
+ Inst.setOpcode(XCore::TSETR_3r);
+ return Decode3RImmInstruction(Inst, Insn, Address, Decoder);
+ case 0x18:
+ Inst.setOpcode(XCore::LSS_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x19:
+ Inst.setOpcode(XCore::LSU_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
+Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(Op1));
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op2, Op1);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op2));
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeBitpOperand(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeBitpOperand(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try and decode as a L3R / L2RUS instruction.
+ unsigned Opcode = fieldFromInstruction(Insn, 16, 4) |
+ fieldFromInstruction(Insn, 27, 5) << 4;
+ switch (Opcode) {
+ case 0x0c:
+ Inst.setOpcode(XCore::STW_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x1c:
+ Inst.setOpcode(XCore::XOR_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x2c:
+ Inst.setOpcode(XCore::ASHR_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x3c:
+ Inst.setOpcode(XCore::LDAWF_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x4c:
+ Inst.setOpcode(XCore::LDAWB_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x5c:
+ Inst.setOpcode(XCore::LDA16F_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x6c:
+ Inst.setOpcode(XCore::LDA16B_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x7c:
+ Inst.setOpcode(XCore::MUL_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x8c:
+ Inst.setOpcode(XCore::DIVS_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x9c:
+ Inst.setOpcode(XCore::DIVU_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x10c:
+ Inst.setOpcode(XCore::ST16_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x11c:
+ Inst.setOpcode(XCore::ST8_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x12c:
+ Inst.setOpcode(XCore::ASHR_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x12d:
+ Inst.setOpcode(XCore::OUTPW_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x12e:
+ Inst.setOpcode(XCore::INPW_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x13c:
+ Inst.setOpcode(XCore::LDAWF_l2rus);
+ return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x14c:
+ Inst.setOpcode(XCore::LDAWB_l2rus);
+ return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x15c:
+ Inst.setOpcode(XCore::CRC_l3r);
+ return DecodeL3RSrcDstInstruction(Inst, Insn, Address, Decoder);
+ case 0x18c:
+ Inst.setOpcode(XCore::REMS_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x19c:
+ Inst.setOpcode(XCore::REMU_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
+DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
+ Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
+ Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ Inst.addOperand(MCOperand::CreateImm(Op1));
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op3));
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeBitpOperand(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op3));
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeBitpOperand(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3, Op4, Op5, Op6;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S != MCDisassembler::Success)
+ return S;
+ S = Decode3OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5, Op6);
+ if (S != MCDisassembler::Success)
+ return S;
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op6, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try and decode as a L6R instruction.
+ Inst.clear();
+ unsigned Opcode = fieldFromInstruction(Insn, 27, 5);
+ switch (Opcode) {
+ case 0x00:
+ Inst.setOpcode(XCore::LMUL_l6r);
+ return DecodeL6RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
+DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3, Op4, Op5;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S != MCDisassembler::Success)
+ return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder);
+ S = Decode2OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5);
+ if (S != MCDisassembler::Success)
+ return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ unsigned Op4 = fieldFromInstruction(Insn, 16, 4);
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ }
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ unsigned Op4 = fieldFromInstruction(Insn, 16, 4);
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ }
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+MCDisassembler::DecodeStatus
+XCoreDisassembler::getInstruction(MCInst &instr,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
+ uint16_t insn16;
+
+ if (!readInstruction16(Region, Address, Size, insn16)) {
+ return Fail;
+ }
+
+ // Calling the auto-generated decoder function.
+ DecodeStatus Result = decodeInstruction(DecoderTable16, instr, insn16,
+ Address, this, STI);
+ if (Result != Fail) {
+ Size = 2;
+ return Result;
+ }
+
+ uint32_t insn32;
+
+ if (!readInstruction32(Region, Address, Size, insn32)) {
+ return Fail;
+ }
+
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTable32, instr, insn32, Address, this, STI);
+ if (Result != Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ return Fail;
+}
+
+namespace llvm {
+ extern Target TheXCoreTarget;
+}
+
+static MCDisassembler *createXCoreDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new XCoreDisassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeXCoreDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheXCoreTarget,
+ createXCoreDisassembler);
+}
diff --git a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
new file mode 100644
index 000000000000..1592351c3861
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -0,0 +1,97 @@
+//===-- XCoreInstPrinter.cpp - Convert XCore MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an XCore MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCoreInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#include "XCoreGenAsmWriter.inc"
+
+void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << StringRef(getRegisterName(RegNo)).lower();
+}
+
+void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void XCoreInstPrinter::
+printInlineJT(const MCInst *MI, int opNum, raw_ostream &O) {
+ report_fatal_error("can't handle InlineJT");
+}
+
+void XCoreInstPrinter::
+printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O) {
+ report_fatal_error("can't handle InlineJT32");
+}
+
+static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
+ int Offset = 0;
+ const MCSymbolRefExpr *SRE;
+
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+ SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+ assert(SRE && CE && "Binary expression must be sym+const.");
+ Offset = CE->getValue();
+ } else {
+ SRE = dyn_cast<MCSymbolRefExpr>(Expr);
+ assert(SRE && "Unexpected MCExpr type.");
+ }
+ assert(SRE->getKind() == MCSymbolRefExpr::VK_None);
+
+ OS << SRE->getSymbol();
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+}
+
+void XCoreInstPrinter::
+printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ printExpr(Op.getExpr(), O);
+}
+
+void XCoreInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ printOperand(MI, opNum, O);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << "+";
+ printOperand(MI, opNum+1, O);
+}
diff --git a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
new file mode 100644
index 000000000000..772c515b5c9e
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -0,0 +1,44 @@
+//== XCoreInstPrinter.h - Convert XCore MCInst to assembly syntax -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the XCoreInstPrinter class,
+/// which is used to print XCore MCInst to a .s file.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTPRINTER_H
+#define XCOREINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class TargetMachine;
+
+class XCoreInstPrinter : public MCInstPrinter {
+public:
+ XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+private:
+ void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
+ void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
new file mode 100644
index 000000000000..1cfdbda003b5
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -0,0 +1,32 @@
+//===-- XCoreMCAsmInfo.cpp - XCore asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCAsmInfo.h"
+#include "llvm/ADT/StringRef.h"
+using namespace llvm;
+
+void XCoreMCAsmInfo::anchor() { }
+
+XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) {
+ SupportsDebugInformation = true;
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = 0;
+ ZeroDirective = "\t.space\t";
+ CommentString = "#";
+
+ PrivateGlobalPrefix = ".L";
+ AscizDirective = ".asciiz";
+ WeakDefDirective = "\t.weak\t";
+ WeakRefDirective = "\t.weak\t";
+
+ // Debug
+ HasLEB128 = true;
+}
+
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
new file mode 100644
index 000000000000..076777541e33
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -0,0 +1,31 @@
+//===-- XCoreMCAsmInfo.h - XCore asm properties ----------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the XCoreMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETASMINFO_H
+#define XCORETARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class StringRef;
+ class Target;
+
+ class XCoreMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
+ explicit XCoreMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
new file mode 100644
index 000000000000..b5b072dcbda6
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -0,0 +1,104 @@
+//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCTargetDesc.h"
+#include "InstPrinter/XCoreInstPrinter.h"
+#include "XCoreMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "XCoreGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "XCoreGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createXCoreMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitXCoreMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitXCoreMCRegisterInfo(X, XCore::LR);
+ return X;
+}
+
+static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitXCoreMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new XCoreMCAsmInfo(T, TT);
+
+ // Initial state of the frame pointer is SP.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(XCore::SP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createXCoreMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new XCoreInstPrinter(MAI, MII, MRI);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheXCoreTarget, createXCoreMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheXCoreTarget,
+ createXCoreMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheXCoreTarget, createXCoreMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
+ createXCoreMCSubtargetInfo);
+
+ // Register the MCInstPrinter
+ TargetRegistry::RegisterMCInstPrinter(TheXCoreTarget,
+ createXCoreMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
new file mode 100644
index 000000000000..a255adb2e0f2
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- XCoreMCTargetDesc.h - XCore Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMCTARGETDESC_H
+#define XCOREMCTARGETDESC_H
+
+namespace llvm {
+class Target;
+
+extern Target TheXCoreTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for XCore registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "XCoreGenRegisterInfo.inc"
+
+// Defines symbolic names for the XCore instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "XCoreGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
new file mode 100644
index 000000000000..00e34e04fbe5
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- XCoreTargetInfo.cpp - XCore Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheXCoreTarget;
+
+extern "C" void LLVMInitializeXCoreTargetInfo() {
+ RegisterTarget<Triple::xcore> X(TheXCoreTarget, "xcore", "XCore");
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCore.h b/contrib/llvm/lib/Target/XCore/XCore.h
new file mode 100644
index 000000000000..08f091e5b870
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCore.h
@@ -0,0 +1,32 @@
+//===-- XCore.h - Top-level interface for XCore representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// XCore back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_XCORE_H
+#define TARGET_XCORE_H
+
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+ class XCoreTargetMachine;
+ class formatted_raw_ostream;
+
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCore.td b/contrib/llvm/lib/Target/XCore/XCore.td
new file mode 100644
index 000000000000..e9a6d88fd68e
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCore.td
@@ -0,0 +1,53 @@
+//===-- XCore.td - Describe the XCore Target Machine -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the XCore target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Descriptions
+//===----------------------------------------------------------------------===//
+
+include "XCoreRegisterInfo.td"
+include "XCoreInstrInfo.td"
+include "XCoreCallingConv.td"
+
+def XCoreInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// XCore processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"xs1b-generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def XCoreAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def XCore : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = XCoreInstrInfo;
+ let AssemblyWriters = [XCoreAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
new file mode 100644
index 000000000000..0d146ba4d98d
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -0,0 +1,342 @@
+//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the XAS-format XCore assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCore.h"
+#include "InstPrinter/XCoreInstPrinter.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreMCInstLower.h"
+#include "XCoreSubtarget.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
+ cl::desc("Maximum number of threads (for emulation thread-local storage)"),
+ cl::Hidden,
+ cl::value_desc("number"),
+ cl::init(8));
+
+namespace {
+ class XCoreAsmPrinter : public AsmPrinter {
+ const XCoreSubtarget &Subtarget;
+ XCoreMCInstLower MCInstLowering;
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+ public:
+ explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()),
+ MCInstLowering(*this) {}
+
+ virtual const char *getPassName() const {
+ return "XCore Assembly Printer";
+ }
+
+ void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const std::string &directive = ".jmptable");
+ void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ printInlineJT(MI, opNum, O, ".jmptable32");
+ }
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
+ virtual void EmitGlobalVariable(const GlobalVariable *GV);
+
+ void EmitFunctionEntryLabel();
+ void EmitInstruction(const MachineInstr *MI);
+ void EmitFunctionBodyStart();
+ void EmitFunctionBodyEnd();
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ };
+} // end of anonymous namespace
+
+void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
+ assert(((GV->hasExternalLinkage() ||
+ GV->hasWeakLinkage()) ||
+ GV->hasLinkOnceLinkage()) && "Unexpected linkage");
+ if (ArrayType *ATy = dyn_cast<ArrayType>(
+ cast<PointerType>(GV->getType())->getElementType())) {
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ // FIXME: MCStreamerize.
+ OutStreamer.EmitRawText(StringRef(".globound"));
+ OutStreamer.EmitRawText("\t.set\t" + Twine(Sym->getName()));
+ OutStreamer.EmitRawText(".globound," + Twine(ATy->getNumElements()));
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ OutStreamer.EmitRawText(MAI->getWeakDefDirective() +Twine(Sym->getName())+
+ ".globound");
+ }
+ }
+}
+
+void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (!GV->hasInitializer() ||
+ EmitSpecialLLVMGlobal(GV))
+ return;
+
+ const DataLayout *TD = TM.getDataLayout();
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
+
+
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const Constant *C = GV->getInitializer();
+ unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
+
+ // Mark the start of the global
+ OutStreamer.EmitRawText("\t.cc_top " + Twine(GVSym->getName()) + ".data," +
+ GVSym->getName());
+
+ switch (GV->getLinkage()) {
+ case GlobalValue::AppendingLinkage:
+ report_fatal_error("AppendingLinkage is not supported by this target!");
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::ExternalLinkage:
+ emitArrayBound(GVSym, GV);
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ case GlobalValue::DLLImportLinkage:
+ llvm_unreachable("DLLImport linkage is not supported by this target!");
+ case GlobalValue::DLLExportLinkage:
+ llvm_unreachable("DLLExport linkage is not supported by this target!");
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+
+ EmitAlignment(Align > 2 ? Align : 2, GV);
+
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ if (GV->isThreadLocal()) {
+ Size *= MaxThreads;
+ }
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+ OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," +
+ Twine(Size));
+ }
+ OutStreamer.EmitLabel(GVSym);
+
+ EmitGlobalConstant(C);
+ if (GV->isThreadLocal()) {
+ for (unsigned i = 1; i < MaxThreads; ++i)
+ EmitGlobalConstant(C);
+ }
+ // The ABI requires that unsigned scalar types smaller than 32 bits
+ // are padded to 32 bits.
+ if (Size < 4)
+ OutStreamer.EmitZeros(4 - Size);
+
+ // Mark the end of the global
+ OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data");
+}
+
+void XCoreAsmPrinter::EmitFunctionBodyStart() {
+ MCInstLowering.Initialize(Mang, &MF->getContext());
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void XCoreAsmPrinter::EmitFunctionBodyEnd() {
+ // Emit function end directives
+ OutStreamer.EmitRawText("\t.cc_bottom " + Twine(CurrentFnSym->getName()) +
+ ".function");
+}
+
+void XCoreAsmPrinter::EmitFunctionEntryLabel() {
+ // Mark the start of the function
+ OutStreamer.EmitRawText("\t.cc_top " + Twine(CurrentFnSym->getName()) +
+ ".function," + CurrentFnSym->getName());
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+void XCoreAsmPrinter::
+printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const std::string &directive) {
+ unsigned JTI = MI->getOperand(opNum).getIndex();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ O << "\t" << directive << " ";
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (i > 0)
+ O << ",";
+ O << *MBB->getSymbol();
+ }
+}
+
+void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << XCoreInstPrinter::getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_BlockAddress:
+ O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+ default:
+ llvm_unreachable("not implemented");
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+void XCoreAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps == 4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+}
+
+MachineLocation XCoreAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ // Handles frame addresses emitted in XCoreInstrInfo::emitFrameIndexDebugValue.
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ switch (MI->getOpcode()) {
+ case XCore::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ case XCore::ADD_2rus:
+ if (MI->getOperand(2).getImm() == 0) {
+ O << "\tmov "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg());
+ OutStreamer.EmitRawText(O.str());
+ return;
+ }
+ break;
+ case XCore::BR_JT:
+ case XCore::BR_JT32:
+ O << "\tbru "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg()) << '\n';
+ if (MI->getOpcode() == XCore::BR_JT)
+ printInlineJT(MI, 0, O);
+ else
+ printInlineJT32(MI, 0, O);
+ O << '\n';
+ OutStreamer.EmitRawText(O.str());
+ return;
+ }
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreAsmPrinter() {
+ RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td b/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
new file mode 100644
index 000000000000..b20d71f49cfd
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
@@ -0,0 +1,36 @@
+//===- XCoreCallingConv.td - Calling Conventions for XCore -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for XCore architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XCore Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_XCore : CallingConv<[
+ // i32 are returned in registers R0, R1, R2, R3
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// XCore Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_XCore : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R11.
+ CCIfNest<CCAssignToReg<[R11]>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ // Integer values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32], CCAssignToStack<4, 4>>
+]>;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
new file mode 100644
index 000000000000..beeb07f831c6
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -0,0 +1,423 @@
+//===-- XCoreFrameLowering.cpp - Frame info for XCore Target --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreFrameLowering.h"
+#include "XCore.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+// helper functions. FIXME: Eliminate.
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+static void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl,
+ const TargetInstrInfo &TII) {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("loadFromStack offset too big " + Twine(Offset));
+ int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
+ .addImm(Offset);
+}
+
+
+static void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl,
+ const TargetInstrInfo &TII) {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("storeToStack offset too big " + Twine(Offset));
+ int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode))
+ .addReg(SrcReg)
+ .addImm(Offset);
+}
+
+
+//===----------------------------------------------------------------------===//
+// XCoreFrameLowering:
+//===----------------------------------------------------------------------===//
+
+XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0) {
+ // Do nothing
+}
+
+bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = &MF.getMMI();
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ bool FP = hasFP(MF);
+ const AttributeSet &PAL = MF.getFunction()->getAttributes();
+
+ if (PAL.hasAttrSomewhere(Attribute::Nest))
+ loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
+ }
+ bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(MF);
+
+ // Do we need to allocate space on the stack?
+ if (FrameSize) {
+ bool saveLR = XFI->getUsesLR();
+ bool LRSavedOnEntry = false;
+ int Opcode;
+ if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
+ Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
+ MBB.addLiveIn(XCore::LR);
+ saveLR = false;
+ LRSavedOnEntry = true;
+ } else {
+ Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ }
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+
+ if (emitFrameMoves) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ // Show update of SP.
+ MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ if (LRSavedOnEntry) {
+ MachineLocation CSDst(MachineLocation::VirtualFP, 0);
+ MachineLocation CSSrc(XCore::LR);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+ }
+ if (saveLR) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl, TII);
+ MBB.addLiveIn(XCore::LR);
+
+ if (emitFrameMoves) {
+ MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
+ MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
+ MachineLocation CSSrc(XCore::LR);
+ MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
+ }
+ }
+ }
+
+ if (FP) {
+ // Save R10 to the stack.
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl, TII);
+ // R10 is live-in. It is killed at the spill.
+ MBB.addLiveIn(XCore::R10);
+ if (emitFrameMoves) {
+ MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
+ MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
+ MachineLocation CSSrc(XCore::R10);
+ MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
+ }
+ // Set the FP from the SP.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
+ .addImm(0);
+ if (emitFrameMoves) {
+ // Show FP is now valid.
+ MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+ MachineLocation SPDst(FramePtr);
+ MachineLocation SPSrc(MachineLocation::VirtualFP);
+ MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+ }
+
+ if (emitFrameMoves) {
+ // Frame moves for callee saved.
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
+ XFI->getSpillLabels();
+ for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+ MCSymbol *SpillLabel = SpillLabels[I].first;
+ CalleeSavedInfo &CSI = SpillLabels[I].second;
+ int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+ unsigned Reg = CSI.getReg();
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
+ }
+ }
+}
+
+void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+ if (FP) {
+ // Restore the stack pointer.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
+ .addReg(FramePtr);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize));
+ }
+
+ if (FrameSize) {
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+
+ if (FP) {
+ // Restore R10
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ FPSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl, TII);
+ }
+ bool restoreLR = XFI->getUsesLR();
+ if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ LRSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl, TII);
+ restoreLR = false;
+ }
+ if (restoreLR) {
+ // Fold prologue into return instruction
+ assert(MBBI->getOpcode() == XCore::RETSP_u6
+ || MBBI->getOpcode() == XCore::RETSP_lu6);
+ int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+ MBB.erase(MBBI);
+ } else {
+ int Opcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
+ }
+ }
+}
+
+bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
+ bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(it->getReg());
+
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true,
+ it->getFrameIdx(), RC, TRI);
+ if (emitFrameMoves) {
+ MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
+ XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
+ }
+ }
+ return true;
+}
+
+bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const{
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ bool AtStart = MI == MBB.begin();
+ MachineBasicBlock::iterator BeforeI = MI;
+ if (!AtStart)
+ --BeforeI;
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, it->getReg(), it->getFrameIdx(),
+ RC, TRI);
+ assert(MI != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert multiple
+ // instructions.
+ if (AtStart)
+ MI = MBB.begin();
+ else {
+ MI = BeforeI;
+ ++MI;
+ }
+ }
+ return true;
+}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void XCoreFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
+ // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ assert(Amount%4 == 0);
+ Amount /= 4;
+
+ bool isU6 = isImmU6(Amount);
+ if (!isU6 && !isImmU16(Amount)) {
+ // FIX could emit multiple instructions in this case.
+#ifndef NDEBUG
+ errs() << "eliminateCallFramePseudoInstr size too big: "
+ << Amount << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MachineInstr *New;
+ if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
+ int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
+ .addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
+ int Opcode = isU6 ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
+ .addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void
+XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
+ const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ if (LRUsed) {
+ MF.getRegInfo().setPhysRegUnused(XCore::LR);
+
+ bool isVarArg = MF.getFunction()->isVarArg();
+ int FrameIdx;
+ if (! isVarArg) {
+ // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
+ } else {
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
+ false);
+ }
+ XFI->setUsesLR(FrameIdx);
+ XFI->setLRSpillSlot(FrameIdx);
+ }
+ if (RegInfo->requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ if (hasFP(MF)) {
+ // A callee save register is used to hold the FP.
+ // This needs saving / restoring in the epilogue / prologue.
+ XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
new file mode 100644
index 000000000000..ebad62f2fa53
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
@@ -0,0 +1,58 @@
+//===-- XCoreFrameLowering.h - Frame info for XCore Target ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREFRAMEINFO_H
+#define XCOREFRAMEINFO_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class XCoreSubtarget;
+
+ class XCoreFrameLowering: public TargetFrameLowering {
+ public:
+ XCoreFrameLowering(const XCoreSubtarget &STI);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ //! Stack slot size (4 bytes)
+ static int stackSlotSize() {
+ return 4;
+ }
+ };
+}
+
+#endif // XCOREFRAMEINFO_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
new file mode 100644
index 000000000000..fbf86c523054
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -0,0 +1,296 @@
+//===-- XCoreISelDAGToDAG.cpp - A dag to dag inst selector for XCore ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the XCore target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+/// XCoreDAGToDAGISel - XCore specific code to select XCore machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class XCoreDAGToDAGISel : public SelectionDAGISel {
+ const XCoreTargetLowering &Lowering;
+ const XCoreSubtarget &Subtarget;
+
+ public:
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ SDNode *Select(SDNode *N);
+ SDNode *SelectBRIND(SDNode *N);
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ inline bool immMskBitp(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ uint32_t value = (uint32_t)N->getZExtValue();
+ if (!isMask_32(value)) {
+ return false;
+ }
+ int msksize = 32 - CountLeadingZeros_32(value);
+ return (msksize >= 1 && msksize <= 8) ||
+ msksize == 16 || msksize == 24 || msksize == 32;
+ }
+
+ // Complex Pattern Selectors.
+ bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ virtual const char *getPassName() const {
+ return "XCore DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "XCoreGenDAGISel.inc"
+ };
+} // end anonymous namespace
+
+/// createXCoreISelDag - This pass converts a legalized DAG into a
+/// XCore-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new XCoreDAGToDAGISel(TM, OptLevel);
+}
+
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ FrameIndexSDNode *FIN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant positive word offset from frame index
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant word offset from a object in the data region
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant word offset from a object in the data region
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+ if (immMskBitp(N)) {
+ // Transformation function: get the size of a mask
+ // Look for the first non-zero bit
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
+ return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
+ MVT::i32, MskSize);
+ }
+ else if (!isUInt<16>(Val)) {
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI.getPointerTy());
+ SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
+ MVT::Other, CPIdx,
+ CurDAG->getEntryNode());
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 4, 4);
+ cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1);
+ return node;
+ }
+ break;
+ }
+ case XCoreISD::LADD: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case XCoreISD::LSUB: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case XCoreISD::MACCU: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::MACCS: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::LMUL: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::CRC8: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case ISD::BRIND:
+ if (SDNode *ResNode = SelectBRIND(N))
+ return ResNode;
+ break;
+ // Other cases are autogenerated.
+ }
+ return SelectCode(N);
+}
+
+/// Given a chain return a new chain where any appearance of Old is replaced
+/// by New. There must be at most one instruction between Old and Chain and
+/// this instruction must be a TokenFactor. Returns an empty SDValue if
+/// these conditions don't hold.
+static SDValue
+replaceInChain(SelectionDAG *CurDAG, SDValue Chain, SDValue Old, SDValue New)
+{
+ if (Chain == Old)
+ return New;
+ if (Chain->getOpcode() != ISD::TokenFactor)
+ return SDValue();
+ SmallVector<SDValue, 8> Ops;
+ bool found = false;
+ for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) {
+ if (Chain->getOperand(i) == Old) {
+ Ops.push_back(New);
+ found = true;
+ } else {
+ Ops.push_back(Chain->getOperand(i));
+ }
+ }
+ if (!found)
+ return SDValue();
+ return CurDAG->getNode(ISD::TokenFactor, Chain->getDebugLoc(), MVT::Other,
+ &Ops[0], Ops.size());
+}
+
+SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // (brind (int_xcore_checkevent (addr)))
+ SDValue Chain = N->getOperand(0);
+ SDValue Addr = N->getOperand(1);
+ if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return 0;
+ unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue();
+ if (IntNo != Intrinsic::xcore_checkevent)
+ return 0;
+ SDValue nextAddr = Addr->getOperand(2);
+ SDValue CheckEventChainOut(Addr.getNode(), 1);
+ if (!CheckEventChainOut.use_empty()) {
+ // If the chain out of the checkevent intrinsic is an operand of the
+ // indirect branch or used in a TokenFactor which is the operand of the
+ // indirect branch then build a new chain which uses the chain coming into
+ // the checkevent intrinsic instead.
+ SDValue CheckEventChainIn = Addr->getOperand(0);
+ SDValue NewChain = replaceInChain(CurDAG, Chain, CheckEventChainOut,
+ CheckEventChainIn);
+ if (!NewChain.getNode())
+ return 0;
+ Chain = NewChain;
+ }
+ // Enable events on the thread using setsr 1 and then disable them immediately
+ // after with clrsr 1. If any resources owned by the thread are ready an event
+ // will be taken. If no resource is ready we branch to the address which was
+ // the operand to the checkevent intrinsic.
+ SDValue constOne = getI32Imm(1);
+ SDValue Glue =
+ SDValue(CurDAG->getMachineNode(XCore::SETSR_branch_u6, dl, MVT::Glue,
+ constOne, Chain), 0);
+ Glue =
+ SDValue(CurDAG->getMachineNode(XCore::CLRSR_branch_u6, dl, MVT::Glue,
+ constOne, Glue), 0);
+ if (nextAddr->getOpcode() == XCoreISD::PCRelativeWrapper &&
+ nextAddr->getOperand(0)->getOpcode() == ISD::TargetBlockAddress) {
+ return CurDAG->SelectNodeTo(N, XCore::BRFU_lu6, MVT::Other,
+ nextAddr->getOperand(0), Glue);
+ }
+ return CurDAG->SelectNodeTo(N, XCore::BAU_1r, MVT::Other, nextAddr, Glue);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
new file mode 100644
index 000000000000..a5d2be88db7d
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -0,0 +1,1644 @@
+//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-lower"
+
+#include "XCoreISelLowering.h"
+#include "XCore.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreTargetMachine.h"
+#include "XCoreTargetObjectFile.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+const char *XCoreTargetLowering::
+getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode)
+ {
+ case XCoreISD::BL : return "XCoreISD::BL";
+ case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
+ case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
+ case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
+ case XCoreISD::STWSP : return "XCoreISD::STWSP";
+ case XCoreISD::RETSP : return "XCoreISD::RETSP";
+ case XCoreISD::LADD : return "XCoreISD::LADD";
+ case XCoreISD::LSUB : return "XCoreISD::LSUB";
+ case XCoreISD::LMUL : return "XCoreISD::LMUL";
+ case XCoreISD::MACCU : return "XCoreISD::MACCU";
+ case XCoreISD::MACCS : return "XCoreISD::MACCS";
+ case XCoreISD::CRC8 : return "XCoreISD::CRC8";
+ case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
+ case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
+ default : return NULL;
+ }
+}
+
+XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
+ : TargetLowering(XTM, new XCoreTargetObjectFile()),
+ TM(XTM),
+ Subtarget(*XTM.getSubtargetImpl()) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &XCore::GRRegsRegClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+ setStackPointerRegisterToSaveRestore(XCore::SP);
+
+ setSchedulingPreference(Sched::RegPressure);
+
+ // Use i32 for setcc operations results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ // XCore does not have the NodeTypes below.
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+
+ // Stop the combiner recombining select and set_cc
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // 64bit
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // Bit Manipulation
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Jump tables.
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32 , Custom);
+
+ // Thread Local Storage
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+
+ // Conversion of i64 -> double produces constantpool nodes
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Loads
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+
+ // Custom expand misaligned loads / stores.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ // Varargs
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
+ // Dynamic stack
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 4;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize
+ = MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 2;
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ADD);
+
+ setMinFunctionAlignment(1);
+}
+
+SDValue XCoreTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode())
+ {
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
+ case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG);
+ // FIXME: Remove these when LegalizeDAGTypes lands.
+ case ISD::ADD:
+ case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operand");
+ }
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this!");
+ case ISD::ADD:
+ case ISD::SUB:
+ Results.push_back(ExpandADDSUB(N, DAG));
+ return;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::
+LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2),
+ Op.getOperand(3), Op.getOperand(4));
+ return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue XCoreTargetLowering::
+getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
+ SelectionDAG &DAG) const
+{
+ // FIXME there is no actual debug info here
+ DebugLoc dl = GA.getDebugLoc();
+ const GlobalValue *UnderlyingGV = GV;
+ // If GV is an alias then use the aliasee to determine the wrapper type
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ UnderlyingGV = GA->resolveAliasedGlobal();
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(UnderlyingGV)) {
+ if (GVar->isConstant())
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
+ }
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32);
+ return getGlobalAddressWrapper(GA, GV, DAG);
+}
+
+static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
+}
+
+static inline bool isZeroLengthArray(Type *Ty) {
+ ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
+ return AT && (AT->getNumElements() == 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ // FIXME there isn't really debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ // transform to label + getid() * size
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine size
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ if (!GVar) {
+ llvm_unreachable("Thread local object not a GlobalVariable?");
+ }
+ Type *Ty = cast<PointerType>(GV->getType())->getElementType();
+ if (!Ty->isSized() || isZeroLengthArray(Ty)) {
+#ifndef NDEBUG
+ errs() << "Size of thread local object " << GVar->getName()
+ << " is unknown\n";
+#endif
+ llvm_unreachable(0);
+ }
+ SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
+ const DataLayout *TD = TM.getDataLayout();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
+ DAG.getConstant(Size, MVT::i32));
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset);
+}
+
+SDValue XCoreTargetLowering::
+LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy());
+
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
+}
+
+SDValue XCoreTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really debug info here
+ DebugLoc dl = CP->getDebugLoc();
+ EVT PtrVT = Op.getValueType();
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry()) {
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ } else {
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ }
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
+}
+
+unsigned XCoreTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
+SDValue XCoreTargetLowering::
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ unsigned NumEntries = MJTI->getJumpTables()[JTI].MBBs.size();
+ if (NumEntries <= 32) {
+ return DAG.getNode(XCoreISD::BR_JT, dl, MVT::Other, Chain, TargetJT, Index);
+ }
+ assert((NumEntries >> 31) == 0);
+ SDValue ScaledIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(XCoreISD::BR_JT32, dl, MVT::Other, Chain, TargetJT,
+ ScaledIndex);
+}
+
+static bool
+IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
+ int64_t &Offset)
+{
+ if (Addr.getOpcode() != ISD::ADD) {
+ return false;
+ }
+ ConstantSDNode *CN = 0;
+ if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ return false;
+ }
+ int64_t off = CN->getSExtValue();
+ const SDValue &Base = Addr.getOperand(0);
+ const SDValue *Root = &Base;
+ if (Base.getOpcode() == ISD::ADD &&
+ Base.getOperand(1).getOpcode() == ISD::SHL) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1)
+ .getOperand(1));
+ if (CN && (CN->getSExtValue() >= 2)) {
+ Root = &Base.getOperand(0);
+ }
+ }
+ if (isa<FrameIndexSDNode>(*Root)) {
+ // All frame indicies are word aligned
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
+ Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
+ // All dp / cp relative addresses are word aligned
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ // Check for an aligned global variable.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(*Root)) {
+ const GlobalValue *GV = GA->getGlobal();
+ if (GA->getOffset() == 0 && GV->getAlignment() >= 4) {
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ }
+ return false;
+}
+
+SDValue XCoreTargetLowering::
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ "Unexpected extension type");
+ assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
+ if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
+ return SDValue();
+
+ unsigned ABIAlignment = getDataLayout()->
+ getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ // Leave aligned load alone.
+ if (LD->getAlignment() >= ABIAlignment)
+ return SDValue();
+
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Base;
+ int64_t Offset;
+ if (!LD->isVolatile() &&
+ IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
+ if (Offset % 4 == 0) {
+ // We've managed to infer better alignment information than the load
+ // already has. Use an aligned load.
+ //
+ return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ }
+ // Lower to
+ // ldw low, base[offset >> 2]
+ // ldw high, base[(offset >> 2) + 1]
+ // shr low_shifted, low, (offset & 0x3) * 8
+ // shl high_shifted, high, 32 - (offset & 0x3) * 8
+ // or result, low_shifted, high_shifted
+ SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
+ SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
+ SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
+ SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
+
+ SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
+
+ SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
+ LowAddr, MachinePointerInfo(),
+ false, false, false, 0);
+ SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
+ HighAddr, MachinePointerInfo(),
+ false, false, false, 0);
+ SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, DL);
+ }
+
+ if (LD->getAlignment() == 2) {
+ SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain,
+ BasePtr, LD->getPointerInfo(), MVT::i16,
+ LD->isVolatile(), LD->isNonTemporal(), 2);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
+ HighAddr,
+ LD->getPointerInfo().getWithOffset(2),
+ MVT::i16, LD->isVolatile(),
+ LD->isNonTemporal(), 2);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
+ DAG.getConstant(16, MVT::i32));
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, DL);
+ }
+
+ // Lower to a call to __misaligned_load(BasePtr).
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = BasePtr;
+ Args.push_back(Entry);
+
+ TargetLowering::CallLoweringInfo CLI(Chain, IntPtrTy, false, false,
+ false, false, 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
+ Args, DAG, DL);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ SDValue Ops[] =
+ { CallResult.first, CallResult.second };
+
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue XCoreTargetLowering::
+LowerSTORE(SDValue Op, SelectionDAG &DAG) const
+{
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+ assert(!ST->isTruncatingStore() && "Unexpected store type");
+ assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
+ if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ return SDValue();
+ }
+ unsigned ABIAlignment = getDataLayout()->
+ getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ // Leave aligned store alone.
+ if (ST->getAlignment() >= ABIAlignment) {
+ return SDValue();
+ }
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (ST->getAlignment() == 2) {
+ SDValue Low = Value;
+ SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
+ DAG.getConstant(16, MVT::i32));
+ SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
+ ST->getPointerInfo(), MVT::i16,
+ ST->isVolatile(), ST->isNonTemporal(),
+ 2);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
+ ST->getPointerInfo().getWithOffset(2),
+ MVT::i16, ST->isVolatile(),
+ ST->isNonTemporal(), 2);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
+ }
+
+ // Lower to a call to __misaligned_store(BasePtr, Value).
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = BasePtr;
+ Args.push_back(Entry);
+
+ Entry.Node = Value;
+ Args.push_back(Entry);
+
+ TargetLowering::CallLoweringInfo CLI(Chain,
+ Type::getVoidTy(*DAG.getContext()), false, false,
+ false, false, 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
+ Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ return CallResult.second;
+}
+
+SDValue XCoreTargetLowering::
+LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
+{
+ assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::SMUL_LOHI &&
+ "Unexpected operand to lower!");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Hi = DAG.getNode(XCoreISD::MACCS, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Zero, Zero,
+ LHS, RHS);
+ SDValue Lo(Hi.getNode(), 1);
+ SDValue Ops[] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue XCoreTargetLowering::
+LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
+{
+ assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::UMUL_LOHI &&
+ "Unexpected operand to lower!");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), LHS, RHS,
+ Zero, Zero);
+ SDValue Lo(Hi.getNode(), 1);
+ SDValue Ops[] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// isADDADDMUL - Return whether Op is in a form that is equivalent to
+/// add(add(mul(x,y),a),b). If requireIntermediatesHaveOneUse is true then
+/// each intermediate result in the calculation must also have a single use.
+/// If the Op is in the correct form the constituent parts are written to Mul0,
+/// Mul1, Addend0 and Addend1.
+static bool
+isADDADDMUL(SDValue Op, SDValue &Mul0, SDValue &Mul1, SDValue &Addend0,
+ SDValue &Addend1, bool requireIntermediatesHaveOneUse)
+{
+ if (Op.getOpcode() != ISD::ADD)
+ return false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue AddOp;
+ SDValue OtherOp;
+ if (N0.getOpcode() == ISD::ADD) {
+ AddOp = N0;
+ OtherOp = N1;
+ } else if (N1.getOpcode() == ISD::ADD) {
+ AddOp = N1;
+ OtherOp = N0;
+ } else {
+ return false;
+ }
+ if (requireIntermediatesHaveOneUse && !AddOp.hasOneUse())
+ return false;
+ if (OtherOp.getOpcode() == ISD::MUL) {
+ // add(add(a,b),mul(x,y))
+ if (requireIntermediatesHaveOneUse && !OtherOp.hasOneUse())
+ return false;
+ Mul0 = OtherOp.getOperand(0);
+ Mul1 = OtherOp.getOperand(1);
+ Addend0 = AddOp.getOperand(0);
+ Addend1 = AddOp.getOperand(1);
+ return true;
+ }
+ if (AddOp.getOperand(0).getOpcode() == ISD::MUL) {
+ // add(add(mul(x,y),a),b)
+ if (requireIntermediatesHaveOneUse && !AddOp.getOperand(0).hasOneUse())
+ return false;
+ Mul0 = AddOp.getOperand(0).getOperand(0);
+ Mul1 = AddOp.getOperand(0).getOperand(1);
+ Addend0 = AddOp.getOperand(1);
+ Addend1 = OtherOp;
+ return true;
+ }
+ if (AddOp.getOperand(1).getOpcode() == ISD::MUL) {
+ // add(add(a,mul(x,y)),b)
+ if (requireIntermediatesHaveOneUse && !AddOp.getOperand(1).hasOneUse())
+ return false;
+ Mul0 = AddOp.getOperand(1).getOperand(0);
+ Mul1 = AddOp.getOperand(1).getOperand(1);
+ Addend0 = AddOp.getOperand(0);
+ Addend1 = OtherOp;
+ return true;
+ }
+ return false;
+}
+
+SDValue XCoreTargetLowering::
+TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const
+{
+ SDValue Mul;
+ SDValue Other;
+ if (N->getOperand(0).getOpcode() == ISD::MUL) {
+ Mul = N->getOperand(0);
+ Other = N->getOperand(1);
+ } else if (N->getOperand(1).getOpcode() == ISD::MUL) {
+ Mul = N->getOperand(1);
+ Other = N->getOperand(0);
+ } else {
+ return SDValue();
+ }
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, RL, AddendL, AddendH;
+ LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(0), DAG.getConstant(0, MVT::i32));
+ RL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(1), DAG.getConstant(0, MVT::i32));
+ AddendL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Other, DAG.getConstant(0, MVT::i32));
+ AddendH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Other, DAG.getConstant(1, MVT::i32));
+ APInt HighMask = APInt::getHighBitsSet(64, 32);
+ unsigned LHSSB = DAG.ComputeNumSignBits(Mul.getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(Mul.getOperand(1));
+ if (DAG.MaskedValueIsZero(Mul.getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(Mul.getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ SDValue Hi = DAG.getNode(XCoreISD::MACCU, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ if (LHSSB > 32 && RHSSB > 32) {
+ // The inputs are both sign-extended.
+ SDValue Hi = DAG.getNode(XCoreISD::MACCS, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ SDValue LH, RH;
+ LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(0), DAG.getConstant(1, MVT::i32));
+ RH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(1), DAG.getConstant(1, MVT::i32));
+ SDValue Hi = DAG.getNode(XCoreISD::MACCU, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ RH = DAG.getNode(ISD::MUL, dl, MVT::i32, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, MVT::i32, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, LH);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue XCoreTargetLowering::
+ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
+{
+ assert(N->getValueType(0) == MVT::i64 &&
+ (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Unknown operand to lower!");
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue Result = TryExpandADDWithMul(N, DAG);
+ if (Result.getNode() != 0)
+ return Result;
+ }
+
+ DebugLoc dl = N->getDebugLoc();
+
+ // Extract components
+ SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(0, MVT::i32));
+ SDValue LHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(1, MVT::i32));
+ SDValue RHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(0, MVT::i32));
+ SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(1, MVT::i32));
+
+ // Expand
+ unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
+ XCoreISD::LSUB;
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSL, RHSL, Zero);
+ SDValue Carry(Lo.getNode(), 1);
+
+ SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSH, RHSH, Carry);
+ SDValue Ignored(Hi.getNode(), 1);
+ // Merge the pieces
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue XCoreTargetLowering::
+LowerVAARG(SDValue Op, SelectionDAG &DAG) const
+{
+ llvm_unreachable("unimplemented");
+ // FIXME Arguments passed by reference need a extra dereference.
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(1), MachinePointerInfo(V),
+ false, false, false, 0);
+ // Increment the pointer, VAList, to the next vararg
+ SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
+ DAG.getConstant(VT.getSizeInBits(),
+ getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerVASTART(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc dl = Op.getDebugLoc();
+ // vastart stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument
+ MachineFunction &MF = DAG.getMachineFunction();
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
+ return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1),
+ MachinePointerInfo(), false, false, 0);
+}
+
+SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ return SDValue();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ RegInfo->getFrameRegister(MF), MVT::i32);
+}
+
+SDValue XCoreTargetLowering::
+LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue XCoreTargetLowering::
+LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ // .align 4
+ // LDAPF_u10 r11, nest
+ // LDW_2rus r11, r11[0]
+ // STWSP_ru6 r11, sp[0]
+ // LDAPF_u10 r11, fptr
+ // LDW_2rus r11, r11[0]
+ // BAU_1r r11
+ // nest:
+ // .word nest
+ // fptr:
+ // .word fptr
+ SDValue OutChains[5];
+
+ SDValue Addr = Trmp;
+
+ DebugLoc dl = Op.getDebugLoc();
+ OutChains[0] = DAG.getStore(Chain, dl, DAG.getConstant(0x0a3cd805, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr), false, false,
+ 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(4, MVT::i32));
+ OutChains[1] = DAG.getStore(Chain, dl, DAG.getConstant(0xd80456c0, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr, 4), false,
+ false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(8, MVT::i32));
+ OutChains[2] = DAG.getStore(Chain, dl, DAG.getConstant(0x27fb0a3c, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr, 8), false,
+ false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(12, MVT::i32));
+ OutChains[3] = DAG.getStore(Chain, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12), false, false,
+ 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(16, MVT::i32));
+ OutChains[4] = DAG.getStore(Chain, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 16), false, false,
+ 0);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5);
+}
+
+SDValue XCoreTargetLowering::
+LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::xcore_crc8:
+ EVT VT = Op.getValueType();
+ SDValue Data =
+ DAG.getNode(XCoreISD::CRC8, DL, DAG.getVTList(VT, VT),
+ Op.getOperand(1), Op.getOperand(2) , Op.getOperand(3));
+ SDValue Crc(Data.getNode(), 1);
+ SDValue Results[] = { Crc, Data };
+ return DAG.getMergeValues(Results, 2, DL);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "XCoreGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore call implementation
+SDValue
+XCoreTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
+ // XCore target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // For now, only CallingConv::C implemented
+ switch (CallConv)
+ {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCCallTo - functions arguments are copied from virtual
+/// regs to (physical regs)/(stack frame), CALLSEQ_START and
+/// CALLSEQ_END are emitted.
+/// TODO: isTailCall, sret.
+SDValue
+XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // The ABI dictates there should be one stack slot available to the callee
+ // on function entry (for saving lr).
+ CCInfo.AllocateStack(4, 4);
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_XCore);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ int Offset = VA.getLocMemOffset();
+
+ MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other,
+ Chain, Arg,
+ DAG.getConstant(Offset/4, MVT::i32)));
+ }
+ }
+
+ // Transform all store nodes into one single node because
+ // all store nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ // XCoreBranchLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore formal arguments implementation
+SDValue
+XCoreTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ switch (CallConv)
+ {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments places on the stack.
+/// TODO: sret
+SDValue
+XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_XCore);
+
+ unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize();
+
+ unsigned LRSaveSize = StackSlotSize;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
+ default:
+ {
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ case MVT::i32:
+ unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ }
+ } else {
+ // sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > StackSlotSize) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize,
+ LRSaveSize + VA.getLocMemOffset(),
+ true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ //from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0));
+ }
+ }
+
+ if (isVarArg) {
+ /* Argument registers */
+ static const uint16_t ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+ };
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs,
+ array_lengthof(ArgRegs));
+ if (FirstVAReg < array_lengthof(ArgRegs)) {
+ SmallVector<SDValue, 4> MemOps;
+ int offset = 0;
+ // Save remaining registers, storing higher register numbers at a higher
+ // address
+ for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) {
+ // Create a stack slot
+ int FI = MFI->CreateFixedObject(4, offset, true);
+ if (i == (int)FirstVAReg) {
+ XFI->setVarArgsFrameIndex(FI);
+ }
+ offset -= StackSlotSize;
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ // Move argument from phys reg -> virt reg
+ unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass);
+ RegInfo.addLiveIn(ArgRegs[i], VReg);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ // Move argument from virt reg -> stack
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else {
+ // This will point to the next argument passed via stack.
+ XFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
+ true));
+ }
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+bool XCoreTargetLowering::
+CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_XCore);
+}
+
+SDValue
+XCoreTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Return on XCore is always a "retsp 0"
+ RetOps.push_back(DAG.getConstant(0, MVT::i32));
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // guarantee that all emitted copies are
+ // stuck together, avoiding something bad
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == XCore::SELECT_CC) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII.get(XCore::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case XCoreISD::LADD: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N1, N0, N2);
+
+ // fold (ladd 0, 0, x) -> 0, x & 1
+ if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ SDValue Carry = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
+ DAG.getConstant(1, VT));
+ SDValue Ops[] = { Result, Carry };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+
+ // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
+ // low bit set
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ if ((KnownZero & Mask) == Mask) {
+ SDValue Carry = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
+ SDValue Ops[] = { Result, Carry };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ }
+ break;
+ case XCoreISD::LSUB: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
+ if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ if ((KnownZero & Mask) == Mask) {
+ SDValue Borrow = N2;
+ SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(0, VT), N2);
+ SDValue Ops[] = { Result, Borrow };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+
+ // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
+ // low bit set
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ if ((KnownZero & Mask) == Mask) {
+ SDValue Borrow = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
+ SDValue Ops[] = { Result, Borrow };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ }
+ break;
+ case XCoreISD::LMUL: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ // Canonicalize multiplicative constant to RHS. If both multiplicative
+ // operands are constant canonicalize smallest to RHS.
+ if ((N0C && !N1C) ||
+ (N0C && N1C && N0C->getZExtValue() < N1C->getZExtValue()))
+ return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT),
+ N1, N0, N2, N3);
+
+ // lmul(x, 0, a, b)
+ if (N1C && N1C->isNullValue()) {
+ // If the high result is unused fold to add(a, b)
+ if (N->hasNUsesOfValue(0, 0)) {
+ SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
+ SDValue Ops[] = { Lo, Lo };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ // Otherwise fold to ladd(a, b, 0)
+ SDValue Result =
+ DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+ SDValue Carry(Result.getNode(), 1);
+ SDValue Ops[] = { Carry, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ break;
+ case ISD::ADD: {
+ // Fold 32 bit expressions such as add(add(mul(x,y),a),b) ->
+ // lmul(x, y, a, b). The high result of lmul will be ignored.
+ // This is only profitable if the intermediate results are unused
+ // elsewhere.
+ SDValue Mul0, Mul1, Addend0, Addend1;
+ if (N->getValueType(0) == MVT::i32 &&
+ isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
+ SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Mul0,
+ Mul1, Addend0, Addend1);
+ SDValue Result(Ignored.getNode(), 1);
+ return Result;
+ }
+ APInt HighMask = APInt::getHighBitsSet(64, 32);
+ // Fold 64 bit expression such as add(add(mul(x,y),a),b) ->
+ // lmul(x, y, a, b) if all operands are zero-extended. We do this
+ // before type legalization as it is messy to match the operands after
+ // that.
+ if (N->getValueType(0) == MVT::i64 &&
+ isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, false) &&
+ DAG.MaskedValueIsZero(Mul0, HighMask) &&
+ DAG.MaskedValueIsZero(Mul1, HighMask) &&
+ DAG.MaskedValueIsZero(Addend0, HighMask) &&
+ DAG.MaskedValueIsZero(Addend1, HighMask)) {
+ SDValue Mul0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul0, DAG.getConstant(0, MVT::i32));
+ SDValue Mul1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul1, DAG.getConstant(0, MVT::i32));
+ SDValue Addend0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Addend0, DAG.getConstant(0, MVT::i32));
+ SDValue Addend1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Addend1, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Mul0L, Mul1L,
+ Addend0L, Addend1L);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ }
+ break;
+ case ISD::STORE: {
+ // Replace unaligned store of unaligned load with memmove.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (!DCI.isBeforeLegalize() ||
+ allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
+ ST->isVolatile() || ST->isIndexed()) {
+ break;
+ }
+ SDValue Chain = ST->getChain();
+
+ unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits();
+ if (StoreBits % 8) {
+ break;
+ }
+ unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(
+ ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
+ unsigned Alignment = ST->getAlignment();
+ if (Alignment >= ABIAlignment) {
+ break;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ST->getValue())) {
+ if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() &&
+ LD->getAlignment() == Alignment &&
+ !LD->isVolatile() && !LD->isIndexed() &&
+ Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
+ return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
+ LD->getBasePtr(),
+ DAG.getConstant(StoreBits/8, MVT::i32),
+ Alignment, false, ST->getPointerInfo(),
+ LD->getPointerInfo());
+ }
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case XCoreISD::LADD:
+ case XCoreISD::LSUB:
+ if (Op.getResNo() == 1) {
+ // Top bits of carry / borrow are clear.
+ KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
+ KnownZero.getBitWidth() - 1);
+ }
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing mode description hooks
+//===----------------------------------------------------------------------===//
+
+static inline bool isImmUs(int64_t val)
+{
+ return (val >= 0 && val <= 11);
+}
+
+static inline bool isImmUs2(int64_t val)
+{
+ return (val%2 == 0 && isImmUs(val/2));
+}
+
+static inline bool isImmUs4(int64_t val)
+{
+ return (val%4 == 0 && isImmUs(val/4));
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool
+XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ if (Ty->getTypeID() == Type::VoidTyID)
+ return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
+
+ const DataLayout *TD = TM.getDataLayout();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (AM.BaseGV) {
+ return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
+ AM.BaseOffs%4 == 0;
+ }
+
+ switch (Size) {
+ case 1:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs(AM.BaseOffs);
+ }
+ // reg + reg
+ return AM.Scale == 1 && AM.BaseOffs == 0;
+ case 2:
+ case 3:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs2(AM.BaseOffs);
+ }
+ // reg + reg<<1
+ return AM.Scale == 2 && AM.BaseOffs == 0;
+ default:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs4(AM.BaseOffs);
+ }
+ // reg + reg<<2
+ return AM.Scale == 4 && AM.BaseOffs == 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// XCore Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+XCoreTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'r':
+ return std::make_pair(0U, &XCore::GRRegsRegClass);
+ }
+ }
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
new file mode 100644
index 000000000000..8d258f5054c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -0,0 +1,199 @@
+//===-- XCoreISelLowering.h - XCore DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that XCore uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREISELLOWERING_H
+#define XCOREISELLOWERING_H
+
+#include "XCore.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+ // Forward delcarations
+ class XCoreSubtarget;
+ class XCoreTargetMachine;
+
+ namespace XCoreISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Branch and link (call)
+ BL,
+
+ // pc relative address
+ PCRelativeWrapper,
+
+ // dp relative address
+ DPRelativeWrapper,
+
+ // cp relative address
+ CPRelativeWrapper,
+
+ // Store word to stack
+ STWSP,
+
+ // Corresponds to retsp instruction
+ RETSP,
+
+ // Corresponds to LADD instruction
+ LADD,
+
+ // Corresponds to LSUB instruction
+ LSUB,
+
+ // Corresponds to LMUL instruction
+ LMUL,
+
+ // Corresponds to MACCU instruction
+ MACCU,
+
+ // Corresponds to MACCS instruction
+ MACCS,
+
+ // Corresponds to CRC8 instruction
+ CRC8,
+
+ // Jumptable branch.
+ BR_JT,
+
+ // Jumptable branch using long branches for each entry.
+ BR_JT32
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class XCoreTargetLowering : public TargetLowering
+ {
+ public:
+
+ explicit XCoreTargetLowering(XCoreTargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding() const;
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ // DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const;
+
+ private:
+ const XCoreTargetMachine &TM;
+ const XCoreSubtarget &Subtarget;
+
+ // Lower Operand helpers
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+ SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
+ SelectionDAG &DAG) const;
+
+ // Lower Operand specifics
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+
+ // Inline asm support
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ // Expand specifics
+ SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
+ SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+ LLVMContext &Context) const;
+ };
+}
+
+#endif // XCOREISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td b/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td
new file mode 100644
index 000000000000..379cc39aa617
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td
@@ -0,0 +1,277 @@
+//===-- XCoreInstrFormats.td - XCore Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+class InstXCore<int sz, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "XCore";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Size = sz;
+ field bits<32> SoftFail = 0;
+}
+
+// XCore pseudo instructions format
+class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<0, outs, ins, asmstr, pattern> {
+ let isPseudo = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+
+class _F3R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc;
+ let DecoderMethod = "Decode3RInstruction";
+}
+
+// 3R with first operand as an immediate. Used for TSETR where the first
+// operand is treated as an immediate since it refers to a register number in
+// another thread.
+class _F3RImm<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F3R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode3RImmInstruction";
+}
+
+class _FL3R<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{8-4};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{3-0};
+
+ let Inst{15-11} = 0b11111;
+ let DecoderMethod = "DecodeL3RInstruction";
+}
+
+// L3R with first operand as both a source and a destination.
+class _FL3RSrcDst<bits<9> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : _FL3R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL3RSrcDstInstruction";
+}
+
+class _F2RUS<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc;
+ let DecoderMethod = "Decode2RUSInstruction";
+}
+
+// 2RUS with bitp operand
+class _F2RUSBitp<bits<5> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _F2RUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RUSBitpInstruction";
+}
+
+class _FL2RUS<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{8-4};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{3-0};
+
+ let Inst{15-11} = 0b11111;
+ let DecoderMethod = "DecodeL2RUSInstruction";
+}
+
+// L2RUS with bitp operand
+class _FL2RUSBitp<bits<9> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL2RUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL2RUSBitpInstruction";
+}
+
+class _FRU6<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<4> a;
+ bits<6> b;
+
+ let Inst{15-10} = opc;
+ let Inst{9-6} = a;
+ let Inst{5-0} = b;
+}
+
+class _FLRU6<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<4> a;
+ bits<16> b;
+
+ let Inst{31-26} = opc;
+ let Inst{25-22} = a;
+ let Inst{21-16} = b{5-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = b{15-6};
+}
+
+class _FU6<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<6> a;
+
+ let Inst{15-6} = opc;
+ let Inst{5-0} = a;
+}
+
+class _FLU6<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<16> a;
+
+ let Inst{31-22} = opc;
+ let Inst{21-16} = a{5-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = a{15-6};
+}
+
+class _FU10<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<10> a;
+
+ let Inst{15-10} = opc;
+ let Inst{9-0} = a;
+}
+
+class _FLU10<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<20> a;
+
+ let Inst{31-26} = opc;
+ let Inst{25-16} = a{9-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = a{19-10};
+}
+
+class _F2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{5-1};
+ let Inst{4} = opc{0};
+ let DecoderMethod = "Decode2RInstruction";
+}
+
+// 2R with first operand as an immediate. Used for TSETMR where the first
+// operand is treated as an immediate since it refers to a register number in
+// another thread.
+class _F2RImm<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RImmInstruction";
+}
+
+// 2R with first operand as both a source and a destination.
+class _F2RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RSrcDstInstruction";
+}
+
+// Same as 2R with last two operands swapped
+class _FR2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeR2RInstruction";
+}
+
+class _FRUS<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{5-1};
+ let Inst{4} = opc{0};
+ let DecoderMethod = "DecodeRUSInstruction";
+}
+
+// RUS with bitp operand
+class _FRUSBitp<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FRUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeRUSBitpInstruction";
+}
+
+// RUS with first operand as both a source and a destination and a bitp second
+// operand
+class _FRUSSrcDstBitp<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FRUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeRUSSrcDstBitpInstruction";
+}
+
+class _FL2R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{9-5};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{4-1};
+
+ let Inst{15-11} = 0b11111;
+ let Inst{4} = opc{0};
+ let DecoderMethod = "DecodeL2RInstruction";
+}
+
+// Same as L2R with last two operands swapped
+class _FLR2R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _FL2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeLR2RInstruction";
+}
+
+class _F1R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<4> a;
+
+ let Inst{15-11} = opc{5-1};
+ let Inst{10-5} = 0b111111;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = a;
+}
+
+class _F0R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{9-5};
+ let Inst{10-5} = 0b111111;
+ let Inst{4-0} = opc{4-0};
+}
+
+class _FL4R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<4> d;
+
+ let Inst{31-27} = opc{5-1};
+ let Inst{26-21} = 0b111111;
+ let Inst{20} = opc{0};
+ let Inst{19-16} = d;
+ let Inst{15-11} = 0b11111;
+}
+
+// L4R with 4th operand as both a source and a destination.
+class _FL4RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL4R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL4RSrcDstInstruction";
+}
+
+// L4R with 1st and 4th operand as both a source and a destination.
+class _FL4RSrcDstSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL4R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL4RSrcDstSrcDstInstruction";
+}
+
+class _FL5R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{5-1};
+ let Inst{20} = opc{0};
+ let Inst{15-11} = 0b11111;
+
+ let DecoderMethod = "DecodeL5RInstruction";
+}
+
+class _FL6R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc;
+ let Inst{15-11} = 0b11111;
+
+ let DecoderMethod = "DecodeL6RInstruction";
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
new file mode 100644
index 000000000000..e457e0dbf027
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -0,0 +1,406 @@
+//===-- XCoreInstrInfo.cpp - XCore Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreInstrInfo.h"
+#include "XCore.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_CTOR
+#include "XCoreGenInstrInfo.inc"
+
+namespace llvm {
+namespace XCore {
+
+ // XCore Condition Codes
+ enum CondCode {
+ COND_TRUE,
+ COND_FALSE,
+ COND_INVALID
+ };
+}
+}
+
+using namespace llvm;
+
+XCoreInstrInfo::XCoreInstrInfo()
+ : XCoreGenInstrInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
+ RI(*this) {
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned
+XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const{
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::LDWFI)
+ {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2))))
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+unsigned
+XCoreInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::STWFI)
+ {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2))))
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+static inline bool IsBRU(unsigned BrOpc) {
+ return BrOpc == XCore::BRFU_u6
+ || BrOpc == XCore::BRFU_lu6
+ || BrOpc == XCore::BRBU_u6
+ || BrOpc == XCore::BRBU_lu6;
+}
+
+static inline bool IsBRT(unsigned BrOpc) {
+ return BrOpc == XCore::BRFT_ru6
+ || BrOpc == XCore::BRFT_lru6
+ || BrOpc == XCore::BRBT_ru6
+ || BrOpc == XCore::BRBT_lru6;
+}
+
+static inline bool IsBRF(unsigned BrOpc) {
+ return BrOpc == XCore::BRFF_ru6
+ || BrOpc == XCore::BRFF_lru6
+ || BrOpc == XCore::BRBF_ru6
+ || BrOpc == XCore::BRBF_lru6;
+}
+
+static inline bool IsCondBranch(unsigned BrOpc) {
+ return IsBRF(BrOpc) || IsBRT(BrOpc);
+}
+
+static inline bool IsBR_JT(unsigned BrOpc) {
+ return BrOpc == XCore::BR_JT
+ || BrOpc == XCore::BR_JT32;
+}
+
+/// GetCondFromBranchOpc - Return the XCore CC that matches
+/// the correspondent Branch instruction opcode.
+static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+{
+ if (IsBRT(BrOpc)) {
+ return XCore::COND_TRUE;
+ } else if (IsBRF(BrOpc)) {
+ return XCore::COND_FALSE;
+ } else {
+ return XCore::COND_INVALID;
+ }
+}
+
+/// GetCondBranchFromCond - Return the Branch instruction
+/// opcode that matches the cc.
+static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::BRFT_lru6;
+ case XCore::COND_FALSE : return XCore::BRFF_lru6;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified
+/// condition, e.g. turning COND_E to COND_NE.
+static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::COND_FALSE;
+ case XCore::COND_FALSE : return XCore::COND_TRUE;
+ }
+}
+
+/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+/// implemented for a target). Upon success, this returns false and returns
+/// with the following information in various cases:
+///
+/// 1. If this block ends with no branches (it just falls through to its succ)
+/// just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+/// the destination block.
+/// 3. If this block ends with an conditional branch and it falls through to
+/// an successor block, it sets TBB to be the branch destination block and a
+/// list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+/// 4. If this block ends with an conditional branch and an unconditional
+/// block, it returns the 'true' destination in TBB, the 'false' destination
+/// in FBB, and a list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool
+XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (IsBRU(LastInst->getOpcode())) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
+ if (BranchCode == XCore::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Conditional branch
+ // Block ends with fall-through condbranch.
+
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+
+ // If the block ends with conditional branch followed by unconditional,
+ // handle it.
+ if (BranchCode != XCore::COND_INVALID
+ && IsBRU(LastInst->getOpcode())) {
+
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(SecondLastInst->getOperand(0));
+
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (IsBRU(SecondLastInst->getOpcode()) &&
+ IsBRU(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Likewise if it ends with a branch table followed by an unconditional branch.
+ if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned
+XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL)const{
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Unexpected number of components!");
+
+ if (FBB == 0) { // One way branch.
+ if (Cond.empty()) {
+ // Unconditional branch
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB);
+ } else {
+ // Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ }
+ return 1;
+ }
+
+ // Two-way Conditional branch.
+ assert(Cond.size() == 2 && "Unexpected number of components!");
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(FBB);
+ return 2;
+}
+
+unsigned
+XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!IsCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GRDest = XCore::GRRegsRegClass.contains(DestReg);
+ bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg);
+
+ if (GRDest && GRSrc) {
+ BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
+ }
+
+ if (GRDest && SrcReg == XCore::SP) {
+ BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0);
+ return;
+ }
+
+ if (DestReg == XCore::SP && GRSrc) {
+ BuildMI(MBB, I, DL, get(XCore::SETSP_1r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::STWFI))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+MachineInstr*
+XCoreInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(XCore::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// ReverseBranchCondition - Return the inverse opcode of the
+/// specified Branch instruction.
+bool XCoreInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert((Cond.size() == 2) &&
+ "Invalid XCore branch condition!");
+ Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm()));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
new file mode 100644
index 000000000000..42eeed8370f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -0,0 +1,93 @@
+//===-- XCoreInstrInfo.h - XCore Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTRUCTIONINFO_H
+#define XCOREINSTRUCTIONINFO_H
+
+#include "XCoreRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "XCoreGenInstrInfo.inc"
+
+namespace llvm {
+
+class XCoreInstrInfo : public XCoreGenInstrInfo {
+ const XCoreRegisterInfo RI;
+public:
+ XCoreInstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual bool ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
new file mode 100644
index 000000000000..03653cb2b3de
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -0,0 +1,1267 @@
+//===-- XCoreInstrInfo.td - Target Description for XCore ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the XCore instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// Uses of CP, DP are not currently reflected in the patterns, since
+// having a physical register as an operand prevents loop hoisting and
+// since the value of these registers never changes during the life of the
+// function.
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass.
+//===----------------------------------------------------------------------===//
+
+include "XCoreInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// XCore specific DAG Nodes.
+//
+
+// Call
+def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
+ [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad, SDNPVariadic]>;
+
+def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def XCoreBR_JT : SDNode<"XCoreISD::BR_JT", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
+def XCoreBR_JT32 : SDNode<"XCoreISD::BR_JT32", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
+def SDT_XCoreAddress : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def pcrelwrapper : SDNode<"XCoreISD::PCRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def dprelwrapper : SDNode<"XCoreISD::DPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>;
+def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def div4_xform : SDNodeXForm<imm, [{
+ // Transformation function: imm/4
+ assert(N->getZExtValue() % 4 == 0);
+ return getI32Imm(N->getZExtValue()/4);
+}]>;
+
+def msksize_xform : SDNodeXForm<imm, [{
+ // Transformation function: get the size of a mask
+ assert(isMask_32(N->getZExtValue()));
+ // look for the first non-zero bit
+ return getI32Imm(32 - CountLeadingZeros_32(N->getZExtValue()));
+}]>;
+
+def neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(-value);
+}]>;
+
+def bpwsub_xform : SDNodeXForm<imm, [{
+ // Transformation function: 32-imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(32-value);
+}]>;
+
+def div4neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm/4
+ uint32_t value = N->getZExtValue();
+ assert(-value % 4 == 0);
+ return getI32Imm(-value/4);
+}]>;
+
+def immUs4Neg : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (-value)%4 == 0 && (-value)/4 <= 11;
+}]>;
+
+def immUs4 : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return value%4 == 0 && value/4 <= 11;
+}]>;
+
+def immUsNeg : PatLeaf<(imm), [{
+ return -((uint32_t)N->getZExtValue()) <= 11;
+}]>;
+
+def immUs : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() <= 11;
+}]>;
+
+def immU6 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 6);
+}]>;
+
+def immU10 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 10);
+}]>;
+
+def immU16 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 16);
+}]>;
+
+def immU20 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 20);
+}]>;
+
+def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>;
+
+def immBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 1 && value <= 8)
+ || value == 16
+ || value == 24
+ || value == 32;
+}]>;
+
+def immBpwSubBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 24 && value <= 31)
+ || value == 16
+ || value == 8
+ || value == 0;
+}]>;
+
+def lda16f : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 1))>;
+def lda16b : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 1))>;
+def ldawf : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 2))>;
+def ldawb : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 2))>;
+
+// Instruction operand types
+def calltarget : Operand<i32>;
+def brtarget : Operand<OtherVT>;
+def pclabel : Operand<i32>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+def ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper],
+ []>;
+def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
+ []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let DecoderMethod = "DecodeMEMiiOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+// Jump tables.
+def InlineJT : Operand<i32> {
+ let PrintMethod = "printInlineJT";
+}
+
+def InlineJT32 : Operand<i32> {
+ let PrintMethod = "printInlineJT32";
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+
+multiclass F3R_2RUS<bits<5> opc1, bits<5> opc2, string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+multiclass F3R_2RUS_np<bits<5> opc1, bits<5> opc2, string OpcStr> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
+ def _2rus : _F2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
+}
+
+multiclass F3R_2RBITP<bits<5> opc1, bits<5> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUSBitp<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class F3R<bits<5> opc, string OpcStr, SDNode OpNode> :
+ _F3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+class F3R_np<bits<5> opc, string OpcStr> :
+ _F3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
+// Three operand long
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RUS<bits<9> opc1, bits<9> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _l3r: _FL3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RBITP<bits<9> opc1, bits<9> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _l3r: _FL3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUSBitp<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class FL3R<bits<9> opc, string OpcStr, SDNode OpNode> :
+ _FL3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+// Register - U6
+// Operand register - U6
+multiclass FRU6_LRU6_branch<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, $b"), []>;
+ def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, $b"), []>;
+}
+
+multiclass FRU6_LRU6_backwards_branch<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, -$b"), []>;
+ def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, -$b"), []>;
+}
+
+multiclass FRU6_LRU6_cp<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs RRegs:$a), (ins i32imm:$b),
+ !strconcat(OpcStr, " $a, cp[$b]"), []>;
+ def _lru6: _FLRU6<opc, (outs RRegs:$a), (ins i32imm:$b),
+ !strconcat(OpcStr, " $a, cp[$b]"), []>;
+}
+
+// U6
+multiclass FU6_LU6<bits<10> opc, string OpcStr, SDNode OpNode> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(OpNode immU6:$a)]>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(OpNode immU16:$a)]>;
+}
+
+multiclass FU6_LU6_int<bits<10> opc, string OpcStr, Intrinsic Int> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(Int immU6:$a)]>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(Int immU16:$a)]>;
+}
+
+multiclass FU6_LU6_np<bits<10> opc, string OpcStr> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), []>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), []>;
+}
+
+// Two operand short
+
+class F2R_np<bits<6> opc, string OpcStr> :
+ _F2R<opc, (outs GRRegs:$dst), (ins GRRegs:$b),
+ !strconcat(OpcStr, " $dst, $b"), []>;
+
+// Two operand long
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt),
+ "# ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "# ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "# LDWFI $dst, $addr",
+ [(set GRRegs:$dst, (load ADDRspii:$addr))]>;
+
+def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "# LDAWFI $dst, $addr",
+ [(set GRRegs:$dst, ADDRspii:$addr)]>;
+
+def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
+ "# STWFI $src, $addr",
+ [(store GRRegs:$src, ADDRspii:$addr)]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1 in {
+ def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst),
+ (ins GRRegs:$cond, GRRegs:$T, GRRegs:$F),
+ "# SELECT_CC PSEUDO!",
+ [(set GRRegs:$dst,
+ (select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+defm ADD : F3R_2RUS<0b00010, 0b10010, "add", add>;
+defm SUB : F3R_2RUS<0b00011, 0b10011, "sub", sub>;
+let neverHasSideEffects = 1 in {
+defm EQ : F3R_2RUS_np<0b00110, 0b10110, "eq">;
+def LSS_3r : F3R_np<0b11000, "lss">;
+def LSU_3r : F3R_np<0b11001, "lsu">;
+}
+def AND_3r : F3R<0b00111, "and", and>;
+def OR_3r : F3R<0b01000, "or", or>;
+
+let mayLoad=1 in {
+def LDW_3r : _F3R<0b01001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldw $dst, $addr[$offset]", []>;
+
+def LDW_2rus : _F2RUS<0b00001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldw $dst, $addr[$offset]", []>;
+
+def LD16S_3r : _F3R<0b10000, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ld16s $dst, $addr[$offset]", []>;
+
+def LD8U_3r : _F3R<0b10001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ld8u $dst, $addr[$offset]", []>;
+}
+
+let mayStore=1 in {
+def STW_l3r : _FL3R<0b000001100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "stw $val, $addr[$offset]", []>;
+
+def STW_2rus : _F2RUS<0b0000, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
+ "stw $val, $addr[$offset]", []>;
+}
+
+defm SHL : F3R_2RBITP<0b00100, 0b10100, "shl", shl>;
+defm SHR : F3R_2RBITP<0b00101, 0b10101, "shr", srl>;
+
+// The first operand is treated as an immediate since it refers to a register
+// number in another thread.
+def TSETR_3r : _F3RImm<0b10111, (outs), (ins i32imm:$a, GRRegs:$b, GRRegs:$c),
+ "set t[$c]:r$a, $b", []>;
+
+// Three operand long
+def LDAWF_l3r : _FL3R<0b000111100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[$offset]",
+ [(set GRRegs:$dst,
+ (ldawf GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWF_l2rus : _FL2RUS<0b100111100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[$offset]", []>;
+
+def LDAWB_l3r : _FL3R<0b001001100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ [(set GRRegs:$dst,
+ (ldawb GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWB_l2rus : _FL2RUS<0b101001100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[-$offset]", []>;
+
+def LDA16F_l3r : _FL3R<0b001011100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[$offset]",
+ [(set GRRegs:$dst,
+ (lda16f GRRegs:$addr, GRRegs:$offset))]>;
+
+def LDA16B_l3r : _FL3R<0b001101100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[-$offset]",
+ [(set GRRegs:$dst,
+ (lda16b GRRegs:$addr, GRRegs:$offset))]>;
+
+def MUL_l3r : FL3R<0b001111100, "mul", mul>;
+// Instructions which may trap are marked as side effecting.
+let hasSideEffects = 1 in {
+def DIVS_l3r : FL3R<0b010001100, "divs", sdiv>;
+def DIVU_l3r : FL3R<0b010011100, "divu", udiv>;
+def REMS_l3r : FL3R<0b110001100, "rems", srem>;
+def REMU_l3r : FL3R<0b110011100, "remu", urem>;
+}
+def XOR_l3r : FL3R<0b000011100, "xor", xor>;
+defm ASHR : FL3R_L2RBITP<0b000101100, 0b100101100, "ashr", sra>;
+
+let Constraints = "$src1 = $dst" in
+def CRC_l3r : _FL3RSrcDst<0b101011100, (outs GRRegs:$dst),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "crc32 $dst, $src2, $src3",
+ [(set GRRegs:$dst,
+ (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2,
+ GRRegs:$src3))]>;
+
+let mayStore=1 in {
+def ST16_l3r : _FL3R<0b100001100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st16 $val, $addr[$offset]", []>;
+
+def ST8_l3r : _FL3R<0b100011100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st8 $val, $addr[$offset]", []>;
+}
+
+def INPW_l2rus : _FL2RUSBitp<0b100101110, (outs GRRegs:$a),
+ (ins GRRegs:$b, i32imm:$c), "inpw $a, res[$b], $c",
+ []>;
+
+def OUTPW_l2rus : _FL2RUSBitp<0b100101101, (outs),
+ (ins GRRegs:$a, GRRegs:$b, i32imm:$c),
+ "outpw res[$b], $a, $c", []>;
+
+// Four operand long
+let Constraints = "$e = $a,$f = $b" in {
+def MACCU_l4r : _FL4RSrcDstSrcDst<
+ 0b000001, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccu $a, $b, $c, $d", []>;
+
+def MACCS_l4r : _FL4RSrcDstSrcDst<
+ 0b000010, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccs $a, $b, $c, $d", []>;
+}
+
+let Constraints = "$e = $b" in
+def CRC8_l4r : _FL4RSrcDst<0b000000, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$c, GRRegs:$d),
+ "crc8 $b, $a, $c, $d", []>;
+
+// Five operand long
+
+def LADD_l5r : _FL5R<0b000001, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ladd $dst2, $dst1, $src1, $src2, $src3",
+ []>;
+
+def LSUB_l5r : _FL5R<0b000010, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "lsub $dst2, $dst1, $src1, $src2, $src3", []>;
+
+def LDIVU_l5r : _FL5R<0b000000, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ldivu $dst1, $dst2, $src3, $src1, $src2", []>;
+
+// Six operand long
+
+def LMUL_l6r : _FL6R<
+ 0b00000, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, GRRegs:$src4),
+ "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", []>;
+
+// Register - U6
+
+//let Uses = [DP] in ...
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
+ "ldaw $a, dp[$b]", []>;
+
+let isReMaterializable = 1 in
+def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
+ "ldaw $a, dp[$b]",
+ [(set RRegs:$a, ADDRdpii:$b)]>;
+
+let mayLoad=1 in
+def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
+ "ldw $a, dp[$b]", []>;
+
+def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
+ "ldw $a, dp[$b]",
+ [(set RRegs:$a, (load ADDRdpii:$b))]>;
+
+let mayStore=1 in
+def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
+ "stw $a, dp[$b]", []>;
+
+def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
+ "stw $a, dp[$b]",
+ [(store RRegs:$a, ADDRdpii:$b)]>;
+
+//let Uses = [CP] in ..
+let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in
+defm LDWCP : FRU6_LRU6_cp<0b011011, "ldw">;
+
+let Uses = [SP] in {
+let mayStore=1 in {
+def STWSP_ru6 : _FRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b),
+ "stw $a, sp[$b]",
+ [(XCoreStwsp RRegs:$a, immU6:$b)]>;
+
+def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b),
+ "stw $a, sp[$b]",
+ [(XCoreStwsp RRegs:$a, immU16:$b)]>;
+}
+
+let mayLoad=1 in {
+def LDWSP_ru6 : _FRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
+ "ldw $a, sp[$b]", []>;
+
+def LDWSP_lru6 : _FLRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
+ "ldw $a, sp[$b]", []>;
+}
+
+let neverHasSideEffects = 1 in {
+def LDAWSP_ru6 : _FRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
+ "ldaw $a, sp[$b]", []>;
+
+def LDAWSP_lru6 : _FLRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
+ "ldaw $a, sp[$b]", []>;
+}
+}
+
+let isReMaterializable = 1 in {
+def LDC_ru6 : _FRU6<0b011010, (outs RRegs:$a), (ins i32imm:$b),
+ "ldc $a, $b", [(set RRegs:$a, immU6:$b)]>;
+
+def LDC_lru6 : _FLRU6<0b011010, (outs RRegs:$a), (ins i32imm:$b),
+ "ldc $a, $b", [(set RRegs:$a, immU16:$b)]>;
+}
+
+def SETC_ru6 : _FRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b),
+ "setc res[$a], $b",
+ [(int_xcore_setc GRRegs:$a, immU6:$b)]>;
+
+def SETC_lru6 : _FLRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b),
+ "setc res[$a], $b",
+ [(int_xcore_setc GRRegs:$a, immU16:$b)]>;
+
+// Operand register - U6
+let isBranch = 1, isTerminator = 1 in {
+defm BRFT: FRU6_LRU6_branch<0b011100, "bt">;
+defm BRBT: FRU6_LRU6_backwards_branch<0b011101, "bt">;
+defm BRFF: FRU6_LRU6_branch<0b011110, "bf">;
+defm BRBF: FRU6_LRU6_backwards_branch<0b011111, "bf">;
+}
+
+// U6
+let Defs = [SP], Uses = [SP] in {
+let neverHasSideEffects = 1 in
+defm EXTSP : FU6_LU6_np<0b0111011110, "extsp">;
+
+let mayStore = 1 in
+defm ENTSP : FU6_LU6_np<0b0111011101, "entsp">;
+
+let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in {
+defm RETSP : FU6_LU6<0b0111011111, "retsp", XCoreRetsp>;
+}
+}
+
+let neverHasSideEffects = 1 in
+defm EXTDP : FU6_LU6_np<0b0111001110, "extdp">;
+
+let Uses = [R11], isCall=1 in
+defm BLAT : FU6_LU6_np<0b0111001101, "blat">;
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
+
+def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
+
+def BRFU_u6 : _FU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
+
+def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
+}
+
+//let Uses = [CP] in ...
+let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
+def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
+ []>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
+ [(set R11, ADDRcpii:$a)]>;
+
+let Defs = [R11] in
+defm GETSR : FU6_LU6_np<0b0111111100, "getsr r11,">;
+
+defm SETSR : FU6_LU6_int<0b0111101101, "setsr", int_xcore_setsr>;
+
+defm CLRSR : FU6_LU6_int<0b0111101100, "clrsr", int_xcore_clrsr>;
+
+// setsr may cause a branch if it is used to enable events. clrsr may
+// branch if it is executed while events are enabled.
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
+ isCodeGenOnly = 1 in {
+defm SETSR_branch : FU6_LU6_np<0b0111101101, "setsr">;
+defm CLRSR_branch : FU6_LU6_np<0b0111101100, "clrsr">;
+}
+
+defm KCALL : FU6_LU6_np<0b0111001111, "kcall">;
+
+let Uses = [SP], Defs = [SP], mayStore = 1 in
+defm KENTSP : FU6_LU6_np<0b0111101110, "kentsp">;
+
+let Uses = [SP], Defs = [SP], mayLoad = 1 in
+defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">;
+
+// U10
+
+let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
+def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+ [(set R11, (pcrelwrapper tglobaladdr:$a))]>;
+
+let Defs = [R11], isReMaterializable = 1, isCodeGenOnly = 1 in
+def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+ [(set R11, (pcrelwrapper tblockaddress:$a))]>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
+def BLACP_u10 : _FU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
+
+def BLACP_lu10 : _FLU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
+
+def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+ [(XCoreBranchLink immU10:$a)]>;
+
+def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+ [(XCoreBranchLink immU20:$a)]>;
+}
+
+let Defs = [R11], mayLoad = 1, isReMaterializable = 1,
+ neverHasSideEffects = 1 in {
+def LDWCP_u10 : _FU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]", []>;
+
+def LDWCP_lu10 : _FLU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]",
+ []>;
+}
+
+// Two operand short
+def NOT : _F2R<0b100010, (outs GRRegs:$dst), (ins GRRegs:$b),
+ "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>;
+
+def NEG : _F2R<0b100100, (outs GRRegs:$dst), (ins GRRegs:$b),
+ "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
+
+let Constraints = "$src1 = $dst" in {
+def SEXT_rus :
+ _FRUSSrcDstBitp<0b001101, (outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def SEXT_2r :
+ _F2RSrcDst<0b001100, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, GRRegs:$src2))]>;
+
+def ZEXT_rus :
+ _FRUSSrcDstBitp<0b010001, (outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def ZEXT_2r :
+ _F2RSrcDst<0b010000, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, GRRegs:$src2))]>;
+
+def ANDNOT_2r :
+ _F2RSrcDst<0b001010, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "andnot $dst, $src2",
+ [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
+}
+
+let isReMaterializable = 1, neverHasSideEffects = 1 in
+def MKMSK_rus : _FRUSBitp<0b101001, (outs GRRegs:$dst), (ins i32imm:$size),
+ "mkmsk $dst, $size", []>;
+
+def MKMSK_2r : _F2R<0b101000, (outs GRRegs:$dst), (ins GRRegs:$size),
+ "mkmsk $dst, $size",
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>;
+
+def GETR_rus : _FRUS<0b100000, (outs GRRegs:$dst), (ins i32imm:$type),
+ "getr $dst, $type",
+ [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
+
+def GETTS_2r : _F2R<0b001110, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "getts $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
+
+def SETPT_2r : _FR2R<0b001111, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setpt res[$r], $val",
+ [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
+
+def OUTCT_2r : _F2R<0b010010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
+
+def OUTCT_rus : _FRUS<0b010011, (outs), (ins GRRegs:$r, i32imm:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
+
+def OUTT_2r : _FR2R<0b000011, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "outt res[$r], $val",
+ [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
+
+def OUT_2r : _FR2R<0b101010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "out res[$r], $val",
+ [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
+
+let Constraints = "$src = $dst" in
+def OUTSHR_2r :
+ _F2RSrcDst<0b101011, (outs GRRegs:$dst), (ins GRRegs:$src, GRRegs:$r),
+ "outshr res[$r], $src",
+ [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>;
+
+def INCT_2r : _F2R<0b100001, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "inct $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
+
+def INT_2r : _F2R<0b100011, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "int $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
+
+def IN_2r : _F2R<0b101100, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "in $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
+
+let Constraints = "$src = $dst" in
+def INSHR_2r :
+ _F2RSrcDst<0b101101, (outs GRRegs:$dst), (ins GRRegs:$src, GRRegs:$r),
+ "inshr $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>;
+
+def CHKCT_2r : _F2R<0b110010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
+
+def CHKCT_rus : _FRUSBitp<0b110011, (outs), (ins GRRegs:$r, i32imm:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+
+def TESTCT_2r : _F2R<0b101111, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "testct $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_testct GRRegs:$src))]>;
+
+def TESTWCT_2r : _F2R<0b110001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "testwct $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_testwct GRRegs:$src))]>;
+
+def SETD_2r : _FR2R<0b000101, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setd res[$r], $val",
+ [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
+
+def SETPSC_2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setpsc res[$src1], $src2",
+ [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+
+def GETST_2r : _F2R<0b000001, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "getst $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>;
+
+def INITSP_2r : _F2R<0b000100, (outs), (ins GRRegs:$src, GRRegs:$t),
+ "init t[$t]:sp, $src",
+ [(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>;
+
+def INITPC_2r : _F2R<0b000000, (outs), (ins GRRegs:$src, GRRegs:$t),
+ "init t[$t]:pc, $src",
+ [(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>;
+
+def INITCP_2r : _F2R<0b000110, (outs), (ins GRRegs:$src, GRRegs:$t),
+ "init t[$t]:cp, $src",
+ [(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>;
+
+def INITDP_2r : _F2R<0b000010, (outs), (ins GRRegs:$src, GRRegs:$t),
+ "init t[$t]:dp, $src",
+ [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
+
+def PEEK_2r : _F2R<0b101110, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "peek $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>;
+
+def ENDIN_2r : _F2R<0b100101, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "endin $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+
+def EEF_2r : _F2R<0b001011, (outs), (ins GRRegs:$a, GRRegs:$b),
+ "eef $a, res[$b]", []>;
+
+def EET_2r : _F2R<0b001001, (outs), (ins GRRegs:$a, GRRegs:$b),
+ "eet $a, res[$b]", []>;
+
+def TSETMR_2r : _F2RImm<0b000111, (outs), (ins i32imm:$a, GRRegs:$b),
+ "tsetmr r$a, $b", []>;
+
+// Two operand long
+def BITREV_l2r : _FL2R<0b0000011000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "bitrev $dst, $src",
+ [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+
+def BYTEREV_l2r : _FL2R<0b0000011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "byterev $dst, $src",
+ [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
+
+def CLZ_l2r : _FL2R<0b000111000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "clz $dst, $src",
+ [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+
+def GETD_l2r : _FL2R<0b0001111001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "getd $dst, res[$src]", []>;
+
+def GETN_l2r : _FL2R<0b0011011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "getn $dst, res[$src]", []>;
+
+def SETC_l2r : _FL2R<0b0010111001, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
+
+def SETTW_l2r : _FLR2R<0b0010011001, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "settw res[$r], $val",
+ [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
+
+def GETPS_l2r : _FL2R<0b0001011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "get $dst, ps[$src]",
+ [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
+
+def SETPS_l2r : _FLR2R<0b0001111000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "set ps[$src1], $src2",
+ [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+
+def INITLR_l2r : _FL2R<0b0001011000, (outs), (ins GRRegs:$src, GRRegs:$t),
+ "init t[$t]:lr, $src",
+ [(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>;
+
+def SETCLK_l2r : _FLR2R<0b0000111001, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setclk res[$src1], $src2",
+ [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETN_l2r : _FLR2R<0b0011011000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setn res[$src1], $src2", []>;
+
+def SETRDY_l2r : _FLR2R<0b0010111000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setrdy res[$src1], $src2",
+ [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
+
+def TESTLCL_l2r : _FL2R<0b0010011000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "testlcl $dst, res[$src]", []>;
+
+// One operand short
+def MSYNC_1r : _F1R<0b000111, (outs), (ins GRRegs:$a),
+ "msync res[$a]",
+ [(int_xcore_msync GRRegs:$a)]>;
+def MJOIN_1r : _F1R<0b000101, (outs), (ins GRRegs:$a),
+ "mjoin res[$a]",
+ [(int_xcore_mjoin GRRegs:$a)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BAU_1r : _F1R<0b001001, (outs), (ins GRRegs:$a),
+ "bau $a",
+ [(brind GRRegs:$a)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT tjumptable:$t, GRRegs:$i)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BRU_1r : _F1R<0b001010, (outs), (ins GRRegs:$a), "bru $a", []>;
+
+let Defs=[SP], neverHasSideEffects=1 in
+def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), "set sp, $a", []>;
+
+let neverHasSideEffects=1 in
+def SETDP_1r : _F1R<0b001100, (outs), (ins GRRegs:$a), "set dp, $a", []>;
+
+let neverHasSideEffects=1 in
+def SETCP_1r : _F1R<0b001101, (outs), (ins GRRegs:$a), "set cp, $a", []>;
+
+let hasCtrlDep = 1 in
+def ECALLT_1r : _F1R<0b010011, (outs), (ins GRRegs:$a),
+ "ecallt $a",
+ []>;
+
+let hasCtrlDep = 1 in
+def ECALLF_1r : _F1R<0b010010, (outs), (ins GRRegs:$a),
+ "ecallf $a",
+ []>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
+def BLA_1r : _F1R<0b001000, (outs), (ins GRRegs:$a),
+ "bla $a",
+ [(XCoreBranchLink GRRegs:$a)]>;
+}
+
+def SYNCR_1r : _F1R<0b100001, (outs), (ins GRRegs:$a),
+ "syncr res[$a]",
+ [(int_xcore_syncr GRRegs:$a)]>;
+
+def FREER_1r : _F1R<0b000100, (outs), (ins GRRegs:$a),
+ "freer res[$a]",
+ [(int_xcore_freer GRRegs:$a)]>;
+
+let Uses=[R11] in {
+def SETV_1r : _F1R<0b010001, (outs), (ins GRRegs:$a),
+ "setv res[$a], r11",
+ [(int_xcore_setv GRRegs:$a, R11)]>;
+
+def SETEV_1r : _F1R<0b001111, (outs), (ins GRRegs:$a),
+ "setev res[$a], r11",
+ [(int_xcore_setev GRRegs:$a, R11)]>;
+}
+
+def DGETREG_1r : _F1R<0b001110, (outs GRRegs:$a), (ins), "dgetreg $a", []>;
+
+def EDU_1r : _F1R<0b000000, (outs), (ins GRRegs:$a), "edu res[$a]", []>;
+
+def EEU_1r : _F1R<0b000001, (outs), (ins GRRegs:$a),
+ "eeu res[$a]",
+ [(int_xcore_eeu GRRegs:$a)]>;
+
+def KCALL_1r : _F1R<0b010000, (outs), (ins GRRegs:$a), "kcall $a", []>;
+
+def WAITEF_1R : _F1R<0b000011, (outs), (ins GRRegs:$a), "waitef $a", []>;
+
+def WAITET_1R : _F1R<0b000010, (outs), (ins GRRegs:$a), "waitet $a", []>;
+
+def TSTART_1R : _F1R<0b000110, (outs), (ins GRRegs:$a), "start t[$a]", []>;
+
+def CLRPT_1R : _F1R<0b100000, (outs), (ins GRRegs:$a), "clrpt res[$a]", []>;
+
+// Zero operand short
+
+def CLRE_0R : _F0R<0b0000001101, (outs), (ins), "clre", [(int_xcore_clre)]>;
+
+def DCALL_0R : _F0R<0b0000011100, (outs), (ins), "dcall", []>;
+
+let Defs = [SP], Uses = [SP] in
+def DENTSP_0R : _F0R<0b0001001100, (outs), (ins), "dentsp", []>;
+
+let Defs = [SP] in
+def DRESTSP_0R : _F0R<0b0001001101, (outs), (ins), "drestsp", []>;
+
+def DRET_0R : _F0R<0b0000011110, (outs), (ins), "dret", []>;
+
+def FREET_0R : _F0R<0b0000001111, (outs), (ins), "freet", []>;
+
+let Defs = [R11] in {
+def GETID_0R : _F0R<0b0001001110, (outs), (ins),
+ "get r11, id",
+ [(set R11, (int_xcore_getid))]>;
+
+def GETED_0R : _F0R<0b0000111110, (outs), (ins),
+ "get r11, ed",
+ [(set R11, (int_xcore_geted))]>;
+
+def GETET_0R : _F0R<0b0000111111, (outs), (ins),
+ "get r11, et",
+ [(set R11, (int_xcore_getet))]>;
+
+def GETKEP_0R : _F0R<0b0001001111, (outs), (ins),
+ "get r11, kep", []>;
+
+def GETKSP_0R : _F0R<0b0001011100, (outs), (ins),
+ "get r11, ksp", []>;
+}
+
+let Defs = [SP] in
+def KRET_0R : _F0R<0b0000011101, (outs), (ins), "kret", []>;
+
+let Uses = [SP], mayLoad = 1 in {
+def LDET_0R : _F0R<0b0001011110, (outs), (ins), "ldw et, sp[4]", []>;
+
+def LDSED_0R : _F0R<0b0001011101, (outs), (ins), "ldw sed, sp[3]", []>;
+
+def LDSPC_0R : _F0R<0b0000101100, (outs), (ins), "ldw spc, sp[1]", []>;
+
+def LDSSR_0R : _F0R<0b0000101110, (outs), (ins), "ldw ssr, sp[2]", []>;
+}
+
+let Uses=[R11] in
+def SETKEP_0R : _F0R<0b0000011111, (outs), (ins), "set kep, r11", []>;
+
+def SSYNC_0r : _F0R<0b0000001110, (outs), (ins),
+ "ssync",
+ [(int_xcore_ssync)]>;
+
+let Uses = [SP], mayStore = 1 in {
+def STET_0R : _F0R<0b0000111101, (outs), (ins), "stw et, sp[4]", []>;
+
+def STSED_0R : _F0R<0b0000111100, (outs), (ins), "stw sed, sp[3]", []>;
+
+def STSPC_0R : _F0R<0b0000101101, (outs), (ins), "stw spc, sp[1]", []>;
+
+def STSSR_0R : _F0R<0b0000101111, (outs), (ins), "stw ssr, sp[2]", []>;
+}
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
+ hasSideEffects = 1 in
+def WAITEU_0R : _F0R<0b0000001100, (outs), (ins),
+ "waiteu",
+ [(brind (int_xcore_waitevent))]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BLRF_lu10 tglobaladdr:$addr)>;
+def : Pat<(XCoreBranchLink texternalsym:$addr), (BLRF_lu10 texternalsym:$addr)>;
+
+/// sext_inreg
+def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>;
+def : Pat<(sext_inreg GRRegs:$b, i8), (SEXT_rus GRRegs:$b, 8)>;
+def : Pat<(sext_inreg GRRegs:$b, i16), (SEXT_rus GRRegs:$b, 16)>;
+
+/// loads
+def : Pat<(zextloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(zextloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(sextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(sextloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(load (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (LDW_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(load (add GRRegs:$addr, immUs4:$offset)),
+ (LDW_2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(load GRRegs:$addr), (LDW_2rus GRRegs:$addr, 0)>;
+
+/// anyext
+def : Pat<(extloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+def : Pat<(extloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+/// stores
+def : Pat<(truncstorei8 GRRegs:$val, (add GRRegs:$addr, GRRegs:$offset)),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei8 GRRegs:$val, GRRegs:$addr),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(truncstorei16 GRRegs:$val, (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (STW_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(store GRRegs:$val, GRRegs:$addr),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, 0)>;
+
+/// cttz
+def : Pat<(cttz GRRegs:$src), (CLZ_l2r (BITREV_l2r GRRegs:$src))>;
+
+/// trap
+def : Pat<(trap), (ECALLF_1r (LDC_ru6 0))>;
+
+///
+/// branch patterns
+///
+
+// unconditional branch
+def : Pat<(br bb:$addr), (BRFU_lu6 bb:$addr)>;
+
+// direct match equal/notequal zero brcond
+def : Pat<(brcond (setne GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 GRRegs:$lhs, bb:$dst)>;
+def : Pat<(brcond (seteq GRRegs:$lhs, 0), bb:$dst),
+ (BRFF_lru6 GRRegs:$lhs, bb:$dst)>;
+
+def : Pat<(brcond (setle GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setule GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setuge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, immUs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_2rus GRRegs:$lhs, immUs:$rhs), bb:$dst)>;
+
+// generic brcond pattern
+def : Pat<(brcond GRRegs:$cond, bb:$addr), (BRFT_lru6 GRRegs:$cond, bb:$addr)>;
+
+
+///
+/// Select patterns
+///
+
+// direct match equal/notequal zero select
+def : Pat<(select (setne GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (seteq GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(select (setle GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setule GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setuge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, immUs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_2rus GRRegs:$lhs, immUs:$rhs), GRRegs:$F, GRRegs:$T)>;
+
+///
+/// setcc patterns, only matched when none of the above brcond
+/// patterns match
+///
+
+// setcc 2 register operands
+def : Pat<(setle GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+def : Pat<(setule GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+
+def : Pat<(setgt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$rhs, GRRegs:$lhs)>;
+def : Pat<(setugt GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$rhs, GRRegs:$lhs)>;
+
+def : Pat<(setge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+def : Pat<(setuge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(setlt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$lhs, GRRegs:$rhs)>;
+def : Pat<(setult GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+def : Pat<(setne GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (EQ_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(seteq GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+// setcc reg/imm operands
+def : Pat<(seteq GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus GRRegs:$lhs, immUs:$rhs)>;
+def : Pat<(setne GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus (EQ_2rus GRRegs:$lhs, immUs:$rhs), 0)>;
+
+// misc
+def : Pat<(add GRRegs:$addr, immUs4:$offset),
+ (LDAWF_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(sub GRRegs:$addr, immUs4:$offset),
+ (LDAWB_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(and GRRegs:$val, immMskBitp:$mask),
+ (ZEXT_rus GRRegs:$val, (msksize_xform immMskBitp:$mask))>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+def : Pat<(add GRRegs:$src1, immUsNeg:$src2),
+ (SUB_2rus GRRegs:$src1, (neg_xform immUsNeg:$src2))>;
+
+def : Pat<(add GRRegs:$src1, immUs4Neg:$src2),
+ (LDAWB_l2rus GRRegs:$src1, (div4neg_xform immUs4Neg:$src2))>;
+
+///
+/// Some peepholes
+///
+
+def : Pat<(mul GRRegs:$src, 3),
+ (LDA16F_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, 5),
+ (LDAWF_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, -3),
+ (LDAWB_l3r GRRegs:$src, GRRegs:$src)>;
+
+// ashr X, 32 is equivalent to ashr X, 31 on the XCore.
+def : Pat<(sra GRRegs:$src, 31),
+ (ASHR_l2rus GRRegs:$src, 32)>;
+
+def : Pat<(brcond (setlt GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>;
+
+// setge X, 0 is canonicalized to setgt X, -1
+def : Pat<(brcond (setgt GRRegs:$lhs, -1), bb:$dst),
+ (BRFF_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>;
+
+def : Pat<(select (setlt GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (setgt GRRegs:$lhs, -1), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(setgt GRRegs:$lhs, -1),
+ (EQ_2rus (ASHR_l2rus GRRegs:$lhs, 32), 0)>;
+
+def : Pat<(sra (shl GRRegs:$src, immBpwSubBitp:$imm), immBpwSubBitp:$imm),
+ (SEXT_rus GRRegs:$src, (bpwsub_xform immBpwSubBitp:$imm))>;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
new file mode 100644
index 000000000000..f96eda9fcb9f
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -0,0 +1,117 @@
+//===-- XCoreMCInstLower.cpp - Convert XCore MachineInstr to MCInst -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains code to lower XCore MachineInstrs to their
+/// corresponding MCInst records.
+///
+//===----------------------------------------------------------------------===//
+#include "XCoreMCInstLower.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+XCoreMCInstLower::XCoreMCInstLower(class AsmPrinter &asmprinter)
+: Printer(asmprinter) {}
+
+void XCoreMCInstLower::Initialize(Mangler *M, MCContext *C) {
+ Mang = M;
+ Ctx = C;
+}
+
+MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy,
+ unsigned Offset) const {
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ const MCSymbol *Symbol;
+
+ switch (MOTy) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Symbol = Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = Printer.GetJTISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = Printer.GetCPISymbol(MO.getIndex());
+ Offset += MO.getOffset();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
+
+ if (!Offset)
+ return MCOperand::CreateExpr(MCSym);
+
+ // Assume offset is never negative.
+ assert(Offset > 0);
+
+ const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
+ const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ return MCOperand::CreateExpr(Add);
+}
+
+MCOperand XCoreMCInstLower::LowerOperand(const MachineOperand &MO,
+ unsigned offset) const {
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) break;
+ return MCOperand::CreateReg(MO.getReg());
+ case MachineOperand::MO_Immediate:
+ return MCOperand::CreateImm(MO.getImm() + offset);
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(MO, MOTy, offset);
+ case MachineOperand::MO_RegisterMask:
+ break;
+ }
+
+ return MCOperand();
+}
+
+void XCoreMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp = LowerOperand(MO);
+
+ if (MCOp.isValid())
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h
new file mode 100644
index 000000000000..28e702bb9884
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h
@@ -0,0 +1,42 @@
+//===-- XCoreMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMCINSTLOWER_H
+#define XCOREMCINSTLOWER_H
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MachineInstr;
+ class MachineFunction;
+ class Mangler;
+ class AsmPrinter;
+
+/// \brief This class is used to lower an MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY XCoreMCInstLower {
+ typedef MachineOperand::MachineOperandType MachineOperandType;
+ MCContext *Ctx;
+ Mangler *Mang;
+ AsmPrinter &Printer;
+public:
+ XCoreMCInstLower(class AsmPrinter &asmprinter);
+ void Initialize(Mangler *mang, MCContext *C);
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
+
+private:
+ MCOperand LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy, unsigned Offset) const;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..7ca06729120e
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- XCoreMachineFuctionInfo.cpp - XCore machine function info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void XCoreFunctionInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
new file mode 100644
index 000000000000..69d5de3e03ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -0,0 +1,69 @@
+//===-- XCoreMachineFuctionInfo.h - XCore machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares XCore-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMACHINEFUNCTIONINFO_H
+#define XCOREMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include <vector>
+
+namespace llvm {
+
+// Forward declarations
+class Function;
+
+/// XCoreFunctionInfo - This class is derived from MachineFunction private
+/// XCore target-specific information for each MachineFunction.
+class XCoreFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+ bool UsesLR;
+ int LRSpillSlot;
+ int FPSpillSlot;
+ int VarArgsFrameIndex;
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > SpillLabels;
+
+public:
+ XCoreFunctionInfo() :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ explicit XCoreFunctionInfo(MachineFunction &MF) :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ ~XCoreFunctionInfo() {}
+
+ void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+
+ void setUsesLR(bool val) { UsesLR = val; }
+ bool getUsesLR() const { return UsesLR; }
+
+ void setLRSpillSlot(int off) { LRSpillSlot = off; }
+ int getLRSpillSlot() const { return LRSpillSlot; }
+
+ void setFPSpillSlot(int off) { FPSpillSlot = off; }
+ int getFPSpillSlot() const { return FPSpillSlot; }
+
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > &getSpillLabels() {
+ return SpillLabels;
+ }
+};
+} // End llvm namespace
+
+#endif // XCOREMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
new file mode 100644
index 000000000000..49b563497c0b
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -0,0 +1,259 @@
+//===-- XCoreRegisterInfo.cpp - XCore Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreRegisterInfo.h"
+#include "XCore.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "XCoreGenRegisterInfo.inc"
+
+using namespace llvm;
+
+XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii)
+ : XCoreGenRegisterInfo(XCore::LR), TII(tii) {
+}
+
+// helper functions
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
+ return MF.getMMI().hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+}
+
+const uint16_t* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const uint16_t CalleeSavedRegs[] = {
+ XCore::R4, XCore::R5, XCore::R6, XCore::R7,
+ XCore::R8, XCore::R9, XCore::R10, XCore::LR,
+ 0
+ };
+ return CalleeSavedRegs;
+}
+
+BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ Reserved.set(XCore::CP);
+ Reserved.set(XCore::DP);
+ Reserved.set(XCore::SP);
+ Reserved.set(XCore::LR);
+ if (TFI->hasFP(MF)) {
+ Reserved.set(XCore::R10);
+ }
+ return Reserved;
+}
+
+bool
+XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // TODO can we estimate stack size?
+ return TFI->hasFP(MF);
+}
+
+bool
+XCoreRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return requiresRegisterScavenging(MF);
+}
+
+bool
+XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ return false;
+}
+
+void
+XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ MachineOperand &FrameOp = MI.getOperand(FIOperandNum);
+ int FrameIndex = FrameOp.getIndex();
+
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ int StackSize = MF.getFrameInfo()->getStackSize();
+
+ #ifndef NDEBUG
+ DEBUG(errs() << "\nFunction : "
+ << MF.getName() << "\n");
+ DEBUG(errs() << "<--------->\n");
+ DEBUG(MI.print(errs()));
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n");
+ DEBUG(errs() << "FrameOffset : " << Offset << "\n");
+ DEBUG(errs() << "StackSize : " << StackSize << "\n");
+ #endif
+
+ Offset += StackSize;
+
+ unsigned FrameReg = getFrameRegister(MF);
+
+ // Special handling of DBG_VALUE instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // fold constant into offset.
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ Offset/=4;
+
+ bool FP = TFI->hasFP(MF);
+
+ unsigned Reg = MI.getOperand(0).getReg();
+ bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
+
+ assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand");
+
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ if (FP) {
+ bool isUs = isImmUs(Offset);
+
+ if (!isUs) {
+ if (!RS)
+ report_fatal_error("eliminateFrameIndex Frame size too big: " +
+ Twine(Offset));
+ unsigned ScratchReg = RS->scavengeRegister(&XCore::GRRegsRegClass, II,
+ SPAdj);
+ loadConstant(MBB, II, ScratchReg, Offset, dl);
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::STWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::LDAWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ } else {
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ }
+ } else {
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("eliminateFrameIndex Frame size too big: " +
+ Twine(Offset));
+
+ switch (MI.getOpcode()) {
+ int NewOpcode;
+ case XCore::LDWFI:
+ NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode))
+ .addReg(Reg, getKillRegState(isKill))
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ }
+ // Erase old instruction.
+ MBB.erase(II);
+}
+
+void XCoreRegisterInfo::
+loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const {
+ // TODO use mkmsk if possible.
+ if (!isImmU16(Value)) {
+ // TODO use constant pool.
+ report_fatal_error("loadConstant value too big " + Twine(Value));
+ }
+ int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
+}
+
+unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
new file mode 100644
index 000000000000..1db32489cf8d
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -0,0 +1,70 @@
+//===-- XCoreRegisterInfo.h - XCore Register Information Impl ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREREGISTERINFO_H
+#define XCOREREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "XCoreGenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+struct XCoreRegisterInfo : public XCoreGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+ void loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const;
+
+ void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl) const;
+
+ void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl) const;
+
+public:
+ XCoreRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ //! Return whether to emit frame moves
+ static bool needsFrameMoves(const MachineFunction &MF);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
new file mode 100644
index 000000000000..6694b2882aca
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
@@ -0,0 +1,59 @@
+//===-- XCoreRegisterInfo.td - XCore Register defs ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the XCore register file
+//===----------------------------------------------------------------------===//
+
+class XCoreReg<string n> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "XCore";
+}
+
+// Registers are identified with 4-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<4> num, string n> : XCoreReg<n> {
+ let Num = num;
+}
+
+// CPU registers
+def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+def CP : Ri<12, "cp">, DwarfRegNum<[12]>;
+def DP : Ri<13, "dp">, DwarfRegNum<[13]>;
+def SP : Ri<14, "sp">, DwarfRegNum<[14]>;
+def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
+
+// Register classes.
+//
+def GRRegs : RegisterClass<"XCore", [i32], 32,
+ // Return values and arguments
+ (add R0, R1, R2, R3,
+ // Callee save
+ R4, R5, R6, R7, R8, R9, R10,
+ // Not preserved across procedure calls
+ R11)>;
+
+// Reserved
+def RRegs : RegisterClass<"XCore", [i32], 32,
+ (add R0, R1, R2, R3,
+ R4, R5, R6, R7, R8, R9, R10,
+ R11, CP, DP, SP, LR)> {
+ let isAllocatable = 0;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..44aeb6057ccb
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- XCoreSelectionDAGInfo.cpp - XCore SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-selectiondag-info"
+#include "XCoreTargetMachine.h"
+using namespace llvm;
+
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
new file mode 100644
index 000000000000..0386968638bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- XCoreSelectionDAGInfo.h - XCore SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the XCore subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESELECTIONDAGINFO_H
+#define XCORESELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class XCoreTargetMachine;
+
+class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
+ ~XCoreSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
new file mode 100644
index 000000000000..8cfb77089f31
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -0,0 +1,30 @@
+//===-- XCoreSubtarget.cpp - XCore Subtarget Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCore specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreSubtarget.h"
+#include "XCore.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "XCoreGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+void XCoreSubtarget::anchor() { }
+
+XCoreSubtarget::XCoreSubtarget(const std::string &TT,
+ const std::string &CPU, const std::string &FS)
+ : XCoreGenSubtargetInfo(TT, CPU, FS)
+{
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
new file mode 100644
index 000000000000..5ac4dbc4bc07
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
@@ -0,0 +1,43 @@
+//===-- XCoreSubtarget.h - Define Subtarget for the XCore -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESUBTARGET_H
+#define XCORESUBTARGET_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "XCoreGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class XCoreSubtarget : public XCoreGenSubtargetInfo {
+ virtual void anchor();
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ XCoreSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
new file mode 100644
index 000000000000..28c3d12c05fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -0,0 +1,65 @@
+//===-- XCoreTargetMachine.cpp - Define TargetMachine for XCore -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetMachine.h"
+#include "XCore.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+/// XCoreTargetMachine ctor - Create an ILP32 architecture model
+///
+XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ DL("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
+ "i16:16:32-i32:32:32-i64:32:32-n32"),
+ InstrInfo(),
+ FrameLowering(Subtarget),
+ TLInfo(*this),
+ TSInfo(*this) {
+}
+
+namespace {
+/// XCore Code Generator Pass Configuration Options.
+class XCorePassConfig : public TargetPassConfig {
+public:
+ XCorePassConfig(XCoreTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ XCoreTargetMachine &getXCoreTargetMachine() const {
+ return getTM<XCoreTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+};
+} // namespace
+
+TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new XCorePassConfig(this, PM);
+}
+
+bool XCorePassConfig::addInstSelector() {
+ addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTarget() {
+ RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
new file mode 100644
index 000000000000..eb9a1aa420eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
@@ -0,0 +1,64 @@
+//===-- XCoreTargetMachine.h - Define TargetMachine for XCore ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETMACHINE_H
+#define XCORETARGETMACHINE_H
+
+#include "XCoreFrameLowering.h"
+#include "XCoreISelLowering.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreSelectionDAGInfo.h"
+#include "XCoreSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class XCoreTargetMachine : public LLVMTargetMachine {
+ XCoreSubtarget Subtarget;
+ const DataLayout DL; // Calculates type size & alignment
+ XCoreInstrInfo InstrInfo;
+ XCoreFrameLowering FrameLowering;
+ XCoreTargetLowering TLInfo;
+ XCoreSelectionDAGInfo TSInfo;
+public:
+ XCoreTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const XCoreFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const XCoreTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
new file mode 100644
index 000000000000..820389935b38
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -0,0 +1,65 @@
+//===-- XCoreTargetObjectFile.cpp - XCore object files --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetObjectFile.h"
+#include "XCoreSubtarget.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+
+void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ DataSection =
+ Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getDataRel());
+ BSSSection =
+ Ctx.getELFSection(".dp.bss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getBSS());
+
+ MergeableConst4Section =
+ Ctx.getELFSection(".cp.rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst4());
+ MergeableConst8Section =
+ Ctx.getELFSection(".cp.rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst8());
+ MergeableConst16Section =
+ Ctx.getELFSection(".cp.rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst16());
+
+ // TLS globals are lowered in the backend to arrays indexed by the current
+ // thread id. After lowering they require no special handling by the linker
+ // and can be placed in the standard data / bss sections.
+ TLSDataSection = DataSection;
+ TLSBSSSection = BSSSection;
+
+ ReadOnlySection =
+ Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getReadOnlyWithRel());
+
+ // Dynamic linking is not supported. Data with relocations is placed in the
+ // same section as data without relocations.
+ DataRelSection = DataRelLocalSection = DataSection;
+ DataRelROSection = DataRelROLocalSection = ReadOnlySection;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
new file mode 100644
index 000000000000..27875e783b33
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -0,0 +1,25 @@
+//===-- XCoreTargetObjectFile.h - XCore Object Info -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class XCoreTargetObjectFile : public TargetLoweringObjectFileELF {
+ public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ // TODO: Classify globals as xcore wishes.
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
new file mode 100644
index 000000000000..e6fa4edf612e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -0,0 +1,897 @@
+//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass promotes "by reference" arguments to be "by value" arguments. In
+// practice, this means looking for internal functions that have pointer
+// arguments. If it can prove, through the use of alias analysis, that an
+// argument is *only* loaded, then it can pass the value into the function
+// instead of the address of the value. This can cause recursive simplification
+// of code and lead to the elimination of allocas (especially in C++ template
+// code like the STL).
+//
+// This pass also handles aggregate arguments that are passed into a function,
+// scalarizing them if the elements of the aggregate are only loaded. Note that
+// by default it refuses to scalarize aggregates which would require passing in
+// more than three operands to the function, because passing thousands of
+// operands for a large array or structure is unprofitable! This limit can be
+// configured or disabled, however.
+//
+// Note that this transformation could also be done for arguments that are only
+// stored to (returning the value instead), but does not currently. This case
+// would be best handled when and if LLVM begins supporting multiple return
+// values from functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "argpromotion"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
+STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
+STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
+STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
+
+namespace {
+ /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+ ///
+ struct ArgPromotion : public CallGraphSCCPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+ static char ID; // Pass identification, replacement for typeid
+ explicit ArgPromotion(unsigned maxElements = 3)
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// A vector used to hold the indices of a single GEP instruction
+ typedef std::vector<uint64_t> IndicesVector;
+
+ private:
+ CallGraphNode *PromoteArguments(CallGraphNode *CGN);
+ bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
+ CallGraphNode *DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+ /// The maximum number of elements to expand, or 0 for unlimited.
+ unsigned maxElements;
+ };
+}
+
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+
+Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
+ return new ArgPromotion(maxElements);
+}
+
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+ bool Changed = false, LocalChange;
+
+ do { // Iterate until we stop promoting from this SCC.
+ LocalChange = false;
+ // Attempt to promote arguments from all functions in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ if (CallGraphNode *CGN = PromoteArguments(*I)) {
+ LocalChange = true;
+ SCC.ReplaceNode(*I, CGN);
+ }
+ }
+ Changed |= LocalChange; // Remember that we changed something.
+ } while (LocalChange);
+
+ return Changed;
+}
+
+/// PromoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct). If safe to promote some arguments, it
+/// calls the DoPromotion method.
+///
+CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
+ Function *F = CGN->getFunction();
+
+ // Make sure that it is local to this module.
+ if (!F || !F->hasLocalLinkage()) return 0;
+
+ // First check: see if there are any pointer arguments! If not, quick exit.
+ SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++ArgNo)
+ if (I->getType()->isPointerTy())
+ PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
+ if (PointerArgs.empty()) return 0;
+
+ // Second check: make sure that all callers are direct callers. We can't
+ // transform functions that have indirect callers. Also see if the function
+ // is self-recursive.
+ bool isSelfRecursive = false;
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
+ UI != E; ++UI) {
+ CallSite CS(*UI);
+ // Must be a direct call.
+ if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0;
+
+ if (CS.getInstruction()->getParent()->getParent() == F)
+ isSelfRecursive = true;
+ }
+
+ // Check to see which arguments are promotable. If an argument is promotable,
+ // add it to ArgsToPromote.
+ SmallPtrSet<Argument*, 8> ArgsToPromote;
+ SmallPtrSet<Argument*, 8> ByValArgsToTransform;
+ for (unsigned i = 0; i != PointerArgs.size(); ++i) {
+ bool isByVal=F->getAttributes().
+ hasAttribute(PointerArgs[i].second+1, Attribute::ByVal);
+ Argument *PtrArg = PointerArgs[i].first;
+ Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+
+ // If this is a byval argument, and if the aggregate type is small, just
+ // pass the elements, which is always safe.
+ if (isByVal) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (maxElements > 0 && STy->getNumElements() > maxElements) {
+ DEBUG(dbgs() << "argpromotion disable promoting argument '"
+ << PtrArg->getName() << "' because it would require adding more"
+ << " than " << maxElements << " arguments to the function.\n");
+ continue;
+ }
+
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (!STy->getElementType(i)->isSingleValueType()) {
+ AllSimple = false;
+ break;
+ }
+ }
+
+ // Safe to transform, don't even bother trying to "promote" it.
+ // Passing the elements as a scalar will allow scalarrepl to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
+ }
+ }
+ }
+
+ // If the argument is a recursive type and we're in a recursive
+ // function, we could end up infinitely peeling the function argument.
+ if (isSelfRecursive) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+ bool RecursiveType = false;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (STy->getElementType(i) == PtrArg->getType()) {
+ RecursiveType = true;
+ break;
+ }
+ }
+ if (RecursiveType)
+ continue;
+ }
+ }
+
+ // Otherwise, see if we can promote the pointer to its value.
+ if (isSafeToPromoteArgument(PtrArg, isByVal))
+ ArgsToPromote.insert(PtrArg);
+ }
+
+ // No promotable pointer arguments.
+ if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ return 0;
+
+ return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+}
+
+/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
+/// all callees pass in a valid pointer for the specified function argument.
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+ Function *Callee = Arg->getParent();
+
+ unsigned ArgNo = std::distance(Callee->arg_begin(),
+ Function::arg_iterator(Arg));
+
+ // Look at all call sites of the function. At this pointer we know we only
+ // have direct callees.
+ for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
+ UI != E; ++UI) {
+ CallSite CS(*UI);
+ assert(CS && "Should only have direct calls!");
+
+ if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
+ return false;
+ }
+ return true;
+}
+
+/// Returns true if Prefix is a prefix of longer. That means, Longer has a size
+/// that is greater than or equal to the size of prefix, and each of the
+/// elements in Prefix is the same as the corresponding elements in Longer.
+///
+/// This means it also returns true when Prefix and Longer are equal!
+static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
+ const ArgPromotion::IndicesVector &Longer) {
+ if (Prefix.size() > Longer.size())
+ return false;
+ return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
+}
+
+
+/// Checks if Indices, or a prefix of Indices, is in Set.
+static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
+ std::set<ArgPromotion::IndicesVector> &Set) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Set.upper_bound(Indices);
+ if (Low != Set.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This means
+ // it points to a prefix of Indices (possibly Indices itself), if such
+ // prefix exists.
+ //
+ // This load is safe if any prefix of its operands is safe to load.
+ return Low != Set.end() && IsPrefix(*Low, Indices);
+}
+
+/// Mark the given indices (ToMark) as safe in the given set of indices
+/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
+/// is already a prefix of Indices in Safe, Indices are implicitely marked safe
+/// already. Furthermore, any indices that Indices is itself a prefix of, are
+/// removed from Safe (since they are implicitely safe because of Indices now).
+static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
+ std::set<ArgPromotion::IndicesVector> &Safe) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Safe.upper_bound(ToMark);
+ // Guard against the case where Safe is empty
+ if (Low != Safe.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This
+ // means it points to a prefix of Indices (possibly Indices itself), if
+ // such prefix exists.
+ if (Low != Safe.end()) {
+ if (IsPrefix(*Low, ToMark))
+ // If there is already a prefix of these indices (or exactly these
+ // indices) marked a safe, don't bother adding these indices
+ return;
+
+ // Increment Low, so we can use it as a "insert before" hint
+ ++Low;
+ }
+ // Insert
+ Low = Safe.insert(Low, ToMark);
+ ++Low;
+ // If there we're a prefix of longer index list(s), remove those
+ std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end();
+ while (Low != End && IsPrefix(ToMark, *Low)) {
+ std::set<ArgPromotion::IndicesVector>::iterator Remove = Low;
+ ++Low;
+ Safe.erase(Remove);
+ }
+}
+
+/// isSafeToPromoteArgument - As you might guess from the name of this method,
+/// it checks to see if it is both safe and useful to promote the argument.
+/// This method limits promotion of aggregates to only promote up to three
+/// elements of the aggregate in order to avoid exploding the number of
+/// arguments passed in.
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
+ typedef std::set<IndicesVector> GEPIndicesSet;
+
+ // Quick exit for unused arguments
+ if (Arg->use_empty())
+ return true;
+
+ // We can only promote this argument if all of the uses are loads, or are GEP
+ // instructions (with constant indices) that are subsequently loaded.
+ //
+ // Promoting the argument causes it to be loaded in the caller
+ // unconditionally. This is only safe if we can prove that either the load
+ // would have happened in the callee anyway (ie, there is a load in the entry
+ // block) or the pointer passed in at every call site is guaranteed to be
+ // valid.
+ // In the former case, invalid loads can happen, but would have happened
+ // anyway, in the latter case, invalid loads won't happen. This prevents us
+ // from introducing an invalid load that wouldn't have happened in the
+ // original code.
+ //
+ // This set will contain all sets of indices that are loaded in the entry
+ // block, and thus are safe to unconditionally load in the caller.
+ GEPIndicesSet SafeToUnconditionallyLoad;
+
+ // This set contains all the sets of indices that we are planning to promote.
+ // This makes it possible to limit the number of arguments added.
+ GEPIndicesSet ToPromote;
+
+ // If the pointer is always valid, any load with first index 0 is valid.
+ if (isByVal || AllCallersPassInValidPointerForArgument(Arg))
+ SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+
+ // First, iterate the entry block and mark loads of (geps of) arguments as
+ // safe.
+ BasicBlock *EntryBlock = Arg->getParent()->begin();
+ // Declare this here so we can reuse it
+ IndicesVector Indices;
+ for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
+ I != E; ++I)
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Value *V = LI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ V = GEP->getPointerOperand();
+ if (V == Arg) {
+ // This load actually loads (part of) Arg? Check the indices then.
+ Indices.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
+ Indices.push_back(CI->getSExtValue());
+ else
+ // We found a non-constant GEP index for this argument? Bail out
+ // right away, can't promote this argument at all.
+ return false;
+
+ // Indices checked out, mark them as safe
+ MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+ Indices.clear();
+ }
+ } else if (V == Arg) {
+ // Direct loads are equivalent to a GEP with a single 0 index.
+ MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ }
+ }
+
+ // Now, iterate all uses of the argument to see if there are any uses that are
+ // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
+ SmallVector<LoadInst*, 16> Loads;
+ IndicesVector Operands;
+ for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ Operands.clear();
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Don't hack volatile/atomic loads
+ if (!LI->isSimple()) return false;
+ Loads.push_back(LI);
+ // Direct loads are equivalent to a GEP with a zero index and then a load.
+ Operands.push_back(0);
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEP->use_empty()) {
+ // Dead GEP's cause trouble later. Just remove them if we run into
+ // them.
+ getAnalysis<AliasAnalysis>().deleteValue(GEP);
+ GEP->eraseFromParent();
+ // TODO: This runs the above loop over and over again for dead GEPs
+ // Couldn't we just do increment the UI iterator earlier and erase the
+ // use?
+ return isSafeToPromoteArgument(Arg, isByVal);
+ }
+
+ // Ensure that all of the indices are constants.
+ for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
+ i != e; ++i)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
+ Operands.push_back(C->getSExtValue());
+ else
+ return false; // Not a constant operand GEP!
+
+ // Ensure that the only users of the GEP are load instructions.
+ for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
+ UI != E; ++UI)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ // Don't hack volatile/atomic loads
+ if (!LI->isSimple()) return false;
+ Loads.push_back(LI);
+ } else {
+ // Other uses than load?
+ return false;
+ }
+ } else {
+ return false; // Not a load or a GEP.
+ }
+
+ // Now, see if it is safe to promote this load / loads of this GEP. Loading
+ // is safe if Operands, or a prefix of Operands, is marked as safe.
+ if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+ return false;
+
+ // See if we are already promoting a load with these indices. If not, check
+ // to make sure that we aren't promoting too many elements. If so, nothing
+ // to do.
+ if (ToPromote.find(Operands) == ToPromote.end()) {
+ if (maxElements > 0 && ToPromote.size() == maxElements) {
+ DEBUG(dbgs() << "argpromotion not promoting argument '"
+ << Arg->getName() << "' because it would require adding more "
+ << "than " << maxElements << " arguments to the function.\n");
+ // We limit aggregate promotion to only promoting up to a fixed number
+ // of elements of the aggregate.
+ return false;
+ }
+ ToPromote.insert(Operands);
+ }
+ }
+
+ if (Loads.empty()) return true; // No users, this is a dead argument.
+
+ // Okay, now we know that the argument is only used by load instructions and
+ // it is safe to unconditionally perform all of them. Use alias analysis to
+ // check to see if the pointer is guaranteed to not be modified from entry of
+ // the function to each of the load instructions.
+
+ // Because there could be several/many load instructions, remember which
+ // blocks we know to be transparent to the load.
+ SmallPtrSet<BasicBlock*, 16> TranspBlocks;
+
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
+ // Check to see if the load is invalidated from the start of the block to
+ // the load itself.
+ LoadInst *Load = Loads[i];
+ BasicBlock *BB = Load->getParent();
+
+ AliasAnalysis::Location Loc = AA.getLocation(Load);
+ if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
+ return false; // Pointer is invalidated!
+
+ // Now check every path from the entry block to the load for transparency.
+ // To do this, we perform a depth first search on the inverse CFG from the
+ // loading block.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
+ I = idf_ext_begin(P, TranspBlocks),
+ E = idf_ext_end(P, TranspBlocks); I != E; ++I)
+ if (AA.canBasicBlockModify(**I, Loc))
+ return false;
+ }
+ }
+
+ // If the path from the entry of the function to each load is free of
+ // instructions that potentially invalidate the load, we can make the
+ // transformation!
+ return true;
+}
+
+/// DoPromotion - This method actually performs the promotion of the specified
+/// arguments, and returns the new function. At this point, we know that it's
+/// safe to do so.
+CallGraphNode *ArgPromotion::DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has modified arguments.
+ FunctionType *FTy = F->getFunctionType();
+ std::vector<Type*> Params;
+
+ typedef std::set<IndicesVector> ScalarizeTable;
+
+ // ScalarizedElements - If we are promoting a pointer that has elements
+ // accessed out of it, keep track of which elements are accessed so that we
+ // can add one argument for each.
+ //
+ // Arguments that are directly loaded will have a zero element value here, to
+ // handle cases where there are both a direct load and GEP accesses.
+ //
+ std::map<Argument*, ScalarizeTable> ScalarizedElements;
+
+ // OriginalLoads - Keep track of a representative load instruction from the
+ // original function so that we can tell the alias analysis implementation
+ // what the new GEP/Load instructions we are inserting look like.
+ std::map<IndicesVector, LoadInst*> OriginalLoads;
+
+ // Attribute - Keep track of the parameter attributes for the arguments
+ // that we are *not* promoting. For the ones that we do promote, the parameter
+ // attributes are lost
+ SmallVector<AttributeSet, 8> AttributesVec;
+ const AttributeSet &PAL = F->getAttributes();
+
+ // Add any return attributes.
+ if (PAL.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ PAL.getRetAttributes()));
+
+ // First, determine the new argument list
+ unsigned ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgIndex) {
+ if (ByValArgsToTransform.count(I)) {
+ // Simple byval argument? Just add all the struct element types.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Params.push_back(STy->getElementType(i));
+ ++NumByValArgsPromoted;
+ } else if (!ArgsToPromote.count(I)) {
+ // Unchanged argument
+ Params.push_back(I->getType());
+ AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
+ if (attrs.hasAttributes(ArgIndex)) {
+ AttrBuilder B(attrs, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+ }
+ } else if (I->use_empty()) {
+ // Dead argument (which are always marked as promotable)
+ ++NumArgumentsDead;
+ } else {
+ // Okay, this is being promoted. This means that the only uses are loads
+ // or GEPs which are only used by loads
+
+ // In this table, we will track which indices are loaded from the argument
+ // (where direct loads are tracked as no indices).
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
+ IndicesVector Indices;
+ Indices.reserve(User->getNumOperands() - 1);
+ // Since loads will only have a single operand, and GEPs only a single
+ // non-index operand, this will record direct loads without any indices,
+ // and gep+loads with the GEP indices.
+ for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
+ II != IE; ++II)
+ Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Indices.size() == 1 && Indices.front() == 0)
+ Indices.clear();
+ ArgIndices.insert(Indices);
+ LoadInst *OrigLoad;
+ if (LoadInst *L = dyn_cast<LoadInst>(User))
+ OrigLoad = L;
+ else
+ // Take any load, we will use it only to update Alias Analysis
+ OrigLoad = cast<LoadInst>(User->use_back());
+ OriginalLoads[Indices] = OrigLoad;
+ }
+
+ // Add a parameter to the function for each element passed in.
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ // not allowed to dereference ->begin() if size() is 0
+ Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI));
+ assert(Params.back());
+ }
+
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+ ++NumArgumentsPromoted;
+ else
+ ++NumAggregatesPromoted;
+ }
+ }
+
+ // Add any function attributes.
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
+ PAL.getFnAttributes()));
+
+ Type *RetTy = FTy->getReturnType();
+
+ // Construct the new function type using the new arguments.
+ FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
+
+ // Create the new function body and insert it into the module.
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ NF->copyAttributesFrom(F);
+
+
+ DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
+ << "From: " << *F);
+
+ // Recompute the parameter attributes list based on the new arguments for
+ // the function.
+ NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
+ AttributesVec.clear();
+
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->takeName(F);
+
+ // Get the alias analysis information that we need to update to reflect our
+ // changes.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // Get the callgraph information that we need to update to reflect our
+ // changes.
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ // Get a new callgraph node for NF.
+ CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in the loaded pointers.
+ //
+ SmallVector<Value*, 16> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->use_back());
+ assert(CS.getCalledFunction() == F);
+ Instruction *Call = CS.getInstruction();
+ const AttributeSet &CallPAL = CS.getAttributes();
+
+ // Add any return attributes.
+ if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ CallPAL.getRetAttributes()));
+
+ // Loop over the operands, inserting GEP and loads in the caller as
+ // appropriate.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++AI, ++ArgIndex)
+ if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ Args.push_back(*AI); // Unmodified argument
+
+ if (CallPAL.hasAttributes(ArgIndex)) {
+ AttrBuilder B(CallPAL, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
+ } else if (ByValArgsToTransform.count(I)) {
+ // Emit a GEP and load for each element of the struct.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
+ (*AI)->getName()+"."+utostr(i),
+ Call);
+ // TODO: Tell AA about the new values?
+ Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
+ }
+ } else if (!I->use_empty()) {
+ // Non-dead argument: insert GEPs and loads as appropriate.
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ // Store the Value* version of the indices in here, but declare it now
+ // for reuse.
+ std::vector<Value*> Ops;
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ Value *V = *AI;
+ LoadInst *OrigLoad = OriginalLoads[*SI];
+ if (!SI->empty()) {
+ Ops.reserve(SI->size());
+ Type *ElTy = V->getType();
+ for (IndicesVector::const_iterator II = SI->begin(),
+ IE = SI->end(); II != IE; ++II) {
+ // Use i32 to index structs, and i64 for others (pointers/arrays).
+ // This satisfies GEP constraints.
+ Type *IdxTy = (ElTy->isStructTy() ?
+ Type::getInt32Ty(F->getContext()) :
+ Type::getInt64Ty(F->getContext()));
+ Ops.push_back(ConstantInt::get(IdxTy, *II));
+ // Keep track of the type we're currently indexing.
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
+ }
+ // And create a GEP to extract those indices.
+ V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call);
+ Ops.clear();
+ AA.copyValue(OrigLoad->getOperand(0), V);
+ }
+ // Since we're replacing a load make sure we take the alignment
+ // of the previous load.
+ LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
+ newLoad->setAlignment(OrigLoad->getAlignment());
+ // Transfer the TBAA info too.
+ newLoad->setMetadata(LLVMContext::MD_tbaa,
+ OrigLoad->getMetadata(LLVMContext::MD_tbaa));
+ Args.push_back(newLoad);
+ AA.copyValue(OrigLoad, Args.back());
+ }
+ }
+
+ // Push any varargs arguments on the list.
+ for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
+ Args.push_back(*AI);
+ if (CallPAL.hasAttributes(ArgIndex)) {
+ AttrBuilder B(CallPAL, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
+ }
+
+ // Add any function attributes.
+ if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+ CallPAL.getFnAttributes()));
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
+ AttributesVec));
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
+ AttributesVec));
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ Args.clear();
+ AttributesVec.clear();
+
+ // Update the alias analysis implementation to know that we are replacing
+ // the old call with a new one.
+ AA.replaceWithNewValue(Call, New);
+
+ // Update the callgraph to know that the callsite has been transformed.
+ CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
+ CalleeNode->replaceCallEdge(Call, New, NF_CGN);
+
+ if (!Call->use_empty()) {
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+ // Loop over the argument list, transferring uses of the old arguments over to
+ // the new arguments, also transferring over the names as well.
+ //
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I) {
+ if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ // If this is an unmodified argument, move the name and users over to the
+ // new version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ AA.replaceWithNewValue(I, I2);
+ ++I2;
+ continue;
+ }
+
+ if (ByValArgsToTransform.count(I)) {
+ // In the callee, we create an alloca, and store each of the new incoming
+ // arguments into the alloca.
+ Instruction *InsertPt = NF->begin()->begin();
+
+ // Just add all the struct element types.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
+ StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx =
+ GetElementPtrInst::Create(TheAlloca, Idxs,
+ TheAlloca->getName()+"."+Twine(i),
+ InsertPt);
+ I2->setName(I->getName()+"."+Twine(i));
+ new StoreInst(I2++, Idx, InsertPt);
+ }
+
+ // Anything that used the arg should now use the alloca.
+ I->replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(I);
+ AA.replaceWithNewValue(I, TheAlloca);
+ continue;
+ }
+
+ if (I->use_empty()) {
+ AA.deleteValue(I);
+ continue;
+ }
+
+ // Otherwise, if we promoted this argument, then all users are load
+ // instructions (or GEPs with only load users), and all loads should be
+ // using the new argument that we added.
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+
+ while (!I->use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
+ assert(ArgIndices.begin()->empty() &&
+ "Load element should sort to front!");
+ I2->setName(I->getName()+".val");
+ LI->replaceAllUsesWith(I2);
+ AA.replaceWithNewValue(LI, I2);
+ LI->eraseFromParent();
+ DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ << "' in function '" << F->getName() << "'\n");
+ } else {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
+ IndicesVector Operands;
+ Operands.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Operands.size() == 1 && Operands.front() == 0)
+ Operands.clear();
+
+ Function::arg_iterator TheArg = I2;
+ for (ScalarizeTable::iterator It = ArgIndices.begin();
+ *It != Operands; ++It, ++TheArg) {
+ assert(It != ArgIndices.end() && "GEP not handled??");
+ }
+
+ std::string NewName = I->getName();
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ NewName += "." + utostr(Operands[i]);
+ }
+ NewName += ".val";
+ TheArg->setName(NewName);
+
+ DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
+ << "' of function '" << NF->getName() << "'\n");
+
+ // All of the uses must be load instructions. Replace them all with
+ // the argument specified by ArgNo.
+ while (!GEP->use_empty()) {
+ LoadInst *L = cast<LoadInst>(GEP->use_back());
+ L->replaceAllUsesWith(TheArg);
+ AA.replaceWithNewValue(L, TheArg);
+ L->eraseFromParent();
+ }
+ AA.deleteValue(GEP);
+ GEP->eraseFromParent();
+ }
+ }
+
+ // Increment I2 past all of the arguments added for this promoted pointer.
+ std::advance(I2, ArgIndices.size());
+ }
+
+ // Tell the alias analysis that the old function is about to disappear.
+ AA.replaceWithNewValue(F, NF);
+
+
+ NF_CGN->stealCalledFunctionsFrom(CG[F]);
+
+ // Now that the old function is dead, delete it. If there is a dangling
+ // reference to the CallgraphNode, just leave the dead function around for
+ // someone else to nuke.
+ CallGraphNode *CGN = CG[F];
+ if (CGN->getNumReferences() == 0)
+ delete CG.removeFunctionFromModule(CGN);
+ else
+ F->setLinkage(Function::ExternalLinkage);
+
+ return NF_CGN;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
new file mode 100644
index 000000000000..2e32240621f9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -0,0 +1,47 @@
+//===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// NOTE: DO NOT USE THIS IF AVOIDABLE
+//
+// This pass is a nonce pass intended to allow manipulation of the implicitly
+// nesting pass manager. For example, it can be used to cause a CGSCC pass
+// manager to be closed prior to running a new collection of function passes.
+//
+// FIXME: This is a huge HACK. This should be removed when the pass manager's
+// nesting is made explicit instead of implicit.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+using namespace llvm;
+
+namespace {
+/// \brief A nonce module pass used to place a barrier in a pass manager.
+///
+/// There is no mechanism for ending a CGSCC pass manager once one is started.
+/// This prevents extension points from having clear deterministic ordering
+/// when they are phrased as non-module passes.
+class BarrierNoop : public ModulePass {
+public:
+ static char ID; // Pass identification.
+
+ BarrierNoop() : ModulePass(ID) {
+ initializeBarrierNoopPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) { return false; }
+};
+}
+
+ModulePass *llvm::createBarrierNoopPass() { return new BarrierNoop(); }
+
+char BarrierNoop::ID = 0;
+INITIALIZE_PASS(BarrierNoop, "barrier", "A No-Op Barrier Pass",
+ false, false)
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
new file mode 100644
index 000000000000..8336d3ad3479
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -0,0 +1,229 @@
+//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface to a pass that merges duplicate global
+// constants together into a single constant that is shared. This is useful
+// because some passes (ie TraceValues) insert a lot of string constants into
+// the program, regardless of whether or not an existing string is available.
+//
+// Algorithm: ConstantMerge is designed to build up a map of available constants
+// and eliminate duplicates when it is initialized.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constmerge"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+STATISTIC(NumMerged, "Number of global constants merged");
+
+namespace {
+ struct ConstantMerge : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantMerge() : ModulePass(ID) {
+ initializeConstantMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ // For this pass, process all of the globals in the module, eliminating
+ // duplicate constants.
+ bool runOnModule(Module &M);
+
+ // Return true iff we can determine the alignment of this global variable.
+ bool hasKnownAlignment(GlobalVariable *GV) const;
+
+ // Return the alignment of the global, including converting the default
+ // alignment to a concrete value.
+ unsigned getAlignment(GlobalVariable *GV) const;
+
+ const DataLayout *TD;
+ };
+}
+
+char ConstantMerge::ID = 0;
+INITIALIZE_PASS(ConstantMerge, "constmerge",
+ "Merge Duplicate Global Constants", false, false)
+
+ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
+
+
+
+/// Find values that are marked as llvm.used.
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
+// True if A is better than B.
+static bool IsBetterCannonical(const GlobalVariable &A,
+ const GlobalVariable &B) {
+ if (!A.hasLocalLinkage() && B.hasLocalLinkage())
+ return true;
+
+ if (A.hasLocalLinkage() && !B.hasLocalLinkage())
+ return false;
+
+ return A.hasUnnamedAddr();
+}
+
+bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
+ return TD || GV->getAlignment() != 0;
+}
+
+unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+ if (TD)
+ return TD->getPreferredAlignment(GV);
+ return GV->getAlignment();
+}
+
+bool ConstantMerge::runOnModule(Module &M) {
+ TD = getAnalysisIfAvailable<DataLayout>();
+
+ // Find all the globals that are marked "used". These cannot be merged.
+ SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+ FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+ FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
+ // Map unique <constants, has-unknown-alignment> pairs to globals. We don't
+ // want to merge globals of unknown alignment with those of explicit
+ // alignment. If we have DataLayout, we always know the alignment.
+ DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;
+
+ // Replacements - This vector contains a list of replacements to perform.
+ SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
+
+ bool MadeChange = false;
+
+ // Iterate constant merging while we are still making progress. Merging two
+ // constants together may allow us to merge other constants together if the
+ // second level constants have initializers which point to the globals that
+ // were just merged.
+ while (1) {
+
+ // First: Find the canonical constants others will be merged with.
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+
+ // If this GV is dead, remove it.
+ GV->removeDeadConstantUsers();
+ if (GV->use_empty() && GV->hasLocalLinkage()) {
+ GV->eraseFromParent();
+ continue;
+ }
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+ // This transformation is legal for weak ODR globals in the sense it
+ // doesn't change semantics, but we really don't want to perform it
+ // anyway; it's likely to pessimize code generation, and some tools
+ // (like the Darwin linker in cases involving CFString) don't expect it.
+ if (GV->isWeakForLinker())
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
+ GlobalVariable *&Slot = CMap[Pair];
+
+ // If this is the first constant we find or if the old one is local,
+ // replace with the current one. If the current is externally visible
+ // it cannot be replace, but can be the canonical constant we merge with.
+ if (Slot == 0 || IsBetterCannonical(*GV, *Slot))
+ Slot = GV;
+ }
+
+ // Second: identify all globals that can be merged together, filling in
+ // the Replacements vector. We cannot do the replacement in this pass
+ // because doing so may cause initializers of other globals to be rewritten,
+ // invalidating the Constant* pointers in CMap.
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+ // We can only replace constant with local linkage.
+ if (!GV->hasLocalLinkage())
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
+ GlobalVariable *Slot = CMap[Pair];
+
+ if (!Slot || Slot == GV)
+ continue;
+
+ if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
+ continue;
+
+ if (!GV->hasUnnamedAddr())
+ Slot->setUnnamedAddr(false);
+
+ // Make all uses of the duplicate constant use the canonical version.
+ Replacements.push_back(std::make_pair(GV, Slot));
+ }
+
+ if (Replacements.empty())
+ return MadeChange;
+ CMap.clear();
+
+ // Now that we have figured out which replacements must be made, do them all
+ // now. This avoid invalidating the pointers in CMap, which are unneeded
+ // now.
+ for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+ // Bump the alignment if necessary.
+ if (Replacements[i].first->getAlignment() ||
+ Replacements[i].second->getAlignment()) {
+ Replacements[i].second->setAlignment(std::max(
+ Replacements[i].first->getAlignment(),
+ Replacements[i].second->getAlignment()));
+ }
+
+ // Eliminate any uses of the dead global.
+ Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
+
+ // Delete the global value from the module.
+ assert(Replacements[i].first->hasLocalLinkage() &&
+ "Refusing to delete an externally visible global variable.");
+ Replacements[i].first->eraseFromParent();
+ }
+
+ NumMerged += Replacements.size();
+ Replacements.clear();
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
new file mode 100644
index 000000000000..49ef1e75f1cd
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -0,0 +1,1078 @@
+//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass deletes dead arguments from internal functions. Dead argument
+// elimination removes arguments which are directly dead, as well as arguments
+// only passed into function calls as dead arguments of other functions. This
+// pass also deletes dead return values in a similar way.
+//
+// This pass is often useful as a cleanup pass to run after aggressive
+// interprocedural passes, which add possibly-dead arguments or return values.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "deadargelim"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
+STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
+STATISTIC(NumArgumentsReplacedWithUndef,
+ "Number of unread args replaced with undef");
+namespace {
+ /// DAE - The dead argument elimination pass.
+ ///
+ class DAE : public ModulePass {
+ public:
+
+ /// Struct that represents (part of) either a return value or a function
+ /// argument. Used so that arguments and return values can be used
+ /// interchangeably.
+ struct RetOrArg {
+ RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
+ IsArg(IsArg) {}
+ const Function *F;
+ unsigned Idx;
+ bool IsArg;
+
+ /// Make RetOrArg comparable, so we can put it into a map.
+ bool operator<(const RetOrArg &O) const {
+ if (F != O.F)
+ return F < O.F;
+ else if (Idx != O.Idx)
+ return Idx < O.Idx;
+ else
+ return IsArg < O.IsArg;
+ }
+
+ /// Make RetOrArg comparable, so we can easily iterate the multimap.
+ bool operator==(const RetOrArg &O) const {
+ return F == O.F && Idx == O.Idx && IsArg == O.IsArg;
+ }
+
+ std::string getDescription() const {
+ return std::string((IsArg ? "Argument #" : "Return value #"))
+ + utostr(Idx) + " of function " + F->getName().str();
+ }
+ };
+
+ /// Liveness enum - During our initial pass over the program, we determine
+ /// that things are either alive or maybe alive. We don't mark anything
+ /// explicitly dead (even if we know they are), since anything not alive
+ /// with no registered uses (in Uses) will never be marked alive and will
+ /// thus become dead in the end.
+ enum Liveness { Live, MaybeLive };
+
+ /// Convenience wrapper
+ RetOrArg CreateRet(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, false);
+ }
+ /// Convenience wrapper
+ RetOrArg CreateArg(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, true);
+ }
+
+ typedef std::multimap<RetOrArg, RetOrArg> UseMap;
+ /// This maps a return value or argument to any MaybeLive return values or
+ /// arguments it uses. This allows the MaybeLive values to be marked live
+ /// when any of its users is marked live.
+ /// For example (indices are left out for clarity):
+ /// - Uses[ret F] = ret G
+ /// This means that F calls G, and F returns the value returned by G.
+ /// - Uses[arg F] = ret G
+ /// This means that some function calls G and passes its result as an
+ /// argument to F.
+ /// - Uses[ret F] = arg F
+ /// This means that F returns one of its own arguments.
+ /// - Uses[arg F] = arg G
+ /// This means that G calls F and passes one of its own (G's) arguments
+ /// directly to F.
+ UseMap Uses;
+
+ typedef std::set<RetOrArg> LiveSet;
+ typedef std::set<const Function*> LiveFuncSet;
+
+ /// This set contains all values that have been determined to be live.
+ LiveSet LiveValues;
+ /// This set contains all values that are cannot be changed in any way.
+ LiveFuncSet LiveFunctions;
+
+ typedef SmallVector<RetOrArg, 5> UseVector;
+
+ // Map each LLVM function to corresponding metadata with debug info. If
+ // the function is replaced with another one, we should patch the pointer
+ // to LLVM function in metadata.
+ // As the code generation for module is finished (and DIBuilder is
+ // finalized) we assume that subprogram descriptors won't be changed, and
+ // they are stored in map for short duration anyway.
+ typedef DenseMap<Function*, DISubprogram> FunctionDIMap;
+ FunctionDIMap FunctionDIs;
+
+ protected:
+ // DAH uses this to specify a different ID.
+ explicit DAE(char &ID) : ModulePass(ID) {}
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DAE() : ModulePass(ID) {
+ initializeDAEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+ virtual bool ShouldHackArguments() const { return false; }
+
+ private:
+ Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+ Liveness SurveyUse(Value::const_use_iterator U, UseVector &MaybeLiveUses,
+ unsigned RetValNum = 0);
+ Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
+
+ void CollectFunctionDIs(Module &M);
+ void SurveyFunction(const Function &F);
+ void MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses);
+ void MarkLive(const RetOrArg &RA);
+ void MarkLive(const Function &F);
+ void PropagateLiveness(const RetOrArg &RA);
+ bool RemoveDeadStuffFromFunction(Function *F);
+ bool DeleteDeadVarargs(Function &Fn);
+ bool RemoveDeadArgumentsFromCallers(Function &Fn);
+ };
+}
+
+
+char DAE::ID = 0;
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
+
+namespace {
+ /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
+ /// deletes arguments to functions which are external. This is only for use
+ /// by bugpoint.
+ struct DAH : public DAE {
+ static char ID;
+ DAH() : DAE(ID) {}
+
+ virtual bool ShouldHackArguments() const { return true; }
+ };
+}
+
+char DAH::ID = 0;
+INITIALIZE_PASS(DAH, "deadarghaX0r",
+ "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
+ false, false)
+
+/// createDeadArgEliminationPass - This pass removes arguments from functions
+/// which are not used by the body of the function.
+///
+ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
+ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
+
+/// CollectFunctionDIs - Map each function in the module to its debug info
+/// descriptor.
+void DAE::CollectFunctionDIs(Module &M) {
+ FunctionDIs.clear();
+
+ for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I) {
+ NamedMDNode &NMD = *I;
+ for (unsigned MDIndex = 0, MDNum = NMD.getNumOperands();
+ MDIndex < MDNum; ++MDIndex) {
+ MDNode *Node = NMD.getOperand(MDIndex);
+ if (!DIDescriptor(Node).isCompileUnit())
+ continue;
+ DICompileUnit CU(Node);
+ const DIArray &SPs = CU.getSubprograms();
+ for (unsigned SPIndex = 0, SPNum = SPs.getNumElements();
+ SPIndex < SPNum; ++SPIndex) {
+ DISubprogram SP(SPs.getElement(SPIndex));
+ if (!SP.Verify())
+ continue;
+ if (Function *F = SP.getFunction())
+ FunctionDIs[F] = SP;
+ }
+ }
+ }
+}
+
+/// DeleteDeadVarargs - If this is an function that takes a ... list, and if
+/// llvm.vastart is never called, the varargs list is dead for the function.
+bool DAE::DeleteDeadVarargs(Function &Fn) {
+ assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
+ if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+
+ // Ensure that the function is only directly called.
+ if (Fn.hasAddressTaken())
+ return false;
+
+ // Okay, we know we can transform this function if safe. Scan its body
+ // looking for calls to llvm.vastart.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ }
+ }
+ }
+
+ // If we get here, there are no calls to llvm.vastart in the function body,
+ // remove the "..." and adjust all the calls.
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but doesn't have isVarArg set.
+ FunctionType *FTy = Fn.getFunctionType();
+
+ std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
+ FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
+ Params, false);
+ unsigned NumArgs = Params.size();
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, Fn.getLinkage());
+ NF->copyAttributesFrom(&Fn);
+ Fn.getParent()->getFunctionList().insert(&Fn, NF);
+ NF->takeName(&Fn);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!Fn.use_empty()) {
+ CallSite CS(Fn.use_back());
+ Instruction *Call = CS.getInstruction();
+
+ // Pass all the same arguments.
+ Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
+
+ // Drop any attributes that were on the vararg arguments.
+ AttributeSet PAL = CS.getAttributes();
+ if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
+ SmallVector<AttributeSet, 8> AttributesVec;
+ for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
+ AttributesVec.push_back(PAL.getSlotAttributes(i));
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Fn.getContext(),
+ PAL.getFnAttributes()));
+ PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
+ }
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(PAL);
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(PAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+
+ Args.clear();
+
+ if (!Call->use_empty())
+ Call->replaceAllUsesWith(New);
+
+ New->takeName(Call);
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
+
+ // Loop over the argument list, transferring uses of the old arguments over to
+ // the new arguments, also transferring over the names as well. While we're at
+ // it, remove the dead arguments from the DeadArguments list.
+ //
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++I2) {
+ // Move the name and users over to the new version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ }
+
+ // Patch the pointer to LLVM function in debug info descriptor.
+ FunctionDIMap::iterator DI = FunctionDIs.find(&Fn);
+ if (DI != FunctionDIs.end())
+ DI->second.replaceFunction(NF);
+
+ // Finally, nuke the old function.
+ Fn.eraseFromParent();
+ return true;
+}
+
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
+/// arguments that are unused, and changes the caller parameters to be undefined
+/// instead.
+bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
+{
+ if (Fn.isDeclaration() || Fn.mayBeOverridden())
+ return false;
+
+ // Functions with local linkage should already have been handled.
+ if (Fn.hasLocalLinkage())
+ return false;
+
+ if (Fn.use_empty())
+ return false;
+
+ SmallVector<unsigned, 8> UnusedArgs;
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+ I != E; ++I) {
+ Argument *Arg = I;
+
+ if (Arg->use_empty() && !Arg->hasByValAttr())
+ UnusedArgs.push_back(Arg->getArgNo());
+ }
+
+ if (UnusedArgs.empty())
+ return false;
+
+ bool Changed = false;
+
+ for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end();
+ I != E; ++I) {
+ CallSite CS(*I);
+ if (!CS || !CS.isCallee(I))
+ continue;
+
+ // Now go through all unused args and replace them with "undef".
+ for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
+ unsigned ArgNo = UnusedArgs[I];
+
+ Value *Arg = CS.getArgument(ArgNo);
+ CS.setArgument(ArgNo, UndefValue::get(Arg->getType()));
+ ++NumArgumentsReplacedWithUndef;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// Convenience function that returns the number of return values. It returns 0
+/// for void functions and 1 for functions not returning a struct. It returns
+/// the number of struct elements for functions returning a struct.
+static unsigned NumRetVals(const Function *F) {
+ if (F->getReturnType()->isVoidTy())
+ return 0;
+ else if (StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+ return STy->getNumElements();
+ else
+ return 1;
+}
+
+/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
+/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
+/// liveness of Use.
+DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
+ // We're live if our use or its Function is already marked as live.
+ if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+ return Live;
+
+ // We're maybe live otherwise, but remember that we must become live if
+ // Use becomes live.
+ MaybeLiveUses.push_back(Use);
+ return MaybeLive;
+}
+
+
+/// SurveyUse - This looks at a single use of an argument or return value
+/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
+/// if it causes the used value to become MaybeLive.
+///
+/// RetValNum is the return value number to use when this use is used in a
+/// return instruction. This is used in the recursion, you should always leave
+/// it at 0.
+DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
+ UseVector &MaybeLiveUses, unsigned RetValNum) {
+ const User *V = *U;
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+ // The value is returned from a function. It's only live when the
+ // function's return value is live. We use RetValNum here, for the case
+ // that U is really a use of an insertvalue instruction that uses the
+ // original Use.
+ RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+ if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex()
+ && IV->hasIndices())
+ // The use we are examining is inserted into an aggregate. Our liveness
+ // depends on all uses of that aggregate, but if it is used as a return
+ // value, only index at which we were inserted counts.
+ RetValNum = *IV->idx_begin();
+
+ // Note that if we are used as the aggregate operand to the insertvalue,
+ // we don't change RetValNum, but do survey all our uses.
+
+ Liveness Result = MaybeLive;
+ for (Value::const_use_iterator I = IV->use_begin(),
+ E = V->use_end(); I != E; ++I) {
+ Result = SurveyUse(I, MaybeLiveUses, RetValNum);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+ }
+
+ if (ImmutableCallSite CS = V) {
+ const Function *F = CS.getCalledFunction();
+ if (F) {
+ // Used in a direct call.
+
+ // Find the argument number. We know for sure that this use is an
+ // argument, since if it was the function argument this would be an
+ // indirect call and the we know can't be looking at a value of the
+ // label type (for the invoke instruction).
+ unsigned ArgNo = CS.getArgumentNo(U);
+
+ if (ArgNo >= F->getFunctionType()->getNumParams())
+ // The value is passed in through a vararg! Must be live.
+ return Live;
+
+ assert(CS.getArgument(ArgNo)
+ == CS->getOperand(U.getOperandNo())
+ && "Argument is not where we expected it");
+
+ // Value passed to a normal call. It's only live when the corresponding
+ // argument to the called function turns out live.
+ RetOrArg Use = CreateArg(F, ArgNo);
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ }
+ // Used in any other way? Value must be live.
+ return Live;
+}
+
+/// SurveyUses - This looks at all the uses of the given value
+/// Returns the Liveness deduced from the uses of this value.
+///
+/// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If
+/// the result is Live, MaybeLiveUses might be modified but its content should
+/// be ignored (since it might not be complete).
+DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
+ // Assume it's dead (which will only hold if there are no uses at all..).
+ Liveness Result = MaybeLive;
+ // Check each use.
+ for (Value::const_use_iterator I = V->use_begin(),
+ E = V->use_end(); I != E; ++I) {
+ Result = SurveyUse(I, MaybeLiveUses);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+}
+
+// SurveyFunction - This performs the initial survey of the specified function,
+// checking out whether or not it uses any of its incoming arguments or whether
+// any callers use the return value. This fills in the LiveValues set and Uses
+// map.
+//
+// We consider arguments of non-internal functions to be intrinsically alive as
+// well as arguments to functions which have their "address taken".
+//
+void DAE::SurveyFunction(const Function &F) {
+ unsigned RetCount = NumRetVals(&F);
+ // Assume all return values are dead
+ typedef SmallVector<Liveness, 5> RetVals;
+ RetVals RetValLiveness(RetCount, MaybeLive);
+
+ typedef SmallVector<UseVector, 5> RetUses;
+ // These vectors map each return value to the uses that make it MaybeLive, so
+ // we can add those to the Uses map if the return value really turns out to be
+ // MaybeLive. Initialized to a list of RetCount empty lists.
+ RetUses MaybeLiveRetUses(RetCount);
+
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
+ != F.getFunctionType()->getReturnType()) {
+ // We don't support old style multiple return values.
+ MarkLive(F);
+ return;
+ }
+
+ if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) {
+ MarkLive(F);
+ return;
+ }
+
+ DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
+ // Keep track of the number of live retvals, so we can skip checks once all
+ // of them turn out to be live.
+ unsigned NumLiveRetVals = 0;
+ Type *STy = dyn_cast<StructType>(F.getReturnType());
+ // Loop all uses of the function.
+ for (Value::const_use_iterator I = F.use_begin(), E = F.use_end();
+ I != E; ++I) {
+ // If the function is PASSED IN as an argument, its address has been
+ // taken.
+ ImmutableCallSite CS(*I);
+ if (!CS || !CS.isCallee(I)) {
+ MarkLive(F);
+ return;
+ }
+
+ // If this use is anything other than a call site, the function is alive.
+ const Instruction *TheCall = CS.getInstruction();
+ if (!TheCall) { // Not a direct call site?
+ MarkLive(F);
+ return;
+ }
+
+ // If we end up here, we are looking at a direct call to our function.
+
+ // Now, check how our return value(s) is/are used in this caller. Don't
+ // bother checking return values if all of them are live already.
+ if (NumLiveRetVals != RetCount) {
+ if (STy) {
+ // Check all uses of the return value.
+ for (Value::const_use_iterator I = TheCall->use_begin(),
+ E = TheCall->use_end(); I != E; ++I) {
+ const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I);
+ if (Ext && Ext->hasIndices()) {
+ // This use uses a part of our return value, survey the uses of
+ // that part and store the results for this index only.
+ unsigned Idx = *Ext->idx_begin();
+ if (RetValLiveness[Idx] != Live) {
+ RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ if (RetValLiveness[Idx] == Live)
+ NumLiveRetVals++;
+ }
+ } else {
+ // Used by something else than extractvalue. Mark all return
+ // values as live.
+ for (unsigned i = 0; i != RetCount; ++i )
+ RetValLiveness[i] = Live;
+ NumLiveRetVals = RetCount;
+ break;
+ }
+ }
+ } else {
+ // Single return value
+ RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]);
+ if (RetValLiveness[0] == Live)
+ NumLiveRetVals = RetCount;
+ }
+ }
+ }
+
+ // Now we've inspected all callers, record the liveness of our return values.
+ for (unsigned i = 0; i != RetCount; ++i)
+ MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
+
+ DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
+
+ // Now, check all of our arguments.
+ unsigned i = 0;
+ UseVector MaybeLiveArgUses;
+ for (Function::const_arg_iterator AI = F.arg_begin(),
+ E = F.arg_end(); AI != E; ++AI, ++i) {
+ // See what the effect of this use is (recording any uses that cause
+ // MaybeLive in MaybeLiveArgUses).
+ Liveness Result = SurveyUses(AI, MaybeLiveArgUses);
+ // Mark the result.
+ MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
+ // Clear the vector again for the next iteration.
+ MaybeLiveArgUses.clear();
+ }
+}
+
+/// MarkValue - This function marks the liveness of RA depending on L. If L is
+/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses,
+/// such that RA will be marked live if any use in MaybeLiveUses gets marked
+/// live later on.
+void DAE::MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses) {
+ switch (L) {
+ case Live: MarkLive(RA); break;
+ case MaybeLive:
+ {
+ // Note any uses of this value, so this return value can be
+ // marked live whenever one of the uses becomes live.
+ for (UseVector::const_iterator UI = MaybeLiveUses.begin(),
+ UE = MaybeLiveUses.end(); UI != UE; ++UI)
+ Uses.insert(std::make_pair(*UI, RA));
+ break;
+ }
+ }
+}
+
+/// MarkLive - Mark the given Function as alive, meaning that it cannot be
+/// changed in any way. Additionally,
+/// mark any values that are used as this function's parameters or by its return
+/// values (according to Uses) live as well.
+void DAE::MarkLive(const Function &F) {
+ DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
+ // Mark the function as live.
+ LiveFunctions.insert(&F);
+ // Mark all arguments as live.
+ for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
+ PropagateLiveness(CreateArg(&F, i));
+ // Mark all return values as live.
+ for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
+ PropagateLiveness(CreateRet(&F, i));
+}
+
+/// MarkLive - Mark the given return value or argument as live. Additionally,
+/// mark any values that are used by this value (according to Uses) live as
+/// well.
+void DAE::MarkLive(const RetOrArg &RA) {
+ if (LiveFunctions.count(RA.F))
+ return; // Function was already marked Live.
+
+ if (!LiveValues.insert(RA).second)
+ return; // We were already marked Live.
+
+ DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n");
+ PropagateLiveness(RA);
+}
+
+/// PropagateLiveness - Given that RA is a live value, propagate it's liveness
+/// to any other values it uses (according to Uses).
+void DAE::PropagateLiveness(const RetOrArg &RA) {
+ // We don't use upper_bound (or equal_range) here, because our recursive call
+ // to ourselves is likely to cause the upper_bound (which is the first value
+ // not belonging to RA) to become erased and the iterator invalidated.
+ UseMap::iterator Begin = Uses.lower_bound(RA);
+ UseMap::iterator E = Uses.end();
+ UseMap::iterator I;
+ for (I = Begin; I != E && I->first == RA; ++I)
+ MarkLive(I->second);
+
+ // Erase RA from the Uses map (from the lower bound to wherever we ended up
+ // after the loop).
+ Uses.erase(Begin, I);
+}
+
+// RemoveDeadStuffFromFunction - Remove any arguments and return values from F
+// that are not in LiveValues. Transform the function and all of the callees of
+// the function to not have these arguments and return values.
+//
+bool DAE::RemoveDeadStuffFromFunction(Function *F) {
+ // Don't modify fully live functions
+ if (LiveFunctions.count(F))
+ return false;
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has fewer arguments and a different return type.
+ FunctionType *FTy = F->getFunctionType();
+ std::vector<Type*> Params;
+
+ // Set up to build a new list of parameter attributes.
+ SmallVector<AttributeSet, 8> AttributesVec;
+ const AttributeSet &PAL = F->getAttributes();
+
+ // Find out the new return value.
+ Type *RetTy = FTy->getReturnType();
+ Type *NRetTy = NULL;
+ unsigned RetCount = NumRetVals(F);
+
+ // -1 means unused, other numbers are the new index
+ SmallVector<int, 5> NewRetIdxs(RetCount, -1);
+ std::vector<Type*> RetTypes;
+ if (RetTy->isVoidTy()) {
+ NRetTy = RetTy;
+ } else {
+ StructType *STy = dyn_cast<StructType>(RetTy);
+ if (STy)
+ // Look at each of the original return values individually.
+ for (unsigned i = 0; i != RetCount; ++i) {
+ RetOrArg Ret = CreateRet(F, i);
+ if (LiveValues.erase(Ret)) {
+ RetTypes.push_back(STy->getElementType(i));
+ NewRetIdxs[i] = RetTypes.size() - 1;
+ } else {
+ ++NumRetValsEliminated;
+ DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
+ << F->getName() << "\n");
+ }
+ }
+ else
+ // We used to return a single value.
+ if (LiveValues.erase(CreateRet(F, 0))) {
+ RetTypes.push_back(RetTy);
+ NewRetIdxs[0] = 0;
+ } else {
+ DEBUG(dbgs() << "DAE - Removing return value from " << F->getName()
+ << "\n");
+ ++NumRetValsEliminated;
+ }
+ if (RetTypes.size() > 1)
+ // More than one return type? Return a struct with them. Also, if we used
+ // to return a struct and didn't change the number of return values,
+ // return a struct again. This prevents changing {something} into
+ // something and {} into void.
+ // Make the new struct packed if we used to return a packed struct
+ // already.
+ NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
+ else if (RetTypes.size() == 1)
+ // One return type? Just a simple value then, but only if we didn't use to
+ // return a struct with that simple value before.
+ NRetTy = RetTypes.front();
+ else if (RetTypes.size() == 0)
+ // No return types? Make it void, but only if we didn't use to return {}.
+ NRetTy = Type::getVoidTy(F->getContext());
+ }
+
+ assert(NRetTy && "No new return type found?");
+
+ // The existing function return attributes.
+ AttributeSet RAttrs = PAL.getRetAttributes();
+
+ // Remove any incompatible attributes, but only if we removed all return
+ // values. Otherwise, ensure that we don't have any conflicting attributes
+ // here. Currently, this should not be possible, but special handling might be
+ // required when new return value attributes are added.
+ if (NRetTy->isVoidTy())
+ RAttrs =
+ AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex,
+ AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+ removeAttributes(AttributeFuncs::
+ typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex));
+ else
+ assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+ hasAttributes(AttributeFuncs::
+ typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex) &&
+ "Return attributes no longer compatible?");
+
+ if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));
+
+ // Remember which arguments are still alive.
+ SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
+ // Construct the new parameter list from non-dead arguments. Also construct
+ // a new set of parameter attributes to correspond. Skip the first parameter
+ // attribute, since that belongs to the return value.
+ unsigned i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++i) {
+ RetOrArg Arg = CreateArg(F, i);
+ if (LiveValues.erase(Arg)) {
+ Params.push_back(I->getType());
+ ArgAlive[i] = true;
+
+ // Get the original parameter attributes (skipping the first one, that is
+ // for the return value.
+ if (PAL.hasAttributes(i + 1)) {
+ AttrBuilder B(PAL, i + 1);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+ }
+ } else {
+ ++NumArgumentsEliminated;
+ DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
+ << ") from " << F->getName() << "\n");
+ }
+ }
+
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ PAL.getFnAttributes()));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
+
+ // Create the new function type based on the recomputed parameters.
+ FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
+
+ // No change?
+ if (NFTy == FTy)
+ return false;
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, F->getLinkage());
+ NF->copyAttributesFrom(F);
+ NF->setAttributes(NewPAL);
+ // Insert the new function before the old function, so we won't be processing
+ // it again.
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->takeName(F);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->use_back());
+ Instruction *Call = CS.getInstruction();
+
+ AttributesVec.clear();
+ const AttributeSet &CallPAL = CS.getAttributes();
+
+ // The call return attributes.
+ AttributeSet RAttrs = CallPAL.getRetAttributes();
+
+ // Adjust in case the function was changed to return void.
+ RAttrs =
+ AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex,
+ AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+ removeAttributes(AttributeFuncs::
+ typeIncompatible(NF->getReturnType(),
+ AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex));
+ if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
+
+ // Declare these outside of the loops, so we can reuse them for the second
+ // loop, which loops the varargs.
+ CallSite::arg_iterator I = CS.arg_begin();
+ unsigned i = 0;
+ // Loop over those operands, corresponding to the normal arguments to the
+ // original function, and add those that are still alive.
+ for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i)
+ if (ArgAlive[i]) {
+ Args.push_back(*I);
+ // Get original parameter attributes, but skip return attributes.
+ if (CallPAL.hasAttributes(i + 1)) {
+ AttrBuilder B(CallPAL, i + 1);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
+ }
+
+ // Push any varargs arguments on the list. Don't forget their attributes.
+ for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
+ Args.push_back(*I);
+ if (CallPAL.hasAttributes(i + 1)) {
+ AttrBuilder B(CallPAL, i + 1);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
+ }
+
+ if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+ CallPAL.getFnAttributes()));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(NewCallPAL);
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(NewCallPAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+
+ Args.clear();
+
+ if (!Call->use_empty()) {
+ if (New->getType() == Call->getType()) {
+ // Return type not changed? Just replace users then.
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ } else if (New->getType()->isVoidTy()) {
+ // Our return value has uses, but they will get removed later on.
+ // Replace by null for now.
+ if (!Call->getType()->isX86_MMXTy())
+ Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ } else {
+ assert(RetTy->isStructTy() &&
+ "Return type changed, but not into a void. The old return type"
+ " must have been a struct!");
+ Instruction *InsertPt = Call;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock::iterator IP = II->getNormalDest()->begin();
+ while (isa<PHINode>(IP)) ++IP;
+ InsertPt = IP;
+ }
+
+ // We used to return a struct. Instead of doing smart stuff with all the
+ // uses of this struct, we will just rebuild it using
+ // extract/insertvalue chaining and let instcombine clean that up.
+ //
+ // Start out building up our return value from undef
+ Value *RetVal = UndefValue::get(RetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ Value *V;
+ if (RetTypes.size() > 1)
+ // We are still returning a struct, so extract the value from our
+ // return value
+ V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret",
+ InsertPt);
+ else
+ // We are now returning a single element, so just insert that
+ V = New;
+ // Insert the value at the old position
+ RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt);
+ }
+ // Now, replace all uses of the old call instruction with the return
+ // struct we built
+ Call->replaceAllUsesWith(RetVal);
+ New->takeName(Call);
+ }
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+ // Loop over the argument list, transferring uses of the old arguments over to
+ // the new arguments, also transferring over the names as well.
+ i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++i)
+ if (ArgAlive[i]) {
+ // If this is a live argument, move the name and users over to the new
+ // version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ ++I2;
+ } else {
+ // If this argument is dead, replace any uses of it with null constants
+ // (these are guaranteed to become unused later on).
+ if (!I->getType()->isX86_MMXTy())
+ I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ }
+
+ // If we change the return value of the function we must rewrite any return
+ // instructions. Check this now.
+ if (F->getReturnType() != NF->getReturnType())
+ for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ Value *RetVal;
+
+ if (NFTy->getReturnType()->isVoidTy()) {
+ RetVal = 0;
+ } else {
+ assert (RetTy->isStructTy());
+ // The original return value was a struct, insert
+ // extractvalue/insertvalue chains to extract only the values we need
+ // to return and insert them into our new result.
+ // This does generate messy code, but we'll let it to instcombine to
+ // clean that up.
+ Value *OldRet = RI->getOperand(0);
+ // Start out building up our return value from undef
+ RetVal = UndefValue::get(NRetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
+ "oldret", RI);
+ if (RetTypes.size() > 1) {
+ // We're still returning a struct, so reinsert the value into
+ // our new return value at the new index
+
+ RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i],
+ "newret", RI);
+ } else {
+ // We are now only returning a simple value, so just return the
+ // extracted value.
+ RetVal = EV;
+ }
+ }
+ }
+ // Replace the return instruction with one returning the new return
+ // value (possibly 0 if we became void).
+ ReturnInst::Create(F->getContext(), RetVal, RI);
+ BB->getInstList().erase(RI);
+ }
+
+ // Patch the pointer to LLVM function in debug info descriptor.
+ FunctionDIMap::iterator DI = FunctionDIs.find(F);
+ if (DI != FunctionDIs.end())
+ DI->second.replaceFunction(NF);
+
+ // Now that the old function is dead, delete it.
+ F->eraseFromParent();
+
+ return true;
+}
+
+bool DAE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Collect debug info descriptors for functions.
+ CollectFunctionDIs(M);
+
+ // First pass: Do a simple check to see if any functions can have their "..."
+ // removed. We can do this if they never call va_start. This loop cannot be
+ // fused with the next loop, because deleting a function invalidates
+ // information computed while surveying other functions.
+ DEBUG(dbgs() << "DAE - Deleting dead varargs\n");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function &F = *I++;
+ if (F.getFunctionType()->isVarArg())
+ Changed |= DeleteDeadVarargs(F);
+ }
+
+ // Second phase:loop through the module, determining which arguments are live.
+ // We assume all arguments are dead unless proven otherwise (allowing us to
+ // determine that dead arguments passed into recursive functions are dead).
+ //
+ DEBUG(dbgs() << "DAE - Determining liveness\n");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ SurveyFunction(*I);
+
+ // Now, remove all dead arguments and return values from each function in
+ // turn.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ // Increment now, because the function will probably get removed (ie.
+ // replaced by a new one).
+ Function *F = I++;
+ Changed |= RemoveDeadStuffFromFunction(F);
+ }
+
+ // Finally, look for any unused parameters in functions with non-local
+ // linkage and replace the passed in parameters with undef.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function& F = *I;
+
+ Changed |= RemoveDeadArgumentsFromCallers(F);
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
new file mode 100644
index 000000000000..fa3d72ddcf16
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -0,0 +1,135 @@
+//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass extracts global values
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// @brief A pass to extract specific functions and their dependencies.
+ class GVExtractorPass : public ModulePass {
+ SetVector<GlobalValue *> Named;
+ bool deleteStuff;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ /// FunctionExtractorPass - If deleteFn is true, this pass deletes as the
+ /// specified function. Otherwise, it deletes as much of the module as
+ /// possible, except for the function specified.
+ ///
+ explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+ : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
+
+ bool runOnModule(Module &M) {
+ // Visit the global inline asm.
+ if (!deleteStuff)
+ M.setModuleInlineAsm("");
+
+ // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+ // implementation could figure out which GlobalValues are actually
+ // referenced by the Named set, and which GlobalValues in the rest of
+ // the module are referenced by the NamedSet, and get away with leaving
+ // more internal and private things internal and private. But for now,
+ // be conservative and simple.
+
+ // Visit the GlobalVariables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ bool Delete =
+ deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+ if (!Delete) {
+ if (I->hasAvailableExternallyLinkage())
+ continue;
+ if (I->getName() == "llvm.global_ctors")
+ continue;
+ }
+
+ bool Local = I->isDiscardableIfUnused();
+ if (Local)
+ I->setVisibility(GlobalValue::HiddenVisibility);
+
+ if (Local || Delete)
+ I->setLinkage(GlobalValue::ExternalLinkage);
+
+ if (Delete)
+ I->setInitializer(0);
+ }
+
+ // Visit the Functions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ bool Delete =
+ deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+ if (!Delete) {
+ if (I->hasAvailableExternallyLinkage())
+ continue;
+ }
+
+ bool Local = I->isDiscardableIfUnused();
+ if (Local)
+ I->setVisibility(GlobalValue::HiddenVisibility);
+
+ if (Local || Delete)
+ I->setLinkage(GlobalValue::ExternalLinkage);
+
+ if (Delete)
+ I->deleteBody();
+ }
+
+ // Visit the Aliases.
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E;) {
+ Module::alias_iterator CurI = I;
+ ++I;
+
+ if (CurI->isDiscardableIfUnused()) {
+ CurI->setVisibility(GlobalValue::HiddenVisibility);
+ CurI->setLinkage(GlobalValue::ExternalLinkage);
+ }
+
+ if (deleteStuff == (bool)Named.count(CurI)) {
+ Type *Ty = CurI->getType()->getElementType();
+
+ CurI->removeFromParent();
+ llvm::Value *Declaration;
+ if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
+ Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ CurI->getName(), &M);
+
+ } else {
+ Declaration =
+ new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
+ 0, CurI->getName());
+
+ }
+ CurI->replaceAllUsesWith(Declaration);
+ delete CurI;
+ }
+ }
+
+ return true;
+ }
+ };
+
+ char GVExtractorPass::ID = 0;
+}
+
+ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
+ bool deleteFn) {
+ return new GVExtractorPass(GVs, deleteFn);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
new file mode 100644
index 000000000000..bc5109b4d48d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -0,0 +1,1345 @@
+//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, looking for functions which do not access or only read
+// non-local memory, and marking them readnone/readonly. In addition,
+// it marks function arguments (of pointer type) 'nocapture' if a call
+// to the function does not create any copies of the pointer value that
+// outlive the call. This more or less means that the pointer is only
+// dereferenced, and not returned from the function or stored in a global.
+// Finally, well-known library call declarations are marked with all
+// attributes that are consistent with the function's standard definition.
+// This pass is implemented as a bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "functionattrs"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+using namespace llvm;
+
+STATISTIC(NumReadNone, "Number of functions marked readnone");
+STATISTIC(NumReadOnly, "Number of functions marked readonly");
+STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
+STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+
+namespace {
+ struct FunctionAttrs : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ FunctionAttrs() : CallGraphSCCPass(ID), AA(0) {
+ initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(CallGraphSCC &SCC);
+
+ // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+ bool AddReadAttrs(const CallGraphSCC &SCC);
+
+ // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
+ bool AddNoCaptureAttrs(const CallGraphSCC &SCC);
+
+ // IsFunctionMallocLike - Does this function allocate new memory?
+ bool IsFunctionMallocLike(Function *F,
+ SmallPtrSet<Function*, 8> &) const;
+
+ // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
+ bool AddNoAliasAttrs(const CallGraphSCC &SCC);
+
+ // Utility methods used by inferPrototypeAttributes to add attributes
+ // and maintain annotation statistics.
+
+ void setDoesNotAccessMemory(Function &F) {
+ if (!F.doesNotAccessMemory()) {
+ F.setDoesNotAccessMemory();
+ ++NumAnnotated;
+ }
+ }
+
+ void setOnlyReadsMemory(Function &F) {
+ if (!F.onlyReadsMemory()) {
+ F.setOnlyReadsMemory();
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotThrow(Function &F) {
+ if (!F.doesNotThrow()) {
+ F.setDoesNotThrow();
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotCapture(Function &F, unsigned n) {
+ if (!F.doesNotCapture(n)) {
+ F.setDoesNotCapture(n);
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotAlias(Function &F, unsigned n) {
+ if (!F.doesNotAlias(n)) {
+ F.setDoesNotAlias(n);
+ ++NumAnnotated;
+ }
+ }
+
+ // inferPrototypeAttributes - Analyze the name and prototype of the
+ // given function and set any applicable attributes. Returns true
+ // if any attributes were set and false otherwise.
+ bool inferPrototypeAttributes(Function &F);
+
+ // annotateLibraryCalls - Adds attributes to well-known standard library
+ // call declarations.
+ bool annotateLibraryCalls(const CallGraphSCC &SCC);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
+ };
+}
+
+char FunctionAttrs::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+
+Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+
+
+/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert((*I)->getFunction());
+
+ // Check if any of the functions in the SCC read or write memory. If they
+ // write memory then they can't be marked readnone or readonly.
+ bool ReadsMemory = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+ // External node - may write memory. Just give up.
+ return false;
+
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
+ if (MRB == AliasAnalysis::DoesNotAccessMemory)
+ // Already perfect!
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden()) {
+ if (!AliasAnalysis::onlyReadsMemory(MRB))
+ // May write memory. Just give up.
+ return false;
+
+ ReadsMemory = true;
+ continue;
+ }
+
+ // Scan the function body for instructions that may read or write memory.
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+
+ // Some instructions can be ignored even if they read or write memory.
+ // Detect these now, skipping to the next instruction if one is found.
+ CallSite CS(cast<Value>(I));
+ if (CS) {
+ // Ignore calls to functions in the same SCC.
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ continue;
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
+ // If the call doesn't access arbitrary memory, we may be able to
+ // figure out something.
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ // If the call does access argument pointees, check each argument.
+ if (AliasAnalysis::doesAccessArgPointees(MRB))
+ // Check whether all pointer arguments point to local memory, and
+ // ignore calls that only access local memory.
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI) {
+ Value *Arg = *CI;
+ if (Arg->getType()->isPointerTy()) {
+ AliasAnalysis::Location Loc(Arg,
+ AliasAnalysis::UnknownSize,
+ I->getMetadata(LLVMContext::MD_tbaa));
+ if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
+ if (MRB & AliasAnalysis::Mod)
+ // Writes non-local memory. Give up.
+ return false;
+ if (MRB & AliasAnalysis::Ref)
+ // Ok, it reads non-local memory.
+ ReadsMemory = true;
+ }
+ }
+ }
+ continue;
+ }
+ // The call could access any memory. If that includes writes, give up.
+ if (MRB & AliasAnalysis::Mod)
+ return false;
+ // If it reads, note it.
+ if (MRB & AliasAnalysis::Ref)
+ ReadsMemory = true;
+ continue;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads from local memory. (Atomic is okay here.)
+ if (!LI->isVolatile()) {
+ AliasAnalysis::Location Loc = AA->getLocation(LI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Ignore non-volatile stores to local memory. (Atomic is okay here.)
+ if (!SI->isVolatile()) {
+ AliasAnalysis::Location Loc = AA->getLocation(SI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ // Ignore vaargs on local memory.
+ AliasAnalysis::Location Loc = AA->getLocation(VI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+
+ // Any remaining instructions need to be taken seriously! Check if they
+ // read or write memory.
+ if (I->mayWriteToMemory())
+ // Writes memory. Just give up.
+ return false;
+
+ // If this instruction may read memory, remember that.
+ ReadsMemory |= I->mayReadFromMemory();
+ }
+ }
+
+ // Success! Functions in this SCC do not access memory, or only read memory.
+ // Give them the appropriate attribute.
+ bool MadeChange = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F->doesNotAccessMemory())
+ // Already perfect!
+ continue;
+
+ if (F->onlyReadsMemory() && ReadsMemory)
+ // No change.
+ continue;
+
+ MadeChange = true;
+
+ // Clear out any existing attributes.
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
+ F->removeAttributes(AttributeSet::FunctionIndex,
+ AttributeSet::get(F->getContext(),
+ AttributeSet::FunctionIndex, B));
+
+ // Add in the new attribute.
+ F->addAttribute(AttributeSet::FunctionIndex,
+ ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
+
+ if (ReadsMemory)
+ ++NumReadOnly;
+ else
+ ++NumReadNone;
+ }
+
+ return MadeChange;
+}
+
+namespace {
+ // For a given pointer Argument, this retains a list of Arguments of functions
+ // in the same SCC that the pointer data flows into. We use this to build an
+ // SCC of the arguments.
+ struct ArgumentGraphNode {
+ Argument *Definition;
+ SmallVector<ArgumentGraphNode*, 4> Uses;
+ };
+
+ class ArgumentGraph {
+ // We store pointers to ArgumentGraphNode objects, so it's important that
+ // that they not move around upon insert.
+ typedef std::map<Argument*, ArgumentGraphNode> ArgumentMapTy;
+
+ ArgumentMapTy ArgumentMap;
+
+ // There is no root node for the argument graph, in fact:
+ // void f(int *x, int *y) { if (...) f(x, y); }
+ // is an example where the graph is disconnected. The SCCIterator requires a
+ // single entry point, so we maintain a fake ("synthetic") root node that
+ // uses every node. Because the graph is directed and nothing points into
+ // the root, it will not participate in any SCCs (except for its own).
+ ArgumentGraphNode SyntheticRoot;
+
+ public:
+ ArgumentGraph() { SyntheticRoot.Definition = 0; }
+
+ typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator;
+
+ iterator begin() { return SyntheticRoot.Uses.begin(); }
+ iterator end() { return SyntheticRoot.Uses.end(); }
+ ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
+
+ ArgumentGraphNode *operator[](Argument *A) {
+ ArgumentGraphNode &Node = ArgumentMap[A];
+ Node.Definition = A;
+ SyntheticRoot.Uses.push_back(&Node);
+ return &Node;
+ }
+ };
+
+ // This tracker checks whether callees are in the SCC, and if so it does not
+ // consider that a capture, instead adding it to the "Uses" list and
+ // continuing with the analysis.
+ struct ArgumentUsesTracker : public CaptureTracker {
+ ArgumentUsesTracker(const SmallPtrSet<Function*, 8> &SCCNodes)
+ : Captured(false), SCCNodes(SCCNodes) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool captured(Use *U) {
+ CallSite CS(U->getUser());
+ if (!CS.getInstruction()) { Captured = true; return true; }
+
+ Function *F = CS.getCalledFunction();
+ if (!F || !SCCNodes.count(F)) { Captured = true; return true; }
+
+ Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ for (CallSite::arg_iterator PI = CS.arg_begin(), PE = CS.arg_end();
+ PI != PE; ++PI, ++AI) {
+ if (AI == AE) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ Captured = true;
+ return true;
+ }
+ if (PI == U) {
+ Uses.push_back(AI);
+ break;
+ }
+ }
+ assert(!Uses.empty() && "Capturing call-site captured nothing?");
+ return false;
+ }
+
+ bool Captured; // True only if certainly captured (used outside our SCC).
+ SmallVector<Argument*, 4> Uses; // Uses within our SCC.
+
+ const SmallPtrSet<Function*, 8> &SCCNodes;
+ };
+}
+
+namespace llvm {
+ template<> struct GraphTraits<ArgumentGraphNode*> {
+ typedef ArgumentGraphNode NodeType;
+ typedef SmallVectorImpl<ArgumentGraphNode*>::iterator ChildIteratorType;
+
+ static inline NodeType *getEntryNode(NodeType *A) { return A; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ return N->Uses.begin();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) {
+ return N->Uses.end();
+ }
+ };
+ template<> struct GraphTraits<ArgumentGraph*>
+ : public GraphTraits<ArgumentGraphNode*> {
+ static NodeType *getEntryNode(ArgumentGraph *AG) {
+ return AG->getEntryNode();
+ }
+ static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
+ return AG->begin();
+ }
+ static ChildIteratorType nodes_end(ArgumentGraph *AG) {
+ return AG->end();
+ }
+ };
+}
+
+/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
+bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
+ bool Changed = false;
+
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F && !F->isDeclaration() && !F->mayBeOverridden())
+ SCCNodes.insert(F);
+ }
+
+ ArgumentGraph AG;
+
+ AttrBuilder B;
+ B.addAttribute(Attribute::NoCapture);
+
+ // Check each function in turn, determining which pointer arguments are not
+ // captured.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+ // External node - only a problem for arguments that we pass to it.
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that captures pointers, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ continue;
+
+ // Functions that are readonly (or readnone) and nounwind and don't return
+ // a value can't capture arguments. Don't analyze them.
+ if (F->onlyReadsMemory() && F->doesNotThrow() &&
+ F->getReturnType()->isVoidTy()) {
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
+ A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ ++NumNoCapture;
+ Changed = true;
+ }
+ }
+ continue;
+ }
+
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
+ ArgumentUsesTracker Tracker(SCCNodes);
+ PointerMayBeCaptured(A, &Tracker);
+ if (!Tracker.Captured) {
+ if (Tracker.Uses.empty()) {
+ // If it's trivially not captured, mark it nocapture now.
+ A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B));
+ ++NumNoCapture;
+ Changed = true;
+ } else {
+ // If it's not trivially captured and not trivially not captured,
+ // then it must be calling into another function in our SCC. Save
+ // its particulars for Argument-SCC analysis later.
+ ArgumentGraphNode *Node = AG[A];
+ for (SmallVectorImpl<Argument*>::iterator UI = Tracker.Uses.begin(),
+ UE = Tracker.Uses.end(); UI != UE; ++UI)
+ Node->Uses.push_back(AG[*UI]);
+ }
+ }
+ // Otherwise, it's captured. Don't bother doing SCC analysis on it.
+ }
+ }
+
+ // The graph we've collected is partial because we stopped scanning for
+ // argument uses once we solved the argument trivially. These partial nodes
+ // show up as ArgumentGraphNode objects with an empty Uses list, and for
+ // these nodes the final decision about whether they capture has already been
+ // made. If the definition doesn't have a 'nocapture' attribute by now, it
+ // captures.
+
+ for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG), E = scc_end(&AG);
+ I != E; ++I) {
+ std::vector<ArgumentGraphNode*> &ArgumentSCC = *I;
+ if (ArgumentSCC.size() == 1) {
+ if (!ArgumentSCC[0]->Definition) continue; // synthetic root node
+
+ // eg. "void f(int* x) { if (...) f(x); }"
+ if (ArgumentSCC[0]->Uses.size() == 1 &&
+ ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
+ ArgumentSCC[0]->
+ Definition->
+ addAttr(AttributeSet::get(ArgumentSCC[0]->Definition->getContext(),
+ ArgumentSCC[0]->Definition->getArgNo() + 1,
+ B));
+ ++NumNoCapture;
+ Changed = true;
+ }
+ continue;
+ }
+
+ bool SCCCaptured = false;
+ for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
+ E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+ ArgumentGraphNode *Node = *I;
+ if (Node->Uses.empty()) {
+ if (!Node->Definition->hasNoCaptureAttr())
+ SCCCaptured = true;
+ }
+ }
+ if (SCCCaptured) continue;
+
+ SmallPtrSet<Argument*, 8> ArgumentSCCNodes;
+ // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
+ // quickly looking up whether a given Argument is in this ArgumentSCC.
+ for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
+ E = ArgumentSCC.end(); I != E; ++I) {
+ ArgumentSCCNodes.insert((*I)->Definition);
+ }
+
+ for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
+ E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+ ArgumentGraphNode *N = *I;
+ for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(),
+ UE = N->Uses.end(); UI != UE; ++UI) {
+ Argument *A = (*UI)->Definition;
+ if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
+ continue;
+ SCCCaptured = true;
+ break;
+ }
+ }
+ if (SCCCaptured) continue;
+
+ for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
+ Argument *A = ArgumentSCC[i]->Definition;
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ ++NumNoCapture;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// IsFunctionMallocLike - A function is malloc-like if it returns either null
+/// or a pointer that doesn't alias any other pointer visible to the caller.
+bool FunctionAttrs::IsFunctionMallocLike(Function *F,
+ SmallPtrSet<Function*, 8> &SCCNodes) const {
+ SmallSetVector<Value *, 8> FlowsToReturn;
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
+ FlowsToReturn.insert(Ret->getReturnValue());
+
+ for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+ Value *RetVal = FlowsToReturn[i];
+
+ if (Constant *C = dyn_cast<Constant>(RetVal)) {
+ if (!C->isNullValue() && !isa<UndefValue>(C))
+ return false;
+
+ continue;
+ }
+
+ if (isa<Argument>(RetVal))
+ return false;
+
+ if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
+ switch (RVI->getOpcode()) {
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ FlowsToReturn.insert(PN->getIncomingValue(i));
+ continue;
+ }
+
+ // Check whether the pointer came from an allocation.
+ case Instruction::Alloca:
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ if (CS.paramHasAttr(0, Attribute::NoAlias))
+ break;
+ if (CS.getCalledFunction() &&
+ SCCNodes.count(CS.getCalledFunction()))
+ break;
+ } // fall-through
+ default:
+ return false; // Did not come from an allocation.
+ }
+
+ if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
+ return false;
+ }
+
+ return true;
+}
+
+/// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
+bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert((*I)->getFunction());
+
+ // Check each function in turn, determining which functions return noalias
+ // pointers.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+ // External node - skip it;
+ return false;
+
+ // Already noalias.
+ if (F->doesNotAlias(0))
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime, so
+ // treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ return false;
+
+ // We annotate noalias return values, which are only applicable to
+ // pointer types.
+ if (!F->getReturnType()->isPointerTy())
+ continue;
+
+ if (!IsFunctionMallocLike(F, SCCNodes))
+ return false;
+ }
+
+ bool MadeChange = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
+ continue;
+
+ F->setDoesNotAlias(0);
+ ++NumNoAlias;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// inferPrototypeAttributes - Analyze the name and prototype of the
+/// given function and set any applicable attributes. Returns true
+/// if any attributes were set and false otherwise.
+bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
+ FunctionType *FTy = F.getFunctionType();
+ LibFunc::Func TheLibFunc;
+ if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
+ return false;
+
+ switch (TheLibFunc) {
+ case LibFunc::strlen:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::strchr:
+ case LibFunc::strrchr:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strncat:
+ case LibFunc::strncpy:
+ case LibFunc::stpncpy:
+ case LibFunc::strtoull:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strxfrm:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strcmp:
+ case LibFunc::strspn:
+ case LibFunc::strncmp:
+ case LibFunc::strcspn:
+ case LibFunc::strcoll:
+ case LibFunc::strcasecmp:
+ case LibFunc::strncasecmp:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strstr:
+ case LibFunc::strpbrk:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strtok:
+ case LibFunc::strtok_r:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::scanf:
+ case LibFunc::setbuf:
+ case LibFunc::setvbuf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::strdup:
+ case LibFunc::strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::stat:
+ case LibFunc::sscanf:
+ case LibFunc::sprintf:
+ case LibFunc::statvfs:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::snprintf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::setitimer:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::system:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "system" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::malloc:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::memcmp:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::memchr:
+ case LibFunc::memrchr:
+ if (FTy->getNumParams() != 3)
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::modf:
+ case LibFunc::modff:
+ case LibFunc::modfl:
+ case LibFunc::memcpy:
+ case LibFunc::memccpy:
+ case LibFunc::memmove:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::memalign:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::mkdir:
+ case LibFunc::mktime:
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::realloc:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::read:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "read" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::rmdir:
+ case LibFunc::rewind:
+ case LibFunc::remove:
+ case LibFunc::realpath:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::rename:
+ case LibFunc::readlink:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::write:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "write" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bcopy:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bcmp:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bzero:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::calloc:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::chmod:
+ case LibFunc::chown:
+ case LibFunc::ctermid:
+ case LibFunc::clearerr:
+ case LibFunc::closedir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::atoi:
+ case LibFunc::atol:
+ case LibFunc::atof:
+ case LibFunc::atoll:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::access:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::fopen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fdopen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::feof:
+ case LibFunc::free:
+ case LibFunc::fseek:
+ case LibFunc::ftell:
+ case LibFunc::fgetc:
+ case LibFunc::fseeko:
+ case LibFunc::ftello:
+ case LibFunc::fileno:
+ case LibFunc::fflush:
+ case LibFunc::fclose:
+ case LibFunc::fsetpos:
+ case LibFunc::flockfile:
+ case LibFunc::funlockfile:
+ case LibFunc::ftrylockfile:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::ferror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F);
+ break;
+ case LibFunc::fputc:
+ case LibFunc::fstat:
+ case LibFunc::frexp:
+ case LibFunc::frexpf:
+ case LibFunc::frexpl:
+ case LibFunc::fstatvfs:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fgets:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 3);
+ case LibFunc::fread:
+ case LibFunc::fwrite:
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 4);
+ case LibFunc::fputs:
+ case LibFunc::fscanf:
+ case LibFunc::fprintf:
+ case LibFunc::fgetpos:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::getc:
+ case LibFunc::getlogin_r:
+ case LibFunc::getc_unlocked:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::getenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::gets:
+ case LibFunc::getchar:
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::getitimer:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::getpwnam:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::ungetc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::uname:
+ case LibFunc::unlink:
+ case LibFunc::unsetenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::utime:
+ case LibFunc::utimes:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::puts:
+ case LibFunc::printf:
+ case LibFunc::perror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::pread:
+ case LibFunc::pwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; these are valid pthread cancellation points.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::putchar:
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::popen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::pclose:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vscanf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vsscanf:
+ case LibFunc::vfscanf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::valloc:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::vprintf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vfprintf:
+ case LibFunc::vsprintf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::vsnprintf:
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::open:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::opendir:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::tmpfile:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::times:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::htonl:
+ case LibFunc::htons:
+ case LibFunc::ntohl:
+ case LibFunc::ntohs:
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ break;
+ case LibFunc::lstat:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::lchown:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::qsort:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return false;
+ // May throw; places call through function pointer.
+ setDoesNotCapture(F, 4);
+ break;
+ case LibFunc::dunder_strdup:
+ case LibFunc::dunder_strndup:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::dunder_strtok_r:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::under_IO_getc:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::under_IO_putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::dunder_isoc99_scanf:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::stat64:
+ case LibFunc::lstat64:
+ case LibFunc::statvfs64:
+ case LibFunc::dunder_isoc99_sscanf:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fopen64:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fseeko64:
+ case LibFunc::ftello64:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::tmpfile64:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::fstat64:
+ case LibFunc::fstatvfs64:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::open64:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ default:
+ // Didn't mark any attributes.
+ return false;
+ }
+
+ return true;
+}
+
+/// annotateLibraryCalls - Adds attributes to well-known standard library
+/// call declarations.
+bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+ bool MadeChange = false;
+
+ // Check each function in turn annotating well-known library function
+ // declarations with attributes.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F != 0 && F->isDeclaration())
+ MadeChange |= inferPrototypeAttributes(*F);
+ }
+
+ return MadeChange;
+}
+
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+ AA = &getAnalysis<AliasAnalysis>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ bool Changed = annotateLibraryCalls(SCC);
+ Changed |= AddReadAttrs(SCC);
+ Changed |= AddNoCaptureAttrs(SCC);
+ Changed |= AddNoAliasAttrs(SCC);
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
new file mode 100644
index 000000000000..dc99492990a3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -0,0 +1,211 @@
+//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate unreachable internal globals from the
+// program. It uses an aggressive algorithm, searching out globals that are
+// known to be alive. After it finds all of the globals which are needed, it
+// deletes whatever is left over. This allows it to delete recursive chunks of
+// the program which are unreachable.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globaldce"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+STATISTIC(NumAliases , "Number of global aliases removed");
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+ struct GlobalDCE : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ GlobalDCE() : ModulePass(ID) {
+ initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+ }
+
+ // run - Do the GlobalDCE pass on the specified module, optionally updating
+ // the specified callgraph to reflect the changes.
+ //
+ bool runOnModule(Module &M);
+
+ private:
+ SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+
+ /// GlobalIsNeeded - mark the specific global value as needed, and
+ /// recursively mark anything that it uses as also needed.
+ void GlobalIsNeeded(GlobalValue *GV);
+ void MarkUsedGlobalsAsNeeded(Constant *C);
+
+ bool RemoveUnusedGlobalValue(GlobalValue &GV);
+ };
+}
+
+char GlobalDCE::ID = 0;
+INITIALIZE_PASS(GlobalDCE, "globaldce",
+ "Dead Global Elimination", false, false)
+
+ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
+
+bool GlobalDCE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Loop over the module, adding globals which are obviously necessary.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Functions with external linkage are needed if they have a body
+ if (!I->isDiscardableIfUnused() &&
+ !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Externally visible & appending globals are needed, if they have an
+ // initializer.
+ if (!I->isDiscardableIfUnused() &&
+ !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Externally visible aliases are needed.
+ if (!I->isDiscardableIfUnused())
+ GlobalIsNeeded(I);
+ }
+
+ // Now that all globals which are needed are in the AliveGlobals set, we loop
+ // through the program, deleting those which are not alive.
+ //
+
+ // The first pass is to drop initializers of global variables which are dead.
+ std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadGlobalVars.push_back(I); // Keep track of dead globals
+ I->setInitializer(0);
+ }
+
+ // The second pass drops the bodies of functions which are dead...
+ std::vector<Function*> DeadFunctions;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadFunctions.push_back(I); // Keep track of dead globals
+ if (!I->isDeclaration())
+ I->deleteBody();
+ }
+
+ // The third pass drops targets of aliases which are dead...
+ std::vector<GlobalAlias*> DeadAliases;
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;
+ ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadAliases.push_back(I);
+ I->setAliasee(0);
+ }
+
+ if (!DeadFunctions.empty()) {
+ // Now that all interferences have been dropped, delete the actual objects
+ // themselves.
+ for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadFunctions[i]);
+ M.getFunctionList().erase(DeadFunctions[i]);
+ }
+ NumFunctions += DeadFunctions.size();
+ Changed = true;
+ }
+
+ if (!DeadGlobalVars.empty()) {
+ for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadGlobalVars[i]);
+ M.getGlobalList().erase(DeadGlobalVars[i]);
+ }
+ NumVariables += DeadGlobalVars.size();
+ Changed = true;
+ }
+
+ // Now delete any dead aliases.
+ if (!DeadAliases.empty()) {
+ for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadAliases[i]);
+ M.getAliasList().erase(DeadAliases[i]);
+ }
+ NumAliases += DeadAliases.size();
+ Changed = true;
+ }
+
+ // Make sure that all memory is released
+ AliveGlobals.clear();
+
+ return Changed;
+}
+
+/// GlobalIsNeeded - the specific global value as needed, and
+/// recursively mark anything that it uses as also needed.
+void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+ // If the global is already in the set, no need to reprocess it.
+ if (!AliveGlobals.insert(G))
+ return;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
+ // If this is a global variable, we must make sure to add any global values
+ // referenced by the initializer to the alive set.
+ if (GV->hasInitializer())
+ MarkUsedGlobalsAsNeeded(GV->getInitializer());
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
+ // The target of a global alias is needed.
+ MarkUsedGlobalsAsNeeded(GA->getAliasee());
+ } else {
+ // Otherwise this must be a function object. We have to scan the body of
+ // the function looking for constants and global values which are used as
+ // operands. Any operands of these types must be processed to ensure that
+ // any globals used will be marked as needed.
+ Function *F = cast<Function>(G);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
+ GlobalIsNeeded(GV);
+ else if (Constant *C = dyn_cast<Constant>(*U))
+ MarkUsedGlobalsAsNeeded(C);
+ }
+}
+
+void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GlobalIsNeeded(GV);
+
+ // Loop over all of the operands of the constant, adding any globals they
+ // use to the list of needed globals.
+ for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
+ if (Constant *OpC = dyn_cast<Constant>(*I))
+ MarkUsedGlobalsAsNeeded(OpC);
+}
+
+// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
+// GlobalValue, looking for the constant pointer ref that may be pointing to it.
+// If found, check to see if the constant pointer ref is safe to destroy, and if
+// so, nuke it. This will reduce the reference count on the global value, which
+// might make it deader.
+//
+bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+ if (GV.use_empty()) return false;
+ GV.removeDeadConstantUsers();
+ return GV.use_empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
new file mode 100644
index 000000000000..b035a821b4cf
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -0,0 +1,3256 @@
+//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms simple global variables that never have their address
+// taken. If obviously true, it marks read/write globals as constant, deletes
+// variables only stored to, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalopt"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumMarked , "Number of globals marked constant");
+STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
+STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
+STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
+STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
+STATISTIC(NumDeleted , "Number of globals deleted");
+STATISTIC(NumFnDeleted , "Number of functions deleted");
+STATISTIC(NumGlobUses , "Number of global uses devirtualized");
+STATISTIC(NumLocalized , "Number of globals localized");
+STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
+STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
+STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
+STATISTIC(NumNestRemoved , "Number of nest attributes removed");
+STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
+STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
+STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
+
+namespace {
+ struct GlobalStatus;
+ struct GlobalOpt : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ static char ID; // Pass identification, replacement for typeid
+ GlobalOpt() : ModulePass(ID) {
+ initializeGlobalOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+ private:
+ GlobalVariable *FindGlobalCtors(Module &M);
+ bool OptimizeFunctions(Module &M);
+ bool OptimizeGlobalVars(Module &M);
+ bool OptimizeGlobalAliases(Module &M);
+ bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
+ bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
+ bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const GlobalStatus &GS);
+ bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
+
+ DataLayout *TD;
+ TargetLibraryInfo *TLI;
+ };
+}
+
+char GlobalOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+
+ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
+
+namespace {
+
+/// GlobalStatus - As we analyze each global, keep track of some information
+/// about it. If we find out that the address of the global is taken, none of
+/// this info will be accurate.
+struct GlobalStatus {
+ /// isCompared - True if the global's address is used in a comparison.
+ bool isCompared;
+
+ /// isLoaded - True if the global is ever loaded. If the global isn't ever
+ /// loaded it can be deleted.
+ bool isLoaded;
+
+ /// StoredType - Keep track of what stores to the global look like.
+ ///
+ enum StoredType {
+ /// NotStored - There is no store to this global. It can thus be marked
+ /// constant.
+ NotStored,
+
+ /// isInitializerStored - This global is stored to, but the only thing
+ /// stored is the constant it was initialized with. This is only tracked
+ /// for scalar globals.
+ isInitializerStored,
+
+ /// isStoredOnce - This global is stored to, but only its initializer and
+ /// one other value is ever stored to it. If this global isStoredOnce, we
+ /// track the value stored to it in StoredOnceValue below. This is only
+ /// tracked for scalar globals.
+ isStoredOnce,
+
+ /// isStored - This global is stored to by multiple values or something else
+ /// that we cannot track.
+ isStored
+ } StoredType;
+
+ /// StoredOnceValue - If only one value (besides the initializer constant) is
+ /// ever stored to this global, keep track of what value it is.
+ Value *StoredOnceValue;
+
+ /// AccessingFunction/HasMultipleAccessingFunctions - These start out
+ /// null/false. When the first accessing function is noticed, it is recorded.
+ /// When a second different accessing function is noticed,
+ /// HasMultipleAccessingFunctions is set to true.
+ const Function *AccessingFunction;
+ bool HasMultipleAccessingFunctions;
+
+ /// HasNonInstructionUser - Set to true if this global has a user that is not
+ /// an instruction (e.g. a constant expr or GV initializer).
+ bool HasNonInstructionUser;
+
+ /// AtomicOrdering - Set to the strongest atomic ordering requirement.
+ AtomicOrdering Ordering;
+
+ GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
+ StoredOnceValue(0), AccessingFunction(0),
+ HasMultipleAccessingFunctions(false),
+ HasNonInstructionUser(false), Ordering(NotAtomic) {}
+};
+
+}
+
+/// StrongerOrdering - Return the stronger of the two ordering. If the two
+/// orderings are acquire and release, then return AcquireRelease.
+///
+static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
+ if (X == Acquire && Y == Release) return AcquireRelease;
+ if (Y == Acquire && X == Release) return AcquireRelease;
+ return (AtomicOrdering)std::max(X, Y);
+}
+
+/// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
+/// by constants itself. Note that constants cannot be cyclic, so this test is
+/// pretty easy to implement recursively.
+///
+static bool SafeToDestroyConstant(const Constant *C) {
+ if (isa<GlobalValue>(C)) return false;
+
+ for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
+ ++UI)
+ if (const Constant *CU = dyn_cast<Constant>(*UI)) {
+ if (!SafeToDestroyConstant(CU)) return false;
+ } else
+ return false;
+ return true;
+}
+
+
+/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
+/// structure. If the global has its address taken, return true to indicate we
+/// can't do anything with it.
+///
+static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
+ SmallPtrSet<const PHINode*, 16> &PHIUsers) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ GS.HasNonInstructionUser = true;
+
+ // If the result of the constantexpr isn't pointer type, then we won't
+ // know to expect it in various places. Just reject early.
+ if (!isa<PointerType>(CE->getType())) return true;
+
+ if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ if (!GS.HasMultipleAccessingFunctions) {
+ const Function *F = I->getParent()->getParent();
+ if (GS.AccessingFunction == 0)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.isLoaded = true;
+ // Don't hack on volatile loads.
+ if (LI->isVolatile()) return true;
+ GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering());
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V) return true;
+
+ // Don't hack on volatile stores.
+ if (SI->isVolatile()) return true;
+
+ GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::isStored) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
+ SI->getOperand(1))) {
+ Value *StoredVal = SI->getOperand(0);
+
+ if (Constant *C = dyn_cast<Constant>(StoredVal)) {
+ if (C->isThreadDependent()) {
+ // The stored value changes between threads; don't track it.
+ return true;
+ }
+ }
+
+ if (StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::isInitializerStored)
+ GS.StoredType = GlobalStatus::isInitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
+ if (GS.StoredType < GlobalStatus::isInitializerStored)
+ GS.StoredType = GlobalStatus::isInitializerStored;
+ } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
+ GS.StoredType = GlobalStatus::isStoredOnce;
+ GS.StoredOnceValue = StoredVal;
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
+ GS.StoredOnceValue == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::isStored;
+ }
+ } else {
+ GS.StoredType = GlobalStatus::isStored;
+ }
+ }
+ } else if (isa<BitCastInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (isa<GetElementPtrInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (isa<SelectInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ // PHI nodes we can check just like select or GEP instructions, but we
+ // have to be careful about infinite recursion.
+ if (PHIUsers.insert(PN)) // Not already visited.
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (isa<CmpInst>(I)) {
+ GS.isCompared = true;
+ } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ if (MTI->isVolatile()) return true;
+ if (MTI->getArgOperand(0) == V)
+ GS.StoredType = GlobalStatus::isStored;
+ if (MTI->getArgOperand(1) == V)
+ GS.isLoaded = true;
+ } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+ assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
+ if (MSI->isVolatile()) return true;
+ GS.StoredType = GlobalStatus::isStored;
+ } else {
+ return true; // Any other non-load instruction might take address!
+ }
+ } else if (const Constant *C = dyn_cast<Constant>(U)) {
+ GS.HasNonInstructionUser = true;
+ // We might have a dead and dangling constant hanging off of here.
+ if (!SafeToDestroyConstant(C))
+ return true;
+ } else {
+ GS.HasNonInstructionUser = true;
+ // Otherwise must be some other user.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
+/// as a root? If so, we might not really want to eliminate the stores to it.
+static bool isLeakCheckerRoot(GlobalVariable *GV) {
+ // A global variable is a root if it is a pointer, or could plausibly contain
+ // a pointer. There are two challenges; one is that we could have a struct
+ // the has an inner member which is a pointer. We recurse through the type to
+ // detect these (up to a point). The other is that we may actually be a union
+ // of a pointer and another type, and so our LLVM type is an integer which
+ // gets converted into a pointer, or our type is an [i8 x #] with a pointer
+ // potentially contained here.
+
+ if (GV->hasPrivateLinkage())
+ return false;
+
+ SmallVector<Type *, 4> Types;
+ Types.push_back(cast<PointerType>(GV->getType())->getElementType());
+
+ unsigned Limit = 20;
+ do {
+ Type *Ty = Types.pop_back_val();
+ switch (Ty->getTypeID()) {
+ default: break;
+ case Type::PointerTyID: return true;
+ case Type::ArrayTyID:
+ case Type::VectorTyID: {
+ SequentialType *STy = cast<SequentialType>(Ty);
+ Types.push_back(STy->getElementType());
+ break;
+ }
+ case Type::StructTyID: {
+ StructType *STy = cast<StructType>(Ty);
+ if (STy->isOpaque()) return true;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Type *InnerTy = *I;
+ if (isa<PointerType>(InnerTy)) return true;
+ if (isa<CompositeType>(InnerTy))
+ Types.push_back(InnerTy);
+ }
+ break;
+ }
+ }
+ if (--Limit == 0) return true;
+ } while (!Types.empty());
+ return false;
+}
+
+/// Given a value that is stored to a global but never read, determine whether
+/// it's safe to remove the store and the chain of computation that feeds the
+/// store.
+static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
+ do {
+ if (isa<Constant>(V))
+ return true;
+ if (!V->hasOneUse())
+ return false;
+ if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
+ isa<GlobalValue>(V))
+ return false;
+ if (isAllocationFn(V, TLI))
+ return true;
+
+ Instruction *I = cast<Instruction>(V);
+ if (I->mayHaveSideEffects())
+ return false;
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ if (!GEP->hasAllConstantIndices())
+ return false;
+ } else if (I->getNumOperands() != 1) {
+ return false;
+ }
+
+ V = I->getOperand(0);
+ } while (1);
+}
+
+/// CleanupPointerRootUsers - This GV is a pointer root. Loop over all users
+/// of the global and clean up any that obviously don't assign the global a
+/// value that isn't dynamically allocated.
+///
+static bool CleanupPointerRootUsers(GlobalVariable *GV,
+ const TargetLibraryInfo *TLI) {
+ // A brief explanation of leak checkers. The goal is to find bugs where
+ // pointers are forgotten, causing an accumulating growth in memory
+ // usage over time. The common strategy for leak checkers is to whitelist the
+ // memory pointed to by globals at exit. This is popular because it also
+ // solves another problem where the main thread of a C++ program may shut down
+ // before other threads that are still expecting to use those globals. To
+ // handle that case, we expect the program may create a singleton and never
+ // destroy it.
+
+ bool Changed = false;
+
+ // If Dead[n].first is the only use of a malloc result, we can delete its
+ // chain of computation and the store to the global in Dead[n].second.
+ SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
+
+ // Constants can't be pointers to dynamically allocated memory.
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E;) {
+ User *U = *UI++;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ Value *V = SI->getValueOperand();
+ if (isa<Constant>(V)) {
+ Changed = true;
+ SI->eraseFromParent();
+ } else if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (I->hasOneUse())
+ Dead.push_back(std::make_pair(I, SI));
+ }
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
+ if (isa<Constant>(MSI->getValue())) {
+ Changed = true;
+ MSI->eraseFromParent();
+ } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
+ if (I->hasOneUse())
+ Dead.push_back(std::make_pair(I, MSI));
+ }
+ } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
+ GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
+ if (MemSrc && MemSrc->isConstant()) {
+ Changed = true;
+ MTI->eraseFromParent();
+ } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
+ if (I->hasOneUse())
+ Dead.push_back(std::make_pair(I, MTI));
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->use_empty()) {
+ CE->destroyConstant();
+ Changed = true;
+ }
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ if (SafeToDestroyConstant(C)) {
+ C->destroyConstant();
+ // This could have invalidated UI, start over from scratch.
+ Dead.clear();
+ CleanupPointerRootUsers(GV, TLI);
+ return true;
+ }
+ }
+ }
+
+ for (int i = 0, e = Dead.size(); i != e; ++i) {
+ if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
+ Dead[i].second->eraseFromParent();
+ Instruction *I = Dead[i].first;
+ do {
+ if (isAllocationFn(I, TLI))
+ break;
+ Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
+ if (!J)
+ break;
+ I->eraseFromParent();
+ I = J;
+ } while (1);
+ I->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
+/// users of the global, cleaning up the obvious ones. This is largely just a
+/// quick scan over the use list to clean up the easy and obvious cruft. This
+/// returns true if it made a change.
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
+ DataLayout *TD, TargetLibraryInfo *TLI) {
+ bool Changed = false;
+ SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
+ while (!WorkList.empty()) {
+ User *U = WorkList.pop_back_val();
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (Init) {
+ // Replace the load with the initializer.
+ LI->replaceAllUsesWith(Init);
+ LI->eraseFromParent();
+ Changed = true;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Store must be unreachable or storing Init into the global.
+ SI->eraseFromParent();
+ Changed = true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Constant *SubInit = 0;
+ if (Init)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI);
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ CE->getType()->isPointerTy()) {
+ // Pointer cast, delete any stores and memsets to the global.
+ Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI);
+ }
+
+ if (CE->use_empty()) {
+ CE->destroyConstant();
+ Changed = true;
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // Do not transform "gepinst (gep constexpr (GV))" here, because forming
+ // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
+ // and will invalidate our notion of what Init is.
+ Constant *SubInit = 0;
+ if (!isa<ConstantExpr>(GEP->getOperand(0))) {
+ ConstantExpr *CE =
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
+ if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+
+ // If the initializer is an all-null value and we have an inbounds GEP,
+ // we already know what the result of any load from that GEP is.
+ // TODO: Handle splats.
+ if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
+ SubInit = Constant::getNullValue(GEP->getType()->getElementType());
+ }
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);
+
+ if (GEP->use_empty()) {
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
+ if (MI->getRawDest() == V) {
+ MI->eraseFromParent();
+ Changed = true;
+ }
+
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ // If we have a chain of dead constantexprs or other things dangling from
+ // us, and if they are all dead, nuke them without remorse.
+ if (SafeToDestroyConstant(C)) {
+ C->destroyConstant();
+ CleanupConstantGlobalUsers(V, Init, TD, TLI);
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// isSafeSROAElementUse - Return true if the specified instruction is a safe
+/// user of a derived expression from a global that we want to SROA.
+static bool isSafeSROAElementUse(Value *V) {
+ // We might have a dead and dangling constant hanging off of here.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return SafeToDestroyConstant(C);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Loads are ok.
+ if (isa<LoadInst>(I)) return true;
+
+ // Stores *to* the pointer are ok.
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getOperand(0) != V;
+
+ // Otherwise, it must be a GEP.
+ GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
+ if (GEPI == 0) return false;
+
+ if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
+ !cast<Constant>(GEPI->getOperand(1))->isNullValue())
+ return false;
+
+ for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
+ I != E; ++I)
+ if (!isSafeSROAElementUse(*I))
+ return false;
+ return true;
+}
+
+
+/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
+/// Look at it and its uses and decide whether it is safe to SROA this global.
+///
+static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
+ // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
+ cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
+ return false;
+
+ // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
+ // don't like < 3 operand CE's, and we don't like non-constant integer
+ // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
+ // value of C.
+ if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+ !cast<Constant>(U->getOperand(1))->isNullValue() ||
+ !isa<ConstantInt>(U->getOperand(2)))
+ return false;
+
+ gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+ ++GEPI; // Skip over the pointer index.
+
+ // If this is a use of an array allocation, do a bit more checking for sanity.
+ if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
+ uint64_t NumElements = AT->getNumElements();
+ ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
+
+ // Check to make sure that index falls within the array. If not,
+ // something funny is going on, so we won't do the optimization.
+ //
+ if (Idx->getZExtValue() >= NumElements)
+ return false;
+
+ // We cannot scalar repl this level of the array unless any array
+ // sub-indices are in-range constants. In particular, consider:
+ // A[0][i]. We cannot know that the user isn't doing invalid things like
+ // allowing i to index an out-of-range subscript that accesses A[1].
+ //
+ // Scalar replacing *just* the outer index of the array is probably not
+ // going to be a win anyway, so just give up.
+ for (++GEPI; // Skip array index.
+ GEPI != E;
+ ++GEPI) {
+ uint64_t NumElements;
+ if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
+ NumElements = SubArrayTy->getNumElements();
+ else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
+ NumElements = SubVectorTy->getNumElements();
+ else {
+ assert((*GEPI)->isStructTy() &&
+ "Indexed GEP type is not array, vector, or struct!");
+ continue;
+ }
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+ if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
+ return false;
+ }
+ }
+
+ for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
+ if (!isSafeSROAElementUse(*I))
+ return false;
+ return true;
+}
+
+/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
+/// is safe for us to perform this transformation.
+///
+static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ if (!IsUserOfGlobalSafeForSRA(*UI, GV))
+ return false;
+ }
+ return true;
+}
+
+
+/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
+/// variable. This opens the door for other optimizations by exposing the
+/// behavior of the program in a more fine-grained way. We have determined that
+/// this transformation is safe already. We return the first global variable we
+/// insert so that the caller can reprocess it.
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
+ // Make sure this global only has simple uses that we can SRA.
+ if (!GlobalUsersSafeToSRA(GV))
+ return 0;
+
+ assert(GV->hasLocalLinkage() && !GV->isConstant());
+ Constant *Init = GV->getInitializer();
+ Type *Ty = Init->getType();
+
+ std::vector<GlobalVariable*> NewGlobals;
+ Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
+
+ // Get the alignment of the global, either explicit or target-specific.
+ unsigned StartAlignment = GV->getAlignment();
+ if (StartAlignment == 0)
+ StartAlignment = TD.getABITypeAlignment(GV->getType());
+
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ NewGlobals.reserve(STy->getNumElements());
+ const StructLayout &Layout = *TD.getStructLayout(STy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Constant *In = Init->getAggregateElement(i);
+ assert(In && "Couldn't get element of initializer?");
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+Twine(i),
+ GV->getThreadLocalMode(),
+ GV->getType()->getAddressSpace());
+ Globals.insert(GV, NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ uint64_t FieldOffset = Layout.getElementOffset(i);
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
+ if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
+ NGV->setAlignment(NewAlign);
+ }
+ } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+ unsigned NumElements = 0;
+ if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
+ NumElements = ATy->getNumElements();
+ else
+ NumElements = cast<VectorType>(STy)->getNumElements();
+
+ if (NumElements > 16 && GV->hasNUsesOrMore(16))
+ return 0; // It's not worth it.
+ NewGlobals.reserve(NumElements);
+
+ uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
+ unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ Constant *In = Init->getAggregateElement(i);
+ assert(In && "Couldn't get element of initializer?");
+
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+Twine(i),
+ GV->getThreadLocalMode(),
+ GV->getType()->getAddressSpace());
+ Globals.insert(GV, NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
+ if (NewAlign > EltAlign)
+ NGV->setAlignment(NewAlign);
+ }
+ }
+
+ if (NewGlobals.empty())
+ return 0;
+
+ DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
+
+ Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
+
+ // Loop over all of the uses of the global, replacing the constantexpr geps,
+ // with smaller constantexpr geps or direct references.
+ while (!GV->use_empty()) {
+ User *GEP = GV->use_back();
+ assert(((isa<ConstantExpr>(GEP) &&
+ cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
+ isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
+
+ // Ignore the 1th operand, which has to be zero or else the program is quite
+ // broken (undefined). Get the 2nd operand, which is the structure or array
+ // index.
+ unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+ if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
+
+ Value *NewPtr = NewGlobals[Val];
+
+ // Form a shorter GEP if needed.
+ if (GEP->getNumOperands() > 3) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
+ Idxs.push_back(CE->getOperand(i));
+ NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
+ } else {
+ GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
+ SmallVector<Value*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
+ Idxs.push_back(GEPI->getOperand(i));
+ NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
+ GEPI->getName()+"."+Twine(Val),GEPI);
+ }
+ }
+ GEP->replaceAllUsesWith(NewPtr);
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
+ GEPI->eraseFromParent();
+ else
+ cast<ConstantExpr>(GEP)->destroyConstant();
+ }
+
+ // Delete the old global, now that it is dead.
+ Globals.erase(GV);
+ ++NumSRA;
+
+ // Loop over the new globals array deleting any globals that are obviously
+ // dead. This can arise due to scalarization of a structure or an array that
+ // has elements that are dead.
+ unsigned FirstGlobal = 0;
+ for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
+ if (NewGlobals[i]->use_empty()) {
+ Globals.erase(NewGlobals[i]);
+ if (FirstGlobal == i) ++FirstGlobal;
+ }
+
+ return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
+}
+
+/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
+/// value will trap if the value is dynamically null. PHIs keeps track of any
+/// phi nodes we've seen to avoid reprocessing them.
+static bool AllUsesOfValueWillTrapIfNull(const Value *V,
+ SmallPtrSet<const PHINode*, 8> &PHIs) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+
+ if (isa<LoadInst>(U)) {
+ // Will trap.
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Storing the value.
+ }
+ } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
+ if (CI->getCalledValue() != V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Not calling the ptr
+ }
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ if (II->getCalledValue() != V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Not calling the ptr
+ }
+ } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If we've already seen this phi node, ignore it, it has already been
+ // checked.
+ if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
+ return false;
+ } else if (isa<ICmpInst>(U) &&
+ isa<ConstantPointerNull>(UI->getOperand(1))) {
+ // Ignore icmp X, null
+ } else {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false;
+ }
+ }
+ return true;
+}
+
+/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
+/// from GV will trap if the loaded value is null. Note that this also permits
+/// comparisons of the loaded value against null, as a special case.
+static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+ return false;
+ } else if (isa<StoreInst>(U)) {
+ // Ignore stores to the global.
+ } else {
+ // We don't know or understand this user, bail out.
+ //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
+ bool Changed = false;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
+ Instruction *I = cast<Instruction>(*UI++);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ LI->setOperand(0, NewV);
+ Changed = true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) == V) {
+ SI->setOperand(1, NewV);
+ Changed = true;
+ }
+ } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ CallSite CS(I);
+ if (CS.getCalledValue() == V) {
+ // Calling through the pointer! Turn into a direct call, but be careful
+ // that the pointer is not also being passed as an argument.
+ CS.setCalledFunction(NewV);
+ Changed = true;
+ bool PassedAsArg = false;
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.getArgument(i) == V) {
+ PassedAsArg = true;
+ CS.setArgument(i, NewV);
+ }
+
+ if (PassedAsArg) {
+ // Being passed as an argument also. Be careful to not invalidate UI!
+ UI = V->use_begin();
+ }
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(CI,
+ ConstantExpr::getCast(CI->getOpcode(),
+ NewV, CI->getType()));
+ if (CI->use_empty()) {
+ Changed = true;
+ CI->eraseFromParent();
+ }
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ // Should handle GEP here.
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.reserve(GEPI->getNumOperands()-1);
+ for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+ i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(*i))
+ Idxs.push_back(C);
+ else
+ break;
+ if (Idxs.size() == GEPI->getNumOperands()-1)
+ Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
+ ConstantExpr::getGetElementPtr(NewV, Idxs));
+ if (GEPI->use_empty()) {
+ Changed = true;
+ GEPI->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+
+/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
+/// value stored into it. If there are uses of the loaded value that would trap
+/// if the loaded value is dynamically null, then we know that they cannot be
+/// reachable with a null optimize away the load.
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
+ DataLayout *TD,
+ TargetLibraryInfo *TLI) {
+ bool Changed = false;
+
+ // Keep track of whether we are able to remove all the uses of the global
+ // other than the store that defines it.
+ bool AllNonStoreUsesGone = true;
+
+ // Replace all uses of loads with uses of uses of the stored value.
+ for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
+ User *GlobalUser = *GUI++;
+ if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+ // If we were able to delete all uses of the loads
+ if (LI->use_empty()) {
+ LI->eraseFromParent();
+ Changed = true;
+ } else {
+ AllNonStoreUsesGone = false;
+ }
+ } else if (isa<StoreInst>(GlobalUser)) {
+ // Ignore the store that stores "LV" to the global.
+ assert(GlobalUser->getOperand(1) == GV &&
+ "Must be storing *to* the global");
+ } else {
+ AllNonStoreUsesGone = false;
+
+ // If we get here we could have other crazy uses that are transitively
+ // loaded.
+ assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
+ isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
+ isa<BitCastInst>(GlobalUser) ||
+ isa<GetElementPtrInst>(GlobalUser)) &&
+ "Only expect load and stores!");
+ }
+ }
+
+ if (Changed) {
+ DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
+ ++NumGlobUses;
+ }
+
+ // If we nuked all of the loads, then none of the stores are needed either,
+ // nor is the global.
+ if (AllNonStoreUsesGone) {
+ if (isLeakCheckerRoot(GV)) {
+ Changed |= CleanupPointerRootUsers(GV, TLI);
+ } else {
+ Changed = true;
+ CleanupConstantGlobalUsers(GV, 0, TD, TLI);
+ }
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
+ Changed = true;
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+ }
+ return Changed;
+}
+
+/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
+/// instructions that are foldable.
+static void ConstantPropUsersOf(Value *V,
+ DataLayout *TD, TargetLibraryInfo *TLI) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
+ if (Instruction *I = dyn_cast<Instruction>(*UI++))
+ if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) {
+ I->replaceAllUsesWith(NewC);
+
+ // Advance UI to the next non-I use to avoid invalidating it!
+ // Instructions could multiply use V.
+ while (UI != E && *UI == I)
+ ++UI;
+ I->eraseFromParent();
+ }
+}
+
+/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
+/// variable, and transforms the program as if it always contained the result of
+/// the specified malloc. Because it is always the result of the specified
+/// malloc, there is no reason to actually DO the malloc. Instead, turn the
+/// malloc into a global, and any loads of GV as uses of the new global.
+static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
+ CallInst *CI,
+ Type *AllocTy,
+ ConstantInt *NElements,
+ DataLayout *TD,
+ TargetLibraryInfo *TLI) {
+ DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
+
+ Type *GlobalType;
+ if (NElements->getZExtValue() == 1)
+ GlobalType = AllocTy;
+ else
+ // If we have an array allocation, the global variable is of an array.
+ GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
+
+ // Create the new global variable. The contents of the malloc'd memory is
+ // undefined, so initialize with an undef value.
+ GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
+ GlobalType, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(GlobalType),
+ GV->getName()+".body",
+ GV,
+ GV->getThreadLocalMode());
+
+ // If there are bitcast users of the malloc (which is typical, usually we have
+ // a malloc + bitcast) then replace them with uses of the new global. Update
+ // other users to use the global as well.
+ BitCastInst *TheBC = 0;
+ while (!CI->use_empty()) {
+ Instruction *User = cast<Instruction>(CI->use_back());
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (BCI->getType() == NewGV->getType()) {
+ BCI->replaceAllUsesWith(NewGV);
+ BCI->eraseFromParent();
+ } else {
+ BCI->setOperand(0, NewGV);
+ }
+ } else {
+ if (TheBC == 0)
+ TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
+ User->replaceUsesOfWith(CI, TheBC);
+ }
+ }
+
+ Constant *RepValue = NewGV;
+ if (NewGV->getType() != GV->getType()->getElementType())
+ RepValue = ConstantExpr::getBitCast(RepValue,
+ GV->getType()->getElementType());
+
+ // If there is a comparison against null, we will insert a global bool to
+ // keep track of whether the global was initialized yet or not.
+ GlobalVariable *InitBool =
+ new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".init", GV->getThreadLocalMode());
+ bool InitBoolUsed = false;
+
+ // Loop over all uses of GV, processing them in turn.
+ while (!GV->use_empty()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
+ // The global is initialized when the store to it occurs.
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
+ SI->getOrdering(), SI->getSynchScope(), SI);
+ SI->eraseFromParent();
+ continue;
+ }
+
+ LoadInst *LI = cast<LoadInst>(GV->use_back());
+ while (!LI->use_empty()) {
+ Use &LoadUse = LI->use_begin().getUse();
+ if (!isa<ICmpInst>(LoadUse.getUser())) {
+ LoadUse = RepValue;
+ continue;
+ }
+
+ ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
+ // Replace the cmp X, 0 with a use of the bool value.
+ // Sink the load to where the compare was, if atomic rules allow us to.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
+ LI->getOrdering(), LI->getSynchScope(),
+ LI->isUnordered() ? (Instruction*)ICI : LI);
+ InitBoolUsed = true;
+ switch (ICI->getPredicate()) {
+ default: llvm_unreachable("Unknown ICmp Predicate!");
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: // X < null -> always false
+ LV = ConstantInt::getFalse(GV->getContext());
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_EQ:
+ LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ break; // no change.
+ }
+ ICI->replaceAllUsesWith(LV);
+ ICI->eraseFromParent();
+ }
+ LI->eraseFromParent();
+ }
+
+ // If the initialization boolean was used, insert it, otherwise delete it.
+ if (!InitBoolUsed) {
+ while (!InitBool->use_empty()) // Delete initializations
+ cast<StoreInst>(InitBool->use_back())->eraseFromParent();
+ delete InitBool;
+ } else
+ GV->getParent()->getGlobalList().insert(GV, InitBool);
+
+ // Now the GV is dead, nuke it and the malloc..
+ GV->eraseFromParent();
+ CI->eraseFromParent();
+
+ // To further other optimizations, loop over all users of NewGV and try to
+ // constant prop them. This will promote GEP instructions with constant
+ // indices into GEP constant-exprs, which will allow global-opt to hack on it.
+ ConstantPropUsersOf(NewGV, TD, TLI);
+ if (RepValue != NewGV)
+ ConstantPropUsersOf(RepValue, TD, TLI);
+
+ return NewGV;
+}
+
+/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
+/// to make sure that there are no complex uses of V. We permit simple things
+/// like dereferencing the pointer, but not storing through the address, unless
+/// it is to the specified global.
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
+ const GlobalVariable *GV,
+ SmallPtrSet<const PHINode*, 8> &PHIs) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ const Instruction *Inst = cast<Instruction>(*UI);
+
+ if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
+ continue; // Fine, ignore.
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
+ return false; // Storing the pointer itself... bad.
+ continue; // Otherwise, storing through it, or storing into GV... fine.
+ }
+
+ // Must index into the array and into the struct.
+ if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
+ // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
+ // cycles.
+ if (PHIs.insert(PN))
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ return false;
+ }
+ return true;
+}
+
+/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
+/// somewhere. Transform all uses of the allocation into loads from the
+/// global and uses of the resultant pointer. Further, delete the store into
+/// GV. This assumes that these value pass the
+/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
+ GlobalVariable *GV) {
+ while (!Alloc->use_empty()) {
+ Instruction *U = cast<Instruction>(*Alloc->use_begin());
+ Instruction *InsertPt = U;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If this is the store of the allocation into the global, remove it.
+ if (SI->getOperand(1) == GV) {
+ SI->eraseFromParent();
+ continue;
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Insert the load in the corresponding predecessor, not right before the
+ // PHI.
+ InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
+ } else if (isa<BitCastInst>(U)) {
+ // Must be bitcast between the malloc and store to initialize the global.
+ ReplaceUsesOfMallocWithGlobal(U, GV);
+ U->eraseFromParent();
+ continue;
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ // If this is a "GEP bitcast" and the user is a store to the global, then
+ // just process it as a bitcast.
+ if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
+ if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
+ if (SI->getOperand(1) == GV) {
+ // Must be bitcast GEP between the malloc and store to initialize
+ // the global.
+ ReplaceUsesOfMallocWithGlobal(GEPI, GV);
+ GEPI->eraseFromParent();
+ continue;
+ }
+ }
+
+ // Insert a load from the global, and use it instead of the malloc.
+ Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
+ U->replaceUsesOfWith(Alloc, NL);
+ }
+}
+
+/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
+/// of a load) are simple enough to perform heap SRA on. This permits GEP's
+/// that index through the array and struct field, icmps of null, and PHIs.
+static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
+ // We permit two users of the load: setcc comparing against the null
+ // pointer, and a getelementptr of a specific form.
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const Instruction *User = cast<Instruction>(*UI);
+
+ // Comparison against null is ok.
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return false;
+ continue;
+ }
+
+ // getelementptr is also ok, but only a simple form.
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Must index into the array and into the struct.
+ if (GEPI->getNumOperands() < 3)
+ return false;
+
+ // Otherwise the GEP is ok.
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<PHINode>(User)) {
+ if (!LoadUsingPHIsPerLoad.insert(PN))
+ // This means some phi nodes are dependent on each other.
+ // Avoid infinite looping!
+ return false;
+ if (!LoadUsingPHIs.insert(PN))
+ // If we have already analyzed this PHI, then it is safe.
+ continue;
+
+ // Make sure all uses of the PHI are simple enough to transform.
+ if (!LoadUsesSimpleEnoughForHeapSRA(PN,
+ LoadUsingPHIs, LoadUsingPHIsPerLoad))
+ return false;
+
+ continue;
+ }
+
+ // Otherwise we don't know what this is, not ok.
+ return false;
+ }
+
+ return true;
+}
+
+
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
+/// GV are simple enough to perform HeapSRA, return true.
+static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
+ Instruction *StoredVal) {
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI)
+ if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
+ LoadUsingPHIsPerLoad))
+ return false;
+ LoadUsingPHIsPerLoad.clear();
+ }
+
+ // If we reach here, we know that all uses of the loads and transitive uses
+ // (through PHI nodes) are simple enough to transform. However, we don't know
+ // that all inputs the to the PHI nodes are in the same equivalence sets.
+ // Check to verify that all operands of the PHIs are either PHIS that can be
+ // transformed, loads from GV, or MI itself.
+ for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
+ , E = LoadUsingPHIs.end(); I != E; ++I) {
+ const PHINode *PN = *I;
+ for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+ Value *InVal = PN->getIncomingValue(op);
+
+ // PHI of the stored value itself is ok.
+ if (InVal == StoredVal) continue;
+
+ if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+ // One of the PHIs in our set is (optimistically) ok.
+ if (LoadUsingPHIs.count(InPN))
+ continue;
+ return false;
+ }
+
+ // Load from GV is ok.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
+ if (LI->getOperand(0) == GV)
+ continue;
+
+ // UNDEF? NULL?
+
+ // Anything else is rejected.
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+
+ if (FieldNo >= FieldVals.size())
+ FieldVals.resize(FieldNo+1);
+
+ // If we already have this value, just reuse the previously scalarized
+ // version.
+ if (Value *FieldVal = FieldVals[FieldNo])
+ return FieldVal;
+
+ // Depending on what instruction this is, we have several cases.
+ Value *Result;
+ if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ // This is a scalarized version of the load from the global. Just create
+ // a new Load of the scalarized global.
+ Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
+ InsertedScalarizedValues,
+ PHIsToRewrite),
+ LI->getName()+".f"+Twine(FieldNo), LI);
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // PN's type is pointer to struct. Make a new PHI of pointer to struct
+ // field.
+ StructType *ST =
+ cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+
+ PHINode *NewPN =
+ PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+ PN->getNumIncomingValues(),
+ PN->getName()+".f"+Twine(FieldNo), PN);
+ Result = NewPN;
+ PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
+ } else {
+ llvm_unreachable("Unknown usable value");
+ }
+
+ return FieldVals[FieldNo] = Result;
+}
+
+/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
+/// the load, rewrite the derived value to use the HeapSRoA'd load.
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ // If this is a comparison against null, handle it.
+ if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
+ assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
+ // If we have a setcc of the loaded pointer, we can use a setcc of any
+ // field.
+ Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
+ Constant::getNullValue(NPtr->getType()),
+ SCI->getName());
+ SCI->replaceAllUsesWith(New);
+ SCI->eraseFromParent();
+ return;
+ }
+
+ // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
+ assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
+ && "Unexpected GEPI!");
+
+ // Load the pointer for this field.
+ unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
+ Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ // Create the new GEP idx vector.
+ SmallVector<Value*, 8> GEPIdx;
+ GEPIdx.push_back(GEPI->getOperand(1));
+ GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
+
+ Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NGEPI);
+ GEPI->eraseFromParent();
+ return;
+ }
+
+ // Recursively transform the users of PHI nodes. This will lazily create the
+ // PHIs that are needed for individual elements. Keep track of what PHIs we
+ // see in InsertedScalarizedValues so that we don't get infinite loops (very
+ // antisocial). If the PHI is already in InsertedScalarizedValues, it has
+ // already been seen first by another load, so its uses have already been
+ // processed.
+ PHINode *PN = cast<PHINode>(LoadUser);
+ if (!InsertedScalarizedValues.insert(std::make_pair(PN,
+ std::vector<Value*>())).second)
+ return;
+
+ // If this is the first time we've seen this PHI, recursively process all
+ // users.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+}
+
+/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr
+/// is a value loaded from the global. Eliminate all uses of Ptr, making them
+/// use FieldGlobals instead. All uses of loaded values satisfy
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
+ UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+
+ if (Load->use_empty()) {
+ Load->eraseFromParent();
+ InsertedScalarizedValues.erase(Load);
+ }
+}
+
+/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
+/// it up into multiple allocations of arrays of the fields.
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
+ Value *NElems, DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
+ Type *MAT = getMallocAllocatedType(CI, TLI);
+ StructType *STy = cast<StructType>(MAT);
+
+ // There is guaranteed to be at least one use of the malloc (storing
+ // it into GV). If there are other uses, change them to be uses of
+ // the global to simplify later code. This also deletes the store
+ // into GV.
+ ReplaceUsesOfMallocWithGlobal(CI, GV);
+
+ // Okay, at this point, there are no users of the malloc. Insert N
+ // new mallocs at the same place as CI, and N globals.
+ std::vector<Value*> FieldGlobals;
+ std::vector<Value*> FieldMallocs;
+
+ for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+ Type *FieldTy = STy->getElementType(FieldNo);
+ PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+
+ GlobalVariable *NGV =
+ new GlobalVariable(*GV->getParent(),
+ PFieldTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PFieldTy),
+ GV->getName() + ".f" + Twine(FieldNo), GV,
+ GV->getThreadLocalMode());
+ FieldGlobals.push_back(NGV);
+
+ unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
+ if (StructType *ST = dyn_cast<StructType>(FieldTy))
+ TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
+ Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
+ ConstantInt::get(IntPtrTy, TypeSize),
+ NElems, 0,
+ CI->getName() + ".f" + Twine(FieldNo));
+ FieldMallocs.push_back(NMI);
+ new StoreInst(NMI, NGV, CI);
+ }
+
+ // The tricky aspect of this transformation is handling the case when malloc
+ // fails. In the original code, malloc failing would set the result pointer
+ // of malloc to null. In this case, some mallocs could succeed and others
+ // could fail. As such, we emit code that looks like this:
+ // F0 = malloc(field0)
+ // F1 = malloc(field1)
+ // F2 = malloc(field2)
+ // if (F0 == 0 || F1 == 0 || F2 == 0) {
+ // if (F0) { free(F0); F0 = 0; }
+ // if (F1) { free(F1); F1 = 0; }
+ // if (F2) { free(F2); F2 = 0; }
+ // }
+ // The malloc can also fail if its argument is too large.
+ Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
+ Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
+ ConstantZero, "isneg");
+ for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+ Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+ Constant::getNullValue(FieldMallocs[i]->getType()),
+ "isnull");
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
+ }
+
+ // Split the basic block at the old malloc.
+ BasicBlock *OrigBB = CI->getParent();
+ BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
+
+ // Create the block to check the first condition. Put all these blocks at the
+ // end of the function as they are unlikely to be executed.
+ BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
+ "malloc_ret_null",
+ OrigBB->getParent());
+
+ // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+ // branch on RunningOr.
+ OrigBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
+
+ // Within the NullPtrBlock, we need to emit a comparison and branch for each
+ // pointer, because some may be null while others are not.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+ Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
+ Constant::getNullValue(GVVal->getType()));
+ BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
+ OrigBB->getParent());
+ BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
+ OrigBB->getParent());
+ Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
+ Cmp, NullPtrBlock);
+
+ // Fill in FreeBlock.
+ CallInst::CreateFree(GVVal, BI);
+ new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+ FreeBlock);
+ BranchInst::Create(NextBlock, FreeBlock);
+
+ NullPtrBlock = NextBlock;
+ }
+
+ BranchInst::Create(ContBB, NullPtrBlock);
+
+ // CI is no longer needed, remove it.
+ CI->eraseFromParent();
+
+ /// InsertedScalarizedLoads - As we process loads, if we can't immediately
+ /// update all uses of the load, keep track of what scalarized loads are
+ /// inserted for a given load.
+ DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+ InsertedScalarizedValues[GV] = FieldGlobals;
+
+ std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+
+ // Okay, the malloc site is completely handled. All of the uses of GV are now
+ // loads, and all uses of those loads are simple. Rewrite them to use loads
+ // of the per-field globals instead.
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+ continue;
+ }
+
+ // Must be a store of null.
+ StoreInst *SI = cast<StoreInst>(User);
+ assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+ "Unexpected heap-sra user!");
+
+ // Insert a store of null into each global.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+ Constant *Null = Constant::getNullValue(PT->getElementType());
+ new StoreInst(Null, FieldGlobals[i], SI);
+ }
+ // Erase the original store.
+ SI->eraseFromParent();
+ }
+
+ // While we have PHIs that are interesting to rewrite, do it.
+ while (!PHIsToRewrite.empty()) {
+ PHINode *PN = PHIsToRewrite.back().first;
+ unsigned FieldNo = PHIsToRewrite.back().second;
+ PHIsToRewrite.pop_back();
+ PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+ assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+ // Add all the incoming values. This can materialize more phis.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+ PHIsToRewrite);
+ FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+ }
+ }
+
+ // Drop all inter-phi links and any loads that made it this far.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->dropAllReferences();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->dropAllReferences();
+ }
+
+ // Delete all the phis and loads now that inter-references are dead.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->eraseFromParent();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->eraseFromParent();
+ }
+
+ // The old global is now dead, remove it.
+ GV->eraseFromParent();
+
+ ++NumHeapSRA;
+ return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
+/// pointer global variable with a single value stored it that is a malloc or
+/// cast of malloc.
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
+ CallInst *CI,
+ Type *AllocTy,
+ AtomicOrdering Ordering,
+ Module::global_iterator &GVI,
+ DataLayout *TD,
+ TargetLibraryInfo *TLI) {
+ if (!TD)
+ return false;
+
+ // If this is a malloc of an abstract type, don't touch it.
+ if (!AllocTy->isSized())
+ return false;
+
+ // We can't optimize this global unless all uses of it are *known* to be
+ // of the malloc value, not of the null initializer value (consider a use
+ // that compares the global's value against zero to see if the malloc has
+ // been reached). To do this, we check to see if all uses of the global
+ // would trap if the global were null: this proves that they must all
+ // happen after the malloc.
+ if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+ return false;
+
+ // We can't optimize this if the malloc itself is used in a complex way,
+ // for example, being stored into multiple globals. This allows the
+ // malloc to be stored into the specified global, loaded icmp'd, and
+ // GEP'd. These are all things we could transform to using the global
+ // for.
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
+ return false;
+
+ // If we have a global that is only initialized with a fixed size malloc,
+ // transform the program to use global memory instead of malloc'd memory.
+ // This eliminates dynamic allocation, avoids an indirection accessing the
+ // data, and exposes the resultant global to further GlobalOpt.
+ // We cannot optimize the malloc if we cannot determine malloc array size.
+ Value *NElems = getMallocArraySize(CI, TD, TLI, true);
+ if (!NElems)
+ return false;
+
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ // Restrict this transformation to only working on small allocations
+ // (2048 bytes currently), as we don't want to introduce a 16M global or
+ // something.
+ if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI);
+ return true;
+ }
+
+ // If the allocation is an array of structures, consider transforming this
+ // into multiple malloc'd arrays, one for each field. This is basically
+ // SRoA for malloc'd memory.
+
+ if (Ordering != NotAtomic)
+ return false;
+
+ // If this is an allocation of a fixed size array of structs, analyze as a
+ // variable size array. malloc [100 x struct],1 -> malloc struct, 100
+ if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+ if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ AllocTy = AT->getElementType();
+
+ StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+ if (!AllocSTy)
+ return false;
+
+ // This the structure has an unreasonable number of fields, leave it
+ // alone.
+ if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+
+ // If this is a fixed size array, transform the Malloc to be an alloc of
+ // structs. malloc [100 x struct],1 -> malloc struct, 100
+ if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
+ Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
+ Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+ Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+ Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+ AllocSize, NumElements,
+ 0, CI->getName());
+ Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+ CI->replaceAllUsesWith(Cast);
+ CI->eraseFromParent();
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
+ CI = cast<CallInst>(BCI->getOperand(0));
+ else
+ CI = cast<CallInst>(Malloc);
+ }
+
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true),
+ TD, TLI);
+ return true;
+ }
+
+ return false;
+}
+
+// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
+// that only one value (besides its initializer) is ever stored to the global.
+static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ AtomicOrdering Ordering,
+ Module::global_iterator &GVI,
+ DataLayout *TD, TargetLibraryInfo *TLI) {
+ // Ignore no-op GEPs and bitcasts.
+ StoredOnceVal = StoredOnceVal->stripPointerCasts();
+
+ // If we are dealing with a pointer global that is initialized to null and
+ // only has one (non-null) value stored into it, then we can optimize any
+ // users of the loaded value (often calls and loads) that would trap if the
+ // value was null.
+ if (GV->getInitializer()->getType()->isPointerTy() &&
+ GV->getInitializer()->isNullValue()) {
+ if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
+ if (GV->getInitializer()->getType() != SOVC->getType())
+ SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+
+ // Optimize away any trapping uses of the loaded value.
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
+ return true;
+ } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
+ Type *MallocType = getMallocAllocatedType(CI, TLI);
+ if (MallocType &&
+ TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
+ TD, TLI))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
+/// two values ever stored into GV are its initializer and OtherVal. See if we
+/// can shrink the global into a boolean and select between the two values
+/// whenever it is used. This exposes the values to other scalar optimizations.
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
+ Type *GVElType = GV->getType()->getElementType();
+
+ // If GVElType is already i1, it is already shrunk. If the type of the GV is
+ // an FP value, pointer or vector, don't do this optimization because a select
+ // between them is very expensive and unlikely to lead to later
+ // simplification. In these cases, we typically end up with "cond ? v1 : v2"
+ // where v1 and v2 both require constant pool loads, a big loss.
+ if (GVElType == Type::getInt1Ty(GV->getContext()) ||
+ GVElType->isFloatingPointTy() ||
+ GVElType->isPointerTy() || GVElType->isVectorTy())
+ return false;
+
+ // Walk the use list of the global seeing if all the uses are load or store.
+ // If there is anything else, bail out.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+ User *U = *I;
+ if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
+
+ // Create the new global, initializing it to false.
+ GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
+ false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".b",
+ GV->getThreadLocalMode(),
+ GV->getType()->getAddressSpace());
+ GV->getParent()->getGlobalList().insert(GV, NewGV);
+
+ Constant *InitVal = GV->getInitializer();
+ assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
+ "No reason to shrink to bool!");
+
+ // If initialized to zero and storing one into the global, we can use a cast
+ // instead of a select to synthesize the desired value.
+ bool IsOneZero = false;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
+ IsOneZero = InitVal->isNullValue() && CI->isOne();
+
+ while (!GV->use_empty()) {
+ Instruction *UI = cast<Instruction>(GV->use_back());
+ if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+ // Change the store into a boolean store.
+ bool StoringOther = SI->getOperand(0) == OtherVal;
+ // Only do this if we weren't storing a loaded value.
+ Value *StoreVal;
+ if (StoringOther || SI->getOperand(0) == InitVal) {
+ StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
+ StoringOther);
+ } else {
+ // Otherwise, we are storing a previously loaded copy. To do this,
+ // change the copy from copying the original value to just copying the
+ // bool.
+ Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
+
+ // If we've already replaced the input, StoredVal will be a cast or
+ // select instruction. If not, it will be a load of the original
+ // global.
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ assert(LI->getOperand(0) == GV && "Not a copy!");
+ // Insert a new load, to preserve the saved value.
+ StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+ LI->getOrdering(), LI->getSynchScope(), LI);
+ } else {
+ assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
+ "This is not a form that we understand!");
+ StoreVal = StoredVal->getOperand(0);
+ assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
+ }
+ }
+ new StoreInst(StoreVal, NewGV, false, 0,
+ SI->getOrdering(), SI->getSynchScope(), SI);
+ } else {
+ // Change the load into a load of bool then a select.
+ LoadInst *LI = cast<LoadInst>(UI);
+ LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+ LI->getOrdering(), LI->getSynchScope(), LI);
+ Value *NSI;
+ if (IsOneZero)
+ NSI = new ZExtInst(NLI, LI->getType(), "", LI);
+ else
+ NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
+ NSI->takeName(LI);
+ LI->replaceAllUsesWith(NSI);
+ }
+ UI->eraseFromParent();
+ }
+
+ // Retain the name of the old global variable. People who are debugging their
+ // programs may expect these variables to be named the same.
+ NewGV->takeName(GV);
+ GV->eraseFromParent();
+ return true;
+}
+
+
+/// ProcessGlobal - Analyze the specified global variable and optimize it if
+/// possible. If we make a change, return true.
+bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI) {
+ if (!GV->isDiscardableIfUnused())
+ return false;
+
+ // Do more involved optimizations if the global is internal.
+ GV->removeDeadConstantUsers();
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
+ GV->eraseFromParent();
+ ++NumDeleted;
+ return true;
+ }
+
+ if (!GV->hasLocalLinkage())
+ return false;
+
+ SmallPtrSet<const PHINode*, 16> PHIUsers;
+ GlobalStatus GS;
+
+ if (AnalyzeGlobal(GV, GS, PHIUsers))
+ return false;
+
+ if (!GS.isCompared && !GV->hasUnnamedAddr()) {
+ GV->setUnnamedAddr(true);
+ NumUnnamed++;
+ }
+
+ if (GV->isConstant() || !GV->hasInitializer())
+ return false;
+
+ return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
+}
+
+/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// it if possible. If we make a change, return true.
+bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const GlobalStatus &GS) {
+ // If this is a first class global and has only one accessing function
+ // and this function is main (which we know is not recursive we can make
+ // this global a local variable) we replace the global with a local alloca
+ // in this function.
+ //
+ // NOTE: It doesn't make sense to promote non single-value types since we
+ // are just replacing static memory to stack memory.
+ //
+ // If the global is in different address space, don't bring it to stack.
+ if (!GS.HasMultipleAccessingFunctions &&
+ GS.AccessingFunction && !GS.HasNonInstructionUser &&
+ GV->getType()->getElementType()->isSingleValueType() &&
+ GS.AccessingFunction->getName() == "main" &&
+ GS.AccessingFunction->hasExternalLinkage() &&
+ GV->getType()->getAddressSpace() == 0) {
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+ Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+ ->getEntryBlock().begin());
+ Type *ElemTy = GV->getType()->getElementType();
+ // FIXME: Pass Global's alignment when globals have alignment
+ AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+ if (!isa<UndefValue>(GV->getInitializer()))
+ new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+
+ GV->replaceAllUsesWith(Alloca);
+ GV->eraseFromParent();
+ ++NumLocalized;
+ return true;
+ }
+
+ // If the global is never loaded (but may be stored to), it is dead.
+ // Delete it now.
+ if (!GS.isLoaded) {
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+
+ bool Changed;
+ if (isLeakCheckerRoot(GV)) {
+ // Delete any constant stores to the global.
+ Changed = CleanupPointerRootUsers(GV, TLI);
+ } else {
+ // Delete any stores we can find to the global. We may not be able to
+ // make it completely dead though.
+ Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
+ }
+
+ // If the global is dead now, delete it.
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ ++NumDeleted;
+ Changed = true;
+ }
+ return Changed;
+
+ } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+ DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
+ GV->setConstant(true);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
+
+ // If the global is dead now, just nuke it.
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
+ << "all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+
+ ++NumMarked;
+ return true;
+ } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+ if (DataLayout *TD = getAnalysisIfAvailable<DataLayout>())
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
+ }
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+ // If the initial value for the global was an undef value, and if only
+ // one other value was stored into it, we can just change the
+ // initializer to be the stored value, then delete all stores to the
+ // global. This allows us to mark it constant.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (isa<UndefValue>(GV->getInitializer())) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ } else {
+ GVI = GV;
+ }
+ ++NumSubstitute;
+ return true;
+ }
+
+ // Try to optimize globals based on the knowledge that only one value
+ // (besides its initializer) is ever stored to the global.
+ if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
+ TD, TLI))
+ return true;
+
+ // Otherwise, if the global was not a boolean, we can shrink it to be a
+ // boolean.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
+/// function, changing them to FastCC.
+static void ChangeCalleesToFastCall(Function *F) {
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ if (isa<BlockAddress>(*UI))
+ continue;
+ CallSite User(cast<Instruction>(*UI));
+ User.setCallingConv(CallingConv::Fast);
+ }
+}
+
+static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ unsigned Index = Attrs.getSlotIndex(i);
+ if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
+ continue;
+
+ // There can be only one.
+ return Attrs.removeAttribute(C, Index, Attribute::Nest);
+ }
+
+ return Attrs;
+}
+
+static void RemoveNestAttribute(Function *F) {
+ F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ if (isa<BlockAddress>(*UI))
+ continue;
+ CallSite User(cast<Instruction>(*UI));
+ User.setAttributes(StripNest(F->getContext(), User.getAttributes()));
+ }
+}
+
+bool GlobalOpt::OptimizeFunctions(Module &M) {
+ bool Changed = false;
+ // Optimize functions.
+ for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
+ Function *F = FI++;
+ // Functions without names cannot be referenced outside this module.
+ if (!F->hasName() && !F->isDeclaration())
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->removeDeadConstantUsers();
+ if (F->isDefTriviallyDead()) {
+ F->eraseFromParent();
+ Changed = true;
+ ++NumFnDeleted;
+ } else if (F->hasLocalLinkage()) {
+ if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
+ !F->hasAddressTaken()) {
+ // If this function has C calling conventions, is not a varargs
+ // function, and is only called directly, promote it to use the Fast
+ // calling convention.
+ F->setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(F);
+ ++NumFastCallFns;
+ Changed = true;
+ }
+
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F->hasAddressTaken()) {
+ // The function is not used by a trampoline intrinsic, so it is safe
+ // to remove the 'nest' attribute.
+ RemoveNestAttribute(F);
+ ++NumNestRemoved;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool GlobalOpt::OptimizeGlobalVars(Module &M) {
+ bool Changed = false;
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+ // Global variables without names cannot be referenced outside this module.
+ if (!GV->hasName() && !GV->isDeclaration())
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ // Simplify the initializer.
+ if (GV->hasInitializer())
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
+ if (New && New != CE)
+ GV->setInitializer(New);
+ }
+
+ Changed |= ProcessGlobal(GV, GVI);
+ }
+ return Changed;
+}
+
+/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
+/// initializers have an init priority of 65535.
+GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (GV == 0) return 0;
+
+ // Verify that the initializer is simple enough for us to handle. We are
+ // only allowed to optimize the initializer if it is unique.
+ if (!GV->hasUniqueInitializer()) return 0;
+
+ if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ return GV;
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ if (isa<ConstantAggregateZero>(*i))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Must have a function or null ptr.
+ if (!isa<Function>(CS->getOperand(1)))
+ return 0;
+
+ // Init priority must be standard.
+ ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
+ if (CI->getZExtValue() != 65535)
+ return 0;
+ }
+
+ return GV;
+}
+
+/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
+/// return a list of the functions and null terminator as a vector.
+static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
+ if (GV->getInitializer()->isNullValue())
+ return std::vector<Function*>();
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ std::vector<Function*> Result;
+ Result.reserve(CA->getNumOperands());
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
+ }
+ return Result;
+}
+
+/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
+/// specified array, returning the new global to use.
+static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
+ const std::vector<Function*> &Ctors) {
+ // If we made a change, reassemble the initializer list.
+ Constant *CSVals[2];
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
+ CSVals[1] = 0;
+
+ StructType *StructTy =
+ cast <StructType>(
+ cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
+
+ // Create the new init list.
+ std::vector<Constant*> CAList;
+ for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
+ if (Ctors[i]) {
+ CSVals[1] = Ctors[i];
+ } else {
+ Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
+ false);
+ PointerType *PFTy = PointerType::getUnqual(FTy);
+ CSVals[1] = Constant::getNullValue(PFTy);
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
+ 0x7fffffff);
+ }
+ CAList.push_back(ConstantStruct::get(StructTy, CSVals));
+ }
+
+ // Create the array initializer.
+ Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
+ CAList.size()), CAList);
+
+ // If we didn't change the number of elements, don't create a new GV.
+ if (CA->getType() == GCL->getInitializer()->getType()) {
+ GCL->setInitializer(CA);
+ return GCL;
+ }
+
+ // Create the new global and insert it next to the existing list.
+ GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
+ GCL->getLinkage(), CA, "",
+ GCL->getThreadLocalMode());
+ GCL->getParent()->getGlobalList().insert(GCL, NGV);
+ NGV->takeName(GCL);
+
+ // Nuke the old list, replacing any uses with the new one.
+ if (!GCL->use_empty()) {
+ Constant *V = NGV;
+ if (V->getType() != GCL->getType())
+ V = ConstantExpr::getBitCast(V, GCL->getType());
+ GCL->replaceAllUsesWith(V);
+ }
+ GCL->eraseFromParent();
+
+ if (Ctors.size())
+ return NGV;
+ else
+ return 0;
+}
+
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const DataLayout *TD);
+
+
+/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
+/// handled by the code generator. We don't want to generate something like:
+/// void *X = &X/42;
+/// because the code generator doesn't have a relocation that can handle that.
+///
+/// This function should be called if C was not found (but just got inserted)
+/// in SimpleConstants to avoid having to rescan the same constants all the
+/// time.
+static bool isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const DataLayout *TD) {
+ // Simple integer, undef, constant aggregate zero, global addresses, etc are
+ // all supported.
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
+ isa<GlobalValue>(C))
+ return true;
+
+ // Aggregate values are safe if all their elements are.
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+ isa<ConstantVector>(C)) {
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ Constant *Op = cast<Constant>(C->getOperand(i));
+ if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD))
+ return false;
+ }
+ return true;
+ }
+
+ // We don't know exactly what relocations are allowed in constant expressions,
+ // so we allow &global+constantoffset, which is safe and uniformly supported
+ // across targets.
+ ConstantExpr *CE = cast<ConstantExpr>(C);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ // Bitcast is fine if the casted value is fine.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
+
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // int <=> ptr is fine if the int type is the same size as the
+ // pointer type.
+ if (!TD || TD->getTypeSizeInBits(CE->getType()) !=
+ TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
+
+ // GEP is fine if it is simple + constant offset.
+ case Instruction::GetElementPtr:
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(CE->getOperand(i)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
+
+ case Instruction::Add:
+ // We allow simple+cst.
+ if (!isa<ConstantInt>(CE->getOperand(1)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
+ }
+ return false;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const DataLayout *TD) {
+ // If we already checked this constant, we win.
+ if (!SimpleConstants.insert(C)) return true;
+ // Check the constant.
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD);
+}
+
+
+/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
+/// enough for us to understand. In particular, if it is a cast to anything
+/// other than from one pointer type to another pointer type, we punt.
+/// We basically just support direct accesses to globals and GEP's of
+/// globals. This should be kept up to date with CommitValueTo.
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
+ // Conservatively, avoid aggregate types. This is because we don't
+ // want to worry about them partially overlapping other stores.
+ if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+ return false;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return GV->hasUniqueInitializer();
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ // Handle a constantexpr gep.
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0)) &&
+ cast<GEPOperator>(CE)->isInBounds()) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ if (!GV->hasUniqueInitializer())
+ return false;
+
+ // The first index must be zero.
+ ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
+ if (!CI || !CI->isZero()) return false;
+
+ // The remaining indices must be compile-time known integers within the
+ // notional bounds of the corresponding static array types.
+ if (!CE->isGEPWithNoNotionalOverIndexing())
+ return false;
+
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+
+ // A constantexpr bitcast from a pointer to another pointer is a no-op,
+ // and we know how to evaluate it by moving the bitcast from the pointer
+ // operand to the value operand.
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
+ }
+ }
+
+ return false;
+}
+
+/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
+/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
+/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
+static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
+ ConstantExpr *Addr, unsigned OpNo) {
+ // Base case of the recursion.
+ if (OpNo == Addr->getNumOperands()) {
+ assert(Val->getType() == Init->getType() && "Type mismatch!");
+ return Val;
+ }
+
+ SmallVector<Constant*, 32> Elts;
+ if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
+ // Break up the constant into its elements.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(Init->getAggregateElement(i));
+
+ // Replace the element that we are supposed to.
+ ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
+ unsigned Idx = CU->getZExtValue();
+ assert(Idx < STy->getNumElements() && "Struct index out of range!");
+ Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
+
+ // Return the modified struct.
+ return ConstantStruct::get(STy, Elts);
+ }
+
+ ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
+ SequentialType *InitTy = cast<SequentialType>(Init->getType());
+
+ uint64_t NumElts;
+ if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+ NumElts = ATy->getNumElements();
+ else
+ NumElts = InitTy->getVectorNumElements();
+
+ // Break up the array into elements.
+ for (uint64_t i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(Init->getAggregateElement(i));
+
+ assert(CI->getZExtValue() < NumElts);
+ Elts[CI->getZExtValue()] =
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+
+ if (Init->getType()->isArrayTy())
+ return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+ return ConstantVector::get(Elts);
+}
+
+/// CommitValueTo - We have decided that Addr (which satisfies the predicate
+/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
+static void CommitValueTo(Constant *Val, Constant *Addr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ assert(GV->hasInitializer());
+ GV->setInitializer(Val);
+ return;
+ }
+
+ ConstantExpr *CE = cast<ConstantExpr>(Addr);
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
+}
+
+namespace {
+
+/// Evaluator - This class evaluates LLVM IR, producing the Constant
+/// representing each SSA instruction. Changes to global variables are stored
+/// in a mapping that can be iterated over after the evaluation is complete.
+/// Once an evaluation call fails, the evaluation object should not be reused.
+class Evaluator {
+public:
+ Evaluator(const DataLayout *TD, const TargetLibraryInfo *TLI)
+ : TD(TD), TLI(TLI) {
+ ValueStack.push_back(new DenseMap<Value*, Constant*>);
+ }
+
+ ~Evaluator() {
+ DeleteContainerPointers(ValueStack);
+ while (!AllocaTmps.empty()) {
+ GlobalVariable *Tmp = AllocaTmps.back();
+ AllocaTmps.pop_back();
+
+ // If there are still users of the alloca, the program is doing something
+ // silly, e.g. storing the address of the alloca somewhere and using it
+ // later. Since this is undefined, we'll just make it be null.
+ if (!Tmp->use_empty())
+ Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
+ delete Tmp;
+ }
+ }
+
+ /// EvaluateFunction - Evaluate a call to function F, returning true if
+ /// successful, false if we can't evaluate it. ActualArgs contains the formal
+ /// arguments for the function.
+ bool EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs);
+
+ /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
+ /// successful, false if we can't evaluate it. NewBB returns the next BB that
+ /// control flows into, or null upon return.
+ bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
+
+ Constant *getVal(Value *V) {
+ if (Constant *CV = dyn_cast<Constant>(V)) return CV;
+ Constant *R = ValueStack.back()->lookup(V);
+ assert(R && "Reference to an uncomputed value!");
+ return R;
+ }
+
+ void setVal(Value *V, Constant *C) {
+ ValueStack.back()->operator[](V) = C;
+ }
+
+ const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
+ return MutatedMemory;
+ }
+
+ const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
+ return Invariants;
+ }
+
+private:
+ Constant *ComputeLoadResult(Constant *P);
+
+ /// ValueStack - As we compute SSA register values, we store their contents
+ /// here. The back of the vector contains the current function and the stack
+ /// contains the values in the calling frames.
+ SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;
+
+ /// CallStack - This is used to detect recursion. In pathological situations
+ /// we could hit exponential behavior, but at least there is nothing
+ /// unbounded.
+ SmallVector<Function*, 4> CallStack;
+
+ /// MutatedMemory - For each store we execute, we update this map. Loads
+ /// check this to get the most up-to-date value. If evaluation is successful,
+ /// this state is committed to the process.
+ DenseMap<Constant*, Constant*> MutatedMemory;
+
+ /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
+ /// to represent its body. This vector is needed so we can delete the
+ /// temporary globals when we are done.
+ SmallVector<GlobalVariable*, 32> AllocaTmps;
+
+ /// Invariants - These global variables have been marked invariant by the
+ /// static constructor.
+ SmallPtrSet<GlobalVariable*, 8> Invariants;
+
+ /// SimpleConstants - These are constants we have checked and know to be
+ /// simple enough to live in a static initializer of a global.
+ SmallPtrSet<Constant*, 8> SimpleConstants;
+
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+};
+
+} // anonymous namespace
+
+/// ComputeLoadResult - Return the value that would be computed by a load from
+/// P after the stores reflected by 'memory' have been performed. If we can't
+/// decide, return null.
+Constant *Evaluator::ComputeLoadResult(Constant *P) {
+ // If this memory location has been recently stored, use the stored value: it
+ // is the most up-to-date.
+ DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
+ if (I != MutatedMemory.end()) return I->second;
+
+ // Access it.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+ return 0;
+ }
+
+ // Handle a constantexpr getelementptr.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ if (GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ }
+
+ return 0; // don't know how to evaluate.
+}
+
+/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
+/// successful, false if we can't evaluate it. NewBB returns the next BB that
+/// control flows into, or null upon return.
+bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
+ BasicBlock *&NextBB) {
+ // This is the main evaluation loop.
+ while (1) {
+ Constant *InstResult = 0;
+
+ DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
+ if (!SI->isSimple()) {
+ DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
+ Constant *Ptr = getVal(SI->getOperand(1));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
+ Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+ DEBUG(dbgs() << "; To: " << *Ptr << "\n");
+ }
+ if (!isSimpleEnoughPointerToCommit(Ptr)) {
+ // If this is too complex for us to commit, reject it.
+ DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
+ return false;
+ }
+
+ Constant *Val = getVal(SI->getOperand(0));
+
+ // If this might be too difficult for the backend to handle (e.g. the addr
+ // of one global variable divided by another) then we can't commit it.
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) {
+ DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
+ << "\n");
+ return false;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ if (CE->getOpcode() == Instruction::BitCast) {
+ DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
+ // If we're evaluating a store through a bitcast, then we need
+ // to pull the bitcast off the pointer type and push it onto the
+ // stored value.
+ Ptr = CE->getOperand(0);
+
+ Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
+
+ // In order to push the bitcast onto the stored value, a bitcast
+ // from NewTy to Val's type must be legal. If it's not, we can try
+ // introspecting NewTy to find a legal conversion.
+ while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
+ // If NewTy is a struct, we can convert the pointer to the struct
+ // into a pointer to its first member.
+ // FIXME: This could be extended to support arrays as well.
+ if (StructType *STy = dyn_cast<StructType>(NewTy)) {
+ NewTy = STy->getTypeAtIndex(0U);
+
+ IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
+ Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+ Constant * const IdxList[] = {IdxZero, IdxZero};
+
+ Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+
+ // If we can't improve the situation by introspecting NewTy,
+ // we have to give up.
+ } else {
+ DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+ "evaluate.\n");
+ return false;
+ }
+ }
+
+ // If we found compatible types, go ahead and push the bitcast
+ // onto the stored value.
+ Val = ConstantExpr::getBitCast(Val, NewTy);
+
+ DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
+ }
+ }
+
+ MutatedMemory[Ptr] = Val;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
+ InstResult = ConstantExpr::get(BO->getOpcode(),
+ getVal(BO->getOperand(0)),
+ getVal(BO->getOperand(1)));
+ DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
+ << "\n");
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
+ InstResult = ConstantExpr::getCompare(CI->getPredicate(),
+ getVal(CI->getOperand(0)),
+ getVal(CI->getOperand(1)));
+ DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
+ InstResult = ConstantExpr::getCast(CI->getOpcode(),
+ getVal(CI->getOperand(0)),
+ CI->getType());
+ DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
+ << "\n");
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
+ InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
+ getVal(SI->getOperand(1)),
+ getVal(SI->getOperand(2)));
+ DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
+ << "\n");
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
+ Constant *P = getVal(GEP->getOperand(0));
+ SmallVector<Constant*, 8> GEPOps;
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i)
+ GEPOps.push_back(getVal(*i));
+ InstResult =
+ ConstantExpr::getGetElementPtr(P, GEPOps,
+ cast<GEPOperator>(GEP)->isInBounds());
+ DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
+ << "\n");
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
+
+ if (!LI->isSimple()) {
+ DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
+
+ Constant *Ptr = getVal(LI->getOperand(0));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+ DEBUG(dbgs() << "Found a constant pointer expression, constant "
+ "folding: " << *Ptr << "\n");
+ }
+ InstResult = ComputeLoadResult(Ptr);
+ if (InstResult == 0) {
+ DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
+ "\n");
+ return false; // Could not evaluate load.
+ }
+
+ DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
+ if (AI->isArrayAllocation()) {
+ DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
+ return false; // Cannot handle array allocs.
+ }
+ Type *Ty = AI->getType()->getElementType();
+ AllocaTmps.push_back(new GlobalVariable(Ty, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(Ty),
+ AI->getName()));
+ InstResult = AllocaTmps.back();
+ DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
+ } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
+ CallSite CS(CurInst);
+
+ // Debug info can safely be ignored here.
+ if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
+ DEBUG(dbgs() << "Ignoring debug info.\n");
+ ++CurInst;
+ continue;
+ }
+
+ // Cannot handle inline asm.
+ if (isa<InlineAsm>(CS.getCalledValue())) {
+ DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
+ return false;
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
+ if (MSI->isVolatile()) {
+ DEBUG(dbgs() << "Can not optimize a volatile memset " <<
+ "intrinsic.\n");
+ return false;
+ }
+ Constant *Ptr = getVal(MSI->getDest());
+ Constant *Val = getVal(MSI->getValue());
+ Constant *DestVal = ComputeLoadResult(getVal(Ptr));
+ if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
+ // This memset is a no-op.
+ DEBUG(dbgs() << "Ignoring no-op memset.\n");
+ ++CurInst;
+ continue;
+ }
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
+ ++CurInst;
+ continue;
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::invariant_start) {
+ // We don't insert an entry into Values, as it doesn't have a
+ // meaningful return value.
+ if (!II->use_empty()) {
+ DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n");
+ return false;
+ }
+ ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
+ Value *PtrArg = getVal(II->getArgOperand(1));
+ Value *Ptr = PtrArg->stripPointerCasts();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
+ if (!Size->isAllOnesValue() &&
+ Size->getValue().getLimitedValue() >=
+ TD->getTypeStoreSize(ElemTy)) {
+ Invariants.insert(GV);
+ DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
+ << "\n");
+ } else {
+ DEBUG(dbgs() << "Found a global var, but can not treat it as an "
+ "invariant.\n");
+ }
+ }
+ // Continue even if we do nothing.
+ ++CurInst;
+ continue;
+ }
+
+ DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
+ return false;
+ }
+
+ // Resolve function pointers.
+ Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
+ if (!Callee || Callee->mayBeOverridden()) {
+ DEBUG(dbgs() << "Can not resolve function pointer.\n");
+ return false; // Cannot resolve.
+ }
+
+ SmallVector<Constant*, 8> Formals;
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
+ Formals.push_back(getVal(*i));
+
+ if (Callee->isDeclaration()) {
+ // If this is a function we can constant fold, do it.
+ if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
+ InstResult = C;
+ DEBUG(dbgs() << "Constant folded function call. Result: " <<
+ *InstResult << "\n");
+ } else {
+ DEBUG(dbgs() << "Can not constant fold function call.\n");
+ return false;
+ }
+ } else {
+ if (Callee->getFunctionType()->isVarArg()) {
+ DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
+ return false;
+ }
+
+ Constant *RetVal = 0;
+ // Execute the call, if successful, use the return value.
+ ValueStack.push_back(new DenseMap<Value*, Constant*>);
+ if (!EvaluateFunction(Callee, RetVal, Formals)) {
+ DEBUG(dbgs() << "Failed to evaluate function.\n");
+ return false;
+ }
+ delete ValueStack.pop_back_val();
+ InstResult = RetVal;
+
+ if (InstResult != NULL) {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
+ InstResult << "\n\n");
+ } else {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
+ }
+ }
+ } else if (isa<TerminatorInst>(CurInst)) {
+ DEBUG(dbgs() << "Found a terminator instruction.\n");
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
+ if (BI->isUnconditional()) {
+ NextBB = BI->getSuccessor(0);
+ } else {
+ ConstantInt *Cond =
+ dyn_cast<ConstantInt>(getVal(BI->getCondition()));
+ if (!Cond) return false; // Cannot determine.
+
+ NextBB = BI->getSuccessor(!Cond->getZExtValue());
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+ ConstantInt *Val =
+ dyn_cast<ConstantInt>(getVal(SI->getCondition()));
+ if (!Val) return false; // Cannot determine.
+ NextBB = SI->findCaseValue(Val).getCaseSuccessor();
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
+ Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
+ NextBB = BA->getBasicBlock();
+ else
+ return false; // Cannot determine.
+ } else if (isa<ReturnInst>(CurInst)) {
+ NextBB = 0;
+ } else {
+ // invoke, unwind, resume, unreachable.
+ DEBUG(dbgs() << "Can not handle terminator.");
+ return false; // Cannot handle this terminator.
+ }
+
+ // We succeeded at evaluating this block!
+ DEBUG(dbgs() << "Successfully evaluated block.\n");
+ return true;
+ } else {
+ // Did not know how to evaluate this!
+ DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
+ "\n");
+ return false;
+ }
+
+ if (!CurInst->use_empty()) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
+ InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
+
+ setVal(CurInst, InstResult);
+ }
+
+ // If we just processed an invoke, we finished evaluating the block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+ NextBB = II->getNormalDest();
+ DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
+ return true;
+ }
+
+ // Advance program counter.
+ ++CurInst;
+ }
+}
+
+/// EvaluateFunction - Evaluate a call to function F, returning true if
+/// successful, false if we can't evaluate it. ActualArgs contains the formal
+/// arguments for the function.
+bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs) {
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+ return false;
+
+ CallStack.push_back(F);
+
+ // Initialize arguments to the incoming values specified.
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++ArgNo)
+ setVal(AI, ActualArgs[ArgNo]);
+
+ // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+ // we can only evaluate any one basic block at most once. This set keeps
+ // track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurBB - The current basic block we're evaluating.
+ BasicBlock *CurBB = F->begin();
+
+ BasicBlock::iterator CurInst = CurBB->begin();
+
+ while (1) {
+ BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
+ DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
+ if (!EvaluateBlock(CurInst, NextBB))
+ return false;
+
+ if (NextBB == 0) {
+ // Successfully running until there's no next block means that we found
+ // the return. Fill it the return value and pop the call stack.
+ ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
+ if (RI->getNumOperands())
+ RetVal = getVal(RI->getOperand(0));
+ CallStack.pop_back();
+ return true;
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NextBB))
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ PHINode *PN = 0;
+ for (CurInst = NextBB->begin();
+ (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
+
+ // Advance to the next block.
+ CurBB = NextBB;
+ }
+}
+
+/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
+/// we can. Return true if we can, false otherwise.
+static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ // Call the function.
+ Evaluator Eval(TD, TLI);
+ Constant *RetValDummy;
+ bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
+ SmallVector<Constant*, 0>());
+
+ if (EvalSuccess) {
+ // We succeeded at evaluation: commit the result.
+ DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+ << F->getName() << "' to " << Eval.getMutatedMemory().size()
+ << " stores.\n");
+ for (DenseMap<Constant*, Constant*>::const_iterator I =
+ Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
+ I != E; ++I)
+ CommitValueTo(I->second, I->first);
+ for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
+ Eval.getInvariants().begin(), E = Eval.getInvariants().end();
+ I != E; ++I)
+ (*I)->setConstant(true);
+ }
+
+ return EvalSuccess;
+}
+
+/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
+/// Return true if anything changed.
+bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
+ std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
+ bool MadeChange = false;
+ if (Ctors.empty()) return false;
+
+ // Loop over global ctors, optimizing them when we can.
+ for (unsigned i = 0; i != Ctors.size(); ++i) {
+ Function *F = Ctors[i];
+ // Found a null terminator in the middle of the list, prune off the rest of
+ // the list.
+ if (F == 0) {
+ if (i != Ctors.size()-1) {
+ Ctors.resize(i+1);
+ MadeChange = true;
+ }
+ break;
+ }
+ DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
+
+ // We cannot simplify external ctor functions.
+ if (F->empty()) continue;
+
+ // If we can evaluate the ctor at compile time, do.
+ if (EvaluateStaticConstructor(F, TD, TLI)) {
+ Ctors.erase(Ctors.begin()+i);
+ MadeChange = true;
+ --i;
+ ++NumCtorsEvaluated;
+ continue;
+ }
+ }
+
+ if (!MadeChange) return false;
+
+ GCL = InstallGlobalCtors(GCL, Ctors);
+ return true;
+}
+
+bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
+ bool Changed = false;
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E;) {
+ Module::alias_iterator J = I++;
+ // Aliases without names cannot be referenced outside this module.
+ if (!J->hasName() && !J->isDeclaration())
+ J->setLinkage(GlobalValue::InternalLinkage);
+ // If the aliasee may change at link time, nothing can be done - bail out.
+ if (J->mayBeOverridden())
+ continue;
+
+ Constant *Aliasee = J->getAliasee();
+ GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
+ Target->removeDeadConstantUsers();
+ bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
+
+ // Make all users of the alias use the aliasee instead.
+ if (!J->use_empty()) {
+ J->replaceAllUsesWith(Aliasee);
+ ++NumAliasesResolved;
+ Changed = true;
+ }
+
+ // If the alias is externally visible, we may still be able to simplify it.
+ if (!J->hasLocalLinkage()) {
+ // If the aliasee has internal linkage, give it the name and linkage
+ // of the alias, and delete the alias. This turns:
+ // define internal ... @f(...)
+ // @a = alias ... @f
+ // into:
+ // define ... @a(...)
+ if (!Target->hasLocalLinkage())
+ continue;
+
+ // Do not perform the transform if multiple aliases potentially target the
+ // aliasee. This check also ensures that it is safe to replace the section
+ // and other attributes of the aliasee with those of the alias.
+ if (!hasOneUse)
+ continue;
+
+ // Give the aliasee the name, linkage and other attributes of the alias.
+ Target->takeName(J);
+ Target->setLinkage(J->getLinkage());
+ Target->GlobalValue::copyAttributesFrom(J);
+ }
+
+ // Delete the alias.
+ M.getAliasList().erase(J);
+ ++NumAliasesRemoved;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::cxa_atexit))
+ return 0;
+
+ Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
+
+ if (!Fn)
+ return 0;
+
+ FunctionType *FTy = Fn->getFunctionType();
+
+ // Checking that the function has the right return type, the right number of
+ // parameters and that they all have pointer types should be enough.
+ if (!FTy->getReturnType()->isIntegerTy() ||
+ FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return 0;
+
+ return Fn;
+}
+
+/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
+/// destructor and can therefore be eliminated.
+/// Note that we assume that other optimization passes have already simplified
+/// the code so we only look for a function with a single basic block, where
+/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
+/// other side-effect free instructions.
+static bool cxxDtorIsEmpty(const Function &Fn,
+ SmallPtrSet<const Function *, 8> &CalledFunctions) {
+ // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
+ // nounwind, but that doesn't seem worth doing.
+ if (Fn.isDeclaration())
+ return false;
+
+ if (++Fn.begin() != Fn.end())
+ return false;
+
+ const BasicBlock &EntryBlock = Fn.getEntryBlock();
+ for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
+ I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Ignore debug intrinsics.
+ if (isa<DbgInfoIntrinsic>(CI))
+ continue;
+
+ const Function *CalledFn = CI->getCalledFunction();
+
+ if (!CalledFn)
+ return false;
+
+ SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
+
+ // Don't treat recursive functions as empty.
+ if (!NewCalledFunctions.insert(CalledFn))
+ return false;
+
+ if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
+ return false;
+ } else if (isa<ReturnInst>(*I))
+ return true; // We're done.
+ else if (I->mayHaveSideEffects())
+ return false; // Destructor with side effects, bail.
+ }
+
+ return false;
+}
+
+bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
+ /// Itanium C++ ABI p3.3.5:
+ ///
+ /// After constructing a global (or local static) object, that will require
+ /// destruction on exit, a termination function is registered as follows:
+ ///
+ /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
+ ///
+ /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
+ /// call f(p) when DSO d is unloaded, before all such termination calls
+ /// registered before this one. It returns zero if registration is
+ /// successful, nonzero on failure.
+
+ // This pass will look for calls to __cxa_atexit where the function is trivial
+ // and remove them.
+ bool Changed = false;
+
+ for (Function::use_iterator I = CXAAtExitFn->use_begin(),
+ E = CXAAtExitFn->use_end(); I != E;) {
+ // We're only interested in calls. Theoretically, we could handle invoke
+ // instructions as well, but neither llvm-gcc nor clang generate invokes
+ // to __cxa_atexit.
+ CallInst *CI = dyn_cast<CallInst>(*I++);
+ if (!CI)
+ continue;
+
+ Function *DtorFn =
+ dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
+ if (!DtorFn)
+ continue;
+
+ SmallPtrSet<const Function *, 8> CalledFunctions;
+ if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
+ continue;
+
+ // Just remove the call.
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ CI->eraseFromParent();
+
+ ++NumCXXDtorsRemoved;
+
+ Changed |= true;
+ }
+
+ return Changed;
+}
+
+bool GlobalOpt::runOnModule(Module &M) {
+ bool Changed = false;
+
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ // Try to find the llvm.globalctors list.
+ GlobalVariable *GlobalCtors = FindGlobalCtors(M);
+
+ Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
+
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ // Delete functions that are trivially dead, ccc -> fastcc
+ LocalChange |= OptimizeFunctions(M);
+
+ // Optimize global_ctors list.
+ if (GlobalCtors)
+ LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
+
+ // Optimize non-address-taken globals.
+ LocalChange |= OptimizeGlobalVars(M);
+
+ // Resolve aliases, when possible.
+ LocalChange |= OptimizeGlobalAliases(M);
+
+ // Try to remove trivial global destructors.
+ if (CXAAtExitFn)
+ LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
+
+ Changed |= LocalChange;
+ }
+
+ // TODO: Move all global ctors functions to the end of the module for code
+ // layout.
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
new file mode 100644
index 000000000000..4ac1dfc09682
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -0,0 +1,279 @@
+//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an _extremely_ simple interprocedural constant
+// propagation pass. It could certainly be improved in many different ways,
+// like using a worklist. This pass makes arguments dead, but does not remove
+// them. The existing dead argument elimination pass should be run after this
+// to clean up the mess.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ipconstprop"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+STATISTIC(NumArgumentsProped, "Number of args turned into constants");
+STATISTIC(NumReturnValProped, "Number of return values turned into constants");
+
+namespace {
+ /// IPCP - The interprocedural constant propagation pass
+ ///
+ struct IPCP : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ IPCP() : ModulePass(ID) {
+ initializeIPCPPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+ private:
+ bool PropagateConstantsIntoArguments(Function &F);
+ bool PropagateConstantReturn(Function &F);
+ };
+}
+
+char IPCP::ID = 0;
+INITIALIZE_PASS(IPCP, "ipconstprop",
+ "Interprocedural constant propagation", false, false)
+
+ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
+
+bool IPCP::runOnModule(Module &M) {
+ bool Changed = false;
+ bool LocalChange = true;
+
+ // FIXME: instead of using smart algorithms, we just iterate until we stop
+ // making changes.
+ while (LocalChange) {
+ LocalChange = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Delete any klingons.
+ I->removeDeadConstantUsers();
+ if (I->hasLocalLinkage())
+ LocalChange |= PropagateConstantsIntoArguments(*I);
+ Changed |= PropagateConstantReturn(*I);
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+/// PropagateConstantsIntoArguments - Look at all uses of the specified
+/// function. If all uses are direct call sites, and all pass a particular
+/// constant in for an argument, propagate that constant in as the argument.
+///
+bool IPCP::PropagateConstantsIntoArguments(Function &F) {
+ if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit.
+
+ // For each argument, keep track of its constant value and whether it is a
+ // constant or not. The bool is driven to true when found to be non-constant.
+ SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants;
+ ArgumentConstants.resize(F.arg_size());
+
+ unsigned NumNonconstant = 0;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ User *U = *UI;
+ // Ignore blockaddress uses.
+ if (isa<BlockAddress>(U)) continue;
+
+ // Used by a non-instruction, or not the callee of a function, do not
+ // transform.
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ return false;
+
+ CallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(UI))
+ return false;
+
+ // Check out all of the potentially constant arguments. Note that we don't
+ // inspect varargs here.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ Function::arg_iterator Arg = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
+ ++i, ++AI, ++Arg) {
+
+ // If this argument is known non-constant, ignore it.
+ if (ArgumentConstants[i].second)
+ continue;
+
+ Constant *C = dyn_cast<Constant>(*AI);
+ if (C && ArgumentConstants[i].first == 0) {
+ ArgumentConstants[i].first = C; // First constant seen.
+ } else if (C && ArgumentConstants[i].first == C) {
+ // Still the constant value we think it is.
+ } else if (*AI == &*Arg) {
+ // Ignore recursive calls passing argument down.
+ } else {
+ // Argument became non-constant. If all arguments are non-constant now,
+ // give up on this function.
+ if (++NumNonconstant == ArgumentConstants.size())
+ return false;
+ ArgumentConstants[i].second = true;
+ }
+ }
+ }
+
+ // If we got to this point, there is a constant argument!
+ assert(NumNonconstant != ArgumentConstants.size());
+ bool MadeChange = false;
+ Function::arg_iterator AI = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
+ // Do we have a constant argument?
+ if (ArgumentConstants[i].second || AI->use_empty() ||
+ (AI->hasByValAttr() && !F.onlyReadsMemory()))
+ continue;
+
+ Value *V = ArgumentConstants[i].first;
+ if (V == 0) V = UndefValue::get(AI->getType());
+ AI->replaceAllUsesWith(V);
+ ++NumArgumentsProped;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+
+// Check to see if this function returns one or more constants. If so, replace
+// all callers that use those return values with the constant value. This will
+// leave in the actual return values and instructions, but deadargelim will
+// clean that up.
+//
+// Additionally if a function always returns one of its arguments directly,
+// callers will be updated to use the value they pass in directly instead of
+// using the return value.
+bool IPCP::PropagateConstantReturn(Function &F) {
+ if (F.getReturnType()->isVoidTy())
+ return false; // No return value.
+
+ // If this function could be overridden later in the link stage, we can't
+ // propagate information about its results into callers.
+ if (F.mayBeOverridden())
+ return false;
+
+ // Check to see if this function returns a constant.
+ SmallVector<Value *,4> RetVals;
+ StructType *STy = dyn_cast<StructType>(F.getReturnType());
+ if (STy)
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
+ RetVals.push_back(UndefValue::get(STy->getElementType(i)));
+ else
+ RetVals.push_back(UndefValue::get(F.getReturnType()));
+
+ unsigned NumNonConstant = 0;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ for (unsigned i = 0, e = RetVals.size(); i != e; ++i) {
+ // Already found conflicting return values?
+ Value *RV = RetVals[i];
+ if (!RV)
+ continue;
+
+ // Find the returned value
+ Value *V;
+ if (!STy)
+ V = RI->getOperand(0);
+ else
+ V = FindInsertedValue(RI->getOperand(0), i);
+
+ if (V) {
+ // Ignore undefs, we can change them into anything
+ if (isa<UndefValue>(V))
+ continue;
+
+ // Try to see if all the rets return the same constant or argument.
+ if (isa<Constant>(V) || isa<Argument>(V)) {
+ if (isa<UndefValue>(RV)) {
+ // No value found yet? Try the current one.
+ RetVals[i] = V;
+ continue;
+ }
+ // Returning the same value? Good.
+ if (RV == V)
+ continue;
+ }
+ }
+ // Different or no known return value? Don't propagate this return
+ // value.
+ RetVals[i] = 0;
+ // All values non constant? Stop looking.
+ if (++NumNonConstant == RetVals.size())
+ return false;
+ }
+ }
+
+ // If we got here, the function returns at least one constant value. Loop
+ // over all users, replacing any uses of the return value with the returned
+ // constant.
+ bool MadeChange = false;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ CallSite CS(*UI);
+ Instruction* Call = CS.getInstruction();
+
+ // Not a call instruction or a call instruction that's not calling F
+ // directly?
+ if (!Call || !CS.isCallee(UI))
+ continue;
+
+ // Call result not used?
+ if (Call->use_empty())
+ continue;
+
+ MadeChange = true;
+
+ if (STy == 0) {
+ Value* New = RetVals[0];
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Call->replaceAllUsesWith(New);
+ continue;
+ }
+
+ for (Value::use_iterator I = Call->use_begin(), E = Call->use_end();
+ I != E;) {
+ Instruction *Ins = cast<Instruction>(*I);
+
+ // Increment now, so we can remove the use
+ ++I;
+
+ // Find the index of the retval to replace with
+ int index = -1;
+ if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins))
+ if (EV->hasIndices())
+ index = *EV->idx_begin();
+
+ // If this use uses a specific return value, and we have a replacement,
+ // replace it.
+ if (index != -1) {
+ Value *New = RetVals[index];
+ if (New) {
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Ins->replaceAllUsesWith(New);
+ Ins->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ if (MadeChange) ++NumReturnValProped;
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
new file mode 100644
index 000000000000..5d563d8bbf51
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -0,0 +1,110 @@
+//===-- IPO.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMIPO.a, which implements several transformations over the LLVM
+// intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Initialization.h"
+#include "llvm-c/Transforms/IPO.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+void llvm::initializeIPO(PassRegistry &Registry) {
+ initializeArgPromotionPass(Registry);
+ initializeConstantMergePass(Registry);
+ initializeDAEPass(Registry);
+ initializeDAHPass(Registry);
+ initializeFunctionAttrsPass(Registry);
+ initializeGlobalDCEPass(Registry);
+ initializeGlobalOptPass(Registry);
+ initializeIPCPPass(Registry);
+ initializeAlwaysInlinerPass(Registry);
+ initializeSimpleInlinerPass(Registry);
+ initializeInternalizePassPass(Registry);
+ initializeLoopExtractorPass(Registry);
+ initializeBlockExtractorPassPass(Registry);
+ initializeSingleLoopExtractorPass(Registry);
+ initializeMergeFunctionsPass(Registry);
+ initializePartialInlinerPass(Registry);
+ initializePruneEHPass(Registry);
+ initializeStripDeadPrototypesPassPass(Registry);
+ initializeStripSymbolsPass(Registry);
+ initializeStripDebugDeclarePass(Registry);
+ initializeStripDeadDebugInfoPass(Registry);
+ initializeStripNonDebugSymbolsPass(Registry);
+}
+
+void LLVMInitializeIPO(LLVMPassRegistryRef R) {
+ initializeIPO(*unwrap(R));
+}
+
+void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createArgumentPromotionPass());
+}
+
+void LLVMAddConstantMergePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantMergePass());
+}
+
+void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadArgEliminationPass());
+}
+
+void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionAttrsPass());
+}
+
+void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionInliningPass());
+}
+
+void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(llvm::createAlwaysInlinerPass());
+}
+
+void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalDCEPass());
+}
+
+void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalOptimizerPass());
+}
+
+void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPConstantPropagationPass());
+}
+
+void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPruneEHPass());
+}
+
+void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPSCCPPass());
+}
+
+void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
+ std::vector<const char *> Export;
+ if (AllButMain)
+ Export.push_back("main");
+ unwrap(PM)->add(createInternalizePass(Export));
+}
+
+void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripDeadPrototypesPass());
+}
+
+void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripSymbolsPass());
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
new file mode 100644
index 000000000000..a0095dad1af7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -0,0 +1,112 @@
+//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a custom inliner that handles only functions that
+// are marked as "always inline".
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief Inliner pass which only handles "always inline" functions.
+class AlwaysInliner : public Inliner {
+ InlineCostAnalysis *ICA;
+
+public:
+ // Use extremely low threshold.
+ AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ AlwaysInliner(bool InsertLifetime)
+ : Inliner(ID, -2000000000, InsertLifetime), ICA(0) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+
+ virtual InlineCost getInlineCost(CallSite CS);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+
+ using llvm::Pass::doFinalization;
+ virtual bool doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
+ }
+};
+
+}
+
+char AlwaysInliner::ID = 0;
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+
+Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
+
+Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
+ return new AlwaysInliner(InsertLifetime);
+}
+
+/// \brief Get the inline cost for the always-inliner.
+///
+/// The always inliner *only* handles functions which are marked with the
+/// attribute to force inlining. As such, it is dramatically simpler and avoids
+/// using the powerful (but expensive) inline cost analysis. Instead it uses
+/// a very simple and boring direct walk of the instructions looking for
+/// impossible-to-inline constructs.
+///
+/// Note, it would be possible to go to some lengths to cache the information
+/// computed here, but as we only expect to do this for relatively few and
+/// small functions which have the explicit attribute to force inlining, it is
+/// likely not worth it in practice.
+InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // Only inline direct calls to functions with always-inline attributes
+ // that are viable for inlining. FIXME: We shouldn't even get here for
+ // declarations.
+ if (Callee && !Callee->isDeclaration() &&
+ Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline) &&
+ ICA->isInlineViable(*Callee))
+ return InlineCost::getAlways();
+
+ return InlineCost::getNever();
+}
+
+bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) {
+ ICA = &getAnalysis<InlineCostAnalysis>();
+ return Inliner::runOnSCC(SCC);
+}
+
+void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<InlineCostAnalysis>();
+ Inliner::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
new file mode 100644
index 000000000000..a4f702604188
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -0,0 +1,83 @@
+//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bottom-up inlining of functions into callees.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief Actaul inliner pass implementation.
+///
+/// The common implementation of the inlining logic is shared between this
+/// inliner pass and the always inliner pass. The two passes use different cost
+/// analyses to determine when to inline.
+class SimpleInliner : public Inliner {
+ InlineCostAnalysis *ICA;
+
+public:
+ SimpleInliner() : Inliner(ID), ICA(0) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ SimpleInliner(int Threshold)
+ : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+
+ InlineCost getInlineCost(CallSite CS) {
+ return ICA->getInlineCost(CS, getInlineThreshold(CS));
+ }
+
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+} // end anonymous namespace
+
+char SimpleInliner::ID = 0;
+INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
+INITIALIZE_PASS_END(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+
+Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
+
+Pass *llvm::createFunctionInliningPass(int Threshold) {
+ return new SimpleInliner(Threshold);
+}
+
+bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
+ ICA = &getAnalysis<InlineCostAnalysis>();
+ return Inliner::runOnSCC(SCC);
+}
+
+void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<InlineCostAnalysis>();
+ Inliner::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
new file mode 100644
index 000000000000..663ddb75f423
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -0,0 +1,613 @@
+//===- Inliner.cpp - Code common to all inliners --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls and updating the call graph. The decisions of which calls
+// are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+
+// This weirdly named statistic tracks the number of times that, when attempting
+// to inline a function A into B, we analyze the callers of B in order to see
+// if those would be more profitable and blocked inline steps.
+STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
+
+static cl::opt<int>
+InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+ cl::desc("Control the amount of inlining to perform (default = 225)"));
+
+static cl::opt<int>
+HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
+ cl::desc("Threshold for inlining functions with inline hint"));
+
+// Threshold to use when optsize is specified (and there is no -inline-limit).
+const int OptSizeThreshold = 75;
+
+Inliner::Inliner(char &ID)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {}
+
+Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
+ InlineLimit : Threshold),
+ InsertLifetime(InsertLifetime) {}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ CallGraphSCCPass::getAnalysisUsage(AU);
+}
+
+
+typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
+InlinedArrayAllocasTy;
+
+/// \brief If the inlined function had a higher stack protection level than the
+/// calling function, then bump up the caller's stack protection level.
+static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
+ // If upgrading the SSP attribute, clear out the old SSP Attributes first.
+ // Having multiple SSP attributes doesn't actually hurt, but it adds useless
+ // clutter to the IR.
+ AttrBuilder B;
+ B.addAttribute(Attribute::StackProtect)
+ .addAttribute(Attribute::StackProtectStrong);
+ AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
+ AttributeSet::FunctionIndex,
+ B);
+ AttributeSet CallerAttr = Caller->getAttributes(),
+ CalleeAttr = Callee->getAttributes();
+
+ if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq)) {
+ Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller->addFnAttr(Attribute::StackProtectReq);
+ } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq)) {
+ Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller->addFnAttr(Attribute::StackProtectStrong);
+ } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtect) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong))
+ Caller->addFnAttr(Attribute::StackProtect);
+}
+
+/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR. The
+/// InlinedArrayAllocas map keeps track of any allocas that are already
+/// available from other functions inlined into the caller. If we are able to
+/// inline this call site we attempt to reuse already available allocas or add
+/// any new allocas to the set if not possible.
+static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
+ InlinedArrayAllocasTy &InlinedArrayAllocas,
+ int InlineHistory, bool InsertLifetime) {
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
+
+ // Try to inline the function. Get the list of static allocas that were
+ // inlined.
+ if (!InlineFunction(CS, IFI, InsertLifetime))
+ return false;
+
+ AdjustCallerSSPLevel(Caller, Callee);
+
+ // Look at all of the allocas that we inlined through this call site. If we
+ // have already inlined other allocas through other calls into this function,
+ // then we know that they have disjoint lifetimes and that we can merge them.
+ //
+ // There are many heuristics possible for merging these allocas, and the
+ // different options have different tradeoffs. One thing that we *really*
+ // don't want to hurt is SRoA: once inlining happens, often allocas are no
+ // longer address taken and so they can be promoted.
+ //
+ // Our "solution" for that is to only merge allocas whose outermost type is an
+ // array type. These are usually not promoted because someone is using a
+ // variable index into them. These are also often the most important ones to
+ // merge.
+ //
+ // A better solution would be to have real memory lifetime markers in the IR
+ // and not have the inliner do any merging of allocas at all. This would
+ // allow the backend to do proper stack slot coloring of all allocas that
+ // *actually make it to the backend*, which is really what we want.
+ //
+ // Because we don't have this information, we do this simple and useful hack.
+ //
+ SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+ // When processing our SCC, check to see if CS was inlined from some other
+ // call site. For example, if we're processing "A" in this code:
+ // A() { B() }
+ // B() { x = alloca ... C() }
+ // C() { y = alloca ... }
+ // Assume that C was not inlined into B initially, and so we're processing A
+ // and decide to inline B into A. Doing this makes an alloca available for
+ // reuse and makes a callsite (C) available for inlining. When we process
+ // the C call site we don't want to do any alloca merging between X and Y
+ // because their scopes are not disjoint. We could make this smarter by
+ // keeping track of the inline history for each alloca in the
+ // InlinedArrayAllocas but this isn't likely to be a significant win.
+ if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
+ return true;
+
+ // Loop over all the allocas we have so far and see if they can be merged with
+ // a previously inlined alloca. If not, remember that we had it.
+ for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
+ AllocaNo != e; ++AllocaNo) {
+ AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+ // Don't bother trying to merge array allocations (they will usually be
+ // canonicalized to be an allocation *of* an array), or allocations whose
+ // type is not itself an array (because we're afraid of pessimizing SRoA).
+ ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+ if (ATy == 0 || AI->isArrayAllocation())
+ continue;
+
+ // Get the list of all available allocas for this array type.
+ std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
+
+ // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+ // that we have to be careful not to reuse the same "available" alloca for
+ // multiple different allocas that we just inlined, we use the 'UsedAllocas'
+ // set to keep track of which "available" allocas are being used by this
+ // function. Also, AllocasForType can be empty of course!
+ bool MergedAwayAlloca = false;
+ for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+ AllocaInst *AvailableAlloca = AllocasForType[i];
+
+ // The available alloca has to be in the right function, not in some other
+ // function in this SCC.
+ if (AvailableAlloca->getParent() != AI->getParent())
+ continue;
+
+ // If the inlined function already uses this alloca then we can't reuse
+ // it.
+ if (!UsedAllocas.insert(AvailableAlloca))
+ continue;
+
+ // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+ // success!
+ DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
+ << *AvailableAlloca << '\n');
+
+ AI->replaceAllUsesWith(AvailableAlloca);
+ AI->eraseFromParent();
+ MergedAwayAlloca = true;
+ ++NumMergedAllocas;
+ IFI.StaticAllocas[AllocaNo] = 0;
+ break;
+ }
+
+ // If we already nuked the alloca, we're done with it.
+ if (MergedAwayAlloca)
+ continue;
+
+ // If we were unable to merge away the alloca either because there are no
+ // allocas of the right type available or because we reused them all
+ // already, remember that this alloca came from an inlined function and mark
+ // it used so we don't reuse it for other allocas from this inline
+ // operation.
+ AllocasForType.push_back(AI);
+ UsedAllocas.insert(AI);
+ }
+
+ return true;
+}
+
+unsigned Inliner::getInlineThreshold(CallSite CS) const {
+ int thres = InlineThreshold; // -inline-threshold or else selected by
+ // overall opt level
+
+ // If -inline-threshold is not given, listen to the optsize attribute when it
+ // would decrease the threshold.
+ Function *Caller = CS.getCaller();
+ bool OptSize = Caller && !Caller->isDeclaration() &&
+ Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+ if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
+ OptSizeThreshold < thres)
+ thres = OptSizeThreshold;
+
+ // Listen to the inlinehint attribute when it would increase the threshold
+ // and the caller does not need to minimize its size.
+ Function *Callee = CS.getCalledFunction();
+ bool InlineHint = Callee && !Callee->isDeclaration() &&
+ Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InlineHint);
+ if (InlineHint && HintThreshold > thres
+ && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize))
+ thres = HintThreshold;
+
+ return thres;
+}
+
+/// shouldInline - Return true if the inliner should attempt to inline
+/// at the given CallSite.
+bool Inliner::shouldInline(CallSite CS) {
+ InlineCost IC = getInlineCost(CS);
+
+ if (IC.isAlways()) {
+ DEBUG(dbgs() << " Inlining: cost=always"
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return true;
+ }
+
+ if (IC.isNever()) {
+ DEBUG(dbgs() << " NOT Inlining: cost=never"
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return false;
+ }
+
+ Function *Caller = CS.getCaller();
+ if (!IC) {
+ DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return false;
+ }
+
+ // Try to detect the case where the current inlining candidate caller (call
+ // it B) is a static or linkonce-ODR function and is an inlining candidate
+ // elsewhere, and the current candidate callee (call it C) is large enough
+ // that inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B into
+ // its callers.
+ //
+ // This only applies to static and linkonce-ODR functions because those are
+ // expected to be available for inlining in the translation units where they
+ // are used. Thus we will always have the opportunity to make local inlining
+ // decisions. Importantly the linkonce-ODR linkage covers inline functions
+ // and templates in C++.
+ //
+ // FIXME: All of this logic should be sunk into getInlineCost. It relies on
+ // the internal implementation of the inline cost metrics rather than
+ // treating them as truly abstract units etc.
+ if (Caller->hasLocalLinkage() ||
+ Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) {
+ int TotalSecondaryCost = 0;
+ // The candidate cost to be imposed upon the current function.
+ int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
+ // This bool tracks what happens if we do NOT inline C into B.
+ bool callerWillBeRemoved = Caller->hasLocalLinkage();
+ // This bool tracks what happens if we DO inline C into B.
+ bool inliningPreventsSomeOuterInline = false;
+ for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
+ I != E; ++I) {
+ CallSite CS2(*I);
+
+ // If this isn't a call to Caller (it could be some other sort
+ // of reference) skip it. Such references will prevent the caller
+ // from being removed.
+ if (!CS2 || CS2.getCalledFunction() != Caller) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+
+ InlineCost IC2 = getInlineCost(CS2);
+ ++NumCallerCallersAnalyzed;
+ if (!IC2) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+ if (IC2.isAlways())
+ continue;
+
+ // See if inlining or original callsite would erase the cost delta of
+ // this callsite. We subtract off the penalty for the call instruction,
+ // which we would be deleting.
+ if (IC2.getCostDelta() <= CandidateCost) {
+ inliningPreventsSomeOuterInline = true;
+ TotalSecondaryCost += IC2.getCost();
+ }
+ }
+ // If all outer calls to Caller would get inlined, the cost for the last
+ // one is set very low by getInlineCost, in anticipation that Caller will
+ // be removed entirely. We did not account for this above unless there
+ // is only one caller of Caller.
+ if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
+ TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
+
+ if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << IC.getCost() <<
+ ", outer Cost = " << TotalSecondaryCost << '\n');
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
+ << ", Call: " << *CS.getInstruction() << '\n');
+ return true;
+}
+
+/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+
+ SmallPtrSet<Function*, 8> SCCFunctions;
+ DEBUG(dbgs() << "Inliner visiting SCC:");
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F) SCCFunctions.insert(F);
+ DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
+ }
+
+ // Scan through and identify all call sites ahead of time so that we only
+ // inline call sites in the original functions, not call sites that result
+ // from inlining other functions.
+ SmallVector<std::pair<CallSite, int>, 16> CallSites;
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
+ SmallVector<std::pair<Function*, int>, 8> InlineHistory;
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (!F) continue;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS(cast<Value>(I));
+ // If this isn't a call, or it is a call to an intrinsic, it can
+ // never be inlined.
+ if (!CS || isa<IntrinsicInst>(I))
+ continue;
+
+ // If this is a direct call to an external function, we can never inline
+ // it. If it is an indirect call, inlining may resolve it to be a
+ // direct call, so we keep it.
+ if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
+ continue;
+
+ CallSites.push_back(std::make_pair(CS, -1));
+ }
+ }
+
+ DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
+
+ // If there are no calls in this function, exit early.
+ if (CallSites.empty())
+ return false;
+
+ // Now that we have all of the call sites, move the ones to functions in the
+ // current SCC to the end of the list.
+ unsigned FirstCallInSCC = CallSites.size();
+ for (unsigned i = 0; i < FirstCallInSCC; ++i)
+ if (Function *F = CallSites[i].first.getCalledFunction())
+ if (SCCFunctions.count(F))
+ std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+
+
+ InlinedArrayAllocasTy InlinedArrayAllocas;
+ InlineFunctionInfo InlineInfo(&CG, TD);
+
+ // Now that we have all of the call sites, loop over them and inline them if
+ // it looks profitable to do so.
+ bool Changed = false;
+ bool LocalChange;
+ do {
+ LocalChange = false;
+ // Iterate over the outer loop because inlining functions can cause indirect
+ // calls to become direct calls.
+ for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+ CallSite CS = CallSites[CSi].first;
+
+ Function *Caller = CS.getCaller();
+ Function *Callee = CS.getCalledFunction();
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) {
+ DEBUG(dbgs() << " -> Deleting dead call: "
+ << *CS.getInstruction() << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ CS.getInstruction()->eraseFromParent();
+ ++NumCallsDeleted;
+ } else {
+ // We can only inline direct calls to non-declarations.
+ if (Callee == 0 || Callee->isDeclaration()) continue;
+
+ // If this call site was obtained by inlining another function, verify
+ // that the include path for the function did not include the callee
+ // itself. If so, we'd be recursively inlining the same function,
+ // which would provide the same callsites, which would cause us to
+ // infinitely inline.
+ int InlineHistoryID = CallSites[CSi].second;
+ if (InlineHistoryID != -1 &&
+ InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+ continue;
+
+
+ // If the policy determines that we should inline this function,
+ // try to do so.
+ if (!shouldInline(CS))
+ continue;
+
+ // Attempt to inline the function.
+ if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+ InlineHistoryID, InsertLifetime))
+ continue;
+ ++NumInlined;
+
+ // If inlining this function gave us any new call sites, throw them
+ // onto our worklist to process. They are useful inline candidates.
+ if (!InlineInfo.InlinedCalls.empty()) {
+ // Create a new inline history entry for this, so that we remember
+ // that these new callsites came about due to inlining Callee.
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+ for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
+ i != e; ++i) {
+ Value *Ptr = InlineInfo.InlinedCalls[i];
+ CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+ }
+ }
+ }
+
+ // If we inlined or deleted the last possible call site to the function,
+ // delete the function body now.
+ if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
+ // TODO: Can remove if in SCC now.
+ !SCCFunctions.count(Callee) &&
+
+ // The function may be apparently dead, but if there are indirect
+ // callgraph references to the node, we cannot delete it yet, this
+ // could invalidate the CGSCC iterator.
+ CG[Callee]->getNumReferences() == 0) {
+ DEBUG(dbgs() << " -> Deleting dead function: "
+ << Callee->getName() << "\n");
+ CallGraphNode *CalleeNode = CG[Callee];
+
+ // Remove any call graph edges from the callee to its callees.
+ CalleeNode->removeAllCalledFunctions();
+
+ // Removing the node for callee from the call graph and delete it.
+ delete CG.removeFunctionFromModule(CalleeNode);
+ ++NumDeleted;
+ }
+
+ // Remove this call site from the list. If possible, use
+ // swap/pop_back for efficiency, but do not use it if doing so would
+ // move a call site to a function in this SCC before the
+ // 'FirstCallInSCC' barrier.
+ if (SCC.isSingular()) {
+ CallSites[CSi] = CallSites.back();
+ CallSites.pop_back();
+ } else {
+ CallSites.erase(CallSites.begin()+CSi);
+ }
+ --CSi;
+
+ Changed = true;
+ LocalChange = true;
+ }
+ } while (LocalChange);
+
+ return Changed;
+}
+
+// doFinalization - Remove now-dead linkonce functions at the end of
+// processing to avoid breaking the SCC traversal.
+bool Inliner::doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG);
+}
+
+/// removeDeadFunctions - Remove dead functions that are not included in
+/// DNR (Do Not Remove) list.
+bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+ SmallVector<CallGraphNode*, 16> FunctionsToRemove;
+
+ // Scan for all of the functions, looking for ones that should now be removed
+ // from the program. Insert the dead ones in the FunctionsToRemove set.
+ for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
+ CallGraphNode *CGN = I->second;
+ Function *F = CGN->getFunction();
+ if (!F || F->isDeclaration())
+ continue;
+
+ // Handle the case when this function is called and we only want to care
+ // about always-inline functions. This is a bit of a hack to share code
+ // between here and the InlineAlways pass.
+ if (AlwaysInlineOnly &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline))
+ continue;
+
+ // If the only remaining users of the function are dead constants, remove
+ // them.
+ F->removeDeadConstantUsers();
+
+ if (!F->isDefTriviallyDead())
+ continue;
+
+ // Remove any call graph edges from the function to its callees.
+ CGN->removeAllCalledFunctions();
+
+ // Remove any edges from the external node to the function's call graph
+ // node. These edges might have been made irrelegant due to
+ // optimization of the program.
+ CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+ // Removing the node for callee from the call graph and delete it.
+ FunctionsToRemove.push_back(CGN);
+ }
+ if (FunctionsToRemove.empty())
+ return false;
+
+ // Now that we know which functions to delete, do so. We didn't want to do
+ // this inline, because that would invalidate our CallGraph::iterator
+ // objects. :(
+ //
+ // Note that it doesn't matter that we are iterating over a non-stable order
+ // here to do this, it doesn't matter which order the functions are deleted
+ // in.
+ array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+ FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
+ FunctionsToRemove.end()),
+ FunctionsToRemove.end());
+ for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(),
+ E = FunctionsToRemove.end();
+ I != E; ++I) {
+ delete CG.removeFunctionFromModule(*I);
+ ++NumDeleted;
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
new file mode 100644
index 000000000000..4bfab5b0afbd
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -0,0 +1,188 @@
+//===-- Internalize.cpp - Mark functions internal -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions and variables in the input module.
+// If the function or variable is not in the list of external names given to
+// the pass it is marked as internal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "internalize"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumAliases , "Number of aliases internalized");
+STATISTIC(NumFunctions, "Number of functions internalized");
+STATISTIC(NumGlobals , "Number of global vars internalized");
+
+// APIFile - A file which contains a list of symbols that should not be marked
+// external.
+static cl::opt<std::string>
+APIFile("internalize-public-api-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of symbol names to preserve"));
+
+// APIList - A list of symbols that should not be marked internal.
+static cl::list<std::string>
+APIList("internalize-public-api-list", cl::value_desc("list"),
+ cl::desc("A list of symbol names to preserve"),
+ cl::CommaSeparated);
+
+namespace {
+ class InternalizePass : public ModulePass {
+ std::set<std::string> ExternalNames;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit InternalizePass();
+ explicit InternalizePass(ArrayRef<const char *> exportList);
+ void LoadFile(const char *Filename);
+ void ClearExportList();
+ void AddToExportList(const std::string &val);
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<CallGraph>();
+ }
+ };
+} // end anonymous namespace
+
+char InternalizePass::ID = 0;
+INITIALIZE_PASS(InternalizePass, "internalize",
+ "Internalize Global Symbols", false, false)
+
+InternalizePass::InternalizePass()
+ : ModulePass(ID) {
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+ if (!APIFile.empty()) // If a filename is specified, use it.
+ LoadFile(APIFile.c_str());
+ if (!APIList.empty()) // If a list is specified, use it as well.
+ ExternalNames.insert(APIList.begin(), APIList.end());
+}
+
+InternalizePass::InternalizePass(ArrayRef<const char *> exportList)
+ : ModulePass(ID){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+ for(ArrayRef<const char *>::const_iterator itr = exportList.begin();
+ itr != exportList.end(); itr++) {
+ ExternalNames.insert(*itr);
+ }
+}
+
+void InternalizePass::LoadFile(const char *Filename) {
+ // Load the APIFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ errs() << "WARNING: Internalize couldn't load file '" << Filename
+ << "'! Continuing as if it's empty.\n";
+ return; // Just continue as if the file were empty
+ }
+ while (In) {
+ std::string Symbol;
+ In >> Symbol;
+ if (!Symbol.empty())
+ ExternalNames.insert(Symbol);
+ }
+}
+
+void InternalizePass::ClearExportList() {
+ ExternalNames.clear();
+}
+
+void InternalizePass::AddToExportList(const std::string &val) {
+ ExternalNames.insert(val);
+}
+
+bool InternalizePass::runOnModule(Module &M) {
+ CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
+ CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
+ bool Changed = false;
+
+ // Never internalize functions which code-gen might insert.
+ // FIXME: We should probably add this (and the __stack_chk_guard) via some
+ // type of call-back in CodeGen.
+ ExternalNames.insert("__stack_chk_fail");
+
+ // Mark all functions not in the api as internal.
+ // FIXME: maybe use private linkage?
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && // Function must be defined here
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !I->hasLocalLinkage() && // Can't already have internal linkage
+ !ExternalNames.count(I->getName())) {// Not marked to keep external?
+ I->setLinkage(GlobalValue::InternalLinkage);
+ // Remove a callgraph edge from the external node to this function.
+ if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+ Changed = true;
+ ++NumFunctions;
+ DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
+ }
+
+ // Never internalize the llvm.used symbol. It is used to implement
+ // attribute((used)).
+ // FIXME: Shouldn't this just filter on llvm.metadata section??
+ ExternalNames.insert("llvm.used");
+ ExternalNames.insert("llvm.compiler.used");
+
+ // Never internalize anchors used by the machine module info, else the info
+ // won't find them. (see MachineModuleInfo.)
+ ExternalNames.insert("llvm.global_ctors");
+ ExternalNames.insert("llvm.global_dtors");
+ ExternalNames.insert("llvm.global.annotations");
+
+ // Never internalize symbols code-gen inserts.
+ ExternalNames.insert("__stack_chk_guard");
+
+ // Mark all global variables with initializers that are not in the api as
+ // internal as well.
+ // FIXME: maybe use private linkage?
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasLocalLinkage() &&
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !ExternalNames.count(I->getName())) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumGlobals;
+ DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
+ }
+
+ // Mark all aliases that are not in the api as internal as well.
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasInternalLinkage() &&
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !ExternalNames.count(I->getName())) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumAliases;
+ DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
+ }
+
+ return Changed;
+}
+
+ModulePass *llvm::createInternalizePass() {
+ return new InternalizePass();
+}
+
+ModulePass *llvm::createInternalizePass(ArrayRef<const char *> el) {
+ return new InternalizePass(el);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
new file mode 100644
index 000000000000..8282a8e6fabc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -0,0 +1,304 @@
+//===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass wrapper around the ExtractLoop() scalar transformation to extract each
+// top-level loop into its own new function. If the loop is the ONLY loop in a
+// given function, it is not touched. This is a pass most useful for debugging
+// via bugpoint.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-extract"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumExtracted, "Number of loops extracted");
+
+namespace {
+ struct LoopExtractor : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ unsigned NumLoops;
+
+ explicit LoopExtractor(unsigned numLoops = ~0)
+ : LoopPass(ID), NumLoops(numLoops) {
+ initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ }
+ };
+}
+
+char LoopExtractor::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+
+namespace {
+ /// SingleLoopExtractor - For bugpoint.
+ struct SingleLoopExtractor : public LoopExtractor {
+ static char ID; // Pass identification, replacement for typeid
+ SingleLoopExtractor() : LoopExtractor(1) {}
+ };
+} // End anonymous namespace
+
+char SingleLoopExtractor::ID = 0;
+INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
+ "Extract at most one loop into a new function", false, false)
+
+// createLoopExtractorPass - This pass extracts all natural loops from the
+// program into a function if it can.
+//
+Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
+
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // Only visit top-level loops.
+ if (L->getParentLoop())
+ return false;
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ bool Changed = false;
+
+ // If there is more than one top-level loop in this function, extract all of
+ // the loops. Otherwise there is exactly one top-level loop; in this case if
+ // this function is more than a minimal wrapper around the loop, extract
+ // the loop.
+ bool ShouldExtractLoop = false;
+
+ // Extract the loop if the entry block doesn't branch to the loop header.
+ TerminatorInst *EntryTI =
+ L->getHeader()->getParent()->getEntryBlock().getTerminator();
+ if (!isa<BranchInst>(EntryTI) ||
+ !cast<BranchInst>(EntryTI)->isUnconditional() ||
+ EntryTI->getSuccessor(0) != L->getHeader()) {
+ ShouldExtractLoop = true;
+ } else {
+ // Check to see if any exits from the loop are more than just return
+ // blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
+ ShouldExtractLoop = true;
+ break;
+ }
+ }
+
+ if (ShouldExtractLoop) {
+ // We must omit landing pads. Landing pads must accompany the invoke
+ // instruction. But this would result in a loop in the extracted
+ // function. An infinite cycle occurs when it tries to extract that loop as
+ // well.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i]->isLandingPad()) {
+ ShouldExtractLoop = false;
+ break;
+ }
+ }
+
+ if (ShouldExtractLoop) {
+ if (NumLoops == 0) return Changed;
+ --NumLoops;
+ CodeExtractor Extractor(DT, *L);
+ if (Extractor.extractCodeRegion() != 0) {
+ Changed = true;
+ // After extraction, the loop is replaced by a function call, so
+ // we shouldn't try to run any more loop passes on it.
+ LPM.deleteLoopFromQueue(L);
+ }
+ ++NumExtracted;
+ }
+
+ return Changed;
+}
+
+// createSingleLoopExtractorPass - This pass extracts one natural loop from the
+// program into a function if it can. This is used by bugpoint.
+//
+Pass *llvm::createSingleLoopExtractorPass() {
+ return new SingleLoopExtractor();
+}
+
+
+// BlockFile - A file which contains a list of blocks that should not be
+// extracted.
+static cl::opt<std::string>
+BlockFile("extract-blocks-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of basic blocks to not extract"),
+ cl::Hidden);
+
+namespace {
+ /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks
+ /// from the module into their own functions except for those specified by the
+ /// BlocksToNotExtract list.
+ class BlockExtractorPass : public ModulePass {
+ void LoadFile(const char *Filename);
+ void SplitLandingPadPreds(Function *F);
+
+ std::vector<BasicBlock*> BlocksToNotExtract;
+ std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ BlockExtractorPass() : ModulePass(ID) {
+ if (!BlockFile.empty())
+ LoadFile(BlockFile.c_str());
+ }
+
+ bool runOnModule(Module &M);
+ };
+}
+
+char BlockExtractorPass::ID = 0;
+INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
+ "Extract Basic Blocks From Module (for bugpoint use)",
+ false, false)
+
+// createBlockExtractorPass - This pass extracts all blocks (except those
+// specified in the argument list) from the functions in the module.
+//
+ModulePass *llvm::createBlockExtractorPass() {
+ return new BlockExtractorPass();
+}
+
+void BlockExtractorPass::LoadFile(const char *Filename) {
+ // Load the BlockFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ errs() << "WARNING: BlockExtractor couldn't load file '" << Filename
+ << "'!\n";
+ return;
+ }
+ while (In) {
+ std::string FunctionName, BlockName;
+ In >> FunctionName;
+ In >> BlockName;
+ if (!BlockName.empty())
+ BlocksToNotExtractByName.push_back(
+ std::make_pair(FunctionName, BlockName));
+ }
+}
+
+/// SplitLandingPadPreds - The landing pad needs to be extracted with the invoke
+/// instruction. The critical edge breaker will refuse to break critical edges
+/// to a landing pad. So do them here. After this method runs, all landing pads
+/// should have only one predecessor.
+void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ InvokeInst *II = dyn_cast<InvokeInst>(I);
+ if (!II) continue;
+ BasicBlock *Parent = II->getParent();
+ BasicBlock *LPad = II->getUnwindDest();
+
+ // Look through the landing pad's predecessors. If one of them ends in an
+ // 'invoke', then we want to split the landing pad.
+ bool Split = false;
+ for (pred_iterator
+ PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) {
+ BasicBlock *BB = *PI;
+ if (BB->isLandingPad() && BB != Parent &&
+ isa<InvokeInst>(Parent->getTerminator())) {
+ Split = true;
+ break;
+ }
+ }
+
+ if (!Split) continue;
+
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs);
+ }
+}
+
+bool BlockExtractorPass::runOnModule(Module &M) {
+ std::set<BasicBlock*> TranslatedBlocksToNotExtract;
+ for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
+ BasicBlock *BB = BlocksToNotExtract[i];
+ Function *F = BB->getParent();
+
+ // Map the corresponding function in this module.
+ Function *MF = M.getFunction(F->getName());
+ assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?");
+
+ // Figure out which index the basic block is in its function.
+ Function::iterator BBI = MF->begin();
+ std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
+ TranslatedBlocksToNotExtract.insert(BBI);
+ }
+
+ while (!BlocksToNotExtractByName.empty()) {
+ // There's no way to find BBs by name without looking at every BB inside
+ // every Function. Fortunately, this is always empty except when used by
+ // bugpoint in which case correctness is more important than performance.
+
+ std::string &FuncName = BlocksToNotExtractByName.back().first;
+ std::string &BlockName = BlocksToNotExtractByName.back().second;
+
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+ Function &F = *FI;
+ if (F.getName() != FuncName) continue;
+
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ BasicBlock &BB = *BI;
+ if (BB.getName() != BlockName) continue;
+
+ TranslatedBlocksToNotExtract.insert(BI);
+ }
+ }
+
+ BlocksToNotExtractByName.pop_back();
+ }
+
+ // Now that we know which blocks to not extract, figure out which ones we WANT
+ // to extract.
+ std::vector<BasicBlock*> BlocksToExtract;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ SplitLandingPadPreds(&*F);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (!TranslatedBlocksToNotExtract.count(BB))
+ BlocksToExtract.push_back(BB);
+ }
+
+ for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
+ SmallVector<BasicBlock*, 2> BlocksToExtractVec;
+ BlocksToExtractVec.push_back(BlocksToExtract[i]);
+ if (const InvokeInst *II =
+ dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator()))
+ BlocksToExtractVec.push_back(II->getUnwindDest());
+ CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+ }
+
+ return !BlocksToExtract.empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
new file mode 100644
index 000000000000..892100f0585a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -0,0 +1,870 @@
+//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for equivalent functions that are mergable and folds them.
+//
+// A hash is computed from the function, based on its type and number of
+// basic blocks.
+//
+// Once all hashes are computed, we perform an expensive equality comparison
+// on each function pair. This takes n^2/2 comparisons per bucket, so it's
+// important that the hash function be high quality. The equality comparison
+// iterates through each instruction in each basic block.
+//
+// When a match is found the functions are folded. If both functions are
+// overridable, we move the functionality into a new internal function and
+// leave two overridable thunks to it.
+//
+//===----------------------------------------------------------------------===//
+//
+// Future work:
+//
+// * virtual functions.
+//
+// Many functions have their address taken by the virtual function table for
+// the object they belong to. However, as long as it's only used for a lookup
+// and call, this is irrelevant, and we'd like to fold such functions.
+//
+// * switch from n^2 pair-wise comparisons to an n-way comparison for each
+// bucket.
+//
+// * be smarter about bitcasts.
+//
+// In order to fold functions, we will sometimes add either bitcast instructions
+// or bitcast constant expressions. Unfortunately, this can confound further
+// analysis since the two functions differ where one has a bitcast and the
+// other doesn't. We should learn to look through bitcasts.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mergefunc"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+using namespace llvm;
+
+STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
+
+/// Creates a hash-code for the function which is the same for any two
+/// functions that will compare equal, without looking at the instructions
+/// inside the function.
+static unsigned profileFunction(const Function *F) {
+ FunctionType *FTy = F->getFunctionType();
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(F->size());
+ ID.AddInteger(F->getCallingConv());
+ ID.AddBoolean(F->hasGC());
+ ID.AddBoolean(FTy->isVarArg());
+ ID.AddInteger(FTy->getReturnType()->getTypeID());
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ return ID.ComputeHash();
+}
+
+namespace {
+
+/// ComparableFunction - A struct that pairs together functions with a
+/// DataLayout so that we can keep them together as elements in the DenseSet.
+class ComparableFunction {
+public:
+ static const ComparableFunction EmptyKey;
+ static const ComparableFunction TombstoneKey;
+ static DataLayout * const LookupOnly;
+
+ ComparableFunction(Function *Func, DataLayout *TD)
+ : Func(Func), Hash(profileFunction(Func)), TD(TD) {}
+
+ Function *getFunc() const { return Func; }
+ unsigned getHash() const { return Hash; }
+ DataLayout *getTD() const { return TD; }
+
+ // Drops AssertingVH reference to the function. Outside of debug mode, this
+ // does nothing.
+ void release() {
+ assert(Func &&
+ "Attempted to release function twice, or release empty/tombstone!");
+ Func = NULL;
+ }
+
+private:
+ explicit ComparableFunction(unsigned Hash)
+ : Func(NULL), Hash(Hash), TD(NULL) {}
+
+ AssertingVH<Function> Func;
+ unsigned Hash;
+ DataLayout *TD;
+};
+
+const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
+const ComparableFunction ComparableFunction::TombstoneKey =
+ ComparableFunction(1);
+DataLayout *const ComparableFunction::LookupOnly = (DataLayout*)(-1);
+
+}
+
+namespace llvm {
+ template <>
+ struct DenseMapInfo<ComparableFunction> {
+ static ComparableFunction getEmptyKey() {
+ return ComparableFunction::EmptyKey;
+ }
+ static ComparableFunction getTombstoneKey() {
+ return ComparableFunction::TombstoneKey;
+ }
+ static unsigned getHashValue(const ComparableFunction &CF) {
+ return CF.getHash();
+ }
+ static bool isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS);
+ };
+}
+
+namespace {
+
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. DataLayout is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+ FunctionComparator(const DataLayout *TD, const Function *F1,
+ const Function *F2)
+ : F1(F1), F2(F2), TD(TD) {}
+
+ /// Test whether the two functions have equivalent behaviour.
+ bool compare();
+
+private:
+ /// Test whether two basic blocks have equivalent behaviour.
+ bool compare(const BasicBlock *BB1, const BasicBlock *BB2);
+
+ /// Assign or look up previously assigned numbers for the two values, and
+ /// return whether the numbers are equal. Numbers are assigned in the order
+ /// visited.
+ bool enumerate(const Value *V1, const Value *V2);
+
+ /// Compare two Instructions for equivalence, similar to
+ /// Instruction::isSameOperationAs but with modifications to the type
+ /// comparison.
+ bool isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const;
+
+ /// Compare two GEPs for equivalent pointer arithmetic.
+ bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+ bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+ const GetElementPtrInst *GEP2) {
+ return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
+ }
+
+ /// Compare two Types, treating all pointer types as equal.
+ bool isEquivalentType(Type *Ty1, Type *Ty2) const;
+
+ // The two functions undergoing comparison.
+ const Function *F1, *F2;
+
+ const DataLayout *TD;
+
+ DenseMap<const Value *, const Value *> id_map;
+ DenseSet<const Value *> seen_values;
+};
+
+}
+
+// Any two pointers in the same address space are equivalent, intptr_t and
+// pointers are equivalent. Otherwise, standard type equivalence rules apply.
+bool FunctionComparator::isEquivalentType(Type *Ty1,
+ Type *Ty2) const {
+ if (Ty1 == Ty2)
+ return true;
+ if (Ty1->getTypeID() != Ty2->getTypeID()) {
+ if (TD) {
+ LLVMContext &Ctx = Ty1->getContext();
+ if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
+ if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
+ }
+ return false;
+ }
+
+ switch (Ty1->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ // Fall through in Release mode.
+ case Type::IntegerTyID:
+ case Type::VectorTyID:
+ // Ty1 == Ty2 would have returned true earlier.
+ return false;
+
+ case Type::VoidTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ case Type::MetadataTyID:
+ return true;
+
+ case Type::PointerTyID: {
+ PointerType *PTy1 = cast<PointerType>(Ty1);
+ PointerType *PTy2 = cast<PointerType>(Ty2);
+ return PTy1->getAddressSpace() == PTy2->getAddressSpace();
+ }
+
+ case Type::StructTyID: {
+ StructType *STy1 = cast<StructType>(Ty1);
+ StructType *STy2 = cast<StructType>(Ty2);
+ if (STy1->getNumElements() != STy2->getNumElements())
+ return false;
+
+ if (STy1->isPacked() != STy2->isPacked())
+ return false;
+
+ for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) {
+ if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i)))
+ return false;
+ }
+ return true;
+ }
+
+ case Type::FunctionTyID: {
+ FunctionType *FTy1 = cast<FunctionType>(Ty1);
+ FunctionType *FTy2 = cast<FunctionType>(Ty2);
+ if (FTy1->getNumParams() != FTy2->getNumParams() ||
+ FTy1->isVarArg() != FTy2->isVarArg())
+ return false;
+
+ if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType()))
+ return false;
+
+ for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) {
+ if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i)))
+ return false;
+ }
+ return true;
+ }
+
+ case Type::ArrayTyID: {
+ ArrayType *ATy1 = cast<ArrayType>(Ty1);
+ ArrayType *ATy2 = cast<ArrayType>(Ty2);
+ return ATy1->getNumElements() == ATy2->getNumElements() &&
+ isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
+ }
+ }
+}
+
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
+bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const {
+ // Differences from Instruction::isSameOperationAs:
+ // * replace type comparison with calls to isEquivalentType.
+ // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
+ // * because of the above, we don't test for the tail bit on calls later on
+ if (I1->getOpcode() != I2->getOpcode() ||
+ I1->getNumOperands() != I2->getNumOperands() ||
+ !isEquivalentType(I1->getType(), I2->getType()) ||
+ !I1->hasSameSubclassOptionalData(I2))
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i)
+ if (!isEquivalentType(I1->getOperand(i)->getType(),
+ I2->getOperand(i)->getType()))
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
+ return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() &&
+ LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() &&
+ LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
+ return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() &&
+ SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() &&
+ SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
+ return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(I1))
+ return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
+ return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
+ CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1))
+ return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1))
+ return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices();
+ if (const FenceInst *FI = dyn_cast<FenceInst>(I1))
+ return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() &&
+ FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
+ return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
+ CXI->getOrdering() == cast<AtomicCmpXchgInst>(I2)->getOrdering() &&
+ CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
+ return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
+ RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() &&
+ RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() &&
+ RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope();
+
+ return true;
+}
+
+// Determine whether two GEP operations perform the same underlying arithmetic.
+bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
+ const GEPOperator *GEP2) {
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 1;
+ APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
+ if (TD &&
+ GEP1->accumulateConstantOffset(*TD, Offset1) &&
+ GEP2->accumulateConstantOffset(*TD, Offset2)) {
+ return Offset1 == Offset2;
+ }
+
+ if (GEP1->getPointerOperand()->getType() !=
+ GEP2->getPointerOperand()->getType())
+ return false;
+
+ if (GEP1->getNumOperands() != GEP2->getNumOperands())
+ return false;
+
+ for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
+ if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
+ return false;
+ }
+
+ return true;
+}
+
+// Compare two values used by the two functions under pair-wise comparison. If
+// this is the first time the values are seen, they're added to the mapping so
+// that we will detect mismatches on next use.
+bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
+ // Check for function @f1 referring to itself and function @f2 referring to
+ // itself, or referring to each other, or both referring to either of them.
+ // They're all equivalent if the two functions are otherwise equivalent.
+ if (V1 == F1 && V2 == F2)
+ return true;
+ if (V1 == F2 && V2 == F1)
+ return true;
+
+ if (const Constant *C1 = dyn_cast<Constant>(V1)) {
+ if (V1 == V2) return true;
+ const Constant *C2 = dyn_cast<Constant>(V2);
+ if (!C2) return false;
+ // TODO: constant expressions with GEP or references to F1 or F2.
+ if (C1->isNullValue() && C2->isNullValue() &&
+ isEquivalentType(C1->getType(), C2->getType()))
+ return true;
+ // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
+ // then they must have equal bit patterns.
+ return C1->getType()->canLosslesslyBitCastTo(C2->getType()) &&
+ C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType());
+ }
+
+ if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2))
+ return V1 == V2;
+
+ // Check that V1 maps to V2. If we find a value that V1 maps to then we simply
+ // check whether it's equal to V2. When there is no mapping then we need to
+ // ensure that V2 isn't already equivalent to something else. For this
+ // purpose, we track the V2 values in a set.
+
+ const Value *&map_elem = id_map[V1];
+ if (map_elem)
+ return map_elem == V2;
+ if (!seen_values.insert(V2).second)
+ return false;
+ map_elem = V2;
+ return true;
+}
+
+// Test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
+ BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
+
+ do {
+ if (!enumerate(F1I, F2I))
+ return false;
+
+ if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
+ const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
+ if (!GEP2)
+ return false;
+
+ if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+ return false;
+
+ if (!isEquivalentGEP(GEP1, GEP2))
+ return false;
+ } else {
+ if (!isEquivalentOperation(F1I, F2I))
+ return false;
+
+ assert(F1I->getNumOperands() == F2I->getNumOperands());
+ for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
+ Value *OpF1 = F1I->getOperand(i);
+ Value *OpF2 = F2I->getOperand(i);
+
+ if (!enumerate(OpF1, OpF2))
+ return false;
+
+ if (OpF1->getValueID() != OpF2->getValueID() ||
+ !isEquivalentType(OpF1->getType(), OpF2->getType()))
+ return false;
+ }
+ }
+
+ ++F1I, ++F2I;
+ } while (F1I != F1E && F2I != F2E);
+
+ return F1I == F1E && F2I == F2E;
+}
+
+// Test whether the two functions have equivalent behaviour.
+bool FunctionComparator::compare() {
+ // We need to recheck everything, but check the things that weren't included
+ // in the hash first.
+
+ if (F1->getAttributes() != F2->getAttributes())
+ return false;
+
+ if (F1->hasGC() != F2->hasGC())
+ return false;
+
+ if (F1->hasGC() && F1->getGC() != F2->getGC())
+ return false;
+
+ if (F1->hasSection() != F2->hasSection())
+ return false;
+
+ if (F1->hasSection() && F1->getSection() != F2->getSection())
+ return false;
+
+ if (F1->isVarArg() != F2->isVarArg())
+ return false;
+
+ // TODO: if it's internal and only used in direct calls, we could handle this
+ // case too.
+ if (F1->getCallingConv() != F2->getCallingConv())
+ return false;
+
+ if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
+ return false;
+
+ assert(F1->arg_size() == F2->arg_size() &&
+ "Identically typed functions have different numbers of args!");
+
+ // Visit the arguments so that they get enumerated in the order they're
+ // passed in.
+ for (Function::const_arg_iterator f1i = F1->arg_begin(),
+ f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
+ if (!enumerate(f1i, f2i))
+ llvm_unreachable("Arguments repeat!");
+ }
+
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
+ SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
+
+ F1BBs.push_back(&F1->getEntryBlock());
+ F2BBs.push_back(&F2->getEntryBlock());
+
+ VisitedBBs.insert(F1BBs[0]);
+ while (!F1BBs.empty()) {
+ const BasicBlock *F1BB = F1BBs.pop_back_val();
+ const BasicBlock *F2BB = F2BBs.pop_back_val();
+
+ if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB))
+ return false;
+
+ const TerminatorInst *F1TI = F1BB->getTerminator();
+ const TerminatorInst *F2TI = F2BB->getTerminator();
+
+ assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
+ for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
+ continue;
+
+ F1BBs.push_back(F1TI->getSuccessor(i));
+ F2BBs.push_back(F2TI->getSuccessor(i));
+ }
+ }
+ return true;
+}
+
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine code,
+/// by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one to a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+ static char ID;
+ MergeFunctions()
+ : ModulePass(ID), HasGlobalAliases(false) {
+ initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+private:
+ typedef DenseSet<ComparableFunction> FnSetType;
+
+ /// A work queue of functions that may have been modified and should be
+ /// analyzed again.
+ std::vector<WeakVH> Deferred;
+
+ /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+ /// equal to one that's already present.
+ bool insert(ComparableFunction &NewF);
+
+ /// Remove a Function from the FnSet and queue it up for a second sweep of
+ /// analysis.
+ void remove(Function *F);
+
+ /// Find the functions that use this Value and remove them from FnSet and
+ /// queue the functions.
+ void removeUsers(Value *V);
+
+ /// Replace all direct calls of Old with calls of New. Will bitcast New if
+ /// necessary to make types match.
+ void replaceDirectCallers(Function *Old, Function *New);
+
+ /// Merge two equivalent functions. Upon completion, G may be deleted, or may
+ /// be converted into a thunk. In either case, it should never be visited
+ /// again.
+ void mergeTwoFunctions(Function *F, Function *G);
+
+ /// Replace G with a thunk or an alias to F. Deletes G.
+ void writeThunkOrAlias(Function *F, Function *G);
+
+ /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+ /// of G with bitcast(F). Deletes G.
+ void writeThunk(Function *F, Function *G);
+
+ /// Replace G with an alias to F. Deletes G.
+ void writeAlias(Function *F, Function *G);
+
+ /// The set of all distinct functions. Use the insert() and remove() methods
+ /// to modify it.
+ FnSetType FnSet;
+
+ /// DataLayout for more accurate GEP comparisons. May be NULL.
+ DataLayout *TD;
+
+ /// Whether or not the target supports global aliases.
+ bool HasGlobalAliases;
+};
+
+} // end anonymous namespace
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+
+ModulePass *llvm::createMergeFunctionsPass() {
+ return new MergeFunctions();
+}
+
+bool MergeFunctions::runOnModule(Module &M) {
+ bool Changed = false;
+ TD = getAnalysisIfAvailable<DataLayout>();
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ Deferred.push_back(WeakVH(I));
+ }
+ FnSet.resize(Deferred.size());
+
+ do {
+ std::vector<WeakVH> Worklist;
+ Deferred.swap(Worklist);
+
+ DEBUG(dbgs() << "size of module: " << M.size() << '\n');
+ DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
+
+ // Insert only strong functions and merge them. Strong function merging
+ // always deletes one of them.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ !F->mayBeOverridden()) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ Changed |= insert(CF);
+ }
+ }
+
+ // Insert only weak functions and merge them. By doing these second we
+ // create thunks to the strong function when possible. When two weak
+ // functions are identical, we create a new strong function with two weak
+ // weak thunks to it which are identical but not mergable.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ F->mayBeOverridden()) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ Changed |= insert(CF);
+ }
+ }
+ DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
+ } while (!Deferred.empty());
+
+ FnSet.clear();
+
+ return Changed;
+}
+
+bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS) {
+ if (LHS.getFunc() == RHS.getFunc() &&
+ LHS.getHash() == RHS.getHash())
+ return true;
+ if (!LHS.getFunc() || !RHS.getFunc())
+ return false;
+
+ // One of these is a special "underlying pointer comparison only" object.
+ if (LHS.getTD() == ComparableFunction::LookupOnly ||
+ RHS.getTD() == ComparableFunction::LookupOnly)
+ return false;
+
+ assert(LHS.getTD() == RHS.getTD() &&
+ "Comparing functions for different targets");
+
+ return FunctionComparator(LHS.getTD(), LHS.getFunc(),
+ RHS.getFunc()).compare();
+}
+
+// Replace direct callers of Old with New.
+void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
+ Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
+ for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+ UI != UE;) {
+ Value::use_iterator TheIter = UI;
+ ++UI;
+ CallSite CS(*TheIter);
+ if (CS && CS.isCallee(TheIter)) {
+ remove(CS.getInstruction()->getParent()->getParent());
+ TheIter.getUse().set(BitcastNew);
+ }
+ }
+}
+
+// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
+void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+ if (HasGlobalAliases && G->hasUnnamedAddr()) {
+ if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
+ G->hasWeakLinkage()) {
+ writeAlias(F, G);
+ return;
+ }
+ }
+
+ writeThunk(F, G);
+}
+
+// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+// of G with bitcast(F). Deletes G.
+void MergeFunctions::writeThunk(Function *F, Function *G) {
+ if (!G->mayBeOverridden()) {
+ // Redirect direct callers of G to F.
+ replaceDirectCallers(G, F);
+ }
+
+ // If G was internal then we may have replaced all uses of G with F. If so,
+ // stop here and delete G. There's no need for a thunk.
+ if (G->hasLocalLinkage() && G->use_empty()) {
+ G->eraseFromParent();
+ return;
+ }
+
+ Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+ G->getParent());
+ BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+ IRBuilder<false> Builder(BB);
+
+ SmallVector<Value *, 16> Args;
+ unsigned i = 0;
+ FunctionType *FFTy = F->getFunctionType();
+ for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
+ AI != AE; ++AI) {
+ Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
+ ++i;
+ }
+
+ CallInst *CI = Builder.CreateCall(F, Args);
+ CI->setTailCall();
+ CI->setCallingConv(F->getCallingConv());
+ if (NewG->getReturnType()->isVoidTy()) {
+ Builder.CreateRetVoid();
+ } else {
+ Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
+ }
+
+ NewG->copyAttributesFrom(G);
+ NewG->takeName(G);
+ removeUsers(G);
+ G->replaceAllUsesWith(NewG);
+ G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+ ++NumThunksWritten;
+}
+
+// Replace G with an alias to F and delete G.
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
+ BitcastF, G->getParent());
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ GA->takeName(G);
+ GA->setVisibility(G->getVisibility());
+ removeUsers(G);
+ G->replaceAllUsesWith(GA);
+ G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+ ++NumAliasesWritten;
+}
+
+// Merge two equivalent functions. Upon completion, Function G is deleted.
+void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
+ if (F->mayBeOverridden()) {
+ assert(G->mayBeOverridden());
+
+ if (HasGlobalAliases) {
+ // Make them both thunks to the same internal function.
+ Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+ F->getParent());
+ H->copyAttributesFrom(F);
+ H->takeName(F);
+ removeUsers(F);
+ F->replaceAllUsesWith(H);
+
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+
+ writeAlias(F, G);
+ writeAlias(F, H);
+
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::PrivateLinkage);
+ } else {
+ // We can't merge them. Instead, pick one and update all direct callers
+ // to call it and hope that we improve the instruction cache hit rate.
+ replaceDirectCallers(G, F);
+ }
+
+ ++NumDoubleWeak;
+ } else {
+ writeThunkOrAlias(F, G);
+ }
+
+ ++NumFunctionsMerged;
+}
+
+// Insert a ComparableFunction into the FnSet, or merge it away if equal to one
+// that was already inserted.
+bool MergeFunctions::insert(ComparableFunction &NewF) {
+ std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+ if (Result.second) {
+ DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n');
+ return false;
+ }
+
+ const ComparableFunction &OldF = *Result.first;
+
+ // Never thunk a strong function to a weak function.
+ assert(!OldF.getFunc()->mayBeOverridden() ||
+ NewF.getFunc()->mayBeOverridden());
+
+ DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == "
+ << NewF.getFunc()->getName() << '\n');
+
+ Function *DeleteF = NewF.getFunc();
+ NewF.release();
+ mergeTwoFunctions(OldF.getFunc(), DeleteF);
+ return true;
+}
+
+// Remove a function from FnSet. If it was already in FnSet, add it to Deferred
+// so that we'll look at it in the next round.
+void MergeFunctions::remove(Function *F) {
+ // We need to make sure we remove F, not a function "equal" to F per the
+ // function equality comparator.
+ //
+ // The special "lookup only" ComparableFunction bypasses the expensive
+ // function comparison in favour of a pointer comparison on the underlying
+ // Function*'s.
+ ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly);
+ if (FnSet.erase(CF)) {
+ DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n");
+ Deferred.push_back(F);
+ }
+}
+
+// For each instruction used by the value, remove() the function that contains
+// the instruction. This should happen right before a call to RAUW.
+void MergeFunctions::removeUsers(Value *V) {
+ std::vector<Value *> Worklist;
+ Worklist.push_back(V);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ Use &U = UI.getUse();
+ if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
+ remove(I->getParent()->getParent());
+ } else if (isa<GlobalValue>(U.getUser())) {
+ // do nothing
+ } else if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end();
+ CUI != CUE; ++CUI)
+ Worklist.push_back(*CUI);
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
new file mode 100644
index 000000000000..fa518cb0abb6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -0,0 +1,183 @@
+//===- PartialInlining.cpp - Inline parts of functions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs partial inlining, typically by inlining an if statement
+// that surrounds the body of the function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "partialinlining"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+using namespace llvm;
+
+STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+
+namespace {
+ struct PartialInliner : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+ static char ID; // Pass identification, replacement for typeid
+ PartialInliner() : ModulePass(ID) {
+ initializePartialInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module& M);
+
+ private:
+ Function* unswitchFunction(Function* F);
+ };
+}
+
+char PartialInliner::ID = 0;
+INITIALIZE_PASS(PartialInliner, "partial-inliner",
+ "Partial Inliner", false, false)
+
+ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
+
+Function* PartialInliner::unswitchFunction(Function* F) {
+ // First, verify that this function is an unswitching candidate...
+ BasicBlock* entryBlock = F->begin();
+ BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
+ if (!BR || BR->isUnconditional())
+ return 0;
+
+ BasicBlock* returnBlock = 0;
+ BasicBlock* nonReturnBlock = 0;
+ unsigned returnCount = 0;
+ for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
+ SI != SE; ++SI)
+ if (isa<ReturnInst>((*SI)->getTerminator())) {
+ returnBlock = *SI;
+ returnCount++;
+ } else
+ nonReturnBlock = *SI;
+
+ if (returnCount != 1)
+ return 0;
+
+ // Clone the function, so that we can hack away on it.
+ ValueToValueMapTy VMap;
+ Function* duplicateFunction = CloneFunction(F, VMap,
+ /*ModuleLevelChanges=*/false);
+ duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
+ F->getParent()->getFunctionList().push_back(duplicateFunction);
+ BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
+ BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
+ BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
+
+ // Go ahead and update all uses to the duplicate, so that we can just
+ // use the inliner functionality when we're done hacking.
+ F->replaceAllUsesWith(duplicateFunction);
+
+ // Special hackery is needed with PHI nodes that have inputs from more than
+ // one extracted block. For simplicity, just split the PHIs into a two-level
+ // sequence of PHIs, some of which will go in the extracted region, and some
+ // of which will go outside.
+ BasicBlock* preReturn = newReturnBlock;
+ newReturnBlock = newReturnBlock->splitBasicBlock(
+ newReturnBlock->getFirstNonPHI());
+ BasicBlock::iterator I = preReturn->begin();
+ BasicBlock::iterator Ins = newReturnBlock->begin();
+ while (I != preReturn->end()) {
+ PHINode* OldPhi = dyn_cast<PHINode>(I);
+ if (!OldPhi) break;
+
+ PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
+ OldPhi->replaceAllUsesWith(retPhi);
+ Ins = newReturnBlock->getFirstNonPHI();
+
+ retPhi->addIncoming(I, preReturn);
+ retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
+ newEntryBlock);
+ OldPhi->removeIncomingValue(newEntryBlock);
+
+ ++I;
+ }
+ newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
+
+ // Gather up the blocks that we're going to extract.
+ std::vector<BasicBlock*> toExtract;
+ toExtract.push_back(newNonReturnBlock);
+ for (Function::iterator FI = duplicateFunction->begin(),
+ FE = duplicateFunction->end(); FI != FE; ++FI)
+ if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
+ &*FI != newNonReturnBlock)
+ toExtract.push_back(FI);
+
+ // The CodeExtractor needs a dominator tree.
+ DominatorTree DT;
+ DT.runOnFunction(*duplicateFunction);
+
+ // Extract the body of the if.
+ Function* extractedFunction
+ = CodeExtractor(toExtract, &DT).extractCodeRegion();
+
+ InlineFunctionInfo IFI;
+
+ // Inline the top-level if test into all callers.
+ std::vector<User*> Users(duplicateFunction->use_begin(),
+ duplicateFunction->use_end());
+ for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
+ UI != UE; ++UI)
+ if (CallInst *CI = dyn_cast<CallInst>(*UI))
+ InlineFunction(CI, IFI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
+ InlineFunction(II, IFI);
+
+ // Ditch the duplicate, since we're done with it, and rewrite all remaining
+ // users (function pointers, etc.) back to the original function.
+ duplicateFunction->replaceAllUsesWith(F);
+ duplicateFunction->eraseFromParent();
+
+ ++NumPartialInlined;
+
+ return extractedFunction;
+}
+
+bool PartialInliner::runOnModule(Module& M) {
+ std::vector<Function*> worklist;
+ worklist.reserve(M.size());
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
+ if (!FI->use_empty() && !FI->isDeclaration())
+ worklist.push_back(&*FI);
+
+ bool changed = false;
+ while (!worklist.empty()) {
+ Function* currFunc = worklist.back();
+ worklist.pop_back();
+
+ if (currFunc->use_empty()) continue;
+
+ bool recursive = false;
+ for (Function::use_iterator UI = currFunc->use_begin(),
+ UE = currFunc->use_end(); UI != UE; ++UI)
+ if (Instruction* I = dyn_cast<Instruction>(*UI))
+ if (I->getParent()->getParent() == currFunc) {
+ recursive = true;
+ break;
+ }
+ if (recursive) continue;
+
+
+ if (Function* newFunc = unswitchFunction(currFunc)) {
+ worklist.push_back(newFunc);
+ changed = true;
+ }
+
+ }
+
+ return changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
new file mode 100644
index 000000000000..027a9f2a6871
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -0,0 +1,399 @@
+//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PassManagerBuilder class, which is used to set up a
+// "standard" optimization sequence suitable for languages like C and C++.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm-c/Transforms/PassManagerBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+RunLoopVectorization("vectorize-loops",
+ cl::desc("Run the Loop vectorization passes"));
+
+static cl::opt<bool>
+RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes"));
+
+static cl::opt<bool>
+UseGVNAfterVectorization("use-gvn-after-vectorization",
+ cl::init(false), cl::Hidden,
+ cl::desc("Run GVN instead of Early CSE after vectorization passes"));
+
+static cl::opt<bool> UseNewSROA("use-new-sroa",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable the new, experimental SROA pass"));
+
+PassManagerBuilder::PassManagerBuilder() {
+ OptLevel = 2;
+ SizeLevel = 0;
+ LibraryInfo = 0;
+ Inliner = 0;
+ DisableSimplifyLibCalls = false;
+ DisableUnitAtATime = false;
+ DisableUnrollLoops = false;
+ Vectorize = RunBBVectorization;
+ LoopVectorize = RunLoopVectorization;
+}
+
+PassManagerBuilder::~PassManagerBuilder() {
+ delete LibraryInfo;
+ delete Inliner;
+}
+
+/// Set of global extensions, automatically added as part of the standard set.
+static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
+ PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
+
+void PassManagerBuilder::addGlobalExtension(
+ PassManagerBuilder::ExtensionPointTy Ty,
+ PassManagerBuilder::ExtensionFn Fn) {
+ GlobalExtensions->push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
+ Extensions.push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
+ PassManagerBase &PM) const {
+ for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
+ if ((*GlobalExtensions)[i].first == ETy)
+ (*GlobalExtensions)[i].second(*this, PM);
+ for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
+ if (Extensions[i].first == ETy)
+ Extensions[i].second(*this, PM);
+}
+
+void
+PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+}
+
+void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
+ addExtensionsToPM(EP_EarlyAsPossible, FPM);
+
+ // Add LibraryInfo if we have some.
+ if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo));
+
+ if (OptLevel == 0) return;
+
+ addInitialAliasAnalysisPasses(FPM);
+
+ FPM.add(createCFGSimplificationPass());
+ if (UseNewSROA)
+ FPM.add(createSROAPass());
+ else
+ FPM.add(createScalarReplAggregatesPass());
+ FPM.add(createEarlyCSEPass());
+ FPM.add(createLowerExpectIntrinsicPass());
+}
+
+void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
+ // If all optimizations are disabled, just run the always-inline pass.
+ if (OptLevel == 0) {
+ if (Inliner) {
+ MPM.add(Inliner);
+ Inliner = 0;
+ }
+
+ // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
+ // pass manager, but we don't want to add extensions into that pass manager.
+ // To prevent this we must insert a no-op module pass to reset the pass
+ // manager to get the same behavior as EP_OptimizerLast in non-O0 builds.
+ if (!GlobalExtensions->empty() || !Extensions.empty())
+ MPM.add(createBarrierNoopPass());
+
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+ return;
+ }
+
+ // Add LibraryInfo if we have some.
+ if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo));
+
+ addInitialAliasAnalysisPasses(MPM);
+
+ if (!DisableUnitAtATime) {
+ addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
+
+ MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
+
+ MPM.add(createIPSCCPPass()); // IP SCCP
+ MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
+
+ MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+ MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+ }
+
+ // Start of CallGraph SCC passes.
+ if (!DisableUnitAtATime)
+ MPM.add(createPruneEHPass()); // Remove dead EH info
+ if (Inliner) {
+ MPM.add(Inliner);
+ Inliner = 0;
+ }
+ if (!DisableUnitAtATime)
+ MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
+ if (OptLevel > 2)
+ MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
+
+ // Start of function pass.
+ // Break up aggregate allocas, using SSAUpdater.
+ if (UseNewSROA)
+ MPM.add(createSROAPass(/*RequiresDomTree*/ false));
+ else
+ MPM.add(createScalarReplAggregatesPass(-1, false));
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ if (!DisableSimplifyLibCalls)
+ MPM.add(createSimplifyLibCallsPass()); // Library Call Optimizations
+ MPM.add(createJumpThreadingPass()); // Thread jumps.
+ MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createInstructionCombiningPass()); // Combine silly seq's
+
+ MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createReassociatePass()); // Reassociate expressions
+ MPM.add(createLoopRotatePass()); // Rotate Loop
+ MPM.add(createLICMPass()); // Hoist loop invariants
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
+ MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ MPM.add(createLoopDeletionPass()); // Delete dead loops
+
+ if (LoopVectorize && OptLevel > 2)
+ MPM.add(createLoopVectorizePass());
+
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass()); // Unroll small loops
+ addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
+
+ if (OptLevel > 1)
+ MPM.add(createGVNPass()); // Remove redundancies
+ MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
+ MPM.add(createSCCPPass()); // Constant prop with SCCP
+
+ // Run instcombine after redundancy elimination to exploit opportunities
+ // opened up by them.
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createJumpThreadingPass()); // Thread jumps
+ MPM.add(createCorrelatedValuePropagationPass());
+ MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+
+ addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
+
+ if (Vectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass()); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
+ }
+
+ MPM.add(createAggressiveDCEPass()); // Delete dead instructions
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createInstructionCombiningPass()); // Clean up after everything.
+
+ if (!DisableUnitAtATime) {
+ // FIXME: We shouldn't bother with this anymore.
+ MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
+
+ // GlobalOpt already deletes dead functions and globals, at -O2 try a
+ // late pass of GlobalDCE. It is capable of deleting dead cycles.
+ if (OptLevel > 1) {
+ MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
+ MPM.add(createConstantMergePass()); // Merge dup global constants
+ }
+ }
+ addExtensionsToPM(EP_OptimizerLast, MPM);
+}
+
+void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
+ bool Internalize,
+ bool RunInliner,
+ bool DisableGVNLoadPRE) {
+ // Provide AliasAnalysis services for optimizations.
+ addInitialAliasAnalysisPasses(PM);
+
+ // Now that composite has been compiled, scan through the module, looking
+ // for a main function. If main is defined, mark all other functions
+ // internal.
+ if (Internalize) {
+ std::vector<const char*> E;
+ E.push_back("main");
+ PM.add(createInternalizePass(E));
+ }
+
+ // Propagate constants at call sites into the functions they call. This
+ // opens opportunities for globalopt (and inlining) by substituting function
+ // pointers passed as arguments to direct uses of functions.
+ PM.add(createIPSCCPPass());
+
+ // Now that we internalized some globals, see if we can hack on them!
+ PM.add(createGlobalOptimizerPass());
+
+ // Linking modules together can lead to duplicated global constants, only
+ // keep one copy of each constant.
+ PM.add(createConstantMergePass());
+
+ // Remove unused arguments from functions.
+ PM.add(createDeadArgEliminationPass());
+
+ // Reduce the code after globalopt and ipsccp. Both can open up significant
+ // simplification opportunities, and both can propagate functions through
+ // function pointers. When this happens, we often have to resolve varargs
+ // calls, etc, so let instcombine do this.
+ PM.add(createInstructionCombiningPass());
+
+ // Inline small functions
+ if (RunInliner)
+ PM.add(createFunctionInliningPass());
+
+ PM.add(createPruneEHPass()); // Remove dead EH info.
+
+ // Optimize globals again if we ran the inliner.
+ if (RunInliner)
+ PM.add(createGlobalOptimizerPass());
+ PM.add(createGlobalDCEPass()); // Remove dead functions.
+
+ // If we didn't decide to inline a function, check to see if we can
+ // transform it to pass arguments by value instead of by reference.
+ PM.add(createArgumentPromotionPass());
+
+ // The IPO passes may leave cruft around. Clean up after them.
+ PM.add(createInstructionCombiningPass());
+ PM.add(createJumpThreadingPass());
+ // Break up allocas
+ if (UseNewSROA)
+ PM.add(createSROAPass());
+ else
+ PM.add(createScalarReplAggregatesPass());
+
+ // Run a few AA driven optimizations here and now, to cleanup the code.
+ PM.add(createFunctionAttrsPass()); // Add nocapture.
+ PM.add(createGlobalsModRefPass()); // IP alias analysis.
+
+ PM.add(createLICMPass()); // Hoist loop invariants.
+ PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
+ PM.add(createMemCpyOptPass()); // Remove dead memcpys.
+ // Nuke dead stores.
+ PM.add(createDeadStoreEliminationPass());
+
+ // Cleanup and simplify the code after the scalar optimizations.
+ PM.add(createInstructionCombiningPass());
+
+ PM.add(createJumpThreadingPass());
+
+ // Delete basic blocks, which optimization passes may have killed.
+ PM.add(createCFGSimplificationPass());
+
+ // Now that we have optimized the program, discard unreachable functions.
+ PM.add(createGlobalDCEPass());
+}
+
+LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
+ PassManagerBuilder *PMB = new PassManagerBuilder();
+ return wrap(PMB);
+}
+
+void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ delete Builder;
+}
+
+void
+LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
+ unsigned OptLevel) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->OptLevel = OptLevel;
+}
+
+void
+LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
+ unsigned SizeLevel) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->SizeLevel = SizeLevel;
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableUnitAtATime = Value;
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableUnrollLoops = Value;
+}
+
+void
+LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableSimplifyLibCalls = Value;
+}
+
+void
+LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
+ unsigned Threshold) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->Inliner = createFunctionInliningPass(Threshold);
+}
+
+void
+LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ FunctionPassManager *FPM = unwrap<FunctionPassManager>(PM);
+ Builder->populateFunctionPassManager(*FPM);
+}
+
+void
+LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ PassManagerBase *MPM = unwrap(PM);
+ Builder->populateModulePassManager(*MPM);
+}
+
+void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM,
+ LLVMBool Internalize,
+ LLVMBool RunInliner) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ PassManagerBase *LPM = unwrap(PM);
+ Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
new file mode 100644
index 000000000000..73d9323195bb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -0,0 +1,260 @@
+//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, turning invoke instructions into calls, iff the callee cannot
+// throw an exception, and marking functions 'nounwind' if they cannot throw.
+// It implements this as a bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "prune-eh"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CFG.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumRemoved, "Number of invokes removed");
+STATISTIC(NumUnreach, "Number of noreturn calls optimized");
+
+namespace {
+ struct PruneEH : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ PruneEH() : CallGraphSCCPass(ID) {
+ initializePruneEHPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(CallGraphSCC &SCC);
+
+ bool SimplifyFunction(Function *F);
+ void DeleteBasicBlock(BasicBlock *BB);
+ };
+}
+
+char PruneEH::ID = 0;
+INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+
+Pass *llvm::createPruneEHPass() { return new PruneEH(); }
+
+
+bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
+ SmallPtrSet<CallGraphNode *, 8> SCCNodes;
+ CallGraph &CG = getAnalysis<CallGraph>();
+ bool MadeChange = false;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert(*I);
+
+ // First pass, scan all of the functions in the SCC, simplifying them
+ // according to what we know.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ if (Function *F = (*I)->getFunction())
+ MadeChange |= SimplifyFunction(F);
+
+ // Next, check to see if any callees might throw or if there are any external
+ // functions in this SCC: if so, we cannot prune any functions in this SCC.
+ // Definitions that are weak and not declared non-throwing might be
+ // overridden at linktime with something that throws, so assume that.
+ // If this SCC includes the unwind instruction, we KNOW it throws, so
+ // obviously the SCC might throw.
+ //
+ bool SCCMightUnwind = false, SCCMightReturn = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+ (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F == 0) {
+ SCCMightUnwind = true;
+ SCCMightReturn = true;
+ } else if (F->isDeclaration() || F->mayBeOverridden()) {
+ SCCMightUnwind |= !F->doesNotThrow();
+ SCCMightReturn |= !F->doesNotReturn();
+ } else {
+ bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
+ bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
+
+ if (!CheckUnwind && !CheckReturn)
+ continue;
+
+ // Check to see if this function performs an unwind or calls an
+ // unwinding function.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (CheckUnwind && isa<ResumeInst>(BB->getTerminator())) {
+ // Uses unwind / resume!
+ SCCMightUnwind = true;
+ } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) {
+ SCCMightReturn = true;
+ }
+
+ // Invoke instructions don't allow unwinding to continue, so we are
+ // only interested in call instructions.
+ if (CheckUnwind && !SCCMightUnwind)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->doesNotThrow()) {
+ // This call cannot throw.
+ } else if (Function *Callee = CI->getCalledFunction()) {
+ CallGraphNode *CalleeNode = CG[Callee];
+ // If the callee is outside our current SCC then we may
+ // throw because it might.
+ if (!SCCNodes.count(CalleeNode)) {
+ SCCMightUnwind = true;
+ break;
+ }
+ } else {
+ // Indirect call, it might throw.
+ SCCMightUnwind = true;
+ break;
+ }
+ }
+ if (SCCMightUnwind && SCCMightReturn) break;
+ }
+ }
+ }
+
+ // If the SCC doesn't unwind or doesn't throw, note this fact.
+ if (!SCCMightUnwind || !SCCMightReturn)
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ AttrBuilder NewAttributes;
+
+ if (!SCCMightUnwind)
+ NewAttributes.addAttribute(Attribute::NoUnwind);
+ if (!SCCMightReturn)
+ NewAttributes.addAttribute(Attribute::NoReturn);
+
+ Function *F = (*I)->getFunction();
+ const AttributeSet &PAL = F->getAttributes();
+ const AttributeSet &NPAL =
+ PAL.addAttributes(F->getContext(), AttributeSet::FunctionIndex,
+ AttributeSet::get(F->getContext(),
+ AttributeSet::FunctionIndex,
+ NewAttributes));
+ if (PAL != NPAL) {
+ MadeChange = true;
+ F->setAttributes(NPAL);
+ }
+ }
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ // Convert any invoke instructions to non-throwing functions in this node
+ // into call instructions with a branch. This makes the exception blocks
+ // dead.
+ if (Function *F = (*I)->getFunction())
+ MadeChange |= SimplifyFunction(F);
+ }
+
+ return MadeChange;
+}
+
+
+// SimplifyFunction - Given information about callees, simplify the specified
+// function if we have invokes to non-unwinding functions or code after calls to
+// no-return functions.
+bool PruneEH::SimplifyFunction(Function *F) {
+ bool MadeChange = false;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow()) {
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ // Insert a call instruction before the invoke.
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+ Call->takeName(II);
+ Call->setCallingConv(II->getCallingConv());
+ Call->setAttributes(II->getAttributes());
+ Call->setDebugLoc(II->getDebugLoc());
+
+ // Anything that used the value produced by the invoke instruction
+ // now uses the value produced by the call instruction. Note that we
+ // do this even for void functions and calls with no uses so that the
+ // callgraph edge is updated.
+ II->replaceAllUsesWith(Call);
+ BasicBlock *UnwindBlock = II->getUnwindDest();
+ UnwindBlock->removePredecessor(II->getParent());
+
+ // Insert a branch to the normal destination right before the
+ // invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Finally, delete the invoke instruction!
+ BB->getInstList().pop_back();
+
+ // If the unwind block is now dead, nuke it.
+ if (pred_begin(UnwindBlock) == pred_end(UnwindBlock))
+ DeleteBasicBlock(UnwindBlock); // Delete the new BB.
+
+ ++NumRemoved;
+ MadeChange = true;
+ }
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+ // This call calls a function that cannot return. Insert an
+ // unreachable instruction after it and simplify the code. Do this
+ // by splitting the BB, adding the unreachable, then deleting the
+ // new BB.
+ BasicBlock *New = BB->splitBasicBlock(I);
+
+ // Remove the uncond branch and add an unreachable.
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), BB);
+
+ DeleteBasicBlock(New); // Delete the new BB.
+ MadeChange = true;
+ ++NumUnreach;
+ break;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// DeleteBasicBlock - remove the specified basic block from the program,
+/// updating the callgraph to reflect any now-obsolete edges due to calls that
+/// exist in the BB.
+void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
+ assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!");
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ CallGraphNode *CGN = CG[BB->getParent()];
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
+ --I;
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<IntrinsicInst>(I))
+ CGN->removeCallEdgeFor(CI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ CGN->removeCallEdgeFor(II);
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // Get the list of successors of this block.
+ std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB));
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ Succs[i]->removePredecessor(BB);
+
+ BB->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
new file mode 100644
index 000000000000..f00830aadaad
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -0,0 +1,73 @@
+//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for
+// dead declarations and removes them. Dead declarations are declarations of
+// functions for which no implementation is available (i.e., declarations for
+// unused library functions).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strip-dead-prototypes"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
+
+namespace {
+
+/// @brief Pass to remove unused function declarations.
+class StripDeadPrototypesPass : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ StripDeadPrototypesPass() : ModulePass(ID) {
+ initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnModule(Module &M);
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesPass::ID = 0;
+INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
+ "Strip Unused Function Prototypes", false, false)
+
+bool StripDeadPrototypesPass::runOnModule(Module &M) {
+ bool MadeChange = false;
+
+ // Erase dead function prototypes.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function *F = I++;
+ // Function must be a prototype and unused.
+ if (F->isDeclaration() && F->use_empty()) {
+ F->eraseFromParent();
+ ++NumDeadPrototypes;
+ MadeChange = true;
+ }
+ }
+
+ // Erase dead global var prototypes.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ) {
+ GlobalVariable *GV = I++;
+ // Global must be a prototype and unused.
+ if (GV->isDeclaration() && GV->use_empty())
+ GV->eraseFromParent();
+ }
+
+ // Return an indication of whether we changed anything or not.
+ return MadeChange;
+}
+
+ModulePass *llvm::createStripDeadPrototypesPass() {
+ return new StripDeadPrototypesPass();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
new file mode 100644
index 000000000000..5f8681ff454e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -0,0 +1,414 @@
+//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The StripSymbols transformation implements code stripping. Specifically, it
+// can delete:
+//
+// * names for virtual registers
+// * symbols for internal globals and functions
+// * debug information
+//
+// Note that this transformation makes code much less readable, so it should
+// only be used in situations where the 'strip' utility would be used, such as
+// reducing code size or making it harder to reverse engineer code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+namespace {
+ class StripSymbols : public ModulePass {
+ bool OnlyDebugInfo;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripSymbols(bool ODI = false)
+ : ModulePass(ID), OnlyDebugInfo(ODI) {
+ initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripNonDebugSymbols : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripNonDebugSymbols()
+ : ModulePass(ID) {
+ initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripDebugDeclare : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDebugDeclare()
+ : ModulePass(ID) {
+ initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripDeadDebugInfo : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDeadDebugInfo()
+ : ModulePass(ID) {
+ initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char StripSymbols::ID = 0;
+INITIALIZE_PASS(StripSymbols, "strip",
+ "Strip all symbols from a module", false, false)
+
+ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
+ return new StripSymbols(OnlyDebugInfo);
+}
+
+char StripNonDebugSymbols::ID = 0;
+INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
+ "Strip all symbols, except dbg symbols, from a module",
+ false, false)
+
+ModulePass *llvm::createStripNonDebugSymbolsPass() {
+ return new StripNonDebugSymbols();
+}
+
+char StripDebugDeclare::ID = 0;
+INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
+ "Strip all llvm.dbg.declare intrinsics", false, false)
+
+ModulePass *llvm::createStripDebugDeclarePass() {
+ return new StripDebugDeclare();
+}
+
+char StripDeadDebugInfo::ID = 0;
+INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
+ "Strip debug info for unused symbols", false, false)
+
+ModulePass *llvm::createStripDeadDebugInfoPass() {
+ return new StripDeadDebugInfo();
+}
+
+/// OnlyUsedBy - Return true if V is only used by Usr.
+static bool OnlyUsedBy(Value *V, Value *Usr) {
+ for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ User *U = *I;
+ if (U != Usr)
+ return false;
+ }
+ return true;
+}
+
+static void RemoveDeadConstant(Constant *C) {
+ assert(C->use_empty() && "Constant is not dead!");
+ SmallPtrSet<Constant*, 4> Operands;
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ if (OnlyUsedBy(C->getOperand(i), C))
+ Operands.insert(cast<Constant>(C->getOperand(i)));
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
+ GV->eraseFromParent();
+ }
+ else if (!isa<Function>(C))
+ if (isa<CompositeType>(C->getType()))
+ C->destroyConstant();
+
+ // If the constant referenced anything, see if we can delete it as well.
+ for (SmallPtrSet<Constant*, 4>::iterator OI = Operands.begin(),
+ OE = Operands.end(); OI != OE; ++OI)
+ RemoveDeadConstant(*OI);
+}
+
+// Strip the symbol table of its names.
+//
+static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
+ for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) {
+ Value *V = VI->getValue();
+ ++VI;
+ if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
+ if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
+ // Set name to "", removing from symbol table!
+ V->setName("");
+ }
+ }
+}
+
+// Strip any named types of their names.
+static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
+ TypeFinder StructTypes;
+ StructTypes.run(M, false);
+
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *STy = StructTypes[i];
+ if (STy->isLiteral() || STy->getName().empty()) continue;
+
+ if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
+ continue;
+
+ STy->setName("");
+ }
+}
+
+/// Find values that are marked as llvm.used.
+static void findUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ UsedValues.insert(LLVMUsed);
+
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
+/// StripSymbolNames - Strip symbol names.
+static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
+
+ SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
+ findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues);
+ findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
+ I->setName(""); // Internal symbols can't participate in linkage
+ }
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
+ I->setName(""); // Internal symbols can't participate in linkage
+ StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
+ }
+
+ // Remove all names from types.
+ StripTypeNames(M, PreserveDbgInfo);
+
+ return true;
+}
+
+// StripDebugInfo - Strip debug info in the module if it exists.
+// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
+// llvm.dbg.region.end calls, and any globals they point to if now dead.
+static bool StripDebugInfo(Module &M) {
+
+ bool Changed = false;
+
+ // Remove all of the calls to the debugger intrinsics, and remove them from
+ // the module.
+ if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ CI->eraseFromParent();
+ }
+ Declare->eraseFromParent();
+ Changed = true;
+ }
+
+ if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
+ while (!DbgVal->use_empty()) {
+ CallInst *CI = cast<CallInst>(DbgVal->use_back());
+ CI->eraseFromParent();
+ }
+ DbgVal->eraseFromParent();
+ Changed = true;
+ }
+
+ for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
+ NME = M.named_metadata_end(); NMI != NME;) {
+ NamedMDNode *NMD = NMI;
+ ++NMI;
+ if (NMD->getName().startswith("llvm.dbg.")) {
+ NMD->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+ for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
+ ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI) {
+ if (!BI->getDebugLoc().isUnknown()) {
+ Changed = true;
+ BI->setDebugLoc(DebugLoc());
+ }
+ }
+
+ return Changed;
+}
+
+bool StripSymbols::runOnModule(Module &M) {
+ bool Changed = false;
+ Changed |= StripDebugInfo(M);
+ if (!OnlyDebugInfo)
+ Changed |= StripSymbolNames(M, false);
+ return Changed;
+}
+
+bool StripNonDebugSymbols::runOnModule(Module &M) {
+ return StripSymbolNames(M, true);
+}
+
+bool StripDebugDeclare::runOnModule(Module &M) {
+
+ Function *Declare = M.getFunction("llvm.dbg.declare");
+ std::vector<Constant*> DeadConstants;
+
+ if (Declare) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ Value *Arg1 = CI->getArgOperand(0);
+ Value *Arg2 = CI->getArgOperand(1);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg1->use_empty()) {
+ if (Constant *C = dyn_cast<Constant>(Arg1))
+ DeadConstants.push_back(C);
+ else
+ RecursivelyDeleteTriviallyDeadInstructions(Arg1);
+ }
+ if (Arg2->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg2))
+ DeadConstants.push_back(C);
+ }
+ Declare->eraseFromParent();
+ }
+
+ while (!DeadConstants.empty()) {
+ Constant *C = DeadConstants.back();
+ DeadConstants.pop_back();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasLocalLinkage())
+ RemoveDeadConstant(GV);
+ } else
+ RemoveDeadConstant(C);
+ }
+
+ return true;
+}
+
+/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
+/// printer to not emit usual symbol prefix before the symbol name is used then
+/// return linkage name after skipping this special LLVM prefix.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Debugging infomration is encoded in llvm IR using metadata. This is designed
+ // such a way that debug info for symbols preserved even if symbols are
+ // optimized away by the optimizer. This special pass removes debug info for
+ // such symbols.
+
+ // llvm.dbg.gv keeps track of debug info for global variables.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DIGlobalVariable(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
+ if (GV && M.getGlobalVariable(GV->getName(), true)) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(*I);
+ }
+ else
+ Changed = true;
+ }
+ }
+
+ // llvm.dbg.sp keeps track of debug info for subprograms.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DISubprogram(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ bool FnIsLive = false;
+ if (Function *F = DISubprogram(*I).getFunction())
+ if (M.getFunction(F->getName()))
+ FnIsLive = true;
+ if (FnIsLive) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(*I);
+ } else {
+ // Remove llvm.dbg.lv.fnname named mdnode which may have been used
+ // to hold debug info for dead function's local variables.
+ StringRef FName = DISubprogram(*I).getLinkageName();
+ if (FName.empty())
+ FName = DISubprogram(*I).getName();
+ if (NamedMDNode *LVNMD =
+ M.getNamedMetadata(Twine("llvm.dbg.lv.",
+ getRealLinkageName(FName))))
+ LVNMD->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
new file mode 100644
index 000000000000..1f6a3a5e335d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -0,0 +1,389 @@
+//===- InstCombine.h - Main InstCombine pass definition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_INSTCOMBINE_H
+#define INSTCOMBINE_INSTCOMBINE_H
+
+#include "InstCombineWorklist.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/TargetFolder.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+
+namespace llvm {
+ class CallSite;
+ class DataLayout;
+ class TargetLibraryInfo;
+ class DbgDeclareInst;
+ class MemIntrinsic;
+ class MemSetInst;
+
+/// SelectPatternFlavor - We can match a variety of different patterns for
+/// select operations.
+enum SelectPatternFlavor {
+ SPF_UNKNOWN = 0,
+ SPF_SMIN, SPF_UMIN,
+ SPF_SMAX, SPF_UMAX
+ //SPF_ABS - TODO.
+};
+
+/// getComplexity: Assign a complexity or rank value to LLVM Values...
+/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
+static inline unsigned getComplexity(Value *V) {
+ if (isa<Instruction>(V)) {
+ if (BinaryOperator::isNeg(V) ||
+ BinaryOperator::isFNeg(V) ||
+ BinaryOperator::isNot(V))
+ return 3;
+ return 4;
+ }
+ if (isa<Argument>(V)) return 3;
+ return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
+}
+
+
+/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
+/// just like the normal insertion helper, but also adds any new instructions
+/// to the instcombine worklist.
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
+ : public IRBuilderDefaultInserter<true> {
+ InstCombineWorklist &Worklist;
+public:
+ InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
+
+ void InsertHelper(Instruction *I, const Twine &Name,
+ BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
+ Worklist.Add(I);
+ }
+};
+
+/// InstCombiner - The -instcombine pass.
+class LLVM_LIBRARY_VISIBILITY InstCombiner
+ : public FunctionPass,
+ public InstVisitor<InstCombiner, Instruction*> {
+ DataLayout *TD;
+ TargetLibraryInfo *TLI;
+ bool MadeIRChange;
+ LibCallSimplifier *Simplifier;
+ bool MinimizeSize;
+public:
+ /// Worklist - All of the instructions that need to be simplified.
+ InstCombineWorklist Worklist;
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
+ BuilderTy *Builder;
+
+ static char ID; // Pass identification, replacement for typeid
+ InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+ MinimizeSize = false;
+ initializeInstCombinerPass(*PassRegistry::getPassRegistry());
+ }
+
+public:
+ virtual bool runOnFunction(Function &F);
+
+ bool DoOneIteration(Function &F, unsigned ItNum);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ DataLayout *getDataLayout() const { return TD; }
+
+ TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
+
+ // Visitation implementation - Implement instruction combining for different
+ // instruction types. The semantics are as follows:
+ // Return Value:
+ // null - No change was made
+ // I - Change was made, I is still valid, I may be dead though
+ // otherwise - Change was made, replace I with returned instruction
+ //
+ Instruction *visitAdd(BinaryOperator &I);
+ Instruction *visitFAdd(BinaryOperator &I);
+ Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty);
+ Instruction *visitSub(BinaryOperator &I);
+ Instruction *visitFSub(BinaryOperator &I);
+ Instruction *visitMul(BinaryOperator &I);
+ Value *foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
+ Instruction *InsertBefore);
+ Instruction *visitFMul(BinaryOperator &I);
+ Instruction *visitURem(BinaryOperator &I);
+ Instruction *visitSRem(BinaryOperator &I);
+ Instruction *visitFRem(BinaryOperator &I);
+ bool SimplifyDivRemOfSelect(BinaryOperator &I);
+ Instruction *commonRemTransforms(BinaryOperator &I);
+ Instruction *commonIRemTransforms(BinaryOperator &I);
+ Instruction *commonDivTransforms(BinaryOperator &I);
+ Instruction *commonIDivTransforms(BinaryOperator &I);
+ Instruction *visitUDiv(BinaryOperator &I);
+ Instruction *visitSDiv(BinaryOperator &I);
+ Instruction *visitFDiv(BinaryOperator &I);
+ Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *visitAnd(BinaryOperator &I);
+ Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C);
+ Instruction *visitOr (BinaryOperator &I);
+ Instruction *visitXor(BinaryOperator &I);
+ Instruction *visitShl(BinaryOperator &I);
+ Instruction *visitAShr(BinaryOperator &I);
+ Instruction *visitLShr(BinaryOperator &I);
+ Instruction *commonShiftTransforms(BinaryOperator &I);
+ Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC);
+ Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+ GlobalVariable *GV, CmpInst &ICI,
+ ConstantInt *AndCst = 0);
+ Instruction *visitFCmpInst(FCmpInst &I);
+ Instruction *visitICmpInst(ICmpInst &I);
+ Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
+ Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHS,
+ ConstantInt *RHS);
+ Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+ Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+ Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred, Value *TheAdd);
+ Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond, Instruction &I);
+ Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I);
+ Instruction *commonCastTransforms(CastInst &CI);
+ Instruction *commonPointerCastTransforms(CastInst &CI);
+ Instruction *visitTrunc(TruncInst &CI);
+ Instruction *visitZExt(ZExtInst &CI);
+ Instruction *visitSExt(SExtInst &CI);
+ Instruction *visitFPTrunc(FPTruncInst &CI);
+ Instruction *visitFPExt(CastInst &CI);
+ Instruction *visitFPToUI(FPToUIInst &FI);
+ Instruction *visitFPToSI(FPToSIInst &FI);
+ Instruction *visitUIToFP(CastInst &CI);
+ Instruction *visitSIToFP(CastInst &CI);
+ Instruction *visitPtrToInt(PtrToIntInst &CI);
+ Instruction *visitIntToPtr(IntToPtrInst &CI);
+ Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI);
+ Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
+ Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
+ Value *A, Value *B, Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C);
+ Instruction *visitSelectInst(SelectInst &SI);
+ Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
+ Instruction *visitCallInst(CallInst &CI);
+ Instruction *visitInvokeInst(InvokeInst &II);
+
+ Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
+ Instruction *visitPHINode(PHINode &PN);
+ Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
+ Instruction *visitAllocaInst(AllocaInst &AI);
+ Instruction *visitAllocSite(Instruction &FI);
+ Instruction *visitFree(CallInst &FI);
+ Instruction *visitLoadInst(LoadInst &LI);
+ Instruction *visitStoreInst(StoreInst &SI);
+ Instruction *visitBranchInst(BranchInst &BI);
+ Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitInsertElementInst(InsertElementInst &IE);
+ Instruction *visitExtractElementInst(ExtractElementInst &EI);
+ Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+ Instruction *visitExtractValueInst(ExtractValueInst &EV);
+ Instruction *visitLandingPadInst(LandingPadInst &LI);
+
+ // visitInstruction - Specify what to return for unhandled instructions...
+ Instruction *visitInstruction(Instruction &I) { return 0; }
+
+private:
+ bool ShouldChangeType(Type *From, Type *To) const;
+ Value *dyn_castNegVal(Value *V) const;
+ Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
+ Type *FindElementAtOffset(Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices);
+ Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
+
+ /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+ /// results in any code being generated and is interesting to optimize out. If
+ /// the cast can be eliminated by some other simple transformation, we prefer
+ /// to do the simplification first.
+ bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+ Type *Ty);
+
+ Instruction *visitCallSite(CallSite CS);
+ Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *TD);
+ bool transformConstExprCastCall(CallSite CS);
+ Instruction *transformCallThroughTrampoline(CallSite CS,
+ IntrinsicInst *Tramp);
+ Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform = true);
+ Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
+ Value *EmitGEPOffset(User *GEP);
+
+public:
+ // InsertNewInstBefore - insert an instruction New before instruction Old
+ // in the program. Add the new instruction to the worklist.
+ //
+ Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
+ assert(New && New->getParent() == 0 &&
+ "New instruction already inserted into a basic block!");
+ BasicBlock *BB = Old.getParent();
+ BB->getInstList().insert(&Old, New); // Insert inst
+ Worklist.Add(New);
+ return New;
+ }
+
+ // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
+ // debug loc.
+ //
+ Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
+ New->setDebugLoc(Old.getDebugLoc());
+ return InsertNewInstBefore(New, Old);
+ }
+
+ // ReplaceInstUsesWith - This method is to be used when an instruction is
+ // found to be dead, replacable with another preexisting expression. Here
+ // we add all uses of I to the worklist, replace all uses of I with the new
+ // value, then return I, so that the inst combiner will know that I was
+ // modified.
+ //
+ Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
+
+ // If we are replacing the instruction with itself, this must be in a
+ // segment of unreachable code, so just clobber the instruction.
+ if (&I == V)
+ V = UndefValue::get(I.getType());
+
+ DEBUG(errs() << "IC: Replacing " << I << "\n"
+ " with " << *V << '\n');
+
+ I.replaceAllUsesWith(V);
+ return &I;
+ }
+
+ // EraseInstFromFunction - When dealing with an instruction that has side
+ // effects or produces a void value, we can't rely on DCE to delete the
+ // instruction. Instead, visit methods should return the value returned by
+ // this function.
+ Instruction *EraseInstFromFunction(Instruction &I) {
+ DEBUG(errs() << "IC: ERASE " << I << '\n');
+
+ assert(I.use_empty() && "Cannot erase instruction that is used!");
+ // Make sure that we reprocess all operands now that we reduced their
+ // use counts.
+ if (I.getNumOperands() < 8) {
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i))
+ Worklist.Add(Op);
+ }
+ Worklist.Remove(&I);
+ I.eraseFromParent();
+ MadeIRChange = true;
+ return 0; // Don't do anything with FI
+ }
+
+ void ComputeMaskedBits(Value *V, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0) const {
+ return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
+ }
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ unsigned Depth = 0) const {
+ return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+ }
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
+ return llvm::ComputeNumSignBits(Op, TD, Depth);
+ }
+
+private:
+
+ /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+ /// operators which are associative or commutative.
+ bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
+
+ /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+ /// which some other binary operation distributes over either by factorizing
+ /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+ /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
+ /// a win). Returns the simplified value, or null if it didn't simplify.
+ Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
+
+ /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
+ /// based on the demanded bits.
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth);
+ bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth=0);
+ /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
+ /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence.
+ Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
+ APInt DemandedMask, APInt &KnownZero,
+ APInt &KnownOne);
+
+ /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+ /// SimplifyDemandedBits knows about. See if the instruction has any
+ /// properties that allow us to simplify its operands.
+ bool SimplifyDemandedInstructionBits(Instruction &Inst);
+
+ Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt& UndefElts, unsigned Depth = 0);
+
+ // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
+ // which has a PHI node as operand #0, see if we can fold the instruction
+ // into the PHI (which is only possible if all operands to the PHI are
+ // constants).
+ //
+ Instruction *FoldOpIntoPhi(Instruction &I);
+
+ // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+ // operator and they all are only used by the PHI, PHI together their
+ // inputs, and do the operation once, to the result of the PHI.
+ Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
+
+
+ Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
+ ConstantInt *AndRHS, BinaryOperator &TheAnd);
+
+ Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
+ bool isSub, Instruction &I);
+ Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside);
+ Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
+ Instruction *MatchBSwap(BinaryOperator &I);
+ bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+ Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+ Instruction *SimplifyMemSet(MemSetInst *MI);
+
+
+ Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
+
+ /// Descale - Return a value X such that Val = X * Scale, or null if none. If
+ /// the multiplication is known not to overflow then NoSignedWrap is set.
+ Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
+};
+
+
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
new file mode 100644
index 000000000000..7595da08d3e8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -0,0 +1,1497 @@
+//===- InstCombineAddSub.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for add, fadd, sub, and fsub.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+namespace {
+
+ /// Class representing coefficient of floating-point addend.
+ /// This class needs to be highly efficient, which is especially true for
+ /// the constructor. As of I write this comment, the cost of the default
+ /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
+ /// perform write-merging).
+ ///
+ class FAddendCoef {
+ public:
+ // The constructor has to initialize a APFloat, which is uncessary for
+ // most addends which have coefficient either 1 or -1. So, the constructor
+ // is expensive. In order to avoid the cost of the constructor, we should
+ // reuse some instances whenever possible. The pre-created instances
+ // FAddCombine::Add[0-5] embodies this idea.
+ //
+ FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
+ ~FAddendCoef();
+
+ void set(short C) {
+ assert(!insaneIntVal(C) && "Insane coefficient");
+ IsFp = false; IntVal = C;
+ }
+
+ void set(const APFloat& C);
+
+ void negate();
+
+ bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
+ Value *getValue(Type *) const;
+
+ // If possible, don't define operator+/operator- etc because these
+ // operators inevitably call FAddendCoef's constructor which is not cheap.
+ void operator=(const FAddendCoef &A);
+ void operator+=(const FAddendCoef &A);
+ void operator-=(const FAddendCoef &A);
+ void operator*=(const FAddendCoef &S);
+
+ bool isOne() const { return isInt() && IntVal == 1; }
+ bool isTwo() const { return isInt() && IntVal == 2; }
+ bool isMinusOne() const { return isInt() && IntVal == -1; }
+ bool isMinusTwo() const { return isInt() && IntVal == -2; }
+
+ private:
+ bool insaneIntVal(int V) { return V > 4 || V < -4; }
+ APFloat *getFpValPtr(void)
+ { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); }
+ const APFloat *getFpValPtr(void) const
+ { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); }
+
+ const APFloat &getFpVal(void) const {
+ assert(IsFp && BufHasFpVal && "Incorret state");
+ return *getFpValPtr();
+ }
+
+ APFloat &getFpVal(void)
+ { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
+
+ bool isInt() const { return !IsFp; }
+
+ // If the coefficient is represented by an integer, promote it to a
+ // floating point.
+ void convertToFpType(const fltSemantics &Sem);
+
+ // Construct an APFloat from a signed integer.
+ // TODO: We should get rid of this function when APFloat can be constructed
+ // from an *SIGNED* integer.
+ APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
+ private:
+
+ bool IsFp;
+
+ // True iff FpValBuf contains an instance of APFloat.
+ bool BufHasFpVal;
+
+ // The integer coefficient of an individual addend is either 1 or -1,
+ // and we try to simplify at most 4 addends from neighboring at most
+ // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
+ // is overkill of this end.
+ short IntVal;
+
+ AlignedCharArrayUnion<APFloat> FpValBuf;
+ };
+
+ /// FAddend is used to represent floating-point addend. An addend is
+ /// represented as <C, V>, where the V is a symbolic value, and C is a
+ /// constant coefficient. A constant addend is represented as <C, 0>.
+ ///
+ class FAddend {
+ public:
+ FAddend() { Val = 0; }
+
+ Value *getSymVal (void) const { return Val; }
+ const FAddendCoef &getCoef(void) const { return Coeff; }
+
+ bool isConstant() const { return Val == 0; }
+ bool isZero() const { return Coeff.isZero(); }
+
+ void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; }
+ void set(const APFloat& Coefficient, Value *V)
+ { Coeff.set(Coefficient); Val = V; }
+ void set(const ConstantFP* Coefficient, Value *V)
+ { Coeff.set(Coefficient->getValueAPF()); Val = V; }
+
+ void negate() { Coeff.negate(); }
+
+ /// Drill down the U-D chain one step to find the definition of V, and
+ /// try to break the definition into one or two addends.
+ static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
+
+ /// Similar to FAddend::drillDownOneStep() except that the value being
+ /// splitted is the addend itself.
+ unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
+
+ void operator+=(const FAddend &T) {
+ assert((Val == T.Val) && "Symbolic-values disagree");
+ Coeff += T.Coeff;
+ }
+
+ private:
+ void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
+
+ // This addend has the value of "Coeff * Val".
+ Value *Val;
+ FAddendCoef Coeff;
+ };
+
+ /// FAddCombine is the class for optimizing an unsafe fadd/fsub along
+ /// with its neighboring at most two instructions.
+ ///
+ class FAddCombine {
+ public:
+ FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
+ Value *simplify(Instruction *FAdd);
+
+ private:
+ typedef SmallVector<const FAddend*, 4> AddendVect;
+
+ Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
+
+ Value *performFactorization(Instruction *I);
+
+ /// Convert given addend to a Value
+ Value *createAddendVal(const FAddend &A, bool& NeedNeg);
+
+ /// Return the number of instructions needed to emit the N-ary addition.
+ unsigned calcInstrNumber(const AddendVect& Vect);
+ Value *createFSub(Value *Opnd0, Value *Opnd1);
+ Value *createFAdd(Value *Opnd0, Value *Opnd1);
+ Value *createFMul(Value *Opnd0, Value *Opnd1);
+ Value *createFDiv(Value *Opnd0, Value *Opnd1);
+ Value *createFNeg(Value *V);
+ Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
+ void createInstPostProc(Instruction *NewInst);
+
+ InstCombiner::BuilderTy *Builder;
+ Instruction *Instr;
+
+ private:
+ // Debugging stuff are clustered here.
+ #ifndef NDEBUG
+ unsigned CreateInstrNum;
+ void initCreateInstNum() { CreateInstrNum = 0; }
+ void incCreateInstNum() { CreateInstrNum++; }
+ #else
+ void initCreateInstNum() {}
+ void incCreateInstNum() {}
+ #endif
+ };
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of
+// {FAddendCoef, FAddend, FAddition, FAddCombine}.
+//
+//===----------------------------------------------------------------------===//
+FAddendCoef::~FAddendCoef() {
+ if (BufHasFpVal)
+ getFpValPtr()->~APFloat();
+}
+
+void FAddendCoef::set(const APFloat& C) {
+ APFloat *P = getFpValPtr();
+
+ if (isInt()) {
+ // As the buffer is meanless byte stream, we cannot call
+ // APFloat::operator=().
+ new(P) APFloat(C);
+ } else
+ *P = C;
+
+ IsFp = BufHasFpVal = true;
+}
+
+void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
+ if (!isInt())
+ return;
+
+ APFloat *P = getFpValPtr();
+ if (IntVal > 0)
+ new(P) APFloat(Sem, IntVal);
+ else {
+ new(P) APFloat(Sem, 0 - IntVal);
+ P->changeSign();
+ }
+ IsFp = BufHasFpVal = true;
+}
+
+APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
+ if (Val >= 0)
+ return APFloat(Sem, Val);
+
+ APFloat T(Sem, 0 - Val);
+ T.changeSign();
+
+ return T;
+}
+
+void FAddendCoef::operator=(const FAddendCoef &That) {
+ if (That.isInt())
+ set(That.IntVal);
+ else
+ set(That.getFpVal());
+}
+
+void FAddendCoef::operator+=(const FAddendCoef &That) {
+ enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
+ if (isInt() == That.isInt()) {
+ if (isInt())
+ IntVal += That.IntVal;
+ else
+ getFpVal().add(That.getFpVal(), RndMode);
+ return;
+ }
+
+ if (isInt()) {
+ const APFloat &T = That.getFpVal();
+ convertToFpType(T.getSemantics());
+ getFpVal().add(T, RndMode);
+ return;
+ }
+
+ APFloat &T = getFpVal();
+ T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
+}
+
+void FAddendCoef::operator-=(const FAddendCoef &That) {
+ enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
+ if (isInt() == That.isInt()) {
+ if (isInt())
+ IntVal -= That.IntVal;
+ else
+ getFpVal().subtract(That.getFpVal(), RndMode);
+ return;
+ }
+
+ if (isInt()) {
+ const APFloat &T = That.getFpVal();
+ convertToFpType(T.getSemantics());
+ getFpVal().subtract(T, RndMode);
+ return;
+ }
+
+ APFloat &T = getFpVal();
+ T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode);
+}
+
+void FAddendCoef::operator*=(const FAddendCoef &That) {
+ if (That.isOne())
+ return;
+
+ if (That.isMinusOne()) {
+ negate();
+ return;
+ }
+
+ if (isInt() && That.isInt()) {
+ int Res = IntVal * (int)That.IntVal;
+ assert(!insaneIntVal(Res) && "Insane int value");
+ IntVal = Res;
+ return;
+ }
+
+ const fltSemantics &Semantic =
+ isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
+
+ if (isInt())
+ convertToFpType(Semantic);
+ APFloat &F0 = getFpVal();
+
+ if (That.isInt())
+ F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
+ APFloat::rmNearestTiesToEven);
+ else
+ F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
+
+ return;
+}
+
+void FAddendCoef::negate() {
+ if (isInt())
+ IntVal = 0 - IntVal;
+ else
+ getFpVal().changeSign();
+}
+
+Value *FAddendCoef::getValue(Type *Ty) const {
+ return isInt() ?
+ ConstantFP::get(Ty, float(IntVal)) :
+ ConstantFP::get(Ty->getContext(), getFpVal());
+}
+
+// The definition of <Val> Addends
+// =========================================
+// A + B <1, A>, <1,B>
+// A - B <1, A>, <1,B>
+// 0 - B <-1, B>
+// C * A, <C, A>
+// A + C <1, A> <C, NULL>
+// 0 +/- 0 <0, NULL> (corner case)
+//
+// Legend: A and B are not constant, C is constant
+//
+unsigned FAddend::drillValueDownOneStep
+ (Value *Val, FAddend &Addend0, FAddend &Addend1) {
+ Instruction *I = 0;
+ if (Val == 0 || !(I = dyn_cast<Instruction>(Val)))
+ return 0;
+
+ unsigned Opcode = I->getOpcode();
+
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) {
+ ConstantFP *C0, *C1;
+ Value *Opnd0 = I->getOperand(0);
+ Value *Opnd1 = I->getOperand(1);
+ if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
+ Opnd0 = 0;
+
+ if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
+ Opnd1 = 0;
+
+ if (Opnd0) {
+ if (!C0)
+ Addend0.set(1, Opnd0);
+ else
+ Addend0.set(C0, 0);
+ }
+
+ if (Opnd1) {
+ FAddend &Addend = Opnd0 ? Addend1 : Addend0;
+ if (!C1)
+ Addend.set(1, Opnd1);
+ else
+ Addend.set(C1, 0);
+ if (Opcode == Instruction::FSub)
+ Addend.negate();
+ }
+
+ if (Opnd0 || Opnd1)
+ return Opnd0 && Opnd1 ? 2 : 1;
+
+ // Both operands are zero. Weird!
+ Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0);
+ return 1;
+ }
+
+ if (I->getOpcode() == Instruction::FMul) {
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) {
+ Addend0.set(C, V1);
+ return 1;
+ }
+
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) {
+ Addend0.set(C, V0);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+// Try to break *this* addend into two addends. e.g. Suppose this addend is
+// <2.3, V>, and V = X + Y, by calling this function, we obtain two addends,
+// i.e. <2.3, X> and <2.3, Y>.
+//
+unsigned FAddend::drillAddendDownOneStep
+ (FAddend &Addend0, FAddend &Addend1) const {
+ if (isConstant())
+ return 0;
+
+ unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
+ if (!BreakNum || Coeff.isOne())
+ return BreakNum;
+
+ Addend0.Scale(Coeff);
+
+ if (BreakNum == 2)
+ Addend1.Scale(Coeff);
+
+ return BreakNum;
+}
+
+// Try to perform following optimization on the input instruction I. Return the
+// simplified expression if was successful; otherwise, return 0.
+//
+// Instruction "I" is Simplified into
+// -------------------------------------------------------
+// (x * y) +/- (x * z) x * (y +/- z)
+// (y / x) +/- (z / x) (y +/- z) / x
+//
+Value *FAddCombine::performFactorization(Instruction *I) {
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
+
+ if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
+ return 0;
+
+ bool isMpy = false;
+ if (I0->getOpcode() == Instruction::FMul)
+ isMpy = true;
+ else if (I0->getOpcode() != Instruction::FDiv)
+ return 0;
+
+ Value *Opnd0_0 = I0->getOperand(0);
+ Value *Opnd0_1 = I0->getOperand(1);
+ Value *Opnd1_0 = I1->getOperand(0);
+ Value *Opnd1_1 = I1->getOperand(1);
+
+ // Input Instr I Factor AddSub0 AddSub1
+ // ----------------------------------------------
+ // (x*y) +/- (x*z) x y z
+ // (y/x) +/- (z/x) x y z
+ //
+ Value *Factor = 0;
+ Value *AddSub0 = 0, *AddSub1 = 0;
+
+ if (isMpy) {
+ if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
+ Factor = Opnd0_0;
+ else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
+ Factor = Opnd0_1;
+
+ if (Factor) {
+ AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
+ AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
+ }
+ } else if (Opnd0_1 == Opnd1_1) {
+ Factor = Opnd0_1;
+ AddSub0 = Opnd0_0;
+ AddSub1 = Opnd1_0;
+ }
+
+ if (!Factor)
+ return 0;
+
+ // Create expression "NewAddSub = AddSub0 +/- AddsSub1"
+ Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
+ createFAdd(AddSub0, AddSub1) :
+ createFSub(AddSub0, AddSub1);
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
+ const APFloat &F = CFP->getValueAPF();
+ if (!F.isNormal() || F.isDenormal())
+ return 0;
+ }
+
+ if (isMpy)
+ return createFMul(Factor, NewAddSub);
+
+ return createFDiv(NewAddSub, Factor);
+}
+
+Value *FAddCombine::simplify(Instruction *I) {
+ assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
+
+ // Currently we are not able to handle vector type.
+ if (I->getType()->isVectorTy())
+ return 0;
+
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ // Save the instruction before calling other member-functions.
+ Instr = I;
+
+ FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
+
+ unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1);
+
+ // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
+ unsigned Opnd0_ExpNum = 0;
+ unsigned Opnd1_ExpNum = 0;
+
+ if (!Opnd0.isConstant())
+ Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
+
+ // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
+ if (OpndNum == 2 && !Opnd1.isConstant())
+ Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1);
+
+ // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
+ if (Opnd0_ExpNum && Opnd1_ExpNum) {
+ AddendVect AllOpnds;
+ AllOpnds.push_back(&Opnd0_0);
+ AllOpnds.push_back(&Opnd1_0);
+ if (Opnd0_ExpNum == 2)
+ AllOpnds.push_back(&Opnd0_1);
+ if (Opnd1_ExpNum == 2)
+ AllOpnds.push_back(&Opnd1_1);
+
+ // Compute instruction quota. We should save at least one instruction.
+ unsigned InstQuota = 0;
+
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+ InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
+ (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
+
+ if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
+ return R;
+ }
+
+ if (OpndNum != 2) {
+ // The input instruction is : "I=0.0 +/- V". If the "V" were able to be
+ // splitted into two addends, say "V = X - Y", the instruction would have
+ // been optimized into "I = Y - X" in the previous steps.
+ //
+ const FAddendCoef &CE = Opnd0.getCoef();
+ return CE.isOne() ? Opnd0.getSymVal() : 0;
+ }
+
+ // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
+ if (Opnd1_ExpNum) {
+ AddendVect AllOpnds;
+ AllOpnds.push_back(&Opnd0);
+ AllOpnds.push_back(&Opnd1_0);
+ if (Opnd1_ExpNum == 2)
+ AllOpnds.push_back(&Opnd1_1);
+
+ if (Value *R = simplifyFAdd(AllOpnds, 1))
+ return R;
+ }
+
+ // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
+ if (Opnd0_ExpNum) {
+ AddendVect AllOpnds;
+ AllOpnds.push_back(&Opnd1);
+ AllOpnds.push_back(&Opnd0_0);
+ if (Opnd0_ExpNum == 2)
+ AllOpnds.push_back(&Opnd0_1);
+
+ if (Value *R = simplifyFAdd(AllOpnds, 1))
+ return R;
+ }
+
+ // step 6: Try factorization as the last resort,
+ return performFactorization(I);
+}
+
+Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
+
+ unsigned AddendNum = Addends.size();
+ assert(AddendNum <= 4 && "Too many addends");
+
+ // For saving intermediate results;
+ unsigned NextTmpIdx = 0;
+ FAddend TmpResult[3];
+
+ // Points to the constant addend of the resulting simplified expression.
+ // If the resulting expr has constant-addend, this constant-addend is
+ // desirable to reside at the top of the resulting expression tree. Placing
+ // constant close to supper-expr(s) will potentially reveal some optimization
+ // opportunities in super-expr(s).
+ //
+ const FAddend *ConstAdd = 0;
+
+ // Simplified addends are placed <SimpVect>.
+ AddendVect SimpVect;
+
+ // The outer loop works on one symbolic-value at a time. Suppose the input
+ // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
+ // The symbolic-values will be processed in this order: x, y, z.
+ //
+ for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
+
+ const FAddend *ThisAddend = Addends[SymIdx];
+ if (!ThisAddend) {
+ // This addend was processed before.
+ continue;
+ }
+
+ Value *Val = ThisAddend->getSymVal();
+ unsigned StartIdx = SimpVect.size();
+ SimpVect.push_back(ThisAddend);
+
+ // The inner loop collects addends sharing same symbolic-value, and these
+ // addends will be later on folded into a single addend. Following above
+ // example, if the symbolic value "y" is being processed, the inner loop
+ // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will
+ // be later on folded into "<b1+b2, y>".
+ //
+ for (unsigned SameSymIdx = SymIdx + 1;
+ SameSymIdx < AddendNum; SameSymIdx++) {
+ const FAddend *T = Addends[SameSymIdx];
+ if (T && T->getSymVal() == Val) {
+ // Set null such that next iteration of the outer loop will not process
+ // this addend again.
+ Addends[SameSymIdx] = 0;
+ SimpVect.push_back(T);
+ }
+ }
+
+ // If multiple addends share same symbolic value, fold them together.
+ if (StartIdx + 1 != SimpVect.size()) {
+ FAddend &R = TmpResult[NextTmpIdx ++];
+ R = *SimpVect[StartIdx];
+ for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++)
+ R += *SimpVect[Idx];
+
+ // Pop all addends being folded and push the resulting folded addend.
+ SimpVect.resize(StartIdx);
+ if (Val != 0) {
+ if (!R.isZero()) {
+ SimpVect.push_back(&R);
+ }
+ } else {
+ // Don't push constant addend at this time. It will be the last element
+ // of <SimpVect>.
+ ConstAdd = &R;
+ }
+ }
+ }
+
+ assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
+ "out-of-bound access");
+
+ if (ConstAdd)
+ SimpVect.push_back(ConstAdd);
+
+ Value *Result;
+ if (!SimpVect.empty())
+ Result = createNaryFAdd(SimpVect, InstrQuota);
+ else {
+ // The addition is folded to 0.0.
+ Result = ConstantFP::get(Instr->getType(), 0.0);
+ }
+
+ return Result;
+}
+
+Value *FAddCombine::createNaryFAdd
+ (const AddendVect &Opnds, unsigned InstrQuota) {
+ assert(!Opnds.empty() && "Expect at least one addend");
+
+ // Step 1: Check if the # of instructions needed exceeds the quota.
+ //
+ unsigned InstrNeeded = calcInstrNumber(Opnds);
+ if (InstrNeeded > InstrQuota)
+ return 0;
+
+ initCreateInstNum();
+
+ // step 2: Emit the N-ary addition.
+ // Note that at most three instructions are involved in Fadd-InstCombine: the
+ // addition in question, and at most two neighboring instructions.
+ // The resulting optimized addition should have at least one less instruction
+ // than the original addition expression tree. This implies that the resulting
+ // N-ary addition has at most two instructions, and we don't need to worry
+ // about tree-height when constructing the N-ary addition.
+
+ Value *LastVal = 0;
+ bool LastValNeedNeg = false;
+
+ // Iterate the addends, creating fadd/fsub using adjacent two addends.
+ for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
+ I != E; I++) {
+ bool NeedNeg;
+ Value *V = createAddendVal(**I, NeedNeg);
+ if (!LastVal) {
+ LastVal = V;
+ LastValNeedNeg = NeedNeg;
+ continue;
+ }
+
+ if (LastValNeedNeg == NeedNeg) {
+ LastVal = createFAdd(LastVal, V);
+ continue;
+ }
+
+ if (LastValNeedNeg)
+ LastVal = createFSub(V, LastVal);
+ else
+ LastVal = createFSub(LastVal, V);
+
+ LastValNeedNeg = false;
+ }
+
+ if (LastValNeedNeg) {
+ LastVal = createFNeg(LastVal);
+ }
+
+ #ifndef NDEBUG
+ assert(CreateInstrNum == InstrNeeded &&
+ "Inconsistent in instruction numbers");
+ #endif
+
+ return LastVal;
+}
+
+Value *FAddCombine::createFSub
+ (Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFSub(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFNeg(Value *V) {
+ Value *Zero = cast<Value>(ConstantFP::get(V->getType(), 0.0));
+ return createFSub(Zero, V);
+}
+
+Value *FAddCombine::createFAdd
+ (Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFMul(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+void FAddCombine::createInstPostProc(Instruction *NewInstr) {
+ NewInstr->setDebugLoc(Instr->getDebugLoc());
+
+ // Keep track of the number of instruction created.
+ incCreateInstNum();
+
+ // Propagate fast-math flags
+ NewInstr->setFastMathFlags(Instr->getFastMathFlags());
+}
+
+// Return the number of instruction needed to emit the N-ary addition.
+// NOTE: Keep this function in sync with createAddendVal().
+unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
+ unsigned OpndNum = Opnds.size();
+ unsigned InstrNeeded = OpndNum - 1;
+
+ // The number of addends in the form of "(-1)*x".
+ unsigned NegOpndNum = 0;
+
+ // Adjust the number of instructions needed to emit the N-ary add.
+ for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
+ I != E; I++) {
+ const FAddend *Opnd = *I;
+ if (Opnd->isConstant())
+ continue;
+
+ const FAddendCoef &CE = Opnd->getCoef();
+ if (CE.isMinusOne() || CE.isMinusTwo())
+ NegOpndNum++;
+
+ // Let the addend be "c * x". If "c == +/-1", the value of the addend
+ // is immediately available; otherwise, it needs exactly one instruction
+ // to evaluate the value.
+ if (!CE.isMinusOne() && !CE.isOne())
+ InstrNeeded++;
+ }
+ if (NegOpndNum == OpndNum)
+ InstrNeeded++;
+ return InstrNeeded;
+}
+
+// Input Addend Value NeedNeg(output)
+// ================================================================
+// Constant C C false
+// <+/-1, V> V coefficient is -1
+// <2/-2, V> "fadd V, V" coefficient is -2
+// <C, V> "fmul V, C" false
+//
+// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber.
+Value *FAddCombine::createAddendVal
+ (const FAddend &Opnd, bool &NeedNeg) {
+ const FAddendCoef &Coeff = Opnd.getCoef();
+
+ if (Opnd.isConstant()) {
+ NeedNeg = false;
+ return Coeff.getValue(Instr->getType());
+ }
+
+ Value *OpndVal = Opnd.getSymVal();
+
+ if (Coeff.isMinusOne() || Coeff.isOne()) {
+ NeedNeg = Coeff.isMinusOne();
+ return OpndVal;
+ }
+
+ if (Coeff.isTwo() || Coeff.isMinusTwo()) {
+ NeedNeg = Coeff.isMinusTwo();
+ return createFAdd(OpndVal, OpndVal);
+ }
+
+ NeedNeg = false;
+ return createFMul(OpndVal, Coeff.getValue(Instr->getType()));
+}
+
+/// AddOne - Add one to a ConstantInt.
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+
+// dyn_castFoldableMul - If this value is a multiply that can be folded into
+// other computations (because it has a constant operand), return the
+// non-constant operand of the multiply, and set CST to point to the multiplier.
+// Otherwise, return null.
+//
+static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
+ if (!V->hasOneUse() || !V->getType()->isIntegerTy())
+ return 0;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0;
+
+ if (I->getOpcode() == Instruction::Mul)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
+ return I->getOperand(0);
+ if (I->getOpcode() == Instruction::Shl)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
+ // The multiplier is really 1 << CST.
+ uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ uint32_t CSTVal = CST->getLimitedValue(BitWidth);
+ CST = ConstantInt::get(V->getType()->getContext(),
+ APInt(BitWidth, 1).shl(CSTVal));
+ return I->getOperand(0);
+ }
+ return 0;
+}
+
+
+/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+ // There are different heuristics we can use for this. Here are some simple
+ // ones.
+
+ // Add has the property that adding any two 2's complement numbers can only
+ // have one carry bit which can change a sign. As such, if LHS and RHS each
+ // have at least two sign bits, we know that the addition of the two values
+ // will sign extend fine.
+ if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+ return true;
+
+
+ // If one of the operands only has one non-zero bit, and if the other operand
+ // has a known-zero bit in a more significant place than it (not including the
+ // sign bit) the ripple may go up to and fill the zero, but won't change the
+ // sign. For example, (X & ~4) + 1.
+
+ // TODO: Implement.
+
+ return false;
+}
+
+Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A*B)+(A*C) -> A*(B+C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // X + (signbit) --> X ^ signbit
+ const APInt &Val = CI->getValue();
+ if (Val.isSignBit())
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // See if SimplifyDemandedBits can simplify this. This handles stuff like
+ // (X & 254)+1 -> (X&254)|1
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // zext(bool) + C -> bool ? C + 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
+ if (ZI->getSrcTy()->isIntegerTy(1))
+ return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
+
+ Value *XorLHS = 0; ConstantInt *XorRHS = 0;
+ if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
+ uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
+ const APInt &RHSVal = CI->getValue();
+ unsigned ExtendAmt = 0;
+ // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
+ // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
+ if (XorRHS->getValue() == -RHSVal) {
+ if (RHSVal.isPowerOf2())
+ ExtendAmt = TySizeBits - RHSVal.logBase2() - 1;
+ else if (XorRHS->getValue().isPowerOf2())
+ ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
+ }
+
+ if (ExtendAmt) {
+ APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
+ if (!MaskedValueIsZero(XorLHS, Mask))
+ ExtendAmt = 0;
+ }
+
+ if (ExtendAmt) {
+ Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
+ Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
+ return BinaryOperator::CreateAShr(NewShl, ShAmt);
+ }
+
+ // If this is a xor that was canonicalized from a sub, turn it back into
+ // a sub and fuse this add with it.
+ if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
+ IntegerType *IT = cast<IntegerType>(I.getType());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne);
+ if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
+ XorLHS);
+ }
+ }
+ }
+
+ if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // X + X --> X << 1
+ if (LHS == RHS) {
+ BinaryOperator *New =
+ BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
+ New->setHasNoSignedWrap(I.hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return New;
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castNegVal(LHS)) {
+ if (!isa<Constant>(RHS))
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+ return BinaryOperator::CreateNeg(NewAdd);
+ }
+
+ return BinaryOperator::CreateSub(RHS, LHSV);
+ }
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castNegVal(RHS))
+ return BinaryOperator::CreateSub(LHS, V);
+
+
+ ConstantInt *C2;
+ if (Value *X = dyn_castFoldableMul(LHS, C2)) {
+ if (X == RHS) // X*C + X --> X * (C+1)
+ return BinaryOperator::CreateMul(RHS, AddOne(C2));
+
+ // X*C1 + X*C2 --> X * (C1+C2)
+ ConstantInt *C1;
+ if (X == dyn_castFoldableMul(RHS, C1))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
+ }
+
+ // X + X*C --> X * (C+1)
+ if (dyn_castFoldableMul(RHS, C2) == LHS)
+ return BinaryOperator::CreateMul(LHS, AddOne(C2));
+
+ // A+B --> A|B iff A and B have no bits set in common.
+ if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
+ if (LHSKnownZero != 0) {
+ APInt RHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
+
+ // No bits in common -> bitwise or.
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateOr(LHS, RHS);
+ }
+ }
+
+ // W*X + Y*Z --> W * (X+Z) iff W == Y
+ {
+ Value *W, *X, *Y, *Z;
+ if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
+ match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
+ if (W != Y) {
+ if (W == Z) {
+ std::swap(Y, Z);
+ } else if (Y == X) {
+ std::swap(W, X);
+ } else if (X == Z) {
+ std::swap(Y, Z);
+ std::swap(W, X);
+ }
+ }
+
+ if (W == Y) {
+ Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
+ return BinaryOperator::CreateMul(W, NewAdd);
+ }
+ }
+ }
+
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
+ Value *X = 0;
+ if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
+ return BinaryOperator::CreateSub(SubOne(CRHS), X);
+
+ // (X & FF00) + xx00 -> (X+xx00) & FF00
+ if (LHS->hasOneUse() &&
+ match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
+ CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
+ // See if all bits from the first bit set in the Add RHS up are included
+ // in the mask. First, get the rightmost bit.
+ const APInt &AddRHSV = CRHS->getValue();
+
+ // Form a mask of all bits from the lowest bit added through the top.
+ APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
+
+ // See if the and mask includes all of these bits.
+ APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
+
+ if (AddRHSHighBits == AddRHSHighBitsAnd) {
+ // Okay, the xform is safe. Insert the new add pronto.
+ Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+ return BinaryOperator::CreateAnd(NewAdd, C2);
+ }
+ }
+
+ // Try to fold constant add into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ }
+
+ // add (select X 0 (sub n A)) A --> select X A n
+ {
+ SelectInst *SI = dyn_cast<SelectInst>(LHS);
+ Value *A = RHS;
+ if (!SI) {
+ SI = dyn_cast<SelectInst>(RHS);
+ A = LHS;
+ }
+ if (SI && SI->hasOneUse()) {
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
+ Value *N;
+
+ // Can we fold the add into the argument of the select?
+ // We check both true and false select arguments for a matching subtract.
+ if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the true select value.
+ return SelectInst::Create(SI->getCondition(), N, A);
+
+ if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the false select value.
+ return SelectInst::Create(SI->getCondition(), A, N);
+ }
+ }
+
+ // Check for (add (sext x), y), see if we can merge this into an
+ // integer add followed by a sext.
+ if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+ // (add (sext x), cst) --> (sext (add x, cst'))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new, smaller add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add (sext x), (sext y)) --> (sext (add int x, y))
+ if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+ // Only do this if x/y have the same type, if at last one of them has a
+ // single use (so we don't increase the number of sexts), and if the
+ // integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ // Check for (x & y) + (x ^ y)
+ {
+ Value *A = 0, *B = 0;
+ if (match(RHS, m_Xor(m_Value(A), m_Value(B))) &&
+ (match(LHS, m_And(m_Specific(A), m_Specific(B))) ||
+ match(LHS, m_And(m_Specific(B), m_Specific(A)))))
+ return BinaryOperator::CreateOr(A, B);
+
+ if (match(LHS, m_Xor(m_Value(A), m_Value(B))) &&
+ (match(RHS, m_And(m_Specific(A), m_Specific(B))) ||
+ match(RHS, m_And(m_Specific(B), m_Specific(A)))))
+ return BinaryOperator::CreateOr(A, B);
+ }
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castFNegVal(LHS))
+ return BinaryOperator::CreateFSub(RHS, LHSV);
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castFNegVal(RHS))
+ return BinaryOperator::CreateFSub(LHS, V);
+
+ // Check for (fadd double (sitofp x), y), see if we can merge this into an
+ // integer add followed by a promotion.
+ if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+ // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // ... if the constant fits in the integer value. This is useful for things
+ // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
+ // requires a constant pool load, and generally allows the add to be better
+ // instcombined.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+
+ // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+ // Only do this if x/y have the same type, if at last one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),"addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ if (I.hasUnsafeAlgebra()) {
+ if (Value *V = FAddCombine(Builder).simplify(&I))
+ return ReplaceInstUsesWith(I, V);
+ }
+
+ return Changed ? &I : 0;
+}
+
+
+/// Optimize pointer differences into the same array into a size. Consider:
+/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
+/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
+///
+Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
+ Type *Ty) {
+ assert(TD && "Must have target data info for this");
+
+ // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
+ // this.
+ bool Swapped = false;
+ GEPOperator *GEP1 = 0, *GEP2 = 0;
+
+ // For now we require one side to be the base pointer "A" or a constant
+ // GEP derived from it.
+ if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
+ // (gep X, ...) - X
+ if (LHSGEP->getOperand(0) == RHS) {
+ GEP1 = LHSGEP;
+ Swapped = false;
+ } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
+ // (gep X, ...) - (gep X, ...)
+ if (LHSGEP->getOperand(0)->stripPointerCasts() ==
+ RHSGEP->getOperand(0)->stripPointerCasts()) {
+ GEP2 = RHSGEP;
+ GEP1 = LHSGEP;
+ Swapped = false;
+ }
+ }
+ }
+
+ if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
+ // X - (gep X, ...)
+ if (RHSGEP->getOperand(0) == LHS) {
+ GEP1 = RHSGEP;
+ Swapped = true;
+ } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
+ // (gep X, ...) - (gep X, ...)
+ if (RHSGEP->getOperand(0)->stripPointerCasts() ==
+ LHSGEP->getOperand(0)->stripPointerCasts()) {
+ GEP2 = LHSGEP;
+ GEP1 = RHSGEP;
+ Swapped = true;
+ }
+ }
+ }
+
+ // Avoid duplicating the arithmetic if GEP2 has non-constant indices and
+ // multiple users.
+ if (GEP1 == 0 ||
+ (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
+ return 0;
+
+ // Emit the offset of the GEP and an intptr_t.
+ Value *Result = EmitGEPOffset(GEP1);
+
+ // If we had a constant expression GEP on the other side offsetting the
+ // pointer, subtract it from the offset we have.
+ if (GEP2) {
+ Value *Offset = EmitGEPOffset(GEP2);
+ Result = Builder->CreateSub(Result, Offset);
+ }
+
+ // If we have p - gep(p, ...) then we have to negate the result.
+ if (Swapped)
+ Result = Builder->CreateNeg(Result, "diff.neg");
+
+ return Builder->CreateIntCast(Result, Ty, true);
+}
+
+
+Instruction *InstCombiner::visitSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A*B)-(A*C) -> A*(B-C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
+ if (Value *V = dyn_castNegVal(Op1)) {
+ BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
+ Res->setHasNoSignedWrap(I.hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return Res;
+ }
+
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ // Replace (-1 - A) with (~A).
+ if (match(Op0, m_AllOnes()))
+ return BinaryOperator::CreateNot(Op1);
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
+ // C - ~X == X + (1+C)
+ Value *X = 0;
+ if (match(Op1, m_Not(m_Value(X))))
+ return BinaryOperator::CreateAdd(X, AddOne(C));
+
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (C->isZero()) {
+ Value *X; ConstantInt *CI;
+ if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateAShr(X, CI);
+
+ if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateLShr(X, CI);
+ }
+
+ // Try to fold constant sub into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ // C-(X+C2) --> (C-C2)-X
+ ConstantInt *C2;
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2))))
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
+
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Fold (sub 0, (zext bool to B)) --> (sext bool to B)
+ if (C->isZero() && match(Op1, m_ZExt(m_Value(X))))
+ if (X->getType()->isIntegerTy(1))
+ return CastInst::CreateSExtOrBitCast(X, Op1->getType());
+
+ // Fold (sub 0, (sext bool to B)) --> (zext bool to B)
+ if (C->isZero() && match(Op1, m_SExt(m_Value(X))))
+ if (X->getType()->isIntegerTy(1))
+ return CastInst::CreateZExtOrBitCast(X, Op1->getType());
+ }
+
+
+ { Value *Y;
+ // X-(X+Y) == -Y X-(Y+X) == -Y
+ if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
+ match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
+ return BinaryOperator::CreateNeg(Y);
+
+ // (X-Y)-X == -Y
+ if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
+ return BinaryOperator::CreateNeg(Y);
+ }
+
+ if (Op1->hasOneUse()) {
+ Value *X = 0, *Y = 0, *Z = 0;
+ Constant *C = 0;
+ ConstantInt *CI = 0;
+
+ // (X - (Y - Z)) --> (X + (Z - Y)).
+ if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
+ return BinaryOperator::CreateAdd(Op0,
+ Builder->CreateSub(Z, Y, Op1->getName()));
+
+ // (X - (X & Y)) --> (X & ~Y)
+ //
+ if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) ||
+ match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
+ return BinaryOperator::CreateAnd(Op0,
+ Builder->CreateNot(Y, Y->getName() + ".not"));
+
+ // 0 - (X sdiv C) -> (X sdiv -C)
+ if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) &&
+ match(Op0, m_Zero()))
+ return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
+
+ // 0 - (X << Y) -> (-X << Y) when X is freely negatable.
+ if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
+ if (Value *XNeg = dyn_castNegVal(X))
+ return BinaryOperator::CreateShl(XNeg, Y);
+
+ // X - X*C --> X * (1-C)
+ if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) {
+ Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI);
+ return BinaryOperator::CreateMul(Op0, CP1);
+ }
+
+ // X - X<<C --> X * (1-(1<<C))
+ if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) {
+ Constant *One = ConstantInt::get(I.getType(), 1);
+ C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
+ return BinaryOperator::CreateMul(Op0, C);
+ }
+
+ // X - A*-B -> X + A*B
+ // X - -A*B -> X + A*B
+ Value *A, *B;
+ if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
+ match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
+ return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
+
+ // X - A*CI -> X + A*-CI
+ // X - CI*A -> X + A*-CI
+ if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) ||
+ match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) {
+ Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
+ return BinaryOperator::CreateAdd(Op0, NewMul);
+ }
+ }
+
+ ConstantInt *C1;
+ if (Value *X = dyn_castFoldableMul(Op0, C1)) {
+ if (X == Op1) // X*C - X --> X * (C-1)
+ return BinaryOperator::CreateMul(Op1, SubOne(C1));
+
+ ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
+ if (X == dyn_castFoldableMul(Op1, C2))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
+ }
+
+ // Optimize pointer differences into the same array into a size. Consider:
+ // &A[10] - &A[0]: we should compile this to "10".
+ if (TD) {
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // If this is a 'B = x-(-A)', change to B = x+A...
+ if (Value *V = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFAdd(Op0, V);
+
+ if (I.hasUnsafeAlgebra()) {
+ if (Value *V = FAddCombine(Builder).simplify(&I))
+ return ReplaceInstUsesWith(I, V);
+ }
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
new file mode 100644
index 000000000000..990cbc3d594e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -0,0 +1,2355 @@
+//===- InstCombineAndOrXor.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitAnd, visitOr, and visitXor functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+
+/// AddOne - Add one to a ConstantInt.
+static Constant *AddOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue() + 1);
+}
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+/// isFreeToInvert - Return true if the specified value is free to invert (apply
+/// ~ to). This happens in cases where the ~ can be eliminated.
+static inline bool isFreeToInvert(Value *V) {
+ // ~(~(X)) -> X.
+ if (BinaryOperator::isNot(V))
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Compares can be inverted if they have a single use.
+ if (CmpInst *CI = dyn_cast<CmpInst>(V))
+ return CI->hasOneUse();
+
+ return false;
+}
+
+static inline Value *dyn_castNotVal(Value *V) {
+ // If this is not(not(x)) don't return that this is a not: we want the two
+ // not's to be folded first.
+ if (BinaryOperator::isNot(V)) {
+ Value *Operand = BinaryOperator::getNotArgument(V);
+ if (!isFreeToInvert(Operand))
+ return Operand;
+ }
+
+ // Constants can be considered to be not'ed values...
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantInt::get(C->getType(), ~C->getValue());
+ return 0;
+}
+
+/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
+/// predicate into a three bit mask. It also returns whether it is an ordered
+/// predicate by reference.
+static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
+ isOrdered = false;
+ switch (CC) {
+ case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000
+ case FCmpInst::FCMP_UNO: return 0; // 000
+ case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001
+ case FCmpInst::FCMP_UGT: return 1; // 001
+ case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010
+ case FCmpInst::FCMP_UEQ: return 2; // 010
+ case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011
+ case FCmpInst::FCMP_UGE: return 3; // 011
+ case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100
+ case FCmpInst::FCMP_ULT: return 4; // 100
+ case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101
+ case FCmpInst::FCMP_UNE: return 5; // 101
+ case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110
+ case FCmpInst::FCMP_ULE: return 6; // 110
+ // True -> 7
+ default:
+ // Not expecting FCMP_FALSE and FCMP_TRUE;
+ llvm_unreachable("Unexpected FCmp predicate!");
+ }
+}
+
+/// getNewICmpValue - This is the complement of getICmpCode, which turns an
+/// opcode and two operands into either a constant true or false, or a brand
+/// new ICmp instruction. The sign is passed in to determine which kind
+/// of predicate to use in the new icmp instruction.
+static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ ICmpInst::Predicate NewPred;
+ if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred))
+ return NewConstant;
+ return Builder->CreateICmp(NewPred, LHS, RHS);
+}
+
+/// getFCmpValue - This is the complement of getFCmpCode, which turns an
+/// opcode and two operands into either a FCmp instruction. isordered is passed
+/// in to determine which kind of predicate to use in the new fcmp instruction.
+static Value *getFCmpValue(bool isordered, unsigned code,
+ Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ CmpInst::Predicate Pred;
+ switch (code) {
+ default: llvm_unreachable("Illegal FCmp code!");
+ case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break;
+ case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break;
+ case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break;
+ case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break;
+ case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
+ case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
+ case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
+ case 7:
+ if (!isordered) return ConstantInt::getTrue(LHS->getContext());
+ Pred = FCmpInst::FCMP_ORD; break;
+ }
+ return Builder->CreateFCmp(Pred, LHS, RHS);
+}
+
+// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where
+// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
+// guaranteed to be a binary operator.
+Instruction *InstCombiner::OptAndOp(Instruction *Op,
+ ConstantInt *OpRHS,
+ ConstantInt *AndRHS,
+ BinaryOperator &TheAnd) {
+ Value *X = Op->getOperand(0);
+ Constant *Together = 0;
+ if (!Op->isShift())
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
+
+ switch (Op->getOpcode()) {
+ case Instruction::Xor:
+ if (Op->hasOneUse()) {
+ // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+ Value *And = Builder->CreateAnd(X, AndRHS);
+ And->takeName(Op);
+ return BinaryOperator::CreateXor(And, Together);
+ }
+ break;
+ case Instruction::Or:
+ if (Op->hasOneUse()){
+ if (Together != OpRHS) {
+ // (X | C1) & C2 --> (X | (C1&C2)) & C2
+ Value *Or = Builder->CreateOr(X, Together);
+ Or->takeName(Op);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
+ }
+
+ ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
+ if (TogetherCI && !TogetherCI->isZero()){
+ // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
+ // NOTE: This reduces the number of bits set in the & mask, which
+ // can expose opportunities for store narrowing.
+ Together = ConstantExpr::getXor(AndRHS, Together);
+ Value *And = Builder->CreateAnd(X, Together);
+ And->takeName(Op);
+ return BinaryOperator::CreateOr(And, OpRHS);
+ }
+ }
+
+ break;
+ case Instruction::Add:
+ if (Op->hasOneUse()) {
+ // Adding a one to a single bit bit-field should be turned into an XOR
+ // of the bit. First thing to check is to see if this AND is with a
+ // single bit constant.
+ const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
+
+ // If there is only one bit set.
+ if (AndRHSV.isPowerOf2()) {
+ // Ok, at this point, we know that we are masking the result of the
+ // ADD down to exactly one bit. If the constant we are adding has
+ // no bits set below this bit, then we can eliminate the ADD.
+ const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
+
+ // Check to see if any bits below the one bit set in AndRHSV are set.
+ if ((AddRHS & (AndRHSV-1)) == 0) {
+ // If not, the only thing that can effect the output of the AND is
+ // the bit specified by AndRHSV. If that bit is set, the effect of
+ // the XOR is to toggle the bit. If it is clear, then the ADD has
+ // no effect.
+ if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
+ TheAnd.setOperand(0, X);
+ return &TheAnd;
+ } else {
+ // Pull the XOR out of the AND.
+ Value *NewAnd = Builder->CreateAnd(X, AndRHS);
+ NewAnd->takeName(Op);
+ return BinaryOperator::CreateXor(NewAnd, AndRHS);
+ }
+ }
+ }
+ }
+ break;
+
+ case Instruction::Shl: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
+ AndRHS->getValue() & ShlMask);
+
+ if (CI->getValue() == ShlMask)
+ // Masking out bits that the shift already masks.
+ return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
+
+ if (CI != AndRHS) { // Reducing bits set in and.
+ TheAnd.setOperand(1, CI);
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::LShr: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now! This only applies to
+ // unsigned shifts, because a signed shr may bring in set bits!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(Op->getContext(),
+ AndRHS->getValue() & ShrMask);
+
+ if (CI->getValue() == ShrMask)
+ // Masking out bits that the shift already masks.
+ return ReplaceInstUsesWith(TheAnd, Op);
+
+ if (CI != AndRHS) {
+ TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::AShr:
+ // Signed shr.
+ // See if this is shifting in some sign extension, then masking it out
+ // with an and.
+ if (Op->hasOneUse()) {
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ Constant *C = ConstantInt::get(Op->getContext(),
+ AndRHS->getValue() & ShrMask);
+ if (C == AndRHS) { // Masking out bits shifted in.
+ // (Val ashr C1) & C2 -> (Val lshr C1) & C2
+ // Make the argument unsigned.
+ Value *ShVal = Op->getOperand(0);
+ ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
+ return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
+ }
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
+/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
+/// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
+/// whether to treat the V, Lo and HI as signed or not. IB is the location to
+/// insert new instructions.
+Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside) {
+ assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+ ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+ "Lo is not <= Hi in range emission code!");
+
+ if (Inside) {
+ if (Lo == Hi) // Trivially false.
+ return ConstantInt::getFalse(V->getContext());
+
+ // V >= Min && V < Hi --> V < Hi
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
+ return Builder->CreateICmp(pred, V, Hi);
+ }
+
+ // Emit V-Lo <u Hi-Lo
+ Constant *NegLo = ConstantExpr::getNeg(Lo);
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
+ return Builder->CreateICmpULT(Add, UpperBound);
+ }
+
+ if (Lo == Hi) // Trivially true.
+ return ConstantInt::getTrue(V->getContext());
+
+ // V < Min || V >= Hi -> V > Hi-1
+ Hi = SubOne(cast<ConstantInt>(Hi));
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ return Builder->CreateICmp(pred, V, Hi);
+ }
+
+ // Emit V-Lo >u Hi-1-Lo
+ // Note that Hi has already had one subtracted from it, above.
+ ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
+ return Builder->CreateICmpUGT(Add, LowerBound);
+}
+
+// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
+// any number of 0s on either side. The 1s are allowed to wrap from LSB to
+// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
+// not, since all 1s are not contiguous.
+static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
+ const APInt& V = Val->getValue();
+ uint32_t BitWidth = Val->getType()->getBitWidth();
+ if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
+
+ // look for the first zero bit after the run of ones
+ MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
+ // look for the first non-zero bit
+ ME = V.getActiveBits();
+ return true;
+}
+
+/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
+/// where isSub determines whether the operator is a sub. If we can fold one of
+/// the following xforms:
+///
+/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
+/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+///
+/// return (A +/- B).
+///
+Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
+ ConstantInt *Mask, bool isSub,
+ Instruction &I) {
+ Instruction *LHSI = dyn_cast<Instruction>(LHS);
+ if (!LHSI || LHSI->getNumOperands() != 2 ||
+ !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
+
+ ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
+
+ switch (LHSI->getOpcode()) {
+ default: return 0;
+ case Instruction::And:
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
+ // If the AndRHS is a power of two minus one (0+1+), this is simple.
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) ==
+ Mask->getValue().getBitWidth())
+ break;
+
+ // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
+ // part, we don't need any explicit masks to take them out of A. If that
+ // is all N is, ignore it.
+ uint32_t MB = 0, ME = 0;
+ if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
+ uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
+ if (MaskedValueIsZero(RHS, Mask))
+ break;
+ }
+ }
+ return 0;
+ case Instruction::Or:
+ case Instruction::Xor:
+ // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
+ break;
+ return 0;
+ }
+
+ if (isSub)
+ return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
+ return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
+}
+
+/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
+/// One of A and B is considered the mask, the other the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum
+/// contains only "Mask", then both A and B can be considered masks.
+/// If A is the mask, then it was proven, that (A & C) == C. This
+/// is trivial if C == A, or C == 0. If both A and C are constants, this
+/// proof is also easy.
+/// For the following explanations we assume that A is the mask.
+/// The part "AllOnes" declares, that the comparison is true only
+/// if (A & B) == A, or all bits of A are set in B.
+/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
+/// The part "AllZeroes" declares, that the comparison is true only
+/// if (A & B) == 0, or all bits of A are cleared in B.
+/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
+/// The part "Mixed" declares, that (A & B) == C and C might or might not
+/// contain any number of one bits and zero bits.
+/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
+/// The Part "Not" means, that in above descriptions "==" should be replaced
+/// by "!=".
+/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
+/// If the mask A contains a single bit, then the following is equivalent:
+/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
+/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
+enum MaskedICmpType {
+ FoldMskICmp_AMask_AllOnes = 1,
+ FoldMskICmp_AMask_NotAllOnes = 2,
+ FoldMskICmp_BMask_AllOnes = 4,
+ FoldMskICmp_BMask_NotAllOnes = 8,
+ FoldMskICmp_Mask_AllZeroes = 16,
+ FoldMskICmp_Mask_NotAllZeroes = 32,
+ FoldMskICmp_AMask_Mixed = 64,
+ FoldMskICmp_AMask_NotMixed = 128,
+ FoldMskICmp_BMask_Mixed = 256,
+ FoldMskICmp_BMask_NotMixed = 512
+};
+
+/// return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
+ ICmpInst::Predicate SCC)
+{
+ ConstantInt *ACst = dyn_cast<ConstantInt>(A);
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
+ bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
+ ACst->getValue().isPowerOf2());
+ bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
+ BCst->getValue().isPowerOf2());
+ unsigned result = 0;
+ if (CCst != 0 && CCst->isZero()) {
+ // if C is zero, then both A and B qualify as mask
+ result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed));
+ return result;
+ }
+ if (A == C) {
+ result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed)
+ : (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed));
+ } else if (ACst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
+ : FoldMskICmp_AMask_NotMixed);
+ }
+ if (B == C) {
+ result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_BMask_Mixed));
+ } else if (BCst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
+ : FoldMskICmp_BMask_NotMixed);
+ }
+ return result;
+}
+
+/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
+/// if possible. The returned predicate is either == or !=. Returns false if
+/// decomposition fails.
+static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
+ Value *&X, Value *&Y, Value *&Z) {
+ // X < 0 is equivalent to (X & SignBit) != 0.
+ if (I->getPredicate() == ICmpInst::ICMP_SLT)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (C->isZero()) {
+ X = I->getOperand(0);
+ Y = ConstantInt::get(I->getContext(),
+ APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_NE;
+ Z = C;
+ return true;
+ }
+
+ // X > -1 is equivalent to (X & SignBit) == 0.
+ if (I->getPredicate() == ICmpInst::ICMP_SGT)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (C->isAllOnesValue()) {
+ X = I->getOperand(0);
+ Y = ConstantInt::get(I->getContext(),
+ APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_EQ;
+ Z = ConstantInt::getNullValue(C->getType());
+ return true;
+ }
+
+ return false;
+}
+
+/// foldLogOpOfMaskedICmpsHelper:
+/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
+ Value*& B, Value*& C,
+ Value*& D, Value*& E,
+ ICmpInst *LHS, ICmpInst *RHS,
+ ICmpInst::Predicate &LHSCC,
+ ICmpInst::Predicate &RHSCC) {
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
+ // vectors are not (yet?) supported
+ if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
+
+ // Here comes the tricky part:
+ // LHS might be of the form L11 & L12 == X, X == L21 & L22,
+ // and L11 & L12 == L21 & L22. The same goes for RHS.
+ // Now we must find those components L** and R**, that are equal, so
+ // that we can extract the parameters A, B, C, D, and E for the canonical
+ // above.
+ Value *L1 = LHS->getOperand(0);
+ Value *L2 = LHS->getOperand(1);
+ Value *L11,*L12,*L21,*L22;
+ // Check whether the icmp can be decomposed into a bit test.
+ if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) {
+ L21 = L22 = L1 = 0;
+ } else {
+ // Look for ANDs in the LHS icmp.
+ if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+ L21 = L22 = 0;
+ } else {
+ if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
+ return 0;
+ std::swap(L1, L2);
+ L21 = L22 = 0;
+ }
+ }
+
+ // Bail if LHS was a icmp that can't be decomposed into an equality.
+ if (!ICmpInst::isEquality(LHSCC))
+ return 0;
+
+ Value *R1 = RHS->getOperand(0);
+ Value *R2 = RHS->getOperand(1);
+ Value *R11,*R12;
+ bool ok = false;
+ if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) {
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
+ A = R12; D = R11;
+ } else {
+ return 0;
+ }
+ E = R2; R1 = 0; ok = true;
+ } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12; E = R2; ok = true;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
+ A = R12; D = R11; E = R2; ok = true;
+ }
+ }
+
+ // Bail if RHS was a icmp that can't be decomposed into an equality.
+ if (!ICmpInst::isEquality(RHSCC))
+ return 0;
+
+ // Look for ANDs in on the right side of the RHS icmp.
+ if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12; E = R1; ok = true;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
+ A = R12; D = R11; E = R1; ok = true;
+ } else {
+ return 0;
+ }
+ }
+ if (!ok)
+ return 0;
+
+ if (L11 == A) {
+ B = L12; C = L2;
+ } else if (L12 == A) {
+ B = L11; C = L2;
+ } else if (L21 == A) {
+ B = L22; C = L1;
+ } else if (L22 == A) {
+ B = L21; C = L1;
+ }
+
+ unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC);
+ unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
+ return left_type & right_type;
+}
+/// foldLogOpOfMaskedICmps:
+/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y)
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
+ ICmpInst::Predicate NEWCC,
+ llvm::InstCombiner::BuilderTy* Builder) {
+ Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS,
+ LHSCC, RHSCC);
+ if (mask == 0) return 0;
+ assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
+ "foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
+
+ if (NEWCC == ICmpInst::ICMP_NE)
+ mask >>= 1; // treat "Not"-states as normal states
+
+ if (mask & FoldMskICmp_Mask_AllZeroes) {
+ // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
+ // -> (icmp eq (A & (B|D)), 0)
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+ // we can't use C as zero, because we might actually handle
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // with B and D, having a single bit set
+ Value* zero = Constant::getNullValue(A->getType());
+ return Builder->CreateICmp(NEWCC, newAnd, zero);
+ }
+ if (mask & FoldMskICmp_BMask_AllOnes) {
+ // (icmp eq (A & B), B) & (icmp eq (A & D), D)
+ // -> (icmp eq (A & (B|D)), (B|D))
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr);
+ }
+ if (mask & FoldMskICmp_AMask_AllOnes) {
+ // (icmp eq (A & B), A) & (icmp eq (A & D), A)
+ // -> (icmp eq (A & (B&D)), A)
+ Value* newAnd1 = Builder->CreateAnd(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newAnd1);
+ return Builder->CreateICmp(NEWCC, newAnd, A);
+ }
+ if (mask & FoldMskICmp_BMask_Mixed) {
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ // We already know that B & C == C && D & E == E.
+ // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
+ // C and E, which are shared by both the mask B and the mask D, don't
+ // contradict, then we can transform to
+ // -> (icmp eq (A & (B|D)), (C|E))
+ // Currently, we only handle the case of B, C, D, and E being constant.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (BCst == 0) return 0;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (DCst == 0) return 0;
+ // we can't simply use C and E, because we might actually handle
+ // (icmp ne (A & B), B) & (icmp eq (A & D), D)
+ // with B and D, having a single bit set
+
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ if (CCst == 0) return 0;
+ if (LHSCC != NEWCC)
+ CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
+ ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+ if (ECst == 0) return 0;
+ if (RHSCC != NEWCC)
+ ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
+ ConstantInt* MCst = dyn_cast<ConstantInt>(
+ ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
+ ConstantExpr::getXor(CCst, ECst)) );
+ // if there is a conflict we should actually return a false for the
+ // whole construct
+ if (!MCst->isZero())
+ return 0;
+ Value *newOr1 = Builder->CreateOr(B, D);
+ Value *newOr2 = ConstantExpr::getOr(CCst, ECst);
+ Value *newAnd = Builder->CreateAnd(A, newOr1);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr2);
+ }
+ return 0;
+}
+
+/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
+Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+ return V;
+
+ // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHSCst == 0 || RHSCst == 0) return 0;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
+ // where C is a power of 2
+ if (LHSCC == ICmpInst::ICMP_ULT &&
+ LHSCst->getValue().isPowerOf2()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+ }
+
+ // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
+ // where CMAX is the all ones value for the truncated type,
+ // iff the lower bits of C2 and CA are zero.
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC &&
+ LHS->hasOneUse() && RHS->hasOneUse()) {
+ Value *V;
+ ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
+
+ // (trunc x) == C1 & (and x, CA) == C2
+ // (and x, CA) == C2 & (trunc x) == C1
+ if (match(Val2, m_Trunc(m_Value(V))) &&
+ match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ SmallCst = RHSCst;
+ BigCst = LHSCst;
+ } else if (match(Val, m_Trunc(m_Value(V))) &&
+ match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ SmallCst = LHSCst;
+ BigCst = RHSCst;
+ }
+
+ if (SmallCst && BigCst) {
+ unsigned BigBitSize = BigCst->getType()->getBitWidth();
+ unsigned SmallBitSize = SmallCst->getType()->getBitWidth();
+
+ // Check that the low bits are zero.
+ APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
+ if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) {
+ Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue());
+ APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue();
+ Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N);
+ return Builder->CreateICmp(LHSCC, NewAnd, NewVal);
+ }
+ }
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // Make a constant range that's the intersection of the two icmp ranges.
+ // If the intersection is empty, we know that the result is false.
+ ConstantRange LHSRange =
+ ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+ ConstantRange RHSRange =
+ ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+
+ if (LHSRange.intersectWith(RHSRange).isEmptySet())
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+
+ // We can't fold (ugt x, C) & (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and and'ing the result
+ // together. Because of the above check, we know that we only have
+ // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know
+ // (from the icmp folding check above), that the two constants
+ // are not equal and that the larger constant is on the RHS
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
+ case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
+ case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
+ return LHS;
+ }
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_ULT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
+ return Builder->CreateICmpULT(Val, LHSCst);
+ break; // (X != 13 & X u< 15) -> no change
+ case ICmpInst::ICMP_SLT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
+ return Builder->CreateICmpSLT(Val, LHSCst);
+ break; // (X != 13 & X s< 15) -> no change
+ case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
+ case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
+ return RHS;
+ case ICmpInst::ICMP_NE:
+ if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1));
+ }
+ break; // (X != 13 & X != 15) -> no change
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
+ case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
+ return LHS;
+ case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
+ case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
+ return LHS;
+ case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
+ return RHS;
+ case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
+ break; // (X u> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
+ case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
+ return RHS;
+ case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
+ break; // (X s> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true);
+ case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+ RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // false.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ConstantInt::getFalse(LHS->getContext());
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp ord x,x" is "fcmp ord x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ if (Op0CC == FCmpInst::FCMP_TRUE)
+ return RHS;
+ if (Op1CC == FCmpInst::FCMP_TRUE)
+ return LHS;
+
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ // uno && ord -> false
+ if (Op0Pred == 0 && Op1Pred == 0 && Op0Ordered != Op1Ordered)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ if (Op1Pred == 0) {
+ std::swap(LHS, RHS);
+ std::swap(Op0Pred, Op1Pred);
+ std::swap(Op0Ordered, Op1Ordered);
+ }
+ if (Op0Pred == 0) {
+ // uno && ueq -> uno && (uno || eq) -> uno
+ // ord && olt -> ord && (ord && lt) -> olt
+ if (!Op0Ordered && (Op0Ordered == Op1Ordered))
+ return LHS;
+ if (Op0Ordered && (Op0Ordered == Op1Ordered))
+ return RHS;
+
+ // uno && oeq -> uno && (ord && eq) -> false
+ if (!Op0Ordered)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ // ord && ueq -> ord && (uno || eq) -> oeq
+ return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder);
+ }
+ }
+
+ return 0;
+}
+
+
+Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A|B)&(A|C) -> A|(B&C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
+ const APInt &AndRHSMask = AndRHS->getValue();
+
+ // Optimize a variety of ((val OP C1) & C2) combinations...
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ Value *Op0LHS = Op0I->getOperand(0);
+ Value *Op0RHS = Op0I->getOperand(1);
+ switch (Op0I->getOpcode()) {
+ default: break;
+ case Instruction::Xor:
+ case Instruction::Or: {
+ // If the mask is only needed on one incoming arm, push it up.
+ if (!Op0I->hasOneUse()) break;
+
+ APInt NotAndRHS(~AndRHSMask);
+ if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+ // Not masking anything out for the LHS, move to RHS.
+ Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
+ Op0RHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
+ }
+ if (!isa<Constant>(Op0RHS) &&
+ MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+ // Not masking anything out for the RHS, move to LHS.
+ Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
+ Op0LHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
+ }
+
+ break;
+ }
+ case Instruction::Add:
+ // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+ if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
+ break;
+
+ case Instruction::Sub:
+ // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+
+ // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
+ // has 1's for all bits that the subtraction with A might affect.
+ if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
+ uint32_t BitWidth = AndRHSMask.getBitWidth();
+ uint32_t Zeros = AndRHSMask.countLeadingZeros();
+ APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
+
+ if (MaskedValueIsZero(Op0LHS, Mask)) {
+ Value *NewNeg = Builder->CreateNeg(Op0RHS);
+ return BinaryOperator::CreateAnd(NewNeg, AndRHS);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ case Instruction::LShr:
+ // (1 << x) & 1 --> zext(x == 0)
+ // (1 >> x) & 1 --> zext(x == 0)
+ if (AndRHSMask == 1 && Op0LHS == AndRHS) {
+ Value *NewICmp =
+ Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
+ return new ZExtInst(NewICmp, I.getType());
+ }
+ break;
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
+ if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
+ return Res;
+ }
+
+ // If this is an integer truncation, and if the source is an 'and' with
+ // immediate, transform it. This frequently occurs for bitfield accesses.
+ {
+ Value *X = 0; ConstantInt *YC = 0;
+ if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
+ // Change: and (trunc (and X, YC) to T), C2
+ // into : and (trunc X to T), trunc(YC) & C2
+ // This will fold the two constants together, which may allow
+ // other simplifications.
+ Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
+ Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
+ C3 = ConstantExpr::getAnd(C3, AndRHS);
+ return BinaryOperator::CreateAnd(NewCast, C3);
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+
+ // (~A & ~B) == (~(A | B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(Or);
+ }
+
+ {
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ // A&(A^B) => A & ~B
+ {
+ Value *tmpOp0 = Op0;
+ Value *tmpOp1 = Op1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1 ) {
+ tmpOp1 = Op0;
+ tmpOp0 = Op1;
+ // Simplify below
+ }
+ }
+
+ if (tmpOp1->hasOneUse() &&
+ match(tmpOp1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (B == tmpOp0) {
+ std::swap(A, B);
+ }
+ // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if
+ // A is originally -1 (or a vector of -1 and undefs), then we enter
+ // an endless loop. By checking that A is non-constant we ensure that
+ // we will never get to the loop.
+ if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(B));
+ }
+ }
+
+ // (A&((~A)|B)) -> A&B
+ if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
+ match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(A, Op1);
+ if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
+ match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(A, Op0);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // If and'ing two fcmp, try combine them into one.
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+
+ // fold (and (cast A), (cast B)) -> (cast (and A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
+ Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
+ SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ // Only do this if the casts both really cause code to be generated.
+ if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+
+ // If this is and(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
+ }
+
+ // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ {
+ Value *X = 0;
+ bool OpsSwapped = false;
+ // Canonicalize SExt or Not to the LHS
+ if (match(Op1, m_SExt(m_Value())) ||
+ match(Op1, m_Not(m_Value()))) {
+ std::swap(Op0, Op1);
+ OpsSwapped = true;
+ }
+
+ // Fold (and (sext bool to A), B) --> (select bool, B, 0)
+ if (match(Op0, m_SExt(m_Value(X))) &&
+ X->getType()->getScalarType()->isIntegerTy(1)) {
+ Value *Zero = Constant::getNullValue(Op1->getType());
+ return SelectInst::Create(X, Op1, Zero);
+ }
+
+ // Fold (and ~(sext bool to A), B) --> (select bool, 0, B)
+ if (match(Op0, m_Not(m_SExt(m_Value(X)))) &&
+ X->getType()->getScalarType()->isIntegerTy(1)) {
+ Value *Zero = Constant::getNullValue(Op0->getType());
+ return SelectInst::Create(X, Zero, Op1);
+ }
+
+ if (OpsSwapped)
+ std::swap(Op0, Op1);
+ }
+
+ return Changed ? &I : 0;
+}
+
+/// CollectBSwapParts - Analyze the specified subexpression and see if it is
+/// capable of providing pieces of a bswap. The subexpression provides pieces
+/// of a bswap if it is proven that each of the non-zero bytes in the output of
+/// the expression came from the corresponding "byte swapped" byte in some other
+/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
+/// we know that the expression deposits the low byte of %X into the high byte
+/// of the bswap result and that all other bytes are zero. This expression is
+/// accepted, the high byte of ByteValues is set to X to indicate a correct
+/// match.
+///
+/// This function returns true if the match was unsuccessful and false if so.
+/// On entry to the function the "OverallLeftShift" is a signed integer value
+/// indicating the number of bytes that the subexpression is later shifted. For
+/// example, if the expression is later right shifted by 16 bits, the
+/// OverallLeftShift value would be -2 on entry. This is used to specify which
+/// byte of ByteValues is actually being set.
+///
+/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
+/// byte is masked to zero by a user. For example, in (X & 255), X will be
+/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits
+/// this function to working on up to 32-byte (256 bit) values. ByteMask is
+/// always in the local (OverallLeftShift) coordinate space.
+///
+static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
+ SmallVector<Value*, 8> &ByteValues) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If this is an or instruction, it may be an inner node of the bswap.
+ if (I->getOpcode() == Instruction::Or) {
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues) ||
+ CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical shift by a constant multiple of 8, recurse with
+ // OverallLeftShift and ByteMask adjusted.
+ if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+ unsigned ShAmt =
+ cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ // Ensure the shift amount is defined and of a byte value.
+ if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+ return true;
+
+ unsigned ByteShift = ShAmt >> 3;
+ if (I->getOpcode() == Instruction::Shl) {
+ // X << 2 -> collect(X, +2)
+ OverallLeftShift += ByteShift;
+ ByteMask >>= ByteShift;
+ } else {
+ // X >>u 2 -> collect(X, -2)
+ OverallLeftShift -= ByteShift;
+ ByteMask <<= ByteShift;
+ ByteMask &= (~0U >> (32-ByteValues.size()));
+ }
+
+ if (OverallLeftShift >= (int)ByteValues.size()) return true;
+ if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical 'and' with a mask that clears bytes, clear the
+ // corresponding bytes in ByteMask.
+ if (I->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(I->getOperand(1))) {
+ // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
+ unsigned NumBytes = ByteValues.size();
+ APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+ const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+
+ for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
+ // If this byte is masked out by a later operation, we don't care what
+ // the and mask is.
+ if ((ByteMask & (1 << i)) == 0)
+ continue;
+
+ // If the AndMask is all zeros for this byte, clear the bit.
+ APInt MaskB = AndMask & Byte;
+ if (MaskB == 0) {
+ ByteMask &= ~(1U << i);
+ continue;
+ }
+
+ // If the AndMask is not all ones for this byte, it's not a bytezap.
+ if (MaskB != Byte)
+ return true;
+
+ // Otherwise, this byte is kept.
+ }
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+ }
+
+ // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
+ // the input value to the bswap. Some observations: 1) if more than one byte
+ // is demanded from this input, then it could not be successfully assembled
+ // into a byteswap. At least one of the two bytes would not be aligned with
+ // their ultimate destination.
+ if (!isPowerOf2_32(ByteMask)) return true;
+ unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
+
+ // 2) The input and ultimate destinations must line up: if byte 3 of an i32
+ // is demanded, it needs to go into byte 0 of the result. This means that the
+ // byte needs to be shifted until it lands in the right byte bucket. The
+ // shift amount depends on the position: if the byte is coming from the high
+ // part of the value (e.g. byte 3) then it must be shifted right. If from the
+ // low part, it must be shifted left.
+ unsigned DestByteNo = InputByteNo + OverallLeftShift;
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
+
+ // If the destination byte value is already defined, the values are or'd
+ // together, which isn't a bswap (unless it's an or of the same bits).
+ if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
+ return true;
+ ByteValues[DestByteNo] = V;
+ return false;
+}
+
+/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
+/// If so, insert the new bswap intrinsic and return it.
+Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
+ IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+ if (!ITy || ITy->getBitWidth() % 16 ||
+ // ByteMask only allows up to 32-byte values.
+ ITy->getBitWidth() > 32*8)
+ return 0; // Can only bswap pairs of bytes. Can't do vectors.
+
+ /// ByteValues - For each byte of the result, we keep track of which value
+ /// defines each byte.
+ SmallVector<Value*, 8> ByteValues;
+ ByteValues.resize(ITy->getBitWidth()/8);
+
+ // Try to find all the pieces corresponding to the bswap.
+ uint32_t ByteMask = ~0U >> (32-ByteValues.size());
+ if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
+ return 0;
+
+ // Check to see if all of the bytes come from the same value.
+ Value *V = ByteValues[0];
+ if (V == 0) return 0; // Didn't find a byte? Must be zero.
+
+ // Check to make sure that all of the bytes come from the same value.
+ for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
+ if (ByteValues[i] != V)
+ return 0;
+ Module *M = I.getParent()->getParent()->getParent();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+ return CallInst::Create(F, V);
+}
+
+/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
+/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then
+/// we can simplify this expression to "cond ? C : D or B".
+static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
+ Value *C, Value *D) {
+ // If A is not a select of -1/0, this cannot match.
+ Value *Cond = 0;
+ if (!match(A, m_SExt(m_Value(Cond))) ||
+ !Cond->getType()->isIntegerTy(1))
+ return 0;
+
+ // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
+ if (match(D, m_Not(m_SExt(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, B);
+ if (match(D, m_SExt(m_Not(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, B);
+
+ // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
+ if (match(B, m_Not(m_SExt(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, D);
+ if (match(B, m_SExt(m_Not(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, D);
+ return 0;
+}
+
+/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
+Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ // handle (roughly):
+ // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+ return V;
+
+ // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHSCst == 0 || RHSCst == 0) return 0;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+ }
+
+ // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
+ // iff C2 + CA == C1.
+ if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) {
+ ConstantInt *AddCst;
+ if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst))))
+ if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue())
+ return Builder->CreateICmpULE(Val, LHSCst);
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // We can't fold (ugt x, C) | (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and or'ing the result
+ // together. Because of the above check, we know that we only have
+ // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
+ // icmp folding check above), that the two constants are not
+ // equal.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ if (LHSCst == SubOne(RHSCst)) {
+ // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+ return Builder->CreateICmpULT(Add, AddCST);
+ }
+
+ if (LHS->getOperand(0) == RHS->getOperand(0)) {
+ // if LHSCst and RHSCst differ only by one bit:
+ // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
+ assert(LHSCst->getValue().ule(LHSCst->getValue()));
+
+ APInt Xor = LHSCst->getValue() ^ RHSCst->getValue();
+ if (Xor.isPowerOf2()) {
+ Value *NegCst = Builder->getInt(~Xor);
+ Value *And = Builder->CreateAnd(LHS->getOperand(0), NegCst);
+ return Builder->CreateICmp(ICmpInst::ICMP_EQ, And, LHSCst);
+ }
+ }
+
+ break; // (X == 13 | X == 15) -> no change
+ case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
+ case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
+ case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
+ return RHS;
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
+ case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
+ case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
+ return LHS;
+ case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
+ case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ }
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
+ // If RHSCst is [us]MAXINT, it is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(false))
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false);
+ case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
+ return RHS;
+ case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
+ // If RHSCst is [us]MAXINT, it is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(true))
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false);
+ case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
+ return RHS;
+ case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
+ case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
+ return LHS;
+ case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
+ case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
+ return LHS;
+ case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
+ case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // true.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ConstantInt::getTrue(LHS->getContext());
+
+ // Otherwise, no need to compare the two constants, compare the
+ // rest.
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp uno x,x" is "fcmp uno x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ if (Op0CC == FCmpInst::FCMP_FALSE)
+ return RHS;
+ if (Op1CC == FCmpInst::FCMP_FALSE)
+ return LHS;
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op0Ordered == Op1Ordered) {
+ // If both are ordered or unordered, return a new fcmp with
+ // or'ed predicates.
+ return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder);
+ }
+ }
+ return 0;
+}
+
+/// FoldOrWithConstants - This helper function folds:
+///
+/// ((A | B) & C1) | (B & C2)
+///
+/// into:
+///
+/// (A & C1) | B
+///
+/// when the XOR of the two constants is "all ones" (-1).
+Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+ if (!CI1) return 0;
+
+ Value *V1 = 0;
+ ConstantInt *CI2 = 0;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
+
+ APInt Xor = CI1->getValue() ^ CI2->getValue();
+ if (!Xor.isAllOnesValue()) return 0;
+
+ if (V1 == A || V1 == B) {
+ Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
+ return BinaryOperator::CreateOr(NewOp, V1);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitOr(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A&B)|(A&C) -> A&(B|C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ ConstantInt *C1 = 0; Value *X = 0;
+ // (X & C1) | C2 --> (X | C2) & (C1|C2)
+ // iff (C1 & C2) == 0.
+ if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+ (RHS->getValue() & C1->getValue()) != 0 &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateAnd(Or,
+ ConstantInt::get(I.getContext(),
+ RHS->getValue() | C1->getValue()));
+ }
+
+ // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
+ if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateXor(Or,
+ ConstantInt::get(I.getContext(),
+ C1->getValue() & ~RHS->getValue()));
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ Value *A = 0, *B = 0;
+ ConstantInt *C1 = 0, *C2 = 0;
+
+ // (A | B) | C and A | (B | C) -> bswap if possible.
+ // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
+ if (match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value())) ||
+ (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
+ if (Instruction *BSwap = MatchBSwap(I))
+ return BSwap;
+ }
+
+ // (X^C)|Y -> (X|Y)^C iff Y&C == 0
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op1, C1->getValue())) {
+ Value *NOr = Builder->CreateOr(A, Op1);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // Y|(X^C) -> (X|Y)^C iff Y&C == 0
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op0, C1->getValue())) {
+ Value *NOr = Builder->CreateOr(A, Op0);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // (A & C)|(B & D)
+ Value *C = 0, *D = 0;
+ if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+ match(Op1, m_And(m_Value(B), m_Value(D)))) {
+ Value *V1 = 0, *V2 = 0;
+ C1 = dyn_cast<ConstantInt>(C);
+ C2 = dyn_cast<ConstantInt>(D);
+ if (C1 && C2) { // (A & C1)|(B & C2)
+ // If we have: ((V + N) & C1) | (V & C2)
+ // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ // replace with V+N.
+ if (C1->getValue() == ~C2->getValue()) {
+ if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
+ match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ }
+ // Or commutes, try both ways.
+ if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
+ match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ }
+ }
+
+ if ((C1->getValue() & C2->getValue()) == 0) {
+ // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
+ // iff (C1&C2) == 0 and (N&~C1) == 0
+ if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N)
+ (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V)
+ return BinaryOperator::CreateAnd(A,
+ ConstantInt::get(A->getContext(),
+ C1->getValue()|C2->getValue()));
+ // Or commutes, try both ways.
+ if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N)
+ (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V)
+ return BinaryOperator::CreateAnd(B,
+ ConstantInt::get(B->getContext(),
+ C1->getValue()|C2->getValue()));
+
+ // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
+ // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
+ ConstantInt *C3 = 0, *C4 = 0;
+ if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
+ (C3->getValue() & ~C1->getValue()) == 0 &&
+ match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
+ (C4->getValue() & ~C2->getValue()) == 0) {
+ V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
+ return BinaryOperator::CreateAnd(V2,
+ ConstantInt::get(B->getContext(),
+ C1->getValue()|C2->getValue()));
+ }
+ }
+ }
+
+ // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants.
+ // Don't do this for vector select idioms, the code generator doesn't handle
+ // them well yet.
+ if (!I.getType()->isVectorTy()) {
+ if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
+ return Match;
+ }
+
+ // ((A&~B)|(~A&B)) -> A^B
+ if ((match(C, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, D);
+ // ((~B&A)|(~A&B)) -> A^B
+ if ((match(A, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, D);
+ // ((A&~B)|(B&~A)) -> A^B
+ if ((match(C, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, B);
+ // ((~B&A)|(B&~A)) -> A^B
+ if ((match(A, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, B);
+
+ // ((A|B)&1)|(B&-2) -> (A&1) | B
+ if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
+ match(A, m_Or(m_Specific(B), m_Value(V1)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A|B)&1) -> (A&1) | B
+ if (match(B, m_Or(m_Specific(A), m_Value(V1))) ||
+ match(B, m_Or(m_Value(V1), m_Specific(A)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D);
+ if (Ret) return Ret;
+ }
+ }
+
+ // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(And);
+ }
+
+ // Canonicalize xor to the RHS.
+ bool SwappedForXor = false;
+ if (match(Op0, m_Xor(m_Value(), m_Value()))) {
+ std::swap(Op0, Op1);
+ SwappedForXor = true;
+ }
+
+ // A | ( A ^ B) -> A | B
+ // A | (~A ^ B) -> A | ~B
+ // (A & B) | (A ^ B)
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (Op0 == A || Op0 == B)
+ return BinaryOperator::CreateOr(A, B);
+
+ if (match(Op0, m_And(m_Specific(A), m_Specific(B))) ||
+ match(Op0, m_And(m_Specific(B), m_Specific(A))))
+ return BinaryOperator::CreateOr(A, B);
+
+ if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) {
+ Value *Not = Builder->CreateNot(B, B->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) {
+ Value *Not = Builder->CreateNot(A, A->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+
+ // A | ~(A | B) -> A | ~B
+ // A | ~(A ^ B) -> A | ~B
+ if (match(Op1, m_Not(m_Value(A))))
+ if (BinaryOperator *B = dyn_cast<BinaryOperator>(A))
+ if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) &&
+ Op1->hasOneUse() && (B->getOpcode() == Instruction::Or ||
+ B->getOpcode() == Instruction::Xor)) {
+ Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) :
+ B->getOperand(0);
+ Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+
+ if (SwappedForXor)
+ std::swap(Op0, Op1);
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // fold (or (cast A), (cast B)) -> (cast (or A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ CastInst *Op1C = dyn_cast<CastInst>(Op1);
+ if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+ Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
+ // Only do this if the casts both really cause code to be
+ // generated.
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+
+ // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
+ }
+ }
+
+ // or(sext(A), B) -> A ? -1 : B where A is an i1
+ // or(A, sext(B)) -> B ? -1 : A where B is an i1
+ if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+ return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
+ if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+ return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
+
+ // Note: If we've gotten to the point of visiting the outer OR, then the
+ // inner one couldn't be simplified. If it was a constant, then it won't
+ // be simplified by a later pass either, so we try swapping the inner/outer
+ // ORs in the hopes that we'll be able to simplify it this way.
+ // (X|C) | V --> (X|V) | C
+ if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
+ match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+ Value *Inner = Builder->CreateOr(A, Op1);
+ Inner->takeName(Op0);
+ return BinaryOperator::CreateOr(Inner, C1);
+ }
+
+ // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
+ // Since this OR statement hasn't been optimized further yet, we hope
+ // that this transformation will allow the new ORs to be optimized.
+ {
+ Value *X = 0, *Y = 0;
+ if (Op0->hasOneUse() && Op1->hasOneUse() &&
+ match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
+ match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
+ Value *orTrue = Builder->CreateOr(A, C);
+ Value *orFalse = Builder->CreateOr(B, D);
+ return SelectInst::Create(X, orTrue, orFalse);
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitXor(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A&B)^(A&C) -> A&(B^C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Is this a ~ operation?
+ if (Value *NotOp = dyn_castNotVal(&I)) {
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
+ if (Op0I->getOpcode() == Instruction::And ||
+ Op0I->getOpcode() == Instruction::Or) {
+ // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+ // ~(~X | Y) === (X & ~Y) - De Morgan's Law
+ if (dyn_castNotVal(Op0I->getOperand(1)))
+ Op0I->swapOperands();
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1),
+ Op0I->getOperand(1)->getName()+".not");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+ }
+
+ // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
+ // ~(X | Y) === (~X & ~Y) - De Morgan's Law
+ if (isFreeToInvert(Op0I->getOperand(0)) &&
+ isFreeToInvert(Op0I->getOperand(1))) {
+ Value *NotX =
+ Builder->CreateNot(Op0I->getOperand(0), "notlhs");
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1), "notrhs");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(NotX, NotY);
+ return BinaryOperator::CreateAnd(NotX, NotY);
+ }
+
+ } else if (Op0I->getOpcode() == Instruction::AShr) {
+ // ~(~X >>s Y) --> (X >>s Y)
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0)))
+ return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1));
+ }
+ }
+ }
+
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ if (RHS->isOne() && Op0->hasOneUse())
+ // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
+ return CmpInst::Create(CI->getOpcode(),
+ CI->getInversePredicate(),
+ CI->getOperand(0), CI->getOperand(1));
+
+ // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ (RHS == ConstantExpr::getCast(Opcode,
+ ConstantInt::getTrue(I.getContext()),
+ Op0C->getDestTy()))) {
+ CI->setPredicate(CI->getInversePredicate());
+ return CastInst::Create(Opcode, CI, Op0C->getType());
+ }
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ // ~(c-X) == X-c-1 == X+(-c-1)
+ if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
+ if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
+ Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+ Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+ ConstantInt::get(I.getType(), 1));
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+ // ~(X-c) --> (-c-1)-X
+ if (RHS->isAllOnesValue()) {
+ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
+ return BinaryOperator::CreateSub(
+ ConstantExpr::getSub(NegOp0CI,
+ ConstantInt::get(I.getType(), 1)),
+ Op0I->getOperand(0));
+ } else if (RHS->getValue().isSignBit()) {
+ // (X + C) ^ signbit -> (X + C + signbit)
+ Constant *C = ConstantInt::get(I.getContext(),
+ RHS->getValue() + Op0CI->getValue());
+ return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
+
+ }
+ } else if (Op0I->getOpcode() == Instruction::Or) {
+ // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
+ if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+ // Anything in both C1 and C2 is known to be zero, remove it from
+ // NewRHS.
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+ NewRHS = ConstantExpr::getAnd(NewRHS,
+ ConstantExpr::getNot(CommonBits));
+ Worklist.Add(Op0I);
+ I.setOperand(0, Op0I->getOperand(0));
+ I.setOperand(1, NewRHS);
+ return &I;
+ }
+ } else if (Op0I->getOpcode() == Instruction::LShr) {
+ // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+ // E1 = "X ^ C1"
+ BinaryOperator *E1;
+ ConstantInt *C1;
+ if (Op0I->hasOneUse() &&
+ (E1 = dyn_cast<BinaryOperator>(Op0I->getOperand(0))) &&
+ E1->getOpcode() == Instruction::Xor &&
+ (C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) {
+ // fold (C1 >> C2) ^ C3
+ ConstantInt *C2 = Op0CI, *C3 = RHS;
+ APInt FoldConst = C1->getValue().lshr(C2->getValue());
+ FoldConst ^= C3->getValue();
+ // Prepare the two operands.
+ Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2);
+ Opnd0->takeName(Op0I);
+ cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc());
+ Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst);
+
+ return BinaryOperator::CreateXor(Opnd0, FoldVal);
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
+ if (Op1I) {
+ Value *A, *B;
+ if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (A == Op0) { // B^(B|A) == (A|B)^B
+ Op1I->swapOperands();
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ } else if (B == Op0) { // B^(A|B) == (A|B)^B
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
+ Op1I->hasOneUse()){
+ if (A == Op0) { // A^(A&B) -> A^(B&A)
+ Op1I->swapOperands();
+ std::swap(A, B);
+ }
+ if (B == Op0) { // A^(B&A) -> (B&A)^A
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ }
+ }
+
+ BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
+ if (Op0I) {
+ Value *A, *B;
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()) {
+ if (A == Op1) // (B|A)^B == (A|B)^B
+ std::swap(A, B);
+ if (B == Op1) // (A|B)^B == A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1));
+ } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()){
+ if (A == Op1) // (A&B)^A -> (B&A)^A
+ std::swap(A, B);
+ if (B == Op1 && // (B&A)^A == ~B & A
+ !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
+ return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1);
+ }
+ }
+ }
+
+ // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
+ if (Op0I && Op1I && Op0I->isShift() &&
+ Op0I->getOpcode() == Op1I->getOpcode() &&
+ Op0I->getOperand(1) == Op1I->getOperand(1) &&
+ (Op0I->hasOneUse() || Op1I->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
+ Op0I->getName());
+ return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
+ Op1I->getOperand(1));
+ }
+
+ if (Op0I && Op1I) {
+ Value *A, *B, *C, *D;
+ // (A & B)^(A | B) -> A ^ B
+ if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A | B)^(A & B) -> A ^ B
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ }
+
+ // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return ReplaceInstUsesWith(I,
+ getNewICmpValue(isSigned, Code, Op0, Op1,
+ Builder));
+ }
+ }
+
+ // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
+ Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() &&
+ // Only do this if the casts both really cause code to be generated.
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType())) {
+ Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
new file mode 100644
index 000000000000..64cd1bd27891
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -0,0 +1,1405 @@
+//===- InstCombineCalls.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitCall and visitInvoke functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+STATISTIC(NumSimplified, "Number of library calls simplified");
+
+/// getPromotedType - Return the specified type promoted as it would be to pass
+/// though a va_arg area.
+static Type *getPromotedType(Type *Ty) {
+ if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+ if (ITy->getBitWidth() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+ }
+ return Ty;
+}
+
+/// reduceToSingleValueType - Given an aggregate type which ultimately holds a
+/// single scalar element, like {{{type}}} or [1 x type], return type.
+static Type *reduceToSingleValueType(Type *T) {
+ while (!T->isSingleValueType()) {
+ if (StructType *STy = dyn_cast<StructType>(T)) {
+ if (STy->getNumElements() == 1)
+ T = STy->getElementType(0);
+ else
+ break;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
+ if (ATy->getNumElements() == 1)
+ T = ATy->getElementType();
+ else
+ break;
+ } else
+ break;
+ }
+
+ return T;
+}
+
+Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
+ unsigned MinAlign = std::min(DstAlign, SrcAlign);
+ unsigned CopyAlign = MI->getAlignment();
+
+ if (CopyAlign < MinAlign) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ MinAlign, false));
+ return MI;
+ }
+
+ // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
+ // load/store.
+ ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
+ if (MemOpLength == 0) return 0;
+
+ // Source and destination pointer types are always "i8*" for intrinsic. See
+ // if the size is something we can handle with a single primitive load/store.
+ // A single load+store correctly handles overlapping memory in the memmove
+ // case.
+ uint64_t Size = MemOpLength->getLimitedValue();
+ assert(Size && "0-sized memory transfering should be removed already.");
+
+ if (Size > 8 || (Size&(Size-1)))
+ return 0; // If not 1/2/4/8 bytes, exit.
+
+ // Use an integer load+store unless we can find something better.
+ unsigned SrcAddrSp =
+ cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
+ unsigned DstAddrSp =
+ cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
+
+ IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
+ Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
+ Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
+
+ // Memcpy forces the use of i8* for the source and destination. That means
+ // that if you're using memcpy to move one double around, you'll get a cast
+ // from double* to i8*. We'd much rather use a double load+store rather than
+ // an i64 load+store, here because this improves the odds that the source or
+ // dest address will be promotable. See if we can find a better type than the
+ // integer datatype.
+ Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+ MDNode *CopyMD = 0;
+ if (StrippedDest != MI->getArgOperand(0)) {
+ Type *SrcETy = cast<PointerType>(StrippedDest->getType())
+ ->getElementType();
+ if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+ // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
+ // down through these levels if so.
+ SrcETy = reduceToSingleValueType(SrcETy);
+
+ if (SrcETy->isSingleValueType()) {
+ NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
+ NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
+
+ // If the memcpy has metadata describing the members, see if we can
+ // get the TBAA tag describing our copy.
+ if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
+ if (M->getNumOperands() == 3 &&
+ M->getOperand(0) &&
+ isa<ConstantInt>(M->getOperand(0)) &&
+ cast<ConstantInt>(M->getOperand(0))->isNullValue() &&
+ M->getOperand(1) &&
+ isa<ConstantInt>(M->getOperand(1)) &&
+ cast<ConstantInt>(M->getOperand(1))->getValue() == Size &&
+ M->getOperand(2) &&
+ isa<MDNode>(M->getOperand(2)))
+ CopyMD = cast<MDNode>(M->getOperand(2));
+ }
+ }
+ }
+ }
+
+ // If the memcpy/memmove provides better alignment info than we can
+ // infer, use it.
+ SrcAlign = std::max(SrcAlign, CopyAlign);
+ DstAlign = std::max(DstAlign, CopyAlign);
+
+ Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
+ Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
+ LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
+ L->setAlignment(SrcAlign);
+ if (CopyMD)
+ L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
+ StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
+ S->setAlignment(DstAlign);
+ if (CopyMD)
+ S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
+ return MI;
+}
+
+Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
+ unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
+ if (MI->getAlignment() < Alignment) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ Alignment, false));
+ return MI;
+ }
+
+ // Extract the length and alignment and fill if they are constant.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
+ ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
+ if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
+ return 0;
+ uint64_t Len = LenC->getLimitedValue();
+ Alignment = MI->getAlignment();
+ assert(Len && "0-sized memory setting should be removed already.");
+
+ // memset(s,c,n) -> store s, c (for n=1,2,4,8)
+ if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
+ Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
+
+ Value *Dest = MI->getDest();
+ unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
+ Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
+ Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
+
+ // Alignment 0 is identity for alignment 1 for memset, but not store.
+ if (Alignment == 0) Alignment = 1;
+
+ // Extract the fill value and store.
+ uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
+ StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
+ MI->isVolatile());
+ S->setAlignment(Alignment);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(LenC->getType()));
+ return MI;
+ }
+
+ return 0;
+}
+
+/// visitCallInst - CallInst simplification. This mostly only handles folding
+/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
+/// the heavy lifting.
+///
+Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ if (isFreeCall(&CI, TLI))
+ return visitFree(CI);
+
+ // If the caller function is nounwind, mark the call as nounwind, even if the
+ // callee isn't.
+ if (CI.getParent()->getParent()->doesNotThrow() &&
+ !CI.doesNotThrow()) {
+ CI.setDoesNotThrow();
+ return &CI;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
+ if (!II) return visitCallSite(&CI);
+
+ // Intrinsics cannot occur in an invoke, so handle them here instead of in
+ // visitCallSite.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
+ bool Changed = false;
+
+ // memmove/cpy/set of zero bytes is a noop.
+ if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
+ if (NumBytes->isNullValue())
+ return EraseInstFromFunction(CI);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
+ if (CI->getZExtValue() == 1) {
+ // Replace the instruction with just byte operations. We would
+ // transform other cases to loads/stores, but we don't know if
+ // alignment is sufficient.
+ }
+ }
+
+ // No other transformations apply to volatile transfers.
+ if (MI->isVolatile())
+ return 0;
+
+ // If we have a memmove and the source operation is a constant global,
+ // then the source and dest pointers can't alias, so we can change this
+ // into a call to memcpy.
+ if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
+ if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
+ if (GVSrc->isConstant()) {
+ Module *M = CI.getParent()->getParent()->getParent();
+ Intrinsic::ID MemCpyID = Intrinsic::memcpy;
+ Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+ CI.getArgOperand(1)->getType(),
+ CI.getArgOperand(2)->getType() };
+ CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
+ Changed = true;
+ }
+ }
+
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ // memmove(x,x,size) -> noop.
+ if (MTI->getSource() == MTI->getDest())
+ return EraseInstFromFunction(CI);
+ }
+
+ // If we can determine a pointer alignment that is bigger than currently
+ // set, update the alignment.
+ if (isa<MemTransferInst>(MI)) {
+ if (Instruction *I = SimplifyMemTransfer(MI))
+ return I;
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+ if (Instruction *I = SimplifyMemSet(MSI))
+ return I;
+ }
+
+ if (Changed) return II;
+ }
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::objectsize: {
+ uint64_t Size;
+ if (getObjectSize(II->getArgOperand(0), Size, TD, TLI))
+ return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
+ return 0;
+ }
+ case Intrinsic::bswap: {
+ Value *IIOperand = II->getArgOperand(0);
+ Value *X = 0;
+
+ // bswap(bswap(x)) -> x
+ if (match(IIOperand, m_BSwap(m_Value(X))))
+ return ReplaceInstUsesWith(CI, X);
+
+ // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
+ if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
+ unsigned C = X->getType()->getPrimitiveSizeInBits() -
+ IIOperand->getType()->getPrimitiveSizeInBits();
+ Value *CV = ConstantInt::get(X->getType(), C);
+ Value *V = Builder->CreateLShr(X, CV);
+ return new TruncInst(V, IIOperand->getType());
+ }
+ break;
+ }
+
+ case Intrinsic::powi:
+ if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // powi(x, 0) -> 1.0
+ if (Power->isZero())
+ return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
+ // powi(x, 1) -> x
+ if (Power->isOne())
+ return ReplaceInstUsesWith(CI, II->getArgOperand(0));
+ // powi(x, -1) -> 1/x
+ if (Power->isAllOnesValue())
+ return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
+ II->getArgOperand(0));
+ }
+ break;
+ case Intrinsic::cttz: {
+ // If all bits below the first known one are known zero,
+ // this value is constant.
+ IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ // FIXME: Try to simplify vectors of integers.
+ if (!IT) break;
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
+ unsigned TrailingZeros = KnownOne.countTrailingZeros();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, TrailingZeros)));
+
+ }
+ break;
+ case Intrinsic::ctlz: {
+ // If all bits above the first known one are known zero,
+ // this value is constant.
+ IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ // FIXME: Try to simplify vectors of integers.
+ if (!IT) break;
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
+ unsigned LeadingZeros = KnownOne.countLeadingZeros();
+ APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, LeadingZeros)));
+
+ }
+ break;
+ case Intrinsic::uadd_with_overflow: {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
+ bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
+ bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
+
+ if (LHSKnownNegative || LHSKnownPositive) {
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
+ bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
+ bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
+ if (LHSKnownNegative && RHSKnownNegative) {
+ // The sign bit is set in both cases: this MUST overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ Value *Add = Builder->CreateAdd(LHS, RHS);
+ Add->takeName(&CI);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ ConstantInt::getTrue(II->getContext())
+ };
+ StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+
+ if (LHSKnownPositive && RHSKnownPositive) {
+ // The sign bit is clear in both cases: this CANNOT overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ Value *Add = Builder->CreateNUWAdd(LHS, RHS);
+ Add->takeName(&CI);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+ }
+ }
+ // FALL THROUGH uadd into sadd
+ case Intrinsic::sadd_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
+ return II;
+ }
+
+ // X + undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X + 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ // undef - X -> undef
+ // X - undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(0)) ||
+ isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X - 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::umul_with_overflow: {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
+
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
+
+ // Get the largest possible values for each operand.
+ APInt LHSMax = ~LHSKnownZero;
+ APInt RHSMax = ~RHSKnownZero;
+
+ // If multiplying the maximum values does not overflow then we can turn
+ // this into a plain NUW mul.
+ bool Overflow;
+ LHSMax.umul_ov(RHSMax, Overflow);
+ if (!Overflow) {
+ Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ Builder->getFalse()
+ };
+ Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
+ return InsertValueInst::Create(Struct, Mul, 0);
+ }
+ } // FALL THROUGH
+ case Intrinsic::smul_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
+ return II;
+ }
+
+ // X * undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X*0 -> {0, false}
+ if (RHSI->isZero())
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+
+ // X * 1 -> {X, false}
+ if (RHSI->equalsInt(1)) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
+ Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
+ Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(1)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
+ return new StoreInst(II->getArgOperand(1), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ // These intrinsics only demand the 0th element of their input vectors. If
+ // we can simplify the input based on that, do so now.
+ unsigned VWidth =
+ cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
+ APInt DemandedElts(VWidth, 1);
+ APInt UndefElts(VWidth, 0);
+ if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
+ DemandedElts, UndefElts)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
+
+ case Intrinsic::x86_sse41_pmovsxbw:
+ case Intrinsic::x86_sse41_pmovsxwd:
+ case Intrinsic::x86_sse41_pmovsxdq:
+ case Intrinsic::x86_sse41_pmovzxbw:
+ case Intrinsic::x86_sse41_pmovzxwd:
+ case Intrinsic::x86_sse41_pmovzxdq: {
+ // pmov{s|z}x ignores the upper half of their input vectors.
+ unsigned VWidth =
+ cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
+ unsigned LowHalfElts = VWidth / 2;
+ APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
+ APInt UndefElts(VWidth, 0);
+ if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
+ InputDemandedElts,
+ UndefElts)) {
+ II->setArgOperand(0, TmpV);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
+ assert(Mask->getType()->getVectorNumElements() == 16 &&
+ "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ Constant *Elt = Mask->getAggregateElement(i);
+ if (Elt == 0 ||
+ !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
+ Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
+ Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getAggregateElement(i)))
+ continue;
+ unsigned Idx =
+ cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+
+ if (ExtractedElts[Idx] == 0) {
+ ExtractedElts[Idx] =
+ Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ Builder->getInt32(Idx&15));
+ }
+
+ // Insert this value into the result vector.
+ Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
+ Builder->getInt32(i));
+ }
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+ }
+ }
+ break;
+
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
+ unsigned AlignArg = II->getNumArgOperands() - 1;
+ ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
+ if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
+ II->setArgOperand(AlignArg,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ MemAlign, false));
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::arm_neon_vmulls:
+ case Intrinsic::arm_neon_vmullu: {
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+
+ // Handle mul by zero first:
+ if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
+ return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
+ }
+
+ // Check for constant LHS & RHS - in this case we just simplify.
+ bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu);
+ VectorType *NewVT = cast<VectorType>(II->getType());
+ unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth();
+ if (ConstantDataVector *CV0 = dyn_cast<ConstantDataVector>(Arg0)) {
+ if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
+ VectorType* VT = cast<VectorType>(CV0->getType());
+ SmallVector<Constant*, 4> NewElems;
+ for (unsigned i = 0; i < VT->getNumElements(); ++i) {
+ APInt CV0E =
+ (cast<ConstantInt>(CV0->getAggregateElement(i)))->getValue();
+ CV0E = Zext ? CV0E.zext(NewWidth) : CV0E.sext(NewWidth);
+ APInt CV1E =
+ (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue();
+ CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth);
+ NewElems.push_back(
+ ConstantInt::get(NewVT->getElementType(), CV0E * CV1E));
+ }
+ return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems));
+ }
+
+ // Couldn't simplify - cannonicalize constant to the RHS.
+ std::swap(Arg0, Arg1);
+ }
+
+ // Handle mul by one:
+ if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
+ if (ConstantInt *Splat =
+ dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) {
+ if (Splat->isOne()) {
+ if (Zext)
+ return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
+ // else
+ return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
+ }
+ }
+ }
+
+ break;
+ }
+
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ BasicBlock::iterator BI = SS;
+ if (&*++BI == II)
+ return EraseInstFromFunction(CI);
+ }
+ }
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI = II;
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ // If there is a stackrestore below this one, remove this one.
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return EraseInstFromFunction(CI);
+ // Otherwise, ignore the intrinsic.
+ } else {
+ // If we found a non-intrinsic call, we can't remove the stack
+ // restore.
+ CannotRemove = true;
+ break;
+ }
+ }
+ }
+
+ // If the stack restore is in a return, resume, or unwind block and if there
+ // are no allocas or calls between the restore and the return, nuke the
+ // restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
+ return EraseInstFromFunction(CI);
+ break;
+ }
+ }
+
+ return visitCallSite(II);
+}
+
+// InvokeInst simplification
+//
+Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+ return visitCallSite(&II);
+}
+
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+ const CastInst * const CI,
+ const DataLayout * const TD,
+ const int ix) {
+ if (!CI->isLosslessCast())
+ return false;
+
+ // The size of ByVal arguments is derived from the type, so we
+ // can't change to a type with a different size. If the size were
+ // passed explicitly we could avoid this check.
+ if (!CS.isByValArgument(ix))
+ return true;
+
+ Type* SrcTy =
+ cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+ Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ if (!SrcTy->isSized() || !DstTy->isSized())
+ return false;
+ if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+ return false;
+ return true;
+}
+
+// Try to fold some different type of calls here.
+// Currently we're only working with the checking functions, memcpy_chk,
+// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
+// strcat_chk and strncat_chk.
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
+ if (CI->getCalledFunction() == 0) return 0;
+
+ if (Value *With = Simplifier->optimizeCall(CI)) {
+ ++NumSimplified;
+ return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
+ }
+
+ return 0;
+}
+
+static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
+ // Strip off at most one level of pointer casts, looking for an alloca. This
+ // is good enough in practice and simpler than handling any number of casts.
+ Value *Underlying = TrampMem->stripPointerCasts();
+ if (Underlying != TrampMem &&
+ (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem))
+ return 0;
+ if (!isa<AllocaInst>(Underlying))
+ return 0;
+
+ IntrinsicInst *InitTrampoline = 0;
+ for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end();
+ I != E; I++) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I);
+ if (!II)
+ return 0;
+ if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
+ if (InitTrampoline)
+ // More than one init_trampoline writes to this value. Give up.
+ return 0;
+ InitTrampoline = II;
+ continue;
+ }
+ if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
+ // Allow any number of calls to adjust.trampoline.
+ continue;
+ return 0;
+ }
+
+ // No call to init.trampoline found.
+ if (!InitTrampoline)
+ return 0;
+
+ // Check that the alloca is being used in the expected way.
+ if (InitTrampoline->getOperand(0) != TrampMem)
+ return 0;
+
+ return InitTrampoline;
+}
+
+static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
+ Value *TrampMem) {
+ // Visit all the previous instructions in the basic block, and try to find a
+ // init.trampoline which has a direct path to the adjust.trampoline.
+ for (BasicBlock::iterator I = AdjustTramp,
+ E = AdjustTramp->getParent()->begin(); I != E; ) {
+ Instruction *Inst = --I;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
+ II->getOperand(0) == TrampMem)
+ return II;
+ if (Inst->mayWriteToMemory())
+ return 0;
+ }
+ return 0;
+}
+
+// Given a call to llvm.adjust.trampoline, find and return the corresponding
+// call to llvm.init.trampoline if the call to the trampoline can be optimized
+// to a direct call to a function. Otherwise return NULL.
+//
+static IntrinsicInst *FindInitTrampoline(Value *Callee) {
+ Callee = Callee->stripPointerCasts();
+ IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
+ if (!AdjustTramp ||
+ AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
+ return 0;
+
+ Value *TrampMem = AdjustTramp->getOperand(0);
+
+ if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
+ return IT;
+ if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
+ return IT;
+ return 0;
+}
+
+// visitCallSite - Improvements for call and invoke instructions.
+//
+Instruction *InstCombiner::visitCallSite(CallSite CS) {
+ if (isAllocLikeFn(CS.getInstruction(), TLI))
+ return visitAllocSite(*CS.getInstruction());
+
+ bool Changed = false;
+
+ // If the callee is a pointer to a function, attempt to move any casts to the
+ // arguments of the call/invoke.
+ Value *Callee = CS.getCalledValue();
+ if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
+ return 0;
+
+ if (Function *CalleeF = dyn_cast<Function>(Callee))
+ // If the call and callee calling conventions don't match, this call must
+ // be unreachable, as the call is undefined.
+ if (CalleeF->getCallingConv() != CS.getCallingConv() &&
+ // Only do this for calls to a function with a body. A prototype may
+ // not actually end up matching the implementation's calling conv for a
+ // variety of reasons (e.g. it may be written in assembly).
+ !CalleeF->isDeclaration()) {
+ Instruction *OldCall = CS.getInstruction();
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ OldCall);
+ // If OldCall does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!OldCall->getType()->isVoidTy())
+ ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
+ if (isa<CallInst>(OldCall))
+ return EraseInstFromFunction(*OldCall);
+
+ // We cannot remove an invoke, because it would change the CFG, just
+ // change the callee to a null pointer.
+ cast<InvokeInst>(OldCall)->setCalledFunction(
+ Constant::getNullValue(CalleeF->getType()));
+ return 0;
+ }
+
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ // If CS does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!CS.getInstruction()->getType()->isVoidTy())
+ ReplaceInstUsesWith(*CS.getInstruction(),
+ UndefValue::get(CS.getInstruction()->getType()));
+
+ if (isa<InvokeInst>(CS.getInstruction())) {
+ // Can't remove an invoke because we cannot change the CFG.
+ return 0;
+ }
+
+ // This instruction is not reachable, just remove it. We insert a store to
+ // undef so that we know that this code is not reachable, despite the fact
+ // that we can't modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ CS.getInstruction());
+
+ return EraseInstFromFunction(*CS.getInstruction());
+ }
+
+ if (IntrinsicInst *II = FindInitTrampoline(Callee))
+ return transformCallThroughTrampoline(CS, II);
+
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ if (FTy->isVarArg()) {
+ int ix = FTy->getNumParams();
+ // See if we can optimize any arguments passed through the varargs area of
+ // the call.
+ for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
+ E = CS.arg_end(); I != E; ++I, ++ix) {
+ CastInst *CI = dyn_cast<CastInst>(*I);
+ if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+ *I = CI->getOperand(0);
+ Changed = true;
+ }
+ }
+ }
+
+ if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+ // Inline asm calls cannot throw - mark them 'nounwind'.
+ CS.setDoesNotThrow();
+ Changed = true;
+ }
+
+ // Try to optimize the call if possible, we require DataLayout for most of
+ // this. None of these calls are seen as possibly dead so go ahead and
+ // delete the instruction now.
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ Instruction *I = tryOptimizeCall(CI, TD);
+ // If we changed something return the result, etc. Otherwise let
+ // the fallthrough check.
+ if (I) return EraseInstFromFunction(*I);
+ }
+
+ return Changed ? CS.getInstruction() : 0;
+}
+
+// transformConstExprCastCall - If the callee is a constexpr cast of a function,
+// attempt to move the cast to the arguments of the call/invoke.
+//
+bool InstCombiner::transformConstExprCastCall(CallSite CS) {
+ Function *Callee =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (Callee == 0)
+ return false;
+ Instruction *Caller = CS.getInstruction();
+ const AttributeSet &CallerPAL = CS.getAttributes();
+
+ // Okay, this is a cast from a function to a different type. Unless doing so
+ // would cause a type conversion of one of our arguments, change this call to
+ // be a direct call with arguments casted to the appropriate types.
+ //
+ FunctionType *FT = Callee->getFunctionType();
+ Type *OldRetTy = Caller->getType();
+ Type *NewRetTy = FT->getReturnType();
+
+ if (NewRetTy->isStructTy())
+ return false; // TODO: Handle multiple return values.
+
+ // Check to see if we are changing the return type...
+ if (OldRetTy != NewRetTy) {
+ if (Callee->isDeclaration() &&
+ // Conversion is ok if changing from one pointer type to another or from
+ // a pointer to an integer of the same size.
+ !((OldRetTy->isPointerTy() || !TD ||
+ OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
+ (NewRetTy->isPointerTy() || !TD ||
+ NewRetTy == TD->getIntPtrType(Caller->getContext()))))
+ return false; // Cannot transform this return value.
+
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
+ return false; // Cannot transform this return value.
+
+ if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
+ AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
+ if (RAttrs.
+ hasAttributes(AttributeFuncs::
+ typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex))
+ return false; // Attribute not compatible with transformed value.
+ }
+
+ // If the callsite is an invoke instruction, and the return value is used by
+ // a PHI node in a successor, we cannot change the return type of the call
+ // because there is no place to put the cast instruction (without breaking
+ // the critical edge). Bail out in this case.
+ if (!Caller->use_empty())
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI)
+ if (PHINode *PN = dyn_cast<PHINode>(*UI))
+ if (PN->getParent() == II->getNormalDest() ||
+ PN->getParent() == II->getUnwindDest())
+ return false;
+ }
+
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
+
+ CallSite::arg_iterator AI = CS.arg_begin();
+ for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
+ Type *ParamTy = FT->getParamType(i);
+ Type *ActTy = (*AI)->getType();
+
+ if (!CastInst::isCastable(ActTy, ParamTy))
+ return false; // Cannot transform this parameter value.
+
+ if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
+ hasAttributes(AttributeFuncs::
+ typeIncompatible(ParamTy, i + 1), i + 1))
+ return false; // Attribute not compatible with transformed value.
+
+ // If the parameter is passed as a byval argument, then we have to have a
+ // sized type and the sized type has to have the same size as the old type.
+ if (ParamTy != ActTy &&
+ CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
+ Attribute::ByVal)) {
+ PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
+ if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
+ return false;
+
+ Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+ if (TD->getTypeAllocSize(CurElTy) !=
+ TD->getTypeAllocSize(ParamPTy->getElementType()))
+ return false;
+ }
+
+ // Converting from one pointer type to another or between a pointer and an
+ // integer of the same size is safe even if we do not have a body.
+ bool isConvertible = ActTy == ParamTy ||
+ (TD && ((ParamTy->isPointerTy() ||
+ ParamTy == TD->getIntPtrType(Caller->getContext())) &&
+ (ActTy->isPointerTy() ||
+ ActTy == TD->getIntPtrType(Caller->getContext()))));
+ if (Callee->isDeclaration() && !isConvertible) return false;
+ }
+
+ if (Callee->isDeclaration()) {
+ // Do not delete arguments unless we have a function body.
+ if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
+ return false;
+
+ // If the callee is just a declaration, don't change the varargsness of the
+ // call. We don't want to introduce a varargs call where one doesn't
+ // already exist.
+ PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
+ if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
+ return false;
+
+ // If both the callee and the cast type are varargs, we still have to make
+ // sure the number of fixed parameters are the same or we have the same
+ // ABI issues as if we introduce a varargs call.
+ if (FT->isVarArg() &&
+ cast<FunctionType>(APTy->getElementType())->isVarArg() &&
+ FT->getNumParams() !=
+ cast<FunctionType>(APTy->getElementType())->getNumParams())
+ return false;
+ }
+
+ if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+ !CallerPAL.isEmpty())
+ // In this case we have more arguments than the new function type, but we
+ // won't be dropping them. Check that these extra arguments have attributes
+ // that are compatible with being a vararg call argument.
+ for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+ unsigned Index = CallerPAL.getSlotIndex(i - 1);
+ if (Index <= FT->getNumParams())
+ break;
+
+ // Check if it has an attribute that's incompatible with varargs.
+ AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
+ if (PAttrs.hasAttribute(Index, Attribute::StructRet))
+ return false;
+ }
+
+
+ // Okay, we decided that this is a safe thing to do: go ahead and start
+ // inserting cast instructions as necessary.
+ std::vector<Value*> Args;
+ Args.reserve(NumActualArgs);
+ SmallVector<AttributeSet, 8> attrVec;
+ attrVec.reserve(NumCommonArgs);
+
+ // Get any return attributes.
+ AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
+
+ // If the return value is not being used, the type may not be compatible
+ // with the existing attributes. Wipe out any problematic attributes.
+ RAttrs.
+ removeAttributes(AttributeFuncs::
+ typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex);
+
+ // Add the new return attributes.
+ if (RAttrs.hasAttributes())
+ attrVec.push_back(AttributeSet::get(Caller->getContext(),
+ AttributeSet::ReturnIndex, RAttrs));
+
+ AI = CS.arg_begin();
+ for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
+ Type *ParamTy = FT->getParamType(i);
+ if ((*AI)->getType() == ParamTy) {
+ Args.push_back(*AI);
+ } else {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
+ false, ParamTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy));
+ }
+
+ // Add any parameter attributes.
+ AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
+ if (PAttrs.hasAttributes())
+ attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
+ PAttrs));
+ }
+
+ // If the function takes more arguments than the call was taking, add them
+ // now.
+ for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
+ Args.push_back(Constant::getNullValue(FT->getParamType(i)));
+
+ // If we are removing arguments to the function, emit an obnoxious warning.
+ if (FT->getNumParams() < NumActualArgs) {
+ // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
+ if (FT->isVarArg()) {
+ // Add all of the arguments in their promoted form to the arg list.
+ for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
+ Type *PTy = getPromotedType((*AI)->getType());
+ if (PTy != (*AI)->getType()) {
+ // Must promote to pass through va_arg area!
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(*AI, false, PTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
+ } else {
+ Args.push_back(*AI);
+ }
+
+ // Add any parameter attributes.
+ AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
+ if (PAttrs.hasAttributes())
+ attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
+ PAttrs));
+ }
+ }
+ }
+
+ AttributeSet FnAttrs = CallerPAL.getFnAttributes();
+ if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
+ attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));
+
+ if (NewRetTy->isVoidTy())
+ Caller->setName(""); // Void type should not have a name.
+
+ const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
+ attrVec);
+
+ Instruction *NC;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
+ II->getUnwindDest(), Args);
+ NC->takeName(II);
+ cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
+ } else {
+ CallInst *CI = cast<CallInst>(Caller);
+ NC = Builder->CreateCall(Callee, Args);
+ NC->takeName(CI);
+ if (CI->isTailCall())
+ cast<CallInst>(NC)->setTailCall();
+ cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+ cast<CallInst>(NC)->setAttributes(NewCallerPAL);
+ }
+
+ // Insert a cast of the return type as necessary.
+ Value *NV = NC;
+ if (OldRetTy != NV->getType() && !Caller->use_empty()) {
+ if (!NV->getType()->isVoidTy()) {
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(NC, false, OldRetTy, false);
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy);
+ NC->setDebugLoc(Caller->getDebugLoc());
+
+ // If this is an invoke instruction, we should insert it after the first
+ // non-phi, instruction in the normal successor block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
+ InsertNewInstBefore(NC, *I);
+ } else {
+ // Otherwise, it's a call, just insert cast right after the call.
+ InsertNewInstBefore(NC, *Caller);
+ }
+ Worklist.AddUsersToWorkList(*Caller);
+ } else {
+ NV = UndefValue::get(Caller->getType());
+ }
+ }
+
+ if (!Caller->use_empty())
+ ReplaceInstUsesWith(*Caller, NV);
+
+ EraseInstFromFunction(*Caller);
+ return true;
+}
+
+// transformCallThroughTrampoline - Turn a call to a function created by
+// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
+// underlying function.
+//
+Instruction *
+InstCombiner::transformCallThroughTrampoline(CallSite CS,
+ IntrinsicInst *Tramp) {
+ Value *Callee = CS.getCalledValue();
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const AttributeSet &Attrs = CS.getAttributes();
+
+ // If the call already has the 'nest' attribute somewhere then give up -
+ // otherwise 'nest' would occur twice after splicing in the chain.
+ if (Attrs.hasAttrSomewhere(Attribute::Nest))
+ return 0;
+
+ assert(Tramp &&
+ "transformCallThroughTrampoline called with incorrect CallSite.");
+
+ Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
+ PointerType *NestFPTy = cast<PointerType>(NestF->getType());
+ FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
+
+ const AttributeSet &NestAttrs = NestF->getAttributes();
+ if (!NestAttrs.isEmpty()) {
+ unsigned NestIdx = 1;
+ Type *NestTy = 0;
+ AttributeSet NestAttr;
+
+ // Look for a parameter marked with the 'nest' attribute.
+ for (FunctionType::param_iterator I = NestFTy->param_begin(),
+ E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
+ if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
+ // Record the parameter type and any other attributes.
+ NestTy = *I;
+ NestAttr = NestAttrs.getParamAttributes(NestIdx);
+ break;
+ }
+
+ if (NestTy) {
+ Instruction *Caller = CS.getInstruction();
+ std::vector<Value*> NewArgs;
+ NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+
+ SmallVector<AttributeSet, 8> NewAttrs;
+ NewAttrs.reserve(Attrs.getNumSlots() + 1);
+
+ // Insert the nest argument into the call argument list, which may
+ // mean appending it. Likewise for attributes.
+
+ // Add any result attributes.
+ if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ Attrs.getRetAttributes()));
+
+ {
+ unsigned Idx = 1;
+ CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ do {
+ if (Idx == NestIdx) {
+ // Add the chain argument and attributes.
+ Value *NestVal = Tramp->getArgOperand(2);
+ if (NestVal->getType() != NestTy)
+ NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
+ NewArgs.push_back(NestVal);
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ NestAttr));
+ }
+
+ if (I == E)
+ break;
+
+ // Add the original argument and attributes.
+ NewArgs.push_back(*I);
+ AttributeSet Attr = Attrs.getParamAttributes(Idx);
+ if (Attr.hasAttributes(Idx)) {
+ AttrBuilder B(Attr, Idx);
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ Idx + (Idx >= NestIdx), B));
+ }
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Add any function attributes.
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
+ Attrs.getFnAttributes()));
+
+ // The trampoline may have been bitcast to a bogus type (FTy).
+ // Handle this by synthesizing a new function type, equal to FTy
+ // with the chain parameter inserted.
+
+ std::vector<Type*> NewTypes;
+ NewTypes.reserve(FTy->getNumParams()+1);
+
+ // Insert the chain's type into the list of parameter types, which may
+ // mean appending it.
+ {
+ unsigned Idx = 1;
+ FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end();
+
+ do {
+ if (Idx == NestIdx)
+ // Add the chain's type.
+ NewTypes.push_back(NestTy);
+
+ if (I == E)
+ break;
+
+ // Add the original type.
+ NewTypes.push_back(*I);
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Replace the trampoline call with a direct call. Let the generic
+ // code sort out any function type mismatches.
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
+ FTy->isVarArg());
+ Constant *NewCallee =
+ NestF->getType() == PointerType::getUnqual(NewFTy) ?
+ NestF : ConstantExpr::getBitCast(NestF,
+ PointerType::getUnqual(NewFTy));
+ const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
+
+ Instruction *NewCaller;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NewCaller = InvokeInst::Create(NewCallee,
+ II->getNormalDest(), II->getUnwindDest(),
+ NewArgs);
+ cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+ } else {
+ NewCaller = CallInst::Create(NewCallee, NewArgs);
+ if (cast<CallInst>(Caller)->isTailCall())
+ cast<CallInst>(NewCaller)->setTailCall();
+ cast<CallInst>(NewCaller)->
+ setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+ cast<CallInst>(NewCaller)->setAttributes(NewPAL);
+ }
+
+ return NewCaller;
+ }
+ }
+
+ // Replace the trampoline call with a direct call. Since there is no 'nest'
+ // parameter, there is no need to adjust the argument list. Let the generic
+ // code sort out any function type mismatches.
+ Constant *NewCallee =
+ NestF->getType() == PTy ? NestF :
+ ConstantExpr::getBitCast(NestF, PTy);
+ CS.setCalledFunction(NewCallee);
+ return CS.getInstruction();
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
new file mode 100644
index 000000000000..2ee1278d23dc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -0,0 +1,1802 @@
+//===- InstCombineCasts.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for cast operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
+/// expression. If so, decompose it, returning some value X, such that Val is
+/// X*Scale+Offset.
+///
+static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+ uint64_t &Offset) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ Offset = CI->getZExtValue();
+ Scale = 0;
+ return ConstantInt::get(Val->getType(), 0);
+ }
+
+ if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+ // Cannot look past anything that might overflow.
+ OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
+ if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) {
+ Scale = 1;
+ Offset = 0;
+ return Val;
+ }
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (I->getOpcode() == Instruction::Shl) {
+ // This is a value scaled by '1 << the shift amt'.
+ Scale = UINT64_C(1) << RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Mul) {
+ // This value is scaled by 'RHS'.
+ Scale = RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Add) {
+ // We have X+C. Check to see if we really have (X*C2)+C1,
+ // where C1 is divisible by C2.
+ unsigned SubScale;
+ Value *SubVal =
+ DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+ Offset += RHS->getZExtValue();
+ Scale = SubScale;
+ return SubVal;
+ }
+ }
+ }
+
+ // Otherwise, we can't look past this.
+ Scale = 1;
+ Offset = 0;
+ return Val;
+}
+
+/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
+/// try to eliminate the cast by moving the type information into the alloc.
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
+ AllocaInst &AI) {
+ // This requires DataLayout to get the alloca alignment and size information.
+ if (!TD) return 0;
+
+ PointerType *PTy = cast<PointerType>(CI.getType());
+
+ BuilderTy AllocaBuilder(*Builder);
+ AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+
+ // Get the type really allocated and the type casted to.
+ Type *AllocElTy = AI.getAllocatedType();
+ Type *CastElTy = PTy->getElementType();
+ if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
+
+ unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
+ if (CastElTyAlign < AllocElTyAlign) return 0;
+
+ // If the allocation has multiple uses, only promote it if we are strictly
+ // increasing the alignment of the resultant allocation. If we keep it the
+ // same, we open the door to infinite loops of various kinds.
+ if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
+
+ uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
+ if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+
+ // If the allocation has multiple uses, only promote it if we're not
+ // shrinking the amount of memory being allocated.
+ uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy);
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+
+ // See if we can satisfy the modulus by pulling a scale out of the array
+ // size argument.
+ unsigned ArraySizeScale;
+ uint64_t ArrayOffset;
+ Value *NumElements = // See if the array size is a decomposable linear expr.
+ DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+
+ // If we can now satisfy the modulus, by using a non-1 scale, we really can
+ // do the xform.
+ if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
+ (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0;
+
+ unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
+ Value *Amt = 0;
+ if (Scale == 1) {
+ Amt = NumElements;
+ } else {
+ Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
+ // Insert before the alloca, not before the cast.
+ Amt = AllocaBuilder.CreateMul(Amt, NumElements);
+ }
+
+ if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+ Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
+ Offset, true);
+ Amt = AllocaBuilder.CreateAdd(Amt, Off);
+ }
+
+ AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+ New->setAlignment(AI.getAlignment());
+ New->takeName(&AI);
+
+ // If the allocation has multiple real uses, insert a cast and change all
+ // things that used it to use the new cast. This will also hack on CI, but it
+ // will die soon.
+ if (!AI.hasOneUse()) {
+ // New is the allocation instruction, pointer typed. AI is the original
+ // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
+ Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
+ ReplaceInstUsesWith(AI, NewCast);
+ }
+ return ReplaceInstUsesWith(CI, New);
+}
+
+/// EvaluateInDifferentType - Given an expression that
+/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
+/// insert the code to evaluate the expression.
+Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
+ bool isSigned) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ C = ConstantFoldConstantExpression(CE, TD, TLI);
+ return C;
+ }
+
+ // Otherwise, it must be an instruction.
+ Instruction *I = cast<Instruction>(V);
+ Instruction *Res = 0;
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl:
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
+ Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the source type of the cast is the type we're trying for then we can
+ // just return the source. There's no need to insert it because it is not
+ // new.
+ if (I->getOperand(0)->getType() == Ty)
+ return I->getOperand(0);
+
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
+ // This also handles the case of zext(trunc(x)) -> zext(x).
+ Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
+ Opc == Instruction::SExt);
+ break;
+ case Instruction::Select: {
+ Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+ Res = SelectInst::Create(I->getOperand(0), True, False);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *OPN = cast<PHINode>(I);
+ PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
+ for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+ Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ NPN->addIncoming(V, OPN->getIncomingBlock(i));
+ }
+ Res = NPN;
+ break;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ llvm_unreachable("Unreachable!");
+ }
+
+ Res->takeName(I);
+ return InsertNewInstWith(Res, *I);
+}
+
+
+/// This function is a wrapper around CastInst::isEliminableCastPair. It
+/// simply extracts arguments and returns what that function returns.
+static Instruction::CastOps
+isEliminableCastPair(
+ const CastInst *CI, ///< The first cast instruction
+ unsigned opcode, ///< The opcode of the second cast instruction
+ Type *DstTy, ///< The target type for the second cast instruction
+ DataLayout *TD ///< The target data for pointer size
+) {
+
+ Type *SrcTy = CI->getOperand(0)->getType(); // A from above
+ Type *MidTy = CI->getType(); // B from above
+
+ // Get the opcodes of the two Cast instructions
+ Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opcode);
+ Type *SrcIntPtrTy = TD && SrcTy->isPtrOrPtrVectorTy() ?
+ TD->getIntPtrType(SrcTy) : 0;
+ Type *MidIntPtrTy = TD && MidTy->isPtrOrPtrVectorTy() ?
+ TD->getIntPtrType(MidTy) : 0;
+ Type *DstIntPtrTy = TD && DstTy->isPtrOrPtrVectorTy() ?
+ TD->getIntPtrType(DstTy) : 0;
+ unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
+ DstTy, SrcIntPtrTy, MidIntPtrTy,
+ DstIntPtrTy);
+
+ // We don't want to form an inttoptr or ptrtoint that converts to an integer
+ // type that differs from the pointer size.
+ if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
+ (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
+ Res = 0;
+
+ return Instruction::CastOps(Res);
+}
+
+/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+/// results in any code being generated and is interesting to optimize out. If
+/// the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+ Type *Ty) {
+ // Noop casts and casts of constants should be eliminated trivially.
+ if (V->getType() == Ty || isa<Constant>(V)) return false;
+
+ // If this is another cast that can be eliminated, we prefer to have it
+ // eliminated.
+ if (const CastInst *CI = dyn_cast<CastInst>(V))
+ if (isEliminableCastPair(CI, opc, Ty, TD))
+ return false;
+
+ // If this is a vector sext from a compare, then we don't want to break the
+ // idiom where each element of the extended vector is either zero or all ones.
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
+ return false;
+
+ return true;
+}
+
+
+/// @brief Implement the transforms common to all CastInst visitors.
+Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
+ // eliminate it now.
+ if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
+ if (Instruction::CastOps opc =
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
+ // The first cast (CSrc) is eliminable so we need to fix up or replace
+ // the second cast (CI). CSrc will then have a good chance of being dead.
+ return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
+ }
+ }
+
+ // If we are casting a select then fold the cast into the select
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src))
+ if (Instruction *NV = FoldOpIntoSelect(CI, SI))
+ return NV;
+
+ // If we are casting a PHI then fold the cast into the PHI
+ if (isa<PHINode>(Src)) {
+ // We don't do this if this would create a PHI node with an illegal type if
+ // it is currently legal.
+ if (!Src->getType()->isIntegerTy() ||
+ !CI.getType()->isIntegerTy() ||
+ ShouldChangeType(CI.getType(), Src->getType()))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+ }
+
+ return 0;
+}
+
+/// CanEvaluateTruncated - Return true if we can evaluate the specified
+/// expression tree as type Ty instead of its larger type, and arrive with the
+/// same value. This is used by code that tries to eliminate truncates.
+///
+/// Ty will always be a type smaller than V. We should return true if trunc(V)
+/// can be computed by computing V in the smaller type. If V is an instruction,
+/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
+/// makes sense if x and y can be efficiently truncated.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateTruncated(Value *V, Type *Ty) {
+ // We can always evaluate constants in another type.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ Type *OrigTy = V->getType();
+
+ // If this is an extension from the dest type, we can eliminate it, even if it
+ // has multiple uses.
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // These operators can all arbitrarily be extended or truncated.
+ return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty);
+
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ // UDiv and URem can be truncated if all the truncated bits are zero.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigBitWidth) {
+ APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
+ if (MaskedValueIsZero(I->getOperand(0), Mask) &&
+ MaskedValueIsZero(I->getOperand(1), Mask)) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty);
+ }
+ }
+ break;
+ }
+ case Instruction::Shl:
+ // If we are truncating the result of this SHL, and if it's a shift of a
+ // constant amount, we can always perform a SHL in a smaller type.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (CI->getLimitedValue(BitWidth) < BitWidth)
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+ break;
+ case Instruction::LShr:
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+ }
+ break;
+ case Instruction::Trunc:
+ // trunc(trunc(x)) -> trunc(x)
+ return true;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+ // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
+ return true;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
+ CanEvaluateTruncated(SI->getFalseValue(), Ty);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty))
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
+
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ Type *DestTy = CI.getType(), *SrcTy = Src->getType();
+
+ // Attempt to truncate the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateTruncated(Src, DestTy)) {
+
+ // If this cast is a truncate, evaluting in a different type always
+ // eliminates the cast, so it is always a win.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid cast: " << CI << '\n');
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
+ if (DestTy->getScalarSizeInBits() == 1) {
+ Constant *One = ConstantInt::get(Src->getType(), 1);
+ Src = Builder->CreateAnd(Src, One);
+ Value *Zero = Constant::getNullValue(Src->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+ }
+
+ // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+ Value *A = 0; ConstantInt *Cst = 0;
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
+ // We have three types to worry about here, the type of A, the source of
+ // the truncate (MidSize), and the destination of the truncate. We know that
+ // ASize < MidSize and MidSize > ResultSize, but don't know the relation
+ // between ASize and ResultSize.
+ unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+ // If the shift amount is larger than the size of A, then the result is
+ // known to be zero because all the input bits got shifted out.
+ if (Cst->getZExtValue() >= ASize)
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+ // Since we're doing an lshr and a zero extend, and know that the shift
+ // amount is smaller than ASize, it is always safe to do the shift in A's
+ // type, then zero extend or truncate to the result.
+ Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ }
+
+ // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
+ // type isn't non-native.
+ if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
+ ShouldChangeType(Src->getType(), CI.getType()) &&
+ match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
+ Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr");
+ return BinaryOperator::CreateAnd(NewTrunc,
+ ConstantExpr::getTrunc(Cst, CI.getType()));
+ }
+
+ return 0;
+}
+
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform) {
+ // If we are just checking for a icmp eq of a single bit and zext'ing it
+ // to an integer, then shift the bit to the appropriate place and then
+ // cast to integer to avoid the comparison.
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ const APInt &Op1CV = Op1C->getValue();
+
+ // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
+ // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+ (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) {
+ if (!DoXform) return ICI;
+
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getScalarSizeInBits()-1);
+ In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/);
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, In->getName()+".not");
+ }
+
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+ // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ // zext (X == 1) to i32 --> X iff X has only the low bit set.
+ // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 0) to i32 --> X iff X has only the low bit set.
+ // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ // This only works for EQ and NE
+ ICI->isEquality()) {
+ // If Op1C some other power of two, convert:
+ uint32_t BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+ if (!DoXform) return ICI;
+
+ bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+ if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+ // (X&4) == 2 --> false
+ // (X&4) != 2 --> true
+ Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
+ isNE);
+ Res = ConstantExpr::getZExt(Res, CI.getType());
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ uint32_t ShiftAmt = KnownZeroMask.logBase2();
+ Value *In = ICI->getOperand(0);
+ if (ShiftAmt) {
+ // Perform a logical shr by shiftamt.
+ // Insert the shift to put the result in the low bit.
+ In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
+ In->getName()+".lobit");
+ }
+
+ if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One);
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ }
+ }
+ }
+
+ // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
+ // It is also profitable to transform icmp eq into not(xor(A, B)) because that
+ // may lead to additional simplifications.
+ if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
+ uint32_t BitWidth = ITy->getBitWidth();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
+ APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
+ ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS);
+ ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS);
+
+ if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
+ APInt KnownBits = KnownZeroLHS | KnownOneLHS;
+ APInt UnknownBit = ~KnownBits;
+ if (UnknownBit.countPopulation() == 1) {
+ if (!DoXform) return ICI;
+
+ Value *Result = Builder->CreateXor(LHS, RHS);
+
+ // Mask off any bits that are set and won't be shifted away.
+ if (KnownOneLHS.uge(UnknownBit))
+ Result = Builder->CreateAnd(Result,
+ ConstantInt::get(ITy, UnknownBit));
+
+ // Shift the bit we're testing down to the lsb.
+ Result = Builder->CreateLShr(
+ Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
+ Result->takeName(ICI);
+ return ReplaceInstUsesWith(CI, Result);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// CanEvaluateZExtd - Determine if the specified value can be computed in the
+/// specified wider type and produce the same low bits. If not, return false.
+///
+/// If this function returns true, it can also return a non-zero number of bits
+/// (in BitsToClear) which indicates that the value it computes is correct for
+/// the zero extend, but that the additional BitsToClear bits need to be zero'd
+/// out. For example, to promote something like:
+///
+/// %B = trunc i64 %A to i32
+/// %C = lshr i32 %B, 8
+/// %E = zext i32 %C to i64
+///
+/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
+/// set to 8 to indicate that the promoted value needs to have bits 24-31
+/// cleared in addition to bits 32-63. Since an 'and' will be generated to
+/// clear the top bits anyway, doing this has no extra cost.
+///
+/// This function works on both vectors and scalars.
+static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
+ BitsToClear = 0;
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If the input is a truncate from the destination type, we can trivially
+ // eliminate it.
+ if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode(), Tmp;
+ switch (Opc) {
+ case Instruction::ZExt: // zext(zext(x)) -> zext(x).
+ case Instruction::SExt: // zext(sext(x)) -> sext(x).
+ case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
+ !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+ return false;
+ // These can all be promoted if neither operand has 'bits to clear'.
+ if (BitsToClear == 0 && Tmp == 0)
+ return true;
+
+ // If the operation is an AND/OR/XOR and the bits to clear are zero in the
+ // other side, BitsToClear is ok.
+ if (Tmp == 0 &&
+ (Opc == Instruction::And || Opc == Instruction::Or ||
+ Opc == Instruction::Xor)) {
+ // We use MaskedValueIsZero here for generality, but the case we care
+ // about the most is constant RHS.
+ unsigned VSize = V->getType()->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(1),
+ APInt::getHighBitsSet(VSize, BitsToClear)))
+ return true;
+ }
+
+ // Otherwise, we don't know how to analyze this BitsToClear case yet.
+ return false;
+
+ case Instruction::LShr:
+ // We can promote lshr(x, cst) if we can promote x. This requires the
+ // ultimate 'and' to clear out the high zero bits we're clearing out though.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ return false;
+ BitsToClear += Amt->getZExtValue();
+ if (BitsToClear > V->getType()->getScalarSizeInBits())
+ BitsToClear = V->getType()->getScalarSizeInBits();
+ return true;
+ }
+ // Cannot promote variable LSHR.
+ return false;
+ case Instruction::Select:
+ if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
+ !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+ // TODO: If important, we could handle the case when the BitsToClear are
+ // known zero in the disagreeing side.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+ return false;
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+ // TODO: If important, we could handle the case when the BitsToClear
+ // are known zero in the disagreeing input.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ return false;
+ }
+}
+
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
+ // If this zero extend is only used by a truncate, let the truncate be
+ // eliminated before we try to optimize this zext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ return 0;
+
+ // If one of the common conversion will work, do it.
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ unsigned BitsToClear;
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
+ "Unreasonable BitsToClear");
+
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid zero extend: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with zeros, just replace this
+ // cast with the result.
+ if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
+ DestBitSize-SrcBitsKept)))
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit an AND to clear the high bits.
+ Constant *C = ConstantInt::get(Res->getType(),
+ APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
+ return BinaryOperator::CreateAnd(Res, C);
+ }
+
+ // If this is a TRUNC followed by a ZEXT then we are dealing with integral
+ // types and if the sizes are just right we can convert this into a logical
+ // 'and' which will be much cheaper than the pair of casts.
+ if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
+ // TODO: Subsume this into EvaluateInDifferentType.
+
+ // Get the sizes of the types involved. We know that the intermediate type
+ // will be smaller than A or C, but don't know the relation between A and C.
+ Value *A = CSrc->getOperand(0);
+ unsigned SrcSize = A->getType()->getScalarSizeInBits();
+ unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If we're actually extending zero bits, then if
+ // SrcSize < DstSize: zext(a & mask)
+ // SrcSize == DstSize: a & mask
+ // SrcSize > DstSize: trunc(a) & mask
+ if (SrcSize < DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
+ Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
+ return new ZExtInst(And, CI.getType());
+ }
+
+ if (SrcSize == DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+ AndValue));
+ }
+ if (SrcSize > DstSize) {
+ Value *Trunc = Builder->CreateTrunc(A, CI.getType());
+ APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+ return BinaryOperator::CreateAnd(Trunc,
+ ConstantInt::get(Trunc->getType(),
+ AndValue));
+ }
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformZExtICmp(ICI, CI);
+
+ BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
+ if (SrcI && SrcI->getOpcode() == Instruction::Or) {
+ // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
+ // of the (zext icmp) will be transformed.
+ ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
+ ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
+ if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+ (transformZExtICmp(LHS, CI, false) ||
+ transformZExtICmp(RHS, CI, false))) {
+ Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
+ Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
+ return BinaryOperator::Create(Instruction::Or, LCast, RCast);
+ }
+ }
+
+ // zext(trunc(t) & C) -> (t & zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType())
+ return
+ BinaryOperator::CreateAnd(TI0,
+ ConstantExpr::getZExt(C, CI.getType()));
+ }
+
+ // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+ if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+ And->getOperand(1) == C)
+ if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ Value *NewAnd = Builder->CreateAnd(TI0, ZC);
+ return BinaryOperator::CreateXor(NewAnd, ZC);
+ }
+ }
+
+ // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
+ Value *X;
+ if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) &&
+ match(SrcI, m_Not(m_Value(X))) &&
+ (!X->hasOneUse() || !isa<CmpInst>(X))) {
+ Value *New = Builder->CreateZExt(X, CI.getType());
+ return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
+ }
+
+ return 0;
+}
+
+/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
+ Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
+ // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
+ if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) ||
+ (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) {
+
+ Value *Sh = ConstantInt::get(Op0->getType(),
+ Op0->getType()->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/);
+
+ if (Pred == ICmpInst::ICMP_SGT)
+ In = Builder->CreateNot(In, In->getName()+".not");
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+ // If we know that only one bit of the LHS of the icmp can be set and we
+ // have an equality comparison with zero or a power of 2, we can transform
+ // the icmp and sext into bitwise/integer operations.
+ if (ICI->hasOneUse() &&
+ ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
+ unsigned BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Op0, KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) {
+ Value *In = ICI->getOperand(0);
+
+ // If the icmp tests for a known zero bit we can constant fold it.
+ if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
+ Value *V = Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getAllOnesValue(CI.getType()) :
+ ConstantInt::getNullValue(CI.getType());
+ return ReplaceInstUsesWith(CI, V);
+ }
+
+ if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
+ // sext ((x & 2^n) == 0) -> (x >> n) - 1
+ // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
+ unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
+ // Perform a right shift to place the desired bit in the LSB.
+ if (ShiftAmt)
+ In = Builder->CreateLShr(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
+
+ // At this point "In" is either 1 or 0. Subtract 1 to turn
+ // {1, 0} -> {0, -1}.
+ In = Builder->CreateAdd(In,
+ ConstantInt::getAllOnesValue(In->getType()),
+ "sext");
+ } else {
+ // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
+ // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
+ unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
+ // Perform a left shift to place the desired bit in the MSB.
+ if (ShiftAmt)
+ In = Builder->CreateShl(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
+
+ // Distribute the bit over the whole bit width.
+ In = Builder->CreateAShr(In, ConstantInt::get(In->getType(),
+ BitWidth - 1), "sext");
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
+ }
+ }
+ }
+
+ // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed.
+ if (VectorType *VTy = dyn_cast<VectorType>(CI.getType())) {
+ if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) &&
+ Op0->getType() == CI.getType()) {
+ Type *EltTy = VTy->getElementType();
+
+ // splat the shift constant to a constant vector.
+ Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit");
+ return ReplaceInstUsesWith(CI, In);
+ }
+ }
+
+ return 0;
+}
+
+/// CanEvaluateSExtd - Return true if we can take the specified value
+/// and return it as type Ty without inserting any new casts and without
+/// changing the value of the common low bits. This is used by code that tries
+/// to promote integer operations to a wider types will allow us to eliminate
+/// the extension.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateSExtd(Value *V, Type *Ty) {
+ assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+ "Can't sign extend type to a smaller type");
+ // If this is a constant, it can be trivially promoted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is a truncate from the dest type, we can trivially eliminate it.
+ if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::SExt: // sext(sext(x)) -> sext(x)
+ case Instruction::ZExt: // sext(zext(x)) -> zext(x)
+ case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // These operators can all arbitrarily be extended if their inputs can.
+ return CanEvaluateSExtd(I->getOperand(0), Ty) &&
+ CanEvaluateSExtd(I->getOperand(1), Ty);
+
+ //case Instruction::Shl: TODO
+ //case Instruction::LShr: TODO
+
+ case Instruction::Select:
+ return CanEvaluateSExtd(I->getOperand(1), Ty) &&
+ CanEvaluateSExtd(I->getOperand(2), Ty);
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
+
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+ // If this sign extend is only used by a truncate, let the truncate be
+ // eliminated before we try to optimize this sext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ return 0;
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateSExtd(Src, DestTy)) {
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid sign extend: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, true);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with sign bit, just replace this
+ // cast with the result.
+ if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
+ ShAmt);
+ }
+
+ // If this input is a trunc from our destination, then turn sext(trunc(x))
+ // into shifts.
+ if (TruncInst *TI = dyn_cast<TruncInst>(Src))
+ if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
+ return BinaryOperator::CreateAShr(Res, ShAmt);
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformSExtICmp(ICI, CI);
+
+ // If the input is a shl/ashr pair of a same constant, then this is a sign
+ // extension from a smaller value. If we could trust arbitrary bitwidth
+ // integers, we could turn this into a truncate to the smaller bit and then
+ // use a sext for the whole extension. Since we don't, look deeper and check
+ // for a truncate. If the source and dest are the same type, eliminate the
+ // trunc and extend and just do shifts. For example, turn:
+ // %a = trunc i32 %i to i8
+ // %b = shl i8 %a, 6
+ // %c = ashr i8 %b, 6
+ // %d = sext i8 %c to i32
+ // into:
+ // %a = shl i32 %i, 30
+ // %d = ashr i32 %a, 30
+ Value *A = 0;
+ // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
+ ConstantInt *BA = 0, *CA = 0;
+ if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
+ m_ConstantInt(CA))) &&
+ BA == CA && A->getType() == CI.getType()) {
+ unsigned MidSize = Src->getType()->getScalarSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
+ unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+ Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+ A = Builder->CreateShl(A, ShAmtV, CI.getName());
+ return BinaryOperator::CreateAShr(A, ShAmtV);
+ }
+
+ return 0;
+}
+
+
+/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+ bool losesInfo;
+ APFloat F = CFP->getValueAPF();
+ (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
+ if (!losesInfo)
+ return ConstantFP::get(CFP->getContext(), F);
+ return 0;
+}
+
+/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// through it until we get the source value.
+static Value *LookThroughFPExtensions(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::FPExt)
+ return LookThroughFPExtensions(I->getOperand(0));
+
+ // If this value is a constant, return the constant in the smallest FP type
+ // that can accurately represent it. This allows us to turn
+ // (float)((double)X+2.0) into x+2.0f.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
+ return V; // No constant folding of this.
+ // See if the value can be truncated to half and then reextended.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf))
+ return V;
+ // See if the value can be truncated to float and then reextended.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+ return V;
+ if (CFP->getType()->isDoubleTy())
+ return V; // Won't shrink.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+ return V;
+ // Don't try to shrink to various long double types.
+ }
+
+ return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
+ // smaller than the destination type, we can eliminate the truncate by doing
+ // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
+ // as many builtins (sqrt, etc).
+ BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+ if (OpI && OpI->hasOneUse()) {
+ switch (OpI->getOpcode()) {
+ default: break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ Type *SrcTy = OpI->getType();
+ Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
+ if (LHSTrunc->getType() != SrcTy &&
+ RHSTrunc->getType() != SrcTy) {
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If the source types were both smaller than the destination type of
+ // the cast, do this xform.
+ if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
+ LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
+ RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
+ return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+ }
+ }
+ break;
+ }
+
+ // (fptrunc (fneg x)) -> (fneg (fptrunc x))
+ if (BinaryOperator::isFNeg(OpI)) {
+ Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1),
+ CI.getType());
+ return BinaryOperator::CreateFNeg(InnerTrunc);
+ }
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
+ if (II) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::fabs: {
+ // (fptrunc (fabs x)) -> (fabs (fptrunc x))
+ Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
+ CI.getType());
+ Type *IntrinsicType[] = { CI.getType() };
+ Function *Overload =
+ Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(),
+ II->getIntrinsicID(), IntrinsicType);
+
+ Value *Args[] = { InnerTrunc };
+ return CallInst::Create(Overload, Args, II->getName());
+ }
+ }
+ }
+
+ // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
+ if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
+ Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
+ Call->getNumArgOperands() == 1 &&
+ Call->hasOneUse()) {
+ CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
+ if (Arg && Arg->getOpcode() == Instruction::FPExt &&
+ CI.getType()->isFloatTy() &&
+ Call->getType()->isDoubleTy() &&
+ Arg->getType()->isDoubleTy() &&
+ Arg->getOperand(0)->getType()->isFloatTy()) {
+ Function *Callee = Call->getCalledFunction();
+ Module *M = CI.getParent()->getParent()->getParent();
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Callee->getAttributes(),
+ Builder->getFloatTy(),
+ Builder->getFloatTy(),
+ NULL);
+ CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
+ "sqrtfcall");
+ ret->setAttributes(Callee->getAttributes());
+
+
+ // Remove the old Call. With -fmath-errno, it won't get marked readnone.
+ ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
+ EraseInstFromFunction(*Call);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFPExt(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptoui(uitofp(X)) --> X
+ // fptoui(sitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for sitofp case, because any negative
+ // 'X' value would cause an undefined result for the fptoui.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptosi(sitofp(X)) --> X
+ // fptosi(uitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for sitofp case, because any negative
+ // 'X' value would cause an undefined result for the fptoui.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getScalarSizeInBits() <=
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+ // If the source integer type is not the intptr_t type for this target, do a
+ // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
+ // cast to be exposed to other transforms.
+ if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ TD->getPointerSizeInBits()) {
+ Type *Ty = TD->getIntPtrType(CI.getContext());
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
+ }
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ return 0;
+}
+
+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+ // If casting the result of a getelementptr instruction with no offset, turn
+ // this into a cast of the original pointer!
+ if (GEP->hasAllZeroIndices()) {
+ // Changing the cast operand is usually not a good idea but it is safe
+ // here because the pointer operand is being replaced with another
+ // pointer operand so the opcode doesn't need to change.
+ Worklist.Add(GEP);
+ CI.setOperand(0, GEP->getOperand(0));
+ return &CI;
+ }
+
+ // If the GEP has a single use, and the base pointer is a bitcast, and the
+ // GEP computes a constant offset, see if we can convert these three
+ // instructions into fewer. This typically happens with unions and other
+ // non-type-safe code.
+ APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
+ if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
+ GEP->accumulateConstantOffset(*TD, Offset)) {
+ // Get the base pointer input of the bitcast, and the type it points to.
+ Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+ Type *GEPIdxTy =
+ cast<PointerType>(OrigBase->getType())->getElementType();
+ SmallVector<Value*, 8> NewIndices;
+ if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) {
+ // If we were able to index down into an element, create the GEP
+ // and bitcast the result. This eliminates one bitcast, potentially
+ // two.
+ Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
+ Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
+ Builder->CreateGEP(OrigBase, NewIndices);
+ NGEP->takeName(GEP);
+
+ if (isa<BitCastInst>(CI))
+ return new BitCastInst(NGEP, CI.getType());
+ assert(isa<PtrToIntInst>(CI));
+ return new PtrToIntInst(NGEP, CI.getType());
+ }
+ }
+ }
+
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
+ // If the destination integer type is not the intptr_t type for this target,
+ // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
+ // to be exposed to other transforms.
+ if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) {
+ Type *Ty = TD->getIntPtrType(CI.getContext());
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty);
+ return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false);
+ }
+
+ return commonPointerCastTransforms(CI);
+}
+
+/// OptimizeVectorResize - This input value (which is known to have vector type)
+/// is being zero extended or truncated to the specified vector type. Try to
+/// replace it with a shuffle (and vector/vector bitcast) if possible.
+///
+/// The source and destination vector types may have different element types.
+static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
+ InstCombiner &IC) {
+ // We can only do this optimization if the output is a multiple of the input
+ // element size, or the input is a multiple of the output element size.
+ // Convert the input type to have the same element type as the output.
+ VectorType *SrcTy = cast<VectorType>(InVal->getType());
+
+ if (SrcTy->getElementType() != DestTy->getElementType()) {
+ // The input types don't need to be identical, but for now they must be the
+ // same size. There is no specific reason we couldn't handle things like
+ // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+ // there yet.
+ if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+ DestTy->getElementType()->getPrimitiveSizeInBits())
+ return 0;
+
+ SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+ InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+ }
+
+ // Now that the element types match, get the shuffle mask and RHS of the
+ // shuffle to use, which depends on whether we're increasing or decreasing the
+ // size of the input.
+ SmallVector<uint32_t, 16> ShuffleMask;
+ Value *V2;
+
+ if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+ // If we're shrinking the number of elements, just shuffle in the low
+ // elements from the input and use undef as the second shuffle input.
+ V2 = UndefValue::get(SrcTy);
+ for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+ ShuffleMask.push_back(i);
+
+ } else {
+ // If we're increasing the number of elements, shuffle in all of the
+ // elements from InVal and fill the rest of the result elements with zeros
+ // from a constant zero.
+ V2 = Constant::getNullValue(SrcTy);
+ unsigned SrcElts = SrcTy->getNumElements();
+ for (unsigned i = 0, e = SrcElts; i != e; ++i)
+ ShuffleMask.push_back(i);
+
+ // The excess elements reference the first element of the zero input.
+ for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
+ ShuffleMask.push_back(SrcElts);
+ }
+
+ return new ShuffleVectorInst(InVal, V2,
+ ConstantDataVector::get(V2->getContext(),
+ ShuffleMask));
+}
+
+static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
+ return Value % Ty->getPrimitiveSizeInBits() == 0;
+}
+
+static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
+ return Value / Ty->getPrimitiveSizeInBits();
+}
+
+/// CollectInsertionElements - V is a value which is inserted into a vector of
+/// VecEltTy. Look through the value to see if we can decompose it into
+/// insertions into the vector. See the example in the comment for
+/// OptimizeIntegerToVectorInsertions for the pattern this handles.
+/// The type of V is always a non-zero multiple of VecEltTy's size.
+///
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements with the elements found here.
+static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+ SmallVectorImpl<Value*> &Elements,
+ Type *VecEltTy) {
+ // Undef values never contribute useful bits to the result.
+ if (isa<UndefValue>(V)) return true;
+
+ // If we got down to a value of the right type, we win, try inserting into the
+ // right element.
+ if (V->getType() == VecEltTy) {
+ // Inserting null doesn't actually insert any elements.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return true;
+
+ // Fail if multiple elements are inserted into this slot.
+ if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ return false;
+
+ Elements[ElementIndex] = V;
+ return true;
+ }
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Figure out the # elements this provides, and bitcast it or slice it up
+ // as required.
+ unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+ VecEltTy);
+ // If the constant is the size of a vector element, we just need to bitcast
+ // it to the right type so it gets properly inserted.
+ if (NumElts == 1)
+ return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ ElementIndex, Elements, VecEltTy);
+
+ // Okay, this is a constant that covers multiple elements. Slice it up into
+ // pieces and insert each element-sized piece into the vector.
+ if (!isa<IntegerType>(C->getType()))
+ C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+ C->getType()->getPrimitiveSizeInBits()));
+ unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+ Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+ i*ElementSize));
+ Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+ if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ return false;
+ }
+ return true;
+ }
+
+ if (!V->hasOneUse()) return false;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return false;
+ switch (I->getOpcode()) {
+ default: return false; // Unhandled case.
+ case Instruction::BitCast:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::ZExt:
+ if (!isMultipleOfTypeSize(
+ I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+ VecEltTy))
+ return false;
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Or:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy) &&
+ CollectInsertionElements(I->getOperand(1), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Shl: {
+ // Must be shifting by a constant that is a multiple of the element size.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+ if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
+ unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
+
+ return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
+ Elements, VecEltTy);
+ }
+
+ }
+}
+
+
+/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
+/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// Try to rip the code out and replace it with insertelements. This is to
+/// optimize code like this:
+///
+/// %tmp37 = bitcast float %inc to i32
+/// %tmp38 = zext i32 %tmp37 to i64
+/// %tmp31 = bitcast float %inc5 to i32
+/// %tmp32 = zext i32 %tmp31 to i64
+/// %tmp33 = shl i64 %tmp32, 32
+/// %ins35 = or i64 %tmp33, %tmp38
+/// %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// Into two insertelements that do "buildvector{%inc, %inc5}".
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+ InstCombiner &IC) {
+ VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ Value *IntInput = CI.getOperand(0);
+
+ SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+ if (!CollectInsertionElements(IntInput, 0, Elements,
+ DestVecTy->getElementType()))
+ return 0;
+
+ // If we succeeded, we know that all of the element are specified by Elements
+ // or are zero if Elements has a null entry. Recast this as a set of
+ // insertions.
+ Value *Result = Constant::getNullValue(CI.getType());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] == 0) continue; // Unset element.
+
+ Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+ IC.Builder->getInt32(i));
+ }
+
+ return Result;
+}
+
+
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast. The various long double bitcasts can't get in here.
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ // We need to know the target byte order to perform this optimization.
+ if (!IC.getDataLayout()) return 0;
+
+ Value *Src = CI.getOperand(0);
+ Type *DestTy = CI.getType();
+
+ // If this is a bitcast from int to float, check to see if the int is an
+ // extraction from a vector.
+ Value *VecInput = 0;
+ // bitcast(trunc(bitcast(somevector)))
+ if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = 0;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ }
+ }
+
+ // bitcast(trunc(lshr(bitcast(somevector), cst))
+ ConstantInt *ShAmt = 0;
+ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShAmt)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+ ShAmt->getZExtValue() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
+ // If the operands are integer typed then apply the integer transforms,
+ // otherwise just apply the common ones.
+ Value *Src = CI.getOperand(0);
+ Type *SrcTy = Src->getType();
+ Type *DestTy = CI.getType();
+
+ // Get rid of casts from one type to the same type. These are useless and can
+ // be replaced by the operand.
+ if (DestTy == Src->getType())
+ return ReplaceInstUsesWith(CI, Src);
+
+ if (PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+ PointerType *SrcPTy = cast<PointerType>(SrcTy);
+ Type *DstElTy = DstPTy->getElementType();
+ Type *SrcElTy = SrcPTy->getElementType();
+
+ // If the address spaces don't match, don't eliminate the bitcast, which is
+ // required for changing types.
+ if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
+ return 0;
+
+ // If we are casting a alloca to a pointer to a type of the same
+ // size, rewrite the allocation instruction to allocate the "right" type.
+ // There is no need to modify malloc calls because it is their bitcast that
+ // needs to be cleaned up.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
+ if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+ return V;
+
+ // If the source and destination are pointers, and this cast is equivalent
+ // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
+ // This can enhance SROA and other transforms that want type-safe pointers.
+ Constant *ZeroUInt =
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
+ unsigned NumZeros = 0;
+ while (SrcElTy != DstElTy &&
+ isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
+ SrcElTy->getNumContainedTypes() /* not "{}" */) {
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+ ++NumZeros;
+ }
+
+ // If we found a path from the src to dest, create the getelementptr now.
+ if (SrcElTy == DstElTy) {
+ SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+ return GetElementPtrInst::CreateInBounds(Src, Idxs);
+ }
+ }
+
+ // Try to optimize int -> float bitcasts.
+ if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ return I;
+
+ if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+ if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
+ Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+ return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
+ }
+
+ if (isa<IntegerType>(SrcTy)) {
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ cast<VectorType>(DestTy), *this))
+ return I;
+ }
+
+ // If the input is an 'or' instruction, we may be doing shifts and ors to
+ // assemble the elements of the vector manually. Try to rip the code out
+ // and replace it with insertelements.
+ if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ return ReplaceInstUsesWith(CI, V);
+ }
+ }
+
+ if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+ if (SrcVTy->getNumElements() == 1) {
+ // If our destination is not a vector, then make this a straight
+ // scalar-scalar cast.
+ if (!DestTy->isVectorTy()) {
+ Value *Elem =
+ Builder->CreateExtractElement(Src,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ }
+
+ // Otherwise, see if our source is an insert. If so, then use the scalar
+ // component directly.
+ if (InsertElementInst *IEI =
+ dyn_cast<InsertElementInst>(CI.getOperand(0)))
+ return CastInst::Create(Instruction::BitCast, IEI->getOperand(1),
+ DestTy);
+ }
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+ // Okay, we have (bitcast (shuffle ..)). Check to see if this is
+ // a bitcast to a vector with the same # elts.
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
+ cast<VectorType>(DestTy)->getNumElements() ==
+ SVI->getType()->getNumElements() &&
+ SVI->getType()->getNumElements() ==
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ BitCastInst *Tmp;
+ // If either of the operands is a cast from CI.getType(), then
+ // evaluating the shuffle in the casted destination's type will allow
+ // us to eliminate at least one cast.
+ if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ Tmp->getOperand(0)->getType() == DestTy) ||
+ ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ Tmp->getOperand(0)->getType() == DestTy)) {
+ Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
+ Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
+ // Return a new shuffle vector. Use the same element ID's, as we
+ // know the vector types match #elts.
+ return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
+ }
+ }
+ }
+
+ if (SrcTy->isPointerTy())
+ return commonPointerCastTransforms(CI);
+ return commonCastTransforms(CI);
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
new file mode 100644
index 000000000000..a96e754f3dd0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -0,0 +1,3167 @@
+//===- InstCombineCompares.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitICmp and visitFCmp functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+static ConstantInt *getOne(Constant *C) {
+ return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
+}
+
+/// AddOne - Add one to a ConstantInt
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt
+static Constant *SubOne(Constant *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+}
+
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+
+static bool HasAddOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+ if (!IsSigned)
+ return Result->getValue().ult(In1->getValue());
+
+ if (In2->isNegative())
+ return Result->getValue().sgt(In1->getValue());
+ return Result->getValue().slt(In1->getValue());
+}
+
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
+/// overflowed for this type.
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getAdd(In1, In2);
+
+ if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasAddOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasAddOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+static bool HasSubOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+ if (!IsSigned)
+ return Result->getValue().ugt(In1->getValue());
+
+ if (In2->isNegative())
+ return Result->getValue().slt(In1->getValue());
+
+ return Result->getValue().sgt(In1->getValue());
+}
+
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getSub(In1, In2);
+
+ if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasSubOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasSubOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+/// isSignBitCheck - Given an exploded icmp instruction, return true if the
+/// comparison only checks the sign bit. If it only checks the sign bit, set
+/// TrueIfSigned if the result of the comparison is true when the input value is
+/// signed.
+static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
+ bool &TrueIfSigned) {
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: // True if LHS s< 0
+ TrueIfSigned = true;
+ return RHS->isZero();
+ case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
+ TrueIfSigned = true;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_SGT: // True if LHS s> -1
+ TrueIfSigned = false;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_UGT:
+ // True if LHS u> RHS and RHS == high-bit-mask - 1
+ TrueIfSigned = true;
+ return RHS->isMaxValue(true);
+ case ICmpInst::ICMP_UGE:
+ // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = true;
+ return RHS->getValue().isSignBit();
+ default:
+ return false;
+ }
+}
+
+/// Returns true if the exploded icmp can be expressed as a signed comparison
+/// to zero and updates the predicate accordingly.
+/// The signedness of the comparison is preserved.
+static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) {
+ if (!ICmpInst::isSigned(pred))
+ return false;
+
+ if (RHS->isZero())
+ return ICmpInst::isRelational(pred);
+
+ if (RHS->isOne()) {
+ if (pred == ICmpInst::ICMP_SLT) {
+ pred = ICmpInst::ICMP_SLE;
+ return true;
+ }
+ } else if (RHS->isAllOnesValue()) {
+ if (pred == ICmpInst::ICMP_SGT) {
+ pred = ICmpInst::ICMP_SGE;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// isHighOnes - Return true if the constant is of the form 1+0+.
+// This is the same as lowones(~X).
+static bool isHighOnes(const ConstantInt *CI) {
+ return (~CI->getValue() + 1).isPowerOf2();
+}
+
+/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
+/// set of known zero and one bits, compute the maximum and minimum values that
+/// could have the specified known zero and known one bits, returning them in
+/// min/max.
+static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
+ const APInt& KnownOne,
+ APInt& Min, APInt& Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
+ // bit if it is unknown.
+ Min = KnownOne;
+ Max = KnownOne|UnknownBits;
+
+ if (UnknownBits.isNegative()) { // Sign bit is unknown
+ Min.setBit(Min.getBitWidth()-1);
+ Max.clearBit(Max.getBitWidth()-1);
+ }
+}
+
+// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
+// a set of known zero and one bits, compute the maximum and minimum values that
+// could have the specified known zero and known one bits, returning them in
+// min/max.
+static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+ const APInt &KnownOne,
+ APInt &Min, APInt &Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when the unknown bits are all zeros.
+ Min = KnownOne;
+ // The maximum value is when the unknown bits are all ones.
+ Max = KnownOne|UnknownBits;
+}
+
+
+
+/// FoldCmpLoadFromIndexedGlobal - Called we see this pattern:
+/// cmp pred (load (gep GV, ...)), cmpcst
+/// where GV is a global variable with a constant initializer. Try to simplify
+/// this into some simple computation that does not need the load. For example
+/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
+///
+/// If AndCst is non-null, then the loaded value is masked with that constant
+/// before doing the comparison. This handles cases like "A[i]&4 == 0".
+Instruction *InstCombiner::
+FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
+ CmpInst &ICI, ConstantInt *AndCst) {
+ // We need TD information to know the pointer size unless this is inbounds.
+ if (!GEP->isInBounds() && TD == 0) return 0;
+
+ Constant *Init = GV->getInitializer();
+ if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
+ return 0;
+
+ uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
+ if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays.
+
+ // There are many forms of this optimization we can handle, for now, just do
+ // the simple index into a single-dimensional array.
+ //
+ // Require: GEP GV, 0, i {{, constant indices}}
+ if (GEP->getNumOperands() < 3 ||
+ !isa<ConstantInt>(GEP->getOperand(1)) ||
+ !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
+ isa<Constant>(GEP->getOperand(2)))
+ return 0;
+
+ // Check that indices after the variable are constants and in-range for the
+ // type they index. Collect the indices. This is typically for arrays of
+ // structs.
+ SmallVector<unsigned, 4> LaterIndices;
+
+ Type *EltTy = Init->getType()->getArrayElementType();
+ for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
+ ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (Idx == 0) return 0; // Variable index.
+
+ uint64_t IdxVal = Idx->getZExtValue();
+ if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index.
+
+ if (StructType *STy = dyn_cast<StructType>(EltTy))
+ EltTy = STy->getElementType(IdxVal);
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
+ if (IdxVal >= ATy->getNumElements()) return 0;
+ EltTy = ATy->getElementType();
+ } else {
+ return 0; // Unknown type.
+ }
+
+ LaterIndices.push_back(IdxVal);
+ }
+
+ enum { Overdefined = -3, Undefined = -2 };
+
+ // Variables for our state machines.
+
+ // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
+ // "i == 47 | i == 87", where 47 is the first index the condition is true for,
+ // and 87 is the second (and last) index. FirstTrueElement is -2 when
+ // undefined, otherwise set to the first true element. SecondTrueElement is
+ // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
+ int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
+
+ // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
+ // form "i != 47 & i != 87". Same state transitions as for true elements.
+ int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
+
+ /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
+ /// define a state machine that triggers for ranges of values that the index
+ /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
+ /// This is -2 when undefined, -3 when overdefined, and otherwise the last
+ /// index in the range (inclusive). We use -2 for undefined here because we
+ /// use relative comparisons and don't want 0-1 to match -1.
+ int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
+
+ // MagicBitvector - This is a magic bitvector where we set a bit if the
+ // comparison is true for element 'i'. If there are 64 elements or less in
+ // the array, this will fully represent all the comparison results.
+ uint64_t MagicBitvector = 0;
+
+
+ // Scan the array and see if one of our patterns matches.
+ Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
+ for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
+ Constant *Elt = Init->getAggregateElement(i);
+ if (Elt == 0) return 0;
+
+ // If this is indexing an array of structures, get the structure element.
+ if (!LaterIndices.empty())
+ Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
+
+ // If the element is masked, handle it.
+ if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
+
+ // Find out if the comparison would be true or false for the i'th element.
+ Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
+ CompareRHS, TD, TLI);
+ // If the result is undef for this element, ignore it.
+ if (isa<UndefValue>(C)) {
+ // Extend range state machines to cover this element in case there is an
+ // undef in the middle of the range.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ continue;
+ }
+
+ // If we can't compute the result for any of the elements, we have to give
+ // up evaluating the entire conditional.
+ if (!isa<ConstantInt>(C)) return 0;
+
+ // Otherwise, we know if the comparison is true or false for this element,
+ // update our state machines.
+ bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
+
+ // State machine for single/double/range index comparison.
+ if (IsTrueForElt) {
+ // Update the TrueElement state machine.
+ if (FirstTrueElement == Undefined)
+ FirstTrueElement = TrueRangeEnd = i; // First true element.
+ else {
+ // Update double-compare state machine.
+ if (SecondTrueElement == Undefined)
+ SecondTrueElement = i;
+ else
+ SecondTrueElement = Overdefined;
+
+ // Update range state machine.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ else
+ TrueRangeEnd = Overdefined;
+ }
+ } else {
+ // Update the FalseElement state machine.
+ if (FirstFalseElement == Undefined)
+ FirstFalseElement = FalseRangeEnd = i; // First false element.
+ else {
+ // Update double-compare state machine.
+ if (SecondFalseElement == Undefined)
+ SecondFalseElement = i;
+ else
+ SecondFalseElement = Overdefined;
+
+ // Update range state machine.
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ else
+ FalseRangeEnd = Overdefined;
+ }
+ }
+
+
+ // If this element is in range, update our magic bitvector.
+ if (i < 64 && IsTrueForElt)
+ MagicBitvector |= 1ULL << i;
+
+ // If all of our states become overdefined, bail out early. Since the
+ // predicate is expensive, only check it every 8 elements. This is only
+ // really useful for really huge arrays.
+ if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
+ SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
+ FalseRangeEnd == Overdefined)
+ return 0;
+ }
+
+ // Now that we've scanned the entire array, emit our new comparison(s). We
+ // order the state machines in complexity of the generated code.
+ Value *Idx = GEP->getOperand(2);
+
+ // If the index is larger than the pointer size of the target, truncate the
+ // index down like the GEP would do implicitly. We don't have to do this for
+ // an inbounds GEP because the index can't be out of range.
+ if (!GEP->isInBounds() &&
+ Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
+ Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+
+ // If the comparison is only true for one or two elements, emit direct
+ // comparisons.
+ if (SecondTrueElement != Overdefined) {
+ // None true -> false.
+ if (FirstTrueElement == Undefined)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext()));
+
+ Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
+
+ // True for one element -> 'i == 47'.
+ if (SecondTrueElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
+
+ // True for two elements -> 'i == 47 | i == 72'.
+ Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
+ Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
+ Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
+ return BinaryOperator::CreateOr(C1, C2);
+ }
+
+ // If the comparison is only false for one or two elements, emit direct
+ // comparisons.
+ if (SecondFalseElement != Overdefined) {
+ // None false -> true.
+ if (FirstFalseElement == Undefined)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext()));
+
+ Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
+
+ // False for one element -> 'i != 47'.
+ if (SecondFalseElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
+
+ // False for two elements -> 'i != 47 & i != 72'.
+ Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
+ Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+ Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
+ return BinaryOperator::CreateAnd(C1, C2);
+ }
+
+ // If the comparison can be replaced with a range comparison for the elements
+ // where it is true, emit the range check.
+ if (TrueRangeEnd != Overdefined) {
+ assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
+
+ // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
+ if (FirstTrueElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ TrueRangeEnd-FirstTrueElement+1);
+ return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
+ }
+
+ // False range check.
+ if (FalseRangeEnd != Overdefined) {
+ assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
+ // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
+ if (FirstFalseElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ FalseRangeEnd-FirstFalseElement);
+ return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
+ }
+
+
+ // If a magic bitvector captures the entire comparison state
+ // of this load, replace it with computation that does:
+ // ((magic_cst >> i) & 1) != 0
+ {
+ Type *Ty = 0;
+
+ // Look for an appropriate type:
+ // - The type of Idx if the magic fits
+ // - The smallest fitting legal type if we have a DataLayout
+ // - Default to i32
+ if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
+ Ty = Idx->getType();
+ else if (TD)
+ Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+ else if (ArrayElementCount <= 32)
+ Ty = Type::getInt32Ty(Init->getContext());
+
+ if (Ty != 0) {
+ Value *V = Builder->CreateIntCast(Idx, Ty, false);
+ V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+ }
+ }
+
+ return 0;
+}
+
+
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
+/// be complex, and scales are involved. The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This later form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
+ DataLayout &TD = *IC.getDataLayout();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return 0;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If so, emit the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return 0;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ if (Offset == 0) {
+ // Cast to intptrty in case a truncation occurs. If an extension is needed,
+ // we don't need to bother extending: the extension won't affect where the
+ // computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
+ Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
+ }
+ return VariableIdx;
+ }
+
+ // Otherwise, there is an index. The computation we will do will be modulo
+ // the pointer size, so get it.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return 0;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
+ true /*Signed*/);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset");
+}
+
+/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
+/// else. At this point we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond,
+ Instruction &I) {
+ // Don't transform signed compares of GEPs into index compares. Even if the
+ // GEP is inbounds, the final add of the base pointer can have signed overflow
+ // and would change the result of the icmp.
+ // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
+ // the maximum signed value for the pointer type.
+ if (ICmpInst::isSigned(Cond))
+ return 0;
+
+ // Look through bitcasts.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+ RHS = BCI->getOperand(0);
+
+ Value *PtrBase = GEPLHS->getOperand(0);
+ if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {
+ // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
+ // This transformation (ignoring the base and scales) is valid because we
+ // know pointers can't overflow since the gep is inbounds. See if we can
+ // output an optimized form.
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
+
+ // If not, synthesize the offset the hard way.
+ if (Offset == 0)
+ Offset = EmitGEPOffset(GEPLHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+ Constant::getNullValue(Offset->getType()));
+ } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
+ // If the base pointers are different, but the indices are the same, just
+ // compare the base pointer.
+ if (PtrBase != GEPRHS->getOperand(0)) {
+ bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
+ IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
+ GEPRHS->getOperand(0)->getType();
+ if (IndicesTheSame)
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ IndicesTheSame = false;
+ break;
+ }
+
+ // If all indices are the same, just compare the base pointers.
+ if (IndicesTheSame)
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
+ GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+
+ // If we're comparing GEPs with two base pointers that only differ in type
+ // and both GEPs have only constant indices or just one use, then fold
+ // the compare with the adjusted indices.
+ if (TD && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+ (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
+ (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
+ PtrBase->stripPointerCasts() ==
+ GEPRHS->getOperand(0)->stripPointerCasts()) {
+ Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond),
+ EmitGEPOffset(GEPLHS),
+ EmitGEPOffset(GEPRHS));
+ return ReplaceInstUsesWith(I, Cmp);
+ }
+
+ // Otherwise, the base pointers are different and the indices are
+ // different, bail out.
+ return 0;
+ }
+
+ // If one of the GEPs has all zero indices, recurse.
+ bool AllZeros = true;
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPLHS->getOperand(i)) ||
+ !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
+ ICmpInst::getSwappedPredicate(Cond), I);
+
+ // If the other GEP has all zero indices, recurse.
+ AllZeros = true;
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPRHS->getOperand(i)) ||
+ !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
+
+ bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
+ if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+ // If the GEPs only differ by one index, compare it.
+ unsigned NumDifferences = 0; // Keep track of # differences.
+ unsigned DiffOperand = 0; // The operand that differs.
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
+ GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
+ // Irreconcilable differences.
+ NumDifferences = 2;
+ break;
+ } else {
+ if (NumDifferences++) break;
+ DiffOperand = i;
+ }
+ }
+
+ if (NumDifferences == 0) // SAME GEP?
+ return ReplaceInstUsesWith(I, // No comparison is needed here.
+ ConstantInt::get(Type::getInt1Ty(I.getContext()),
+ ICmpInst::isTrueWhenEqual(Cond)));
+
+ else if (NumDifferences == 1 && GEPsInBounds) {
+ Value *LHSV = GEPLHS->getOperand(DiffOperand);
+ Value *RHSV = GEPRHS->getOperand(DiffOperand);
+ // Make sure we do a signed comparison here.
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
+ }
+ }
+
+ // Only lower this if the icmp is the only user of the GEP or if we expect
+ // the result to fold to a constant!
+ if (TD &&
+ GEPsInBounds &&
+ (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
+ // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
+ Value *L = EmitGEPOffset(GEPLHS);
+ Value *R = EmitGEPOffset(GEPRHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
+ }
+ }
+ return 0;
+}
+
+/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
+Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+ Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred,
+ Value *TheAdd) {
+ // If we have X+0, exit early (simplifying logic below) and let it get folded
+ // elsewhere. icmp X+0, X -> icmp X, X
+ if (CI->isZero()) {
+ bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ // (X+4) == X -> false.
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+ // (X+4) != X -> true.
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+
+ // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
+ // so the values can never be equal. Similarly for all other "or equals"
+ // operators.
+
+ // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
+ // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
+ // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+ Value *R =
+ ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
+ }
+
+ // (X+1) >u X --> X <u (0-1) --> X != 255
+ // (X+2) >u X --> X <u (0-2) --> X <u 254
+ // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+
+ unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
+ ConstantInt *SMax = ConstantInt::get(X->getContext(),
+ APInt::getSignedMaxValue(BitWidth));
+
+ // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
+ // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
+ // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0
+ // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
+ // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
+ // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+
+ // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
+ // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
+ // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
+ // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
+ // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
+ // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
+
+ assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
+ Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+}
+
+/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS) {
+ ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+ const APInt &CmpRHSV = CmpRHS->getValue();
+
+ // FIXME: If the operand types don't match the type of the divide
+ // then don't attempt this transform. The code below doesn't have the
+ // logic to deal with a signed divide and an unsigned compare (and
+ // vice versa). This is because (x /s C1) <s C2 produces different
+ // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+ // (x /u C1) <u C2. Simply casting the operands and result won't
+ // work. :( The if statement below tests that condition and bails
+ // if it finds it.
+ bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+ if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+ return 0;
+ if (DivRHS->isZero())
+ return 0; // The ProdOV computation fails on divide by zero.
+ if (DivIsSigned && DivRHS->isAllOnesValue())
+ return 0; // The overflow computation also screws up here
+ if (DivRHS->isOne()) {
+ // This eliminates some funny cases with INT_MIN.
+ ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
+ return &ICI;
+ }
+
+ // Compute Prod = CI * DivRHS. We are essentially solving an equation
+ // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+ // C2 (CI). By solving for X we can turn this into a range check
+ // instead of computing a divide.
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+
+ // Determine if the product overflows by seeing if the product is
+ // not equal to the divide. Make sure we do the same kind of divide
+ // as in the LHS instruction that we're folding.
+ bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+ ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+ // Get the ICmp opcode
+ ICmpInst::Predicate Pred = ICI.getPredicate();
+
+ /// If the division is known to be exact, then there is no remainder from the
+ /// divide, so the covered range size is unit, otherwise it is the divisor.
+ ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+
+ // Figure out the interval that is being checked. For example, a comparison
+ // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+ // Compute this interval based on the constants involved and the signedness of
+ // the compare/divide. This computes a half-open interval, keeping track of
+ // whether either value in the interval overflows. After analysis each
+ // overflow variable is set to 0 if it's corresponding bound variable is valid
+ // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
+ int LoOverflow = 0, HiOverflow = 0;
+ Constant *LoBound = 0, *HiBound = 0;
+
+ if (!DivIsSigned) { // udiv
+ // e.g. X/5 op 3 --> [15, 20)
+ LoBound = Prod;
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow) {
+ // If this is not an exact divide, then many values in the range collapse
+ // to the same result value.
+ HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+ }
+
+ } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+ if (CmpRHSV == 0) { // (X / pos) op 0
+ // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
+ LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
+ HiBound = RangeSize;
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
+ LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
+ } else { // (X / pos) op neg
+ // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
+ HiBound = AddOne(Prod);
+ LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow) {
+ ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+ }
+ }
+ } else if (DivRHS->isNegative()) { // Divisor is < 0.
+ if (DivI->isExact())
+ RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ if (CmpRHSV == 0) { // (X / neg) op 0
+ // e.g. X/-5 op 0 --> [-4, 5)
+ LoBound = AddOne(RangeSize);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ if (HiBound == DivRHS) { // -INTMIN = INTMIN
+ HiOverflow = 1; // [INTMIN+1, overflow)
+ HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
+ }
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
+ // e.g. X/-5 op 3 --> [-19, -14)
+ HiBound = AddOne(Prod);
+ HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow)
+ LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
+ } else { // (X / neg) op neg
+ LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
+ LoOverflow = HiOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
+ }
+
+ // Dividing by a negative swaps the condition. LT <-> GT
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ Value *X = DivI->getOperand(0);
+ switch (Pred) {
+ default: llvm_unreachable("Unhandled icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, HiBound);
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, true));
+ case ICmpInst::ICMP_NE:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, HiBound);
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, false));
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ if (LoOverflow == +1) // Low bound is greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (LoOverflow == -1) // Low bound is less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return new ICmpInst(Pred, X, LoBound);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ if (HiOverflow == +1) // High bound greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ if (HiOverflow == -1) // High bound less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ }
+}
+
+/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)".
+Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
+ ConstantInt *ShAmt) {
+ const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = CmpRHSV.getBitWidth();
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+ return 0;
+
+ if (!ICI.isEquality()) {
+ // If we have an unsigned comparison and an ashr, we can't simplify this.
+ // Similarly for signed comparisons with lshr.
+ if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
+ return 0;
+
+ // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
+ // by a power of 2. Since we already have logic to simplify these,
+ // transform to div and then simplify the resultant comparison.
+ if (Shr->getOpcode() == Instruction::AShr &&
+ (!Shr->isExact() || ShAmtVal == TypeBits - 1))
+ return 0;
+
+ // Revisit the shift (to delete it).
+ Worklist.Add(Shr);
+
+ Constant *DivCst =
+ ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+
+ Value *Tmp =
+ Shr->getOpcode() == Instruction::AShr ?
+ Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
+ Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+
+ ICI.setOperand(0, Tmp);
+
+ // If the builder folded the binop, just return it.
+ BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+ if (TheDiv == 0)
+ return &ICI;
+
+ // Otherwise, fold this div/compare.
+ assert(TheDiv->getOpcode() == Instruction::SDiv ||
+ TheDiv->getOpcode() == Instruction::UDiv);
+
+ Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
+ assert(Res && "This div/cst should have folded!");
+ return Res;
+ }
+
+
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = CmpRHSV << ShAmtVal;
+ ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp);
+ if (Shr->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (Shr->hasOneUse() && Shr->isExact())
+ return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+
+ if (Shr->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+
+ Value *And = Builder->CreateAnd(Shr->getOperand(0),
+ Mask, Shr->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
+ }
+ return 0;
+}
+
+
+/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
+///
+Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHSI,
+ ConstantInt *RHS) {
+ const APInt &RHSV = RHS->getValue();
+
+ switch (LHSI->getOpcode()) {
+ case Instruction::Trunc:
+ if (ICI.isEquality() && LHSI->hasOneUse()) {
+ // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+ // of the high bits truncated out of x are known.
+ unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
+ SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+ ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne);
+
+ // If all the high bits are known, we can do this xform.
+ if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
+ // Pull in the high bits from known-ones set.
+ APInt NewRHS = RHS->getValue().zext(SrcBits);
+ NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits);
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(), NewRHS));
+ }
+ }
+ break;
+
+ case Instruction::Xor: // (icmp pred (xor X, XorCST), CI)
+ if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ // If this is a comparison that tests the signbit (X < 0) or (x > -1),
+ // fold the xor.
+ if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+ (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
+ Value *CompareVal = LHSI->getOperand(0);
+
+ // If the sign bit of the XorCST is not set, there is no change to
+ // the operation, just stop using the Xor.
+ if (!XorCST->isNegative()) {
+ ICI.setOperand(0, CompareVal);
+ Worklist.Add(LHSI);
+ return &ICI;
+ }
+
+ // Was the old condition true if the operand is positive?
+ bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+
+ // If so, the new one isn't.
+ isTrueIfPositive ^= true;
+
+ if (isTrueIfPositive)
+ return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
+ SubOne(RHS));
+ else
+ return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
+ AddOne(RHS));
+ }
+
+ if (LHSI->hasOneUse()) {
+ // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
+ if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
+ const APInt &SignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),
+ RHSV ^ SignBit));
+ }
+
+ // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
+ if (!ICI.isEquality() && XorCST->isMaxValue(true)) {
+ const APInt &NotSignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ Pred = ICI.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),
+ RHSV ^ NotSignBit));
+ }
+ }
+ }
+ break;
+ case Instruction::And: // (icmp pred (and X, AndCST), RHS)
+ if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
+ LHSI->getOperand(0)->hasOneUse()) {
+ ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
+
+ // If the LHS is an AND of a truncating cast, we can widen the
+ // and/compare to be the input width without changing the value
+ // produced, eliminating a cast.
+ if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
+ // We can do this transformation if either the AND constant does not
+ // have its sign bit set or if it is an equality comparison.
+ // Extending a relational comparison when we're checking the sign
+ // bit would not work.
+ if (ICI.isEquality() ||
+ (!AndCST->isNegative() && RHSV.isNonNegative())) {
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantExpr::getZExt(AndCST, Cast->getSrcTy()));
+ NewAnd->takeName(LHSI);
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantExpr::getZExt(RHS, Cast->getSrcTy()));
+ }
+ }
+
+ // If the LHS is an AND of a zext, and we have an equality compare, we can
+ // shrink the and/compare to the smaller type, eliminating the cast.
+ if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) {
+ IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
+ // Make sure we don't compare the upper bits, SimplifyDemandedBits
+ // should fold the icmp to true/false in that case.
+ if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantExpr::getTrunc(AndCST, Ty));
+ NewAnd->takeName(LHSI);
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantExpr::getTrunc(RHS, Ty));
+ }
+ }
+
+ // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
+ // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
+ // happens a LOT in code produced by the C front-end, for bitfield
+ // access.
+ BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
+ if (Shift && !Shift->isShift())
+ Shift = 0;
+
+ ConstantInt *ShAmt;
+ ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+ Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
+ Type *AndTy = AndCST->getType(); // Type of the and.
+
+ // We can fold this as long as we can't shift unknown bits
+ // into the mask. This can only happen with signed shift
+ // rights, as they sign-extend.
+ if (ShAmt) {
+ bool CanFold = Shift->isLogicalShift();
+ if (!CanFold) {
+ // To test for the bad case of the signed shr, see if any
+ // of the bits shifted in could be tested after the mask.
+ uint32_t TyBits = Ty->getPrimitiveSizeInBits();
+ int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
+
+ uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
+ if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
+ AndCST->getValue()) == 0)
+ CanFold = true;
+ }
+
+ if (CanFold) {
+ Constant *NewCst;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+ else
+ NewCst = ConstantExpr::getShl(RHS, ShAmt);
+
+ // Check to see if we are shifting out any of the bits being
+ // compared.
+ if (ConstantExpr::get(Shift->getOpcode(),
+ NewCst, ShAmt) != RHS) {
+ // If we shifted bits out, the fold is not going to work out.
+ // As a special case, check to see if this means that the
+ // result is always true or false now.
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::getFalse(ICI.getContext()));
+ if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::getTrue(ICI.getContext()));
+ } else {
+ ICI.setOperand(1, NewCst);
+ Constant *NewAndCST;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+ else
+ NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
+ LHSI->setOperand(1, NewAndCST);
+ LHSI->setOperand(0, Shift->getOperand(0));
+ Worklist.Add(Shift); // Shift is dead.
+ return &ICI;
+ }
+ }
+ }
+
+ // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is
+ // preferable because it allows the C<<Y expression to be hoisted out
+ // of a loop if Y is invariant and X is not.
+ if (Shift && Shift->hasOneUse() && RHSV == 0 &&
+ ICI.isEquality() && !Shift->isArithmeticShift() &&
+ !isa<Constant>(Shift->getOperand(0))) {
+ // Compute C << Y.
+ Value *NS;
+ if (Shift->getOpcode() == Instruction::LShr) {
+ NS = Builder->CreateShl(AndCST, Shift->getOperand(1));
+ } else {
+ // Insert a logical shift.
+ NS = Builder->CreateLShr(AndCST, Shift->getOperand(1));
+ }
+
+ // Compute X & (C << Y).
+ Value *NewAnd =
+ Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
+
+ ICI.setOperand(0, NewAnd);
+ return &ICI;
+ }
+
+ // Replace ((X & AndCST) > RHSV) with ((X & AndCST) != 0), if any
+ // bit set in (X & AndCST) will produce a result greater than RHSV.
+ if (ICI.getPredicate() == ICmpInst::ICMP_UGT) {
+ unsigned NTZ = AndCST->getValue().countTrailingZeros();
+ if ((NTZ < AndCST->getBitWidth()) &&
+ APInt::getOneBitSet(AndCST->getBitWidth(), NTZ).ugt(RHSV))
+ return new ICmpInst(ICmpInst::ICMP_NE, LHSI,
+ Constant::getNullValue(RHS->getType()));
+ }
+ }
+
+ // Try to optimize things like "A[i]&42 == 0" to index computations.
+ if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) {
+ ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1));
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C))
+ return Res;
+ }
+ }
+ break;
+
+ case Instruction::Or: {
+ if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
+ break;
+ Value *P, *Q;
+ if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
+ // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
+ // -> and (icmp eq P, null), (icmp eq Q, null).
+ Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
+ Constant::getNullValue(P->getType()));
+ Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
+ Constant::getNullValue(Q->getType()));
+ Instruction *Op;
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ Op = BinaryOperator::CreateAnd(ICIP, ICIQ);
+ else
+ Op = BinaryOperator::CreateOr(ICIP, ICIQ);
+ return Op;
+ }
+ break;
+ }
+
+ case Instruction::Mul: { // (icmp pred (mul X, Val), CI)
+ ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!Val) break;
+
+ // If this is a signed comparison to 0 and the mul is sign preserving,
+ // use the mul LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) && !Val->isZero() &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(Val->isNegative() ?
+ ICmpInst::getSwappedPredicate(pred) : pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
+ break;
+ }
+
+ case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
+ ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!ShAmt) break;
+
+ uint32_t TypeBits = RHSV.getBitWidth();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ if (ICI.isEquality()) {
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ Constant *Comp =
+ ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
+ ShAmt);
+ if (Comp != RHS) {// Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst =
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // If the shift is NUW, then it is just shifting out zeros, no need for an
+ // AND.
+ if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
+ // If the shift is NSW and we compare to 0, then it is just shifting out
+ // sign bits, no need for an AND either.
+ if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0)
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ Constant *Mask =
+ ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
+ TypeBits-ShAmtVal));
+
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantExpr::getLShr(RHS, ShAmt));
+ }
+ }
+
+ // If this is a signed comparison to 0 and the shift is sign preserving,
+ // use the shift LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
+ // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+ bool TrueIfSigned = false;
+ if (LHSI->hasOneUse() &&
+ isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+ // (X << 31) <s 0 --> (X&1) != 0
+ Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
+ APInt::getOneBitSet(TypeBits,
+ TypeBits-ShAmt->getZExtValue()-1));
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
+ return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+ And, Constant::getNullValue(And->getType()));
+ }
+
+ // Transform (icmp pred iM (shl iM %v, N), CI)
+ // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N))
+ // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N.
+ // This enables to get rid of the shift in favor of a trunc which can be
+ // free on the target. It has the additional benefit of comparing to a
+ // smaller constant, which will be target friendly.
+ unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
+ if (LHSI->hasOneUse() &&
+ Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
+ Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
+ Constant *NCI = ConstantExpr::getTrunc(
+ ConstantExpr::getAShr(RHS,
+ ConstantInt::get(RHS->getType(), Amt)),
+ NTy);
+ return new ICmpInst(ICI.getPredicate(),
+ Builder->CreateTrunc(LHSI->getOperand(0), NTy),
+ NCI);
+ }
+
+ break;
+ }
+
+ case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
+ case Instruction::AShr: {
+ // Handle equality comparisons of shift-by-constant.
+ BinaryOperator *BO = cast<BinaryOperator>(LHSI);
+ if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt))
+ return Res;
+ }
+
+ // Handle exact shr's.
+ if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) {
+ if (RHSV.isMinValue())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS);
+ }
+ break;
+ }
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // Fold: icmp pred ([us]div X, C1), C2 -> range test
+ // Fold this div into the comparison, producing a range check.
+ // Determine, based on the divide type, what the range is being
+ // checked. If there is an overflow on the low or high side, remember
+ // it, otherwise compute the range [low, hi) bounding the new value.
+ // See: InsertRangeTest above for the kinds of replacements possible.
+ if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+ if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+ DivRHS))
+ return R;
+ break;
+
+ case Instruction::Add:
+ // Fold: icmp pred (add X, C1), C2
+ if (!ICI.isEquality()) {
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!LHSC) break;
+ const APInt &LHSV = LHSC->getValue();
+
+ ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+ .subtract(LHSV);
+
+ if (ICI.isSigned()) {
+ if (CR.getLower().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ } else if (CR.getUpper().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getLower()));
+ }
+ } else {
+ if (CR.getLower().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ } else if (CR.getUpper().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getLower()));
+ }
+ }
+ }
+ break;
+ }
+
+ // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
+ if (ICI.isEquality()) {
+ bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+
+ // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+ // the second operand is a constant, simplify a bit.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
+ switch (BO->getOpcode()) {
+ case Instruction::SRem:
+ // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
+ if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){
+ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
+ if (V.sgt(1) && V.isPowerOf2()) {
+ Value *NewRem =
+ Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
+ BO->getName());
+ return new ICmpInst(ICI.getPredicate(), NewRem,
+ Constant::getNullValue(BO->getType()));
+ }
+ }
+ break;
+ case Instruction::Add:
+ // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
+ if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getSub(RHS, BOp1C));
+ } else if (RHSV == 0) {
+ // Replace ((add A, B) != 0) with (A != -B) if A or B is
+ // efficiently invertible, or if the add has just this one use.
+ Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+
+ if (Value *NegVal = dyn_castNegVal(BOp1))
+ return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
+ if (Value *NegVal = dyn_castNegVal(BOp0))
+ return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
+ if (BO->hasOneUse()) {
+ Value *Neg = Builder->CreateNeg(BOp1);
+ Neg->takeName(BO);
+ return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
+ }
+ }
+ break;
+ case Instruction::Xor:
+ // For the xor case, we can xor two constants together, eliminating
+ // the explicit xor.
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getXor(RHS, BOC));
+ } else if (RHSV == 0) {
+ // Replace ((xor A, B) != 0) with (A != B)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ }
+ break;
+ case Instruction::Sub:
+ // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants.
+ if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(1),
+ ConstantExpr::getSub(BOp0C, RHS));
+ } else if (RHSV == 0) {
+ // Replace ((sub A, B) != 0) with (A != B)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ }
+ break;
+ case Instruction::Or:
+ // If bits are being or'd in that are not present in the constant we
+ // are comparing against, then the comparison could never succeed!
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ Constant *NotCI = ConstantExpr::getNot(RHS);
+ if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ isICMP_NE));
+ }
+ break;
+
+ case Instruction::And:
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // If bits are being compared against that are and'd out, then the
+ // comparison can never succeed!
+ if ((RHSV & ~BOC->getValue()) != 0)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ isICMP_NE));
+
+ // If we have ((X & C) == C), turn it into ((X & C) != 0).
+ if (RHS == BOC && RHSV.isPowerOf2())
+ return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
+ ICmpInst::ICMP_NE, LHSI,
+ Constant::getNullValue(RHS->getType()));
+
+ // Don't perform the following transforms if the AND has multiple uses
+ if (!BO->hasOneUse())
+ break;
+
+ // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
+ if (BOC->getValue().isSignBit()) {
+ Value *X = BO->getOperand(0);
+ Constant *Zero = Constant::getNullValue(X->getType());
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+ return new ICmpInst(pred, X, Zero);
+ }
+
+ // ((X & ~7) == 0) --> X < 8
+ if (RHSV == 0 && isHighOnes(BOC)) {
+ Value *X = BO->getOperand(0);
+ Constant *NegX = ConstantExpr::getNeg(BOC);
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+ return new ICmpInst(pred, X, NegX);
+ }
+ }
+ break;
+ case Instruction::Mul:
+ if (RHSV == 0 && BO->hasNoSignedWrap()) {
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // The trivial case (mul X, 0) is handled by InstSimplify
+ // General case : (mul X, C) != 0 iff X != 0
+ // (mul X, C) == 0 iff X == 0
+ if (!BOC->isZero())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+ }
+ }
+ break;
+ default: break;
+ }
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
+ // Handle icmp {eq|ne} <intrinsic>, intcst.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
+ return &ICI;
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ // ctz(A) == bitwidth(a) -> A == 0 and likewise for !=
+ if (RHSV == RHS->getType()->getBitWidth()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
+ return &ICI;
+ }
+ break;
+ case Intrinsic::ctpop:
+ // popcount(A) == 0 -> A == 0 and likewise for !=
+ if (RHS->isZero()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, RHS);
+ return &ICI;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
+/// We only handle extending casts so far.
+///
+Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
+ const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
+ Value *LHSCIOp = LHSCI->getOperand(0);
+ Type *SrcTy = LHSCIOp->getType();
+ Type *DestTy = LHSCI->getType();
+ Value *RHSCIOp;
+
+ // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+ // integer type is the same size as the pointer type.
+ if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
+ TD->getPointerSizeInBits() ==
+ cast<IntegerType>(DestTy)->getBitWidth()) {
+ Value *RHSOp = 0;
+ if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+ RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+ } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+ RHSOp = RHSC->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (LHSCIOp->getType() != RHSOp->getType())
+ RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+ }
+
+ if (RHSOp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+ }
+
+ // The code below only handles extension cast instructions, so far.
+ // Enforce this.
+ if (LHSCI->getOpcode() != Instruction::ZExt &&
+ LHSCI->getOpcode() != Instruction::SExt)
+ return 0;
+
+ bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
+ bool isSignedCmp = ICI.isSigned();
+
+ if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
+ // Not an extension from the same type?
+ RHSCIOp = CI->getOperand(0);
+ if (RHSCIOp->getType() != LHSCIOp->getType())
+ return 0;
+
+ // If the signedness of the two casts doesn't agree (i.e. one is a sext
+ // and the other is a zext), then we can't handle this.
+ if (CI->getOpcode() != LHSCI->getOpcode())
+ return 0;
+
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedCmp && isSignedExt)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
+ }
+
+ // If we aren't dealing with a constant on the RHS, exit early
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
+ if (!CI)
+ return 0;
+
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DestTy.
+ Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
+ Res1, DestTy);
+
+ // If the re-extended constant didn't change...
+ if (Res2 == CI) {
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedExt && isSignedCmp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
+ }
+
+ // The re-extended constant changed so the constant cannot be represented
+ // in the shorter type. Consequently, we cannot emit a simple comparison.
+ // All the cases that fold to true or false will have already been handled
+ // by SimplifyICmpInst, so only deal with the tricky case.
+
+ if (isSignedCmp || !isSignedExt)
+ return 0;
+
+ // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
+ // should have been folded away previously and not enter in here.
+
+ // We're performing an unsigned comp with a sign extended value.
+ // This is true if the input is >= 0. [aka >s -1]
+ Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+ Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
+
+ // Finally, return the value computed.
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT)
+ return ReplaceInstUsesWith(ICI, Result);
+
+ assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
+ return BinaryOperator::CreateNot(Result);
+}
+
+/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form:
+/// I = icmp ugt (add (add A, B), CI2), CI1
+/// If this is of the form:
+/// sum = a + b
+/// if (sum+128 >u 255)
+/// Then replace it with llvm.sadd.with.overflow.i8.
+///
+static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
+ ConstantInt *CI2, ConstantInt *CI1,
+ InstCombiner &IC) {
+ // The transformation we're trying to do here is to transform this into an
+ // llvm.sadd.with.overflow. To do this, we have to replace the original add
+ // with a narrower add, and discard the add-with-constant that is part of the
+ // range check (if we can't eliminate it, this isn't profitable).
+
+ // In order to eliminate the add-with-constant, the compare can be its only
+ // use.
+ Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
+ if (!AddWithCst->hasOneUse()) return 0;
+
+ // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
+ if (!CI2->getValue().isPowerOf2()) return 0;
+ unsigned NewWidth = CI2->getValue().countTrailingZeros();
+ if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
+
+ // The width of the new add formed is 1 more than the bias.
+ ++NewWidth;
+
+ // Check to see that CI1 is an all-ones value with NewWidth bits.
+ if (CI1->getBitWidth() == NewWidth ||
+ CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
+ return 0;
+
+ // This is only really a signed overflow check if the inputs have been
+ // sign-extended; check for that condition. For example, if CI2 is 2^31 and
+ // the operands of the add are 64 bits wide, we need at least 33 sign bits.
+ unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
+ if (IC.ComputeNumSignBits(A) < NeededSignBits ||
+ IC.ComputeNumSignBits(B) < NeededSignBits)
+ return 0;
+
+ // In order to replace the original add with a narrower
+ // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
+ // and truncates that discard the high bits of the add. Verify that this is
+ // the case.
+ Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
+ for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
+ UI != E; ++UI) {
+ if (*UI == AddWithCst) continue;
+
+ // Only accept truncates for now. We would really like a nice recursive
+ // predicate like SimplifyDemandedBits, but which goes downwards the use-def
+ // chain to see which bits of a value are actually demanded. If the
+ // original add had another add which was then immediately truncated, we
+ // could still do the transformation.
+ TruncInst *TI = dyn_cast<TruncInst>(*UI);
+ if (TI == 0 ||
+ TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
+ }
+
+ // If the pattern matches, truncate the inputs to the narrower type and
+ // use the sadd_with_overflow intrinsic to efficiently compute both the
+ // result and the overflow bit.
+ Module *M = I.getParent()->getParent()->getParent();
+
+ Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
+ NewType);
+
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ Builder->SetInsertPoint(OrigAdd);
+
+ Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
+ Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
+ CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
+ Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
+
+ // The inner add was the result of the narrow add, zero extended to the
+ // wider type. Replace it with the result computed by the intrinsic.
+ IC.ReplaceInstUsesWith(*OrigAdd, ZExt);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "sadd.overflow");
+}
+
+static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
+ InstCombiner &IC) {
+ // Don't bother doing this transformation for pointers, don't do it for
+ // vectors.
+ if (!isa<IntegerType>(OrigAddV->getType())) return 0;
+
+ // If the add is a constant expr, then we don't bother transforming it.
+ Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
+ if (OrigAdd == 0) return 0;
+
+ Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+ Builder->SetInsertPoint(OrigAdd);
+
+ Module *M = I.getParent()->getParent()->getParent();
+ Type *Ty = LHS->getType();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+ CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0);
+
+ IC.ReplaceInstUsesWith(*OrigAdd, Add);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "uadd.overflow");
+}
+
+// DemandedBitsLHSMask - When performing a comparison against a constant,
+// it is possible that not all the bits in the LHS are demanded. This helper
+// method computes the mask that IS demanded.
+static APInt DemandedBitsLHSMask(ICmpInst &I,
+ unsigned BitWidth, bool isSignCheck) {
+ if (isSignCheck)
+ return APInt::getSignBit(BitWidth);
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+ if (!CI) return APInt::getAllOnesValue(BitWidth);
+ const APInt &RHS = CI->getValue();
+
+ switch (I.getPredicate()) {
+ // For a UGT comparison, we don't care about any bits that
+ // correspond to the trailing ones of the comparand. The value of these
+ // bits doesn't impact the outcome of the comparison, because any value
+ // greater than the RHS must differ in a bit higher than these due to carry.
+ case ICmpInst::ICMP_UGT: {
+ unsigned trailingOnes = RHS.countTrailingOnes();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
+ return ~lowBitsSet;
+ }
+
+ // Similarly, for a ULT comparison, we don't care about the trailing zeros.
+ // Any value less than the RHS must differ in a higher bit because of carries.
+ case ICmpInst::ICMP_ULT: {
+ unsigned trailingZeros = RHS.countTrailingZeros();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
+ return ~lowBitsSet;
+ }
+
+ default:
+ return APInt::getAllOnesValue(BitWidth);
+ }
+
+}
+
+Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
+ bool Changed = false;
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(Op0) < getComplexity(Op1)) {
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ Changed = true;
+ }
+
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // comparing -val or val with non-zero is the same as just comparing val
+ // ie, abs(val) != 0 -> val != 0
+ if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero()))
+ {
+ Value *Cond, *SelectTrue, *SelectFalse;
+ if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
+ m_Value(SelectFalse)))) {
+ if (Value *V = dyn_castNegVal(SelectTrue)) {
+ if (V == SelectFalse)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ else if (Value *V = dyn_castNegVal(SelectFalse)) {
+ if (V == SelectTrue)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ }
+ }
+
+ Type *Ty = Op0->getType();
+
+ // icmp's with boolean values can always be turned into bitwise operations
+ if (Ty->isIntegerTy(1)) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Invalid icmp instruction!");
+ case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
+ Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateNot(Xor);
+ }
+ case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ case ICmpInst::ICMP_UGT:
+ std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+ return BinaryOperator::CreateAnd(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGT:
+ std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateAnd(Not, Op0);
+ }
+ case ICmpInst::ICMP_UGE:
+ std::swap(Op0, Op1); // Change icmp uge -> icmp ule
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+ return BinaryOperator::CreateOr(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGE:
+ std::swap(Op0, Op1); // Change icmp sge -> icmp sle
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+ }
+
+ unsigned BitWidth = 0;
+ if (Ty->isIntOrIntVectorTy())
+ BitWidth = Ty->getScalarSizeInBits();
+ else if (TD) // Pointers require TD info to get their size.
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+
+ bool isSignBit = false;
+
+ // See if we are doing a comparison with a constant.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ Value *A = 0, *B = 0;
+
+ // Match the following pattern, which is a common idiom when writing
+ // overflow-safe integer arithmetic function. The source performs an
+ // addition in wider type, and explicitly checks for overflow using
+ // comparisons against INT_MIN and INT_MAX. Simplify this by using the
+ // sadd_with_overflow intrinsic.
+ //
+ // TODO: This could probably be generalized to handle other overflow-safe
+ // operations if we worked out the formulas to compute the appropriate
+ // magic constants.
+ //
+ // sum = a + b
+ // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
+ {
+ ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+ if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
+ return Res;
+ }
+
+ // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+ if (I.isEquality() && CI->isZero() &&
+ match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+ // (icmp cond A B) if cond is equality
+ return new ICmpInst(I.getPredicate(), A, B);
+ }
+
+ // If we have an icmp le or icmp ge instruction, turn it into the
+ // appropriate icmp lt or icmp gt instruction. This allows us to rely on
+ // them being folded in the code below. The SimplifyICmpInst code has
+ // already handled the edge cases for us, so we just assert on them.
+ switch (I.getPredicate()) {
+ default: break;
+ case ICmpInst::ICMP_ULE:
+ assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ case ICmpInst::ICMP_SLE:
+ assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ case ICmpInst::ICMP_UGE:
+ assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ case ICmpInst::ICMP_SGE:
+ assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ }
+
+ // If this comparison is a normal comparison, it demands all
+ // bits, if it is a sign bit comparison, it only demands the sign bit.
+ bool UnusedBit;
+ isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+ }
+
+ // See if we can fold the comparison based on range information we can get
+ // by checking whether bits are known to be zero or one in the input.
+ if (BitWidth != 0) {
+ APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
+ APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+
+ if (SimplifyDemandedBits(I.getOperandUse(0),
+ DemandedBitsLHSMask(I, BitWidth, isSignBit),
+ Op0KnownZero, Op0KnownOne, 0))
+ return &I;
+ if (SimplifyDemandedBits(I.getOperandUse(1),
+ APInt::getAllOnesValue(BitWidth),
+ Op1KnownZero, Op1KnownOne, 0))
+ return &I;
+
+ // Given the known and unknown bits, compute a range that the LHS could be
+ // in. Compute the Min, Max and RHS values based on the known bits. For the
+ // EQ and NE we use unsigned values.
+ APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
+ APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
+ if (I.isSigned()) {
+ ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ } else {
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ }
+
+ // If Min and Max are known to be the same, then SimplifyDemandedBits
+ // figured out that the LHS is a constant. Just constant fold this now so
+ // that code below can assume that Min != Max.
+ if (!isa<Constant>(Op0) && Op0Min == Op0Max)
+ return new ICmpInst(I.getPredicate(),
+ ConstantInt::get(Op0->getType(), Op0Min), Op1);
+ if (!isa<Constant>(Op1) && Op1Min == Op1Max)
+ return new ICmpInst(I.getPredicate(), Op0,
+ ConstantInt::get(Op1->getType(), Op1Min));
+
+ // Based on the range information we know about the LHS, see if we can
+ // simplify this comparison. For example, (x&4) < 8 is always true.
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown icmp opcode!");
+ case ICmpInst::ICMP_EQ: {
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = 0;
+ ConstantInt *LHSC = 0;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) == 0" into "x != 3".
+ Value *X = 0;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) == 0" into "x != 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = 0;
+ ConstantInt *LHSC = 0;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) != 0" into "x == 3".
+ Value *X = 0;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) != 0" into "x == 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+
+ break;
+ }
+ case ICmpInst::ICMP_ULT:
+ if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+
+ // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
+ if (CI->isMinValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ Constant::getAllOnesValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+
+ // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
+ if (CI->isMaxValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ Constant::getNullValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
+ if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
+ if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_UGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
+ if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_ULE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
+ if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ }
+
+ // Turn a signed comparison into an unsigned one if both operands
+ // are known to have the same sign.
+ if (I.isSigned() &&
+ ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
+ (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+ return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+ }
+
+ // Test if the ICmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return 0;
+
+ // See if we are doing a comparison between a constant and an instruction that
+ // can be folded into the comparison.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ // Since the RHS is a ConstantInt (CI), if the left hand side is an
+ // instruction, see if that instruction also has constants so that the
+ // instruction can be folded into the icmp
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
+ return Res;
+ }
+
+ // Handle icmp with constant (but not simple integer constant) RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::GetElementPtr:
+ // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
+ if (RHSC->isNullValue() &&
+ cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+ case Instruction::PHI:
+ // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
+ Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
+ Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+
+ // We only want to perform this transformation if it will not lead to
+ // additional code. This is true if either both sides of the select
+ // fold to a constant (in which case the icmp is replaced with a select
+ // which will usually simplify) or this is the only user of the
+ // select (in which case we are trading a select+icmp for a simpler
+ // select+icmp).
+ if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) {
+ if (!Op1)
+ Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ if (!Op2)
+ Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
+ RHSC, I.getName());
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ }
+ break;
+ }
+ case Instruction::IntToPtr:
+ // icmp pred inttoptr(X), null -> icmp pred X, 0
+ if (RHSC->isNullValue() && TD &&
+ TD->getIntPtrType(RHSC->getContext()) ==
+ LHSI->getOperand(0)->getType())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+
+ case Instruction::Load:
+ // Try to optimize things like "A[i] > 4" to index computations.
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
+ }
+
+ // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
+ return NI;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op0,
+ ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+ return NI;
+
+ // Test to see if the operands of the icmp are casted versions of other
+ // values. If the ptr->ptr cast can be stripped off both arguments, we do so
+ // now.
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
+ if (Op0->getType()->isPointerTy() &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ // We keep moving the cast from the left operand over to the right
+ // operand, where it can often be eliminated completely.
+ Op0 = CI->getOperand(0);
+
+ // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
+ // so eliminate it as well.
+ if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
+ Op1 = CI2->getOperand(0);
+
+ // If Op1 is a constant, we can fold the cast into the constant.
+ if (Op0->getType() != Op1->getType()) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
+ } else {
+ // Otherwise, cast the RHS right before the icmp
+ Op1 = Builder->CreateBitCast(Op1, Op0->getType());
+ }
+ }
+ return new ICmpInst(I.getPredicate(), Op0, Op1);
+ }
+ }
+
+ if (isa<CastInst>(Op0)) {
+ // Handle the special case of: icmp (cast bool to X), <cst>
+ // This comes up when you have code like
+ // int X = A < B;
+ // if (X) ...
+ // For generality, we handle any zero-extension of any operand comparison
+ // with a constant or another cast from the same type.
+ if (isa<Constant>(Op1) || isa<CastInst>(Op1))
+ if (Instruction *R = visitICmpInstWithCastAndCast(I))
+ return R;
+ }
+
+ // Special logic for binary operators.
+ BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
+ BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
+ if (BO0 || BO1) {
+ CmpInst::Predicate Pred = I.getPredicate();
+ bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
+ if (BO0 && isa<OverflowingBinaryOperator>(BO0))
+ NoOp0WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
+ if (BO1 && isa<OverflowingBinaryOperator>(BO1))
+ NoOp1WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
+
+ // Analyze the case when either Op0 or Op1 is an add instruction.
+ // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ if (BO0 && BO0->getOpcode() == Instruction::Add)
+ A = BO0->getOperand(0), B = BO0->getOperand(1);
+ if (BO1 && BO1->getOpcode() == Instruction::Add)
+ C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
+ return new ICmpInst(Pred, A == Op1 ? B : A,
+ Constant::getNullValue(Op1->getType()));
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
+ C == Op0 ? D : C);
+
+ // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse()) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y, *Z;
+ if (A == C) {
+ // C + B == C + D -> B == D
+ Y = B;
+ Z = D;
+ } else if (A == D) {
+ // D + B == C + D -> B == C
+ Y = B;
+ Z = C;
+ } else if (B == C) {
+ // A + C == C + D -> A == D
+ Y = A;
+ Z = D;
+ } else {
+ assert(B == D);
+ // A + D == C + D -> A == C
+ Y = A;
+ Z = C;
+ }
+ return new ICmpInst(Pred, Y, Z);
+ }
+
+ // Analyze the case when either Op0 or Op1 is a sub instruction.
+ // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
+ A = 0; B = 0; C = 0; D = 0;
+ if (BO0 && BO0->getOpcode() == Instruction::Sub)
+ A = BO0->getOperand(0), B = BO0->getOperand(1);
+ if (BO1 && BO1->getOpcode() == Instruction::Sub)
+ C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+ // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
+ if (A == Op1 && NoOp0WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
+
+ // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
+ if (C == Op0 && NoOp1WrapProblem)
+ return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
+
+ // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
+ if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, A, C);
+
+ // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
+ if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, D, B);
+
+ BinaryOperator *SRem = NULL;
+ // icmp (srem X, Y), Y
+ if (BO0 && BO0->getOpcode() == Instruction::SRem &&
+ Op1 == BO0->getOperand(1))
+ SRem = BO0;
+ // icmp Y, (srem X, Y)
+ else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
+ Op0 == BO1->getOperand(1))
+ SRem = BO1;
+ if (SRem) {
+ // We don't check hasOneUse to avoid increasing register pressure because
+ // the value we use is the same value this instruction was already using.
+ switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
+ default: break;
+ case ICmpInst::ICMP_EQ:
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ case ICmpInst::ICMP_NE:
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
+ Constant::getAllOnesValue(SRem->getType()));
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
+ Constant::getNullValue(SRem->getType()));
+ }
+ }
+
+ if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
+ BO0->hasOneUse() && BO1->hasOneUse() &&
+ BO0->getOperand(1) == BO1->getOperand(1)) {
+ switch (BO0->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+
+ if (CI->isMaxValue(true)) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+ }
+ break;
+ case Instruction::Mul:
+ if (!I.isEquality())
+ break;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask = ConstantInt::get(I.getContext(),
+ APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() -
+ AP.countTrailingZeros()));
+ Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
+ Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
+ return new ICmpInst(I.getPredicate(), And1, And2);
+ }
+ }
+ break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ if (I.isSigned())
+ break;
+ // fall-through
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ if (!BO0->isExact() || !BO1->isExact())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ case Instruction::Shl: {
+ bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
+ bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && I.isSigned())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+ }
+ }
+ }
+
+ { Value *A, *B;
+ // ~x < ~y --> y < x
+ // ~x < cst --> ~cst < x
+ if (match(Op0, m_Not(m_Value(A)))) {
+ if (match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+ return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+ }
+
+ // (a+b) <u a --> llvm.uadd.with.overflow.
+ // (a+b) <u b --> llvm.uadd.with.overflow.
+ if (I.getPredicate() == ICmpInst::ICMP_ULT &&
+ match(Op0, m_Add(m_Value(A), m_Value(B))) &&
+ (Op1 == A || Op1 == B))
+ if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
+ return R;
+
+ // a >u (a+b) --> llvm.uadd.with.overflow.
+ // b >u (a+b) --> llvm.uadd.with.overflow.
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op1, m_Add(m_Value(A), m_Value(B))) &&
+ (Op0 == A || Op0 == B))
+ if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
+ return R;
+ }
+
+ if (I.isEquality()) {
+ Value *A, *B, *C, *D;
+
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
+ Value *OtherVal = A == Op1 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
+ // A^c1 == C^c2 --> A == C^(c1^c2)
+ ConstantInt *C1, *C2;
+ if (match(B, m_ConstantInt(C1)) &&
+ match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
+ Constant *NC = ConstantInt::get(I.getContext(),
+ C1->getValue() ^ C2->getValue());
+ Value *Xor = Builder->CreateXor(C, NC);
+ return new ICmpInst(I.getPredicate(), A, Xor);
+ }
+
+ // A^B == A^D -> B == D
+ if (A == C) return new ICmpInst(I.getPredicate(), B, D);
+ if (A == D) return new ICmpInst(I.getPredicate(), B, C);
+ if (B == C) return new ICmpInst(I.getPredicate(), A, D);
+ if (B == D) return new ICmpInst(I.getPredicate(), A, C);
+ }
+ }
+
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0)) {
+ // A == (A^B) -> B == 0
+ Value *OtherVal = A == Op0 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
+ if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
+ match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
+ Value *X = 0, *Y = 0, *Z = 0;
+
+ if (A == C) {
+ X = B; Y = D; Z = A;
+ } else if (A == D) {
+ X = B; Y = C; Z = A;
+ } else if (B == C) {
+ X = A; Y = D; Z = B;
+ } else if (B == D) {
+ X = A; Y = C; Z = B;
+ }
+
+ if (X) { // Build (X^Y) & Z
+ Op1 = Builder->CreateXor(X, Y);
+ Op1 = Builder->CreateAnd(Op1, Z);
+ I.setOperand(0, Op1);
+ I.setOperand(1, Constant::getNullValue(Op1->getType()));
+ return &I;
+ }
+ }
+
+ // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
+ // and (B & (1<<X)-1) == (zext A) --> A == (trunc B)
+ ConstantInt *Cst1;
+ if ((Op0->hasOneUse() &&
+ match(Op0, m_ZExt(m_Value(A))) &&
+ match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) ||
+ (Op1->hasOneUse() &&
+ match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
+ match(Op1, m_ZExt(m_Value(A))))) {
+ APInt Pow2 = Cst1->getValue() + 1;
+ if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
+ Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
+ return new ICmpInst(I.getPredicate(), A,
+ Builder->CreateTrunc(B, A->getType()));
+ }
+
+ // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
+ // "icmp (and X, mask), cst"
+ uint64_t ShAmt = 0;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A),
+ m_ConstantInt(ShAmt))))) &&
+ match(Op1, m_ConstantInt(Cst1)) &&
+ // Only do this when A has multiple uses. This is most important to do
+ // when it exposes other optimizations.
+ !A->hasOneUse()) {
+ unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
+
+ if (ShAmt < ASize) {
+ APInt MaskV =
+ APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
+ MaskV <<= ShAmt;
+
+ APInt CmpV = Cst1->getValue().zext(ASize);
+ CmpV <<= ShAmt;
+
+ Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
+ return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
+ }
+ }
+ }
+
+ {
+ Value *X; ConstantInt *Cst;
+ // icmp X+Cst, X
+ if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+
+ // icmp X, X+Cst
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+ }
+ return Changed ? &I : 0;
+}
+
+
+
+
+
+
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+ Instruction *LHSI,
+ Constant *RHSC) {
+ if (!isa<ConstantFP>(RHSC)) return 0;
+ const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+ // Get the width of the mantissa. We don't want to hack on conversions that
+ // might lose information from the integer, e.g. "i64 -> float"
+ int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+ if (MantissaWidth == -1) return 0; // Unknown.
+
+ // Check to see that the input is converted from an integer type that is small
+ // enough that preserves all bits. TODO: check here for "known" sign bits.
+ // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
+
+ // If this is a uitofp instruction, we need an extra bit to hold the sign.
+ bool LHSUnsigned = isa<UIToFPInst>(LHSI);
+ if (LHSUnsigned)
+ ++InputSize;
+
+ // If the conversion would lose info, don't hack on this.
+ if ((int)InputSize > MantissaWidth)
+ return 0;
+
+ // Otherwise, we can potentially simplify the comparison. We know that it
+ // will always come through as an integer value and we know the constant is
+ // not a NAN (it would have been previously simplified).
+ assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+ ICmpInst::Predicate Pred;
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unexpected predicate!");
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_OEQ:
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_OGT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
+ break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
+ break;
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_OLT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
+ break;
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
+ break;
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ONE:
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case FCmpInst::FCMP_ORD:
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case FCmpInst::FCMP_UNO:
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+
+ IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+ // Now we know that the APFloat is a normal number, zero or inf.
+
+ // See if the FP constant is too large for the integer. For example,
+ // comparing an i8 to 300.0.
+ unsigned IntWidth = IntTy->getScalarSizeInBits();
+
+ if (!LHSUnsigned) {
+ // If the RHS value is > SignedMax, fold the comparison. This handles +INF
+ // and large values.
+ APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+ SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_SLE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ } else {
+ // If the RHS value is > UnsignedMax, fold the comparison. This handles
+ // +INF and large values.
+ APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false);
+ UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
+ APFloat::rmNearestTiesToEven);
+ if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
+ Pred == ICmpInst::ICMP_ULE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ }
+
+ if (!LHSUnsigned) {
+ // See if the RHS value is < SignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+ Pred == ICmpInst::ICMP_SGE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ } else {
+ // See if the RHS value is < UnsignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
+ Pred == ICmpInst::ICMP_UGE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ }
+
+ // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
+ // [0, UMAX], but it may still be fractional. See if it is fractional by
+ // casting the FP value to the integer value and back, checking for equality.
+ // Don't do this for zero, because -0.0 is not fractional.
+ Constant *RHSInt = LHSUnsigned
+ ? ConstantExpr::getFPToUI(RHSC, IntTy)
+ : ConstantExpr::getFPToSI(RHSC, IntTy);
+ if (!RHS.isZero()) {
+ bool Equal = LHSUnsigned
+ ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+ : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+ if (!Equal) {
+ // If we had a comparison against a fractional value, we have to adjust
+ // the compare predicate and sometimes the value. RHSC is rounded towards
+ // zero at this point.
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected integer comparison!");
+ case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ case ICmpInst::ICMP_ULE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> false
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> int < -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLT;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // (float)int < -4.4 --> false
+ // (float)int < 4.4 --> int <= 4
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ Pred = ICmpInst::ICMP_ULE;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // (float)int < -4.4 --> int < -4
+ // (float)int < 4.4 --> int <= 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLE;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> true
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> int >= -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // (float)int >= -4.4 --> true
+ // (float)int >= 4.4 --> int > 4
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ Pred = ICmpInst::ICMP_UGT;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // (float)int >= -4.4 --> int >= -4
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGT;
+ break;
+ }
+ }
+ }
+
+ // Lower this FP comparison into an appropriate integer version of the
+ // comparison.
+ return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
+Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Simplify 'fcmp pred X, X'
+ if (Op0 == Op1) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown predicate!");
+ case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
+ case FCmpInst::FCMP_ULT: // True if unordered or less than
+ case FCmpInst::FCMP_UGT: // True if unordered or greater than
+ case FCmpInst::FCMP_UNE: // True if unordered or not equal
+ // Canonicalize these to be 'fcmp uno %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_UNO);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+
+ case FCmpInst::FCMP_ORD: // True if ordered (no nans)
+ case FCmpInst::FCMP_OEQ: // True if ordered and equal
+ case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
+ case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
+ // Canonicalize these to be 'fcmp ord %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_ORD);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+ }
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::FPExt: {
+ // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
+ FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
+ ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
+ if (!RHSF)
+ break;
+
+ const fltSemantics *Sem;
+ // FIXME: This shouldn't be here.
+ if (LHSExt->getSrcTy()->isHalfTy())
+ Sem = &APFloat::IEEEhalf;
+ else if (LHSExt->getSrcTy()->isFloatTy())
+ Sem = &APFloat::IEEEsingle;
+ else if (LHSExt->getSrcTy()->isDoubleTy())
+ Sem = &APFloat::IEEEdouble;
+ else if (LHSExt->getSrcTy()->isFP128Ty())
+ Sem = &APFloat::IEEEquad;
+ else if (LHSExt->getSrcTy()->isX86_FP80Ty())
+ Sem = &APFloat::x87DoubleExtended;
+ else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
+ Sem = &APFloat::PPCDoubleDouble;
+ else
+ break;
+
+ bool Lossy;
+ APFloat F = RHSF->getValueAPF();
+ F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
+
+ // Avoid lossy conversions and denormals. Zero is a special case
+ // that's OK to convert.
+ APFloat Fabs = F;
+ Fabs.clearSign();
+ if (!Lossy &&
+ ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
+ APFloat::cmpLessThan) || Fabs.isZero()))
+
+ return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+ ConstantFP::get(RHSC->getContext(), F));
+ break;
+ }
+ case Instruction::PHI:
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (LHSI->hasOneUse()) {
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+ // Fold the known value into the constant operand.
+ Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op2 = Builder->CreateFCmp(I.getPredicate(),
+ LHSI->getOperand(2), RHSC, I.getName());
+ } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+ // Fold the known value into the constant operand.
+ Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ }
+ }
+
+ if (Op1)
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ break;
+ }
+ case Instruction::FSub: {
+ // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
+ Value *Op;
+ if (match(LHSI, m_FNeg(m_Value(Op))))
+ return new FCmpInst(I.getSwappedPredicate(), Op,
+ ConstantExpr::getFNeg(RHSC));
+ break;
+ }
+ case Instruction::Load:
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ case Instruction::Call: {
+ CallInst *CI = cast<CallInst>(LHSI);
+ LibFunc::Func Func;
+ // Various optimization for fabs compared with zero.
+ if (RHSC->isNullValue() && CI->getCalledFunction() &&
+ TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+ TLI->has(Func)) {
+ if (Func == LibFunc::fabs || Func == LibFunc::fabsf ||
+ Func == LibFunc::fabsl) {
+ switch (I.getPredicate()) {
+ default: break;
+ // fabs(x) < 0 --> false
+ case FCmpInst::FCMP_OLT:
+ return ReplaceInstUsesWith(I, Builder->getFalse());
+ // fabs(x) > 0 --> x != 0
+ case FCmpInst::FCMP_OGT:
+ return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) <= 0 --> x == 0
+ case FCmpInst::FCMP_OLE:
+ return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) >= 0 --> !isnan(x)
+ case FCmpInst::FCMP_OGE:
+ return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) == 0 --> x == 0
+ // fabs(x) != 0 --> x != 0
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UNE:
+ return new FCmpInst(I.getPredicate(), CI->getArgOperand(0),
+ RHSC);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
+ Value *X, *Y;
+ if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
+ return new FCmpInst(I.getSwappedPredicate(), X, Y);
+
+ // fcmp (fpext x), (fpext y) -> fcmp x, y
+ if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
+ if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
+ if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
+ return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+ RHSExt->getOperand(0));
+
+ return Changed ? &I : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
new file mode 100644
index 000000000000..337cfe32a869
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -0,0 +1,816 @@
+//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for load, store and alloca.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
+
+/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable. This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
+static bool pointsToConstantGlobal(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr)
+ return pointsToConstantGlobal(CE->getOperand(0));
+ return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca. Ignore any reads of the pointer, return false if we
+/// see any stores or other unknown uses. If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
+/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
+/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// can optimize this.
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ SmallVectorImpl<Instruction *> &ToDelete,
+ bool IsOffset = false) {
+ // We track lifetime intrinsics as we encounter them. If we decide to go
+ // ahead and replace the value with the global, this lets the caller quickly
+ // eliminate the markers.
+
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Ignore non-volatile loads, they are always ok.
+ if (!LI->isSimple()) return false;
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ // If uses of the bitcast are ok, we are ok.
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
+ return false;
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer. If it
+ // doesn't, it does.
+ if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
+ IsOffset || !GEP->hasAllZeroIndices()))
+ return false;
+ continue;
+ }
+
+ if (CallSite CS = U) {
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
+ continue;
+
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load (but one that potentially returns the value itself), so we can
+ // ignore it if we know that the value isn't captured.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.onlyReadsMemory() &&
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ if (CS.isByValArgument(ArgNo))
+ continue;
+ }
+
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ ToDelete.push_back(II);
+ continue;
+ }
+ }
+
+ // If this is isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+ if (MI == 0)
+ return false;
+
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
+
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (IsOffset) return false;
+
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (UI.getOperandNo() != 0) return false;
+
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!pointsToConstantGlobal(MI->getSource()))
+ return false;
+
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
+ return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
+/// modified by a copy from a constant global. If we can prove this, we can
+/// replace any uses of the alloca with uses of the global directly.
+static MemTransferInst *
+isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+ SmallVectorImpl<Instruction *> &ToDelete) {
+ MemTransferInst *TheCopy = 0;
+ if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
+ return TheCopy;
+ return 0;
+}
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
+ if (TD) {
+ Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = Builder->CreateIntCast(AI.getArraySize(),
+ IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
+ }
+ }
+
+ // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
+ if (AI.isArrayAllocation()) { // Check C != 1
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ Type *NewTy =
+ ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
+
+ // Now that I is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
+ Value *Idx[2];
+ Idx[0] = NullIdx;
+ Idx[1] = NullIdx;
+ Instruction *GEP =
+ GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
+ InsertNewInstBefore(GEP, *It);
+
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return ReplaceInstUsesWith(AI, GEP);
+ } else if (isa<UndefValue>(AI.getArraySize())) {
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+ }
+ }
+
+ if (TD && AI.getAllocatedType()->isSized()) {
+ // If the alignment is 0 (unspecified), assign it the preferred alignment.
+ if (AI.getAlignment() == 0)
+ AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
+
+ // Move all alloca's of zero byte objects to the entry block and merge them
+ // together. Note that we only do this for alloca's, because malloc should
+ // allocate and return a unique pointer, even for a zero byte allocation.
+ if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) {
+ // For a zero sized alloca there is no point in doing an array allocation.
+ // This is helpful if the array size is a complicated expression not used
+ // elsewhere.
+ if (AI.isArrayAllocation()) {
+ AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
+ return &AI;
+ }
+
+ // Get the first instruction in the entry block.
+ BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
+ Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
+ if (FirstInst != &AI) {
+ // If the entry block doesn't start with a zero-size alloca then move
+ // this one to the start of the entry block. There is no problem with
+ // dominance as the array size was forced to a constant earlier already.
+ AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
+ if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
+ TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
+ AI.moveBefore(FirstInst);
+ return &AI;
+ }
+
+ // If the alignment of the entry block alloca is 0 (unspecified),
+ // assign it the preferred alignment.
+ if (EntryAI->getAlignment() == 0)
+ EntryAI->setAlignment(
+ TD->getPrefTypeAlignment(EntryAI->getAllocatedType()));
+ // Replace this zero-sized alloca with the one at the start of the entry
+ // block after ensuring that the address will be aligned enough for both
+ // types.
+ unsigned MaxAlign = std::max(EntryAI->getAlignment(),
+ AI.getAlignment());
+ EntryAI->setAlignment(MaxAlign);
+ if (AI.getType() != EntryAI->getType())
+ return new BitCastInst(EntryAI, AI.getType());
+ return ReplaceInstUsesWith(AI, EntryAI);
+ }
+ }
+ }
+
+ if (AI.getAlignment()) {
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global whose alignment is equal to or exceeds that of the
+ // allocation. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
+ unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
+ AI.getAlignment(), TD);
+ if (AI.getAlignment() <= SourceAlign) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ EraseInstFromFunction(*ToDelete[i]);
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ Instruction *NewI
+ = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
+ AI.getType()));
+ EraseInstFromFunction(*Copy);
+ ++NumGlobalCopies;
+ return NewI;
+ }
+ }
+ }
+
+ // At last, use the generic allocation site handler to aggressively remove
+ // unused allocas.
+ return visitAllocSite(AI);
+}
+
+
+/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
+static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
+ const DataLayout *TD) {
+ User *CI = cast<User>(LI.getOperand(0));
+ Value *CastOp = CI->getOperand(0);
+
+ PointerType *DestTy = cast<PointerType>(CI->getType());
+ Type *DestPTy = DestTy->getElementType();
+ if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
+
+ // If the address spaces don't match, don't eliminate the cast.
+ if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
+ return 0;
+
+ Type *SrcPTy = SrcTy->getElementType();
+
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ DestPTy->isVectorTy()) {
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
+ if (Constant *CSrc = dyn_cast<Constant>(CastOp))
+ if (ASrcTy->getNumElements() != 0) {
+ Value *Idxs[2];
+ Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
+ Idxs[1] = Idxs[0];
+ CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
+ SrcTy = cast<PointerType>(CastOp->getType());
+ SrcPTy = SrcTy->getElementType();
+ }
+
+ if (IC.getDataLayout() &&
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ SrcPTy->isVectorTy()) &&
+ // Do not allow turning this into a load of an integer, which is then
+ // casted to a pointer, this pessimizes pointer analysis a lot.
+ (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
+ IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
+ IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before the load, cast
+ // the result of the loaded value.
+ LoadInst *NewLoad =
+ IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
+ NewLoad->setAlignment(LI.getAlignment());
+ NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ // Now cast the result of the load.
+ return new BitCastInst(NewLoad, LI.getType());
+ }
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
+ Value *Op = LI.getOperand(0);
+
+ // Attempt to improve the alignment.
+ if (TD) {
+ unsigned KnownAlign =
+ getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD);
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
+ TD->getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
+ LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
+ }
+
+ // load (cast X) --> cast (load X) iff safe.
+ if (isa<CastInst>(Op))
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ // None of the following transforms are legal for volatile/atomic loads.
+ // FIXME: Some of it is okay for atomic loads; needs refactoring.
+ if (!LI.isSimple()) return 0;
+
+ // Do really simple store-to-load forwarding and load CSE, to catch cases
+ // where there are several consecutive memory accesses to the same location,
+ // separated by a few arithmetic operations.
+ BasicBlock::iterator BBI = &LI;
+ if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
+ return ReplaceInstUsesWith(LI, AvailableVal);
+
+ // load(gep null, ...) -> unreachable
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ const Value *GEPI0 = GEPI->getOperand(0);
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
+ // Insert a new store to null instruction before the load to indicate
+ // that this code is not reachable. We do this instead of inserting
+ // an unreachable instruction directly because we cannot modify the
+ // CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+ }
+
+ // load null/undef -> unreachable
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<UndefValue>(Op) ||
+ (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
+ // Insert a new store to null instruction before the load to indicate that
+ // this code is not reachable. We do this instead of inserting an
+ // unreachable instruction directly because we cannot modify the CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+
+ // Instcombine load (constantexpr_cast global) -> cast (load global)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ if (Op->hasOneUse()) {
+ // Change select and PHI nodes to select values instead of addresses: this
+ // helps alias analysis out a lot, allows many others simplifications, and
+ // exposes redundancy in the code.
+ //
+ // Note that we cannot do the transformation unless we know that the
+ // introduced loads cannot trap! Something like this is valid as long as
+ // the condition is always false: load (select bool %C, int* null, int* %G),
+ // but it would not be valid if we transformed it to load from null
+ // unconditionally.
+ //
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
+ // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
+ unsigned Align = LI.getAlignment();
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
+ LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
+ SI->getOperand(1)->getName()+".val");
+ LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
+ SI->getOperand(2)->getName()+".val");
+ V1->setAlignment(Align);
+ V2->setAlignment(Align);
+ return SelectInst::Create(SI->getCondition(), V1, V2);
+ }
+
+ // load (select (cond, null, P)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(2));
+ return &LI;
+ }
+
+ // load (select (cond, P, null)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(1));
+ return &LI;
+ }
+ }
+ }
+ return 0;
+}
+
+/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
+/// when possible. This makes it generally easy to do alias analysis and/or
+/// SROA/mem2reg of the memory object.
+static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
+ User *CI = cast<User>(SI.getOperand(1));
+ Value *CastOp = CI->getOperand(0);
+
+ Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
+ PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
+ if (SrcTy == 0) return 0;
+
+ Type *SrcPTy = SrcTy->getElementType();
+
+ if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
+ return 0;
+
+ /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
+ /// to its first element. This allows us to handle things like:
+ /// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
+ /// on 32-bit hosts.
+ SmallVector<Value*, 4> NewGEPIndices;
+
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
+ // Index through pointer.
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
+ NewGEPIndices.push_back(Zero);
+
+ while (1) {
+ if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
+ if (!STy->getNumElements()) /* Struct can be empty {} */
+ break;
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = STy->getElementType(0);
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = ATy->getElementType();
+ } else {
+ break;
+ }
+ }
+
+ SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
+ }
+
+ if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
+ return 0;
+
+ // If the pointers point into different address spaces or if they point to
+ // values with different sizes, we can't do the transformation.
+ if (!IC.getDataLayout() ||
+ SrcTy->getAddressSpace() !=
+ cast<PointerType>(CI->getType())->getAddressSpace() ||
+ IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
+ IC.getDataLayout()->getTypeSizeInBits(DestPTy))
+ return 0;
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before
+ // the store, cast the value to be stored.
+ Value *NewCast;
+ Value *SIOp0 = SI.getOperand(0);
+ Instruction::CastOps opcode = Instruction::BitCast;
+ Type* CastSrcTy = SIOp0->getType();
+ Type* CastDstTy = SrcPTy;
+ if (CastDstTy->isPointerTy()) {
+ if (CastSrcTy->isIntegerTy())
+ opcode = Instruction::IntToPtr;
+ } else if (CastDstTy->isIntegerTy()) {
+ if (SIOp0->getType()->isPointerTy())
+ opcode = Instruction::PtrToInt;
+ }
+
+ // SIOp0 is a pointer to aggregate and this is a store to the first field,
+ // emit a GEP to index into its first field.
+ if (!NewGEPIndices.empty())
+ CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
+
+ NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
+ SIOp0->getName()+".c");
+ SI.setOperand(0, NewCast);
+ SI.setOperand(1, CastOp);
+ return &SI;
+}
+
+/// equivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool equivalentAddressValues(Value *A, Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come form identical arithmetic instructions.
+ // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
+ // its only used to compare two uses within the same basic block, which
+ // means that they'll always either have the same value or one of them
+ // will have an undefined value.
+ if (isa<BinaryOperator>(A) ||
+ isa<CastInst>(A) ||
+ isa<PHINode>(A) ||
+ isa<GetElementPtrInst>(A))
+ if (Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
+ Value *Val = SI.getOperand(0);
+ Value *Ptr = SI.getOperand(1);
+
+ // Attempt to improve the alignment.
+ if (TD) {
+ unsigned KnownAlign =
+ getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
+ TD);
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
+ TD->getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
+ SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
+ }
+
+ // Don't hack volatile/atomic stores.
+ // FIXME: Some bits are legal for atomic stores; needs refactoring.
+ if (!SI.isSimple()) return 0;
+
+ // If the RHS is an alloca with a single use, zapify the store, making the
+ // alloca dead.
+ if (Ptr->hasOneUse()) {
+ if (isa<AllocaInst>(Ptr))
+ return EraseInstFromFunction(SI);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (isa<AllocaInst>(GEP->getOperand(0))) {
+ if (GEP->getOperand(0)->hasOneUse())
+ return EraseInstFromFunction(SI);
+ }
+ }
+ }
+
+ // Do really simple DSE, to catch cases where there are several consecutive
+ // stores to the same location, separated by a few arithmetic operations. This
+ // situation often occurs with bitfield accesses.
+ BasicBlock::iterator BBI = &SI;
+ for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
+ --ScanInsts) {
+ --BBI;
+ // Don't count debug info directives, lest they affect codegen,
+ // and we skip pointer-to-pointer bitcasts, which are NOPs.
+ if (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ ScanInsts++;
+ continue;
+ }
+
+ if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
+ // Prev store isn't volatile, and stores to the same location?
+ if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
+ SI.getOperand(1))) {
+ ++NumDeadStore;
+ ++BBI;
+ EraseInstFromFunction(*PrevSI);
+ continue;
+ }
+ break;
+ }
+
+ // If this is a load, we have to stop. However, if the loaded value is from
+ // the pointer we're loading and is producing the pointer we're storing,
+ // then *this* store is dead (X = load P; store X -> P).
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
+ LI->isSimple())
+ return EraseInstFromFunction(SI);
+
+ // Otherwise, this is a load from some other location. Stores before it
+ // may not be dead.
+ break;
+ }
+
+ // Don't skip over loads or things that can modify memory.
+ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
+ break;
+ }
+
+ // store X, null -> turns into 'unreachable' in SimplifyCFG
+ if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
+ if (!isa<UndefValue>(Val)) {
+ SI.setOperand(0, UndefValue::get(Val->getType()));
+ if (Instruction *U = dyn_cast<Instruction>(Val))
+ Worklist.Add(U); // Dropped a use.
+ }
+ return 0; // Do not modify these!
+ }
+
+ // store undef, Ptr -> noop
+ if (isa<UndefValue>(Val))
+ return EraseInstFromFunction(SI);
+
+ // If the pointer destination is a cast, see if we can fold the cast into the
+ // source instead.
+ if (isa<CastInst>(Ptr))
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+
+
+ // If this store is the last instruction in the basic block (possibly
+ // excepting debug info instructions), and if the block ends with an
+ // unconditional branch, try to move it to the successor block.
+ BBI = &SI;
+ do {
+ ++BBI;
+ } while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
+ if (BI->isUnconditional())
+ if (SimplifyStoreAtEndOfBlock(SI))
+ return 0; // xform done!
+
+ return 0;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+/// if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+/// *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+ BasicBlock *StoreBB = SI.getParent();
+
+ // Check to see if the successor block has exactly two incoming edges. If
+ // so, see if the other predecessor contains a store to the same location.
+ // if so, insert a PHI node (if needed) and move the stores down.
+ BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+
+ // Determine whether Dest has exactly two predecessors and, if so, compute
+ // the other predecessor.
+ pred_iterator PI = pred_begin(DestBB);
+ BasicBlock *P = *PI;
+ BasicBlock *OtherBB = 0;
+
+ if (P != StoreBB)
+ OtherBB = P;
+
+ if (++PI == pred_end(DestBB))
+ return false;
+
+ P = *PI;
+ if (P != StoreBB) {
+ if (OtherBB)
+ return false;
+ OtherBB = P;
+ }
+ if (++PI != pred_end(DestBB))
+ return false;
+
+ // Bail out if all the relevant blocks aren't distinct (this can happen,
+ // for example, if SI is in an infinite loop)
+ if (StoreBB == DestBB || OtherBB == DestBB)
+ return false;
+
+ // Verify that the other block ends in a branch and is not otherwise empty.
+ BasicBlock::iterator BBI = OtherBB->getTerminator();
+ BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+ if (!OtherBr || BBI == OtherBB->begin())
+ return false;
+
+ // If the other block ends in an unconditional branch, check for the 'if then
+ // else' case. there is an instruction before the branch.
+ StoreInst *OtherStore = 0;
+ if (OtherBr->isUnconditional()) {
+ --BBI;
+ // Skip over debugging info.
+ while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ if (BBI==OtherBB->begin())
+ return false;
+ --BBI;
+ }
+ // If this isn't a store, isn't a store to the same location, or is not the
+ // right kind of store, bail out.
+ OtherStore = dyn_cast<StoreInst>(BBI);
+ if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
+ !SI.isSameOperationAs(OtherStore))
+ return false;
+ } else {
+ // Otherwise, the other block ended with a conditional branch. If one of the
+ // destinations is StoreBB, then we have the if/then case.
+ if (OtherBr->getSuccessor(0) != StoreBB &&
+ OtherBr->getSuccessor(1) != StoreBB)
+ return false;
+
+ // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+ // if/then triangle. See if there is a store to the same ptr as SI that
+ // lives in OtherBB.
+ for (;; --BBI) {
+ // Check to see if we find the matching store.
+ if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+ if (OtherStore->getOperand(1) != SI.getOperand(1) ||
+ !SI.isSameOperationAs(OtherStore))
+ return false;
+ break;
+ }
+ // If we find something that may be using or overwriting the stored
+ // value, or if we run out of instructions, we can't do the xform.
+ if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
+ BBI == OtherBB->begin())
+ return false;
+ }
+
+ // In order to eliminate the store in OtherBr, we have to
+ // make sure nothing reads or overwrites the stored value in
+ // StoreBB.
+ for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+ // FIXME: This should really be AA driven.
+ if (I->mayReadFromMemory() || I->mayWriteToMemory())
+ return false;
+ }
+ }
+
+ // Insert a PHI node now if we need it.
+ Value *MergedVal = OtherStore->getOperand(0);
+ if (MergedVal != SI.getOperand(0)) {
+ PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
+ PN->addIncoming(SI.getOperand(0), SI.getParent());
+ PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+ MergedVal = InsertNewInstBefore(PN, DestBB->front());
+ }
+
+ // Advance to a place where it is safe to insert the new store and
+ // insert it.
+ BBI = DestBB->getFirstInsertionPt();
+ StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
+ SI.isVolatile(),
+ SI.getAlignment(),
+ SI.getOrdering(),
+ SI.getSynchScope());
+ InsertNewInstBefore(NewSI, *BBI);
+ NewSI->setDebugLoc(OtherStore->getDebugLoc());
+
+ // If the two stores had the same TBAA tag, preserve it.
+ if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
+ if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
+ OtherStore->getMetadata(LLVMContext::MD_tbaa))))
+ NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+
+ // Nuke the old stores.
+ EraseInstFromFunction(SI);
+ EraseInstFromFunction(*OtherStore);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
new file mode 100644
index 000000000000..173f2bf63304
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -0,0 +1,1117 @@
+//===- InstCombineMulDivRem.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv,
+// srem, urem, frem.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+
+/// simplifyValueKnownNonZero - The specific integer value is used in a context
+/// where it is known to be non-zero. If this allows us to simplify the
+/// computation, do so and return the new operand, otherwise return null.
+static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
+ // If V has multiple uses, then we would have to do more analysis to determine
+ // if this is safe. For example, the use could be in dynamically unreached
+ // code.
+ if (!V->hasOneUse()) return 0;
+
+ bool MadeChange = false;
+
+ // ((1 << A) >>u B) --> (1 << (A-B))
+ // Because V cannot be zero, we know that B is less than A.
+ Value *A = 0, *B = 0, *PowerOf2 = 0;
+ if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
+ m_Value(B))) &&
+ // The "1" can be any value known to be a power of 2.
+ isKnownToBeAPowerOfTwo(PowerOf2)) {
+ A = IC.Builder->CreateSub(A, B);
+ return IC.Builder->CreateShl(PowerOf2, A);
+ }
+
+ // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
+ // inexact. Similarly for <<.
+ if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
+ if (I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0))) {
+ // We know that this is an exact/nuw shift and that the input is a
+ // non-zero context as well.
+ if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) {
+ I->setOperand(0, V2);
+ MadeChange = true;
+ }
+
+ if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
+ I->setIsExact();
+ MadeChange = true;
+ }
+
+ if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
+ I->setHasNoUnsignedWrap();
+ MadeChange = true;
+ }
+ }
+
+ // TODO: Lots more we could do here:
+ // If V is a phi node, we can call this on each of its operands.
+ // "select cond, X, 0" can simplify to "X".
+
+ return MadeChange ? V : 0;
+}
+
+
+/// MultiplyOverflows - True if the multiply can not be expressed in an int
+/// this size.
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
+ uint32_t W = C1->getBitWidth();
+ APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
+ if (sign) {
+ LHSExt = LHSExt.sext(W * 2);
+ RHSExt = RHSExt.sext(W * 2);
+ } else {
+ LHSExt = LHSExt.zext(W * 2);
+ RHSExt = RHSExt.zext(W * 2);
+ }
+
+ APInt MulExt = LHSExt * RHSExt;
+
+ if (!sign)
+ return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
+
+ APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
+ APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
+ return MulExt.slt(Min) || MulExt.sgt(Max);
+}
+
+Instruction *InstCombiner::visitMul(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyMulInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+
+ // ((X << C1)*C2) == (X * (C2 << C1))
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
+ if (SI->getOpcode() == Instruction::Shl)
+ if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
+ return BinaryOperator::CreateMul(SI->getOperand(0),
+ ConstantExpr::getShl(CI, ShOp));
+
+ const APInt &Val = CI->getValue();
+ if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
+ Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
+ BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
+ if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
+ if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
+ return Shl;
+ }
+
+ // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
+ { Value *X; ConstantInt *C1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
+ Value *Add = Builder->CreateMul(X, CI);
+ return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
+ }
+ }
+
+ // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n
+ // (Y + const) * (-(2**n)) -> (-constY) * (2**n), for positive nonzero n
+ // The "* (2**n)" thus becomes a potential shifting opportunity.
+ {
+ const APInt & Val = CI->getValue();
+ const APInt &PosVal = Val.abs();
+ if (Val.isNegative() && PosVal.isPowerOf2()) {
+ Value *X = 0, *Y = 0;
+ if (Op0->hasOneUse()) {
+ ConstantInt *C1;
+ Value *Sub = 0;
+ if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
+ Sub = Builder->CreateSub(X, Y, "suba");
+ else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
+ Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc");
+ if (Sub)
+ return
+ BinaryOperator::CreateMul(Sub,
+ ConstantInt::get(Y->getType(), PosVal));
+ }
+ }
+ }
+ }
+
+ // Simplify mul instructions with a constant RHS.
+ if (isa<Constant>(Op1)) {
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castNegVal(Op1))
+ return BinaryOperator::CreateMul(Op0v, Op1v);
+
+ // (X / Y) * Y = X - (X % Y)
+ // (X / Y) * -Y = (X % Y) - X
+ {
+ Value *Op1C = Op1;
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
+ if (!BO ||
+ (BO->getOpcode() != Instruction::UDiv &&
+ BO->getOpcode() != Instruction::SDiv)) {
+ Op1C = Op0;
+ BO = dyn_cast<BinaryOperator>(Op1);
+ }
+ Value *Neg = dyn_castNegVal(Op1C);
+ if (BO && BO->hasOneUse() &&
+ (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
+ (BO->getOpcode() == Instruction::UDiv ||
+ BO->getOpcode() == Instruction::SDiv)) {
+ Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
+
+ // If the division is exact, X % Y is zero, so we end up with X or -X.
+ if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO))
+ if (SDiv->isExact()) {
+ if (Op1BO == Op1C)
+ return ReplaceInstUsesWith(I, Op0BO);
+ return BinaryOperator::CreateNeg(Op0BO);
+ }
+
+ Value *Rem;
+ if (BO->getOpcode() == Instruction::UDiv)
+ Rem = Builder->CreateURem(Op0BO, Op1BO);
+ else
+ Rem = Builder->CreateSRem(Op0BO, Op1BO);
+ Rem->takeName(BO);
+
+ if (Op1BO == Op1C)
+ return BinaryOperator::CreateSub(Op0BO, Rem);
+ return BinaryOperator::CreateSub(Rem, Op0BO);
+ }
+ }
+
+ /// i1 mul -> i1 and.
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateAnd(Op0, Op1);
+
+ // X*(1 << Y) --> X << Y
+ // (1 << Y)*X --> X << Y
+ {
+ Value *Y;
+ if (match(Op0, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op1, Y);
+ if (match(Op1, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op0, Y);
+ }
+
+ // If one of the operands of the multiply is a cast from a boolean value, then
+ // we know the bool is either zero or one, so this is a 'masking' multiply.
+ // X * Y (where Y is 0 or 1) -> X & (0-Y)
+ if (!I.getType()->isVectorTy()) {
+ // -2 is "-1 << 1" so it is all bits set except the low one.
+ APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
+
+ Value *BoolCast = 0, *OtherOp = 0;
+ if (MaskedValueIsZero(Op0, Negative2))
+ BoolCast = Op0, OtherOp = Op1;
+ else if (MaskedValueIsZero(Op1, Negative2))
+ BoolCast = Op1, OtherOp = Op0;
+
+ if (BoolCast) {
+ Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+ BoolCast);
+ return BinaryOperator::CreateAnd(V, OtherOp);
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+//
+// Detect pattern:
+//
+// log2(Y*0.5)
+//
+// And check for corresponding fast math flags
+//
+
+static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
+
+ if (!Op->hasOneUse())
+ return;
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op);
+ if (!II)
+ return;
+ if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra())
+ return;
+ Log2 = II;
+
+ Value *OpLog2Of = II->getArgOperand(0);
+ if (!OpLog2Of->hasOneUse())
+ return;
+
+ Instruction *I = dyn_cast<Instruction>(OpLog2Of);
+ if (!I)
+ return;
+ if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+ return;
+
+ ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
+ if (CFP && CFP->isExactlyValue(0.5)) {
+ Y = I->getOperand(1);
+ return;
+ }
+ CFP = dyn_cast<ConstantFP>(I->getOperand(1));
+ if (CFP && CFP->isExactlyValue(0.5))
+ Y = I->getOperand(0);
+}
+
+/// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns
+/// true iff the given value is FMul or FDiv with one and only one operand
+/// being a normal constant (i.e. not Zero/NaN/Infinity).
+static bool isFMulOrFDivWithConstant(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || (I->getOpcode() != Instruction::FMul &&
+ I->getOpcode() != Instruction::FDiv))
+ return false;
+
+ ConstantFP *C0 = dyn_cast<ConstantFP>(I->getOperand(0));
+ ConstantFP *C1 = dyn_cast<ConstantFP>(I->getOperand(1));
+
+ if (C0 && C1)
+ return false;
+
+ return (C0 && C0->getValueAPF().isNormal()) ||
+ (C1 && C1->getValueAPF().isNormal());
+}
+
+static bool isNormalFp(const ConstantFP *C) {
+ const APFloat &Flt = C->getValueAPF();
+ return Flt.isNormal() && !Flt.isDenormal();
+}
+
+/// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
+/// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand
+/// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true).
+/// This function is to simplify "FMulOrDiv * C" and returns the
+/// resulting expression. Note that this function could return NULL in
+/// case the constants cannot be folded into a normal floating-point.
+///
+Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
+ Instruction *InsertBefore) {
+ assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
+
+ Value *Opnd0 = FMulOrDiv->getOperand(0);
+ Value *Opnd1 = FMulOrDiv->getOperand(1);
+
+ ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0);
+ ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1);
+
+ BinaryOperator *R = 0;
+
+ // (X * C0) * C => X * (C0*C)
+ if (FMulOrDiv->getOpcode() == Instruction::FMul) {
+ Constant *F = ConstantExpr::getFMul(C1 ? C1 : C0, C);
+ if (isNormalFp(cast<ConstantFP>(F)))
+ R = BinaryOperator::CreateFMul(C1 ? Opnd0 : Opnd1, F);
+ } else {
+ if (C0) {
+ // (C0 / X) * C => (C0 * C) / X
+ ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
+ if (isNormalFp(F))
+ R = BinaryOperator::CreateFDiv(F, Opnd1);
+ } else {
+ // (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
+ ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1));
+ if (isNormalFp(F)) {
+ R = BinaryOperator::CreateFMul(Opnd0, F);
+ } else {
+ // (X / C1) * C => X / (C1/C)
+ Constant *F = ConstantExpr::getFDiv(C1, C);
+ if (isNormalFp(cast<ConstantFP>(F)))
+ R = BinaryOperator::CreateFDiv(Opnd0, F);
+ }
+ }
+ }
+
+ if (R) {
+ R->setHasUnsafeAlgebra(true);
+ InsertNewInstWith(R, *InsertBefore);
+ }
+
+ return R;
+}
+
+Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<Constant>(Op0))
+ std::swap(Op0, Op1);
+
+ if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ bool AllowReassociate = I.hasUnsafeAlgebra();
+
+ // Simplify mul instructions with a constant RHS.
+ if (isa<Constant>(Op1)) {
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ ConstantFP *C = dyn_cast<ConstantFP>(Op1);
+ if (C && AllowReassociate && C->getValueAPF().isNormal()) {
+ // Let MDC denote an expression in one of these forms:
+ // X * C, C/X, X/C, where C is a constant.
+ //
+ // Try to simplify "MDC * Constant"
+ if (isFMulOrFDivWithConstant(Op0)) {
+ Value *V = foldFMulConst(cast<Instruction>(Op0), C, &I);
+ if (V)
+ return ReplaceInstUsesWith(I, V);
+ }
+
+ // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
+ Instruction *FAddSub = dyn_cast<Instruction>(Op0);
+ if (FAddSub &&
+ (FAddSub->getOpcode() == Instruction::FAdd ||
+ FAddSub->getOpcode() == Instruction::FSub)) {
+ Value *Opnd0 = FAddSub->getOperand(0);
+ Value *Opnd1 = FAddSub->getOperand(1);
+ ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0);
+ ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1);
+ bool Swap = false;
+ if (C0) {
+ std::swap(C0, C1);
+ std::swap(Opnd0, Opnd1);
+ Swap = true;
+ }
+
+ if (C1 && C1->getValueAPF().isNormal() &&
+ isFMulOrFDivWithConstant(Opnd0)) {
+ Value *M1 = ConstantExpr::getFMul(C1, C);
+ Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
+ foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
+ 0;
+ if (M0 && M1) {
+ if (Swap && FAddSub->getOpcode() == Instruction::FSub)
+ std::swap(M0, M1);
+
+ Value *R = (FAddSub->getOpcode() == Instruction::FAdd) ?
+ BinaryOperator::CreateFAdd(M0, M1) :
+ BinaryOperator::CreateFSub(M0, M1);
+ Instruction *RI = cast<Instruction>(R);
+ RI->copyFastMathFlags(&I);
+ return RI;
+ }
+ }
+ }
+ }
+ }
+
+
+ // Under unsafe algebra do:
+ // X * log2(0.5*Y) = X*log2(Y) - X
+ if (I.hasUnsafeAlgebra()) {
+ Value *OpX = NULL;
+ Value *OpY = NULL;
+ IntrinsicInst *Log2;
+ detectLog2OfHalf(Op0, OpY, Log2);
+ if (OpY) {
+ OpX = Op1;
+ } else {
+ detectLog2OfHalf(Op1, OpY, Log2);
+ if (OpY) {
+ OpX = Op0;
+ }
+ }
+ // if pattern detected emit alternate sequence
+ if (OpX && OpY) {
+ Log2->setArgOperand(0, OpY);
+ Value *FMulVal = Builder->CreateFMul(OpX, Log2);
+ Instruction *FMul = cast<Instruction>(FMulVal);
+ FMul->copyFastMathFlags(Log2);
+ Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX);
+ FSub->copyFastMathFlags(Log2);
+ return FSub;
+ }
+ }
+
+ // Handle symmetric situation in a 2-iteration loop
+ Value *Opnd0 = Op0;
+ Value *Opnd1 = Op1;
+ for (int i = 0; i < 2; i++) {
+ bool IgnoreZeroSign = I.hasNoSignedZeros();
+ if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
+ Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
+ Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
+
+ // -X * -Y => X*Y
+ if (N1)
+ return BinaryOperator::CreateFMul(N0, N1);
+
+ if (Opnd0->hasOneUse()) {
+ // -X * Y => -(X*Y) (Promote negation as high as possible)
+ Value *T = Builder->CreateFMul(N0, Opnd1);
+ cast<Instruction>(T)->setDebugLoc(I.getDebugLoc());
+ Instruction *Neg = BinaryOperator::CreateFNeg(T);
+ if (I.getFastMathFlags().any()) {
+ cast<Instruction>(T)->copyFastMathFlags(&I);
+ Neg->copyFastMathFlags(&I);
+ }
+ return Neg;
+ }
+ }
+
+ // (X*Y) * X => (X*X) * Y where Y != X
+ // The purpose is two-fold:
+ // 1) to form a power expression (of X).
+ // 2) potentially shorten the critical path: After transformation, the
+ // latency of the instruction Y is amortized by the expression of X*X,
+ // and therefore Y is in a "less critical" position compared to what it
+ // was before the transformation.
+ //
+ if (AllowReassociate) {
+ Value *Opnd0_0, *Opnd0_1;
+ if (Opnd0->hasOneUse() &&
+ match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) {
+ Value *Y = 0;
+ if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1)
+ Y = Opnd0_1;
+ else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1)
+ Y = Opnd0_0;
+
+ if (Y) {
+ Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1));
+ T->copyFastMathFlags(&I);
+ T->setDebugLoc(I.getDebugLoc());
+
+ Instruction *R = BinaryOperator::CreateFMul(T, Y);
+ R->copyFastMathFlags(&I);
+ return R;
+ }
+ }
+ }
+
+ if (!isa<Constant>(Op1))
+ std::swap(Opnd0, Opnd1);
+ else
+ break;
+ }
+
+ return Changed ? &I : 0;
+}
+
+/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
+/// instruction.
+bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
+ SelectInst *SI = cast<SelectInst>(I.getOperand(1));
+
+ // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
+ int NonNullOperand = -1;
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+ if (ST->isNullValue())
+ NonNullOperand = 2;
+ // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+ if (ST->isNullValue())
+ NonNullOperand = 1;
+
+ if (NonNullOperand == -1)
+ return false;
+
+ Value *SelectCond = SI->getOperand(0);
+
+ // Change the div/rem to use 'Y' instead of the select.
+ I.setOperand(1, SI->getOperand(NonNullOperand));
+
+ // Okay, we know we replace the operand of the div/rem with 'Y' with no
+ // problem. However, the select, or the condition of the select may have
+ // multiple uses. Based on our knowledge that the operand must be non-zero,
+ // propagate the known value for the select into other uses of it, and
+ // propagate a known value of the condition into its other users.
+
+ // If the select and condition only have a single use, don't bother with this,
+ // early exit.
+ if (SI->use_empty() && SelectCond->hasOneUse())
+ return true;
+
+ // Scan the current block backward, looking for other uses of SI.
+ BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+
+ while (BBI != BBFront) {
+ --BBI;
+ // If we found a call to a function, we can't assume it will return, so
+ // information from below it cannot be propagated above it.
+ if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
+ break;
+
+ // Replace uses of the select or its condition with the known values.
+ for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
+ I != E; ++I) {
+ if (*I == SI) {
+ *I = SI->getOperand(NonNullOperand);
+ Worklist.Add(BBI);
+ } else if (*I == SelectCond) {
+ *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
+ ConstantInt::getFalse(BBI->getContext());
+ Worklist.Add(BBI);
+ }
+ }
+
+ // If we past the instruction, quit looking for it.
+ if (&*BBI == SI)
+ SI = 0;
+ if (&*BBI == SelectCond)
+ SelectCond = 0;
+
+ // If we ran out of things to eliminate, break out of the loop.
+ if (SelectCond == 0 && SI == 0)
+ break;
+
+ }
+ return true;
+}
+
+
+/// This function implements the transforms common to both integer division
+/// instructions (udiv and sdiv). It is called by the visitors to those integer
+/// division instructions.
+/// @brief Common integer divide transforms
+Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // The RHS is known non-zero.
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+ I.setOperand(1, V);
+ return &I;
+ }
+
+ // Handle cases involving: [su]div X, (select Cond, Y, Z)
+ // This does not apply for fdiv.
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // (X / C1) / C2 -> X / (C1*C2)
+ if (Instruction *LHS = dyn_cast<Instruction>(Op0))
+ if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
+ if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
+ if (MultiplyOverflows(RHS, LHSRHS,
+ I.getOpcode()==Instruction::SDiv))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+ ConstantExpr::getMul(RHS, LHSRHS));
+ }
+
+ if (!RHS->isZero()) { // avoid X udiv 0
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+ }
+
+ // See if we can fold away this div instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
+ Value *X = 0, *Z = 0;
+ if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
+ bool isSigned = I.getOpcode() == Instruction::SDiv;
+ if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
+ (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1)))))
+ return BinaryOperator::Create(I.getOpcode(), X, Op1);
+ }
+
+ return 0;
+}
+
+/// dyn_castZExtVal - Checks if V is a zext or constant that can
+/// be truncated to Ty without losing bits.
+static Value *dyn_castZExtVal(Value *V, Type *Ty) {
+ if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) {
+ if (Z->getSrcTy() == Ty)
+ return Z->getOperand(0);
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) {
+ if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth())
+ return ConstantExpr::getTrunc(C, Ty);
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ {
+ // X udiv 2^C -> X >> C
+ // Check to see if this is an unsigned division with an exact power of 2,
+ // if so, convert to a right shift.
+ const APInt *C;
+ if (match(Op1, m_Power2(C))) {
+ BinaryOperator *LShr =
+ BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(),
+ C->logBase2()));
+ if (I.isExact()) LShr->setIsExact();
+ return LShr;
+ }
+ }
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
+ // X udiv C, where C >= signbit
+ if (C->getValue().isNegative()) {
+ Value *IC = Builder->CreateICmpULT(Op0, C);
+ return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
+ ConstantInt::get(I.getType(), 1));
+ }
+ }
+
+ // (x lshr C1) udiv C2 --> x udiv (C2 << C1)
+ if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
+ Value *X;
+ ConstantInt *C1;
+ if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {
+ APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));
+ return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));
+ }
+ }
+
+ // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+ { const APInt *CI; Value *N;
+ if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
+ match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) {
+ if (*CI != 1)
+ N = Builder->CreateAdd(N,
+ ConstantInt::get(N->getType(), CI->logBase2()));
+ if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
+ N = Builder->CreateZExt(N, Z->getDestTy());
+ if (I.isExact())
+ return BinaryOperator::CreateExactLShr(Op0, N);
+ return BinaryOperator::CreateLShr(Op0, N);
+ }
+ }
+
+ // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
+ // where C1&C2 are powers of two.
+ { Value *Cond; const APInt *C1, *C2;
+ if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+ // Construct the "on true" case of the select
+ Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
+ I.isExact());
+
+ // Construct the "on false" case of the select
+ Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
+ I.isExact());
+
+ // construct the select instruction and return it.
+ return SelectInst::Create(Cond, TSI, FSI);
+ }
+ }
+
+ // (zext A) udiv (zext B) --> zext (A udiv B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div",
+ I.isExact()),
+ I.getType());
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifySDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // sdiv X, -1 == -X
+ if (RHS->isAllOnesValue())
+ return BinaryOperator::CreateNeg(Op0);
+
+ // sdiv X, C --> ashr exact X, log2(C)
+ if (I.isExact() && RHS->getValue().isNonNegative() &&
+ RHS->getValue().isPowerOf2()) {
+ Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
+ RHS->getValue().exactLogBase2());
+ return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName());
+ }
+
+ // -X/C --> X/-C provided the negation doesn't overflow.
+ if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
+ if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap())
+ return BinaryOperator::CreateSDiv(Sub->getOperand(1),
+ ConstantExpr::getNeg(RHS));
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a udiv.
+ if (I.getType()->isIntegerTy()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op0, Mask)) {
+ if (MaskedValueIsZero(Op1, Mask)) {
+ // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+
+ if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
+ // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
+ // Safe because the only negative value (1 << Y) can take on is
+ // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
+ // the sign bit set.
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special
+/// FP value and:
+/// 1) 1/C is exact, or
+/// 2) reciprocal is allowed.
+/// If the convertion was successful, the simplified expression "X * 1/C" is
+/// returned; otherwise, NULL is returned.
+///
+static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
+ ConstantFP *Divisor,
+ bool AllowReciprocal) {
+ const APFloat &FpVal = Divisor->getValueAPF();
+ APFloat Reciprocal(FpVal.getSemantics());
+ bool Cvt = FpVal.getExactInverse(&Reciprocal);
+
+ if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
+ Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
+ (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
+ Cvt = !Reciprocal.isDenormal();
+ }
+
+ if (!Cvt)
+ return 0;
+
+ ConstantFP *R;
+ R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal);
+ return BinaryOperator::CreateFMul(Dividend, R);
+}
+
+Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ bool AllowReassociate = I.hasUnsafeAlgebra();
+ bool AllowReciprocal = I.hasAllowReciprocal();
+
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (AllowReassociate) {
+ ConstantFP *C1 = 0;
+ ConstantFP *C2 = Op1C;
+ Value *X;
+ Instruction *Res = 0;
+
+ if (match(Op0, m_FMul(m_Value(X), m_ConstantFP(C1)))) {
+ // (X*C1)/C2 => X * (C1/C2)
+ //
+ Constant *C = ConstantExpr::getFDiv(C1, C2);
+ const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
+ if (F.isNormal() && !F.isDenormal())
+ Res = BinaryOperator::CreateFMul(X, C);
+ } else if (match(Op0, m_FDiv(m_Value(X), m_ConstantFP(C1)))) {
+ // (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed]
+ //
+ Constant *C = ConstantExpr::getFMul(C1, C2);
+ const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
+ if (F.isNormal() && !F.isDenormal()) {
+ Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
+ AllowReciprocal);
+ if (!Res)
+ Res = BinaryOperator::CreateFDiv(X, C);
+ }
+ }
+
+ if (Res) {
+ Res->setFastMathFlags(I.getFastMathFlags());
+ return Res;
+ }
+ }
+
+ // X / C => X * 1/C
+ if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal))
+ return T;
+
+ return 0;
+ }
+
+ if (AllowReassociate && isa<ConstantFP>(Op0)) {
+ ConstantFP *C1 = cast<ConstantFP>(Op0), *C2;
+ Constant *Fold = 0;
+ Value *X;
+ bool CreateDiv = true;
+
+ // C1 / (X*C2) => (C1/C2) / X
+ if (match(Op1, m_FMul(m_Value(X), m_ConstantFP(C2))))
+ Fold = ConstantExpr::getFDiv(C1, C2);
+ else if (match(Op1, m_FDiv(m_Value(X), m_ConstantFP(C2)))) {
+ // C1 / (X/C2) => (C1*C2) / X
+ Fold = ConstantExpr::getFMul(C1, C2);
+ } else if (match(Op1, m_FDiv(m_ConstantFP(C2), m_Value(X)))) {
+ // C1 / (C2/X) => (C1/C2) * X
+ Fold = ConstantExpr::getFDiv(C1, C2);
+ CreateDiv = false;
+ }
+
+ if (Fold) {
+ const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
+ if (FoldC.isNormal() && !FoldC.isDenormal()) {
+ Instruction *R = CreateDiv ?
+ BinaryOperator::CreateFDiv(Fold, X) :
+ BinaryOperator::CreateFMul(X, Fold);
+ R->setFastMathFlags(I.getFastMathFlags());
+ return R;
+ }
+ }
+ return 0;
+ }
+
+ if (AllowReassociate) {
+ Value *X, *Y;
+ Value *NewInst = 0;
+ Instruction *SimpR = 0;
+
+ if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) {
+ // (X/Y) / Z => X / (Y*Z)
+ //
+ if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op1)) {
+ NewInst = Builder->CreateFMul(Y, Op1);
+ SimpR = BinaryOperator::CreateFDiv(X, NewInst);
+ }
+ } else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) {
+ // Z / (X/Y) => Z*Y / X
+ //
+ if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op0)) {
+ NewInst = Builder->CreateFMul(Op0, Y);
+ SimpR = BinaryOperator::CreateFDiv(NewInst, X);
+ }
+ }
+
+ if (NewInst) {
+ if (Instruction *T = dyn_cast<Instruction>(NewInst))
+ T->setDebugLoc(I.getDebugLoc());
+ SimpR->setFastMathFlags(I.getFastMathFlags());
+ return SimpR;
+ }
+ }
+
+ return 0;
+}
+
+/// This function implements the transforms common to both integer remainder
+/// instructions (urem and srem). It is called by the visitors to those integer
+/// remainder instructions.
+/// @brief Common integer remainder transforms
+Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // The RHS is known non-zero.
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+ I.setOperand(1, V);
+ return &I;
+ }
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (isa<ConstantInt>(Op1)) {
+ if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ } else if (isa<PHINode>(Op0I)) {
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // See if we can fold away this rem instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitURem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyURemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *common = commonIRemTransforms(I))
+ return common;
+
+ // X urem C^2 -> X and C-1
+ { const APInt *C;
+ if (match(Op1, m_Power2(C)))
+ return BinaryOperator::CreateAnd(Op0,
+ ConstantInt::get(I.getType(), *C-1));
+ }
+
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
+ Constant *N1 = Constant::getAllOnesValue(I.getType());
+ Value *Add = Builder->CreateAdd(Op1, N1);
+ return BinaryOperator::CreateAnd(Op0, Add);
+ }
+
+ // urem X, (select Cond, 2^C1, 2^C2) -->
+ // select Cond, (and X, C1-1), (and X, C2-1)
+ // when C1&C2 are powers of two.
+ { Value *Cond; const APInt *C1, *C2;
+ if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+ Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t");
+ Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f");
+ return SelectInst::Create(Cond, TrueAnd, FalseAnd);
+ }
+ }
+
+ // (zext A) urem (zext B) --> zext (A urem B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
+ I.getType());
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifySRemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle the integer rem common cases
+ if (Instruction *Common = commonIRemTransforms(I))
+ return Common;
+
+ if (Value *RHSNeg = dyn_castNegVal(Op1))
+ if (!isa<Constant>(RHSNeg) ||
+ (isa<ConstantInt>(RHSNeg) &&
+ cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
+ // X % -Y -> X % Y
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, RHSNeg);
+ return &I;
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a urem.
+ if (I.getType()->isIntegerTy()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+ // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
+ }
+ }
+
+ // If it's a constant vector, flip any negative values positive.
+ if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) {
+ Constant *C = cast<Constant>(Op1);
+ unsigned VWidth = C->getType()->getVectorNumElements();
+
+ bool hasNegative = false;
+ bool hasMissing = false;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elt = C->getAggregateElement(i);
+ if (Elt == 0) {
+ hasMissing = true;
+ break;
+ }
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elt))
+ if (RHS->isNegative())
+ hasNegative = true;
+ }
+
+ if (hasNegative && !hasMissing) {
+ SmallVector<Constant *, 16> Elts(VWidth);
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Elts[i] = C->getAggregateElement(i); // Handle undef, etc.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elts[i])) {
+ if (RHS->isNegative())
+ Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
+ }
+ }
+
+ Constant *NewRHSV = ConstantVector::get(Elts);
+ if (NewRHSV != C) { // Don't loop on -MININT
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, NewRHSV);
+ return &I;
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFRemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
new file mode 100644
index 000000000000..b0a998cca76e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -0,0 +1,904 @@
+//===- InstCombinePHI.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitPHINode function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/DataLayout.h"
+using namespace llvm;
+
+/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
+/// and if a/b/c and the add's all have a single use, turn this into a phi
+/// and a single binop.
+Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+ assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
+ unsigned Opc = FirstInst->getOpcode();
+ Value *LHSVal = FirstInst->getOperand(0);
+ Value *RHSVal = FirstInst->getOperand(1);
+
+ Type *LHSType = LHSVal->getType();
+ Type *RHSType = RHSVal->getType();
+
+ bool isNUW = false, isNSW = false, isExact = false;
+ if (OverflowingBinaryOperator *BO =
+ dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+ isNUW = BO->hasNoUnsignedWrap();
+ isNSW = BO->hasNoSignedWrap();
+ } else if (PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(FirstInst))
+ isExact = PEO->isExact();
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
+ // Verify type of the LHS matches so we don't fold cmp's of different
+ // types.
+ I->getOperand(0)->getType() != LHSType ||
+ I->getOperand(1)->getType() != RHSType)
+ return 0;
+
+ // If they are CmpInst instructions, check their predicates
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
+ return 0;
+
+ if (isNUW)
+ isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+ if (isNSW)
+ isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ if (isExact)
+ isExact = cast<PossiblyExactOperator>(I)->isExact();
+
+ // Keep track of which operand needs a phi node.
+ if (I->getOperand(0) != LHSVal) LHSVal = 0;
+ if (I->getOperand(1) != RHSVal) RHSVal = 0;
+ }
+
+ // If both LHS and RHS would need a PHI, don't do this transformation,
+ // because it would increase the number of PHIs entering the block,
+ // which leads to higher register pressure. This is especially
+ // bad when the PHIs are in the header of a loop.
+ if (!LHSVal && !RHSVal)
+ return 0;
+
+ // Otherwise, this is safe to transform!
+
+ Value *InLHS = FirstInst->getOperand(0);
+ Value *InRHS = FirstInst->getOperand(1);
+ PHINode *NewLHS = 0, *NewRHS = 0;
+ if (LHSVal == 0) {
+ NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(),
+ FirstInst->getOperand(0)->getName() + ".pn");
+ NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewLHS, PN);
+ LHSVal = NewLHS;
+ }
+
+ if (RHSVal == 0) {
+ NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
+ FirstInst->getOperand(1)->getName() + ".pn");
+ NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewRHS, PN);
+ RHSVal = NewRHS;
+ }
+
+ // Add all operands to the new PHIs.
+ if (NewLHS || NewRHS) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+ if (NewLHS) {
+ Value *NewInLHS = InInst->getOperand(0);
+ NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+ }
+ if (NewRHS) {
+ Value *NewInRHS = InInst->getOperand(1);
+ NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+ }
+ }
+ }
+
+ if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
+ CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ LHSVal, RHSVal);
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+ }
+
+ BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst);
+ BinaryOperator *NewBinOp =
+ BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+ if (isNUW) NewBinOp->setHasNoUnsignedWrap();
+ if (isNSW) NewBinOp->setHasNoSignedWrap();
+ if (isExact) NewBinOp->setIsExact();
+ NewBinOp->setDebugLoc(FirstInst->getDebugLoc());
+ return NewBinOp;
+}
+
+Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
+ GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+ FirstInst->op_end());
+ // This is true if all GEP bases are allocas and if all indices into them are
+ // constants.
+ bool AllBasePointersAreAllocas = true;
+
+ // We don't want to replace this phi if the replacement would require
+ // more than one phi, which leads to higher register pressure. This is
+ // especially bad when the PHIs are in the header of a loop.
+ bool NeededPhi = false;
+
+ bool AllInBounds = true;
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
+ GEP->getNumOperands() != FirstInst->getNumOperands())
+ return 0;
+
+ AllInBounds &= GEP->isInBounds();
+
+ // Keep track of whether or not all GEPs are of alloca pointers.
+ if (AllBasePointersAreAllocas &&
+ (!isa<AllocaInst>(GEP->getOperand(0)) ||
+ !GEP->hasAllConstantIndices()))
+ AllBasePointersAreAllocas = false;
+
+ // Compare the operand lists.
+ for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
+ if (FirstInst->getOperand(op) == GEP->getOperand(op))
+ continue;
+
+ // Don't merge two GEPs when two operands differ (introducing phi nodes)
+ // if one of the PHIs has a constant for the index. The index may be
+ // substantially cheaper to compute for the constants, so making it a
+ // variable index could pessimize the path. This also handles the case
+ // for struct indices, which must always be constant.
+ if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
+ isa<ConstantInt>(GEP->getOperand(op)))
+ return 0;
+
+ if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
+ return 0;
+
+ // If we already needed a PHI for an earlier operand, and another operand
+ // also requires a PHI, we'd be introducing more PHIs than we're
+ // eliminating, which increases register pressure on entry to the PHI's
+ // block.
+ if (NeededPhi)
+ return 0;
+
+ FixedOperands[op] = 0; // Needs a PHI.
+ NeededPhi = true;
+ }
+ }
+
+ // If all of the base pointers of the PHI'd GEPs are from allocas, don't
+ // bother doing this transformation. At best, this will just save a bit of
+ // offset calculation, but all the predecessors will have to materialize the
+ // stack address into a register anyway. We'd actually rather *clone* the
+ // load up into the predecessors so that we have a load of a gep of an alloca,
+ // which can usually all be folded into the load.
+ if (AllBasePointersAreAllocas)
+ return 0;
+
+ // Otherwise, this is safe to transform. Insert PHI nodes for each operand
+ // that is variable.
+ SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
+
+ bool HasAnyPHIs = false;
+ for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
+ if (FixedOperands[i]) continue; // operand doesn't need a phi.
+ Value *FirstOp = FirstInst->getOperand(i);
+ PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
+ FirstOp->getName()+".pn");
+ InsertNewInstBefore(NewPN, PN);
+
+ NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
+ OperandPhis[i] = NewPN;
+ FixedOperands[i] = NewPN;
+ HasAnyPHIs = true;
+ }
+
+
+ // Add all operands to the new PHIs.
+ if (HasAnyPHIs) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ BasicBlock *InBB = PN.getIncomingBlock(i);
+
+ for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
+ if (PHINode *OpPhi = OperandPhis[op])
+ OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+ }
+ }
+
+ Value *Base = FixedOperands[0];
+ GetElementPtrInst *NewGEP =
+ GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
+ if (AllInBounds) NewGEP->setIsInBounds();
+ NewGEP->setDebugLoc(FirstInst->getDebugLoc());
+ return NewGEP;
+}
+
+
+/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
+/// sink the load out of the block that defines it. This means that it must be
+/// obvious the value of the load is not changed from the point of the load to
+/// the end of the block it is in.
+///
+/// Finally, it is safe, but not profitable, to sink a load targeting a
+/// non-address-taken alloca. Doing so will cause us to not promote the alloca
+/// to a register.
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
+ BasicBlock::iterator BBI = L, E = L->getParent()->end();
+
+ for (++BBI; BBI != E; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ // Check for non-address taken alloca. If not address-taken already, it isn't
+ // profitable to do this xform.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
+ bool isAddressTaken = false;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (isa<LoadInst>(U)) continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If storing TO the alloca, then the address isn't taken.
+ if (SI->getOperand(1) == AI) continue;
+ }
+ isAddressTaken = true;
+ break;
+ }
+
+ if (!isAddressTaken && AI->isStaticAlloca())
+ return false;
+ }
+
+ // If this load is a load from a GEP with a constant offset from an alloca,
+ // then we don't want to sink it. In its present form, it will be
+ // load [constant stack offset]. Sinking it will cause us to have to
+ // materialize the stack addresses in each predecessor in a register only to
+ // do a shared load from register in the successor.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+ if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+ return false;
+
+ return true;
+}
+
+Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
+ LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
+
+ // FIXME: This is overconservative; this transform is allowed in some cases
+ // for atomic operations.
+ if (FirstLI->isAtomic())
+ return 0;
+
+ // When processing loads, we need to propagate two bits of information to the
+ // sunk load: whether it is volatile, and what its alignment is. We currently
+ // don't sink loads when some have their alignment specified and some don't.
+ // visitLoadInst will propagate an alignment onto the load when TD is around,
+ // and if TD isn't around, we can't handle the mixed case.
+ bool isVolatile = FirstLI->isVolatile();
+ unsigned LoadAlignment = FirstLI->getAlignment();
+ unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
+
+ // We can't sink the load if the loaded value could be modified between the
+ // load and the PHI.
+ if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
+ !isSafeAndProfitableToSinkLoad(FirstLI))
+ return 0;
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
+ if (!LI || !LI->hasOneUse())
+ return 0;
+
+ // We can't sink the load if the loaded value could be modified between
+ // the load and the PHI.
+ if (LI->isVolatile() != isVolatile ||
+ LI->getParent() != PN.getIncomingBlock(i) ||
+ LI->getPointerAddressSpace() != LoadAddrSpace ||
+ !isSafeAndProfitableToSinkLoad(LI))
+ return 0;
+
+ // If some of the loads have an alignment specified but not all of them,
+ // we can't do the transformation.
+ if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
+ return 0;
+
+ LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
+ PN.getNumIncomingValues(),
+ PN.getName()+".in");
+
+ Value *InVal = FirstLI->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // If this was a volatile load that we are merging, make sure to loop through
+ // and mark all the input loads as non-volatile. If we don't do this, we will
+ // insert a new volatile load and the old ones will not be deletable.
+ if (isVolatile)
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+
+ LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
+ NewLI->setDebugLoc(FirstLI->getDebugLoc());
+ return NewLI;
+}
+
+
+
+/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+/// operator and they all are only used by the PHI, PHI together their
+/// inputs, and do the operation once, to the result of the PHI.
+Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+
+ if (isa<GetElementPtrInst>(FirstInst))
+ return FoldPHIArgGEPIntoPHI(PN);
+ if (isa<LoadInst>(FirstInst))
+ return FoldPHIArgLoadIntoPHI(PN);
+
+ // Scan the instruction, looking for input operations that can be folded away.
+ // If all input operands to the phi are the same instruction (e.g. a cast from
+ // the same type or "+42") we can pull the operation through the PHI, reducing
+ // code size and simplifying code.
+ Constant *ConstantOp = 0;
+ Type *CastSrcTy = 0;
+ bool isNUW = false, isNSW = false, isExact = false;
+
+ if (isa<CastInst>(FirstInst)) {
+ CastSrcTy = FirstInst->getOperand(0)->getType();
+
+ // Be careful about transforming integer PHIs. We don't want to pessimize
+ // the code by turning an i32 into an i1293.
+ if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {
+ if (!ShouldChangeType(PN.getType(), CastSrcTy))
+ return 0;
+ }
+ } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
+ // Can fold binop, compare or shift here if the RHS is a constant,
+ // otherwise call FoldPHIArgBinOpIntoPHI.
+ ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
+ if (ConstantOp == 0)
+ return FoldPHIArgBinOpIntoPHI(PN);
+
+ if (OverflowingBinaryOperator *BO =
+ dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+ isNUW = BO->hasNoUnsignedWrap();
+ isNSW = BO->hasNoSignedWrap();
+ } else if (PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(FirstInst))
+ isExact = PEO->isExact();
+ } else {
+ return 0; // Cannot fold this operation.
+ }
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ return 0;
+ if (CastSrcTy) {
+ if (I->getOperand(0)->getType() != CastSrcTy)
+ return 0; // Cast operation must match.
+ } else if (I->getOperand(1) != ConstantOp) {
+ return 0;
+ }
+
+ if (isNUW)
+ isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+ if (isNSW)
+ isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ if (isExact)
+ isExact = cast<PossiblyExactOperator>(I)->isExact();
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+ PN.getNumIncomingValues(),
+ PN.getName()+".in");
+
+ Value *InVal = FirstInst->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // Insert and return the new operation.
+ if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) {
+ CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal,
+ PN.getType());
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+ }
+
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
+ BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ if (isNUW) BinOp->setHasNoUnsignedWrap();
+ if (isNSW) BinOp->setHasNoSignedWrap();
+ if (isExact) BinOp->setIsExact();
+ BinOp->setDebugLoc(FirstInst->getDebugLoc());
+ return BinOp;
+ }
+
+ CmpInst *CIOp = cast<CmpInst>(FirstInst);
+ CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ PhiVal, ConstantOp);
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+}
+
+/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
+/// that is dead.
+static bool DeadPHICycle(PHINode *PN,
+ SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
+ if (PN->use_empty()) return true;
+ if (!PN->hasOneUse()) return false;
+
+ // Remember this node, and if we find the cycle, return.
+ if (!PotentiallyDeadPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PotentiallyDeadPHIs.size() == 16)
+ return false;
+
+ if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
+ return DeadPHICycle(PU, PotentiallyDeadPHIs);
+
+ return false;
+}
+
+/// PHIsEqualValue - Return true if this phi node is always equal to
+/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
+/// z = some value; x = phi (y, z); y = phi (x, z)
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+ SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
+ // See if we already saw this PHI node.
+ if (!ValueEqualPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (ValueEqualPHIs.size() == 16)
+ return false;
+
+ // Scan the operands to see if they are either phi nodes or are equal to
+ // the value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Op = PN->getIncomingValue(i);
+ if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
+ if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
+ return false;
+ } else if (Op != NonPhiInVal)
+ return false;
+ }
+
+ return true;
+}
+
+
+namespace {
+struct PHIUsageRecord {
+ unsigned PHIId; // The ID # of the PHI (something determinstic to sort on)
+ unsigned Shift; // The amount shifted.
+ Instruction *Inst; // The trunc instruction.
+
+ PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+ : PHIId(pn), Shift(Sh), Inst(User) {}
+
+ bool operator<(const PHIUsageRecord &RHS) const {
+ if (PHIId < RHS.PHIId) return true;
+ if (PHIId > RHS.PHIId) return false;
+ if (Shift < RHS.Shift) return true;
+ if (Shift > RHS.Shift) return false;
+ return Inst->getType()->getPrimitiveSizeInBits() <
+ RHS.Inst->getType()->getPrimitiveSizeInBits();
+ }
+};
+
+struct LoweredPHIRecord {
+ PHINode *PN; // The PHI that was lowered.
+ unsigned Shift; // The amount shifted.
+ unsigned Width; // The width extracted.
+
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
+ : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+ // Ctor form used by DenseMap.
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(0, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(0, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ };
+ template <>
+ struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
+}
+
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
+/// so, we split the PHI into the various pieces being extracted. This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be an bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+ // nodes which are extracted from. PHIsToSlice is a set we use to avoid
+ // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to
+ // check the uses of (to ensure they are all extracts).
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ // Scan the input list of the PHI. If any input is an invoke, and if the
+ // input is defined in the predecessor, then we won't be split the critical
+ // edge which is required to insert a truncate. Because of this, we have to
+ // bail out.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
+ if (II == 0) continue;
+ if (II->getParent() != PN->getIncomingBlock(i))
+ continue;
+
+ // If we have a phi, and if it's directly in the predecessor, then we have
+ // a critical edge where we need to put the truncate. Since we can't
+ // split the edge in instcombine, we have to bail out.
+ return 0;
+ }
+
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHIsInspected.insert(UserPN))
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(User)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ continue;
+ }
+
+ // Otherwise it must be a lshr which can only be used by one trunc.
+ if (User->getOpcode() != Instruction::LShr ||
+ !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return 0;
+
+ unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+ }
+ }
+
+ // If we have no users, they must be all self uses, just nuke the PHI.
+ if (PHIUsers.empty())
+ return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+ // If this phi node is transformable, create new PHIs for all the pieces
+ // extracted out of it. First, sort the users by their offset and size.
+ array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+ DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
+ );
+
+ // PredValues - This is a temporary used when rewriting PHI nodes. It is
+ // hoisted out here to avoid construction/destruction thrashing.
+ DenseMap<BasicBlock*, Value*> PredValues;
+
+ // ExtractedVals - Each new PHI we introduce is saved here so we don't
+ // introduce redundant PHIs.
+ DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+ for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+ unsigned PHIId = PHIUsers[UserI].PHIId;
+ PHINode *PN = PHIsToSlice[PHIId];
+ unsigned Offset = PHIUsers[UserI].Shift;
+ Type *Ty = PHIUsers[UserI].Inst->getType();
+
+ PHINode *EltPHI;
+
+ // If we've already lowered a user like this, reuse the previously lowered
+ // value.
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+
+ // Otherwise, Create the new PHI node for this user.
+ EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
+ PN->getName()+".off"+Twine(Offset), PN);
+ assert(EltPHI->getType() != PN->getType() &&
+ "Truncate didn't shrink phi?");
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *&PredVal = PredValues[Pred];
+
+ // If we already have a value for this predecessor, reuse it.
+ if (PredVal) {
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ // Handle the PHI self-reuse case.
+ Value *InVal = PN->getIncomingValue(i);
+ if (InVal == PN) {
+ PredVal = EltPHI;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
+ // If the incoming value was a PHI, and if it was one of the PHIs we
+ // already rewrote it, just use the lowered value.
+ if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+ PredVal = Res;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+ }
+
+ // Otherwise, do an extract in the predecessor.
+ Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Value *Res = InVal;
+ if (Offset)
+ Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Offset), "extract");
+ Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ PredVal = Res;
+ EltPHI->addIncoming(Res, Pred);
+
+ // If the incoming value was a PHI, and if it was one of the PHIs we are
+ // rewriting, we will ultimately delete the code we inserted. This
+ // means we need to revisit that PHI to make sure we extract out the
+ // needed piece.
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHIsInspected.count(OldInVal)) {
+ unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+ OldInVal)-PHIsToSlice.begin();
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ cast<Instruction>(Res)));
+ ++UserE;
+ }
+ }
+ PredValues.clear();
+
+ DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ << *EltPHI << '\n');
+ ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+ }
+
+ // Replace the use of this piece with the PHI node.
+ ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+ }
+
+ // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+ // with undefs.
+ Value *Undef = UndefValue::get(FirstPhi.getType());
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
+ return ReplaceInstUsesWith(FirstPhi, Undef);
+}
+
+// PHINode simplification
+//
+Instruction *InstCombiner::visitPHINode(PHINode &PN) {
+ if (Value *V = SimplifyInstruction(&PN, TD))
+ return ReplaceInstUsesWith(PN, V);
+
+ // If all PHI operands are the same operation, pull them through the PHI,
+ // reducing code size.
+ if (isa<Instruction>(PN.getIncomingValue(0)) &&
+ isa<Instruction>(PN.getIncomingValue(1)) &&
+ cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
+ cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
+ // FIXME: The hasOneUse check will fail for PHIs that use the value more
+ // than themselves more than once.
+ PN.getIncomingValue(0)->hasOneUse())
+ if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
+ return Result;
+
+ // If this is a trivial cycle in the PHI node graph, remove it. Basically, if
+ // this PHI only has a single use (a PHI), and if that PHI only has one use (a
+ // PHI)... break the cycle.
+ if (PN.hasOneUse()) {
+ Instruction *PHIUser = cast<Instruction>(PN.use_back());
+ if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
+ SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
+ PotentiallyDeadPHIs.insert(&PN);
+ if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+
+ // If this phi has a single use, and if that use just computes a value for
+ // the next iteration of a loop, delete the phi. This occurs with unused
+ // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
+ // common case here is good because the only other things that catch this
+ // are induction variable analysis (sometimes) and ADCE, which is only run
+ // late.
+ if (PHIUser->hasOneUse() &&
+ (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+ PHIUser->use_back() == &PN) {
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+ }
+
+ // We sometimes end up with phi cycles that non-obviously end up being the
+ // same value, for example:
+ // z = some value; x = phi (y, z); y = phi (x, z)
+ // where the phi nodes don't necessarily need to be in the same block. Do a
+ // quick check to see if the PHI node only contains a single non-phi value, if
+ // so, scan to see if the phi cycle is actually equal to that value.
+ {
+ unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues();
+ // Scan for the first non-phi operand.
+ while (InValNo != NumIncomingVals &&
+ isa<PHINode>(PN.getIncomingValue(InValNo)))
+ ++InValNo;
+
+ if (InValNo != NumIncomingVals) {
+ Value *NonPhiInVal = PN.getIncomingValue(InValNo);
+
+ // Scan the rest of the operands to see if there are any conflicts, if so
+ // there is no need to recursively scan other phis.
+ for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
+ Value *OpVal = PN.getIncomingValue(InValNo);
+ if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
+ break;
+ }
+
+ // If we scanned over all operands, then we have one unique value plus
+ // phi values. Scan PHI nodes to see if they all merge in each other or
+ // the value.
+ if (InValNo == NumIncomingVals) {
+ SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
+ if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+ return ReplaceInstUsesWith(PN, NonPhiInVal);
+ }
+ }
+ }
+
+ // If there are multiple PHIs, sort their operands so that they all list
+ // the blocks in the same order. This will help identical PHIs be eliminated
+ // by other passes. Other passes shouldn't depend on this for correctness
+ // however.
+ PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
+ if (&PN != FirstPN)
+ for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BBA = PN.getIncomingBlock(i);
+ BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+ if (BBA != BBB) {
+ Value *VA = PN.getIncomingValue(i);
+ unsigned j = PN.getBasicBlockIndex(BBB);
+ Value *VB = PN.getIncomingValue(j);
+ PN.setIncomingBlock(i, BBB);
+ PN.setIncomingValue(i, VB);
+ PN.setIncomingBlock(j, BBA);
+ PN.setIncomingValue(j, VA);
+ // NOTE: Instcombine normally would want us to "return &PN" if we
+ // modified any of the operands of an instruction. However, since we
+ // aren't adding or removing uses (just rearranging them) we don't do
+ // this in this case.
+ }
+ }
+
+ // If this is an integer PHI and we know that it has an illegal type, see if
+ // it is only used by trunc or trunc(lshr) operations. If so, we split the
+ // PHI into the various pieces being extracted. This sort of thing is
+ // introduced when SROA promotes an aggregate to a single large integer type.
+ if (PN.getType()->isIntegerTy() && TD &&
+ !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+ return Res;
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
new file mode 100644
index 000000000000..121aa1f8d73f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -0,0 +1,945 @@
+//===- InstCombineSelect.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitSelect function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
+/// returning the kind and providing the out parameter results if we
+/// successfully match.
+static SelectPatternFlavor
+MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (SI == 0) return SPF_UNKNOWN;
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
+ if (ICI == 0) return SPF_UNKNOWN;
+
+ LHS = ICI->getOperand(0);
+ RHS = ICI->getOperand(1);
+
+ // (icmp X, Y) ? X : Y
+ if (SI->getTrueValue() == ICI->getOperand(0) &&
+ SI->getFalseValue() == ICI->getOperand(1)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMAX;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMAX;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMIN;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMIN;
+ }
+ }
+
+ // (icmp X, Y) ? Y : X
+ if (SI->getTrueValue() == ICI->getOperand(1) &&
+ SI->getFalseValue() == ICI->getOperand(0)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ }
+ }
+
+ // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
+
+ return SPF_UNKNOWN;
+}
+
+
+/// GetSelectFoldableOperands - We want to turn code that looks like this:
+/// %C = or %A, %B
+/// %D = select %cond, %C, %A
+/// into:
+/// %C = select %cond, %B, 0
+/// %D = or %A, %C
+///
+/// Assuming that the specified instruction is an operand to the select, return
+/// a bitmask indicating which operands of this instruction are foldable if they
+/// equal the other incoming value of the select.
+///
+static unsigned GetSelectFoldableOperands(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return 3; // Can fold through either operand.
+ case Instruction::Sub: // Can only fold on the amount subtracted.
+ case Instruction::Shl: // Can only fold on the shift amount.
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return 1;
+ default:
+ return 0; // Cannot fold
+ }
+}
+
+/// GetSelectFoldableConstant - For the same transformation as the previous
+/// function, return the identity constant that goes into the select.
+static Constant *GetSelectFoldableConstant(Instruction *I) {
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("This cannot happen!");
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return Constant::getNullValue(I->getType());
+ case Instruction::And:
+ return Constant::getAllOnesValue(I->getType());
+ case Instruction::Mul:
+ return ConstantInt::get(I->getType(), 1);
+ }
+}
+
+/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
+/// have the same opcode and only one use each. Try to simplify this.
+Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI) {
+ if (TI->getNumOperands() == 1) {
+ // If this is a non-volatile load or a cast from the same type,
+ // merge.
+ if (TI->isCast()) {
+ Type *FIOpndTy = FI->getOperand(0)->getType();
+ if (TI->getOperand(0)->getType() != FIOpndTy)
+ return 0;
+ // The select condition may be a vector. We may only change the operand
+ // type if the vector width remains the same (and matches the condition).
+ Type *CondTy = SI.getCondition()->getType();
+ if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() ||
+ CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
+ return 0;
+ } else {
+ return 0; // unknown unary op.
+ }
+
+ // Fold this by inserting a select from the input values.
+ Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName()+".v");
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ TI->getType());
+ }
+
+ // Only handle binary operators here.
+ if (!isa<BinaryOperator>(TI))
+ return 0;
+
+ // Figure out if the operations have any operands in common.
+ Value *MatchOp, *OtherOpT, *OtherOpF;
+ bool MatchIsOpZero;
+ if (TI->getOperand(0) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = false;
+ } else if (!TI->isCommutative()) {
+ return 0;
+ } else if (TI->getOperand(0) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else {
+ return 0;
+ }
+
+ // If we reach here, they do have operations in common.
+ Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT,
+ OtherOpF, SI.getName()+".v");
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
+ if (MatchIsOpZero)
+ return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
+ else
+ return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
+ }
+ llvm_unreachable("Shouldn't get here");
+}
+
+static bool isSelect01(Constant *C1, Constant *C2) {
+ ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
+ if (!C1I)
+ return false;
+ ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
+ if (!C2I)
+ return false;
+ if (!C1I->isZero() && !C2I->isZero()) // One side must be zero.
+ return false;
+ return C1I->isOne() || C1I->isAllOnesValue() ||
+ C2I->isOne() || C2I->isAllOnesValue();
+}
+
+/// FoldSelectIntoOp - Try fold the select into one of the operands to
+/// facilitate further optimization.
+Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
+ Value *FalseVal) {
+ // See the comment above GetSelectFoldableOperands for a description of the
+ // transformation we are doing here.
+ if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
+ if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
+ !isa<Constant>(FalseVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(TVI);
+ Value *OOp = TVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0, 1 and -1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C);
+ NewSel->takeName(TVI);
+ BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
+ BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
+ FalseVal, NewSel);
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(TVI_BO->isExact());
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap());
+ }
+ return BO;
+ }
+ }
+ }
+ }
+ }
+
+ if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
+ if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
+ !isa<Constant>(TrueVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(FVI);
+ Value *OOp = FVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0, 1 and -1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp);
+ NewSel->takeName(FVI);
+ BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
+ BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
+ TrueVal, NewSel);
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(FVI_BO->isExact());
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap());
+ }
+ return BO;
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
+/// replaced with RepOp.
+static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ // Trivial replacement.
+ if (V == Op)
+ return RepOp;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return 0;
+
+ // If this is a binary operator, try to simplify it with the replaced op.
+ if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
+ if (B->getOperand(0) == Op)
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
+ if (B->getOperand(1) == Op)
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
+ }
+
+ // Same for CmpInsts.
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ if (C->getOperand(0) == Op)
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+ TLI);
+ if (C->getOperand(1) == Op)
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+ TLI);
+ }
+
+ // TODO: We could hand off more cases to instsimplify here.
+
+ // If all operands are constant after substituting Op for RepOp then we can
+ // constant fold the instruction.
+ if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) {
+ // Build a list of all constant operands.
+ SmallVector<Constant*, 8> ConstOps;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (I->getOperand(i) == Op)
+ ConstOps.push_back(CRepOp);
+ else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i)))
+ ConstOps.push_back(COp);
+ else
+ break;
+ }
+
+ // All operands were constants, fold it.
+ if (ConstOps.size() == I->getNumOperands()) {
+ if (CmpInst *C = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
+ ConstOps[1], TD, TLI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ ConstOps, TD, TLI);
+ }
+ }
+
+ return 0;
+}
+
+/// visitSelectInstWithICmp - Visit a SelectInst that has an
+/// ICmpInst as its first operand.
+///
+Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
+ ICmpInst *ICI) {
+ bool Changed = false;
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ // Check cases where the comparison is with a constant that
+ // can be adjusted to fit the min/max idiom. We may move or edit ICI
+ // here, so make sure the select is the only user.
+ if (ICI->hasOneUse())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+ // X < MIN ? T : F --> F
+ if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT)
+ && CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X > MAX ? T : F --> F
+ else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT)
+ && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: {
+ // These transformations only work for selects over integers.
+ IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType());
+ if (!SelectTy)
+ break;
+
+ Constant *AdjustedRHS;
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1);
+ else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1);
+
+ // X > C ? X : C+1 --> X < C+1 ? C+1 : X
+ // X < C ? X : C-1 --> X > C-1 ? C-1 : X
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal))
+ ; // Nothing to do here. Values match without any sign/zero extension.
+
+ // Types do not match. Instead of calculating this with mixed types
+ // promote all to the larger type. This enables scalar evolution to
+ // analyze this expression.
+ else if (CmpRHS->getType()->getScalarSizeInBits()
+ < SelectTy->getBitWidth()) {
+ Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy);
+
+ // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
+ // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
+ // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
+ // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
+ if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = sextRHS;
+ } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = sextRHS;
+ } else if (ICI->isUnsigned()) {
+ Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy);
+ // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
+ // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
+ // zext + signed compare cannot be changed:
+ // 0xff <s 0x00, but 0x00ff >s 0x0000
+ if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = zextRHS;
+ } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = zextRHS;
+ } else
+ break;
+ } else
+ break;
+ } else
+ break;
+
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(0, CmpLHS);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+
+ // Move ICI instruction right before the select instruction. Otherwise
+ // the sext/zext value may be defined after the ICI instruction uses it.
+ ICI->moveBefore(&SI);
+
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
+ // and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
+ // FIXME: Type and constness constraints could be lifted, but we have to
+ // watch code size carefully. We should consider xor instead of
+ // sub/add when we decide to do that.
+ if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
+ if (TrueVal->getType() == Ty) {
+ if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
+ ConstantInt *C1 = NULL, *C2 = NULL;
+ if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
+ C1 = dyn_cast<ConstantInt>(TrueVal);
+ C2 = dyn_cast<ConstantInt>(FalseVal);
+ } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) {
+ C1 = dyn_cast<ConstantInt>(FalseVal);
+ C2 = dyn_cast<ConstantInt>(TrueVal);
+ }
+ if (C1 && C2) {
+ // This shift results in either -1 or 0.
+ Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1);
+
+ // Check if we can express the operation with a single or.
+ if (C2->isAllOnesValue())
+ return ReplaceInstUsesWith(SI, Builder->CreateOr(AShr, C1));
+
+ Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue());
+ return ReplaceInstUsesWith(SI, Builder->CreateAdd(And, C1));
+ }
+ }
+ }
+ }
+
+ // If we have an equality comparison then we know the value in one of the
+ // arms of the select. See if substituting this value into the arm and
+ // simplifying the result yields the same value as the other arm.
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+
+ // NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
+ if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS)) {
+ if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
+ // Transform (X == C) ? X : Y -> (X == C) ? C : Y
+ SI.setOperand(1, CmpRHS);
+ Changed = true;
+ } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) {
+ // Transform (X != C) ? Y : X -> (X != C) ? Y : C
+ SI.setOperand(2, CmpRHS);
+ Changed = true;
+ }
+ }
+
+ return Changed ? &SI : 0;
+}
+
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
+/// PHI node (but the two may be in different blocks). See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI. For
+/// example, cases like this cannot be mapped:
+///
+/// X = phi [ C1, BB1], [C2, BB2]
+/// Y = add
+/// Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+ const SelectInst &SI) {
+ // If the value is a non-instruction value like a constant or argument, it
+ // can always be mapped.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return true;
+
+ // If V is a PHI node defined in the same block as the condition PHI, we can
+ // map the arguments.
+ const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+ if (const PHINode *VP = dyn_cast<PHINode>(I))
+ if (VP->getParent() == CondPHI->getParent())
+ return true;
+
+ // Otherwise, if the PHI and select are defined in the same block and if V is
+ // defined in a different block, then we can transform it.
+ if (SI.getParent() == CondPHI->getParent() &&
+ I->getParent() != CondPHI->getParent())
+ return true;
+
+ // Otherwise we have a 'hard' case and we can't tell without doing more
+ // detailed dominator based analysis, punt.
+ return false;
+}
+
+/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
+/// SPF2(SPF1(A, B), C)
+Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
+ SelectPatternFlavor SPF1,
+ Value *A, Value *B,
+ Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C) {
+ if (C == A || C == B) {
+ // MAX(MAX(A, B), B) -> MAX(A, B)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ if (SPF1 == SPF2)
+ return ReplaceInstUsesWith(Outer, Inner);
+
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
+ (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
+ (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
+ (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
+ return ReplaceInstUsesWith(Outer, C);
+ }
+
+ // TODO: MIN(MIN(A, 23), 97)
+ return 0;
+}
+
+
+/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
+/// both be) and we have an icmp instruction with zero, and we have an 'and'
+/// with the non-constant value and a power of two we can turn the select
+/// into a shift on the result of the 'and'.
+static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
+ ConstantInt *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality())
+ return 0;
+
+ if (!match(IC->getOperand(1), m_Zero()))
+ return 0;
+
+ ConstantInt *AndRHS;
+ Value *LHS = IC->getOperand(0);
+ if (LHS->getType() != SI.getType() ||
+ !match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
+ return 0;
+
+ // If both select arms are non-zero see if we have a select of the form
+ // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
+ // for 'x ? 2^n : 0' and fix the thing up at the end.
+ ConstantInt *Offset = 0;
+ if (!TrueVal->isZero() && !FalseVal->isZero()) {
+ if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
+ Offset = FalseVal;
+ else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
+ Offset = TrueVal;
+ else
+ return 0;
+
+ // Adjust TrueVal and FalseVal to the offset.
+ TrueVal = ConstantInt::get(Builder->getContext(),
+ TrueVal->getValue() - Offset->getValue());
+ FalseVal = ConstantInt::get(Builder->getContext(),
+ FalseVal->getValue() - Offset->getValue());
+ }
+
+ // Make sure the mask in the 'and' and one of the select arms is a power of 2.
+ if (!AndRHS->getValue().isPowerOf2() ||
+ (!TrueVal->getValue().isPowerOf2() &&
+ !FalseVal->getValue().isPowerOf2()))
+ return 0;
+
+ // Determine which shift is needed to transform result of the 'and' into the
+ // desired result.
+ ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
+ unsigned ValZeros = ValC->getValue().logBase2();
+ unsigned AndZeros = AndRHS->getValue().logBase2();
+
+ Value *V = LHS;
+ if (ValZeros > AndZeros)
+ V = Builder->CreateShl(V, ValZeros - AndZeros);
+ else if (ValZeros < AndZeros)
+ V = Builder->CreateLShr(V, AndZeros - ValZeros);
+
+ // Okay, now we know that everything is set up, we just don't know whether we
+ // have a icmp_ne or icmp_eq and whether the true or false val is the zero.
+ bool ShouldNotVal = !TrueVal->isZero();
+ ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+ if (ShouldNotVal)
+ V = Builder->CreateXor(V, ValC);
+
+ // Apply an offset if needed.
+ if (Offset)
+ V = Builder->CreateAdd(V, Offset);
+ return V;
+}
+
+Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, TD))
+ return ReplaceInstUsesWith(SI, V);
+
+ if (SI.getType()->isIntegerTy(1)) {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
+ if (C->getZExtValue()) {
+ // Change: A = select B, true, C --> A = or B, C
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ }
+ // Change: A = select B, false, C --> A = and !B, C
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return BinaryOperator::CreateAnd(NotCond, FalseVal);
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+ if (C->getZExtValue() == false) {
+ // Change: A = select B, C, false --> A = and B, C
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+ // Change: A = select B, C, true --> A = or !B, C
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return BinaryOperator::CreateOr(NotCond, TrueVal);
+ }
+
+ // select a, b, a -> a&b
+ // select a, a, b -> a|b
+ if (CondVal == TrueVal)
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ else if (CondVal == FalseVal)
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+
+ // select a, ~a, b -> (~a)&b
+ // select a, b, ~a -> (~a)|b
+ if (match(TrueVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateAnd(TrueVal, FalseVal);
+ else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateOr(TrueVal, FalseVal);
+ }
+
+ // Selecting between two integer constants?
+ if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
+ if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
+ // select C, 1, 0 -> zext C to int
+ if (FalseValC->isZero() && TrueValC->getValue() == 1)
+ return new ZExtInst(CondVal, SI.getType());
+
+ // select C, -1, 0 -> sext C to int
+ if (FalseValC->isZero() && TrueValC->isAllOnesValue())
+ return new SExtInst(CondVal, SI.getType());
+
+ // select C, 0, 1 -> zext !C to int
+ if (TrueValC->isZero() && FalseValC->getValue() == 1) {
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return new ZExtInst(NotCond, SI.getType());
+ }
+
+ // select C, 0, -1 -> sext !C to int
+ if (TrueValC->isZero() && FalseValC->isAllOnesValue()) {
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return new SExtInst(NotCond, SI.getType());
+ }
+
+ if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder))
+ return ReplaceInstUsesWith(SI, V);
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
+ if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X une Y) ? X : Y -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+
+ } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
+ // Transform (X == Y) ? Y : X -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X une Y) ? Y : X -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+ }
+ // NOTE: if we wanted to, this is where to detect ABS
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
+ if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
+ return Result;
+
+ if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
+ if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
+ if (TI->hasOneUse() && FI->hasOneUse()) {
+ Instruction *AddOp = 0, *SubOp = 0;
+
+ // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
+ if (TI->getOpcode() == FI->getOpcode())
+ if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+ return IV;
+
+ // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is
+ // even legal for FP.
+ if ((TI->getOpcode() == Instruction::Sub &&
+ FI->getOpcode() == Instruction::Add) ||
+ (TI->getOpcode() == Instruction::FSub &&
+ FI->getOpcode() == Instruction::FAdd)) {
+ AddOp = FI; SubOp = TI;
+ } else if ((FI->getOpcode() == Instruction::Sub &&
+ TI->getOpcode() == Instruction::Add) ||
+ (FI->getOpcode() == Instruction::FSub &&
+ TI->getOpcode() == Instruction::FAdd)) {
+ AddOp = TI; SubOp = FI;
+ }
+
+ if (AddOp) {
+ Value *OtherAddOp = 0;
+ if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
+ OtherAddOp = AddOp->getOperand(1);
+ } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
+ OtherAddOp = AddOp->getOperand(0);
+ }
+
+ if (OtherAddOp) {
+ // So at this point we know we have (Y -> OtherAddOp):
+ // select C, (add X, Y), (sub X, Z)
+ Value *NegVal; // Compute -Z
+ if (SI.getType()->isFPOrFPVectorTy()) {
+ NegVal = Builder->CreateFNeg(SubOp->getOperand(1));
+ } else {
+ NegVal = Builder->CreateNeg(SubOp->getOperand(1));
+ }
+
+ Value *NewTrueOp = OtherAddOp;
+ Value *NewFalseOp = NegVal;
+ if (AddOp != TI)
+ std::swap(NewTrueOp, NewFalseOp);
+ Value *NewSel =
+ Builder->CreateSelect(CondVal, NewTrueOp,
+ NewFalseOp, SI.getName() + ".p");
+
+ if (SI.getType()->isFPOrFPVectorTy())
+ return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
+ else
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
+ }
+ }
+ }
+
+ // See if we can fold the select into one of our operands.
+ if (SI.getType()->isIntegerTy()) {
+ if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
+ return FoldI;
+
+ // MAX(MAX(a, b), a) -> MAX(a, b)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ Value *LHS, *RHS, *LHS2, *RHS2;
+ if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ SI, SPF, RHS))
+ return R;
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
+ SI, SPF, LHS))
+ return R;
+ }
+
+ // TODO.
+ // ABS(-X) -> ABS(X)
+ // ABS(ABS(X)) -> ABS(X)
+ }
+
+ // See if we can fold the select into a phi node if the condition is a select.
+ if (isa<PHINode>(SI.getCondition()))
+ // The true/false values have to be live in the PHI predecessor's blocks.
+ if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
+ CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
+ if (Instruction *NV = FoldOpIntoPhi(SI))
+ return NV;
+
+ if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
+ if (TrueSI->getCondition() == CondVal) {
+ if (SI.getTrueValue() == TrueSI->getTrueValue())
+ return 0;
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ }
+ if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
+ if (FalseSI->getCondition() == CondVal) {
+ if (SI.getFalseValue() == FalseSI->getFalseValue())
+ return 0;
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ }
+
+ if (BinaryOperator::isNot(CondVal)) {
+ SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
+ SI.setOperand(1, FalseVal);
+ SI.setOperand(2, TrueVal);
+ return &SI;
+ }
+
+ if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) {
+ unsigned VWidth = VecTy->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) {
+ if (V != &SI)
+ return ReplaceInstUsesWith(SI, V);
+ return &SI;
+ }
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) {
+ // Form a shufflevector instruction.
+ SmallVector<Constant *, 8> Mask(VWidth);
+ Type *Int32Ty = Type::getInt32Ty(CV->getContext());
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elem = cast<Constant>(CV->getOperand(i));
+ if (ConstantInt *E = dyn_cast<ConstantInt>(Elem))
+ Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0));
+ else if (isa<UndefValue>(Elem))
+ Mask[i] = UndefValue::get(Int32Ty);
+ else
+ return 0;
+ }
+ Constant *MaskVal = ConstantVector::get(Mask);
+ Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal);
+ return ReplaceInstUsesWith(SI, V);
+ }
+
+ if (isa<ConstantAggregateZero>(CondVal)) {
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ }
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
new file mode 100644
index 000000000000..8cf76e5e8a9f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -0,0 +1,810 @@
+//===- InstCombineShifts.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitShl, visitLShr, and visitAShr functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
+ assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // See if we can fold away this shift.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Try to fold constant and into select arguments.
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
+ if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
+ return Res;
+
+ // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2.
+ // Because shifts by negative values (which could occur if A were negative)
+ // are undefined.
+ Value *A; const APInt *B;
+ if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
+ // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
+ // demand the sign bit (and many others) here??
+ Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1),
+ Op1->getName());
+ I.setOperand(1, Rem);
+ return &I;
+ }
+
+ return 0;
+}
+
+/// CanEvaluateShifted - See if we can compute the specified value, but shifted
+/// logically to the left or right by some number of bits. This should return
+/// true if the expression can be computed for the same cost as the current
+/// expression tree. This is used to eliminate extraneous shifting from things
+/// like:
+/// %C = shl i128 %A, 64
+/// %D = shl i128 %B, 96
+/// %E = or i128 %C, %D
+/// %F = lshr i128 %E, 64
+/// where the client will ask if E can be computed shifted right by 64-bits. If
+/// this succeeds, the GetShiftedValue function will be called to produce the
+/// value.
+static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is the opposite shift, we can directly reuse the input of the shift
+ // if the needed bits are already zero in the input. This allows us to reuse
+ // the value which means that we don't care if the shift has multiple uses.
+ // TODO: Handle opposite shift by exact value.
+ ConstantInt *CI = 0;
+ if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
+ (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
+ if (CI->getZExtValue() == NumBits) {
+ // TODO: Check that the input bits are already zero with MaskedValueIsZero
+#if 0
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+#endif
+
+ }
+ }
+
+ // We can't mutate something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ default: return false;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
+ return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+
+ case Instruction::Shl: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) return true;
+
+ // We can always turn shl(c)+shr(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned LowBits = TypeWidth - CI->getZExtValue();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::LShr: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) return true;
+
+ // We can always turn lshr(c)+shl(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getValue().ult(TypeWidth) && CI->getZExtValue() > NumBits) {
+ unsigned LowBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+ return false;
+ return true;
+ }
+ }
+}
+
+/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// this value inserts the new computation that produces the shifted value.
+static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isLeftShift)
+ V = IC.Builder->CreateShl(C, NumBits);
+ else
+ V = IC.Builder->CreateLShr(C, NumBits);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
+ IC.getTargetLibraryInfo());
+ return V;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ IC.Worklist.Add(I);
+
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
+ I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ return I;
+
+ case Instruction::Shl: {
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
+
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) {
+ // If this is oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
+ BO->setHasNoUnsignedWrap(false);
+ BO->setHasNoSignedWrap(false);
+ return I;
+ }
+
+ // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(BO->getOperand(0),
+ ConstantInt::get(BO->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(BO);
+ VI->takeName(BO);
+ }
+ return V;
+ }
+
+ // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ BO->setOperand(1, ConstantInt::get(BO->getType(),
+ CI->getZExtValue() - NumBits));
+ BO->setHasNoUnsignedWrap(false);
+ BO->setHasNoSignedWrap(false);
+ return BO;
+ }
+ case Instruction::LShr: {
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) {
+ // If this is oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(BO->getType());
+
+ BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
+ BO->setIsExact(false);
+ return I;
+ }
+
+ // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(BO->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ BO->setOperand(1, ConstantInt::get(BO->getType(),
+ CI->getZExtValue() - NumBits));
+ BO->setIsExact(false);
+ return BO;
+ }
+
+ case Instruction::Select:
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ return I;
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
+ NumBits, isLeftShift, IC));
+ return PN;
+ }
+ }
+}
+
+
+
+Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I) {
+ bool isLeftShift = I.getOpcode() == Instruction::Shl;
+
+
+ // See if we can propagate this shift into the input, this covers the trivial
+ // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
+ if (I.getOpcode() != Instruction::AShr &&
+ CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+ DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
+ " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
+
+ return ReplaceInstUsesWith(I,
+ GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+ }
+
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
+
+ // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
+ // a signed shift.
+ //
+ if (Op1->uge(TypeBits)) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+ // ashr i32 X, 32 --> ashr i32 X, 31
+ I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
+ return &I;
+ }
+
+ // ((X*C1) << C2) == (X * (C1 << C2))
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
+ if (BO->getOpcode() == Instruction::Mul && isLeftShift)
+ if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
+ return BinaryOperator::CreateMul(BO->getOperand(0),
+ ConstantExpr::getShl(BOOp, Op1));
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
+ if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
+ Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+ // If 'shift2' is an ashr, we would have to get the sign bit into a funny
+ // place. Don't try to do this transformation in this case. Also, we
+ // require that the input operand is a shift-by-constant so that we have
+ // confidence that the shifts will get folded together. We could do this
+ // xform in more cases, but it is unlikely to be profitable.
+ if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
+ isa<ConstantInt>(TrOp->getOperand(1))) {
+ // Okay, we'll do this xform. Make the shift of shift.
+ Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+ // (shift2 (shift1 & 0x00FF), c2)
+ Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
+
+ // For logical shifts, the truncation has the effect of making the high
+ // part of the register be zeros. Emulate this by inserting an AND to
+ // clear the top bits as needed. This 'and' will usually be zapped by
+ // other xforms later if dead.
+ unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+ unsigned DstSize = TI->getType()->getScalarSizeInBits();
+ APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+
+ // The mask we constructed says what the trunc would do if occurring
+ // between the shifts. We want to know the effect *after* the second
+ // shift. We know that it is a logical shift by a constant, so adjust the
+ // mask as appropriate.
+ if (I.getOpcode() == Instruction::Shl)
+ MaskV <<= Op1->getZExtValue();
+ else {
+ assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
+ MaskV = MaskV.lshr(Op1->getZExtValue());
+ }
+
+ // shift1 & 0x00FF
+ Value *And = Builder->CreateAnd(NSh,
+ ConstantInt::get(I.getContext(), MaskV),
+ TI->getName());
+
+ // Return the value truncated to the interesting size.
+ return new TruncInst(And, I.getType());
+ }
+ }
+
+ if (Op0->hasOneUse()) {
+ if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ Value *V1, *V2;
+ ConstantInt *CC;
+ switch (Op0BO->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // These operators commute.
+ // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
+ match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
+ Op0BO->getOperand(1)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
+ Value *Op0BOOp1 = Op0BO->getOperand(1);
+ if (isLeftShift && Op0BOOp1->hasOneUse() &&
+ match(Op0BOOp1,
+ m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
+ m_ConstantInt(CC)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1,
+ Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+ return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
+ }
+ }
+
+ // FALL THROUGH.
+ case Instruction::Sub: {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
+ Op0BO->getOperand(0)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0),
+ m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
+ m_ConstantInt(CC))) && V2 == Op1) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
+ }
+
+ break;
+ }
+ }
+
+
+ // If the operand is an bitwise operator with a constant RHS, and the
+ // shift is the only use, we can pull it out of the shift.
+ if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
+ bool isValid = true; // Valid only for And, Or, Xor
+ bool highBitSet = false; // Transform if high bit of constant set?
+
+ switch (Op0BO->getOpcode()) {
+ default: isValid = false; break; // Do not perform transform!
+ case Instruction::Add:
+ isValid = isLeftShift;
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ highBitSet = false;
+ break;
+ case Instruction::And:
+ highBitSet = true;
+ break;
+ }
+
+ // If this is a signed shift right, and the high bit is modified
+ // by the logical operation, do not perform the transformation.
+ // The highBitSet boolean indicates the value of the high bit of
+ // the constant which would cause it to be modified for this
+ // operation.
+ //
+ if (isValid && I.getOpcode() == Instruction::AShr)
+ isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
+
+ if (isValid) {
+ Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
+
+ Value *NewShift =
+ Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ NewShift->takeName(Op0BO);
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
+ NewRHS);
+ }
+ }
+ }
+ }
+
+ // Find out if this is a shift of a shift by a constant.
+ BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
+ if (ShiftOp && !ShiftOp->isShift())
+ ShiftOp = 0;
+
+ if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+
+ // This is a constant shift of a constant shift. Be careful about hiding
+ // shl instructions behind bit masks. They are used to represent multiplies
+ // by a constant, and it is important that simple arithmetic expressions
+ // are still recognizable by scalar evolution.
+ //
+ // The transforms applied to shl are very similar to the transforms applied
+ // to mul by constant. We can be more aggressive about optimizing right
+ // shifts.
+ //
+ // Combinations of right and left shifts will still be optimized in
+ // DAGCombine where scalar evolution no longer applies.
+
+ ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
+ uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
+ uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
+ assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
+ if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
+ Value *X = ShiftOp->getOperand(0);
+
+ IntegerType *Ty = cast<IntegerType>(I.getType());
+
+ // Check for (X << c1) << c2 and (X >> c1) >> c2
+ if (I.getOpcode() == ShiftOp->getOpcode()) {
+ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
+ // If this is oversized composite shift, then unsigned shifts get 0, ashr
+ // saturates.
+ if (AmtSum >= TypeBits) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
+ }
+
+ return BinaryOperator::Create(I.getOpcode(), X,
+ ConstantInt::get(Ty, AmtSum));
+ }
+
+ if (ShiftAmt1 == ShiftAmt2) {
+ // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X,
+ ConstantInt::get(I.getContext(), Mask));
+ }
+ } else if (ShiftAmt1 < ShiftAmt2) {
+ uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
+
+ // (X >>?,exact C1) << C2 --> X << (C2-C1)
+ // The inexact version is deferred to DAGCombine so we don't hide shl
+ // behind a bit mask.
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl &&
+ ShiftOp->isExact()) {
+ assert(ShiftOp->getOpcode() == Instruction::LShr ||
+ ShiftOp->getOpcode() == Instruction::AShr);
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
+ return NewShl;
+ }
+
+ // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
+ if (ShiftOp->hasNoUnsignedWrap()) {
+ BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr,
+ X, ShiftDiffCst);
+ NewLShr->setIsExact(I.isExact());
+ return NewLShr;
+ }
+ Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
+ if (I.getOpcode() == Instruction::AShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ if (ShiftOp->hasNoSignedWrap()) {
+ // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr,
+ X, ShiftDiffCst);
+ NewAShr->setIsExact(I.isExact());
+ return NewAShr;
+ }
+ }
+ } else {
+ assert(ShiftAmt2 < ShiftAmt1);
+ uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
+
+ // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+ // The inexact version is deferred to DAGCombine so we don't hide shl
+ // behind a bit mask.
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl &&
+ ShiftOp->isExact()) {
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
+ X, ShiftDiffCst);
+ NewShr->setIsExact(true);
+ return NewShr;
+ }
+
+ // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ if (ShiftOp->hasNoUnsignedWrap()) {
+ // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoUnsignedWrap(true);
+ return NewShl;
+ }
+ Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
+ if (I.getOpcode() == Instruction::AShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ if (ShiftOp->hasNoSignedWrap()) {
+ // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoSignedWrap(true);
+ return NewShl;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+ if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
+ I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
+ TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *V = commonShiftTransforms(I))
+ return V;
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ // If the shifted-out value is known-zero, then this is a NUW shift.
+ if (!I.hasNoUnsignedWrap() &&
+ MaskedValueIsZero(I.getOperand(0),
+ APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
+ I.setHasNoUnsignedWrap();
+ return &I;
+ }
+
+ // If the shifted out value is all signbits, this is a NSW shift.
+ if (!I.hasNoSignedWrap() &&
+ ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
+ I.setHasNoSignedWrap();
+ return &I;
+ }
+ }
+
+ // (C1 << A) << C2 -> (C1 << C2) << A
+ Constant *C1, *C2;
+ Value *A;
+ if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) &&
+ match(I.getOperand(1), m_Constant(C2)))
+ return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1),
+ I.isExact(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ // ctlz.i32(x)>>5 --> zext(x == 0)
+ // cttz.i32(x)>>5 --> zext(x == 0)
+ // ctpop.i32(x)>>5 --> zext(x == -1)
+ if ((II->getIntrinsicID() == Intrinsic::ctlz ||
+ II->getIntrinsicID() == Intrinsic::cttz ||
+ II->getIntrinsicID() == Intrinsic::ctpop) &&
+ isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) {
+ bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
+ Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
+ Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
+ return new ZExtInst(Cmp, II->getType());
+ }
+ }
+
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ I.setIsExact();
+ return &I;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1),
+ I.isExact(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ // If the input is a SHL by the same constant (ashr (shl X, C), C), then we
+ // have a sign-extend idiom.
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) {
+ // If the left shift is just shifting out partial signbits, delete the
+ // extension.
+ if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+ return ReplaceInstUsesWith(I, X);
+
+ // If the input is an extension from the shifted amount value, e.g.
+ // %x = zext i8 %A to i32
+ // %y = shl i32 %x, 24
+ // %z = ashr %y, 24
+ // then turn this into "z = sext i8 A to i32".
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(X)) {
+ uint32_t SrcBits = ZI->getOperand(0)->getType()->getScalarSizeInBits();
+ uint32_t DestBits = ZI->getType()->getScalarSizeInBits();
+ if (Op1C->getZExtValue() == DestBits-SrcBits)
+ return new SExtInst(ZI->getOperand(0), ZI->getType());
+ }
+ }
+
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ I.setIsExact();
+ return &I;
+ }
+ }
+
+ // See if we can turn a signed shr into an unsigned shr.
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+ return BinaryOperator::CreateLShr(Op0, Op1);
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
+
+ return 0;
+}
+
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
new file mode 100644
index 000000000000..8add1ea618d3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -0,0 +1,1281 @@
+//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains logic for simplifying instructions based on information
+// about how they are used.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "InstCombine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/PatternMatch.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
+ APInt Demanded) {
+ assert(I && "No instruction?");
+ assert(OpNo < I->getNumOperands() && "Operand index too large");
+
+ // If the operand is not a constant integer, nothing to do.
+ ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
+ if (!OpC) return false;
+
+ // If there are no bits set that aren't demanded, nothing to do.
+ Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+ if ((~Demanded & OpC->getValue()) == 0)
+ return false;
+
+ // This instruction is producing bits that are not demanded. Shrink the RHS.
+ Demanded &= OpC->getValue();
+ I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
+ return true;
+}
+
+
+
+/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+/// SimplifyDemandedBits knows about. See if the instruction has any
+/// properties that allow us to simplify its operands.
+bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
+ unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
+
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
+ KnownZero, KnownOne, 0);
+ if (V == 0) return false;
+ if (V == &Inst) return true;
+ ReplaceInstUsesWith(Inst, V);
+ return true;
+}
+
+/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
+/// specified instruction operand if possible, updating it in place. It returns
+/// true if it made any change and false otherwise.
+bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
+ KnownZero, KnownOne, Depth);
+ if (NewVal == 0) return false;
+ U = NewVal;
+ return true;
+}
+
+
+/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
+/// value based on the demanded bits. When this function is called, it is known
+/// that only the bits set in DemandedMask of the result of V are ever used
+/// downstream. Consequently, depending on the mask and V, it may be possible
+/// to replace V with a constant or one of its operands. In such cases, this
+/// function does the replacement and returns true. In all other cases, it
+/// returns false after analyzing the expression and setting KnownOne and known
+/// to be one in the expression. KnownZero contains all the bits that are known
+/// to be zero in the expression. These are provided to potentially allow the
+/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
+/// the expression. KnownOne and KnownZero always follow the invariant that
+/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
+/// the bits in KnownOne and KnownZero may only be accurate for those bits set
+/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
+/// and KnownOne must all be the same.
+///
+/// This returns null if it did not change anything and it permits no
+/// simplification. This returns V itself if it did some simplification of V's
+/// operands based on the information about what bits are demanded. This returns
+/// some other non-null value if it found out that V is equal to another value
+/// in the context where the specified bits are demanded, but not for all users.
+Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ assert(V != 0 && "Null pointer of Value???");
+ assert(Depth <= 6 && "Limit Search Depth");
+ uint32_t BitWidth = DemandedMask.getBitWidth();
+ Type *VTy = V->getType();
+ assert((TD || !VTy->isPointerTy()) &&
+ "SimplifyDemandedBits needs to know bit widths!");
+ assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ (!VTy->isIntOrIntVectorTy() ||
+ VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & DemandedMask;
+ KnownZero = ~KnownOne & DemandedMask;
+ return 0;
+ }
+ if (isa<ConstantPointerNull>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne.clearAllBits();
+ KnownZero = DemandedMask;
+ return 0;
+ }
+
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ if (DemandedMask == 0) { // Not demanding any bits from V.
+ if (isa<UndefValue>(V))
+ return 0;
+ return UndefValue::get(VTy);
+ }
+
+ if (Depth == 6) // Limit search depth.
+ return 0;
+
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+ return 0; // Only analyze instructions.
+ }
+
+ // If there are multiple uses of this value and we aren't at the root, then
+ // we can't do any simplifications of the operands, because DemandedMask
+ // only reflects the bits demanded by *one* of the users.
+ if (Depth != 0 && !I->hasOneUse()) {
+ // Despite the fact that we can't simplify this instruction in all User's
+ // context, we can at least compute the knownzero/knownone bits, and we can
+ // do simplifications that apply to *just* the one user if we know that
+ // this instruction has a simpler value in that context.
+ if (I->getOpcode() == Instruction::And) {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ } else if (I->getOpcode() == Instruction::Or) {
+ // We can simplify (X|Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ // If either the LHS or the RHS are One, the result is One.
+ ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+ } else if (I->getOpcode() == Instruction::Xor) {
+ // We can simplify (X^Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
+ }
+
+ // Compute the KnownZero/KnownOne bits to simplify things downstream.
+ ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
+ return 0;
+ }
+
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the DemandedMask to all bits so that we can try to simplify the
+ // operands. This allows visitTruncInst (for example) to simplify the
+ // operand of a trunc without duplicating all the logic below.
+ if (Depth == 0 && !V->hasOneUse())
+ DemandedMask = APInt::getAllOnesValue(BitWidth);
+
+ switch (I->getOpcode()) {
+ default:
+ ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
+ break;
+ case Instruction::And:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
+ return I;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne = RHSKnownOne & LHSKnownOne;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero = RHSKnownZero | LHSKnownZero;
+ break;
+ case Instruction::Or:
+ // If either the LHS or the RHS are One, the result is One.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero = RHSKnownZero & LHSKnownZero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne = RHSKnownOne | LHSKnownOne;
+ break;
+ case Instruction::Xor: {
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
+
+ // If all of the demanded bits are known to be zero on one side or the
+ // other, turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstWith(Or, *I);
+ }
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
+ // all known
+ if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
+ Constant *AndC = Constant::getIntegerValue(VTy,
+ ~RHSKnownOne & DemandedMask);
+ Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
+ return InsertNewInstWith(And, *I);
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // If our LHS is an 'and' and if it has one use, and if any of the bits we
+ // are flipping are known to be set, then the xor is just resetting those
+ // bits to zero. We can just knock out bits from the 'and' and the 'xor',
+ // simplifying both of them.
+ if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+ if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ isa<ConstantInt>(LHSInst->getOperand(1)) &&
+ (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+ ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
+ ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
+ APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+
+ Constant *AndC =
+ ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+ Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
+ InsertNewInstWith(NewAnd, *I);
+
+ Constant *XorC =
+ ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+ Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
+ return InsertNewInstWith(NewXor, *I);
+ }
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
+ break;
+ }
+ case Instruction::Select:
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
+ ShrinkDemandedConstant(I, 2, DemandedMask))
+ return I;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne = RHSKnownOne & LHSKnownOne;
+ KnownZero = RHSKnownZero & LHSKnownZero;
+ break;
+ case Instruction::Trunc: {
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
+ DemandedMask = DemandedMask.zext(truncBf);
+ KnownZero = KnownZero.zext(truncBf);
+ KnownOne = KnownOne.zext(truncBf);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ DemandedMask = DemandedMask.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
+ }
+ case Instruction::BitCast:
+ if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
+ return 0; // vector->int or fp->int?
+
+ if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
+ if (VectorType *SrcVTy =
+ dyn_cast<VectorType>(I->getOperand(0)->getType())) {
+ if (DstVTy->getNumElements() != SrcVTy->getNumElements())
+ // Don't touch a bitcast between vectors of different element counts.
+ return 0;
+ } else
+ // Don't touch a scalar-to-vector bitcast.
+ return 0;
+ } else if (I->getOperand(0)->getType()->isVectorTy())
+ // Don't touch a vector-to-scalar bitcast.
+ return 0;
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
+ case Instruction::ZExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ DemandedMask = DemandedMask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ DemandedMask = DemandedMask.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ // The top bits are known to be zero.
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt InputDemandedBits = DemandedMask &
+ APInt::getLowBitsSet(BitWidth, SrcBitWidth);
+
+ APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if ((NewBits & DemandedMask) != 0)
+ InputDemandedBits.setBit(SrcBitWidth-1);
+
+ InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ InputDemandedBits = InputDemandedBits.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, or if the NewBits are not demanded
+ // convert this into a zero extension.
+ if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+ // Convert to ZExt cast
+ CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
+ return InsertNewInstWith(NewCast, *I);
+ } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set
+ KnownOne |= NewBits;
+ }
+ break;
+ }
+ case Instruction::Add: {
+ // Figure out what the input bits are. If the top bits of the and result
+ // are not demanded, then the add doesn't demand them from its input
+ // either.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+
+ // If there is a constant on the RHS, there are a variety of xformations
+ // we can do.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // If null, this should be simplified elsewhere. Some of the xforms here
+ // won't work if the RHS is zero.
+ if (RHS->isZero())
+ break;
+
+ // If the top bit of the output is demanded, demand everything from the
+ // input. Otherwise, we demand all the input bits except NLZ top bits.
+ APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
+
+ // Find information about known zero/one bits in the input.
+ if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // If the RHS of the add has bits set that can't affect the input, reduce
+ // the constant.
+ if (ShrinkDemandedConstant(I, 1, InDemandedBits))
+ return I;
+
+ // Avoid excess work.
+ if (LHSKnownZero == 0 && LHSKnownOne == 0)
+ break;
+
+ // Turn it into OR if input bits are zero.
+ if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstWith(Or, *I);
+ }
+
+ // We can say something about the output known-zero and known-one bits,
+ // depending on potential carries from the input constant and the
+ // unknowns. For example if the LHS is known to have at most the 0x0F0F0
+ // bits set and the RHS constant is 0x01001, then we know we have a known
+ // one mask of 0x00001 and a known zero mask of 0xE0F0E.
+
+ // To compute this, we first compute the potential carry bits. These are
+ // the bits which may be modified. I'm not aware of a better way to do
+ // this scan.
+ const APInt &RHSVal = RHS->getValue();
+ APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
+
+ // Now that we know which bits have carries, compute the known-1/0 sets.
+
+ // Bits are known one if they are known zero in one operand and one in the
+ // other, and there is no input carry.
+ KnownOne = ((LHSKnownZero & RHSVal) |
+ (LHSKnownOne & ~RHSVal)) & ~CarryBits;
+
+ // Bits are known zero if they are known zero in both operands and there
+ // is no input carry.
+ KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
+ } else {
+ // If the high-bits of this ADD are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this ADD to demand the most
+ // significant bit and all those below it.
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ }
+ break;
+ }
+ case Instruction::Sub:
+ // If the high-bits of this SUB are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this SUB to demand the most
+ // significant bit and all those below it.
+ uint32_t NLZ = DemandedMask.countLeadingZeros();
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+
+ // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+ // the known zeros and ones.
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+
+ // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
+ // zero.
+ if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
+ APInt I0 = C0->getValue();
+ if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
+ Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
+ return InsertNewInstWith(Xor, *I);
+ }
+ }
+ break;
+ case Instruction::Shl:
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ {
+ Value *VarX; ConstantInt *C1;
+ if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
+ Instruction *Shr = cast<Instruction>(I->getOperand(0));
+ Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
+ KnownZero, KnownOne);
+ if (R)
+ return R;
+ }
+ }
+
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+ APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+
+ // If the shift is NUW/NSW, then it does demand the high bits.
+ ShlOperator *IOp = cast<ShlOperator>(I);
+ if (IOp->hasNoSignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (IOp->hasNoUnsignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ KnownZero <<= ShiftAmt;
+ KnownOne <<= ShiftAmt;
+ // low bits known zero.
+ if (ShiftAmt)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ // For a logical shift right
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Unsigned shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<LShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ if (ShiftAmt) {
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ KnownZero |= HighBits; // high bits known zero.
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), I->getOperand(1), I->getName());
+ return InsertNewInstWith(NewVal, *I);
+ }
+
+ // If the sign bit is the only bit demanded by this ashr, then there is no
+ // need to do it, the shift doesn't change the high bit.
+ if (DemandedMask.isSignBit())
+ return I->getOperand(0);
+
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Signed shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+ // If any of the "high bits" are demanded, we should set the sign bit as
+ // demanded.
+ if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+ DemandedMaskIn.setBit(BitWidth-1);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<AShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+
+ // Handle the sign bits.
+ APInt SignBit(APInt::getSignBit(BitWidth));
+ // Adjust to where it is now in the mask.
+ SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
+ (HighBits & ~DemandedMask) == HighBits) {
+ // Perform the logical shift right.
+ BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
+ SA, I->getName());
+ NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
+ return InsertNewInstWith(NewVal, *I);
+ } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // X % -1 demands all the bits because we don't want to introduce
+ // INT_MIN % -1 (== undef) by accident.
+ if (Rem->isAllOnesValue())
+ break;
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+ if (DemandedMask.ult(RA)) // srem won't affect demanded bits
+ return I->getOperand(0);
+
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // The low bits of LHS are unchanged by the srem.
+ KnownZero = LHSKnownZero & LowBits;
+ KnownOne = LHSKnownOne & LowBits;
+
+ // If LHS is non-negative or has all low bits zero, then the upper bits
+ // are all zero.
+ if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If LHS is negative and not all low bits are zero, then the upper bits
+ // are all one.
+ if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ }
+ }
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero |= LHSKnownZero;
+ }
+ break;
+ case Instruction::URem: {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
+ KnownZero2, KnownOne2, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
+ KnownZero2, KnownOne2, Depth+1))
+ return I;
+
+ unsigned Leaders = KnownZero2.countLeadingOnes();
+ Leaders = std::max(Leaders,
+ KnownZero2.countLeadingOnes());
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap: {
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NTZ = DemandedMask.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth-NLZ-NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth-NTZ-8;
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ Instruction *NewVal;
+ if (InputBit > ResultBit)
+ NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
+ else
+ NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
+ NewVal->takeName(I);
+ return InsertNewInstWith(NewVal, *I);
+ }
+
+ // TODO: Could compute known zero/one bits based on the input.
+ break;
+ }
+ case Intrinsic::x86_sse42_crc32_64_8:
+ case Intrinsic::x86_sse42_crc32_64_64:
+ KnownZero = APInt::getHighBitsSet(64, 32);
+ return 0;
+ }
+ }
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, KnownOne);
+ return 0;
+}
+
+/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
+/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
+/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
+/// of "C2-C1".
+///
+/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
+/// ..., bn}, without considering the specific value X is holding.
+/// This transformation is legal iff one of following conditions is hold:
+/// 1) All the bit in S are 0, in this case E1 == E2.
+/// 2) We don't care those bits in S, per the input DemandedMask.
+/// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
+/// rest bits.
+///
+/// Currently we only test condition 2).
+///
+/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
+/// not successful.
+Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
+ Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
+
+ unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue();
+ unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue();
+
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
+ KnownZero &= DemandedMask;
+
+ if (ShlAmt == 0 || ShrAmt == 0)
+ return 0;
+
+ Value *VarX = Shr->getOperand(0);
+ Type *Ty = VarX->getType();
+
+ APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
+ APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
+
+ bool isLshr = (Shr->getOpcode() == Instruction::LShr);
+ BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
+ (BitMask1.ashr(ShrAmt) << ShlAmt);
+
+ if (ShrAmt <= ShlAmt) {
+ BitMask2 <<= (ShlAmt - ShrAmt);
+ } else {
+ BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
+ BitMask2.ashr(ShrAmt - ShlAmt);
+ }
+
+ // Check if condition-2 (see the comment to this function) is satified.
+ if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
+ if (ShrAmt == ShlAmt)
+ return VarX;
+
+ if (!Shr->hasOneUse())
+ return 0;
+
+ BinaryOperator *New;
+ if (ShrAmt < ShlAmt) {
+ Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
+ New = BinaryOperator::CreateShl(VarX, Amt);
+ BinaryOperator *Orig = cast<BinaryOperator>(Shl);
+ New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
+ } else {
+ Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
+ New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
+ BinaryOperator::CreateAShr(VarX, Amt);
+ if (cast<BinaryOperator>(Shr)->isExact())
+ New->setIsExact(true);
+ }
+
+ return InsertNewInstWith(New, *Shl);
+ }
+
+ return 0;
+}
+
+/// SimplifyDemandedVectorElts - The specified value produces a vector with
+/// any number of elements. DemandedElts contains the set of elements that are
+/// actually used by the caller. This method analyzes which elements of the
+/// operand are undef and returns that information in UndefElts.
+///
+/// If the information about demanded elements can be used to simplify the
+/// operation, the operation is simplified, then the resultant value is
+/// returned. This returns null if no change was made.
+Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) {
+ unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+ APInt EltMask(APInt::getAllOnesValue(VWidth));
+ assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
+
+ if (isa<UndefValue>(V)) {
+ // If the entire vector is undefined, just return this info.
+ UndefElts = EltMask;
+ return 0;
+ }
+
+ if (DemandedElts == 0) { // If nothing is demanded, provide undef.
+ UndefElts = EltMask;
+ return UndefValue::get(V->getType());
+ }
+
+ UndefElts = 0;
+
+ // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Check if this is identity. If so, return 0 since we are not simplifying
+ // anything.
+ if (DemandedElts.isAllOnesValue())
+ return 0;
+
+ Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Undef = UndefValue::get(EltTy);
+
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ if (!DemandedElts[i]) { // If not demanded, set to undef.
+ Elts.push_back(Undef);
+ UndefElts.setBit(i);
+ continue;
+ }
+
+ Constant *Elt = C->getAggregateElement(i);
+ if (Elt == 0) return 0;
+
+ if (isa<UndefValue>(Elt)) { // Already undef.
+ Elts.push_back(Undef);
+ UndefElts.setBit(i);
+ } else { // Otherwise, defined.
+ Elts.push_back(Elt);
+ }
+ }
+
+ // If we changed the constant, return it.
+ Constant *NewCV = ConstantVector::get(Elts);
+ return NewCV != C ? NewCV : 0;
+ }
+
+ // Limit search depth.
+ if (Depth == 10)
+ return 0;
+
+ // If multiple users are using the root value, proceed with
+ // simplification conservatively assuming that all elements
+ // are needed.
+ if (!V->hasOneUse()) {
+ // Quit if we find multiple users of a non-root value though.
+ // They'll be handled when it's their turn to be visited by
+ // the main instcombine process.
+ if (Depth != 0)
+ // TODO: Just compute the UndefElts information recursively.
+ return 0;
+
+ // Conservatively assume that all elements are needed.
+ DemandedElts = EltMask;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return 0; // Only analyze instructions.
+
+ bool MadeChange = false;
+ APInt UndefElts2(VWidth, 0);
+ Value *TmpV;
+ switch (I->getOpcode()) {
+ default: break;
+
+ case Instruction::InsertElement: {
+ // If this is a variable index, we don't know which element it overwrites.
+ // demand exactly the same input as we produce.
+ ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
+ if (Idx == 0) {
+ // Note that we can't propagate undef elt info, because we don't know
+ // which elt is getting updated.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
+ }
+
+ // If this is inserting an element that isn't demanded, remove this
+ // insertelement.
+ unsigned IdxNo = Idx->getZExtValue();
+ if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
+ Worklist.Add(I);
+ return I->getOperand(0);
+ }
+
+ // Otherwise, the element inserted overwrites whatever was there, so the
+ // input demanded set is simpler than the output set.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clearBit(IdxNo);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ // The inserted element is defined.
+ UndefElts.clearBit(IdxNo);
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ uint64_t LHSVWidth =
+ cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+ APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (DemandedElts[i]) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal != -1u) {
+ assert(MaskVal < LHSVWidth * 2 &&
+ "shufflevector mask index out of range!");
+ if (MaskVal < LHSVWidth)
+ LeftDemanded.setBit(MaskVal);
+ else
+ RightDemanded.setBit(MaskVal - LHSVWidth);
+ }
+ }
+ }
+
+ APInt UndefElts4(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
+ UndefElts4, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ APInt UndefElts3(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
+ UndefElts3, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ bool NewUndefElts = false;
+ for (unsigned i = 0; i < VWidth; i++) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal == -1u) {
+ UndefElts.setBit(i);
+ } else if (!DemandedElts[i]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
+ } else if (MaskVal < LHSVWidth) {
+ if (UndefElts4[MaskVal]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
+ }
+ } else {
+ if (UndefElts3[MaskVal - LHSVWidth]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
+ }
+ }
+ }
+
+ if (NewUndefElts) {
+ // Add additional discovered undefs.
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0; i < VWidth; ++i) {
+ if (UndefElts[i])
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
+ else
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Shuffle->getMaskValue(i)));
+ }
+ I->setOperand(2, ConstantVector::get(Elts));
+ MadeChange = true;
+ }
+ break;
+ }
+ case Instruction::Select: {
+ APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
+ if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (CV->getAggregateElement(i)->isNullValue())
+ LeftDemanded.clearBit(i);
+ else
+ RightDemanded.clearBit(i);
+ }
+ }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined.
+ UndefElts &= UndefElts2;
+ break;
+ }
+ case Instruction::BitCast: {
+ // Vector->vector casts only.
+ VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+ if (!VTy) break;
+ unsigned InVWidth = VTy->getNumElements();
+ APInt InputDemandedElts(InVWidth, 0);
+ unsigned Ratio;
+
+ if (VWidth == InVWidth) {
+ // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+ // elements as are demanded of us.
+ Ratio = 1;
+ InputDemandedElts = DemandedElts;
+ } else if (VWidth > InVWidth) {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the result than there are in the source,
+ // then an input element is live if any of the corresponding output
+ // elements are live.
+ Ratio = VWidth/InVWidth;
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ if (DemandedElts[OutIdx])
+ InputDemandedElts.setBit(OutIdx/Ratio);
+ }
+ } else {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the source than there are in the result,
+ // then an input element is live if the corresponding output element is
+ // live.
+ Ratio = InVWidth/VWidth;
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (DemandedElts[InIdx/Ratio])
+ InputDemandedElts.setBit(InIdx);
+ }
+
+ // div/rem demand all inputs, because they don't want divide by zero.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) {
+ I->setOperand(0, TmpV);
+ MadeChange = true;
+ }
+
+ UndefElts = UndefElts2;
+ if (VWidth > InVWidth) {
+ llvm_unreachable("Unimp");
+ // If there are more elements in the result than there are in the source,
+ // then an output element is undef if the corresponding input element is
+ // undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+ if (UndefElts2[OutIdx/Ratio])
+ UndefElts.setBit(OutIdx);
+ } else if (VWidth < InVWidth) {
+ llvm_unreachable("Unimp");
+ // If there are more elements in the source than there are in the result,
+ // then a result element is undef if all of the corresponding input
+ // elements are undef.
+ UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (!UndefElts2[InIdx]) // Not undef?
+ UndefElts.clearBit(InIdx/Ratio); // Clear undef bit.
+ }
+ break;
+ }
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // div/rem demand all inputs, because they don't want divide by zero.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
+
+ case Instruction::Call: {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+ if (!II) break;
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // Binary vector operations that work column-wise. A dest element is a
+ // function of the corresponding input elements from the two inputs.
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_min_sd:
+ case Intrinsic::x86_sse2_max_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ // If only the low elt is demanded and this is a scalarizable intrinsic,
+ // scalarize it now.
+ if (DemandedElts == 1) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ // TODO: Lower MIN/MAX/ABS/etc
+ Value *LHS = II->getArgOperand(0);
+ Value *RHS = II->getArgOperand(1);
+ // Extract the element as scalars.
+ LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+ RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
+ II->getName()), *II);
+ break;
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_mul_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
+ II->getName()), *II);
+ break;
+ }
+
+ Instruction *New =
+ InsertElementInst::Create(
+ UndefValue::get(II->getType()), TmpV,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
+ II->getName());
+ InsertNewInstWith(New, *II);
+ return New;
+ }
+ }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+ }
+ break;
+ }
+ }
+ return MadeChange ? I : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
new file mode 100644
index 000000000000..4f71db1a4b09
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -0,0 +1,682 @@
+//===- InstCombineVectorOps.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements instcombine for ExtractElement, InsertElement and
+// ShuffleVector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
+/// is to leave as a vector operation. isConstant indicates whether we're
+/// extracting one known element. If false we're extracting a variable index.
+static bool CheapToScalarize(Value *V, bool isConstant) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isConstant) return true;
+
+ // If all elts are the same, we can extract it and use any of the values.
+ Constant *Op0 = C->getAggregateElement(0U);
+ for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
+ if (C->getAggregateElement(i) != Op0)
+ return false;
+ return true;
+ }
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Insert element gets simplified to the inserted element or is deleted if
+ // this is constant idx extract element and its a constant idx insertelt.
+ if (I->getOpcode() == Instruction::InsertElement && isConstant &&
+ isa<ConstantInt>(I->getOperand(2)))
+ return true;
+ if (I->getOpcode() == Instruction::Load && I->hasOneUse())
+ return true;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
+ if (BO->hasOneUse() &&
+ (CheapToScalarize(BO->getOperand(0), isConstant) ||
+ CheapToScalarize(BO->getOperand(1), isConstant)))
+ return true;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->hasOneUse() &&
+ (CheapToScalarize(CI->getOperand(0), isConstant) ||
+ CheapToScalarize(CI->getOperand(1), isConstant)))
+ return true;
+
+ return false;
+}
+
+/// FindScalarElement - Given a vector and an element number, see if the scalar
+/// value is already around as a register, for example if it were inserted then
+/// extracted from the vector.
+static Value *FindScalarElement(Value *V, unsigned EltNo) {
+ assert(V->getType()->isVectorTy() && "Not looking at a vector?");
+ VectorType *VTy = cast<VectorType>(V->getType());
+ unsigned Width = VTy->getNumElements();
+ if (EltNo >= Width) // Out of range access.
+ return UndefValue::get(VTy->getElementType());
+
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->getAggregateElement(EltNo);
+
+ if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert to a variable element, we don't know what it is.
+ if (!isa<ConstantInt>(III->getOperand(2)))
+ return 0;
+ unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+ // If this is an insert to the element we are looking for, return the
+ // inserted value.
+ if (EltNo == IIElt)
+ return III->getOperand(1);
+
+ // Otherwise, the insertelement doesn't modify the value, recurse on its
+ // vector input.
+ return FindScalarElement(III->getOperand(0), EltNo);
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+ unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
+ int InEl = SVI->getMaskValue(EltNo);
+ if (InEl < 0)
+ return UndefValue::get(VTy->getElementType());
+ if (InEl < (int)LHSWidth)
+ return FindScalarElement(SVI->getOperand(0), InEl);
+ return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+ }
+
+ // Extract a value from a vector add operation with a constant zero.
+ Value *Val = 0; Constant *Con = 0;
+ if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
+ if (Con->getAggregateElement(EltNo)->isNullValue())
+ return FindScalarElement(Val, EltNo);
+ }
+
+ // Otherwise, we don't know.
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. We handle a known element # below.
+ if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
+ if (CheapToScalarize(C, false))
+ return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ unsigned IndexVal = IdxC->getZExtValue();
+ unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
+
+ // If this is extracting an invalid index, turn this into undef, to avoid
+ // crashing the code below.
+ if (IndexVal >= VectorWidth)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // This instruction only demands the single element from the input vector.
+ // If the input vector has a single use, simplify it based on this use
+ // property.
+ if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
+ APInt UndefElts(VectorWidth, 0);
+ APInt DemandedMask(VectorWidth, 0);
+ DemandedMask.setBit(IndexVal);
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
+ DemandedMask, UndefElts)) {
+ EI.setOperand(0, V);
+ return &EI;
+ }
+ }
+
+ if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
+ return ReplaceInstUsesWith(EI, Elt);
+
+ // If the this extractelement is directly using a bitcast from a vector of
+ // the same number of elements, see if we can find the source element from
+ // it. In this case, we will end up needing to bitcast the scalars.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+ if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+ if (VT->getNumElements() == VectorWidth)
+ if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+ return new BitCastInst(Elt, EI.getType());
+ }
+ }
+
+ if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
+ // Push extractelement into predecessor operation if legal and
+ // profitable to do so
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (I->hasOneUse() &&
+ CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+ Value *newEI0 =
+ Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
+ Value *newEI1 =
+ Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
+ return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+ }
+ } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ // Extracting the inserted element?
+ if (IE->getOperand(2) == EI.getOperand(1))
+ return ReplaceInstUsesWith(EI, IE->getOperand(1));
+ // If the inserted and extracted elements are constants, they must not
+ // be the same value, extract from the pre-inserted value instead.
+ if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
+ Worklist.AddValue(EI.getOperand(0));
+ EI.setOperand(0, IE->getOperand(0));
+ return &EI;
+ }
+ } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+ // If this is extracting an element from a shufflevector, figure out where
+ // it came from and extract from the appropriate input element instead.
+ if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
+ Value *Src;
+ unsigned LHSWidth =
+ SVI->getOperand(0)->getType()->getVectorNumElements();
+
+ if (SrcIdx < 0)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ if (SrcIdx < (int)LHSWidth)
+ Src = SVI->getOperand(0);
+ else {
+ SrcIdx -= LHSWidth;
+ Src = SVI->getOperand(1);
+ }
+ Type *Int32Ty = Type::getInt32Ty(EI.getContext());
+ return ExtractElementInst::Create(Src,
+ ConstantInt::get(Int32Ty,
+ SrcIdx, false));
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // Canonicalize extractelement(cast) -> cast(extractelement)
+ // bitcasts can change the number of vector elements and they cost nothing
+ if (CI->hasOneUse() && EI.hasOneUse() &&
+ (CI->getOpcode() != Instruction::BitCast)) {
+ Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
+ EI.getIndexOperand());
+ return CastInst::Create(CI->getOpcode(), EE, EI.getType());
+ }
+ }
+ }
+ return 0;
+}
+
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true.
+/// Otherwise, return false.
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+ SmallVectorImpl<Constant*> &Mask) {
+ assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+ "Invalid CollectSingleShuffleElements");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return true;
+ }
+
+ if (V == LHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return true;
+ }
+
+ if (V == RHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ i+NumElts));
+ return true;
+ }
+
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (!isa<ConstantInt>(IdxOp))
+ return false;
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
+ // Okay, we can handle this if the vector we are insertinting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted undef.
+ Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
+ return true;
+ }
+ } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+ if (isa<ConstantInt>(EI->getOperand(1)) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+
+ // This must be extracting from either LHS or RHS.
+ if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+ // Okay, we can handle this if the vector we are insertinting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted value.
+ if (EI->getOperand(0) == LHS) {
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx);
+ } else {
+ assert(EI->getOperand(0) == RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx+NumElts);
+ }
+ return true;
+ }
+ }
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
+/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
+/// that computes V and the LHS value of the shuffle.
+static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
+ Value *&RHS) {
+ assert(V->getType()->isVectorTy() &&
+ (RHS == 0 || V->getType() == RHS->getType()) &&
+ "Invalid shuffle!");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return V;
+ }
+
+ if (isa<ConstantAggregateZero>(V)) {
+ Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
+ return V;
+ }
+
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ // Either the extracted from or inserted into vector must be RHSVec,
+ // otherwise we'd end up with a shuffle of three inputs.
+ if (EI->getOperand(0) == RHS || RHS == 0) {
+ RHS = EI->getOperand(0);
+ Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+ExtractedIdx);
+ return V;
+ }
+
+ if (VecOp == RHS) {
+ Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Everything but the extracted element is replaced with the RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i != InsertedIdx)
+ Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+i);
+ }
+ return V;
+ }
+
+ // If this insertelement is a chain that comes from exactly these two
+ // vectors, return the vector and the effective shuffle.
+ if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+ return EI->getOperand(0);
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ // Otherwise, can't do anything fancy. Return an identity vector.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return V;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+ Value *VecOp = IE.getOperand(0);
+ Value *ScalarOp = IE.getOperand(1);
+ Value *IdxOp = IE.getOperand(2);
+
+ // Inserting an undef or into an undefined place, remove this.
+ if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
+ ReplaceInstUsesWith(IE, VecOp);
+
+ // If the inserted element was extracted from some other vector, and if the
+ // indexes are constant, try to turn this into a shufflevector operation.
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == IE.getType()) {
+ unsigned NumVectorElts = IE.getType()->getNumElements();
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ if (InsertedIdx >= NumVectorElts) // Out of range insert.
+ return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+ // If we are extracting a value from a vector, then inserting it right
+ // back into the same place, just use the input vector.
+ if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ // If this insertelement isn't used by some other insertelement, turn it
+ // (and any insertelements it points to), into one big shuffle.
+ if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
+ SmallVector<Constant*, 16> Mask;
+ Value *RHS = 0;
+ Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
+ if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+ // We now have a shuffle of LHS, RHS, Mask.
+ return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
+ }
+ }
+ }
+
+ unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
+ if (V != &IE)
+ return ReplaceInstUsesWith(IE, V);
+ return &IE;
+ }
+
+ return 0;
+}
+
+
+Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Value *LHS = SVI.getOperand(0);
+ Value *RHS = SVI.getOperand(1);
+ SmallVector<int, 16> Mask = SVI.getShuffleMask();
+
+ bool MadeChange = false;
+
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(SVI.getOperand(2)))
+ return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+
+ unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (V != &SVI)
+ return ReplaceInstUsesWith(SVI, V);
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
+
+ // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
+ // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
+ if (LHS == RHS || isa<UndefValue>(LHS)) {
+ if (isa<UndefValue>(LHS) && LHS == RHS) {
+ // shuffle(undef,undef,mask) -> undef.
+ Value* result = (VWidth == LHSWidth)
+ ? LHS : UndefValue::get(SVI.getType());
+ return ReplaceInstUsesWith(SVI, result);
+ }
+
+ // Remap any references to RHS to use LHS.
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
+ if (Mask[i] < 0) {
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ continue;
+ }
+
+ if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
+ Mask[i] = -1; // Turn into undef.
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ } else {
+ Mask[i] = Mask[i] % e; // Force to LHS.
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
+ Mask[i]));
+ }
+ }
+ SVI.setOperand(0, SVI.getOperand(1));
+ SVI.setOperand(1, UndefValue::get(RHS->getType()));
+ SVI.setOperand(2, ConstantVector::get(Elts));
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ if (VWidth == LHSWidth) {
+ // Analyze the shuffle, are the LHS or RHS and identity shuffles?
+ bool isLHSID = true, isRHSID = true;
+
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] < 0) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == (int)i);
+
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
+
+ // Eliminate identity shuffles.
+ if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+ if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+ }
+
+ // If the LHS is a shufflevector itself, see if we can combine it with this
+ // one without producing an unusual shuffle.
+ // Cases that might be simplified:
+ // 1.
+ // x1=shuffle(v1,v2,mask1)
+ // x=shuffle(x1,undef,mask)
+ // ==>
+ // x=shuffle(v1,undef,newMask)
+ // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
+ // 2.
+ // x1=shuffle(v1,undef,mask1)
+ // x=shuffle(x1,x2,mask)
+ // where v1.size() == mask1.size()
+ // ==>
+ // x=shuffle(v1,x2,newMask)
+ // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
+ // 3.
+ // x2=shuffle(v2,undef,mask2)
+ // x=shuffle(x1,x2,mask)
+ // where v2.size() == mask2.size()
+ // ==>
+ // x=shuffle(x1,v2,newMask)
+ // newMask[i] = (mask[i] < x1.size())
+ // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
+ // 4.
+ // x1=shuffle(v1,undef,mask1)
+ // x2=shuffle(v2,undef,mask2)
+ // x=shuffle(x1,x2,mask)
+ // where v1.size() == v2.size()
+ // ==>
+ // x=shuffle(v1,v2,newMask)
+ // newMask[i] = (mask[i] < x1.size())
+ // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
+ //
+ // Here we are really conservative:
+ // we are absolutely afraid of producing a shuffle mask not in the input
+ // program, because the code gen may not be smart enough to turn a merged
+ // shuffle into two specific shuffles: it may produce worse code. As such,
+ // we only merge two shuffles if the result is either a splat or one of the
+ // input shuffle masks. In this case, merging the shuffles just removes
+ // one instruction, which we know is safe. This is good for things like
+ // turning: (splat(splat)) -> splat, or
+ // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
+ ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
+ ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
+ if (LHSShuffle)
+ if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
+ LHSShuffle = NULL;
+ if (RHSShuffle)
+ if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
+ RHSShuffle = NULL;
+ if (!LHSShuffle && !RHSShuffle)
+ return MadeChange ? &SVI : 0;
+
+ Value* LHSOp0 = NULL;
+ Value* LHSOp1 = NULL;
+ Value* RHSOp0 = NULL;
+ unsigned LHSOp0Width = 0;
+ unsigned RHSOp0Width = 0;
+ if (LHSShuffle) {
+ LHSOp0 = LHSShuffle->getOperand(0);
+ LHSOp1 = LHSShuffle->getOperand(1);
+ LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
+ }
+ if (RHSShuffle) {
+ RHSOp0 = RHSShuffle->getOperand(0);
+ RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
+ }
+ Value* newLHS = LHS;
+ Value* newRHS = RHS;
+ if (LHSShuffle) {
+ // case 1
+ if (isa<UndefValue>(RHS)) {
+ newLHS = LHSOp0;
+ newRHS = LHSOp1;
+ }
+ // case 2 or 4
+ else if (LHSOp0Width == LHSWidth) {
+ newLHS = LHSOp0;
+ }
+ }
+ // case 3 or 4
+ if (RHSShuffle && RHSOp0Width == LHSWidth) {
+ newRHS = RHSOp0;
+ }
+ // case 4
+ if (LHSOp0 == RHSOp0) {
+ newLHS = LHSOp0;
+ newRHS = NULL;
+ }
+
+ if (newLHS == LHS && newRHS == RHS)
+ return MadeChange ? &SVI : 0;
+
+ SmallVector<int, 16> LHSMask;
+ SmallVector<int, 16> RHSMask;
+ if (newLHS != LHS)
+ LHSMask = LHSShuffle->getShuffleMask();
+ if (RHSShuffle && newRHS != RHS)
+ RHSMask = RHSShuffle->getShuffleMask();
+
+ unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
+ SmallVector<int, 16> newMask;
+ bool isSplat = true;
+ int SplatElt = -1;
+ // Create a new mask for the new ShuffleVectorInst so that the new
+ // ShuffleVectorInst is equivalent to the original one.
+ for (unsigned i = 0; i < VWidth; ++i) {
+ int eltMask;
+ if (Mask[i] < 0) {
+ // This element is an undef value.
+ eltMask = -1;
+ } else if (Mask[i] < (int)LHSWidth) {
+ // This element is from left hand side vector operand.
+ //
+ // If LHS is going to be replaced (case 1, 2, or 4), calculate the
+ // new mask value for the element.
+ if (newLHS != LHS) {
+ eltMask = LHSMask[Mask[i]];
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value.
+ if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
+ eltMask = -1;
+ } else
+ eltMask = Mask[i];
+ } else {
+ // This element is from right hand side vector operand
+ //
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value. (case 1)
+ if (isa<UndefValue>(RHS))
+ eltMask = -1;
+ // If RHS is going to be replaced (case 3 or 4), calculate the
+ // new mask value for the element.
+ else if (newRHS != RHS) {
+ eltMask = RHSMask[Mask[i]-LHSWidth];
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value.
+ if (eltMask >= (int)RHSOp0Width) {
+ assert(isa<UndefValue>(RHSShuffle->getOperand(1))
+ && "should have been check above");
+ eltMask = -1;
+ }
+ } else
+ eltMask = Mask[i]-LHSWidth;
+
+ // If LHS's width is changed, shift the mask value accordingly.
+ // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
+ // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
+ // If newRHS == newLHS, we want to remap any references from newRHS to
+ // newLHS so that we can properly identify splats that may occur due to
+ // obfuscation accross the two vectors.
+ if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
+ eltMask += newLHSWidth;
+ }
+
+ // Check if this could still be a splat.
+ if (eltMask >= 0) {
+ if (SplatElt >= 0 && SplatElt != eltMask)
+ isSplat = false;
+ SplatElt = eltMask;
+ }
+
+ newMask.push_back(eltMask);
+ }
+
+ // If the result mask is equal to one of the original shuffle masks,
+ // or is a splat, do the replacement.
+ if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
+ SmallVector<Constant*, 16> Elts;
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
+ for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
+ if (newMask[i] < 0) {
+ Elts.push_back(UndefValue::get(Int32Ty));
+ } else {
+ Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
+ }
+ }
+ if (newRHS == NULL)
+ newRHS = UndefValue::get(newLHS->getType());
+ return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
+ }
+
+ return MadeChange ? &SVI : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
new file mode 100644
index 000000000000..49efce5c4f22
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -0,0 +1,106 @@
+//===- InstCombineWorklist.h - Worklist for the InstCombine pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_WORKLIST_H
+#define INSTCOMBINE_WORKLIST_H
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// InstCombineWorklist - This is the worklist management logic for
+/// InstCombine.
+class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
+ SmallVector<Instruction*, 256> Worklist;
+ DenseMap<Instruction*, unsigned> WorklistMap;
+
+ void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION;
+ InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION;
+public:
+ InstCombineWorklist() {}
+
+ bool isEmpty() const { return Worklist.empty(); }
+
+ /// Add - Add the specified instruction to the worklist if it isn't already
+ /// in it.
+ void Add(Instruction *I) {
+ if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+ DEBUG(errs() << "IC: ADD: " << *I << '\n');
+ Worklist.push_back(I);
+ }
+ }
+
+ void AddValue(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Add(I);
+ }
+
+ /// AddInitialGroup - Add the specified batch of stuff in reverse order.
+ /// which should only be done when the worklist is empty and when the group
+ /// has no duplicates.
+ void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+ assert(Worklist.empty() && "Worklist must be empty to add initial group");
+ Worklist.reserve(NumEntries+16);
+ WorklistMap.resize(NumEntries);
+ DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+ for (unsigned Idx = 0; NumEntries; --NumEntries) {
+ Instruction *I = List[NumEntries-1];
+ WorklistMap.insert(std::make_pair(I, Idx++));
+ Worklist.push_back(I);
+ }
+ }
+
+ // Remove - remove I from the worklist if it exists.
+ void Remove(Instruction *I) {
+ DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+ if (It == WorklistMap.end()) return; // Not in worklist.
+
+ // Don't bother moving everything down, just null out the slot.
+ Worklist[It->second] = 0;
+
+ WorklistMap.erase(It);
+ }
+
+ Instruction *RemoveOne() {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+ WorklistMap.erase(I);
+ return I;
+ }
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ Add(cast<Instruction>(*UI));
+ }
+
+
+ /// Zap - check that the worklist is empty and nuke the backing store for
+ /// the map if it is large.
+ void Zap() {
+ assert(WorklistMap.empty() && "Worklist empty, but map not?");
+
+ // Do an explicit clear, this shrinks the map if needed.
+ WorklistMap.clear();
+ }
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
new file mode 100644
index 000000000000..c6115e3e91fe
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -0,0 +1,2508 @@
+//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InstructionCombining - Combine instructions to form fewer, simple
+// instructions. This pass does not modify the CFG. This pass is where
+// algebraic simplification happens.
+//
+// This pass combines things like:
+// %Y = add i32 %X, 1
+// %Z = add i32 %Y, 1
+// into:
+// %Z = add i32 %X, 2
+//
+// This is a simple worklist driven algorithm.
+//
+// This pass guarantees that the following canonicalizations are performed on
+// the program:
+// 1. If a binary operator has a constant operand, it is moved to the RHS
+// 2. Bitwise operators with constant operands are always grouped so that
+// shifts are performed first, then or's, then and's, then xor's.
+// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
+// 4. All cmp instructions on boolean values are replaced with logical ops
+// 5. add X, X is represented as (X*2) => (X << 1)
+// 6. Multiplies with a power-of-two constant argument are transformed into
+// shifts.
+// ... etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Scalar.h"
+#include "InstCombine.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <climits>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumCombined , "Number of insts combined");
+STATISTIC(NumConstProp, "Number of constant folds");
+STATISTIC(NumDeadInst , "Number of dead inst eliminated");
+STATISTIC(NumSunkInst , "Number of instructions sunk");
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc , "Number of reassociations");
+
+static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable unsafe double to float "
+ "shrinking for math lib calls"));
+
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+ initializeInstCombinerPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+ initializeInstCombine(*unwrap(R));
+}
+
+char InstCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false)
+
+void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
+}
+
+
+Value *InstCombiner::EmitGEPOffset(User *GEP) {
+ return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP);
+}
+
+/// ShouldChangeType - Return true if it is desirable to convert a computation
+/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
+/// type for example, or from a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
+ assert(From->isIntegerTy() && To->isIntegerTy());
+
+ // If we don't have TD, we don't know if the source/dest are legal.
+ if (!TD) return false;
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ bool FromLegal = TD->isLegalInteger(FromWidth);
+ bool ToLegal = TD->isLegalInteger(ToWidth);
+
+ // If this is a legal integer from type, and the result would be an illegal
+ // type, don't do the transformation.
+ if (FromLegal && !ToLegal)
+ return false;
+
+ // Otherwise, if both are illegal, do not increase the size of the result. We
+ // do allow things like i160 -> i64, but not i64 -> i160.
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+ return false;
+
+ return true;
+}
+
+// Return true, if No Signed Wrap should be maintained for I.
+// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
+// where both B and C should be ConstantInts, results in a constant that does
+// not overflow. This function only handles the Add and Sub opcodes. For
+// all other opcodes, the function conservatively returns false.
+static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
+ OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+ if (!OBO || !OBO->hasNoSignedWrap()) {
+ return false;
+ }
+
+ // We reason about Add and Sub Only.
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ if (Opcode != Instruction::Add &&
+ Opcode != Instruction::Sub) {
+ return false;
+ }
+
+ ConstantInt *CB = dyn_cast<ConstantInt>(B);
+ ConstantInt *CC = dyn_cast<ConstantInt>(C);
+
+ if (!CB || !CC) {
+ return false;
+ }
+
+ const APInt &BVal = CB->getValue();
+ const APInt &CVal = CC->getValue();
+ bool Overflow = false;
+
+ if (Opcode == Instruction::Add) {
+ BVal.sadd_ov(CVal, Overflow);
+ } else {
+ BVal.ssub_ov(CVal, Overflow);
+ }
+
+ return !Overflow;
+}
+
+/// Conservatively clears subclassOptionalData after a reassociation or
+/// commutation. We preserve fast-math flags when applicable as they can be
+/// preserved.
+static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
+ FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
+ if (!FPMO) {
+ I.clearSubclassOptionalData();
+ return;
+ }
+
+ FastMathFlags FMF = I.getFastMathFlags();
+ I.clearSubclassOptionalData();
+ I.setFastMathFlags(FMF);
+}
+
+/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+/// operators which are associative or commutative:
+//
+// Commutative operators:
+//
+// 1. Order operands such that they are listed from right (least complex) to
+// left (most complex). This puts constants before unary operators before
+// binary operators.
+//
+// Associative operators:
+//
+// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+//
+// Associative and commutative operators:
+//
+// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+// if C1 and C2 are constants.
+//
+bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ bool Changed = false;
+
+ do {
+ // Order operands such that they are listed from right (least complex) to
+ // left (most complex). This puts constants before unary operators before
+ // binary operators.
+ if (I.isCommutative() && getComplexity(I.getOperand(0)) <
+ getComplexity(I.getOperand(1)))
+ Changed = !I.swapOperands();
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
+
+ if (I.isAssociative()) {
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) {
+ // It simplifies to V. Form "A op V".
+ I.setOperand(0, A);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ if (MaintainNoSignedWrap(I, B, C) &&
+ (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
+ // Note: this is only valid because SimplifyBinOp doesn't look at
+ // the operands to Op0.
+ I.clearSubclassOptionalData();
+ I.setHasNoSignedWrap(true);
+ } else {
+ ClearSubclassDataAfterReassociation(I);
+ }
+
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) {
+ // It simplifies to V. Form "V op C".
+ I.setOperand(0, V);
+ I.setOperand(1, C);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ ClearSubclassDataAfterReassociation(I);
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+ }
+
+ if (I.isAssociative() && I.isCommutative()) {
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ // It simplifies to V. Form "V op B".
+ I.setOperand(0, V);
+ I.setOperand(1, B);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ ClearSubclassDataAfterReassociation(I);
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ // It simplifies to V. Form "B op V".
+ I.setOperand(0, B);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ ClearSubclassDataAfterReassociation(I);
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+ // if C1 and C2 are constants.
+ if (Op0 && Op1 &&
+ Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
+ isa<Constant>(Op0->getOperand(1)) &&
+ isa<Constant>(Op1->getOperand(1)) &&
+ Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *A = Op0->getOperand(0);
+ Constant *C1 = cast<Constant>(Op0->getOperand(1));
+ Value *B = Op1->getOperand(0);
+ Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+ Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
+ BinaryOperator *New = BinaryOperator::Create(Opcode, A, B);
+ InsertNewInstWith(New, I);
+ New->takeName(Op1);
+ I.setOperand(0, New);
+ I.setOperand(1, Folded);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ ClearSubclassDataAfterReassociation(I);
+
+ Changed = true;
+ continue;
+ }
+ }
+
+ // No further simplifications.
+ return Changed;
+ } while (1);
+}
+
+/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ switch (LOp) {
+ default:
+ return false;
+
+ case Instruction::And:
+ // And distributes over Or and Xor.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Or:
+ case Instruction::Xor:
+ return true;
+ }
+
+ case Instruction::Mul:
+ // Multiplication distributes over addition and subtraction.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Add:
+ case Instruction::Sub:
+ return true;
+ }
+
+ case Instruction::Or:
+ // Or distributes over And.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::And:
+ return true;
+ }
+ }
+}
+
+/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to
+/// "(X ROp Z) LOp (Y ROp Z)".
+static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ if (Instruction::isCommutative(ROp))
+ return LeftDistributesOverRight(ROp, LOp);
+ // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
+ // but this requires knowing that the addition does not overflow and other
+ // such subtleties.
+ return false;
+}
+
+/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+/// which some other binary operation distributes over either by factorizing
+/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
+/// a win). Returns the simplified value, or null if it didn't simplify.
+Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+ Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op
+
+ // Factorization.
+ if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) {
+ // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
+ // a common term.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Does "X op' Y" always equal "Y op' X"?
+ bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
+
+ // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
+ if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (InnerCommutative && A == D)) {
+ if (A != C)
+ std::swap(C, D);
+ // Consider forming "A op' (B op D)".
+ // If "B op D" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD);
+ // If "B op D" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName());
+ if (V) {
+ ++NumFactor;
+ V = Builder->CreateBinOp(InnerOpcode, A, V);
+ V->takeName(&I);
+ return V;
+ }
+ }
+
+ // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
+ if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (InnerCommutative && B == C)) {
+ if (B != D)
+ std::swap(C, D);
+ // Consider forming "(A op C) op' B".
+ // If "A op C" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD);
+ // If "A op C" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName());
+ if (V) {
+ ++NumFactor;
+ V = Builder->CreateBinOp(InnerOpcode, V, B);
+ V->takeName(&I);
+ return V;
+ }
+ }
+ }
+
+ // Expansion.
+ if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
+ // The instruction has the form "(A op' B) op C". See if expanding it out
+ // to "(A op C) op' (B op C)" results in simplifications.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) ||
+ (Instruction::isCommutative(InnerOpcode) && L == B && R == A))
+ return Op0;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ return V;
+ // Otherwise, create a new instruction.
+ C = Builder->CreateBinOp(InnerOpcode, L, R);
+ C->takeName(&I);
+ return C;
+ }
+ }
+
+ if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
+ // The instruction has the form "A op (B op' C)". See if expanding it out
+ // to "(A op B) op' (A op C)" results in simplifications.
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
+
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) ||
+ (Instruction::isCommutative(InnerOpcode) && L == C && R == B))
+ return Op1;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ return V;
+ // Otherwise, create a new instruction.
+ A = Builder->CreateBinOp(InnerOpcode, L, R);
+ A->takeName(&I);
+ return A;
+ }
+ }
+
+ return 0;
+}
+
+// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
+// if the LHS is a constant zero (which is the 'negate' form).
+//
+Value *InstCombiner::dyn_castNegVal(Value *V) const {
+ if (BinaryOperator::isNeg(V))
+ return BinaryOperator::getNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantExpr::getNeg(C);
+
+ if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
+ if (C->getType()->getElementType()->isIntegerTy())
+ return ConstantExpr::getNeg(C);
+
+ return 0;
+}
+
+// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
+// instruction if the LHS is a constant negative zero (which is the 'negate'
+// form).
+//
+Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
+ if (BinaryOperator::isFNeg(V, IgnoreZeroSign))
+ return BinaryOperator::getFNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V))
+ return ConstantExpr::getFNeg(C);
+
+ if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
+ if (C->getType()->getElementType()->isFloatingPointTy())
+ return ConstantExpr::getFNeg(C);
+
+ return 0;
+}
+
+static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
+ InstCombiner *IC) {
+ if (CastInst *CI = dyn_cast<CastInst>(&I)) {
+ return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
+ }
+
+ // Figure out if the constant is the left or the right argument.
+ bool ConstIsRHS = isa<Constant>(I.getOperand(1));
+ Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
+
+ if (Constant *SOC = dyn_cast<Constant>(SO)) {
+ if (ConstIsRHS)
+ return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+ return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
+ }
+
+ Value *Op0 = SO, *Op1 = ConstOperand;
+ if (!ConstIsRHS)
+ std::swap(Op0, Op1);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+ return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+ SO->getName()+".op");
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
+ return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
+ return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ llvm_unreachable("Unknown binary instruction type!");
+}
+
+// FoldOpIntoSelect - Given an instruction with a select as one operand and a
+// constant as the other operand, try to fold the binary operator into the
+// select arguments. This also works for Cast instructions, which obviously do
+// not have a second operand.
+Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
+ // Don't modify shared select instructions
+ if (!SI->hasOneUse()) return 0;
+ Value *TV = SI->getOperand(1);
+ Value *FV = SI->getOperand(2);
+
+ if (isa<Constant>(TV) || isa<Constant>(FV)) {
+ // Bool selects with constant operands can be folded to logical ops.
+ if (SI->getType()->isIntegerTy(1)) return 0;
+
+ // If it's a bitcast involving vectors, make sure it has the same number of
+ // elements on both sides.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) {
+ VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
+ VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
+
+ // Verify that either both or neither are vectors.
+ if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
+ // If vectors, verify that they have the same number of elements.
+ if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
+ return 0;
+ }
+
+ Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
+ Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
+
+ return SelectInst::Create(SI->getCondition(),
+ SelectTrueVal, SelectFalseVal);
+ }
+ return 0;
+}
+
+
+/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
+/// has a PHI node as operand #0, see if we can fold the instruction into the
+/// PHI (which is only possible if all operands to the PHI are constants).
+///
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
+ PHINode *PN = cast<PHINode>(I.getOperand(0));
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (NumPHIValues == 0)
+ return 0;
+
+ // We normally only transform phis with a single use. However, if a PHI has
+ // multiple uses and they are all the same operation, we can fold *all* of the
+ // uses into the PHI.
+ if (!PN->hasOneUse()) {
+ // Walk the use list for the instruction, comparing them to I.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User != &I && !I.isIdenticalTo(User))
+ return 0;
+ }
+ // Otherwise, we can replace *all* users with the new PHI we form.
+ }
+
+ // Check to see if all of the operands of the PHI are simple constants
+ // (constantint/constantfp/undef). If there is one non-constant value,
+ // remember the BB it is in. If there is more than one or if *it* is a PHI,
+ // bail out. We don't do arbitrary constant expressions here because moving
+ // their computation can be expensive without a cost model.
+ BasicBlock *NonConstBB = 0;
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
+ continue;
+
+ if (isa<PHINode>(InVal)) return 0; // Itself a phi.
+ if (NonConstBB) return 0; // More than one non-const value.
+
+ NonConstBB = PN->getIncomingBlock(i);
+
+ // If the InVal is an invoke at the end of the pred block, then we can't
+ // insert a computation after it without breaking the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == NonConstBB)
+ return 0;
+
+ // If the incoming non-constant value is in I's block, we will remove one
+ // instruction, but insert another equivalent one, leading to infinite
+ // instcombine.
+ if (NonConstBB == I.getParent())
+ return 0;
+ }
+
+ // If there is exactly one non-constant value, we can insert a copy of the
+ // operation in that block. However, if this is a critical edge, we would be
+ // inserting the computation one some other paths (e.g. inside a loop). Only
+ // do this if the pred block is unconditionally branching into the phi block.
+ if (NonConstBB != 0) {
+ BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
+ if (!BI || !BI->isUnconditional()) return 0;
+ }
+
+ // Okay, we can do the transformation: create the new PHI node.
+ PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
+ InsertNewInstBefore(NewPN, *PN);
+ NewPN->takeName(PN);
+
+ // If we are going to have to insert a new computation, do so right before the
+ // predecessors terminator.
+ if (NonConstBB)
+ Builder->SetInsertPoint(NonConstBB->getTerminator());
+
+ // Next, add all of the operands to the PHI.
+ if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+ // We only currently try to fold the condition of a select when it is a phi,
+ // not the true/false values.
+ Value *TrueV = SI->getTrueValue();
+ Value *FalseV = SI->getFalseValue();
+ BasicBlock *PhiTransBB = PN->getParent();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ BasicBlock *ThisBB = PN->getIncomingBlock(i);
+ Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
+ else
+ InV = Builder->CreateSelect(PN->getIncomingValue(i),
+ TrueVInPred, FalseVInPred, "phitmp");
+ NewPN->addIncoming(InV, ThisBB);
+ }
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+ else if (isa<ICmpInst>(CI))
+ InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ else
+ InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else if (I.getNumOperands() == 2) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::get(I.getOpcode(), InC, C);
+ else
+ InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
+ PN->getIncomingValue(i), C, "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else {
+ CastInst *CI = cast<CastInst>(&I);
+ Type *RetTy = CI->getType();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
+ else
+ InV = Builder->CreateCast(CI->getOpcode(),
+ PN->getIncomingValue(i), I.getType(), "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ }
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ if (User == &I) continue;
+ ReplaceInstUsesWith(*User, NewPN);
+ EraseInstFromFunction(*User);
+ }
+ return ReplaceInstUsesWith(I, NewPN);
+}
+
+/// FindElementAtOffset - Given a type and a constant offset, determine whether
+/// or not there is a sequence of GEP indices into the type that will land us at
+/// the specified offset. If so, fill them into NewIndices and return the
+/// resultant element type, otherwise return null.
+Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices) {
+ if (!TD) return 0;
+ if (!Ty->isSized()) return 0;
+
+ // Start with the index over the outer type. Note that the type size
+ // might be zero (even if the offset isn't zero) if the indexed type
+ // is something like [0 x {int, int}]
+ Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ int64_t FirstIdx = 0;
+ if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+ FirstIdx = Offset/TySize;
+ Offset -= FirstIdx*TySize;
+
+ // Handle hosts where % returns negative instead of values [0..TySize).
+ if (Offset < 0) {
+ --FirstIdx;
+ Offset += TySize;
+ assert(Offset >= 0);
+ }
+ assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+ }
+
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+ // Index into the types. If we fail, set OrigBase to null.
+ while (Offset) {
+ // Indexing into tail padding between struct/array elements.
+ if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+ return 0;
+
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ assert(Offset < (int64_t)SL->getSizeInBytes() &&
+ "Offset must stay within the indexed type");
+
+ unsigned Elt = SL->getElementContainingOffset(Offset);
+ NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ Elt));
+
+ Offset -= SL->getElementOffset(Elt);
+ Ty = STy->getElementType(Elt);
+ } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+ assert(EltSize && "Cannot index into a zero-sized array");
+ NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+ Offset %= EltSize;
+ Ty = AT->getElementType();
+ } else {
+ // Otherwise, we can't index into the middle of this atomic type, bail.
+ return 0;
+ }
+ }
+
+ return Ty;
+}
+
+static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
+ // If this GEP has only 0 indices, it is the same pointer as
+ // Src. If Src is not a trivial GEP too, don't combine
+ // the indices.
+ if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
+ !Src.hasOneUse())
+ return false;
+ return true;
+}
+
+/// Descale - Return a value X such that Val = X * Scale, or null if none. If
+/// the multiplication is known not to overflow then NoSignedWrap is set.
+Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
+ assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
+ assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
+ Scale.getBitWidth() && "Scale not compatible with value!");
+
+ // If Val is zero or Scale is one then Val = Val * Scale.
+ if (match(Val, m_Zero()) || Scale == 1) {
+ NoSignedWrap = true;
+ return Val;
+ }
+
+ // If Scale is zero then it does not divide Val.
+ if (Scale.isMinValue())
+ return 0;
+
+ // Look through chains of multiplications, searching for a constant that is
+ // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
+ // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
+ // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
+ // down from Val:
+ //
+ // Val = M1 * X || Analysis starts here and works down
+ // M1 = M2 * Y || Doesn't descend into terms with more
+ // M2 = Z * 4 \/ than one use
+ //
+ // Then to modify a term at the bottom:
+ //
+ // Val = M1 * X
+ // M1 = Z * Y || Replaced M2 with Z
+ //
+ // Then to work back up correcting nsw flags.
+
+ // Op - the term we are currently analyzing. Starts at Val then drills down.
+ // Replaced with its descaled value before exiting from the drill down loop.
+ Value *Op = Val;
+
+ // Parent - initially null, but after drilling down notes where Op came from.
+ // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
+ // 0'th operand of Val.
+ std::pair<Instruction*, unsigned> Parent;
+
+ // RequireNoSignedWrap - Set if the transform requires a descaling at deeper
+ // levels that doesn't overflow.
+ bool RequireNoSignedWrap = false;
+
+ // logScale - log base 2 of the scale. Negative if not a power of 2.
+ int32_t logScale = Scale.exactLogBase2();
+
+ for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // If Op is a constant divisible by Scale then descale to the quotient.
+ APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
+ APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
+ if (!Remainder.isMinValue())
+ // Not divisible by Scale.
+ return 0;
+ // Replace with the quotient in the parent.
+ Op = ConstantInt::get(CI->getType(), Quotient);
+ NoSignedWrap = true;
+ break;
+ }
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
+
+ if (BO->getOpcode() == Instruction::Mul) {
+ // Multiplication.
+ NoSignedWrap = BO->hasNoSignedWrap();
+ if (RequireNoSignedWrap && !NoSignedWrap)
+ return 0;
+
+ // There are three cases for multiplication: multiplication by exactly
+ // the scale, multiplication by a constant different to the scale, and
+ // multiplication by something else.
+ Value *LHS = BO->getOperand(0);
+ Value *RHS = BO->getOperand(1);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Multiplication by a constant.
+ if (CI->getValue() == Scale) {
+ // Multiplication by exactly the scale, replace the multiplication
+ // by its left-hand side in the parent.
+ Op = LHS;
+ break;
+ }
+
+ // Otherwise drill down into the constant.
+ if (!Op->hasOneUse())
+ return 0;
+
+ Parent = std::make_pair(BO, 1);
+ continue;
+ }
+
+ // Multiplication by something else. Drill down into the left-hand side
+ // since that's where the reassociate pass puts the good stuff.
+ if (!Op->hasOneUse())
+ return 0;
+
+ Parent = std::make_pair(BO, 0);
+ continue;
+ }
+
+ if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(BO->getOperand(1))) {
+ // Multiplication by a power of 2.
+ NoSignedWrap = BO->hasNoSignedWrap();
+ if (RequireNoSignedWrap && !NoSignedWrap)
+ return 0;
+
+ Value *LHS = BO->getOperand(0);
+ int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
+ getLimitedValue(Scale.getBitWidth());
+ // Op = LHS << Amt.
+
+ if (Amt == logScale) {
+ // Multiplication by exactly the scale, replace the multiplication
+ // by its left-hand side in the parent.
+ Op = LHS;
+ break;
+ }
+ if (Amt < logScale || !Op->hasOneUse())
+ return 0;
+
+ // Multiplication by more than the scale. Reduce the multiplying amount
+ // by the scale in the parent.
+ Parent = std::make_pair(BO, 1);
+ Op = ConstantInt::get(BO->getType(), Amt - logScale);
+ break;
+ }
+ }
+
+ if (!Op->hasOneUse())
+ return 0;
+
+ if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
+ if (Cast->getOpcode() == Instruction::SExt) {
+ // Op is sign-extended from a smaller type, descale in the smaller type.
+ unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+ APInt SmallScale = Scale.trunc(SmallSize);
+ // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
+ // descale Op as (sext Y) * Scale. In order to have
+ // sext (Y * SmallScale) = (sext Y) * Scale
+ // some conditions need to hold however: SmallScale must sign-extend to
+ // Scale and the multiplication Y * SmallScale should not overflow.
+ if (SmallScale.sext(Scale.getBitWidth()) != Scale)
+ // SmallScale does not sign-extend to Scale.
+ return 0;
+ assert(SmallScale.exactLogBase2() == logScale);
+ // Require that Y * SmallScale must not overflow.
+ RequireNoSignedWrap = true;
+
+ // Drill down through the cast.
+ Parent = std::make_pair(Cast, 0);
+ Scale = SmallScale;
+ continue;
+ }
+
+ if (Cast->getOpcode() == Instruction::Trunc) {
+ // Op is truncated from a larger type, descale in the larger type.
+ // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
+ // trunc (Y * sext Scale) = (trunc Y) * Scale
+ // always holds. However (trunc Y) * Scale may overflow even if
+ // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
+ // from this point up in the expression (see later).
+ if (RequireNoSignedWrap)
+ return 0;
+
+ // Drill down through the cast.
+ unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+ Parent = std::make_pair(Cast, 0);
+ Scale = Scale.sext(LargeSize);
+ if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
+ logScale = -1;
+ assert(Scale.exactLogBase2() == logScale);
+ continue;
+ }
+ }
+
+ // Unsupported expression, bail out.
+ return 0;
+ }
+
+ // We know that we can successfully descale, so from here on we can safely
+ // modify the IR. Op holds the descaled version of the deepest term in the
+ // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
+ // not to overflow.
+
+ if (!Parent.first)
+ // The expression only had one term.
+ return Op;
+
+ // Rewrite the parent using the descaled version of its operand.
+ assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
+ assert(Op != Parent.first->getOperand(Parent.second) &&
+ "Descaling was a no-op?");
+ Parent.first->setOperand(Parent.second, Op);
+ Worklist.Add(Parent.first);
+
+ // Now work back up the expression correcting nsw flags. The logic is based
+ // on the following observation: if X * Y is known not to overflow as a signed
+ // multiplication, and Y is replaced by a value Z with smaller absolute value,
+ // then X * Z will not overflow as a signed multiplication either. As we work
+ // our way up, having NoSignedWrap 'true' means that the descaled value at the
+ // current level has strictly smaller absolute value than the original.
+ Instruction *Ancestor = Parent.first;
+ do {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
+ // If the multiplication wasn't nsw then we can't say anything about the
+ // value of the descaled multiplication, and we have to clear nsw flags
+ // from this point on up.
+ bool OpNoSignedWrap = BO->hasNoSignedWrap();
+ NoSignedWrap &= OpNoSignedWrap;
+ if (NoSignedWrap != OpNoSignedWrap) {
+ BO->setHasNoSignedWrap(NoSignedWrap);
+ Worklist.Add(Ancestor);
+ }
+ } else if (Ancestor->getOpcode() == Instruction::Trunc) {
+ // The fact that the descaled input to the trunc has smaller absolute
+ // value than the original input doesn't tell us anything useful about
+ // the absolute values of the truncations.
+ NoSignedWrap = false;
+ }
+ assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
+ "Failed to keep proper track of nsw flags while drilling down?");
+
+ if (Ancestor == Val)
+ // Got to the top, all done!
+ return Val;
+
+ // Move up one level in the expression.
+ assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
+ Ancestor = Ancestor->use_back();
+ } while (1);
+}
+
+Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
+
+ if (Value *V = SimplifyGEPInst(Ops, TD))
+ return ReplaceInstUsesWith(GEP, V);
+
+ Value *PtrOp = GEP.getOperand(0);
+
+ // Eliminate unneeded casts for indices, and replace indices which displace
+ // by multiples of a zero size type with zero.
+ if (TD) {
+ bool MadeChange = false;
+ Type *IntPtrTy = TD->getIntPtrType(GEP.getPointerOperandType());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
+ I != E; ++I, ++GTI) {
+ // Skip indices into struct types.
+ SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ if (!SeqTy) continue;
+
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ if (SeqTy->getElementType()->isSized() &&
+ TD->getTypeAllocSize(SeqTy->getElementType()) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(IntPtrTy);
+ MadeChange = true;
+ }
+
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ MadeChange = true;
+ }
+ }
+ if (MadeChange) return &GEP;
+ }
+
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
+ //
+ if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+ if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
+ return 0;
+
+ // Note that if our source is a gep chain itself then we wait for that
+ // chain to be resolved before we perform this transformation. This
+ // avoids us creating a TON of code in some cases.
+ if (GEPOperator *SrcGEP =
+ dyn_cast<GEPOperator>(Src->getOperand(0)))
+ if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
+ return 0; // Wait until our source is folded to completion.
+
+ SmallVector<Value*, 8> Indices;
+
+ // Find out whether the last index in the source GEP is a sequential idx.
+ bool EndsWithSequential = false;
+ for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+ I != E; ++I)
+ EndsWithSequential = !(*I)->isStructTy();
+
+ // Can we combine the two pointer arithmetics offsets?
+ if (EndsWithSequential) {
+ // Replace: gep (gep %P, long B), long A, ...
+ // With: T = long A+B; gep %P, T, ...
+ //
+ Value *Sum;
+ Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+ Value *GO1 = GEP.getOperand(1);
+ if (SO1 == Constant::getNullValue(SO1->getType())) {
+ Sum = GO1;
+ } else if (GO1 == Constant::getNullValue(GO1->getType())) {
+ Sum = SO1;
+ } else {
+ // If they aren't the same type, then the input hasn't been processed
+ // by the loop above yet (which canonicalizes sequential index types to
+ // intptr_t). Just avoid transforming this until the input has been
+ // normalized.
+ if (SO1->getType() != GO1->getType())
+ return 0;
+ Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
+ }
+
+ // Update the GEP in place if possible.
+ if (Src->getNumOperands() == 2) {
+ GEP.setOperand(0, Src->getOperand(0));
+ GEP.setOperand(1, Sum);
+ return &GEP;
+ }
+ Indices.append(Src->op_begin()+1, Src->op_end()-1);
+ Indices.push_back(Sum);
+ Indices.append(GEP.op_begin()+2, GEP.op_end());
+ } else if (isa<Constant>(*GEP.idx_begin()) &&
+ cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+ Src->getNumOperands() != 1) {
+ // Otherwise we can do the fold if the first index of the GEP is a zero
+ Indices.append(Src->op_begin()+1, Src->op_end());
+ Indices.append(GEP.idx_begin()+1, GEP.idx_end());
+ }
+
+ if (!Indices.empty())
+ return (GEP.isInBounds() && Src->isInBounds()) ?
+ GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices,
+ GEP.getName()) :
+ GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
+ }
+
+ // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+ Value *StrippedPtr = PtrOp->stripPointerCasts();
+ PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
+
+ // We do not handle pointer-vector geps here.
+ if (!StrippedPtrTy)
+ return 0;
+
+ if (StrippedPtr != PtrOp &&
+ StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
+
+ bool HasZeroPointerIndex = false;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isZero();
+
+ // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+ // into : GEP [10 x i8]* X, i32 0, ...
+ //
+ // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+ // into : GEP i8* X, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ if (HasZeroPointerIndex) {
+ PointerType *CPTy = cast<PointerType>(PtrOp->getType());
+ if (ArrayType *CATy =
+ dyn_cast<ArrayType>(CPTy->getElementType())) {
+ // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+ // -> GEP i8* X, ...
+ SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
+ GetElementPtrInst *Res =
+ GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
+ Res->setIsInBounds(GEP.isInBounds());
+ return Res;
+ }
+
+ if (ArrayType *XATy =
+ dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){
+ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == XATy->getElementType()) {
+ // -> GEP [10 x i8]* X, i32 0, ...
+ // At this point, we know that the cast source type is a pointer
+ // to an array of the same type as the destination pointer
+ // array. Because the array type is never stepped over (there
+ // is a leading zero) we can fold the cast into this GEP.
+ GEP.setOperand(0, StrippedPtr);
+ return &GEP;
+ }
+ }
+ }
+ } else if (GEP.getNumOperands() == 2) {
+ // Transform things like:
+ // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+ // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+ Type *SrcElTy = StrippedPtrTy->getElementType();
+ Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
+ if (TD && SrcElTy->isArrayTy() &&
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(ResElTy)) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = GEP.getOperand(1);
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ // V and GEP are both pointer types --> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+
+ // Transform things like:
+ // %V = mul i64 %N, 4
+ // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
+ // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
+ if (TD && ResElTy->isSized() && SrcElTy->isSized()) {
+ // Check that changing the type amounts to dividing the index by a scale
+ // factor.
+ uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
+ uint64_t SrcSize = TD->getTypeAllocSize(SrcElTy);
+ if (ResSize && SrcSize % ResSize == 0) {
+ Value *Idx = GEP.getOperand(1);
+ unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
+ uint64_t Scale = SrcSize / ResSize;
+
+ // Earlier transforms ensure that the index has type IntPtrType, which
+ // considerably simplifies the logic by eliminating implicit casts.
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ "Index not cast to pointer width?");
+
+ bool NSW;
+ if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
+ // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
+ // If the multiplication NewIdx * Scale may overflow then the new
+ // GEP may not be "inbounds".
+ Value *NewGEP = GEP.isInBounds() && NSW ?
+ Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
+ // The NewGEP must be pointer typed, so must the old one -> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+ }
+ }
+
+ // Similarly, transform things like:
+ // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
+ // (where tmp = 8*tmp2) into:
+ // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
+ if (TD && ResElTy->isSized() && SrcElTy->isSized() &&
+ SrcElTy->isArrayTy()) {
+ // Check that changing to the array element type amounts to dividing the
+ // index by a scale factor.
+ uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
+ uint64_t ArrayEltSize =
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+ if (ResSize && ArrayEltSize % ResSize == 0) {
+ Value *Idx = GEP.getOperand(1);
+ unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
+ uint64_t Scale = ArrayEltSize / ResSize;
+
+ // Earlier transforms ensure that the index has type IntPtrType, which
+ // considerably simplifies the logic by eliminating implicit casts.
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ "Index not cast to pointer width?");
+
+ bool NSW;
+ if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
+ // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
+ // If the multiplication NewIdx * Scale may overflow then the new
+ // GEP may not be "inbounds".
+ Value *Off[2];
+ Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Off[1] = NewIdx;
+ Value *NewGEP = GEP.isInBounds() && NSW ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
+ // The NewGEP must be pointer typed, so must the old one -> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+ }
+ }
+ }
+ }
+
+ /// See if we can simplify:
+ /// X = bitcast A* to B*
+ /// Y = gep X, <...constant indices...>
+ /// into a gep of the original struct. This is important for SROA and alias
+ /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+ APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
+ if (TD &&
+ !isa<BitCastInst>(BCI->getOperand(0)) &&
+ GEP.accumulateConstantOffset(*TD, Offset) &&
+ StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
+
+ // If this GEP instruction doesn't move the pointer, just replace the GEP
+ // with a bitcast of the real input to the dest type.
+ if (!Offset) {
+ // If the bitcast is of an allocation, and the allocation will be
+ // converted to match the type of the cast, don't touch this.
+ if (isa<AllocaInst>(BCI->getOperand(0)) ||
+ isAllocationFn(BCI->getOperand(0), TLI)) {
+ // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+ if (Instruction *I = visitBitCast(*BCI)) {
+ if (I != BCI) {
+ I->takeName(BCI);
+ BCI->getParent()->getInstList().insert(BCI, I);
+ ReplaceInstUsesWith(*BCI, I);
+ }
+ return &GEP;
+ }
+ }
+ return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ }
+
+ // Otherwise, if the offset is non-zero, we need to find out if there is a
+ // field at Offset in 'A's type. If so, we can pull the cast through the
+ // GEP.
+ SmallVector<Value*, 8> NewIndices;
+ Type *InTy =
+ cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
+ if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) {
+ Value *NGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
+ Builder->CreateGEP(BCI->getOperand(0), NewIndices);
+
+ if (NGEP->getType() == GEP.getType())
+ return ReplaceInstUsesWith(GEP, NGEP);
+ NGEP->takeName(&GEP);
+ return new BitCastInst(NGEP, GEP.getType());
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+
+static bool
+isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<Instruction*, 4> Worklist;
+ Worklist.push_back(AI);
+
+ do {
+ Instruction *PI = Worklist.pop_back_val();
+ for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); UI != UE;
+ ++UI) {
+ Instruction *I = cast<Instruction>(*UI);
+ switch (I->getOpcode()) {
+ default:
+ // Give up the moment we see something we can't handle.
+ return false;
+
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ Users.push_back(I);
+ Worklist.push_back(I);
+ continue;
+
+ case Instruction::ICmp: {
+ ICmpInst *ICI = cast<ICmpInst>(I);
+ // We can fold eq/ne comparisons with null to false/true, respectively.
+ if (!ICI->isEquality() || !isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return false;
+ Users.push_back(I);
+ continue;
+ }
+
+ case Instruction::Call:
+ // Ignore no-op and store intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::memset: {
+ MemIntrinsic *MI = cast<MemIntrinsic>(II);
+ if (MI->isVolatile() || MI->getRawDest() != PI)
+ return false;
+ }
+ // fall through
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ Users.push_back(I);
+ continue;
+ }
+ }
+
+ if (isFreeCall(I, TLI)) {
+ Users.push_back(I);
+ continue;
+ }
+ return false;
+
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(I);
+ if (SI->isVolatile() || SI->getPointerOperand() != PI)
+ return false;
+ Users.push_back(I);
+ continue;
+ }
+ }
+ llvm_unreachable("missing a return?");
+ }
+ } while (!Worklist.empty());
+ return true;
+}
+
+Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
+ // If we have a malloc call which is only used in any amount of comparisons
+ // to null and free calls, delete the calls and replace the comparisons with
+ // true or false as appropriate.
+ SmallVector<WeakVH, 64> Users;
+ if (isAllocSiteRemovable(&MI, Users, TLI)) {
+ for (unsigned i = 0, e = Users.size(); i != e; ++i) {
+ Instruction *I = cast_or_null<Instruction>(&*Users[i]);
+ if (!I) continue;
+
+ if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
+ ReplaceInstUsesWith(*C,
+ ConstantInt::get(Type::getInt1Ty(C->getContext()),
+ C->isFalseWhenEqual()));
+ } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
+ ReplaceInstUsesWith(*I, UndefValue::get(I->getType()));
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::objectsize) {
+ ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
+ uint64_t DontKnow = CI->isZero() ? -1ULL : 0;
+ ReplaceInstUsesWith(*I, ConstantInt::get(I->getType(), DontKnow));
+ }
+ }
+ EraseInstFromFunction(*I);
+ }
+
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
+ // Replace invoke with a NOP intrinsic to maintain the original CFG
+ Module *M = II->getParent()->getParent()->getParent();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
+ InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
+ ArrayRef<Value *>(), "", II->getParent());
+ }
+ return EraseInstFromFunction(MI);
+ }
+ return 0;
+}
+
+/// \brief Move the call to free before a NULL test.
+///
+/// Check if this free is accessed after its argument has been test
+/// against NULL (property 0).
+/// If yes, it is legal to move this call in its predecessor block.
+///
+/// The move is performed only if the block containing the call to free
+/// will be removed, i.e.:
+/// 1. it has only one predecessor P, and P has two successors
+/// 2. it contains the call and an unconditional branch
+/// 3. its successor is the same as its predecessor's successor
+///
+/// The profitability is out-of concern here and this function should
+/// be called only if the caller knows this transformation would be
+/// profitable (e.g., for code size).
+static Instruction *
+tryToMoveFreeBeforeNullTest(CallInst &FI) {
+ Value *Op = FI.getArgOperand(0);
+ BasicBlock *FreeInstrBB = FI.getParent();
+ BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
+
+ // Validate part of constraint #1: Only one predecessor
+ // FIXME: We can extend the number of predecessor, but in that case, we
+ // would duplicate the call to free in each predecessor and it may
+ // not be profitable even for code size.
+ if (!PredBB)
+ return 0;
+
+ // Validate constraint #2: Does this block contains only the call to
+ // free and an unconditional branch?
+ // FIXME: We could check if we can speculate everything in the
+ // predecessor block
+ if (FreeInstrBB->size() != 2)
+ return 0;
+ BasicBlock *SuccBB;
+ if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
+ return 0;
+
+ // Validate the rest of constraint #1 by matching on the pred branch.
+ TerminatorInst *TI = PredBB->getTerminator();
+ BasicBlock *TrueBB, *FalseBB;
+ ICmpInst::Predicate Pred;
+ if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
+ return 0;
+ if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
+ return 0;
+
+ // Validate constraint #3: Ensure the null case just falls through.
+ if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
+ return 0;
+ assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
+ "Broken CFG: missing edge from predecessor to successor");
+
+ FI.moveBefore(TI);
+ return &FI;
+}
+
+
+Instruction *InstCombiner::visitFree(CallInst &FI) {
+ Value *Op = FI.getArgOperand(0);
+
+ // free undef -> unreachable.
+ if (isa<UndefValue>(Op)) {
+ // Insert a new store to null because we cannot modify the CFG here.
+ Builder->CreateStore(ConstantInt::getTrue(FI.getContext()),
+ UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
+ return EraseInstFromFunction(FI);
+ }
+
+ // If we have 'free null' delete the instruction. This can happen in stl code
+ // when lots of inlining happens.
+ if (isa<ConstantPointerNull>(Op))
+ return EraseInstFromFunction(FI);
+
+ // If we optimize for code size, try to move the call to free before the null
+ // test so that simplify cfg can remove the empty block and dead code
+ // elimination the branch. I.e., helps to turn something like:
+ // if (foo) free(foo);
+ // into
+ // free(foo);
+ if (MinimizeSize)
+ if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
+ return I;
+
+ return 0;
+}
+
+
+
+Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+ // Change br (not X), label True, label False to: br X, label False, True
+ Value *X = 0;
+ BasicBlock *TrueDest;
+ BasicBlock *FalseDest;
+ if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+ !isa<Constant>(X)) {
+ // Swap Destinations and condition...
+ BI.setCondition(X);
+ BI.swapSuccessors();
+ return &BI;
+ }
+
+ // Cannonicalize fcmp_one -> fcmp_oeq
+ FCmpInst::Predicate FPred; Value *Y;
+ if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+ FPred == FCmpInst::FCMP_OGE) {
+ FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
+ Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
+
+ // Swap Destinations and condition.
+ BI.swapSuccessors();
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ // Cannonicalize icmp_ne -> icmp_eq
+ ICmpInst::Predicate IPred;
+ if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
+ IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+ IPred == ICmpInst::ICMP_SGE) {
+ ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
+ Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
+ // Swap Destinations and condition.
+ BI.swapSuccessors();
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
+ Value *Cond = SI.getCondition();
+ if (Instruction *I = dyn_cast<Instruction>(Cond)) {
+ if (I->getOpcode() == Instruction::Add)
+ if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // change 'switch (X+4) case 1:' into 'switch (X) case -3'
+ // Skip the first item since that's the default case.
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ ConstantInt* CaseVal = i.getCaseValue();
+ Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal),
+ AddRHS);
+ assert(isa<ConstantInt>(NewCaseVal) &&
+ "Result of expression should be constant");
+ i.setValue(cast<ConstantInt>(NewCaseVal));
+ }
+ SI.setCondition(I->getOperand(0));
+ Worklist.Add(I);
+ return &SI;
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+ Value *Agg = EV.getAggregateOperand();
+
+ if (!EV.hasIndices())
+ return ReplaceInstUsesWith(EV, Agg);
+
+ if (Constant *C = dyn_cast<Constant>(Agg)) {
+ if (Constant *C2 = C->getAggregateElement(*EV.idx_begin())) {
+ if (EV.getNumIndices() == 0)
+ return ReplaceInstUsesWith(EV, C2);
+ // Extract the remaining indices out of the constant indexed by the
+ // first index
+ return ExtractValueInst::Create(C2, EV.getIndices().slice(1));
+ }
+ return 0; // Can't handle other constants
+ }
+
+ if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+ // We're extracting from an insertvalue instruction, compare the indices
+ const unsigned *exti, *exte, *insi, *inse;
+ for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+ exte = EV.idx_end(), inse = IV->idx_end();
+ exti != exte && insi != inse;
+ ++exti, ++insi) {
+ if (*insi != *exti)
+ // The insert and extract both reference distinctly different elements.
+ // This means the extract is not influenced by the insert, and we can
+ // replace the aggregate operand of the extract with the aggregate
+ // operand of the insert. i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 0
+ // with
+ // %E = extractvalue { i32, { i32 } } %A, 0
+ return ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.getIndices());
+ }
+ if (exti == exte && insi == inse)
+ // Both iterators are at the end: Index lists are identical. Replace
+ // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %C = extractvalue { i32, { i32 } } %B, 1, 0
+ // with "i32 42"
+ return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
+ if (exti == exte) {
+ // The extract list is a prefix of the insert list. i.e. replace
+ // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %E = extractvalue { i32, { i32 } } %I, 1
+ // with
+ // %X = extractvalue { i32, { i32 } } %A, 1
+ // %E = insertvalue { i32 } %X, i32 42, 0
+ // by switching the order of the insert and extract (though the
+ // insertvalue should be left in, since it may have other uses).
+ Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
+ EV.getIndices());
+ return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+ makeArrayRef(insi, inse));
+ }
+ if (insi == inse)
+ // The insert list is a prefix of the extract list
+ // We can simply remove the common indices from the extract and make it
+ // operate on the inserted value instead of the insertvalue result.
+ // i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 1, 0
+ // with
+ // %E extractvalue { i32 } { i32 42 }, 0
+ return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+ makeArrayRef(exti, exte));
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+ // We're extracting from an intrinsic, see if we're the only user, which
+ // allows us to simplify multiple result intrinsics to simpler things that
+ // just get one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+
+ // If the normal result of the add is dead, and the RHS is a constant,
+ // we can transform this into a range comparison.
+ // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
+ if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
+ return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
+ ConstantExpr::getNot(CI));
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Agg))
+ // If the (non-volatile) load only has one use, we can rewrite this to a
+ // load from a GEP. This reduces the size of the load.
+ // FIXME: If a load is used only by extractvalue instructions then this
+ // could be done regardless of having multiple uses.
+ if (L->isSimple() && L->hasOneUse()) {
+ // extractvalue has integer indices, getelementptr has Value*s. Convert.
+ SmallVector<Value*, 4> Indices;
+ // Prefix an i32 0 since we need the first element.
+ Indices.push_back(Builder->getInt32(0));
+ for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end();
+ I != E; ++I)
+ Indices.push_back(Builder->getInt32(*I));
+
+ // We need to insert these at the location of the old load, not at that of
+ // the extractvalue.
+ Builder->SetInsertPoint(L->getParent(), L);
+ Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices);
+ // Returning the load directly will cause the main loop to insert it in
+ // the wrong spot, so use ReplaceInstUsesWith().
+ return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
+ }
+ // We could simplify extracts from other values. Note that nested extracts may
+ // already be simplified implicitly by the above: extract (extract (insert) )
+ // will be translated into extract ( insert ( extract ) ) first and then just
+ // the value inserted, if appropriate. Similarly for extracts from single-use
+ // loads: extract (extract (load)) will be translated to extract (load (gep))
+ // and if again single-use then via load (gep (gep)) to load (gep).
+ // However, double extracts from e.g. function arguments or return values
+ // aren't handled yet.
+ return 0;
+}
+
+enum Personality_Type {
+ Unknown_Personality,
+ GNU_Ada_Personality,
+ GNU_CXX_Personality,
+ GNU_ObjC_Personality
+};
+
+/// RecognizePersonality - See if the given exception handling personality
+/// function is one that we understand. If so, return a description of it;
+/// otherwise return Unknown_Personality.
+static Personality_Type RecognizePersonality(Value *Pers) {
+ Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
+ if (!F)
+ return Unknown_Personality;
+ return StringSwitch<Personality_Type>(F->getName())
+ .Case("__gnat_eh_personality", GNU_Ada_Personality)
+ .Case("__gxx_personality_v0", GNU_CXX_Personality)
+ .Case("__objc_personality_v0", GNU_ObjC_Personality)
+ .Default(Unknown_Personality);
+}
+
+/// isCatchAll - Return 'true' if the given typeinfo will match anything.
+static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) {
+ switch (Personality) {
+ case Unknown_Personality:
+ return false;
+ case GNU_Ada_Personality:
+ // While __gnat_all_others_value will match any Ada exception, it doesn't
+ // match foreign exceptions (or didn't, before gcc-4.7).
+ return false;
+ case GNU_CXX_Personality:
+ case GNU_ObjC_Personality:
+ return TypeInfo->isNullValue();
+ }
+ llvm_unreachable("Unknown personality!");
+}
+
+static bool shorter_filter(const Value *LHS, const Value *RHS) {
+ return
+ cast<ArrayType>(LHS->getType())->getNumElements()
+ <
+ cast<ArrayType>(RHS->getType())->getNumElements();
+}
+
+Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
+ // The logic here should be correct for any real-world personality function.
+ // However if that turns out not to be true, the offending logic can always
+ // be conditioned on the personality function, like the catch-all logic is.
+ Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn());
+
+ // Simplify the list of clauses, eg by removing repeated catch clauses
+ // (these are often created by inlining).
+ bool MakeNewInstruction = false; // If true, recreate using the following:
+ SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction;
+ bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
+
+ SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
+ for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
+ bool isLastClause = i + 1 == e;
+ if (LI.isCatch(i)) {
+ // A catch clause.
+ Value *CatchClause = LI.getClause(i);
+ Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts());
+
+ // If we already saw this clause, there is no point in having a second
+ // copy of it.
+ if (AlreadyCaught.insert(TypeInfo)) {
+ // This catch clause was not already seen.
+ NewClauses.push_back(CatchClause);
+ } else {
+ // Repeated catch clause - drop the redundant copy.
+ MakeNewInstruction = true;
+ }
+
+ // If this is a catch-all then there is no point in keeping any following
+ // clauses or marking the landingpad as having a cleanup.
+ if (isCatchAll(Personality, TypeInfo)) {
+ if (!isLastClause)
+ MakeNewInstruction = true;
+ CleanupFlag = false;
+ break;
+ }
+ } else {
+ // A filter clause. If any of the filter elements were already caught
+ // then they can be dropped from the filter. It is tempting to try to
+ // exploit the filter further by saying that any typeinfo that does not
+ // occur in the filter can't be caught later (and thus can be dropped).
+ // However this would be wrong, since typeinfos can match without being
+ // equal (for example if one represents a C++ class, and the other some
+ // class derived from it).
+ assert(LI.isFilter(i) && "Unsupported landingpad clause!");
+ Value *FilterClause = LI.getClause(i);
+ ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
+ unsigned NumTypeInfos = FilterType->getNumElements();
+
+ // An empty filter catches everything, so there is no point in keeping any
+ // following clauses or marking the landingpad as having a cleanup. By
+ // dealing with this case here the following code is made a bit simpler.
+ if (!NumTypeInfos) {
+ NewClauses.push_back(FilterClause);
+ if (!isLastClause)
+ MakeNewInstruction = true;
+ CleanupFlag = false;
+ break;
+ }
+
+ bool MakeNewFilter = false; // If true, make a new filter.
+ SmallVector<Constant *, 16> NewFilterElts; // New elements.
+ if (isa<ConstantAggregateZero>(FilterClause)) {
+ // Not an empty filter - it contains at least one null typeinfo.
+ assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
+ Constant *TypeInfo =
+ Constant::getNullValue(FilterType->getElementType());
+ // If this typeinfo is a catch-all then the filter can never match.
+ if (isCatchAll(Personality, TypeInfo)) {
+ // Throw the filter away.
+ MakeNewInstruction = true;
+ continue;
+ }
+
+ // There is no point in having multiple copies of this typeinfo, so
+ // discard all but the first copy if there is more than one.
+ NewFilterElts.push_back(TypeInfo);
+ if (NumTypeInfos > 1)
+ MakeNewFilter = true;
+ } else {
+ ConstantArray *Filter = cast<ConstantArray>(FilterClause);
+ SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
+ NewFilterElts.reserve(NumTypeInfos);
+
+ // Remove any filter elements that were already caught or that already
+ // occurred in the filter. While there, see if any of the elements are
+ // catch-alls. If so, the filter can be discarded.
+ bool SawCatchAll = false;
+ for (unsigned j = 0; j != NumTypeInfos; ++j) {
+ Value *Elt = Filter->getOperand(j);
+ Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts());
+ if (isCatchAll(Personality, TypeInfo)) {
+ // This element is a catch-all. Bail out, noting this fact.
+ SawCatchAll = true;
+ break;
+ }
+ if (AlreadyCaught.count(TypeInfo))
+ // Already caught by an earlier clause, so having it in the filter
+ // is pointless.
+ continue;
+ // There is no point in having multiple copies of the same typeinfo in
+ // a filter, so only add it if we didn't already.
+ if (SeenInFilter.insert(TypeInfo))
+ NewFilterElts.push_back(cast<Constant>(Elt));
+ }
+ // A filter containing a catch-all cannot match anything by definition.
+ if (SawCatchAll) {
+ // Throw the filter away.
+ MakeNewInstruction = true;
+ continue;
+ }
+
+ // If we dropped something from the filter, make a new one.
+ if (NewFilterElts.size() < NumTypeInfos)
+ MakeNewFilter = true;
+ }
+ if (MakeNewFilter) {
+ FilterType = ArrayType::get(FilterType->getElementType(),
+ NewFilterElts.size());
+ FilterClause = ConstantArray::get(FilterType, NewFilterElts);
+ MakeNewInstruction = true;
+ }
+
+ NewClauses.push_back(FilterClause);
+
+ // If the new filter is empty then it will catch everything so there is
+ // no point in keeping any following clauses or marking the landingpad
+ // as having a cleanup. The case of the original filter being empty was
+ // already handled above.
+ if (MakeNewFilter && !NewFilterElts.size()) {
+ assert(MakeNewInstruction && "New filter but not a new instruction!");
+ CleanupFlag = false;
+ break;
+ }
+ }
+ }
+
+ // If several filters occur in a row then reorder them so that the shortest
+ // filters come first (those with the smallest number of elements). This is
+ // advantageous because shorter filters are more likely to match, speeding up
+ // unwinding, but mostly because it increases the effectiveness of the other
+ // filter optimizations below.
+ for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
+ unsigned j;
+ // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
+ for (j = i; j != e; ++j)
+ if (!isa<ArrayType>(NewClauses[j]->getType()))
+ break;
+
+ // Check whether the filters are already sorted by length. We need to know
+ // if sorting them is actually going to do anything so that we only make a
+ // new landingpad instruction if it does.
+ for (unsigned k = i; k + 1 < j; ++k)
+ if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
+ // Not sorted, so sort the filters now. Doing an unstable sort would be
+ // correct too but reordering filters pointlessly might confuse users.
+ std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
+ shorter_filter);
+ MakeNewInstruction = true;
+ break;
+ }
+
+ // Look for the next batch of filters.
+ i = j + 1;
+ }
+
+ // If typeinfos matched if and only if equal, then the elements of a filter L
+ // that occurs later than a filter F could be replaced by the intersection of
+ // the elements of F and L. In reality two typeinfos can match without being
+ // equal (for example if one represents a C++ class, and the other some class
+ // derived from it) so it would be wrong to perform this transform in general.
+ // However the transform is correct and useful if F is a subset of L. In that
+ // case L can be replaced by F, and thus removed altogether since repeating a
+ // filter is pointless. So here we look at all pairs of filters F and L where
+ // L follows F in the list of clauses, and remove L if every element of F is
+ // an element of L. This can occur when inlining C++ functions with exception
+ // specifications.
+ for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
+ // Examine each filter in turn.
+ Value *Filter = NewClauses[i];
+ ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
+ if (!FTy)
+ // Not a filter - skip it.
+ continue;
+ unsigned FElts = FTy->getNumElements();
+ // Examine each filter following this one. Doing this backwards means that
+ // we don't have to worry about filters disappearing under us when removed.
+ for (unsigned j = NewClauses.size() - 1; j != i; --j) {
+ Value *LFilter = NewClauses[j];
+ ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
+ if (!LTy)
+ // Not a filter - skip it.
+ continue;
+ // If Filter is a subset of LFilter, i.e. every element of Filter is also
+ // an element of LFilter, then discard LFilter.
+ SmallVector<Value *, 16>::iterator J = NewClauses.begin() + j;
+ // If Filter is empty then it is a subset of LFilter.
+ if (!FElts) {
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ // Move on to the next filter.
+ continue;
+ }
+ unsigned LElts = LTy->getNumElements();
+ // If Filter is longer than LFilter then it cannot be a subset of it.
+ if (FElts > LElts)
+ // Move on to the next filter.
+ continue;
+ // At this point we know that LFilter has at least one element.
+ if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
+ // Filter is a subset of LFilter iff Filter contains only zeros (as we
+ // already know that Filter is not longer than LFilter).
+ if (isa<ConstantAggregateZero>(Filter)) {
+ assert(FElts <= LElts && "Should have handled this case earlier!");
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ }
+ // Move on to the next filter.
+ continue;
+ }
+ ConstantArray *LArray = cast<ConstantArray>(LFilter);
+ if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
+ // Since Filter is non-empty and contains only zeros, it is a subset of
+ // LFilter iff LFilter contains a zero.
+ assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
+ for (unsigned l = 0; l != LElts; ++l)
+ if (LArray->getOperand(l)->isNullValue()) {
+ // LFilter contains a zero - discard it.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ break;
+ }
+ // Move on to the next filter.
+ continue;
+ }
+ // At this point we know that both filters are ConstantArrays. Loop over
+ // operands to see whether every element of Filter is also an element of
+ // LFilter. Since filters tend to be short this is probably faster than
+ // using a method that scales nicely.
+ ConstantArray *FArray = cast<ConstantArray>(Filter);
+ bool AllFound = true;
+ for (unsigned f = 0; f != FElts; ++f) {
+ Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
+ AllFound = false;
+ for (unsigned l = 0; l != LElts; ++l) {
+ Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
+ if (LTypeInfo == FTypeInfo) {
+ AllFound = true;
+ break;
+ }
+ }
+ if (!AllFound)
+ break;
+ }
+ if (AllFound) {
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ }
+ // Move on to the next filter.
+ }
+ }
+
+ // If we changed any of the clauses, replace the old landingpad instruction
+ // with a new one.
+ if (MakeNewInstruction) {
+ LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
+ LI.getPersonalityFn(),
+ NewClauses.size());
+ for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
+ NLI->addClause(NewClauses[i]);
+ // A landing pad with no clauses must have the cleanup flag set. It is
+ // theoretically possible, though highly unlikely, that we eliminated all
+ // clauses. If so, force the cleanup flag to true.
+ if (NewClauses.empty())
+ CleanupFlag = true;
+ NLI->setCleanup(CleanupFlag);
+ return NLI;
+ }
+
+ // Even if none of the clauses changed, we may nonetheless have understood
+ // that the cleanup flag is pointless. Clear it if so.
+ if (LI.isCleanup() != CleanupFlag) {
+ assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
+ LI.setCleanup(CleanupFlag);
+ return &LI;
+ }
+
+ return 0;
+}
+
+
+
+
+/// TryToSinkInstruction - Try to move the specified instruction from its
+/// current block into the beginning of DestBlock, which can only happen if it's
+/// safe to move the instruction past all of the instructions between it and the
+/// end of its block.
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+ assert(I->hasOneUse() && "Invariants didn't hold!");
+
+ // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I) || isa<LandingPadInst>(I) || I->mayHaveSideEffects() ||
+ isa<TerminatorInst>(I))
+ return false;
+
+ // Do not sink alloca instructions out of the entry block.
+ if (isa<AllocaInst>(I) && I->getParent() ==
+ &DestBlock->getParent()->getEntryBlock())
+ return false;
+
+ // We can only sink load instructions if there is nothing between the load and
+ // the end of block that could change the value.
+ if (I->mayReadFromMemory()) {
+ for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ Scan != E; ++Scan)
+ if (Scan->mayWriteToMemory())
+ return false;
+ }
+
+ BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
+ I->moveBefore(InsertPos);
+ ++NumSunkInst;
+ return true;
+}
+
+
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &Visited,
+ InstCombiner &IC,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ bool MadeIRChange = false;
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
+
+ SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
+ DenseMap<ConstantExpr*, Constant*> FoldedConstants;
+
+ do {
+ BB = Worklist.pop_back_val();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB)) continue;
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst, TLI)) {
+ ++NumDeadInst;
+ DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ if (TD) {
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
+ i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == 0) continue;
+
+ Constant*& FoldRes = FoldedConstants[CE];
+ if (!FoldRes)
+ FoldRes = ConstantFoldConstantExpression(CE, TD, TLI);
+ if (!FoldRes)
+ FoldRes = CE;
+
+ if (FoldRes != CE) {
+ *i = FoldRes;
+ MadeIRChange = true;
+ }
+ }
+ }
+
+ InstrsForInstCombineWorklist.push_back(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // See if this is an explicit destination.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseValue() == Cond) {
+ BasicBlock *ReachableBB = i.getCaseSuccessor();
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+
+ // Otherwise it is the default destination.
+ Worklist.push_back(SI->getDefaultDest());
+ continue;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ Worklist.push_back(TI->getSuccessor(i));
+ } while (!Worklist.empty());
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jives well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+ InstrsForInstCombineWorklist.size());
+
+ return MadeIRChange;
+}
+
+bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
+ MadeIRChange = false;
+
+ DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getName() << "\n");
+
+ {
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock*, 64> Visited;
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD,
+ TLI);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside of them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Visited.count(BB)) continue;
+
+ // Delete the instructions backwards, as it has a reduced likelihood of
+ // having to update as many def-use and use-def chains.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != BB->begin()) {
+ // Delete the next to last instruction.
+ BasicBlock::iterator I = EndInst;
+ Instruction *Inst = --I;
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (isa<LandingPadInst>(Inst)) {
+ EndInst = Inst;
+ continue;
+ }
+ if (!isa<DbgInfoIntrinsic>(Inst)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+ Inst->eraseFromParent();
+ }
+ }
+ }
+
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.RemoveOne();
+ if (I == 0) continue; // skip null values.
+
+ // Check to see if we can DCE the instruction.
+ if (isInstructionTriviallyDead(I, TLI)) {
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ EraseInstFromFunction(*I);
+ ++NumDeadInst;
+ MadeIRChange = true;
+ continue;
+ }
+
+ // Instruction isn't dead, see if we can constant propagate it.
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+
+ // See if we can trivially sink this instruction to a successor basic block.
+ if (I->hasOneUse()) {
+ BasicBlock *BB = I->getParent();
+ Instruction *UserInst = cast<Instruction>(I->use_back());
+ BasicBlock *UserParent;
+
+ // Get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+ else
+ UserParent = UserInst->getParent();
+
+ if (UserParent != BB) {
+ bool UserIsSuccessor = false;
+ // See if the user is one of our successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ if (*SI == UserParent) {
+ UserIsSuccessor = true;
+ break;
+ }
+
+ // If the user is one of our immediate successors, and if that successor
+ // only has us as a predecessors (we'd have to split the critical edge
+ // otherwise), we can keep going.
+ if (UserIsSuccessor && UserParent->getSinglePredecessor())
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ MadeIRChange |= TryToSinkInstruction(I, UserParent);
+ }
+ }
+
+ // Now that we have an instruction, try combining it to simplify it.
+ Builder->SetInsertPoint(I->getParent(), I);
+ Builder->SetCurrentDebugLocation(I->getDebugLoc());
+
+#ifndef NDEBUG
+ std::string OrigI;
+#endif
+ DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+ DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+
+ if (Instruction *Result = visit(*I)) {
+ ++NumCombined;
+ // Should we replace the old instruction with a new one?
+ if (Result != I) {
+ DEBUG(errs() << "IC: Old = " << *I << '\n'
+ << " New = " << *Result << '\n');
+
+ if (!I->getDebugLoc().isUnknown())
+ Result->setDebugLoc(I->getDebugLoc());
+ // Everything uses the new instruction now.
+ I->replaceAllUsesWith(Result);
+
+ // Move the name to the new instruction first.
+ Result->takeName(I);
+
+ // Push the new instruction and any users onto the worklist.
+ Worklist.Add(Result);
+ Worklist.AddUsersToWorkList(*Result);
+
+ // Insert the new instruction into the basic block...
+ BasicBlock *InstParent = I->getParent();
+ BasicBlock::iterator InsertPos = I;
+
+ // If we replace a PHI with something that isn't a PHI, fix up the
+ // insertion point.
+ if (!isa<PHINode>(Result) && isa<PHINode>(InsertPos))
+ InsertPos = InstParent->getFirstInsertionPt();
+
+ InstParent->getInstList().insert(InsertPos, Result);
+
+ EraseInstFromFunction(*I);
+ } else {
+#ifndef NDEBUG
+ DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ << " New = " << *I << '\n');
+#endif
+
+ // If the instruction was modified, it's possible that it is now dead.
+ // if so, remove it.
+ if (isInstructionTriviallyDead(I, TLI)) {
+ EraseInstFromFunction(*I);
+ } else {
+ Worklist.Add(I);
+ Worklist.AddUsersToWorkList(*I);
+ }
+ }
+ MadeIRChange = true;
+ }
+ }
+
+ Worklist.Zap();
+ return MadeIRChange;
+}
+
+namespace {
+class InstCombinerLibCallSimplifier : public LibCallSimplifier {
+ InstCombiner *IC;
+public:
+ InstCombinerLibCallSimplifier(const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ InstCombiner *IC)
+ : LibCallSimplifier(TD, TLI, UnsafeFPShrink) {
+ this->IC = IC;
+ }
+
+ /// replaceAllUsesWith - override so that instruction replacement
+ /// can be defined in terms of the instruction combiner framework.
+ virtual void replaceAllUsesWith(Instruction *I, Value *With) const {
+ IC->ReplaceInstUsesWith(*I, With);
+ }
+};
+}
+
+bool InstCombiner::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+ // Minimizing size?
+ MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize);
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ IRBuilder<true, TargetFolder, InstCombineIRInserter>
+ TheBuilder(F.getContext(), TargetFolder(TD),
+ InstCombineIRInserter(Worklist));
+ Builder = &TheBuilder;
+
+ InstCombinerLibCallSimplifier TheSimplifier(TD, TLI, this);
+ Simplifier = &TheSimplifier;
+
+ bool EverMadeChange = false;
+
+ // Lower dbg.declare intrinsics otherwise their value may be clobbered
+ // by instcombiner.
+ EverMadeChange = LowerDbgDeclare(F);
+
+ // Iterate while there is work to do.
+ unsigned Iteration = 0;
+ while (DoOneIteration(F, Iteration++))
+ EverMadeChange = true;
+
+ Builder = 0;
+ return EverMadeChange;
+}
+
+FunctionPass *llvm::createInstructionCombiningPass() {
+ return new InstCombiner();
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
new file mode 100644
index 000000000000..623c4705061e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -0,0 +1,1453 @@
+//===-- AddressSanitizer.cpp - memory error detector ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+// Details of the algorithm:
+// http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asan"
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BlackList.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <string>
+
+using namespace llvm;
+
+static const uint64_t kDefaultShadowScale = 3;
+static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
+static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
+static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G.
+static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
+
+static const size_t kMaxStackMallocSize = 1 << 16; // 64K
+static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
+static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
+
+static const char *kAsanModuleCtorName = "asan.module_ctor";
+static const char *kAsanModuleDtorName = "asan.module_dtor";
+static const int kAsanCtorAndCtorPriority = 1;
+static const char *kAsanReportErrorTemplate = "__asan_report_";
+static const char *kAsanReportLoadN = "__asan_report_load_n";
+static const char *kAsanReportStoreN = "__asan_report_store_n";
+static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
+static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
+static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
+static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
+static const char *kAsanInitName = "__asan_init_v3";
+static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
+static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
+static const char *kAsanMappingScaleName = "__asan_mapping_scale";
+static const char *kAsanStackMallocName = "__asan_stack_malloc";
+static const char *kAsanStackFreeName = "__asan_stack_free";
+static const char *kAsanGenPrefix = "__asan_gen_";
+static const char *kAsanPoisonStackMemoryName = "__asan_poison_stack_memory";
+static const char *kAsanUnpoisonStackMemoryName =
+ "__asan_unpoison_stack_memory";
+
+static const int kAsanStackLeftRedzoneMagic = 0xf1;
+static const int kAsanStackMidRedzoneMagic = 0xf2;
+static const int kAsanStackRightRedzoneMagic = 0xf3;
+static const int kAsanStackPartialRedzoneMagic = 0xf4;
+
+// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
+static const size_t kNumberOfAccessSizes = 5;
+
+// Command-line flags.
+
+// This flag may need to be replaced with -f[no-]asan-reads.
+static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
+ cl::desc("instrument read instructions"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
+ cl::desc("instrument write instructions"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",
+ cl::desc("instrument atomic instructions (rmw, cmpxchg)"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path",
+ cl::desc("use instrumentation with slow path for all accesses"),
+ cl::Hidden, cl::init(false));
+// This flag limits the number of instructions to be instrumented
+// in any given BB. Normally, this should be set to unlimited (INT_MAX),
+// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary
+// set it to 10000.
+static cl::opt<int> ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb",
+ cl::init(10000),
+ cl::desc("maximal number of instructions to instrument in any given BB"),
+ cl::Hidden);
+// This flag may need to be replaced with -f[no]asan-stack.
+static cl::opt<bool> ClStack("asan-stack",
+ cl::desc("Handle stack memory"), cl::Hidden, cl::init(true));
+// This flag may need to be replaced with -f[no]asan-use-after-return.
+static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
+ cl::desc("Check return-after-free"), cl::Hidden, cl::init(false));
+// This flag may need to be replaced with -f[no]asan-globals.
+static cl::opt<bool> ClGlobals("asan-globals",
+ cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInitializers("asan-initialization-order",
+ cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
+static cl::opt<bool> ClMemIntrin("asan-memintrin",
+ cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClRealignStack("asan-realign-stack",
+ cl::desc("Realign stack to 32"), cl::Hidden, cl::init(true));
+static cl::opt<std::string> ClBlacklistFile("asan-blacklist",
+ cl::desc("File containing the list of objects to ignore "
+ "during instrumentation"), cl::Hidden);
+
+// These flags allow to change the shadow mapping.
+// The shadow mapping looks like
+// Shadow = (Mem >> scale) + (1 << offset_log)
+static cl::opt<int> ClMappingScale("asan-mapping-scale",
+ cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
+static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log",
+ cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1));
+static cl::opt<bool> ClShort64BitOffset("asan-short-64bit-mapping-offset",
+ cl::desc("Use short immediate constant as the mapping offset for 64bit"),
+ cl::Hidden, cl::init(true));
+
+// Optimization flags. Not user visible, used mostly for testing
+// and benchmarking the tool.
+static cl::opt<bool> ClOpt("asan-opt",
+ cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp",
+ cl::desc("Instrument the same temp just once"), cl::Hidden,
+ cl::init(true));
+static cl::opt<bool> ClOptGlobals("asan-opt-globals",
+ cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClCheckLifetime("asan-check-lifetime",
+ cl::desc("Use llvm.lifetime intrinsics to insert extra checks"),
+ cl::Hidden, cl::init(false));
+
+// Debug flags.
+static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
+ cl::init(0));
+static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),
+ cl::Hidden, cl::init(0));
+static cl::opt<std::string> ClDebugFunc("asan-debug-func",
+ cl::Hidden, cl::desc("Debug func"));
+static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
+ cl::Hidden, cl::init(-1));
+static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
+ cl::Hidden, cl::init(-1));
+
+namespace {
+/// A set of dynamically initialized globals extracted from metadata.
+class SetOfDynamicallyInitializedGlobals {
+ public:
+ void Init(Module& M) {
+ // Clang generates metadata identifying all dynamically initialized globals.
+ NamedMDNode *DynamicGlobals =
+ M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
+ if (!DynamicGlobals)
+ return;
+ for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
+ MDNode *MDN = DynamicGlobals->getOperand(i);
+ assert(MDN->getNumOperands() == 1);
+ Value *VG = MDN->getOperand(0);
+ // The optimizer may optimize away a global entirely, in which case we
+ // cannot instrument access to it.
+ if (!VG)
+ continue;
+ DynInitGlobals.insert(cast<GlobalVariable>(VG));
+ }
+ }
+ bool Contains(GlobalVariable *G) { return DynInitGlobals.count(G) != 0; }
+ private:
+ SmallSet<GlobalValue*, 32> DynInitGlobals;
+};
+
+/// This struct defines the shadow mapping using the rule:
+/// shadow = (mem >> Scale) ADD-or-OR Offset.
+struct ShadowMapping {
+ int Scale;
+ uint64_t Offset;
+ bool OrShadowOffset;
+};
+
+static ShadowMapping getShadowMapping(const Module &M, int LongSize,
+ bool ZeroBaseShadow) {
+ llvm::Triple TargetTriple(M.getTargetTriple());
+ bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
+ bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX;
+ bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64;
+ bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64;
+
+ ShadowMapping Mapping;
+
+ // OR-ing shadow offset if more efficient (at least on x86),
+ // but on ppc64 we have to use add since the shadow offset is not neccesary
+ // 1/8-th of the address space.
+ Mapping.OrShadowOffset = !IsPPC64 && !ClShort64BitOffset;
+
+ Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 :
+ (LongSize == 32 ? kDefaultShadowOffset32 :
+ IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64);
+ if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64 && !IsMacOSX) {
+ assert(LongSize == 64);
+ Mapping.Offset = kDefaultShort64bitShadowOffset;
+ }
+ if (!ZeroBaseShadow && ClMappingOffsetLog >= 0) {
+ // Zero offset log is the special case.
+ Mapping.Offset = (ClMappingOffsetLog == 0) ? 0 : 1ULL << ClMappingOffsetLog;
+ }
+
+ Mapping.Scale = kDefaultShadowScale;
+ if (ClMappingScale) {
+ Mapping.Scale = ClMappingScale;
+ }
+
+ return Mapping;
+}
+
+static size_t RedzoneSizeForScale(int MappingScale) {
+ // Redzone used for stack and globals is at least 32 bytes.
+ // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+ return std::max(32U, 1U << MappingScale);
+}
+
+/// AddressSanitizer: instrument the code in module to find memory bugs.
+struct AddressSanitizer : public FunctionPass {
+ AddressSanitizer(bool CheckInitOrder = true,
+ bool CheckUseAfterReturn = false,
+ bool CheckLifetime = false,
+ StringRef BlacklistFile = StringRef(),
+ bool ZeroBaseShadow = false)
+ : FunctionPass(ID),
+ CheckInitOrder(CheckInitOrder || ClInitializers),
+ CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn),
+ CheckLifetime(CheckLifetime || ClCheckLifetime),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
+ : BlacklistFile),
+ ZeroBaseShadow(ZeroBaseShadow) {}
+ virtual const char *getPassName() const {
+ return "AddressSanitizerFunctionPass";
+ }
+ void instrumentMop(Instruction *I);
+ void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
+ Value *Addr, uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument);
+ Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
+ Value *ShadowValue, uint32_t TypeSize);
+ Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
+ bool IsWrite, size_t AccessSizeIndex,
+ Value *SizeArgument);
+ bool instrumentMemIntrinsic(MemIntrinsic *MI);
+ void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
+ Value *Size,
+ Instruction *InsertBefore, bool IsWrite);
+ Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+ bool runOnFunction(Function &F);
+ bool maybeInsertAsanInitAtFunctionEntry(Function &F);
+ void emitShadowMapping(Module &M, IRBuilder<> &IRB) const;
+ virtual bool doInitialization(Module &M);
+ static char ID; // Pass identification, replacement for typeid
+
+ private:
+ void initializeCallbacks(Module &M);
+
+ bool ShouldInstrumentGlobal(GlobalVariable *G);
+ bool LooksLikeCodeInBug11395(Instruction *I);
+ void FindDynamicInitializers(Module &M);
+
+ bool CheckInitOrder;
+ bool CheckUseAfterReturn;
+ bool CheckLifetime;
+ SmallString<64> BlacklistFile;
+ bool ZeroBaseShadow;
+
+ LLVMContext *C;
+ DataLayout *TD;
+ int LongSize;
+ Type *IntptrTy;
+ ShadowMapping Mapping;
+ Function *AsanCtorFunction;
+ Function *AsanInitFunction;
+ Function *AsanHandleNoReturnFunc;
+ OwningPtr<BlackList> BL;
+ // This array is indexed by AccessIsWrite and log2(AccessSize).
+ Function *AsanErrorCallback[2][kNumberOfAccessSizes];
+ // This array is indexed by AccessIsWrite.
+ Function *AsanErrorCallbackSized[2];
+ InlineAsm *EmptyAsm;
+ SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+
+ friend struct FunctionStackPoisoner;
+};
+
+class AddressSanitizerModule : public ModulePass {
+ public:
+ AddressSanitizerModule(bool CheckInitOrder = true,
+ StringRef BlacklistFile = StringRef(),
+ bool ZeroBaseShadow = false)
+ : ModulePass(ID),
+ CheckInitOrder(CheckInitOrder || ClInitializers),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
+ : BlacklistFile),
+ ZeroBaseShadow(ZeroBaseShadow) {}
+ bool runOnModule(Module &M);
+ static char ID; // Pass identification, replacement for typeid
+ virtual const char *getPassName() const {
+ return "AddressSanitizerModule";
+ }
+
+ private:
+ void initializeCallbacks(Module &M);
+
+ bool ShouldInstrumentGlobal(GlobalVariable *G);
+ void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
+ size_t RedzoneSize() const {
+ return RedzoneSizeForScale(Mapping.Scale);
+ }
+
+ bool CheckInitOrder;
+ SmallString<64> BlacklistFile;
+ bool ZeroBaseShadow;
+
+ OwningPtr<BlackList> BL;
+ SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+ Type *IntptrTy;
+ LLVMContext *C;
+ DataLayout *TD;
+ ShadowMapping Mapping;
+ Function *AsanPoisonGlobals;
+ Function *AsanUnpoisonGlobals;
+ Function *AsanRegisterGlobals;
+ Function *AsanUnregisterGlobals;
+};
+
+// Stack poisoning does not play well with exception handling.
+// When an exception is thrown, we essentially bypass the code
+// that unpoisones the stack. This is why the run-time library has
+// to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire
+// stack in the interceptor. This however does not work inside the
+// actual function which catches the exception. Most likely because the
+// compiler hoists the load of the shadow value somewhere too high.
+// This causes asan to report a non-existing bug on 453.povray.
+// It sounds like an LLVM bug.
+struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
+ Function &F;
+ AddressSanitizer &ASan;
+ DIBuilder DIB;
+ LLVMContext *C;
+ Type *IntptrTy;
+ Type *IntptrPtrTy;
+ ShadowMapping Mapping;
+
+ SmallVector<AllocaInst*, 16> AllocaVec;
+ SmallVector<Instruction*, 8> RetVec;
+ uint64_t TotalStackSize;
+ unsigned StackAlignment;
+
+ Function *AsanStackMallocFunc, *AsanStackFreeFunc;
+ Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
+
+ // Stores a place and arguments of poisoning/unpoisoning call for alloca.
+ struct AllocaPoisonCall {
+ IntrinsicInst *InsBefore;
+ uint64_t Size;
+ bool DoPoison;
+ };
+ SmallVector<AllocaPoisonCall, 8> AllocaPoisonCallVec;
+
+ // Maps Value to an AllocaInst from which the Value is originated.
+ typedef DenseMap<Value*, AllocaInst*> AllocaForValueMapTy;
+ AllocaForValueMapTy AllocaForValue;
+
+ FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
+ : F(F), ASan(ASan), DIB(*F.getParent()), C(ASan.C),
+ IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)),
+ Mapping(ASan.Mapping),
+ TotalStackSize(0), StackAlignment(1 << Mapping.Scale) {}
+
+ bool runOnFunction() {
+ if (!ClStack) return false;
+ // Collect alloca, ret, lifetime instructions etc.
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+ BasicBlock *BB = *DI;
+ visit(*BB);
+ }
+ if (AllocaVec.empty()) return false;
+
+ initializeCallbacks(*F.getParent());
+
+ poisonStack();
+
+ if (ClDebugStack) {
+ DEBUG(dbgs() << F);
+ }
+ return true;
+ }
+
+ // Finds all static Alloca instructions and puts
+ // poisoned red zones around all of them.
+ // Then unpoison everything back before the function returns.
+ void poisonStack();
+
+ // ----------------------- Visitors.
+ /// \brief Collect all Ret instructions.
+ void visitReturnInst(ReturnInst &RI) {
+ RetVec.push_back(&RI);
+ }
+
+ /// \brief Collect Alloca instructions we want (and can) handle.
+ void visitAllocaInst(AllocaInst &AI) {
+ if (!isInterestingAlloca(AI)) return;
+
+ StackAlignment = std::max(StackAlignment, AI.getAlignment());
+ AllocaVec.push_back(&AI);
+ uint64_t AlignedSize = getAlignedAllocaSize(&AI);
+ TotalStackSize += AlignedSize;
+ }
+
+ /// \brief Collect lifetime intrinsic calls to check for use-after-scope
+ /// errors.
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ if (!ASan.CheckLifetime) return;
+ Intrinsic::ID ID = II.getIntrinsicID();
+ if (ID != Intrinsic::lifetime_start &&
+ ID != Intrinsic::lifetime_end)
+ return;
+ // Found lifetime intrinsic, add ASan instrumentation if necessary.
+ ConstantInt *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
+ // If size argument is undefined, don't do anything.
+ if (Size->isMinusOne()) return;
+ // Check that size doesn't saturate uint64_t and can
+ // be stored in IntptrTy.
+ const uint64_t SizeValue = Size->getValue().getLimitedValue();
+ if (SizeValue == ~0ULL ||
+ !ConstantInt::isValueValidForType(IntptrTy, SizeValue))
+ return;
+ // Find alloca instruction that corresponds to llvm.lifetime argument.
+ AllocaInst *AI = findAllocaForValue(II.getArgOperand(1));
+ if (!AI) return;
+ bool DoPoison = (ID == Intrinsic::lifetime_end);
+ AllocaPoisonCall APC = {&II, SizeValue, DoPoison};
+ AllocaPoisonCallVec.push_back(APC);
+ }
+
+ // ---------------------- Helpers.
+ void initializeCallbacks(Module &M);
+
+ // Check if we want (and can) handle this alloca.
+ bool isInterestingAlloca(AllocaInst &AI) {
+ return (!AI.isArrayAllocation() &&
+ AI.isStaticAlloca() &&
+ AI.getAllocatedType()->isSized());
+ }
+
+ size_t RedzoneSize() const {
+ return RedzoneSizeForScale(Mapping.Scale);
+ }
+ uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty);
+ return SizeInBytes;
+ }
+ uint64_t getAlignedSize(uint64_t SizeInBytes) {
+ size_t RZ = RedzoneSize();
+ return ((SizeInBytes + RZ - 1) / RZ) * RZ;
+ }
+ uint64_t getAlignedAllocaSize(AllocaInst *AI) {
+ uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
+ return getAlignedSize(SizeInBytes);
+ }
+ /// Finds alloca where the value comes from.
+ AllocaInst *findAllocaForValue(Value *V);
+ void poisonRedZones(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
+ Value *ShadowBase, bool DoPoison);
+ void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, bool DoPoison);
+};
+
+} // namespace
+
+char AddressSanitizer::ID = 0;
+INITIALIZE_PASS(AddressSanitizer, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
+ false, false)
+FunctionPass *llvm::createAddressSanitizerFunctionPass(
+ bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime,
+ StringRef BlacklistFile, bool ZeroBaseShadow) {
+ return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn,
+ CheckLifetime, BlacklistFile, ZeroBaseShadow);
+}
+
+char AddressSanitizerModule::ID = 0;
+INITIALIZE_PASS(AddressSanitizerModule, "asan-module",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs."
+ "ModulePass", false, false)
+ModulePass *llvm::createAddressSanitizerModulePass(
+ bool CheckInitOrder, StringRef BlacklistFile, bool ZeroBaseShadow) {
+ return new AddressSanitizerModule(CheckInitOrder, BlacklistFile,
+ ZeroBaseShadow);
+}
+
+static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
+ size_t Res = CountTrailingZeros_32(TypeSize / 8);
+ assert(Res < kNumberOfAccessSizes);
+ return Res;
+}
+
+// Create a constant for Str so that we can pass it to the run-time lib.
+static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
+ Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+ GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true,
+ GlobalValue::PrivateLinkage, StrConst,
+ kAsanGenPrefix);
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
+ return GV;
+}
+
+static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
+ return G->getName().find(kAsanGenPrefix) == 0;
+}
+
+Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
+ // Shadow >> scale
+ Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
+ if (Mapping.Offset == 0)
+ return Shadow;
+ // (Shadow >> scale) | offset
+ if (Mapping.OrShadowOffset)
+ return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
+ else
+ return IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
+}
+
+void AddressSanitizer::instrumentMemIntrinsicParam(
+ Instruction *OrigIns,
+ Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
+ IRBuilder<> IRB(InsertBefore);
+ if (Size->getType() != IntptrTy)
+ Size = IRB.CreateIntCast(Size, IntptrTy, false);
+ // Check the first byte.
+ instrumentAddress(OrigIns, InsertBefore, Addr, 8, IsWrite, Size);
+ // Check the last byte.
+ IRB.SetInsertPoint(InsertBefore);
+ Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1));
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ Value *AddrLast = IRB.CreateAdd(AddrLong, SizeMinusOne);
+ instrumentAddress(OrigIns, InsertBefore, AddrLast, 8, IsWrite, Size);
+}
+
+// Instrument memset/memmove/memcpy
+bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ Value *Dst = MI->getDest();
+ MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
+ Value *Src = MemTran ? MemTran->getSource() : 0;
+ Value *Length = MI->getLength();
+
+ Constant *ConstLength = dyn_cast<Constant>(Length);
+ Instruction *InsertBefore = MI;
+ if (ConstLength) {
+ if (ConstLength->isNullValue()) return false;
+ } else {
+ // The size is not a constant so it could be zero -- check at run-time.
+ IRBuilder<> IRB(InsertBefore);
+
+ Value *Cmp = IRB.CreateICmpNE(Length,
+ Constant::getNullValue(Length->getType()));
+ InsertBefore = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ }
+
+ instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
+ if (Src)
+ instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
+ return true;
+}
+
+// If I is an interesting memory access, return the PointerOperand
+// and set IsWrite. Otherwise return NULL.
+static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!ClInstrumentReads) return NULL;
+ *IsWrite = false;
+ return LI->getPointerOperand();
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!ClInstrumentWrites) return NULL;
+ *IsWrite = true;
+ return SI->getPointerOperand();
+ }
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ if (!ClInstrumentAtomics) return NULL;
+ *IsWrite = true;
+ return RMW->getPointerOperand();
+ }
+ if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (!ClInstrumentAtomics) return NULL;
+ *IsWrite = true;
+ return XCHG->getPointerOperand();
+ }
+ return NULL;
+}
+
+void AddressSanitizer::instrumentMop(Instruction *I) {
+ bool IsWrite = false;
+ Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
+ assert(Addr);
+ if (ClOpt && ClOptGlobals) {
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
+ // If initialization order checking is disabled, a simple access to a
+ // dynamically initialized global is always valid.
+ if (!CheckInitOrder)
+ return;
+ // If a global variable does not have dynamic initialization we don't
+ // have to instrument it. However, if a global does not have initailizer
+ // at all, we assume it has dynamic initializer (in other TU).
+ if (G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G))
+ return;
+ }
+ }
+
+ Type *OrigPtrTy = Addr->getType();
+ Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+
+ assert(OrigTy->isSized());
+ uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+
+ assert((TypeSize % 8) == 0);
+
+ // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check.
+ if (TypeSize == 8 || TypeSize == 16 ||
+ TypeSize == 32 || TypeSize == 64 || TypeSize == 128)
+ return instrumentAddress(I, I, Addr, TypeSize, IsWrite, 0);
+ // Instrument unusual size (but still multiple of 8).
+ // We can not do it with a single check, so we do 1-byte check for the first
+ // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
+ // to report the actual access size.
+ IRBuilder<> IRB(I);
+ Value *LastByte = IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePointerCast(Addr, IntptrTy),
+ ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+ OrigPtrTy);
+ Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
+ instrumentAddress(I, I, Addr, 8, IsWrite, Size);
+ instrumentAddress(I, I, LastByte, 8, IsWrite, Size);
+}
+
+// Validate the result of Module::getOrInsertFunction called for an interface
+// function of AddressSanitizer. If the instrumented module defines a function
+// with the same name, their prototypes must match, otherwise
+// getOrInsertFunction returns a bitcast.
+static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
+ if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast);
+ FuncOrBitcast->dump();
+ report_fatal_error("trying to redefine an AddressSanitizer "
+ "interface function");
+}
+
+Instruction *AddressSanitizer::generateCrashCode(
+ Instruction *InsertBefore, Value *Addr,
+ bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument) {
+ IRBuilder<> IRB(InsertBefore);
+ CallInst *Call = SizeArgument
+ ? IRB.CreateCall2(AsanErrorCallbackSized[IsWrite], Addr, SizeArgument)
+ : IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr);
+
+ // We don't do Call->setDoesNotReturn() because the BB already has
+ // UnreachableInst at the end.
+ // This EmptyAsm is required to avoid callback merge.
+ IRB.CreateCall(EmptyAsm);
+ return Call;
+}
+
+Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
+ Value *ShadowValue,
+ uint32_t TypeSize) {
+ size_t Granularity = 1 << Mapping.Scale;
+ // Addr & (Granularity - 1)
+ Value *LastAccessedByte = IRB.CreateAnd(
+ AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+ // (Addr & (Granularity - 1)) + size - 1
+ if (TypeSize / 8 > 1)
+ LastAccessedByte = IRB.CreateAdd(
+ LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
+ // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
+ LastAccessedByte = IRB.CreateIntCast(
+ LastAccessedByte, ShadowValue->getType(), false);
+ // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
+ return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
+}
+
+void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
+ Instruction *InsertBefore,
+ Value *Addr, uint32_t TypeSize,
+ bool IsWrite, Value *SizeArgument) {
+ IRBuilder<> IRB(InsertBefore);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+
+ Type *ShadowTy = IntegerType::get(
+ *C, std::max(8U, TypeSize >> Mapping.Scale));
+ Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+ Value *ShadowPtr = memToShadow(AddrLong, IRB);
+ Value *CmpVal = Constant::getNullValue(ShadowTy);
+ Value *ShadowValue = IRB.CreateLoad(
+ IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+
+ Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
+ size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+ size_t Granularity = 1 << Mapping.Scale;
+ TerminatorInst *CrashTerm = 0;
+
+ if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
+ TerminatorInst *CheckTerm =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
+ BasicBlock *NextBB = CheckTerm->getSuccessor(0);
+ IRB.SetInsertPoint(CheckTerm);
+ Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
+ BasicBlock *CrashBlock =
+ BasicBlock::Create(*C, "", NextBB->getParent(), NextBB);
+ CrashTerm = new UnreachableInst(*C, CrashBlock);
+ BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
+ ReplaceInstWithInst(CheckTerm, NewTerm);
+ } else {
+ CrashTerm = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), true);
+ }
+
+ Instruction *Crash = generateCrashCode(
+ CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument);
+ Crash->setDebugLoc(OrigIns->getDebugLoc());
+}
+
+void AddressSanitizerModule::createInitializerPoisonCalls(
+ Module &M, GlobalValue *ModuleName) {
+ // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
+ Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
+ // If that function is not present, this TU contains no globals, or they have
+ // all been optimized away
+ if (!GlobalInit)
+ return;
+
+ // Set up the arguments to our poison/unpoison functions.
+ IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
+
+ // Add a call to poison all external globals before the given function starts.
+ Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy);
+ IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr);
+
+ // Add calls to unpoison all globals before each return instruction.
+ for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
+ I != E; ++I) {
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
+ CallInst::Create(AsanUnpoisonGlobals, "", RI);
+ }
+ }
+}
+
+bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
+ Type *Ty = cast<PointerType>(G->getType())->getElementType();
+ DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
+
+ if (BL->isIn(*G)) return false;
+ if (!Ty->isSized()) return false;
+ if (!G->hasInitializer()) return false;
+ if (GlobalWasGeneratedByAsan(G)) return false; // Our own global.
+ // Touch only those globals that will not be defined in other modules.
+ // Don't handle ODR type linkages since other modules may be built w/o asan.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ G->getLinkage() != GlobalVariable::PrivateLinkage &&
+ G->getLinkage() != GlobalVariable::InternalLinkage)
+ return false;
+ // Two problems with thread-locals:
+ // - The address of the main thread's copy can't be computed at link-time.
+ // - Need to poison all copies, not just the main thread's one.
+ if (G->isThreadLocal())
+ return false;
+ // For now, just ignore this Alloca if the alignment is large.
+ if (G->getAlignment() > RedzoneSize()) return false;
+
+ // Ignore all the globals with the names starting with "\01L_OBJC_".
+ // Many of those are put into the .cstring section. The linker compresses
+ // that section by removing the spare \0s after the string terminator, so
+ // our redzones get broken.
+ if ((G->getName().find("\01L_OBJC_") == 0) ||
+ (G->getName().find("\01l_OBJC_") == 0)) {
+ DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
+ return false;
+ }
+
+ if (G->hasSection()) {
+ StringRef Section(G->getSection());
+ // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+ // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+ // them.
+ if ((Section.find("__OBJC,") == 0) ||
+ (Section.find("__DATA, __objc_") == 0)) {
+ DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
+ return false;
+ }
+ // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
+ // Constant CFString instances are compiled in the following way:
+ // -- the string buffer is emitted into
+ // __TEXT,__cstring,cstring_literals
+ // -- the constant NSConstantString structure referencing that buffer
+ // is placed into __DATA,__cfstring
+ // Therefore there's no point in placing redzones into __DATA,__cfstring.
+ // Moreover, it causes the linker to crash on OS X 10.7
+ if (Section.find("__DATA,__cfstring") == 0) {
+ DEBUG(dbgs() << "Ignoring CFString: " << *G);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void AddressSanitizerModule::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
+ // Declare our poisoning and unpoisoning functions.
+ AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, NULL));
+ AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
+ AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
+ AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
+ // Declare functions that register/unregister globals.
+ AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanRegisterGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, NULL));
+ AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
+ AsanUnregisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnregisterGlobalsName,
+ IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+}
+
+// This function replaces all global variables with new variables that have
+// trailing redzones. It also creates a function that poisons
+// redzones and inserts this function into llvm.global_ctors.
+bool AddressSanitizerModule::runOnModule(Module &M) {
+ if (!ClGlobals) return false;
+ TD = getAnalysisIfAvailable<DataLayout>();
+ if (!TD)
+ return false;
+ BL.reset(new BlackList(BlacklistFile));
+ if (BL->isIn(M)) return false;
+ C = &(M.getContext());
+ int LongSize = TD->getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow);
+ initializeCallbacks(M);
+ DynamicallyInitializedGlobals.Init(M);
+
+ SmallVector<GlobalVariable *, 16> GlobalsToChange;
+
+ for (Module::GlobalListType::iterator G = M.global_begin(),
+ E = M.global_end(); G != E; ++G) {
+ if (ShouldInstrumentGlobal(G))
+ GlobalsToChange.push_back(G);
+ }
+
+ size_t n = GlobalsToChange.size();
+ if (n == 0) return false;
+
+ // A global is described by a structure
+ // size_t beg;
+ // size_t size;
+ // size_t size_with_redzone;
+ // const char *name;
+ // const char *module_name;
+ // size_t has_dynamic_init;
+ // We initialize an array of such structures and pass it to a run-time call.
+ StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
+ IntptrTy, IntptrTy,
+ IntptrTy, IntptrTy, NULL);
+ SmallVector<Constant *, 16> Initializers(n), DynamicInit;
+
+
+ Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
+ assert(CtorFunc);
+ IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
+
+ bool HasDynamicallyInitializedGlobals = false;
+
+ GlobalVariable *ModuleName = createPrivateGlobalForString(
+ M, M.getModuleIdentifier());
+ // We shouldn't merge same module names, as this string serves as unique
+ // module ID in runtime.
+ ModuleName->setUnnamedAddr(false);
+
+ for (size_t i = 0; i < n; i++) {
+ static const uint64_t kMaxGlobalRedzone = 1 << 18;
+ GlobalVariable *G = GlobalsToChange[i];
+ PointerType *PtrTy = cast<PointerType>(G->getType());
+ Type *Ty = PtrTy->getElementType();
+ uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
+ uint64_t MinRZ = RedzoneSize();
+ // MinRZ <= RZ <= kMaxGlobalRedzone
+ // and trying to make RZ to be ~ 1/4 of SizeInBytes.
+ uint64_t RZ = std::max(MinRZ,
+ std::min(kMaxGlobalRedzone,
+ (SizeInBytes / MinRZ / 4) * MinRZ));
+ uint64_t RightRedzoneSize = RZ;
+ // Round up to MinRZ
+ if (SizeInBytes % MinRZ)
+ RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
+ assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
+ Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+ // Determine whether this global should be poisoned in initialization.
+ bool GlobalHasDynamicInitializer =
+ DynamicallyInitializedGlobals.Contains(G);
+ // Don't check initialization order if this global is blacklisted.
+ GlobalHasDynamicInitializer &= !BL->isInInit(*G);
+
+ StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
+ Constant *NewInitializer = ConstantStruct::get(
+ NewTy, G->getInitializer(),
+ Constant::getNullValue(RightRedZoneTy), NULL);
+
+ GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
+
+ // Create a new global variable with enough space for a redzone.
+ GlobalVariable *NewGlobal = new GlobalVariable(
+ M, NewTy, G->isConstant(), G->getLinkage(),
+ NewInitializer, "", G, G->getThreadLocalMode());
+ NewGlobal->copyAttributesFrom(G);
+ NewGlobal->setAlignment(MinRZ);
+
+ Value *Indices2[2];
+ Indices2[0] = IRB.getInt32(0);
+ Indices2[1] = IRB.getInt32(0);
+
+ G->replaceAllUsesWith(
+ ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true));
+ NewGlobal->takeName(G);
+ G->eraseFromParent();
+
+ Initializers[i] = ConstantStruct::get(
+ GlobalStructTy,
+ ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
+ ConstantInt::get(IntptrTy, SizeInBytes),
+ ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
+ ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantExpr::getPointerCast(ModuleName, IntptrTy),
+ ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
+ NULL);
+
+ // Populate the first and last globals declared in this TU.
+ if (CheckInitOrder && GlobalHasDynamicInitializer)
+ HasDynamicallyInitializedGlobals = true;
+
+ DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
+ }
+
+ ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
+ GlobalVariable *AllGlobals = new GlobalVariable(
+ M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
+ ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
+
+ // Create calls for poisoning before initializers run and unpoisoning after.
+ if (CheckInitOrder && HasDynamicallyInitializedGlobals)
+ createInitializerPoisonCalls(M, ModuleName);
+ IRB.CreateCall2(AsanRegisterGlobals,
+ IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n));
+
+ // We also need to unregister globals at the end, e.g. when a shared library
+ // gets closed.
+ Function *AsanDtorFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
+ IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
+ IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
+ IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n));
+ appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority);
+
+ DEBUG(dbgs() << M);
+ return true;
+}
+
+void AddressSanitizer::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
+ // Create __asan_report* callbacks.
+ for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ // IsWrite and TypeSize are encoded in the function name.
+ std::string FunctionName = std::string(kAsanReportErrorTemplate) +
+ (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
+ // If we are merging crash callbacks, they have two parameters.
+ AsanErrorCallback[AccessIsWrite][AccessSizeIndex] =
+ checkInterfaceFunction(M.getOrInsertFunction(
+ FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
+ }
+ }
+ AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+
+ AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+ // We insert an empty inline asm after __asan_report* to avoid callback merge.
+ EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
+ StringRef(""), StringRef(""),
+ /*hasSideEffects=*/true);
+}
+
+void AddressSanitizer::emitShadowMapping(Module &M, IRBuilder<> &IRB) const {
+ // Tell the values of mapping offset and scale to the run-time.
+ GlobalValue *asan_mapping_offset =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, Mapping.Offset),
+ kAsanMappingOffsetName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_offset, true);
+
+ GlobalValue *asan_mapping_scale =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, Mapping.Scale),
+ kAsanMappingScaleName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_scale, true);
+}
+
+// virtual
+bool AddressSanitizer::doInitialization(Module &M) {
+ // Initialize the private fields. No one has accessed them before.
+ TD = getAnalysisIfAvailable<DataLayout>();
+
+ if (!TD)
+ return false;
+ BL.reset(new BlackList(BlacklistFile));
+ DynamicallyInitializedGlobals.Init(M);
+
+ C = &(M.getContext());
+ LongSize = TD->getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+
+ AsanCtorFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
+ BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
+ // call __asan_init in the module ctor.
+ IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB));
+ AsanInitFunction = checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
+ AsanInitFunction->setLinkage(Function::ExternalLinkage);
+ IRB.CreateCall(AsanInitFunction);
+
+ Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow);
+ emitShadowMapping(M, IRB);
+
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
+ return true;
+}
+
+bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
+ // For each NSObject descendant having a +load method, this method is invoked
+ // by the ObjC runtime before any of the static constructors is called.
+ // Therefore we need to instrument such methods with a call to __asan_init
+ // at the beginning in order to initialize our runtime before any access to
+ // the shadow memory.
+ // We cannot just ignore these methods, because they may call other
+ // instrumented functions.
+ if (F.getName().find(" load]") != std::string::npos) {
+ IRBuilder<> IRB(F.begin()->begin());
+ IRB.CreateCall(AsanInitFunction);
+ return true;
+ }
+ return false;
+}
+
+bool AddressSanitizer::runOnFunction(Function &F) {
+ if (BL->isIn(F)) return false;
+ if (&F == AsanCtorFunction) return false;
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
+ DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
+ initializeCallbacks(*F.getParent());
+
+ // If needed, insert __asan_init before checking for SanitizeAddress attr.
+ maybeInsertAsanInitAtFunctionEntry(F);
+
+ if (!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::SanitizeAddress))
+ return false;
+
+ if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
+ return false;
+
+ // We want to instrument every address only once per basic block (unless there
+ // are calls between uses).
+ SmallSet<Value*, 16> TempsToInstrument;
+ SmallVector<Instruction*, 16> ToInstrument;
+ SmallVector<Instruction*, 8> NoReturnCalls;
+ bool IsWrite;
+
+ // Fill the set of memory operations to instrument.
+ for (Function::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ TempsToInstrument.clear();
+ int NumInsnsPerBB = 0;
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ if (LooksLikeCodeInBug11395(BI)) return false;
+ if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite)) {
+ if (ClOpt && ClOptSameTemp) {
+ if (!TempsToInstrument.insert(Addr))
+ continue; // We've seen this temp in the current BB.
+ }
+ } else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
+ // ok, take it.
+ } else {
+ CallSite CS(BI);
+ if (CS) {
+ // A call inside BB.
+ TempsToInstrument.clear();
+ if (CS.doesNotReturn())
+ NoReturnCalls.push_back(CS.getInstruction());
+ }
+ continue;
+ }
+ ToInstrument.push_back(BI);
+ NumInsnsPerBB++;
+ if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB)
+ break;
+ }
+ }
+
+ // Instrument.
+ int NumInstrumented = 0;
+ for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
+ Instruction *Inst = ToInstrument[i];
+ if (ClDebugMin < 0 || ClDebugMax < 0 ||
+ (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
+ if (isInterestingMemoryAccess(Inst, &IsWrite))
+ instrumentMop(Inst);
+ else
+ instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
+ }
+ NumInstrumented++;
+ }
+
+ FunctionStackPoisoner FSP(F, *this);
+ bool ChangedStack = FSP.runOnFunction();
+
+ // We must unpoison the stack before every NoReturn call (throw, _exit, etc).
+ // See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
+ for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) {
+ Instruction *CI = NoReturnCalls[i];
+ IRBuilder<> IRB(CI);
+ IRB.CreateCall(AsanHandleNoReturnFunc);
+ }
+ DEBUG(dbgs() << "ASAN done instrumenting:\n" << F << "\n");
+
+ return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
+}
+
+static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) {
+ if (ShadowRedzoneSize == 1) return PoisonByte;
+ if (ShadowRedzoneSize == 2) return (PoisonByte << 8) + PoisonByte;
+ if (ShadowRedzoneSize == 4)
+ return (PoisonByte << 24) + (PoisonByte << 16) +
+ (PoisonByte << 8) + (PoisonByte);
+ llvm_unreachable("ShadowRedzoneSize is either 1, 2 or 4");
+}
+
+static void PoisonShadowPartialRightRedzone(uint8_t *Shadow,
+ size_t Size,
+ size_t RZSize,
+ size_t ShadowGranularity,
+ uint8_t Magic) {
+ for (size_t i = 0; i < RZSize;
+ i+= ShadowGranularity, Shadow++) {
+ if (i + ShadowGranularity <= Size) {
+ *Shadow = 0; // fully addressable
+ } else if (i >= Size) {
+ *Shadow = Magic; // unaddressable
+ } else {
+ *Shadow = Size - i; // first Size-i bytes are addressable
+ }
+ }
+}
+
+// Workaround for bug 11395: we don't want to instrument stack in functions
+// with large assembly blobs (32-bit only), otherwise reg alloc may crash.
+// FIXME: remove once the bug 11395 is fixed.
+bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
+ if (LongSize != 32) return false;
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI || !CI->isInlineAsm()) return false;
+ if (CI->getNumArgOperands() <= 5) return false;
+ // We have inline assembly with quite a few arguments.
+ return true;
+}
+
+void FunctionStackPoisoner::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
+ AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL));
+ AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackFreeName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy, NULL));
+ AsanPoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+}
+
+void FunctionStackPoisoner::poisonRedZones(
+ const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase,
+ bool DoPoison) {
+ size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale;
+ assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
+ Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8);
+ Type *RZPtrTy = PointerType::get(RZTy, 0);
+
+ Value *PoisonLeft = ConstantInt::get(RZTy,
+ ValueForPoison(DoPoison ? kAsanStackLeftRedzoneMagic : 0LL, ShadowRZSize));
+ Value *PoisonMid = ConstantInt::get(RZTy,
+ ValueForPoison(DoPoison ? kAsanStackMidRedzoneMagic : 0LL, ShadowRZSize));
+ Value *PoisonRight = ConstantInt::get(RZTy,
+ ValueForPoison(DoPoison ? kAsanStackRightRedzoneMagic : 0LL, ShadowRZSize));
+
+ // poison the first red zone.
+ IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy));
+
+ // poison all other red zones.
+ uint64_t Pos = RedzoneSize();
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
+ AllocaInst *AI = AllocaVec[i];
+ uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
+ uint64_t AlignedSize = getAlignedAllocaSize(AI);
+ assert(AlignedSize - SizeInBytes < RedzoneSize());
+ Value *Ptr = NULL;
+
+ Pos += AlignedSize;
+
+ assert(ShadowBase->getType() == IntptrTy);
+ if (SizeInBytes < AlignedSize) {
+ // Poison the partial redzone at right
+ Ptr = IRB.CreateAdd(
+ ShadowBase, ConstantInt::get(IntptrTy,
+ (Pos >> Mapping.Scale) - ShadowRZSize));
+ size_t AddressableBytes = RedzoneSize() - (AlignedSize - SizeInBytes);
+ uint32_t Poison = 0;
+ if (DoPoison) {
+ PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes,
+ RedzoneSize(),
+ 1ULL << Mapping.Scale,
+ kAsanStackPartialRedzoneMagic);
+ }
+ Value *PartialPoison = ConstantInt::get(RZTy, Poison);
+ IRB.CreateStore(PartialPoison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
+ }
+
+ // Poison the full redzone at right.
+ Ptr = IRB.CreateAdd(ShadowBase,
+ ConstantInt::get(IntptrTy, Pos >> Mapping.Scale));
+ bool LastAlloca = (i == AllocaVec.size() - 1);
+ Value *Poison = LastAlloca ? PoisonRight : PoisonMid;
+ IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
+
+ Pos += RedzoneSize();
+ }
+}
+
+void FunctionStackPoisoner::poisonStack() {
+ uint64_t LocalStackSize = TotalStackSize +
+ (AllocaVec.size() + 1) * RedzoneSize();
+
+ bool DoStackMalloc = ASan.CheckUseAfterReturn
+ && LocalStackSize <= kMaxStackMallocSize;
+
+ assert(AllocaVec.size() > 0);
+ Instruction *InsBefore = AllocaVec[0];
+ IRBuilder<> IRB(InsBefore);
+
+
+ Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
+ AllocaInst *MyAlloca =
+ new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
+ if (ClRealignStack && StackAlignment < RedzoneSize())
+ StackAlignment = RedzoneSize();
+ MyAlloca->setAlignment(StackAlignment);
+ assert(MyAlloca->isStaticAlloca());
+ Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy);
+ Value *LocalStackBase = OrigStackBase;
+
+ if (DoStackMalloc) {
+ LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
+ ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
+ }
+
+ // This string will be parsed by the run-time (DescribeAddressIfStack).
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ StackDescription << AllocaVec.size() << " ";
+
+ // Insert poison calls for lifetime intrinsics for alloca.
+ bool HavePoisonedAllocas = false;
+ for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) {
+ const AllocaPoisonCall &APC = AllocaPoisonCallVec[i];
+ IntrinsicInst *II = APC.InsBefore;
+ AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
+ assert(AI);
+ IRBuilder<> IRB(II);
+ poisonAlloca(AI, APC.Size, IRB, APC.DoPoison);
+ HavePoisonedAllocas |= APC.DoPoison;
+ }
+
+ uint64_t Pos = RedzoneSize();
+ // Replace Alloca instructions with base+offset.
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
+ AllocaInst *AI = AllocaVec[i];
+ uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
+ StringRef Name = AI->getName();
+ StackDescription << Pos << " " << SizeInBytes << " "
+ << Name.size() << " " << Name << " ";
+ uint64_t AlignedSize = getAlignedAllocaSize(AI);
+ assert((AlignedSize % RedzoneSize()) == 0);
+ Value *NewAllocaPtr = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)),
+ AI->getType());
+ replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB);
+ AI->replaceAllUsesWith(NewAllocaPtr);
+ Pos += AlignedSize + RedzoneSize();
+ }
+ assert(Pos == LocalStackSize);
+
+ // The left-most redzone has enough space for at least 4 pointers.
+ // Write the Magic value to redzone[0].
+ Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
+ IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
+ BasePlus0);
+ // Write the frame description constant to redzone[1].
+ Value *BasePlus1 = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)),
+ IntptrPtrTy);
+ GlobalVariable *StackDescriptionGlobal =
+ createPrivateGlobalForString(*F.getParent(), StackDescription.str());
+ Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal,
+ IntptrTy);
+ IRB.CreateStore(Description, BasePlus1);
+ // Write the PC to redzone[2].
+ Value *BasePlus2 = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy,
+ 2 * ASan.LongSize/8)),
+ IntptrPtrTy);
+ IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
+
+ // Poison the stack redzones at the entry.
+ Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
+ poisonRedZones(AllocaVec, IRB, ShadowBase, true);
+
+ // Unpoison the stack before all ret instructions.
+ for (size_t i = 0, n = RetVec.size(); i < n; i++) {
+ Instruction *Ret = RetVec[i];
+ IRBuilder<> IRBRet(Ret);
+ // Mark the current frame as retired.
+ IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic),
+ BasePlus0);
+ // Unpoison the stack.
+ poisonRedZones(AllocaVec, IRBRet, ShadowBase, false);
+ if (DoStackMalloc) {
+ // In use-after-return mode, mark the whole stack frame unaddressable.
+ IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase,
+ ConstantInt::get(IntptrTy, LocalStackSize),
+ OrigStackBase);
+ } else if (HavePoisonedAllocas) {
+ // If we poisoned some allocas in llvm.lifetime analysis,
+ // unpoison whole stack frame now.
+ assert(LocalStackBase == OrigStackBase);
+ poisonAlloca(LocalStackBase, LocalStackSize, IRBRet, false);
+ }
+ }
+
+ // We are done. Remove the old unused alloca instructions.
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++)
+ AllocaVec[i]->eraseFromParent();
+}
+
+void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
+ IRBuilder<> IRB, bool DoPoison) {
+ // For now just insert the call to ASan runtime.
+ Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
+ Value *SizeArg = ConstantInt::get(IntptrTy, Size);
+ IRB.CreateCall2(DoPoison ? AsanPoisonStackMemoryFunc
+ : AsanUnpoisonStackMemoryFunc,
+ AddrArg, SizeArg);
+}
+
+// Handling llvm.lifetime intrinsics for a given %alloca:
+// (1) collect all llvm.lifetime.xxx(%size, %value) describing the alloca.
+// (2) if %size is constant, poison memory for llvm.lifetime.end (to detect
+// invalid accesses) and unpoison it for llvm.lifetime.start (the memory
+// could be poisoned by previous llvm.lifetime.end instruction, as the
+// variable may go in and out of scope several times, e.g. in loops).
+// (3) if we poisoned at least one %alloca in a function,
+// unpoison the whole stack frame at function exit.
+
+AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ // We're intested only in allocas we can handle.
+ return isInterestingAlloca(*AI) ? AI : 0;
+ // See if we've already calculated (or started to calculate) alloca for a
+ // given value.
+ AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
+ if (I != AllocaForValue.end())
+ return I->second;
+ // Store 0 while we're calculating alloca for value V to avoid
+ // infinite recursion if the value references itself.
+ AllocaForValue[V] = 0;
+ AllocaInst *Res = 0;
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ Res = findAllocaForValue(CI->getOperand(0));
+ else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *IncValue = PN->getIncomingValue(i);
+ // Allow self-referencing phi-nodes.
+ if (IncValue == PN) continue;
+ AllocaInst *IncValueAI = findAllocaForValue(IncValue);
+ // AI for incoming values should exist and should all be equal.
+ if (IncValueAI == 0 || (Res != 0 && IncValueAI != Res))
+ return 0;
+ Res = IncValueAI;
+ }
+ }
+ if (Res != 0)
+ AllocaForValue[V] = Res;
+ return Res;
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
new file mode 100644
index 000000000000..927982d2af47
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
@@ -0,0 +1,125 @@
+//===-- BlackList.cpp - blacklist for sanitizers --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables based on a user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BlackList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+BlackList::BlackList(const StringRef Path) {
+ // Validate and open blacklist file.
+ if (Path.empty()) return;
+ OwningPtr<MemoryBuffer> File;
+ if (error_code EC = MemoryBuffer::getFile(Path, File)) {
+ report_fatal_error("Can't open blacklist file: " + Path + ": " +
+ EC.message());
+ }
+
+ // Iterate through each line in the blacklist file.
+ SmallVector<StringRef, 16> Lines;
+ SplitString(File.take()->getBuffer(), Lines, "\n\r");
+ StringMap<std::string> Regexps;
+ for (SmallVector<StringRef, 16>::iterator I = Lines.begin(), E = Lines.end();
+ I != E; ++I) {
+ // Ignore empty lines and lines starting with "#"
+ if (I->empty() || I->startswith("#"))
+ continue;
+ // Get our prefix and unparsed regexp.
+ std::pair<StringRef, StringRef> SplitLine = I->split(":");
+ StringRef Prefix = SplitLine.first;
+ std::string Regexp = SplitLine.second;
+ if (Regexp.empty()) {
+ // Missing ':' in the line.
+ report_fatal_error("malformed blacklist line: " + SplitLine.first);
+ }
+
+ // Replace * with .*
+ for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
+ pos += strlen(".*")) {
+ Regexp.replace(pos, strlen("*"), ".*");
+ }
+
+ // Check that the regexp is valid.
+ Regex CheckRE(Regexp);
+ std::string Error;
+ if (!CheckRE.isValid(Error)) {
+ report_fatal_error("malformed blacklist regex: " + SplitLine.second +
+ ": " + Error);
+ }
+
+ // Add this regexp into the proper group by its prefix.
+ if (!Regexps[Prefix].empty())
+ Regexps[Prefix] += "|";
+ Regexps[Prefix] += Regexp;
+ }
+
+ // Iterate through each of the prefixes, and create Regexs for them.
+ for (StringMap<std::string>::const_iterator I = Regexps.begin(),
+ E = Regexps.end(); I != E; ++I) {
+ Entries[I->getKey()] = new Regex(I->getValue());
+ }
+}
+
+bool BlackList::isIn(const Function &F) const {
+ return isIn(*F.getParent()) || inSection("fun", F.getName());
+}
+
+bool BlackList::isIn(const GlobalVariable &G) const {
+ return isIn(*G.getParent()) || inSection("global", G.getName());
+}
+
+bool BlackList::isIn(const Module &M) const {
+ return inSection("src", M.getModuleIdentifier());
+}
+
+static StringRef GetGVTypeString(const GlobalVariable &G) {
+ // Types of GlobalVariables are always pointer types.
+ Type *GType = G.getType()->getElementType();
+ // For now we support blacklisting struct types only.
+ if (StructType *SGType = dyn_cast<StructType>(GType)) {
+ if (!SGType->isLiteral())
+ return SGType->getName();
+ }
+ return "<unknown type>";
+}
+
+bool BlackList::isInInit(const GlobalVariable &G) const {
+ return (isIn(*G.getParent()) ||
+ inSection("global-init", G.getName()) ||
+ inSection("global-init-type", GetGVTypeString(G)));
+}
+
+bool BlackList::inSection(const StringRef Section,
+ const StringRef Query) const {
+ StringMap<Regex*>::const_iterator I = Entries.find(Section);
+ if (I == Entries.end()) return false;
+
+ Regex *FunctionRegex = I->getValue();
+ return FunctionRegex->match(Query);
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
new file mode 100644
index 000000000000..b094d42568f0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -0,0 +1,212 @@
+//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that instruments the code to perform run-time
+// bounds checking on loads, stores, and other memory intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "bounds-checking"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetFolder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+using namespace llvm;
+
+static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
+ cl::desc("Use one trap block per function"));
+
+STATISTIC(ChecksAdded, "Bounds checks added");
+STATISTIC(ChecksSkipped, "Bounds checks skipped");
+STATISTIC(ChecksUnable, "Bounds checks unable to add");
+
+typedef IRBuilder<true, TargetFolder> BuilderTy;
+
+namespace {
+ struct BoundsChecking : public FunctionPass {
+ static char ID;
+
+ BoundsChecking() : FunctionPass(ID) {
+ initializeBoundsCheckingPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DataLayout>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+
+ private:
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ ObjectSizeOffsetEvaluator *ObjSizeEval;
+ BuilderTy *Builder;
+ Instruction *Inst;
+ BasicBlock *TrapBB;
+
+ BasicBlock *getTrapBB();
+ void emitBranchToTrap(Value *Cmp = 0);
+ bool computeAllocSize(Value *Ptr, APInt &Offset, Value* &OffsetValue,
+ APInt &Size, Value* &SizeValue);
+ bool instrument(Value *Ptr, Value *Val);
+ };
+}
+
+char BoundsChecking::ID = 0;
+INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking",
+ false, false)
+
+
+/// getTrapBB - create a basic block that traps. All overflowing conditions
+/// branch to this block. There's only one trap block per function.
+BasicBlock *BoundsChecking::getTrapBB() {
+ if (TrapBB && SingleTrapBB)
+ return TrapBB;
+
+ Function *Fn = Inst->getParent()->getParent();
+ BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint();
+ TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
+ Builder->SetInsertPoint(TrapBB);
+
+ llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
+ CallInst *TrapCall = Builder->CreateCall(F);
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ TrapCall->setDebugLoc(Inst->getDebugLoc());
+ Builder->CreateUnreachable();
+
+ Builder->SetInsertPoint(PrevInsertPoint);
+ return TrapBB;
+}
+
+
+/// emitBranchToTrap - emit a branch instruction to a trap block.
+/// If Cmp is non-null, perform a jump only if its value evaluates to true.
+void BoundsChecking::emitBranchToTrap(Value *Cmp) {
+ // check if the comparison is always false
+ ConstantInt *C = dyn_cast_or_null<ConstantInt>(Cmp);
+ if (C) {
+ ++ChecksSkipped;
+ if (!C->getZExtValue())
+ return;
+ else
+ Cmp = 0; // unconditional branch
+ }
+ ++ChecksAdded;
+
+ Instruction *Inst = Builder->GetInsertPoint();
+ BasicBlock *OldBB = Inst->getParent();
+ BasicBlock *Cont = OldBB->splitBasicBlock(Inst);
+ OldBB->getTerminator()->eraseFromParent();
+
+ if (Cmp)
+ BranchInst::Create(getTrapBB(), Cont, Cmp, OldBB);
+ else
+ BranchInst::Create(getTrapBB(), OldBB);
+}
+
+
+/// instrument - adds run-time bounds checks to memory accessing instructions.
+/// Ptr is the pointer that will be read/written, and InstVal is either the
+/// result from the load or the value being stored. It is used to determine the
+/// size of memory block that is touched.
+/// Returns true if any change was made to the IR, false otherwise.
+bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
+ uint64_t NeededSize = TD->getTypeStoreSize(InstVal->getType());
+ DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
+ << " bytes\n");
+
+ SizeOffsetEvalType SizeOffset = ObjSizeEval->compute(Ptr);
+
+ if (!ObjSizeEval->bothKnown(SizeOffset)) {
+ ++ChecksUnable;
+ return false;
+ }
+
+ Value *Size = SizeOffset.first;
+ Value *Offset = SizeOffset.second;
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
+
+ Type *IntTy = TD->getIntPtrType(Ptr->getType());
+ Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
+
+ // three checks are required to ensure safety:
+ // . Offset >= 0 (since the offset is given from the base ptr)
+ // . Size >= Offset (unsigned)
+ // . Size - Offset >= NeededSize (unsigned)
+ //
+ // optimization: if Size >= 0 (signed), skip 1st check
+ // FIXME: add NSW/NUW here? -- we dont care if the subtraction overflows
+ Value *ObjSize = Builder->CreateSub(Size, Offset);
+ Value *Cmp2 = Builder->CreateICmpULT(Size, Offset);
+ Value *Cmp3 = Builder->CreateICmpULT(ObjSize, NeededSizeVal);
+ Value *Or = Builder->CreateOr(Cmp2, Cmp3);
+ if (!SizeCI || SizeCI->getValue().slt(0)) {
+ Value *Cmp1 = Builder->CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0));
+ Or = Builder->CreateOr(Cmp1, Or);
+ }
+ emitBranchToTrap(Or);
+
+ return true;
+}
+
+bool BoundsChecking::runOnFunction(Function &F) {
+ TD = &getAnalysis<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ TrapBB = 0;
+ BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
+ Builder = &TheBuilder;
+ ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext());
+ ObjSizeEval = &TheObjSizeEval;
+
+ // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
+ // touching instructions
+ std::vector<Instruction*> WorkList;
+ for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+ Instruction *I = &*i;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicCmpXchgInst>(I) ||
+ isa<AtomicRMWInst>(I))
+ WorkList.push_back(I);
+ }
+
+ bool MadeChange = false;
+ for (std::vector<Instruction*>::iterator i = WorkList.begin(),
+ e = WorkList.end(); i != e; ++i) {
+ Inst = *i;
+
+ Builder->SetInsertPoint(Inst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ MadeChange |= instrument(LI->getPointerOperand(), LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand());
+ } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+ MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand());
+ } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) {
+ MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand());
+ } else {
+ llvm_unreachable("unknown Instruction type");
+ }
+ }
+ return MadeChange;
+}
+
+FunctionPass *llvm::createBoundsCheckingPass() {
+ return new BoundsChecking();
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
new file mode 100644
index 000000000000..a2459fbafe18
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -0,0 +1,117 @@
+//===- EdgeProfiling.cpp - Insert counters for edge profiling -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+// Note that this implementation is very naive. We insert a counter for *every*
+// edge in the program, instead of using control flow information to prune the
+// number of counters inserted.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-edge-profiling"
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "ProfilingUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+ class EdgeProfiler : public ModulePass {
+ bool runOnModule(Module &M);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ EdgeProfiler() : ModulePass(ID) {
+ initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Edge Profiler";
+ }
+ };
+}
+
+char EdgeProfiler::ID = 0;
+INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
+ "Insert instrumentation for edge profiling", false, false)
+
+ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
+
+bool EdgeProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+ std::set<BasicBlock*> BlocksToInstrument;
+ unsigned NumEdges = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Reserve space for (0,entry) edge.
+ ++NumEdges;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Keep track of which blocks need to be instrumented. We don't want to
+ // instrument blocks that are added as the result of breaking critical
+ // edges!
+ BlocksToInstrument.insert(BB);
+ NumEdges += BB->getTerminator()->getNumSuccessors();
+ }
+ }
+
+ Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
+ GlobalVariable *Counters =
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "EdgeProfCounters");
+ NumEdgesInserted = NumEdges;
+
+ // Instrument all of the edges...
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Create counter for (0,entry) edge.
+ IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks
+ // Okay, we have to add a counter of each outgoing edge. If the
+ // outgoing edge is not critical don't split it, just insert the counter
+ // in the source or destination of the edge.
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ // If the edge is critical, split it.
+ SplitCriticalEdge(TI, s, this);
+
+ // Okay, we are guaranteed that the edge is no longer critical. If we
+ // only have a single successor, insert the counter in this block,
+ // otherwise insert it in the successor block.
+ if (TI->getNumSuccessors() == 1) {
+ // Insert counter at the start of the block
+ IncrementCounterInBlock(BB, i++, Counters, false);
+ } else {
+ // Insert counter at the start of the block
+ IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
+ }
+ }
+ }
+ }
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
new file mode 100644
index 000000000000..2edd151869e0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -0,0 +1,855 @@
+//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements GCOV-style profiling. When this pass is run it emits
+// "gcno" files next to the existing source, and instruments the code that runs
+// to records the edges between blocks that run and emit a complementary "gcda"
+// file on exit.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "insert-gcov-profiling"
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "ProfilingUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/PathV2.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <string>
+#include <utility>
+using namespace llvm;
+
+static cl::opt<std::string>
+DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
+ cl::ValueRequired);
+
+GCOVOptions GCOVOptions::getDefault() {
+ GCOVOptions Options;
+ Options.EmitNotes = true;
+ Options.EmitData = true;
+ Options.UseCfgChecksum = false;
+ Options.NoRedZone = false;
+ Options.FunctionNamesInData = true;
+
+ if (DefaultGCOVVersion.size() != 4) {
+ llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+ DefaultGCOVVersion);
+ }
+ memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
+ return Options;
+}
+
+namespace {
+ class GCOVProfiler : public ModulePass {
+ public:
+ static char ID;
+ GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
+ initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+ }
+ GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
+ assert((Options.EmitNotes || Options.EmitData) &&
+ "GCOVProfiler asked to do nothing?");
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
+ initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const {
+ return "GCOV Profiler";
+ }
+
+ private:
+ bool runOnModule(Module &M);
+
+ // Create the .gcno files for the Module based on DebugInfo.
+ void emitProfileNotes();
+
+ // Modify the program to track transitions along edges and call into the
+ // profiling runtime to emit .gcda files when run.
+ bool emitProfileArcs();
+
+ // Get pointers to the functions in the runtime library.
+ Constant *getStartFileFunc();
+ Constant *getIncrementIndirectCounterFunc();
+ Constant *getEmitFunctionFunc();
+ Constant *getEmitArcsFunc();
+ Constant *getDeleteWriteoutFunctionListFunc();
+ Constant *getDeleteFlushFunctionListFunc();
+ Constant *getEndFileFunc();
+
+ // Create or retrieve an i32 state value that is used to represent the
+ // pred block number for certain non-trivial edges.
+ GlobalVariable *getEdgeStateValue();
+
+ // Produce a table of pointers to counters, by predecessor and successor
+ // block number.
+ GlobalVariable *buildEdgeLookupTable(Function *F,
+ GlobalVariable *Counter,
+ const UniqueVector<BasicBlock *>&Preds,
+ const UniqueVector<BasicBlock*>&Succs);
+
+ // Add the function to write out all our counters to the global destructor
+ // list.
+ Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*,
+ MDNode*> >);
+ Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
+ void insertIndirectCounterIncrement();
+
+ std::string mangleName(DICompileUnit CU, const char *NewStem);
+
+ GCOVOptions Options;
+
+ // Reversed, NUL-terminated copy of Options.Version.
+ char ReversedVersion[5];
+
+ Module *M;
+ LLVMContext *Ctx;
+ };
+}
+
+char GCOVProfiler::ID = 0;
+INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
+ "Insert instrumentation for GCOV profiling", false, false)
+
+ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
+ return new GCOVProfiler(Options);
+}
+
+static std::string getFunctionName(DISubprogram SP) {
+ if (!SP.getLinkageName().empty())
+ return SP.getLinkageName();
+ return SP.getName();
+}
+
+namespace {
+ class GCOVRecord {
+ protected:
+ static const char *LinesTag;
+ static const char *FunctionTag;
+ static const char *BlockTag;
+ static const char *EdgeTag;
+
+ GCOVRecord() {}
+
+ void writeBytes(const char *Bytes, int Size) {
+ os->write(Bytes, Size);
+ }
+
+ void write(uint32_t i) {
+ writeBytes(reinterpret_cast<char*>(&i), 4);
+ }
+
+ // Returns the length measured in 4-byte blocks that will be used to
+ // represent this string in a GCOV file
+ unsigned lengthOfGCOVString(StringRef s) {
+ // A GCOV string is a length, followed by a NUL, then between 0 and 3 NULs
+ // padding out to the next 4-byte word. The length is measured in 4-byte
+ // words including padding, not bytes of actual string.
+ return (s.size() / 4) + 1;
+ }
+
+ void writeGCOVString(StringRef s) {
+ uint32_t Len = lengthOfGCOVString(s);
+ write(Len);
+ writeBytes(s.data(), s.size());
+
+ // Write 1 to 4 bytes of NUL padding.
+ assert((unsigned)(4 - (s.size() % 4)) > 0);
+ assert((unsigned)(4 - (s.size() % 4)) <= 4);
+ writeBytes("\0\0\0\0", 4 - (s.size() % 4));
+ }
+
+ raw_ostream *os;
+ };
+ const char *GCOVRecord::LinesTag = "\0\0\x45\x01";
+ const char *GCOVRecord::FunctionTag = "\0\0\0\1";
+ const char *GCOVRecord::BlockTag = "\0\0\x41\x01";
+ const char *GCOVRecord::EdgeTag = "\0\0\x43\x01";
+
+ class GCOVFunction;
+ class GCOVBlock;
+
+ // Constructed only by requesting it from a GCOVBlock, this object stores a
+ // list of line numbers and a single filename, representing lines that belong
+ // to the block.
+ class GCOVLines : public GCOVRecord {
+ public:
+ void addLine(uint32_t Line) {
+ Lines.push_back(Line);
+ }
+
+ uint32_t length() {
+ // Here 2 = 1 for string length + 1 for '0' id#.
+ return lengthOfGCOVString(Filename) + 2 + Lines.size();
+ }
+
+ void writeOut() {
+ write(0);
+ writeGCOVString(Filename);
+ for (int i = 0, e = Lines.size(); i != e; ++i)
+ write(Lines[i]);
+ }
+
+ GCOVLines(StringRef F, raw_ostream *os)
+ : Filename(F) {
+ this->os = os;
+ }
+
+ private:
+ StringRef Filename;
+ SmallVector<uint32_t, 32> Lines;
+ };
+
+ // Represent a basic block in GCOV. Each block has a unique number in the
+ // function, number of lines belonging to each block, and a set of edges to
+ // other blocks.
+ class GCOVBlock : public GCOVRecord {
+ public:
+ GCOVLines &getFile(StringRef Filename) {
+ GCOVLines *&Lines = LinesByFile[Filename];
+ if (!Lines) {
+ Lines = new GCOVLines(Filename, os);
+ }
+ return *Lines;
+ }
+
+ void addEdge(GCOVBlock &Successor) {
+ OutEdges.push_back(&Successor);
+ }
+
+ void writeOut() {
+ uint32_t Len = 3;
+ for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
+ E = LinesByFile.end(); I != E; ++I) {
+ Len += I->second->length();
+ }
+
+ writeBytes(LinesTag, 4);
+ write(Len);
+ write(Number);
+ for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
+ E = LinesByFile.end(); I != E; ++I)
+ I->second->writeOut();
+ write(0);
+ write(0);
+ }
+
+ ~GCOVBlock() {
+ DeleteContainerSeconds(LinesByFile);
+ }
+
+ private:
+ friend class GCOVFunction;
+
+ GCOVBlock(uint32_t Number, raw_ostream *os)
+ : Number(Number) {
+ this->os = os;
+ }
+
+ uint32_t Number;
+ StringMap<GCOVLines *> LinesByFile;
+ SmallVector<GCOVBlock *, 4> OutEdges;
+ };
+
+ // A function has a unique identifier, a checksum (we leave as zero) and a
+ // set of blocks and a map of edges between blocks. This is the only GCOV
+ // object users can construct, the blocks and lines will be rooted here.
+ class GCOVFunction : public GCOVRecord {
+ public:
+ GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
+ bool UseCfgChecksum) {
+ this->os = os;
+
+ Function *F = SP.getFunction();
+ DEBUG(dbgs() << "Function: " << F->getName() << "\n");
+ uint32_t i = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ Blocks[BB] = new GCOVBlock(i++, os);
+ }
+ ReturnBlock = new GCOVBlock(i++, os);
+
+ writeBytes(FunctionTag, 4);
+ uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
+ 1 + lengthOfGCOVString(SP.getFilename()) + 1;
+ if (UseCfgChecksum)
+ ++BlockLen;
+ write(BlockLen);
+ write(Ident);
+ write(0); // lineno checksum
+ if (UseCfgChecksum)
+ write(0); // cfg checksum
+ writeGCOVString(getFunctionName(SP));
+ writeGCOVString(SP.getFilename());
+ write(SP.getLineNumber());
+ }
+
+ ~GCOVFunction() {
+ DeleteContainerSeconds(Blocks);
+ delete ReturnBlock;
+ }
+
+ GCOVBlock &getBlock(BasicBlock *BB) {
+ return *Blocks[BB];
+ }
+
+ GCOVBlock &getReturnBlock() {
+ return *ReturnBlock;
+ }
+
+ void writeOut() {
+ // Emit count of blocks.
+ writeBytes(BlockTag, 4);
+ write(Blocks.size() + 1);
+ for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
+ write(0); // No flags on our blocks.
+ }
+ DEBUG(dbgs() << Blocks.size() << " blocks.\n");
+
+ // Emit edges between blocks.
+ for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I) {
+ GCOVBlock &Block = *I->second;
+ if (Block.OutEdges.empty()) continue;
+
+ writeBytes(EdgeTag, 4);
+ write(Block.OutEdges.size() * 2 + 1);
+ write(Block.Number);
+ for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+ DEBUG(dbgs() << Block.Number << " -> " << Block.OutEdges[i]->Number
+ << "\n");
+ write(Block.OutEdges[i]->Number);
+ write(0); // no flags
+ }
+ }
+
+ // Emit lines for each block.
+ for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I) {
+ I->second->writeOut();
+ }
+ }
+
+ private:
+ DenseMap<BasicBlock *, GCOVBlock *> Blocks;
+ GCOVBlock *ReturnBlock;
+ };
+}
+
+std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
+ if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
+ for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
+ MDNode *N = GCov->getOperand(i);
+ if (N->getNumOperands() != 2) continue;
+ MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
+ MDNode *CompileUnit = dyn_cast<MDNode>(N->getOperand(1));
+ if (!GCovFile || !CompileUnit) continue;
+ if (CompileUnit == CU) {
+ SmallString<128> Filename = GCovFile->getString();
+ sys::path::replace_extension(Filename, NewStem);
+ return Filename.str();
+ }
+ }
+ }
+
+ SmallString<128> Filename = CU.getFilename();
+ sys::path::replace_extension(Filename, NewStem);
+ StringRef FName = sys::path::filename(Filename);
+ SmallString<128> CurPath;
+ if (sys::fs::current_path(CurPath)) return FName;
+ sys::path::append(CurPath, FName.str());
+ return CurPath.str();
+}
+
+bool GCOVProfiler::runOnModule(Module &M) {
+ this->M = &M;
+ Ctx = &M.getContext();
+
+ if (Options.EmitNotes) emitProfileNotes();
+ if (Options.EmitData) return emitProfileArcs();
+ return false;
+}
+
+void GCOVProfiler::emitProfileNotes() {
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes) return;
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ std::string ErrorInfo;
+ raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ out.write("oncg", 4);
+ out.write(ReversedVersion, 4);
+ out.write("MVLL", 4);
+
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+ }
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0;
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
+ I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename());
+ Lines.addLine(Loc.getLine());
+ }
+ }
+ Func.writeOut();
+ }
+ out.write("\0\0\0\0\0\0\0\0", 8); // EOF
+ out.close();
+ }
+}
+
+bool GCOVProfiler::emitProfileArcs() {
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes) return false;
+
+ bool Result = false;
+ bool InsertIndCounterIncrCode = false;
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ DIArray SPs = CU.getSubprograms();
+ SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ if (!Result) Result = true;
+ unsigned Edges = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (isa<ReturnInst>(TI))
+ ++Edges;
+ else
+ Edges += TI->getNumSuccessors();
+ }
+
+ ArrayType *CounterTy =
+ ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
+ GlobalVariable *Counters =
+ new GlobalVariable(*M, CounterTy, false,
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(CounterTy),
+ "__llvm_gcov_ctr");
+ CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
+
+ UniqueVector<BasicBlock *> ComplexEdgePreds;
+ UniqueVector<BasicBlock *> ComplexEdgeSuccs;
+
+ unsigned Edge = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+ if (Successors) {
+ IRBuilder<> Builder(TI);
+
+ if (Successors == 1) {
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ Edge);
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+ Builder.CreateStore(Count, Counter);
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ Value *Sel = Builder.CreateSelect(BI->getCondition(),
+ Builder.getInt64(Edge),
+ Builder.getInt64(Edge + 1));
+ SmallVector<Value *, 2> Idx;
+ Idx.push_back(Builder.getInt64(0));
+ Idx.push_back(Sel);
+ Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+ Builder.CreateStore(Count, Counter);
+ } else {
+ ComplexEdgePreds.insert(BB);
+ for (int i = 0; i != Successors; ++i)
+ ComplexEdgeSuccs.insert(TI->getSuccessor(i));
+ }
+ Edge += Successors;
+ }
+ }
+
+ if (!ComplexEdgePreds.empty()) {
+ GlobalVariable *EdgeTable =
+ buildEdgeLookupTable(F, Counters,
+ ComplexEdgePreds, ComplexEdgeSuccs);
+ GlobalVariable *EdgeState = getEdgeStateValue();
+
+ for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
+ IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
+ Builder.CreateStore(Builder.getInt32(i), EdgeState);
+ }
+ for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
+ // call runtime to perform increment
+ BasicBlock::iterator InsertPt =
+ ComplexEdgeSuccs[i+1]->getFirstInsertionPt();
+ IRBuilder<> Builder(InsertPt);
+ Value *CounterPtrArray =
+ Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
+ i * ComplexEdgePreds.size());
+
+ // Build code to increment the counter.
+ InsertIndCounterIncrCode = true;
+ Builder.CreateCall2(getIncrementIndirectCounterFunc(),
+ EdgeState, CounterPtrArray);
+ }
+ }
+ }
+
+ Function *WriteoutF = insertCounterWriteout(CountersBySP);
+ Function *FlushF = insertFlush(CountersBySP);
+
+ // Create a small bit of code that registers the "__llvm_gcov_writeout" to
+ // be executed at exit and the "__llvm_gcov_flush" function to be executed
+ // when "__gcov_flush" is called.
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_init", M);
+ F->setUnnamedAddr(true);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ F->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ IRBuilder<> Builder(BB);
+
+ FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Type *Params[] = {
+ PointerType::get(FTy, 0),
+ PointerType::get(FTy, 0)
+ };
+ FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
+
+ // Inialize the environment and register the local writeout and flush
+ // functions.
+ Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+ Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
+ Builder.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
+ }
+
+ if (InsertIndCounterIncrCode)
+ insertIndirectCounterIncrement();
+
+ return Result;
+}
+
+// All edges with successors that aren't branches are "complex", because it
+// requires complex logic to pick which counter to update.
+GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
+ Function *F,
+ GlobalVariable *Counters,
+ const UniqueVector<BasicBlock *> &Preds,
+ const UniqueVector<BasicBlock *> &Succs) {
+ // TODO: support invoke, threads. We rely on the fact that nothing can modify
+ // the whole-Module pred edge# between the time we set it and the time we next
+ // read it. Threads and invoke make this untrue.
+
+ // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
+ size_t TableSize = Succs.size() * Preds.size();
+ Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
+ ArrayType *EdgeTableTy = ArrayType::get(Int64PtrTy, TableSize);
+
+ OwningArrayPtr<Constant *> EdgeTable(new Constant*[TableSize]);
+ Constant *NullValue = Constant::getNullValue(Int64PtrTy);
+ for (size_t i = 0; i != TableSize; ++i)
+ EdgeTable[i] = NullValue;
+
+ unsigned Edge = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+ if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
+ for (int i = 0; i != Successors; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ IRBuilder<> Builder(Succ);
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ Edge + i);
+ EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
+ (Preds.idFor(BB)-1)] = cast<Constant>(Counter);
+ }
+ }
+ Edge += Successors;
+ }
+
+ ArrayRef<Constant*> V(&EdgeTable[0], TableSize);
+ GlobalVariable *EdgeTableGV =
+ new GlobalVariable(
+ *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
+ ConstantArray::get(EdgeTableTy, V),
+ "__llvm_gcda_edge_table");
+ EdgeTableGV->setUnnamedAddr(true);
+ return EdgeTableGV;
+}
+
+Constant *GCOVProfiler::getStartFileFunc() {
+ Type *Args[] = {
+ Type::getInt8PtrTy(*Ctx), // const char *orig_filename
+ Type::getInt8PtrTy(*Ctx), // const char version[4]
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+ return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
+}
+
+Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
+ Type *Int32Ty = Type::getInt32Ty(*Ctx);
+ Type *Int64Ty = Type::getInt64Ty(*Ctx);
+ Type *Args[] = {
+ Int32Ty->getPointerTo(), // uint32_t *predecessor
+ Int64Ty->getPointerTo()->getPointerTo() // uint64_t **counters
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+ return M->getOrInsertFunction("__llvm_gcov_indirect_counter_increment", FTy);
+}
+
+Constant *GCOVProfiler::getEmitFunctionFunc() {
+ Type *Args[3] = {
+ Type::getInt32Ty(*Ctx), // uint32_t ident
+ Type::getInt8PtrTy(*Ctx), // const char *function_name
+ Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+ return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
+}
+
+Constant *GCOVProfiler::getEmitArcsFunc() {
+ Type *Args[] = {
+ Type::getInt32Ty(*Ctx), // uint32_t num_counters
+ Type::getInt64PtrTy(*Ctx), // uint64_t *counters
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+ return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
+}
+
+Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy);
+}
+
+Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy);
+}
+
+Constant *GCOVProfiler::getEndFileFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
+}
+
+GlobalVariable *GCOVProfiler::getEdgeStateValue() {
+ GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred");
+ if (!GV) {
+ GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::get(Type::getInt32Ty(*Ctx),
+ 0xffffffff),
+ "__llvm_gcov_global_state_pred");
+ GV->setUnnamedAddr(true);
+ }
+ return GV;
+}
+
+Function *GCOVProfiler::insertCounterWriteout(
+ ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
+ FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+ if (!WriteoutF)
+ WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_writeout", M);
+ WriteoutF->setUnnamedAddr(true);
+ WriteoutF->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ WriteoutF->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
+ IRBuilder<> Builder(BB);
+
+ Constant *StartFile = getStartFileFunc();
+ Constant *EmitFunction = getEmitFunctionFunc();
+ Constant *EmitArcs = getEmitArcsFunc();
+ Constant *EndFile = getEndFileFunc();
+
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (CU_Nodes) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ std::string FilenameGcda = mangleName(CU, "gcda");
+ Builder.CreateCall2(StartFile,
+ Builder.CreateGlobalStringPtr(FilenameGcda),
+ Builder.CreateGlobalStringPtr(ReversedVersion));
+ for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
+ DISubprogram SP(CountersBySP[j].second);
+ Builder.CreateCall3(
+ EmitFunction, Builder.getInt32(j),
+ Options.FunctionNamesInData ?
+ Builder.CreateGlobalStringPtr(getFunctionName(SP)) :
+ Constant::getNullValue(Builder.getInt8PtrTy()),
+ Builder.getInt8(Options.UseCfgChecksum));
+
+ GlobalVariable *GV = CountersBySP[j].first;
+ unsigned Arcs =
+ cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
+ Builder.CreateCall2(EmitArcs,
+ Builder.getInt32(Arcs),
+ Builder.CreateConstGEP2_64(GV, 0, 0));
+ }
+ Builder.CreateCall(EndFile);
+ }
+ }
+
+ Builder.CreateRetVoid();
+ return WriteoutF;
+}
+
+void GCOVProfiler::insertIndirectCounterIncrement() {
+ Function *Fn =
+ cast<Function>(GCOVProfiler::getIncrementIndirectCounterFunc());
+ Fn->setUnnamedAddr(true);
+ Fn->setLinkage(GlobalValue::InternalLinkage);
+ Fn->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ Fn->addFnAttr(Attribute::NoRedZone);
+
+ // Create basic blocks for function.
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", Fn);
+ IRBuilder<> Builder(BB);
+
+ BasicBlock *PredNotNegOne = BasicBlock::Create(*Ctx, "", Fn);
+ BasicBlock *CounterEnd = BasicBlock::Create(*Ctx, "", Fn);
+ BasicBlock *Exit = BasicBlock::Create(*Ctx, "exit", Fn);
+
+ // uint32_t pred = *predecessor;
+ // if (pred == 0xffffffff) return;
+ Argument *Arg = Fn->arg_begin();
+ Arg->setName("predecessor");
+ Value *Pred = Builder.CreateLoad(Arg, "pred");
+ Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff));
+ BranchInst::Create(Exit, PredNotNegOne, Cond, BB);
+
+ Builder.SetInsertPoint(PredNotNegOne);
+
+ // uint64_t *counter = counters[pred];
+ // if (!counter) return;
+ Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
+ Arg = llvm::next(Fn->arg_begin());
+ Arg->setName("counters");
+ Value *GEP = Builder.CreateGEP(Arg, ZExtPred);
+ Value *Counter = Builder.CreateLoad(GEP, "counter");
+ Cond = Builder.CreateICmpEQ(Counter,
+ Constant::getNullValue(
+ Builder.getInt64Ty()->getPointerTo()));
+ Builder.CreateCondBr(Cond, Exit, CounterEnd);
+
+ // ++*counter;
+ Builder.SetInsertPoint(CounterEnd);
+ Value *Add = Builder.CreateAdd(Builder.CreateLoad(Counter),
+ Builder.getInt64(1));
+ Builder.CreateStore(Add, Counter);
+ Builder.CreateBr(Exit);
+
+ // Fill in the exit block.
+ Builder.SetInsertPoint(Exit);
+ Builder.CreateRetVoid();
+}
+
+Function *GCOVProfiler::
+insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *FlushF = M->getFunction("__llvm_gcov_flush");
+ if (!FlushF)
+ FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_flush", M);
+ else
+ FlushF->setLinkage(GlobalValue::InternalLinkage);
+ FlushF->setUnnamedAddr(true);
+ FlushF->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ FlushF->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
+
+ // Write out the current counters.
+ Constant *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+ assert(WriteoutF && "Need to create the writeout function first!");
+
+ IRBuilder<> Builder(Entry);
+ Builder.CreateCall(WriteoutF);
+
+ // Zero out the counters.
+ for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
+ I = CountersBySP.begin(), E = CountersBySP.end();
+ I != E; ++I) {
+ GlobalVariable *GV = I->first;
+ Constant *Null = Constant::getNullValue(GV->getType()->getElementType());
+ Builder.CreateStore(Null, GV);
+ }
+
+ Type *RetTy = FlushF->getReturnType();
+ if (RetTy == Type::getVoidTy(*Ctx))
+ Builder.CreateRetVoid();
+ else if (RetTy->isIntegerTy())
+ // Used if __llvm_gcov_flush was implicitly declared.
+ Builder.CreateRet(ConstantInt::get(RetTy, 0));
+ else
+ report_fatal_error("invalid return type for __llvm_gcov_flush");
+
+ return FlushF;
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
new file mode 100644
index 000000000000..8ba102559bb6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -0,0 +1,38 @@
+//===-- Instrumentation.cpp - TransformUtils Infrastructure ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// Instrumentation library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeInstrumentation - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeInstrumentation(PassRegistry &Registry) {
+ initializeAddressSanitizerPass(Registry);
+ initializeAddressSanitizerModulePass(Registry);
+ initializeBoundsCheckingPass(Registry);
+ initializeEdgeProfilerPass(Registry);
+ initializeGCOVProfilerPass(Registry);
+ initializeOptimalEdgeProfilerPass(Registry);
+ initializePathProfilerPass(Registry);
+ initializeMemorySanitizerPass(Registry);
+ initializeThreadSanitizerPass(Registry);
+}
+
+/// LLVMInitializeInstrumentation - C binding for
+/// initializeInstrumentation.
+void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) {
+ initializeInstrumentation(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
new file mode 100644
index 000000000000..363539b2886f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -0,0 +1,111 @@
+//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This module provides means for calculating a maximum spanning tree for a
+// given set of weighted edges. The type parameter T is the type of a node.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/IR/BasicBlock.h"
+#include <algorithm>
+#include <vector>
+
+namespace llvm {
+
+ /// MaximumSpanningTree - A MST implementation.
+ /// The type parameter T determines the type of the nodes of the graph.
+ template <typename T>
+ class MaximumSpanningTree {
+ public:
+ typedef std::pair<const T*, const T*> Edge;
+ typedef std::pair<Edge, double> EdgeWeight;
+ typedef std::vector<EdgeWeight> EdgeWeights;
+ protected:
+ typedef std::vector<Edge> MaxSpanTree;
+
+ MaxSpanTree MST;
+
+ private:
+ // A comparing class for comparing weighted edges.
+ struct EdgeWeightCompare {
+ static bool getBlockSize(const T *X) {
+ const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X);
+ return BB ? BB->size() : 0;
+ }
+
+ bool operator()(EdgeWeight X, EdgeWeight Y) const {
+ if (X.second > Y.second) return true;
+ if (X.second < Y.second) return false;
+
+ // Equal edge weights: break ties by comparing block sizes.
+ size_t XSizeA = getBlockSize(X.first.first);
+ size_t YSizeA = getBlockSize(Y.first.first);
+ if (XSizeA > YSizeA) return true;
+ if (XSizeA < YSizeA) return false;
+
+ size_t XSizeB = getBlockSize(X.first.second);
+ size_t YSizeB = getBlockSize(Y.first.second);
+ if (XSizeB > YSizeB) return true;
+ if (XSizeB < YSizeB) return false;
+
+ return false;
+ }
+ };
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
+ /// spanning tree.
+ MaximumSpanningTree(EdgeWeights &EdgeVector) {
+
+ std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare());
+
+ // Create spanning tree, Forest contains a special data structure
+ // that makes checking if two nodes are already in a common (sub-)tree
+ // fast and cheap.
+ EquivalenceClasses<const T*> Forest;
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ Forest.insert(e.first);
+ Forest.insert(e.second);
+ }
+
+ // Iterate over the sorted edges, biggest first.
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) {
+ Forest.unionSets(e.first, e.second);
+ // So we know now that the edge is not already in a subtree, so we push
+ // the edge to the MST.
+ MST.push_back(e);
+ }
+ }
+ }
+
+ typename MaxSpanTree::iterator begin() {
+ return MST.begin();
+ }
+
+ typename MaxSpanTree::iterator end() {
+ return MST.end();
+ }
+ };
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
new file mode 100644
index 000000000000..4e75904ded4f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -0,0 +1,1985 @@
+//===-- MemorySanitizer.cpp - detector of uninitialized reads -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file is a part of MemorySanitizer, a detector of uninitialized
+/// reads.
+///
+/// Status: early prototype.
+///
+/// The algorithm of the tool is similar to Memcheck
+/// (http://goo.gl/QKbem). We associate a few shadow bits with every
+/// byte of the application memory, poison the shadow of the malloc-ed
+/// or alloca-ed memory, load the shadow bits on every memory read,
+/// propagate the shadow bits through some of the arithmetic
+/// instruction (including MOV), store the shadow bits on every memory
+/// write, report a bug on some other instructions (e.g. JMP) if the
+/// associated shadow is poisoned.
+///
+/// But there are differences too. The first and the major one:
+/// compiler instrumentation instead of binary instrumentation. This
+/// gives us much better register allocation, possible compiler
+/// optimizations and a fast start-up. But this brings the major issue
+/// as well: msan needs to see all program events, including system
+/// calls and reads/writes in system libraries, so we either need to
+/// compile *everything* with msan or use a binary translation
+/// component (e.g. DynamoRIO) to instrument pre-built libraries.
+/// Another difference from Memcheck is that we use 8 shadow bits per
+/// byte of application memory and use a direct shadow mapping. This
+/// greatly simplifies the instrumentation code and avoids races on
+/// shadow updates (Memcheck is single-threaded so races are not a
+/// concern there. Memcheck uses 2 shadow bits per byte with a slow
+/// path storage that uses 8 bits per byte).
+///
+/// The default value of shadow is 0, which means "clean" (not poisoned).
+///
+/// Every module initializer should call __msan_init to ensure that the
+/// shadow memory is ready. On error, __msan_warning is called. Since
+/// parameters and return values may be passed via registers, we have a
+/// specialized thread-local shadow for return values
+/// (__msan_retval_tls) and parameters (__msan_param_tls).
+///
+/// Origin tracking.
+///
+/// MemorySanitizer can track origins (allocation points) of all uninitialized
+/// values. This behavior is controlled with a flag (msan-track-origins) and is
+/// disabled by default.
+///
+/// Origins are 4-byte values created and interpreted by the runtime library.
+/// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
+/// of application memory. Propagation of origins is basically a bunch of
+/// "select" instructions that pick the origin of a dirty argument, if an
+/// instruction has one.
+///
+/// Every 4 aligned, consecutive bytes of application memory have one origin
+/// value associated with them. If these bytes contain uninitialized data
+/// coming from 2 different allocations, the last store wins. Because of this,
+/// MemorySanitizer reports can show unrelated origins, but this is unlikely in
+/// practice.
+///
+/// Origins are meaningless for fully initialized values, so MemorySanitizer
+/// avoids storing origin to memory when a fully initialized value is stored.
+/// This way it avoids needless overwritting origin of the 4-byte region on
+/// a short (i.e. 1 byte) clean store, and it is also good for performance.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msan"
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BlackList.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+static const uint64_t kShadowMask32 = 1ULL << 31;
+static const uint64_t kShadowMask64 = 1ULL << 46;
+static const uint64_t kOriginOffset32 = 1ULL << 30;
+static const uint64_t kOriginOffset64 = 1ULL << 45;
+static const unsigned kMinOriginAlignment = 4;
+static const unsigned kShadowTLSAlignment = 8;
+
+/// \brief Track origins of uninitialized values.
+///
+/// Adds a section to MemorySanitizer report that points to the allocation
+/// (stack or heap) the uninitialized bits came from originally.
+static cl::opt<bool> ClTrackOrigins("msan-track-origins",
+ cl::desc("Track origins (allocation sites) of poisoned memory"),
+ cl::Hidden, cl::init(false));
+static cl::opt<bool> ClKeepGoing("msan-keep-going",
+ cl::desc("keep going after reporting a UMR"),
+ cl::Hidden, cl::init(false));
+static cl::opt<bool> ClPoisonStack("msan-poison-stack",
+ cl::desc("poison uninitialized stack variables"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
+ cl::desc("poison uninitialized stack variables with a call"),
+ cl::Hidden, cl::init(false));
+static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
+ cl::desc("poison uninitialized stack variables with the given patter"),
+ cl::Hidden, cl::init(0xff));
+static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
+ cl::desc("poison undef temps"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
+ cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
+ cl::desc("exact handling of relational integer ICmp"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin",
+ cl::desc("store origin for clean (fully initialized) values"),
+ cl::Hidden, cl::init(false));
+
+// This flag controls whether we check the shadow of the address
+// operand of load or store. Such bugs are very rare, since load from
+// a garbage address typically results in SEGV, but still happen
+// (e.g. only lower bits of address are garbage, or the access happens
+// early at program startup where malloc-ed memory is more likely to
+// be zeroed. As of 2012-08-28 this flag adds 20% slowdown.
+static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
+ cl::desc("report accesses through a pointer which has poisoned shadow"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
+ cl::desc("print out instructions with default strict semantics"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<std::string> ClBlacklistFile("msan-blacklist",
+ cl::desc("File containing the list of functions where MemorySanitizer "
+ "should not report bugs"), cl::Hidden);
+
+namespace {
+
+/// \brief An instrumentation pass implementing detection of uninitialized
+/// reads.
+///
+/// MemorySanitizer: instrument the code in module to find
+/// uninitialized reads.
+class MemorySanitizer : public FunctionPass {
+ public:
+ MemorySanitizer(bool TrackOrigins = false,
+ StringRef BlacklistFile = StringRef())
+ : FunctionPass(ID),
+ TrackOrigins(TrackOrigins || ClTrackOrigins),
+ TD(0),
+ WarningFn(0),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
+ : BlacklistFile) { }
+ const char *getPassName() const { return "MemorySanitizer"; }
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ static char ID; // Pass identification, replacement for typeid.
+
+ private:
+ void initializeCallbacks(Module &M);
+
+ /// \brief Track origins (allocation points) of uninitialized values.
+ bool TrackOrigins;
+
+ DataLayout *TD;
+ LLVMContext *C;
+ Type *IntptrTy;
+ Type *OriginTy;
+ /// \brief Thread-local shadow storage for function parameters.
+ GlobalVariable *ParamTLS;
+ /// \brief Thread-local origin storage for function parameters.
+ GlobalVariable *ParamOriginTLS;
+ /// \brief Thread-local shadow storage for function return value.
+ GlobalVariable *RetvalTLS;
+ /// \brief Thread-local origin storage for function return value.
+ GlobalVariable *RetvalOriginTLS;
+ /// \brief Thread-local shadow storage for in-register va_arg function
+ /// parameters (x86_64-specific).
+ GlobalVariable *VAArgTLS;
+ /// \brief Thread-local shadow storage for va_arg overflow area
+ /// (x86_64-specific).
+ GlobalVariable *VAArgOverflowSizeTLS;
+ /// \brief Thread-local space used to pass origin value to the UMR reporting
+ /// function.
+ GlobalVariable *OriginTLS;
+
+ /// \brief The run-time callback to print a warning.
+ Value *WarningFn;
+ /// \brief Run-time helper that copies origin info for a memory range.
+ Value *MsanCopyOriginFn;
+ /// \brief Run-time helper that generates a new origin value for a stack
+ /// allocation.
+ Value *MsanSetAllocaOriginFn;
+ /// \brief Run-time helper that poisons stack on function entry.
+ Value *MsanPoisonStackFn;
+ /// \brief MSan runtime replacements for memmove, memcpy and memset.
+ Value *MemmoveFn, *MemcpyFn, *MemsetFn;
+
+ /// \brief Address mask used in application-to-shadow address calculation.
+ /// ShadowAddr is computed as ApplicationAddr & ~ShadowMask.
+ uint64_t ShadowMask;
+ /// \brief Offset of the origin shadow from the "normal" shadow.
+ /// OriginAddr is computed as (ShadowAddr + OriginOffset) & ~3ULL
+ uint64_t OriginOffset;
+ /// \brief Branch weights for error reporting.
+ MDNode *ColdCallWeights;
+ /// \brief Branch weights for origin store.
+ MDNode *OriginStoreWeights;
+ /// \bried Path to blacklist file.
+ SmallString<64> BlacklistFile;
+ /// \brief The blacklist.
+ OwningPtr<BlackList> BL;
+ /// \brief An empty volatile inline asm that prevents callback merge.
+ InlineAsm *EmptyAsm;
+
+ friend struct MemorySanitizerVisitor;
+ friend struct VarArgAMD64Helper;
+};
+} // namespace
+
+char MemorySanitizer::ID = 0;
+INITIALIZE_PASS(MemorySanitizer, "msan",
+ "MemorySanitizer: detects uninitialized reads.",
+ false, false)
+
+FunctionPass *llvm::createMemorySanitizerPass(bool TrackOrigins,
+ StringRef BlacklistFile) {
+ return new MemorySanitizer(TrackOrigins, BlacklistFile);
+}
+
+/// \brief Create a non-const global initialized with the given string.
+///
+/// Creates a writable global for Str so that we can pass it to the
+/// run-time lib. Runtime uses first 4 bytes of the string to store the
+/// frame ID, so the string needs to be mutable.
+static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
+ StringRef Str) {
+ Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+ return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
+ GlobalValue::PrivateLinkage, StrConst, "");
+}
+
+
+/// \brief Insert extern declaration of runtime-provided functions and globals.
+void MemorySanitizer::initializeCallbacks(Module &M) {
+ // Only do this once.
+ if (WarningFn)
+ return;
+
+ IRBuilder<> IRB(*C);
+ // Create the callback.
+ // FIXME: this function should have "Cold" calling conv,
+ // which is not yet implemented.
+ StringRef WarningFnName = ClKeepGoing ? "__msan_warning"
+ : "__msan_warning_noreturn";
+ WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL);
+
+ MsanCopyOriginFn = M.getOrInsertFunction(
+ "__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, NULL);
+ MsanSetAllocaOriginFn = M.getOrInsertFunction(
+ "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
+ IRB.getInt8PtrTy(), NULL);
+ MsanPoisonStackFn = M.getOrInsertFunction(
+ "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
+ MemmoveFn = M.getOrInsertFunction(
+ "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, NULL);
+ MemcpyFn = M.getOrInsertFunction(
+ "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IntptrTy, NULL);
+ MemsetFn = M.getOrInsertFunction(
+ "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
+ IntptrTy, NULL);
+
+ // Create globals.
+ RetvalTLS = new GlobalVariable(
+ M, ArrayType::get(IRB.getInt64Ty(), 8), false,
+ GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0,
+ GlobalVariable::GeneralDynamicTLSModel);
+ RetvalOriginTLS = new GlobalVariable(
+ M, OriginTy, false, GlobalVariable::ExternalLinkage, 0,
+ "__msan_retval_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+
+ ParamTLS = new GlobalVariable(
+ M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
+ GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0,
+ GlobalVariable::GeneralDynamicTLSModel);
+ ParamOriginTLS = new GlobalVariable(
+ M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage,
+ 0, "__msan_param_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+
+ VAArgTLS = new GlobalVariable(
+ M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
+ GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0,
+ GlobalVariable::GeneralDynamicTLSModel);
+ VAArgOverflowSizeTLS = new GlobalVariable(
+ M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0,
+ "__msan_va_arg_overflow_size_tls", 0,
+ GlobalVariable::GeneralDynamicTLSModel);
+ OriginTLS = new GlobalVariable(
+ M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0,
+ "__msan_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+
+ // We insert an empty inline asm after __msan_report* to avoid callback merge.
+ EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
+ StringRef(""), StringRef(""),
+ /*hasSideEffects=*/true);
+}
+
+/// \brief Module-level initialization.
+///
+/// inserts a call to __msan_init to the module's constructor list.
+bool MemorySanitizer::doInitialization(Module &M) {
+ TD = getAnalysisIfAvailable<DataLayout>();
+ if (!TD)
+ return false;
+ BL.reset(new BlackList(BlacklistFile));
+ C = &(M.getContext());
+ unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0);
+ switch (PtrSize) {
+ case 64:
+ ShadowMask = kShadowMask64;
+ OriginOffset = kOriginOffset64;
+ break;
+ case 32:
+ ShadowMask = kShadowMask32;
+ OriginOffset = kOriginOffset32;
+ break;
+ default:
+ report_fatal_error("unsupported pointer size");
+ break;
+ }
+
+ IRBuilder<> IRB(*C);
+ IntptrTy = IRB.getIntPtrTy(TD);
+ OriginTy = IRB.getInt32Ty();
+
+ ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
+ OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
+
+ // Insert a call to __msan_init/__msan_track_origins into the module's CTORs.
+ appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction(
+ "__msan_init", IRB.getVoidTy(), NULL)), 0);
+
+ new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(TrackOrigins), "__msan_track_origins");
+
+ new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(ClKeepGoing), "__msan_keep_going");
+
+ return true;
+}
+
+namespace {
+
+/// \brief A helper class that handles instrumentation of VarArg
+/// functions on a particular platform.
+///
+/// Implementations are expected to insert the instrumentation
+/// necessary to propagate argument shadow through VarArg function
+/// calls. Visit* methods are called during an InstVisitor pass over
+/// the function, and should avoid creating new basic blocks. A new
+/// instance of this class is created for each instrumented function.
+struct VarArgHelper {
+ /// \brief Visit a CallSite.
+ virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0;
+
+ /// \brief Visit a va_start call.
+ virtual void visitVAStartInst(VAStartInst &I) = 0;
+
+ /// \brief Visit a va_copy call.
+ virtual void visitVACopyInst(VACopyInst &I) = 0;
+
+ /// \brief Finalize function instrumentation.
+ ///
+ /// This method is called after visiting all interesting (see above)
+ /// instructions in a function.
+ virtual void finalizeInstrumentation() = 0;
+
+ virtual ~VarArgHelper() {}
+};
+
+struct MemorySanitizerVisitor;
+
+VarArgHelper*
+CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+ MemorySanitizerVisitor &Visitor);
+
+/// This class does all the work for a given function. Store and Load
+/// instructions store and load corresponding shadow and origin
+/// values. Most instructions propagate shadow from arguments to their
+/// return values. Certain instructions (most importantly, BranchInst)
+/// test their argument shadow and print reports (with a runtime call) if it's
+/// non-zero.
+struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
+ Function &F;
+ MemorySanitizer &MS;
+ SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
+ ValueMap<Value*, Value*> ShadowMap, OriginMap;
+ bool InsertChecks;
+ bool LoadShadow;
+ OwningPtr<VarArgHelper> VAHelper;
+
+ struct ShadowOriginAndInsertPoint {
+ Instruction *Shadow;
+ Instruction *Origin;
+ Instruction *OrigIns;
+ ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I)
+ : Shadow(S), Origin(O), OrigIns(I) { }
+ ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { }
+ };
+ SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
+ SmallVector<Instruction*, 16> StoreList;
+
+ MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
+ : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
+ LoadShadow = InsertChecks =
+ !MS.BL->isIn(F) &&
+ F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::SanitizeMemory);
+
+ DEBUG(if (!InsertChecks)
+ dbgs() << "MemorySanitizer is not inserting checks into '"
+ << F.getName() << "'\n");
+ }
+
+ void materializeStores() {
+ for (size_t i = 0, n = StoreList.size(); i < n; i++) {
+ StoreInst& I = *dyn_cast<StoreInst>(StoreList[i]);
+
+ IRBuilder<> IRB(&I);
+ Value *Val = I.getValueOperand();
+ Value *Addr = I.getPointerOperand();
+ Value *Shadow = getShadow(Val);
+ Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
+
+ StoreInst *NewSI =
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
+ DEBUG(dbgs() << " STORE: " << *NewSI << "\n");
+ (void)NewSI;
+
+ if (ClCheckAccessAddress)
+ insertCheck(Addr, &I);
+
+ if (MS.TrackOrigins) {
+ unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
+ if (ClStoreCleanOrigin || isa<StructType>(Shadow->getType())) {
+ IRB.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRB),
+ Alignment);
+ } else {
+ Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+
+ Constant *Cst = dyn_cast_or_null<Constant>(ConvertedShadow);
+ // TODO(eugenis): handle non-zero constant shadow by inserting an
+ // unconditional check (can not simply fail compilation as this could
+ // be in the dead code).
+ if (Cst)
+ continue;
+
+ Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
+ getCleanShadow(ConvertedShadow), "_mscmp");
+ Instruction *CheckTerm =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false,
+ MS.OriginStoreWeights);
+ IRBuilder<> IRBNew(CheckTerm);
+ IRBNew.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRBNew),
+ Alignment);
+ }
+ }
+ }
+ }
+
+ void materializeChecks() {
+ for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
+ Instruction *Shadow = InstrumentationList[i].Shadow;
+ Instruction *OrigIns = InstrumentationList[i].OrigIns;
+ IRBuilder<> IRB(OrigIns);
+ DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
+ Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+ DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
+ Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
+ getCleanShadow(ConvertedShadow), "_mscmp");
+ Instruction *CheckTerm =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp),
+ /* Unreachable */ !ClKeepGoing,
+ MS.ColdCallWeights);
+
+ IRB.SetInsertPoint(CheckTerm);
+ if (MS.TrackOrigins) {
+ Instruction *Origin = InstrumentationList[i].Origin;
+ IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
+ MS.OriginTLS);
+ }
+ CallInst *Call = IRB.CreateCall(MS.WarningFn);
+ Call->setDebugLoc(OrigIns->getDebugLoc());
+ IRB.CreateCall(MS.EmptyAsm);
+ DEBUG(dbgs() << " CHECK: " << *Cmp << "\n");
+ }
+ DEBUG(dbgs() << "DONE:\n" << F);
+ }
+
+ /// \brief Add MemorySanitizer instrumentation to a function.
+ bool runOnFunction() {
+ MS.initializeCallbacks(*F.getParent());
+ if (!MS.TD) return false;
+
+ // In the presence of unreachable blocks, we may see Phi nodes with
+ // incoming nodes from such blocks. Since InstVisitor skips unreachable
+ // blocks, such nodes will not have any shadow value associated with them.
+ // It's easier to remove unreachable blocks than deal with missing shadow.
+ removeUnreachableBlocks(F);
+
+ // Iterate all BBs in depth-first order and create shadow instructions
+ // for all instructions (where applicable).
+ // For PHI nodes we create dummy shadow PHIs which will be finalized later.
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+ BasicBlock *BB = *DI;
+ visit(*BB);
+ }
+
+ // Finalize PHI nodes.
+ for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) {
+ PHINode *PN = ShadowPHINodes[i];
+ PHINode *PNS = cast<PHINode>(getShadow(PN));
+ PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : 0;
+ size_t NumValues = PN->getNumIncomingValues();
+ for (size_t v = 0; v < NumValues; v++) {
+ PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
+ if (PNO)
+ PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
+ }
+ }
+
+ VAHelper->finalizeInstrumentation();
+
+ // Delayed instrumentation of StoreInst.
+ // This may add new checks to be inserted later.
+ materializeStores();
+
+ // Insert shadow value checks.
+ materializeChecks();
+
+ return true;
+ }
+
+ /// \brief Compute the shadow type that corresponds to a given Value.
+ Type *getShadowTy(Value *V) {
+ return getShadowTy(V->getType());
+ }
+
+ /// \brief Compute the shadow type that corresponds to a given Type.
+ Type *getShadowTy(Type *OrigTy) {
+ if (!OrigTy->isSized()) {
+ return 0;
+ }
+ // For integer type, shadow is the same as the original type.
+ // This may return weird-sized types like i1.
+ if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
+ return IT;
+ if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
+ uint32_t EltSize = MS.TD->getTypeSizeInBits(VT->getElementType());
+ return VectorType::get(IntegerType::get(*MS.C, EltSize),
+ VT->getNumElements());
+ }
+ if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
+ SmallVector<Type*, 4> Elements;
+ for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+ Elements.push_back(getShadowTy(ST->getElementType(i)));
+ StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
+ DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
+ return Res;
+ }
+ uint32_t TypeSize = MS.TD->getTypeSizeInBits(OrigTy);
+ return IntegerType::get(*MS.C, TypeSize);
+ }
+
+ /// \brief Flatten a vector type.
+ Type *getShadowTyNoVec(Type *ty) {
+ if (VectorType *vt = dyn_cast<VectorType>(ty))
+ return IntegerType::get(*MS.C, vt->getBitWidth());
+ return ty;
+ }
+
+ /// \brief Convert a shadow value to it's flattened variant.
+ Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
+ Type *Ty = V->getType();
+ Type *NoVecTy = getShadowTyNoVec(Ty);
+ if (Ty == NoVecTy) return V;
+ return IRB.CreateBitCast(V, NoVecTy);
+ }
+
+ /// \brief Compute the shadow address that corresponds to a given application
+ /// address.
+ ///
+ /// Shadow = Addr & ~ShadowMask.
+ Value *getShadowPtr(Value *Addr, Type *ShadowTy,
+ IRBuilder<> &IRB) {
+ Value *ShadowLong =
+ IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
+ return IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
+ }
+
+ /// \brief Compute the origin address that corresponds to a given application
+ /// address.
+ ///
+ /// OriginAddr = (ShadowAddr + OriginOffset) & ~3ULL
+ Value *getOriginPtr(Value *Addr, IRBuilder<> &IRB) {
+ Value *ShadowLong =
+ IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
+ Value *Add =
+ IRB.CreateAdd(ShadowLong,
+ ConstantInt::get(MS.IntptrTy, MS.OriginOffset));
+ Value *SecondAnd =
+ IRB.CreateAnd(Add, ConstantInt::get(MS.IntptrTy, ~3ULL));
+ return IRB.CreateIntToPtr(SecondAnd, PointerType::get(IRB.getInt32Ty(), 0));
+ }
+
+ /// \brief Compute the shadow address for a given function argument.
+ ///
+ /// Shadow = ParamTLS+ArgOffset.
+ Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
+ int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
+ "_msarg");
+ }
+
+ /// \brief Compute the origin address for a given function argument.
+ Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
+ int ArgOffset) {
+ if (!MS.TrackOrigins) return 0;
+ Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+ "_msarg_o");
+ }
+
+ /// \brief Compute the shadow address for a retval.
+ Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
+ Value *Base = IRB.CreatePointerCast(MS.RetvalTLS, MS.IntptrTy);
+ return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
+ "_msret");
+ }
+
+ /// \brief Compute the origin address for a retval.
+ Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
+ // We keep a single origin for the entire retval. Might be too optimistic.
+ return MS.RetvalOriginTLS;
+ }
+
+ /// \brief Set SV to be the shadow value for V.
+ void setShadow(Value *V, Value *SV) {
+ assert(!ShadowMap.count(V) && "Values may only have one shadow");
+ ShadowMap[V] = SV;
+ }
+
+ /// \brief Set Origin to be the origin value for V.
+ void setOrigin(Value *V, Value *Origin) {
+ if (!MS.TrackOrigins) return;
+ assert(!OriginMap.count(V) && "Values may only have one origin");
+ DEBUG(dbgs() << "ORIGIN: " << *V << " ==> " << *Origin << "\n");
+ OriginMap[V] = Origin;
+ }
+
+ /// \brief Create a clean shadow value for a given value.
+ ///
+ /// Clean shadow (all zeroes) means all bits of the value are defined
+ /// (initialized).
+ Constant *getCleanShadow(Value *V) {
+ Type *ShadowTy = getShadowTy(V);
+ if (!ShadowTy)
+ return 0;
+ return Constant::getNullValue(ShadowTy);
+ }
+
+ /// \brief Create a dirty shadow of a given shadow type.
+ Constant *getPoisonedShadow(Type *ShadowTy) {
+ assert(ShadowTy);
+ if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
+ return Constant::getAllOnesValue(ShadowTy);
+ StructType *ST = cast<StructType>(ShadowTy);
+ SmallVector<Constant *, 4> Vals;
+ for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+ Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
+ return ConstantStruct::get(ST, Vals);
+ }
+
+ /// \brief Create a dirty shadow for a given value.
+ Constant *getPoisonedShadow(Value *V) {
+ Type *ShadowTy = getShadowTy(V);
+ if (!ShadowTy)
+ return 0;
+ return getPoisonedShadow(ShadowTy);
+ }
+
+ /// \brief Create a clean (zero) origin.
+ Value *getCleanOrigin() {
+ return Constant::getNullValue(MS.OriginTy);
+ }
+
+ /// \brief Get the shadow value for a given Value.
+ ///
+ /// This function either returns the value set earlier with setShadow,
+ /// or extracts if from ParamTLS (for function arguments).
+ Value *getShadow(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // For instructions the shadow is already stored in the map.
+ Value *Shadow = ShadowMap[V];
+ if (!Shadow) {
+ DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
+ (void)I;
+ assert(Shadow && "No shadow for a value");
+ }
+ return Shadow;
+ }
+ if (UndefValue *U = dyn_cast<UndefValue>(V)) {
+ Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
+ DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
+ (void)U;
+ return AllOnes;
+ }
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ // For arguments we compute the shadow on demand and store it in the map.
+ Value **ShadowPtr = &ShadowMap[V];
+ if (*ShadowPtr)
+ return *ShadowPtr;
+ Function *F = A->getParent();
+ IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
+ unsigned ArgOffset = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ if (!AI->getType()->isSized()) {
+ DEBUG(dbgs() << "Arg is not sized\n");
+ continue;
+ }
+ unsigned Size = AI->hasByValAttr()
+ ? MS.TD->getTypeAllocSize(AI->getType()->getPointerElementType())
+ : MS.TD->getTypeAllocSize(AI->getType());
+ if (A == AI) {
+ Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset);
+ if (AI->hasByValAttr()) {
+ // ByVal pointer itself has clean shadow. We copy the actual
+ // argument shadow to the underlying memory.
+ Value *Cpy = EntryIRB.CreateMemCpy(
+ getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB),
+ Base, Size, AI->getParamAlignment());
+ DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
+ (void)Cpy;
+ *ShadowPtr = getCleanShadow(V);
+ } else {
+ *ShadowPtr = EntryIRB.CreateLoad(Base);
+ }
+ DEBUG(dbgs() << " ARG: " << *AI << " ==> " <<
+ **ShadowPtr << "\n");
+ if (MS.TrackOrigins) {
+ Value* OriginPtr = getOriginPtrForArgument(AI, EntryIRB, ArgOffset);
+ setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
+ }
+ }
+ ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
+ }
+ assert(*ShadowPtr && "Could not find shadow for an argument");
+ return *ShadowPtr;
+ }
+ // For everything else the shadow is zero.
+ return getCleanShadow(V);
+ }
+
+ /// \brief Get the shadow for i-th argument of the instruction I.
+ Value *getShadow(Instruction *I, int i) {
+ return getShadow(I->getOperand(i));
+ }
+
+ /// \brief Get the origin for a value.
+ Value *getOrigin(Value *V) {
+ if (!MS.TrackOrigins) return 0;
+ if (isa<Instruction>(V) || isa<Argument>(V)) {
+ Value *Origin = OriginMap[V];
+ if (!Origin) {
+ DEBUG(dbgs() << "NO ORIGIN: " << *V << "\n");
+ Origin = getCleanOrigin();
+ }
+ return Origin;
+ }
+ return getCleanOrigin();
+ }
+
+ /// \brief Get the origin for i-th argument of the instruction I.
+ Value *getOrigin(Instruction *I, int i) {
+ return getOrigin(I->getOperand(i));
+ }
+
+ /// \brief Remember the place where a shadow check should be inserted.
+ ///
+ /// This location will be later instrumented with a check that will print a
+ /// UMR warning in runtime if the value is not fully defined.
+ void insertCheck(Value *Val, Instruction *OrigIns) {
+ assert(Val);
+ if (!InsertChecks) return;
+ Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
+ if (!Shadow) return;
+#ifndef NDEBUG
+ Type *ShadowTy = Shadow->getType();
+ assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
+ "Can only insert checks for integer and vector shadow types");
+#endif
+ Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
+ InstrumentationList.push_back(
+ ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+ }
+
+ // ------------------- Visitors.
+
+ /// \brief Instrument LoadInst
+ ///
+ /// Loads the corresponding shadow and (optionally) origin.
+ /// Optionally, checks that the load address is fully defined.
+ void visitLoadInst(LoadInst &I) {
+ assert(I.getType()->isSized() && "Load type must have size");
+ IRBuilder<> IRB(&I);
+ Type *ShadowTy = getShadowTy(&I);
+ Value *Addr = I.getPointerOperand();
+ if (LoadShadow) {
+ Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
+ setShadow(&I,
+ IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+
+ if (ClCheckAccessAddress)
+ insertCheck(I.getPointerOperand(), &I);
+
+ if (MS.TrackOrigins) {
+ if (LoadShadow) {
+ unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
+ setOrigin(&I,
+ IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment));
+ } else {
+ setOrigin(&I, getCleanOrigin());
+ }
+ }
+ }
+
+ /// \brief Instrument StoreInst
+ ///
+ /// Stores the corresponding shadow and (optionally) origin.
+ /// Optionally, checks that the store address is fully defined.
+ void visitStoreInst(StoreInst &I) {
+ StoreList.push_back(&I);
+ }
+
+ // Vector manipulation.
+ void visitExtractElementInst(ExtractElementInst &I) {
+ insertCheck(I.getOperand(1), &I);
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
+ "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitInsertElementInst(InsertElementInst &I) {
+ insertCheck(I.getOperand(2), &I);
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
+ I.getOperand(2), "_msprop"));
+ setOriginForNaryOp(I);
+ }
+
+ void visitShuffleVectorInst(ShuffleVectorInst &I) {
+ insertCheck(I.getOperand(2), &I);
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
+ I.getOperand(2), "_msprop"));
+ setOriginForNaryOp(I);
+ }
+
+ // Casts.
+ void visitSExtInst(SExtInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitZExtInst(ZExtInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitTruncInst(TruncInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitBitCastInst(BitCastInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitPtrToIntInst(PtrToIntInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+ "_msprop_ptrtoint"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitIntToPtrInst(IntToPtrInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+ "_msprop_inttoptr"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
+ void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
+ void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
+ void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
+ void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
+ void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
+
+ /// \brief Propagate shadow for bitwise AND.
+ ///
+ /// This code is exact, i.e. if, for example, a bit in the left argument
+ /// is defined and 0, then neither the value not definedness of the
+ /// corresponding bit in B don't affect the resulting shadow.
+ void visitAnd(BinaryOperator &I) {
+ IRBuilder<> IRB(&I);
+ // "And" of 0 and a poisoned value results in unpoisoned value.
+ // 1&1 => 1; 0&1 => 0; p&1 => p;
+ // 1&0 => 0; 0&0 => 0; p&0 => 0;
+ // 1&p => p; 0&p => 0; p&p => p;
+ // S = (S1 & S2) | (V1 & S2) | (S1 & V2)
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *V1 = I.getOperand(0);
+ Value *V2 = I.getOperand(1);
+ if (V1->getType() != S1->getType()) {
+ V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+ V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+ }
+ Value *S1S2 = IRB.CreateAnd(S1, S2);
+ Value *V1S2 = IRB.CreateAnd(V1, S2);
+ Value *S1V2 = IRB.CreateAnd(S1, V2);
+ setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+ setOriginForNaryOp(I);
+ }
+
+ void visitOr(BinaryOperator &I) {
+ IRBuilder<> IRB(&I);
+ // "Or" of 1 and a poisoned value results in unpoisoned value.
+ // 1|1 => 1; 0|1 => 1; p|1 => 1;
+ // 1|0 => 1; 0|0 => 0; p|0 => p;
+ // 1|p => 1; 0|p => p; p|p => p;
+ // S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *V1 = IRB.CreateNot(I.getOperand(0));
+ Value *V2 = IRB.CreateNot(I.getOperand(1));
+ if (V1->getType() != S1->getType()) {
+ V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+ V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+ }
+ Value *S1S2 = IRB.CreateAnd(S1, S2);
+ Value *V1S2 = IRB.CreateAnd(V1, S2);
+ Value *S1V2 = IRB.CreateAnd(S1, V2);
+ setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+ setOriginForNaryOp(I);
+ }
+
+ /// \brief Default propagation of shadow and/or origin.
+ ///
+ /// This class implements the general case of shadow propagation, used in all
+ /// cases where we don't know and/or don't care about what the operation
+ /// actually does. It converts all input shadow values to a common type
+ /// (extending or truncating as necessary), and bitwise OR's them.
+ ///
+ /// This is much cheaper than inserting checks (i.e. requiring inputs to be
+ /// fully initialized), and less prone to false positives.
+ ///
+ /// This class also implements the general case of origin propagation. For a
+ /// Nary operation, result origin is set to the origin of an argument that is
+ /// not entirely initialized. If there is more than one such arguments, the
+ /// rightmost of them is picked. It does not matter which one is picked if all
+ /// arguments are initialized.
+ template <bool CombineShadow>
+ class Combiner {
+ Value *Shadow;
+ Value *Origin;
+ IRBuilder<> &IRB;
+ MemorySanitizerVisitor *MSV;
+
+ public:
+ Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) :
+ Shadow(0), Origin(0), IRB(IRB), MSV(MSV) {}
+
+ /// \brief Add a pair of shadow and origin values to the mix.
+ Combiner &Add(Value *OpShadow, Value *OpOrigin) {
+ if (CombineShadow) {
+ assert(OpShadow);
+ if (!Shadow)
+ Shadow = OpShadow;
+ else {
+ OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
+ Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
+ }
+ }
+
+ if (MSV->MS.TrackOrigins) {
+ assert(OpOrigin);
+ if (!Origin) {
+ Origin = OpOrigin;
+ } else {
+ Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
+ Value *Cond = IRB.CreateICmpNE(FlatShadow,
+ MSV->getCleanShadow(FlatShadow));
+ Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
+ }
+ }
+ return *this;
+ }
+
+ /// \brief Add an application value to the mix.
+ Combiner &Add(Value *V) {
+ Value *OpShadow = MSV->getShadow(V);
+ Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : 0;
+ return Add(OpShadow, OpOrigin);
+ }
+
+ /// \brief Set the current combined values as the given instruction's shadow
+ /// and origin.
+ void Done(Instruction *I) {
+ if (CombineShadow) {
+ assert(Shadow);
+ Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
+ MSV->setShadow(I, Shadow);
+ }
+ if (MSV->MS.TrackOrigins) {
+ assert(Origin);
+ MSV->setOrigin(I, Origin);
+ }
+ }
+ };
+
+ typedef Combiner<true> ShadowAndOriginCombiner;
+ typedef Combiner<false> OriginCombiner;
+
+ /// \brief Propagate origin for arbitrary operation.
+ void setOriginForNaryOp(Instruction &I) {
+ if (!MS.TrackOrigins) return;
+ IRBuilder<> IRB(&I);
+ OriginCombiner OC(this, IRB);
+ for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
+ OC.Add(OI->get());
+ OC.Done(&I);
+ }
+
+ size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
+ assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
+ "Vector of pointers is not a valid shadow type");
+ return Ty->isVectorTy() ?
+ Ty->getVectorNumElements() * Ty->getScalarSizeInBits() :
+ Ty->getPrimitiveSizeInBits();
+ }
+
+ /// \brief Cast between two shadow types, extending or truncating as
+ /// necessary.
+ Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy) {
+ Type *srcTy = V->getType();
+ if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
+ return IRB.CreateIntCast(V, dstTy, false);
+ if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
+ dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
+ return IRB.CreateIntCast(V, dstTy, false);
+ size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
+ size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
+ Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
+ Value *V2 =
+ IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), false);
+ return IRB.CreateBitCast(V2, dstTy);
+ // TODO: handle struct types.
+ }
+
+ /// \brief Propagate shadow for arbitrary operation.
+ void handleShadowOr(Instruction &I) {
+ IRBuilder<> IRB(&I);
+ ShadowAndOriginCombiner SC(this, IRB);
+ for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
+ SC.Add(OI->get());
+ SC.Done(&I);
+ }
+
+ void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
+ void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
+ void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
+ void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
+ void visitSub(BinaryOperator &I) { handleShadowOr(I); }
+ void visitXor(BinaryOperator &I) { handleShadowOr(I); }
+ void visitMul(BinaryOperator &I) { handleShadowOr(I); }
+
+ void handleDiv(Instruction &I) {
+ IRBuilder<> IRB(&I);
+ // Strict on the second argument.
+ insertCheck(I.getOperand(1), &I);
+ setShadow(&I, getShadow(&I, 0));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitUDiv(BinaryOperator &I) { handleDiv(I); }
+ void visitSDiv(BinaryOperator &I) { handleDiv(I); }
+ void visitFDiv(BinaryOperator &I) { handleDiv(I); }
+ void visitURem(BinaryOperator &I) { handleDiv(I); }
+ void visitSRem(BinaryOperator &I) { handleDiv(I); }
+ void visitFRem(BinaryOperator &I) { handleDiv(I); }
+
+ /// \brief Instrument == and != comparisons.
+ ///
+ /// Sometimes the comparison result is known even if some of the bits of the
+ /// arguments are not.
+ void handleEqualityComparison(ICmpInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *A = I.getOperand(0);
+ Value *B = I.getOperand(1);
+ Value *Sa = getShadow(A);
+ Value *Sb = getShadow(B);
+
+ // Get rid of pointers and vectors of pointers.
+ // For ints (and vectors of ints), types of A and Sa match,
+ // and this is a no-op.
+ A = IRB.CreatePointerCast(A, Sa->getType());
+ B = IRB.CreatePointerCast(B, Sb->getType());
+
+ // A == B <==> (C = A^B) == 0
+ // A != B <==> (C = A^B) != 0
+ // Sc = Sa | Sb
+ Value *C = IRB.CreateXor(A, B);
+ Value *Sc = IRB.CreateOr(Sa, Sb);
+ // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
+ // Result is defined if one of the following is true
+ // * there is a defined 1 bit in C
+ // * C is fully defined
+ // Si = !(C & ~Sc) && Sc
+ Value *Zero = Constant::getNullValue(Sc->getType());
+ Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
+ Value *Si =
+ IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
+ IRB.CreateICmpEQ(
+ IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
+ Si->setName("_msprop_icmp");
+ setShadow(&I, Si);
+ setOriginForNaryOp(I);
+ }
+
+ /// \brief Build the lowest possible value of V, taking into account V's
+ /// uninitialized bits.
+ Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
+ bool isSigned) {
+ if (isSigned) {
+ // Split shadow into sign bit and other bits.
+ Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
+ Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
+ // Maximise the undefined shadow bit, minimize other undefined bits.
+ return
+ IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
+ } else {
+ // Minimize undefined bits.
+ return IRB.CreateAnd(A, IRB.CreateNot(Sa));
+ }
+ }
+
+ /// \brief Build the highest possible value of V, taking into account V's
+ /// uninitialized bits.
+ Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
+ bool isSigned) {
+ if (isSigned) {
+ // Split shadow into sign bit and other bits.
+ Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
+ Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
+ // Minimise the undefined shadow bit, maximise other undefined bits.
+ return
+ IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
+ } else {
+ // Maximize undefined bits.
+ return IRB.CreateOr(A, Sa);
+ }
+ }
+
+ /// \brief Instrument relational comparisons.
+ ///
+ /// This function does exact shadow propagation for all relational
+ /// comparisons of integers, pointers and vectors of those.
+ /// FIXME: output seems suboptimal when one of the operands is a constant
+ void handleRelationalComparisonExact(ICmpInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *A = I.getOperand(0);
+ Value *B = I.getOperand(1);
+ Value *Sa = getShadow(A);
+ Value *Sb = getShadow(B);
+
+ // Get rid of pointers and vectors of pointers.
+ // For ints (and vectors of ints), types of A and Sa match,
+ // and this is a no-op.
+ A = IRB.CreatePointerCast(A, Sa->getType());
+ B = IRB.CreatePointerCast(B, Sb->getType());
+
+ // Let [a0, a1] be the interval of possible values of A, taking into account
+ // its undefined bits. Let [b0, b1] be the interval of possible values of B.
+ // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
+ bool IsSigned = I.isSigned();
+ Value *S1 = IRB.CreateICmp(I.getPredicate(),
+ getLowestPossibleValue(IRB, A, Sa, IsSigned),
+ getHighestPossibleValue(IRB, B, Sb, IsSigned));
+ Value *S2 = IRB.CreateICmp(I.getPredicate(),
+ getHighestPossibleValue(IRB, A, Sa, IsSigned),
+ getLowestPossibleValue(IRB, B, Sb, IsSigned));
+ Value *Si = IRB.CreateXor(S1, S2);
+ setShadow(&I, Si);
+ setOriginForNaryOp(I);
+ }
+
+ /// \brief Instrument signed relational comparisons.
+ ///
+ /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by
+ /// propagating the highest bit of the shadow. Everything else is delegated
+ /// to handleShadowOr().
+ void handleSignedRelationalComparison(ICmpInst &I) {
+ Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
+ Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
+ Value* op = NULL;
+ CmpInst::Predicate pre = I.getPredicate();
+ if (constOp0 && constOp0->isNullValue() &&
+ (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) {
+ op = I.getOperand(1);
+ } else if (constOp1 && constOp1->isNullValue() &&
+ (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) {
+ op = I.getOperand(0);
+ }
+ if (op) {
+ IRBuilder<> IRB(&I);
+ Value* Shadow =
+ IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt");
+ setShadow(&I, Shadow);
+ setOrigin(&I, getOrigin(op));
+ } else {
+ handleShadowOr(I);
+ }
+ }
+
+ void visitICmpInst(ICmpInst &I) {
+ if (!ClHandleICmp) {
+ handleShadowOr(I);
+ return;
+ }
+ if (I.isEquality()) {
+ handleEqualityComparison(I);
+ return;
+ }
+
+ assert(I.isRelational());
+ if (ClHandleICmpExact) {
+ handleRelationalComparisonExact(I);
+ return;
+ }
+ if (I.isSigned()) {
+ handleSignedRelationalComparison(I);
+ return;
+ }
+
+ assert(I.isUnsigned());
+ if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
+ handleRelationalComparisonExact(I);
+ return;
+ }
+
+ handleShadowOr(I);
+ }
+
+ void visitFCmpInst(FCmpInst &I) {
+ handleShadowOr(I);
+ }
+
+ void handleShift(BinaryOperator &I) {
+ IRBuilder<> IRB(&I);
+ // If any of the S2 bits are poisoned, the whole thing is poisoned.
+ // Otherwise perform the same shift on S1.
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
+ S2->getType());
+ Value *V2 = I.getOperand(1);
+ Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
+ setShadow(&I, IRB.CreateOr(Shift, S2Conv));
+ setOriginForNaryOp(I);
+ }
+
+ void visitShl(BinaryOperator &I) { handleShift(I); }
+ void visitAShr(BinaryOperator &I) { handleShift(I); }
+ void visitLShr(BinaryOperator &I) { handleShift(I); }
+
+ /// \brief Instrument llvm.memmove
+ ///
+ /// At this point we don't know if llvm.memmove will be inlined or not.
+ /// If we don't instrument it and it gets inlined,
+ /// our interceptor will not kick in and we will lose the memmove.
+ /// If we instrument the call here, but it does not get inlined,
+ /// we will memove the shadow twice: which is bad in case
+ /// of overlapping regions. So, we simply lower the intrinsic to a call.
+ ///
+ /// Similar situation exists for memcpy and memset.
+ void visitMemMoveInst(MemMoveInst &I) {
+ IRBuilder<> IRB(&I);
+ IRB.CreateCall3(
+ MS.MemmoveFn,
+ IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ I.eraseFromParent();
+ }
+
+ // Similar to memmove: avoid copying shadow twice.
+ // This is somewhat unfortunate as it may slowdown small constant memcpys.
+ // FIXME: consider doing manual inline for small constant sizes and proper
+ // alignment.
+ void visitMemCpyInst(MemCpyInst &I) {
+ IRBuilder<> IRB(&I);
+ IRB.CreateCall3(
+ MS.MemcpyFn,
+ IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ I.eraseFromParent();
+ }
+
+ // Same as memcpy.
+ void visitMemSetInst(MemSetInst &I) {
+ IRBuilder<> IRB(&I);
+ IRB.CreateCall3(
+ MS.MemsetFn,
+ IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ I.eraseFromParent();
+ }
+
+ void visitVAStartInst(VAStartInst &I) {
+ VAHelper->visitVAStartInst(I);
+ }
+
+ void visitVACopyInst(VACopyInst &I) {
+ VAHelper->visitVACopyInst(I);
+ }
+
+ enum IntrinsicKind {
+ IK_DoesNotAccessMemory,
+ IK_OnlyReadsMemory,
+ IK_WritesMemory
+ };
+
+ static IntrinsicKind getIntrinsicKind(Intrinsic::ID iid) {
+ const int DoesNotAccessMemory = IK_DoesNotAccessMemory;
+ const int OnlyReadsArgumentPointees = IK_OnlyReadsMemory;
+ const int OnlyReadsMemory = IK_OnlyReadsMemory;
+ const int OnlyAccessesArgumentPointees = IK_WritesMemory;
+ const int UnknownModRefBehavior = IK_WritesMemory;
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#define ModRefBehavior IntrinsicKind
+#include "llvm/IR/Intrinsics.gen"
+#undef ModRefBehavior
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+ }
+
+ /// \brief Handle vector store-like intrinsics.
+ ///
+ /// Instrument intrinsics that look like a simple SIMD store: writes memory,
+ /// has 1 pointer argument and 1 vector argument, returns void.
+ bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value* Addr = I.getArgOperand(0);
+ Value *Shadow = getShadow(&I, 1);
+ Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
+
+ // We don't know the pointer alignment (could be unaligned SSE store!).
+ // Have to assume to worst case.
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);
+
+ if (ClCheckAccessAddress)
+ insertCheck(Addr, &I);
+
+ // FIXME: use ClStoreCleanOrigin
+ // FIXME: factor out common code from materializeStores
+ if (MS.TrackOrigins)
+ IRB.CreateStore(getOrigin(&I, 1), getOriginPtr(Addr, IRB));
+ return true;
+ }
+
+ /// \brief Handle vector load-like intrinsics.
+ ///
+ /// Instrument intrinsics that look like a simple SIMD load: reads memory,
+ /// has 1 pointer argument, returns a vector.
+ bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getArgOperand(0);
+
+ Type *ShadowTy = getShadowTy(&I);
+ if (LoadShadow) {
+ Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
+ // We don't know the pointer alignment (could be unaligned SSE load!).
+ // Have to assume to worst case.
+ setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+
+
+ if (ClCheckAccessAddress)
+ insertCheck(Addr, &I);
+
+ if (MS.TrackOrigins) {
+ if (LoadShadow)
+ setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
+ else
+ setOrigin(&I, getCleanOrigin());
+ }
+ return true;
+ }
+
+ /// \brief Handle (SIMD arithmetic)-like intrinsics.
+ ///
+ /// Instrument intrinsics with any number of arguments of the same type,
+ /// equal to the return type. The type should be simple (no aggregates or
+ /// pointers; vectors are fine).
+ /// Caller guarantees that this intrinsic does not access memory.
+ bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
+ Type *RetTy = I.getType();
+ if (!(RetTy->isIntOrIntVectorTy() ||
+ RetTy->isFPOrFPVectorTy() ||
+ RetTy->isX86_MMXTy()))
+ return false;
+
+ unsigned NumArgOperands = I.getNumArgOperands();
+
+ for (unsigned i = 0; i < NumArgOperands; ++i) {
+ Type *Ty = I.getArgOperand(i)->getType();
+ if (Ty != RetTy)
+ return false;
+ }
+
+ IRBuilder<> IRB(&I);
+ ShadowAndOriginCombiner SC(this, IRB);
+ for (unsigned i = 0; i < NumArgOperands; ++i)
+ SC.Add(I.getArgOperand(i));
+ SC.Done(&I);
+
+ return true;
+ }
+
+ /// \brief Heuristically instrument unknown intrinsics.
+ ///
+ /// The main purpose of this code is to do something reasonable with all
+ /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
+ /// We recognize several classes of intrinsics by their argument types and
+ /// ModRefBehaviour and apply special intrumentation when we are reasonably
+ /// sure that we know what the intrinsic does.
+ ///
+ /// We special-case intrinsics where this approach fails. See llvm.bswap
+ /// handling as an example of that.
+ bool handleUnknownIntrinsic(IntrinsicInst &I) {
+ unsigned NumArgOperands = I.getNumArgOperands();
+ if (NumArgOperands == 0)
+ return false;
+
+ Intrinsic::ID iid = I.getIntrinsicID();
+ IntrinsicKind IK = getIntrinsicKind(iid);
+ bool OnlyReadsMemory = IK == IK_OnlyReadsMemory;
+ bool WritesMemory = IK == IK_WritesMemory;
+ assert(!(OnlyReadsMemory && WritesMemory));
+
+ if (NumArgOperands == 2 &&
+ I.getArgOperand(0)->getType()->isPointerTy() &&
+ I.getArgOperand(1)->getType()->isVectorTy() &&
+ I.getType()->isVoidTy() &&
+ WritesMemory) {
+ // This looks like a vector store.
+ return handleVectorStoreIntrinsic(I);
+ }
+
+ if (NumArgOperands == 1 &&
+ I.getArgOperand(0)->getType()->isPointerTy() &&
+ I.getType()->isVectorTy() &&
+ OnlyReadsMemory) {
+ // This looks like a vector load.
+ return handleVectorLoadIntrinsic(I);
+ }
+
+ if (!OnlyReadsMemory && !WritesMemory)
+ if (maybeHandleSimpleNomemIntrinsic(I))
+ return true;
+
+ // FIXME: detect and handle SSE maskstore/maskload
+ return false;
+ }
+
+ void handleBswap(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Op = I.getArgOperand(0);
+ Type *OpType = Op->getType();
+ Function *BswapFunc = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::bswap, ArrayRef<Type*>(&OpType, 1));
+ setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
+ setOrigin(&I, getOrigin(Op));
+ }
+
+ void visitIntrinsicInst(IntrinsicInst &I) {
+ switch (I.getIntrinsicID()) {
+ case llvm::Intrinsic::bswap:
+ handleBswap(I);
+ break;
+ default:
+ if (!handleUnknownIntrinsic(I))
+ visitInstruction(I);
+ break;
+ }
+ }
+
+ void visitCallSite(CallSite CS) {
+ Instruction &I = *CS.getInstruction();
+ assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
+ if (CS.isCall()) {
+ CallInst *Call = cast<CallInst>(&I);
+
+ // For inline asm, do the usual thing: check argument shadow and mark all
+ // outputs as clean. Note that any side effects of the inline asm that are
+ // not immediately visible in its constraints are not handled.
+ if (Call->isInlineAsm()) {
+ visitInstruction(I);
+ return;
+ }
+
+ // Allow only tail calls with the same types, otherwise
+ // we may have a false positive: shadow for a non-void RetVal
+ // will get propagated to a void RetVal.
+ if (Call->isTailCall() && Call->getType() != Call->getParent()->getType())
+ Call->setTailCall(false);
+
+ assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");
+
+ // We are going to insert code that relies on the fact that the callee
+ // will become a non-readonly function after it is instrumented by us. To
+ // prevent this code from being optimized out, mark that function
+ // non-readonly in advance.
+ if (Function *Func = Call->getCalledFunction()) {
+ // Clear out readonly/readnone attributes.
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
+ Func->removeAttributes(AttributeSet::FunctionIndex,
+ AttributeSet::get(Func->getContext(),
+ AttributeSet::FunctionIndex,
+ B));
+ }
+ }
+ IRBuilder<> IRB(&I);
+ unsigned ArgOffset = 0;
+ DEBUG(dbgs() << " CallSite: " << I << "\n");
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ unsigned i = ArgIt - CS.arg_begin();
+ if (!A->getType()->isSized()) {
+ DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n");
+ continue;
+ }
+ unsigned Size = 0;
+ Value *Store = 0;
+ // Compute the Shadow for arg even if it is ByVal, because
+ // in that case getShadow() will copy the actual arg shadow to
+ // __msan_param_tls.
+ Value *ArgShadow = getShadow(A);
+ Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
+ DEBUG(dbgs() << " Arg#" << i << ": " << *A <<
+ " Shadow: " << *ArgShadow << "\n");
+ if (CS.paramHasAttr(i + 1, Attribute::ByVal)) {
+ assert(A->getType()->isPointerTy() &&
+ "ByVal argument is not a pointer!");
+ Size = MS.TD->getTypeAllocSize(A->getType()->getPointerElementType());
+ unsigned Alignment = CS.getParamAlignment(i + 1);
+ Store = IRB.CreateMemCpy(ArgShadowBase,
+ getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
+ Size, Alignment);
+ } else {
+ Size = MS.TD->getTypeAllocSize(A->getType());
+ Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
+ kShadowTLSAlignment);
+ }
+ if (MS.TrackOrigins)
+ IRB.CreateStore(getOrigin(A),
+ getOriginPtrForArgument(A, IRB, ArgOffset));
+ (void)Store;
+ assert(Size != 0 && Store != 0);
+ DEBUG(dbgs() << " Param:" << *Store << "\n");
+ ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
+ }
+ DEBUG(dbgs() << " done with call args\n");
+
+ FunctionType *FT =
+ cast<FunctionType>(CS.getCalledValue()->getType()-> getContainedType(0));
+ if (FT->isVarArg()) {
+ VAHelper->visitCallSite(CS, IRB);
+ }
+
+ // Now, get the shadow for the RetVal.
+ if (!I.getType()->isSized()) return;
+ IRBuilder<> IRBBefore(&I);
+ // Untill we have full dynamic coverage, make sure the retval shadow is 0.
+ Value *Base = getShadowPtrForRetval(&I, IRBBefore);
+ IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
+ Instruction *NextInsn = 0;
+ if (CS.isCall()) {
+ NextInsn = I.getNextNode();
+ } else {
+ BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
+ if (!NormalDest->getSinglePredecessor()) {
+ // FIXME: this case is tricky, so we are just conservative here.
+ // Perhaps we need to split the edge between this BB and NormalDest,
+ // but a naive attempt to use SplitEdge leads to a crash.
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ return;
+ }
+ NextInsn = NormalDest->getFirstInsertionPt();
+ assert(NextInsn &&
+ "Could not find insertion point for retval shadow load");
+ }
+ IRBuilder<> IRBAfter(NextInsn);
+ Value *RetvalShadow =
+ IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter),
+ kShadowTLSAlignment, "_msret");
+ setShadow(&I, RetvalShadow);
+ if (MS.TrackOrigins)
+ setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
+ }
+
+ void visitReturnInst(ReturnInst &I) {
+ IRBuilder<> IRB(&I);
+ if (Value *RetVal = I.getReturnValue()) {
+ // Set the shadow for the RetVal.
+ Value *Shadow = getShadow(RetVal);
+ Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
+ DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n");
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ if (MS.TrackOrigins)
+ IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
+ }
+ }
+
+ void visitPHINode(PHINode &I) {
+ IRBuilder<> IRB(&I);
+ ShadowPHINodes.push_back(&I);
+ setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
+ "_msphi_s"));
+ if (MS.TrackOrigins)
+ setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
+ "_msphi_o"));
+ }
+
+ void visitAllocaInst(AllocaInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ if (!ClPoisonStack) return;
+ IRBuilder<> IRB(I.getNextNode());
+ uint64_t Size = MS.TD->getTypeAllocSize(I.getAllocatedType());
+ if (ClPoisonStackWithCall) {
+ IRB.CreateCall2(MS.MsanPoisonStackFn,
+ IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
+ ConstantInt::get(MS.IntptrTy, Size));
+ } else {
+ Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB);
+ IRB.CreateMemSet(ShadowBase, IRB.getInt8(ClPoisonStackPattern),
+ Size, I.getAlignment());
+ }
+
+ if (MS.TrackOrigins) {
+ setOrigin(&I, getCleanOrigin());
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ // We create a string with a description of the stack allocation and
+ // pass it into __msan_set_alloca_origin.
+ // It will be printed by the run-time if stack-originated UMR is found.
+ // The first 4 bytes of the string are set to '----' and will be replaced
+ // by __msan_va_arg_overflow_size_tls at the first call.
+ StackDescription << "----" << I.getName() << "@" << F.getName();
+ Value *Descr =
+ createPrivateNonConstGlobalForString(*F.getParent(),
+ StackDescription.str());
+ IRB.CreateCall3(MS.MsanSetAllocaOriginFn,
+ IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
+ ConstantInt::get(MS.IntptrTy, Size),
+ IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()));
+ }
+ }
+
+ void visitSelectInst(SelectInst& I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateSelect(I.getCondition(),
+ getShadow(I.getTrueValue()), getShadow(I.getFalseValue()),
+ "_msprop"));
+ if (MS.TrackOrigins) {
+ // Origins are always i32, so any vector conditions must be flattened.
+ // FIXME: consider tracking vector origins for app vectors?
+ Value *Cond = I.getCondition();
+ if (Cond->getType()->isVectorTy()) {
+ Value *ConvertedShadow = convertToShadowTyNoVec(Cond, IRB);
+ Cond = IRB.CreateICmpNE(ConvertedShadow,
+ getCleanShadow(ConvertedShadow), "_mso_select");
+ }
+ setOrigin(&I, IRB.CreateSelect(Cond,
+ getOrigin(I.getTrueValue()), getOrigin(I.getFalseValue())));
+ }
+ }
+
+ void visitLandingPadInst(LandingPadInst &I) {
+ // Do nothing.
+ // See http://code.google.com/p/memory-sanitizer/issues/detail?id=1
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &I) {
+ handleShadowOr(I);
+ }
+
+ void visitExtractValueInst(ExtractValueInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Agg = I.getAggregateOperand();
+ DEBUG(dbgs() << "ExtractValue: " << I << "\n");
+ Value *AggShadow = getShadow(Agg);
+ DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n");
+ Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
+ DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n");
+ setShadow(&I, ResShadow);
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitInsertValueInst(InsertValueInst &I) {
+ IRBuilder<> IRB(&I);
+ DEBUG(dbgs() << "InsertValue: " << I << "\n");
+ Value *AggShadow = getShadow(I.getAggregateOperand());
+ Value *InsShadow = getShadow(I.getInsertedValueOperand());
+ DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n");
+ DEBUG(dbgs() << " InsShadow: " << *InsShadow << "\n");
+ Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
+ DEBUG(dbgs() << " Res: " << *Res << "\n");
+ setShadow(&I, Res);
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void dumpInst(Instruction &I) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
+ } else {
+ errs() << "ZZZ " << I.getOpcodeName() << "\n";
+ }
+ errs() << "QQQ " << I << "\n";
+ }
+
+ void visitResumeInst(ResumeInst &I) {
+ DEBUG(dbgs() << "Resume: " << I << "\n");
+ // Nothing to do here.
+ }
+
+ void visitInstruction(Instruction &I) {
+ // Everything else: stop propagating and check for poisoned shadow.
+ if (ClDumpStrictInstructions)
+ dumpInst(I);
+ DEBUG(dbgs() << "DEFAULT: " << I << "\n");
+ for (size_t i = 0, n = I.getNumOperands(); i < n; i++)
+ insertCheck(I.getOperand(i), &I);
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+};
+
+/// \brief AMD64-specific implementation of VarArgHelper.
+struct VarArgAMD64Helper : public VarArgHelper {
+ // An unfortunate workaround for asymmetric lowering of va_arg stuff.
+ // See a comment in visitCallSite for more details.
+ static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
+ static const unsigned AMD64FpEndOffset = 176;
+
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ Value *VAArgTLSCopy;
+ Value *VAArgOverflowSize;
+
+ SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+ VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV)
+ : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { }
+
+ enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+ ArgKind classifyArgument(Value* arg) {
+ // A very rough approximation of X86_64 argument classification rules.
+ Type *T = arg->getType();
+ if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
+ return AK_FloatingPoint;
+ if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+ return AK_GeneralPurpose;
+ if (T->isPointerTy())
+ return AK_GeneralPurpose;
+ return AK_Memory;
+ }
+
+ // For VarArg functions, store the argument shadow in an ABI-specific format
+ // that corresponds to va_list layout.
+ // We do this because Clang lowers va_arg in the frontend, and this pass
+ // only sees the low level code that deals with va_list internals.
+ // A much easier alternative (provided that Clang emits va_arg instructions)
+ // would have been to associate each live instance of va_list with a copy of
+ // MSanParamTLS, and extract shadow on va_arg() call in the argument list
+ // order.
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) {
+ unsigned GpOffset = 0;
+ unsigned FpOffset = AMD64GpEndOffset;
+ unsigned OverflowOffset = AMD64FpEndOffset;
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ ArgKind AK = classifyArgument(A);
+ if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
+ AK = AK_Memory;
+ if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
+ AK = AK_Memory;
+ Value *Base;
+ switch (AK) {
+ case AK_GeneralPurpose:
+ Base = getShadowPtrForVAArgument(A, IRB, GpOffset);
+ GpOffset += 8;
+ break;
+ case AK_FloatingPoint:
+ Base = getShadowPtrForVAArgument(A, IRB, FpOffset);
+ FpOffset += 16;
+ break;
+ case AK_Memory:
+ uint64_t ArgSize = MS.TD->getTypeAllocSize(A->getType());
+ Base = getShadowPtrForVAArgument(A, IRB, OverflowOffset);
+ OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8);
+ }
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ }
+ Constant *OverflowSize =
+ ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
+ IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+ }
+
+ /// \brief Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Value *A, IRBuilder<> &IRB,
+ int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(A), 0),
+ "_msarg");
+ }
+
+ void visitVAStartInst(VAStartInst &I) {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants.
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */24, /* alignment */8, false);
+ }
+
+ void visitVACopyInst(VACopyInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants.
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */24, /* alignment */8, false);
+ }
+
+ void finalizeInstrumentation() {
+ assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+ "finalizeInstrumentation called twice");
+ if (!VAStartInstrumentationList.empty()) {
+ // If there is a va_start in this function, make a backup copy of
+ // va_arg_tls somewhere in the function entry block.
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ Value *CopySize =
+ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
+ VAArgOverflowSize);
+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+ }
+
+ // Instrument va_start.
+ // Copy va_list shadow from the backup copy of the TLS contents.
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+ CallInst *OrigInst = VAStartInstrumentationList[i];
+ IRBuilder<> IRB(OrigInst->getNextNode());
+ Value *VAListTag = OrigInst->getArgOperand(0);
+
+ Value *RegSaveAreaPtrPtr =
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, 16)),
+ Type::getInt64PtrTy(*MS.C));
+ Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+ Value *RegSaveAreaShadowPtr =
+ MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+ IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy,
+ AMD64FpEndOffset, 16);
+
+ Value *OverflowArgAreaPtrPtr =
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, 8)),
+ Type::getInt64PtrTy(*MS.C));
+ Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
+ Value *OverflowArgAreaShadowPtr =
+ MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
+ Value *SrcPtr =
+ getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset);
+ IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
+ }
+ }
+};
+
+VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+ MemorySanitizerVisitor &Visitor) {
+ return new VarArgAMD64Helper(Func, Msan, Visitor);
+}
+
+} // namespace
+
+bool MemorySanitizer::runOnFunction(Function &F) {
+ MemorySanitizerVisitor Visitor(F, *this);
+
+ // Clear out readonly/readnone attributes.
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
+ F.removeAttributes(AttributeSet::FunctionIndex,
+ AttributeSet::get(F.getContext(),
+ AttributeSet::FunctionIndex, B));
+
+ return Visitor.runOnFunction();
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
new file mode 100644
index 000000000000..b45aef65bc76
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -0,0 +1,225 @@
+//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-optimal-edge-profiling"
+#include "llvm/Transforms/Instrumentation.h"
+#include "MaximumSpanningTree.h"
+#include "ProfilingUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+ class OptimalEdgeProfiler : public ModulePass {
+ bool runOnModule(Module &M);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ OptimalEdgeProfiler() : ModulePass(ID) {
+ initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(ProfileEstimatorPassID);
+ AU.addRequired<ProfileInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Optimal Edge Profiler";
+ }
+ };
+}
+
+char OptimalEdgeProfiler::ID = 0;
+INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false)
+
+ModulePass *llvm::createOptimalEdgeProfilerPass() {
+ return new OptimalEdgeProfiler();
+}
+
+inline static void printEdgeCounter(ProfileInfo::Edge e,
+ BasicBlock* b,
+ unsigned i) {
+ DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
+ << ((b)?(b)->getName():"0") << " (# " << (i) << ")\n");
+}
+
+bool OptimalEdgeProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+ // NumEdges counts all the edges that may be instrumented. Later on its
+ // decided which edges to actually instrument, to achieve optimal profiling.
+ // For the entry block a virtual edge (0,entry) is reserved, for each block
+ // with no successors an edge (BB,0) is reserved. These edges are necessary
+ // to calculate a truly optimal maximum spanning tree and thus an optimal
+ // instrumentation.
+ unsigned NumEdges = 0;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Reserve space for (0,entry) edge.
+ ++NumEdges;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Keep track of which blocks need to be instrumented. We don't want to
+ // instrument blocks that are added as the result of breaking critical
+ // edges!
+ if (BB->getTerminator()->getNumSuccessors() == 0) {
+ // Reserve space for (BB,0) edge.
+ ++NumEdges;
+ } else {
+ NumEdges += BB->getTerminator()->getNumSuccessors();
+ }
+ }
+ }
+
+ // In the profiling output a counter for each edge is reserved, but only few
+ // are used. This is done to be able to read back in the profile without
+ // calulating the maximum spanning tree again, instead each edge counter that
+ // is not used is initialised with -1 to signal that this edge counter has to
+ // be calculated from other edge counters on reading the profile info back
+ // in.
+
+ Type *Int32 = Type::getInt32Ty(M.getContext());
+ ArrayType *ATy = ArrayType::get(Int32, NumEdges);
+ GlobalVariable *Counters =
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "OptEdgeProfCounters");
+ NumEdgesInserted = 0;
+
+ std::vector<Constant*> Initializer(NumEdges);
+ Constant *Zero = ConstantInt::get(Int32, 0);
+ Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
+
+ // Instrument all of the edges not in MST...
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs() << "Working on " << F->getName() << "\n");
+
+ // Calculate a Maximum Spanning Tree with the edge weights determined by
+ // ProfileEstimator. ProfileEstimator also assign weights to the virtual
+ // edges (0,entry) and (BB,0) (for blocks with no successors) and this
+ // edges also participate in the maximum spanning tree calculation.
+ // The third parameter of MaximumSpanningTree() has the effect that not the
+ // actual MST is returned but the edges _not_ in the MST.
+
+ ProfileInfo::EdgeWeights ECs =
+ getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
+ std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
+ MaximumSpanningTree<BasicBlock> MST(EdgeVector);
+ std::stable_sort(MST.begin(), MST.end());
+
+ // Check if (0,entry) not in the MST. If not, instrument edge
+ // (IncrementCounterInBlock()) and set the counter initially to zero, if
+ // the edge is in the MST the counter is initialised to -1.
+
+ BasicBlock *entry = &(F->getEntryBlock());
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+ printEdgeCounter(edge, entry, i);
+ IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
+ Initializer[i++] = (Zero);
+ } else{
+ Initializer[i++] = (Uncounted);
+ }
+
+ // InsertedBlocks contains all blocks that were inserted for splitting an
+ // edge, this blocks do not have to be instrumented.
+ DenseSet<BasicBlock*> InsertedBlocks;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Check if block was not inserted and thus does not have to be
+ // instrumented.
+ if (InsertedBlocks.count(BB)) continue;
+
+ // Okay, we have to add a counter of each outgoing edge not in MST. If
+ // the outgoing edge is not critical don't split it, just insert the
+ // counter in the source or destination of the edge. Also, if the block
+ // has no successors, the virtual edge (BB,0) is processed.
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+ printEdgeCounter(edge, BB, i);
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
+ Initializer[i++] = (Zero);
+ } else{
+ Initializer[i++] = (Uncounted);
+ }
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ BasicBlock *Succ = TI->getSuccessor(s);
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+
+ // If the edge is critical, split it.
+ bool wasInserted = SplitCriticalEdge(TI, s, this);
+ Succ = TI->getSuccessor(s);
+ if (wasInserted)
+ InsertedBlocks.insert(Succ);
+
+ // Okay, we are guaranteed that the edge is no longer critical. If
+ // we only have a single successor, insert the counter in this block,
+ // otherwise insert it in the successor block.
+ if (TI->getNumSuccessors() == 1) {
+ // Insert counter at the start of the block
+ printEdgeCounter(edge, BB, i);
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
+ } else {
+ // Insert counter at the start of the block
+ printEdgeCounter(edge, Succ, i);
+ IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
+ }
+ Initializer[i++] = (Zero);
+ } else {
+ Initializer[i++] = (Uncounted);
+ }
+ }
+ }
+ }
+
+ // Check if the number of edges counted at first was the number of edges we
+ // considered for instrumentation.
+ assert(i == NumEdges && "the number of edges in counting array is wrong");
+
+ // Assign the now completely defined initialiser to the array.
+ Constant *init = ConstantArray::get(ATy, Initializer);
+ Counters->setInitializer(init);
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
new file mode 100644
index 000000000000..7de73269cf2b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -0,0 +1,1424 @@
+//===- PathProfiling.cpp - Inserts counters for path profiling ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments functions for Ball-Larus path profiling. Ball-Larus
+// profiling converts the CFG into a DAG by replacing backedges with edges
+// from entry to the start block and from the end block to exit. The paths
+// along the new DAG are enumrated, i.e. each path is given a path number.
+// Edges are instrumented to increment the path number register, such that the
+// path number register will equal the path number of the path taken at the
+// exit.
+//
+// This file defines classes for building a CFG for use with different stages
+// in the Ball-Larus path profiling instrumentation [Ball96]. The
+// requirements are formatting the llvm CFG into the Ball-Larus DAG, path
+// numbering, finding a spanning tree, moving increments from the spanning
+// tree to chords.
+//
+// Terms:
+// DAG - Directed Acyclic Graph.
+// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges
+// removed in the following manner. For every backedge
+// v->w, insert edge ENTRY->w and edge v->EXIT.
+// Path Number - The number corresponding to a specific path through a
+// Ball-Larus DAG.
+// Spanning Tree - A subgraph, S, is a spanning tree if S covers all
+// vertices and is a tree.
+// Chord - An edge not in the spanning tree.
+//
+// [Ball96]
+// T. Ball and J. R. Larus. "Efficient Path Profiling."
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+// [Ball94]
+// Thomas Ball. "Efficiently Counting Program Events with Support for
+// On-line queries."
+// ACM Transactions on Programmmg Languages and Systems, Vol 16, No 5,
+// September 1994, Pages 1399-1410.
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-path-profiling"
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "ProfilingUtils.h"
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeBuilder.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <vector>
+
+#define HASH_THRESHHOLD 100000
+
+using namespace llvm;
+
+namespace {
+class BLInstrumentationNode;
+class BLInstrumentationEdge;
+class BLInstrumentationDag;
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationNode extends BallLarusNode with member used by the
+// instrumentation algortihms.
+// ---------------------------------------------------------------------------
+class BLInstrumentationNode : public BallLarusNode {
+public:
+ // Creates a new BLInstrumentationNode from a BasicBlock.
+ BLInstrumentationNode(BasicBlock* BB);
+
+ // Get/sets the Value corresponding to the pathNumber register,
+ // constant or phinode. Used by the instrumentation code to remember
+ // path number Values.
+ Value* getStartingPathNumber();
+ void setStartingPathNumber(Value* pathNumber);
+
+ Value* getEndingPathNumber();
+ void setEndingPathNumber(Value* pathNumber);
+
+ // Get/set the PHINode Instruction for this node.
+ PHINode* getPathPHI();
+ void setPathPHI(PHINode* pathPHI);
+
+private:
+
+ Value* _startingPathNumber; // The Value for the current pathNumber.
+ Value* _endingPathNumber; // The Value for the current pathNumber.
+ PHINode* _pathPHI; // The PHINode for current pathNumber.
+};
+
+// --------------------------------------------------------------------------
+// BLInstrumentationEdge extends BallLarusEdge with data about the
+// instrumentation that will end up on each edge.
+// --------------------------------------------------------------------------
+class BLInstrumentationEdge : public BallLarusEdge {
+public:
+ BLInstrumentationEdge(BLInstrumentationNode* source,
+ BLInstrumentationNode* target);
+
+ // Sets the target node of this edge. Required to split edges.
+ void setTarget(BallLarusNode* node);
+
+ // Get/set whether edge is in the spanning tree.
+ bool isInSpanningTree() const;
+ void setIsInSpanningTree(bool isInSpanningTree);
+
+ // Get/ set whether this edge will be instrumented with a path number
+ // initialization.
+ bool isInitialization() const;
+ void setIsInitialization(bool isInitialization);
+
+ // Get/set whether this edge will be instrumented with a path counter
+ // increment. Notice this is incrementing the path counter
+ // corresponding to the path number register. The path number
+ // increment is determined by getIncrement().
+ bool isCounterIncrement() const;
+ void setIsCounterIncrement(bool isCounterIncrement);
+
+ // Get/set the path number increment that this edge will be instrumented
+ // with. This is distinct from the path counter increment and the
+ // weight. The counter increment counts the number of executions of
+ // some path, whereas the path number keeps track of which path number
+ // the program is on.
+ long getIncrement() const;
+ void setIncrement(long increment);
+
+ // Get/set whether the edge has been instrumented.
+ bool hasInstrumentation();
+ void setHasInstrumentation(bool hasInstrumentation);
+
+ // Returns the successor number of this edge in the source.
+ unsigned getSuccessorNumber();
+
+private:
+ // The increment that the code will be instrumented with.
+ long long _increment;
+
+ // Whether this edge is in the spanning tree.
+ bool _isInSpanningTree;
+
+ // Whether this edge is an initialiation of the path number.
+ bool _isInitialization;
+
+ // Whether this edge is a path counter increment.
+ bool _isCounterIncrement;
+
+ // Whether this edge has been instrumented.
+ bool _hasInstrumentation;
+};
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationDag extends BallLarusDag with algorithms that
+// determine where instrumentation should be placed.
+// ---------------------------------------------------------------------------
+class BLInstrumentationDag : public BallLarusDag {
+public:
+ BLInstrumentationDag(Function &F);
+
+ // Returns the Exit->Root edge. This edge is required for creating
+ // directed cycles in the algorithm for moving instrumentation off of
+ // the spanning tree
+ BallLarusEdge* getExitRootEdge();
+
+ // Returns an array of phony edges which mark those nodes
+ // with function calls
+ BLEdgeVector getCallPhonyEdges();
+
+ // Gets/sets the path counter array
+ GlobalVariable* getCounterArray();
+ void setCounterArray(GlobalVariable* c);
+
+ // Calculates the increments for the chords, thereby removing
+ // instrumentation from the spanning tree edges. Implementation is based
+ // on the algorithm in Figure 4 of [Ball94]
+ void calculateChordIncrements();
+
+ // Updates the state when an edge has been split
+ void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock);
+
+ // Calculates a spanning tree of the DAG ignoring cycles. Whichever
+ // edges are in the spanning tree will not be instrumented, but this
+ // implementation does not try to minimize the instrumentation overhead
+ // by trying to find hot edges.
+ void calculateSpanningTree();
+
+ // Pushes initialization further down in order to group the first
+ // increment and initialization.
+ void pushInitialization();
+
+ // Pushes the path counter increments up in order to group the last path
+ // number increment.
+ void pushCounters();
+
+ // Removes phony edges from the successor list of the source, and the
+ // predecessor list of the target.
+ void unlinkPhony();
+
+ // Generate dot graph for the function
+ void generateDotGraph();
+
+protected:
+ // BLInstrumentationDag creates BLInstrumentationNode objects in this
+ // method overriding the creation of BallLarusNode objects.
+ //
+ // Allows subclasses to determine which type of Node is created.
+ // Override this method to produce subclasses of BallLarusNode if
+ // necessary.
+ virtual BallLarusNode* createNode(BasicBlock* BB);
+
+ // BLInstrumentationDag create BLInstrumentationEdges.
+ //
+ // Allows subclasses to determine which type of Edge is created.
+ // Override this method to produce subclasses of BallLarusEdge if
+ // necessary. Parameters source and target will have been created by
+ // createNode and can be cast to the subclass of BallLarusNode*
+ // returned by createNode.
+ virtual BallLarusEdge* createEdge(
+ BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber);
+
+private:
+ BLEdgeVector _treeEdges; // All edges in the spanning tree.
+ BLEdgeVector _chordEdges; // All edges not in the spanning tree.
+ GlobalVariable* _counterArray; // Array to store path counters
+
+ // Removes the edge from the appropriate predecessor and successor lists.
+ void unlinkEdge(BallLarusEdge* edge);
+
+ // Makes an edge part of the spanning tree.
+ void makeEdgeSpanning(BLInstrumentationEdge* edge);
+
+ // Pushes initialization and calls itself recursively.
+ void pushInitializationFromEdge(BLInstrumentationEdge* edge);
+
+ // Pushes path counter increments up recursively.
+ void pushCountersFromEdge(BLInstrumentationEdge* edge);
+
+ // Depth first algorithm for determining the chord increments.f
+ void calculateChordIncrementsDfs(
+ long weight, BallLarusNode* v, BallLarusEdge* e);
+
+ // Determines the relative direction of two edges.
+ int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f);
+};
+
+// ---------------------------------------------------------------------------
+// PathProfiler is a module pass which instruments path profiling instructions
+// ---------------------------------------------------------------------------
+class PathProfiler : public ModulePass {
+private:
+ // Current context for multi threading support.
+ LLVMContext* Context;
+
+ // Which function are we currently instrumenting
+ unsigned currentFunctionNumber;
+
+ // The function prototype in the profiling runtime for incrementing a
+ // single path counter in a hash table.
+ Constant* llvmIncrementHashFunction;
+ Constant* llvmDecrementHashFunction;
+
+ // Instruments each function with path profiling. 'main' is instrumented
+ // with code to save the profile to disk.
+ bool runOnModule(Module &M);
+
+ // Analyzes the function for Ball-Larus path profiling, and inserts code.
+ void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M);
+
+ // Creates an increment constant representing incr.
+ ConstantInt* createIncrementConstant(long incr, int bitsize);
+
+ // Creates an increment constant representing the value in
+ // edge->getIncrement().
+ ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge);
+
+ // Finds the insertion point after pathNumber in block. PathNumber may
+ // be NULL.
+ BasicBlock::iterator getInsertionPoint(
+ BasicBlock* block, Value* pathNumber);
+
+ // Inserts source's pathNumber Value* into target. Target may or may not
+ // have multiple predecessors, and may or may not have its phiNode
+ // initalized.
+ void pushValueIntoNode(
+ BLInstrumentationNode* source, BLInstrumentationNode* target);
+
+ // Inserts source's pathNumber Value* into the appropriate slot of
+ // target's phiNode.
+ void pushValueIntoPHI(
+ BLInstrumentationNode* target, BLInstrumentationNode* source);
+
+ // The Value* in node, oldVal, is updated with a Value* correspodning to
+ // oldVal + addition.
+ void insertNumberIncrement(BLInstrumentationNode* node, Value* addition,
+ bool atBeginning);
+
+ // Creates a counter increment in the given node. The Value* in node is
+ // taken as the index into a hash table.
+ void insertCounterIncrement(
+ Value* incValue,
+ BasicBlock::iterator insertPoint,
+ BLInstrumentationDag* dag,
+ bool increment = true);
+
+ // A PHINode is created in the node, and its values initialized to -1U.
+ void preparePHI(BLInstrumentationNode* node);
+
+ // Inserts instrumentation for the given edge
+ //
+ // Pre: The edge's source node has pathNumber set if edge is non zero
+ // path number increment.
+ //
+ // Post: Edge's target node has a pathNumber set to the path number Value
+ // corresponding to the value of the path register after edge's
+ // execution.
+ void insertInstrumentationStartingAt(
+ BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag);
+
+ // If this edge is a critical edge, then inserts a node at this edge.
+ // This edge becomes the first edge, and a new BallLarusEdge is created.
+ bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag);
+
+ // Inserts instrumentation according to the marked edges in dag. Phony
+ // edges must be unlinked from the DAG, but accessible from the
+ // backedges. Dag must have initializations, path number increments, and
+ // counter increments present.
+ //
+ // Counter storage is created here.
+ void insertInstrumentation( BLInstrumentationDag& dag, Module &M);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ PathProfiler() : ModulePass(ID) {
+ initializePathProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Path Profiler";
+ }
+};
+} // end anonymous namespace
+
+// Should we print the dot-graphs
+static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden,
+ cl::desc("Output the path profiling DAG for each function."));
+
+// Register the path profiler as a pass
+char PathProfiler::ID = 0;
+INITIALIZE_PASS(PathProfiler, "insert-path-profiling",
+ "Insert instrumentation for Ball-Larus path profiling",
+ false, false)
+
+ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); }
+
+namespace llvm {
+ class PathProfilingFunctionTable {};
+
+ // Type for global array storing references to hashes or arrays
+ template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable,
+ xcompile> {
+ public:
+ static StructType *get(LLVMContext& C) {
+ return( StructType::get(
+ TypeBuilder<types::i<32>, xcompile>::get(C), // type
+ TypeBuilder<types::i<32>, xcompile>::get(C), // array size
+ TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
+ NULL));
+ }
+ };
+
+ typedef TypeBuilder<PathProfilingFunctionTable, true>
+ ftEntryTypeBuilder;
+
+ // BallLarusEdge << operator overloading
+ raw_ostream& operator<<(raw_ostream& os,
+ const BLInstrumentationEdge& edge)
+ LLVM_ATTRIBUTE_USED;
+ raw_ostream& operator<<(raw_ostream& os,
+ const BLInstrumentationEdge& edge) {
+ os << "[" << edge.getSource()->getName() << " -> "
+ << edge.getTarget()->getName() << "] init: "
+ << (edge.isInitialization() ? "yes" : "no")
+ << " incr:" << edge.getIncrement() << " cinc: "
+ << (edge.isCounterIncrement() ? "yes" : "no");
+ return(os);
+ }
+}
+
+// Creates a new BLInstrumentationNode from a BasicBlock.
+BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) :
+ BallLarusNode(BB),
+ _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {}
+
+// Constructor for BLInstrumentationEdge.
+BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source,
+ BLInstrumentationNode* target)
+ : BallLarusEdge(source, target, 0),
+ _increment(0), _isInSpanningTree(false), _isInitialization(false),
+ _isCounterIncrement(false), _hasInstrumentation(false) {}
+
+// Sets the target node of this edge. Required to split edges.
+void BLInstrumentationEdge::setTarget(BallLarusNode* node) {
+ _target = node;
+}
+
+// Returns whether this edge is in the spanning tree.
+bool BLInstrumentationEdge::isInSpanningTree() const {
+ return(_isInSpanningTree);
+}
+
+// Sets whether this edge is in the spanning tree.
+void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) {
+ _isInSpanningTree = isInSpanningTree;
+}
+
+// Returns whether this edge will be instrumented with a path number
+// initialization.
+bool BLInstrumentationEdge::isInitialization() const {
+ return(_isInitialization);
+}
+
+// Sets whether this edge will be instrumented with a path number
+// initialization.
+void BLInstrumentationEdge::setIsInitialization(bool isInitialization) {
+ _isInitialization = isInitialization;
+}
+
+// Returns whether this edge will be instrumented with a path counter
+// increment. Notice this is incrementing the path counter
+// corresponding to the path number register. The path number
+// increment is determined by getIncrement().
+bool BLInstrumentationEdge::isCounterIncrement() const {
+ return(_isCounterIncrement);
+}
+
+// Sets whether this edge will be instrumented with a path counter
+// increment.
+void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) {
+ _isCounterIncrement = isCounterIncrement;
+}
+
+// Gets the path number increment that this edge will be instrumented
+// with. This is distinct from the path counter increment and the
+// weight. The counter increment is counts the number of executions of
+// some path, whereas the path number keeps track of which path number
+// the program is on.
+long BLInstrumentationEdge::getIncrement() const {
+ return(_increment);
+}
+
+// Set whether this edge will be instrumented with a path number
+// increment.
+void BLInstrumentationEdge::setIncrement(long increment) {
+ _increment = increment;
+}
+
+// True iff the edge has already been instrumented.
+bool BLInstrumentationEdge::hasInstrumentation() {
+ return(_hasInstrumentation);
+}
+
+// Set whether this edge has been instrumented.
+void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) {
+ _hasInstrumentation = hasInstrumentation;
+}
+
+// Returns the successor number of this edge in the source.
+unsigned BLInstrumentationEdge::getSuccessorNumber() {
+ BallLarusNode* sourceNode = getSource();
+ BallLarusNode* targetNode = getTarget();
+ BasicBlock* source = sourceNode->getBlock();
+ BasicBlock* target = targetNode->getBlock();
+
+ if(source == NULL || target == NULL)
+ return(0);
+
+ TerminatorInst* terminator = source->getTerminator();
+
+ unsigned i;
+ for(i=0; i < terminator->getNumSuccessors(); i++) {
+ if(terminator->getSuccessor(i) == target)
+ break;
+ }
+
+ return(i);
+}
+
+// BLInstrumentationDag constructor initializes a DAG for the given Function.
+BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F),
+ _counterArray(0) {
+}
+
+// Returns the Exit->Root edge. This edge is required for creating
+// directed cycles in the algorithm for moving instrumentation off of
+// the spanning tree
+BallLarusEdge* BLInstrumentationDag::getExitRootEdge() {
+ BLEdgeIterator erEdge = getExit()->succBegin();
+ return(*erEdge);
+}
+
+BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () {
+ BLEdgeVector callEdges;
+
+ for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++ ) {
+ if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY )
+ callEdges.push_back(*edge);
+ }
+
+ return callEdges;
+}
+
+// Gets the path counter array
+GlobalVariable* BLInstrumentationDag::getCounterArray() {
+ return _counterArray;
+}
+
+void BLInstrumentationDag::setCounterArray(GlobalVariable* c) {
+ _counterArray = c;
+}
+
+// Calculates the increment for the chords, thereby removing
+// instrumentation from the spanning tree edges. Implementation is based on
+// the algorithm in Figure 4 of [Ball94]
+void BLInstrumentationDag::calculateChordIncrements() {
+ calculateChordIncrementsDfs(0, getRoot(), NULL);
+
+ BLInstrumentationEdge* chord;
+ for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+ end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+ chord = (BLInstrumentationEdge*) *chordEdge;
+ chord->setIncrement(chord->getIncrement() + chord->getWeight());
+ }
+}
+
+// Updates the state when an edge has been split
+void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge,
+ BasicBlock* newBlock) {
+ BallLarusNode* oldTarget = formerEdge->getTarget();
+ BallLarusNode* newNode = addNode(newBlock);
+ formerEdge->setTarget(newNode);
+ newNode->addPredEdge(formerEdge);
+
+ DEBUG(dbgs() << " Edge split: " << *formerEdge << "\n");
+
+ oldTarget->removePredEdge(formerEdge);
+ BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0);
+
+ if( formerEdge->getType() == BallLarusEdge::BACKEDGE ||
+ formerEdge->getType() == BallLarusEdge::SPLITEDGE) {
+ newEdge->setType(formerEdge->getType());
+ newEdge->setPhonyRoot(formerEdge->getPhonyRoot());
+ newEdge->setPhonyExit(formerEdge->getPhonyExit());
+ formerEdge->setType(BallLarusEdge::NORMAL);
+ formerEdge->setPhonyRoot(NULL);
+ formerEdge->setPhonyExit(NULL);
+ }
+}
+
+// Calculates a spanning tree of the DAG ignoring cycles. Whichever
+// edges are in the spanning tree will not be instrumented, but this
+// implementation does not try to minimize the instrumentation overhead
+// by trying to find hot edges.
+void BLInstrumentationDag::calculateSpanningTree() {
+ std::stack<BallLarusNode*> dfsStack;
+
+ for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end();
+ nodeIt != end; nodeIt++) {
+ (*nodeIt)->setColor(BallLarusNode::WHITE);
+ }
+
+ dfsStack.push(getRoot());
+ while(dfsStack.size() > 0) {
+ BallLarusNode* node = dfsStack.top();
+ dfsStack.pop();
+
+ if(node->getColor() == BallLarusNode::WHITE)
+ continue;
+
+ BallLarusNode* nextNode;
+ bool forward = true;
+ BLEdgeIterator succEnd = node->succEnd();
+
+ node->setColor(BallLarusNode::WHITE);
+ // first iterate over successors then predecessors
+ for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd();
+ edge != predEnd; edge++) {
+ if(edge == succEnd) {
+ edge = node->predBegin();
+ forward = false;
+ }
+
+ // Ignore split edges
+ if ((*edge)->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ nextNode = forward? (*edge)->getTarget(): (*edge)->getSource();
+ if(nextNode->getColor() != BallLarusNode::WHITE) {
+ nextNode->setColor(BallLarusNode::WHITE);
+ makeEdgeSpanning((BLInstrumentationEdge*)(*edge));
+ }
+ }
+ }
+
+ for(BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++) {
+ BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge);
+ // safe since createEdge is overriden
+ if(!instEdge->isInSpanningTree() && (*edge)->getType()
+ != BallLarusEdge::SPLITEDGE)
+ _chordEdges.push_back(instEdge);
+ }
+}
+
+// Pushes initialization further down in order to group the first
+// increment and initialization.
+void BLInstrumentationDag::pushInitialization() {
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) getExitRootEdge();
+ exitRootEdge->setIsInitialization(true);
+ pushInitializationFromEdge(exitRootEdge);
+}
+
+// Pushes the path counter increments up in order to group the last path
+// number increment.
+void BLInstrumentationDag::pushCounters() {
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) getExitRootEdge();
+ exitRootEdge->setIsCounterIncrement(true);
+ pushCountersFromEdge(exitRootEdge);
+}
+
+// Removes phony edges from the successor list of the source, and the
+// predecessor list of the target.
+void BLInstrumentationDag::unlinkPhony() {
+ BallLarusEdge* edge;
+
+ for(BLEdgeIterator next = _edges.begin(),
+ end = _edges.end(); next != end; next++) {
+ edge = (*next);
+
+ if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::SPLITEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) {
+ unlinkEdge(edge);
+ }
+ }
+}
+
+// Generate a .dot graph to represent the DAG and pathNumbers
+void BLInstrumentationDag::generateDotGraph() {
+ std::string errorInfo;
+ std::string functionName = getFunction().getName().str();
+ std::string filename = "pathdag." + functionName + ".dot";
+
+ DEBUG (dbgs() << "Writing '" << filename << "'...\n");
+ raw_fd_ostream dotFile(filename.c_str(), errorInfo);
+
+ if (!errorInfo.empty()) {
+ errs() << "Error opening '" << filename.c_str() <<"' for writing!";
+ errs() << "\n";
+ return;
+ }
+
+ dotFile << "digraph " << functionName << " {\n";
+
+ for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++) {
+ std::string sourceName = (*edge)->getSource()->getName();
+ std::string targetName = (*edge)->getTarget()->getName();
+
+ dotFile << "\t\"" << sourceName.c_str() << "\" -> \""
+ << targetName.c_str() << "\" ";
+
+ long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+
+ switch( (*edge)->getType() ) {
+ case BallLarusEdge::NORMAL:
+ dotFile << "[label=" << inc << "] [color=black];\n";
+ break;
+
+ case BallLarusEdge::BACKEDGE:
+ dotFile << "[color=cyan];\n";
+ break;
+
+ case BallLarusEdge::BACKEDGE_PHONY:
+ dotFile << "[label=" << inc
+ << "] [color=blue];\n";
+ break;
+
+ case BallLarusEdge::SPLITEDGE:
+ dotFile << "[color=violet];\n";
+ break;
+
+ case BallLarusEdge::SPLITEDGE_PHONY:
+ dotFile << "[label=" << inc << "] [color=red];\n";
+ break;
+
+ case BallLarusEdge::CALLEDGE_PHONY:
+ dotFile << "[label=" << inc << "] [color=green];\n";
+ break;
+ }
+ }
+
+ dotFile << "}\n";
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) {
+ return( new BLInstrumentationNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source,
+ BallLarusNode* target, unsigned edgeNumber) {
+ // One can cast from BallLarusNode to BLInstrumentationNode since createNode
+ // is overriden to produce BLInstrumentationNode.
+ return( new BLInstrumentationEdge((BLInstrumentationNode*)source,
+ (BLInstrumentationNode*)target) );
+}
+
+// Sets the Value corresponding to the pathNumber register, constant,
+// or phinode. Used by the instrumentation code to remember path
+// number Values.
+Value* BLInstrumentationNode::getStartingPathNumber(){
+ return(_startingPathNumber);
+}
+
+// Sets the Value of the pathNumber. Used by the instrumentation code.
+void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
+ DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ?
+ pathNumber->getName() :
+ "unused") << "\n");
+ _startingPathNumber = pathNumber;
+}
+
+Value* BLInstrumentationNode::getEndingPathNumber(){
+ return(_endingPathNumber);
+}
+
+void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
+ DEBUG(dbgs() << " EPN-" << getName() << " <-- "
+ << (pathNumber ? pathNumber->getName() : "unused") << "\n");
+ _endingPathNumber = pathNumber;
+}
+
+// Get the PHINode Instruction for this node. Used by instrumentation
+// code.
+PHINode* BLInstrumentationNode::getPathPHI() {
+ return(_pathPHI);
+}
+
+// Set the PHINode Instruction for this node. Used by instrumentation
+// code.
+void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) {
+ _pathPHI = pathPHI;
+}
+
+// Removes the edge from the appropriate predecessor and successor
+// lists.
+void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) {
+ if(edge == getExitRootEdge())
+ DEBUG(dbgs() << " Removing exit->root edge\n");
+
+ edge->getSource()->removeSuccEdge(edge);
+ edge->getTarget()->removePredEdge(edge);
+}
+
+// Makes an edge part of the spanning tree.
+void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) {
+ edge->setIsInSpanningTree(true);
+ _treeEdges.push_back(edge);
+}
+
+// Pushes initialization and calls itself recursively.
+void BLInstrumentationDag::pushInitializationFromEdge(
+ BLInstrumentationEdge* edge) {
+ BallLarusNode* target;
+
+ target = edge->getTarget();
+ if( target->getNumberPredEdges() > 1 || target == getExit() ) {
+ return;
+ } else {
+ for(BLEdgeIterator next = target->succBegin(),
+ end = target->succEnd(); next != end; next++) {
+ BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next;
+
+ // Skip split edges
+ if (intoEdge->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ intoEdge->setIncrement(intoEdge->getIncrement() +
+ edge->getIncrement());
+ intoEdge->setIsInitialization(true);
+ pushInitializationFromEdge(intoEdge);
+ }
+
+ edge->setIncrement(0);
+ edge->setIsInitialization(false);
+ }
+}
+
+// Pushes path counter increments up recursively.
+void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) {
+ BallLarusNode* source;
+
+ source = edge->getSource();
+ if(source->getNumberSuccEdges() > 1 || source == getRoot()
+ || edge->isInitialization()) {
+ return;
+ } else {
+ for(BLEdgeIterator previous = source->predBegin(),
+ end = source->predEnd(); previous != end; previous++) {
+ BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous;
+
+ // Skip split edges
+ if (fromEdge->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ fromEdge->setIncrement(fromEdge->getIncrement() +
+ edge->getIncrement());
+ fromEdge->setIsCounterIncrement(true);
+ pushCountersFromEdge(fromEdge);
+ }
+
+ edge->setIncrement(0);
+ edge->setIsCounterIncrement(false);
+ }
+}
+
+// Depth first algorithm for determining the chord increments.
+void BLInstrumentationDag::calculateChordIncrementsDfs(long weight,
+ BallLarusNode* v, BallLarusEdge* e) {
+ BLInstrumentationEdge* f;
+
+ for(BLEdgeIterator treeEdge = _treeEdges.begin(),
+ end = _treeEdges.end(); treeEdge != end; treeEdge++) {
+ f = (BLInstrumentationEdge*) *treeEdge;
+ if(e != f && v == f->getTarget()) {
+ calculateChordIncrementsDfs(
+ calculateChordIncrementsDir(e,f)*(weight) +
+ f->getWeight(), f->getSource(), f);
+ }
+ if(e != f && v == f->getSource()) {
+ calculateChordIncrementsDfs(
+ calculateChordIncrementsDir(e,f)*(weight) +
+ f->getWeight(), f->getTarget(), f);
+ }
+ }
+
+ for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+ end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+ f = (BLInstrumentationEdge*) *chordEdge;
+ if(v == f->getSource() || v == f->getTarget()) {
+ f->setIncrement(f->getIncrement() +
+ calculateChordIncrementsDir(e,f)*weight);
+ }
+ }
+}
+
+// Determines the relative direction of two edges.
+int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e,
+ BallLarusEdge* f) {
+ if( e == NULL)
+ return(1);
+ else if(e->getSource() == f->getTarget()
+ || e->getTarget() == f->getSource())
+ return(1);
+
+ return(-1);
+}
+
+// Creates an increment constant representing incr.
+ConstantInt* PathProfiler::createIncrementConstant(long incr,
+ int bitsize) {
+ return(ConstantInt::get(IntegerType::get(*Context, 32), incr));
+}
+
+// Creates an increment constant representing the value in
+// edge->getIncrement().
+ConstantInt* PathProfiler::createIncrementConstant(
+ BLInstrumentationEdge* edge) {
+ return(createIncrementConstant(edge->getIncrement(), 32));
+}
+
+// Finds the insertion point after pathNumber in block. PathNumber may
+// be NULL.
+BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
+ pathNumber) {
+ if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
+ || (((Instruction*)(pathNumber))->getParent()) != block) {
+ return(block->getFirstInsertionPt());
+ } else {
+ Instruction* pathNumberInst = (Instruction*) (pathNumber);
+ BasicBlock::iterator insertPoint;
+ BasicBlock::iterator end = block->end();
+
+ for(insertPoint = block->begin();
+ insertPoint != end; insertPoint++) {
+ Instruction* insertInst = &(*insertPoint);
+
+ if(insertInst == pathNumberInst)
+ return(++insertPoint);
+ }
+
+ return(insertPoint);
+ }
+}
+
+// A PHINode is created in the node, and its values initialized to -1U.
+void PathProfiler::preparePHI(BLInstrumentationNode* node) {
+ BasicBlock* block = node->getBlock();
+ BasicBlock::iterator insertPoint = block->getFirstInsertionPt();
+ pred_iterator PB = pred_begin(node->getBlock()),
+ PE = pred_end(node->getBlock());
+ PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context),
+ std::distance(PB, PE), "pathNumber",
+ insertPoint );
+ node->setPathPHI(phi);
+ node->setStartingPathNumber(phi);
+ node->setEndingPathNumber(phi);
+
+ for(pred_iterator predIt = PB; predIt != PE; predIt++) {
+ BasicBlock* pred = (*predIt);
+
+ if(pred != NULL)
+ phi->addIncoming(createIncrementConstant((long)-1, 32), pred);
+ }
+}
+
+// Inserts source's pathNumber Value* into target. Target may or may not
+// have multiple predecessors, and may or may not have its phiNode
+// initalized.
+void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source,
+ BLInstrumentationNode* target) {
+ if(target->getBlock() == NULL)
+ return;
+
+
+ if(target->getNumberPredEdges() <= 1) {
+ assert(target->getStartingPathNumber() == NULL &&
+ "Target already has path number");
+ target->setStartingPathNumber(source->getEndingPathNumber());
+ target->setEndingPathNumber(source->getEndingPathNumber());
+ DEBUG(dbgs() << " Passing path number"
+ << (source->getEndingPathNumber() ? "" : " (null)")
+ << " value through.\n");
+ } else {
+ if(target->getPathPHI() == NULL) {
+ DEBUG(dbgs() << " Initializing PHI node for block '"
+ << target->getName() << "'\n");
+ preparePHI(target);
+ }
+ pushValueIntoPHI(target, source);
+ DEBUG(dbgs() << " Passing number value into PHI for block '"
+ << target->getName() << "'\n");
+ }
+}
+
+// Inserts source's pathNumber Value* into the appropriate slot of
+// target's phiNode.
+void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target,
+ BLInstrumentationNode* source) {
+ PHINode* phi = target->getPathPHI();
+ assert(phi != NULL && " Tried to push value into node with PHI, but node"
+ " actually had no PHI.");
+ phi->removeIncomingValue(source->getBlock(), false);
+ phi->addIncoming(source->getEndingPathNumber(), source->getBlock());
+}
+
+// The Value* in node, oldVal, is updated with a Value* correspodning to
+// oldVal + addition.
+void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
+ Value* addition, bool atBeginning) {
+ BasicBlock* block = node->getBlock();
+ assert(node->getStartingPathNumber() != NULL);
+ assert(node->getEndingPathNumber() != NULL);
+
+ BasicBlock::iterator insertPoint;
+
+ if( atBeginning )
+ insertPoint = block->getFirstInsertionPt();
+ else
+ insertPoint = block->getTerminator();
+
+ DEBUG(errs() << " Creating addition instruction.\n");
+ Value* newpn = BinaryOperator::Create(Instruction::Add,
+ node->getStartingPathNumber(),
+ addition, "pathNumber", insertPoint);
+
+ node->setEndingPathNumber(newpn);
+
+ if( atBeginning )
+ node->setStartingPathNumber(newpn);
+}
+
+// Creates a counter increment in the given node. The Value* in node is
+// taken as the index into an array or hash table. The hash table access
+// is a call to the runtime.
+void PathProfiler::insertCounterIncrement(Value* incValue,
+ BasicBlock::iterator insertPoint,
+ BLInstrumentationDag* dag,
+ bool increment) {
+ // Counter increment for array
+ if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) {
+ // Get pointer to the array location
+ std::vector<Value*> gepIndices(2);
+ gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
+ gepIndices[1] = incValue;
+
+ GetElementPtrInst* pcPointer =
+ GetElementPtrInst::Create(dag->getCounterArray(), gepIndices,
+ "counterInc", insertPoint);
+
+ // Load from the array - call it oldPC
+ LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint);
+
+ // Test to see whether adding 1 will overflow the counter
+ ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc,
+ createIncrementConstant(0xffffffff, 32),
+ "isMax");
+
+ // Select increment for the path counter based on overflow
+ SelectInst* inc =
+ SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32),
+ createIncrementConstant(0,32),
+ "pathInc", insertPoint);
+
+ // newPc = oldPc + inc
+ BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add,
+ oldPc, inc, "newPC",
+ insertPoint);
+
+ // Store back in to the array
+ new StoreInst(newPc, pcPointer, insertPoint);
+ } else { // Counter increment for hash
+ std::vector<Value*> args(2);
+ args[0] = ConstantInt::get(Type::getInt32Ty(*Context),
+ currentFunctionNumber);
+ args[1] = incValue;
+
+ CallInst::Create(
+ increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
+ args, "", insertPoint);
+ }
+}
+
+// Inserts instrumentation for the given edge
+//
+// Pre: The edge's source node has pathNumber set if edge is non zero
+// path number increment.
+//
+// Post: Edge's target node has a pathNumber set to the path number Value
+// corresponding to the value of the path register after edge's
+// execution.
+//
+// FIXME: This should be reworked so it's not recursive.
+void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag) {
+ // Mark the edge as instrumented
+ edge->setHasInstrumentation(true);
+ DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n");
+
+ // create a new node for this edge's instrumentation
+ splitCritical(edge, dag);
+
+ BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource();
+ BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget();
+ BLInstrumentationNode* instrumentNode;
+ BLInstrumentationNode* nextSourceNode;
+
+ bool atBeginning = false;
+
+ // Source node has only 1 successor so any information can be simply
+ // inserted in to it without splitting
+ if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) {
+ DEBUG(dbgs() << " Potential instructions to be placed in: "
+ << sourceNode->getName() << " (at end)\n");
+ instrumentNode = sourceNode;
+ nextSourceNode = targetNode; // ... since we never made any new nodes
+ }
+
+ // The target node only has one predecessor, so we can safely insert edge
+ // instrumentation into it. If there was splitting, it must have been
+ // successful.
+ else if( targetNode->getNumberPredEdges() == 1 ) {
+ DEBUG(dbgs() << " Potential instructions to be placed in: "
+ << targetNode->getName() << " (at beginning)\n");
+ pushValueIntoNode(sourceNode, targetNode);
+ instrumentNode = targetNode;
+ nextSourceNode = NULL; // ... otherwise we'll just keep splitting
+ atBeginning = true;
+ }
+
+ // Somehow, splitting must have failed.
+ else {
+ errs() << "Instrumenting could not split a critical edge.\n";
+ DEBUG(dbgs() << " Couldn't split edge " << (*edge) << ".\n");
+ return;
+ }
+
+ // Insert instrumentation if this is a back or split edge
+ if( edge->getType() == BallLarusEdge::BACKEDGE ||
+ edge->getType() == BallLarusEdge::SPLITEDGE ) {
+ BLInstrumentationEdge* top =
+ (BLInstrumentationEdge*) edge->getPhonyRoot();
+ BLInstrumentationEdge* bottom =
+ (BLInstrumentationEdge*) edge->getPhonyExit();
+
+ assert( top->isInitialization() && " Top phony edge did not"
+ " contain a path number initialization.");
+ assert( bottom->isCounterIncrement() && " Bottom phony edge"
+ " did not contain a path counter increment.");
+
+ // split edge has yet to be initialized
+ if( !instrumentNode->getEndingPathNumber() ) {
+ instrumentNode->setStartingPathNumber(createIncrementConstant(0,32));
+ instrumentNode->setEndingPathNumber(createIncrementConstant(0,32));
+ }
+
+ BasicBlock::iterator insertPoint = atBeginning ?
+ instrumentNode->getBlock()->getFirstInsertionPt() :
+ instrumentNode->getBlock()->getTerminator();
+
+ // add information from the bottom edge, if it exists
+ if( bottom->getIncrement() ) {
+ Value* newpn =
+ BinaryOperator::Create(Instruction::Add,
+ instrumentNode->getStartingPathNumber(),
+ createIncrementConstant(bottom),
+ "pathNumber", insertPoint);
+ instrumentNode->setEndingPathNumber(newpn);
+ }
+
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ insertPoint, dag);
+
+ if( atBeginning )
+ instrumentNode->setStartingPathNumber(createIncrementConstant(top));
+
+ instrumentNode->setEndingPathNumber(createIncrementConstant(top));
+
+ // Check for path counter increments
+ if( top->isCounterIncrement() ) {
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ instrumentNode->getBlock()->getTerminator(),dag);
+ instrumentNode->setEndingPathNumber(0);
+ }
+ }
+
+ // Insert instrumentation if this is a normal edge
+ else {
+ BasicBlock::iterator insertPoint = atBeginning ?
+ instrumentNode->getBlock()->getFirstInsertionPt() :
+ instrumentNode->getBlock()->getTerminator();
+
+ if( edge->isInitialization() ) { // initialize path number
+ instrumentNode->setEndingPathNumber(createIncrementConstant(edge));
+ } else if( edge->getIncrement() ) {// increment path number
+ Value* newpn =
+ BinaryOperator::Create(Instruction::Add,
+ instrumentNode->getStartingPathNumber(),
+ createIncrementConstant(edge),
+ "pathNumber", insertPoint);
+ instrumentNode->setEndingPathNumber(newpn);
+
+ if( atBeginning )
+ instrumentNode->setStartingPathNumber(newpn);
+ }
+
+ // Check for path counter increments
+ if( edge->isCounterIncrement() ) {
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ insertPoint, dag);
+ instrumentNode->setEndingPathNumber(0);
+ }
+ }
+
+ // Push it along
+ if (nextSourceNode && instrumentNode->getEndingPathNumber())
+ pushValueIntoNode(instrumentNode, nextSourceNode);
+
+ // Add all the successors
+ for( BLEdgeIterator next = targetNode->succBegin(),
+ end = targetNode->succEnd(); next != end; next++ ) {
+ // So long as it is un-instrumented, add it to the list
+ if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() )
+ insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag);
+ else
+ DEBUG(dbgs() << " Edge " << *(BLInstrumentationEdge*)(*next)
+ << " already instrumented.\n");
+ }
+}
+
+// Inserts instrumentation according to the marked edges in dag. Phony edges
+// must be unlinked from the DAG, but accessible from the backedges. Dag
+// must have initializations, path number increments, and counter increments
+// present.
+//
+// Counter storage is created here.
+void PathProfiler::insertInstrumentation(
+ BLInstrumentationDag& dag, Module &M) {
+
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) dag.getExitRootEdge();
+ insertInstrumentationStartingAt(exitRootEdge, &dag);
+
+ // Iterate through each call edge and apply the appropriate hash increment
+ // and decrement functions
+ BLEdgeVector callEdges = dag.getCallPhonyEdges();
+ for( BLEdgeIterator edge = callEdges.begin(),
+ end = callEdges.end(); edge != end; edge++ ) {
+ BLInstrumentationNode* node =
+ (BLInstrumentationNode*)(*edge)->getSource();
+ BasicBlock::iterator insertPoint = node->getBlock()->getFirstInsertionPt();
+
+ // Find the first function call
+ while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
+ insertPoint++;
+
+ DEBUG(dbgs() << "\nInstrumenting method call block '"
+ << node->getBlock()->getName() << "'\n");
+ DEBUG(dbgs() << " Path number initialized: "
+ << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
+
+ Value* newpn;
+ if( node->getStartingPathNumber() ) {
+ long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+ if ( inc )
+ newpn = BinaryOperator::Create(Instruction::Add,
+ node->getStartingPathNumber(),
+ createIncrementConstant(inc,32),
+ "pathNumber", insertPoint);
+ else
+ newpn = node->getStartingPathNumber();
+ } else {
+ newpn = (Value*)createIncrementConstant(
+ ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32);
+ }
+
+ insertCounterIncrement(newpn, insertPoint, &dag);
+ insertCounterIncrement(newpn, node->getBlock()->getTerminator(),
+ &dag, false);
+ }
+}
+
+// Entry point of the module
+void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
+ Function &F, Module &M) {
+ // Build DAG from CFG
+ BLInstrumentationDag dag = BLInstrumentationDag(F);
+ dag.init();
+
+ // give each path a unique integer value
+ dag.calculatePathNumbers();
+
+ // modify path increments to increase the efficiency
+ // of instrumentation
+ dag.calculateSpanningTree();
+ dag.calculateChordIncrements();
+ dag.pushInitialization();
+ dag.pushCounters();
+ dag.unlinkPhony();
+
+ // potentially generate .dot graph for the dag
+ if (DotPathDag)
+ dag.generateDotGraph ();
+
+ // Should we store the information in an array or hash
+ if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
+ Type* t = ArrayType::get(Type::getInt32Ty(*Context),
+ dag.getNumberOfPaths());
+
+ dag.setCounterArray(new GlobalVariable(M, t, false,
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(t), ""));
+ }
+
+ insertInstrumentation(dag, M);
+
+ // Add to global function reference table
+ unsigned type;
+ Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
+
+ if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
+ type = ProfilingArray;
+ else
+ type = ProfilingHash;
+
+ std::vector<Constant*> entryArray(3);
+ entryArray[0] = createIncrementConstant(type,32);
+ entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32);
+ entryArray[2] = dag.getCounterArray() ?
+ ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
+ Constant::getNullValue(voidPtr);
+
+ StructType* at = ftEntryTypeBuilder::get(*Context);
+ ConstantStruct* functionEntry =
+ (ConstantStruct*)ConstantStruct::get(at, entryArray);
+ ftInit.push_back(functionEntry);
+}
+
+// Output the bitcode if we want to observe instrumentation changess
+#define PRINT_MODULE dbgs() << \
+ "\n\n============= MODULE BEGIN ===============\n" << M << \
+ "\n============== MODULE END ================\n"
+
+bool PathProfiler::runOnModule(Module &M) {
+ Context = &M.getContext();
+
+ DEBUG(dbgs()
+ << "****************************************\n"
+ << "****************************************\n"
+ << "** **\n"
+ << "** PATH PROFILING INSTRUMENTATION **\n"
+ << "** **\n"
+ << "****************************************\n"
+ << "****************************************\n");
+
+ // No main, no instrumentation!
+ Function *Main = M.getFunction("main");
+
+ // Using fortran? ... this kind of works
+ if (!Main)
+ Main = M.getFunction("MAIN__");
+
+ if (!Main) {
+ errs() << "WARNING: cannot insert path profiling into a module"
+ << " with no main function!\n";
+ return false;
+ }
+
+ llvmIncrementHashFunction = M.getOrInsertFunction(
+ "llvm_increment_path_count",
+ Type::getVoidTy(*Context), // return type
+ Type::getInt32Ty(*Context), // function number
+ Type::getInt32Ty(*Context), // path number
+ NULL );
+
+ llvmDecrementHashFunction = M.getOrInsertFunction(
+ "llvm_decrement_path_count",
+ Type::getVoidTy(*Context), // return type
+ Type::getInt32Ty(*Context), // function number
+ Type::getInt32Ty(*Context), // path number
+ NULL );
+
+ std::vector<Constant*> ftInit;
+ unsigned functionNumber = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+ if (F->isDeclaration())
+ continue;
+
+ DEBUG(dbgs() << "Function: " << F->getName() << "\n");
+ functionNumber++;
+
+ // set function number
+ currentFunctionNumber = functionNumber;
+ runOnFunction(ftInit, *F, M);
+ }
+
+ Type *t = ftEntryTypeBuilder::get(*Context);
+ ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
+ Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
+
+ DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
+
+ GlobalVariable* functionTable =
+ new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
+ ftInitConstant, "functionPathTable");
+ Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
+ InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
+ PointerType::getUnqual(eltType));
+
+ DEBUG(PRINT_MODULE);
+
+ return true;
+}
+
+// If this edge is a critical edge, then inserts a node at this edge.
+// This edge becomes the first edge, and a new BallLarusEdge is created.
+// Returns true if the edge was split
+bool PathProfiler::splitCritical(BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag) {
+ unsigned succNum = edge->getSuccessorNumber();
+ BallLarusNode* sourceNode = edge->getSource();
+ BallLarusNode* targetNode = edge->getTarget();
+ BasicBlock* sourceBlock = sourceNode->getBlock();
+ BasicBlock* targetBlock = targetNode->getBlock();
+
+ if(sourceBlock == NULL || targetBlock == NULL
+ || sourceNode->getNumberSuccEdges() <= 1
+ || targetNode->getNumberPredEdges() == 1 ) {
+ return(false);
+ }
+
+ TerminatorInst* terminator = sourceBlock->getTerminator();
+
+ if( SplitCriticalEdge(terminator, succNum, this, false)) {
+ BasicBlock* newBlock = terminator->getSuccessor(succNum);
+ dag->splitUpdate(edge, newBlock);
+ return(true);
+ } else
+ return(false);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
new file mode 100644
index 000000000000..4b3de6d7fc38
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -0,0 +1,169 @@
+//===- ProfilingUtils.cpp - Helper functions shared by profilers ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a few helper functions which are used by profile
+// instrumentation code to instrument the code. This allows the profiler pass
+// to worry about *what* to insert, and these functions take care of *how* to do
+// it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ProfilingUtils.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+
+void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
+ GlobalValue *Array,
+ PointerType *arrayType) {
+ LLVMContext &Context = MainFn->getContext();
+ Type *ArgVTy =
+ PointerType::getUnqual(Type::getInt8PtrTy(Context));
+ PointerType *UIntPtr = arrayType ? arrayType :
+ Type::getInt32PtrTy(Context);
+ Module &M = *MainFn->getParent();
+ Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
+ Type::getInt32Ty(Context),
+ ArgVTy, UIntPtr,
+ Type::getInt32Ty(Context),
+ (Type *)0);
+
+ // This could force argc and argv into programs that wouldn't otherwise have
+ // them, but instead we just pass null values in.
+ std::vector<Value*> Args(4);
+ Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Args[1] = Constant::getNullValue(ArgVTy);
+
+ // Skip over any allocas in the entry block.
+ BasicBlock *Entry = MainFn->begin();
+ BasicBlock::iterator InsertPos = Entry->begin();
+ while (isa<AllocaInst>(InsertPos)) ++InsertPos;
+
+ std::vector<Constant*> GEPIndices(2,
+ Constant::getNullValue(Type::getInt32Ty(Context)));
+ unsigned NumElements = 0;
+ if (Array) {
+ Args[2] = ConstantExpr::getGetElementPtr(Array, GEPIndices);
+ NumElements =
+ cast<ArrayType>(Array->getType()->getElementType())->getNumElements();
+ } else {
+ // If this profiling instrumentation doesn't have a constant array, just
+ // pass null.
+ Args[2] = ConstantPointerNull::get(UIntPtr);
+ }
+ Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
+
+ CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos);
+
+ // If argc or argv are not available in main, just pass null values in.
+ Function::arg_iterator AI;
+ switch (MainFn->arg_size()) {
+ default:
+ case 2:
+ AI = MainFn->arg_begin(); ++AI;
+ if (AI->getType() != ArgVTy) {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
+ false);
+ InitCall->setArgOperand(1,
+ CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
+ } else {
+ InitCall->setArgOperand(1, AI);
+ }
+ /* FALL THROUGH */
+
+ case 1:
+ AI = MainFn->arg_begin();
+ // If the program looked at argc, have it look at the return value of the
+ // init call instead.
+ if (!AI->getType()->isIntegerTy(32)) {
+ Instruction::CastOps opcode;
+ if (!AI->use_empty()) {
+ opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
+ AI->replaceAllUsesWith(
+ CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
+ }
+ opcode = CastInst::getCastOpcode(AI, true,
+ Type::getInt32Ty(Context), true);
+ InitCall->setArgOperand(0,
+ CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
+ "argc.cast", InitCall));
+ } else {
+ AI->replaceAllUsesWith(InitCall);
+ InitCall->setArgOperand(0, AI);
+ }
+
+ case 0: break;
+ }
+}
+
+void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray, bool beginning) {
+ // Insert the increment after any alloca or PHI instructions...
+ BasicBlock::iterator InsertPos = beginning ? BB->getFirstInsertionPt() :
+ BB->getTerminator();
+ while (isa<AllocaInst>(InsertPos))
+ ++InsertPos;
+
+ LLVMContext &Context = BB->getContext();
+
+ // Create the getelementptr constant expression
+ std::vector<Constant*> Indices(2);
+ Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
+ Constant *ElementPtr =
+ ConstantExpr::getGetElementPtr(CounterArray, Indices);
+
+ // Load, increment and store the value back.
+ Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
+ Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ "NewFuncCounter", InsertPos);
+ new StoreInst(NewVal, ElementPtr, InsertPos);
+}
+
+void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
+ // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
+ // types.
+ Type *GlobalDtorElems[2] = {
+ Type::getInt32Ty(Mod->getContext()),
+ FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
+ };
+ StructType *GlobalDtorElemTy =
+ StructType::get(Mod->getContext(), GlobalDtorElems, false);
+
+ // Construct the new element we'll be adding.
+ Constant *Elem[2] = {
+ ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535),
+ ConstantExpr::getBitCast(Callee, GlobalDtorElems[1])
+ };
+
+ // If llvm.global_dtors exists, make a copy of the things in its list and
+ // delete it, to replace it with one that has a larger array type.
+ std::vector<Constant *> dtors;
+ if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) {
+ if (ConstantArray *InitList =
+ dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) {
+ for (unsigned i = 0, e = InitList->getType()->getNumElements();
+ i != e; ++i)
+ dtors.push_back(cast<Constant>(InitList->getOperand(i)));
+ }
+ GlobalDtors->eraseFromParent();
+ }
+
+ // Build up llvm.global_dtors with our new item in it.
+ GlobalVariable *GlobalDtors = new GlobalVariable(
+ *Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
+ GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
+
+ dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem));
+ GlobalDtors->setInitializer(ConstantArray::get(
+ cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
new file mode 100644
index 000000000000..09b22171ff04
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -0,0 +1,36 @@
+//===- ProfilingUtils.h - Helper functions shared by profilers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a few helper functions which are used by profile
+// instrumentation code to instrument the code. This allows the profiler pass
+// to worry about *what* to insert, and these functions take care of *how* to do
+// it.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PROFILINGUTILS_H
+#define PROFILINGUTILS_H
+
+namespace llvm {
+ class BasicBlock;
+ class Function;
+ class GlobalValue;
+ class Module;
+ class PointerType;
+
+ void InsertProfilingInitCall(Function *MainFn, const char *FnName,
+ GlobalValue *Arr = 0,
+ PointerType *arrayType = 0);
+ void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray,
+ bool beginning = true);
+ void InsertProfilingShutdownCall(Function *Callee, Module *Mod);
+}
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
new file mode 100644
index 000000000000..299060a42fe8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -0,0 +1,585 @@
+//===-- ThreadSanitizer.cpp - race detector -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer, a race detector.
+//
+// The tool is under development, for the details about previous versions see
+// http://code.google.com/p/data-race-test
+//
+// The instrumentation phase is quite simple:
+// - Insert calls to run-time library before every memory access.
+// - Optimizations may apply to avoid instrumenting some of the accesses.
+// - Insert calls at function entry/exit.
+// The rest is handled by the run-time library.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tsan"
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BlackList.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+static cl::opt<std::string> ClBlacklistFile("tsan-blacklist",
+ cl::desc("Blacklist file"), cl::Hidden);
+static cl::opt<bool> ClInstrumentMemoryAccesses(
+ "tsan-instrument-memory-accesses", cl::init(true),
+ cl::desc("Instrument memory accesses"), cl::Hidden);
+static cl::opt<bool> ClInstrumentFuncEntryExit(
+ "tsan-instrument-func-entry-exit", cl::init(true),
+ cl::desc("Instrument function entry and exit"), cl::Hidden);
+static cl::opt<bool> ClInstrumentAtomics(
+ "tsan-instrument-atomics", cl::init(true),
+ cl::desc("Instrument atomics"), cl::Hidden);
+static cl::opt<bool> ClInstrumentMemIntrinsics(
+ "tsan-instrument-memintrinsics", cl::init(true),
+ cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
+
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumOmittedReadsBeforeWrite,
+ "Number of reads ignored due to following writes");
+STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size");
+STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes");
+STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads");
+STATISTIC(NumOmittedReadsFromConstantGlobals,
+ "Number of reads from constant globals");
+STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads");
+
+namespace {
+
+/// ThreadSanitizer: instrument the code in module to find races.
+struct ThreadSanitizer : public FunctionPass {
+ ThreadSanitizer(StringRef BlacklistFile = StringRef())
+ : FunctionPass(ID),
+ TD(0),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
+ : BlacklistFile) { }
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ static char ID; // Pass identification, replacement for typeid.
+
+ private:
+ void initializeCallbacks(Module &M);
+ bool instrumentLoadOrStore(Instruction *I);
+ bool instrumentAtomic(Instruction *I);
+ bool instrumentMemIntrinsic(Instruction *I);
+ void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
+ SmallVectorImpl<Instruction*> &All);
+ bool addrPointsToConstantData(Value *Addr);
+ int getMemoryAccessFuncIndex(Value *Addr);
+
+ DataLayout *TD;
+ Type *IntptrTy;
+ SmallString<64> BlacklistFile;
+ OwningPtr<BlackList> BL;
+ IntegerType *OrdTy;
+ // Callbacks to run-time library are computed in doInitialization.
+ Function *TsanFuncEntry;
+ Function *TsanFuncExit;
+ // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
+ static const size_t kNumberOfAccessSizes = 5;
+ Function *TsanRead[kNumberOfAccessSizes];
+ Function *TsanWrite[kNumberOfAccessSizes];
+ Function *TsanAtomicLoad[kNumberOfAccessSizes];
+ Function *TsanAtomicStore[kNumberOfAccessSizes];
+ Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes];
+ Function *TsanAtomicCAS[kNumberOfAccessSizes];
+ Function *TsanAtomicThreadFence;
+ Function *TsanAtomicSignalFence;
+ Function *TsanVptrUpdate;
+ Function *TsanVptrLoad;
+ Function *MemmoveFn, *MemcpyFn, *MemsetFn;
+};
+} // namespace
+
+char ThreadSanitizer::ID = 0;
+INITIALIZE_PASS(ThreadSanitizer, "tsan",
+ "ThreadSanitizer: detects data races.",
+ false, false)
+
+const char *ThreadSanitizer::getPassName() const {
+ return "ThreadSanitizer";
+}
+
+FunctionPass *llvm::createThreadSanitizerPass(StringRef BlacklistFile) {
+ return new ThreadSanitizer(BlacklistFile);
+}
+
+static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
+ if (Function *F = dyn_cast<Function>(FuncOrBitcast))
+ return F;
+ FuncOrBitcast->dump();
+ report_fatal_error("ThreadSanitizer interface function redefined");
+}
+
+void ThreadSanitizer::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(M.getContext());
+ // Initialize the callbacks.
+ TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+ TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_func_exit", IRB.getVoidTy(), NULL));
+ OrdTy = IRB.getInt32Ty();
+ for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
+ const size_t ByteSize = 1 << i;
+ const size_t BitSize = ByteSize * 8;
+ SmallString<32> ReadName("__tsan_read" + itostr(ByteSize));
+ TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+
+ SmallString<32> WriteName("__tsan_write" + itostr(ByteSize));
+ TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+
+ Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) +
+ "_load");
+ TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ AtomicLoadName, Ty, PtrTy, OrdTy, NULL));
+
+ SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) +
+ "_store");
+ TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy,
+ NULL));
+
+ for (int op = AtomicRMWInst::FIRST_BINOP;
+ op <= AtomicRMWInst::LAST_BINOP; ++op) {
+ TsanAtomicRMW[op][i] = NULL;
+ const char *NamePart = NULL;
+ if (op == AtomicRMWInst::Xchg)
+ NamePart = "_exchange";
+ else if (op == AtomicRMWInst::Add)
+ NamePart = "_fetch_add";
+ else if (op == AtomicRMWInst::Sub)
+ NamePart = "_fetch_sub";
+ else if (op == AtomicRMWInst::And)
+ NamePart = "_fetch_and";
+ else if (op == AtomicRMWInst::Or)
+ NamePart = "_fetch_or";
+ else if (op == AtomicRMWInst::Xor)
+ NamePart = "_fetch_xor";
+ else if (op == AtomicRMWInst::Nand)
+ NamePart = "_fetch_nand";
+ else
+ continue;
+ SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
+ TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction(
+ RMWName, Ty, PtrTy, Ty, OrdTy, NULL));
+ }
+
+ SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +
+ "_compare_exchange_val");
+ TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, NULL));
+ }
+ TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), NULL));
+ TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+ TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL));
+ TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL));
+
+ MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, NULL));
+ MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IntptrTy, NULL));
+ MemsetFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
+ IntptrTy, NULL));
+}
+
+bool ThreadSanitizer::doInitialization(Module &M) {
+ TD = getAnalysisIfAvailable<DataLayout>();
+ if (!TD)
+ return false;
+ BL.reset(new BlackList(BlacklistFile));
+
+ // Always insert a call to __tsan_init into the module's CTORs.
+ IRBuilder<> IRB(M.getContext());
+ IntptrTy = IRB.getIntPtrTy(TD);
+ Value *TsanInit = M.getOrInsertFunction("__tsan_init",
+ IRB.getVoidTy(), NULL);
+ appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
+
+ return true;
+}
+
+static bool isVtableAccess(Instruction *I) {
+ if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) {
+ if (Tag->getNumOperands() < 1) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ }
+ return false;
+}
+
+bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
+ // If this is a GEP, just analyze its pointer operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
+ Addr = GEP->getPointerOperand();
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ if (GV->isConstant()) {
+ // Reads from constant globals can not race with any writes.
+ NumOmittedReadsFromConstantGlobals++;
+ return true;
+ }
+ } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) {
+ if (isVtableAccess(L)) {
+ // Reads from a vtable pointer can not race with any writes.
+ NumOmittedReadsFromVtable++;
+ return true;
+ }
+ }
+ return false;
+}
+
+// Instrumenting some of the accesses may be proven redundant.
+// Currently handled:
+// - read-before-write (within same BB, no calls between)
+//
+// We do not handle some of the patterns that should not survive
+// after the classic compiler optimizations.
+// E.g. two reads from the same temp should be eliminated by CSE,
+// two writes should be eliminated by DSE, etc.
+//
+// 'Local' is a vector of insns within the same BB (no calls between).
+// 'All' is a vector of insns that will be instrumented.
+void ThreadSanitizer::chooseInstructionsToInstrument(
+ SmallVectorImpl<Instruction*> &Local,
+ SmallVectorImpl<Instruction*> &All) {
+ SmallSet<Value*, 8> WriteTargets;
+ // Iterate from the end.
+ for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(),
+ E = Local.rend(); It != E; ++It) {
+ Instruction *I = *It;
+ if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
+ WriteTargets.insert(Store->getPointerOperand());
+ } else {
+ LoadInst *Load = cast<LoadInst>(I);
+ Value *Addr = Load->getPointerOperand();
+ if (WriteTargets.count(Addr)) {
+ // We will write to this temp, so no reason to analyze the read.
+ NumOmittedReadsBeforeWrite++;
+ continue;
+ }
+ if (addrPointsToConstantData(Addr)) {
+ // Addr points to some constant data -- it can not race with any writes.
+ continue;
+ }
+ }
+ All.push_back(I);
+ }
+ Local.clear();
+}
+
+static bool isAtomic(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isAtomic() && LI->getSynchScope() == CrossThread;
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isAtomic() && SI->getSynchScope() == CrossThread;
+ if (isa<AtomicRMWInst>(I))
+ return true;
+ if (isa<AtomicCmpXchgInst>(I))
+ return true;
+ if (isa<FenceInst>(I))
+ return true;
+ return false;
+}
+
+bool ThreadSanitizer::runOnFunction(Function &F) {
+ if (!TD) return false;
+ if (BL->isIn(F)) return false;
+ initializeCallbacks(*F.getParent());
+ SmallVector<Instruction*, 8> RetVec;
+ SmallVector<Instruction*, 8> AllLoadsAndStores;
+ SmallVector<Instruction*, 8> LocalLoadsAndStores;
+ SmallVector<Instruction*, 8> AtomicAccesses;
+ SmallVector<Instruction*, 8> MemIntrinCalls;
+ bool Res = false;
+ bool HasCalls = false;
+
+ // Traverse all instructions, collect loads/stores/returns, check for calls.
+ for (Function::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ BasicBlock &BB = *FI;
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
+ BI != BE; ++BI) {
+ if (isAtomic(BI))
+ AtomicAccesses.push_back(BI);
+ else if (isa<LoadInst>(BI) || isa<StoreInst>(BI))
+ LocalLoadsAndStores.push_back(BI);
+ else if (isa<ReturnInst>(BI))
+ RetVec.push_back(BI);
+ else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+ if (isa<MemIntrinsic>(BI))
+ MemIntrinCalls.push_back(BI);
+ HasCalls = true;
+ chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ }
+ }
+ chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ }
+
+ // We have collected all loads and stores.
+ // FIXME: many of these accesses do not need to be checked for races
+ // (e.g. variables that do not escape, etc).
+
+ // Instrument memory accesses.
+ if (ClInstrumentMemoryAccesses)
+ for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) {
+ Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
+ }
+
+ // Instrument atomic memory accesses.
+ if (ClInstrumentAtomics)
+ for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) {
+ Res |= instrumentAtomic(AtomicAccesses[i]);
+ }
+
+ if (ClInstrumentMemIntrinsics)
+ for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) {
+ Res |= instrumentMemIntrinsic(MemIntrinCalls[i]);
+ }
+
+ // Instrument function entry/exit points if there were instrumented accesses.
+ if ((Res || HasCalls) && ClInstrumentFuncEntryExit) {
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ Value *ReturnAddress = IRB.CreateCall(
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
+ IRB.getInt32(0));
+ IRB.CreateCall(TsanFuncEntry, ReturnAddress);
+ for (size_t i = 0, n = RetVec.size(); i < n; ++i) {
+ IRBuilder<> IRBRet(RetVec[i]);
+ IRBRet.CreateCall(TsanFuncExit);
+ }
+ Res = true;
+ }
+ return Res;
+}
+
+bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
+ IRBuilder<> IRB(I);
+ bool IsWrite = isa<StoreInst>(*I);
+ Value *Addr = IsWrite
+ ? cast<StoreInst>(I)->getPointerOperand()
+ : cast<LoadInst>(I)->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr);
+ if (Idx < 0)
+ return false;
+ if (IsWrite && isVtableAccess(I)) {
+ DEBUG(dbgs() << " VPTR : " << *I << "\n");
+ Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
+ // StoredValue does not necessary have a pointer type.
+ if (isa<IntegerType>(StoredValue->getType()))
+ StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy());
+ // Call TsanVptrUpdate.
+ IRB.CreateCall2(TsanVptrUpdate,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy()));
+ NumInstrumentedVtableWrites++;
+ return true;
+ }
+ if (!IsWrite && isVtableAccess(I)) {
+ IRB.CreateCall(TsanVptrLoad,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ NumInstrumentedVtableReads++;
+ return true;
+ }
+ Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
+ IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ if (IsWrite) NumInstrumentedWrites++;
+ else NumInstrumentedReads++;
+ return true;
+}
+
+static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
+ uint32_t v = 0;
+ switch (ord) {
+ case NotAtomic: assert(false);
+ case Unordered: // Fall-through.
+ case Monotonic: v = 0; break;
+ // case Consume: v = 1; break; // Not specified yet.
+ case Acquire: v = 2; break;
+ case Release: v = 3; break;
+ case AcquireRelease: v = 4; break;
+ case SequentiallyConsistent: v = 5; break;
+ }
+ return IRB->getInt32(v);
+}
+
+static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
+ uint32_t v = 0;
+ switch (ord) {
+ case NotAtomic: assert(false);
+ case Unordered: // Fall-through.
+ case Monotonic: v = 0; break;
+ // case Consume: v = 1; break; // Not specified yet.
+ case Acquire: v = 2; break;
+ case Release: v = 0; break;
+ case AcquireRelease: v = 2; break;
+ case SequentiallyConsistent: v = 5; break;
+ }
+ return IRB->getInt32(v);
+}
+
+// If a memset intrinsic gets inlined by the code gen, we will miss races on it.
+// So, we either need to ensure the intrinsic is not inlined, or instrument it.
+// We do not instrument memset/memmove/memcpy intrinsics (too complicated),
+// instead we simply replace them with regular function calls, which are then
+// intercepted by the run-time.
+// Since tsan is running after everyone else, the calls should not be
+// replaced back with intrinsics. If that becomes wrong at some point,
+// we will need to call e.g. __tsan_memset to avoid the intrinsics.
+bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
+ IRBuilder<> IRB(I);
+ if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
+ IRB.CreateCall3(MemsetFn,
+ IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ I->eraseFromParent();
+ } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
+ IRB.CreateCall3(isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
+ IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ I->eraseFromParent();
+ }
+ return false;
+}
+
+// Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x
+// standards. For background see C++11 standard. A slightly older, publically
+// available draft of the standard (not entirely up-to-date, but close enough
+// for casual browsing) is available here:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
+// The following page contains more background information:
+// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
+
+bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
+ IRBuilder<> IRB(I);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Value *Addr = LI->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr);
+ if (Idx < 0)
+ return false;
+ const size_t ByteSize = 1 << Idx;
+ const size_t BitSize = ByteSize * 8;
+ Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ createOrdering(&IRB, LI->getOrdering())};
+ CallInst *C = CallInst::Create(TsanAtomicLoad[Idx],
+ ArrayRef<Value*>(Args));
+ ReplaceInstWithInst(I, C);
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ Value *Addr = SI->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr);
+ if (Idx < 0)
+ return false;
+ const size_t ByteSize = 1 << Idx;
+ const size_t BitSize = ByteSize * 8;
+ Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ IRB.CreateIntCast(SI->getValueOperand(), Ty, false),
+ createOrdering(&IRB, SI->getOrdering())};
+ CallInst *C = CallInst::Create(TsanAtomicStore[Idx],
+ ArrayRef<Value*>(Args));
+ ReplaceInstWithInst(I, C);
+ } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
+ Value *Addr = RMWI->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr);
+ if (Idx < 0)
+ return false;
+ Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
+ if (F == NULL)
+ return false;
+ const size_t ByteSize = 1 << Idx;
+ const size_t BitSize = ByteSize * 8;
+ Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ IRB.CreateIntCast(RMWI->getValOperand(), Ty, false),
+ createOrdering(&IRB, RMWI->getOrdering())};
+ CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args));
+ ReplaceInstWithInst(I, C);
+ } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
+ Value *Addr = CASI->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr);
+ if (Idx < 0)
+ return false;
+ const size_t ByteSize = 1 << Idx;
+ const size_t BitSize = ByteSize * 8;
+ Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false),
+ IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false),
+ createOrdering(&IRB, CASI->getOrdering()),
+ createFailOrdering(&IRB, CASI->getOrdering())};
+ CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args));
+ ReplaceInstWithInst(I, C);
+ } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
+ Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
+ Function *F = FI->getSynchScope() == SingleThread ?
+ TsanAtomicSignalFence : TsanAtomicThreadFence;
+ CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args));
+ ReplaceInstWithInst(I, C);
+ }
+ return true;
+}
+
+int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) {
+ Type *OrigPtrTy = Addr->getType();
+ Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+ assert(OrigTy->isSized());
+ uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+ if (TypeSize != 8 && TypeSize != 16 &&
+ TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+ NumAccessesWithBadSize++;
+ // Ignore all unusual sizes.
+ return -1;
+ }
+ size_t Idx = CountTrailingZeros_32(TypeSize / 8);
+ assert(Idx < kNumberOfAccessSizes);
+ return Idx;
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
new file mode 100644
index 000000000000..8f917aeb3725
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -0,0 +1,262 @@
+//===- DependencyAnalysis.cpp - ObjC ARC Optimization ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines special dependency analysis routines used in Objective C
+/// ARC Optimizations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-dependency"
+#include "ObjCARC.h"
+#include "DependencyAnalysis.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/Support/CFG.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// Test whether the given instruction can result in a reference count
+/// modification (positive or negative) for the pointer's object.
+bool
+llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ InstructionClass Class) {
+ switch (Class) {
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_IntrinsicUser:
+ case IC_User:
+ // These operations never directly modify a reference count.
+ return false;
+ default: break;
+ }
+
+ ImmutableCallSite CS = static_cast<const Value *>(Inst);
+ assert(CS && "Only calls can alter reference counts!");
+
+ // See if AliasAnalysis can help us with the call.
+ AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+ if (AliasAnalysis::onlyReadsMemory(MRB))
+ return false;
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I) {
+ const Value *Op = *I;
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ }
+
+ // Assume the worst.
+ return true;
+}
+
+/// Test whether the given instruction can "use" the given pointer's object in a
+/// way that requires the reference count to be positive.
+bool
+llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, InstructionClass Class) {
+ // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers.
+ if (Class == IC_Call)
+ return false;
+
+ // Consider various instructions which may have pointer arguments which are
+ // not "uses".
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
+ // Comparing a pointer with null, or any other constant, isn't really a use,
+ // because we don't care what the pointer points to, or about the values
+ // of any other dynamic reference-counted pointers.
+ if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA()))
+ return false;
+ } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
+ // For calls, just check the arguments (and not the callee operand).
+ for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
+ OE = CS.arg_end(); OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Special-case stores, because we don't care about the stored value, just
+ // the store address.
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
+ // If we can't tell what the underlying object was, assume there is a
+ // dependence.
+ return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr);
+ }
+
+ // Check each operand for a match.
+ for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
+ OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+}
+
+/// Test if there can be dependencies on Inst through Arg. This function only
+/// tests dependencies relevant for removing pairs of calls.
+bool
+llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
+ const Value *Arg, ProvenanceAnalysis &PA) {
+ // If we've reached the definition of Arg, stop.
+ if (Inst == Arg)
+ return true;
+
+ switch (Flavor) {
+ case NeedsPositiveRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanUse(Inst, Arg, PA, Class);
+ }
+ }
+
+ case AutoreleasePoolBoundary: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ // These mark the end and begin of an autorelease pool scope.
+ return true;
+ default:
+ // Nothing else does this.
+ return false;
+ }
+ }
+
+ case CanChangeRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ // Conservatively assume this can decrement any count.
+ return true;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanAlterRefCount(Inst, Arg, PA, Class);
+ }
+ }
+
+ case RetainAutoreleaseDep:
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ // Don't merge an objc_autorelease with an objc_retain inside a different
+ // autoreleasepool scope.
+ return true;
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Nothing else matters for objc_retainAutorelease formation.
+ return false;
+ }
+
+ case RetainAutoreleaseRVDep: {
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Anything that can autorelease interrupts
+ // retainAutoreleaseReturnValue formation.
+ return CanInterruptRV(Class);
+ }
+ }
+
+ case RetainRVDep:
+ return CanInterruptRV(GetBasicInstructionClass(Inst));
+ }
+
+ llvm_unreachable("Invalid dependence flavor");
+}
+
+/// Walk up the CFG from StartPos (which is in StartBB) and find local and
+/// non-local dependencies on Arg.
+///
+/// TODO: Cache results?
+void
+llvm::objcarc::FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInsts,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ BasicBlock::iterator StartPos = StartInst;
+
+ SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+ Worklist.push_back(std::make_pair(StartBB, StartPos));
+ do {
+ std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+ Worklist.pop_back_val();
+ BasicBlock *LocalStartBB = Pair.first;
+ BasicBlock::iterator LocalStartPos = Pair.second;
+ BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+ for (;;) {
+ if (LocalStartPos == StartBBBegin) {
+ pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+ if (PI == PE)
+ // If we've reached the function entry, produce a null dependence.
+ DependingInsts.insert(0);
+ else
+ // Add the predecessors to the worklist.
+ do {
+ BasicBlock *PredBB = *PI;
+ if (Visited.insert(PredBB))
+ Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+ } while (++PI != PE);
+ break;
+ }
+
+ Instruction *Inst = --LocalStartPos;
+ if (Depends(Flavor, Inst, Arg, PA)) {
+ DependingInsts.insert(Inst);
+ break;
+ }
+ }
+ } while (!Worklist.empty());
+
+ // Determine whether the original StartBB post-dominates all of the blocks we
+ // visited. If not, insert a sentinal indicating that most optimizations are
+ // not safe.
+ for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+ E = Visited.end(); I != E; ++I) {
+ const BasicBlock *BB = *I;
+ if (BB == StartBB)
+ continue;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ const BasicBlock *Succ = *SI;
+ if (Succ != StartBB && !Visited.count(Succ)) {
+ DependingInsts.insert(reinterpret_cast<Instruction *>(-1));
+ return;
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
new file mode 100644
index 000000000000..24d358b30ab1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -0,0 +1,79 @@
+//===- DependencyAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares special dependency analysis routines used in Objective C
+/// ARC Optimizations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+ class BasicBlock;
+ class Instruction;
+ class Value;
+}
+
+namespace llvm {
+namespace objcarc {
+
+class ProvenanceAnalysis;
+
+/// \enum DependenceKind
+/// \brief Defines different dependence kinds among various ARC constructs.
+///
+/// There are several kinds of dependence-like concepts in use here.
+///
+enum DependenceKind {
+ NeedsPositiveRetainCount,
+ AutoreleasePoolBoundary,
+ CanChangeRetainCount,
+ RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
+ RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
+ RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue.
+};
+
+void FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA);
+
+bool
+Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
+ ProvenanceAnalysis &PA);
+
+/// Test whether the given instruction can "use" the given pointer's object in a
+/// way that requires the reference count to be positive.
+bool
+CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+ InstructionClass Class);
+
+/// Test whether the given instruction can result in a reference count
+/// modification (positive or negative) for the pointer's object.
+bool
+CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, InstructionClass Class);
+
+} // namespace objcarc
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
new file mode 100644
index 000000000000..53a31b0de178
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -0,0 +1,48 @@
+//===-- ObjCARC.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMObjCARCOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "llvm-c/Core.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+ class PassRegistry;
+}
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+bool llvm::objcarc::EnableARCOpts;
+static cl::opt<bool, true>
+EnableARCOptimizations("enable-objc-arc-opts",
+ cl::location(EnableARCOpts),
+ cl::init(true));
+
+/// initializeObjCARCOptsPasses - Initialize all passes linked into the
+/// ObjCARCOpts library.
+void llvm::initializeObjCARCOpts(PassRegistry &Registry) {
+ initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCAPElimPass(Registry);
+ initializeObjCARCExpandPass(Registry);
+ initializeObjCARCContractPass(Registry);
+ initializeObjCARCOptPass(Registry);
+}
+
+void LLVMInitializeObjCARCOpts(LLVMPassRegistryRef R) {
+ initializeObjCARCOpts(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
new file mode 100644
index 000000000000..39670f339e9f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -0,0 +1,395 @@
+//===- ObjCARC.h - ObjC ARC Optimization --------------*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines common definitions/declarations used by the ObjC ARC
+/// Optimizer. ARC stands for Automatic Reference Counting and is a system for
+/// managing reference counts for objects in Objective C.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_OBJCARC_H
+#define LLVM_TRANSFORMS_SCALAR_OBJCARC_H
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/ObjCARC.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+namespace llvm {
+class raw_ostream;
+}
+
+namespace llvm {
+namespace objcarc {
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+extern bool EnableARCOpts;
+
+/// \brief Test if the given module looks interesting to run ARC optimization
+/// on.
+static inline bool ModuleHasARC(const Module &M) {
+ return
+ M.getNamedValue("objc_retain") ||
+ M.getNamedValue("objc_release") ||
+ M.getNamedValue("objc_autorelease") ||
+ M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
+ M.getNamedValue("objc_retainBlock") ||
+ M.getNamedValue("objc_autoreleaseReturnValue") ||
+ M.getNamedValue("objc_autoreleasePoolPush") ||
+ M.getNamedValue("objc_loadWeakRetained") ||
+ M.getNamedValue("objc_loadWeak") ||
+ M.getNamedValue("objc_destroyWeak") ||
+ M.getNamedValue("objc_storeWeak") ||
+ M.getNamedValue("objc_initWeak") ||
+ M.getNamedValue("objc_moveWeak") ||
+ M.getNamedValue("objc_copyWeak") ||
+ M.getNamedValue("objc_retainedObject") ||
+ M.getNamedValue("objc_unretainedObject") ||
+ M.getNamedValue("objc_unretainedPointer") ||
+ M.getNamedValue("clang.arc.use");
+}
+
+/// \enum InstructionClass
+/// \brief A simple classification for instructions.
+enum InstructionClass {
+ IC_Retain, ///< objc_retain
+ IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
+ IC_RetainBlock, ///< objc_retainBlock
+ IC_Release, ///< objc_release
+ IC_Autorelease, ///< objc_autorelease
+ IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
+ IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
+ IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
+ IC_NoopCast, ///< objc_retainedObject, etc.
+ IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+ IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ IC_StoreWeak, ///< objc_storeWeak (primitive)
+ IC_InitWeak, ///< objc_initWeak (derived)
+ IC_LoadWeak, ///< objc_loadWeak (derived)
+ IC_MoveWeak, ///< objc_moveWeak (derived)
+ IC_CopyWeak, ///< objc_copyWeak (derived)
+ IC_DestroyWeak, ///< objc_destroyWeak (derived)
+ IC_StoreStrong, ///< objc_storeStrong (derived)
+ IC_IntrinsicUser, ///< clang.arc.use
+ IC_CallOrUser, ///< could call objc_release and/or "use" pointers
+ IC_Call, ///< could call objc_release
+ IC_User, ///< could "use" a pointer
+ IC_None ///< anything else
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class);
+
+/// \brief Test if the given class is a kind of user.
+inline static bool IsUser(InstructionClass Class) {
+ return Class == IC_User ||
+ Class == IC_CallOrUser ||
+ Class == IC_IntrinsicUser;
+}
+
+/// \brief Test if the given class is objc_retain or equivalent.
+static inline bool IsRetain(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV;
+}
+
+/// \brief Test if the given class is objc_autorelease or equivalent.
+static inline bool IsAutorelease(InstructionClass Class) {
+ return Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// \brief Test if the given class represents instructions which return their
+/// argument verbatim.
+static inline bool IsForwarding(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_NoopCast;
+}
+
+/// \brief Test if the given class represents instructions which do nothing if
+/// passed a null pointer.
+static inline bool IsNoopOnNull(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock;
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the "tail" keyword.
+static inline bool IsAlwaysTail(InstructionClass Class) {
+ // IC_RetainBlock may be given a stack argument.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// \brief Test if the given class represents instructions which are never safe
+/// to mark with the "tail" keyword.
+static inline bool IsNeverTail(InstructionClass Class) {
+ /// It is never safe to tail call objc_autorelease since by tail calling
+ /// objc_autorelease, we also tail call -[NSObject autorelease] which supports
+ /// fast autoreleasing causing our object to be potentially reclaimed from the
+ /// autorelease pool which violates the semantics of __autoreleasing types in
+ /// ARC.
+ return Class == IC_Autorelease;
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the nounwind attribute.
+static inline bool IsNoThrow(InstructionClass Class) {
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_AutoreleasepoolPush ||
+ Class == IC_AutoreleasepoolPop;
+}
+
+/// Test whether the given instruction can autorelease any pointer or cause an
+/// autoreleasepool pop.
+static inline bool
+CanInterruptRV(InstructionClass Class) {
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_CallOrUser:
+ case IC_Call:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// \brief Determine if F is one of the special known Functions. If it isn't,
+/// return IC_CallOrUser.
+InstructionClass GetFunctionClass(const Function *F);
+
+/// \brief Determine which objc runtime call instruction class V belongs to.
+///
+/// This is similar to GetInstructionClass except that it only detects objc
+/// runtime calls. This allows it to be faster.
+///
+static inline InstructionClass GetBasicInstructionClass(const Value *V) {
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (const Function *F = CI->getCalledFunction())
+ return GetFunctionClass(F);
+ // Otherwise, be conservative.
+ return IC_CallOrUser;
+ }
+
+ // Otherwise, be conservative.
+ return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
+}
+
+/// \brief Determine what kind of construct V is.
+InstructionClass GetInstructionClass(const Value *V);
+
+/// \brief This is a wrapper around getUnderlyingObject which also knows how to
+/// look through objc_retain and objc_autorelease calls, which we know to return
+/// their argument verbatim.
+static inline const Value *GetUnderlyingObjCPtr(const Value *V) {
+ for (;;) {
+ V = GetUnderlyingObject(V);
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+
+ return V;
+}
+
+/// \brief This is a wrapper around Value::stripPointerCasts which also knows
+/// how to look through objc_retain and objc_autorelease calls, which we know to
+/// return their argument verbatim.
+static inline const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// \brief This is a wrapper around Value::stripPointerCasts which also knows
+/// how to look through objc_retain and objc_autorelease calls, which we know to
+/// return their argument verbatim.
+static inline Value *StripPointerCastsAndObjCCalls(Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// \brief Assuming the given instruction is one of the special calls such as
+/// objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
+static inline Value *GetObjCArg(Value *Inst) {
+ return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+}
+
+static inline bool IsNullOrUndef(const Value *V) {
+ return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+static inline bool IsNoopInstruction(const Instruction *I) {
+ return isa<BitCastInst>(I) ||
+ (isa<GetElementPtrInst>(I) &&
+ cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
+
+
+/// \brief Erase the given instruction.
+///
+/// Many ObjC calls return their argument verbatim,
+/// so if it's such a call and the return value has users, replace them with the
+/// argument value.
+///
+static inline void EraseInstruction(Instruction *CI) {
+ Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+ bool Unused = CI->use_empty();
+
+ if (!Unused) {
+ // Replace the return value with the argument.
+ assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+ "Can't delete non-forwarding instruction with users!");
+ CI->replaceAllUsesWith(OldArg);
+ }
+
+ CI->eraseFromParent();
+
+ if (Unused)
+ RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
+
+/// \brief Test whether the given value is possible a retainable object pointer.
+static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
+ // Pointers to static or stack storage are not valid retainable object
+ // pointers.
+ if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+ return false;
+ // Special arguments can not be a valid retainable object pointer.
+ if (const Argument *Arg = dyn_cast<Argument>(Op))
+ if (Arg->hasByValAttr() ||
+ Arg->hasNestAttr() ||
+ Arg->hasStructRetAttr())
+ return false;
+ // Only consider values with pointer types.
+ //
+ // It seemes intuitive to exclude function pointer types as well, since
+ // functions are never retainable object pointers, however clang occasionally
+ // bitcasts retainable object pointers to function-pointer type temporarily.
+ PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ if (!Ty)
+ return false;
+ // Conservatively assume anything else is a potential retainable object
+ // pointer.
+ return true;
+}
+
+static inline bool IsPotentialRetainableObjPtr(const Value *Op,
+ AliasAnalysis &AA) {
+ // First make the rudimentary check.
+ if (!IsPotentialRetainableObjPtr(Op))
+ return false;
+
+ // Objects in constant memory are not reference-counted.
+ if (AA.pointsToConstantMemory(Op))
+ return false;
+
+ // Pointers in constant memory are not pointing to reference-counted objects.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+ if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+ return false;
+
+ // Otherwise assume the worst.
+ return true;
+}
+
+/// \brief Helper for GetInstructionClass. Determines what kind of construct CS
+/// is.
+static inline InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (IsPotentialRetainableObjPtr(*I))
+ return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+ return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// \brief Return true if this value refers to a distinct and identifiable
+/// object.
+///
+/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses
+/// special knowledge of ObjC conventions.
+static inline bool IsObjCIdentifiedObject(const Value *V) {
+ // Assume that call results and arguments have their own "provenance".
+ // Constants (including GlobalVariables) and Allocas are never
+ // reference-counted.
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
+ isa<Argument>(V) || isa<Constant>(V) ||
+ isa<AllocaInst>(V))
+ return true;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ const Value *Pointer =
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (GV->isConstant())
+ return true;
+ StringRef Name = GV->getName();
+ // These special variables are known to hold values which are not
+ // reference-counted pointers.
+ if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
+ Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
+ Name.startswith("\01l_objc_msgSend_fixup_"))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_OBJCARC_H
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
new file mode 100644
index 000000000000..00d9864953dc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -0,0 +1,175 @@
+//===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// This specific file implements optimizations which remove extraneous
+/// autorelease pools.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-ap-elim"
+#include "ObjCARC.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+namespace {
+ /// \brief Autorelease pool elimination.
+ class ObjCARCAPElim : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnModule(Module &M);
+
+ static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0);
+ static bool OptimizeBB(BasicBlock *BB);
+
+ public:
+ static char ID;
+ ObjCARCAPElim() : ModulePass(ID) {
+ initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCAPElim::ID = 0;
+INITIALIZE_PASS(ObjCARCAPElim,
+ "objc-arc-apelim",
+ "ObjC ARC autorelease pool elimination",
+ false, false)
+
+Pass *llvm::createObjCARCAPElimPass() {
+ return new ObjCARCAPElim();
+}
+
+void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+/// Interprocedurally determine if calls made by the given call site can
+/// possibly produce autoreleases.
+bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
+ if (const Function *Callee = CS.getCalledFunction()) {
+ if (Callee->isDeclaration() || Callee->mayBeOverridden())
+ return true;
+ for (Function::const_iterator I = Callee->begin(), E = Callee->end();
+ I != E; ++I) {
+ const BasicBlock *BB = I;
+ for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
+ J != F; ++J)
+ if (ImmutableCallSite JCS = ImmutableCallSite(J))
+ // This recursion depth limit is arbitrary. It's just great
+ // enough to cover known interesting testcases.
+ if (Depth < 3 &&
+ !JCS.onlyReadsMemory() &&
+ MayAutorelease(JCS, Depth + 1))
+ return true;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
+ bool Changed = false;
+
+ Instruction *Push = 0;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPush:
+ Push = Inst;
+ break;
+ case IC_AutoreleasepoolPop:
+ // If this pop matches a push and nothing in between can autorelease,
+ // zap the pair.
+ if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop "
+ "autorelease pair:\n"
+ " Pop: " << *Inst << "\n"
+ << " Push: " << *Push << "\n");
+ Inst->eraseFromParent();
+ Push->eraseFromParent();
+ }
+ Push = 0;
+ break;
+ case IC_CallOrUser:
+ if (MayAutorelease(ImmutableCallSite(Inst)))
+ Push = 0;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+bool ObjCARCAPElim::runOnModule(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!ModuleHasARC(M))
+ return false;
+
+ // Find the llvm.global_ctors variable, as the first step in
+ // identifying the global constructors. In theory, unnecessary autorelease
+ // pools could occur anywhere, but in practice it's pretty rare. Global
+ // ctors are a place where autorelease pools get inserted automatically,
+ // so it's pretty common for them to be unnecessary, and it's pretty
+ // profitable to eliminate them.
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return false;
+
+ assert(GV->hasDefinitiveInitializer() &&
+ "llvm.global_ctors is uncooperative!");
+
+ bool Changed = false;
+
+ // Dig the constructor functions out of GV's initializer.
+ ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+ for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
+ OI != OE; ++OI) {
+ Value *Op = *OI;
+ // llvm.global_ctors is an array of pairs where the second members
+ // are constructor functions.
+ Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+ // If the user used a constructor function with the wrong signature and
+ // it got bitcasted or whatever, look the other way.
+ if (!F)
+ continue;
+ // Only look at function definitions.
+ if (F->isDeclaration())
+ continue;
+ // Only look at functions with one basic block.
+ if (llvm::next(F->begin()) != F->end())
+ continue;
+ // Ok, a single-block constructor function definition. Try to optimize it.
+ Changed |= OptimizeBB(F->begin());
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
new file mode 100644
index 000000000000..46b2de713745
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -0,0 +1,162 @@
+//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -*- mode: c++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a simple ARC-aware AliasAnalysis using special knowledge
+/// of Objective C to enhance other optimization passes which rely on the Alias
+/// Analysis infrastructure.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-aa"
+#include "ObjCARC.h"
+#include "ObjCARCAliasAnalysis.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassSupport.h"
+
+namespace llvm {
+ class Function;
+ class Value;
+}
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+// Register this pass...
+char ObjCARCAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
+ return new ObjCARCAliasAnalysis();
+}
+
+void
+ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+AliasAnalysis::AliasResult
+ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making a
+ // precise alias query.
+ const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
+ const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+ AliasResult Result =
+ AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag),
+ Location(SB, LocB.Size, LocB.TBAATag));
+ if (Result != MayAlias)
+ return Result;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *UA = GetUnderlyingObjCPtr(SA);
+ const Value *UB = GetUnderlyingObjCPtr(SB);
+ if (UA != SA || UB != SB) {
+ Result = AliasAnalysis::alias(Location(UA), Location(UB));
+ // We can't use MustAlias or PartialAlias results here because
+ // GetUnderlyingObjCPtr may return an offsetted pointer value.
+ if (Result == NoAlias)
+ return NoAlias;
+ }
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return MayAlias;
+}
+
+bool
+ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making
+ // a precise alias query.
+ const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
+ if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag),
+ OrLocal))
+ return true;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *U = GetUnderlyingObjCPtr(S);
+ if (U != S)
+ return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return false;
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ // We have nothing to do. Just chain to the next AliasAnalysis.
+ return AliasAnalysis::getModRefBehavior(CS);
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::getModRefBehavior(F);
+
+ switch (GetFunctionClass(F)) {
+ case IC_NoopCast:
+ return DoesNotAccessMemory;
+ default:
+ break;
+ }
+
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ switch (GetBasicInstructionClass(CS.getInstruction())) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_NoopCast:
+ case IC_AutoreleasepoolPush:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ // These functions don't access any memory visible to the compiler.
+ // Note that this doesn't include objc_retainBlock, because it updates
+ // pointers when it copies block data.
+ return NoModRef;
+ default:
+ break;
+ }
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // TODO: Theoretically we could check for dependencies between objc_* calls
+ // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
new file mode 100644
index 000000000000..7abe995a5ce7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
@@ -0,0 +1,74 @@
+//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares a simple ARC-aware AliasAnalysis using special knowledge
+/// of Objective C to enhance other optimization passes which rely on the Alias
+/// Analysis infrastructure.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+namespace objcarc {
+
+ /// \brief This is a simple alias analysis implementation that uses knowledge
+ /// of ARC constructs to answer queries.
+ ///
+ /// TODO: This class could be generalized to know about other ObjC-specific
+ /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing
+ /// even though their offsets are dynamic.
+ class ObjCARCAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ObjCARCAliasAnalysis() : ImmutablePass(ID) {
+ initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// This method is used when a pass implements an analysis interface through
+ /// multiple inheritance. If needed, it should override this to adjust the
+ /// this pointer as needed for the specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return static_cast<AliasAnalysis *>(this);
+ return this;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+
+} // namespace objcarc
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
new file mode 100644
index 000000000000..b96c64fe81de
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -0,0 +1,541 @@
+//===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines late ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// This specific file mainly deals with ``contracting'' multiple lower level
+/// operations into singular higher level operations through pattern matching.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#define DEBUG_TYPE "objc-arc-contract"
+#include "ObjCARC.h"
+#include "DependencyAnalysis.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
+
+namespace {
+ /// \brief Late ARC optimizations
+ ///
+ /// These change the IR in a way that makes it difficult to be analyzed by
+ /// ObjCARCOpt, so it's run late.
+ class ObjCARCContract : public FunctionPass {
+ bool Changed;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ ProvenanceAnalysis PA;
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// Declarations for ObjC runtime functions, for use in creating calls to
+ /// them. These are initialized lazily to avoid cluttering up the Module
+ /// with unused declarations.
+
+ /// Declaration for objc_storeStrong().
+ Constant *StoreStrongCallee;
+ /// Declaration for objc_retainAutorelease().
+ Constant *RetainAutoreleaseCallee;
+ /// Declaration for objc_retainAutoreleaseReturnValue().
+ Constant *RetainAutoreleaseRVCallee;
+
+ /// The inline asm string to insert between calls and RetainRV calls to make
+ /// the optimization work on targets which need it.
+ const MDString *RetainRVMarker;
+
+ /// The set of inserted objc_storeStrong calls. If at the end of walking the
+ /// function we have found no alloca instructions, these calls can be marked
+ /// "tail".
+ SmallPtrSet<CallInst *, 8> StoreStrongCalls;
+
+ Constant *getStoreStrongCallee(Module *M);
+ Constant *getRetainAutoreleaseCallee(Module *M);
+ Constant *getRetainAutoreleaseRVCallee(Module *M);
+
+ bool ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited);
+
+ void ContractRelease(Instruction *Release,
+ inst_iterator &Iter);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ public:
+ static char ID;
+ ObjCARCContract() : FunctionPass(ID) {
+ initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+
+Pass *llvm::createObjCARCContractPass() {
+ return new ObjCARCContract();
+}
+
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+}
+
+Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
+ if (!StoreStrongCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = { I8XX, I8X };
+
+ AttributeSet Attr = AttributeSet()
+ .addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind)
+ .addAttribute(M->getContext(), 1, Attribute::NoCapture);
+
+ StoreStrongCallee =
+ M->getOrInsertFunction(
+ "objc_storeStrong",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+ Attr);
+ }
+ return StoreStrongCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
+ if (!RetainAutoreleaseCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainAutoreleaseCallee =
+ M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute);
+ }
+ return RetainAutoreleaseCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
+ if (!RetainAutoreleaseRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainAutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
+ Attribute);
+ }
+ return RetainAutoreleaseRVCallee;
+}
+
+/// Merge an autorelease with a retain into a fused call.
+bool
+ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited) {
+ const Value *Arg = GetObjCArg(Autorelease);
+
+ // Check that there are no instructions between the retain and the autorelease
+ // (such as an autorelease_pop) which may change the count.
+ CallInst *Retain = 0;
+ if (Class == IC_AutoreleaseRV)
+ FindDependencies(RetainAutoreleaseRVDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+ else
+ FindDependencies(RetainAutoreleaseDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+
+ Visited.clear();
+ if (DependingInstructions.size() != 1) {
+ DependingInstructions.clear();
+ return false;
+ }
+
+ Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ DependingInstructions.clear();
+
+ if (!Retain ||
+ GetBasicInstructionClass(Retain) != IC_Retain ||
+ GetObjCArg(Retain) != Arg)
+ return false;
+
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing "
+ "retain/autorelease. Erasing: " << *Autorelease << "\n"
+ " Old Retain: "
+ << *Retain << "\n");
+
+ if (Class == IC_AutoreleaseRV)
+ Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
+ else
+ Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));
+
+ DEBUG(dbgs() << " New Retain: "
+ << *Retain << "\n");
+
+ EraseInstruction(Autorelease);
+ return true;
+}
+
+/// Attempt to merge an objc_release with a store, load, and objc_retain to form
+/// an objc_storeStrong. This can be a little tricky because the instructions
+/// don't always appear in order, and there may be unrelated intervening
+/// instructions.
+void ObjCARCContract::ContractRelease(Instruction *Release,
+ inst_iterator &Iter) {
+ LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
+ if (!Load || !Load->isSimple()) return;
+
+ // For now, require everything to be in one basic block.
+ BasicBlock *BB = Release->getParent();
+ if (Load->getParent() != BB) return;
+
+ // Walk down to find the store and the release, which may be in either order.
+ BasicBlock::iterator I = Load, End = BB->end();
+ ++I;
+ AliasAnalysis::Location Loc = AA->getLocation(Load);
+ StoreInst *Store = 0;
+ bool SawRelease = false;
+ for (; !Store || !SawRelease; ++I) {
+ if (I == End)
+ return;
+
+ Instruction *Inst = I;
+ if (Inst == Release) {
+ SawRelease = true;
+ continue;
+ }
+
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+
+ // Unrelated retains are harmless.
+ if (IsRetain(Class))
+ continue;
+
+ if (Store) {
+ // The store is the point where we're going to put the objc_storeStrong,
+ // so make sure there are no uses after it.
+ if (CanUse(Inst, Load, PA, Class))
+ return;
+ } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
+ // We are moving the load down to the store, so check for anything
+ // else which writes to the memory between the load and the store.
+ Store = dyn_cast<StoreInst>(Inst);
+ if (!Store || !Store->isSimple()) return;
+ if (Store->getPointerOperand() != Loc.Ptr) return;
+ }
+ }
+
+ Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
+
+ // Walk up to find the retain.
+ I = Store;
+ BasicBlock::iterator Begin = BB->begin();
+ while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
+ --I;
+ Instruction *Retain = I;
+ if (GetBasicInstructionClass(Retain) != IC_Retain) return;
+ if (GetObjCArg(Retain) != New) return;
+
+ Changed = true;
+ ++NumStoreStrongs;
+
+ LLVMContext &C = Release->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+
+ Value *Args[] = { Load->getPointerOperand(), New };
+ if (Args[0]->getType() != I8XX)
+ Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
+ if (Args[1]->getType() != I8X)
+ Args[1] = new BitCastInst(Args[1], I8X, "", Store);
+ CallInst *StoreStrong =
+ CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
+ Args, "", Store);
+ StoreStrong->setDoesNotThrow();
+ StoreStrong->setDebugLoc(Store->getDebugLoc());
+
+ // We can't set the tail flag yet, because we haven't yet determined
+ // whether there are any escaping allocas. Remember this call, so that
+ // we can set the tail flag once we know it's safe.
+ StoreStrongCalls.insert(StoreStrong);
+
+ if (&*Iter == Store) ++Iter;
+ Store->eraseFromParent();
+ Release->eraseFromParent();
+ EraseInstruction(Retain);
+ if (Load->use_empty())
+ Load->eraseFromParent();
+}
+
+bool ObjCARCContract::doInitialization(Module &M) {
+ // If nothing in the Module uses ARC, don't do anything.
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // These are initialized lazily.
+ StoreStrongCallee = 0;
+ RetainAutoreleaseCallee = 0;
+ RetainAutoreleaseRVCallee = 0;
+
+ // Initialize RetainRVMarker.
+ RetainRVMarker = 0;
+ if (NamedMDNode *NMD =
+ M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
+ if (NMD->getNumOperands() == 1) {
+ const MDNode *N = NMD->getOperand(0);
+ if (N->getNumOperands() == 1)
+ if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
+ RetainRVMarker = S;
+ }
+
+ return false;
+}
+
+bool ObjCARCContract::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // Track whether it's ok to mark objc_storeStrong calls with the "tail"
+ // keyword. Be conservative if the function has variadic arguments.
+ // It seems that functions which "return twice" are also unsafe for the
+ // "tail" argument, because they are setjmp, which could need to
+ // return to an earlier stack state.
+ bool TailOkForStoreStrongs = !F.isVarArg() &&
+ !F.callsFunctionThatReturnsTwice();
+
+ // For ObjC library calls which return their argument, replace uses of the
+ // argument with uses of the call return value, if it dominates the use. This
+ // reduces register pressure.
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+
+ DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n");
+
+ // Only these library routines return their argument. In particular,
+ // objc_retainBlock does not necessarily return its argument.
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ break;
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
+ continue;
+ break;
+ case IC_RetainRV: {
+ // If we're compiling for a target which needs a special inline-asm
+ // marker to do the retainAutoreleasedReturnValue optimization,
+ // insert it now.
+ if (!RetainRVMarker)
+ break;
+ BasicBlock::iterator BBI = Inst;
+ BasicBlock *InstParent = Inst->getParent();
+
+ // Step up to see if the call immediately precedes the RetainRV call.
+ // If it's an invoke, we have to cross a block boundary. And we have
+ // to carefully dodge no-op instructions.
+ do {
+ if (&*BBI == InstParent->begin()) {
+ BasicBlock *Pred = InstParent->getSinglePredecessor();
+ if (!Pred)
+ goto decline_rv_optimization;
+ BBI = Pred->getTerminator();
+ break;
+ }
+ --BBI;
+ } while (IsNoopInstruction(BBI));
+
+ if (&*BBI == GetObjCArg(Inst)) {
+ DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
+ "retainAutoreleasedReturnValue optimization.\n");
+ Changed = true;
+ InlineAsm *IA =
+ InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+ /*isVarArg=*/false),
+ RetainRVMarker->getString(),
+ /*Constraints=*/"", /*hasSideEffects=*/true);
+ CallInst::Create(IA, "", Inst);
+ }
+ decline_rv_optimization:
+ break;
+ }
+ case IC_InitWeak: {
+ // objc_initWeak(p, null) => *p = null
+ CallInst *CI = cast<CallInst>(Inst);
+ if (IsNullOrUndef(CI->getArgOperand(1))) {
+ Value *Null =
+ ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+ Changed = true;
+ new StoreInst(Null, CI->getArgOperand(0), CI);
+
+ DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
+ << " New = " << *Null << "\n");
+
+ CI->replaceAllUsesWith(Null);
+ CI->eraseFromParent();
+ }
+ continue;
+ }
+ case IC_Release:
+ ContractRelease(Inst, I);
+ continue;
+ case IC_User:
+ // Be conservative if the function has any alloca instructions.
+ // Technically we only care about escaping alloca instructions,
+ // but this is sufficient to handle some interesting cases.
+ if (isa<AllocaInst>(Inst))
+ TailOkForStoreStrongs = false;
+ continue;
+ case IC_IntrinsicUser:
+ // Remove calls to @clang.arc.use(...).
+ Inst->eraseFromParent();
+ continue;
+ default:
+ continue;
+ }
+
+ DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n");
+
+ // Don't use GetObjCArg because we don't want to look through bitcasts
+ // and such; to do the replacement, the argument must have type i8*.
+ const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+ for (;;) {
+ // If we're compiling bugpointed code, don't get in trouble.
+ if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
+ break;
+ // Look through the uses of the pointer.
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ) {
+ Use &U = UI.getUse();
+ unsigned OperandNo = UI.getOperandNo();
+ ++UI; // Increment UI now, because we may unlink its element.
+
+ // If the call's return value dominates a use of the call's argument
+ // value, rewrite the use to use the return value. We check for
+ // reachability here because an unreachable call is considered to
+ // trivially dominate itself, which would lead us to rewriting its
+ // argument in terms of its return value, which would lead to
+ // infinite loops in GetObjCArg.
+ if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
+ Changed = true;
+ Instruction *Replacement = Inst;
+ Type *UseTy = U.get()->getType();
+ if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
+ // For PHI nodes, insert the bitcast in the predecessor block.
+ unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+ BasicBlock *BB = PHI->getIncomingBlock(ValNo);
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ &BB->back());
+ // While we're here, rewrite all edges for this PHI, rather
+ // than just one use at a time, to minimize the number of
+ // bitcasts we emit.
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (PHI->getIncomingBlock(i) == BB) {
+ // Keep the UI iterator valid.
+ if (&PHI->getOperandUse(
+ PHINode::getOperandNumForIncomingValue(i)) ==
+ &UI.getUse())
+ ++UI;
+ PHI->setIncomingValue(i, Replacement);
+ }
+ } else {
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ cast<Instruction>(U.getUser()));
+ U.set(Replacement);
+ }
+ }
+ }
+
+ // If Arg is a no-op casted pointer, strip one level of casts and iterate.
+ if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
+ Arg = BI->getOperand(0);
+ else if (isa<GEPOperator>(Arg) &&
+ cast<GEPOperator>(Arg)->hasAllZeroIndices())
+ Arg = cast<GEPOperator>(Arg)->getPointerOperand();
+ else if (isa<GlobalAlias>(Arg) &&
+ !cast<GlobalAlias>(Arg)->mayBeOverridden())
+ Arg = cast<GlobalAlias>(Arg)->getAliasee();
+ else
+ break;
+ }
+ }
+
+ // If this function has no escaping allocas or suspicious vararg usage,
+ // objc_storeStrong calls can be marked with the "tail" keyword.
+ if (TailOkForStoreStrongs)
+ for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
+ E = StoreStrongCalls.end(); I != E; ++I)
+ (*I)->setTailCall();
+ StoreStrongCalls.clear();
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
new file mode 100644
index 000000000000..39bf8f38735b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -0,0 +1,128 @@
+//===- ObjCARCExpand.cpp - ObjC ARC Optimization --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// This specific file deals with early optimizations which perform certain
+/// cleanup operations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-expand"
+
+#include "ObjCARC.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ class Module;
+}
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+namespace {
+ /// \brief Early ARC transformations.
+ class ObjCARCExpand : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ public:
+ static char ID;
+ ObjCARCExpand() : FunctionPass(ID) {
+ initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+ "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+Pass *llvm::createObjCARCExpandPass() {
+ return new ObjCARCExpand();
+}
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCExpand::doInitialization(Module &M) {
+ Run = ModuleHasARC(M);
+ return false;
+}
+
+bool ObjCARCExpand::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ bool Changed = false;
+
+ DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName() << "\n");
+
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n");
+
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV: {
+ // These calls return their argument verbatim, as a low-level
+ // optimization. However, this makes high-level optimizations
+ // harder. Undo any uses of this optimization that the front-end
+ // emitted here. We'll redo them in the contract pass.
+ Changed = true;
+ Value *Value = cast<CallInst>(Inst)->getArgOperand(0);
+ DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst << "\n"
+ " New = " << *Value << "\n");
+ Inst->replaceAllUsesWith(Value);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ DEBUG(dbgs() << "ObjCARCExpand: Finished List.\n\n");
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
new file mode 100644
index 000000000000..92d6fc4767c2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -0,0 +1,3026 @@
+//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// The optimizations performed include elimination of redundant, partially
+/// redundant, and inconsequential reference count operations, elimination of
+/// redundant weak pointer operations, and numerous minor simplifications.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-opts"
+#include "ObjCARC.h"
+#include "DependencyAnalysis.h"
+#include "ObjCARCAliasAnalysis.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific.
+/// @{
+
+namespace {
+ /// \brief An associative container with fast insertion-order (deterministic)
+ /// iteration over its elements. Plus the special blot operation.
+ template<class KeyT, class ValueT>
+ class MapVector {
+ /// Map keys to indices in Vector.
+ typedef DenseMap<KeyT, size_t> MapTy;
+ MapTy Map;
+
+ typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
+ /// Keys and values.
+ VectorTy Vector;
+
+ public:
+ typedef typename VectorTy::iterator iterator;
+ typedef typename VectorTy::const_iterator const_iterator;
+ iterator begin() { return Vector.begin(); }
+ iterator end() { return Vector.end(); }
+ const_iterator begin() const { return Vector.begin(); }
+ const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+ ~MapVector() {
+ assert(Vector.size() >= Map.size()); // May differ due to blotting.
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
+ I != E; ++I) {
+ assert(I->second < Vector.size());
+ assert(Vector[I->second].first == I->first);
+ }
+ for (typename VectorTy::const_iterator I = Vector.begin(),
+ E = Vector.end(); I != E; ++I)
+ assert(!I->first ||
+ (Map.count(I->first) &&
+ Map[I->first] == size_t(I - Vector.begin())));
+ }
+#endif
+
+ ValueT &operator[](const KeyT &Arg) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Arg, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(std::make_pair(Arg, ValueT()));
+ return Vector[Num].second;
+ }
+ return Vector[Pair.first->second].second;
+ }
+
+ std::pair<iterator, bool>
+ insert(const std::pair<KeyT, ValueT> &InsertPair) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(InsertPair);
+ return std::make_pair(Vector.begin() + Num, true);
+ }
+ return std::make_pair(Vector.begin() + Pair.first->second, false);
+ }
+
+ const_iterator find(const KeyT &Key) const {
+ typename MapTy::const_iterator It = Map.find(Key);
+ if (It == Map.end()) return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ /// This is similar to erase, but instead of removing the element from the
+ /// vector, it just zeros out the key in the vector. This leaves iterators
+ /// intact, but clients must be prepared for zeroed-out keys when iterating.
+ void blot(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end()) return;
+ Vector[It->second].first = KeyT();
+ Map.erase(It);
+ }
+
+ void clear() {
+ Map.clear();
+ Vector.clear();
+ }
+ };
+}
+
+/// @}
+///
+/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
+/// @{
+
+/// \brief This is similar to StripPointerCastsAndObjCCalls but it stops as soon
+/// as it finds a value with multiple uses.
+static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
+ if (Arg->hasOneUse()) {
+ if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
+ return FindSingleUseIdentifiedObject(BC->getOperand(0));
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
+ if (GEP->hasAllZeroIndices())
+ return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
+ if (IsForwarding(GetBasicInstructionClass(Arg)))
+ return FindSingleUseIdentifiedObject(
+ cast<CallInst>(Arg)->getArgOperand(0));
+ if (!IsObjCIdentifiedObject(Arg))
+ return 0;
+ return Arg;
+ }
+
+ // If we found an identifiable object but it has multiple uses, but they are
+ // trivial uses, we can still consider this to be a single-use value.
+ if (IsObjCIdentifiedObject(Arg)) {
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
+ return 0;
+ }
+
+ return Arg;
+ }
+
+ return 0;
+}
+
+/// \brief Test whether the given retainable object pointer escapes.
+///
+/// This differs from regular escape analysis in that a use as an
+/// argument to a call is not considered an escape.
+///
+static bool DoesRetainableObjPtrEscape(const User *Ptr) {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Target: " << *Ptr << "\n");
+
+ // Walk the def-use chains.
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(Ptr);
+ // If Ptr has any operands add them as well.
+ for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E;
+ ++I) {
+ Worklist.push_back(*I);
+ }
+
+ // Ensure we do not visit any value twice.
+ SmallPtrSet<const Value *, 8> VisitedSet;
+
+ do {
+ const Value *V = Worklist.pop_back_val();
+
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n");
+
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const User *UUser = *UI;
+
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n");
+
+ // Special - Use by a call (callee or argument) is not considered
+ // to be an escape.
+ switch (GetBasicInstructionClass(UUser)) {
+ case IC_StoreWeak:
+ case IC_InitWeak:
+ case IC_StoreStrong:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV: {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer "
+ "arguments. Pointer Escapes!\n");
+ // These special functions make copies of their pointer arguments.
+ return true;
+ }
+ case IC_IntrinsicUser:
+ // Use by the use intrinsic is not an escape.
+ continue;
+ case IC_User:
+ case IC_None:
+ // Use by an instruction which copies the value is an escape if the
+ // result is an escape.
+ if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
+ isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
+
+ if (VisitedSet.insert(UUser)) {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. "
+ "Ptr escapes if result escapes. Adding to list.\n");
+ Worklist.push_back(UUser);
+ } else {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node."
+ "\n");
+ }
+ continue;
+ }
+ // Use by a load is not an escape.
+ if (isa<LoadInst>(UUser))
+ continue;
+ // Use by a store is not an escape if the use is the address.
+ if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
+ if (V != SI->getValueOperand())
+ continue;
+ break;
+ default:
+ // Regular calls and other stuff are not considered escapes.
+ continue;
+ }
+ // Otherwise, conservatively assume an escape.
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n");
+ return true;
+ }
+ } while (!Worklist.empty());
+
+ // No escapes found.
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n");
+ return false;
+}
+
+/// @}
+///
+/// \defgroup ARCOpt ARC Optimization.
+/// @{
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimial insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could generalized to be Interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+// TODO: Delete release+retain pairs (rare).
+
+STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
+STATISTIC(NumRets, "Number of return value forwarding "
+ "retain+autoreleaes eliminated");
+STATISTIC(NumRRs, "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+
+namespace {
+ /// \enum Sequence
+ ///
+ /// \brief A sequence of states that a pointer may go through in which an
+ /// objc_retain and objc_release are actually needed.
+ enum Sequence {
+ S_None,
+ S_Retain, ///< objc_retain(x).
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement.
+ S_Use, ///< any use of x.
+ S_Stop, ///< like S_Release, but code motion is stopped.
+ S_Release, ///< objc_release(x).
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
+ };
+
+ raw_ostream &operator<<(raw_ostream &OS, const Sequence S)
+ LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<<(raw_ostream &OS, const Sequence S) {
+ switch (S) {
+ case S_None:
+ return OS << "S_None";
+ case S_Retain:
+ return OS << "S_Retain";
+ case S_CanRelease:
+ return OS << "S_CanRelease";
+ case S_Use:
+ return OS << "S_Use";
+ case S_Release:
+ return OS << "S_Release";
+ case S_MovableRelease:
+ return OS << "S_MovableRelease";
+ case S_Stop:
+ return OS << "S_Stop";
+ }
+ llvm_unreachable("Unknown sequence type.");
+ }
+}
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ if (A > B) std::swap(A, B);
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Retain || A == S_CanRelease) &&
+ (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None;
+}
+
+namespace {
+ /// \brief Unidirectional information about either a
+ /// retain-decrement-use-release sequence or release-use-decrement-retain
+ /// reverese sequence.
+ struct RRInfo {
+ /// After an objc_retain, the reference count of the referenced
+ /// object is known to be positive. Similarly, before an objc_release, the
+ /// reference count of the referenced object is known to be positive. If
+ /// there are retain-release pairs in code regions where the retain count
+ /// is known to be positive, they can be eliminated, regardless of any side
+ /// effects between them.
+ ///
+ /// Also, a retain+release pair nested within another retain+release
+ /// pair all on the known same pointer value can be eliminated, regardless
+ /// of any intervening side effects.
+ ///
+ /// KnownSafe is true when either of these conditions is satisfied.
+ bool KnownSafe;
+
+ /// True of the objc_release calls are all marked with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// If the Calls are objc_release calls and they all have a
+ /// clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet<Instruction *, 2> Calls;
+
+ /// The set of optimal insert positions for moving calls in the opposite
+ /// sequence.
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+ RRInfo() :
+ KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
+
+ void clear();
+ };
+}
+
+void RRInfo::clear() {
+ KnownSafe = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = 0;
+ Calls.clear();
+ ReverseInsertPts.clear();
+}
+
+namespace {
+ /// \brief This class summarizes several per-pointer runtime properties which
+ /// are propogated through the flow graph.
+ class PtrState {
+ /// True if the reference count is known to be incremented.
+ bool KnownPositiveRefCount;
+
+ /// True of we've seen an opportunity for partial RR elimination, such as
+ /// pushing calls into a CFG triangle or into one side of a CFG diamond.
+ bool Partial;
+
+ /// The current position in the sequence.
+ Sequence Seq : 8;
+
+ public:
+ /// Unidirectional information about the current sequence.
+ ///
+ /// TODO: Encapsulate this better.
+ RRInfo RRI;
+
+ PtrState() : KnownPositiveRefCount(false), Partial(false),
+ Seq(S_None) {}
+
+ void SetKnownPositiveRefCount() {
+ KnownPositiveRefCount = true;
+ }
+
+ void ClearKnownPositiveRefCount() {
+ KnownPositiveRefCount = false;
+ }
+
+ bool HasKnownPositiveRefCount() const {
+ return KnownPositiveRefCount;
+ }
+
+ void SetSeq(Sequence NewSeq) {
+ Seq = NewSeq;
+ }
+
+ Sequence GetSeq() const {
+ return Seq;
+ }
+
+ void ClearSequenceProgress() {
+ ResetSequenceProgress(S_None);
+ }
+
+ void ResetSequenceProgress(Sequence NewSeq) {
+ Seq = NewSeq;
+ Partial = false;
+ RRI.clear();
+ }
+
+ void Merge(const PtrState &Other, bool TopDown);
+ };
+}
+
+void
+PtrState::Merge(const PtrState &Other, bool TopDown) {
+ Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+ KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
+
+ // If we're not in a sequence (anymore), drop all associated state.
+ if (Seq == S_None) {
+ Partial = false;
+ RRI.clear();
+ } else if (Partial || Other.Partial) {
+ // If we're doing a merge on a path that's previously seen a partial
+ // merge, conservatively drop the sequence, to avoid doing partial
+ // RR elimination. If the branch predicates for the two merge differ,
+ // mixing them is unsafe.
+ ClearSequenceProgress();
+ } else {
+ // Conservatively merge the ReleaseMetadata information.
+ if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
+ RRI.ReleaseMetadata = 0;
+
+ RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
+ RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
+ Other.RRI.IsTailCallRelease;
+ RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size();
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ I = Other.RRI.ReverseInsertPts.begin(),
+ E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
+ Partial |= RRI.ReverseInsertPts.insert(*I);
+ }
+}
+
+namespace {
+ /// \brief Per-BasicBlock state.
+ class BBState {
+ /// The number of unique control paths from the entry which can reach this
+ /// block.
+ unsigned TopDownPathCount;
+
+ /// The number of unique control paths to exits from this block.
+ unsigned BottomUpPathCount;
+
+ /// A type for PerPtrTopDown and PerPtrBottomUp.
+ typedef MapVector<const Value *, PtrState> MapTy;
+
+ /// The top-down traversal uses this to record information known about a
+ /// pointer at the bottom of each block.
+ MapTy PerPtrTopDown;
+
+ /// The bottom-up traversal uses this to record information known about a
+ /// pointer at the top of each block.
+ MapTy PerPtrBottomUp;
+
+ /// Effective predecessors of the current block ignoring ignorable edges and
+ /// ignored backedges.
+ SmallVector<BasicBlock *, 2> Preds;
+ /// Effective successors of the current block ignoring ignorable edges and
+ /// ignored backedges.
+ SmallVector<BasicBlock *, 2> Succs;
+
+ public:
+ BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+
+ typedef MapTy::iterator ptr_iterator;
+ typedef MapTy::const_iterator ptr_const_iterator;
+
+ ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+ ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+ ptr_const_iterator top_down_ptr_begin() const {
+ return PerPtrTopDown.begin();
+ }
+ ptr_const_iterator top_down_ptr_end() const {
+ return PerPtrTopDown.end();
+ }
+
+ ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
+ ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+ ptr_const_iterator bottom_up_ptr_begin() const {
+ return PerPtrBottomUp.begin();
+ }
+ ptr_const_iterator bottom_up_ptr_end() const {
+ return PerPtrBottomUp.end();
+ }
+
+ /// Mark this block as being an entry block, which has one path from the
+ /// entry by definition.
+ void SetAsEntry() { TopDownPathCount = 1; }
+
+ /// Mark this block as being an exit block, which has one path to an exit by
+ /// definition.
+ void SetAsExit() { BottomUpPathCount = 1; }
+
+ PtrState &getPtrTopDownState(const Value *Arg) {
+ return PerPtrTopDown[Arg];
+ }
+
+ PtrState &getPtrBottomUpState(const Value *Arg) {
+ return PerPtrBottomUp[Arg];
+ }
+
+ void clearBottomUpPointers() {
+ PerPtrBottomUp.clear();
+ }
+
+ void clearTopDownPointers() {
+ PerPtrTopDown.clear();
+ }
+
+ void InitFromPred(const BBState &Other);
+ void InitFromSucc(const BBState &Other);
+ void MergePred(const BBState &Other);
+ void MergeSucc(const BBState &Other);
+
+ /// Return the number of possible unique paths from an entry to an exit
+ /// which pass through this block. This is only valid after both the
+ /// top-down and bottom-up traversals are complete.
+ unsigned GetAllPathCount() const {
+ assert(TopDownPathCount != 0);
+ assert(BottomUpPathCount != 0);
+ return TopDownPathCount * BottomUpPathCount;
+ }
+
+ // Specialized CFG utilities.
+ typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator;
+ edge_iterator pred_begin() { return Preds.begin(); }
+ edge_iterator pred_end() { return Preds.end(); }
+ edge_iterator succ_begin() { return Succs.begin(); }
+ edge_iterator succ_end() { return Succs.end(); }
+
+ void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
+ void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
+
+ bool isExit() const { return Succs.empty(); }
+ };
+}
+
+void BBState::InitFromPred(const BBState &Other) {
+ PerPtrTopDown = Other.PerPtrTopDown;
+ TopDownPathCount = Other.TopDownPathCount;
+}
+
+void BBState::InitFromSucc(const BBState &Other) {
+ PerPtrBottomUp = Other.PerPtrBottomUp;
+ BottomUpPathCount = Other.BottomUpPathCount;
+}
+
+/// The top-down traversal uses this to merge information about predecessors to
+/// form the initial state for a new block.
+void BBState::MergePred(const BBState &Other) {
+ // Other.TopDownPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ TopDownPathCount += Other.TopDownPathCount;
+
+ // Check for overflow. If we have overflow, fall back to conservative
+ // behavior.
+ if (TopDownPathCount < Other.TopDownPathCount) {
+ clearTopDownPointers();
+ return;
+ }
+
+ // For each entry in the other set, if our set has an entry with the same key,
+ // merge the entries. Otherwise, copy the entry and merge it with an empty
+ // entry.
+ for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
+ ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/true);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry with the
+ // same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = top_down_ptr_begin(),
+ ME = top_down_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/true);
+}
+
+/// The bottom-up traversal uses this to merge information about successors to
+/// form the initial state for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+ // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ BottomUpPathCount += Other.BottomUpPathCount;
+
+ // Check for overflow. If we have overflow, fall back to conservative
+ // behavior.
+ if (BottomUpPathCount < Other.BottomUpPathCount) {
+ clearBottomUpPointers();
+ return;
+ }
+
+ // For each entry in the other set, if our set has an entry with the
+ // same key, merge the entries. Otherwise, copy the entry and merge
+ // it with an empty entry.
+ for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
+ ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/false);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry
+ // with the same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = bottom_up_ptr_begin(),
+ ME = bottom_up_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/false);
+}
+
+// Only enable ARC Annotations if we are building a debug version of
+// libObjCARCOpts.
+#ifndef NDEBUG
+#define ARC_ANNOTATIONS
+#endif
+
+// Define some macros along the lines of DEBUG and some helper functions to make
+// it cleaner to create annotations in the source code and to no-op when not
+// building in debug mode.
+#ifdef ARC_ANNOTATIONS
+
+#include "llvm/Support/CommandLine.h"
+
+/// Enable/disable ARC sequence annotations.
+static cl::opt<bool>
+EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false));
+
+/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
+/// instruction so that we can track backwards when post processing via the llvm
+/// arc annotation processor tool. If the function is an
+static MDString *AppendMDNodeToSourcePtr(unsigned NodeId,
+ Value *Ptr) {
+ MDString *Hash = 0;
+
+ // If pointer is a result of an instruction and it does not have a source
+ // MDNode it, attach a new MDNode onto it. If pointer is a result of
+ // an instruction and does have a source MDNode attached to it, return a
+ // reference to said Node. Otherwise just return 0.
+ if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) {
+ MDNode *Node;
+ if (!(Node = Inst->getMetadata(NodeId))) {
+ // We do not have any node. Generate and attatch the hash MDString to the
+ // instruction.
+
+ // We just use an MDString to ensure that this metadata gets written out
+ // of line at the module level and to provide a very simple format
+ // encoding the information herein. Both of these makes it simpler to
+ // parse the annotations by a simple external program.
+ std::string Str;
+ raw_string_ostream os(Str);
+ os << "(" << Inst->getParent()->getParent()->getName() << ",%"
+ << Inst->getName() << ")";
+
+ Hash = MDString::get(Inst->getContext(), os.str());
+ Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash));
+ } else {
+ // We have a node. Grab its hash and return it.
+ assert(Node->getNumOperands() == 1 &&
+ "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand.");
+ Hash = cast<MDString>(Node->getOperand(0));
+ }
+ } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
+ std::string str;
+ raw_string_ostream os(str);
+ os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName()
+ << ")";
+ Hash = MDString::get(Arg->getContext(), os.str());
+ }
+
+ return Hash;
+}
+
+static std::string SequenceToString(Sequence A) {
+ std::string str;
+ raw_string_ostream os(str);
+ os << A;
+ return os.str();
+}
+
+/// Helper function to change a Sequence into a String object using our overload
+/// for raw_ostream so we only have printing code in one location.
+static MDString *SequenceToMDString(LLVMContext &Context,
+ Sequence A) {
+ return MDString::get(Context, SequenceToString(A));
+}
+
+/// A simple function to generate a MDNode which describes the change in state
+/// for Value *Ptr caused by Instruction *Inst.
+static void AppendMDNodeToInstForPtr(unsigned NodeId,
+ Instruction *Inst,
+ Value *Ptr,
+ MDString *PtrSourceMDNodeID,
+ Sequence OldSeq,
+ Sequence NewSeq) {
+ MDNode *Node = 0;
+ Value *tmp[3] = {PtrSourceMDNodeID,
+ SequenceToMDString(Inst->getContext(),
+ OldSeq),
+ SequenceToMDString(Inst->getContext(),
+ NewSeq)};
+ Node = MDNode::get(Inst->getContext(),
+ ArrayRef<Value*>(tmp, 3));
+
+ Inst->setMetadata(NodeId, Node);
+}
+
+/// Add to the beginning of the basic block llvm.ptr.annotations which show the
+/// state of a pointer at the entrance to a basic block.
+static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
+ Value *Ptr, Sequence Seq) {
+ Module *M = BB->getParent()->getParent();
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = {I8XX, I8XX};
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
+ ArrayRef<Type*>(Params, 2),
+ /*isVarArg=*/false);
+ Constant *Callee = M->getOrInsertFunction(Name, FTy);
+
+ IRBuilder<> Builder(BB, BB->getFirstInsertionPt());
+
+ Value *PtrName;
+ StringRef Tmp = Ptr->getName();
+ if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
+ Tmp + "_STR");
+ PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), Tmp);
+ }
+
+ Value *S;
+ std::string SeqStr = SequenceToString(Seq);
+ if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
+ SeqStr + "_STR");
+ S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), SeqStr);
+ }
+
+ Builder.CreateCall2(Callee, PtrName, S);
+}
+
+/// Add to the end of the basic block llvm.ptr.annotations which show the state
+/// of the pointer at the bottom of the basic block.
+static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
+ Value *Ptr, Sequence Seq) {
+ Module *M = BB->getParent()->getParent();
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = {I8XX, I8XX};
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
+ ArrayRef<Type*>(Params, 2),
+ /*isVarArg=*/false);
+ Constant *Callee = M->getOrInsertFunction(Name, FTy);
+
+ IRBuilder<> Builder(BB, llvm::prior(BB->end()));
+
+ Value *PtrName;
+ StringRef Tmp = Ptr->getName();
+ if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
+ Tmp + "_STR");
+ PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), Tmp);
+ }
+
+ Value *S;
+ std::string SeqStr = SequenceToString(Seq);
+ if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
+ SeqStr + "_STR");
+ S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), SeqStr);
+ }
+ Builder.CreateCall2(Callee, PtrName, S);
+}
+
+/// Adds a source annotation to pointer and a state change annotation to Inst
+/// referencing the source annotation and the old/new state of pointer.
+static void GenerateARCAnnotation(unsigned InstMDId,
+ unsigned PtrMDId,
+ Instruction *Inst,
+ Value *Ptr,
+ Sequence OldSeq,
+ Sequence NewSeq) {
+ if (EnableARCAnnotations) {
+ // First generate the source annotation on our pointer. This will return an
+ // MDString* if Ptr actually comes from an instruction implying we can put
+ // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL),
+ // then we know that our pointer is from an Argument so we put a reference
+ // to the argument number.
+ //
+ // The point of this is to make it easy for the
+ // llvm-arc-annotation-processor tool to cross reference where the source
+ // pointer is in the LLVM IR since the LLVM IR parser does not submit such
+ // information via debug info for backends to use (since why would anyone
+ // need such a thing from LLVM IR besides in non standard cases
+ // [i.e. this]).
+ MDString *SourcePtrMDNode =
+ AppendMDNodeToSourcePtr(PtrMDId, Ptr);
+ AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq,
+ NewSeq);
+ }
+}
+
+// The actual interface for accessing the above functionality is defined via
+// some simple macros which are defined below. We do this so that the user does
+// not need to pass in what metadata id is needed resulting in cleaner code and
+// additionally since it provides an easy way to conditionally no-op all
+// annotation support in a non-debug build.
+
+/// Use this macro to annotate a sequence state change when processing
+/// instructions bottom up,
+#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \
+ GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \
+ ARCAnnotationProvenanceSourceMDKind, (inst), \
+ const_cast<Value*>(ptr), (old), (new))
+/// Use this macro to annotate a sequence state change when processing
+/// instructions top down.
+#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \
+ GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \
+ ARCAnnotationProvenanceSourceMDKind, (inst), \
+ const_cast<Value*>(ptr), (old), (new))
+
+#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \
+ do { \
+ if (EnableARCAnnotations) { \
+ for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
+ E = (_states)._direction##_ptr_end(); I != E; ++I) { \
+ Value *Ptr = const_cast<Value*>(I->first); \
+ Sequence Seq = I->second.GetSeq(); \
+ GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \
+ } \
+ } \
+} while (0)
+
+#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \
+ Entrance, bottom_up)
+#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
+ Terminator, bottom_up)
+#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
+ Entrance, top_down)
+#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
+ Terminator, top_down)
+
+#else // !ARC_ANNOTATION
+// If annotations are off, noop.
+#define ANNOTATE_BOTTOMUP(inst, ptr, old, new)
+#define ANNOTATE_TOPDOWN(inst, ptr, old, new)
+#define ANNOTATE_BOTTOMUP_BBSTART(states, basicblock)
+#define ANNOTATE_BOTTOMUP_BBEND(states, basicblock)
+#define ANNOTATE_TOPDOWN_BBSTART(states, basicblock)
+#define ANNOTATE_TOPDOWN_BBEND(states, basicblock)
+#endif // !ARC_ANNOTATION
+
+namespace {
+ /// \brief The main ARC optimization pass.
+ class ObjCARCOpt : public FunctionPass {
+ bool Changed;
+ ProvenanceAnalysis PA;
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// Declarations for ObjC runtime functions, for use in creating calls to
+ /// them. These are initialized lazily to avoid cluttering up the Module
+ /// with unused declarations.
+
+ /// Declaration for ObjC runtime function
+ /// objc_retainAutoreleasedReturnValue.
+ Constant *RetainRVCallee;
+ /// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
+ Constant *AutoreleaseRVCallee;
+ /// Declaration for ObjC runtime function objc_release.
+ Constant *ReleaseCallee;
+ /// Declaration for ObjC runtime function objc_retain.
+ Constant *RetainCallee;
+ /// Declaration for ObjC runtime function objc_retainBlock.
+ Constant *RetainBlockCallee;
+ /// Declaration for ObjC runtime function objc_autorelease.
+ Constant *AutoreleaseCallee;
+
+ /// Flags which determine whether each of the interesting runtine functions
+ /// is in fact used in the current function.
+ unsigned UsedInThisFunction;
+
+ /// The Metadata Kind for clang.imprecise_release metadata.
+ unsigned ImpreciseReleaseMDKind;
+
+ /// The Metadata Kind for clang.arc.copy_on_escape metadata.
+ unsigned CopyOnEscapeMDKind;
+
+ /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
+ unsigned NoObjCARCExceptionsMDKind;
+
+#ifdef ARC_ANNOTATIONS
+ /// The Metadata Kind for llvm.arc.annotation.bottomup metadata.
+ unsigned ARCAnnotationBottomUpMDKind;
+ /// The Metadata Kind for llvm.arc.annotation.topdown metadata.
+ unsigned ARCAnnotationTopDownMDKind;
+ /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata.
+ unsigned ARCAnnotationProvenanceSourceMDKind;
+#endif // ARC_ANNOATIONS
+
+ Constant *getRetainRVCallee(Module *M);
+ Constant *getAutoreleaseRVCallee(Module *M);
+ Constant *getReleaseCallee(Module *M);
+ Constant *getRetainCallee(Module *M);
+ Constant *getRetainBlockCallee(Module *M);
+ Constant *getAutoreleaseCallee(Module *M);
+
+ bool IsRetainBlockOptimizable(const Instruction *Inst);
+
+ void OptimizeRetainCall(Function &F, Instruction *Retain);
+ bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+ void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
+ InstructionClass &Class);
+ bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock,
+ InstructionClass &Class);
+ void OptimizeIndividualCalls(Function &F);
+
+ void CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const;
+ bool VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates);
+ bool VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains);
+ bool VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates);
+ bool VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases);
+ bool Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M);
+
+ bool ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M,
+ SmallVector<Instruction *, 4> &NewRetains,
+ SmallVector<Instruction *, 4> &NewReleases,
+ SmallVector<Instruction *, 8> &DeadInsts,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ Value *Arg,
+ bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated);
+
+ bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M);
+
+ void OptimizeWeakCalls(Function &F);
+
+ bool OptimizeSequences(Function &F);
+
+ void OptimizeReturns(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual void releaseMemory();
+
+ public:
+ static char ID;
+ ObjCARCOpt() : FunctionPass(ID) {
+ initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_END(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+
+Pass *llvm::createObjCARCOptPass() {
+ return new ObjCARCOpt();
+}
+
+void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ObjCARCAliasAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ // ARC optimization doesn't currently split critical edges.
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
+ // Without the magic metadata tag, we have to assume this might be an
+ // objc_retainBlock call inserted to convert a block pointer to an id,
+ // in which case it really is needed.
+ if (!Inst->getMetadata(CopyOnEscapeMDKind))
+ return false;
+
+ // If the pointer "escapes" (not including being used in a call),
+ // the copy may be needed.
+ if (DoesRetainableObjPtrEscape(Inst))
+ return false;
+
+ // Otherwise, it's not needed.
+ return true;
+}
+
+Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
+ if (!RetainRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+ Attribute);
+ }
+ return RetainRVCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
+ if (!AutoreleaseRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ AutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
+ Attribute);
+ }
+ return AutoreleaseRVCallee;
+}
+
+Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
+ if (!ReleaseCallee) {
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ ReleaseCallee =
+ M->getOrInsertFunction(
+ "objc_release",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+ Attribute);
+ }
+ return ReleaseCallee;
+}
+
+Constant *ObjCARCOpt::getRetainCallee(Module *M) {
+ if (!RetainCallee) {
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainCallee =
+ M->getOrInsertFunction(
+ "objc_retain",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attribute);
+ }
+ return RetainCallee;
+}
+
+Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
+ if (!RetainBlockCallee) {
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ RetainBlockCallee =
+ M->getOrInsertFunction(
+ "objc_retainBlock",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ AttributeSet());
+ }
+ return RetainBlockCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
+ if (!AutoreleaseCallee) {
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ AutoreleaseCallee =
+ M->getOrInsertFunction(
+ "objc_autorelease",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attribute);
+ }
+ return AutoreleaseCallee;
+}
+
+/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
+/// return value.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+ ImmutableCallSite CS(GetObjCArg(Retain));
+ const Instruction *Call = CS.getInstruction();
+ if (!Call) return;
+ if (Call->getParent() != Retain->getParent()) return;
+
+ // Check that the call is next to the retain.
+ BasicBlock::const_iterator I = Call;
+ ++I;
+ while (IsNoopInstruction(I)) ++I;
+ if (&*I != Retain)
+ return;
+
+ // Turn it to an objc_retainAutoreleasedReturnValue..
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
+ "objc_retain => objc_retainAutoreleasedReturnValue"
+ " since the operand is a return value.\n"
+ " Old: "
+ << *Retain << "\n");
+
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+
+ DEBUG(dbgs() << " New: "
+ << *Retain << "\n");
+}
+
+/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
+/// not a return value. Or, if it can be paired with an
+/// objc_autoreleaseReturnValue, delete the pair and return true.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+ // Check for the argument being from an immediately preceding call or invoke.
+ const Value *Arg = GetObjCArg(RetainRV);
+ ImmutableCallSite CS(Arg);
+ if (const Instruction *Call = CS.getInstruction()) {
+ if (Call->getParent() == RetainRV->getParent()) {
+ BasicBlock::const_iterator I = Call;
+ ++I;
+ while (IsNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock *RetainRVParent = RetainRV->getParent();
+ if (II->getNormalDest() == RetainRVParent) {
+ BasicBlock::const_iterator I = RetainRVParent->begin();
+ while (IsNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ }
+ }
+ }
+
+ // Check for being preceded by an objc_autoreleaseReturnValue on the same
+ // pointer. In this case, we can delete the pair.
+ BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+ if (I != Begin) {
+ do --I; while (I != Begin && IsNoopInstruction(I));
+ if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+ GetObjCArg(I) == Arg) {
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
+ << " Erasing " << *RetainRV
+ << "\n");
+
+ EraseInstruction(I);
+ EraseInstruction(RetainRV);
+ return true;
+ }
+ }
+
+ // Turn it to a plain objc_retain.
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
+ "objc_retainAutoreleasedReturnValue => "
+ "objc_retain since the operand is not a return value.\n"
+ " Old: "
+ << *RetainRV << "\n");
+
+ cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+
+ DEBUG(dbgs() << " New: "
+ << *RetainRV << "\n");
+
+ return false;
+}
+
+/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
+/// used as a return value.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
+ InstructionClass &Class) {
+ // Check for a return of the pointer value.
+ const Value *Ptr = GetObjCArg(AutoreleaseRV);
+ SmallVector<const Value *, 2> Users;
+ Users.push_back(Ptr);
+ do {
+ Ptr = Users.pop_back_val();
+ for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+ UI != UE; ++UI) {
+ const User *I = *UI;
+ if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ return;
+ if (isa<BitCastInst>(I))
+ Users.push_back(I);
+ }
+ } while (!Users.empty());
+
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
+ "objc_autoreleaseReturnValue => "
+ "objc_autorelease since its operand is not used as a return "
+ "value.\n"
+ " Old: "
+ << *AutoreleaseRV << "\n");
+
+ CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
+ AutoreleaseRVCI->
+ setCalledFunction(getAutoreleaseCallee(F.getParent()));
+ AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
+ Class = IC_Autorelease;
+
+ DEBUG(dbgs() << " New: "
+ << *AutoreleaseRV << "\n");
+
+}
+
+// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain
+// calls.
+//
+// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and
+// does not escape (following the rules of block escaping), strength reduce the
+// objc_retainBlock to an objc_retain.
+//
+// TODO: If an objc_retainBlock call is dominated period by a previous
+// objc_retainBlock call, strength reduce the objc_retainBlock to an
+// objc_retain.
+bool
+ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
+ InstructionClass &Class) {
+ assert(GetBasicInstructionClass(Inst) == Class);
+ assert(IC_RetainBlock == Class);
+
+ // If we can not optimize Inst, return false.
+ if (!IsRetainBlockOptimizable(Inst))
+ return false;
+
+ CallInst *RetainBlock = cast<CallInst>(Inst);
+ RetainBlock->setCalledFunction(getRetainCallee(F.getParent()));
+ // Remove copy_on_escape metadata.
+ RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
+ Class = IC_Retain;
+
+ return true;
+}
+
+/// Visit each call, one at a time, and make simplifications without doing any
+/// additional analysis.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+ // Reset all the flags in preparation for recomputing them.
+ UsedInThisFunction = 0;
+
+ // Visit all objc_* calls in F.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
+ << Class << "; " << *Inst << "\n");
+
+ switch (Class) {
+ default: break;
+
+ // Delete no-op casts. These function calls have special semantics, but
+ // the semantics are entirely implemented via lowering in the front-end,
+ // so by the time they reach the optimizer, they are just no-op calls
+ // which return their argument.
+ //
+ // There are gray areas here, as the ability to cast reference-counted
+ // pointers to raw void* and back allows code to break ARC assumptions,
+ // however these are currently considered to be unimportant.
+ case IC_NoopCast:
+ Changed = true;
+ ++NumNoops;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:"
+ " " << *Inst << "\n");
+ EraseInstruction(Inst);
+ continue;
+
+ // If the pointer-to-weak-pointer is null, it's undefined behavior.
+ case IC_StoreWeak:
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained:
+ case IC_InitWeak:
+ case IC_DestroyWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (IsNullOrUndef(CI->getArgOperand(0))) {
+ Changed = true;
+ Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+ llvm::Value *NewValue = UndefValue::get(CI->getType());
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
+ "pointer-to-weak-pointer is undefined behavior.\n"
+ " Old = " << *CI <<
+ "\n New = " <<
+ *NewValue << "\n");
+ CI->replaceAllUsesWith(NewValue);
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_CopyWeak:
+ case IC_MoveWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (IsNullOrUndef(CI->getArgOperand(0)) ||
+ IsNullOrUndef(CI->getArgOperand(1))) {
+ Changed = true;
+ Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+
+ llvm::Value *NewValue = UndefValue::get(CI->getType());
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
+ "pointer-to-weak-pointer is undefined behavior.\n"
+ " Old = " << *CI <<
+ "\n New = " <<
+ *NewValue << "\n");
+
+ CI->replaceAllUsesWith(NewValue);
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_RetainBlock:
+ // If we strength reduce an objc_retainBlock to amn objc_retain, continue
+ // onto the objc_retain peephole optimizations. Otherwise break.
+ if (!OptimizeRetainBlockCall(F, Inst, Class))
+ break;
+ // FALLTHROUGH
+ case IC_Retain:
+ OptimizeRetainCall(F, Inst);
+ break;
+ case IC_RetainRV:
+ if (OptimizeRetainRVCall(F, Inst))
+ continue;
+ break;
+ case IC_AutoreleaseRV:
+ OptimizeAutoreleaseRVCall(F, Inst, Class);
+ break;
+ }
+
+ // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
+ if (IsAutorelease(Class) && Inst->use_empty()) {
+ CallInst *Call = cast<CallInst>(Inst);
+ const Value *Arg = Call->getArgOperand(0);
+ Arg = FindSingleUseIdentifiedObject(Arg);
+ if (Arg) {
+ Changed = true;
+ ++NumAutoreleases;
+
+ // Create the declaration lazily.
+ LLVMContext &C = Inst->getContext();
+ CallInst *NewCall =
+ CallInst::Create(getReleaseCallee(F.getParent()),
+ Call->getArgOperand(0), "", Call);
+ NewCall->setMetadata(ImpreciseReleaseMDKind,
+ MDNode::get(C, ArrayRef<Value *>()));
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing "
+ "objc_autorelease(x) with objc_release(x) since x is "
+ "otherwise unused.\n"
+ " Old: " << *Call <<
+ "\n New: " <<
+ *NewCall << "\n");
+
+ EraseInstruction(Call);
+ Inst = NewCall;
+ Class = IC_Release;
+ }
+ }
+
+ // For functions which can never be passed stack arguments, add
+ // a tail keyword.
+ if (IsAlwaysTail(Class)) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword"
+ " to function since it can never be passed stack args: " << *Inst <<
+ "\n");
+ cast<CallInst>(Inst)->setTailCall();
+ }
+
+ // Ensure that functions that can never have a "tail" keyword due to the
+ // semantics of ARC truly do not do so.
+ if (IsNeverTail(Class)) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail "
+ "keyword from function: " << *Inst <<
+ "\n");
+ cast<CallInst>(Inst)->setTailCall(false);
+ }
+
+ // Set nounwind as needed.
+ if (IsNoThrow(Class)) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw"
+ " class. Setting nounwind on: " << *Inst << "\n");
+ cast<CallInst>(Inst)->setDoesNotThrow();
+ }
+
+ if (!IsNoopOnNull(Class)) {
+ UsedInThisFunction |= 1 << Class;
+ continue;
+ }
+
+ const Value *Arg = GetObjCArg(Inst);
+
+ // ARC calls with null are no-ops. Delete them.
+ if (IsNullOrUndef(Arg)) {
+ Changed = true;
+ ++NumNoops;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with "
+ " null are no-ops. Erasing: " << *Inst << "\n");
+ EraseInstruction(Inst);
+ continue;
+ }
+
+ // Keep track of which of retain, release, autorelease, and retain_block
+ // are actually present in this function.
+ UsedInThisFunction |= 1 << Class;
+
+ // If Arg is a PHI, and one or more incoming values to the
+ // PHI are null, and the call is control-equivalent to the PHI, and there
+ // are no relevant side effects between the PHI and the call, the call
+ // could be pushed up to just those paths with non-null incoming values.
+ // For now, don't bother splitting critical edges for this.
+ SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
+ Worklist.push_back(std::make_pair(Inst, Arg));
+ do {
+ std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
+ Inst = Pair.first;
+ Arg = Pair.second;
+
+ const PHINode *PN = dyn_cast<PHINode>(Arg);
+ if (!PN) continue;
+
+ // Determine if the PHI has any null operands, or any incoming
+ // critical edges.
+ bool HasNull = false;
+ bool HasCriticalEdges = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (IsNullOrUndef(Incoming))
+ HasNull = true;
+ else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
+ .getNumSuccessors() != 1) {
+ HasCriticalEdges = true;
+ break;
+ }
+ }
+ // If we have null operands and no critical edges, optimize.
+ if (!HasCriticalEdges && HasNull) {
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+
+ // Check that there is nothing that cares about the reference
+ // count between the call and the phi.
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainBlock:
+ // These can always be moved up.
+ break;
+ case IC_Release:
+ // These can't be moved across things that care about the retain
+ // count.
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_Autorelease:
+ // These can't be moved across autorelease pool scope boundaries.
+ FindDependencies(AutoreleasePoolBoundary, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_RetainRV:
+ case IC_AutoreleaseRV:
+ // Don't move these; the RV optimization depends on the autoreleaseRV
+ // being tail called, and the retainRV being immediately after a call
+ // (which might still happen if we get lucky with codegen layout, but
+ // it's not worth taking the chance).
+ continue;
+ default:
+ llvm_unreachable("Invalid dependence flavor");
+ }
+
+ if (DependingInstructions.size() == 1 &&
+ *DependingInstructions.begin() == PN) {
+ Changed = true;
+ ++NumPartialNoops;
+ // Clone the call into each predecessor that has a non-null value.
+ CallInst *CInst = cast<CallInst>(Inst);
+ Type *ParamTy = CInst->getArgOperand(0)->getType();
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (!IsNullOrUndef(Incoming)) {
+ CallInst *Clone = cast<CallInst>(CInst->clone());
+ Value *Op = PN->getIncomingValue(i);
+ Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
+ if (Op->getType() != ParamTy)
+ Op = new BitCastInst(Op, ParamTy, "", InsertPos);
+ Clone->setArgOperand(0, Op);
+ Clone->insertBefore(InsertPos);
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning "
+ << *CInst << "\n"
+ " And inserting "
+ "clone at " << *InsertPos << "\n");
+ Worklist.push_back(std::make_pair(Clone, Incoming));
+ }
+ }
+ // Erase the original call.
+ DEBUG(dbgs() << "Erasing: " << *CInst << "\n");
+ EraseInstruction(CInst);
+ continue;
+ }
+ }
+ } while (!Worklist.empty());
+ }
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n");
+}
+
+/// Check for critical edges, loop boundaries, irreducible control flow, or
+/// other CFG structures where moving code across the edge would result in it
+/// being executed more.
+void
+ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const {
+ // If any top-down local-use or possible-dec has a succ which is earlier in
+ // the sequence, forget it.
+ for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
+ E = MyStates.top_down_ptr_end(); I != E; ++I)
+ switch (I->second.GetSeq()) {
+ default: break;
+ case S_Use: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ PtrState &S = I->second;
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ for (; SI != SE; ++SI) {
+ Sequence SuccSSeq = S_None;
+ bool SuccSRRIKnownSafe = false;
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ switch (SuccSSeq) {
+ case S_None:
+ case S_CanRelease: {
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+ S.ClearSequenceProgress();
+ break;
+ }
+ continue;
+ }
+ case S_Use:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ S.ClearSequenceProgress();
+ break;
+ }
+ case S_CanRelease: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ PtrState &S = I->second;
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ for (; SI != SE; ++SI) {
+ Sequence SuccSSeq = S_None;
+ bool SuccSRRIKnownSafe = false;
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ switch (SuccSSeq) {
+ case S_None: {
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+ S.ClearSequenceProgress();
+ break;
+ }
+ continue;
+ }
+ case S_CanRelease:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ S.ClearSequenceProgress();
+ break;
+ }
+ }
+}
+
+bool
+ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+ // If we see two releases in a row on the same pointer. If so, make
+ // a note, and we'll cicle back to revisit it after we've
+ // hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
+ DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
+ "releases (i.e. a release pair)\n");
+ NestingDetected = true;
+ }
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
+ ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq);
+ S.ResetSequenceProgress(NewSeq);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
+ S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.Calls.insert(Inst);
+ S.SetKnownPositiveRefCount();
+ break;
+ }
+ case IC_RetainBlock:
+ // In OptimizeIndividualCalls, we have strength reduced all optimizable
+ // objc_retainBlocks to objc_retains. Thus at this point any
+ // objc_retainBlocks that we see are not optimizable.
+ break;
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ S.SetKnownPositiveRefCount();
+
+ Sequence OldSeq = S.GetSeq();
+ switch (OldSeq) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_CanRelease:
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV)
+ Retains[Inst] = S.RRI;
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
+ return NestingDetected;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearBottomUpPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ S.ClearKnownPositiveRefCount();
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq());
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ assert(S.RRI.ReverseInsertPts.empty());
+ // If this is an invoke instruction, we're scanning it as part of
+ // one of its successor blocks, since we can't insert code after it
+ // in its own block, and we don't want to split critical edges.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ S.SetSeq(S_Use);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
+ } else if (Seq == S_Release && IsUser(Class)) {
+ // Non-movable releases depend on any possible objc pointer use.
+ S.SetSeq(S_Stop);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
+ assert(S.RRI.ReverseInsertPts.empty());
+ // As above; handle invoke specially.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ S.SetSeq(S_Use);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
+ }
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each successor to compute the initial state
+ // for the current block.
+ BBState::edge_iterator SI(MyStates.succ_begin()),
+ SE(MyStates.succ_end());
+ if (SI != SE) {
+ const BasicBlock *Succ = *SI;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ assert(I != BBStates.end());
+ MyStates.InitFromSucc(I->second);
+ ++SI;
+ for (; SI != SE; ++SI) {
+ Succ = *SI;
+ I = BBStates.find(Succ);
+ assert(I != BBStates.end());
+ MyStates.MergeSucc(I->second);
+ }
+ }
+
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // bottom of the basic block.
+ ANNOTATE_BOTTOMUP_BBEND(MyStates, BB);
+
+ // Visit all the instructions, bottom-up.
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+ Instruction *Inst = llvm::prior(I);
+
+ // Invoke instructions are visited as part of their successors (below).
+ if (isa<InvokeInst>(Inst))
+ continue;
+
+ DEBUG(dbgs() << "ObjCARCOpt::VisitButtonUp: Visiting " << *Inst << "\n");
+
+ NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+ }
+
+ // If there's a predecessor with an invoke, visit the invoke as if it were
+ // part of this block, since we can't insert code after an invoke in its own
+ // block, and we don't want to split critical edges.
+ for (BBState::edge_iterator PI(MyStates.pred_begin()),
+ PE(MyStates.pred_end()); PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
+ NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
+ }
+
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // top of the basic block.
+ ANNOTATE_BOTTOMUP_BBSTART(MyStates, BB);
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_RetainBlock:
+ // In OptimizeIndividualCalls, we have strength reduced all optimizable
+ // objc_retainBlocks to objc_retains. Thus at this point any
+ // objc_retainBlocks that we see are not optimizable.
+ break;
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ // If we see two retains in a row on the same pointer. If so, make
+ // a note, and we'll cicle back to revisit it after we've
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain);
+ S.ResetSequenceProgress(S_Retain);
+ S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
+ S.RRI.Calls.insert(Inst);
+ }
+
+ S.SetKnownPositiveRefCount();
+
+ // A retain can be a potential use; procede to the generic checking
+ // code below.
+ break;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.ClearKnownPositiveRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI;
+ ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ S.ClearKnownPositiveRefCount();
+ switch (Seq) {
+ case S_Retain:
+ S.SetSeq(S_CanRelease);
+ ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease);
+ assert(S.RRI.ReverseInsertPts.empty());
+ S.RRI.ReverseInsertPts.insert(Inst);
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue;
+ case S_Use:
+ case S_CanRelease:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_CanRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ S.SetSeq(S_Use);
+ ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
+ }
+ break;
+ case S_Retain:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each predecessor to compute the initial state
+ // for the current block.
+ BBState::edge_iterator PI(MyStates.pred_begin()),
+ PE(MyStates.pred_end());
+ if (PI != PE) {
+ const BasicBlock *Pred = *PI;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+ assert(I != BBStates.end());
+ MyStates.InitFromPred(I->second);
+ ++PI;
+ for (; PI != PE; ++PI) {
+ Pred = *PI;
+ I = BBStates.find(Pred);
+ assert(I != BBStates.end());
+ MyStates.MergePred(I->second);
+ }
+ }
+
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // top of the basic block.
+ ANNOTATE_TOPDOWN_BBSTART(MyStates, BB);
+
+ // Visit all the instructions, top-down.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ Instruction *Inst = I;
+
+ DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n");
+
+ NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
+ }
+
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // bottom of the basic block.
+ ANNOTATE_TOPDOWN_BBEND(MyStates, BB);
+
+ CheckForCFGHazards(BB, BBStates, MyStates);
+ return NestingDetected;
+}
+
+static void
+ComputePostOrders(Function &F,
+ SmallVectorImpl<BasicBlock *> &PostOrder,
+ SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
+ unsigned NoObjCARCExceptionsMDKind,
+ DenseMap<const BasicBlock *, BBState> &BBStates) {
+ /// The visited set, for doing DFS walks.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+ // Do DFS, computing the PostOrder.
+ SmallPtrSet<BasicBlock *, 16> OnStack;
+ SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
+
+ // Functions always have exactly one entry block, and we don't have
+ // any other block that we treat like an entry block.
+ BasicBlock *EntryBB = &F.getEntryBlock();
+ BBState &MyStates = BBStates[EntryBB];
+ MyStates.SetAsEntry();
+ TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back());
+ SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
+ Visited.insert(EntryBB);
+ OnStack.insert(EntryBB);
+ do {
+ dfs_next_succ:
+ BasicBlock *CurrBB = SuccStack.back().first;
+ TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back());
+ succ_iterator SE(TI, false);
+
+ while (SuccStack.back().second != SE) {
+ BasicBlock *SuccBB = *SuccStack.back().second++;
+ if (Visited.insert(SuccBB)) {
+ TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
+ SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
+ BBStates[CurrBB].addSucc(SuccBB);
+ BBState &SuccStates = BBStates[SuccBB];
+ SuccStates.addPred(CurrBB);
+ OnStack.insert(SuccBB);
+ goto dfs_next_succ;
+ }
+
+ if (!OnStack.count(SuccBB)) {
+ BBStates[CurrBB].addSucc(SuccBB);
+ BBStates[SuccBB].addPred(CurrBB);
+ }
+ }
+ OnStack.erase(CurrBB);
+ PostOrder.push_back(CurrBB);
+ SuccStack.pop_back();
+ } while (!SuccStack.empty());
+
+ Visited.clear();
+
+ // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+ // Functions may have many exits, and there also blocks which we treat
+ // as exits due to ignored edges.
+ SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *ExitBB = I;
+ BBState &MyStates = BBStates[ExitBB];
+ if (!MyStates.isExit())
+ continue;
+
+ MyStates.SetAsExit();
+
+ PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
+ Visited.insert(ExitBB);
+ while (!PredStack.empty()) {
+ reverse_dfs_next_succ:
+ BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
+ while (PredStack.back().second != PE) {
+ BasicBlock *BB = *PredStack.back().second++;
+ if (Visited.insert(BB)) {
+ PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
+ goto reverse_dfs_next_succ;
+ }
+ }
+ ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
+ }
+ }
+}
+
+// Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+
+ // Use reverse-postorder traversals, because we magically know that loops
+ // will be well behaved, i.e. they won't repeatedly call retain on a single
+ // pointer without doing a release. We can't use the ReversePostOrderTraversal
+ // class here because we want the reverse-CFG postorder to consider each
+ // function exit point, and we want to ignore selected cycle edges.
+ SmallVector<BasicBlock *, 16> PostOrder;
+ SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+ ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
+ NoObjCARCExceptionsMDKind,
+ BBStates);
+
+ // Use reverse-postorder on the reverse CFG for bottom-up.
+ bool BottomUpNestingDetected = false;
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+ I != E; ++I)
+ BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
+
+ // Use reverse-postorder for top-down.
+ bool TopDownNestingDetected = false;
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ PostOrder.rbegin(), E = PostOrder.rend();
+ I != E; ++I)
+ TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
+
+ return TopDownNestingDetected && BottomUpNestingDetected;
+}
+
+/// Move the calls in RetainsToMove and ReleasesToMove.
+void ObjCARCOpt::MoveCalls(Value *Arg,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M) {
+ Type *ArgTy = Arg->getType();
+ Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
+
+ // Insert the new retain and release calls.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = ReleasesToMove.ReverseInsertPts.begin(),
+ PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call =
+ CallInst::Create(getRetainCallee(M), MyArg, "", InsertPt);
+ Call->setDoesNotThrow();
+ Call->setTailCall();
+
+ DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
+ << "\n"
+ " At insertion point: " << *InsertPt
+ << "\n");
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = RetainsToMove.ReverseInsertPts.begin(),
+ PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+ "", InsertPt);
+ // Attach a clang.imprecise_release metadata tag, if appropriate.
+ if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+ Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setDoesNotThrow();
+ if (ReleasesToMove.IsTailCallRelease)
+ Call->setTailCall();
+
+ DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call
+ << "\n"
+ " At insertion point: " << *InsertPt
+ << "\n");
+ }
+
+ // Delete the original retain and release calls.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = RetainsToMove.Calls.begin(),
+ AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRetain = *AI;
+ Retains.blot(OrigRetain);
+ DeadInsts.push_back(OrigRetain);
+ DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain <<
+ "\n");
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = ReleasesToMove.Calls.begin(),
+ AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRelease = *AI;
+ Releases.erase(OrigRelease);
+ DeadInsts.push_back(OrigRelease);
+ DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease
+ << "\n");
+ }
+}
+
+bool
+ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
+ &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M,
+ SmallVector<Instruction *, 4> &NewRetains,
+ SmallVector<Instruction *, 4> &NewReleases,
+ SmallVector<Instruction *, 8> &DeadInsts,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ Value *Arg,
+ bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated) {
+ // If a pair happens in a region where it is known that the reference count
+ // is already incremented, we can similarly ignore possible decrements.
+ bool KnownSafeTD = true, KnownSafeBU = true;
+
+ // Connect the dots between the top-down-collected RetainsToMove and
+ // bottom-up-collected ReleasesToMove to form sets of related calls.
+ // This is an iterative process so that we connect multiple releases
+ // to multiple retains if needed.
+ unsigned OldDelta = 0;
+ unsigned NewDelta = 0;
+ unsigned OldCount = 0;
+ unsigned NewCount = 0;
+ bool FirstRelease = true;
+ for (;;) {
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+ Instruction *NewRetain = *NI;
+ MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+ assert(It != Retains.end());
+ const RRInfo &NewRetainRRI = It->second;
+ KnownSafeTD &= NewRetainRRI.KnownSafe;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewRetainRRI.Calls.begin(),
+ LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewRetainRelease = *LI;
+ DenseMap<Value *, RRInfo>::const_iterator Jt =
+ Releases.find(NewRetainRelease);
+ if (Jt == Releases.end())
+ return false;
+ const RRInfo &NewRetainReleaseRRI = Jt->second;
+ assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+ if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+ OldDelta -=
+ BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+ // Merge the ReleaseMetadata and IsTailCallRelease values.
+ if (FirstRelease) {
+ ReleasesToMove.ReleaseMetadata =
+ NewRetainReleaseRRI.ReleaseMetadata;
+ ReleasesToMove.IsTailCallRelease =
+ NewRetainReleaseRRI.IsTailCallRelease;
+ FirstRelease = false;
+ } else {
+ if (ReleasesToMove.ReleaseMetadata !=
+ NewRetainReleaseRRI.ReleaseMetadata)
+ ReleasesToMove.ReleaseMetadata = 0;
+ if (ReleasesToMove.IsTailCallRelease !=
+ NewRetainReleaseRRI.IsTailCallRelease)
+ ReleasesToMove.IsTailCallRelease = false;
+ }
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+ RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+ NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+ }
+ NewReleases.push_back(NewRetainRelease);
+ }
+ }
+ }
+ NewRetains.clear();
+ if (NewReleases.empty()) break;
+
+ // Back the other way.
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+ Instruction *NewRelease = *NI;
+ DenseMap<Value *, RRInfo>::const_iterator It =
+ Releases.find(NewRelease);
+ assert(It != Releases.end());
+ const RRInfo &NewReleaseRRI = It->second;
+ KnownSafeBU &= NewReleaseRRI.KnownSafe;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewReleaseRRI.Calls.begin(),
+ LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewReleaseRetain = *LI;
+ MapVector<Value *, RRInfo>::const_iterator Jt =
+ Retains.find(NewReleaseRetain);
+ if (Jt == Retains.end())
+ return false;
+ const RRInfo &NewReleaseRetainRRI = Jt->second;
+ assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+ if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+ unsigned PathCount =
+ BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+ OldDelta += PathCount;
+ OldCount += PathCount;
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
+ RE = NewReleaseRetainRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
+ PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+ NewDelta += PathCount;
+ NewCount += PathCount;
+ }
+ }
+ NewRetains.push_back(NewReleaseRetain);
+ }
+ }
+ }
+ NewReleases.clear();
+ if (NewRetains.empty()) break;
+ }
+
+ // If the pointer is known incremented or nested, we can safely delete the
+ // pair regardless of what's between them.
+ if (KnownSafeTD || KnownSafeBU) {
+ RetainsToMove.ReverseInsertPts.clear();
+ ReleasesToMove.ReverseInsertPts.clear();
+ NewCount = 0;
+ } else {
+ // Determine whether the new insertion points we computed preserve the
+ // balance of retain and release calls through the program.
+ // TODO: If the fully aggressive solution isn't valid, try to find a
+ // less aggressive solution which is.
+ if (NewDelta != 0)
+ return false;
+ }
+
+ // Determine whether the original call points are balanced in the retain and
+ // release calls through the program. If not, conservatively don't touch
+ // them.
+ // TODO: It's theoretically possible to do code motion in this case, as
+ // long as the existing imbalances are maintained.
+ if (OldDelta != 0)
+ return false;
+
+ Changed = true;
+ assert(OldCount != 0 && "Unreachable code?");
+ NumRRs += OldCount - NewCount;
+ // Set to true if we completely removed any RR pairs.
+ AnyPairsCompletelyEliminated = NewCount == 0;
+
+ // We can move calls!
+ return true;
+}
+
+/// Identify pairings between the retains and releases, and delete and/or move
+/// them.
+bool
+ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
+ &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M) {
+ bool AnyPairsCompletelyEliminated = false;
+ RRInfo RetainsToMove;
+ RRInfo ReleasesToMove;
+ SmallVector<Instruction *, 4> NewRetains;
+ SmallVector<Instruction *, 4> NewReleases;
+ SmallVector<Instruction *, 8> DeadInsts;
+
+ // Visit each retain.
+ for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+ E = Retains.end(); I != E; ++I) {
+ Value *V = I->first;
+ if (!V) continue; // blotted
+
+ Instruction *Retain = cast<Instruction>(V);
+
+ DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain
+ << "\n");
+
+ Value *Arg = GetObjCArg(Retain);
+
+ // If the object being released is in static or stack storage, we know it's
+ // not being managed by ObjC reference counting, so we can delete pairs
+ // regardless of what possible decrements or uses lie between them.
+ bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand())))
+ if (GV->isConstant())
+ KnownSafe = true;
+
+ // Connect the dots between the top-down-collected RetainsToMove and
+ // bottom-up-collected ReleasesToMove to form sets of related calls.
+ NewRetains.push_back(Retain);
+ bool PerformMoveCalls =
+ ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains,
+ NewReleases, DeadInsts, RetainsToMove,
+ ReleasesToMove, Arg, KnownSafe,
+ AnyPairsCompletelyEliminated);
+
+#ifdef ARC_ANNOTATIONS
+ // Do not move calls if ARC annotations are requested. If we were to move
+ // calls in this case, we would not be able
+ PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations;
+#endif // ARC_ANNOTATIONS
+
+ if (PerformMoveCalls) {
+ // Ok, everything checks out and we're all set. Let's move/delete some
+ // code!
+ MoveCalls(Arg, RetainsToMove, ReleasesToMove,
+ Retains, Releases, DeadInsts, M);
+ }
+
+ // Clean up state for next retain.
+ NewReleases.clear();
+ NewRetains.clear();
+ RetainsToMove.clear();
+ ReleasesToMove.clear();
+ }
+
+ // Now that we're done moving everything, we can delete the newly dead
+ // instructions, as we no longer need them as insert points.
+ while (!DeadInsts.empty())
+ EraseInstruction(DeadInsts.pop_back_val());
+
+ return AnyPairsCompletelyEliminated;
+}
+
+/// Weak pointer optimizations.
+void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+ // First, do memdep-style RLE and S2L optimizations. We can't use memdep
+ // itself because it uses AliasAnalysis and we need to do provenance
+ // queries instead.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst <<
+ "\n");
+
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
+ continue;
+
+ // Delete objc_loadWeak calls with no users.
+ if (Class == IC_LoadWeak && Inst->use_empty()) {
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // TODO: For now, just look for an earlier available version of this value
+ // within the same block. Theoretically, we could do memdep-style non-local
+ // analysis too, but that would want caching. A better approach would be to
+ // use the technique that EarlyCSE uses.
+ inst_iterator Current = llvm::prior(I);
+ BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+ for (BasicBlock::iterator B = CurrentBB->begin(),
+ J = Current.getInstructionIterator();
+ J != B; --J) {
+ Instruction *EarlierInst = &*llvm::prior(J);
+ InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+ switch (EarlierClass) {
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained: {
+ // If this is loading from the same pointer, replace this load's value
+ // with that one.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall);
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_StoreWeak:
+ case IC_InitWeak: {
+ // If this is storing to the same pointer and has the same size etc.
+ // replace this load's value with the stored value.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_MoveWeak:
+ case IC_CopyWeak:
+ // TOOD: Grab the copied value.
+ goto clobbered;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ case IC_IntrinsicUser:
+ case IC_User:
+ // Weak pointers are only modified through the weak entry points
+ // (and arbitrary calls, which could call the weak entry points).
+ break;
+ default:
+ // Anything else could modify the weak pointer.
+ goto clobbered;
+ }
+ }
+ clobbered:;
+ }
+
+ // Then, for each destroyWeak with an alloca operand, check to see if
+ // the alloca and all its users can be zapped.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_DestroyWeak)
+ continue;
+
+ CallInst *Call = cast<CallInst>(Inst);
+ Value *Arg = Call->getArgOperand(0);
+ if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ++UI) {
+ const Instruction *UserInst = cast<Instruction>(*UI);
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ case IC_DestroyWeak:
+ continue;
+ default:
+ goto done;
+ }
+ }
+ Changed = true;
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ) {
+ CallInst *UserInst = cast<CallInst>(*UI++);
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ // These functions return their second argument.
+ UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
+ break;
+ case IC_DestroyWeak:
+ // No return value.
+ break;
+ default:
+ llvm_unreachable("alloca really is used!");
+ }
+ UserInst->eraseFromParent();
+ }
+ Alloca->eraseFromParent();
+ done:;
+ }
+ }
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
+
+}
+
+/// Identify program paths which execute sequences of retains and releases which
+/// can be eliminated.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+ /// Releases, Retains - These are used to store the results of the main flow
+ /// analysis. These use Value* as the key instead of Instruction* so that the
+ /// map stays valid when we get around to rewriting code and calls get
+ /// replaced by arguments.
+ DenseMap<Value *, RRInfo> Releases;
+ MapVector<Value *, RRInfo> Retains;
+
+ /// This is used during the traversal of the function to track the
+ /// states for each identified object at each block.
+ DenseMap<const BasicBlock *, BBState> BBStates;
+
+ // Analyze the CFG of the function, and all instructions.
+ bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+
+ // Transform.
+ return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
+ NestingDetected;
+}
+
+/// Check if there is a dependent call earlier that does not have anything in
+/// between the Retain and the call that can affect the reference count of their
+/// shared pointer argument. Note that Retain need not be in BB.
+static bool
+HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
+ SmallPtrSet<Instruction *, 4> &DepInsts,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
+ DepInsts, Visited, PA);
+ if (DepInsts.size() != 1)
+ return false;
+
+ CallInst *Call =
+ dyn_cast_or_null<CallInst>(*DepInsts.begin());
+
+ // Check that the pointer is the return value of the call.
+ if (!Call || Arg != Call)
+ return false;
+
+ // Check that the call is a regular call.
+ InstructionClass Class = GetBasicInstructionClass(Call);
+ if (Class != IC_CallOrUser && Class != IC_Call)
+ return false;
+
+ return true;
+}
+
+/// Find a dependent retain that precedes the given autorelease for which there
+/// is nothing in between the two instructions that can affect the ref count of
+/// Arg.
+static CallInst *
+FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
+ Instruction *Autorelease,
+ SmallPtrSet<Instruction *, 4> &DepInsts,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ FindDependencies(CanChangeRetainCount, Arg,
+ BB, Autorelease, DepInsts, Visited, PA);
+ if (DepInsts.size() != 1)
+ return 0;
+
+ CallInst *Retain =
+ dyn_cast_or_null<CallInst>(*DepInsts.begin());
+
+ // Check that we found a retain with the same argument.
+ if (!Retain ||
+ !IsRetain(GetBasicInstructionClass(Retain)) ||
+ GetObjCArg(Retain) != Arg) {
+ return 0;
+ }
+
+ return Retain;
+}
+
+/// Look for an ``autorelease'' instruction dependent on Arg such that there are
+/// no instructions dependent on Arg that need a positive ref count in between
+/// the autorelease and the ret.
+static CallInst *
+FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
+ ReturnInst *Ret,
+ SmallPtrSet<Instruction *, 4> &DepInsts,
+ SmallPtrSet<const BasicBlock *, 4> &V,
+ ProvenanceAnalysis &PA) {
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ BB, Ret, DepInsts, V, PA);
+ if (DepInsts.size() != 1)
+ return 0;
+
+ CallInst *Autorelease =
+ dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ if (!Autorelease)
+ return 0;
+ InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
+ if (!IsAutorelease(AutoreleaseClass))
+ return 0;
+ if (GetObjCArg(Autorelease) != Arg)
+ return 0;
+
+ return Autorelease;
+}
+
+/// Look for this pattern:
+/// \code
+/// %call = call i8* @something(...)
+/// %2 = call i8* @objc_retain(i8* %call)
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+/// \endcode
+/// And delete the retain and autorelease.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+ if (!F.getReturnType()->isPointerTy())
+ return;
+
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ BasicBlock *BB = FI;
+ ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
+
+ if (!Ret)
+ continue;
+
+ const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+
+ // Look for an ``autorelease'' instruction that is a predecssor of Ret and
+ // dependent on Arg such that there are no instructions dependent on Arg
+ // that need a positive ref count in between the autorelease and Ret.
+ CallInst *Autorelease =
+ FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret,
+ DependingInstructions, Visited,
+ PA);
+ if (Autorelease) {
+ DependingInstructions.clear();
+ Visited.clear();
+
+ CallInst *Retain =
+ FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
+ DependingInstructions, Visited, PA);
+ if (Retain) {
+ DependingInstructions.clear();
+ Visited.clear();
+
+ // Check that there is nothing that can affect the reference count
+ // between the retain and the call. Note that Retain need not be in BB.
+ if (HasSafePathToPredecessorCall(Arg, Retain, DependingInstructions,
+ Visited, PA)) {
+ // If so, we can zap the retain and autorelease.
+ Changed = true;
+ ++NumRets;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
+ << "\n Erasing: "
+ << *Autorelease << "\n");
+ EraseInstruction(Retain);
+ EraseInstruction(Autorelease);
+ }
+ }
+ }
+
+ DependingInstructions.clear();
+ Visited.clear();
+ }
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
+
+}
+
+bool ObjCARCOpt::doInitialization(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // Identify the imprecise release metadata kind.
+ ImpreciseReleaseMDKind =
+ M.getContext().getMDKindID("clang.imprecise_release");
+ CopyOnEscapeMDKind =
+ M.getContext().getMDKindID("clang.arc.copy_on_escape");
+ NoObjCARCExceptionsMDKind =
+ M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+#ifdef ARC_ANNOTATIONS
+ ARCAnnotationBottomUpMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.bottomup");
+ ARCAnnotationTopDownMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.topdown");
+ ARCAnnotationProvenanceSourceMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.provenancesource");
+#endif // ARC_ANNOTATIONS
+
+ // Intuitively, objc_retain and others are nocapture, however in practice
+ // they are not, because they return their argument value. And objc_release
+ // calls finalizers which can have arbitrary side effects.
+
+ // These are initialized lazily.
+ RetainRVCallee = 0;
+ AutoreleaseRVCallee = 0;
+ ReleaseCallee = 0;
+ RetainCallee = 0;
+ RetainBlockCallee = 0;
+ AutoreleaseCallee = 0;
+
+ return false;
+}
+
+bool ObjCARCOpt::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+
+ DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // This pass performs several distinct transformations. As a compile-time aid
+ // when compiling code that isn't ObjC, skip these if the relevant ObjC
+ // library functions aren't declared.
+
+ // Preliminary optimizations. This also computs UsedInThisFunction.
+ OptimizeIndividualCalls(F);
+
+ // Optimizations for weak pointers.
+ if (UsedInThisFunction & ((1 << IC_LoadWeak) |
+ (1 << IC_LoadWeakRetained) |
+ (1 << IC_StoreWeak) |
+ (1 << IC_InitWeak) |
+ (1 << IC_CopyWeak) |
+ (1 << IC_MoveWeak) |
+ (1 << IC_DestroyWeak)))
+ OptimizeWeakCalls(F);
+
+ // Optimizations for retain+release pairs.
+ if (UsedInThisFunction & ((1 << IC_Retain) |
+ (1 << IC_RetainRV) |
+ (1 << IC_RetainBlock)))
+ if (UsedInThisFunction & (1 << IC_Release))
+ // Run OptimizeSequences until it either stops making changes or
+ // no retain+release pair nesting is detected.
+ while (OptimizeSequences(F)) {}
+
+ // Optimizations if objc_autorelease is used.
+ if (UsedInThisFunction & ((1 << IC_Autorelease) |
+ (1 << IC_AutoreleaseRV)))
+ OptimizeReturns(F);
+
+ DEBUG(dbgs() << "\n");
+
+ return Changed;
+}
+
+void ObjCARCOpt::releaseMemory() {
+ PA.clear();
+}
+
+/// @}
+///
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
new file mode 100644
index 000000000000..03e12d4fd763
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
@@ -0,0 +1,252 @@
+//===- ObjCARCUtil.cpp - ObjC ARC Optimization --------*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines several utility functions used by various ARC
+/// optimizations which are IMHO too big to be in a header file.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
+ const InstructionClass Class) {
+ switch (Class) {
+ case IC_Retain:
+ return OS << "IC_Retain";
+ case IC_RetainRV:
+ return OS << "IC_RetainRV";
+ case IC_RetainBlock:
+ return OS << "IC_RetainBlock";
+ case IC_Release:
+ return OS << "IC_Release";
+ case IC_Autorelease:
+ return OS << "IC_Autorelease";
+ case IC_AutoreleaseRV:
+ return OS << "IC_AutoreleaseRV";
+ case IC_AutoreleasepoolPush:
+ return OS << "IC_AutoreleasepoolPush";
+ case IC_AutoreleasepoolPop:
+ return OS << "IC_AutoreleasepoolPop";
+ case IC_NoopCast:
+ return OS << "IC_NoopCast";
+ case IC_FusedRetainAutorelease:
+ return OS << "IC_FusedRetainAutorelease";
+ case IC_FusedRetainAutoreleaseRV:
+ return OS << "IC_FusedRetainAutoreleaseRV";
+ case IC_LoadWeakRetained:
+ return OS << "IC_LoadWeakRetained";
+ case IC_StoreWeak:
+ return OS << "IC_StoreWeak";
+ case IC_InitWeak:
+ return OS << "IC_InitWeak";
+ case IC_LoadWeak:
+ return OS << "IC_LoadWeak";
+ case IC_MoveWeak:
+ return OS << "IC_MoveWeak";
+ case IC_CopyWeak:
+ return OS << "IC_CopyWeak";
+ case IC_DestroyWeak:
+ return OS << "IC_DestroyWeak";
+ case IC_StoreStrong:
+ return OS << "IC_StoreStrong";
+ case IC_CallOrUser:
+ return OS << "IC_CallOrUser";
+ case IC_Call:
+ return OS << "IC_Call";
+ case IC_User:
+ return OS << "IC_User";
+ case IC_IntrinsicUser:
+ return OS << "IC_IntrinsicUser";
+ case IC_None:
+ return OS << "IC_None";
+ }
+ llvm_unreachable("Unknown instruction class!");
+}
+
+InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No (mandatory) arguments.
+ if (AI == AE)
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+ .Case("clang.arc.use", IC_IntrinsicUser)
+ .Default(IC_CallOrUser);
+
+ // One argument.
+ const Argument *A0 = AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_retain", IC_Retain)
+ .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
+ .Case("objc_retainBlock", IC_RetainBlock)
+ .Case("objc_release", IC_Release)
+ .Case("objc_autorelease", IC_Autorelease)
+ .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
+ .Case("objc_retainedObject", IC_NoopCast)
+ .Case("objc_unretainedObject", IC_NoopCast)
+ .Case("objc_unretainedPointer", IC_NoopCast)
+ .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+ .Default(IC_CallOrUser);
+
+ // Argument is i8**
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
+ .Case("objc_loadWeak", IC_LoadWeak)
+ .Case("objc_destroyWeak", IC_DestroyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = AI++;
+ if (AI == AE)
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_storeWeak", IC_StoreWeak)
+ .Case("objc_initWeak", IC_InitWeak)
+ .Case("objc_storeStrong", IC_StoreStrong)
+ .Default(IC_CallOrUser);
+ // Second argument is i8**.
+ if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_moveWeak", IC_MoveWeak)
+ .Case("objc_copyWeak", IC_CopyWeak)
+ // Ignore annotation calls. This is important to stop the
+ // optimizer from treating annotations as uses which would
+ // make the state of the pointers they are attempting to
+ // elucidate to be incorrect.
+ .Case("llvm.arc.annotation.topdown.bbstart", IC_None)
+ .Case("llvm.arc.annotation.topdown.bbend", IC_None)
+ .Case("llvm.arc.annotation.bottomup.bbstart", IC_None)
+ .Case("llvm.arc.annotation.bottomup.bbend", IC_None)
+ .Default(IC_CallOrUser);
+ }
+
+ // Anything else.
+ return IC_CallOrUser;
+}
+
+/// \brief Determine what kind of construct V is.
+InstructionClass
+llvm::objcarc::GetInstructionClass(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Any instruction other than bitcast and gep with a pointer operand have a
+ // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
+ // to a subsequent use, rather than using it themselves, in this sense.
+ // As a short cut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // Check for calls to special functions.
+ if (const Function *F = CI->getCalledFunction()) {
+ InstructionClass Class = GetFunctionClass(F);
+ if (Class != IC_CallOrUser)
+ return Class;
+
+ // None of the intrinsic functions do objc_release. For intrinsics, the
+ // only question is whether or not they may be users.
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::returnaddress: case Intrinsic::frameaddress:
+ case Intrinsic::stacksave: case Intrinsic::stackrestore:
+ case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+ case Intrinsic::objectsize: case Intrinsic::prefetch:
+ case Intrinsic::stackprotector:
+ case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64:
+ case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa:
+ case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext:
+ case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline:
+ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: case Intrinsic::invariant_end:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return IC_None;
+ default:
+ break;
+ }
+ }
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select: case Instruction::PHI:
+ case Instruction::Ret: case Instruction::Br:
+ case Instruction::Switch: case Instruction::IndirectBr:
+ case Instruction::Alloca: case Instruction::VAArg:
+ case Instruction::Add: case Instruction::FAdd:
+ case Instruction::Sub: case Instruction::FSub:
+ case Instruction::Mul: case Instruction::FMul:
+ case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
+ case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
+ case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
+ case Instruction::And: case Instruction::Or: case Instruction::Xor:
+ case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
+ case Instruction::IntToPtr: case Instruction::FCmp:
+ case Instruction::FPTrunc: case Instruction::FPExt:
+ case Instruction::FPToUI: case Instruction::FPToSI:
+ case Instruction::UIToFP: case Instruction::SIToFP:
+ case Instruction::InsertElement: case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialRetainableObjPtr(I->getOperand(1)))
+ return IC_User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ // Note that this includes both operands of a Store: while the first
+ // operand isn't actually being dereferenced, it is being stored to
+ // memory where we can no longer track who might read it and dereference
+ // it, so we have to consider it potentially used.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialRetainableObjPtr(*OI))
+ return IC_User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return IC_None;
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
new file mode 100644
index 000000000000..ae3c6282cf83
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -0,0 +1,177 @@
+//===- ProvenanceAnalysis.cpp - ObjC ARC Optimization ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a special form of Alias Analysis called ``Provenance
+/// Analysis''. The word ``provenance'' refers to the history of the ownership
+/// of an object. Thus ``Provenance Analysis'' is an analysis which attempts to
+/// use various techniques to determine if locally
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A,
+ const Value *B) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for relations between the values on corresponding arms.
+ if (const SelectInst *SB = dyn_cast<SelectInst>(B))
+ if (A->getCondition() == SB->getCondition())
+ return related(A->getTrueValue(), SB->getTrueValue()) ||
+ related(A->getFalseValue(), SB->getFalseValue());
+
+ // Check both arms of the Select node individually.
+ return related(A->getTrueValue(), B) ||
+ related(A->getFalseValue(), B);
+}
+
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
+ const Value *B) {
+ // If the values are PHIs in the same block, we can do a more precise as well
+ // as efficient check: just check for relations between the values on
+ // corresponding edges.
+ if (const PHINode *PNB = dyn_cast<PHINode>(B))
+ if (PNB->getParent() == A->getParent()) {
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
+ if (related(A->getIncomingValue(i),
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+ return true;
+ return false;
+ }
+
+ // Check each unique source of the PHI node against B.
+ SmallPtrSet<const Value *, 4> UniqueSrc;
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
+ const Value *PV1 = A->getIncomingValue(i);
+ if (UniqueSrc.insert(PV1) && related(PV1, B))
+ return true;
+ }
+
+ // All of the arms checked out.
+ return false;
+}
+
+/// Test if the value of P, or any value covered by its provenance, is ever
+/// stored within the function (not counting callees).
+static bool IsStoredObjCPointer(const Value *P) {
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Worklist;
+ Worklist.push_back(P);
+ Visited.insert(P);
+ do {
+ P = Worklist.pop_back_val();
+ for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+ UI != UE; ++UI) {
+ const User *Ur = *UI;
+ if (isa<StoreInst>(Ur)) {
+ if (UI.getOperandNo() == 0)
+ // The pointer is stored.
+ return true;
+ // The pointed is stored through.
+ continue;
+ }
+ if (isa<CallInst>(Ur))
+ // The pointer is passed as an argument, ignore this.
+ continue;
+ if (isa<PtrToIntInst>(P))
+ // Assume the worst.
+ return true;
+ if (Visited.insert(Ur))
+ Worklist.push_back(Ur);
+ }
+ } while (!Worklist.empty());
+
+ // Everything checked out.
+ return false;
+}
+
+bool ProvenanceAnalysis::relatedCheck(const Value *A,
+ const Value *B) {
+ // Skip past provenance pass-throughs.
+ A = GetUnderlyingObjCPtr(A);
+ B = GetUnderlyingObjCPtr(B);
+
+ // Quick check.
+ if (A == B)
+ return true;
+
+ // Ask regular AliasAnalysis, for a first approximation.
+ switch (AA->alias(A, B)) {
+ case AliasAnalysis::NoAlias:
+ return false;
+ case AliasAnalysis::MustAlias:
+ case AliasAnalysis::PartialAlias:
+ return true;
+ case AliasAnalysis::MayAlias:
+ break;
+ }
+
+ bool AIsIdentified = IsObjCIdentifiedObject(A);
+ bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+ // An ObjC-Identified object can't alias a load if it is never locally stored.
+ if (AIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(B))
+ return IsStoredObjCPointer(A);
+ if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
+ return IsStoredObjCPointer(B);
+ // Both pointers are identified and escapes aren't an evident problem.
+ return false;
+ }
+ } else if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
+ return IsStoredObjCPointer(B);
+ }
+
+ // Special handling for PHI and Select.
+ if (const PHINode *PN = dyn_cast<PHINode>(A))
+ return relatedPHI(PN, B);
+ if (const PHINode *PN = dyn_cast<PHINode>(B))
+ return relatedPHI(PN, A);
+ if (const SelectInst *S = dyn_cast<SelectInst>(A))
+ return relatedSelect(S, B);
+ if (const SelectInst *S = dyn_cast<SelectInst>(B))
+ return relatedSelect(S, A);
+
+ // Conservative.
+ return true;
+}
+
+bool ProvenanceAnalysis::related(const Value *A,
+ const Value *B) {
+ // Begin by inserting a conservative value into the map. If the insertion
+ // fails, we have the answer already. If it succeeds, leave it there until we
+ // compute the real answer to guard against recursive queries.
+ if (A > B) std::swap(A, B);
+ std::pair<CachedResultsTy::iterator, bool> Pair =
+ CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ bool Result = relatedCheck(A, B);
+ CachedResults[ValuePairTy(A, B)] = Result;
+ return Result;
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
new file mode 100644
index 000000000000..ec449fd8e747
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -0,0 +1,80 @@
+//===- ProvenanceAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares a special form of Alias Analysis called ``Provenance
+/// Analysis''. The word ``provenance'' refers to the history of the ownership
+/// of an object. Thus ``Provenance Analysis'' is an analysis which attempts to
+/// use various techniques to determine if locally
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+ class Value;
+ class AliasAnalysis;
+ class PHINode;
+ class SelectInst;
+}
+
+namespace llvm {
+namespace objcarc {
+
+/// \brief This is similar to BasicAliasAnalysis, and it uses many of the same
+/// techniques, except it uses special ObjC-specific reasoning about pointer
+/// relationships.
+///
+/// In this context ``Provenance'' is defined as the history of an object's
+/// ownership. Thus ``Provenance Analysis'' is defined by using the notion of
+/// an ``independent provenance source'' of a pointer to determine whether or
+/// not two pointers have the same provenance source and thus could
+/// potentially be related.
+class ProvenanceAnalysis {
+ AliasAnalysis *AA;
+
+ typedef std::pair<const Value *, const Value *> ValuePairTy;
+ typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+ CachedResultsTy CachedResults;
+
+ bool relatedCheck(const Value *A, const Value *B);
+ bool relatedSelect(const SelectInst *A, const Value *B);
+ bool relatedPHI(const PHINode *A, const Value *B);
+
+ void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+ ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+
+public:
+ ProvenanceAnalysis() {}
+
+ void setAA(AliasAnalysis *aa) { AA = aa; }
+
+ AliasAnalysis *getAA() const { return AA; }
+
+ bool related(const Value *A, const Value *B);
+
+ void clear() {
+ CachedResults.clear();
+ }
+};
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
new file mode 100644
index 000000000000..a09730864051
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -0,0 +1,97 @@
+//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Aggressive Dead Code Elimination pass. This pass
+// optimistically assumes that all instructions are dead until proven otherwise,
+// allowing it to eliminate dead computations that other DCE passes do not
+// catch, particularly involving loop computations.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "adce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
+using namespace llvm;
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+
+namespace {
+ struct ADCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCE() : FunctionPass(ID) {
+ initializeADCEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function& F);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.setPreservesCFG();
+ }
+
+ };
+}
+
+char ADCE::ID = 0;
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
+
+bool ADCE::runOnFunction(Function& F) {
+ SmallPtrSet<Instruction*, 128> alive;
+ SmallVector<Instruction*, 128> worklist;
+
+ // Collect the set of "root" instructions that are known live.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isa<TerminatorInst>(I.getInstructionIterator()) ||
+ isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
+ isa<LandingPadInst>(I.getInstructionIterator()) ||
+ I->mayHaveSideEffects()) {
+ alive.insert(I.getInstructionIterator());
+ worklist.push_back(I.getInstructionIterator());
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!worklist.empty()) {
+ Instruction* curr = worklist.pop_back_val();
+ for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
+ OI != OE; ++OI)
+ if (Instruction* Inst = dyn_cast<Instruction>(OI))
+ if (alive.insert(Inst))
+ worklist.push_back(Inst);
+ }
+
+ // The inverse of the live set is the dead set. These are those instructions
+ // which have no side effects and do not influence the control flow or return
+ // value of the function, and may therefore be deleted safely.
+ // NOTE: We reuse the worklist vector here for memory efficiency.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (!alive.count(I.getInstructionIterator())) {
+ worklist.push_back(I.getInstructionIterator());
+ I->dropAllReferences();
+ }
+
+ for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
+ E = worklist.end(); I != E; ++I) {
+ ++NumRemoved;
+ (*I)->eraseFromParent();
+ }
+
+ return !worklist.empty();
+}
+
+FunctionPass *llvm::createAggressiveDCEPass() {
+ return new ADCE();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
new file mode 100644
index 000000000000..e755008808f6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -0,0 +1,152 @@
+//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a very simple profile guided basic block placement
+// algorithm. The idea is to put frequently executed blocks together at the
+// start of the function, and hopefully increase the number of fall-through
+// conditional branches. If there is no profile information for a particular
+// function, this pass basically orders blocks in depth-first order
+//
+// The algorithm implemented here is basically "Algo1" from "Profile Guided Code
+// Positioning" by Pettis and Hansen, except that it uses basic block counts
+// instead of edge counts. This should be improved in many ways, but is very
+// simple for now.
+//
+// Basically we "place" the entry block, then loop over all successors in a DFO,
+// placing the most frequently executed successor until we run out of blocks. I
+// told you this was _extremely_ simplistic. :) This is also much slower than it
+// could be. When it becomes important, this pass will be rewritten to use a
+// better algorithm, and then we can worry about efficiency.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "block-placement"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumMoved, "Number of basic blocks moved");
+
+namespace {
+ struct BlockPlacement : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BlockPlacement() : FunctionPass(ID) {
+ initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<ProfileInfo>();
+ //AU.addPreserved<ProfileInfo>(); // Does this work?
+ }
+ private:
+ /// PI - The profile information that is guiding us.
+ ///
+ ProfileInfo *PI;
+
+ /// NumMovedBlocks - Every time we move a block, increment this counter.
+ ///
+ unsigned NumMovedBlocks;
+
+ /// PlacedBlocks - Every time we place a block, remember it so we don't get
+ /// into infinite loops.
+ std::set<BasicBlock*> PlacedBlocks;
+
+ /// InsertPos - This an iterator to the next place we want to insert a
+ /// block.
+ Function::iterator InsertPos;
+
+ /// PlaceBlocks - Recursively place the specified blocks and any unplaced
+ /// successors.
+ void PlaceBlocks(BasicBlock *BB);
+ };
+}
+
+char BlockPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
+
+FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
+
+bool BlockPlacement::runOnFunction(Function &F) {
+ PI = &getAnalysis<ProfileInfo>();
+
+ NumMovedBlocks = 0;
+ InsertPos = F.begin();
+
+ // Recursively place all blocks.
+ PlaceBlocks(F.begin());
+
+ PlacedBlocks.clear();
+ NumMoved += NumMovedBlocks;
+ return NumMovedBlocks != 0;
+}
+
+
+/// PlaceBlocks - Recursively place the specified blocks and any unplaced
+/// successors.
+void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
+ assert(!PlacedBlocks.count(BB) && "Already placed this block!");
+ PlacedBlocks.insert(BB);
+
+ // Place the specified block.
+ if (&*InsertPos != BB) {
+ // Use splice to move the block into the right place. This avoids having to
+ // remove the block from the function then readd it, which causes a bunch of
+ // symbol table traffic that is entirely pointless.
+ Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList();
+ Blocks.splice(InsertPos, Blocks, BB);
+
+ ++NumMovedBlocks;
+ } else {
+ // This block is already in the right place, we don't have to do anything.
+ ++InsertPos;
+ }
+
+ // Keep placing successors until we run out of ones to place. Note that this
+ // loop is very inefficient (N^2) for blocks with many successors, like switch
+ // statements. FIXME!
+ while (1) {
+ // Okay, now place any unplaced successors.
+ succ_iterator SI = succ_begin(BB), E = succ_end(BB);
+
+ // Scan for the first unplaced successor.
+ for (; SI != E && PlacedBlocks.count(*SI); ++SI)
+ /*empty*/;
+ if (SI == E) return; // No more successors to place.
+
+ double MaxExecutionCount = PI->getExecutionCount(*SI);
+ BasicBlock *MaxSuccessor = *SI;
+
+ // Scan for more frequently executed successors
+ for (; SI != E; ++SI)
+ if (!PlacedBlocks.count(*SI)) {
+ double Count = PI->getExecutionCount(*SI);
+ if (Count > MaxExecutionCount ||
+ // Prefer to not disturb the code.
+ (Count == MaxExecutionCount && *SI == &*InsertPos)) {
+ MaxExecutionCount = Count;
+ MaxSuccessor = *SI;
+ }
+ }
+
+ // Now that we picked the maximally executed successor, place it.
+ PlaceBlocks(MaxSuccessor);
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
new file mode 100644
index 000000000000..015fd2e6e6fc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -0,0 +1,2012 @@
+//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in it's
+// basic-block-at-a-time approach. It should eventually be removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegenprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumBlocksElim, "Number of blocks eliminated");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
+ "sunken Cmps");
+STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
+ "of sunken Casts");
+STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
+ "computations were sunk");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumRetsDup, "Number of return instructions duplicated");
+STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
+STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
+
+static cl::opt<bool> DisableBranchOpts(
+ "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable branch optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> DisableSelectToBranch(
+ "disable-cgp-select2branch", cl::Hidden, cl::init(false),
+ cl::desc("Disable select to branch conversion."));
+
+namespace {
+ class CodeGenPrepare : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *TLI;
+ const TargetLibraryInfo *TLInfo;
+ DominatorTree *DT;
+ ProfileInfo *PFI;
+
+ /// CurInstIterator - As we scan instructions optimizing them, this is the
+ /// next instruction to optimize. Xforms that can invalidate this should
+ /// update it.
+ BasicBlock::iterator CurInstIterator;
+
+ /// Keeps track of non-local addresses that have been sunk into a block.
+ /// This allows us to avoid inserting duplicate code for blocks with
+ /// multiple load/stores of the same address.
+ DenseMap<Value*, Value*> SunkAddrs;
+
+ /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
+ /// be updated.
+ bool ModifiedDT;
+
+ /// OptSize - True if optimizing for size.
+ bool OptSize;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit CodeGenPrepare(const TargetLowering *tli = 0)
+ : FunctionPass(ID), TLI(tli) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F);
+
+ const char *getPassName() const { return "CodeGen Prepare"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ProfileInfo>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+
+ private:
+ bool EliminateFallThrough(Function &F);
+ bool EliminateMostlyEmptyBlocks(Function &F);
+ bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void EliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool OptimizeBlock(BasicBlock &BB);
+ bool OptimizeInst(Instruction *I);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
+ bool OptimizeInlineAsmInst(CallInst *CS);
+ bool OptimizeCallInst(CallInst *CI);
+ bool MoveExtToFormExtLoad(Instruction *I);
+ bool OptimizeExtUses(Instruction *I);
+ bool OptimizeSelectInst(SelectInst *SI);
+ bool DupRetToEnableTailCallOpts(BasicBlock *BB);
+ bool PlaceDbgValues(Function &F);
+ };
+}
+
+char CodeGenPrepare::ID = 0;
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+
+FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
+ return new CodeGenPrepare(TLI);
+}
+
+bool CodeGenPrepare::runOnFunction(Function &F) {
+ bool EverMadeChange = false;
+
+ ModifiedDT = false;
+ TLInfo = &getAnalysis<TargetLibraryInfo>();
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ PFI = getAnalysisIfAvailable<ProfileInfo>();
+ OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+
+ /// This optimization identifies DIV instructions that can be
+ /// profitably bypassed and carried out with a shorter, faster divide.
+ if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
+ const DenseMap<unsigned int, unsigned int> &BypassWidths =
+ TLI->getBypassSlowDivWidths();
+ for (Function::iterator I = F.begin(); I != F.end(); I++)
+ EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
+ }
+
+ // Eliminate blocks that contain only PHI nodes and an
+ // unconditional branch.
+ EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+
+ // llvm.dbg.value is far away from the value then iSel may not be able
+ // handle it properly. iSel will drop llvm.dbg.value if it can not
+ // find a node corresponding to the value.
+ EverMadeChange |= PlaceDbgValues(F);
+
+ bool MadeChange = true;
+ while (MadeChange) {
+ MadeChange = false;
+ for (Function::iterator I = F.begin(); I != F.end(); ) {
+ BasicBlock *BB = I++;
+ MadeChange |= OptimizeBlock(*BB);
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ SunkAddrs.clear();
+
+ if (!DisableBranchOpts) {
+ MadeChange = false;
+ SmallPtrSet<BasicBlock*, 8> WorkList;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+ MadeChange |= ConstantFoldTerminator(BB, true);
+ if (!MadeChange) continue;
+
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+ if (pred_begin(*II) == pred_end(*II))
+ WorkList.insert(*II);
+ }
+
+ // Delete the dead blocks and any of their dead successors.
+ MadeChange |= !WorkList.empty();
+ while (!WorkList.empty()) {
+ BasicBlock *BB = *WorkList.begin();
+ WorkList.erase(BB);
+ SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+
+ DeleteDeadBlock(BB);
+
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+ if (pred_begin(*II) == pred_end(*II))
+ WorkList.insert(*II);
+ }
+
+ // Merge pairs of basic blocks with unconditional branches, connected by
+ // a single edge.
+ if (EverMadeChange || MadeChange)
+ MadeChange |= EliminateFallThrough(F);
+
+ if (MadeChange)
+ ModifiedDT = true;
+ EverMadeChange |= MadeChange;
+ }
+
+ if (ModifiedDT && DT)
+ DT->DT->recalculate(F);
+
+ return EverMadeChange;
+}
+
+/// EliminateFallThrough - Merge basic blocks which are connected
+/// by a single edge, where one of the basic blocks has a single successor
+/// pointing to the other basic block, which has a single predecessor.
+bool CodeGenPrepare::EliminateFallThrough(Function &F) {
+ bool Changed = false;
+ // Scan all of the blocks in the function, except for the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+ // If the destination block has a single pred, then this is a trivial
+ // edge, just collapse it.
+ BasicBlock *SinglePred = BB->getSinglePredecessor();
+
+ // Don't merge if BB's address is taken.
+ if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
+
+ BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
+ if (Term && !Term->isConditional()) {
+ Changed = true;
+ DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
+ // Remember if SinglePred was the entry block of the function.
+ // If so, we will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(BB, this);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ // We have erased a block. Update the iterator.
+ I = BB;
+ }
+ }
+ return Changed;
+}
+
+/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
+/// debug info directives, and an unconditional branch. Passes before isel
+/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
+/// isel. Start by eliminating these blocks so we can split them the way we
+/// want them.
+bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
+ bool MadeChange = false;
+ // Note that this intentionally skips the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ // If this block doesn't end with an uncond branch, ignore it.
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isUnconditional())
+ continue;
+
+ // If the instruction before the branch (skipping debug info) isn't a phi
+ // node, then other stuff is happening here.
+ BasicBlock::iterator BBI = BI;
+ if (BBI != BB->begin()) {
+ --BBI;
+ while (isa<DbgInfoIntrinsic>(BBI)) {
+ if (BBI == BB->begin())
+ break;
+ --BBI;
+ }
+ if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+ continue;
+ }
+
+ // Do not break infinite loops.
+ BasicBlock *DestBB = BI->getSuccessor(0);
+ if (DestBB == BB)
+ continue;
+
+ if (!CanMergeBlocks(BB, DestBB))
+ continue;
+
+ EliminateMostlyEmptyBlock(BB);
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
+/// single uncond branch between them, and BB contains no other non-phi
+/// instructions.
+bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
+ const BasicBlock *DestBB) const {
+ // We only want to eliminate blocks whose phi nodes are used by phi nodes in
+ // the successor. If there are more complex condition (e.g. preheaders),
+ // don't mess around with them.
+ BasicBlock::const_iterator BBI = BB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ const Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != DestBB || !isa<PHINode>(User))
+ return false;
+ // If User is inside DestBB block and it is a PHINode then check
+ // incoming value. If incoming value is not from BB then this is
+ // a complex condition (e.g. preheaders) we want to avoid here.
+ if (User->getParent() == DestBB) {
+ if (const PHINode *UPN = dyn_cast<PHINode>(User))
+ for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
+ Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
+ if (Insn && Insn->getParent() == BB &&
+ Insn->getParent() != UPN->getIncomingBlock(I))
+ return false;
+ }
+ }
+ }
+ }
+
+ // If BB and DestBB contain any common predecessors, then the phi nodes in BB
+ // and DestBB may have conflicting incoming values for the block. If so, we
+ // can't merge the block.
+ const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
+ if (!DestBBPN) return true; // no conflict.
+
+ // Collect the preds of BB.
+ SmallPtrSet<const BasicBlock*, 16> BBPreds;
+ if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ // It is faster to get preds from a PHI than with pred_iterator.
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ BBPreds.insert(BBPN->getIncomingBlock(i));
+ } else {
+ BBPreds.insert(pred_begin(BB), pred_end(BB));
+ }
+
+ // Walk the preds of DestBB.
+ for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
+ if (BBPreds.count(Pred)) { // Common predecessor?
+ BBI = DestBB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ const Value *V1 = PN->getIncomingValueForBlock(Pred);
+ const Value *V2 = PN->getIncomingValueForBlock(BB);
+
+ // If V2 is a phi node in BB, look up what the mapped value will be.
+ if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
+ if (V2PN->getParent() == BB)
+ V2 = V2PN->getIncomingValueForBlock(Pred);
+
+ // If there is a conflict, bail out.
+ if (V1 != V2) return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
+/// an unconditional branch in it.
+void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ BasicBlock *DestBB = BI->getSuccessor(0);
+
+ DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
+
+ // If the destination block has a single pred, then this is a trivial edge,
+ // just collapse it.
+ if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
+ if (SinglePred != DestBB) {
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(DestBB, this);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+ return;
+ }
+ }
+
+ // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
+ // to handle the new incoming edges it is about to have.
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = DestBB->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ // Remove the incoming value for BB, and remember it.
+ Value *InVal = PN->removeIncomingValue(BB, false);
+
+ // Two options: either the InVal is a phi node defined in BB or it is some
+ // value that dominates BB.
+ PHINode *InValPhi = dyn_cast<PHINode>(InVal);
+ if (InValPhi && InValPhi->getParent() == BB) {
+ // Add all of the input values of the input PHI as inputs of this phi.
+ for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InValPhi->getIncomingValue(i),
+ InValPhi->getIncomingBlock(i));
+ } else {
+ // Otherwise, add one instance of the dominating value for each edge that
+ // we will be adding.
+ if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ PN->addIncoming(InVal, *PI);
+ }
+ }
+ }
+
+ // The PHIs are now updated, change everything that refers to BB to use
+ // DestBB and remove BB.
+ BB->replaceAllUsesWith(DestBB);
+ if (DT && !ModifiedDT) {
+ BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
+ BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
+ DT->changeImmediateDominator(DestBB, NewIDom);
+ DT->eraseNode(BB);
+ }
+ if (PFI) {
+ PFI->replaceAllUses(BB, DestBB);
+ PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
+ }
+ BB->eraseFromParent();
+ ++NumBlocksElim;
+
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+}
+
+/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced.
+///
+/// Return true if any changes are made.
+///
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
+ // If this is a noop copy,
+ EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(CI->getType());
+
+ // This is an fp<->int conversion?
+ if (SrcVT.isInteger() != DstVT.isInteger())
+ return false;
+
+ // If this is an extension, it will be a zero or sign extension, which
+ // isn't a noop.
+ if (SrcVT.bitsLT(DstVT)) return false;
+
+ // If these values will be promoted, find out what they will be promoted
+ // to. This helps us consider truncates on PPC as noop copies when they
+ // are.
+ if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
+ TargetLowering::TypePromoteInteger)
+ SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
+ if (TLI.getTypeAction(CI->getContext(), DstVT) ==
+ TargetLowering::TypePromoteInteger)
+ DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
+
+ // If, after promotion, these are the same types, this is a noop copy.
+ if (SrcVT != DstVT)
+ return false;
+
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCasts - Only insert a cast in each block once.
+ DenseMap<BasicBlock*, CastInst*> InsertedCasts;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this cast is used in. For PHI's this is the
+ // appropriate predecessor block.
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ UserBB = PN->getIncomingBlock(UI);
+ }
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // If this user is in the same block as the cast, don't change the cast.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cast into this block, use it.
+ CastInst *&InsertedCast = InsertedCasts[UserBB];
+
+ if (!InsertedCast) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ InsertedCast =
+ CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
+ InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cast with a use of the new cast.
+ TheUse = InsertedCast;
+ ++NumCastUses;
+ }
+
+ // If we removed all uses, nuke the cast.
+ if (CI->use_empty()) {
+ CI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// the number of virtual registers that must be created and coalesced. This is
+/// a clear win except on targets with multiple condition code registers
+/// (PowerPC), where it might lose; some adjustment may be wanted there.
+///
+/// Return true if any changes are made.
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCmp - Only insert a cmp in each block once.
+ DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ // Figure out which BB this cmp is used in.
+ BasicBlock *UserBB = User->getParent();
+
+ // If this user is in the same block as the cmp, don't change the cmp.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cmp into this block, use it.
+ CmpInst *&InsertedCmp = InsertedCmps[UserBB];
+
+ if (!InsertedCmp) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ InsertedCmp =
+ CmpInst::Create(CI->getOpcode(),
+ CI->getPredicate(), CI->getOperand(0),
+ CI->getOperand(1), "", InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cmp with a use of the new cmp.
+ TheUse = InsertedCmp;
+ ++NumCmpUses;
+ }
+
+ // If we removed all uses, nuke the cmp.
+ if (CI->use_empty())
+ CI->eraseFromParent();
+
+ return MadeChange;
+}
+
+namespace {
+class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
+protected:
+ void replaceCall(Value *With) {
+ CI->replaceAllUsesWith(With);
+ CI->eraseFromParent();
+ }
+ bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
+ return SizeCI->isAllOnesValue();
+ return false;
+ }
+};
+} // end anonymous namespace
+
+bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+ BasicBlock *BB = CI->getParent();
+
+ // Lower inline assembly if we can.
+ // If we found an inline asm expession, and if the target knows how to
+ // lower it to normal LLVM code, do so now.
+ if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+ if (TLI->ExpandInlineAsm(CI)) {
+ // Avoid invalidating the iterator.
+ CurInstIterator = BB->begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ return true;
+ }
+ // Sink address computing for memory operands into the block.
+ if (OptimizeInlineAsmInst(CI))
+ return true;
+ }
+
+ // Lower all uses of llvm.objectsize.*
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+ if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+ Type *ReturnTy = CI->getType();
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+
+ // Substituting this can cause recursive simplifications, which can
+ // invalidate our iterator. Use a WeakVH to hold onto it in case this
+ // happens.
+ WeakVH IterHandle(CurInstIterator);
+
+ replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
+ TLInfo, ModifiedDT ? 0 : DT);
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurInstIterator) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ return true;
+ }
+
+ if (II && TLI) {
+ SmallVector<Value*, 2> PtrOps;
+ Type *AccessTy;
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty())
+ if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+ return true;
+ }
+
+ // From here on out we're working with named functions.
+ if (CI->getCalledFunction() == 0) return false;
+
+ // We'll need DataLayout from here on out.
+ const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
+ if (!TD) return false;
+
+ // Lower all default uses of _chk calls. This is very similar
+ // to what InstCombineCalls does, but here we are only lowering calls
+ // that have the default "don't know" as the objectsize. Anything else
+ // should be left alone.
+ CodeGenPrepareFortifiedLibCalls Simplifier;
+ return Simplifier.fold(CI, TD, TLInfo);
+}
+
+/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
+/// instructions to the predecessor to enable tail call optimizations. The
+/// case it is currently looking for is:
+/// @code
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// br label %return
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// br label %return
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// br label %return
+/// return:
+/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+/// ret i32 %retval
+/// @endcode
+///
+/// =>
+///
+/// @code
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// ret i32 %tmp0
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// ret i32 %tmp1
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// ret i32 %tmp2
+/// @endcode
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
+ if (!TLI)
+ return false;
+
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI)
+ return false;
+
+ PHINode *PN = 0;
+ BitCastInst *BCI = 0;
+ Value *V = RI->getReturnValue();
+ if (V) {
+ BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ V = BCI->getOperand(0);
+
+ PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return false;
+ }
+
+ if (PN && PN->getParent() != BB)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ // See llvm::isInTailCallPosition().
+ const Function *F = BB->getParent();
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ return false;
+
+ // Make sure there are no instructions between the PHI and return, or that the
+ // return is the first instruction in the block.
+ if (PN) {
+ BasicBlock::iterator BI = BB->begin();
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI == BCI)
+ // Also skip over the bitcast.
+ ++BI;
+ if (&*BI != RI)
+ return false;
+ } else {
+ BasicBlock::iterator BI = BB->begin();
+ while (isa<DbgInfoIntrinsic>(BI)) ++BI;
+ if (&*BI != RI)
+ return false;
+ }
+
+ /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
+ /// call.
+ SmallVector<CallInst*, 4> TailCalls;
+ if (PN) {
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+ CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+ // Make sure the phi value is indeed produced by the tail call.
+ if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
+ TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ } else {
+ SmallPtrSet<BasicBlock*, 4> VisitedBBs;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ if (!VisitedBBs.insert(*PI))
+ continue;
+
+ BasicBlock::InstListType &InstList = (*PI)->getInstList();
+ BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
+ BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
+ do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
+ if (RI == RE)
+ continue;
+
+ CallInst *CI = dyn_cast<CallInst>(&*RI);
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
+ CallInst *CI = TailCalls[i];
+ CallSite CS(CI);
+
+ // Conservatively require the attributes of the call to match those of the
+ // return. Ignore noalias because it doesn't affect the call sequence.
+ AttributeSet CalleeAttrs = CS.getAttributes();
+ if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias))
+ continue;
+
+ // Make sure the call instruction is followed by an unconditional branch to
+ // the return block.
+ BasicBlock *CallBB = CI->getParent();
+ BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+ if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+ continue;
+
+ // Duplicate the return into CallBB.
+ (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+ ModifiedDT = Changed = true;
+ ++NumRetsDup;
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
+ BB->eraseFromParent();
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Optimization
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
+/// which holds actual Value*'s for register values.
+struct ExtAddrMode : public TargetLowering::AddrMode {
+ Value *BaseReg;
+ Value *ScaledReg;
+ ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
+ void print(raw_ostream &OS) const;
+ void dump() const;
+
+ bool operator==(const ExtAddrMode& O) const {
+ return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
+ (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
+ (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
+ }
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
+ AM.print(OS);
+ return OS;
+}
+
+void ExtAddrMode::print(raw_ostream &OS) const {
+ bool NeedPlus = false;
+ OS << "[";
+ if (BaseGV) {
+ OS << (NeedPlus ? " + " : "")
+ << "GV:";
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ if (BaseOffs)
+ OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+
+ if (BaseReg) {
+ OS << (NeedPlus ? " + " : "")
+ << "Base:";
+ WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+ if (Scale) {
+ OS << (NeedPlus ? " + " : "")
+ << Scale << "*";
+ WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ OS << ']';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ExtAddrMode::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
+
+
+/// \brief A helper class for matching addressing modes.
+///
+/// This encapsulates the logic for matching the target-legal addressing modes.
+class AddressingModeMatcher {
+ SmallVectorImpl<Instruction*> &AddrModeInsts;
+ const TargetLowering &TLI;
+
+ /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
+ /// the memory instruction that we're computing this address for.
+ Type *AccessTy;
+ Instruction *MemoryInst;
+
+ /// AddrMode - This is the addressing mode that we're building up. This is
+ /// part of the return value of this addressing mode matching stuff.
+ ExtAddrMode &AddrMode;
+
+ /// IgnoreProfitability - This is set to true when we should not do
+ /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
+ /// always returns true.
+ bool IgnoreProfitability;
+
+ AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
+ const TargetLowering &T, Type *AT,
+ Instruction *MI, ExtAddrMode &AM)
+ : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
+ IgnoreProfitability = false;
+ }
+public:
+
+ /// Match - Find the maximal addressing mode that a load/store of V can fold,
+ /// give an access type of AccessTy. This returns a list of involved
+ /// instructions in AddrModeInsts.
+ static ExtAddrMode Match(Value *V, Type *AccessTy,
+ Instruction *MemoryInst,
+ SmallVectorImpl<Instruction*> &AddrModeInsts,
+ const TargetLowering &TLI) {
+ ExtAddrMode Result;
+
+ bool Success =
+ AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
+ MemoryInst, Result).MatchAddr(V, 0);
+ (void)Success; assert(Success && "Couldn't select *anything*?");
+ return Result;
+ }
+private:
+ bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+ bool MatchAddr(Value *V, unsigned Depth);
+ bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
+ bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
+ ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter);
+ bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+};
+
+/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Return true and update AddrMode if this addr mode is legal for the target,
+/// false if not.
+bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+ unsigned Depth) {
+ // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+ // mode. Just process that directly.
+ if (Scale == 1)
+ return MatchAddr(ScaleReg, Depth);
+
+ // If the scale is 0, it takes nothing to add this.
+ if (Scale == 0)
+ return true;
+
+ // If we already have a scale of this value, we can add to it, otherwise, we
+ // need an available scale field.
+ if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+ return false;
+
+ ExtAddrMode TestAddrMode = AddrMode;
+
+ // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+ // [A+B + A*7] -> [B+A*8].
+ TestAddrMode.Scale += Scale;
+ TestAddrMode.ScaledReg = ScaleReg;
+
+ // If the new address isn't legal, bail out.
+ if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+ return false;
+
+ // It was legal, so commit it.
+ AddrMode = TestAddrMode;
+
+ // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+ // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+ // X*Scale + C*Scale to addr mode.
+ ConstantInt *CI = 0; Value *AddLHS = 0;
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ TestAddrMode.ScaledReg = AddLHS;
+ TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+ // If this addressing mode is legal, commit it and remember that we folded
+ // this instruction.
+ if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+ AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ }
+
+ // Otherwise, not (x+c)*scale, just return what we have.
+ return true;
+}
+
+/// MightBeFoldableInst - This is a little filter, which returns true if an
+/// addressing computation involving I might be folded into a load/store
+/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ // Don't touch identity bitcasts.
+ if (I->getType() == I->getOperand(0)->getType())
+ return false;
+ return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return true;
+ case Instruction::IntToPtr:
+ // We know the input is intptr_t, so this is foldable.
+ return true;
+ case Instruction::Add:
+ return true;
+ case Instruction::Mul:
+ case Instruction::Shl:
+ // Can only handle X*C and X << C.
+ return isa<ConstantInt>(I->getOperand(1));
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// MatchOperationAddr - Given an instruction or constant expr, see if we can
+/// fold the operation into the addressing mode. If so, update the addressing
+/// mode and return true, otherwise return false without modifying AddrMode.
+bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+ unsigned Depth) {
+ // Avoid exponential behavior on extremely deep expression trees.
+ if (Depth >= 5) return false;
+
+ switch (Opcode) {
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ case Instruction::IntToPtr:
+ // This inttoptr is a no-op if the integer type is pointer sized.
+ if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
+ TLI.getPointerTy())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::BitCast:
+ // BitCast is always a noop, and we can handle it as long as it is
+ // int->int or pointer->pointer (we don't want int<->fp or something).
+ if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
+ AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
+ // Don't touch identity bitcasts. These were probably put here by LSR,
+ // and we don't want to mess around with them. Assume it knows what it
+ // is doing.
+ AddrInst->getOperand(0)->getType() != AddrInst->getType())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::Add: {
+ // Check to see if we can merge in the RHS then the LHS. If so, we win.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+ if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+
+ // Restore the old addr mode info.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+
+ // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(1), Depth+1))
+ return true;
+
+ // Otherwise we definitely can't merge the ADD in.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ break;
+ }
+ //case Instruction::Or:
+ // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+ //break;
+ case Instruction::Mul:
+ case Instruction::Shl: {
+ // Can only handle X*C and X << C.
+ ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+ if (!RHS) return false;
+ int64_t Scale = RHS->getSExtValue();
+ if (Opcode == Instruction::Shl)
+ Scale = 1LL << Scale;
+
+ return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ }
+ case Instruction::GetElementPtr: {
+ // Scan the GEP. We check it if it contains constant offsets and at most
+ // one variable offset.
+ int VariableOperand = -1;
+ unsigned VariableScale = 0;
+
+ int64_t ConstantOffset = 0;
+ const DataLayout *TD = TLI.getDataLayout();
+ gep_type_iterator GTI = gep_type_begin(AddrInst);
+ for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx =
+ cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+ ConstantOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+ ConstantOffset += CI->getSExtValue()*TypeSize;
+ } else if (TypeSize) { // Scales of zero don't do anything.
+ // We only allow one variable index at the moment.
+ if (VariableOperand != -1)
+ return false;
+
+ // Remember the variable index.
+ VariableOperand = i;
+ VariableScale = TypeSize;
+ }
+ }
+ }
+
+ // A common case is for the GEP to only do a constant offset. In this case,
+ // just add it to the disp field and check validity.
+ if (VariableOperand == -1) {
+ AddrMode.BaseOffs += ConstantOffset;
+ if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+ // Check to see if we can fold the base pointer in too.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+ }
+ AddrMode.BaseOffs -= ConstantOffset;
+ return false;
+ }
+
+ // Save the valid addressing mode in case we can't match.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // See if the scale and offset amount is valid for this target.
+ AddrMode.BaseOffs += ConstantOffset;
+
+ // Match the base operand of the GEP.
+ if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+ // If it couldn't be matched, just stuff the value in a register.
+ if (AddrMode.HasBaseReg) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ }
+
+ // Match the remaining variable portion of the GEP.
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ Depth)) {
+ // If it couldn't be matched, try stuffing the base into a register
+ // instead of matching it, and retrying the match of the scale.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ if (AddrMode.HasBaseReg)
+ return false;
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ AddrMode.BaseOffs += ConstantOffset;
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+ VariableScale, Depth)) {
+ // If even that didn't work, bail.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode. If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+ // If this is a global variable, try to fold it into the addressing mode.
+ if (AddrMode.BaseGV == 0) {
+ AddrMode.BaseGV = GV;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseGV = 0;
+ }
+ } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // Check to see if it is possible to fold this operation.
+ if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
+ // Okay, it's possible to fold this. Check to see if it is actually
+ // *profitable* to do so. We use a simple cost model to avoid increasing
+ // register pressure too much.
+ if (I->hasOneUse() ||
+ IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ AddrModeInsts.push_back(I);
+ return true;
+ }
+
+ // It isn't profitable to do this, roll back.
+ //cerr << "NOT FOLDING: " << *I;
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ return true;
+ } else if (isa<ConstantPointerNull>(Addr)) {
+ // Null pointer gets folded without affecting the addressing mode.
+ return true;
+ }
+
+ // Worse case, the target should support [reg] addressing modes. :)
+ if (!AddrMode.HasBaseReg) {
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = Addr;
+ // Still check for legality in case the target supports [imm] but not [i+r].
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.HasBaseReg = false;
+ AddrMode.BaseReg = 0;
+ }
+
+ // If the base register is already taken, see if we can do [r+r].
+ if (AddrMode.Scale == 0) {
+ AddrMode.Scale = 1;
+ AddrMode.ScaledReg = Addr;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.Scale = 0;
+ AddrMode.ScaledReg = 0;
+ }
+ // Couldn't match.
+ return false;
+}
+
+/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
+/// inline asm call are due to memory operands. If so, return true, otherwise
+/// return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+ const TargetLowering &TLI) {
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+ // If this asm operand is our Value*, and if it isn't an indirect memory
+ // operand, we can't fold it!
+ if (OpInfo.CallOperandVal == OpVal &&
+ (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+ !OpInfo.isIndirect))
+ return false;
+ }
+
+ return true;
+}
+
+/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
+/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
+static bool FindAllMemoryUses(Instruction *I,
+ SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+ SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+ const TargetLowering &TLI) {
+ // If we already considered this instruction, we're done.
+ if (!ConsideredInsts.insert(I))
+ return false;
+
+ // If this is an obviously unfoldable instruction, bail out.
+ if (!MightBeFoldableInst(I))
+ return true;
+
+ // Loop over all the uses, recursively processing them.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ unsigned opNo = UI.getOperandNo();
+ if (opNo == 0) return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(SI, opNo));
+ continue;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+ if (!IA) return true;
+
+ // If this is a memory operand, we're cool, otherwise bail out.
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ return true;
+ continue;
+ }
+
+ if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
+ TLI))
+ return true;
+ }
+
+ return false;
+}
+
+/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at
+/// the use site that we're folding it into. If so, there is no cost to
+/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
+/// that we know are live at the instruction already.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+ Value *KnownLive2) {
+ // If Val is either of the known-live values, we know it is live!
+ if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+ return true;
+
+ // All values other than instructions and arguments (e.g. constants) are live.
+ if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+ // If Val is a constant sized alloca in the entry block, it is live, this is
+ // true because it is just a reference to the stack/frame pointer, which is
+ // live for the whole function.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+ if (AI->isStaticAlloca())
+ return true;
+
+ // Check to see if this value is already used in the memory instruction's
+ // block. If so, it's already live into the block at the very least, so we
+ // can reasonably fold it.
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
+}
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it. However, the specified instruction has multiple
+/// uses. Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+/// X = ...
+/// Y = X+1
+/// use(Y) -> nonload/store
+/// Z = Y+1
+/// load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability) return true;
+
+ // AMBefore is the addressing mode before this instruction was folded into it,
+ // and AMAfter is the addressing mode after the instruction was folded. Get
+ // the set of registers referenced by AMAfter and subtract out those
+ // referenced by AMBefore: this is the set of values which folding in this
+ // address extends the lifetime of.
+ //
+ // Note that there are only two potential values being referenced here,
+ // BaseReg and ScaleReg (global addresses are always available, as are any
+ // folded immediates).
+ Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+ // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+ // lifetime wasn't extended by adding this instruction.
+ if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ BaseReg = 0;
+ if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ ScaledReg = 0;
+
+ // If folding this instruction (and it's subexprs) didn't extend any live
+ // ranges, we're ok with it.
+ if (BaseReg == 0 && ScaledReg == 0)
+ return true;
+
+ // If all uses of this instruction are ultimately load/store/inlineasm's,
+ // check to see if their addressing modes will include this instruction. If
+ // so, we can fold it into all uses, so it doesn't matter if it has multiple
+ // uses.
+ SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallPtrSet<Instruction*, 16> ConsideredInsts;
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ return false; // Has a non-memory, non-foldable use!
+
+ // Now that we know that all uses of this instruction are part of a chain of
+ // computation involving only operations that could theoretically be folded
+ // into a memory use, loop over each of these uses and see if they could
+ // *actually* fold the instruction.
+ SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+ for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+ Instruction *User = MemoryUses[i].first;
+ unsigned OpNo = MemoryUses[i].second;
+
+ // Get the access type of this use. If the use isn't a pointer, we don't
+ // know what it accesses.
+ Value *Address = User->getOperand(OpNo);
+ if (!Address->getType()->isPointerTy())
+ return false;
+ Type *AddressAccessTy =
+ cast<PointerType>(Address->getType())->getElementType();
+
+ // Do a match against the root of this address, ignoring profitability. This
+ // will tell us if the addressing mode for the memory operation will
+ // *actually* cover the shared instruction.
+ ExtAddrMode Result;
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ MemoryInst, Result);
+ Matcher.IgnoreProfitability = true;
+ bool Success = Matcher.MatchAddr(Address, 0);
+ (void)Success; assert(Success && "Couldn't select *anything*?");
+
+ // If the match didn't cover I, then it won't be shared by it.
+ if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
+ I) == MatchedAddrModeInsts.end())
+ return false;
+
+ MatchedAddrModeInsts.clear();
+ }
+
+ return true;
+}
+
+} // end anonymous namespace
+
+/// IsNonLocalValue - Return true if the specified values are defined in a
+/// different basic block than BB.
+static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() != BB;
+ return false;
+}
+
+/// OptimizeMemoryInst - Load and Store Instructions often have
+/// addressing modes that can do significant amounts of computation. As such,
+/// instruction selection will try to get the load or store to do as much
+/// computation as possible for the program. The problem is that isel can only
+/// see within a single block. As such, we sink as much legal addressing mode
+/// stuff into the block as possible.
+///
+/// This method is used to optimize both load/store and inline asms with memory
+/// operands.
+bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ Type *AccessTy) {
+ Value *Repl = Addr;
+
+ // Try to collapse single-value PHI nodes. This is necessary to undo
+ // unprofitable PRE transformations.
+ SmallVector<Value*, 8> worklist;
+ SmallPtrSet<Value*, 16> Visited;
+ worklist.push_back(Addr);
+
+ // Use a worklist to iteratively look through PHI nodes, and ensure that
+ // the addressing mode obtained from the non-PHI roots of the graph
+ // are equivalent.
+ Value *Consensus = 0;
+ unsigned NumUsesConsensus = 0;
+ bool IsNumUsesConsensusValid = false;
+ SmallVector<Instruction*, 16> AddrModeInsts;
+ ExtAddrMode AddrMode;
+ while (!worklist.empty()) {
+ Value *V = worklist.back();
+ worklist.pop_back();
+
+ // Break use-def graph loops.
+ if (!Visited.insert(V)) {
+ Consensus = 0;
+ break;
+ }
+
+ // For a PHI node, push all of its incoming values.
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
+ worklist.push_back(P->getIncomingValue(i));
+ continue;
+ }
+
+ // For non-PHIs, determine the addressing mode being computed.
+ SmallVector<Instruction*, 16> NewAddrModeInsts;
+ ExtAddrMode NewAddrMode =
+ AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
+ NewAddrModeInsts, *TLI);
+
+ // This check is broken into two cases with very similar code to avoid using
+ // getNumUses() as much as possible. Some values have a lot of uses, so
+ // calling getNumUses() unconditionally caused a significant compile-time
+ // regression.
+ if (!Consensus) {
+ Consensus = V;
+ AddrMode = NewAddrMode;
+ AddrModeInsts = NewAddrModeInsts;
+ continue;
+ } else if (NewAddrMode == AddrMode) {
+ if (!IsNumUsesConsensusValid) {
+ NumUsesConsensus = Consensus->getNumUses();
+ IsNumUsesConsensusValid = true;
+ }
+
+ // Ensure that the obtained addressing mode is equivalent to that obtained
+ // for all other roots of the PHI traversal. Also, when choosing one
+ // such root as representative, select the one with the most uses in order
+ // to keep the cost modeling heuristics in AddressingModeMatcher
+ // applicable.
+ unsigned NumUses = V->getNumUses();
+ if (NumUses > NumUsesConsensus) {
+ Consensus = V;
+ NumUsesConsensus = NumUses;
+ AddrModeInsts = NewAddrModeInsts;
+ }
+ continue;
+ }
+
+ Consensus = 0;
+ break;
+ }
+
+ // If the addressing mode couldn't be determined, or if multiple different
+ // ones were determined, bail out now.
+ if (!Consensus) return false;
+
+ // Check to see if any of the instructions supersumed by this addr mode are
+ // non-local to I's BB.
+ bool AnyNonLocal = false;
+ for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
+ if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
+ AnyNonLocal = true;
+ break;
+ }
+ }
+
+ // If all the instructions matched are already in this BB, don't do anything.
+ if (!AnyNonLocal) {
+ DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
+ return false;
+ }
+
+ // Insert this computation right after this user. Since our caller is
+ // scanning from the top of the BB to the bottom, reuse of the expr are
+ // guaranteed to happen later.
+ IRBuilder<> Builder(MemoryInst);
+
+ // Now that we determined the addressing expression we want to use and know
+ // that we have to sink it into this block. Check to see if we have already
+ // done this for some other load/store instr in this block. If so, reuse the
+ // computation.
+ Value *&SunkAddr = SunkAddrs[Addr];
+ if (SunkAddr) {
+ DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ if (SunkAddr->getType() != Addr->getType())
+ SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ } else {
+ DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ Type *IntPtrTy =
+ TLI->getDataLayout()->getIntPtrType(AccessTy->getContext());
+
+ Value *Result = 0;
+
+ // Start with the base register. Do this first so that subsequent address
+ // matching finds it last, which will prevent it from trying to match it
+ // as the scaled value in case it happens to be a mul. That would be
+ // problematic if we've sunk a different mul for the scale, because then
+ // we'd end up sinking both muls.
+ if (AddrMode.BaseReg) {
+ Value *V = AddrMode.BaseReg;
+ if (V->getType()->isPointerTy())
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
+ if (V->getType() != IntPtrTy)
+ V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
+ Result = V;
+ }
+
+ // Add the scale value.
+ if (AddrMode.Scale) {
+ Value *V = AddrMode.ScaledReg;
+ if (V->getType() == IntPtrTy) {
+ // done.
+ } else if (V->getType()->isPointerTy()) {
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
+ } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth()) {
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
+ } else {
+ V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
+ }
+ if (AddrMode.Scale != 1)
+ V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+ "sunkaddr");
+ if (Result)
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
+ else
+ Result = V;
+ }
+
+ // Add in the BaseGV if present.
+ if (AddrMode.BaseGV) {
+ Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
+ if (Result)
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
+ else
+ Result = V;
+ }
+
+ // Add in the Base Offset if present.
+ if (AddrMode.BaseOffs) {
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ if (Result)
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
+ else
+ Result = V;
+ }
+
+ if (Result == 0)
+ SunkAddr = Constant::getNullValue(Addr->getType());
+ else
+ SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
+ }
+
+ MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+
+ // If we have no uses, recursively delete the value and all dead instructions
+ // using it.
+ if (Repl->use_empty()) {
+ // This can cause recursive deletion, which can invalidate our iterator.
+ // Use a WeakVH to hold onto it in case this happens.
+ WeakVH IterHandle(CurInstIterator);
+ BasicBlock *BB = CurInstIterator->getParent();
+
+ RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
+
+ if (IterHandle != CurInstIterator) {
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ } else {
+ // This address is now available for reassignment, so erase the table
+ // entry; we don't want to match some completely different instruction.
+ SunkAddrs[Addr] = 0;
+ }
+ }
+ ++NumMemoryInsts;
+ return true;
+}
+
+/// OptimizeInlineAsmInst - If there are any memory operands, use
+/// OptimizeMemoryInst to sink their address computing into the block when
+/// possible / profitable.
+bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
+ bool MadeChange = false;
+
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI->ParseConstraints(CS);
+ unsigned ArgNo = 0;
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.isIndirect) {
+ Value *OpVal = CS->getArgOperand(ArgNo++);
+ MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+ } else if (OpInfo.Type == InlineAsm::isInput)
+ ArgNo++;
+ }
+
+ return MadeChange;
+}
+
+/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
+/// basic block as the load, unless conditions are unfavorable. This allows
+/// SelectionDAG to fold the extend into the load.
+///
+bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
+ // Look for a load being extended.
+ LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
+ if (!LI) return false;
+
+ // If they're already in the same block, there's nothing to do.
+ if (LI->getParent() == I->getParent())
+ return false;
+
+ // If the load has other users and the truncate is not free, this probably
+ // isn't worthwhile.
+ if (!LI->hasOneUse() &&
+ TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
+ !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
+ !TLI->isTruncateFree(I->getType(), LI->getType()))
+ return false;
+
+ // Check whether the target supports casts folded into loads.
+ unsigned LType;
+ if (isa<ZExtInst>(I))
+ LType = ISD::ZEXTLOAD;
+ else {
+ assert(isa<SExtInst>(I) && "Unexpected ext type!");
+ LType = ISD::SEXTLOAD;
+ }
+ if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
+ return false;
+
+ // Move the extend into the same block as the load, so that SelectionDAG
+ // can fold it.
+ I->removeFromParent();
+ I->insertAfter(LI);
+ ++NumExtsMoved;
+ return true;
+}
+
+bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
+ BasicBlock *DefBB = I->getParent();
+
+ // If the result of a {s|z}ext and its source are both live out, rewrite all
+ // other uses of the source with result of extension.
+ Value *Src = I->getOperand(0);
+ if (Src->hasOneUse())
+ return false;
+
+ // Only do this xform if truncating is free.
+ if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
+ return false;
+
+ // Only safe to perform the optimization if the source is also defined in
+ // this block.
+ if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
+ return false;
+
+ bool DefIsLiveOut = false;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ DefIsLiveOut = true;
+ break;
+ }
+ if (!DefIsLiveOut)
+ return false;
+
+ // Make sure non of the uses are PHI nodes.
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ // Be conservative. We don't want this xform to end up introducing
+ // reloads just before load / store instructions.
+ if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
+ return false;
+ }
+
+ // InsertedTruncs - Only insert one trunc in each block once.
+ DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+
+ // Both src and def are live in this block. Rewrite the use.
+ Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
+
+ if (!InsertedTrunc) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
+ }
+
+ // Replace a use of the {s|z}ext source with a use of the result.
+ TheUse = InsertedTrunc;
+ ++NumExtUses;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
+/// turned into an explicit branch.
+static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
+ // FIXME: This should use the same heuristics as IfConversion to determine
+ // whether a select is better represented as a branch. This requires that
+ // branch probability metadata is preserved for the select, which is not the
+ // case currently.
+
+ CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+
+ // If the branch is predicted right, an out of order CPU can avoid blocking on
+ // the compare. Emit cmovs on compares with a memory operand as branches to
+ // avoid stalls on the load from memory. If the compare has more than one use
+ // there's probably another cmov or setcc around so it's not worth emitting a
+ // branch.
+ if (!Cmp)
+ return false;
+
+ Value *CmpOp0 = Cmp->getOperand(0);
+ Value *CmpOp1 = Cmp->getOperand(1);
+
+ // We check that the memory operand has one use to avoid uses of the loaded
+ // value directly after the compare, making branches unprofitable.
+ return Cmp->hasOneUse() &&
+ ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
+ (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
+}
+
+
+/// If we have a SelectInst that will likely profit from branch prediction,
+/// turn it into a branch.
+bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
+ bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
+
+ // Can we convert the 'select' to CF ?
+ if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
+ return false;
+
+ TargetLowering::SelectSupportKind SelectKind;
+ if (VectorCond)
+ SelectKind = TargetLowering::VectorMaskSelect;
+ else if (SI->getType()->isVectorTy())
+ SelectKind = TargetLowering::ScalarCondVectorVal;
+ else
+ SelectKind = TargetLowering::ScalarValSelect;
+
+ // Do we have efficient codegen support for this kind of 'selects' ?
+ if (TLI->isSelectSupported(SelectKind)) {
+ // We have efficient codegen support for the select instruction.
+ // Check if it is profitable to keep this 'select'.
+ if (!TLI->isPredictableSelectExpensive() ||
+ !isFormingBranchFromSelectProfitable(SI))
+ return false;
+ }
+
+ ModifiedDT = true;
+
+ // First, we split the block containing the select into 2 blocks.
+ BasicBlock *StartBlock = SI->getParent();
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
+ BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+
+ // Create a new block serving as the landing pad for the branch.
+ BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
+ NextBlock->getParent(), NextBlock);
+
+ // Move the unconditional branch from the block with the select in it into our
+ // landing pad block.
+ StartBlock->getTerminator()->eraseFromParent();
+ BranchInst::Create(NextBlock, SmallBlock);
+
+ // Insert the real conditional branch based on the original condition.
+ BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
+
+ // The select itself is replaced with a PHI Node.
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
+ PN->takeName(SI);
+ PN->addIncoming(SI->getTrueValue(), StartBlock);
+ PN->addIncoming(SI->getFalseValue(), SmallBlock);
+ SI->replaceAllUsesWith(PN);
+ SI->eraseFromParent();
+
+ // Instruct OptimizeBlock to skip to the next block.
+ CurInstIterator = StartBlock->end();
+ ++NumSelectsExpanded;
+ return true;
+}
+
+bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+ if (PHINode *P = dyn_cast<PHINode>(I)) {
+ // It is possible for very late stage optimizations (such as SimplifyCFG)
+ // to introduce PHI nodes too late to be cleaned up. If we detect such a
+ // trivial PHI, go ahead and zap it here.
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
+ P->eraseFromParent();
+ ++NumPHIsElim;
+ return true;
+ }
+ return false;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // If the source of the cast is a constant, then this should have
+ // already been constant folded. The only reason NOT to constant fold
+ // it is if something (e.g. LSR) was careful to place the constant
+ // evaluation in a block other than then one that uses it (e.g. to hoist
+ // the address of globals out of a loop). If this is the case, we don't
+ // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0)))
+ return false;
+
+ if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+ return true;
+
+ if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+ bool MadeChange = MoveExtToFormExtLoad(I);
+ return MadeChange | OptimizeExtUses(I);
+ }
+ return false;
+ }
+
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ return OptimizeCmpExpression(CI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+ return false;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType());
+ return false;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEPI->hasAllZeroIndices()) {
+ /// The GEP operand must be a pointer, so must its result -> BitCast
+ Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NC);
+ GEPI->eraseFromParent();
+ ++NumGEPsElim;
+ OptimizeInst(NC);
+ return true;
+ }
+ return false;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return OptimizeCallInst(CI);
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I))
+ return OptimizeSelectInst(SI);
+
+ return false;
+}
+
+// In this pass we look for GEP and cast instructions that are used
+// across basic blocks and rewrite them to improve basic-block-at-a-time
+// selection.
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+ SunkAddrs.clear();
+ bool MadeChange = false;
+
+ CurInstIterator = BB.begin();
+ while (CurInstIterator != BB.end())
+ MadeChange |= OptimizeInst(CurInstIterator++);
+
+ MadeChange |= DupRetToEnableTailCallOpts(&BB);
+
+ return MadeChange;
+}
+
+// llvm.dbg.value is far away from the value then iSel may not be able
+// handle it properly. iSel will drop llvm.dbg.value if it can not
+// find a node corresponding to the value.
+bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+ bool MadeChange = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ Instruction *PrevNonDbgInst = NULL;
+ for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+ Instruction *Insn = BI; ++BI;
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+ if (!DVI) {
+ PrevNonDbgInst = Insn;
+ continue;
+ }
+
+ Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
+ if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+ DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
+ }
+ }
+ }
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
new file mode 100644
index 000000000000..d5a96eceb993
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -0,0 +1,98 @@
+//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements constant propagation and merging:
+//
+// Specifically, this:
+// * Converts instructions like "add int 1, 2" into 3
+//
+// Notice that:
+// * This pass has a habit of making definitions be dead. It is a good idea
+// to run a DIE pass sometime after running this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constprop"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInstKilled, "Number of instructions killed");
+
+namespace {
+ struct ConstantPropagation : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantPropagation() : FunctionPass(ID) {
+ initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ };
+}
+
+char ConstantPropagation::ID = 0;
+INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false)
+
+FunctionPass *llvm::createConstantPropagationPass() {
+ return new ConstantPropagation();
+}
+
+bool ConstantPropagation::runOnFunction(Function &F) {
+ // Initialize the worklist to all of the instructions ready to process...
+ std::set<Instruction*> WorkList;
+ for(inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+ WorkList.insert(&*i);
+ }
+ bool Changed = false;
+ DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+
+ while (!WorkList.empty()) {
+ Instruction *I = *WorkList.begin();
+ WorkList.erase(WorkList.begin()); // Get an element from the worklist...
+
+ if (!I->use_empty()) // Don't muck with dead instructions...
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
+ // Add all of the users of this instruction to the worklist, they might
+ // be constant propagatable now...
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ WorkList.insert(cast<Instruction>(*UI));
+
+ // Replace all of the uses of a variable with uses of the constant.
+ I->replaceAllUsesWith(C);
+
+ // Remove the dead instruction.
+ WorkList.erase(I);
+ I->eraseFromParent();
+
+ // We made a change to the function...
+ Changed = true;
+ ++NumInstKilled;
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
new file mode 100644
index 000000000000..995782e1bc6b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -0,0 +1,321 @@
+//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Correlated Value Propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "correlated-value-propagation"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumPhis, "Number of phis propagated");
+STATISTIC(NumSelects, "Number of selects propagated");
+STATISTIC(NumMemAccess, "Number of memory access targets propagated");
+STATISTIC(NumCmps, "Number of comparisons propagated");
+STATISTIC(NumDeadCases, "Number of switch cases removed");
+
+namespace {
+ class CorrelatedValuePropagation : public FunctionPass {
+ LazyValueInfo *LVI;
+
+ bool processSelect(SelectInst *SI);
+ bool processPHI(PHINode *P);
+ bool processMemAccess(Instruction *I);
+ bool processCmp(CmpInst *C);
+ bool processSwitch(SwitchInst *SI);
+
+ public:
+ static char ID;
+ CorrelatedValuePropagation(): FunctionPass(ID) {
+ initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LazyValueInfo>();
+ }
+ };
+}
+
+char CorrelatedValuePropagation::ID = 0;
+INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false)
+
+// Public interface to the Value Propagation pass
+Pass *llvm::createCorrelatedValuePropagationPass() {
+ return new CorrelatedValuePropagation();
+}
+
+bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
+ if (S->getType()->isVectorTy()) return false;
+ if (isa<Constant>(S->getOperand(0))) return false;
+
+ Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
+ if (!C) return false;
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return false;
+
+ Value *ReplaceWith = S->getOperand(1);
+ Value *Other = S->getOperand(2);
+ if (!CI->isOne()) std::swap(ReplaceWith, Other);
+ if (ReplaceWith == S) ReplaceWith = UndefValue::get(S->getType());
+
+ S->replaceAllUsesWith(ReplaceWith);
+ S->eraseFromParent();
+
+ ++NumSelects;
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::processPHI(PHINode *P) {
+ bool Changed = false;
+
+ BasicBlock *BB = P->getParent();
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ Value *Incoming = P->getIncomingValue(i);
+ if (isa<Constant>(Incoming)) continue;
+
+ Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB);
+
+ // Look if the incoming value is a select with a constant but LVI tells us
+ // that the incoming value can never be that constant. In that case replace
+ // the incoming value with the other value of the select. This often allows
+ // us to remove the select later.
+ if (!V) {
+ SelectInst *SI = dyn_cast<SelectInst>(Incoming);
+ if (!SI) continue;
+
+ Constant *C = dyn_cast<Constant>(SI->getFalseValue());
+ if (!C) continue;
+
+ if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
+ P->getIncomingBlock(i), BB) !=
+ LazyValueInfo::False)
+ continue;
+
+ DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n');
+ V = SI->getTrueValue();
+ }
+
+ P->setIncomingValue(i, V);
+ Changed = true;
+ }
+
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
+ P->eraseFromParent();
+ Changed = true;
+ }
+
+ if (Changed)
+ ++NumPhis;
+
+ return Changed;
+}
+
+bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
+ Value *Pointer = 0;
+ if (LoadInst *L = dyn_cast<LoadInst>(I))
+ Pointer = L->getPointerOperand();
+ else
+ Pointer = cast<StoreInst>(I)->getPointerOperand();
+
+ if (isa<Constant>(Pointer)) return false;
+
+ Constant *C = LVI->getConstant(Pointer, I->getParent());
+ if (!C) return false;
+
+ ++NumMemAccess;
+ I->replaceUsesOfWith(Pointer, C);
+ return true;
+}
+
+/// processCmp - If the value of this comparison could be determined locally,
+/// constant propagation would already have figured it out. Instead, walk
+/// the predecessors and statically evaluate the comparison based on information
+/// available on that edge. If a given static evaluation is true on ALL
+/// incoming edges, then it's true universally and we can simplify the compare.
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
+ Value *Op0 = C->getOperand(0);
+ if (isa<Instruction>(Op0) &&
+ cast<Instruction>(Op0)->getParent() == C->getParent())
+ return false;
+
+ Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+ if (!Op1) return false;
+
+ pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
+ if (PI == PE) return false;
+
+ LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Result == LazyValueInfo::Unknown) return false;
+
+ ++PI;
+ while (PI != PE) {
+ LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Res != Result) return false;
+ ++PI;
+ }
+
+ ++NumCmps;
+
+ if (Result == LazyValueInfo::True)
+ C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
+ else
+ C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
+
+ C->eraseFromParent();
+
+ return true;
+}
+
+/// processSwitch - Simplify a switch instruction by removing cases which can
+/// never fire. If the uselessness of a case could be determined locally then
+/// constant propagation would already have figured it out. Instead, walk the
+/// predecessors and statically evaluate cases based on information available
+/// on that edge. Cases that cannot fire no matter what the incoming edge can
+/// safely be removed. If a case fires on every incoming edge then the entire
+/// switch can be removed and replaced with a branch to the case destination.
+bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
+ Value *Cond = SI->getCondition();
+ BasicBlock *BB = SI->getParent();
+
+ // If the condition was defined in same block as the switch then LazyValueInfo
+ // currently won't say anything useful about it, though in theory it could.
+ if (isa<Instruction>(Cond) && cast<Instruction>(Cond)->getParent() == BB)
+ return false;
+
+ // If the switch is unreachable then trying to improve it is a waste of time.
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+ if (PB == PE) return false;
+
+ // Analyse each switch case in turn. This is done in reverse order so that
+ // removing a case doesn't cause trouble for the iteration.
+ bool Changed = false;
+ for (SwitchInst::CaseIt CI = SI->case_end(), CE = SI->case_begin(); CI-- != CE;
+ ) {
+ ConstantInt *Case = CI.getCaseValue();
+
+ // Check to see if the switch condition is equal to/not equal to the case
+ // value on every incoming edge, equal/not equal being the same each time.
+ LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ // Is the switch condition equal to the case value?
+ LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+ Cond, Case, *PI, BB);
+ // Give up on this case if nothing is known.
+ if (Value == LazyValueInfo::Unknown) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
+
+ // If this was the first edge to be visited, record that all other edges
+ // need to give the same result.
+ if (PI == PB) {
+ State = Value;
+ continue;
+ }
+
+ // If this case is known to fire for some edges and known not to fire for
+ // others then there is nothing we can do - give up.
+ if (Value != State) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
+ }
+
+ if (State == LazyValueInfo::False) {
+ // This case never fires - remove it.
+ CI.getCaseSuccessor()->removePredecessor(BB);
+ SI->removeCase(CI); // Does not invalidate the iterator.
+
+ // The condition can be modified by removePredecessor's PHI simplification
+ // logic.
+ Cond = SI->getCondition();
+
+ ++NumDeadCases;
+ Changed = true;
+ } else if (State == LazyValueInfo::True) {
+ // This case always fires. Arrange for the switch to be turned into an
+ // unconditional branch by replacing the switch condition with the case
+ // value.
+ SI->setCondition(Case);
+ NumDeadCases += SI->getNumCases();
+ Changed = true;
+ break;
+ }
+ }
+
+ if (Changed)
+ // If the switch has been simplified to the point where it can be replaced
+ // by a branch then do so now.
+ ConstantFoldTerminator(BB);
+
+ return Changed;
+}
+
+bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+ LVI = &getAnalysis<LazyValueInfo>();
+
+ bool FnChanged = false;
+
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ bool BBChanged = false;
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+ Instruction *II = BI++;
+ switch (II->getOpcode()) {
+ case Instruction::Select:
+ BBChanged |= processSelect(cast<SelectInst>(II));
+ break;
+ case Instruction::PHI:
+ BBChanged |= processPHI(cast<PHINode>(II));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ BBChanged |= processCmp(cast<CmpInst>(II));
+ break;
+ case Instruction::Load:
+ case Instruction::Store:
+ BBChanged |= processMemAccess(II);
+ break;
+ }
+ }
+
+ Instruction *Term = FI->getTerminator();
+ switch (Term->getOpcode()) {
+ case Instruction::Switch:
+ BBChanged |= processSwitch(cast<SwitchInst>(Term));
+ break;
+ }
+
+ FnChanged |= BBChanged;
+ }
+
+ return FnChanged;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
new file mode 100644
index 000000000000..e8a090af40c3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -0,0 +1,134 @@
+//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead inst elimination and dead code elimination.
+//
+// Dead Inst Elimination performs a single pass over the function removing
+// instructions that are obviously dead. Dead Code Elimination is similar, but
+// it rechecks instructions that were used by removed instructions to see if
+// they are newly dead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
+STATISTIC(DCEEliminated, "Number of insts removed");
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // DeadInstElimination pass implementation
+ //
+ struct DeadInstElimination : public BasicBlockPass {
+ static char ID; // Pass identification, replacement for typeid
+ DeadInstElimination() : BasicBlockPass(ID) {
+ initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
+ Instruction *Inst = DI++;
+ if (isInstructionTriviallyDead(Inst, TLI)) {
+ Inst->eraseFromParent();
+ Changed = true;
+ ++DIEEliminated;
+ }
+ }
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char DeadInstElimination::ID = 0;
+INITIALIZE_PASS(DeadInstElimination, "die",
+ "Dead Instruction Elimination", false, false)
+
+Pass *llvm::createDeadInstEliminationPass() {
+ return new DeadInstElimination();
+}
+
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // DeadCodeElimination pass implementation
+ //
+ struct DCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DCE() : FunctionPass(ID) {
+ initializeDCEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char DCE::ID = 0;
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
+
+bool DCE::runOnFunction(Function &F) {
+ TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+
+ // Start out with all of the instructions in the worklist...
+ std::vector<Instruction*> WorkList;
+ for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
+ WorkList.push_back(&*i);
+
+ // Loop over the worklist finding instructions that are dead. If they are
+ // dead make them drop all of their uses, making other instructions
+ // potentially dead, and work until the worklist is empty.
+ //
+ bool MadeChange = false;
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.back();
+ WorkList.pop_back();
+
+ if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead.
+ // Loop over all of the values that the instruction uses, if there are
+ // instructions being used, add them to the worklist, because they might
+ // go dead after this one is removed.
+ //
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *Used = dyn_cast<Instruction>(*OI))
+ WorkList.push_back(Used);
+
+ // Remove the instruction.
+ I->eraseFromParent();
+
+ // Remove the instruction from the worklist if it still exists in it.
+ WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),
+ WorkList.end());
+
+ MadeChange = true;
+ ++DCEEliminated;
+ }
+ }
+ return MadeChange;
+}
+
+FunctionPass *llvm::createDeadCodeEliminationPass() {
+ return new DCE();
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
new file mode 100644
index 000000000000..57432c7d71d8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -0,0 +1,900 @@
+//===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a trivial dead store elimination that only considers
+// basic-block local redundant stores.
+//
+// FIXME: This should eventually be extended to be a post-dominator tree
+// traversal. Doing so would be pretty trivial.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumFastStores, "Number of stores deleted");
+STATISTIC(NumFastOther , "Number of other instrs removed");
+
+namespace {
+ struct DSE : public FunctionPass {
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+ const TargetLibraryInfo *TLI;
+
+ static char ID; // Pass identification, replacement for typeid
+ DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
+ initializeDSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ AA = &getAnalysis<AliasAnalysis>();
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ TLI = AA->getTargetLibraryInfo();
+
+ bool Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ // Only check non-dead blocks. Dead blocks may have strange pointer
+ // cycles that will confuse alias analysis.
+ if (DT->isReachableFromEntry(I))
+ Changed |= runOnBasicBlock(*I);
+
+ AA = 0; MD = 0; DT = 0;
+ return Changed;
+ }
+
+ bool runOnBasicBlock(BasicBlock &BB);
+ bool HandleFree(CallInst *F);
+ bool handleEndBlock(BasicBlock &BB);
+ void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallSetVector<Value*, 16> &DeadStackObjects);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ }
+ };
+}
+
+char DSE::ID = 0;
+INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
+
+FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+/// If ValueSet is non-null, remove any deleted instructions from it as well.
+///
+static void DeleteDeadInstruction(Instruction *I,
+ MemoryDependenceAnalysis &MD,
+ const TargetLibraryInfo *TLI,
+ SmallSetVector<Value*, 16> *ValueSet = 0) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+ --NumFastOther;
+
+ // Before we touch this instruction, remove it from memdep!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+ ++NumFastOther;
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // MemDep, which needs to know the operands and needs it to be in the
+ // function.
+ MD.removeInstruction(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ if (ValueSet) ValueSet->remove(DeadInst);
+ } while (!NowDeadInsts.empty());
+}
+
+
+/// hasMemoryWrite - Does this instruction write some memory? This only returns
+/// true for things that we can analyze with other helpers below.
+static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
+ if (isa<StoreInst>(I))
+ return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::init_trampoline:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ if (CallSite CS = I) {
+ if (Function *F = CS.getCalledFunction()) {
+ if (TLI && TLI->has(LibFunc::strcpy) &&
+ F->getName() == TLI->getName(LibFunc::strcpy)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strncpy) &&
+ F->getName() == TLI->getName(LibFunc::strncpy)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strcat) &&
+ F->getName() == TLI->getName(LibFunc::strcat)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strncat) &&
+ F->getName() == TLI->getName(LibFunc::strncat)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getLocForWrite - Return a Location stored to by the specified instruction.
+/// If isRemovable returns true, this function and getLocForRead completely
+/// describe the memory operations for this instruction.
+static AliasAnalysis::Location
+getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return AA.getLocation(SI);
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
+ // memcpy/memmove/memset.
+ AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // memset/memcpy, which writes more than an i8.
+ if (Loc.Size == AliasAnalysis::UnknownSize && AA.getDataLayout() == 0)
+ return AliasAnalysis::Location();
+ return Loc;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+ if (II == 0) return AliasAnalysis::Location();
+
+ switch (II->getIntrinsicID()) {
+ default: return AliasAnalysis::Location(); // Unhandled intrinsic.
+ case Intrinsic::init_trampoline:
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // init.trampoline, which writes more than an i8.
+ if (AA.getDataLayout() == 0) return AliasAnalysis::Location();
+
+ // FIXME: We don't know the size of the trampoline, so we can't really
+ // handle it here.
+ return AliasAnalysis::Location(II->getArgOperand(0));
+ case Intrinsic::lifetime_end: {
+ uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ return AliasAnalysis::Location(II->getArgOperand(1), Len);
+ }
+ }
+}
+
+/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
+/// instruction if any.
+static AliasAnalysis::Location
+getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
+ assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) &&
+ "Unknown instruction case");
+
+ // The only instructions that both read and write are the mem transfer
+ // instructions (memcpy/memmove).
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
+ return AA.getLocationForSource(MTI);
+ return AliasAnalysis::Location();
+}
+
+
+/// isRemovable - If the value of this instruction and the memory it writes to
+/// is unused, may we delete this instruction?
+static bool isRemovable(Instruction *I) {
+ // Don't remove volatile/atomic stores.
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isUnordered();
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
+ case Intrinsic::lifetime_end:
+ // Never remove dead lifetime_end's, e.g. because it is followed by a
+ // free.
+ return false;
+ case Intrinsic::init_trampoline:
+ // Always safe to remove init_trampoline.
+ return true;
+
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ // Don't remove volatile memory intrinsics.
+ return !cast<MemIntrinsic>(II)->isVolatile();
+ }
+ }
+
+ if (CallSite CS = I)
+ return CS.getInstruction()->use_empty();
+
+ return false;
+}
+
+
+/// isShortenable - Returns true if this instruction can be safely shortened in
+/// length.
+static bool isShortenable(Instruction *I) {
+ // Don't shorten stores for now
+ if (isa<StoreInst>(I))
+ return false;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ // Do shorten memory intrinsics.
+ return true;
+ }
+ }
+
+ // Don't shorten libcalls calls for now.
+
+ return false;
+}
+
+/// getStoredPointerOperand - Return the pointer that is being written to.
+static Value *getStoredPointerOperand(Instruction *I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return MI->getDest();
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getArgOperand(0);
+ }
+ }
+
+ CallSite CS = I;
+ // All the supported functions so far happen to have dest as their first
+ // argument.
+ return CS.getArgument(0);
+}
+
+static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
+ uint64_t Size;
+ if (getObjectSize(V, Size, AA.getDataLayout(), AA.getTargetLibraryInfo()))
+ return Size;
+ return AliasAnalysis::UnknownSize;
+}
+
+namespace {
+ enum OverwriteResult
+ {
+ OverwriteComplete,
+ OverwriteEnd,
+ OverwriteUnknown
+ };
+}
+
+/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
+/// completely overwrites a store to the 'Earlier' location.
+/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
+/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
+static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
+ const AliasAnalysis::Location &Earlier,
+ AliasAnalysis &AA,
+ int64_t &EarlierOff,
+ int64_t &LaterOff) {
+ const Value *P1 = Earlier.Ptr->stripPointerCasts();
+ const Value *P2 = Later.Ptr->stripPointerCasts();
+
+ // If the start pointers are the same, we just have to compare sizes to see if
+ // the later store was larger than the earlier store.
+ if (P1 == P2) {
+ // If we don't know the sizes of either access, then we can't do a
+ // comparison.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize) {
+ // If we have no DataLayout information around, then the size of the store
+ // is inferrable from the pointee type. If they are the same type, then
+ // we know that the store is safe.
+ if (AA.getDataLayout() == 0 &&
+ Later.Ptr->getType() == Earlier.Ptr->getType())
+ return OverwriteComplete;
+
+ return OverwriteUnknown;
+ }
+
+ // Make sure that the Later size is >= the Earlier size.
+ if (Later.Size >= Earlier.Size)
+ return OverwriteComplete;
+ }
+
+ // Otherwise, we have to have size information, and the later store has to be
+ // larger than the earlier one.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize ||
+ AA.getDataLayout() == 0)
+ return OverwriteUnknown;
+
+ // Check to see if the later store is to the entire object (either a global,
+ // an alloca, or a byval argument). If so, then it clearly overwrites any
+ // other store to the same object.
+ const DataLayout *TD = AA.getDataLayout();
+
+ const Value *UO1 = GetUnderlyingObject(P1, TD),
+ *UO2 = GetUnderlyingObject(P2, TD);
+
+ // If we can't resolve the same pointers to the same object, then we can't
+ // analyze them at all.
+ if (UO1 != UO2)
+ return OverwriteUnknown;
+
+ // If the "Later" store is to a recognizable object, get its size.
+ uint64_t ObjectSize = getPointerSize(UO2, AA);
+ if (ObjectSize != AliasAnalysis::UnknownSize)
+ if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
+ return OverwriteComplete;
+
+ // Okay, we have stores to two completely different pointers. Try to
+ // decompose the pointer into a "base + constant_offset" form. If the base
+ // pointers are equal, then we can reason about the two stores.
+ EarlierOff = 0;
+ LaterOff = 0;
+ const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
+ const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
+
+ // If the base pointers still differ, we have two completely different stores.
+ if (BP1 != BP2)
+ return OverwriteUnknown;
+
+ // The later store completely overlaps the earlier store if:
+ //
+ // 1. Both start at the same offset and the later one's size is greater than
+ // or equal to the earlier one's, or
+ //
+ // |--earlier--|
+ // |-- later --|
+ //
+ // 2. The earlier store has an offset greater than the later offset, but which
+ // still lies completely within the later store.
+ //
+ // |--earlier--|
+ // |----- later ------|
+ //
+ // We have to be careful here as *Off is signed while *.Size is unsigned.
+ if (EarlierOff >= LaterOff &&
+ Later.Size >= Earlier.Size &&
+ uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
+ return OverwriteComplete;
+
+ // The other interesting case is if the later store overwrites the end of
+ // the earlier store
+ //
+ // |--earlier--|
+ // |-- later --|
+ //
+ // In this case we may want to trim the size of earlier to avoid generating
+ // writes to addresses which will definitely be overwritten later
+ if (LaterOff > EarlierOff &&
+ LaterOff < int64_t(EarlierOff + Earlier.Size) &&
+ int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
+ return OverwriteEnd;
+
+ // Otherwise, they don't completely overlap.
+ return OverwriteUnknown;
+}
+
+/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
+/// memory region into an identical pointer) then it doesn't actually make its
+/// input dead in the traditional sense. Consider this case:
+///
+/// memcpy(A <- B)
+/// memcpy(A <- A)
+///
+/// In this case, the second store to A does not make the first store to A dead.
+/// The usual situation isn't an explicit A<-A store like this (which can be
+/// trivially removed) but a case where two pointers may alias.
+///
+/// This function detects when it is unsafe to remove a dependent instruction
+/// because the DSE inducing instruction may be a self-read.
+static bool isPossibleSelfRead(Instruction *Inst,
+ const AliasAnalysis::Location &InstStoreLoc,
+ Instruction *DepWrite, AliasAnalysis &AA) {
+ // Self reads can only happen for instructions that read memory. Get the
+ // location read.
+ AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
+ if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
+
+ // If the read and written loc obviously don't alias, it isn't a read.
+ if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
+
+ // Okay, 'Inst' may copy over itself. However, we can still remove a the
+ // DepWrite instruction if we can prove that it reads from the same location
+ // as Inst. This handles useful cases like:
+ // memcpy(A <- B)
+ // memcpy(A <- B)
+ // Here we don't know if A/B may alias, but we do know that B/B are must
+ // aliases, so removing the first memcpy is safe (assuming it writes <= #
+ // bytes as the second one.
+ AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
+
+ if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+ return false;
+
+ // If DepWrite doesn't read memory or if we can't prove it is a must alias,
+ // then it can't be considered dead.
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DSE Pass
+//===----------------------------------------------------------------------===//
+
+bool DSE::runOnBasicBlock(BasicBlock &BB) {
+ bool MadeChange = false;
+
+ // Do a top-down walk on the BB.
+ for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
+ Instruction *Inst = BBI++;
+
+ // Handle 'free' calls specially.
+ if (CallInst *F = isFreeCall(Inst, TLI)) {
+ MadeChange |= HandleFree(F);
+ continue;
+ }
+
+ // If we find something that writes memory, get its memory dependence.
+ if (!hasMemoryWrite(Inst, TLI))
+ continue;
+
+ MemDepResult InstDep = MD->getDependency(Inst);
+
+ // Ignore any store where we can't find a local dependence.
+ // FIXME: cross-block DSE would be fun. :)
+ if (!InstDep.isDef() && !InstDep.isClobber())
+ continue;
+
+ // If we're storing the same value back to a pointer that we just
+ // loaded from, then the store can be removed.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+ if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+ SI->getOperand(0) == DepLoad && isRemovable(SI)) {
+ DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
+ << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
+
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
+ DeleteDeadInstruction(SI, *MD, TLI);
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ ++NumFastStores;
+ MadeChange = true;
+ continue;
+ }
+ }
+ }
+
+ // Figure out what location is being stored to.
+ AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
+
+ // If we didn't get a useful location, fail.
+ if (Loc.Ptr == 0)
+ continue;
+
+ while (InstDep.isDef() || InstDep.isClobber()) {
+ // Get the memory clobbered by the instruction we depend on. MemDep will
+ // skip any instructions that 'Loc' clearly doesn't interact with. If we
+ // end up depending on a may- or must-aliased load, then we can't optimize
+ // away the store and we bail out. However, if we depend on on something
+ // that overwrites the memory location we *can* potentially optimize it.
+ //
+ // Find out what memory location the dependent instruction stores.
+ Instruction *DepWrite = InstDep.getInst();
+ AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
+ // If we didn't get a useful location, or if it isn't a size, bail out.
+ if (DepLoc.Ptr == 0)
+ break;
+
+ // If we find a write that is a) removable (i.e., non-volatile), b) is
+ // completely obliterated by the store to 'Loc', and c) which we know that
+ // 'Inst' doesn't load from, then we can remove it.
+ if (isRemovable(DepWrite) &&
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
+ int64_t InstWriteOffset, DepWriteOffset;
+ OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
+ DepWriteOffset, InstWriteOffset);
+ if (OR == OverwriteComplete) {
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
+ << *DepWrite << "\n KILLER: " << *Inst << '\n');
+
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepWrite, *MD, TLI);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases, reset BBI.
+ BBI = Inst;
+ if (BBI != BB.begin())
+ --BBI;
+ break;
+ } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
+ // TODO: base this on the target vector size so that if the earlier
+ // store was too small to get vector writes anyway then its likely
+ // a good idea to shorten it
+ // Power of 2 vector writes are probably always a bad idea to optimize
+ // as any store/memset/memcpy is likely using vector instructions so
+ // shortening it to not vector size is likely to be slower
+ MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+ unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+ if (llvm::isPowerOf2_64(InstWriteOffset) ||
+ ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
+
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: "
+ << *DepWrite << "\n KILLER (offset "
+ << InstWriteOffset << ", "
+ << DepLoc.Size << ")"
+ << *Inst << '\n');
+
+ Value* DepWriteLength = DepIntrinsic->getLength();
+ Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
+ InstWriteOffset -
+ DepWriteOffset);
+ DepIntrinsic->setLength(TrimmedLength);
+ MadeChange = true;
+ }
+ }
+ }
+
+ // If this is a may-aliased store that is clobbering the store value, we
+ // can keep searching past it for another must-aliased pointer that stores
+ // to the same location. For example, in:
+ // store -> P
+ // store -> Q
+ // store -> P
+ // we can remove the first store to P even though we don't know if P and Q
+ // alias.
+ if (DepWrite == &BB.front()) break;
+
+ // Can't look past this instruction if it might read 'Loc'.
+ if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
+ break;
+
+ InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
+ }
+ }
+
+ // If this block ends in a return, unwind, or unreachable, all allocas are
+ // dead at its end, which means stores to them are also dead.
+ if (BB.getTerminator()->getNumSuccessors() == 0)
+ MadeChange |= handleEndBlock(BB);
+
+ return MadeChange;
+}
+
+/// Find all blocks that will unconditionally lead to the block BB and append
+/// them to F.
+static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
+ BasicBlock *BB, DominatorTree *DT) {
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ BasicBlock *Pred = *I;
+ if (Pred == BB) continue;
+ TerminatorInst *PredTI = Pred->getTerminator();
+ if (PredTI->getNumSuccessors() != 1)
+ continue;
+
+ if (DT->isReachableFromEntry(Pred))
+ Blocks.push_back(Pred);
+ }
+}
+
+/// HandleFree - Handle frees of entire structures whose dependency is a store
+/// to a field of that structure.
+bool DSE::HandleFree(CallInst *F) {
+ bool MadeChange = false;
+
+ AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0));
+ SmallVector<BasicBlock *, 16> Blocks;
+ Blocks.push_back(F->getParent());
+
+ while (!Blocks.empty()) {
+ BasicBlock *BB = Blocks.pop_back_val();
+ Instruction *InstPt = BB->getTerminator();
+ if (BB == F->getParent()) InstPt = F;
+
+ MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
+ while (Dep.isDef() || Dep.isClobber()) {
+ Instruction *Dependency = Dep.getInst();
+ if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency))
+ break;
+
+ Value *DepPointer =
+ GetUnderlyingObject(getStoredPointerOperand(Dependency));
+
+ // Check for aliasing.
+ if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
+ break;
+
+ Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
+
+ // DCE instructions only used to calculate that store
+ DeleteDeadInstruction(Dependency, *MD, TLI);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // Inst's old Dependency is now deleted. Compute the next dependency,
+ // which may also be dead, as in
+ // s[0] = 0;
+ // s[1] = 0; // This has just been deleted.
+ // free(s);
+ Dep = MD->getPointerDependencyFrom(Loc, false, Next, BB);
+ }
+
+ if (Dep.isNonLocal())
+ FindUnconditionalPreds(Blocks, BB, DT);
+ }
+
+ return MadeChange;
+}
+
+namespace {
+ struct CouldRef {
+ typedef Value *argument_type;
+ const CallSite CS;
+ AliasAnalysis *AA;
+
+ bool operator()(Value *I) {
+ // See if the call site touches the value.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
+
+ return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
+ }
+ };
+}
+
+/// handleEndBlock - Remove dead stores to stack-allocated locations in the
+/// function end block. Ex:
+/// %A = alloca i32
+/// ...
+/// store i32 1, i32* %A
+/// ret void
+bool DSE::handleEndBlock(BasicBlock &BB) {
+ bool MadeChange = false;
+
+ // Keep track of all of the stack objects that are dead at the end of the
+ // function.
+ SmallSetVector<Value*, 16> DeadStackObjects;
+
+ // Find all of the alloca'd pointers in the entry block.
+ BasicBlock *Entry = BB.getParent()->begin();
+ for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
+ if (isa<AllocaInst>(I))
+ DeadStackObjects.insert(I);
+
+ // Okay, so these are dead heap objects, but if the pointer never escapes
+ // then it's leaked by this function anyways.
+ else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
+ DeadStackObjects.insert(I);
+ }
+
+ // Treat byval arguments the same, stores to them are dead at the end of the
+ // function.
+ for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
+ AE = BB.getParent()->arg_end(); AI != AE; ++AI)
+ if (AI->hasByValAttr())
+ DeadStackObjects.insert(AI);
+
+ // Scan the basic block backwards
+ for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
+ --BBI;
+
+ // If we find a store, check to see if it points into a dead stack value.
+ if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
+ // See through pointer-to-pointer bitcasts
+ SmallVector<Value *, 4> Pointers;
+ GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);
+
+ // Stores to stack values are valid candidates for removal.
+ bool AllDead = true;
+ for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
+ E = Pointers.end(); I != E; ++I)
+ if (!DeadStackObjects.count(*I)) {
+ AllDead = false;
+ break;
+ }
+
+ if (AllDead) {
+ Instruction *Dead = BBI++;
+
+ DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
+ << *Dead << "\n Objects: ";
+ for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
+ E = Pointers.end(); I != E; ++I) {
+ dbgs() << **I;
+ if (llvm::next(I) != E)
+ dbgs() << ", ";
+ }
+ dbgs() << '\n');
+
+ // DCE instructions only used to calculate that store.
+ DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
+ ++NumFastStores;
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // Remove any dead non-memory-mutating instructions.
+ if (isInstructionTriviallyDead(BBI, TLI)) {
+ Instruction *Inst = BBI++;
+ DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
+ ++NumFastOther;
+ MadeChange = true;
+ continue;
+ }
+
+ if (isa<AllocaInst>(BBI)) {
+ // Remove allocas from the list of dead stack objects; there can't be
+ // any references before the definition.
+ DeadStackObjects.remove(BBI);
+ continue;
+ }
+
+ if (CallSite CS = cast<Value>(BBI)) {
+ // Remove allocation function calls from the list of dead stack objects;
+ // there can't be any references before the definition.
+ if (isAllocLikeFn(BBI, TLI))
+ DeadStackObjects.remove(BBI);
+
+ // If this call does not access memory, it can't be loading any of our
+ // pointers.
+ if (AA->doesNotAccessMemory(CS))
+ continue;
+
+ // If the call might load from any of our allocas, then any store above
+ // the call is live.
+ CouldRef Pred = { CS, AA };
+ DeadStackObjects.remove_if(Pred);
+
+ // If all of the allocas were clobbered by the call then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ break;
+
+ continue;
+ }
+
+ AliasAnalysis::Location LoadedLoc;
+
+ // If we encounter a use of the pointer, it is no longer considered dead
+ if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ if (!L->isUnordered()) // Be conservative with atomic/volatile load
+ break;
+ LoadedLoc = AA->getLocation(L);
+ } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
+ LoadedLoc = AA->getLocation(V);
+ } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
+ LoadedLoc = AA->getLocationForSource(MTI);
+ } else if (!BBI->mayReadFromMemory()) {
+ // Instruction doesn't read memory. Note that stores that weren't removed
+ // above will hit this case.
+ continue;
+ } else {
+ // Unknown inst; assume it clobbers everything.
+ break;
+ }
+
+ // Remove any allocas from the DeadPointer set that are loaded, as this
+ // makes any stores above the access live.
+ RemoveAccessedObjects(LoadedLoc, DeadStackObjects);
+
+ // If all of the allocas were clobbered by the access then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ break;
+ }
+
+ return MadeChange;
+}
+
+namespace {
+ struct CouldAlias {
+ typedef Value *argument_type;
+ const AliasAnalysis::Location &LoadedLoc;
+ AliasAnalysis *AA;
+
+ bool operator()(Value *I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
+ return !AA->isNoAlias(StackLoc, LoadedLoc);
+ }
+ };
+}
+
+/// RemoveAccessedObjects - Check to see if the specified location may alias any
+/// of the stack objects in the DeadStackObjects set. If so, they become live
+/// because the location is being loaded.
+void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallSetVector<Value*, 16> &DeadStackObjects) {
+ const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
+
+ // A constant can't be in the dead pointer set.
+ if (isa<Constant>(UnderlyingPointer))
+ return;
+
+ // If the kill pointer can be easily reduced to an alloca, don't bother doing
+ // extraneous AA queries.
+ if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
+ DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
+ return;
+ }
+
+ // Remove objects that could alias LoadedLoc.
+ CouldAlias Pred = { LoadedLoc, AA };
+ DeadStackObjects.remove_if(Pred);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
new file mode 100644
index 000000000000..3c08634bfe22
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -0,0 +1,628 @@
+//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a simple dominator tree walk that eliminates trivially
+// redundant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-cse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <deque>
+using namespace llvm;
+
+STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
+STATISTIC(NumCSE, "Number of instructions CSE'd");
+STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
+STATISTIC(NumCSECall, "Number of call instructions CSE'd");
+STATISTIC(NumDSE, "Number of trivial dead stores removed");
+
+static unsigned getHash(const void *V) {
+ return DenseMapInfo<const void*>::getHashValue(V);
+}
+
+//===----------------------------------------------------------------------===//
+// SimpleValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SimpleValue - Instances of this struct represent available values in the
+ /// scoped hash table.
+ struct SimpleValue {
+ Instruction *Inst;
+
+ SimpleValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // This can only handle non-void readnone functions.
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+ return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+ isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+ }
+ };
+}
+
+namespace llvm {
+// SimpleValue is POD.
+template<> struct isPodLike<SimpleValue> {
+ static const bool value = true;
+};
+
+template<> struct DenseMapInfo<SimpleValue> {
+ static inline SimpleValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline SimpleValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(SimpleValue Val);
+ static bool isEqual(SimpleValue LHS, SimpleValue RHS);
+};
+}
+
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+ Instruction *Inst = Val.Inst;
+ // Hash in all of the operands as pointers.
+ if (BinaryOperator* BinOp = dyn_cast<BinaryOperator>(Inst)) {
+ Value *LHS = BinOp->getOperand(0);
+ Value *RHS = BinOp->getOperand(1);
+ if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
+ std::swap(LHS, RHS);
+
+ if (isa<OverflowingBinaryOperator>(BinOp)) {
+ // Hash the overflow behavior
+ unsigned Overflow =
+ BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
+ BinOp->hasNoUnsignedWrap() * OverflowingBinaryOperator::NoUnsignedWrap;
+ return hash_combine(BinOp->getOpcode(), Overflow, LHS, RHS);
+ }
+
+ return hash_combine(BinOp->getOpcode(), LHS, RHS);
+ }
+
+ if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
+ Value *LHS = CI->getOperand(0);
+ Value *RHS = CI->getOperand(1);
+ CmpInst::Predicate Pred = CI->getPredicate();
+ if (Inst->getOperand(0) > Inst->getOperand(1)) {
+ std::swap(LHS, RHS);
+ Pred = CI->getSwappedPredicate();
+ }
+ return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(Inst))
+ return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
+
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
+ return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
+ hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
+
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
+ return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
+ IVI->getOperand(1),
+ hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
+
+ assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
+ isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
+ isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
+ isa<ShuffleVectorInst>(Inst)) && "Invalid/unknown instruction");
+
+ // Mix in the opcode.
+ return hash_combine(Inst->getOpcode(),
+ hash_combine_range(Inst->value_op_begin(),
+ Inst->value_op_end()));
+}
+
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+
+ if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
+ if (LHSI->isIdenticalTo(RHSI)) return true;
+
+ // If we're not strictly identical, we still might be a commutable instruction
+ if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
+ if (!LHSBinOp->isCommutative())
+ return false;
+
+ assert(isa<BinaryOperator>(RHSI)
+ && "same opcode, but different instruction type?");
+ BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
+
+ // Check overflow attributes
+ if (isa<OverflowingBinaryOperator>(LHSBinOp)) {
+ assert(isa<OverflowingBinaryOperator>(RHSBinOp)
+ && "same opcode, but different operator type?");
+ if (LHSBinOp->hasNoUnsignedWrap() != RHSBinOp->hasNoUnsignedWrap() ||
+ LHSBinOp->hasNoSignedWrap() != RHSBinOp->hasNoSignedWrap())
+ return false;
+ }
+
+ // Commuted equality
+ return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
+ LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
+ }
+ if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
+ assert(isa<CmpInst>(RHSI)
+ && "same opcode, but different instruction type?");
+ CmpInst *RHSCmp = cast<CmpInst>(RHSI);
+ // Commuted equality
+ return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
+ LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
+ LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// CallValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// CallValue - Instances of this struct represent available call values in
+ /// the scoped hash table.
+ struct CallValue {
+ Instruction *Inst;
+
+ CallValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // Don't value number anything that returns void.
+ if (Inst->getType()->isVoidTy())
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(Inst);
+ if (CI == 0 || !CI->onlyReadsMemory())
+ return false;
+ return true;
+ }
+ };
+}
+
+namespace llvm {
+ // CallValue is POD.
+ template<> struct isPodLike<CallValue> {
+ static const bool value = true;
+ };
+
+ template<> struct DenseMapInfo<CallValue> {
+ static inline CallValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline CallValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CallValue Val);
+ static bool isEqual(CallValue LHS, CallValue RHS);
+ };
+}
+unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
+ Instruction *Inst = Val.Inst;
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
+ assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
+ "Cannot value number calls with metadata operands");
+ Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyCSE pass.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// EarlyCSE - This pass does a simple depth-first walk over the dominator
+/// tree, eliminating trivially redundant instructions and using instsimplify
+/// to canonicalize things as it goes. It is intended to be fast and catch
+/// obvious cases so that instcombine and other passes are more effective. It
+/// is expected that a later pass of GVN will catch the interesting/hard
+/// cases.
+class EarlyCSE : public FunctionPass {
+public:
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
+ typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
+ AllocatorTy> ScopedHTType;
+
+ /// AvailableValues - This scoped hash table contains the current values of
+ /// all of our simple scalar expressions. As we walk down the domtree, we
+ /// look to see if instructions are in this: if so, we replace them with what
+ /// we find, otherwise we insert them so that dominated values can succeed in
+ /// their lookup.
+ ScopedHTType *AvailableValues;
+
+ /// AvailableLoads - This scoped hash table contains the current values
+ /// of loads. This allows us to get efficient access to dominating loads when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is
+ /// incremented after every possibly writing memory operation, which ensures
+ /// that we only CSE loads with other loads that have no intervening store.
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
+ typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
+ DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
+ LoadHTType *AvailableLoads;
+
+ /// AvailableCalls - This scoped hash table contains the current values
+ /// of read-only call values. It uses the same generation count as loads.
+ typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
+ CallHTType *AvailableCalls;
+
+ /// CurrentGeneration - This is the current generation of the memory value.
+ unsigned CurrentGeneration;
+
+ static char ID;
+ explicit EarlyCSE() : FunctionPass(ID) {
+ initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+private:
+
+ // NodeScope - almost a POD, but needs to call the constructors for the
+ // scoped hash tables so that a new scope gets pushed on. These are RAII so
+ // that the scope gets popped when the NodeScope is destroyed.
+ class NodeScope {
+ public:
+ NodeScope(ScopedHTType *availableValues,
+ LoadHTType *availableLoads,
+ CallHTType *availableCalls) :
+ Scope(*availableValues),
+ LoadScope(*availableLoads),
+ CallScope(*availableCalls) {}
+
+ private:
+ NodeScope(const NodeScope&) LLVM_DELETED_FUNCTION;
+ void operator=(const NodeScope&) LLVM_DELETED_FUNCTION;
+
+ ScopedHTType::ScopeTy Scope;
+ LoadHTType::ScopeTy LoadScope;
+ CallHTType::ScopeTy CallScope;
+ };
+
+ // StackNode - contains all the needed information to create a stack for
+ // doing a depth first tranversal of the tree. This includes scopes for
+ // values, loads, and calls as well as the generation. There is a child
+ // iterator so that the children do not need to be store spearately.
+ class StackNode {
+ public:
+ StackNode(ScopedHTType *availableValues,
+ LoadHTType *availableLoads,
+ CallHTType *availableCalls,
+ unsigned cg, DomTreeNode *n,
+ DomTreeNode::iterator child, DomTreeNode::iterator end) :
+ CurrentGeneration(cg), ChildGeneration(cg), Node(n),
+ ChildIter(child), EndIter(end),
+ Scopes(availableValues, availableLoads, availableCalls),
+ Processed(false) {}
+
+ // Accessors.
+ unsigned currentGeneration() { return CurrentGeneration; }
+ unsigned childGeneration() { return ChildGeneration; }
+ void childGeneration(unsigned generation) { ChildGeneration = generation; }
+ DomTreeNode *node() { return Node; }
+ DomTreeNode::iterator childIter() { return ChildIter; }
+ DomTreeNode *nextChild() {
+ DomTreeNode *child = *ChildIter;
+ ++ChildIter;
+ return child;
+ }
+ DomTreeNode::iterator end() { return EndIter; }
+ bool isProcessed() { return Processed; }
+ void process() { Processed = true; }
+
+ private:
+ StackNode(const StackNode&) LLVM_DELETED_FUNCTION;
+ void operator=(const StackNode&) LLVM_DELETED_FUNCTION;
+
+ // Members.
+ unsigned CurrentGeneration;
+ unsigned ChildGeneration;
+ DomTreeNode *Node;
+ DomTreeNode::iterator ChildIter;
+ DomTreeNode::iterator EndIter;
+ NodeScope Scopes;
+ bool Processed;
+ };
+
+ bool processNode(DomTreeNode *Node);
+
+ // This transformation requires dominator postdominator info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char EarlyCSE::ID = 0;
+
+// createEarlyCSEPass - The public interface to this file.
+FunctionPass *llvm::createEarlyCSEPass() {
+ return new EarlyCSE();
+}
+
+INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
+
+bool EarlyCSE::processNode(DomTreeNode *Node) {
+ BasicBlock *BB = Node->getBlock();
+
+ // If this block has a single predecessor, then the predecessor is the parent
+ // of the domtree node and all of the live out memory values are still current
+ // in this block. If this block has multiple predecessors, then they could
+ // have invalidated the live-out memory values of our parent value. For now,
+ // just be conservative and invalidate memory if this block has multiple
+ // predecessors.
+ if (BB->getSinglePredecessor() == 0)
+ ++CurrentGeneration;
+
+ /// LastStore - Keep track of the last non-volatile store that we saw... for
+ /// as long as there in no instruction that reads memory. If we see a store
+ /// to the same location, we delete the dead store. This zaps trivial dead
+ /// stores which can occur in bitfield code among other things.
+ StoreInst *LastStore = 0;
+
+ bool Changed = false;
+
+ // See if any instructions in the block can be eliminated. If so, do it. If
+ // not, add them to AvailableValues.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ // Dead instructions should just be removed.
+ if (isInstructionTriviallyDead(Inst, TLI)) {
+ DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If the instruction can be simplified (e.g. X+0 = X) then replace it with
+ // its simpler value.
+ if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
+ DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If this is a simple instruction that we can value number, process it.
+ if (SimpleValue::canHandle(Inst)) {
+ // See if the instruction has an available value. If so, use it.
+ if (Value *V = AvailableValues->lookup(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSE;
+ continue;
+ }
+
+ // Otherwise, just remember that this value is available.
+ AvailableValues->insert(Inst, Inst);
+ continue;
+ }
+
+ // If this is a non-volatile load, process it.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // Ignore volatile loads.
+ if (!LI->isSimple()) {
+ LastStore = 0;
+ continue;
+ }
+
+ // If we have an available version of this load, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal =
+ AvailableLoads->lookup(Inst->getOperand(0));
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSELoad;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableLoads->insert(Inst->getOperand(0),
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ LastStore = 0;
+ continue;
+ }
+
+ // If this instruction may read from memory, forget LastStore.
+ if (Inst->mayReadFromMemory())
+ LastStore = 0;
+
+ // If this is a read-only call, process it.
+ if (CallValue::canHandle(Inst)) {
+ // If we have an available version of this call, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSECall;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableCalls->insert(Inst,
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ continue;
+ }
+
+ // Okay, this isn't something we can CSE at all. Check to see if it is
+ // something that could modify memory. If so, our available memory values
+ // cannot be used so bump the generation count.
+ if (Inst->mayWriteToMemory()) {
+ ++CurrentGeneration;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // We do a trivial form of DSE if there are two stores to the same
+ // location with no intervening loads. Delete the earlier store.
+ if (LastStore &&
+ LastStore->getPointerOperand() == SI->getPointerOperand()) {
+ DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: "
+ << *Inst << '\n');
+ LastStore->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ LastStore = 0;
+ continue;
+ }
+
+ // Okay, we just invalidated anything we knew about loaded values. Try
+ // to salvage *something* by remembering that the stored value is a live
+ // version of the pointer. It is safe to forward from volatile stores
+ // to non-volatile loads, so we don't have to check for volatility of
+ // the store.
+ AvailableLoads->insert(SI->getPointerOperand(),
+ std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
+
+ // Remember that this was the last store we saw for DSE.
+ if (SI->isSimple())
+ LastStore = SI;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+
+bool EarlyCSE::runOnFunction(Function &F) {
+ std::deque<StackNode *> nodesToProcess;
+
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+ DT = &getAnalysis<DominatorTree>();
+
+ // Tables that the pass uses when walking the domtree.
+ ScopedHTType AVTable;
+ AvailableValues = &AVTable;
+ LoadHTType LoadTable;
+ AvailableLoads = &LoadTable;
+ CallHTType CallTable;
+ AvailableCalls = &CallTable;
+
+ CurrentGeneration = 0;
+ bool Changed = false;
+
+ // Process the root node.
+ nodesToProcess.push_front(
+ new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
+ CurrentGeneration, DT->getRootNode(),
+ DT->getRootNode()->begin(),
+ DT->getRootNode()->end()));
+
+ // Save the current generation.
+ unsigned LiveOutGeneration = CurrentGeneration;
+
+ // Process the stack.
+ while (!nodesToProcess.empty()) {
+ // Grab the first item off the stack. Set the current generation, remove
+ // the node from the stack, and process it.
+ StackNode *NodeToProcess = nodesToProcess.front();
+
+ // Initialize class members.
+ CurrentGeneration = NodeToProcess->currentGeneration();
+
+ // Check if the node needs to be processed.
+ if (!NodeToProcess->isProcessed()) {
+ // Process the node.
+ Changed |= processNode(NodeToProcess->node());
+ NodeToProcess->childGeneration(CurrentGeneration);
+ NodeToProcess->process();
+ } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
+ // Push the next child onto the stack.
+ DomTreeNode *child = NodeToProcess->nextChild();
+ nodesToProcess.push_front(
+ new StackNode(AvailableValues,
+ AvailableLoads,
+ AvailableCalls,
+ NodeToProcess->childGeneration(), child,
+ child->begin(), child->end()));
+ } else {
+ // It has been processed, and there are no more children to process,
+ // so delete it and pop it off the stack.
+ delete NodeToProcess;
+ nodesToProcess.pop_front();
+ }
+ } // while (!nodes...)
+
+ // Reset the current generation.
+ CurrentGeneration = LiveOutGeneration;
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
new file mode 100644
index 000000000000..129af8d45d6f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -0,0 +1,2602 @@
+//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global value numbering to eliminate fully redundant
+// instructions. It also performs simple dead load elimination.
+//
+// Note that this pass does the value numbering itself; it does not use the
+// ValueNumbering analysis passes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "gvn"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+STATISTIC(NumGVNInstr, "Number of instructions deleted");
+STATISTIC(NumGVNLoad, "Number of loads deleted");
+STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
+STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumGVNSimpl, "Number of instructions simplified");
+STATISTIC(NumGVNEqProp, "Number of equalities propagated");
+STATISTIC(NumPRELoad, "Number of loads PRE'd");
+
+static cl::opt<bool> EnablePRE("enable-pre",
+ cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+
+// Maximum allowed recursion depth.
+static cl::opt<uint32_t>
+MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
+ cl::desc("Max recurse depth (default = 1000)"));
+
+//===----------------------------------------------------------------------===//
+// ValueTable Class
+//===----------------------------------------------------------------------===//
+
+/// This class holds the mapping between values and value numbers. It is used
+/// as an efficient mechanism to determine the expression-wise equivalence of
+/// two values.
+namespace {
+ struct Expression {
+ uint32_t opcode;
+ Type *type;
+ SmallVector<uint32_t, 4> varargs;
+
+ Expression(uint32_t o = ~2U) : opcode(o) { }
+
+ bool operator==(const Expression &other) const {
+ if (opcode != other.opcode)
+ return false;
+ if (opcode == ~0U || opcode == ~1U)
+ return true;
+ if (type != other.type)
+ return false;
+ if (varargs != other.varargs)
+ return false;
+ return true;
+ }
+
+ friend hash_code hash_value(const Expression &Value) {
+ return hash_combine(Value.opcode, Value.type,
+ hash_combine_range(Value.varargs.begin(),
+ Value.varargs.end()));
+ }
+ };
+
+ class ValueTable {
+ DenseMap<Value*, uint32_t> valueNumbering;
+ DenseMap<Expression, uint32_t> expressionNumbering;
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+
+ uint32_t nextValueNumber;
+
+ Expression create_expression(Instruction* I);
+ Expression create_cmp_expression(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS);
+ Expression create_extractvalue_expression(ExtractValueInst* EI);
+ uint32_t lookup_or_add_call(CallInst* C);
+ public:
+ ValueTable() : nextValueNumber(1) { }
+ uint32_t lookup_or_add(Value *V);
+ uint32_t lookup(Value *V) const;
+ uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS);
+ void add(Value *V, uint32_t num);
+ void clear();
+ void erase(Value *v);
+ void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+ AliasAnalysis *getAliasAnalysis() const { return AA; }
+ void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
+ void setDomTree(DominatorTree* D) { DT = D; }
+ uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
+ void verifyRemoved(const Value *) const;
+ };
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<Expression> {
+ static inline Expression getEmptyKey() {
+ return ~0U;
+ }
+
+ static inline Expression getTombstoneKey() {
+ return ~1U;
+ }
+
+ static unsigned getHashValue(const Expression e) {
+ using llvm::hash_value;
+ return static_cast<unsigned>(hash_value(e));
+ }
+ static bool isEqual(const Expression &LHS, const Expression &RHS) {
+ return LHS == RHS;
+ }
+};
+
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable Internal Functions
+//===----------------------------------------------------------------------===//
+
+Expression ValueTable::create_expression(Instruction *I) {
+ Expression e;
+ e.type = I->getType();
+ e.opcode = I->getOpcode();
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+ if (I->isCommutative()) {
+ // Ensure that commutative instructions that only differ by a permutation
+ // of their operands get the same value number by sorting the operand value
+ // numbers. Since all commutative instructions have two operands it is more
+ // efficient to sort by hand rather than using, say, std::sort.
+ assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
+ if (e.varargs[0] > e.varargs[1])
+ std::swap(e.varargs[0], e.varargs[1]);
+ }
+
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ // Sort the operand value numbers so x<y and y>x get the same value number.
+ CmpInst::Predicate Predicate = C->getPredicate();
+ if (e.varargs[0] > e.varargs[1]) {
+ std::swap(e.varargs[0], e.varargs[1]);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+ e.opcode = (C->getOpcode() << 8) | Predicate;
+ } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
+ for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ }
+
+ return e;
+}
+
+Expression ValueTable::create_cmp_expression(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
+ assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
+ "Not a comparison!");
+ Expression e;
+ e.type = CmpInst::makeCmpResultType(LHS->getType());
+ e.varargs.push_back(lookup_or_add(LHS));
+ e.varargs.push_back(lookup_or_add(RHS));
+
+ // Sort the operand value numbers so x<y and y>x get the same value number.
+ if (e.varargs[0] > e.varargs[1]) {
+ std::swap(e.varargs[0], e.varargs[1]);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+ e.opcode = (Opcode << 8) | Predicate;
+ return e;
+}
+
+Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
+ assert(EI != 0 && "Not an ExtractValueInst?");
+ Expression e;
+ e.type = EI->getType();
+ e.opcode = 0;
+
+ IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
+ if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+ // EI might be an extract from one of our recognised intrinsics. If it
+ // is we'll synthesize a semantically equivalent expression instead on
+ // an extract value expression.
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ e.opcode = Instruction::Add;
+ break;
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ e.opcode = Instruction::Sub;
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ e.opcode = Instruction::Mul;
+ break;
+ default:
+ break;
+ }
+
+ if (e.opcode != 0) {
+ // Intrinsic recognized. Grab its args to finish building the expression.
+ assert(I->getNumArgOperands() == 2 &&
+ "Expect two args for recognised intrinsics.");
+ e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
+ e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
+ return e;
+ }
+ }
+
+ // Not a recognised intrinsic. Fall back to producing an extract value
+ // expression.
+ e.opcode = EI->getOpcode();
+ for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+
+ for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+
+ return e;
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable External Functions
+//===----------------------------------------------------------------------===//
+
+/// add - Insert a value into the table with a specified value number.
+void ValueTable::add(Value *V, uint32_t num) {
+ valueNumbering.insert(std::make_pair(V, num));
+}
+
+uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
+ if (AA->doesNotAccessMemory(C)) {
+ Expression exp = create_expression(C);
+ uint32_t &e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ } else if (AA->onlyReadsMemory(C)) {
+ Expression exp = create_expression(C);
+ uint32_t &e = expressionNumbering[exp];
+ if (!e) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
+ if (!MD) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
+
+ MemDepResult local_dep = MD->getDependency(C);
+
+ if (!local_dep.isDef() && !local_dep.isNonLocal()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ if (local_dep.isDef()) {
+ CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
+
+ if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(local_cdep);
+ valueNumbering[C] = v;
+ return v;
+ }
+
+ // Non-local case.
+ const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
+ MD->getNonLocalCallDependency(CallSite(C));
+ // FIXME: Move the checking logic to MemDep!
+ CallInst* cdep = 0;
+
+ // Check to see if we have a single dominating call instruction that is
+ // identical to C.
+ for (unsigned i = 0, e = deps.size(); i != e; ++i) {
+ const NonLocalDepEntry *I = &deps[i];
+ if (I->getResult().isNonLocal())
+ continue;
+
+ // We don't handle non-definitions. If we already have a call, reject
+ // instruction dependencies.
+ if (!I->getResult().isDef() || cdep != 0) {
+ cdep = 0;
+ break;
+ }
+
+ CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
+ // FIXME: All duplicated with non-local case.
+ if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
+ cdep = NonLocalDepCall;
+ continue;
+ }
+
+ cdep = 0;
+ break;
+ }
+
+ if (!cdep) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(cdep);
+ valueNumbering[C] = v;
+ return v;
+
+ } else {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+}
+
+/// lookup_or_add - Returns the value number for the specified value, assigning
+/// it a new number if it did not have one before.
+uint32_t ValueTable::lookup_or_add(Value *V) {
+ DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ if (VI != valueNumbering.end())
+ return VI->second;
+
+ if (!isa<Instruction>(V)) {
+ valueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ Instruction* I = cast<Instruction>(V);
+ Expression exp;
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ return lookup_or_add_call(cast<CallInst>(I));
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ case Instruction::Select:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ case Instruction::InsertValue:
+ case Instruction::GetElementPtr:
+ exp = create_expression(I);
+ break;
+ case Instruction::ExtractValue:
+ exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
+ break;
+ default:
+ valueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[V] = e;
+ return e;
+}
+
+/// lookup - Returns the value number of the specified value. Fails if
+/// the value has not yet been numbered.
+uint32_t ValueTable::lookup(Value *V) const {
+ DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
+}
+
+/// lookup_or_add_cmp - Returns the value number of the given comparison,
+/// assigning it a new number if it did not have one before. Useful when
+/// we deduced the result of a comparison, but don't immediately have an
+/// instruction realizing that comparison to hand.
+uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
+ Expression exp = create_cmp_expression(Opcode, Predicate, LHS, RHS);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ return e;
+}
+
+/// clear - Remove all entries from the ValueTable.
+void ValueTable::clear() {
+ valueNumbering.clear();
+ expressionNumbering.clear();
+ nextValueNumber = 1;
+}
+
+/// erase - Remove a value from the value numbering.
+void ValueTable::erase(Value *V) {
+ valueNumbering.erase(V);
+}
+
+/// verifyRemoved - Verify that the value is removed from all internal data
+/// structures.
+void ValueTable::verifyRemoved(const Value *V) const {
+ for (DenseMap<Value*, uint32_t>::const_iterator
+ I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
+ assert(I->first != V && "Inst still occurs in value numbering map!");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// GVN Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+ class GVN : public FunctionPass {
+ bool NoLoads;
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+
+ ValueTable VN;
+
+ /// LeaderTable - A mapping from value numbers to lists of Value*'s that
+ /// have that value number. Use findLeader to query it.
+ struct LeaderTableEntry {
+ Value *Val;
+ const BasicBlock *BB;
+ LeaderTableEntry *Next;
+ };
+ DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
+ BumpPtrAllocator TableAllocator;
+
+ SmallVector<Instruction*, 8> InstrsToErase;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit GVN(bool noloads = false)
+ : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ initializeGVNPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ /// markInstructionForDeletion - This removes the specified instruction from
+ /// our various maps and marks it for deletion.
+ void markInstructionForDeletion(Instruction *I) {
+ VN.erase(I);
+ InstrsToErase.push_back(I);
+ }
+
+ const DataLayout *getDataLayout() const { return TD; }
+ DominatorTree &getDominatorTree() const { return *DT; }
+ AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
+ MemoryDependenceAnalysis &getMemDep() const { return *MD; }
+ private:
+ /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
+ /// its value number.
+ void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
+ LeaderTableEntry &Curr = LeaderTable[N];
+ if (!Curr.Val) {
+ Curr.Val = V;
+ Curr.BB = BB;
+ return;
+ }
+
+ LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
+ Node->Val = V;
+ Node->BB = BB;
+ Node->Next = Curr.Next;
+ Curr.Next = Node;
+ }
+
+ /// removeFromLeaderTable - Scan the list of values corresponding to a given
+ /// value number, and remove the given instruction if encountered.
+ void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
+ LeaderTableEntry* Prev = 0;
+ LeaderTableEntry* Curr = &LeaderTable[N];
+
+ while (Curr->Val != I || Curr->BB != BB) {
+ Prev = Curr;
+ Curr = Curr->Next;
+ }
+
+ if (Prev) {
+ Prev->Next = Curr->Next;
+ } else {
+ if (!Curr->Next) {
+ Curr->Val = 0;
+ Curr->BB = 0;
+ } else {
+ LeaderTableEntry* Next = Curr->Next;
+ Curr->Val = Next->Val;
+ Curr->BB = Next->BB;
+ Curr->Next = Next->Next;
+ }
+ }
+ }
+
+ // List of critical edges to be split between iterations.
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
+
+ // This transformation requires dominator postdominator info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
+ if (!NoLoads)
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<AliasAnalysis>();
+ }
+
+
+ // Helper fuctions
+ // FIXME: eliminate or document these better
+ bool processLoad(LoadInst *L);
+ bool processInstruction(Instruction *I);
+ bool processNonLocalLoad(LoadInst *L);
+ bool processBlock(BasicBlock *BB);
+ void dump(DenseMap<uint32_t, Value*> &d);
+ bool iterateOnFunction(Function &F);
+ bool performPRE(Function &F);
+ Value *findLeader(const BasicBlock *BB, uint32_t num);
+ void cleanupGlobalSets();
+ void verifyRemoved(const Instruction *I) const;
+ bool splitCriticalEdges();
+ unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
+ const BasicBlockEdge &Root);
+ bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
+ };
+
+ char GVN::ID = 0;
+}
+
+// createGVNPass - The public interface to this file...
+FunctionPass *llvm::createGVNPass(bool NoLoads) {
+ return new GVN(NoLoads);
+}
+
+INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void GVN::dump(DenseMap<uint32_t, Value*>& d) {
+ errs() << "{\n";
+ for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
+ E = d.end(); I != E; ++I) {
+ errs() << I->first << "\n";
+ I->second->dump();
+ }
+ errs() << "}\n";
+}
+#endif
+
+/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
+/// we're analyzing is fully available in the specified block. As we go, keep
+/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
+/// map is actually a tri-state map with the following values:
+/// 0) we know the block *is not* fully available.
+/// 1) we know the block *is* fully available.
+/// 2) we do not know whether the block is fully available or not, but we are
+/// currently speculating that it will be.
+/// 3) we are speculating for this block and have used that to speculate for
+/// other blocks.
+static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
+ DenseMap<BasicBlock*, char> &FullyAvailableBlocks,
+ uint32_t RecurseDepth) {
+ if (RecurseDepth > MaxRecurseDepth)
+ return false;
+
+ // Optimistically assume that the block is fully available and check to see
+ // if we already know about this block in one lookup.
+ std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
+ FullyAvailableBlocks.insert(std::make_pair(BB, 2));
+
+ // If the entry already existed for this block, return the precomputed value.
+ if (!IV.second) {
+ // If this is a speculative "available" value, mark it as being used for
+ // speculation of other blocks.
+ if (IV.first->second == 2)
+ IV.first->second = 3;
+ return IV.first->second != 0;
+ }
+
+ // Otherwise, see if it is fully available in all predecessors.
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // If this block has no predecessors, it isn't live-in here.
+ if (PI == PE)
+ goto SpeculationFailure;
+
+ for (; PI != PE; ++PI)
+ // If the value isn't fully available in one of our predecessors, then it
+ // isn't fully available in this block either. Undo our previous
+ // optimistic assumption and bail out.
+ if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1))
+ goto SpeculationFailure;
+
+ return true;
+
+// SpeculationFailure - If we get here, we found out that this is not, after
+// all, a fully-available block. We have a problem if we speculated on this and
+// used the speculation to mark other blocks as available.
+SpeculationFailure:
+ char &BBVal = FullyAvailableBlocks[BB];
+
+ // If we didn't speculate on this, just return with it set to false.
+ if (BBVal == 2) {
+ BBVal = 0;
+ return false;
+ }
+
+ // If we did speculate on this value, we could have blocks set to 1 that are
+ // incorrect. Walk the (transitive) successors of this block and mark them as
+ // 0 if set to one.
+ SmallVector<BasicBlock*, 32> BBWorklist;
+ BBWorklist.push_back(BB);
+
+ do {
+ BasicBlock *Entry = BBWorklist.pop_back_val();
+ // Note that this sets blocks to 0 (unavailable) if they happen to not
+ // already be in FullyAvailableBlocks. This is safe.
+ char &EntryVal = FullyAvailableBlocks[Entry];
+ if (EntryVal == 0) continue; // Already unavailable.
+
+ // Mark as unavailable.
+ EntryVal = 0;
+
+ for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
+ BBWorklist.push_back(*I);
+ } while (!BBWorklist.empty());
+
+ return false;
+}
+
+
+/// CanCoerceMustAliasedValueToLoad - Return true if
+/// CoerceAvailableValueToLoadType will succeed.
+static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
+ Type *LoadTy,
+ const DataLayout &TD) {
+ // If the loaded or stored value is an first class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+ StoredVal->getType()->isStructTy() ||
+ StoredVal->getType()->isArrayTy())
+ return false;
+
+ // The store has to be at least as big as the load.
+ if (TD.getTypeSizeInBits(StoredVal->getType()) <
+ TD.getTypeSizeInBits(LoadTy))
+ return false;
+
+ return true;
+}
+
+/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value. LoadedTy is the type of the load we want to replace and
+/// InsertPt is the place to insert new instructions.
+///
+/// If we can't do it, return null.
+static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
+ Type *LoadedTy,
+ Instruction *InsertPt,
+ const DataLayout &TD) {
+ if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
+ return 0;
+
+ // If this is already the right type, just return it.
+ Type *StoredValTy = StoredVal->getType();
+
+ uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+
+ // If the store and reload are the same size, we can always reuse it.
+ if (StoreSize == LoadSize) {
+ // Pointer to Pointer -> use bitcast.
+ if (StoredValTy->getScalarType()->isPointerTy() &&
+ LoadedTy->getScalarType()->isPointerTy())
+ return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
+
+ // Convert source pointers to integers, which can be bitcast.
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = TD.getIntPtrType(StoredValTy);
+ StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ Type *TypeToCastTo = LoadedTy;
+ if (TypeToCastTo->getScalarType()->isPointerTy())
+ TypeToCastTo = TD.getIntPtrType(TypeToCastTo);
+
+ if (StoredValTy != TypeToCastTo)
+ StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
+
+ // Cast to pointer if the load needs a pointer type.
+ if (LoadedTy->getScalarType()->isPointerTy())
+ StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
+
+ return StoredVal;
+ }
+
+ // If the loaded value is smaller than the available value, then we can
+ // extract out a piece from it. If the available value is too small, then we
+ // can't do anything.
+ assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
+
+ // Convert source pointers to integers, which can be manipulated.
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = TD.getIntPtrType(StoredValTy);
+ StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ // Convert vectors and fp to integer, which can be manipulated.
+ if (!StoredValTy->isIntegerTy()) {
+ StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
+ StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ // If this is a big-endian system, we need to shift the value down to the low
+ // bits so that a truncate will work.
+ if (TD.isBigEndian()) {
+ Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
+ StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
+ }
+
+ // Truncate the integer to the right size now.
+ Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+ StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
+
+ if (LoadedTy == NewIntTy)
+ return StoredVal;
+
+ // If the result is a pointer, inttoptr.
+ if (LoadedTy->getScalarType()->isPointerTy())
+ return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
+
+ // Otherwise, bitcast.
+ return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
+}
+
+/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering memory write (store,
+/// memset, memcpy, memmove). This means that the write *may* provide bits used
+/// by the load but we can't be sure because the pointers don't mustalias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up. This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
+static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
+ Value *WritePtr,
+ uint64_t WriteSizeInBits,
+ const DataLayout &TD) {
+ // If the loaded or stored value is a first class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+ return -1;
+
+ int64_t StoreOffset = 0, LoadOffset = 0;
+ Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD);
+ if (StoreBase != LoadBase)
+ return -1;
+
+ // If the load and store are to the exact same address, they should have been
+ // a must alias. AA must have gotten confused.
+ // FIXME: Study to see if/when this happens. One case is forwarding a memset
+ // to a load from the base of the memset.
+#if 0
+ if (LoadOffset == StoreOffset) {
+ dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *WritePtr << "\n"
+ << "Store Offs = " << StoreOffset << "\n"
+ << "Load Ptr = " << *LoadPtr << "\n";
+ abort();
+ }
+#endif
+
+ // If the load and store don't overlap at all, the store doesn't provide
+ // anything to the load. In this case, they really don't alias at all, AA
+ // must have gotten confused.
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
+
+ if ((WriteSizeInBits & 7) | (LoadSize & 7))
+ return -1;
+ uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
+ LoadSize >>= 3;
+
+
+ bool isAAFailure = false;
+ if (StoreOffset < LoadOffset)
+ isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
+ else
+ isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
+
+ if (isAAFailure) {
+#if 0
+ dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *WritePtr << "\n"
+ << "Store Offs = " << StoreOffset << "\n"
+ << "Load Ptr = " << *LoadPtr << "\n";
+ abort();
+#endif
+ return -1;
+ }
+
+ // If the Load isn't completely contained within the stored bits, we don't
+ // have all the bits to feed it. We could do something crazy in the future
+ // (issue a smaller load then merge the bits in) but this seems unlikely to be
+ // valuable.
+ if (StoreOffset > LoadOffset ||
+ StoreOffset+StoreSize < LoadOffset+LoadSize)
+ return -1;
+
+ // Okay, we can do this transformation. Return the number of bytes into the
+ // store that the load is.
+ return LoadOffset-StoreOffset;
+}
+
+/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
+ StoreInst *DepSI,
+ const DataLayout &TD) {
+ // Cannot handle reading from store of first-class aggregate yet.
+ if (DepSI->getValueOperand()->getType()->isStructTy() ||
+ DepSI->getValueOperand()->getType()->isArrayTy())
+ return -1;
+
+ Value *StorePtr = DepSI->getPointerOperand();
+ uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+ StorePtr, StoreSize, TD);
+}
+
+/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
+/// memdep query of a load that ends up being clobbered by another load. See if
+/// the other load can feed into the second load.
+static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
+ LoadInst *DepLI, const DataLayout &TD){
+ // Cannot handle reading from store of first-class aggregate yet.
+ if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+ return -1;
+
+ Value *DepPtr = DepLI->getPointerOperand();
+ uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
+ int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+ if (R != -1) return R;
+
+ // If we have a load/load clobber an DepLI can be widened to cover this load,
+ // then we should widen it!
+ int64_t LoadOffs = 0;
+ const Value *LoadBase =
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &TD);
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
+ if (Size == 0) return -1;
+
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
+}
+
+
+
+static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
+ MemIntrinsic *MI,
+ const DataLayout &TD) {
+ // If the mem operation is a non-constant size, we can't handle it.
+ ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+ if (SizeCst == 0) return -1;
+ uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
+
+ // If this is memset, we just need to see if the offset is valid in the size
+ // of the memset..
+ if (MI->getIntrinsicID() == Intrinsic::memset)
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, TD);
+
+ // If we have a memcpy/memmove, the only case we can handle is if this is a
+ // copy from constant memory. In that case, we can read directly from the
+ // constant memory.
+ MemTransferInst *MTI = cast<MemTransferInst>(MI);
+
+ Constant *Src = dyn_cast<Constant>(MTI->getSource());
+ if (Src == 0) return -1;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
+ if (GV == 0 || !GV->isConstant()) return -1;
+
+ // See if the access is within the bounds of the transfer.
+ int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+ MI->getDest(), MemSizeInBits, TD);
+ if (Offset == -1)
+ return Offset;
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src = ConstantExpr::getBitCast(Src,
+ llvm::Type::getInt8PtrTy(Src->getContext()));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
+ Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ if (ConstantFoldLoadFromConstPtr(Src, &TD))
+ return Offset;
+ return -1;
+}
+
+
+/// GetStoreValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store. This means
+/// that the store provides bits used by the load but we the pointers don't
+/// mustalias. Check this case to see if there is anything more we can do
+/// before we give up.
+static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
+ Type *LoadTy,
+ Instruction *InsertPt, const DataLayout &TD){
+ LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+ uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+ uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
+
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+
+ // Compute which bits of the stored value are being used by the load. Convert
+ // to an integer type to start with.
+ if (SrcVal->getType()->getScalarType()->isPointerTy())
+ SrcVal = Builder.CreatePtrToInt(SrcVal,
+ TD.getIntPtrType(SrcVal->getType()));
+ if (!SrcVal->getType()->isIntegerTy())
+ SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
+
+ // Shift the bits to the least significant depending on endianness.
+ unsigned ShiftAmt;
+ if (TD.isLittleEndian())
+ ShiftAmt = Offset*8;
+ else
+ ShiftAmt = (StoreSize-LoadSize-Offset)*8;
+
+ if (ShiftAmt)
+ SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
+
+ if (LoadSize != StoreSize)
+ SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
+
+ return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
+}
+
+/// GetLoadValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering load. This means
+/// that the load *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias. Check this case to see if there is
+/// anything more we can do before we give up.
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
+ Type *LoadTy, Instruction *InsertPt,
+ GVN &gvn) {
+ const DataLayout &TD = *gvn.getDataLayout();
+ // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+ // widen SrcVal out to a larger load.
+ unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+ if (Offset+LoadSize > SrcValSize) {
+ assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
+ // If we have a load/load clobber an DepLI can be widened to cover this
+ // load, then we should widen it to the next power of 2 size big enough!
+ unsigned NewLoadSize = Offset+LoadSize;
+ if (!isPowerOf2_32(NewLoadSize))
+ NewLoadSize = NextPowerOf2(NewLoadSize);
+
+ Value *PtrVal = SrcVal->getPointerOperand();
+
+ // Insert the new load after the old load. This ensures that subsequent
+ // memdep queries will find the new load. We can't easily remove the old
+ // load completely because it is already in the value numbering table.
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+ Type *DestPTy =
+ IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
+ DestPTy = PointerType::get(DestPTy,
+ cast<PointerType>(PtrVal->getType())->getAddressSpace());
+ Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
+ PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+ LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ NewLoad->takeName(SrcVal);
+ NewLoad->setAlignment(SrcVal->getAlignment());
+
+ DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+ // Replace uses of the original load with the wider load. On a big endian
+ // system, we need to shift down to get the relevant bits.
+ Value *RV = NewLoad;
+ if (TD.isBigEndian())
+ RV = Builder.CreateLShr(RV,
+ NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
+ RV = Builder.CreateTrunc(RV, SrcVal->getType());
+ SrcVal->replaceAllUsesWith(RV);
+
+ // We would like to use gvn.markInstructionForDeletion here, but we can't
+ // because the load is already memoized into the leader map table that GVN
+ // tracks. It is potentially possible to remove the load from the table,
+ // but then there all of the operations based on it would need to be
+ // rehashed. Just leave the dead load around.
+ gvn.getMemDep().removeInstruction(SrcVal);
+ SrcVal = NewLoad;
+ }
+
+ return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
+}
+
+
+/// GetMemInstValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, Instruction *InsertPt,
+ const DataLayout &TD){
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+
+ // We know that this method is only called when the mem transfer fully
+ // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+ // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+ // independently of what the offset is.
+ Value *Val = MSI->getValue();
+ if (LoadSize != 1)
+ Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
+
+ Value *OneElt = Val;
+
+ // Splat the value out to the right number of bits.
+ for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
+ // If we can double the number of bytes set, do it.
+ if (NumBytesSet*2 <= LoadSize) {
+ Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
+ Val = Builder.CreateOr(Val, ShVal);
+ NumBytesSet <<= 1;
+ continue;
+ }
+
+ // Otherwise insert one byte at a time.
+ Value *ShVal = Builder.CreateShl(Val, 1*8);
+ Val = Builder.CreateOr(OneElt, ShVal);
+ ++NumBytesSet;
+ }
+
+ return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src = ConstantExpr::getBitCast(Src,
+ llvm::Type::getInt8PtrTy(Src->getContext()));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
+ Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ return ConstantFoldLoadFromConstPtr(Src, &TD);
+}
+
+namespace {
+
+struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 2, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(isMemIntrinValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+ }
+};
+
+} // end anonymous namespace
+
+/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
+/// construct SSA form, allowing us to eliminate LI. This returns the value
+/// that should be used at LI's definition site.
+static Value *ConstructSSAForLoadSet(LoadInst *LI,
+ SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
+ GVN &gvn) {
+ // Check for the fully redundant, dominating load case. In this case, we can
+ // just use the dominating value directly.
+ if (ValuesPerBlock.size() == 1 &&
+ gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
+ LI->getParent()))
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
+
+ // Otherwise, we have to construct SSA form.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSAUpdate(&NewPHIs);
+ SSAUpdate.Initialize(LI->getType(), LI->getName());
+
+ Type *LoadTy = LI->getType();
+
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ BasicBlock *BB = AV.BB;
+
+ if (SSAUpdate.HasValueForBlock(BB))
+ continue;
+
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
+ }
+
+ // Perform PHI construction.
+ Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
+
+ // If new PHI nodes were created, notify alias analysis.
+ if (V->getType()->getScalarType()->isPointerTy()) {
+ AliasAnalysis *AA = gvn.getAliasAnalysis();
+
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ AA->copyValue(LI, NewPHIs[i]);
+
+ // Now that we've copied information to the new PHIs, scan through
+ // them again and inform alias analysis that we've added potentially
+ // escaping uses to any values that are operands to these PHIs.
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
+ PHINode *P = NewPHIs[i];
+ for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
+ unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+ AA->addEscapingUse(P->getOperandUse(jj));
+ }
+ }
+ }
+
+ return V;
+}
+
+static bool isLifetimeStart(const Instruction *Inst) {
+ if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
+ return II->getIntrinsicID() == Intrinsic::lifetime_start;
+ return false;
+}
+
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Find the non-local dependencies of the load.
+ SmallVector<NonLocalDepResult, 64> Deps;
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+ //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
+ // << Deps.size() << *LI << '\n');
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ unsigned NumDeps = Deps.size();
+ if (NumDeps > 100)
+ return false;
+
+ // If we had a phi translation failure, we'll have a single entry which is a
+ // clobber in the current block. Reject this early.
+ if (NumDeps == 1 &&
+ !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
+ DEBUG(
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " has unknown dependencies\n";
+ );
+ return false;
+ }
+
+ // Filter out useless results (non-locals, etc). Keep track of the blocks
+ // where we have a value available in repl, also keep track of whether we see
+ // dependencies that produce an unknown value for the load (such as a call
+ // that could potentially clobber the load).
+ SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
+ SmallVector<BasicBlock*, 64> UnavailableBlocks;
+
+ for (unsigned i = 0, e = NumDeps; i != e; ++i) {
+ BasicBlock *DepBB = Deps[i].getBB();
+ MemDepResult DepInfo = Deps[i].getResult();
+
+ if (!DepInfo.isDef() && !DepInfo.isClobber()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ if (DepInfo.isClobber()) {
+ // The address being loaded in this non-local block may not be the same as
+ // the pointer operand of the load if PHI translation occurs. Make sure
+ // to consider the right address.
+ Value *Address = Deps[i].getAddress();
+
+ // If the dependence is to a store that writes to a superset of the bits
+ // read by the load, we can extract the bits we need for the load from the
+ // stored value.
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+ if (TD && Address) {
+ int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
+ DepSI, *TD);
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ DepSI->getValueOperand(),
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+ // if we have this, replace the later with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ if (DepLI != LI && Address && TD) {
+ int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
+ LI->getPointerOperand(),
+ DepLI, *TD);
+
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ // If the clobbering value is a memset/memcpy/memmove, see if we can
+ // forward a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+ if (TD && Address) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+ DepMI, *TD);
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ // DepInfo.isDef() here
+
+ Instruction *DepInst = DepInfo.getInst();
+
+ // Loading the allocation -> undef.
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
+ // Loading immediately after lifetime begin -> undef.
+ isLifetimeStart(DepInst)) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ UndefValue::get(LI->getType())));
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
+ // Reject loads and stores that are to the same address but are of
+ // different types if we have to.
+ if (S->getValueOperand()->getType() != LI->getType()) {
+ // If the stored value is larger or equal to the loaded value, we can
+ // reuse it.
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), *TD)) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ }
+
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ S->getValueOperand()));
+ continue;
+ }
+
+ if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
+ // If the types mismatch and we can't handle it, reject reuse of the load.
+ if (LD->getType() != LI->getType()) {
+ // If the stored value is larger or equal to the loaded value, we can
+ // reuse it.
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ }
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
+ continue;
+ }
+
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty()) return false;
+
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ // Okay, we have *some* definitions of the value. This means that the value
+ // is available in some of our (transitive) predecessors. Lets think about
+ // doing PRE of this load. This will involve inserting a new load into the
+ // predecessor when it's not available. We could do this in general, but
+ // prefer to not increase code size. As such, we only do this when we know
+ // that we only have to insert *one* load (which means we're basically moving
+ // the load, not inserting a new one).
+
+ SmallPtrSet<BasicBlock *, 4> Blockers;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ Blockers.insert(UnavailableBlocks[i]);
+
+ // Let's find the first basic block with more than one predecessor. Walk
+ // backwards through predecessors if needed.
+ BasicBlock *LoadBB = LI->getParent();
+ BasicBlock *TmpBB = LoadBB;
+
+ bool allSingleSucc = true;
+ while (TmpBB->getSinglePredecessor()) {
+ TmpBB = TmpBB->getSinglePredecessor();
+ if (TmpBB == LoadBB) // Infinite (unreachable) loop.
+ return false;
+ if (Blockers.count(TmpBB))
+ return false;
+
+ // If any of these blocks has more than one successor (i.e. if the edge we
+ // just traversed was critical), then there are other paths through this
+ // block along which the load may not be anticipated. Hoisting the load
+ // above this block would be adding the load to execution paths along
+ // which it was not previously executed.
+ if (TmpBB->getTerminator()->getNumSuccessors() != 1)
+ return false;
+ }
+
+ assert(TmpBB);
+ LoadBB = TmpBB;
+
+ // Check to see how many predecessors have the loaded value fully
+ // available.
+ DenseMap<BasicBlock*, Value*> PredLoads;
+ DenseMap<BasicBlock*, char> FullyAvailableBlocks;
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+ FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
+ for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
+ PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
+ continue;
+ }
+ PredLoads[Pred] = 0;
+
+ if (Pred->getTerminator()->getNumSuccessors() != 1) {
+ if (isa<IndirectBrInst>(Pred->getTerminator())) {
+ DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
+ if (LoadBB->isLandingPad()) {
+ DEBUG(dbgs()
+ << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
+ unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
+ NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
+ }
+ }
+
+ if (!NeedToSplit.empty()) {
+ toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
+ return false;
+ }
+
+ // Decide whether PRE is profitable for this load.
+ unsigned NumUnavailablePreds = PredLoads.size();
+ assert(NumUnavailablePreds != 0 &&
+ "Fully available value should be eliminated above!");
+
+ // If this load is unavailable in multiple predecessors, reject it.
+ // FIXME: If we could restructure the CFG, we could make a common pred with
+ // all the preds that don't have an available LI and insert a new load into
+ // that one block.
+ if (NumUnavailablePreds != 1)
+ return false;
+
+ // Check if the load can safely be moved to all the unavailable predecessors.
+ bool CanDoPRE = true;
+ SmallVector<Instruction*, 8> NewInsts;
+ for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
+ E = PredLoads.end(); I != E; ++I) {
+ BasicBlock *UnavailablePred = I->first;
+
+ // Do PHI translation to get its value in the predecessor if necessary. The
+ // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
+
+ // If all preds have a single successor, then we know it is safe to insert
+ // the load on the pred (?!?), so we can insert code to materialize the
+ // pointer if it is not available.
+ PHITransAddr Address(LI->getPointerOperand(), TD);
+ Value *LoadPtr = 0;
+ if (allSingleSucc) {
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
+ } else {
+ Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
+ LoadPtr = Address.getAddr();
+ }
+
+ // If we couldn't find or insert a computation of this phi translated value,
+ // we fail PRE.
+ if (LoadPtr == 0) {
+ DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
+ << *LI->getPointerOperand() << "\n");
+ CanDoPRE = false;
+ break;
+ }
+
+ // Make sure it is valid to move this load here. We have to watch out for:
+ // @1 = getelementptr (i8* p, ...
+ // test p and branch if == 0
+ // load @1
+ // It is valid to have the getelementptr before the test, even if p can
+ // be 0, as getelementptr only does address arithmetic.
+ // If we are not pushing the value through any multiple-successor blocks
+ // we do not have this case. Otherwise, check that the load is safe to
+ // put anywhere; this can be improved, but should be conservatively safe.
+ if (!allSingleSucc &&
+ // FIXME: REEVALUTE THIS.
+ !isSafeToLoadUnconditionally(LoadPtr,
+ UnavailablePred->getTerminator(),
+ LI->getAlignment(), TD)) {
+ CanDoPRE = false;
+ break;
+ }
+
+ I->second = LoadPtr;
+ }
+
+ if (!CanDoPRE) {
+ while (!NewInsts.empty()) {
+ Instruction *I = NewInsts.pop_back_val();
+ if (MD) MD->removeInstruction(I);
+ I->eraseFromParent();
+ }
+ return false;
+ }
+
+ // Okay, we can eliminate this load by inserting a reload in the predecessor
+ // and using PHI construction to get the value in the other predecessors, do
+ // it.
+ DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
+ DEBUG(if (!NewInsts.empty())
+ dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
+ << *NewInsts.back() << '\n');
+
+ // Assign value numbers to the new instructions.
+ for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
+ // FIXME: We really _ought_ to insert these value numbers into their
+ // parent's availability map. However, in doing so, we risk getting into
+ // ordering issues. If a block hasn't been processed yet, we would be
+ // marking a value as AVAIL-IN, which isn't what we intend.
+ VN.lookup_or_add(NewInsts[i]);
+ }
+
+ for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
+ E = PredLoads.end(); I != E; ++I) {
+ BasicBlock *UnavailablePred = I->first;
+ Value *LoadPtr = I->second;
+
+ Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+
+ // Transfer the old load's TBAA tag to the new load.
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
+ NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+
+ // Transfer DebugLoc.
+ NewLoad->setDebugLoc(LI->getDebugLoc());
+
+ // Add the newly created load.
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
+ NewLoad));
+ MD->invalidateCachedPointerInfo(LoadPtr);
+ DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
+ }
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumPRELoad;
+ return true;
+}
+
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
+ // Patch the replacement so that it is not more restrictive than the value
+ // being replaced.
+ BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
+ BinaryOperator *ReplOp = dyn_cast<BinaryOperator>(Repl);
+ if (Op && ReplOp && isa<OverflowingBinaryOperator>(Op) &&
+ isa<OverflowingBinaryOperator>(ReplOp)) {
+ if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap())
+ ReplOp->setHasNoSignedWrap(false);
+ if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap())
+ ReplOp->setHasNoUnsignedWrap(false);
+ }
+ if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
+ SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata;
+ ReplInst->getAllMetadataOtherThanDebugLoc(Metadata);
+ for (int i = 0, n = Metadata.size(); i < n; ++i) {
+ unsigned Kind = Metadata[i].first;
+ MDNode *IMD = I->getMetadata(Kind);
+ MDNode *ReplMD = Metadata[i].second;
+ switch(Kind) {
+ default:
+ ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata
+ break;
+ case LLVMContext::MD_dbg:
+ llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
+ case LLVMContext::MD_tbaa:
+ ReplInst->setMetadata(Kind, MDNode::getMostGenericTBAA(IMD, ReplMD));
+ break;
+ case LLVMContext::MD_range:
+ ReplInst->setMetadata(Kind, MDNode::getMostGenericRange(IMD, ReplMD));
+ break;
+ case LLVMContext::MD_prof:
+ llvm_unreachable("MD_prof in a non terminator instruction");
+ break;
+ case LLVMContext::MD_fpmath:
+ ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD));
+ break;
+ }
+ }
+ }
+}
+
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+ patchReplacementInstruction(I, Repl);
+ I->replaceAllUsesWith(Repl);
+}
+
+/// processLoad - Attempt to eliminate a load, first by eliminating it
+/// locally, and then attempting non-local elimination if that fails.
+bool GVN::processLoad(LoadInst *L) {
+ if (!MD)
+ return false;
+
+ if (!L->isSimple())
+ return false;
+
+ if (L->use_empty()) {
+ markInstructionForDeletion(L);
+ return true;
+ }
+
+ // ... to a pointer that has been loaded from before...
+ MemDepResult Dep = MD->getDependency(L);
+
+ // If we have a clobber and target data is around, see if this is a clobber
+ // that we can fix up through code synthesis.
+ if (Dep.isClobber() && TD) {
+ // Check to see if we have something like this:
+ // store i32 123, i32* %P
+ // %A = bitcast i32* %P to i8*
+ // %B = gep i8* %A, i32 1
+ // %C = load i8* %B
+ //
+ // We could do that by recognizing if the clobber instructions are obviously
+ // a common base + constant offset, and if the previous store (or memset)
+ // completely covers this load. This sort of thing can happen in bitfield
+ // access code.
+ Value *AvailVal = 0;
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
+ int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
+ L->getPointerOperand(),
+ DepSI, *TD);
+ if (Offset != -1)
+ AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
+ L->getType(), L, *TD);
+ }
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+ // if we have this, replace the later with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ if (DepLI == L)
+ return false;
+
+ int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
+ L->getPointerOperand(),
+ DepLI, *TD);
+ if (Offset != -1)
+ AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
+ }
+
+ // If the clobbering value is a memset/memcpy/memmove, see if we can forward
+ // a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
+ L->getPointerOperand(),
+ DepMI, *TD);
+ if (Offset != -1)
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
+ }
+
+ if (AvailVal) {
+ DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
+ << *AvailVal << '\n' << *L << "\n\n\n");
+
+ // Replace the load!
+ L->replaceAllUsesWith(AvailVal);
+ if (AvailVal->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(AvailVal);
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+ }
+
+ // If the value isn't available, don't do anything!
+ if (Dep.isClobber()) {
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
+ Instruction *I = Dep.getInst();
+ dbgs() << " is clobbered by " << *I << '\n';
+ );
+ return false;
+ }
+
+ // If it is defined in another block, try harder.
+ if (Dep.isNonLocal())
+ return processNonLocalLoad(L);
+
+ if (!Dep.isDef()) {
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
+ dbgs() << " has unknown dependence\n";
+ );
+ return false;
+ }
+
+ Instruction *DepInst = Dep.getInst();
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
+ Value *StoredVal = DepSI->getValueOperand();
+
+ // The store and load are to a must-aliased pointer, but they may not
+ // actually have the same type. See if we know how to reuse the stored
+ // value (depending on its type).
+ if (StoredVal->getType() != L->getType()) {
+ if (TD) {
+ StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
+ L, *TD);
+ if (StoredVal == 0)
+ return false;
+
+ DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+ << '\n' << *L << "\n\n\n");
+ }
+ else
+ return false;
+ }
+
+ // Remove it!
+ L->replaceAllUsesWith(StoredVal);
+ if (StoredVal->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(StoredVal);
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
+ Value *AvailableVal = DepLI;
+
+ // The loads are of a must-aliased pointer, but they may not actually have
+ // the same type. See if we know how to reuse the previously loaded value
+ // (depending on its type).
+ if (DepLI->getType() != L->getType()) {
+ if (TD) {
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
+ L, *TD);
+ if (AvailableVal == 0)
+ return false;
+
+ DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+ << "\n" << *L << "\n\n\n");
+ }
+ else
+ return false;
+ }
+
+ // Remove it!
+ patchAndReplaceAllUsesWith(L, AvailableVal);
+ if (DepLI->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(DepLI);
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // If this load really doesn't depend on anything, then we must be loading an
+ // undef value. This can happen when loading for a fresh allocation with no
+ // intervening stores, for example.
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // If this load occurs either right after a lifetime begin,
+ // then the loaded value is undefined.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// findLeader - In order to find a leader for a given value number at a
+// specific basic block, we first obtain the list of all Values for that number,
+// and then scan the list to find one whose block dominates the block in
+// question. This is fast because dominator tree queries consist of only
+// a few comparisons of DFS numbers.
+Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
+ LeaderTableEntry Vals = LeaderTable[num];
+ if (!Vals.Val) return 0;
+
+ Value *Val = 0;
+ if (DT->dominates(Vals.BB, BB)) {
+ Val = Vals.Val;
+ if (isa<Constant>(Val)) return Val;
+ }
+
+ LeaderTableEntry* Next = Vals.Next;
+ while (Next) {
+ if (DT->dominates(Next->BB, BB)) {
+ if (isa<Constant>(Next->Val)) return Next->Val;
+ if (!Val) Val = Next->Val;
+ }
+
+ Next = Next->Next;
+ }
+
+ return Val;
+}
+
+/// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the
+/// use is dominated by the given basic block. Returns the number of uses that
+/// were replaced.
+unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
+ const BasicBlockEdge &Root) {
+ unsigned Count = 0;
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE; ) {
+ Use &U = (UI++).getUse();
+
+ if (DT->dominates(Root, U)) {
+ U.set(To);
+ ++Count;
+ }
+ }
+ return Count;
+}
+
+/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
+/// true if every path from the entry block to 'Dst' passes via this edge. In
+/// particular 'Dst' must not be reachable via another edge from 'Src'.
+static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
+ DominatorTree *DT) {
+ // While in theory it is interesting to consider the case in which Dst has
+ // more than one predecessor, because Dst might be part of a loop which is
+ // only reachable from Src, in practice it is pointless since at the time
+ // GVN runs all such loops have preheaders, which means that Dst will have
+ // been changed to have only one predecessor, namely Src.
+ const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
+ const BasicBlock *Src = E.getStart();
+ assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
+ (void)Src;
+ return Pred != 0;
+}
+
+/// propagateEquality - The given values are known to be equal in every block
+/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
+/// 'RHS' everywhere in the scope. Returns whether a change was made.
+bool GVN::propagateEquality(Value *LHS, Value *RHS,
+ const BasicBlockEdge &Root) {
+ SmallVector<std::pair<Value*, Value*>, 4> Worklist;
+ Worklist.push_back(std::make_pair(LHS, RHS));
+ bool Changed = false;
+ // For speed, compute a conservative fast approximation to
+ // DT->dominates(Root, Root.getEnd());
+ bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
+
+ while (!Worklist.empty()) {
+ std::pair<Value*, Value*> Item = Worklist.pop_back_val();
+ LHS = Item.first; RHS = Item.second;
+
+ if (LHS == RHS) continue;
+ assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
+
+ // Don't try to propagate equalities between constants.
+ if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue;
+
+ // Prefer a constant on the right-hand side, or an Argument if no constants.
+ if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
+ std::swap(LHS, RHS);
+ assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
+
+ // If there is no obvious reason to prefer the left-hand side over the right-
+ // hand side, ensure the longest lived term is on the right-hand side, so the
+ // shortest lived term will be replaced by the longest lived. This tends to
+ // expose more simplifications.
+ uint32_t LVN = VN.lookup_or_add(LHS);
+ if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
+ (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
+ // Move the 'oldest' value to the right-hand side, using the value number as
+ // a proxy for age.
+ uint32_t RVN = VN.lookup_or_add(RHS);
+ if (LVN < RVN) {
+ std::swap(LHS, RHS);
+ LVN = RVN;
+ }
+ }
+
+ // If value numbering later sees that an instruction in the scope is equal
+ // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
+ // the invariant that instructions only occur in the leader table for their
+ // own value number (this is used by removeFromLeaderTable), do not do this
+ // if RHS is an instruction (if an instruction in the scope is morphed into
+ // LHS then it will be turned into RHS by the next GVN iteration anyway, so
+ // using the leader table is about compiling faster, not optimizing better).
+ // The leader table only tracks basic blocks, not edges. Only add to if we
+ // have the simple case where the edge dominates the end.
+ if (RootDominatesEnd && !isa<Instruction>(RHS))
+ addToLeaderTable(LVN, RHS, Root.getEnd());
+
+ // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
+ // LHS always has at least one use that is not dominated by Root, this will
+ // never do anything if LHS has only one use.
+ if (!LHS->hasOneUse()) {
+ unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
+ Changed |= NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+ }
+
+ // Now try to deduce additional equalities from this one. For example, if the
+ // known equality was "(A != B)" == "false" then it follows that A and B are
+ // equal in the scope. Only boolean equalities with an explicit true or false
+ // RHS are currently supported.
+ if (!RHS->getType()->isIntegerTy(1))
+ // Not a boolean equality - bail out.
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
+ if (!CI)
+ // RHS neither 'true' nor 'false' - bail out.
+ continue;
+ // Whether RHS equals 'true'. Otherwise it equals 'false'.
+ bool isKnownTrue = CI->isAllOnesValue();
+ bool isKnownFalse = !isKnownTrue;
+
+ // If "A && B" is known true then both A and B are known true. If "A || B"
+ // is known false then both A and B are known false.
+ Value *A, *B;
+ if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
+ (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
+ Worklist.push_back(std::make_pair(A, RHS));
+ Worklist.push_back(std::make_pair(B, RHS));
+ continue;
+ }
+
+ // If we are propagating an equality like "(A == B)" == "true" then also
+ // propagate the equality A == B. When propagating a comparison such as
+ // "(A >= B)" == "true", replace all instances of "A < B" with "false".
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) {
+ Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
+
+ // If "A == B" is known true, or "A != B" is known false, then replace
+ // A with B everywhere in the scope.
+ if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
+ (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
+ Worklist.push_back(std::make_pair(Op0, Op1));
+
+ // If "A >= B" is known true, replace "A < B" with false everywhere.
+ CmpInst::Predicate NotPred = Cmp->getInversePredicate();
+ Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
+ // Since we don't have the instruction "A < B" immediately to hand, work out
+ // the value number that it would have and use that to find an appropriate
+ // instruction (if any).
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1);
+ // If the number we were assigned was brand new then there is no point in
+ // looking for an instruction realizing it: there cannot be one!
+ if (Num < NextNum) {
+ Value *NotCmp = findLeader(Root.getEnd(), Num);
+ if (NotCmp && isa<Instruction>(NotCmp)) {
+ unsigned NumReplacements =
+ replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
+ Changed |= NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+ }
+ }
+ // Ensure that any instruction in scope that gets the "A < B" value number
+ // is replaced with false.
+ // The leader table only tracks basic blocks, not edges. Only add to if we
+ // have the simple case where the edge dominates the end.
+ if (RootDominatesEnd)
+ addToLeaderTable(Num, NotVal, Root.getEnd());
+
+ continue;
+ }
+ }
+
+ return Changed;
+}
+
+/// processInstruction - When calculating availability, handle an instruction
+/// by inserting it into the appropriate sets
+bool GVN::processInstruction(Instruction *I) {
+ // Ignore dbg info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // If the instruction can be easily simplified then do so now in preference
+ // to value numbering it. Value numbering often exposes redundancies, for
+ // example if it determines that %y is equal to %x then the instruction
+ // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
+ I->replaceAllUsesWith(V);
+ if (MD && V->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(I);
+ ++NumGVNSimpl;
+ return true;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (processLoad(LI))
+ return true;
+
+ unsigned Num = VN.lookup_or_add(LI);
+ addToLeaderTable(Num, LI, LI->getParent());
+ return false;
+ }
+
+ // For conditional branches, we can perform simple conditional propagation on
+ // the condition value itself.
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ return false;
+
+ Value *BranchCond = BI->getCondition();
+
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ // Avoid multiple edges early.
+ if (TrueSucc == FalseSucc)
+ return false;
+
+ BasicBlock *Parent = BI->getParent();
+ bool Changed = false;
+
+ Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
+ BasicBlockEdge TrueE(Parent, TrueSucc);
+ Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
+
+ Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
+ BasicBlockEdge FalseE(Parent, FalseSucc);
+ Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
+
+ return Changed;
+ }
+
+ // For switches, propagate the case values into the case destinations.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ Value *SwitchCond = SI->getCondition();
+ BasicBlock *Parent = SI->getParent();
+ bool Changed = false;
+
+ // Remember how many outgoing edges there are to every successor.
+ SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+ for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
+ ++SwitchEdges[SI->getSuccessor(i)];
+
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock *Dst = i.getCaseSuccessor();
+ // If there is only a single edge, propagate the case value into it.
+ if (SwitchEdges.lookup(Dst) == 1) {
+ BasicBlockEdge E(Parent, Dst);
+ Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
+ }
+ }
+ return Changed;
+ }
+
+ // Instructions with void type don't return a value, so there's
+ // no point in trying to find redundancies in them.
+ if (I->getType()->isVoidTy()) return false;
+
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ unsigned Num = VN.lookup_or_add(I);
+
+ // Allocations are always uniquely numbered, so we can save time and memory
+ // by fast failing them.
+ if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
+ // If the number we were assigned was a brand new VN, then we don't
+ // need to do a lookup to see if the number already exists
+ // somewhere in the domtree: it can't!
+ if (Num >= NextNum) {
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
+ // Perform fast-path value-number based elimination of values inherited from
+ // dominators.
+ Value *repl = findLeader(I->getParent(), Num);
+ if (repl == 0) {
+ // Failure, just remember this instance for future use.
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
+ // Remove it!
+ patchAndReplaceAllUsesWith(I, repl);
+ if (MD && repl->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(repl);
+ markInstructionForDeletion(I);
+ return true;
+}
+
+/// runOnFunction - This is the main transformation entry point for a function.
+bool GVN::runOnFunction(Function& F) {
+ if (!NoLoads)
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+ VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
+ VN.setMemDep(MD);
+ VN.setDomTree(DT);
+
+ bool Changed = false;
+ bool ShouldContinue = true;
+
+ // Merge unconditional branches, allowing PRE to catch more
+ // optimization opportunities.
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
+ BasicBlock *BB = FI++;
+
+ bool removedBlock = MergeBlockIntoPredecessor(BB, this);
+ if (removedBlock) ++NumGVNBlocks;
+
+ Changed |= removedBlock;
+ }
+
+ unsigned Iteration = 0;
+ while (ShouldContinue) {
+ DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
+ ShouldContinue = iterateOnFunction(F);
+ if (splitCriticalEdges())
+ ShouldContinue = true;
+ Changed |= ShouldContinue;
+ ++Iteration;
+ }
+
+ if (EnablePRE) {
+ bool PREChanged = true;
+ while (PREChanged) {
+ PREChanged = performPRE(F);
+ Changed |= PREChanged;
+ }
+ }
+ // FIXME: Should perform GVN again after PRE does something. PRE can move
+ // computations into blocks where they become fully redundant. Note that
+ // we can't do this until PRE's critical edge splitting updates memdep.
+ // Actually, when this happens, we should just fully integrate PRE into GVN.
+
+ cleanupGlobalSets();
+
+ return Changed;
+}
+
+
+bool GVN::processBlock(BasicBlock *BB) {
+ // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
+ // (and incrementing BI before processing an instruction).
+ assert(InstrsToErase.empty() &&
+ "We expect InstrsToErase to be empty across iterations");
+ bool ChangedFunction = false;
+
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE;) {
+ ChangedFunction |= processInstruction(BI);
+ if (InstrsToErase.empty()) {
+ ++BI;
+ continue;
+ }
+
+ // If we need some instructions deleted, do it now.
+ NumGVNInstr += InstrsToErase.size();
+
+ // Avoid iterator invalidation.
+ bool AtStart = BI == BB->begin();
+ if (!AtStart)
+ --BI;
+
+ for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(),
+ E = InstrsToErase.end(); I != E; ++I) {
+ DEBUG(dbgs() << "GVN removed: " << **I << '\n');
+ if (MD) MD->removeInstruction(*I);
+ DEBUG(verifyRemoved(*I));
+ (*I)->eraseFromParent();
+ }
+ InstrsToErase.clear();
+
+ if (AtStart)
+ BI = BB->begin();
+ else
+ ++BI;
+ }
+
+ return ChangedFunction;
+}
+
+/// performPRE - Perform a purely local form of PRE that looks for diamond
+/// control flow patterns and attempts to perform simple PRE at the join point.
+bool GVN::performPRE(Function &F) {
+ bool Changed = false;
+ SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+ BasicBlock *CurrentBlock = *DI;
+
+ // Nothing to PRE in the entry block.
+ if (CurrentBlock == &F.getEntryBlock()) continue;
+
+ // Don't perform PRE on a landing pad.
+ if (CurrentBlock->isLandingPad()) continue;
+
+ for (BasicBlock::iterator BI = CurrentBlock->begin(),
+ BE = CurrentBlock->end(); BI != BE; ) {
+ Instruction *CurInst = BI++;
+
+ if (isa<AllocaInst>(CurInst) ||
+ isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
+ CurInst->getType()->isVoidTy() ||
+ CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
+ isa<DbgInfoIntrinsic>(CurInst))
+ continue;
+
+ // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
+ // sinking the compare again, and it would force the code generator to
+ // move the i1 from processor flags or predicate registers into a general
+ // purpose register.
+ if (isa<CmpInst>(CurInst))
+ continue;
+
+ // We don't currently value number ANY inline asm calls.
+ if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+ if (CallI->isInlineAsm())
+ continue;
+
+ uint32_t ValNo = VN.lookup(CurInst);
+
+ // Look for the predecessors for PRE opportunities. We're
+ // only trying to solve the basic diamond case, where
+ // a value is computed in the successor and one predecessor,
+ // but not the other. We also explicitly disallow cases
+ // where the successor is its own predecessor, because they're
+ // more complicated to get right.
+ unsigned NumWith = 0;
+ unsigned NumWithout = 0;
+ BasicBlock *PREPred = 0;
+ predMap.clear();
+
+ for (pred_iterator PI = pred_begin(CurrentBlock),
+ PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // We're not interested in PRE where the block is its
+ // own predecessor, or in blocks with predecessors
+ // that are not reachable.
+ if (P == CurrentBlock) {
+ NumWithout = 2;
+ break;
+ } else if (!DT->isReachableFromEntry(P)) {
+ NumWithout = 2;
+ break;
+ }
+
+ Value* predV = findLeader(P, ValNo);
+ if (predV == 0) {
+ predMap.push_back(std::make_pair(static_cast<Value *>(0), P));
+ PREPred = P;
+ ++NumWithout;
+ } else if (predV == CurInst) {
+ /* CurInst dominates this predecessor. */
+ NumWithout = 2;
+ break;
+ } else {
+ predMap.push_back(std::make_pair(predV, P));
+ ++NumWith;
+ }
+ }
+
+ // Don't do PRE when it might increase code size, i.e. when
+ // we would need to insert instructions in more than one pred.
+ if (NumWithout != 1 || NumWith == 0)
+ continue;
+
+ // Don't do PRE across indirect branch.
+ if (isa<IndirectBrInst>(PREPred->getTerminator()))
+ continue;
+
+ // We can't do PRE safely on a critical edge, so instead we schedule
+ // the edge to be split and perform the PRE the next time we iterate
+ // on the function.
+ unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
+ if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
+ toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
+ continue;
+ }
+
+ // Instantiate the expression in the predecessor that lacked it.
+ // Because we are going top-down through the block, all value numbers
+ // will be available in the predecessor by the time we need them. Any
+ // that weren't originally present will have been instantiated earlier
+ // in this loop.
+ Instruction *PREInstr = CurInst->clone();
+ bool success = true;
+ for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
+ Value *Op = PREInstr->getOperand(i);
+ if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
+ continue;
+
+ if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
+ PREInstr->setOperand(i, V);
+ } else {
+ success = false;
+ break;
+ }
+ }
+
+ // Fail out if we encounter an operand that is not available in
+ // the PRE predecessor. This is typically because of loads which
+ // are not value numbered precisely.
+ if (!success) {
+ DEBUG(verifyRemoved(PREInstr));
+ delete PREInstr;
+ continue;
+ }
+
+ PREInstr->insertBefore(PREPred->getTerminator());
+ PREInstr->setName(CurInst->getName() + ".pre");
+ PREInstr->setDebugLoc(CurInst->getDebugLoc());
+ VN.add(PREInstr, ValNo);
+ ++NumGVNPRE;
+
+ // Update the availability map to include the new instruction.
+ addToLeaderTable(ValNo, PREInstr, PREPred);
+
+ // Create a PHI to make the value available in this block.
+ PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(),
+ CurInst->getName() + ".pre-phi",
+ CurrentBlock->begin());
+ for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
+ if (Value *V = predMap[i].first)
+ Phi->addIncoming(V, predMap[i].second);
+ else
+ Phi->addIncoming(PREInstr, PREPred);
+ }
+
+ VN.add(Phi, ValNo);
+ addToLeaderTable(ValNo, Phi, CurrentBlock);
+ Phi->setDebugLoc(CurInst->getDebugLoc());
+ CurInst->replaceAllUsesWith(Phi);
+ if (Phi->getType()->getScalarType()->isPointerTy()) {
+ // Because we have added a PHI-use of the pointer value, it has now
+ // "escaped" from alias analysis' perspective. We need to inform
+ // AA of this.
+ for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
+ ++ii) {
+ unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+ VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
+ }
+
+ if (MD)
+ MD->invalidateCachedPointerInfo(Phi);
+ }
+ VN.erase(CurInst);
+ removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
+
+ DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
+ if (MD) MD->removeInstruction(CurInst);
+ DEBUG(verifyRemoved(CurInst));
+ CurInst->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ if (splitCriticalEdges())
+ Changed = true;
+
+ return Changed;
+}
+
+/// splitCriticalEdges - Split critical edges found during the previous
+/// iteration that may enable further optimization.
+bool GVN::splitCriticalEdges() {
+ if (toSplit.empty())
+ return false;
+ do {
+ std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
+ SplitCriticalEdge(Edge.first, Edge.second, this);
+ } while (!toSplit.empty());
+ if (MD) MD->invalidateCachedPredecessors();
+ return true;
+}
+
+/// iterateOnFunction - Executes one iteration of GVN
+bool GVN::iterateOnFunction(Function &F) {
+ cleanupGlobalSets();
+
+ // Top-down walk of the dominator tree
+ bool Changed = false;
+#if 0
+ // Needed for value numbering with phi construction to work.
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
+ RE = RPOT.end(); RI != RE; ++RI)
+ Changed |= processBlock(*RI);
+#else
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI)
+ Changed |= processBlock(DI->getBlock());
+#endif
+
+ return Changed;
+}
+
+void GVN::cleanupGlobalSets() {
+ VN.clear();
+ LeaderTable.clear();
+ TableAllocator.Reset();
+}
+
+/// verifyRemoved - Verify that the specified instruction does not occur in our
+/// internal data structures.
+void GVN::verifyRemoved(const Instruction *Inst) const {
+ VN.verifyRemoved(Inst);
+
+ // Walk through the value number scope to make sure the instruction isn't
+ // ferreted away in it.
+ for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
+ I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
+ const LeaderTableEntry *Node = &I->second;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
+
+ while (Node->Next) {
+ Node = Node->Next;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
new file mode 100644
index 000000000000..5d02c68a7a47
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -0,0 +1,311 @@
+//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals which were merged into a biggest one can be addressed using offsets
+// from the same base pointer (no need for separate base pointer for each of the
+// global). Such a transformation can significantly reduce the register pressure
+// when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of 3 arrays should be kept in the registers, thus
+// this code has quite large register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// Pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// note that we saved 2 registers here almostly "for free".
+// ===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "global-merge"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
+STATISTIC(NumMerged , "Number of globals merged");
+namespace {
+ class GlobalMerge : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst, unsigned AddrSpace) const;
+
+ /// \brief Check if the given variable has been identified as must keep
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variables marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variables marked as "used"
+ void collectUsedGlobalVariables(Module &M);
+
+ /// Keep track of the GlobalVariable that must not be merged away
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit GlobalMerge(const TargetLowering *tli = 0)
+ : FunctionPass(ID), TLI(tli) {
+ initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual bool doFinalization(Module &M);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const DataLayout *TD;
+
+ GlobalCmp(const DataLayout *td) : TD(td) { }
+
+ bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char GlobalMerge::ID = 0;
+INITIALIZE_PASS(GlobalMerge, "global-merge",
+ "Global Merge", false, false)
+
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst, unsigned AddrSpace) const {
+ const DataLayout *TD = TLI->getDataLayout();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; j != e; ++j) {
+ Type *Ty = Globals[j]->getType()->getElementType();
+ MergedSize += TD->getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ }
+
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "_MergedGlobals",
+ 0, GlobalVariable::NotThreadLocal,
+ AddrSpace);
+ for (size_t k = i; k < j; ++k) {
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, k-i)
+ };
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ NumMerged++;
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+ // Follow the inwoke link to find the landing pad instruction
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
+
+bool GlobalMerge::doInitialization(Module &M) {
+ DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
+ BSSGlobals;
+ const DataLayout *TD = TLI->getDataLayout();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+ setMustKeepGlobalVariables(M);
+
+ // Grab all non-const globals.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal globals only
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ PointerType *PT = dyn_cast<PointerType>(I->getType());
+ assert(PT && "Global variable is not a pointer!");
+
+ unsigned AddressSpace = PT->getAddressSpace();
+
+ // Ignore fancy-aligned globals for now.
+ unsigned Alignment = TD->getPreferredAlignment(I);
+ Type *Ty = I->getType()->getElementType();
+ if (Alignment > TD->getABITypeAlignment(Ty))
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ // Ignore all "required" globals:
+ if (isMustKeepGlobalVariable(I))
+ continue;
+
+ if (TD->getTypeAllocSize(Ty) < MaxOffset) {
+ if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
+ .isBSSLocal())
+ BSSGlobals[AddressSpace].push_back(I);
+ else if (I->isConstant())
+ ConstGlobals[AddressSpace].push_back(I);
+ else
+ Globals[AddressSpace].push_back(I);
+ }
+ }
+
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = Globals.begin(), E = Globals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, false, I->first);
+
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, false, I->first);
+
+ if (EnableGlobalMergeOnConst)
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, true, I->first);
+
+ return Changed;
+}
+
+bool GlobalMerge::runOnFunction(Function &F) {
+ return false;
+}
+
+bool GlobalMerge::doFinalization(Module &M) {
+ MustKeepGlobalVariables.clear();
+ return false;
+}
+
+Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
+ return new GlobalMerge(tli);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
new file mode 100644
index 000000000000..8e76c78f5ac3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -0,0 +1,1845 @@
+//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into simpler forms suitable for subsequent
+// analysis and transformation.
+//
+// If the trip count of a loop is computable, this pass also makes the following
+// changes:
+// 1. The exit condition for the loop is canonicalized to compare the
+// induction value against the exit value. This turns loops like:
+// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
+// 2. Any use outside of the loop of an expression derived from the indvar
+// is changed to compute the derived value outside of the loop, eliminating
+// the dependence on the exit value of the induction variable. If the only
+// purpose of the loop is to compute the exit value of some derived
+// expression, this transformation will make the loop dead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indvars"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+using namespace llvm;
+
+STATISTIC(NumWidened , "Number of indvars widened");
+STATISTIC(NumReplaced , "Number of exit values replaced");
+STATISTIC(NumLFTR , "Number of loop exit tests replaced");
+STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
+
+// Trip count verification can be enabled by default under NDEBUG if we
+// implement a strong expression equivalence checker in SCEV. Until then, we
+// use the verify-indvars flag, which may assert in some cases.
+static cl::opt<bool> VerifyIndvars(
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars"));
+
+namespace {
+ class IndVarSimplify : public LoopPass {
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ DataLayout *TD;
+ TargetLibraryInfo *TLI;
+
+ SmallVector<WeakVH, 16> DeadInsts;
+ bool Changed;
+ public:
+
+ static char ID; // Pass identification, replacement for typeid
+ IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0),
+ Changed(false) {
+ initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.setPreservesCFG();
+ }
+
+ private:
+ virtual void releaseMemory() {
+ DeadInsts.clear();
+ }
+
+ bool isValidRewrite(Value *FromVal, Value *ToVal);
+
+ void HandleFloatingPointIV(Loop *L, PHINode *PH);
+ void RewriteNonIntegerIVs(Loop *L);
+
+ void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
+
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+
+ Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter);
+
+ void SinkUnusedInvariants(Loop *L);
+ };
+}
+
+char IndVarSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
+ "Induction Variable Simplification", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(IndVarSimplify, "indvars",
+ "Induction Variable Simplification", false, false)
+
+Pass *llvm::createIndVarSimplifyPass() {
+ return new IndVarSimplify();
+}
+
+/// isValidRewrite - Return true if the SCEV expansion generated by the
+/// rewriter can replace the original value. SCEV guarantees that it
+/// produces the same value, but the way it is produced may be illegal IR.
+/// Ideally, this function will only be called for verification.
+bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
+ // If an SCEV expression subsumed multiple pointers, its expansion could
+ // reassociate the GEP changing the base pointer. This is illegal because the
+ // final address produced by a GEP chain must be inbounds relative to its
+ // underlying object. Otherwise basic alias analysis, among other things,
+ // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
+ // producing an expression involving multiple pointers. Until then, we must
+ // bail out here.
+ //
+ // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
+ // because it understands lcssa phis while SCEV does not.
+ Value *FromPtr = FromVal;
+ Value *ToPtr = ToVal;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
+ FromPtr = GEP->getPointerOperand();
+ }
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
+ ToPtr = GEP->getPointerOperand();
+ }
+ if (FromPtr != FromVal || ToPtr != ToVal) {
+ // Quickly check the common case
+ if (FromPtr == ToPtr)
+ return true;
+
+ // SCEV may have rewritten an expression that produces the GEP's pointer
+ // operand. That's ok as long as the pointer operand has the same base
+ // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
+ // base of a recurrence. This handles the case in which SCEV expansion
+ // converts a pointer type recurrence into a nonrecurrent pointer base
+ // indexed by an integer recurrence.
+
+ // If the GEP base pointer is a vector of pointers, abort.
+ if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
+ return false;
+
+ const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
+ const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
+ if (FromBase == ToBase)
+ return true;
+
+ DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
+ << *FromBase << " != " << *ToBase << "\n");
+
+ return false;
+ }
+ return true;
+}
+
+/// Determine the insertion point for this user. By default, insert immediately
+/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
+/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
+/// common dominator for the incoming blocks.
+static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
+ DominatorTree *DT) {
+ PHINode *PHI = dyn_cast<PHINode>(User);
+ if (!PHI)
+ return User;
+
+ Instruction *InsertPt = 0;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ if (PHI->getIncomingValue(i) != Def)
+ continue;
+
+ BasicBlock *InsertBB = PHI->getIncomingBlock(i);
+ if (!InsertPt) {
+ InsertPt = InsertBB->getTerminator();
+ continue;
+ }
+ InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
+ InsertPt = InsertBB->getTerminator();
+ }
+ assert(InsertPt && "Missing phi operand");
+ assert((!isa<Instruction>(Def) ||
+ DT->dominates(cast<Instruction>(Def), InsertPt)) &&
+ "def does not dominate all uses");
+ return InsertPt;
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+//===----------------------------------------------------------------------===//
+
+/// ConvertToSInt - Convert APF to an integer, if possible.
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+ bool isExact = false;
+ // See if we can convert this to an int64_t
+ uint64_t UIntVal;
+ if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+ &isExact) != APFloat::opOK || !isExact)
+ return false;
+ IntVal = UIntVal;
+ return true;
+}
+
+/// HandleFloatingPointIV - If the loop has floating induction variable
+/// then insert corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+/// bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+/// bar((double)i);
+///
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+ unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge^1;
+
+ // Check incoming value.
+ ConstantFP *InitValueVal =
+ dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
+
+ int64_t InitValue;
+ if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+ return;
+
+ // Check IV increment. Reject this PN if increment operation is not
+ // an add or increment value can not be represented by an integer.
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
+
+ // If this is not an add of the PHI with a constantfp, or if the constant fp
+ // is not an integer, bail out.
+ ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+ int64_t IncValue;
+ if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+ return;
+
+ // Check Incr uses. One user is PN and the other user is an exit condition
+ // used by the conditional terminator.
+ Value::use_iterator IncrUse = Incr->use_begin();
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
+ if (IncrUse == Incr->use_end()) return;
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
+ if (IncrUse != Incr->use_end()) return;
+
+ // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
+ // only used by a branch, we can't transform it.
+ FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+ if (!Compare)
+ Compare = dyn_cast<FCmpInst>(U2);
+ if (Compare == 0 || !Compare->hasOneUse() ||
+ !isa<BranchInst>(Compare->use_back()))
+ return;
+
+ BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+ // We need to verify that the branch actually controls the iteration count
+ // of the loop. If not, the new IV can overflow and no one will notice.
+ // The branch block must be in the loop and one of the successors must be out
+ // of the loop.
+ assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+ if (!L->contains(TheBr->getParent()) ||
+ (L->contains(TheBr->getSuccessor(0)) &&
+ L->contains(TheBr->getSuccessor(1))))
+ return;
+
+
+ // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+ // transform it.
+ ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+ int64_t ExitValue;
+ if (ExitValueVal == 0 ||
+ !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+ return;
+
+ // Find new predicate for integer comparison.
+ CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+ switch (Compare->getPredicate()) {
+ default: return; // Unknown comparison.
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
+ }
+
+ // We convert the floating point induction variable to a signed i32 value if
+ // we can. This is only safe if the comparison will not overflow in a way
+ // that won't be trapped by the integer equivalent operations. Check for this
+ // now.
+ // TODO: We could use i64 if it is native and the range requires it.
+
+ // The start/stride/exit values must all fit in signed i32.
+ if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
+ return;
+
+ // If not actually striding (add x, 0.0), avoid touching the code.
+ if (IncValue == 0)
+ return;
+
+ // Positive and negative strides have different safety conditions.
+ if (IncValue > 0) {
+ // If we have a positive stride, we require the init to be less than the
+ // exit value.
+ if (InitValue >= ExitValue)
+ return;
+
+ uint32_t Range = uint32_t(ExitValue-InitValue);
+ // Check for infinite loop, either:
+ // while (i <= Exit) or until (i > Exit)
+ if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
+ return;
+
+ } else {
+ // If we have a negative stride, we require the init to be greater than the
+ // exit value.
+ if (InitValue <= ExitValue)
+ return;
+
+ uint32_t Range = uint32_t(InitValue-ExitValue);
+ // Check for infinite loop, either:
+ // while (i >= Exit) or until (i < Exit)
+ if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(-IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
+ return;
+ }
+
+ IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
+
+ // Insert new integer induction variable.
+ PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
+ NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
+ PN->getIncomingBlock(IncomingEdge));
+
+ Value *NewAdd =
+ BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
+ Incr->getName()+".int", Incr);
+ NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
+
+ ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
+ ConstantInt::get(Int32Ty, ExitValue),
+ Compare->getName());
+
+ // In the following deletions, PN may become dead and may be deleted.
+ // Use a WeakVH to observe whether this happens.
+ WeakVH WeakPH = PN;
+
+ // Delete the old floating point exit comparison. The branch starts using the
+ // new comparison.
+ NewCompare->takeName(Compare);
+ Compare->replaceAllUsesWith(NewCompare);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
+
+ // Delete the old floating point increment.
+ Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+ RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
+
+ // If the FP induction variable still has uses, this is because something else
+ // in the loop uses its value. In order to canonicalize the induction
+ // variable, we chose to eliminate the IV and rewrite it in terms of an
+ // int->fp cast.
+ //
+ // We give preference to sitofp over uitofp because it is faster on most
+ // platforms.
+ if (WeakPH) {
+ Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
+ PN->getParent()->getFirstInsertionPt());
+ PN->replaceAllUsesWith(Conv);
+ RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
+ }
+ Changed = true;
+}
+
+void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+ // First step. Check to see if there are any floating-point recurrences.
+ // If there are, change them into integer recurrences, permitting analysis by
+ // the SCEV routines.
+ //
+ BasicBlock *Header = L->getHeader();
+
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
+ HandleFloatingPointIV(L, PN);
+
+ // If the loop previously had floating-point IV, ScalarEvolution
+ // may not have been able to compute a trip count. Now that we've done some
+ // re-writing, the trip count may be computable.
+ if (Changed)
+ SE->forgetLoop(L);
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
+/// RewriteLoopExitValues - Check to see if this loop has a computable
+/// loop-invariant execution count. If so, this means that we can compute the
+/// final value of any expressions that are recurrent in the loop, and
+/// substitute the exit values from the loop into any instructions outside of
+/// the loop that use the final values of the current expressions.
+///
+/// This is mostly redundant with the regular IndVarSimplify activities that
+/// happen later, except that it's more powerful in some cases, because it's
+/// able to brute-force evaluate arbitrary instructions as long as they have
+/// constant operands at the beginning of the loop.
+void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
+ // Verify the input to the pass in already in LCSSA form.
+ assert(L->isLCSSAForm(*DT));
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Find all values that are computed inside the loop, but used outside of it.
+ // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
+ // the exit blocks of the loop to find them.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBB = ExitBlocks[i];
+
+ // If there are no PHI nodes in this exit block, then no values defined
+ // inside the loop are used on this path, skip it.
+ PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
+ if (!PN) continue;
+
+ unsigned NumPreds = PN->getNumIncomingValues();
+
+ // Iterate over all of the PHI nodes.
+ BasicBlock::iterator BBI = ExitBB->begin();
+ while ((PN = dyn_cast<PHINode>(BBI++))) {
+ if (PN->use_empty())
+ continue; // dead use, don't replace it
+
+ // SCEV only supports integer expressions for now.
+ if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
+ continue;
+
+ // It's necessary to tell ScalarEvolution about this explicitly so that
+ // it can walk the def-use list and forget all SCEVs, as it may not be
+ // watching the PHI itself. Once the new exit value is in place, there
+ // may not be a def-use connection between the loop and every instruction
+ // which got a SCEVAddRecExpr for that loop.
+ SE->forgetValue(PN);
+
+ // Iterate over all of the values in all the PHI nodes.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // If the value being merged in is not integer or is not defined
+ // in the loop, skip it.
+ Value *InVal = PN->getIncomingValue(i);
+ if (!isa<Instruction>(InVal))
+ continue;
+
+ // If this pred is for a subloop, not L itself, skip it.
+ if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
+ continue; // The Block is in a subloop, skip it.
+
+ // Check that InVal is defined in the loop.
+ Instruction *Inst = cast<Instruction>(InVal);
+ if (!L->contains(Inst))
+ continue;
+
+ // Okay, this instruction has a user outside of the current loop
+ // and varies predictably *inside* the loop. Evaluate the value it
+ // contains when the loop exits, if possible.
+ const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+ if (!SE->isLoopInvariant(ExitValue, L))
+ continue;
+
+ // Computing the value outside of the loop brings no benefit if :
+ // - it is definitely used inside the loop in a way which can not be
+ // optimized away.
+ // - no use outside of the loop can take advantage of hoisting the
+ // computation out of the loop
+ if (ExitValue->getSCEVType()>=scMulExpr) {
+ unsigned NumHardInternalUses = 0;
+ unsigned NumSoftExternalUses = 0;
+ unsigned NumUses = 0;
+ for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end();
+ IB!=IE && NumUses<=6 ; ++IB) {
+ Instruction *UseInstr = cast<Instruction>(*IB);
+ unsigned Opc = UseInstr->getOpcode();
+ NumUses++;
+ if (L->contains(UseInstr)) {
+ if (Opc == Instruction::Call || Opc == Instruction::Ret)
+ NumHardInternalUses++;
+ } else {
+ if (Opc == Instruction::PHI) {
+ // Do not count the Phi as a use. LCSSA may have inserted
+ // plenty of trivial ones.
+ NumUses--;
+ for (Value::use_iterator PB=UseInstr->use_begin(),
+ PE=UseInstr->use_end();
+ PB!=PE && NumUses<=6 ; ++PB, ++NumUses) {
+ unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
+ if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ continue;
+ }
+ if (Opc != Instruction::Call && Opc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ }
+ if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
+ continue;
+ }
+
+ Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
+
+ DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
+ << " LoopVal = " << *Inst << "\n");
+
+ if (!isValidRewrite(Inst, ExitVal)) {
+ DeadInsts.push_back(ExitVal);
+ continue;
+ }
+ Changed = true;
+ ++NumReplaced;
+
+ PN->setIncomingValue(i, ExitVal);
+
+ // If this instruction is dead now, delete it. Don't do it now to avoid
+ // invalidating iterators.
+ if (isInstructionTriviallyDead(Inst, TLI))
+ DeadInsts.push_back(Inst);
+
+ if (NumPreds == 1) {
+ // Completely replace a single-pred PHI. This is safe, because the
+ // NewVal won't be variant in the loop, so we don't need an LCSSA phi
+ // node anymore.
+ PN->replaceAllUsesWith(ExitVal);
+ PN->eraseFromParent();
+ }
+ }
+ if (NumPreds != 1) {
+ // Clone the PHI and delete the original one. This lets IVUsers and
+ // any other maps purge the original user from their records.
+ PHINode *NewPN = cast<PHINode>(PN->clone());
+ NewPN->takeName(PN);
+ NewPN->insertBefore(PN);
+ PN->replaceAllUsesWith(NewPN);
+ PN->eraseFromParent();
+ }
+ }
+ }
+
+ // The insertion point instruction may have been deleted; clear it out
+ // so that the rewriter doesn't trip over it later.
+ Rewriter.clearInsertPoint();
+}
+
+//===----------------------------------------------------------------------===//
+// IV Widening - Extend the width of an IV to cover its widest uses.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ // Collect information about induction variables that are used by sign/zero
+ // extend operations. This information is recorded by CollectExtend and
+ // provides the input to WidenIV.
+ struct WideIVInfo {
+ PHINode *NarrowIV;
+ Type *WidestNativeType; // Widest integer type created [sz]ext
+ bool IsSigned; // Was an sext user seen before a zext?
+
+ WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
+ };
+
+ class WideIVVisitor : public IVVisitor {
+ ScalarEvolution *SE;
+ const DataLayout *TD;
+
+ public:
+ WideIVInfo WI;
+
+ WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
+ const DataLayout *TData) :
+ SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
+
+ // Implement the interface used by simplifyUsersOfIV.
+ virtual void visitCast(CastInst *Cast);
+ };
+}
+
+/// visitCast - Update information about the induction variable that is
+/// extended by this sign or zero extend operation. This is used to determine
+/// the final width of the IV before actually widening it.
+void WideIVVisitor::visitCast(CastInst *Cast) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
+ return;
+
+ Type *Ty = Cast->getType();
+ uint64_t Width = SE->getTypeSizeInBits(Ty);
+ if (TD && !TD->isLegalInteger(Width))
+ return;
+
+ if (!WI.WidestNativeType) {
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ WI.IsSigned = IsSigned;
+ return;
+ }
+
+ // We extend the IV to satisfy the sign of its first user, arbitrarily.
+ if (WI.IsSigned != IsSigned)
+ return;
+
+ if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+}
+
+namespace {
+
+/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
+/// WideIV that computes the same value as the Narrow IV def. This avoids
+/// caching Use* pointers.
+struct NarrowIVDefUse {
+ Instruction *NarrowDef;
+ Instruction *NarrowUse;
+ Instruction *WideDef;
+
+ NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
+
+ NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
+ NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
+};
+
+/// WidenIV - The goal of this transform is to remove sign and zero extends
+/// without creating any new induction variables. To do this, it creates a new
+/// phi of the wider type and redirects all users, either removing extends or
+/// inserting truncs whenever we stop propagating the type.
+///
+class WidenIV {
+ // Parameters
+ PHINode *OrigPhi;
+ Type *WideType;
+ bool IsSigned;
+
+ // Context
+ LoopInfo *LI;
+ Loop *L;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+
+ // Result
+ PHINode *WidePhi;
+ Instruction *WideInc;
+ const SCEV *WideIncExpr;
+ SmallVectorImpl<WeakVH> &DeadInsts;
+
+ SmallPtrSet<Instruction*,16> Widened;
+ SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
+
+public:
+ WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
+ ScalarEvolution *SEv, DominatorTree *DTree,
+ SmallVectorImpl<WeakVH> &DI) :
+ OrigPhi(WI.NarrowIV),
+ WideType(WI.WidestNativeType),
+ IsSigned(WI.IsSigned),
+ LI(LInfo),
+ L(LI->getLoopFor(OrigPhi->getParent())),
+ SE(SEv),
+ DT(DTree),
+ WidePhi(0),
+ WideInc(0),
+ WideIncExpr(0),
+ DeadInsts(DI) {
+ assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
+ }
+
+ PHINode *CreateWideIV(SCEVExpander &Rewriter);
+
+protected:
+ Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use);
+
+ Instruction *CloneIVUser(NarrowIVDefUse DU);
+
+ const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
+
+ const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
+
+ Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
+
+ void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
+};
+} // anonymous namespace
+
+/// isLoopInvariant - Perform a quick domtree based check for loop invariance
+/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
+/// gratuitous for this purpose.
+static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ return true;
+
+ return DT->properlyDominates(Inst->getParent(), L->getHeader());
+}
+
+Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use) {
+ // Set the debug location and conservative insertion point.
+ IRBuilder<> Builder(Use);
+ // Hoist the insertion point into loop preheaders as far as possible.
+ for (const Loop *L = LI->getLoopFor(Use->getParent());
+ L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
+ L = L->getParentLoop())
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+
+ return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
+ Builder.CreateZExt(NarrowOper, WideType);
+}
+
+/// CloneIVUser - Instantiate a wide operation to replace a narrow
+/// operation. This only needs to handle operations that can evaluation to
+/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
+Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
+ unsigned Opcode = DU.NarrowUse->getOpcode();
+ switch (Opcode) {
+ default:
+ return 0;
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know
+ // anything about the narrow operand yet so must insert a [sz]ext. It is
+ // probably loop invariant and will be folded or hoisted. If it actually
+ // comes from a widened IV, it should be removed during a future call to
+ // WidenIVUse.
+ Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
+ getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
+ Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
+ getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
+
+ BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
+ BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
+ LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(DU.NarrowUse);
+ Builder.Insert(WideBO);
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
+ if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+ if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+ }
+ return WideBO;
+ }
+}
+
+/// No-wrap operations can transfer sign extension of their result to their
+/// operands. Generate the SCEV value for the widened operation without
+/// actually modifying the IR yet. If the expression after extending the
+/// operands is an AddRec for this loop, return it.
+const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
+ // Handle the common case of add<nsw/nuw>
+ if (DU.NarrowUse->getOpcode() != Instruction::Add)
+ return 0;
+
+ // One operand (NarrowDef) has already been extended to WideDef. Now determine
+ // if extending the other will lead to a recurrence.
+ unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+ assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
+
+ const SCEV *ExtendOperExpr = 0;
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(DU.NarrowUse);
+ if (IsSigned && OBO->hasNoSignedWrap())
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else if(!IsSigned && OBO->hasNoUnsignedWrap())
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else
+ return 0;
+
+ // When creating this AddExpr, don't apply the current operations NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
+
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+ return AddRec;
+}
+
+/// GetWideRecurrence - Is this instruction potentially interesting from
+/// IVUsers' perspective after widening it's type? In other words, can the
+/// extend be safely hoisted out of the loop with SCEV reducing the value to a
+/// recurrence on the same loop. If so, return the sign or zero extended
+/// recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+ if (!SE->isSCEVable(NarrowUse->getType()))
+ return 0;
+
+ const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType())
+ >= SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+ // index. So don't follow this use.
+ return 0;
+ }
+
+ const SCEV *WideExpr = IsSigned ?
+ SE->getSignExtendExpr(NarrowExpr, WideType) :
+ SE->getZeroExtendExpr(NarrowExpr, WideType);
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+ return AddRec;
+}
+
+/// WidenIVUse - Determine whether an individual user of the narrow IV can be
+/// widened. If so, return the wide clone of the user.
+Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
+
+ // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+ if (isa<PHINode>(DU.NarrowUse) &&
+ LI->getLoopFor(DU.NarrowUse->getParent()) != L)
+ return 0;
+
+ // Our raison d'etre! Eliminate sign and zero extension.
+ if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
+ Value *NewDef = DU.WideDef;
+ if (DU.NarrowUse->getType() != WideType) {
+ unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ if (CastWidth < IVWidth) {
+ // The cast isn't as wide as the IV, so insert a Trunc.
+ IRBuilder<> Builder(DU.NarrowUse);
+ NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
+ }
+ else {
+ // A wider extend was hidden behind a narrower one. This may induce
+ // another round of IV widening in which the intermediate IV becomes
+ // dead. It should be very rare.
+ DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
+ << " not wide enough to subsume " << *DU.NarrowUse << "\n");
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+ NewDef = DU.NarrowUse;
+ }
+ }
+ if (NewDef != DU.NarrowUse) {
+ DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
+ << " replaced by " << *DU.WideDef << "\n");
+ ++NumElimExt;
+ DU.NarrowUse->replaceAllUsesWith(NewDef);
+ DeadInsts.push_back(DU.NarrowUse);
+ }
+ // Now that the extend is gone, we want to expose it's uses for potential
+ // further simplification. We don't need to directly inform SimplifyIVUsers
+ // of the new users, because their parent IV will be processed later as a
+ // new loop phi. If we preserved IVUsers analysis, we would also want to
+ // push the uses of WideDef here.
+
+ // No further widening is needed. The deceased [sz]ext had done it for us.
+ return 0;
+ }
+
+ // Does this user itself evaluate to a recurrence after widening?
+ const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
+ if (!WideAddRec) {
+ WideAddRec = GetExtendedOperandRecurrence(DU);
+ }
+ if (!WideAddRec) {
+ // This user does not evaluate to a recurence after widening, so don't
+ // follow it. Instead insert a Trunc to kill off the original use,
+ // eventually isolating the original narrow IV so it can be removed.
+ IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
+ return 0;
+ }
+ // Assume block terminators cannot evaluate to a recurrence. We can't to
+ // insert a Trunc after a terminator if there happens to be a critical edge.
+ assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
+ "SCEV is not expected to evaluate a block terminator");
+
+ // Reuse the IV increment that SCEVExpander created as long as it dominates
+ // NarrowUse.
+ Instruction *WideUse = 0;
+ if (WideAddRec == WideIncExpr
+ && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
+ WideUse = WideInc;
+ else {
+ WideUse = CloneIVUser(DU);
+ if (!WideUse)
+ return 0;
+ }
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
+ // evaluates to the same expression as the extended narrow use, but doesn't
+ // absolutely guarantee it. Hence the following failsafe check. In rare cases
+ // where it fails, we simply throw away the newly created wide use.
+ if (WideAddRec != SE->getSCEV(WideUse)) {
+ DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
+ << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
+ DeadInsts.push_back(WideUse);
+ return 0;
+ }
+
+ // Returning WideUse pushes it on the worklist.
+ return WideUse;
+}
+
+/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+///
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ for (Value::use_iterator UI = NarrowDef->use_begin(),
+ UE = NarrowDef->use_end(); UI != UE; ++UI) {
+ Instruction *NarrowUse = cast<Instruction>(*UI);
+
+ // Handle data flow merges and bizarre phi cycles.
+ if (!Widened.insert(NarrowUse))
+ continue;
+
+ NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
+ }
+}
+
+/// CreateWideIV - Process a single induction variable. First use the
+/// SCEVExpander to create a wide induction variable that evaluates to the same
+/// recurrence as the original narrow IV. Then use a worklist to forward
+/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
+/// interesting IV users, the narrow IV will be isolated for removal by
+/// DeleteDeadPHIs.
+///
+/// It would be simpler to delete uses as they are processed, but we must avoid
+/// invalidating SCEV expressions.
+///
+PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+ // Is this phi an induction variable?
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
+ if (!AddRec)
+ return NULL;
+
+ // Widen the induction variable expression.
+ const SCEV *WideIVExpr = IsSigned ?
+ SE->getSignExtendExpr(AddRec, WideType) :
+ SE->getZeroExtendExpr(AddRec, WideType);
+
+ assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
+ "Expect the new IV expression to preserve its type");
+
+ // Can the IV be extended outside the loop without overflow?
+ AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return NULL;
+
+ // An AddRec must have loop-invariant operands. Since this AddRec is
+ // materialized by a loop header phi, the expression cannot have any post-loop
+ // operands, so they must dominate the loop header.
+ assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
+ SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
+ && "Loop header phi recurrence inputs do not dominate the loop");
+
+ // The rewriter provides a value for the desired IV expression. This may
+ // either find an existing phi or materialize a new one. Either way, we
+ // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
+ // of the phi-SCC dominates the loop entry.
+ Instruction *InsertPt = L->getHeader()->begin();
+ WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
+
+ // Remembering the WideIV increment generated by SCEVExpander allows
+ // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
+ // employ a general reuse mechanism because the call above is the only call to
+ // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ WideInc =
+ cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+ WideIncExpr = SE->getSCEV(WideInc);
+ }
+
+ DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
+ ++NumWidened;
+
+ // Traverse the def-use chain using a worklist starting at the original IV.
+ assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
+
+ Widened.insert(OrigPhi);
+ pushNarrowIVUsers(OrigPhi, WidePhi);
+
+ while (!NarrowIVUsers.empty()) {
+ NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
+
+ // Process a def-use edge. This may replace the use, so don't hold a
+ // use_iterator across it.
+ Instruction *WideUse = WidenIVUse(DU, Rewriter);
+
+ // Follow all def-use edges from the previous narrow use.
+ if (WideUse)
+ pushNarrowIVUsers(DU.NarrowUse, WideUse);
+
+ // WidenIVUse may have removed the def-use edge.
+ if (DU.NarrowDef->use_empty())
+ DeadInsts.push_back(DU.NarrowDef);
+ }
+ return WidePhi;
+}
+
+//===----------------------------------------------------------------------===//
+// Simplification of IV users based on SCEV evaluation.
+//===----------------------------------------------------------------------===//
+
+
+/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
+/// users. Each successive simplification may push more users which may
+/// themselves be candidates for simplification.
+///
+/// Sign/Zero extend elimination is interleaved with IV simplification.
+///
+void IndVarSimplify::SimplifyAndExtend(Loop *L,
+ SCEVExpander &Rewriter,
+ LPPassManager &LPM) {
+ SmallVector<WideIVInfo, 8> WideIVs;
+
+ SmallVector<PHINode*, 8> LoopPhis;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ LoopPhis.push_back(cast<PHINode>(I));
+ }
+ // Each round of simplification iterates through the SimplifyIVUsers worklist
+ // for all current phis, then determines whether any IVs can be
+ // widened. Widening adds new phis to LoopPhis, inducing another round of
+ // simplification on the wide IVs.
+ while (!LoopPhis.empty()) {
+ // Evaluate as many IV expressions as possible before widening any IVs. This
+ // forces SCEV to set no-wrap flags before evaluating sign/zero
+ // extension. The first time SCEV attempts to normalize sign/zero extension,
+ // the result becomes final. So for the most predictable results, we delay
+ // evaluation of sign/zero extend evaluation until needed, and avoid running
+ // other SCEV based analysis prior to SimplifyAndExtend.
+ do {
+ PHINode *CurrIV = LoopPhis.pop_back_val();
+
+ // Information about sign/zero extensions of CurrIV.
+ WideIVVisitor WIV(CurrIV, SE, TD);
+
+ Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
+
+ if (WIV.WI.WidestNativeType) {
+ WideIVs.push_back(WIV.WI);
+ }
+ } while(!LoopPhis.empty());
+
+ for (; !WideIVs.empty(); WideIVs.pop_back()) {
+ WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
+ if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+ Changed = true;
+ LoopPhis.push_back(WidePhi);
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+//===----------------------------------------------------------------------===//
+
+/// Check for expressions that ScalarEvolution generates to compute
+/// BackedgeTakenInfo. If these expressions have not been reduced, then
+/// expanding them may incur additional cost (albeit in the loop preheader).
+static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
+ SmallPtrSet<const SCEV*, 8> &Processed,
+ ScalarEvolution *SE) {
+ if (!Processed.insert(S))
+ return false;
+
+ // If the backedge-taken count is a UDiv, it's very likely a UDiv that
+ // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
+ // precise expression, rather than a UDiv from the user's code. If we can't
+ // find a UDiv in the code with some simple searching, assume the former and
+ // forego rewriting the loop.
+ if (isa<SCEVUDivExpr>(S)) {
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!OrigCond) return true;
+ const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
+ R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
+ if (R != S) {
+ const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
+ L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
+ if (L != S)
+ return true;
+ }
+ }
+
+ // Recurse past add expressions, which commonly occur in the
+ // BackedgeTakenCount. They may already exist in program code, and if not,
+ // they are not too expensive rematerialize.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansion(*I, BI, Processed, SE))
+ return true;
+ }
+ return false;
+ }
+
+ // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+ // the exit condition.
+ if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ return true;
+
+ // If we haven't recognized an expensive SCEV pattern, assume it's an
+ // expression produced by program code.
+ return false;
+}
+
+/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
+/// count expression can be safely and cheaply expanded into an instruction
+/// sequence that can be used by LinearFunctionTestReplace.
+///
+/// TODO: This fails for pointer-type loop counters with greater than one byte
+/// strides, consequently preventing LFTR from running. For the purpose of LFTR
+/// we could skip this check in the case that the LFTR loop counter (chosen by
+/// FindLoopCounter) is also pointer type. Instead, we could directly convert
+/// the loop test to an inequality test by checking the target data's alignment
+/// of element types (given that the initial pointer value originates from or is
+/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
+/// However, we don't yet have a strong motivation for converting loop tests
+/// into inequality tests.
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
+ BackedgeTakenCount->isZero())
+ return false;
+
+ if (!L->getExitingBlock())
+ return false;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return false;
+
+ SmallPtrSet<const SCEV*, 8> Processed;
+ if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
+ return false;
+
+ return true;
+}
+
+/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
+/// invariant value to the phi.
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
+ Instruction *IncI = dyn_cast<Instruction>(IncV);
+ if (!IncI)
+ return 0;
+
+ switch (IncI->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ break;
+ case Instruction::GetElementPtr:
+ // An IV counter must preserve its type.
+ if (IncI->getNumOperands() == 2)
+ break;
+ default:
+ return 0;
+ }
+
+ PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
+ if (Phi && Phi->getParent() == L->getHeader()) {
+ if (isLoopInvariant(IncI->getOperand(1), L, DT))
+ return Phi;
+ return 0;
+ }
+ if (IncI->getOpcode() == Instruction::GetElementPtr)
+ return 0;
+
+ // Allow add/sub to be commuted.
+ Phi = dyn_cast<PHINode>(IncI->getOperand(1));
+ if (Phi && Phi->getParent() == L->getHeader()) {
+ if (isLoopInvariant(IncI->getOperand(0), L, DT))
+ return Phi;
+ }
+ return 0;
+}
+
+/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
+static ICmpInst *getLoopTest(Loop *L) {
+ assert(L->getExitingBlock() && "expected loop exit");
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ // Don't bother with LFTR if the loop is not properly simplified.
+ if (!LatchBlock)
+ return 0;
+
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ assert(BI && "expected exit branch");
+
+ return dyn_cast<ICmpInst>(BI->getCondition());
+}
+
+/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
+/// that the current exit test is already sufficiently canonical.
+static bool needsLFTR(Loop *L, DominatorTree *DT) {
+ // Do LFTR to simplify the exit condition to an ICMP.
+ ICmpInst *Cond = getLoopTest(L);
+ if (!Cond)
+ return true;
+
+ // Do LFTR to simplify the exit ICMP to EQ/NE
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+ if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+ return true;
+
+ // Look for a loop invariant RHS
+ Value *LHS = Cond->getOperand(0);
+ Value *RHS = Cond->getOperand(1);
+ if (!isLoopInvariant(RHS, L, DT)) {
+ if (!isLoopInvariant(LHS, L, DT))
+ return true;
+ std::swap(LHS, RHS);
+ }
+ // Look for a simple IV counter LHS
+ PHINode *Phi = dyn_cast<PHINode>(LHS);
+ if (!Phi)
+ Phi = getLoopPhiForCounter(LHS, L, DT);
+
+ if (!Phi)
+ return true;
+
+ // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
+ int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
+ if (Idx < 0)
+ return true;
+
+ // Do LFTR if the exit condition's IV is *not* a simple counter.
+ Value *IncV = Phi->getIncomingValue(Idx);
+ return Phi != getLoopPhiForCounter(IncV, L, DT);
+}
+
+/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
+/// down to checking that all operands are constant and listing instructions
+/// that may hide undef.
+static bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited,
+ unsigned Depth) {
+ if (isa<Constant>(V))
+ return !isa<UndefValue>(V);
+
+ if (Depth >= 6)
+ return false;
+
+ // Conservatively handle non-constant non-instructions. For example, Arguments
+ // may be undef.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // Load and return values may be undef.
+ if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
+ return false;
+
+ // Optimistically handle other instructions.
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
+ if (!Visited.insert(*OI))
+ continue;
+ if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
+ return false;
+ }
+ return true;
+}
+
+/// Return true if the given value is concrete. We must prove that undef can
+/// never reach it.
+///
+/// TODO: If we decide that this is a good approach to checking for undef, we
+/// may factor it into a common location.
+static bool hasConcreteDef(Value *V) {
+ SmallPtrSet<Value*, 8> Visited;
+ Visited.insert(V);
+ return hasConcreteDefImpl(V, Visited, 0);
+}
+
+/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
+/// be rewritten) loop exit test.
+static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
+ int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+ Value *IncV = Phi->getIncomingValue(LatchIdx);
+
+ for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
+ UI != UE; ++UI) {
+ if (*UI != Cond && *UI != IncV) return false;
+ }
+
+ for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
+ UI != UE; ++UI) {
+ if (*UI != Cond && *UI != Phi) return false;
+ }
+ return true;
+}
+
+/// FindLoopCounter - Find an affine IV in canonical form.
+///
+/// BECount may be an i8* pointer type. The pointer difference is already
+/// valid count without scaling the address stride, so it remains a pointer
+/// expression as far as SCEV is concerned.
+///
+/// Currently only valid for LFTR. See the comments on hasConcreteDef below.
+///
+/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
+///
+/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
+/// This is difficult in general for SCEV because of potential overflow. But we
+/// could at least handle constant BECounts.
+static PHINode *
+FindLoopCounter(Loop *L, const SCEV *BECount,
+ ScalarEvolution *SE, DominatorTree *DT, const DataLayout *TD) {
+ uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
+
+ Value *Cond =
+ cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
+
+ // Loop over all of the PHI nodes, looking for a simple counter.
+ PHINode *BestPhi = 0;
+ const SCEV *BestInit = 0;
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "needsLFTR should guarantee a loop latch");
+
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE->isSCEVable(Phi->getType()))
+ continue;
+
+ // Avoid comparing an integer IV against a pointer Limit.
+ if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
+ continue;
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+ if (!AR || AR->getLoop() != L || !AR->isAffine())
+ continue;
+
+ // AR may be a pointer type, while BECount is an integer type.
+ // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
+ // AR may not be a narrower type, or we may never exit.
+ uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
+ if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
+ continue;
+
+ const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!Step || !Step->isOne())
+ continue;
+
+ int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+ Value *IncV = Phi->getIncomingValue(LatchIdx);
+ if (getLoopPhiForCounter(IncV, L, DT) != Phi)
+ continue;
+
+ // Avoid reusing a potentially undef value to compute other values that may
+ // have originally had a concrete definition.
+ if (!hasConcreteDef(Phi)) {
+ // We explicitly allow unknown phis as long as they are already used by
+ // the loop test. In this case we assume that performing LFTR could not
+ // increase the number of undef users.
+ if (ICmpInst *Cond = getLoopTest(L)) {
+ if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT)
+ && Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
+ continue;
+ }
+ }
+ }
+ const SCEV *Init = AR->getStart();
+
+ if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
+ // Don't force a live loop counter if another IV can be used.
+ if (AlmostDeadIV(Phi, LatchBlock, Cond))
+ continue;
+
+ // Prefer to count-from-zero. This is a more "canonical" counter form. It
+ // also prefers integer to pointer IVs.
+ if (BestInit->isZero() != Init->isZero()) {
+ if (BestInit->isZero())
+ continue;
+ }
+ // If two IVs both count from zero or both count from nonzero then the
+ // narrower is likely a dead phi that has been widened. Use the wider phi
+ // to allow the other to be eliminated.
+ else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
+ continue;
+ }
+ BestPhi = Phi;
+ BestInit = Init;
+ }
+ return BestPhi;
+}
+
+/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
+/// holds the RHS of the new loop test.
+static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
+ SCEVExpander &Rewriter, ScalarEvolution *SE) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+ assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+ const SCEV *IVInit = AR->getStart();
+
+ // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
+ // finds a valid pointer IV. Sign extend BECount in order to materialize a
+ // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
+ // the existing GEPs whenever possible.
+ if (IndVar->getType()->isPointerTy()
+ && !IVCount->getType()->isPointerTy()) {
+
+ Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
+ const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy);
+
+ // Expand the code for the iteration count.
+ assert(SE->isLoopInvariant(IVOffset, L) &&
+ "Computed iteration count is not loop invariant!");
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
+
+ Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
+ assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
+ // We could handle pointer IVs other than i8*, but we need to compensate for
+ // gep index scaling. See canExpandBackedgeTakenCount comments.
+ assert(SE->getSizeOfExpr(
+ cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
+ && "unit stride pointer IV must be i8*");
+
+ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+ return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
+ }
+ else {
+ // In any other case, convert both IVInit and IVCount to integers before
+ // comparing. This may result in SCEV expension of pointers, but in practice
+ // SCEV will fold the pointer arithmetic away as such:
+ // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
+ //
+ // Valid Cases: (1) both integers is most common; (2) both may be pointers
+ // for simple memset-style loops; (3) IVInit is an integer and IVCount is a
+ // pointer may occur when enable-iv-rewrite generates a canonical IV on top
+ // of case #2.
+
+ const SCEV *IVLimit = 0;
+ // For unit stride, IVCount = Start + BECount with 2's complement overflow.
+ // For non-zero Start, compute IVCount here.
+ if (AR->getStart()->isZero())
+ IVLimit = IVCount;
+ else {
+ assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+ const SCEV *IVInit = AR->getStart();
+
+ // For integer IVs, truncate the IV before computing IVInit + BECount.
+ if (SE->getTypeSizeInBits(IVInit->getType())
+ > SE->getTypeSizeInBits(IVCount->getType()))
+ IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
+
+ IVLimit = SE->getAddExpr(IVInit, IVCount);
+ }
+ // Expand the code for the iteration count.
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ IRBuilder<> Builder(BI);
+ assert(SE->isLoopInvariant(IVLimit, L) &&
+ "Computed iteration count is not loop invariant!");
+ // Ensure that we generate the same type as IndVar, or a smaller integer
+ // type. In the presence of null pointer values, we have an integer type
+ // SCEV expression (IVInit) for a pointer type IV value (IndVar).
+ Type *LimitTy = IVCount->getType()->isPointerTy() ?
+ IndVar->getType() : IVCount->getType();
+ return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
+ }
+}
+
+/// LinearFunctionTestReplace - This method rewrites the exit condition of the
+/// loop to be a canonical != comparison against the incremented loop induction
+/// variable. This pass is able to rewrite the exit tests of any loop where the
+/// SCEV analysis can determine a loop-invariant trip count of the loop, which
+/// is actually a much broader range than just linear tests.
+Value *IndVarSimplify::
+LinearFunctionTestReplace(Loop *L,
+ const SCEV *BackedgeTakenCount,
+ PHINode *IndVar,
+ SCEVExpander &Rewriter) {
+ assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+
+ // LFTR can ignore IV overflow and truncate to the width of
+ // BECount. This avoids materializing the add(zext(add)) expression.
+ Type *CntTy = BackedgeTakenCount->getType();
+
+ const SCEV *IVCount = BackedgeTakenCount;
+
+ // If the exiting block is the same as the backedge block, we prefer to
+ // compare against the post-incremented value, otherwise we must compare
+ // against the preincremented value.
+ Value *CmpIndVar;
+ if (L->getExitingBlock() == L->getLoopLatch()) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // If this addition may overflow, we have to be more pessimistic and
+ // cast the induction variable before doing the add.
+ const SCEV *N =
+ SE->getAddExpr(IVCount, SE->getConstant(IVCount->getType(), 1));
+ if (CntTy == IVCount->getType())
+ IVCount = N;
+ else {
+ const SCEV *Zero = SE->getConstant(IVCount->getType(), 0);
+ if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+ SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ // No overflow. Cast the sum.
+ IVCount = SE->getTruncateOrZeroExtend(N, CntTy);
+ } else {
+ // Potential overflow. Cast before doing the add.
+ IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
+ IVCount = SE->getAddExpr(IVCount, SE->getConstant(CntTy, 1));
+ }
+ }
+ // The BackedgeTaken expression contains the number of times that the
+ // backedge branches to the loop header. This is one less than the
+ // number of times the loop executes, so use the incremented indvar.
+ CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ } else {
+ // We must use the preincremented value...
+ IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
+ CmpIndVar = IndVar;
+ }
+
+ Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
+ assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
+ && "genLoopLimit missed a cast");
+
+ // Insert a new icmp_ne or icmp_eq instruction before the branch.
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ ICmpInst::Predicate P;
+ if (L->contains(BI->getSuccessor(0)))
+ P = ICmpInst::ICMP_NE;
+ else
+ P = ICmpInst::ICMP_EQ;
+
+ DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+ << " LHS:" << *CmpIndVar << '\n'
+ << " op:\t"
+ << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *ExitCnt << "\n"
+ << " IVCount:\t" << *IVCount << "\n");
+
+ IRBuilder<> Builder(BI);
+ if (SE->getTypeSizeInBits(CmpIndVar->getType())
+ > SE->getTypeSizeInBits(ExitCnt->getType())) {
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+ "lftr.wideiv");
+ }
+
+ Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
+ Value *OrigCond = BI->getCondition();
+ // It's tempting to use replaceAllUsesWith here to fully replace the old
+ // comparison, but that's not immediately safe, since users of the old
+ // comparison may not be dominated by the new comparison. Instead, just
+ // update the branch to use the new comparison; in the common case this
+ // will make old comparison dead.
+ BI->setCondition(Cond);
+ DeadInsts.push_back(OrigCond);
+
+ ++NumLFTR;
+ Changed = true;
+ return Cond;
+}
+
+//===----------------------------------------------------------------------===//
+// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+//===----------------------------------------------------------------------===//
+
+/// If there's a single exit block, sink any loop-invariant values that
+/// were defined in the preheader but not used inside the loop into the
+/// exit block to reduce register pressure in the loop.
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock) return;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
+ BasicBlock::iterator I = Preheader->getTerminator();
+ while (I != Preheader->begin()) {
+ --I;
+ // New instructions were inserted at the end of the preheader.
+ if (isa<PHINode>(I))
+ break;
+
+ // Don't move instructions which might have side effects, since the side
+ // effects need to complete before instructions inside the loop. Also don't
+ // move instructions which might read memory, since the loop may modify
+ // memory. Note that it's okay if the instruction might have undefined
+ // behavior: LoopSimplify guarantees that the preheader dominates the exit
+ // block.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ continue;
+
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Skip landingpad instructions.
+ if (isa<LandingPadInst>(I))
+ continue;
+
+ // Don't sink alloca: we never want to sink static alloca's out of the
+ // entry block, and correctly sinking dynamic alloca's requires
+ // checks for stacksave/stackrestore intrinsics.
+ // FIXME: Refactor this check somehow?
+ if (isa<AllocaInst>(I))
+ continue;
+
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoop = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U)) {
+ unsigned i =
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ UseBB = P->getIncomingBlock(i);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoop = true;
+ break;
+ }
+ }
+
+ // If there is, the def must remain in the preheader.
+ if (UsedInLoop)
+ continue;
+
+ // Otherwise, sink it to the exit block.
+ Instruction *ToMove = I;
+ bool Done = false;
+
+ if (I != Preheader->begin()) {
+ // Skip debug info intrinsics.
+ do {
+ --I;
+ } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+
+ if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ Done = true;
+ } else {
+ Done = true;
+ }
+
+ ToMove->moveBefore(InsertPt);
+ if (Done) break;
+ InsertPt = ToMove;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// IndVarSimplify driver. Manage several subpasses of IV simplification.
+//===----------------------------------------------------------------------===//
+
+bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // If LoopSimplify form is not available, stay out of trouble. Some notes:
+ // - LSR currently only supports LoopSimplify-form loops. Indvars'
+ // canonicalization can be a pessimization without LSR to "clean up"
+ // afterwards.
+ // - We depend on having a preheader; in particular,
+ // Loop::getCanonicalInductionVariable only supports loops with preheaders,
+ // and we're in trouble if we can't find the induction variable even when
+ // we've manually inserted one.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ LI = &getAnalysis<LoopInfo>();
+ SE = &getAnalysis<ScalarEvolution>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+
+ DeadInsts.clear();
+ Changed = false;
+
+ // If there are any floating-point recurrences, attempt to
+ // transform them to use integer recurrences.
+ RewriteNonIntegerIVs(L);
+
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+
+ // Create a rewriter object which we'll use to transform the code with.
+ SCEVExpander Rewriter(*SE, "indvars");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
+
+ // Eliminate redundant IV users.
+ //
+ // Simplification works best when run before other consumers of SCEV. We
+ // attempt to avoid evaluating SCEVs for sign/zero extend operations until
+ // other expressions involving loop IVs have been evaluated. This helps SCEV
+ // set no-wrap flags before normalizing sign/zero extension.
+ Rewriter.disableCanonicalMode();
+ SimplifyAndExtend(L, Rewriter, LPM);
+
+ // Check to see if this loop has a computable loop-invariant execution count.
+ // If so, this means that we can compute the final value of any expressions
+ // that are recurrent in the loop, and substitute the exit values from the
+ // loop into any instructions outside of the loop that use the final values of
+ // the current expressions.
+ //
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ RewriteLoopExitValues(L, Rewriter);
+
+ // Eliminate redundant IV cycles.
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
+
+ // If we have a trip count expression, rewrite the loop's exit condition
+ // using it. We can currently only handle loops with a single exit.
+ if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
+ PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+ if (IndVar) {
+ // Check preconditions for proper SCEVExpander operation. SCEV does not
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+ // pass that uses the SCEVExpander must do it. This does not work well for
+ // loop passes because SCEVExpander makes assumptions about all loops, while
+ // LoopPassManager only forces the current loop to be simplified.
+ //
+ // FIXME: SCEV expansion has no way to bail out, so the caller must
+ // explicitly check any assumptions made by SCEV. Brittle.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ if (!AR || AR->getLoop()->getLoopPreheader())
+ (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ Rewriter);
+ }
+ }
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted in the loop below, causing the AssertingVH in the cache to
+ // trigger.
+ Rewriter.clear();
+
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
+
+ // The Rewriter may not be used from this point on.
+
+ // Loop-invariant instructions in the preheader that aren't used in the
+ // loop may be sunk below the loop to reduce register pressure.
+ SinkUnusedInvariants(L);
+
+ // Clean up dead instructions.
+ Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
+ // Check a post-condition.
+ assert(L->isLCSSAForm(*DT) &&
+ "Indvars did not leave the loop in lcssa form!");
+
+ // Verify that LFTR, and any other change have not interfered with SCEV's
+ // ability to compute trip count.
+#ifndef NDEBUG
+ if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ SE->forgetLoop(L);
+ const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
+ if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
+ SE->getTypeSizeInBits(NewBECount->getType()))
+ NewBECount = SE->getTruncateOrNoop(NewBECount,
+ BackedgeTakenCount->getType());
+ else
+ BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
+ NewBECount->getType());
+ assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
+ }
+#endif
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
new file mode 100644
index 000000000000..b61c5ba56e0c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -0,0 +1,1618 @@
+//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Jump Threading pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jump-threading"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumThreads, "Number of jumps threaded");
+STATISTIC(NumFolds, "Number of terminators folded");
+STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
+
+static cl::opt<unsigned>
+Threshold("jump-threading-threshold",
+ cl::desc("Max block size to duplicate for jump threading"),
+ cl::init(6), cl::Hidden);
+
+namespace {
+ // These are at global scope so static functions can use them too.
+ typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
+ typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy;
+
+ // This is used to keep track of what kind of constant we're currently hoping
+ // to find.
+ enum ConstantPreference {
+ WantInteger,
+ WantBlockAddress
+ };
+
+ /// This pass performs 'jump threading', which looks at blocks that have
+ /// multiple predecessors and multiple successors. If one or more of the
+ /// predecessors of the block can be proven to always jump to one of the
+ /// successors, we forward the edge from the predecessor to the successor by
+ /// duplicating the contents of this block.
+ ///
+ /// An example of when this can occur is code like this:
+ ///
+ /// if () { ...
+ /// X = 4;
+ /// }
+ /// if (X < 3) {
+ ///
+ /// In this case, the unconditional branch at the end of the first if can be
+ /// revectored to the false side of the second if.
+ ///
+ class JumpThreading : public FunctionPass {
+ DataLayout *TD;
+ TargetLibraryInfo *TLI;
+ LazyValueInfo *LVI;
+#ifdef NDEBUG
+ SmallPtrSet<BasicBlock*, 16> LoopHeaders;
+#else
+ SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
+#endif
+ DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
+
+ // RAII helper for updating the recursion stack.
+ struct RecursionSetRemover {
+ DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
+ std::pair<Value*, BasicBlock*> ThePair;
+
+ RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
+ std::pair<Value*, BasicBlock*> P)
+ : TheSet(S), ThePair(P) { }
+
+ ~RecursionSetRemover() {
+ TheSet.erase(ThePair);
+ }
+ };
+ public:
+ static char ID; // Pass identification
+ JumpThreading() : FunctionPass(ID) {
+ initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<LazyValueInfo>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+
+ void FindLoopHeaders(Function &F);
+ bool ProcessBlock(BasicBlock *BB);
+ bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB);
+ bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs);
+
+ bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
+ PredValueInfo &Result,
+ ConstantPreference Preference);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference);
+
+ bool ProcessBranchOnPHI(PHINode *PN);
+ bool ProcessBranchOnXOR(BinaryOperator *BO);
+
+ bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
+ };
+}
+
+char JumpThreading::ID = 0;
+INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
+
+// Public interface to the Jump Threading pass
+FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
+
+/// runOnFunction - Top level algorithm.
+///
+bool JumpThreading::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+ LVI = &getAnalysis<LazyValueInfo>();
+
+ FindLoopHeaders(F);
+
+ bool Changed, EverChanged = false;
+ do {
+ Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *BB = I;
+ // Thread all of the branches we can over this block.
+ while (ProcessBlock(BB))
+ Changed = true;
+
+ ++I;
+
+ // If the block is trivially dead, zap it. This eliminates the successor
+ // edges which simplifies the CFG.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
+ << "' with terminator: " << *BB->getTerminator() << '\n');
+ LoopHeaders.erase(BB);
+ LVI->eraseBlock(BB);
+ DeleteDeadBlock(BB);
+ Changed = true;
+ continue;
+ }
+
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI && BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock() &&
+ // If the terminator is the only non-phi instruction, try to nuke it.
+ BB->getFirstNonPHIOrDbg()->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward if needed.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ LVI->eraseBlock(BB);
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ Changed = true;
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
+ }
+
+ if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
+ }
+ }
+ EverChanged |= Changed;
+ } while (Changed);
+
+ LoopHeaders.clear();
+ return EverChanged;
+}
+
+/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
+/// thread across it. Stop scanning the block when passing the threshold.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
+ unsigned Threshold) {
+ /// Ignore PHI nodes, these will be flattened when duplication happens.
+ BasicBlock::const_iterator I = BB->getFirstNonPHI();
+
+ // FIXME: THREADING will delete values that are just used to compute the
+ // branch, so they shouldn't count against the duplication cost.
+
+ // Sum up the cost of each instruction until we get to the terminator. Don't
+ // include the terminator because the copy won't include it.
+ unsigned Size = 0;
+ for (; !isa<TerminatorInst>(I); ++I) {
+
+ // Stop scanning the block if we've reached the threshold.
+ if (Size > Threshold)
+ return Size;
+
+ // Debugger intrinsics don't incur code size.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+
+ // If this is a pointer->pointer bitcast, it is free.
+ if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
+ continue;
+
+ // All other instructions count for at least one unit.
+ ++Size;
+
+ // Calls are more expensive. If they are non-intrinsic calls, we model them
+ // as having cost of 4. If they are a non-vector intrinsic, we model them
+ // as having cost of 2 total, and if they are a vector intrinsic, we model
+ // them as having cost 1.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->hasFnAttr(Attribute::NoDuplicate))
+ // Blocks with NoDuplicate are modelled as having infinite cost, so they
+ // are never duplicated.
+ return ~0U;
+ else if (!isa<IntrinsicInst>(CI))
+ Size += 3;
+ else if (!CI->getType()->isVectorTy())
+ Size += 1;
+ }
+ }
+
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to happen.
+ if (isa<SwitchInst>(I))
+ Size = Size > 6 ? Size-6 : 0;
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(I))
+ Size = Size > 8 ? Size-8 : 0;
+
+ return Size;
+}
+
+/// FindLoopHeaders - We do not want jump threading to turn proper loop
+/// structures into irreducible loops. Doing this breaks up the loop nesting
+/// hierarchy and pessimizes later transformations. To prevent this from
+/// happening, we first have to find the loop headers. Here we approximate this
+/// by finding targets of backedges in the CFG.
+///
+/// Note that there definitely are cases when we want to allow threading of
+/// edges across a loop header. For example, threading a jump from outside the
+/// loop (the preheader) to an exit block of the loop is definitely profitable.
+/// It is also almost always profitable to thread backedges from within the loop
+/// to exit blocks, and is often profitable to thread backedges to other blocks
+/// within the loop (forming a nested loop). This simple analysis is not rich
+/// enough to track all of these properties and keep it up-to-date as the CFG
+/// mutates, so we don't allow any of these transformations.
+///
+void JumpThreading::FindLoopHeaders(Function &F) {
+ SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
+ FindFunctionBackedges(F, Edges);
+
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i)
+ LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
+}
+
+/// getKnownConstant - Helper method to determine if we can thread over a
+/// terminator with the given value as its condition, and if so what value to
+/// use for that. What kind of value this is depends on whether we want an
+/// integer or a block address, but an undef is always accepted.
+/// Returns null if Val is null or not an appropriate constant.
+static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
+ if (!Val)
+ return 0;
+
+ // Undef is "known" enough.
+ if (UndefValue *U = dyn_cast<UndefValue>(Val))
+ return U;
+
+ if (Preference == WantBlockAddress)
+ return dyn_cast<BlockAddress>(Val->stripPointerCasts());
+
+ return dyn_cast<ConstantInt>(Val);
+}
+
+/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
+/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
+/// in any of our predecessors. If so, return the known list of value and pred
+/// BB in the result vector.
+///
+/// This returns true if there were any known values.
+///
+bool JumpThreading::
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
+ ConstantPreference Preference) {
+ // This method walks up use-def chains recursively. Because of this, we could
+ // get into an infinite loop going around loops in the use-def chain. To
+ // prevent this, keep track of what (value, block) pairs we've already visited
+ // and terminate the search if we loop back to them
+ if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+ return false;
+
+ // An RAII help to remove this pair from the recursion set once the recursion
+ // stack pops back out again.
+ RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
+
+ // If V is a constant, then it is known in all predecessors.
+ if (Constant *KC = getKnownConstant(V, Preference)) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(KC, *PI));
+
+ return true;
+ }
+
+ // If V is a non-instruction value, or an instruction in a different block,
+ // then it can't be derived from a PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || I->getParent() != BB) {
+
+ // Okay, if this is a live-in value, see if it has a known value at the end
+ // of any of our predecessors.
+ //
+ // FIXME: This should be an edge property, not a block end property.
+ /// TODO: Per PR2563, we could infer value range information about a
+ /// predecessor based on its terminator.
+ //
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
+ if (Constant *KC = getKnownConstant(PredCst, Preference))
+ Result.push_back(std::make_pair(KC, P));
+ }
+
+ return !Result.empty();
+ }
+
+ /// If I is a PHI node, then we know the incoming values for any constants.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (Constant *KC = getKnownConstant(InVal, Preference)) {
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+ } else {
+ Constant *CI = LVI->getConstantOnEdge(InVal,
+ PN->getIncomingBlock(i), BB);
+ if (Constant *KC = getKnownConstant(CI, Preference))
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ PredValueInfoTy LHSVals, RHSVals;
+
+ // Handle some boolean conditions.
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ assert(Preference == WantInteger && "One-bit non-integer type?");
+ // X | true -> true
+ // X & false -> false
+ if (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
+ WantInteger);
+
+ if (LHSVals.empty() && RHSVals.empty())
+ return false;
+
+ ConstantInt *InterestingVal;
+ if (I->getOpcode() == Instruction::Or)
+ InterestingVal = ConstantInt::getTrue(I->getContext());
+ else
+ InterestingVal = ConstantInt::getFalse(I->getContext());
+
+ SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+
+ // Scan for the sentinel. If we find an undef, force it to the
+ // interesting value: x|undef -> true and x&undef -> false.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
+ if (LHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(LHSVals[i].first)) {
+ Result.push_back(LHSVals[i]);
+ Result.back().first = InterestingVal;
+ LHSKnownBBs.insert(LHSVals[i].second);
+ }
+ for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
+ if (RHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(RHSVals[i].first)) {
+ // If we already inferred a value for this block on the LHS, don't
+ // re-add it.
+ if (!LHSKnownBBs.count(RHSVals[i].second)) {
+ Result.push_back(RHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ // Handle the NOT form of XOR.
+ if (I->getOpcode() == Instruction::Xor &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isOne()) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
+ WantInteger);
+ if (Result.empty())
+ return false;
+
+ // Invert the known values.
+ for (unsigned i = 0, e = Result.size(); i != e; ++i)
+ Result[i].first = ConstantExpr::getNot(Result[i].first);
+
+ return true;
+ }
+
+ // Try to simplify some other binary operator values.
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ assert(Preference != WantBlockAddress
+ && "A binary operator creating a block address?");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
+ WantInteger);
+
+ // Try to use constant folding to simplify the binary operator.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first;
+ Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ // Handle compare with phi operand, where the PHI is defined in this block.
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ assert(Preference == WantInteger && "Compares only produce integers");
+ PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
+ if (PN && PN->getParent() == BB) {
+ // We can do this simplification if any comparisons fold to true or false.
+ // See if any do.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ Value *LHS = PN->getIncomingValue(i);
+ Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
+
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+ if (Res == 0) {
+ if (!isa<Constant>(RHS))
+ continue;
+
+ LazyValueInfo::Tristate
+ ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ cast<Constant>(RHS), PredBB, BB);
+ if (ResT == LazyValueInfo::Unknown)
+ continue;
+ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ }
+
+ if (Constant *KC = getKnownConstant(Res, WantInteger))
+ Result.push_back(std::make_pair(KC, PredBB));
+ }
+
+ return !Result.empty();
+ }
+
+
+ // If comparing a live-in value against a constant, see if we know the
+ // live-in value on any predecessors.
+ if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
+ if (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate Res =
+ LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, P, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
+
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(ResC, P));
+ }
+
+ return !Result.empty();
+ }
+
+ // Try to find a constant value for the LHS of a comparison,
+ // and evaluate it statically if we can.
+ if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first;
+ Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+ V, CmpConst);
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ }
+
+ return !Result.empty();
+ }
+ }
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ // Handle select instructions where at least one operand is a known constant
+ // and we can figure out the condition value for any predecessor block.
+ Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
+ Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
+ PredValueInfoTy Conds;
+ if ((TrueVal || FalseVal) &&
+ ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
+ WantInteger)) {
+ for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
+ Constant *Cond = Conds[i].first;
+
+ // Figure out what value to use for the condition.
+ bool KnownCond;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
+ // A known boolean.
+ KnownCond = CI->isOne();
+ } else {
+ assert(isa<UndefValue>(Cond) && "Unexpected condition value");
+ // Either operand will do, so be sure to pick the one that's a known
+ // constant.
+ // FIXME: Do this more cleverly if both values are known constants?
+ KnownCond = (TrueVal != 0);
+ }
+
+ // See if the select has a known constant value for this predecessor.
+ if (Constant *Val = KnownCond ? TrueVal : FalseVal)
+ Result.push_back(std::make_pair(Val, Conds[i].second));
+ }
+
+ return !Result.empty();
+ }
+ }
+
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ if (Constant *KC = getKnownConstant(CI, Preference)) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(KC, *PI));
+ }
+
+ return !Result.empty();
+}
+
+
+
+/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
+/// in an undefined jump, decide which block is best to revector to.
+///
+/// Since we can pick an arbitrary destination, we pick the successor with the
+/// fewest predecessors. This should reduce the in-degree of the others.
+///
+static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
+ TerminatorInst *BBTerm = BB->getTerminator();
+ unsigned MinSucc = 0;
+ BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
+ // Compute the successor with the minimum number of predecessors.
+ unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ TestBB = BBTerm->getSuccessor(i);
+ unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ if (NumPreds < MinNumPreds) {
+ MinSucc = i;
+ MinNumPreds = NumPreds;
+ }
+ }
+
+ return MinSucc;
+}
+
+static bool hasAddressTakenAndUsed(BasicBlock *BB) {
+ if (!BB->hasAddressTaken()) return false;
+
+ // If the block has its address taken, it may be a tree of dead constants
+ // hanging off of it. These shouldn't keep the block alive.
+ BlockAddress *BA = BlockAddress::get(BB);
+ BA->removeDeadConstantUsers();
+ return !BA->use_empty();
+}
+
+/// ProcessBlock - If there are any predecessors whose control can be threaded
+/// through to a successor, transform them now.
+bool JumpThreading::ProcessBlock(BasicBlock *BB) {
+ // If the block is trivially dead, just return and let the caller nuke it.
+ // This simplifies other transformations.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock())
+ return false;
+
+ // If this block has a single predecessor, and if that pred has a single
+ // successor, merge the blocks. This encourages recursive jump threading
+ // because now the condition in this block can be threaded through
+ // predecessors of our predecessor block.
+ if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
+ if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
+ SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
+ // If SinglePred was a loop header, BB becomes one.
+ if (LoopHeaders.erase(SinglePred))
+ LoopHeaders.insert(BB);
+
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ LVI->eraseBlock(SinglePred);
+ MergeBasicBlockIntoOnlyPred(BB);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+ return true;
+ }
+ }
+
+ // What kind of constant we're looking for.
+ ConstantPreference Preference = WantInteger;
+
+ // Look to see if the terminator is a conditional branch, switch or indirect
+ // branch, if not we can't thread it.
+ Value *Condition;
+ Instruction *Terminator = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
+ // Can't thread an unconditional jump.
+ if (BI->isUnconditional()) return false;
+ Condition = BI->getCondition();
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
+ Condition = SI->getCondition();
+ } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+ // Can't thread indirect branch with no successors.
+ if (IB->getNumSuccessors() == 0) return false;
+ Condition = IB->getAddress()->stripPointerCasts();
+ Preference = WantBlockAddress;
+ } else {
+ return false; // Must be an invoke.
+ }
+
+ // Run constant folding to see if we can reduce the condition to a simple
+ // constant.
+ if (Instruction *I = dyn_cast<Instruction>(Condition)) {
+ Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
+ if (SimpleVal) {
+ I->replaceAllUsesWith(SimpleVal);
+ I->eraseFromParent();
+ Condition = SimpleVal;
+ }
+ }
+
+ // If the terminator is branching on an undef, we can pick any of the
+ // successors to branch to. Let GetBestDestForJumpOnUndef decide.
+ if (isa<UndefValue>(Condition)) {
+ unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
+
+ // Fold the branch/switch.
+ TerminatorInst *BBTerm = BB->getTerminator();
+ for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ if (i == BestSucc) continue;
+ BBTerm->getSuccessor(i)->removePredecessor(BB, true);
+ }
+
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding undef terminator: " << *BBTerm << '\n');
+ BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
+ BBTerm->eraseFromParent();
+ return true;
+ }
+
+ // If the terminator of this block is branching on a constant, simplify the
+ // terminator to an unconditional branch. This can occur due to threading in
+ // other blocks.
+ if (getKnownConstant(Condition, Preference)) {
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding terminator: " << *BB->getTerminator() << '\n');
+ ++NumFolds;
+ ConstantFoldTerminator(BB, true);
+ return true;
+ }
+
+ Instruction *CondInst = dyn_cast<Instruction>(Condition);
+
+ // All the rest of our checks depend on the condition being an instruction.
+ if (CondInst == 0) {
+ // FIXME: Unify this with code below.
+ if (ProcessThreadableEdges(Condition, BB, Preference))
+ return true;
+ return false;
+ }
+
+
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+ // For a comparison where the LHS is outside this block, it's possible
+ // that we've branched on it before. Used LVI to see if we can simplify
+ // the branch based on that.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+ (!isa<Instruction>(CondCmp->getOperand(0)) ||
+ cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // For predecessor edge, determine if the comparison is true or false
+ // on that edge. If they're all true or all false, we can simplify the
+ // branch.
+ // FIXME: We could handle mixed true/false by duplicating code.
+ LazyValueInfo::Tristate Baseline =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Baseline != LazyValueInfo::Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ LazyValueInfo::Tristate Ret =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(),
+ CondCmp->getOperand(0), CondConst, *PI, BB);
+ if (Ret != Baseline) break;
+ }
+
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
+ unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
+ CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ }
+ }
+ }
+ }
+
+ // Check for some cases that are worth simplifying. Right now we want to look
+ // for loads that are used by a switch or by the condition for the branch. If
+ // we see one, check to see if it's partially redundant. If so, insert a PHI
+ // which can then be used to thread the values.
+ //
+ Value *SimplifyValue = CondInst;
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
+ if (isa<Constant>(CondCmp->getOperand(1)))
+ SimplifyValue = CondCmp->getOperand(0);
+
+ // TODO: There are other places where load PRE would be profitable, such as
+ // more complex comparisons.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
+ if (SimplifyPartiallyRedundantLoad(LI))
+ return true;
+
+
+ // Handle a variety of cases where we are branching on something derived from
+ // a PHI node in the current block. If we can prove that any predecessors
+ // compute a predictable value based on a PHI node, thread those predecessors.
+ //
+ if (ProcessThreadableEdges(CondInst, BB, Preference))
+ return true;
+
+ // If this is an otherwise-unfoldable branch on a phi node in the current
+ // block, see if we can simplify.
+ if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+ if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnPHI(PN);
+
+
+ // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
+ if (CondInst->getOpcode() == Instruction::Xor &&
+ CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+
+
+ // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
+ // "(X == 4)", thread through this block.
+
+ return false;
+}
+
+
+/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
+/// load instruction, eliminate it by replacing it with a PHI node. This is an
+/// important optimization that encourages jump threading, and needs to be run
+/// interlaced with other jump threading tasks.
+bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
+ // Don't hack volatile/atomic loads.
+ if (!LI->isSimple()) return false;
+
+ // If the load is defined in a block with exactly one predecessor, it can't be
+ // partially redundant.
+ BasicBlock *LoadBB = LI->getParent();
+ if (LoadBB->getSinglePredecessor())
+ return false;
+
+ Value *LoadedPtr = LI->getOperand(0);
+
+ // If the loaded operand is defined in the LoadBB, it can't be available.
+ // TODO: Could do simple PHI translation, that would be fun :)
+ if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
+ if (PtrOp->getParent() == LoadBB)
+ return false;
+
+ // Scan a few instructions up from the load, to see if it is obviously live at
+ // the entry to its block.
+ BasicBlock::iterator BBIt = LI;
+
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
+ // If the value if the load is locally available within the block, just use
+ // it. This frequently occurs for reg2mem'd allocas.
+ //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
+
+ // If the returned value is the load itself, replace with an undef. This can
+ // only happen in dead loops.
+ if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(AvailableVal);
+ LI->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, if we scanned the whole block and got to the top of the block,
+ // we know the block is locally transparent to the load. If not, something
+ // might clobber its value.
+ if (BBIt != LoadBB->begin())
+ return false;
+
+ // If all of the loads and stores that feed the value have the same TBAA tag,
+ // then we can propagate it onto any newly inserted loads.
+ MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
+
+ SmallPtrSet<BasicBlock*, 8> PredsScanned;
+ typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
+ AvailablePredsTy AvailablePreds;
+ BasicBlock *OneUnavailablePred = 0;
+
+ // If we got here, the loaded value is transparent through to the start of the
+ // block. Check to see if it is available in any of the predecessor blocks.
+ for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PI != PE; ++PI) {
+ BasicBlock *PredBB = *PI;
+
+ // If we already scanned this predecessor, skip it.
+ if (!PredsScanned.insert(PredBB))
+ continue;
+
+ // Scan the predecessor to see if the value is available in the pred.
+ BBIt = PredBB->end();
+ MDNode *ThisTBAATag = 0;
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
+ 0, &ThisTBAATag);
+ if (!PredAvailable) {
+ OneUnavailablePred = PredBB;
+ continue;
+ }
+
+ // If tbaa tags disagree or are not present, forget about them.
+ if (TBAATag != ThisTBAATag) TBAATag = 0;
+
+ // If so, this load is partially redundant. Remember this info so that we
+ // can create a PHI node.
+ AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
+ }
+
+ // If the loaded value isn't available in any predecessor, it isn't partially
+ // redundant.
+ if (AvailablePreds.empty()) return false;
+
+ // Okay, the loaded value is available in at least one (and maybe all!)
+ // predecessors. If the value is unavailable in more than one unique
+ // predecessor, we want to insert a merge block for those common predecessors.
+ // This ensures that we only have to insert one reload, thus not increasing
+ // code size.
+ BasicBlock *UnavailablePred = 0;
+
+ // If there is exactly one predecessor where the value is unavailable, the
+ // already computed 'OneUnavailablePred' block is it. If it ends in an
+ // unconditional branch, we know that it isn't a critical edge.
+ if (PredsScanned.size() == AvailablePreds.size()+1 &&
+ OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
+ UnavailablePred = OneUnavailablePred;
+ } else if (PredsScanned.size() != AvailablePreds.size()) {
+ // Otherwise, we had multiple unavailable predecessors or we had a critical
+ // edge from the one.
+ SmallVector<BasicBlock*, 8> PredsToSplit;
+ SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
+
+ for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
+ AvailablePredSet.insert(AvailablePreds[i].first);
+
+ // Add all the unavailable predecessors to the PredsToSplit list.
+ for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // If the predecessor is an indirect goto, we can't split the edge.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return false;
+
+ if (!AvailablePredSet.count(P))
+ PredsToSplit.push_back(P);
+ }
+
+ // Split them out to their own block.
+ UnavailablePred =
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
+ }
+
+ // If the value isn't available in all predecessors, then there will be
+ // exactly one where it isn't available. Insert a load on that edge and add
+ // it to the AvailablePreds list.
+ if (UnavailablePred) {
+ assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
+ "Can't handle critical edge here!");
+ LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+ NewVal->setDebugLoc(LI->getDebugLoc());
+ if (TBAATag)
+ NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
+ }
+
+ // Now we know that each predecessor of this block has a value in
+ // AvailablePreds, sort them for efficient access as we're walking the preds.
+ array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
+
+ // Create a PHI node at the start of the block for the PRE'd load value.
+ pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
+ LoadBB->begin());
+ PN->takeName(LI);
+ PN->setDebugLoc(LI->getDebugLoc());
+
+ // Insert new entries into the PHI for each predecessor. A single block may
+ // have multiple entries here.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ AvailablePredsTy::iterator I =
+ std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
+ std::make_pair(P, (Value*)0));
+
+ assert(I != AvailablePreds.end() && I->first == P &&
+ "Didn't find entry for predecessor!");
+
+ PN->addIncoming(I->second, I->first);
+ }
+
+ //cerr << "PRE: " << *LI << *PN << "\n";
+
+ LI->replaceAllUsesWith(PN);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+/// FindMostPopularDest - The specified list contains multiple possible
+/// threadable destinations. Pick the one that occurs the most frequently in
+/// the list.
+static BasicBlock *
+FindMostPopularDest(BasicBlock *BB,
+ const SmallVectorImpl<std::pair<BasicBlock*,
+ BasicBlock*> > &PredToDestList) {
+ assert(!PredToDestList.empty());
+
+ // Determine popularity. If there are multiple possible destinations, we
+ // explicitly choose to ignore 'undef' destinations. We prefer to thread
+ // blocks with known and real destinations to threading undef. We'll handle
+ // them later if interesting.
+ DenseMap<BasicBlock*, unsigned> DestPopularity;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second)
+ DestPopularity[PredToDestList[i].second]++;
+
+ // Find the most popular dest.
+ DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
+ BasicBlock *MostPopularDest = DPI->first;
+ unsigned Popularity = DPI->second;
+ SmallVector<BasicBlock*, 4> SamePopularity;
+
+ for (++DPI; DPI != DestPopularity.end(); ++DPI) {
+ // If the popularity of this entry isn't higher than the popularity we've
+ // seen so far, ignore it.
+ if (DPI->second < Popularity)
+ ; // ignore.
+ else if (DPI->second == Popularity) {
+ // If it is the same as what we've seen so far, keep track of it.
+ SamePopularity.push_back(DPI->first);
+ } else {
+ // If it is more popular, remember it.
+ SamePopularity.clear();
+ MostPopularDest = DPI->first;
+ Popularity = DPI->second;
+ }
+ }
+
+ // Okay, now we know the most popular destination. If there is more than one
+ // destination, we need to determine one. This is arbitrary, but we need
+ // to make a deterministic decision. Pick the first one that appears in the
+ // successor list.
+ if (!SamePopularity.empty()) {
+ SamePopularity.push_back(MostPopularDest);
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0; ; ++i) {
+ assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
+
+ if (std::find(SamePopularity.begin(), SamePopularity.end(),
+ TI->getSuccessor(i)) == SamePopularity.end())
+ continue;
+
+ MostPopularDest = TI->getSuccessor(i);
+ break;
+ }
+ }
+
+ // Okay, we have finally picked the most popular destination.
+ return MostPopularDest;
+}
+
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference) {
+ // If threading this would thread across a loop header, don't even try to
+ // thread the edge.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ PredValueInfoTy PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference))
+ return false;
+
+ assert(!PredValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ DEBUG(dbgs() << "IN BB: " << *BB;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
+ << *PredValues[i].first
+ << " for pred '" << PredValues[i].second->getName() << "'.\n";
+ });
+
+ // Decide what we want to thread through. Convert our list of known values to
+ // a list of known destinations for each pred. This also discards duplicate
+ // predecessors and keeps track of the undefined inputs (which are represented
+ // as a null dest in the PredToDestList).
+ SmallPtrSet<BasicBlock*, 16> SeenPreds;
+ SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
+
+ BasicBlock *OnlyDest = 0;
+ BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ BasicBlock *Pred = PredValues[i].second;
+ if (!SeenPreds.insert(Pred))
+ continue; // Duplicate predecessor entry.
+
+ // If the predecessor ends with an indirect goto, we can't change its
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
+ continue;
+
+ Constant *Val = PredValues[i].first;
+
+ BasicBlock *DestBB;
+ if (isa<UndefValue>(Val))
+ DestBB = 0;
+ else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor();
+ } else {
+ assert(isa<IndirectBrInst>(BB->getTerminator())
+ && "Unexpected terminator");
+ DestBB = cast<BlockAddress>(Val)->getBasicBlock();
+ }
+
+ // If we have exactly one destination, remember it for efficiency below.
+ if (PredToDestList.empty())
+ OnlyDest = DestBB;
+ else if (OnlyDest != DestBB)
+ OnlyDest = MultipleDestSentinel;
+
+ PredToDestList.push_back(std::make_pair(Pred, DestBB));
+ }
+
+ // If all edges were unthreadable, we fail.
+ if (PredToDestList.empty())
+ return false;
+
+ // Determine which is the most common successor. If we have many inputs and
+ // this block is a switch, we want to start by threading the batch that goes
+ // to the most popular destination first. If we only know about one
+ // threadable destination (the common case) we can avoid this.
+ BasicBlock *MostPopularDest = OnlyDest;
+
+ if (MostPopularDest == MultipleDestSentinel)
+ MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+
+ // Now that we know what the most popular destination is, factor all
+ // predecessors that will jump to it into a single predecessor.
+ SmallVector<BasicBlock*, 16> PredsToFactor;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second == MostPopularDest) {
+ BasicBlock *Pred = PredToDestList[i].first;
+
+ // This predecessor may be a switch or something else that has multiple
+ // edges to the block. Factor each of these edges by listing them
+ // according to # occurrences in PredsToFactor.
+ TerminatorInst *PredTI = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
+ if (PredTI->getSuccessor(i) == BB)
+ PredsToFactor.push_back(Pred);
+ }
+
+ // If the threadable edges are branching on an undefined value, we get to pick
+ // the destination that these predecessors should get to.
+ if (MostPopularDest == 0)
+ MostPopularDest = BB->getTerminator()->
+ getSuccessor(GetBestDestForJumpOnUndef(BB));
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+}
+
+/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
+/// a PHI node in the current block. See if there are any simplifications we
+/// can do based on inputs to the phi node.
+///
+bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
+ BasicBlock *BB = PN->getParent();
+
+ // TODO: We could make use of this to do it once for blocks with common PHI
+ // values.
+ SmallVector<BasicBlock*, 1> PredBBs;
+ PredBBs.resize(1);
+
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the conditional branch into that block in order to further
+ // encourage jump threading and to eliminate cases where we have branch on a
+ // phi of an icmp (branch on icmp is much better).
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
+ if (PredBr->isUnconditional()) {
+ PredBBs[0] = PredBB;
+ // Try to duplicate BB into PredBB.
+ if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
+/// a xor instruction in the current block. See if there are any
+/// simplifications we can do based on inputs to the xor.
+///
+bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
+ BasicBlock *BB = BO->getParent();
+
+ // If either the LHS or RHS of the xor is a constant, don't do this
+ // optimization.
+ if (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1)))
+ return false;
+
+ // If the first instruction in BB isn't a phi, we won't be able to infer
+ // anything special about any particular predecessor.
+ if (!isa<PHINode>(BB->front()))
+ return false;
+
+ // If we have a xor as the branch input to this block, and we know that the
+ // LHS or RHS of the xor in any predecessor is true/false, then we can clone
+ // the condition into the predecessor and fix that value to true, saving some
+ // logical ops on that path and encouraging other paths to simplify.
+ //
+ // This copies something like this:
+ //
+ // BB:
+ // %X = phi i1 [1], [%X']
+ // %Y = icmp eq i32 %A, %B
+ // %Z = xor i1 %X, %Y
+ // br i1 %Z, ...
+ //
+ // Into:
+ // BB':
+ // %Y = icmp ne i32 %A, %B
+ // br i1 %Z, ...
+
+ PredValueInfoTy XorOpValues;
+ bool isLHS = true;
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
+ WantInteger)) {
+ assert(XorOpValues.empty());
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
+ WantInteger))
+ return false;
+ isLHS = false;
+ }
+
+ assert(!XorOpValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ // Scan the information to see which is most popular: true or false. The
+ // predecessors can be of the set true, false, or undef.
+ unsigned NumTrue = 0, NumFalse = 0;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (isa<UndefValue>(XorOpValues[i].first))
+ // Ignore undefs for the count.
+ continue;
+ if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
+ ++NumFalse;
+ else
+ ++NumTrue;
+ }
+
+ // Determine which value to split on, true, false, or undef if neither.
+ ConstantInt *SplitVal = 0;
+ if (NumTrue > NumFalse)
+ SplitVal = ConstantInt::getTrue(BB->getContext());
+ else if (NumTrue != 0 || NumFalse != 0)
+ SplitVal = ConstantInt::getFalse(BB->getContext());
+
+ // Collect all of the blocks that this can be folded into so that we can
+ // factor this once and clone it once.
+ SmallVector<BasicBlock*, 8> BlocksToFoldInto;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (XorOpValues[i].first != SplitVal &&
+ !isa<UndefValue>(XorOpValues[i].first))
+ continue;
+
+ BlocksToFoldInto.push_back(XorOpValues[i].second);
+ }
+
+ // If we inferred a value for all of the predecessors, then duplication won't
+ // help us. However, we can just replace the LHS or RHS with the constant.
+ if (BlocksToFoldInto.size() ==
+ cast<PHINode>(BB->front()).getNumIncomingValues()) {
+ if (SplitVal == 0) {
+ // If all preds provide undef, just nuke the xor, because it is undef too.
+ BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
+ BO->eraseFromParent();
+ } else if (SplitVal->isZero()) {
+ // If all preds provide 0, replace the xor with the other input.
+ BO->replaceAllUsesWith(BO->getOperand(isLHS));
+ BO->eraseFromParent();
+ } else {
+ // If all preds provide 1, set the computed value to 1.
+ BO->setOperand(!isLHS, SplitVal);
+ }
+
+ return true;
+ }
+
+ // Try to duplicate BB into PredBB.
+ return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+}
+
+
+/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
+/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
+/// NewPred using the entries from OldPred (suitably mapped).
+static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
+ BasicBlock *OldPred,
+ BasicBlock *NewPred,
+ DenseMap<Instruction*, Value*> &ValueMap) {
+ for (BasicBlock::iterator PNI = PHIBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
+ // Ok, we have a PHI node. Figure out what the incoming value was for the
+ // DestBlock.
+ Value *IV = PN->getIncomingValueForBlock(OldPred);
+
+ // Remap the value if necessary.
+ if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
+ if (I != ValueMap.end())
+ IV = I->second;
+ }
+
+ PN->addIncoming(IV, NewPred);
+ }
+}
+
+/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
+/// across BB. Transform the IR to reflect this change.
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB) {
+ // If threading to the same block as we come from, we would infinite loop.
+ if (SuccBB == BB) {
+ DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
+ << "' - would thread to self!\n");
+ return false;
+ }
+
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB)) {
+ DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName()
+ << "' to dest BB '" << SuccBB->getName()
+ << "' - it might create an irreducible loop!\n");
+ return false;
+ }
+
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, Threshold);
+ if (JumpThreadCost > Threshold) {
+ DEBUG(dbgs() << " Not threading BB '" << BB->getName()
+ << "' - Cost is too high: " << JumpThreadCost << "\n");
+ return false;
+ }
+
+ // And finally, do it! Start by factoring the predecessors is needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
+ }
+
+ // And finally, do it!
+ DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
+ << SuccBB->getName() << "' with cost: " << JumpThreadCost
+ << ", across block:\n "
+ << *BB << "\n");
+
+ LVI->threadEdge(PredBB, BB, SuccBB);
+
+ // We are going to have to map operands from the original BB block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
+ // account for entry from PredBB.
+ DenseMap<Instruction*, Value*> ValueMapping;
+
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
+ BB->getName()+".thread",
+ BB->getParent(), BB);
+ NewBB->moveAfter(PredBB);
+
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ // Clone the non-phi instructions of BB into NewBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; !isa<TerminatorInst>(BI); ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ NewBB->getInstList().push_back(New);
+ ValueMapping[BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ // We didn't copy the terminator from BB over to NewBB, because there is now
+ // an unconditional jump to SuccBB. Insert the unconditional jump.
+ BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB);
+ NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
+
+ // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
+ // PHI nodes for NewBB now.
+ AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
+
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+ // PHI insertion, of which we are prepared to do, clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use*, 16> UsesToRename;
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(UI) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+ // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
+ // with the two values we know.
+ SSAUpdate.Initialize(I->getType(), I->getName());
+ SSAUpdate.AddAvailableValue(BB, I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ DEBUG(dbgs() << "\n");
+ }
+
+
+ // Ok, NewBB is good to go. Update the terminator of PredBB to jump to
+ // NewBB instead of BB. This eliminates predecessors from BB, which requires
+ // us to simplify any PHI nodes in BB.
+ TerminatorInst *PredTerm = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
+ if (PredTerm->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB, true);
+ PredTerm->setSuccessor(i, NewBB);
+ }
+
+ // At this point, the IR is fully up to date and consistent. Do a quick scan
+ // over the new instructions and zap any that are constants or dead. This
+ // frequently happens because of phi translation.
+ SimplifyInstructionsInBlock(NewBB, TD, TLI);
+
+ // Threaded an edge!
+ ++NumThreads;
+ return true;
+}
+
+/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
+/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
+/// If we can duplicate the contents of BB up into PredBB do so now, this
+/// improves the odds that the branch will be on an analyzable instruction like
+/// a compare.
+bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs) {
+ assert(!PredBBs.empty() && "Can't handle an empty set");
+
+ // If BB is a loop header, then duplicating this block outside the loop would
+ // cause us to transform this into an irreducible loop, don't do this.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB)) {
+ DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
+ << "' into predecessor block '" << PredBBs[0]->getName()
+ << "' - it might create an irreducible loop!\n");
+ return false;
+ }
+
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, Threshold);
+ if (DuplicationCost > Threshold) {
+ DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
+ << "' - Cost is too high: " << DuplicationCost << "\n");
+ return false;
+ }
+
+ // And finally, do it! Start by factoring the predecessors is needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
+ }
+
+ // Okay, we decided to do this! Clone all the instructions in BB onto the end
+ // of PredBB.
+ DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
+ << PredBB->getName() << "' to eliminate branch on phi. Cost: "
+ << DuplicationCost << " block is:" << *BB << "\n");
+
+ // Unless PredBB ends with an unconditional branch, split the edge so that we
+ // can just clone the bits from BB into the end of the new PredBB.
+ BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
+
+ if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
+ PredBB = SplitEdge(PredBB, BB, this);
+ OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+ }
+
+ // We are going to have to map operands from the original BB block into the
+ // PredBB block. Evaluate PHI nodes in BB.
+ DenseMap<Instruction*, Value*> ValueMapping;
+
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ // Clone the non-phi instructions of BB into PredBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; BI != BB->end(); ++BI) {
+ Instruction *New = BI->clone();
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+
+ // If this instruction can be simplified after the operands are updated,
+ // just use the simplified value instead. This frequently happens due to
+ // phi translation.
+ if (Value *IV = SimplifyInstruction(New, TD)) {
+ delete New;
+ ValueMapping[BI] = IV;
+ } else {
+ // Otherwise, insert the new instruction into the block.
+ New->setName(BI->getName());
+ PredBB->getInstList().insert(OldPredBranch, New);
+ ValueMapping[BI] = New;
+ }
+ }
+
+ // Check to see if the targets of the branch had PHI nodes. If so, we need to
+ // add entries to the PHI nodes for branch from PredBB now.
+ BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
+ AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
+ ValueMapping);
+ AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
+ ValueMapping);
+
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+ // PHI insertion, of which we are prepared to do, clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use*, 16> UsesToRename;
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(UI) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+ // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
+ // with the two values we know.
+ SSAUpdate.Initialize(I->getType(), I->getName());
+ SSAUpdate.AddAvailableValue(BB, I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ DEBUG(dbgs() << "\n");
+ }
+
+ // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
+ // that we nuked.
+ BB->removePredecessor(PredBB, true);
+
+ // Remove the unconditional branch at the end of the PredBB block.
+ OldPredBranch->eraseFromParent();
+
+ ++NumDupes;
+ return true;
+}
+
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
new file mode 100644
index 000000000000..f94cd2a073ef
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -0,0 +1,895 @@
+//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion, attempting to remove as much
+// code from the body of a loop as possible. It does this by either hoisting
+// code into the preheader block, or by sinking code to the exit blocks if it is
+// safe. This pass also promotes must-aliased memory locations in the loop to
+// live in registers, thus hoisting and sinking "invariant" loads and stores.
+//
+// This pass uses alias analysis for two purposes:
+//
+// 1. Moving loop invariant loads and calls out of loops. If we can determine
+// that a load or call inside of a loop never aliases anything stored to,
+// we can hoist it or sink it like any other instruction.
+// 2. Scalar Promotion of Memory - If there is a store instruction inside of
+// the loop, we try to move the store to happen AFTER the loop instead of
+// inside of the loop. This can only happen if a few conditions are true:
+// A. The pointer stored through is loop invariant
+// B. There are no stores or loads in the loop which _may_ alias the
+// pointer. There are no calls in the loop which mod/ref the pointer.
+// If these conditions are true, we can promote the loads and stores in the
+// loop of the pointer to use a temporary alloca'd variable. We then use
+// the SSAUpdater to construct the appropriate SSA form for the value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "licm"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSunk , "Number of instructions sunk out of loop");
+STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
+STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
+STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
+STATISTIC(NumPromoted , "Number of memory locations promoted to registers");
+
+static cl::opt<bool>
+DisablePromotion("disable-licm-promotion", cl::Hidden,
+ cl::desc("Disable memory promotion in LICM pass"));
+
+namespace {
+ struct LICM : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LICM() : LoopPass(ID) {
+ initializeLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved("scalar-evolution");
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<TargetLibraryInfo>();
+ }
+
+ using llvm::Pass::doFinalization;
+
+ bool doFinalization() {
+ assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
+ return false;
+ }
+
+ private:
+ AliasAnalysis *AA; // Current AliasAnalysis information
+ LoopInfo *LI; // Current LoopInfo
+ DominatorTree *DT; // Dominator Tree for the current Loop.
+
+ DataLayout *TD; // DataLayout for constant folding.
+ TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
+
+ // State that is updated as we process loops.
+ bool Changed; // Set to true when we change anything.
+ BasicBlock *Preheader; // The preheader block of the current loop...
+ Loop *CurLoop; // The current loop we are working on...
+ AliasSetTracker *CurAST; // AliasSet information for the current loop...
+ bool MayThrow; // The current loop contains an instruction which
+ // may throw, thus preventing code motion of
+ // instructions with side effects.
+ DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
+
+ /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+ void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
+
+ /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+ /// set.
+ void deleteAnalysisValue(Value *V, Loop *L);
+
+ /// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+ /// dominated by the specified block, and that are in the current loop) in
+ /// reverse depth first order w.r.t the DominatorTree. This allows us to
+ /// visit uses before definitions, allowing us to sink a loop body in one
+ /// pass without iteration.
+ ///
+ void SinkRegion(DomTreeNode *N);
+
+ /// HoistRegion - Walk the specified region of the CFG (defined by all
+ /// blocks dominated by the specified block, and that are in the current
+ /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+ /// visit definitions before uses, allowing us to hoist a loop body in one
+ /// pass without iteration.
+ ///
+ void HoistRegion(DomTreeNode *N);
+
+ /// inSubLoop - Little predicate that returns true if the specified basic
+ /// block is in a subloop of the current one, not the current one itself.
+ ///
+ bool inSubLoop(BasicBlock *BB) {
+ assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
+ return LI->getLoopFor(BB) != CurLoop;
+ }
+
+ /// sink - When an instruction is found to only be used outside of the loop,
+ /// this function moves it to the exit blocks and patches up SSA form as
+ /// needed.
+ ///
+ void sink(Instruction &I);
+
+ /// hoist - When an instruction is found to only use loop invariant operands
+ /// that is safe to hoist, this instruction is called to do the dirty work.
+ ///
+ void hoist(Instruction &I);
+
+ /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it
+ /// is not a trapping instruction or if it is a trapping instruction and is
+ /// guaranteed to execute.
+ ///
+ bool isSafeToExecuteUnconditionally(Instruction &I);
+
+ /// isGuaranteedToExecute - Check that the instruction is guaranteed to
+ /// execute.
+ ///
+ bool isGuaranteedToExecute(Instruction &I);
+
+ /// pointerInvalidatedByLoop - Return true if the body of this loop may
+ /// store into the memory location pointed to by V.
+ ///
+ bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+ const MDNode *TBAAInfo) {
+ // Check to see if any of the basic blocks in CurLoop invalidate *V.
+ return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod();
+ }
+
+ bool canSinkOrHoistInst(Instruction &I);
+ bool isNotUsedInLoop(Instruction &I);
+
+ void PromoteAliasSet(AliasSet &AS,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ SmallVectorImpl<Instruction*> &InsertPts);
+ };
+}
+
+char LICM::ID = 0;
+INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
+
+Pass *llvm::createLICMPass() { return new LICM(); }
+
+/// Hoist expressions out of the specified loop. Note, alias info for inner
+/// loop is not preserved so it is not a good idea to run LICM multiple
+/// times on one loop.
+///
+bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+ Changed = false;
+
+ // Get our Loop and Alias Analysis information...
+ LI = &getAnalysis<LoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ CurAST = new AliasSetTracker(*AA);
+ // Collect Alias info from subloops.
+ for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
+ LoopItr != LoopItrE; ++LoopItr) {
+ Loop *InnerL = *LoopItr;
+ AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
+ assert(InnerAST && "Where is my AST?");
+
+ // What if InnerLoop was modified by other passes ?
+ CurAST->add(*InnerAST);
+
+ // Once we've incorporated the inner loop's AST into ours, we don't need the
+ // subloop's anymore.
+ delete InnerAST;
+ LoopToAliasSetMap.erase(InnerL);
+ }
+
+ CurLoop = L;
+
+ // Get the preheader block to move instructions into...
+ Preheader = L->getLoopPreheader();
+
+ // Loop over the body of this loop, looking for calls, invokes, and stores.
+ // Because subloops have already been incorporated into AST, we skip blocks in
+ // subloops.
+ //
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
+ CurAST->add(*BB); // Incorporate the specified basic block
+ }
+
+ MayThrow = false;
+ // TODO: We've already searched for instructions which may throw in subloops.
+ // We may want to reuse this information.
+ for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end();
+ (BB != BBE) && !MayThrow ; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
+ (I != E) && !MayThrow; ++I)
+ MayThrow |= I->mayThrow();
+
+ // We want to visit all of the instructions in this loop... that are not parts
+ // of our subloops (they have already had their invariants hoisted out of
+ // their loop, into this loop, so there is no need to process the BODIES of
+ // the subloops).
+ //
+ // Traverse the body of the loop in depth first order on the dominator tree so
+ // that we are guaranteed to see definitions before we see uses. This allows
+ // us to sink instructions in one pass, without iteration. After sinking
+ // instructions, we perform another pass to hoist them out of the loop.
+ //
+ if (L->hasDedicatedExits())
+ SinkRegion(DT->getNode(L->getHeader()));
+ if (Preheader)
+ HoistRegion(DT->getNode(L->getHeader()));
+
+ // Now that all loop invariants have been removed from the loop, promote any
+ // memory references to scalars that we can.
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ SmallVector<Instruction *, 8> InsertPts;
+
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I)
+ PromoteAliasSet(*I, ExitBlocks, InsertPts);
+ }
+
+ // Clear out loops state information for the next iteration
+ CurLoop = 0;
+ Preheader = 0;
+
+ // If this loop is nested inside of another one, save the alias information
+ // for when we process the outer loop.
+ if (L->getParentLoop())
+ LoopToAliasSetMap[L] = CurAST;
+ else
+ delete CurAST;
+ return Changed;
+}
+
+/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in
+/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
+/// uses before definitions, allowing us to sink a loop body in one pass without
+/// iteration.
+///
+void LICM::SinkRegion(DomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ BasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ // We are processing blocks in reverse dfo, so process children first.
+ const std::vector<DomTreeNode*> &Children = N->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ SinkRegion(Children[i]);
+
+ // Only need to process the contents of this block if it is not part of a
+ // subloop (which would already have been processed).
+ if (inSubLoop(BB)) return;
+
+ for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
+ Instruction &I = *--II;
+
+ // If the instruction is dead, we would try to sink it because it isn't used
+ // in the loop, instead, just delete it.
+ if (isInstructionTriviallyDead(&I, TLI)) {
+ DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
+ ++II;
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // Check to see if we can sink this instruction to the exit blocks
+ // of the loop. We can do this if the all users of the instruction are
+ // outside of the loop. In this case, it doesn't even matter if the
+ // operands of the instruction are loop invariant.
+ //
+ if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) {
+ ++II;
+ sink(I);
+ }
+ }
+}
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void LICM::HoistRegion(DomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ BasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ // Only need to process the contents of this block if it is not part of a
+ // subloop (which would already have been processed).
+ if (!inSubLoop(BB))
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
+ Instruction &I = *II++;
+
+ // Try constant folding this instruction. If all the operands are
+ // constants, it is technically hoistable, but it would be better to just
+ // fold it.
+ if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) {
+ DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
+ CurAST->copyValue(&I, C);
+ CurAST->deleteValue(&I);
+ I.replaceAllUsesWith(C);
+ I.eraseFromParent();
+ continue;
+ }
+
+ // Try hoisting the instruction out to the preheader. We can only do this
+ // if all of the operands of the instruction are loop invariant and if it
+ // is safe to hoist the instruction.
+ //
+ if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
+ isSafeToExecuteUnconditionally(I))
+ hoist(I);
+ }
+
+ const std::vector<DomTreeNode*> &Children = N->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ HoistRegion(Children[i]);
+}
+
+/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
+/// instruction.
+///
+bool LICM::canSinkOrHoistInst(Instruction &I) {
+ // Loads have extra constraints we have to verify before we can hoist them.
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (!LI->isUnordered())
+ return false; // Don't hoist volatile/atomic loads!
+
+ // Loads from constant memory are always safe to move, even if they end up
+ // in the same alias set as something that ends up being modified.
+ if (AA->pointsToConstantMemory(LI->getOperand(0)))
+ return true;
+ if (LI->getMetadata("invariant.load"))
+ return true;
+
+ // Don't hoist loads which have may-aliased stores in loop.
+ uint64_t Size = 0;
+ if (LI->getType()->isSized())
+ Size = AA->getTypeStoreSize(LI->getType());
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
+ LI->getMetadata(LLVMContext::MD_tbaa));
+ } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // Don't sink or hoist dbg info; it's legal, but not useful.
+ if (isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // Handle simple cases by querying alias analysis.
+ AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
+ if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ return true;
+ if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+ // If this call only reads from memory and there are no writes to memory
+ // in the loop, we can hoist or sink the call as appropriate.
+ bool FoundMod = false;
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I) {
+ AliasSet &AS = *I;
+ if (!AS.isForwardingAliasSet() && AS.isMod()) {
+ FoundMod = true;
+ break;
+ }
+ }
+ if (!FoundMod) return true;
+ }
+
+ // FIXME: This should use mod/ref information to see if we can hoist or
+ // sink the call.
+
+ return false;
+ }
+
+ // Only these instructions are hoistable/sinkable.
+ if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) &&
+ !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) &&
+ !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
+ !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) &&
+ !isa<InsertValueInst>(I))
+ return false;
+
+ return isSafeToExecuteUnconditionally(I);
+}
+
+/// isNotUsedInLoop - Return true if the only users of this instruction are
+/// outside of the loop. If this is true, we can sink the instruction to the
+/// exit blocks of the loop.
+///
+bool LICM::isNotUsedInLoop(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ // PHI node uses occur in predecessor blocks!
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I)
+ if (CurLoop->contains(PN->getIncomingBlock(i)))
+ return false;
+ } else if (CurLoop->contains(User)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+/// sink - When an instruction is found to only be used outside of the loop,
+/// this function moves it to the exit blocks and patches up SSA form as needed.
+/// This method is guaranteed to remove the original instruction from its
+/// position, and may either delete it or move it to outside of the loop.
+///
+void LICM::sink(Instruction &I) {
+ DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ if (isa<LoadInst>(I)) ++NumMovedLoads;
+ else if (isa<CallInst>(I)) ++NumMovedCalls;
+ ++NumSunk;
+ Changed = true;
+
+ // The case where there is only a single exit node of this loop is common
+ // enough that we handle it as a special (more efficient) case. It is more
+ // efficient to handle because there are no PHI nodes that need to be placed.
+ if (ExitBlocks.size() == 1) {
+ if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
+ // Instruction is not used, just delete it.
+ CurAST->deleteValue(&I);
+ // If I has users in unreachable blocks, eliminate.
+ // If I is not void type then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ } else {
+ // Move the instruction to the start of the exit block, after any PHI
+ // nodes in it.
+ I.moveBefore(ExitBlocks[0]->getFirstInsertionPt());
+
+ // This instruction is no longer in the AST for the current loop, because
+ // we just sunk it out of the loop. If we just sunk it into an outer
+ // loop, we will rediscover the operation when we process it.
+ CurAST->deleteValue(&I);
+ }
+ return;
+ }
+
+ if (ExitBlocks.empty()) {
+ // The instruction is actually dead if there ARE NO exit blocks.
+ CurAST->deleteValue(&I);
+ // If I has users in unreachable blocks, eliminate.
+ // If I is not void type then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
+ // hard work of inserting PHI nodes as necessary.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ if (!I.use_empty())
+ SSA.Initialize(I.getType(), I.getName());
+
+ // Insert a copy of the instruction in each exit block of the loop that is
+ // dominated by the instruction. Each exit block is known to only be in the
+ // ExitBlocks list once.
+ BasicBlock *InstOrigBB = I.getParent();
+ unsigned NumInserted = 0;
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+
+ if (!DT->dominates(InstOrigBB, ExitBlock))
+ continue;
+
+ // Insert the code after the last PHI node.
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
+
+ // If this is the first exit block processed, just move the original
+ // instruction, otherwise clone the original instruction and insert
+ // the copy.
+ Instruction *New;
+ if (NumInserted++ == 0) {
+ I.moveBefore(InsertPt);
+ New = &I;
+ } else {
+ New = I.clone();
+ if (!I.getName().empty())
+ New->setName(I.getName()+".le");
+ ExitBlock->getInstList().insert(InsertPt, New);
+ }
+
+ // Now that we have inserted the instruction, inform SSAUpdater.
+ if (!I.use_empty())
+ SSA.AddAvailableValue(ExitBlock, New);
+ }
+
+ // If the instruction doesn't dominate any exit blocks, it must be dead.
+ if (NumInserted == 0) {
+ CurAST->deleteValue(&I);
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+ SSA.RewriteUseAfterInsertions(U);
+ }
+
+ // Update CurAST for NewPHIs if I had pointer type.
+ if (I.getType()->isPointerTy())
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(&I, NewPHIs[i]);
+
+ // Finally, remove the instruction from CurAST. It is no longer in the loop.
+ CurAST->deleteValue(&I);
+}
+
+/// hoist - When an instruction is found to only use loop invariant operands
+/// that is safe to hoist, this instruction is called to do the dirty work.
+///
+void LICM::hoist(Instruction &I) {
+ DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
+ << I << "\n");
+
+ // Move the new node to the Preheader, before its terminator.
+ I.moveBefore(Preheader->getTerminator());
+
+ if (isa<LoadInst>(I)) ++NumMovedLoads;
+ else if (isa<CallInst>(I)) ++NumMovedCalls;
+ ++NumHoisted;
+ Changed = true;
+}
+
+/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is
+/// not a trapping instruction or if it is a trapping instruction and is
+/// guaranteed to execute.
+///
+bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
+ // If it is not a trapping instruction, it is always safe to hoist.
+ if (isSafeToSpeculativelyExecute(&Inst))
+ return true;
+
+ return isGuaranteedToExecute(Inst);
+}
+
+bool LICM::isGuaranteedToExecute(Instruction &Inst) {
+
+ // Somewhere in this loop there is an instruction which may throw and make us
+ // exit the loop.
+ if (MayThrow)
+ return false;
+
+ // Otherwise we have to check to make sure that the instruction dominates all
+ // of the exit blocks. If it doesn't, then there is a path out of the loop
+ // which does not execute this instruction, so we can't hoist it.
+
+ // If the instruction is in the header block for the loop (which is very
+ // common), it is always guaranteed to dominate the exit blocks. Since this
+ // is a common case, and can save some work, check it now.
+ if (Inst.getParent() == CurLoop->getHeader())
+ return true;
+
+ // Get the exit blocks for the current loop.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ // Verify that the block dominates each of the exit blocks of the loop.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
+ return false;
+
+ // As a degenerate case, if the loop is statically infinite then we haven't
+ // proven anything since there are no exit blocks.
+ if (ExitBlocks.empty())
+ return false;
+
+ return true;
+}
+
+namespace {
+ class LoopPromoter : public LoadAndStorePromoter {
+ Value *SomePtr; // Designated pointer to store to.
+ SmallPtrSet<Value*, 4> &PointerMustAliases;
+ SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+ SmallVectorImpl<Instruction*> &LoopInsertPts;
+ AliasSetTracker &AST;
+ DebugLoc DL;
+ int Alignment;
+ MDNode *TBAATag;
+ public:
+ LoopPromoter(Value *SP,
+ const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ SmallPtrSet<Value*, 4> &PMA,
+ SmallVectorImpl<BasicBlock*> &LEB,
+ SmallVectorImpl<Instruction*> &LIP,
+ AliasSetTracker &ast, DebugLoc dl, int alignment,
+ MDNode *TBAATag)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP),
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP),
+ AST(ast), DL(dl), Alignment(alignment), TBAATag(TBAATag) {}
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &) const {
+ Value *Ptr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+ return PointerMustAliases.count(Ptr);
+ }
+
+ virtual void doExtraRewritesBeforeFinalDeletion() const {
+ // Insert stores after in the loop exit blocks. Each exit block gets a
+ // store of the live-out values that feed them. Since we've already told
+ // the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = LoopExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = LoopInsertPts[i];
+ StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+ NewSI->setAlignment(Alignment);
+ NewSI->setDebugLoc(DL);
+ if (TBAATag) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ }
+ }
+
+ virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+ // Update alias analysis.
+ AST.copyValue(LI, V);
+ }
+ virtual void instructionDeleted(Instruction *I) const {
+ AST.deleteValue(I);
+ }
+ };
+} // end anon namespace
+
+/// PromoteAliasSet - Try to promote memory values to scalars by sinking
+/// stores out of the loop and moving loads to before the loop. We do this by
+/// looping over the stores in the loop, looking for stores to Must pointers
+/// which are loop invariant.
+///
+void LICM::PromoteAliasSet(AliasSet &AS,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ SmallVectorImpl<Instruction*> &InsertPts) {
+ // We can promote this alias set if it has a store, if it is a "Must" alias
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
+ return;
+
+ assert(!AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+ Value *SomePtr = AS.begin()->getValue();
+
+ // It isn't safe to promote a load/store from the loop if the load/store is
+ // conditional. For example, turning:
+ //
+ // for () { if (c) *P += 1; }
+ //
+ // into:
+ //
+ // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
+ //
+ // is not safe, because *P may only be valid to access if 'c' is true.
+ //
+ // It is safe to promote P if all uses are direct load/stores and if at
+ // least one is guaranteed to be executed.
+ bool GuaranteedToExecute = false;
+
+ SmallVector<Instruction*, 64> LoopUses;
+ SmallPtrSet<Value*, 4> PointerMustAliases;
+
+ // We start with an alignment of one and try to find instructions that allow
+ // us to prove better alignment.
+ unsigned Alignment = 1;
+ MDNode *TBAATag = 0;
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes. While we are at it, collect alignment and TBAA info.
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ Value *ASIV = ASI->getValue();
+ PointerMustAliases.insert(ASIV);
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ if (SomePtr->getType() != ASIV->getType())
+ return;
+
+ for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
+ UI != UE; ++UI) {
+ // Ignore instructions that are outside the loop.
+ Instruction *Use = dyn_cast<Instruction>(*UI);
+ if (!Use || !CurLoop->contains(Use))
+ continue;
+
+ // If there is an non-load/store instruction in the loop, we can't promote
+ // it.
+ if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
+ assert(!load->isVolatile() && "AST broken");
+ if (!load->isSimple())
+ return;
+ } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
+ // Stores *of* the pointer are not interesting, only stores *to* the
+ // pointer.
+ if (Use->getOperand(1) != ASIV)
+ continue;
+ assert(!store->isVolatile() && "AST broken");
+ if (!store->isSimple())
+ return;
+
+ // Note that we only check GuaranteedToExecute inside the store case
+ // so that we do not introduce stores where they did not exist before
+ // (which would break the LLVM concurrency model).
+
+ // If the alignment of this instruction allows us to specify a more
+ // restrictive (and performant) alignment and if we are sure this
+ // instruction will be executed, update the alignment.
+ // Larger is better, with the exception of 0 being the best alignment.
+ unsigned InstAlignment = store->getAlignment();
+ if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
+ if (isGuaranteedToExecute(*Use)) {
+ GuaranteedToExecute = true;
+ Alignment = InstAlignment;
+ }
+
+ if (!GuaranteedToExecute)
+ GuaranteedToExecute = isGuaranteedToExecute(*Use);
+
+ } else
+ return; // Not a load or store.
+
+ // Merge the TBAA tags.
+ if (LoopUses.empty()) {
+ // On the first load/store, just take its TBAA tag.
+ TBAATag = Use->getMetadata(LLVMContext::MD_tbaa);
+ } else if (TBAATag) {
+ TBAATag = MDNode::getMostGenericTBAA(TBAATag,
+ Use->getMetadata(LLVMContext::MD_tbaa));
+ }
+
+ LoopUses.push_back(Use);
+ }
+ }
+
+ // If there isn't a guaranteed-to-execute instruction, we can't promote.
+ if (!GuaranteedToExecute)
+ return;
+
+ // Otherwise, this is safe to promote, lets do it!
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ Changed = true;
+ ++NumPromoted;
+
+ // Grab a debug location for the inserted loads/stores; given that the
+ // inserted loads/stores have little relation to the original loads/stores,
+ // this code just arbitrarily picks a location from one, since any debug
+ // location is better than none.
+ DebugLoc DL = LoopUses[0]->getDebugLoc();
+
+ // Figure out the loop exits and their insertion points, if this is the
+ // first promotion.
+ if (ExitBlocks.empty()) {
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ InsertPts.resize(ExitBlocks.size());
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt();
+ }
+
+ // We use the SSAUpdater interface to insert phi nodes as required.
+ SmallVector<PHINode*, 16> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+ LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
+ InsertPts, *CurAST, DL, Alignment, TBAATag);
+
+ // Set up the preheader to have a definition of the value. It is the live-out
+ // value from the preheader that uses in the loop will use.
+ LoadInst *PreheaderLoad =
+ new LoadInst(SomePtr, SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
+ PreheaderLoad->setAlignment(Alignment);
+ PreheaderLoad->setDebugLoc(DL);
+ if (TBAATag) PreheaderLoad->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ SSA.AddAvailableValue(Preheader, PreheaderLoad);
+
+ // Rewrite all the loads in the loop and remember all the definitions from
+ // stores in the loop.
+ Promoter.run(LoopUses);
+
+ // If the SSAUpdater didn't use the load in the preheader, just zap it now.
+ if (PreheaderLoad->use_empty())
+ PreheaderLoad->eraseFromParent();
+}
+
+
+/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+ if (!AST)
+ return;
+
+ AST->copyValue(From, To);
+}
+
+/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+/// set.
+void LICM::deleteAnalysisValue(Value *V, Loop *L) {
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+ if (!AST)
+ return;
+
+ AST->deleteValue(V);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
new file mode 100644
index 000000000000..0b62050b17a0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -0,0 +1,250 @@
+//===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dead Loop Deletion Pass. This pass is responsible
+// for eliminating loops with non-infinite computable trip counts that have no
+// side effects or volatile instructions, and do not contribute to the
+// computation of the function's return value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-delete"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+using namespace llvm;
+
+STATISTIC(NumDeleted, "Number of loops deleted");
+
+namespace {
+ class LoopDeletion : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopDeletion() : LoopPass(ID) {
+ initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Possibly eliminate loop L if it is dead.
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ }
+
+ private:
+ bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
+ bool &Changed, BasicBlock *Preheader);
+
+ };
+}
+
+char LoopDeletion::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
+
+Pass *llvm::createLoopDeletionPass() {
+ return new LoopDeletion();
+}
+
+/// isLoopDead - Determined if a loop is dead. This assumes that we've already
+/// checked for unique exit and exiting blocks, and that the code is in LCSSA
+/// form.
+bool LoopDeletion::isLoopDead(Loop *L,
+ SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
+ bool &Changed, BasicBlock *Preheader) {
+ BasicBlock *exitBlock = exitBlocks[0];
+
+ // Make sure that all PHI entries coming from the loop are loop invariant.
+ // Because the code is in LCSSA form, any values used outside of the loop
+ // must pass through a PHI in the exit block, meaning that this check is
+ // sufficient to guarantee that no loop-variant values are used outside
+ // of the loop.
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+
+ // Make sure all exiting blocks produce the same incoming value for the exit
+ // block. If there are different incoming values for different exiting
+ // blocks, then it is impossible to statically determine which value should
+ // be used.
+ for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) {
+ if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
+ return false;
+ }
+
+ if (Instruction *I = dyn_cast<Instruction>(incoming))
+ if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
+ return false;
+
+ ++BI;
+ }
+
+ // Make sure that no instructions in the block have potential side-effects.
+ // This includes instructions that could write to memory, and loads that are
+ // marked volatile. This could be made more aggressive by using aliasing
+ // information to identify readonly and readnone calls.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
+ BI != BE; ++BI) {
+ if (BI->mayHaveSideEffects())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
+/// observable behavior of the program other than finite running time. Note
+/// we do ensure that this never remove a loop that might be infinite, as doing
+/// so could change the halting/non-halting nature of a program.
+/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
+/// in order to make various safety checks work.
+bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // We can only remove the loop if there is a preheader that we can
+ // branch from after removing it.
+ BasicBlock *preheader = L->getLoopPreheader();
+ if (!preheader)
+ return false;
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->hasDedicatedExits())
+ return false;
+
+ // We can't remove loops that contain subloops. If the subloops were dead,
+ // they would already have been removed in earlier executions of this pass.
+ if (L->begin() != L->end())
+ return false;
+
+ SmallVector<BasicBlock*, 4> exitingBlocks;
+ L->getExitingBlocks(exitingBlocks);
+
+ SmallVector<BasicBlock*, 4> exitBlocks;
+ L->getUniqueExitBlocks(exitBlocks);
+
+ // We require that the loop only have a single exit block. Otherwise, we'd
+ // be in the situation of needing to be able to solve statically which exit
+ // block will be branched to, or trying to preserve the branching logic in
+ // a loop invariant manner.
+ if (exitBlocks.size() != 1)
+ return false;
+
+ // Finally, we have to check that the loop really is dead.
+ bool Changed = false;
+ if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ return Changed;
+
+ // Don't remove loops for which we can't solve the trip count.
+ // They could be infinite, in which case we'd be changing program behavior.
+ ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
+ const SCEV *S = SE.getMaxBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(S))
+ return Changed;
+
+ // Now that we know the removal is safe, remove the loop by changing the
+ // branch from the preheader to go to the single exit block.
+ BasicBlock *exitBlock = exitBlocks[0];
+
+ // Because we're deleting a large chunk of code at once, the sequence in which
+ // we remove things is very important to avoid invalidation issues. Don't
+ // mess with this unless you have good reason and know what you're doing.
+
+ // Tell ScalarEvolution that the loop is deleted. Do this before
+ // deleting the loop so that ScalarEvolution can look at the loop
+ // to determine what it needs to clean up.
+ SE.forgetLoop(L);
+
+ // Connect the preheader directly to the exit block.
+ TerminatorInst *TI = preheader->getTerminator();
+ TI->replaceUsesOfWith(L->getHeader(), exitBlock);
+
+ // Rewrite phis in the exit block to get their inputs from
+ // the preheader instead of the exiting block.
+ BasicBlock *exitingBlock = exitingBlocks[0];
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ int j = P->getBasicBlockIndex(exitingBlock);
+ assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
+ P->setIncomingBlock(j, preheader);
+ for (unsigned i = 1; i < exitingBlocks.size(); ++i)
+ P->removeIncomingValue(exitingBlocks[i]);
+ ++BI;
+ }
+
+ // Update the dominator tree and remove the instructions and blocks that will
+ // be deleted from the reference counting scheme.
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ SmallVector<DomTreeNode*, 8> ChildNodes;
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ // Move all of the block's children to be children of the preheader, which
+ // allows us to remove the domtree entry for the block.
+ ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+ DE = ChildNodes.end(); DI != DE; ++DI) {
+ DT.changeImmediateDominator(*DI, DT[preheader]);
+ }
+
+ ChildNodes.clear();
+ DT.eraseNode(*LI);
+
+ // Remove the block from the reference counting scheme, so that we can
+ // delete it freely later.
+ (*LI)->dropAllReferences();
+ }
+
+ // Erase the instructions and the blocks without having to worry
+ // about ordering because we already dropped the references.
+ // NOTE: This iteration is safe because erasing the block does not remove its
+ // entry from the loop's block list. We do that in the next section.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI)
+ (*LI)->eraseFromParent();
+
+ // Finally, the blocks from loopinfo. This has to happen late because
+ // otherwise our loop iterators won't work.
+ LoopInfo &loopInfo = getAnalysis<LoopInfo>();
+ SmallPtrSet<BasicBlock*, 8> blocks;
+ blocks.insert(L->block_begin(), L->block_end());
+ for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
+ E = blocks.end(); I != E; ++I)
+ loopInfo.removeBlock(*I);
+
+ // The last step is to inform the loop pass manager that we've
+ // eliminated this loop.
+ LPM.deleteLoopFromQueue(L);
+ Changed = true;
+
+ ++NumDeleted;
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
new file mode 100644
index 000000000000..8258719a0200
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -0,0 +1,1138 @@
+//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an idiom recognizer that transforms simple loops into a
+// non-loop form. In cases that this kicks in, it can be a significant
+// performance win.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO List:
+//
+// Future loop memory idioms to recognize:
+// memcmp, memmove, strlen, etc.
+// Future floating point idioms to recognize in -ffast-math mode:
+// fpowi
+// Future integer operation idioms to recognize:
+// ctpop, ctlz, cttz
+//
+// Beware that isel's default lowering for ctpop is highly inefficient for
+// i64 and larger types when i64 is legal and the value has few bits set. It
+// would be good to enhance isel to emit a loop for ctpop in this case.
+//
+// We should enhance the memset/memcpy recognition to handle multiple stores in
+// the loop. This would handle things like:
+// void foo(_Complex float *P)
+// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
+//
+// We should enhance this to handle negative strides through memory.
+// Alternatively (and perhaps better) we could rely on an earlier pass to force
+// forward iteration through memory, which is generally better for cache
+// behavior. Negative strides *do* happen for memset/memcpy loops.
+//
+// This could recognize common matrix multiplies and dot product idioms and
+// replace them with calls to BLAS (if linked in??).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-idiom"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
+STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
+
+namespace {
+
+ class LoopIdiomRecognize;
+
+ /// This class defines some utility functions for loop idiom recognization.
+ class LIRUtil {
+ public:
+ /// Return true iff the block contains nothing but an uncondition branch
+ /// (aka goto instruction).
+ static bool isAlmostEmpty(BasicBlock *);
+
+ static BranchInst *getBranch(BasicBlock *BB) {
+ return dyn_cast<BranchInst>(BB->getTerminator());
+ }
+
+ /// Return the condition of the branch terminating the given basic block.
+ static Value *getBrCondtion(BasicBlock *);
+
+ /// Derive the precondition block (i.e the block that guards the loop
+ /// preheader) from the given preheader.
+ static BasicBlock *getPrecondBb(BasicBlock *PreHead);
+ };
+
+ /// This class is to recoginize idioms of population-count conducted in
+ /// a noncountable loop. Currently it only recognizes this pattern:
+ /// \code
+ /// while(x) {cnt++; ...; x &= x - 1; ...}
+ /// \endcode
+ class NclPopcountRecognize {
+ LoopIdiomRecognize &LIR;
+ Loop *CurLoop;
+ BasicBlock *PreCondBB;
+
+ typedef IRBuilder<> IRBuilderTy;
+
+ public:
+ explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR);
+ bool recognize();
+
+ private:
+ /// Take a glimpse of the loop to see if we need to go ahead recoginizing
+ /// the idiom.
+ bool preliminaryScreen();
+
+ /// Check if the given conditional branch is based on the comparison
+ /// beween a variable and zero, and if the variable is non-zero, the
+ /// control yeilds to the loop entry. If the branch matches the behavior,
+ /// the variable involved in the comparion is returned. This function will
+ /// be called to see if the precondition and postcondition of the loop
+ /// are in desirable form.
+ Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const;
+
+ /// Return true iff the idiom is detected in the loop. and 1) \p CntInst
+ /// is set to the instruction counting the pupulation bit. 2) \p CntPhi
+ /// is set to the corresponding phi node. 3) \p Var is set to the value
+ /// whose population bits are being counted.
+ bool detectIdiom
+ (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const;
+
+ /// Insert ctpop intrinsic function and some obviously dead instructions.
+ void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var);
+
+ /// Create llvm.ctpop.* intrinsic function.
+ CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
+ };
+
+ class LoopIdiomRecognize : public LoopPass {
+ Loop *CurLoop;
+ const DataLayout *TD;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ const TargetTransformInfo *TTI;
+ public:
+ static char ID;
+ explicit LoopIdiomRecognize() : LoopPass(ID) {
+ initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ TD = 0; DT = 0; SE = 0; TLI = 0; TTI = 0;
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
+
+ bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment,
+ Value *SplatValue, Instruction *TheStore,
+ const SCEVAddRecExpr *Ev,
+ const SCEV *BECount);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+
+ const DataLayout *getDataLayout() {
+ return TD ? TD : TD=getAnalysisIfAvailable<DataLayout>();
+ }
+
+ DominatorTree *getDominatorTree() {
+ return DT ? DT : (DT=&getAnalysis<DominatorTree>());
+ }
+
+ ScalarEvolution *getScalarEvolution() {
+ return SE ? SE : (SE = &getAnalysis<ScalarEvolution>());
+ }
+
+ TargetLibraryInfo *getTargetLibraryInfo() {
+ return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>());
+ }
+
+ const TargetTransformInfo *getTargetTransformInfo() {
+ return TTI ? TTI : (TTI = &getAnalysis<TargetTransformInfo>());
+ }
+
+ Loop *getLoop() const { return CurLoop; }
+
+ private:
+ bool runOnNoncountableLoop();
+ bool runOnCountableLoop();
+ };
+}
+
+char LoopIdiomRecognize::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+
+Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
+
+/// deleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+
+ // Before we touch this instruction, remove it from SE!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // SCEV.
+ SE.forgetValue(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ } while (!NowDeadInsts.empty());
+}
+
+/// deleteIfDeadInstruction - If the specified value is a dead instruction,
+/// delete it and any recursively used instructions.
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
+ const TargetLibraryInfo *TLI) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (isInstructionTriviallyDead(I, TLI))
+ deleteDeadInstruction(I, SE, TLI);
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of LIRUtil
+//
+//===----------------------------------------------------------------------===//
+
+// This fucntion will return true iff the given block contains nothing but goto.
+// A typical usage of this function is to check if the preheader fucntion is
+// "almost" empty such that generated intrinsic function can be moved across
+// preheader and to be placed at the end of the preconditiona block without
+// concerning of breaking data dependence.
+bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
+ if (BranchInst *Br = getBranch(BB)) {
+ return Br->isUnconditional() && BB->size() == 1;
+ }
+ return false;
+}
+
+Value *LIRUtil::getBrCondtion(BasicBlock *BB) {
+ BranchInst *Br = getBranch(BB);
+ return Br ? Br->getCondition() : 0;
+}
+
+BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
+ if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
+ BranchInst *Br = getBranch(BB);
+ return Br && Br->isConditional() ? BB : 0;
+ }
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of NclPopcountRecognize
+//
+//===----------------------------------------------------------------------===//
+
+NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
+ LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
+}
+
+bool NclPopcountRecognize::preliminaryScreen() {
+ const TargetTransformInfo *TTI = LIR.getTargetTransformInfo();
+ if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
+ return false;
+
+ // Counting population are usually conducted by few arithmetic instrutions.
+ // Such instructions can be easilly "absorbed" by vacant slots in a
+ // non-compact loop. Therefore, recognizing popcount idiom only makes sense
+ // in a compact loop.
+
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
+
+ BasicBlock *LoopBody = *(CurLoop->block_begin());
+ if (LoopBody->size() >= 20) {
+ // The loop is too big, bail out.
+ return false;
+ }
+
+ // It should have a preheader containing nothing but a goto instruction.
+ BasicBlock *PreHead = CurLoop->getLoopPreheader();
+ if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
+ return false;
+
+ // It should have a precondition block where the generated popcount instrinsic
+ // function will be inserted.
+ PreCondBB = LIRUtil::getPrecondBb(PreHead);
+ if (!PreCondBB)
+ return false;
+
+ return true;
+}
+
+Value *NclPopcountRecognize::matchCondition (BranchInst *Br,
+ BasicBlock *LoopEntry) const {
+ if (!Br || !Br->isConditional())
+ return 0;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
+ if (!Cond)
+ return 0;
+
+ ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
+ if (!CmpZero || !CmpZero->isZero())
+ return 0;
+
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+ if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
+ (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
+ return Cond->getOperand(0);
+
+ return 0;
+}
+
+bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
+ PHINode *&CntPhi,
+ Value *&Var) const {
+ // Following code tries to detect this idiom:
+ //
+ // if (x0 != 0)
+ // goto loop-exit // the precondition of the loop
+ // cnt0 = init-val;
+ // do {
+ // x1 = phi (x0, x2);
+ // cnt1 = phi(cnt0, cnt2);
+ //
+ // cnt2 = cnt1 + 1;
+ // ...
+ // x2 = x1 & (x1 - 1);
+ // ...
+ // } while(x != 0);
+ //
+ // loop-exit:
+ //
+
+ // step 1: Check to see if the look-back branch match this pattern:
+ // "if (a!=0) goto loop-entry".
+ BasicBlock *LoopEntry;
+ Instruction *DefX2, *CountInst;
+ Value *VarX1, *VarX0;
+ PHINode *PhiX, *CountPhi;
+
+ DefX2 = CountInst = 0;
+ VarX1 = VarX0 = 0;
+ PhiX = CountPhi = 0;
+ LoopEntry = *(CurLoop->block_begin());
+
+ // step 1: Check if the loop-back branch is in desirable form.
+ {
+ if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry))
+ DefX2 = dyn_cast<Instruction>(T);
+ else
+ return false;
+ }
+
+ // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
+ {
+ if (!DefX2 || DefX2->getOpcode() != Instruction::And)
+ return false;
+
+ BinaryOperator *SubOneOp;
+
+ if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
+ VarX1 = DefX2->getOperand(1);
+ else {
+ VarX1 = DefX2->getOperand(0);
+ SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
+ }
+ if (!SubOneOp)
+ return false;
+
+ Instruction *SubInst = cast<Instruction>(SubOneOp);
+ ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
+ if (!Dec ||
+ !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
+ (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
+ return false;
+ }
+ }
+
+ // step 3: Check the recurrence of variable X
+ {
+ PhiX = dyn_cast<PHINode>(VarX1);
+ if (!PhiX ||
+ (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
+ return false;
+ }
+ }
+
+ // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
+ {
+ CountInst = NULL;
+ for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
+ IterE = LoopEntry->end(); Iter != IterE; Iter++) {
+ Instruction *Inst = Iter;
+ if (Inst->getOpcode() != Instruction::Add)
+ continue;
+
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ if (!Inc || !Inc->isOne())
+ continue;
+
+ PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
+ if (!Phi || Phi->getParent() != LoopEntry)
+ continue;
+
+ // Check if the result of the instruction is live of the loop.
+ bool LiveOutLoop = false;
+ for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
+ I != E; I++) {
+ if ((cast<Instruction>(*I))->getParent() != LoopEntry) {
+ LiveOutLoop = true; break;
+ }
+ }
+
+ if (LiveOutLoop) {
+ CountInst = Inst;
+ CountPhi = Phi;
+ break;
+ }
+ }
+
+ if (!CountInst)
+ return false;
+ }
+
+ // step 5: check if the precondition is in this form:
+ // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
+ {
+ BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
+ Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader());
+ if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
+ return false;
+
+ CntInst = CountInst;
+ CntPhi = CountPhi;
+ Var = T;
+ }
+
+ return true;
+}
+
+void NclPopcountRecognize::transform(Instruction *CntInst,
+ PHINode *CntPhi, Value *Var) {
+
+ ScalarEvolution *SE = LIR.getScalarEvolution();
+ TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo();
+ BasicBlock *PreHead = CurLoop->getLoopPreheader();
+ BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
+ const DebugLoc DL = CntInst->getDebugLoc();
+
+ // Assuming before transformation, the loop is following:
+ // if (x) // the precondition
+ // do { cnt++; x &= x - 1; } while(x);
+
+ // Step 1: Insert the ctpop instruction at the end of the precondition block
+ IRBuilderTy Builder(PreCondBr);
+ Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
+ {
+ PopCnt = createPopcntIntrinsic(Builder, Var, DL);
+ NewCount = PopCntZext =
+ Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
+
+ if (NewCount != PopCnt)
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+
+ // TripCnt is exactly the number of iterations the loop has
+ TripCnt = NewCount;
+
+ // If the popoulation counter's initial value is not zero, insert Add Inst.
+ Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
+ ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+ if (!InitConst || !InitConst->isZero()) {
+ NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+ }
+ }
+
+ // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to
+ // "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic
+ // function would be partial dead code, and downstream passes will drag
+ // it back from the precondition block to the preheader.
+ {
+ ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
+
+ Value *Opnd0 = PopCntZext;
+ Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
+ if (PreCond->getOperand(0) != Var)
+ std::swap(Opnd0, Opnd1);
+
+ ICmpInst *NewPreCond =
+ cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
+ PreCond->replaceAllUsesWith(NewPreCond);
+
+ deleteDeadInstruction(PreCond, *SE, TLI);
+ }
+
+ // Step 3: Note that the population count is exactly the trip count of the
+ // loop in question, which enble us to to convert the loop from noncountable
+ // loop into a countable one. The benefit is twofold:
+ //
+ // - If the loop only counts population, the entire loop become dead after
+ // the transformation. It is lots easier to prove a countable loop dead
+ // than to prove a noncountable one. (In some C dialects, a infite loop
+ // isn't dead even if it computes nothing useful. In general, DCE needs
+ // to prove a noncountable loop finite before safely delete it.)
+ //
+ // - If the loop also performs something else, it remains alive.
+ // Since it is transformed to countable form, it can be aggressively
+ // optimized by some optimizations which are in general not applicable
+ // to a noncountable loop.
+ //
+ // After this step, this loop (conceptually) would look like following:
+ // newcnt = __builtin_ctpop(x);
+ // t = newcnt;
+ // if (x)
+ // do { cnt++; x &= x-1; t--) } while (t > 0);
+ BasicBlock *Body = *(CurLoop->block_begin());
+ {
+ BranchInst *LbBr = LIRUtil::getBranch(Body);
+ ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
+ Type *Ty = TripCnt->getType();
+
+ PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin());
+
+ Builder.SetInsertPoint(LbCond);
+ Value *Opnd1 = cast<Value>(TcPhi);
+ Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1));
+ Instruction *TcDec =
+ cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true));
+
+ TcPhi->addIncoming(TripCnt, PreHead);
+ TcPhi->addIncoming(TcDec, Body);
+
+ CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ?
+ CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
+ LbCond->setPredicate(Pred);
+ LbCond->setOperand(0, TcDec);
+ LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0)));
+ }
+
+ // Step 4: All the references to the original population counter outside
+ // the loop are replaced with the NewCount -- the value returned from
+ // __builtin_ctpop().
+ {
+ SmallVector<Value *, 4> CntUses;
+ for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end();
+ I != E; I++) {
+ if (cast<Instruction>(*I)->getParent() != Body)
+ CntUses.push_back(*I);
+ }
+ for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
+ (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
+ }
+ }
+
+ // step 5: Forget the "non-computable" trip-count SCEV associated with the
+ // loop. The loop would otherwise not be deleted even if it becomes empty.
+ SE->forgetLoop(CurLoop);
+}
+
+CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
+ Value *Val, DebugLoc DL) {
+ Value *Ops[] = { Val };
+ Type *Tys[] = { Val->getType() };
+
+ Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
+ Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
+ CallInst *CI = IRBuilder.CreateCall(Func, Ops);
+ CI->setDebugLoc(DL);
+
+ return CI;
+}
+
+/// recognize - detect population count idiom in a non-countable loop. If
+/// detected, transform the relevant code to popcount intrinsic function
+/// call, and return true; otherwise, return false.
+bool NclPopcountRecognize::recognize() {
+
+ if (!LIR.getTargetTransformInfo())
+ return false;
+
+ LIR.getScalarEvolution();
+
+ if (!preliminaryScreen())
+ return false;
+
+ Instruction *CntInst;
+ PHINode *CntPhi;
+ Value *Val;
+ if (!detectIdiom(CntInst, CntPhi, Val))
+ return false;
+
+ transform(CntInst, CntPhi, Val);
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of LoopIdiomRecognize
+//
+//===----------------------------------------------------------------------===//
+
+bool LoopIdiomRecognize::runOnCountableLoop() {
+ const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
+ if (isa<SCEVCouldNotCompute>(BECount)) return false;
+
+ // If this loop executes exactly one time, then it should be peeled, not
+ // optimized by this pass.
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ if (BECst->getValue()->getValue() == 0)
+ return false;
+
+ // We require target data for now.
+ if (!getDataLayout())
+ return false;
+
+ // set DT
+ (void)getDominatorTree();
+
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ // set TLI
+ (void)getTargetLibraryInfo();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ DEBUG(dbgs() << "loop-idiom Scanning: F["
+ << CurLoop->getHeader()->getParent()->getName()
+ << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
+
+ bool MadeChange = false;
+ // Scan all the blocks in the loop that are not in subloops.
+ for (Loop::block_iterator BI = CurLoop->block_begin(),
+ E = CurLoop->block_end(); BI != E; ++BI) {
+ // Ignore blocks in subloops.
+ if (LI.getLoopFor(*BI) != CurLoop)
+ continue;
+
+ MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
+ }
+ return MadeChange;
+}
+
+bool LoopIdiomRecognize::runOnNoncountableLoop() {
+ NclPopcountRecognize Popcount(*this);
+ if (Popcount.recognize())
+ return true;
+
+ return false;
+}
+
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ CurLoop = L;
+
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it, just give up.
+ if (!L->getLoopPreheader())
+ return false;
+
+ // Disable loop idiom recognition if the function's name is a common idiom.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy")
+ return false;
+
+ SE = &getAnalysis<ScalarEvolution>();
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop();
+ return runOnNoncountableLoop();
+}
+
+/// runOnLoopBlock - Process the specified block, which lives in a counted loop
+/// with the specified backedge count. This block is known to be in the current
+/// loop and not in any subloops.
+bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ // We can only promote stores in this block if they are unconditionally
+ // executed in the loop. For a block to be unconditionally executed, it has
+ // to dominate all the exit blocks of the loop. Verify this now.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!DT->dominates(BB, ExitBlocks[i]))
+ return false;
+
+ bool MadeChange = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopStore(SI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the store invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+
+ // Look for memset instructions, which may be optimized to a larger memset.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopMemSet(MSI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the memset invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+ }
+
+ return MadeChange;
+}
+
+
+/// processLoopStore - See if this store can be promoted to a memset or memcpy.
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+ if (!SI->isSimple()) return false;
+
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
+
+ // Reject stores that are so large that they overflow an unsigned.
+ uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *StoreEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ return false;
+
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ unsigned StoreSize = (unsigned)SizeInBits >> 3;
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
+
+ if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
+ // TODO: Could also handle negative stride here someday, that will require
+ // the validity check in mayLoopAccessLocation to be updated though.
+ // Enable this to print exact negative strides.
+ if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
+ dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
+ dbgs() << "BB: " << *SI->getParent();
+ }
+
+ return false;
+ }
+
+ // See if we can optimize just this store in isolation.
+ if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+ StoredVal, SI, StoreEv, BECount))
+ return true;
+
+ // If the stored value is a strided load in the same loop with the same stride
+ // this this may be transformable into a memcpy. This kicks in for stuff like
+ // for (i) A[i] = B[i];
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
+ if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
+ StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
+ if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
+ return true;
+ }
+ //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
+
+ return false;
+}
+
+/// processLoopMemSet - See if this memset can be promoted to a large memset.
+bool LoopIdiomRecognize::
+processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+ // We can only handle non-volatile memsets with a constant size.
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+
+ // If we're not allowed to hack on memset, we fail.
+ if (!TLI->has(LibFunc::memset))
+ return false;
+
+ Value *Pointer = MSI->getDest();
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
+ if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ return false;
+
+ // Reject memsets that are so large that they overflow an unsigned.
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if ((SizeInBytes >> 32) != 0)
+ return false;
+
+ // Check to see if the stride matches the size of the memset. If so, then we
+ // know that every byte is touched in the loop.
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+
+ // TODO: Could also handle negative stride here someday, that will require the
+ // validity check in mayLoopAccessLocation to be updated though.
+ if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ return false;
+
+ return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
+ MSI->getAlignment(), MSI->getValue(),
+ MSI, Ev, BECount);
+}
+
+
+/// mayLoopAccessLocation - Return true if the specified loop might access the
+/// specified pointer location, which is a loop-strided access. The 'Access'
+/// argument specifies what the verboten forms of access are (read or write).
+static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
+ Loop *L, const SCEV *BECount,
+ unsigned StoreSize, AliasAnalysis &AA,
+ Instruction *IgnoredStore) {
+ // Get the location that may be stored across the loop. Since the access is
+ // strided positively through memory, we say that the modified location starts
+ // at the pointer and has infinite size.
+ uint64_t AccessSize = AliasAnalysis::UnknownSize;
+
+ // If the loop iterates a fixed number of times, we can refine the access size
+ // to be exactly the size of the memset, which is (BECount+1)*StoreSize
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+
+ // TODO: For this to be really effective, we have to dive into the pointer
+ // operand in the store. Store to &A[i] of 100 will always return may alias
+ // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
+ // which will then no-alias a store to &A[100].
+ AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
+
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI)
+ for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
+ if (&*I != IgnoredStore &&
+ (AA.getModRefInfo(I, StoreLoc) & Access))
+ return true;
+
+ return false;
+}
+
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in. Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
+static Constant *getMemSetPatternValue(Value *V, const DataLayout &TD) {
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (C == 0) return 0;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = TD.getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size-1)))
+ return 0;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (TD.isBigEndian())
+ return 0;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+ // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16) return 0;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16) return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16/Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
+}
+
+
+/// processLoopStridedStore - We see a strided store of some value. If we can
+/// transform this into a memset or memset_pattern in the loop preheader, do so.
+bool LoopIdiomRecognize::
+processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *StoredVal,
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount) {
+
+ // If the stored value is a byte-wise value (like i32 -1), then it may be
+ // turned into a memset of i8 -1, assuming that all the consecutive bytes
+ // are stored. A store of i32 0x01020304 can never be turned into a memset,
+ // but it can be turned into memset_pattern if the target supports it.
+ Value *SplatValue = isBytewiseValue(StoredVal);
+ Constant *PatternValue = 0;
+
+ // If we're allowed to form a memset, and the stored value would be acceptable
+ // for memset, use it.
+ if (SplatValue && TLI->has(LibFunc::memset) &&
+ // Verify that the stored value is loop invariant. If not, we can't
+ // promote the memset.
+ CurLoop->isLoopInvariant(SplatValue)) {
+ // Keep and use SplatValue.
+ PatternValue = 0;
+ } else if (TLI->has(LibFunc::memset_pattern16) &&
+ (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+ // It looks like we can use PatternValue!
+ SplatValue = 0;
+ } else {
+ // Otherwise, this isn't an idiom we can transform. For example, we can't
+ // do anything with a 3-byte store.
+ return false;
+ }
+
+ // The trip count of the loop and the base pointer of the addrec SCEV is
+ // guaranteed to be loop invariant, which means that it should dominate the
+ // header. This allows us to insert code for it in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SCEVExpander Expander(*SE, "loop-idiom");
+
+ // Okay, we have a strided store "p[i]" of a splattable value. We can turn
+ // this into a memset in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the aliased location. Check for any overlap by generating the
+ // base pointer and checking the region.
+ unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
+ Value *BasePtr =
+ Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+ Preheader->getTerminator());
+
+
+ if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
+ CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(BasePtr, *SE, TLI);
+ return false;
+ }
+
+ // Okay, everything looks good, insert the memset.
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ SCEV::FlagNUW);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ CallInst *NewCall;
+ if (SplatValue)
+ NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
+ else {
+ Module *M = TheStore->getParent()->getParent()->getParent();
+ Value *MSP = M->getOrInsertFunction("memset_pattern16",
+ Builder.getVoidTy(),
+ Builder.getInt8PtrTy(),
+ Builder.getInt8PtrTy(), IntPtr,
+ (void*)0);
+
+ // Otherwise we should form a memset_pattern16. PatternValue is known to be
+ // an constant array of 16-bytes. Plop the value into a mergable global.
+ GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
+ GlobalValue::InternalLinkage,
+ PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(16);
+ Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
+ NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
+ }
+
+ DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
+ << " from store to: " << *Ev << " at: " << *TheStore << "\n");
+ NewCall->setDebugLoc(TheStore->getDebugLoc());
+
+ // Okay, the memset has been formed. Zap the original store and anything that
+ // feeds into it.
+ deleteDeadInstruction(TheStore, *SE, TLI);
+ ++NumMemSet;
+ return true;
+}
+
+/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
+/// same-strided load.
+bool LoopIdiomRecognize::
+processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount) {
+ // If we're not allowed to form memcpy, we fail.
+ if (!TLI->has(LibFunc::memcpy))
+ return false;
+
+ LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+
+ // The trip count of the loop and the base pointer of the addrec SCEV is
+ // guaranteed to be loop invariant, which means that it should dominate the
+ // header. This allows us to insert code for it in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SCEVExpander Expander(*SE, "loop-idiom");
+
+ // Okay, we have a strided store "p[i]" of a loaded value. We can turn
+ // this into a memcpy in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write the memory region we're storing to. This includes the load that
+ // feeds the stores. Check for an alias by generating the base address and
+ // checking everything.
+ Value *StoreBasePtr =
+ Expander.expandCodeFor(StoreEv->getStart(),
+ Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
+ CurLoop, BECount, StoreSize,
+ getAnalysis<AliasAnalysis>(), SI)) {
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
+ return false;
+ }
+
+ // For a memcpy, we have to make sure that the input array is not being
+ // mutated by the loop.
+ Value *LoadBasePtr =
+ Expander.expandCodeFor(LoadEv->getStart(),
+ Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
+ deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
+ return false;
+ }
+
+ // Okay, everything is safe, we can transform this!
+
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ SCEV::FlagNUW);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ CallInst *NewCall =
+ Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+ std::min(SI->getAlignment(), LI->getAlignment()));
+ NewCall->setDebugLoc(SI->getDebugLoc());
+
+ DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+ << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+
+
+ // Okay, the memset has been formed. Zap the original store and anything that
+ // feeds into it.
+ deleteDeadInstruction(SI, *SE, TLI);
+ ++NumMemCpy;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
new file mode 100644
index 000000000000..a23860aad80e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -0,0 +1,175 @@
+//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs lightweight instruction simplification on loop bodies.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-instsimplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions simplified");
+
+namespace {
+ class LoopInstSimplify : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopInstSimplify() : LoopPass(ID) {
+ initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop*, LPPassManager&);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved("scalar-evolution");
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ };
+}
+
+char LoopInstSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+
+Pass *llvm::createLoopInstSimplifyPass() {
+ return new LoopInstSimplify();
+}
+
+bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());
+
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+
+ // The bit we are stealing from the pointer represents whether this basic
+ // block is the header of a subloop, in which case we only process its phis.
+ typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
+ SmallVector<WorklistItem, 16> VisitStack;
+ SmallPtrSet<BasicBlock*, 32> Visited;
+
+ bool Changed = false;
+ bool LocalChanged;
+ do {
+ LocalChanged = false;
+
+ VisitStack.clear();
+ Visited.clear();
+
+ VisitStack.push_back(WorklistItem(L->getHeader(), false));
+
+ while (!VisitStack.empty()) {
+ WorklistItem Item = VisitStack.pop_back_val();
+ BasicBlock *BB = Item.getPointer();
+ bool IsSubloopHeader = Item.getInt();
+
+ // Simplify instructions in the current basic block.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = BI++;
+
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+
+ // Don't bother simplifying unused instructions.
+ if (!I->use_empty()) {
+ Value *V = SimplifyInstruction(I, TD, TLI, DT);
+ if (V && LI->replacementPreservesLCSSAForm(I, V)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+
+ I->replaceAllUsesWith(V);
+ LocalChanged = true;
+ ++NumSimplified;
+ }
+ }
+ LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+
+ if (IsSubloopHeader && !isa<PHINode>(I))
+ break;
+ }
+
+ // Add all successors to the worklist, except for loop exit blocks and the
+ // bodies of subloops. We visit the headers of loops so that we can process
+ // their phis, but we contract the rest of the subloop body and only follow
+ // edges leading back to the original loop.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ BasicBlock *SuccBB = *SI;
+ if (!Visited.insert(SuccBB))
+ continue;
+
+ const Loop *SuccLoop = LI->getLoopFor(SuccBB);
+ if (SuccLoop && SuccLoop->getHeader() == SuccBB
+ && L->contains(SuccLoop)) {
+ VisitStack.push_back(WorklistItem(SuccBB, true));
+
+ SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
+ SuccLoop->getExitBlocks(SubLoopExitBlocks);
+
+ for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
+ BasicBlock *ExitBB = SubLoopExitBlocks[i];
+ if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
+ VisitStack.push_back(WorklistItem(ExitBB, false));
+ }
+
+ continue;
+ }
+
+ bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
+ ExitBlocks.end(), SuccBB);
+ if (IsExitBlock)
+ continue;
+
+ VisitStack.push_back(WorklistItem(SuccBB, false));
+ }
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+
+ Changed |= LocalChanged;
+ } while (LocalChanged);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
new file mode 100644
index 000000000000..e98ae953e532
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -0,0 +1,508 @@
+//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Loop Rotation Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-rotate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+#define MAX_HEADER_SIZE 16
+
+STATISTIC(NumRotated, "Number of loops rotated");
+namespace {
+
+ class LoopRotate : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopRotate() : LoopPass(ID) {
+ initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ }
+
+ // LCSSA form makes instruction renaming easier.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ void simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L);
+
+ private:
+ LoopInfo *LI;
+ const TargetTransformInfo *TTI;
+ };
+}
+
+char LoopRotate::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+
+Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+
+/// Rotate Loop L as many times as possible. Return true if
+/// the loop is rotated at least once.
+bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+ LI = &getAnalysis<LoopInfo>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+
+ // Simplify the loop latch before attempting to rotate the header
+ // upward. Rotation may not be needed if the loop tail can be folded into the
+ // loop exit.
+ simplifyLoopLatch(L);
+
+ // One loop can be rotated multiple times.
+ bool MadeChange = false;
+ while (rotateLoop(L))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader. If there were uses of the values produced by
+/// these instruction that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values. Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+ BasicBlock *OrigPreheader,
+ ValueToValueMapTy &ValueMap) {
+ // Remove PHI node entries that are no longer live.
+ BasicBlock::iterator I, E = OrigHeader->end();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA;
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = I;
+
+ // If there are no uses of the value (e.g. because it returns void), there
+ // is nothing to rewrite.
+ if (OrigHeaderVal->use_empty())
+ continue;
+
+ Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
+
+ // The value now exits in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+ UE = OrigHeaderVal->use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreheader) {
+ U = OrigPreHeaderVal;
+ continue;
+ }
+ }
+
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
+ }
+ }
+}
+
+/// Determine whether the instructions in this range my be safely and cheaply
+/// speculated. This is not an important enough situation to develop complex
+/// heuristics. We handle a single arithmetic instruction along with any type
+/// conversions.
+static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
+ BasicBlock::iterator End) {
+ bool seenIncrement = false;
+ for (BasicBlock::iterator I = Begin; I != End; ++I) {
+
+ if (!isSafeToSpeculativelyExecute(I))
+ return false;
+
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return false;
+ // fall-thru to increment case
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ if (seenIncrement)
+ return false;
+ seenIncrement = true;
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // ignore type conversions
+ break;
+ }
+ }
+ return true;
+}
+
+/// Fold the loop tail into the loop exit by speculating the loop tail
+/// instructions. Typically, this is a single post-increment. In the case of a
+/// simple 2-block loop, hoisting the increment can be much better than
+/// duplicating the entire loop header. In the cast of loops with early exits,
+/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
+/// canonical form so downstream passes can handle it.
+///
+/// I don't believe this invalidates SCEV.
+void LoopRotate::simplifyLoopLatch(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch || Latch->hasAddressTaken())
+ return;
+
+ BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!Jmp || !Jmp->isUnconditional())
+ return;
+
+ BasicBlock *LastExit = Latch->getSinglePredecessor();
+ if (!LastExit || !L->isLoopExiting(LastExit))
+ return;
+
+ BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
+ if (!BI)
+ return;
+
+ if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
+ return;
+
+ DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
+ << LastExit->getName() << "\n");
+
+ // Hoist the instructions from Latch into LastExit.
+ LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp);
+
+ unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
+ BasicBlock *Header = Jmp->getSuccessor(0);
+ assert(Header == L->getHeader() && "expected a backward branch");
+
+ // Remove Latch from the CFG so that LastExit becomes the new Latch.
+ BI->setSuccessor(FallThruPath, Header);
+ Latch->replaceSuccessorsPhiUsesWith(LastExit);
+ Jmp->eraseFromParent();
+
+ // Nuke the Latch block.
+ assert(Latch->empty() && "unable to evacuate Latch");
+ LI->removeBlock(Latch);
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->eraseNode(Latch);
+ Latch->eraseFromParent();
+}
+
+/// Rotate loop LP. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *L) {
+ // If the loop has only one block then there is not much to rotate.
+ if (L->getBlocks().size() == 1)
+ return false;
+
+ BasicBlock *OrigHeader = L->getHeader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (BI == 0 || BI->isUnconditional())
+ return false;
+
+ // If the loop header is not one of the loop exiting blocks then
+ // either this loop is already rotated or it is not
+ // suitable for loop rotation transformations.
+ if (!L->isLoopExiting(OrigHeader))
+ return false;
+
+ // If the loop latch already contains a branch that leaves the loop then the
+ // loop is already rotated.
+ if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
+ return false;
+
+ // Check size of original header and reject loop if it is very big or we can't
+ // duplicate blocks inside it.
+ {
+ CodeMetrics Metrics;
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI);
+ if (Metrics.notDuplicatable) {
+ DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable"
+ << " instructions: "; L->dump());
+ return false;
+ }
+ if (Metrics.NumInsts > MAX_HEADER_SIZE)
+ return false;
+ }
+
+ // Now, this loop is suitable for rotation.
+ BasicBlock *OrigPreheader = L->getLoopPreheader();
+
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it, just give up.
+ if (OrigPreheader == 0)
+ return false;
+
+ // Anything ScalarEvolution may know about this loop or the PHI nodes
+ // in its header will soon be invalidated.
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
+ DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+
+ // Find new Loop header. NewHeader is a Header's one and only successor
+ // that is inside loop. Header's other successor is outside the
+ // loop. Otherwise loop is not suitable for rotation.
+ BasicBlock *Exit = BI->getSuccessor(0);
+ BasicBlock *NewHeader = BI->getSuccessor(1);
+ if (L->contains(Exit))
+ std::swap(Exit, NewHeader);
+ assert(NewHeader && "Unable to determine new loop header");
+ assert(L->contains(NewHeader) && !L->contains(Exit) &&
+ "Unable to determine loop header and exit blocks");
+
+ // This code assumes that the new header has exactly one predecessor.
+ // Remove any single-entry PHI nodes in it.
+ assert(NewHeader->getSinglePredecessor() &&
+ "New header doesn't have one pred!");
+ FoldSingleEntryPHINodes(NewHeader);
+
+ // Begin by walking OrigHeader and populating ValueMap with an entry for
+ // each Instruction.
+ BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+ ValueToValueMapTy ValueMap;
+
+ // For PHI nodes, the value available in OldPreHeader is just the
+ // incoming value from OldPreHeader.
+ for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+
+ // For the rest of the instructions, either hoist to the OrigPreheader if
+ // possible or create a clone in the OldPreHeader if not.
+ TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+ while (I != E) {
+ Instruction *Inst = I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) &&
+ !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
+ !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst) &&
+ !isa<AllocaInst>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ continue;
+ }
+
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
+
+ // Eagerly remap the operands of the instruction.
+ RemapInstruction(C, ValueMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+
+ // With the operands remapped, see if the instruction constant folds or is
+ // otherwise simplifyable. This commonly occurs because the entry from PHI
+ // nodes allows icmps and other instructions to fold.
+ Value *V = SimplifyInstruction(C);
+ if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+ // If so, then delete the temporary instruction and stick the folded value
+ // in the map.
+ delete C;
+ ValueMap[Inst] = V;
+ } else {
+ // Otherwise, stick the new instruction into the new block!
+ C->setName(Inst->getName());
+ C->insertBefore(LoopEntryBranch);
+ ValueMap[Inst] = C;
+ }
+ }
+
+ // Along with all the other instructions, we just cloned OrigHeader's
+ // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+ // successors by duplicating their incoming values for OrigHeader.
+ TerminatorInst *TI = OrigHeader->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
+
+ // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
+ // OrigPreHeader's old terminator (the original branch into the loop), and
+ // remove the corresponding incoming values from the PHI nodes in OrigHeader.
+ LoopEntryBranch->eraseFromParent();
+
+ // If there were any uses of instructions in the duplicated block outside the
+ // loop, update them, inserting PHI nodes as required
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
+
+ // NewHeader is now the header of the loop.
+ L->moveToHeader(NewHeader);
+ assert(L->getHeader() == NewHeader && "Latch block is our new header");
+
+
+ // At this point, we've finished our major CFG changes. As part of cloning
+ // the loop into the preheader we've simplified instructions and the
+ // duplicated conditional branch may now be branching on a constant. If it is
+ // branching on a constant and if that constant means that we enter the loop,
+ // then we fold away the cond branch to an uncond branch. This simplifies the
+ // loop in cases important for nested loops, and it also means we don't have
+ // to split as many edges.
+ BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+ assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+ if (!isa<ConstantInt>(PHBI->getCondition()) ||
+ PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
+ != NewHeader) {
+ // The conditional branch can't be folded, handle the general case.
+ // Update DominatorTree to reflect the CFG change we just made. Then split
+ // edges as necessary to preserve LoopSimplify form.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ // Everything that was dominated by the old loop header is now dominated
+ // by the original loop preheader. Conceptually the header was merged
+ // into the preheader, even though we reuse the actual block as a new
+ // loop latch.
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+ OrigHeaderNode->end());
+ DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
+ for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
+ DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
+
+ assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
+ assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
+
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ }
+
+ // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+ // thus is not a preheader anymore.
+ // Split the edge to form a real preheader.
+ BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
+ NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+ // Preserve canonical loop form, which means that 'Exit' should have only
+ // one predecessor.
+ BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
+ ExitSplit->moveBefore(Exit);
+ } else {
+ // We can fold the conditional branch in the preheader, this makes things
+ // simpler. The first step is to remove the extra edge to the Exit block.
+ Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+ BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
+ NewBI->setDebugLoc(PHBI->getDebugLoc());
+ PHBI->eraseFromParent();
+
+ // With our CFG finalized, update DomTree if it is available.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
+
+ // Brute force incremental dominator tree update. Call
+ // findNearestCommonDominator on all CFG predecessors of each child of the
+ // original header.
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+ OrigHeaderNode->end());
+ bool Changed;
+ do {
+ Changed = false;
+ for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) {
+ DomTreeNode *Node = HeaderChildren[I];
+ BasicBlock *BB = Node->getBlock();
+
+ pred_iterator PI = pred_begin(BB);
+ BasicBlock *NearestDom = *PI;
+ for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
+ NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
+
+ // Remember if this changes the DomTree.
+ if (Node->getIDom()->getBlock() != NearestDom) {
+ DT->changeImmediateDominator(BB, NearestDom);
+ Changed = true;
+ }
+ }
+
+ // If the dominator changed, this may have an effect on other
+ // predecessors, continue until we reach a fixpoint.
+ } while (Changed);
+ }
+ }
+
+ assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+
+ // Now that the CFG and DomTree are in a consistent state again, try to merge
+ // the OrigHeader block into OrigLatch. This will succeed if they are
+ // connected by an unconditional branch. This is just a cleanup so the
+ // emitted code isn't too gross in this common case.
+ MergeBlockIntoPredecessor(OrigHeader, this);
+
+ DEBUG(dbgs() << "LoopRotation: into "; L->dump());
+
+ ++NumRotated;
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
new file mode 100644
index 000000000000..73e44d7edf5e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -0,0 +1,4835 @@
+//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into forms suitable for efficient execution
+// on the target.
+//
+// This pass performs a strength reduction on array references inside loops that
+// have as one or more of their components the loop induction variable, it
+// rewrites expressions to take advantage of scaled-index addressing modes
+// available on the target, and it performs a variety of other optimizations
+// related to loop induction variables.
+//
+// Terminology note: this code has a lot of handling for "post-increment" or
+// "post-inc" users. This is not talking about post-increment addressing modes;
+// it is instead talking about code like this:
+//
+// %i = phi [ 0, %entry ], [ %i.next, %latch ]
+// ...
+// %i.next = add %i, 1
+// %c = icmp eq %i.next, %n
+//
+// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
+// it's useful to think about these as the same register, with some uses using
+// the value of the register before the add and some using // it after. In this
+// example, the icmp is a post-increment user, since it uses %i.next, which is
+// the value of the induction variable after the increment. The other common
+// case of post-increment users is users outside the loop.
+//
+// TODO: More sophistication in the way Formulae are generated and filtered.
+//
+// TODO: Handle multiple loops at a time.
+//
+// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
+// of a GlobalValue?
+//
+// TODO: When truncation is free, truncate ICmp users' operands to make it a
+// smaller encoding (on x86 at least).
+//
+// TODO: When a negated register is used by an add (such as in a list of
+// multiple base registers, or as the increment expression in an addrec),
+// we may not actually need both reg and (-1 * reg) in registers; the
+// negation can be implemented by using a sub instead of an add. The
+// lack of support for taking this into consideration when making
+// register pressure decisions is partly worked around by the "Special"
+// use kind.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-reduce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+using namespace llvm;
+
+/// MaxIVUsers is an arbitrary threshold that provides an early opportunitiy for
+/// bail out. This threshold is far beyond the number of users that LSR can
+/// conceivably solve, so it should not affect generated code, but catches the
+/// worst cases before LSR burns too much compile time and stack space.
+static const unsigned MaxIVUsers = 200;
+
+// Temporary flag to cleanup congruent phis after LSR phi expansion.
+// It's currently disabled until we can determine whether it's truly useful or
+// not. The flag should be removed after the v3.0 release.
+// This is now needed for ivchains.
+static cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::init(true),
+ cl::desc("Enable LSR phi elimination"));
+
+#ifndef NDEBUG
+// Stress test IV chain generation.
+static cl::opt<bool> StressIVChain(
+ "stress-ivchain", cl::Hidden, cl::init(false),
+ cl::desc("Stress test LSR IV chains"));
+#else
+static bool StressIVChain = false;
+#endif
+
+namespace {
+
+/// RegSortData - This class holds data which is used to order reuse candidates.
+class RegSortData {
+public:
+ /// UsedByIndices - This represents the set of LSRUse indices which reference
+ /// a particular register.
+ SmallBitVector UsedByIndices;
+
+ RegSortData() {}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+void RegSortData::print(raw_ostream &OS) const {
+ OS << "[NumUses=" << UsedByIndices.count() << ']';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void RegSortData::dump() const {
+ print(errs()); errs() << '\n';
+}
+#endif
+
+namespace {
+
+/// RegUseTracker - Map register candidates to information about how they are
+/// used.
+class RegUseTracker {
+ typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
+
+ RegUsesTy RegUsesMap;
+ SmallVector<const SCEV *, 16> RegSequence;
+
+public:
+ void CountRegister(const SCEV *Reg, size_t LUIdx);
+ void DropRegister(const SCEV *Reg, size_t LUIdx);
+ void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
+
+ bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
+
+ const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
+
+ void clear();
+
+ typedef SmallVectorImpl<const SCEV *>::iterator iterator;
+ typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
+ iterator begin() { return RegSequence.begin(); }
+ iterator end() { return RegSequence.end(); }
+ const_iterator begin() const { return RegSequence.begin(); }
+ const_iterator end() const { return RegSequence.end(); }
+};
+
+}
+
+void
+RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
+ std::pair<RegUsesTy::iterator, bool> Pair =
+ RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
+ RegSortData &RSD = Pair.first->second;
+ if (Pair.second)
+ RegSequence.push_back(Reg);
+ RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
+ RSD.UsedByIndices.set(LUIdx);
+}
+
+void
+RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
+ RegUsesTy::iterator It = RegUsesMap.find(Reg);
+ assert(It != RegUsesMap.end());
+ RegSortData &RSD = It->second;
+ assert(RSD.UsedByIndices.size() > LUIdx);
+ RSD.UsedByIndices.reset(LUIdx);
+}
+
+void
+RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+ assert(LUIdx <= LastLUIdx);
+
+ // Update RegUses. The data structure is not optimized for this purpose;
+ // we must iterate through it and update each of the bit vectors.
+ for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
+ I != E; ++I) {
+ SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ if (LUIdx < UsedByIndices.size())
+ UsedByIndices[LUIdx] =
+ LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
+ UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
+ }
+}
+
+bool
+RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ if (I == RegUsesMap.end())
+ return false;
+ const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ int i = UsedByIndices.find_first();
+ if (i == -1) return false;
+ if ((size_t)i != LUIdx) return true;
+ return UsedByIndices.find_next(i) != -1;
+}
+
+const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ assert(I != RegUsesMap.end() && "Unknown register!");
+ return I->second.UsedByIndices;
+}
+
+void RegUseTracker::clear() {
+ RegUsesMap.clear();
+ RegSequence.clear();
+}
+
+namespace {
+
+/// Formula - This class holds information that describes a formula for
+/// computing satisfying a use. It may include broken-out immediates and scaled
+/// registers.
+struct Formula {
+ /// Global base address used for complex addressing.
+ GlobalValue *BaseGV;
+
+ /// Base offset for complex addressing.
+ int64_t BaseOffset;
+
+ /// Whether any complex addressing has a base register.
+ bool HasBaseReg;
+
+ /// The scale of any complex addressing.
+ int64_t Scale;
+
+ /// BaseRegs - The list of "base" registers for this use. When this is
+ /// non-empty,
+ SmallVector<const SCEV *, 4> BaseRegs;
+
+ /// ScaledReg - The 'scaled' register for this use. This should be non-null
+ /// when Scale is not zero.
+ const SCEV *ScaledReg;
+
+ /// UnfoldedOffset - An additional constant offset which added near the
+ /// use. This requires a temporary register, but the offset itself can
+ /// live in an add immediate field rather than a register.
+ int64_t UnfoldedOffset;
+
+ Formula()
+ : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
+ UnfoldedOffset(0) {}
+
+ void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
+
+ unsigned getNumRegs() const;
+ Type *getType() const;
+
+ void DeleteBaseReg(const SCEV *&S);
+
+ bool referencesReg(const SCEV *S) const;
+ bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
+ const RegUseTracker &RegUses) const;
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// DoInitialMatch - Recursion helper for InitialMatch.
+static void DoInitialMatch(const SCEV *S, Loop *L,
+ SmallVectorImpl<const SCEV *> &Good,
+ SmallVectorImpl<const SCEV *> &Bad,
+ ScalarEvolution &SE) {
+ // Collect expressions which properly dominate the loop header.
+ if (SE.properlyDominates(S, L->getHeader())) {
+ Good.push_back(S);
+ return;
+ }
+
+ // Look at add operands.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ DoInitialMatch(*I, L, Good, Bad, SE);
+ return;
+ }
+
+ // Look at addrec operands.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ if (!AR->getStart()->isZero()) {
+ DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
+ DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
+ AR->getStepRecurrence(SE),
+ // FIXME: AR->getNoWrapFlags()
+ AR->getLoop(), SCEV::FlagAnyWrap),
+ L, Good, Bad, SE);
+ return;
+ }
+
+ // Handle a multiplication by -1 (negation) if it didn't fold.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
+ if (Mul->getOperand(0)->isAllOnesValue()) {
+ SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *NewMul = SE.getMulExpr(Ops);
+
+ SmallVector<const SCEV *, 4> MyGood;
+ SmallVector<const SCEV *, 4> MyBad;
+ DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
+ const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
+ SE.getEffectiveSCEVType(NewMul->getType())));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
+ E = MyGood.end(); I != E; ++I)
+ Good.push_back(SE.getMulExpr(NegOne, *I));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
+ E = MyBad.end(); I != E; ++I)
+ Bad.push_back(SE.getMulExpr(NegOne, *I));
+ return;
+ }
+
+ // Ok, we can't do anything interesting. Just stuff the whole thing into a
+ // register and hope for the best.
+ Bad.push_back(S);
+}
+
+/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
+/// attempting to keep all loop-invariant and loop-computable values in a
+/// single base register.
+void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
+ SmallVector<const SCEV *, 4> Good;
+ SmallVector<const SCEV *, 4> Bad;
+ DoInitialMatch(S, L, Good, Bad, SE);
+ if (!Good.empty()) {
+ const SCEV *Sum = SE.getAddExpr(Good);
+ if (!Sum->isZero())
+ BaseRegs.push_back(Sum);
+ HasBaseReg = true;
+ }
+ if (!Bad.empty()) {
+ const SCEV *Sum = SE.getAddExpr(Bad);
+ if (!Sum->isZero())
+ BaseRegs.push_back(Sum);
+ HasBaseReg = true;
+ }
+}
+
+/// getNumRegs - Return the total number of register operands used by this
+/// formula. This does not include register uses implied by non-constant
+/// addrec strides.
+unsigned Formula::getNumRegs() const {
+ return !!ScaledReg + BaseRegs.size();
+}
+
+/// getType - Return the type of this formula, if it has one, or null
+/// otherwise. This type is meaningless except for the bit size.
+Type *Formula::getType() const {
+ return !BaseRegs.empty() ? BaseRegs.front()->getType() :
+ ScaledReg ? ScaledReg->getType() :
+ BaseGV ? BaseGV->getType() :
+ 0;
+}
+
+/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
+void Formula::DeleteBaseReg(const SCEV *&S) {
+ if (&S != &BaseRegs.back())
+ std::swap(S, BaseRegs.back());
+ BaseRegs.pop_back();
+}
+
+/// referencesReg - Test if this formula references the given register.
+bool Formula::referencesReg(const SCEV *S) const {
+ return S == ScaledReg ||
+ std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
+}
+
+/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
+/// which are used by uses other than the use with the given index.
+bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
+ const RegUseTracker &RegUses) const {
+ if (ScaledReg)
+ if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
+ return true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
+ E = BaseRegs.end(); I != E; ++I)
+ if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
+ return true;
+ return false;
+}
+
+void Formula::print(raw_ostream &OS) const {
+ bool First = true;
+ if (BaseGV) {
+ if (!First) OS << " + "; else First = false;
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
+ }
+ if (BaseOffset != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << BaseOffset;
+ }
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
+ E = BaseRegs.end(); I != E; ++I) {
+ if (!First) OS << " + "; else First = false;
+ OS << "reg(" << **I << ')';
+ }
+ if (HasBaseReg && BaseRegs.empty()) {
+ if (!First) OS << " + "; else First = false;
+ OS << "**error: HasBaseReg**";
+ } else if (!HasBaseReg && !BaseRegs.empty()) {
+ if (!First) OS << " + "; else First = false;
+ OS << "**error: !HasBaseReg**";
+ }
+ if (Scale != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << Scale << "*reg(";
+ if (ScaledReg)
+ OS << *ScaledReg;
+ else
+ OS << "<unknown>";
+ OS << ')';
+ }
+ if (UnfoldedOffset != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << "imm(" << UnfoldedOffset << ')';
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void Formula::dump() const {
+ print(errs()); errs() << '\n';
+}
+#endif
+
+/// isAddRecSExtable - Return true if the given addrec can be sign-extended
+/// without changing its value.
+static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ Type *WideTy =
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
+ return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+}
+
+/// isAddSExtable - Return true if the given add can be sign-extended
+/// without changing its value.
+static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
+ Type *WideTy =
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
+ return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// isMulSExtable - Return true if the given mul can be sign-extended
+/// without changing its value.
+static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
+ Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
+ return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
+}
+
+/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
+/// and if the remainder is known to be zero, or null otherwise. If
+/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
+/// to Y, ignoring that the multiplication may overflow, which is useful when
+/// the result will be used in a context where the most significant bits are
+/// ignored.
+static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
+ ScalarEvolution &SE,
+ bool IgnoreSignificantBits = false) {
+ // Handle the trivial case, which works for any SCEV type.
+ if (LHS == RHS)
+ return SE.getConstant(LHS->getType(), 1);
+
+ // Handle a few RHS special cases.
+ const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+ if (RC) {
+ const APInt &RA = RC->getValue()->getValue();
+ // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
+ // some folding.
+ if (RA.isAllOnesValue())
+ return SE.getMulExpr(LHS, RC);
+ // Handle x /s 1 as x.
+ if (RA == 1)
+ return LHS;
+ }
+
+ // Check for a division of a constant by a constant.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
+ if (!RC)
+ return 0;
+ const APInt &LA = C->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ if (LA.srem(RA) != 0)
+ return 0;
+ return SE.getConstant(LA.sdiv(RA));
+ }
+
+ // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
+ const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Step) return 0;
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
+ // FlagNW is independent of the start value, step direction, and is
+ // preserved with smaller magnitude steps.
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
+ }
+ return 0;
+ }
+
+ // Distribute the sdiv over add operands, if the add doesn't overflow.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
+ if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
+ SmallVector<const SCEV *, 8> Ops;
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ const SCEV *Op = getExactSDiv(*I, RHS, SE,
+ IgnoreSignificantBits);
+ if (!Op) return 0;
+ Ops.push_back(Op);
+ }
+ return SE.getAddExpr(Ops);
+ }
+ return 0;
+ }
+
+ // Check for a multiply operand that we can pull RHS out of.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
+ if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
+ SmallVector<const SCEV *, 4> Ops;
+ bool Found = false;
+ for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
+ I != E; ++I) {
+ const SCEV *S = *I;
+ if (!Found)
+ if (const SCEV *Q = getExactSDiv(S, RHS, SE,
+ IgnoreSignificantBits)) {
+ S = Q;
+ Found = true;
+ }
+ Ops.push_back(S);
+ }
+ return Found ? SE.getMulExpr(Ops) : 0;
+ }
+ return 0;
+ }
+
+ // Otherwise we don't know.
+ return 0;
+}
+
+/// ExtractImmediate - If S involves the addition of a constant integer value,
+/// return that integer value, and mutate S to point to a new SCEV with that
+/// value excluded.
+static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (C->getValue()->getValue().getMinSignedBits() <= 64) {
+ S = SE.getConstant(C->getType(), 0);
+ return C->getValue()->getSExtValue();
+ }
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ int64_t Result = ExtractImmediate(NewOps.front(), SE);
+ if (Result != 0)
+ S = SE.getAddExpr(NewOps);
+ return Result;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ int64_t Result = ExtractImmediate(NewOps.front(), SE);
+ if (Result != 0)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
+ return Result;
+ }
+ return 0;
+}
+
+/// ExtractSymbol - If S involves the addition of a GlobalValue address,
+/// return that symbol, and mutate S to point to a new SCEV with that
+/// value excluded.
+static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
+ S = SE.getConstant(GV->getType(), 0);
+ return GV;
+ }
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
+ if (Result)
+ S = SE.getAddExpr(NewOps);
+ return Result;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
+ if (Result)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
+ return Result;
+ }
+ return 0;
+}
+
+/// isAddressUse - Returns true if the specified instruction is using the
+/// specified value as an address.
+static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+ bool isAddress = isa<LoadInst>(Inst);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(1) == OperandVal)
+ isAddress = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::prefetch:
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ if (II->getArgOperand(0) == OperandVal)
+ isAddress = true;
+ break;
+ }
+ }
+ return isAddress;
+}
+
+/// getAccessType - Return the type of the memory being accessed.
+static Type *getAccessType(const Instruction *Inst) {
+ Type *AccessTy = Inst->getType();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ AccessTy = SI->getOperand(0)->getType();
+ else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ AccessTy = II->getArgOperand(0)->getType();
+ break;
+ }
+ }
+
+ // All pointers have the same requirements, so canonicalize them to an
+ // arbitrary pointer type to minimize variation.
+ if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
+ AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace());
+
+ return AccessTy;
+}
+
+/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return true;
+ }
+ return false;
+}
+
+/// Check if expanding this expression is likely to incur significant cost. This
+/// is tricky because SCEV doesn't track which expressions are actually computed
+/// by the current IR.
+///
+/// We currently allow expansion of IV increments that involve adds,
+/// multiplication by constants, and AddRecs from existing phis.
+///
+/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
+/// obvious multiple of the UDivExpr.
+static bool isHighCostExpansion(const SCEV *S,
+ SmallPtrSet<const SCEV*, 8> &Processed,
+ ScalarEvolution &SE) {
+ // Zero/One operand expressions
+ switch (S->getSCEVType()) {
+ case scUnknown:
+ case scConstant:
+ return false;
+ case scTruncate:
+ return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
+ Processed, SE);
+ case scZeroExtend:
+ return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
+ Processed, SE);
+ case scSignExtend:
+ return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
+ Processed, SE);
+ }
+
+ if (!Processed.insert(S))
+ return false;
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansion(*I, Processed, SE))
+ return true;
+ }
+ return false;
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ if (Mul->getNumOperands() == 2) {
+ // Multiplication by a constant is ok
+ if (isa<SCEVConstant>(Mul->getOperand(0)))
+ return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
+
+ // If we have the value of one operand, check if an existing
+ // multiplication already generates this expression.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
+ Value *UVal = U->getValue();
+ for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
+ UI != UE; ++UI) {
+ // If U is a constant, it may be used by a ConstantExpr.
+ Instruction *User = dyn_cast<Instruction>(*UI);
+ if (User && User->getOpcode() == Instruction::Mul
+ && SE.isSCEVable(User->getType())) {
+ return SE.getSCEV(User) == Mul;
+ }
+ }
+ }
+ }
+ }
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (isExistingPhi(AR, SE))
+ return false;
+ }
+
+ // Fow now, consider any other type of expression (div/mul/min/max) high cost.
+ return true;
+}
+
+/// DeleteTriviallyDeadInstructions - If any of the instructions is the
+/// specified set are trivially dead, delete them and see if this makes any of
+/// their operands subsequently dead.
+static bool
+DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
+ bool Changed = false;
+
+ while (!DeadInsts.empty()) {
+ Value *V = DeadInsts.pop_back_val();
+ Instruction *I = dyn_cast_or_null<Instruction>(V);
+
+ if (I == 0 || !isInstructionTriviallyDead(I))
+ continue;
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ *OI = 0;
+ if (U->use_empty())
+ DeadInsts.push_back(U);
+ }
+
+ I->eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+namespace {
+
+/// Cost - This class is used to measure and compare candidate formulae.
+class Cost {
+ /// TODO: Some of these could be merged. Also, a lexical ordering
+ /// isn't always optimal.
+ unsigned NumRegs;
+ unsigned AddRecCost;
+ unsigned NumIVMuls;
+ unsigned NumBaseAdds;
+ unsigned ImmCost;
+ unsigned SetupCost;
+
+public:
+ Cost()
+ : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
+ SetupCost(0) {}
+
+ bool operator<(const Cost &Other) const;
+
+ void Loose();
+
+#ifndef NDEBUG
+ // Once any of the metrics loses, they must all remain losers.
+ bool isValid() {
+ return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
+ | ImmCost | SetupCost) != ~0u)
+ || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
+ & ImmCost & SetupCost) == ~0u);
+ }
+#endif
+
+ bool isLoser() {
+ assert(isValid() && "invalid cost");
+ return NumRegs == ~0u;
+ }
+
+ void RateFormula(const Formula &F,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const DenseSet<const SCEV *> &VisitedRegs,
+ const Loop *L,
+ const SmallVectorImpl<int64_t> &Offsets,
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+
+private:
+ void RateRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+ void RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs);
+};
+
+}
+
+/// RateRegister - Tally up interesting quantities from the given register.
+void Cost::RateRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
+ // If this is an addrec for another loop, don't second-guess its addrec phi
+ // nodes. LSR isn't currently smart enough to reason about more than one
+ // loop at a time. LSR has already run on inner loops, will not run on outer
+ // loops, and cannot be expected to change sibling loops.
+ if (AR->getLoop() != L) {
+ // If the AddRec exists, consider it's register free and leave it alone.
+ if (isExistingPhi(AR, SE))
+ return;
+
+ // Otherwise, do not consider this formula at all.
+ Loose();
+ return;
+ }
+ AddRecCost += 1; /// TODO: This should be a function of the stride.
+
+ // Add the step value register, if it needs one.
+ // TODO: The non-affine case isn't precisely modeled here.
+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
+ if (!Regs.count(AR->getOperand(1))) {
+ RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+ if (isLoser())
+ return;
+ }
+ }
+ }
+ ++NumRegs;
+
+ // Rough heuristic; favor registers which don't require extra setup
+ // instructions in the preheader.
+ if (!isa<SCEVUnknown>(Reg) &&
+ !isa<SCEVConstant>(Reg) &&
+ !(isa<SCEVAddRecExpr>(Reg) &&
+ (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
+ isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
+ ++SetupCost;
+
+ NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+ SE.hasComputableLoopEvolution(Reg, L);
+}
+
+/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
+/// before, rate it. Optional LoserRegs provides a way to declare any formula
+/// that refers to one of those regs an instant loser.
+void Cost::RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ if (LoserRegs && LoserRegs->count(Reg)) {
+ Loose();
+ return;
+ }
+ if (Regs.insert(Reg)) {
+ RateRegister(Reg, Regs, L, SE, DT);
+ if (LoserRegs && isLoser())
+ LoserRegs->insert(Reg);
+ }
+}
+
+void Cost::RateFormula(const Formula &F,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const DenseSet<const SCEV *> &VisitedRegs,
+ const Loop *L,
+ const SmallVectorImpl<int64_t> &Offsets,
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ // Tally up the registers.
+ if (const SCEV *ScaledReg = F.ScaledReg) {
+ if (VisitedRegs.count(ScaledReg)) {
+ Loose();
+ return;
+ }
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
+ if (isLoser())
+ return;
+ }
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I) {
+ const SCEV *BaseReg = *I;
+ if (VisitedRegs.count(BaseReg)) {
+ Loose();
+ return;
+ }
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
+ if (isLoser())
+ return;
+ }
+
+ // Determine how many (unfolded) adds we'll need inside the loop.
+ size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
+ if (NumBaseParts > 1)
+ NumBaseAdds += NumBaseParts - 1;
+
+ // Tally up the non-zero immediates.
+ for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+ E = Offsets.end(); I != E; ++I) {
+ int64_t Offset = (uint64_t)*I + F.BaseOffset;
+ if (F.BaseGV)
+ ImmCost += 64; // Handle symbolic values conservatively.
+ // TODO: This should probably be the pointer size.
+ else if (Offset != 0)
+ ImmCost += APInt(64, Offset, true).getMinSignedBits();
+ }
+ assert(isValid() && "invalid cost");
+}
+
+/// Loose - Set this cost to a losing value.
+void Cost::Loose() {
+ NumRegs = ~0u;
+ AddRecCost = ~0u;
+ NumIVMuls = ~0u;
+ NumBaseAdds = ~0u;
+ ImmCost = ~0u;
+ SetupCost = ~0u;
+}
+
+/// operator< - Choose the lower cost.
+bool Cost::operator<(const Cost &Other) const {
+ if (NumRegs != Other.NumRegs)
+ return NumRegs < Other.NumRegs;
+ if (AddRecCost != Other.AddRecCost)
+ return AddRecCost < Other.AddRecCost;
+ if (NumIVMuls != Other.NumIVMuls)
+ return NumIVMuls < Other.NumIVMuls;
+ if (NumBaseAdds != Other.NumBaseAdds)
+ return NumBaseAdds < Other.NumBaseAdds;
+ if (ImmCost != Other.ImmCost)
+ return ImmCost < Other.ImmCost;
+ if (SetupCost != Other.SetupCost)
+ return SetupCost < Other.SetupCost;
+ return false;
+}
+
+void Cost::print(raw_ostream &OS) const {
+ OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
+ if (AddRecCost != 0)
+ OS << ", with addrec cost " << AddRecCost;
+ if (NumIVMuls != 0)
+ OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
+ if (NumBaseAdds != 0)
+ OS << ", plus " << NumBaseAdds << " base add"
+ << (NumBaseAdds == 1 ? "" : "s");
+ if (ImmCost != 0)
+ OS << ", plus " << ImmCost << " imm cost";
+ if (SetupCost != 0)
+ OS << ", plus " << SetupCost << " setup cost";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void Cost::dump() const {
+ print(errs()); errs() << '\n';
+}
+#endif
+
+namespace {
+
+/// LSRFixup - An operand value in an instruction which is to be replaced
+/// with some equivalent, possibly strength-reduced, replacement.
+struct LSRFixup {
+ /// UserInst - The instruction which will be updated.
+ Instruction *UserInst;
+
+ /// OperandValToReplace - The operand of the instruction which will
+ /// be replaced. The operand may be used more than once; every instance
+ /// will be replaced.
+ Value *OperandValToReplace;
+
+ /// PostIncLoops - If this user is to use the post-incremented value of an
+ /// induction variable, this variable is non-null and holds the loop
+ /// associated with the induction variable.
+ PostIncLoopSet PostIncLoops;
+
+ /// LUIdx - The index of the LSRUse describing the expression which
+ /// this fixup needs, minus an offset (below).
+ size_t LUIdx;
+
+ /// Offset - A constant offset to be added to the LSRUse expression.
+ /// This allows multiple fixups to share the same LSRUse with different
+ /// offsets, for example in an unrolled loop.
+ int64_t Offset;
+
+ bool isUseFullyOutsideLoop(const Loop *L) const;
+
+ LSRFixup();
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+LSRFixup::LSRFixup()
+ : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
+
+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+ // PHI nodes use their value in their incoming blocks.
+ if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == OperandValToReplace &&
+ L->contains(PN->getIncomingBlock(i)))
+ return false;
+ return true;
+ }
+
+ return !L->contains(UserInst);
+}
+
+void LSRFixup::print(raw_ostream &OS) const {
+ OS << "UserInst=";
+ // Store is common and interesting enough to be worth special-casing.
+ if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
+ OS << "store ";
+ WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
+ } else if (UserInst->getType()->isVoidTy())
+ OS << UserInst->getOpcodeName();
+ else
+ WriteAsOperand(OS, UserInst, /*PrintType=*/false);
+
+ OS << ", OperandValToReplace=";
+ WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
+
+ for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+ E = PostIncLoops.end(); I != E; ++I) {
+ OS << ", PostIncLoop=";
+ WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
+ }
+
+ if (LUIdx != ~size_t(0))
+ OS << ", LUIdx=" << LUIdx;
+
+ if (Offset != 0)
+ OS << ", Offset=" << Offset;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LSRFixup::dump() const {
+ print(errs()); errs() << '\n';
+}
+#endif
+
+namespace {
+
+/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
+struct UniquifierDenseMapInfo {
+ static SmallVector<const SCEV *, 4> getEmptyKey() {
+ SmallVector<const SCEV *, 4> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-1));
+ return V;
+ }
+
+ static SmallVector<const SCEV *, 4> getTombstoneKey() {
+ SmallVector<const SCEV *, 4> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-2));
+ return V;
+ }
+
+ static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
+ unsigned Result = 0;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
+ E = V.end(); I != E; ++I)
+ Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
+ return Result;
+ }
+
+ static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
+ const SmallVector<const SCEV *, 4> &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// LSRUse - This class holds the state that LSR keeps for each use in
+/// IVUsers, as well as uses invented by LSR itself. It includes information
+/// about what kinds of things can be folded into the user, information about
+/// the user itself, and information about how the use may be satisfied.
+/// TODO: Represent multiple users of the same expression in common?
+class LSRUse {
+ DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
+
+public:
+ /// KindType - An enum for a kind of use, indicating what types of
+ /// scaled and immediate operands it might support.
+ enum KindType {
+ Basic, ///< A normal use, with no folding.
+ Special, ///< A special case of basic, allowing -1 scales.
+ Address, ///< An address use; folding according to TargetLowering
+ ICmpZero ///< An equality icmp with both operands folded into one.
+ // TODO: Add a generic icmp too?
+ };
+
+ KindType Kind;
+ Type *AccessTy;
+
+ SmallVector<int64_t, 8> Offsets;
+ int64_t MinOffset;
+ int64_t MaxOffset;
+
+ /// AllFixupsOutsideLoop - This records whether all of the fixups using this
+ /// LSRUse are outside of the loop, in which case some special-case heuristics
+ /// may be used.
+ bool AllFixupsOutsideLoop;
+
+ /// WidestFixupType - This records the widest use type for any fixup using
+ /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
+ /// max fixup widths to be equivalent, because the narrower one may be relying
+ /// on the implicit truncation to truncate away bogus bits.
+ Type *WidestFixupType;
+
+ /// Formulae - A list of ways to build a value that can satisfy this user.
+ /// After the list is populated, one of these is selected heuristically and
+ /// used to formulate a replacement for OperandValToReplace in UserInst.
+ SmallVector<Formula, 12> Formulae;
+
+ /// Regs - The set of register candidates used by all formulae in this LSRUse.
+ SmallPtrSet<const SCEV *, 4> Regs;
+
+ LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
+ MinOffset(INT64_MAX),
+ MaxOffset(INT64_MIN),
+ AllFixupsOutsideLoop(true),
+ WidestFixupType(0) {}
+
+ bool HasFormulaWithSameRegs(const Formula &F) const;
+ bool InsertFormula(const Formula &F);
+ void DeleteFormula(Formula &F);
+ void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// HasFormula - Test whether this use as a formula which has the same
+/// registers as the given formula.
+bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
+ if (F.ScaledReg) Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+ return Uniquifier.count(Key);
+}
+
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRUse::InsertFormula(const Formula &F) {
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
+ if (F.ScaledReg) Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ if (!Uniquifier.insert(Key).second)
+ return false;
+
+ // Using a register to hold the value of 0 is not profitable.
+ assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
+ "Zero allocated in a scaled register!");
+#ifndef NDEBUG
+ for (SmallVectorImpl<const SCEV *>::const_iterator I =
+ F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
+ assert(!(*I)->isZero() && "Zero allocated in a base register!");
+#endif
+
+ // Add the formula to the list.
+ Formulae.push_back(F);
+
+ // Record registers now being used by this use.
+ Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+
+ return true;
+}
+
+/// DeleteFormula - Remove the given formula from this use's list.
+void LSRUse::DeleteFormula(Formula &F) {
+ if (&F != &Formulae.back())
+ std::swap(F, Formulae.back());
+ Formulae.pop_back();
+}
+
+/// RecomputeRegs - Recompute the Regs field, and update RegUses.
+void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
+ Regs.clear();
+ for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
+ E = Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.ScaledReg) Regs.insert(F.ScaledReg);
+ Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ }
+
+ // Update the RegTracker.
+ for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
+ E = OldRegs.end(); I != E; ++I)
+ if (!Regs.count(*I))
+ RegUses.DropRegister(*I, LUIdx);
+}
+
+void LSRUse::print(raw_ostream &OS) const {
+ OS << "LSR Use: Kind=";
+ switch (Kind) {
+ case Basic: OS << "Basic"; break;
+ case Special: OS << "Special"; break;
+ case ICmpZero: OS << "ICmpZero"; break;
+ case Address:
+ OS << "Address of ";
+ if (AccessTy->isPointerTy())
+ OS << "pointer"; // the full pointer type could be really verbose
+ else
+ OS << *AccessTy;
+ }
+
+ OS << ", Offsets={";
+ for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+ E = Offsets.end(); I != E; ++I) {
+ OS << *I;
+ if (llvm::next(I) != E)
+ OS << ',';
+ }
+ OS << '}';
+
+ if (AllFixupsOutsideLoop)
+ OS << ", all-fixups-outside-loop";
+
+ if (WidestFixupType)
+ OS << ", widest fixup type: " << *WidestFixupType;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LSRUse::dump() const {
+ print(errs()); errs() << '\n';
+}
+#endif
+
+/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
+/// be completely folded into the user instruction at isel time. This includes
+/// address-mode folding and special icmp tricks.
+static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
+ Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
+ switch (Kind) {
+ case LSRUse::Address:
+ return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+
+ // Otherwise, just guess that reg+reg addressing is legal.
+ //return ;
+
+ case LSRUse::ICmpZero:
+ // There's not even a target hook for querying whether it would be legal to
+ // fold a GV into an ICmp.
+ if (BaseGV)
+ return false;
+
+ // ICmp only has two operands; don't allow more than two non-trivial parts.
+ if (Scale != 0 && HasBaseReg && BaseOffset != 0)
+ return false;
+
+ // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
+ // putting the scaled register in the other operand of the icmp.
+ if (Scale != 0 && Scale != -1)
+ return false;
+
+ // If we have low-level target information, ask the target if it can fold an
+ // integer immediate on an icmp.
+ if (BaseOffset != 0) {
+ // We have one of:
+ // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
+ // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
+ // Offs is the ICmp immediate.
+ if (Scale == 0)
+ // The cast does the right thing with INT64_MIN.
+ BaseOffset = -(uint64_t)BaseOffset;
+ return TTI.isLegalICmpImmediate(BaseOffset);
+ }
+
+ // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
+ return true;
+
+ case LSRUse::Basic:
+ // Only handle single-register values.
+ return !BaseGV && Scale == 0 && BaseOffset == 0;
+
+ case LSRUse::Special:
+ // Special case Basic to handle -1 scales.
+ return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
+ }
+
+ llvm_unreachable("Invalid LSRUse Kind!");
+}
+
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) {
+ // Check for overflow.
+ if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
+ (MinOffset > 0))
+ return false;
+ MinOffset = (uint64_t)BaseOffset + MinOffset;
+ if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
+ (MaxOffset > 0))
+ return false;
+ MaxOffset = (uint64_t)BaseOffset + MaxOffset;
+
+ return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
+ Scale) &&
+ isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+}
+
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ const Formula &F) {
+ return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
+ F.BaseOffset, F.HasBaseReg, F.Scale);
+}
+
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg) {
+ // Fast-path: zero is always foldable.
+ if (BaseOffset == 0 && !BaseGV) return true;
+
+ // Conservatively, create an address with an immediate and a
+ // base and a scale.
+ int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+ // Canonicalize a scale of 1 to a base register if the formula doesn't
+ // already have a base register.
+ if (!HasBaseReg && Scale == 1) {
+ Scale = 0;
+ HasBaseReg = true;
+ }
+
+ return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+}
+
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
+ ScalarEvolution &SE, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ Type *AccessTy, const SCEV *S, bool HasBaseReg) {
+ // Fast-path: zero is always foldable.
+ if (S->isZero()) return true;
+
+ // Conservatively, create an address with an immediate and a
+ // base and a scale.
+ int64_t BaseOffset = ExtractImmediate(S, SE);
+ GlobalValue *BaseGV = ExtractSymbol(S, SE);
+
+ // If there's anything else involved, it's not foldable.
+ if (!S->isZero()) return false;
+
+ // Fast-path: zero is always foldable.
+ if (BaseOffset == 0 && !BaseGV) return true;
+
+ // Conservatively, create an address with an immediate and a
+ // base and a scale.
+ int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+ return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale);
+}
+
+namespace {
+
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+ static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+ }
+
+ static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+ }
+
+ static unsigned
+ getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+ unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+ Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+ return Result;
+ }
+
+ static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+ const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// IVInc - An individual increment in a Chain of IV increments.
+/// Relate an IV user to an expression that computes the IV it uses from the IV
+/// used by the previous link in the Chain.
+///
+/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
+/// original IVOperand. The head of the chain's IVOperand is only valid during
+/// chain collection, before LSR replaces IV users. During chain generation,
+/// IncExpr can be used to find the new IVOperand that computes the same
+/// expression.
+struct IVInc {
+ Instruction *UserInst;
+ Value* IVOperand;
+ const SCEV *IncExpr;
+
+ IVInc(Instruction *U, Value *O, const SCEV *E):
+ UserInst(U), IVOperand(O), IncExpr(E) {}
+};
+
+// IVChain - The list of IV increments in program order.
+// We typically add the head of a chain without finding subsequent links.
+struct IVChain {
+ SmallVector<IVInc,1> Incs;
+ const SCEV *ExprBase;
+
+ IVChain() : ExprBase(0) {}
+
+ IVChain(const IVInc &Head, const SCEV *Base)
+ : Incs(1, Head), ExprBase(Base) {}
+
+ typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
+
+ // begin - return the first increment in the chain.
+ const_iterator begin() const {
+ assert(!Incs.empty());
+ return llvm::next(Incs.begin());
+ }
+ const_iterator end() const {
+ return Incs.end();
+ }
+
+ // hasIncs - Returns true if this chain contains any increments.
+ bool hasIncs() const { return Incs.size() >= 2; }
+
+ // add - Add an IVInc to the end of this chain.
+ void add(const IVInc &X) { Incs.push_back(X); }
+
+ // tailUserInst - Returns the last UserInst in the chain.
+ Instruction *tailUserInst() const { return Incs.back().UserInst; }
+
+ // isProfitableIncrement - Returns true if IncExpr can be profitably added to
+ // this chain.
+ bool isProfitableIncrement(const SCEV *OperExpr,
+ const SCEV *IncExpr,
+ ScalarEvolution&);
+};
+
+/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
+/// Distinguish between FarUsers that definitely cross IV increments and
+/// NearUsers that may be used between IV increments.
+struct ChainUsers {
+ SmallPtrSet<Instruction*, 4> FarUsers;
+ SmallPtrSet<Instruction*, 4> NearUsers;
+};
+
+/// LSRInstance - This class holds state for the main loop strength reduction
+/// logic.
+class LSRInstance {
+ IVUsers &IU;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+ LoopInfo &LI;
+ const TargetTransformInfo &TTI;
+ Loop *const L;
+ bool Changed;
+
+ /// IVIncInsertPos - This is the insert position that the current loop's
+ /// induction variable increment should be placed. In simple loops, this is
+ /// the latch block's terminator. But in more complicated cases, this is a
+ /// position which will dominate all the in-loop post-increment users.
+ Instruction *IVIncInsertPos;
+
+ /// Factors - Interesting factors between use strides.
+ SmallSetVector<int64_t, 8> Factors;
+
+ /// Types - Interesting use types, to facilitate truncation reuse.
+ SmallSetVector<Type *, 4> Types;
+
+ /// Fixups - The list of operands which are to be replaced.
+ SmallVector<LSRFixup, 16> Fixups;
+
+ /// Uses - The list of interesting uses.
+ SmallVector<LSRUse, 16> Uses;
+
+ /// RegUses - Track which uses use which register candidates.
+ RegUseTracker RegUses;
+
+ // Limit the number of chains to avoid quadratic behavior. We don't expect to
+ // have more than a few IV increment chains in a loop. Missing a Chain falls
+ // back to normal LSR behavior for those uses.
+ static const unsigned MaxChains = 8;
+
+ /// IVChainVec - IV users can form a chain of IV increments.
+ SmallVector<IVChain, MaxChains> IVChainVec;
+
+ /// IVIncSet - IV users that belong to profitable IVChains.
+ SmallPtrSet<Use*, MaxChains> IVIncSet;
+
+ void OptimizeShadowIV();
+ bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
+ ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
+ void OptimizeLoopTermCond();
+
+ void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
+ SmallVectorImpl<ChainUsers> &ChainUsersVec);
+ void FinalizeChain(IVChain &Chain);
+ void CollectChains();
+ void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts);
+
+ void CollectInterestingTypesAndFactors();
+ void CollectFixupsAndInitialFormulae();
+
+ LSRFixup &getNewFixup() {
+ Fixups.push_back(LSRFixup());
+ return Fixups.back();
+ }
+
+ // Support for sharing of LSRUses between LSRFixups.
+ typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+ size_t,
+ UseMapDenseMapInfo> UseMapTy;
+ UseMapTy UseMap;
+
+ bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
+ LSRUse::KindType Kind, Type *AccessTy);
+
+ std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind,
+ Type *AccessTy);
+
+ void DeleteUse(LSRUse &LU, size_t LUIdx);
+
+ LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
+
+ void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+ void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+ void CountRegisters(const Formula &F, size_t LUIdx);
+ bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+
+ void CollectLoopInvariantFixupsAndFormulae();
+
+ void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
+ unsigned Depth = 0);
+ void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateCrossUseConstantOffsets();
+ void GenerateAllReuseFormulae();
+
+ void FilterOutUndesirableDedicatedRegisters();
+
+ size_t EstimateSearchSpaceComplexity() const;
+ void NarrowSearchSpaceByDetectingSupersets();
+ void NarrowSearchSpaceByCollapsingUnrolledCode();
+ void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByPickingWinnerRegs();
+ void NarrowSearchSpaceUsingHeuristics();
+
+ void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+ Cost &SolutionCost,
+ SmallVectorImpl<const Formula *> &Workspace,
+ const Cost &CurCost,
+ const SmallPtrSet<const SCEV *, 16> &CurRegs,
+ DenseSet<const SCEV *> &VisitedRegs) const;
+ void Solve(SmallVectorImpl<const Formula *> &Solution) const;
+
+ BasicBlock::iterator
+ HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs) const;
+ BasicBlock::iterator
+ AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU,
+ SCEVExpander &Rewriter) const;
+
+ Value *Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
+ void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
+ void Rewrite(const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
+ void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+ Pass *P);
+
+public:
+ LSRInstance(Loop *L, Pass *P);
+
+ bool getChanged() const { return Changed; }
+
+ void print_factors_and_types(raw_ostream &OS) const;
+ void print_fixups(raw_ostream &OS) const;
+ void print_uses(raw_ostream &OS) const;
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// OptimizeShadowIV - If IV is used in a int-to-float cast
+/// inside the loop then try to eliminate the cast operation.
+void LSRInstance::OptimizeShadowIV() {
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return;
+
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
+ UI != E; /* empty */) {
+ IVUsers::const_iterator CandidateUI = UI;
+ ++UI;
+ Instruction *ShadowUse = CandidateUI->getUser();
+ Type *DestTy = NULL;
+ bool IsSigned = false;
+
+ /* If shadow use is a int->float cast then insert a second IV
+ to eliminate this cast.
+
+ for (unsigned i = 0; i < n; ++i)
+ foo((double)i);
+
+ is transformed into
+
+ double d = 0.0;
+ for (unsigned i = 0; i < n; ++i, ++d)
+ foo(d);
+ */
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
+ IsSigned = false;
+ DestTy = UCast->getDestTy();
+ }
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
+ IsSigned = true;
+ DestTy = SCast->getDestTy();
+ }
+ if (!DestTy) continue;
+
+ // If target does not support DestTy natively then do not apply
+ // this transformation.
+ if (!TTI.isTypeLegal(DestTy)) continue;
+
+ PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
+ if (!PH) continue;
+ if (PH->getNumIncomingValues() != 2) continue;
+
+ Type *SrcTy = PH->getType();
+ int Mantissa = DestTy->getFPMantissaWidth();
+ if (Mantissa == -1) continue;
+ if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
+ continue;
+
+ unsigned Entry, Latch;
+ if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
+ Entry = 0;
+ Latch = 1;
+ } else {
+ Entry = 1;
+ Latch = 0;
+ }
+
+ ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
+ if (!Init) continue;
+ Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
+ (double)Init->getSExtValue() :
+ (double)Init->getZExtValue());
+
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
+ if (!Incr) continue;
+ if (Incr->getOpcode() != Instruction::Add
+ && Incr->getOpcode() != Instruction::Sub)
+ continue;
+
+ /* Initialize new IV, double d = 0.0 in above example. */
+ ConstantInt *C = NULL;
+ if (Incr->getOperand(0) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(1));
+ else if (Incr->getOperand(1) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(0));
+ else
+ continue;
+
+ if (!C) continue;
+
+ // Ignore negative constants, as the code below doesn't handle them
+ // correctly. TODO: Remove this restriction.
+ if (!C->getValue().isStrictlyPositive()) continue;
+
+ /* Add new PHINode. */
+ PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
+
+ /* create new increment. '++d' in above example. */
+ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
+ BinaryOperator *NewIncr =
+ BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
+ Instruction::FAdd : Instruction::FSub,
+ NewPH, CFP, "IV.S.next.", Incr);
+
+ NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
+ NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
+
+ /* Remove cast operation */
+ ShadowUse->replaceAllUsesWith(NewPH);
+ ShadowUse->eraseFromParent();
+ Changed = true;
+ break;
+ }
+}
+
+/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
+/// set the IV user and stride information and return true, otherwise return
+/// false.
+bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
+ for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
+ if (UI->getUser() == Cond) {
+ // NOTE: we could handle setcc instructions with multiple uses here, but
+ // InstCombine does it as well for simple uses, it's not clear that it
+ // occurs enough in real life to handle.
+ CondUse = UI;
+ return true;
+ }
+ return false;
+}
+
+/// OptimizeMax - Rewrite the loop's terminating condition if it uses
+/// a max computation.
+///
+/// This is a narrow solution to a specific, but acute, problem. For loops
+/// like this:
+///
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+///
+/// the trip count isn't just 'n', because 'n' might not be positive. And
+/// unfortunately this can come up even for loops where the user didn't use
+/// a C do-while loop. For example, seemingly well-behaved top-test loops
+/// will commonly be lowered like this:
+//
+/// if (n > 0) {
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+/// }
+///
+/// and then it's possible for subsequent optimization to obscure the if
+/// test in such a way that indvars can't find it.
+///
+/// When indvars can't find the if test in loops like this, it creates a
+/// max expression, which allows it to give the loop a canonical
+/// induction variable:
+///
+/// i = 0;
+/// max = n < 1 ? 1 : n;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i != max);
+///
+/// Canonical induction variables are necessary because the loop passes
+/// are designed around them. The most obvious example of this is the
+/// LoopInfo analysis, which doesn't remember trip count values. It
+/// expects to be able to rediscover the trip count each time it is
+/// needed, and it does this using a simple analysis that only succeeds if
+/// the loop has a canonical induction variable.
+///
+/// However, when it comes time to generate code, the maximum operation
+/// can be quite costly, especially if it's inside of an outer loop.
+///
+/// This function solves this problem by detecting this type of loop and
+/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
+/// the instructions for the maximum computation.
+///
+ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
+ // Check that the loop matches the pattern we're looking for.
+ if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
+ Cond->getPredicate() != CmpInst::ICMP_NE)
+ return Cond;
+
+ SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
+ if (!Sel || !Sel->hasOneUse()) return Cond;
+
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return Cond;
+ const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
+
+ // Add one to the backedge-taken count to get the trip count.
+ const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
+ if (IterationCount != SE.getSCEV(Sel)) return Cond;
+
+ // Check for a max calculation that matches the pattern. There's no check
+ // for ICMP_ULE here because the comparison would be with zero, which
+ // isn't interesting.
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+ const SCEVNAryExpr *Max = 0;
+ if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
+ Pred = ICmpInst::ICMP_SLE;
+ Max = S;
+ } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_SLT;
+ Max = S;
+ } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_ULT;
+ Max = U;
+ } else {
+ // No match; bail.
+ return Cond;
+ }
+
+ // To handle a max with more than two operands, this optimization would
+ // require additional checking and setup.
+ if (Max->getNumOperands() != 2)
+ return Cond;
+
+ const SCEV *MaxLHS = Max->getOperand(0);
+ const SCEV *MaxRHS = Max->getOperand(1);
+
+ // ScalarEvolution canonicalizes constants to the left. For < and >, look
+ // for a comparison with 1. For <= and >=, a comparison with zero.
+ if (!MaxLHS ||
+ (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
+ return Cond;
+
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine() ||
+ AR->getStart() != One ||
+ AR->getStepRecurrence(SE) != One)
+ return Cond;
+
+ assert(AR->getLoop() == L &&
+ "Loop condition operand is an addrec in a different loop!");
+
+ // Check the right operand of the select, and remember it, as it will
+ // be used in the new comparison instruction.
+ Value *NewRHS = 0;
+ if (ICmpInst::isTrueWhenEqual(Pred)) {
+ // Look for n+1, and grab n.
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (!NewRHS)
+ return Cond;
+ } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
+ NewRHS = Sel->getOperand(1);
+ else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
+ NewRHS = Sel->getOperand(2);
+ else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
+ NewRHS = SU->getValue();
+ else
+ // Max doesn't match expected pattern.
+ return Cond;
+
+ // Determine the new comparison opcode. It may be signed or unsigned,
+ // and the original comparison may be either equality or inequality.
+ if (Cond->getPredicate() == CmpInst::ICMP_EQ)
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ // Ok, everything looks ok to change the condition into an SLT or SGE and
+ // delete the max calculation.
+ ICmpInst *NewCond =
+ new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
+
+ // Delete the max calculation instructions.
+ Cond->replaceAllUsesWith(NewCond);
+ CondUse->setUser(NewCond);
+ Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
+ Cond->eraseFromParent();
+ Sel->eraseFromParent();
+ if (Cmp->use_empty())
+ Cmp->eraseFromParent();
+ return NewCond;
+}
+
+/// OptimizeLoopTermCond - Change loop terminating condition to use the
+/// postinc iv when possible.
+void
+LSRInstance::OptimizeLoopTermCond() {
+ SmallPtrSet<Instruction *, 4> PostIncs;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+
+ // Get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
+
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+ // FIXME: Overly conservative, termination condition could be an 'or' etc..
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ continue;
+
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse))
+ continue;
+
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ // One consequence of doing this now is that it disrupts the count-down
+ // optimization. That's not always a bad thing though, because in such
+ // cases it may still be worthwhile to avoid a max.
+ Cond = OptimizeMax(Cond, CondUse);
+
+ // If this exiting block dominates the latch block, it may also use
+ // the post-inc value if it won't be shared with other uses.
+ // Check for dominance.
+ if (!DT.dominates(ExitingBlock, LatchBlock))
+ continue;
+
+ // Conservatively avoid trying to use the post-inc value in non-latch
+ // exits if there may be pre-inc users in intervening blocks.
+ if (LatchBlock != ExitingBlock)
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
+ // Test if the use is reachable from the exiting block. This dominator
+ // query is a conservative approximation of reachability.
+ if (&*UI != CondUse &&
+ !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
+ // Conservatively assume there may be reuse if the quotient of their
+ // strides could be a legal scale.
+ const SCEV *A = IU.getStride(*CondUse, L);
+ const SCEV *B = IU.getStride(*UI, L);
+ if (!A || !B) continue;
+ if (SE.getTypeSizeInBits(A->getType()) !=
+ SE.getTypeSizeInBits(B->getType())) {
+ if (SE.getTypeSizeInBits(A->getType()) >
+ SE.getTypeSizeInBits(B->getType()))
+ B = SE.getSignExtendExpr(B, A->getType());
+ else
+ A = SE.getSignExtendExpr(A, B->getType());
+ }
+ if (const SCEVConstant *D =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
+ const ConstantInt *C = D->getValue();
+ // Stride of one or negative one can have reuse with non-addresses.
+ if (C->isOne() || C->isAllOnesValue())
+ goto decline_post_inc;
+ // Avoid weird situations.
+ if (C->getValue().getMinSignedBits() >= 64 ||
+ C->getValue().isMinSignedValue())
+ goto decline_post_inc;
+ // Check for possible scaled-address reuse.
+ Type *AccessTy = getAccessType(UI->getUser());
+ int64_t Scale = C->getSExtValue();
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ /*BaseOffset=*/ 0,
+ /*HasBaseReg=*/ false, Scale))
+ goto decline_post_inc;
+ Scale = -Scale;
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ /*BaseOffset=*/ 0,
+ /*HasBaseReg=*/ false, Scale))
+ goto decline_post_inc;
+ }
+ }
+
+ DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
+
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != TermBr) {
+ if (Cond->hasOneUse()) {
+ Cond->moveBefore(TermBr);
+ } else {
+ // Clone the terminating condition and insert into the loopend.
+ ICmpInst *OldCond = Cond;
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
+ TermBr->replaceUsesOfWith(OldCond, Cond);
+ }
+ }
+
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->transformToPostInc(L);
+ Changed = true;
+
+ PostIncs.insert(Cond);
+ decline_post_inc:;
+ }
+
+ // Determine an insertion point for the loop induction variable increment. It
+ // must dominate all the post-inc comparisons we just set up, and it must
+ // dominate the loop latch edge.
+ IVIncInsertPos = L->getLoopLatch()->getTerminator();
+ for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
+ E = PostIncs.end(); I != E; ++I) {
+ BasicBlock *BB =
+ DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
+ (*I)->getParent());
+ if (BB == (*I)->getParent())
+ IVIncInsertPos = *I;
+ else if (BB != IVIncInsertPos->getParent())
+ IVIncInsertPos = BB->getTerminator();
+ }
+}
+
+/// reconcileNewOffset - Determine if the given use can accommodate a fixup
+/// at the given offset and other details. If so, update the use and
+/// return true.
+bool
+LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
+ LSRUse::KindType Kind, Type *AccessTy) {
+ int64_t NewMinOffset = LU.MinOffset;
+ int64_t NewMaxOffset = LU.MaxOffset;
+ Type *NewAccessTy = AccessTy;
+
+ // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
+ // something conservative, however this can pessimize in the case that one of
+ // the uses will have all its uses outside the loop, for example.
+ if (LU.Kind != Kind)
+ return false;
+ // Conservatively assume HasBaseReg is true for now.
+ if (NewOffset < LU.MinOffset) {
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ LU.MaxOffset - NewOffset, HasBaseReg))
+ return false;
+ NewMinOffset = NewOffset;
+ } else if (NewOffset > LU.MaxOffset) {
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ NewOffset - LU.MinOffset, HasBaseReg))
+ return false;
+ NewMaxOffset = NewOffset;
+ }
+ // Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
+ if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+ NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
+ // Update the use.
+ LU.MinOffset = NewMinOffset;
+ LU.MaxOffset = NewMaxOffset;
+ LU.AccessTy = NewAccessTy;
+ if (NewOffset != LU.Offsets.back())
+ LU.Offsets.push_back(NewOffset);
+ return true;
+}
+
+/// getUse - Return an LSRUse index and an offset value for a fixup which
+/// needs the given expression, with the given kind and optional access type.
+/// Either reuse an existing use or create a new one, as needed.
+std::pair<size_t, int64_t>
+LSRInstance::getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind, Type *AccessTy) {
+ const SCEV *Copy = Expr;
+ int64_t Offset = ExtractImmediate(Expr, SE);
+
+ // Basic uses can't accept any offset, for example.
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ Offset, /*HasBaseReg=*/ true)) {
+ Expr = Copy;
+ Offset = 0;
+ }
+
+ std::pair<UseMapTy::iterator, bool> P =
+ UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
+ if (!P.second) {
+ // A use already existed with this base.
+ size_t LUIdx = P.first->second;
+ LSRUse &LU = Uses[LUIdx];
+ if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
+ // Reuse this use.
+ return std::make_pair(LUIdx, Offset);
+ }
+
+ // Create a new use.
+ size_t LUIdx = Uses.size();
+ P.first->second = LUIdx;
+ Uses.push_back(LSRUse(Kind, AccessTy));
+ LSRUse &LU = Uses[LUIdx];
+
+ // We don't need to track redundant offsets, but we don't need to go out
+ // of our way here to avoid them.
+ if (LU.Offsets.empty() || Offset != LU.Offsets.back())
+ LU.Offsets.push_back(Offset);
+
+ LU.MinOffset = Offset;
+ LU.MaxOffset = Offset;
+ return std::make_pair(LUIdx, Offset);
+}
+
+/// DeleteUse - Delete the given use from the Uses list.
+void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
+ if (&LU != &Uses.back())
+ std::swap(LU, Uses.back());
+ Uses.pop_back();
+
+ // Update RegUses.
+ RegUses.SwapAndDropUse(LUIdx, Uses.size());
+}
+
+/// FindUseWithFormula - Look for a use distinct from OrigLU which is has
+/// a formula that has the same registers as the given formula.
+LSRUse *
+LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
+ const LSRUse &OrigLU) {
+ // Search all uses for the formula. This could be more clever.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ // Check whether this use is close enough to OrigLU, to see whether it's
+ // worthwhile looking through its formulae.
+ // Ignore ICmpZero uses because they may contain formulae generated by
+ // GenerateICmpZeroScales, in which case adding fixup offsets may
+ // be invalid.
+ if (&LU != &OrigLU &&
+ LU.Kind != LSRUse::ICmpZero &&
+ LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
+ LU.WidestFixupType == OrigLU.WidestFixupType &&
+ LU.HasFormulaWithSameRegs(OrigF)) {
+ // Scan through this use's formulae.
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ // Check to see if this formula has the same registers and symbols
+ // as OrigF.
+ if (F.BaseRegs == OrigF.BaseRegs &&
+ F.ScaledReg == OrigF.ScaledReg &&
+ F.BaseGV == OrigF.BaseGV &&
+ F.Scale == OrigF.Scale &&
+ F.UnfoldedOffset == OrigF.UnfoldedOffset) {
+ if (F.BaseOffset == 0)
+ return &LU;
+ // This is the formula where all the registers and symbols matched;
+ // there aren't going to be any others. Since we declined it, we
+ // can skip the rest of the formulae and proceed to the next LSRUse.
+ break;
+ }
+ }
+ }
+ }
+
+ // Nothing looked good.
+ return 0;
+}
+
+void LSRInstance::CollectInterestingTypesAndFactors() {
+ SmallSetVector<const SCEV *, 4> Strides;
+
+ // Collect interesting types and strides.
+ SmallVector<const SCEV *, 4> Worklist;
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ const SCEV *Expr = IU.getExpr(*UI);
+
+ // Collect interesting types.
+ Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
+
+ // Add strides for mentioned loops.
+ Worklist.push_back(Expr);
+ do {
+ const SCEV *S = Worklist.pop_back_val();
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (AR->getLoop() == L)
+ Strides.insert(AR->getStepRecurrence(SE));
+ Worklist.push_back(AR->getStart());
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ Worklist.append(Add->op_begin(), Add->op_end());
+ }
+ } while (!Worklist.empty());
+ }
+
+ // Compute interesting factors from the set of interesting strides.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator
+ I = Strides.begin(), E = Strides.end(); I != E; ++I)
+ for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
+ llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
+ const SCEV *OldStride = *I;
+ const SCEV *NewStride = *NewStrideIter;
+
+ if (SE.getTypeSizeInBits(OldStride->getType()) !=
+ SE.getTypeSizeInBits(NewStride->getType())) {
+ if (SE.getTypeSizeInBits(OldStride->getType()) >
+ SE.getTypeSizeInBits(NewStride->getType()))
+ NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
+ else
+ OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
+ }
+ if (const SCEVConstant *Factor =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
+ SE, true))) {
+ if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ } else if (const SCEVConstant *Factor =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
+ NewStride,
+ SE, true))) {
+ if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ }
+ }
+
+ // If all uses use the same type, don't bother looking for truncation-based
+ // reuse.
+ if (Types.size() == 1)
+ Types.clear();
+
+ DEBUG(print_factors_and_types(dbgs()));
+}
+
+/// findIVOperand - Helper for CollectChains that finds an IV operand (computed
+/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
+/// Instructions to IVStrideUses, we could partially skip this.
+static User::op_iterator
+findIVOperand(User::op_iterator OI, User::op_iterator OE,
+ Loop *L, ScalarEvolution &SE) {
+ for(; OI != OE; ++OI) {
+ if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
+ if (!SE.isSCEVable(Oper->getType()))
+ continue;
+
+ if (const SCEVAddRecExpr *AR =
+ dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
+ if (AR->getLoop() == L)
+ break;
+ }
+ }
+ }
+ return OI;
+}
+
+/// getWideOperand - IVChain logic must consistenctly peek base TruncInst
+/// operands, so wrap it in a convenient helper.
+static Value *getWideOperand(Value *Oper) {
+ if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
+ return Trunc->getOperand(0);
+ return Oper;
+}
+
+/// isCompatibleIVType - Return true if we allow an IV chain to include both
+/// types.
+static bool isCompatibleIVType(Value *LVal, Value *RVal) {
+ Type *LType = LVal->getType();
+ Type *RType = RVal->getType();
+ return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
+}
+
+/// getExprBase - Return an approximation of this SCEV expression's "base", or
+/// NULL for any constant. Returning the expression itself is
+/// conservative. Returning a deeper subexpression is more precise and valid as
+/// long as it isn't less complex than another subexpression. For expressions
+/// involving multiple unscaled values, we need to return the pointer-type
+/// SCEVUnknown. This avoids forming chains across objects, such as:
+/// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
+///
+/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
+/// SCEVUnknown, we simply return the rightmost SCEV operand.
+static const SCEV *getExprBase(const SCEV *S) {
+ switch (S->getSCEVType()) {
+ default: // uncluding scUnknown.
+ return S;
+ case scConstant:
+ return 0;
+ case scTruncate:
+ return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
+ case scZeroExtend:
+ return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
+ case scSignExtend:
+ return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
+ case scAddExpr: {
+ // Skip over scaled operands (scMulExpr) to follow add operands as long as
+ // there's nothing more complex.
+ // FIXME: not sure if we want to recognize negation.
+ const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
+ E(Add->op_begin()); I != E; ++I) {
+ const SCEV *SubExpr = *I;
+ if (SubExpr->getSCEVType() == scAddExpr)
+ return getExprBase(SubExpr);
+
+ if (SubExpr->getSCEVType() != scMulExpr)
+ return SubExpr;
+ }
+ return S; // all operands are scaled, be conservative.
+ }
+ case scAddRecExpr:
+ return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
+ }
+}
+
+/// Return true if the chain increment is profitable to expand into a loop
+/// invariant value, which may require its own register. A profitable chain
+/// increment will be an offset relative to the same base. We allow such offsets
+/// to potentially be used as chain increment as long as it's not obviously
+/// expensive to expand using real instructions.
+bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
+ const SCEV *IncExpr,
+ ScalarEvolution &SE) {
+ // Aggressively form chains when -stress-ivchain.
+ if (StressIVChain)
+ return true;
+
+ // Do not replace a constant offset from IV head with a nonconstant IV
+ // increment.
+ if (!isa<SCEVConstant>(IncExpr)) {
+ const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
+ if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
+ return 0;
+ }
+
+ SmallPtrSet<const SCEV*, 8> Processed;
+ return !isHighCostExpansion(IncExpr, Processed, SE);
+}
+
+/// Return true if the number of registers needed for the chain is estimated to
+/// be less than the number required for the individual IV users. First prohibit
+/// any IV users that keep the IV live across increments (the Users set should
+/// be empty). Next count the number and type of increments in the chain.
+///
+/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
+/// effectively use postinc addressing modes. Only consider it profitable it the
+/// increments can be computed in fewer registers when chained.
+///
+/// TODO: Consider IVInc free if it's already used in another chains.
+static bool
+isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
+ ScalarEvolution &SE, const TargetTransformInfo &TTI) {
+ if (StressIVChain)
+ return true;
+
+ if (!Chain.hasIncs())
+ return false;
+
+ if (!Users.empty()) {
+ DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
+ for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
+ E = Users.end(); I != E; ++I) {
+ dbgs() << " " << **I << "\n";
+ });
+ return false;
+ }
+ assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
+
+ // The chain itself may require a register, so intialize cost to 1.
+ int cost = 1;
+
+ // A complete chain likely eliminates the need for keeping the original IV in
+ // a register. LSR does not currently know how to form a complete chain unless
+ // the header phi already exists.
+ if (isa<PHINode>(Chain.tailUserInst())
+ && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
+ --cost;
+ }
+ const SCEV *LastIncExpr = 0;
+ unsigned NumConstIncrements = 0;
+ unsigned NumVarIncrements = 0;
+ unsigned NumReusedIncrements = 0;
+ for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
+ I != E; ++I) {
+
+ if (I->IncExpr->isZero())
+ continue;
+
+ // Incrementing by zero or some constant is neutral. We assume constants can
+ // be folded into an addressing mode or an add's immediate operand.
+ if (isa<SCEVConstant>(I->IncExpr)) {
+ ++NumConstIncrements;
+ continue;
+ }
+
+ if (I->IncExpr == LastIncExpr)
+ ++NumReusedIncrements;
+ else
+ ++NumVarIncrements;
+
+ LastIncExpr = I->IncExpr;
+ }
+ // An IV chain with a single increment is handled by LSR's postinc
+ // uses. However, a chain with multiple increments requires keeping the IV's
+ // value live longer than it needs to be if chained.
+ if (NumConstIncrements > 1)
+ --cost;
+
+ // Materializing increment expressions in the preheader that didn't exist in
+ // the original code may cost a register. For example, sign-extended array
+ // indices can produce ridiculous increments like this:
+ // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
+ cost += NumVarIncrements;
+
+ // Reusing variable increments likely saves a register to hold the multiple of
+ // the stride.
+ cost -= NumReusedIncrements;
+
+ DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
+ << "\n");
+
+ return cost < 0;
+}
+
+/// ChainInstruction - Add this IV user to an existing chain or make it the head
+/// of a new chain.
+void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
+ SmallVectorImpl<ChainUsers> &ChainUsersVec) {
+ // When IVs are used as types of varying widths, they are generally converted
+ // to a wider type with some uses remaining narrow under a (free) trunc.
+ Value *const NextIV = getWideOperand(IVOper);
+ const SCEV *const OperExpr = SE.getSCEV(NextIV);
+ const SCEV *const OperExprBase = getExprBase(OperExpr);
+
+ // Visit all existing chains. Check if its IVOper can be computed as a
+ // profitable loop invariant increment from the last link in the Chain.
+ unsigned ChainIdx = 0, NChains = IVChainVec.size();
+ const SCEV *LastIncExpr = 0;
+ for (; ChainIdx < NChains; ++ChainIdx) {
+ IVChain &Chain = IVChainVec[ChainIdx];
+
+ // Prune the solution space aggressively by checking that both IV operands
+ // are expressions that operate on the same unscaled SCEVUnknown. This
+ // "base" will be canceled by the subsequent getMinusSCEV call. Checking
+ // first avoids creating extra SCEV expressions.
+ if (!StressIVChain && Chain.ExprBase != OperExprBase)
+ continue;
+
+ Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
+ if (!isCompatibleIVType(PrevIV, NextIV))
+ continue;
+
+ // A phi node terminates a chain.
+ if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
+ continue;
+
+ // The increment must be loop-invariant so it can be kept in a register.
+ const SCEV *PrevExpr = SE.getSCEV(PrevIV);
+ const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
+ if (!SE.isLoopInvariant(IncExpr, L))
+ continue;
+
+ if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
+ LastIncExpr = IncExpr;
+ break;
+ }
+ }
+ // If we haven't found a chain, create a new one, unless we hit the max. Don't
+ // bother for phi nodes, because they must be last in the chain.
+ if (ChainIdx == NChains) {
+ if (isa<PHINode>(UserInst))
+ return;
+ if (NChains >= MaxChains && !StressIVChain) {
+ DEBUG(dbgs() << "IV Chain Limit\n");
+ return;
+ }
+ LastIncExpr = OperExpr;
+ // IVUsers may have skipped over sign/zero extensions. We don't currently
+ // attempt to form chains involving extensions unless they can be hoisted
+ // into this loop's AddRec.
+ if (!isa<SCEVAddRecExpr>(LastIncExpr))
+ return;
+ ++NChains;
+ IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
+ OperExprBase));
+ ChainUsersVec.resize(NChains);
+ DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
+ << ") IV=" << *LastIncExpr << "\n");
+ } else {
+ DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
+ << ") IV+" << *LastIncExpr << "\n");
+ // Add this IV user to the end of the chain.
+ IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
+ }
+ IVChain &Chain = IVChainVec[ChainIdx];
+
+ SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
+ // This chain's NearUsers become FarUsers.
+ if (!LastIncExpr->isZero()) {
+ ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
+ NearUsers.end());
+ NearUsers.clear();
+ }
+
+ // All other uses of IVOperand become near uses of the chain.
+ // We currently ignore intermediate values within SCEV expressions, assuming
+ // they will eventually be used be the current chain, or can be computed
+ // from one of the chain increments. To be more precise we could
+ // transitively follow its user and only add leaf IV users to the set.
+ for (Value::use_iterator UseIter = IVOper->use_begin(),
+ UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
+ Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
+ if (!OtherUse)
+ continue;
+ // Uses in the chain will no longer be uses if the chain is formed.
+ // Include the head of the chain in this iteration (not Chain.begin()).
+ IVChain::const_iterator IncIter = Chain.Incs.begin();
+ IVChain::const_iterator IncEnd = Chain.Incs.end();
+ for( ; IncIter != IncEnd; ++IncIter) {
+ if (IncIter->UserInst == OtherUse)
+ break;
+ }
+ if (IncIter != IncEnd)
+ continue;
+
+ if (SE.isSCEVable(OtherUse->getType())
+ && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
+ && IU.isIVUserOrOperand(OtherUse)) {
+ continue;
+ }
+ NearUsers.insert(OtherUse);
+ }
+
+ // Since this user is part of the chain, it's no longer considered a use
+ // of the chain.
+ ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
+}
+
+/// CollectChains - Populate the vector of Chains.
+///
+/// This decreases ILP at the architecture level. Targets with ample registers,
+/// multiple memory ports, and no register renaming probably don't want
+/// this. However, such targets should probably disable LSR altogether.
+///
+/// The job of LSR is to make a reasonable choice of induction variables across
+/// the loop. Subsequent passes can easily "unchain" computation exposing more
+/// ILP *within the loop* if the target wants it.
+///
+/// Finding the best IV chain is potentially a scheduling problem. Since LSR
+/// will not reorder memory operations, it will recognize this as a chain, but
+/// will generate redundant IV increments. Ideally this would be corrected later
+/// by a smart scheduler:
+/// = A[i]
+/// = A[i+x]
+/// A[i] =
+/// A[i+x] =
+///
+/// TODO: Walk the entire domtree within this loop, not just the path to the
+/// loop latch. This will discover chains on side paths, but requires
+/// maintaining multiple copies of the Chains state.
+void LSRInstance::CollectChains() {
+ DEBUG(dbgs() << "Collecting IV Chains.\n");
+ SmallVector<ChainUsers, 8> ChainUsersVec;
+
+ SmallVector<BasicBlock *,8> LatchPath;
+ BasicBlock *LoopHeader = L->getHeader();
+ for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
+ Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
+ LatchPath.push_back(Rung->getBlock());
+ }
+ LatchPath.push_back(LoopHeader);
+
+ // Walk the instruction stream from the loop header to the loop latch.
+ for (SmallVectorImpl<BasicBlock *>::reverse_iterator
+ BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
+ BBIter != BBEnd; ++BBIter) {
+ for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
+ I != E; ++I) {
+ // Skip instructions that weren't seen by IVUsers analysis.
+ if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
+ continue;
+
+ // Ignore users that are part of a SCEV expression. This way we only
+ // consider leaf IV Users. This effectively rediscovers a portion of
+ // IVUsers analysis but in program order this time.
+ if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
+ continue;
+
+ // Remove this instruction from any NearUsers set it may be in.
+ for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
+ ChainIdx < NChains; ++ChainIdx) {
+ ChainUsersVec[ChainIdx].NearUsers.erase(I);
+ }
+ // Search for operands that can be chained.
+ SmallPtrSet<Instruction*, 4> UniqueOperands;
+ User::op_iterator IVOpEnd = I->op_end();
+ User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
+ while (IVOpIter != IVOpEnd) {
+ Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
+ if (UniqueOperands.insert(IVOpInst))
+ ChainInstruction(I, IVOpInst, ChainUsersVec);
+ IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ }
+ } // Continue walking down the instructions.
+ } // Continue walking down the domtree.
+ // Visit phi backedges to determine if the chain can generate the IV postinc.
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (!SE.isSCEVable(PN->getType()))
+ continue;
+
+ Instruction *IncV =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+ if (IncV)
+ ChainInstruction(PN, IncV, ChainUsersVec);
+ }
+ // Remove any unprofitable chains.
+ unsigned ChainIdx = 0;
+ for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
+ UsersIdx < NChains; ++UsersIdx) {
+ if (!isProfitableChain(IVChainVec[UsersIdx],
+ ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
+ continue;
+ // Preserve the chain at UsesIdx.
+ if (ChainIdx != UsersIdx)
+ IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
+ FinalizeChain(IVChainVec[ChainIdx]);
+ ++ChainIdx;
+ }
+ IVChainVec.resize(ChainIdx);
+}
+
+void LSRInstance::FinalizeChain(IVChain &Chain) {
+ assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
+ DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
+
+ for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
+ I != E; ++I) {
+ DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
+ User::op_iterator UseI =
+ std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
+ assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
+ IVIncSet.insert(UseI);
+ }
+}
+
+/// Return true if the IVInc can be folded into an addressing mode.
+static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
+ Value *Operand, const TargetTransformInfo &TTI) {
+ const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
+ if (!IncConst || !isAddressUse(UserInst, Operand))
+ return false;
+
+ if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
+ return false;
+
+ int64_t IncOffset = IncConst->getValue()->getSExtValue();
+ if (!isAlwaysFoldable(TTI, LSRUse::Address,
+ getAccessType(UserInst), /*BaseGV=*/ 0,
+ IncOffset, /*HaseBaseReg=*/ false))
+ return false;
+
+ return true;
+}
+
+/// GenerateIVChains - Generate an add or subtract for each IVInc in a chain to
+/// materialize the IV user's operand from the previous IV user's operand.
+void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) {
+ // Find the new IVOperand for the head of the chain. It may have been replaced
+ // by LSR.
+ const IVInc &Head = Chain.Incs[0];
+ User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
+ User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
+ IVOpEnd, L, SE);
+ Value *IVSrc = 0;
+ while (IVOpIter != IVOpEnd) {
+ IVSrc = getWideOperand(*IVOpIter);
+
+ // If this operand computes the expression that the chain needs, we may use
+ // it. (Check this after setting IVSrc which is used below.)
+ //
+ // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
+ // narrow for the chain, so we can no longer use it. We do allow using a
+ // wider phi, assuming the LSR checked for free truncation. In that case we
+ // should already have a truncate on this operand such that
+ // getSCEV(IVSrc) == IncExpr.
+ if (SE.getSCEV(*IVOpIter) == Head.IncExpr
+ || SE.getSCEV(IVSrc) == Head.IncExpr) {
+ break;
+ }
+ IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ }
+ if (IVOpIter == IVOpEnd) {
+ // Gracefully give up on this chain.
+ DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
+ return;
+ }
+
+ DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
+ Type *IVTy = IVSrc->getType();
+ Type *IntTy = SE.getEffectiveSCEVType(IVTy);
+ const SCEV *LeftOverExpr = 0;
+ for (IVChain::const_iterator IncI = Chain.begin(),
+ IncE = Chain.end(); IncI != IncE; ++IncI) {
+
+ Instruction *InsertPt = IncI->UserInst;
+ if (isa<PHINode>(InsertPt))
+ InsertPt = L->getLoopLatch()->getTerminator();
+
+ // IVOper will replace the current IV User's operand. IVSrc is the IV
+ // value currently held in a register.
+ Value *IVOper = IVSrc;
+ if (!IncI->IncExpr->isZero()) {
+ // IncExpr was the result of subtraction of two narrow values, so must
+ // be signed.
+ const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
+ LeftOverExpr = LeftOverExpr ?
+ SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
+ }
+ if (LeftOverExpr && !LeftOverExpr->isZero()) {
+ // Expand the IV increment.
+ Rewriter.clearPostInc();
+ Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
+ const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
+ SE.getUnknown(IncV));
+ IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
+
+ // If an IV increment can't be folded, use it as the next IV value.
+ if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
+ TTI)) {
+ assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
+ IVSrc = IVOper;
+ LeftOverExpr = 0;
+ }
+ }
+ Type *OperTy = IncI->IVOperand->getType();
+ if (IVTy != OperTy) {
+ assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
+ "cannot extend a chained IV");
+ IRBuilder<> Builder(InsertPt);
+ IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
+ }
+ IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
+ DeadInsts.push_back(IncI->IVOperand);
+ }
+ // If LSR created a new, wider phi, we may also replace its postinc. We only
+ // do this if we also found a wide value for the head of the chain.
+ if (isa<PHINode>(Chain.tailUserInst())) {
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
+ if (!isCompatibleIVType(Phi, IVSrc))
+ continue;
+ Instruction *PostIncV = dyn_cast<Instruction>(
+ Phi->getIncomingValueForBlock(L->getLoopLatch()));
+ if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
+ continue;
+ Value *IVOper = IVSrc;
+ Type *PostIncTy = PostIncV->getType();
+ if (IVTy != PostIncTy) {
+ assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
+ IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
+ Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
+ IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
+ }
+ Phi->replaceUsesOfWith(PostIncV, IVOper);
+ DeadInsts.push_back(PostIncV);
+ }
+ }
+}
+
+void LSRInstance::CollectFixupsAndInitialFormulae() {
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ Instruction *UserInst = UI->getUser();
+ // Skip IV users that are part of profitable IV Chains.
+ User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
+ UI->getOperandValToReplace());
+ assert(UseI != UserInst->op_end() && "cannot find IV operand");
+ if (IVIncSet.count(UseI))
+ continue;
+
+ // Record the uses.
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = UserInst;
+ LF.OperandValToReplace = UI->getOperandValToReplace();
+ LF.PostIncLoops = UI->getPostIncLoops();
+
+ LSRUse::KindType Kind = LSRUse::Basic;
+ Type *AccessTy = 0;
+ if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
+ Kind = LSRUse::Address;
+ AccessTy = getAccessType(LF.UserInst);
+ }
+
+ const SCEV *S = IU.getExpr(*UI);
+
+ // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
+ // (N - i == 0), and this allows (N - i) to be the expression that we work
+ // with rather than just N or i, so we can consider the register
+ // requirements for both N and i at the same time. Limiting this code to
+ // equality icmps is not a problem because all interesting loops use
+ // equality icmps, thanks to IndVarSimplify.
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
+ if (CI->isEquality()) {
+ // Swap the operands if needed to put the OperandValToReplace on the
+ // left, for consistency.
+ Value *NV = CI->getOperand(1);
+ if (NV == LF.OperandValToReplace) {
+ CI->setOperand(1, CI->getOperand(0));
+ CI->setOperand(0, NV);
+ NV = CI->getOperand(1);
+ Changed = true;
+ }
+
+ // x == y --> x - y == 0
+ const SCEV *N = SE.getSCEV(NV);
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
+ // S is normalized, so normalize N before folding it into S
+ // to keep the result normalized.
+ N = TransformForPostIncUse(Normalize, N, CI, 0,
+ LF.PostIncLoops, SE, DT);
+ Kind = LSRUse::ICmpZero;
+ S = SE.getMinusSCEV(N, S);
+ }
+
+ // -1 and the negations of all interesting strides (except the negation
+ // of -1) are now also interesting.
+ for (size_t i = 0, e = Factors.size(); i != e; ++i)
+ if (Factors[i] != -1)
+ Factors.insert(-(uint64_t)Factors[i]);
+ Factors.insert(-1);
+ }
+
+ // Set up the initial formula for this use.
+ std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
+
+ // If this is the first use of this LSRUse, give it a formula.
+ if (LU.Formulae.empty()) {
+ InsertInitialFormula(S, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), LF.LUIdx);
+ }
+ }
+
+ DEBUG(print_fixups(dbgs()));
+}
+
+/// InsertInitialFormula - Insert a formula for the given expression into
+/// the given use, separating out loop-variant portions from loop-invariant
+/// and loop-computable portions.
+void
+LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ Formula F;
+ F.InitialMatch(S, L, SE);
+ bool Inserted = InsertFormula(LU, LUIdx, F);
+ assert(Inserted && "Initial formula already exists!"); (void)Inserted;
+}
+
+/// InsertSupplementalFormula - Insert a simple single-register formula for
+/// the given expression into the given use.
+void
+LSRInstance::InsertSupplementalFormula(const SCEV *S,
+ LSRUse &LU, size_t LUIdx) {
+ Formula F;
+ F.BaseRegs.push_back(S);
+ F.HasBaseReg = true;
+ bool Inserted = InsertFormula(LU, LUIdx, F);
+ assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
+}
+
+/// CountRegisters - Note which registers are used by the given formula,
+/// updating RegUses.
+void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
+ if (F.ScaledReg)
+ RegUses.CountRegister(F.ScaledReg, LUIdx);
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I)
+ RegUses.CountRegister(*I, LUIdx);
+}
+
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ if (!LU.InsertFormula(F))
+ return false;
+
+ CountRegisters(F, LUIdx);
+ return true;
+}
+
+/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
+/// loop-invariant values which we're tracking. These other uses will pin these
+/// values in registers, making them less profitable for elimination.
+/// TODO: This currently misses non-constant addrec step registers.
+/// TODO: Should this give more weight to users inside the loop?
+void
+LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
+ SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
+ SmallPtrSet<const SCEV *, 8> Inserted;
+
+ while (!Worklist.empty()) {
+ const SCEV *S = Worklist.pop_back_val();
+
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
+ Worklist.append(N->op_begin(), N->op_end());
+ else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ Worklist.push_back(C->getOperand());
+ else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ Worklist.push_back(D->getLHS());
+ Worklist.push_back(D->getRHS());
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (!Inserted.insert(U)) continue;
+ const Value *V = U->getValue();
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ // Look for instructions defined outside the loop.
+ if (L->contains(Inst)) continue;
+ } else if (isa<UndefValue>(V))
+ // Undef doesn't have a live range, so it doesn't matter.
+ continue;
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const Instruction *UserInst = dyn_cast<Instruction>(*UI);
+ // Ignore non-instructions.
+ if (!UserInst)
+ continue;
+ // Ignore instructions in other functions (as can happen with
+ // Constants).
+ if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
+ continue;
+ // Ignore instructions not dominated by the loop.
+ const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
+ UserInst->getParent() :
+ cast<PHINode>(UserInst)->getIncomingBlock(
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
+ if (!DT.dominates(L->getHeader(), UseBB))
+ continue;
+ // Ignore uses which are part of other SCEV expressions, to avoid
+ // analyzing them multiple times.
+ if (SE.isSCEVable(UserInst->getType())) {
+ const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
+ // If the user is a no-op, look through to its uses.
+ if (!isa<SCEVUnknown>(UserS))
+ continue;
+ if (UserS == U) {
+ Worklist.push_back(
+ SE.getUnknown(const_cast<Instruction *>(UserInst)));
+ continue;
+ }
+ }
+ // Ignore icmp instructions which are already being analyzed.
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
+ unsigned OtherIdx = !UI.getOperandNo();
+ Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
+ if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
+ continue;
+ }
+
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = const_cast<Instruction *>(UserInst);
+ LF.OperandValToReplace = UI.getUse();
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
+ InsertSupplementalFormula(U, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), Uses.size() - 1);
+ break;
+ }
+ }
+ }
+}
+
+/// CollectSubexprs - Split S into subexpressions which can be pulled out into
+/// separate registers. If C is non-null, multiply each subexpression by C.
+///
+/// Return remainder expression after factoring the subexpressions captured by
+/// Ops. If Ops is complete, return NULL.
+static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L,
+ ScalarEvolution &SE,
+ unsigned Depth = 0) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return S;
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ // Break out add operands.
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ }
+ return NULL;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Split a non-zero base out of an addrec.
+ if (AR->getStart()->isZero())
+ return S;
+
+ const SCEV *Remainder = CollectSubexprs(AR->getStart(),
+ C, Ops, L, SE, Depth+1);
+ // Split the non-zero AddRec unless it is part of a nested recurrence that
+ // does not pertain to this loop.
+ if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ Remainder = NULL;
+ }
+ if (Remainder != AR->getStart()) {
+ if (!Remainder)
+ Remainder = SE.getConstant(AR->getType(), 0);
+ return SE.getAddRecExpr(Remainder,
+ AR->getStepRecurrence(SE),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
+ }
+ } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ // Break (C * (a + b + c)) into C*a + C*b + C*c.
+ if (Mul->getNumOperands() != 2)
+ return S;
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
+ const SCEV *Remainder =
+ CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(SE.getMulExpr(C, Remainder));
+ return NULL;
+ }
+ }
+ return S;
+}
+
+/// GenerateReassociations - Split out subexpressions from adds and the bases of
+/// addrecs.
+void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
+ Formula Base,
+ unsigned Depth) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3) return;
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *BaseReg = Base.BaseRegs[i];
+
+ SmallVector<const SCEV *, 8> AddOps;
+ const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
+
+ if (AddOps.size() == 1) continue;
+
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+ JE = AddOps.end(); J != JE; ++J) {
+
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
+ continue;
+
+ // Don't pull a constant into a register if the constant could be folded
+ // into an immediate field.
+ if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, *J, Base.getNumRegs() > 1))
+ continue;
+
+ // Collect all operands except *J.
+ SmallVector<const SCEV *, 8> InnerAddOps
+ (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append
+ (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+
+ // Don't leave just a constant behind in a register if the constant could
+ // be folded into an immediate field.
+ if (InnerAddOps.size() == 1 &&
+ isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
+ continue;
+
+ const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+ if (InnerSum->isZero())
+ continue;
+ Formula F = Base;
+
+ // Add the remaining pieces of the add back into the new formula.
+ const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
+ if (InnerSumSC &&
+ SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue();
+ F.BaseRegs.erase(F.BaseRegs.begin() + i);
+ } else
+ F.BaseRegs[i] = InnerSum;
+
+ // Add J as its own register, or an unfolded immediate.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
+ if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue();
+ else
+ F.BaseRegs.push_back(*J);
+
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like
+ // it.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
+ }
+ }
+}
+
+/// GenerateCombinations - Generate a formula consisting of all of the
+/// loop-dominating registers added into a single register.
+void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // This method is only interesting on a plurality of registers.
+ if (Base.BaseRegs.size() <= 1) return;
+
+ Formula F = Base;
+ F.BaseRegs.clear();
+ SmallVector<const SCEV *, 4> Ops;
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
+ const SCEV *BaseReg = *I;
+ if (SE.properlyDominates(BaseReg, L->getHeader()) &&
+ !SE.hasComputableLoopEvolution(BaseReg, L))
+ Ops.push_back(BaseReg);
+ else
+ F.BaseRegs.push_back(BaseReg);
+ }
+ if (Ops.size() > 1) {
+ const SCEV *Sum = SE.getAddExpr(Ops);
+ // TODO: If Sum is zero, it probably means ScalarEvolution missed an
+ // opportunity to fold something. For now, just ignore such cases
+ // rather than proceed with zero in a register.
+ if (!Sum->isZero()) {
+ F.BaseRegs.push_back(Sum);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+}
+
+/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
+void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // We can't add a symbolic offset if the address already contains one.
+ if (Base.BaseGV) return;
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *G = Base.BaseRegs[i];
+ GlobalValue *GV = ExtractSymbol(G, SE);
+ if (G->isZero() || !GV)
+ continue;
+ Formula F = Base;
+ F.BaseGV = GV;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ continue;
+ F.BaseRegs[i] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+}
+
+/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
+void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // TODO: For now, just add the min and max offset, because it usually isn't
+ // worthwhile looking at everything inbetween.
+ SmallVector<int64_t, 2> Worklist;
+ Worklist.push_back(LU.MinOffset);
+ if (LU.MaxOffset != LU.MinOffset)
+ Worklist.push_back(LU.MaxOffset);
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *G = Base.BaseRegs[i];
+
+ for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ Formula F = Base;
+ F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+ if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ LU.AccessTy, F)) {
+ // Add the offset to the base register.
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ // If it cancelled out, drop the base register, otherwise update it.
+ if (NewG->isZero()) {
+ std::swap(F.BaseRegs[i], F.BaseRegs.back());
+ F.BaseRegs.pop_back();
+ } else
+ F.BaseRegs[i] = NewG;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+
+ int64_t Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm == 0)
+ continue;
+ Formula F = Base;
+ F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ continue;
+ F.BaseRegs[i] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+}
+
+/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
+/// the comparison. For example, x == y -> x*c == y*c.
+void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ if (LU.Kind != LSRUse::ICmpZero) return;
+
+ // Determine the integer type for the base formula.
+ Type *IntTy = Base.getType();
+ if (!IntTy) return;
+ if (SE.getTypeSizeInBits(IntTy) > 64) return;
+
+ // Don't do this if there is more than one offset.
+ if (LU.MinOffset != LU.MaxOffset) return;
+
+ assert(!Base.BaseGV && "ICmpZero use is not legal!");
+
+ // Check each interesting stride.
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ int64_t Factor = *I;
+
+ // Check that the multiplication doesn't overflow.
+ if (Base.BaseOffset == INT64_MIN && Factor == -1)
+ continue;
+ int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
+ if (NewBaseOffset / Factor != Base.BaseOffset)
+ continue;
+
+ // Check that multiplying with the use offset doesn't overflow.
+ int64_t Offset = LU.MinOffset;
+ if (Offset == INT64_MIN && Factor == -1)
+ continue;
+ Offset = (uint64_t)Offset * Factor;
+ if (Offset / Factor != LU.MinOffset)
+ continue;
+
+ Formula F = Base;
+ F.BaseOffset = NewBaseOffset;
+
+ // Check that this scale is legal.
+ if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
+ continue;
+
+ // Compensate for the use having MinOffset built into it.
+ F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
+
+ const SCEV *FactorS = SE.getConstant(IntTy, Factor);
+
+ // Check that multiplying with each base register doesn't overflow.
+ for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
+ F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
+ if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
+ goto next;
+ }
+
+ // Check that multiplying with the scaled register doesn't overflow.
+ if (F.ScaledReg) {
+ F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
+ if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
+ continue;
+ }
+
+ // Check that multiplying with the unfolded offset doesn't overflow.
+ if (F.UnfoldedOffset != 0) {
+ if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
+ continue;
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
+ if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
+ continue;
+ }
+
+ // If we make it here and it's legal, add it.
+ (void)InsertFormula(LU, LUIdx, F);
+ next:;
+ }
+}
+
+/// GenerateScales - Generate stride factor reuse formulae by making use of
+/// scaled-offset address modes, for example.
+void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
+ // Determine the integer type for the base formula.
+ Type *IntTy = Base.getType();
+ if (!IntTy) return;
+
+ // If this Formula already has a scaled register, we can't add another one.
+ if (Base.Scale != 0) return;
+
+ // Check each interesting stride.
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ int64_t Factor = *I;
+
+ Base.Scale = Factor;
+ Base.HasBaseReg = Base.BaseRegs.size() > 1;
+ // Check whether this scale is going to be legal.
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ Base)) {
+ // As a special-case, handle special out-of-loop Basic users specially.
+ // TODO: Reconsider this special case.
+ if (LU.Kind == LSRUse::Basic &&
+ isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
+ LU.AccessTy, Base) &&
+ LU.AllFixupsOutsideLoop)
+ LU.Kind = LSRUse::Special;
+ else
+ continue;
+ }
+ // For an ICmpZero, negating a solitary base register won't lead to
+ // new solutions.
+ if (LU.Kind == LSRUse::ICmpZero &&
+ !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
+ continue;
+ // For each addrec base reg, apply the scale, if possible.
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ if (const SCEVAddRecExpr *AR =
+ dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
+ const SCEV *FactorS = SE.getConstant(IntTy, Factor);
+ if (FactorS->isZero())
+ continue;
+ // Divide out the factor, ignoring high bits, since we'll be
+ // scaling the value back up in the end.
+ if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
+ // TODO: This could be optimized to avoid all the copying.
+ Formula F = Base;
+ F.ScaledReg = Quotient;
+ F.DeleteBaseReg(F.BaseRegs[i]);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+ }
+}
+
+/// GenerateTruncates - Generate reuse formulae from different IV types.
+void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
+ // Don't bother truncating symbolic values.
+ if (Base.BaseGV) return;
+
+ // Determine the integer type for the base formula.
+ Type *DstTy = Base.getType();
+ if (!DstTy) return;
+ DstTy = SE.getEffectiveSCEVType(DstTy);
+
+ for (SmallSetVector<Type *, 4>::const_iterator
+ I = Types.begin(), E = Types.end(); I != E; ++I) {
+ Type *SrcTy = *I;
+ if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
+ Formula F = Base;
+
+ if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
+ for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J)
+ *J = SE.getAnyExtendExpr(*J, SrcTy);
+
+ // TODO: This assumes we've done basic processing on all uses and
+ // have an idea what the register usage is.
+ if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
+ continue;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+}
+
+namespace {
+
+/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
+/// defer modifications so that the search phase doesn't have to worry about
+/// the data structures moving underneath it.
+struct WorkItem {
+ size_t LUIdx;
+ int64_t Imm;
+ const SCEV *OrigReg;
+
+ WorkItem(size_t LI, int64_t I, const SCEV *R)
+ : LUIdx(LI), Imm(I), OrigReg(R) {}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+void WorkItem::print(raw_ostream &OS) const {
+ OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
+ << " , add offset " << Imm;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void WorkItem::dump() const {
+ print(errs()); errs() << '\n';
+}
+#endif
+
+/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
+/// distance apart and try to form reuse opportunities between them.
+void LSRInstance::GenerateCrossUseConstantOffsets() {
+ // Group the registers by their value without any added constant offset.
+ typedef std::map<int64_t, const SCEV *> ImmMapTy;
+ typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
+ RegMapTy Map;
+ DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
+ SmallVector<const SCEV *, 8> Sequence;
+ for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+ I != E; ++I) {
+ const SCEV *Reg = *I;
+ int64_t Imm = ExtractImmediate(Reg, SE);
+ std::pair<RegMapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Reg, ImmMapTy()));
+ if (Pair.second)
+ Sequence.push_back(Reg);
+ Pair.first->second.insert(std::make_pair(Imm, *I));
+ UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
+ }
+
+ // Now examine each set of registers with the same base value. Build up
+ // a list of work to do and do the work in a separate step so that we're
+ // not adding formulae and register counts while we're searching.
+ SmallVector<WorkItem, 32> WorkItems;
+ SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
+ E = Sequence.end(); I != E; ++I) {
+ const SCEV *Reg = *I;
+ const ImmMapTy &Imms = Map.find(Reg)->second;
+
+ // It's not worthwhile looking for reuse if there's only one offset.
+ if (Imms.size() == 1)
+ continue;
+
+ DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
+ for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+ J != JE; ++J)
+ dbgs() << ' ' << J->first;
+ dbgs() << '\n');
+
+ // Examine each offset.
+ for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+ J != JE; ++J) {
+ const SCEV *OrigReg = J->second;
+
+ int64_t JImm = J->first;
+ const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
+
+ if (!isa<SCEVConstant>(OrigReg) &&
+ UsedByIndicesMap[Reg].count() == 1) {
+ DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
+ continue;
+ }
+
+ // Conservatively examine offsets between this orig reg a few selected
+ // other orig regs.
+ ImmMapTy::const_iterator OtherImms[] = {
+ Imms.begin(), prior(Imms.end()),
+ Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+ };
+ for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
+ ImmMapTy::const_iterator M = OtherImms[i];
+ if (M == J || M == JE) continue;
+
+ // Compute the difference between the two.
+ int64_t Imm = (uint64_t)JImm - M->first;
+ for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
+ LUIdx = UsedByIndices.find_next(LUIdx))
+ // Make a memo of this use, offset, and register tuple.
+ if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
+ WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
+ }
+ }
+ }
+
+ Map.clear();
+ Sequence.clear();
+ UsedByIndicesMap.clear();
+ UniqueItems.clear();
+
+ // Now iterate through the worklist and add new formulae.
+ for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
+ E = WorkItems.end(); I != E; ++I) {
+ const WorkItem &WI = *I;
+ size_t LUIdx = WI.LUIdx;
+ LSRUse &LU = Uses[LUIdx];
+ int64_t Imm = WI.Imm;
+ const SCEV *OrigReg = WI.OrigReg;
+
+ Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
+ const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
+ unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
+
+ // TODO: Use a more targeted data structure.
+ for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
+ const Formula &F = LU.Formulae[L];
+ // Use the immediate in the scaled register.
+ if (F.ScaledReg == OrigReg) {
+ int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
+ // Don't create 50 + reg(-50).
+ if (F.referencesReg(SE.getSCEV(
+ ConstantInt::get(IntTy, -(uint64_t)Offset))))
+ continue;
+ Formula NewF = F;
+ NewF.BaseOffset = Offset;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ NewF))
+ continue;
+ NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
+
+ // If the new scale is a constant in a register, and adding the constant
+ // value to the immediate would produce a value closer to zero than the
+ // immediate itself, then the formula isn't worthwhile.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
+ if (C->getValue()->isNegative() !=
+ (NewF.BaseOffset < 0) &&
+ (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
+ .ule(abs64(NewF.BaseOffset)))
+ continue;
+
+ // OK, looks good.
+ (void)InsertFormula(LU, LUIdx, NewF);
+ } else {
+ // Use the immediate in a base register.
+ for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
+ const SCEV *BaseReg = F.BaseRegs[N];
+ if (BaseReg != OrigReg)
+ continue;
+ Formula NewF = F;
+ NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, NewF)) {
+ if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ continue;
+ NewF = F;
+ NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
+ }
+ NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
+
+ // If the new formula has a constant in a register, and adding the
+ // constant value to the immediate would produce a value closer to
+ // zero than the immediate itself, then the formula isn't worthwhile.
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
+ J != JE; ++J)
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
+ if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
+ abs64(NewF.BaseOffset)) &&
+ (C->getValue()->getValue() +
+ NewF.BaseOffset).countTrailingZeros() >=
+ CountTrailingZeros_64(NewF.BaseOffset))
+ goto skip_formula;
+
+ // Ok, looks good.
+ (void)InsertFormula(LU, LUIdx, NewF);
+ break;
+ skip_formula:;
+ }
+ }
+ }
+ }
+}
+
+/// GenerateAllReuseFormulae - Generate formulae for each use.
+void
+LSRInstance::GenerateAllReuseFormulae() {
+ // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
+ // queries are more precise.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateScales(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
+ }
+
+ GenerateCrossUseConstantOffsets();
+
+ DEBUG(dbgs() << "\n"
+ "After generating reuse formulae:\n";
+ print_uses(dbgs()));
+}
+
+/// If there are multiple formulae with the same set of registers used
+/// by other uses, pick the best one and delete the others.
+void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+ DenseSet<const SCEV *> VisitedRegs;
+ SmallPtrSet<const SCEV *, 16> Regs;
+ SmallPtrSet<const SCEV *, 16> LoserRegs;
+#ifndef NDEBUG
+ bool ChangedFormulae = false;
+#endif
+
+ // Collect the best formula for each unique set of shared registers. This
+ // is reset for each use.
+ typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
+ BestFormulaeTy;
+ BestFormulaeTy BestFormulae;
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
+
+ bool Any = false;
+ for (size_t FIdx = 0, NumForms = LU.Formulae.size();
+ FIdx != NumForms; ++FIdx) {
+ Formula &F = LU.Formulae[FIdx];
+
+ // Some formulas are instant losers. For example, they may depend on
+ // nonexistent AddRecs from other loops. These need to be filtered
+ // immediately, otherwise heuristics could choose them over others leading
+ // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
+ // avoids the need to recompute this information across formulae using the
+ // same bad AddRec. Passing LoserRegs is also essential unless we remove
+ // the corresponding bad register from the Regs set.
+ Cost CostF;
+ Regs.clear();
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ &LoserRegs);
+ if (CostF.isLoser()) {
+ // During initial formula generation, undesirable formulae are generated
+ // by uses within other loops that have some non-trivial address mode or
+ // use the postinc form of the IV. LSR needs to provide these formulae
+ // as the basis of rediscovering the desired formula that uses an AddRec
+ // corresponding to the existing phi. Once all formulae have been
+ // generated, these initial losers may be pruned.
+ DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+ dbgs() << "\n");
+ }
+ else {
+ SmallVector<const SCEV *, 4> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (P.second)
+ continue;
+
+ Formula &Best = LU.Formulae[P.first->second];
+
+ Cost CostBest;
+ Regs.clear();
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ if (CostF < CostBest)
+ std::swap(F, Best);
+ DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
+ dbgs() << "\n"
+ " in favor of formula "; Best.print(dbgs());
+ dbgs() << '\n');
+ }
+#ifndef NDEBUG
+ ChangedFormulae = true;
+#endif
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
+ }
+
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+
+ // Reset this to prepare for the next use.
+ BestFormulae.clear();
+ }
+
+ DEBUG(if (ChangedFormulae) {
+ dbgs() << "\n"
+ "After filtering out undesirable candidates:\n";
+ print_uses(dbgs());
+ });
+}
+
+// This is a rough guess that seems to work fairly well.
+static const size_t ComplexityLimit = UINT16_MAX;
+
+/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
+/// solutions the solver might have to consider. It almost never considers
+/// this many solutions because it prune the search space, but the pruning
+/// isn't always sufficient.
+size_t LSRInstance::EstimateSearchSpaceComplexity() const {
+ size_t Power = 1;
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ size_t FSize = I->Formulae.size();
+ if (FSize >= ComplexityLimit) {
+ Power = ComplexityLimit;
+ break;
+ }
+ Power *= FSize;
+ if (Power >= ComplexityLimit)
+ break;
+ }
+ return Power;
+}
+
+/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
+/// of the registers of another formula, it won't help reduce register
+/// pressure (though it may not necessarily hurt register pressure); remove
+/// it to simplify the system.
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
+ "which use a superset of registers used by other "
+ "formulae.\n");
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ bool Any = false;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ // Look for a formula with a constant or GV in a register. If the use
+ // also has a formula with that same value in an immediate field,
+ // delete the one that uses a register.
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
+ Formula NewF = F;
+ NewF.BaseOffset += C->getValue()->getSExtValue();
+ NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+ (I - F.BaseRegs.begin()));
+ if (LU.HasFormulaWithSameRegs(NewF)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ break;
+ }
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
+ if (!F.BaseGV) {
+ Formula NewF = F;
+ NewF.BaseGV = GV;
+ NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+ (I - F.BaseRegs.begin()));
+ if (LU.HasFormulaWithSameRegs(NewF)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
+/// for expressions like A, A+1, A+2, etc., allocate a single register for
+/// them.
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ return;
+
+ DEBUG(dbgs() << "The search space is too complex.\n"
+ "Narrowing the search space by assuming that uses separated "
+ "by a constant offset will use the same registers.\n");
+
+ // This is especially useful for unrolled loops.
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.BaseOffset == 0 || F.Scale != 0)
+ continue;
+
+ LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
+ if (!LUThatHas)
+ continue;
+
+ if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
+ LU.Kind, LU.AccessTy))
+ continue;
+
+ DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
+
+ LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.BaseOffset;
+ // Add the new offset to LUThatHas' offset list.
+ if (LUThatHas->Offsets.back() != Fixup.Offset) {
+ LUThatHas->Offsets.push_back(Fixup.Offset);
+ if (Fixup.Offset > LUThatHas->MaxOffset)
+ LUThatHas->MaxOffset = Fixup.Offset;
+ if (Fixup.Offset < LUThatHas->MinOffset)
+ LUThatHas->MinOffset = Fixup.Offset;
+ }
+ DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
+ }
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
+ }
+
+ // Delete formulae from the new use which are no longer legal.
+ bool Any = false;
+ for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+ Formula &F = LUThatHas->Formulae[i];
+ if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
+ LUThatHas->Kind, LUThatHas->AccessTy, F)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LUThatHas->DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ }
+ }
+
+ if (Any)
+ LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+ // Delete the old use.
+ DeleteUse(LU, LUIdx);
+ --LUIdx;
+ --NumUses;
+ break;
+ }
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
+}
+
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
+/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// we've done more filtering, as it may be able to find more formulae to
+/// eliminate.
+void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+ "undesirable dedicated registers.\n");
+
+ FilterOutUndesirableDedicatedRegisters();
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
+/// to be profitable, and then in any use which has any reference to that
+/// register, delete all formulae which do not reference that register.
+void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
+ // With all other options exhausted, loop until the system is simple
+ // enough to handle.
+ SmallPtrSet<const SCEV *, 4> Taken;
+ while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ // Ok, we have too many of formulae on our hands to conveniently handle.
+ // Use a rough heuristic to thin out the list.
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ // Pick the register which is used by the most LSRUses, which is likely
+ // to be a good reuse register candidate.
+ const SCEV *Best = 0;
+ unsigned BestNum = 0;
+ for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+ I != E; ++I) {
+ const SCEV *Reg = *I;
+ if (Taken.count(Reg))
+ continue;
+ if (!Best)
+ Best = Reg;
+ else {
+ unsigned Count = RegUses.getUsedByIndices(Reg).count();
+ if (Count > BestNum) {
+ Best = Reg;
+ BestNum = Count;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
+ << " will yield profitable reuse.\n");
+ Taken.insert(Best);
+
+ // In any use with formulae which references this register, delete formulae
+ // which don't reference it.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ if (!LU.Regs.count(Best)) continue;
+
+ bool Any = false;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ if (!F.referencesReg(Best)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --e;
+ --i;
+ Any = true;
+ assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
+ continue;
+ }
+ }
+
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+ NarrowSearchSpaceByDetectingSupersets();
+ NarrowSearchSpaceByCollapsingUnrolledCode();
+ NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ NarrowSearchSpaceByPickingWinnerRegs();
+}
+
+/// SolveRecurse - This is the recursive solver.
+void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+ Cost &SolutionCost,
+ SmallVectorImpl<const Formula *> &Workspace,
+ const Cost &CurCost,
+ const SmallPtrSet<const SCEV *, 16> &CurRegs,
+ DenseSet<const SCEV *> &VisitedRegs) const {
+ // Some ideas:
+ // - prune more:
+ // - use more aggressive filtering
+ // - sort the formula so that the most profitable solutions are found first
+ // - sort the uses too
+ // - search faster:
+ // - don't compute a cost, and then compare. compare while computing a cost
+ // and bail early.
+ // - track register sets with SmallBitVector
+
+ const LSRUse &LU = Uses[Workspace.size()];
+
+ // If this use references any register that's already a part of the
+ // in-progress solution, consider it a requirement that a formula must
+ // reference that register in order to be considered. This prunes out
+ // unprofitable searching.
+ SmallSetVector<const SCEV *, 4> ReqRegs;
+ for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
+ E = CurRegs.end(); I != E; ++I)
+ if (LU.Regs.count(*I))
+ ReqRegs.insert(*I);
+
+ SmallPtrSet<const SCEV *, 16> NewRegs;
+ Cost NewCost;
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+
+ // Ignore formulae which do not use any of the required registers.
+ bool SatisfiedReqReg = true;
+ for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
+ JE = ReqRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if ((!F.ScaledReg || F.ScaledReg != Reg) &&
+ std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+ F.BaseRegs.end()) {
+ SatisfiedReqReg = false;
+ break;
+ }
+ }
+ if (!SatisfiedReqReg) {
+ // If none of the formulae satisfied the required registers, then we could
+ // clear ReqRegs and try again. Currently, we simply give up in this case.
+ continue;
+ }
+
+ // Evaluate the cost of the current formula. If it's already worse than
+ // the current best, prune the search at that point.
+ NewCost = CurCost;
+ NewRegs = CurRegs;
+ NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+ if (NewCost < SolutionCost) {
+ Workspace.push_back(&F);
+ if (Workspace.size() != Uses.size()) {
+ SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
+ NewRegs, VisitedRegs);
+ if (F.getNumRegs() == 1 && Workspace.size() == 1)
+ VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
+ } else {
+ DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
+ dbgs() << ".\n Regs:";
+ for (SmallPtrSet<const SCEV *, 16>::const_iterator
+ I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
+ dbgs() << ' ' << **I;
+ dbgs() << '\n');
+
+ SolutionCost = NewCost;
+ Solution = Workspace;
+ }
+ Workspace.pop_back();
+ }
+ }
+}
+
+/// Solve - Choose one formula from each use. Return the results in the given
+/// Solution vector.
+void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
+ SmallVector<const Formula *, 8> Workspace;
+ Cost SolutionCost;
+ SolutionCost.Loose();
+ Cost CurCost;
+ SmallPtrSet<const SCEV *, 16> CurRegs;
+ DenseSet<const SCEV *> VisitedRegs;
+ Workspace.reserve(Uses.size());
+
+ // SolveRecurse does all the work.
+ SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
+ CurRegs, VisitedRegs);
+ if (Solution.empty()) {
+ DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
+ return;
+ }
+
+ // Ok, we've now made all our decisions.
+ DEBUG(dbgs() << "\n"
+ "The chosen solution requires "; SolutionCost.print(dbgs());
+ dbgs() << ":\n";
+ for (size_t i = 0, e = Uses.size(); i != e; ++i) {
+ dbgs() << " ";
+ Uses[i].print(dbgs());
+ dbgs() << "\n"
+ " ";
+ Solution[i]->print(dbgs());
+ dbgs() << '\n';
+ });
+
+ assert(Solution.size() == Uses.size() && "Malformed solution!");
+}
+
+/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
+/// the dominator tree far as we can go while still being dominated by the
+/// input positions. This helps canonicalize the insert position, which
+/// encourages sharing.
+BasicBlock::iterator
+LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs)
+ const {
+ for (;;) {
+ const Loop *IPLoop = LI.getLoopFor(IP->getParent());
+ unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
+
+ BasicBlock *IDom;
+ for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
+ if (!Rung) return IP;
+ Rung = Rung->getIDom();
+ if (!Rung) return IP;
+ IDom = Rung->getBlock();
+
+ // Don't climb into a loop though.
+ const Loop *IDomLoop = LI.getLoopFor(IDom);
+ unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
+ if (IDomDepth <= IPLoopDepth &&
+ (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
+ break;
+ }
+
+ bool AllDominate = true;
+ Instruction *BetterPos = 0;
+ Instruction *Tentative = IDom->getTerminator();
+ for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
+ E = Inputs.end(); I != E; ++I) {
+ Instruction *Inst = *I;
+ if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
+ AllDominate = false;
+ break;
+ }
+ // Attempt to find an insert position in the middle of the block,
+ // instead of at the end, so that it can be used for other expansions.
+ if (IDom == Inst->getParent() &&
+ (!BetterPos || !DT.dominates(Inst, BetterPos)))
+ BetterPos = llvm::next(BasicBlock::iterator(Inst));
+ }
+ if (!AllDominate)
+ break;
+ if (BetterPos)
+ IP = BetterPos;
+ else
+ IP = Tentative;
+ }
+
+ return IP;
+}
+
+/// AdjustInsertPositionForExpand - Determine an input position which will be
+/// dominated by the operands and which will dominate the result.
+BasicBlock::iterator
+LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
+ const LSRFixup &LF,
+ const LSRUse &LU,
+ SCEVExpander &Rewriter) const {
+ // Collect some instructions which must be dominated by the
+ // expanding replacement. These must be dominated by any operands that
+ // will be required in the expansion.
+ SmallVector<Instruction *, 4> Inputs;
+ if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
+ Inputs.push_back(I);
+ if (LU.Kind == LSRUse::ICmpZero)
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
+ Inputs.push_back(I);
+ if (LF.PostIncLoops.count(L)) {
+ if (LF.isUseFullyOutsideLoop(L))
+ Inputs.push_back(L->getLoopLatch()->getTerminator());
+ else
+ Inputs.push_back(IVIncInsertPos);
+ }
+ // The expansion must also be dominated by the increment positions of any
+ // loops it for which it is using post-inc mode.
+ for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
+ E = LF.PostIncLoops.end(); I != E; ++I) {
+ const Loop *PIL = *I;
+ if (PIL == L) continue;
+
+ // Be dominated by the loop exit.
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ PIL->getExitingBlocks(ExitingBlocks);
+ if (!ExitingBlocks.empty()) {
+ BasicBlock *BB = ExitingBlocks[0];
+ for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
+ BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
+ Inputs.push_back(BB->getTerminator());
+ }
+ }
+
+ assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
+ && !isa<DbgInfoIntrinsic>(LowestIP) &&
+ "Insertion point must be a normal instruction");
+
+ // Then, climb up the immediate dominator tree as far as we can go while
+ // still being dominated by the input positions.
+ BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
+
+ // Don't insert instructions before PHI nodes.
+ while (isa<PHINode>(IP)) ++IP;
+
+ // Ignore landingpad instructions.
+ while (isa<LandingPadInst>(IP)) ++IP;
+
+ // Ignore debug intrinsics.
+ while (isa<DbgInfoIntrinsic>(IP)) ++IP;
+
+ // Set IP below instructions recently inserted by SCEVExpander. This keeps the
+ // IP consistent across expansions and allows the previously inserted
+ // instructions to be reused by subsequent expansion.
+ while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
+
+ return IP;
+}
+
+/// Expand - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding").
+Value *LSRInstance::Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
+ const LSRUse &LU = Uses[LF.LUIdx];
+
+ // Determine an input position which will be dominated by the operands and
+ // which will dominate the result.
+ IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
+
+ // Inform the Rewriter if we have a post-increment use, so that it can
+ // perform an advantageous expansion.
+ Rewriter.setPostInc(LF.PostIncLoops);
+
+ // This is the type that the user actually needs.
+ Type *OpTy = LF.OperandValToReplace->getType();
+ // This will be the type that we'll initially expand to.
+ Type *Ty = F.getType();
+ if (!Ty)
+ // No type known; just expand directly to the ultimate type.
+ Ty = OpTy;
+ else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
+ // Expand directly to the ultimate type if it's the right size.
+ Ty = OpTy;
+ // This is the type to do integer arithmetic in.
+ Type *IntTy = SE.getEffectiveSCEVType(Ty);
+
+ // Build up a list of operands to add together to form the full base.
+ SmallVector<const SCEV *, 8> Ops;
+
+ // Expand the BaseRegs portion.
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I) {
+ const SCEV *Reg = *I;
+ assert(!Reg->isZero() && "Zero allocated in a base register!");
+
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ Reg = TransformForPostIncUse(Denormalize, Reg,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
+
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+ }
+
+ // Expand the ScaledReg portion.
+ Value *ICmpScaledV = 0;
+ if (F.Scale != 0) {
+ const SCEV *ScaledS = F.ScaledReg;
+
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
+
+ if (LU.Kind == LSRUse::ICmpZero) {
+ // An interesting way of "folding" with an icmp is to use a negated
+ // scale, which we'll implement by inserting it into the other operand
+ // of the icmp.
+ assert(F.Scale == -1 &&
+ "The only scale supported by ICmpZero uses is -1!");
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+ } else {
+ // Otherwise just expand the scaled register and an explicit scale,
+ // which is expected to be matched as part of the address.
+
+ // Flush the operand list to suppress SCEVExpander hoisting address modes.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
+ ScaledS = SE.getMulExpr(ScaledS,
+ SE.getConstant(ScaledS->getType(), F.Scale));
+ Ops.push_back(ScaledS);
+ }
+ }
+
+ // Expand the GV portion.
+ if (F.BaseGV) {
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+ Ops.push_back(SE.getUnknown(F.BaseGV));
+ }
+
+ // Flush the operand list to suppress SCEVExpander hoisting of both folded and
+ // unfolded offsets. LSR assumes they both live next to their uses.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+
+ // Expand the immediate portion.
+ int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
+ if (Offset != 0) {
+ if (LU.Kind == LSRUse::ICmpZero) {
+ // The other interesting way of "folding" with an ICmpZero is to use a
+ // negated immediate.
+ if (!ICmpScaledV)
+ ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
+ else {
+ Ops.push_back(SE.getUnknown(ICmpScaledV));
+ ICmpScaledV = ConstantInt::get(IntTy, Offset);
+ }
+ } else {
+ // Just add the immediate values. These again are expected to be matched
+ // as part of the address.
+ Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
+ }
+ }
+
+ // Expand the unfolded offset portion.
+ int64_t UnfoldedOffset = F.UnfoldedOffset;
+ if (UnfoldedOffset != 0) {
+ // Just add the immediate values.
+ Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
+ UnfoldedOffset)));
+ }
+
+ // Emit instructions summing all the operands.
+ const SCEV *FullS = Ops.empty() ?
+ SE.getConstant(IntTy, 0) :
+ SE.getAddExpr(Ops);
+ Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
+
+ // We're done expanding now, so reset the rewriter.
+ Rewriter.clearPostInc();
+
+ // An ICmpZero Formula represents an ICmp which we're handling as a
+ // comparison against zero. Now that we've expanded an expression for that
+ // form, update the ICmp's other operand.
+ if (LU.Kind == LSRUse::ICmpZero) {
+ ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
+ DeadInsts.push_back(CI->getOperand(1));
+ assert(!F.BaseGV && "ICmp does not support folding a global value and "
+ "a scale at the same time!");
+ if (F.Scale == -1) {
+ if (ICmpScaledV->getType() != OpTy) {
+ Instruction *Cast =
+ CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
+ OpTy, false),
+ ICmpScaledV, OpTy, "tmp", CI);
+ ICmpScaledV = Cast;
+ }
+ CI->setOperand(1, ICmpScaledV);
+ } else {
+ assert(F.Scale == 0 &&
+ "ICmp does not support folding a global value and "
+ "a scale at the same time!");
+ Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
+ -(uint64_t)Offset);
+ if (C->getType() != OpTy)
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ OpTy, false),
+ C, OpTy);
+
+ CI->setOperand(1, C);
+ }
+ }
+
+ return FullV;
+}
+
+/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
+/// of their operands effectively happens in their predecessor blocks, so the
+/// expression may need to be expanded in multiple places.
+void LSRInstance::RewriteForPHI(PHINode *PN,
+ const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ DenseMap<BasicBlock *, Value *> Inserted;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+
+ // If this is a critical edge, split the edge so that we do not insert
+ // the code on all predecessor/successor paths. We do this unless this
+ // is the canonical backedge for this loop, which complicates post-inc
+ // users.
+ if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(BB->getTerminator())) {
+ BasicBlock *Parent = PN->getParent();
+ Loop *PNLoop = LI.getLoopFor(Parent);
+ if (!PNLoop || Parent != PNLoop->getHeader()) {
+ // Split the critical edge.
+ BasicBlock *NewBB = 0;
+ if (!Parent->isLandingPad()) {
+ NewBB = SplitCriticalEdge(BB, Parent, P,
+ /*MergeIdenticalEdges=*/true,
+ /*DontDeleteUselessPhis=*/true);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
+ NewBB = NewBBs[0];
+ }
+ // If NewBB==NULL, then SplitCriticalEdge refused to split because all
+ // phi predecessors are identical. The simple thing to do is skip
+ // splitting in this case rather than complicate the API.
+ if (NewBB) {
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
+ }
+ }
+
+ std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ if (!Pair.second)
+ PN->setIncomingValue(i, Pair.first->second);
+ else {
+ Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy)
+ FullV =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false,
+ OpTy, false),
+ FullV, LF.OperandValToReplace->getType(),
+ "tmp", BB->getTerminator());
+
+ PN->setIncomingValue(i, FullV);
+ Pair.first->second = FullV;
+ }
+ }
+}
+
+/// Rewrite - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding"), and update the UserInst to reference
+/// the newly expanded value.
+void LSRInstance::Rewrite(const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ // First, find an insertion point that dominates UserInst. For PHI nodes,
+ // find the nearest block which dominates all the relevant uses.
+ if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
+ RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
+ } else {
+ Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy) {
+ Instruction *Cast =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
+ FullV, OpTy, "tmp", LF.UserInst);
+ FullV = Cast;
+ }
+
+ // Update the user. ICmpZero is handled specially here (for now) because
+ // Expand may have updated one of the operands of the icmp already, and
+ // its new value may happen to be equal to LF.OperandValToReplace, in
+ // which case doing replaceUsesOfWith leads to replacing both operands
+ // with the same value. TODO: Reorganize this.
+ if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
+ LF.UserInst->setOperand(0, FullV);
+ else
+ LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
+ }
+
+ DeadInsts.push_back(LF.OperandValToReplace);
+}
+
+/// ImplementSolution - Rewrite all the fixup locations with new values,
+/// following the chosen solution.
+void
+LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+ Pass *P) {
+ // Keep track of instructions we may have made dead, so that
+ // we can remove them after we are done working.
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ SCEVExpander Rewriter(SE, "lsr");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
+ Rewriter.disableCanonicalMode();
+ Rewriter.enableLSRMode();
+ Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
+
+ // Mark phi nodes that terminate chains so the expander tries to reuse them.
+ for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
+ ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
+ if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
+ Rewriter.setChainedPhi(PN);
+ }
+
+ // Expand the new value definitions and update the users.
+ for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ const LSRFixup &Fixup = *I;
+
+ Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
+
+ Changed = true;
+ }
+
+ for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
+ ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
+ GenerateIVChain(*ChainI, Rewriter, DeadInsts);
+ Changed = true;
+ }
+ // Clean up after ourselves. This must be done before deleting any
+ // instructions.
+ Rewriter.clear();
+
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+}
+
+LSRInstance::LSRInstance(Loop *L, Pass *P)
+ : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
+ DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
+ TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
+ IVIncInsertPos(0) {
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return;
+
+ // If there's no interesting work to be done, bail early.
+ if (IU.empty()) return;
+
+ // If there's too much analysis to be done, bail early. We won't be able to
+ // model the problem anyway.
+ unsigned NumUsers = 0;
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ if (++NumUsers > MaxIVUsers) {
+ DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
+ << "\n");
+ return;
+ }
+ }
+
+#ifndef NDEBUG
+ // All dominating loops must have preheaders, or SCEVExpander may not be able
+ // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
+ //
+ // IVUsers analysis should only create users that are dominated by simple loop
+ // headers. Since this loop should dominate all of its users, its user list
+ // should be empty if this loop itself is not within a simple loop nest.
+ for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
+ Rung; Rung = Rung->getIDom()) {
+ BasicBlock *BB = Rung->getBlock();
+ const Loop *DomLoop = LI.getLoopFor(BB);
+ if (DomLoop && DomLoop->getHeader() == BB) {
+ assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
+ }
+ }
+#endif // DEBUG
+
+ DEBUG(dbgs() << "\nLSR on loop ";
+ WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+ dbgs() << ":\n");
+
+ // First, perform some low-level loop optimizations.
+ OptimizeShadowIV();
+ OptimizeLoopTermCond();
+
+ // If loop preparation eliminates all interesting IV users, bail.
+ if (IU.empty()) return;
+
+ // Skip nested loops until we can model them better with formulae.
+ if (!L->empty()) {
+ DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
+ return;
+ }
+
+ // Start collecting data and preparing for the solver.
+ CollectChains();
+ CollectInterestingTypesAndFactors();
+ CollectFixupsAndInitialFormulae();
+ CollectLoopInvariantFixupsAndFormulae();
+
+ assert(!Uses.empty() && "IVUsers reported at least one use");
+ DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
+ print_uses(dbgs()));
+
+ // Now use the reuse data to generate a bunch of interesting ways
+ // to formulate the values needed for the uses.
+ GenerateAllReuseFormulae();
+
+ FilterOutUndesirableDedicatedRegisters();
+ NarrowSearchSpaceUsingHeuristics();
+
+ SmallVector<const Formula *, 8> Solution;
+ Solve(Solution);
+
+ // Release memory that is no longer needed.
+ Factors.clear();
+ Types.clear();
+ RegUses.clear();
+
+ if (Solution.empty())
+ return;
+
+#ifndef NDEBUG
+ // Formulae should be legal.
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
+ I != E; ++I) {
+ const LSRUse &LU = *I;
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end();
+ J != JE; ++J)
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ *J) && "Illegal formula generated!");
+ };
+#endif
+
+ // Now that we've decided what we want, make it so.
+ ImplementSolution(Solution, P);
+}
+
+void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
+ if (Factors.empty() && Types.empty()) return;
+
+ OS << "LSR has identified the following interesting factors and types: ";
+ bool First = true;
+
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ if (!First) OS << ", ";
+ First = false;
+ OS << '*' << *I;
+ }
+
+ for (SmallSetVector<Type *, 4>::const_iterator
+ I = Types.begin(), E = Types.end(); I != E; ++I) {
+ if (!First) OS << ", ";
+ First = false;
+ OS << '(' << **I << ')';
+ }
+ OS << '\n';
+}
+
+void LSRInstance::print_fixups(raw_ostream &OS) const {
+ OS << "LSR is examining the following fixup sites:\n";
+ for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ dbgs() << " ";
+ I->print(OS);
+ OS << '\n';
+ }
+}
+
+void LSRInstance::print_uses(raw_ostream &OS) const {
+ OS << "LSR is examining the following uses:\n";
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ const LSRUse &LU = *I;
+ dbgs() << " ";
+ LU.print(OS);
+ OS << '\n';
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end(); J != JE; ++J) {
+ OS << " ";
+ J->print(OS);
+ OS << '\n';
+ }
+ }
+}
+
+void LSRInstance::print(raw_ostream &OS) const {
+ print_factors_and_types(OS);
+ print_fixups(OS);
+ print_uses(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LSRInstance::dump() const {
+ print(errs()); errs() << '\n';
+}
+#endif
+
+namespace {
+
+class LoopStrengthReduce : public LoopPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopStrengthReduce();
+
+private:
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+}
+
+char LoopStrengthReduce::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+
+
+Pass *llvm::createLoopStrengthReducePass() {
+ return new LoopStrengthReduce();
+}
+
+LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
+ initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+}
+
+void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
+ // We split critical edges, so we change the CFG. However, we do update
+ // many analyses if they are around.
+ AU.addPreservedID(LoopSimplifyID);
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ // Requiring LoopSimplify a second time here prevents IVUsers from running
+ // twice, since LoopSimplify was invalidated by running ScalarEvolution.
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<IVUsers>();
+ AU.addRequired<TargetTransformInfo>();
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+ bool Changed = false;
+
+ // Run the main LSR transformation.
+ Changed |= LSRInstance(L, this).getChanged();
+
+ // Remove any extra phis created by processing inner loops.
+ Changed |= DeleteDeadPHIs(L->getHeader());
+ if (EnablePhiElim && L->isLoopSimplifyForm()) {
+ SmallVector<WeakVH, 16> DeadInsts;
+ SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
+ unsigned numFolded =
+ Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
+ DeadInsts,
+ &getAnalysis<TargetTransformInfo>());
+ if (numFolded) {
+ Changed = true;
+ DeleteTriviallyDeadInstructions(DeadInsts);
+ DeleteDeadPHIs(L->getHeader());
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
new file mode 100644
index 000000000000..80d060b926ea
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -0,0 +1,238 @@
+//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a simple loop unroller. It works best when loops have
+// been canonicalized by the -indvars pass, allowing it to determine the trip
+// counts of loops easily.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <climits>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
+ cl::desc("The cut-off point for automatic loop unrolling"));
+
+static cl::opt<unsigned>
+UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+ cl::desc("Use this unroll count for all loops, for testing purposes"));
+
+static cl::opt<bool>
+UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+ cl::desc("Allows loops to be partially unrolled until "
+ "-unroll-threshold loop size is reached."));
+
+static cl::opt<bool>
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+ cl::desc("Unroll loops with run-time trip counts"));
+
+namespace {
+ class LoopUnroll : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) {
+ CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
+ CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
+ CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
+
+ UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// A magic value for use with the Threshold parameter to indicate
+ /// that the loop unroll should be performed regardless of how much
+ /// code expansion would result.
+ static const unsigned NoThreshold = UINT_MAX;
+
+ // Threshold to use when optsize is specified (and there is no
+ // explicit -unroll-threshold).
+ static const unsigned OptSizeUnrollThreshold = 50;
+
+ // Default unroll count for loops with run-time trip count if
+ // -unroll-count is not set
+ static const unsigned UnrollRuntimeCount = 8;
+
+ unsigned CurrentCount;
+ unsigned CurrentThreshold;
+ bool CurrentAllowPartial;
+ bool UserThreshold; // CurrentThreshold is user-specified.
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTree>();
+ }
+ };
+}
+
+char LoopUnroll::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
+ return new LoopUnroll(Threshold, Count, AllowPartial);
+}
+
+/// ApproximateLoopSize - Approximate the size of the loop.
+static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
+ bool &NotDuplicatable,
+ const TargetTransformInfo &TTI) {
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I, TTI);
+ NumCalls = Metrics.NumInlineCandidates;
+ NotDuplicatable = Metrics.notDuplicatable;
+
+ unsigned LoopSize = Metrics.NumInsts;
+
+ // Don't allow an estimate of size zero. This would allows unrolling of loops
+ // with huge iteration counts, which is a compile time problem even if it's
+ // not a problem for code quality.
+ if (LoopSize == 0) LoopSize = 1;
+
+ return LoopSize;
+}
+
+bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
+ const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+
+ BasicBlock *Header = L->getHeader();
+ DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
+ << "] Loop %" << Header->getName() << "\n");
+ (void)Header;
+
+ // Determine the current unrolling threshold. While this is normally set
+ // from UnrollThreshold, it is overridden to a smaller value if the current
+ // function is marked as optimize-for-size, and the unroll threshold was
+ // not user specified.
+ unsigned Threshold = CurrentThreshold;
+ if (!UserThreshold &&
+ Header->getParent()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
+ Threshold = OptSizeUnrollThreshold;
+
+ // Find trip count and trip multiple if count is not available
+ unsigned TripCount = 0;
+ unsigned TripMultiple = 1;
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
+ }
+ // Use a default unroll-count if the user doesn't specify a value
+ // and the trip count is a run-time value. The default is different
+ // for run-time or compile-time trip count loops.
+ unsigned Count = CurrentCount;
+ if (UnrollRuntime && CurrentCount == 0 && TripCount == 0)
+ Count = UnrollRuntimeCount;
+
+ if (Count == 0) {
+ // Conservative heuristic: if we know the trip count, see if we can
+ // completely unroll (subject to the threshold, checked below); otherwise
+ // try to find greatest modulo of the trip count which is still under
+ // threshold value.
+ if (TripCount == 0)
+ return false;
+ Count = TripCount;
+ }
+
+ // Enforce the threshold.
+ if (Threshold != NoThreshold) {
+ unsigned NumInlineCandidates;
+ bool notDuplicatable;
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
+ notDuplicatable, TTI);
+ DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ if (notDuplicatable) {
+ DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable"
+ << " instructions.\n");
+ return false;
+ }
+ if (NumInlineCandidates != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
+ return false;
+ }
+ uint64_t Size = (uint64_t)LoopSize*Count;
+ if (TripCount != 1 && Size > Threshold) {
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << Threshold << "\n");
+ if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) {
+ DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ return false;
+ }
+ if (TripCount) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling
+ Count = Threshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0)
+ Count--;
+ }
+ else if (UnrollRuntime) {
+ // Reduce unroll count to be a lower power-of-two value
+ while (Count != 0 && Size > Threshold) {
+ Count >>= 1;
+ Size = LoopSize*Count;
+ }
+ }
+ if (Count < 2) {
+ DEBUG(dbgs() << " could not unroll partially\n");
+ return false;
+ }
+ DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
+ }
+ }
+
+ // Unroll the loop.
+ if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM))
+ return false;
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
new file mode 100644
index 000000000000..0e8199f2fd5c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -0,0 +1,1289 @@
+//===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops that contain branches on loop-invariant conditions
+// to have multiple loops. For example, it turns the left into the right code:
+//
+// for (...) if (lic)
+// A for (...)
+// if (lic) A; B; C
+// B else
+// C for (...)
+// A; C
+//
+// This can increase the size of the code exponentially (doubling it every time
+// a loop is unswitched) so we only unswitch if the resultant code will be
+// smaller than a threshold.
+//
+// This pass expects LICM to be run before it to hoist invariant conditions out
+// of the loop, to make the unswitching opportunity obvious.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unswitch"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <map>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumBranches, "Number of branches unswitched");
+STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumSelects , "Number of selects unswitched");
+STATISTIC(NumTrivial , "Number of unswitches that are trivial");
+STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
+STATISTIC(TotalInsts, "Total number of instructions analyzed");
+
+// The specific value of 100 here was chosen based only on intuition and a
+// few specific examples.
+static cl::opt<unsigned>
+Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
+ cl::init(100), cl::Hidden);
+
+namespace {
+
+ class LUAnalysisCache {
+
+ typedef DenseMap<const SwitchInst*, SmallPtrSet<const Value *, 8> >
+ UnswitchedValsMap;
+
+ typedef UnswitchedValsMap::iterator UnswitchedValsIt;
+
+ struct LoopProperties {
+ unsigned CanBeUnswitchedCount;
+ unsigned SizeEstimation;
+ UnswitchedValsMap UnswitchedVals;
+ };
+
+ // Here we use std::map instead of DenseMap, since we need to keep valid
+ // LoopProperties pointer for current loop for better performance.
+ typedef std::map<const Loop*, LoopProperties> LoopPropsMap;
+ typedef LoopPropsMap::iterator LoopPropsMapIt;
+
+ LoopPropsMap LoopsProperties;
+ UnswitchedValsMap* CurLoopInstructions;
+ LoopProperties* CurrentLoopProperties;
+
+ // Max size of code we can produce on remained iterations.
+ unsigned MaxSize;
+
+ public:
+
+ LUAnalysisCache() :
+ CurLoopInstructions(NULL), CurrentLoopProperties(NULL),
+ MaxSize(Threshold)
+ {}
+
+ // Analyze loop. Check its size, calculate is it possible to unswitch
+ // it. Returns true if we can unswitch this loop.
+ bool countLoop(const Loop* L, const TargetTransformInfo &TTI);
+
+ // Clean all data related to given loop.
+ void forgetLoop(const Loop* L);
+
+ // Mark case value as unswitched.
+ // Since SI instruction can be partly unswitched, in order to avoid
+ // extra unswitching in cloned loops keep track all unswitched values.
+ void setUnswitched(const SwitchInst* SI, const Value* V);
+
+ // Check was this case value unswitched before or not.
+ bool isUnswitched(const SwitchInst* SI, const Value* V);
+
+ // Clone all loop-unswitch related loop properties.
+ // Redistribute unswitching quotas.
+ // Note, that new loop data is stored inside the VMap.
+ void cloneData(const Loop* NewLoop, const Loop* OldLoop,
+ const ValueToValueMapTy& VMap);
+ };
+
+ class LoopUnswitch : public LoopPass {
+ LoopInfo *LI; // Loop information
+ LPPassManager *LPM;
+
+ // LoopProcessWorklist - Used to check if second loop needs processing
+ // after RewriteLoopBodyWithConditionConstant rewrites first loop.
+ std::vector<Loop*> LoopProcessWorklist;
+
+ LUAnalysisCache BranchesInfo;
+
+ bool OptimizeForSize;
+ bool redoLoop;
+
+ Loop *currentLoop;
+ DominatorTree *DT;
+ BasicBlock *loopHeader;
+ BasicBlock *loopPreheader;
+
+ // LoopBlocks contains all of the basic blocks of the loop, including the
+ // preheader of the loop, the body of the loop, and the exit blocks of the
+ // loop, in that order.
+ std::vector<BasicBlock*> LoopBlocks;
+ // NewBlocks contained cloned copy of basic blocks from LoopBlocks.
+ std::vector<BasicBlock*> NewBlocks;
+
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopUnswitch(bool Os = false) :
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ currentLoop(NULL), DT(NULL), loopHeader(NULL),
+ loopPreheader(NULL) {
+ initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool processCurrentLoop();
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+
+ private:
+
+ virtual void releaseMemory() {
+ BranchesInfo.forgetLoop(currentLoop);
+ }
+
+ /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
+ /// remove it.
+ void RemoveLoopFromWorklist(Loop *L) {
+ std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(),
+ LoopProcessWorklist.end(), L);
+ if (I != LoopProcessWorklist.end())
+ LoopProcessWorklist.erase(I);
+ }
+
+ void initLoopData() {
+ loopHeader = currentLoop->getHeader();
+ loopPreheader = currentLoop->getLoopPreheader();
+ }
+
+ /// Split all of the edges from inside the loop to their exit blocks.
+ /// Update the appropriate Phi nodes as we do so.
+ void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks);
+
+ bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
+ void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
+ BasicBlock *ExitBlock);
+ void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L);
+
+ void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
+ Constant *Val, bool isEqual);
+
+ void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest,
+ BasicBlock *FalseDest,
+ Instruction *InsertPt);
+
+ void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
+ void RemoveBlockIfDead(BasicBlock *BB,
+ std::vector<Instruction*> &Worklist, Loop *l);
+ void RemoveLoopFromHierarchy(Loop *L);
+ bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
+ BasicBlock **LoopExit = 0);
+
+ };
+}
+
+// Analyze loop. Check its size, calculate is it possible to unswitch
+// it. Returns true if we can unswitch this loop.
+bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
+
+ std::pair<LoopPropsMapIt, bool> InsertRes =
+ LoopsProperties.insert(std::make_pair(L, LoopProperties()));
+
+ LoopProperties& Props = InsertRes.first->second;
+
+ if (InsertRes.second) {
+ // New loop.
+
+ // Limit the number of instructions to avoid causing significant code
+ // expansion, and the number of basic blocks, to avoid loops with
+ // large numbers of branches which cause loop unswitching to go crazy.
+ // This is a very ad-hoc heuristic.
+
+ // FIXME: This is overly conservative because it does not take into
+ // consideration code simplification opportunities and code that can
+ // be shared by the resultant unswitched loops.
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(),
+ E = L->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I, TTI);
+
+ Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
+ Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
+ MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
+
+ if (Metrics.notDuplicatable) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << L->getHeader()->getName() << ", contents cannot be "
+ << "duplicated!\n");
+ return false;
+ }
+ }
+
+ if (!Props.CanBeUnswitchedCount) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << L->getHeader()->getName() << ", cost too high: "
+ << L->getBlocks().size() << "\n");
+
+ return false;
+ }
+
+ // Be careful. This links are good only before new loop addition.
+ CurrentLoopProperties = &Props;
+ CurLoopInstructions = &Props.UnswitchedVals;
+
+ return true;
+}
+
+// Clean all data related to given loop.
+void LUAnalysisCache::forgetLoop(const Loop* L) {
+
+ LoopPropsMapIt LIt = LoopsProperties.find(L);
+
+ if (LIt != LoopsProperties.end()) {
+ LoopProperties& Props = LIt->second;
+ MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation;
+ LoopsProperties.erase(LIt);
+ }
+
+ CurrentLoopProperties = NULL;
+ CurLoopInstructions = NULL;
+}
+
+// Mark case value as unswitched.
+// Since SI instruction can be partly unswitched, in order to avoid
+// extra unswitching in cloned loops keep track all unswitched values.
+void LUAnalysisCache::setUnswitched(const SwitchInst* SI, const Value* V) {
+ (*CurLoopInstructions)[SI].insert(V);
+}
+
+// Check was this case value unswitched before or not.
+bool LUAnalysisCache::isUnswitched(const SwitchInst* SI, const Value* V) {
+ return (*CurLoopInstructions)[SI].count(V);
+}
+
+// Clone all loop-unswitch related loop properties.
+// Redistribute unswitching quotas.
+// Note, that new loop data is stored inside the VMap.
+void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop,
+ const ValueToValueMapTy& VMap) {
+
+ LoopProperties& NewLoopProps = LoopsProperties[NewLoop];
+ LoopProperties& OldLoopProps = *CurrentLoopProperties;
+ UnswitchedValsMap& Insts = OldLoopProps.UnswitchedVals;
+
+ // Reallocate "can-be-unswitched quota"
+
+ --OldLoopProps.CanBeUnswitchedCount;
+ unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
+ NewLoopProps.CanBeUnswitchedCount = Quota / 2;
+ OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
+
+ NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation;
+
+ // Clone unswitched values info:
+ // for new loop switches we clone info about values that was
+ // already unswitched and has redundant successors.
+ for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); ++I) {
+ const SwitchInst* OldInst = I->first;
+ Value* NewI = VMap.lookup(OldInst);
+ const SwitchInst* NewInst = cast_or_null<SwitchInst>(NewI);
+ assert(NewInst && "All instructions that are in SrcBB must be in VMap.");
+
+ NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
+ }
+}
+
+char LoopUnswitch::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
+
+Pass *llvm::createLoopUnswitchPass(bool Os) {
+ return new LoopUnswitch(Os);
+}
+
+/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
+/// invariant in the loop, or has an invariant piece, return the invariant.
+/// Otherwise, return null.
+static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+
+ // We started analyze new instruction, increment scanned instructions counter.
+ ++TotalInsts;
+
+ // We can never unswitch on vector conditions.
+ if (Cond->getType()->isVectorTy())
+ return 0;
+
+ // Constants should be folded, not unswitched on!
+ if (isa<Constant>(Cond)) return 0;
+
+ // TODO: Handle: br (VARIANT|INVARIANT).
+
+ // Hoist simple values out.
+ if (L->makeLoopInvariant(Cond, Changed))
+ return Cond;
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
+ if (BO->getOpcode() == Instruction::And ||
+ BO->getOpcode() == Instruction::Or) {
+ // If either the left or right side is invariant, we can unswitch on this,
+ // which will cause the branch to go away in one loop and the condition to
+ // simplify in the other one.
+ if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed))
+ return LHS;
+ if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed))
+ return RHS;
+ }
+
+ return 0;
+}
+
+bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
+ LI = &getAnalysis<LoopInfo>();
+ LPM = &LPM_Ref;
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ currentLoop = L;
+ Function *F = currentLoop->getHeader()->getParent();
+ bool Changed = false;
+ do {
+ assert(currentLoop->isLCSSAForm(*DT));
+ redoLoop = false;
+ Changed |= processCurrentLoop();
+ } while(redoLoop);
+
+ if (Changed) {
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (DT)
+ DT->runOnFunction(*F);
+ }
+ return Changed;
+}
+
+/// processCurrentLoop - Do actual work and unswitch loop if possible
+/// and profitable.
+bool LoopUnswitch::processCurrentLoop() {
+ bool Changed = false;
+
+ initLoopData();
+
+ // If LoopSimplify was unable to form a preheader, don't do any unswitching.
+ if (!loopPreheader)
+ return false;
+
+ // Loops with indirectbr cannot be cloned.
+ if (!currentLoop->isSafeToClone())
+ return false;
+
+ // Without dedicated exits, splitting the exit edge may fail.
+ if (!currentLoop->hasDedicatedExits())
+ return false;
+
+ LLVMContext &Context = loopHeader->getContext();
+
+ // Probably we reach the quota of branches for this loop. If so
+ // stop unswitching.
+ if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>()))
+ return false;
+
+ // Loop over all of the basic blocks in the loop. If we find an interior
+ // block that is branching on a loop-invariant condition, we can unswitch this
+ // loop.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end(); I != E; ++I) {
+ TerminatorInst *TI = (*I)->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ // If this isn't branching on an invariant condition, we can't unswitch
+ // it.
+ if (BI->isConditional()) {
+ // See if this, or some part of it, is loop invariant. If so, we can
+ // unswitch on it if we desire.
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue(Context))) {
+ ++NumBranches;
+ return true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ unsigned NumCases = SI->getNumCases();
+ if (LoopCond && NumCases) {
+ // Find a value to unswitch on:
+ // FIXME: this should chose the most expensive case!
+ // FIXME: scan for a case with a non-critical edge?
+ Constant *UnswitchVal = NULL;
+
+ // Do not process same value again and again.
+ // At this point we have some cases already unswitched and
+ // some not yet unswitched. Let's find the first not yet unswitched one.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ Constant* UnswitchValCandidate = i.getCaseValue();
+ if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
+ UnswitchVal = UnswitchValCandidate;
+ break;
+ }
+ }
+
+ if (!UnswitchVal)
+ continue;
+
+ if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
+ ++NumSwitches;
+ return true;
+ }
+ }
+ }
+
+ // Scan the instructions to check for unswitchable values.
+ for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
+ BBI != E; ++BBI)
+ if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue(Context))) {
+ ++NumSelects;
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
+/// loop with no side effects (including infinite loops).
+///
+/// If true, we return true and set ExitBB to the block we
+/// exit through.
+///
+static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
+ BasicBlock *&ExitBB,
+ std::set<BasicBlock*> &Visited) {
+ if (!Visited.insert(BB).second) {
+ // Already visited. Without more analysis, this could indicate an infinite
+ // loop.
+ return false;
+ } else if (!L->contains(BB)) {
+ // Otherwise, this is a loop exit, this is fine so long as this is the
+ // first exit.
+ if (ExitBB != 0) return false;
+ ExitBB = BB;
+ return true;
+ }
+
+ // Otherwise, this is an unvisited intra-loop node. Check all successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
+ // Check to see if the successor is a trivial loop exit.
+ if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited))
+ return false;
+ }
+
+ // Okay, everything after this looks good, check to make sure that this block
+ // doesn't include any side effects.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (I->mayHaveSideEffects())
+ return false;
+
+ return true;
+}
+
+/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
+/// leads to an exit from the specified loop, and has no side-effects in the
+/// process. If so, return the block that is exited to, otherwise return null.
+static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
+ std::set<BasicBlock*> Visited;
+ Visited.insert(L->getHeader()); // Branches to header make infinite loops.
+ BasicBlock *ExitBB = 0;
+ if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
+ return ExitBB;
+ return 0;
+}
+
+/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
+/// trivial: that is, that the condition controls whether or not the loop does
+/// anything at all. If this is a trivial condition, unswitching produces no
+/// code duplications (equivalently, it produces a simpler loop and a new empty
+/// loop, which gets deleted).
+///
+/// If this is a trivial condition, return true, otherwise return false. When
+/// returning true, this sets Cond and Val to the condition that controls the
+/// trivial condition: when Cond dynamically equals Val, the loop is known to
+/// exit. Finally, this sets LoopExit to the BB that the loop exits to when
+/// Cond == Val.
+///
+bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
+ BasicBlock **LoopExit) {
+ BasicBlock *Header = currentLoop->getHeader();
+ TerminatorInst *HeaderTerm = Header->getTerminator();
+ LLVMContext &Context = Header->getContext();
+
+ BasicBlock *LoopExitBB = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
+ // If the header block doesn't end with a conditional branch on Cond, we
+ // can't handle it.
+ if (!BI->isConditional() || BI->getCondition() != Cond)
+ return false;
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this.
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(0)))) {
+ if (Val) *Val = ConstantInt::getTrue(Context);
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(1)))) {
+ if (Val) *Val = ConstantInt::getFalse(Context);
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
+ // If this isn't a switch on Cond, we can't handle it.
+ if (SI->getCondition() != Cond) return false;
+
+ // Check to see if a successor of the switch is guaranteed to go to the
+ // latch block or exit through a one exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock* LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
+ i.getCaseSuccessor()))) {
+ // Okay, we found a trivial case, remember the value that is trivial.
+ ConstantInt* CaseVal = i.getCaseValue();
+
+ // Check that it was not unswitched before, since already unswitched
+ // trivial vals are looks trivial too.
+ if (BranchesInfo.isUnswitched(SI, CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ if (Val) *Val = CaseVal;
+ break;
+ }
+ }
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit block
+ // contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ if (LoopExit) *LoopExit = LoopExitBB;
+
+ // We already know that nothing uses any scalar values defined inside of this
+ // loop. As such, we just have to check to see if this loop will execute any
+ // side-effecting instructions (e.g. stores, calls, volatile loads) in the
+ // part of the loop that the code *would* execute. We already checked the
+ // tail, check the header now.
+ for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
+ if (I->mayHaveSideEffects())
+ return false;
+ return true;
+}
+
+/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
+/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
+/// unswitch the loop, reprocess the pieces, then return true.
+bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
+ Function *F = loopHeader->getParent();
+ Constant *CondVal = 0;
+ BasicBlock *ExitBlock = 0;
+
+ if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
+ // If the condition is trivial, always unswitch. There is no code growth
+ // for this case.
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock);
+ return true;
+ }
+
+ // Check to see if it would be profitable to unswitch current loop.
+
+ // Do not do non-trivial unswitch while optimizing for size.
+ if (OptimizeForSize ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
+ return false;
+
+ UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
+ return true;
+}
+
+/// CloneLoop - Recursively clone the specified loop and all of its children,
+/// mapping the blocks with the specified map.
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
+ LoopInfo *LI, LPPassManager *LPM) {
+ Loop *New = new Loop();
+ LPM->insertLoop(New, PL);
+
+ // Add all of the blocks in L to the new loop.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L)
+ New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), LI->getBase());
+
+ // Add all of the subloops to the new loop.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ CloneLoop(*I, New, VM, LI, LPM);
+
+ return New;
+}
+
+/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values
+/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the
+/// code immediately before InsertPt.
+void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest,
+ BasicBlock *FalseDest,
+ Instruction *InsertPt) {
+ // Insert a conditional branch on LIC to the two preheaders. The original
+ // code is the true version and the new code is the false version.
+ Value *BranchVal = LIC;
+ if (!isa<ConstantInt>(Val) ||
+ Val->getType() != Type::getInt1Ty(LIC->getContext()))
+ BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val);
+ else if (Val != ConstantInt::getTrue(Val->getContext()))
+ // We want to enter the new loop when the condition is true.
+ std::swap(TrueDest, FalseDest);
+
+ // Insert the new branch.
+ BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+
+ // If either edge is critical, split it. This helps preserve LoopSimplify
+ // form for enclosing loops.
+ SplitCriticalEdge(BI, 0, this, false, false, true);
+ SplitCriticalEdge(BI, 1, this, false, false, true);
+}
+
+/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
+/// condition in it (a cond branch from its header block to its latch block,
+/// where the path through the loop that doesn't execute its body has no
+/// side-effects), unswitch it. This doesn't involve any code duplication, just
+/// moving the conditional branch outside of the loop and updating loop info.
+void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
+ Constant *Val,
+ BasicBlock *ExitBlock) {
+ DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << L->getHeader()->getParent()->getName()
+ << " on cond: " << *Val << " == " << *Cond << "\n");
+
+ // First step, split the preheader, so that we know that there is a safe place
+ // to insert the conditional branch. We will change loopPreheader to have a
+ // conditional branch on Cond.
+ BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this);
+
+ // Now that we have a place to insert the conditional branch, create a place
+ // to branch to: this is the exit block out of the loop that we should
+ // short-circuit to.
+
+ // Split this block now, so that the loop maintains its exit block, and so
+ // that the jump from the preheader can execute the contents of the exit block
+ // without actually branching to it (the exit block should be dominated by the
+ // loop header, not the preheader).
+ assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
+ BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
+
+ // Okay, now we have a position to branch from and a position to branch to,
+ // insert the new conditional branch.
+ EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
+ loopPreheader->getTerminator());
+ LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
+ loopPreheader->getTerminator()->eraseFromParent();
+
+ // We need to reprocess this loop, it could be unswitched again.
+ redoLoop = true;
+
+ // Now that we know that the loop is never entered when this condition is a
+ // particular value, rewrite the loop with this info. We know that this will
+ // at least eliminate the old branch.
+ RewriteLoopBodyWithConditionConstant(L, Cond, Val, false);
+ ++NumTrivial;
+}
+
+/// SplitExitEdges - Split all of the edges from inside the loop to their exit
+/// blocks. Update the appropriate Phi nodes as we do so.
+void LoopUnswitch::SplitExitEdges(Loop *L,
+ const SmallVector<BasicBlock *, 8> &ExitBlocks){
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
+ pred_end(ExitBlock));
+
+ // Although SplitBlockPredecessors doesn't preserve loop-simplify in
+ // general, if we call it on all predecessors of all exits then it does.
+ if (!ExitBlock->isLandingPad()) {
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
+ this, NewBBs);
+ }
+ }
+}
+
+/// UnswitchNontrivialCondition - We determined that the loop is profitable
+/// to unswitch when LIC equal Val. Split it into loop versions and test the
+/// condition outside of either loop. Return the loops created as Out1/Out2.
+void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
+ Loop *L) {
+ Function *F = loopHeader->getParent();
+ DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << F->getName()
+ << " when '" << *Val << "' == " << *LIC << "\n");
+
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
+ LoopBlocks.clear();
+ NewBlocks.clear();
+
+ // First step, split the preheader and exit blocks, and add these blocks to
+ // the LoopBlocks list.
+ BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
+ LoopBlocks.push_back(NewPreheader);
+
+ // We want the loop to come after the preheader, but before the exit blocks.
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Split all of the edges from inside the loop to their exit blocks. Update
+ // the appropriate Phi nodes as we do so.
+ SplitExitEdges(L, ExitBlocks);
+
+ // The exit blocks may have been changed due to edge splitting, recompute.
+ ExitBlocks.clear();
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Add exit blocks to the loop blocks.
+ LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
+
+ // Next step, clone all of the basic blocks that make up the loop (including
+ // the loop preheader and exit blocks), keeping track of the mapping between
+ // the instructions and blocks.
+ NewBlocks.reserve(LoopBlocks.size());
+ ValueToValueMapTy VMap;
+ for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
+ BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+
+ NewBlocks.push_back(NewBB);
+ VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
+ LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
+ }
+
+ // Splice the newly inserted blocks into the function right before the
+ // original preheader.
+ F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Now we create the new Loop object for the versioned loop.
+ Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
+
+ // Recalculate unswitching quota, inherit simplified switches info for NewBB,
+ // Probably clone more loop-unswitch related loop properties.
+ BranchesInfo.cloneData(NewLoop, L, VMap);
+
+ Loop *ParentLoop = L->getParentLoop();
+ if (ParentLoop) {
+ // Make sure to add the cloned preheader and exit blocks to the parent loop
+ // as well.
+ ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
+ }
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
+ // The new exit block should be in the same loop as the old one.
+ if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
+ ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
+
+ assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
+ "Exit block should have been split to have one successor!");
+ BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+
+ // If the successor of the exit block had PHI nodes, add an entry for
+ // NewExit.
+ PHINode *PN;
+ for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
+ PN = cast<PHINode>(I);
+ Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
+ ValueToValueMapTy::iterator It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
+ PN->addIncoming(V, NewExit);
+ }
+
+ if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
+ PN = PHINode::Create(LPad->getType(), 0, "",
+ ExitSucc->getFirstInsertionPt());
+
+ for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ LandingPadInst *LPI = BB->getLandingPadInst();
+ LPI->replaceAllUsesWith(PN);
+ PN->addIncoming(LPI, BB);
+ }
+ }
+ }
+
+ // Rewrite the code to refer to itself.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+
+ // Rewrite the original preheader to select between versions of the loop.
+ BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
+ assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
+ "Preheader splitting did not work correctly!");
+
+ // Emit the new branch that selects between the two versions of this loop.
+ EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
+ LPM->deleteSimpleAnalysisValue(OldBR, L);
+ OldBR->eraseFromParent();
+
+ LoopProcessWorklist.push_back(NewLoop);
+ redoLoop = true;
+
+ // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody
+ // deletes the instruction (for example by simplifying a PHI that feeds into
+ // the condition that we're unswitching on), we don't rewrite the second
+ // iteration.
+ WeakVH LICHandle(LIC);
+
+ // Now we rewrite the original code to know that the condition is true and the
+ // new code to know that the condition is false.
+ RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
+
+ // It's possible that simplifying one loop could cause the other to be
+ // changed to another value or a constant. If its a constant, don't simplify
+ // it.
+ if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
+ LICHandle && !isa<Constant>(LICHandle))
+ RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
+}
+
+/// RemoveFromWorklist - Remove all instances of I from the worklist vector
+/// specified.
+static void RemoveFromWorklist(Instruction *I,
+ std::vector<Instruction*> &Worklist) {
+
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I),
+ Worklist.end());
+}
+
+/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
+/// program, replacing all uses with V and update the worklist.
+static void ReplaceUsesOfWith(Instruction *I, Value *V,
+ std::vector<Instruction*> &Worklist,
+ Loop *L, LPPassManager *LPM) {
+ DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
+
+ // Add uses to the worklist, which may be dead now.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+ Worklist.push_back(Use);
+
+ // Add users to the worklist which may be simplified now.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ Worklist.push_back(cast<Instruction>(*UI));
+ LPM->deleteSimpleAnalysisValue(I, L);
+ RemoveFromWorklist(I, Worklist);
+ I->replaceAllUsesWith(V);
+ I->eraseFromParent();
+ ++NumSimplify;
+}
+
+/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
+/// information, and remove any dead successors it has.
+///
+void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
+ std::vector<Instruction*> &Worklist,
+ Loop *L) {
+ if (pred_begin(BB) != pred_end(BB)) {
+ // This block isn't dead, since an edge to BB was just removed, see if there
+ // are any easy simplifications we can do now.
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ // If it has one pred, fold phi nodes in BB.
+ while (isa<PHINode>(BB->begin()))
+ ReplaceUsesOfWith(BB->begin(),
+ cast<PHINode>(BB->begin())->getIncomingValue(0),
+ Worklist, L, LPM);
+
+ // If this is the header of a loop and the only pred is the latch, we now
+ // have an unreachable loop.
+ if (Loop *L = LI->getLoopFor(BB))
+ if (loopHeader == BB && L->contains(Pred)) {
+ // Remove the branch from the latch to the header block, this makes
+ // the header dead, which will make the latch dead (because the header
+ // dominates the latch).
+ LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
+ Pred->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), Pred);
+
+ // The loop is now broken, remove it from LI.
+ RemoveLoopFromHierarchy(L);
+
+ // Reprocess the header, which now IS dead.
+ RemoveBlockIfDead(BB, Worklist, L);
+ return;
+ }
+
+ // If pred ends in a uncond branch, add uncond branch to worklist so that
+ // the two blocks will get merged.
+ if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
+ if (BI->isUnconditional())
+ Worklist.push_back(BI);
+ }
+ return;
+ }
+
+ DEBUG(dbgs() << "Nuking dead block: " << *BB);
+
+ // Remove the instructions in the basic block from the worklist.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ RemoveFromWorklist(I, Worklist);
+
+ // Anything that uses the instructions in this basic block should have their
+ // uses replaced with undefs.
+ // If I is not void type then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // If this is the edge to the header block for a loop, remove the loop and
+ // promote all subloops.
+ if (Loop *BBLoop = LI->getLoopFor(BB)) {
+ if (BBLoop->getLoopLatch() == BB) {
+ RemoveLoopFromHierarchy(BBLoop);
+ if (currentLoop == BBLoop) {
+ currentLoop = 0;
+ redoLoop = false;
+ }
+ }
+ }
+
+ // Remove the block from the loop info, which removes it from any loops it
+ // was in.
+ LI->removeBlock(BB);
+
+
+ // Remove phi node entries in successors for this block.
+ TerminatorInst *TI = BB->getTerminator();
+ SmallVector<BasicBlock*, 4> Succs;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ Succs.push_back(TI->getSuccessor(i));
+ TI->getSuccessor(i)->removePredecessor(BB);
+ }
+
+ // Unique the successors, remove anything with multiple uses.
+ array_pod_sort(Succs.begin(), Succs.end());
+ Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
+
+ // Remove the basic block, including all of the instructions contained in it.
+ LPM->deleteSimpleAnalysisValue(BB, L);
+ BB->eraseFromParent();
+ // Remove successor blocks here that are not dead, so that we know we only
+ // have dead blocks in this list. Nondead blocks have a way of becoming dead,
+ // then getting removed before we revisit them, which is badness.
+ //
+ for (unsigned i = 0; i != Succs.size(); ++i)
+ if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
+ // One exception is loop headers. If this block was the preheader for a
+ // loop, then we DO want to visit the loop so the loop gets deleted.
+ // We know that if the successor is a loop header, that this loop had to
+ // be the preheader: the case where this was the latch block was handled
+ // above and headers can only have two predecessors.
+ if (!LI->isLoopHeader(Succs[i])) {
+ Succs.erase(Succs.begin()+i);
+ --i;
+ }
+ }
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ RemoveBlockIfDead(Succs[i], Worklist, L);
+}
+
+/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
+/// become unwrapped, either because the backedge was deleted, or because the
+/// edge into the header was removed. If the edge into the header from the
+/// latch block was removed, the loop is unwrapped but subloops are still alive,
+/// so they just reparent loops. If the loops are actually dead, they will be
+/// removed later.
+void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) {
+ LPM->deleteLoopFromQueue(L);
+ RemoveLoopFromWorklist(L);
+}
+
+// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
+// the value specified by Val in the specified loop, or we know it does NOT have
+// that value. Rewrite any uses of LIC or of properties correlated to it.
+void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
+ Constant *Val,
+ bool IsEqual) {
+ assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
+
+ // FIXME: Support correlated properties, like:
+ // for (...)
+ // if (li1 < li2)
+ // ...
+ // if (li1 > li2)
+ // ...
+
+ // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
+ // selects, switches.
+ std::vector<Instruction*> Worklist;
+ LLVMContext &Context = Val->getContext();
+
+
+ // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
+ // in the loop with the appropriate one directly.
+ if (IsEqual || (isa<ConstantInt>(Val) &&
+ Val->getType()->isIntegerTy(1))) {
+ Value *Replacement;
+ if (IsEqual)
+ Replacement = Val;
+ else
+ Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
+ !cast<ConstantInt>(Val)->getZExtValue());
+
+ for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
+ UI != E; ++UI) {
+ Instruction *U = dyn_cast<Instruction>(*UI);
+ if (!U || !L->contains(U))
+ continue;
+ Worklist.push_back(U);
+ }
+
+ for (std::vector<Instruction*>::iterator UI = Worklist.begin();
+ UI != Worklist.end(); ++UI)
+ (*UI)->replaceUsesOfWith(LIC, Replacement);
+
+ SimplifyCode(Worklist, L);
+ return;
+ }
+
+ // Otherwise, we don't know the precise value of LIC, but we do know that it
+ // is certainly NOT "Val". As such, simplify any uses in the loop that we
+ // can. This case occurs when we unswitch switch statements.
+ for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
+ UI != E; ++UI) {
+ Instruction *U = dyn_cast<Instruction>(*UI);
+ if (!U || !L->contains(U))
+ continue;
+
+ Worklist.push_back(U);
+
+ // TODO: We could do other simplifications, for example, turning
+ // 'icmp eq LIC, Val' -> false.
+
+ // If we know that LIC is not Val, use this info to simplify code.
+ SwitchInst *SI = dyn_cast<SwitchInst>(U);
+ if (SI == 0 || !isa<ConstantInt>(Val)) continue;
+
+ SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+ // Default case is live for multiple values.
+ if (DeadCase == SI->case_default()) continue;
+
+ // Found a dead case value. Don't remove PHI nodes in the
+ // successor if they become single-entry, those PHI nodes may
+ // be in the Users list.
+
+ BasicBlock *Switch = SI->getParent();
+ BasicBlock *SISucc = DeadCase.getCaseSuccessor();
+ BasicBlock *Latch = L->getLoopLatch();
+
+ BranchesInfo.setUnswitched(SI, Val);
+
+ if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
+ // If the DeadCase successor dominates the loop latch, then the
+ // transformation isn't safe since it will delete the sole predecessor edge
+ // to the latch.
+ if (Latch && DT->dominates(SISucc, Latch))
+ continue;
+
+ // FIXME: This is a hack. We need to keep the successor around
+ // and hooked up so as to preserve the loop structure, because
+ // trying to update it is complicated. So instead we preserve the
+ // loop structure and put the block on a dead code path.
+ SplitEdge(Switch, SISucc, this);
+ // Compute the successors instead of relying on the return value
+ // of SplitEdge, since it may have split the switch successor
+ // after PHI nodes.
+ BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
+ BasicBlock *OldSISucc = *succ_begin(NewSISucc);
+ // Create an "unreachable" destination.
+ BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
+ Switch->getParent(),
+ OldSISucc);
+ new UnreachableInst(Context, Abort);
+ // Force the new case destination to branch to the "unreachable"
+ // block while maintaining a (dead) CFG edge to the old block.
+ NewSISucc->getTerminator()->eraseFromParent();
+ BranchInst::Create(Abort, OldSISucc,
+ ConstantInt::getTrue(Context), NewSISucc);
+ // Release the PHI operands for this edge.
+ for (BasicBlock::iterator II = NewSISucc->begin();
+ PHINode *PN = dyn_cast<PHINode>(II); ++II)
+ PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
+ UndefValue::get(PN->getType()));
+ // Tell the domtree about the new block. We don't fully update the
+ // domtree here -- instead we force it to do a full recomputation
+ // after the pass is complete -- but we do need to inform it of
+ // new blocks.
+ if (DT)
+ DT->addNewBlock(Abort, NewSISucc);
+ }
+
+ SimplifyCode(Worklist, L);
+}
+
+/// SimplifyCode - Okay, now that we have simplified some instructions in the
+/// loop, walk over it and constant prop, dce, and fold control flow where
+/// possible. Note that this is effectively a very simple loop-structure-aware
+/// optimizer. During processing of this loop, L could very well be deleted, so
+/// it must not be used.
+///
+/// FIXME: When the loop optimizer is more mature, separate this out to a new
+/// pass.
+///
+void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+
+ // Simple DCE.
+ if (isInstructionTriviallyDead(I)) {
+ DEBUG(dbgs() << "Remove dead instruction '" << *I);
+
+ // Add uses to the worklist, which may be dead now.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+ Worklist.push_back(Use);
+ LPM->deleteSimpleAnalysisValue(I, L);
+ RemoveFromWorklist(I, Worklist);
+ I->eraseFromParent();
+ ++NumSimplify;
+ continue;
+ }
+
+ // See if instruction simplification can hack this up. This is common for
+ // things like "select false, X, Y" after unswitching made the condition be
+ // 'false'. TODO: update the domtree properly so we can pass it here.
+ if (Value *V = SimplifyInstruction(I))
+ if (LI->replacementPreservesLCSSAForm(I, V)) {
+ ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+ continue;
+ }
+
+ // Special case hacks that appear commonly in unswitched code.
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (BI->isUnconditional()) {
+ // If BI's parent is the only pred of the successor, fold the two blocks
+ // together.
+ BasicBlock *Pred = BI->getParent();
+ BasicBlock *Succ = BI->getSuccessor(0);
+ BasicBlock *SinglePred = Succ->getSinglePredecessor();
+ if (!SinglePred) continue; // Nothing to do.
+ assert(SinglePred == Pred && "CFG broken");
+
+ DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
+ << Succ->getName() << "\n");
+
+ // Resolve any single entry PHI nodes in Succ.
+ while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
+ ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
+
+ // If Succ has any successors with PHI nodes, update them to have
+ // entries coming from Pred instead of Succ.
+ Succ->replaceAllUsesWith(Pred);
+
+ // Move all of the successor contents from Succ to Pred.
+ Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
+ Succ->end());
+ LPM->deleteSimpleAnalysisValue(BI, L);
+ BI->eraseFromParent();
+ RemoveFromWorklist(BI, Worklist);
+
+ // Remove Succ from the loop tree.
+ LI->removeBlock(Succ);
+ LPM->deleteSimpleAnalysisValue(Succ, L);
+ Succ->eraseFromParent();
+ ++NumSimplify;
+ continue;
+ }
+
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
+ // Conditional branch. Turn it into an unconditional branch, then
+ // remove dead blocks.
+ continue; // FIXME: Enable.
+
+ DEBUG(dbgs() << "Folded branch: " << *BI);
+ BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
+ BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
+ DeadSucc->removePredecessor(BI->getParent(), true);
+ Worklist.push_back(BranchInst::Create(LiveSucc, BI));
+ LPM->deleteSimpleAnalysisValue(BI, L);
+ BI->eraseFromParent();
+ RemoveFromWorklist(BI, Worklist);
+ ++NumSimplify;
+
+ RemoveBlockIfDead(DeadSucc, Worklist, L);
+ }
+ continue;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
new file mode 100644
index 000000000000..8ced4946c832
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -0,0 +1,142 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loweratomic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
+ IRBuilder<> Builder(CXI->getParent(), CXI);
+ Value *Ptr = CXI->getPointerOperand();
+ Value *Cmp = CXI->getCompareOperand();
+ Value *Val = CXI->getNewValOperand();
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+
+ CXI->replaceAllUsesWith(Orig);
+ CXI->eraseFromParent();
+ return true;
+}
+
+static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
+ IRBuilder<> Builder(RMWI->getParent(), RMWI);
+ Value *Ptr = RMWI->getPointerOperand();
+ Value *Val = RMWI->getValOperand();
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Res = NULL;
+
+ switch (RMWI->getOperation()) {
+ default: llvm_unreachable("Unexpected RMW operation");
+ case AtomicRMWInst::Xchg:
+ Res = Val;
+ break;
+ case AtomicRMWInst::Add:
+ Res = Builder.CreateAdd(Orig, Val);
+ break;
+ case AtomicRMWInst::Sub:
+ Res = Builder.CreateSub(Orig, Val);
+ break;
+ case AtomicRMWInst::And:
+ Res = Builder.CreateAnd(Orig, Val);
+ break;
+ case AtomicRMWInst::Nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
+ break;
+ case AtomicRMWInst::Or:
+ Res = Builder.CreateOr(Orig, Val);
+ break;
+ case AtomicRMWInst::Xor:
+ Res = Builder.CreateXor(Orig, Val);
+ break;
+ case AtomicRMWInst::Max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
+ Val, Orig);
+ break;
+ case AtomicRMWInst::Min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
+ Orig, Val);
+ break;
+ case AtomicRMWInst::UMax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
+ Val, Orig);
+ break;
+ case AtomicRMWInst::UMin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
+ Orig, Val);
+ break;
+ }
+ Builder.CreateStore(Res, Ptr);
+ RMWI->replaceAllUsesWith(Orig);
+ RMWI->eraseFromParent();
+ return true;
+}
+
+static bool LowerFenceInst(FenceInst *FI) {
+ FI->eraseFromParent();
+ return true;
+}
+
+static bool LowerLoadInst(LoadInst *LI) {
+ LI->setAtomic(NotAtomic);
+ return true;
+}
+
+static bool LowerStoreInst(StoreInst *SI) {
+ SI->setAtomic(NotAtomic);
+ return true;
+}
+
+namespace {
+ struct LowerAtomic : public BasicBlockPass {
+ static char ID;
+ LowerAtomic() : BasicBlockPass(ID) {
+ initializeLowerAtomicPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
+ Instruction *Inst = DI++;
+ if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+ Changed |= LowerFenceInst(FI);
+ else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ Changed |= LowerAtomicCmpXchgInst(CXI);
+ else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst))
+ Changed |= LowerAtomicRMWInst(RMWI);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isAtomic())
+ LowerLoadInst(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isAtomic())
+ LowerStoreInst(SI);
+ }
+ }
+ return Changed;
+ }
+ };
+}
+
+char LowerAtomic::ID = 0;
+INITIALIZE_PASS(LowerAtomic, "loweratomic",
+ "Lower atomic intrinsics to non-atomic form",
+ false, false)
+
+Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
new file mode 100644
index 000000000000..be0f0e8a25f6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -0,0 +1,1020 @@
+//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs various transformations related to eliminating memcpy
+// calls, or transforming sets of stores into memset's.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memcpyopt"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
+STATISTIC(NumMemSetInfer, "Number of memsets inferred");
+STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
+STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
+
+static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
+ bool &VariableIdxFound, const DataLayout &TD){
+ // Skip over the first indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned i = 1; i != Idx; ++i, ++GTI)
+ /*skip along*/;
+
+ // Compute the offset implied by the rest of the indices.
+ int64_t Offset = 0;
+ for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (OpC == 0)
+ return VariableIdxFound = true;
+ if (OpC->isZero()) continue; // No offset.
+
+ // Handle struct indices, which add their field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ continue;
+ }
+
+ // Otherwise, we have a sequential type like an array or vector. Multiply
+ // the index by the ElementSize.
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*OpC->getSExtValue();
+ }
+
+ return Offset;
+}
+
+/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
+/// constant offset, and return that constant offset. For example, Ptr1 might
+/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
+static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
+ const DataLayout &TD) {
+ Ptr1 = Ptr1->stripPointerCasts();
+ Ptr2 = Ptr2->stripPointerCasts();
+ GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
+ GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
+
+ bool VariableIdxFound = false;
+
+ // If one pointer is a GEP and the other isn't, then see if the GEP is a
+ // constant offset from the base, as in "P" and "gep P, 1".
+ if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+ Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
+ if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+ Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
+ // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
+ // base. After that base, they may have some number of common (and
+ // potentially variable) indices. After that they handle some constant
+ // offset, which determines their offset from each other. At this point, we
+ // handle no other case.
+ if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+ return false;
+
+ // Skip any common indices and track the GEP types.
+ unsigned Idx = 1;
+ for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+ if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+ break;
+
+ int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
+ int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+ if (VariableIdxFound) return false;
+
+ Offset = Offset2-Offset1;
+ return true;
+}
+
+
+/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// This allows us to analyze stores like:
+/// store 0 -> P+1
+/// store 0 -> P+0
+/// store 0 -> P+3
+/// store 0 -> P+2
+/// which sometimes happens with stores to arrays of structs etc. When we see
+/// the first store, we make a range [1, 2). The second store extends the range
+/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
+/// two ranges into [0, 3) which is memset'able.
+namespace {
+struct MemsetRange {
+ // Start/End - A semi range that describes the span that this range covers.
+ // The range is closed at the start and open at the end: [Start, End).
+ int64_t Start, End;
+
+ /// StartPtr - The getelementptr instruction that points to the start of the
+ /// range.
+ Value *StartPtr;
+
+ /// Alignment - The known alignment of the first store.
+ unsigned Alignment;
+
+ /// TheStores - The actual stores that make up this range.
+ SmallVector<Instruction*, 16> TheStores;
+
+ bool isProfitableToUseMemset(const DataLayout &TD) const;
+
+};
+} // end anon namespace
+
+bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
+ // If we found more than 4 stores to merge or 16 bytes, use memset.
+ if (TheStores.size() >= 4 || End-Start >= 16) return true;
+
+ // If there is nothing to merge, don't do anything.
+ if (TheStores.size() < 2) return false;
+
+ // If any of the stores are a memset, then it is always good to extend the
+ // memset.
+ for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
+ if (!isa<StoreInst>(TheStores[i]))
+ return true;
+
+ // Assume that the code generator is capable of merging pairs of stores
+ // together if it wants to.
+ if (TheStores.size() == 2) return false;
+
+ // If we have fewer than 8 stores, it can still be worthwhile to do this.
+ // For example, merging 4 i8 stores into an i32 store is useful almost always.
+ // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
+ // memset will be split into 2 32-bit stores anyway) and doing so can
+ // pessimize the llvm optimizer.
+ //
+ // Since we don't have perfect knowledge here, make some assumptions: assume
+ // the maximum GPR width is the same size as the pointer size and assume that
+ // this width can be stored. If so, check to see whether we will end up
+ // actually reducing the number of stores used.
+ unsigned Bytes = unsigned(End-Start);
+ unsigned NumPointerStores = Bytes/TD.getPointerSize();
+
+ // Assume the remaining bytes if any are done a byte at a time.
+ unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+
+ // If we will reduce the # stores (according to this heuristic), do the
+ // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
+ // etc.
+ return TheStores.size() > NumPointerStores+NumByteStores;
+}
+
+
+namespace {
+class MemsetRanges {
+ /// Ranges - A sorted list of the memset ranges. We use std::list here
+ /// because each element is relatively large and expensive to copy.
+ std::list<MemsetRange> Ranges;
+ typedef std::list<MemsetRange>::iterator range_iterator;
+ const DataLayout &TD;
+public:
+ MemsetRanges(const DataLayout &td) : TD(td) {}
+
+ typedef std::list<MemsetRange>::const_iterator const_iterator;
+ const_iterator begin() const { return Ranges.begin(); }
+ const_iterator end() const { return Ranges.end(); }
+ bool empty() const { return Ranges.empty(); }
+
+ void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ addStore(OffsetFromFirst, SI);
+ else
+ addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
+ }
+
+ void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
+ int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+ addRange(OffsetFromFirst, StoreSize,
+ SI->getPointerOperand(), SI->getAlignment(), SI);
+ }
+
+ void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
+ int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+ }
+
+ void addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst);
+
+};
+
+} // end anon namespace
+
+
+/// addRange - Add a new store to the MemsetRanges data structure. This adds a
+/// new range for the specified store at the specified offset, merging into
+/// existing ranges as appropriate.
+///
+/// Do a linear search of the ranges to see if this can be joined and/or to
+/// find the insertion point in the list. We keep the ranges sorted for
+/// simplicity here. This is a linear search of a linked list, which is ugly,
+/// however the number of ranges is limited, so this won't get crazy slow.
+void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst) {
+ int64_t End = Start+Size;
+ range_iterator I = Ranges.begin(), E = Ranges.end();
+
+ while (I != E && Start > I->End)
+ ++I;
+
+ // We now know that I == E, in which case we didn't find anything to merge
+ // with, or that Start <= I->End. If End < I->Start or I == E, then we need
+ // to insert a new range. Handle this now.
+ if (I == E || End < I->Start) {
+ MemsetRange &R = *Ranges.insert(I, MemsetRange());
+ R.Start = Start;
+ R.End = End;
+ R.StartPtr = Ptr;
+ R.Alignment = Alignment;
+ R.TheStores.push_back(Inst);
+ return;
+ }
+
+ // This store overlaps with I, add it.
+ I->TheStores.push_back(Inst);
+
+ // At this point, we may have an interval that completely contains our store.
+ // If so, just add it to the interval and return.
+ if (I->Start <= Start && I->End >= End)
+ return;
+
+ // Now we know that Start <= I->End and End >= I->Start so the range overlaps
+ // but is not entirely contained within the range.
+
+ // See if the range extends the start of the range. In this case, it couldn't
+ // possibly cause it to join the prior range, because otherwise we would have
+ // stopped on *it*.
+ if (Start < I->Start) {
+ I->Start = Start;
+ I->StartPtr = Ptr;
+ I->Alignment = Alignment;
+ }
+
+ // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
+ // is in or right at the end of I), and that End >= I->Start. Extend I out to
+ // End.
+ if (End > I->End) {
+ I->End = End;
+ range_iterator NextI = I;
+ while (++NextI != E && End >= NextI->Start) {
+ // Merge the range in.
+ I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
+ if (NextI->End > I->End)
+ I->End = NextI->End;
+ Ranges.erase(NextI);
+ NextI = I;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MemCpyOpt Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class MemCpyOpt : public FunctionPass {
+ MemoryDependenceAnalysis *MD;
+ TargetLibraryInfo *TLI;
+ const DataLayout *TD;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MemCpyOpt() : FunctionPass(ID) {
+ initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+ MD = 0;
+ TLI = 0;
+ TD = 0;
+ }
+
+ bool runOnFunction(Function &F);
+
+ private:
+ // This transformation requires dominator postdominator info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ }
+
+ // Helper fuctions
+ bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+ bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
+ bool processMemCpy(MemCpyInst *M);
+ bool processMemMove(MemMoveInst *M);
+ bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+ uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
+ bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize);
+ bool processByValArgument(CallSite CS, unsigned ArgNo);
+ Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
+ Value *ByteVal);
+
+ bool iterateOnFunction(Function &F);
+ };
+
+ char MemCpyOpt::ID = 0;
+}
+
+// createMemCpyOptPass - The public interface to this file...
+FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
+
+INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
+
+/// tryMergingIntoMemset - When scanning forward over instructions, we look for
+/// some other patterns to fold away. In particular, this looks for stores to
+/// neighboring locations of memory. If it sees enough consecutive ones, it
+/// attempts to merge them together into a memcpy/memset.
+Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
+ Value *StartPtr, Value *ByteVal) {
+ if (TD == 0) return 0;
+
+ // Okay, so we now have a single store that can be splatable. Scan to find
+ // all subsequent stores of the same value to offset from the same pointer.
+ // Join these together into ranges, so we can decide whether contiguous blocks
+ // are stored.
+ MemsetRanges Ranges(*TD);
+
+ BasicBlock::iterator BI = StartInst;
+ for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+ if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+ // If the instruction is readnone, ignore it, otherwise bail out. We
+ // don't even allow readonly here because we don't want something like:
+ // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
+ if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+ break;
+ continue;
+ }
+
+ if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+ // If this is a store, see if we can merge it in.
+ if (!NextStore->isSimple()) break;
+
+ // Check to see if this stored value is of the same byte-splattable value.
+ if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
+ Offset, *TD))
+ break;
+
+ Ranges.addStore(Offset, NextStore);
+ } else {
+ MemSetInst *MSI = cast<MemSetInst>(BI);
+
+ if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
+ !isa<ConstantInt>(MSI->getLength()))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+ break;
+
+ Ranges.addMemSet(Offset, MSI);
+ }
+ }
+
+ // If we have no ranges, then we just had a single store with nothing that
+ // could be merged in. This is a very common case of course.
+ if (Ranges.empty())
+ return 0;
+
+ // If we had at least one store that could be merged in, add the starting
+ // store as well. We try to avoid this unless there is at least something
+ // interesting as a small compile-time optimization.
+ Ranges.addInst(0, StartInst);
+
+ // If we create any memsets, we put it right before the first instruction that
+ // isn't part of the memset block. This ensure that the memset is dominated
+ // by any addressing instruction needed by the start of the block.
+ IRBuilder<> Builder(BI);
+
+ // Now that we have full information about ranges, loop over the ranges and
+ // emit memset's for anything big enough to be worthwhile.
+ Instruction *AMemSet = 0;
+ for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
+ I != E; ++I) {
+ const MemsetRange &Range = *I;
+
+ if (Range.TheStores.size() == 1) continue;
+
+ // If it is profitable to lower this range to memset, do so now.
+ if (!Range.isProfitableToUseMemset(*TD))
+ continue;
+
+ // Otherwise, we do want to transform this! Create a new memset.
+ // Get the starting pointer of the block.
+ StartPtr = Range.StartPtr;
+
+ // Determine alignment
+ unsigned Alignment = Range.Alignment;
+ if (Alignment == 0) {
+ Type *EltType =
+ cast<PointerType>(StartPtr->getType())->getElementType();
+ Alignment = TD->getABITypeAlignment(EltType);
+ }
+
+ AMemSet =
+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+
+ DEBUG(dbgs() << "Replace stores:\n";
+ for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
+ dbgs() << *Range.TheStores[i] << '\n';
+ dbgs() << "With: " << *AMemSet << '\n');
+
+ if (!Range.TheStores.empty())
+ AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
+
+ // Zap all the stores.
+ for (SmallVector<Instruction*, 16>::const_iterator
+ SI = Range.TheStores.begin(),
+ SE = Range.TheStores.end(); SI != SE; ++SI) {
+ MD->removeInstruction(*SI);
+ (*SI)->eraseFromParent();
+ }
+ ++NumMemSetInfer;
+ }
+
+ return AMemSet;
+}
+
+
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
+ if (!SI->isSimple()) return false;
+
+ if (TD == 0) return false;
+
+ // Detect cases where we're performing call slot forwarding, but
+ // happen to be using a load-store pair to implement it, rather than
+ // a memcpy.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (LI->isSimple() && LI->hasOneUse() &&
+ LI->getParent() == SI->getParent()) {
+ MemDepResult ldep = MD->getDependency(LI);
+ CallInst *C = 0;
+ if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
+ C = dyn_cast<CallInst>(ldep.getInst());
+
+ if (C) {
+ // Check that nothing touches the dest of the "copy" between
+ // the call and the store.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+ for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
+ E = C; I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
+ C = 0;
+ break;
+ }
+ }
+ }
+
+ if (C) {
+ unsigned storeAlign = SI->getAlignment();
+ if (!storeAlign)
+ storeAlign = TD->getABITypeAlignment(SI->getOperand(0)->getType());
+ unsigned loadAlign = LI->getAlignment();
+ if (!loadAlign)
+ loadAlign = TD->getABITypeAlignment(LI->getType());
+
+ bool changed = performCallSlotOptzn(LI,
+ SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ TD->getTypeStoreSize(SI->getOperand(0)->getType()),
+ std::min(storeAlign, loadAlign), C);
+ if (changed) {
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ MD->removeInstruction(LI);
+ LI->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ }
+
+ // There are two cases that are interesting for this code to handle: memcpy
+ // and memset. Right now we only handle memset.
+
+ // Ensure that the value being stored is something that can be memset'able a
+ // byte at a time like "0" or "-1" or any width, as well as things like
+ // 0xA0A0A0A0 and 0.0.
+ if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+ if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
+ ByteVal)) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+
+ return false;
+}
+
+bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
+ // See if there is another memset or store neighboring this memset which
+ // allows us to widen out the memset to do a single larger store.
+ if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
+ if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
+ MSI->getValue())) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+ return false;
+}
+
+
+/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// and checks for the possibility of a call slot optimization by having
+/// the call write its result directly into the destination of the memcpy.
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+ Value *cpyDest, Value *cpySrc,
+ uint64_t cpyLen, unsigned cpyAlign,
+ CallInst *C) {
+ // The general transformation to keep in mind is
+ //
+ // call @func(..., src, ...)
+ // memcpy(dest, src, ...)
+ //
+ // ->
+ //
+ // memcpy(dest, src, ...)
+ // call @func(..., dest, ...)
+ //
+ // Since moving the memcpy is technically awkward, we additionally check that
+ // src only holds uninitialized values at the moment of the call, meaning that
+ // the memcpy can be discarded rather than moved.
+
+ // Deliberately get the source and destination with bitcasts stripped away,
+ // because we'll need to do type comparisons based on the underlying type.
+ CallSite CS(C);
+
+ // Require that src be an alloca. This simplifies the reasoning considerably.
+ AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
+ if (!srcAlloca)
+ return false;
+
+ // Check that all of src is copied to dest.
+ if (TD == 0) return false;
+
+ ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
+ if (!srcArraySize)
+ return false;
+
+ uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ srcArraySize->getZExtValue();
+
+ if (cpyLen < srcSize)
+ return false;
+
+ // Check that accessing the first srcSize bytes of dest will not cause a
+ // trap. Otherwise the transform is invalid since it might cause a trap
+ // to occur earlier than it otherwise would.
+ if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
+ // The destination is an alloca. Check it is larger than srcSize.
+ ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
+ if (!destArraySize)
+ return false;
+
+ uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
+ destArraySize->getZExtValue();
+
+ if (destSize < srcSize)
+ return false;
+ } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
+ // If the destination is an sret parameter then only accesses that are
+ // outside of the returned struct type can trap.
+ if (!A->hasStructRetAttr())
+ return false;
+
+ Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+ uint64_t destSize = TD->getTypeAllocSize(StructTy);
+
+ if (destSize < srcSize)
+ return false;
+ } else {
+ return false;
+ }
+
+ // Check that dest points to memory that is at least as aligned as src.
+ unsigned srcAlign = srcAlloca->getAlignment();
+ if (!srcAlign)
+ srcAlign = TD->getABITypeAlignment(srcAlloca->getAllocatedType());
+ bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
+ // If dest is not aligned enough and we can't increase its alignment then
+ // bail out.
+ if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
+ return false;
+
+ // Check that src is not accessed except via the call and the memcpy. This
+ // guarantees that it holds only undefined values when passed in (so the final
+ // memcpy can be dropped), that it is not read or written between the call and
+ // the memcpy, and that writing beyond the end of it is undefined.
+ SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
+ srcAlloca->use_end());
+ while (!srcUseList.empty()) {
+ User *UI = srcUseList.pop_back_val();
+
+ if (isa<BitCastInst>(UI)) {
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
+ if (G->hasAllZeroIndices())
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ else
+ return false;
+ } else if (UI != C && UI != cpy) {
+ return false;
+ }
+ }
+
+ // Since we're changing the parameter to the callsite, we need to make sure
+ // that what would be the new parameter dominates the callsite.
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
+ if (!DT.dominates(cpyDestInst, C))
+ return false;
+
+ // In addition to knowing that the call does not access src in some
+ // unexpected manner, for example via a global, which we deduce from
+ // the use analysis, we also need to know that it does not sneakily
+ // access dest. We rely on AA to figure this out for us.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
+ // If necessary, perform additional analysis.
+ if (MR != AliasAnalysis::NoModRef)
+ MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
+ if (MR != AliasAnalysis::NoModRef)
+ return false;
+
+ // All the checks have passed, so do the transformation.
+ bool changedArgument = false;
+ for (unsigned i = 0; i < CS.arg_size(); ++i)
+ if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
+ Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
+ : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
+ cpyDest->getName(), C);
+ changedArgument = true;
+ if (CS.getArgument(i)->getType() == Dest->getType())
+ CS.setArgument(i, Dest);
+ else
+ CS.setArgument(i, CastInst::CreatePointerCast(Dest,
+ CS.getArgument(i)->getType(), Dest->getName(), C));
+ }
+
+ if (!changedArgument)
+ return false;
+
+ // If the destination wasn't sufficiently aligned then increase its alignment.
+ if (!isDestSufficientlyAligned) {
+ assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
+ cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
+ }
+
+ // Drop any cached information about the call, because we may have changed
+ // its dependence information by changing its parameter.
+ MD->removeInstruction(C);
+
+ // Remove the memcpy.
+ MD->removeInstruction(cpy);
+ ++NumMemCpyInstr;
+
+ return true;
+}
+
+/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
+/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
+/// copy from MDep's input if we can. MSize is the size of M's copy.
+///
+bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize) {
+ // We can only transforms memcpy's where the dest of one is the source of the
+ // other.
+ if (M->getSource() != MDep->getDest() || MDep->isVolatile())
+ return false;
+
+ // If dep instruction is reading from our current input, then it is a noop
+ // transfer and substituting the input won't change this instruction. Just
+ // ignore the input and let someone else zap MDep. This handles cases like:
+ // memcpy(a <- a)
+ // memcpy(b <- a)
+ if (M->getSource() == MDep->getSource())
+ return false;
+
+ // Second, the length of the memcpy's must be the same, or the preceding one
+ // must be larger than the following one.
+ ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+ if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
+ return false;
+
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // Verify that the copied-from memory doesn't change in between the two
+ // transfers. For example, in:
+ // memcpy(a <- b)
+ // *b = 42;
+ // memcpy(c <- a)
+ // It would be invalid to transform the second memcpy into memcpy(c <- b).
+ //
+ // TODO: If the code between M and MDep is transparent to the destination "c",
+ // then we could still perform the xform by moving M up to the first memcpy.
+ //
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
+ false, M, M->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+
+ // If the dest of the second might alias the source of the first, then the
+ // source and dest might overlap. We still want to eliminate the intermediate
+ // value, but we have to generate a memmove instead of memcpy.
+ bool UseMemMove = false;
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+ UseMemMove = true;
+
+ // If all checks passed, then we can transform M.
+
+ // Make sure to use the lesser of the alignment of the source and the dest
+ // since we're changing where we're reading from, but don't want to increase
+ // the alignment past what can be read from or written to.
+ // TODO: Is this worth it if we're creating a less aligned memcpy? For
+ // example we could be moving from movaps -> movq on x86.
+ unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
+
+ IRBuilder<> Builder(M);
+ if (UseMemMove)
+ Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+ else
+ Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+
+ // Remove the instruction we're replacing.
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+}
+
+
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+ // We can only optimize statically-sized memcpy's that are non-volatile.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (CopySize == 0 || M->isVolatile()) return false;
+
+ // If the source and destination of the memcpy are the same, then zap it.
+ if (M->getSource() == M->getDest()) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return false;
+ }
+
+ // If copying from a constant, try to turn the memcpy into a memset.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+ IRBuilder<> Builder(M);
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+ M->getAlignment(), false);
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumCpyToSet;
+ return true;
+ }
+
+ // The are two possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ MemDepResult DepInfo = MD->getDependency(M);
+ if (DepInfo.isClobber()) {
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), M->getAlignment(),
+ C)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return true;
+ }
+ }
+ }
+
+ AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
+ M, M->getParent());
+ if (SrcDepInfo.isClobber()) {
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+ }
+
+ return false;
+}
+
+/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
+/// are guaranteed not to alias.
+bool MemCpyOpt::processMemMove(MemMoveInst *M) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ if (!TLI->has(LibFunc::memmove))
+ return false;
+
+ // See if the pointers alias.
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
+ return false;
+
+ DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
+
+ // If not, then we know we can transform this.
+ Module *Mod = M->getParent()->getParent()->getParent();
+ Type *ArgTys[3] = { M->getRawDest()->getType(),
+ M->getRawSource()->getType(),
+ M->getLength()->getType() };
+ M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
+ ArgTys));
+
+ // MemDep may have over conservative information about this instruction, just
+ // conservatively flush it from the cache.
+ MD->removeInstruction(M);
+
+ ++NumMoveToCpy;
+ return true;
+}
+
+/// processByValArgument - This is called on every byval argument in call sites.
+bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
+ if (TD == 0) return false;
+
+ // Find out what feeds this byval argument.
+ Value *ByValArg = CS.getArgument(ArgNo);
+ Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
+ uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
+ MemDepResult DepInfo =
+ MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
+ true, CS.getInstruction(),
+ CS.getInstruction()->getParent());
+ if (!DepInfo.isClobber())
+ return false;
+
+ // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
+ // a memcpy, see if we can byval from the source of the memcpy instead of the
+ // result.
+ MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+ if (MDep == 0 || MDep->isVolatile() ||
+ ByValArg->stripPointerCasts() != MDep->getDest())
+ return false;
+
+ // The length of the memcpy must be larger or equal to the size of the byval.
+ ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+ return false;
+
+ // Get the alignment of the byval. If the call doesn't specify the alignment,
+ // then it is some target specific value that we can't know.
+ unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+ if (ByValAlign == 0) return false;
+
+ // If it is greater than the memcpy, then we check to see if we can force the
+ // source of the memcpy to the alignment we need. If we fail, we bail out.
+ if (MDep->getAlignment() < ByValAlign &&
+ getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
+ return false;
+
+ // Verify that the copied-from memory doesn't change in between the memcpy and
+ // the byval call.
+ // memcpy(a <- b)
+ // *b = 42;
+ // foo(*a)
+ // It would be invalid to transform the second memcpy into foo(*b).
+ //
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
+ false, CS.getInstruction(), MDep->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+
+ Value *TmpCast = MDep->getSource();
+ if (MDep->getSource()->getType() != ByValArg->getType())
+ TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
+ "tmpcast", CS.getInstruction());
+
+ DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
+ << " " << *MDep << "\n"
+ << " " << *CS.getInstruction() << "\n");
+
+ // Otherwise we're good! Update the byval argument.
+ CS.setArgument(ArgNo, TmpCast);
+ ++NumMemCpyInstr;
+ return true;
+}
+
+/// iterateOnFunction - Executes one iteration of MemCpyOpt.
+bool MemCpyOpt::iterateOnFunction(Function &F) {
+ bool MadeChange = false;
+
+ // Walk all instruction in the function.
+ for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ // Avoid invalidating the iterator.
+ Instruction *I = BI++;
+
+ bool RepeatInstruction = false;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ MadeChange |= processStore(SI, BI);
+ else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
+ RepeatInstruction = processMemSet(M, BI);
+ else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
+ RepeatInstruction = processMemCpy(M);
+ else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
+ RepeatInstruction = processMemMove(M);
+ else if (CallSite CS = (Value*)I) {
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.isByValArgument(i))
+ MadeChange |= processByValArgument(CS, i);
+ }
+
+ // Reprocess the instruction if desired.
+ if (RepeatInstruction) {
+ if (BI != BB->begin()) --BI;
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
+// function.
+//
+bool MemCpyOpt::runOnFunction(Function &F) {
+ bool MadeChange = false;
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ // If we don't have at least memset and memcpy, there is little point of doing
+ // anything here. These are required by a freestanding implementation, so if
+ // even they are disabled, there is no point in trying hard.
+ if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
+ return false;
+
+ while (1) {
+ if (!iterateOnFunction(F))
+ break;
+ MadeChange = true;
+ }
+
+ MD = 0;
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
new file mode 100644
index 000000000000..7ee40273347b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -0,0 +1,2007 @@
+//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates commutative expressions in an order that is designed
+// to promote better constant propagation, GCSE, LICM, PRE, etc.
+//
+// For example: 4 + (x + 5) -> x + (4 + 5)
+//
+// In the implementation of this algorithm, constants are assigned rank = 0,
+// function arguments are rank = 1, and other values are assigned ranks
+// corresponding to the reverse post order traversal of current function
+// (starting at 2), which effectively gives values in deep loops higher rank
+// than values not in loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reassociate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumChanged, "Number of insts reassociated");
+STATISTIC(NumAnnihil, "Number of expr tree annihilated");
+STATISTIC(NumFactor , "Number of multiplies factored");
+
+namespace {
+ struct ValueEntry {
+ unsigned Rank;
+ Value *Op;
+ ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
+ };
+ inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
+ return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start.
+ }
+}
+
+#ifndef NDEBUG
+/// PrintOps - Print out the expression identified in the Ops list.
+///
+static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
+ Module *M = I->getParent()->getParent()->getParent();
+ dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
+ << *Ops[0].Op->getType() << '\t';
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ dbgs() << "[ ";
+ WriteAsOperand(dbgs(), Ops[i].Op, false, M);
+ dbgs() << ", #" << Ops[i].Rank << "] ";
+ }
+}
+#endif
+
+namespace {
+ /// \brief Utility class representing a base and exponent pair which form one
+ /// factor of some product.
+ struct Factor {
+ Value *Base;
+ unsigned Power;
+
+ Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
+
+ /// \brief Sort factors by their Base.
+ struct BaseSorter {
+ bool operator()(const Factor &LHS, const Factor &RHS) {
+ return LHS.Base < RHS.Base;
+ }
+ };
+
+ /// \brief Compare factors for equal bases.
+ struct BaseEqual {
+ bool operator()(const Factor &LHS, const Factor &RHS) {
+ return LHS.Base == RHS.Base;
+ }
+ };
+
+ /// \brief Sort factors in descending order by their power.
+ struct PowerDescendingSorter {
+ bool operator()(const Factor &LHS, const Factor &RHS) {
+ return LHS.Power > RHS.Power;
+ }
+ };
+
+ /// \brief Compare factors for equal powers.
+ struct PowerEqual {
+ bool operator()(const Factor &LHS, const Factor &RHS) {
+ return LHS.Power == RHS.Power;
+ }
+ };
+ };
+
+ /// Utility class representing a non-constant Xor-operand. We classify
+ /// non-constant Xor-Operands into two categories:
+ /// C1) The operand is in the form "X & C", where C is a constant and C != ~0
+ /// C2)
+ /// C2.1) The operand is in the form of "X | C", where C is a non-zero
+ /// constant.
+ /// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
+ /// operand as "E | 0"
+ class XorOpnd {
+ public:
+ XorOpnd(Value *V);
+ const XorOpnd &operator=(const XorOpnd &That);
+
+ bool isInvalid() const { return SymbolicPart == 0; }
+ bool isOrExpr() const { return isOr; }
+ Value *getValue() const { return OrigVal; }
+ Value *getSymbolicPart() const { return SymbolicPart; }
+ unsigned getSymbolicRank() const { return SymbolicRank; }
+ const APInt &getConstPart() const { return ConstPart; }
+
+ void Invalidate() { SymbolicPart = OrigVal = 0; }
+ void setSymbolicRank(unsigned R) { SymbolicRank = R; }
+
+ // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank.
+ // The purpose is twofold:
+ // 1) Cluster together the operands sharing the same symbolic-value.
+ // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
+ // could potentially shorten crital path, and expose more loop-invariants.
+ // Note that values' rank are basically defined in RPO order (FIXME).
+ // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
+ // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
+ // "z" in the order of X-Y-Z is better than any other orders.
+ class PtrSortFunctor {
+ ArrayRef<XorOpnd> A;
+
+ public:
+ PtrSortFunctor(ArrayRef<XorOpnd> Array) : A(Array) {}
+ bool operator()(unsigned LHSIndex, unsigned RHSIndex) {
+ return A[LHSIndex].getSymbolicRank() < A[RHSIndex].getSymbolicRank();
+ }
+ };
+ private:
+ Value *OrigVal;
+ Value *SymbolicPart;
+ APInt ConstPart;
+ unsigned SymbolicRank;
+ bool isOr;
+ };
+}
+
+namespace {
+ class Reassociate : public FunctionPass {
+ DenseMap<BasicBlock*, unsigned> RankMap;
+ DenseMap<AssertingVH<Value>, unsigned> ValueRankMap;
+ SetVector<AssertingVH<Instruction> > RedoInsts;
+ bool MadeChange;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ Reassociate() : FunctionPass(ID) {
+ initializeReassociatePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ private:
+ void BuildRankMap(Function &F);
+ unsigned getRank(Value *V);
+ void ReassociateExpression(BinaryOperator *I);
+ void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
+ Value *OptimizeExpression(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops);
+ Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ Value *OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd,
+ Value *&Res);
+ bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
+ APInt &ConstOpnd, Value *&Res);
+ bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
+ SmallVectorImpl<Factor> &Factors);
+ Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
+ SmallVectorImpl<Factor> &Factors);
+ Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
+ Value *RemoveFactorFromExpression(Value *V, Value *Factor);
+ void EraseInst(Instruction *I);
+ void OptimizeInst(Instruction *I);
+ };
+}
+
+XorOpnd::XorOpnd(Value *V) {
+ assert(!isa<ConstantInt>(V) && "No ConstantInt");
+ OrigVal = V;
+ Instruction *I = dyn_cast<Instruction>(V);
+ SymbolicRank = 0;
+
+ if (I && (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And)) {
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+ if (isa<ConstantInt>(V0))
+ std::swap(V0, V1);
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) {
+ ConstPart = C->getValue();
+ SymbolicPart = V0;
+ isOr = (I->getOpcode() == Instruction::Or);
+ return;
+ }
+ }
+
+ // view the operand as "V | 0"
+ SymbolicPart = V;
+ ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth());
+ isOr = true;
+}
+
+const XorOpnd &XorOpnd::operator=(const XorOpnd &That) {
+ OrigVal = That.OrigVal;
+ SymbolicPart = That.SymbolicPart;
+ ConstPart = That.ConstPart;
+ SymbolicRank = That.SymbolicRank;
+ isOr = That.isOr;
+ return *this;
+}
+
+char Reassociate::ID = 0;
+INITIALIZE_PASS(Reassociate, "reassociate",
+ "Reassociate expressions", false, false)
+
+// Public interface to the Reassociate pass
+FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
+
+/// isReassociableOp - Return true if V is an instruction of the specified
+/// opcode and if it only has one use.
+static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
+ if (V->hasOneUse() && isa<Instruction>(V) &&
+ cast<Instruction>(V)->getOpcode() == Opcode)
+ return cast<BinaryOperator>(V);
+ return 0;
+}
+
+static bool isUnmovableInstruction(Instruction *I) {
+ if (I->getOpcode() == Instruction::PHI ||
+ I->getOpcode() == Instruction::LandingPad ||
+ I->getOpcode() == Instruction::Alloca ||
+ I->getOpcode() == Instruction::Load ||
+ I->getOpcode() == Instruction::Invoke ||
+ (I->getOpcode() == Instruction::Call &&
+ !isa<DbgInfoIntrinsic>(I)) ||
+ I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::SDiv ||
+ I->getOpcode() == Instruction::FDiv ||
+ I->getOpcode() == Instruction::URem ||
+ I->getOpcode() == Instruction::SRem ||
+ I->getOpcode() == Instruction::FRem)
+ return true;
+ return false;
+}
+
+void Reassociate::BuildRankMap(Function &F) {
+ unsigned i = 2;
+
+ // Assign distinct ranks to function arguments
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ ValueRankMap[&*I] = ++i;
+
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
+ E = RPOT.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ unsigned BBRank = RankMap[BB] = ++i << 16;
+
+ // Walk the basic block, adding precomputed ranks for any instructions that
+ // we cannot move. This ensures that the ranks for these instructions are
+ // all different in the block.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isUnmovableInstruction(I))
+ ValueRankMap[&*I] = ++BBRank;
+ }
+}
+
+unsigned Reassociate::getRank(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) {
+ if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
+ return 0; // Otherwise it's a global or constant, rank 0.
+ }
+
+ if (unsigned Rank = ValueRankMap[I])
+ return Rank; // Rank already known?
+
+ // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
+ // we can reassociate expressions for code motion! Since we do not recurse
+ // for PHI nodes, we cannot have infinite recursion here, because there
+ // cannot be loops in the value graph that do not go through PHI nodes.
+ unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
+ for (unsigned i = 0, e = I->getNumOperands();
+ i != e && Rank != MaxRank; ++i)
+ Rank = std::max(Rank, getRank(I->getOperand(i)));
+
+ // If this is a not or neg instruction, do not count it for rank. This
+ // assures us that X and ~X will have the same rank.
+ if (!I->getType()->isIntegerTy() ||
+ (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
+ ++Rank;
+
+ //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
+ // << Rank << "\n");
+
+ return ValueRankMap[I] = Rank;
+}
+
+/// LowerNegateToMultiply - Replace 0-X with X*-1.
+///
+static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
+ Constant *Cst = Constant::getAllOnesValue(Neg->getType());
+
+ BinaryOperator *Res =
+ BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
+ Neg->setOperand(1, Constant::getNullValue(Neg->getType())); // Drop use of op.
+ Res->takeName(Neg);
+ Neg->replaceAllUsesWith(Res);
+ Res->setDebugLoc(Neg->getDebugLoc());
+ return Res;
+}
+
+/// CarmichaelShift - Returns k such that lambda(2^Bitwidth) = 2^k, where lambda
+/// is the Carmichael function. This means that x^(2^k) === 1 mod 2^Bitwidth for
+/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
+/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
+/// even x in Bitwidth-bit arithmetic.
+static unsigned CarmichaelShift(unsigned Bitwidth) {
+ if (Bitwidth < 3)
+ return Bitwidth - 1;
+ return Bitwidth - 2;
+}
+
+/// IncorporateWeight - Add the extra weight 'RHS' to the existing weight 'LHS',
+/// reducing the combined weight using any special properties of the operation.
+/// The existing weight LHS represents the computation X op X op ... op X where
+/// X occurs LHS times. The combined weight represents X op X op ... op X with
+/// X occurring LHS + RHS times. If op is "Xor" for example then the combined
+/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
+/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
+static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
+ // If we were working with infinite precision arithmetic then the combined
+ // weight would be LHS + RHS. But we are using finite precision arithmetic,
+ // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
+ // for nilpotent operations and addition, but not for idempotent operations
+ // and multiplication), so it is important to correctly reduce the combined
+ // weight back into range if wrapping would be wrong.
+
+ // If RHS is zero then the weight didn't change.
+ if (RHS.isMinValue())
+ return;
+ // If LHS is zero then the combined weight is RHS.
+ if (LHS.isMinValue()) {
+ LHS = RHS;
+ return;
+ }
+ // From this point on we know that neither LHS nor RHS is zero.
+
+ if (Instruction::isIdempotent(Opcode)) {
+ // Idempotent means X op X === X, so any non-zero weight is equivalent to a
+ // weight of 1. Keeping weights at zero or one also means that wrapping is
+ // not a problem.
+ assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
+ return; // Return a weight of 1.
+ }
+ if (Instruction::isNilpotent(Opcode)) {
+ // Nilpotent means X op X === 0, so reduce weights modulo 2.
+ assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
+ LHS = 0; // 1 + 1 === 0 modulo 2.
+ return;
+ }
+ if (Opcode == Instruction::Add) {
+ // TODO: Reduce the weight by exploiting nsw/nuw?
+ LHS += RHS;
+ return;
+ }
+
+ assert(Opcode == Instruction::Mul && "Unknown associative operation!");
+ unsigned Bitwidth = LHS.getBitWidth();
+ // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
+ // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
+ // bit number x, since either x is odd in which case x^CM = 1, or x is even in
+ // which case both x^W and x^(W - CM) are zero. By subtracting off multiples
+ // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
+ // which by a happy accident means that they can always be represented using
+ // Bitwidth bits.
+ // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
+ // the Carmichael number).
+ if (Bitwidth > 3) {
+ /// CM - The value of Carmichael's lambda function.
+ APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
+ // Any weight W >= Threshold can be replaced with W - CM.
+ APInt Threshold = CM + Bitwidth;
+ assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
+ // For Bitwidth 4 or more the following sum does not overflow.
+ LHS += RHS;
+ while (LHS.uge(Threshold))
+ LHS -= CM;
+ } else {
+ // To avoid problems with overflow do everything the same as above but using
+ // a larger type.
+ unsigned CM = 1U << CarmichaelShift(Bitwidth);
+ unsigned Threshold = CM + Bitwidth;
+ assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
+ "Weights not reduced!");
+ unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
+ while (Total >= Threshold)
+ Total -= CM;
+ LHS = Total;
+ }
+}
+
+typedef std::pair<Value*, APInt> RepeatedValue;
+
+/// LinearizeExprTree - Given an associative binary expression, return the leaf
+/// nodes in Ops along with their weights (how many times the leaf occurs). The
+/// original expression is the same as
+/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
+/// op
+/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
+/// op
+/// ...
+/// op
+/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
+///
+/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
+///
+/// This routine may modify the function, in which case it returns 'true'. The
+/// changes it makes may well be destructive, changing the value computed by 'I'
+/// to something completely different. Thus if the routine returns 'true' then
+/// you MUST either replace I with a new expression computed from the Ops array,
+/// or use RewriteExprTree to put the values back in.
+///
+/// A leaf node is either not a binary operation of the same kind as the root
+/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
+/// opcode), or is the same kind of binary operator but has a use which either
+/// does not belong to the expression, or does belong to the expression but is
+/// a leaf node. Every leaf node has at least one use that is a non-leaf node
+/// of the expression, while for non-leaf nodes (except for the root 'I') every
+/// use is a non-leaf node of the expression.
+///
+/// For example:
+/// expression graph node names
+///
+/// + | I
+/// / \ |
+/// + + | A, B
+/// / \ / \ |
+/// * + * | C, D, E
+/// / \ / \ / \ |
+/// + * | F, G
+///
+/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
+/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
+///
+/// The expression is maximal: if some instruction is a binary operator of the
+/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
+/// then the instruction also belongs to the expression, is not a leaf node of
+/// it, and its operands also belong to the expression (but may be leaf nodes).
+///
+/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
+/// order to ensure that every non-root node in the expression has *exactly one*
+/// use by a non-leaf node of the expression. This destruction means that the
+/// caller MUST either replace 'I' with a new expression or use something like
+/// RewriteExprTree to put the values back in if the routine indicates that it
+/// made a change by returning 'true'.
+///
+/// In the above example either the right operand of A or the left operand of B
+/// will be replaced by undef. If it is B's operand then this gives:
+///
+/// + | I
+/// / \ |
+/// + + | A, B - operand of B replaced with undef
+/// / \ \ |
+/// * + * | C, D, E
+/// / \ / \ / \ |
+/// + * | F, G
+///
+/// Note that such undef operands can only be reached by passing through 'I'.
+/// For example, if you visit operands recursively starting from a leaf node
+/// then you will never see such an undef operand unless you get back to 'I',
+/// which requires passing through a phi node.
+///
+/// Note that this routine may also mutate binary operators of the wrong type
+/// that have all uses inside the expression (i.e. only used by non-leaf nodes
+/// of the expression) if it can turn them into binary operators of the right
+/// type and thus make the expression bigger.
+
+static bool LinearizeExprTree(BinaryOperator *I,
+ SmallVectorImpl<RepeatedValue> &Ops) {
+ DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
+ unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
+ unsigned Opcode = I->getOpcode();
+ assert(Instruction::isAssociative(Opcode) &&
+ Instruction::isCommutative(Opcode) &&
+ "Expected an associative and commutative operation!");
+
+ // Visit all operands of the expression, keeping track of their weight (the
+ // number of paths from the expression root to the operand, or if you like
+ // the number of times that operand occurs in the linearized expression).
+ // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
+ // while A has weight two.
+
+ // Worklist of non-leaf nodes (their operands are in the expression too) along
+ // with their weights, representing a certain number of paths to the operator.
+ // If an operator occurs in the worklist multiple times then we found multiple
+ // ways to get to it.
+ SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight)
+ Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
+ bool MadeChange = false;
+
+ // Leaves of the expression are values that either aren't the right kind of
+ // operation (eg: a constant, or a multiply in an add tree), or are, but have
+ // some uses that are not inside the expression. For example, in I = X + X,
+ // X = A + B, the value X has two uses (by I) that are in the expression. If
+ // X has any other uses, for example in a return instruction, then we consider
+ // X to be a leaf, and won't analyze it further. When we first visit a value,
+ // if it has more than one use then at first we conservatively consider it to
+ // be a leaf. Later, as the expression is explored, we may discover some more
+ // uses of the value from inside the expression. If all uses turn out to be
+ // from within the expression (and the value is a binary operator of the right
+ // kind) then the value is no longer considered to be a leaf, and its operands
+ // are explored.
+
+ // Leaves - Keeps track of the set of putative leaves as well as the number of
+ // paths to each leaf seen so far.
+ typedef DenseMap<Value*, APInt> LeafMap;
+ LeafMap Leaves; // Leaf -> Total weight so far.
+ SmallVector<Value*, 8> LeafOrder; // Ensure deterministic leaf output order.
+
+#ifndef NDEBUG
+ SmallPtrSet<Value*, 8> Visited; // For sanity checking the iteration scheme.
+#endif
+ while (!Worklist.empty()) {
+ std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val();
+ I = P.first; // We examine the operands of this binary operator.
+
+ for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands.
+ Value *Op = I->getOperand(OpIdx);
+ APInt Weight = P.second; // Number of paths to this operand.
+ DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
+ assert(!Op->use_empty() && "No uses, so how did we get to it?!");
+
+ // If this is a binary operation of the right kind with only one use then
+ // add its operands to the expression.
+ if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
+ assert(Visited.insert(Op) && "Not first visit!");
+ DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
+ Worklist.push_back(std::make_pair(BO, Weight));
+ continue;
+ }
+
+ // Appears to be a leaf. Is the operand already in the set of leaves?
+ LeafMap::iterator It = Leaves.find(Op);
+ if (It == Leaves.end()) {
+ // Not in the leaf map. Must be the first time we saw this operand.
+ assert(Visited.insert(Op) && "Not first visit!");
+ if (!Op->hasOneUse()) {
+ // This value has uses not accounted for by the expression, so it is
+ // not safe to modify. Mark it as being a leaf.
+ DEBUG(dbgs() << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
+ LeafOrder.push_back(Op);
+ Leaves[Op] = Weight;
+ continue;
+ }
+ // No uses outside the expression, try morphing it.
+ } else if (It != Leaves.end()) {
+ // Already in the leaf map.
+ assert(Visited.count(Op) && "In leaf map but not visited!");
+
+ // Update the number of paths to the leaf.
+ IncorporateWeight(It->second, Weight, Opcode);
+
+#if 0 // TODO: Re-enable once PR13021 is fixed.
+ // The leaf already has one use from inside the expression. As we want
+ // exactly one such use, drop this new use of the leaf.
+ assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
+ I->setOperand(OpIdx, UndefValue::get(I->getType()));
+ MadeChange = true;
+
+ // If the leaf is a binary operation of the right kind and we now see
+ // that its multiple original uses were in fact all by nodes belonging
+ // to the expression, then no longer consider it to be a leaf and add
+ // its operands to the expression.
+ if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
+ DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
+ Worklist.push_back(std::make_pair(BO, It->second));
+ Leaves.erase(It);
+ continue;
+ }
+#endif
+
+ // If we still have uses that are not accounted for by the expression
+ // then it is not safe to modify the value.
+ if (!Op->hasOneUse())
+ continue;
+
+ // No uses outside the expression, try morphing it.
+ Weight = It->second;
+ Leaves.erase(It); // Since the value may be morphed below.
+ }
+
+ // At this point we have a value which, first of all, is not a binary
+ // expression of the right kind, and secondly, is only used inside the
+ // expression. This means that it can safely be modified. See if we
+ // can usefully morph it into an expression of the right kind.
+ assert((!isa<Instruction>(Op) ||
+ cast<Instruction>(Op)->getOpcode() != Opcode) &&
+ "Should have been handled above!");
+ assert(Op->hasOneUse() && "Has uses outside the expression tree!");
+
+ // If this is a multiply expression, turn any internal negations into
+ // multiplies by -1 so they can be reassociated.
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Op);
+ if (Opcode == Instruction::Mul && BO && BinaryOperator::isNeg(BO)) {
+ DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
+ BO = LowerNegateToMultiply(BO);
+ DEBUG(dbgs() << *BO << 'n');
+ Worklist.push_back(std::make_pair(BO, Weight));
+ MadeChange = true;
+ continue;
+ }
+
+ // Failed to morph into an expression of the right type. This really is
+ // a leaf.
+ DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
+ assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
+ LeafOrder.push_back(Op);
+ Leaves[Op] = Weight;
+ }
+ }
+
+ // The leaves, repeated according to their weights, represent the linearized
+ // form of the expression.
+ for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
+ Value *V = LeafOrder[i];
+ LeafMap::iterator It = Leaves.find(V);
+ if (It == Leaves.end())
+ // Node initially thought to be a leaf wasn't.
+ continue;
+ assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
+ APInt Weight = It->second;
+ if (Weight.isMinValue())
+ // Leaf already output or weight reduction eliminated it.
+ continue;
+ // Ensure the leaf is only output once.
+ It->second = 0;
+ Ops.push_back(std::make_pair(V, Weight));
+ }
+
+ // For nilpotent operations or addition there may be no operands, for example
+ // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
+ // in both cases the weight reduces to 0 causing the value to be skipped.
+ if (Ops.empty()) {
+ Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
+ assert(Identity && "Associative operation without identity!");
+ Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1)));
+ }
+
+ return MadeChange;
+}
+
+// RewriteExprTree - Now that the operands for this expression tree are
+// linearized and optimized, emit them in-order.
+void Reassociate::RewriteExprTree(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ assert(Ops.size() > 1 && "Single values should be used directly!");
+
+ // Since our optimizations should never increase the number of operations, the
+ // new expression can usually be written reusing the existing binary operators
+ // from the original expression tree, without creating any new instructions,
+ // though the rewritten expression may have a completely different topology.
+ // We take care to not change anything if the new expression will be the same
+ // as the original. If more than trivial changes (like commuting operands)
+ // were made then we are obliged to clear out any optional subclass data like
+ // nsw flags.
+
+ /// NodesToRewrite - Nodes from the original expression available for writing
+ /// the new expression into.
+ SmallVector<BinaryOperator*, 8> NodesToRewrite;
+ unsigned Opcode = I->getOpcode();
+ BinaryOperator *Op = I;
+
+ /// NotRewritable - The operands being written will be the leaves of the new
+ /// expression and must not be used as inner nodes (via NodesToRewrite) by
+ /// mistake. Inner nodes are always reassociable, and usually leaves are not
+ /// (if they were they would have been incorporated into the expression and so
+ /// would not be leaves), so most of the time there is no danger of this. But
+ /// in rare cases a leaf may become reassociable if an optimization kills uses
+ /// of it, or it may momentarily become reassociable during rewriting (below)
+ /// due it being removed as an operand of one of its uses. Ensure that misuse
+ /// of leaf nodes as inner nodes cannot occur by remembering all of the future
+ /// leaves and refusing to reuse any of them as inner nodes.
+ SmallPtrSet<Value*, 8> NotRewritable;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ NotRewritable.insert(Ops[i].Op);
+
+ // ExpressionChanged - Non-null if the rewritten expression differs from the
+ // original in some non-trivial way, requiring the clearing of optional flags.
+ // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
+ BinaryOperator *ExpressionChanged = 0;
+ for (unsigned i = 0; ; ++i) {
+ // The last operation (which comes earliest in the IR) is special as both
+ // operands will come from Ops, rather than just one with the other being
+ // a subexpression.
+ if (i+2 == Ops.size()) {
+ Value *NewLHS = Ops[i].Op;
+ Value *NewRHS = Ops[i+1].Op;
+ Value *OldLHS = Op->getOperand(0);
+ Value *OldRHS = Op->getOperand(1);
+
+ if (NewLHS == OldLHS && NewRHS == OldRHS)
+ // Nothing changed, leave it alone.
+ break;
+
+ if (NewLHS == OldRHS && NewRHS == OldLHS) {
+ // The order of the operands was reversed. Swap them.
+ DEBUG(dbgs() << "RA: " << *Op << '\n');
+ Op->swapOperands();
+ DEBUG(dbgs() << "TO: " << *Op << '\n');
+ MadeChange = true;
+ ++NumChanged;
+ break;
+ }
+
+ // The new operation differs non-trivially from the original. Overwrite
+ // the old operands with the new ones.
+ DEBUG(dbgs() << "RA: " << *Op << '\n');
+ if (NewLHS != OldLHS) {
+ BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
+ if (BO && !NotRewritable.count(BO))
+ NodesToRewrite.push_back(BO);
+ Op->setOperand(0, NewLHS);
+ }
+ if (NewRHS != OldRHS) {
+ BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
+ if (BO && !NotRewritable.count(BO))
+ NodesToRewrite.push_back(BO);
+ Op->setOperand(1, NewRHS);
+ }
+ DEBUG(dbgs() << "TO: " << *Op << '\n');
+
+ ExpressionChanged = Op;
+ MadeChange = true;
+ ++NumChanged;
+
+ break;
+ }
+
+ // Not the last operation. The left-hand side will be a sub-expression
+ // while the right-hand side will be the current element of Ops.
+ Value *NewRHS = Ops[i].Op;
+ if (NewRHS != Op->getOperand(1)) {
+ DEBUG(dbgs() << "RA: " << *Op << '\n');
+ if (NewRHS == Op->getOperand(0)) {
+ // The new right-hand side was already present as the left operand. If
+ // we are lucky then swapping the operands will sort out both of them.
+ Op->swapOperands();
+ } else {
+ // Overwrite with the new right-hand side.
+ BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
+ if (BO && !NotRewritable.count(BO))
+ NodesToRewrite.push_back(BO);
+ Op->setOperand(1, NewRHS);
+ ExpressionChanged = Op;
+ }
+ DEBUG(dbgs() << "TO: " << *Op << '\n');
+ MadeChange = true;
+ ++NumChanged;
+ }
+
+ // Now deal with the left-hand side. If this is already an operation node
+ // from the original expression then just rewrite the rest of the expression
+ // into it.
+ BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
+ if (BO && !NotRewritable.count(BO)) {
+ Op = BO;
+ continue;
+ }
+
+ // Otherwise, grab a spare node from the original expression and use that as
+ // the left-hand side. If there are no nodes left then the optimizers made
+ // an expression with more nodes than the original! This usually means that
+ // they did something stupid but it might mean that the problem was just too
+ // hard (finding the mimimal number of multiplications needed to realize a
+ // multiplication expression is NP-complete). Whatever the reason, smart or
+ // stupid, create a new node if there are none left.
+ BinaryOperator *NewOp;
+ if (NodesToRewrite.empty()) {
+ Constant *Undef = UndefValue::get(I->getType());
+ NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
+ Undef, Undef, "", I);
+ } else {
+ NewOp = NodesToRewrite.pop_back_val();
+ }
+
+ DEBUG(dbgs() << "RA: " << *Op << '\n');
+ Op->setOperand(0, NewOp);
+ DEBUG(dbgs() << "TO: " << *Op << '\n');
+ ExpressionChanged = Op;
+ MadeChange = true;
+ ++NumChanged;
+ Op = NewOp;
+ }
+
+ // If the expression changed non-trivially then clear out all subclass data
+ // starting from the operator specified in ExpressionChanged, and compactify
+ // the operators to just before the expression root to guarantee that the
+ // expression tree is dominated by all of Ops.
+ if (ExpressionChanged)
+ do {
+ ExpressionChanged->clearSubclassOptionalData();
+ if (ExpressionChanged == I)
+ break;
+ ExpressionChanged->moveBefore(I);
+ ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->use_begin());
+ } while (1);
+
+ // Throw away any left over nodes from the original expression.
+ for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
+ RedoInsts.insert(NodesToRewrite[i]);
+}
+
+/// NegateValue - Insert instructions before the instruction pointed to by BI,
+/// that computes the negative version of the value specified. The negative
+/// version of the value is returned, and BI is left pointing at the instruction
+/// that should be processed next by the reassociation pass.
+static Value *NegateValue(Value *V, Instruction *BI) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getNeg(C);
+
+ // We are trying to expose opportunity for reassociation. One of the things
+ // that we want to do to achieve this is to push a negation as deep into an
+ // expression chain as possible, to expose the add instructions. In practice,
+ // this means that we turn this:
+ // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
+ // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
+ // the constants. We assume that instcombine will clean up the mess later if
+ // we introduce tons of unnecessary negation instructions.
+ //
+ if (BinaryOperator *I = isReassociableOp(V, Instruction::Add)) {
+ // Push the negates through the add.
+ I->setOperand(0, NegateValue(I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI));
+
+ // We must move the add instruction here, because the neg instructions do
+ // not dominate the old add instruction in general. By moving it, we are
+ // assured that the neg instructions we just inserted dominate the
+ // instruction we are about to insert after them.
+ //
+ I->moveBefore(BI);
+ I->setName(I->getName()+".neg");
+ return I;
+ }
+
+ // Okay, we need to materialize a negated version of V with an instruction.
+ // Scan the use lists of V to see if we have one already.
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (!BinaryOperator::isNeg(U)) continue;
+
+ // We found one! Now we have to make sure that the definition dominates
+ // this use. We do this by moving it to the entry block (if it is a
+ // non-instruction value) or right after the definition. These negates will
+ // be zapped by reassociate later, so we don't need much finesse here.
+ BinaryOperator *TheNeg = cast<BinaryOperator>(U);
+
+ // Verify that the negate is in this function, V might be a constant expr.
+ if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
+ continue;
+
+ BasicBlock::iterator InsertPt;
+ if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
+ InsertPt = II->getNormalDest()->begin();
+ } else {
+ InsertPt = InstInput;
+ ++InsertPt;
+ }
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+ } else {
+ InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
+ }
+ TheNeg->moveBefore(InsertPt);
+ return TheNeg;
+ }
+
+ // Insert a 'neg' instruction that subtracts the value from zero to get the
+ // negation.
+ return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
+}
+
+/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
+/// X-Y into (X + -Y).
+static bool ShouldBreakUpSubtract(Instruction *Sub) {
+ // If this is a negation, we can't split it up!
+ if (BinaryOperator::isNeg(Sub))
+ return false;
+
+ // Don't bother to break this up unless either the LHS is an associable add or
+ // subtract or if this is only used by one.
+ if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
+ isReassociableOp(Sub->getOperand(0), Instruction::Sub))
+ return true;
+ if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
+ isReassociableOp(Sub->getOperand(1), Instruction::Sub))
+ return true;
+ if (Sub->hasOneUse() &&
+ (isReassociableOp(Sub->use_back(), Instruction::Add) ||
+ isReassociableOp(Sub->use_back(), Instruction::Sub)))
+ return true;
+
+ return false;
+}
+
+/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
+/// only used by an add, transform this into (X+(0-Y)) to promote better
+/// reassociation.
+static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+ // Convert a subtract into an add and a neg instruction. This allows sub
+ // instructions to be commuted with other add instructions.
+ //
+ // Calculate the negative value of Operand 1 of the sub instruction,
+ // and set it as the RHS of the add instruction we just made.
+ //
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+ BinaryOperator *New =
+ BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
+ Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
+ Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
+ New->takeName(Sub);
+
+ // Everyone now refers to the add instruction.
+ Sub->replaceAllUsesWith(New);
+ New->setDebugLoc(Sub->getDebugLoc());
+
+ DEBUG(dbgs() << "Negated: " << *New << '\n');
+ return New;
+}
+
+/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
+/// by one, change this into a multiply by a constant to assist with further
+/// reassociation.
+static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
+ Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
+ MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
+
+ BinaryOperator *Mul =
+ BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
+ Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
+ Mul->takeName(Shl);
+ Shl->replaceAllUsesWith(Mul);
+ Mul->setDebugLoc(Shl->getDebugLoc());
+ return Mul;
+}
+
+/// FindInOperandList - Scan backwards and forwards among values with the same
+/// rank as element i to see if X exists. If X does not exist, return i. This
+/// is useful when scanning for 'x' when we see '-x' because they both get the
+/// same rank.
+static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
+ Value *X) {
+ unsigned XRank = Ops[i].Rank;
+ unsigned e = Ops.size();
+ for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
+ if (Ops[j].Op == X)
+ return j;
+ // Scan backwards.
+ for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
+ if (Ops[j].Op == X)
+ return j;
+ return i;
+}
+
+/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
+/// and returning the result. Insert the tree before I.
+static Value *EmitAddTreeOfValues(Instruction *I,
+ SmallVectorImpl<WeakVH> &Ops){
+ if (Ops.size() == 1) return Ops.back();
+
+ Value *V1 = Ops.back();
+ Ops.pop_back();
+ Value *V2 = EmitAddTreeOfValues(I, Ops);
+ return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
+}
+
+/// RemoveFactorFromExpression - If V is an expression tree that is a
+/// multiplication sequence, and if this sequence contains a multiply by Factor,
+/// remove Factor from the tree and return the new tree.
+Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
+ BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
+ if (!BO) return 0;
+
+ SmallVector<RepeatedValue, 8> Tree;
+ MadeChange |= LinearizeExprTree(BO, Tree);
+ SmallVector<ValueEntry, 8> Factors;
+ Factors.reserve(Tree.size());
+ for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
+ RepeatedValue E = Tree[i];
+ Factors.append(E.second.getZExtValue(),
+ ValueEntry(getRank(E.first), E.first));
+ }
+
+ bool FoundFactor = false;
+ bool NeedsNegate = false;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
+ if (Factors[i].Op == Factor) {
+ FoundFactor = true;
+ Factors.erase(Factors.begin()+i);
+ break;
+ }
+
+ // If this is a negative version of this factor, remove it.
+ if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
+ if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
+ if (FC1->getValue() == -FC2->getValue()) {
+ FoundFactor = NeedsNegate = true;
+ Factors.erase(Factors.begin()+i);
+ break;
+ }
+ }
+
+ if (!FoundFactor) {
+ // Make sure to restore the operands to the expression tree.
+ RewriteExprTree(BO, Factors);
+ return 0;
+ }
+
+ BasicBlock::iterator InsertPt = BO; ++InsertPt;
+
+ // If this was just a single multiply, remove the multiply and return the only
+ // remaining operand.
+ if (Factors.size() == 1) {
+ RedoInsts.insert(BO);
+ V = Factors[0].Op;
+ } else {
+ RewriteExprTree(BO, Factors);
+ V = BO;
+ }
+
+ if (NeedsNegate)
+ V = BinaryOperator::CreateNeg(V, "neg", InsertPt);
+
+ return V;
+}
+
+/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
+/// add its operands as factors, otherwise add V to the list of factors.
+///
+/// Ops is the top-level list of add operands we're trying to factor.
+static void FindSingleUseMultiplyFactors(Value *V,
+ SmallVectorImpl<Value*> &Factors,
+ const SmallVectorImpl<ValueEntry> &Ops) {
+ BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
+ if (!BO) {
+ Factors.push_back(V);
+ return;
+ }
+
+ // Otherwise, add the LHS and RHS to the list of factors.
+ FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops);
+ FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops);
+}
+
+/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
+/// instruction. This optimizes based on identities. If it can be reduced to
+/// a single Value, it is returned, otherwise the Ops list is mutated as
+/// necessary.
+static Value *OptimizeAndOrXor(unsigned Opcode,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
+ // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ // First, check for X and ~X in the operand list.
+ assert(i < Ops.size());
+ if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
+ Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX != i) {
+ if (Opcode == Instruction::And) // ...&X&~X = 0
+ return Constant::getNullValue(X->getType());
+
+ if (Opcode == Instruction::Or) // ...|X|~X = -1
+ return Constant::getAllOnesValue(X->getType());
+ }
+ }
+
+ // Next, check for duplicate pairs of values, which we assume are next to
+ // each other, due to our sorting criteria.
+ assert(i < Ops.size());
+ if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
+ if (Opcode == Instruction::And || Opcode == Instruction::Or) {
+ // Drop duplicate values for And and Or.
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ ++NumAnnihil;
+ continue;
+ }
+
+ // Drop pairs of values for Xor.
+ assert(Opcode == Instruction::Xor);
+ if (e == 2)
+ return Constant::getNullValue(Ops[0].Op->getType());
+
+ // Y ^ X^X -> Y
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
+ i -= 1; e -= 2;
+ ++NumAnnihil;
+ }
+ }
+ return 0;
+}
+
+/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and
+/// instruction with the given two operands, and return the resulting
+/// instruction. There are two special cases: 1) if the constant operand is 0,
+/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
+/// be returned.
+static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
+ const APInt &ConstOpnd) {
+ if (ConstOpnd != 0) {
+ if (!ConstOpnd.isAllOnesValue()) {
+ LLVMContext &Ctx = Opnd->getType()->getContext();
+ Instruction *I;
+ I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd),
+ "and.ra", InsertBefore);
+ I->setDebugLoc(InsertBefore->getDebugLoc());
+ return I;
+ }
+ return Opnd;
+ }
+ return 0;
+}
+
+// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
+// into "R ^ C", where C would be 0, and R is a symbolic value.
+//
+// If it was successful, true is returned, and the "R" and "C" is returned
+// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
+// and both "Res" and "ConstOpnd" remain unchanged.
+//
+bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
+ APInt &ConstOpnd, Value *&Res) {
+ // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
+ // = ((x | c1) ^ c1) ^ (c1 ^ c2)
+ // = (x & ~c1) ^ (c1 ^ c2)
+ // It is useful only when c1 == c2.
+ if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) {
+ if (!Opnd1->getValue()->hasOneUse())
+ return false;
+
+ const APInt &C1 = Opnd1->getConstPart();
+ if (C1 != ConstOpnd)
+ return false;
+
+ Value *X = Opnd1->getSymbolicPart();
+ Res = createAndInstr(I, X, ~C1);
+ // ConstOpnd was C2, now C1 ^ C2.
+ ConstOpnd ^= C1;
+
+ if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+ RedoInsts.insert(T);
+ return true;
+ }
+ return false;
+}
+
+
+// Helper function of OptimizeXor(). It tries to simplify
+// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
+// symbolic value.
+//
+// If it was successful, true is returned, and the "R" and "C" is returned
+// via "Res" and "ConstOpnd", respectively (If the entire expression is
+// evaluated to a constant, the Res is set to NULL); otherwise, false is
+// returned, and both "Res" and "ConstOpnd" remain unchanged.
+bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
+ APInt &ConstOpnd, Value *&Res) {
+ Value *X = Opnd1->getSymbolicPart();
+ if (X != Opnd2->getSymbolicPart())
+ return false;
+
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
+
+ // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
+ int DeadInstNum = 1;
+ if (Opnd1->getValue()->hasOneUse())
+ DeadInstNum++;
+ if (Opnd2->getValue()->hasOneUse())
+ DeadInstNum++;
+
+ // Xor-Rule 2:
+ // (x | c1) ^ (x & c2)
+ // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
+ // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
+ // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
+ //
+ if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
+ if (Opnd2->isOrExpr())
+ std::swap(Opnd1, Opnd2);
+
+ APInt C3((~C1) ^ C2);
+
+ // Do not increase code size!
+ if (C3 != 0 && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (NewInstNum > DeadInstNum)
+ return false;
+ }
+
+ Res = createAndInstr(I, X, C3);
+ ConstOpnd ^= C1;
+
+ } else if (Opnd1->isOrExpr()) {
+ // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
+ //
+ APInt C3 = C1 ^ C2;
+
+ // Do not increase code size
+ if (C3 != 0 && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (NewInstNum > DeadInstNum)
+ return false;
+ }
+
+ Res = createAndInstr(I, X, C3);
+ ConstOpnd ^= C3;
+ } else {
+ // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
+ //
+ APInt C3 = C1 ^ C2;
+ Res = createAndInstr(I, X, C3);
+ }
+
+ // Put the original operands in the Redo list; hope they will be deleted
+ // as dead code.
+ if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+ RedoInsts.insert(T);
+ if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
+ RedoInsts.insert(T);
+
+ return true;
+}
+
+/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
+/// to a single Value, it is returned, otherwise the Ops list is mutated as
+/// necessary.
+Value *Reassociate::OptimizeXor(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
+ return V;
+
+ if (Ops.size() == 1)
+ return 0;
+
+ SmallVector<XorOpnd, 8> Opnds;
+ SmallVector<unsigned, 8> OpndIndices;
+ Type *Ty = Ops[0].Op->getType();
+ APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
+
+ // Step 1: Convert ValueEntry to XorOpnd
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ Value *V = Ops[i].Op;
+ if (!isa<ConstantInt>(V)) {
+ XorOpnd O(V);
+ O.setSymbolicRank(getRank(O.getSymbolicPart()));
+ Opnds.push_back(O);
+ OpndIndices.push_back(Opnds.size() - 1);
+ } else
+ ConstOpnd ^= cast<ConstantInt>(V)->getValue();
+ }
+
+ // Step 2: Sort the Xor-Operands in a way such that the operands containing
+ // the same symbolic value cluster together. For instance, the input operand
+ // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
+ // ("x | 123", "x & 789", "y & 456").
+ std::sort(OpndIndices.begin(), OpndIndices.end(),
+ XorOpnd::PtrSortFunctor(Opnds));
+
+ // Step 3: Combine adjacent operands
+ XorOpnd *PrevOpnd = 0;
+ bool Changed = false;
+ for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
+ XorOpnd *CurrOpnd = &Opnds[OpndIndices[i]];
+ // The combined value
+ Value *CV;
+
+ // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
+ if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+ Changed = true;
+ if (CV)
+ *CurrOpnd = XorOpnd(CV);
+ else {
+ CurrOpnd->Invalidate();
+ continue;
+ }
+ }
+
+ if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
+ PrevOpnd = CurrOpnd;
+ continue;
+ }
+
+ // step 3.2: When previous and current operands share the same symbolic
+ // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
+ //
+ if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
+ // Remove previous operand
+ PrevOpnd->Invalidate();
+ if (CV) {
+ *CurrOpnd = XorOpnd(CV);
+ PrevOpnd = CurrOpnd;
+ } else {
+ CurrOpnd->Invalidate();
+ PrevOpnd = 0;
+ }
+ Changed = true;
+ }
+ }
+
+ // Step 4: Reassemble the Ops
+ if (Changed) {
+ Ops.clear();
+ for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
+ XorOpnd &O = Opnds[i];
+ if (O.isInvalid())
+ continue;
+ ValueEntry VE(getRank(O.getValue()), O.getValue());
+ Ops.push_back(VE);
+ }
+ if (ConstOpnd != 0) {
+ Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
+ ValueEntry VE(getRank(C), C);
+ Ops.push_back(VE);
+ }
+ int Sz = Ops.size();
+ if (Sz == 1)
+ return Ops.back().Op;
+ else if (Sz == 0) {
+ assert(ConstOpnd == 0);
+ return ConstantInt::get(Ty->getContext(), ConstOpnd);
+ }
+ }
+
+ return 0;
+}
+
+/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
+/// optimizes based on identities. If it can be reduced to a single Value, it
+/// is returned, otherwise the Ops list is mutated as necessary.
+Value *Reassociate::OptimizeAdd(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Scan the operand lists looking for X and -X pairs. If we find any, we
+ // can simplify the expression. X+-X == 0. While we're at it, scan for any
+ // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
+ //
+ // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
+ //
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ Value *TheOp = Ops[i].Op;
+ // Check to see if we've seen this operand before. If so, we factor all
+ // instances of the operand together. Due to our sorting criteria, we know
+ // that these need to be next to each other in the vector.
+ if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
+ // Rescan the list, remove all instances of this operand from the expr.
+ unsigned NumFound = 0;
+ do {
+ Ops.erase(Ops.begin()+i);
+ ++NumFound;
+ } while (i != Ops.size() && Ops[i].Op == TheOp);
+
+ DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
+ ++NumFactor;
+
+ // Insert a new multiply.
+ Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
+ Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);
+
+ // Now that we have inserted a multiply, optimize it. This allows us to
+ // handle cases that require multiple factoring steps, such as this:
+ // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
+ RedoInsts.insert(cast<Instruction>(Mul));
+
+ // If every add operand was a duplicate, return the multiply.
+ if (Ops.empty())
+ return Mul;
+
+ // Otherwise, we had some input that didn't have the dupe, such as
+ // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
+ // things being added by this operation.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
+
+ --i;
+ e = Ops.size();
+ continue;
+ }
+
+ // Check for X and -X in the operand list.
+ if (!BinaryOperator::isNeg(TheOp))
+ continue;
+
+ Value *X = BinaryOperator::getNegArgument(TheOp);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX == i)
+ continue;
+
+ // Remove X and -X from the operand list.
+ if (Ops.size() == 2)
+ return Constant::getNullValue(X->getType());
+
+ Ops.erase(Ops.begin()+i);
+ if (i < FoundX)
+ --FoundX;
+ else
+ --i; // Need to back up an extra one.
+ Ops.erase(Ops.begin()+FoundX);
+ ++NumAnnihil;
+ --i; // Revisit element.
+ e -= 2; // Removed two elements.
+ }
+
+ // Scan the operand list, checking to see if there are any common factors
+ // between operands. Consider something like A*A+A*B*C+D. We would like to
+ // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
+ // To efficiently find this, we count the number of times a factor occurs
+ // for any ADD operands that are MULs.
+ DenseMap<Value*, unsigned> FactorOccurrences;
+
+ // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
+ // where they are actually the same multiply.
+ unsigned MaxOcc = 0;
+ Value *MaxOccVal = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
+ if (!BOp)
+ continue;
+
+ // Compute all of the factors of this added value.
+ SmallVector<Value*, 8> Factors;
+ FindSingleUseMultiplyFactors(BOp, Factors, Ops);
+ assert(Factors.size() > 1 && "Bad linearize!");
+
+ // Add one to FactorOccurrences for each unique factor in this op.
+ SmallPtrSet<Value*, 8> Duplicates;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
+ Value *Factor = Factors[i];
+ if (!Duplicates.insert(Factor)) continue;
+
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+
+ // If Factor is a negative constant, add the negated value as a factor
+ // because we can percolate the negate out. Watch for minint, which
+ // cannot be positivified.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
+ if (CI->isNegative() && !CI->isMinValue(true)) {
+ Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
+ assert(!Duplicates.count(Factor) &&
+ "Shouldn't have two constant factors, missed a canonicalize");
+
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+ }
+ }
+ }
+
+ // If any factor occurred more than one time, we can pull it out.
+ if (MaxOcc > 1) {
+ DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
+ ++NumFactor;
+
+ // Create a new instruction that uses the MaxOccVal twice. If we don't do
+ // this, we could otherwise run into situations where removing a factor
+ // from an expression will drop a use of maxocc, and this can cause
+ // RemoveFactorFromExpression on successive values to behave differently.
+ Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
+ SmallVector<WeakVH, 4> NewMulOps;
+ for (unsigned i = 0; i != Ops.size(); ++i) {
+ // Only try to remove factors from expressions we're allowed to.
+ BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
+ if (!BOp)
+ continue;
+
+ if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
+ // The factorized operand may occur several times. Convert them all in
+ // one fell swoop.
+ for (unsigned j = Ops.size(); j != i;) {
+ --j;
+ if (Ops[j].Op == Ops[i].Op) {
+ NewMulOps.push_back(V);
+ Ops.erase(Ops.begin()+j);
+ }
+ }
+ --i;
+ }
+ }
+
+ // No need for extra uses anymore.
+ delete DummyInst;
+
+ unsigned NumAddedValues = NewMulOps.size();
+ Value *V = EmitAddTreeOfValues(I, NewMulOps);
+
+ // Now that we have inserted the add tree, optimize it. This allows us to
+ // handle cases that require multiple factoring steps, such as this:
+ // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
+ assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
+ (void)NumAddedValues;
+ if (Instruction *VI = dyn_cast<Instruction>(V))
+ RedoInsts.insert(VI);
+
+ // Create the multiply.
+ Instruction *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
+
+ // Rerun associate on the multiply in case the inner expression turned into
+ // a multiply. We want to make sure that we keep things in canonical form.
+ RedoInsts.insert(V2);
+
+ // If every add operand included the factor (e.g. "A*B + A*C"), then the
+ // entire result expression is just the multiply "A*(B+C)".
+ if (Ops.empty())
+ return V2;
+
+ // Otherwise, we had some input that didn't have the factor, such as
+ // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
+ // things being added by this operation.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
+ }
+
+ return 0;
+}
+
+namespace {
+ /// \brief Predicate tests whether a ValueEntry's op is in a map.
+ struct IsValueInMap {
+ const DenseMap<Value *, unsigned> &Map;
+
+ IsValueInMap(const DenseMap<Value *, unsigned> &Map) : Map(Map) {}
+
+ bool operator()(const ValueEntry &Entry) {
+ return Map.find(Entry.Op) != Map.end();
+ }
+ };
+}
+
+/// \brief Build up a vector of value/power pairs factoring a product.
+///
+/// Given a series of multiplication operands, build a vector of factors and
+/// the powers each is raised to when forming the final product. Sort them in
+/// the order of descending power.
+///
+/// (x*x) -> [(x, 2)]
+/// ((x*x)*x) -> [(x, 3)]
+/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
+///
+/// \returns Whether any factors have a power greater than one.
+bool Reassociate::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
+ SmallVectorImpl<Factor> &Factors) {
+ // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
+ // Compute the sum of powers of simplifiable factors.
+ unsigned FactorPowerSum = 0;
+ for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
+ Value *Op = Ops[Idx-1].Op;
+
+ // Count the number of occurrences of this value.
+ unsigned Count = 1;
+ for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
+ ++Count;
+ // Track for simplification all factors which occur 2 or more times.
+ if (Count > 1)
+ FactorPowerSum += Count;
+ }
+
+ // We can only simplify factors if the sum of the powers of our simplifiable
+ // factors is 4 or higher. When that is the case, we will *always* have
+ // a simplification. This is an important invariant to prevent cyclicly
+ // trying to simplify already minimal formations.
+ if (FactorPowerSum < 4)
+ return false;
+
+ // Now gather the simplifiable factors, removing them from Ops.
+ FactorPowerSum = 0;
+ for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
+ Value *Op = Ops[Idx-1].Op;
+
+ // Count the number of occurrences of this value.
+ unsigned Count = 1;
+ for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
+ ++Count;
+ if (Count == 1)
+ continue;
+ // Move an even number of occurrences to Factors.
+ Count &= ~1U;
+ Idx -= Count;
+ FactorPowerSum += Count;
+ Factors.push_back(Factor(Op, Count));
+ Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
+ }
+
+ // None of the adjustments above should have reduced the sum of factor powers
+ // below our mininum of '4'.
+ assert(FactorPowerSum >= 4);
+
+ std::sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
+ return true;
+}
+
+/// \brief Build a tree of multiplies, computing the product of Ops.
+static Value *buildMultiplyTree(IRBuilder<> &Builder,
+ SmallVectorImpl<Value*> &Ops) {
+ if (Ops.size() == 1)
+ return Ops.back();
+
+ Value *LHS = Ops.pop_back_val();
+ do {
+ LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
+ } while (!Ops.empty());
+
+ return LHS;
+}
+
+/// \brief Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
+///
+/// Given a vector of values raised to various powers, where no two values are
+/// equal and the powers are sorted in decreasing order, compute the minimal
+/// DAG of multiplies to compute the final product, and return that product
+/// value.
+Value *Reassociate::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
+ SmallVectorImpl<Factor> &Factors) {
+ assert(Factors[0].Power);
+ SmallVector<Value *, 4> OuterProduct;
+ for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
+ Idx < Size && Factors[Idx].Power > 0; ++Idx) {
+ if (Factors[Idx].Power != Factors[LastIdx].Power) {
+ LastIdx = Idx;
+ continue;
+ }
+
+ // We want to multiply across all the factors with the same power so that
+ // we can raise them to that power as a single entity. Build a mini tree
+ // for that.
+ SmallVector<Value *, 4> InnerProduct;
+ InnerProduct.push_back(Factors[LastIdx].Base);
+ do {
+ InnerProduct.push_back(Factors[Idx].Base);
+ ++Idx;
+ } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
+
+ // Reset the base value of the first factor to the new expression tree.
+ // We'll remove all the factors with the same power in a second pass.
+ Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
+ if (Instruction *MI = dyn_cast<Instruction>(M))
+ RedoInsts.insert(MI);
+
+ LastIdx = Idx;
+ }
+ // Unique factors with equal powers -- we've folded them into the first one's
+ // base.
+ Factors.erase(std::unique(Factors.begin(), Factors.end(),
+ Factor::PowerEqual()),
+ Factors.end());
+
+ // Iteratively collect the base of each factor with an add power into the
+ // outer product, and halve each power in preparation for squaring the
+ // expression.
+ for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
+ if (Factors[Idx].Power & 1)
+ OuterProduct.push_back(Factors[Idx].Base);
+ Factors[Idx].Power >>= 1;
+ }
+ if (Factors[0].Power) {
+ Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
+ OuterProduct.push_back(SquareRoot);
+ OuterProduct.push_back(SquareRoot);
+ }
+ if (OuterProduct.size() == 1)
+ return OuterProduct.front();
+
+ Value *V = buildMultiplyTree(Builder, OuterProduct);
+ return V;
+}
+
+Value *Reassociate::OptimizeMul(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // We can only optimize the multiplies when there is a chain of more than
+ // three, such that a balanced tree might require fewer total multiplies.
+ if (Ops.size() < 4)
+ return 0;
+
+ // Try to turn linear trees of multiplies without other uses of the
+ // intermediate stages into minimal multiply DAGs with perfect sub-expression
+ // re-use.
+ SmallVector<Factor, 4> Factors;
+ if (!collectMultiplyFactors(Ops, Factors))
+ return 0; // All distinct factors, so nothing left for us to do.
+
+ IRBuilder<> Builder(I);
+ Value *V = buildMinimalMultiplyDAG(Builder, Factors);
+ if (Ops.empty())
+ return V;
+
+ ValueEntry NewEntry = ValueEntry(getRank(V), V);
+ Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
+ return 0;
+}
+
+Value *Reassociate::OptimizeExpression(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Now that we have the linearized expression tree, try to optimize it.
+ // Start by folding any constants that we found.
+ Constant *Cst = 0;
+ unsigned Opcode = I->getOpcode();
+ while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
+ Constant *C = cast<Constant>(Ops.pop_back_val().Op);
+ Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C;
+ }
+ // If there was nothing but constants then we are done.
+ if (Ops.empty())
+ return Cst;
+
+ // Put the combined constant back at the end of the operand list, except if
+ // there is no point. For example, an add of 0 gets dropped here, while a
+ // multiplication by zero turns the whole expression into zero.
+ if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
+ if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
+ return Cst;
+ Ops.push_back(ValueEntry(0, Cst));
+ }
+
+ if (Ops.size() == 1) return Ops[0].Op;
+
+ // Handle destructive annihilation due to identities between elements in the
+ // argument list here.
+ unsigned NumOps = Ops.size();
+ switch (Opcode) {
+ default: break;
+ case Instruction::And:
+ case Instruction::Or:
+ if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
+ return Result;
+ break;
+
+ case Instruction::Xor:
+ if (Value *Result = OptimizeXor(I, Ops))
+ return Result;
+ break;
+
+ case Instruction::Add:
+ if (Value *Result = OptimizeAdd(I, Ops))
+ return Result;
+ break;
+
+ case Instruction::Mul:
+ if (Value *Result = OptimizeMul(I, Ops))
+ return Result;
+ break;
+ }
+
+ if (Ops.size() != NumOps)
+ return OptimizeExpression(I, Ops);
+ return 0;
+}
+
+/// EraseInst - Zap the given instruction, adding interesting operands to the
+/// work list.
+void Reassociate::EraseInst(Instruction *I) {
+ assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
+ SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ // Erase the dead instruction.
+ ValueRankMap.erase(I);
+ RedoInsts.remove(I);
+ I->eraseFromParent();
+ // Optimize its operands.
+ SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
+ // If this is a node in an expression tree, climb to the expression root
+ // and add that since that's where optimization actually happens.
+ unsigned Opcode = Op->getOpcode();
+ while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode &&
+ Visited.insert(Op))
+ Op = Op->use_back();
+ RedoInsts.insert(Op);
+ }
+}
+
+/// OptimizeInst - Inspect and optimize the given instruction. Note that erasing
+/// instructions is not allowed.
+void Reassociate::OptimizeInst(Instruction *I) {
+ // Only consider operations that we understand.
+ if (!isa<BinaryOperator>(I))
+ return;
+
+ if (I->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(I->getOperand(1)))
+ // If an operand of this shift is a reassociable multiply, or if the shift
+ // is used by a reassociable multiply or add, turn into a multiply.
+ if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
+ (I->hasOneUse() &&
+ (isReassociableOp(I->use_back(), Instruction::Mul) ||
+ isReassociableOp(I->use_back(), Instruction::Add)))) {
+ Instruction *NI = ConvertShiftToMul(I);
+ RedoInsts.insert(I);
+ MadeChange = true;
+ I = NI;
+ }
+
+ // Floating point binary operators are not associative, but we can still
+ // commute (some) of them, to canonicalize the order of their operands.
+ // This can potentially expose more CSE opportunities, and makes writing
+ // other transformations simpler.
+ if ((I->getType()->isFloatingPointTy() || I->getType()->isVectorTy())) {
+ // FAdd and FMul can be commuted.
+ if (I->getOpcode() != Instruction::FMul &&
+ I->getOpcode() != Instruction::FAdd)
+ return;
+
+ Value *LHS = I->getOperand(0);
+ Value *RHS = I->getOperand(1);
+ unsigned LHSRank = getRank(LHS);
+ unsigned RHSRank = getRank(RHS);
+
+ // Sort the operands by rank.
+ if (RHSRank < LHSRank) {
+ I->setOperand(0, RHS);
+ I->setOperand(1, LHS);
+ }
+
+ return;
+ }
+
+ // Do not reassociate boolean (i1) expressions. We want to preserve the
+ // original order of evaluation for short-circuited comparisons that
+ // SimplifyCFG has folded to AND/OR expressions. If the expression
+ // is not further optimized, it is likely to be transformed back to a
+ // short-circuited form for code gen, and the source order may have been
+ // optimized for the most likely conditions.
+ if (I->getType()->isIntegerTy(1))
+ return;
+
+ // If this is a subtract instruction which is not already in negate form,
+ // see if we can convert it to X+-Y.
+ if (I->getOpcode() == Instruction::Sub) {
+ if (ShouldBreakUpSubtract(I)) {
+ Instruction *NI = BreakUpSubtract(I);
+ RedoInsts.insert(I);
+ MadeChange = true;
+ I = NI;
+ } else if (BinaryOperator::isNeg(I)) {
+ // Otherwise, this is a negation. See if the operand is a multiply tree
+ // and if this is not an inner node of a multiply tree.
+ if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
+ (!I->hasOneUse() ||
+ !isReassociableOp(I->use_back(), Instruction::Mul))) {
+ Instruction *NI = LowerNegateToMultiply(I);
+ RedoInsts.insert(I);
+ MadeChange = true;
+ I = NI;
+ }
+ }
+ }
+
+ // If this instruction is an associative binary operator, process it.
+ if (!I->isAssociative()) return;
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+
+ // If this is an interior node of a reassociable tree, ignore it until we
+ // get to the root of the tree, to avoid N^2 analysis.
+ unsigned Opcode = BO->getOpcode();
+ if (BO->hasOneUse() && BO->use_back()->getOpcode() == Opcode)
+ return;
+
+ // If this is an add tree that is used by a sub instruction, ignore it
+ // until we process the subtract.
+ if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
+ cast<Instruction>(BO->use_back())->getOpcode() == Instruction::Sub)
+ return;
+
+ ReassociateExpression(BO);
+}
+
+void Reassociate::ReassociateExpression(BinaryOperator *I) {
+
+ // First, walk the expression tree, linearizing the tree, collecting the
+ // operand information.
+ SmallVector<RepeatedValue, 8> Tree;
+ MadeChange |= LinearizeExprTree(I, Tree);
+ SmallVector<ValueEntry, 8> Ops;
+ Ops.reserve(Tree.size());
+ for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
+ RepeatedValue E = Tree[i];
+ Ops.append(E.second.getZExtValue(),
+ ValueEntry(getRank(E.first), E.first));
+ }
+
+ DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
+
+ // Now that we have linearized the tree to a list and have gathered all of
+ // the operands and their ranks, sort the operands by their rank. Use a
+ // stable_sort so that values with equal ranks will have their relative
+ // positions maintained (and so the compiler is deterministic). Note that
+ // this sorts so that the highest ranking values end up at the beginning of
+ // the vector.
+ std::stable_sort(Ops.begin(), Ops.end());
+
+ // OptimizeExpression - Now that we have the expression tree in a convenient
+ // sorted form, optimize it globally if possible.
+ if (Value *V = OptimizeExpression(I, Ops)) {
+ if (V == I)
+ // Self-referential expression in unreachable code.
+ return;
+ // This expression tree simplified to something that isn't a tree,
+ // eliminate it.
+ DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
+ I->replaceAllUsesWith(V);
+ if (Instruction *VI = dyn_cast<Instruction>(V))
+ VI->setDebugLoc(I->getDebugLoc());
+ RedoInsts.insert(I);
+ ++NumAnnihil;
+ return;
+ }
+
+ // We want to sink immediates as deeply as possible except in the case where
+ // this is a multiply tree used only by an add, and the immediate is a -1.
+ // In this case we reassociate to put the negation on the outside so that we
+ // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
+ if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Ops.back().Op) &&
+ cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
+ ValueEntry Tmp = Ops.pop_back_val();
+ Ops.insert(Ops.begin(), Tmp);
+ }
+
+ DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
+
+ if (Ops.size() == 1) {
+ if (Ops[0].Op == I)
+ // Self-referential expression in unreachable code.
+ return;
+
+ // This expression tree simplified to something that isn't a tree,
+ // eliminate it.
+ I->replaceAllUsesWith(Ops[0].Op);
+ if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
+ OI->setDebugLoc(I->getDebugLoc());
+ RedoInsts.insert(I);
+ return;
+ }
+
+ // Now that we ordered and optimized the expressions, splat them back into
+ // the expression tree, removing any unneeded nodes.
+ RewriteExprTree(I, Ops);
+}
+
+bool Reassociate::runOnFunction(Function &F) {
+ // Calculate the rank map for F
+ BuildRankMap(F);
+
+ MadeChange = false;
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Optimize every instruction in the basic block.
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
+ if (isInstructionTriviallyDead(II)) {
+ EraseInst(II++);
+ } else {
+ OptimizeInst(II);
+ assert(II->getParent() == BI && "Moved to a different block!");
+ ++II;
+ }
+
+ // If this produced extra instructions to optimize, handle them now.
+ while (!RedoInsts.empty()) {
+ Instruction *I = RedoInsts.pop_back_val();
+ if (isInstructionTriviallyDead(I))
+ EraseInst(I);
+ else
+ OptimizeInst(I);
+ }
+ }
+
+ // We are done with the rank map.
+ RankMap.clear();
+ ValueRankMap.clear();
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
new file mode 100644
index 000000000000..07f540a30127
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -0,0 +1,133 @@
+//===- Reg2Mem.cpp - Convert registers to allocas -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file demotes all registers to memory references. It is intended to be
+// the inverse of PromoteMemoryToRegister. By converting to loads, the only
+// values live across basic blocks are allocas and loads before phi nodes.
+// It is intended that this should make CFG hacking much easier.
+// To make later hacking easier, the entry block is split into two, such that
+// all introduced allocas and nothing else are in the entry block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg2mem"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumRegsDemoted, "Number of registers demoted");
+STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
+
+namespace {
+ struct RegToMem : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ RegToMem() : FunctionPass(ID) {
+ initializeRegToMemPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addPreservedID(BreakCriticalEdgesID);
+ }
+
+ bool valueEscapes(const Instruction *Inst) const {
+ const BasicBlock *BB = Inst->getParent();
+ for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end();
+ UI != E; ++UI) {
+ const Instruction *I = cast<Instruction>(*UI);
+ if (I->getParent() != BB || isa<PHINode>(I))
+ return true;
+ }
+ return false;
+ }
+
+ virtual bool runOnFunction(Function &F);
+ };
+}
+
+char RegToMem::ID = 0;
+INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
+
+bool RegToMem::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ // Insert all new allocas into entry block.
+ BasicBlock *BBEntry = &F.getEntryBlock();
+ assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
+ "Entry block to function must not have predecessors!");
+
+ // Find first non-alloca instruction and create insertion point. This is
+ // safe if block is well-formed: it always have terminator, otherwise
+ // we'll get and assertion.
+ BasicBlock::iterator I = BBEntry->begin();
+ while (isa<AllocaInst>(I)) ++I;
+
+ CastInst *AllocaInsertionPoint =
+ new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ Type::getInt32Ty(F.getContext()),
+ "reg2mem alloca point", I);
+
+ // Find the escaped instructions. But don't create stack slots for
+ // allocas in entry block.
+ std::list<Instruction*> WorkList;
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib) {
+ if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
+ valueEscapes(iib)) {
+ WorkList.push_front(&*iib);
+ }
+ }
+
+ // Demote escaped instructions
+ NumRegsDemoted += WorkList.size();
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ ile = WorkList.end(); ilb != ile; ++ilb)
+ DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
+
+ WorkList.clear();
+
+ // Find all phi's
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib)
+ if (isa<PHINode>(iib))
+ WorkList.push_front(&*iib);
+
+ // Demote phi nodes
+ NumPhisDemoted += WorkList.size();
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ ile = WorkList.end(); ilb != ile; ++ilb)
+ DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
+
+ return true;
+}
+
+
+// createDemoteRegisterToMemory - Provide an entry point to create this pass.
+char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
+FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
+ return new RegToMem();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
new file mode 100644
index 000000000000..e30a2746b01e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -0,0 +1,1936 @@
+//===- SCCP.cpp - Sparse Conditional Constant Propagation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements sparse conditional constant propagation and merging:
+//
+// Specifically, this:
+// * Assumes values are constant unless proven otherwise
+// * Assumes BasicBlocks are dead unless proven otherwise
+// * Proves values to be constant, and replaces them with constants
+// * Proves conditional branches to be unconditional
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sccp"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumInstRemoved, "Number of instructions removed");
+STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
+
+STATISTIC(IPNumInstRemoved, "Number of instructions removed by IPSCCP");
+STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP");
+STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP");
+
+namespace {
+/// LatticeVal class - This class represents the different lattice values that
+/// an LLVM value may occupy. It is a simple class with value semantics.
+///
+class LatticeVal {
+ enum LatticeValueTy {
+ /// undefined - This LLVM Value has no known value yet.
+ undefined,
+
+ /// constant - This LLVM Value has a specific constant value.
+ constant,
+
+ /// forcedconstant - This LLVM Value was thought to be undef until
+ /// ResolvedUndefsIn. This is treated just like 'constant', but if merged
+ /// with another (different) constant, it goes to overdefined, instead of
+ /// asserting.
+ forcedconstant,
+
+ /// overdefined - This instruction is not known to be constant, and we know
+ /// it has a value.
+ overdefined
+ };
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'forcedconstant' value.
+ PointerIntPair<Constant *, 2, LatticeValueTy> Val;
+
+ LatticeValueTy getLatticeValue() const {
+ return Val.getInt();
+ }
+
+public:
+ LatticeVal() : Val(0, undefined) {}
+
+ bool isUndefined() const { return getLatticeValue() == undefined; }
+ bool isConstant() const {
+ return getLatticeValue() == constant || getLatticeValue() == forcedconstant;
+ }
+ bool isOverdefined() const { return getLatticeValue() == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val.getPointer();
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+
+ Val.setInt(overdefined);
+ return true;
+ }
+
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ if (getLatticeValue() == constant) { // Constant but not forcedconstant.
+ assert(getConstant() == V && "Marking constant with different value");
+ return false;
+ }
+
+ if (isUndefined()) {
+ Val.setInt(constant);
+ assert(V && "Marking constant with NULL");
+ Val.setPointer(V);
+ } else {
+ assert(getLatticeValue() == forcedconstant &&
+ "Cannot move from overdefined to constant!");
+ // Stay at forcedconstant if the constant is the same.
+ if (V == getConstant()) return false;
+
+ // Otherwise, we go to overdefined. Assumptions made based on the
+ // forced value are possibly wrong. Assuming this is another constant
+ // could expose a contradiction.
+ Val.setInt(overdefined);
+ }
+ return true;
+ }
+
+ /// getConstantInt - If this is a constant with a ConstantInt value, return it
+ /// otherwise return null.
+ ConstantInt *getConstantInt() const {
+ if (isConstant())
+ return dyn_cast<ConstantInt>(getConstant());
+ return 0;
+ }
+
+ void markForcedConstant(Constant *V) {
+ assert(isUndefined() && "Can't force a defined value!");
+ Val.setInt(forcedconstant);
+ Val.setPointer(V);
+ }
+};
+} // end anonymous namespace.
+
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+//
+/// SCCPSolver - This class is a general purpose solver for Sparse Conditional
+/// Constant Propagation.
+///
+class SCCPSolver : public InstVisitor<SCCPSolver> {
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
+ DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
+
+ /// StructValueState - This maintains ValueState for values that have
+ /// StructType, for example for formal arguments, calls, insertelement, etc.
+ ///
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal> StructValueState;
+
+ /// GlobalValue - If we are tracking any values for the contents of a global
+ /// variable, we keep a mapping from the constant accessor to the element of
+ /// the global, to the currently known value. If the value becomes
+ /// overdefined, it's entry is simply removed from this map.
+ DenseMap<GlobalVariable*, LatticeVal> TrackedGlobals;
+
+ /// TrackedRetVals - If we are tracking arguments into and the return
+ /// value out of a function, it will have an entry in this map, indicating
+ /// what the known return value for the function is.
+ DenseMap<Function*, LatticeVal> TrackedRetVals;
+
+ /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
+ /// that return multiple values.
+ DenseMap<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
+
+ /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
+ /// represented here for efficient lookup.
+ SmallPtrSet<Function*, 16> MRVFunctionsTracked;
+
+ /// TrackingIncomingArguments - This is the set of functions for whose
+ /// arguments we make optimistic assumptions about and try to prove as
+ /// constants.
+ SmallPtrSet<Function*, 16> TrackingIncomingArguments;
+
+ /// The reason for two worklists is that overdefined is the lowest state
+ /// on the lattice, and moving things to overdefined as fast as possible
+ /// makes SCCP converge much faster.
+ ///
+ /// By having a separate worklist, we accomplish this because everything
+ /// possibly overdefined will become overdefined at the soonest possible
+ /// point.
+ SmallVector<Value*, 64> OverdefinedInstWorkList;
+ SmallVector<Value*, 64> InstWorkList;
+
+
+ SmallVector<BasicBlock*, 64> BBWorkList; // The BasicBlock work list
+
+ /// KnownFeasibleEdges - Entries in this set are edges which have already had
+ /// PHI nodes retriggered.
+ typedef std::pair<BasicBlock*, BasicBlock*> Edge;
+ DenseSet<Edge> KnownFeasibleEdges;
+public:
+ SCCPSolver(const DataLayout *td, const TargetLibraryInfo *tli)
+ : TD(td), TLI(tli) {}
+
+ /// MarkBlockExecutable - This method can be used by clients to mark all of
+ /// the blocks that are known to be intrinsically live in the processed unit.
+ ///
+ /// This returns true if the block was not considered live before.
+ bool MarkBlockExecutable(BasicBlock *BB) {
+ if (!BBExecutable.insert(BB)) return false;
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+ BBWorkList.push_back(BB); // Add the block to the work list!
+ return true;
+ }
+
+ /// TrackValueOfGlobalVariable - Clients can use this method to
+ /// inform the SCCPSolver that it should track loads and stores to the
+ /// specified global variable if it can. This is only legal to call if
+ /// performing Interprocedural SCCP.
+ void TrackValueOfGlobalVariable(GlobalVariable *GV) {
+ // We only track the contents of scalar globals.
+ if (GV->getType()->getElementType()->isSingleValueType()) {
+ LatticeVal &IV = TrackedGlobals[GV];
+ if (!isa<UndefValue>(GV->getInitializer()))
+ IV.markConstant(GV->getInitializer());
+ }
+ }
+
+ /// AddTrackedFunction - If the SCCP solver is supposed to track calls into
+ /// and out of the specified function (which cannot have its address taken),
+ /// this method must be called.
+ void AddTrackedFunction(Function *F) {
+ // Add an entry, F -> undef.
+ if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ MRVFunctionsTracked.insert(F);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
+ LatticeVal()));
+ } else
+ TrackedRetVals.insert(std::make_pair(F, LatticeVal()));
+ }
+
+ void AddArgumentTrackedFunction(Function *F) {
+ TrackingIncomingArguments.insert(F);
+ }
+
+ /// Solve - Solve for constants and executable blocks.
+ ///
+ void Solve();
+
+ /// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+ /// that branches on undef values cannot reach any of their successors.
+ /// However, this is not a safe assumption. After we solve dataflow, this
+ /// method should be use to handle this. If this returns true, the solver
+ /// should be rerun.
+ bool ResolvedUndefsIn(Function &F);
+
+ bool isBlockExecutable(BasicBlock *BB) const {
+ return BBExecutable.count(BB);
+ }
+
+ LatticeVal getLatticeValueFor(Value *V) const {
+ DenseMap<Value*, LatticeVal>::const_iterator I = ValueState.find(V);
+ assert(I != ValueState.end() && "V is not in valuemap!");
+ return I->second;
+ }
+
+ /// getTrackedRetVals - Get the inferred return value map.
+ ///
+ const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
+ return TrackedRetVals;
+ }
+
+ /// getTrackedGlobals - Get and return the set of inferred initializers for
+ /// global variables.
+ const DenseMap<GlobalVariable*, LatticeVal> &getTrackedGlobals() {
+ return TrackedGlobals;
+ }
+
+ void markOverdefined(Value *V) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ markOverdefined(ValueState[V], V);
+ }
+
+ /// markAnythingOverdefined - Mark the specified value overdefined. This
+ /// works with both scalars and structs.
+ void markAnythingOverdefined(Value *V) {
+ if (StructType *STy = dyn_cast<StructType>(V->getType()))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ markOverdefined(getStructValueState(V, i), V);
+ else
+ markOverdefined(V);
+ }
+
+private:
+ // markConstant - Make a value be marked as "constant". If the value
+ // is not already a constant, add it to the instruction work list so that
+ // the users of the instruction are updated later.
+ //
+ void markConstant(LatticeVal &IV, Value *V, Constant *C) {
+ if (!IV.markConstant(C)) return;
+ DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
+ if (IV.isOverdefined())
+ OverdefinedInstWorkList.push_back(V);
+ else
+ InstWorkList.push_back(V);
+ }
+
+ void markConstant(Value *V, Constant *C) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ markConstant(ValueState[V], V, C);
+ }
+
+ void markForcedConstant(Value *V, Constant *C) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ LatticeVal &IV = ValueState[V];
+ IV.markForcedConstant(C);
+ DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
+ if (IV.isOverdefined())
+ OverdefinedInstWorkList.push_back(V);
+ else
+ InstWorkList.push_back(V);
+ }
+
+
+ // markOverdefined - Make a value be marked as "overdefined". If the
+ // value is not already overdefined, add it to the overdefined instruction
+ // work list so that the users of the instruction are updated later.
+ void markOverdefined(LatticeVal &IV, Value *V) {
+ if (!IV.markOverdefined()) return;
+
+ DEBUG(dbgs() << "markOverdefined: ";
+ if (Function *F = dyn_cast<Function>(V))
+ dbgs() << "Function '" << F->getName() << "'\n";
+ else
+ dbgs() << *V << '\n');
+ // Only instructions go on the work list
+ OverdefinedInstWorkList.push_back(V);
+ }
+
+ void mergeInValue(LatticeVal &IV, Value *V, LatticeVal MergeWithV) {
+ if (IV.isOverdefined() || MergeWithV.isUndefined())
+ return; // Noop.
+ if (MergeWithV.isOverdefined())
+ markOverdefined(IV, V);
+ else if (IV.isUndefined())
+ markConstant(IV, V, MergeWithV.getConstant());
+ else if (IV.getConstant() != MergeWithV.getConstant())
+ markOverdefined(IV, V);
+ }
+
+ void mergeInValue(Value *V, LatticeVal MergeWithV) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ mergeInValue(ValueState[V], V, MergeWithV);
+ }
+
+
+ /// getValueState - Return the LatticeVal object that corresponds to the
+ /// value. This function handles the case when the value hasn't been seen yet
+ /// by properly seeding constants etc.
+ LatticeVal &getValueState(Value *V) {
+ assert(!V->getType()->isStructTy() && "Should use getStructValueState");
+
+ std::pair<DenseMap<Value*, LatticeVal>::iterator, bool> I =
+ ValueState.insert(std::make_pair(V, LatticeVal()));
+ LatticeVal &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Undef values remain undefined.
+ if (!isa<UndefValue>(V))
+ LV.markConstant(C); // Constants are constant
+ }
+
+ // All others are underdefined by default.
+ return LV;
+ }
+
+ /// getStructValueState - Return the LatticeVal object that corresponds to the
+ /// value/field pair. This function handles the case when the value hasn't
+ /// been seen yet by properly seeding constants etc.
+ LatticeVal &getStructValueState(Value *V, unsigned i) {
+ assert(V->getType()->isStructTy() && "Should use getValueState");
+ assert(i < cast<StructType>(V->getType())->getNumElements() &&
+ "Invalid element #");
+
+ std::pair<DenseMap<std::pair<Value*, unsigned>, LatticeVal>::iterator,
+ bool> I = StructValueState.insert(
+ std::make_pair(std::make_pair(V, i), LatticeVal()));
+ LatticeVal &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ Constant *Elt = C->getAggregateElement(i);
+
+ if (Elt == 0)
+ LV.markOverdefined(); // Unknown sort of constant.
+ else if (isa<UndefValue>(Elt))
+ ; // Undef values remain undefined.
+ else
+ LV.markConstant(Elt); // Constants are constant.
+ }
+
+ // All others are underdefined by default.
+ return LV;
+ }
+
+
+ /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+ /// work list if it is not already executable.
+ void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ if (!MarkBlockExecutable(Dest)) {
+ // If the destination is already executable, we just made an *edge*
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ visitPHINode(*PN);
+ }
+ }
+
+ // getFeasibleSuccessors - Return a vector of booleans to indicate which
+ // successors are reachable from a given terminator instruction.
+ //
+ void getFeasibleSuccessors(TerminatorInst &TI, SmallVector<bool, 16> &Succs);
+
+ // isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+ // block to the 'To' basic block is currently feasible.
+ //
+ bool isEdgeFeasible(BasicBlock *From, BasicBlock *To);
+
+ // OperandChangedState - This method is invoked on all of the users of an
+ // instruction that was just changed state somehow. Based on this
+ // information, we need to update the specified user of this instruction.
+ //
+ void OperandChangedState(Instruction *I) {
+ if (BBExecutable.count(I->getParent())) // Inst is executable?
+ visit(*I);
+ }
+
+private:
+ friend class InstVisitor<SCCPSolver>;
+
+ // visit implementations - Something changed in this instruction. Either an
+ // operand made a transition, or the instruction is newly executable. Change
+ // the value type of I to reflect these changes if appropriate.
+ void visitPHINode(PHINode &I);
+
+ // Terminators
+ void visitReturnInst(ReturnInst &I);
+ void visitTerminatorInst(TerminatorInst &TI);
+
+ void visitCastInst(CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitCmpInst(CmpInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &I);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+ void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
+
+ // Instructions that cannot be folded away.
+ void visitStoreInst (StoreInst &I);
+ void visitLoadInst (LoadInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitCallInst (CallInst &I) {
+ visitCallSite(&I);
+ }
+ void visitInvokeInst (InvokeInst &II) {
+ visitCallSite(&II);
+ visitTerminatorInst(II);
+ }
+ void visitCallSite (CallSite CS);
+ void visitResumeInst (TerminatorInst &I) { /*returns void*/ }
+ void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
+ void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+ void visitFenceInst (FenceInst &I) { /*returns void*/ }
+ void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); }
+ void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); }
+ void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
+ void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
+
+ void visitInstruction(Instruction &I) {
+ // If a new instruction is added to LLVM that we don't handle.
+ dbgs() << "SCCP: Don't know how to handle: " << I;
+ markAnythingOverdefined(&I); // Just in case
+ }
+};
+
+} // end anonymous namespace
+
+
+// getFeasibleSuccessors - Return a vector of booleans to indicate which
+// successors are reachable from a given terminator instruction.
+//
+void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVector<bool, 16> &Succs) {
+ Succs.resize(TI.getNumSuccessors());
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ LatticeVal BCValue = getValueState(BI->getCondition());
+ ConstantInt *CI = BCValue.getConstantInt();
+ if (CI == 0) {
+ // Overdefined condition variables, and branches on unfoldable constant
+ // conditions, mean the branch could go either way.
+ if (!BCValue.isUndefined())
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // Constant condition variables mean the branch can only go a single way.
+ Succs[CI->isZero()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
+ // Invoke instructions successors are always executable.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+ if (!SI->getNumCases()) {
+ Succs[0] = true;
+ return;
+ }
+ LatticeVal SCValue = getValueState(SI->getCondition());
+ ConstantInt *CI = SCValue.getConstantInt();
+
+ if (CI == 0) { // Overdefined or undefined condition?
+ // All destinations are executable!
+ if (!SCValue.isUndefined())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ Succs[SI->findCaseValue(CI).getSuccessorIndex()] = true;
+ return;
+ }
+
+ // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
+ if (isa<IndirectBrInst>(&TI)) {
+ // Just mark all destinations executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+#ifndef NDEBUG
+ dbgs() << "Unknown terminator instruction: " << TI << '\n';
+#endif
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
+}
+
+
+// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+// block to the 'To' basic block is currently feasible.
+//
+bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
+ assert(BBExecutable.count(To) && "Dest should always be alive!");
+
+ // Make sure the source basic block is executable!!
+ if (!BBExecutable.count(From)) return false;
+
+ // Check to make sure this edge itself is actually feasible now.
+ TerminatorInst *TI = From->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional())
+ return true;
+
+ LatticeVal BCValue = getValueState(BI->getCondition());
+
+ // Overdefined condition variables mean the branch could go either way,
+ // undef conditions mean that neither edge is feasible yet.
+ ConstantInt *CI = BCValue.getConstantInt();
+ if (CI == 0)
+ return !BCValue.isUndefined();
+
+ // Constant condition variables mean the branch can only go a single way.
+ return BI->getSuccessor(CI->isZero()) == To;
+ }
+
+ // Invoke instructions successors are always executable.
+ if (isa<InvokeInst>(TI))
+ return true;
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (SI->getNumCases() < 1)
+ return true;
+
+ LatticeVal SCValue = getValueState(SI->getCondition());
+ ConstantInt *CI = SCValue.getConstantInt();
+
+ if (CI == 0)
+ return !SCValue.isUndefined();
+
+ return SI->findCaseValue(CI).getCaseSuccessor() == To;
+ }
+
+ // Just mark all destinations executable!
+ // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
+ if (isa<IndirectBrInst>(TI))
+ return true;
+
+#ifndef NDEBUG
+ dbgs() << "Unknown terminator instruction: " << *TI << '\n';
+#endif
+ llvm_unreachable(0);
+}
+
+// visit Implementations - Something changed in this instruction, either an
+// operand made a transition, or the instruction is newly executable. Change
+// the value type of I to reflect these changes if appropriate. This method
+// makes sure to do the following actions:
+//
+// 1. If a phi node merges two constants in, and has conflicting value coming
+// from different branches, or if the PHI node merges in an overdefined
+// value, then the PHI node becomes overdefined.
+// 2. If a phi node merges only constants in, and they all agree on value, the
+// PHI node becomes a constant value equal to that.
+// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant
+// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined
+// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined
+// 6. If a conditional branch has a value that is constant, make the selected
+// destination executable
+// 7. If a conditional branch has a value that is overdefined, make all
+// successors executable.
+//
+void SCCPSolver::visitPHINode(PHINode &PN) {
+ // If this PN returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (PN.getType()->isStructTy())
+ return markAnythingOverdefined(&PN);
+
+ if (getValueState(&PN).isOverdefined())
+ return; // Quick exit
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64)
+ return markOverdefined(&PN);
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. If they are all
+ // constant, and they agree with each other, the PHI becomes the identical
+ // constant. If they are constant and don't agree, the PHI is overdefined.
+ // If there are no executable operands, the PHI remains undefined.
+ //
+ Constant *OperandVal = 0;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LatticeVal IV = getValueState(PN.getIncomingValue(i));
+ if (IV.isUndefined()) continue; // Doesn't influence PHI node.
+
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+
+ if (IV.isOverdefined()) // PHI node becomes overdefined!
+ return markOverdefined(&PN);
+
+ if (OperandVal == 0) { // Grab the first value.
+ OperandVal = IV.getConstant();
+ continue;
+ }
+
+ // There is already a reachable operand. If we conflict with it,
+ // then the PHI node becomes overdefined. If we agree with it, we
+ // can continue on.
+
+ // Check to see if there are two different constants merging, if so, the PHI
+ // node is overdefined.
+ if (IV.getConstant() != OperandVal)
+ return markOverdefined(&PN);
+ }
+
+ // If we exited the loop, this means that the PHI node only has constant
+ // arguments that agree with each other(and OperandVal is the constant) or
+ // OperandVal is null because there are no defined incoming arguments. If
+ // this is the case, the PHI remains undefined.
+ //
+ if (OperandVal)
+ markConstant(&PN, OperandVal); // Acquire operand value
+}
+
+void SCCPSolver::visitReturnInst(ReturnInst &I) {
+ if (I.getNumOperands() == 0) return; // ret void
+
+ Function *F = I.getParent()->getParent();
+ Value *ResultOp = I.getOperand(0);
+
+ // If we are tracking the return value of this function, merge it in.
+ if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
+ DenseMap<Function*, LatticeVal>::iterator TFRVI =
+ TrackedRetVals.find(F);
+ if (TFRVI != TrackedRetVals.end()) {
+ mergeInValue(TFRVI->second, F, getValueState(ResultOp));
+ return;
+ }
+ }
+
+ // Handle functions that return multiple values.
+ if (!TrackedMultipleRetVals.empty()) {
+ if (StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
+ if (MRVFunctionsTracked.count(F))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
+ getStructValueState(ResultOp, i));
+
+ }
+}
+
+void SCCPSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable.
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SCCPSolver::visitCastInst(CastInst &I) {
+ LatticeVal OpSt = getValueState(I.getOperand(0));
+ if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
+ markOverdefined(&I);
+ else if (OpSt.isConstant()) // Propagate constant value
+ markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
+ OpSt.getConstant(), I.getType()));
+}
+
+
+void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
+ // If this returns a struct, mark all elements over defined, we don't track
+ // structs in structs.
+ if (EVI.getType()->isStructTy())
+ return markAnythingOverdefined(&EVI);
+
+ // If this is extracting from more than one level of struct, we don't know.
+ if (EVI.getNumIndices() != 1)
+ return markOverdefined(&EVI);
+
+ Value *AggVal = EVI.getAggregateOperand();
+ if (AggVal->getType()->isStructTy()) {
+ unsigned i = *EVI.idx_begin();
+ LatticeVal EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ } else {
+ // Otherwise, must be extracting from an array.
+ return markOverdefined(&EVI);
+ }
+}
+
+void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
+ StructType *STy = dyn_cast<StructType>(IVI.getType());
+ if (STy == 0)
+ return markOverdefined(&IVI);
+
+ // If this has more than one index, we can't handle it, drive all results to
+ // undef.
+ if (IVI.getNumIndices() != 1)
+ return markAnythingOverdefined(&IVI);
+
+ Value *Aggr = IVI.getAggregateOperand();
+ unsigned Idx = *IVI.idx_begin();
+
+ // Compute the result based on what we're inserting.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // This passes through all values that aren't the inserted element.
+ if (i != Idx) {
+ LatticeVal EltVal = getStructValueState(Aggr, i);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal);
+ continue;
+ }
+
+ Value *Val = IVI.getInsertedValueOperand();
+ if (Val->getType()->isStructTy())
+ // We don't track structs in structs.
+ markOverdefined(getStructValueState(&IVI, i), &IVI);
+ else {
+ LatticeVal InVal = getValueState(Val);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, InVal);
+ }
+ }
+}
+
+void SCCPSolver::visitSelectInst(SelectInst &I) {
+ // If this select returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (I.getType()->isStructTy())
+ return markAnythingOverdefined(&I);
+
+ LatticeVal CondValue = getValueState(I.getCondition());
+ if (CondValue.isUndefined())
+ return;
+
+ if (ConstantInt *CondCB = CondValue.getConstantInt()) {
+ Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
+ mergeInValue(&I, getValueState(OpVal));
+ return;
+ }
+
+ // Otherwise, the condition is overdefined or a constant we can't evaluate.
+ // See if we can produce something better than overdefined based on the T/F
+ // value.
+ LatticeVal TVal = getValueState(I.getTrueValue());
+ LatticeVal FVal = getValueState(I.getFalseValue());
+
+ // select ?, C, C -> C.
+ if (TVal.isConstant() && FVal.isConstant() &&
+ TVal.getConstant() == FVal.getConstant())
+ return markConstant(&I, FVal.getConstant());
+
+ if (TVal.isUndefined()) // select ?, undef, X -> X.
+ return mergeInValue(&I, FVal);
+ if (FVal.isUndefined()) // select ?, X, undef -> X.
+ return mergeInValue(&I, TVal);
+ markOverdefined(&I);
+}
+
+// Handle Binary Operators.
+void SCCPSolver::visitBinaryOperator(Instruction &I) {
+ LatticeVal V1State = getValueState(I.getOperand(0));
+ LatticeVal V2State = getValueState(I.getOperand(1));
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (V1State.isConstant() && V2State.isConstant())
+ return markConstant(IV, &I,
+ ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
+ V2State.getConstant()));
+
+ // If something is undef, wait for it to resolve.
+ if (!V1State.isOverdefined() && !V2State.isOverdefined())
+ return;
+
+ // Otherwise, one of our operands is overdefined. Try to produce something
+ // better than overdefined with some tricks.
+
+ // If this is an AND or OR with 0 or -1, it doesn't matter that the other
+ // operand is overdefined.
+ if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
+ LatticeVal *NonOverdefVal = 0;
+ if (!V1State.isOverdefined())
+ NonOverdefVal = &V1State;
+ else if (!V2State.isOverdefined())
+ NonOverdefVal = &V2State;
+
+ if (NonOverdefVal) {
+ if (NonOverdefVal->isUndefined()) {
+ // Could annihilate value.
+ if (I.getOpcode() == Instruction::And)
+ markConstant(IV, &I, Constant::getNullValue(I.getType()));
+ else if (VectorType *PT = dyn_cast<VectorType>(I.getType()))
+ markConstant(IV, &I, Constant::getAllOnesValue(PT));
+ else
+ markConstant(IV, &I,
+ Constant::getAllOnesValue(I.getType()));
+ return;
+ }
+
+ if (I.getOpcode() == Instruction::And) {
+ // X and 0 = 0
+ if (NonOverdefVal->getConstant()->isNullValue())
+ return markConstant(IV, &I, NonOverdefVal->getConstant());
+ } else {
+ if (ConstantInt *CI = NonOverdefVal->getConstantInt())
+ if (CI->isAllOnesValue()) // X or -1 = -1
+ return markConstant(IV, &I, NonOverdefVal->getConstant());
+ }
+ }
+ }
+
+
+ markOverdefined(&I);
+}
+
+// Handle ICmpInst instruction.
+void SCCPSolver::visitCmpInst(CmpInst &I) {
+ LatticeVal V1State = getValueState(I.getOperand(0));
+ LatticeVal V2State = getValueState(I.getOperand(1));
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (V1State.isConstant() && V2State.isConstant())
+ return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
+ V1State.getConstant(),
+ V2State.getConstant()));
+
+ // If operands are still undefined, wait for it to resolve.
+ if (!V1State.isOverdefined() && !V2State.isOverdefined())
+ return;
+
+ markOverdefined(&I);
+}
+
+void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
+ // TODO : SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+
+#if 0
+ LatticeVal &ValState = getValueState(I.getOperand(0));
+ LatticeVal &IdxState = getValueState(I.getOperand(1));
+
+ if (ValState.isOverdefined() || IdxState.isOverdefined())
+ markOverdefined(&I);
+ else if(ValState.isConstant() && IdxState.isConstant())
+ markConstant(&I, ConstantExpr::getExtractElement(ValState.getConstant(),
+ IdxState.getConstant()));
+#endif
+}
+
+void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
+ // TODO : SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+#if 0
+ LatticeVal &ValState = getValueState(I.getOperand(0));
+ LatticeVal &EltState = getValueState(I.getOperand(1));
+ LatticeVal &IdxState = getValueState(I.getOperand(2));
+
+ if (ValState.isOverdefined() || EltState.isOverdefined() ||
+ IdxState.isOverdefined())
+ markOverdefined(&I);
+ else if(ValState.isConstant() && EltState.isConstant() &&
+ IdxState.isConstant())
+ markConstant(&I, ConstantExpr::getInsertElement(ValState.getConstant(),
+ EltState.getConstant(),
+ IdxState.getConstant()));
+ else if (ValState.isUndefined() && EltState.isConstant() &&
+ IdxState.isConstant())
+ markConstant(&I,ConstantExpr::getInsertElement(UndefValue::get(I.getType()),
+ EltState.getConstant(),
+ IdxState.getConstant()));
+#endif
+}
+
+void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
+ // TODO : SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+#if 0
+ LatticeVal &V1State = getValueState(I.getOperand(0));
+ LatticeVal &V2State = getValueState(I.getOperand(1));
+ LatticeVal &MaskState = getValueState(I.getOperand(2));
+
+ if (MaskState.isUndefined() ||
+ (V1State.isUndefined() && V2State.isUndefined()))
+ return; // Undefined output if mask or both inputs undefined.
+
+ if (V1State.isOverdefined() || V2State.isOverdefined() ||
+ MaskState.isOverdefined()) {
+ markOverdefined(&I);
+ } else {
+ // A mix of constant/undef inputs.
+ Constant *V1 = V1State.isConstant() ?
+ V1State.getConstant() : UndefValue::get(I.getType());
+ Constant *V2 = V2State.isConstant() ?
+ V2State.getConstant() : UndefValue::get(I.getType());
+ Constant *Mask = MaskState.isConstant() ?
+ MaskState.getConstant() : UndefValue::get(I.getOperand(2)->getType());
+ markConstant(&I, ConstantExpr::getShuffleVector(V1, V2, Mask));
+ }
+#endif
+}
+
+// Handle getelementptr instructions. If all operands are constants then we
+// can turn this into a getelementptr ConstantExpr.
+//
+void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
+ if (ValueState[&I].isOverdefined()) return;
+
+ SmallVector<Constant*, 8> Operands;
+ Operands.reserve(I.getNumOperands());
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ LatticeVal State = getValueState(I.getOperand(i));
+ if (State.isUndefined())
+ return; // Operands are not resolved yet.
+
+ if (State.isOverdefined())
+ return markOverdefined(&I);
+
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ Constant *Ptr = Operands[0];
+ ArrayRef<Constant *> Indices(Operands.begin() + 1, Operands.end());
+ markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
+}
+
+void SCCPSolver::visitStoreInst(StoreInst &SI) {
+ // If this store is of a struct, ignore it.
+ if (SI.getOperand(0)->getType()->isStructTy())
+ return;
+
+ if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
+ return;
+
+ GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1));
+ DenseMap<GlobalVariable*, LatticeVal>::iterator I = TrackedGlobals.find(GV);
+ if (I == TrackedGlobals.end() || I->second.isOverdefined()) return;
+
+ // Get the value we are storing into the global, then merge it.
+ mergeInValue(I->second, GV, getValueState(SI.getOperand(0)));
+ if (I->second.isOverdefined())
+ TrackedGlobals.erase(I); // No need to keep tracking this!
+}
+
+
+// Handle load instructions. If the operand is a constant pointer to a constant
+// global, we can replace the load with the loaded constant value!
+void SCCPSolver::visitLoadInst(LoadInst &I) {
+ // If this load is of a struct, just mark the result overdefined.
+ if (I.getType()->isStructTy())
+ return markAnythingOverdefined(&I);
+
+ LatticeVal PtrVal = getValueState(I.getOperand(0));
+ if (PtrVal.isUndefined()) return; // The pointer is not resolved yet!
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (!PtrVal.isConstant() || I.isVolatile())
+ return markOverdefined(IV, &I);
+
+ Constant *Ptr = PtrVal.getConstant();
+
+ // load null -> null
+ if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
+ return markConstant(IV, &I, Constant::getNullValue(I.getType()));
+
+ // Transform load (constant global) into the value loaded.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ if (!TrackedGlobals.empty()) {
+ // If we are tracking this global, merge in the known value for it.
+ DenseMap<GlobalVariable*, LatticeVal>::iterator It =
+ TrackedGlobals.find(GV);
+ if (It != TrackedGlobals.end()) {
+ mergeInValue(IV, &I, It->second);
+ return;
+ }
+ }
+ }
+
+ // Transform load from a constant into a constant if possible.
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, TD))
+ return markConstant(IV, &I, C);
+
+ // Otherwise we cannot say for certain what value this load will produce.
+ // Bail out.
+ markOverdefined(IV, &I);
+}
+
+void SCCPSolver::visitCallSite(CallSite CS) {
+ Function *F = CS.getCalledFunction();
+ Instruction *I = CS.getInstruction();
+
+ // The common case is that we aren't tracking the callee, either because we
+ // are not doing interprocedural analysis or the callee is indirect, or is
+ // external. Handle these cases first.
+ if (F == 0 || F->isDeclaration()) {
+CallOverdefined:
+ // Void return and not tracking callee, just bail.
+ if (I->getType()->isVoidTy()) return;
+
+ // Otherwise, if we have a single return value case, and if the function is
+ // a declaration, maybe we can constant fold it.
+ if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
+ canConstantFoldCallTo(F)) {
+
+ SmallVector<Constant*, 8> Operands;
+ for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
+ AI != E; ++AI) {
+ LatticeVal State = getValueState(*AI);
+
+ if (State.isUndefined())
+ return; // Operands are not resolved yet.
+ if (State.isOverdefined())
+ return markOverdefined(I);
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ // If we can constant fold this, mark the result of the call as a
+ // constant.
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI))
+ return markConstant(I, C);
+ }
+
+ // Otherwise, we don't know anything about this call, mark it overdefined.
+ return markAnythingOverdefined(I);
+ }
+
+ // If this is a local function that doesn't have its address taken, mark its
+ // entry block executable and merge in the actual arguments to the call into
+ // the formal arguments of the function.
+ if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){
+ MarkBlockExecutable(F->begin());
+
+ // Propagate information from this call site into the callee.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++CAI) {
+ // If this argument is byval, and if the function is not readonly, there
+ // will be an implicit copy formed of the input aggregate.
+ if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
+ markOverdefined(AI);
+ continue;
+ }
+
+ if (StructType *STy = dyn_cast<StructType>(AI->getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal CallArg = getStructValueState(*CAI, i);
+ mergeInValue(getStructValueState(AI, i), AI, CallArg);
+ }
+ } else {
+ mergeInValue(AI, getValueState(*CAI));
+ }
+ }
+ }
+
+ // If this is a single/zero retval case, see if we're tracking the function.
+ if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (!MRVFunctionsTracked.count(F))
+ goto CallOverdefined; // Not tracking this callee.
+
+ // If we are tracking this callee, propagate the result of the function
+ // into this call site.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(getStructValueState(I, i), I,
+ TrackedMultipleRetVals[std::make_pair(F, i)]);
+ } else {
+ DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
+ if (TFRVI == TrackedRetVals.end())
+ goto CallOverdefined; // Not tracking this callee.
+
+ // If so, propagate the return value of the callee into this call result.
+ mergeInValue(I, TFRVI->second);
+ }
+}
+
+void SCCPSolver::Solve() {
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty() ||
+ !OverdefinedInstWorkList.empty()) {
+ // Process the overdefined instruction's work list first, which drives other
+ // things to overdefined more quickly.
+ while (!OverdefinedInstWorkList.empty()) {
+ Value *I = OverdefinedInstWorkList.pop_back_val();
+
+ DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
+
+ // "I" got into the work list because it either made the transition from
+ // bottom to constant, or to overdefined.
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined
+ // Update all of the users of this instruction's value.
+ //
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (Instruction *I = dyn_cast<Instruction>(*UI))
+ OperandChangedState(I);
+ }
+
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Value *I = InstWorkList.pop_back_val();
+
+ DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n');
+
+ // "I" got into the work list because it made the transition from undef to
+ // constant.
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined.
+ // Update all of the users of this instruction's value.
+ //
+ if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (Instruction *I = dyn_cast<Instruction>(*UI))
+ OperandChangedState(I);
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ visit(BB);
+ }
+ }
+}
+
+/// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+/// that branches on undef values cannot reach any of their successors.
+/// However, this is not a safe assumption. After we solve dataflow, this
+/// method should be use to handle this. If this returns true, the solver
+/// should be rerun.
+///
+/// This method handles this by finding an unresolved branch and marking it one
+/// of the edges from the block as being feasible, even though the condition
+/// doesn't say it would otherwise be. This allows SCCP to find the rest of the
+/// CFG and only slightly pessimizes the analysis results (by marking one,
+/// potentially infeasible, edge feasible). This cannot usefully modify the
+/// constraints on the condition of the branch, as that would impact other users
+/// of the value.
+///
+/// This scan also checks for values that use undefs, whose results are actually
+/// defined. For example, 'zext i8 undef to i32' should produce all zeros
+/// conservatively, as "(zext i8 X -> i32) & 0xFF00" must always return zero,
+/// even if X isn't defined.
+bool SCCPSolver::ResolvedUndefsIn(Function &F) {
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BBExecutable.count(BB))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Look for instructions which produce undef values.
+ if (I->getType()->isVoidTy()) continue;
+
+ if (StructType *STy = dyn_cast<StructType>(I->getType())) {
+ // Only a few things that can be structs matter for undef.
+
+ // Tracked calls must never be marked overdefined in ResolvedUndefsIn.
+ if (CallSite CS = CallSite(I))
+ if (Function *F = CS.getCalledFunction())
+ if (MRVFunctionsTracked.count(F))
+ continue;
+
+ // extractvalue and insertvalue don't need to be marked; they are
+ // tracked as precisely as their operands.
+ if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
+ continue;
+
+ // Send the results of everything else to overdefined. We could be
+ // more precise than this but it isn't worth bothering.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal &LV = getStructValueState(I, i);
+ if (LV.isUndefined())
+ markOverdefined(LV, I);
+ }
+ continue;
+ }
+
+ LatticeVal &LV = getValueState(I);
+ if (!LV.isUndefined()) continue;
+
+ // extractvalue is safe; check here because the argument is a struct.
+ if (isa<ExtractValueInst>(I))
+ continue;
+
+ // Compute the operand LatticeVals, for convenience below.
+ // Anything taking a struct is conservatively assumed to require
+ // overdefined markings.
+ if (I->getOperand(0)->getType()->isStructTy()) {
+ markOverdefined(I);
+ return true;
+ }
+ LatticeVal Op0LV = getValueState(I->getOperand(0));
+ LatticeVal Op1LV;
+ if (I->getNumOperands() == 2) {
+ if (I->getOperand(1)->getType()->isStructTy()) {
+ markOverdefined(I);
+ return true;
+ }
+
+ Op1LV = getValueState(I->getOperand(1));
+ }
+ // If this is an instructions whose result is defined even if the input is
+ // not fully defined, propagate the information.
+ Type *ITy = I->getType();
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ break; // Any undef -> undef
+ case Instruction::FSub:
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ // Floating-point binary operation: be conservative.
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ else
+ markOverdefined(I);
+ return true;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ // undef -> 0; some outputs are impossible
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ case Instruction::Mul:
+ case Instruction::And:
+ // Both operands undef -> undef
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ break;
+ // undef * X -> 0. X could be zero.
+ // undef & X -> 0. X could be zero.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+
+ case Instruction::Or:
+ // Both operands undef -> undef
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ break;
+ // undef | X -> -1. X could be -1.
+ markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ return true;
+
+ case Instruction::Xor:
+ // undef ^ undef -> 0; strictly speaking, this is not strictly
+ // necessary, but we try to be nice to people who expect this
+ // behavior in simple cases
+ if (Op0LV.isUndefined() && Op1LV.isUndefined()) {
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ }
+ // undef ^ X -> undef
+ break;
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // X / undef -> undef. No change.
+ // X % undef -> undef. No change.
+ if (Op1LV.isUndefined()) break;
+
+ // undef / X -> 0. X could be maxint.
+ // undef % X -> 0. X could be 1.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+
+ case Instruction::AShr:
+ // X >>a undef -> undef.
+ if (Op1LV.isUndefined()) break;
+
+ // undef >>a X -> all ones
+ markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ return true;
+ case Instruction::LShr:
+ case Instruction::Shl:
+ // X << undef -> undef.
+ // X >> undef -> undef.
+ if (Op1LV.isUndefined()) break;
+
+ // undef << X -> 0
+ // undef >> X -> 0
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ case Instruction::Select:
+ Op1LV = getValueState(I->getOperand(1));
+ // undef ? X : Y -> X or Y. There could be commonality between X/Y.
+ if (Op0LV.isUndefined()) {
+ if (!Op1LV.isConstant()) // Pick the constant one if there is any.
+ Op1LV = getValueState(I->getOperand(2));
+ } else if (Op1LV.isUndefined()) {
+ // c ? undef : undef -> undef. No change.
+ Op1LV = getValueState(I->getOperand(2));
+ if (Op1LV.isUndefined())
+ break;
+ // Otherwise, c ? undef : x -> x.
+ } else {
+ // Leave Op1LV as Operand(1)'s LatticeValue.
+ }
+
+ if (Op1LV.isConstant())
+ markForcedConstant(I, Op1LV.getConstant());
+ else
+ markOverdefined(I);
+ return true;
+ case Instruction::Load:
+ // A load here means one of two things: a load of undef from a global,
+ // a load from an unknown pointer. Either way, having it return undef
+ // is okay.
+ break;
+ case Instruction::ICmp:
+ // X == undef -> undef. Other comparisons get more complicated.
+ if (cast<ICmpInst>(I)->isEquality())
+ break;
+ markOverdefined(I);
+ return true;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ // There are two reasons a call can have an undef result
+ // 1. It could be tracked.
+ // 2. It could be constant-foldable.
+ // Because of the way we solve return values, tracked calls must
+ // never be marked overdefined in ResolvedUndefsIn.
+ if (Function *F = CallSite(I).getCalledFunction())
+ if (TrackedRetVals.count(F))
+ break;
+
+ // If the call is constant-foldable, we mark it overdefined because
+ // we do not know what return values are valid.
+ markOverdefined(I);
+ return true;
+ }
+ default:
+ // If we don't know what should happen here, conservatively mark it
+ // overdefined.
+ markOverdefined(I);
+ return true;
+ }
+ }
+
+ // Check to see if we have a branch or switch on an undefined value. If so
+ // we force the branch to go one way or the other to make the successor
+ // values live. It doesn't really matter which way we force it.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional()) continue;
+ if (!getValueState(BI->getCondition()).isUndefined())
+ continue;
+
+ // If the input to SCCP is actually branch on undef, fix the undef to
+ // false.
+ if (isa<UndefValue>(BI->getCondition())) {
+ BI->setCondition(ConstantInt::getFalse(BI->getContext()));
+ markEdgeExecutable(BB, TI->getSuccessor(1));
+ return true;
+ }
+
+ // Otherwise, it is a branch on a symbolic value which is currently
+ // considered to be undef. Handle this by forcing the input value to the
+ // branch to false.
+ markForcedConstant(BI->getCondition(),
+ ConstantInt::getFalse(TI->getContext()));
+ return true;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (!SI->getNumCases())
+ continue;
+ if (!getValueState(SI->getCondition()).isUndefined())
+ continue;
+
+ // If the input to SCCP is actually switch on undef, fix the undef to
+ // the first constant.
+ if (isa<UndefValue>(SI->getCondition())) {
+ SI->setCondition(SI->case_begin().getCaseValue());
+ markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor());
+ return true;
+ }
+
+ markForcedConstant(SI->getCondition(), SI->case_begin().getCaseValue());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ //
+ /// SCCP Class - This class uses the SCCPSolver to implement a per-function
+ /// Sparse Conditional Constant Propagator.
+ ///
+ struct SCCP : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ static char ID; // Pass identification, replacement for typeid
+ SCCP() : FunctionPass(ID) {
+ initializeSCCPPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - Run the Sparse Conditional Constant Propagation
+ // algorithm, and return true if the function was modified.
+ //
+ bool runOnFunction(Function &F);
+ };
+} // end anonymous namespace
+
+char SCCP::ID = 0;
+INITIALIZE_PASS(SCCP, "sccp",
+ "Sparse Conditional Constant Propagation", false, false)
+
+// createSCCPPass - This is the public interface to this file.
+FunctionPass *llvm::createSCCPPass() {
+ return new SCCP();
+}
+
+static void DeleteInstructionInBlock(BasicBlock *BB) {
+ DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
+ ++NumDeadBlocks;
+
+ // Check to see if there are non-terminating instructions to delete.
+ if (isa<TerminatorInst>(BB->begin()))
+ return;
+
+ // Delete the instructions backwards, as it has a reduced likelihood of having
+ // to update as many def-use and use-def chains.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != BB->begin()) {
+ // Delete the next to last instruction.
+ BasicBlock::iterator I = EndInst;
+ Instruction *Inst = --I;
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (isa<LandingPadInst>(Inst)) {
+ EndInst = Inst;
+ continue;
+ }
+ BB->getInstList().erase(Inst);
+ ++NumInstRemoved;
+ }
+}
+
+// runOnFunction() - Run the Sparse Conditional Constant Propagation algorithm,
+// and return true if the function was modified.
+//
+bool SCCP::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
+
+ // Mark the first block of the function as being executable.
+ Solver.MarkBlockExecutable(F.begin());
+
+ // Mark all arguments to the function as being overdefined.
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI)
+ Solver.markAnythingOverdefined(AI);
+
+ // Solve for constants.
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ Solver.Solve();
+ DEBUG(dbgs() << "RESOLVING UNDEFs\n");
+ ResolvedUndefs = Solver.ResolvedUndefsIn(F);
+ }
+
+ bool MadeChanges = false;
+
+ // If we decided that there are basic blocks that are dead in this function,
+ // delete their contents now. Note that we cannot actually delete the blocks,
+ // as we cannot modify the CFG of the function.
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!Solver.isBlockExecutable(BB)) {
+ DeleteInstructionInBlock(BB);
+ MadeChanges = true;
+ continue;
+ }
+
+ // Iterate over all of the instructions in a function, replacing them with
+ // constants if we have found them to be of constant values.
+ //
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
+ continue;
+
+ // TODO: Reconstruct structs from their elements.
+ if (Inst->getType()->isStructTy())
+ continue;
+
+ LatticeVal IV = Solver.getLatticeValueFor(Inst);
+ if (IV.isOverdefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
+
+ // Replaces all of the uses of a variable with uses of the constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++NumInstRemoved;
+ }
+ }
+
+ return MadeChanges;
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ //
+ /// IPSCCP Class - This class implements interprocedural Sparse Conditional
+ /// Constant Propagation.
+ ///
+ struct IPSCCP : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ static char ID;
+ IPSCCP() : ModulePass(ID) {
+ initializeIPSCCPPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M);
+ };
+} // end anonymous namespace
+
+char IPSCCP::ID = 0;
+INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+
+// createIPSCCPPass - This is the public interface to this file.
+ModulePass *llvm::createIPSCCPPass() {
+ return new IPSCCP();
+}
+
+
+static bool AddressIsTaken(const GlobalValue *GV) {
+ // Delete any dead constantexpr klingons.
+ GV->removeDeadConstantUsers();
+
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == GV || SI->isVolatile())
+ return true; // Storing addr of GV.
+ } else if (isa<InvokeInst>(U) || isa<CallInst>(U)) {
+ // Make sure we are calling the function, not passing the address.
+ ImmutableCallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(UI))
+ return true;
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return true;
+ } else if (isa<BlockAddress>(U)) {
+ // blockaddress doesn't take the address of the function, it takes addr
+ // of label.
+ } else {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool IPSCCP::runOnModule(Module &M) {
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
+
+ // AddressTakenFunctions - This set keeps track of the address-taken functions
+ // that are in the input. As IPSCCP runs through and simplifies code,
+ // functions that were address taken can end up losing their
+ // address-taken-ness. Because of this, we keep track of their addresses from
+ // the first pass so we can use them for the later simplification pass.
+ SmallPtrSet<Function*, 32> AddressTakenFunctions;
+
+ // Loop over all functions, marking arguments to those with their addresses
+ // taken or that are external as overdefined.
+ //
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration())
+ continue;
+
+ // If this is a strong or ODR definition of this function, then we can
+ // propagate information about its result into callsites of it.
+ if (!F->mayBeOverridden())
+ Solver.AddTrackedFunction(F);
+
+ // If this function only has direct calls that we can see, we can track its
+ // arguments and return value aggressively, and can assume it is not called
+ // unless we see evidence to the contrary.
+ if (F->hasLocalLinkage()) {
+ if (AddressIsTaken(F))
+ AddressTakenFunctions.insert(F);
+ else {
+ Solver.AddArgumentTrackedFunction(F);
+ continue;
+ }
+ }
+
+ // Assume the function is called.
+ Solver.MarkBlockExecutable(F->begin());
+
+ // Assume nothing about the incoming arguments.
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI)
+ Solver.markAnythingOverdefined(AI);
+ }
+
+ // Loop over global variables. We inform the solver about any internal global
+ // variables that do not have their 'addresses taken'. If they don't have
+ // their addresses taken, we can propagate constants through them.
+ for (Module::global_iterator G = M.global_begin(), E = M.global_end();
+ G != E; ++G)
+ if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G))
+ Solver.TrackValueOfGlobalVariable(G);
+
+ // Solve for constants.
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ Solver.Solve();
+
+ DEBUG(dbgs() << "RESOLVING UNDEFS\n");
+ ResolvedUndefs = false;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ ResolvedUndefs |= Solver.ResolvedUndefsIn(*F);
+ }
+
+ bool MadeChanges = false;
+
+ // Iterate over all of the instructions in the module, replacing them with
+ // constants if we have found them to be of constant values.
+ //
+ SmallVector<BasicBlock*, 512> BlocksToErase;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (Solver.isBlockExecutable(F->begin())) {
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ if (AI->use_empty() || AI->getType()->isStructTy()) continue;
+
+ // TODO: Could use getStructLatticeValueFor to find out if the entire
+ // result is a constant and replace it entirely if so.
+
+ LatticeVal IV = Solver.getLatticeValueFor(AI);
+ if (IV.isOverdefined()) continue;
+
+ Constant *CST = IV.isConstant() ?
+ IV.getConstant() : UndefValue::get(AI->getType());
+ DEBUG(dbgs() << "*** Arg " << *AI << " = " << *CST <<"\n");
+
+ // Replaces all of the uses of a variable with uses of the
+ // constant.
+ AI->replaceAllUsesWith(CST);
+ ++IPNumArgsElimed;
+ }
+ }
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (!Solver.isBlockExecutable(BB)) {
+ DeleteInstructionInBlock(BB);
+ MadeChanges = true;
+
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ if (!Succ->empty() && isa<PHINode>(Succ->begin()))
+ TI->getSuccessor(i)->removePredecessor(BB);
+ }
+ if (!TI->use_empty())
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ TI->eraseFromParent();
+
+ if (&*BB != &F->front())
+ BlocksToErase.push_back(BB);
+ else
+ new UnreachableInst(M.getContext(), BB);
+ continue;
+ }
+
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
+ continue;
+
+ // TODO: Could use getStructLatticeValueFor to find out if the entire
+ // result is a constant and replace it entirely if so.
+
+ LatticeVal IV = Solver.getLatticeValueFor(Inst);
+ if (IV.isOverdefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
+
+ // Replaces all of the uses of a variable with uses of the
+ // constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ if (!isa<CallInst>(Inst) && !isa<TerminatorInst>(Inst))
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++IPNumInstRemoved;
+ }
+ }
+
+ // Now that all instructions in the function are constant folded, erase dead
+ // blocks, because we can now use ConstantFoldTerminator to get rid of
+ // in-edges.
+ for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) {
+ // If there are any PHI nodes in this successor, drop entries for BB now.
+ BasicBlock *DeadBB = BlocksToErase[i];
+ for (Value::use_iterator UI = DeadBB->use_begin(), UE = DeadBB->use_end();
+ UI != UE; ) {
+ // Grab the user and then increment the iterator early, as the user
+ // will be deleted. Step past all adjacent uses from the same user.
+ Instruction *I = dyn_cast<Instruction>(*UI);
+ do { ++UI; } while (UI != UE && *UI == I);
+
+ // Ignore blockaddress users; BasicBlock's dtor will handle them.
+ if (!I) continue;
+
+ bool Folded = ConstantFoldTerminator(I->getParent());
+ if (!Folded) {
+ // The constant folder may not have been able to fold the terminator
+ // if this is a branch or switch on undef. Fold it manually as a
+ // branch to the first successor.
+#ifndef NDEBUG
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
+ "Branch should be foldable!");
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
+ } else {
+ llvm_unreachable("Didn't fold away reference to block!");
+ }
+#endif
+
+ // Make this an uncond branch to the first successor.
+ TerminatorInst *TI = I->getParent()->getTerminator();
+ BranchInst::Create(TI->getSuccessor(0), TI);
+
+ // Remove entries in successor phi nodes to remove edges.
+ for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
+ TI->getSuccessor(i)->removePredecessor(TI->getParent());
+
+ // Remove the old terminator.
+ TI->eraseFromParent();
+ }
+ }
+
+ // Finally, delete the basic block.
+ F->getBasicBlockList().erase(DeadBB);
+ }
+ BlocksToErase.clear();
+ }
+
+ // If we inferred constant or undef return values for a function, we replaced
+ // all call uses with the inferred value. This means we don't need to bother
+ // actually returning anything from the function. Replace all return
+ // instructions with return undef.
+ //
+ // Do this in two stages: first identify the functions we should process, then
+ // actually zap their returns. This is important because we can only do this
+ // if the address of the function isn't taken. In cases where a return is the
+ // last use of a function, the order of processing functions would affect
+ // whether other functions are optimizable.
+ SmallVector<ReturnInst*, 8> ReturnsToZap;
+
+ // TODO: Process multiple value ret instructions also.
+ const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
+ for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
+ E = RV.end(); I != E; ++I) {
+ Function *F = I->first;
+ if (I->second.isOverdefined() || F->getReturnType()->isVoidTy())
+ continue;
+
+ // We can only do this if we know that nothing else can call the function.
+ if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
+ continue;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (!isa<UndefValue>(RI->getOperand(0)))
+ ReturnsToZap.push_back(RI);
+ }
+
+ // Zap all returns which we've identified as zap to change.
+ for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) {
+ Function *F = ReturnsToZap[i]->getParent()->getParent();
+ ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
+ }
+
+ // If we inferred constant or undef values for globals variables, we can
+ // delete the global and any stores that remain to it.
+ const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
+ for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
+ E = TG.end(); I != E; ++I) {
+ GlobalVariable *GV = I->first;
+ assert(!I->second.isOverdefined() &&
+ "Overdefined values should have been taken out of the map!");
+ DEBUG(dbgs() << "Found that GV '" << GV->getName() << "' is constant!\n");
+ while (!GV->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(GV->use_back());
+ SI->eraseFromParent();
+ }
+ M.getGlobalList().erase(GV);
+ ++IPNumGlobalConst;
+ }
+
+ return MadeChanges;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
new file mode 100644
index 000000000000..f6bb365216ff
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -0,0 +1,3765 @@
+//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This transformation implements the well known scalar replacement of
+/// aggregates transformation. It tries to identify promotable elements of an
+/// aggregate alloca, and promote them to registers. It will also try to
+/// convert uses of an element (or set of elements) of an alloca into a vector
+/// or bitfield-style integer scalar if appropriate.
+///
+/// It works to do this with minimal slicing of the alloca so that regions
+/// which are merely transferred in and out of external memory remain unchanged
+/// and are not decomposed to scalar code.
+///
+/// Because this also performs alloca promotion, it can be thought of as also
+/// serving the purpose of SSA formation. The algorithm iterates on the
+/// function until all opportunities for promotion have been realized.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sroa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/PtrUseVisitor.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
+STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
+STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions");
+STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
+STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
+STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
+
+/// Hidden option to force the pass to not use DomTree and mem2reg, instead
+/// forming SSA values through the SSAUpdater infrastructure.
+static cl::opt<bool>
+ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
+
+namespace {
+/// \brief A custom IRBuilder inserter which prefixes all names if they are
+/// preserved.
+template <bool preserveNames = true>
+class IRBuilderPrefixedInserter :
+ public IRBuilderDefaultInserter<preserveNames> {
+ std::string Prefix;
+
+public:
+ void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
+
+protected:
+ void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<preserveNames>::InsertHelper(
+ I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
+ }
+};
+
+// Specialization for not preserving the name is trivial.
+template <>
+class IRBuilderPrefixedInserter<false> :
+ public IRBuilderDefaultInserter<false> {
+public:
+ void SetNamePrefix(const Twine &P) {}
+};
+
+/// \brief Provide a typedef for IRBuilder that drops names in release builds.
+#ifndef NDEBUG
+typedef llvm::IRBuilder<true, ConstantFolder,
+ IRBuilderPrefixedInserter<true> > IRBuilderTy;
+#else
+typedef llvm::IRBuilder<false, ConstantFolder,
+ IRBuilderPrefixedInserter<false> > IRBuilderTy;
+#endif
+}
+
+namespace {
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
+
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
+
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+
+ /// \brief Support for ordering ranges.
+ ///
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
+
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
+
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
+
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
+
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
+ ///
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
+ }
+ return false;
+ }
+
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
+
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
+
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. They
+/// mark the range of bytes which are referenced by a particular instruction,
+/// and includes a handle to the user itself and the pointer value in use.
+/// The bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentionally overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+ /// \brief Combined storage for both the Use* and split state.
+ PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
+
+public:
+ PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+ bool IsSplit)
+ : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
+
+ /// \brief The use in question. Provides access to both user and used value.
+ ///
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
+
+ /// \brief Set the use for this partition use range.
+ void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+
+ /// \brief Whether this use is split across multiple partitions.
+ bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
+
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
+ /// \brief Construct a partitioning of a particular alloca.
+ ///
+ /// Construction does most of the work for partitioning the alloca. This
+ /// performs the necessary walks of users and builds a partitioning from it.
+ AllocaPartitioning(const DataLayout &TD, AllocaInst &AI);
+
+ /// \brief Test whether a pointer to the allocation escapes our analysis.
+ ///
+ /// If this is true, the partitioning is never fully built and should be
+ /// ignored.
+ bool isEscaped() const { return PointerEscapingInstr; }
+
+ /// \brief Support for iterating over the partitions.
+ /// @{
+ typedef SmallVectorImpl<Partition>::iterator iterator;
+ iterator begin() { return Partitions.begin(); }
+ iterator end() { return Partitions.end(); }
+
+ typedef SmallVectorImpl<Partition>::const_iterator const_iterator;
+ const_iterator begin() const { return Partitions.begin(); }
+ const_iterator end() const { return Partitions.end(); }
+ /// @}
+
+ /// \brief Support for iterating over and manipulating a particular
+ /// partition's uses.
+ ///
+ /// The iteration support provided for uses is more limited, but also
+ /// includes some manipulation routines to support rewriting the uses of
+ /// partitions during SROA.
+ /// @{
+ typedef SmallVectorImpl<PartitionUse>::iterator use_iterator;
+ use_iterator use_begin(unsigned Idx) { return Uses[Idx].begin(); }
+ use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
+ use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
+ use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }
+
+ typedef SmallVectorImpl<PartitionUse>::const_iterator const_use_iterator;
+ const_use_iterator use_begin(unsigned Idx) const { return Uses[Idx].begin(); }
+ const_use_iterator use_begin(const_iterator I) const {
+ return Uses[I - begin()].begin();
+ }
+ const_use_iterator use_end(unsigned Idx) const { return Uses[Idx].end(); }
+ const_use_iterator use_end(const_iterator I) const {
+ return Uses[I - begin()].end();
+ }
+
+ unsigned use_size(unsigned Idx) const { return Uses[Idx].size(); }
+ unsigned use_size(const_iterator I) const { return Uses[I - begin()].size(); }
+ const PartitionUse &getUse(unsigned PIdx, unsigned UIdx) const {
+ return Uses[PIdx][UIdx];
+ }
+ const PartitionUse &getUse(const_iterator I, unsigned UIdx) const {
+ return Uses[I - begin()][UIdx];
+ }
+
+ void use_push_back(unsigned Idx, const PartitionUse &PU) {
+ Uses[Idx].push_back(PU);
+ }
+ void use_push_back(const_iterator I, const PartitionUse &PU) {
+ Uses[I - begin()].push_back(PU);
+ }
+ /// @}
+
+ /// \brief Allow iterating the dead users for this alloca.
+ ///
+ /// These are instructions which will never actually use the alloca as they
+ /// are outside the allocated range. They are safe to replace with undef and
+ /// delete.
+ /// @{
+ typedef SmallVectorImpl<Instruction *>::const_iterator dead_user_iterator;
+ dead_user_iterator dead_user_begin() const { return DeadUsers.begin(); }
+ dead_user_iterator dead_user_end() const { return DeadUsers.end(); }
+ /// @}
+
+ /// \brief Allow iterating the dead expressions referring to this alloca.
+ ///
+ /// These are operands which have cannot actually be used to refer to the
+ /// alloca as they are outside its range and the user doesn't correct for
+ /// that. These mostly consist of PHI node inputs and the like which we just
+ /// need to replace with undef.
+ /// @{
+ typedef SmallVectorImpl<Use *>::const_iterator dead_op_iterator;
+ dead_op_iterator dead_op_begin() const { return DeadOperands.begin(); }
+ dead_op_iterator dead_op_end() const { return DeadOperands.end(); }
+ /// @}
+
+ /// \brief MemTransferInst auxiliary data.
+ /// This struct provides some auxiliary data about memory transfer
+ /// intrinsics such as memcpy and memmove. These intrinsics can use two
+ /// different ranges within the same alloca, and provide other challenges to
+ /// correctly represent. We stash extra data to help us untangle this
+ /// after the partitioning is complete.
+ struct MemTransferOffsets {
+ /// The destination begin and end offsets when the destination is within
+ /// this alloca. If the end offset is zero the destination is not within
+ /// this alloca.
+ uint64_t DestBegin, DestEnd;
+
+ /// The source begin and end offsets when the source is within this alloca.
+ /// If the end offset is zero, the source is not within this alloca.
+ uint64_t SourceBegin, SourceEnd;
+
+ /// Flag for whether an alloca is splittable.
+ bool IsSplittable;
+ };
+ MemTransferOffsets getMemTransferOffsets(MemTransferInst &II) const {
+ return MemTransferInstData.lookup(&II);
+ }
+
+ /// \brief Map from a PHI or select operand back to a partition.
+ ///
+ /// When manipulating PHI nodes or selects, they can use more than one
+ /// partition of an alloca. We store a special mapping to allow finding the
+ /// partition referenced by each of these operands, if any.
+ iterator findPartitionForPHIOrSelectOperand(Use *U) {
+ SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
+ = PHIOrSelectOpMap.find(U);
+ if (MapIt == PHIOrSelectOpMap.end())
+ return end();
+
+ return begin() + MapIt->second.first;
+ }
+
+ /// \brief Map from a PHI or select operand back to the specific use of
+ /// a partition.
+ ///
+ /// Similar to mapping these operands back to the partitions, this maps
+ /// directly to the use structure of that partition.
+ use_iterator findPartitionUseForPHIOrSelectOperand(Use *U) {
+ SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
+ = PHIOrSelectOpMap.find(U);
+ assert(MapIt != PHIOrSelectOpMap.end());
+ return Uses[MapIt->second.first].begin() + MapIt->second.second;
+ }
+
+ /// \brief Compute a common type among the uses of a particular partition.
+ ///
+ /// This routines walks all of the uses of a particular partition and tries
+ /// to find a common type between them. Untyped operations such as memset and
+ /// memcpy are ignored.
+ Type *getCommonType(iterator I) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
+ void printUsers(raw_ostream &OS, const_iterator I,
+ StringRef Indent = " ") const;
+ void print(raw_ostream &OS) const;
+ void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump(const_iterator I) const;
+ void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump() const;
+#endif
+
+private:
+ template <typename DerivedT, typename RetT = void> class BuilderBase;
+ class PartitionBuilder;
+ friend class AllocaPartitioning::PartitionBuilder;
+ class UseBuilder;
+ friend class AllocaPartitioning::UseBuilder;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// \brief Handle to alloca instruction to simplify method interfaces.
+ AllocaInst &AI;
+#endif
+
+ /// \brief The instruction responsible for this alloca having no partitioning.
+ ///
+ /// When an instruction (potentially) escapes the pointer to the alloca, we
+ /// store a pointer to that here and abort trying to partition the alloca.
+ /// This will be null if the alloca is partitioned successfully.
+ Instruction *PointerEscapingInstr;
+
+ /// \brief The partitions of the alloca.
+ ///
+ /// We store a vector of the partitions over the alloca here. This vector is
+ /// sorted by increasing begin offset, and then by decreasing end offset. See
+ /// the Partition inner class for more details. Initially (during
+ /// construction) there are overlaps, but we form a disjoint sequence of
+ /// partitions while finishing construction and a fully constructed object is
+ /// expected to always have this as a disjoint space.
+ SmallVector<Partition, 8> Partitions;
+
+ /// \brief The uses of the partitions.
+ ///
+ /// This is essentially a mapping from each partition to a list of uses of
+ /// that partition. The mapping is done with a Uses vector that has the exact
+ /// same number of entries as the partition vector. Each entry is itself
+ /// a vector of the uses.
+ SmallVector<SmallVector<PartitionUse, 2>, 8> Uses;
+
+ /// \brief Instructions which will become dead if we rewrite the alloca.
+ ///
+ /// Note that these are not separated by partition. This is because we expect
+ /// a partitioned alloca to be completely rewritten or not rewritten at all.
+ /// If rewritten, all these instructions can simply be removed and replaced
+ /// with undef as they come from outside of the allocated space.
+ SmallVector<Instruction *, 8> DeadUsers;
+
+ /// \brief Operands which will become dead if we rewrite the alloca.
+ ///
+ /// These are operands that in their particular use can be replaced with
+ /// undef when we rewrite the alloca. These show up in out-of-bounds inputs
+ /// to PHI nodes and the like. They aren't entirely dead (there might be
+ /// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we
+ /// want to swap this particular input for undef to simplify the use lists of
+ /// the alloca.
+ SmallVector<Use *, 8> DeadOperands;
+
+ /// \brief The underlying storage for auxiliary memcpy and memset info.
+ SmallDenseMap<MemTransferInst *, MemTransferOffsets, 4> MemTransferInstData;
+
+ /// \brief A side datastructure used when building up the partitions and uses.
+ ///
+ /// This mapping is only really used during the initial building of the
+ /// partitioning so that we can retain information about PHI and select nodes
+ /// processed.
+ SmallDenseMap<Instruction *, std::pair<uint64_t, bool> > PHIOrSelectSizes;
+
+ /// \brief Auxiliary information for particular PHI or select operands.
+ SmallDenseMap<Use *, std::pair<unsigned, unsigned>, 4> PHIOrSelectOpMap;
+
+ /// \brief A utility routine called from the constructor.
+ ///
+ /// This does what it says on the tin. It is the key of the alloca partition
+ /// splitting and merging. After it is called we have the desired disjoint
+ /// collection of partitions.
+ void splitAndMergePartitions();
+};
+}
+
+static Value *foldSelectInst(SelectInst &SI) {
+ // If the condition being selected on is a constant or the same value is
+ // being selected between, fold the select. Yes this does (rarely) happen
+ // early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
+ return SI.getOperand(1+CI->isZero());
+ if (SI.getOperand(1) == SI.getOperand(2))
+ return SI.getOperand(1);
+
+ return 0;
+}
+
+/// \brief Builder for the alloca partitioning.
+///
+/// This class builds an alloca partitioning by recursively visiting the uses
+/// of an alloca and splitting the partitions for each load and store at each
+/// offset.
+class AllocaPartitioning::PartitionBuilder
+ : public PtrUseVisitor<PartitionBuilder> {
+ friend class PtrUseVisitor<PartitionBuilder>;
+ friend class InstVisitor<PartitionBuilder>;
+ typedef PtrUseVisitor<PartitionBuilder> Base;
+
+ const uint64_t AllocSize;
+ AllocaPartitioning &P;
+
+ SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap;
+
+public:
+ PartitionBuilder(const DataLayout &DL, AllocaInst &AI, AllocaPartitioning &P)
+ : PtrUseVisitor<PartitionBuilder>(DL),
+ AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())),
+ P(P) {}
+
+private:
+ void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
+ bool IsSplittable = false) {
+ // Completely skip uses which have a zero size or start either before or
+ // past the end of the allocation.
+ if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
+ << " which has zero size or starts outside of the "
+ << AllocSize << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << I << "\n");
+ return;
+ }
+
+ uint64_t BeginOffset = Offset.getZExtValue();
+ uint64_t EndOffset = BeginOffset + Size;
+
+ // Clamp the end offset to the end of the allocation. Note that this is
+ // formulated to handle even the case where "BeginOffset + Size" overflows.
+ // This may appear superficially to be something we could ignore entirely,
+ // but that is not so! There may be widened loads or PHI-node uses where
+ // some instructions are dead but not others. We can't completely ignore
+ // them, and so have to record at least the information here.
+ assert(AllocSize >= BeginOffset); // Established above.
+ if (Size > AllocSize - BeginOffset) {
+ DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
+ << " to remain within the " << AllocSize << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << I << "\n");
+ EndOffset = AllocSize;
+ }
+
+ Partition New(BeginOffset, EndOffset, IsSplittable);
+ P.Partitions.push_back(New);
+ }
+
+ void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
+ uint64_t Size, bool IsVolatile) {
+ // We allow splitting of loads and stores where the type is an integer type
+ // and cover the entire alloca. This prevents us from splitting over
+ // eagerly.
+ // FIXME: In the great blue eventually, we should eagerly split all integer
+ // loads and stores, and then have a separate step that merges adjacent
+ // alloca partitions into a single partition suitable for integer widening.
+ // Or we should skip the merge step and rely on GVN and other passes to
+ // merge adjacent loads and stores that survive mem2reg.
+ bool IsSplittable =
+ Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
+
+ insertUse(I, Offset, Size, IsSplittable);
+ }
+
+ void visitLoadInst(LoadInst &LI) {
+ assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
+ "All simple FCA loads should have been pre-split");
+
+ if (!IsOffsetKnown)
+ return PI.setAborted(&LI);
+
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
+ }
+
+ void visitStoreInst(StoreInst &SI) {
+ Value *ValOp = SI.getValueOperand();
+ if (ValOp == *U)
+ return PI.setEscapedAndAborted(&SI);
+ if (!IsOffsetKnown)
+ return PI.setAborted(&SI);
+
+ uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of of the allocation, it's behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse. We also try to handle cases which might run the
+ // risk of overflow.
+ // FIXME: We should instead consider the pointer to have escaped if this
+ // function is being instrumented for addressing bugs or race conditions.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
+ << " which extends past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << SI << "\n");
+ return;
+ }
+
+ assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
+ "All simple FCA stores should have been pre-split");
+ handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
+ }
+
+
+ void visitMemSetInst(MemSetInst &II) {
+ assert(II.getRawDest() == *U && "Pointer use is not the destination?");
+ ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ // Zero-length mem transfer intrinsics can be ignored entirely.
+ return;
+
+ if (!IsOffsetKnown)
+ return PI.setAborted(&II);
+
+ insertUse(II, Offset,
+ Length ? Length->getLimitedValue()
+ : AllocSize - Offset.getLimitedValue(),
+ (bool)Length);
+ }
+
+ void visitMemTransferInst(MemTransferInst &II) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ // Zero-length mem transfer intrinsics can be ignored entirely.
+ return;
+
+ if (!IsOffsetKnown)
+ return PI.setAborted(&II);
+
+ uint64_t RawOffset = Offset.getLimitedValue();
+ uint64_t Size = Length ? Length->getLimitedValue()
+ : AllocSize - RawOffset;
+
+ MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+
+ // Only intrinsics with a constant length can be split.
+ Offsets.IsSplittable = Length;
+
+ if (*U == II.getRawDest()) {
+ Offsets.DestBegin = RawOffset;
+ Offsets.DestEnd = RawOffset + Size;
+ }
+ if (*U == II.getRawSource()) {
+ Offsets.SourceBegin = RawOffset;
+ Offsets.SourceEnd = RawOffset + Size;
+ }
+
+ // If we have set up end offsets for both the source and the destination,
+ // we have found both sides of this transfer pointing at the same alloca.
+ bool SeenBothEnds = Offsets.SourceEnd && Offsets.DestEnd;
+ if (SeenBothEnds && II.getRawDest() != II.getRawSource()) {
+ unsigned PrevIdx = MemTransferPartitionMap[&II];
+
+ // Check if the begin offsets match and this is a non-volatile transfer.
+ // In that case, we can completely elide the transfer.
+ if (!II.isVolatile() && Offsets.SourceBegin == Offsets.DestBegin) {
+ P.Partitions[PrevIdx].kill();
+ return;
+ }
+
+ // Otherwise we have an offset transfer within the same alloca. We can't
+ // split those.
+ P.Partitions[PrevIdx].IsSplittable = Offsets.IsSplittable = false;
+ } else if (SeenBothEnds) {
+ // Handle the case where this exact use provides both ends of the
+ // operation.
+ assert(II.getRawDest() == II.getRawSource());
+
+ // For non-volatile transfers this is a no-op.
+ if (!II.isVolatile())
+ return;
+
+ // Otherwise just suppress splitting.
+ Offsets.IsSplittable = false;
+ }
+
+
+ // Insert the use now that we've fixed up the splittable nature.
+ insertUse(II, Offset, Size, Offsets.IsSplittable);
+
+ // Setup the mapping from intrinsic to partition of we've not seen both
+ // ends of this transfer.
+ if (!SeenBothEnds) {
+ unsigned NewIdx = P.Partitions.size() - 1;
+ bool Inserted
+ = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)).second;
+ assert(Inserted &&
+ "Already have intrinsic in map but haven't seen both ends");
+ (void)Inserted;
+ }
+ }
+
+ // Disable SRoA for any intrinsics except for lifetime invariants.
+ // FIXME: What about debug intrinsics? This matches old behavior, but
+ // doesn't make sense.
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ if (!IsOffsetKnown)
+ return PI.setAborted(&II);
+
+ if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
+ II.getIntrinsicID() == Intrinsic::lifetime_end) {
+ ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
+ uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
+ Length->getLimitedValue());
+ insertUse(II, Offset, Size, true);
+ return;
+ }
+
+ Base::visitIntrinsicInst(II);
+ }
+
+ Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
+ // We consider any PHI or select that results in a direct load or store of
+ // the same offset to be a viable use for partitioning purposes. These uses
+ // are considered unsplittable and the size is the maximum loaded or stored
+ // size.
+ SmallPtrSet<Instruction *, 4> Visited;
+ SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
+ Visited.insert(Root);
+ Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+ // If there are no loads or stores, the access is dead. We mark that as
+ // a size zero access.
+ Size = 0;
+ do {
+ Instruction *I, *UsedI;
+ llvm::tie(UsedI, I) = Uses.pop_back_val();
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
+ continue;
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ Value *Op = SI->getOperand(0);
+ if (Op == UsedI)
+ return SI;
+ Size = std::max(Size, DL.getTypeStoreSize(Op->getType()));
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ if (!GEP->hasAllZeroIndices())
+ return GEP;
+ } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
+ !isa<SelectInst>(I)) {
+ return I;
+ }
+
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI)
+ if (Visited.insert(cast<Instruction>(*UI)))
+ Uses.push_back(std::make_pair(I, cast<Instruction>(*UI)));
+ } while (!Uses.empty());
+
+ return 0;
+ }
+
+ void visitPHINode(PHINode &PN) {
+ if (PN.use_empty())
+ return;
+ if (!IsOffsetKnown)
+ return PI.setAborted(&PN);
+
+ // See if we already have computed info on this node.
+ std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN];
+ if (PHIInfo.first) {
+ PHIInfo.second = true;
+ insertUse(PN, Offset, PHIInfo.first);
+ return;
+ }
+
+ // Check for an unsafe use of the PHI node.
+ if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
+ return PI.setAborted(UnsafeI);
+
+ insertUse(PN, Offset, PHIInfo.first);
+ }
+
+ void visitSelectInst(SelectInst &SI) {
+ if (SI.use_empty())
+ return;
+ if (Value *Result = foldSelectInst(SI)) {
+ if (Result == *U)
+ // If the result of the constant fold will be the pointer, recurse
+ // through the select as if we had RAUW'ed it.
+ enqueueUsers(SI);
+
+ return;
+ }
+ if (!IsOffsetKnown)
+ return PI.setAborted(&SI);
+
+ // See if we already have computed info on this node.
+ std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI];
+ if (SelectInfo.first) {
+ SelectInfo.second = true;
+ insertUse(SI, Offset, SelectInfo.first);
+ return;
+ }
+
+ // Check for an unsafe use of the PHI node.
+ if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
+ return PI.setAborted(UnsafeI);
+
+ insertUse(SI, Offset, SelectInfo.first);
+ }
+
+ /// \brief Disable SROA entirely if there are unhandled users of the alloca.
+ void visitInstruction(Instruction &I) {
+ PI.setAborted(&I);
+ }
+};
+
+/// \brief Use adder for the alloca partitioning.
+///
+/// This class adds the uses of an alloca to all of the partitions which they
+/// use. For splittable partitions, this can end up doing essentially a linear
+/// walk of the partitions, but the number of steps remains bounded by the
+/// total result instruction size:
+/// - The number of partitions is a result of the number unsplittable
+/// instructions using the alloca.
+/// - The number of users of each partition is at worst the total number of
+/// splittable instructions using the alloca.
+/// Thus we will produce N * M instructions in the end, where N are the number
+/// of unsplittable uses and M are the number of splittable. This visitor does
+/// the exact same number of updates to the partitioning.
+///
+/// In the more common case, this visitor will leverage the fact that the
+/// partition space is pre-sorted, and do a logarithmic search for the
+/// partition needed, making the total visit a classical ((N + M) * log(N))
+/// complexity operation.
+class AllocaPartitioning::UseBuilder : public PtrUseVisitor<UseBuilder> {
+ friend class PtrUseVisitor<UseBuilder>;
+ friend class InstVisitor<UseBuilder>;
+ typedef PtrUseVisitor<UseBuilder> Base;
+
+ const uint64_t AllocSize;
+ AllocaPartitioning &P;
+
+ /// \brief Set to de-duplicate dead instructions found in the use walk.
+ SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
+
+public:
+ UseBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
+ : PtrUseVisitor<UseBuilder>(TD),
+ AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
+ P(P) {}
+
+private:
+ void markAsDead(Instruction &I) {
+ if (VisitedDeadInsts.insert(&I))
+ P.DeadUsers.push_back(&I);
+ }
+
+ void insertUse(Instruction &User, const APInt &Offset, uint64_t Size) {
+ // If the use has a zero size or extends outside of the allocation, record
+ // it as a dead use for elimination later.
+ if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize))
+ return markAsDead(User);
+
+ uint64_t BeginOffset = Offset.getZExtValue();
+ uint64_t EndOffset = BeginOffset + Size;
+
+ // Clamp the end offset to the end of the allocation. Note that this is
+ // formulated to handle even the case where "BeginOffset + Size" overflows.
+ assert(AllocSize >= BeginOffset); // Established above.
+ if (Size > AllocSize - BeginOffset)
+ EndOffset = AllocSize;
+
+ // NB: This only works if we have zero overlapping partitions.
+ iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+ I = llvm::prior(I);
+ iterator E = P.end();
+ bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+ for (; I != E && I->BeginOffset < EndOffset; ++I) {
+ PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
+ std::min(I->EndOffset, EndOffset), U, IsSplit);
+ P.use_push_back(I, NewPU);
+ if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
+ P.PHIOrSelectOpMap[U]
+ = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1);
+ }
+ }
+
+ void visitBitCastInst(BitCastInst &BC) {
+ if (BC.use_empty())
+ return markAsDead(BC);
+
+ return Base::visitBitCastInst(BC);
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ if (GEPI.use_empty())
+ return markAsDead(GEPI);
+
+ return Base::visitGetElementPtrInst(GEPI);
+ }
+
+ void visitLoadInst(LoadInst &LI) {
+ assert(IsOffsetKnown);
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ insertUse(LI, Offset, Size);
+ }
+
+ void visitStoreInst(StoreInst &SI) {
+ assert(IsOffsetKnown);
+ uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of of the allocation, it's behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size))
+ return markAsDead(SI);
+
+ insertUse(SI, Offset, Size);
+ }
+
+ void visitMemSetInst(MemSetInst &II) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ return markAsDead(II);
+
+ assert(IsOffsetKnown);
+ insertUse(II, Offset, Length ? Length->getLimitedValue()
+ : AllocSize - Offset.getLimitedValue());
+ }
+
+ void visitMemTransferInst(MemTransferInst &II) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ return markAsDead(II);
+
+ assert(IsOffsetKnown);
+ uint64_t Size = Length ? Length->getLimitedValue()
+ : AllocSize - Offset.getLimitedValue();
+
+ const MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+ if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
+ Offsets.DestBegin == Offsets.SourceBegin)
+ return markAsDead(II); // Skip identity transfers without side-effects.
+
+ insertUse(II, Offset, Size);
+ }
+
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ assert(IsOffsetKnown);
+ assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
+ II.getIntrinsicID() == Intrinsic::lifetime_end);
+
+ ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
+ insertUse(II, Offset, std::min(Length->getLimitedValue(),
+ AllocSize - Offset.getLimitedValue()));
+ }
+
+ void insertPHIOrSelect(Instruction &User, const APInt &Offset) {
+ uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first;
+
+ // For PHI and select operands outside the alloca, we can't nuke the entire
+ // phi or select -- the other side might still be relevant, so we special
+ // case them here and use a separate structure to track the operands
+ // themselves which should be replaced with undef.
+ if ((Offset.isNegative() && Offset.uge(Size)) ||
+ (!Offset.isNegative() && Offset.uge(AllocSize))) {
+ P.DeadOperands.push_back(U);
+ return;
+ }
+
+ insertUse(User, Offset, Size);
+ }
+
+ void visitPHINode(PHINode &PN) {
+ if (PN.use_empty())
+ return markAsDead(PN);
+
+ assert(IsOffsetKnown);
+ insertPHIOrSelect(PN, Offset);
+ }
+
+ void visitSelectInst(SelectInst &SI) {
+ if (SI.use_empty())
+ return markAsDead(SI);
+
+ if (Value *Result = foldSelectInst(SI)) {
+ if (Result == *U)
+ // If the result of the constant fold will be the pointer, recurse
+ // through the select as if we had RAUW'ed it.
+ enqueueUsers(SI);
+ else
+ // Otherwise the operand to the select is dead, and we can replace it
+ // with undef.
+ P.DeadOperands.push_back(U);
+
+ return;
+ }
+
+ assert(IsOffsetKnown);
+ insertPHIOrSelect(SI, Offset);
+ }
+
+ /// \brief Unreachable, we've already visited the alloca once.
+ void visitInstruction(Instruction &I) {
+ llvm_unreachable("Unhandled instruction in use builder.");
+ }
+};
+
+void AllocaPartitioning::splitAndMergePartitions() {
+ size_t NumDeadPartitions = 0;
+
+ // Track the range of splittable partitions that we pass when accumulating
+ // overlapping unsplittable partitions.
+ uint64_t SplitEndOffset = 0ull;
+
+ Partition New(0ull, 0ull, false);
+
+ for (unsigned i = 0, j = i, e = Partitions.size(); i != e; i = j) {
+ ++j;
+
+ if (!Partitions[i].IsSplittable || New.BeginOffset == New.EndOffset) {
+ assert(New.BeginOffset == New.EndOffset);
+ New = Partitions[i];
+ } else {
+ assert(New.IsSplittable);
+ New.EndOffset = std::max(New.EndOffset, Partitions[i].EndOffset);
+ }
+ assert(New.BeginOffset != New.EndOffset);
+
+ // Scan the overlapping partitions.
+ while (j != e && New.EndOffset > Partitions[j].BeginOffset) {
+ // If the new partition we are forming is splittable, stop at the first
+ // unsplittable partition.
+ if (New.IsSplittable && !Partitions[j].IsSplittable)
+ break;
+
+ // Grow the new partition to include any equally splittable range. 'j' is
+ // always equally splittable when New is splittable, but when New is not
+ // splittable, we may subsume some (or part of some) splitable partition
+ // without growing the new one.
+ if (New.IsSplittable == Partitions[j].IsSplittable) {
+ New.EndOffset = std::max(New.EndOffset, Partitions[j].EndOffset);
+ } else {
+ assert(!New.IsSplittable);
+ assert(Partitions[j].IsSplittable);
+ SplitEndOffset = std::max(SplitEndOffset, Partitions[j].EndOffset);
+ }
+
+ Partitions[j].kill();
+ ++NumDeadPartitions;
+ ++j;
+ }
+
+ // If the new partition is splittable, chop off the end as soon as the
+ // unsplittable subsequent partition starts and ensure we eventually cover
+ // the splittable area.
+ if (j != e && New.IsSplittable) {
+ SplitEndOffset = std::max(SplitEndOffset, New.EndOffset);
+ New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset);
+ }
+
+ // Add the new partition if it differs from the original one and is
+ // non-empty. We can end up with an empty partition here if it was
+ // splittable but there is an unsplittable one that starts at the same
+ // offset.
+ if (New != Partitions[i]) {
+ if (New.BeginOffset != New.EndOffset)
+ Partitions.push_back(New);
+ // Mark the old one for removal.
+ Partitions[i].kill();
+ ++NumDeadPartitions;
+ }
+
+ New.BeginOffset = New.EndOffset;
+ if (!New.IsSplittable) {
+ New.EndOffset = std::max(New.EndOffset, SplitEndOffset);
+ if (j != e && !Partitions[j].IsSplittable)
+ New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset);
+ New.IsSplittable = true;
+ // If there is a trailing splittable partition which won't be fused into
+ // the next splittable partition go ahead and add it onto the partitions
+ // list.
+ if (New.BeginOffset < New.EndOffset &&
+ (j == e || !Partitions[j].IsSplittable ||
+ New.EndOffset < Partitions[j].BeginOffset)) {
+ Partitions.push_back(New);
+ New.BeginOffset = New.EndOffset = 0ull;
+ }
+ }
+ }
+
+ // Re-sort the partitions now that they have been split and merged into
+ // disjoint set of partitions. Also remove any of the dead partitions we've
+ // replaced in the process.
+ std::sort(Partitions.begin(), Partitions.end());
+ if (NumDeadPartitions) {
+ assert(Partitions.back().isDead());
+ assert((ptrdiff_t)NumDeadPartitions ==
+ std::count(Partitions.begin(), Partitions.end(), Partitions.back()));
+ }
+ Partitions.erase(Partitions.end() - NumDeadPartitions, Partitions.end());
+}
+
+AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
+ :
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ AI(AI),
+#endif
+ PointerEscapingInstr(0) {
+ PartitionBuilder PB(TD, AI, *this);
+ PartitionBuilder::PtrInfo PtrI = PB.visitPtr(AI);
+ if (PtrI.isEscaped() || PtrI.isAborted()) {
+ // FIXME: We should sink the escape vs. abort info into the caller nicely,
+ // possibly by just storing the PtrInfo in the AllocaPartitioning.
+ PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
+ : PtrI.getAbortingInst();
+ assert(PointerEscapingInstr && "Did not track a bad instruction");
+ return;
+ }
+
+ // Sort the uses. This arranges for the offsets to be in ascending order,
+ // and the sizes to be in descending order.
+ std::sort(Partitions.begin(), Partitions.end());
+
+ // Remove any partitions from the back which are marked as dead.
+ while (!Partitions.empty() && Partitions.back().isDead())
+ Partitions.pop_back();
+
+ if (Partitions.size() > 1) {
+ // Intersect splittability for all partitions with equal offsets and sizes.
+ // Then remove all but the first so that we have a sequence of non-equal but
+ // potentially overlapping partitions.
+ for (iterator I = Partitions.begin(), J = I, E = Partitions.end(); I != E;
+ I = J) {
+ ++J;
+ while (J != E && *I == *J) {
+ I->IsSplittable &= J->IsSplittable;
+ ++J;
+ }
+ }
+ Partitions.erase(std::unique(Partitions.begin(), Partitions.end()),
+ Partitions.end());
+
+ // Split splittable and merge unsplittable partitions into a disjoint set
+ // of partitions over the used space of the allocation.
+ splitAndMergePartitions();
+ }
+
+ // Record how many partitions we end up with.
+ NumAllocaPartitions += Partitions.size();
+ MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca);
+
+ // Now build up the user lists for each of these disjoint partitions by
+ // re-walking the recursive users of the alloca.
+ Uses.resize(Partitions.size());
+ UseBuilder UB(TD, AI, *this);
+ PtrI = UB.visitPtr(AI);
+ assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
+ assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");
+
+ unsigned NumUses = 0;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+ for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx)
+ NumUses += Uses[Idx].size();
+#endif
+ NumAllocaPartitionUses += NumUses;
+ MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca);
+}
+
+Type *AllocaPartitioning::getCommonType(iterator I) const {
+ Type *Ty = 0;
+ for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
+ Use *U = UI->getUse();
+ if (!U)
+ continue; // Skip dead uses.
+ if (isa<IntrinsicInst>(*U->getUser()))
+ continue;
+ if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
+ continue;
+
+ Type *UserTy = 0;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
+ UserTy = LI->getType();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
+ UserTy = SI->getValueOperand()->getType();
+ else
+ return 0; // Bail if we have weird uses.
+
+ if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+ // If the type is larger than the partition, skip it. We only encounter
+ // this for split integer operations where we want to use the type of the
+ // entity causing the split.
+ if (ITy->getBitWidth() > (I->EndOffset - I->BeginOffset)*8)
+ continue;
+
+ // If we have found an integer type use covering the alloca, use that
+ // regardless of the other types, as integers are often used for a "bucket
+ // of bits" type.
+ return ITy;
+ }
+
+ if (Ty && Ty != UserTy)
+ return 0;
+
+ Ty = UserTy;
+ }
+ return Ty;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
+ StringRef Indent) const {
+ OS << Indent << "partition #" << (I - begin())
+ << " [" << I->BeginOffset << "," << I->EndOffset << ")"
+ << (I->IsSplittable ? " (splittable)" : "")
+ << (Uses[I - begin()].empty() ? " (zero uses)" : "")
+ << "\n";
+}
+
+void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
+ StringRef Indent) const {
+ for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
+ if (!UI->getUse())
+ continue; // Skip dead uses.
+ OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
+ << "used by: " << *UI->getUse()->getUser() << "\n";
+ if (MemTransferInst *II =
+ dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
+ const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
+ bool IsDest;
+ if (!MTO.IsSplittable)
+ IsDest = UI->BeginOffset == MTO.DestBegin;
+ else
+ IsDest = MTO.DestBegin != 0u;
+ OS << Indent << " (original " << (IsDest ? "dest" : "source") << ": "
+ << "[" << (IsDest ? MTO.DestBegin : MTO.SourceBegin)
+ << "," << (IsDest ? MTO.DestEnd : MTO.SourceEnd) << ")\n";
+ }
+ }
+}
+
+void AllocaPartitioning::print(raw_ostream &OS) const {
+ if (PointerEscapingInstr) {
+ OS << "No partitioning for alloca: " << AI << "\n"
+ << " A pointer to this alloca escaped by:\n"
+ << " " << *PointerEscapingInstr << "\n";
+ return;
+ }
+
+ OS << "Partitioning of alloca: " << AI << "\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ print(OS, I);
+ printUsers(OS, I);
+ }
+}
+
+void AllocaPartitioning::dump(const_iterator I) const { print(dbgs(), I); }
+void AllocaPartitioning::dump() const { print(dbgs()); }
+
+#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+
+namespace {
+/// \brief Implementation of LoadAndStorePromoter for promoting allocas.
+///
+/// This subclass of LoadAndStorePromoter adds overrides to handle promoting
+/// the loads and stores of an alloca instruction, as well as updating its
+/// debug information. This is used when a domtree is unavailable and thus
+/// mem2reg in its full form can't be used to handle promotion of allocas to
+/// scalar values.
+class AllocaPromoter : public LoadAndStorePromoter {
+ AllocaInst &AI;
+ DIBuilder &DIB;
+
+ SmallVector<DbgDeclareInst *, 4> DDIs;
+ SmallVector<DbgValueInst *, 4> DVIs;
+
+public:
+ AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ AllocaInst &AI, DIBuilder &DIB)
+ : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
+
+ void run(const SmallVectorImpl<Instruction*> &Insts) {
+ // Remember which alloca we're promoting (for isInstInList).
+ if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) {
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ UE = DebugNode->use_end();
+ UI != UE; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ DDIs.push_back(DDI);
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ DVIs.push_back(DVI);
+ }
+
+ LoadAndStorePromoter::run(Insts);
+ AI.eraseFromParent();
+ while (!DDIs.empty())
+ DDIs.pop_back_val()->eraseFromParent();
+ while (!DVIs.empty())
+ DVIs.pop_back_val()->eraseFromParent();
+ }
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getOperand(0) == &AI;
+ return cast<StoreInst>(I)->getPointerOperand() == &AI;
+ }
+
+ virtual void updateDebugInfo(Instruction *Inst) const {
+ for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+ }
+ for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ Value *Arg = 0;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If an argument is zero extended then use argument directly. The ZExt
+ // may be zapped by an optimization pass in future.
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ Arg = dyn_cast<Argument>(ZExt->getOperand(0));
+ else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ Arg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (!Arg)
+ Arg = SI->getValueOperand();
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ Arg = LI->getPointerOperand();
+ } else {
+ continue;
+ }
+ Instruction *DbgVal =
+ DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+ Inst);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ }
+ }
+};
+} // end anon namespace
+
+
+namespace {
+/// \brief An optimization pass providing Scalar Replacement of Aggregates.
+///
+/// This pass takes allocations which can be completely analyzed (that is, they
+/// don't escape) and tries to turn them into scalar SSA values. There are
+/// a few steps to this process.
+///
+/// 1) It takes allocations of aggregates and analyzes the ways in which they
+/// are used to try to split them into smaller allocations, ideally of
+/// a single scalar data type. It will split up memcpy and memset accesses
+/// as necessary and try to isolate individual scalar accesses.
+/// 2) It will transform accesses into forms which are suitable for SSA value
+/// promotion. This can be replacing a memset with a scalar store of an
+/// integer value, or it can involve speculating operations on a PHI or
+/// select to be a PHI or select of the results.
+/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
+/// onto insert and extract operations on a vector value, and convert them to
+/// this form. By doing so, it will enable promotion of vector aggregates to
+/// SSA vector values.
+class SROA : public FunctionPass {
+ const bool RequiresDomTree;
+
+ LLVMContext *C;
+ const DataLayout *TD;
+ DominatorTree *DT;
+
+ /// \brief Worklist of alloca instructions to simplify.
+ ///
+ /// Each alloca in the function is added to this. Each new alloca formed gets
+ /// added to it as well to recursively simplify unless that alloca can be
+ /// directly promoted. Finally, each time we rewrite a use of an alloca other
+ /// the one being actively rewritten, we add it back onto the list if not
+ /// already present to ensure it is re-visited.
+ SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > Worklist;
+
+ /// \brief A collection of instructions to delete.
+ /// We try to batch deletions to simplify code and make things a bit more
+ /// efficient.
+ SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts;
+
+ /// \brief Post-promotion worklist.
+ ///
+ /// Sometimes we discover an alloca which has a high probability of becoming
+ /// viable for SROA after a round of promotion takes place. In those cases,
+ /// the alloca is enqueued here for re-processing.
+ ///
+ /// Note that we have to be very careful to clear allocas out of this list in
+ /// the event they are deleted.
+ SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > PostPromotionWorklist;
+
+ /// \brief A collection of alloca instructions we can directly promote.
+ std::vector<AllocaInst *> PromotableAllocas;
+
+public:
+ SROA(bool RequiresDomTree = true)
+ : FunctionPass(ID), RequiresDomTree(RequiresDomTree),
+ C(0), TD(0), DT(0) {
+ initializeSROAPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ const char *getPassName() const { return "SROA"; }
+ static char ID;
+
+private:
+ friend class PHIOrSelectSpeculator;
+ friend class AllocaPartitionRewriter;
+ friend class AllocaPartitionVectorRewriter;
+
+ bool rewriteAllocaPartition(AllocaInst &AI,
+ AllocaPartitioning &P,
+ AllocaPartitioning::iterator PI);
+ bool splitAlloca(AllocaInst &AI, AllocaPartitioning &P);
+ bool runOnAlloca(AllocaInst &AI);
+ void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
+ bool promoteAllocas(Function &F);
+};
+}
+
+char SROA::ID = 0;
+
+FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
+ return new SROA(RequiresDomTree);
+}
+
+INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
+ false, false)
+
+namespace {
+/// \brief Visitor to speculate PHIs and Selects where possible.
+class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
+ // Befriend the base class so it can delegate to private visit methods.
+ friend class llvm::InstVisitor<PHIOrSelectSpeculator>;
+
+ const DataLayout &TD;
+ AllocaPartitioning &P;
+ SROA &Pass;
+
+public:
+ PHIOrSelectSpeculator(const DataLayout &TD, AllocaPartitioning &P, SROA &Pass)
+ : TD(TD), P(P), Pass(Pass) {}
+
+ /// \brief Visit the users of an alloca partition and rewrite them.
+ void visitUsers(AllocaPartitioning::const_iterator PI) {
+ // Note that we need to use an index here as the underlying vector of uses
+ // may be grown during speculation. However, we never need to re-visit the
+ // new uses, and so we can use the initial size bound.
+ for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
+ const PartitionUse &PU = P.getUse(PI, Idx);
+ if (!PU.getUse())
+ continue; // Skip dead use.
+
+ visit(cast<Instruction>(PU.getUse()->getUser()));
+ }
+ }
+
+private:
+ // By default, skip this instruction.
+ void visitInstruction(Instruction &I) {}
+
+ /// PHI instructions that use an alloca and are subsequently loaded can be
+ /// rewritten to load both input pointers in the pred blocks and then PHI the
+ /// results, allowing the load of the alloca to be promoted.
+ /// From this:
+ /// %P2 = phi [i32* %Alloca, i32* %Other]
+ /// %V = load i32* %P2
+ /// to:
+ /// %V1 = load i32* %Alloca -> will be mem2reg'd
+ /// ...
+ /// %V2 = load i32* %Other
+ /// ...
+ /// %V = phi [i32 %V1, i32 %V2]
+ ///
+ /// We can do this to a select if its only uses are loads and if the operands
+ /// to the select can be loaded unconditionally.
+ ///
+ /// FIXME: This should be hoisted into a generic utility, likely in
+ /// Transforms/Util/Local.h
+ bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) {
+ // For now, we can only do this promotion if the load is in the same block
+ // as the PHI, and if there are no stores between the phi and load.
+ // TODO: Allow recursive phi users.
+ // TODO: Allow stores.
+ BasicBlock *BB = PN.getParent();
+ unsigned MaxAlign = 0;
+ for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple()) return false;
+
+ // For now we only allow loads in the same block as the PHI. This is
+ // a common case that happens when instcombine merges two loads through
+ // a PHI.
+ if (LI->getParent() != BB) return false;
+
+ // Ensure that there are no instructions between the PHI and the load that
+ // could store.
+ for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ Loads.push_back(LI);
+ }
+
+ // We can only transform this if it is safe to push the loads into the
+ // predecessor blocks. The only thing to watch out for is that we can't put
+ // a possibly trapping load in the predecessor if it is a critical edge.
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
+ TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
+ Value *InVal = PN.getIncomingValue(Idx);
+
+ // If the value is produced by the terminator of the predecessor (an
+ // invoke) or it has side-effects, there is no valid place to put a load
+ // in the predecessor.
+ if (TI == InVal || TI->mayHaveSideEffects())
+ return false;
+
+ // If the predecessor has a single successor, then the edge isn't
+ // critical.
+ if (TI->getNumSuccessors() == 1)
+ continue;
+
+ // If this pointer is always safe to load, or if we can prove that there
+ // is already a load in the block, then we can move the load to the pred
+ // block.
+ if (InVal->isDereferenceablePointer() ||
+ isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))
+ continue;
+
+ return false;
+ }
+
+ return true;
+ }
+
+ void visitPHINode(PHINode &PN) {
+ DEBUG(dbgs() << " original: " << PN << "\n");
+
+ SmallVector<LoadInst *, 4> Loads;
+ if (!isSafePHIToSpeculate(PN, Loads))
+ return;
+
+ assert(!Loads.empty());
+
+ Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
+ IRBuilderTy PHIBuilder(&PN);
+ PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
+ PN.getName() + ".sroa.speculated");
+
+ // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // matter which one we get and if any differ.
+ LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
+ MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+ unsigned Align = SomeLoad->getAlignment();
+
+ // Rewrite all loads of the PN to use the new PHI.
+ do {
+ LoadInst *LI = Loads.pop_back_val();
+ LI->replaceAllUsesWith(NewPN);
+ Pass.DeadInsts.insert(LI);
+ } while (!Loads.empty());
+
+ // Inject loads into all of the pred blocks.
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
+ BasicBlock *Pred = PN.getIncomingBlock(Idx);
+ TerminatorInst *TI = Pred->getTerminator();
+ Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
+ Value *InVal = PN.getIncomingValue(Idx);
+ IRBuilderTy PredBuilder(TI);
+
+ LoadInst *Load
+ = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
+ Pred->getName()));
+ ++NumLoadsSpeculated;
+ Load->setAlignment(Align);
+ if (TBAATag)
+ Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ NewPN->addIncoming(Load, Pred);
+
+ Instruction *Ptr = dyn_cast<Instruction>(InVal);
+ if (!Ptr)
+ // No uses to rewrite.
+ continue;
+
+ // Try to lookup and rewrite any partition uses corresponding to this phi
+ // input.
+ AllocaPartitioning::iterator PI
+ = P.findPartitionForPHIOrSelectOperand(InUse);
+ if (PI == P.end())
+ continue;
+
+ // Replace the Use in the PartitionUse for this operand with the Use
+ // inside the load.
+ AllocaPartitioning::use_iterator UI
+ = P.findPartitionUseForPHIOrSelectOperand(InUse);
+ assert(isa<PHINode>(*UI->getUse()->getUser()));
+ UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
+ }
+ DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
+ }
+
+ /// Select instructions that use an alloca and are subsequently loaded can be
+ /// rewritten to load both input pointers and then select between the result,
+ /// allowing the load of the alloca to be promoted.
+ /// From this:
+ /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+ /// %V = load i32* %P2
+ /// to:
+ /// %V1 = load i32* %Alloca -> will be mem2reg'd
+ /// %V2 = load i32* %Other
+ /// %V = select i1 %cond, i32 %V1, i32 %V2
+ ///
+ /// We can do this to a select if its only uses are loads and if the operand
+ /// to the select can be loaded unconditionally.
+ bool isSafeSelectToSpeculate(SelectInst &SI,
+ SmallVectorImpl<LoadInst *> &Loads) {
+ Value *TValue = SI.getTrueValue();
+ Value *FValue = SI.getFalseValue();
+ bool TDerefable = TValue->isDereferenceablePointer();
+ bool FDerefable = FValue->isDereferenceablePointer();
+
+ for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple()) return false;
+
+ // Both operands to the select need to be dereferencable, either
+ // absolutely (e.g. allocas) or at this point because we can see other
+ // accesses to it.
+ if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,
+ LI->getAlignment(), &TD))
+ return false;
+ if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,
+ LI->getAlignment(), &TD))
+ return false;
+ Loads.push_back(LI);
+ }
+
+ return true;
+ }
+
+ void visitSelectInst(SelectInst &SI) {
+ DEBUG(dbgs() << " original: " << SI << "\n");
+
+ // If the select isn't safe to speculate, just use simple logic to emit it.
+ SmallVector<LoadInst *, 4> Loads;
+ if (!isSafeSelectToSpeculate(SI, Loads))
+ return;
+
+ IRBuilderTy IRB(&SI);
+ Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
+ AllocaPartitioning::iterator PIs[2];
+ PartitionUse PUs[2];
+ for (unsigned i = 0, e = 2; i != e; ++i) {
+ PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
+ if (PIs[i] != P.end()) {
+ // If the pointer is within the partitioning, remove the select from
+ // its uses. We'll add in the new loads below.
+ AllocaPartitioning::use_iterator UI
+ = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);
+ PUs[i] = *UI;
+ // Clear out the use here so that the offsets into the use list remain
+ // stable but this use is ignored when rewriting.
+ UI->setUse(0);
+ }
+ }
+
+ Value *TV = SI.getTrueValue();
+ Value *FV = SI.getFalseValue();
+ // Replace the loads of the select with a select of two loads.
+ while (!Loads.empty()) {
+ LoadInst *LI = Loads.pop_back_val();
+
+ IRB.SetInsertPoint(LI);
+ LoadInst *TL =
+ IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
+ LoadInst *FL =
+ IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
+ NumLoadsSpeculated += 2;
+
+ // Transfer alignment and TBAA info if present.
+ TL->setAlignment(LI->getAlignment());
+ FL->setAlignment(LI->getAlignment());
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+ TL->setMetadata(LLVMContext::MD_tbaa, Tag);
+ FL->setMetadata(LLVMContext::MD_tbaa, Tag);
+ }
+
+ Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
+ LI->getName() + ".sroa.speculated");
+
+ LoadInst *Loads[2] = { TL, FL };
+ for (unsigned i = 0, e = 2; i != e; ++i) {
+ if (PIs[i] != P.end()) {
+ Use *LoadUse = &Loads[i]->getOperandUse(0);
+ assert(PUs[i].getUse()->get() == LoadUse->get());
+ PUs[i].setUse(LoadUse);
+ P.use_push_back(PIs[i], PUs[i]);
+ }
+ }
+
+ DEBUG(dbgs() << " speculated to: " << *V << "\n");
+ LI->replaceAllUsesWith(V);
+ Pass.DeadInsts.insert(LI);
+ }
+ }
+};
+}
+
+/// \brief Build a GEP out of a base pointer and indices.
+///
+/// This will return the BasePtr if that is valid, or build a new GEP
+/// instruction using the IRBuilder if GEP-ing is needed.
+static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
+ SmallVectorImpl<Value *> &Indices) {
+ if (Indices.empty())
+ return BasePtr;
+
+ // A single zero index is a no-op, so check for this and avoid building a GEP
+ // in that case.
+ if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
+ return BasePtr;
+
+ return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
+}
+
+/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
+/// TargetTy without changing the offset of the pointer.
+///
+/// This routine assumes we've already established a properly offset GEP with
+/// Indices, and arrived at the Ty type. The goal is to continue to GEP with
+/// zero-indices down through type layers until we find one the same as
+/// TargetTy. If we can't find one with the same type, we at least try to use
+/// one with the same size. If none of that works, we just produce the GEP as
+/// indicated by Indices to have the correct offset.
+static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
+ Value *BasePtr, Type *Ty, Type *TargetTy,
+ SmallVectorImpl<Value *> &Indices) {
+ if (Ty == TargetTy)
+ return buildGEP(IRB, BasePtr, Indices);
+
+ // See if we can descend into a struct and locate a field with the correct
+ // type.
+ unsigned NumLayers = 0;
+ Type *ElementTy = Ty;
+ do {
+ if (ElementTy->isPointerTy())
+ break;
+ if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
+ ElementTy = SeqTy->getElementType();
+ // Note that we use the default address space as this index is over an
+ // array or a vector, not a pointer.
+ Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(0), 0)));
+ } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
+ if (STy->element_begin() == STy->element_end())
+ break; // Nothing left to descend into.
+ ElementTy = *STy->element_begin();
+ Indices.push_back(IRB.getInt32(0));
+ } else {
+ break;
+ }
+ ++NumLayers;
+ } while (ElementTy != TargetTy);
+ if (ElementTy != TargetTy)
+ Indices.erase(Indices.end() - NumLayers, Indices.end());
+
+ return buildGEP(IRB, BasePtr, Indices);
+}
+
+/// \brief Recursively compute indices for a natural GEP.
+///
+/// This is the recursive step for getNaturalGEPWithOffset that walks down the
+/// element types adding appropriate indices for the GEP.
+static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
+ Value *Ptr, Type *Ty, APInt &Offset,
+ Type *TargetTy,
+ SmallVectorImpl<Value *> &Indices) {
+ if (Offset == 0)
+ return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices);
+
+ // We can't recurse through pointer types.
+ if (Ty->isPointerTy())
+ return 0;
+
+ // We try to analyze GEPs over vectors here, but note that these GEPs are
+ // extremely poorly defined currently. The long-term goal is to remove GEPing
+ // over a vector from the IR completely.
+ if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
+ unsigned ElementSizeInBits = TD.getTypeSizeInBits(VecTy->getScalarType());
+ if (ElementSizeInBits % 8)
+ return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
+ APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
+ if (NumSkippedElements.ugt(VecTy->getNumElements()))
+ return 0;
+ Offset -= NumSkippedElements * ElementSize;
+ Indices.push_back(IRB.getInt(NumSkippedElements));
+ return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),
+ Offset, TargetTy, Indices);
+ }
+
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
+ Type *ElementTy = ArrTy->getElementType();
+ APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
+ if (NumSkippedElements.ugt(ArrTy->getNumElements()))
+ return 0;
+
+ Offset -= NumSkippedElements * ElementSize;
+ Indices.push_back(IRB.getInt(NumSkippedElements));
+ return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
+ Indices);
+ }
+
+ StructType *STy = dyn_cast<StructType>(Ty);
+ if (!STy)
+ return 0;
+
+ const StructLayout *SL = TD.getStructLayout(STy);
+ uint64_t StructOffset = Offset.getZExtValue();
+ if (StructOffset >= SL->getSizeInBytes())
+ return 0;
+ unsigned Index = SL->getElementContainingOffset(StructOffset);
+ Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
+ Type *ElementTy = STy->getElementType(Index);
+ if (Offset.uge(TD.getTypeAllocSize(ElementTy)))
+ return 0; // The offset points into alignment padding.
+
+ Indices.push_back(IRB.getInt32(Index));
+ return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
+ Indices);
+}
+
+/// \brief Get a natural GEP from a base pointer to a particular offset and
+/// resulting in a particular type.
+///
+/// The goal is to produce a "natural" looking GEP that works with the existing
+/// composite types to arrive at the appropriate offset and element type for
+/// a pointer. TargetTy is the element type the returned GEP should point-to if
+/// possible. We recurse by decreasing Offset, adding the appropriate index to
+/// Indices, and setting Ty to the result subtype.
+///
+/// If no natural GEP can be constructed, this function returns null.
+static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
+ Value *Ptr, APInt Offset, Type *TargetTy,
+ SmallVectorImpl<Value *> &Indices) {
+ PointerType *Ty = cast<PointerType>(Ptr->getType());
+
+ // Don't consider any GEPs through an i8* as natural unless the TargetTy is
+ // an i8.
+ if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
+ return 0;
+
+ Type *ElementTy = Ty->getElementType();
+ if (!ElementTy->isSized())
+ return 0; // We can't GEP through an unsized element.
+ APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
+ if (ElementSize == 0)
+ return 0; // Zero-length arrays can't help us build a natural GEP.
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
+
+ Offset -= NumSkippedElements * ElementSize;
+ Indices.push_back(IRB.getInt(NumSkippedElements));
+ return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
+ Indices);
+}
+
+/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
+/// resulting pointer has PointerTy.
+///
+/// This tries very hard to compute a "natural" GEP which arrives at the offset
+/// and produces the pointer type desired. Where it cannot, it will try to use
+/// the natural GEP to arrive at the offset and bitcast to the type. Where that
+/// fails, it will try to use an existing i8* and GEP to the byte offset and
+/// bitcast to the type.
+///
+/// The strategy for finding the more natural GEPs is to peel off layers of the
+/// pointer, walking back through bit casts and GEPs, searching for a base
+/// pointer from which we can compute a natural GEP with the desired
+/// properties. The algorithm tries to fold as many constant indices into
+/// a single GEP as possible, thus making each GEP more independent of the
+/// surrounding code.
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
+ Value *Ptr, APInt Offset, Type *PointerTy) {
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(Ptr);
+ SmallVector<Value *, 4> Indices;
+
+ // We may end up computing an offset pointer that has the wrong type. If we
+ // never are able to compute one directly that has the correct type, we'll
+ // fall back to it, so keep it around here.
+ Value *OffsetPtr = 0;
+
+ // Remember any i8 pointer we come across to re-use if we need to do a raw
+ // byte offset.
+ Value *Int8Ptr = 0;
+ APInt Int8PtrOffset(Offset.getBitWidth(), 0);
+
+ Type *TargetTy = PointerTy->getPointerElementType();
+
+ do {
+ // First fold any existing GEPs into the offset.
+ while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ APInt GEPOffset(Offset.getBitWidth(), 0);
+ if (!GEP->accumulateConstantOffset(TD, GEPOffset))
+ break;
+ Offset += GEPOffset;
+ Ptr = GEP->getPointerOperand();
+ if (!Visited.insert(Ptr))
+ break;
+ }
+
+ // See if we can perform a natural GEP here.
+ Indices.clear();
+ if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
+ Indices)) {
+ if (P->getType() == PointerTy) {
+ // Zap any offset pointer that we ended up computing in previous rounds.
+ if (OffsetPtr && OffsetPtr->use_empty())
+ if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))
+ I->eraseFromParent();
+ return P;
+ }
+ if (!OffsetPtr) {
+ OffsetPtr = P;
+ }
+ }
+
+ // Stash this pointer if we've found an i8*.
+ if (Ptr->getType()->isIntegerTy(8)) {
+ Int8Ptr = Ptr;
+ Int8PtrOffset = Offset;
+ }
+
+ // Peel off a layer of the pointer and update the offset appropriately.
+ if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
+ Ptr = cast<Operator>(Ptr)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
+ if (GA->mayBeOverridden())
+ break;
+ Ptr = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(Ptr));
+
+ if (!OffsetPtr) {
+ if (!Int8Ptr) {
+ Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
+ "raw_cast");
+ Int8PtrOffset = Offset;
+ }
+
+ OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
+ IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
+ "raw_idx");
+ }
+ Ptr = OffsetPtr;
+
+ // On the off chance we were targeting i8*, guard the bitcast here.
+ if (Ptr->getType() != PointerTy)
+ Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
+
+ return Ptr;
+}
+
+/// \brief Test whether we can convert a value from the old to the new type.
+///
+/// This predicate should be used to guard calls to convertValue in order to
+/// ensure that we only try to convert viable values. The strategy is that we
+/// will peel off single element struct and array wrappings to get to an
+/// underlying value, and convert that value.
+static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
+ if (OldTy == NewTy)
+ return true;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+ if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+ return true;
+ if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
+ return false;
+ if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
+ return false;
+
+ if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
+ if (NewTy->isPointerTy() && OldTy->isPointerTy())
+ return true;
+ if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
+ return true;
+ return false;
+ }
+
+ return true;
+}
+
+/// \brief Generic routine to convert an SSA value to a value of a different
+/// type.
+///
+/// This will try various different casting techniques, such as bitcasts,
+/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
+/// two types for viability with this routine.
+static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
+ Type *Ty) {
+ assert(canConvertValue(DL, V->getType(), Ty) &&
+ "Value not convertable to type");
+ if (V->getType() == Ty)
+ return V;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+ if (NewITy->getBitWidth() > OldITy->getBitWidth())
+ return IRB.CreateZExt(V, NewITy);
+ if (V->getType()->isIntegerTy() && Ty->isPointerTy())
+ return IRB.CreateIntToPtr(V, Ty);
+ if (V->getType()->isPointerTy() && Ty->isIntegerTy())
+ return IRB.CreatePtrToInt(V, Ty);
+
+ return IRB.CreateBitCast(V, Ty);
+}
+
+/// \brief Test whether the given alloca partition can be promoted to a vector.
+///
+/// This is a quick test to check whether we can rewrite a particular alloca
+/// partition (and its newly formed alloca) into a vector alloca with only
+/// whole-vector loads and stores such that it could be promoted to a vector
+/// SSA value. We only can ensure this for a limited set of operations, and we
+/// don't want to do the rewrites unless we are confident that the result will
+/// be promotable, so we have an early test here.
+static bool isVectorPromotionViable(const DataLayout &TD,
+ Type *AllocaTy,
+ AllocaPartitioning &P,
+ uint64_t PartitionBeginOffset,
+ uint64_t PartitionEndOffset,
+ AllocaPartitioning::const_use_iterator I,
+ AllocaPartitioning::const_use_iterator E) {
+ VectorType *Ty = dyn_cast<VectorType>(AllocaTy);
+ if (!Ty)
+ return false;
+
+ uint64_t ElementSize = TD.getTypeSizeInBits(Ty->getScalarType());
+
+ // While the definition of LLVM vectors is bitpacked, we don't support sizes
+ // that aren't byte sized.
+ if (ElementSize % 8)
+ return false;
+ assert((TD.getTypeSizeInBits(Ty) % 8) == 0 &&
+ "vector size not a multiple of element size?");
+ ElementSize /= 8;
+
+ for (; I != E; ++I) {
+ Use *U = I->getUse();
+ if (!U)
+ continue; // Skip dead use.
+
+ uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
+ uint64_t BeginIndex = BeginOffset / ElementSize;
+ if (BeginIndex * ElementSize != BeginOffset ||
+ BeginIndex >= Ty->getNumElements())
+ return false;
+ uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;
+ uint64_t EndIndex = EndOffset / ElementSize;
+ if (EndIndex * ElementSize != EndOffset ||
+ EndIndex > Ty->getNumElements())
+ return false;
+
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ uint64_t NumElements = EndIndex - BeginIndex;
+ Type *PartitionTy
+ = (NumElements == 1) ? Ty->getElementType()
+ : VectorType::get(Ty->getElementType(), NumElements);
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
+ if (MI->isVolatile())
+ return false;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
+ const AllocaPartitioning::MemTransferOffsets &MTO
+ = P.getMemTransferOffsets(*MTI);
+ if (!MTO.IsSplittable)
+ return false;
+ }
+ } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
+ // Disable vector promotion when there are loads or stores of an FCA.
+ return false;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
+ if (LI->isVolatile())
+ return false;
+ if (!canConvertValue(TD, PartitionTy, LI->getType()))
+ return false;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
+ if (SI->isVolatile())
+ return false;
+ if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
+ return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+/// \brief Test whether the given alloca partition's integer operations can be
+/// widened to promotable ones.
+///
+/// This is a quick test to check whether we can rewrite the integer loads and
+/// stores to a particular alloca into wider loads and stores and be able to
+/// promote the resulting alloca.
+static bool isIntegerWideningViable(const DataLayout &TD,
+ Type *AllocaTy,
+ uint64_t AllocBeginOffset,
+ AllocaPartitioning &P,
+ AllocaPartitioning::const_use_iterator I,
+ AllocaPartitioning::const_use_iterator E) {
+ uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy);
+ // Don't create integer types larger than the maximum bitwidth.
+ if (SizeInBits > IntegerType::MAX_INT_BITS)
+ return false;
+
+ // Don't try to handle allocas with bit-padding.
+ if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
+ return false;
+
+ // We need to ensure that an integer type with the appropriate bitwidth can
+ // be converted to the alloca type, whatever that is. We don't want to force
+ // the alloca itself to have an integer type if there is a more suitable one.
+ Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
+ if (!canConvertValue(TD, AllocaTy, IntTy) ||
+ !canConvertValue(TD, IntTy, AllocaTy))
+ return false;
+
+ uint64_t Size = TD.getTypeStoreSize(AllocaTy);
+
+ // Check the uses to ensure the uses are (likely) promotable integer uses.
+ // Also ensure that the alloca has a covering load or store. We don't want
+ // to widen the integer operations only to fail to promote due to some other
+ // unsplittable entry (which we may make splittable later).
+ bool WholeAllocaOp = false;
+ for (; I != E; ++I) {
+ Use *U = I->getUse();
+ if (!U)
+ continue; // Skip dead use.
+
+ uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
+ uint64_t RelEnd = I->EndOffset - AllocBeginOffset;
+
+ // We can't reasonably handle cases where the load or store extends past
+ // the end of the aloca's type and into its padding.
+ if (RelEnd > Size)
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
+ if (LI->isVolatile())
+ return false;
+ if (RelBegin == 0 && RelEnd == Size)
+ WholeAllocaOp = true;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+ if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy))
+ return false;
+ continue;
+ }
+ // Non-integer loads need to be convertible from the alloca type so that
+ // they are promotable.
+ if (RelBegin != 0 || RelEnd != Size ||
+ !canConvertValue(TD, AllocaTy, LI->getType()))
+ return false;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
+ Type *ValueTy = SI->getValueOperand()->getType();
+ if (SI->isVolatile())
+ return false;
+ if (RelBegin == 0 && RelEnd == Size)
+ WholeAllocaOp = true;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
+ if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy))
+ return false;
+ continue;
+ }
+ // Non-integer stores need to be convertible to the alloca type so that
+ // they are promotable.
+ if (RelBegin != 0 || RelEnd != Size ||
+ !canConvertValue(TD, ValueTy, AllocaTy))
+ return false;
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
+ if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
+ return false;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
+ const AllocaPartitioning::MemTransferOffsets &MTO
+ = P.getMemTransferOffsets(*MTI);
+ if (!MTO.IsSplittable)
+ return false;
+ }
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ return WholeAllocaOp;
+}
+
+static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
+ IntegerType *Ty, uint64_t Offset,
+ const Twine &Name) {
+ DEBUG(dbgs() << " start: " << *V << "\n");
+ IntegerType *IntTy = cast<IntegerType>(V->getType());
+ assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
+ "Element extends past full value");
+ uint64_t ShAmt = 8*Offset;
+ if (DL.isBigEndian())
+ ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
+ if (ShAmt) {
+ V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
+ DEBUG(dbgs() << " shifted: " << *V << "\n");
+ }
+ assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
+ "Cannot extract to a larger integer!");
+ if (Ty != IntTy) {
+ V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
+ DEBUG(dbgs() << " trunced: " << *V << "\n");
+ }
+ return V;
+}
+
+static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
+ Value *V, uint64_t Offset, const Twine &Name) {
+ IntegerType *IntTy = cast<IntegerType>(Old->getType());
+ IntegerType *Ty = cast<IntegerType>(V->getType());
+ assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
+ "Cannot insert a larger integer!");
+ DEBUG(dbgs() << " start: " << *V << "\n");
+ if (Ty != IntTy) {
+ V = IRB.CreateZExt(V, IntTy, Name + ".ext");
+ DEBUG(dbgs() << " extended: " << *V << "\n");
+ }
+ assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
+ "Element store outside of alloca store");
+ uint64_t ShAmt = 8*Offset;
+ if (DL.isBigEndian())
+ ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
+ if (ShAmt) {
+ V = IRB.CreateShl(V, ShAmt, Name + ".shift");
+ DEBUG(dbgs() << " shifted: " << *V << "\n");
+ }
+
+ if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
+ APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
+ Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
+ DEBUG(dbgs() << " masked: " << *Old << "\n");
+ V = IRB.CreateOr(Old, V, Name + ".insert");
+ DEBUG(dbgs() << " inserted: " << *V << "\n");
+ }
+ return V;
+}
+
+static Value *extractVector(IRBuilderTy &IRB, Value *V,
+ unsigned BeginIndex, unsigned EndIndex,
+ const Twine &Name) {
+ VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+
+ if (NumElements == VecTy->getNumElements())
+ return V;
+
+ if (NumElements == 1) {
+ V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
+ Name + ".extract");
+ DEBUG(dbgs() << " extract: " << *V << "\n");
+ return V;
+ }
+
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(NumElements);
+ for (unsigned i = BeginIndex; i != EndIndex; ++i)
+ Mask.push_back(IRB.getInt32(i));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ Name + ".extract");
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
+ return V;
+}
+
+static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
+ unsigned BeginIndex, const Twine &Name) {
+ VectorType *VecTy = cast<VectorType>(Old->getType());
+ assert(VecTy && "Can only insert a vector into a vector");
+
+ VectorType *Ty = dyn_cast<VectorType>(V->getType());
+ if (!Ty) {
+ // Single element to insert.
+ V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
+ Name + ".insert");
+ DEBUG(dbgs() << " insert: " << *V << "\n");
+ return V;
+ }
+
+ assert(Ty->getNumElements() <= VecTy->getNumElements() &&
+ "Too many elements!");
+ if (Ty->getNumElements() == VecTy->getNumElements()) {
+ assert(V->getType() == VecTy && "Vector type mismatch");
+ return V;
+ }
+ unsigned EndIndex = BeginIndex + Ty->getNumElements();
+
+ // When inserting a smaller vector into the larger to store, we first
+ // use a shuffle vector to widen it with undef elements, and then
+ // a second shuffle vector to select between the loaded vector and the
+ // incoming vector.
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(VecTy->getNumElements());
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i - BeginIndex));
+ else
+ Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ Name + ".expand");
+ DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+
+ Mask.clear();
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i));
+ else
+ Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
+ V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
+ Name + "insert");
+ DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ return V;
+}
+
+namespace {
+/// \brief Visitor to rewrite instructions using a partition of an alloca to
+/// use a new alloca.
+///
+/// Also implements the rewriting to vector-based accesses when the partition
+/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
+/// lives here.
+class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
+ bool> {
+ // Befriend the base class so it can delegate to private visit methods.
+ friend class llvm::InstVisitor<AllocaPartitionRewriter, bool>;
+
+ const DataLayout &TD;
+ AllocaPartitioning &P;
+ SROA &Pass;
+ AllocaInst &OldAI, &NewAI;
+ const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
+ Type *NewAllocaTy;
+
+ // If we are rewriting an alloca partition which can be written as pure
+ // vector operations, we stash extra information here. When VecTy is
+ // non-null, we have some strict guarantees about the rewritten alloca:
+ // - The new alloca is exactly the size of the vector type here.
+ // - The accesses all either map to the entire vector or to a single
+ // element.
+ // - The set of accessing instructions is only one of those handled above
+ // in isVectorPromotionViable. Generally these are the same access kinds
+ // which are promotable via mem2reg.
+ VectorType *VecTy;
+ Type *ElementTy;
+ uint64_t ElementSize;
+
+ // This is a convenience and flag variable that will be null unless the new
+ // alloca's integer operations should be widened to this integer type due to
+ // passing isIntegerWideningViable above. If it is non-null, the desired
+ // integer type will be stored here for easy access during rewriting.
+ IntegerType *IntTy;
+
+ // The offset of the partition user currently being rewritten.
+ uint64_t BeginOffset, EndOffset;
+ bool IsSplit;
+ Use *OldUse;
+ Instruction *OldPtr;
+
+ // Utility IR builder, whose name prefix is setup for each visited use, and
+ // the insertion point is set to point to the user.
+ IRBuilderTy IRB;
+
+public:
+ AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P,
+ AllocaPartitioning::iterator PI,
+ SROA &Pass, AllocaInst &OldAI, AllocaInst &NewAI,
+ uint64_t NewBeginOffset, uint64_t NewEndOffset)
+ : TD(TD), P(P), Pass(Pass),
+ OldAI(OldAI), NewAI(NewAI),
+ NewAllocaBeginOffset(NewBeginOffset),
+ NewAllocaEndOffset(NewEndOffset),
+ NewAllocaTy(NewAI.getAllocatedType()),
+ VecTy(), ElementTy(), ElementSize(), IntTy(),
+ BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(),
+ IRB(NewAI.getContext(), ConstantFolder()) {
+ }
+
+ /// \brief Visit the users of the alloca partition and rewrite them.
+ bool visitUsers(AllocaPartitioning::const_use_iterator I,
+ AllocaPartitioning::const_use_iterator E) {
+ if (isVectorPromotionViable(TD, NewAI.getAllocatedType(), P,
+ NewAllocaBeginOffset, NewAllocaEndOffset,
+ I, E)) {
+ ++NumVectorized;
+ VecTy = cast<VectorType>(NewAI.getAllocatedType());
+ ElementTy = VecTy->getElementType();
+ assert((TD.getTypeSizeInBits(VecTy->getScalarType()) % 8) == 0 &&
+ "Only multiple-of-8 sized vector elements are viable");
+ ElementSize = TD.getTypeSizeInBits(VecTy->getScalarType()) / 8;
+ } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(),
+ NewAllocaBeginOffset, P, I, E)) {
+ IntTy = Type::getIntNTy(NewAI.getContext(),
+ TD.getTypeSizeInBits(NewAI.getAllocatedType()));
+ }
+ bool CanSROA = true;
+ for (; I != E; ++I) {
+ if (!I->getUse())
+ continue; // Skip dead uses.
+ BeginOffset = I->BeginOffset;
+ EndOffset = I->EndOffset;
+ IsSplit = I->isSplit();
+ OldUse = I->getUse();
+ OldPtr = cast<Instruction>(OldUse->get());
+
+ Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
+ IRB.SetInsertPoint(OldUserI);
+ IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
+ IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
+
+ CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
+ }
+ if (VecTy) {
+ assert(CanSROA);
+ VecTy = 0;
+ ElementTy = 0;
+ ElementSize = 0;
+ }
+ if (IntTy) {
+ assert(CanSROA);
+ IntTy = 0;
+ }
+ return CanSROA;
+ }
+
+private:
+ // Every instruction which can end up as a user must have a rewrite rule.
+ bool visitInstruction(Instruction &I) {
+ DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
+ llvm_unreachable("No rewrite rule for this instruction!");
+ }
+
+ Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) {
+ assert(BeginOffset >= NewAllocaBeginOffset);
+ APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
+ return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy);
+ }
+
+ /// \brief Compute suitable alignment to access an offset into the new alloca.
+ unsigned getOffsetAlign(uint64_t Offset) {
+ unsigned NewAIAlign = NewAI.getAlignment();
+ if (!NewAIAlign)
+ NewAIAlign = TD.getABITypeAlignment(NewAI.getAllocatedType());
+ return MinAlign(NewAIAlign, Offset);
+ }
+
+ /// \brief Compute suitable alignment to access this partition of the new
+ /// alloca.
+ unsigned getPartitionAlign() {
+ return getOffsetAlign(BeginOffset - NewAllocaBeginOffset);
+ }
+
+ /// \brief Compute suitable alignment to access a type at an offset of the
+ /// new alloca.
+ ///
+ /// \returns zero if the type's ABI alignment is a suitable alignment,
+ /// otherwise returns the maximal suitable alignment.
+ unsigned getOffsetTypeAlign(Type *Ty, uint64_t Offset) {
+ unsigned Align = getOffsetAlign(Offset);
+ return Align == TD.getABITypeAlignment(Ty) ? 0 : Align;
+ }
+
+ /// \brief Compute suitable alignment to access a type at the beginning of
+ /// this partition of the new alloca.
+ ///
+ /// See \c getOffsetTypeAlign for details; this routine delegates to it.
+ unsigned getPartitionTypeAlign(Type *Ty) {
+ return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset);
+ }
+
+ unsigned getIndex(uint64_t Offset) {
+ assert(VecTy && "Can only call getIndex when rewriting a vector");
+ uint64_t RelOffset = Offset - NewAllocaBeginOffset;
+ assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
+ uint32_t Index = RelOffset / ElementSize;
+ assert(Index * ElementSize == RelOffset);
+ return Index;
+ }
+
+ void deleteIfTriviallyDead(Value *V) {
+ Instruction *I = cast<Instruction>(V);
+ if (isInstructionTriviallyDead(I))
+ Pass.DeadInsts.insert(I);
+ }
+
+ Value *rewriteVectorizedLoadInst() {
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "load");
+ return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
+ }
+
+ Value *rewriteIntegerLoad(LoadInst &LI) {
+ assert(IntTy && "We cannot insert an integer to the alloca");
+ assert(!LI.isVolatile());
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "load");
+ V = convertValue(TD, IRB, V, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ if (Offset > 0 || EndOffset < NewAllocaEndOffset)
+ V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
+ "extract");
+ return V;
+ }
+
+ bool visitLoadInst(LoadInst &LI) {
+ DEBUG(dbgs() << " original: " << LI << "\n");
+ Value *OldOp = LI.getOperand(0);
+ assert(OldOp == OldPtr);
+
+ uint64_t Size = EndOffset - BeginOffset;
+
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
+ bool IsPtrAdjusted = false;
+ Value *V;
+ if (VecTy) {
+ V = rewriteVectorizedLoadInst();
+ } else if (IntTy && LI.getType()->isIntegerTy()) {
+ V = rewriteIntegerLoad(LI);
+ } else if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, NewAllocaTy, LI.getType())) {
+ V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ LI.isVolatile(), "load");
+ } else {
+ Type *LTy = TargetTy->getPointerTo();
+ V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
+ getPartitionTypeAlign(TargetTy),
+ LI.isVolatile(), "load");
+ IsPtrAdjusted = true;
+ }
+ V = convertValue(TD, IRB, V, TargetTy);
+
+ if (IsSplit) {
+ assert(!LI.isVolatile());
+ assert(LI.getType()->isIntegerTy() &&
+ "Only integer type loads and stores are split");
+ assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+ "Split load isn't smaller than original load");
+ assert(LI.getType()->getIntegerBitWidth() ==
+ TD.getTypeStoreSizeInBits(LI.getType()) &&
+ "Non-byte-multiple bit width");
+ // Move the insertion point just past the load so that we can refer to it.
+ IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
+ // Create a placeholder value with the same type as LI to use as the
+ // basis for the new value. This allows us to replace the uses of LI with
+ // the computed value, and then replace the placeholder with LI, leaving
+ // LI only used for this computation.
+ Value *Placeholder
+ = new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
+ V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
+ "insert");
+ LI.replaceAllUsesWith(V);
+ Placeholder->replaceAllUsesWith(&LI);
+ delete Placeholder;
+ } else {
+ LI.replaceAllUsesWith(V);
+ }
+
+ Pass.DeadInsts.insert(&LI);
+ deleteIfTriviallyDead(OldOp);
+ DEBUG(dbgs() << " to: " << *V << "\n");
+ return !LI.isVolatile() && !IsPtrAdjusted;
+ }
+
+ bool rewriteVectorizedStoreInst(Value *V,
+ StoreInst &SI, Value *OldOp) {
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+ Type *PartitionTy
+ = (NumElements == 1) ? ElementTy
+ : VectorType::get(ElementTy, NumElements);
+ if (V->getType() != PartitionTy)
+ V = convertValue(TD, IRB, V, PartitionTy);
+
+ // Mix in the existing elements.
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "load");
+ V = insertVector(IRB, Old, V, BeginIndex, "vec");
+
+ StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
+ Pass.DeadInsts.insert(&SI);
+
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return true;
+ }
+
+ bool rewriteIntegerStore(Value *V, StoreInst &SI) {
+ assert(IntTy && "We cannot extract an integer from the alloca");
+ assert(!SI.isVolatile());
+ if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "oldload");
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
+ "insert");
+ }
+ V = convertValue(TD, IRB, V, NewAllocaTy);
+ StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
+ Pass.DeadInsts.insert(&SI);
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return true;
+ }
+
+ bool visitStoreInst(StoreInst &SI) {
+ DEBUG(dbgs() << " original: " << SI << "\n");
+ Value *OldOp = SI.getOperand(1);
+ assert(OldOp == OldPtr);
+
+ Value *V = SI.getValueOperand();
+
+ // Strip all inbounds GEPs and pointer casts to try to dig out any root
+ // alloca that should be re-examined after promoting this alloca.
+ if (V->getType()->isPointerTy())
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
+ Pass.PostPromotionWorklist.insert(AI);
+
+ uint64_t Size = EndOffset - BeginOffset;
+ if (Size < TD.getTypeStoreSize(V->getType())) {
+ assert(!SI.isVolatile());
+ assert(IsSplit && "A seemingly split store isn't splittable");
+ assert(V->getType()->isIntegerTy() &&
+ "Only integer type loads and stores are split");
+ assert(V->getType()->getIntegerBitWidth() ==
+ TD.getTypeStoreSizeInBits(V->getType()) &&
+ "Non-byte-multiple bit width");
+ IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+ V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
+ "extract");
+ }
+
+ if (VecTy)
+ return rewriteVectorizedStoreInst(V, SI, OldOp);
+ if (IntTy && V->getType()->isIntegerTy())
+ return rewriteIntegerStore(V, SI);
+
+ StoreInst *NewSI;
+ if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, V->getType(), NewAllocaTy)) {
+ V = convertValue(TD, IRB, V, NewAllocaTy);
+ NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+ SI.isVolatile());
+ } else {
+ Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo());
+ NewSI = IRB.CreateAlignedStore(V, NewPtr,
+ getPartitionTypeAlign(V->getType()),
+ SI.isVolatile());
+ }
+ (void)NewSI;
+ Pass.DeadInsts.insert(&SI);
+ deleteIfTriviallyDead(OldOp);
+
+ DEBUG(dbgs() << " to: " << *NewSI << "\n");
+ return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
+ }
+
+ /// \brief Compute an integer value from splatting an i8 across the given
+ /// number of bytes.
+ ///
+ /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
+ /// call this routine.
+ /// FIXME: Heed the advice above.
+ ///
+ /// \param V The i8 value to splat.
+ /// \param Size The number of bytes in the output (assuming i8 is one byte)
+ Value *getIntegerSplat(Value *V, unsigned Size) {
+ assert(Size > 0 && "Expected a positive number of bytes.");
+ IntegerType *VTy = cast<IntegerType>(V->getType());
+ assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
+ if (Size == 1)
+ return V;
+
+ Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"),
+ ConstantExpr::getUDiv(
+ Constant::getAllOnesValue(SplatIntTy),
+ ConstantExpr::getZExt(
+ Constant::getAllOnesValue(V->getType()),
+ SplatIntTy)),
+ "isplat");
+ return V;
+ }
+
+ /// \brief Compute a vector splat for a given element value.
+ Value *getVectorSplat(Value *V, unsigned NumElements) {
+ V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
+ DEBUG(dbgs() << " splat: " << *V << "\n");
+ return V;
+ }
+
+ bool visitMemSetInst(MemSetInst &II) {
+ DEBUG(dbgs() << " original: " << II << "\n");
+ assert(II.getRawDest() == OldPtr);
+
+ // If the memset has a variable size, it cannot be split, just adjust the
+ // pointer to the new alloca.
+ if (!isa<Constant>(II.getLength())) {
+ II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+ Type *CstTy = II.getAlignmentCst()->getType();
+ II.setAlignment(ConstantInt::get(CstTy, getPartitionAlign()));
+
+ deleteIfTriviallyDead(OldPtr);
+ return false;
+ }
+
+ // Record this instruction for deletion.
+ Pass.DeadInsts.insert(&II);
+
+ Type *AllocaTy = NewAI.getAllocatedType();
+ Type *ScalarTy = AllocaTy->getScalarType();
+
+ // If this doesn't map cleanly onto the alloca type, and that type isn't
+ // a single value type, just emit a memset.
+ if (!VecTy && !IntTy &&
+ (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaEndOffset ||
+ !AllocaTy->isSingleValueType() ||
+ !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) ||
+ TD.getTypeSizeInBits(ScalarTy)%8 != 0)) {
+ Type *SizeTy = II.getLength()->getType();
+ Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
+ CallInst *New
+ = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB,
+ II.getRawDest()->getType()),
+ II.getValue(), Size, getPartitionAlign(),
+ II.isVolatile());
+ (void)New;
+ DEBUG(dbgs() << " to: " << *New << "\n");
+ return false;
+ }
+
+ // If we can represent this as a simple value, we have to build the actual
+ // value to store, which requires expanding the byte present in memset to
+ // a sensible representation for the alloca type. This is essentially
+ // splatting the byte to a sufficiently wide integer, splatting it across
+ // any desired vector width, and bitcasting to the final type.
+ Value *V;
+
+ if (VecTy) {
+ // If this is a memset of a vectorized alloca, insert it.
+ assert(ElementTy == ScalarTy);
+
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+
+ Value *Splat =
+ getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8);
+ Splat = convertValue(TD, IRB, Splat, ElementTy);
+ if (NumElements > 1)
+ Splat = getVectorSplat(Splat, NumElements);
+
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "oldload");
+ V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
+ } else if (IntTy) {
+ // If this is a memset on an alloca where we can widen stores, insert the
+ // set integer.
+ assert(!II.isVolatile());
+
+ uint64_t Size = EndOffset - BeginOffset;
+ V = getIntegerSplat(II.getValue(), Size);
+
+ if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaBeginOffset)) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "oldload");
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, V, Offset, "insert");
+ } else {
+ assert(V->getType() == IntTy &&
+ "Wrong type for an alloca wide integer!");
+ }
+ V = convertValue(TD, IRB, V, AllocaTy);
+ } else {
+ // Established these invariants above.
+ assert(BeginOffset == NewAllocaBeginOffset);
+ assert(EndOffset == NewAllocaEndOffset);
+
+ V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8);
+ if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
+ V = getVectorSplat(V, AllocaVecTy->getNumElements());
+
+ V = convertValue(TD, IRB, V, AllocaTy);
+ }
+
+ Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+ II.isVolatile());
+ (void)New;
+ DEBUG(dbgs() << " to: " << *New << "\n");
+ return !II.isVolatile();
+ }
+
+ bool visitMemTransferInst(MemTransferInst &II) {
+ // Rewriting of memory transfer instructions can be a bit tricky. We break
+ // them into two categories: split intrinsics and unsplit intrinsics.
+
+ DEBUG(dbgs() << " original: " << II << "\n");
+
+ assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
+ bool IsDest = II.getRawDest() == OldPtr;
+
+ const AllocaPartitioning::MemTransferOffsets &MTO
+ = P.getMemTransferOffsets(II);
+
+ // Compute the relative offset within the transfer.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
+ : MTO.SourceBegin));
+
+ unsigned Align = II.getAlignment();
+ if (Align > 1)
+ Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
+ MinAlign(II.getAlignment(), getPartitionAlign()));
+
+ // For unsplit intrinsics, we simply modify the source and destination
+ // pointers in place. This isn't just an optimization, it is a matter of
+ // correctness. With unsplit intrinsics we may be dealing with transfers
+ // within a single alloca before SROA ran, or with transfers that have
+ // a variable length. We may also be dealing with memmove instead of
+ // memcpy, and so simply updating the pointers is the necessary for us to
+ // update both source and dest of a single call.
+ if (!MTO.IsSplittable) {
+ Value *OldOp = IsDest ? II.getRawDest() : II.getRawSource();
+ if (IsDest)
+ II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+ else
+ II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType()));
+
+ Type *CstTy = II.getAlignmentCst()->getType();
+ II.setAlignment(ConstantInt::get(CstTy, Align));
+
+ DEBUG(dbgs() << " to: " << II << "\n");
+ deleteIfTriviallyDead(OldOp);
+ return false;
+ }
+ // For split transfer intrinsics we have an incredibly useful assurance:
+ // the source and destination do not reside within the same alloca, and at
+ // least one of them does not escape. This means that we can replace
+ // memmove with memcpy, and we don't need to worry about all manner of
+ // downsides to splitting and transforming the operations.
+
+ // If this doesn't map cleanly onto the alloca type, and that type isn't
+ // a single value type, just emit a memcpy.
+ bool EmitMemCpy
+ = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaEndOffset ||
+ !NewAI.getAllocatedType()->isSingleValueType());
+
+ // If we're just going to emit a memcpy, the alloca hasn't changed, and the
+ // size hasn't been shrunk based on analysis of the viable range, this is
+ // a no-op.
+ if (EmitMemCpy && &OldAI == &NewAI) {
+ uint64_t OrigBegin = IsDest ? MTO.DestBegin : MTO.SourceBegin;
+ uint64_t OrigEnd = IsDest ? MTO.DestEnd : MTO.SourceEnd;
+ // Ensure the start lines up.
+ assert(BeginOffset == OrigBegin);
+ (void)OrigBegin;
+
+ // Rewrite the size as needed.
+ if (EndOffset != OrigEnd)
+ II.setLength(ConstantInt::get(II.getLength()->getType(),
+ EndOffset - BeginOffset));
+ return false;
+ }
+ // Record this instruction for deletion.
+ Pass.DeadInsts.insert(&II);
+
+ // Strip all inbounds GEPs and pointer casts to try to dig out any root
+ // alloca that should be re-examined after rewriting this instruction.
+ Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
+ if (AllocaInst *AI
+ = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
+ Pass.Worklist.insert(AI);
+
+ if (EmitMemCpy) {
+ Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
+ : II.getRawDest()->getType();
+
+ // Compute the other pointer, folding as much as possible to produce
+ // a single, simple GEP in most cases.
+ OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
+
+ Value *OurPtr
+ = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
+ : II.getRawSource()->getType());
+ Type *SizeTy = II.getLength()->getType();
+ Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
+
+ CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
+ IsDest ? OtherPtr : OurPtr,
+ Size, Align, II.isVolatile());
+ (void)New;
+ DEBUG(dbgs() << " to: " << *New << "\n");
+ return false;
+ }
+
+ // Note that we clamp the alignment to 1 here as a 0 alignment for a memcpy
+ // is equivalent to 1, but that isn't true if we end up rewriting this as
+ // a load or store.
+ if (!Align)
+ Align = 1;
+
+ bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset;
+ uint64_t Size = EndOffset - BeginOffset;
+ unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0;
+ unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0;
+ unsigned NumElements = EndIndex - BeginIndex;
+ IntegerType *SubIntTy
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
+
+ Type *OtherPtrTy = NewAI.getType();
+ if (VecTy && !IsWholeAlloca) {
+ if (NumElements == 1)
+ OtherPtrTy = VecTy->getElementType();
+ else
+ OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
+
+ OtherPtrTy = OtherPtrTy->getPointerTo();
+ } else if (IntTy && !IsWholeAlloca) {
+ OtherPtrTy = SubIntTy->getPointerTo();
+ }
+
+ Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
+ Value *DstPtr = &NewAI;
+ if (!IsDest)
+ std::swap(SrcPtr, DstPtr);
+
+ Value *Src;
+ if (VecTy && !IsWholeAlloca && !IsDest) {
+ Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "load");
+ Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
+ } else if (IntTy && !IsWholeAlloca && !IsDest) {
+ Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "load");
+ Src = convertValue(TD, IRB, Src, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract");
+ } else {
+ Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
+ "copyload");
+ }
+
+ if (VecTy && !IsWholeAlloca && IsDest) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "oldload");
+ Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
+ } else if (IntTy && !IsWholeAlloca && IsDest) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "oldload");
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ Src = insertInteger(TD, IRB, Old, Src, Offset, "insert");
+ Src = convertValue(TD, IRB, Src, NewAllocaTy);
+ }
+
+ StoreInst *Store = cast<StoreInst>(
+ IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return !II.isVolatile();
+ }
+
+ bool visitIntrinsicInst(IntrinsicInst &II) {
+ assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
+ II.getIntrinsicID() == Intrinsic::lifetime_end);
+ DEBUG(dbgs() << " original: " << II << "\n");
+ assert(II.getArgOperand(1) == OldPtr);
+
+ // Record this instruction for deletion.
+ Pass.DeadInsts.insert(&II);
+
+ ConstantInt *Size
+ = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
+ EndOffset - BeginOffset);
+ Value *Ptr = getAdjustedAllocaPtr(IRB, II.getArgOperand(1)->getType());
+ Value *New;
+ if (II.getIntrinsicID() == Intrinsic::lifetime_start)
+ New = IRB.CreateLifetimeStart(Ptr, Size);
+ else
+ New = IRB.CreateLifetimeEnd(Ptr, Size);
+
+ (void)New;
+ DEBUG(dbgs() << " to: " << *New << "\n");
+ return true;
+ }
+
+ bool visitPHINode(PHINode &PN) {
+ DEBUG(dbgs() << " original: " << PN << "\n");
+
+ // We would like to compute a new pointer in only one place, but have it be
+ // as local as possible to the PHI. To do that, we re-use the location of
+ // the old pointer, which necessarily must be in the right position to
+ // dominate the PHI.
+ IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
+ PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
+
+ Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
+ // Replace the operands which were using the old pointer.
+ std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
+
+ DEBUG(dbgs() << " to: " << PN << "\n");
+ deleteIfTriviallyDead(OldPtr);
+ return false;
+ }
+
+ bool visitSelectInst(SelectInst &SI) {
+ DEBUG(dbgs() << " original: " << SI << "\n");
+
+ // Find the operand we need to rewrite here.
+ bool IsTrueVal = SI.getTrueValue() == OldPtr;
+ if (IsTrueVal)
+ assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!");
+ else
+ assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!");
+
+ Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
+ SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
+ DEBUG(dbgs() << " to: " << SI << "\n");
+ deleteIfTriviallyDead(OldPtr);
+ return false;
+ }
+
+};
+}
+
+namespace {
+/// \brief Visitor to rewrite aggregate loads and stores as scalar.
+///
+/// This pass aggressively rewrites all aggregate loads and stores on
+/// a particular pointer (or any pointer derived from it which we can identify)
+/// with scalar loads and stores.
+class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
+ // Befriend the base class so it can delegate to private visit methods.
+ friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
+
+ const DataLayout &TD;
+
+ /// Queue of pointer uses to analyze and potentially rewrite.
+ SmallVector<Use *, 8> Queue;
+
+ /// Set to prevent us from cycling with phi nodes and loops.
+ SmallPtrSet<User *, 8> Visited;
+
+ /// The current pointer use being rewritten. This is used to dig up the used
+ /// value (as opposed to the user).
+ Use *U;
+
+public:
+ AggLoadStoreRewriter(const DataLayout &TD) : TD(TD) {}
+
+ /// Rewrite loads and stores through a pointer and all pointers derived from
+ /// it.
+ bool rewrite(Instruction &I) {
+ DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
+ enqueueUsers(I);
+ bool Changed = false;
+ while (!Queue.empty()) {
+ U = Queue.pop_back_val();
+ Changed |= visit(cast<Instruction>(U->getUser()));
+ }
+ return Changed;
+ }
+
+private:
+ /// Enqueue all the users of the given instruction for further processing.
+ /// This uses a set to de-duplicate users.
+ void enqueueUsers(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
+ ++UI)
+ if (Visited.insert(*UI))
+ Queue.push_back(&UI.getUse());
+ }
+
+ // Conservative default is to not rewrite anything.
+ bool visitInstruction(Instruction &I) { return false; }
+
+ /// \brief Generic recursive split emission class.
+ template <typename Derived>
+ class OpSplitter {
+ protected:
+ /// The builder used to form new instructions.
+ IRBuilderTy IRB;
+ /// The indices which to be used with insert- or extractvalue to select the
+ /// appropriate value within the aggregate.
+ SmallVector<unsigned, 4> Indices;
+ /// The indices to a GEP instruction which will move Ptr to the correct slot
+ /// within the aggregate.
+ SmallVector<Value *, 4> GEPIndices;
+ /// The base pointer of the original op, used as a base for GEPing the
+ /// split operations.
+ Value *Ptr;
+
+ /// Initialize the splitter with an insertion point, Ptr and start with a
+ /// single zero GEP index.
+ OpSplitter(Instruction *InsertionPoint, Value *Ptr)
+ : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
+
+ public:
+ /// \brief Generic recursive split emission routine.
+ ///
+ /// This method recursively splits an aggregate op (load or store) into
+ /// scalar or vector ops. It splits recursively until it hits a single value
+ /// and emits that single value operation via the template argument.
+ ///
+ /// The logic of this routine relies on GEPs and insertvalue and
+ /// extractvalue all operating with the same fundamental index list, merely
+ /// formatted differently (GEPs need actual values).
+ ///
+ /// \param Ty The type being split recursively into smaller ops.
+ /// \param Agg The aggregate value being built up or stored, depending on
+ /// whether this is splitting a load or a store respectively.
+ void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
+ if (Ty->isSingleValueType())
+ return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);
+
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned OldSize = Indices.size();
+ (void)OldSize;
+ for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
+ ++Idx) {
+ assert(Indices.size() == OldSize && "Did not return to the old size");
+ Indices.push_back(Idx);
+ GEPIndices.push_back(IRB.getInt32(Idx));
+ emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
+ GEPIndices.pop_back();
+ Indices.pop_back();
+ }
+ return;
+ }
+
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ unsigned OldSize = Indices.size();
+ (void)OldSize;
+ for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
+ ++Idx) {
+ assert(Indices.size() == OldSize && "Did not return to the old size");
+ Indices.push_back(Idx);
+ GEPIndices.push_back(IRB.getInt32(Idx));
+ emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
+ GEPIndices.pop_back();
+ Indices.pop_back();
+ }
+ return;
+ }
+
+ llvm_unreachable("Only arrays and structs are aggregate loadable types");
+ }
+ };
+
+ struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
+ LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr)
+ : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {}
+
+ /// Emit a leaf load of a single value. This is called at the leaves of the
+ /// recursive emission to actually load values.
+ void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+ assert(Ty->isSingleValueType());
+ // Load the single value and insert it using the indices.
+ Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
+ Value *Load = IRB.CreateLoad(GEP, Name + ".load");
+ Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
+ DEBUG(dbgs() << " to: " << *Load << "\n");
+ }
+ };
+
+ bool visitLoadInst(LoadInst &LI) {
+ assert(LI.getPointerOperand() == *U);
+ if (!LI.isSimple() || LI.getType()->isSingleValueType())
+ return false;
+
+ // We have an aggregate being loaded, split it apart.
+ DEBUG(dbgs() << " original: " << LI << "\n");
+ LoadOpSplitter Splitter(&LI, *U);
+ Value *V = UndefValue::get(LI.getType());
+ Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
+ LI.replaceAllUsesWith(V);
+ LI.eraseFromParent();
+ return true;
+ }
+
+ struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
+ StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr)
+ : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {}
+
+ /// Emit a leaf store of a single value. This is called at the leaves of the
+ /// recursive emission to actually produce stores.
+ void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+ assert(Ty->isSingleValueType());
+ // Extract the single value and store it using the indices.
+ Value *Store = IRB.CreateStore(
+ IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
+ IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"));
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ }
+ };
+
+ bool visitStoreInst(StoreInst &SI) {
+ if (!SI.isSimple() || SI.getPointerOperand() != *U)
+ return false;
+ Value *V = SI.getValueOperand();
+ if (V->getType()->isSingleValueType())
+ return false;
+
+ // We have an aggregate being stored, split it apart.
+ DEBUG(dbgs() << " original: " << SI << "\n");
+ StoreOpSplitter Splitter(&SI, *U);
+ Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
+ SI.eraseFromParent();
+ return true;
+ }
+
+ bool visitBitCastInst(BitCastInst &BC) {
+ enqueueUsers(BC);
+ return false;
+ }
+
+ bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ enqueueUsers(GEPI);
+ return false;
+ }
+
+ bool visitPHINode(PHINode &PN) {
+ enqueueUsers(PN);
+ return false;
+ }
+
+ bool visitSelectInst(SelectInst &SI) {
+ enqueueUsers(SI);
+ return false;
+ }
+};
+}
+
+/// \brief Strip aggregate type wrapping.
+///
+/// This removes no-op aggregate types wrapping an underlying type. It will
+/// strip as many layers of types as it can without changing either the type
+/// size or the allocated size.
+static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
+ if (Ty->isSingleValueType())
+ return Ty;
+
+ uint64_t AllocSize = DL.getTypeAllocSize(Ty);
+ uint64_t TypeSize = DL.getTypeSizeInBits(Ty);
+
+ Type *InnerTy;
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
+ InnerTy = ArrTy->getElementType();
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ unsigned Index = SL->getElementContainingOffset(0);
+ InnerTy = STy->getElementType(Index);
+ } else {
+ return Ty;
+ }
+
+ if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
+ TypeSize > DL.getTypeSizeInBits(InnerTy))
+ return Ty;
+
+ return stripAggregateTypeWrapping(DL, InnerTy);
+}
+
+/// \brief Try to find a partition of the aggregate type passed in for a given
+/// offset and size.
+///
+/// This recurses through the aggregate type and tries to compute a subtype
+/// based on the offset and size. When the offset and size span a sub-section
+/// of an array, it will even compute a new array type for that sub-section,
+/// and the same for structs.
+///
+/// Note that this routine is very strict and tries to find a partition of the
+/// type which produces the *exact* right offset and size. It is not forgiving
+/// when the size or offset cause either end of type-based partition to be off.
+/// Also, this is a best-effort routine. It is reasonable to give up and not
+/// return a type if necessary.
+static Type *getTypePartition(const DataLayout &TD, Type *Ty,
+ uint64_t Offset, uint64_t Size) {
+ if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
+ return stripAggregateTypeWrapping(TD, Ty);
+ if (Offset > TD.getTypeAllocSize(Ty) ||
+ (TD.getTypeAllocSize(Ty) - Offset) < Size)
+ return 0;
+
+ if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
+ // We can't partition pointers...
+ if (SeqTy->isPointerTy())
+ return 0;
+
+ Type *ElementTy = SeqTy->getElementType();
+ uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
+ uint64_t NumSkippedElements = Offset / ElementSize;
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
+ if (NumSkippedElements >= ArrTy->getNumElements())
+ return 0;
+ } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
+ if (NumSkippedElements >= VecTy->getNumElements())
+ return 0;
+ }
+ Offset -= NumSkippedElements * ElementSize;
+
+ // First check if we need to recurse.
+ if (Offset > 0 || Size < ElementSize) {
+ // Bail if the partition ends in a different array element.
+ if ((Offset + Size) > ElementSize)
+ return 0;
+ // Recurse through the element type trying to peel off offset bytes.
+ return getTypePartition(TD, ElementTy, Offset, Size);
+ }
+ assert(Offset == 0);
+
+ if (Size == ElementSize)
+ return stripAggregateTypeWrapping(TD, ElementTy);
+ assert(Size > ElementSize);
+ uint64_t NumElements = Size / ElementSize;
+ if (NumElements * ElementSize != Size)
+ return 0;
+ return ArrayType::get(ElementTy, NumElements);
+ }
+
+ StructType *STy = dyn_cast<StructType>(Ty);
+ if (!STy)
+ return 0;
+
+ const StructLayout *SL = TD.getStructLayout(STy);
+ if (Offset >= SL->getSizeInBytes())
+ return 0;
+ uint64_t EndOffset = Offset + Size;
+ if (EndOffset > SL->getSizeInBytes())
+ return 0;
+
+ unsigned Index = SL->getElementContainingOffset(Offset);
+ Offset -= SL->getElementOffset(Index);
+
+ Type *ElementTy = STy->getElementType(Index);
+ uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
+ if (Offset >= ElementSize)
+ return 0; // The offset points into alignment padding.
+
+ // See if any partition must be contained by the element.
+ if (Offset > 0 || Size < ElementSize) {
+ if ((Offset + Size) > ElementSize)
+ return 0;
+ return getTypePartition(TD, ElementTy, Offset, Size);
+ }
+ assert(Offset == 0);
+
+ if (Size == ElementSize)
+ return stripAggregateTypeWrapping(TD, ElementTy);
+
+ StructType::element_iterator EI = STy->element_begin() + Index,
+ EE = STy->element_end();
+ if (EndOffset < SL->getSizeInBytes()) {
+ unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
+ if (Index == EndIndex)
+ return 0; // Within a single element and its padding.
+
+ // Don't try to form "natural" types if the elements don't line up with the
+ // expected size.
+ // FIXME: We could potentially recurse down through the last element in the
+ // sub-struct to find a natural end point.
+ if (SL->getElementOffset(EndIndex) != EndOffset)
+ return 0;
+
+ assert(Index < EndIndex);
+ EE = STy->element_begin() + EndIndex;
+ }
+
+ // Try to build up a sub-structure.
+ StructType *SubTy = StructType::get(STy->getContext(), makeArrayRef(EI, EE),
+ STy->isPacked());
+ const StructLayout *SubSL = TD.getStructLayout(SubTy);
+ if (Size != SubSL->getSizeInBytes())
+ return 0; // The sub-struct doesn't have quite the size needed.
+
+ return SubTy;
+}
+
+/// \brief Rewrite an alloca partition's users.
+///
+/// This routine drives both of the rewriting goals of the SROA pass. It tries
+/// to rewrite uses of an alloca partition to be conducive for SSA value
+/// promotion. If the partition needs a new, more refined alloca, this will
+/// build that new alloca, preserving as much type information as possible, and
+/// rewrite the uses of the old alloca to point at the new one and have the
+/// appropriate new offsets. It also evaluates how successful the rewrite was
+/// at enabling promotion and if it was successful queues the alloca to be
+/// promoted.
+bool SROA::rewriteAllocaPartition(AllocaInst &AI,
+ AllocaPartitioning &P,
+ AllocaPartitioning::iterator PI) {
+ uint64_t AllocaSize = PI->EndOffset - PI->BeginOffset;
+ bool IsLive = false;
+ for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
+ UE = P.use_end(PI);
+ UI != UE && !IsLive; ++UI)
+ if (UI->getUse())
+ IsLive = true;
+ if (!IsLive)
+ return false; // No live uses left of this partition.
+
+ DEBUG(dbgs() << "Speculating PHIs and selects in partition "
+ << "[" << PI->BeginOffset << "," << PI->EndOffset << ")\n");
+
+ PHIOrSelectSpeculator Speculator(*TD, P, *this);
+ DEBUG(dbgs() << " speculating ");
+ DEBUG(P.print(dbgs(), PI, ""));
+ Speculator.visitUsers(PI);
+
+ // Try to compute a friendly type for this partition of the alloca. This
+ // won't always succeed, in which case we fall back to a legal integer type
+ // or an i8 array of an appropriate size.
+ Type *AllocaTy = 0;
+ if (Type *PartitionTy = P.getCommonType(PI))
+ if (TD->getTypeAllocSize(PartitionTy) >= AllocaSize)
+ AllocaTy = PartitionTy;
+ if (!AllocaTy)
+ if (Type *PartitionTy = getTypePartition(*TD, AI.getAllocatedType(),
+ PI->BeginOffset, AllocaSize))
+ AllocaTy = PartitionTy;
+ if ((!AllocaTy ||
+ (AllocaTy->isArrayTy() &&
+ AllocaTy->getArrayElementType()->isIntegerTy())) &&
+ TD->isLegalInteger(AllocaSize * 8))
+ AllocaTy = Type::getIntNTy(*C, AllocaSize * 8);
+ if (!AllocaTy)
+ AllocaTy = ArrayType::get(Type::getInt8Ty(*C), AllocaSize);
+ assert(TD->getTypeAllocSize(AllocaTy) >= AllocaSize);
+
+ // Check for the case where we're going to rewrite to a new alloca of the
+ // exact same type as the original, and with the same access offsets. In that
+ // case, re-use the existing alloca, but still run through the rewriter to
+ // perform phi and select speculation.
+ AllocaInst *NewAI;
+ if (AllocaTy == AI.getAllocatedType()) {
+ assert(PI->BeginOffset == 0 &&
+ "Non-zero begin offset but same alloca type");
+ assert(PI == P.begin() && "Begin offset is zero on later partition");
+ NewAI = &AI;
+ } else {
+ unsigned Alignment = AI.getAlignment();
+ if (!Alignment) {
+ // The minimum alignment which users can rely on when the explicit
+ // alignment is omitted or zero is that required by the ABI for this
+ // type.
+ Alignment = TD->getABITypeAlignment(AI.getAllocatedType());
+ }
+ Alignment = MinAlign(Alignment, PI->BeginOffset);
+ // If we will get at least this much alignment from the type alone, leave
+ // the alloca's alignment unconstrained.
+ if (Alignment <= TD->getABITypeAlignment(AllocaTy))
+ Alignment = 0;
+ NewAI = new AllocaInst(AllocaTy, 0, Alignment,
+ AI.getName() + ".sroa." + Twine(PI - P.begin()),
+ &AI);
+ ++NumNewAllocas;
+ }
+
+ DEBUG(dbgs() << "Rewriting alloca partition "
+ << "[" << PI->BeginOffset << "," << PI->EndOffset << ") to: "
+ << *NewAI << "\n");
+
+ // Track the high watermark of the post-promotion worklist. We will reset it
+ // to this point if the alloca is not in fact scheduled for promotion.
+ unsigned PPWOldSize = PostPromotionWorklist.size();
+
+ AllocaPartitionRewriter Rewriter(*TD, P, PI, *this, AI, *NewAI,
+ PI->BeginOffset, PI->EndOffset);
+ DEBUG(dbgs() << " rewriting ");
+ DEBUG(P.print(dbgs(), PI, ""));
+ bool Promotable = Rewriter.visitUsers(P.use_begin(PI), P.use_end(PI));
+ if (Promotable) {
+ DEBUG(dbgs() << " and queuing for promotion\n");
+ PromotableAllocas.push_back(NewAI);
+ } else if (NewAI != &AI) {
+ // If we can't promote the alloca, iterate on it to check for new
+ // refinements exposed by splitting the current alloca. Don't iterate on an
+ // alloca which didn't actually change and didn't get promoted.
+ Worklist.insert(NewAI);
+ }
+
+ // Drop any post-promotion work items if promotion didn't happen.
+ if (!Promotable)
+ while (PostPromotionWorklist.size() > PPWOldSize)
+ PostPromotionWorklist.pop_back();
+
+ return true;
+}
+
+/// \brief Walks the partitioning of an alloca rewriting uses of each partition.
+bool SROA::splitAlloca(AllocaInst &AI, AllocaPartitioning &P) {
+ bool Changed = false;
+ for (AllocaPartitioning::iterator PI = P.begin(), PE = P.end(); PI != PE;
+ ++PI)
+ Changed |= rewriteAllocaPartition(AI, P, PI);
+
+ return Changed;
+}
+
+/// \brief Analyze an alloca for SROA.
+///
+/// This analyzes the alloca to ensure we can reason about it, builds
+/// a partitioning of the alloca, and then hands it off to be split and
+/// rewritten as needed.
+bool SROA::runOnAlloca(AllocaInst &AI) {
+ DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
+ ++NumAllocasAnalyzed;
+
+ // Special case dead allocas, as they're trivial.
+ if (AI.use_empty()) {
+ AI.eraseFromParent();
+ return true;
+ }
+
+ // Skip alloca forms that this analysis can't handle.
+ if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
+ TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ return false;
+
+ bool Changed = false;
+
+ // First, split any FCA loads and stores touching this alloca to promote
+ // better splitting and promotion opportunities.
+ AggLoadStoreRewriter AggRewriter(*TD);
+ Changed |= AggRewriter.rewrite(AI);
+
+ // Build the partition set using a recursive instruction-visiting builder.
+ AllocaPartitioning P(*TD, AI);
+ DEBUG(P.print(dbgs()));
+ if (P.isEscaped())
+ return Changed;
+
+ // Delete all the dead users of this alloca before splitting and rewriting it.
+ for (AllocaPartitioning::dead_user_iterator DI = P.dead_user_begin(),
+ DE = P.dead_user_end();
+ DI != DE; ++DI) {
+ Changed = true;
+ (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
+ DeadInsts.insert(*DI);
+ }
+ for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
+ DE = P.dead_op_end();
+ DO != DE; ++DO) {
+ Value *OldV = **DO;
+ // Clobber the use with an undef value.
+ **DO = UndefValue::get(OldV->getType());
+ if (Instruction *OldI = dyn_cast<Instruction>(OldV))
+ if (isInstructionTriviallyDead(OldI)) {
+ Changed = true;
+ DeadInsts.insert(OldI);
+ }
+ }
+
+ // No partitions to split. Leave the dead alloca for a later pass to clean up.
+ if (P.begin() == P.end())
+ return Changed;
+
+ return splitAlloca(AI, P) || Changed;
+}
+
+/// \brief Delete the dead instructions accumulated in this run.
+///
+/// Recursively deletes the dead instructions we've accumulated. This is done
+/// at the very end to maximize locality of the recursive delete and to
+/// minimize the problems of invalidated instruction pointers as such pointers
+/// are used heavily in the intermediate stages of the algorithm.
+///
+/// We also record the alloca instructions deleted here so that they aren't
+/// subsequently handed to mem2reg to promote.
+void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
+ while (!DeadInsts.empty()) {
+ Instruction *I = DeadInsts.pop_back_val();
+ DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
+
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ // Zero out the operand and see if it becomes trivially dead.
+ *OI = 0;
+ if (isInstructionTriviallyDead(U))
+ DeadInsts.insert(U);
+ }
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ DeletedAllocas.insert(AI);
+
+ ++NumDeleted;
+ I->eraseFromParent();
+ }
+}
+
+/// \brief Promote the allocas, using the best available technique.
+///
+/// This attempts to promote whatever allocas have been identified as viable in
+/// the PromotableAllocas list. If that list is empty, there is nothing to do.
+/// If there is a domtree available, we attempt to promote using the full power
+/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
+/// based on the SSAUpdater utilities. This function returns whether any
+/// promotion occurred.
+bool SROA::promoteAllocas(Function &F) {
+ if (PromotableAllocas.empty())
+ return false;
+
+ NumPromoted += PromotableAllocas.size();
+
+ if (DT && !ForceSSAUpdater) {
+ DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
+ PromoteMemToReg(PromotableAllocas, *DT);
+ PromotableAllocas.clear();
+ return true;
+ }
+
+ DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
+ SSAUpdater SSA;
+ DIBuilder DIB(*F.getParent());
+ SmallVector<Instruction*, 64> Insts;
+
+ for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
+ AllocaInst *AI = PromotableAllocas[Idx];
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE;) {
+ Instruction *I = cast<Instruction>(*UI++);
+ // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
+ // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
+ // leading to them) here. Eventually it should use them to optimize the
+ // scalar values produced.
+ if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
+ assert(onlyUsedByLifetimeMarkers(I) &&
+ "Found a bitcast used outside of a lifetime marker.");
+ while (!I->use_empty())
+ cast<Instruction>(*I->use_begin())->eraseFromParent();
+ I->eraseFromParent();
+ continue;
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end);
+ II->eraseFromParent();
+ continue;
+ }
+
+ Insts.push_back(I);
+ }
+ AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
+ Insts.clear();
+ }
+
+ PromotableAllocas.clear();
+ return true;
+}
+
+namespace {
+ /// \brief A predicate to test whether an alloca belongs to a set.
+ class IsAllocaInSet {
+ typedef SmallPtrSet<AllocaInst *, 4> SetType;
+ const SetType &Set;
+
+ public:
+ typedef AllocaInst *argument_type;
+
+ IsAllocaInSet(const SetType &Set) : Set(Set) {}
+ bool operator()(AllocaInst *AI) const { return Set.count(AI); }
+ };
+}
+
+bool SROA::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
+ C = &F.getContext();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ if (!TD) {
+ DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
+ return false;
+ }
+ DT = getAnalysisIfAvailable<DominatorTree>();
+
+ BasicBlock &EntryBB = F.getEntryBlock();
+ for (BasicBlock::iterator I = EntryBB.begin(), E = llvm::prior(EntryBB.end());
+ I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ Worklist.insert(AI);
+
+ bool Changed = false;
+ // A set of deleted alloca instruction pointers which should be removed from
+ // the list of promotable allocas.
+ SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
+
+ do {
+ while (!Worklist.empty()) {
+ Changed |= runOnAlloca(*Worklist.pop_back_val());
+ deleteDeadInstructions(DeletedAllocas);
+
+ // Remove the deleted allocas from various lists so that we don't try to
+ // continue processing them.
+ if (!DeletedAllocas.empty()) {
+ Worklist.remove_if(IsAllocaInSet(DeletedAllocas));
+ PostPromotionWorklist.remove_if(IsAllocaInSet(DeletedAllocas));
+ PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
+ PromotableAllocas.end(),
+ IsAllocaInSet(DeletedAllocas)),
+ PromotableAllocas.end());
+ DeletedAllocas.clear();
+ }
+ }
+
+ Changed |= promoteAllocas(F);
+
+ Worklist = PostPromotionWorklist;
+ PostPromotionWorklist.clear();
+ } while (!Worklist.empty());
+
+ return Changed;
+}
+
+void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
+ if (RequiresDomTree)
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
new file mode 100644
index 000000000000..8a9c7da113c1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -0,0 +1,189 @@
+//===-- Scalar.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMScalarOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm-c/Initialization.h"
+#include "llvm-c/Transforms/Scalar.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+
+using namespace llvm;
+
+/// initializeScalarOptsPasses - Initialize all passes linked into the
+/// ScalarOpts library.
+void llvm::initializeScalarOpts(PassRegistry &Registry) {
+ initializeADCEPass(Registry);
+ initializeBlockPlacementPass(Registry);
+ initializeCodeGenPreparePass(Registry);
+ initializeConstantPropagationPass(Registry);
+ initializeCorrelatedValuePropagationPass(Registry);
+ initializeDCEPass(Registry);
+ initializeDeadInstEliminationPass(Registry);
+ initializeDSEPass(Registry);
+ initializeGVNPass(Registry);
+ initializeEarlyCSEPass(Registry);
+ initializeIndVarSimplifyPass(Registry);
+ initializeJumpThreadingPass(Registry);
+ initializeLICMPass(Registry);
+ initializeLoopDeletionPass(Registry);
+ initializeLoopInstSimplifyPass(Registry);
+ initializeLoopRotatePass(Registry);
+ initializeLoopStrengthReducePass(Registry);
+ initializeLoopUnrollPass(Registry);
+ initializeLoopUnswitchPass(Registry);
+ initializeLoopIdiomRecognizePass(Registry);
+ initializeLowerAtomicPass(Registry);
+ initializeLowerExpectIntrinsicPass(Registry);
+ initializeMemCpyOptPass(Registry);
+ initializeReassociatePass(Registry);
+ initializeRegToMemPass(Registry);
+ initializeSCCPPass(Registry);
+ initializeIPSCCPPass(Registry);
+ initializeSROAPass(Registry);
+ initializeSROA_DTPass(Registry);
+ initializeSROA_SSAUpPass(Registry);
+ initializeCFGSimplifyPassPass(Registry);
+ initializeSimplifyLibCallsPass(Registry);
+ initializeSinkingPass(Registry);
+ initializeTailCallElimPass(Registry);
+}
+
+void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
+ initializeScalarOpts(*unwrap(R));
+}
+
+void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createAggressiveDCEPass());
+}
+
+void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCFGSimplificationPass());
+}
+
+void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadStoreEliminationPass());
+}
+
+void LLVMAddGVNPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGVNPass());
+}
+
+void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIndVarSimplifyPass());
+}
+
+void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createInstructionCombiningPass());
+}
+
+void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createJumpThreadingPass());
+}
+
+void LLVMAddLICMPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLICMPass());
+}
+
+void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopDeletionPass());
+}
+
+void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopIdiomPass());
+}
+
+void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopRotatePass());
+}
+
+void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopUnrollPass());
+}
+
+void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopUnswitchPass());
+}
+
+void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createMemCpyOptPass());
+}
+
+void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPromoteMemoryToRegisterPass());
+}
+
+void LLVMAddReassociatePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createReassociatePass());
+}
+
+void LLVMAddSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSCCPPass());
+}
+
+void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarReplAggregatesPass());
+}
+
+void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarReplAggregatesPass(-1, false));
+}
+
+void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
+ int Threshold) {
+ unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
+}
+
+void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSimplifyLibCallsPass());
+}
+
+void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createTailCallEliminationPass());
+}
+
+void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantPropagationPass());
+}
+
+void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDemoteRegisterToMemoryPass());
+}
+
+void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createVerifierPass());
+}
+
+void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCorrelatedValuePropagationPass());
+}
+
+void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createEarlyCSEPass());
+}
+
+void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
+}
+
+void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBasicAliasAnalysisPass());
+}
+
+void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerExpectIntrinsicPass());
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
new file mode 100644
index 000000000000..e590a374eac2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -0,0 +1,2611 @@
+//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation implements the well known scalar replacement of
+// aggregates transformation. This xform breaks up alloca instructions of
+// aggregate type (structure or array) into individual alloca instructions for
+// each member (if possible). Then, if possible, it transforms the individual
+// alloca instructions into nice clean scalar SSA form.
+//
+// This combines a simple SRoA algorithm with the Mem2Reg algorithm because they
+// often interact, especially for C++ programs. As such, iterating between
+// SRoA, then Mem2Reg until we run out of things to promote works well.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalarrepl"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumReplaced, "Number of allocas broken up");
+STATISTIC(NumPromoted, "Number of allocas promoted");
+STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
+STATISTIC(NumConverted, "Number of aggregates converted to scalar");
+
+namespace {
+ struct SROA : public FunctionPass {
+ SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
+ : FunctionPass(ID), HasDomTree(hasDT) {
+ if (T == -1)
+ SRThreshold = 128;
+ else
+ SRThreshold = T;
+ if (ST == -1)
+ StructMemberThreshold = 32;
+ else
+ StructMemberThreshold = ST;
+ if (AT == -1)
+ ArrayElementThreshold = 8;
+ else
+ ArrayElementThreshold = AT;
+ if (SLT == -1)
+ // Do not limit the scalar integer load size if no threshold is given.
+ ScalarLoadThreshold = -1;
+ else
+ ScalarLoadThreshold = SLT;
+ }
+
+ bool runOnFunction(Function &F);
+
+ bool performScalarRepl(Function &F);
+ bool performPromotion(Function &F);
+
+ private:
+ bool HasDomTree;
+ DataLayout *TD;
+
+ /// DeadInsts - Keep track of instructions we have made dead, so that
+ /// we can remove them after we are done working.
+ SmallVector<Value*, 32> DeadInsts;
+
+ /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
+ /// information about the uses. All these fields are initialized to false
+ /// and set to true when something is learned.
+ struct AllocaInfo {
+ /// The alloca to promote.
+ AllocaInst *AI;
+
+ /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
+ /// looping and avoid redundant work.
+ SmallPtrSet<PHINode*, 8> CheckedPHIs;
+
+ /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
+ bool isUnsafe : 1;
+
+ /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
+ bool isMemCpySrc : 1;
+
+ /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
+ bool isMemCpyDst : 1;
+
+ /// hasSubelementAccess - This is true if a subelement of the alloca is
+ /// ever accessed, or false if the alloca is only accessed with mem
+ /// intrinsics or load/store that only access the entire alloca at once.
+ bool hasSubelementAccess : 1;
+
+ /// hasALoadOrStore - This is true if there are any loads or stores to it.
+ /// The alloca may just be accessed with memcpy, for example, which would
+ /// not set this.
+ bool hasALoadOrStore : 1;
+
+ explicit AllocaInfo(AllocaInst *ai)
+ : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
+ hasSubelementAccess(false), hasALoadOrStore(false) {}
+ };
+
+ /// SRThreshold - The maximum alloca size to considered for SROA.
+ unsigned SRThreshold;
+
+ /// StructMemberThreshold - The maximum number of members a struct can
+ /// contain to be considered for SROA.
+ unsigned StructMemberThreshold;
+
+ /// ArrayElementThreshold - The maximum number of elements an array can
+ /// have to be considered for SROA.
+ unsigned ArrayElementThreshold;
+
+ /// ScalarLoadThreshold - The maximum size in bits of scalars to load when
+ /// converting to scalar
+ unsigned ScalarLoadThreshold;
+
+ void MarkUnsafe(AllocaInfo &I, Instruction *User) {
+ I.isUnsafe = true;
+ DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
+ }
+
+ bool isSafeAllocaToScalarRepl(AllocaInst *AI);
+
+ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
+ void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
+ AllocaInfo &Info);
+ void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
+ void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
+ Type *MemOpType, bool isStore, AllocaInfo &Info,
+ Instruction *TheAccess, bool AllowWholeAccess);
+ bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
+ uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
+ Type *&IdxTy);
+
+ void DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList);
+ void DeleteDeadInstructions();
+
+ void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
+ uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ bool ShouldAttemptScalarRepl(AllocaInst *AI);
+ };
+
+ // SROA_DT - SROA that uses DominatorTree.
+ struct SROA_DT : public SROA {
+ static char ID;
+ public:
+ SROA_DT(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
+ SROA(T, true, ID, ST, AT, SLT) {
+ initializeSROA_DTPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass does not require any passes, but we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+ };
+
+ // SROA_SSAUp - SROA that uses SSAUpdater.
+ struct SROA_SSAUp : public SROA {
+ static char ID;
+ public:
+ SROA_SSAUp(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
+ SROA(T, false, ID, ST, AT, SLT) {
+ initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass does not require any passes, but we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+
+}
+
+char SROA_DT::ID = 0;
+char SROA_SSAUp::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+
+INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
+INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
+
+// Public interface to the ScalarReplAggregates pass
+FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
+ bool UseDomTree,
+ int StructMemberThreshold,
+ int ArrayElementThreshold,
+ int ScalarLoadThreshold) {
+ if (UseDomTree)
+ return new SROA_DT(Threshold, StructMemberThreshold, ArrayElementThreshold,
+ ScalarLoadThreshold);
+ return new SROA_SSAUp(Threshold, StructMemberThreshold,
+ ArrayElementThreshold, ScalarLoadThreshold);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Convert To Scalar Optimization.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
+/// optimization, which scans the uses of an alloca and determines if it can
+/// rewrite it in terms of a single new alloca that can be mem2reg'd.
+class ConvertToScalarInfo {
+ /// AllocaSize - The size of the alloca being considered in bytes.
+ unsigned AllocaSize;
+ const DataLayout &TD;
+ unsigned ScalarLoadThreshold;
+
+ /// IsNotTrivial - This is set to true if there is some access to the object
+ /// which means that mem2reg can't promote it.
+ bool IsNotTrivial;
+
+ /// ScalarKind - Tracks the kind of alloca being considered for promotion,
+ /// computed based on the uses of the alloca rather than the LLVM type system.
+ enum {
+ Unknown,
+
+ // Accesses via GEPs that are consistent with element access of a vector
+ // type. This will not be converted into a vector unless there is a later
+ // access using an actual vector type.
+ ImplicitVector,
+
+ // Accesses via vector operations and GEPs that are consistent with the
+ // layout of a vector type.
+ Vector,
+
+ // An integer bag-of-bits with bitwise operations for insertion and
+ // extraction. Any combination of types can be converted into this kind
+ // of scalar.
+ Integer
+ } ScalarKind;
+
+ /// VectorTy - This tracks the type that we should promote the vector to if
+ /// it is possible to turn it into a vector. This starts out null, and if it
+ /// isn't possible to turn into a vector type, it gets set to VoidTy.
+ VectorType *VectorTy;
+
+ /// HadNonMemTransferAccess - True if there is at least one access to the
+ /// alloca that is not a MemTransferInst. We don't want to turn structs into
+ /// large integers unless there is some potential for optimization.
+ bool HadNonMemTransferAccess;
+
+ /// HadDynamicAccess - True if some element of this alloca was dynamic.
+ /// We don't yet have support for turning a dynamic access into a large
+ /// integer.
+ bool HadDynamicAccess;
+
+public:
+ explicit ConvertToScalarInfo(unsigned Size, const DataLayout &td,
+ unsigned SLT)
+ : AllocaSize(Size), TD(td), ScalarLoadThreshold(SLT), IsNotTrivial(false),
+ ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
+ HadDynamicAccess(false) { }
+
+ AllocaInst *TryConvert(AllocaInst *AI);
+
+private:
+ bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
+ void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
+ bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
+ void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
+ Value *NonConstantIdx);
+
+ Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder);
+ Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder);
+};
+} // end anonymous namespace.
+
+
+/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
+/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
+/// alloca if possible or null if not.
+AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
+ // If we can't convert this scalar, or if mem2reg can trivially do it, bail
+ // out.
+ if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
+ return 0;
+
+ // If an alloca has only memset / memcpy uses, it may still have an Unknown
+ // ScalarKind. Treat it as an Integer below.
+ if (ScalarKind == Unknown)
+ ScalarKind = Integer;
+
+ if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
+ ScalarKind = Integer;
+
+ // If we were able to find a vector type that can handle this with
+ // insert/extract elements, and if there was at least one use that had
+ // a vector type, promote this to a vector. We don't want to promote
+ // random stuff that doesn't use vectors (e.g. <9 x double>) because then
+ // we just get a lot of insert/extracts. If at least one vector is
+ // involved, then we probably really do have a union of vector/array.
+ Type *NewTy;
+ if (ScalarKind == Vector) {
+ assert(VectorTy && "Missing type for vector scalar.");
+ DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
+ << *VectorTy << '\n');
+ NewTy = VectorTy; // Use the vector type.
+ } else {
+ unsigned BitWidth = AllocaSize * 8;
+
+ // Do not convert to scalar integer if the alloca size exceeds the
+ // scalar load threshold.
+ if (BitWidth > ScalarLoadThreshold)
+ return 0;
+
+ if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
+ !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
+ return 0;
+ // Dynamic accesses on integers aren't yet supported. They need us to shift
+ // by a dynamic amount which could be difficult to work out as we might not
+ // know whether to use a left or right shift.
+ if (ScalarKind == Integer && HadDynamicAccess)
+ return 0;
+
+ DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
+ // Create and insert the integer alloca.
+ NewTy = IntegerType::get(AI->getContext(), BitWidth);
+ }
+ AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0, 0);
+ return NewAI;
+}
+
+/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
+/// (VectorTy) so far at the offset specified by Offset (which is specified in
+/// bytes).
+///
+/// There are two cases we handle here:
+/// 1) A union of vector types of the same size and potentially its elements.
+/// Here we turn element accesses into insert/extract element operations.
+/// This promotes a <4 x float> with a store of float to the third element
+/// into a <4 x float> that uses insert element.
+/// 2) A fully general blob of memory, which we turn into some (potentially
+/// large) integer type with extract and insert operations where the loads
+/// and stores would mutate the memory. We mark this by setting VectorTy
+/// to VoidTy.
+void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
+ uint64_t Offset) {
+ // If we already decided to turn this into a blob of integer memory, there is
+ // nothing to be done.
+ if (ScalarKind == Integer)
+ return;
+
+ // If this could be contributing to a vector, analyze it.
+
+ // If the In type is a vector that is the same size as the alloca, see if it
+ // matches the existing VecTy.
+ if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
+ if (MergeInVectorType(VInTy, Offset))
+ return;
+ } else if (In->isFloatTy() || In->isDoubleTy() ||
+ (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
+ isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+ // Full width accesses can be ignored, because they can always be turned
+ // into bitcasts.
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+ if (EltSize == AllocaSize)
+ return;
+
+ // If we're accessing something that could be an element of a vector, see
+ // if the implied vector agrees with what we already have and if Offset is
+ // compatible with it.
+ if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
+ (!VectorTy || EltSize == VectorTy->getElementType()
+ ->getPrimitiveSizeInBits()/8)) {
+ if (!VectorTy) {
+ ScalarKind = ImplicitVector;
+ VectorTy = VectorType::get(In, AllocaSize/EltSize);
+ }
+ return;
+ }
+ }
+
+ // Otherwise, we have a case that we can't handle with an optimized vector
+ // form. We can still turn this into a large integer.
+ ScalarKind = Integer;
+}
+
+/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
+/// returning true if the type was successfully merged and false otherwise.
+bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
+ uint64_t Offset) {
+ if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
+ // If we're storing/loading a vector of the right size, allow it as a
+ // vector. If this the first vector we see, remember the type so that
+ // we know the element size. If this is a subsequent access, ignore it
+ // even if it is a differing type but the same size. Worst case we can
+ // bitcast the resultant vectors.
+ if (!VectorTy)
+ VectorTy = VInTy;
+ ScalarKind = Vector;
+ return true;
+ }
+
+ return false;
+}
+
+/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
+/// its accesses to a single vector type, return true and set VecTy to
+/// the new type. If we could convert the alloca into a single promotable
+/// integer, return true but set VecTy to VoidTy. Further, if the use is not a
+/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset
+/// is the current offset from the base of the alloca being analyzed.
+///
+/// If we see at least one access to the value that is as a vector type, set the
+/// SawVec flag.
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
+ Value* NonConstantIdx) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // Don't break volatile loads.
+ if (!LI->isSimple())
+ return false;
+ // Don't touch MMX operations.
+ if (LI->getType()->isX86_MMXTy())
+ return false;
+ HadNonMemTransferAccess = true;
+ MergeInTypeForLoadOrStore(LI->getType(), Offset);
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Storing the pointer, not into the value?
+ if (SI->getOperand(0) == V || !SI->isSimple()) return false;
+ // Don't touch MMX operations.
+ if (SI->getOperand(0)->getType()->isX86_MMXTy())
+ return false;
+ HadNonMemTransferAccess = true;
+ MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
+ return false;
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // If this is a GEP with a variable indices, we can't handle it.
+ PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
+ if (!PtrTy)
+ return false;
+
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ Value *GEPNonConstantIdx = 0;
+ if (!GEP->hasAllConstantIndices()) {
+ if (!isa<VectorType>(PtrTy->getElementType()))
+ return false;
+ if (NonConstantIdx)
+ return false;
+ GEPNonConstantIdx = Indices.pop_back_val();
+ if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
+ return false;
+ HadDynamicAccess = true;
+ } else
+ GEPNonConstantIdx = NonConstantIdx;
+ uint64_t GEPOffset = TD.getIndexedOffset(PtrTy,
+ Indices);
+ // See if all uses can be converted.
+ if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
+ return false;
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // handle it.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ // Store to dynamic index.
+ if (NonConstantIdx)
+ return false;
+ // Store of constant value.
+ if (!isa<ConstantInt>(MSI->getValue()))
+ return false;
+
+ // Store of constant size.
+ ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
+ if (!Len)
+ return false;
+
+ // If the size differs from the alloca, we can only convert the alloca to
+ // an integer bag-of-bits.
+ // FIXME: This should handle all of the cases that are currently accepted
+ // as vector element insertions.
+ if (Len->getZExtValue() != AllocaSize || Offset != 0)
+ ScalarKind = Integer;
+
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ // Store to dynamic index.
+ if (NonConstantIdx)
+ return false;
+ ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
+ if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
+ return false;
+
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ continue;
+ }
+
+ // If this is a lifetime intrinsic, we can handle it.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ continue;
+ }
+ }
+
+ // Otherwise, we cannot handle this!
+ return false;
+ }
+
+ return true;
+}
+
+/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
+/// directly. This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right. By the end of this, there should be no uses of Ptr.
+void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
+ uint64_t Offset,
+ Value* NonConstantIdx) {
+ while (!Ptr->use_empty()) {
+ Instruction *User = cast<Instruction>(Ptr->use_back());
+
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
+ ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
+ CI->eraseFromParent();
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ Value* GEPNonConstantIdx = 0;
+ if (!GEP->hasAllConstantIndices()) {
+ assert(!NonConstantIdx &&
+ "Dynamic GEP reading from dynamic GEP unsupported");
+ GEPNonConstantIdx = Indices.pop_back_val();
+ } else
+ GEPNonConstantIdx = NonConstantIdx;
+ uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ Indices);
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
+ GEP->eraseFromParent();
+ continue;
+ }
+
+ IRBuilder<> Builder(User);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // The load is a bit extract from NewAI shifted right by Offset bits.
+ Value *LoadedVal = Builder.CreateLoad(NewAI);
+ Value *NewLoadVal
+ = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset,
+ NonConstantIdx, Builder);
+ LI->replaceAllUsesWith(NewLoadVal);
+ LI->eraseFromParent();
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ assert(SI->getOperand(0) != Ptr && "Consistency error!");
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
+ Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
+ NonConstantIdx, Builder);
+ Builder.CreateStore(New, NewAI);
+ SI->eraseFromParent();
+
+ // If the load we just inserted is now dead, then the inserted store
+ // overwrote the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // transform it into a store of the expanded constant value.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ assert(MSI->getRawDest() == Ptr && "Consistency error!");
+ assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
+ int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
+ if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
+ unsigned NumBytes = static_cast<unsigned>(SNumBytes);
+ unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
+
+ // Compute the value replicated the right number of times.
+ APInt APVal(NumBytes*8, Val);
+
+ // Splat the value if non-zero.
+ if (Val)
+ for (unsigned i = 1; i != NumBytes; ++i)
+ APVal |= APVal << 8;
+
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
+ Value *New = ConvertScalar_InsertValue(
+ ConstantInt::get(User->getContext(), APVal),
+ Old, Offset, 0, Builder);
+ Builder.CreateStore(New, NewAI);
+
+ // If the load we just inserted is now dead, then the memset overwrote
+ // the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
+ }
+ MSI->eraseFromParent();
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ assert(Offset == 0 && "must be store to start of alloca");
+ assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");
+
+ // If the source and destination are both to the same alloca, then this is
+ // a noop copy-to-self, just delete it. Otherwise, emit a load and store
+ // as appropriate.
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
+
+ if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
+ // Dest must be OrigAI, change this to be a load from the original
+ // pointer (bitcasted), then a store to our new alloca.
+ assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
+ Value *SrcPtr = MTI->getSource();
+ PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
+ PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ SPTy->getAddressSpace());
+ }
+ SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
+
+ LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
+ SrcVal->setAlignment(MTI->getAlignment());
+ Builder.CreateStore(SrcVal, NewAI);
+ } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
+ // Src must be OrigAI, change this to be a load from NewAI then a store
+ // through the original dest pointer (bitcasted).
+ assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
+ LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
+
+ PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
+ PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ DPTy->getAddressSpace());
+ }
+ Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
+
+ StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
+ NewStore->setAlignment(MTI->getAlignment());
+ } else {
+ // Noop transfer. Src == Dst
+ }
+
+ MTI->eraseFromParent();
+ continue;
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ // There's no need to preserve these, as the resulting alloca will be
+ // converted to a register anyways.
+ II->eraseFromParent();
+ continue;
+ }
+ }
+
+ llvm_unreachable("Unsupported operation!");
+ }
+}
+
+/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
+/// or vector value FromVal, extracting the bits from the offset specified by
+/// Offset. This returns the value, which is of type ToType.
+///
+/// This happens when we are converting an "integer union" to a single
+/// integer scalar, or when we are converting a "vector union" to a vector with
+/// insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
+Value *ConvertToScalarInfo::
+ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder) {
+ // If the load is of the whole new alloca, no conversion is needed.
+ Type *FromType = FromVal->getType();
+ if (FromType == ToType && Offset == 0)
+ return FromVal;
+
+ // If the result alloca is a vector type, this is either an element
+ // access or a bitcast to another vector type of the same size.
+ if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+ unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
+ unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+ if (FromTypeSize == ToTypeSize)
+ return Builder.CreateBitCast(FromVal, ToType);
+
+ // Otherwise it must be an element access.
+ unsigned Elt = 0;
+ if (Offset) {
+ unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ }
+ // Return the element extracted out of it.
+ Value *Idx;
+ if (NonConstantIdx) {
+ if (Elt)
+ Idx = Builder.CreateAdd(NonConstantIdx,
+ Builder.getInt32(Elt),
+ "dyn.offset");
+ else
+ Idx = NonConstantIdx;
+ } else
+ Idx = Builder.getInt32(Elt);
+ Value *V = Builder.CreateExtractElement(FromVal, Idx);
+ if (V->getType() != ToType)
+ V = Builder.CreateBitCast(V, ToType);
+ return V;
+ }
+
+ // If ToType is a first class aggregate, extract out each of the pieces and
+ // use insertvalue's to form the FCA.
+ if (StructType *ST = dyn_cast<StructType>(ToType)) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into struct types not supported");
+ const StructLayout &Layout = *TD.getStructLayout(ST);
+ Value *Res = UndefValue::get(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
+ Offset+Layout.getElementOffsetInBits(i),
+ 0, Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i);
+ }
+ return Res;
+ }
+
+ if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into array types not supported");
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ Value *Res = UndefValue::get(AT);
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
+ Offset+i*EltSize, 0, Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i);
+ }
+ return Res;
+ }
+
+ // Otherwise, this must be a union that was converted to an integer value.
+ IntegerType *NTy = cast<IntegerType>(FromVal->getType());
+
+ // If this is a big-endian system and the load is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD.isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = TD.getTypeStoreSizeInBits(NTy) -
+ TD.getTypeStoreSizeInBits(ToType) - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative bitwidths (with shl) which are not defined.
+ // We do this to support (f.e.) loads off the end of a structure where
+ // only some bits are used.
+ if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateLShr(FromVal,
+ ConstantInt::get(FromVal->getType(), ShAmt));
+ else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateShl(FromVal,
+ ConstantInt::get(FromVal->getType(), -ShAmt));
+
+ // Finally, unconditionally truncate the integer to the right width.
+ unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
+ if (LIBitWidth < NTy->getBitWidth())
+ FromVal =
+ Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth));
+ else if (LIBitWidth > NTy->getBitWidth())
+ FromVal =
+ Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth));
+
+ // If the result is an integer, this is a trunc or bitcast.
+ if (ToType->isIntegerTy()) {
+ // Should be done.
+ } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
+ // Just do a bitcast, we know the sizes match up.
+ FromVal = Builder.CreateBitCast(FromVal, ToType);
+ } else {
+ // Otherwise must be a pointer.
+ FromVal = Builder.CreateIntToPtr(FromVal, ToType);
+ }
+ assert(FromVal->getType() == ToType && "Didn't convert right?");
+ return FromVal;
+}
+
+/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
+/// or vector value "Old" at the offset specified by Offset.
+///
+/// This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
+///
+/// NonConstantIdx is an index value if there was a GEP with a non-constant
+/// index value. If this is 0 then all GEPs used to find this insert address
+/// are constant.
+Value *ConvertToScalarInfo::
+ConvertScalar_InsertValue(Value *SV, Value *Old,
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder) {
+ // Convert the stored type to the actual type, shift it left to insert
+ // then 'or' into place.
+ Type *AllocaType = Old->getType();
+ LLVMContext &Context = Old->getContext();
+
+ if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
+ uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
+ uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
+
+ // Changing the whole vector with memset or with an access of a different
+ // vector type?
+ if (ValSize == VecSize)
+ return Builder.CreateBitCast(SV, AllocaType);
+
+ // Must be an element insertion.
+ Type *EltTy = VTy->getElementType();
+ if (SV->getType() != EltTy)
+ SV = Builder.CreateBitCast(SV, EltTy);
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
+ unsigned Elt = Offset/EltSize;
+ Value *Idx;
+ if (NonConstantIdx) {
+ if (Elt)
+ Idx = Builder.CreateAdd(NonConstantIdx,
+ Builder.getInt32(Elt),
+ "dyn.offset");
+ else
+ Idx = NonConstantIdx;
+ } else
+ Idx = Builder.getInt32(Elt);
+ return Builder.CreateInsertElement(Old, SV, Idx);
+ }
+
+ // If SV is a first-class aggregate value, insert each value recursively.
+ if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into struct types not supported");
+ const StructLayout &Layout = *TD.getStructLayout(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i);
+ Old = ConvertScalar_InsertValue(Elt, Old,
+ Offset+Layout.getElementOffsetInBits(i),
+ 0, Builder);
+ }
+ return Old;
+ }
+
+ if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into array types not supported");
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i);
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
+ }
+ return Old;
+ }
+
+ // If SV is a float, convert it to the appropriate integer type.
+ // If it is a pointer, do the same.
+ unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
+ unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
+ unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
+ unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
+ if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
+ SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
+ else if (SV->getType()->isPointerTy())
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));
+
+ // Zero extend or truncate the value if needed.
+ if (SV->getType() != AllocaType) {
+ if (SV->getType()->getPrimitiveSizeInBits() <
+ AllocaType->getPrimitiveSizeInBits())
+ SV = Builder.CreateZExt(SV, AllocaType);
+ else {
+ // Truncation may be needed if storing more than the alloca can hold
+ // (undefined behavior).
+ SV = Builder.CreateTrunc(SV, AllocaType);
+ SrcWidth = DestWidth;
+ SrcStoreWidth = DestStoreWidth;
+ }
+ }
+
+ // If this is a big-endian system and the store is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD.isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative bitwidths (with shr) which are not defined.
+ // We do this to support (f.e.) stores off the end of a structure where
+ // only some bits in the structure are set.
+ APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
+ if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
+ Mask <<= ShAmt;
+ } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
+ Mask = Mask.lshr(-ShAmt);
+ }
+
+ // Mask out the bits we are about to insert from the old value, and or
+ // in the new bits.
+ if (SrcWidth != DestWidth) {
+ assert(DestWidth > SrcWidth);
+ Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
+ SV = Builder.CreateOr(Old, SV, "ins");
+ }
+ return SV;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SRoA Driver
+//===----------------------------------------------------------------------===//
+
+
+bool SROA::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<DataLayout>();
+
+ bool Changed = performPromotion(F);
+
+ // FIXME: ScalarRepl currently depends on DataLayout more than it
+ // theoretically needs to. It should be refactored in order to support
+ // target-independent IR. Until this is done, just skip the actual
+ // scalar-replacement portion of this pass.
+ if (!TD) return Changed;
+
+ while (1) {
+ bool LocalChange = performScalarRepl(F);
+ if (!LocalChange) break; // No need to repromote if no scalarrepl
+ Changed = true;
+ LocalChange = performPromotion(F);
+ if (!LocalChange) break; // No need to re-scalarrepl if no promotion
+ }
+
+ return Changed;
+}
+
+namespace {
+class AllocaPromoter : public LoadAndStorePromoter {
+ AllocaInst *AI;
+ DIBuilder *DIB;
+ SmallVector<DbgDeclareInst *, 4> DDIs;
+ SmallVector<DbgValueInst *, 4> DVIs;
+public:
+ AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ DIBuilder *DB)
+ : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
+
+ void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
+ // Remember which alloca we're promoting (for isInstInList).
+ this->AI = AI;
+ if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI)) {
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ E = DebugNode->use_end(); UI != E; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ DDIs.push_back(DDI);
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ DVIs.push_back(DVI);
+ }
+
+ LoadAndStorePromoter::run(Insts);
+ AI->eraseFromParent();
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ DDI->eraseFromParent();
+ }
+ for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ DVI->eraseFromParent();
+ }
+ }
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getOperand(0) == AI;
+ return cast<StoreInst>(I)->getPointerOperand() == AI;
+ }
+
+ virtual void updateDebugInfo(Instruction *Inst) const {
+ for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
+ }
+ for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ Value *Arg = NULL;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If an argument is zero extended then use argument directly. The ZExt
+ // may be zapped by an optimization pass in future.
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ Arg = dyn_cast<Argument>(ZExt->getOperand(0));
+ if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ Arg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (!Arg)
+ Arg = SI->getOperand(0);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ Arg = LI->getOperand(0);
+ } else {
+ continue;
+ }
+ Instruction *DbgVal =
+ DIB->insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+ Inst);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ }
+ }
+};
+} // end anon namespace
+
+/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers and then
+/// select between the result, allowing the load of the alloca to be promoted.
+/// From this:
+/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// %V2 = load i32* %Other
+/// %V = select i1 %cond, i32 %V1, i32 %V2
+///
+/// We can do this to a select if its only uses are loads and if the operand to
+/// the select can be loaded unconditionally.
+static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *TD) {
+ bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
+ bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
+
+ for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple()) return false;
+
+ // Both operands to the select need to be dereferencable, either absolutely
+ // (e.g. allocas) or at this point because we can see other accesses to it.
+ if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ }
+
+ return true;
+}
+
+/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers in the pred
+/// blocks and then PHI the results, allowing the load of the alloca to be
+/// promoted.
+/// From this:
+/// %P2 = phi [i32* %Alloca, i32* %Other]
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// ...
+/// %V2 = load i32* %Other
+/// ...
+/// %V = phi [i32 %V1, i32 %V2]
+///
+/// We can do this to a select if its only uses are loads and if the operand to
+/// the select can be loaded unconditionally.
+static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *TD) {
+ // For now, we can only do this promotion if the load is in the same block as
+ // the PHI, and if there are no stores between the phi and load.
+ // TODO: Allow recursive phi users.
+ // TODO: Allow stores.
+ BasicBlock *BB = PN->getParent();
+ unsigned MaxAlign = 0;
+ for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple()) return false;
+
+ // For now we only allow loads in the same block as the PHI. This is a
+ // common case that happens when instcombine merges two loads through a PHI.
+ if (LI->getParent() != BB) return false;
+
+ // Ensure that there are no instructions between the PHI and the load that
+ // could store.
+ for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ }
+
+ // Okay, we know that we have one or more loads in the same block as the PHI.
+ // We can transform this if it is safe to push the loads into the predecessor
+ // blocks. The only thing to watch out for is that we can't put a possibly
+ // trapping load in the predecessor if it is a critical edge.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *InVal = PN->getIncomingValue(i);
+
+ // If the terminator of the predecessor has side-effects (an invoke),
+ // there is no safe place to put a load in the predecessor.
+ if (Pred->getTerminator()->mayHaveSideEffects())
+ return false;
+
+ // If the value is produced by the terminator of the predecessor
+ // (an invoke), there is no valid place to put a load in the predecessor.
+ if (Pred->getTerminator() == InVal)
+ return false;
+
+ // If the predecessor has a single successor, then the edge isn't critical.
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // If this pointer is always safe to load, or if we can prove that there is
+ // already a load in the block, then we can move the load to the pred block.
+ if (InVal->isDereferenceablePointer() ||
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
+
+
+/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
+/// direct (non-volatile) loads and stores to it. If the alloca is close but
+/// not quite there, this will transform the code to allow promotion. As such,
+/// it is a non-pure predicate.
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
+ SetVector<Instruction*, SmallVector<Instruction*, 4>,
+ SmallPtrSet<Instruction*, 4> > InstsToRewrite;
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (!LI->isSimple())
+ return false;
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI || !SI->isSimple())
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ continue;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
+ // If the condition being selected on is a constant, fold the select, yes
+ // this does (rarely) happen early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
+ Value *Result = SI->getOperand(1+CI->isZero());
+ SI->replaceAllUsesWith(Result);
+ SI->eraseFromParent();
+
+ // This is very rare and we just scrambled the use list of AI, start
+ // over completely.
+ return tryToMakeAllocaBePromotable(AI, TD);
+ }
+
+ // If it is safe to turn "load (select c, AI, ptr)" into a select of two
+ // loads, then we can transform this by rewriting the select.
+ if (!isSafeSelectToSpeculate(SI, TD))
+ return false;
+
+ InstsToRewrite.insert(SI);
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ if (PN->use_empty()) { // Dead PHIs can be stripped.
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
+ // in the pred blocks, then we can transform this by rewriting the PHI.
+ if (!isSafePHIToSpeculate(PN, TD))
+ return false;
+
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (onlyUsedByLifetimeMarkers(BCI)) {
+ InstsToRewrite.insert(BCI);
+ continue;
+ }
+ }
+
+ return false;
+ }
+
+ // If there are no instructions to rewrite, then all uses are load/stores and
+ // we're done!
+ if (InstsToRewrite.empty())
+ return true;
+
+ // If we have instructions that need to be rewritten for this to be promotable
+ // take care of it now.
+ for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
+ // This could only be a bitcast used by nothing but lifetime intrinsics.
+ for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end();
+ I != E;) {
+ Use &U = I.getUse();
+ ++I;
+ cast<Instruction>(U.getUser())->eraseFromParent();
+ }
+ BCI->eraseFromParent();
+ continue;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
+ // Selects in InstsToRewrite only have load uses. Rewrite each as two
+ // loads with a new select.
+ while (!SI->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(SI->use_back());
+
+ IRBuilder<> Builder(LI);
+ LoadInst *TrueLoad =
+ Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
+ LoadInst *FalseLoad =
+ Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
+
+ // Transfer alignment and TBAA info if present.
+ TrueLoad->setAlignment(LI->getAlignment());
+ FalseLoad->setAlignment(LI->getAlignment());
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+ TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ }
+
+ Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
+ V->takeName(LI);
+ LI->replaceAllUsesWith(V);
+ LI->eraseFromParent();
+ }
+
+ // Now that all the loads are gone, the select is gone too.
+ SI->eraseFromParent();
+ continue;
+ }
+
+ // Otherwise, we have a PHI node which allows us to push the loads into the
+ // predecessors.
+ PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
+ if (PN->use_empty()) {
+ PN->eraseFromParent();
+ continue;
+ }
+
+ Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+ PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
+ PN->getName()+".ld", PN);
+
+ // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // matter which one we get and if any differ, it doesn't matter.
+ LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+ MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+ unsigned Align = SomeLoad->getAlignment();
+
+ // Rewrite all loads of the PN to use the new PHI.
+ while (!PN->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(PN->use_back());
+ LI->replaceAllUsesWith(NewPN);
+ LI->eraseFromParent();
+ }
+
+ // Inject loads into all of the pred blocks. Keep track of which blocks we
+ // insert them into in case we have multiple edges from the same block.
+ DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ LoadInst *&Load = InsertedLoads[Pred];
+ if (Load == 0) {
+ Load = new LoadInst(PN->getIncomingValue(i),
+ PN->getName() + "." + Pred->getName(),
+ Pred->getTerminator());
+ Load->setAlignment(Align);
+ if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ }
+
+ NewPN->addIncoming(Load, Pred);
+ }
+
+ PN->eraseFromParent();
+ }
+
+ ++NumAdjusted;
+ return true;
+}
+
+bool SROA::performPromotion(Function &F) {
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree *DT = 0;
+ if (HasDomTree)
+ DT = &getAnalysis<DominatorTree>();
+
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+ DIBuilder DIB(*F.getParent());
+ bool Changed = false;
+ SmallVector<Instruction*, 64> Insts;
+ while (1) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (tryToMakeAllocaBePromotable(AI, TD))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty()) break;
+
+ if (HasDomTree)
+ PromoteMemToReg(Allocas, *DT);
+ else {
+ SSAUpdater SSA;
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ AllocaInst *AI = Allocas[i];
+
+ // Build list of instructions to promote.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ Insts.push_back(cast<Instruction>(*UI));
+ AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
+ Insts.clear();
+ }
+ }
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
+/// SROA. It must be a struct or array type with a small number of elements.
+bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
+ Type *T = AI->getAllocatedType();
+ // Do not promote any struct that has too many members.
+ if (StructType *ST = dyn_cast<StructType>(T))
+ return ST->getNumElements() <= StructMemberThreshold;
+ // Do not promote any array that has too many elements.
+ if (ArrayType *AT = dyn_cast<ArrayType>(T))
+ return AT->getNumElements() <= ArrayElementThreshold;
+ return false;
+}
+
+// performScalarRepl - This algorithm is a simple worklist driven algorithm,
+// which runs on all of the alloca instructions in the function, removing them
+// if they are only used by getelementptr instructions.
+//
+bool SROA::performScalarRepl(Function &F) {
+ std::vector<AllocaInst*> WorkList;
+
+ // Scan the entry basic block, adding allocas to the worklist.
+ BasicBlock &BB = F.getEntryBlock();
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+ if (AllocaInst *A = dyn_cast<AllocaInst>(I))
+ WorkList.push_back(A);
+
+ // Process the worklist
+ bool Changed = false;
+ while (!WorkList.empty()) {
+ AllocaInst *AI = WorkList.back();
+ WorkList.pop_back();
+
+ // Handle dead allocas trivially. These can be formed by SROA'ing arrays
+ // with unused elements.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // If this alloca is impossible for us to promote, reject it early.
+ if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
+ continue;
+
+ // Check to see if we can perform the core SROA transformation. We cannot
+ // transform the allocation instruction if it is an array allocation
+ // (allocations OF arrays are ok though), and an allocation of a scalar
+ // value cannot be decomposed at all.
+ uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+
+ // Do not promote [0 x %struct].
+ if (AllocaSize == 0) continue;
+
+ // Do not promote any struct whose size is too big.
+ if (AllocaSize > SRThreshold) continue;
+
+ // If the alloca looks like a good candidate for scalar replacement, and if
+ // all its users can be transformed, then split up the aggregate into its
+ // separate elements.
+ if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
+ DoScalarReplacement(AI, WorkList);
+ Changed = true;
+ continue;
+ }
+
+ // If we can turn this aggregate value (potentially with casts) into a
+ // simple scalar value that can be mem2reg'd into a register value.
+ // IsNotTrivial tracks whether this is something that mem2reg could have
+ // promoted itself. If so, we don't want to transform it needlessly. Note
+ // that we can't just check based on the type: the alloca may be of an i32
+ // but that has pointer arithmetic to set byte 3 of it or something.
+ if (AllocaInst *NewAI = ConvertToScalarInfo(
+ (unsigned)AllocaSize, *TD, ScalarLoadThreshold).TryConvert(AI)) {
+ NewAI->takeName(AI);
+ AI->eraseFromParent();
+ ++NumConverted;
+ Changed = true;
+ continue;
+ }
+
+ // Otherwise, couldn't process this alloca.
+ }
+
+ return Changed;
+}
+
+/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
+/// predicate, do SROA now.
+void SROA::DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList) {
+ DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
+ SmallVector<AllocaInst*, 32> ElementAllocas;
+ if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ ElementAllocas.reserve(ST->getNumContainedTypes());
+ for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AI->getAlignment(),
+ AI->getName() + "." + Twine(i), AI);
+ ElementAllocas.push_back(NA);
+ WorkList.push_back(NA); // Add to worklist for recursive processing
+ }
+ } else {
+ ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
+ ElementAllocas.reserve(AT->getNumElements());
+ Type *ElTy = AT->getElementType();
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
+ AI->getName() + "." + Twine(i), AI);
+ ElementAllocas.push_back(NA);
+ WorkList.push_back(NA); // Add to worklist for recursive processing
+ }
+ }
+
+ // Now that we have created the new alloca instructions, rewrite all the
+ // uses of the old alloca.
+ RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
+
+ // Now erase any instructions that were made dead while rewriting the alloca.
+ DeleteDeadInstructions();
+ AI->eraseFromParent();
+
+ ++NumReplaced;
+}
+
+/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
+/// recursively including all their operands that become trivially dead.
+void SROA::DeleteDeadInstructions() {
+ while (!DeadInsts.empty()) {
+ Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ // Zero out the operand and see if it becomes trivially dead.
+ // (But, don't add allocas to the dead instruction list -- they are
+ // already on the worklist and will be deleted separately.)
+ *OI = 0;
+ if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
+ DeadInsts.push_back(U);
+ }
+
+ I->eraseFromParent();
+ }
+}
+
+/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
+/// performing scalar replacement of alloca AI. The results are flagged in
+/// the Info parameter. Offset indicates the position within AI that is
+/// referenced by this instruction.
+void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
+ AllocaInfo &Info) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafeForScalarRepl(BC, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ uint64_t GEPOffset = Offset;
+ isSafeGEP(GEPI, GEPOffset, Info);
+ if (!Info.isUnsafe)
+ isSafeForScalarRepl(GEPI, GEPOffset, Info);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ if (Length == 0)
+ return MarkUnsafe(Info, User);
+ if (Length->isNegative())
+ return MarkUnsafe(Info, User);
+
+ isSafeMemAccess(Offset, Length->getZExtValue(), 0,
+ UI.getOperandNo() == 0, Info, MI,
+ true /*AllowWholeAccess*/);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (!LI->isSimple())
+ return MarkUnsafe(Info, User);
+ Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (!SI->isSimple() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return MarkUnsafe(Info, User);
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else {
+ return MarkUnsafe(Info, User);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+
+/// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer
+/// derived from the alloca, we can often still split the alloca into elements.
+/// This is useful if we have a large alloca where one element is phi'd
+/// together somewhere: we can SRoA and promote all the other elements even if
+/// we end up not being able to promote this one.
+///
+/// All we require is that the uses of the PHI do not index into other parts of
+/// the alloca. The most important use case for this is single load and stores
+/// that are PHI'd together, which can happen due to code sinking.
+void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
+ AllocaInfo &Info) {
+ // If we've already checked this PHI, don't do it again.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (!Info.CheckedPHIs.insert(PN))
+ return;
+
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafePHISelectUseForScalarRepl(BC, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Only allow "bitcast" GEPs for simplicity. We could generalize this,
+ // but would have to prove that we're staying inside of an element being
+ // promoted.
+ if (!GEPI->hasAllZeroIndices())
+ return MarkUnsafe(Info, User);
+ isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (!LI->isSimple())
+ return MarkUnsafe(Info, User);
+ Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (!SI->isSimple() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else {
+ return MarkUnsafe(Info, User);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+/// isSafeGEP - Check if a GEP instruction can be handled for scalar
+/// replacement. It is safe when all the indices are constant, in-bounds
+/// references, and when the resulting offset corresponds to an element within
+/// the alloca type. The results are flagged in the Info parameter. Upon
+/// return, Offset is adjusted as specified by the GEP indices.
+void SROA::isSafeGEP(GetElementPtrInst *GEPI,
+ uint64_t &Offset, AllocaInfo &Info) {
+ gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
+ if (GEPIt == E)
+ return;
+ bool NonConstant = false;
+ unsigned NonConstantIdxSize = 0;
+
+ // Walk through the GEP type indices, checking the types that this indexes
+ // into.
+ for (; GEPIt != E; ++GEPIt) {
+ // Ignore struct elements, no extra checking needed for these.
+ if ((*GEPIt)->isStructTy())
+ continue;
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
+ if (!IdxVal) {
+ // Non constant GEPs are only a problem on arrays, structs, and pointers
+ // Vectors can be dynamically indexed.
+ // FIXME: Add support for dynamic indexing on arrays. This should be
+ // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0]
+ // isn't.
+ if (!(*GEPIt)->isVectorTy())
+ return MarkUnsafe(Info, GEPI);
+ NonConstant = true;
+ NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
+ }
+ }
+
+ // Compute the offset due to this GEP and check if the alloca has a
+ // component element at that offset.
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ // If this GEP is non constant then the last operand must have been a
+ // dynamic index into a vector. Pop this now as it has no impact on the
+ // constant part of the offset.
+ if (NonConstant)
+ Indices.pop_back();
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset,
+ NonConstantIdxSize))
+ MarkUnsafe(Info, GEPI);
+}
+
+/// isHomogeneousAggregate - Check if type T is a struct or array containing
+/// elements of the same type (which is always true for arrays). If so,
+/// return true with NumElts and EltTy set to the number of elements and the
+/// element type, respectively.
+static bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
+ Type *&EltTy) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ NumElts = AT->getNumElements();
+ EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+ return true;
+ }
+ if (StructType *ST = dyn_cast<StructType>(T)) {
+ NumElts = ST->getNumContainedTypes();
+ EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+ for (unsigned n = 1; n < NumElts; ++n) {
+ if (ST->getContainedType(n) != EltTy)
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
+/// "homogeneous" aggregates with the same element type and number of elements.
+static bool isCompatibleAggregate(Type *T1, Type *T2) {
+ if (T1 == T2)
+ return true;
+
+ unsigned NumElts1, NumElts2;
+ Type *EltTy1, *EltTy2;
+ if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
+ isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
+ NumElts1 == NumElts2 &&
+ EltTy1 == EltTy2)
+ return true;
+
+ return false;
+}
+
+/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
+/// alloca or has an offset and size that corresponds to a component element
+/// within it. The offset checked here may have been formed from a GEP with a
+/// pointer bitcasted to a different type.
+///
+/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
+/// unit. If false, it only allows accesses known to be in a single element.
+void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
+ Type *MemOpType, bool isStore,
+ AllocaInfo &Info, Instruction *TheAccess,
+ bool AllowWholeAccess) {
+ // Check if this is a load/store of the entire alloca.
+ if (Offset == 0 && AllowWholeAccess &&
+ MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
+ // loads/stores (which are essentially the same as the MemIntrinsics with
+ // regard to copying padding between elements). But, if an alloca is
+ // flagged as both a source and destination of such operations, we'll need
+ // to check later for padding between elements.
+ if (!MemOpType || MemOpType->isIntegerTy()) {
+ if (isStore)
+ Info.isMemCpyDst = true;
+ else
+ Info.isMemCpySrc = true;
+ return;
+ }
+ // This is also safe for references using a type that is compatible with
+ // the type of the alloca, so that loads/stores can be rewritten using
+ // insertvalue/extractvalue.
+ if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
+ Info.hasSubelementAccess = true;
+ return;
+ }
+ }
+ // Check if the offset/size correspond to a component within the alloca type.
+ Type *T = Info.AI->getAllocatedType();
+ if (TypeHasComponent(T, Offset, MemSize)) {
+ Info.hasSubelementAccess = true;
+ return;
+ }
+
+ return MarkUnsafe(Info, TheAccess);
+}
+
+/// TypeHasComponent - Return true if T has a component type with the
+/// specified offset and size. If Size is zero, do not check the size.
+bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
+ Type *EltTy;
+ uint64_t EltSize;
+ if (StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ unsigned EltIdx = Layout->getElementContainingOffset(Offset);
+ EltTy = ST->getContainedType(EltIdx);
+ EltSize = TD->getTypeAllocSize(EltTy);
+ Offset -= Layout->getElementOffset(EltIdx);
+ } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ EltTy = AT->getElementType();
+ EltSize = TD->getTypeAllocSize(EltTy);
+ if (Offset >= AT->getNumElements() * EltSize)
+ return false;
+ Offset %= EltSize;
+ } else if (VectorType *VT = dyn_cast<VectorType>(T)) {
+ EltTy = VT->getElementType();
+ EltSize = TD->getTypeAllocSize(EltTy);
+ if (Offset >= VT->getNumElements() * EltSize)
+ return false;
+ Offset %= EltSize;
+ } else {
+ return false;
+ }
+ if (Offset == 0 && (Size == 0 || EltSize == Size))
+ return true;
+ // Check if the component spans multiple elements.
+ if (Offset + Size > EltSize)
+ return false;
+ return TypeHasComponent(EltTy, Offset, Size);
+}
+
+/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
+/// the instruction I, which references it, to use the separate elements.
+/// Offset indicates the position within AI that is referenced by this
+/// instruction.
+void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ RewriteBitCast(BC, AI, Offset, NewElts);
+ continue;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ RewriteGEP(GEPI, AI, Offset, NewElts);
+ continue;
+ }
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ uint64_t MemSize = Length->getZExtValue();
+ if (Offset == 0 &&
+ MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
+ RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
+ // Otherwise the intrinsic can only touch a single element and the
+ // address operand will be updated, so nothing else needs to be done.
+ continue;
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ RewriteLifetimeIntrinsic(II, AI, Offset, NewElts);
+ }
+ continue;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ Type *LIType = LI->getType();
+
+ if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
+ // Replace:
+ // %res = load { i32, i32 }* %alloc
+ // with:
+ // %load.0 = load i32* %alloc.0
+ // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
+ // %load.1 = load i32* %alloc.1
+ // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
+ // (Also works for arrays instead of structs)
+ Value *Insert = UndefValue::get(LIType);
+ IRBuilder<> Builder(LI);
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Load = Builder.CreateLoad(NewElts[i], "load");
+ Insert = Builder.CreateInsertValue(Insert, Load, i, "insert");
+ }
+ LI->replaceAllUsesWith(Insert);
+ DeadInsts.push_back(LI);
+ } else if (LIType->isIntegerTy() &&
+ TD->getTypeAllocSize(LIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a load of the entire alloca to an integer, rewrite it.
+ RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
+ }
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ Value *Val = SI->getOperand(0);
+ Type *SIType = Val->getType();
+ if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
+ // Replace:
+ // store { i32, i32 } %val, { i32, i32 }* %alloc
+ // with:
+ // %val.0 = extractvalue { i32, i32 } %val, 0
+ // store i32 %val.0, i32* %alloc.0
+ // %val.1 = extractvalue { i32, i32 } %val, 1
+ // store i32 %val.1, i32* %alloc.1
+ // (Also works for arrays instead of structs)
+ IRBuilder<> Builder(SI);
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName());
+ Builder.CreateStore(Extract, NewElts[i]);
+ }
+ DeadInsts.push_back(SI);
+ } else if (SIType->isIntegerTy() &&
+ TD->getTypeAllocSize(SIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a store of the entire alloca from an integer, rewrite it.
+ RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
+ }
+ continue;
+ }
+
+ if (isa<SelectInst>(User) || isa<PHINode>(User)) {
+ // If we have a PHI user of the alloca itself (as opposed to a GEP or
+ // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to
+ // the new pointer.
+ if (!isa<AllocaInst>(I)) continue;
+
+ assert(Offset == 0 && NewElts[0] &&
+ "Direct alloca use should have a zero offset");
+
+ // If we have a use of the alloca, we know the derived uses will be
+ // utilizing just the first element of the scalarized result. Insert a
+ // bitcast of the first alloca before the user as required.
+ AllocaInst *NewAI = NewElts[0];
+ BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
+ NewAI->moveBefore(BCI);
+ TheUse = BCI;
+ continue;
+ }
+ }
+}
+
+/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
+/// and recursively continue updating all of its uses.
+void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ RewriteForScalarRepl(BC, AI, Offset, NewElts);
+ if (BC->getOperand(0) != AI)
+ return;
+
+ // The bitcast references the original alloca. Replace its uses with
+ // references to the alloca containing offset zero (which is normally at
+ // index zero, but might not be in cases involving structs with elements
+ // of size zero).
+ Type *T = AI->getAllocatedType();
+ uint64_t EltOffset = 0;
+ Type *IdxTy;
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+ Instruction *Val = NewElts[Idx];
+ if (Val->getType() != BC->getDestTy()) {
+ Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
+ Val->takeName(BC);
+ }
+ BC->replaceAllUsesWith(Val);
+ DeadInsts.push_back(BC);
+}
+
+/// FindElementAndOffset - Return the index of the element containing Offset
+/// within the specified type, which must be either a struct or an array.
+/// Sets T to the type of the element and Offset to the offset within that
+/// element. IdxTy is set to the type of the index result to be used in a
+/// GEP instruction.
+uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
+ Type *&IdxTy) {
+ uint64_t Idx = 0;
+ if (StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ Idx = Layout->getElementContainingOffset(Offset);
+ T = ST->getContainedType(Idx);
+ Offset -= Layout->getElementOffset(Idx);
+ IdxTy = Type::getInt32Ty(T->getContext());
+ return Idx;
+ } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ T = AT->getElementType();
+ uint64_t EltSize = TD->getTypeAllocSize(T);
+ Idx = Offset / EltSize;
+ Offset -= Idx * EltSize;
+ IdxTy = Type::getInt64Ty(T->getContext());
+ return Idx;
+ }
+ VectorType *VT = cast<VectorType>(T);
+ T = VT->getElementType();
+ uint64_t EltSize = TD->getTypeAllocSize(T);
+ Idx = Offset / EltSize;
+ Offset -= Idx * EltSize;
+ IdxTy = Type::getInt64Ty(T->getContext());
+ return Idx;
+}
+
+/// RewriteGEP - Check if this GEP instruction moves the pointer across
+/// elements of the alloca that are being split apart, and if so, rewrite
+/// the GEP to be relative to the new element.
+void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ uint64_t OldOffset = Offset;
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ // If the GEP was dynamic then it must have been a dynamic vector lookup.
+ // In this case, it must be the last GEP operand which is dynamic so keep that
+ // aside until we've found the constant GEP offset then add it back in at the
+ // end.
+ Value* NonConstantIdx = 0;
+ if (!GEPI->hasAllConstantIndices())
+ NonConstantIdx = Indices.pop_back_val();
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+
+ RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
+
+ Type *T = AI->getAllocatedType();
+ Type *IdxTy;
+ uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
+ if (GEPI->getOperand(0) == AI)
+ OldIdx = ~0ULL; // Force the GEP to be rewritten.
+
+ T = AI->getAllocatedType();
+ uint64_t EltOffset = Offset;
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+
+ // If this GEP does not move the pointer across elements of the alloca
+ // being split, then it does not needs to be rewritten.
+ if (Idx == OldIdx)
+ return;
+
+ Type *i32Ty = Type::getInt32Ty(AI->getContext());
+ SmallVector<Value*, 8> NewArgs;
+ NewArgs.push_back(Constant::getNullValue(i32Ty));
+ while (EltOffset != 0) {
+ uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
+ NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
+ }
+ if (NonConstantIdx) {
+ Type* GepTy = T;
+ // This GEP has a dynamic index. We need to add "i32 0" to index through
+ // any structs or arrays in the original type until we get to the vector
+ // to index.
+ while (!isa<VectorType>(GepTy)) {
+ NewArgs.push_back(Constant::getNullValue(i32Ty));
+ GepTy = cast<CompositeType>(GepTy)->getTypeAtIndex(0U);
+ }
+ NewArgs.push_back(NonConstantIdx);
+ }
+ Instruction *Val = NewElts[Idx];
+ if (NewArgs.size() > 1) {
+ Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
+ Val->takeName(GEPI);
+ }
+ if (Val->getType() != GEPI->getType())
+ Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
+ GEPI->replaceAllUsesWith(Val);
+ DeadInsts.push_back(GEPI);
+}
+
+/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it
+/// to mark the lifetime of the scalarized memory.
+void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
+ uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
+ // Put matching lifetime markers on everything from Offset up to
+ // Offset+OldSize.
+ Type *AIType = AI->getAllocatedType();
+ uint64_t NewOffset = Offset;
+ Type *IdxTy;
+ uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
+
+ IRBuilder<> Builder(II);
+ uint64_t Size = OldSize->getLimitedValue();
+
+ if (NewOffset) {
+ // Splice the first element and index 'NewOffset' bytes in. SROA will
+ // split the alloca again later.
+ Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
+ V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
+
+ IdxTy = NewElts[Idx]->getAllocatedType();
+ uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset;
+ if (EltSize > Size) {
+ EltSize = Size;
+ Size = 0;
+ } else {
+ Size -= EltSize;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize));
+ else
+ Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize));
+ ++Idx;
+ }
+
+ for (; Idx != NewElts.size() && Size; ++Idx) {
+ IdxTy = NewElts[Idx]->getAllocatedType();
+ uint64_t EltSize = TD->getTypeAllocSize(IdxTy);
+ if (EltSize > Size) {
+ EltSize = Size;
+ Size = 0;
+ } else {
+ Size -= EltSize;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Builder.CreateLifetimeStart(NewElts[Idx],
+ Builder.getInt64(EltSize));
+ else
+ Builder.CreateLifetimeEnd(NewElts[Idx],
+ Builder.getInt64(EltSize));
+ }
+ DeadInsts.push_back(II);
+}
+
+/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
+/// Rewrite it to copy or set the elements of the scalarized memory.
+void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ // If this is a memcpy/memmove, construct the other pointer as the
+ // appropriate type. The "Other" pointer is the pointer that goes to memory
+ // that doesn't have anything to do with the alloca that we are promoting. For
+ // memset, this Value* stays null.
+ Value *OtherPtr = 0;
+ unsigned MemAlignment = MI->getAlignment();
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
+ if (Inst == MTI->getRawDest())
+ OtherPtr = MTI->getRawSource();
+ else {
+ assert(Inst == MTI->getRawSource());
+ OtherPtr = MTI->getRawDest();
+ }
+ }
+
+ // If there is an other pointer, we want to convert it to the same pointer
+ // type as AI has, so we can GEP through it safely.
+ if (OtherPtr) {
+ unsigned AddrSpace =
+ cast<PointerType>(OtherPtr->getType())->getAddressSpace();
+
+ // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
+ // optimization, but it's also required to detect the corner case where
+ // both pointer operands are referencing the same memory, and where
+ // OtherPtr may be a bitcast or GEP that currently being rewritten. (This
+ // function is only called for mem intrinsics that access the whole
+ // aggregate, so non-zero GEPs are not an issue here.)
+ OtherPtr = OtherPtr->stripPointerCasts();
+
+ // Copying the alloca to itself is a no-op: just delete it.
+ if (OtherPtr == AI || OtherPtr == NewElts[0]) {
+ // This code will run twice for a no-op memcpy -- once for each operand.
+ // Put only one reference to MI on the DeadInsts list.
+ for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
+ E = DeadInsts.end(); I != E; ++I)
+ if (*I == MI) return;
+ DeadInsts.push_back(MI);
+ return;
+ }
+
+ // If the pointer is not the right type, insert a bitcast to the right
+ // type.
+ Type *NewTy =
+ PointerType::get(AI->getType()->getElementType(), AddrSpace);
+
+ if (OtherPtr->getType() != NewTy)
+ OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
+ }
+
+ // Process each element of the aggregate.
+ bool SROADest = MI->getRawDest() == Inst;
+
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // If this is a memcpy/memmove, emit a GEP of the other element address.
+ Value *OtherElt = 0;
+ unsigned OtherEltAlign = MemAlignment;
+
+ if (OtherPtr) {
+ Value *Idx[2] = { Zero,
+ ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
+ OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx,
+ OtherPtr->getName()+"."+Twine(i),
+ MI);
+ uint64_t EltOffset;
+ PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
+ Type *OtherTy = OtherPtrTy->getElementType();
+ if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
+ EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
+ } else {
+ Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
+ EltOffset = TD->getTypeAllocSize(EltTy)*i;
+ }
+
+ // The alignment of the other pointer is the guaranteed alignment of the
+ // element, which is affected by both the known alignment of the whole
+ // mem intrinsic and the alignment of the element. If the alignment of
+ // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
+ // known alignment is just 4 bytes.
+ OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
+ }
+
+ Value *EltPtr = NewElts[i];
+ Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
+
+ // If we got down to a scalar, insert a load or store as appropriate.
+ if (EltTy->isSingleValueType()) {
+ if (isa<MemTransferInst>(MI)) {
+ if (SROADest) {
+ // From Other to Alloca.
+ Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
+ new StoreInst(Elt, EltPtr, MI);
+ } else {
+ // From Alloca to Other.
+ Value *Elt = new LoadInst(EltPtr, "tmp", MI);
+ new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
+ }
+ continue;
+ }
+ assert(isa<MemSetInst>(MI));
+
+ // If the stored element is zero (common case), just store a null
+ // constant.
+ Constant *StoreVal;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
+ if (CI->isZero()) {
+ StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
+ } else {
+ // If EltTy is a vector type, get the element type.
+ Type *ValTy = EltTy->getScalarType();
+
+ // Construct an integer with the right value.
+ unsigned EltSize = TD->getTypeSizeInBits(ValTy);
+ APInt OneVal(EltSize, CI->getZExtValue());
+ APInt TotalVal(OneVal);
+ // Set each byte.
+ for (unsigned i = 0; 8*i < EltSize; ++i) {
+ TotalVal = TotalVal.shl(8);
+ TotalVal |= OneVal;
+ }
+
+ // Convert the integer value to the appropriate type.
+ StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
+ if (ValTy->isPointerTy())
+ StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
+ else if (ValTy->isFloatingPointTy())
+ StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
+ assert(StoreVal->getType() == ValTy && "Type mismatch!");
+
+ // If the requested value was a vector constant, create it.
+ if (EltTy->isVectorTy()) {
+ unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
+ StoreVal = ConstantVector::getSplat(NumElts, StoreVal);
+ }
+ }
+ new StoreInst(StoreVal, EltPtr, MI);
+ continue;
+ }
+ // Otherwise, if we're storing a byte variable, use a memset call for
+ // this element.
+ }
+
+ unsigned EltSize = TD->getTypeAllocSize(EltTy);
+ if (!EltSize)
+ continue;
+
+ IRBuilder<> Builder(MI);
+
+ // Finally, insert the meminst for this element.
+ if (isa<MemSetInst>(MI)) {
+ Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
+ MI->isVolatile());
+ } else {
+ assert(isa<MemTransferInst>(MI));
+ Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
+ Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
+
+ if (isa<MemCpyInst>(MI))
+ Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
+ else
+ Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
+ }
+ }
+ DeadInsts.push_back(MI);
+}
+
+/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
+/// overwrites the entire allocation. Extract out the pieces of the stored
+/// integer and store them individually.
+void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts){
+ // Extract each element out of the integer according to its structure offset
+ // and store the element value to the individual alloca.
+ Value *SrcVal = SI->getOperand(0);
+ Type *AllocaEltTy = AI->getAllocatedType();
+ uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ IRBuilder<> Builder(SI);
+
+ // Handle tail padding by extending the operand
+ if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ SrcVal = Builder.CreateZExt(SrcVal,
+ IntegerType::get(SI->getContext(), AllocaSizeBits));
+
+ DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
+ << '\n');
+
+ // There are two forms here: AI could be an array or struct. Both cases
+ // have different ways to compute the element offset.
+ if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ const StructLayout *Layout = TD->getStructLayout(EltSTy);
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Get the number of bits to shift SrcVal to get the value.
+ Type *FieldTy = EltSTy->getElementType(i);
+ uint64_t Shift = Layout->getElementOffsetInBits(i);
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
+ }
+
+ // Truncate down to an integer of the right size.
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (FieldSizeBits == 0) continue;
+
+ if (FieldSizeBits != AllocaSizeBits)
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(), FieldSizeBits));
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == FieldTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = Builder.CreateBitCast(EltVal, FieldTy);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
+ }
+ new StoreInst(EltVal, DestField, SI);
+ }
+
+ } else {
+ ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
+ Type *ArrayEltTy = ATy->getElementType();
+ uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
+
+ uint64_t Shift;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-ElementOffset;
+ else
+ Shift = 0;
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (ElementSizeBits == 0) continue;
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
+ }
+
+ // Truncate down to an integer of the right size.
+ if (ElementSizeBits != AllocaSizeBits)
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(),
+ ElementSizeBits));
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == ArrayEltTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (ArrayEltTy->isFloatingPointTy() ||
+ ArrayEltTy->isVectorTy()) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
+ }
+ new StoreInst(EltVal, DestField, SI);
+
+ if (TD->isBigEndian())
+ Shift -= ElementOffset;
+ else
+ Shift += ElementOffset;
+ }
+ }
+
+ DeadInsts.push_back(SI);
+}
+
+/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
+/// an integer. Load the individual pieces to form the aggregate value.
+void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ // Extract each element out of the NewElts according to its structure offset
+ // and form the result value.
+ Type *AllocaEltTy = AI->getAllocatedType();
+ uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
+ << '\n');
+
+ // There are two forms here: AI could be an array or struct. Both cases
+ // have different ways to compute the element offset.
+ const StructLayout *Layout = 0;
+ uint64_t ArrayEltBitOffset = 0;
+ if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ Layout = TD->getStructLayout(EltSTy);
+ } else {
+ Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
+ ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ }
+
+ Value *ResultVal =
+ Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Load the value from the alloca. If the NewElt is an aggregate, cast
+ // the pointer to an integer of the same size before doing the load.
+ Value *SrcField = NewElts[i];
+ Type *FieldTy =
+ cast<PointerType>(SrcField->getType())->getElementType();
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (FieldSizeBits == 0) continue;
+
+ IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+ FieldSizeBits);
+ if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
+ !FieldTy->isVectorTy())
+ SrcField = new BitCastInst(SrcField,
+ PointerType::getUnqual(FieldIntTy),
+ "", LI);
+ SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
+
+ // If SrcField is a fp or vector of the right size but that isn't an
+ // integer type, bitcast to an integer so we can shift it.
+ if (SrcField->getType() != FieldIntTy)
+ SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
+
+ // Zero extend the field to be the same size as the final alloca so that
+ // we can shift and insert it.
+ if (SrcField->getType() != ResultVal->getType())
+ SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
+
+ // Determine the number of bits to shift SrcField.
+ uint64_t Shift;
+ if (Layout) // Struct case.
+ Shift = Layout->getElementOffsetInBits(i);
+ else // Array case.
+ Shift = i*ArrayEltBitOffset;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
+
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
+ SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
+ }
+
+ // Don't create an 'or x, 0' on the first iteration.
+ if (!isa<Constant>(ResultVal) ||
+ !cast<Constant>(ResultVal)->isNullValue())
+ ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ else
+ ResultVal = SrcField;
+ }
+
+ // Handle tail padding by truncating the result
+ if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+ ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
+
+ LI->replaceAllUsesWith(ResultVal);
+ DeadInsts.push_back(LI);
+}
+
+/// HasPadding - Return true if the specified type has any structure or
+/// alignment padding in between the elements that would be split apart
+/// by SROA; return false otherwise.
+static bool HasPadding(Type *Ty, const DataLayout &TD) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Ty = ATy->getElementType();
+ return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ }
+
+ // SROA currently handles only Arrays and Structs.
+ StructType *STy = cast<StructType>(Ty);
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned PrevFieldBitOffset = 0;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
+
+ // Check to see if there is any padding between this element and the
+ // previous one.
+ if (i) {
+ unsigned PrevFieldEnd =
+ PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
+ if (PrevFieldEnd < FieldBitOffset)
+ return true;
+ }
+ PrevFieldBitOffset = FieldBitOffset;
+ }
+ // Check for tail padding.
+ if (unsigned EltCount = STy->getNumElements()) {
+ unsigned PrevFieldEnd = PrevFieldBitOffset +
+ TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+ if (PrevFieldEnd < SL->getSizeInBits())
+ return true;
+ }
+ return false;
+}
+
+/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
+/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe,
+/// or 1 if safe after canonicalization has been performed.
+bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
+ // Loop over the use list of the alloca. We can only transform it if all of
+ // the users are safe to transform.
+ AllocaInfo Info(AI);
+
+ isSafeForScalarRepl(AI, 0, Info);
+ if (Info.isUnsafe) {
+ DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
+ return false;
+ }
+
+ // Okay, we know all the users are promotable. If the aggregate is a memcpy
+ // source and destination, we have to be careful. In particular, the memcpy
+ // could be moving around elements that live in structure padding of the LLVM
+ // types, but may actually be used. In these cases, we refuse to promote the
+ // struct.
+ if (Info.isMemCpySrc && Info.isMemCpyDst &&
+ HasPadding(AI->getAllocatedType(), *TD))
+ return false;
+
+ // If the alloca never has an access to just *part* of it, but is accessed
+ // via loads and stores, then we should use ConvertToScalarInfo to promote
+ // the alloca instead of promoting each piece at a time and inserting fission
+ // and fusion code.
+ if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
+ // If the struct/array just has one element, use basic SRoA.
+ if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (ST->getNumElements() > 1) return false;
+ } else {
+ if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
new file mode 100644
index 000000000000..c243d34fd7db
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -0,0 +1,350 @@
+//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead code elimination and basic block merging, along
+// with a collection of other peephole control flow optimizations. For example:
+//
+// * Removes basic blocks with no predecessors.
+// * Merges a basic block into its predecessor if there is only one and the
+// predecessor only has one successor.
+// * Eliminates PHI nodes for basic blocks with a single predecessor.
+// * Eliminates a basic block that only contains an unconditional branch.
+// * Changes invoke instructions to nounwind functions to be calls.
+// * Change things like "if (x) if (y)" into "if (x&y)".
+// * etc..
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSimpl, "Number of blocks simplified");
+
+namespace {
+ struct CFGSimplifyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGSimplifyPass() : FunctionPass(ID) {
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfo>();
+ }
+ };
+}
+
+char CFGSimplifyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG",
+ false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG",
+ false, false)
+
+// Public interface to the CFGSimplification pass
+FunctionPass *llvm::createCFGSimplificationPass() {
+ return new CFGSimplifyPass();
+}
+
+/// changeToUnreachable - Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+ BasicBlock *BB = I->getParent();
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ (*SI)->removePredecessor(BB);
+
+ // Insert a call to llvm.trap right before this. This turns the undefined
+ // behavior into a hard fail instead of falling through into random code.
+ if (UseLLVMTrap) {
+ Function *TrapFn =
+ Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
+ CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
+ CallTrap->setDebugLoc(I->getDebugLoc());
+ }
+ new UnreachableInst(I->getContext(), I);
+
+ // All instructions after this are dead.
+ BasicBlock::iterator BBI = I, BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BB->getInstList().erase(BBI++);
+ }
+}
+
+/// changeToCall - Convert the specified invoke into a normal call.
+static void changeToCall(InvokeInst *II) {
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Update PHI nodes in the unwind destination
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+}
+
+static bool markAliveBlocks(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
+ SmallVector<BasicBlock*, 128> Worklist;
+ Worklist.push_back(BB);
+ Reachable.insert(BB);
+ bool Changed = false;
+ do {
+ BB = Worklist.pop_back_val();
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ ++BBI;
+ if (!isa<UnreachableInst>(BBI)) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(BBI, false);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // Store to undef and store to null are undefined and used to signal that
+ // they should be changed to unreachable by passes that can't modify the
+ // CFG.
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
+ Value *Ptr = SI->getOperand(1);
+
+ if (isa<UndefValue>(Ptr) ||
+ (isa<ConstantPointerNull>(Ptr) &&
+ SI->getPointerAddressSpace() == 0)) {
+ changeToUnreachable(SI, true);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Value *Callee = II->getCalledValue();
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ changeToUnreachable(II, true);
+ Changed = true;
+ } else if (II->doesNotThrow()) {
+ if (II->use_empty() && II->onlyReadsMemory()) {
+ // jump to the normal destination branch.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+ } else
+ changeToCall(II);
+ Changed = true;
+ }
+ }
+
+ Changed |= ConstantFoldTerminator(BB, true);
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (Reachable.insert(*SI))
+ Worklist.push_back(*SI);
+ } while (!Worklist.empty());
+ return Changed;
+}
+
+/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
+/// otherwise.
+static bool removeUnreachableBlocksFromFn(Function &F) {
+ SmallPtrSet<BasicBlock*, 128> Reachable;
+ bool Changed = markAliveBlocks(F.begin(), Reachable);
+
+ // If there are unreachable blocks in the CFG...
+ if (Reachable.size() == F.size())
+ return Changed;
+
+ assert(Reachable.size() < F.size());
+ NumSimpl += F.size()-Reachable.size();
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references...
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Reachable.count(BB))
+ continue;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (Reachable.count(*SI))
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ for (Function::iterator I = ++F.begin(); I != F.end();)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
+
+ return true;
+}
+
+/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// node) return blocks, merge them together to promote recursive block merging.
+static bool mergeEmptyReturnBlocks(Function &F) {
+ bool Changed = false;
+
+ BasicBlock *RetBlock = 0;
+
+ // Scan all the blocks in the function, looking for empty return blocks.
+ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
+ BasicBlock &BB = *BBI++;
+
+ // Only look at return blocks.
+ ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (Ret == 0) continue;
+
+ // Only look at the block if it is empty or the only other thing in it is a
+ // single PHI node that is the operand to the return.
+ if (Ret != &BB.front()) {
+ // Check for something else in the block.
+ BasicBlock::iterator I = Ret;
+ --I;
+ // Skip over debug info.
+ while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
+ --I;
+ if (!isa<DbgInfoIntrinsic>(I) &&
+ (!isa<PHINode>(I) || I != BB.begin() ||
+ Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) != I))
+ continue;
+ }
+
+ // If this is the first returning block, remember it and keep going.
+ if (RetBlock == 0) {
+ RetBlock = &BB;
+ continue;
+ }
+
+ // Otherwise, we found a duplicate return block. Merge the two.
+ Changed = true;
+
+ // Case when there is no input to the return or when the returned values
+ // agree is trivial. Note that they can't agree if there are phis in the
+ // blocks.
+ if (Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) ==
+ cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
+ BB.replaceAllUsesWith(RetBlock);
+ BB.eraseFromParent();
+ continue;
+ }
+
+ // If the canonical return block has no PHI node, create one now.
+ PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
+ if (RetBlockPHI == 0) {
+ Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
+ pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
+ RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
+ std::distance(PB, PE), "merge",
+ &RetBlock->front());
+
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ RetBlockPHI->addIncoming(InVal, *PI);
+ RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
+ }
+
+ // Turn BB into a block that just unconditionally branches to the return
+ // block. This handles the case when the two return blocks have a common
+ // predecessor but that return different things.
+ RetBlockPHI->addIncoming(Ret->getOperand(0), &BB);
+ BB.getTerminator()->eraseFromParent();
+ BranchInst::Create(RetBlock, &BB);
+ }
+
+ return Changed;
+}
+
+/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterating until no more changes are made.
+static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
+ const DataLayout *TD) {
+ bool Changed = false;
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ // Loop over all of the basic blocks and remove them if they are unneeded...
+ //
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
+ if (SimplifyCFG(BBIt++, TTI, TD)) {
+ LocalChange = true;
+ ++NumSimpl;
+ }
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+// It is possible that we may require multiple passes over the code to fully
+// simplify the CFG.
+//
+bool CFGSimplifyPass::runOnFunction(Function &F) {
+ const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ bool EverChanged = removeUnreachableBlocksFromFn(F);
+ EverChanged |= mergeEmptyReturnBlocks(F);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, TD);
+
+ // If neither pass changed anything, we're done.
+ if (!EverChanged) return false;
+
+ // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
+ // removeUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // iterate between the two optimizations. We structure the code like this to
+ // avoid reruning iterativelySimplifyCFG if the second pass of
+ // removeUnreachableBlocksFromFn doesn't do anything.
+ if (!removeUnreachableBlocksFromFn(F))
+ return true;
+
+ do {
+ EverChanged = iterativelySimplifyCFG(F, TTI, TD);
+ EverChanged |= removeUnreachableBlocksFromFn(F);
+ } while (EverChanged);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
new file mode 100644
index 000000000000..3514e6c2aadc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -0,0 +1,247 @@
+//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple pass that applies a variety of small
+// optimizations for calls to specific well-known function calls (e.g. runtime
+// library functions). Any optimization that takes the very simple form
+// "replace call to library function with simpler code that provides the same
+// result" belongs in this file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Optimizer Base Class
+//===----------------------------------------------------------------------===//
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class LibCallOptimization {
+protected:
+ Function *Caller;
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ LLVMContext* Context;
+public:
+ LibCallOptimization() { }
+ virtual ~LibCallOptimization() {}
+
+ /// CallOptimizer - This pure virtual method is implemented by base classes to
+ /// do various optimizations. If this returns null then no transformation was
+ /// performed. If it returns CI, then it transformed the call and CI is to be
+ /// deleted. If it returns something else, replace CI with the new value and
+ /// delete CI.
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+ =0;
+
+ Value *OptimizeCall(CallInst *CI, const DataLayout *TD,
+ const TargetLibraryInfo *TLI, IRBuilder<> &B) {
+ Caller = CI->getParent()->getParent();
+ this->TD = TD;
+ this->TLI = TLI;
+ if (CI->getCalledFunction())
+ Context = &CI->getCalledFunction()->getContext();
+
+ // We never change the calling convention.
+ if (CI->getCallingConv() != llvm::CallingConv::C)
+ return NULL;
+
+ return CallOptimizer(CI->getCalledFunction(), CI, B);
+ }
+};
+} // End anonymous namespace.
+
+
+//===----------------------------------------------------------------------===//
+// SimplifyLibCalls Pass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// This pass optimizes well known library functions from libc and libm.
+ ///
+ class SimplifyLibCalls : public FunctionPass {
+ TargetLibraryInfo *TLI;
+
+ StringMap<LibCallOptimization*> Optimizations;
+ public:
+ static char ID; // Pass identification
+ SimplifyLibCalls() : FunctionPass(ID) {
+ initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
+ }
+ void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
+ void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
+
+ void InitOptimizations();
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ };
+} // end anonymous namespace.
+
+char SimplifyLibCalls::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false)
+
+// Public interface to the Simplify LibCalls pass.
+FunctionPass *llvm::createSimplifyLibCallsPass() {
+ return new SimplifyLibCalls();
+}
+
+void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) {
+ if (TLI->has(F))
+ Optimizations[TLI->getName(F)] = Opt;
+}
+
+void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
+ LibCallOptimization* Opt) {
+ if (TLI->has(F1) && TLI->has(F2))
+ Optimizations[TLI->getName(F1)] = Opt;
+}
+
+/// Optimizations - Populate the Optimizations map with all the optimizations
+/// we know.
+void SimplifyLibCalls::InitOptimizations() {
+}
+
+
+/// runOnFunction - Top level algorithm.
+///
+bool SimplifyLibCalls::runOnFunction(Function &F) {
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ if (Optimizations.empty())
+ InitOptimizations();
+
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+
+ IRBuilder<> Builder(F.getContext());
+
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ // Ignore non-calls.
+ CallInst *CI = dyn_cast<CallInst>(I++);
+ if (!CI || CI->hasFnAttr(Attribute::NoBuiltin)) continue;
+
+ // Ignore indirect calls and calls to non-external functions.
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration() ||
+ !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
+ continue;
+
+ // Ignore unknown calls.
+ LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ if (!LCO) continue;
+
+ // Set the builder to the instruction after the call.
+ Builder.SetInsertPoint(BB, I);
+
+ // Use debug location of CI for all new instructions.
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Try to optimize this call.
+ Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder);
+ if (Result == 0) continue;
+
+ DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
+ dbgs() << " into: " << *Result << "\n");
+
+ // Something changed!
+ Changed = true;
+
+ // Inspect the instruction after the call (which was potentially just
+ // added) next.
+ I = CI; ++I;
+
+ if (CI != Result && !CI->use_empty()) {
+ CI->replaceAllUsesWith(Result);
+ if (!Result->hasName())
+ Result->takeName(CI);
+ }
+ CI->eraseFromParent();
+ }
+ }
+ return Changed;
+}
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(X)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(sqrt(x)) -> pow(x,1/9)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(x**y) -> y*log(x)
+// * log(exp(y)) -> y*log(e)
+// * log(exp2(y)) -> y*log(2)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+// * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+// * lround(cnst) -> cnst'
+//
+// pow, powf, powl:
+// * pow(exp(x),y) -> exp(x*y)
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// round, roundf, roundl:
+// * round(cnst) -> cnst'
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if pstv is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// strchr:
+// * strchr(p, 0) -> strlen(p)
+// tan, tanf, tanl:
+// * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+// * trunc(cnst) -> cnst'
+//
+//
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
new file mode 100644
index 000000000000..d4595bb373e6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -0,0 +1,270 @@
+//===-- Sink.cpp - Code Sinking -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks, when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sink"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumSunk, "Number of instructions sunk");
+STATISTIC(NumSinkIter, "Number of sinking iterations");
+
+namespace {
+ class Sinking : public FunctionPass {
+ DominatorTree *DT;
+ LoopInfo *LI;
+ AliasAnalysis *AA;
+
+ public:
+ static char ID; // Pass identification
+ Sinking() : FunctionPass(ID) {
+ initializeSinkingPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ }
+ private:
+ bool ProcessBlock(BasicBlock &BB);
+ bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores);
+ bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
+ bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const;
+ };
+} // end anonymous namespace
+
+char Sinking::ID = 0;
+INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
+
+FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified value
+/// occur in blocks dominated by the specified block.
+bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
+ BasicBlock *BB) const {
+ // Ignoring debug uses is necessary so debug info doesn't affect the code.
+ // This may leave a referencing dbg_value in the original block, before
+ // the definition of the vreg. Dwarf generator handles this although the
+ // user might not get the right info at runtime.
+ for (Value::use_iterator I = Inst->use_begin(),
+ E = Inst->use_end(); I != E; ++I) {
+ // Determine the block of the use.
+ Instruction *UseInst = cast<Instruction>(*I);
+ BasicBlock *UseBlock = UseInst->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo());
+ UseBlock = PN->getIncomingBlock(Num);
+ }
+ // Check that it dominates.
+ if (!DT->dominates(BB, UseBlock))
+ return false;
+ }
+ return true;
+}
+
+bool Sinking::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool MadeChange, EverMadeChange = false;
+
+ do {
+ MadeChange = false;
+ DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n");
+ // Process all basic blocks.
+ for (Function::iterator I = F.begin(), E = F.end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+ EverMadeChange |= MadeChange;
+ NumSinkIter++;
+ } while (MadeChange);
+
+ return EverMadeChange;
+}
+
+bool Sinking::ProcessBlock(BasicBlock &BB) {
+ // Can't sink anything out of a block that has less than two successors.
+ if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&BB)) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ BasicBlock::iterator I = BB.end();
+ --I;
+ bool ProcessedBegin = false;
+ SmallPtrSet<Instruction *, 8> Stores;
+ do {
+ Instruction *Inst = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == BB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ if (SinkInstruction(Inst, Stores))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
+ SmallPtrSet<Instruction *, 8> &Stores) {
+
+ if (Inst->mayWriteToMemory()) {
+ Stores.insert(Inst);
+ return false;
+ }
+
+ if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
+ AliasAnalysis::Location Loc = AA->getLocation(L);
+ for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
+ E = Stores.end(); I != E; ++I)
+ if (AA->getModRefInfo(*I, Loc) & AliasAnalysis::Mod)
+ return false;
+ }
+
+ if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
+ return false;
+
+ return true;
+}
+
+/// IsAcceptableTarget - Return true if it is possible to sink the instruction
+/// in the specified basic block.
+bool Sinking::IsAcceptableTarget(Instruction *Inst,
+ BasicBlock *SuccToSinkTo) const {
+ assert(Inst && "Instruction to be sunk is null");
+ assert(SuccToSinkTo && "Candidate sink target is null");
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (Inst->getParent() == SuccToSinkTo)
+ return false;
+
+ // If the block has multiple predecessors, this would introduce computation
+ // on different code paths. We could split the critical edge, but for now we
+ // just punt.
+ // FIXME: Split critical edges if not backedges.
+ if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ if (!isSafeToSpeculativelyExecute(Inst))
+ return false;
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!DT->dominates(Inst->getParent(), SuccToSinkTo))
+ return false;
+
+ // Don't sink instructions into a loop.
+ Loop *succ = LI->getLoopFor(SuccToSinkTo);
+ Loop *cur = LI->getLoopFor(Inst->getParent());
+ if (succ != 0 && succ != cur)
+ return false;
+ }
+
+ // Finally, check that all the uses of the instruction are actually
+ // dominated by the candidate
+ return AllUsesDominatedByBlock(Inst, SuccToSinkTo);
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool Sinking::SinkInstruction(Instruction *Inst,
+ SmallPtrSet<Instruction *, 8> &Stores) {
+ // Check if it's safe to move the instruction.
+ if (!isSafeToMove(Inst, AA, Stores))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ BasicBlock *SuccToSinkTo = 0;
+
+ // Instructions can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ // Look at all the postdominators and see if we can sink it in one.
+ DomTreeNode *DTN = DT->getNode(Inst->getParent());
+ for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
+ I != E && SuccToSinkTo == 0; ++I) {
+ BasicBlock *Candidate = (*I)->getBlock();
+ if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
+ IsAcceptableTarget(Inst, Candidate))
+ SuccToSinkTo = Candidate;
+ }
+
+ // If no suitable postdominator was found, look at all the successors and
+ // decide which one we should sink to, if any.
+ for (succ_iterator I = succ_begin(Inst->getParent()),
+ E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) {
+ if (IsAcceptableTarget(Inst, *I))
+ SuccToSinkTo = *I;
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return false;
+
+ DEBUG(dbgs() << "Sink" << *Inst << " (";
+ WriteAsOperand(dbgs(), Inst->getParent(), false);
+ dbgs() << " -> ";
+ WriteAsOperand(dbgs(), SuccToSinkTo, false);
+ dbgs() << ")\n");
+
+ // Move the instruction.
+ Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt());
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
new file mode 100644
index 000000000000..2002e680d195
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -0,0 +1,648 @@
+//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file transforms calls of the current function (self recursion) followed
+// by a return instruction with a branch to the entry of the function, creating
+// a loop. This pass also implements the following extensions to the basic
+// algorithm:
+//
+// 1. Trivial instructions between the call and return do not prevent the
+// transformation from taking place, though currently the analysis cannot
+// support moving any really useful instructions (only dead ones).
+// 2. This pass transforms functions that are prevented from being tail
+// recursive by an associative and commutative expression to use an
+// accumulator variable, thus compiling the typical naive factorial or
+// 'fib' implementation into efficient code.
+// 3. TRE is performed if the function returns void, if the return
+// returns the result returned by the call, or if the function returns a
+// run-time constant on all exits from the function. It is possible, though
+// unlikely, that the return returns something else (like constant 0), and
+// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
+// the function return the exact same value.
+// 4. If it can prove that callees do not access their caller stack frame,
+// they are marked as eligible for tail call elimination (by the code
+// generator).
+//
+// There are several improvements that could be made:
+//
+// 1. If the function has any alloca instructions, these instructions will be
+// moved out of the entry block of the function, causing them to be
+// evaluated each time through the tail recursion. Safely keeping allocas
+// in the entry block requires analysis to proves that the tail-called
+// function does not read or write the stack object.
+// 2. Tail recursion is only performed if the call immediately precedes the
+// return instruction. It's possible that there could be a jump between
+// the call and the return.
+// 3. There can be intervening operations between the call and the return that
+// prevent the TRE from occurring. For example, there could be GEP's and
+// stores to memory that will not be read or written by the call. This
+// requires some substantial analysis (such as with DSA) to prove safe to
+// move ahead of the call, but doing so could allow many more TREs to be
+// performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
+// 4. The algorithm we use to detect if callees access their caller stack
+// frames is very primitive.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailcallelim"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumRetDuped, "Number of return duplicated");
+STATISTIC(NumAccumAdded, "Number of accumulators introduced");
+
+namespace {
+ struct TailCallElim : public FunctionPass {
+ const TargetTransformInfo *TTI;
+
+ static char ID; // Pass identification, replacement for typeid
+ TailCallElim() : FunctionPass(ID) {
+ initializeTailCallElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual bool runOnFunction(Function &F);
+
+ private:
+ CallInst *FindTRECandidate(Instruction *I,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool CanMoveAboveCall(Instruction *I, CallInst *CI);
+ Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
+ };
+}
+
+char TailCallElim::ID = 0;
+INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false)
+
+// Public interface to the TailCallElimination pass
+FunctionPass *llvm::createTailCallEliminationPass() {
+ return new TailCallElim();
+}
+
+void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfo>();
+}
+
+/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
+/// callees of this function. We only do very simple analysis right now, this
+/// could be expanded in the future to use mod/ref information for particular
+/// call sites if desired.
+static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
+ // FIXME: do simple 'address taken' analysis.
+ return true;
+}
+
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
+/// instructions. If it contains any that might be accessed by calls, return
+/// true.
+static bool CheckForEscapingAllocas(BasicBlock *BB,
+ bool &CannotTCETailMarkedCall) {
+ bool RetVal = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ RetVal |= AllocaMightEscapeToCalls(AI);
+
+ // If this alloca is in the body of the function, or if it is a variable
+ // sized allocation, we cannot tail call eliminate calls marked 'tail'
+ // with this mechanism.
+ if (BB != &BB->getParent()->getEntryBlock() ||
+ !isa<ConstantInt>(AI->getArraySize()))
+ CannotTCETailMarkedCall = true;
+ }
+ return RetVal;
+}
+
+bool TailCallElim::runOnFunction(Function &F) {
+ // If this function is a varargs function, we won't be able to PHI the args
+ // right, so don't even try to convert it...
+ if (F.getFunctionType()->isVarArg()) return false;
+
+ TTI = &getAnalysis<TargetTransformInfo>();
+ BasicBlock *OldEntry = 0;
+ bool TailCallsAreMarkedTail = false;
+ SmallVector<PHINode*, 8> ArgumentPHIs;
+ bool MadeChange = false;
+ bool FunctionContainsEscapingAllocas = false;
+
+ // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
+ // marked with the 'tail' attribute, because doing so would cause the stack
+ // size to increase (real TCE would deallocate variable sized allocas, TCE
+ // doesn't).
+ bool CannotTCETailMarkedCall = false;
+
+ // Loop over the function, looking for any returning blocks, and keeping track
+ // of whether this function has any non-trivially used allocas.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
+ break;
+
+ FunctionContainsEscapingAllocas |=
+ CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
+ }
+
+ /// FIXME: The code generator produces really bad code when an 'escaping
+ /// alloca' is changed from being a static alloca to being a dynamic alloca.
+ /// Until this is resolved, disable this transformation if that would ever
+ /// happen. This bug is PR962.
+ if (FunctionContainsEscapingAllocas)
+ return false;
+
+ // Second pass, change any tail calls to loops.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,CannotTCETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTCETailMarkedCall);
+ MadeChange |= Change;
+ }
+ }
+
+ // If we eliminated any tail recursions, it's possible that we inserted some
+ // silly PHI nodes which just merge an initial value (the incoming operand)
+ // with themselves. Check to see if we did and clean up our mess if so. This
+ // occurs when a function passes an argument straight through to its tail
+ // call.
+ if (!ArgumentPHIs.empty()) {
+ for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+ PHINode *PN = ArgumentPHIs[i];
+
+ // If the PHI Node is a dynamic constant, replace it with the value it is.
+ if (Value *PNV = SimplifyInstruction(PN)) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+ }
+
+ // Finally, if this function contains no non-escaping allocas, or calls
+ // setjmp, mark all calls in the function as eligible for tail calls
+ //(there is no stack memory for them to access).
+ if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ CI->setTailCall();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+
+/// CanMoveAboveCall - Return true if it is safe to move the specified
+/// instruction from after the call to before the call, assuming that all
+/// instructions between the call and this instruction are movable.
+///
+bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
+ // FIXME: We can move load/store/call/free instructions above the call if the
+ // call does not mod/ref the memory location being processed.
+ if (I->mayHaveSideEffects()) // This also handles volatile loads.
+ return false;
+
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ // Loads may always be moved above calls without side effects.
+ if (CI->mayHaveSideEffects()) {
+ // Non-volatile loads may be moved above a call with side effects if it
+ // does not write to memory and the load provably won't trap.
+ // FIXME: Writes to memory only matter if they may alias the pointer
+ // being loaded from.
+ if (CI->mayWriteToMemory() ||
+ !isSafeToLoadUnconditionally(L->getPointerOperand(), L,
+ L->getAlignment()))
+ return false;
+ }
+ }
+
+ // Otherwise, if this is a side-effect free instruction, check to make sure
+ // that it does not use the return value of the call. If it doesn't use the
+ // return value of the call, it must only use things that are defined before
+ // the call, or movable instructions between the call and the instruction
+ // itself.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i) == CI)
+ return false;
+ return true;
+}
+
+// isDynamicConstant - Return true if the specified value is the same when the
+// return would exit as it was when the initial iteration of the recursive
+// function was executed.
+//
+// We currently handle static constants and arguments that are not modified as
+// part of the recursion.
+//
+static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
+ if (isa<Constant>(V)) return true; // Static constants are always dyn consts
+
+ // Check to see if this is an immutable argument, if so, the value
+ // will be available to initialize the accumulator.
+ if (Argument *Arg = dyn_cast<Argument>(V)) {
+ // Figure out which argument number this is...
+ unsigned ArgNo = 0;
+ Function *F = CI->getParent()->getParent();
+ for (Function::arg_iterator AI = F->arg_begin(); &*AI != Arg; ++AI)
+ ++ArgNo;
+
+ // If we are passing this argument into call as the corresponding
+ // argument operand, then the argument is dynamically constant.
+ // Otherwise, we cannot transform this function safely.
+ if (CI->getArgOperand(ArgNo) == Arg)
+ return true;
+ }
+
+ // Switch cases are always constant integers. If the value is being switched
+ // on and the return is only reachable from one of its cases, it's
+ // effectively constant.
+ if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor())
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator()))
+ if (SI->getCondition() == V)
+ return SI->getDefaultDest() != RI->getParent();
+
+ // Not a constant or immutable argument, we can't safely transform.
+ return false;
+}
+
+// getCommonReturnValue - Check to see if the function containing the specified
+// tail call consistently returns the same runtime-constant value at all exit
+// points except for IgnoreRI. If so, return the returned value.
+//
+static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
+ Function *F = CI->getParent()->getParent();
+ Value *ReturnedValue = 0;
+
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
+ if (RI == 0 || RI == IgnoreRI) continue;
+
+ // We can only perform this transformation if the value returned is
+ // evaluatable at the start of the initial invocation of the function,
+ // instead of at the end of the evaluation.
+ //
+ Value *RetOp = RI->getOperand(0);
+ if (!isDynamicConstant(RetOp, CI, RI))
+ return 0;
+
+ if (ReturnedValue && RetOp != ReturnedValue)
+ return 0; // Cannot transform if differing values are returned.
+ ReturnedValue = RetOp;
+ }
+ return ReturnedValue;
+}
+
+/// CanTransformAccumulatorRecursion - If the specified instruction can be
+/// transformed using accumulator recursion elimination, return the constant
+/// which is the start of the accumulator value. Otherwise return null.
+///
+Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
+ CallInst *CI) {
+ if (!I->isAssociative() || !I->isCommutative()) return 0;
+ assert(I->getNumOperands() == 2 &&
+ "Associative/commutative operations should have 2 args!");
+
+ // Exactly one operand should be the result of the call instruction.
+ if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
+ (I->getOperand(0) != CI && I->getOperand(1) != CI))
+ return 0;
+
+ // The only user of this instruction we allow is a single return instruction.
+ if (!I->hasOneUse() || !isa<ReturnInst>(I->use_back()))
+ return 0;
+
+ // Ok, now we have to check all of the other return instructions in this
+ // function. If they return non-constants or differing values, then we cannot
+ // transform the function safely.
+ return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
+}
+
+static Instruction *FirstNonDbg(BasicBlock::iterator I) {
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ return &*I;
+}
+
+CallInst*
+TailCallElim::FindTRECandidate(Instruction *TI,
+ bool CannotTailCallElimCallsMarkedTail) {
+ BasicBlock *BB = TI->getParent();
+ Function *F = BB->getParent();
+
+ if (&BB->front() == TI) // Make sure there is something before the terminator.
+ return 0;
+
+ // Scan backwards from the return, checking to see if there is a tail call in
+ // this block. If so, set CI to it.
+ CallInst *CI = 0;
+ BasicBlock::iterator BBI = TI;
+ while (true) {
+ CI = dyn_cast<CallInst>(BBI);
+ if (CI && CI->getCalledFunction() == F)
+ break;
+
+ if (BBI == BB->begin())
+ return 0; // Didn't find a potential tail call.
+ --BBI;
+ }
+
+ // If this call is marked as a tail call, and if there are dynamic allocas in
+ // the function, we cannot perform this optimization.
+ if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
+ return 0;
+
+ // As a special case, detect code like this:
+ // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
+ // and disable this xform in this case, because the code generator will
+ // lower the call to fabs into inline code.
+ if (BB == &F->getEntryBlock() &&
+ FirstNonDbg(BB->front()) == CI &&
+ FirstNonDbg(llvm::next(BB->begin())) == TI &&
+ CI->getCalledFunction() &&
+ !TTI->isLoweredToCall(CI->getCalledFunction())) {
+ // A single-block function with just a call and a return. Check that
+ // the arguments match.
+ CallSite::arg_iterator I = CallSite(CI).arg_begin(),
+ E = CallSite(CI).arg_end();
+ Function::arg_iterator FI = F->arg_begin(),
+ FE = F->arg_end();
+ for (; I != E && FI != FE; ++I, ++FI)
+ if (*I != &*FI) break;
+ if (I == E && FI == FE)
+ return 0;
+ }
+
+ return CI;
+}
+
+bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ // If we are introducing accumulator recursion to eliminate operations after
+ // the call instruction that are both associative and commutative, the initial
+ // value for the accumulator is placed in this variable. If this value is set
+ // then we actually perform accumulator recursion elimination instead of
+ // simple tail recursion elimination. If the operation is an LLVM instruction
+ // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then
+ // we are handling the case when the return instruction returns a constant C
+ // which is different to the constant returned by other return instructions
+ // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
+ // special case of accumulator recursion, the operation being "return C".
+ Value *AccumulatorRecursionEliminationInitVal = 0;
+ Instruction *AccumulatorRecursionInstr = 0;
+
+ // Ok, we found a potential tail call. We can currently only transform the
+ // tail call if all of the instructions between the call and the return are
+ // movable to above the call itself, leaving the call next to the return.
+ // Check that this is the case now.
+ BasicBlock::iterator BBI = CI;
+ for (++BBI; &*BBI != Ret; ++BBI) {
+ if (CanMoveAboveCall(BBI, CI)) continue;
+
+ // If we can't move the instruction above the call, it might be because it
+ // is an associative and commutative operation that could be transformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
+ if ((AccumulatorRecursionEliminationInitVal =
+ CanTransformAccumulatorRecursion(BBI, CI))) {
+ // Yes, this is accumulator recursion. Remember which instruction
+ // accumulates.
+ AccumulatorRecursionInstr = BBI;
+ } else {
+ return false; // Otherwise, we cannot eliminate the tail recursion!
+ }
+ }
+
+ // We can only transform call/return pairs that either ignore the return value
+ // of the call and return void, ignore the value of the call and return a
+ // constant, return the value returned by the tail call, or that are being
+ // accumulator recursion variable eliminated.
+ if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
+ !isa<UndefValue>(Ret->getReturnValue()) &&
+ AccumulatorRecursionEliminationInitVal == 0 &&
+ !getCommonReturnValue(0, CI)) {
+ // One case remains that we are able to handle: the current return
+ // instruction returns a constant, and all other return instructions
+ // return a different constant.
+ if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
+ return false; // Current return instruction does not return a constant.
+ // Check that all other return instructions return a common constant. If
+ // so, record it in AccumulatorRecursionEliminationInitVal.
+ AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
+ if (!AccumulatorRecursionEliminationInitVal)
+ return false;
+ }
+
+ BasicBlock *BB = Ret->getParent();
+ Function *F = BB->getParent();
+
+ // OK! We can transform this tail call. If this is the first one found,
+ // create the new entry block, allowing us to branch back to the old entry.
+ if (OldEntry == 0) {
+ OldEntry = &F->getEntryBlock();
+ BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
+ NewEntry->takeName(OldEntry);
+ OldEntry->setName("tailrecurse");
+ BranchInst::Create(OldEntry, NewEntry);
+
+ // If this tail call is marked 'tail' and if there are any allocas in the
+ // entry block, move them up to the new entry block.
+ TailCallsAreMarkedTail = CI->isTailCall();
+ if (TailCallsAreMarkedTail)
+ // Move all fixed sized allocas from OldEntry to NewEntry.
+ for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
+ NEBI = NewEntry->begin(); OEBI != E; )
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
+ if (isa<ConstantInt>(AI->getArraySize()))
+ AI->moveBefore(NEBI);
+
+ // Now that we have created a new block, which jumps to the entry
+ // block, insert a PHI node for each argument of the function.
+ // For now, we initialize each PHI to only have the real arguments
+ // which are passed in.
+ Instruction *InsertPos = OldEntry->begin();
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ PHINode *PN = PHINode::Create(I->getType(), 2,
+ I->getName() + ".tr", InsertPos);
+ I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
+ PN->addIncoming(I, NewEntry);
+ ArgumentPHIs.push_back(PN);
+ }
+ }
+
+ // If this function has self recursive calls in the tail position where some
+ // are marked tail and some are not, only transform one flavor or another. We
+ // have to choose whether we move allocas in the entry block to the new entry
+ // block or not, so we can't make a good choice for both. NOTE: We could do
+ // slightly better here in the case that the function has no entry block
+ // allocas.
+ if (TailCallsAreMarkedTail && !CI->isTailCall())
+ return false;
+
+ // Ok, now that we know we have a pseudo-entry block WITH all of the
+ // required PHI nodes, add entries into the PHI node for the actual
+ // parameters passed into the tail-recursive call.
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
+
+ // If we are introducing an accumulator variable to eliminate the recursion,
+ // do so now. Note that we _know_ that no subsequent tail recursion
+ // eliminations will happen on this function because of the way the
+ // accumulator recursion predicate is set up.
+ //
+ if (AccumulatorRecursionEliminationInitVal) {
+ Instruction *AccRecInstr = AccumulatorRecursionInstr;
+ // Start by inserting a new PHI node for the accumulator.
+ pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
+ PHINode *AccPN =
+ PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+ std::distance(PB, PE) + 1,
+ "accumulator.tr", OldEntry->begin());
+
+ // Loop over all of the predecessors of the tail recursion block. For the
+ // real entry into the function we seed the PHI with the initial value,
+ // computed earlier. For any other existing branches to this block (due to
+ // other tail recursions eliminated) the accumulator is not modified.
+ // Because we haven't added the branch in the current block to OldEntry yet,
+ // it will not show up as a predecessor.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (P == &F->getEntryBlock())
+ AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
+ else
+ AccPN->addIncoming(AccPN, P);
+ }
+
+ if (AccRecInstr) {
+ // Add an incoming argument for the current block, which is computed by
+ // our associative and commutative accumulator instruction.
+ AccPN->addIncoming(AccRecInstr, BB);
+
+ // Next, rewrite the accumulator recursion instruction so that it does not
+ // use the result of the call anymore, instead, use the PHI node we just
+ // inserted.
+ AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+ } else {
+ // Add an incoming argument for the current block, which is just the
+ // constant returned by the current return instruction.
+ AccPN->addIncoming(Ret->getReturnValue(), BB);
+ }
+
+ // Finally, rewrite any return instructions in the program to return the PHI
+ // node instead of the "initval" that they do currently. This loop will
+ // actually rewrite the return value we are destroying, but that's ok.
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
+ RI->setOperand(0, AccPN);
+ ++NumAccumAdded;
+ }
+
+ // Now that all of the PHI nodes are in place, remove the call and
+ // ret instructions, replacing them with an unconditional branch.
+ BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
+ NewBI->setDebugLoc(CI->getDebugLoc());
+
+ BB->getInstList().erase(Ret); // Remove return.
+ BB->getInstList().erase(CI); // Remove call.
+ ++NumEliminated;
+ return true;
+}
+
+bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ bool Change = false;
+
+ // If the return block contains nothing but the return and PHI's,
+ // there might be an opportunity to duplicate the return in its
+ // predecessors and perform TRC there. Look for predecessors that end
+ // in unconditional branch and recursive call(s).
+ SmallVector<BranchInst*, 8> UncondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ TerminatorInst *PTI = Pred->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(BI);
+ }
+
+ while (!UncondBranchPreds.empty()) {
+ BranchInst *BI = UncondBranchPreds.pop_back_val();
+ BasicBlock *Pred = BI->getParent();
+ if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred),
+ OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+ ++NumRetDuped;
+ Change = true;
+ }
+ }
+
+ return Change;
+}
+
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
+ if (!CI)
+ return false;
+
+ return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
new file mode 100644
index 000000000000..ba99d2e662e4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -0,0 +1,728 @@
+//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform manipulations on basic blocks, and
+// instructions contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+using namespace llvm;
+
+/// DeleteDeadBlock - Delete the specified block, which must have no
+/// predecessors.
+void llvm::DeleteDeadBlock(BasicBlock *BB) {
+ assert((pred_begin(BB) == pred_end(BB) ||
+ // Can delete self loop.
+ BB->getSinglePredecessor() == BB) && "Block is not dead!");
+ TerminatorInst *BBTerm = BB->getTerminator();
+
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i)
+ BBTerm->getSuccessor(i)->removePredecessor(BB);
+
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+ // If this instruction is used, replace uses with an arbitrary value.
+ // Because control flow can't get here, we don't care what we replace the
+ // value with. Note that since this block is unreachable, and all values
+ // contained within it must dominate their uses, that all uses will
+ // eventually be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ BB->getInstList().pop_back();
+ }
+
+ // Zap the block!
+ BB->eraseFromParent();
+}
+
+/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are
+/// any single-entry PHI nodes in it, fold them away. This handles the case
+/// when all entries to the PHI nodes in a block are guaranteed equal, such as
+/// when the block has exactly one predecessor.
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+ if (!isa<PHINode>(BB->begin())) return;
+
+ AliasAnalysis *AA = 0;
+ MemoryDependenceAnalysis *MemDep = 0;
+ if (P) {
+ AA = P->getAnalysisIfAvailable<AliasAnalysis>();
+ MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
+ }
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+
+ if (MemDep)
+ MemDep->removeInstruction(PN); // Memdep updates AA itself.
+ else if (AA && isa<PointerType>(PN->getType()))
+ AA->deleteValue(PN);
+
+ PN->eraseFromParent();
+ }
+}
+
+
+/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
+/// is dead. Also recursively delete any operands that become dead as
+/// a result. This includes tracing the def-use list from the PHI to see if
+/// it is ultimately unused or if it reaches an unused cycle.
+bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
+ // Recursively deleting a PHI may cause multiple PHIs to be deleted
+ // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ bool Changed = false;
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
+ Changed |= RecursivelyDeleteDeadPHINode(PN, TLI);
+
+ return Changed;
+}
+
+/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
+/// if possible. The return value indicates success or failure.
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
+ // Don't merge away blocks who have their address taken.
+ if (BB->hasAddressTaken()) return false;
+
+ // Can't merge if there are multiple predecessors, or no predecessors.
+ BasicBlock *PredBB = BB->getUniquePredecessor();
+ if (!PredBB) return false;
+
+ // Don't break self-loops.
+ if (PredBB == BB) return false;
+ // Don't break invokes.
+ if (isa<InvokeInst>(PredBB->getTerminator())) return false;
+
+ succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
+ BasicBlock *OnlySucc = BB;
+ for (; SI != SE; ++SI)
+ if (*SI != OnlySucc) {
+ OnlySucc = 0; // There are multiple distinct successors!
+ break;
+ }
+
+ // Can't merge if there are multiple successors.
+ if (!OnlySucc) return false;
+
+ // Can't merge if there is PHI loop.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN)
+ return false;
+ } else
+ break;
+ }
+
+ // Begin by getting rid of unneeded PHIs.
+ if (isa<PHINode>(BB->front()))
+ FoldSingleEntryPHINodes(BB, P);
+
+ // Delete the unconditional branch from the predecessor...
+ PredBB->getInstList().pop_back();
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(PredBB);
+
+ // Move all definitions in the successor to the predecessor...
+ PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+ // Inherit predecessors name if it exists.
+ if (!PredBB->hasName())
+ PredBB->takeName(BB);
+
+ // Finally, erase the old block and update dominator info.
+ if (P) {
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(PredBB);
+ SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(),
+ DE = Children.end(); DI != DE; ++DI)
+ DT->changeImmediateDominator(*DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
+
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ LI->removeBlock(BB);
+
+ if (MemoryDependenceAnalysis *MD =
+ P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
+ MD->invalidateCachedPredecessors();
+ }
+ }
+
+ BB->eraseFromParent();
+ return true;
+}
+
+/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
+/// with a value, then remove and delete the original instruction.
+///
+void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Value *V) {
+ Instruction &I = *BI;
+ // Replaces all of the uses of the instruction with uses of the value
+ I.replaceAllUsesWith(V);
+
+ // Make sure to propagate a name if there is one already.
+ if (I.hasName() && !V->hasName())
+ V->takeName(&I);
+
+ // Delete the unnecessary instruction now...
+ BI = BIL.erase(BI);
+}
+
+
+/// ReplaceInstWithInst - Replace the instruction specified by BI with the
+/// instruction specified by I. The original instruction is deleted and BI is
+/// updated to point to the new instruction.
+///
+void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Instruction *I) {
+ assert(I->getParent() == 0 &&
+ "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+ // Insert the new instruction into the basic block...
+ BasicBlock::iterator New = BIL.insert(BI, I);
+
+ // Replace all uses of the old instruction, and delete it.
+ ReplaceInstWithValue(BIL, BI, I);
+
+ // Move BI back to point to the newly inserted instruction
+ BI = New;
+}
+
+/// ReplaceInstWithInst - Replace the instruction specified by From with the
+/// instruction specified by To.
+///
+void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
+ BasicBlock::iterator BI(From);
+ ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
+}
+
+/// GetSuccessorNumber - Search for the specified successor of basic block BB
+/// and return its position in the terminator instruction's list of
+/// successors. It is an error to call this with a block that is not a
+/// successor.
+unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
+ TerminatorInst *Term = BB->getTerminator();
+#ifndef NDEBUG
+ unsigned e = Term->getNumSuccessors();
+#endif
+ for (unsigned i = 0; ; ++i) {
+ assert(i != e && "Didn't find edge?");
+ if (Term->getSuccessor(i) == Succ)
+ return i;
+ }
+}
+
+/// SplitEdge - Split the edge connecting specified block. Pass P must
+/// not be NULL.
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+ unsigned SuccNum = GetSuccessorNumber(BB, Succ);
+
+ // If this is a critical edge, let SplitCriticalEdge do it.
+ TerminatorInst *LatchTerm = BB->getTerminator();
+ if (SplitCriticalEdge(LatchTerm, SuccNum, P))
+ return LatchTerm->getSuccessor(SuccNum);
+
+ // If the edge isn't critical, then BB has a single successor or Succ has a
+ // single pred. Split the block.
+ BasicBlock::iterator SplitPoint;
+ if (BasicBlock *SP = Succ->getSinglePredecessor()) {
+ // If the successor only has a single pred, split the top of the successor
+ // block.
+ assert(SP == BB && "CFG broken");
+ SP = NULL;
+ return SplitBlock(Succ, Succ->begin(), P);
+ }
+
+ // Otherwise, if BB has a single successor, split it at the bottom of the
+ // block.
+ assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+ "Should have a single succ!");
+ return SplitBlock(BB, BB->getTerminator(), P);
+}
+
+/// SplitBlock - Split the specified block at the specified instruction - every
+/// thing before SplitPt stays in Old and everything starting with SplitPt moves
+/// to a new block. The two blocks are joined by an unconditional branch and
+/// the loop info is updated.
+///
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+ BasicBlock::iterator SplitIt = SplitPt;
+ while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt))
+ ++SplitIt;
+ BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+
+ // The new block lives in whichever loop the old one did. This preserves
+ // LCSSA as well, because we force the split point to be after any PHI nodes.
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ if (Loop *L = LI->getLoopFor(Old))
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ // Old dominates New. New node dominates all other nodes dominated by Old.
+ if (DomTreeNode *OldNode = DT->getNode(Old)) {
+ std::vector<DomTreeNode *> Children;
+ for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
+ I != E; ++I)
+ Children.push_back(*I);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New,Old);
+ for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ DT->changeImmediateDominator(*I, NewNode);
+ }
+ }
+
+ return New;
+}
+
+/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCCSA
+/// analysis information.
+static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock *> Preds,
+ Pass *P, bool &HasLoopExit) {
+ if (!P) return;
+
+ LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+ Loop *L = LI ? LI->getLoopFor(OldBB) : 0;
+
+ // If we need to preserve loop analyses, collect some information about how
+ // this split will affect loops.
+ bool IsLoopEntry = !!L;
+ bool SplitMakesNewLoopHeader = false;
+ if (LI) {
+ bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+ for (ArrayRef<BasicBlock*>::iterator
+ i = Preds.begin(), e = Preds.end(); i != e; ++i) {
+ BasicBlock *Pred = *i;
+
+ // If we need to preserve LCSSA, determine if any of the preds is a loop
+ // exit.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Pred))
+ if (!PL->contains(OldBB))
+ HasLoopExit = true;
+
+ // If we need to preserve LoopInfo, note whether any of the preds crosses
+ // an interesting loop boundary.
+ if (!L) continue;
+ if (L->contains(Pred))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
+ }
+ }
+
+ // Update dominator tree if available.
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ if (!L) return;
+
+ if (IsLoopEntry) {
+ // Add the new block to the nearest enclosing loop (and not an adjacent
+ // loop). To find this, examine each of the predecessors and determine which
+ // loops enclose them, and select the most-nested loop which contains the
+ // loop containing the block being split.
+ Loop *InnermostPredLoop = 0;
+ for (ArrayRef<BasicBlock*>::iterator
+ i = Preds.begin(), e = Preds.end(); i != e; ++i) {
+ BasicBlock *Pred = *i;
+ if (Loop *PredLoop = LI->getLoopFor(Pred)) {
+ // Seek a loop which actually contains the block being split (to avoid
+ // adjacent loops).
+ while (PredLoop && !PredLoop->contains(OldBB))
+ PredLoop = PredLoop->getParentLoop();
+
+ // Select the most-nested of these loops which contains the block.
+ if (PredLoop && PredLoop->contains(OldBB) &&
+ (!InnermostPredLoop ||
+ InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+ InnermostPredLoop = PredLoop;
+ }
+ }
+
+ if (InnermostPredLoop)
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ L->addBasicBlockToLoop(NewBB, LI->getBase());
+ if (SplitMakesNewLoopHeader)
+ L->moveToHeader(NewBB);
+ }
+}
+
+/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming
+/// from NewBB. This also updates AliasAnalysis, if available.
+static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock*> Preds, BranchInst *BI,
+ Pass *P, bool HasLoopExit) {
+ // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
+ AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+ for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I++);
+
+ // Check to see if all of the values coming in are the same. If so, we
+ // don't need to create a new PHI node, unless it's needed for LCSSA.
+ Value *InVal = 0;
+ if (!HasLoopExit) {
+ InVal = PN->getIncomingValueForBlock(Preds[0]);
+ for (unsigned i = 1, e = Preds.size(); i != e; ++i)
+ if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+ InVal = 0;
+ break;
+ }
+ }
+
+ if (InVal) {
+ // If all incoming values for the new PHI would be the same, just don't
+ // make a new PHI. Instead, just remove the incoming values from the old
+ // PHI.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+ PN->removeIncomingValue(Preds[i], false);
+ } else {
+ // If the values coming into the block are not the same, we need a PHI.
+ // Create the new PHI node, insert it into NewBB at the end of the block
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
+ if (AA) AA->copyValue(PN, NewPHI);
+
+ // Move all of the PHI values for 'Preds' to the new PHI.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ Value *V = PN->removeIncomingValue(Preds[i], false);
+ NewPHI->addIncoming(V, Preds[i]);
+ }
+
+ InVal = NewPHI;
+ }
+
+ // Add an incoming value to the PHI node in the loop for the preheader
+ // edge.
+ PN->addIncoming(InVal, NewBB);
+ }
+}
+
+/// SplitBlockPredecessors - This method transforms BB by introducing a new
+/// basic block into the function, and moving some of the predecessors of BB to
+/// be predecessors of the new block. The new predecessors are indicated by the
+/// Preds array, which has NumPreds elements in it. The new block is given a
+/// suffix of 'Suffix'.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// LoopInfo, and LCCSA but no other analyses. In particular, it does not
+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
+///
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix, Pass *P) {
+ // Create new basic block, insert right before the original block.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+ BB->getParent(), BB);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+ // Move the edges from Preds to point to NewBB instead of BB.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+ }
+
+ // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
+ // node becomes an incoming value for BB's phi node. However, if the Preds
+ // list is empty, we need to insert dummy entries into the PHI nodes in BB to
+ // account for the newly created predecessor.
+ if (Preds.size() == 0) {
+ // Insert dummy values as the incoming value.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
+ cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
+ return NewBB;
+ }
+
+ // Update DominatorTree, LoopInfo, and LCCSA analysis information.
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
+
+ // Update the PHI nodes in BB with the values coming from NewBB.
+ UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
+ return NewBB;
+}
+
+/// SplitLandingPadPredecessors - This method transforms the landing pad,
+/// OrigBB, by introducing two new basic blocks into the function. One of those
+/// new basic blocks gets the predecessors listed in Preds. The other basic
+/// block gets the remaining predecessors of OrigBB. The landingpad instruction
+/// OrigBB is clone into both of the new basic blocks. The new blocks are given
+/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular,
+/// it does not preserve LoopSimplify (because it's complicated to handle the
+/// case where one of the edges being split is an exit of a loop with other
+/// exits).
+///
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix1, const char *Suffix2,
+ Pass *P,
+ SmallVectorImpl<BasicBlock*> &NewBBs) {
+ assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
+
+ // Create a new basic block for OrigBB's predecessors listed in Preds. Insert
+ // it right before the original block.
+ BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix1,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB1);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1);
+
+ // Move the edges from Preds to point to NewBB1 instead of OrigBB.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
+ }
+
+ // Update DominatorTree, LoopInfo, and LCCSA analysis information.
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB1.
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit);
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ SmallVector<BasicBlock*, 8> NewBB2Preds;
+ for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB);
+ i != e; ) {
+ BasicBlock *Pred = *i++;
+ if (Pred == NewBB1) continue;
+ assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ NewBB2Preds.push_back(Pred);
+ e = pred_end(OrigBB);
+ }
+
+ BasicBlock *NewBB2 = 0;
+ if (!NewBB2Preds.empty()) {
+ // Create another basic block for the rest of OrigBB's predecessors.
+ NewBB2 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix2,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB2);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2);
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i)
+ (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
+
+ // Update DominatorTree, LoopInfo, and LCCSA analysis information.
+ HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB2.
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit);
+ }
+
+ LandingPadInst *LPad = OrigBB->getLandingPadInst();
+ Instruction *Clone1 = LPad->clone();
+ Clone1->setName(Twine("lpad") + Suffix1);
+ NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1);
+
+ if (NewBB2) {
+ Instruction *Clone2 = LPad->clone();
+ Clone2->setName(Twine("lpad") + Suffix2);
+ NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);
+
+ // Create a PHI node for the two cloned landingpad instructions.
+ PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
+ PN->addIncoming(Clone1, NewBB1);
+ PN->addIncoming(Clone2, NewBB2);
+ LPad->replaceAllUsesWith(PN);
+ LPad->eraseFromParent();
+ } else {
+ // There is no second clone. Just replace the landing pad with the first
+ // clone.
+ LPad->replaceAllUsesWith(Clone1);
+ LPad->eraseFromParent();
+ }
+}
+
+/// FindFunctionBackedges - Analyze the specified function to find all of the
+/// loop backedges in the function and return them. This is a relatively cheap
+/// (compared to computing dominators and loop info) analysis.
+///
+/// The output is added to Result, as pairs of <from,to> edge info.
+void llvm::FindFunctionBackedges(const Function &F,
+ SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
+ const BasicBlock *BB = &F.getEntryBlock();
+ if (succ_begin(BB) == succ_end(BB))
+ return;
+
+ SmallPtrSet<const BasicBlock*, 8> Visited;
+ SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
+ SmallPtrSet<const BasicBlock*, 8> InStack;
+
+ Visited.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ InStack.insert(BB);
+ do {
+ std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
+ const BasicBlock *ParentBB = Top.first;
+ succ_const_iterator &I = Top.second;
+
+ bool FoundNew = false;
+ while (I != succ_end(ParentBB)) {
+ BB = *I++;
+ if (Visited.insert(BB)) {
+ FoundNew = true;
+ break;
+ }
+ // Successor is in VisitStack, it's a back edge.
+ if (InStack.count(BB))
+ Result.push_back(std::make_pair(ParentBB, BB));
+ }
+
+ if (FoundNew) {
+ // Go down one level if there is a unvisited successor.
+ InStack.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ } else {
+ // Go up one level.
+ InStack.erase(VisitStack.pop_back_val().first);
+ }
+ } while (!VisitStack.empty());
+}
+
+/// FoldReturnIntoUncondBranch - This method duplicates the specified return
+/// instruction into a predecessor which ends in an unconditional branch. If
+/// the return instruction returns a value defined by a PHI, propagate the
+/// right value into the return. It returns the new return instruction in the
+/// predecessor.
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+ BasicBlock *Pred) {
+ Instruction *UncondBranch = Pred->getTerminator();
+ // Clone the return and add it to the end of the predecessor.
+ Instruction *NewRet = RI->clone();
+ Pred->getInstList().push_back(NewRet);
+
+ // If the return instruction returns a value, and if the value was a
+ // PHI node in "BB", propagate the right value into the return.
+ for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
+ i != e; ++i) {
+ Value *V = *i;
+ Instruction *NewBC = 0;
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
+ // Return value might be bitcasted. Clone and insert it before the
+ // return instruction.
+ V = BCI->getOperand(0);
+ NewBC = BCI->clone();
+ Pred->getInstList().insert(NewRet, NewBC);
+ *i = NewBC;
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (PN->getParent() == BB) {
+ if (NewBC)
+ NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred));
+ else
+ *i = PN->getIncomingValueForBlock(Pred);
+ }
+ }
+ }
+
+ // Update any PHI nodes in the returning block to realize that we no
+ // longer branch to them.
+ BB->removePredecessor(Pred);
+ UncondBranch->eraseFromParent();
+ return cast<ReturnInst>(NewRet);
+}
+
+/// SplitBlockAndInsertIfThen - Split the containing block at the
+/// specified instruction - everything before and including Cmp stays
+/// in the old basic block, and everything after Cmp is moved to a
+/// new block. The two blocks are connected by a conditional branch
+/// (with value of Cmp being the condition).
+/// Before:
+/// Head
+/// Cmp
+/// Tail
+/// After:
+/// Head
+/// Cmp
+/// if (Cmp)
+/// ThenBlock
+/// Tail
+///
+/// If Unreachable is true, then ThenBlock ends with
+/// UnreachableInst, otherwise it branches to Tail.
+/// Returns the NewBasicBlock's terminator.
+
+TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
+ bool Unreachable, MDNode *BranchWeights) {
+ Instruction *SplitBefore = Cmp->getNextNode();
+ BasicBlock *Head = SplitBefore->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ TerminatorInst *HeadOldTerm = Head->getTerminator();
+ LLVMContext &C = Head->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ TerminatorInst *CheckTerm;
+ if (Unreachable)
+ CheckTerm = new UnreachableInst(C, ThenBlock);
+ else
+ CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ BranchInst *HeadNewTerm =
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
+ HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+ return CheckTerm;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
new file mode 100644
index 000000000000..8513772da2e8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -0,0 +1,409 @@
+//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block. This pass may be "required" by passes that
+// cannot deal with critical edges. For this usage, the structure type is
+// forward declared. This pass obviously invalidates the CFG, but can update
+// dominator trees.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "break-crit-edges"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+STATISTIC(NumBroken, "Number of blocks inserted");
+
+namespace {
+ struct BreakCriticalEdges : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BreakCriticalEdges() : FunctionPass(ID) {
+ initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<ProfileInfo>();
+
+ // No loop canonicalization guarantees are broken by this pass.
+ AU.addPreservedID(LoopSimplifyID);
+ }
+ };
+}
+
+char BreakCriticalEdges::ID = 0;
+INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+ "Break critical edges in CFG", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
+FunctionPass *llvm::createBreakCriticalEdgesPass() {
+ return new BreakCriticalEdges();
+}
+
+// runOnFunction - Loop over all of the edges in the CFG, breaking critical
+// edges as they are found.
+//
+bool BreakCriticalEdges::runOnFunction(Function &F) {
+ bool Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (SplitCriticalEdge(TI, i, this)) {
+ ++NumBroken;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Implementation of the external critical edge manipulation functions
+//===----------------------------------------------------------------------===//
+
+// isCriticalEdge - Return true if the specified edge is a critical edge.
+// Critical edges are edges from a block with multiple successors to a block
+// with multiple predecessors.
+//
+bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+ bool AllowIdenticalEdges) {
+ assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+ if (TI->getNumSuccessors() == 1) return false;
+
+ const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
+
+ // If there is more than one predecessor, this is a critical edge...
+ assert(I != E && "No preds, but we have an edge to the block?");
+ const BasicBlock *FirstPred = *I;
+ ++I; // Skip one edge due to the incoming arc from TI.
+ if (!AllowIdenticalEdges)
+ return I != E;
+
+ // If AllowIdenticalEdges is true, then we allow this edge to be considered
+ // non-critical iff all preds come from TI's block.
+ while (I != E) {
+ const BasicBlock *P = *I;
+ if (P != FirstPred)
+ return true;
+ // Note: leave this as is until no one ever compiles with either gcc 4.0.1
+ // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207
+ E = pred_end(P);
+ ++I;
+ }
+ return false;
+}
+
+/// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
+/// may require new PHIs in the new exit block. This function inserts the
+/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
+/// is the new loop exit block, and DestBB is the old loop exit, now the
+/// successor of SplitBB.
+static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
+ BasicBlock *SplitBB,
+ BasicBlock *DestBB) {
+ // SplitBB shouldn't have anything non-trivial in it yet.
+ assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
+ SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");
+
+ // For each PHI in the destination block.
+ for (BasicBlock::iterator I = DestBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned Idx = PN->getBasicBlockIndex(SplitBB);
+ Value *V = PN->getIncomingValue(Idx);
+
+ // If the input is a PHI which already satisfies LCSSA, don't create
+ // a new one.
+ if (const PHINode *VP = dyn_cast<PHINode>(V))
+ if (VP->getParent() == SplitBB)
+ continue;
+
+ // Otherwise a new PHI is needed. Create one and populate it.
+ PHINode *NewPN =
+ PHINode::Create(PN->getType(), Preds.size(), "split",
+ SplitBB->isLandingPad() ?
+ SplitBB->begin() : SplitBB->getTerminator());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+ NewPN->addIncoming(V, Preds[i]);
+
+ // Update the original PHI.
+ PN->setIncomingValue(Idx, NewPN);
+ }
+}
+
+/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
+/// split the critical edge. This will update DominatorTree information if it
+/// is available, thus calling this pass will not invalidate either of them.
+/// This returns the new block if the edge was split, null otherwise.
+///
+/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
+/// specified successor will be merged into the same critical edge block.
+/// This is most commonly interesting with switch instructions, which may
+/// have many edges to any one destination. This ensures that all edges to that
+/// dest go to one block instead of each going to a different block, but isn't
+/// the standard definition of a "critical edge".
+///
+/// It is invalid to call this function on a critical edge that starts at an
+/// IndirectBrInst. Splitting these edges will almost always create an invalid
+/// program because the address of the new block won't be the one that is jumped
+/// to.
+///
+BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+ Pass *P, bool MergeIdenticalEdges,
+ bool DontDeleteUselessPhis,
+ bool SplitLandingPads) {
+ if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
+
+ assert(!isa<IndirectBrInst>(TI) &&
+ "Cannot split critical edge from IndirectBrInst");
+
+ BasicBlock *TIBB = TI->getParent();
+ BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+
+ // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (DestBB->isLandingPad()) return 0;
+
+ // Create a new basic block, linking it into the CFG.
+ BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
+ TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
+ // Create our unconditional branch.
+ BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
+ NewBI->setDebugLoc(TI->getDebugLoc());
+
+ // Branch to the new block, breaking the edge.
+ TI->setSuccessor(SuccNum, NewBB);
+
+ // Insert the block into the function... right after the block TI lives in.
+ Function &F = *TIBB->getParent();
+ Function::iterator FBBI = TIBB;
+ F.getBasicBlockList().insert(++FBBI, NewBB);
+
+ // If there are any PHI nodes in DestBB, we need to update them so that they
+ // merge incoming values from NewBB instead of from TIBB.
+ {
+ unsigned BBIdx = 0;
+ for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+ // We no longer enter through TIBB, now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from
+ // TIBB to come from NewBB.
+ PHINode *PN = cast<PHINode>(I);
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+ // happens because the BB list of PHI nodes are usually in the same
+ // order.
+ if (PN->getIncomingBlock(BBIdx) != TIBB)
+ BBIdx = PN->getBasicBlockIndex(TIBB);
+ PN->setIncomingBlock(BBIdx, NewBB);
+ }
+ }
+
+ // If there are any other edges from TIBB to DestBB, update those to go
+ // through the split block, making those edges non-critical as well (and
+ // reducing the number of phi entries in the DestBB if relevant).
+ if (MergeIdenticalEdges) {
+ for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (TI->getSuccessor(i) != DestBB) continue;
+
+ // Remove an entry for TIBB from DestBB phi nodes.
+ DestBB->removePredecessor(TIBB, DontDeleteUselessPhis);
+
+ // We found another edge to DestBB, go to NewBB instead.
+ TI->setSuccessor(i, NewBB);
+ }
+ }
+
+
+
+ // If we don't have a pass object, we can't update anything...
+ if (P == 0) return NewBB;
+
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+
+ // If we have nothing to update, just return.
+ if (DT == 0 && LI == 0 && PI == 0)
+ return NewBB;
+
+ // Now update analysis information. Since the only predecessor of NewBB is
+ // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate
+ // anything, as there are other successors of DestBB. However, if all other
+ // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
+ // loop header) then NewBB dominates DestBB.
+ SmallVector<BasicBlock*, 8> OtherPreds;
+
+ // If there is a PHI in the block, loop over predecessors with it, which is
+ // faster than iterating pred_begin/end.
+ if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) != NewBB)
+ OtherPreds.push_back(PN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (P != NewBB)
+ OtherPreds.push_back(P);
+ }
+ }
+
+ bool NewBBDominatesDestBB = true;
+
+ // Should we update DominatorTree information?
+ if (DT) {
+ DomTreeNode *TINode = DT->getNode(TIBB);
+
+ // The new block is not the immediate dominator for any other nodes, but
+ // TINode is the immediate dominator for the new node.
+ //
+ if (TINode) { // Don't break unreachable code!
+ DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
+ DomTreeNode *DestBBNode = 0;
+
+ // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
+ if (!OtherPreds.empty()) {
+ DestBBNode = DT->getNode(DestBB);
+ while (!OtherPreds.empty() && NewBBDominatesDestBB) {
+ if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
+ NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
+ OtherPreds.pop_back();
+ }
+ OtherPreds.clear();
+ }
+
+ // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
+ // doesn't dominate anything.
+ if (NewBBDominatesDestBB) {
+ if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
+ DT->changeImmediateDominator(DestBBNode, NewBBNode);
+ }
+ }
+ }
+
+ // Update LoopInfo if it is around.
+ if (LI) {
+ if (Loop *TIL = LI->getLoopFor(TIBB)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
+ if (TIL == DestLoop) {
+ // Both in the same loop, the NewBB joins loop.
+ DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == DestBB &&
+ "Should not create irreducible loops!");
+ if (Loop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+ }
+ // If TIBB is in a loop and DestBB is outside of that loop, split the
+ // other exit blocks of the loop that also have predecessors outside
+ // the loop, to maintain a LoopSimplify guarantee.
+ if (!TIL->contains(DestBB) &&
+ P->mustPreserveAnalysisID(LoopSimplifyID)) {
+ assert(!TIL->contains(NewBB) &&
+ "Split point for loop exit is contained in loop!");
+
+ // Update LCSSA form in the newly created exit block.
+ if (P->mustPreserveAnalysisID(LCSSAID))
+ createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
+
+ // For each unique exit block...
+ // FIXME: This code is functionally equivalent to the corresponding
+ // loop in LoopSimplify.
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ TIL->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ // Collect all the preds that are inside the loop, and note
+ // whether there are any preds outside the loop.
+ SmallVector<BasicBlock *, 4> Preds;
+ bool HasPredOutsideOfLoop = false;
+ BasicBlock *Exit = ExitBlocks[i];
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (TIL->contains(P)) {
+ if (isa<IndirectBrInst>(P->getTerminator())) {
+ Preds.clear();
+ break;
+ }
+ Preds.push_back(P);
+ } else {
+ HasPredOutsideOfLoop = true;
+ }
+ }
+ // If there are any preds not in the loop, we'll need to split
+ // the edges. The Preds.empty() check is needed because a block
+ // may appear multiple times in the list. We can't use
+ // getUniqueExitBlocks above because that depends on LoopSimplify
+ // form, which we're in the process of restoring!
+ if (!Preds.empty() && HasPredOutsideOfLoop) {
+ if (!Exit->isLandingPad()) {
+ BasicBlock *NewExitBB =
+ SplitBlockPredecessors(Exit, Preds, "split", P);
+ if (P->mustPreserveAnalysisID(LCSSAID))
+ createPHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+ } else if (SplitLandingPads) {
+ SmallVector<BasicBlock*, 8> NewBBs;
+ SplitLandingPadPredecessors(Exit, Preds,
+ ".split1", ".split2",
+ P, NewBBs);
+ if (P->mustPreserveAnalysisID(LCSSAID))
+ createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit);
+ }
+ }
+ }
+ }
+ // LCSSA form was updated above for the case where LoopSimplify is
+ // available, which means that all predecessors of loop exit blocks
+ // are within the loop. Without LoopSimplify form, it would be
+ // necessary to insert a new phi.
+ assert((!P->mustPreserveAnalysisID(LCSSAID) ||
+ P->mustPreserveAnalysisID(LoopSimplifyID)) &&
+ "SplitCriticalEdge doesn't know how to update LCCSA form "
+ "without LoopSimplify!");
+ }
+ }
+
+ // Update ProfileInfo if it is around.
+ if (PI)
+ PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);
+
+ return NewBB;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
new file mode 100644
index 000000000000..6d13217df55d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,597 @@
+//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions that will create standard C libcalls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+
+using namespace llvm;
+
+/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+ return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr");
+}
+
+/// EmitStrLen - Emit a call to the strlen function to the builder, for the
+/// specified pointer. This always returns an integer value of size intptr_t.
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strlen))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrLen = M->getOrInsertFunction("strlen",
+ AttributeSet::get(M->getContext(),
+ AS),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the
+/// specified pointer. Ptr is required to be some pointer type, MaxLen must
+/// be of size_t type, and the return value has 'intptr_t' type.
+Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
+ const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strnlen))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrNLen = M->getOrInsertFunction("strnlen",
+ AttributeSet::get(M->getContext(),
+ AS),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ NULL);
+ CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
+ if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitStrChr - Emit a call to the strchr function to the builder, for the
+/// specified pointer and character. Ptr is required to be some pointer type,
+/// and the return value has 'i8*' type.
+Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+ const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strchr))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AttributeSet AS =
+ AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
+
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *I32Ty = B.getInt32Ty();
+ Constant *StrChr = M->getOrInsertFunction("strchr",
+ AttributeSet::get(M->getContext(),
+ AS),
+ I8Ptr, I8Ptr, I32Ty, NULL);
+ CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
+ ConstantInt::get(I32Ty, C), "strchr");
+ if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
+ IRBuilder<> &B, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strncmp))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *StrNCmp = M->getOrInsertFunction("strncmp",
+ AttributeSet::get(M->getContext(),
+ AS),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
+ CastToCStr(Ptr2, B), Len, "strncmp");
+
+ if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
+/// specified pointer arguments.
+Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ StringRef Name) {
+ if (!TLI->has(LibFunc::strcpy))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrCpy = M->getOrInsertFunction(Name,
+ AttributeSet::get(M->getContext(), AS),
+ I8Ptr, I8Ptr, I8Ptr, NULL);
+ CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
+ Name);
+ if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
+/// specified pointer arguments.
+Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
+ IRBuilder<> &B, const DataLayout *TD,
+ const TargetLibraryInfo *TLI, StringRef Name) {
+ if (!TLI->has(LibFunc::strncpy))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrNCpy = M->getOrInsertFunction(Name,
+ AttributeSet::get(M->getContext(),
+ AS),
+ I8Ptr, I8Ptr, I8Ptr,
+ Len->getType(), NULL);
+ CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
+ Len, "strncpy");
+ if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
+/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
+/// are pointers.
+Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+ IRBuilder<> &B, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memcpy_chk))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS;
+ AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
+ AttributeSet::get(M->getContext(), AS),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context), NULL);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
+ if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
+ Value *Len, IRBuilder<> &B, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memchr))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS;
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemChr = M->getOrInsertFunction("memchr",
+ AttributeSet::get(M->getContext(), AS),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt32Ty(),
+ TD->getIntPtrType(Context),
+ NULL);
+ CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitMemCmp - Emit a call to the memcmp function.
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
+ Value *Len, IRBuilder<> &B, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memcmp))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCmp = M->getOrInsertFunction("memcmp",
+ AttributeSet::get(M->getContext(), AS),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
+/// 'floor'). This function is known to take a single of type matching 'Op' and
+/// returns one value with the same type. If 'Op' is a long double, 'l' is
+/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+ const AttributeSet &Attrs) {
+ SmallString<20> NameBuffer;
+ if (!Op->getType()->isDoubleTy()) {
+ // If we need to add a suffix, copy into NameBuffer.
+ NameBuffer += Name;
+ if (Op->getType()->isFloatTy())
+ NameBuffer += 'f'; // floorf
+ else
+ NameBuffer += 'l'; // floorl
+ Name = NameBuffer;
+ }
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(), NULL);
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+ CI->setAttributes(Attrs);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+/// is an integer.
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::putchar))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char,
+ B.getInt32Ty(),
+ /*isSigned*/true,
+ "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitPutS - Emit a call to the puts function. This assumes that Str is
+/// some pointer.
+Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::puts))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+
+ Value *PutS = M->getOrInsertFunction("puts",
+ AttributeSet::get(M->getContext(), AS),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+/// an integer and File is a pointer to FILE.
+Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+ const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fputc))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fputc",
+ AttributeSet::get(M->getContext(), AS),
+ B.getInt32Ty(),
+ B.getInt32Ty(), File->getType(),
+ NULL);
+ else
+ F = M->getOrInsertFunction("fputc",
+ B.getInt32Ty(),
+ B.getInt32Ty(),
+ File->getType(), NULL);
+ Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
+ "chari");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+/// EmitFPutS - Emit a call to the puts function. Str is required to be a
+/// pointer and File is a pointer to FILE.
+Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+ const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fputs))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ StringRef FPutsName = TLI->getName(LibFunc::fputs);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction(FPutsName,
+ AttributeSet::get(M->getContext(), AS),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ File->getType(), NULL);
+ CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+ IRBuilder<> &B, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fwrite))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 4, Attribute::NoCapture);
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ StringRef FWriteName = TLI->getName(LibFunc::fwrite);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction(FWriteName,
+ AttributeSet::get(M->getContext(), AS),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context),
+ File->getType(), NULL);
+ CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(Context), 1), File);
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
+
+bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ // We really need DataLayout for later.
+ if (!TD) return false;
+
+ this->CI = CI;
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+ IRBuilder<> B(CI);
+
+ if (Name == "__memcpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+ // Should be similar to memcpy.
+ if (Name == "__mempcpy_chk") {
+ return false;
+ }
+
+ if (Name == "__memmove_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__memset_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+ false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(Context))
+ return 0;
+
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+ // st[rp]cpy_chk call which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+ // string lengths for varying.
+ if (isFoldable(2, 1, true)) {
+ Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
+ TLI, Name.substr(2, 6));
+ if (!Ret)
+ return false;
+ replaceCall(Ret);
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TD, TLI,
+ Name.substr(2, 7));
+ if (!Ret)
+ return false;
+ replaceCall(Ret);
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strcat_chk") {
+ return false;
+ }
+
+ if (Name == "__strncat_chk") {
+ return false;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
new file mode 100644
index 000000000000..1f517d038d19
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -0,0 +1,262 @@
+//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an optimization for div and rem on architectures that
+// execute short instructions significantly faster than longer instructions.
+// For example, on Intel Atom 32-bit divides are slow enough that during
+// runtime it is profitable to check the value of the operands, and if they are
+// positive and less than 256 use an unsigned 8-bit divide.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "bypass-slow-division"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+namespace {
+ struct DivOpInfo {
+ bool SignedOp;
+ Value *Dividend;
+ Value *Divisor;
+
+ DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
+ : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
+ };
+
+ struct DivPhiNodes {
+ PHINode *Quotient;
+ PHINode *Remainder;
+
+ DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
+ : Quotient(InQuotient), Remainder(InRemainder) {}
+ };
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<DivOpInfo> {
+ static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
+ return Val1.SignedOp == Val2.SignedOp &&
+ Val1.Dividend == Val2.Dividend &&
+ Val1.Divisor == Val2.Divisor;
+ }
+
+ static DivOpInfo getEmptyKey() {
+ return DivOpInfo(false, 0, 0);
+ }
+
+ static DivOpInfo getTombstoneKey() {
+ return DivOpInfo(true, 0, 0);
+ }
+
+ static unsigned getHashValue(const DivOpInfo &Val) {
+ return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
+ reinterpret_cast<uintptr_t>(Val.Divisor)) ^
+ (unsigned)Val.SignedOp;
+ }
+ };
+
+ typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
+}
+
+// insertFastDiv - Substitutes the div/rem instruction with code that checks the
+// value of the operands and uses a shorter-faster div/rem instruction when
+// possible and the longer-slower div/rem instruction otherwise.
+static bool insertFastDiv(Function &F,
+ Function::iterator &I,
+ BasicBlock::iterator &J,
+ IntegerType *BypassType,
+ bool UseDivOp,
+ bool UseSignedOp,
+ DivCacheTy &PerBBDivCache) {
+ // Get instruction operands
+ Instruction *Instr = J;
+ Value *Dividend = Instr->getOperand(0);
+ Value *Divisor = Instr->getOperand(1);
+
+ if (isa<ConstantInt>(Divisor) ||
+ (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
+ // Operations with immediate values should have
+ // been solved and replaced during compile time.
+ return false;
+ }
+
+ // Basic Block is split before divide
+ BasicBlock *MainBB = I;
+ BasicBlock *SuccessorBB = I->splitBasicBlock(J);
+ ++I; //advance iterator I to successorBB
+
+ // Add new basic block for slow divide operation
+ BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ SlowBB->moveBefore(SuccessorBB);
+ IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
+ Value *SlowQuotientV;
+ Value *SlowRemainderV;
+ if (UseSignedOp) {
+ SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
+ SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
+ } else {
+ SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
+ SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
+ }
+ SlowBuilder.CreateBr(SuccessorBB);
+
+ // Add new basic block for fast divide operation
+ BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ FastBB->moveBefore(SlowBB);
+ IRBuilder<> FastBuilder(FastBB, FastBB->begin());
+ Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
+ BypassType);
+ Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
+ BypassType);
+
+ // udiv/urem because optimization only handles positive numbers
+ Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
+ ShortDivisorV);
+ Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
+ ShortDivisorV);
+ Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
+ ShortQuotientV,
+ Dividend->getType());
+ Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
+ ShortRemainderV,
+ Dividend->getType());
+ FastBuilder.CreateBr(SuccessorBB);
+
+ // Phi nodes for result of div and rem
+ IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
+ PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+ QuoPhi->addIncoming(SlowQuotientV, SlowBB);
+ QuoPhi->addIncoming(FastQuotientV, FastBB);
+ PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+ RemPhi->addIncoming(SlowRemainderV, SlowBB);
+ RemPhi->addIncoming(FastRemainderV, FastBB);
+
+ // Replace Instr with appropriate phi node
+ if (UseDivOp)
+ Instr->replaceAllUsesWith(QuoPhi);
+ else
+ Instr->replaceAllUsesWith(RemPhi);
+ Instr->eraseFromParent();
+
+ // Combine operands into a single value with OR for value testing below
+ MainBB->getInstList().back().eraseFromParent();
+ IRBuilder<> MainBuilder(MainBB, MainBB->end());
+ Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
+
+ // BitMask is inverted to check if the operands are
+ // larger than the bypass type
+ uint64_t BitMask = ~BypassType->getBitMask();
+ Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
+
+ // Compare operand values and branch
+ Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
+ Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
+ MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
+
+ // point iterator J at first instruction of successorBB
+ J = I->begin();
+
+ // Cache phi nodes to be used later in place of other instances
+ // of div or rem with the same sign, dividend, and divisor
+ DivOpInfo Key(UseSignedOp, Dividend, Divisor);
+ DivPhiNodes Value(QuoPhi, RemPhi);
+ PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
+ return true;
+}
+
+// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
+// operands and operation are identical. Otherwise call insertFastDiv to perform
+// the optimization and cache the resulting dividend and remainder.
+static bool reuseOrInsertFastDiv(Function &F,
+ Function::iterator &I,
+ BasicBlock::iterator &J,
+ IntegerType *BypassType,
+ bool UseDivOp,
+ bool UseSignedOp,
+ DivCacheTy &PerBBDivCache) {
+ // Get instruction operands
+ Instruction *Instr = J;
+ DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
+ DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
+
+ if (CacheI == PerBBDivCache.end()) {
+ // If previous instance does not exist, insert fast div
+ return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
+ PerBBDivCache);
+ }
+
+ // Replace operation value with previously generated phi node
+ DivPhiNodes &Value = CacheI->second;
+ if (UseDivOp) {
+ // Replace all uses of div instruction with quotient phi node
+ J->replaceAllUsesWith(Value.Quotient);
+ } else {
+ // Replace all uses of rem instruction with remainder phi node
+ J->replaceAllUsesWith(Value.Remainder);
+ }
+
+ // Advance to next operation
+ ++J;
+
+ // Remove redundant operation
+ Instr->eraseFromParent();
+ return true;
+}
+
+// bypassSlowDivision - This optimization identifies DIV instructions that can
+// be profitably bypassed and carried out with a shorter, faster divide.
+bool llvm::bypassSlowDivision(Function &F,
+ Function::iterator &I,
+ const DenseMap<unsigned int, unsigned int> &BypassWidths) {
+ DivCacheTy DivCache;
+
+ bool MadeChange = false;
+ for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
+
+ // Get instruction details
+ unsigned Opcode = J->getOpcode();
+ bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
+ bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
+ bool UseSignedOp = Opcode == Instruction::SDiv ||
+ Opcode == Instruction::SRem;
+
+ // Only optimize div or rem ops
+ if (!UseDivOp && !UseRemOp)
+ continue;
+
+ // Skip division on vector types, only optimize integer instructions
+ if (!J->getType()->isIntegerTy())
+ continue;
+
+ // Get bitwidth of div/rem instruction
+ IntegerType *T = cast<IntegerType>(J->getType());
+ unsigned int bitwidth = T->getBitWidth();
+
+ // Continue if bitwidth is not bypassed
+ DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
+ if (BI == BypassWidths.end())
+ continue;
+
+ // Get type for div/rem instruction with bypass bitwidth
+ IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
+
+ MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
+ UseSignedOp, DivCache);
+ }
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
new file mode 100644
index 000000000000..63d7a1d52aa5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -0,0 +1,576 @@
+//===- CloneFunction.cpp - Clone a function into another function ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneFunctionInto interface, which is used as the
+// low-level function cloner. This is used by the CloneFunction and function
+// inliner to do the dirty work of copying the body of a function around.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <map>
+using namespace llvm;
+
+// CloneBasicBlock - See comments in Cloning.h
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
+ ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, Function *F,
+ ClonedCodeInfo *CodeInfo) {
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *NewInst = II->clone();
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[II] = NewInst; // Add instruction map to value.
+
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->getEntryBlock();
+ }
+ return NewBB;
+}
+
+// Clone OldFunc into NewFunc, transforming the old arguments into references to
+// VMap values.
+//
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo,
+ ValueMapTypeRemapper *TypeMapper) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); I != E; ++I)
+ assert(VMap.count(I) && "No mapping from source argument specified!");
+#endif
+
+ // Clone any attributes.
+ if (NewFunc->arg_size() == OldFunc->arg_size())
+ NewFunc->copyAttributesFrom(OldFunc);
+ else {
+ //Some arguments were deleted with the VMap. Copy arguments one by one
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); I != E; ++I)
+ if (Argument* Anew = dyn_cast<Argument>(VMap[I])) {
+ AttributeSet attrs = OldFunc->getAttributes()
+ .getParamAttributes(I->getArgNo() + 1);
+ if (attrs.getNumSlots() > 0)
+ Anew->addAttr(attrs);
+ }
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::ReturnIndex,
+ OldFunc->getAttributes()));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::FunctionIndex,
+ OldFunc->getAttributes()));
+
+ }
+
+ // Loop over all of the basic blocks in the function, cloning them as
+ // appropriate. Note that we save BE this way in order to handle cloning of
+ // recursive functions into themselves.
+ //
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ const BasicBlock &BB = *BI;
+
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
+
+ // Add basic block mapping.
+ VMap[&BB] = CBB;
+
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ if (BB.hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(&BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
+ }
+
+ // Note return instructions for the caller.
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
+ Returns.push_back(RI);
+ }
+
+ // Loop over all of the instructions in the function, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
+ BE = NewFunc->end(); BB != BE; ++BB)
+ // Loop over all instructions, fixing each one as we find it...
+ for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
+ RemapInstruction(II, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper);
+}
+
+/// CloneFunction - Return a copy of the specified function, but without
+/// embedding the function into another module. Also, any references specified
+/// in the VMap are changed to refer to their mapped value instead of the
+/// original one. If any of the arguments to the function are in the VMap,
+/// the arguments are deleted from the resultant function. The VMap is
+/// updated to include mappings from all of the instructions and basicblocks in
+/// the function from their old to new values.
+///
+Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ ClonedCodeInfo *CodeInfo) {
+ std::vector<Type*> ArgTypes;
+
+ // The user might be deleting arguments to the function by specifying them in
+ // the VMap. If so, we need to not add the arguments to the arg ty vector
+ //
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet?
+ ArgTypes.push_back(I->getType());
+
+ // Create a new function type...
+ FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
+ ArgTypes, F->getFunctionType()->isVarArg());
+
+ // Create the new function...
+ Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName());
+
+ // Loop over the arguments, copying the names of the mapped arguments over...
+ Function::arg_iterator DestI = NewF->arg_begin();
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (VMap.count(I) == 0) { // Is this argument preserved?
+ DestI->setName(I->getName()); // Copy the name over...
+ VMap[I] = DestI++; // Add mapping to VMap
+ }
+
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
+ return NewF;
+}
+
+
+
+namespace {
+ /// PruningFunctionCloner - This class is a private class used to implement
+ /// the CloneAndPruneFunctionInto method.
+ struct PruningFunctionCloner {
+ Function *NewFunc;
+ const Function *OldFunc;
+ ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
+ const char *NameSuffix;
+ ClonedCodeInfo *CodeInfo;
+ const DataLayout *TD;
+ public:
+ PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
+ ValueToValueMapTy &valueMap,
+ bool moduleLevelChanges,
+ const char *nameSuffix,
+ ClonedCodeInfo *codeInfo,
+ const DataLayout *td)
+ : NewFunc(newFunc), OldFunc(oldFunc),
+ VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
+ NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ }
+
+ /// CloneBlock - The specified block is found to be reachable, clone it and
+ /// anything that it can reach.
+ void CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone);
+ };
+}
+
+/// CloneBlock - The specified block is found to be reachable, clone it and
+/// anything that it can reach.
+void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone){
+ WeakVH &BBEntry = VMap[BB];
+
+ // Have we already cloned this block?
+ if (BBEntry) return;
+
+ // Nope, clone it now.
+ BasicBlock *NewBB;
+ BBEntry = NewBB = BasicBlock::Create(BB->getContext());
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ //
+ // Note that we don't need to fix the mapping for unreachable blocks;
+ // the default mapping there is safe.
+ if (BB->hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
+ }
+
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over, DCE'ing as we go. This
+ // loop doesn't include the terminator.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
+ II != IE; ++II) {
+ Instruction *NewInst = II->clone();
+
+ // Eagerly remap operands to the newly cloned instruction, except for PHI
+ // nodes for which we defer processing until we update the CFG.
+ if (!isa<PHINode>(NewInst)) {
+ RemapInstruction(NewInst, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+
+ // If we can simplify this instruction to some other value, simply add
+ // a mapping to that value rather than inserting a new instruction into
+ // the basic block.
+ if (Value *V = SimplifyInstruction(NewInst, TD)) {
+ // On the off-chance that this simplifies to an instruction in the old
+ // function, map it back into the new function.
+ if (Value *MappedV = VMap.lookup(V))
+ V = MappedV;
+
+ VMap[II] = V;
+ delete NewInst;
+ continue;
+ }
+ }
+
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ VMap[II] = NewInst; // Add instruction map to value.
+ NewBB->getInstList().push_back(NewInst);
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ // Finally, clone over the terminator.
+ const TerminatorInst *OldTI = BB->getTerminator();
+ bool TerminatorDone = false;
+ if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
+ if (BI->isConditional()) {
+ // If the condition was a known constant in the callee...
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ // Or is a known constant in the caller...
+ if (Cond == 0) {
+ Value *V = VMap[BI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+
+ // Constant fold to uncond branch!
+ if (Cond) {
+ BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+ } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
+ // If switching on a value known constant in the caller.
+ ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+ if (Cond == 0) { // Or known constant after constant prop in the callee...
+ Value *V = VMap[SI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+ if (Cond) { // Constant fold to uncond branch!
+ SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
+ BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+
+ if (!TerminatorDone) {
+ Instruction *NewInst = OldTI->clone();
+ if (OldTI->hasName())
+ NewInst->setName(OldTI->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[OldTI] = NewInst; // Add instruction map to value.
+
+ // Recursively clone any reachable successor blocks.
+ const TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ ToClone.push_back(TI->getSuccessor(i));
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->front();
+ }
+}
+
+/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo,
+ const DataLayout *TD,
+ Instruction *TheCall) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator II = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); II != E; ++II)
+ assert(VMap.count(II) && "No mapping from source argument specified!");
+#endif
+
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+ NameSuffix, CodeInfo, TD);
+
+ // Clone the entry block, and anything recursively reachable from it.
+ std::vector<const BasicBlock*> CloneWorklist;
+ CloneWorklist.push_back(&OldFunc->getEntryBlock());
+ while (!CloneWorklist.empty()) {
+ const BasicBlock *BB = CloneWorklist.back();
+ CloneWorklist.pop_back();
+ PFC.CloneBlock(BB, CloneWorklist);
+ }
+
+ // Loop over all of the basic blocks in the old function. If the block was
+ // reachable, we have cloned it and the old block is now in the value map:
+ // insert it into the new function in the right order. If not, ignore it.
+ //
+ // Defer PHI resolution until rest of function is resolved.
+ SmallVector<const PHINode*, 16> PHIToResolve;
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ Value *V = VMap[BI];
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
+ if (NewBB == 0) continue; // Dead block.
+
+ // Add the new block to the new function.
+ NewFunc->getBasicBlockList().push_back(NewBB);
+
+ // Handle PHI nodes specially, as we have to remove references to dead
+ // blocks.
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
+ if (const PHINode *PN = dyn_cast<PHINode>(I))
+ PHIToResolve.push_back(PN);
+ else
+ break;
+
+ // Finally, remap the terminator instructions, as those can't be remapped
+ // until all BBs are mapped.
+ RemapInstruction(NewBB->getTerminator(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ }
+
+ // Defer PHI resolution until rest of function is resolved, PHI resolution
+ // requires the CFG to be up-to-date.
+ for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
+ const PHINode *OPN = PHIToResolve[phino];
+ unsigned NumPreds = OPN->getNumIncomingValues();
+ const BasicBlock *OldBB = OPN->getParent();
+ BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
+
+ // Map operands for blocks that are live and remove operands for blocks
+ // that are dead.
+ for (; phino != PHIToResolve.size() &&
+ PHIToResolve[phino]->getParent() == OldBB; ++phino) {
+ OPN = PHIToResolve[phino];
+ PHINode *PN = cast<PHINode>(VMap[OPN]);
+ for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+ Value *V = VMap[PN->getIncomingBlock(pred)];
+ if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
+ Value *InVal = MapValue(PN->getIncomingValue(pred),
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ assert(InVal && "Unknown input value?");
+ PN->setIncomingValue(pred, InVal);
+ PN->setIncomingBlock(pred, MappedBlock);
+ } else {
+ PN->removeIncomingValue(pred, false);
+ --pred, --e; // Revisit the next entry.
+ }
+ }
+ }
+
+ // The loop above has removed PHI entries for those blocks that are dead
+ // and has updated others. However, if a block is live (i.e. copied over)
+ // but its terminator has been changed to not go to this block, then our
+ // phi nodes will have invalid entries. Update the PHI nodes in this
+ // case.
+ PHINode *PN = cast<PHINode>(NewBB->begin());
+ NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
+ if (NumPreds != PN->getNumIncomingValues()) {
+ assert(NumPreds < PN->getNumIncomingValues());
+ // Count how many times each predecessor comes to this block.
+ std::map<BasicBlock*, unsigned> PredCount;
+ for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
+ PI != E; ++PI)
+ --PredCount[*PI];
+
+ // Figure out how many entries to remove from each PHI.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ ++PredCount[PN->getIncomingBlock(i)];
+
+ // At this point, the excess predecessor entries are positive in the
+ // map. Loop over all of the PHIs and remove excess predecessor
+ // entries.
+ BasicBlock::iterator I = NewBB->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
+ E = PredCount.end(); PCI != E; ++PCI) {
+ BasicBlock *Pred = PCI->first;
+ for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
+ PN->removeIncomingValue(Pred, false);
+ }
+ }
+ }
+
+ // If the loops above have made these phi nodes have 0 or 1 operand,
+ // replace them with undef or the input value. We must do this for
+ // correctness, because 0-operand phis are not valid.
+ PN = cast<PHINode>(NewBB->begin());
+ if (PN->getNumIncomingValues() == 0) {
+ BasicBlock::iterator I = NewBB->begin();
+ BasicBlock::const_iterator OldI = OldBB->begin();
+ while ((PN = dyn_cast<PHINode>(I++))) {
+ Value *NV = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NV);
+ assert(VMap[OldI] == PN && "VMap mismatch");
+ VMap[OldI] = NV;
+ PN->eraseFromParent();
+ ++OldI;
+ }
+ }
+ }
+
+ // Make a second pass over the PHINodes now that all of them have been
+ // remapped into the new function, simplifying the PHINode and performing any
+ // recursive simplifications exposed. This will transparently update the
+ // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
+ // two PHINodes, the iteration over the old PHIs remains valid, and the
+ // mapping will just map us to the new node (which may not even be a PHI
+ // node).
+ for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
+ if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
+ recursivelySimplifyInstruction(PN, TD);
+
+ // Now that the inlined function body has been fully constructed, go through
+ // and zap unconditional fall-through branches. This happen all the time when
+ // specializing code: code specialization turns conditional branches into
+ // uncond branches, and this code folds them.
+ Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ Function::iterator I = Begin;
+ while (I != NewFunc->end()) {
+ // Check if this block has become dead during inlining or other
+ // simplifications. Note that the first block will appear dead, as it has
+ // not yet been wired up properly.
+ if (I != Begin && (pred_begin(I) == pred_end(I) ||
+ I->getSinglePredecessor() == I)) {
+ BasicBlock *DeadBB = I++;
+ DeleteDeadBlock(DeadBB);
+ continue;
+ }
+
+ // We need to simplify conditional branches and switches with a constant
+ // operand. We try to prune these out when cloning, but if the
+ // simplification required looking through PHI nodes, those are only
+ // available after forming the full basic block. That may leave some here,
+ // and we still want to prune the dead code as early as possible.
+ ConstantFoldTerminator(I);
+
+ BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+ if (!BI || BI->isConditional()) { ++I; continue; }
+
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (!Dest->getSinglePredecessor()) {
+ ++I; continue;
+ }
+
+ // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
+ // above should have zapped all of them..
+ assert(!isa<PHINode>(Dest->begin()));
+
+ // We know all single-entry PHI nodes in the inlined function have been
+ // removed, so we just need to splice the blocks.
+ BI->eraseFromParent();
+
+ // Make all PHI nodes that referred to Dest now refer to I as their source.
+ Dest->replaceAllUsesWith(I);
+
+ // Move all the instructions in the succ to the pred.
+ I->getInstList().splice(I->end(), Dest->getInstList());
+
+ // Remove the dest block.
+ Dest->eraseFromParent();
+
+ // Do not increment I, iteratively merge all things this block branches to.
+ }
+
+ // Make a final pass over the basic blocks from theh old function to gather
+ // any return instructions which survived folding. We have to do this here
+ // because we can iteratively remove and merge returns above.
+ for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]),
+ E = NewFunc->end();
+ I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
+ Returns.push_back(RI);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
new file mode 100644
index 000000000000..64df089e1b81
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,122 @@
+//===- CloneModule.cpp - Clone an entire module ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneModule interface which makes a copy of an
+// entire module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+/// CloneModule - Return an exact copy of the specified module. This is not as
+/// easy as it might seem because we have to worry about making copies of global
+/// variables and functions, and making their (initializers and references,
+/// respectively) refer to the right globals.
+///
+Module *llvm::CloneModule(const Module *M) {
+ // Create the value map that maps things from the old module over to the new
+ // module.
+ ValueToValueMapTy VMap;
+ return CloneModule(M, VMap);
+}
+
+Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+ // First off, we need to create the new module.
+ Module *New = new Module(M->getModuleIdentifier(), M->getContext());
+ New->setDataLayout(M->getDataLayout());
+ New->setTargetTriple(M->getTargetTriple());
+ New->setModuleInlineAsm(M->getModuleInlineAsm());
+
+ // Loop over all of the global variables, making corresponding globals in the
+ // new module. Here we add them to the VMap and to the new Module. We
+ // don't worry about attributes or initializers, they will come later.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = new GlobalVariable(*New,
+ I->getType()->getElementType(),
+ I->isConstant(), I->getLinkage(),
+ (Constant*) 0, I->getName(),
+ (GlobalVariable*) 0,
+ I->getThreadLocalMode(),
+ I->getType()->getAddressSpace());
+ GV->copyAttributesFrom(I);
+ VMap[I] = GV;
+ }
+
+ // Loop over the functions in the module, making external functions as before
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ Function *NF =
+ Function::Create(cast<FunctionType>(I->getType()->getElementType()),
+ I->getLinkage(), I->getName(), New);
+ NF->copyAttributesFrom(I);
+ VMap[I] = NF;
+ }
+
+ // Loop over the aliases in the module
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I) {
+ GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(),
+ I->getName(), NULL, New);
+ GA->copyAttributesFrom(I);
+ VMap[I] = GA;
+ }
+
+ // Now that all of the things that global variable initializer can refer to
+ // have been created, loop through and copy the global variable referrers
+ // over... We also set the attributes on the global now.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
+ if (I->hasInitializer())
+ GV->setInitializer(MapValue(I->getInitializer(), VMap));
+ }
+
+ // Similarly, copy over function bodies now...
+ //
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ Function *F = cast<Function>(VMap[I]);
+ if (!I->isDeclaration()) {
+ Function::arg_iterator DestI = F->arg_begin();
+ for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
+ ++J) {
+ DestI->setName(J->getName());
+ VMap[J] = DestI++;
+ }
+
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
+ }
+ }
+
+ // And aliases
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I) {
+ GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
+ if (const Constant *C = I->getAliasee())
+ GA->setAliasee(MapValue(C, VMap));
+ }
+
+ // And named metadata....
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode &NMD = *I;
+ NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap));
+ }
+
+ return New;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
new file mode 100644
index 000000000000..8fa412a18b99
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -0,0 +1,96 @@
+//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file holds routines to help analyse compare instructions
+// and fold them into constants or other compare instructions
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits
+/// are carefully arranged to allow folding of expressions such as:
+///
+/// (A < B) | (A > B) --> (A != B)
+///
+/// Note that this is only valid if the first and second predicates have the
+/// same sign. Is illegal to do: (A u< B) | (A s> B)
+///
+/// Three bits are used to represent the condition, as follows:
+/// 0 A > B
+/// 1 A == B
+/// 2 A < B
+///
+/// <=> Value Definition
+/// 000 0 Always false
+/// 001 1 A > B
+/// 010 2 A == B
+/// 011 3 A >= B
+/// 100 4 A < B
+/// 101 5 A != B
+/// 110 6 A <= B
+/// 111 7 Always true
+///
+unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
+ ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
+ : ICI->getPredicate();
+ switch (Pred) {
+ // False -> 0
+ case ICmpInst::ICMP_UGT: return 1; // 001
+ case ICmpInst::ICMP_SGT: return 1; // 001
+ case ICmpInst::ICMP_EQ: return 2; // 010
+ case ICmpInst::ICMP_UGE: return 3; // 011
+ case ICmpInst::ICMP_SGE: return 3; // 011
+ case ICmpInst::ICMP_ULT: return 4; // 100
+ case ICmpInst::ICMP_SLT: return 4; // 100
+ case ICmpInst::ICMP_NE: return 5; // 101
+ case ICmpInst::ICMP_ULE: return 6; // 110
+ case ICmpInst::ICMP_SLE: return 6; // 110
+ // True -> 7
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ }
+}
+
+/// getICmpValue - This is the complement of getICmpCode, which turns an
+/// opcode and two operands into either a constant true or false, or the
+/// predicate for a new ICmp instruction. The sign is passed in to determine
+/// which kind of predicate to use in the new icmp instruction.
+/// Non-NULL return value will be a true or false constant.
+/// NULL return means a new ICmp is needed. The predicate for which is
+/// output in NewICmpPred.
+Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ CmpInst::Predicate &NewICmpPred) {
+ switch (Code) {
+ default: llvm_unreachable("Illegal ICmp code!");
+ case 0: // False.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
+ case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
+ case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 7: // True.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ }
+ return NULL;
+}
+
+/// PredicatesFoldable - Return true if both predicates match sign or if at
+/// least one of them is an equality comparison (which is signless).
+bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+ return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+ (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+ (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
new file mode 100644
index 000000000000..f7c659f2193b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,780 @@
+//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface to tear out a code region, such as an
+// individual loop or a parallel section, into a new function, replacing it with
+// a call to the new function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+// Provide a command-line option to aggregate function arguments into a struct
+// for functions produced by the code extractor. This is useful when converting
+// extracted functions to pthread-based code, as only one argument (void*) can
+// be passed in to pthread_create().
+static cl::opt<bool>
+AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
+ cl::desc("Aggregate arguments to code-extracted functions"));
+
+/// \brief Test whether a block is valid for extraction.
+static bool isBlockValidForExtraction(const BasicBlock &BB) {
+ // Landing pads must be in the function where they were inserted for cleanup.
+ if (BB.isLandingPad())
+ return false;
+
+ // Don't hoist code containing allocas, invokes, or vastarts.
+ for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+ if (isa<AllocaInst>(I) || isa<InvokeInst>(I))
+ return false;
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ }
+
+ return true;
+}
+
+/// \brief Build a set of blocks to extract if the input blocks are viable.
+template <typename IteratorT>
+static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
+ IteratorT BBEnd) {
+ SetVector<BasicBlock *> Result;
+
+ assert(BBBegin != BBEnd);
+
+ // Loop over the blocks, adding them to our set-vector, and aborting with an
+ // empty set if we encounter invalid blocks.
+ for (IteratorT I = BBBegin, E = BBEnd; I != E; ++I) {
+ if (!Result.insert(*I))
+ llvm_unreachable("Repeated basic blocks in extraction input");
+
+ if (!isBlockValidForExtraction(**I)) {
+ Result.clear();
+ return Result;
+ }
+ }
+
+#ifndef NDEBUG
+ for (SetVector<BasicBlock *>::iterator I = llvm::next(Result.begin()),
+ E = Result.end();
+ I != E; ++I)
+ for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I);
+ PI != PE; ++PI)
+ assert(Result.count(*PI) &&
+ "No blocks in this region may have entries from outside the region"
+ " except for the first block!");
+#endif
+
+ return Result;
+}
+
+/// \brief Helper to call buildExtractionBlockSet with an ArrayRef.
+static SetVector<BasicBlock *>
+buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) {
+ return buildExtractionBlockSet(BBs.begin(), BBs.end());
+}
+
+/// \brief Helper to call buildExtractionBlockSet with a RegionNode.
+static SetVector<BasicBlock *>
+buildExtractionBlockSet(const RegionNode &RN) {
+ if (!RN.isSubRegion())
+ // Just a single BasicBlock.
+ return buildExtractionBlockSet(RN.getNodeAs<BasicBlock>());
+
+ const Region &R = *RN.getNodeAs<Region>();
+
+ return buildExtractionBlockSet(R.block_begin(), R.block_end());
+}
+
+CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs)
+ : DT(0), AggregateArgs(AggregateArgs||AggregateArgsOpt),
+ Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {}
+
+CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
+ bool AggregateArgs)
+ : DT(DT), AggregateArgs(AggregateArgs||AggregateArgsOpt),
+ Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {}
+
+CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs)
+ : DT(&DT), AggregateArgs(AggregateArgs||AggregateArgsOpt),
+ Blocks(buildExtractionBlockSet(L.getBlocks())), NumExitBlocks(~0U) {}
+
+CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN,
+ bool AggregateArgs)
+ : DT(&DT), AggregateArgs(AggregateArgs||AggregateArgsOpt),
+ Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {}
+
+/// definedInRegion - Return true if the specified value is defined in the
+/// extracted region.
+static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (Blocks.count(I->getParent()))
+ return true;
+ return false;
+}
+
+/// definedInCaller - Return true if the specified value is defined in the
+/// function being code extracted, but not in the region being extracted.
+/// These values must be passed in as live-ins to the function.
+static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
+ if (isa<Argument>(V)) return true;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (!Blocks.count(I->getParent()))
+ return true;
+ return false;
+}
+
+void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
+ ValueSet &Outputs) const {
+ for (SetVector<BasicBlock *>::const_iterator I = Blocks.begin(),
+ E = Blocks.end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ // If a used value is defined outside the region, it's an input. If an
+ // instruction is used outside the region, it's an output.
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ for (User::op_iterator OI = II->op_begin(), OE = II->op_end();
+ OI != OE; ++OI)
+ if (definedInCaller(Blocks, *OI))
+ Inputs.insert(*OI);
+
+ for (Value::use_iterator UI = II->use_begin(), UE = II->use_end();
+ UI != UE; ++UI)
+ if (!definedInRegion(Blocks, *UI)) {
+ Outputs.insert(II);
+ break;
+ }
+ }
+ }
+}
+
+/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
+/// region, we need to split the entry block of the region so that the PHI node
+/// is easier to deal with.
+void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+ unsigned NumPredsFromRegion = 0;
+ unsigned NumPredsOutsideRegion = 0;
+
+ if (Header != &Header->getParent()->getEntryBlock()) {
+ PHINode *PN = dyn_cast<PHINode>(Header->begin());
+ if (!PN) return; // No PHI nodes.
+
+ // If the header node contains any PHI nodes, check to see if there is more
+ // than one entry from outside the region. If so, we need to sever the
+ // header block into two.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (Blocks.count(PN->getIncomingBlock(i)))
+ ++NumPredsFromRegion;
+ else
+ ++NumPredsOutsideRegion;
+
+ // If there is one (or fewer) predecessor from outside the region, we don't
+ // need to do anything special.
+ if (NumPredsOutsideRegion <= 1) return;
+ }
+
+ // Otherwise, we need to split the header block into two pieces: one
+ // containing PHI nodes merging values from outside of the region, and a
+ // second that contains all of the code for the block and merges back any
+ // incoming values from inside of the region.
+ BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI();
+ BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
+ Header->getName()+".ce");
+
+ // We only want to code extract the second block now, and it becomes the new
+ // header of the region.
+ BasicBlock *OldPred = Header;
+ Blocks.remove(OldPred);
+ Blocks.insert(NewBB);
+ Header = NewBB;
+
+ // Okay, update dominator sets. The blocks that dominate the new one are the
+ // blocks that dominate TIBB plus the new block itself.
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // Okay, now we need to adjust the PHI nodes and any branches from within the
+ // region to go to the new header block instead of the old header block.
+ if (NumPredsFromRegion) {
+ PHINode *PN = cast<PHINode>(OldPred->begin());
+ // Loop over all of the predecessors of OldPred that are in the region,
+ // changing them to branch to NewBB instead.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+ TI->replaceUsesOfWith(OldPred, NewBB);
+ }
+
+ // Okay, everything within the region is now branching to the right block, we
+ // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+ for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+ PHINode *PN = cast<PHINode>(AfterPHIs);
+ // Create a new PHI node in the new region, which has an incoming value
+ // from OldPred of PN.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
+ PN->getName()+".ce", NewBB->begin());
+ NewPN->addIncoming(PN, OldPred);
+
+ // Loop over all of the incoming value in PN, moving them to NewPN if they
+ // are from the extracted region.
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+ PN->removeIncomingValue(i);
+ --i;
+ }
+ }
+ }
+ }
+}
+
+void CodeExtractor::splitReturnBlocks() {
+ for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
+ BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+ if (DT) {
+ // Old dominates New. New node dominates all other nodes dominated
+ // by Old.
+ DomTreeNode *OldNode = DT->getNode(*I);
+ SmallVector<DomTreeNode*, 8> Children;
+ for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
+ DI != DE; ++DI)
+ Children.push_back(*DI);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New, *I);
+
+ for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ DT->changeImmediateDominator(*I, NewNode);
+ }
+ }
+}
+
+/// constructFunction - make a function based on inputs and outputs, as follows:
+/// f(in0, ..., inN, out0, ..., outN)
+///
+Function *CodeExtractor::constructFunction(const ValueSet &inputs,
+ const ValueSet &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode,
+ BasicBlock *newHeader,
+ Function *oldFunction,
+ Module *M) {
+ DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
+ DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
+
+ // This function returns unsigned, outputs will go back by reference.
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+ case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+ default: RetTy = Type::getInt16Ty(header->getContext()); break;
+ }
+
+ std::vector<Type*> paramTy;
+
+ // Add the types of the input values to the function's argument list
+ for (ValueSet::const_iterator i = inputs.begin(), e = inputs.end();
+ i != e; ++i) {
+ const Value *value = *i;
+ DEBUG(dbgs() << "value used in func: " << *value << "\n");
+ paramTy.push_back(value->getType());
+ }
+
+ // Add the types of the output values to the function's argument list.
+ for (ValueSet::const_iterator I = outputs.begin(), E = outputs.end();
+ I != E; ++I) {
+ DEBUG(dbgs() << "instr used in func: " << **I << "\n");
+ if (AggregateArgs)
+ paramTy.push_back((*I)->getType());
+ else
+ paramTy.push_back(PointerType::getUnqual((*I)->getType()));
+ }
+
+ DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
+ for (std::vector<Type*>::iterator i = paramTy.begin(),
+ e = paramTy.end(); i != e; ++i)
+ DEBUG(dbgs() << **i << ", ");
+ DEBUG(dbgs() << ")\n");
+
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ PointerType *StructPtr =
+ PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
+ paramTy.clear();
+ paramTy.push_back(StructPtr);
+ }
+ FunctionType *funcType =
+ FunctionType::get(RetTy, paramTy, false);
+
+ // Create the new function
+ Function *newFunction = Function::Create(funcType,
+ GlobalValue::InternalLinkage,
+ oldFunction->getName() + "_" +
+ header->getName(), M);
+ // If the old function is no-throw, so is the new one.
+ if (oldFunction->doesNotThrow())
+ newFunction->setDoesNotThrow();
+
+ newFunction->getBasicBlockList().push_back(newRootNode);
+
+ // Create an iterator to name all of the arguments we inserted.
+ Function::arg_iterator AI = newFunction->arg_begin();
+
+ // Rewrite all users of the inputs in the extracted region to use the
+ // arguments (or appropriate addressing into struct) instead.
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *RewriteVal;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
+ TerminatorInst *TI = newFunction->begin()->getTerminator();
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
+ } else
+ RewriteVal = AI++;
+
+ std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+ for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
+ use != useE; ++use)
+ if (Instruction* inst = dyn_cast<Instruction>(*use))
+ if (Blocks.count(inst->getParent()))
+ inst->replaceUsesOfWith(inputs[i], RewriteVal);
+ }
+
+ // Set names for input and output arguments.
+ if (!AggregateArgs) {
+ AI = newFunction->arg_begin();
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
+ AI->setName(inputs[i]->getName());
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
+ AI->setName(outputs[i]->getName()+".out");
+ }
+
+ // Rewrite branches to basic blocks outside of the loop to new dummy blocks
+ // within the new function. This must be done before we lose track of which
+ // blocks were originally in the code region.
+ std::vector<User*> Users(header->use_begin(), header->use_end());
+ for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ // The BasicBlock which contains the branch is not in the region
+ // modify the branch target to a new block
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
+ if (!Blocks.count(TI->getParent()) &&
+ TI->getParent()->getParent() == oldFunction)
+ TI->replaceUsesOfWith(header, newHeader);
+
+ return newFunction;
+}
+
+/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
+/// that uses the value within the basic block, and return the predecessor
+/// block associated with that use, or return 0 if none is found.
+static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
+ for (Value::use_iterator UI = Used->use_begin(),
+ UE = Used->use_end(); UI != UE; ++UI) {
+ PHINode *P = dyn_cast<PHINode>(*UI);
+ if (P && P->getParent() == BB)
+ return P->getIncomingBlock(UI);
+ }
+
+ return 0;
+}
+
+/// emitCallAndSwitchStatement - This method sets up the caller side by adding
+/// the call instruction, splitting any PHI nodes in the header block as
+/// necessary.
+void CodeExtractor::
+emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
+ ValueSet &inputs, ValueSet &outputs) {
+ // Emit a call to the new function, passing in: *pointer to struct (if
+ // aggregating parameters), or plan inputs and allocated memory for outputs
+ std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+
+ LLVMContext &Context = newFunction->getContext();
+
+ // Add inputs as params, or to be filled into the struct
+ for (ValueSet::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
+ if (AggregateArgs)
+ StructValues.push_back(*i);
+ else
+ params.push_back(*i);
+
+ // Create allocas for the outputs
+ for (ValueSet::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) {
+ if (AggregateArgs) {
+ StructValues.push_back(*i);
+ } else {
+ AllocaInst *alloca =
+ new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+ codeReplacer->getParent()->begin()->begin());
+ ReloadOutputs.push_back(alloca);
+ params.push_back(alloca);
+ }
+ }
+
+ AllocaInst *Struct = 0;
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ std::vector<Type*> ArgTypes;
+ for (ValueSet::iterator v = StructValues.begin(),
+ ve = StructValues.end(); v != ve; ++v)
+ ArgTypes.push_back((*v)->getType());
+
+ // Allocate a struct at the beginning of this function
+ Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+ Struct =
+ new AllocaInst(StructArgTy, 0, "structArg",
+ codeReplacer->getParent()->begin()->begin());
+ params.push_back(Struct);
+
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(Struct, Idx,
+ "gep_" + StructValues[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ StoreInst *SI = new StoreInst(StructValues[i], GEP);
+ codeReplacer->getInstList().push_back(SI);
+ }
+ }
+
+ // Emit the call to the function
+ CallInst *call = CallInst::Create(newFunction, params,
+ NumExitBlocks > 1 ? "targetBlock" : "");
+ codeReplacer->getInstList().push_back(call);
+
+ Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
+ unsigned FirstOut = inputs.size();
+ if (!AggregateArgs)
+ std::advance(OutputArgBegin, inputs.size());
+
+ // Reload the outputs passed in by reference
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ Value *Output = 0;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ GetElementPtrInst *GEP
+ = GetElementPtrInst::Create(Struct, Idx,
+ "gep_reload_" + outputs[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ Output = GEP;
+ } else {
+ Output = ReloadOutputs[i];
+ }
+ LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+ Reloads.push_back(load);
+ codeReplacer->getInstList().push_back(load);
+ std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+ for (unsigned u = 0, e = Users.size(); u != e; ++u) {
+ Instruction *inst = cast<Instruction>(Users[u]);
+ if (!Blocks.count(inst->getParent()))
+ inst->replaceUsesOfWith(outputs[i], load);
+ }
+ }
+
+ // Now we can emit a switch statement using the call as a value.
+ SwitchInst *TheSwitch =
+ SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
+ codeReplacer, 0, codeReplacer);
+
+ // Since there may be multiple exits from the original region, make the new
+ // function return an unsigned, switch on that number. This loop iterates
+ // over all of the blocks in the extracted region, updating any terminator
+ // instructions in the to-be-extracted region that branch to blocks that are
+ // not in the region to be extracted.
+ std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+
+ unsigned switchVal = 0;
+ for (SetVector<BasicBlock*>::const_iterator i = Blocks.begin(),
+ e = Blocks.end(); i != e; ++i) {
+ TerminatorInst *TI = (*i)->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!Blocks.count(TI->getSuccessor(i))) {
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ // add a new basic block which returns the appropriate value
+ BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+ if (!NewTarget) {
+ // If we don't already have an exit stub for this non-extracted
+ // destination, create one now!
+ NewTarget = BasicBlock::Create(Context,
+ OldTarget->getName() + ".exitStub",
+ newFunction);
+ unsigned SuccNum = switchVal++;
+
+ Value *brVal = 0;
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: break; // No value needed.
+ case 2: // Conditional branch, return a bool
+ brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
+ break;
+ default:
+ brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
+ break;
+ }
+
+ ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
+
+ // Update the switch instruction.
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+ SuccNum),
+ OldTarget);
+
+ // Restore values just before we exit
+ Function::arg_iterator OAI = OutputArgBegin;
+ for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
+ // For an invoke, the normal destination is the only one that is
+ // dominated by the result of the invocation
+ BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
+
+ bool DominatesDef = true;
+
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
+ DefBlock = Invoke->getNormalDest();
+
+ // Make sure we are looking at the original successor block, not
+ // at a newly inserted exit block, which won't be in the dominator
+ // info.
+ for (std::map<BasicBlock*, BasicBlock*>::iterator I =
+ ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I)
+ if (DefBlock == I->second) {
+ DefBlock = I->first;
+ break;
+ }
+
+ // In the extract block case, if the block we are extracting ends
+ // with an invoke instruction, make sure that we don't emit a
+ // store of the invoke value for the unwind block.
+ if (!DT && DefBlock != OldTarget)
+ DominatesDef = false;
+ }
+
+ if (DT) {
+ DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+ // If the output value is used by a phi in the target block,
+ // then we need to test for dominance of the phi's predecessor
+ // instead. Unfortunately, this a little complicated since we
+ // have already rewritten uses of the value to uses of the reload.
+ BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out],
+ OldTarget);
+ if (pred && DT && DT->dominates(DefBlock, pred))
+ DominatesDef = true;
+ }
+
+ if (DominatesDef) {
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
+ FirstOut+out);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(OAI, Idx,
+ "gep_" + outputs[out]->getName(),
+ NTRet);
+ new StoreInst(outputs[out], GEP, NTRet);
+ } else {
+ new StoreInst(outputs[out], OAI, NTRet);
+ }
+ }
+ // Advance output iterator even if we don't emit a store
+ if (!AggregateArgs) ++OAI;
+ }
+ }
+
+ // rewrite the original branch instruction with this new target
+ TI->setSuccessor(i, NewTarget);
+ }
+ }
+
+ // Now that we've done the deed, simplify the switch instruction.
+ Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+ switch (NumExitBlocks) {
+ case 0:
+ // There are no successors (the block containing the switch itself), which
+ // means that previously this was the last part of the function, and hence
+ // this should be rewritten as a `ret'
+
+ // Check if the function should return a value
+ if (OldFnRetTy->isVoidTy()) {
+ ReturnInst::Create(Context, 0, TheSwitch); // Return void
+ } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+ // return what we have
+ ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
+ } else {
+ // Otherwise we must have code extracted an unwind or something, just
+ // return whatever we want.
+ ReturnInst::Create(Context,
+ Constant::getNullValue(OldFnRetTy), TheSwitch);
+ }
+
+ TheSwitch->eraseFromParent();
+ break;
+ case 1:
+ // Only a single destination, change the switch into an unconditional
+ // branch.
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ case 2:
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+ call, TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ default:
+ // Otherwise, make the default destination of the switch instruction be one
+ // of the other successors.
+ TheSwitch->setCondition(call);
+ TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
+ // Remove redundant case
+ SwitchInst::CaseIt ToBeRemoved(TheSwitch, NumExitBlocks-1);
+ TheSwitch->removeCase(ToBeRemoved);
+ break;
+ }
+}
+
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+ Function *oldFunc = (*Blocks.begin())->getParent();
+ Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
+ Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+
+ for (SetVector<BasicBlock*>::const_iterator i = Blocks.begin(),
+ e = Blocks.end(); i != e; ++i) {
+ // Delete the basic block from the old function, and the list of blocks
+ oldBlocks.remove(*i);
+
+ // Insert this basic block into the new function
+ newBlocks.push_back(*i);
+ }
+}
+
+Function *CodeExtractor::extractCodeRegion() {
+ if (!isEligible())
+ return 0;
+
+ ValueSet inputs, outputs;
+
+ // Assumption: this is a single-entry code region, and the header is the first
+ // block in the region.
+ BasicBlock *header = *Blocks.begin();
+
+ // If we have to split PHI nodes or the entry block, do so now.
+ severSplitPHINodes(header);
+
+ // If we have any return instructions in the region, split those blocks so
+ // that the return is not in the region.
+ splitReturnBlocks();
+
+ Function *oldFunction = header->getParent();
+
+ // This takes place of the original loop
+ BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+ "codeRepl", oldFunction,
+ header);
+
+ // The new function needs a root node because other nodes can branch to the
+ // head of the region, but the entry node of a function cannot have preds.
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+ "newFuncRoot");
+ newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+
+ // Find inputs to, outputs from the code region.
+ findInputsOutputs(inputs, outputs);
+
+ SmallPtrSet<BasicBlock *, 1> ExitBlocks;
+ for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I)
+ for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ if (!Blocks.count(*SI))
+ ExitBlocks.insert(*SI);
+ NumExitBlocks = ExitBlocks.size();
+
+ // Construct new function based on inputs/outputs & add allocas for all defs.
+ Function *newFunction = constructFunction(inputs, outputs, header,
+ newFuncRoot,
+ codeReplacer, oldFunction,
+ oldFunction->getParent());
+
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+ moveCodeToFunction(newFunction);
+
+ // Loop over all of the PHI nodes in the header block, and change any
+ // references to the old incoming edge to be the new incoming edge.
+ for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!Blocks.count(PN->getIncomingBlock(i)))
+ PN->setIncomingBlock(i, newFuncRoot);
+ }
+
+ // Look at all successors of the codeReplacer block. If any of these blocks
+ // had PHI nodes in them, we need to update the "from" block to be the code
+ // replacer, not the original block in the extracted region.
+ std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
+ succ_end(codeReplacer));
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ std::set<BasicBlock*> ProcessedPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
+ PN->setIncomingBlock(i, codeReplacer);
+ else {
+ // There were multiple entries in the PHI for this block, now there
+ // is only one, so remove the duplicated entries.
+ PN->removeIncomingValue(i, false);
+ --i; --e;
+ }
+ }
+ }
+
+ //cerr << "NEW FUNCTION: " << *newFunction;
+ // verifyFunction(*newFunction);
+
+ // cerr << "OLD FUNCTION: " << *oldFunction;
+ // verifyFunction(*oldFunction);
+
+ DEBUG(if (verifyFunction(*newFunction))
+ report_fatal_error("verifyFunction failed!"));
+ return newFunction;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 000000000000..db525cdc24d8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,148 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca. This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value. It returns the pointer to
+/// the alloca inserted to create a stack slot for I.
+AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+ Instruction *AllocaPoint) {
+ if (I.use_empty()) {
+ I.eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(I.getType(), 0,
+ I.getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = I.getParent()->getParent();
+ Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Change all of the users of the instruction to read from the stack slot.
+ while (!I.use_empty()) {
+ Instruction *U = cast<Instruction>(I.use_back());
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If this is a PHI node, we can't insert a load of the value before the
+ // use. Instead insert the load in the predecessor block corresponding
+ // to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this PHI
+ // node that we cannot have multiple loads. The problem is that the
+ // resulting PHI node will have multiple values (from each load) coming in
+ // from the same block, which is illegal SSA form. For this reason, we
+ // keep track of and reuse loads we insert.
+ DenseMap<BasicBlock*, Value*> Loads;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I) {
+ Value *&V = Loads[PN->getIncomingBlock(i)];
+ if (V == 0) {
+ // Insert the load into the predecessor block
+ V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+ PN->getIncomingBlock(i)->getTerminator());
+ }
+ PN->setIncomingValue(i, V);
+ }
+
+ } else {
+ // If this is a normal instruction, just insert a load.
+ Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+ U->replaceUsesOfWith(&I, V);
+ }
+ }
+
+
+ // Insert stores of the computed value into the stack slot. We have to be
+ // careful if I is an invoke instruction, because we can't insert the store
+ // AFTER the terminator instruction.
+ BasicBlock::iterator InsertPt;
+ if (!isa<TerminatorInst>(I)) {
+ InsertPt = &I;
+ ++InsertPt;
+ } else {
+ InvokeInst &II = cast<InvokeInst>(I);
+ if (II.getNormalDest()->getSinglePredecessor())
+ InsertPt = II.getNormalDest()->getFirstInsertionPt();
+ else {
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical. Therefore, split the critical edge and insert the store
+ // in the newly created basic block.
+ unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest());
+ TerminatorInst *TI = &cast<TerminatorInst>(I);
+ assert (isCriticalEdge(TI, SuccNum) &&
+ "Expected a critical edge!");
+ BasicBlock *BB = SplitCriticalEdge(TI, SuccNum);
+ assert (BB && "Unable to split critical edge.");
+ InsertPt = BB->getFirstInsertionPt();
+ }
+ }
+
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
+
+ new StoreInst(&I, Slot, InsertPt);
+ return Slot;
+}
+
+/// DemotePHIToStack - This function takes a virtual register computed by a PHI
+/// node and replaces it with a slot in the stack frame allocated via alloca.
+/// The PHI node is deleted. It returns the pointer to the alloca inserted.
+AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+ if (P->use_empty()) {
+ P->eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(P->getType(), 0,
+ P->getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = P->getParent()->getParent();
+ Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Iterate over each operand inserting a store in each predecessor.
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+ assert(II->getParent() != P->getIncomingBlock(i) &&
+ "Invoke edge not supported yet"); (void)II;
+ }
+ new StoreInst(P->getIncomingValue(i), Slot,
+ P->getIncomingBlock(i)->getTerminator());
+ }
+
+ // Insert a load in place of the PHI and replace all uses.
+ BasicBlock::iterator InsertPt = P;
+
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
+
+ Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt);
+ P->replaceAllUsesWith(V);
+
+ // Delete PHI.
+ P->eraseFromParent();
+ return Slot;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 000000000000..e9828d60cd55
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,916 @@
+//===- InlineFunction.cpp - Code to perform function inlining -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
+ bool InsertLifetime) {
+ return InlineFunction(CallSite(CI), IFI, InsertLifetime);
+}
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+ bool InsertLifetime) {
+ return InlineFunction(CallSite(II), IFI, InsertLifetime);
+}
+
+namespace {
+ /// A class for recording information about inlining through an invoke.
+ class InvokeInliningInfo {
+ BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
+ BasicBlock *InnerResumeDest; ///< Destination for the callee's resume.
+ LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke.
+ PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts.
+ SmallVector<Value*, 8> UnwindDestPHIValues;
+
+ public:
+ InvokeInliningInfo(InvokeInst *II)
+ : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
+ CallerLPad(0), InnerEHValuesPHI(0) {
+ // If there are PHI nodes in the unwind destination block, we need to keep
+ // track of which values came into them from the invoke before removing
+ // the edge from this block.
+ llvm::BasicBlock *InvokeBB = II->getParent();
+ BasicBlock::iterator I = OuterResumeDest->begin();
+ for (; isa<PHINode>(I); ++I) {
+ // Save the value to use for this edge.
+ PHINode *PHI = cast<PHINode>(I);
+ UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ }
+
+ CallerLPad = cast<LandingPadInst>(I);
+ }
+
+ /// getOuterResumeDest - The outer unwind destination is the target of
+ /// unwind edges introduced for calls within the inlined function.
+ BasicBlock *getOuterResumeDest() const {
+ return OuterResumeDest;
+ }
+
+ BasicBlock *getInnerResumeDest();
+
+ LandingPadInst *getLandingPadInst() const { return CallerLPad; }
+
+ /// forwardResume - Forward the 'resume' instruction to the caller's landing
+ /// pad block. When the landing pad block has only one predecessor, this is
+ /// a simple branch. When there is more than one predecessor, we need to
+ /// split the landing pad block after the landingpad instruction and jump
+ /// to there.
+ void forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
+
+ /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
+ /// destination block for the given basic block, using the values for the
+ /// original invoke's source block.
+ void addIncomingPHIValuesFor(BasicBlock *BB) const {
+ addIncomingPHIValuesForInto(BB, OuterResumeDest);
+ }
+
+ void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
+ BasicBlock::iterator I = dest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *phi = cast<PHINode>(I);
+ phi->addIncoming(UnwindDestPHIValues[i], src);
+ }
+ }
+ };
+}
+
+/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts.
+BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
+ if (InnerResumeDest) return InnerResumeDest;
+
+ // Split the landing pad.
+ BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint;
+ InnerResumeDest =
+ OuterResumeDest->splitBasicBlock(SplitPoint,
+ OuterResumeDest->getName() + ".body");
+
+ // The number of incoming edges we expect to the inner landing pad.
+ const unsigned PHICapacity = 2;
+
+ // Create corresponding new PHIs for all the PHIs in the outer landing pad.
+ BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
+ BasicBlock::iterator I = OuterResumeDest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *OuterPHI = cast<PHINode>(I);
+ PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
+ OuterPHI->getName() + ".lpad-body",
+ InsertPoint);
+ OuterPHI->replaceAllUsesWith(InnerPHI);
+ InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
+ }
+
+ // Create a PHI for the exception values.
+ InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
+ "eh.lpad-body", InsertPoint);
+ CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
+ InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
+
+ // All done.
+ return InnerResumeDest;
+}
+
+/// forwardResume - Forward the 'resume' instruction to the caller's landing pad
+/// block. When the landing pad block has only one predecessor, this is a simple
+/// branch. When there is more than one predecessor, we need to split the
+/// landing pad block after the landingpad instruction and jump to there.
+void InvokeInliningInfo::forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
+ BasicBlock *Dest = getInnerResumeDest();
+ LandingPadInst *OuterLPad = getLandingPadInst();
+ BasicBlock *Src = RI->getParent();
+
+ BranchInst::Create(Dest, Src);
+
+ // Update the PHIs in the destination. They were inserted in an order which
+ // makes this work.
+ addIncomingPHIValuesForInto(Src, Dest);
+
+ InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
+ RI->eraseFromParent();
+
+ // Append the clauses from the outer landing pad instruction into the inlined
+ // landing pad instructions.
+ for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+ E = InlinedLPads.end(); I != E; ++I) {
+ LandingPadInst *InlinedLPad = *I;
+ for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
+ OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ }
+}
+
+/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
+/// an invoke, we have to turn all of the calls that can throw into
+/// invokes. This function analyze BB to see if there are any calls, and if so,
+/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
+/// nodes in that block with the values specified in InvokeDestPHIValues.
+///
+/// Returns true to indicate that the next block should be skipped.
+static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+ InvokeInliningInfo &Invoke) {
+ LandingPadInst *LPI = Invoke.getLandingPadInst();
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *I = BBI++;
+
+ if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
+ unsigned NumClauses = LPI->getNumClauses();
+ L->reserveClauses(NumClauses);
+ for (unsigned i = 0; i != NumClauses; ++i)
+ L->addClause(LPI->getClause(i));
+ }
+
+ // We only need to check for function calls: inlined invoke
+ // instructions require no special handling.
+ CallInst *CI = dyn_cast<CallInst>(I);
+
+ // If this call cannot unwind, don't convert it to an invoke.
+ if (!CI || CI->doesNotThrow())
+ continue;
+
+ // Convert this function call into an invoke instruction. First, split the
+ // basic block.
+ BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+
+ // Delete the unconditional branch inserted by splitBasicBlock
+ BB->getInstList().pop_back();
+
+ // Create the new invoke instruction.
+ ImmutableCallSite CS(CI);
+ SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split,
+ Invoke.getOuterResumeDest(),
+ InvokeArgs, CI->getName(), BB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+
+ // Make sure that anything using the call now uses the invoke! This also
+ // updates the CallGraph if present, because it uses a WeakVH.
+ CI->replaceAllUsesWith(II);
+
+ // Delete the original call
+ Split->getInstList().pop_front();
+
+ // Update any PHI nodes in the exceptional block to indicate that there is
+ // now a new entry in them.
+ Invoke.addIncomingPHIValuesFor(BB);
+ return false;
+ }
+
+ return false;
+}
+
+/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *InvokeDest = II->getUnwindDest();
+
+ Function *Caller = FirstNewBlock->getParent();
+
+ // The inlined code is currently at the end of the function, scan from the
+ // start of the inlined code to its end, checking for stuff we need to
+ // rewrite.
+ InvokeInliningInfo Invoke(II);
+
+ // Get all of the inlined landing pad instructions.
+ SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+ for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
+ InlinedLPads.insert(II->getLandingPadInst());
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+ if (InlinedCodeInfo.ContainsCalls)
+ if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
+ // Honor a request to skip the next block.
+ ++BB;
+ continue;
+ }
+
+ // Forward any resumes that are remaining here.
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
+ Invoke.forwardResume(RI, InlinedLPads);
+ }
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+}
+
+/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
+/// into the caller, update the specified callgraph to reflect the changes we
+/// made. Note that it's possible that not all code was copied over, so only
+/// some edges of the callgraph may remain.
+static void UpdateCallGraphAfterInlining(CallSite CS,
+ Function::iterator FirstNewBlock,
+ ValueToValueMapTy &VMap,
+ InlineFunctionInfo &IFI) {
+ CallGraph &CG = *IFI.CG;
+ const Function *Caller = CS.getInstruction()->getParent()->getParent();
+ const Function *Callee = CS.getCalledFunction();
+ CallGraphNode *CalleeNode = CG[Callee];
+ CallGraphNode *CallerNode = CG[Caller];
+
+ // Since we inlined some uninlined call sites in the callee into the caller,
+ // add edges from the caller to all of the callees of the callee.
+ CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
+
+ // Consider the case where CalleeNode == CallerNode.
+ CallGraphNode::CalledFunctionsVector CallCache;
+ if (CalleeNode == CallerNode) {
+ CallCache.assign(I, E);
+ I = CallCache.begin();
+ E = CallCache.end();
+ }
+
+ for (; I != E; ++I) {
+ const Value *OrigCall = I->first;
+
+ ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
+ // Only copy the edge if the call was inlined!
+ if (VMI == VMap.end() || VMI->second == 0)
+ continue;
+
+ // If the call was inlined, but then constant folded, there is no edge to
+ // add. Check for this case.
+ Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+ if (NewCall == 0) continue;
+
+ // Remember that this call site got inlined for the client of
+ // InlineFunction.
+ IFI.InlinedCalls.push_back(NewCall);
+
+ // It's possible that inlining the callsite will cause it to go from an
+ // indirect to a direct call by resolving a function pointer. If this
+ // happens, set the callee of the new call site to a more precise
+ // destination. This can also happen if the call graph node of the caller
+ // was just unnecessarily imprecise.
+ if (I->second->getFunction() == 0)
+ if (Function *F = CallSite(NewCall).getCalledFunction()) {
+ // Indirect call site resolved to direct call.
+ CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+
+ continue;
+ }
+
+ CallerNode->addCalledFunction(CallSite(NewCall), I->second);
+ }
+
+ // Update the call graph by deleting the edge from Callee to Caller. We must
+ // do this after the loop above in case Caller and Callee are the same.
+ CallerNode->removeCallEdgeFor(CS);
+}
+
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+ const Function *CalledFunc,
+ InlineFunctionInfo &IFI,
+ unsigned ByValAlignment) {
+ Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+ // If the called function is readonly, then it could not mutate the caller's
+ // copy of the byval'd memory. In this case, it is safe to elide the copy and
+ // temporary.
+ if (CalledFunc->onlyReadsMemory()) {
+ // If the byval argument has a specified alignment that is greater than the
+ // passed in pointer, then we either have to round up the input pointer or
+ // give up on this transformation.
+ if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
+ return Arg;
+
+ // If the pointer is already known to be sufficiently aligned, or if we can
+ // round it up to a larger alignment, then we don't need a temporary.
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+ IFI.TD) >= ByValAlignment)
+ return Arg;
+
+ // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
+ // for code quality, but rarely happens and is required for correctness.
+ }
+
+ LLVMContext &Context = Arg->getContext();
+
+ Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+
+ // Create the alloca. If we have DataLayout, use nice alignment.
+ unsigned Align = 1;
+ if (IFI.TD)
+ Align = IFI.TD->getPrefTypeAlignment(AggTy);
+
+ // If the byval had an alignment specified, we *must* use at least that
+ // alignment, as it is required by the byval argument (and uses of the
+ // pointer inside the callee).
+ Align = std::max(Align, ByValAlignment);
+
+ Function *Caller = TheCall->getParent()->getParent();
+
+ Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+ &*Caller->begin()->begin());
+ // Emit a memcpy.
+ Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+ Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+ Intrinsic::memcpy,
+ Tys);
+ Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+ Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+
+ Value *Size;
+ if (IFI.TD == 0)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.TD->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DestCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ return NewAlloca;
+}
+
+// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
+// intrinsic.
+static bool isUsedByLifetimeMarker(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
+ ++UI) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// hasLifetimeMarkers - Check whether the given alloca already has
+// lifetime.start or lifetime.end intrinsics.
+static bool hasLifetimeMarkers(AllocaInst *AI) {
+ Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
+ if (AI->getType() == Int8PtrTy)
+ return isUsedByLifetimeMarker(AI);
+
+ // Do a scan to find all the casts to i8*.
+ for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
+ ++I) {
+ if (I->getType() != Int8PtrTy) continue;
+ if (I->stripPointerCasts() != AI) continue;
+ if (isUsedByLifetimeMarker(*I))
+ return true;
+ }
+ return false;
+}
+
+/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to
+/// recursively update InlinedAtEntry of a DebugLoc.
+static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
+ const DebugLoc &InlinedAtDL,
+ LLVMContext &Ctx) {
+ if (MDNode *IA = DL.getInlinedAt(Ctx)) {
+ DebugLoc NewInlinedAtDL
+ = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ NewInlinedAtDL.getAsMDNode(Ctx));
+ }
+
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ InlinedAtDL.getAsMDNode(Ctx));
+}
+
+/// fixupLineNumbers - Update inlined instructions' line numbers to
+/// to encode location where these instructions are inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+ Instruction *TheCall) {
+ DebugLoc TheCallDL = TheCall->getDebugLoc();
+ if (TheCallDL.isUnknown())
+ return;
+
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ DebugLoc DL = BI->getDebugLoc();
+ if (!DL.isUnknown()) {
+ BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
+ LLVMContext &Ctx = BI->getContext();
+ MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
+ DVI->setOperand(2, createInlinedVariable(DVI->getVariable(),
+ InlinedAt, Ctx));
+ }
+ }
+ }
+ }
+}
+
+/// InlineFunction - This function inlines the called function into the basic
+/// block of the caller. This returns false if it is not possible to inline
+/// this call. The program is still in a well defined state if this occurs
+/// though.
+///
+/// Note that this only does one level of inlining. For example, if the
+/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+/// exists in the instruction stream. Similarly this will inline a recursive
+/// function by one level.
+bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+ bool InsertLifetime) {
+ Instruction *TheCall = CS.getInstruction();
+ assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
+ "Instruction not in function!");
+
+ // If IFI has any state in it, zap it before we fill it in.
+ IFI.reset();
+
+ const Function *CalledFunc = CS.getCalledFunction();
+ if (CalledFunc == 0 || // Can't inline external function or indirect
+ CalledFunc->isDeclaration() || // call, or call to a vararg function!
+ CalledFunc->getFunctionType()->isVarArg()) return false;
+
+ // If the call to the callee is not a tail call, we must clear the 'tail'
+ // flags on any calls that we inline.
+ bool MustClearTailCallFlags =
+ !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
+
+ // If the call to the callee cannot throw, set the 'nounwind' flag on any
+ // calls that we inline.
+ bool MarkNoUnwind = CS.doesNotThrow();
+
+ BasicBlock *OrigBB = TheCall->getParent();
+ Function *Caller = OrigBB->getParent();
+
+ // GC poses two hazards to inlining, which only occur when the callee has GC:
+ // 1. If the caller has no GC, then the callee's GC must be propagated to the
+ // caller.
+ // 2. If the caller has a differing GC, it is invalid to inline.
+ if (CalledFunc->hasGC()) {
+ if (!Caller->hasGC())
+ Caller->setGC(CalledFunc->getGC());
+ else if (CalledFunc->getGC() != Caller->getGC())
+ return false;
+ }
+
+ // Get the personality function from the callee if it contains a landing pad.
+ Value *CalleePersonality = 0;
+ for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
+ I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ const LandingPadInst *LP = BB->getLandingPadInst();
+ CalleePersonality = LP->getPersonalityFn();
+ break;
+ }
+
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ if (CalleePersonality) {
+ for (Function::const_iterator I = Caller->begin(), E = Caller->end();
+ I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ const LandingPadInst *LP = BB->getLandingPadInst();
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ if (LP->getPersonalityFn() != CalleePersonality)
+ return false;
+
+ break;
+ }
+ }
+
+ // Get an iterator to the last basic block in the function, which will have
+ // the new function inlined after it.
+ Function::iterator LastBlock = &Caller->back();
+
+ // Make sure to capture all of the return instructions from the cloned
+ // function.
+ SmallVector<ReturnInst*, 8> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+ Function::iterator FirstNewBlock;
+
+ { // Scope to destroy VMap after cloning.
+ ValueToValueMapTy VMap;
+
+ assert(CalledFunc->arg_size() == CS.arg_size() &&
+ "No varargs calls can be inlined!");
+
+ // Calculate the vector of arguments to pass into the function cloner, which
+ // matches up the formal to the actual argument values.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ unsigned ArgNo = 0;
+ for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
+ Value *ActualArg = *AI;
+
+ // When byval arguments actually inlined, we need to make the copy implied
+ // by them explicit. However, we don't do this if the callee is readonly
+ // or readnone, because the copy would be unneeded: the callee doesn't
+ // modify the struct.
+ if (CS.isByValArgument(ArgNo)) {
+ ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+ CalledFunc->getParamAlignment(ArgNo+1));
+
+ // Calls that we inline may use the new alloca, so we need to clear
+ // their 'tail' flags if HandleByValArgument introduced a new alloca and
+ // the callee has calls.
+ MustClearTailCallFlags |= ActualArg != *AI;
+ }
+
+ VMap[I] = ActualArg;
+ }
+
+ // We want the inliner to prune the code as it copies. We would LOVE to
+ // have no dead or constant instructions leftover after inlining occurs
+ // (which can happen, e.g., because an argument was constant), but we'll be
+ // happy with whatever the cloner can do.
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
+ &InlinedFunctionInfo, IFI.TD, TheCall);
+
+ // Remember the first block that is newly cloned over.
+ FirstNewBlock = LastBlock; ++FirstNewBlock;
+
+ // Update the callgraph if requested.
+ if (IFI.CG)
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+ // Update inlined instructions' line number information.
+ fixupLineNumbers(Caller, FirstNewBlock, TheCall);
+ }
+
+ // If there are any alloca instructions in the block that used to be the entry
+ // block for the callee, move them to the entry block of the caller. First
+ // calculate which instruction they should be inserted before. We insert the
+ // instructions at the end of the current alloca list.
+ {
+ BasicBlock::iterator InsertPoint = Caller->begin()->begin();
+ for (BasicBlock::iterator I = FirstNewBlock->begin(),
+ E = FirstNewBlock->end(); I != E; ) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+ if (AI == 0) continue;
+
+ // If the alloca is now dead, remove it. This often occurs due to code
+ // specialization.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
+
+ if (!isa<Constant>(AI->getArraySize()))
+ continue;
+
+ // Keep track of the static allocas that we inline into the caller.
+ IFI.StaticAllocas.push_back(AI);
+
+ // Scan for the block of allocas that we can move over, and move them
+ // all at once.
+ while (isa<AllocaInst>(I) &&
+ isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
+ ++I;
+ }
+
+ // Transfer all of the allocas over in a block. Using splice means
+ // that the instructions aren't removed from the symbol table, then
+ // reinserted.
+ Caller->getEntryBlock().getInstList().splice(InsertPoint,
+ FirstNewBlock->getInstList(),
+ AI, I);
+ }
+ }
+
+ // Leave lifetime markers for the static alloca's, scoping them to the
+ // function we just inlined.
+ if (InsertLifetime && !IFI.StaticAllocas.empty()) {
+ IRBuilder<> builder(FirstNewBlock->begin());
+ for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
+ AllocaInst *AI = IFI.StaticAllocas[ai];
+
+ // If the alloca is already scoped to something smaller than the whole
+ // function then there's no need to add redundant, less accurate markers.
+ if (hasLifetimeMarkers(AI))
+ continue;
+
+ // Try to determine the size of the allocation.
+ ConstantInt *AllocaSize = 0;
+ if (ConstantInt *AIArraySize =
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ if (IFI.TD) {
+ Type *AllocaType = AI->getAllocatedType();
+ uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
+ uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+ assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
+ // Check that array size doesn't saturate uint64_t and doesn't
+ // overflow when it's multiplied by type size.
+ if (AllocaArraySize != ~0ULL &&
+ UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+ AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+ AllocaArraySize * AllocaTypeSize);
+ }
+ }
+ }
+
+ builder.CreateLifetimeStart(AI, AllocaSize);
+ for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
+ IRBuilder<> builder(Returns[ri]);
+ builder.CreateLifetimeEnd(AI, AllocaSize);
+ }
+ }
+ }
+
+ // If the inlined code contained dynamic alloca instructions, wrap the inlined
+ // code with llvm.stacksave/llvm.stackrestore intrinsics.
+ if (InlinedFunctionInfo.ContainsDynamicAllocas) {
+ Module *M = Caller->getParent();
+ // Get the two intrinsics we care about.
+ Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
+ Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
+
+ // Insert the llvm.stacksave.
+ CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+ .CreateCall(StackSave, "savedstack");
+
+ // Insert a call to llvm.stackrestore before any return instructions in the
+ // inlined function.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
+ }
+ }
+
+ // If we are inlining tail call instruction through a call site that isn't
+ // marked 'tail', we must remove the tail marker for any calls in the inlined
+ // code. Also, calls inlined through a 'nounwind' call site should be marked
+ // 'nounwind'.
+ if (InlinedFunctionInfo.ContainsCalls &&
+ (MustClearTailCallFlags || MarkNoUnwind)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (MustClearTailCallFlags)
+ CI->setTailCall(false);
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+
+ // If we are inlining for an invoke instruction, we must make sure to rewrite
+ // any call instructions into invoke instructions.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+
+ // If we cloned in _exactly one_ basic block, and if that block ends in a
+ // return instruction, we splice the body of the inlined callee directly into
+ // the calling basic block.
+ if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+ // Move all of the instructions right before the call.
+ OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+ FirstNewBlock->begin(), FirstNewBlock->end());
+ // Remove the cloned basic block.
+ Caller->getBasicBlockList().pop_back();
+
+ // If the call site was an invoke instruction, add a branch to the normal
+ // destination.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // If the return instruction returned a value, replace uses of the call with
+ // uses of the returned value.
+ if (!TheCall->use_empty()) {
+ ReturnInst *R = Returns[0];
+ if (TheCall == R->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(R->getReturnValue());
+ }
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // Since we are now done with the return instruction, delete it also.
+ Returns[0]->eraseFromParent();
+
+ // We are now done with the inlining.
+ return true;
+ }
+
+ // Otherwise, we have the normal case, of more than one block to inline or
+ // multiple return sites.
+
+ // We want to clone the entire callee function into the hole between the
+ // "starter" and "ender" blocks. How we accomplish this depends on whether
+ // this is an invoke instruction or a call instruction.
+ BasicBlock *AfterCallBB;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+
+ // Add an unconditional branch to make this look like the CallInst case...
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // Split the basic block. This guarantees that no PHI nodes will have to be
+ // updated due to new incoming edges, and make the invoke case more
+ // symmetric to the call case.
+ AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+ CalledFunc->getName()+".exit");
+
+ } else { // It's a call
+ // If this is a call instruction, we need to split the basic block that
+ // the call lives in.
+ //
+ AfterCallBB = OrigBB->splitBasicBlock(TheCall,
+ CalledFunc->getName()+".exit");
+ }
+
+ // Change the branch that used to go to AfterCallBB to branch to the first
+ // basic block of the inlined function.
+ //
+ TerminatorInst *Br = OrigBB->getTerminator();
+ assert(Br && Br->getOpcode() == Instruction::Br &&
+ "splitBasicBlock broken!");
+ Br->setOperand(0, FirstNewBlock);
+
+
+ // Now that the function is correct, make it a little bit nicer. In
+ // particular, move the basic blocks inserted from the end of the function
+ // into the space made by splitting the source basic block.
+ Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
+ FirstNewBlock, Caller->end());
+
+ // Handle all of the return instructions that we just cloned in, and eliminate
+ // any users of the original call/invoke instruction.
+ Type *RTy = CalledFunc->getReturnType();
+
+ PHINode *PHI = 0;
+ if (Returns.size() > 1) {
+ // The PHI node should go at the front of the new basic block to merge all
+ // possible incoming values.
+ if (!TheCall->use_empty()) {
+ PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
+ AfterCallBB->begin());
+ // Anything that used the result of the function call should now use the
+ // PHI node as their operand.
+ TheCall->replaceAllUsesWith(PHI);
+ }
+
+ // Loop over all of the return instructions adding entries to the PHI node
+ // as appropriate.
+ if (PHI) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ assert(RI->getReturnValue()->getType() == PHI->getType() &&
+ "Ret value not consistent in function!");
+ PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+ }
+ }
+
+
+ // Add a branch to the merge points and remove return instructions.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ BranchInst::Create(AfterCallBB, RI);
+ RI->eraseFromParent();
+ }
+ } else if (!Returns.empty()) {
+ // Otherwise, if there is exactly one return value, just replace anything
+ // using the return value of the call with the computed value.
+ if (!TheCall->use_empty()) {
+ if (TheCall == Returns[0]->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+ }
+
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
+ // Splice the code from the return block into the block that it will return
+ // to, which contains the code that was after the call.
+ AfterCallBB->getInstList().splice(AfterCallBB->begin(),
+ ReturnBB->getInstList());
+
+ // Delete the return instruction now and empty ReturnBB now.
+ Returns[0]->eraseFromParent();
+ ReturnBB->eraseFromParent();
+ } else if (!TheCall->use_empty()) {
+ // No returns, but something is using the return value of the call. Just
+ // nuke the result.
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ }
+
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // We should always be able to fold the entry block of the function into the
+ // single predecessor of the block...
+ assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+ BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
+
+ // Splice the code entry block into calling block, right before the
+ // unconditional branch.
+ CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+ OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
+
+ // Remove the unconditional branch.
+ OrigBB->getInstList().erase(Br);
+
+ // Now we can remove the CalleeEntry block, which is now empty.
+ Caller->getBasicBlockList().erase(CalleeEntry);
+
+ // If we inserted a phi node, check to see if it has a single value (e.g. all
+ // the entries are the same or undef). If so, remove the PHI so it doesn't
+ // block other optimizations.
+ if (PHI) {
+ if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
new file mode 100644
index 000000000000..a020bc7398f5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -0,0 +1,64 @@
+//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that gives instructions names, this is mostly
+// useful when diffing the effect of an optimization because deleting an
+// unnamed instruction can change all other instruction numbering, making the
+// diff very noisy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ struct InstNamer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) {
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
+ AI != AE; ++AI)
+ if (!AI->hasName() && !AI->getType()->isVoidTy())
+ AI->setName("arg");
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ BB->setName("bb");
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->hasName() && !I->getType()->isVoidTy())
+ I->setName("tmp");
+ }
+ return true;
+ }
+ };
+
+ char InstNamer::ID = 0;
+}
+
+INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false)
+char &llvm::InstructionNamerID = InstNamer::ID;
+//===----------------------------------------------------------------------===//
+//
+// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
+//
+FunctionPass *llvm::createInstructionNamerPass() {
+ return new InstNamer();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
new file mode 100644
index 000000000000..3cb8ded8506a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -0,0 +1,524 @@
+//===-- IntegerDivision.cpp - Expand integer division ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of 32bit scalar integer division for
+// targets that don't have native support. It's largely derived from
+// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the
+// amount of control flow
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "integer-division"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+
+/// Generate code to compute the remainder of two signed integers. Returns the
+/// remainder, which will have the sign of the dividend. Builder's insert point
+/// should be pointing where the caller wants code generated, e.g. at the srem
+/// instruction. This will generate a urem in the process, and Builder's insert
+/// point will be pointing at the uren (if present, i.e. not folded), ready to
+/// be expanded if the user wishes
+static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+
+ // ; %dividend_sgn = ashr i32 %dividend, 31
+ // ; %divisor_sgn = ashr i32 %divisor, 31
+ // ; %dvd_xor = xor i32 %dividend, %dividend_sgn
+ // ; %dvs_xor = xor i32 %divisor, %divisor_sgn
+ // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn
+ // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn
+ // ; %urem = urem i32 %dividend, %divisor
+ // ; %xored = xor i32 %urem, %dividend_sgn
+ // ; %srem = sub i32 %xored, %dividend_sgn
+ Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne);
+ Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *DvdXor = Builder.CreateXor(Dividend, DividendSign);
+ Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign);
+ Value *UDividend = Builder.CreateSub(DvdXor, DividendSign);
+ Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign);
+ Value *URem = Builder.CreateURem(UDividend, UDivisor);
+ Value *Xored = Builder.CreateXor(URem, DividendSign);
+ Value *SRem = Builder.CreateSub(Xored, DividendSign);
+
+ if (Instruction *URemInst = dyn_cast<Instruction>(URem))
+ Builder.SetInsertPoint(URemInst);
+
+ return SRem;
+}
+
+
+/// Generate code to compute the remainder of two unsigned integers. Returns the
+/// remainder. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the urem instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes
+static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Remainder = Dividend - Quotient*Divisor
+
+ // ; %quotient = udiv i32 %dividend, %divisor
+ // ; %product = mul i32 %divisor, %quotient
+ // ; %remainder = sub i32 %dividend, %product
+ Value *Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ Value *Product = Builder.CreateMul(Divisor, Quotient);
+ Value *Remainder = Builder.CreateSub(Dividend, Product);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Quotient))
+ Builder.SetInsertPoint(UDiv);
+
+ return Remainder;
+}
+
+/// Generate code to divide two signed integers. Returns the quotient, rounded
+/// towards 0. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the sdiv instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes.
+static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Implementation taken from compiler-rt's __divsi3
+
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+
+ // ; %tmp = ashr i32 %dividend, 31
+ // ; %tmp1 = ashr i32 %divisor, 31
+ // ; %tmp2 = xor i32 %tmp, %dividend
+ // ; %u_dvnd = sub nsw i32 %tmp2, %tmp
+ // ; %tmp3 = xor i32 %tmp1, %divisor
+ // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1
+ // ; %q_sgn = xor i32 %tmp1, %tmp
+ // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr
+ // ; %tmp4 = xor i32 %q_mag, %q_sgn
+ // ; %q = sub i32 %tmp4, %q_sgn
+ Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne);
+ Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *Tmp2 = Builder.CreateXor(Tmp, Dividend);
+ Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp);
+ Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor);
+ Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1);
+ Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp);
+ Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr);
+ Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn);
+ Value *Q = Builder.CreateSub(Tmp4, Q_Sgn);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag))
+ Builder.SetInsertPoint(UDiv);
+
+ return Q;
+}
+
+/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
+/// quotient, rounded towards 0. Builder's insert point should be pointing where
+/// the caller wants code generated, e.g. at the udiv instruction.
+static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // The basic algorithm can be found in the compiler-rt project's
+ // implementation of __udivsi3.c. Here, we do a lower-level IR based approach
+ // that's been hand-tuned to lessen the amount of control flow involved.
+
+ // Some helper values
+ IntegerType *I32Ty = Builder.getInt32Ty();
+
+ ConstantInt *Zero = Builder.getInt32(0);
+ ConstantInt *One = Builder.getInt32(1);
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+ ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1);
+ ConstantInt *True = Builder.getTrue();
+
+ BasicBlock *IBB = Builder.GetInsertBlock();
+ Function *F = IBB->getParent();
+ Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ I32Ty);
+
+ // Our CFG is going to look like:
+ // +---------------------+
+ // | special-cases |
+ // | ... |
+ // +---------------------+
+ // | |
+ // | +----------+
+ // | | bb1 |
+ // | | ... |
+ // | +----------+
+ // | | |
+ // | | +------------+
+ // | | | preheader |
+ // | | | ... |
+ // | | +------------+
+ // | | |
+ // | | | +---+
+ // | | | | |
+ // | | +------------+ |
+ // | | | do-while | |
+ // | | | ... | |
+ // | | +------------+ |
+ // | | | | |
+ // | +-----------+ +---+
+ // | | loop-exit |
+ // | | ... |
+ // | +-----------+
+ // | |
+ // +-------+
+ // | ... |
+ // | end |
+ // +-------+
+ BasicBlock *SpecialCases = Builder.GetInsertBlock();
+ SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
+ BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
+ "udiv-end");
+ BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(),
+ "udiv-loop-exit", F, End);
+ BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(),
+ "udiv-do-while", F, End);
+ BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
+ "udiv-preheader", F, End);
+ BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(),
+ "udiv-bb1", F, End);
+
+ // We'll be overwriting the terminator to insert our extra blocks
+ SpecialCases->getTerminator()->eraseFromParent();
+
+ // First off, check for special cases: dividend or divisor is zero, divisor
+ // is greater than dividend, and divisor is 1.
+ // ; special-cases:
+ // ; %ret0_1 = icmp eq i32 %divisor, 0
+ // ; %ret0_2 = icmp eq i32 %dividend, 0
+ // ; %ret0_3 = or i1 %ret0_1, %ret0_2
+ // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true)
+ // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
+ // ; %sr = sub nsw i32 %tmp0, %tmp1
+ // ; %ret0_4 = icmp ugt i32 %sr, 31
+ // ; %ret0 = or i1 %ret0_3, %ret0_4
+ // ; %retDividend = icmp eq i32 %sr, 31
+ // ; %retVal = select i1 %ret0, i32 0, i32 %dividend
+ // ; %earlyRet = or i1 %ret0, %retDividend
+ // ; br i1 %earlyRet, label %end, label %bb1
+ Builder.SetInsertPoint(SpecialCases);
+ Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
+ Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
+ Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
+ Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True);
+ Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True);
+ Value *SR = Builder.CreateSub(Tmp0, Tmp1);
+ Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne);
+ Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
+ Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne);
+ Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
+ Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
+ Builder.CreateCondBr(EarlyRet, End, BB1);
+
+ // ; bb1: ; preds = %special-cases
+ // ; %sr_1 = add i32 %sr, 1
+ // ; %tmp2 = sub i32 31, %sr
+ // ; %q = shl i32 %dividend, %tmp2
+ // ; %skipLoop = icmp eq i32 %sr_1, 0
+ // ; br i1 %skipLoop, label %loop-exit, label %preheader
+ Builder.SetInsertPoint(BB1);
+ Value *SR_1 = Builder.CreateAdd(SR, One);
+ Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR);
+ Value *Q = Builder.CreateShl(Dividend, Tmp2);
+ Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
+ Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
+
+ // ; preheader: ; preds = %bb1
+ // ; %tmp3 = lshr i32 %dividend, %sr_1
+ // ; %tmp4 = add i32 %divisor, -1
+ // ; br label %do-while
+ Builder.SetInsertPoint(Preheader);
+ Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1);
+ Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne);
+ Builder.CreateBr(DoWhile);
+
+ // ; do-while: ; preds = %do-while, %preheader
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ // ; %tmp5 = shl i32 %r_1, 1
+ // ; %tmp6 = lshr i32 %q_2, 31
+ // ; %tmp7 = or i32 %tmp5, %tmp6
+ // ; %tmp8 = shl i32 %q_2, 1
+ // ; %q_1 = or i32 %carry_1, %tmp8
+ // ; %tmp9 = sub i32 %tmp4, %tmp7
+ // ; %tmp10 = ashr i32 %tmp9, 31
+ // ; %carry = and i32 %tmp10, 1
+ // ; %tmp11 = and i32 %tmp10, %divisor
+ // ; %r = sub i32 %tmp7, %tmp11
+ // ; %sr_2 = add i32 %sr_3, -1
+ // ; %tmp12 = icmp eq i32 %sr_2, 0
+ // ; br i1 %tmp12, label %loop-exit, label %do-while
+ Builder.SetInsertPoint(DoWhile);
+ PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *R_1 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2);
+ Value *Tmp5 = Builder.CreateShl(R_1, One);
+ Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne);
+ Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6);
+ Value *Tmp8 = Builder.CreateShl(Q_2, One);
+ Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8);
+ Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7);
+ Value *Tmp10 = Builder.CreateAShr(Tmp9, 31);
+ Value *Carry = Builder.CreateAnd(Tmp10, One);
+ Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
+ Value *R = Builder.CreateSub(Tmp7, Tmp11);
+ Value *SR_2 = Builder.CreateAdd(SR_3, NegOne);
+ Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
+ Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
+
+ // ; loop-exit: ; preds = %do-while, %bb1
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ // ; %tmp13 = shl i32 %q_3, 1
+ // ; %q_4 = or i32 %carry_2, %tmp13
+ // ; br label %end
+ Builder.SetInsertPoint(LoopExit);
+ PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2);
+ Value *Tmp13 = Builder.CreateShl(Q_3, One);
+ Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13);
+ Builder.CreateBr(End);
+
+ // ; end: ; preds = %loop-exit, %special-cases
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ // ; ret i32 %q_5
+ Builder.SetInsertPoint(End, End->begin());
+ PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2);
+
+ // Populate the Phis, since all values have now been created. Our Phis were:
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ Carry_1->addIncoming(Zero, Preheader);
+ Carry_1->addIncoming(Carry, DoWhile);
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ SR_3->addIncoming(SR_1, Preheader);
+ SR_3->addIncoming(SR_2, DoWhile);
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ R_1->addIncoming(Tmp3, Preheader);
+ R_1->addIncoming(R, DoWhile);
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ Q_2->addIncoming(Q, Preheader);
+ Q_2->addIncoming(Q_1, DoWhile);
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ Carry_2->addIncoming(Zero, BB1);
+ Carry_2->addIncoming(Carry, DoWhile);
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ Q_3->addIncoming(Q, BB1);
+ Q_3->addIncoming(Q_1, DoWhile);
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ Q_5->addIncoming(Q_4, LoopExit);
+ Q_5->addIncoming(RetVal, SpecialCases);
+
+ return Q_5;
+}
+
+/// Generate code to calculate the remainder of two integers, replacing Rem with
+/// the generated code. This currently generates code using the udiv expansion,
+/// but future work includes generating more specialized code, e.g. when more
+/// information about the operands are known. Currently only implements 32bit
+/// scalar division (due to udiv's limitation), but future work is removing this
+/// limitation.
+///
+/// @brief Replace Rem with generated code.
+bool llvm::expandRemainder(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ IRBuilder<> Builder(Rem);
+
+ // First prepare the sign if it's a signed remainder
+ if (Rem->getOpcode() == Instruction::SRem) {
+ Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1), Builder);
+
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // If we didn't actually generate a udiv instruction, we're done
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ if (!BO || BO->getOpcode() != Instruction::URem)
+ return true;
+
+ Rem = BO;
+ }
+
+ Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1),
+ Builder);
+
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // Expand the udiv
+ if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) {
+ assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?");
+ expandDivision(UDiv);
+ }
+
+ return true;
+}
+
+
+/// Generate code to divide two integers, replacing Div with the generated
+/// code. This currently generates code similarly to compiler-rt's
+/// implementations, but future work includes generating more specialized code
+/// when more information about the operands are known. Currently only
+/// implements 32bit scalar division, but future work is removing this
+/// limitation.
+///
+/// @brief Replace Div with generated code.
+bool llvm::expandDivision(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ IRBuilder<> Builder(Div);
+
+ if (Div->getType()->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ // First prepare the sign if it's a signed division
+ if (Div->getOpcode() == Instruction::SDiv) {
+ // Lower the code to unsigned division, and reset Div to point to the udiv.
+ Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
+ Div->getOperand(1), Builder);
+ Div->replaceAllUsesWith(Quotient);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ // If we didn't actually generate a udiv instruction, we're done
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ if (!BO || BO->getOpcode() != Instruction::UDiv)
+ return true;
+
+ Div = BO;
+ }
+
+ // Insert the unsigned division code
+ Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
+ Div->getOperand(1),
+ Builder);
+ Div->replaceAllUsesWith(Quotient);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return true;
+}
+
+/// Generate code to compute the remainder of two integers of bitwidth up to
+/// 32 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 32 bits; that is, these routines are good for targets
+/// that have no or very little suppport for smaller than 32 bit integer
+/// arithmetic.
+///
+/// @brief Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ Type *RemTy = Rem->getType();
+ if (RemTy->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+ if (RemTyBitWidth > 32)
+ llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+ if (RemTyBitWidth == 32)
+ return expandRemainder(Rem);
+
+ // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+ // with 32 bit division.
+ IRBuilder<> Builder(Rem);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtRem;
+ Value *Trunc;
+ Type *Int32Ty = Builder.getInt32Ty();
+
+ if (Rem->getOpcode() == Instruction::SRem) {
+ ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty);
+ ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty);
+ ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+ Rem->replaceAllUsesWith(Trunc);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
+
+
+/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 32 bits; that is, these routines are good for targets that have no
+/// or very little support for smaller than 32 bit integer arithmetic.
+///
+/// @brief Replace Div with emulation code.
+bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ Type *DivTy = Div->getType();
+ if (DivTy->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ if (DivTyBitWidth > 32)
+ llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+ if (DivTyBitWidth == 32)
+ return expandDivision(Div);
+
+ // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+ // with 32 bit division.
+ IRBuilder<> Builder(Div);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtDiv;
+ Value *Trunc;
+ Type *Int32Ty = Builder.getInt32Ty();
+
+ if (Div->getOpcode() == Instruction::SDiv) {
+ ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty);
+ ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+ Div->replaceAllUsesWith(Trunc);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
new file mode 100644
index 000000000000..2d1b166c2101
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -0,0 +1,292 @@
+//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops by placing phi nodes at the end of the loops for
+// all values that are live across the loop boundary. For example, it turns
+// the left into the right code:
+//
+// for (...) for (...)
+// if (c) if (c)
+// X1 = ... X1 = ...
+// else else
+// X2 = ... X2 = ...
+// X3 = phi(X1, X2) X3 = phi(X1, X2)
+// ... = X3 + 4 X4 = phi(X3)
+// ... = X4 + 4
+//
+// This is still valid LLVM; the extra phi nodes are purely redundant, and will
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
+// LoopUnswitching, simpler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lcssa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumLCSSA, "Number of live out of a loop variables");
+
+namespace {
+ struct LCSSA : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSA() : LoopPass(ID) {
+ initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Cached analysis information for the current function.
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ std::vector<BasicBlock*> LoopBlocks;
+ PredIteratorCache PredCache;
+ Loop *L;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG. It maintains both of these,
+ /// as well as the CFG. It also requires dominator information.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<ScalarEvolution>();
+ }
+ private:
+ bool ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ /// verifyAnalysis() - Verify loop nest.
+ virtual void verifyAnalysis() const {
+ // Check the special guarantees that LCSSA makes.
+ assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
+ }
+
+ /// inLoop - returns true if the given block is within the current loop
+ bool inLoop(BasicBlock *B) const {
+ return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
+ }
+ };
+}
+
+char LCSSA::ID = 0;
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+
+Pass *llvm::createLCSSAPass() { return new LCSSA(); }
+char &llvm::LCSSAID = LCSSA::ID;
+
+
+/// BlockDominatesAnExit - Return true if the specified block dominates at least
+/// one of the blocks in the specified list.
+static bool BlockDominatesAnExit(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ DominatorTree *DT) {
+ DomTreeNode *DomNode = DT->getNode(BB);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+ return true;
+
+ return false;
+}
+
+
+/// runOnFunction - Process all loops in the function, inner-most out.
+bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
+ L = TheLoop;
+
+ DT = &getAnalysis<DominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
+
+ // Get the set of exiting blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ if (ExitBlocks.empty())
+ return false;
+
+ // Speed up queries by creating a sorted vector of blocks.
+ LoopBlocks.clear();
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+ array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
+
+ // Look at all the instructions in the loop, checking to see if they have uses
+ // outside the loop. If so, rewrite those uses.
+ bool MadeChange = false;
+
+ for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
+ BBI != E; ++BBI) {
+ BasicBlock *BB = *BBI;
+
+ // For large loops, avoid use-scanning by using dominance information: In
+ // particular, if a block does not dominate any of the loop exits, then none
+ // of the values defined in the block could be used outside the loop.
+ if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Reject two common cases fast: instructions with no uses (like stores)
+ // and instructions with one use that is in the same block as this.
+ if (I->use_empty() ||
+ (I->hasOneUse() && I->use_back()->getParent() == BB &&
+ !isa<PHINode>(I->use_back())))
+ continue;
+
+ MadeChange |= ProcessInstruction(I, ExitBlocks);
+ }
+ }
+
+ // If we modified the code, remove any caches about the loop from SCEV to
+ // avoid dangling entries.
+ // FIXME: This is a big hammer, can we clear the cache more selectively?
+ if (SE && MadeChange)
+ SE->forgetLoop(L);
+
+ assert(L->isLCSSAForm(*DT));
+ PredCache.clear();
+
+ return MadeChange;
+}
+
+/// isExitBlock - Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] == BB)
+ return true;
+ return false;
+}
+
+/// ProcessInstruction - Given an instruction in the loop, check to see if it
+/// has any uses that are outside the current loop. If so, insert LCSSA PHI
+/// nodes and rewrite the uses.
+bool LCSSA::ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ SmallVector<Use*, 16> UsesToRewrite;
+
+ BasicBlock *InstBB = Inst->getParent();
+
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ UserBB = PN->getIncomingBlock(UI);
+
+ if (InstBB != UserBB && !inLoop(UserBB))
+ UsesToRewrite.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the loop, exit with no change.
+ if (UsesToRewrite.empty()) return false;
+
+ ++NumLCSSA; // We are applying the transformation
+
+ // Invoke instructions are special in that their result value is not available
+ // along their unwind edge. The code below tests to see whether DomBB dominates
+ // the value, so adjust DomBB to the normal destination block, which is
+ // effectively where the value is first usable.
+ BasicBlock *DomBB = Inst->getParent();
+ if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
+ DomBB = Inv->getNormalDest();
+
+ DomTreeNode *DomNode = DT->getNode(DomBB);
+
+ SmallVector<PHINode*, 16> AddedPHIs;
+
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(Inst->getType(), Inst->getName());
+
+ // Insert the LCSSA phi's into all of the exit blocks dominated by the
+ // value, and add them to the Phi's map.
+ for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
+ BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+ BasicBlock *ExitBB = *BBI;
+ if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
+
+ // If we already inserted something for this BB, don't reprocess it.
+ if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
+
+ PHINode *PN = PHINode::Create(Inst->getType(),
+ PredCache.GetNumPreds(ExitBB),
+ Inst->getName()+".lcssa",
+ ExitBB->begin());
+
+ // Add inputs from inside the loop for this PHI.
+ for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
+ PN->addIncoming(Inst, *PI);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!inLoop(*PI))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(
+ PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+ }
+
+ AddedPHIs.push_back(PN);
+
+ // Remember that this phi makes the value alive in this block.
+ SSAUpdate.AddAvailableValue(ExitBB, PN);
+ }
+
+ // Rewrite all uses outside the loop in terms of the new PHIs we just
+ // inserted.
+ for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses in
+ // the same block. It assumes the PHI we inserted is at the end of the
+ // block.
+ Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
+
+ if (isa<PHINode>(UserBB->begin()) &&
+ isExitBlock(UserBB, ExitBlocks)) {
+ // Tell the VHs that the uses changed. This updates SCEV's caches.
+ if (UsesToRewrite[i]->get()->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
+ UsesToRewrite[i]->set(UserBB->begin());
+ continue;
+ }
+
+ // Otherwise, do full PHI insertion.
+ SSAUpdate.RewriteUse(*UsesToRewrite[i]);
+ }
+
+ // Remove PHI nodes that did not have any uses rewritten.
+ for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
+ if (AddedPHIs[i]->use_empty())
+ AddedPHIs[i]->eraseFromParent();
+ }
+
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
new file mode 100644
index 000000000000..be80d34d960f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -0,0 +1,1001 @@
+//===-- Local.cpp - Functions to perform local transformations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform various local transformations to the
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Local constant propagation.
+//
+
+/// ConstantFoldTerminator - If a terminator instruction is predicated on a
+/// constant value, convert it into an unconditional branch to the constant
+/// destination. This is a nontrivial operation because the successors of this
+/// basic block must have their PHI nodes updated.
+/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
+/// conditions and indirectbr addresses this might make dead if
+/// DeleteDeadConditions is true.
+bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
+ const TargetLibraryInfo *TLI) {
+ TerminatorInst *T = BB->getTerminator();
+ IRBuilder<> Builder(T);
+
+ // Branch - See if we are conditional jumping on constant
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+ BasicBlock *Dest1 = BI->getSuccessor(0);
+ BasicBlock *Dest2 = BI->getSuccessor(1);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // Are we branching on constant?
+ // YES. Change to unconditional branch...
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+ //cerr << "Function: " << T->getParent()->getParent()
+ // << "\nRemoving branch from " << T->getParent()
+ // << "\n\nTo: " << OldDest << endl;
+
+ // Let the basic block know that we are letting go of it. Based on this,
+ // it will adjust it's PHI nodes.
+ OldDest->removePredecessor(BB);
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Destination);
+ BI->eraseFromParent();
+ return true;
+ }
+
+ if (Dest2 == Dest1) { // Conditional branch to same location?
+ // This branch matches something like this:
+ // br bool %cond, label %Dest, label %Dest
+ // and changes it into: br label %Dest
+
+ // Let the basic block know that we are letting go of one copy of it.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ Dest1->removePredecessor(BI->getParent());
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Dest1);
+ Value *Cond = BI->getCondition();
+ BI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
+ return true;
+ }
+ return false;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // If we are switching on a constant, we can convert the switch into a
+ // single branch instruction!
+ ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ BasicBlock *TheOnlyDest = SI->getDefaultDest();
+ BasicBlock *DefaultDest = TheOnlyDest;
+
+ // Figure out which case it goes to.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ // Found case matching a constant operand?
+ if (i.getCaseValue() == CI) {
+ TheOnlyDest = i.getCaseSuccessor();
+ break;
+ }
+
+ // Check to see if this branch is going to the same place as the default
+ // dest. If so, eliminate it as an explicit compare.
+ if (i.getCaseSuccessor() == DefaultDest) {
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ // MD should have 2 + NumCases operands.
+ if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) {
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+ // Merge weight of this case to the default weight.
+ unsigned idx = i.getCaseIndex();
+ Weights[0] += Weights[idx+1];
+ // Remove weight for this case.
+ std::swap(Weights[idx+1], Weights.back());
+ Weights.pop_back();
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(Weights));
+ }
+ // Remove this entry.
+ DefaultDest->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e;
+ continue;
+ }
+
+ // Otherwise, check to see if the switch only branches to one destination.
+ // We do this by reseting "TheOnlyDest" to null when we find two non-equal
+ // destinations.
+ if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0;
+ }
+
+ if (CI && !TheOnlyDest) {
+ // Branching on a constant, but not any of the cases, go to the default
+ // successor.
+ TheOnlyDest = SI->getDefaultDest();
+ }
+
+ // If we found a single destination that we can fold the switch into, do so
+ // now.
+ if (TheOnlyDest) {
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+ BasicBlock *BB = SI->getParent();
+
+ // Remove entries from PHI nodes which we no longer branch to...
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == TheOnlyDest)
+ TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest
+ else
+ Succ->removePredecessor(BB);
+ }
+
+ // Delete the old switch.
+ Value *Cond = SI->getCondition();
+ SI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
+ return true;
+ }
+
+ if (SI->getNumCases() == 1) {
+ // Otherwise, we can fold this switch into a conditional branch
+ // instruction if it has only one non-default destination.
+ SwitchInst::CaseIt FirstCase = SI->case_begin();
+ IntegersSubset& Case = FirstCase.getCaseValueEx();
+ if (Case.isSingleNumber()) {
+ // FIXME: Currently work with ConstantInt based numbers.
+ Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
+ Case.getSingleNumber(0).toConstantInt(),
+ "cond");
+
+ // Insert the new branch.
+ BranchInst *NewBr = Builder.CreateCondBr(Cond,
+ FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ if (MD && MD->getNumOperands() == 3) {
+ ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
+ ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
+ assert(SICase && SIDef);
+ // The TrueWeight should be the weight for the single case of SI.
+ NewBr->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(SICase->getValue().getZExtValue(),
+ SIDef->getValue().getZExtValue()));
+ }
+
+ // Delete the old switch.
+ SI->eraseFromParent();
+ return true;
+ }
+ }
+ return false;
+ }
+
+ if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) {
+ // indirectbr blockaddress(@F, @BB) -> br label @BB
+ if (BlockAddress *BA =
+ dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
+ BasicBlock *TheOnlyDest = BA->getBasicBlock();
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ if (IBI->getDestination(i) == TheOnlyDest)
+ TheOnlyDest = 0;
+ else
+ IBI->getDestination(i)->removePredecessor(IBI->getParent());
+ }
+ Value *Address = IBI->getAddress();
+ IBI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
+
+ // If we didn't find our destination in the IBI successor list, then we
+ // have undefined behavior. Replace the unconditional branch with an
+ // 'unreachable' instruction.
+ if (TheOnlyDest) {
+ BB->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local dead code elimination.
+//
+
+/// isInstructionTriviallyDead - Return true if the result produced by the
+/// instruction is not used, and the instruction has no side effects.
+///
+bool llvm::isInstructionTriviallyDead(Instruction *I,
+ const TargetLibraryInfo *TLI) {
+ if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+
+ // We don't want the landingpad instruction removed by anything this general.
+ if (isa<LandingPadInst>(I))
+ return false;
+
+ // We don't want debug info removed by anything this general, unless
+ // debug info is empty.
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+ if (DDI->getAddress())
+ return false;
+ return true;
+ }
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+ if (DVI->getValue())
+ return false;
+ return true;
+ }
+
+ if (!I->mayHaveSideEffects()) return true;
+
+ // Special case intrinsics that "may have side effects" but can be deleted
+ // when dead.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ // Safe to delete llvm.stacksave if dead.
+ if (II->getIntrinsicID() == Intrinsic::stacksave)
+ return true;
+
+ // Lifetime intrinsics are dead when their right-hand is undef.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)
+ return isa<UndefValue>(II->getArgOperand(1));
+ }
+
+ if (isAllocLikeFn(I, TLI)) return true;
+
+ if (CallInst *CI = isFreeCall(I, TLI))
+ if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
+ return C->isNullValue() || isa<UndefValue>(C);
+
+ return false;
+}
+
+/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
+/// trivially dead instruction, delete it. If that makes any of its operands
+/// trivially dead, delete them too, recursively. Return true if any
+/// instructions were deleted.
+bool
+llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
+ const TargetLibraryInfo *TLI) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
+ return false;
+
+ SmallVector<Instruction*, 16> DeadInsts;
+ DeadInsts.push_back(I);
+
+ do {
+ I = DeadInsts.pop_back_val();
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, 0);
+
+ if (!OpV->use_empty()) continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ DeadInsts.push_back(OpI);
+ }
+
+ I->eraseFromParent();
+ } while (!DeadInsts.empty());
+
+ return true;
+}
+
+/// areAllUsesEqual - Check whether the uses of a value are all the same.
+/// This is similar to Instruction::hasOneUse() except this will also return
+/// true when there are no uses or multiple uses that all refer to the same
+/// value.
+static bool areAllUsesEqual(Instruction *I) {
+ Value::use_iterator UI = I->use_begin();
+ Value::use_iterator UE = I->use_end();
+ if (UI == UE)
+ return true;
+
+ User *TheUse = *UI;
+ for (++UI; UI != UE; ++UI) {
+ if (*UI != TheUse)
+ return false;
+ }
+ return true;
+}
+
+/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
+/// dead PHI node, due to being a def-use chain of single-use nodes that
+/// either forms a cycle or is terminated by a trivially dead instruction,
+/// delete it. If that makes any of its operands trivially dead, delete them
+/// too, recursively. Return true if a change was made.
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
+ const TargetLibraryInfo *TLI) {
+ SmallPtrSet<Instruction*, 4> Visited;
+ for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
+ I = cast<Instruction>(*I->use_begin())) {
+ if (I->use_empty())
+ return RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+
+ // If we find an instruction more than once, we're on a cycle that
+ // won't prove fruitful.
+ if (!Visited.insert(I)) {
+ // Break the cycle and delete the instruction and its operands.
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+/// This returns true if it changed the code, note that it can delete
+/// instructions in other blocks as well in this block.
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ bool MadeChange = false;
+
+#ifndef NDEBUG
+ // In debug builds, ensure that the terminator of the block is never replaced
+ // or deleted by these simplifications. The idea of simplification is that it
+ // cannot introduce new instructions, and there is no way to replace the
+ // terminator of a block without introducing a new instruction.
+ AssertingVH<Instruction> TerminatorVH(--BB->end());
+#endif
+
+ for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) {
+ assert(!BI->isTerminator());
+ Instruction *Inst = BI++;
+
+ WeakVH BIHandle(BI);
+ if (recursivelySimplifyInstruction(Inst, TD)) {
+ MadeChange = true;
+ if (BIHandle != BI)
+ BI = BB->begin();
+ continue;
+ }
+
+ MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
+ if (BIHandle != BI)
+ BI = BB->begin();
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Graph Restructuring.
+//
+
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// .. and delete the predecessor corresponding to the '1', this will attempt to
+/// recursively fold the and to 0.
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ DataLayout *TD) {
+ // This only adjusts blocks with PHI nodes.
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
+ // them down. This will leave us with single entry phi nodes and other phis
+ // that can be removed.
+ BB->removePredecessor(Pred, true);
+
+ WeakVH PhiIt = &BB->front();
+ while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
+ PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+ Value *OldPhiIt = PhiIt;
+
+ if (!recursivelySimplifyInstruction(PN, TD))
+ continue;
+
+ // If recursive simplification ended up deleting the next PHI node we would
+ // iterate to, then our iterator is invalid, restart scanning from the top
+ // of the block.
+ if (PhiIt != OldPhiIt) PhiIt = &BB->front();
+ }
+}
+
+
+/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// between them, moving the instructions in the predecessor into DestBB and
+/// deleting the predecessor block.
+///
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
+ // If BB has single-entry PHI nodes, fold them.
+ while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ Value *NewVal = PN->getIncomingValue(0);
+ // Replace self referencing PHI with undef, it must be dead.
+ if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NewVal);
+ PN->eraseFromParent();
+ }
+
+ BasicBlock *PredBB = DestBB->getSinglePredecessor();
+ assert(PredBB && "Block doesn't have a single predecessor!");
+
+ // Zap anything that took the address of DestBB. Not doing this will give the
+ // address an invalid value.
+ if (DestBB->hasAddressTaken()) {
+ BlockAddress *BA = BlockAddress::get(DestBB);
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1);
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+
+ // Anything that branched to PredBB now branches to DestBB.
+ PredBB->replaceAllUsesWith(DestBB);
+
+ // Splice all the instructions from PredBB to DestBB.
+ PredBB->getTerminator()->eraseFromParent();
+ DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
+ if (P) {
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ if (DT) {
+ BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
+ }
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+ if (PI) {
+ PI->replaceAllUses(PredBB, DestBB);
+ PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
+ }
+ }
+ // Nuke BB.
+ PredBB->eraseFromParent();
+}
+
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into succ.
+///
+/// Assumption: Succ is the single successor for BB.
+///
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
+ // Shortcut, if there is only a single predecessor it must be BB and merging
+ // is always safe
+ if (Succ->getSinglePredecessor()) return true;
+
+ // Make a list of the predecessors of BB
+ SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << IBB->getName() << "\n");
+ return false;
+ }
+ }
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with regard to common "
+ << "predecessor " << IBB->getName() << "\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potential side-effect free intrinsics and the branch. If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true. If we can't transform, return false.
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+ // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+ if (PN->getIncomingBlock(UI) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ PN->addIncoming(OldValPN->getIncomingValue(i),
+ OldValPN->getIncomingBlock(i));
+ } else {
+ // Add an incoming value for each of the new incoming values.
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
+ PN->addIncoming(OldVal, BBPreds[i]);
+ }
+ }
+ }
+
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+
+ // Copy over any phi, debug or lifetime instruction.
+ BB->getTerminator()->eraseFromParent();
+ Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+ } else {
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
+ }
+ }
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ if (!Succ->hasName()) Succ->takeName(BB);
+ BB->eraseFromParent(); // Delete the old basic block.
+ return true;
+}
+
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+ bool Changed = false;
+
+ // This implementation doesn't currently consider undef operands
+ // specially. Theoretically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ // Map from PHI hash values to PHI nodes. If multiple PHIs have
+ // the same hash value, the element is the first PHI in the
+ // linked list in CollisionMap.
+ DenseMap<uintptr_t, PHINode *> HashMap;
+
+ // Maintain linked lists of PHI nodes with common hash values.
+ DenseMap<PHINode *, PHINode *> CollisionMap;
+
+ // Examine each PHI.
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I++); ) {
+ // Compute a hash value on the operands. Instcombine will likely have sorted
+ // them, which helps expose duplicates, but we have to check all the
+ // operands to be safe in case instcombine hasn't run.
+ uintptr_t Hash = 0;
+ // This hash algorithm is quite weak as hash functions go, but it seems
+ // to do a good enough job for this particular purpose, and is very quick.
+ for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
+ I != E; ++I) {
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
+ Hash >>= 1;
+ // If we've never seen this hash value before, it's a unique PHI.
+ std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
+ HashMap.insert(std::make_pair(Hash, PN));
+ if (Pair.second) continue;
+ // Otherwise it's either a duplicate or a hash collision.
+ for (PHINode *OtherPN = Pair.first->second; ; ) {
+ if (OtherPN->isIdenticalTo(PN)) {
+ // A duplicate. Replace this PHI with its duplicate.
+ PN->replaceAllUsesWith(OtherPN);
+ PN->eraseFromParent();
+ Changed = true;
+ break;
+ }
+ // A non-duplicate hash collision.
+ DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
+ if (I == CollisionMap.end()) {
+ // Set this PHI to be the head of the linked list of colliding PHIs.
+ PHINode *Old = Pair.first->second;
+ Pair.first->second = PN;
+ CollisionMap[PN] = Old;
+ break;
+ }
+ // Proceed to the next PHI in the list.
+ OtherPN = I->second;
+ }
+ }
+
+ return Changed;
+}
+
+/// enforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+ unsigned PrefAlign, const DataLayout *TD) {
+ V = V->stripPointerCasts();
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ // If the preferred alignment is greater than the natural stack alignment
+ // then don't round up. This avoids dynamic stack realignment.
+ if (TD && TD->exceedsNaturalStackAlignment(PrefAlign))
+ return Align;
+ // If there is a requested alignment and if this is an alloca, round up.
+ if (AI->getAlignment() >= PrefAlign)
+ return AI->getAlignment();
+ AI->setAlignment(PrefAlign);
+ return PrefAlign;
+ }
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global.
+ if (GV->isDeclaration()) return Align;
+ // If the memory we set aside for the global may not be the memory used by
+ // the final program then it is impossible for us to reliably enforce the
+ // preferred alignment.
+ if (GV->isWeakForLinker()) return Align;
+
+ if (GV->getAlignment() >= PrefAlign)
+ return GV->getAlignment();
+ // We can only increase the alignment of the global if it has no alignment
+ // specified or if it is not assigned a section. If it is assigned a
+ // section, the global could be densely packed with other objects in the
+ // section, increasing the alignment could cause padding issues.
+ if (!GV->hasSection() || GV->getAlignment() == 0)
+ GV->setAlignment(PrefAlign);
+ return GV->getAlignment();
+ }
+
+ return Align;
+}
+
+/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+ const DataLayout *TD) {
+ assert(V->getType()->isPointerTy() &&
+ "getOrEnforceKnownAlignment expects a pointer!");
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD);
+ unsigned TrailZ = KnownZero.countTrailingOnes();
+
+ // Avoid trouble with rediculously large TrailZ values, such as
+ // those computed from a null pointer.
+ TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
+ unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+ // LLVM doesn't support alignments larger than this currently.
+ Align = std::min(Align, +Value::MaximumAlignment);
+
+ if (PrefAlign > Align)
+ Align = enforceKnownAlignment(V, Align, PrefAlign, TD);
+
+ // We don't need to make any adjustment.
+ return Align;
+}
+
+///===---------------------------------------------------------------------===//
+/// Dbg Intrinsic utilities
+///
+
+/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// that has an associated llvm.dbg.decl intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ StoreInst *SI, DIBuilder &Builder) {
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ Instruction *DbgVal = NULL;
+ // If an argument is zero extended then use argument directly. The ZExt
+ // may be zapped by an optimization pass in future.
+ Argument *ExtendedArg = NULL;
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+ if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (ExtendedArg)
+ DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI);
+ else
+ DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI);
+
+ // Propagate any debug metadata from the store onto the dbg.value.
+ DebugLoc SIDL = SI->getDebugLoc();
+ if (!SIDL.isUnknown())
+ DbgVal->setDebugLoc(SIDL);
+ // Otherwise propagate debug metadata from dbg.declare.
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ return true;
+}
+
+/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// that has an associated llvm.dbg.decl intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ LoadInst *LI, DIBuilder &Builder) {
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ Instruction *DbgVal =
+ Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
+ DIVar, LI);
+
+ // Propagate any debug metadata from the store onto the dbg.value.
+ DebugLoc LIDL = LI->getDebugLoc();
+ if (!LIDL.isUnknown())
+ DbgVal->setDebugLoc(LIDL);
+ // Otherwise propagate debug metadata from dbg.declare.
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ return true;
+}
+
+/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
+/// of llvm.dbg.value intrinsics.
+bool llvm::LowerDbgDeclare(Function &F) {
+ DIBuilder DIB(*F.getParent());
+ SmallVector<DbgDeclareInst *, 4> Dbgs;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ Dbgs.push_back(DDI);
+ }
+ if (Dbgs.empty())
+ return false;
+
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(),
+ E = Dbgs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ bool RemoveDDI = true;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+ ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+ else
+ RemoveDDI = false;
+ if (RemoveDDI)
+ DDI->eraseFromParent();
+ }
+ }
+ return true;
+}
+
+/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
+/// alloca 'V', if any.
+DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
+ if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V))
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ E = DebugNode->use_end(); UI != E; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ return DDI;
+
+ return 0;
+}
+
+bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+ DIBuilder &Builder) {
+ DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
+ if (!DDI)
+ return false;
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ // Create a copy of the original DIDescriptor for user variable, appending
+ // "deref" operation to a list of address elements, as new llvm.dbg.declare
+ // will take a value storing address of the memory for variable, not
+ // alloca itself.
+ Type *Int64Ty = Type::getInt64Ty(AI->getContext());
+ SmallVector<Value*, 4> NewDIVarAddress;
+ if (DIVar.hasComplexAddress()) {
+ for (unsigned i = 0, n = DIVar.getNumAddrElements(); i < n; ++i) {
+ NewDIVarAddress.push_back(
+ ConstantInt::get(Int64Ty, DIVar.getAddrElement(i)));
+ }
+ }
+ NewDIVarAddress.push_back(ConstantInt::get(Int64Ty, DIBuilder::OpDeref));
+ DIVariable NewDIVar = Builder.createComplexVariable(
+ DIVar.getTag(), DIVar.getContext(), DIVar.getName(),
+ DIVar.getFile(), DIVar.getLineNumber(), DIVar.getType(),
+ NewDIVarAddress, DIVar.getArgNumber());
+
+ // Insert llvm.dbg.declare in the same basic block as the original alloca,
+ // and remove old llvm.dbg.declare.
+ BasicBlock *BB = AI->getParent();
+ Builder.insertDeclare(NewAllocaAddress, NewDIVar, BB);
+ DDI->eraseFromParent();
+ return true;
+}
+
+bool llvm::removeUnreachableBlocks(Function &F) {
+ SmallPtrSet<BasicBlock*, 16> Reachable;
+ SmallVector<BasicBlock*, 128> Worklist;
+ Worklist.push_back(&F.getEntryBlock());
+ Reachable.insert(&F.getEntryBlock());
+ do {
+ BasicBlock *BB = Worklist.pop_back_val();
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (Reachable.insert(*SI))
+ Worklist.push_back(*SI);
+ } while (!Worklist.empty());
+
+ if (Reachable.size() == F.size())
+ return false;
+
+ assert(Reachable.size() < F.size());
+ for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) {
+ if (Reachable.count(I))
+ continue;
+
+ for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
+ if (Reachable.count(*SI))
+ (*SI)->removePredecessor(I);
+ I->dropAllReferences();
+ }
+
+ for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
new file mode 100644
index 000000000000..37819cc9c917
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -0,0 +1,800 @@
+//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header. This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header). This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-simplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
+STATISTIC(NumNested , "Number of nested loops split out");
+
+namespace {
+ struct LoopSimplify : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : LoopPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ // AA - If we have an alias analysis object to update, this is it, otherwise
+ // this is null.
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ Loop *L;
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // We need loop information to identify the loops...
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DependenceAnalysis>();
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ }
+
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const;
+
+ private:
+ bool ProcessLoop(Loop *L, LPPassManager &LPM);
+ BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
+ BasicBlock *InsertPreheaderForLoop(Loop *L);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader);
+ BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
+ void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L);
+ };
+}
+
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
+ L = l;
+ bool Changed = false;
+ LI = &getAnalysis<LoopInfo>();
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
+
+ Changed |= ProcessLoop(L, LPM);
+
+ return Changed;
+}
+
+/// ProcessLoop - Walk the loop structure in depth first order, ensuring that
+/// all loops have preheaders.
+///
+bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
+ bool Changed = false;
+ReprocessLoop:
+
+ // Check to see that no blocks (other than the header) in this loop have
+ // predecessors that are not in the loop. This is not valid for natural
+ // loops, but can occur if the blocks are unreachable. Since they are
+ // unreachable we can just shamelessly delete those CFG edges!
+ for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+ BB != E; ++BB) {
+ if (*BB == L->getHeader()) continue;
+
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB),
+ PE = pred_end(*BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P))
+ BadPreds.insert(P);
+ }
+
+ // Delete each unique out-of-loop (and thus dead) predecessor.
+ for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
+ E = BadPreds.end(); I != E; ++I) {
+
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << (*I)->getName() << "\n");
+
+ // Inform each successor of each dead pred.
+ for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ (*SI)->removePredecessor(*I);
+ // Zap the dead pred's terminator and replace it with unreachable.
+ TerminatorInst *TI = (*I)->getTerminator();
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ (*I)->getTerminator()->eraseFromParent();
+ new UnreachableInst((*I)->getContext(), *I);
+ Changed = true;
+ }
+ }
+
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ E = ExitingBlocks.end(); I != E; ++I)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << (*I)->getName() << "\n");
+
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+
+ // This may make the loop analyzable, force SCEV recomputation.
+ if (SE)
+ SE->forgetLoop(L);
+
+ Changed = true;
+ }
+ }
+
+ // Does the loop already have a preheader? If so, don't insert one.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = InsertPreheaderForLoop(L);
+ if (Preheader) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Next, check to make sure that all exit nodes of the loop only have
+ // predecessors that are inside of the loop. This check guarantees that the
+ // loop preheader/header will dominate the exit blocks. If the exit block has
+ // predecessors from outside of the loop, split the edge now.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
+ for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
+ E = ExitBlockSet.end(); I != E; ++I) {
+ BasicBlock *ExitBlock = *I;
+ for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
+ PI != PE; ++PI)
+ // Must be exactly this loop: no subloops, parent loops, or non-loop preds
+ // allowed.
+ if (!L->contains(*PI)) {
+ if (RewriteLoopExitBlock(L, ExitBlock)) {
+ ++NumInserted;
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // If the header has more than two predecessors at this point (from the
+ // preheader and from multiple backedges), we must adjust the loop.
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
+ // If this is really a nested loop, rip it out into a child loop. Don't do
+ // this for loops with a giant number of backedges, just factor them into a
+ // common backedge instead.
+ if (L->getNumBackEdges() < 8) {
+ if (SeparateNestedLoop(L, LPM, Preheader)) {
+ ++NumNested;
+ // This is a big restructuring change, reprocess the whole loop.
+ Changed = true;
+ // GCC doesn't tail recursion eliminate this.
+ goto ReprocessLoop;
+ }
+ }
+
+ // If we either couldn't, or didn't want to, identify nesting of the loops,
+ // insert a new block that all backedges target, then make it jump to the
+ // loop header.
+ LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
+ if (LoopLatch) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Scan over the PHI nodes in the loop header. Since they now have only two
+ // incoming values (the loop is canonicalized), we may have simplified the PHI
+ // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+ PHINode *PN;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+ if (AA) AA->deleteValue(PN);
+ if (SE) SE->forgetValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ }
+
+ // If this loop has multiple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+ // SimplifyCFG also does this (and this code uses the same utility
+ // function), however this code is loop-aware, where SimplifyCFG is
+ // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ bool UniqueExit = true;
+ if (!ExitBlocks.empty())
+ for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] != ExitBlocks[0]) {
+ UniqueExit = false;
+ break;
+ }
+ if (UniqueExit) {
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
+ Instruction *Inst = I++;
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+ if (Inst == CI)
+ continue;
+ if (!L->makeLoopInvariant(Inst, Changed,
+ Preheader ? Preheader->getTerminator() : 0)) {
+ AllInvariant = false;
+ break;
+ }
+ }
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI)) continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and delete it.
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
+
+ // If any reachable control flow within this loop has changed, notify
+ // ScalarEvolution. Currently assume the parent loop doesn't change
+ // (spliting edges doesn't count). If blocks, CFG edges, or other values
+ // in the parent loop change, then we need call to forgetLoop() for the
+ // parent instead.
+ if (SE)
+ SE->forgetLoop(L);
+
+ assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ Changed = true;
+ LI->removeBlock(ExitingBlock);
+
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+ Node->getChildren();
+ while (!Children.empty()) {
+ DomTreeNode *Child = Children.front();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ }
+ DT->eraseNode(ExitingBlock);
+
+ BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+ BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+ ExitingBlock->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
+/// preheader, this method is called to insert one. This method has two phases:
+/// preheader insertion and analysis updating.
+///
+BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+ BasicBlock *Header = L->getHeader();
+
+ // Compute the set of predecessors of the loop that are not in the loop.
+ SmallVector<BasicBlock*, 8> OutsideBlocks;
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P)) { // Coming in from outside the loop?
+ // If the loop is branched to from an indirect branch, we won't
+ // be able to fully transform the loop, because it prohibits
+ // edge splitting.
+ if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+
+ // Keep track of it.
+ OutsideBlocks.push_back(P);
+ }
+ }
+
+ // Split out the loop pre-header.
+ BasicBlock *PreheaderBB;
+ if (!Header->isLandingPad()) {
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+ this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
+ ".split-lp", this, NewBBs);
+ PreheaderBB = NewBBs[0];
+ }
+
+ PreheaderBB->getTerminator()->setDebugLoc(
+ Header->getFirstNonPHI()->getDebugLoc());
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+ << PreheaderBB->getName() << "\n");
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
+
+ return PreheaderBB;
+}
+
+/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
+/// blocks. This method is used to split exit blocks that have predecessors
+/// outside of the loop.
+BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
+ SmallVector<BasicBlock*, 8> LoopBlocks;
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
+ BasicBlock *P = *I;
+ if (L->contains(P)) {
+ // Don't do this if the loop is exited via an indirect branch.
+ if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+
+ LoopBlocks.push_back(P);
+ }
+ }
+
+ assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
+ BasicBlock *NewExitBB = 0;
+
+ if (Exit->isLandingPad()) {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0],
+ LoopBlocks.size()),
+ ".loopexit", ".nonloopexit",
+ this, NewBBs);
+ NewExitBB = NewBBs[0];
+ } else {
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this);
+ }
+
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+ << NewExitBB->getName() << "\n");
+ return NewExitBB;
+}
+
+/// AddBlockAndPredsToSet - Add the specified block, and all of its
+/// predecessors, to the specified set, if it's not already in there. Stop
+/// predecessor traversal when we reach StopBlock.
+static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+ std::set<BasicBlock*> &Blocks) {
+ std::vector<BasicBlock *> WorkList;
+ WorkList.push_back(InputBB);
+ do {
+ BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+ if (Blocks.insert(BB).second && BB != StopBlock)
+ // If BB is not already processed and it is not a stop block then
+ // insert its predecessor in the work list
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ BasicBlock *WBB = *I;
+ WorkList.push_back(WBB);
+ }
+ } while(!WorkList.empty());
+}
+
+/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
+/// PHI node that tells us how to partition the loops.
+static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+ AliasAnalysis *AA, LoopInfo *LI) {
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I);
+ ++I;
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+ // This is a degenerate PHI already, don't modify it!
+ PN->replaceAllUsesWith(V);
+ if (AA) AA->deleteValue(PN);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ // Scan this PHI node looking for a use of the PHI node by itself.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN &&
+ L->contains(PN->getIncomingBlock(i)))
+ // We found something tasty to remove.
+ return PN;
+ }
+ return 0;
+}
+
+// PlaceSplitBlockCarefully - If the block isn't already, move the new block to
+// right after some 'outside block' block. This prevents the preheader from
+// being placed inside the loop body, e.g. when the loop hasn't been rotated.
+void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L) {
+ // Check to see if NewBB is already well placed.
+ Function::iterator BBI = NewBB; --BBI;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ if (&*BBI == SplitPreds[i])
+ return;
+ }
+
+ // If it isn't already after an outside block, move it after one. This is
+ // always good as it makes the uncond branch from the outside block into a
+ // fall-through.
+
+ // Figure out *which* outside block to put this after. Prefer an outside
+ // block that neighbors a BB actually in the loop.
+ BasicBlock *FoundBB = 0;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ Function::iterator BBI = SplitPreds[i];
+ if (++BBI != NewBB->getParent()->end() &&
+ L->contains(BBI)) {
+ FoundBB = SplitPreds[i];
+ break;
+ }
+ }
+
+ // If our heuristic for a *good* bb to place this after doesn't find
+ // anything, just pick something. It's likely better than leaving it within
+ // the loop.
+ if (!FoundBB)
+ FoundBB = SplitPreds[0];
+ NewBB->moveAfter(FoundBB);
+}
+
+
+/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
+/// them out into a nested loop. This is important for code that looks like
+/// this:
+///
+/// Loop:
+/// ...
+/// br cond, Loop, Next
+/// ...
+/// br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of the
+/// loop. PHI nodes with unchanging values on one backedge correspond to values
+/// that change in the "outer" loop, but not in the "inner" loop.
+///
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created.
+///
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader) {
+ // Don't try to separate loops without a preheader.
+ if (!Preheader)
+ return 0;
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!L->getHeader()->isLandingPad() &&
+ "Can't insert backedge to landing pad");
+
+ PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
+ if (PN == 0) return 0; // No known way to partition.
+
+ // Pull out all predecessors that have varying values in the loop. This
+ // handles the case when a PHI node has multiple instances of itself as
+ // arguments.
+ SmallVector<BasicBlock*, 8> OuterLoopPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingValue(i) != PN ||
+ !L->contains(PN->getIncomingBlock(i))) {
+ // We can't split indirectbr edges.
+ if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ return 0;
+ OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+ }
+ }
+ DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+
+ // If ScalarEvolution is around and knows anything about values in
+ // this loop, tell it to forget them, because we're about to
+ // substantially change it.
+ if (SE)
+ SE->forgetLoop(L);
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+
+ // Create the new outer loop.
+ Loop *NewOuter = new Loop();
+
+ // Change the parent loop to use the outer loop as its child now.
+ if (Loop *Parent = L->getParentLoop())
+ Parent->replaceChildLoopWith(L, NewOuter);
+ else
+ LI->changeTopLevelLoop(L, NewOuter);
+
+ // L is now a subloop of our outer loop.
+ NewOuter->addChildLoop(L);
+
+ // Add the new loop to the pass manager queue.
+ LPM.insertLoopIntoQueue(NewOuter);
+
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ NewOuter->addBlockEntry(*I);
+
+ // Now reset the header in L, which had been moved by
+ // SplitBlockPredecessors for the outer loop.
+ L->moveToHeader(Header);
+
+ // Determine which blocks should stay in L and which should be moved out to
+ // the Outer loop now.
+ std::set<BasicBlock*> BlocksInL;
+ for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(Header, P))
+ AddBlockAndPredsToSet(P, Header, BlocksInL);
+ }
+
+ // Scan all of the loop children of L, moving them to OuterLoop if they are
+ // not part of the inner loop.
+ const std::vector<Loop*> &SubLoops = L->getSubLoops();
+ for (size_t I = 0; I != SubLoops.size(); )
+ if (BlocksInL.count(SubLoops[I]->getHeader()))
+ ++I; // Loop remains in L
+ else
+ NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+
+ // Now that we know which blocks are in L and which need to be moved to
+ // OuterLoop, move any blocks that need it.
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ if (!BlocksInL.count(BB)) {
+ // Move this block to the parent, updating the exit blocks sets
+ L->removeBlockFromLoop(BB);
+ if ((*LI)[BB] == L)
+ LI->changeLoopFor(BB, NewOuter);
+ --i;
+ }
+ }
+
+ return NewOuter;
+}
+
+
+
+/// InsertUniqueBackedgeBlock - This method is called when the specified loop
+/// has more than one backedge in it. If this occurs, revector all of these
+/// backedges to target a new basic block and have that block branch to the loop
+/// header. This ensures that loops have exactly one backedge.
+///
+BasicBlock *
+LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+ assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+ // Get information about the loop
+ BasicBlock *Header = L->getHeader();
+ Function *F = Header->getParent();
+
+ // Unique backedge insertion currently depends on having a preheader.
+ if (!Preheader)
+ return 0;
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
+
+ // Figure out which basic blocks contain back-edges to the loop header.
+ std::vector<BasicBlock*> BackedgeBlocks;
+ for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
+ BasicBlock *P = *I;
+
+ // Indirectbr edges cannot be split, so we must fail if we find one.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return 0;
+
+ if (P != Preheader) BackedgeBlocks.push_back(P);
+ }
+
+ // Create and insert the new backedge block...
+ BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+ Header->getName()+".backedge", F);
+ BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+
+ DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+ << BEBlock->getName() << "\n");
+
+ // Move the new backedge block to right after the last backedge block.
+ Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
+ F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+ // Now that the block has been inserted into the function, create PHI nodes in
+ // the backedge block which correspond to any PHI nodes in the header block.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+ PN->getName()+".be", BETerminator);
+ if (AA) AA->copyValue(PN, NewPN);
+
+ // Loop over the PHI node, moving all entries except the one for the
+ // preheader over to the new PHI node.
+ unsigned PreheaderIdx = ~0U;
+ bool HasUniqueIncomingValue = true;
+ Value *UniqueValue = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *IBB = PN->getIncomingBlock(i);
+ Value *IV = PN->getIncomingValue(i);
+ if (IBB == Preheader) {
+ PreheaderIdx = i;
+ } else {
+ NewPN->addIncoming(IV, IBB);
+ if (HasUniqueIncomingValue) {
+ if (UniqueValue == 0)
+ UniqueValue = IV;
+ else if (UniqueValue != IV)
+ HasUniqueIncomingValue = false;
+ }
+ }
+ }
+
+ // Delete all of the incoming values from the old PN except the preheader's
+ assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+ if (PreheaderIdx != 0) {
+ PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+ PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+ }
+ // Nuke all entries except the zero'th.
+ for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+ PN->removeIncomingValue(e-i, false);
+
+ // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+ PN->addIncoming(NewPN, BEBlock);
+
+ // As an optimization, if all incoming values in the new PhiNode (which is a
+ // subset of the incoming values of the old PHI node) have the same value,
+ // eliminate the PHI Node.
+ if (HasUniqueIncomingValue) {
+ NewPN->replaceAllUsesWith(UniqueValue);
+ if (AA) AA->deleteValue(NewPN);
+ BEBlock->getInstList().erase(NewPN);
+ }
+ }
+
+ // Now that all of the PHI nodes have been inserted and adjusted, modify the
+ // backedge blocks to just to the BEBlock instead of the header.
+ for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
+ TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
+ if (TI->getSuccessor(Op) == Header)
+ TI->setSuccessor(Op, BEBlock);
+ }
+
+ //===--- Update all analyses which we must preserve now -----------------===//
+
+ // Update Loop Information - we know that this block is now in the current
+ // loop and all parent loops.
+ L->addBasicBlockToLoop(BEBlock, LI->getBase());
+
+ // Update dominator information
+ DT->splitBlock(BEBlock);
+
+ return BEBlock;
+}
+
+void LoopSimplify::verifyAnalysis() const {
+ // It used to be possible to just assert L->isLoopSimplifyForm(), however
+ // with the introduction of indirectbr, there are now cases where it's
+ // not possible to transform a loop as necessary. We can at least check
+ // that there is an indirectbr near any time there's trouble.
+
+ // Indirectbr can interfere with preheader and unique backedge insertion.
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+ bool HasIndBrPred = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PE = pred_end(L->getHeader()); PI != PE; ++PI)
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ HasIndBrPred = true;
+ break;
+ }
+ assert(HasIndBrPred &&
+ "LoopSimplify has no excuse for missing loop header info!");
+ (void)HasIndBrPred;
+ }
+
+ // Indirectbr can interfere with exit block canonicalization.
+ if (!L->hasDedicatedExits()) {
+ bool HasIndBrExiting = false;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+ HasIndBrExiting = true;
+ break;
+ }
+ }
+
+ assert(HasIndBrExiting &&
+ "LoopSimplify has no excuse for missing exit block info!");
+ (void)HasIndBrExiting;
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
new file mode 100644
index 000000000000..cb581b3d13b9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -0,0 +1,457 @@
+//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+// The process of unrolling can produce extraneous basic blocks linked with
+// unconditional branches. This will be corrected in the future.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+using namespace llvm;
+
+// TODO: Should these be here or in LoopUnroll?
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by VMap.
+static inline void RemapInstruction(Instruction *I,
+ ValueToValueMapTy &VMap) {
+ for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+ Value *Op = I->getOperand(op);
+ ValueToValueMapTy::iterator It = VMap.find(Op);
+ if (It != VMap.end())
+ I->setOperand(op, It->second);
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+ if (It != VMap.end())
+ PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+ }
+ }
+}
+
+/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
+/// only has one predecessor, and that predecessor only has one successor.
+/// The LoopInfo Analysis that is passed will be kept consistent.
+/// Returns the new combined block.
+static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
+ LPPassManager *LPM) {
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ BasicBlock *OnlyPred = BB->getSinglePredecessor();
+ if (!OnlyPred) return 0;
+
+ if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
+
+ // Resolve any PHI nodes at the start of the block. They are all
+ // guaranteed to have exactly one entry if they exist, unless there are
+ // multiple duplicate (but guaranteed to be equal) entries for the
+ // incoming edges. This occurs when there are multiple edges from
+ // OnlyPred to OnlySucc.
+ FoldSingleEntryPHINodes(BB);
+
+ // Delete the unconditional branch from the predecessor...
+ OnlyPred->getInstList().pop_back();
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(OnlyPred);
+
+ // Move all definitions in the successor to the predecessor...
+ OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
+ std::string OldName = BB->getName();
+
+ // Erase basic block from the function...
+
+ // ScalarEvolution holds references to loop exit blocks.
+ if (LPM) {
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) {
+ if (Loop *L = LI->getLoopFor(BB))
+ SE->forgetLoop(L);
+ }
+ }
+ LI->removeBlock(BB);
+ BB->eraseFromParent();
+
+ // Inherit predecessor's name if it exists...
+ if (!OldName.empty() && !OnlyPred->hasName())
+ OnlyPred->setName(OldName);
+
+ return OnlyPred;
+}
+
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
+/// if unrolling was successful, or false if the loop was unmodified. Unrolling
+/// can only fail when the loop's latch block is not terminated by a conditional
+/// branch instruction. However, if the trip count (and multiple) are not known,
+/// loop unrolling will mostly produce more code that is no faster.
+///
+/// TripCount is generally defined as the number of times the loop header
+/// executes. UnrollLoop relaxes the definition to permit early exits: here
+/// TripCount is the iteration on which control exits LatchBlock if no early
+/// exits were taken. Note that UnrollLoop assumes that the loop counter test
+/// terminates LatchBlock in order to remove unnecesssary instances of the
+/// test. In other words, control may exit the loop prior to TripCount
+/// iterations via an early branch, but control may not exit the loop from the
+/// LatchBlock's terminator prior to TripCount iterations.
+///
+/// Similarly, TripMultiple divides the number of times that the LatchBlock may
+/// execute without exiting the loop.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
+/// removed from the LoopPassManager as well. LPM can also be NULL.
+///
+/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
+/// available it must also preserve those analyses.
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+ bool AllowRuntime, unsigned TripMultiple,
+ LoopInfo *LI, LPPassManager *LPM) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock) {
+ DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ return false;
+ }
+
+ // Loops with indirectbr cannot be cloned.
+ if (!L->isSafeToClone()) {
+ DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
+ return false;
+ }
+
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+ if (!BI || BI->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Can't unroll; loop not terminated by a conditional branch.\n");
+ return false;
+ }
+
+ if (Header->hasAddressTaken()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Won't unroll loop: address of header block is taken.\n");
+ return false;
+ }
+
+ if (TripCount != 0)
+ DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
+ if (TripMultiple != 1)
+ DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
+
+ // Effectively "DCE" unrolled iterations that are beyond the tripcount
+ // and will never be executed.
+ if (TripCount != 0 && Count > TripCount)
+ Count = TripCount;
+
+ // Don't enter the unroll code if there is nothing to do. This way we don't
+ // need to support "partial unrolling by 1".
+ if (TripCount == 0 && Count < 2)
+ return false;
+
+ assert(Count > 0);
+ assert(TripMultiple > 0);
+ assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+ // Are we eliminating the loop control altogether?
+ bool CompletelyUnroll = Count == TripCount;
+
+ // We assume a run-time trip count if the compiler cannot
+ // figure out the loop trip count and the unroll-runtime
+ // flag is specified.
+ bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+
+ if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ return false;
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ if (LPM) {
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE)
+ SE->forgetLoop(L);
+ }
+
+ // If we know the trip count, we know the multiple...
+ unsigned BreakoutTrip = 0;
+ if (TripCount != 0) {
+ BreakoutTrip = TripCount % Count;
+ TripMultiple = 0;
+ } else {
+ // Figure out what multiple to use.
+ BreakoutTrip = TripMultiple =
+ (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+ }
+
+ if (CompletelyUnroll) {
+ DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+ << " with trip count " << TripCount << "!\n");
+ } else {
+ DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
+ << " by " << Count);
+ if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+ DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+ } else if (TripMultiple != 1) {
+ DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ } else if (RuntimeTripCount) {
+ DEBUG(dbgs() << " with run-time trip count");
+ }
+ DEBUG(dbgs() << "!\n");
+ }
+
+ std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
+
+ bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+
+ // For the first iteration of the loop, we should use the precloned values for
+ // PHI nodes. Insert associations now.
+ ValueToValueMapTy LastValueMap;
+ std::vector<PHINode*> OrigPHINode;
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ OrigPHINode.push_back(cast<PHINode>(I));
+ }
+
+ std::vector<BasicBlock*> Headers;
+ std::vector<BasicBlock*> Latches;
+ Headers.push_back(Header);
+ Latches.push_back(LatchBlock);
+
+ // The current on-the-fly SSA update requires blocks to be processed in
+ // reverse postorder so that LastValueMap contains the correct value at each
+ // exit.
+ LoopBlocksDFS DFS(L);
+ DFS.perform(LI);
+
+ // Stash the DFS iterators before adding blocks to the loop.
+ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+
+ for (unsigned It = 1; It != Count; ++It) {
+ std::vector<BasicBlock*> NewBlocks;
+
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
+ Header->getParent()->getBasicBlockList().push_back(New);
+
+ // Loop over all of the PHI nodes in the block, changing them to use the
+ // incoming values from the previous block.
+ if (*BB == Header)
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
+ Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal))
+ if (It > 1 && L->contains(InValI))
+ InVal = LastValueMap[InValI];
+ VMap[OrigPHINode[i]] = InVal;
+ New->getInstList().erase(NewPHI);
+ }
+
+ // Update our running map of newest clones
+ LastValueMap[*BB] = New;
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI)
+ LastValueMap[VI->first] = VI->second;
+
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+ // Add phi entries for newly created values to all exit blocks.
+ for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
+ SI != SE; ++SI) {
+ if (L->contains(*SI))
+ continue;
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
+ if (It != LastValueMap.end())
+ Incoming = It->second;
+ phi->addIncoming(Incoming, New);
+ }
+ }
+ // Keep track of new headers and latches as we create them, so that
+ // we can insert the proper branches later.
+ if (*BB == Header)
+ Headers.push_back(New);
+ if (*BB == LatchBlock)
+ Latches.push_back(New);
+
+ NewBlocks.push_back(New);
+ }
+
+ // Remap all instructions in the most recent iteration
+ for (unsigned i = 0; i < NewBlocks.size(); ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ ::RemapInstruction(I, LastValueMap);
+ }
+
+ // Loop over the PHI nodes in the original block, setting incoming values.
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *PN = OrigPHINode[i];
+ if (CompletelyUnroll) {
+ PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
+ Header->getInstList().erase(PN);
+ }
+ else if (Count > 1) {
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
+ PN->addIncoming(InVal, Latches.back());
+ }
+ }
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // set up the branches to connect them.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The original branch was replicated in each unrolled iteration.
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = true;
+
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
+ // For a complete unroll, make the last iteration end with a branch
+ // to the exit block.
+ if (CompletelyUnroll && j == 0) {
+ Dest = LoopExit;
+ NeedConditional = false;
+ }
+
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+ NeedConditional = false;
+ }
+
+ if (NeedConditional) {
+ // Update the conditional branch's successor for the following
+ // iteration.
+ Term->setSuccessor(!ContinueOnTrue, Dest);
+ } else {
+ // Remove phi operands at this loop exit
+ if (Dest != LoopExit) {
+ BasicBlock *BB = Latches[i];
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ if (*SI == Headers[i])
+ continue;
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Phi->removeIncomingValue(BB, false);
+ }
+ }
+ }
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+ Term->eraseFromParent();
+ }
+ }
+
+ // Merge adjacent basic blocks, if possible.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+ if (Term->isUnconditional()) {
+ BasicBlock *Dest = Term->getSuccessor(0);
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
+ std::replace(Latches.begin(), Latches.end(), Dest, Fold);
+ }
+ }
+
+ if (LPM) {
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ // Incrementally updating domtree after loop unrolling would be easy.
+ if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
+ DT->runOnFunction(*L->getHeader()->getParent());
+
+ // Simplify any new induction variables in the partially unrolled loop.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE && !CompletelyUnroll) {
+ SmallVector<WeakVH, 16> DeadInsts;
+ simplifyLoopIVs(L, SE, LPM, DeadInsts);
+
+ // Aggressively clean up dead instructions that simplifyLoopIVs already
+ // identified. Any remaining should be cleaned up below.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+ }
+ // At this point, the code is well formed. We now do a quick sweep over the
+ // inserted code, doing constant propagation and dead code elimination as we
+ // go.
+ const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
+ for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
+ BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ if (isInstructionTriviallyDead(Inst))
+ (*BB)->getInstList().erase(Inst);
+ else if (Value *V = SimplifyInstruction(Inst))
+ if (LI->replacementPreservesLCSSAForm(Inst, V)) {
+ Inst->replaceAllUsesWith(V);
+ (*BB)->getInstList().erase(Inst);
+ }
+ }
+
+ NumCompletelyUnrolled += CompletelyUnroll;
+ ++NumUnrolled;
+ // Remove the loop from the LoopPassManager if it's completely removed.
+ if (CompletelyUnroll && LPM != NULL)
+ LPM->deleteLoopFromQueue(L);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 000000000000..d801d5f2c2a4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,374 @@
+//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0. When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+// The current strategy generates an if-then-else sequence prior to the
+// unrolled loop to execute the 'left over' iterations. Other strategies
+// include generate a loop before or after the unrolled loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumRuntimeUnrolled,
+ "Number of loops unrolled with run-time trip counts");
+
+/// Connect the unrolling prolog code to the original loop.
+/// The unrolling prolog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Create PHI nodes at prolog end block to combine values
+/// that exit the prolog code and jump around the prolog.
+/// - Add a PHI operand to a PHI node at the loop exit block
+/// for values that exit the prolog and go around the loop.
+/// - Branch around the original loop if the trip count is less
+/// than the unroll factor.
+///
+static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+ BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
+ BasicBlock *OrigPH, BasicBlock *NewPH,
+ ValueToValueMapTy &LVMap, Pass *P) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch != 0 && "Loop must have a latch");
+
+ // Create a PHI node for each outgoing value from the original loop
+ // (which means it is an outgoing value from the prolog code too).
+ // The new PHI node is inserted in the prolog end basic block.
+ // The new PHI name is added as an operand of a PHI node in either
+ // the loop header or the loop exit block.
+ for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
+ SBI != SBE; ++SBI) {
+ for (BasicBlock::iterator BBI = (*SBI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+
+ // Add a new PHI node to the prolog end block and add the
+ // appropriate incoming values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
+ PrologEnd->getTerminator());
+ // Adding a value to the new PHI node from the original loop preheader.
+ // This is the value that skips all the prolog code.
+ if (L->contains(PN)) {
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ } else {
+ NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
+ }
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (L->contains(I)) {
+ V = LVMap[I];
+ }
+ }
+ // Adding a value to the new PHI node from the last prolog block
+ // that was created.
+ NewPN->addIncoming(V, LastPrologBB);
+
+ // Update the existing PHI node operand with the value from the
+ // new PHI node. How this is done depends on if the existing
+ // PHI node is in the original loop block, or the exit block.
+ if (L->contains(PN)) {
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ } else {
+ PN->addIncoming(NewPN, PrologEnd);
+ }
+ }
+ }
+
+ // Create a branch around the orignal loop, which is taken if the
+ // trip count is less than the unroll factor.
+ Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *BrLoopExit =
+ new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
+ ConstantInt::get(TripCount->getType(), Count));
+ BasicBlock *Exit = L->getUniqueExitBlock();
+ assert(Exit != 0 && "Loop must have a single exit block only");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ if (!Exit->isLandingPad()) {
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
+ P, NewBBs);
+ }
+ // Add the branch to the exit block (around the unrolled loop)
+ BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
+ InsertPt->eraseFromParent();
+}
+
+/// Create a clone of the blocks in a loop and connect them together.
+/// This function doesn't create a clone of the loop structure.
+///
+/// There are two value maps that are defined and used. VMap is
+/// for the values in the current loop instance. LVMap contains
+/// the values from the last loop instance. We need the LVMap values
+/// to update the initial values for the current loop instance.
+///
+static void CloneLoopBlocks(Loop *L,
+ bool FirstCopy,
+ BasicBlock *InsertTop,
+ BasicBlock *InsertBot,
+ std::vector<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap,
+ ValueToValueMapTy &LVMap,
+ LoopInfo *LI) {
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
+ NewBlocks.push_back(NewBB);
+
+ if (Loop *ParentLoop = L->getParentLoop())
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ VMap[*BB] = NewBB;
+ if (Header == *BB) {
+ // For the first block, add a CFG connection to this newly
+ // created block
+ InsertTop->getTerminator()->setSuccessor(0, NewBB);
+
+ // Change the incoming values to the ones defined in the
+ // previously cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ if (FirstCopy) {
+ // We replace the first phi node with the value from the preheader
+ VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ NewBB->getInstList().erase(NewPHI);
+ } else {
+ // Update VMap with values from the previous block
+ unsigned idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ if (Instruction *I = dyn_cast<Instruction>(InVal))
+ if (L->contains(I))
+ InVal = LVMap[InVal];
+ NewPHI->setIncomingValue(idx, InVal);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ }
+ }
+ }
+
+ if (Latch == *BB) {
+ VMap.erase((*BB)->getTerminator());
+ NewBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(InsertBot, NewBB);
+ }
+ }
+ // LastValueMap is updated with the values for the current loop
+ // which are used the next time this function is called.
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI) {
+ LVMap[VI->first] = VI->second;
+ }
+}
+
+/// Insert code in the prolog code when unrolling a loop with a
+/// run-time trip-count.
+///
+/// This method assumes that the loop unroll factor is total number
+/// of loop bodes in the loop after unrolling. (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// We assume also that the loop unroll factor is a power-of-two. So, after
+/// unrolling the loop, the number of loop bodies executed is 2,
+/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
+/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
+/// the switch instruction is generated.
+///
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == 0) jump Loop:
+/// if (extraiters == loopfactor) jump L1
+/// if (extraiters == loopfactor-1) jump L2
+/// ...
+/// L1: LoopBody;
+/// L2: LoopBody;
+/// ...
+/// if tripcount < loopfactor jump End
+/// Loop:
+/// ...
+/// End:
+///
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+ LPPassManager *LPM) {
+ // for now, only unroll loops that contain a single exit
+ if (!L->getExitingBlock())
+ return false;
+
+ // Make sure the loop is in canonical form, and there is a single
+ // exit block only.
+ if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ return false;
+
+ // Use Scalar Evolution to compute the trip count. This allows more
+ // loops to be unrolled than relying on induction var simplification
+ if (!LPM)
+ return false;
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE == 0)
+ return false;
+
+ // Only unroll loops with a computable trip count and the trip count needs
+ // to be an int value (allowing a pointer type is a TODO item)
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+ return false;
+
+ // Add 1 since the backedge count doesn't include the first loop iteration
+ const SCEV *TripCountSC =
+ SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+ if (isa<SCEVCouldNotCompute>(TripCountSC))
+ return false;
+
+ // We only handle cases when the unroll factor is a power of 2.
+ // Count is the loop unroll factor, the number of extra copies added + 1.
+ if ((Count & (Count-1)) != 0)
+ return false;
+
+ // If this loop is nested, then the loop unroller changes the code in
+ // parent loop, so the Scalar Evolution pass needs to be run again
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
+ BasicBlock *PH = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ // It helps to splits the original preheader twice, one for the end of the
+ // prolog code and one for a new loop preheader
+ BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
+ // Compute the number of extra iterations required, which is:
+ // extra iterations = run-time trip count % (loop unroll factor + 1)
+ SCEVExpander Expander(*SE, "loop-unroll");
+ Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+ PreHeaderBR);
+ Type *CountTy = TripCount->getType();
+ BinaryOperator *ModVal =
+ BinaryOperator::CreateURem(TripCount,
+ ConstantInt::get(CountTy, Count),
+ "xtraiter");
+ ModVal->insertBefore(PreHeaderBR);
+
+ // Check if for no extra iterations, then jump to unrolled loop
+ Value *BranchVal = new ICmpInst(PreHeaderBR,
+ ICmpInst::ICMP_NE, ModVal,
+ ConstantInt::get(CountTy, 0), "lcmp");
+ // Branch to either the extra iterations or the unrolled loop
+ // We will fix up the true branch label when adding loop body copies
+ BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+ assert(PreHeaderBR->isUnconditional() &&
+ PreHeaderBR->getSuccessor(0) == PEnd &&
+ "CFG edges in Preheader are not correct");
+ PreHeaderBR->eraseFromParent();
+
+ ValueToValueMapTy LVMap;
+ Function *F = Header->getParent();
+ // These variables are used to update the CFG links in each iteration
+ BasicBlock *CompareBB = 0;
+ BasicBlock *LastLoopBB = PH;
+ // Get an ordered list of blocks in the loop to help with the ordering of the
+ // cloned blocks in the prolog code
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ //
+ // For each extra loop iteration, create a copy of the loop's basic blocks
+ // and generate a condition that branches to the copy depending on the
+ // number of 'left over' iterations.
+ //
+ for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
+ std::vector<BasicBlock*> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // Clone all the basic blocks in the loop, but we don't clone the loop
+ // This function adds the appropriate CFG connections.
+ CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
+ LoopBlocks, VMap, LVMap, LI);
+ LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+
+ // Insert the cloned blocks into function just before the original loop
+ F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Generate the code for the comparison which determines if the loop
+ // prolog code needs to be executed.
+ if (leftOverIters == Count-1) {
+ // There is no compare block for the fall-thru case when for the last
+ // left over iteration
+ CompareBB = NewBlocks[0];
+ } else {
+ // Create a new block for the comparison
+ BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
+ F, CompareBB);
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ // Add the new block to the parent loop, if needed
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+
+ // The comparison w/ the extra iteration value and branch
+ Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
+ ConstantInt::get(CountTy, leftOverIters),
+ "un.tmp");
+ // Branch to either the extra iterations or the unrolled loop
+ BranchInst::Create(NewBlocks[0], CompareBB,
+ BranchVal, NewBB);
+ CompareBB = NewBB;
+ PH->getTerminator()->setSuccessor(0, NewBB);
+ VMap[NewPH] = CompareBB;
+ }
+
+ // Rewrite the cloned instruction operands to use the values
+ // created when the clone is created.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I) {
+ RemapInstruction(I, VMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ }
+ }
+ }
+
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+ LPM->getAsPass());
+ NumRuntimeUnrolled++;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
new file mode 100644
index 000000000000..4aee8ff51a4e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,174 @@
+//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the 'expect' intrinsic to LLVM metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled");
+
+static cl::opt<uint32_t>
+LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+ cl::desc("Weight of the branch likely to be taken (default = 64)"));
+static cl::opt<uint32_t>
+UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+ cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+namespace {
+
+ class LowerExpectIntrinsic : public FunctionPass {
+
+ bool HandleSwitchExpect(SwitchInst *SI);
+
+ bool HandleIfExpect(BranchInst *BI);
+
+ public:
+ static char ID;
+ LowerExpectIntrinsic() : FunctionPass(ID) {
+ initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+ };
+}
+
+
+bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+ CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+ if (!CI)
+ return false;
+
+ Function *Fn = CI->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+ return false;
+
+ Value *ArgValue = CI->getArgOperand(0);
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!ExpectedValue)
+ return false;
+
+ SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue);
+ unsigned n = SI->getNumCases(); // +1 for default case.
+ std::vector<uint32_t> Weights(n + 1);
+
+ Weights[0] = Case == SI->case_default() ? LikelyBranchWeight
+ : UnlikelyBranchWeight;
+ for (unsigned i = 0; i != n; ++i)
+ Weights[i + 1] = i == Case.getCaseIndex() ? LikelyBranchWeight
+ : UnlikelyBranchWeight;
+
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(CI->getContext()).createBranchWeights(Weights));
+
+ SI->setCondition(ArgValue);
+ return true;
+}
+
+
+bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+ if (BI->isUnconditional())
+ return false;
+
+ // Handle non-optimized IR code like:
+ // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+ // %tobool = icmp ne i64 %expval, 0
+ // br i1 %tobool, label %if.then, label %if.end
+
+ ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+ if (!CI)
+ return false;
+
+ Function *Fn = CI->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+ return false;
+
+ Value *ArgValue = CI->getArgOperand(0);
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!ExpectedValue)
+ return false;
+
+ MDBuilder MDB(CI->getContext());
+ MDNode *Node;
+
+ // If expect value is equal to 1 it means that we are more likely to take
+ // branch 0, in other case more likely is branch 1.
+ if (ExpectedValue->isOne())
+ Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight);
+ else
+ Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
+
+ BI->setMetadata(LLVMContext::MD_prof, Node);
+
+ CmpI->setOperand(0, ArgValue);
+ return true;
+}
+
+
+bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *BB = I++;
+
+ // Create "block_weights" metadata.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (HandleIfExpect(BI))
+ IfHandled++;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (HandleSwitchExpect(SI))
+ IfHandled++;
+ }
+
+ // remove llvm.expect intrinsics.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ) {
+ CallInst *CI = dyn_cast<CallInst>(BI++);
+ if (!CI)
+ continue;
+
+ Function *Fn = CI->getCalledFunction();
+ if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+ Value *Exp = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(Exp);
+ CI->eraseFromParent();
+ }
+ }
+ }
+
+ return false;
+}
+
+
+char LowerExpectIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+ "Intrinsics", false, false)
+
+FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+ return new LowerExpectIntrinsic();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
new file mode 100644
index 000000000000..9ec84d730e46
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,582 @@
+//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support gives the program the ability to execute
+// any program which does not "throw an exception", by turning 'invoke'
+// instructions into calls and by turning 'unwind' instructions into calls to
+// abort(). If the program does dynamically use the unwind instruction, the
+// program will print a message then abort.
+//
+// 'Expensive' exception handling support gives the full exception handling
+// support to the program at the cost of making the 'invoke' instruction
+// really expensive. It basically inserts setjmp/longjmp calls to emulate the
+// exception handling as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
+//
+// Note that after this pass runs the CFG is not entirely accurate (exceptional
+// control flow edges are not correct anymore) so only very simple things should
+// be done after the lowerinvoke pass has run (like generation of native code).
+// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't
+// support the invoke instruction yet" lowering pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowerinvoke"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <csetjmp>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
+
+namespace {
+ class LowerInvoke : public FunctionPass {
+ // Used for both models.
+ Constant *AbortFn;
+
+ // Used for expensive EH support.
+ StructType *JBLinkTy;
+ GlobalVariable *JBListHead;
+ Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
+ bool useExpensiveEHSupport;
+
+ // We peek in TLI to grab the target's jmp_buf size and alignment
+ const TargetLowering *TLI;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvoke(const TargetLowering *tli = NULL,
+ bool useExpensiveEHSupport = ExpensiveEHSupport)
+ : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
+ TLI(tli) {
+ initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+ }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+ }
+
+ private:
+ bool insertCheapEHSupport(Function &F);
+ void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes);
+ void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum, AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch);
+ bool insertExpensiveEHSupport(Function &F);
+ };
+}
+
+char LowerInvoke::ID = 0;
+INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false)
+
+char &llvm::LowerInvokePassID = LowerInvoke::ID;
+
+// Public Interface To the LowerInvoke pass.
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
+ return new LowerInvoke(TLI, ExpensiveEHSupport);
+}
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport) {
+ return new LowerInvoke(TLI, useExpensiveEHSupport);
+}
+
+// doInitialization - Make sure that there is a prototype for abort in the
+// current module.
+bool LowerInvoke::doInitialization(Module &M) {
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ if (useExpensiveEHSupport) {
+ // Insert a type for the linked list of jump buffers.
+ unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+ JBSize = JBSize ? JBSize : 200;
+ Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+ JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty");
+ Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+ JBLinkTy->setBody(Elts);
+
+ Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
+
+ // Now that we've done that, insert the jmpbuf list head global, unless it
+ // already exists.
+ if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
+ JBListHead = new GlobalVariable(M, PtrJBList, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(PtrJBList),
+ "llvm.sjljeh.jblist");
+ }
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
+ StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ }
+
+ // We need the 'write' and 'abort' functions for both models.
+ AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
+ (Type *)0);
+ return true;
+}
+
+bool LowerInvoke::insertCheapEHSupport(Function &F) {
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ // Insert a normal call instruction...
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Remove any PHI node entries from the exception destination.
+ II->getUnwindDest()->removePredecessor(BB);
+
+ // Remove the invoke instruction now.
+ BB->getInstList().erase(II);
+
+ ++NumInvokes; Changed = true;
+ }
+ return Changed;
+}
+
+/// rewriteExpensiveInvoke - Insert code and hack the function to replace the
+/// specified invoke instruction with a call.
+void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum,
+ AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch) {
+ ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
+
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert a store of the invoke num before the invoke and store zero into the
+ // location afterward.
+ new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile
+
+ // Insert a store of the stack ptr before the invoke, so we can restore it
+ // later in the exception case.
+ CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
+ new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
+
+ BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt();
+ // nonvolatile.
+ new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
+ InvokeNum, false, NI);
+
+ Instruction* StackPtrLoad =
+ new LoadInst(StackPtr, "stackptr.restore", true,
+ II->getUnwindDest()->getFirstInsertionPt());
+ CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
+
+ // Add a switch case to our unwind block.
+ CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
+
+ // Insert a normal call instruction.
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Replace the invoke with an uncond branch.
+ BranchInst::Create(II->getNormalDest(), NewCall->getParent());
+ II->eraseFromParent();
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+// First thing we need to do is scan the whole function for values that are
+// live across unwind edges. Each value that is live across an unwind edge
+// we spill into a stack location, guaranteeing that there is nothing live
+// across the unwind edge. This process also splits all critical edges
+// coming out of invoke's.
+void LowerInvoke::
+splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+ SplitCriticalEdge(II, 1, this);
+ assert(!isa<PHINode>(II->getNormalDest()) &&
+ !isa<PHINode>(II->getUnwindDest()) &&
+ "critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the AllocaInst.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the AllocaInst.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the
+ // operand, but we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
+
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*,16> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Scan all of the uses and see if the live range is live across an unwind
+ // edge. If we find a use live across an invoke edge, create an alloca
+ // and spill the value.
+ std::set<InvokeInst*> InvokesWithStoreInserted;
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+}
+
+bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<InvokeInst*,16> Invokes;
+ UnreachableInst* UnreachablePlaceholder = 0;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ }
+
+ if (Invokes.empty()) return false;
+
+ NumInvokes += Invokes.size();
+
+ // TODO: This is not an optimal way to do this. In particular, this always
+ // inserts setjmp calls into the entries of functions with invoke instructions
+ // even though there are possibly paths through the function that do not
+ // execute any invokes. In particular, for functions with early exits, e.g.
+ // the 'addMove' method in hexxagon, it would be nice to not have to do the
+ // setjmp stuff on the early exit path. This requires a bit of dataflow, but
+ // would not be too hard to do.
+
+ // If we have an invoke instruction, insert a setjmp that dominates all
+ // invokes. After the setjmp, use a cond branch that goes to the original
+ // code path on zero, and to a designated 'catch' block of nonzero.
+ Value *OldJmpBufPtr = 0;
+ if (!Invokes.empty()) {
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge
+ // we spill into a stack location, guaranteeing that there is nothing live
+ // across the unwind edge. This process also splits all critical edges
+ // coming out of invoke's.
+ splitLiveRangesLiveAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be live across invokes.
+ unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
+ AllocaInst *JmpBuf =
+ new AllocaInst(JBLinkTy, 0, Align,
+ "jblink", F.begin()->begin());
+
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
+ OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf",
+ EntryBB->getTerminator());
+
+ // Copy the JBListHead to the alloca.
+ Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
+ EntryBB->getTerminator());
+ new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator());
+
+ // Add the new jumpbuf to the list.
+ new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator());
+
+ // Create the catch block. The catch block is basically a big switch
+ // statement that goes to all of the invoke catch blocks.
+ BasicBlock *CatchBB =
+ BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
+
+ // Create an alloca which keeps track of the stack pointer before every
+ // invoke, this allows us to properly restore the stack pointer after
+ // long jumping.
+ AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0,
+ "stackptr", EntryBB->begin());
+
+ // Create an alloca which keeps track of which invoke is currently
+ // executing. For normal calls it contains zero.
+ AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
+ "invokenum",EntryBB->begin());
+ new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ InvokeNum, true, EntryBB->getTerminator());
+
+ // Insert a load in the Catch block, and a switch on its value. By default,
+ // we go to a block that just does an unwind (which is the correct action
+ // for a standard call). We insert an unreachable instruction here and
+ // modify the block to jump to the correct unwinding pad later.
+ BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB);
+
+ Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
+ SwitchInst *CatchSwitch =
+ SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB);
+
+ // Now that things are set up, insert the setjmp call itself.
+
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "setjmp.cont");
+
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
+ Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf",
+ EntryBB->getTerminator());
+ JmpBufPtr = new BitCastInst(JmpBufPtr,
+ Type::getInt8PtrTy(F.getContext()),
+ "tmp", EntryBB->getTerminator());
+ Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
+ EntryBB->getTerminator());
+
+ // Compare the return value to zero.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, SJRet,
+ Constant::getNullValue(SJRet->getType()),
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB);
+
+ // At this point, we are all set up, rewrite each invoke instruction.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch);
+ }
+
+ // We know that there is at least one unwind.
+
+ // Create three new blocks, the block to load the jmpbuf ptr and compare
+ // against null, the block to do the longjmp, and the error block for if it
+ // is null. Add them at the end of the function because they are not hot.
+ BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(),
+ "dounwind", &F);
+ BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F);
+ BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);
+
+ // If this function contains an invoke, restore the old jumpbuf ptr.
+ Value *BufPtr;
+ if (OldJmpBufPtr) {
+ // Before the return, insert a copy from the saved value to the new value.
+ BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler);
+ new StoreInst(BufPtr, JBListHead, UnwindHandler);
+ } else {
+ BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler);
+ }
+
+ // Load the JBList, if it's null, then there was no catch!
+ Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,
+ Constant::getNullValue(BufPtr->getType()),
+ "notnull");
+ BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
+
+ // Create the block to do the longjmp.
+ // Get a pointer to the jmpbuf and longjmp.
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
+ Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock);
+ Idx[0] = new BitCastInst(Idx[0],
+ Type::getInt8PtrTy(F.getContext()),
+ "tmp", UnwindBlock);
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
+ CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
+ new UnreachableInst(F.getContext(), UnwindBlock);
+
+ // Set up the term block ("throw without a catch").
+ new UnreachableInst(F.getContext(), TermBlock);
+
+ // Insert a call to abort()
+ CallInst::Create(AbortFn, "",
+ TermBlock->getTerminator())->setTailCall();
+
+ // Replace the inserted unreachable with a branch to the unwind handler.
+ if (UnreachablePlaceholder) {
+ BranchInst::Create(UnwindHandler, UnreachablePlaceholder);
+ UnreachablePlaceholder->eraseFromParent();
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, restore the old jmpbuf pointer to its input value.
+ if (OldJmpBufPtr) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *R = Returns[i];
+
+ // Before the return, insert a copy from the saved value to the new value.
+ Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R);
+ new StoreInst(OldBuf, JBListHead, true, R);
+ }
+ }
+
+ return true;
+}
+
+bool LowerInvoke::runOnFunction(Function &F) {
+ if (useExpensiveEHSupport)
+ return insertExpensiveEHSupport(F);
+ else
+ return insertCheapEHSupport(F);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
new file mode 100644
index 000000000000..955b853533b0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -0,0 +1,305 @@
+//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerSwitch transformation rewrites switch instructions with a sequence
+// of branches, which allows targets to get away with not implementing the
+// switch instruction until it is convenient.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
+ /// instructions.
+ class LowerSwitch : public FunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSwitch() : FunctionPass(ID) {
+ initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerInvokePassID);
+ }
+
+ struct CaseRange {
+ Constant* Low;
+ Constant* High;
+ BasicBlock* BB;
+
+ CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
+ Low(low), High(high), BB(bb) { }
+ };
+
+ typedef std::vector<CaseRange> CaseVector;
+ typedef std::vector<CaseRange>::iterator CaseItr;
+ private:
+ void processSwitchInst(SwitchInst *SI);
+
+ BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
+ };
+}
+
+char LowerSwitch::ID = 0;
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LowerSwitchID = LowerSwitch::ID;
+// createLowerSwitchPass - Interface to this file...
+FunctionPass *llvm::createLowerSwitchPass() {
+ return new LowerSwitch();
+}
+
+bool LowerSwitch::runOnFunction(Function &F) {
+ bool Changed = false;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+ Changed = true;
+ processSwitchInst(SI);
+ }
+ }
+
+ return Changed;
+}
+
+// operator<< - Used for debugging purposes.
+//
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C)
+ LLVM_ATTRIBUTE_USED;
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C) {
+ O << "[";
+
+ for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
+ E = C.end(); B != E; ) {
+ O << *B->Low << " -" << *B->High;
+ if (++B != E) O << ", ";
+ }
+
+ return O << "]";
+}
+
+// switchConvert - Convert the switch statement into a binary lookup of
+// the case values. The function recursively builds this tree.
+//
+BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
+ Value* Val, BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ unsigned Size = End - Begin;
+
+ if (Size == 1)
+ return newLeafBlock(*Begin, Val, OrigBlock, Default);
+
+ unsigned Mid = Size / 2;
+ std::vector<CaseRange> LHS(Begin, Begin + Mid);
+ DEBUG(dbgs() << "LHS: " << LHS << "\n");
+ std::vector<CaseRange> RHS(Begin + Mid, End);
+ DEBUG(dbgs() << "RHS: " << RHS << "\n");
+
+ CaseRange& Pivot = *(Begin + Mid);
+ DEBUG(dbgs() << "Pivot ==> "
+ << cast<ConstantInt>(Pivot.Low)->getValue() << " -"
+ << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+
+ BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
+ OrigBlock, Default);
+ BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val,
+ OrigBlock, Default);
+
+ // Create a new node that checks if the value is < pivot. Go to the
+ // left branch if it is and right branch if not.
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewNode);
+
+ ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_ULT,
+ Val, Pivot.Low, "Pivot");
+ NewNode->getInstList().push_back(Comp);
+ BranchInst::Create(LBranch, RBranch, Comp, NewNode);
+ return NewNode;
+}
+
+// newLeafBlock - Create a new leaf block for the binary lookup tree. It
+// checks if the switch's value == the case's value. If not, then it
+// jumps to the default branch. At this point in the tree, the value
+// can't be another valid case value, so the jump to the "default" branch
+// is warranted.
+//
+BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewLeaf);
+
+ // Emit comparison
+ ICmpInst* Comp = NULL;
+ if (Leaf.Low == Leaf.High) {
+ // Make the seteq instruction...
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
+ Leaf.Low, "SwitchLeaf");
+ } else {
+ // Make range comparison
+ if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
+ // Val >= Min && Val <= Hi --> Val <= Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
+ // Val >= 0 && Val <= Hi --> Val <=u Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else {
+ // Emit V-Lo <=u Hi-Lo
+ Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
+ Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
+ Val->getName()+".off",
+ NewLeaf);
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+ "SwitchLeaf");
+ }
+ }
+
+ // Make the conditional branch...
+ BasicBlock* Succ = Leaf.BB;
+ BranchInst::Create(Succ, Default, Comp, NewLeaf);
+
+ // If there were any PHI nodes in this successor, rewrite one entry
+ // from OrigBlock to come from NewLeaf.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode* PN = cast<PHINode>(I);
+ // Remove all but one incoming entries from the cluster
+ uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() -
+ cast<ConstantInt>(Leaf.Low)->getSExtValue();
+ for (uint64_t j = 0; j < Range; ++j) {
+ PN->removeIncomingValue(OrigBlock);
+ }
+
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+ }
+
+ return NewLeaf;
+}
+
+// Clusterify - Transform simple list of Cases into list of CaseRange's
+unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+
+ IntegersSubsetToBB TheClusterifier;
+
+ // Start with "simple" cases
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock *SuccBB = i.getCaseSuccessor();
+ IntegersSubset CaseRanges = i.getCaseValueEx();
+ TheClusterifier.add(CaseRanges, SuccBB);
+ }
+
+ TheClusterifier.optimize();
+
+ size_t numCmps = 0;
+ for (IntegersSubsetToBB::RangeIterator i = TheClusterifier.begin(),
+ e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
+ IntegersSubsetToBB::Cluster &C = *i;
+
+ // FIXME: Currently work with ConstantInt based numbers.
+ // Changing it to APInt based is a pretty heavy for this commit.
+ Cases.push_back(CaseRange(C.first.getLow().toConstantInt(),
+ C.first.getHigh().toConstantInt(), C.second));
+ if (C.first.isSingleNumber())
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+// processSwitchInst - Replace the specified switch instruction with a sequence
+// of chained if-then insts in a balanced binary search.
+//
+void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+ BasicBlock *CurBlock = SI->getParent();
+ BasicBlock *OrigBlock = CurBlock;
+ Function *F = CurBlock->getParent();
+ Value *Val = SI->getCondition(); // The value we are switching on...
+ BasicBlock* Default = SI->getDefaultDest();
+
+ // If there is only the default destination, don't bother with the code below.
+ if (!SI->getNumCases()) {
+ BranchInst::Create(SI->getDefaultDest(), CurBlock);
+ CurBlock->getInstList().erase(SI);
+ return;
+ }
+
+ // Create a new, empty default block so that the new hierarchy of
+ // if-then statements go to this and the PHI nodes are happy.
+ BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+
+ BranchInst::Create(Default, NewDefault);
+
+ // If there is an entry in any PHI nodes for the default edge, make sure
+ // to update them as well.
+ for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
+ }
+
+ // Prepare cases vector.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ DEBUG(dbgs() << "Cases: " << Cases << "\n");
+ (void)numCmps;
+
+ BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
+ OrigBlock, NewDefault);
+
+ // Branch to our shiny new if-then stuff...
+ BranchInst::Create(SwitchBlock, OrigBlock);
+
+ // We are now done with the switch instruction, delete it.
+ CurBlock->getInstList().erase(SI);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
new file mode 100644
index 000000000000..61b3965d8f11
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -0,0 +1,90 @@
+//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a simple pass wrapper around the PromoteMemToReg function call
+// exposed by the Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+using namespace llvm;
+
+STATISTIC(NumPromoted, "Number of alloca's promoted");
+
+namespace {
+ struct PromotePass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PromotePass() : FunctionPass(ID) {
+ initializePromotePassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - To run this pass, first we calculate the alloca
+ // instructions that are safe for promotion, then we promote each one.
+ //
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreservedID(LowerInvokePassID);
+ }
+ };
+} // end of anonymous namespace
+
+char PromotePass::ID = 0;
+INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+
+bool PromotePass::runOnFunction(Function &F) {
+ std::vector<AllocaInst*> Allocas;
+
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+
+ bool Changed = false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ while (1) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (isAllocaPromotable(AI))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty()) break;
+
+ PromoteMemToReg(Allocas, DT);
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+// createPromoteMemoryToRegister - Provide an entry point to create this pass.
+//
+FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
+ return new PromotePass();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
new file mode 100644
index 000000000000..3716f586ff06
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -0,0 +1,145 @@
+//===- MetaRenamer.cpp - Rename everything with metasyntatic names --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass renames everything with metasyntatic names. The intent is to use
+// this pass after bugpoint reduction to conceal the nature of the original
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+
+ // This PRNG is from the ISO C spec. It is intentionally simple and
+ // unsuitable for cryptographic use. We're just looking for enough
+ // variety to surprise and delight users.
+ struct PRNG {
+ unsigned long next;
+
+ void srand(unsigned int seed) {
+ next = seed;
+ }
+
+ int rand() {
+ next = next * 1103515245 + 12345;
+ return (unsigned int)(next / 65536) % 32768;
+ }
+ };
+
+ struct MetaRenamer : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ MetaRenamer() : ModulePass(ID) {
+ initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnModule(Module &M) {
+ static const char *metaNames[] = {
+ // See http://en.wikipedia.org/wiki/Metasyntactic_variable
+ "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
+ "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
+ };
+
+ // Seed our PRNG with simple additive sum of ModuleID. We're looking to
+ // simply avoid always having the same function names, and we need to
+ // remain deterministic.
+ unsigned int randSeed = 0;
+ for (std::string::const_iterator I = M.getModuleIdentifier().begin(),
+ E = M.getModuleIdentifier().end(); I != E; ++I)
+ randSeed += *I;
+
+ PRNG prng;
+ prng.srand(randSeed);
+
+ // Rename all aliases
+ for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
+ AI != AE; ++AI) {
+ StringRef Name = AI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
+ AI->setName("alias");
+ }
+
+ // Rename all global variables
+ for (Module::global_iterator GI = M.global_begin(), GE = M.global_end();
+ GI != GE; ++GI) {
+ StringRef Name = GI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
+ GI->setName("global");
+ }
+
+ // Rename all struct types
+ TypeFinder StructTypes;
+ StructTypes.run(M, true);
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *STy = StructTypes[i];
+ if (STy->isLiteral() || STy->getName().empty()) continue;
+
+ SmallString<128> NameStorage;
+ STy->setName((Twine("struct.") + metaNames[prng.rand() %
+ array_lengthof(metaNames)]).toStringRef(NameStorage));
+ }
+
+ // Rename all functions
+ for (Module::iterator FI = M.begin(), FE = M.end();
+ FI != FE; ++FI) {
+ StringRef Name = FI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
+ FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]);
+ runOnFunction(*FI);
+ }
+ return true;
+ }
+
+ bool runOnFunction(Function &F) {
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
+ AI != AE; ++AI)
+ if (!AI->getType()->isVoidTy())
+ AI->setName("arg");
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ BB->setName("bb");
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->getType()->isVoidTy())
+ I->setName("tmp");
+ }
+ return true;
+ }
+ };
+}
+
+char MetaRenamer::ID = 0;
+INITIALIZE_PASS(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
+//===----------------------------------------------------------------------===//
+//
+// MetaRenamer - Rename everything with metasyntactic names.
+//
+ModulePass *llvm::createMetaRenamerPass() {
+ return new MetaRenamer();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
new file mode 100644
index 000000000000..d090b487213b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -0,0 +1,64 @@
+//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform manipulations on Modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+static void appendToGlobalArray(const char *Array,
+ Module &M, Function *F, int Priority) {
+ IRBuilder<> IRB(M.getContext());
+ FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
+ StructType *Ty = StructType::get(
+ IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL);
+
+ Constant *RuntimeCtorInit = ConstantStruct::get(
+ Ty, IRB.getInt32(Priority), F, NULL);
+
+ // Get the current set of static global constructors and add the new ctor
+ // to the list.
+ SmallVector<Constant *, 16> CurrentCtors;
+ if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
+ if (Constant *Init = GVCtor->getInitializer()) {
+ unsigned n = Init->getNumOperands();
+ CurrentCtors.reserve(n + 1);
+ for (unsigned i = 0; i != n; ++i)
+ CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
+ }
+ GVCtor->eraseFromParent();
+ }
+
+ CurrentCtors.push_back(RuntimeCtorInit);
+
+ // Create a new initializer.
+ ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(),
+ CurrentCtors.size());
+ Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
+
+ // Create the new global variable and replace all uses of
+ // the old global variable with the new one.
+ (void)new GlobalVariable(M, NewInit->getType(), false,
+ GlobalValue::AppendingLinkage, NewInit, Array);
+}
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
new file mode 100644
index 000000000000..de335ec1a05c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -0,0 +1,1149 @@
+//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file promotes memory references to be register references. It promotes
+// alloca instructions which only have loads and stores as uses. An alloca is
+// transformed by using iterated dominator frontiers to place PHI nodes, then
+// traversing the function in depth-first order to rewrite loads and stores as
+// appropriate.
+//
+// The algorithm used here is based on:
+//
+// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
+// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
+// Programming Languages
+// POPL '95. ACM, New York, NY, 62-73.
+//
+// It has been modified to not explicitly use the DJ graph data structure and to
+// directly compute pruned SSA using per-variable liveness information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <queue>
+using namespace llvm;
+
+STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
+STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
+STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
+STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
+
+namespace llvm {
+template<>
+struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
+ typedef std::pair<BasicBlock*, unsigned> EltTy;
+ static inline EltTy getEmptyKey() {
+ return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
+ }
+ static inline EltTy getTombstoneKey() {
+ return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
+ }
+ static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
+ using llvm::hash_value;
+ return static_cast<unsigned>(hash_value(Val));
+ }
+ static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
+ return LHS == RHS;
+ }
+};
+}
+
+/// isAllocaPromotable - Return true if this alloca is legal for promotion.
+/// This is true if there are only loads and stores to the alloca.
+///
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+ // FIXME: If the memory unit is of pointer or integer type, we can permit
+ // assignments to subsections of the memory unit.
+
+ // Only allow direct and non-volatile loads and stores...
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) { // Loop over all of the uses of the alloca
+ const User *U = *UI;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Note that atomic loads can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
+ if (LI->isVolatile())
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI)
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ // Note that atomic stores can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
+ if (SI->isVolatile())
+ return false;
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!GEPI->hasAllZeroIndices())
+ return false;
+ if (!onlyUsedByLifetimeMarkers(GEPI))
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+namespace {
+ struct AllocaInfo;
+
+ // Data package used by RenamePass()
+ class RenamePassData {
+ public:
+ typedef std::vector<Value *> ValVector;
+
+ RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+ RenamePassData(BasicBlock *B, BasicBlock *P,
+ const ValVector &V) : BB(B), Pred(P), Values(V) {}
+ BasicBlock *BB;
+ BasicBlock *Pred;
+ ValVector Values;
+
+ void swap(RenamePassData &RHS) {
+ std::swap(BB, RHS.BB);
+ std::swap(Pred, RHS.Pred);
+ Values.swap(RHS.Values);
+ }
+ };
+
+ /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
+ /// load/store instructions in the block that directly load or store an alloca.
+ ///
+ /// This functionality is important because it avoids scanning large basic
+ /// blocks multiple times when promoting many allocas in the same block.
+ class LargeBlockInfo {
+ /// InstNumbers - For each instruction that we track, keep the index of the
+ /// instruction. The index starts out as the number of the instruction from
+ /// the start of the block.
+ DenseMap<const Instruction *, unsigned> InstNumbers;
+ public:
+
+ /// isInterestingInstruction - This code only looks at accesses to allocas.
+ static bool isInterestingInstruction(const Instruction *I) {
+ return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+ (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+ }
+
+ /// getInstructionIndex - Get or calculate the index of the specified
+ /// instruction.
+ unsigned getInstructionIndex(const Instruction *I) {
+ assert(isInterestingInstruction(I) &&
+ "Not a load/store to/from an alloca?");
+
+ // If we already have this instruction number, return it.
+ DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+ if (It != InstNumbers.end()) return It->second;
+
+ // Scan the whole block to get the instruction. This accumulates
+ // information for every interesting instruction in the block, in order to
+ // avoid gratuitus rescans.
+ const BasicBlock *BB = I->getParent();
+ unsigned InstNo = 0;
+ for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
+ BBI != E; ++BBI)
+ if (isInterestingInstruction(BBI))
+ InstNumbers[BBI] = InstNo++;
+ It = InstNumbers.find(I);
+
+ assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ return It->second;
+ }
+
+ void deleteValue(const Instruction *I) {
+ InstNumbers.erase(I);
+ }
+
+ void clear() {
+ InstNumbers.clear();
+ }
+ };
+
+ struct PromoteMem2Reg {
+ /// Allocas - The alloca instructions being promoted.
+ ///
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree &DT;
+ DIBuilder *DIB;
+
+ /// AST - An AliasSetTracker object to update. If null, don't update it.
+ ///
+ AliasSetTracker *AST;
+
+ /// AllocaLookup - Reverse mapping of Allocas.
+ ///
+ DenseMap<AllocaInst*, unsigned> AllocaLookup;
+
+ /// NewPhiNodes - The PhiNodes we're adding. That map is used to simplify
+ /// some Phi nodes as we iterate over it, so it should have deterministic
+ /// iterators. We could use a MapVector, but since we already maintain a
+ /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is
+ /// more efficient (also supports removal).
+ ///
+ DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes;
+
+ /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
+ /// it corresponds to.
+ DenseMap<PHINode*, unsigned> PhiToAllocaMap;
+
+ /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
+ /// each alloca that is of pointer type, we keep track of what to copyValue
+ /// to the inserted PHI nodes here.
+ ///
+ std::vector<Value*> PointerAllocaValues;
+
+ /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
+ /// intrinsic that describes it, if any, so that we can convert it to a
+ /// dbg.value intrinsic if the alloca gets promoted.
+ SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;
+
+ /// Visited - The set of basic blocks the renamer has already visited.
+ ///
+ SmallPtrSet<BasicBlock*, 16> Visited;
+
+ /// BBNumbers - Contains a stable numbering of basic blocks to avoid
+ /// non-determinstic behavior.
+ DenseMap<BasicBlock*, unsigned> BBNumbers;
+
+ /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
+ DenseMap<DomTreeNode*, unsigned> DomLevels;
+
+ /// BBNumPreds - Lazily compute the number of predecessors a block has.
+ DenseMap<const BasicBlock*, unsigned> BBNumPreds;
+ public:
+ PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
+ AliasSetTracker *ast)
+ : Allocas(A), DT(dt), DIB(0), AST(ast) {}
+ ~PromoteMem2Reg() {
+ delete DIB;
+ }
+
+ void run();
+
+ /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
+ ///
+ bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
+ return DT.dominates(BB1, BB2);
+ }
+
+ private:
+ void RemoveFromAllocasList(unsigned &AllocaIdx) {
+ Allocas[AllocaIdx] = Allocas.back();
+ Allocas.pop_back();
+ --AllocaIdx;
+ }
+
+ unsigned getNumPreds(const BasicBlock *BB) {
+ unsigned &NP = BBNumPreds[BB];
+ if (NP == 0)
+ NP = std::distance(pred_begin(BB), pred_end(BB))+1;
+ return NP-1;
+ }
+
+ void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info);
+ void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
+
+ void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI);
+ void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI);
+
+ void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncVals,
+ std::vector<RenamePassData> &Worklist);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
+ };
+
+ struct AllocaInfo {
+ SmallVector<BasicBlock*, 32> DefiningBlocks;
+ SmallVector<BasicBlock*, 32> UsingBlocks;
+
+ StoreInst *OnlyStore;
+ BasicBlock *OnlyBlock;
+ bool OnlyUsedInOneBlock;
+
+ Value *AllocaPointerVal;
+ DbgDeclareInst *DbgDeclare;
+
+ void clear() {
+ DefiningBlocks.clear();
+ UsingBlocks.clear();
+ OnlyStore = 0;
+ OnlyBlock = 0;
+ OnlyUsedInOneBlock = true;
+ AllocaPointerVal = 0;
+ DbgDeclare = 0;
+ }
+
+ /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
+ /// ivars.
+ void AnalyzeAlloca(AllocaInst *AI) {
+ clear();
+
+ // As we scan the uses of the alloca instruction, keep track of stores,
+ // and decide whether all of the loads and stores to the alloca are within
+ // the same basic block.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI->getParent());
+ AllocaPointerVal = SI->getOperand(0);
+ OnlyStore = SI;
+ } else {
+ LoadInst *LI = cast<LoadInst>(User);
+ // Otherwise it must be a load instruction, keep track of variable
+ // reads.
+ UsingBlocks.push_back(LI->getParent());
+ AllocaPointerVal = LI;
+ }
+
+ if (OnlyUsedInOneBlock) {
+ if (OnlyBlock == 0)
+ OnlyBlock = User->getParent();
+ else if (OnlyBlock != User->getParent())
+ OnlyUsedInOneBlock = false;
+ }
+ }
+
+ DbgDeclare = FindAllocaDbgDeclare(AI);
+ }
+ };
+
+ typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
+
+ struct DomTreeNodeCompare {
+ bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
+ return LHS.second < RHS.second;
+ }
+ };
+} // end of anonymous namespace
+
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+ // Knowing that this alloca is promotable, we know that it's safe to kill all
+ // instructions except for load and store.
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE;) {
+ Instruction *I = cast<Instruction>(*UI);
+ ++UI;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ continue;
+
+ if (!I->getType()->isVoidTy()) {
+ // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+ // Follow the use/def chain to erase them now instead of leaving it for
+ // dead code elimination later.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE;) {
+ Instruction *Inst = cast<Instruction>(*UI);
+ ++UI;
+ Inst->eraseFromParent();
+ }
+ }
+ I->eraseFromParent();
+ }
+}
+
+void PromoteMem2Reg::run() {
+ Function &F = *DT.getRoot()->getParent();
+
+ if (AST) PointerAllocaValues.resize(Allocas.size());
+ AllocaDbgDeclares.resize(Allocas.size());
+
+ AllocaInfo Info;
+ LargeBlockInfo LBI;
+
+ for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
+ AllocaInst *AI = Allocas[AllocaNum];
+
+ assert(isAllocaPromotable(AI) &&
+ "Cannot promote non-promotable alloca!");
+ assert(AI->getParent()->getParent() == &F &&
+ "All allocas should be in the same function, which is same as DF!");
+
+ removeLifetimeIntrinsicUsers(AI);
+
+ if (AI->use_empty()) {
+ // If there are no uses of the alloca, just delete it now.
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+
+ // Remove the alloca from the Allocas list, since it has been processed
+ RemoveFromAllocasList(AllocaNum);
+ ++NumDeadAlloca;
+ continue;
+ }
+
+ // Calculate the set of read and write-locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ Info.AnalyzeAlloca(AI);
+
+ // If there is only a single store to this value, replace any loads of
+ // it that are directly dominated by the definition with the value stored.
+ if (Info.DefiningBlocks.size() == 1) {
+ RewriteSingleStoreAlloca(AI, Info, LBI);
+
+ // Finally, after the scan, check to see if the store is all that is left.
+ if (Info.UsingBlocks.empty()) {
+ // Record debuginfo for the store and remove the declaration's
+ // debuginfo.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ if (!DIB)
+ DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
+ DDI->eraseFromParent();
+ }
+ // Remove the (now dead) store and alloca.
+ Info.OnlyStore->eraseFromParent();
+ LBI.deleteValue(Info.OnlyStore);
+
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+
+ ++NumSingleStore;
+ continue;
+ }
+ }
+
+ // If the alloca is only read and written in one basic block, just perform a
+ // linear sweep over the block to eliminate it.
+ if (Info.OnlyUsedInOneBlock) {
+ PromoteSingleBlockAlloca(AI, Info, LBI);
+
+ // Finally, after the scan, check to see if the stores are all that is
+ // left.
+ if (Info.UsingBlocks.empty()) {
+
+ // Remove the (now dead) stores and alloca.
+ while (!AI->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(AI->use_back());
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ if (!DIB)
+ DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ }
+ SI->eraseFromParent();
+ LBI.deleteValue(SI);
+ }
+
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+
+ // The alloca's debuginfo can be removed as well.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare)
+ DDI->eraseFromParent();
+
+ ++NumLocalPromoted;
+ continue;
+ }
+ }
+
+ // If we haven't computed dominator tree levels, do so now.
+ if (DomLevels.empty()) {
+ SmallVector<DomTreeNode*, 32> Worklist;
+
+ DomTreeNode *Root = DT.getRootNode();
+ DomLevels[Root] = 0;
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ unsigned ChildLevel = DomLevels[Node] + 1;
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
+ CI != CE; ++CI) {
+ DomLevels[*CI] = ChildLevel;
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
+ // If we haven't computed a numbering for the BB's in the function, do so
+ // now.
+ if (BBNumbers.empty()) {
+ unsigned ID = 0;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ BBNumbers[I] = ID++;
+ }
+
+ // If we have an AST to keep updated, remember some pointer value that is
+ // stored into the alloca.
+ if (AST)
+ PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
+
+ // Remember the dbg.declare intrinsic describing this alloca, if any.
+ if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+
+ // Keep the reverse mapping of the 'Allocas' array for the rename pass.
+ AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need PHI
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ DetermineInsertionPoint(AI, AllocaNum, Info);
+ }
+
+ if (Allocas.empty())
+ return; // All of the allocas must have been trivial!
+
+ LBI.clear();
+
+
+ // Set the incoming values for the basic block to be null values for all of
+ // the alloca's. We do this in case there is a load of a value that has not
+ // been stored yet. In this case, it will get this null value.
+ //
+ RenamePassData::ValVector Values(Allocas.size());
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+ Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+
+ // Walks all basic blocks in the function performing the SSA rename algorithm
+ // and inserting the phi nodes we marked as necessary
+ //
+ std::vector<RenamePassData> RenamePassWorkList;
+ RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+ do {
+ RenamePassData RPD;
+ RPD.swap(RenamePassWorkList.back());
+ RenamePassWorkList.pop_back();
+ // RenamePass may add new worklist entries.
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
+ } while (!RenamePassWorkList.empty());
+
+ // The renamer uses the Visited set to avoid infinite loops. Clear it now.
+ Visited.clear();
+
+ // Remove the allocas themselves from the function.
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ Instruction *A = Allocas[i];
+
+ // If there are any uses of the alloca instructions left, they must be in
+ // unreachable basic blocks that were not processed by walking the dominator
+ // tree. Just delete the users now.
+ if (!A->use_empty())
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ if (AST) AST->deleteValue(A);
+ A->eraseFromParent();
+ }
+
+ // Remove alloca's dbg.declare instrinsics from the function.
+ for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
+ DDI->eraseFromParent();
+
+ // Loop over all of the PHI nodes and see if there are any that we can get
+ // rid of because they merge all of the same incoming values. This can
+ // happen due to undef values coming into the PHI nodes. This process is
+ // iterative, because eliminating one PHI node can cause others to be removed.
+ bool EliminatedAPHI = true;
+ while (EliminatedAPHI) {
+ EliminatedAPHI = false;
+
+ // Iterating over NewPhiNodes is deterministic, so it is safe to try to
+ // simplify and RAUW them as we go. If it was not, we could add uses to
+ // the values we replace with in a non deterministic order, thus creating
+ // non deterministic def->use chains.
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
+ PHINode *PN = I->second;
+
+ // If this PHI node merges one value and/or undefs, get the value.
+ if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
+ if (AST && PN->getType()->isPointerTy())
+ AST->deleteValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ NewPhiNodes.erase(I++);
+ EliminatedAPHI = true;
+ continue;
+ }
+ ++I;
+ }
+ }
+
+ // At this point, the renamer has added entries to PHI nodes for all reachable
+ // code. Unfortunately, there may be unreachable blocks which the renamer
+ // hasn't traversed. If this is the case, the PHI nodes may not
+ // have incoming values for all predecessors. Loop over all PHI nodes we have
+ // created, inserting undef values if they are missing any incoming values.
+ //
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
+ // We want to do this once per basic block. As such, only process a block
+ // when we find the PHI that is the first entry in the block.
+ PHINode *SomePHI = I->second;
+ BasicBlock *BB = SomePHI->getParent();
+ if (&BB->front() != SomePHI)
+ continue;
+
+ // Only do work here if there the PHI nodes are missing incoming values. We
+ // know that all PHI nodes that were inserted in a block will have the same
+ // number of incoming values, so we can just check any of them.
+ if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
+ continue;
+
+ // Get the preds for BB.
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+
+ // Ok, now we know that all of the PHI nodes are missing entries for some
+ // basic blocks. Start by sorting the incoming predecessors for efficient
+ // access.
+ std::sort(Preds.begin(), Preds.end());
+
+ // Now we loop through all BB's which have entries in SomePHI and remove
+ // them from the Preds list.
+ for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
+ // Do a log(n) search of the Preds list for the entry we want.
+ SmallVector<BasicBlock*, 16>::iterator EntIt =
+ std::lower_bound(Preds.begin(), Preds.end(),
+ SomePHI->getIncomingBlock(i));
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
+ "PHI node has entry for a block which is not a predecessor!");
+
+ // Remove the entry
+ Preds.erase(EntIt);
+ }
+
+ // At this point, the blocks left in the preds list must have dummy
+ // entries inserted into every PHI nodes for the block. Update all the phi
+ // nodes in this block that we are inserting (there could be phis before
+ // mem2reg runs).
+ unsigned NumBadPreds = SomePHI->getNumIncomingValues();
+ BasicBlock::iterator BBI = BB->begin();
+ while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
+ SomePHI->getNumIncomingValues() == NumBadPreds) {
+ Value *UndefVal = UndefValue::get(SomePHI->getType());
+ for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
+ SomePHI->addIncoming(UndefVal, Preds[pred]);
+ }
+ }
+
+ NewPhiNodes.clear();
+}
+
+
+/// ComputeLiveInBlocks - Determine which blocks the value is live in. These
+/// are blocks which lead to uses. Knowing this allows us to avoid inserting
+/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
+/// would be dead).
+void PromoteMem2Reg::
+ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
+
+ // To determine liveness, we must iterate through the predecessors of blocks
+ // where the def is live. Blocks are added to the worklist if we need to
+ // check their predecessors. Start with all the using blocks.
+ SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+ Info.UsingBlocks.end());
+
+ // If any of the using blocks is also a definition block, check to see if the
+ // definition occurs before or after the use. If it happens before the use,
+ // the value isn't really live-in.
+ for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
+ BasicBlock *BB = LiveInBlockWorklist[i];
+ if (!DefBlocks.count(BB)) continue;
+
+ // Okay, this is a block that both uses and defines the value. If the first
+ // reference to the alloca is a def (store), then we know it isn't live-in.
+ for (BasicBlock::iterator I = BB->begin(); ; ++I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) != AI) continue;
+
+ // We found a store to the alloca before a load. The alloca is not
+ // actually live-in here.
+ LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
+ LiveInBlockWorklist.pop_back();
+ --i, --e;
+ break;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (LI->getOperand(0) != AI) continue;
+
+ // Okay, we found a load before a store to the alloca. It is actually
+ // live into this block.
+ break;
+ }
+ }
+ }
+
+ // Now that we have a set of blocks where the phi is live-in, recursively add
+ // their predecessors until we find the full region the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB))
+ continue;
+
+ // Since the value is live into BB, it is either defined in a predecessor or
+ // live into it to. Add the preds to the worklist unless they are a
+ // defining block.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+
+ // The value is not live into a predecessor if it defines the value.
+ if (DefBlocks.count(P))
+ continue;
+
+ // Otherwise it is, add to the worklist.
+ LiveInBlockWorklist.push_back(P);
+ }
+ }
+}
+
+/// DetermineInsertionPoint - At this point, we're committed to promoting the
+/// alloca using IDF's, and the standard SSA construction algorithm. Determine
+/// which blocks need phi nodes and see if we can optimize out some work by
+/// avoiding insertion of dead phi nodes.
+void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info) {
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock*, 32> DefBlocks;
+ DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards.
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ DomTreeNodeCompare> IDFPriorityQueue;
+ IDFPriorityQueue PQ;
+
+ for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
+ E = DefBlocks.end(); I != E; ++I) {
+ if (DomTreeNode *Node = DT.getNode(*I))
+ PQ.push(std::make_pair(Node, DomLevels[Node]));
+ }
+
+ SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
+ SmallPtrSet<DomTreeNode*, 32> Visited;
+ SmallVector<DomTreeNode*, 32> Worklist;
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNode *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ BasicBlock *BB = Node->getBlock();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ DomTreeNode *SuccNode = DT.getNode(*SI);
+
+ // Quickly skip all CFG edges that are also dominator tree edges instead
+ // of catching them below.
+ if (SuccNode->getIDom() == Node)
+ continue;
+
+ unsigned SuccLevel = DomLevels[SuccNode];
+ if (SuccLevel > RootLevel)
+ continue;
+
+ if (!Visited.insert(SuccNode))
+ continue;
+
+ BasicBlock *SuccBB = SuccNode->getBlock();
+ if (!LiveInBlocks.count(SuccBB))
+ continue;
+
+ DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
+ if (!DefBlocks.count(SuccBB))
+ PQ.push(std::make_pair(SuccNode, SuccLevel));
+ }
+
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
+ ++CI) {
+ if (!Visited.count(*CI))
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
+ if (DFBlocks.size() > 1)
+ std::sort(DFBlocks.begin(), DFBlocks.end());
+
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
+ QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
+}
+
+/// RewriteSingleStoreAlloca - If there is only a single store to this value,
+/// replace any loads of it that are directly dominated by the definition with
+/// the value stored.
+void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
+ AllocaInfo &Info,
+ LargeBlockInfo &LBI) {
+ StoreInst *OnlyStore = Info.OnlyStore;
+ bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+ BasicBlock *StoreBB = OnlyStore->getParent();
+ int StoreIndex = -1;
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
+ Instruction *UserInst = cast<Instruction>(*UI++);
+ if (!isa<LoadInst>(UserInst)) {
+ assert(UserInst == OnlyStore && "Should only have load/stores");
+ continue;
+ }
+ LoadInst *LI = cast<LoadInst>(UserInst);
+
+ // Okay, if we have a load from the alloca, we want to replace it with the
+ // only value stored to the alloca. We can do this if the value is
+ // dominated by the store. If not, we use the rest of the mem2reg machinery
+ // to insert the phi nodes as needed.
+ if (!StoringGlobalVal) { // Non-instructions are always dominated.
+ if (LI->getParent() == StoreBB) {
+ // If we have a use that is in the same block as the store, compare the
+ // indices of the two instructions to see which one came first. If the
+ // load came before the store, we can't handle it.
+ if (StoreIndex == -1)
+ StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+ if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(StoreBB);
+ continue;
+ }
+
+ } else if (LI->getParent() != StoreBB &&
+ !dominates(StoreBB, LI->getParent())) {
+ // If the load and store are in different blocks, use BB dominance to
+ // check their relationships. If the store doesn't dom the use, bail
+ // out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+ }
+
+ // Otherwise, we *can* safely rewrite this load.
+ Value *ReplVal = OnlyStore->getOperand(0);
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(ReplVal);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+}
+
+namespace {
+
+/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
+/// first element of a pair.
+struct StoreIndexSearchPredicate {
+ bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
+ const std::pair<unsigned, StoreInst*> &RHS) {
+ return LHS.first < RHS.first;
+ }
+};
+
+}
+
+/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
+/// block. If this is the case, avoid traversing the CFG and inserting a lot of
+/// potentially useless PHI nodes by just performing a single linear pass over
+/// the basic block using the Alloca.
+///
+/// If we cannot promote this alloca (because it is read before it is written),
+/// return true. This is necessary in cases where, due to control flow, the
+/// alloca is potentially undefined on some control flow paths. e.g. code like
+/// this is potentially correct:
+///
+/// for (...) { if (c) { A = undef; undef = B; } }
+///
+/// ... so long as A is not used before undef is set.
+///
+void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI) {
+ // The trickiest case to handle is when we have large blocks. Because of this,
+ // this code is optimized assuming that large blocks happen. This does not
+ // significantly pessimize the small block case. This uses LargeBlockInfo to
+ // make it efficient to get the index of various operations in the block.
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ // Walk the use-def list of the alloca, getting the locations of all stores.
+ typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
+ StoresByIndexTy StoresByIndex;
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+ // If there are no stores to the alloca, just replace any loads with undef.
+ if (StoresByIndex.empty()) {
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LBI.deleteValue(LI);
+ LI->eraseFromParent();
+ }
+ return;
+ }
+
+ // Sort the stores by their index, making it efficient to do a lookup with a
+ // binary search.
+ std::sort(StoresByIndex.begin(), StoresByIndex.end());
+
+ // Walk all of the loads from this alloca, replacing them with the nearest
+ // store above them, if any.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+ if (!LI) continue;
+
+ unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+ // Find the nearest store that has a lower than this load.
+ StoresByIndexTy::iterator I =
+ std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+ std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
+ StoreIndexSearchPredicate());
+
+ // If there is no store before this load, then we can't promote this load.
+ if (I == StoresByIndex.begin()) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+
+ // Otherwise, there was a store before this load, the load takes its value.
+ --I;
+ LI->replaceAllUsesWith(I->second->getOperand(0));
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+}
+
+// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
+// Alloca returns true if there wasn't already a phi-node for that variable
+//
+bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
+ unsigned &Version) {
+ // Look up the basic-block in question.
+ PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
+
+ // If the BB already has a phi node added for the i'th alloca then we're done!
+ if (PN) return false;
+
+ // Create a PhiNode using the dereferenced type... and add the phi-node to the
+ // BasicBlock.
+ PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+ BB->begin());
+ ++NumPHIInsert;
+ PhiToAllocaMap[PN] = AllocaNo;
+
+ if (AST && PN->getType()->isPointerTy())
+ AST->copyValue(PointerAllocaValues[AllocaNo], PN);
+
+ return true;
+}
+
+// RenamePass - Recursively traverse the CFG of the function, renaming loads and
+// stores to the allocas which we are promoting. IncomingVals indicates what
+// value each Alloca contains on exit from the predecessor block Pred.
+//
+void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncomingVals,
+ std::vector<RenamePassData> &Worklist) {
+NextIteration:
+ // If we are inserting any phi nodes into this BB, they will already be in the
+ // block.
+ if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
+ // If we have PHI nodes to update, compute the number of edges from Pred to
+ // BB.
+ if (PhiToAllocaMap.count(APN)) {
+ // We want to be able to distinguish between PHI nodes being inserted by
+ // this invocation of mem2reg from those phi nodes that already existed in
+ // the IR before mem2reg was run. We determine that APN is being inserted
+ // because it is missing incoming edges. All other PHI nodes being
+ // inserted by this pass of mem2reg will have the same number of incoming
+ // operands so far. Remember this count.
+ unsigned NewPHINumOperands = APN->getNumOperands();
+
+ unsigned NumEdges = 0;
+ for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
+ if (*I == BB)
+ ++NumEdges;
+ assert(NumEdges && "Must be at least one edge from Pred to BB!");
+
+ // Add entries for all the phis.
+ BasicBlock::iterator PNI = BB->begin();
+ do {
+ unsigned AllocaNo = PhiToAllocaMap[APN];
+
+ // Add N incoming values to the PHI node.
+ for (unsigned i = 0; i != NumEdges; ++i)
+ APN->addIncoming(IncomingVals[AllocaNo], Pred);
+
+ // The currently active variable for this block is now the PHI.
+ IncomingVals[AllocaNo] = APN;
+
+ // Get the next phi node.
+ ++PNI;
+ APN = dyn_cast<PHINode>(PNI);
+ if (APN == 0) break;
+
+ // Verify that it is missing entries. If not, it is not being inserted
+ // by this mem2reg invocation so we want to ignore it.
+ } while (APN->getNumOperands() == NewPHINumOperands);
+ }
+ }
+
+ // Don't revisit blocks.
+ if (!Visited.insert(BB)) return;
+
+ for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
+ Instruction *I = II++; // get the instruction, increment iterator
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
+ if (!Src) continue;
+
+ DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
+ if (AI == AllocaLookup.end()) continue;
+
+ Value *V = IncomingVals[AI->second];
+
+ // Anything using the load now uses the current value.
+ LI->replaceAllUsesWith(V);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ BB->getInstList().erase(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Delete this instruction and mark the name as the current holder of the
+ // value
+ AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
+ if (!Dest) continue;
+
+ DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+ if (ai == AllocaLookup.end())
+ continue;
+
+ // what value were we writing?
+ IncomingVals[ai->second] = SI->getOperand(0);
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
+ if (!DIB)
+ DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ }
+ BB->getInstList().erase(SI);
+ }
+ }
+
+ // 'Recurse' to our successors.
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E) return;
+
+ // Keep track of the successors so we don't visit the same successor twice
+ SmallPtrSet<BasicBlock*, 8> VisitedSuccs;
+
+ // Handle the first successor without using the worklist.
+ VisitedSuccs.insert(*I);
+ Pred = BB;
+ BB = *I;
+ ++I;
+
+ for (; I != E; ++I)
+ if (VisitedSuccs.insert(*I))
+ Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
+
+ goto NextIteration;
+}
+
+/// PromoteMemToReg - Promote the specified list of alloca instructions into
+/// scalar registers, inserting PHI nodes as appropriate. This function does
+/// not modify the CFG of the function at all. All allocas must be from the
+/// same function.
+///
+/// If AST is specified, the specified tracker is updated to reflect changes
+/// made to the IR.
+///
+void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
+ DominatorTree &DT, AliasSetTracker *AST) {
+ // If there is nothing to do, bail out...
+ if (Allocas.empty()) return;
+
+ PromoteMem2Reg(Allocas, DT, AST).run();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
new file mode 100644
index 000000000000..9d90fbe5654a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -0,0 +1,530 @@
+//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ssaupdater"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+
+using namespace llvm;
+
+typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
+ : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
+
+SSAUpdater::~SSAUpdater() {
+ delete static_cast<AvailableValsTy*>(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
+void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+ ProtoType = Ty;
+ ProtoName = Name;
+}
+
+/// HasValueForBlock - Return true if the SSAUpdater already has a value for
+/// the specified block.
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+ assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+ assert(ProtoType == V->getType() &&
+ "All rewritten values must have the same type");
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// IsEquivalentPHI - Check if PHI has the same incoming value as specified
+/// in ValueMapping for each predecessor block.
+static bool IsEquivalentPHI(PHINode *PHI,
+ DenseMap<BasicBlock*, Value*> &ValueMapping) {
+ unsigned PHINumValues = PHI->getNumIncomingValues();
+ if (PHINumValues != ValueMapping.size())
+ return false;
+
+ // Scan the phi to see if it matches.
+ for (unsigned i = 0, e = PHINumValues; i != e; ++i)
+ if (ValueMapping[PHI->getIncomingBlock(i)] !=
+ PHI->getIncomingValue(i)) {
+ return false;
+ }
+
+ return true;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+ Value *Res = GetValueAtEndOfBlockInternal(BB);
+ return Res;
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlock(BB);
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
+ Value *SingularValue = 0;
+
+ // We can get our predecessor info by walking the pred_iterator list, but it
+ // is relatively slow. If we already have PHI nodes in this block, walk one
+ // of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (i == 0)
+ SingularValue = PredVal;
+ else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ } else {
+ bool isFirstPred = true;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBB = *PI;
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ }
+
+ // If there are no predecessors, just return undef.
+ if (PredValues.empty())
+ return UndefValue::get(ProtoType);
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // Otherwise, we do need a PHI: check to see if we already have one available
+ // in this block that produces the right value.
+ if (isa<PHINode>(BB->begin())) {
+ DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(),
+ PredValues.end());
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ if (IsEquivalentPHI(SomePHI, ValueMapping))
+ return SomePHI;
+ }
+ }
+
+ // Ok, we have no way out, insert a new one now.
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
+ ProtoName, &BB->front());
+
+ // Fill in all the predecessors of the PHI.
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (Value *V = SimplifyInstruction(InsertedPHI)) {
+ InsertedPHI->eraseFromParent();
+ return V;
+ }
+
+ // Set the DebugLoc of the inserted PHI, if available.
+ DebugLoc DL;
+ if (const Instruction *I = BB->getFirstNonPHI())
+ DL = I->getDebugLoc();
+ InsertedPHI->setDebugLoc(DL);
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void SSAUpdater::RewriteUse(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueInMiddleOfBlock(User->getParent());
+
+ // Notify that users of the existing value that it is being replaced.
+ Value *OldVal = U.get();
+ if (OldVal != V && OldVal->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(OldVal, V);
+
+ U.set(V);
+}
+
+/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
+/// this version of the method can rewrite uses in the same block as a
+/// definition, because it assumes that all uses of a value are below any
+/// inserted values.
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueAtEndOfBlock(User->getParent());
+
+ U.set(V);
+}
+
+/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template,
+/// specialized for SSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<SSAUpdater> {
+public:
+ typedef BasicBlock BlkT;
+ typedef Value *ValT;
+ typedef PHINode PhiT;
+
+ typedef succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
+
+ class PHI_iterator {
+ private:
+ PHINode *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(PHINode *P) // begin iterator
+ : PHI(P), idx(0) {}
+ PHI_iterator(PHINode *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumIncomingValues()) {}
+
+ PHI_iterator &operator++() { ++idx; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+ Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
+ BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
+ };
+
+ static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
+ /// vector, set Info->NumPreds, and allocate space in Info->Preds.
+ static void FindPredecessorBlocks(BasicBlock *BB,
+ SmallVectorImpl<BasicBlock*> *Preds) {
+ // We can get our predecessor info by walking the pred_iterator list,
+ // but it is relatively slow. If we already have PHI nodes in this
+ // block, walk one of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
+ Preds->push_back(SomePhi->getIncomingBlock(PI));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+ }
+
+ /// GetUndefVal - Get an undefined value of the same type as the value
+ /// being handled.
+ static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
+ return UndefValue::get(Updater->ProtoType);
+ }
+
+ /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
+ /// Reserve space for the operands but do not fill them in yet.
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
+ SSAUpdater *Updater) {
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
+ Updater->ProtoName, &BB->front());
+ return PHI;
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
+ PHI->addIncoming(Val, Pred);
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static PHINode *InstrIsPHI(Instruction *I) {
+ return dyn_cast<PHINode>(I);
+ }
+
+ /// ValueIsPHI - Check if a value is a PHI.
+ ///
+ static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
+ return dyn_cast<PHINode>(Val);
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
+ PHINode *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumIncomingValues() == 0)
+ return PHI;
+ return 0;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the value
+ /// that it defines.
+ static Value *GetPHIValue(PHINode *PHI) {
+ return PHI;
+ }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (Value *V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
+
+//===----------------------------------------------------------------------===//
+// LoadAndStorePromoter Implementation
+//===----------------------------------------------------------------------===//
+
+LoadAndStorePromoter::
+LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+ SSAUpdater &S, StringRef BaseName) : SSA(S) {
+ if (Insts.empty()) return;
+
+ Value *SomeVal;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+ SomeVal = LI;
+ else
+ SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
+
+ if (BaseName.empty())
+ BaseName = SomeVal->getName();
+ SSA.Initialize(SomeVal->getType(), BaseName);
+}
+
+
+void LoadAndStorePromoter::
+run(const SmallVectorImpl<Instruction*> &Insts) const {
+
+ // First step: bucket up uses of the alloca by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the function with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ // Walk the uses in the use-list order to be determinstic.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ BasicBlock *BB = User->getParent();
+ TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ updateDebugInfo(SI);
+ SSA.AddAvailableValue(BB, SI->getOperand(0));
+ } else
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, check to see if this block is all loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ // If so, we can queue them all as live in loads. We don't have an
+ // efficient way to tell which on is first in the block and don't want to
+ // scan large blocks, so just add all loads as live ins.
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!isInstInList(L, Insts)) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ replaceLoadWithValue(L, StoredValue);
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!isInstInList(SI, Insts)) continue;
+ updateDebugInfo(SI);
+
+ // Remember that this is the active value in the block.
+ StoredValue = SI->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ replaceLoadWithValue(ALoad, NewVal);
+
+ // Avoid assertions in unreachable code.
+ if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
+ ALoad->replaceAllUsesWith(NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // Allow the client to do stuff before we start nuking things.
+ doExtraRewritesBeforeFinalDeletion();
+
+ // Now that everything is rewritten, delete the old instructions from the
+ // function. They should all be dead now.
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdate data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediately loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ replaceLoadWithValue(cast<LoadInst>(User), NewVal);
+ User->replaceAllUsesWith(NewVal);
+ }
+
+ instructionDeleted(User);
+ User->eraseFromParent();
+ }
+}
+
+bool
+LoadAndStorePromoter::isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts)
+ const {
+ return std::find(Insts.begin(), Insts.end(), I) != Insts.end();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
new file mode 100644
index 000000000000..681bf9c2b7a4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -0,0 +1,4072 @@
+//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Peephole optimize the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+#include <map>
+#include <set>
+using namespace llvm;
+
+static cl::opt<unsigned>
+PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
+ cl::desc("Control the amount of phi node folding to perform (default = 1)"));
+
+static cl::opt<bool>
+DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
+ cl::desc("Duplicate return instructions into unconditional branches"));
+
+static cl::opt<bool>
+SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+ cl::desc("Sink common instructions down to the end block"));
+
+STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
+STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
+STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
+STATISTIC(NumSpeculations, "Number of speculative executed instructions");
+
+namespace {
+ /// ValueEqualityComparisonCase - Represents a case of a switch.
+ struct ValueEqualityComparisonCase {
+ ConstantInt *Value;
+ BasicBlock *Dest;
+
+ ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
+ : Value(Value), Dest(Dest) {}
+
+ bool operator<(ValueEqualityComparisonCase RHS) const {
+ // Comparing pointers is ok as we only rely on the order for uniquing.
+ return Value < RHS.Value;
+ }
+
+ bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
+ };
+
+class SimplifyCFGOpt {
+ const TargetTransformInfo &TTI;
+ const DataLayout *const TD;
+
+ Value *isValueEqualityComparison(TerminatorInst *TI);
+ BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
+ std::vector<ValueEqualityComparisonCase> &Cases);
+ bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred,
+ IRBuilder<> &Builder);
+ bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder);
+
+ bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
+ bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+ bool SimplifyUnreachable(UnreachableInst *UI);
+ bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
+ bool SimplifyIndirectBr(IndirectBrInst *IBI);
+ bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder);
+ bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
+
+public:
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD)
+ : TTI(TTI), TD(TD) {}
+ bool run(BasicBlock *BB);
+};
+}
+
+/// SafeToMergeTerminators - Return true if it is safe to merge these two
+/// terminator instructions together.
+///
+static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
+ if (SI1 == SI2) return false; // Can't merge with self!
+
+ // It is not safe to merge these two switch instructions if they have a common
+ // successor, and if that successor has a PHI node, and if *that* PHI node has
+ // conflicting incoming values from the two switch blocks.
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+ SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+
+ for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+ if (SI1Succs.count(*I))
+ for (BasicBlock::iterator BBI = (*I)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ if (PN->getIncomingValueForBlock(SI1BB) !=
+ PN->getIncomingValueForBlock(SI2BB))
+ return false;
+ }
+
+ return true;
+}
+
+/// isProfitableToFoldUnconditional - Return true if it is safe and profitable
+/// to merge these two terminator instructions together, where SI1 is an
+/// unconditional branch. PhiNodes will store all PHI nodes in common
+/// successors.
+///
+static bool isProfitableToFoldUnconditional(BranchInst *SI1,
+ BranchInst *SI2,
+ Instruction *Cond,
+ SmallVectorImpl<PHINode*> &PhiNodes) {
+ if (SI1 == SI2) return false; // Can't merge with self!
+ assert(SI1->isUnconditional() && SI2->isConditional());
+
+ // We fold the unconditional branch if we can easily update all PHI nodes in
+ // common successors:
+ // 1> We have a constant incoming value for the conditional branch;
+ // 2> We have "Cond" as the incoming value for the unconditional branch;
+ // 3> SI2->getCondition() and Cond have same operands.
+ CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition());
+ if (!Ci2) return false;
+ if (!(Cond->getOperand(0) == Ci2->getOperand(0) &&
+ Cond->getOperand(1) == Ci2->getOperand(1)) &&
+ !(Cond->getOperand(0) == Ci2->getOperand(1) &&
+ Cond->getOperand(1) == Ci2->getOperand(0)))
+ return false;
+
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+ SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+ for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+ if (SI1Succs.count(*I))
+ for (BasicBlock::iterator BBI = (*I)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ if (PN->getIncomingValueForBlock(SI1BB) != Cond ||
+ !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB)))
+ return false;
+ PhiNodes.push_back(PN);
+ }
+ return true;
+}
+
+/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will
+/// now be entries in it from the 'NewPred' block. The values that will be
+/// flowing into the PHI nodes will be the same as those coming in from
+/// ExistPred, an existing predecessor of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+ BasicBlock *ExistPred) {
+ if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = Succ->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
+}
+
+
+/// GetIfCondition - Given a basic block (BB) with two predecessors (and at
+/// least one PHI node in it), check to see if the merge at this block is due
+/// to an "if condition". If so, return the boolean condition that determines
+/// which entry into BB will be taken. Also, return by references the block
+/// that will be entered from if the condition is true, and the block that will
+/// be entered if the condition is false.
+///
+/// This does no checking to see if the true/false blocks have large or unsavory
+/// instructions in them.
+static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse) {
+ PHINode *SomePHI = cast<PHINode>(BB->begin());
+ assert(SomePHI->getNumIncomingValues() == 2 &&
+ "Function can only handle blocks with 2 predecessors!");
+ BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
+ BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
+
+ // We can only handle branches. Other control flow will be lowered to
+ // branches if possible anyway.
+ BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+ if (Pred1Br == 0 || Pred2Br == 0)
+ return 0;
+
+ // Eliminate code duplication by ensuring that Pred1Br is conditional if
+ // either are.
+ if (Pred2Br->isConditional()) {
+ // If both branches are conditional, we don't have an "if statement". In
+ // reality, we could transform this case, but since the condition will be
+ // required anyway, we stand no chance of eliminating it, so the xform is
+ // probably not profitable.
+ if (Pred1Br->isConditional())
+ return 0;
+
+ std::swap(Pred1, Pred2);
+ std::swap(Pred1Br, Pred2Br);
+ }
+
+ if (Pred1Br->isConditional()) {
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (Pred2->getSinglePredecessor() == 0)
+ return 0;
+
+ // If we found a conditional branch predecessor, make sure that it branches
+ // to BB and Pred2Br. If it doesn't, this isn't an "if statement".
+ if (Pred1Br->getSuccessor(0) == BB &&
+ Pred1Br->getSuccessor(1) == Pred2) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+ Pred1Br->getSuccessor(1) == BB) {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ } else {
+ // We know that one arm of the conditional goes to BB, so the other must
+ // go somewhere unrelated, and this must not be an "if statement".
+ return 0;
+ }
+
+ return Pred1Br->getCondition();
+ }
+
+ // Ok, if we got here, both predecessors end with an unconditional branch to
+ // BB. Don't panic! If both blocks only have a single (identical)
+ // predecessor, and THAT is a conditional branch, then we're all ok!
+ BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+ if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
+ return 0;
+
+ // Otherwise, if this is a conditional branch, then we can use it!
+ BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+ if (BI == 0) return 0;
+
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ }
+ return BI->getCondition();
+}
+
+/// ComputeSpeculuationCost - Compute an abstract "cost" of speculating the
+/// given instruction, which is assumed to be safe to speculate. 1 means
+/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
+static unsigned ComputeSpeculationCost(const User *I) {
+ assert(isSafeToSpeculativelyExecute(I) &&
+ "Instruction is not safe to speculatively execute!");
+ switch (Operator::getOpcode(I)) {
+ default:
+ // In doubt, be conservative.
+ return UINT_MAX;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return UINT_MAX;
+ return 1;
+ case Instruction::Load:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return 1; // These are all cheap.
+
+ case Instruction::Call:
+ case Instruction::Select:
+ return 2;
+ }
+}
+
+/// DominatesMergePoint - If we have a merge point of an "if condition" as
+/// accepted above, return true if the specified value dominates the block. We
+/// don't handle the true generality of domination here, just a special case
+/// which works well enough for us.
+///
+/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
+/// see if V (which must be an instruction) and its recursive operands
+/// that do not dominate BB have a combined cost lower than CostRemaining and
+/// are non-trapping. If both are true, the instruction is inserted into the
+/// set and true is returned.
+///
+/// The cost for most non-trapping instructions is defined as 1 except for
+/// Select whose cost is 2.
+///
+/// After this function returns, CostRemaining is decreased by the cost of
+/// V plus its non-dominating operands. If that cost is greater than
+/// CostRemaining, false is returned and CostRemaining is undefined.
+static bool DominatesMergePoint(Value *V, BasicBlock *BB,
+ SmallPtrSet<Instruction*, 4> *AggressiveInsts,
+ unsigned &CostRemaining) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ // Non-instructions all dominate instructions, but not all constantexprs
+ // can be executed unconditionally.
+ if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
+ if (C->canTrap())
+ return false;
+ return true;
+ }
+ BasicBlock *PBB = I->getParent();
+
+ // We don't want to allow weird loops that might have the "if condition" in
+ // the bottom of this block.
+ if (PBB == BB) return false;
+
+ // If this instruction is defined in a block that contains an unconditional
+ // branch to BB, then it must be in the 'conditional' part of the "if
+ // statement". If not, it definitely dominates the region.
+ BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB)
+ return true;
+
+ // If we aren't allowing aggressive promotion anymore, then don't consider
+ // instructions in the 'if region'.
+ if (AggressiveInsts == 0) return false;
+
+ // If we have seen this instruction before, don't count it again.
+ if (AggressiveInsts->count(I)) return true;
+
+ // Okay, it looks like the instruction IS in the "condition". Check to
+ // see if it's a cheap instruction to unconditionally compute, and if it
+ // only uses stuff defined outside of the condition. If so, hoist it out.
+ if (!isSafeToSpeculativelyExecute(I))
+ return false;
+
+ unsigned Cost = ComputeSpeculationCost(I);
+
+ if (Cost > CostRemaining)
+ return false;
+
+ CostRemaining -= Cost;
+
+ // Okay, we can only really hoist these out if their operands do
+ // not take us over the cost threshold.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining))
+ return false;
+ // Okay, it's safe to do this! Remember this instruction.
+ AggressiveInsts->insert(I);
+ return true;
+}
+
+/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
+/// and PointerNullValue. Return NULL if value is not a constant int.
+static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
+ // Normal constant int.
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+ if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
+ return CI;
+
+ // This is some kind of pointer constant. Turn it into a pointer-sized
+ // ConstantInt if possible.
+ IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType()));
+
+ // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
+ if (isa<ConstantPointerNull>(V))
+ return ConstantInt::get(PtrTy, 0);
+
+ // IntToPtr const int.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
+ // The constant is very likely to have the right type already.
+ if (CI->getType() == PtrTy)
+ return CI;
+ else
+ return cast<ConstantInt>
+ (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
+ }
+ return 0;
+}
+
+/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together
+/// collection of icmp eq/ne instructions that compare a value against a
+/// constant, return the value being compared, and stick the constant into the
+/// Values vector.
+static Value *
+GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
+ const DataLayout *TD, bool isEQ, unsigned &UsedICmps) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0;
+
+ // If this is an icmp against a constant, handle this as one of the cases.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+ if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+ if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+ UsedICmps++;
+ Vals.push_back(C);
+ return I->getOperand(0);
+ }
+
+ // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
+ // the set.
+ ConstantRange Span =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
+
+ // If this is an and/!= check then we want to optimize "x ugt 2" into
+ // x != 0 && x != 1.
+ if (!isEQ)
+ Span = Span.inverse();
+
+ // If there are a ton of values, we don't want to make a ginormous switch.
+ if (Span.getSetSize().ugt(8) || Span.isEmptySet())
+ return 0;
+
+ for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+ Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
+ UsedICmps++;
+ return I->getOperand(0);
+ }
+ return 0;
+ }
+
+ // Otherwise, we can only handle an | or &, depending on isEQ.
+ if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
+ return 0;
+
+ unsigned NumValsBeforeLHS = Vals.size();
+ unsigned UsedICmpsBeforeLHS = UsedICmps;
+ if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ unsigned NumVals = Vals.size();
+ unsigned UsedICmpsBeforeRHS = UsedICmps;
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ if (LHS == RHS)
+ return LHS;
+ Vals.resize(NumVals);
+ UsedICmps = UsedICmpsBeforeRHS;
+ }
+
+ // The RHS of the or/and can't be folded in and we haven't used "Extra" yet,
+ // set it and return success.
+ if (Extra == 0 || Extra == I->getOperand(1)) {
+ Extra = I->getOperand(1);
+ return LHS;
+ }
+
+ Vals.resize(NumValsBeforeLHS);
+ UsedICmps = UsedICmpsBeforeLHS;
+ return 0;
+ }
+
+ // If the LHS can't be folded in, but Extra is available and RHS can, try to
+ // use LHS as Extra.
+ if (Extra == 0 || Extra == I->getOperand(0)) {
+ Value *OldExtra = Extra;
+ Extra = I->getOperand(0);
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps))
+ return RHS;
+ assert(Vals.size() == NumValsBeforeLHS);
+ Extra = OldExtra;
+ }
+
+ return 0;
+}
+
+static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+ Instruction *Cond = 0;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cond = dyn_cast<Instruction>(SI->getCondition());
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional())
+ Cond = dyn_cast<Instruction>(BI->getCondition());
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
+ Cond = dyn_cast<Instruction>(IBI->getAddress());
+ }
+
+ TI->eraseFromParent();
+ if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond);
+}
+
+/// isValueEqualityComparison - Return true if the specified terminator checks
+/// to see if a value is equal to constant integer value.
+Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
+ Value *CV = 0;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ // Do not permit merging of large switch instructions into their
+ // predecessors unless there is only one predecessor.
+ if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()),
+ pred_end(SI->getParent())) <= 128)
+ CV = SI->getCondition();
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
+ if (BI->isConditional() && BI->getCondition()->hasOneUse())
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
+ ICI->getPredicate() == ICmpInst::ICMP_NE) &&
+ GetConstantInt(ICI->getOperand(1), TD))
+ CV = ICI->getOperand(0);
+
+ // Unwrap any lossless ptrtoint cast.
+ if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext()))
+ if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV))
+ CV = PTII->getOperand(0);
+ return CV;
+}
+
+/// GetValueEqualityComparisonCases - Given a value comparison instruction,
+/// decode all of the 'cases' that it represents and return the 'default' block.
+BasicBlock *SimplifyCFGOpt::
+GetValueEqualityComparisonCases(TerminatorInst *TI,
+ std::vector<ValueEqualityComparisonCase>
+ &Cases) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cases.reserve(SI->getNumCases());
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+ Cases.push_back(ValueEqualityComparisonCase(i.getCaseValue(),
+ i.getCaseSuccessor()));
+ return SI->getDefaultDest();
+ }
+
+ BranchInst *BI = cast<BranchInst>(TI);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
+ Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1),
+ TD),
+ Succ));
+ return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
+}
+
+
+/// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries
+/// in the list that match the specified block.
+static void EliminateBlockCases(BasicBlock *BB,
+ std::vector<ValueEqualityComparisonCase> &Cases) {
+ Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
+}
+
+/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
+/// well.
+static bool
+ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
+ std::vector<ValueEqualityComparisonCase > &C2) {
+ std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
+
+ // Make V1 be smaller than V2.
+ if (V1->size() > V2->size())
+ std::swap(V1, V2);
+
+ if (V1->size() == 0) return false;
+ if (V1->size() == 1) {
+ // Just scan V2.
+ ConstantInt *TheVal = (*V1)[0].Value;
+ for (unsigned i = 0, e = V2->size(); i != e; ++i)
+ if (TheVal == (*V2)[i].Value)
+ return true;
+ }
+
+ // Otherwise, just sort both lists and compare element by element.
+ array_pod_sort(V1->begin(), V1->end());
+ array_pod_sort(V2->begin(), V2->end());
+ unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
+ while (i1 != e1 && i2 != e2) {
+ if ((*V1)[i1].Value == (*V2)[i2].Value)
+ return true;
+ if ((*V1)[i1].Value < (*V2)[i2].Value)
+ ++i1;
+ else
+ ++i2;
+ }
+ return false;
+}
+
+/// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a
+/// terminator instruction and its block is known to only have a single
+/// predecessor block, check to see if that predecessor is also a value
+/// comparison with the same value, and if that comparison determines the
+/// outcome of this comparison. If so, simplify TI. This does a very limited
+/// form of jump threading.
+bool SimplifyCFGOpt::
+SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred,
+ IRBuilder<> &Builder) {
+ Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
+ if (!PredVal) return false; // Not a value comparison in predecessor.
+
+ Value *ThisVal = isValueEqualityComparison(TI);
+ assert(ThisVal && "This isn't a value comparison!!");
+ if (ThisVal != PredVal) return false; // Different predicates.
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
+ // Find out information about when control will move from Pred to TI's block.
+ std::vector<ValueEqualityComparisonCase> PredCases;
+ BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
+ PredCases);
+ EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
+
+ // Find information about how control leaves this block.
+ std::vector<ValueEqualityComparisonCase> ThisCases;
+ BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
+ EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
+
+ // If TI's block is the default block from Pred's comparison, potentially
+ // simplify TI based on this knowledge.
+ if (PredDef == TI->getParent()) {
+ // If we are here, we know that the value is none of those cases listed in
+ // PredCases. If there are any cases in ThisCases that are in PredCases, we
+ // can simplify TI.
+ if (!ValuesOverlap(PredCases, ThisCases))
+ return false;
+
+ if (isa<BranchInst>(TI)) {
+ // Okay, one of the successors of this condbr is dead. Convert it to a
+ // uncond br.
+ assert(ThisCases.size() == 1 && "Branch can only have one case!");
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(ThisDef);
+ (void) NI;
+
+ // Remove PHI node entries for the dead edge.
+ ThisCases[0].Dest->removePredecessor(TI->getParent());
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+ }
+
+ SwitchInst *SI = cast<SwitchInst>(TI);
+ // Okay, TI has cases that are statically dead, prune them away.
+ SmallPtrSet<Constant*, 16> DeadCases;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ DeadCases.insert(PredCases[i].Value);
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
+
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
+ if (HasWeight)
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+ for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
+ --i;
+ if (DeadCases.count(i.getCaseValue())) {
+ if (HasWeight) {
+ std::swap(Weights[i.getCaseIndex()+1], Weights.back());
+ Weights.pop_back();
+ }
+ i.getCaseSuccessor()->removePredecessor(TI->getParent());
+ SI->removeCase(i);
+ }
+ }
+ if (HasWeight && Weights.size() >= 2)
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext()).
+ createBranchWeights(Weights));
+
+ DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+ return true;
+ }
+
+ // Otherwise, TI's block must correspond to some matched value. Find out
+ // which value (or set of values) this is.
+ ConstantInt *TIV = 0;
+ BasicBlock *TIBB = TI->getParent();
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest == TIBB) {
+ if (TIV != 0)
+ return false; // Cannot handle multiple values coming to this block.
+ TIV = PredCases[i].Value;
+ }
+ assert(TIV && "No edge from pred to succ?");
+
+ // Okay, we found the one constant that our value can be if we get into TI's
+ // BB. Find out which successor will unconditionally be branched to.
+ BasicBlock *TheRealDest = 0;
+ for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+ if (ThisCases[i].Value == TIV) {
+ TheRealDest = ThisCases[i].Dest;
+ break;
+ }
+
+ // If not handled by any explicit cases, it is handled by the default case.
+ if (TheRealDest == 0) TheRealDest = ThisDef;
+
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+ if (*SI != CheckEdge)
+ (*SI)->removePredecessor(TIBB);
+ else
+ CheckEdge = 0;
+
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(TheRealDest);
+ (void) NI;
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+}
+
+namespace {
+ /// ConstantIntOrdering - This class implements a stable ordering of constant
+ /// integers that does not depend on their address. This is important for
+ /// applications that sort ConstantInt's to ensure uniqueness.
+ struct ConstantIntOrdering {
+ bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+ return LHS->getValue().ult(RHS->getValue());
+ }
+ };
+}
+
+static int ConstantIntSortPredicate(const void *P1, const void *P2) {
+ const ConstantInt *LHS = *(const ConstantInt*const*)P1;
+ const ConstantInt *RHS = *(const ConstantInt*const*)P2;
+ if (LHS->getValue().ult(RHS->getValue()))
+ return 1;
+ if (LHS->getValue() == RHS->getValue())
+ return 0;
+ return -1;
+}
+
+static inline bool HasBranchWeights(const Instruction* I) {
+ MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof);
+ if (ProfMD && ProfMD->getOperand(0))
+ if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
+ return MDS->getString().equals("branch_weights");
+
+ return false;
+}
+
+/// Get Weights of a given TerminatorInst, the default weight is at the front
+/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
+/// metadata.
+static void GetBranchWeights(TerminatorInst *TI,
+ SmallVectorImpl<uint64_t> &Weights) {
+ MDNode* MD = TI->getMetadata(LLVMContext::MD_prof);
+ assert(MD);
+ for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+
+ // If TI is a conditional eq, the default case is the false case,
+ // and the corresponding branch-weight data is at index 2. We swap the
+ // default weight to be the first entry.
+ if (BranchInst* BI = dyn_cast<BranchInst>(TI)) {
+ assert(Weights.size() == 2);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(Weights.front(), Weights.back());
+ }
+}
+
+/// Sees if any of the weights are too big for a uint32_t, and halves all the
+/// weights if any are.
+static void FitWeights(MutableArrayRef<uint64_t> Weights) {
+ bool Halve = false;
+ for (unsigned i = 0; i < Weights.size(); ++i)
+ if (Weights[i] > UINT_MAX) {
+ Halve = true;
+ break;
+ }
+
+ if (! Halve)
+ return;
+
+ for (unsigned i = 0; i < Weights.size(); ++i)
+ Weights[i] /= 2;
+}
+
+/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
+/// equality comparison instruction (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together.
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+ bool Changed = false;
+
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+
+ // See if the predecessor is a comparison with the same value.
+ TerminatorInst *PTI = Pred->getTerminator();
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+
+ if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
+ // Figure out which 'cases' to copy from SI to PSI.
+ std::vector<ValueEqualityComparisonCase> BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+ std::vector<ValueEqualityComparisonCase> PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallVector<BasicBlock*, 8> NewSuccessors;
+
+ // Update the branch weight metadata along the way
+ SmallVector<uint64_t, 8> Weights;
+ bool PredHasWeights = HasBranchWeights(PTI);
+ bool SuccHasWeights = HasBranchWeights(TI);
+
+ if (PredHasWeights) {
+ GetBranchWeights(PTI, Weights);
+ // branch-weight metadata is inconsistent here.
+ if (Weights.size() != 1 + PredCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (SuccHasWeights)
+ // If there are no predecessor weights but there are successor weights,
+ // populate Weights with 1, which will later be scaled to the sum of
+ // successor's weights
+ Weights.assign(1 + PredCases.size(), 1);
+
+ SmallVector<uint64_t, 8> SuccWeights;
+ if (SuccHasWeights) {
+ GetBranchWeights(TI, SuccWeights);
+ // branch-weight metadata is inconsistent here.
+ if (SuccWeights.size() != 1 + BBCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (PredHasWeights)
+ SuccWeights.assign(1 + BBCases.size(), 1);
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest != BB)
+ PTIHandled.insert(PredCases[i].Value);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Increase weight for the default case.
+ Weights[0] += Weights[i+1];
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ PredDefault = BBDefault;
+ NewSuccessors.push_back(BBDefault);
+ }
+
+ unsigned CasesFromPred = Weights.size();
+ uint64_t ValidTotalSuccWeight = 0;
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].Value) &&
+ BBCases[i].Dest != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].Dest);
+ if (SuccHasWeights || PredHasWeights) {
+ // The default weight is at index 0, so weight for the ith case
+ // should be at index i+1. Scale the cases from successor by
+ // PredDefaultWeight (Weights[0]).
+ Weights.push_back(Weights[0] * SuccWeights[i+1]);
+ ValidTotalSuccWeight += SuccWeights[i+1];
+ }
+ }
+
+ if (SuccHasWeights || PredHasWeights) {
+ ValidTotalSuccWeight += SuccWeights[0];
+ // Scale the cases from predecessor by ValidTotalSuccWeight.
+ for (unsigned i = 1; i < CasesFromPred; ++i)
+ Weights[i] *= ValidTotalSuccWeight;
+ // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
+ Weights[0] *= SuccWeights[0];
+ }
+ } else {
+ // If this is not the default destination from PSI, only the edges
+ // in SI that occur in PSI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt*, uint64_t> WeightsForHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest == BB) {
+ PTIHandled.insert(PredCases[i].Value);
+
+ if (PredHasWeights || SuccHasWeights) {
+ WeightsForHandled[PredCases[i].Value] = Weights[i+1];
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].Value)) {
+ // If this is one we are capable of getting...
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[BBCases[i].Value]);
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].Dest);
+ PTIHandled.erase(BBCases[i].Value);// This constant is taken care of
+ }
+
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
+ PTIHandled.begin(),
+ E = PTIHandled.end(); I != E; ++I) {
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[*I]);
+ PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
+ NewSuccessors.push_back(BBDefault);
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
+ AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+
+ Builder.SetInsertPoint(PTI);
+ // Convert pointer to int before we switch.
+ if (CV->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without DataLayout");
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
+ "magicptr");
+ }
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault,
+ PredCases.size());
+ NewSI->setDebugLoc(PTI->getDebugLoc());
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ NewSI->addCase(PredCases[i].Value, PredCases[i].Dest);
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(Weights);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+
+ NewSI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
+ EraseTerminatorInstAndDCECond(PTI);
+
+ // Okay, last check. If BB is still a successor of PSI, then we must
+ // have an infinite loop case. If so, add an infinitely looping block
+ // to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = 0;
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (InfLoopBlock == 0) {
+ // Insert it at the end of the function, because it's either code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ }
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
+
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+// isSafeToHoistInvoke - If we would need to insert a select that uses the
+// value of this invoke (comments in HoistThenElseCodeToIf explain why we
+// would need to do this), we can't hoist the invoke, as there is nowhere
+// to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
+/// BB2, hoist any common code in the two blocks up into the branch block. The
+/// caller of this function guarantees that BI's block dominates BB1 and BB2.
+static bool HoistThenElseCodeToIf(BranchInst *BI) {
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. In particular, we don't want to get into
+ // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
+ // such, we currently just scan for obviously identical instructions in an
+ // identical order.
+ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
+ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
+
+ BasicBlock::iterator BB1_Itr = BB1->begin();
+ BasicBlock::iterator BB2_Itr = BB2->begin();
+
+ Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ }
+ if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+ return false;
+
+ // If we get here, we can hoist at least one instruction.
+ BasicBlock *BIParent = BI->getParent();
+
+ do {
+ // If we are hoisting the terminator instruction, don't move one (making a
+ // broken BB), instead clone it, and remove BI.
+ if (isa<TerminatorInst>(I1))
+ goto HoistTerminator;
+
+ // For a normal instruction, we just move one to right before the branch,
+ // then replace all uses of the other with the first. Finally, we remove
+ // the now redundant second instruction.
+ BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ I2->eraseFromParent();
+
+ I1 = BB1_Itr++;
+ I2 = BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ }
+ } while (I1->isIdenticalToWhenDefined(I2));
+
+ return true;
+
+HoistTerminator:
+ // It may not be possible to hoist an invoke.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return true;
+
+ // Okay, it is safe to hoist the terminator.
+ Instruction *NT = I1->clone();
+ BIParent->getInstList().insert(BI, NT);
+ if (!NT->getType()->isVoidTy()) {
+ I1->replaceAllUsesWith(NT);
+ I2->replaceAllUsesWith(NT);
+ NT->takeName(I1);
+ }
+
+ IRBuilder<true, NoFolder> Builder(NT);
+ // Hoisting one of the terminators from our successor is a great thing.
+ // Unfortunately, the successors of the if/else blocks may have PHI nodes in
+ // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
+ // nodes, so we insert select instruction to compute the final result.
+ std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects;
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V) continue;
+
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (SI == 0)
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName()+"."+BB2V->getName()));
+
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+ PN->setIncomingValue(i, SI);
+ }
+ }
+
+ // Update any PHI nodes in our new successors.
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI)
+ AddPredecessorToBlock(*SI, BIParent, BB1);
+
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+}
+
+/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd,
+/// check whether BBEnd has only two predecessors and the other predecessor
+/// ends with an unconditional branch. If it is true, sink any common code
+/// in the two predecessors to BBEnd.
+static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
+ assert(BI1->isUnconditional());
+ BasicBlock *BB1 = BI1->getParent();
+ BasicBlock *BBEnd = BI1->getSuccessor(0);
+
+ // Check that BBEnd has two predecessors and the other predecessor ends with
+ // an unconditional branch.
+ pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
+ BasicBlock *Pred0 = *PI++;
+ if (PI == PE) // Only one predecessor.
+ return false;
+ BasicBlock *Pred1 = *PI++;
+ if (PI != PE) // More than two predecessors.
+ return false;
+ BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
+ BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
+ if (!BI2 || !BI2->isUnconditional())
+ return false;
+
+ // Gather the PHI nodes in BBEnd.
+ std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2;
+ Instruction *FirstNonPhiInBBEnd = 0;
+ for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
+ } else {
+ FirstNonPhiInBBEnd = &*I;
+ break;
+ }
+ }
+ if (!FirstNonPhiInBBEnd)
+ return false;
+
+
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. We scan backward for obviously identical
+ // instructions in an identical order.
+ BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
+ RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(),
+ RE2 = BB2->getInstList().rend();
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return false;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return false;
+ // Skip the unconditional branches.
+ ++RI1;
+ ++RI2;
+
+ bool Changed = false;
+ while (RI1 != RE1 && RI2 != RE2) {
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return Changed;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return Changed;
+
+ Instruction *I1 = &*RI1, *I2 = &*RI2;
+ // I1 and I2 should have a single use in the same PHI node, and they
+ // perform the same operation.
+ // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
+ isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
+ isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) ||
+ isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
+ I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
+ I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
+ !I1->hasOneUse() || !I2->hasOneUse() ||
+ MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() ||
+ MapValueFromBB1ToBB2[I1].first != I2)
+ return Changed;
+
+ // Check whether we should swap the operands of ICmpInst.
+ ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
+ bool SwapOpnds = false;
+ if (ICmp1 && ICmp2 &&
+ ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
+ ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
+ (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
+ ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
+ ICmp2->swapOperands();
+ SwapOpnds = true;
+ }
+ if (!I1->isSameOperationAs(I2)) {
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+
+ // The operands should be either the same or they need to be generated
+ // with a PHI node after sinking. We only handle the case where there is
+ // a single pair of different operands.
+ Value *DifferentOp1 = 0, *DifferentOp2 = 0;
+ unsigned Op1Idx = 0;
+ for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
+ if (I1->getOperand(I) == I2->getOperand(I))
+ continue;
+ // Early exit if we have more-than one pair of different operands or
+ // the different operand is already in MapValueFromBB1ToBB2.
+ // Early exit if we need a PHI node to replace a constant.
+ if (DifferentOp1 ||
+ MapValueFromBB1ToBB2.find(I1->getOperand(I)) !=
+ MapValueFromBB1ToBB2.end() ||
+ isa<Constant>(I1->getOperand(I)) ||
+ isa<Constant>(I2->getOperand(I))) {
+ // If we can't sink the instructions, undo the swapping.
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+ DifferentOp1 = I1->getOperand(I);
+ Op1Idx = I;
+ DifferentOp2 = I2->getOperand(I);
+ }
+
+ // We insert the pair of different operands to MapValueFromBB1ToBB2 and
+ // remove (I1, I2) from MapValueFromBB1ToBB2.
+ if (DifferentOp1) {
+ PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2,
+ DifferentOp1->getName() + ".sink",
+ BBEnd->begin());
+ MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN);
+ // I1 should use NewPN instead of DifferentOp1.
+ I1->setOperand(Op1Idx, NewPN);
+ NewPN->addIncoming(DifferentOp1, BB1);
+ NewPN->addIncoming(DifferentOp2, BB2);
+ DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
+ }
+ PHINode *OldPN = MapValueFromBB1ToBB2[I1].second;
+ MapValueFromBB1ToBB2.erase(I1);
+
+ DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";);
+ DEBUG(dbgs() << " " << *I2 << "\n";);
+ // We need to update RE1 and RE2 if we are going to sink the first
+ // instruction in the basic block down.
+ bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
+ // Sink the instruction.
+ BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1);
+ if (!OldPN->use_empty())
+ OldPN->replaceAllUsesWith(I1);
+ OldPN->eraseFromParent();
+
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ I2->eraseFromParent();
+
+ if (UpdateRE1)
+ RE1 = BB1->getInstList().rend();
+ if (UpdateRE2)
+ RE2 = BB2->getInstList().rend();
+ FirstNonPhiInBBEnd = I1;
+ NumSinkCommons++;
+ Changed = true;
+ }
+ return Changed;
+}
+
+/// \brief Speculate a conditional basic block flattening the CFG.
+///
+/// Note that this is a very risky transform currently. Speculating
+/// instructions like this is most often not desirable. Instead, there is an MI
+/// pass which can do it with full awareness of the resource constraints.
+/// However, some cases are "obvious" and we should do directly. An example of
+/// this is speculating a single, reasonably cheap instruction.
+///
+/// There is only one distinct advantage to flattening the CFG at the IR level:
+/// it makes very common but simplistic optimizations such as are common in
+/// instcombine and the DAG combiner more powerful by removing CFG edges and
+/// modeling their effects with easier to reason about SSA value graphs.
+///
+///
+/// An illustration of this transform is turning this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// %sub = sub %x, %y
+/// br label BB2
+/// EndBB:
+/// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
+/// ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// %sub = sub %x, %y
+/// %cond = select i1 %cmp, 0, %sub
+/// ...
+/// \endcode
+///
+/// \returns true if the conditional block is removed.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
+ // Be conservative for now. FP select instruction can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<FCmpInst>(BrCond))
+ return false;
+
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
+
+ // If ThenBB is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (ThenBB != BI->getSuccessor(0)) {
+ assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+ assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
+
+ // Keep a count of how many times instructions are used within CondBB when
+ // they are candidates for sinking into CondBB. Specifically:
+ // - They are defined in BB, and
+ // - They have no side effects, and
+ // - All of their uses are in CondBB.
+ SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
+
+ unsigned SpeculationCost = 0;
+ for (BasicBlock::iterator BBI = ThenBB->begin(),
+ BBE = llvm::prior(ThenBB->end());
+ BBI != BBE; ++BBI) {
+ Instruction *I = BBI;
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Only speculatively execution a single instruction (not counting the
+ // terminator) for now.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
+
+ // Don't hoist the instruction if it's unsafe or expensive.
+ if (!isSafeToSpeculativelyExecute(I))
+ return false;
+ if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
+ return false;
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in this BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end();
+ i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(*i);
+ if (!OpI || OpI->getParent() != BB ||
+ OpI->mayHaveSideEffects())
+ continue; // Not a candidate for sinking.
+
+ ++SinkCandidateUseCounts[OpI];
+ }
+ }
+
+ // Consider any sink candidates which are only used in CondBB as costs for
+ // speculation. Note, while we iterate over a DenseMap here, we are summing
+ // and so iteration order isn't significant.
+ for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I =
+ SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end();
+ I != E; ++I)
+ if (I->first->getNumUses() == I->second) {
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
+ }
+
+ // Check that the PHI nodes can be converted to selects.
+ bool HaveRewritablePHIs = false;
+ for (BasicBlock::iterator I = EndBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ Value *OrigV = PN->getIncomingValueForBlock(BB);
+ Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
+
+ // Skip PHIs which are trivial.
+ if (ThenV == OrigV)
+ continue;
+
+ HaveRewritablePHIs = true;
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV);
+ if (!CE)
+ continue; // Known safe and cheap.
+
+ if (!isSafeToSpeculativelyExecute(CE))
+ return false;
+ if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
+ return false;
+
+ // Account for the cost of an unfolded ConstantExpr which could end up
+ // getting expanded into Instructions.
+ // FIXME: This doesn't account for how many operations are combined in the
+ // constant expression.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
+ }
+
+ // If there are no PHIs to process, bail early. This helps ensure idempotence
+ // as well.
+ if (!HaveRewritablePHIs)
+ return false;
+
+ // If we get here, we can hoist the instruction and if-convert.
+ DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+
+ // Hoist the instructions.
+ BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
+ llvm::prior(ThenBB->end()));
+
+ // Insert selects and rewrite the PHI operands.
+ IRBuilder<true, NoFolder> Builder(BI);
+ for (BasicBlock::iterator I = EndBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned OrigI = PN->getBasicBlockIndex(BB);
+ unsigned ThenI = PN->getBasicBlockIndex(ThenBB);
+ Value *OrigV = PN->getIncomingValue(OrigI);
+ Value *ThenV = PN->getIncomingValue(ThenI);
+
+ // Skip PHIs which are trivial.
+ if (OrigV == ThenV)
+ continue;
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the preexisting value. Swap them if the branch
+ // destinations were inverted.
+ Value *TrueV = ThenV, *FalseV = OrigV;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV,
+ TrueV->getName() + "." + FalseV->getName());
+ PN->setIncomingValue(OrigI, V);
+ PN->setIncomingValue(ThenI, V);
+ }
+
+ ++NumSpeculations;
+ return true;
+}
+
+/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
+/// across this block.
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ unsigned Size = 0;
+
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (Size > 10) return false; // Don't clone large BB's.
+ ++Size;
+
+ // We can only support instructions that do not define values that are
+ // live outside of the current basic block.
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (U->getParent() != BB || isa<PHINode>(U)) return false;
+ }
+
+ // Looks ok, continue checking.
+ }
+
+ return true;
+}
+
+/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value
+/// that is defined in the same block as the branch and if any PHI entries are
+/// constants, thread edges corresponding to that entry to be branches to their
+/// ultimate destination.
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
+ BasicBlock *BB = BI->getParent();
+ PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
+ // NOTE: we currently cannot transform this case if the PHI node is used
+ // outside of the block.
+ if (!PN || PN->getParent() != BB || !PN->hasOneUse())
+ return false;
+
+ // Degenerate case of a single entry PHI.
+ if (PN->getNumIncomingValues() == 1) {
+ FoldSingleEntryPHINodes(PN->getParent());
+ return true;
+ }
+
+ // Now we know that this block has multiple preds and two succs.
+ if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+
+ // Okay, this is a simple enough basic block. See if any phi values are
+ // constants.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
+ if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
+
+ // Okay, we now know that all edges from PredBB should be revectored to
+ // branch to RealDest.
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+
+ if (RealDest == BB) continue; // Skip self loops.
+ // Skip if the predecessor's terminator is an indirect branch.
+ if (isa<IndirectBrInst>(PredBB->getTerminator())) continue;
+
+ // The dest block might have PHI nodes, other predecessors and other
+ // difficult cases. Instead of being smart about this, just insert a new
+ // block that jumps to the destination block, effectively splitting
+ // the edge we are about to create.
+ BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+ RealDest->getName()+".critedge",
+ RealDest->getParent(), RealDest);
+ BranchInst::Create(RealDest, EdgeBB);
+
+ // Update PHI nodes.
+ AddPredecessorToBlock(RealDest, EdgeBB, BB);
+
+ // BB may have instructions that are being threaded over. Clone these
+ // instructions into EdgeBB. We know that there will be no uses of the
+ // cloned instructions outside of EdgeBB.
+ BasicBlock::iterator InsertPt = EdgeBB->begin();
+ DenseMap<Value*, Value*> TranslateMap; // Track translated values.
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ continue;
+ }
+ // Clone the instruction.
+ Instruction *N = BBI->clone();
+ if (BBI->hasName()) N->setName(BBI->getName()+".c");
+
+ // Update operands due to translation.
+ for (User::op_iterator i = N->op_begin(), e = N->op_end();
+ i != e; ++i) {
+ DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i);
+ if (PI != TranslateMap.end())
+ *i = PI->second;
+ }
+
+ // Check for trivial simplification.
+ if (Value *V = SimplifyInstruction(N, TD)) {
+ TranslateMap[BBI] = V;
+ delete N; // Instruction folded away, don't need actual inst
+ } else {
+ // Insert the new instruction into its new home.
+ EdgeBB->getInstList().insert(InsertPt, N);
+ if (!BBI->use_empty())
+ TranslateMap[BBI] = N;
+ }
+ }
+
+ // Loop over all of the edges from PredBB to BB, changing them to branch
+ // to EdgeBB instead.
+ TerminatorInst *PredBBTI = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+ if (PredBBTI->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredBBTI->setSuccessor(i, EdgeBB);
+ }
+
+ // Recurse, simplifying any other constants.
+ return FoldCondBranchOnPHI(BI, TD) | true;
+ }
+
+ return false;
+}
+
+/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
+/// PHI node, see if we can eliminate it.
+static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) {
+ // Ok, this is a two entry PHI node. Check to see if this is a simple "if
+ // statement", which has a very simple dominance structure. Basically, we
+ // are trying to find the condition that is being branched on, which
+ // subsequently causes this merge to happen. We really want control
+ // dependence information for this check, but simplifycfg can't keep it up
+ // to date, and this catches most of the cases we care about anyway.
+ BasicBlock *BB = PN->getParent();
+ BasicBlock *IfTrue, *IfFalse;
+ Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
+ if (!IfCond ||
+ // Don't bother if the branch will be constant folded trivially.
+ isa<ConstantInt>(IfCond))
+ return false;
+
+ // Okay, we found that we can merge this two-entry phi node into a select.
+ // Doing so would require us to fold *all* two entry phi nodes in this block.
+ // At some point this becomes non-profitable (particularly if the target
+ // doesn't support cmov's). Only do this transformation if there are two or
+ // fewer PHI nodes in this block.
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
+ if (NumPhis > 2)
+ return false;
+
+ // Loop over the PHI's seeing if we can promote them all to select
+ // instructions. While we are at it, keep track of the instructions
+ // that need to be moved to the dominating block.
+ SmallPtrSet<Instruction*, 4> AggressiveInsts;
+ unsigned MaxCostVal0 = PHINodeFoldingThreshold,
+ MaxCostVal1 = PHINodeFoldingThreshold;
+
+ for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+ PHINode *PN = cast<PHINode>(II++);
+ if (Value *V = SimplifyInstruction(PN, TD)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+ MaxCostVal0) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+ MaxCostVal1))
+ return false;
+ }
+
+ // If we folded the first phi, PN dangles at this point. Refresh it. If
+ // we ran out of PHIs then we simplified them all.
+ PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) return true;
+
+ // Don't fold i1 branches on PHIs which contain binary operators. These can
+ // often be turned into switches and other things.
+ if (PN->getType()->isIntegerTy(1) &&
+ (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
+ isa<BinaryOperator>(PN->getIncomingValue(1)) ||
+ isa<BinaryOperator>(IfCond)))
+ return false;
+
+ // If we all PHI nodes are promotable, check to make sure that all
+ // instructions in the predecessor blocks can be promoted as well. If
+ // not, we won't be able to get rid of the control flow, so it's not
+ // worth promoting to select instructions.
+ BasicBlock *DomBlock = 0;
+ BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
+ BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
+ if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
+ IfBlock1 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock1);
+ for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
+ IfBlock2 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock2);
+ for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
+
+ // If we can still promote the PHI nodes after this gauntlet of tests,
+ // do all of the PHI's now.
+ Instruction *InsertPt = DomBlock->getTerminator();
+ IRBuilder<true, NoFolder> Builder(InsertPt);
+
+ // Move all 'aggressive' instructions, which are defined in the
+ // conditional parts of the if's up to the dominating block.
+ if (IfBlock1)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock1->getInstList(), IfBlock1->begin(),
+ IfBlock1->getTerminator());
+ if (IfBlock2)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock2->getInstList(), IfBlock2->begin(),
+ IfBlock2->getTerminator());
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ // Change the PHI node into a select instruction.
+ Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+
+ SelectInst *NV =
+ cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
+ PN->replaceAllUsesWith(NV);
+ NV->takeName(PN);
+ PN->eraseFromParent();
+ }
+
+ // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
+ // has been flattened. Change DomBlock to jump directly to our new block to
+ // avoid other simplifycfg's kicking in on the diamond.
+ TerminatorInst *OldTI = DomBlock->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+ Builder.CreateBr(BB);
+ OldTI->eraseFromParent();
+ return true;
+}
+
+/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
+/// to two returning blocks, try to merge them together into one return,
+/// introducing a select if the return values disagree.
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
+ IRBuilder<> &Builder) {
+ assert(BI->isConditional() && "Must be a conditional branch");
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
+ ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
+
+ // Check to ensure both blocks are empty (just a return) or optionally empty
+ // with PHI nodes. If there are other instructions, merging would cause extra
+ // computation on one path or the other.
+ if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+ if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+
+ Builder.SetInsertPoint(BI);
+ // Okay, we found a branch that is going to two return nodes. If
+ // there is no return value for this function, just change the
+ // branch into a return.
+ if (FalseRet->getNumOperands() == 0) {
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+ Builder.CreateRetVoid();
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+ }
+
+ // Otherwise, figure out what the true and false return values are
+ // so we can insert a new select instruction.
+ Value *TrueValue = TrueRet->getReturnValue();
+ Value *FalseValue = FalseRet->getReturnValue();
+
+ // Unwrap any PHI nodes in the return blocks.
+ if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
+ if (TVPN->getParent() == TrueSucc)
+ TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
+ if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
+ if (FVPN->getParent() == FalseSucc)
+ FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
+
+ // In order for this transformation to be safe, we must be able to
+ // unconditionally execute both operands to the return. This is
+ // normally the case, but we could have a potentially-trapping
+ // constant expression that prevents this transformation from being
+ // safe.
+ if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
+ if (TCV->canTrap())
+ return false;
+ if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
+ if (FCV->canTrap())
+ return false;
+
+ // Okay, we collected all the mapped values and checked them for sanity, and
+ // defined to really do this transformation. First, update the CFG.
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+
+ // Insert select instructions where needed.
+ Value *BrCond = BI->getCondition();
+ if (TrueValue) {
+ // Insert a select if the results differ.
+ if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
+ } else if (isa<UndefValue>(TrueValue)) {
+ TrueValue = FalseValue;
+ } else {
+ TrueValue = Builder.CreateSelect(BrCond, TrueValue,
+ FalseValue, "retval");
+ }
+ }
+
+ Value *RI = !TrueValue ?
+ Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
+
+ (void) RI;
+
+ DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+ << "\n " << *BI << "NewRet = " << *RI
+ << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
+
+ EraseTerminatorInstAndDCECond(BI);
+
+ return true;
+}
+
+/// ExtractBranchMetadata - Given a conditional BranchInstruction, retrieve the
+/// probabilities of the branch taking each edge. Fills in the two APInt
+/// parameters and return true, or returns false if no or invalid metadata was
+/// found.
+static bool ExtractBranchMetadata(BranchInst *BI,
+ uint64_t &ProbTrue, uint64_t &ProbFalse) {
+ assert(BI->isConditional() &&
+ "Looking for probabilities on unconditional branch?");
+ MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
+ if (!ProfileData || ProfileData->getNumOperands() != 3) return false;
+ ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1));
+ ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2));
+ if (!CITrue || !CIFalse) return false;
+ ProbTrue = CITrue->getValue().getZExtValue();
+ ProbFalse = CIFalse->getValue().getZExtValue();
+ return true;
+}
+
+/// checkCSEInPredecessor - Return true if the given instruction is available
+/// in its predecessor block. If yes, the instruction will be removed.
+///
+static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
+ if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
+ return false;
+ for (BasicBlock::iterator I = PB->begin(), E = PB->end(); I != E; I++) {
+ Instruction *PBI = &*I;
+ // Check whether Inst and PBI generate the same value.
+ if (Inst->isIdenticalTo(PBI)) {
+ Inst->replaceAllUsesWith(PBI);
+ Inst->eraseFromParent();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
+/// predecessor branches to us and one of our successors, fold the block into
+/// the predecessor and use logical operations to pick the right destination.
+bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+
+ Instruction *Cond = 0;
+ if (BI->isConditional())
+ Cond = dyn_cast<Instruction>(BI->getCondition());
+ else {
+ // For unconditional branch, check for a simple CFG pattern, where
+ // BB has a single predecessor and BB's successor is also its predecessor's
+ // successor. If such pattern exisits, check for CSE between BB and its
+ // predecessor.
+ if (BasicBlock *PB = BB->getSinglePredecessor())
+ if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
+ if (PBI->isConditional() &&
+ (BI->getSuccessor(0) == PBI->getSuccessor(0) ||
+ BI->getSuccessor(0) == PBI->getSuccessor(1))) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ) {
+ Instruction *Curr = I++;
+ if (isa<CmpInst>(Curr)) {
+ Cond = Curr;
+ break;
+ }
+ // Quit if we can't remove this instruction.
+ if (!checkCSEInPredecessor(Curr, PB))
+ return false;
+ }
+ }
+
+ if (Cond == 0)
+ return false;
+ }
+
+ if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
+ return false;
+
+ // Only allow this if the condition is a simple instruction that can be
+ // executed unconditionally. It must be in the same block as the branch, and
+ // must be at the front of the block.
+ BasicBlock::iterator FrontIt = BB->front();
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
+
+ // Allow a single instruction to be hoisted in addition to the compare
+ // that feeds the branch. We later ensure that any values that _it_ uses
+ // were also live in the predecessor, so that we don't unnecessarily create
+ // register pressure or inhibit out-of-order execution.
+ Instruction *BonusInst = 0;
+ if (&*FrontIt != Cond &&
+ FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+ isSafeToSpeculativelyExecute(FrontIt)) {
+ BonusInst = &*FrontIt;
+ ++FrontIt;
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
+ }
+
+ // Only a single bonus inst is allowed.
+ if (&*FrontIt != Cond)
+ return false;
+
+ // Make sure the instruction after the condition is the cond branch.
+ BasicBlock::iterator CondIt = Cond; ++CondIt;
+
+ // Ingore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
+
+ if (&*CondIt != BI)
+ return false;
+
+ // Cond is known to be a compare or binary operator. Check to make sure that
+ // neither operand is a potentially-trapping constant expression.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
+ if (CE->canTrap())
+ return false;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
+ if (CE->canTrap())
+ return false;
+
+ // Finally, don't infinitely unroll conditional loops.
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0;
+ if (TrueDest == BB || FalseDest == BB)
+ return false;
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBlock = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
+
+ // Check that we have two conditional branches. If there is a PHI node in
+ // the common successor, verify that the same value flows in from both
+ // blocks.
+ SmallVector<PHINode*, 4> PHIs;
+ if (PBI == 0 || PBI->isUnconditional() ||
+ (BI->isConditional() &&
+ !SafeToMergeTerminators(BI, PBI)) ||
+ (!BI->isConditional() &&
+ !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
+ continue;
+
+ // Determine if the two branches share a common destination.
+ Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
+ bool InvertPredCond = false;
+
+ if (BI->isConditional()) {
+ if (PBI->getSuccessor(0) == TrueDest)
+ Opc = Instruction::Or;
+ else if (PBI->getSuccessor(1) == FalseDest)
+ Opc = Instruction::And;
+ else if (PBI->getSuccessor(0) == FalseDest)
+ Opc = Instruction::And, InvertPredCond = true;
+ else if (PBI->getSuccessor(1) == TrueDest)
+ Opc = Instruction::Or, InvertPredCond = true;
+ else
+ continue;
+ } else {
+ if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest)
+ continue;
+ }
+
+ // Ensure that any values used in the bonus instruction are also used
+ // by the terminator of the predecessor. This means that those values
+ // must already have been resolved, so we won't be inhibiting the
+ // out-of-order core by speculating them earlier.
+ if (BonusInst) {
+ // Collect the values used by the bonus inst
+ SmallPtrSet<Value*, 4> UsedValues;
+ for (Instruction::op_iterator OI = BonusInst->op_begin(),
+ OE = BonusInst->op_end(); OI != OE; ++OI) {
+ Value *V = *OI;
+ if (!isa<Constant>(V))
+ UsedValues.insert(V);
+ }
+
+ SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
+ Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
+
+ // Walk up to four levels back up the use-def chain of the predecessor's
+ // terminator to see if all those values were used. The choice of four
+ // levels is arbitrary, to provide a compile-time-cost bound.
+ while (!Worklist.empty()) {
+ std::pair<Value*, unsigned> Pair = Worklist.back();
+ Worklist.pop_back();
+
+ if (Pair.second >= 4) continue;
+ UsedValues.erase(Pair.first);
+ if (UsedValues.empty()) break;
+
+ if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
+ }
+ }
+
+ if (!UsedValues.empty()) return false;
+ }
+
+ DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+ IRBuilder<> Builder(PBI);
+
+ // If we need to invert the condition in the pred block to match, do so now.
+ if (InvertPredCond) {
+ Value *NewCond = PBI->getCondition();
+
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else {
+ NewCond = Builder.CreateNot(NewCond,
+ PBI->getCondition()->getName()+".not");
+ }
+
+ PBI->setCondition(NewCond);
+ PBI->swapSuccessors();
+ }
+
+ // If we have a bonus inst, clone it into the predecessor block.
+ Instruction *NewBonus = 0;
+ if (BonusInst) {
+ NewBonus = BonusInst->clone();
+ PredBlock->getInstList().insert(PBI, NewBonus);
+ NewBonus->takeName(BonusInst);
+ BonusInst->setName(BonusInst->getName()+".old");
+ }
+
+ // Clone Cond into the predecessor basic block, and or/and the
+ // two conditions together.
+ Instruction *New = Cond->clone();
+ if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus);
+ PredBlock->getInstList().insert(PBI, New);
+ New->takeName(Cond);
+ Cond->setName(New->getName()+".old");
+
+ if (BI->isConditional()) {
+ Instruction *NewCond =
+ cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(),
+ New, "or.cond"));
+ PBI->setCondition(NewCond);
+
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ SmallVector<uint64_t, 8> NewWeights;
+
+ if (PBI->getSuccessor(0) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, BB, FalseDest
+ // BI: br i1 %y, TrueDest, FalseDest
+ //TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ //FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // TrueWeight for PBI * FalseWeight for BI.
+ // We assume that total weights of a BranchInst can fit into 32 bits.
+ // Therefore, we will not have overflow using 64-bit arithmetic.
+ NewWeights.push_back(PredFalseWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredTrueWeight * SuccFalseWeight);
+ }
+ AddPredecessorToBlock(TrueDest, PredBlock, BB);
+ PBI->setSuccessor(0, TrueDest);
+ }
+ if (PBI->getSuccessor(1) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, TrueDest, BB
+ // BI: br i1 %y, TrueDest, FalseDest
+ //TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // FalseWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredFalseWeight * SuccTrueWeight);
+ //FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ }
+ AddPredecessorToBlock(FalseDest, PredBlock, BB);
+ PBI->setSuccessor(1, FalseDest);
+ }
+ if (NewWeights.size() == 2) {
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).
+ createBranchWeights(MDWeights));
+ } else
+ PBI->setMetadata(LLVMContext::MD_prof, NULL);
+ } else {
+ // Update PHI nodes in the common successors.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ ConstantInt *PBI_C = cast<ConstantInt>(
+ PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
+ assert(PBI_C->getType()->isIntegerTy(1));
+ Instruction *MergedCond = 0;
+ if (PBI->getSuccessor(0) == TrueDest) {
+ // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
+ // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
+ // is false: !PBI_Cond and BI_Value
+ Instruction *NotCond =
+ cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
+ "not.cond"));
+ MergedCond =
+ cast<Instruction>(Builder.CreateBinOp(Instruction::And,
+ NotCond, New,
+ "and.cond"));
+ if (PBI_C->isOne())
+ MergedCond =
+ cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
+ PBI->getCondition(), MergedCond,
+ "or.cond"));
+ } else {
+ // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
+ // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
+ // is false: PBI_Cond and BI_Value
+ MergedCond =
+ cast<Instruction>(Builder.CreateBinOp(Instruction::And,
+ PBI->getCondition(), New,
+ "and.cond"));
+ if (PBI_C->isOne()) {
+ Instruction *NotCond =
+ cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
+ "not.cond"));
+ MergedCond =
+ cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
+ NotCond, MergedCond,
+ "or.cond"));
+ }
+ }
+ // Update PHI Node.
+ PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()),
+ MergedCond);
+ }
+ // Change PBI from Conditional to Unconditional.
+ BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
+ EraseTerminatorInstAndDCECond(PBI);
+ PBI = New_PBI;
+ }
+
+ // TODO: If BB is reachable from all paths through PredBlock, then we
+ // could replace PBI's branch probabilities with BI's.
+
+ // Copy any debug value intrinsics into the end of PredBlock.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isa<DbgInfoIntrinsic>(*I))
+ I->clone()->insertBefore(PBI);
+
+ return true;
+ }
+ return false;
+}
+
+/// SimplifyCondBranchToCondBranch - If we have a conditional branch as a
+/// predecessor of another block, this function tries to simplify it. We know
+/// that PBI and BI are both conditional branches, and BI is in one of the
+/// successor blocks of PBI - PBI branches to BI.
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+ assert(PBI->isConditional() && BI->isConditional());
+ BasicBlock *BB = BI->getParent();
+
+ // If this block ends with a branch instruction, and if there is a
+ // predecessor that ends on a branch of the same condition, make
+ // this conditional branch redundant.
+ if (PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ // Okay, the outcome of this conditional branch is statically
+ // knowable. If this block had a single pred, handle specially.
+ if (BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue));
+ return true; // Nuke the branch on constant.
+ }
+
+ // Otherwise, if there are multiple predecessors, insert a PHI that merges
+ // in the constant and simplify the block result. Subsequent passes of
+ // simplifycfg will thread the block.
+ if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+ PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
+ std::distance(PB, PE),
+ BI->getCondition()->getName() + ".pr",
+ BB->begin());
+ // Okay, we're going to insert the PHI node. Since PBI is not the only
+ // predecessor, compute the PHI'd conditional value for all of the preds.
+ // Any predecessor where the condition is not computable we keep symbolic.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
+ PBI != BI && PBI->isConditional() &&
+ PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue), P);
+ } else {
+ NewPN->addIncoming(BI->getCondition(), P);
+ }
+ }
+
+ BI->setCondition(NewPN);
+ return true;
+ }
+ }
+
+ // If this is a conditional branch in an empty block, and if any
+ // predecessors is a conditional branch to one of our destinations,
+ // fold the conditions into logical ops and one cond br.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (&*BBI != BI)
+ return false;
+
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
+ if (CE->canTrap())
+ return false;
+
+ int PBIOp, BIOp;
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0))
+ PBIOp = BIOp = 0;
+ else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
+ PBIOp = 0, BIOp = 1;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
+ PBIOp = 1, BIOp = 0;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
+ PBIOp = BIOp = 1;
+ else
+ return false;
+
+ // Check to make sure that the other destination of this branch
+ // isn't BB itself. If so, this is an infinite loop that will
+ // keep getting unwound.
+ if (PBI->getSuccessor(PBIOp) == BB)
+ return false;
+
+ // Do not perform this transformation if it would require
+ // insertion of a large number of select instructions. For targets
+ // without predication/cmovs, this is a big pessimization.
+ BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ isa<PHINode>(II); ++II, ++NumPhis)
+ if (NumPhis > 2) // Disable this xform.
+ return false;
+
+ // Finally, if everything is ok, fold the branches to logical ops.
+ BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
+
+ DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent());
+
+
+ // If OtherDest *is* BB, then BB is a basic block with a single conditional
+ // branch in it, where one edge (OtherDest) goes back to itself but the other
+ // exits. We don't *know* that the program avoids the infinite loop
+ // (even though that seems likely). If we do this xform naively, we'll end up
+ // recursively unpeeling the loop. Since we know that (after the xform is
+ // done) that the block *is* infinite if reached, we just make it an obviously
+ // infinite loop with no cond branch.
+ if (OtherDest == BB) {
+ // Insert it at the end of the function, because it's either code,
+ // or it won't matter if it's hot. :)
+ BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ OtherDest = InfLoopBlock;
+ }
+
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // BI may have other predecessors. Because of this, we leave
+ // it alone, but modify PBI.
+
+ // Make sure we get to CommonDest on True&True directions.
+ Value *PBICond = PBI->getCondition();
+ IRBuilder<true, NoFolder> Builder(PBI);
+ if (PBIOp)
+ PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not");
+
+ Value *BICond = BI->getCondition();
+ if (BIOp)
+ BICond = Builder.CreateNot(BICond, BICond->getName()+".not");
+
+ // Merge the conditions.
+ Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
+
+ // Modify PBI to branch on the new condition to the new dests.
+ PBI->setCondition(Cond);
+ PBI->setSuccessor(0, CommonDest);
+ PBI->setSuccessor(1, OtherDest);
+
+ // Update branch weight for PBI.
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ if (PredHasWeights && SuccHasWeights) {
+ uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+ uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight;
+ uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ // The weight to CommonDest should be PredCommon * SuccTotal +
+ // PredOther * SuccCommon.
+ // The weight to OtherDest should be PredOther * SuccOther.
+ SmallVector<uint64_t, 2> NewWeights;
+ NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) +
+ PredOther * SuccCommon);
+ NewWeights.push_back(PredOther * SuccOther);
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
+ // OtherDest may have phi nodes. If so, add an entry from PBI's
+ // block that are identical to the entries for BI's block.
+ AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
+
+ // We know that the CommonDest already had an edge from PBI to
+ // it. If it has PHIs though, the PHIs may have different
+ // entries for BB and PBI's BB. If so, insert a select to make
+ // them agree.
+ PHINode *PN;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *BIV = PN->getIncomingValueForBlock(BB);
+ unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN->getIncomingValue(PBBIdx);
+ if (BIV != PBIV) {
+ // Insert a select in PBI to pick the right value.
+ Value *NV = cast<SelectInst>
+ (Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux"));
+ PN->setIncomingValue(PBBIdx, NV);
+ }
+ }
+
+ DEBUG(dbgs() << "INTO: " << *PBI->getParent());
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // This basic block is probably dead. We know it has at least
+ // one fewer predecessor.
+ return true;
+}
+
+// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a
+// branch to TrueBB if Cond is true or to FalseBB if Cond is false.
+// Takes care of updating the successors and removing the old terminator.
+// Also makes sure not to introduce new successors by assuming that edges to
+// non-successor TrueBBs and FalseBBs aren't reachable.
+static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+ BasicBlock *TrueBB, BasicBlock *FalseBB,
+ uint32_t TrueWeight,
+ uint32_t FalseWeight){
+ // Remove any superfluous successor edges from the CFG.
+ // First, figure out which successors to preserve.
+ // If TrueBB and FalseBB are equal, only try to preserve one copy of that
+ // successor.
+ BasicBlock *KeepEdge1 = TrueBB;
+ BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0;
+
+ // Then remove the rest.
+ for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
+ BasicBlock *Succ = OldTerm->getSuccessor(I);
+ // Make sure only to keep exactly one copy of each edge.
+ if (Succ == KeepEdge1)
+ KeepEdge1 = 0;
+ else if (Succ == KeepEdge2)
+ KeepEdge2 = 0;
+ else
+ Succ->removePredecessor(OldTerm->getParent());
+ }
+
+ IRBuilder<> Builder(OldTerm);
+ Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
+
+ // Insert an appropriate new terminator.
+ if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) {
+ if (TrueBB == FalseBB)
+ // We were only looking for one successor, and it was present.
+ // Create an unconditional branch to it.
+ Builder.CreateBr(TrueBB);
+ else {
+ // We found both of the successors we were looking for.
+ // Create a conditional branch sharing the condition of the select.
+ BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ if (TrueWeight != FalseWeight)
+ NewBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(OldTerm->getContext()).
+ createBranchWeights(TrueWeight, FalseWeight));
+ }
+ } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
+ // Neither of the selected blocks were successors, so this
+ // terminator must be unreachable.
+ new UnreachableInst(OldTerm->getContext(), OldTerm);
+ } else {
+ // One of the selected values was a successor, but the other wasn't.
+ // Insert an unconditional branch to the one that was found;
+ // the edge to the one that wasn't must be unreachable.
+ if (KeepEdge1 == 0)
+ // Only TrueBB was found.
+ Builder.CreateBr(TrueBB);
+ else
+ // Only FalseBB was found.
+ Builder.CreateBr(FalseBB);
+ }
+
+ EraseTerminatorInstAndDCECond(OldTerm);
+ return true;
+}
+
+// SimplifySwitchOnSelect - Replaces
+// (switch (select cond, X, Y)) on constant X, Y
+// with a branch - conditional if X and Y lead to distinct BBs,
+// unconditional otherwise.
+static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
+ // Check for constant integer values in the select.
+ ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
+ ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
+ if (!TrueVal || !FalseVal)
+ return false;
+
+ // Find the relevant condition and destinations.
+ Value *Condition = Select->getCondition();
+ BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
+ BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
+
+ // Get weight for TrueBB and FalseBB.
+ uint32_t TrueWeight = 0, FalseWeight = 0;
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal).
+ getSuccessorIndex()];
+ FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal).
+ getSuccessorIndex()];
+ }
+ }
+
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB,
+ TrueWeight, FalseWeight);
+}
+
+// SimplifyIndirectBrOnSelect - Replaces
+// (indirectbr (select cond, blockaddress(@fn, BlockA),
+// blockaddress(@fn, BlockB)))
+// with
+// (br cond, BlockA, BlockB).
+static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
+ // Check that both operands of the select are block addresses.
+ BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
+ BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
+ if (!TBA || !FBA)
+ return false;
+
+ // Extract the actual blocks.
+ BasicBlock *TrueBB = TBA->getBasicBlock();
+ BasicBlock *FalseBB = FBA->getBasicBlock();
+
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
+ 0, 0);
+}
+
+/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
+/// instruction (a seteq/setne with a constant) as the only instruction in a
+/// block that ends with an uncond branch. We are looking for a very specific
+/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
+/// this case, we merge the first two "or's of icmp" into a switch, but then the
+/// default value goes to an uncond block with a seteq in it, we get something
+/// like:
+///
+/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
+/// DEFAULT:
+/// %tmp = icmp eq i8 %A, 92
+/// br label %end
+/// end:
+/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
+///
+/// We prefer to split the edge to 'end' so that there is a true/false entry to
+/// the PHI, merging the third icmp into the switch.
+static bool TryToSimplifyUncondBranchWithICmpInIt(
+ ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
+ const DataLayout *TD) {
+ BasicBlock *BB = ICI->getParent();
+
+ // If the block has any PHIs in it or the icmp has multiple uses, it is too
+ // complex.
+ if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false;
+
+ Value *V = ICI->getOperand(0);
+ ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
+
+ // The pattern we're looking for is where our only predecessor is a switch on
+ // 'V' and this block is the default case for the switch. In this case we can
+ // fold the compared value into the switch to simplify things.
+ BasicBlock *Pred = BB->getSinglePredecessor();
+ if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
+
+ SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
+ if (SI->getCondition() != V)
+ return false;
+
+ // If BB is reachable on a non-default case, then we simply know the value of
+ // V in this block. Substitute it and constant fold the icmp instruction
+ // away.
+ if (SI->getDefaultDest() != BB) {
+ ConstantInt *VVal = SI->findCaseDest(BB);
+ assert(VVal && "Should have a unique destination value");
+ ICI->setOperand(0, VVal);
+
+ if (Value *V = SimplifyInstruction(ICI, TD)) {
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ }
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB, TTI, TD) | true;
+ }
+
+ // Ok, the block is reachable from the default dest. If the constant we're
+ // comparing exists in one of the other edges, then we can constant fold ICI
+ // and zap it.
+ if (SI->findCaseValue(Cst) != SI->case_default()) {
+ Value *V;
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ V = ConstantInt::getFalse(BB->getContext());
+ else
+ V = ConstantInt::getTrue(BB->getContext());
+
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB, TTI, TD) | true;
+ }
+
+ // The use of the icmp has to be in the 'end' block, by the only PHI node in
+ // the block.
+ BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
+ PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back());
+ if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
+ isa<PHINode>(++BasicBlock::iterator(PHIUse)))
+ return false;
+
+ // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
+ // true in the PHI.
+ Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
+ Constant *NewCst = ConstantInt::getFalse(BB->getContext());
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(DefaultCst, NewCst);
+
+ // Replace ICI (which is used by the PHI for the default value) with true or
+ // false depending on if it is EQ or NE.
+ ICI->replaceAllUsesWith(DefaultCst);
+ ICI->eraseFromParent();
+
+ // Okay, the switch goes to this block on a default value. Add an edge from
+ // the switch to the merge point on the compared value.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
+ BB->getParent(), BB);
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ // Split weight for default case to case for "Cst".
+ Weights[0] = (Weights[0]+1) >> 1;
+ Weights.push_back(Weights[0]);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).
+ createBranchWeights(MDWeights));
+ }
+ }
+ SI->addCase(Cst, NewBB);
+
+ // NewBB branches to the phi block, add the uncond branch and the phi entry.
+ Builder.SetInsertPoint(NewBB);
+ Builder.SetCurrentDebugLocation(SI->getDebugLoc());
+ Builder.CreateBr(SuccBlock);
+ PHIUse->addIncoming(NewCst, NewBB);
+ return true;
+}
+
+/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
+/// Check to see if it is branching on an or/and chain of icmp instructions, and
+/// fold it into a switch instruction if so.
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
+ IRBuilder<> &Builder) {
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0) return false;
+
+
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
+ Value *CompVal = 0;
+ std::vector<ConstantInt*> Values;
+ bool TrueWhenEqual = true;
+ Value *ExtraCase = 0;
+ unsigned UsedICmps = 0;
+
+ if (Cond->getOpcode() == Instruction::Or) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+ UsedICmps);
+ } else if (Cond->getOpcode() == Instruction::And) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+ UsedICmps);
+ TrueWhenEqual = false;
+ }
+
+ // If we didn't have a multiply compared value, fail.
+ if (CompVal == 0) return false;
+
+ // Avoid turning single icmps into a switch.
+ if (UsedICmps <= 1)
+ return false;
+
+ // There might be duplicate constants in the list, which the switch
+ // instruction can't handle, remove them now.
+ array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // If Extra was used, we require at least two switch values to do the
+ // transformation. A switch with one value is just an cond branch.
+ if (ExtraCase && Values.size() < 2) return false;
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+ BasicBlock *BB = BI->getParent();
+
+ DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+ << " cases into SWITCH. BB is:\n" << *BB);
+
+ // If there are any extra values that couldn't be folded into the switch
+ // then we evaluate them with an explicit branch first. Split the block
+ // right before the condbr to handle it.
+ if (ExtraCase) {
+ BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+ // Remove the uncond branch added to the old block.
+ TerminatorInst *OldTI = BB->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+
+ if (TrueWhenEqual)
+ Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
+ else
+ Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+
+ OldTI->eraseFromParent();
+
+ // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
+ // for the edge we just added.
+ AddPredecessorToBlock(EdgeBB, BB, NewBB);
+
+ DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
+ << "\nEXTRABB = " << *BB);
+ BB = NewBB;
+ }
+
+ Builder.SetInsertPoint(BI);
+ // Convert pointer to int before we switch.
+ if (CompVal->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without DataLayout");
+ CompVal = Builder.CreatePtrToInt(CompVal,
+ TD->getIntPtrType(CompVal->getContext()),
+ "magicptr");
+ }
+
+ // Create the new switch instruction now.
+ SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+ // We added edges from PI to the EdgeBB. As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added.
+ for (BasicBlock::iterator BBI = EdgeBB->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(BB);
+ for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+ PN->addIncoming(InVal, BB);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorInstAndDCECond(BI);
+
+ DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
+ // If this is a trivial landing pad that just continues unwinding the caught
+ // exception then zap the landing pad, turning its invokes into calls.
+ BasicBlock *BB = RI->getParent();
+ LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
+ if (RI->getValue() != LPInst)
+ // Not a landing pad, or the resume is not unwinding the exception that
+ // caused control to branch here.
+ return false;
+
+ // Check that there are no other instructions except for debug intrinsics.
+ BasicBlock::iterator I = LPInst, E = RI;
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // Turn all invokes that unwind here into calls and delete the basic block.
+ bool InvokeRequiresTableEntry = false;
+ bool Changed = false;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
+ InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
+
+ if (II->hasFnAttr(Attribute::UWTable)) {
+ // Don't remove an `invoke' instruction if the ABI requires an entry into
+ // the table.
+ InvokeRequiresTableEntry = true;
+ continue;
+ }
+
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+
+ // Insert a call instruction before the invoke.
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+ Call->takeName(II);
+ Call->setCallingConv(II->getCallingConv());
+ Call->setAttributes(II->getAttributes());
+ Call->setDebugLoc(II->getDebugLoc());
+
+ // Anything that used the value produced by the invoke instruction now uses
+ // the value produced by the call instruction. Note that we do this even
+ // for void functions and calls with no uses so that the callgraph edge is
+ // updated.
+ II->replaceAllUsesWith(Call);
+ BB->removePredecessor(II->getParent());
+
+ // Insert a branch to the normal destination right before the invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Finally, delete the invoke instruction!
+ II->eraseFromParent();
+ Changed = true;
+ }
+
+ if (!InvokeRequiresTableEntry)
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+
+ return Changed;
+}
+
+bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
+ BasicBlock *BB = RI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
+
+ // Find predecessors that end with branches.
+ SmallVector<BasicBlock*, 8> UncondBranchPreds;
+ SmallVector<BranchInst*, 8> CondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ TerminatorInst *PTI = P->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(P);
+ else
+ CondBranchPreds.push_back(BI);
+ }
+ }
+
+ // If we found some, do the transformation!
+ if (!UncondBranchPreds.empty() && DupRet) {
+ while (!UncondBranchPreds.empty()) {
+ BasicBlock *Pred = UncondBranchPreds.pop_back_val();
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (pred_begin(BB) == pred_end(BB))
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+
+ return true;
+ }
+
+ // Check out all of the conditional branches going to this return
+ // instruction. If any of them just select between returns, change the
+ // branch itself into a select/return pair.
+ while (!CondBranchPreds.empty()) {
+ BranchInst *BI = CondBranchPreds.pop_back_val();
+
+ // Check to see if the non-BB successor is also a return block.
+ if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
+ isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
+ SimplifyCondBranchToTwoReturns(BI, Builder))
+ return true;
+ }
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
+ BasicBlock *BB = UI->getParent();
+
+ bool Changed = false;
+
+ // If there are any instructions immediately before the unreachable that can
+ // be removed, do so.
+ while (UI != BB->begin()) {
+ BasicBlock::iterator BBI = UI;
+ --BBI;
+ // Do not delete instructions that can have side effects which might cause
+ // the unreachable to not be reachable; specifically, calls and volatile
+ // operations may have this effect.
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+
+ if (BBI->mayHaveSideEffects()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (SI->isVolatile())
+ break;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI->isVolatile())
+ break;
+ } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
+ if (RMWI->isVolatile())
+ break;
+ } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
+ if (CXI->isVolatile())
+ break;
+ } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
+ !isa<LandingPadInst>(BBI)) {
+ break;
+ }
+ // Note that deleting LandingPad's here is in fact okay, although it
+ // involves a bit of subtle reasoning. If this inst is a LandingPad,
+ // all the predecessors of this block will be the unwind edges of Invokes,
+ // and we can therefore guarantee this block will be erased.
+ }
+
+ // Delete this instruction (any uses are guaranteed to be dead)
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BBI->eraseFromParent();
+ Changed = true;
+ }
+
+ // If the unreachable instruction is the first in the block, take a gander
+ // at all of the predecessors of this instruction, and simplify them.
+ if (&BB->front() != UI) return Changed;
+
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ TerminatorInst *TI = Preds[i]->getTerminator();
+ IRBuilder<> Builder(TI);
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional()) {
+ if (BI->getSuccessor(0) == BB) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
+ }
+ } else {
+ if (BI->getSuccessor(0) == BB) {
+ Builder.CreateBr(BI->getSuccessor(1));
+ EraseTerminatorInstAndDCECond(BI);
+ } else if (BI->getSuccessor(1) == BB) {
+ Builder.CreateBr(BI->getSuccessor(0));
+ EraseTerminatorInstAndDCECond(BI);
+ Changed = true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseSuccessor() == BB) {
+ BB->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e;
+ Changed = true;
+ }
+ // If the default value is unreachable, figure out the most popular
+ // destination and make it the default.
+ if (SI->getDefaultDest() == BB) {
+ std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ std::pair<unsigned, unsigned> &entry =
+ Popularity[i.getCaseSuccessor()];
+ if (entry.first == 0) {
+ entry.first = 1;
+ entry.second = i.getCaseIndex();
+ } else {
+ entry.first++;
+ }
+ }
+
+ // Find the most popular block.
+ unsigned MaxPop = 0;
+ unsigned MaxIndex = 0;
+ BasicBlock *MaxBlock = 0;
+ for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
+ I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+ if (I->second.first > MaxPop ||
+ (I->second.first == MaxPop && MaxIndex > I->second.second)) {
+ MaxPop = I->second.first;
+ MaxIndex = I->second.second;
+ MaxBlock = I->first;
+ }
+ }
+ if (MaxBlock) {
+ // Make this the new default, allowing us to delete any explicit
+ // edges to it.
+ SI->setDefaultDest(MaxBlock);
+ Changed = true;
+
+ // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
+ // it.
+ if (isa<PHINode>(MaxBlock->begin()))
+ for (unsigned i = 0; i != MaxPop-1; ++i)
+ MaxBlock->removePredecessor(SI->getParent());
+
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseSuccessor() == MaxBlock) {
+ SI->removeCase(i);
+ --i; --e;
+ }
+ }
+ }
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ // Convert the invoke to a call instruction. This would be a good
+ // place to note that the call does not throw though.
+ BranchInst *BI = Builder.CreateBr(II->getNormalDest());
+ II->removeFromParent(); // Take out of symbol table
+
+ // Insert the call now...
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
+ Builder.SetInsertPoint(BI);
+ CallInst *CI = Builder.CreateCall(II->getCalledValue(),
+ Args, II->getName());
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the call does now instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
+ Changed = true;
+ }
+ }
+ }
+
+ // If this block is now dead, remove it.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ return true;
+ }
+
+ return Changed;
+}
+
+/// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a
+/// integer range comparison into a sub, an icmp and a branch.
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ // Make sure all cases point to the same destination and gather the values.
+ SmallVector<ConstantInt *, 16> Cases;
+ SwitchInst::CaseIt I = SI->case_begin();
+ Cases.push_back(I.getCaseValue());
+ SwitchInst::CaseIt PrevI = I++;
+ for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) {
+ if (PrevI.getCaseSuccessor() != I.getCaseSuccessor())
+ return false;
+ Cases.push_back(I.getCaseValue());
+ }
+ assert(Cases.size() == SI->getNumCases() && "Not all cases gathered");
+
+ // Sort the case values, then check if they form a range we can transform.
+ array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+ for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
+ if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
+ return false;
+ }
+
+ Constant *Offset = ConstantExpr::getNeg(Cases.back());
+ Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases());
+
+ Value *Sub = SI->getCondition();
+ if (!Offset->isNullValue())
+ Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
+ Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+ BranchInst *NewBI = Builder.CreateCondBr(
+ Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
+
+ // Update weight for the newly-created conditional branch.
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ // Combine all weights for the cases to be the true weight of NewBI.
+ // We assume that the sum of all weights for a Terminator can fit into 32
+ // bits.
+ uint32_t NewTrueWeight = 0;
+ for (unsigned I = 1, E = Weights.size(); I != E; ++I)
+ NewTrueWeight += (uint32_t)Weights[I];
+ NewBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).
+ createBranchWeights(NewTrueWeight,
+ (uint32_t)Weights[0]));
+ }
+ }
+
+ // Prune obsolete incoming values off the successor's PHI nodes.
+ for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ return true;
+}
+
+/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
+/// and use it to remove dead cases.
+static bool EliminateDeadSwitchCases(SwitchInst *SI) {
+ Value *Cond = SI->getCondition();
+ unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth();
+ APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
+ ComputeMaskedBits(Cond, KnownZero, KnownOne);
+
+ // Gather dead cases.
+ SmallVector<ConstantInt*, 8> DeadCases;
+ for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+ if ((I.getCaseValue()->getValue() & KnownZero) != 0 ||
+ (I.getCaseValue()->getValue() & KnownOne) != KnownOne) {
+ DeadCases.push_back(I.getCaseValue());
+ DEBUG(dbgs() << "SimplifyCFG: switch case '"
+ << I.getCaseValue() << "' is dead.\n");
+ }
+ }
+
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeight = HasBranchWeights(SI);
+ if (HasWeight) {
+ GetBranchWeights(SI, Weights);
+ HasWeight = (Weights.size() == 1 + SI->getNumCases());
+ }
+
+ // Remove dead cases from the switch.
+ for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
+ SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
+ assert(Case != SI->case_default() &&
+ "Case was not found. Probably mistake in DeadCases forming.");
+ if (HasWeight) {
+ std::swap(Weights[Case.getCaseIndex()+1], Weights.back());
+ Weights.pop_back();
+ }
+
+ // Prune unused values from PHI nodes.
+ Case.getCaseSuccessor()->removePredecessor(SI->getParent());
+ SI->removeCase(Case);
+ }
+ if (HasWeight) {
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
+ return !DeadCases.empty();
+}
+
+/// FindPHIForConditionForwarding - If BB would be eligible for simplification
+/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
+/// by an unconditional branch), look at the phi node for BB in the successor
+/// block and see if the incoming value is equal to CaseValue. If so, return
+/// the phi node, and set PhiIndex to BB's index in the phi node.
+static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+ BasicBlock *BB,
+ int *PhiIndex) {
+ if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
+ return NULL; // BB must be empty to be a candidate for simplification.
+ if (!BB->getSinglePredecessor())
+ return NULL; // BB must be dominated by the switch.
+
+ BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!Branch || !Branch->isUnconditional())
+ return NULL; // Terminator must be unconditional branch.
+
+ BasicBlock *Succ = Branch->getSuccessor(0);
+
+ BasicBlock::iterator I = Succ->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "PHI has no entry for predecessor?");
+
+ Value *InValue = PHI->getIncomingValue(Idx);
+ if (InValue != CaseValue) continue;
+
+ *PhiIndex = Idx;
+ return PHI;
+ }
+
+ return NULL;
+}
+
+/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
+/// instruction to a phi node dominated by the switch, if that would mean that
+/// some of the destination blocks of the switch can be folded away.
+/// Returns true if a change is made.
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+ typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
+ ForwardingNodesMap ForwardingNodes;
+
+ for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+ ConstantInt *CaseValue = I.getCaseValue();
+ BasicBlock *CaseDest = I.getCaseSuccessor();
+
+ int PhiIndex;
+ PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
+ &PhiIndex);
+ if (!PHI) continue;
+
+ ForwardingNodes[PHI].push_back(PhiIndex);
+ }
+
+ bool Changed = false;
+
+ for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
+ E = ForwardingNodes.end(); I != E; ++I) {
+ PHINode *Phi = I->first;
+ SmallVector<int,4> &Indexes = I->second;
+
+ if (Indexes.size() < 2) continue;
+
+ for (size_t I = 0, E = Indexes.size(); I != E; ++I)
+ Phi->setIncomingValue(Indexes[I], SI->getCondition());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// ValidLookupTableConstant - Return true if the backend will be able to handle
+/// initializing an array of constants like C.
+static bool ValidLookupTableConstant(Constant *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return CE->isGEPWithNoNotionalOverIndexing();
+
+ return isa<ConstantFP>(C) ||
+ isa<ConstantInt>(C) ||
+ isa<ConstantPointerNull>(C) ||
+ isa<GlobalValue>(C) ||
+ isa<UndefValue>(C);
+}
+
+/// LookupConstant - If V is a Constant, return it. Otherwise, try to look up
+/// its constant value in ConstantPool, returning 0 if it's not there.
+static Constant *LookupConstant(Value *V,
+ const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C;
+ return ConstantPool.lookup(V);
+}
+
+/// ConstantFold - Try to fold instruction I into a constant. This works for
+/// simple instructions such as binary operations where both operands are
+/// constant or can be replaced by constants from the ConstantPool. Returns the
+/// resulting constant on success, 0 otherwise.
+static Constant *ConstantFold(Instruction *I,
+ const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ Constant *A = LookupConstant(BO->getOperand(0), ConstantPool);
+ if (!A)
+ return 0;
+ Constant *B = LookupConstant(BO->getOperand(1), ConstantPool);
+ if (!B)
+ return 0;
+ return ConstantExpr::get(BO->getOpcode(), A, B);
+ }
+
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
+ if (!A)
+ return 0;
+ Constant *B = LookupConstant(I->getOperand(1), ConstantPool);
+ if (!B)
+ return 0;
+ return ConstantExpr::getCompare(Cmp->getPredicate(), A, B);
+ }
+
+ if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+ Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
+ if (!A)
+ return 0;
+ if (A->isAllOnesValue())
+ return LookupConstant(Select->getTrueValue(), ConstantPool);
+ if (A->isNullValue())
+ return LookupConstant(Select->getFalseValue(), ConstantPool);
+ return 0;
+ }
+
+ if (CastInst *Cast = dyn_cast<CastInst>(I)) {
+ Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
+ if (!A)
+ return 0;
+ return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy());
+ }
+
+ return 0;
+}
+
+/// GetCaseResults - Try to determine the resulting constant values in phi nodes
+/// at the common destination basic block, *CommonDest, for one of the case
+/// destionations CaseDest corresponding to value CaseVal (0 for the default
+/// case), of a switch instruction SI.
+static bool GetCaseResults(SwitchInst *SI,
+ ConstantInt *CaseVal,
+ BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
+ // The block from which we enter the common destination.
+ BasicBlock *Pred = SI->getParent();
+
+ // If CaseDest is empty except for some side-effect free instructions through
+ // which we can constant-propagate the CaseVal, continue to its successor.
+ SmallDenseMap<Value*, Constant*> ConstantPool;
+ ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
+ for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E;
+ ++I) {
+ if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) {
+ // If the terminator is a simple branch, continue to the next block.
+ if (T->getNumSuccessors() != 1)
+ return false;
+ Pred = CaseDest;
+ CaseDest = T->getSuccessor(0);
+ } else if (isa<DbgInfoIntrinsic>(I)) {
+ // Skip debug intrinsic.
+ continue;
+ } else if (Constant *C = ConstantFold(I, ConstantPool)) {
+ // Instruction is side-effect free and constant.
+ ConstantPool.insert(std::make_pair(I, C));
+ } else {
+ break;
+ }
+ }
+
+ // If we did not have a CommonDest before, use the current one.
+ if (!*CommonDest)
+ *CommonDest = CaseDest;
+ // If the destination isn't the common one, abort.
+ if (CaseDest != *CommonDest)
+ return false;
+
+ // Get the values for this case from phi nodes in the destination block.
+ BasicBlock::iterator I = (*CommonDest)->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(Pred);
+ if (Idx == -1)
+ continue;
+
+ Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx),
+ ConstantPool);
+ if (!ConstVal)
+ return false;
+
+ // Note: If the constant comes from constant-propagating the case value
+ // through the CaseDest basic block, it will be safe to remove the
+ // instructions in that block. They cannot be used (except in the phi nodes
+ // we visit) outside CaseDest, because that block does not dominate its
+ // successor. If it did, we would not be in this phi node.
+
+ // Be conservative about which kinds of constants we support.
+ if (!ValidLookupTableConstant(ConstVal))
+ return false;
+
+ Res.push_back(std::make_pair(PHI, ConstVal));
+ }
+
+ return true;
+}
+
+namespace {
+ /// SwitchLookupTable - This class represents a lookup table that can be used
+ /// to replace a switch.
+ class SwitchLookupTable {
+ public:
+ /// SwitchLookupTable - Create a lookup table to use as a switch replacement
+ /// with the contents of Values, using DefaultValue to fill any holes in the
+ /// table.
+ SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const DataLayout *TD);
+
+ /// BuildLookup - Build instructions with Builder to retrieve the value at
+ /// the position given by Index in the lookup table.
+ Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+
+ /// WouldFitInRegister - Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
+ static bool WouldFitInRegister(const DataLayout *TD,
+ uint64_t TableSize,
+ const Type *ElementType);
+
+ private:
+ // Depending on the contents of the table, it can be represented in
+ // different ways.
+ enum {
+ // For tables where each element contains the same value, we just have to
+ // store that single value and return it for each lookup.
+ SingleValueKind,
+
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
+ // The table is stored as an array of values. Values are retrieved by load
+ // instructions from the table.
+ ArrayKind
+ } Kind;
+
+ // For SingleValueKind, this is the single value.
+ Constant *SingleValue;
+
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap;
+ IntegerType *BitMapElementTy;
+
+ // For ArrayKind, this is the array.
+ GlobalVariable *Array;
+ };
+}
+
+SwitchLookupTable::SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const DataLayout *TD)
+ : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
+ assert(Values.size() && "Can't build lookup table without values!");
+ assert(TableSize >= Values.size() && "Can't fit values in table!");
+
+ // If all values in the table are equal, this is that value.
+ SingleValue = Values.begin()->second;
+
+ // Build up the table contents.
+ SmallVector<Constant*, 64> TableContents(TableSize);
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ ConstantInt *CaseVal = Values[I].first;
+ Constant *CaseRes = Values[I].second;
+ assert(CaseRes->getType() == DefaultValue->getType());
+
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue())
+ .getLimitedValue();
+ TableContents[Idx] = CaseRes;
+
+ if (CaseRes != SingleValue)
+ SingleValue = 0;
+ }
+
+ // Fill in any holes in the table with the default result.
+ if (Values.size() < TableSize) {
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ if (!TableContents[I])
+ TableContents[I] = DefaultValue;
+ }
+
+ if (DefaultValue != SingleValue)
+ SingleValue = 0;
+ }
+
+ // If each element in the table contains the same value, we only need to store
+ // that single value.
+ if (SingleValue) {
+ Kind = SingleValueKind;
+ return;
+ }
+
+ // If the type is integer and the table fits in a register, build a bitmap.
+ if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) {
+ IntegerType *IT = cast<IntegerType>(DefaultValue->getType());
+ APInt TableInt(TableSize * IT->getBitWidth(), 0);
+ for (uint64_t I = TableSize; I > 0; --I) {
+ TableInt <<= IT->getBitWidth();
+ // Insert values into the bitmap. Undef values are set to zero.
+ if (!isa<UndefValue>(TableContents[I - 1])) {
+ ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
+ TableInt |= Val->getValue().zext(TableInt.getBitWidth());
+ }
+ }
+ BitMap = ConstantInt::get(M.getContext(), TableInt);
+ BitMapElementTy = IT;
+ Kind = BitMapKind;
+ ++NumBitMaps;
+ return;
+ }
+
+ // Store the table in an array.
+ ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize);
+ Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
+
+ Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
+ GlobalVariable::PrivateLinkage,
+ Initializer,
+ "switch.table");
+ Array->setUnnamedAddr(true);
+ Kind = ArrayKind;
+}
+
+Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
+ switch (Kind) {
+ case SingleValueKind:
+ return SingleValue;
+ case BitMapKind: {
+ // Type of the bitmap (e.g. i59).
+ IntegerType *MapTy = BitMap->getType();
+
+ // Cast Index to the same type as the bitmap.
+ // Note: The Index is <= the number of elements in the table, so
+ // truncating it to the width of the bitmask is safe.
+ Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
+
+ // Multiply the shift amount by the element width.
+ ShiftAmt = Builder.CreateMul(ShiftAmt,
+ ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
+ "switch.shiftamt");
+
+ // Shift down.
+ Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt,
+ "switch.downshift");
+ // Mask off.
+ return Builder.CreateTrunc(DownShifted, BitMapElementTy,
+ "switch.masked");
+ }
+ case ArrayKind: {
+ Value *GEPIndices[] = { Builder.getInt32(0), Index };
+ Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
+ "switch.gep");
+ return Builder.CreateLoad(GEP, "switch.load");
+ }
+ }
+ llvm_unreachable("Unknown lookup table kind!");
+}
+
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
+ uint64_t TableSize,
+ const Type *ElementType) {
+ if (!TD)
+ return false;
+ const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
+ if (!IT)
+ return false;
+ // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
+ // are <= 15, we could try to narrow the type.
+
+ // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
+ if (TableSize >= UINT_MAX/IT->getBitWidth())
+ return false;
+ return TD->fitsInLegalInteger(TableSize * IT->getBitWidth());
+}
+
+/// ShouldBuildLookupTable - Determine whether a lookup table should be built
+/// for this switch, based on the number of caes, size of the table and the
+/// types of the results.
+static bool ShouldBuildLookupTable(SwitchInst *SI,
+ uint64_t TableSize,
+ const TargetTransformInfo &TTI,
+ const DataLayout *TD,
+ const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
+ if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
+ return false; // TableSize overflowed, or mul below might overflow.
+
+ bool AllTablesFitInRegister = true;
+ bool HasIllegalType = false;
+ for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
+ E = ResultTypes.end(); I != E; ++I) {
+ Type *Ty = I->second;
+
+ // Saturate this flag to true.
+ HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
+
+ // Saturate this flag to false.
+ AllTablesFitInRegister = AllTablesFitInRegister &&
+ SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty);
+
+ // If both flags saturate, we're done. NOTE: This *only* works with
+ // saturating flags, and all flags have to saturate first due to the
+ // non-deterministic behavior of iterating over a dense map.
+ if (HasIllegalType && !AllTablesFitInRegister)
+ break;
+ }
+
+ // If each table would fit in a register, we should build it anyway.
+ if (AllTablesFitInRegister)
+ return true;
+
+ // Don't build a table that doesn't fit in-register if it has illegal types.
+ if (HasIllegalType)
+ return false;
+
+ // The table density should be at least 40%. This is the same criterion as for
+ // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Find the best cut-off.
+ return SI->getNumCases() * 10 >= TableSize * 4;
+}
+
+/// SwitchToLookupTable - If the switch is only used to initialize one or more
+/// phi nodes in a common successor block with different constant values,
+/// replace the switch with lookup tables.
+static bool SwitchToLookupTable(SwitchInst *SI,
+ IRBuilder<> &Builder,
+ const TargetTransformInfo &TTI,
+ const DataLayout* TD) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ // Only build lookup table when we have a target that supports it.
+ if (!TTI.shouldBuildLookupTables())
+ return false;
+
+ // FIXME: If the switch is too sparse for a lookup table, perhaps we could
+ // split off a dense part and build a lookup table for that.
+
+ // FIXME: This creates arrays of GEPs to constant strings, which means each
+ // GEP needs a runtime relocation in PIC code. We should just build one big
+ // string and lookup indices into that.
+
+ // Ignore the switch if the number of cases is too small.
+ // This is similar to the check when building jump tables in
+ // SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Determine the best cut-off.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // Figure out the corresponding result for each case value and phi node in the
+ // common destination, as well as the the min and max case values.
+ assert(SI->case_begin() != SI->case_end());
+ SwitchInst::CaseIt CI = SI->case_begin();
+ ConstantInt *MinCaseVal = CI.getCaseValue();
+ ConstantInt *MaxCaseVal = CI.getCaseValue();
+
+ BasicBlock *CommonDest = 0;
+ typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
+ SmallDenseMap<PHINode*, ResultListTy> ResultLists;
+ SmallDenseMap<PHINode*, Constant*> DefaultResults;
+ SmallDenseMap<PHINode*, Type*> ResultTypes;
+ SmallVector<PHINode*, 4> PHIs;
+
+ for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
+ ConstantInt *CaseVal = CI.getCaseValue();
+ if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = CaseVal;
+ if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+ MaxCaseVal = CaseVal;
+
+ // Resulting value at phi nodes for this case value.
+ typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
+ ResultsTy Results;
+ if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
+ Results))
+ return false;
+
+ // Append the result from this case to the list for each phi.
+ for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) {
+ if (!ResultLists.count(I->first))
+ PHIs.push_back(I->first);
+ ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second));
+ }
+ }
+
+ // Get the resulting values for the default case.
+ SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
+ if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList))
+ return false;
+ for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
+ PHINode *PHI = DefaultResultsList[I].first;
+ Constant *Result = DefaultResultsList[I].second;
+ DefaultResults[PHI] = Result;
+ ResultTypes[PHI] = Result->getType();
+ }
+
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+ if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes))
+ return false;
+
+ // Create the BB that does the lookups.
+ Module &Mod = *CommonDest->getParent()->getParent();
+ BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
+ "switch.lookup",
+ CommonDest->getParent(),
+ CommonDest);
+
+ // Check whether the condition value is within the case range, and branch to
+ // the new BB.
+ Builder.SetInsertPoint(SI);
+ Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+ "switch.tableidx");
+ Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+ MinCaseVal->getType(), TableSize));
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+
+ // Populate the BB that does the lookups.
+ Builder.SetInsertPoint(LookupBB);
+ bool ReturnedEarly = false;
+ for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
+ PHINode *PHI = PHIs[I];
+
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
+ DefaultResults[PHI], TD);
+
+ Value *Result = Table.BuildLookup(TableIndex, Builder);
+
+ // If the result is used to return immediately from the function, we want to
+ // do that right here.
+ if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) &&
+ *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) {
+ Builder.CreateRet(Result);
+ ReturnedEarly = true;
+ break;
+ }
+
+ PHI->addIncoming(Result, LookupBB);
+ }
+
+ if (!ReturnedEarly)
+ Builder.CreateBr(CommonDest);
+
+ // Remove the switch.
+ for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == SI->getDefaultDest()) continue;
+ Succ->removePredecessor(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ ++NumLookupTables;
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
+ BasicBlock *BB = SI->getParent();
+
+ if (isValueEqualityComparison(SI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
+ return SimplifyCFG(BB, TTI, TD) | true;
+
+ Value *Cond = SI->getCondition();
+ if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+ if (SimplifySwitchOnSelect(SI, Select))
+ return SimplifyCFG(BB, TTI, TD) | true;
+
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI, Builder))
+ return SimplifyCFG(BB, TTI, TD) | true;
+ }
+
+ // Try to transform the switch into an icmp and a branch.
+ if (TurnSwitchRangeIntoICmp(SI, Builder))
+ return SimplifyCFG(BB, TTI, TD) | true;
+
+ // Remove unreachable cases.
+ if (EliminateDeadSwitchCases(SI))
+ return SimplifyCFG(BB, TTI, TD) | true;
+
+ if (ForwardSwitchConditionToPHI(SI))
+ return SimplifyCFG(BB, TTI, TD) | true;
+
+ if (SwitchToLookupTable(SI, Builder, TTI, TD))
+ return SimplifyCFG(BB, TTI, TD) | true;
+
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
+ BasicBlock *BB = IBI->getParent();
+ bool Changed = false;
+
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i; --e;
+ Changed = true;
+ }
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
+ if (SimplifyIndirectBrOnSelect(IBI, SI))
+ return SimplifyCFG(BB, TTI, TD) | true;
+ }
+ return Changed;
+}
+
+bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
+ BasicBlock *BB = BI->getParent();
+
+ if (SinkCommon && SinkThenElseCodeToEnd(BI))
+ return true;
+
+ // If the Terminator is the only non-phi instruction, simplify the block.
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
+ if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
+ TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ return true;
+
+ // If the only instruction in the block is a seteq/setne comparison
+ // against a constant, try to simplify the block.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+ if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
+ if (I->isTerminator() &&
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD))
+ return true;
+ }
+
+ // If this basic block is ONLY a compare and a branch, and if a predecessor
+ // branches to us and our successor, fold the comparison into the
+ // predecessor and use logical operations to update the incoming value
+ // for PHI nodes in common successor.
+ if (FoldBranchToCommonDest(BI))
+ return SimplifyCFG(BB, TTI, TD) | true;
+ return false;
+}
+
+
+bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
+ BasicBlock *BB = BI->getParent();
+
+ // Conditional branch
+ if (isValueEqualityComparison(BI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this
+ // switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
+ return SimplifyCFG(BB, TTI, TD) | true;
+
+ // This block must be empty, except for the setcond inst, if it exists.
+ // Ignore dbg intrinsics.
+ BasicBlock::iterator I = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI, Builder))
+ return SimplifyCFG(BB, TTI, TD) | true;
+ } else if (&*I == cast<Instruction>(BI->getCondition())){
+ ++I;
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
+ return SimplifyCFG(BB, TTI, TD) | true;
+ }
+ }
+
+ // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
+ if (SimplifyBranchOnICmpChain(BI, TD, Builder))
+ return true;
+
+ // If this basic block is ONLY a compare and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the comparison into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI))
+ return SimplifyCFG(BB, TTI, TD) | true;
+
+ // We have a conditional branch to two blocks that are only reachable
+ // from BI. We know that the condbr dominates the two blocks, so see if
+ // there is any identical code in the "then" and "else" blocks. If so, we
+ // can hoist it up to the branching block.
+ if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
+ if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ if (HoistThenElseCodeToIf(BI))
+ return SimplifyCFG(BB, TTI, TD) | true;
+ } else {
+ // If Successor #1 has multiple preds, we may be able to conditionally
+ // execute Successor #0 if it branches to successor #1.
+ TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
+ if (Succ0TI->getNumSuccessors() == 1 &&
+ Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
+ return SimplifyCFG(BB, TTI, TD) | true;
+ }
+ } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ // If Successor #0 has multiple preds, we may be able to conditionally
+ // execute Successor #1 if it branches to successor #0.
+ TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
+ if (Succ1TI->getNumSuccessors() == 1 &&
+ Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
+ return SimplifyCFG(BB, TTI, TD) | true;
+ }
+
+ // If this is a branch on a phi node in the current block, thread control
+ // through this block if any PHI node entries are constants.
+ if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+ if (PN->getParent() == BI->getParent())
+ if (FoldCondBranchOnPHI(BI, TD))
+ return SimplifyCFG(BB, TTI, TD) | true;
+
+ // Scan predecessor blocks for conditional branches.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (SimplifyCondBranchToCondBranch(PBI, BI))
+ return SimplifyCFG(BB, TTI, TD) | true;
+
+ return false;
+}
+
+/// Check if passing a value to an instruction will cause undefined behavior.
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ if (I->use_empty())
+ return false;
+
+ if (C->isNullValue()) {
+ // Only look at the first use, avoid hurting compile time with long uselists
+ User *Use = *I->use_begin();
+
+ // Now make sure that there are no instructions in between that can alter
+ // control flow (eg. calls)
+ for (BasicBlock::iterator i = ++BasicBlock::iterator(I); &*i != Use; ++i)
+ if (i == I->getParent()->end() || i->mayHaveSideEffects())
+ return false;
+
+ // Look through GEPs. A load from a GEP derived from NULL is still undefined
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
+ if (GEP->getPointerOperand() == I)
+ return passingValueIsAlwaysUndefined(V, GEP);
+
+ // Look through bitcasts.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
+ return passingValueIsAlwaysUndefined(V, BC);
+
+ // Load from null is undefined.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Use))
+ if (!LI->isVolatile())
+ return LI->getPointerAddressSpace() == 0;
+
+ // Store to null is undefined.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Use))
+ if (!SI->isVolatile())
+ return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+ }
+ return false;
+}
+
+/// If BB has an incoming value that will always trigger undefined behavior
+/// (eg. null pointer dereference), remove the branch leading here.
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
+ for (BasicBlock::iterator i = BB->begin();
+ PHINode *PHI = dyn_cast<PHINode>(i); ++i)
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) {
+ TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator();
+ IRBuilder<> Builder(T);
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ BB->removePredecessor(PHI->getIncomingBlock(i));
+ // Turn uncoditional branches into unreachables and remove the dead
+ // destination from conditional branches.
+ if (BI->isUnconditional())
+ Builder.CreateUnreachable();
+ else
+ Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) :
+ BI->getSuccessor(0));
+ BI->eraseFromParent();
+ return true;
+ }
+ // TODO: SwitchInst.
+ }
+
+ return false;
+}
+
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
+
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+ // Remove basic blocks that have no predecessors (except the entry block)...
+ // or that just have themself as a predecessor. These are unreachable.
+ if ((pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) ||
+ BB->getSinglePredecessor() == BB) {
+ DEBUG(dbgs() << "Removing BB: \n" << *BB);
+ DeleteDeadBlock(BB);
+ return true;
+ }
+
+ // Check to see if we can constant propagate this terminator instruction
+ // away...
+ Changed |= ConstantFoldTerminator(BB, true);
+
+ // Check for and eliminate duplicate PHI nodes in this block.
+ Changed |= EliminateDuplicatePHINodes(BB);
+
+ // Check for and remove branches that will always cause undefined behavior.
+ Changed |= removeUndefIntroducingPredecessor(BB);
+
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ //
+ if (MergeBlockIntoPredecessor(BB))
+ return true;
+
+ IRBuilder<> Builder(BB);
+
+ // If there is a trivial two-entry PHI node in this basic block, and we can
+ // eliminate it, do so now.
+ if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+ if (PN->getNumIncomingValues() == 2)
+ Changed |= FoldTwoEntryPHINode(PN, TD);
+
+ Builder.SetInsertPoint(BB->getTerminator());
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (BI->isUnconditional()) {
+ if (SimplifyUncondBranch(BI, Builder)) return true;
+ } else {
+ if (SimplifyCondBranch(BI, Builder)) return true;
+ }
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ if (SimplifyReturn(RI, Builder)) return true;
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ if (SimplifyResume(RI, Builder)) return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (SimplifySwitch(SI, Builder)) return true;
+ } else if (UnreachableInst *UI =
+ dyn_cast<UnreachableInst>(BB->getTerminator())) {
+ if (SimplifyUnreachable(UI)) return true;
+ } else if (IndirectBrInst *IBI =
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ if (SimplifyIndirectBr(IBI)) return true;
+ }
+
+ return Changed;
+}
+
+/// SimplifyCFG - This function is used to do simplification of a CFG. For
+/// example, it adjusts branches to branches to eliminate the extra hop, it
+/// eliminates unreachable basic blocks, and does other "peephole" optimization
+/// of the CFG. It returns true if a modification was made.
+///
+bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
+ const DataLayout *TD) {
+ return SimplifyCFGOpt(TTI, TD).run(BB);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
new file mode 100644
index 000000000000..41c207c3d5cb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -0,0 +1,393 @@
+//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements induction variable simplification. It does
+// not define any actual pass or policy, but provides a single function to
+// simplify a loop's induction variables based on ScalarEvolution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indvars"
+
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+
+namespace {
+ /// SimplifyIndvar - This is a utility for simplifying induction variables
+ /// based on ScalarEvolution. It is the primary instrument of the
+ /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
+ /// other loop passes that preserve SCEV.
+ class SimplifyIndvar {
+ Loop *L;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ const DataLayout *TD; // May be NULL
+
+ SmallVectorImpl<WeakVH> &DeadInsts;
+
+ bool Changed;
+
+ public:
+ SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) :
+ L(Loop),
+ LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
+ SE(SE),
+ TD(LPM->getAnalysisIfAvailable<DataLayout>()),
+ DeadInsts(Dead),
+ Changed(false) {
+ assert(LI && "IV simplification requires LoopInfo");
+ }
+
+ bool hasChanged() const { return Changed; }
+
+ /// Iteratively perform simplification on a worklist of users of the
+ /// specified induction variable. This is the top-level driver that applies
+ /// all simplicitions to users of an IV.
+ void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL);
+
+ Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
+
+ bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
+ void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
+ void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+ bool IsSigned);
+ };
+}
+
+/// foldIVUser - Fold an IV operand into its use. This removes increments of an
+/// aligned IV when used by a instruction that ignores the low bits.
+///
+/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+///
+/// Return the operand of IVOperand for this induction variable if IVOperand can
+/// be folded (in case more folding opportunities have been exposed).
+/// Otherwise return null.
+Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
+ Value *IVSrc = 0;
+ unsigned OperIdx = 0;
+ const SCEV *FoldedExpr = 0;
+ switch (UseInst->getOpcode()) {
+ default:
+ return 0;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ // We're only interested in the case where we know something about
+ // the numerator and have a constant denominator.
+ if (IVOperand != UseInst->getOperand(OperIdx) ||
+ !isa<ConstantInt>(UseInst->getOperand(1)))
+ return 0;
+
+ // Attempt to fold a binary operator with constant operand.
+ // e.g. ((I + 1) >> 2) => I >> 2
+ if (!isa<BinaryOperator>(IVOperand)
+ || !isa<ConstantInt>(IVOperand->getOperand(1)))
+ return 0;
+
+ IVSrc = IVOperand->getOperand(0);
+ // IVSrc must be the (SCEVable) IV, since the other operand is const.
+ assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
+
+ ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
+ if (UseInst->getOpcode() == Instruction::LShr) {
+ // Get a constant for the divisor. See createSCEV.
+ uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
+ if (D->getValue().uge(BitWidth))
+ return 0;
+
+ D = ConstantInt::get(UseInst->getContext(),
+ APInt(BitWidth, 1).shl(D->getZExtValue()));
+ }
+ FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+ }
+ // We have something that might fold it's operand. Compare SCEVs.
+ if (!SE->isSCEVable(UseInst->getType()))
+ return 0;
+
+ // Bypass the operand if SCEV can prove it has no effect.
+ if (SE->getSCEV(UseInst) != FoldedExpr)
+ return 0;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
+ << " -> " << *UseInst << '\n');
+
+ UseInst->setOperand(OperIdx, IVSrc);
+ assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+
+ ++NumElimOperand;
+ Changed = true;
+ if (IVOperand->use_empty())
+ DeadInsts.push_back(IVOperand);
+ return IVSrc;
+}
+
+/// eliminateIVComparison - SimplifyIVUsers helper for eliminating useless
+/// comparisons against an induction variable.
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+ unsigned IVOperIdx = 0;
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ if (IVOperand != ICmp->getOperand(0)) {
+ // Swapped
+ assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+ IVOperIdx = 1;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
+ const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // If the condition is always true or always false, replace it with
+ // a constant value.
+ if (SE->isKnownPredicate(Pred, S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+ else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+ else
+ return;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ ++NumElimCmp;
+ Changed = true;
+ DeadInsts.push_back(ICmp);
+}
+
+/// eliminateIVRemainder - SimplifyIVUsers helper for eliminating useless
+/// remainder operations operating on an induction variable.
+void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
+ Value *IVOperand,
+ bool IsSigned) {
+ // We're only interested in the case where we know something about
+ // the numerator.
+ if (IVOperand != Rem->getOperand(0))
+ return;
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+ const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // i % n --> i if i is in [0,n).
+ if ((!IsSigned || SE->isKnownNonNegative(S)) &&
+ SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ S, X))
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ else {
+ // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+ const SCEV *LessOne =
+ SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+ if (IsSigned && !SE->isKnownNonNegative(LessOne))
+ return;
+
+ if (!SE->isKnownPredicate(IsSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ LessOne, X))
+ return;
+
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
+ Rem->getOperand(0), Rem->getOperand(1));
+ SelectInst *Sel =
+ SelectInst::Create(ICmp,
+ ConstantInt::get(Rem->getType(), 0),
+ Rem->getOperand(0), "tmp", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ }
+
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.push_back(Rem);
+}
+
+/// eliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// no observable side-effect given the range of IV values.
+/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ eliminateIVComparison(ICmp, IVOperand);
+ return true;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ eliminateIVRemainder(Rem, IVOperand, IsSigned);
+ return true;
+ }
+ }
+
+ // Eliminate any operation that SCEV can prove is an identity function.
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.push_back(UseInst);
+ return true;
+}
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist.
+///
+static void pushIVUsers(
+ Instruction *Def,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (User != Def && Simplified.insert(User))
+ SimpleIVUsers.push_back(std::make_pair(User, Def));
+ }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // Only consider affine recurrences.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
+
+/// simplifyUsers - Iteratively perform simplification on a worklist of users
+/// of the specified induction variable. Each successive simplification may push
+/// more users which may themselves be candidates for simplification.
+///
+/// This algorithm does not require IVUsers analysis. Instead, it simplifies
+/// instructions in-place during analysis. Rather than rewriting induction
+/// variables bottom-up from their users, it transforms a chain of IVUsers
+/// top-down, updating the IR only when it encouters a clear optimization
+/// opportunitiy.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
+ if (!SE->isSCEVable(CurrIV->getType()))
+ return;
+
+ // Instructions processed by SimplifyIndvar for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+ // Use-def pairs if IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ std::pair<Instruction*, Instruction*> UseOper =
+ SimpleIVUsers.pop_back_val();
+ // Bypass back edges to avoid extra work.
+ if (UseOper.first == CurrIV) continue;
+
+ Instruction *IVOperand = UseOper.second;
+ for (unsigned N = 0; IVOperand; ++N) {
+ assert(N <= Simplified.size() && "runaway iteration");
+
+ Value *NewOper = foldIVUser(UseOper.first, IVOperand);
+ if (!NewOper)
+ break; // done folding
+ IVOperand = dyn_cast<Instruction>(NewOper);
+ }
+ if (!IVOperand)
+ continue;
+
+ if (eliminateIVUser(UseOper.first, IVOperand)) {
+ pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
+ continue;
+ }
+ CastInst *Cast = dyn_cast<CastInst>(UseOper.first);
+ if (V && Cast) {
+ V->visitCast(Cast);
+ continue;
+ }
+ if (isSimpleIVUser(UseOper.first, L, SE)) {
+ pushIVUsers(UseOper.first, Simplified, SimpleIVUsers);
+ }
+ }
+}
+
+namespace llvm {
+
+void IVVisitor::anchor() { }
+
+/// simplifyUsersOfIV - Simplify instructions that use this induction variable
+/// by using ScalarEvolution to analyze the IV's recurrence.
+bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead, IVVisitor *V)
+{
+ LoopInfo *LI = &LPM->getAnalysis<LoopInfo>();
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead);
+ SIV.simplifyUsers(CurrIV, V);
+ return SIV.hasChanged();
+}
+
+/// simplifyLoopIVs - Simplify users of induction variables within this
+/// loop. This does not actually change or add IVs.
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead) {
+ bool Changed = false;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead);
+ }
+ return Changed;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
new file mode 100644
index 000000000000..f9687e4d5890
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -0,0 +1,100 @@
+//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification. If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions removed");
+
+namespace {
+ struct InstSimplifier : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstSimplifier() : FunctionPass(ID) {
+ initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+
+ /// runOnFunction - Remove instructions that simplify.
+ bool runOnFunction(Function &F) {
+ const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+ bool Changed = false;
+
+ do {
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI)
+ for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) {
+ Instruction *I = BI++;
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+ // Don't waste time simplifying unused instructions.
+ if (!I->use_empty())
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+ I->replaceAllUsesWith(V);
+ ++NumSimplified;
+ Changed = true;
+ }
+ Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+ } while (!ToSimplify->empty());
+
+ return Changed;
+ }
+ };
+}
+
+char InstSimplifier::ID = 0;
+INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+char &llvm::InstructionSimplifierID = InstSimplifier::ID;
+
+// Public interface to the simplify instructions pass.
+FunctionPass *llvm::createInstructionSimplifierPass() {
+ return new InstSimplifier();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
new file mode 100644
index 000000000000..c231704414fc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -0,0 +1,1949 @@
+//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification. If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+
+using namespace llvm;
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class LibCallOptimization {
+protected:
+ Function *Caller;
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ const LibCallSimplifier *LCS;
+ LLVMContext* Context;
+public:
+ LibCallOptimization() { }
+ virtual ~LibCallOptimization() {}
+
+ /// callOptimizer - This pure virtual method is implemented by base classes to
+ /// do various optimizations. If this returns null then no transformation was
+ /// performed. If it returns CI, then it transformed the call and CI is to be
+ /// deleted. If it returns something else, replace CI with the new value and
+ /// delete CI.
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+ =0;
+
+ /// ignoreCallingConv - Returns false if this transformation could possibly
+ /// change the calling convention.
+ virtual bool ignoreCallingConv() { return false; }
+
+ Value *optimizeCall(CallInst *CI, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const LibCallSimplifier *LCS, IRBuilder<> &B) {
+ Caller = CI->getParent()->getParent();
+ this->TD = TD;
+ this->TLI = TLI;
+ this->LCS = LCS;
+ if (CI->getCalledFunction())
+ Context = &CI->getCalledFunction()->getContext();
+
+ // We never change the calling convention.
+ if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C)
+ return NULL;
+
+ return callOptimizer(CI->getCalledFunction(), CI, B);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality
+/// comparisons with With.
+static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality() && IC->getOperand(1) == With)
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static bool callHasFloatingPointArgument(const CallInst *CI) {
+ for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
+ it != e; ++it) {
+ if ((*it)->getType()->isFloatingPointTy())
+ return true;
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Fortified Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct FortifiedLibCallOptimization : public LibCallOptimization {
+protected:
+ virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp,
+ bool isString) const = 0;
+};
+
+struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
+ CallInst *CI;
+
+ bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+ return true;
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
+ if (SizeCI->isAllOnesValue())
+ return true;
+ if (isString) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len == 0) return false;
+ return SizeCI->getZExtValue() >= Len;
+ }
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(
+ CI->getArgOperand(SizeArgOp)))
+ return SizeCI->getZExtValue() >= Arg->getZExtValue();
+ }
+ return false;
+ }
+};
+
+struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+ false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(Context))
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // __strcpy_chk(x,x) -> x
+ return Src;
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit then just lower to a plain strcpy. Otherwise we'll keep our
+ // strcpy_chk call which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+ // string lengths for varying.
+ if (isFoldable(2, 1, true)) {
+ Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ return Ret;
+ } else {
+ // Maybe we can stil fold __strcpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ // This optimization require DataLayout.
+ if (!TD) return 0;
+
+ Value *Ret =
+ EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(Context), Len),
+ CI->getArgOperand(2), B, TD, TLI);
+ return Ret;
+ }
+ return 0;
+ }
+};
+
+struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ }
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit then just lower to a plain stpcpy. Otherwise we'll keep our
+ // stpcpy_chk call which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+ // string lengths for varying.
+ if (isFoldable(2, 1, true)) {
+ Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ return Ret;
+ } else {
+ // Maybe we can stil fold __stpcpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ // This optimization require DataLayout.
+ if (!TD) return 0;
+
+ Type *PT = FT->getParamType(0);
+ Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+ Value *DstEnd = B.CreateGEP(Dst,
+ ConstantInt::get(TD->getIntPtrType(PT),
+ Len - 1));
+ if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI))
+ return 0;
+ return DstEnd;
+ }
+ return 0;
+ }
+};
+
+struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TD, TLI,
+ Name.substr(2, 7));
+ return Ret;
+ }
+ return 0;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// String and Memory Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct StrCatOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcat" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ return emitStrLenMemCpy(Src, Dst, Len, B);
+ }
+
+ Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
+ IRBuilder<> &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+ if (!DstLen)
+ return 0;
+
+ // Now that we have the destination's length, we must index into the
+ // destination's pointer to get the actual memcpy destination (end of
+ // the string .. we're concatenating).
+ Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ return Dst;
+ }
+};
+
+struct StrNCatOpt : public StrCatOpt {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncat" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ uint64_t Len;
+
+ // We don't do anything if length is not constant
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen; // Unbias length.
+
+ // Handle the simple, do-nothing cases:
+ // strncat(x, "", c) -> x
+ // strncat(x, c, 0) -> x
+ if (SrcLen == 0 || Len == 0) return Dst;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // We don't optimize this case
+ if (Len < SrcLen) return 0;
+
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further
+ return emitStrLenMemCpy(Src, Dst, SrcLen, B);
+ }
+};
+
+struct StrChrOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strchr" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (CharC == 0) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ uint64_t Len = GetStringLength(SrcStr);
+ if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
+ return 0;
+
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ B, TD, TLI);
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str))
+ return 0;
+
+ // Compute the offset, make sure to handle the case when we're searching for
+ // zero (a weird way to spell strlen).
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.find(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+ // strchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ }
+};
+
+struct StrRChrOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strrchr" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return 0;
+
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (TD && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TD, TLI);
+ return 0;
+ }
+
+ // Compute the offset.
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.rfind(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
+
+ // strrchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+ }
+};
+
+struct StrCmpOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcmp" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(), Str1.compare(Str2));
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len1 && Len2) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ return EmitMemCmp(Str1P, Str2P,
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ std::min(Len1, Len2)), B, TD, TLI);
+ }
+
+ return 0;
+ }
+};
+
+struct StrNCmpOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncmp" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Length = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ // strncmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2) {
+ StringRef SubStr1 = Str1.substr(0, Length);
+ StringRef SubStr2 = Str2.substr(0, Length);
+ return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
+ }
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ return 0;
+ }
+};
+
+struct StrCpyOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcpy" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ // We have enough information to now generate the memcpy call to do the
+ // copy for us. Make a memcpy to copy the nul byte with align = 1.
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ return Dst;
+ }
+};
+
+struct StpCpyOpt: public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "stpcpy" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ }
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ Type *PT = FT->getParamType(0);
+ Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+ Value *DstEnd = B.CreateGEP(Dst,
+ ConstantInt::get(TD->getIntPtrType(PT),
+ Len - 1));
+
+ // We have enough information to now generate the memcpy call to do the
+ // copy for us. Make a memcpy to copy the nul byte with align = 1.
+ B.CreateMemCpy(Dst, Src, LenV, 1);
+ return DstEnd;
+ }
+};
+
+struct StrNCpyOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *LenOp = CI->getArgOperand(2);
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen;
+
+ if (SrcLen == 0) {
+ // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+ B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
+ return Dst;
+ }
+
+ uint64_t Len;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // Let strncpy handle the zero padding
+ if (Len > SrcLen+1) return 0;
+
+ Type *PT = FT->getParamType(0);
+ // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(PT), Len), 1);
+
+ return Dst;
+ }
+};
+
+struct StrLenOpt : public LibCallOptimization {
+ virtual bool ignoreCallingConv() { return true; }
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ Value *Src = CI->getArgOperand(0);
+
+ // Constant folding: strlen("xyz") -> 3
+ if (uint64_t Len = GetStringLength(Src))
+ return ConstantInt::get(CI->getType(), Len-1);
+
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ if (isOnlyUsedInZeroEqualityComparison(CI))
+ return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ return 0;
+ }
+};
+
+struct StrPBrkOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ FT->getReturnType() != FT->getParamType(0))
+ return 0;
+
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strpbrk(s, "") -> NULL
+ // strpbrk("", s) -> NULL
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t I = S1.find_first_of(S2);
+ if (I == std::string::npos) // No match.
+ return Constant::getNullValue(CI->getType());
+
+ return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ }
+
+ // strpbrk(s, "a") -> strchr(s, 'a')
+ if (TD && HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
+
+ return 0;
+ }
+};
+
+struct StrToOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy())
+ return 0;
+
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr)) {
+ // With a null EndPtr, this function won't capture the main argument.
+ // It would be readonly too, except that it still may write to errno.
+ CI->addAttribute(1, Attribute::NoCapture);
+ }
+
+ return 0;
+ }
+};
+
+struct StrSpnOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strspn(s, "") -> 0
+ // strspn("", s) -> 0
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_not_of(S2);
+ if (Pos == StringRef::npos) Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
+
+ return 0;
+ }
+};
+
+struct StrCSpnOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strcspn("", s) -> 0
+ if (HasS1 && S1.empty())
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_of(S2);
+ if (Pos == StringRef::npos) Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
+
+ // strcspn(s, "") -> strlen(s)
+ if (TD && HasS2 && S2.empty())
+ return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);
+
+ return 0;
+ }
+};
+
+struct StrStrOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isPointerTy())
+ return 0;
+
+ // fold strstr(x, x) -> x.
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
+ if (!StrLen)
+ return 0;
+ Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, TD, TLI);
+ if (!StrNCmp)
+ return 0;
+ for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
+ UI != UE; ) {
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
+ Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()),
+ "cmp");
+ LCS->replaceAllUsesWith(Old, Cmp);
+ }
+ return CI;
+ }
+
+ // See if either input string is a constant string.
+ StringRef SearchStr, ToFindStr;
+ bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+
+ // fold strstr(x, "") -> x.
+ if (HasStr2 && ToFindStr.empty())
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // If both strings are known, constant fold it.
+ if (HasStr1 && HasStr2) {
+ std::string::size_type Offset = SearchStr.find(ToFindStr);
+
+ if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
+ return Constant::getNullValue(CI->getType());
+
+ // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
+ Value *Result = CastToCStr(CI->getArgOperand(0), B);
+ Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ return B.CreateBitCast(Result, CI->getType());
+ }
+
+ // fold strstr(x, "y") -> strchr(x, 'y').
+ if (HasStr2 && ToFindStr.size() == 1) {
+ Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
+ }
+ return 0;
+ }
+};
+
+struct MemCmpOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy(32))
+ return 0;
+
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Make sure we have a constant length.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!LenC) return 0;
+ uint64_t Len = LenC->getZExtValue();
+
+ if (Len == 0) // memcmp(s1,s2,0) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+ if (Len == 1) {
+ Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
+ return B.CreateSub(LHSV, RHSV, "chardiff");
+ }
+
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ StringRef LHSStr, RHSStr;
+ if (getConstantStringInfo(LHS, LHSStr) &&
+ getConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.size() || Len > RHSStr.size())
+ return 0;
+ // Fold the memcmp and normalize the result. This way we get consistent
+ // results across multiple platforms.
+ uint64_t Ret = 0;
+ int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ if (Cmp < 0)
+ Ret = -1;
+ else if (Cmp > 0)
+ Ret = 1;
+ return ConstantInt::get(CI->getType(), Ret);
+ }
+
+ return 0;
+ }
+};
+
+struct MemCpyOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
+
+struct MemMoveOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
+
+struct MemSetOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+ bool CheckRetType;
+ UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
+ return 0;
+
+ if (CheckRetType) {
+ // Check if all the uses for function like 'sin' are converted to float.
+ for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
+ ++UseI) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+ if (Cast == 0 || !Cast->getType()->isFloatTy())
+ return 0;
+ }
+ }
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+ return 0;
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
+ }
+};
+
+struct UnsafeFPLibCallOptimization : public LibCallOptimization {
+ bool UnsafeFPShrink;
+ UnsafeFPLibCallOptimization(bool UnsafeFPShrink) {
+ this->UnsafeFPShrink = UnsafeFPShrink;
+ }
+};
+
+struct CosOpt : public UnsafeFPLibCallOptimization {
+ CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "cos" &&
+ TLI->has(LibFunc::cosf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+ }
+
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ // cos(-x) -> cos(x)
+ Value *Op1 = CI->getArgOperand(0);
+ if (BinaryOperator::isFNeg(Op1)) {
+ BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
+ return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+ }
+ return Ret;
+ }
+};
+
+struct PowOpt : public UnsafeFPLibCallOptimization {
+ PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "pow" &&
+ TLI->has(LibFunc::powf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+ }
+
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ return Op1C;
+ if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ }
+
+ ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+ if (Op2C == 0) return Ret;
+
+ if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
+ return ConstantFP::get(CI->getType(), 1.0);
+
+ if (Op2C->isExactlyValue(0.5)) {
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+ // This is faster than calling pow, and still handles negative zero
+ // and negative infinity correctly.
+ // TODO: In fast-math mode, this could be just sqrt(x).
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+ Value *Inf = ConstantFP::getInfinity(CI->getType());
+ Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+ Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
+ Callee->getAttributes());
+ Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
+ Callee->getAttributes());
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
+ return Sel;
+ }
+
+ if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ return Op1;
+ if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ return B.CreateFMul(Op1, Op1, "pow2");
+ if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
+ Op1, "powrecip");
+ return 0;
+ }
+};
+
+struct Exp2Opt : public UnsafeFPLibCallOptimization {
+ Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "exp2" &&
+ TLI->has(LibFunc::exp2)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+ }
+
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ Value *Op = CI->getArgOperand(0);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
+ Value *LdExpArg = 0;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
+ }
+
+ if (LdExpArg) {
+ const char *Name;
+ if (Op->getType()->isFloatTy())
+ Name = "ldexpf";
+ else if (Op->getType()->isDoubleTy())
+ Name = "ldexp";
+ else
+ Name = "ldexpl";
+
+ Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = Caller->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+ }
+ return Ret;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Integer Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct FFSOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 1 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getParamType(0)->isIntegerTy())
+ return 0;
+
+ Value *Op = CI->getArgOperand(0);
+
+ // Constant fold.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ if (CI->isZero()) // ffs(0) -> 0.
+ return B.getInt32(0);
+ // ffs(c) -> cttz(c)+1
+ return B.getInt32(CI->getValue().countTrailingZeros() + 1);
+ }
+
+ // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+ Intrinsic::cttz, ArgType);
+ Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+ V = B.CreateIntCast(V, B.getInt32Ty(), false);
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
+ return B.CreateSelect(Cond, V, B.getInt32(0));
+ }
+};
+
+struct AbsOpt : public LibCallOptimization {
+ virtual bool ignoreCallingConv() { return true; }
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(integer) where the types agree.
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ // abs(x) -> x >s -1 ? x : -x
+ Value *Op = CI->getArgOperand(0);
+ Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
+ "ispos");
+ Value *Neg = B.CreateNeg(Op, "neg");
+ return B.CreateSelect(Pos, Op, Neg);
+ }
+};
+
+struct IsDigitOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isdigit(c) -> (c-'0') <u 10
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+struct IsAsciiOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+struct ToAsciiOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require i32(i32)
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // toascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getArgOperand(0),
+ ConstantInt::get(CI->getType(),0x7F));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct PrintFOpt : public LibCallOptimization {
+ Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ return 0;
+
+ // Empty format string -> noop.
+ if (FormatStr.empty()) // Tolerate printf's declared void.
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), 0);
+
+ // Do not do any of the following transformations if the printf return value
+ // is used, in general the printf return value is not compatible with either
+ // putchar() or puts().
+ if (!CI->use_empty())
+ return 0;
+
+ // printf("x") -> putchar('x'), even for '%'.
+ if (FormatStr.size() == 1) {
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr[FormatStr.size()-1] == '\n' &&
+ FormatStr.find('%') == std::string::npos) { // no format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr = FormatStr.drop_back();
+ Value *GV = B.CreateGlobalString(FormatStr, "str");
+ Value *NewCI = EmitPutS(GV, B, TD, TLI);
+ return (CI->use_empty() || !NewCI) ?
+ NewCI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ }
+
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
+
+ if (CI->use_empty() || !Res) return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy()) {
+ return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
+ }
+ return 0;
+ }
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *IPrintFFn =
+ M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
+
+struct SPrintFOpt : public LibCallOptimization {
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return 0;
+
+ // If we just have a format string (nothing else crazy) transform it.
+ if (CI->getNumArgOperands() == 2) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%')
+ return 0; // we found a format specifier, bail out.
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ FormatStr.size() + 1), 1); // nul byte.
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
+ Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
+
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
+ if (!Len)
+ return 0;
+ Value *IncLen = B.CreateAdd(Len,
+ ConstantInt::get(Len->getType(), 1),
+ "leninc");
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+
+ // The sprintf result is the unincremented number of bytes in the string.
+ return B.CreateIntCast(Len, CI->getType(), false);
+ }
+ return 0;
+ }
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed pointer arguments and an integer result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+ // point arguments.
+ if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *SIPrintFFn =
+ M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
+
+struct FPrintFOpt : public LibCallOptimization {
+ Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // All the optimizations depend on the format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return 0;
+
+ // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+ if (CI->getNumArgOperands() == 2) {
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
+ return 0; // We found a format specifier.
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ Value *NewCI = EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // fprintf(F, "%c", chr) --> fputc(chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
+ TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+ }
+
+ if (FormatStr[1] == 's') {
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+ return 0;
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
+ }
+ return 0;
+ }
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed paramters as pointers and integer result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+ // floating point arguments.
+ if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *FIPrintFFn =
+ M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(FIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
+
+struct FWriteOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require a pointer, an integer, an integer, a pointer, returning integer.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getParamType(3)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ // Get the element size and count.
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeC || !CountC) return 0;
+ uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
+
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
+
+ // If this is writing one byte, turn it into fputc.
+ // This optimisation is only valid, if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+ }
+
+ return 0;
+ }
+};
+
+struct FPutsOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // Require two pointers. Also, we can't optimize if return value is used.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !CI->use_empty())
+ return 0;
+
+ // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
+ if (!Len) return 0;
+ // Known to have no uses (see above).
+ return EmitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, TD, TLI);
+ }
+};
+
+struct PutsOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ // Check for a constant string.
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+ return 0;
+
+ if (Str.empty() && CI->use_empty()) {
+ // puts("") -> putchar('\n')
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ return 0;
+ }
+};
+
+} // End anonymous namespace.
+
+namespace llvm {
+
+class LibCallSimplifierImpl {
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ const LibCallSimplifier *LCS;
+ bool UnsafeFPShrink;
+
+ // Math library call optimizations.
+ CosOpt Cos;
+ PowOpt Pow;
+ Exp2Opt Exp2;
+public:
+ LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const LibCallSimplifier *LCS,
+ bool UnsafeFPShrink = false)
+ : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
+ this->TD = TD;
+ this->TLI = TLI;
+ this->LCS = LCS;
+ this->UnsafeFPShrink = UnsafeFPShrink;
+ }
+
+ Value *optimizeCall(CallInst *CI);
+ LibCallOptimization *lookupOptimization(CallInst *CI);
+ bool hasFloatVersion(StringRef FuncName);
+};
+
+bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) {
+ LibFunc::Func Func;
+ SmallString<20> FloatFuncName = FuncName;
+ FloatFuncName += 'f';
+ if (TLI->getLibFunc(FloatFuncName, Func))
+ return TLI->has(Func);
+ return false;
+}
+
+// Fortified library call optimizations.
+static MemCpyChkOpt MemCpyChk;
+static MemMoveChkOpt MemMoveChk;
+static MemSetChkOpt MemSetChk;
+static StrCpyChkOpt StrCpyChk;
+static StpCpyChkOpt StpCpyChk;
+static StrNCpyChkOpt StrNCpyChk;
+
+// String library call optimizations.
+static StrCatOpt StrCat;
+static StrNCatOpt StrNCat;
+static StrChrOpt StrChr;
+static StrRChrOpt StrRChr;
+static StrCmpOpt StrCmp;
+static StrNCmpOpt StrNCmp;
+static StrCpyOpt StrCpy;
+static StpCpyOpt StpCpy;
+static StrNCpyOpt StrNCpy;
+static StrLenOpt StrLen;
+static StrPBrkOpt StrPBrk;
+static StrToOpt StrTo;
+static StrSpnOpt StrSpn;
+static StrCSpnOpt StrCSpn;
+static StrStrOpt StrStr;
+
+// Memory library call optimizations.
+static MemCmpOpt MemCmp;
+static MemCpyOpt MemCpy;
+static MemMoveOpt MemMove;
+static MemSetOpt MemSet;
+
+// Math library call optimizations.
+static UnaryDoubleFPOpt UnaryDoubleFP(false);
+static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+
+ // Integer library call optimizations.
+static FFSOpt FFS;
+static AbsOpt Abs;
+static IsDigitOpt IsDigit;
+static IsAsciiOpt IsAscii;
+static ToAsciiOpt ToAscii;
+
+// Formatting and IO library call optimizations.
+static PrintFOpt PrintF;
+static SPrintFOpt SPrintF;
+static FPrintFOpt FPrintF;
+static FWriteOpt FWrite;
+static FPutsOpt FPuts;
+static PutsOpt Puts;
+
+LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
+ LibFunc::Func Func;
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+
+ // Next check for intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::pow:
+ return &Pow;
+ case Intrinsic::exp2:
+ return &Exp2;
+ default:
+ return 0;
+ }
+ }
+
+ // Then check for known library functions.
+ if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+ switch (Func) {
+ case LibFunc::strcat:
+ return &StrCat;
+ case LibFunc::strncat:
+ return &StrNCat;
+ case LibFunc::strchr:
+ return &StrChr;
+ case LibFunc::strrchr:
+ return &StrRChr;
+ case LibFunc::strcmp:
+ return &StrCmp;
+ case LibFunc::strncmp:
+ return &StrNCmp;
+ case LibFunc::strcpy:
+ return &StrCpy;
+ case LibFunc::stpcpy:
+ return &StpCpy;
+ case LibFunc::strncpy:
+ return &StrNCpy;
+ case LibFunc::strlen:
+ return &StrLen;
+ case LibFunc::strpbrk:
+ return &StrPBrk;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ return &StrTo;
+ case LibFunc::strspn:
+ return &StrSpn;
+ case LibFunc::strcspn:
+ return &StrCSpn;
+ case LibFunc::strstr:
+ return &StrStr;
+ case LibFunc::memcmp:
+ return &MemCmp;
+ case LibFunc::memcpy:
+ return &MemCpy;
+ case LibFunc::memmove:
+ return &MemMove;
+ case LibFunc::memset:
+ return &MemSet;
+ case LibFunc::cosf:
+ case LibFunc::cos:
+ case LibFunc::cosl:
+ return &Cos;
+ case LibFunc::powf:
+ case LibFunc::pow:
+ case LibFunc::powl:
+ return &Pow;
+ case LibFunc::exp2l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ return &Exp2;
+ case LibFunc::ffs:
+ case LibFunc::ffsl:
+ case LibFunc::ffsll:
+ return &FFS;
+ case LibFunc::abs:
+ case LibFunc::labs:
+ case LibFunc::llabs:
+ return &Abs;
+ case LibFunc::isdigit:
+ return &IsDigit;
+ case LibFunc::isascii:
+ return &IsAscii;
+ case LibFunc::toascii:
+ return &ToAscii;
+ case LibFunc::printf:
+ return &PrintF;
+ case LibFunc::sprintf:
+ return &SPrintF;
+ case LibFunc::fprintf:
+ return &FPrintF;
+ case LibFunc::fwrite:
+ return &FWrite;
+ case LibFunc::fputs:
+ return &FPuts;
+ case LibFunc::puts:
+ return &Puts;
+ case LibFunc::ceil:
+ case LibFunc::fabs:
+ case LibFunc::floor:
+ case LibFunc::rint:
+ case LibFunc::round:
+ case LibFunc::nearbyint:
+ case LibFunc::trunc:
+ if (hasFloatVersion(FuncName))
+ return &UnaryDoubleFP;
+ return 0;
+ case LibFunc::acos:
+ case LibFunc::acosh:
+ case LibFunc::asin:
+ case LibFunc::asinh:
+ case LibFunc::atan:
+ case LibFunc::atanh:
+ case LibFunc::cbrt:
+ case LibFunc::cosh:
+ case LibFunc::exp:
+ case LibFunc::exp10:
+ case LibFunc::expm1:
+ case LibFunc::log:
+ case LibFunc::log10:
+ case LibFunc::log1p:
+ case LibFunc::log2:
+ case LibFunc::logb:
+ case LibFunc::sin:
+ case LibFunc::sinh:
+ case LibFunc::sqrt:
+ case LibFunc::tan:
+ case LibFunc::tanh:
+ if (UnsafeFPShrink && hasFloatVersion(FuncName))
+ return &UnsafeUnaryDoubleFP;
+ return 0;
+ case LibFunc::memcpy_chk:
+ return &MemCpyChk;
+ default:
+ return 0;
+ }
+ }
+
+ // Finally check for fortified library calls.
+ if (FuncName.endswith("_chk")) {
+ if (FuncName == "__memmove_chk")
+ return &MemMoveChk;
+ else if (FuncName == "__memset_chk")
+ return &MemSetChk;
+ else if (FuncName == "__strcpy_chk")
+ return &StrCpyChk;
+ else if (FuncName == "__stpcpy_chk")
+ return &StpCpyChk;
+ else if (FuncName == "__strncpy_chk")
+ return &StrNCpyChk;
+ else if (FuncName == "__stpncpy_chk")
+ return &StrNCpyChk;
+ }
+
+ return 0;
+
+}
+
+Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
+ LibCallOptimization *LCO = lookupOptimization(CI);
+ if (LCO) {
+ IRBuilder<> Builder(CI);
+ return LCO->optimizeCall(CI, TD, TLI, LCS, Builder);
+ }
+ return 0;
+}
+
+LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ bool UnsafeFPShrink) {
+ Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink);
+}
+
+LibCallSimplifier::~LibCallSimplifier() {
+ delete Impl;
+}
+
+Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+ if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0;
+ return Impl->optimizeCall(CI);
+}
+
+void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
+ I->replaceAllUsesWith(With);
+ I->eraseFromParent();
+}
+
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
new file mode 100644
index 000000000000..560f58160753
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -0,0 +1,122 @@
+//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to ensure that functions have at most one return
+// instruction in them. Additionally, it keeps track of which node is the new
+// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode
+// method will return a null pointer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+char UnifyFunctionExitNodes::ID = 0;
+INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+ "Unify function exit nodes", false, false)
+
+Pass *llvm::createUnifyFunctionExitNodesPass() {
+ return new UnifyFunctionExitNodes();
+}
+
+void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+ // We preserve the non-critical-edgeness property
+ AU.addPreservedID(BreakCriticalEdgesID);
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+}
+
+// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new
+// BasicBlock, and converting all returns to unconditional branches to this
+// new basic block. The singular exit node is returned.
+//
+// If there are no return stmts in the Function, a null pointer is returned.
+//
+bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
+ // Loop over all of the blocks in a function, tracking all of the blocks that
+ // return.
+ //
+ std::vector<BasicBlock*> ReturningBlocks;
+ std::vector<BasicBlock*> UnreachableBlocks;
+ for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (isa<ReturnInst>(I->getTerminator()))
+ ReturningBlocks.push_back(I);
+ else if (isa<UnreachableInst>(I->getTerminator()))
+ UnreachableBlocks.push_back(I);
+
+ // Then unreachable blocks.
+ if (UnreachableBlocks.empty()) {
+ UnreachableBlock = 0;
+ } else if (UnreachableBlocks.size() == 1) {
+ UnreachableBlock = UnreachableBlocks.front();
+ } else {
+ UnreachableBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
+
+ for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
+ E = UnreachableBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ BB->getInstList().pop_back(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
+ }
+ }
+
+ // Now handle return blocks.
+ if (ReturningBlocks.empty()) {
+ ReturnBlock = 0;
+ return false; // No blocks return
+ } else if (ReturningBlocks.size() == 1) {
+ ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+ return false;
+ }
+
+ // Otherwise, we need to insert a new basic block into the function, add a PHI
+ // nodes (if the function returns values), and convert all of the return
+ // instructions into unconditional branches.
+ //
+ BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedReturnBlock", &F);
+
+ PHINode *PN = 0;
+ if (F.getReturnType()->isVoidTy()) {
+ ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
+ } else {
+ // If the function doesn't return void... add a PHI node to the block...
+ PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+ "UnifiedRetVal");
+ NewRetBlock->getInstList().push_back(PN);
+ ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+ }
+
+ // Loop over all of the blocks, replacing the return instruction with an
+ // unconditional branch.
+ //
+ for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
+ E = ReturningBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ // Add an incoming element to the PHI node for every return instruction that
+ // is merging into this new block...
+ if (PN)
+ PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+ BB->getInstList().pop_back(); // Remove the return insn
+ BranchInst::Create(NewRetBlock, BB);
+ }
+ ReturnBlock = NewRetBlock;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
new file mode 100644
index 000000000000..5812d4607dfc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,38 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeTransformUtils - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeTransformUtils(PassRegistry &Registry) {
+ initializeBreakCriticalEdgesPass(Registry);
+ initializeInstNamerPass(Registry);
+ initializeLCSSAPass(Registry);
+ initializeLoopSimplifyPass(Registry);
+ initializeLowerInvokePass(Registry);
+ initializeLowerSwitchPass(Registry);
+ initializePromotePassPass(Registry);
+ initializeUnifyFunctionExitNodesPass(Registry);
+ initializeInstSimplifierPass(Registry);
+ initializeMetaRenamerPass(Registry);
+}
+
+/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
+ initializeTransformUtils(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
new file mode 100644
index 000000000000..b5941bdf2411
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,215 @@
+//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue function, which is shared by various parts of
+// the lib/Transforms/Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+using namespace llvm;
+
+// Out of line method to get vtable etc for class.
+void ValueMapTypeRemapper::anchor() {}
+
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper) {
+ ValueToValueMapTy::iterator I = VM.find(V);
+
+ // If the value already exists in the map, use it.
+ if (I != VM.end() && I->second) return I->second;
+
+ // Global values do not need to be seeded into the VM if they
+ // are using the identity mapping.
+ if (isa<GlobalValue>(V) || isa<MDString>(V))
+ return VM[V] = const_cast<Value*>(V);
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ // Inline asm may need *type* remapping.
+ FunctionType *NewTy = IA->getFunctionType();
+ if (TypeMapper) {
+ NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+ if (NewTy != IA->getFunctionType())
+ V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+ IA->hasSideEffects(), IA->isAlignStack());
+ }
+
+ return VM[V] = const_cast<Value*>(V);
+ }
+
+
+ if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ // If this is a module-level metadata and we know that nothing at the module
+ // level is changing, then use an identity mapping.
+ if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges))
+ return VM[V] = const_cast<Value*>(V);
+
+ // Create a dummy node in case we have a metadata cycle.
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
+ VM[V] = Dummy;
+
+ // Check all operands to see if any need to be remapped.
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
+ Value *OP = MD->getOperand(i);
+ if (OP == 0) continue;
+ Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper);
+ // Use identity map if Mapped_Op is null and we can ignore missing
+ // entries.
+ if (Mapped_OP == OP ||
+ (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries)))
+ continue;
+
+ // Ok, at least one operand needs remapping.
+ SmallVector<Value*, 4> Elts;
+ Elts.reserve(MD->getNumOperands());
+ for (i = 0; i != e; ++i) {
+ Value *Op = MD->getOperand(i);
+ if (Op == 0)
+ Elts.push_back(0);
+ else {
+ Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper);
+ // Use identity map if Mapped_Op is null and we can ignore missing
+ // entries.
+ if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
+ Mapped_Op = Op;
+ Elts.push_back(Mapped_Op);
+ }
+ }
+ MDNode *NewMD = MDNode::get(V->getContext(), Elts);
+ Dummy->replaceAllUsesWith(NewMD);
+ VM[V] = NewMD;
+ MDNode::deleteTemporary(Dummy);
+ return NewMD;
+ }
+
+ VM[V] = const_cast<Value*>(V);
+ MDNode::deleteTemporary(Dummy);
+
+ // No operands needed remapping. Use an identity mapping.
+ return const_cast<Value*>(V);
+ }
+
+ // Okay, this either must be a constant (which may or may not be mappable) or
+ // is something that is not in the mapping table.
+ Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
+ if (C == 0)
+ return 0;
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Function *F =
+ cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
+ Flags, TypeMapper));
+ return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
+ }
+
+ // Otherwise, we have some other constant to remap. Start by checking to see
+ // if all operands have an identity remapping.
+ unsigned OpNo = 0, NumOperands = C->getNumOperands();
+ Value *Mapped = 0;
+ for (; OpNo != NumOperands; ++OpNo) {
+ Value *Op = C->getOperand(OpNo);
+ Mapped = MapValue(Op, VM, Flags, TypeMapper);
+ if (Mapped != C) break;
+ }
+
+ // See if the type mapper wants to remap the type as well.
+ Type *NewTy = C->getType();
+ if (TypeMapper)
+ NewTy = TypeMapper->remapType(NewTy);
+
+ // If the result type and all operands match up, then just insert an identity
+ // mapping.
+ if (OpNo == NumOperands && NewTy == C->getType())
+ return VM[V] = C;
+
+ // Okay, we need to create a new constant. We've already processed some or
+ // all of the operands, set them all up now.
+ SmallVector<Constant*, 8> Ops;
+ Ops.reserve(NumOperands);
+ for (unsigned j = 0; j != OpNo; ++j)
+ Ops.push_back(cast<Constant>(C->getOperand(j)));
+
+ // If one of the operands mismatch, push it and the other mapped operands.
+ if (OpNo != NumOperands) {
+ Ops.push_back(cast<Constant>(Mapped));
+
+ // Map the rest of the operands that aren't processed yet.
+ for (++OpNo; OpNo != NumOperands; ++OpNo)
+ Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
+ Flags, TypeMapper));
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return VM[V] = CE->getWithOperands(Ops, NewTy);
+ if (isa<ConstantArray>(C))
+ return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+ if (isa<ConstantStruct>(C))
+ return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+ if (isa<ConstantVector>(C))
+ return VM[V] = ConstantVector::get(Ops);
+ // If this is a no-operand constant, it must be because the type was remapped.
+ if (isa<UndefValue>(C))
+ return VM[V] = UndefValue::get(NewTy);
+ if (isa<ConstantAggregateZero>(C))
+ return VM[V] = ConstantAggregateZero::get(NewTy);
+ assert(isa<ConstantPointerNull>(C));
+ return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
+}
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by VMap.
+///
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
+ // Remap operands.
+ for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
+ Value *V = MapValue(*op, VMap, Flags, TypeMapper);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V != 0)
+ *op = V;
+ else
+ assert((Flags & RF_IgnoreMissingEntries) &&
+ "Referenced value not in value map!");
+ }
+
+ // Remap phi nodes' incoming blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V != 0)
+ PN->setIncomingBlock(i, cast<BasicBlock>(V));
+ else
+ assert((Flags & RF_IgnoreMissingEntries) &&
+ "Referenced block not in value map!");
+ }
+ }
+
+ // Remap attached metadata.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadata(MDs);
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+ MDNode *Old = MI->second;
+ MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
+ if (New != Old)
+ I->setMetadata(MI->first, New);
+ }
+
+ // If the instruction's type is being remapped, do so now.
+ if (TypeMapper)
+ I->mutateType(TypeMapper->remapType(I->getType()));
+}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
new file mode 100644
index 000000000000..17900dabbefe
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -0,0 +1,3183 @@
+//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a basic-block vectorization pass. The algorithm was
+// inspired by that used by the Vienna MAP Vectorizor by Franchetti and Kral,
+// et al. It works by looking for chains of pairable operations and then
+// pairing them.
+//
+//===----------------------------------------------------------------------===//
+
+#define BBV_NAME "bb-vectorize"
+#define DEBUG_TYPE BBV_NAME
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool>
+IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false),
+ cl::Hidden, cl::desc("Ignore target information"));
+
+static cl::opt<unsigned>
+ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
+ cl::desc("The required chain depth for vectorization"));
+
+static cl::opt<bool>
+UseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false),
+ cl::Hidden, cl::desc("Use the chain depth requirement with"
+ " target information"));
+
+static cl::opt<unsigned>
+SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
+ cl::desc("The maximum search distance for instruction pairs"));
+
+static cl::opt<bool>
+SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden,
+ cl::desc("Replicating one element to a pair breaks the chain"));
+
+static cl::opt<unsigned>
+VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden,
+ cl::desc("The size of the native vector registers"));
+
+static cl::opt<unsigned>
+MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden,
+ cl::desc("The maximum number of pairing iterations"));
+
+static cl::opt<bool>
+Pow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to form non-2^n-length vectors"));
+
+static cl::opt<unsigned>
+MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden,
+ cl::desc("The maximum number of pairable instructions per group"));
+
+static cl::opt<unsigned>
+MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden,
+ cl::desc("The maximum number of candidate instruction pairs per group"));
+
+static cl::opt<unsigned>
+MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
+ cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"
+ " a full cycle check"));
+
+static cl::opt<bool>
+NoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize boolean (i1) values"));
+
+static cl::opt<bool>
+NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize integer values"));
+
+static cl::opt<bool>
+NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize floating-point values"));
+
+// FIXME: This should default to false once pointer vector support works.
+static cl::opt<bool>
+NoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden,
+ cl::desc("Don't try to vectorize pointer values"));
+
+static cl::opt<bool>
+NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize casting (conversion) operations"));
+
+static cl::opt<bool>
+NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize floating-point math intrinsics"));
+
+static cl::opt<bool>
+NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
+
+static cl::opt<bool>
+NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize select instructions"));
+
+static cl::opt<bool>
+NoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize comparison instructions"));
+
+static cl::opt<bool>
+NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize getelementptr instructions"));
+
+static cl::opt<bool>
+NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize loads and stores"));
+
+static cl::opt<bool>
+AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden,
+ cl::desc("Only generate aligned loads and stores"));
+
+static cl::opt<bool>
+NoMemOpBoost("bb-vectorize-no-mem-op-boost",
+ cl::init(false), cl::Hidden,
+ cl::desc("Don't boost the chain-depth contribution of loads and stores"));
+
+static cl::opt<bool>
+FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden,
+ cl::desc("Use a fast instruction dependency analysis"));
+
+#ifndef NDEBUG
+static cl::opt<bool>
+DebugInstructionExamination("bb-vectorize-debug-instruction-examination",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " instruction-examination process"));
+static cl::opt<bool>
+DebugCandidateSelection("bb-vectorize-debug-candidate-selection",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " candidate-selection process"));
+static cl::opt<bool>
+DebugPairSelection("bb-vectorize-debug-pair-selection",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " pair-selection process"));
+static cl::opt<bool>
+DebugCycleCheck("bb-vectorize-debug-cycle-check",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " cycle-checking process"));
+
+static cl::opt<bool>
+PrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, dump the basic block after"
+ " every pair is fused"));
+#endif
+
+STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
+
+namespace {
+ struct BBVectorize : public BasicBlockPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ const VectorizeConfig Config;
+
+ BBVectorize(const VectorizeConfig &C = VectorizeConfig())
+ : BasicBlockPass(ID), Config(C) {
+ initializeBBVectorizePass(*PassRegistry::getPassRegistry());
+ }
+
+ BBVectorize(Pass *P, const VectorizeConfig &C)
+ : BasicBlockPass(ID), Config(C) {
+ AA = &P->getAnalysis<AliasAnalysis>();
+ DT = &P->getAnalysis<DominatorTree>();
+ SE = &P->getAnalysis<ScalarEvolution>();
+ TD = P->getAnalysisIfAvailable<DataLayout>();
+ TTI = IgnoreTargetInfo ? 0 : &P->getAnalysis<TargetTransformInfo>();
+ }
+
+ typedef std::pair<Value *, Value *> ValuePair;
+ typedef std::pair<ValuePair, int> ValuePairWithCost;
+ typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
+ typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
+ typedef std::pair<VPPair, unsigned> VPPairWithType;
+
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ DataLayout *TD;
+ const TargetTransformInfo *TTI;
+
+ // FIXME: const correct?
+
+ bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false);
+
+ bool getCandidatePairs(BasicBlock &BB,
+ BasicBlock::iterator &Start,
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts, bool NonPow2Len);
+
+ // FIXME: The current implementation does not account for pairs that
+ // are connected in multiple ways. For example:
+ // C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap)
+ enum PairConnectionType {
+ PairConnectionDirect,
+ PairConnectionSwap,
+ PairConnectionSplat
+ };
+
+ void computeConnectedPairs(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes);
+
+ void buildDepMap(BasicBlock &BB,
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &PairableInstUsers);
+
+ void choosePairs(DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs);
+
+ void fuseChosenPairs(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *>& ChosenPairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps);
+
+
+ bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
+
+ bool areInstsCompatible(Instruction *I, Instruction *J,
+ bool IsSimpleLoadStore, bool NonPow2Len,
+ int &CostSavings, int &FixedOrder);
+
+ bool trackUsesOfI(DenseSet<Value *> &Users,
+ AliasSetTracker &WriteSet, Instruction *I,
+ Instruction *J, bool UpdateUsers = true,
+ DenseSet<ValuePair> *LoadMoveSetPairs = 0);
+
+ void computePairsConnectedTo(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ ValuePair P);
+
+ bool pairsConflict(ValuePair P, ValuePair Q,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> >
+ *PairableInstUserMap = 0,
+ DenseSet<VPPair> *PairableInstUserPairSet = 0);
+
+ bool pairWillFormCycle(ValuePair P,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers,
+ DenseSet<ValuePair> &CurrentPairs);
+
+ void pruneDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG,
+ DenseSet<ValuePair> &PrunedDAG, ValuePair J,
+ bool UseCycleCheck);
+
+ void buildInitialDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG, ValuePair J);
+
+ void findBestDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth,
+ int &BestEffSize, Value *II, std::vector<Value *>&JJ,
+ bool UseCycleCheck);
+
+ Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o);
+
+ void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
+ unsigned MaskOffset, unsigned NumInElem,
+ unsigned NumInElem1, unsigned IdxOffset,
+ std::vector<Constant*> &Mask);
+
+ Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I,
+ Instruction *J);
+
+ bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J,
+ unsigned o, Value *&LOp, unsigned numElemL,
+ Type *ArgTypeL, Type *ArgTypeR, bool IBeforeJ,
+ unsigned IdxOff = 0);
+
+ Value *getReplacementInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, bool IBeforeJ);
+
+ void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
+ bool IBeforeJ);
+
+ void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, Instruction *K,
+ Instruction *&InsertionPt, Instruction *&K1,
+ Instruction *&K2);
+
+ void collectPairLoadMoveSet(BasicBlock &BB,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
+ Instruction *I);
+
+ void collectLoadMoveSet(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs);
+
+ bool canMoveUsesOfIAfterJ(BasicBlock &BB,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
+ Instruction *I, Instruction *J);
+
+ void moveUsesOfIAfterJ(BasicBlock &BB,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
+ Instruction *&InsertionPt,
+ Instruction *I, Instruction *J);
+
+ void combineMetadata(Instruction *K, const Instruction *J);
+
+ bool vectorizeBB(BasicBlock &BB) {
+ if (!DT->isReachableFromEntry(&BB)) {
+ DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() <<
+ " in " << BB.getParent()->getName() << "\n");
+ return false;
+ }
+
+ DEBUG(if (TTI) dbgs() << "BBV: using target information\n");
+
+ bool changed = false;
+ // Iterate a sufficient number of times to merge types of size 1 bit,
+ // then 2 bits, then 4, etc. up to half of the target vector width of the
+ // target vector register.
+ unsigned n = 1;
+ for (unsigned v = 2;
+ (TTI || v <= Config.VectorBits) &&
+ (!Config.MaxIter || n <= Config.MaxIter);
+ v *= 2, ++n) {
+ DEBUG(dbgs() << "BBV: fusing loop #" << n <<
+ " for " << BB.getName() << " in " <<
+ BB.getParent()->getName() << "...\n");
+ if (vectorizePairs(BB))
+ changed = true;
+ else
+ break;
+ }
+
+ if (changed && !Pow2LenOnly) {
+ ++n;
+ for (; !Config.MaxIter || n <= Config.MaxIter; ++n) {
+ DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " <<
+ n << " for " << BB.getName() << " in " <<
+ BB.getParent()->getName() << "...\n");
+ if (!vectorizePairs(BB, true)) break;
+ }
+ }
+
+ DEBUG(dbgs() << "BBV: done!\n");
+ return changed;
+ }
+
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TTI = IgnoreTargetInfo ? 0 : &getAnalysis<TargetTransformInfo>();
+
+ return vectorizeBB(BB);
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ BasicBlockPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.setPreservesCFG();
+ }
+
+ static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) {
+ assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() &&
+ "Cannot form vector from incompatible scalar types");
+ Type *STy = ElemTy->getScalarType();
+
+ unsigned numElem;
+ if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
+ numElem = VTy->getNumElements();
+ } else {
+ numElem = 1;
+ }
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) {
+ numElem += VTy->getNumElements();
+ } else {
+ numElem += 1;
+ }
+
+ return VectorType::get(STy, numElem);
+ }
+
+ static inline void getInstructionTypes(Instruction *I,
+ Type *&T1, Type *&T2) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // For stores, it is the value type, not the pointer type that matters
+ // because the value is what will come from a vector register.
+
+ Value *IVal = SI->getValueOperand();
+ T1 = IVal->getType();
+ } else {
+ T1 = I->getType();
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(I))
+ T2 = CI->getSrcTy();
+ else
+ T2 = T1;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ T2 = SI->getCondition()->getType();
+ } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
+ T2 = SI->getOperand(0)->getType();
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
+ T2 = CI->getOperand(0)->getType();
+ }
+ }
+
+ // Returns the weight associated with the provided value. A chain of
+ // candidate pairs has a length given by the sum of the weights of its
+ // members (one weight per pair; the weight of each member of the pair
+ // is assumed to be the same). This length is then compared to the
+ // chain-length threshold to determine if a given chain is significant
+ // enough to be vectorized. The length is also used in comparing
+ // candidate chains where longer chains are considered to be better.
+ // Note: when this function returns 0, the resulting instructions are
+ // not actually fused.
+ inline size_t getDepthFactor(Value *V) {
+ // InsertElement and ExtractElement have a depth factor of zero. This is
+ // for two reasons: First, they cannot be usefully fused. Second, because
+ // the pass generates a lot of these, they can confuse the simple metric
+ // used to compare the dags in the next iteration. Thus, giving them a
+ // weight of zero allows the pass to essentially ignore them in
+ // subsequent iterations when looking for vectorization opportunities
+ // while still tracking dependency chains that flow through those
+ // instructions.
+ if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
+ return 0;
+
+ // Give a load or store half of the required depth so that load/store
+ // pairs will vectorize.
+ if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
+ return Config.ReqChainDepth/2;
+
+ return 1;
+ }
+
+ // Returns the cost of the provided instruction using TTI.
+ // This does not handle loads and stores.
+ unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) {
+ switch (Opcode) {
+ default: break;
+ case Instruction::GetElementPtr:
+ // We mark this instruction as zero-cost because scalar GEPs are usually
+ // lowered to the intruction addressing mode. At the moment we don't
+ // generate vector GEPs.
+ return 0;
+ case Instruction::Br:
+ return TTI->getCFInstrCost(Opcode);
+ case Instruction::PHI:
+ return 0;
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return TTI->getArithmeticInstrCost(Opcode, T1);
+ case Instruction::Select:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return TTI->getCmpSelInstrCost(Opcode, T1, T2);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ case Instruction::ShuffleVector:
+ return TTI->getCastInstrCost(Opcode, T1, T2);
+ }
+
+ return 1;
+ }
+
+ // This determines the relative offset of two loads or stores, returning
+ // true if the offset could be determined to be some constant value.
+ // For example, if OffsetInElmts == 1, then J accesses the memory directly
+ // after I; if OffsetInElmts == -1 then I accesses the memory
+ // directly after J.
+ bool getPairPtrInfo(Instruction *I, Instruction *J,
+ Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
+ unsigned &IAddressSpace, unsigned &JAddressSpace,
+ int64_t &OffsetInElmts, bool ComputeOffset = true) {
+ OffsetInElmts = 0;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ LoadInst *LJ = cast<LoadInst>(J);
+ IPtr = LI->getPointerOperand();
+ JPtr = LJ->getPointerOperand();
+ IAlignment = LI->getAlignment();
+ JAlignment = LJ->getAlignment();
+ IAddressSpace = LI->getPointerAddressSpace();
+ JAddressSpace = LJ->getPointerAddressSpace();
+ } else {
+ StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J);
+ IPtr = SI->getPointerOperand();
+ JPtr = SJ->getPointerOperand();
+ IAlignment = SI->getAlignment();
+ JAlignment = SJ->getAlignment();
+ IAddressSpace = SI->getPointerAddressSpace();
+ JAddressSpace = SJ->getPointerAddressSpace();
+ }
+
+ if (!ComputeOffset)
+ return true;
+
+ const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
+ const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
+
+ // If this is a trivial offset, then we'll get something like
+ // 1*sizeof(type). With target data, which we need anyway, this will get
+ // constant folded into a number.
+ const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV);
+ if (const SCEVConstant *ConstOffSCEV =
+ dyn_cast<SCEVConstant>(OffsetSCEV)) {
+ ConstantInt *IntOff = ConstOffSCEV->getValue();
+ int64_t Offset = IntOff->getSExtValue();
+
+ Type *VTy = cast<PointerType>(IPtr->getType())->getElementType();
+ int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
+
+ Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType();
+ if (VTy != VTy2 && Offset < 0) {
+ int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2);
+ OffsetInElmts = Offset/VTy2TSS;
+ return (abs64(Offset) % VTy2TSS) == 0;
+ }
+
+ OffsetInElmts = Offset/VTyTSS;
+ return (abs64(Offset) % VTyTSS) == 0;
+ }
+
+ return false;
+ }
+
+ // Returns true if the provided CallInst represents an intrinsic that can
+ // be vectorized.
+ bool isVectorizableIntrinsic(CallInst* I) {
+ Function *F = I->getCalledFunction();
+ if (!F) return false;
+
+ Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
+ if (!IID) return false;
+
+ switch(IID) {
+ default:
+ return false;
+ case Intrinsic::sqrt:
+ case Intrinsic::powi:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::pow:
+ return Config.VectorizeMath;
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ return Config.VectorizeFMA;
+ }
+ }
+
+ bool isPureIEChain(InsertElementInst *IE) {
+ InsertElementInst *IENext = IE;
+ do {
+ if (!isa<UndefValue>(IENext->getOperand(0)) &&
+ !isa<InsertElementInst>(IENext->getOperand(0))) {
+ return false;
+ }
+ } while ((IENext =
+ dyn_cast<InsertElementInst>(IENext->getOperand(0))));
+
+ return true;
+ }
+ };
+
+ // This function implements one vectorization iteration on the provided
+ // basic block. It returns true if the block is changed.
+ bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) {
+ bool ShouldContinue;
+ BasicBlock::iterator Start = BB.getFirstInsertionPt();
+
+ std::vector<Value *> AllPairableInsts;
+ DenseMap<Value *, Value *> AllChosenPairs;
+ DenseSet<ValuePair> AllFixedOrderPairs;
+ DenseMap<VPPair, unsigned> AllPairConnectionTypes;
+ DenseMap<ValuePair, std::vector<ValuePair> > AllConnectedPairs,
+ AllConnectedPairDeps;
+
+ do {
+ std::vector<Value *> PairableInsts;
+ DenseMap<Value *, std::vector<Value *> > CandidatePairs;
+ DenseSet<ValuePair> FixedOrderPairs;
+ DenseMap<ValuePair, int> CandidatePairCostSavings;
+ ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
+ FixedOrderPairs,
+ CandidatePairCostSavings,
+ PairableInsts, NonPow2Len);
+ if (PairableInsts.empty()) continue;
+
+ // Build the candidate pair set for faster lookups.
+ DenseSet<ValuePair> CandidatePairsSet;
+ for (DenseMap<Value *, std::vector<Value *> >::iterator I =
+ CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I)
+ for (std::vector<Value *>::iterator J = I->second.begin(),
+ JE = I->second.end(); J != JE; ++J)
+ CandidatePairsSet.insert(ValuePair(I->first, *J));
+
+ // Now we have a map of all of the pairable instructions and we need to
+ // select the best possible pairing. A good pairing is one such that the
+ // users of the pair are also paired. This defines a (directed) forest
+ // over the pairs such that two pairs are connected iff the second pair
+ // uses the first.
+
+ // Note that it only matters that both members of the second pair use some
+ // element of the first pair (to allow for splatting).
+
+ DenseMap<ValuePair, std::vector<ValuePair> > ConnectedPairs,
+ ConnectedPairDeps;
+ DenseMap<VPPair, unsigned> PairConnectionTypes;
+ computeConnectedPairs(CandidatePairs, CandidatePairsSet,
+ PairableInsts, ConnectedPairs, PairConnectionTypes);
+ if (ConnectedPairs.empty()) continue;
+
+ for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator
+ I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
+ I != IE; ++I)
+ for (std::vector<ValuePair>::iterator J = I->second.begin(),
+ JE = I->second.end(); J != JE; ++J)
+ ConnectedPairDeps[*J].push_back(I->first);
+
+ // Build the pairable-instruction dependency map
+ DenseSet<ValuePair> PairableInstUsers;
+ buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
+
+ // There is now a graph of the connected pairs. For each variable, pick
+ // the pairing with the largest dag meeting the depth requirement on at
+ // least one branch. Then select all pairings that are part of that dag
+ // and remove them from the list of available pairings and pairable
+ // variables.
+
+ DenseMap<Value *, Value *> ChosenPairs;
+ choosePairs(CandidatePairs, CandidatePairsSet,
+ CandidatePairCostSavings,
+ PairableInsts, FixedOrderPairs, PairConnectionTypes,
+ ConnectedPairs, ConnectedPairDeps,
+ PairableInstUsers, ChosenPairs);
+
+ if (ChosenPairs.empty()) continue;
+ AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
+ PairableInsts.end());
+ AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end());
+
+ // Only for the chosen pairs, propagate information on fixed-order pairs,
+ // pair connections, and their types to the data structures used by the
+ // pair fusion procedures.
+ for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(),
+ IE = ChosenPairs.end(); I != IE; ++I) {
+ if (FixedOrderPairs.count(*I))
+ AllFixedOrderPairs.insert(*I);
+ else if (FixedOrderPairs.count(ValuePair(I->second, I->first)))
+ AllFixedOrderPairs.insert(ValuePair(I->second, I->first));
+
+ for (DenseMap<Value *, Value *>::iterator J = ChosenPairs.begin();
+ J != IE; ++J) {
+ DenseMap<VPPair, unsigned>::iterator K =
+ PairConnectionTypes.find(VPPair(*I, *J));
+ if (K != PairConnectionTypes.end()) {
+ AllPairConnectionTypes.insert(*K);
+ } else {
+ K = PairConnectionTypes.find(VPPair(*J, *I));
+ if (K != PairConnectionTypes.end())
+ AllPairConnectionTypes.insert(*K);
+ }
+ }
+ }
+
+ for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator
+ I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
+ I != IE; ++I)
+ for (std::vector<ValuePair>::iterator J = I->second.begin(),
+ JE = I->second.end(); J != JE; ++J)
+ if (AllPairConnectionTypes.count(VPPair(I->first, *J))) {
+ AllConnectedPairs[I->first].push_back(*J);
+ AllConnectedPairDeps[*J].push_back(I->first);
+ }
+ } while (ShouldContinue);
+
+ if (AllChosenPairs.empty()) return false;
+ NumFusedOps += AllChosenPairs.size();
+
+ // A set of pairs has now been selected. It is now necessary to replace the
+ // paired instructions with vector instructions. For this procedure each
+ // operand must be replaced with a vector operand. This vector is formed
+ // by using build_vector on the old operands. The replaced values are then
+ // replaced with a vector_extract on the result. Subsequent optimization
+ // passes should coalesce the build/extract combinations.
+
+ fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs,
+ AllPairConnectionTypes,
+ AllConnectedPairs, AllConnectedPairDeps);
+
+ // It is important to cleanup here so that future iterations of this
+ // function have less work to do.
+ (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo());
+ return true;
+ }
+
+ // This function returns true if the provided instruction is capable of being
+ // fused into a vector instruction. This determination is based only on the
+ // type and other attributes of the instruction.
+ bool BBVectorize::isInstVectorizable(Instruction *I,
+ bool &IsSimpleLoadStore) {
+ IsSimpleLoadStore = false;
+
+ if (CallInst *C = dyn_cast<CallInst>(I)) {
+ if (!isVectorizableIntrinsic(C))
+ return false;
+ } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ // Vectorize simple loads if possbile:
+ IsSimpleLoadStore = L->isSimple();
+ if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
+ return false;
+ } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+ // Vectorize simple stores if possbile:
+ IsSimpleLoadStore = S->isSimple();
+ if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
+ return false;
+ } else if (CastInst *C = dyn_cast<CastInst>(I)) {
+ // We can vectorize casts, but not casts of pointer types, etc.
+ if (!Config.VectorizeCasts)
+ return false;
+
+ Type *SrcTy = C->getSrcTy();
+ if (!SrcTy->isSingleValueType())
+ return false;
+
+ Type *DestTy = C->getDestTy();
+ if (!DestTy->isSingleValueType())
+ return false;
+ } else if (isa<SelectInst>(I)) {
+ if (!Config.VectorizeSelect)
+ return false;
+ } else if (isa<CmpInst>(I)) {
+ if (!Config.VectorizeCmp)
+ return false;
+ } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) {
+ if (!Config.VectorizeGEP)
+ return false;
+
+ // Currently, vector GEPs exist only with one index.
+ if (G->getNumIndices() != 1)
+ return false;
+ } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) ||
+ isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) {
+ return false;
+ }
+
+ // We can't vectorize memory operations without target data
+ if (TD == 0 && IsSimpleLoadStore)
+ return false;
+
+ Type *T1, *T2;
+ getInstructionTypes(I, T1, T2);
+
+ // Not every type can be vectorized...
+ if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) ||
+ !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
+ return false;
+
+ if (T1->getScalarSizeInBits() == 1) {
+ if (!Config.VectorizeBools)
+ return false;
+ } else {
+ if (!Config.VectorizeInts && T1->isIntOrIntVectorTy())
+ return false;
+ }
+
+ if (T2->getScalarSizeInBits() == 1) {
+ if (!Config.VectorizeBools)
+ return false;
+ } else {
+ if (!Config.VectorizeInts && T2->isIntOrIntVectorTy())
+ return false;
+ }
+
+ if (!Config.VectorizeFloats
+ && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
+ return false;
+
+ // Don't vectorize target-specific types.
+ if (T1->isX86_FP80Ty() || T1->isPPC_FP128Ty() || T1->isX86_MMXTy())
+ return false;
+ if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
+ return false;
+
+ if ((!Config.VectorizePointers || TD == 0) &&
+ (T1->getScalarType()->isPointerTy() ||
+ T2->getScalarType()->isPointerTy()))
+ return false;
+
+ if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
+ T2->getPrimitiveSizeInBits() >= Config.VectorBits))
+ return false;
+
+ return true;
+ }
+
+ // This function returns true if the two provided instructions are compatible
+ // (meaning that they can be fused into a vector instruction). This assumes
+ // that I has already been determined to be vectorizable and that J is not
+ // in the use dag of I.
+ bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
+ bool IsSimpleLoadStore, bool NonPow2Len,
+ int &CostSavings, int &FixedOrder) {
+ DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
+ " <-> " << *J << "\n");
+
+ CostSavings = 0;
+ FixedOrder = 0;
+
+ // Loads and stores can be merged if they have different alignments,
+ // but are otherwise the same.
+ if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment |
+ (NonPow2Len ? Instruction::CompareUsingScalarTypes : 0)))
+ return false;
+
+ Type *IT1, *IT2, *JT1, *JT2;
+ getInstructionTypes(I, IT1, IT2);
+ getInstructionTypes(J, JT1, JT2);
+ unsigned MaxTypeBits = std::max(
+ IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(),
+ IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits());
+ if (!TTI && MaxTypeBits > Config.VectorBits)
+ return false;
+
+ // FIXME: handle addsub-type operations!
+
+ if (IsSimpleLoadStore) {
+ Value *IPtr, *JPtr;
+ unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
+ int64_t OffsetInElmts = 0;
+ if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+ IAddressSpace, JAddressSpace,
+ OffsetInElmts) && abs64(OffsetInElmts) == 1) {
+ FixedOrder = (int) OffsetInElmts;
+ unsigned BottomAlignment = IAlignment;
+ if (OffsetInElmts < 0) BottomAlignment = JAlignment;
+
+ Type *aTypeI = isa<StoreInst>(I) ?
+ cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
+ Type *aTypeJ = isa<StoreInst>(J) ?
+ cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
+ Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
+
+ if (Config.AlignedOnly) {
+ // An aligned load or store is possible only if the instruction
+ // with the lower offset has an alignment suitable for the
+ // vector type.
+
+ unsigned VecAlignment = TD->getPrefTypeAlignment(VType);
+ if (BottomAlignment < VecAlignment)
+ return false;
+ }
+
+ if (TTI) {
+ unsigned ICost = TTI->getMemoryOpCost(I->getOpcode(), aTypeI,
+ IAlignment, IAddressSpace);
+ unsigned JCost = TTI->getMemoryOpCost(J->getOpcode(), aTypeJ,
+ JAlignment, JAddressSpace);
+ unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType,
+ BottomAlignment,
+ IAddressSpace);
+
+ ICost += TTI->getAddressComputationCost(aTypeI);
+ JCost += TTI->getAddressComputationCost(aTypeJ);
+ VCost += TTI->getAddressComputationCost(VType);
+
+ if (VCost > ICost + JCost)
+ return false;
+
+ // We don't want to fuse to a type that will be split, even
+ // if the two input types will also be split and there is no other
+ // associated cost.
+ unsigned VParts = TTI->getNumberOfParts(VType);
+ if (VParts > 1)
+ return false;
+ else if (!VParts && VCost == ICost + JCost)
+ return false;
+
+ CostSavings = ICost + JCost - VCost;
+ }
+ } else {
+ return false;
+ }
+ } else if (TTI) {
+ unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2);
+ unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2);
+ Type *VT1 = getVecTypeForPair(IT1, JT1),
+ *VT2 = getVecTypeForPair(IT2, JT2);
+
+ // Note that this procedure is incorrect for insert and extract element
+ // instructions (because combining these often results in a shuffle),
+ // but this cost is ignored (because insert and extract element
+ // instructions are assigned a zero depth factor and are not really
+ // fused in general).
+ unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2);
+
+ if (VCost > ICost + JCost)
+ return false;
+
+ // We don't want to fuse to a type that will be split, even
+ // if the two input types will also be split and there is no other
+ // associated cost.
+ unsigned VParts1 = TTI->getNumberOfParts(VT1),
+ VParts2 = TTI->getNumberOfParts(VT2);
+ if (VParts1 > 1 || VParts2 > 1)
+ return false;
+ else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
+ return false;
+
+ CostSavings = ICost + JCost - VCost;
+ }
+
+ // The powi intrinsic is special because only the first argument is
+ // vectorized, the second arguments must be equal.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ Function *FI;
+ if (CI && (FI = CI->getCalledFunction())) {
+ Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID();
+ if (IID == Intrinsic::powi) {
+ Value *A1I = CI->getArgOperand(1),
+ *A1J = cast<CallInst>(J)->getArgOperand(1);
+ const SCEV *A1ISCEV = SE->getSCEV(A1I),
+ *A1JSCEV = SE->getSCEV(A1J);
+ return (A1ISCEV == A1JSCEV);
+ }
+
+ if (IID && TTI) {
+ SmallVector<Type*, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(CI->getArgOperand(i)->getType());
+ unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys);
+
+ Tys.clear();
+ CallInst *CJ = cast<CallInst>(J);
+ for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(CJ->getArgOperand(i)->getType());
+ unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys);
+
+ Tys.clear();
+ assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
+ "Intrinsic argument counts differ");
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (IID == Intrinsic::powi && i == 1)
+ Tys.push_back(CI->getArgOperand(i)->getType());
+ else
+ Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
+ CJ->getArgOperand(i)->getType()));
+ }
+
+ Type *RetTy = getVecTypeForPair(IT1, JT1);
+ unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys);
+
+ if (VCost > ICost + JCost)
+ return false;
+
+ // We don't want to fuse to a type that will be split, even
+ // if the two input types will also be split and there is no other
+ // associated cost.
+ unsigned RetParts = TTI->getNumberOfParts(RetTy);
+ if (RetParts > 1)
+ return false;
+ else if (!RetParts && VCost == ICost + JCost)
+ return false;
+
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (!Tys[i]->isVectorTy())
+ continue;
+
+ unsigned NumParts = TTI->getNumberOfParts(Tys[i]);
+ if (NumParts > 1)
+ return false;
+ else if (!NumParts && VCost == ICost + JCost)
+ return false;
+ }
+
+ CostSavings = ICost + JCost - VCost;
+ }
+ }
+
+ return true;
+ }
+
+ // Figure out whether or not J uses I and update the users and write-set
+ // structures associated with I. Specifically, Users represents the set of
+ // instructions that depend on I. WriteSet represents the set
+ // of memory locations that are dependent on I. If UpdateUsers is true,
+ // and J uses I, then Users is updated to contain J and WriteSet is updated
+ // to contain any memory locations to which J writes. The function returns
+ // true if J uses I. By default, alias analysis is used to determine
+ // whether J reads from memory that overlaps with a location in WriteSet.
+ // If LoadMoveSet is not null, then it is a previously-computed map
+ // where the key is the memory-based user instruction and the value is
+ // the instruction to be compared with I. So, if LoadMoveSet is provided,
+ // then the alias analysis is not used. This is necessary because this
+ // function is called during the process of moving instructions during
+ // vectorization and the results of the alias analysis are not stable during
+ // that process.
+ bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users,
+ AliasSetTracker &WriteSet, Instruction *I,
+ Instruction *J, bool UpdateUsers,
+ DenseSet<ValuePair> *LoadMoveSetPairs) {
+ bool UsesI = false;
+
+ // This instruction may already be marked as a user due, for example, to
+ // being a member of a selected pair.
+ if (Users.count(J))
+ UsesI = true;
+
+ if (!UsesI)
+ for (User::op_iterator JU = J->op_begin(), JE = J->op_end();
+ JU != JE; ++JU) {
+ Value *V = *JU;
+ if (I == V || Users.count(V)) {
+ UsesI = true;
+ break;
+ }
+ }
+ if (!UsesI && J->mayReadFromMemory()) {
+ if (LoadMoveSetPairs) {
+ UsesI = LoadMoveSetPairs->count(ValuePair(J, I));
+ } else {
+ for (AliasSetTracker::iterator W = WriteSet.begin(),
+ WE = WriteSet.end(); W != WE; ++W) {
+ if (W->aliasesUnknownInst(J, *AA)) {
+ UsesI = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (UsesI && UpdateUsers) {
+ if (J->mayWriteToMemory()) WriteSet.add(J);
+ Users.insert(J);
+ }
+
+ return UsesI;
+ }
+
+ // This function iterates over all instruction pairs in the provided
+ // basic block and collects all candidate pairs for vectorization.
+ bool BBVectorize::getCandidatePairs(BasicBlock &BB,
+ BasicBlock::iterator &Start,
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts, bool NonPow2Len) {
+ size_t TotalPairs = 0;
+ BasicBlock::iterator E = BB.end();
+ if (Start == E) return false;
+
+ bool ShouldContinue = false, IAfterStart = false;
+ for (BasicBlock::iterator I = Start++; I != E; ++I) {
+ if (I == Start) IAfterStart = true;
+
+ bool IsSimpleLoadStore;
+ if (!isInstVectorizable(I, IsSimpleLoadStore)) continue;
+
+ // Look for an instruction with which to pair instruction *I...
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ bool JAfterStart = IAfterStart;
+ BasicBlock::iterator J = llvm::next(I);
+ for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
+ if (J == Start) JAfterStart = true;
+
+ // Determine if J uses I, if so, exit the loop.
+ bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
+ if (Config.FastDep) {
+ // Note: For this heuristic to be effective, independent operations
+ // must tend to be intermixed. This is likely to be true from some
+ // kinds of grouped loop unrolling (but not the generic LLVM pass),
+ // but otherwise may require some kind of reordering pass.
+
+ // When using fast dependency analysis,
+ // stop searching after first use:
+ if (UsesI) break;
+ } else {
+ if (UsesI) continue;
+ }
+
+ // J does not use I, and comes before the first use of I, so it can be
+ // merged with I if the instructions are compatible.
+ int CostSavings, FixedOrder;
+ if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len,
+ CostSavings, FixedOrder)) continue;
+
+ // J is a candidate for merging with I.
+ if (!PairableInsts.size() ||
+ PairableInsts[PairableInsts.size()-1] != I) {
+ PairableInsts.push_back(I);
+ }
+
+ CandidatePairs[I].push_back(J);
+ ++TotalPairs;
+ if (TTI)
+ CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J),
+ CostSavings));
+
+ if (FixedOrder == 1)
+ FixedOrderPairs.insert(ValuePair(I, J));
+ else if (FixedOrder == -1)
+ FixedOrderPairs.insert(ValuePair(J, I));
+
+ // The next call to this function must start after the last instruction
+ // selected during this invocation.
+ if (JAfterStart) {
+ Start = llvm::next(J);
+ IAfterStart = JAfterStart = false;
+ }
+
+ DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair "
+ << *I << " <-> " << *J << " (cost savings: " <<
+ CostSavings << ")\n");
+
+ // If we have already found too many pairs, break here and this function
+ // will be called again starting after the last instruction selected
+ // during this invocation.
+ if (PairableInsts.size() >= Config.MaxInsts ||
+ TotalPairs >= Config.MaxPairs) {
+ ShouldContinue = true;
+ break;
+ }
+ }
+
+ if (ShouldContinue)
+ break;
+ }
+
+ DEBUG(dbgs() << "BBV: found " << PairableInsts.size()
+ << " instructions with candidate pairs\n");
+
+ return ShouldContinue;
+ }
+
+ // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that
+ // it looks for pairs such that both members have an input which is an
+ // output of PI or PJ.
+ void BBVectorize::computePairsConnectedTo(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ ValuePair P) {
+ StoreInst *SI, *SJ;
+
+ // For each possible pairing for this variable, look at the uses of
+ // the first value...
+ for (Value::use_iterator I = P.first->use_begin(),
+ E = P.first->use_end(); I != E; ++I) {
+ if (isa<LoadInst>(*I)) {
+ // A pair cannot be connected to a load because the load only takes one
+ // operand (the address) and it is a scalar even after vectorization.
+ continue;
+ } else if ((SI = dyn_cast<StoreInst>(*I)) &&
+ P.first == SI->getPointerOperand()) {
+ // Similarly, a pair cannot be connected to a store through its
+ // pointer operand.
+ continue;
+ }
+
+ // For each use of the first variable, look for uses of the second
+ // variable...
+ for (Value::use_iterator J = P.second->use_begin(),
+ E2 = P.second->use_end(); J != E2; ++J) {
+ if ((SJ = dyn_cast<StoreInst>(*J)) &&
+ P.second == SJ->getPointerOperand())
+ continue;
+
+ // Look for <I, J>:
+ if (CandidatePairsSet.count(ValuePair(*I, *J))) {
+ VPPair VP(P, ValuePair(*I, *J));
+ ConnectedPairs[VP.first].push_back(VP.second);
+ PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect));
+ }
+
+ // Look for <J, I>:
+ if (CandidatePairsSet.count(ValuePair(*J, *I))) {
+ VPPair VP(P, ValuePair(*J, *I));
+ ConnectedPairs[VP.first].push_back(VP.second);
+ PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap));
+ }
+ }
+
+ if (Config.SplatBreaksChain) continue;
+ // Look for cases where just the first value in the pair is used by
+ // both members of another pair (splatting).
+ for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
+ if ((SJ = dyn_cast<StoreInst>(*J)) &&
+ P.first == SJ->getPointerOperand())
+ continue;
+
+ if (CandidatePairsSet.count(ValuePair(*I, *J))) {
+ VPPair VP(P, ValuePair(*I, *J));
+ ConnectedPairs[VP.first].push_back(VP.second);
+ PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
+ }
+ }
+ }
+
+ if (Config.SplatBreaksChain) return;
+ // Look for cases where just the second value in the pair is used by
+ // both members of another pair (splatting).
+ for (Value::use_iterator I = P.second->use_begin(),
+ E = P.second->use_end(); I != E; ++I) {
+ if (isa<LoadInst>(*I))
+ continue;
+ else if ((SI = dyn_cast<StoreInst>(*I)) &&
+ P.second == SI->getPointerOperand())
+ continue;
+
+ for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) {
+ if ((SJ = dyn_cast<StoreInst>(*J)) &&
+ P.second == SJ->getPointerOperand())
+ continue;
+
+ if (CandidatePairsSet.count(ValuePair(*I, *J))) {
+ VPPair VP(P, ValuePair(*I, *J));
+ ConnectedPairs[VP.first].push_back(VP.second);
+ PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
+ }
+ }
+ }
+ }
+
+ // This function figures out which pairs are connected. Two pairs are
+ // connected if some output of the first pair forms an input to both members
+ // of the second pair.
+ void BBVectorize::computeConnectedPairs(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes) {
+ for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
+ PE = PairableInsts.end(); PI != PE; ++PI) {
+ DenseMap<Value *, std::vector<Value *> >::iterator PP =
+ CandidatePairs.find(*PI);
+ if (PP == CandidatePairs.end())
+ continue;
+
+ for (std::vector<Value *>::iterator P = PP->second.begin(),
+ E = PP->second.end(); P != E; ++P)
+ computePairsConnectedTo(CandidatePairs, CandidatePairsSet,
+ PairableInsts, ConnectedPairs,
+ PairConnectionTypes, ValuePair(*PI, *P));
+ }
+
+ DEBUG(size_t TotalPairs = 0;
+ for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I =
+ ConnectedPairs.begin(), IE = ConnectedPairs.end(); I != IE; ++I)
+ TotalPairs += I->second.size();
+ dbgs() << "BBV: found " << TotalPairs
+ << " pair connections.\n");
+ }
+
+ // This function builds a set of use tuples such that <A, B> is in the set
+ // if B is in the use dag of A. If B is in the use dag of A, then B
+ // depends on the output of A.
+ void BBVectorize::buildDepMap(
+ BasicBlock &BB,
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &PairableInstUsers) {
+ DenseSet<Value *> IsInPair;
+ for (DenseMap<Value *, std::vector<Value *> >::iterator C =
+ CandidatePairs.begin(), E = CandidatePairs.end(); C != E; ++C) {
+ IsInPair.insert(C->first);
+ IsInPair.insert(C->second.begin(), C->second.end());
+ }
+
+ // Iterate through the basic block, recording all users of each
+ // pairable instruction.
+
+ BasicBlock::iterator E = BB.end(), EL =
+ BasicBlock::iterator(cast<Instruction>(PairableInsts.back()));
+ for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
+ if (IsInPair.find(I) == IsInPair.end()) continue;
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) {
+ (void) trackUsesOfI(Users, WriteSet, I, J);
+
+ if (J == EL)
+ break;
+ }
+
+ for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
+ U != E; ++U) {
+ if (IsInPair.find(*U) == IsInPair.end()) continue;
+ PairableInstUsers.insert(ValuePair(I, *U));
+ }
+
+ if (I == EL)
+ break;
+ }
+ }
+
+ // Returns true if an input to pair P is an output of pair Q and also an
+ // input of pair Q is an output of pair P. If this is the case, then these
+ // two pairs cannot be simultaneously fused.
+ bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap,
+ DenseSet<VPPair> *PairableInstUserPairSet) {
+ // Two pairs are in conflict if they are mutual Users of eachother.
+ bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) ||
+ PairableInstUsers.count(ValuePair(P.first, Q.second)) ||
+ PairableInstUsers.count(ValuePair(P.second, Q.first)) ||
+ PairableInstUsers.count(ValuePair(P.second, Q.second));
+ bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) ||
+ PairableInstUsers.count(ValuePair(Q.first, P.second)) ||
+ PairableInstUsers.count(ValuePair(Q.second, P.first)) ||
+ PairableInstUsers.count(ValuePair(Q.second, P.second));
+ if (PairableInstUserMap) {
+ // FIXME: The expensive part of the cycle check is not so much the cycle
+ // check itself but this edge insertion procedure. This needs some
+ // profiling and probably a different data structure.
+ if (PUsesQ) {
+ if (PairableInstUserPairSet->insert(VPPair(Q, P)).second)
+ (*PairableInstUserMap)[Q].push_back(P);
+ }
+ if (QUsesP) {
+ if (PairableInstUserPairSet->insert(VPPair(P, Q)).second)
+ (*PairableInstUserMap)[P].push_back(Q);
+ }
+ }
+
+ return (QUsesP && PUsesQ);
+ }
+
+ // This function walks the use graph of current pairs to see if, starting
+ // from P, the walk returns to P.
+ bool BBVectorize::pairWillFormCycle(ValuePair P,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<ValuePair> &CurrentPairs) {
+ DEBUG(if (DebugCycleCheck)
+ dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> "
+ << *P.second << "\n");
+ // A lookup table of visisted pairs is kept because the PairableInstUserMap
+ // contains non-direct associations.
+ DenseSet<ValuePair> Visited;
+ SmallVector<ValuePair, 32> Q;
+ // General depth-first post-order traversal:
+ Q.push_back(P);
+ do {
+ ValuePair QTop = Q.pop_back_val();
+ Visited.insert(QTop);
+
+ DEBUG(if (DebugCycleCheck)
+ dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> "
+ << *QTop.second << "\n");
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
+ PairableInstUserMap.find(QTop);
+ if (QQ == PairableInstUserMap.end())
+ continue;
+
+ for (std::vector<ValuePair>::iterator C = QQ->second.begin(),
+ CE = QQ->second.end(); C != CE; ++C) {
+ if (*C == P) {
+ DEBUG(dbgs()
+ << "BBV: rejected to prevent non-trivial cycle formation: "
+ << QTop.first << " <-> " << C->second << "\n");
+ return true;
+ }
+
+ if (CurrentPairs.count(*C) && !Visited.count(*C))
+ Q.push_back(*C);
+ }
+ } while (!Q.empty());
+
+ return false;
+ }
+
+ // This function builds the initial dag of connected pairs with the
+ // pair J at the root.
+ void BBVectorize::buildInitialDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG, ValuePair J) {
+ // Each of these pairs is viewed as the root node of a DAG. The DAG
+ // is then walked (depth-first). As this happens, we keep track of
+ // the pairs that compose the DAG and the maximum depth of the DAG.
+ SmallVector<ValuePairWithDepth, 32> Q;
+ // General depth-first post-order traversal:
+ Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
+ do {
+ ValuePairWithDepth QTop = Q.back();
+
+ // Push each child onto the queue:
+ bool MoreChildren = false;
+ size_t MaxChildDepth = QTop.second;
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
+ ConnectedPairs.find(QTop.first);
+ if (QQ != ConnectedPairs.end())
+ for (std::vector<ValuePair>::iterator k = QQ->second.begin(),
+ ke = QQ->second.end(); k != ke; ++k) {
+ // Make sure that this child pair is still a candidate:
+ if (CandidatePairsSet.count(*k)) {
+ DenseMap<ValuePair, size_t>::iterator C = DAG.find(*k);
+ if (C == DAG.end()) {
+ size_t d = getDepthFactor(k->first);
+ Q.push_back(ValuePairWithDepth(*k, QTop.second+d));
+ MoreChildren = true;
+ } else {
+ MaxChildDepth = std::max(MaxChildDepth, C->second);
+ }
+ }
+ }
+
+ if (!MoreChildren) {
+ // Record the current pair as part of the DAG:
+ DAG.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
+ Q.pop_back();
+ }
+ } while (!Q.empty());
+ }
+
+ // Given some initial dag, prune it by removing conflicting pairs (pairs
+ // that cannot be simultaneously chosen for vectorization).
+ void BBVectorize::pruneDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG,
+ DenseSet<ValuePair> &PrunedDAG, ValuePair J,
+ bool UseCycleCheck) {
+ SmallVector<ValuePairWithDepth, 32> Q;
+ // General depth-first post-order traversal:
+ Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
+ do {
+ ValuePairWithDepth QTop = Q.pop_back_val();
+ PrunedDAG.insert(QTop.first);
+
+ // Visit each child, pruning as necessary...
+ SmallVector<ValuePairWithDepth, 8> BestChildren;
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
+ ConnectedPairs.find(QTop.first);
+ if (QQ == ConnectedPairs.end())
+ continue;
+
+ for (std::vector<ValuePair>::iterator K = QQ->second.begin(),
+ KE = QQ->second.end(); K != KE; ++K) {
+ DenseMap<ValuePair, size_t>::iterator C = DAG.find(*K);
+ if (C == DAG.end()) continue;
+
+ // This child is in the DAG, now we need to make sure it is the
+ // best of any conflicting children. There could be multiple
+ // conflicting children, so first, determine if we're keeping
+ // this child, then delete conflicting children as necessary.
+
+ // It is also necessary to guard against pairing-induced
+ // dependencies. Consider instructions a .. x .. y .. b
+ // such that (a,b) are to be fused and (x,y) are to be fused
+ // but a is an input to x and b is an output from y. This
+ // means that y cannot be moved after b but x must be moved
+ // after b for (a,b) to be fused. In other words, after
+ // fusing (a,b) we have y .. a/b .. x where y is an input
+ // to a/b and x is an output to a/b: x and y can no longer
+ // be legally fused. To prevent this condition, we must
+ // make sure that a child pair added to the DAG is not
+ // both an input and output of an already-selected pair.
+
+ // Pairing-induced dependencies can also form from more complicated
+ // cycles. The pair vs. pair conflicts are easy to check, and so
+ // that is done explicitly for "fast rejection", and because for
+ // child vs. child conflicts, we may prefer to keep the current
+ // pair in preference to the already-selected child.
+ DenseSet<ValuePair> CurrentPairs;
+
+ bool CanAdd = true;
+ for (SmallVector<ValuePairWithDepth, 8>::iterator C2
+ = BestChildren.begin(), E2 = BestChildren.end();
+ C2 != E2; ++C2) {
+ if (C2->first.first == C->first.first ||
+ C2->first.first == C->first.second ||
+ C2->first.second == C->first.first ||
+ C2->first.second == C->first.second ||
+ pairsConflict(C2->first, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ if (C2->second >= C->second) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(C2->first);
+ }
+ }
+ if (!CanAdd) continue;
+
+ // Even worse, this child could conflict with another node already
+ // selected for the DAG. If that is the case, ignore this child.
+ for (DenseSet<ValuePair>::iterator T = PrunedDAG.begin(),
+ E2 = PrunedDAG.end(); T != E2; ++T) {
+ if (T->first == C->first.first ||
+ T->first == C->first.second ||
+ T->second == C->first.first ||
+ T->second == C->first.second ||
+ pairsConflict(*T, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(*T);
+ }
+ if (!CanAdd) continue;
+
+ // And check the queue too...
+ for (SmallVector<ValuePairWithDepth, 32>::iterator C2 = Q.begin(),
+ E2 = Q.end(); C2 != E2; ++C2) {
+ if (C2->first.first == C->first.first ||
+ C2->first.first == C->first.second ||
+ C2->first.second == C->first.first ||
+ C2->first.second == C->first.second ||
+ pairsConflict(C2->first, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(C2->first);
+ }
+ if (!CanAdd) continue;
+
+ // Last but not least, check for a conflict with any of the
+ // already-chosen pairs.
+ for (DenseMap<Value *, Value *>::iterator C2 =
+ ChosenPairs.begin(), E2 = ChosenPairs.end();
+ C2 != E2; ++C2) {
+ if (pairsConflict(*C2, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(*C2);
+ }
+ if (!CanAdd) continue;
+
+ // To check for non-trivial cycles formed by the addition of the
+ // current pair we've formed a list of all relevant pairs, now use a
+ // graph walk to check for a cycle. We start from the current pair and
+ // walk the use dag to see if we again reach the current pair. If we
+ // do, then the current pair is rejected.
+
+ // FIXME: It may be more efficient to use a topological-ordering
+ // algorithm to improve the cycle check. This should be investigated.
+ if (UseCycleCheck &&
+ pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs))
+ continue;
+
+ // This child can be added, but we may have chosen it in preference
+ // to an already-selected child. Check for this here, and if a
+ // conflict is found, then remove the previously-selected child
+ // before adding this one in its place.
+ for (SmallVector<ValuePairWithDepth, 8>::iterator C2
+ = BestChildren.begin(); C2 != BestChildren.end();) {
+ if (C2->first.first == C->first.first ||
+ C2->first.first == C->first.second ||
+ C2->first.second == C->first.first ||
+ C2->first.second == C->first.second ||
+ pairsConflict(C2->first, C->first, PairableInstUsers))
+ C2 = BestChildren.erase(C2);
+ else
+ ++C2;
+ }
+
+ BestChildren.push_back(ValuePairWithDepth(C->first, C->second));
+ }
+
+ for (SmallVector<ValuePairWithDepth, 8>::iterator C
+ = BestChildren.begin(), E2 = BestChildren.end();
+ C != E2; ++C) {
+ size_t DepthF = getDepthFactor(C->first.first);
+ Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF));
+ }
+ } while (!Q.empty());
+ }
+
+ // This function finds the best dag of mututally-compatible connected
+ // pairs, given the choice of root pairs as an iterator range.
+ void BBVectorize::findBestDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth,
+ int &BestEffSize, Value *II, std::vector<Value *>&JJ,
+ bool UseCycleCheck) {
+ for (std::vector<Value *>::iterator J = JJ.begin(), JE = JJ.end();
+ J != JE; ++J) {
+ ValuePair IJ(II, *J);
+ if (!CandidatePairsSet.count(IJ))
+ continue;
+
+ // Before going any further, make sure that this pair does not
+ // conflict with any already-selected pairs (see comment below
+ // near the DAG pruning for more details).
+ DenseSet<ValuePair> ChosenPairSet;
+ bool DoesConflict = false;
+ for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(),
+ E = ChosenPairs.end(); C != E; ++C) {
+ if (pairsConflict(*C, IJ, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ DoesConflict = true;
+ break;
+ }
+
+ ChosenPairSet.insert(*C);
+ }
+ if (DoesConflict) continue;
+
+ if (UseCycleCheck &&
+ pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet))
+ continue;
+
+ DenseMap<ValuePair, size_t> DAG;
+ buildInitialDAGFor(CandidatePairs, CandidatePairsSet,
+ PairableInsts, ConnectedPairs,
+ PairableInstUsers, ChosenPairs, DAG, IJ);
+
+ // Because we'll keep the child with the largest depth, the largest
+ // depth is still the same in the unpruned DAG.
+ size_t MaxDepth = DAG.lookup(IJ);
+
+ DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {"
+ << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
+ MaxDepth << " and size " << DAG.size() << "\n");
+
+ // At this point the DAG has been constructed, but, may contain
+ // contradictory children (meaning that different children of
+ // some dag node may be attempting to fuse the same instruction).
+ // So now we walk the dag again, in the case of a conflict,
+ // keep only the child with the largest depth. To break a tie,
+ // favor the first child.
+
+ DenseSet<ValuePair> PrunedDAG;
+ pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, PairableInstUserMap,
+ PairableInstUserPairSet,
+ ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck);
+
+ int EffSize = 0;
+ if (TTI) {
+ DenseSet<Value *> PrunedDAGInstrs;
+ for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
+ E = PrunedDAG.end(); S != E; ++S) {
+ PrunedDAGInstrs.insert(S->first);
+ PrunedDAGInstrs.insert(S->second);
+ }
+
+ // The set of pairs that have already contributed to the total cost.
+ DenseSet<ValuePair> IncomingPairs;
+
+ // If the cost model were perfect, this might not be necessary; but we
+ // need to make sure that we don't get stuck vectorizing our own
+ // shuffle chains.
+ bool HasNontrivialInsts = false;
+
+ // The node weights represent the cost savings associated with
+ // fusing the pair of instructions.
+ for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
+ E = PrunedDAG.end(); S != E; ++S) {
+ if (!isa<ShuffleVectorInst>(S->first) &&
+ !isa<InsertElementInst>(S->first) &&
+ !isa<ExtractElementInst>(S->first))
+ HasNontrivialInsts = true;
+
+ bool FlipOrder = false;
+
+ if (getDepthFactor(S->first)) {
+ int ESContrib = CandidatePairCostSavings.find(*S)->second;
+ DEBUG(if (DebugPairSelection) dbgs() << "\tweight {"
+ << *S->first << " <-> " << *S->second << "} = " <<
+ ESContrib << "\n");
+ EffSize += ESContrib;
+ }
+
+ // The edge weights contribute in a negative sense: they represent
+ // the cost of shuffles.
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator SS =
+ ConnectedPairDeps.find(*S);
+ if (SS != ConnectedPairDeps.end()) {
+ unsigned NumDepsDirect = 0, NumDepsSwap = 0;
+ for (std::vector<ValuePair>::iterator T = SS->second.begin(),
+ TE = SS->second.end(); T != TE; ++T) {
+ VPPair Q(*S, *T);
+ if (!PrunedDAG.count(Q.second))
+ continue;
+ DenseMap<VPPair, unsigned>::iterator R =
+ PairConnectionTypes.find(VPPair(Q.second, Q.first));
+ assert(R != PairConnectionTypes.end() &&
+ "Cannot find pair connection type");
+ if (R->second == PairConnectionDirect)
+ ++NumDepsDirect;
+ else if (R->second == PairConnectionSwap)
+ ++NumDepsSwap;
+ }
+
+ // If there are more swaps than direct connections, then
+ // the pair order will be flipped during fusion. So the real
+ // number of swaps is the minimum number.
+ FlipOrder = !FixedOrderPairs.count(*S) &&
+ ((NumDepsSwap > NumDepsDirect) ||
+ FixedOrderPairs.count(ValuePair(S->second, S->first)));
+
+ for (std::vector<ValuePair>::iterator T = SS->second.begin(),
+ TE = SS->second.end(); T != TE; ++T) {
+ VPPair Q(*S, *T);
+ if (!PrunedDAG.count(Q.second))
+ continue;
+ DenseMap<VPPair, unsigned>::iterator R =
+ PairConnectionTypes.find(VPPair(Q.second, Q.first));
+ assert(R != PairConnectionTypes.end() &&
+ "Cannot find pair connection type");
+ Type *Ty1 = Q.second.first->getType(),
+ *Ty2 = Q.second.second->getType();
+ Type *VTy = getVecTypeForPair(Ty1, Ty2);
+ if ((R->second == PairConnectionDirect && FlipOrder) ||
+ (R->second == PairConnectionSwap && !FlipOrder) ||
+ R->second == PairConnectionSplat) {
+ int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, VTy);
+
+ if (VTy->getVectorNumElements() == 2) {
+ if (R->second == PairConnectionSplat)
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_Broadcast, VTy));
+ else
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_Reverse, VTy));
+ }
+
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+ *Q.second.first << " <-> " << *Q.second.second <<
+ "} -> {" <<
+ *S->first << " <-> " << *S->second << "} = " <<
+ ESContrib << "\n");
+ EffSize -= ESContrib;
+ }
+ }
+ }
+
+ // Compute the cost of outgoing edges. We assume that edges outgoing
+ // to shuffles, inserts or extracts can be merged, and so contribute
+ // no additional cost.
+ if (!S->first->getType()->isVoidTy()) {
+ Type *Ty1 = S->first->getType(),
+ *Ty2 = S->second->getType();
+ Type *VTy = getVecTypeForPair(Ty1, Ty2);
+
+ bool NeedsExtraction = false;
+ for (Value::use_iterator I = S->first->use_begin(),
+ IE = S->first->use_end(); I != IE; ++I) {
+ if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+ // Shuffle can be folded if it has no other input
+ if (isa<UndefValue>(SI->getOperand(1)))
+ continue;
+ }
+ if (isa<ExtractElementInst>(*I))
+ continue;
+ if (PrunedDAGInstrs.count(*I))
+ continue;
+ NeedsExtraction = true;
+ break;
+ }
+
+ if (NeedsExtraction) {
+ int ESContrib;
+ if (Ty1->isVectorTy()) {
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ Ty1, VTy);
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1));
+ } else
+ ESContrib = (int) TTI->getVectorInstrCost(
+ Instruction::ExtractElement, VTy, 0);
+
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+ *S->first << "} = " << ESContrib << "\n");
+ EffSize -= ESContrib;
+ }
+
+ NeedsExtraction = false;
+ for (Value::use_iterator I = S->second->use_begin(),
+ IE = S->second->use_end(); I != IE; ++I) {
+ if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+ // Shuffle can be folded if it has no other input
+ if (isa<UndefValue>(SI->getOperand(1)))
+ continue;
+ }
+ if (isa<ExtractElementInst>(*I))
+ continue;
+ if (PrunedDAGInstrs.count(*I))
+ continue;
+ NeedsExtraction = true;
+ break;
+ }
+
+ if (NeedsExtraction) {
+ int ESContrib;
+ if (Ty2->isVectorTy()) {
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ Ty2, VTy);
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_ExtractSubvector, VTy,
+ Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2));
+ } else
+ ESContrib = (int) TTI->getVectorInstrCost(
+ Instruction::ExtractElement, VTy, 1);
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+ *S->second << "} = " << ESContrib << "\n");
+ EffSize -= ESContrib;
+ }
+ }
+
+ // Compute the cost of incoming edges.
+ if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) {
+ Instruction *S1 = cast<Instruction>(S->first),
+ *S2 = cast<Instruction>(S->second);
+ for (unsigned o = 0; o < S1->getNumOperands(); ++o) {
+ Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o);
+
+ // Combining constants into vector constants (or small vector
+ // constants into larger ones are assumed free).
+ if (isa<Constant>(O1) && isa<Constant>(O2))
+ continue;
+
+ if (FlipOrder)
+ std::swap(O1, O2);
+
+ ValuePair VP = ValuePair(O1, O2);
+ ValuePair VPR = ValuePair(O2, O1);
+
+ // Internal edges are not handled here.
+ if (PrunedDAG.count(VP) || PrunedDAG.count(VPR))
+ continue;
+
+ Type *Ty1 = O1->getType(),
+ *Ty2 = O2->getType();
+ Type *VTy = getVecTypeForPair(Ty1, Ty2);
+
+ // Combining vector operations of the same type is also assumed
+ // folded with other operations.
+ if (Ty1 == Ty2) {
+ // If both are insert elements, then both can be widened.
+ InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1),
+ *IEO2 = dyn_cast<InsertElementInst>(O2);
+ if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
+ continue;
+ // If both are extract elements, and both have the same input
+ // type, then they can be replaced with a shuffle
+ ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1),
+ *EIO2 = dyn_cast<ExtractElementInst>(O2);
+ if (EIO1 && EIO2 &&
+ EIO1->getOperand(0)->getType() ==
+ EIO2->getOperand(0)->getType())
+ continue;
+ // If both are a shuffle with equal operand types and only two
+ // unqiue operands, then they can be replaced with a single
+ // shuffle
+ ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1),
+ *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
+ if (SIO1 && SIO2 &&
+ SIO1->getOperand(0)->getType() ==
+ SIO2->getOperand(0)->getType()) {
+ SmallSet<Value *, 4> SIOps;
+ SIOps.insert(SIO1->getOperand(0));
+ SIOps.insert(SIO1->getOperand(1));
+ SIOps.insert(SIO2->getOperand(0));
+ SIOps.insert(SIO2->getOperand(1));
+ if (SIOps.size() <= 2)
+ continue;
+ }
+ }
+
+ int ESContrib;
+ // This pair has already been formed.
+ if (IncomingPairs.count(VP)) {
+ continue;
+ } else if (IncomingPairs.count(VPR)) {
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, VTy);
+
+ if (VTy->getVectorNumElements() == 2)
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_Reverse, VTy));
+ } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) {
+ ESContrib = (int) TTI->getVectorInstrCost(
+ Instruction::InsertElement, VTy, 0);
+ ESContrib += (int) TTI->getVectorInstrCost(
+ Instruction::InsertElement, VTy, 1);
+ } else if (!Ty1->isVectorTy()) {
+ // O1 needs to be inserted into a vector of size O2, and then
+ // both need to be shuffled together.
+ ESContrib = (int) TTI->getVectorInstrCost(
+ Instruction::InsertElement, Ty2, 0);
+ ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, Ty2);
+ } else if (!Ty2->isVectorTy()) {
+ // O2 needs to be inserted into a vector of size O1, and then
+ // both need to be shuffled together.
+ ESContrib = (int) TTI->getVectorInstrCost(
+ Instruction::InsertElement, Ty1, 0);
+ ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, Ty1);
+ } else {
+ Type *TyBig = Ty1, *TySmall = Ty2;
+ if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements())
+ std::swap(TyBig, TySmall);
+
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, TyBig);
+ if (TyBig != TySmall)
+ ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+ TyBig, TySmall);
+ }
+
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {"
+ << *O1 << " <-> " << *O2 << "} = " <<
+ ESContrib << "\n");
+ EffSize -= ESContrib;
+ IncomingPairs.insert(VP);
+ }
+ }
+ }
+
+ if (!HasNontrivialInsts) {
+ DEBUG(if (DebugPairSelection) dbgs() <<
+ "\tNo non-trivial instructions in DAG;"
+ " override to zero effective size\n");
+ EffSize = 0;
+ }
+ } else {
+ for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
+ E = PrunedDAG.end(); S != E; ++S)
+ EffSize += (int) getDepthFactor(S->first);
+ }
+
+ DEBUG(if (DebugPairSelection)
+ dbgs() << "BBV: found pruned DAG for pair {"
+ << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
+ MaxDepth << " and size " << PrunedDAG.size() <<
+ " (effective size: " << EffSize << ")\n");
+ if (((TTI && !UseChainDepthWithTI) ||
+ MaxDepth >= Config.ReqChainDepth) &&
+ EffSize > 0 && EffSize > BestEffSize) {
+ BestMaxDepth = MaxDepth;
+ BestEffSize = EffSize;
+ BestDAG = PrunedDAG;
+ }
+ }
+ }
+
+ // Given the list of candidate pairs, this function selects those
+ // that will be fused into vector instructions.
+ void BBVectorize::choosePairs(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs) {
+ bool UseCycleCheck =
+ CandidatePairsSet.size() <= Config.MaxCandPairsForCycleCheck;
+
+ DenseMap<Value *, std::vector<Value *> > CandidatePairs2;
+ for (DenseSet<ValuePair>::iterator I = CandidatePairsSet.begin(),
+ E = CandidatePairsSet.end(); I != E; ++I) {
+ std::vector<Value *> &JJ = CandidatePairs2[I->second];
+ if (JJ.empty()) JJ.reserve(32);
+ JJ.push_back(I->first);
+ }
+
+ DenseMap<ValuePair, std::vector<ValuePair> > PairableInstUserMap;
+ DenseSet<VPPair> PairableInstUserPairSet;
+ for (std::vector<Value *>::iterator I = PairableInsts.begin(),
+ E = PairableInsts.end(); I != E; ++I) {
+ // The number of possible pairings for this variable:
+ size_t NumChoices = CandidatePairs.lookup(*I).size();
+ if (!NumChoices) continue;
+
+ std::vector<Value *> &JJ = CandidatePairs[*I];
+
+ // The best pair to choose and its dag:
+ size_t BestMaxDepth = 0;
+ int BestEffSize = 0;
+ DenseSet<ValuePair> BestDAG;
+ findBestDAGFor(CandidatePairs, CandidatePairsSet,
+ CandidatePairCostSavings,
+ PairableInsts, FixedOrderPairs, PairConnectionTypes,
+ ConnectedPairs, ConnectedPairDeps,
+ PairableInstUsers, PairableInstUserMap,
+ PairableInstUserPairSet, ChosenPairs,
+ BestDAG, BestMaxDepth, BestEffSize, *I, JJ,
+ UseCycleCheck);
+
+ if (BestDAG.empty())
+ continue;
+
+ // A dag has been chosen (or not) at this point. If no dag was
+ // chosen, then this instruction, I, cannot be paired (and is no longer
+ // considered).
+
+ DEBUG(dbgs() << "BBV: selected pairs in the best DAG for: "
+ << *cast<Instruction>(*I) << "\n");
+
+ for (DenseSet<ValuePair>::iterator S = BestDAG.begin(),
+ SE2 = BestDAG.end(); S != SE2; ++S) {
+ // Insert the members of this dag into the list of chosen pairs.
+ ChosenPairs.insert(ValuePair(S->first, S->second));
+ DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " <<
+ *S->second << "\n");
+
+ // Remove all candidate pairs that have values in the chosen dag.
+ std::vector<Value *> &KK = CandidatePairs[S->first];
+ for (std::vector<Value *>::iterator K = KK.begin(), KE = KK.end();
+ K != KE; ++K) {
+ if (*K == S->second)
+ continue;
+
+ CandidatePairsSet.erase(ValuePair(S->first, *K));
+ }
+
+ std::vector<Value *> &LL = CandidatePairs2[S->second];
+ for (std::vector<Value *>::iterator L = LL.begin(), LE = LL.end();
+ L != LE; ++L) {
+ if (*L == S->first)
+ continue;
+
+ CandidatePairsSet.erase(ValuePair(*L, S->second));
+ }
+
+ std::vector<Value *> &MM = CandidatePairs[S->second];
+ for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end();
+ M != ME; ++M) {
+ assert(*M != S->first && "Flipped pair in candidate list?");
+ CandidatePairsSet.erase(ValuePair(S->second, *M));
+ }
+
+ std::vector<Value *> &NN = CandidatePairs2[S->first];
+ for (std::vector<Value *>::iterator N = NN.begin(), NE = NN.end();
+ N != NE; ++N) {
+ assert(*N != S->second && "Flipped pair in candidate list?");
+ CandidatePairsSet.erase(ValuePair(*N, S->first));
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n");
+ }
+
+ std::string getReplacementName(Instruction *I, bool IsInput, unsigned o,
+ unsigned n = 0) {
+ if (!I->hasName())
+ return "";
+
+ return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) +
+ (n > 0 ? "." + utostr(n) : "")).str();
+ }
+
+ // Returns the value that is to be used as the pointer input to the vector
+ // instruction that fuses I with J.
+ Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
+ Instruction *I, Instruction *J, unsigned o) {
+ Value *IPtr, *JPtr;
+ unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
+ int64_t OffsetInElmts;
+
+ // Note: the analysis might fail here, that is why the pair order has
+ // been precomputed (OffsetInElmts must be unused here).
+ (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+ IAddressSpace, JAddressSpace,
+ OffsetInElmts, false);
+
+ // The pointer value is taken to be the one with the lowest offset.
+ Value *VPtr = IPtr;
+
+ Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType();
+ Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType();
+ Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
+ Type *VArgPtrType = PointerType::get(VArgType,
+ cast<PointerType>(IPtr->getType())->getAddressSpace());
+ return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
+ /* insert before */ I);
+ }
+
+ void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J,
+ unsigned MaskOffset, unsigned NumInElem,
+ unsigned NumInElem1, unsigned IdxOffset,
+ std::vector<Constant*> &Mask) {
+ unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements();
+ for (unsigned v = 0; v < NumElem1; ++v) {
+ int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
+ if (m < 0) {
+ Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context));
+ } else {
+ unsigned mm = m + (int) IdxOffset;
+ if (m >= (int) NumInElem1)
+ mm += (int) NumInElem;
+
+ Mask[v+MaskOffset] =
+ ConstantInt::get(Type::getInt32Ty(Context), mm);
+ }
+ }
+ }
+
+ // Returns the value that is to be used as the vector-shuffle mask to the
+ // vector instruction that fuses I with J.
+ Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context,
+ Instruction *I, Instruction *J) {
+ // This is the shuffle mask. We need to append the second
+ // mask to the first, and the numbers need to be adjusted.
+
+ Type *ArgTypeI = I->getType();
+ Type *ArgTypeJ = J->getType();
+ Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
+
+ unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements();
+
+ // Get the total number of elements in the fused vector type.
+ // By definition, this must equal the number of elements in
+ // the final mask.
+ unsigned NumElem = cast<VectorType>(VArgType)->getNumElements();
+ std::vector<Constant*> Mask(NumElem);
+
+ Type *OpTypeI = I->getOperand(0)->getType();
+ unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements();
+ Type *OpTypeJ = J->getOperand(0)->getType();
+ unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements();
+
+ // The fused vector will be:
+ // -----------------------------------------------------
+ // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ |
+ // -----------------------------------------------------
+ // from which we'll extract NumElem total elements (where the first NumElemI
+ // of them come from the mask in I and the remainder come from the mask
+ // in J.
+
+ // For the mask from the first pair...
+ fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI,
+ 0, Mask);
+
+ // For the mask from the second pair...
+ fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ,
+ NumInElemI, Mask);
+
+ return ConstantVector::get(Mask);
+ }
+
+ bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, Value *&LOp,
+ unsigned numElemL,
+ Type *ArgTypeL, Type *ArgTypeH,
+ bool IBeforeJ, unsigned IdxOff) {
+ bool ExpandedIEChain = false;
+ if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) {
+ // If we have a pure insertelement chain, then this can be rewritten
+ // into a chain that directly builds the larger type.
+ if (isPureIEChain(LIE)) {
+ SmallVector<Value *, 8> VectElemts(numElemL,
+ UndefValue::get(ArgTypeL->getScalarType()));
+ InsertElementInst *LIENext = LIE;
+ do {
+ unsigned Idx =
+ cast<ConstantInt>(LIENext->getOperand(2))->getSExtValue();
+ VectElemts[Idx] = LIENext->getOperand(1);
+ } while ((LIENext =
+ dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
+
+ LIENext = 0;
+ Value *LIEPrev = UndefValue::get(ArgTypeH);
+ for (unsigned i = 0; i < numElemL; ++i) {
+ if (isa<UndefValue>(VectElemts[i])) continue;
+ LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i],
+ ConstantInt::get(Type::getInt32Ty(Context),
+ i + IdxOff),
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, i+1));
+ LIENext->insertBefore(IBeforeJ ? J : I);
+ LIEPrev = LIENext;
+ }
+
+ LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH);
+ ExpandedIEChain = true;
+ }
+ }
+
+ return ExpandedIEChain;
+ }
+
+ // Returns the value to be used as the specified operand of the vector
+ // instruction that fuses I with J.
+ Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, bool IBeforeJ) {
+ Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
+
+ // Compute the fused vector type for this operand
+ Type *ArgTypeI = I->getOperand(o)->getType();
+ Type *ArgTypeJ = J->getOperand(o)->getType();
+ VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
+
+ Instruction *L = I, *H = J;
+ Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
+
+ unsigned numElemL;
+ if (ArgTypeL->isVectorTy())
+ numElemL = cast<VectorType>(ArgTypeL)->getNumElements();
+ else
+ numElemL = 1;
+
+ unsigned numElemH;
+ if (ArgTypeH->isVectorTy())
+ numElemH = cast<VectorType>(ArgTypeH)->getNumElements();
+ else
+ numElemH = 1;
+
+ Value *LOp = L->getOperand(o);
+ Value *HOp = H->getOperand(o);
+ unsigned numElem = VArgType->getNumElements();
+
+ // First, we check if we can reuse the "original" vector outputs (if these
+ // exist). We might need a shuffle.
+ ExtractElementInst *LEE = dyn_cast<ExtractElementInst>(LOp);
+ ExtractElementInst *HEE = dyn_cast<ExtractElementInst>(HOp);
+ ShuffleVectorInst *LSV = dyn_cast<ShuffleVectorInst>(LOp);
+ ShuffleVectorInst *HSV = dyn_cast<ShuffleVectorInst>(HOp);
+
+ // FIXME: If we're fusing shuffle instructions, then we can't apply this
+ // optimization. The input vectors to the shuffle might be a different
+ // length from the shuffle outputs. Unfortunately, the replacement
+ // shuffle mask has already been formed, and the mask entries are sensitive
+ // to the sizes of the inputs.
+ bool IsSizeChangeShuffle =
+ isa<ShuffleVectorInst>(L) &&
+ (LOp->getType() != L->getType() || HOp->getType() != H->getType());
+
+ if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) {
+ // We can have at most two unique vector inputs.
+ bool CanUseInputs = true;
+ Value *I1, *I2 = 0;
+ if (LEE) {
+ I1 = LEE->getOperand(0);
+ } else {
+ I1 = LSV->getOperand(0);
+ I2 = LSV->getOperand(1);
+ if (I2 == I1 || isa<UndefValue>(I2))
+ I2 = 0;
+ }
+
+ if (HEE) {
+ Value *I3 = HEE->getOperand(0);
+ if (!I2 && I3 != I1)
+ I2 = I3;
+ else if (I3 != I1 && I3 != I2)
+ CanUseInputs = false;
+ } else {
+ Value *I3 = HSV->getOperand(0);
+ if (!I2 && I3 != I1)
+ I2 = I3;
+ else if (I3 != I1 && I3 != I2)
+ CanUseInputs = false;
+
+ if (CanUseInputs) {
+ Value *I4 = HSV->getOperand(1);
+ if (!isa<UndefValue>(I4)) {
+ if (!I2 && I4 != I1)
+ I2 = I4;
+ else if (I4 != I1 && I4 != I2)
+ CanUseInputs = false;
+ }
+ }
+ }
+
+ if (CanUseInputs) {
+ unsigned LOpElem =
+ cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType())
+ ->getNumElements();
+ unsigned HOpElem =
+ cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType())
+ ->getNumElements();
+
+ // We have one or two input vectors. We need to map each index of the
+ // operands to the index of the original vector.
+ SmallVector<std::pair<int, int>, 8> II(numElem);
+ for (unsigned i = 0; i < numElemL; ++i) {
+ int Idx, INum;
+ if (LEE) {
+ Idx =
+ cast<ConstantInt>(LEE->getOperand(1))->getSExtValue();
+ INum = LEE->getOperand(0) == I1 ? 0 : 1;
+ } else {
+ Idx = LSV->getMaskValue(i);
+ if (Idx < (int) LOpElem) {
+ INum = LSV->getOperand(0) == I1 ? 0 : 1;
+ } else {
+ Idx -= LOpElem;
+ INum = LSV->getOperand(1) == I1 ? 0 : 1;
+ }
+ }
+
+ II[i] = std::pair<int, int>(Idx, INum);
+ }
+ for (unsigned i = 0; i < numElemH; ++i) {
+ int Idx, INum;
+ if (HEE) {
+ Idx =
+ cast<ConstantInt>(HEE->getOperand(1))->getSExtValue();
+ INum = HEE->getOperand(0) == I1 ? 0 : 1;
+ } else {
+ Idx = HSV->getMaskValue(i);
+ if (Idx < (int) HOpElem) {
+ INum = HSV->getOperand(0) == I1 ? 0 : 1;
+ } else {
+ Idx -= HOpElem;
+ INum = HSV->getOperand(1) == I1 ? 0 : 1;
+ }
+ }
+
+ II[i + numElemL] = std::pair<int, int>(Idx, INum);
+ }
+
+ // We now have an array which tells us from which index of which
+ // input vector each element of the operand comes.
+ VectorType *I1T = cast<VectorType>(I1->getType());
+ unsigned I1Elem = I1T->getNumElements();
+
+ if (!I2) {
+ // In this case there is only one underlying vector input. Check for
+ // the trivial case where we can use the input directly.
+ if (I1Elem == numElem) {
+ bool ElemInOrder = true;
+ for (unsigned i = 0; i < numElem; ++i) {
+ if (II[i].first != (int) i && II[i].first != -1) {
+ ElemInOrder = false;
+ break;
+ }
+ }
+
+ if (ElemInOrder)
+ return I1;
+ }
+
+ // A shuffle is needed.
+ std::vector<Constant *> Mask(numElem);
+ for (unsigned i = 0; i < numElem; ++i) {
+ int Idx = II[i].first;
+ if (Idx == -1)
+ Mask[i] = UndefValue::get(Type::getInt32Ty(Context));
+ else
+ Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
+ }
+
+ Instruction *S =
+ new ShuffleVectorInst(I1, UndefValue::get(I1T),
+ ConstantVector::get(Mask),
+ getReplacementName(IBeforeJ ? I : J,
+ true, o));
+ S->insertBefore(IBeforeJ ? J : I);
+ return S;
+ }
+
+ VectorType *I2T = cast<VectorType>(I2->getType());
+ unsigned I2Elem = I2T->getNumElements();
+
+ // This input comes from two distinct vectors. The first step is to
+ // make sure that both vectors are the same length. If not, the
+ // smaller one will need to grow before they can be shuffled together.
+ if (I1Elem < I2Elem) {
+ std::vector<Constant *> Mask(I2Elem);
+ unsigned v = 0;
+ for (; v < I1Elem; ++v)
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ for (; v < I2Elem; ++v)
+ Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
+
+ Instruction *NewI1 =
+ new ShuffleVectorInst(I1, UndefValue::get(I1T),
+ ConstantVector::get(Mask),
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
+ NewI1->insertBefore(IBeforeJ ? J : I);
+ I1 = NewI1;
+ I1T = I2T;
+ I1Elem = I2Elem;
+ } else if (I1Elem > I2Elem) {
+ std::vector<Constant *> Mask(I1Elem);
+ unsigned v = 0;
+ for (; v < I2Elem; ++v)
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ for (; v < I1Elem; ++v)
+ Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
+
+ Instruction *NewI2 =
+ new ShuffleVectorInst(I2, UndefValue::get(I2T),
+ ConstantVector::get(Mask),
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
+ NewI2->insertBefore(IBeforeJ ? J : I);
+ I2 = NewI2;
+ I2T = I1T;
+ I2Elem = I1Elem;
+ }
+
+ // Now that both I1 and I2 are the same length we can shuffle them
+ // together (and use the result).
+ std::vector<Constant *> Mask(numElem);
+ for (unsigned v = 0; v < numElem; ++v) {
+ if (II[v].first == -1) {
+ Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
+ } else {
+ int Idx = II[v].first + II[v].second * I1Elem;
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
+ }
+ }
+
+ Instruction *NewOp =
+ new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask),
+ getReplacementName(IBeforeJ ? I : J, true, o));
+ NewOp->insertBefore(IBeforeJ ? J : I);
+ return NewOp;
+ }
+ }
+
+ Type *ArgType = ArgTypeL;
+ if (numElemL < numElemH) {
+ if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH,
+ ArgTypeL, VArgType, IBeforeJ, 1)) {
+ // This is another short-circuit case: we're combining a scalar into
+ // a vector that is formed by an IE chain. We've just expanded the IE
+ // chain, now insert the scalar and we're done.
+
+ Instruction *S = InsertElementInst::Create(HOp, LOp, CV0,
+ getReplacementName(IBeforeJ ? I : J, true, o));
+ S->insertBefore(IBeforeJ ? J : I);
+ return S;
+ } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL,
+ ArgTypeH, IBeforeJ)) {
+ // The two vector inputs to the shuffle must be the same length,
+ // so extend the smaller vector to be the same length as the larger one.
+ Instruction *NLOp;
+ if (numElemL > 1) {
+
+ std::vector<Constant *> Mask(numElemH);
+ unsigned v = 0;
+ for (; v < numElemL; ++v)
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ for (; v < numElemH; ++v)
+ Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
+
+ NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL),
+ ConstantVector::get(Mask),
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
+ } else {
+ NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0,
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
+ }
+
+ NLOp->insertBefore(IBeforeJ ? J : I);
+ LOp = NLOp;
+ }
+
+ ArgType = ArgTypeH;
+ } else if (numElemL > numElemH) {
+ if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
+ ArgTypeH, VArgType, IBeforeJ)) {
+ Instruction *S =
+ InsertElementInst::Create(LOp, HOp,
+ ConstantInt::get(Type::getInt32Ty(Context),
+ numElemL),
+ getReplacementName(IBeforeJ ? I : J,
+ true, o));
+ S->insertBefore(IBeforeJ ? J : I);
+ return S;
+ } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH,
+ ArgTypeL, IBeforeJ)) {
+ Instruction *NHOp;
+ if (numElemH > 1) {
+ std::vector<Constant *> Mask(numElemL);
+ unsigned v = 0;
+ for (; v < numElemH; ++v)
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ for (; v < numElemL; ++v)
+ Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
+
+ NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH),
+ ConstantVector::get(Mask),
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
+ } else {
+ NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0,
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
+ }
+
+ NHOp->insertBefore(IBeforeJ ? J : I);
+ HOp = NHOp;
+ }
+ }
+
+ if (ArgType->isVectorTy()) {
+ unsigned numElem = cast<VectorType>(VArgType)->getNumElements();
+ std::vector<Constant*> Mask(numElem);
+ for (unsigned v = 0; v < numElem; ++v) {
+ unsigned Idx = v;
+ // If the low vector was expanded, we need to skip the extra
+ // undefined entries.
+ if (v >= numElemL && numElemH > numElemL)
+ Idx += (numElemH - numElemL);
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
+ }
+
+ Instruction *BV = new ShuffleVectorInst(LOp, HOp,
+ ConstantVector::get(Mask),
+ getReplacementName(IBeforeJ ? I : J, true, o));
+ BV->insertBefore(IBeforeJ ? J : I);
+ return BV;
+ }
+
+ Instruction *BV1 = InsertElementInst::Create(
+ UndefValue::get(VArgType), LOp, CV0,
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
+ BV1->insertBefore(IBeforeJ ? J : I);
+ Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1,
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 2));
+ BV2->insertBefore(IBeforeJ ? J : I);
+ return BV2;
+ }
+
+ // This function creates an array of values that will be used as the inputs
+ // to the vector instruction that fuses I with J.
+ void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
+ Instruction *I, Instruction *J,
+ SmallVector<Value *, 3> &ReplacedOperands,
+ bool IBeforeJ) {
+ unsigned NumOperands = I->getNumOperands();
+
+ for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
+ // Iterate backward so that we look at the store pointer
+ // first and know whether or not we need to flip the inputs.
+
+ if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
+ // This is the pointer for a load/store instruction.
+ ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o);
+ continue;
+ } else if (isa<CallInst>(I)) {
+ Function *F = cast<CallInst>(I)->getCalledFunction();
+ Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
+ if (o == NumOperands-1) {
+ BasicBlock &BB = *I->getParent();
+
+ Module *M = BB.getParent()->getParent();
+ Type *ArgTypeI = I->getType();
+ Type *ArgTypeJ = J->getType();
+ Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
+
+ ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType);
+ continue;
+ } else if (IID == Intrinsic::powi && o == 1) {
+ // The second argument of powi is a single integer and we've already
+ // checked that both arguments are equal. As a result, we just keep
+ // I's second argument.
+ ReplacedOperands[o] = I->getOperand(o);
+ continue;
+ }
+ } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) {
+ ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J);
+ continue;
+ }
+
+ ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ);
+ }
+ }
+
+ // This function creates two values that represent the outputs of the
+ // original I and J instructions. These are generally vector shuffles
+ // or extracts. In many cases, these will end up being unused and, thus,
+ // eliminated by later passes.
+ void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, Instruction *K,
+ Instruction *&InsertionPt,
+ Instruction *&K1, Instruction *&K2) {
+ if (isa<StoreInst>(I)) {
+ AA->replaceWithNewValue(I, K);
+ AA->replaceWithNewValue(J, K);
+ } else {
+ Type *IType = I->getType();
+ Type *JType = J->getType();
+
+ VectorType *VType = getVecTypeForPair(IType, JType);
+ unsigned numElem = VType->getNumElements();
+
+ unsigned numElemI, numElemJ;
+ if (IType->isVectorTy())
+ numElemI = cast<VectorType>(IType)->getNumElements();
+ else
+ numElemI = 1;
+
+ if (JType->isVectorTy())
+ numElemJ = cast<VectorType>(JType)->getNumElements();
+ else
+ numElemJ = 1;
+
+ if (IType->isVectorTy()) {
+ std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
+ for (unsigned v = 0; v < numElemI; ++v) {
+ Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v);
+ }
+
+ K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get( Mask1),
+ getReplacementName(K, false, 1));
+ } else {
+ Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ K1 = ExtractElementInst::Create(K, CV0,
+ getReplacementName(K, false, 1));
+ }
+
+ if (JType->isVectorTy()) {
+ std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ);
+ for (unsigned v = 0; v < numElemJ; ++v) {
+ Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v);
+ }
+
+ K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get( Mask2),
+ getReplacementName(K, false, 2));
+ } else {
+ Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1);
+ K2 = ExtractElementInst::Create(K, CV1,
+ getReplacementName(K, false, 2));
+ }
+
+ K1->insertAfter(K);
+ K2->insertAfter(K1);
+ InsertionPt = K2;
+ }
+ }
+
+ // Move all uses of the function I (including pairing-induced uses) after J.
+ bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
+ Instruction *I, Instruction *J) {
+ // Skip to the first instruction past I.
+ BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ for (; cast<Instruction>(L) != J; ++L)
+ (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs);
+
+ assert(cast<Instruction>(L) == J &&
+ "Tracking has not proceeded far enough to check for dependencies");
+ // If J is now in the use set of I, then trackUsesOfI will return true
+ // and we have a dependency cycle (and the fusing operation must abort).
+ return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs);
+ }
+
+ // Move all uses of the function I (including pairing-induced uses) after J.
+ void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
+ Instruction *&InsertionPt,
+ Instruction *I, Instruction *J) {
+ // Skip to the first instruction past I.
+ BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ for (; cast<Instruction>(L) != J;) {
+ if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) {
+ // Move this instruction
+ Instruction *InstToMove = L; ++L;
+
+ DEBUG(dbgs() << "BBV: moving: " << *InstToMove <<
+ " to after " << *InsertionPt << "\n");
+ InstToMove->removeFromParent();
+ InstToMove->insertAfter(InsertionPt);
+ InsertionPt = InstToMove;
+ } else {
+ ++L;
+ }
+ }
+ }
+
+ // Collect all load instruction that are in the move set of a given first
+ // pair member. These loads depend on the first instruction, I, and so need
+ // to be moved after J (the second instruction) when the pair is fused.
+ void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
+ Instruction *I) {
+ // Skip to the first instruction past I.
+ BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+
+ // Note: We cannot end the loop when we reach J because J could be moved
+ // farther down the use chain by another instruction pairing. Also, J
+ // could be before I if this is an inverted input.
+ for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) {
+ if (trackUsesOfI(Users, WriteSet, I, L)) {
+ if (L->mayReadFromMemory()) {
+ LoadMoveSet[L].push_back(I);
+ LoadMoveSetPairs.insert(ValuePair(L, I));
+ }
+ }
+ }
+ }
+
+ // In cases where both load/stores and the computation of their pointers
+ // are chosen for vectorization, we can end up in a situation where the
+ // aliasing analysis starts returning different query results as the
+ // process of fusing instruction pairs continues. Because the algorithm
+ // relies on finding the same use dags here as were found earlier, we'll
+ // need to precompute the necessary aliasing information here and then
+ // manually update it during the fusion process.
+ void BBVectorize::collectLoadMoveSet(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs) {
+ for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
+ PIE = PairableInsts.end(); PI != PIE; ++PI) {
+ DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
+ if (P == ChosenPairs.end()) continue;
+
+ Instruction *I = cast<Instruction>(P->first);
+ collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet,
+ LoadMoveSetPairs, I);
+ }
+ }
+
+ // When the first instruction in each pair is cloned, it will inherit its
+ // parent's metadata. This metadata must be combined with that of the other
+ // instruction in a safe way.
+ void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) {
+ SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata;
+ K->getAllMetadataOtherThanDebugLoc(Metadata);
+ for (unsigned i = 0, n = Metadata.size(); i < n; ++i) {
+ unsigned Kind = Metadata[i].first;
+ MDNode *JMD = J->getMetadata(Kind);
+ MDNode *KMD = Metadata[i].second;
+
+ switch (Kind) {
+ default:
+ K->setMetadata(Kind, 0); // Remove unknown metadata
+ break;
+ case LLVMContext::MD_tbaa:
+ K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
+ break;
+ case LLVMContext::MD_fpmath:
+ K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
+ break;
+ }
+ }
+ }
+
+ // This function fuses the chosen instruction pairs into vector instructions,
+ // taking care preserve any needed scalar outputs and, then, it reorders the
+ // remaining instructions as needed (users of the first member of the pair
+ // need to be moved to after the location of the second member of the pair
+ // because the vector instruction is inserted in the location of the pair's
+ // second member).
+ void BBVectorize::fuseChosenPairs(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps) {
+ LLVMContext& Context = BB.getContext();
+
+ // During the vectorization process, the order of the pairs to be fused
+ // could be flipped. So we'll add each pair, flipped, into the ChosenPairs
+ // list. After a pair is fused, the flipped pair is removed from the list.
+ DenseSet<ValuePair> FlippedPairs;
+ for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(),
+ E = ChosenPairs.end(); P != E; ++P)
+ FlippedPairs.insert(ValuePair(P->second, P->first));
+ for (DenseSet<ValuePair>::iterator P = FlippedPairs.begin(),
+ E = FlippedPairs.end(); P != E; ++P)
+ ChosenPairs.insert(*P);
+
+ DenseMap<Value *, std::vector<Value *> > LoadMoveSet;
+ DenseSet<ValuePair> LoadMoveSetPairs;
+ collectLoadMoveSet(BB, PairableInsts, ChosenPairs,
+ LoadMoveSet, LoadMoveSetPairs);
+
+ DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
+
+ for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
+ DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI);
+ if (P == ChosenPairs.end()) {
+ ++PI;
+ continue;
+ }
+
+ if (getDepthFactor(P->first) == 0) {
+ // These instructions are not really fused, but are tracked as though
+ // they are. Any case in which it would be interesting to fuse them
+ // will be taken care of by InstCombine.
+ --NumFusedOps;
+ ++PI;
+ continue;
+ }
+
+ Instruction *I = cast<Instruction>(P->first),
+ *J = cast<Instruction>(P->second);
+
+ DEBUG(dbgs() << "BBV: fusing: " << *I <<
+ " <-> " << *J << "\n");
+
+ // Remove the pair and flipped pair from the list.
+ DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second);
+ assert(FP != ChosenPairs.end() && "Flipped pair not found in list");
+ ChosenPairs.erase(FP);
+ ChosenPairs.erase(P);
+
+ if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) {
+ DEBUG(dbgs() << "BBV: fusion of: " << *I <<
+ " <-> " << *J <<
+ " aborted because of non-trivial dependency cycle\n");
+ --NumFusedOps;
+ ++PI;
+ continue;
+ }
+
+ // If the pair must have the other order, then flip it.
+ bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I));
+ if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) {
+ // This pair does not have a fixed order, and so we might want to
+ // flip it if that will yield fewer shuffles. We count the number
+ // of dependencies connected via swaps, and those directly connected,
+ // and flip the order if the number of swaps is greater.
+ bool OrigOrder = true;
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator IJ =
+ ConnectedPairDeps.find(ValuePair(I, J));
+ if (IJ == ConnectedPairDeps.end()) {
+ IJ = ConnectedPairDeps.find(ValuePair(J, I));
+ OrigOrder = false;
+ }
+
+ if (IJ != ConnectedPairDeps.end()) {
+ unsigned NumDepsDirect = 0, NumDepsSwap = 0;
+ for (std::vector<ValuePair>::iterator T = IJ->second.begin(),
+ TE = IJ->second.end(); T != TE; ++T) {
+ VPPair Q(IJ->first, *T);
+ DenseMap<VPPair, unsigned>::iterator R =
+ PairConnectionTypes.find(VPPair(Q.second, Q.first));
+ assert(R != PairConnectionTypes.end() &&
+ "Cannot find pair connection type");
+ if (R->second == PairConnectionDirect)
+ ++NumDepsDirect;
+ else if (R->second == PairConnectionSwap)
+ ++NumDepsSwap;
+ }
+
+ if (!OrigOrder)
+ std::swap(NumDepsDirect, NumDepsSwap);
+
+ if (NumDepsSwap > NumDepsDirect) {
+ FlipPairOrder = true;
+ DEBUG(dbgs() << "BBV: reordering pair: " << *I <<
+ " <-> " << *J << "\n");
+ }
+ }
+ }
+
+ Instruction *L = I, *H = J;
+ if (FlipPairOrder)
+ std::swap(H, L);
+
+ // If the pair being fused uses the opposite order from that in the pair
+ // connection map, then we need to flip the types.
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator HL =
+ ConnectedPairs.find(ValuePair(H, L));
+ if (HL != ConnectedPairs.end())
+ for (std::vector<ValuePair>::iterator T = HL->second.begin(),
+ TE = HL->second.end(); T != TE; ++T) {
+ VPPair Q(HL->first, *T);
+ DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(Q);
+ assert(R != PairConnectionTypes.end() &&
+ "Cannot find pair connection type");
+ if (R->second == PairConnectionDirect)
+ R->second = PairConnectionSwap;
+ else if (R->second == PairConnectionSwap)
+ R->second = PairConnectionDirect;
+ }
+
+ bool LBeforeH = !FlipPairOrder;
+ unsigned NumOperands = I->getNumOperands();
+ SmallVector<Value *, 3> ReplacedOperands(NumOperands);
+ getReplacementInputsForPair(Context, L, H, ReplacedOperands,
+ LBeforeH);
+
+ // Make a copy of the original operation, change its type to the vector
+ // type and replace its operands with the vector operands.
+ Instruction *K = L->clone();
+ if (L->hasName())
+ K->takeName(L);
+ else if (H->hasName())
+ K->takeName(H);
+
+ if (!isa<StoreInst>(K))
+ K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
+
+ combineMetadata(K, H);
+ K->intersectOptionalDataWith(H);
+
+ for (unsigned o = 0; o < NumOperands; ++o)
+ K->setOperand(o, ReplacedOperands[o]);
+
+ K->insertAfter(J);
+
+ // Instruction insertion point:
+ Instruction *InsertionPt = K;
+ Instruction *K1 = 0, *K2 = 0;
+ replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
+
+ // The use dag of the first original instruction must be moved to after
+ // the location of the second instruction. The entire use dag of the
+ // first instruction is disjoint from the input dag of the second
+ // (by definition), and so commutes with it.
+
+ moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J);
+
+ if (!isa<StoreInst>(I)) {
+ L->replaceAllUsesWith(K1);
+ H->replaceAllUsesWith(K2);
+ AA->replaceWithNewValue(L, K1);
+ AA->replaceWithNewValue(H, K2);
+ }
+
+ // Instructions that may read from memory may be in the load move set.
+ // Once an instruction is fused, we no longer need its move set, and so
+ // the values of the map never need to be updated. However, when a load
+ // is fused, we need to merge the entries from both instructions in the
+ // pair in case those instructions were in the move set of some other
+ // yet-to-be-fused pair. The loads in question are the keys of the map.
+ if (I->mayReadFromMemory()) {
+ std::vector<ValuePair> NewSetMembers;
+ DenseMap<Value *, std::vector<Value *> >::iterator II =
+ LoadMoveSet.find(I);
+ if (II != LoadMoveSet.end())
+ for (std::vector<Value *>::iterator N = II->second.begin(),
+ NE = II->second.end(); N != NE; ++N)
+ NewSetMembers.push_back(ValuePair(K, *N));
+ DenseMap<Value *, std::vector<Value *> >::iterator JJ =
+ LoadMoveSet.find(J);
+ if (JJ != LoadMoveSet.end())
+ for (std::vector<Value *>::iterator N = JJ->second.begin(),
+ NE = JJ->second.end(); N != NE; ++N)
+ NewSetMembers.push_back(ValuePair(K, *N));
+ for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(),
+ AE = NewSetMembers.end(); A != AE; ++A) {
+ LoadMoveSet[A->first].push_back(A->second);
+ LoadMoveSetPairs.insert(*A);
+ }
+ }
+
+ // Before removing I, set the iterator to the next instruction.
+ PI = llvm::next(BasicBlock::iterator(I));
+ if (cast<Instruction>(PI) == J)
+ ++PI;
+
+ SE->forgetValue(I);
+ SE->forgetValue(J);
+ I->eraseFromParent();
+ J->eraseFromParent();
+
+ DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" <<
+ BB << "\n");
+ }
+
+ DEBUG(dbgs() << "BBV: final: \n" << BB << "\n");
+ }
+}
+
+char BBVectorize::ID = 0;
+static const char bb_vectorize_name[] = "Basic-Block Vectorization";
+INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
+
+BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
+ return new BBVectorize(C);
+}
+
+bool
+llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
+ BBVectorize BBVectorizer(P, C);
+ return BBVectorizer.vectorizeBB(BB);
+}
+
+//===----------------------------------------------------------------------===//
+VectorizeConfig::VectorizeConfig() {
+ VectorBits = ::VectorBits;
+ VectorizeBools = !::NoBools;
+ VectorizeInts = !::NoInts;
+ VectorizeFloats = !::NoFloats;
+ VectorizePointers = !::NoPointers;
+ VectorizeCasts = !::NoCasts;
+ VectorizeMath = !::NoMath;
+ VectorizeFMA = !::NoFMA;
+ VectorizeSelect = !::NoSelect;
+ VectorizeCmp = !::NoCmp;
+ VectorizeGEP = !::NoGEP;
+ VectorizeMemOps = !::NoMemOps;
+ AlignedOnly = ::AlignedOnly;
+ ReqChainDepth= ::ReqChainDepth;
+ SearchLimit = ::SearchLimit;
+ MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
+ SplatBreaksChain = ::SplatBreaksChain;
+ MaxInsts = ::MaxInsts;
+ MaxPairs = ::MaxPairs;
+ MaxIter = ::MaxIter;
+ Pow2LenOnly = ::Pow2LenOnly;
+ NoMemOpBoost = ::NoMemOpBoost;
+ FastDep = ::FastDep;
+}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
new file mode 100644
index 000000000000..acf2b819b813
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -0,0 +1,3504 @@
+//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
+// and generates target-independent LLVM-IR. Legalization of the IR is done
+// in the codegen. However, the vectorizer uses (will use) the codegen
+// interfaces to generate IR that is likely to result in an optimal binary.
+//
+// The loop vectorizer combines consecutive loop iterations into a single
+// 'wide' iteration. After this transformation the index is incremented
+// by the SIMD vector width, and not by one.
+//
+// This pass has three parts:
+// 1. The main loop pass that drives the different parts.
+// 2. LoopVectorizationLegality - A unit that checks for the legality
+// of the vectorization.
+// 3. InnerLoopVectorizer - A unit that performs the actual
+// widening of instructions.
+// 4. LoopVectorizationCostModel - A unit that checks for the profitability
+// of vectorization. It decides on the optimal vector width, which
+// can be one, if vectorization is not profitable.
+//
+//===----------------------------------------------------------------------===//
+//
+// The reduction-variable vectorization is based on the paper:
+// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
+//
+// Variable uniformity checks are inspired by:
+// Karrenberg, R. and Hack, S. Whole Function Vectorization.
+//
+// Other ideas/concepts are from:
+// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
+//
+// S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of
+// Vectorizing Compilers.
+//
+//===----------------------------------------------------------------------===//
+
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
+ cl::desc("Sets the SIMD width. Zero is autoselect."));
+
+static cl::opt<unsigned>
+VectorizationUnroll("force-vector-unroll", cl::init(0), cl::Hidden,
+ cl::desc("Sets the vectorization unroll count. "
+ "Zero is autoselect."));
+
+static cl::opt<bool>
+EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
+ cl::desc("Enable if-conversion during vectorization."));
+
+/// We don't vectorize loops with a known constant trip count below this number.
+static cl::opt<unsigned>
+TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
+ cl::Hidden,
+ cl::desc("Don't vectorize loops with a constant "
+ "trip count that is smaller than this "
+ "value."));
+
+/// We don't unroll loops with a known constant trip count below this number.
+static const unsigned TinyTripCountUnrollThreshold = 128;
+
+/// When performing a runtime memory check, do not check more than this
+/// number of pointers. Notice that the check is quadratic!
+static const unsigned RuntimeMemoryCheckThreshold = 4;
+
+/// We use a metadata with this name to indicate that a scalar loop was
+/// vectorized and that we don't need to re-vectorize it if we run into it
+/// again.
+static const char*
+AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized";
+
+namespace {
+
+// Forward declarations.
+class LoopVectorizationLegality;
+class LoopVectorizationCostModel;
+
+/// InnerLoopVectorizer vectorizes loops which contain only one basic
+/// block to a specified vectorization factor (VF).
+/// This class performs the widening of scalars into vectors, or multiple
+/// scalars. This class also implements the following features:
+/// * It inserts an epilogue loop for handling loops that don't have iteration
+/// counts that are known to be a multiple of the vectorization factor.
+/// * It handles the code generation for reduction variables.
+/// * Scalarization (implementation using scalars) of un-vectorizable
+/// instructions.
+/// InnerLoopVectorizer does not perform any vectorization-legality
+/// checks, and relies on the caller to check for the different legality
+/// aspects. The InnerLoopVectorizer relies on the
+/// LoopVectorizationLegality class to provide information about the induction
+/// and reduction variables that were found to a given vectorization factor.
+class InnerLoopVectorizer {
+public:
+ InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, DataLayout *DL,
+ const TargetLibraryInfo *TLI, unsigned VecWidth,
+ unsigned UnrollFactor)
+ : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
+ VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0),
+ OldInduction(0), WidenMap(UnrollFactor) {}
+
+ // Perform the actual loop widening (vectorization).
+ void vectorize(LoopVectorizationLegality *Legal) {
+ // Create a new empty loop. Unlink the old loop and connect the new one.
+ createEmptyLoop(Legal);
+ // Widen each instruction in the old loop to a new one in the new loop.
+ // Use the Legality module to find the induction and reduction variables.
+ vectorizeLoop(Legal);
+ // Register the new loop and update the analysis passes.
+ updateAnalysis();
+ }
+
+private:
+ /// A small list of PHINodes.
+ typedef SmallVector<PHINode*, 4> PhiVector;
+ /// When we unroll loops we have multiple vector values for each scalar.
+ /// This data structure holds the unrolled and vectorized values that
+ /// originated from one scalar instruction.
+ typedef SmallVector<Value*, 2> VectorParts;
+
+ /// Add code that checks at runtime if the accessed arrays overlap.
+ /// Returns the comparator value or NULL if no check is needed.
+ Instruction *addRuntimeCheck(LoopVectorizationLegality *Legal,
+ Instruction *Loc);
+ /// Create an empty loop, based on the loop ranges of the old loop.
+ void createEmptyLoop(LoopVectorizationLegality *Legal);
+ /// Copy and widen the instructions from the old loop.
+ void vectorizeLoop(LoopVectorizationLegality *Legal);
+
+ /// A helper function that computes the predicate of the block BB, assuming
+ /// that the header block of the loop is set to True. It returns the *entry*
+ /// mask for the block BB.
+ VectorParts createBlockInMask(BasicBlock *BB);
+ /// A helper function that computes the predicate of the edge between SRC
+ /// and DST.
+ VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
+
+ /// A helper function to vectorize a single BB within the innermost loop.
+ void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
+ PhiVector *PV);
+
+ /// Insert the new loop to the loop hierarchy and pass manager
+ /// and update the analysis passes.
+ void updateAnalysis();
+
+ /// This instruction is un-vectorizable. Implement it as a sequence
+ /// of scalars.
+ void scalarizeInstruction(Instruction *Instr);
+
+ /// Vectorize Load and Store instructions,
+ void vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality *Legal);
+
+ /// Create a broadcast instruction. This method generates a broadcast
+ /// instruction (shuffle) for loop invariant values and for the induction
+ /// value. If this is the induction variable then we extend it to N, N+1, ...
+ /// this is needed because each iteration in the loop corresponds to a SIMD
+ /// element.
+ Value *getBroadcastInstrs(Value *V);
+
+ /// This function adds 0, 1, 2 ... to each vector element, starting at zero.
+ /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
+ /// The sequence starts at StartIndex.
+ Value *getConsecutiveVector(Value* Val, unsigned StartIdx, bool Negate);
+
+ /// When we go over instructions in the basic block we rely on previous
+ /// values within the current basic block or on loop invariant values.
+ /// When we widen (vectorize) values we place them in the map. If the values
+ /// are not within the map, they have to be loop invariant, so we simply
+ /// broadcast them into a vector.
+ VectorParts &getVectorValue(Value *V);
+
+ /// Generate a shuffle sequence that will reverse the vector Vec.
+ Value *reverseVector(Value *Vec);
+
+ /// This is a helper class that holds the vectorizer state. It maps scalar
+ /// instructions to vector instructions. When the code is 'unrolled' then
+ /// then a single scalar value is mapped to multiple vector parts. The parts
+ /// are stored in the VectorPart type.
+ struct ValueMap {
+ /// C'tor. UnrollFactor controls the number of vectors ('parts') that
+ /// are mapped.
+ ValueMap(unsigned UnrollFactor) : UF(UnrollFactor) {}
+
+ /// \return True if 'Key' is saved in the Value Map.
+ bool has(Value *Key) const { return MapStorage.count(Key); }
+
+ /// Initializes a new entry in the map. Sets all of the vector parts to the
+ /// save value in 'Val'.
+ /// \return A reference to a vector with splat values.
+ VectorParts &splat(Value *Key, Value *Val) {
+ VectorParts &Entry = MapStorage[Key];
+ Entry.assign(UF, Val);
+ return Entry;
+ }
+
+ ///\return A reference to the value that is stored at 'Key'.
+ VectorParts &get(Value *Key) {
+ VectorParts &Entry = MapStorage[Key];
+ if (Entry.empty())
+ Entry.resize(UF);
+ assert(Entry.size() == UF);
+ return Entry;
+ }
+
+ private:
+ /// The unroll factor. Each entry in the map stores this number of vector
+ /// elements.
+ unsigned UF;
+
+ /// Map storage. We use std::map and not DenseMap because insertions to a
+ /// dense map invalidates its iterators.
+ std::map<Value *, VectorParts> MapStorage;
+ };
+
+ /// The original loop.
+ Loop *OrigLoop;
+ /// Scev analysis to use.
+ ScalarEvolution *SE;
+ /// Loop Info.
+ LoopInfo *LI;
+ /// Dominator Tree.
+ DominatorTree *DT;
+ /// Data Layout.
+ DataLayout *DL;
+ /// Target Library Info.
+ const TargetLibraryInfo *TLI;
+
+ /// The vectorization SIMD factor to use. Each vector will have this many
+ /// vector elements.
+ unsigned VF;
+ /// The vectorization unroll factor to use. Each scalar is vectorized to this
+ /// many different vector instructions.
+ unsigned UF;
+
+ /// The builder that we use
+ IRBuilder<> Builder;
+
+ // --- Vectorization state ---
+
+ /// The vector-loop preheader.
+ BasicBlock *LoopVectorPreHeader;
+ /// The scalar-loop preheader.
+ BasicBlock *LoopScalarPreHeader;
+ /// Middle Block between the vector and the scalar.
+ BasicBlock *LoopMiddleBlock;
+ ///The ExitBlock of the scalar loop.
+ BasicBlock *LoopExitBlock;
+ ///The vector loop body.
+ BasicBlock *LoopVectorBody;
+ ///The scalar loop body.
+ BasicBlock *LoopScalarBody;
+ /// A list of all bypass blocks. The first block is the entry of the loop.
+ SmallVector<BasicBlock *, 4> LoopBypassBlocks;
+
+ /// The new Induction variable which was added to the new block.
+ PHINode *Induction;
+ /// The induction variable of the old basic block.
+ PHINode *OldInduction;
+ /// Maps scalars to widened vectors.
+ ValueMap WidenMap;
+};
+
+/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
+/// to what vectorization factor.
+/// This class does not look at the profitability of vectorization, only the
+/// legality. This class has two main kinds of checks:
+/// * Memory checks - The code in canVectorizeMemory checks if vectorization
+/// will change the order of memory accesses in a way that will change the
+/// correctness of the program.
+/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
+/// checks for a number of different conditions, such as the availability of a
+/// single induction variable, that all types are supported and vectorize-able,
+/// etc. This code reflects the capabilities of InnerLoopVectorizer.
+/// This class is also used by InnerLoopVectorizer for identifying
+/// induction variable and the different reduction variables.
+class LoopVectorizationLegality {
+public:
+ LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
+ DominatorTree *DT, TargetTransformInfo* TTI,
+ AliasAnalysis *AA, TargetLibraryInfo *TLI)
+ : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
+ Induction(0) {}
+
+ /// This enum represents the kinds of reductions that we support.
+ enum ReductionKind {
+ RK_NoReduction, ///< Not a reduction.
+ RK_IntegerAdd, ///< Sum of integers.
+ RK_IntegerMult, ///< Product of integers.
+ RK_IntegerOr, ///< Bitwise or logical OR of numbers.
+ RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
+ RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
+ RK_FloatAdd, ///< Sum of floats.
+ RK_FloatMult ///< Product of floats.
+ };
+
+ /// This enum represents the kinds of inductions that we support.
+ enum InductionKind {
+ IK_NoInduction, ///< Not an induction variable.
+ IK_IntInduction, ///< Integer induction variable. Step = 1.
+ IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1.
+ IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem).
+ IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
+ };
+
+ /// This POD struct holds information about reduction variables.
+ struct ReductionDescriptor {
+ ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
+ Kind(RK_NoReduction) {}
+
+ ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+
+ // The starting value of the reduction.
+ // It does not have to be zero!
+ Value *StartValue;
+ // The instruction who's value is used outside the loop.
+ Instruction *LoopExitInstr;
+ // The kind of the reduction.
+ ReductionKind Kind;
+ };
+
+ // This POD struct holds information about the memory runtime legality
+ // check that a group of pointers do not overlap.
+ struct RuntimePointerCheck {
+ RuntimePointerCheck() : Need(false) {}
+
+ /// Reset the state of the pointer runtime information.
+ void reset() {
+ Need = false;
+ Pointers.clear();
+ Starts.clear();
+ Ends.clear();
+ }
+
+ /// Insert a pointer and calculate the start and end SCEVs.
+ void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr);
+
+ /// This flag indicates if we need to add the runtime check.
+ bool Need;
+ /// Holds the pointers that we need to check.
+ SmallVector<Value*, 2> Pointers;
+ /// Holds the pointer value at the beginning of the loop.
+ SmallVector<const SCEV*, 2> Starts;
+ /// Holds the pointer value at the end of the loop.
+ SmallVector<const SCEV*, 2> Ends;
+ };
+
+ /// A POD for saving information about induction variables.
+ struct InductionInfo {
+ InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
+ InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
+ /// Start value.
+ Value *StartValue;
+ /// Induction kind.
+ InductionKind IK;
+ };
+
+ /// ReductionList contains the reduction descriptors for all
+ /// of the reductions that were found in the loop.
+ typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
+
+ /// InductionList saves induction variables and maps them to the
+ /// induction descriptor.
+ typedef MapVector<PHINode*, InductionInfo> InductionList;
+
+ /// Alias(Multi)Map stores the values (GEPs or underlying objects and their
+ /// respective Store/Load instruction(s) to calculate aliasing.
+ typedef MapVector<Value*, Instruction* > AliasMap;
+ typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
+
+ /// Returns true if it is legal to vectorize this loop.
+ /// This does not mean that it is profitable to vectorize this
+ /// loop, only that it is legal to do so.
+ bool canVectorize();
+
+ /// Returns the Induction variable.
+ PHINode *getInduction() { return Induction; }
+
+ /// Returns the reduction variables found in the loop.
+ ReductionList *getReductionVars() { return &Reductions; }
+
+ /// Returns the induction variables found in the loop.
+ InductionList *getInductionVars() { return &Inductions; }
+
+ /// Returns True if V is an induction variable in this loop.
+ bool isInductionVariable(const Value *V);
+
+ /// Return true if the block BB needs to be predicated in order for the loop
+ /// to be vectorized.
+ bool blockNeedsPredication(BasicBlock *BB);
+
+ /// Check if this pointer is consecutive when vectorizing. This happens
+ /// when the last index of the GEP is the induction variable, or that the
+ /// pointer itself is an induction variable.
+ /// This check allows us to vectorize A[idx] into a wide load/store.
+ /// Returns:
+ /// 0 - Stride is unknown or non consecutive.
+ /// 1 - Address is consecutive.
+ /// -1 - Address is consecutive, and decreasing.
+ int isConsecutivePtr(Value *Ptr);
+
+ /// Returns true if the value V is uniform within the loop.
+ bool isUniform(Value *V);
+
+ /// Returns true if this instruction will remain scalar after vectorization.
+ bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
+
+ /// Returns the information that we collected about runtime memory check.
+ RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
+private:
+ /// Check if a single basic block loop is vectorizable.
+ /// At this point we know that this is a loop with a constant trip count
+ /// and we only need to check individual instructions.
+ bool canVectorizeInstrs();
+
+ /// When we vectorize loops we may change the order in which
+ /// we read and write from memory. This method checks if it is
+ /// legal to vectorize the code, considering only memory constrains.
+ /// Returns true if the loop is vectorizable
+ bool canVectorizeMemory();
+
+ /// Return true if we can vectorize this loop using the IF-conversion
+ /// transformation.
+ bool canVectorizeWithIfConvert();
+
+ /// Collect the variables that need to stay uniform after vectorization.
+ void collectLoopUniforms();
+
+ /// Return true if all of the instructions in the block can be speculatively
+ /// executed.
+ bool blockCanBePredicated(BasicBlock *BB);
+
+ /// Returns True, if 'Phi' is the kind of reduction variable for type
+ /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
+ bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
+ /// Returns true if the instruction I can be a reduction variable of type
+ /// 'Kind'.
+ bool isReductionInstr(Instruction *I, ReductionKind Kind);
+ /// Returns the induction kind of Phi. This function may return NoInduction
+ /// if the PHI is not an induction variable.
+ InductionKind isInductionVariable(PHINode *Phi);
+ /// Return true if can compute the address bounds of Ptr within the loop.
+ bool hasComputableBounds(Value *Ptr);
+ /// Return true if there is the chance of write reorder.
+ bool hasPossibleGlobalWriteReorder(Value *Object,
+ Instruction *Inst,
+ AliasMultiMap &WriteObjects,
+ unsigned MaxByteWidth);
+ /// Return the AA location for a load or a store.
+ AliasAnalysis::Location getLoadStoreLocation(Instruction *Inst);
+
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ /// DataLayout analysis.
+ DataLayout *DL;
+ /// Dominators.
+ DominatorTree *DT;
+ /// Target Info.
+ TargetTransformInfo *TTI;
+ /// Alias Analysis.
+ AliasAnalysis *AA;
+ /// Target Library Info.
+ TargetLibraryInfo *TLI;
+
+ // --- vectorization state --- //
+
+ /// Holds the integer induction variable. This is the counter of the
+ /// loop.
+ PHINode *Induction;
+ /// Holds the reduction variables.
+ ReductionList Reductions;
+ /// Holds all of the induction variables that we found in the loop.
+ /// Notice that inductions don't need to start at zero and that induction
+ /// variables can be pointers.
+ InductionList Inductions;
+
+ /// Allowed outside users. This holds the reduction
+ /// vars which can be accessed from outside the loop.
+ SmallPtrSet<Value*, 4> AllowedExit;
+ /// This set holds the variables which are known to be uniform after
+ /// vectorization.
+ SmallPtrSet<Instruction*, 4> Uniforms;
+ /// We need to check that all of the pointers in this list are disjoint
+ /// at runtime.
+ RuntimePointerCheck PtrRtCheck;
+};
+
+/// LoopVectorizationCostModel - estimates the expected speedups due to
+/// vectorization.
+/// In many cases vectorization is not profitable. This can happen because of
+/// a number of reasons. In this class we mainly attempt to predict the
+/// expected speedup/slowdowns due to the supported instruction set. We use the
+/// TargetTransformInfo to query the different backends for the cost of
+/// different operations.
+class LoopVectorizationCostModel {
+public:
+ LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
+ LoopVectorizationLegality *Legal,
+ const TargetTransformInfo &TTI,
+ DataLayout *DL, const TargetLibraryInfo *TLI)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {}
+
+ /// Information about vectorization costs
+ struct VectorizationFactor {
+ unsigned Width; // Vector width with best cost
+ unsigned Cost; // Cost of the loop with that width
+ };
+ /// \return The most profitable vectorization factor and the cost of that VF.
+ /// This method checks every power of two up to VF. If UserVF is not ZERO
+ /// then this vectorization factor will be selected if vectorization is
+ /// possible.
+ VectorizationFactor selectVectorizationFactor(bool OptForSize,
+ unsigned UserVF);
+
+ /// \return The size (in bits) of the widest type in the code that
+ /// needs to be vectorized. We ignore values that remain scalar such as
+ /// 64 bit loop indices.
+ unsigned getWidestType();
+
+ /// \return The most profitable unroll factor.
+ /// If UserUF is non-zero then this method finds the best unroll-factor
+ /// based on register pressure and other parameters.
+ /// VF and LoopCost are the selected vectorization factor and the cost of the
+ /// selected VF.
+ unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF,
+ unsigned LoopCost);
+
+ /// \brief A struct that represents some properties of the register usage
+ /// of a loop.
+ struct RegisterUsage {
+ /// Holds the number of loop invariant values that are used in the loop.
+ unsigned LoopInvariantRegs;
+ /// Holds the maximum number of concurrent live intervals in the loop.
+ unsigned MaxLocalUsers;
+ /// Holds the number of instructions in the loop.
+ unsigned NumInstructions;
+ };
+
+ /// \return information about the register usage of the loop.
+ RegisterUsage calculateRegisterUsage();
+
+private:
+ /// Returns the expected execution cost. The unit of the cost does
+ /// not matter because we use the 'cost' units to compare different
+ /// vector widths. The cost that is returned is *not* normalized by
+ /// the factor width.
+ unsigned expectedCost(unsigned VF);
+
+ /// Returns the execution time cost of an instruction for a given vector
+ /// width. Vector width of one means scalar.
+ unsigned getInstructionCost(Instruction *I, unsigned VF);
+
+ /// A helper function for converting Scalar types to vector types.
+ /// If the incoming type is void, we return void. If the VF is 1, we return
+ /// the scalar type.
+ static Type* ToVectorTy(Type *Scalar, unsigned VF);
+
+ /// Returns whether the instruction is a load or store and will be a emitted
+ /// as a vector operation.
+ bool isConsecutiveLoadOrStore(Instruction *I);
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ /// Loop Info analysis.
+ LoopInfo *LI;
+ /// Vectorization legality.
+ LoopVectorizationLegality *Legal;
+ /// Vector target information.
+ const TargetTransformInfo &TTI;
+ /// Target data layout information.
+ DataLayout *DL;
+ /// Target Library Info.
+ const TargetLibraryInfo *TLI;
+};
+
+/// The LoopVectorize Pass.
+struct LoopVectorize : public LoopPass {
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ explicit LoopVectorize() : LoopPass(ID) {
+ initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
+ }
+
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ LoopInfo *LI;
+ TargetTransformInfo *TTI;
+ DominatorTree *DT;
+ AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
+ // We only vectorize innermost loops.
+ if (!L->empty())
+ return false;
+
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ LI = &getAnalysis<LoopInfo>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
+ TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+
+ DEBUG(dbgs() << "LV: Checking a loop in \"" <<
+ L->getHeader()->getParent()->getName() << "\"\n");
+
+ // Check if it is legal to vectorize the loop.
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI);
+ if (!LVL.canVectorize()) {
+ DEBUG(dbgs() << "LV: Not vectorizing.\n");
+ return false;
+ }
+
+ // Use the cost model.
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI);
+
+ // Check the function attributes to find out if this function should be
+ // optimized for size.
+ Function *F = L->getHeader()->getParent();
+ Attribute::AttrKind SzAttr = Attribute::OptimizeForSize;
+ Attribute::AttrKind FlAttr = Attribute::NoImplicitFloat;
+ unsigned FnIndex = AttributeSet::FunctionIndex;
+ bool OptForSize = F->getAttributes().hasAttribute(FnIndex, SzAttr);
+ bool NoFloat = F->getAttributes().hasAttribute(FnIndex, FlAttr);
+
+ if (NoFloat) {
+ DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
+ "attribute is used.\n");
+ return false;
+ }
+
+ // Select the optimal vectorization factor.
+ LoopVectorizationCostModel::VectorizationFactor VF;
+ VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor);
+ // Select the unroll factor.
+ unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll,
+ VF.Width, VF.Cost);
+
+ if (VF.Width == 1) {
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
+ F->getParent()->getModuleIdentifier()<<"\n");
+ DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n");
+
+ // If we decided that it is *legal* to vectorize the loop then do it.
+ InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
+ LB.vectorize(&LVL);
+
+ DEBUG(verifyFunction(*L->getHeader()->getParent()));
+ return true;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ LoopPass::getAnalysisUsage(AU);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<DominatorTree>();
+ }
+
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and
+// LoopVectorizationCostModel.
+//===----------------------------------------------------------------------===//
+
+void
+LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
+ Loop *Lp, Value *Ptr) {
+ const SCEV *Sc = SE->getSCEV(Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ assert(AR && "Invalid addrec expression");
+ const SCEV *Ex = SE->getExitCount(Lp, Lp->getLoopLatch());
+ const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ Pointers.push_back(Ptr);
+ Starts.push_back(AR->getStart());
+ Ends.push_back(ScEnd);
+}
+
+Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
+ // Save the current insertion location.
+ Instruction *Loc = Builder.GetInsertPoint();
+
+ // We need to place the broadcast of invariant variables outside the loop.
+ Instruction *Instr = dyn_cast<Instruction>(V);
+ bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
+ bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
+
+ // Place the code for broadcasting invariant variables in the new preheader.
+ if (Invariant)
+ Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+
+ // Broadcast the scalar into all locations in the vector.
+ Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
+
+ // Restore the builder insertion point.
+ if (Invariant)
+ Builder.SetInsertPoint(Loc);
+
+ return Shuf;
+}
+
+Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx,
+ bool Negate) {
+ assert(Val->getType()->isVectorTy() && "Must be a vector");
+ assert(Val->getType()->getScalarType()->isIntegerTy() &&
+ "Elem must be an integer");
+ // Create the types.
+ Type *ITy = Val->getType()->getScalarType();
+ VectorType *Ty = cast<VectorType>(Val->getType());
+ int VLen = Ty->getNumElements();
+ SmallVector<Constant*, 8> Indices;
+
+ // Create a vector of consecutive numbers from zero to VF.
+ for (int i = 0; i < VLen; ++i) {
+ int Idx = Negate ? (-i): i;
+ Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx));
+ }
+
+ // Add the consecutive indices to the vector value.
+ Constant *Cv = ConstantVector::get(Indices);
+ assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
+ return Builder.CreateAdd(Val, Cv, "induction");
+}
+
+int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
+ assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
+ // Make sure that the pointer does not point to structs.
+ if (cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType())
+ return 0;
+
+ // If this value is a pointer induction variable we know it is consecutive.
+ PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
+ if (Phi && Inductions.count(Phi)) {
+ InductionInfo II = Inductions[Phi];
+ if (IK_PtrInduction == II.IK)
+ return 1;
+ else if (IK_ReversePtrInduction == II.IK)
+ return -1;
+ }
+
+ GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
+ if (!Gep)
+ return 0;
+
+ unsigned NumOperands = Gep->getNumOperands();
+ Value *LastIndex = Gep->getOperand(NumOperands - 1);
+
+ Value *GpPtr = Gep->getPointerOperand();
+ // If this GEP value is a consecutive pointer induction variable and all of
+ // the indices are constant then we know it is consecutive. We can
+ Phi = dyn_cast<PHINode>(GpPtr);
+ if (Phi && Inductions.count(Phi)) {
+
+ // Make sure that the pointer does not point to structs.
+ PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
+ if (GepPtrType->getElementType()->isAggregateType())
+ return 0;
+
+ // Make sure that all of the index operands are loop invariant.
+ for (unsigned i = 1; i < NumOperands; ++i)
+ if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ return 0;
+
+ InductionInfo II = Inductions[Phi];
+ if (IK_PtrInduction == II.IK)
+ return 1;
+ else if (IK_ReversePtrInduction == II.IK)
+ return -1;
+ }
+
+ // Check that all of the gep indices are uniform except for the last.
+ for (unsigned i = 0; i < NumOperands - 1; ++i)
+ if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ return 0;
+
+ // We can emit wide load/stores only if the last index is the induction
+ // variable.
+ const SCEV *Last = SE->getSCEV(LastIndex);
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // The memory is consecutive because the last index is consecutive
+ // and all other indices are loop invariant.
+ if (Step->isOne())
+ return 1;
+ if (Step->isAllOnesValue())
+ return -1;
+ }
+
+ return 0;
+}
+
+bool LoopVectorizationLegality::isUniform(Value *V) {
+ return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+}
+
+InnerLoopVectorizer::VectorParts&
+InnerLoopVectorizer::getVectorValue(Value *V) {
+ assert(V != Induction && "The new induction variable should not be used.");
+ assert(!V->getType()->isVectorTy() && "Can't widen a vector");
+
+ // If we have this scalar in the map, return it.
+ if (WidenMap.has(V))
+ return WidenMap.get(V);
+
+ // If this scalar is unknown, assume that it is a constant or that it is
+ // loop invariant. Broadcast V and save the value for future uses.
+ Value *B = getBroadcastInstrs(V);
+ return WidenMap.splat(V, B);
+}
+
+Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
+ assert(Vec->getType()->isVectorTy() && "Invalid type");
+ SmallVector<Constant*, 8> ShuffleMask;
+ for (unsigned i = 0; i < VF; ++i)
+ ShuffleMask.push_back(Builder.getInt32(VF - i - 1));
+
+ return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
+ ConstantVector::get(ShuffleMask),
+ "reverse");
+}
+
+
+void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality *Legal) {
+ // Attempt to issue a wide load.
+ LoadInst *LI = dyn_cast<LoadInst>(Instr);
+ StoreInst *SI = dyn_cast<StoreInst>(Instr);
+
+ assert((LI || SI) && "Invalid Load/Store instruction");
+
+ Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType();
+ Type *DataTy = VectorType::get(ScalarDataTy, VF);
+ Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
+ unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
+
+ // If the pointer is loop invariant or if it is non consecutive,
+ // scalarize the load.
+ int Stride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = Stride < 0;
+ bool UniformLoad = LI && Legal->isUniform(Ptr);
+ if (Stride == 0 || UniformLoad)
+ return scalarizeInstruction(Instr);
+
+ Constant *Zero = Builder.getInt32(0);
+ VectorParts &Entry = WidenMap.get(Instr);
+
+ // Handle consecutive loads/stores.
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+ if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
+ Value *PtrOperand = Gep->getPointerOperand();
+ Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
+ FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(0, FirstBasePtr);
+ Gep2->setName("gep.indvar.base");
+ Ptr = Builder.Insert(Gep2);
+ } else if (Gep) {
+ assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
+ OrigLoop) && "Base ptr must be invariant");
+
+ // The last index does not have to be the induction. It can be
+ // consecutive and be a function of the index. For example A[I+1];
+ unsigned NumOperands = Gep->getNumOperands();
+
+ Value *LastGepOperand = Gep->getOperand(NumOperands - 1);
+ VectorParts &GEPParts = getVectorValue(LastGepOperand);
+ Value *LastIndex = GEPParts[0];
+ LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(NumOperands - 1, LastIndex);
+ Gep2->setName("gep.indvar.idx");
+ Ptr = Builder.Insert(Gep2);
+ } else {
+ // Use the induction element ptr.
+ assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+ VectorParts &PtrVal = getVectorValue(Ptr);
+ Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
+ }
+
+ // Handle Stores:
+ if (SI) {
+ assert(!Legal->isUniform(SI->getPointerOperand()) &&
+ "We do not allow storing to uniform addresses");
+
+ VectorParts &StoredVal = getVectorValue(SI->getValueOperand());
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // Calculate the pointer for the specific unroll-part.
+ Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+
+ if (Reverse) {
+ // If we store to reverse consecutive memory locations then we need
+ // to reverse the order of elements in the stored value.
+ StoredVal[Part] = reverseVector(StoredVal[Part]);
+ // If the address is consecutive but reversed, then the
+ // wide store needs to start at the last vector element.
+ PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
+ PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ }
+
+ Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+ Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment);
+ }
+ }
+
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // Calculate the pointer for the specific unroll-part.
+ Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+
+ if (Reverse) {
+ // If the address is consecutive but reversed, then the
+ // wide store needs to start at the last vector element.
+ PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
+ PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ }
+
+ Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+ Value *LI = Builder.CreateLoad(VecPtr, "wide.load");
+ cast<LoadInst>(LI)->setAlignment(Alignment);
+ Entry[Part] = Reverse ? reverseVector(LI) : LI;
+ }
+}
+
+void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
+ assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
+ // Holds vector parameters or scalars, in case of uniform vals.
+ SmallVector<VectorParts, 4> Params;
+
+ // Find all of the vectorized parameters.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *SrcOp = Instr->getOperand(op);
+
+ // If we are accessing the old induction variable, use the new one.
+ if (SrcOp == OldInduction) {
+ Params.push_back(getVectorValue(SrcOp));
+ continue;
+ }
+
+ // Try using previously calculated values.
+ Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
+
+ // If the src is an instruction that appeared earlier in the basic block
+ // then it should already be vectorized.
+ if (SrcInst && OrigLoop->contains(SrcInst)) {
+ assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
+ // The parameter is a vector value from earlier.
+ Params.push_back(WidenMap.get(SrcInst));
+ } else {
+ // The parameter is a scalar from outside the loop. Maybe even a constant.
+ VectorParts Scalars;
+ Scalars.append(UF, SrcOp);
+ Params.push_back(Scalars);
+ }
+ }
+
+ assert(Params.size() == Instr->getNumOperands() &&
+ "Invalid number of operands");
+
+ // Does this instruction return a value ?
+ bool IsVoidRetTy = Instr->getType()->isVoidTy();
+
+ Value *UndefVec = IsVoidRetTy ? 0 :
+ UndefValue::get(VectorType::get(Instr->getType(), VF));
+ // Create a new entry in the WidenMap and initialize it to Undef or Null.
+ VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
+
+ // For each scalar that we create:
+ for (unsigned Width = 0; Width < VF; ++Width) {
+ // For each vector unroll 'part':
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Instruction *Cloned = Instr->clone();
+ if (!IsVoidRetTy)
+ Cloned->setName(Instr->getName() + ".cloned");
+ // Replace the operands of the cloned instrucions with extracted scalars.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *Op = Params[op][Part];
+ // Param is a vector. Need to extract the right lane.
+ if (Op->getType()->isVectorTy())
+ Op = Builder.CreateExtractElement(Op, Builder.getInt32(Width));
+ Cloned->setOperand(op, Op);
+ }
+
+ // Place the cloned scalar in the new loop.
+ Builder.Insert(Cloned);
+
+ // If the original scalar returns a value we need to place it in a vector
+ // so that future users will be able to use it.
+ if (!IsVoidRetTy)
+ VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned,
+ Builder.getInt32(Width));
+ }
+ }
+}
+
+Instruction *
+InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
+ Instruction *Loc) {
+ LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
+ Legal->getRuntimePointerCheck();
+
+ if (!PtrRtCheck->Need)
+ return NULL;
+
+ Instruction *MemoryRuntimeCheck = 0;
+ unsigned NumPointers = PtrRtCheck->Pointers.size();
+ SmallVector<Value* , 2> Starts;
+ SmallVector<Value* , 2> Ends;
+
+ SCEVExpander Exp(*SE, "induction");
+
+ // Use this type for pointer arithmetic.
+ Type* PtrArithTy = Type::getInt8PtrTy(Loc->getContext(), 0);
+
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ Value *Ptr = PtrRtCheck->Pointers[i];
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, OrigLoop)) {
+ DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
+ *Ptr <<"\n");
+ Starts.push_back(Ptr);
+ Ends.push_back(Ptr);
+ } else {
+ DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
+
+ Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
+ Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
+ Starts.push_back(Start);
+ Ends.push_back(End);
+ }
+ }
+
+ IRBuilder<> ChkBuilder(Loc);
+
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i+1; j < NumPointers; ++j) {
+ Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy, "bc");
+
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ if (MemoryRuntimeCheck)
+ IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
+ "conflict.rdx");
+
+ MemoryRuntimeCheck = cast<Instruction>(IsConflict);
+ }
+ }
+
+ return MemoryRuntimeCheck;
+}
+
+void
+InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
+ /*
+ In this function we generate a new loop. The new loop will contain
+ the vectorized instructions while the old loop will continue to run the
+ scalar remainder.
+
+ [ ] <-- vector loop bypass (may consist of multiple blocks).
+ / |
+ / v
+ | [ ] <-- vector pre header.
+ | |
+ | v
+ | [ ] \
+ | [ ]_| <-- vector loop.
+ | |
+ \ v
+ >[ ] <--- middle-block.
+ / |
+ / v
+ | [ ] <--- new preheader.
+ | |
+ | v
+ | [ ] \
+ | [ ]_| <-- old scalar loop to handle remainder.
+ \ |
+ \ v
+ >[ ] <-- exit block.
+ ...
+ */
+
+ BasicBlock *OldBasicBlock = OrigLoop->getHeader();
+ BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
+ BasicBlock *ExitBlock = OrigLoop->getExitBlock();
+ assert(ExitBlock && "Must have an exit block");
+
+ // Mark the old scalar loop with metadata that tells us not to vectorize this
+ // loop again if we run into it.
+ MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>());
+ OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
+
+ // Some loops have a single integer induction variable, while other loops
+ // don't. One example is c++ iterators that often have multiple pointer
+ // induction variables. In the code below we also support a case where we
+ // don't have a single induction variable.
+ OldInduction = Legal->getInduction();
+ Type *IdxTy = OldInduction ? OldInduction->getType() :
+ DL->getIntPtrType(SE->getContext());
+
+ // Find the loop boundaries.
+ const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
+ assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
+
+ // Get the total trip count from the count by adding 1.
+ ExitCount = SE->getAddExpr(ExitCount,
+ SE->getConstant(ExitCount->getType(), 1));
+
+ // Expand the trip count and place the new instructions in the preheader.
+ // Notice that the pre-header does not change, only the loop body.
+ SCEVExpander Exp(*SE, "induction");
+
+ // Count holds the overall loop count (N).
+ Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
+ BypassBlock->getTerminator());
+
+ // The loop index does not have to start at Zero. Find the original start
+ // value from the induction PHI node. If we don't have an induction variable
+ // then we know that it starts at zero.
+ Value *StartIdx = OldInduction ?
+ OldInduction->getIncomingValueForBlock(BypassBlock):
+ ConstantInt::get(IdxTy, 0);
+
+ assert(BypassBlock && "Invalid loop structure");
+ LoopBypassBlocks.push_back(BypassBlock);
+
+ // Split the single block loop into the two loop structure described above.
+ BasicBlock *VectorPH =
+ BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
+ BasicBlock *VecBody =
+ VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
+ BasicBlock *MiddleBlock =
+ VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
+ BasicBlock *ScalarPH =
+ MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
+
+ // Use this IR builder to create the loop instructions (Phi, Br, Cmp)
+ // inside the loop.
+ Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+
+ // Generate the induction variable.
+ Induction = Builder.CreatePHI(IdxTy, 2, "index");
+ // The loop step is equal to the vectorization factor (num of SIMD elements)
+ // times the unroll factor (num of SIMD instructions).
+ Constant *Step = ConstantInt::get(IdxTy, VF * UF);
+
+ // This is the IR builder that we use to add all of the logic for bypassing
+ // the new vector loop.
+ IRBuilder<> BypassBuilder(BypassBlock->getTerminator());
+
+ // We may need to extend the index in case there is a type mismatch.
+ // We know that the count starts at zero and does not overflow.
+ if (Count->getType() != IdxTy) {
+ // The exit count can be of pointer type. Convert it to the correct
+ // integer type.
+ if (ExitCount->getType()->isPointerTy())
+ Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int");
+ else
+ Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast");
+ }
+
+ // Add the start index to the loop count to get the new end index.
+ Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx");
+
+ // Now we need to generate the expression for N - (N % VF), which is
+ // the part that the vectorized body will execute.
+ Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf");
+ Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec");
+ Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx,
+ "end.idx.rnd.down");
+
+ // Now, compare the new count to zero. If it is zero skip the vector loop and
+ // jump to the scalar loop.
+ Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx,
+ "cmp.zero");
+
+ BasicBlock *LastBypassBlock = BypassBlock;
+
+ // Generate the code that checks in runtime if arrays overlap. We put the
+ // checks into a separate block to make the more common case of few elements
+ // faster.
+ Instruction *MemRuntimeCheck = addRuntimeCheck(Legal,
+ BypassBlock->getTerminator());
+ if (MemRuntimeCheck) {
+ // Create a new block containing the memory check.
+ BasicBlock *CheckBlock = BypassBlock->splitBasicBlock(MemRuntimeCheck,
+ "vector.memcheck");
+ LoopBypassBlocks.push_back(CheckBlock);
+
+ // Replace the branch into the memory check block with a conditional branch
+ // for the "few elements case".
+ Instruction *OldTerm = BypassBlock->getTerminator();
+ BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
+ OldTerm->eraseFromParent();
+
+ Cmp = MemRuntimeCheck;
+ LastBypassBlock = CheckBlock;
+ }
+
+ LastBypassBlock->getTerminator()->eraseFromParent();
+ BranchInst::Create(MiddleBlock, VectorPH, Cmp,
+ LastBypassBlock);
+
+ // We are going to resume the execution of the scalar loop.
+ // Go over all of the induction variables that we found and fix the
+ // PHIs that are left in the scalar version of the loop.
+ // The starting values of PHI nodes depend on the counter of the last
+ // iteration in the vectorized loop.
+ // If we come from a bypass edge then we need to start from the original
+ // start value.
+
+ // This variable saves the new starting index for the scalar loop.
+ PHINode *ResumeIndex = 0;
+ LoopVectorizationLegality::InductionList::iterator I, E;
+ LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
+ for (I = List->begin(), E = List->end(); I != E; ++I) {
+ PHINode *OrigPhi = I->first;
+ LoopVectorizationLegality::InductionInfo II = I->second;
+ PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
+ MiddleBlock->getTerminator());
+ Value *EndValue = 0;
+ switch (II.IK) {
+ case LoopVectorizationLegality::IK_NoInduction:
+ llvm_unreachable("Unknown induction");
+ case LoopVectorizationLegality::IK_IntInduction: {
+ // Handle the integer induction counter:
+ assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
+ assert(OrigPhi == OldInduction && "Unknown integer PHI");
+ // We know what the end value is.
+ EndValue = IdxEndRoundDown;
+ // We also know which PHI node holds it.
+ ResumeIndex = ResumeVal;
+ break;
+ }
+ case LoopVectorizationLegality::IK_ReverseIntInduction: {
+ // Convert the CountRoundDown variable to the PHI size.
+ unsigned CRDSize = CountRoundDown->getType()->getScalarSizeInBits();
+ unsigned IISize = II.StartValue->getType()->getScalarSizeInBits();
+ Value *CRD = CountRoundDown;
+ if (CRDSize > IISize)
+ CRD = CastInst::Create(Instruction::Trunc, CountRoundDown,
+ II.StartValue->getType(), "tr.crd",
+ LoopBypassBlocks.back()->getTerminator());
+ else if (CRDSize < IISize)
+ CRD = CastInst::Create(Instruction::SExt, CountRoundDown,
+ II.StartValue->getType(),
+ "sext.crd",
+ LoopBypassBlocks.back()->getTerminator());
+ // Handle reverse integer induction counter:
+ EndValue =
+ BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
+ break;
+ }
+ case LoopVectorizationLegality::IK_PtrInduction: {
+ // For pointer induction variables, calculate the offset using
+ // the end index.
+ EndValue =
+ GetElementPtrInst::Create(II.StartValue, CountRoundDown, "ptr.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
+ break;
+ }
+ case LoopVectorizationLegality::IK_ReversePtrInduction: {
+ // The value at the end of the loop for the reverse pointer is calculated
+ // by creating a GEP with a negative index starting from the start value.
+ Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
+ Value *NegIdx = BinaryOperator::CreateSub(Zero, CountRoundDown,
+ "rev.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
+ EndValue = GetElementPtrInst::Create(II.StartValue, NegIdx,
+ "rev.ptr.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
+ break;
+ }
+ }// end of case
+
+ // The new PHI merges the original incoming value, in case of a bypass,
+ // or the value at the end of the vectorized loop.
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ ResumeVal->addIncoming(EndValue, VecBody);
+
+ // Fix the scalar body counter (PHI node).
+ unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
+ OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
+ }
+
+ // If we are generating a new induction variable then we also need to
+ // generate the code that calculates the exit value. This value is not
+ // simply the end of the counter because we may skip the vectorized body
+ // in case of a runtime check.
+ if (!OldInduction){
+ assert(!ResumeIndex && "Unexpected resume value found");
+ ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
+ MiddleBlock->getTerminator());
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]);
+ ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
+ }
+
+ // Make sure that we found the index where scalar loop needs to continue.
+ assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
+ "Invalid resume Index");
+
+ // Add a check in the middle block to see if we have completed
+ // all of the iterations in the first vector loop.
+ // If (N - N%VF) == N, then we *don't* need to run the remainder.
+ Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
+ ResumeIndex, "cmp.n",
+ MiddleBlock->getTerminator());
+
+ BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
+ // Remove the old terminator.
+ MiddleBlock->getTerminator()->eraseFromParent();
+
+ // Create i+1 and fill the PHINode.
+ Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next");
+ Induction->addIncoming(StartIdx, VectorPH);
+ Induction->addIncoming(NextIdx, VecBody);
+ // Create the compare.
+ Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown);
+ Builder.CreateCondBr(ICmp, MiddleBlock, VecBody);
+
+ // Now we have two terminators. Remove the old one from the block.
+ VecBody->getTerminator()->eraseFromParent();
+
+ // Get ready to start creating new instructions into the vectorized body.
+ Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+
+ // Create and register the new vector loop.
+ Loop* Lp = new Loop();
+ Loop *ParentLoop = OrigLoop->getParentLoop();
+
+ // Insert the new loop into the loop nest and register the new basic blocks.
+ if (ParentLoop) {
+ ParentLoop->addChildLoop(Lp);
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
+ ParentLoop->addBasicBlockToLoop(LoopBypassBlocks[I], LI->getBase());
+ ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
+ } else {
+ LI->addTopLevelLoop(Lp);
+ }
+
+ Lp->addBasicBlockToLoop(VecBody, LI->getBase());
+
+ // Save the state.
+ LoopVectorPreHeader = VectorPH;
+ LoopScalarPreHeader = ScalarPH;
+ LoopMiddleBlock = MiddleBlock;
+ LoopExitBlock = ExitBlock;
+ LoopVectorBody = VecBody;
+ LoopScalarBody = OldBasicBlock;
+}
+
+/// This function returns the identity element (or neutral element) for
+/// the operation K.
+static Constant*
+getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
+ switch (K) {
+ case LoopVectorizationLegality:: RK_IntegerXor:
+ case LoopVectorizationLegality:: RK_IntegerAdd:
+ case LoopVectorizationLegality:: RK_IntegerOr:
+ // Adding, Xoring, Oring zero to a number does not change it.
+ return ConstantInt::get(Tp, 0);
+ case LoopVectorizationLegality:: RK_IntegerMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantInt::get(Tp, 1);
+ case LoopVectorizationLegality:: RK_IntegerAnd:
+ // AND-ing a number with an all-1 value does not change it.
+ return ConstantInt::get(Tp, -1, true);
+ case LoopVectorizationLegality:: RK_FloatMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantFP::get(Tp, 1.0L);
+ case LoopVectorizationLegality:: RK_FloatAdd:
+ // Adding zero to a number does not change it.
+ return ConstantFP::get(Tp, 0.0L);
+ default:
+ llvm_unreachable("Unknown reduction kind");
+ }
+}
+
+static Intrinsic::ID
+getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
+ // If we have an intrinsic call, check if it is trivially vectorizable.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::log:
+ case Intrinsic::log10:
+ case Intrinsic::log2:
+ case Intrinsic::fabs:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::pow:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ return II->getIntrinsicID();
+ default:
+ return Intrinsic::not_intrinsic;
+ }
+ }
+
+ if (!TLI)
+ return Intrinsic::not_intrinsic;
+
+ LibFunc::Func Func;
+ Function *F = CI->getCalledFunction();
+ // We're going to make assumptions on the semantics of the functions, check
+ // that the target knows that it's available in this environment.
+ if (!F || !TLI->getLibFunc(F->getName(), Func))
+ return Intrinsic::not_intrinsic;
+
+ // Otherwise check if we have a call to a function that can be turned into a
+ // vector intrinsic.
+ switch (Func) {
+ default:
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ return Intrinsic::sin;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ return Intrinsic::cos;
+ case LibFunc::exp:
+ case LibFunc::expf:
+ case LibFunc::expl:
+ return Intrinsic::exp;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ return Intrinsic::exp2;
+ case LibFunc::log:
+ case LibFunc::logf:
+ case LibFunc::logl:
+ return Intrinsic::log;
+ case LibFunc::log10:
+ case LibFunc::log10f:
+ case LibFunc::log10l:
+ return Intrinsic::log10;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ return Intrinsic::log2;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ return Intrinsic::fabs;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ return Intrinsic::floor;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ return Intrinsic::ceil;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ return Intrinsic::trunc;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ return Intrinsic::rint;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ return Intrinsic::nearbyint;
+ case LibFunc::pow:
+ case LibFunc::powf:
+ case LibFunc::powl:
+ return Intrinsic::pow;
+ }
+
+ return Intrinsic::not_intrinsic;
+}
+
+/// This function translates the reduction kind to an LLVM binary operator.
+static Instruction::BinaryOps
+getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
+ switch (Kind) {
+ case LoopVectorizationLegality::RK_IntegerAdd:
+ return Instruction::Add;
+ case LoopVectorizationLegality::RK_IntegerMult:
+ return Instruction::Mul;
+ case LoopVectorizationLegality::RK_IntegerOr:
+ return Instruction::Or;
+ case LoopVectorizationLegality::RK_IntegerAnd:
+ return Instruction::And;
+ case LoopVectorizationLegality::RK_IntegerXor:
+ return Instruction::Xor;
+ case LoopVectorizationLegality::RK_FloatMult:
+ return Instruction::FMul;
+ case LoopVectorizationLegality::RK_FloatAdd:
+ return Instruction::FAdd;
+ default:
+ llvm_unreachable("Unknown reduction operation");
+ }
+}
+
+void
+InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
+ //===------------------------------------------------===//
+ //
+ // Notice: any optimization or new instruction that go
+ // into the code below should be also be implemented in
+ // the cost-model.
+ //
+ //===------------------------------------------------===//
+ Constant *Zero = Builder.getInt32(0);
+
+ // In order to support reduction variables we need to be able to vectorize
+ // Phi nodes. Phi nodes have cycles, so we need to vectorize them in two
+ // stages. First, we create a new vector PHI node with no incoming edges.
+ // We use this value when we vectorize all of the instructions that use the
+ // PHI. Next, after all of the instructions in the block are complete we
+ // add the new incoming edges to the PHI. At this point all of the
+ // instructions in the basic block are vectorized, so we can use them to
+ // construct the PHI.
+ PhiVector RdxPHIsToFix;
+
+ // Scan the loop in a topological order to ensure that defs are vectorized
+ // before users.
+ LoopBlocksDFS DFS(OrigLoop);
+ DFS.perform(LI);
+
+ // Vectorize all of the blocks in the original loop.
+ for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
+ be = DFS.endRPO(); bb != be; ++bb)
+ vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
+
+ // At this point every instruction in the original loop is widened to
+ // a vector form. We are almost done. Now, we need to fix the PHI nodes
+ // that we vectorized. The PHI nodes are currently empty because we did
+ // not want to introduce cycles. Notice that the remaining PHI nodes
+ // that we need to fix are reduction variables.
+
+ // Create the 'reduced' values for each of the induction vars.
+ // The reduced values are the vector values that we scalarize and combine
+ // after the loop is finished.
+ for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
+ it != e; ++it) {
+ PHINode *RdxPhi = *it;
+ assert(RdxPhi && "Unable to recover vectorized PHI");
+
+ // Find the reduction variable descriptor.
+ assert(Legal->getReductionVars()->count(RdxPhi) &&
+ "Unable to find the reduction variable");
+ LoopVectorizationLegality::ReductionDescriptor RdxDesc =
+ (*Legal->getReductionVars())[RdxPhi];
+
+ // We need to generate a reduction vector from the incoming scalar.
+ // To do so, we need to generate the 'identity' vector and overide
+ // one of the elements with the incoming scalar reduction. We need
+ // to do it in the vector-loop preheader.
+ Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
+
+ // This is the vector-clone of the value that leaves the loop.
+ VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
+ Type *VecTy = VectorExit[0]->getType();
+
+ // Find the reduction identity variable. Zero for addition, or, xor,
+ // one for multiplication, -1 for And.
+ Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType());
+ Constant *Identity = ConstantVector::getSplat(VF, Iden);
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ Value *VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+
+ // Fix the vector-loop phi.
+ // We created the induction variable so we know that the
+ // preheader is the first entry.
+ BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
+
+ // Reductions do not have to start at zero. They can start with
+ // any loop invariant values.
+ VectorParts &VecRdxPhi = WidenMap.get(RdxPhi);
+ BasicBlock *Latch = OrigLoop->getLoopLatch();
+ Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch);
+ VectorParts &Val = getVectorValue(LoopVal);
+ for (unsigned part = 0; part < UF; ++part) {
+ // Make sure to add the reduction stat value only to the
+ // first unroll part.
+ Value *StartVal = (part == 0) ? VectorStart : Identity;
+ cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
+ cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody);
+ }
+
+ // Before each round, move the insertion point right between
+ // the PHIs and the values we are going to write.
+ // This allows us to write both PHINodes and the extractelement
+ // instructions.
+ Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
+
+ VectorParts RdxParts;
+ for (unsigned part = 0; part < UF; ++part) {
+ // This PHINode contains the vectorized reduction variable, or
+ // the initial value vector, if we bypass the vector loop.
+ VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
+ PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
+ Value *StartVal = (part == 0) ? VectorStart : Identity;
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
+ NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody);
+ RdxParts.push_back(NewPhi);
+ }
+
+ // Reduce all of the unrolled parts into a single vector.
+ Value *ReducedPartRdx = RdxParts[0];
+ for (unsigned part = 1; part < UF; ++part) {
+ Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
+ ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx,
+ "bin.rdx");
+ }
+
+ // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+ // and vector ops, reducing the set of values being computed by half each
+ // round.
+ assert(isPowerOf2_32(VF) &&
+ "Reduction emission only supported for pow2 vectors!");
+ Value *TmpVec = ReducedPartRdx;
+ SmallVector<Constant*, 32> ShuffleMask(VF, 0);
+ for (unsigned i = VF; i != 1; i >>= 1) {
+ // Move the upper half of the vector to the lower half.
+ for (unsigned j = 0; j != i/2; ++j)
+ ShuffleMask[j] = Builder.getInt32(i/2 + j);
+
+ // Fill the rest of the mask with undef.
+ std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
+ UndefValue::get(Builder.getInt32Ty()));
+
+ Value *Shuf =
+ Builder.CreateShuffleVector(TmpVec,
+ UndefValue::get(TmpVec->getType()),
+ ConstantVector::get(ShuffleMask),
+ "rdx.shuf");
+
+ Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
+ TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx");
+ }
+
+ // The result is in the first element of the vector.
+ Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+
+ // Now, we need to fix the users of the reduction variable
+ // inside and outside of the scalar remainder loop.
+ // We know that the loop is in LCSSA form. We need to update the
+ // PHI nodes in the exit blocks.
+ for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
+ LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
+ PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
+ if (!LCSSAPhi) continue;
+
+ // All PHINodes need to have a single entry edge, or two if
+ // we already fixed them.
+ assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
+
+ // We found our reduction value exit-PHI. Update it with the
+ // incoming bypass edge.
+ if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
+ // Add an edge coming from the bypass.
+ LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
+ break;
+ }
+ }// end of the LCSSA phi scan.
+
+ // Fix the scalar loop reduction variable with the incoming reduction sum
+ // from the vector body and from the backedge value.
+ int IncomingEdgeBlockIdx =
+ (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch());
+ assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
+ // Pick the other block.
+ int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
+ (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
+ (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
+ }// end of for each redux variable.
+
+ // The Loop exit block may have single value PHI nodes where the incoming
+ // value is 'undef'. While vectorizing we only handled real values that
+ // were defined inside the loop. Here we handle the 'undef case'.
+ // See PR14725.
+ for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
+ LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
+ PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
+ if (!LCSSAPhi) continue;
+ if (LCSSAPhi->getNumIncomingValues() == 1)
+ LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()),
+ LoopMiddleBlock);
+ }
+}
+
+InnerLoopVectorizer::VectorParts
+InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
+ assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) &&
+ "Invalid edge");
+
+ VectorParts SrcMask = createBlockInMask(Src);
+
+ // The terminator has to be a branch inst!
+ BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
+ assert(BI && "Unexpected terminator found");
+
+ if (BI->isConditional()) {
+ VectorParts EdgeMask = getVectorValue(BI->getCondition());
+
+ if (BI->getSuccessor(0) != Dst)
+ for (unsigned part = 0; part < UF; ++part)
+ EdgeMask[part] = Builder.CreateNot(EdgeMask[part]);
+
+ for (unsigned part = 0; part < UF; ++part)
+ EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]);
+ return EdgeMask;
+ }
+
+ return SrcMask;
+}
+
+InnerLoopVectorizer::VectorParts
+InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
+ assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
+
+ // Loop incoming mask is all-one.
+ if (OrigLoop->getHeader() == BB) {
+ Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
+ return getVectorValue(C);
+ }
+
+ // This is the block mask. We OR all incoming edges, and with zero.
+ Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0);
+ VectorParts BlockMask = getVectorValue(Zero);
+
+ // For each pred:
+ for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) {
+ VectorParts EM = createEdgeMask(*it, BB);
+ for (unsigned part = 0; part < UF; ++part)
+ BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]);
+ }
+
+ return BlockMask;
+}
+
+void
+InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
+ BasicBlock *BB, PhiVector *PV) {
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ VectorParts &Entry = WidenMap.get(it);
+ switch (it->getOpcode()) {
+ case Instruction::Br:
+ // Nothing to do for PHIs and BR, since we already took care of the
+ // loop control flow instructions.
+ continue;
+ case Instruction::PHI:{
+ PHINode* P = cast<PHINode>(it);
+ // Handle reduction variables:
+ if (Legal->getReductionVars()->count(P)) {
+ for (unsigned part = 0; part < UF; ++part) {
+ // This is phase one of vectorizing PHIs.
+ Type *VecTy = VectorType::get(it->getType(), VF);
+ Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
+ LoopVectorBody-> getFirstInsertionPt());
+ }
+ PV->push_back(P);
+ continue;
+ }
+
+ // Check for PHI nodes that are lowered to vector selects.
+ if (P->getParent() != OrigLoop->getHeader()) {
+ // We know that all PHIs in non header blocks are converted into
+ // selects, so we don't have to worry about the insertion order and we
+ // can just use the builder.
+
+ // At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+ VectorParts Cond = createEdgeMask(P->getIncomingBlock(0),
+ P->getParent());
+
+ for (unsigned part = 0; part < UF; ++part) {
+ VectorParts &In0 = getVectorValue(P->getIncomingValue(0));
+ VectorParts &In1 = getVectorValue(P->getIncomingValue(1));
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part],
+ "predphi");
+ }
+ continue;
+ }
+
+ // This PHINode must be an induction variable.
+ // Make sure that we know about it.
+ assert(Legal->getInductionVars()->count(P) &&
+ "Not an induction variable");
+
+ LoopVectorizationLegality::InductionInfo II =
+ Legal->getInductionVars()->lookup(P);
+
+ switch (II.IK) {
+ case LoopVectorizationLegality::IK_NoInduction:
+ llvm_unreachable("Unknown induction");
+ case LoopVectorizationLegality::IK_IntInduction: {
+ assert(P == OldInduction && "Unexpected PHI");
+ Value *Broadcasted = getBroadcastInstrs(Induction);
+ // After broadcasting the induction variable we need to make the
+ // vector consecutive by adding 0, 1, 2 ...
+ for (unsigned part = 0; part < UF; ++part)
+ Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
+ continue;
+ }
+ case LoopVectorizationLegality::IK_ReverseIntInduction:
+ case LoopVectorizationLegality::IK_PtrInduction:
+ case LoopVectorizationLegality::IK_ReversePtrInduction:
+ // Handle reverse integer and pointer inductions.
+ Value *StartIdx = 0;
+ // If we have a single integer induction variable then use it.
+ // Otherwise, start counting at zero.
+ if (OldInduction) {
+ LoopVectorizationLegality::InductionInfo OldII =
+ Legal->getInductionVars()->lookup(OldInduction);
+ StartIdx = OldII.StartValue;
+ } else {
+ StartIdx = ConstantInt::get(Induction->getType(), 0);
+ }
+ // This is the normalized GEP that starts counting at zero.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+ "normalized.idx");
+
+ // Handle the reverse integer induction variable case.
+ if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
+ IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
+ Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
+ "resize.norm.idx");
+ Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
+ "reverse.idx");
+
+ // This is a new value so do not hoist it out.
+ Value *Broadcasted = getBroadcastInstrs(ReverseInd);
+ // After broadcasting the induction variable we need to make the
+ // vector consecutive by adding ... -3, -2, -1, 0.
+ for (unsigned part = 0; part < UF; ++part)
+ Entry[part] = getConsecutiveVector(Broadcasted, -VF * part, true);
+ continue;
+ }
+
+ // Handle the pointer induction variable case.
+ assert(P->getType()->isPointerTy() && "Unexpected type.");
+
+ // Is this a reverse induction ptr or a consecutive induction ptr.
+ bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
+ II.IK);
+
+ // This is the vector of results. Notice that we don't generate
+ // vector geps because scalar geps result in better code.
+ for (unsigned part = 0; part < UF; ++part) {
+ Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+ for (unsigned int i = 0; i < VF; ++i) {
+ int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
+ Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Value *GlobalIdx;
+ if (!Reverse)
+ GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+ else
+ GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
+
+ Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
+ "next.gep");
+ VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+ Builder.getInt32(i),
+ "insert.gep");
+ }
+ Entry[part] = VecVal;
+ }
+ continue;
+ }
+
+ }// End of PHI.
+
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen binops.
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it);
+ VectorParts &A = getVectorValue(it->getOperand(0));
+ VectorParts &B = getVectorValue(it->getOperand(1));
+
+ // Use this vector value for all users of the original instruction.
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
+
+ // Update the NSW, NUW and Exact flags. Notice: V can be an Undef.
+ BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V);
+ if (VecOp && isa<OverflowingBinaryOperator>(BinOp)) {
+ VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
+ VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
+ }
+ if (VecOp && isa<PossiblyExactOperator>(VecOp))
+ VecOp->setIsExact(BinOp->isExact());
+
+ Entry[Part] = V;
+ }
+ break;
+ }
+ case Instruction::Select: {
+ // Widen selects.
+ // If the selector is loop invariant we can create a select
+ // instruction with a scalar condition. Otherwise, use vector-select.
+ bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)),
+ OrigLoop);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ VectorParts &Cond = getVectorValue(it->getOperand(0));
+ VectorParts &Op0 = getVectorValue(it->getOperand(1));
+ VectorParts &Op1 = getVectorValue(it->getOperand(2));
+ Value *ScalarCond = Builder.CreateExtractElement(Cond[0],
+ Builder.getInt32(0));
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Entry[Part] = Builder.CreateSelect(
+ InvariantCond ? ScalarCond : Cond[Part],
+ Op0[Part],
+ Op1[Part]);
+ }
+ break;
+ }
+
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Widen compares. Generate vector compares.
+ bool FCmp = (it->getOpcode() == Instruction::FCmp);
+ CmpInst *Cmp = dyn_cast<CmpInst>(it);
+ VectorParts &A = getVectorValue(it->getOperand(0));
+ VectorParts &B = getVectorValue(it->getOperand(1));
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *C = 0;
+ if (FCmp)
+ C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
+ else
+ C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
+ Entry[Part] = C;
+ }
+ break;
+ }
+
+ case Instruction::Store:
+ case Instruction::Load:
+ vectorizeMemoryInstruction(it, Legal);
+ break;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ CastInst *CI = dyn_cast<CastInst>(it);
+ /// Optimize the special case where the source is the induction
+ /// variable. Notice that we can only optimize the 'trunc' case
+ /// because: a. FP conversions lose precision, b. sext/zext may wrap,
+ /// c. other casts depend on pointer size.
+ if (CI->getOperand(0) == OldInduction &&
+ it->getOpcode() == Instruction::Trunc) {
+ Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
+ CI->getType());
+ Value *Broadcasted = getBroadcastInstrs(ScalarCast);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false);
+ break;
+ }
+ /// Vectorize casts.
+ Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+
+ VectorParts &A = getVectorValue(it->getOperand(0));
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
+ break;
+ }
+
+ case Instruction::Call: {
+ // Ignore dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(it))
+ break;
+
+ Module *M = BB->getParent()->getParent();
+ CallInst *CI = cast<CallInst>(it);
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Not an intrinsic call!");
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value*, 4> Args;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
+ Args.push_back(Arg[Part]);
+ }
+ Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
+ Function *F = Intrinsic::getDeclaration(M, ID, Tys);
+ Entry[Part] = Builder.CreateCall(F, Args);
+ }
+ break;
+ }
+
+ default:
+ // All other instructions are unsupported. Scalarize them.
+ scalarizeInstruction(it);
+ break;
+ }// end of switch.
+ }// end of for_each instr.
+}
+
+void InnerLoopVectorizer::updateAnalysis() {
+ // Forget the original basic block.
+ SE->forgetLoop(OrigLoop);
+
+ // Update the dominator tree information.
+ assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
+ "Entry does not dominate exit.");
+
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
+ DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
+ DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
+ DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader);
+ DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front());
+ DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
+ DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
+ DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
+
+ DEBUG(DT->verifyAnalysis());
+}
+
+bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
+ if (!EnableIfConversion)
+ return false;
+
+ assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
+ std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
+
+ // Collect the blocks that need predication.
+ for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
+ BasicBlock *BB = LoopBlocks[i];
+
+ // We don't support switch statements inside loops.
+ if (!isa<BranchInst>(BB->getTerminator()))
+ return false;
+
+ // We must have at most two predecessors because we need to convert
+ // all PHIs to selects.
+ unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
+ if (Preds > 2)
+ return false;
+
+ // We must be able to predicate all blocks that need to be predicated.
+ if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
+ return false;
+ }
+
+ // We can if-convert this loop.
+ return true;
+}
+
+bool LoopVectorizationLegality::canVectorize() {
+ assert(TheLoop->getLoopPreheader() && "No preheader!!");
+
+ // We can only vectorize innermost loops.
+ if (TheLoop->getSubLoopsVector().size())
+ return false;
+
+ // We must have a single backedge.
+ if (TheLoop->getNumBackEdges() != 1)
+ return false;
+
+ // We must have a single exiting block.
+ if (!TheLoop->getExitingBlock())
+ return false;
+
+ unsigned NumBlocks = TheLoop->getNumBlocks();
+
+ // Check if we can if-convert non single-bb loops.
+ if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
+ DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
+ return false;
+ }
+
+ // We need to have a loop header.
+ BasicBlock *Latch = TheLoop->getLoopLatch();
+ DEBUG(dbgs() << "LV: Found a loop: " <<
+ TheLoop->getHeader()->getName() << "\n");
+
+ // ScalarEvolution needs to be able to find the exit count.
+ const SCEV *ExitCount = SE->getExitCount(TheLoop, Latch);
+ if (ExitCount == SE->getCouldNotCompute()) {
+ DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
+ return false;
+ }
+
+ // Do not loop-vectorize loops with a tiny trip count.
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
+ if (TC > 0u && TC < TinyTripCountVectorThreshold) {
+ DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
+ "This loop is not worth vectorizing.\n");
+ return false;
+ }
+
+ // Check if we can vectorize the instructions and CFG in this loop.
+ if (!canVectorizeInstrs()) {
+ DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
+ return false;
+ }
+
+ // Go over each instruction and look at memory deps.
+ if (!canVectorizeMemory()) {
+ DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
+ return false;
+ }
+
+ // Collect all of the variables that remain uniform after vectorization.
+ collectLoopUniforms();
+
+ DEBUG(dbgs() << "LV: We can vectorize this loop" <<
+ (PtrRtCheck.Need ? " (with a runtime bound check)" : "")
+ <<"!\n");
+
+ // Okay! We can vectorize. At this point we don't have any other mem analysis
+ // which may limit our maximum vectorization factor, so just return true with
+ // no restrictions.
+ return true;
+}
+
+bool LoopVectorizationLegality::canVectorizeInstrs() {
+ BasicBlock *PreHeader = TheLoop->getLoopPreheader();
+ BasicBlock *Header = TheLoop->getHeader();
+
+ // If we marked the scalar loop as "already vectorized" then no need
+ // to vectorize it again.
+ if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) {
+ DEBUG(dbgs() << "LV: This loop was vectorized before\n");
+ return false;
+ }
+
+ // For each block in the loop.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+
+ // Scan the instructions in the block and look for hazards.
+ for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+ ++it) {
+
+ if (PHINode *Phi = dyn_cast<PHINode>(it)) {
+ // This should not happen because the loop should be normalized.
+ if (Phi->getNumIncomingValues() != 2) {
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ return false;
+ }
+
+ // Check that this PHI type is allowed.
+ if (!Phi->getType()->isIntegerTy() &&
+ !Phi->getType()->isFloatingPointTy() &&
+ !Phi->getType()->isPointerTy()) {
+ DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
+ return false;
+ }
+
+ // If this PHINode is not in the header block, then we know that we
+ // can convert it to select during if-conversion. No need to check if
+ // the PHIs in this block are induction or reduction variables.
+ if (*bb != Header)
+ continue;
+
+ // This is the value coming from the preheader.
+ Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
+ // Check if this is an induction variable.
+ InductionKind IK = isInductionVariable(Phi);
+
+ if (IK_NoInduction != IK) {
+ // Int inductions are special because we only allow one IV.
+ if (IK == IK_IntInduction) {
+ if (Induction) {
+ DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
+ return false;
+ }
+ Induction = Phi;
+ }
+
+ DEBUG(dbgs() << "LV: Found an induction variable.\n");
+ Inductions[Phi] = InductionInfo(StartValue, IK);
+ continue;
+ }
+
+ if (AddReductionVar(Phi, RK_IntegerAdd)) {
+ DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMult)) {
+ DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_IntegerOr)) {
+ DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_IntegerAnd)) {
+ DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_IntegerXor)) {
+ DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_FloatMult)) {
+ DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_FloatAdd)) {
+ DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+
+ DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
+ return false;
+ }// end of PHI handling
+
+ // We still don't handle functions. However, we can ignore dbg intrinsic
+ // calls and we do handle certain intrinsic and libm functions.
+ CallInst *CI = dyn_cast<CallInst>(it);
+ if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
+ DEBUG(dbgs() << "LV: Found a call site.\n");
+ return false;
+ }
+
+ // Check that the instruction return type is vectorizable.
+ if (!VectorType::isValidElementType(it->getType()) &&
+ !it->getType()->isVoidTy()) {
+ DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+ return false;
+ }
+
+ // Check that the stored type is vectorizable.
+ if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
+ Type *T = ST->getValueOperand()->getType();
+ if (!VectorType::isValidElementType(T))
+ return false;
+ }
+
+ // Reduction instructions are allowed to have exit users.
+ // All other instructions must not have external users.
+ if (!AllowedExit.count(it))
+ //Check that all of the users of the loop are inside the BB.
+ for (Value::use_iterator I = it->use_begin(), E = it->use_end();
+ I != E; ++I) {
+ Instruction *U = cast<Instruction>(*I);
+ // This user may be a reduction exit value.
+ if (!TheLoop->contains(U)) {
+ DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ return false;
+ }
+ }
+ } // next instr.
+
+ }
+
+ if (!Induction) {
+ DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
+ assert(getInductionVars()->size() && "No induction variables");
+ }
+
+ return true;
+}
+
+void LoopVectorizationLegality::collectLoopUniforms() {
+ // We now know that the loop is vectorizable!
+ // Collect variables that will remain uniform after vectorization.
+ std::vector<Value*> Worklist;
+ BasicBlock *Latch = TheLoop->getLoopLatch();
+
+ // Start with the conditional branch and walk up the block.
+ Worklist.push_back(Latch->getTerminator()->getOperand(0));
+
+ while (Worklist.size()) {
+ Instruction *I = dyn_cast<Instruction>(Worklist.back());
+ Worklist.pop_back();
+
+ // Look at instructions inside this loop.
+ // Stop when reaching PHI nodes.
+ // TODO: we need to follow values all over the loop, not only in this block.
+ if (!I || !TheLoop->contains(I) || isa<PHINode>(I))
+ continue;
+
+ // This is a known uniform.
+ Uniforms.insert(I);
+
+ // Insert all operands.
+ for (int i = 0, Op = I->getNumOperands(); i < Op; ++i) {
+ Worklist.push_back(I->getOperand(i));
+ }
+ }
+}
+
+AliasAnalysis::Location
+LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) {
+ if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+ return AA->getLocation(Store);
+ else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
+ return AA->getLocation(Load);
+
+ llvm_unreachable("Should be either load or store instruction");
+}
+
+bool
+LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
+ Value *Object,
+ Instruction *Inst,
+ AliasMultiMap& WriteObjects,
+ unsigned MaxByteWidth) {
+
+ AliasAnalysis::Location ThisLoc = getLoadStoreLocation(Inst);
+
+ std::vector<Instruction*>::iterator
+ it = WriteObjects[Object].begin(),
+ end = WriteObjects[Object].end();
+
+ for (; it != end; ++it) {
+ Instruction* I = *it;
+ if (I == Inst)
+ continue;
+
+ AliasAnalysis::Location ThatLoc = getLoadStoreLocation(I);
+ if (AA->alias(ThisLoc.getWithNewSize(MaxByteWidth),
+ ThatLoc.getWithNewSize(MaxByteWidth)))
+ return true;
+ }
+ return false;
+}
+
+bool LoopVectorizationLegality::canVectorizeMemory() {
+
+ if (TheLoop->isAnnotatedParallel()) {
+ DEBUG(dbgs()
+ << "LV: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ return true;
+ }
+
+ typedef SmallVector<Value*, 16> ValueVector;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+ // Holds the Load and Store *instructions*.
+ ValueVector Loads;
+ ValueVector Stores;
+ PtrRtCheck.Pointers.clear();
+ PtrRtCheck.Need = false;
+
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+ ++it) {
+
+ // If this is a load, save it. If this instruction can read from memory
+ // but is not a load, then we quit. Notice that we don't handle function
+ // calls that read or write.
+ if (it->mayReadFromMemory()) {
+ LoadInst *Ld = dyn_cast<LoadInst>(it);
+ if (!Ld) return false;
+ if (!Ld->isSimple()) {
+ DEBUG(dbgs() << "LV: Found a non-simple load.\n");
+ return false;
+ }
+ Loads.push_back(Ld);
+ continue;
+ }
+
+ // Save 'store' instructions. Abort if other instructions write to memory.
+ if (it->mayWriteToMemory()) {
+ StoreInst *St = dyn_cast<StoreInst>(it);
+ if (!St) return false;
+ if (!St->isSimple()) {
+ DEBUG(dbgs() << "LV: Found a non-simple store.\n");
+ return false;
+ }
+ Stores.push_back(St);
+ }
+ } // next instr.
+ } // next block.
+
+ // Now we have two lists that hold the loads and the stores.
+ // Next, we find the pointers that they use.
+
+ // Check if we see any stores. If there are no stores, then we don't
+ // care if the pointers are *restrict*.
+ if (!Stores.size()) {
+ DEBUG(dbgs() << "LV: Found a read-only loop!\n");
+ return true;
+ }
+
+ // Holds the read and read-write *pointers* that we find. These maps hold
+ // unique values for pointers (so no need for multi-map).
+ AliasMap Reads;
+ AliasMap ReadWrites;
+
+ // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
+ // multiple times on the same object. If the ptr is accessed twice, once
+ // for read and once for write, it will only appear once (on the write
+ // list). This is okay, since we are going to check for conflicts between
+ // writes and between reads and writes, but not between reads and reads.
+ ValueSet Seen;
+
+ ValueVector::iterator I, IE;
+ for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
+ StoreInst *ST = cast<StoreInst>(*I);
+ Value* Ptr = ST->getPointerOperand();
+
+ if (isUniform(Ptr)) {
+ DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+ return false;
+ }
+
+ // If we did *not* see this pointer before, insert it to
+ // the read-write list. At this phase it is only a 'write' list.
+ if (Seen.insert(Ptr))
+ ReadWrites.insert(std::make_pair(Ptr, ST));
+ }
+
+ for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
+ LoadInst *LD = cast<LoadInst>(*I);
+ Value* Ptr = LD->getPointerOperand();
+ // If we did *not* see this pointer before, insert it to the
+ // read list. If we *did* see it before, then it is already in
+ // the read-write list. This allows us to vectorize expressions
+ // such as A[i] += x; Because the address of A[i] is a read-write
+ // pointer. This only works if the index of A[i] is consecutive.
+ // If the address of i is unknown (for example A[B[i]]) then we may
+ // read a few words, modify, and write a few words, and some of the
+ // words may be written to the same address.
+ if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr))
+ Reads.insert(std::make_pair(Ptr, LD));
+ }
+
+ // If we write (or read-write) to a single destination and there are no
+ // other reads in this loop then is it safe to vectorize.
+ if (ReadWrites.size() == 1 && Reads.size() == 0) {
+ DEBUG(dbgs() << "LV: Found a write-only loop!\n");
+ return true;
+ }
+
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ bool CanDoRT = true;
+ AliasMap::iterator MI, ME;
+ for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
+ Value *V = (*MI).first;
+ if (hasComputableBounds(V)) {
+ PtrRtCheck.insert(SE, TheLoop, V);
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
+ } else {
+ CanDoRT = false;
+ break;
+ }
+ }
+ for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
+ Value *V = (*MI).first;
+ if (hasComputableBounds(V)) {
+ PtrRtCheck.insert(SE, TheLoop, V);
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
+ } else {
+ CanDoRT = false;
+ break;
+ }
+ }
+
+ // Check that we did not collect too many pointers or found a
+ // unsizeable pointer.
+ if (!CanDoRT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
+ PtrRtCheck.reset();
+ CanDoRT = false;
+ }
+
+ if (CanDoRT) {
+ DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
+ }
+
+ bool NeedRTCheck = false;
+
+ // Biggest vectorized access possible, vector width * unroll factor.
+ // TODO: We're being very pessimistic here, find a way to know the
+ // real access width before getting here.
+ unsigned MaxByteWidth = (TTI->getRegisterBitWidth(true) / 8) *
+ TTI->getMaximumUnrollFactor();
+ // Now that the pointers are in two lists (Reads and ReadWrites), we
+ // can check that there are no conflicts between each of the writes and
+ // between the writes to the reads.
+ // Note that WriteObjects duplicates the stores (indexed now by underlying
+ // objects) to avoid pointing to elements inside ReadWrites.
+ // TODO: Maybe create a new type where they can interact without duplication.
+ AliasMultiMap WriteObjects;
+ ValueVector TempObjects;
+
+ // Check that the read-writes do not conflict with other read-write
+ // pointers.
+ bool AllWritesIdentified = true;
+ for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
+ Value *Val = (*MI).first;
+ Instruction *Inst = (*MI).second;
+
+ GetUnderlyingObjects(Val, TempObjects, DL);
+ for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
+ UI != UE; ++UI) {
+ if (!isIdentifiedObject(*UI)) {
+ DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **UI <<"\n");
+ NeedRTCheck = true;
+ AllWritesIdentified = false;
+ }
+
+ // Never seen it before, can't alias.
+ if (WriteObjects[*UI].empty()) {
+ DEBUG(dbgs() << "LV: Adding Underlying value:" << **UI <<"\n");
+ WriteObjects[*UI].push_back(Inst);
+ continue;
+ }
+ // Direct alias found.
+ if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << **UI <<"\n");
+ return false;
+ }
+ DEBUG(dbgs() << "LV: Found a conflicting global value:"
+ << **UI <<"\n");
+ DEBUG(dbgs() << "LV: While examining store:" << *Inst <<"\n");
+ DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
+
+ // If global alias, make sure they do alias.
+ if (hasPossibleGlobalWriteReorder(*UI,
+ Inst,
+ WriteObjects,
+ MaxByteWidth)) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << *UI <<"\n");
+ return false;
+ }
+
+ // Didn't alias, insert into map for further reference.
+ WriteObjects[*UI].push_back(Inst);
+ }
+ TempObjects.clear();
+ }
+
+ /// Check that the reads don't conflict with the read-writes.
+ for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
+ Value *Val = (*MI).first;
+ GetUnderlyingObjects(Val, TempObjects, DL);
+ for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
+ UI != UE; ++UI) {
+ // If all of the writes are identified then we don't care if the read
+ // pointer is identified or not.
+ if (!AllWritesIdentified && !isIdentifiedObject(*UI)) {
+ DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **UI <<"\n");
+ NeedRTCheck = true;
+ }
+
+ // Never seen it before, can't alias.
+ if (WriteObjects[*UI].empty())
+ continue;
+ // Direct alias found.
+ if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << **UI <<"\n");
+ return false;
+ }
+ DEBUG(dbgs() << "LV: Found a global value: "
+ << **UI <<"\n");
+ Instruction *Inst = (*MI).second;
+ DEBUG(dbgs() << "LV: While examining load:" << *Inst <<"\n");
+ DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
+
+ // If global alias, make sure they do alias.
+ if (hasPossibleGlobalWriteReorder(*UI,
+ Inst,
+ WriteObjects,
+ MaxByteWidth)) {
+ DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
+ << *UI <<"\n");
+ return false;
+ }
+ }
+ TempObjects.clear();
+ }
+
+ PtrRtCheck.Need = NeedRTCheck;
+ if (NeedRTCheck && !CanDoRT) {
+ DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
+ "the array bounds.\n");
+ PtrRtCheck.reset();
+ return false;
+ }
+
+ DEBUG(dbgs() << "LV: We "<< (NeedRTCheck ? "" : "don't") <<
+ " need a runtime memory check.\n");
+ return true;
+}
+
+bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
+ ReductionKind Kind) {
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Reduction variables are only found in the loop header block.
+ if (Phi->getParent() != TheLoop->getHeader())
+ return false;
+
+ // Obtain the reduction start value from the value that comes from the loop
+ // preheader.
+ Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
+
+ // ExitInstruction is the single value which is used outside the loop.
+ // We only allow for a single reduction value to be used outside the loop.
+ // This includes users of the reduction, variables (which form a cycle
+ // which ends in the phi node).
+ Instruction *ExitInstruction = 0;
+ // Indicates that we found a binary operation in our scan.
+ bool FoundBinOp = false;
+
+ // Iter is our iterator. We start with the PHI node and scan for all of the
+ // users of this instruction. All users must be instructions that can be
+ // used as reduction variables (such as ADD). We may have a single
+ // out-of-block user. The cycle must end with the original PHI.
+ Instruction *Iter = Phi;
+ while (true) {
+ // If the instruction has no users then this is a broken
+ // chain and can't be a reduction variable.
+ if (Iter->use_empty())
+ return false;
+
+ // Did we find a user inside this loop already ?
+ bool FoundInBlockUser = false;
+ // Did we reach the initial PHI node already ?
+ bool FoundStartPHI = false;
+
+ // Is this a bin op ?
+ FoundBinOp |= !isa<PHINode>(Iter);
+
+ // Remember the current instruction.
+ Instruction *OldIter = Iter;
+
+ // For each of the *users* of iter.
+ for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
+ it != e; ++it) {
+ Instruction *U = cast<Instruction>(*it);
+ // We already know that the PHI is a user.
+ if (U == Phi) {
+ FoundStartPHI = true;
+ continue;
+ }
+
+ // Check if we found the exit user.
+ BasicBlock *Parent = U->getParent();
+ if (!TheLoop->contains(Parent)) {
+ // Exit if you find multiple outside users.
+ if (ExitInstruction != 0)
+ return false;
+ ExitInstruction = Iter;
+ }
+
+ // We allow in-loop PHINodes which are not the original reduction PHI
+ // node. If this PHI is the only user of Iter (happens in IF w/ no ELSE
+ // structure) then don't skip this PHI.
+ if (isa<PHINode>(Iter) && isa<PHINode>(U) &&
+ U->getParent() != TheLoop->getHeader() &&
+ TheLoop->contains(U) &&
+ Iter->hasNUsesOrMore(2))
+ continue;
+
+ // We can't have multiple inside users.
+ if (FoundInBlockUser)
+ return false;
+ FoundInBlockUser = true;
+
+ // Any reduction instr must be of one of the allowed kinds.
+ if (!isReductionInstr(U, Kind))
+ return false;
+
+ // Reductions of instructions such as Div, and Sub is only
+ // possible if the LHS is the reduction variable.
+ if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter)
+ return false;
+
+ Iter = U;
+ }
+
+ // If all uses were skipped this can't be a reduction variable.
+ if (Iter == OldIter)
+ return false;
+
+ // We found a reduction var if we have reached the original
+ // phi node and we only have a single instruction with out-of-loop
+ // users.
+ if (FoundStartPHI) {
+ // This instruction is allowed to have out-of-loop users.
+ AllowedExit.insert(ExitInstruction);
+
+ // Save the description of this reduction variable.
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
+ Reductions[Phi] = RD;
+ // We've ended the cycle. This is a reduction variable if we have an
+ // outside user and it has a binary op.
+ return FoundBinOp && ExitInstruction;
+ }
+ }
+}
+
+bool
+LoopVectorizationLegality::isReductionInstr(Instruction *I,
+ ReductionKind Kind) {
+ bool FP = I->getType()->isFloatingPointTy();
+ bool FastMath = (FP && I->isCommutative() && I->isAssociative());
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::PHI:
+ if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
+ return false;
+ // possibly.
+ return true;
+ case Instruction::Sub:
+ case Instruction::Add:
+ return Kind == RK_IntegerAdd;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::Mul:
+ return Kind == RK_IntegerMult;
+ case Instruction::And:
+ return Kind == RK_IntegerAnd;
+ case Instruction::Or:
+ return Kind == RK_IntegerOr;
+ case Instruction::Xor:
+ return Kind == RK_IntegerXor;
+ case Instruction::FMul:
+ return Kind == RK_FloatMult && FastMath;
+ case Instruction::FAdd:
+ return Kind == RK_FloatAdd && FastMath;
+ }
+}
+
+LoopVectorizationLegality::InductionKind
+LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
+ Type *PhiTy = Phi->getType();
+ // We only handle integer and pointer inductions variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return IK_NoInduction;
+
+ // Check that the PHI is consecutive.
+ const SCEV *PhiScev = SE->getSCEV(Phi);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return IK_NoInduction;
+ }
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Integer inductions need to have a stride of one.
+ if (PhiTy->isIntegerTy()) {
+ if (Step->isOne())
+ return IK_IntInduction;
+ if (Step->isAllOnesValue())
+ return IK_ReverseIntInduction;
+ return IK_NoInduction;
+ }
+
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C)
+ return IK_NoInduction;
+
+ assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
+ if (C->getValue()->equalsInt(Size))
+ return IK_PtrInduction;
+ else if (C->getValue()->equalsInt(0 - Size))
+ return IK_ReversePtrInduction;
+
+ return IK_NoInduction;
+}
+
+bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
+ Value *In0 = const_cast<Value*>(V);
+ PHINode *PN = dyn_cast_or_null<PHINode>(In0);
+ if (!PN)
+ return false;
+
+ return Inductions.count(PN);
+}
+
+bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
+ assert(TheLoop->contains(BB) && "Unknown block used");
+
+ // Blocks that do not dominate the latch need predication.
+ BasicBlock* Latch = TheLoop->getLoopLatch();
+ return !DT->dominates(BB, Latch);
+}
+
+bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // We don't predicate loads/stores at the moment.
+ if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+ return false;
+
+ // The instructions below can trap.
+ switch (it->getOpcode()) {
+ default: continue;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
+ const SCEV *PhiScev = SE->getSCEV(Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+ if (!AR)
+ return false;
+
+ return AR->isAffine();
+}
+
+LoopVectorizationCostModel::VectorizationFactor
+LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
+ unsigned UserVF) {
+ // Width 1 means no vectorize
+ VectorizationFactor Factor = { 1U, 0U };
+ if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
+ DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
+ return Factor;
+ }
+
+ // Find the trip count.
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
+ DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
+
+ unsigned WidestType = getWidestType();
+ unsigned WidestRegister = TTI.getRegisterBitWidth(true);
+ unsigned MaxVectorSize = WidestRegister / WidestType;
+ DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
+ DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n");
+
+ if (MaxVectorSize == 0) {
+ DEBUG(dbgs() << "LV: The target has no vector registers.\n");
+ MaxVectorSize = 1;
+ }
+
+ assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements"
+ " into one vector!");
+
+ unsigned VF = MaxVectorSize;
+
+ // If we optimize the program for size, avoid creating the tail loop.
+ if (OptForSize) {
+ // If we are unable to calculate the trip count then don't try to vectorize.
+ if (TC < 2) {
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+ return Factor;
+ }
+
+ // Find the maximum SIMD width that can fit within the trip count.
+ VF = TC % MaxVectorSize;
+
+ if (VF == 0)
+ VF = MaxVectorSize;
+
+ // If the trip count that we found modulo the vectorization factor is not
+ // zero then we require a tail.
+ if (VF < 2) {
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+ return Factor;
+ }
+ }
+
+ if (UserVF != 0) {
+ assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
+ DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
+
+ Factor.Width = UserVF;
+ return Factor;
+ }
+
+ float Cost = expectedCost(1);
+ unsigned Width = 1;
+ DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n");
+ for (unsigned i=2; i <= VF; i*=2) {
+ // Notice that the vector loop needs to be executed less times, so
+ // we need to divide the cost of the vector loops by the width of
+ // the vector elements.
+ float VectorCost = expectedCost(i) / (float)i;
+ DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " <<
+ (int)VectorCost << ".\n");
+ if (VectorCost < Cost) {
+ Cost = VectorCost;
+ Width = i;
+ }
+ }
+
+ DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
+ Factor.Width = Width;
+ Factor.Cost = Width * Cost;
+ return Factor;
+}
+
+unsigned LoopVectorizationCostModel::getWidestType() {
+ unsigned MaxWidth = 8;
+
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+ BasicBlock *BB = *bb;
+
+ // For each instruction in the loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ Type *T = it->getType();
+
+ // Only examine Loads, Stores and PHINodes.
+ if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
+ continue;
+
+ // Examine PHI nodes that are reduction variables.
+ if (PHINode *PN = dyn_cast<PHINode>(it))
+ if (!Legal->getReductionVars()->count(PN))
+ continue;
+
+ // Examine the stored values.
+ if (StoreInst *ST = dyn_cast<StoreInst>(it))
+ T = ST->getValueOperand()->getType();
+
+ // Ignore loaded pointer types and stored pointer types that are not
+ // consecutive. However, we do want to take consecutive stores/loads of
+ // pointer vectors into account.
+ if (T->isPointerTy() && !isConsecutiveLoadOrStore(it))
+ continue;
+
+ MaxWidth = std::max(MaxWidth,
+ (unsigned)DL->getTypeSizeInBits(T->getScalarType()));
+ }
+ }
+
+ return MaxWidth;
+}
+
+unsigned
+LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
+ unsigned UserUF,
+ unsigned VF,
+ unsigned LoopCost) {
+
+ // -- The unroll heuristics --
+ // We unroll the loop in order to expose ILP and reduce the loop overhead.
+ // There are many micro-architectural considerations that we can't predict
+ // at this level. For example frontend pressure (on decode or fetch) due to
+ // code size, or the number and capabilities of the execution ports.
+ //
+ // We use the following heuristics to select the unroll factor:
+ // 1. If the code has reductions the we unroll in order to break the cross
+ // iteration dependency.
+ // 2. If the loop is really small then we unroll in order to reduce the loop
+ // overhead.
+ // 3. We don't unroll if we think that we will spill registers to memory due
+ // to the increased register pressure.
+
+ // Use the user preference, unless 'auto' is selected.
+ if (UserUF != 0)
+ return UserUF;
+
+ // When we optimize for size we don't unroll.
+ if (OptForSize)
+ return 1;
+
+ // Do not unroll loops with a relatively small trip count.
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop,
+ TheLoop->getLoopLatch());
+ if (TC > 1 && TC < TinyTripCountUnrollThreshold)
+ return 1;
+
+ unsigned TargetVectorRegisters = TTI.getNumberOfRegisters(true);
+ DEBUG(dbgs() << "LV: The target has " << TargetVectorRegisters <<
+ " vector registers\n");
+
+ LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage();
+ // We divide by these constants so assume that we have at least one
+ // instruction that uses at least one register.
+ R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
+ R.NumInstructions = std::max(R.NumInstructions, 1U);
+
+ // We calculate the unroll factor using the following formula.
+ // Subtract the number of loop invariants from the number of available
+ // registers. These registers are used by all of the unrolled instances.
+ // Next, divide the remaining registers by the number of registers that is
+ // required by the loop, in order to estimate how many parallel instances
+ // fit without causing spills.
+ unsigned UF = (TargetVectorRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers;
+
+ // Clamp the unroll factor ranges to reasonable factors.
+ unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
+
+ // If we did not calculate the cost for VF (because the user selected the VF)
+ // then we calculate the cost of VF here.
+ if (LoopCost == 0)
+ LoopCost = expectedCost(VF);
+
+ // Clamp the calculated UF to be between the 1 and the max unroll factor
+ // that the target allows.
+ if (UF > MaxUnrollSize)
+ UF = MaxUnrollSize;
+ else if (UF < 1)
+ UF = 1;
+
+ if (Legal->getReductionVars()->size()) {
+ DEBUG(dbgs() << "LV: Unrolling because of reductions. \n");
+ return UF;
+ }
+
+ // We want to unroll tiny loops in order to reduce the loop overhead.
+ // We assume that the cost overhead is 1 and we use the cost model
+ // to estimate the cost of the loop and unroll until the cost of the
+ // loop overhead is about 5% of the cost of the loop.
+ DEBUG(dbgs() << "LV: Loop cost is "<< LoopCost <<" \n");
+ if (LoopCost < 20) {
+ DEBUG(dbgs() << "LV: Unrolling to reduce branch cost. \n");
+ unsigned NewUF = 20/LoopCost + 1;
+ return std::min(NewUF, UF);
+ }
+
+ DEBUG(dbgs() << "LV: Not Unrolling. \n");
+ return 1;
+}
+
+LoopVectorizationCostModel::RegisterUsage
+LoopVectorizationCostModel::calculateRegisterUsage() {
+ // This function calculates the register usage by measuring the highest number
+ // of values that are alive at a single location. Obviously, this is a very
+ // rough estimation. We scan the loop in a topological order in order and
+ // assign a number to each instruction. We use RPO to ensure that defs are
+ // met before their users. We assume that each instruction that has in-loop
+ // users starts an interval. We record every time that an in-loop value is
+ // used, so we have a list of the first and last occurrences of each
+ // instruction. Next, we transpose this data structure into a multi map that
+ // holds the list of intervals that *end* at a specific location. This multi
+ // map allows us to perform a linear search. We scan the instructions linearly
+ // and record each time that a new interval starts, by placing it in a set.
+ // If we find this value in the multi-map then we remove it from the set.
+ // The max register usage is the maximum size of the set.
+ // We also search for instructions that are defined outside the loop, but are
+ // used inside the loop. We need this number separately from the max-interval
+ // usage number because when we unroll, loop-invariant values do not take
+ // more register.
+ LoopBlocksDFS DFS(TheLoop);
+ DFS.perform(LI);
+
+ RegisterUsage R;
+ R.NumInstructions = 0;
+
+ // Each 'key' in the map opens a new interval. The values
+ // of the map are the index of the 'last seen' usage of the
+ // instruction that is the key.
+ typedef DenseMap<Instruction*, unsigned> IntervalMap;
+ // Maps instruction to its index.
+ DenseMap<unsigned, Instruction*> IdxToInstr;
+ // Marks the end of each interval.
+ IntervalMap EndPoint;
+ // Saves the list of instruction indices that are used in the loop.
+ SmallSet<Instruction*, 8> Ends;
+ // Saves the list of values that are used in the loop but are
+ // defined outside the loop, such as arguments and constants.
+ SmallPtrSet<Value*, 8> LoopInvariants;
+
+ unsigned Index = 0;
+ for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
+ be = DFS.endRPO(); bb != be; ++bb) {
+ R.NumInstructions += (*bb)->size();
+ for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+ ++it) {
+ Instruction *I = it;
+ IdxToInstr[Index++] = I;
+
+ // Save the end location of each USE.
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value *U = I->getOperand(i);
+ Instruction *Instr = dyn_cast<Instruction>(U);
+
+ // Ignore non-instruction values such as arguments, constants, etc.
+ if (!Instr) continue;
+
+ // If this instruction is outside the loop then record it and continue.
+ if (!TheLoop->contains(Instr)) {
+ LoopInvariants.insert(Instr);
+ continue;
+ }
+
+ // Overwrite previous end points.
+ EndPoint[Instr] = Index;
+ Ends.insert(Instr);
+ }
+ }
+ }
+
+ // Saves the list of intervals that end with the index in 'key'.
+ typedef SmallVector<Instruction*, 2> InstrList;
+ DenseMap<unsigned, InstrList> TransposeEnds;
+
+ // Transpose the EndPoints to a list of values that end at each index.
+ for (IntervalMap::iterator it = EndPoint.begin(), e = EndPoint.end();
+ it != e; ++it)
+ TransposeEnds[it->second].push_back(it->first);
+
+ SmallSet<Instruction*, 8> OpenIntervals;
+ unsigned MaxUsage = 0;
+
+
+ DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
+ for (unsigned int i = 0; i < Index; ++i) {
+ Instruction *I = IdxToInstr[i];
+ // Ignore instructions that are never used within the loop.
+ if (!Ends.count(I)) continue;
+
+ // Remove all of the instructions that end at this location.
+ InstrList &List = TransposeEnds[i];
+ for (unsigned int j=0, e = List.size(); j < e; ++j)
+ OpenIntervals.erase(List[j]);
+
+ // Count the number of live interals.
+ MaxUsage = std::max(MaxUsage, OpenIntervals.size());
+
+ DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " <<
+ OpenIntervals.size() <<"\n");
+
+ // Add the current instruction to the list of open intervals.
+ OpenIntervals.insert(I);
+ }
+
+ unsigned Invariant = LoopInvariants.size();
+ DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << " \n");
+ DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << " \n");
+ DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << " \n");
+
+ R.LoopInvariantRegs = Invariant;
+ R.MaxLocalUsers = MaxUsage;
+ return R;
+}
+
+unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
+ unsigned Cost = 0;
+
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+ unsigned BlockCost = 0;
+ BasicBlock *BB = *bb;
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // Skip dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(it))
+ continue;
+
+ unsigned C = getInstructionCost(it, VF);
+ Cost += C;
+ DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " <<
+ VF << " For instruction: "<< *it << "\n");
+ }
+
+ // We assume that if-converted blocks have a 50% chance of being executed.
+ // When the code is scalar then some of the blocks are avoided due to CF.
+ // When the code is vectorized we execute all code paths.
+ if (Legal->blockNeedsPredication(*bb) && VF == 1)
+ BlockCost /= 2;
+
+ Cost += BlockCost;
+ }
+
+ return Cost;
+}
+
+unsigned
+LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
+ // If we know that this instruction will remain uniform, check the cost of
+ // the scalar version.
+ if (Legal->isUniformAfterVectorization(I))
+ VF = 1;
+
+ Type *RetTy = I->getType();
+ Type *VectorTy = ToVectorTy(RetTy, VF);
+
+ // TODO: We need to estimate the cost of intrinsic calls.
+ switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:
+ // We mark this instruction as zero-cost because the cost of GEPs in
+ // vectorized code depends on whether the corresponding memory instruction
+ // is scalarized or not. Therefore, we handle GEPs with the memory
+ // instruction cost.
+ return 0;
+ case Instruction::Br: {
+ return TTI.getCFInstrCost(I->getOpcode());
+ }
+ case Instruction::PHI:
+ //TODO: IF-converted IFs become selects.
+ return 0;
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Certain instructions can be cheaper to vectorize if they have a constant
+ // second vector operand. One example of this are shifts on x86.
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_AnyValue;
+
+ if (isa<ConstantInt>(I->getOperand(1)))
+ Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+
+ return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK);
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
+ bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
+ Type *CondTy = SI->getCondition()->getType();
+ if (!ScalarCond)
+ CondTy = VectorType::get(CondTy, VF);
+
+ return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Type *ValTy = I->getOperand(0)->getType();
+ VectorTy = ToVectorTy(ValTy, VF);
+ return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
+ }
+ case Instruction::Store:
+ case Instruction::Load: {
+ StoreInst *SI = dyn_cast<StoreInst>(I);
+ LoadInst *LI = dyn_cast<LoadInst>(I);
+ Type *ValTy = (SI ? SI->getValueOperand()->getType() :
+ LI->getType());
+ VectorTy = ToVectorTy(ValTy, VF);
+
+ unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment();
+ unsigned AS = SI ? SI->getPointerAddressSpace() :
+ LI->getPointerAddressSpace();
+ Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand();
+ // We add the cost of address computation here instead of with the gep
+ // instruction because only here we know whether the operation is
+ // scalarized.
+ if (VF == 1)
+ return TTI.getAddressComputationCost(VectorTy) +
+ TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+
+ // Scalarized loads/stores.
+ int Stride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = Stride < 0;
+ if (0 == Stride) {
+ unsigned Cost = 0;
+ // The cost of extracting from the value vector and pointer vector.
+ Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
+ for (unsigned i = 0; i < VF; ++i) {
+ // The cost of extracting the pointer operand.
+ Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
+ // In case of STORE, the cost of ExtractElement from the vector.
+ // In case of LOAD, the cost of InsertElement into the returned
+ // vector.
+ Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement :
+ Instruction::InsertElement,
+ VectorTy, i);
+ }
+
+ // The cost of the scalar loads/stores.
+ Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType());
+ Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
+ Alignment, AS);
+ return Cost;
+ }
+
+ // Wide load/stores.
+ unsigned Cost = TTI.getAddressComputationCost(VectorTy);
+ Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+
+ if (Reverse)
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
+ VectorTy, 0);
+ return Cost;
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ // We optimize the truncation of induction variable.
+ // The cost of these is the same as the scalar operation.
+ if (I->getOpcode() == Instruction::Trunc &&
+ Legal->isInductionVariable(I->getOperand(0)))
+ return TTI.getCastInstrCost(I->getOpcode(), I->getType(),
+ I->getOperand(0)->getType());
+
+ Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+ return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
+ }
+ case Instruction::Call: {
+ CallInst *CI = cast<CallInst>(I);
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Not an intrinsic call!");
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type*, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+ return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+ }
+ default: {
+ // We are scalarizing the instruction. Return the cost of the scalar
+ // instruction, plus the cost of insert and extract into vector
+ // elements, times the vector width.
+ unsigned Cost = 0;
+
+ if (!RetTy->isVoidTy() && VF != 1) {
+ unsigned InsCost = TTI.getVectorInstrCost(Instruction::InsertElement,
+ VectorTy);
+ unsigned ExtCost = TTI.getVectorInstrCost(Instruction::ExtractElement,
+ VectorTy);
+
+ // The cost of inserting the results plus extracting each one of the
+ // operands.
+ Cost += VF * (InsCost + ExtCost * I->getNumOperands());
+ }
+
+ // The cost of executing VF copies of the scalar instruction. This opcode
+ // is unknown. Assume that it is the same as 'mul'.
+ Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy);
+ return Cost;
+ }
+ }// end of switch.
+}
+
+Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
+ if (Scalar->isVoidTy() || VF == 1)
+ return Scalar;
+ return VectorType::get(Scalar, VF);
+}
+
+char LoopVectorize::ID = 0;
+static const char lv_name[] = "Loop Vectorization";
+INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
+
+namespace llvm {
+ Pass *createLoopVectorizePass() {
+ return new LoopVectorize();
+ }
+}
+
+bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
+ // Check for a store.
+ if (StoreInst *ST = dyn_cast<StoreInst>(Inst))
+ return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0;
+
+ // Check for a load.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0;
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
new file mode 100644
index 000000000000..19eefd2f87e0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -0,0 +1,43 @@
+ //===-- Vectorize.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMVectorizeOpts.a, which
+// implements several vectorization transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm-c/Initialization.h"
+#include "llvm-c/Transforms/Vectorize.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+
+using namespace llvm;
+
+/// initializeVectorizationPasses - Initialize all passes linked into the
+/// Vectorization library.
+void llvm::initializeVectorization(PassRegistry &Registry) {
+ initializeBBVectorizePass(Registry);
+ initializeLoopVectorizePass(Registry);
+}
+
+void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
+ initializeVectorization(*unwrap(R));
+}
+
+void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBBVectorizePass());
+}
+
+void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopVectorizePass());
+}